summary | refs | log | tree | commit | diff | stats
path: root/ctdb/common
diff options
context:
space:
mode:
Diffstat (limited to 'ctdb/common')
-rw-r--r--ctdb/common/cmdline.c598
-rw-r--r--ctdb/common/cmdline.h163
-rw-r--r--ctdb/common/comm.c427
-rw-r--r--ctdb/common/comm.h101
-rw-r--r--ctdb/common/common.h160
-rw-r--r--ctdb/common/conf.c1391
-rw-r--r--ctdb/common/conf.h473
-rw-r--r--ctdb/common/conf_tool.c321
-rw-r--r--ctdb/common/conf_tool.h39
-rw-r--r--ctdb/common/ctdb_io.c498
-rw-r--r--ctdb/common/ctdb_ltdb.c430
-rw-r--r--ctdb/common/ctdb_util.c681
-rw-r--r--ctdb/common/db_hash.c295
-rw-r--r--ctdb/common/db_hash.h174
-rw-r--r--ctdb/common/event_script.c247
-rw-r--r--ctdb/common/event_script.h72
-rw-r--r--ctdb/common/hash_count.c219
-rw-r--r--ctdb/common/hash_count.h94
-rw-r--r--ctdb/common/line.c145
-rw-r--r--ctdb/common/line.h62
-rw-r--r--ctdb/common/logging.c745
-rw-r--r--ctdb/common/logging.h51
-rw-r--r--ctdb/common/logging_conf.c127
-rw-r--r--ctdb/common/logging_conf.h36
-rw-r--r--ctdb/common/path.c211
-rw-r--r--ctdb/common/path.h39
-rw-r--r--ctdb/common/path_tool.c384
-rw-r--r--ctdb/common/path_tool.h38
-rw-r--r--ctdb/common/pidfile.c85
-rw-r--r--ctdb/common/pidfile.h51
-rw-r--r--ctdb/common/pkt_read.c190
-rw-r--r--ctdb/common/pkt_read.h98
-rw-r--r--ctdb/common/pkt_write.c101
-rw-r--r--ctdb/common/pkt_write.h79
-rw-r--r--ctdb/common/rb_tree.c1101
-rw-r--r--ctdb/common/rb_tree.h90
-rw-r--r--ctdb/common/reqid.c89
-rw-r--r--ctdb/common/reqid.h89
-rw-r--r--ctdb/common/run_event.c829
-rw-r--r--ctdb/common/run_event.h150
-rw-r--r--ctdb/common/run_proc.c503
-rw-r--r--ctdb/common/run_proc.h100
-rw-r--r--ctdb/common/sock_client.c334
-rw-r--r--ctdb/common/sock_client.h129
-rw-r--r--ctdb/common/sock_daemon.c1100
-rw-r--r--ctdb/common/sock_daemon.h283
-rw-r--r--ctdb/common/sock_io.c328
-rw-r--r--ctdb/common/sock_io.h39
-rw-r--r--ctdb/common/srvid.c280
-rw-r--r--ctdb/common/srvid.h121
-rw-r--r--ctdb/common/system.c237
-rw-r--r--ctdb/common/system.h37
-rw-r--r--ctdb/common/system_socket.c1168
-rw-r--r--ctdb/common/system_socket.h46
-rw-r--r--ctdb/common/tmon.c602
-rw-r--r--ctdb/common/tmon.h218
-rw-r--r--ctdb/common/tunable.c401
-rw-r--r--ctdb/common/tunable.h35
58 files changed, 17134 insertions, 0 deletions
diff --git a/ctdb/common/cmdline.c b/ctdb/common/cmdline.c
new file mode 100644
index 0000000..ce368a9
--- /dev/null
+++ b/ctdb/common/cmdline.c
@@ -0,0 +1,598 @@
+/*
+ Command line processing
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <popt.h>
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+
+#include "common/cmdline.h"
+
+#define CMDLINE_MAX_LEN 80
+
+/* A named group of commands, listed under its own heading in usage output */
+struct cmdline_section {
+ const char *name; /* heading prefix; may be NULL, in which case no prefix is printed */
+ struct cmdline_command *commands; /* array terminated by an entry whose name is NULL */
+};
+
+/* State of one command-line parser instance */
+struct cmdline_context {
+ const char *prog; /* talloc copy of the program name */
+ struct poptOption *options; /* help table plus, optionally, the user's option table */
+ struct cmdline_section *section; /* array of num_sections command groups */
+ int num_sections;
+ size_t max_len; /* largest name+args length over all commands; used to align help text */
+ poptContext pc; /* freed by the talloc destructor */
+ int argc, arg0; /* argc: entries in argv; arg0: index of first argument after the matched command words */
+ const char **argv;
+ struct cmdline_command *match_cmd; /* command selected by the last match, or NULL */
+};
+
+/* Set once -h/--help has been seen; cmdline_parse() clears it before parsing */
+static bool cmdline_show_help = false;
+
+/*
+ * popt callback attached to the help option table.
+ *
+ * Only the option's short name is inspected; every other argument is
+ * ignored.
+ */
+static void cmdline_popt_help(poptContext pc,
+			      enum poptCallbackReason reason,
+			      struct poptOption *key,
+			      const char *arg,
+			      void *data)
+{
+	if (key->shortName != 'h') {
+		return;
+	}
+
+	cmdline_show_help = true;
+}
+
+/*
+ * Help option table: a callback entry naming cmdline_popt_help(),
+ * followed by the -h/--help option itself.
+ */
+struct poptOption cmdline_help_options[] = {
+ { NULL, '\0', POPT_ARG_CALLBACK, cmdline_popt_help, 0, NULL, NULL },
+ { "help", 'h', 0, NULL, 'h', "Show this help message", NULL },
+ POPT_TABLEEND
+};
+
+/* Table entry that includes cmdline_help_options under the "Help Options:" heading */
+#define CMDLINE_HELP_OPTIONS \
+ { NULL, '\0', POPT_ARG_INCLUDE_TABLE, cmdline_help_options, \
+ 0, "Help Options:", NULL }
+
+/*
+ * Validate one user-supplied option definition.
+ *
+ * An option is accepted only when it has a long name, one of the supported
+ * argument types, a non-NULL arg pointer and a help description.  The first
+ * requirement that fails is logged and false is returned.
+ */
+static bool cmdline_option_check(struct poptOption *option)
+{
+	const char *long_name = option->longName;
+
+	if (long_name == NULL) {
+		D_ERR("Option has no long name\n");
+		return false;
+	}
+
+	switch (option->argInfo) {
+	case POPT_ARG_STRING:
+	case POPT_ARG_INT:
+	case POPT_ARG_LONG:
+	case POPT_ARG_VAL:
+	case POPT_ARG_FLOAT:
+	case POPT_ARG_DOUBLE:
+		break;
+	default:
+		D_ERR("Option '%s' has unsupported type\n", long_name);
+		return false;
+	}
+
+	if (option->arg == NULL) {
+		D_ERR("Option '%s' has invalid arg\n", long_name);
+		return false;
+	}
+
+	if (option->descrip == NULL) {
+		D_ERR("Option '%s' has no help msg\n", long_name);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Validate every entry of an option table.
+ *
+ * A NULL table is valid (no user options).  The table ends at the first
+ * entry that has neither a long name nor a short name.
+ */
+static bool cmdline_options_check(struct poptOption *options)
+{
+	struct poptOption *opt;
+
+	if (options == NULL) {
+		return true;
+	}
+
+	for (opt = options;
+	     opt->longName != NULL || opt->shortName != '\0';
+	     opt++) {
+		if (!cmdline_option_check(opt)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Build the top-level popt table handed to poptGetContext().
+ *
+ * The table holds the help options, then (only when user_options is not
+ * NULL) an include entry for the user's table, then the terminator.
+ *
+ * Returns 0 and stores the talloc'd table in *result, or ENOMEM.
+ */
+static int cmdline_options_define(TALLOC_CTX *mem_ctx,
+				  struct poptOption *user_options,
+				  struct poptOption **result)
+{
+	struct poptOption *table;
+	int num = 2;
+	int next = 0;
+
+	if (user_options != NULL) {
+		num++;
+	}
+
+	table = talloc_array(mem_ctx, struct poptOption, num);
+	if (table == NULL) {
+		return ENOMEM;
+	}
+
+	table[next] = (struct poptOption) CMDLINE_HELP_OPTIONS;
+	next++;
+
+	if (user_options != NULL) {
+		table[next] = (struct poptOption) {
+			.argInfo = POPT_ARG_INCLUDE_TABLE,
+			.arg = user_options,
+			.descrip = "Options:",
+		};
+		next++;
+	}
+
+	table[next] = (struct poptOption) POPT_TABLEEND;
+
+	*result = table;
+	return 0;
+}
+
+/*
+ * Validate one command definition and track the widest usage column.
+ *
+ * A command needs a name, an implementation function and a help message.
+ * The combined length of name and argument help, and the length of the
+ * help message, must each fit in CMDLINE_MAX_LEN.  *max_len is raised to
+ * the name+args length when that is larger; this happens before the help
+ * message length is checked.
+ */
+static bool cmdline_command_check(struct cmdline_command *cmd, size_t *max_len)
+{
+	size_t usage_len, help_len;
+
+	if (cmd->name == NULL) {
+		return false;
+	}
+
+	if (cmd->fn == NULL) {
+		D_ERR("Command '%s' has no implementation function\n",
+		      cmd->name);
+		return false;
+	}
+
+	if (cmd->msg_help == NULL) {
+		D_ERR("Command '%s' has no help msg\n", cmd->name);
+		return false;
+	}
+
+	usage_len = strlen(cmd->name);
+	usage_len += (cmd->msg_args != NULL) ? strlen(cmd->msg_args) : 0;
+	if (usage_len > CMDLINE_MAX_LEN) {
+		D_ERR("Command '%s' is too long (%zu)\n",
+		      cmd->name, usage_len);
+		return false;
+	}
+
+	if (*max_len < usage_len) {
+		*max_len = usage_len;
+	}
+
+	help_len = strlen(cmd->msg_help);
+	if (help_len > CMDLINE_MAX_LEN) {
+		D_ERR("Command '%s' help too long (%zu)\n",
+		      cmd->name, help_len);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Validate a command table terminated by an entry with a NULL name.
+ *
+ * A NULL table is rejected.  *max_len is updated by the per-command check.
+ */
+static bool cmdline_commands_check(struct cmdline_command *commands,
+				   size_t *max_len)
+{
+	struct cmdline_command *cmd;
+
+	if (commands == NULL) {
+		return false;
+	}
+
+	for (cmd = commands; cmd->name != NULL; cmd++) {
+		if (!cmdline_command_check(cmd, max_len)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static int cmdline_context_destructor(struct cmdline_context *cmdline);
+
+/*
+ * Append a section of commands to the context.
+ *
+ * The commands are validated first; the section array is then grown by one
+ * entry.  Returns 0 on success, EINVAL for an invalid command table, or
+ * ENOMEM if the array cannot be grown (the context is left unchanged).
+ */
+static int cmdline_section_add(struct cmdline_context *cmdline,
+			       const char *name,
+			       struct cmdline_command *commands)
+{
+	struct cmdline_section *grown;
+	size_t widest = 0;
+
+	if (!cmdline_commands_check(commands, &widest)) {
+		return EINVAL;
+	}
+
+	grown = talloc_realloc(cmdline,
+			       cmdline->section,
+			       struct cmdline_section,
+			       cmdline->num_sections + 1);
+	if (grown == NULL) {
+		return ENOMEM;
+	}
+
+	grown[cmdline->num_sections] = (struct cmdline_section) {
+		.name = name,
+		.commands = commands,
+	};
+	cmdline->section = grown;
+	cmdline->num_sections++;
+
+	/* Keep the help column wide enough for the longest command seen */
+	if (cmdline->max_len < widest) {
+		cmdline->max_len = widest;
+	}
+
+	return 0;
+}
+
+/*
+ * Create a cmdline context holding one initial section of commands.
+ *
+ * Returns EINVAL if prog is NULL or the options/commands fail validation,
+ * ENOMEM on allocation failure, and 0 on success with *result set.  On any
+ * failure the partially built context is freed and *result is not written.
+ */
+int cmdline_init(TALLOC_CTX *mem_ctx,
+		 const char *prog,
+		 struct poptOption *options,
+		 const char *name,
+		 struct cmdline_command *commands,
+		 struct cmdline_context **result)
+{
+	struct cmdline_context *cmdline;
+	int ret;
+
+	if (prog == NULL) {
+		return EINVAL;
+	}
+
+	if (!cmdline_options_check(options)) {
+		return EINVAL;
+	}
+
+	cmdline = talloc_zero(mem_ctx, struct cmdline_context);
+	if (cmdline == NULL) {
+		return ENOMEM;
+	}
+
+	cmdline->prog = talloc_strdup(cmdline, prog);
+	if (cmdline->prog == NULL) {
+		ret = ENOMEM;
+		goto fail;
+	}
+
+	ret = cmdline_options_define(cmdline, options, &cmdline->options);
+	if (ret != 0) {
+		goto fail;
+	}
+
+	ret = cmdline_section_add(cmdline, name, commands);
+	if (ret != 0) {
+		goto fail;
+	}
+
+	/* Minimal argument vector: just the program name */
+	cmdline->argv = talloc_array(cmdline, const char *, 2);
+	cmdline->argc = 1;
+	if (cmdline->argv == NULL) {
+		ret = ENOMEM;
+		goto fail;
+	}
+	cmdline->argv[0] = cmdline->prog;
+	cmdline->argv[1] = NULL;
+
+	/* Dummy popt context for generating help */
+	cmdline->pc = poptGetContext(cmdline->prog,
+				     cmdline->argc,
+				     cmdline->argv,
+				     cmdline->options,
+				     0);
+	if (cmdline->pc == NULL) {
+		ret = ENOMEM;
+		goto fail;
+	}
+
+	/* From here on, freeing the context also releases the popt context */
+	talloc_set_destructor(cmdline, cmdline_context_destructor);
+
+	*result = cmdline;
+	return 0;
+
+fail:
+	talloc_free(cmdline);
+	return ret;
+}
+
+/* talloc destructor: release the popt context, if there is one */
+static int cmdline_context_destructor(struct cmdline_context *cmdline)
+{
+	if (cmdline->pc == NULL) {
+		return 0;
+	}
+
+	poptFreeContext(cmdline->pc);
+	return 0;
+}
+
+/*
+ * Public entry point for adding a further section of commands; the
+ * result of cmdline_section_add() is returned unchanged.
+ */
+int cmdline_add(struct cmdline_context *cmdline,
+		const char *name,
+		struct cmdline_command *commands)
+{
+	int ret;
+
+	ret = cmdline_section_add(cmdline, name, commands);
+	return ret;
+}
+
+/*
+ * Run popt over argv and keep the leftover (non-option) arguments.
+ *
+ * Any existing popt context is replaced.  Returns ENOMEM if a new context
+ * cannot be created, EINVAL (after logging) if popt reports anything other
+ * than end-of-options, and 0 otherwise with cmdline->argv/argc describing
+ * the remaining arguments.
+ */
+static int cmdline_parse_options(struct cmdline_context *cmdline,
+				 int argc,
+				 const char **argv)
+{
+	int rc;
+
+	if (cmdline->pc != NULL) {
+		poptFreeContext(cmdline->pc);
+	}
+
+	cmdline->pc = poptGetContext(cmdline->prog,
+				     argc,
+				     argv,
+				     cmdline->options,
+				     0);
+	if (cmdline->pc == NULL) {
+		return ENOMEM;
+	}
+
+	rc = poptGetNextOpt(cmdline->pc);
+	if (rc != -1) {
+		D_ERR("Invalid option %s: %s\n",
+		      poptBadOption(cmdline->pc, 0),
+		      poptStrerror(rc));
+		return EINVAL;
+	}
+
+	/* Set up remaining arguments for commands */
+	cmdline->argv = poptGetArgs(cmdline->pc);
+	cmdline->argc = 0;
+	if (cmdline->argv == NULL) {
+		return 0;
+	}
+
+	for (; cmdline->argv[cmdline->argc] != NULL; cmdline->argc++) {
+		/* counting only */
+	}
+
+	return 0;
+}
+
+/*
+ * Try to match the leading arguments against the commands of one section.
+ *
+ * A command name is split on spaces and each word must equal the argument
+ * at the same position.  The first command whose words all match wins:
+ * cmdline->match_cmd is set to it, cmdline->arg0 to the index of the first
+ * argument after the command words, and 0 is returned.  If nothing
+ * matches, match_cmd is set to NULL and ENOENT is returned.
+ *
+ * Tokenising uses strtok_r() so that no hidden static state is shared
+ * with other users of strtok() in the process.
+ */
+static int cmdline_match_section(struct cmdline_context *cmdline,
+				 struct cmdline_section *section)
+{
+	int i;
+
+	for (i=0; section->commands[i].name != NULL; i++) {
+		struct cmdline_command *cmd;
+		char name[CMDLINE_MAX_LEN+1];
+		size_t len;
+		char *t, *str;
+		char *saveptr = NULL;
+		int n = 0;
+		bool match = false;
+
+		cmd = &section->commands[i];
+		/* Work on a copy: tokenising modifies the string */
+		len = strlcpy(name, cmd->name, sizeof(name));
+		if (len >= sizeof(name)) {
+			D_ERR("Skipping long command '%s'\n", cmd->name);
+			continue;
+		}
+
+		str = name;
+		while ((t = strtok_r(str, " ", &saveptr)) != NULL) {
+			if (n >= cmdline->argc) {
+				match = false;
+				break;
+			}
+			if (cmdline->argv[n] == NULL) {
+				match = false;
+				break;
+			}
+			if (strcmp(cmdline->argv[n], t) == 0) {
+				match = true;
+				cmdline->arg0 = n+1;
+			} else {
+				match = false;
+				break;
+			}
+
+			n += 1;
+			str = NULL;
+		}
+
+		if (match) {
+			cmdline->match_cmd = cmd;
+			return 0;
+		}
+	}
+
+	cmdline->match_cmd = NULL;
+	return ENOENT;
+}
+
+/*
+ * Match the current arguments against every section in order.
+ *
+ * Returns EINVAL (and clears match_cmd) when there are no arguments,
+ * 0 as soon as one section matches, otherwise the result of the last
+ * section tried (ENOENT when there are no sections).
+ */
+static int cmdline_match(struct cmdline_context *cmdline)
+{
+	int ret = ENOENT;
+	int idx;
+
+	if (cmdline->argc == 0 || cmdline->argv == NULL) {
+		cmdline->match_cmd = NULL;
+		return EINVAL;
+	}
+
+	for (idx = 0; idx < cmdline->num_sections; idx++) {
+		ret = cmdline_match_section(cmdline, &cmdline->section[idx]);
+		if (ret == 0) {
+			return 0;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Parse options (optionally) and identify the command to run.
+ *
+ * With fewer than two arguments, full usage is printed and EINVAL
+ * returned.  If option parsing fails, full usage is printed and that error
+ * returned.  If no command matches, or help was requested, usage is
+ * printed (compact when a command did match); a help request makes the
+ * return value EAGAIN regardless of the match result.
+ */
+int cmdline_parse(struct cmdline_context *cmdline,
+		  int argc,
+		  const char **argv,
+		  bool parse_options)
+{
+	const char *name = NULL;
+	int ret;
+
+	if (argc < 2) {
+		cmdline_usage(cmdline, NULL);
+		return EINVAL;
+	}
+
+	cmdline_show_help = false;
+
+	if (!parse_options) {
+		cmdline->argc = argc;
+		cmdline->argv = argv;
+	} else {
+		ret = cmdline_parse_options(cmdline, argc, argv);
+		if (ret != 0) {
+			cmdline_usage(cmdline, NULL);
+			return ret;
+		}
+	}
+
+	ret = cmdline_match(cmdline);
+	if (ret == 0 && !cmdline_show_help) {
+		return 0;
+	}
+
+	if (cmdline->match_cmd != NULL) {
+		name = cmdline->match_cmd->name;
+	}
+	cmdline_usage(cmdline, name);
+
+	return cmdline_show_help ? EAGAIN : ret;
+}
+
+/*
+ * Print one usage line: command name, argument help, command help.
+ *
+ * With print_all the argument help is left-justified and padded to
+ * max_len minus the name length, so that help messages line up across
+ * commands.  Without it the argument help is printed as is.
+ */
+static void cmdline_usage_command(struct cmdline_context *cmdline,
+				  struct cmdline_command *cmd,
+				  bool print_all)
+{
+	const char *args = (cmd->msg_args != NULL) ? cmd->msg_args : "";
+
+	printf(" %s ", cmd->name);
+
+	if (!print_all) {
+		printf("%s", args);
+	} else {
+		int width = (int)(cmdline->max_len - strlen(cmd->name));
+
+		printf("%-*s", width, args);
+	}
+
+	printf(" %s\n", cmd->msg_help);
+}
+
+/*
+ * Print the heading for a section (prefixed by its name when it has one)
+ * followed by an aligned usage line for each of its commands.
+ */
+static void cmdline_usage_section(struct cmdline_context *cmdline,
+				  struct cmdline_section *section)
+{
+	struct cmdline_command *cmd;
+
+	printf("\n");
+
+	if (section->name != NULL) {
+		printf("%s ", section->name);
+	}
+	printf("Commands:\n");
+
+	for (cmd = section->commands; cmd->name != NULL; cmd++) {
+		cmdline_usage_command(cmdline, cmd, true);
+	}
+}
+
+/* Print popt's option help followed by the command list of every section */
+static void cmdline_usage_full(struct cmdline_context *cmdline)
+{
+	int idx = 0;
+
+	poptSetOtherOptionHelp(cmdline->pc, "[<options>] <command> [<args>]");
+	poptPrintHelp(cmdline->pc, stdout, 0);
+
+	while (idx < cmdline->num_sections) {
+		cmdline_usage_section(cmdline, &cmdline->section[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Print usage to stdout.
+ *
+ * With cmd_name NULL, or a name that matches no command, the full help
+ * (options plus every section) is printed.  Otherwise a compact usage
+ * line for that command is printed.
+ *
+ * The first command with the given name wins, searching sections in the
+ * order they were added; this is the same precedence cmdline_match() uses
+ * when selecting the command to run.
+ *
+ * Nothing is printed if the context has no popt context, which is the
+ * case after cmdline_parse_options() failed to create one.
+ */
+void cmdline_usage(struct cmdline_context *cmdline, const char *cmd_name)
+{
+	struct cmdline_command *cmd = NULL;
+	int i, j;
+
+	if (cmdline->pc == NULL) {
+		/* All output below goes through popt; nothing to render with */
+		return;
+	}
+
+	if (cmd_name == NULL) {
+		cmdline_usage_full(cmdline);
+		return;
+	}
+
+	/* Stop at the first match so later sections cannot override it */
+	for (j=0; j<cmdline->num_sections && cmd == NULL; j++) {
+		struct cmdline_section *section = &cmdline->section[j];
+
+		for (i=0; section->commands[i].name != NULL; i++) {
+			if (strcmp(section->commands[i].name, cmd_name) == 0) {
+				cmd = &section->commands[i];
+				break;
+			}
+		}
+	}
+
+	if (cmd == NULL) {
+		cmdline_usage_full(cmdline);
+		return;
+	}
+
+	poptSetOtherOptionHelp(cmdline->pc, "<command> [<args>]");
+	poptPrintUsage(cmdline->pc, stdout, 0);
+
+	printf("\n");
+	cmdline_usage_command(cmdline, cmd, false);
+}
+
+/*
+ * Run the command selected by the last successful match.
+ *
+ * The command function receives a temporary talloc context (freed when it
+ * returns) and only the arguments that follow the command words.  Its
+ * return value is stored in *result when result is not NULL.
+ *
+ * Returns ENOENT if no command is selected, ENOMEM if the temporary
+ * context cannot be created, and 0 otherwise.
+ */
+int cmdline_run(struct cmdline_context *cmdline,
+		void *private_data,
+		int *result)
+{
+	struct cmdline_command *selected = cmdline->match_cmd;
+	TALLOC_CTX *tmp_ctx;
+	const char **cmd_argv;
+	int cmd_argc;
+	int status;
+
+	if (selected == NULL) {
+		return ENOENT;
+	}
+
+	tmp_ctx = talloc_new(cmdline);
+	if (tmp_ctx == NULL) {
+		return ENOMEM;
+	}
+
+	cmd_argc = cmdline->argc - cmdline->arg0;
+	cmd_argv = &cmdline->argv[cmdline->arg0];
+	status = selected->fn(tmp_ctx, cmd_argc, cmd_argv, private_data);
+
+	talloc_free(tmp_ctx);
+
+	if (result != NULL) {
+		*result = status;
+	}
+	return 0;
+}
diff --git a/ctdb/common/cmdline.h b/ctdb/common/cmdline.h
new file mode 100644
index 0000000..51519ca
--- /dev/null
+++ b/ctdb/common/cmdline.h
@@ -0,0 +1,163 @@
+/*
+ Command line processing
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_CMDLINE_H__
+#define __CTDB_CMDLINE_H__
+
+#include <popt.h>
+#include <talloc.h>
+
+/**
+ * @file cmdline.h
+ *
+ * @brief Command-line handling with options and commands
+ *
+ * This abstraction encapsulates the boiler-plate for parsing options,
+ * commands and arguments on command-line.
+ *
+ * Options handling is done using popt.
+ */
+
+/**
+ * @brief Abstract data structure holding command-line configuration
+ */
+struct cmdline_context;
+
+/**
+ * @brief A command definition structure
+ *
+ * @name is the name of the command
+ * @fn is the implementation of the command
+ * @msg_help is the help message describing command
+ * @msg_args is the help message describing arguments (may be NULL)
+ *
+ * A command name can be a single word or multiple words separated with spaces.
+ *
+ * @name, @fn and @msg_help are mandatory; a command table containing an
+ * entry without @fn or @msg_help is rejected.
+ *
+ * An implementation function should return 0 on success and non-zero value
+ * on failure. This value is returned as result in @cmdline_run.
+ *
+ * The argc/argv passed to @fn contain only the arguments that follow the
+ * words of the command name.
+ */
+struct cmdline_command {
+ const char *name;
+ int (*fn)(TALLOC_CTX *mem_ctx,
+ int argc,
+ const char **argv,
+ void *private_data);
+ const char *msg_help;
+ const char *msg_args;
+};
+
+/**
+ * @brief convenience macro to define the end of commands list
+ *
+ * A commands list ends at the first entry whose name is NULL.
+ *
+ * Here is an example of defining commands list.
+ *
+ * struct cmdline_command commands[] = {
+ * { "command1", command1_func, "Run command1", NULL },
+ * { "command2", command2_func, "Run command2", "<filename>" },
+ * CMDLINE_TABLEEND
+ * };
+ */
+#define CMDLINE_TABLEEND { NULL, NULL, NULL, NULL }
+
+/**
+ * @brief Initialize cmdline abstraction
+ *
+ * If there are no options, options can be NULL.
+ *
+ * Help options (--help, -h) are automatically added to the options.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] prog Program name, must not be NULL
+ * @param[in] options Command-line options
+ * @param[in] section Name of section grouping specified commands
+ * @param[in] commands Commands array, must not be NULL
+ * @param[out] result New cmdline context
+ * @return 0 on success, errno on failure
+ *
+ * EINVAL is returned if prog is NULL or if the options or commands are
+ * not valid, ENOMEM if memory cannot be allocated.
+ *
+ * Freeing cmdline context will free up all the resources.
+ */
+int cmdline_init(TALLOC_CTX *mem_ctx,
+ const char *prog,
+ struct poptOption *options,
+ const char *section,
+ struct cmdline_command *commands,
+ struct cmdline_context **result);
+
+
+/**
+ * @brief Add command line section/commands
+ *
+ * @param[in] cmdline Cmdline context
+ * @param[in] section Name of section grouping specified commands
+ * @param[in] commands Commands array, must not be NULL
+ * @return 0 on success, errno on failure
+ *
+ * EINVAL is returned if the commands are not valid, ENOMEM if memory
+ * cannot be allocated.
+ */
+int cmdline_add(struct cmdline_context *cmdline,
+ const char *section,
+ struct cmdline_command *commands);
+
+/**
+ * @brief Parse command line options and commands/arguments
+ *
+ * This function parses the arguments to process options and commands.
+ *
+ * This function should be passed the arguments to main() and parse_options
+ * should be set to true. If cmdline is used for handling second-level
+ * commands, then parse_options should be set to false.
+ *
+ * If argc is less than 2, or there are no arguments left after the
+ * options, then EINVAL is returned.
+ *
+ * If argv does not match any command, then ENOENT is returned.
+ *
+ * If help options are specified, then help is printed and EAGAIN is
+ * returned.
+ *
+ * Whenever a non-zero value is returned, usage has been printed to stdout.
+ *
+ * @param[in] cmdline Cmdline context
+ * @param[in] argc Number of arguments
+ * @param[in] argv Arguments array
+ * @param[in] parse_options Whether to parse for options
+ * @return 0 on success, errno on failure
+ */
+int cmdline_parse(struct cmdline_context *cmdline,
+ int argc,
+ const char **argv,
+ bool parse_options);
+
+/**
+ * @brief Execute the function for the command matched by @cmdline_parse
+ *
+ * @param[in] cmdline Cmdline context
+ * @param[in] private_data Private data for implementation function
+ * @param[out] result Return value from the implementation function,
+ * can be NULL if the value is not needed
+ * @return 0 on success, errno on failure
+ *
+ * ENOENT is returned if no command has been matched, ENOMEM if memory
+ * cannot be allocated. In both cases the implementation function is not
+ * called. A return value of 0 only means that the implementation function
+ * was called; its own status is reported through result.
+ */
+int cmdline_run(struct cmdline_context *cmdline,
+ void *private_data,
+ int *result);
+
+/**
+ * @brief Print usage help message to stdout
+ *
+ * @param[in] cmdline Cmdline context
+ * @param[in] command Command string
+ *
+ * If command is NULL, then full help is printed.
+ * If command is specified and matches the name of a command, then compact
+ * help for that command is printed.
+ * If command is specified but does not match any command, then full help
+ * is printed.
+ */
+void cmdline_usage(struct cmdline_context *cmdline, const char *command);
+
+#endif /* __CTDB_CMDLINE_H__ */
diff --git a/ctdb/common/comm.c b/ctdb/common/comm.c
new file mode 100644
index 0000000..12f4970
--- /dev/null
+++ b/ctdb/common/comm.c
@@ -0,0 +1,427 @@
+/*
+ Communication endpoint implementation
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <tdb.h>
+
+#include "lib/util/blocking.h"
+#include "lib/util/tevent_unix.h"
+
+#include "pkt_read.h"
+#include "pkt_write.h"
+#include "comm.h"
+
+/*
+ * Communication endpoint around a socket
+ */
+
+#define SMALL_PKT_SIZE 1024
+
+/* State for one communication endpoint wrapped around a file descriptor */
+struct comm_context {
+ int fd;
+ comm_read_handler_fn read_handler; /* may be NULL, in which case no read request is started */
+ void *read_private_data;
+ comm_dead_handler_fn dead_handler; /* comm_setup() refuses NULL */
+ void *dead_private_data;
+ uint8_t small_pkt[SMALL_PKT_SIZE]; /* buffer handed to pkt_read_send() for incoming packets */
+ struct tevent_req *read_req, *write_req; /* read request from comm_setup(); write request currently being sent */
+ struct tevent_fd *fde; /* one fd event, used for both reading and writing */
+ struct tevent_queue *queue; /* queue of pending comm_write_send() requests */
+};
+
+static void comm_fd_handler(struct tevent_context *ev,
+ struct tevent_fd *fde,
+ uint16_t flags, void *private_data);
+static struct tevent_req *comm_read_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct comm_context *comm,
+ uint8_t *buf, size_t buflen);
+static void comm_read_failed(struct tevent_req *req);
+
+
+/*
+ * Create a communication endpoint around fd.
+ *
+ * The fd is switched to non-blocking mode and registered for read events.
+ * A read request is started only when a read handler is supplied.
+ *
+ * Returns EINVAL for a negative fd or a missing dead handler, EIO if the
+ * fd cannot be made non-blocking, ENOMEM for any later failure, and 0 on
+ * success with *result set.
+ */
+int comm_setup(TALLOC_CTX *mem_ctx, struct tevent_context *ev, int fd,
+	       comm_read_handler_fn read_handler, void *read_private_data,
+	       comm_dead_handler_fn dead_handler, void *dead_private_data,
+	       struct comm_context **result)
+{
+	struct comm_context *comm;
+
+	if (fd < 0 || dead_handler == NULL) {
+		return EINVAL;
+	}
+
+	/* Socket queue relies on non-blocking sockets. */
+	if (set_blocking(fd, false) == -1) {
+		return EIO;
+	}
+
+	comm = talloc_zero(mem_ctx, struct comm_context);
+	if (comm == NULL) {
+		return ENOMEM;
+	}
+
+	comm->fd = fd;
+	comm->dead_handler = dead_handler;
+	comm->dead_private_data = dead_private_data;
+	comm->read_handler = read_handler;
+	comm->read_private_data = read_private_data;
+
+	comm->queue = tevent_queue_create(comm, "comm write queue");
+	if (comm->queue == NULL) {
+		goto fail;
+	}
+
+	/* Set up to write packets */
+	comm->fde = tevent_add_fd(ev, comm, fd, TEVENT_FD_READ,
+				  comm_fd_handler, comm);
+	if (comm->fde == NULL) {
+		goto fail;
+	}
+
+	/* Set up to read packets */
+	if (read_handler != NULL) {
+		struct tevent_req *reader;
+
+		reader = comm_read_send(comm, ev, comm, comm->small_pkt,
+					SMALL_PKT_SIZE);
+		if (reader == NULL) {
+			goto fail;
+		}
+
+		/* The read request only ever completes by failing */
+		tevent_req_set_callback(reader, comm_read_failed, comm);
+		comm->read_req = reader;
+	}
+
+	*result = comm;
+	return 0;
+
+fail:
+	talloc_free(comm);
+	return ENOMEM;
+}
+
+
+/*
+ * Read packets
+ */
+
+/* State of the read request created by comm_read_send() */
+struct comm_read_state {
+ struct tevent_context *ev;
+ struct comm_context *comm;
+ uint8_t *buf; /* caller-supplied buffer passed to every pkt_read_send() */
+ size_t buflen;
+ struct tevent_req *subreq; /* packet read currently in progress, NULL between reads */
+};
+
+static ssize_t comm_read_more(uint8_t *buf, size_t buflen, void *private_data);
+static void comm_read_done(struct tevent_req *subreq);
+
+/*
+ * Start the read request for an endpoint.
+ *
+ * The first packet read is issued here; comm_read_done() re-issues one
+ * after each packet.  Returns NULL only if the request itself cannot be
+ * created; failure to create the packet read is reported through the
+ * returned request.
+ */
+static struct tevent_req *comm_read_send(TALLOC_CTX *mem_ctx,
+					 struct tevent_context *ev,
+					 struct comm_context *comm,
+					 uint8_t *buf, size_t buflen)
+{
+	struct comm_read_state *state;
+	struct tevent_req *req;
+	struct tevent_req *pkt_req;
+
+	req = tevent_req_create(mem_ctx, &state, struct comm_read_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->comm = comm;
+	state->ev = ev;
+	state->buflen = buflen;
+	state->buf = buf;
+
+	pkt_req = pkt_read_send(state, state->ev, comm->fd, sizeof(uint32_t),
+				state->buf, state->buflen,
+				comm_read_more, NULL);
+	if (tevent_req_nomem(pkt_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	tevent_req_set_callback(pkt_req, comm_read_done, req);
+	state->subreq = pkt_req;
+
+	return req;
+}
+
+/*
+ * Tell the packet reader how many more bytes the current packet needs.
+ *
+ * A packet starts with a uint32_t holding the total packet length, in
+ * host byte order as stored in the buffer.  Until those bytes have
+ * arrived, the number of missing header bytes is returned.  After that
+ * the result is the packet length minus the bytes already read.
+ *
+ * The length is copied out with memcpy() rather than read through a
+ * uint32_t pointer, so the buffer needs no particular alignment.
+ */
+static ssize_t comm_read_more(uint8_t *buf, size_t buflen, void *private_data)
+{
+	uint32_t packet_len;
+
+	if (buflen < sizeof(uint32_t)) {
+		return sizeof(uint32_t) - buflen;
+	}
+
+	memcpy(&packet_len, buf, sizeof(packet_len));
+
+	return packet_len - buflen;
+}
+
+/*
+ * Completion callback for one packet read.
+ *
+ * On failure the error is propagated to the read request.  On success the
+ * packet is passed to the endpoint's read handler and a new packet read
+ * is started, so the read request itself never completes successfully.
+ */
+static void comm_read_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct comm_read_state *state = tevent_req_data(
+ req, struct comm_read_state);
+ struct comm_context *comm = state->comm;
+ ssize_t nread;
+ uint8_t *buf;
+ bool free_buf;
+ int err = 0;
+
+ nread = pkt_read_recv(subreq, state, &buf, &free_buf, &err);
+ TALLOC_FREE(subreq);
+ state->subreq = NULL;
+ if (nread == -1) {
+ tevent_req_error(req, err);
+ return;
+ }
+
+ comm->read_handler(buf, nread, comm->read_private_data);
+
+ /* The handler must not keep buf: it is either freed here or reused for the next read */
+ /* NOTE(review): state and comm are used below; presumably the handler must not free the endpoint -- confirm against callers */
+ if (free_buf) {
+ talloc_free(buf);
+ }
+
+ /* Go back to waiting for the next packet */
+ subreq = pkt_read_send(state, state->ev, comm->fd, sizeof(uint32_t),
+ state->buf, state->buflen,
+ comm_read_more, NULL);
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ state->subreq = subreq;
+
+ tevent_req_set_callback(subreq, comm_read_done, req);
+}
+
+/*
+ * Collect the result of the read request.
+ *
+ * If the request failed and perr is not NULL, the error is stored in
+ * *perr; otherwise *perr is left untouched.
+ */
+static void comm_read_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (!tevent_req_is_unix_error(req, &err)) {
+		return;
+	}
+
+	if (perr == NULL) {
+		return;
+	}
+
+	*perr = err;
+}
+
+/*
+ * Callback for the endpoint's read request.
+ *
+ * The error itself is discarded; the request is freed, the endpoint
+ * forgets it, and the dead handler (if any) is invoked.
+ */
+static void comm_read_failed(struct tevent_req *req)
+{
+	struct comm_context *comm = tevent_req_callback_data(
+		req, struct comm_context);
+
+	comm_read_recv(req, NULL);
+	TALLOC_FREE(req);
+	comm->read_req = NULL;
+
+	if (comm->dead_handler == NULL) {
+		return;
+	}
+
+	comm->dead_handler(comm->dead_private_data);
+}
+
+
+/*
+ * Write packets
+ */
+
+/*
+ * Links a write request to its slot in the endpoint's write queue.
+ * Its destructor removes the queue entry and detaches the request from
+ * the endpoint if it is the one being written.
+ */
+struct comm_write_entry {
+ struct comm_context *comm;
+ struct tevent_queue_entry *qentry;
+ struct tevent_req *req; /* the write request that owns this entry */
+};
+
+/* State of one write request created by comm_write_send() */
+struct comm_write_state {
+ struct tevent_context *ev;
+ struct comm_context *comm;
+ struct comm_write_entry *entry; /* queue bookkeeping, freed once the write has completed */
+ struct tevent_req *subreq; /* packet write in progress, NULL otherwise */
+ uint8_t *buf; /* caller's packet; not copied */
+ size_t buflen, nwritten; /* requested length and length reported by pkt_write_recv() */
+};
+
+static int comm_write_entry_destructor(struct comm_write_entry *entry);
+static void comm_write_trigger(struct tevent_req *req, void *private_data);
+static void comm_write_done(struct tevent_req *subreq);
+
+/*
+ * Queue a packet for writing on the endpoint.
+ *
+ * The request is added to the endpoint's write queue; the actual write
+ * starts when comm_write_trigger() is invoked for it.  The buffer is
+ * referenced, not copied.  Returns NULL only if the request cannot be
+ * created; later allocation failures are reported through the request.
+ */
+struct tevent_req *comm_write_send(TALLOC_CTX *mem_ctx,
+				   struct tevent_context *ev,
+				   struct comm_context *comm,
+				   uint8_t *buf, size_t buflen)
+{
+	struct comm_write_state *state;
+	struct comm_write_entry *slot;
+	struct tevent_req *req;
+
+	req = tevent_req_create(mem_ctx, &state, struct comm_write_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->comm = comm;
+	state->ev = ev;
+	state->buflen = buflen;
+	state->buf = buf;
+
+	slot = talloc_zero(state, struct comm_write_entry);
+	if (tevent_req_nomem(slot, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	slot->req = req;
+	slot->comm = comm;
+	slot->qentry = tevent_queue_add_entry(comm->queue, ev, req,
+					      comm_write_trigger, NULL);
+	if (tevent_req_nomem(slot->qentry, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	/* Destructor is installed only once the entry is really queued */
+	talloc_set_destructor(slot, comm_write_entry_destructor);
+	state->entry = slot;
+
+	return req;
+}
+
+/*
+ * talloc destructor for a write entry.
+ *
+ * If the entry's request is the one currently being written, the endpoint
+ * forgets it and stops waiting for writability.  The queue entry is
+ * always released.
+ */
+static int comm_write_entry_destructor(struct comm_write_entry *entry)
+{
+	struct comm_context *comm = entry->comm;
+	bool in_flight = (comm->write_req == entry->req);
+
+	if (in_flight) {
+		comm->write_req = NULL;
+		TEVENT_FD_NOT_WRITEABLE(comm->fde);
+	}
+
+	TALLOC_FREE(entry->qentry);
+	return 0;
+}
+
+/*
+ * Queue trigger: this request's turn to write has come.
+ *
+ * The request becomes the endpoint's current write request, a packet
+ * write is started, and the fd event is armed for writability.
+ */
+static void comm_write_trigger(struct tevent_req *req, void *private_data)
+{
+	struct comm_write_state *state = tevent_req_data(
+		req, struct comm_write_state);
+	struct comm_context *comm = state->comm;
+	struct tevent_req *pkt_req;
+
+	comm->write_req = req;
+
+	pkt_req = pkt_write_send(state, state->ev, comm->fd,
+				 state->buf, state->buflen);
+	if (tevent_req_nomem(pkt_req, req)) {
+		return;
+	}
+
+	tevent_req_set_callback(pkt_req, comm_write_done, req);
+	state->subreq = pkt_req;
+	TEVENT_FD_WRITEABLE(comm->fde);
+}
+
+/*
+ * Completion callback for one packet write.
+ *
+ * Writability notification is switched off and the endpoint no longer has
+ * a current write request.  A failed write fails the request; EPIPE
+ * additionally invokes the dead handler first.  On success the queue
+ * entry is released and the request completes.
+ */
+static void comm_write_done(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct comm_write_state *state = tevent_req_data(
+ req, struct comm_write_state);
+ struct comm_context *comm = state->comm;
+ ssize_t nwritten;
+ int err = 0;
+
+ TEVENT_FD_NOT_WRITEABLE(comm->fde);
+ nwritten = pkt_write_recv(subreq, &err);
+ TALLOC_FREE(subreq);
+ state->subreq = NULL;
+ comm->write_req = NULL;
+ if (nwritten == -1) {
+ if (err == EPIPE) {
+ comm->dead_handler(comm->dead_private_data);
+ }
+ /* NOTE(review): req is still used after the dead handler ran; presumably the handler must not free it -- confirm */
+ tevent_req_error(req, err);
+ return;
+ }
+
+ state->nwritten = nwritten;
+ /* Clear qentry before freeing the entry so its destructor has nothing to free there */
+ state->entry->qentry = NULL;
+ TALLOC_FREE(state->entry);
+ tevent_req_done(req);
+}
+
+/*
+ * Collect the result of a write request.
+ *
+ * Returns true when the whole packet was written.  Returns false if the
+ * request failed, or if fewer bytes than requested were written (reported
+ * as EIO).
+ *
+ * perr may be NULL.  When it is not, it receives the error on failure and
+ * 0 on success.
+ */
+bool comm_write_recv(struct tevent_req *req, int *perr)
+{
+	struct comm_write_state *state = tevent_req_data(
+		req, struct comm_write_state);
+	int err;
+
+	if (tevent_req_is_unix_error(req, &err)) {
+		if (perr != NULL) {
+			*perr = err;
+		}
+		return false;
+	}
+
+	if (state->nwritten != state->buflen) {
+		if (perr != NULL) {
+			*perr = EIO;
+		}
+		return false;
+	}
+
+	if (perr != NULL) {
+		*perr = 0;
+	}
+	return true;
+}
+
+/*
+ * Event handler for the endpoint's fd.
+ *
+ * Readable events are forwarded to the packet read in progress; a
+ * readable event with no read request aborts the process.  Writable
+ * events are forwarded to the packet write in progress; with no write
+ * request, writability notification is simply switched off.
+ */
+static void comm_fd_handler(struct tevent_context *ev,
+			    struct tevent_fd *fde,
+			    uint16_t flags, void *private_data)
+{
+	struct comm_context *comm = talloc_get_type_abort(
+		private_data, struct comm_context);
+	struct comm_write_state *write_state;
+
+	if ((flags & TEVENT_FD_READ) != 0) {
+		struct comm_read_state *read_state;
+
+		if (comm->read_req == NULL) {
+			/* This should never happen */
+			abort();
+		}
+
+		read_state = tevent_req_data(comm->read_req,
+					     struct comm_read_state);
+		pkt_read_handler(ev, fde, flags, read_state->subreq);
+	}
+
+	if ((flags & TEVENT_FD_WRITE) == 0) {
+		return;
+	}
+
+	if (comm->write_req == NULL) {
+		TEVENT_FD_NOT_WRITEABLE(comm->fde);
+		return;
+	}
+
+	write_state = tevent_req_data(comm->write_req,
+				      struct comm_write_state);
+	pkt_write_handler(ev, fde, flags, write_state->subreq);
+}
diff --git a/ctdb/common/comm.h b/ctdb/common/comm.h
new file mode 100644
index 0000000..e11d38e
--- /dev/null
+++ b/ctdb/common/comm.h
@@ -0,0 +1,101 @@
+/*
+ Communication endpoint API
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_COMM_H__
+#define __CTDB_COMM_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+/**
+ * @file comm.h
+ *
+ * @brief Communication over a socket or file descriptor
+ *
+ * This abstraction is a wrapper around a socket or file descriptor to
+ * send/receive complete packets.
+ */
+
+/**
+ * @brief Packet handler function
+ *
+ * This function is registered while setting up communication endpoint. Any
+ * time a complete packet is read, this function is called.
+ *
+ * The buffer is only valid for the duration of the call. It is freed or
+ * reused for the next packet as soon as the handler returns.
+ */
+typedef void (*comm_read_handler_fn)(uint8_t *buf, size_t buflen,
+ void *private_data);
+
+/**
+ * @brief Communication endpoint dead handler function
+ *
+ * This function is called when reading from the communication endpoint
+ * fails, or when writing to it fails with EPIPE.
+ */
+typedef void (*comm_dead_handler_fn)(void *private_data);
+
+/**
+ * @brief Abstract struct to store communication endpoint details
+ */
+struct comm_context;
+
+/**
+ * @brief Initialize the communication endpoint
+ *
+ * This returns a new communication context. Freeing this context will free
+ * all memory associated with it.
+ *
+ * The file descriptor is set to non-blocking mode.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] fd The socket or file descriptor
+ * @param[in] read_handler The packet handler function, can be NULL if
+ * packets are not to be read
+ * @param[in] read_private_data Private data for read handler function
+ * @param[in] dead_handler The communication dead handler function, must
+ * not be NULL
+ * @param[in] dead_private_data Private data for dead handler function
+ * @param[out] result The new comm_context structure
+ * @return 0 on success, errno on failure
+ */
+int comm_setup(TALLOC_CTX *mem_ctx, struct tevent_context *ev, int fd,
+ comm_read_handler_fn read_handler, void *read_private_data,
+ comm_dead_handler_fn dead_handler, void *dead_private_data,
+ struct comm_context **result);
+
+/**
+ * @brief Async computation start to send a packet
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] comm Communication context
+ * @param[in] buf The packet data
+ * @param[in] buflen The size of the packet
+ * @return new tevent request, or NULL on failure
+ */
+struct tevent_req *comm_write_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct comm_context *comm,
+ uint8_t *buf, size_t buflen);
+
+/**
+ * @brief Async computation end to send a packet
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure, 0 on success
+ * @return true on success, false on failure
+ *
+ * If fewer bytes than requested were written, false is returned and the
+ * error is EIO.
+ */
+bool comm_write_recv(struct tevent_req *req, int *perr);
+
+#endif /* __CTDB_COMM_H__ */
diff --git a/ctdb/common/common.h b/ctdb/common/common.h
new file mode 100644
index 0000000..9a73bec
--- /dev/null
+++ b/ctdb/common/common.h
@@ -0,0 +1,160 @@
+/*
+ ctdb database library
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_COMMON_H__
+#define __CTDB_COMMON_H__
+
+#include "lib/util/attr.h"
+
+/* From common/ctdb_io.c */
+
+typedef void (*ctdb_queue_cb_fn_t)(uint8_t *data, size_t length,
+ void *private_data);
+
+uint32_t ctdb_queue_length(struct ctdb_queue *queue);
+
+int ctdb_queue_send(struct ctdb_queue *queue, uint8_t *data, uint32_t length);
+
+int ctdb_queue_set_fd(struct ctdb_queue *queue, int fd);
+
+struct ctdb_queue *ctdb_queue_setup(struct ctdb_context *ctdb,
+ TALLOC_CTX *mem_ctx, int fd, int alignment,
+ ctdb_queue_cb_fn_t callback,
+ void *private_data, const char *fmt, ...)
+ PRINTF_ATTRIBUTE(7,8);
+
+/* From common/ctdb_ltdb.c */
+
+int ctdb_db_tdb_flags(uint8_t db_flags, bool with_valgrind, bool with_mutex);
+
+struct ctdb_db_context *ctdb_db_handle(struct ctdb_context *ctdb,
+ const char *name);
+
+bool ctdb_db_persistent(struct ctdb_db_context *ctdb_db);
+bool ctdb_db_replicated(struct ctdb_db_context *ctdb_db);
+bool ctdb_db_volatile(struct ctdb_db_context *ctdb_db);
+
+bool ctdb_db_readonly(struct ctdb_db_context *ctdb_db);
+void ctdb_db_set_readonly(struct ctdb_db_context *ctdb_db);
+void ctdb_db_reset_readonly(struct ctdb_db_context *ctdb_db);
+
+bool ctdb_db_sticky(struct ctdb_db_context *ctdb_db);
+void ctdb_db_set_sticky(struct ctdb_db_context *ctdb_db);
+
+uint32_t ctdb_lmaster(struct ctdb_context *ctdb, const TDB_DATA *key);
+
+int ctdb_ltdb_fetch(struct ctdb_db_context *ctdb_db,
+ TDB_DATA key, struct ctdb_ltdb_header *header,
+ TALLOC_CTX *mem_ctx, TDB_DATA *data);
+
+int ctdb_ltdb_store(struct ctdb_db_context *ctdb_db, TDB_DATA key,
+ struct ctdb_ltdb_header *header, TDB_DATA data);
+
+int ctdb_ltdb_lock(struct ctdb_db_context *ctdb_db, TDB_DATA key);
+
+int ctdb_ltdb_unlock(struct ctdb_db_context *ctdb_db, TDB_DATA key);
+
+int ctdb_ltdb_delete(struct ctdb_db_context *ctdb_db, TDB_DATA key);
+
+int ctdb_trackingdb_add_pnn(struct ctdb_context *ctdb, TDB_DATA *data, uint32_t pnn);
+
+typedef void (*ctdb_trackingdb_cb)(struct ctdb_context *ctdb, uint32_t pnn,
+ void *private_data);
+
+void ctdb_trackingdb_traverse(struct ctdb_context *ctdb, TDB_DATA data,
+ ctdb_trackingdb_cb cb, void *private_data);
+
+int ctdb_null_func(struct ctdb_call_info *call);
+
+int ctdb_fetch_func(struct ctdb_call_info *call);
+
+int ctdb_fetch_with_header_func(struct ctdb_call_info *call);
+
+/* From common/ctdb_util.c */
+
+const char *ctdb_errstr(struct ctdb_context *ctdb);
+
+void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...)
+ PRINTF_ATTRIBUTE(2,3);
+
+void ctdb_fatal(struct ctdb_context *ctdb, const char *msg) _NORETURN_;
+
+void ctdb_die(struct ctdb_context *ctdb, const char *msg) _NORETURN_;
+
+bool ctdb_set_helper(const char *type, char *helper, size_t size,
+ const char *envvar,
+ const char *dir, const char *file);
+
+int ctdb_parse_address(TALLOC_CTX *mem_ctx, const char *str,
+ ctdb_sock_addr *address);
+
+bool ctdb_same_address(ctdb_sock_addr *a1, ctdb_sock_addr *a2);
+
+uint32_t ctdb_hash(const TDB_DATA *key);
+
+struct ctdb_rec_data_old *ctdb_marshall_record(TALLOC_CTX *mem_ctx,
+ uint32_t reqid,
+ TDB_DATA key,
+ struct ctdb_ltdb_header *header,
+ TDB_DATA data);
+
+struct ctdb_marshall_buffer *ctdb_marshall_add(TALLOC_CTX *mem_ctx,
+ struct ctdb_marshall_buffer *m,
+ uint32_t db_id,
+ uint32_t reqid,
+ TDB_DATA key,
+ struct ctdb_ltdb_header *header,
+ TDB_DATA data);
+
+TDB_DATA ctdb_marshall_finish(struct ctdb_marshall_buffer *m);
+
+struct ctdb_rec_data_old *ctdb_marshall_loop_next(
+ struct ctdb_marshall_buffer *m,
+ struct ctdb_rec_data_old *r,
+ uint32_t *reqid,
+ struct ctdb_ltdb_header *header,
+ TDB_DATA *key, TDB_DATA *data);
+
+void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip);
+void ctdb_canonicalize_ip_inplace(ctdb_sock_addr *ip);
+
+bool ctdb_same_ip(const ctdb_sock_addr *tip1, const ctdb_sock_addr *tip2);
+
+bool ctdb_same_sockaddr(const ctdb_sock_addr *ip1, const ctdb_sock_addr *ip2);
+
+char *ctdb_addr_to_str(ctdb_sock_addr *addr);
+
+unsigned ctdb_addr_to_port(ctdb_sock_addr *addr);
+
+struct ctdb_node_map_old *ctdb_read_nodes_file(TALLOC_CTX *mem_ctx,
+ const char *nlist);
+
+struct ctdb_node_map_old *ctdb_node_list_to_map(struct ctdb_node **nodes,
+ uint32_t num_nodes,
+ TALLOC_CTX *mem_ctx);
+
+const char *runstate_to_string(enum ctdb_runstate runstate);
+
+enum ctdb_runstate runstate_from_string(const char *label);
+
+void ctdb_set_runstate(struct ctdb_context *ctdb, enum ctdb_runstate runstate);
+
+uint32_t *ctdb_key_to_idkey(TALLOC_CTX *mem_ctx, TDB_DATA key);
+
+#endif /* __CTDB_COMMON_H__ */
diff --git a/ctdb/common/conf.c b/ctdb/common/conf.c
new file mode 100644
index 0000000..a8ff724
--- /dev/null
+++ b/ctdb/common/conf.c
@@ -0,0 +1,1391 @@
+/*
+ Configuration file handling on top of tini
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/locale.h"
+
+#include <talloc.h>
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/tini.h"
+#include "lib/util/debug.h"
+
+#include "common/conf.h"
+
+/*
+ * A single configuration value: a type tag plus the payload for that type.
+ */
+struct conf_value {
+ enum conf_type type; /* selects which member of data is meaningful */
+ union {
+ const char *string;
+ int integer;
+ bool boolean;
+ } data;
+};
+
+/*
+ * Location registered through conf_assign_*_pointer() that
+ * conf_option_set_ptr_value() writes an option's current value into.
+ * The member in use matches the option's type.
+ */
+union conf_pointer {
+ const char **string;
+ int *integer;
+ bool *boolean;
+};
+
+/*
+ * One option inside a section.
+ */
+struct conf_option {
+ struct conf_option *prev, *next; /* links for the section's option list */
+
+ const char *name;
+ enum conf_type type;
+ void *validate; /* type-specific validator, cast back before the call */
+
+ struct conf_value default_value;
+ bool default_set; /* true once conf_option_set_default() succeeded */
+
+ /*
+  * value is the committed value, new_value a pending one; either may
+  * point at default_value instead of a separately allocated struct.
+  */
+ struct conf_value *value, *new_value;
+ union conf_pointer ptr; /* optional mirror of the committed value */
+ bool temporary_modified; /* set when changed in CONF_MODE_API */
+};
+
+/*
+ * A named section holding a list of options.
+ */
+struct conf_section {
+ struct conf_section *prev, *next; /* links for the context's section list */
+
+ const char *name; /* looked up case-insensitively by conf_section_find() */
+ conf_validate_section_fn validate; /* optional; NULL means always valid */
+ struct conf_option *option; /* head of the option list */
+};
+
+/*
+ * Top-level configuration state.
+ */
+struct conf_context {
+ const char *filename; /* set by conf_load(), needed by conf_reload() */
+ struct conf_section *section; /* head of the section list */
+ bool define_failed; /* sticky: any failed definition marks conf invalid */
+ bool ignore_unknown; /* unknown sections/options are logged, not errors */
+ bool reload; /* true only while conf_reload() is running */
+ bool validation_active; /* NOTE(review): not referenced in this part of the file */
+};
+
+/*
+ * Functions related to conf_value
+ */
+
+/*
+ * Duplicate str onto mem_ctx and hand the copy back through str_val.
+ *
+ * Returns 0 on success, EINVAL if str is NULL, ENOMEM if the copy fails.
+ * *str_val is only written on success.
+ */
+static int string_to_string(TALLOC_CTX *mem_ctx,
+			    const char *str,
+			    const char **str_val)
+{
+	char *dup;
+
+	if (str == NULL) {
+		return EINVAL;
+	}
+
+	dup = talloc_strdup(mem_ctx, str);
+	if (dup != NULL) {
+		*str_val = dup;
+		return 0;
+	}
+
+	return ENOMEM;
+}
+
+/*
+ * Parse str as a non-negative integer that fits in an int.
+ *
+ * The base is auto-detected by strtol() (decimal, 0x hex, leading-0 octal).
+ * The whole string must be consumed: an empty string, a string without
+ * digits, or trailing garbage ("12abc") is rejected.
+ *
+ * Returns 0 on success and stores the result in *int_val, EINVAL otherwise.
+ * *int_val is only written on success.
+ */
+static int string_to_integer(const char *str, int *int_val)
+{
+	long t;
+	char *endptr = NULL;
+
+	if (str == NULL) {
+		return EINVAL;
+	}
+
+	/* strtol() reports overflow only through errno */
+	errno = 0;
+	t = strtol(str, &endptr, 0);
+	if (errno != 0) {
+		return EINVAL;
+	}
+
+	/* No digits at all, or something left over after the number */
+	if (endptr == str || *endptr != '\0') {
+		return EINVAL;
+	}
+
+	if (t < 0 || t > INT_MAX) {
+		return EINVAL;
+	}
+
+	*int_val = (int)t;
+	return 0;
+}
+
+/*
+ * Parse str as a boolean.
+ *
+ * Accepts "true"/"yes" and "false"/"no", compared case-insensitively.
+ *
+ * Returns 0 on success and stores the result in *bool_val; EINVAL if str
+ * is NULL or is not one of the accepted words.
+ */
+static int string_to_boolean(const char *str, bool *bool_val)
+{
+	/* Same NULL contract as string_to_string()/string_to_integer() */
+	if (str == NULL) {
+		return EINVAL;
+	}
+
+	if (strcasecmp(str, "true") == 0 || strcasecmp(str, "yes") == 0) {
+		*bool_val = true;
+		return 0;
+	}
+
+	if (strcasecmp(str, "false") == 0 || strcasecmp(str, "no") == 0) {
+		*bool_val = false;
+		return 0;
+	}
+
+	return EINVAL;
+}
+
+/*
+ * Convert str into value->data according to value->type, which the
+ * caller must have set beforehand.  String payloads are allocated on
+ * mem_ctx.
+ *
+ * Returns 0 on success, otherwise the error from the type-specific
+ * parser, or EINVAL for an unknown type.
+ */
+static int conf_value_from_string(TALLOC_CTX *mem_ctx,
+				  const char *str,
+				  struct conf_value *value)
+{
+	switch (value->type) {
+	case CONF_STRING:
+		return string_to_string(mem_ctx, str, &value->data.string);
+
+	case CONF_INTEGER:
+		return string_to_integer(str, &value->data.integer);
+
+	case CONF_BOOLEAN:
+		return string_to_boolean(str, &value->data.boolean);
+
+	default:
+		break;
+	}
+
+	return EINVAL;
+}
+
+/*
+ * Return true if both values are non-NULL, have the same type and carry
+ * an equal payload.  Two NULL strings compare equal; a NULL string never
+ * equals a non-NULL one.
+ */
+static bool conf_value_compare(struct conf_value *old, struct conf_value *new)
+{
+	const char *a, *b;
+
+	if (old == NULL || new == NULL || old->type != new->type) {
+		return false;
+	}
+
+	switch (old->type) {
+	case CONF_STRING:
+		a = old->data.string;
+		b = new->data.string;
+		if (a == NULL || b == NULL) {
+			/* Equal only when both are unset */
+			return (a == b);
+		}
+		return (strcmp(a, b) == 0);
+
+	case CONF_INTEGER:
+		return (old->data.integer == new->data.integer);
+
+	case CONF_BOOLEAN:
+		return (old->data.boolean == new->data.boolean);
+	}
+
+	return false;
+}
+
+/*
+ * Copy the payload of src into dst.  Both must already have the same type.
+ *
+ * For strings the new copy is allocated on mem_ctx and any string dst
+ * previously held is freed.  The duplicate is made before the old string
+ * is released, so copying a value onto itself (or onto a value sharing
+ * the same string) never reads freed memory, and dst is left untouched
+ * if the allocation fails.
+ *
+ * Returns 0 on success, EINVAL on type mismatch or unknown type, ENOMEM
+ * if the string could not be duplicated.
+ */
+static int conf_value_copy(TALLOC_CTX *mem_ctx,
+			   struct conf_value *src,
+			   struct conf_value *dst)
+{
+	char *copy = NULL;
+
+	if (src->type != dst->type) {
+		return EINVAL;
+	}
+
+	switch (src->type) {
+	case CONF_STRING:
+		if (src->data.string != NULL) {
+			copy = talloc_strdup(mem_ctx, src->data.string);
+			if (copy == NULL) {
+				return ENOMEM;
+			}
+		}
+		if (dst->data.string != NULL) {
+			talloc_free(discard_const(dst->data.string));
+		}
+		dst->data.string = copy;
+		break;
+
+	case CONF_INTEGER:
+		dst->data.integer = src->data.integer;
+		break;
+
+	case CONF_BOOLEAN:
+		dst->data.boolean = src->data.boolean;
+		break;
+
+	default:
+		return EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Write one "key = value" line to fp.
+ *
+ * The line is emitted commented out ("# key = ...") when the value is the
+ * default or is an unset string.  A " # temporary" suffix is appended when
+ * is_temporary is true.
+ */
+static void conf_value_dump(const char *key,
+			    struct conf_value *value,
+			    bool is_default,
+			    bool is_temporary,
+			    FILE *fp)
+{
+	bool commented = is_default;
+
+	if (value->type == CONF_STRING && value->data.string == NULL) {
+		commented = true;
+	}
+
+	if (commented) {
+		fprintf(fp, "\t# %s = ", key);
+	} else {
+		fprintf(fp, "\t%s = ", key);
+	}
+
+	switch (value->type) {
+	case CONF_STRING:
+		if (value->data.string != NULL) {
+			fputs(value->data.string, fp);
+		}
+		break;
+
+	case CONF_INTEGER:
+		fprintf(fp, "%d", value->data.integer);
+		break;
+
+	case CONF_BOOLEAN:
+		fputs(value->data.boolean ? "true" : "false", fp);
+		break;
+	}
+
+	if (is_temporary) {
+		fputs(" # temporary", fp);
+	}
+
+	fputc('\n', fp);
+}
+
+/*
+ * Functions related to conf_option
+ */
+
+/*
+ * Look up an option by exact (case-sensitive) name within section s.
+ * Returns the option, or NULL if the section has no such option.
+ */
+static struct conf_option *conf_option_find(struct conf_section *s,
+					    const char *key)
+{
+	struct conf_option *cur = s->option;
+
+	while (cur != NULL && strcmp(cur->name, key) != 0) {
+		cur = cur->next;
+	}
+
+	return cur;
+}
+
+/*
+ * Publish the option's committed value (opt->value) through the pointer
+ * registered for it, if any.  Does nothing when no pointer is registered.
+ */
+static void conf_option_set_ptr_value(struct conf_option *opt)
+{
+	switch (opt->type) {
+	case CONF_STRING:
+		if (opt->ptr.string == NULL) {
+			break;
+		}
+		*(opt->ptr.string) = opt->value->data.string;
+		break;
+
+	case CONF_INTEGER:
+		if (opt->ptr.integer == NULL) {
+			break;
+		}
+		*(opt->ptr.integer) = opt->value->data.integer;
+		break;
+
+	case CONF_BOOLEAN:
+		if (opt->ptr.boolean == NULL) {
+			break;
+		}
+		*(opt->ptr.boolean) = opt->value->data.boolean;
+		break;
+	}
+}
+
+static void conf_option_default(struct conf_option *opt);
+
+/*
+ * Create option "key" of the given type in section s and append it to the
+ * section's option list.  The option is allocated as a talloc child of s.
+ *
+ * validate is the type-specific validation callback (may be NULL).
+ * If popt is not NULL the new option is returned through it.
+ *
+ * Returns 0 on success, EEXIST if the section already has an option with
+ * this name, ENOMEM on allocation failure.
+ */
+static int conf_option_add(struct conf_section *s,
+			   const char *key,
+			   enum conf_type type,
+			   void *validate,
+			   struct conf_option **popt)
+{
+	struct conf_option *new_opt;
+
+	if (conf_option_find(s, key) != NULL) {
+		D_ERR("conf: option \"%s\" already exists\n", key);
+		return EEXIST;
+	}
+
+	new_opt = talloc_zero(s, struct conf_option);
+	if (new_opt == NULL) {
+		return ENOMEM;
+	}
+
+	new_opt->name = talloc_strdup(new_opt, key);
+	if (new_opt->name == NULL) {
+		talloc_free(new_opt);
+		return ENOMEM;
+	}
+	new_opt->type = type;
+	new_opt->validate = validate;
+
+	DLIST_ADD_END(s->option, new_opt);
+
+	if (popt != NULL) {
+		*popt = new_opt;
+	}
+	return 0;
+}
+
+/*
+ * Record default_value as the option's default.  On success the option is
+ * marked as having a default and its temporary-modification flag is
+ * cleared.
+ *
+ * Returns 0 on success or the error from conf_value_copy().
+ */
+static int conf_option_set_default(struct conf_option *opt,
+				   struct conf_value *default_value)
+{
+	int ret;
+
+	/* conf_value_copy() insists on matching types */
+	opt->default_value.type = opt->type;
+
+	ret = conf_value_copy(opt, default_value, &opt->default_value);
+	if (ret == 0) {
+		opt->default_set = true;
+		opt->temporary_modified = false;
+	}
+
+	return ret;
+}
+
+/*
+ * Register *ptr as the location that mirrors this option's value.
+ * The current value is not written here; see conf_option_set_ptr_value().
+ */
+static void conf_option_set_ptr(struct conf_option *opt,
+ union conf_pointer *ptr)
+{
+ opt->ptr = *ptr;
+}
+
+/*
+ * Run the option's validator as a string validator, passing the committed
+ * string and the proposed one.  The caller guarantees opt->validate is set.
+ */
+static bool conf_option_validate_string(struct conf_option *opt,
+					struct conf_value *value,
+					enum conf_update_mode mode)
+{
+	conf_validate_string_option_fn fn;
+	const char *current = opt->value->data.string;
+	const char *proposed = value->data.string;
+
+	fn = (conf_validate_string_option_fn)opt->validate;
+
+	return fn(opt->name, current, proposed, mode);
+}
+
+/*
+ * Run the option's validator as an integer validator, passing the
+ * committed integer and the proposed one.  The caller guarantees
+ * opt->validate is set.
+ */
+static bool conf_option_validate_integer(struct conf_option *opt,
+					 struct conf_value *value,
+					 enum conf_update_mode mode)
+{
+	conf_validate_integer_option_fn fn;
+	int current = opt->value->data.integer;
+	int proposed = value->data.integer;
+
+	fn = (conf_validate_integer_option_fn)opt->validate;
+
+	return fn(opt->name, current, proposed, mode);
+}
+
+/*
+ * Run the option's validator as a boolean validator, passing the
+ * committed boolean and the proposed one.  The caller guarantees
+ * opt->validate is set.
+ */
+static bool conf_option_validate_boolean(struct conf_option *opt,
+					 struct conf_value *value,
+					 enum conf_update_mode mode)
+{
+	conf_validate_boolean_option_fn fn;
+	bool current = opt->value->data.boolean;
+	bool proposed = value->data.boolean;
+
+	fn = (conf_validate_boolean_option_fn)opt->validate;
+
+	return fn(opt->name, current, proposed, mode);
+}
+
+/*
+ * Validate a proposed value for opt with the option's own validator.
+ *
+ * Returns true if the option has no validator or the validator accepts
+ * the value; false if the validator rejects it or the option has an
+ * unknown type.
+ */
+static bool conf_option_validate(struct conf_option *opt,
+				 struct conf_value *value,
+				 enum conf_update_mode mode)
+{
+	bool ok;
+
+	if (opt->validate == NULL) {
+		return true;
+	}
+
+	switch (opt->type) {
+	case CONF_STRING:
+		ok = conf_option_validate_string(opt, value, mode);
+		break;
+
+	case CONF_INTEGER:
+		ok = conf_option_validate_integer(opt, value, mode);
+		break;
+
+	case CONF_BOOLEAN:
+		ok = conf_option_validate_boolean(opt, value, mode);
+		break;
+
+	default:
+		/*
+		 * An errno value here would convert to "true" on return;
+		 * an unknown type must fail validation.
+		 */
+		ok = false;
+		break;
+	}
+
+	return ok;
+}
+
+/*
+ * Return true if new_value equals the option's committed value
+ * (see conf_value_compare()).
+ */
+static bool conf_option_same_value(struct conf_option *opt,
+ struct conf_value *new_value)
+{
+ return conf_value_compare(opt->value, new_value);
+}
+
+/*
+ * Stage new_value as the option's pending value (opt->new_value).
+ *
+ * Any previously staged value is discarded first.  Passing the option's
+ * own &opt->default_value stages the default without validation; any
+ * other value is validated and then copied into a freshly allocated
+ * struct.  The committed value (opt->value) is not changed here; that
+ * happens in conf_option_update().
+ *
+ * Returns 0 on success, EINVAL if validation fails, ENOMEM or the error
+ * from conf_value_copy() otherwise.  On a copy failure the partially set
+ * up opt->new_value is left in place.
+ */
+static int conf_option_new_value(struct conf_option *opt,
+ struct conf_value *new_value,
+ enum conf_update_mode mode)
+{
+ int ret;
+ bool ok;
+
+ /* The embedded default must never be freed */
+ if (opt->new_value != &opt->default_value) {
+ TALLOC_FREE(opt->new_value);
+ }
+
+ if (new_value == &opt->default_value) {
+ /*
+ * This happens only during load/reload. Set the value to
+ * default value, so if the config option is dropped from
+ * config file, then it gets reset to default.
+ */
+ opt->new_value = &opt->default_value;
+ } else {
+ ok = conf_option_validate(opt, new_value, mode);
+ if (!ok) {
+ D_ERR("conf: validation for option \"%s\" failed\n",
+ opt->name);
+ return EINVAL;
+ }
+
+ opt->new_value = talloc_zero(opt, struct conf_value);
+ if (opt->new_value == NULL) {
+ return ENOMEM;
+ }
+
+ opt->new_value->type = opt->value->type;
+ /*
+ * NOTE(review): a string payload is allocated with opt as talloc
+ * parent, not opt->new_value, so it presumably outlives a later
+ * TALLOC_FREE() of the value struct - confirm this is intended.
+ */
+ ret = conf_value_copy(opt, new_value, opt->new_value);
+ if (ret != 0) {
+ return ret;
+ }
+ }
+
+ /* Writes opt->value (the committed value), not the staged one */
+ conf_option_set_ptr_value(opt);
+
+ /* Staging the default leaves the temporary flag as it was */
+ if (new_value != &opt->default_value) {
+ if (mode == CONF_MODE_API) {
+ opt->temporary_modified = true;
+ } else {
+ opt->temporary_modified = false;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Stage the option's own default as its pending value.
+ */
+static int conf_option_new_default_value(struct conf_option *opt,
+ enum conf_update_mode mode)
+{
+ return conf_option_new_value(opt, &opt->default_value, mode);
+}
+
+/*
+ * Make the default the option's committed value and publish it through
+ * the registered pointer.  Does nothing if no default has been set.
+ */
+static void conf_option_default(struct conf_option *opt)
+{
+	struct conf_value *dflt = &opt->default_value;
+
+	if (opt->default_set) {
+		/* Only a separately allocated value may be freed */
+		if (opt->value != dflt) {
+			TALLOC_FREE(opt->value);
+		}
+
+		opt->value = dflt;
+		conf_option_set_ptr_value(opt);
+	}
+}
+
+/*
+ * Discard a separately allocated pending value and re-publish the
+ * committed value.  A pending value that points at the embedded default
+ * is left as it is.
+ */
+static void conf_option_reset(struct conf_option *opt)
+{
+	bool pending_is_default = (opt->new_value == &opt->default_value);
+
+	if (!pending_is_default) {
+		TALLOC_FREE(opt->new_value);
+	}
+
+	conf_option_set_ptr_value(opt);
+}
+
+/*
+ * Commit the pending value: it replaces the committed value, the old
+ * committed value is freed (unless it is the embedded default), and the
+ * result is published through the registered pointer.  Does nothing when
+ * there is no pending value.
+ */
+static void conf_option_update(struct conf_option *opt)
+{
+	struct conf_value *pending = opt->new_value;
+
+	if (pending == NULL) {
+		return;
+	}
+
+	if (opt->value != &opt->default_value) {
+		TALLOC_FREE(opt->value);
+	}
+
+	opt->new_value = NULL;
+	opt->value = pending;
+
+	conf_option_set_ptr_value(opt);
+}
+
+/*
+ * Clear the flag that marks the option as modified in CONF_MODE_API.
+ */
+static void conf_option_reset_temporary(struct conf_option *opt)
+{
+ opt->temporary_modified = false;
+}
+
+/*
+ * Return true if the committed value is the embedded default itself.
+ * This is a pointer comparison: a separately allocated value that merely
+ * equals the default does not count.
+ */
+static bool conf_option_is_default(struct conf_option *opt)
+{
+ return (opt->value == &opt->default_value);
+}
+
+/*
+ * Write the option's committed value to fp, flagged as default and/or
+ * temporary as appropriate.
+ */
+static void conf_option_dump(struct conf_option *opt, FILE *fp)
+{
+	conf_value_dump(opt->name,
+			opt->value,
+			conf_option_is_default(opt),
+			opt->temporary_modified,
+			fp);
+}
+
+/*
+ * Functions related to conf_section
+ */
+
+/*
+ * Look up a section by name, ignoring case.
+ * Returns the section, or NULL if conf has no such section.
+ */
+static struct conf_section *conf_section_find(struct conf_context *conf,
+					      const char *section)
+{
+	struct conf_section *cur = conf->section;
+
+	while (cur != NULL && strcasecmp(cur->name, section) != 0) {
+		cur = cur->next;
+	}
+
+	return cur;
+}
+
+/*
+ * Create a section with the given name and optional validator and append
+ * it to conf's section list.  The section is a talloc child of conf.
+ *
+ * Returns 0 on success, EEXIST if a section with this name already
+ * exists, ENOMEM on allocation failure.
+ */
+static int conf_section_add(struct conf_context *conf,
+			    const char *section,
+			    conf_validate_section_fn validate)
+{
+	struct conf_section *new_sec;
+
+	if (conf_section_find(conf, section) != NULL) {
+		return EEXIST;
+	}
+
+	new_sec = talloc_zero(conf, struct conf_section);
+	if (new_sec == NULL) {
+		return ENOMEM;
+	}
+
+	new_sec->name = talloc_strdup(new_sec, section);
+	if (new_sec->name == NULL) {
+		talloc_free(new_sec);
+		return ENOMEM;
+	}
+	new_sec->validate = validate;
+
+	DLIST_ADD_END(conf->section, new_sec);
+	return 0;
+}
+
+/*
+ * Run the section's validator, if it has one.  A failure is logged.
+ * Returns true when there is no validator or it accepts the section.
+ */
+static bool conf_section_validate(struct conf_context *conf,
+				  struct conf_section *s,
+				  enum conf_update_mode mode)
+{
+	if (s->validate == NULL) {
+		return true;
+	}
+
+	if (s->validate(conf, s->name, mode)) {
+		return true;
+	}
+
+	D_ERR("conf: validation for section [%s] failed\n", s->name);
+	return false;
+}
+
+/*
+ * Write the "[name]" header line for section s to fp.
+ */
+static void conf_section_dump(struct conf_section *s, FILE *fp)
+{
+ fprintf(fp, "[%s]\n", s->name);
+}
+
+/*
+ * Functions related to conf_context
+ */
+
+/*
+ * Reset every option in every section to its default value.
+ */
+static void conf_all_default(struct conf_context *conf)
+{
+	struct conf_section *sec = conf->section;
+
+	while (sec != NULL) {
+		struct conf_option *o = sec->option;
+
+		while (o != NULL) {
+			conf_option_default(o);
+			o = o->next;
+		}
+		sec = sec->next;
+	}
+}
+
+/*
+ * Stage the default as pending value for every option in every section.
+ * Stops at, and returns, the first error; returns 0 if all succeed.
+ */
+static int conf_all_temporary_default(struct conf_context *conf,
+				      enum conf_update_mode mode)
+{
+	struct conf_section *sec = conf->section;
+
+	while (sec != NULL) {
+		struct conf_option *o = sec->option;
+
+		while (o != NULL) {
+			int ret = conf_option_new_default_value(o, mode);
+
+			if (ret != 0) {
+				return ret;
+			}
+			o = o->next;
+		}
+		sec = sec->next;
+	}
+
+	return 0;
+}
+
+/*
+ * Drop the pending value of every option in every section
+ * (see conf_option_reset()).
+ */
+static void conf_all_reset(struct conf_context *conf)
+{
+	struct conf_section *sec = conf->section;
+
+	while (sec != NULL) {
+		struct conf_option *o = sec->option;
+
+		while (o != NULL) {
+			conf_option_reset(o);
+			o = o->next;
+		}
+		sec = sec->next;
+	}
+}
+
+/*
+ * Commit the pending value of every option in every section and clear
+ * each option's temporary-modification flag.
+ */
+static void conf_all_update(struct conf_context *conf)
+{
+	struct conf_section *sec = conf->section;
+
+	while (sec != NULL) {
+		struct conf_option *o = sec->option;
+
+		while (o != NULL) {
+			conf_option_update(o);
+			conf_option_reset_temporary(o);
+			o = o->next;
+		}
+		sec = sec->next;
+	}
+}
+
+/*
+ * API functions
+ */
+
+/*
+ * Allocate an empty, zero-initialised configuration context on mem_ctx.
+ * Returns 0 and stores it in *result, or ENOMEM (leaving *result alone).
+ */
+int conf_init(TALLOC_CTX *mem_ctx, struct conf_context **result)
+{
+	struct conf_context *ctx = talloc_zero(mem_ctx, struct conf_context);
+
+	if (ctx != NULL) {
+		ctx->define_failed = false;
+		*result = ctx;
+		return 0;
+	}
+
+	return ENOMEM;
+}
+
+/*
+ * Define a new section.  Errors are not returned; they mark the context
+ * as failed (see conf_valid()).  Once the context has failed, further
+ * definitions are ignored.
+ */
+void conf_define_section(struct conf_context *conf,
+			 const char *section,
+			 conf_validate_section_fn validate)
+{
+	if (conf->define_failed) {
+		return;
+	}
+
+	/* A NULL name and a failed add are both definition errors */
+	if (section == NULL ||
+	    conf_section_add(conf, section, validate) != 0) {
+		conf->define_failed = true;
+	}
+}
+
+/*
+ * Common part of conf_define_{string,integer,boolean}: create option
+ * "key" of the given type in an existing section.
+ *
+ * validate is declared with the string validator type, but the integer
+ * and boolean callers pass their validators cast through void *; it is
+ * stored untyped and cast back according to the option's type.
+ *
+ * Returns the new option, or NULL (after logging where applicable) if
+ * the section is unknown, key is NULL or the option could not be added.
+ */
+static struct conf_option *conf_define(struct conf_context *conf,
+				       const char *section,
+				       const char *key,
+				       enum conf_type type,
+				       conf_validate_string_option_fn validate)
+{
+	struct conf_section *sec;
+	struct conf_option *new_opt;
+
+	sec = conf_section_find(conf, section);
+	if (sec == NULL) {
+		D_ERR("conf: unknown section [%s]\n", section);
+		return NULL;
+	}
+
+	if (key == NULL) {
+		D_ERR("conf: option name null in section [%s]\n", section);
+		return NULL;
+	}
+
+	if (conf_option_add(sec, key, type, validate, &new_opt) != 0) {
+		return NULL;
+	}
+
+	return new_opt;
+}
+
+/*
+ * Finish defining an option: store its default and make the default the
+ * committed value.  A failure marks the context as failed.
+ */
+static void conf_define_post(struct conf_context *conf,
+			     struct conf_option *opt,
+			     struct conf_value *default_value)
+{
+	if (conf_option_set_default(opt, default_value) != 0) {
+		conf->define_failed = true;
+		return;
+	}
+
+	conf_option_default(opt);
+}
+
+/*
+ * Define a string option with the given default (which may be NULL) and
+ * optional validator.  Ignored if the context has already failed; any
+ * error marks the context as failed.
+ */
+void conf_define_string(struct conf_context *conf,
+			const char *section,
+			const char *key,
+			const char *default_str_val,
+			conf_validate_string_option_fn validate)
+{
+	struct conf_value default_value = {
+		.type = CONF_STRING,
+		.data.string = default_str_val,
+	};
+	struct conf_option *opt;
+
+	if (! conf_valid(conf)) {
+		return;
+	}
+
+	opt = conf_define(conf, section, key, CONF_STRING, validate);
+	if (opt != NULL) {
+		conf_define_post(conf, opt, &default_value);
+	} else {
+		conf->define_failed = true;
+	}
+}
+
+/*
+ * Define an integer option with the given default and optional
+ * validator.  Ignored if the context has already failed; any error marks
+ * the context as failed.
+ */
+void conf_define_integer(struct conf_context *conf,
+			 const char *section,
+			 const char *key,
+			 const int default_int_val,
+			 conf_validate_integer_option_fn validate)
+{
+	struct conf_value default_value = {
+		.type = CONF_INTEGER,
+		.data.integer = default_int_val,
+	};
+	struct conf_option *opt;
+
+	if (! conf_valid(conf)) {
+		return;
+	}
+
+	opt = conf_define(conf, section, key, CONF_INTEGER, (void *)validate);
+	if (opt != NULL) {
+		conf_define_post(conf, opt, &default_value);
+	} else {
+		conf->define_failed = true;
+	}
+}
+
+
+/*
+ * Define a boolean option with the given default and optional
+ * validator.  Ignored if the context has already failed; any error marks
+ * the context as failed.
+ */
+void conf_define_boolean(struct conf_context *conf,
+			 const char *section,
+			 const char *key,
+			 const bool default_bool_val,
+			 conf_validate_boolean_option_fn validate)
+{
+	struct conf_value default_value = {
+		.type = CONF_BOOLEAN,
+		.data.boolean = default_bool_val,
+	};
+	struct conf_option *opt;
+
+	if (! conf_valid(conf)) {
+		return;
+	}
+
+	opt = conf_define(conf, section, key, CONF_BOOLEAN, (void *)validate);
+	if (opt != NULL) {
+		conf_define_post(conf, opt, &default_value);
+	} else {
+		conf->define_failed = true;
+	}
+}
+
+/*
+ * Find option "key" in section "section".
+ * Returns NULL if either the section or the option does not exist.
+ */
+static struct conf_option *_conf_option(struct conf_context *conf,
+					const char *section,
+					const char *key)
+{
+	struct conf_section *sec = conf_section_find(conf, section);
+
+	if (sec != NULL) {
+		return conf_option_find(sec, key);
+	}
+
+	return NULL;
+}
+
+/*
+ * Register str_ptr as the mirror for a string option and write the
+ * option's current value into it straight away.  An unknown option
+ * (logged) or an option of another type marks the context as failed.
+ */
+void conf_assign_string_pointer(struct conf_context *conf,
+				const char *section,
+				const char *key,
+				const char **str_ptr)
+{
+	struct conf_option *opt = _conf_option(conf, section, key);
+	union conf_pointer ptr = { .string = str_ptr };
+
+	if (opt == NULL) {
+		D_ERR("conf: unknown option [%s] -> \"%s\"\n", section, key);
+	}
+
+	if (opt == NULL || opt->type != CONF_STRING) {
+		conf->define_failed = true;
+		return;
+	}
+
+	conf_option_set_ptr(opt, &ptr);
+	conf_option_set_ptr_value(opt);
+}
+
+/*
+ * Register int_ptr as the mirror for an integer option and write the
+ * option's current value into it straight away.  An unknown option
+ * (logged) or an option of another type marks the context as failed.
+ */
+void conf_assign_integer_pointer(struct conf_context *conf,
+				 const char *section,
+				 const char *key,
+				 int *int_ptr)
+{
+	struct conf_option *opt = _conf_option(conf, section, key);
+	union conf_pointer ptr = { .integer = int_ptr };
+
+	if (opt == NULL) {
+		D_ERR("conf: unknown option [%s] -> \"%s\"\n", section, key);
+	}
+
+	if (opt == NULL || opt->type != CONF_INTEGER) {
+		conf->define_failed = true;
+		return;
+	}
+
+	conf_option_set_ptr(opt, &ptr);
+	conf_option_set_ptr_value(opt);
+}
+
+/*
+ * Register bool_ptr as the mirror for a boolean option and write the
+ * option's current value into it straight away.  An unknown option
+ * (logged) or an option of another type marks the context as failed.
+ */
+void conf_assign_boolean_pointer(struct conf_context *conf,
+				 const char *section,
+				 const char *key,
+				 bool *bool_ptr)
+{
+	struct conf_option *opt = _conf_option(conf, section, key);
+	union conf_pointer ptr = { .boolean = bool_ptr };
+
+	if (opt == NULL) {
+		D_ERR("conf: unknown option [%s] -> \"%s\"\n", section, key);
+	}
+
+	if (opt == NULL || opt->type != CONF_BOOLEAN) {
+		conf->define_failed = true;
+		return;
+	}
+
+	conf_option_set_ptr(opt, &ptr);
+	conf_option_set_ptr_value(opt);
+}
+
+/*
+ * Check whether option "key" exists in section "section".
+ *
+ * Returns false if the context is not valid or the section/option is
+ * unknown.  On success, the option's type is stored in *type when type
+ * is not NULL.
+ */
+bool conf_query(struct conf_context *conf,
+		const char *section,
+		const char *key,
+		enum conf_type *type)
+{
+	struct conf_option *opt;
+
+	if (! conf_valid(conf)) {
+		return false;
+	}
+
+	opt = _conf_option(conf, section, key);
+	if (opt == NULL) {
+		return false;
+	}
+
+	if (type != NULL) {
+		*type = opt->type;
+	}
+	return true;
+}
+
+/*
+ * Return true unless some section/option definition has failed.
+ */
+bool conf_valid(struct conf_context *conf)
+{
+	return !conf->define_failed;
+}
+
+/*
+ * Reset every option to its default value.
+ */
+void conf_set_defaults(struct conf_context *conf)
+{
+ conf_all_default(conf);
+}
+
+/*
+ * State shared between conf_load_internal() and the tini_parse()
+ * callbacks.
+ */
+struct conf_load_state {
+ struct conf_context *conf;
+ struct conf_section *s; /* section being parsed; NULL if none or unknown */
+ enum conf_update_mode mode; /* CONF_MODE_LOAD or CONF_MODE_RELOAD */
+ int err; /* error recorded by the callbacks; 0 if none */
+};
+
+static bool conf_load_section(const char *section, void *private_data);
+static bool conf_load_option(const char *name,
+ const char *value_str,
+ void *private_data);
+
+/*
+ * Parse conf->filename and apply it as one transaction.
+ *
+ * Every option is first staged back to its default, so options missing
+ * from the file revert to their defaults.  The file is then parsed; the
+ * callbacks stage values and record problems in state.err.  Only if no
+ * error was seen are the staged values committed; otherwise all staged
+ * values are dropped and the committed configuration is unchanged.
+ *
+ * Returns 0 on success, otherwise an errno value: the fopen() error, the
+ * error recorded by a callback, or EINVAL when the parser itself rejects
+ * the file or a section fails validation.
+ */
+static int conf_load_internal(struct conf_context *conf)
+{
+	struct conf_load_state state;
+	FILE *fp;
+	int ret;
+	bool ok;
+
+	state = (struct conf_load_state) {
+		.conf = conf,
+		.mode = (conf->reload ? CONF_MODE_RELOAD : CONF_MODE_LOAD),
+	};
+
+	ret = conf_all_temporary_default(conf, state.mode);
+	if (ret != 0) {
+		return ret;
+	}
+
+	fp = fopen(conf->filename, "r");
+	if (fp == NULL) {
+		return errno;
+	}
+
+	ok = tini_parse(fp,
+			false,
+			conf_load_section,
+			conf_load_option,
+			&state);
+	fclose(fp);
+	if (!ok) {
+		/*
+		 * The parser can fail on malformed input without any
+		 * callback having recorded an error.  Never report such
+		 * a rolled-back load as success.
+		 */
+		if (state.err == 0) {
+			state.err = EINVAL;
+		}
+		goto fail;
+	}
+
+	/* Process the last section */
+	if (state.s != NULL) {
+		ok = conf_section_validate(conf, state.s, state.mode);
+		if (!ok) {
+			state.err = EINVAL;
+			goto fail;
+		}
+	}
+
+	if (state.err != 0) {
+		goto fail;
+	}
+
+	conf_all_update(conf);
+	return 0;
+
+fail:
+	conf_all_reset(conf);
+	return state.err;
+}
+
+/*
+ * tini_parse() callback for a "[section]" header.
+ *
+ * Validates the section that just ended, then switches to the new one.
+ * Problems are recorded in state->err rather than by returning false, so
+ * this callback always returns true.
+ */
+static bool conf_load_section(const char *section, void *private_data)
+{
+	struct conf_load_state *state = private_data;
+
+	if (state->s != NULL &&
+	    !conf_section_validate(state->conf, state->s, state->mode)) {
+		state->err = EINVAL;
+		return true;
+	}
+
+	state->s = conf_section_find(state->conf, section);
+	if (state->s != NULL) {
+		return true;
+	}
+
+	if (state->conf->ignore_unknown) {
+		D_DEBUG("conf: ignoring unknown section [%s]\n",
+			section);
+	} else {
+		D_ERR("conf: unknown section [%s]\n", section);
+		state->err = EINVAL;
+	}
+
+	return true;
+}
+
+/*
+ * tini_parse() callback for a "name = value" line.
+ *
+ * Converts the value according to the option's type and stages it as the
+ * option's pending value, unless it equals the committed value.  Unknown
+ * sections/options are skipped when ignore_unknown is set and are errors
+ * otherwise.  An empty value is always an error.
+ *
+ * Problems are recorded in state->err and the callback still returns
+ * true; only a failure to allocate the temporary context returns false.
+ */
+static bool conf_load_option(const char *name,
+			     const char *value_str,
+			     void *private_data)
+{
+	struct conf_load_state *state = private_data;
+	struct conf_section *sec = state->s;
+	struct conf_option *opt;
+	struct conf_value value;
+	TALLOC_CTX *tmp_ctx;
+	int ret;
+
+	if (sec == NULL) {
+		if (state->conf->ignore_unknown) {
+			D_DEBUG("conf: unknown section for option \"%s\"\n",
+				name);
+		} else {
+			D_ERR("conf: unknown section for option \"%s\"\n",
+			      name);
+			state->err = EINVAL;
+		}
+		return true;
+	}
+
+	opt = conf_option_find(sec, name);
+	if (opt == NULL) {
+		if (state->conf->ignore_unknown) {
+			D_DEBUG("conf: unknown option [%s] -> \"%s\"\n",
+				sec->name,
+				name);
+		} else {
+			D_ERR("conf: unknown option [%s] -> \"%s\"\n",
+			      sec->name,
+			      name);
+			state->err = EINVAL;
+		}
+		return true;
+	}
+
+	if (value_str[0] == '\0') {
+		D_ERR("conf: empty value [%s] -> \"%s\"\n",
+		      sec->name,
+		      name);
+		state->err = EINVAL;
+		return true;
+	}
+
+	/* Scratch context for the parsed string payload */
+	tmp_ctx = talloc_new(state->conf);
+	if (tmp_ctx == NULL) {
+		state->err = ENOMEM;
+		return false;
+	}
+
+	value.type = opt->type;
+	ret = conf_value_from_string(tmp_ctx, value_str, &value);
+	if (ret != 0) {
+		D_ERR("conf: invalid value [%s] -> \"%s\" = \"%s\"\n",
+		      sec->name,
+		      name,
+		      value_str);
+		state->err = ret;
+	} else if (!conf_option_same_value(opt, &value)) {
+		ret = conf_option_new_value(opt, &value, state->mode);
+		if (ret != 0) {
+			state->err = ret;
+		}
+	}
+
+	talloc_free(tmp_ctx);
+	return true;
+}
+
+/*
+ * Load configuration from filename.
+ *
+ * The name is remembered for conf_reload().  If ignore_unknown is true,
+ * unknown sections and options in the file are skipped instead of being
+ * treated as errors.
+ *
+ * Returns 0 on success, ENOMEM if the name cannot be stored (the
+ * previously stored name, if any, is then kept), or the error from
+ * loading the file.
+ */
+int conf_load(struct conf_context *conf,
+	      const char *filename,
+	      bool ignore_unknown)
+{
+	char *name;
+
+	/*
+	 * Duplicate before releasing the old name: this keeps the old
+	 * name on failure and is safe if the caller passes the stored
+	 * name back in.
+	 */
+	name = talloc_strdup(conf, filename);
+	if (name == NULL) {
+		return ENOMEM;
+	}
+
+	/* Don't accumulate one copy per conf_load() call */
+	if (conf->filename != NULL) {
+		talloc_free(discard_const(conf->filename));
+	}
+	conf->filename = name;
+
+	conf->ignore_unknown = ignore_unknown;
+
+	D_NOTICE("Reading config file %s\n", conf->filename);
+
+	return conf_load_internal(conf);
+}
+
+/*
+ * Re-read the file given to the last conf_load(), in CONF_MODE_RELOAD.
+ * Returns EPERM if no file name has been stored, otherwise the result of
+ * loading the file.
+ */
+int conf_reload(struct conf_context *conf)
+{
+	int status;
+
+	if (conf->filename == NULL) {
+		return EPERM;
+	}
+
+	D_NOTICE("Re-reading config file %s\n", conf->filename);
+
+	/* The flag selects the update mode inside conf_load_internal() */
+	conf->reload = true;
+	status = conf_load_internal(conf);
+	conf->reload = false;
+
+	return status;
+}
+
+/*
+ * Common part of conf_set_{string,integer,boolean}.
+ *
+ * Stages value for the option in CONF_MODE_API, validates the whole
+ * section and commits on success.  On any failure the staged value is
+ * dropped.  Setting a value equal to the committed one is a no-op.
+ *
+ * Returns 0 on success; EINVAL for an unknown section/option, a type
+ * mismatch or a validation failure; otherwise the error from staging.
+ */
+static int conf_set(struct conf_context *conf,
+		    const char *section,
+		    const char *key,
+		    struct conf_value *value)
+{
+	struct conf_section *sec;
+	struct conf_option *opt = NULL;
+	int ret;
+
+	sec = conf_section_find(conf, section);
+	if (sec != NULL) {
+		opt = conf_option_find(sec, key);
+	}
+
+	if (opt == NULL || opt->type != value->type) {
+		return EINVAL;
+	}
+
+	if (conf_option_same_value(opt, value)) {
+		return 0;
+	}
+
+	ret = conf_option_new_value(opt, value, CONF_MODE_API);
+	if (ret == 0 && !conf_section_validate(conf, sec, CONF_MODE_API)) {
+		ret = EINVAL;
+	}
+
+	if (ret != 0) {
+		conf_option_reset(opt);
+		return ret;
+	}
+
+	conf_option_update(opt);
+	return 0;
+}
+
+/*
+ * Set a string option through the API.  See conf_set() for the result.
+ */
+int conf_set_string(struct conf_context *conf,
+		    const char *section,
+		    const char *key,
+		    const char *str_val)
+{
+	struct conf_value value = {
+		.type = CONF_STRING,
+		.data.string = str_val,
+	};
+
+	return conf_set(conf, section, key, &value);
+}
+
+/*
+ * Set an integer option through the API.  See conf_set() for the result.
+ */
+int conf_set_integer(struct conf_context *conf,
+		     const char *section,
+		     const char *key,
+		     int int_val)
+{
+	struct conf_value value = {
+		.type = CONF_INTEGER,
+		.data.integer = int_val,
+	};
+
+	return conf_set(conf, section, key, &value);
+}
+
+/*
+ * Set a boolean option through the API.  See conf_set() for the result.
+ */
+int conf_set_boolean(struct conf_context *conf,
+		     const char *section,
+		     const char *key,
+		     bool bool_val)
+{
+	struct conf_value value = {
+		.type = CONF_BOOLEAN,
+		.data.boolean = bool_val,
+	};
+
+	return conf_set(conf, section, key, &value);
+}
+
+/*
+ * Common part of conf_get_{string,integer,boolean}.
+ *
+ * Returns the option's committed value through *value and, if is_default
+ * is not NULL, whether that value is the default.  Returns EINVAL for an
+ * unknown section/option or when the option is not of the given type.
+ */
+static int conf_get(struct conf_context *conf,
+		    const char *section,
+		    const char *key,
+		    enum conf_type type,
+		    const struct conf_value **value,
+		    bool *is_default)
+{
+	struct conf_option *opt = _conf_option(conf, section, key);
+
+	if (opt == NULL || opt->type != type) {
+		return EINVAL;
+	}
+
+	*value = opt->value;
+	if (is_default != NULL) {
+		*is_default = conf_option_is_default(opt);
+	}
+
+	return 0;
+}
+
+/*
+ * Fetch a string option.  On success *str_val points at the string held
+ * by the configuration (not a copy) and 0 is returned; otherwise the
+ * error from conf_get() is returned and *str_val is left alone.
+ */
+int conf_get_string(struct conf_context *conf,
+		    const char *section,
+		    const char *key,
+		    const char **str_val,
+		    bool *is_default)
+{
+	const struct conf_value *found;
+	int ret;
+
+	ret = conf_get(conf, section, key, CONF_STRING, &found, is_default);
+	if (ret == 0) {
+		*str_val = found->data.string;
+	}
+
+	return ret;
+}
+
+/*
+ * Fetch an integer option.  Returns 0 and stores the value in *int_val,
+ * or the error from conf_get() (leaving *int_val alone).
+ */
+int conf_get_integer(struct conf_context *conf,
+		     const char *section,
+		     const char *key,
+		     int *int_val,
+		     bool *is_default)
+{
+	const struct conf_value *found;
+	int ret;
+
+	ret = conf_get(conf, section, key, CONF_INTEGER, &found, is_default);
+	if (ret == 0) {
+		*int_val = found->data.integer;
+	}
+
+	return ret;
+}
+
+/*
+ * Fetch a boolean option.  Returns 0 and stores the value in *bool_val,
+ * or the error from conf_get() (leaving *bool_val alone).
+ */
+int conf_get_boolean(struct conf_context *conf,
+		     const char *section,
+		     const char *key,
+		     bool *bool_val,
+		     bool *is_default)
+{
+	const struct conf_value *found;
+	int ret;
+
+	ret = conf_get(conf, section, key, CONF_BOOLEAN, &found, is_default);
+	if (ret == 0) {
+		*bool_val = found->data.boolean;
+	}
+
+	return ret;
+}
+
+/*
+ * Write the whole configuration to fp: each section header followed by
+ * its options, in definition order.
+ */
+void conf_dump(struct conf_context *conf, FILE *fp)
+{
+	struct conf_section *sec = conf->section;
+
+	while (sec != NULL) {
+		struct conf_option *o = sec->option;
+
+		conf_section_dump(sec, fp);
+		while (o != NULL) {
+			conf_option_dump(o, fp);
+			o = o->next;
+		}
+		sec = sec->next;
+	}
+}
diff --git a/ctdb/common/conf.h b/ctdb/common/conf.h
new file mode 100644
index 0000000..4dbf9c3
--- /dev/null
+++ b/ctdb/common/conf.h
@@ -0,0 +1,473 @@
+/*
+ Configuration file handling on top of tini
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_CONF_H__
+#define __CTDB_CONF_H__
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <talloc.h>
+
+/**
+ * @file conf.h
+ *
+ * @brief Configuration file handling with sections and key-value pairs
+ *
+ * CTDB settings can be written in a configuration file ctdb.conf (similar to
+ * samba's smb.conf). Various daemons and tools will consult the configuration
+ * file for runtime settings.
+ *
+ * The configuration will be organized in sections depending on various
+ * components. Each section will have various configuration options in the form
+ * of key-value pairs.
+ *
+ * [section1]
+ * key1 = value1
+ * ...
+ *
+ * [section2]
+ * key2 = value2
+ * ...
+ *
+ * ...
+ *
+ */
+
+/**
+ * @brief Abstract data structure holding the configuration options
+ */
+struct conf_context;
+
+/**
+ * @brief configuration option update mode
+ *
+ * When a value of configuration option is changed, update mode is set
+ * appropriately.
+ *
+ * CONF_MODE_API - value modified using set functions
+ * CONF_MODE_LOAD - value modified via conf_load
+ * CONF_MODE_RELOAD - value modified via conf_reload
+ */
+enum conf_update_mode {
+ CONF_MODE_API, /* value modified using set functions */
+ CONF_MODE_LOAD, /* value modified via conf_load */
+ CONF_MODE_RELOAD, /* value modified via conf_reload */
+};
+
+/**
+ * @brief configuration option type
+ *
+ * Selects which of the typed define/set/get functions applies to an option.
+ */
+enum conf_type {
+ CONF_STRING, /* string value, see conf_get_string() */
+ CONF_INTEGER, /* integer value, see conf_get_integer() */
+ CONF_BOOLEAN, /* boolean value, see conf_get_boolean() */
+};
+
+/**
+ * @brief Configuration section validation function
+ *
+ * Check if all the configuration options are consistent with each-other
+ */
+typedef bool (*conf_validate_section_fn)(struct conf_context *conf,
+ const char *section,
+ enum conf_update_mode mode);
+
+/**
+ * @brief Configuration option validation function for string
+ *
+ * Check if a configuration option value is valid
+ */
+typedef bool (*conf_validate_string_option_fn)(const char *key,
+ const char *old_value,
+ const char *new_value,
+ enum conf_update_mode mode);
+
+/**
+ * @brief Configuration option validation function for integer
+ *
+ * Check if a configuration option value is valid
+ */
+typedef bool (*conf_validate_integer_option_fn)(const char *key,
+ int old_value,
+ int new_value,
+ enum conf_update_mode mode);
+
+/**
+ * @brief Configuration option validation function for boolean
+ *
+ * Check if a configuration option value is valid
+ */
+typedef bool (*conf_validate_boolean_option_fn)(const char *key,
+ bool old_value,
+ bool new_value,
+ enum conf_update_mode mode);
+
+/**
+ * @brief Initialize configuration option database
+ *
+ * This returns a new configuration options context. Freeing this context will
+ * free up all the memory associated with the configuration options.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] result The new configuration options context
+ * @return 0 on success, errno on failure
+ */
+int conf_init(TALLOC_CTX *mem_ctx, struct conf_context **result);
+
+/**
+ * @brief Define a section for organizing configuration options
+ *
+ * This function creates a section to organize configuration options. The
+ * section names are case-insensitive and are always stored in lower case.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] validate The validation function for configuration options
+ */
+void conf_define_section(struct conf_context *conf,
+ const char *section,
+ conf_validate_section_fn validate);
+
+/**
+ * @brief Define a configuration option which has a string value
+ *
+ * This function adds a new configuration option organized under a given
+ * section. Configuration options are case-insensitive and are always stored
+ * in lower case.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[in] default_value The default value for the configuration option
+ * @param[in] validate The validation function for the configuration option
+ */
+void conf_define_string(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ const char *default_value,
+ conf_validate_string_option_fn validate);
+
+/**
+ * @brief Define a configuration option which has an integer value
+ *
+ * This function adds a new configuration option organized under a given
+ * section. Configuration options are case-insensitive and are always stored
+ * in lower case.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[in] default_value The default value for the configuration option
+ * @param[in] validate The validation function for the configuration option
+ */
+void conf_define_integer(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ const int default_value,
+ conf_validate_integer_option_fn validate);
+
+/**
+ * @brief Define a configuration option which has a boolean value
+ *
+ * This function adds a new configuration option organized under a given
+ * section. Configuration options are case-insensitive and are always stored
+ * in lower case.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[in] default_value The default value for the configuration option
+ * @param[in] validate The validation function for the configuration option
+ */
+void conf_define_boolean(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ const bool default_value,
+ conf_validate_boolean_option_fn validate);
+
+/**
+ * @brief Assign user-accessible pointer for string option
+ *
+ * This pointer can be used for accessing the value of configuration option
+ * directly without requiring a function call.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[in] ptr User-accessible pointer to the value
+ */
+void conf_assign_string_pointer(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ const char **ptr);
+
+/**
+ * @brief Assign user-accessible pointer for integer option
+ *
+ * This pointer can be used for accessing the value of configuration option
+ * directly without requiring a function call.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[in] ptr User-accessible pointer to the value
+ */
+void conf_assign_integer_pointer(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ int *ptr);
+
+/**
+ * @brief Assign user-accessible pointer for boolean option
+ *
+ * This pointer can be used for accessing the value of configuration option
+ * directly without requiring a function call.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[in] ptr User-accessible pointer to the value
+ */
+void conf_assign_boolean_pointer(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ bool *ptr);
+
+/**
+ * @brief Query a configuration option
+ *
+ * This function checks if a configuration option is defined or not.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[out] type The type of the configuration option
+ * @return true on success, false if section/option is not defined
+ */
+bool conf_query(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ enum conf_type *type);
+
+/**
+ * @brief Check if the defined configuration options are valid
+ *
+ * This function must be called after creating configuration options
+ * to confirm that all the option definitions are valid.
+ *
+ * @param[in] conf The configuration options context
+ * @return true on success, false on failure
+ */
+bool conf_valid(struct conf_context *conf);
+
+/**
+ * @brief Set the default values for all configuration options
+ *
+ * This function resets all the configuration options to their default values.
+ *
+ * @param[in] conf The configuration options context
+ */
+void conf_set_defaults(struct conf_context *conf);
+
+/**
+ * @brief Load the values of configuration options from a file
+ *
+ * This function will update the values of the configuration options from those
+ * specified in a file. This function will fail in case it encounters an
+ * undefined option. Any sections which are not defined, will be ignored.
+ *
+ * This function will call validation function (if specified) before updating
+ * the value of a configuration option. After updating all the values for a
+ * section, the validation for section (if specified) will be called. If any
+ * of the validation functions return error, then all the configuration
+ * options will be reset to their previous values.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] filename The configuration file
+ * @param[in] ignore_unknown Whether unknown config options should be ignored
+ * @return 0 on success, errno on failure
+ */
+int conf_load(struct conf_context *conf,
+ const char *filename,
+ bool ignore_unknown);
+
+/**
+ * @brief Reload the values for configuration options
+ *
+ * This function will re-load the values of the configuration options. This
+ * function can be called only after successful call to conf_load().
+ *
+ * @see conf_load
+ *
+ * @param[in] conf The configuration options context
+ * @return 0 on success, errno on failure.
+ */
+int conf_reload(struct conf_context *conf);
+
+/**
+ * @brief Set the string value of a configuration option
+ *
+ * This function can be used to update the value of a configuration option.
+ * This will call the validation function for that option (if defined) and
+ * the section validation function (if defined).
+ *
+ * If a user-defined storage pointer is provided, then the value of a
+ * configuration option should not be changed via that pointer.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of a section
+ * @param[in] key The name of a configuration option
+ * @param[in] str_val The string value
+ * @return 0 on success, errno in case of failure
+ */
+int conf_set_string(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ const char *str_val);
+
+/**
+ * @brief Set the integer value of a configuration option
+ *
+ * This function can be used to update the value of a configuration option.
+ * This will call the validation function for that option (if defined) and
+ * the section validation function (if defined).
+ *
+ * If a user-defined storage pointer is provided, then the value of a
+ * configuration option should not be changed via that pointer.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of a section
+ * @param[in] key The name of a configuration option
+ * @param[in] int_val The integer value
+ * @return 0 on success, errno in case of failure
+ */
+int conf_set_integer(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ int int_val);
+
+/**
+ * @brief Set the boolean value of a configuration option
+ *
+ * This function can be used to update the value of a configuration option.
+ * This will call the validation function for that option (if defined) and
+ * the section validation function (if defined).
+ *
+ * If a user-defined storage pointer is provided, then the value of a
+ * configuration option should not be changed via that pointer.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of a section
+ * @param[in] key The name of a configuration option
+ * @param[in] bool_val The boolean value
+ * @return 0 on success, errno in case of failure
+ */
+int conf_set_boolean(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ bool bool_val);
+
+/**
+ * @brief Get the string value of a configuration option
+ *
+ * This function can be used to fetch the current value of a configuration
+ * option.
+ *
+ * If a user-defined storage pointer is provided, then the value of a
+ * configuration option can be accessed directly via that pointer.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of a section
+ * @param[in] key The name of a configuration option
+ * @param[out] str_val The string value of the configuration option
+ * @param[out] is_default True if the value is default value
+ * @return 0 on success, errno in case of failure
+ */
+int conf_get_string(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ const char **str_val,
+ bool *is_default);
+
+/**
+ * @brief Get the integer value of a configuration option
+ *
+ * This function can be used to fetch the current value of a configuration
+ * option.
+ *
+ * If a user-defined storage pointer is provided, then the value of a
+ * configuration option can be accessed directly via that pointer.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of a section
+ * @param[in] key The name of a configuration option
+ * @param[out] int_val The integer value of the configuration option
+ * @param[out] is_default True if the value is default value
+ * @return 0 on success, errno in case of failure
+ */
+int conf_get_integer(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ int *int_val,
+ bool *is_default);
+
+/**
+ * @brief Get the boolean value of a configuration option
+ *
+ * This function can be used to fetch the current value of a configuration
+ * option.
+ *
+ * If a user-defined storage pointer is provided, then the value of a
+ * configuration option can be accessed directly via that pointer.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of a section
+ * @param[in] key The name of a configuration option
+ * @param[out] bool_val The boolean value of the configuration option
+ * @param[out] is_default True if the value is default value
+ * @return 0 on success, errno in case of failure
+ */
+int conf_get_boolean(struct conf_context *conf,
+ const char *section,
+ const char *key,
+ bool *bool_val,
+ bool *is_default);
+
+/**
+ * @brief Dump the configuration in a file
+ *
+ * All the configuration options are dumped with their current values.
+ * If an option has a default value, then it is commented.
+ *
+ * Here is a sample output:
+ *
+ * [section1]
+ * key1 = value1
+ * key2 = value2
+ * # key3 = default_value3
+ * [section2]
+ * key4 = value4
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] fp File pointer
+ */
+void conf_dump(struct conf_context *conf, FILE *fp);
+
+#endif /* __CTDB_CONF_H__ */
diff --git a/ctdb/common/conf_tool.c b/ctdb/common/conf_tool.c
new file mode 100644
index 0000000..2d0543d
--- /dev/null
+++ b/ctdb/common/conf_tool.c
@@ -0,0 +1,321 @@
+/*
+ Config options tool
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+
+#include "lib/util/debug.h"
+
+#include "common/logging.h"
+#include "common/cmdline.h"
+#include "common/conf.h"
+#include "common/path.h"
+
+#include "common/logging_conf.h"
+#include "cluster/cluster_conf.h"
+#include "database/database_conf.h"
+#include "event/event_conf.h"
+#include "failover/failover_conf.h"
+#include "server/legacy_conf.h"
+
+#include "common/conf_tool.h"
+
+/* State shared between the config tool's setup code and its commands */
+struct conf_tool_context {
+ /* command-line handle, created by cmdline_init() in conf_tool_init() */
+ struct cmdline_context *cmdline;
+ /* configuration file path, obtained from path_config() in conf_tool_run() */
+ const char *conf_file;
+ /* configuration options context, created by conf_init() in conf_tool_run() */
+ struct conf_context *conf;
+};
+
+/*
+ * "dump" command: load the configuration file, skipping unknown options,
+ * and print all options to stdout.  Takes no arguments.  A conf_load()
+ * result of ENOENT is tolerated, so the dump is still produced in that
+ * case; any other load error is reported and returned.
+ */
+static int conf_tool_dump(TALLOC_CTX *mem_ctx,
+			  int argc,
+			  const char **argv,
+			  void *private_data)
+{
+	struct conf_tool_context *ctx = talloc_get_type_abort(
+		private_data, struct conf_tool_context);
+	int rc;
+
+	if (argc != 0) {
+		cmdline_usage(ctx->cmdline, "dump");
+		return EINVAL;
+	}
+
+	rc = conf_load(ctx->conf, ctx->conf_file, true);
+	if (rc == 0 || rc == ENOENT) {
+		conf_dump(ctx->conf, stdout);
+		return 0;
+	}
+
+	D_ERR("Failed to load config file %s\n", ctx->conf_file);
+	return rc;
+}
+
+/*
+ * "get" command: print the current value of a single option to stdout.
+ *
+ * Expects exactly two arguments, <section> and <key>.  The option must be
+ * defined (ENOENT otherwise).  The configuration file is then loaded with
+ * unknown options ignored; a conf_load() result of ENOENT is tolerated.
+ * Strings are printed as-is (a NULL string prints as an empty line),
+ * integers in decimal and booleans as "true"/"false".
+ */
+static int conf_tool_get(TALLOC_CTX *mem_ctx,
+			 int argc,
+			 const char **argv,
+			 void *private_data)
+{
+	struct conf_tool_context *ctx = talloc_get_type_abort(
+		private_data, struct conf_tool_context);
+	const char *section, *option;
+	const char *s_val = NULL;
+	enum conf_type type;
+	int i_val;
+	bool b_val;
+	int ret;
+
+	if (argc != 2) {
+		cmdline_usage(ctx->cmdline, "get");
+		return EINVAL;
+	}
+
+	section = argv[0];
+	option = argv[1];
+
+	if (!conf_query(ctx->conf, section, option, &type)) {
+		D_ERR("Configuration option [%s] -> \"%s\" not defined\n",
+		      section, option);
+		return ENOENT;
+	}
+
+	ret = conf_load(ctx->conf, ctx->conf_file, true);
+	if (ret != 0 && ret != ENOENT) {
+		D_ERR("Failed to load config file %s\n", ctx->conf_file);
+		return ret;
+	}
+
+	/* Fetch with the accessor matching the type, print only on success */
+	switch (type) {
+	case CONF_STRING:
+		ret = conf_get_string(ctx->conf, section, option, &s_val, NULL);
+		if (ret == 0) {
+			printf("%s\n", s_val == NULL ? "" : s_val);
+		}
+		break;
+
+	case CONF_INTEGER:
+		ret = conf_get_integer(ctx->conf, section, option, &i_val, NULL);
+		if (ret == 0) {
+			printf("%d\n", i_val);
+		}
+		break;
+
+	case CONF_BOOLEAN:
+		ret = conf_get_boolean(ctx->conf, section, option, &b_val, NULL);
+		if (ret == 0) {
+			printf("%s\n", b_val ? "true" : "false");
+		}
+		break;
+
+	default:
+		D_ERR("Unknown configuration option type\n");
+		return EINVAL;
+	}
+
+	if (ret != 0) {
+		D_ERR("Failed to get configuration option value\n");
+	}
+
+	return ret;
+}
+
+/*
+ * "validate" command: load the configuration file without ignoring
+ * unknown options and report whether that succeeded.  Takes no arguments.
+ * Unlike "dump" and "get", every conf_load() error (including ENOENT) is
+ * treated as a failure.
+ */
+static int conf_tool_validate(TALLOC_CTX *mem_ctx,
+			      int argc,
+			      const char **argv,
+			      void *private_data)
+{
+	struct conf_tool_context *ctx = talloc_get_type_abort(
+		private_data, struct conf_tool_context);
+	int rc;
+
+	if (argc != 0) {
+		cmdline_usage(ctx->cmdline, "validate");
+		return EINVAL;
+	}
+
+	rc = conf_load(ctx->conf, ctx->conf_file, false);
+	if (rc != 0) {
+		D_ERR("Failed to load config file %s\n", ctx->conf_file);
+	}
+
+	return rc;
+}
+
+/*
+ * Commands offered by the config tool; passed to cmdline_init() in
+ * conf_tool_init().  Each entry appears to hold the command name, its
+ * handler, a help string and an argument synopsis (NULL for commands
+ * taking no arguments) - confirm the field order against
+ * struct cmdline_command in common/cmdline.h.
+ */
+struct cmdline_command conf_commands[] = {
+ { "dump", conf_tool_dump,
+ "Dump configuration", NULL },
+ { "get", conf_tool_get,
+ "Get a config value", "<section> <key>" },
+ { "validate", conf_tool_validate,
+ "Validate configuration file", NULL },
+ CMDLINE_TABLEEND
+};
+
+/*
+ * Allocate the tool context under mem_ctx, register the tool's commands
+ * with the command-line parser and parse argv.
+ *
+ * On success the new context is stored in *result and 0 is returned.  On
+ * failure the context is freed and ENOMEM or the error from
+ * cmdline_init()/cmdline_parse() is returned; usage is printed when
+ * parsing fails.
+ */
+int conf_tool_init(TALLOC_CTX *mem_ctx,
+		   const char *prog,
+		   struct poptOption *options,
+		   int argc,
+		   const char **argv,
+		   bool parse_options,
+		   struct conf_tool_context **result)
+{
+	struct conf_tool_context *tool;
+	int rc;
+
+	tool = talloc_zero(mem_ctx, struct conf_tool_context);
+	if (tool == NULL) {
+		D_ERR("Memory allocation error\n");
+		return ENOMEM;
+	}
+
+	rc = cmdline_init(tool, prog, options, NULL, conf_commands,
+			  &tool->cmdline);
+	if (rc != 0) {
+		D_ERR("Failed to initialize cmdline, ret=%d\n", rc);
+		goto fail;
+	}
+
+	rc = cmdline_parse(tool->cmdline, argc, argv, parse_options);
+	if (rc != 0) {
+		cmdline_usage(tool->cmdline, NULL);
+		goto fail;
+	}
+
+	*result = tool;
+	return 0;
+
+fail:
+	talloc_free(tool);
+	return rc;
+}
+
+/*
+ * Build the configuration context and run the command selected on the
+ * command line.
+ *
+ * The config file path comes from path_config().  All known sections and
+ * options are registered and checked with conf_valid() before the command
+ * is dispatched through cmdline_run(), whose return value is passed back.
+ * Returns ENOMEM, EINVAL or the conf_init() error if setup fails.
+ */
+int conf_tool_run(struct conf_tool_context *ctx, int *result)
+{
+	int rc;
+
+	ctx->conf_file = path_config(ctx);
+	if (ctx->conf_file == NULL) {
+		D_ERR("Memory allocation error\n");
+		return ENOMEM;
+	}
+
+	rc = conf_init(ctx, &ctx->conf);
+	if (rc != 0) {
+		D_ERR("Failed to initialize config\n");
+		return rc;
+	}
+
+	/* Register the sections/options of every component */
+	logging_conf_init(ctx->conf, NULL);
+	cluster_conf_init(ctx->conf);
+	database_conf_init(ctx->conf);
+	event_conf_init(ctx->conf);
+	failover_conf_init(ctx->conf);
+	legacy_conf_init(ctx->conf);
+
+	if (conf_valid(ctx->conf)) {
+		return cmdline_run(ctx->cmdline, ctx, result);
+	}
+
+	D_ERR("Failed to define configuration options\n");
+	return EINVAL;
+}
+
+#ifdef CTDB_CONF_TOOL
+
+/*
+ * Storage for values of the standalone tool's command-line options.
+ * "debug" is the target of the --debug option in conf_options and
+ * defaults to "ERROR".
+ */
+static struct {
+ const char *debug;
+} conf_data = {
+ .debug = "ERROR",
+};
+
+/*
+ * popt option table for the standalone tool: the automatic help options
+ * plus -d/--debug, whose string argument is stored in conf_data.debug.
+ */
+struct poptOption conf_options[] = {
+ POPT_AUTOHELP
+ { "debug", 'd', POPT_ARG_STRING, &conf_data.debug, 0,
+ "debug level", "ERROR|WARNING|NOTICE|INFO|DEBUG" },
+ POPT_TABLEEND
+};
+
+/*
+ * Entry point of the standalone "ctdb-config" tool.
+ *
+ * Parses the command line, sets up logging to stderr at the requested
+ * debug level (DEBUG_ERR if the level string cannot be parsed) and runs
+ * the selected command.  Exits with 1 on setup or run failure, otherwise
+ * with the result reported by conf_tool_run().
+ */
+int main(int argc, const char **argv)
+{
+	TALLOC_CTX *mem_ctx;
+	struct conf_tool_context *ctx;
+	int result;
+	int level;
+
+	mem_ctx = talloc_new(NULL);
+	if (mem_ctx == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	if (conf_tool_init(mem_ctx, "ctdb-config", conf_options,
+			   argc, argv, true, &ctx) != 0) {
+		talloc_free(mem_ctx);
+		exit(1);
+	}
+
+	setup_logging("ctdb-config", DEBUG_STDERR);
+	if (!debug_level_parse(conf_data.debug, &level)) {
+		level = DEBUG_ERR;
+	}
+	debuglevel_set(level);
+
+	if (conf_tool_run(ctx, &result) != 0) {
+		result = 1;
+	}
+
+	talloc_free(mem_ctx);
+	exit(result);
+}
+
+#endif /* CTDB_CONF_TOOL */
diff --git a/ctdb/common/conf_tool.h b/ctdb/common/conf_tool.h
new file mode 100644
index 0000000..c77419f
--- /dev/null
+++ b/ctdb/common/conf_tool.h
@@ -0,0 +1,39 @@
+/*
+ Config options tool
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_CONF_TOOL_H__
+#define __CTDB_CONF_TOOL_H__
+
+#include <stdbool.h>
+#include <popt.h>
+#include <talloc.h>
+
+struct conf_tool_context;
+
+/**
+ * @brief Initialize the config tool and parse its command line
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] prog Program name handed to the command-line parser
+ * @param[in] options popt options table handed to the command-line parser
+ * @param[in] argc Number of command-line arguments
+ * @param[in] argv Command-line arguments
+ * @param[in] parse_options Passed through to cmdline_parse()
+ * @param[out] result The new config tool context
+ * @return 0 on success, errno on failure
+ */
+int conf_tool_init(TALLOC_CTX *mem_ctx,
+ const char *prog,
+ struct poptOption *options,
+ int argc,
+ const char **argv,
+ bool parse_options,
+ struct conf_tool_context **result);
+
+/**
+ * @brief Set up the configuration options and run the selected command
+ *
+ * @param[in] ctx The config tool context from conf_tool_init()
+ * @param[out] result Passed to cmdline_run(); presumably receives the
+ * return value of the command that was run
+ * @return 0 on success, errno on failure
+ */
+int conf_tool_run(struct conf_tool_context *ctx, int *result);
+
+#endif /* __CTDB_CONF_TOOL_H__ */
diff --git a/ctdb/common/ctdb_io.c b/ctdb/common/ctdb_io.c
new file mode 100644
index 0000000..bf8bc73
--- /dev/null
+++ b/ctdb/common/ctdb_io.c
@@ -0,0 +1,498 @@
+/*
+ ctdb database library
+ Utility functions to read/write blobs of data from a file descriptor
+ and handle the case where we might need multiple read/writes to get all the
+ data.
+
+ Copyright (C) Andrew Tridgell 2006
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <tdb.h>
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/sys_rw.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "common/logging.h"
+#include "common/common.h"
+
+/* structures for packet queueing */
+
+/* Input buffer holding data read from the descriptor but not yet processed */
+struct ctdb_buffer {
+ uint8_t *data; /* buffer storage, NULL until first allocated */
+ uint32_t length; /* number of unprocessed bytes, starting at offset */
+ uint32_t size; /* allocated size of data */
+ uint32_t offset; /* start of the unprocessed bytes within data */
+};
+
+/* One outgoing packet waiting on a queue's out_queue list */
+struct ctdb_queue_pkt {
+ struct ctdb_queue_pkt *next, *prev; /* list linkage (DLIST_* macros) */
+ uint8_t *data; /* next byte to send; advanced after a partial write */
+ uint32_t length; /* bytes still to be sent */
+ uint32_t full_length; /* compared with length to detect a partly sent packet */
+ uint8_t buf[]; /* trailing storage; presumably what data points into */
+};
+
+/* A packet queue attached to one file descriptor */
+struct ctdb_queue {
+ struct ctdb_context *ctdb; /* supplies the event context and flags */
+ struct tevent_immediate *im; /* used to schedule queue_process_event/queue_dead */
+ struct ctdb_buffer buffer; /* input buffer */
+ struct ctdb_queue_pkt *out_queue, *out_queue_tail; /* outgoing packet list */
+ uint32_t out_queue_length; /* number of packets on out_queue */
+ struct tevent_fd *fde; /* fd event; freed when a write fails */
+ int fd; /* descriptor, set to -1 once a write has failed */
+ size_t alignment; /* if non-zero, send lengths are rounded up to a multiple of this */
+ void *private_data; /* passed back to callback */
+ ctdb_queue_cb_fn_t callback; /* called per packet, or with (NULL, 0) on failure */
+ TALLOC_CTX *data_pool; /* talloc parent for packets handed to callback */
+ const char *name;
+ uint32_t buffer_size; /* initial input buffer size; larger buffers are freed once drained */
+};
+
+
+
+/* Return the number of packets currently waiting on the outgoing queue */
+uint32_t ctdb_queue_length(struct ctdb_queue *queue)
+{
+	uint32_t pending = queue->out_queue_length;
+
+	return pending;
+}
+
+static void queue_process(struct ctdb_queue *queue);
+
+/*
+ * Immediate event handler scheduled by queue_process(): continue with the
+ * data still held in the queue's input buffer.
+ */
+static void queue_process_event(struct tevent_context *ev, struct tevent_immediate *im,
+				void *private_data)
+{
+	struct ctdb_queue *q;
+
+	q = talloc_get_type(private_data, struct ctdb_queue);
+	queue_process(q);
+}
+
+/*
+ * This function is used to process data in queue buffer.
+ *
+ * At most one packet is extracted per call.  The packet is copied into
+ * queue->data_pool and handed to the queue callback, which owns it from
+ * then on.  If the buffer cannot be processed (zero-length packet, offset
+ * overflow) the callback is invoked with (NULL, 0) instead.
+ *
+ * Queue callback function can end up freeing the queue, there should not be a
+ * loop processing packets from queue buffer. Instead set up a timed event for
+ * immediate run to process remaining packets from buffer.  For the same
+ * reason the callback is always the last thing this function does.
+ */
+static void queue_process(struct ctdb_queue *queue)
+{
+	uint32_t pkt_size;
+	uint8_t *data = NULL;
+
+	/* Did we at least read the size into the buffer */
+	if (queue->buffer.length < sizeof(pkt_size)) {
+		return;
+	}
+
+	/*
+	 * The packet starts at an arbitrary byte offset in a uint8_t buffer,
+	 * so copy the length out rather than dereferencing a uint32_t
+	 * pointer, which could be misaligned.
+	 */
+	memcpy(&pkt_size,
+	       queue->buffer.data + queue->buffer.offset,
+	       sizeof(pkt_size));
+	if (pkt_size == 0) {
+		DEBUG(DEBUG_CRIT, ("Invalid packet of length 0\n"));
+		goto failed;
+	}
+
+	/* the buffer doesn't contain the full packet, return to get the rest */
+	if (queue->buffer.length < pkt_size) {
+		return;
+	}
+
+	/* Extract complete packet */
+	data = talloc_memdup(queue->data_pool,
+			     queue->buffer.data + queue->buffer.offset,
+			     pkt_size);
+
+	if (data == NULL) {
+		D_ERR("read error alloc failed for %u\n", pkt_size);
+		return;
+	}
+
+	queue->buffer.offset += pkt_size;
+	queue->buffer.length -= pkt_size;
+
+	/* Catch wrap-around of the offset and offsets beyond the buffer */
+	if (queue->buffer.offset < pkt_size ||
+	    queue->buffer.offset > queue->buffer.size) {
+		D_ERR("buffer offset overflow\n");
+		TALLOC_FREE(queue->buffer.data);
+		memset(&queue->buffer, 0, sizeof(queue->buffer));
+		goto failed;
+	}
+
+	if (queue->buffer.length > 0) {
+		/* There is more data to be processed, schedule an event */
+		tevent_schedule_immediate(queue->im, queue->ctdb->ev,
+					  queue_process_event, queue);
+	} else {
+		/* Buffer drained: drop it if it grew beyond the default size */
+		if (queue->buffer.size > queue->buffer_size) {
+			TALLOC_FREE(queue->buffer.data);
+			queue->buffer.size = 0;
+		}
+		queue->buffer.offset = 0;
+	}
+
+	/* It is the responsibility of the callback to free 'data' */
+	queue->callback(data, pkt_size, queue->private_data);
+	return;
+
+failed:
+	queue->callback(NULL, 0, queue->private_data);
+}
+
+/*
+  called when an incoming connection is readable
+  This function MUST be safe for reentry via the queue callback!
+
+  Reads as much of the available data as fits into the input buffer
+  (allocating, growing or compacting the buffer first as needed) and then
+  calls queue_process().  On failure the queue callback is invoked with
+  (NULL, 0).
+*/
+static void queue_io_read(struct ctdb_queue *queue)
+{
+	int num_ready = 0;
+	uint32_t pkt_size = 0;
+	uint32_t start_offset;
+	ssize_t nread;
+	uint8_t *data;
+
+	/* check how much data is available on the socket for immediately
+	   guaranteed nonblocking access.
+	   as long as we are careful never to try to read more than this
+	   we know all reads will be successful and will neither block
+	   nor fail with a "data not available right now" error
+	*/
+	if (ioctl(queue->fd, FIONREAD, &num_ready) != 0) {
+		return;
+	}
+	if (num_ready == 0) {
+		/* the descriptor has been closed */
+		goto failed;
+	}
+
+	if (queue->buffer.data == NULL) {
+		/* starting fresh, allocate buf to read data */
+		queue->buffer.data = talloc_size(queue, queue->buffer_size);
+		if (queue->buffer.data == NULL) {
+			/* report the size that was actually requested */
+			DEBUG(DEBUG_ERR, ("read error alloc failed for %u\n",
+					  queue->buffer_size));
+			goto failed;
+		}
+		queue->buffer.size = queue->buffer_size;
+		goto data_read;
+	}
+
+	if (sizeof(pkt_size) > queue->buffer.length) {
+		/* data read is not sufficient to gather message size */
+		goto buffer_shift;
+	}
+
+	/*
+	 * The packet starts at an arbitrary byte offset in a uint8_t buffer,
+	 * so copy the length out rather than dereferencing a uint32_t
+	 * pointer, which could be misaligned.
+	 */
+	memcpy(&pkt_size,
+	       queue->buffer.data + queue->buffer.offset,
+	       sizeof(pkt_size));
+	if (pkt_size > queue->buffer.size) {
+		/* the pending packet does not fit, grow the buffer to hold it */
+		data = talloc_realloc_size(queue,
+					   queue->buffer.data,
+					   pkt_size);
+		if (data == NULL) {
+			DBG_ERR("read error realloc failed for %u\n", pkt_size);
+			goto failed;
+		}
+		queue->buffer.data = data;
+		queue->buffer.size = pkt_size;
+		/* fall through here as we might need to move the data as well */
+	}
+
+buffer_shift:
+	if (sizeof(pkt_size) > queue->buffer.size - queue->buffer.offset ||
+	    pkt_size > queue->buffer.size - queue->buffer.offset) {
+		/* Either the offset has progressed too far to host at least
+		 * the size information or the remaining space in the buffer
+		 * is not sufficient for the full message.
+		 * Therefore, move the data and try again.
+		 */
+		memmove(queue->buffer.data,
+			queue->buffer.data + queue->buffer.offset,
+			queue->buffer.length);
+		queue->buffer.offset = 0;
+	}
+
+data_read:
+	/* New data is appended right after the unprocessed bytes */
+	start_offset = queue->buffer.length + queue->buffer.offset;
+	if (start_offset < queue->buffer.length) {
+		DBG_ERR("Buffer overflow\n");
+		goto failed;
+	}
+	if (start_offset > queue->buffer.size) {
+		DBG_ERR("Buffer overflow\n");
+		goto failed;
+	}
+
+	/* Never read more than the free space left in the buffer */
+	num_ready = MIN(num_ready, queue->buffer.size - start_offset);
+
+	if (num_ready > 0) {
+		nread = sys_read(queue->fd,
+				 queue->buffer.data +
+					queue->buffer.offset +
+					queue->buffer.length,
+				 num_ready);
+		if (nread <= 0) {
+			DEBUG(DEBUG_ERR, ("read error nread=%d\n", (int)nread));
+			goto failed;
+		}
+		queue->buffer.length += nread;
+	}
+
+	queue_process(queue);
+	return;
+
+failed:
+	queue->callback(NULL, 0, queue->private_data);
+}
+
+
+/*
+ * Immediate event handler scheduled after a write failure: tell the
+ * queue's owner that the queue is dead by calling back with (NULL, 0).
+ */
+static void queue_dead(struct tevent_context *ev, struct tevent_immediate *im,
+		       void *private_data)
+{
+	struct ctdb_queue *q;
+
+	q = talloc_get_type(private_data, struct ctdb_queue);
+	q->callback(NULL, 0, q->private_data);
+}
+
+
+/*
+  called when an incoming connection is writeable
+
+  Sends queued packets until the queue is empty, the descriptor would
+  block, or a write fails.  Once the queue is empty, write notifications
+  for the descriptor are switched off.
+*/
+static void queue_io_write(struct ctdb_queue *queue)
+{
+	struct ctdb_queue_pkt *pkt;
+
+	while ((pkt = queue->out_queue) != NULL) {
+		ssize_t n;
+
+		/* With CTDB_FLAG_TORTURE set only one byte is written per call */
+		if (queue->ctdb->flags & CTDB_FLAG_TORTURE) {
+			n = write(queue->fd, pkt->data, 1);
+		} else {
+			n = write(queue->fd, pkt->data, pkt->length);
+		}
+
+		if (n == -1 && errno != EAGAIN && errno != EWOULDBLOCK) {
+			/* hard error: mark the queue dead */
+			if (pkt->length != pkt->full_length) {
+				/* partial packet sent - we have to drop it */
+				DLIST_REMOVE(queue->out_queue, pkt);
+				queue->out_queue_length--;
+				talloc_free(pkt);
+			}
+			TALLOC_FREE(queue->fde);
+			queue->fd = -1;
+			tevent_schedule_immediate(queue->im, queue->ctdb->ev,
+						  queue_dead, queue);
+			return;
+		}
+
+		if (n <= 0) {
+			/* nothing written, wait for the next writeable event */
+			return;
+		}
+
+		if (n != pkt->length) {
+			/* short write: remember how far we got */
+			pkt->data += n;
+			pkt->length -= n;
+			return;
+		}
+
+		/* packet fully sent */
+		DLIST_REMOVE(queue->out_queue, pkt);
+		queue->out_queue_length--;
+		talloc_free(pkt);
+	}
+
+	TEVENT_FD_NOT_WRITEABLE(queue->fde);
+}
+
+/*
+ * fd event handler for a queue.
+ *
+ * If the event carries the read flag the read path is run; otherwise the
+ * write path is run. Only one of the two is serviced per invocation.
+ */
+static void queue_io_handler(struct tevent_context *ev, struct tevent_fd *fde,
+			     uint16_t flags, void *private_data)
+{
+	struct ctdb_queue *q = talloc_get_type(private_data, struct ctdb_queue);
+
+	if ((flags & TEVENT_FD_READ) == 0) {
+		queue_io_write(q);
+		return;
+	}
+
+	queue_io_read(q);
+}
+
+
+/*
+ queue a packet for sending
+
+ queue:  destination queue
+ data:   packet bytes; when the queue has an alignment this buffer is
+         modified in place (length word rewritten, padding zeroed)
+ length: number of valid bytes in data
+
+ Returns 0 when the packet was written, queued, or silently discarded
+ because the queue has no fd or the fd failed. The only other exit is
+ through CTDB_NO_MEMORY() when the packet copy cannot be allocated.
+*/
+int ctdb_queue_send(struct ctdb_queue *queue, uint8_t *data, uint32_t length)
+{
+ struct ctdb_req_header *hdr = (struct ctdb_req_header *)data;
+ struct ctdb_queue_pkt *pkt;
+ uint32_t length2, full_length;
+
+ /* If the queue does not have valid fd, no point queueing a packet */
+ if (queue->fd == -1) {
+ return 0;
+ }
+
+ if (queue->alignment) {
+ /* enforce the length and alignment rules from the tcp packet allocator */
+ /* NOTE(review): the mask arithmetic assumes alignment is a power of two - confirm */
+ length2 = (length+(queue->alignment-1)) & ~(queue->alignment-1);
+ /* the first 32 bits of the packet are overwritten with the padded length */
+ *(uint32_t *)data = length2;
+ } else {
+ length2 = length;
+ }
+
+ if (length2 != length) {
+ /* zero the padding; assumes the caller's buffer has room for it - TODO confirm */
+ memset(data+length, 0, length2-length);
+ }
+
+ /* remember the complete size; length2 shrinks below if part is written now */
+ full_length = length2;
+
+ /* if the queue is empty then try an immediate write, avoiding
+ queue overhead. This relies on non-blocking sockets */
+ if (queue->out_queue == NULL && queue->fd != -1 &&
+ !(queue->ctdb->flags & CTDB_FLAG_TORTURE)) {
+ ssize_t n = write(queue->fd, data, length2);
+ if (n == -1 && errno != EAGAIN && errno != EWOULDBLOCK) {
+ TALLOC_FREE(queue->fde);
+ queue->fd = -1;
+ tevent_schedule_immediate(queue->im, queue->ctdb->ev,
+ queue_dead, queue);
+ /* yes, we report success, as the dead node is
+ handled via a separate event */
+ return 0;
+ }
+ if (n > 0) {
+ data += n;
+ length2 -= n;
+ }
+ if (length2 == 0) return 0;
+ }
+
+ /* copy whatever is still unsent into a packet owned by the queue */
+ pkt = talloc_size(
+ queue, offsetof(struct ctdb_queue_pkt, buf) + length2);
+ CTDB_NO_MEMORY(queue->ctdb, pkt);
+ talloc_set_name_const(pkt, "struct ctdb_queue_pkt");
+
+ pkt->data = pkt->buf;
+ memcpy(pkt->data, data, length2);
+
+ pkt->length = length2;
+ pkt->full_length = full_length;
+
+ /* first packet on an empty queue: start asking for writeable events */
+ if (queue->out_queue == NULL && queue->fd != -1) {
+ TEVENT_FD_WRITEABLE(queue->fde);
+ }
+
+ DLIST_ADD_END(queue->out_queue, pkt);
+
+ queue->out_queue_length++;
+
+ /* optionally give the packet a descriptive talloc name built from
+ * the request header fields */
+ if (queue->ctdb->tunable.verbose_memory_names != 0) {
+ switch (hdr->operation) {
+ case CTDB_REQ_CONTROL: {
+ struct ctdb_req_control_old *c = (struct ctdb_req_control_old *)hdr;
+ talloc_set_name(pkt, "ctdb_queue_pkt: %s control opcode=%u srvid=%llu datalen=%u",
+ queue->name, (unsigned)c->opcode, (unsigned long long)c->srvid, (unsigned)c->datalen);
+ break;
+ }
+ case CTDB_REQ_MESSAGE: {
+ struct ctdb_req_message_old *m = (struct ctdb_req_message_old *)hdr;
+ talloc_set_name(pkt, "ctdb_queue_pkt: %s message srvid=%llu datalen=%u",
+ queue->name, (unsigned long long)m->srvid, (unsigned)m->datalen);
+ break;
+ }
+ default:
+ talloc_set_name(pkt, "ctdb_queue_pkt: %s operation=%u length=%u src=%u dest=%u",
+ queue->name, (unsigned)hdr->operation, (unsigned)hdr->length,
+ (unsigned)hdr->srcnode, (unsigned)hdr->destnode);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+/*
+ * Attach a file descriptor to the queue (or detach with fd == -1).
+ *
+ * Any existing fd event is released first. For a real fd a new read event
+ * is registered with auto-close enabled, and write notifications are
+ * requested straight away when packets are already waiting.
+ *
+ * Returns 0 on success, -1 if the fd event could not be created.
+ */
+int ctdb_queue_set_fd(struct ctdb_queue *queue, int fd)
+{
+	queue->fd = fd;
+	TALLOC_FREE(queue->fde);
+
+	if (fd == -1) {
+		return 0;
+	}
+
+	queue->fde = tevent_add_fd(queue->ctdb->ev, queue, fd,
+				   TEVENT_FD_READ,
+				   queue_io_handler, queue);
+	if (queue->fde == NULL) {
+		return -1;
+	}
+	tevent_fd_set_auto_close(queue->fde);
+
+	if (queue->out_queue != NULL) {
+		TEVENT_FD_WRITEABLE(queue->fde);
+	}
+
+	return 0;
+}
+
+/*
+ * Set up a packet queue on a socket.
+ *
+ * ctdb:         context supplying the event loop and tunables
+ * mem_ctx:      talloc parent of the new queue
+ * fd:           socket to attach now, or -1 to attach later with
+ *               ctdb_queue_set_fd()
+ * alignment:    packet length alignment applied by ctdb_queue_send()
+ *               (0 disables padding)
+ * callback:     invoked by the queue code; it is called with (NULL, 0)
+ *               when the queue fails
+ * private_data: passed through to callback
+ * fmt, ...:     printf-style queue name
+ *
+ * Returns the new queue or NULL on failure. Everything the queue owns,
+ * including its name, is a talloc child of the queue, and a partially
+ * built queue is freed on every failure path rather than being left
+ * hanging off mem_ctx.
+ */
+struct ctdb_queue *ctdb_queue_setup(struct ctdb_context *ctdb,
+				    TALLOC_CTX *mem_ctx, int fd, int alignment,
+				    ctdb_queue_cb_fn_t callback,
+				    void *private_data, const char *fmt, ...)
+{
+	struct ctdb_queue *queue;
+	struct tevent_immediate *im;
+	char *name;
+	va_list ap;
+
+	queue = talloc_zero(mem_ctx, struct ctdb_queue);
+	CTDB_NO_MEMORY_NULL(ctdb, queue);
+
+	/* Parent the name on the queue so it is released with the queue */
+	va_start(ap, fmt);
+	name = talloc_vasprintf(queue, fmt, ap);
+	va_end(ap);
+	if (name == NULL) {
+		TALLOC_FREE(queue);
+	}
+	/* still reports the failure the usual way; queue is already gone */
+	CTDB_NO_MEMORY_NULL(ctdb, name);
+	queue->name = name;
+
+	im = tevent_create_immediate(queue);
+	if (im == NULL) {
+		TALLOC_FREE(queue);
+	}
+	CTDB_NO_MEMORY_NULL(ctdb, im);
+	queue->im = im;
+
+	queue->ctdb = ctdb;
+	queue->fd = fd;
+	queue->alignment = alignment;
+	queue->private_data = private_data;
+	queue->callback = callback;
+	if (fd != -1) {
+		if (ctdb_queue_set_fd(queue, fd) != 0) {
+			talloc_free(queue);
+			return NULL;
+		}
+	}
+
+	queue->buffer_size = ctdb->tunable.queue_buffer_size;
+	/* In client code, ctdb->tunable is not initialized.
+	 * This does not affect recovery daemon.
+	 */
+	if (queue->buffer_size == 0) {
+		queue->buffer_size = 1024;
+	}
+
+	queue->data_pool = talloc_pool(queue, queue->buffer_size);
+	if (queue->data_pool == NULL) {
+		TALLOC_FREE(queue);
+		return NULL;
+	}
+
+	return queue;
+}
diff --git a/ctdb/common/ctdb_ltdb.c b/ctdb/common/ctdb_ltdb.c
new file mode 100644
index 0000000..6634416
--- /dev/null
+++ b/ctdb/common/ctdb_ltdb.c
@@ -0,0 +1,430 @@
+/*
+ ctdb ltdb code
+
+ Copyright (C) Andrew Tridgell 2006
+ Copyright (C) Ronnie sahlberg 2011
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <tdb.h>
+
+#include "lib/tdb_wrap/tdb_wrap.h"
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+
+#include "ctdb_private.h"
+
+#include "common/common.h"
+#include "common/logging.h"
+
+
+/*
+ * Work out the tdb open flags for a database of the given type.
+ *
+ * Persistent databases start from TDB_DEFAULT. Replicated and volatile
+ * databases start from TDB_NOSYNC | TDB_CLEAR_IF_FIRST |
+ * TDB_INCOMPATIBLE_HASH; volatile ones additionally get TDB_MUTEX_LOCKING
+ * when it is compiled in, requested via with_mutex and supported at
+ * runtime. TDB_DISALLOW_NESTING is always added, and TDB_NOMMAP is added
+ * when with_valgrind is set.
+ */
+int ctdb_db_tdb_flags(uint8_t db_flags, bool with_valgrind, bool with_mutex)
+{
+	int flags;
+
+	if (db_flags & CTDB_DB_FLAGS_PERSISTENT) {
+		flags = TDB_DEFAULT;
+	} else {
+		flags = TDB_NOSYNC | TDB_CLEAR_IF_FIRST | TDB_INCOMPATIBLE_HASH;
+
+#ifdef TDB_MUTEX_LOCKING
+		/* mutexes only for volatile (non-replicated) databases */
+		if (!(db_flags & CTDB_DB_FLAGS_REPLICATED) &&
+		    with_mutex &&
+		    tdb_runtime_check_for_robust_mutexes()) {
+			flags |= TDB_MUTEX_LOCKING;
+		}
+#endif
+	}
+
+	if (with_valgrind) {
+		flags |= TDB_NOMMAP;
+	}
+
+	return flags | TDB_DISALLOW_NESTING;
+}
+
+/*
+ * Look up an attached database by name.
+ *
+ * Walks ctdb->db_list and returns the first entry whose db_name equals
+ * name, or NULL when there is none.
+ */
+struct ctdb_db_context *ctdb_db_handle(struct ctdb_context *ctdb, const char *name)
+{
+	struct ctdb_db_context *db = ctdb->db_list;
+
+	while (db != NULL && strcmp(name, db->db_name) != 0) {
+		db = db->next;
+	}
+
+	return db;
+}
+
+/* True when the database has CTDB_DB_FLAGS_PERSISTENT set */
+bool ctdb_db_persistent(struct ctdb_db_context *ctdb_db)
+{
+	return (ctdb_db->db_flags & CTDB_DB_FLAGS_PERSISTENT) != 0;
+}
+
+/* True when the database has CTDB_DB_FLAGS_REPLICATED set */
+bool ctdb_db_replicated(struct ctdb_db_context *ctdb_db)
+{
+	return (ctdb_db->db_flags & CTDB_DB_FLAGS_REPLICATED) != 0;
+}
+
+/* True when the database is neither persistent nor replicated */
+bool ctdb_db_volatile(struct ctdb_db_context *ctdb_db)
+{
+	return (ctdb_db->db_flags &
+		(CTDB_DB_FLAGS_PERSISTENT | CTDB_DB_FLAGS_REPLICATED)) == 0;
+}
+
+/* True when the database has CTDB_DB_FLAGS_READONLY set */
+bool ctdb_db_readonly(struct ctdb_db_context *ctdb_db)
+{
+	return (ctdb_db->db_flags & CTDB_DB_FLAGS_READONLY) != 0;
+}
+
+/* Set CTDB_DB_FLAGS_READONLY on the database */
+void ctdb_db_set_readonly(struct ctdb_db_context *ctdb_db)
+{
+	ctdb_db->db_flags = ctdb_db->db_flags | CTDB_DB_FLAGS_READONLY;
+}
+
+/* Clear CTDB_DB_FLAGS_READONLY on the database */
+void ctdb_db_reset_readonly(struct ctdb_db_context *ctdb_db)
+{
+	ctdb_db->db_flags = ctdb_db->db_flags & ~CTDB_DB_FLAGS_READONLY;
+}
+
+/* True when the database has CTDB_DB_FLAGS_STICKY set */
+bool ctdb_db_sticky(struct ctdb_db_context *ctdb_db)
+{
+	return (ctdb_db->db_flags & CTDB_DB_FLAGS_STICKY) != 0;
+}
+
+/* Set CTDB_DB_FLAGS_STICKY on the database */
+void ctdb_db_set_sticky(struct ctdb_db_context *ctdb_db)
+{
+	ctdb_db->db_flags = ctdb_db->db_flags | CTDB_DB_FLAGS_STICKY;
+}
+
+/*
+ * Return the lmaster for a key: the key's hash, reduced modulo the size
+ * of the VNN map, selects the entry of ctdb->vnn_map->map to return.
+ */
+uint32_t ctdb_lmaster(struct ctdb_context *ctdb, const TDB_DATA *key)
+{
+	const uint32_t slot = ctdb_hash(key) % ctdb->vnn_map->size;
+
+	return ctdb->vnn_map->map[slot];
+}
+
+
+/*
+ * Build the header used for a record that has no ltdb header yet: all
+ * zero, except that dmaster is the key's lmaster and flags is
+ * CTDB_REC_FLAG_AUTOMATIC.
+ */
+static void ltdb_initial_header(struct ctdb_db_context *ctdb_db,
+				TDB_DATA key,
+				struct ctdb_ltdb_header *header)
+{
+	/* initial dmaster is the lmaster */
+	const uint32_t lmaster = ctdb_lmaster(ctdb_db->ctdb, &key);
+
+	ZERO_STRUCTP(header);
+	header->flags = CTDB_REC_FLAG_AUTOMATIC;
+	header->dmaster = lmaster;
+}
+
+/* State shared between ctdb_ltdb_fetch() and its tdb_parse_record() callback */
+struct ctdb_ltdb_fetch_state {
+ /* where the callback copies the record's ltdb header */
+ struct ctdb_ltdb_header *header;
+ /* talloc parent for the copy of the record body */
+ TALLOC_CTX *mem_ctx;
+ /* receives the record body; NULL when the caller wants only the header */
+ TDB_DATA *data;
+ /* set to -1 by the callback when duplicating the body fails */
+ int ret;
+ /* set by the callback once a record large enough to hold a header was seen */
+ bool found;
+};
+
+/*
+ * tdb_parse_record() callback for ctdb_ltdb_fetch().
+ *
+ * Records shorter than an ltdb header are ignored. Otherwise the header
+ * is copied out, the state is marked as found and, when the caller asked
+ * for the body, the bytes after the header are duplicated onto
+ * state->mem_ctx. A failed duplication is reported through state->ret;
+ * the callback itself always returns 0.
+ */
+static int ctdb_ltdb_fetch_fn(TDB_DATA key, TDB_DATA data, void *private_data)
+{
+	struct ctdb_ltdb_fetch_state *state = private_data;
+	const size_t hdr_len = sizeof(struct ctdb_ltdb_header);
+	TDB_DATA *body = state->data;
+
+	if (data.dsize < hdr_len) {
+		return 0;
+	}
+
+	state->found = true;
+	memcpy(state->header, data.dptr, hdr_len);
+
+	if (body == NULL) {
+		return 0;
+	}
+
+	body->dsize = data.dsize - hdr_len;
+	body->dptr = talloc_memdup(state->mem_ctx,
+				   data.dptr + hdr_len,
+				   body->dsize);
+	if (body->dptr == NULL) {
+		state->ret = -1;
+	}
+
+	return 0;
+}
+
+/*
+ fetch a record from the ltdb, separating out the header information
+ and returning the body of the record. A valid (initial) header is
+ returned if the record is not present
+
+ header:  receives the record's ltdb header (or the initial header)
+ mem_ctx: talloc parent for the body copy
+ data:    receives the body; may be NULL if only the header is wanted
+
+ Returns 0 on success and -1 on a tdb error other than "does not exist",
+ when duplicating the body fails, or when the record is missing and no
+ vnn_map is available.
+*/
+int ctdb_ltdb_fetch(struct ctdb_db_context *ctdb_db,
+ TDB_DATA key, struct ctdb_ltdb_header *header,
+ TALLOC_CTX *mem_ctx, TDB_DATA *data)
+{
+ struct ctdb_context *ctdb = ctdb_db->ctdb;
+ /* .ret is not listed, so it starts out as 0 */
+ struct ctdb_ltdb_fetch_state state = {
+ .header = header,
+ .mem_ctx = mem_ctx,
+ .data = data,
+ .found = false,
+ };
+ int ret;
+
+ ret = tdb_parse_record(
+ ctdb_db->ltdb->tdb, key, ctdb_ltdb_fetch_fn, &state);
+
+ if (ret == -1) {
+ /* a missing record is not an error here; anything else is */
+ enum TDB_ERROR err = tdb_error(ctdb_db->ltdb->tdb);
+ if (err != TDB_ERR_NOEXIST) {
+ return -1;
+ }
+ }
+
+ if (state.ret != 0) {
+ DBG_DEBUG("ctdb_ltdb_fetch_fn failed\n");
+ return state.ret;
+ }
+
+ if (state.found) {
+ return 0;
+ }
+
+ /* from here on: no record, or one too short to carry a header */
+ if (data != NULL) {
+ *data = tdb_null;
+ }
+
+ if (ctdb->vnn_map == NULL) {
+ /* called from the client */
+ header->dmaster = (uint32_t)-1;
+ return -1;
+ }
+
+ ltdb_initial_header(ctdb_db, key, header);
+ /* write the initial header back only for persistent databases or
+ * when this node is the record's initial dmaster */
+ if (ctdb_db_persistent(ctdb_db) ||
+ header->dmaster == ctdb_db->ctdb->pnn) {
+
+ ret = ctdb_ltdb_store(ctdb_db, key, header, tdb_null);
+ if (ret != 0) {
+ /* a failed store is only logged; the fetch still succeeds */
+ DBG_NOTICE("failed to store initial header\n");
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Write a record (ltdb header followed by data) to a normal database.
+ *
+ * If the database has a ctdb_ltdb_store_fn override the call is handed
+ * to it unchanged. In torture mode the currently stored record is read
+ * first and an RSN that would go backwards is logged. The header and
+ * body are then stored in one tdb_storev() call with TDB_REPLACE.
+ *
+ * Returns the result of the override or of tdb_storev().
+ */
+int ctdb_ltdb_store(struct ctdb_db_context *ctdb_db, TDB_DATA key,
+		    struct ctdb_ltdb_header *header, TDB_DATA data)
+{
+	const uint32_t hdr_len = sizeof(struct ctdb_ltdb_header);
+	TDB_DATA parts[2];
+	int rc;
+
+	if (ctdb_db->ctdb_ltdb_store_fn != NULL) {
+		return ctdb_db->ctdb_ltdb_store_fn(ctdb_db, key, header, data);
+	}
+
+	if (ctdb_db->ctdb->flags & CTDB_FLAG_TORTURE) {
+		TDB_DATA prev = tdb_fetch(ctdb_db->ltdb->tdb, key);
+
+		if (prev.dptr != NULL) {
+			struct ctdb_ltdb_header *stored =
+				(struct ctdb_ltdb_header *)prev.dptr;
+
+			if (prev.dsize >= hdr_len &&
+			    stored->rsn > header->rsn) {
+				DEBUG(DEBUG_ERR,
+				      ("RSN regression! %"PRIu64" %"PRIu64"\n",
+				       stored->rsn, header->rsn));
+			}
+			free(prev.dptr);
+		}
+	}
+
+	parts[0].dptr = (uint8_t *)header;
+	parts[0].dsize = hdr_len;
+
+	parts[1].dptr = data.dptr;
+	parts[1].dsize = data.dsize;
+
+	rc = tdb_storev(ctdb_db->ltdb->tdb, key, parts, 2, TDB_REPLACE);
+	if (rc != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Failed to store dynamic data\n"));
+	}
+
+	return rc;
+}
+
+/*
+ * Take the tdb chain lock covering key in the local tdb.
+ * Returns whatever tdb_chainlock() returns.
+ */
+int ctdb_ltdb_lock(struct ctdb_db_context *ctdb_db, TDB_DATA key)
+{
+	struct tdb_context *tdb = ctdb_db->ltdb->tdb;
+
+	return tdb_chainlock(tdb, key);
+}
+
+/*
+ * Release the tdb chain lock covering key in the local tdb.
+ * A failure is logged with the database name and tdb error string; the
+ * tdb_chainunlock() result is returned either way.
+ */
+int ctdb_ltdb_unlock(struct ctdb_db_context *ctdb_db, TDB_DATA key)
+{
+	int rc;
+
+	rc = tdb_chainunlock(ctdb_db->ltdb->tdb, key);
+	if (rc == 0) {
+		return rc;
+	}
+
+	DEBUG(DEBUG_ERR,("tdb_chainunlock failed on db %s [%s]\n", ctdb_db->db_name, tdb_errorstr(ctdb_db->ltdb->tdb)));
+	return rc;
+}
+
+
+/*
+ * Delete a record from a normal database.
+ *
+ * Only volatile databases are touched; for any other database the
+ * request is logged and ignored (returning 0). Returns -1 if
+ * tdb_delete() fails, 0 otherwise.
+ */
+int ctdb_ltdb_delete(struct ctdb_db_context *ctdb_db, TDB_DATA key)
+{
+	if (ctdb_db_volatile(ctdb_db)) {
+		if (tdb_delete(ctdb_db->ltdb->tdb, key) == 0) {
+			return 0;
+		}
+		DEBUG(DEBUG_ERR,("Failed to delete empty record.\n"));
+		return -1;
+	}
+
+	DEBUG(DEBUG_WARNING,
+	      ("Ignored deletion of empty record from "
+	       "non-volatile database\n"));
+	return 0;
+}
+
+/*
+ * Mark a PNN in a tracking bitmap.
+ *
+ * data holds a malloc()ed bitmap with one bit per PNN (bit pnn%8 of byte
+ * pnn/8). If the bitmap is too short it is replaced by a larger,
+ * zero-filled buffer that carries over the existing bytes; the old
+ * buffer is freed.
+ *
+ * Returns 0 on success, or -1 when the larger buffer cannot be
+ * allocated, in which case data is left untouched.
+ */
+int ctdb_trackingdb_add_pnn(struct ctdb_context *ctdb, TDB_DATA *data, uint32_t pnn)
+{
+	unsigned int byte_pos = pnn / 8;
+	unsigned char bit_mask = 1 << (pnn % 8);
+
+	if (byte_pos + 1 > data->dsize) {
+		char *buf;
+
+		/* zero-filled allocation; checked before it is touched */
+		buf = calloc(1, byte_pos + 1);
+		if (buf == NULL) {
+			DEBUG(DEBUG_ERR, ("Out of memory when allocating buffer of %u bytes for trackingdb\n", byte_pos + 1));
+			return -1;
+		}
+		if (data->dptr != NULL) {
+			memcpy(buf, data->dptr, data->dsize);
+			free(data->dptr);
+		}
+		data->dptr = (uint8_t *)buf;
+		data->dsize = byte_pos + 1;
+	}
+
+	data->dptr[byte_pos] |= bit_mask;
+	return 0;
+}
+
+/*
+ * Invoke cb once for every PNN whose bit is set in the tracking bitmap,
+ * in increasing PNN order (bit b of byte i stands for PNN i*8 + b).
+ */
+void ctdb_trackingdb_traverse(struct ctdb_context *ctdb, TDB_DATA data, ctdb_trackingdb_cb cb, void *private_data)
+{
+	unsigned int byte;
+	unsigned int bit;
+
+	for (byte = 0; byte < data.dsize; byte++) {
+		for (bit = 0; bit < 8; bit++) {
+			if ((data.dptr[byte] >> bit) & 1) {
+				cb(ctdb, byte * 8 + bit, private_data);
+			}
+		}
+	}
+}
+
+/*
+ this is the dummy null procedure that all databases support
+
+ It ignores call and always returns 0.
+*/
+int ctdb_null_func(struct ctdb_call_info *call)
+{
+ return 0;
+}
+
+/*
+ this is a plain fetch procedure that all databases support
+
+ The reply simply points at the call's own record_data (no copy is
+ made); always returns 0.
+*/
+int ctdb_fetch_func(struct ctdb_call_info *call)
+{
+ call->reply_data = &call->record_data;
+ return 0;
+}
+
+/*
+ * Fetch procedure that returns the full record, i.e. the ltdb header
+ * followed by the record data.
+ *
+ * The reply TDB_DATA is allocated on call and its buffer on the reply.
+ * Returns 0 on success, -1 if either allocation fails.
+ */
+int ctdb_fetch_with_header_func(struct ctdb_call_info *call)
+{
+	const size_t hdr_len = sizeof(struct ctdb_ltdb_header);
+	TDB_DATA *reply;
+
+	reply = talloc(call, TDB_DATA);
+	call->reply_data = reply;
+	if (reply == NULL) {
+		return -1;
+	}
+
+	reply->dsize = hdr_len + call->record_data.dsize;
+	reply->dptr = talloc_size(reply, reply->dsize);
+	if (reply->dptr == NULL) {
+		return -1;
+	}
+
+	/* header first, record body right behind it */
+	memcpy(reply->dptr, call->header, hdr_len);
+	memcpy(&reply->dptr[hdr_len],
+	       call->record_data.dptr,
+	       call->record_data.dsize);
+
+	return 0;
+}
+
diff --git a/ctdb/common/ctdb_util.c b/ctdb/common/ctdb_util.c
new file mode 100644
index 0000000..5c7731c
--- /dev/null
+++ b/ctdb/common/ctdb_util.c
@@ -0,0 +1,681 @@
+/*
+ ctdb utility code
+
+ Copyright (C) Andrew Tridgell 2006
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+#include "system/wait.h"
+
+#include <tdb.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/samba_util.h"
+
+#include "ctdb_private.h"
+
+#include "protocol/protocol_util.h"
+
+#include "common/reqid.h"
+#include "common/system.h"
+#include "common/common.h"
+#include "common/logging.h"
+
+/*
+ return error string for last error
+
+ This is ctdb->err_msg as stored by ctdb_set_error(); the string stays
+ owned by ctdb.
+*/
+const char *ctdb_errstr(struct ctdb_context *ctdb)
+{
+ return ctdb->err_msg;
+}
+
+
+/*
+ * Remember an error message.
+ *
+ * The previous message is freed, the new one is formatted printf-style
+ * into ctdb->err_msg (allocated on ctdb) and then logged at DEBUG_ERR.
+ */
+void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...)
+{
+	va_list args;
+
+	talloc_free(ctdb->err_msg);
+
+	va_start(args, fmt);
+	ctdb->err_msg = talloc_vasprintf(ctdb, fmt, args);
+	va_end(args);
+
+	DEBUG(DEBUG_ERR,("ctdb error: %s\n", ctdb->err_msg));
+}
+
+/*
+ a fatal internal error occurred - no hope for recovery
+
+ Logs msg at DEBUG_ALERT and calls abort(); does not return.
+*/
+void ctdb_fatal(struct ctdb_context *ctdb, const char *msg)
+{
+ DEBUG(DEBUG_ALERT,("ctdb fatal error: %s\n", msg));
+ abort();
+}
+
+/*
+ like ctdb_fatal() but a core/backtrace would not be useful
+
+ Logs msg at DEBUG_ALERT and calls exit(1); does not return.
+*/
+void ctdb_die(struct ctdb_context *ctdb, const char *msg)
+{
+ DEBUG(DEBUG_ALERT,("ctdb exiting with error: %s\n", msg));
+ exit(1);
+}
+
+/*
+ * Set the path of a helper program.
+ *
+ * type:   label used in log messages
+ * helper: caller's buffer of size bytes, expected to start out as "";
+ *         if it is already non-empty nothing is done
+ * envvar: environment variable that takes precedence when set
+ * dir, file: fallback "dir/file" used when envvar is unset; a NULL file
+ *         means "no fallback", a NULL dir with a non-NULL file is an error
+ *
+ * The chosen path must exist and have the owner-execute bit set.
+ * Returns true if helper is set, either previously or by this call.
+ */
+bool ctdb_set_helper(const char *type, char *helper, size_t size,
+		     const char *envvar,
+		     const char *dir, const char *file)
+{
+	struct stat sb;
+	const char *env_path;
+
+	/* A previous call already filled it in */
+	if (helper[0] != '\0') {
+		return true;
+	}
+
+	env_path = getenv(envvar);
+	if (env_path == NULL) {
+		int len;
+
+		if (file == NULL) {
+			return false;
+		}
+		if (dir == NULL) {
+			DEBUG(DEBUG_ERR,
+			      ("Unable to set %s - dir is NULL\n", type));
+			return false;
+		}
+
+		len = snprintf(helper, size, "%s/%s", dir, file);
+		if (len < 0 || (size_t)len >= size) {
+			DEBUG(DEBUG_ERR,
+			      ("Unable to set %s - path too long\n", type));
+			return false;
+		}
+	} else {
+		if (strlen(env_path) >= size) {
+			DEBUG(DEBUG_ERR,
+			      ("Unable to set %s - path too long\n", type));
+			return false;
+		}
+
+		/* length was checked above, so the copy is NUL-terminated */
+		strncpy(helper, env_path, size);
+	}
+
+	if (stat(helper, &sb) != 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Unable to set %s \"%s\" - %s\n",
+		       type, helper, strerror(errno)));
+		return false;
+	}
+	if ((sb.st_mode & S_IXUSR) == 0) {
+		DEBUG(DEBUG_ERR,
+		      ("Unable to set %s \"%s\" - not executable\n",
+		       type, helper));
+		return false;
+	}
+
+	DEBUG(DEBUG_NOTICE,
+	      ("Set %s to \"%s\"\n", type, helper));
+	return true;
+}
+
+/*
+ * Parse an IP address string and give it the ctdb port.
+ *
+ * The port is not read from str: it is the "ctdb"/tcp entry of the
+ * services database when there is one, CTDB_PORT otherwise.
+ * mem_ctx is not used.
+ *
+ * Returns 0 on success, -1 if str cannot be parsed as an address.
+ */
+int ctdb_parse_address(TALLOC_CTX *mem_ctx, const char *str,
+		       ctdb_sock_addr *address)
+{
+	struct servent *service;
+	int port;
+
+	setservent(0);
+	service = getservbyname("ctdb", "tcp");
+	endservent();
+
+	port = (service != NULL) ? ntohs(service->s_port) : CTDB_PORT;
+
+	if (ctdb_sock_addr_from_string(str, address, false) != 0) {
+		return -1;
+	}
+	ctdb_sock_addr_set_port(address, port);
+
+	return 0;
+}
+
+
+/*
+ * Check whether two addresses are the same: same IP (as decided by
+ * ctdb_same_ip()) and same port.
+ */
+bool ctdb_same_address(ctdb_sock_addr *a1, ctdb_sock_addr *a2)
+{
+	if (!ctdb_same_ip(a1, a2)) {
+		return false;
+	}
+
+	return ctdb_addr_to_port(a1) == ctdb_addr_to_port(a2);
+}
+
+
+/*
+ hash function for mapping data to a VNN - taken from tdb
+
+ Thin wrapper around tdb_jenkins_hash(); the const qualifier on key is
+ discarded only to match that function's signature.
+*/
+uint32_t ctdb_hash(const TDB_DATA *key)
+{
+ return tdb_jenkins_hash(discard_const(key));
+}
+
+
+/*
+ * Size in bytes of one marshalled record: the fixed part of
+ * struct ctdb_rec_data_old, the key, the optional ltdb header and the
+ * data.
+ */
+static uint32_t ctdb_marshall_record_size(TDB_DATA key,
+					  struct ctdb_ltdb_header *header,
+					  TDB_DATA data)
+{
+	size_t total = offsetof(struct ctdb_rec_data_old, data);
+
+	total += key.dsize;
+	total += data.dsize;
+	if (header != NULL) {
+		total += sizeof(*header);
+	}
+
+	return total;
+}
+
+/*
+ * Fill in a marshalled record in place.
+ *
+ * rec must have room for length bytes. The payload is laid out as
+ * key, then the ltdb header (if header is not NULL), then data;
+ * datalen covers the header plus the data.
+ */
+static void ctdb_marshall_record_copy(struct ctdb_rec_data_old *rec,
+				      uint32_t reqid,
+				      TDB_DATA key,
+				      struct ctdb_ltdb_header *header,
+				      TDB_DATA data,
+				      uint32_t length)
+{
+	const uint32_t hdr_len = (header != NULL) ? sizeof(*header) : 0;
+	uint32_t pos = 0;
+
+	rec->length = length;
+	rec->reqid = reqid;
+	rec->keylen = key.dsize;
+	rec->datalen = data.dsize + hdr_len;
+
+	memcpy(&rec->data[pos], key.dptr, key.dsize);
+	pos += key.dsize;
+
+	if (header != NULL) {
+		memcpy(&rec->data[pos], header, sizeof(*header));
+		pos += sizeof(*header);
+	}
+
+	memcpy(&rec->data[pos], data.dptr, data.dsize);
+}
+
+/*
+ * Form a ctdb_rec_data record from a key/data pair.
+ *
+ * header may be NULL; when it is not, it is included at the start of the
+ * data portion of the record. The record is allocated on mem_ctx.
+ * Returns NULL if the allocation fails.
+ */
+struct ctdb_rec_data_old *ctdb_marshall_record(TALLOC_CTX *mem_ctx,
+					       uint32_t reqid,
+					       TDB_DATA key,
+					       struct ctdb_ltdb_header *header,
+					       TDB_DATA data)
+{
+	struct ctdb_rec_data_old *rec;
+	size_t length = ctdb_marshall_record_size(key, header, data);
+
+	rec = (struct ctdb_rec_data_old *)talloc_size(mem_ctx, length);
+	if (rec != NULL) {
+		ctdb_marshall_record_copy(rec, reqid, key, header, data, length);
+	}
+
+	return rec;
+}
+
+
+/*
+ * Append one record to a marshall buffer.
+ *
+ * With m == NULL a new zeroed buffer is allocated on mem_ctx and tagged
+ * with db_id; otherwise m is grown by the size of the new record (db_id
+ * is ignored in that case). The record is written at the old end of the
+ * buffer and the record count is incremented.
+ *
+ * Returns the (possibly moved) buffer, or NULL on allocation failure,
+ * in which case m has been freed.
+ */
+struct ctdb_marshall_buffer *ctdb_marshall_add(TALLOC_CTX *mem_ctx,
+					       struct ctdb_marshall_buffer *m,
+					       uint32_t db_id,
+					       uint32_t reqid,
+					       TDB_DATA key,
+					       struct ctdb_ltdb_header *header,
+					       TDB_DATA data)
+{
+	struct ctdb_marshall_buffer *buf;
+	struct ctdb_rec_data_old *rec;
+	uint32_t rec_len, end;
+
+	rec_len = ctdb_marshall_record_size(key, header, data);
+
+	if (m == NULL) {
+		end = offsetof(struct ctdb_marshall_buffer, data);
+		buf = talloc_zero_size(mem_ctx, end + rec_len);
+		if (buf == NULL) {
+			return NULL;
+		}
+		buf->db_id = db_id;
+	} else {
+		end = talloc_get_size(m);
+		buf = talloc_realloc_size(mem_ctx, m, end + rec_len);
+		if (buf == NULL) {
+			TALLOC_FREE(m);
+			return NULL;
+		}
+	}
+
+	rec = (struct ctdb_rec_data_old *)((uint8_t *)buf + end);
+	ctdb_marshall_record_copy(rec, reqid, key, header, data, rec_len);
+	buf->count++;
+
+	return buf;
+}
+
+/*
+ * Marshalling is finished: return a data blob that refers to the
+ * marshall buffer itself (no copy) and spans its whole talloc size.
+ */
+TDB_DATA ctdb_marshall_finish(struct ctdb_marshall_buffer *m)
+{
+	TDB_DATA blob;
+
+	blob.dsize = talloc_get_size(m);
+	blob.dptr = (uint8_t *)m;
+
+	return blob;
+}
+
+/*
+ * Loop over a marshalling buffer.
+ *
+ * - pass r==NULL to start, then pass the previously returned record
+ * - loop the number of times indicated by m->count
+ *
+ * reqid, header, key and data are optional outputs (pass NULL to skip).
+ * key and data point into the buffer; header is copied out. When header
+ * is requested, data excludes the leading ltdb header.
+ *
+ * Returns the current record, or NULL when header is requested but the
+ * record is too short to contain one. In that case none of the outputs
+ * are written, so callers never see a wrapped-around data size.
+ */
+struct ctdb_rec_data_old *ctdb_marshall_loop_next(
+				struct ctdb_marshall_buffer *m,
+				struct ctdb_rec_data_old *r,
+				uint32_t *reqid,
+				struct ctdb_ltdb_header *header,
+				TDB_DATA *key, TDB_DATA *data)
+{
+	if (r == NULL) {
+		r = (struct ctdb_rec_data_old *)&m->data[0];
+	} else {
+		r = (struct ctdb_rec_data_old *)(r->length + (uint8_t *)r);
+	}
+
+	/* validate before any size arithmetic that depends on it */
+	if (header != NULL) {
+		if (r->datalen < sizeof(*header)) {
+			return NULL;
+		}
+		memcpy(header, &r->data[r->keylen], sizeof(*header));
+	}
+
+	if (reqid != NULL) {
+		*reqid = r->reqid;
+	}
+
+	if (key != NULL) {
+		key->dptr = &r->data[0];
+		key->dsize = r->keylen;
+	}
+	if (data != NULL) {
+		data->dptr = &r->data[r->keylen];
+		data->dsize = r->datalen;
+		if (header != NULL) {
+			data->dptr += sizeof(*header);
+			data->dsize -= sizeof(*header);
+		}
+	}
+
+	return r;
+}
+
+/*
+ This is used to canonicalize a ctdb_sock_addr structure.
+
+ cip is zeroed and then filled from ip: IPv4-mapped IPv6 addresses
+ (::ffff:a.b.c.d) are rewritten as plain IPv4, other IPv6 and IPv4
+ addresses are copied field by field (family, port, address). For any
+ other address family cip is left all-zero.
+*/
+void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip)
+{
+ ZERO_STRUCTP(cip);
+
+ if (ip->sa.sa_family == AF_INET6) {
+ /* leading 12 bytes of an IPv4-mapped IPv6 address */
+ const char prefix[12] = { 0,0,0,0,0,0,0,0,0,0,0xff,0xff };
+ if (memcmp(&ip->ip6.sin6_addr, prefix, sizeof(prefix)) == 0) {
+ /* Copy IPv4-mapped IPv6 addresses as IPv4 */
+ cip->ip.sin_family = AF_INET;
+#ifdef HAVE_SOCK_SIN_LEN
+ cip->ip.sin_len = sizeof(ctdb_sock_addr);
+#endif
+ cip->ip.sin_port = ip->ip6.sin6_port;
+ /* the IPv4 address is the last 4 bytes of the IPv6 address */
+ memcpy(&cip->ip.sin_addr,
+ &ip->ip6.sin6_addr.s6_addr[12],
+ sizeof(cip->ip.sin_addr));
+ } else {
+ cip->ip6.sin6_family = AF_INET6;
+#ifdef HAVE_SOCK_SIN6_LEN
+ cip->ip6.sin6_len = sizeof(ctdb_sock_addr);
+#endif
+ cip->ip6.sin6_port = ip->ip6.sin6_port;
+ memcpy(&cip->ip6.sin6_addr,
+ &ip->ip6.sin6_addr,
+ sizeof(cip->ip6.sin6_addr));
+ }
+
+ return;
+ }
+
+ if (ip->sa.sa_family == AF_INET) {
+ cip->ip.sin_family = AF_INET;
+#ifdef HAVE_SOCK_SIN_LEN
+ cip->ip.sin_len = sizeof(ctdb_sock_addr);
+#endif
+ cip->ip.sin_port = ip->ip.sin_port;
+ memcpy(&cip->ip.sin_addr,
+ &ip->ip.sin_addr,
+ sizeof(ip->ip.sin_addr));
+
+ return;
+ }
+}
+
+/*
+ * Canonicalize an address in place: the canonical form is built in a
+ * temporary and then copied over ip.
+ */
+void ctdb_canonicalize_ip_inplace(ctdb_sock_addr *ip)
+{
+	ctdb_sock_addr canonical;
+
+	ctdb_canonicalize_ip(ip, &canonical);
+	memcpy(ip, &canonical, sizeof(canonical));
+}
+
+/*
+ * Compare the IP part of two addresses (ports are ignored).
+ *
+ * Both addresses are canonicalized first, so an IPv4-mapped IPv6 address
+ * equals the corresponding IPv4 address. Addresses of different or
+ * unsupported families never compare equal; an unsupported family is
+ * also logged.
+ */
+bool ctdb_same_ip(const ctdb_sock_addr *tip1, const ctdb_sock_addr *tip2)
+{
+	ctdb_sock_addr c1, c2;
+
+	ctdb_canonicalize_ip(tip1, &c1);
+	ctdb_canonicalize_ip(tip2, &c2);
+
+	if (c1.sa.sa_family != c2.sa.sa_family) {
+		return false;
+	}
+
+	if (c1.sa.sa_family == AF_INET) {
+		return c1.ip.sin_addr.s_addr == c2.ip.sin_addr.s_addr;
+	}
+
+	if (c1.sa.sa_family == AF_INET6) {
+		return memcmp(&c1.ip6.sin6_addr.s6_addr[0],
+			      &c2.ip6.sin6_addr.s6_addr[0],
+			      16) == 0;
+	}
+
+	DEBUG(DEBUG_ERR, (__location__ " CRITICAL Can not compare sockaddr structures of type %u\n", c1.sa.sa_family));
+	return false;
+}
+
+/*
+ * Compare two ctdb_sock_addr structures: equal when ctdb_same_ip()
+ * agrees and the sin_port fields match.
+ */
+bool ctdb_same_sockaddr(const ctdb_sock_addr *ip1, const ctdb_sock_addr *ip2)
+{
+	if (!ctdb_same_ip(ip1, ip2)) {
+		return false;
+	}
+
+	return ip1->ip.sin_port == ip2->ip.sin_port;
+}
+
+/*
+ * Format the IP part of an address as text.
+ *
+ * The result lives in a single static buffer, so each call overwrites
+ * the string returned by the previous one. For an unknown address family
+ * an error is logged and the buffer is returned as it was.
+ */
+char *ctdb_addr_to_str(ctdb_sock_addr *addr)
+{
+	static char text[128] = "";
+
+	if (addr->sa.sa_family == AF_INET) {
+		inet_ntop(addr->ip.sin_family, &addr->ip.sin_addr,
+			  text, sizeof(text));
+	} else if (addr->sa.sa_family == AF_INET6) {
+		inet_ntop(addr->ip6.sin6_family, &addr->ip6.sin6_addr,
+			  text, sizeof(text));
+	} else {
+		DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family %u\n", addr->sa.sa_family));
+	}
+
+	return text;
+}
+
+/*
+ * Return the port of an address in host byte order, or 0 (after logging
+ * an error) when the address family is neither AF_INET nor AF_INET6.
+ */
+unsigned ctdb_addr_to_port(ctdb_sock_addr *addr)
+{
+	if (addr->sa.sa_family == AF_INET) {
+		return ntohs(addr->ip.sin_port);
+	}
+
+	if (addr->sa.sa_family == AF_INET6) {
+		return ntohs(addr->ip6.sin6_port);
+	}
+
+	DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family %u\n", addr->sa.sa_family));
+	return 0;
+}
+
+/*
+ * Add a node to a node map with given address and flags.
+ *
+ * nstr is parsed with ctdb_parse_address(); the new entry gets the next
+ * free index as its PNN. *node_map may be moved by the reallocation.
+ *
+ * Returns false if the address cannot be parsed or memory runs out. On
+ * failure *node_map is left unchanged and still valid, so the caller
+ * remains able to free it.
+ */
+static bool node_map_add(TALLOC_CTX *mem_ctx,
+			 const char *nstr, uint32_t flags,
+			 struct ctdb_node_map_old **node_map)
+{
+	ctdb_sock_addr addr;
+	uint32_t num;
+	size_t s;
+	struct ctdb_node_map_old *map;
+	struct ctdb_node_and_flags *n;
+
+	/* Might as well do this before trying to allocate memory */
+	if (ctdb_parse_address(mem_ctx, nstr, &addr) == -1) {
+		return false;
+	}
+
+	num = (*node_map)->num + 1;
+	s = offsetof(struct ctdb_node_map_old, nodes) +
+		num * sizeof(struct ctdb_node_and_flags);
+	/* keep the old pointer until the reallocation is known to be good */
+	map = talloc_realloc_size(mem_ctx, *node_map, s);
+	if (map == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+		return false;
+	}
+	*node_map = map;
+
+	n = &map->nodes[map->num];
+	n->addr = addr;
+	n->pnn = map->num;
+	n->flags = flags;
+
+	map->num++;
+
+	return true;
+}
+
+/*
+ * Read a nodes file into a node map.
+ *
+ * Each non-empty line (after trimming spaces and tabs) becomes one node,
+ * numbered in file order. A line starting with '#' becomes a node with
+ * NODE_FLAGS_DELETED and address 0.0.0.0, so that commenting a node out
+ * does not renumber the nodes after it. Empty lines are skipped and do
+ * not consume a PNN.
+ *
+ * The map is allocated on mem_ctx. Returns NULL if memory runs out, the
+ * file cannot be read, or a line is not a valid address; nothing is left
+ * allocated on mem_ctx in those cases.
+ */
+struct ctdb_node_map_old *ctdb_read_nodes_file(TALLOC_CTX *mem_ctx,
+					       const char *nlist)
+{
+	char **lines;
+	int nlines;
+	int i;
+	struct ctdb_node_map_old *ret;
+
+	/* Allocate node map header */
+	ret = talloc_zero_size(mem_ctx, offsetof(struct ctdb_node_map_old, nodes));
+	if (ret == NULL) {
+		DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+		return NULL;
+	}
+
+	lines = file_lines_load(nlist, &nlines, 0, mem_ctx);
+	if (lines == NULL) {
+		DEBUG(DEBUG_ERR, ("Failed to read nodes file \"%s\"\n", nlist));
+		TALLOC_FREE(ret);
+		return NULL;
+	}
+	/* ignore trailing empty lines */
+	while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
+		nlines--;
+	}
+
+	for (i=0; i < nlines; i++) {
+		char *node;
+		uint32_t flags;
+		size_t len;
+
+		node = lines[i];
+		/* strip leading spaces */
+		while((*node == ' ') || (*node == '\t')) {
+			node++;
+		}
+
+		len = strlen(node);
+
+		/* strip trailing spaces */
+		while ((len > 1) &&
+		       ((node[len-1] == ' ') || (node[len-1] == '\t')))
+		{
+			node[len-1] = '\0';
+			len--;
+		}
+
+		if (len == 0) {
+			continue;
+		}
+		if (*node == '#') {
+			/* A "deleted" node is a node that is
+			   commented out in the nodes file.  This is
+			   used instead of removing a line, which
+			   would cause subsequent nodes to change
+			   their PNN. */
+			flags = NODE_FLAGS_DELETED;
+			node = discard_const("0.0.0.0");
+		} else {
+			flags = 0;
+		}
+		if (!node_map_add(mem_ctx, node, flags, &ret)) {
+			talloc_free(lines);
+			TALLOC_FREE(ret);
+			return NULL;
+		}
+	}
+
+	talloc_free(lines);
+	return ret;
+}
+
+/*
+ * Build a node map from an array of node pointers.
+ *
+ * The map is allocated (zeroed) on mem_ctx with room for num_nodes
+ * entries; address, PNN and flags are copied from each node.
+ * Returns NULL if the allocation fails.
+ */
+struct ctdb_node_map_old *
+ctdb_node_list_to_map(struct ctdb_node **nodes, uint32_t num_nodes,
+		      TALLOC_CTX *mem_ctx)
+{
+	struct ctdb_node_map_old *map;
+	size_t bytes;
+	uint32_t idx;
+
+	bytes = offsetof(struct ctdb_node_map_old, nodes) +
+		num_nodes * sizeof(struct ctdb_node_and_flags);
+	map = (struct ctdb_node_map_old *)talloc_zero_size(mem_ctx, bytes);
+	if (map == NULL) {
+		DEBUG(DEBUG_ERR,
+		      (__location__ " Failed to allocate nodemap array\n"));
+		return NULL;
+	}
+
+	map->num = num_nodes;
+	for (idx = 0; idx < num_nodes; idx++) {
+		struct ctdb_node_and_flags *dst = &map->nodes[idx];
+
+		dst->addr = nodes[idx]->address;
+		dst->pnn = nodes[idx]->pnn;
+		dst->flags = nodes[idx]->flags;
+	}
+
+	return map;
+}
+
+/*
+ * Names of the event script calls.
+ * NOTE(review): presumably indexed by the event script call enum, so the
+ * order here would have to match that enum - confirm against its
+ * declaration.
+ */
+const char *ctdb_eventscript_call_names[] = {
+ "init",
+ "setup",
+ "startup",
+ "startrecovery",
+ "recovered",
+ "takeip",
+ "releaseip",
+ "stopped",
+ "monitor",
+ "status",
+ "shutdown",
+ "reload",
+ "updateip",
+ "ipreallocated"
+};
+
+/* Runstate handling */
+/*
+ * Mapping between runstate values and their labels. The table ends with
+ * an entry whose label is NULL, and the first entry (UNKNOWN) doubles as
+ * the fallback returned by runstate_to_string().
+ */
+static struct {
+ enum ctdb_runstate runstate;
+ const char * label;
+} runstate_map[] = {
+ { CTDB_RUNSTATE_UNKNOWN, "UNKNOWN" },
+ { CTDB_RUNSTATE_INIT, "INIT" },
+ { CTDB_RUNSTATE_SETUP, "SETUP" },
+ { CTDB_RUNSTATE_FIRST_RECOVERY, "FIRST_RECOVERY" },
+ { CTDB_RUNSTATE_STARTUP, "STARTUP" },
+ { CTDB_RUNSTATE_RUNNING, "RUNNING" },
+ { CTDB_RUNSTATE_SHUTDOWN, "SHUTDOWN" },
+ /* terminator */
+ { -1, NULL },
+};
+
+/*
+ * Return the label for a runstate. A value that is not in runstate_map
+ * yields the label of the table's first entry ("UNKNOWN").
+ */
+const char *runstate_to_string(enum ctdb_runstate runstate)
+{
+	int idx = 0;
+
+	while (runstate_map[idx].label != NULL) {
+		if (runstate_map[idx].runstate == runstate) {
+			return runstate_map[idx].label;
+		}
+		idx++;
+	}
+
+	return runstate_map[0].label;
+}
+
+/*
+ * Return the runstate whose label matches label, ignoring case, or
+ * CTDB_RUNSTATE_UNKNOWN when no label matches.
+ */
+enum ctdb_runstate runstate_from_string(const char *label)
+{
+	int idx = 0;
+
+	while (runstate_map[idx].label != NULL) {
+		if (strcasecmp(runstate_map[idx].label, label) == 0) {
+			return runstate_map[idx].runstate;
+		}
+		idx++;
+	}
+
+	return CTDB_RUNSTATE_UNKNOWN;
+}
+
+/*
+ * Move ctdb to a new runstate. The change is logged first; a runstate
+ * that is not strictly greater than the current one is reported through
+ * ctdb_fatal().
+ */
+void ctdb_set_runstate(struct ctdb_context *ctdb, enum ctdb_runstate runstate)
+{
+	DEBUG(DEBUG_NOTICE,("Set runstate to %s (%d)\n",
+			    runstate_to_string(runstate), runstate));
+
+	if (!(runstate > ctdb->runstate)) {
+		ctdb_fatal(ctdb, "runstate must always increase");
+	}
+
+	ctdb->runstate = runstate;
+}
+
+/*
+ * Convert arbitrary data to a uint32 array padded to a 4-byte boundary.
+ *
+ * Element 0 holds the total number of elements (itself included); the
+ * key bytes follow, with the unused tail of the last element left zero.
+ * The array is allocated on mem_ctx; NULL is returned if that fails.
+ */
+uint32_t *ctdb_key_to_idkey(TALLOC_CTX *mem_ctx, TDB_DATA key)
+{
+	const size_t words =
+		(key.dsize + sizeof(uint32_t) - 1) / sizeof(uint32_t);
+	uint32_t idkey_size = 1 + words;
+	uint32_t *idkey;
+
+	idkey = talloc_zero_array(mem_ctx, uint32_t, idkey_size);
+	if (idkey == NULL) {
+		return NULL;
+	}
+
+	idkey[0] = idkey_size;
+	memcpy(&idkey[1], key.dptr, key.dsize);
+
+	return idkey;
+}
diff --git a/ctdb/common/db_hash.c b/ctdb/common/db_hash.c
new file mode 100644
index 0000000..8dd62c4
--- /dev/null
+++ b/ctdb/common/db_hash.c
@@ -0,0 +1,295 @@
+/*
+ Using tdb as a hash table
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+#include <tdb.h>
+
+#include "common/db_hash.h"
+
+/*
+ * Hash table context.  Wraps the tdb handle opened by db_hash_init(); the
+ * handle is closed by db_hash_destructor().
+ */
+struct db_hash_context {
+ struct tdb_context *db;
+};
+
+
+/*
+ * Talloc destructor: close the underlying tdb, if one is still open.
+ * Always returns 0.
+ */
+static int db_hash_destructor(struct db_hash_context *dh)
+{
+	if (dh->db == NULL) {
+		/* Nothing to close */
+		return 0;
+	}
+
+	tdb_close(dh->db);
+	dh->db = NULL;
+
+	return 0;
+}
+
+/*
+ * Create a hash table backed by a tdb opened with TDB_INTERNAL.
+ *
+ * On success the new context, allocated on mem_ctx, is stored in *result
+ * and 0 is returned.  ENOMEM is returned if either the context allocation
+ * or tdb_open() fails.  A talloc destructor is installed so that the tdb
+ * is closed when the context is freed.
+ */
+int db_hash_init(TALLOC_CTX *mem_ctx, const char *name, int hash_size,
+		 enum db_hash_type type, struct db_hash_context **result)
+{
+	struct db_hash_context *ctx;
+	int flags;
+
+	ctx = talloc_zero(mem_ctx, struct db_hash_context);
+	if (ctx == NULL) {
+		return ENOMEM;
+	}
+
+	flags = TDB_INTERNAL | TDB_DISALLOW_NESTING;
+	if (type == DB_HASH_COMPLEX) {
+		flags |= TDB_INCOMPATIBLE_HASH;
+	}
+
+	ctx->db = tdb_open(name, hash_size, flags, O_RDWR|O_CREAT, 0);
+	if (ctx->db == NULL) {
+		talloc_free(ctx);
+		return ENOMEM;
+	}
+
+	talloc_set_destructor(ctx, db_hash_destructor);
+
+	*result = ctx;
+	return 0;
+}
+
+/*
+ * Translate the last tdb error recorded on dh->db into an errno value.
+ * TDB_SUCCESS maps to 0; any error without a specific mapping becomes EIO.
+ */
+static int db_hash_map_tdb_error(struct db_hash_context *dh)
+{
+	switch (tdb_error(dh->db)) {
+	case TDB_SUCCESS:
+		return 0;
+	case TDB_ERR_OOM:
+		return ENOMEM;
+	case TDB_ERR_EXISTS:
+		return EEXIST;
+	case TDB_ERR_NOEXIST:
+		return ENOENT;
+	case TDB_ERR_EINVAL:
+		return EINVAL;
+	default:
+		return EIO;
+	}
+}
+
+/*
+ * Store a new record with TDB_INSERT.
+ *
+ * Returns 0 on success, EINVAL for a NULL context, otherwise the errno
+ * value mapped from the tdb error.
+ */
+int db_hash_insert(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen,
+		   uint8_t *databuf, size_t datalen)
+{
+	TDB_DATA key = { .dptr = keybuf, .dsize = keylen };
+	TDB_DATA data = { .dptr = databuf, .dsize = datalen };
+
+	if (dh == NULL) {
+		return EINVAL;
+	}
+
+	if (tdb_store(dh->db, key, data, TDB_INSERT) != 0) {
+		return db_hash_map_tdb_error(dh);
+	}
+
+	return 0;
+}
+
+/*
+ * Store a record with TDB_REPLACE.
+ *
+ * Returns 0 on success, EINVAL for a NULL context, otherwise the errno
+ * value mapped from the tdb error.
+ */
+int db_hash_add(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen,
+		uint8_t *databuf, size_t datalen)
+{
+	TDB_DATA key = { .dptr = keybuf, .dsize = keylen };
+	TDB_DATA data = { .dptr = databuf, .dsize = datalen };
+
+	if (dh == NULL) {
+		return EINVAL;
+	}
+
+	if (tdb_store(dh->db, key, data, TDB_REPLACE) != 0) {
+		return db_hash_map_tdb_error(dh);
+	}
+
+	return 0;
+}
+
+/*
+ * Delete the record stored under the given key.
+ *
+ * Returns 0 on success, EINVAL for a NULL context, otherwise the errno
+ * value mapped from the tdb error.
+ */
+int db_hash_delete(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen)
+{
+	TDB_DATA key = { .dptr = keybuf, .dsize = keylen };
+
+	if (dh == NULL) {
+		return EINVAL;
+	}
+
+	if (tdb_delete(dh->db, key) != 0) {
+		return db_hash_map_tdb_error(dh);
+	}
+
+	return 0;
+}
+
+/*
+ * State handed through tdb_parse_record() to db_hash_fetch_parser(): the
+ * caller's parser and the private data to pass on to it.
+ */
+struct db_hash_fetch_state {
+ db_hash_record_parser_fn parser;
+ void *private_data;
+};
+
+/*
+ * tdb_parse_record() callback: unpack the TDB_DATA pair and forward it to
+ * the caller's parser, returning whatever that parser returns.
+ */
+static int db_hash_fetch_parser(TDB_DATA key, TDB_DATA data, void *private_data)
+{
+	struct db_hash_fetch_state *fetch = private_data;
+
+	return fetch->parser(key.dptr, key.dsize,
+			     data.dptr, data.dsize,
+			     fetch->private_data);
+}
+
+/*
+ * Look up a key and hand the record to the caller's parser.
+ *
+ * Returns EINVAL if dh or parser is NULL, ENOENT if tdb_parse_record()
+ * returns -1, otherwise the value returned by tdb_parse_record().
+ */
+int db_hash_fetch(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen,
+		  db_hash_record_parser_fn parser, void *private_data)
+{
+	struct db_hash_fetch_state state = {
+		.parser = parser,
+		.private_data = private_data,
+	};
+	TDB_DATA key = { .dptr = keybuf, .dsize = keylen };
+	int ret;
+
+	if (dh == NULL || parser == NULL) {
+		return EINVAL;
+	}
+
+	ret = tdb_parse_record(dh->db, key, db_hash_fetch_parser, &state);
+
+	return (ret == -1) ? ENOENT : ret;
+}
+
+/*
+ * Check whether a key is present.
+ *
+ * Returns 0 if tdb_exists() reports the key, EINVAL for a NULL context,
+ * otherwise the mapped tdb error (ENOENT if tdb recorded no error).
+ */
+int db_hash_exists(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen)
+{
+	TDB_DATA key = { .dptr = keybuf, .dsize = keylen };
+	int err;
+
+	if (dh == NULL) {
+		return EINVAL;
+	}
+
+	if (tdb_exists(dh->db, key) == 1) {
+		/* Key found */
+		return 0;
+	}
+
+	err = db_hash_map_tdb_error(dh);
+	if (err == 0) {
+		/* Not found, but tdb did not flag an error */
+		err = ENOENT;
+	}
+
+	return err;
+}
+
+/*
+ * State handed through tdb_traverse()/tdb_traverse_read() to
+ * db_hash_traverse_parser(): the caller's parser and its private data.
+ */
+struct db_hash_traverse_state {
+ db_hash_record_parser_fn parser;
+ void *private_data;
+};
+
+/*
+ * tdb traverse callback: forward each record to the caller's parser and
+ * return its result.  The tdb argument is not used.
+ */
+static int db_hash_traverse_parser(struct tdb_context *tdb,
+				   TDB_DATA key, TDB_DATA data,
+				   void *private_data)
+{
+	struct db_hash_traverse_state *trav = private_data;
+	int ret;
+
+	ret = trav->parser(key.dptr, key.dsize,
+			   data.dptr, data.dsize,
+			   trav->private_data);
+
+	return ret;
+}
+
+/*
+ * Traverse the table with tdb_traverse_read().
+ *
+ * With a NULL parser the records are only counted.  On success 0 is
+ * returned and, if count is not NULL, *count receives the value returned
+ * by the traverse.  Returns EINVAL for a NULL context, or the mapped tdb
+ * error if the traverse returns -1.
+ */
+int db_hash_traverse(struct db_hash_context *dh,
+		     db_hash_record_parser_fn parser, void *private_data,
+		     int *count)
+{
+	struct db_hash_traverse_state state;
+	int num;
+
+	if (dh == NULL) {
+		return EINVAL;
+	}
+
+	if (parser != NULL) {
+		state.parser = parser;
+		state.private_data = private_data;
+
+		num = tdb_traverse_read(dh->db, db_hash_traverse_parser,
+					&state);
+	} else {
+		/* Special case, for counting records */
+		num = tdb_traverse_read(dh->db, NULL, NULL);
+	}
+
+	if (num == -1) {
+		return db_hash_map_tdb_error(dh);
+	}
+
+	if (count != NULL) {
+		*count = num;
+	}
+
+	return 0;
+}
+
+/*
+ * Traverse the table with tdb_traverse(), for parsers that modify it.
+ *
+ * On success 0 is returned and, if count is not NULL, *count receives the
+ * value returned by the traverse.  Returns EINVAL if dh or parser is NULL,
+ * or the mapped tdb error if the traverse returns -1.
+ */
+int db_hash_traverse_update(struct db_hash_context *dh,
+			    db_hash_record_parser_fn parser,
+			    void *private_data, int *count)
+{
+	struct db_hash_traverse_state state = {
+		.parser = parser,
+		.private_data = private_data,
+	};
+	int num;
+
+	if (dh == NULL || parser == NULL) {
+		return EINVAL;
+	}
+
+	num = tdb_traverse(dh->db, db_hash_traverse_parser, &state);
+	if (num == -1) {
+		return db_hash_map_tdb_error(dh);
+	}
+
+	if (count != NULL) {
+		*count = num;
+	}
+
+	return 0;
+}
diff --git a/ctdb/common/db_hash.h b/ctdb/common/db_hash.h
new file mode 100644
index 0000000..67e2b85
--- /dev/null
+++ b/ctdb/common/db_hash.h
@@ -0,0 +1,174 @@
+/*
+ Using tdb as a hash table
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_DB_HASH_H__
+#define __CTDB_DB_HASH_H__
+
+#include <talloc.h>
+#include <tdb.h>
+
+/**
+ * @file db_hash.h
+ *
+ * @brief Use tdb database as a hash table
+ *
+ * This uses in-memory tdb databases to create a fixed sized hash table.
+ */
+
+/**
+ * @brief Hash type to indicate the hashing function to use.
+ *
+ * DB_HASH_SIMPLE uses default hashing function
+ * DB_HASH_COMPLEX uses jenkins hashing function
+ */
+enum db_hash_type {
+ DB_HASH_SIMPLE,
+ DB_HASH_COMPLEX,
+};
+
+/**
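+ * @brief Parser callback function called when fetching or traversing records
+ *
+ * This function is called for the record found by a fetch and for each
+ * record visited by a traverse. This function should not modify key and
+ * data arguments.
+ *
+ * The function should return 0 on success and errno on error. During a
+ * traverse, a non-zero return value stops the traverse.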
+ */
+typedef int (*db_hash_record_parser_fn)(uint8_t *keybuf, size_t keylen,
+ uint8_t *databuf, size_t datalen,
+ void *private_data);
+
+/**
+ * @brief Abstract structure representing tdb hash table
+ */
+struct db_hash_context;
+
+/**
+ * @brief Initialize tdb hash table
+ *
+ * This returns a new tdb hash table context which is a talloc context. Freeing
+ * this context will free all the memory associated with the hash table.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] name The name for the hash table
+ * @param[in] hash_size The size of the hash table
+ * @param[in] type The type of hashing function to use
+ * @param[out] result The new db_hash_context structure
+ * @return 0 on success, errno on failure
+ */
+int db_hash_init(TALLOC_CTX *mem_ctx, const char *name, int hash_size,
+ enum db_hash_type type, struct db_hash_context **result);
+
+/**
+ * @brief Insert a record into the hash table
+ *
+ * The key and data can be any binary data. Insert only if the record does not
+ * exist. If the record already exists, return error.
+ *
+ * @param[in] dh The tdb hash table context
+ * @param[in] keybuf The key buffer
+ * @param[in] keylen The key length
+ * @param[in] databuf The data buffer
+ * @param[in] datalen The data length
+ * @return 0 on success, errno on failure
+ */
+int db_hash_insert(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen,
+ uint8_t *databuf, size_t datalen);
+
+/**
+ * @brief Add a record into the hash table
+ *
+ * The key and data can be any binary data. If the record does not exist,
+ * insert the record. If the record already exists, replace the record.
+ *
+ * @param[in] dh The tdb hash table context
+ * @param[in] keybuf The key buffer
+ * @param[in] keylen The key length
+ * @param[in] databuf The data buffer
+ * @param[in] datalen The data length
+ * @return 0 on success, errno on failure
+ */
+int db_hash_add(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen,
+ uint8_t *databuf, size_t datalen);
+/**
+ * @brief Delete a record from the hash table
+ *
+ * @param[in] dh The tdb hash table context
+ * @param[in] keybuf The key buffer
+ * @param[in] keylen The key length
+ * @return 0 on success, errno on failure
+ */
+int db_hash_delete(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen);
+
+/**
+ * @brief Fetch a record from the hash table
+ *
+ * The key and data can be any binary data.
+ *
+ * @param[in] dh The tdb hash table context
+ * @param[in] keybuf The key buffer
+ * @param[in] keylen The key length
+ * @param[in] parser Function called when the matching record is found
+ * @param[in] private_data Private data to parser function
+ * @return 0 on success, errno on failure
+ */
+int db_hash_fetch(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen,
+ db_hash_record_parser_fn parser, void *private_data);
+
+/**
+ * @brief Check if a record exists in the hash table
+ *
+ * @param[in] dh The tdb hash table context
+ * @param[in] keybuf The key buffer
+ * @param[in] keylen The key length
+ * @return 0 if the record exists, errno on failure
+ */
+int db_hash_exists(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen);
+
+/**
+ * @brief Traverse the database without modification
+ *
+ * The parser function should return non-zero value to stop traverse.
+ *
+ * @param[in] dh The tdb hash table context
+ * @param[in] parser Function called for each record
+ * @param[in] private_data Private data to parser function
+ * @param[out] count Number of records traversed
+ * @return 0 on success, errno on failure
+ */
+int db_hash_traverse(struct db_hash_context *dh,
+ db_hash_record_parser_fn parser, void *private_data,
+ int *count);
+
+/**
+ * @brief Traverse the database for modifications
+ *
+ * The parser function should return non-zero value to stop traverse.
+ *
+ * @param[in] dh The tdb hash table context
+ * @param[in] parser Function called for each record
+ * @param[in] private_data Private data to parser function
+ * @param[out] count Number of records traversed
+ * @return 0 on success, errno on failure
+ */
+int db_hash_traverse_update(struct db_hash_context *dh,
+ db_hash_record_parser_fn parser,
+ void *private_data, int *count);
+
+#endif /* __CTDB_DB_HASH_H__ */
diff --git a/ctdb/common/event_script.c b/ctdb/common/event_script.c
new file mode 100644
index 0000000..edd607f
--- /dev/null
+++ b/ctdb/common/event_script.c
@@ -0,0 +1,247 @@
+/*
+ Low level event script handling
+
+ Copyright (C) Amitay Isaacs 2017
+ Copyright (C) Martin Schwenke 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/dir.h"
+#include "system/glob.h"
+
+#include <talloc.h>
+
+#include "common/event_script.h"
+
+/*
+ * Directory entry filter: return 1 if the entry name matches the event
+ * script pattern (two digits, a dot, anything, ".script"), 0 otherwise.
+ */
+static int script_filter(const struct dirent *de)
+{
+	/* fnmatch() returns 0 when the name matches the pattern */
+	if (fnmatch("[0-9][0-9].*.script", de->d_name, 0) != 0) {
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Build a list of the event scripts found in script_dir.
+ *
+ * Directory entries are selected by script_filter() and sorted with
+ * alphasort.  For each one an event_script is created whose name is the
+ * file name without the ".script" suffix, whose path is
+ * "<script_dir>/<file name>", and whose enabled flag is set when stat()
+ * succeeds and the owner-execute bit is set.
+ *
+ * On success 0 is returned and *out is set (to a list with no scripts if
+ * nothing matched).  On failure errno from scandir() or ENOMEM is
+ * returned and *out is not written.
+ */
+int event_script_get_list(TALLOC_CTX *mem_ctx,
+ const char *script_dir,
+ struct event_script_list **out)
+{
+ struct dirent **namelist = NULL;
+ struct event_script_list *script_list = NULL;
+ size_t ds_len;
+ int count, ret;
+ int i;
+
+ count = scandir(script_dir, &namelist, script_filter, alphasort);
+ if (count == -1) {
+ ret = errno;
+ goto done;
+ }
+
+ script_list = talloc_zero(mem_ctx, struct event_script_list);
+ if (script_list == NULL) {
+ goto nomem;
+ }
+
+ /* No matching scripts: hand back the empty (zeroed) list */
+ if (count == 0) {
+ ret = 0;
+ *out = script_list;
+ goto done;
+ }
+
+ script_list->num_scripts = count;
+ script_list->script = talloc_zero_array(script_list,
+ struct event_script *,
+ count);
+ if (script_list->script == NULL) {
+ goto nomem;
+ }
+
+ ds_len = strlen(".script");
+ for (i = 0; i < count; i++) {
+ struct event_script *s;
+ struct stat statbuf;
+
+ s = talloc_zero(script_list->script, struct event_script);
+ if (s == NULL) {
+ goto nomem;
+ }
+
+ script_list->script[i] = s;
+
+ /* Script name is the file name with ".script" stripped */
+ s->name = talloc_strndup(script_list->script,
+ namelist[i]->d_name,
+ strlen(namelist[i]->d_name) - ds_len);
+ if (s->name == NULL) {
+ goto nomem;
+ }
+
+ s->path = talloc_asprintf(script_list->script,
+ "%s/%s",
+ script_dir,
+ namelist[i]->d_name);
+ if (s->path == NULL) {
+ goto nomem;
+ }
+
+ ret = stat(s->path, &statbuf);
+ if (ret == 0) {
+ /*
+ * A stat() failure (ret != 0) means this is either
+ * a dangling symlink or it has just disappeared.
+ * Either way, it isn't executable, so enabled is
+ * left false as zeroed by talloc_zero().
+ */
+ if (statbuf.st_mode & S_IXUSR) {
+ s->enabled = true;
+ }
+ }
+ }
+
+ *out = script_list;
+ ret = 0;
+ goto done;
+
+nomem:
+ ret = ENOMEM;
+ talloc_free(script_list);
+
+done:
+ /* The scandir() results are malloc'd and must be freed one by one */
+ if (namelist != NULL && count != -1) {
+ for (i=0; i<count; i++) {
+ free(namelist[i]);
+ }
+ free(namelist);
+ }
+
+ return ret;
+}
+
+/*
+ * Set or clear the execute bits (user, group and other) on a script.
+ *
+ * script_name may be given with or without the ".script" suffix.  The
+ * file must appear as a directory entry of script_dir and must match the
+ * script name pattern checked by script_filter().
+ *
+ * Returns 0 on success, ENAMETOOLONG if a path does not fit in PATH_MAX,
+ * ENOENT if no such directory entry exists, EINVAL if the name is not a
+ * valid script name or the inode check fails, otherwise errno from
+ * opendir(), open(), fstat() or fchmod().
+ */
+int event_script_chmod(const char *script_dir,
+ const char *script_name,
+ bool enable)
+{
+ const char *dot_script = ".script";
+ size_t ds_len = strlen(dot_script);
+ size_t sn_len = strlen(script_name);
+ DIR *dirp;
+ struct dirent *de;
+ char buf[PATH_MAX];
+ const char *script_file;
+ int ret, new_mode;
+ char filename[PATH_MAX];
+ struct stat st;
+ bool found;
+ ino_t found_inode;
+ int fd = -1;
+
+ /* Allow script_name to already have ".script" suffix */
+ if (sn_len > ds_len &&
+ strcmp(&script_name[sn_len - ds_len], dot_script) == 0) {
+ script_file = script_name;
+ } else {
+ ret = snprintf(buf, sizeof(buf), "%s.script", script_name);
+ if (ret < 0 || (size_t)ret >= sizeof(buf)) {
+ return ENAMETOOLONG;
+ }
+ script_file = buf;
+ }
+
+ dirp = opendir(script_dir);
+ if (dirp == NULL) {
+ return errno;
+ }
+
+ /* Look for an exact directory entry and remember its inode number */
+ found = false;
+ while ((de = readdir(dirp)) != NULL) {
+ if (strcmp(de->d_name, script_file) == 0) {
+ /* check for valid script names */
+ ret = script_filter(de);
+ if (ret == 0) {
+ closedir(dirp);
+ return EINVAL;
+ }
+
+ found = true;
+ found_inode = de->d_ino;
+ break;
+ }
+ }
+ closedir(dirp);
+
+ if (! found) {
+ return ENOENT;
+ }
+
+ ret = snprintf(filename,
+ sizeof(filename),
+ "%s/%s",
+ script_dir,
+ script_file);
+ if (ret < 0 || (size_t)ret >= sizeof(filename)) {
+ return ENAMETOOLONG;
+ }
+
+ fd = open(filename, O_RDWR);
+ if (fd == -1) {
+ ret = errno;
+ goto done;
+ }
+
+ ret = fstat(fd, &st);
+ if (ret != 0) {
+ ret = errno;
+ goto done;
+ }
+
+ /*
+ * If the directory entry inode number doesn't match the one
+ * returned by fstat() then this is probably a symlink, so the
+ * caller should not be calling this function. Note that this
+ * is a cheap sanity check to catch most programming errors.
+ * This doesn't cost any extra system calls but can still miss
+ * the unlikely case where the symlink is to a file on a
+ * different filesystem with the same inode number as the
+ * symlink.
+ */
+ if (found && found_inode != st.st_ino) {
+ ret = EINVAL;
+ goto done;
+ }
+
+ /* Only the execute bits change; all other mode bits are preserved */
+ if (enable) {
+ new_mode = st.st_mode | (S_IXUSR | S_IXGRP | S_IXOTH);
+ } else {
+ new_mode = st.st_mode & ~(S_IXUSR | S_IXGRP | S_IXOTH);
+ }
+
+ ret = fchmod(fd, new_mode);
+ if (ret != 0) {
+ ret = errno;
+ goto done;
+ }
+
+done:
+ if (fd != -1) {
+ close(fd);
+ }
+ return ret;
+}
diff --git a/ctdb/common/event_script.h b/ctdb/common/event_script.h
new file mode 100644
index 0000000..bf5a8fd
--- /dev/null
+++ b/ctdb/common/event_script.h
@@ -0,0 +1,72 @@
+/*
+ Low level event script handling
+
+ Copyright (C) Amitay Isaacs 2017
+ Copyright (C) Martin Schwenke 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_SCRIPT_H__
+#define __CTDB_SCRIPT_H__
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+
+/**
+ * @file event_script.h
+ *
+ * @brief Script listing and manipulation
+ */
+
+
+/**
+ * @brief A single event script
+ *
+ * name is the script file name without the ".script" suffix, path is the
+ * script directory joined with the file name, and enabled is true when
+ * the file could be stat()ed and has the owner-execute bit set.
+ */
+struct event_script {
+ char *name;
+ char *path;
+ bool enabled;
+};
+
+/**
+ * @brief A list of event scripts
+ *
+ * script is an array of num_scripts pointers to event_script structures.
+ * For an empty list num_scripts is 0 and script is NULL.
+ */
+struct event_script_list {
+ unsigned int num_scripts;
+ struct event_script **script;
+};
+
+
+/**
+ * @brief Retrieve a list of scripts
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] script_dir Directory containing scripts
+ * @param[out] out List of scripts
+ * @return 0 on success, errno on failure
+ */
+int event_script_get_list(TALLOC_CTX *mem_ctx,
+ const char *script_dir,
+ struct event_script_list **out);
+
+/**
+ * @brief Make a script executable or not executable
+ *
+ * @param[in] script_dir Directory containing script
+ * @param[in] script_name Name of the script, with or without ".script" suffix
+ * @param[in] executable True if script should be made executable
+ * @return 0 on success, errno on failure
+ */
+int event_script_chmod(const char *script_dir,
+ const char *script_name,
+ bool executable);
+
+#endif /* __CTDB_SCRIPT_H__ */
diff --git a/ctdb/common/hash_count.c b/ctdb/common/hash_count.c
new file mode 100644
index 0000000..f845016
--- /dev/null
+++ b/ctdb/common/hash_count.c
@@ -0,0 +1,219 @@
+/*
+ Using hash table for counting events
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/time.h"
+
+#include <tdb.h>
+
+#include "lib/util/time.h"
+
+#include "common/db_hash.h"
+#include "common/hash_count.h"
+
+/*
+ * Record stored in the hash table for each key: the time the current
+ * counting interval started and the number of increments seen in it.
+ */
+struct hash_count_value {
+ struct timeval update_time;
+ uint64_t counter;
+};
+
+/*
+ * Hash count context: the backing hash table, the length of a counting
+ * interval, and the handler (with its private data) that is called on
+ * every increment.
+ */
+struct hash_count_context {
+ struct db_hash_context *dh;
+ struct timeval update_interval;
+ hash_count_update_handler_fn handler;
+ void *private_data;
+};
+
+/*
+ * Initialise hash count map
+ *
+ * Allocates the context on mem_ctx and creates its backing hash table as
+ * a talloc child of the context.  Returns EINVAL for a NULL handler,
+ * ENOMEM if the context cannot be allocated, or the db_hash_init() error.
+ */
+int hash_count_init(TALLOC_CTX *mem_ctx, struct timeval update_interval,
+		    hash_count_update_handler_fn handler, void *private_data,
+		    struct hash_count_context **result)
+{
+	struct hash_count_context *ctx;
+	int ret;
+
+	if (handler == NULL) {
+		return EINVAL;
+	}
+
+	ctx = talloc_zero(mem_ctx, struct hash_count_context);
+	if (ctx == NULL) {
+		return ENOMEM;
+	}
+
+	ctx->update_interval = update_interval;
+	ctx->handler = handler;
+	ctx->private_data = private_data;
+
+	ret = db_hash_init(ctx, "hash_count_db", 8192, DB_HASH_COMPLEX,
+			   &ctx->dh);
+	if (ret != 0) {
+		talloc_free(ctx);
+		return ret;
+	}
+
+	*result = ctx;
+	return 0;
+}
+
+/*
+ * db_hash_fetch() callback: copy the stored record into the
+ * struct hash_count_value that private_data points to.
+ *
+ * Returns 0 on success, EIO if the record does not have the expected size.
+ */
+static int hash_count_fetch_parser(uint8_t *keybuf, size_t keylen,
+				   uint8_t *databuf, size_t datalen,
+				   void *private_data)
+{
+	struct hash_count_value *value =
+		(struct hash_count_value *)private_data;
+
+	if (datalen != sizeof(struct hash_count_value)) {
+		return EIO;
+	}
+
+	/*
+	 * databuf is a raw byte buffer with no alignment guarantee for
+	 * struct hash_count_value, so copy the bytes rather than
+	 * dereferencing a cast pointer.
+	 */
+	memcpy(value, databuf, sizeof(*value));
+	return 0;
+}
+
+/*
+ * Fetch the counter record for key into *value.  Returns the result of
+ * db_hash_fetch() using hash_count_fetch_parser() as the parser.
+ */
+static int hash_count_fetch(struct hash_count_context *hcount, TDB_DATA key,
+ struct hash_count_value *value)
+{
+ return db_hash_fetch(hcount->dh, key.dptr, key.dsize,
+ hash_count_fetch_parser, value);
+}
+
+/*
+ * Store *value as a new record for key via db_hash_insert(), which fails
+ * if the key already exists.
+ */
+static int hash_count_insert(struct hash_count_context *hcount, TDB_DATA key,
+ struct hash_count_value *value)
+{
+ return db_hash_insert(hcount->dh, key.dptr, key.dsize,
+ (uint8_t *)value,
+ sizeof(struct hash_count_value));
+}
+
+/*
+ * Store *value for key via db_hash_add(), replacing any existing record.
+ */
+static int hash_count_update(struct hash_count_context *hcount, TDB_DATA key,
+ struct hash_count_value *value)
+{
+ return db_hash_add(hcount->dh, key.dptr, key.dsize,
+ (uint8_t *)value, sizeof(struct hash_count_value));
+}
+
+/*
+ * Count one event for key and notify the handler.
+ *
+ * A key that is not yet stored starts at 1.  For a stored key the counter
+ * is incremented while the current time is before update_time plus the
+ * update interval; otherwise the interval restarts and the counter goes
+ * back to 1.  The handler is called with the new counter before the
+ * record is written.
+ *
+ * Returns EINVAL for a NULL context, a fetch error other than ENOENT
+ * unchanged, otherwise the result of storing the record.
+ */
+int hash_count_increment(struct hash_count_context *hcount, TDB_DATA key)
+{
+	struct hash_count_value value;
+	struct timeval current_time = timeval_current();
+	struct timeval interval_end;
+	int ret;
+
+	if (hcount == NULL) {
+		return EINVAL;
+	}
+
+	ret = hash_count_fetch(hcount, key, &value);
+
+	if (ret == ENOENT) {
+		/* First event seen for this key */
+		value.update_time = current_time;
+		value.counter = 1;
+
+		hcount->handler(key, value.counter, hcount->private_data);
+		return hash_count_insert(hcount, key, &value);
+	}
+
+	if (ret != 0) {
+		return ret;
+	}
+
+	interval_end = timeval_sum(&value.update_time,
+				   &hcount->update_interval);
+	if (timeval_compare(&current_time, &interval_end) >= 0) {
+		/* Interval has elapsed: start a new one */
+		value.update_time = current_time;
+		value.counter = 1;
+	} else {
+		value.counter += 1;
+	}
+
+	hcount->handler(key, value.counter, hcount->private_data);
+	return hash_count_update(hcount, key, &value);
+}
+
+/*
+ * Compute tv1 - tv2 by borrowing one second into the microsecond field,
+ * subtracting field by field, and then carrying whole seconds back out of
+ * the microsecond field.
+ *
+ * If tv1->tv_sec is not greater than 1, tv1 is returned unchanged and tv2
+ * is ignored.
+ * NOTE(review): the "> 1" test looks like a guard for the borrow, for
+ * which ">= 1" would suffice - confirm that returning tv1 unmodified is
+ * the intended fallback.
+ */
+static struct timeval timeval_subtract(const struct timeval *tv1,
+ const struct timeval *tv2)
+{
+ struct timeval tv = *tv1;
+ const unsigned int million = 1000000;
+
+ /* Borrow one second so the microsecond subtraction cannot go negative */
+ if (tv.tv_sec > 1) {
+ tv.tv_sec -= 1;
+ tv.tv_usec += million;
+ } else {
+ return tv;
+ }
+
+ tv.tv_sec -= tv2->tv_sec;
+ tv.tv_usec -= tv2->tv_usec;
+
+ /* Normalise: move whole seconds out of the microsecond field */
+ tv.tv_sec += tv.tv_usec / million;
+ tv.tv_usec = tv.tv_usec % million;
+
+ return tv;
+}
+
+/*
+ * Traverse state for hash_count_expire(): the table to delete from, the
+ * cut-off time (records with an older update_time are deleted), and the
+ * number of records deleted so far.
+ */
+struct hash_count_expire_state {
+ struct db_hash_context *dh;
+ struct timeval last_time;
+ int count;
+};
+
+/*
+ * Traverse callback for hash_count_expire(): delete the record if its
+ * update_time is earlier than the cut-off in the traverse state.
+ *
+ * Returns 0 to continue the traverse, EIO if the record has an unexpected
+ * size, or the db_hash_delete() error; any non-zero value stops the
+ * traverse.
+ */
+static int hash_count_expire_parser(uint8_t *keybuf, size_t keylen,
+				    uint8_t *databuf, size_t datalen,
+				    void *private_data)
+{
+	struct hash_count_expire_state *state =
+		(struct hash_count_expire_state *)private_data;
+	struct hash_count_value value;
+	int ret = 0;
+
+	if (datalen != sizeof(struct hash_count_value)) {
+		return EIO;
+	}
+
+	/*
+	 * databuf is a raw byte buffer with no alignment guarantee for
+	 * struct hash_count_value, so work on a local copy rather than
+	 * dereferencing a cast pointer.
+	 */
+	memcpy(&value, databuf, sizeof(value));
+
+	if (timeval_compare(&value.update_time, &state->last_time) < 0) {
+		ret = db_hash_delete(state->dh, keybuf, keylen);
+		if (ret == 0) {
+			state->count += 1;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Delete every record whose update_time is older than the current time
+ * minus the update interval.
+ *
+ * If delete_count is not NULL it receives the number of records deleted.
+ * A NULL context is treated as having nothing to expire.  Traverse errors
+ * are deliberately ignored: this is best-effort cleanup and the function
+ * has no way to report them.
+ */
+void hash_count_expire(struct hash_count_context *hcount, int *delete_count)
+{
+	struct timeval current_time;
+	struct hash_count_expire_state state;
+
+	/* Match hash_count_increment(), which also tolerates a NULL context */
+	if (hcount == NULL) {
+		if (delete_count != NULL) {
+			*delete_count = 0;
+		}
+		return;
+	}
+
+	current_time = timeval_current();
+
+	state.dh = hcount->dh;
+	state.last_time = timeval_subtract(&current_time,
+					   &hcount->update_interval);
+	state.count = 0;
+
+	(void) db_hash_traverse_update(hcount->dh, hash_count_expire_parser,
+				       &state, NULL);
+
+	if (delete_count != NULL) {
+		*delete_count = state.count;
+	}
+}
diff --git a/ctdb/common/hash_count.h b/ctdb/common/hash_count.h
new file mode 100644
index 0000000..f14c82c
--- /dev/null
+++ b/ctdb/common/hash_count.h
@@ -0,0 +1,94 @@
+/*
+ Using hash table for counting events
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_HASH_COUNT_H__
+#define __CTDB_HASH_COUNT_H__
+
+/**
+ * @file hash_count.h
+ *
+ * @brief Count key-based events for specified interval
+ *
+ * This can be used to measure the rate of events based on any interval.
+ * For example, number of occurrences per second.
+ */
+
+/**
+ * @brief Handler callback function called when counter is incremented
+ *
+ * This function is called every time a counter is incremented for a key.
+ * The counter argument is the number of times the increment function is
+ * called during a count interval.
+ *
+ * This function should not modify key and data arguments.
+ */
+typedef void (*hash_count_update_handler_fn)(TDB_DATA key, uint64_t counter,
+ void *private_data);
+
+/**
+ * @brief Abstract structure representing hash based counting
+ */
+struct hash_count_context;
+
+/**
+ * @brief Initialize hash counting
+ *
+ * This returns a new hash count context which is a talloc context. Freeing
+ * this context will free all the memory associated with hash count.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] count_interval The time interval for counting events
+ * @param[in] handler Function called when counter is incremented
+ * @param[in] private_data Private data to handler function
+ * @param[out] result The new hash_count structure
+ * @return 0 on success, errno on failure
+ */
+int hash_count_init(TALLOC_CTX *mem_ctx, struct timeval count_interval,
+ hash_count_update_handler_fn handler, void *private_data,
+ struct hash_count_context **result);
+
+/**
+ * @brief Increment a counter for a key
+ *
+ * The first time this is called for a key, the corresponding counter is set
+ * to 1 and the start time is noted. All subsequent calls made during the
+ * count_interval (used in initializing the context) will increment the
+ * corresponding counter for the key. After the count_interval has elapsed,
+ * the counter will be reset to 1.
+ *
+ * @param[in] hcount The hash count context
+ * @param[in] key The key for which counter is updated
+ * @return 0 on success, errno on failure
+ *
+ * This will result in a callback function being called.
+ */
+int hash_count_increment(struct hash_count_context *hcount, TDB_DATA key);
+
+/**
+ * @brief Remove keys for which count interval has elapsed
+ *
+ * This function is used to clean the database of keys for which there are
+ * no recent events.
+ *
+ * @param[in] hcount The hash count context
+ * @param[out] delete_count The number of keys deleted
+ */
+void hash_count_expire(struct hash_count_context *hcount, int *delete_count);
+
+#endif /* __CTDB_HASH_COUNT_H__ */
diff --git a/ctdb/common/line.c b/ctdb/common/line.c
new file mode 100644
index 0000000..c4c6726
--- /dev/null
+++ b/ctdb/common/line.c
@@ -0,0 +1,145 @@
+/*
+ Line based I/O over fds
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+
+#include "lib/util/sys_rw.h"
+
+#include "common/line.h"
+
+/*
+ * State for line_read():
+ *   callback, private_data - called for every complete line
+ *   buf                    - read buffer
+ *   hint                   - amount by which buf grows when it is full
+ *   len                    - allocated size of buf
+ *   offset                 - number of bytes of unprocessed data in buf
+ *   num_lines              - number of lines passed to the callback
+ */
+struct line_read_state {
+ line_process_fn_t callback;
+ void *private_data;
+ char *buf;
+ size_t hint, len, offset;
+ int num_lines;
+};
+
+/*
+ * Find the first line terminator ('\n' or '\0') in buf[start..len).
+ *
+ * Returns true and stores its index in *pos if one is found; returns
+ * false and leaves *pos untouched otherwise.
+ */
+static bool line_read_one(char *buf, size_t start, size_t len, size_t *pos)
+{
+	size_t idx = start;
+
+	while (idx < len) {
+		char c = buf[idx];
+
+		if (c == '\n' || c == '\0') {
+			*pos = idx;
+			return true;
+		}
+		idx++;
+	}
+
+	return false;
+}
+
+/*
+ * Pass every complete line currently in the buffer to the callback, then
+ * move any trailing partial line to the front of the buffer.
+ *
+ * Each terminator is overwritten with '\0' so the callback receives a
+ * C string.  Returns 0, or the first non-zero value returned by the
+ * callback; in that case processing stops and the buffer is not
+ * compacted.
+ */
+static int line_read_process(struct line_read_state *state)
+{
+	size_t start = 0;
+	size_t pos = 0;
+
+	while (1) {
+		int ret;
+		bool ok;
+
+		ok = line_read_one(state->buf, start, state->offset, &pos);
+		if (! ok) {
+			break;
+		}
+
+		state->buf[pos] = '\0';
+		state->num_lines += 1;
+
+		ret = state->callback(state->buf + start, state->private_data);
+		if (ret != 0) {
+			return ret;
+		}
+
+		start = pos+1;
+	}
+
+	/*
+	 * start is the number of bytes consumed as complete lines.  Compact
+	 * on start rather than on pos: an empty line at the very start of
+	 * the buffer has pos == 0 yet still consumes one byte, and leaving
+	 * it in place would report it again on the next call.
+	 */
+	if (start > 0) {
+		if (start < state->offset) {
+			memmove(state->buf,
+				state->buf + start,
+				state->offset - start);
+		}
+		state->offset -= start;
+	}
+
+	return 0;
+}
+
+/*
+ * Read fd until end of file, calling callback for every complete line.
+ *
+ * The buffer is allocated on mem_ctx and grows by "length" bytes (at
+ * least 32) whenever it is full.  It is not freed here, so it lives until
+ * mem_ctx is freed.  Data after the last terminator at end of file is not
+ * passed to the callback.
+ *
+ * Returns 0 at end of file, ENOMEM if the buffer cannot be grown, errno
+ * if the read fails, or the non-zero value returned by the callback.  If
+ * num_lines is not NULL it receives the number of lines passed to the
+ * callback so far on every exit path.
+ */
+int line_read(int fd,
+	      size_t length,
+	      TALLOC_CTX *mem_ctx,
+	      line_process_fn_t callback,
+	      void *private_data,
+	      int *num_lines)
+{
+	struct line_read_state state;
+	int ret = 0;
+
+	if (length < 32) {
+		length = 32;
+	}
+
+	state = (struct line_read_state) {
+		.callback = callback,
+		.private_data = private_data,
+		.hint = length,
+	};
+
+	while (1) {
+		ssize_t n;
+
+		/* Buffer full (or not yet allocated): grow it by the hint */
+		if (state.offset == state.len) {
+			state.len += state.hint;
+			state.buf = talloc_realloc_size(mem_ctx,
+							state.buf,
+							state.len);
+			if (state.buf == NULL) {
+				ret = ENOMEM;
+				break;
+			}
+		}
+
+		n = sys_read(fd,
+			     state.buf + state.offset,
+			     state.len - state.offset);
+		if (n < 0) {
+			/* Capture errno before anything can overwrite it */
+			ret = errno;
+			break;
+		}
+		if (n == 0) {
+			/* End of file; ret is still 0 here */
+			break;
+		}
+
+		state.offset += n;
+
+		ret = line_read_process(&state);
+		if (ret != 0) {
+			break;
+		}
+	}
+
+	if (num_lines != NULL) {
+		*num_lines = state.num_lines;
+	}
+	return ret;
+}
diff --git a/ctdb/common/line.h b/ctdb/common/line.h
new file mode 100644
index 0000000..6b67f1e
--- /dev/null
+++ b/ctdb/common/line.h
@@ -0,0 +1,62 @@
+/*
+ Line based I/O over fds
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_LINE_H__
+#define __CTDB_LINE_H__
+
+#include <talloc.h>
+
+/**
+ * @file line.h
+ *
+ * @brief Line based I/O over pipes and sockets
+ */
+
+/**
+ * @brief The callback routine called to process a line
+ *
+ * @param[in] line The line read
+ * @param[in] private_data Private data for callback
+ * @return 0 to continue processing lines, non-zero to stop reading
+ */
+typedef int (*line_process_fn_t)(char *line, void *private_data);
+
+/**
+ * @brief Read a line (terminated by \n or \0)
+ *
+ * If there is any read error on fd, then errno will be returned.
+ * If callback function returns a non-zero value, then that value will be
+ * returned.
+ *
+ * @param[in] fd The file descriptor
+ * @param[in] length The expected length of a line (this is only a hint)
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] callback Callback function called when a line is read
+ * @param[in] private_data Private data for callback
+ * @param[out] num_lines Number of lines read so far
+ * @return 0 on success, errno on failure
+ */
+int line_read(int fd,
+ size_t length,
+ TALLOC_CTX *mem_ctx,
+ line_process_fn_t callback,
+ void *private_data,
+ int *num_lines);
+
+#endif /* __CTDB_LINE_H__ */
diff --git a/ctdb/common/logging.c b/ctdb/common/logging.c
new file mode 100644
index 0000000..ad6d0c9
--- /dev/null
+++ b/ctdb/common/logging.c
@@ -0,0 +1,745 @@
+/*
+ Logging utilities
+
+ Copyright (C) Andrew Tridgell 2008
+ Copyright (C) Martin Schwenke 2014
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/locale.h"
+#include "system/time.h"
+#include "system/filesys.h"
+#include "system/syslog.h"
+#include "system/dir.h"
+
+#include "lib/util/time_basic.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/debug.h"
+#include "lib/util/blocking.h"
+#include "lib/util/samba_util.h" /* get_myname() */
+
+#include "common/logging.h"
+
+/*
+ * Table relating debug levels to their names.  It is searched by
+ * debug_level_parse() (name -> level, first match wins) and by
+ * debug_level_to_string() (level -> name).  Its element count also
+ * bounds the numeric levels accepted by debug_level_parse().
+ */
+struct {
+ int log_level;
+ const char *log_string;
+} log_string_map[] = {
+ { DEBUG_ERR, "ERROR" },
+ { DEBUG_WARNING, "WARNING" },
+ { 2, "WARNING" },
+ { DEBUG_NOTICE, "NOTICE" },
+ { 4, "NOTICE" },
+ { DEBUG_INFO, "INFO" },
+ { 6, "INFO" },
+ { 7, "INFO" },
+ { 8, "INFO" },
+ { 9, "INFO" },
+ { DEBUG_DEBUG, "DEBUG" },
+};
+
+/*
+ * Parse a debug level given either as a number or as a level name.
+ *
+ * A string starting with a digit is converted with atoi() and accepted
+ * only if the value is a valid index into log_string_map.  Any other
+ * string is compared case-insensitively against the names in
+ * log_string_map over strlen(log_string) characters, so a prefix of a
+ * name matches; the first matching entry is used.
+ *
+ * Returns true and stores the level in *log_level on success, false if
+ * log_string is NULL or cannot be parsed.
+ */
+bool debug_level_parse(const char *log_string, int *log_level)
+{
+	size_t i;
+
+	if (log_string == NULL) {
+		return false;
+	}
+
+	/*
+	 * The <ctype.h> functions are only defined for EOF and values
+	 * representable as unsigned char, so avoid passing a plain
+	 * (possibly negative) char.
+	 */
+	if (isdigit((unsigned char)log_string[0])) {
+		int level = atoi(log_string);
+
+		if (level >= 0 && (size_t)level < ARRAY_SIZE(log_string_map)) {
+			*log_level = level;
+			return true;
+		}
+		return false;
+	}
+
+	for (i=0; i<ARRAY_SIZE(log_string_map); i++) {
+		if (strncasecmp(log_string_map[i].log_string,
+				log_string, strlen(log_string)) == 0) {
+			*log_level = log_string_map[i].log_level;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Return the name of the first log_string_map entry whose level equals
+ * log_level, or "UNKNOWN" if there is none.
+ */
+const char *debug_level_to_string(int log_level)
+{
+	const char *name = "UNKNOWN";
+	size_t idx = 0;
+
+	while (idx < ARRAY_SIZE(log_string_map)) {
+		if (log_string_map[idx].log_level == log_level) {
+			name = log_string_map[idx].log_string;
+			break;
+		}
+		idx++;
+	}
+
+	return name;
+}
+
+/*
+ * Convert log_string to a debug level using debug_level_parse(),
+ * falling back to DEBUG_ERR when the string cannot be parsed.
+ */
+int debug_level_from_string(const char *log_string)
+{
+	int parsed;
+
+	if (! debug_level_parse(log_string, &parsed)) {
+		/* Default debug level */
+		return DEBUG_ERR;
+	}
+
+	return parsed;
+}
+
+/*
+ * file logging backend
+ */
+
+/*
+ * Validate the option of the "file" backend.
+ *
+ * NULL and "-" are always valid.  Any other value is treated as a file
+ * name and is valid only if the directory part of it can be stat()ed
+ * and is a directory.
+ */
+static bool file_log_validate(const char *option)
+{
+	struct stat sb;
+	char *copy;
+	int rc;
+
+	if (option == NULL) {
+		return true;
+	}
+	if (strcmp(option, "-") == 0) {
+		return true;
+	}
+
+	/* dirname() may modify its argument, so work on a copy */
+	copy = strdup(option);
+	if (copy == NULL) {
+		return false;
+	}
+
+	rc = stat(dirname(copy), &sb);
+	free(copy);
+
+	return (rc == 0) && S_ISDIR(sb.st_mode);
+}
+
+/*
+ * Set up the "file" backend.
+ *
+ * With a NULL or "-" option nothing is done.  Otherwise the debug
+ * settings are applied, option is installed as the log file and
+ * logging is switched to DEBUG_FILE.  Always returns 0.
+ */
+static int file_log_setup(TALLOC_CTX *mem_ctx,
+			  const char *option,
+			  const char *app_name)
+{
+	struct debug_settings settings = {
+		.debug_syslog_format = DEBUG_SYSLOG_FORMAT_ALWAYS,
+		.debug_hires_timestamp = true,
+		.debug_no_stderr_redirect = true,
+	};
+
+	if (option == NULL || strcmp(option, "-") == 0) {
+		/*
+		 * Logging to stderr is the default and has already
+		 * been done in logging init
+		 */
+		return 0;
+	}
+
+	/*
+	 * Support logging of fake hostname in local daemons.  This
+	 * hostname is basename(getenv(CTDB_BASE)).
+	 */
+	if (getenv("CTDB_TEST_MODE") != NULL) {
+		const char *base = getenv("CTDB_BASE");
+
+		if (base != NULL) {
+			const char *slash = strrchr(base, '/');
+			const char *fake = base;
+
+			if (slash != NULL) {
+				/* A trailing '/' leaves nothing to use */
+				fake = (slash[1] == '\0') ?
+					"unknown" : slash + 1;
+			}
+
+			debug_set_hostname(fake);
+		}
+	}
+
+	debug_set_settings(&settings, "file", 0, false);
+	debug_set_logfile(option);
+	setup_logging(app_name, DEBUG_FILE);
+
+	return 0;
+}
+
+/*
+ * syslog logging backend
+ */
+
+/* Copied from lib/util/debug.c */
+static int debug_level_to_priority(int level)
+{
+	/*
+	 * map debug levels to syslog() priorities
+	 */
+	static const int priority_map[] = {
+		LOG_ERR,     /* 0 */
+		LOG_WARNING, /* 1 */
+		LOG_NOTICE,  /* 2 */
+		LOG_NOTICE,  /* 3 */
+		LOG_NOTICE,  /* 4 */
+		LOG_NOTICE,  /* 5 */
+		LOG_INFO,    /* 6 */
+		LOG_INFO,    /* 7 */
+		LOG_INFO,    /* 8 */
+		LOG_INFO,    /* 9 */
+	};
+
+	/* Anything outside the table is logged at LOG_DEBUG */
+	if (level < 0 || (size_t)level >= ARRAY_SIZE(priority_map)) {
+		return LOG_DEBUG;
+	}
+
+	return priority_map[level];
+}
+
+/* State of a socket based syslog backend (Unix datagram or UDP) */
+struct syslog_log_state {
+ /* Socket messages are written to; -1 when not open */
+ int fd;
+ /* Application name placed in each message */
+ const char *app_name;
+ /* Hostname used by format_rfc5424(); NULL for the Unix socket case */
+ const char *hostname;
+ /* Formats str into buf, returning its length or -1 on failure */
+ int (*format)(int dbglevel, struct syslog_log_state *state,
+ const char *str, char *buf, int bsize);
+ /* RFC3164 says: The total length of the packet MUST be 1024
+ bytes or less. */
+ char buffer[1024];
+ /* Messages dropped because the socket would have blocked */
+ unsigned int dropped_count;
+};
+
+/* Format messages as per RFC3164
+ *
+ * It appears that some syslog daemon implementations do not allow a
+ * hostname when messages are sent via a Unix domain socket, so omit
+ * it. Similarly, syslogd on FreeBSD does not understand the hostname
+ * part of the header, even when logging via UDP. Note that most
+ * implementations will log messages against "localhost" when logging
+ * via UDP. A timestamp could be sent but rsyslogd on Linux limits
+ * the timestamp logged to the precision that was received on
+ * /dev/log. It seems sane to send degenerate RFC3164 messages
+ * without a header at all, so that the daemon will generate high
+ * resolution timestamps if configured.
+ */
+static int format_rfc3164(int dbglevel, struct syslog_log_state *state,
+			  const char *str, char *buf, int bsize)
+{
+	int pri = LOG_DAEMON | debug_level_to_priority(dbglevel);
+	int len;
+
+	/* "<pri>app[pid]: message", truncated to fit in buf */
+	len = snprintf(buf, bsize, "<%d>%s[%u]: %s",
+		       pri, state->app_name, getpid(), str);
+	buf[bsize-1] = '\0';
+
+	/* Report the number of bytes actually stored */
+	len = MIN(len, bsize - 1);
+	return len;
+}
+
+/* Format messages as per RFC5424
+ *
+ * <165>1 2003-08-24T05:14:15.000003-07:00 192.0.2.1
+ * myproc 8710 - - %% It's time to make the do-nuts.
+ */
+static int format_rfc5424(int dbglevel, struct syslog_log_state *state,
+			  const char *str, char *buf, int bsize)
+{
+	int pri;
+	struct timeval tv;
+	struct timeval_buf tvbuf;
+	int len, s;
+
+	/* Header */
+	pri = LOG_DAEMON | debug_level_to_priority(dbglevel);
+	GetTimeOfDay(&tv);
+	len = snprintf(buf, bsize,
+		       "<%d>1 %s %s %s %u - - ",
+		       pri, timeval_str_buf(&tv, true, true, &tvbuf),
+		       state->hostname, state->app_name, getpid());
+	/*
+	 * A truncated header is not useful...  A negative result
+	 * (output error) must be rejected too, because len is used as
+	 * an index into buf below.
+	 */
+	if (len < 0 || len >= bsize) {
+		return -1;
+	}
+
+	/* Message */
+	s = snprintf(&buf[len], bsize - len, "%s", str);
+	if (s < 0) {
+		/* Do not compute a length from a failed conversion */
+		return -1;
+	}
+	buf[bsize-1] = '\0';
+	len = MIN(len + s, bsize - 1);
+
+	return len;
+}
+
+/*
+ * Debug callback for the plain syslog(3) backend: forwards msg to
+ * syslog() at the priority mapped from the debug level.  private_data
+ * is not used.
+ */
+static void syslog_log(void *private_data, int level, const char *msg)
+{
+ syslog(debug_level_to_priority(level), "%s", msg);
+}
+
+/*
+ * Format msg into state->buffer and write it to state->fd, retrying
+ * while the write is interrupted (EINTR).
+ *
+ * Returns 0 on success, E2BIG if the formatter reports failure (-1),
+ * or the errno of the failed write.
+ */
+static int syslog_log_sock_maybe(struct syslog_log_state *state,
+				 int level, const char *msg)
+{
+	int nbytes;
+
+	nbytes = state->format(level, state, msg, state->buffer,
+			       sizeof(state->buffer));
+	if (nbytes == -1) {
+		return E2BIG;
+	}
+
+	for (;;) {
+		ssize_t written = write(state->fd, state->buffer, nbytes);
+
+		if (written != -1) {
+			return 0;
+		}
+		if (errno != EINTR) {
+			return errno;
+		}
+	}
+}
+/*
+ * Debug callback for the socket based syslog backends.
+ *
+ * If earlier messages were dropped, a "[Dropped N log messages]"
+ * notice is sent first; msg is only attempted once that notice has
+ * been written.  A write that would block increments the dropped
+ * message count; any other failure is silent.
+ */
+static void syslog_log_sock(void *private_data, int level, const char *msg)
+{
+	struct syslog_log_state *state = talloc_get_type_abort(
+		private_data, struct syslog_log_state);
+	int ret;
+
+	if (state->dropped_count > 0) {
+		char notice[64] = { 0 };
+		bool would_block;
+
+		snprintf(notice, sizeof(notice),
+			 "[Dropped %u log messages]\n",
+			 state->dropped_count);
+		notice[sizeof(notice)-1] = '\0';
+
+		ret = syslog_log_sock_maybe(state, level, notice);
+		would_block = (ret == EAGAIN || ret == EWOULDBLOCK);
+		if (would_block) {
+			/*
+			 * The notice could not be sent, so the message
+			 * below is dropped as well: it would most
+			 * likely fail too, and sending it now would
+			 * put the dropped message count out of order.
+			 */
+			state->dropped_count++;
+		}
+		if (ret != 0) {
+			/* Would block, or silent failure on other errors */
+			return;
+		}
+		state->dropped_count = 0;
+	}
+
+	ret = syslog_log_sock_maybe(state, level, msg);
+	if (ret == EAGAIN || ret == EWOULDBLOCK) {
+		state->dropped_count++;
+	}
+}
+
+/*
+ * Set up logging through the C library syslog(3) interface: open the
+ * log with app_name as identifier and install syslog_log() as the
+ * debug callback.  mem_ctx is not used.  Always returns 0.
+ */
+static int syslog_log_setup_syslog(TALLOC_CTX *mem_ctx, const char *app_name)
+{
+ openlog(app_name, LOG_PID, LOG_DAEMON);
+
+ debug_set_callback(NULL, syslog_log);
+
+ return 0;
+}
+
+/*
+ * Talloc destructor for struct syslog_log_state: close the socket if
+ * one is open and mark it closed.  Always returns 0.
+ */
+static int syslog_log_state_destructor(struct syslog_log_state *state)
+{
+	if (state->fd == -1) {
+		return 0;
+	}
+
+	close(state->fd);
+	state->fd = -1;
+
+	return 0;
+}
+
+/*
+ * Allocate a zeroed syslog_log_state on mem_ctx with no socket open
+ * (fd == -1), record app_name and attach the destructor that closes
+ * the socket.
+ *
+ * Returns 0 and stores the new state in *result, or ENOMEM.
+ */
+static int syslog_log_setup_common(TALLOC_CTX *mem_ctx, const char *app_name,
+				   struct syslog_log_state **result)
+{
+	struct syslog_log_state *s;
+
+	s = talloc_zero(mem_ctx, struct syslog_log_state);
+	if (s == NULL) {
+		return ENOMEM;
+	}
+
+	s->app_name = app_name;
+	s->fd = -1;
+	talloc_set_destructor(s, syslog_log_state_destructor);
+
+	*result = s;
+	return 0;
+}
+
+#ifdef _PATH_LOG
+/*
+ * Set up logging to the local syslog daemon over a non-blocking Unix
+ * datagram socket connected to _PATH_LOG, using RFC3164 formatting.
+ *
+ * Returns 0 on success.  On failure the state (and its socket) is
+ * freed and ENOMEM or the errno of the failing call is returned.
+ */
+static int syslog_log_setup_nonblocking(TALLOC_CTX *mem_ctx,
+					const char *app_name)
+{
+	struct syslog_log_state *state = NULL;
+	/*
+	 * Zero the whole address: the length passed to connect() covers
+	 * the entire structure, and strncpy() below deliberately leaves
+	 * the last byte of sun_path alone so the path stays terminated.
+	 */
+	struct sockaddr_un dest = { .sun_family = AF_UNIX };
+	int ret;
+
+	ret = syslog_log_setup_common(mem_ctx, app_name, &state);
+	if (ret != 0) {
+		return ret;
+	}
+
+	state->fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (state->fd == -1) {
+		goto fail;
+	}
+
+	strncpy(dest.sun_path, _PATH_LOG, sizeof(dest.sun_path)-1);
+	ret = connect(state->fd,
+		      (struct sockaddr *)&dest, sizeof(dest));
+	if (ret == -1) {
+		goto fail;
+	}
+
+	ret = set_blocking(state->fd, false);
+	if (ret != 0) {
+		goto fail;
+	}
+
+	if (! set_close_on_exec(state->fd)) {
+		goto fail;
+	}
+
+	state->hostname = NULL; /* Make this explicit */
+	state->format = format_rfc3164;
+
+	debug_set_callback(state, syslog_log_sock);
+
+	return 0;
+
+fail:
+	/* Save errno first: freeing the state closes the socket */
+	ret = errno;
+	talloc_free(state);
+	return ret;
+}
+#endif /* _PATH_LOG */
+
+/*
+ * Set up logging to a syslog daemon over UDP on the loopback address,
+ * port 514, formatting messages per RFC5424 if rfc5424 is true and per
+ * RFC3164 otherwise.
+ *
+ * Returns 0 on success.  On failure the state (and its socket) is
+ * freed and ENOMEM or the errno of the failing call is returned.
+ */
+static int syslog_log_setup_udp(TALLOC_CTX *mem_ctx, const char *app_name,
+				bool rfc5424)
+{
+	struct syslog_log_state *state = NULL;
+	/* Zero the whole address so no uninitialised bytes reach connect() */
+	struct sockaddr_in dest = { .sin_family = AF_INET };
+	int ret;
+
+	ret = syslog_log_setup_common(mem_ctx, app_name, &state);
+	if (ret != 0) {
+		return ret;
+	}
+
+	state->fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+	if (state->fd == -1) {
+		goto fail;
+	}
+
+	dest.sin_port = htons(514);
+	dest.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+	ret = connect(state->fd,
+		      (struct sockaddr *)&dest, sizeof(dest));
+	if (ret == -1) {
+		goto fail;
+	}
+
+	if (! set_close_on_exec(state->fd)) {
+		goto fail;
+	}
+
+	state->hostname = get_myname(state);
+	if (state->hostname == NULL) {
+		/* Use a fallback instead of failing initialisation */
+		state->hostname = "localhost";
+	}
+	if (rfc5424) {
+		state->format = format_rfc5424;
+	} else {
+		state->format = format_rfc3164;
+	}
+
+	debug_set_callback(state, syslog_log_sock);
+
+	return 0;
+
+fail:
+	/* Save errno first: freeing the state closes the socket */
+	ret = errno;
+	talloc_free(state);
+	return ret;
+}
+
+/*
+ * Validate the option of the "syslog" backend.  Valid values are NULL,
+ * "udp", "udp-rfc5424" and, where _PATH_LOG is defined, "nonblocking".
+ */
+static bool syslog_log_validate(const char *option)
+{
+	if (option == NULL) {
+		return true;
+	}
+
+#ifdef _PATH_LOG
+	if (strcmp(option, "nonblocking") == 0) {
+		return true;
+	}
+#endif
+
+	if (strcmp(option, "udp") == 0 ||
+	    strcmp(option, "udp-rfc5424") == 0) {
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Set up the "syslog" backend selected by option: NULL for syslog(3),
+ * "nonblocking" (where _PATH_LOG is defined) for the Unix datagram
+ * socket, "udp" or "udp-rfc5424" for UDP.  Returns the result of the
+ * selected setup function, or EINVAL for any other option.
+ */
+static int syslog_log_setup(TALLOC_CTX *mem_ctx, const char *option,
+			    const char *app_name)
+{
+	if (option == NULL) {
+		return syslog_log_setup_syslog(mem_ctx, app_name);
+	}
+
+#ifdef _PATH_LOG
+	if (strcmp(option, "nonblocking") == 0) {
+		return syslog_log_setup_nonblocking(mem_ctx, app_name);
+	}
+#endif
+
+	if (strcmp(option, "udp") == 0) {
+		return syslog_log_setup_udp(mem_ctx, app_name, false);
+	}
+
+	if (strcmp(option, "udp-rfc5424") == 0) {
+		return syslog_log_setup_udp(mem_ctx, app_name, true);
+	}
+
+	return EINVAL;
+}
+
+/* Description of one logging backend */
+struct log_backend {
+ /* Backend name: the part of the logging string before ':' */
+ const char *name;
+ /* Check a backend option (NULL if none was given) */
+ bool (*validate)(const char *option);
+ /* Start logging via this backend; returns 0 or an errno value */
+ int (*setup)(TALLOC_CTX *mem_ctx,
+ const char *option,
+ const char *app_name);
+};
+
+/* Backends known to log_backend_parse(), looked up by name */
+static struct log_backend log_backend[] = {
+ {
+ .name = "file",
+ .validate = file_log_validate,
+ .setup = file_log_setup,
+ },
+ {
+ .name = "syslog",
+ .validate = syslog_log_validate,
+ .setup = syslog_log_setup,
+ },
+};
+
+/*
+ * Split a logging specification of the form "name[:option]" and look
+ * up the backend called name.
+ *
+ * On success 0 is returned, *backend points at the matching entry of
+ * log_backend[] and *backend_option is either NULL (no option given)
+ * or a copy of the option allocated on mem_ctx, owned by the caller.
+ *
+ * Returns ENOMEM on allocation failure, EINVAL if no backend name is
+ * present and ENOENT if the name matches no known backend.
+ */
+static int log_backend_parse(TALLOC_CTX *mem_ctx,
+			     const char *logging,
+			     struct log_backend **backend,
+			     char **backend_option)
+{
+	struct log_backend *b = NULL;
+	char *t, *name, *option;
+	char *saveptr = NULL;
+	size_t i;
+
+	t = talloc_strdup(mem_ctx, logging);
+	if (t == NULL) {
+		return ENOMEM;
+	}
+
+	/*
+	 * strtok_r() keeps its position in saveptr instead of hidden
+	 * static state, so this cannot disturb (or be disturbed by)
+	 * any other tokenising in progress.
+	 */
+	name = strtok_r(t, ":", &saveptr);
+	if (name == NULL) {
+		talloc_free(t);
+		return EINVAL;
+	}
+	option = strtok_r(NULL, ":", &saveptr);
+
+	for (i=0; i<ARRAY_SIZE(log_backend); i++) {
+		if (strcmp(log_backend[i].name, name) == 0) {
+			b = &log_backend[i];
+			break;
+		}
+	}
+
+	if (b == NULL) {
+		talloc_free(t);
+		return ENOENT;
+	}
+
+	*backend = b;
+	if (option != NULL) {
+		/* option points into t, which is freed below */
+		*backend_option = talloc_strdup(mem_ctx, option);
+		if (*backend_option == NULL) {
+			talloc_free(t);
+			return ENOMEM;
+		}
+	} else {
+		*backend_option = NULL;
+	}
+
+	talloc_free(t);
+	return 0;
+}
+
+/*
+ * Check a logging specification ("backend[:option]") without setting
+ * anything up.  Returns true if the backend is known and its validate
+ * function accepts the option, false otherwise.
+ */
+bool logging_validate(const char *logging)
+{
+	TALLOC_CTX *tmp_ctx;
+	struct log_backend *backend;
+	char *option;
+	bool status = false;
+
+	tmp_ctx = talloc_new(NULL);
+	if (tmp_ctx == NULL) {
+		return false;
+	}
+
+	if (log_backend_parse(tmp_ctx, logging, &backend, &option) == 0) {
+		status = backend->validate(option);
+	}
+
+	/* Releases the parsed option as well */
+	talloc_free(tmp_ctx);
+	return status;
+}
+
+/*
+ * Initialise logging.
+ *
+ * Logging starts on stderr.  The debug level is taken from debug_level
+ * or, if that is NULL, from the CTDB_DEBUGLEVEL environment variable.
+ * The backend is taken from logging or, if that is NULL, from the
+ * CTDB_LOGGING environment variable.
+ *
+ * Returns 0 on success, EINVAL for an unparsable debug level or a
+ * missing/empty logging specification, otherwise the error from
+ * parsing the specification or from the backend's setup function.
+ */
+int logging_init(TALLOC_CTX *mem_ctx, const char *logging,
+		 const char *debug_level, const char *app_name)
+{
+	struct log_backend *backend = NULL;
+	char *option = NULL;
+	int level;
+	int ret;
+
+	setup_logging(app_name, DEBUG_DEFAULT_STDERR);
+
+	if (debug_level == NULL) {
+		debug_level = getenv("CTDB_DEBUGLEVEL");
+	}
+	if (! debug_level_parse(debug_level, &level)) {
+		return EINVAL;
+	}
+	debuglevel_set(level);
+
+	if (logging == NULL) {
+		logging = getenv("CTDB_LOGGING");
+	}
+	if (logging == NULL || logging[0] == '\0') {
+		return EINVAL;
+	}
+
+	ret = log_backend_parse(mem_ctx, logging, &backend, &option);
+	if (ret == ENOENT) {
+		fprintf(stderr, "Invalid logging option \'%s\'\n",
+			logging);
+	}
+	if (ret == 0) {
+		ret = backend->setup(mem_ctx, option, app_name);
+	}
+
+	talloc_free(option);
+	return ret;
+}
+
+/* Reopen the logs; returns the result of reopen_logs_internal() */
+bool logging_reopen_logs(void)
+{
+	return reopen_logs_internal();
+}
+
+/* Optional hook run by the SIGHUP handler before logs are reopened */
+struct logging_reopen_logs_data {
+ /* Function to call */
+ void (*hook)(void *private_data);
+ /* Argument passed to hook */
+ void *private_data;
+};
+
+/*
+ * SIGHUP handler: run the registered hook (if any), then reopen the
+ * logs and report the outcome.
+ */
+static void logging_sig_hup_handler(struct tevent_context *ev,
+				    struct tevent_signal *se,
+				    int signum,
+				    int count,
+				    void *dont_care,
+				    void *private_data)
+{
+	struct logging_reopen_logs_data *data = NULL;
+
+	if (private_data != NULL) {
+		data = talloc_get_type_abort(
+			private_data, struct logging_reopen_logs_data);
+	}
+
+	if (data != NULL && data->hook != NULL) {
+		data->hook(data->private_data);
+	}
+
+	if (! logging_reopen_logs()) {
+		D_WARNING("Failed to reopen logs\n");
+		return;
+	}
+
+	D_NOTICE("Reopened logs\n");
+}
+
+/*
+ * Install a SIGHUP handler on ev that reopens the logs.
+ *
+ * If hook is not NULL it is called with private_data before the logs
+ * are reopened.  The handler and the hook data are allocated on
+ * talloc_ctx.  Returns true on success, false on allocation or
+ * registration failure.
+ */
+bool logging_setup_sighup_handler(struct tevent_context *ev,
+				  TALLOC_CTX *talloc_ctx,
+				  void (*hook)(void *private_data),
+				  void *private_data)
+{
+	struct logging_reopen_logs_data *data = NULL;
+	struct tevent_signal *se;
+
+	if (hook != NULL) {
+		data = talloc(talloc_ctx, struct logging_reopen_logs_data);
+		if (data == NULL) {
+			return false;
+		}
+
+		*data = (struct logging_reopen_logs_data) {
+			.hook = hook,
+			.private_data = private_data,
+		};
+	}
+
+	se = tevent_add_signal(ev,
+			       talloc_ctx,
+			       SIGHUP,
+			       0,
+			       logging_sig_hup_handler,
+			       data);
+	if (se != NULL) {
+		return true;
+	}
+
+	/* Registration failed: the hook data is no longer needed */
+	talloc_free(data);
+	return false;
+}
diff --git a/ctdb/common/logging.h b/ctdb/common/logging.h
new file mode 100644
index 0000000..542b4a3
--- /dev/null
+++ b/ctdb/common/logging.h
@@ -0,0 +1,51 @@
+/*
+ Logging utilities
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_LOGGING_H__
+#define __CTDB_LOGGING_H__
+
+#include <talloc.h>
+#include <tevent.h>
+#include "lib/util/debug.h"
+
+#define DEBUG_ERR DBGLVL_ERR
+#define DEBUG_WARNING DBGLVL_WARNING
+#define DEBUG_NOTICE DBGLVL_NOTICE
+#define DEBUG_INFO DBGLVL_INFO
+#define DEBUG_DEBUG DBGLVL_DEBUG
+
+/* These are used in many places, so define them here to avoid churn */
+#define DEBUG_ALERT DEBUG_ERR
+#define DEBUG_CRIT DEBUG_ERR
+
+/* Parse a numeric or named debug level; false if it cannot be parsed */
+bool debug_level_parse(const char *log_string, int *log_level);
+/* Name of a debug level, or "UNKNOWN" */
+const char *debug_level_to_string(int log_level);
+/* Like debug_level_parse(), but returns DEBUG_ERR on failure */
+int debug_level_from_string(const char *log_string);
+
+/* Check a "backend[:option]" logging specification */
+bool logging_validate(const char *logging);
+/* Set the debug level and start the backend; 0 or an errno value */
+int logging_init(TALLOC_CTX *mem_ctx, const char *logging,
+ const char *debuglevel, const char *app_name);
+
+/* Reopen the logs; true on success */
+bool logging_reopen_logs(void);
+/* Reopen logs on SIGHUP, calling hook (if not NULL) first */
+bool logging_setup_sighup_handler(struct tevent_context *ev,
+ TALLOC_CTX *talloc_ctx,
+ void (*hook)(void *private_data),
+ void *private_data);
+
+#endif /* __CTDB_LOGGING_H__ */
diff --git a/ctdb/common/logging_conf.c b/ctdb/common/logging_conf.c
new file mode 100644
index 0000000..38b3003
--- /dev/null
+++ b/ctdb/common/logging_conf.c
@@ -0,0 +1,127 @@
+/*
+ CTDB logging config handling
+
+ Copyright (C) Martin Schwenke 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+
+#include "common/conf.h"
+#include "common/logging.h"
+#include "common/logging_conf.h"
+
+#define LOGGING_LOCATION_DEFAULT "file:" LOGDIR "/log.ctdb"
+#define LOGGING_LOG_LEVEL_DEFAULT "NOTICE"
+
+/*
+ * Config validation callback for the log level option: accept the new
+ * value only if debug_level_parse() can parse it.  The key, the old
+ * value and the update mode are not consulted.
+ */
+static bool logging_conf_validate_log_level(const char *key,
+					    const char *old_loglevel,
+					    const char *new_loglevel,
+					    enum conf_update_mode mode)
+{
+	int parsed;
+
+	return debug_level_parse(new_loglevel, &parsed);
+}
+
+/*
+ * Config validation callback for the logging location option.
+ *
+ * The new value must pass logging_validate().  In addition, a reload
+ * (CONF_MODE_RELOAD) that would change the location is rejected with a
+ * warning.
+ */
+static bool logging_conf_validate_location(const char *key,
+					   const char *old_location,
+					   const char *new_location,
+					   enum conf_update_mode mode)
+{
+	if (! logging_validate(new_location)) {
+		return false;
+	}
+
+	if (mode != CONF_MODE_RELOAD) {
+		return true;
+	}
+
+	if (strcmp(old_location, new_location) == 0) {
+		/* Unchanged, nothing to ignore */
+		return true;
+	}
+
+	D_WARNING("Ignoring update of %s config option \"%s\"\n",
+		  LOGGING_CONF_SECTION, key);
+	return false;
+}
+
+/*
+ * Define the logging config section with its two string options,
+ * location and log level, together with their validation callbacks.
+ * LOGGING_LOG_LEVEL_DEFAULT is used as the default log level when
+ * default_log_level is NULL.
+ */
+void logging_conf_init(struct conf_context *conf,
+		       const char *default_log_level)
+{
+	const char *log_level = default_log_level;
+
+	if (log_level == NULL) {
+		log_level = LOGGING_LOG_LEVEL_DEFAULT;
+	}
+
+	conf_define_section(conf, LOGGING_CONF_SECTION, NULL);
+
+	conf_define_string(conf,
+			   LOGGING_CONF_SECTION,
+			   LOGGING_CONF_LOCATION,
+			   LOGGING_LOCATION_DEFAULT,
+			   logging_conf_validate_location);
+
+	conf_define_string(conf,
+			   LOGGING_CONF_SECTION,
+			   LOGGING_CONF_LOG_LEVEL,
+			   log_level,
+			   logging_conf_validate_log_level);
+}
+
+/*
+ * Return the configured logging location, or
+ * LOGGING_LOCATION_DEFAULT if the option cannot be read.
+ */
+const char *logging_conf_location(struct conf_context *conf)
+{
+	const char *location = NULL;
+	int ret;
+
+	ret = conf_get_string(conf,
+			      LOGGING_CONF_SECTION,
+			      LOGGING_CONF_LOCATION,
+			      &location,
+			      NULL);
+
+	/* Can't really happen, but return default */
+	return (ret == 0) ? location : LOGGING_LOCATION_DEFAULT;
+}
+
+/*
+ * Return the configured log level string, or
+ * LOGGING_LOG_LEVEL_DEFAULT if the option cannot be read.
+ */
+const char *logging_conf_log_level(struct conf_context *conf)
+{
+	const char *log_level = NULL;
+	int ret;
+
+	ret = conf_get_string(conf,
+			      LOGGING_CONF_SECTION,
+			      LOGGING_CONF_LOG_LEVEL,
+			      &log_level,
+			      NULL);
+
+	/* Can't really happen, but return default */
+	return (ret == 0) ? log_level : LOGGING_LOG_LEVEL_DEFAULT;
+}
diff --git a/ctdb/common/logging_conf.h b/ctdb/common/logging_conf.h
new file mode 100644
index 0000000..fab478d
--- /dev/null
+++ b/ctdb/common/logging_conf.h
@@ -0,0 +1,36 @@
+/*
+ CTDB logging config handling
+
+ Copyright (C) Martin Schwenke 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __LOGGING_CONF_H__
+#define __LOGGING_CONF_H__
+
+#include "common/conf.h"
+
+#define LOGGING_CONF_SECTION "logging"
+
+#define LOGGING_CONF_LOCATION "location"
+#define LOGGING_CONF_LOG_LEVEL "log level"
+
+/*
+ * Define the logging section and its options in conf.  A NULL
+ * default_log_level selects the built-in default.
+ */
+void logging_conf_init(struct conf_context *conf,
+ const char *default_log_level);
+
+/* Current values of the "location" and "log level" options */
+const char *logging_conf_location(struct conf_context *conf);
+const char *logging_conf_log_level(struct conf_context *conf);
+
+#endif /* __LOGGING_CONF_H__ */
diff --git a/ctdb/common/path.c b/ctdb/common/path.c
new file mode 100644
index 0000000..ea3b08f
--- /dev/null
+++ b/ctdb/common/path.c
@@ -0,0 +1,211 @@
+/*
+ Construct runtime paths
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include "lib/util/debug.h"
+
+#include "common/path.h"
+
+#define CTDB_CONFIG_FILE "ctdb.conf"
+
+/*
+ * Runtime directories.  The directories start out as the compiled-in
+ * defaults; in test mode each one is replaced, on first use, by a path
+ * below basedir.
+ */
+struct {
+ /* Value of CTDB_BASE; only set in test mode */
+ char *basedir;
+ char datadir[PATH_MAX];
+ char etcdir[PATH_MAX];
+ char rundir[PATH_MAX];
+ char vardir[PATH_MAX];
+ /* True when CTDB_TEST_MODE is set in the environment */
+ bool test_mode;
+ /* Each *_set flag records that the matching value has been set up */
+ bool basedir_set;
+ bool datadir_set;
+ bool etcdir_set;
+ bool rundir_set;
+ bool vardir_set;
+} ctdb_paths = {
+ .datadir = CTDB_DATADIR,
+ .etcdir = CTDB_ETCDIR,
+ .rundir = CTDB_RUNDIR,
+ .vardir = CTDB_VARDIR,
+};
+
+/*
+ * Work out whether test mode is active (CTDB_TEST_MODE is set) and, if
+ * so, take the base directory from CTDB_BASE.  Aborts if test mode is
+ * active but CTDB_BASE is not set.
+ */
+static void path_set_basedir(void)
+{
+	if (getenv("CTDB_TEST_MODE") != NULL) {
+		ctdb_paths.test_mode = true;
+
+		ctdb_paths.basedir = getenv("CTDB_BASE");
+		if (ctdb_paths.basedir == NULL) {
+			D_ERR("Broken CTDB setup, CTDB_BASE not set in test mode\n");
+			abort();
+		}
+	}
+
+	ctdb_paths.basedir_set = true;
+}
+
+/*
+ * In test mode, overwrite path (a buffer of PATH_MAX bytes) with
+ * "<basedir>/<subdir>", or with "<basedir>" if subdir is NULL.
+ * Outside test mode path is left untouched.
+ *
+ * Returns false only if the constructed path does not fit in PATH_MAX
+ * bytes.
+ */
+static bool path_construct(char *path, const char *subdir)
+{
+	char tmp[PATH_MAX];
+	int len;
+
+	if (! ctdb_paths.basedir_set) {
+		path_set_basedir();
+	}
+
+	if (! ctdb_paths.test_mode) {
+		return true;
+	}
+
+	if (subdir != NULL) {
+		len = snprintf(tmp,
+			       sizeof(tmp),
+			       "%s/%s",
+			       ctdb_paths.basedir,
+			       subdir);
+	} else {
+		len = snprintf(tmp, sizeof(tmp), "%s", ctdb_paths.basedir);
+	}
+
+	/* Truncated */
+	if ((size_t)len >= sizeof(tmp)) {
+		return false;
+	}
+
+	strncpy(path, tmp, PATH_MAX);
+	return true;
+}
+
+/*
+ * Return the data directory.  On first use in test mode it is set to
+ * "<basedir>/share"; if that fails an error is logged and the
+ * construction is retried on the next call.
+ */
+const char *path_datadir(void)
+{
+	if (ctdb_paths.datadir_set) {
+		return ctdb_paths.datadir;
+	}
+
+	if (path_construct(ctdb_paths.datadir, "share")) {
+		ctdb_paths.datadir_set = true;
+	} else {
+		D_ERR("Failed to construct DATADIR\n");
+	}
+
+	return ctdb_paths.datadir;
+}
+
+/*
+ * Return the configuration directory.  On first use in test mode it
+ * is set to the base directory itself; if that fails an error is
+ * logged and the construction is retried on the next call.
+ */
+const char *path_etcdir(void)
+{
+	if (ctdb_paths.etcdir_set) {
+		return ctdb_paths.etcdir;
+	}
+
+	if (path_construct(ctdb_paths.etcdir, NULL)) {
+		ctdb_paths.etcdir_set = true;
+	} else {
+		D_ERR("Failed to construct ETCDIR\n");
+	}
+
+	return ctdb_paths.etcdir;
+}
+
+/*
+ * Return the runtime directory.  On first use in test mode it is set
+ * to "<basedir>/run"; if that fails an error is logged and the
+ * construction is retried on the next call.
+ */
+const char *path_rundir(void)
+{
+	if (ctdb_paths.rundir_set) {
+		return ctdb_paths.rundir;
+	}
+
+	if (path_construct(ctdb_paths.rundir, "run")) {
+		ctdb_paths.rundir_set = true;
+	} else {
+		D_ERR("Failed to construct RUNDIR\n");
+	}
+
+	return ctdb_paths.rundir;
+}
+
+/*
+ * Return the state directory.  On first use in test mode it is set to
+ * "<basedir>/var"; if that fails an error is logged and the
+ * construction is retried on the next call.
+ */
+const char *path_vardir(void)
+{
+	if (ctdb_paths.vardir_set) {
+		return ctdb_paths.vardir;
+	}
+
+	if (path_construct(ctdb_paths.vardir, "var")) {
+		ctdb_paths.vardir_set = true;
+	} else {
+		D_ERR("Failed to construct VARDIR\n");
+	}
+
+	return ctdb_paths.vardir;
+}
+
+/*
+ * Return "<datadir>/<path>" allocated on mem_ctx, or NULL if the
+ * allocation fails.
+ */
+char *path_datadir_append(TALLOC_CTX *mem_ctx, const char *path)
+{
+ return talloc_asprintf(mem_ctx, "%s/%s", path_datadir(), path);
+}
+
+/*
+ * Return "<etcdir>/<path>" allocated on mem_ctx, or NULL if the
+ * allocation fails.
+ */
+char *path_etcdir_append(TALLOC_CTX *mem_ctx, const char *path)
+{
+ return talloc_asprintf(mem_ctx, "%s/%s", path_etcdir(), path);
+}
+
+/*
+ * Return "<rundir>/<path>" allocated on mem_ctx, or NULL if the
+ * allocation fails.
+ */
+char *path_rundir_append(TALLOC_CTX *mem_ctx, const char *path)
+{
+ return talloc_asprintf(mem_ctx, "%s/%s", path_rundir(), path);
+}
+
+/*
+ * Return "<vardir>/<path>" allocated on mem_ctx, or NULL if the
+ * allocation fails.
+ */
+char *path_vardir_append(TALLOC_CTX *mem_ctx, const char *path)
+{
+ return talloc_asprintf(mem_ctx, "%s/%s", path_vardir(), path);
+}
+
+/*
+ * Return the path of the configuration file (CTDB_CONFIG_FILE inside
+ * the configuration directory) allocated on mem_ctx, or NULL if the
+ * allocation fails.
+ */
+char *path_config(TALLOC_CTX *mem_ctx)
+{
+ return path_etcdir_append(mem_ctx, CTDB_CONFIG_FILE);
+}
+
+/*
+ * Return the socket path for daemon, allocated on mem_ctx (NULL if
+ * the allocation fails).
+ *
+ * This is "<rundir>/<daemon>.socket", except that for "ctdbd" the
+ * CTDB_SOCKET environment variable, if set, takes precedence.
+ */
+char *path_socket(TALLOC_CTX *mem_ctx, const char *daemon)
+{
+	const char *override = NULL;
+
+	if (strcmp(daemon, "ctdbd") == 0) {
+		override = getenv("CTDB_SOCKET");
+	}
+
+	if (override != NULL) {
+		return talloc_strdup(mem_ctx, override);
+	}
+
+	return talloc_asprintf(mem_ctx,
+			       "%s/%s.socket",
+			       path_rundir(),
+			       daemon);
+}
+
+/*
+ * Return "<rundir>/<daemon>.pid" allocated on mem_ctx, or NULL if the
+ * allocation fails.
+ */
+char *path_pidfile(TALLOC_CTX *mem_ctx, const char *daemon)
+{
+ return talloc_asprintf(mem_ctx,
+ "%s/%s.pid",
+ path_rundir(),
+ daemon);
+}
diff --git a/ctdb/common/path.h b/ctdb/common/path.h
new file mode 100644
index 0000000..dcc6c20
--- /dev/null
+++ b/ctdb/common/path.h
@@ -0,0 +1,39 @@
+/*
+ Construct runtime paths
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_PATH_H__
+#define __CTDB_PATH_H__
+
+#include <talloc.h>
+
+/*
+ * Runtime directories.  The returned strings are held in static
+ * storage inside path.c and must not be freed.
+ */
+const char *path_datadir(void);
+const char *path_etcdir(void);
+const char *path_rundir(void);
+const char *path_vardir(void);
+
+/* "<directory>/<path>", allocated on mem_ctx; NULL on failure */
+char *path_datadir_append(TALLOC_CTX *mem_ctx, const char *path);
+char *path_etcdir_append(TALLOC_CTX *mem_ctx, const char *path);
+char *path_rundir_append(TALLOC_CTX *mem_ctx, const char *path);
+char *path_vardir_append(TALLOC_CTX *mem_ctx, const char *path);
+
+/* Well-known files, allocated on mem_ctx; NULL on failure */
+char *path_config(TALLOC_CTX *mem_ctx);
+char *path_socket(TALLOC_CTX *mem_ctx, const char *daemon);
+char *path_pidfile(TALLOC_CTX *mem_ctx, const char *daemon);
+
+#endif /* __CTDB_PATH_H__ */
diff --git a/ctdb/common/path_tool.c b/ctdb/common/path_tool.c
new file mode 100644
index 0000000..44d29b6
--- /dev/null
+++ b/ctdb/common/path_tool.c
@@ -0,0 +1,384 @@
+/*
+ path tool
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+
+#include "lib/util/debug.h"
+
+#include "common/logging.h"
+#include "common/cmdline.h"
+#include "common/path.h"
+#include "common/path_tool.h"
+
+/* State shared by all path tool sub-command handlers. */
+struct path_tool_context {
+ /* Command-line context filled in by cmdline_init() in path_tool_init() */
+ struct cmdline_context *cmdline;
+};
+
+/*
+ * "config" command: print the path of the CTDB config file.
+ *
+ * Takes no arguments; if any are given, usage is printed and EINVAL is
+ * returned.  Returns 1 if path_config() yields NULL, 0 on success.
+ */
+static int path_tool_config(TALLOC_CTX *mem_ctx,
+			    int argc,
+			    const char **argv,
+			    void *private_data)
+{
+	struct path_tool_context *ctx = talloc_get_type_abort(
+		private_data, struct path_tool_context);
+	char *p;
+
+	if (argc != 0) {
+		cmdline_usage(ctx->cmdline, "config");
+		return EINVAL;
+	}
+
+	/*
+	 * Check the result before printing, exactly as the other
+	 * allocating sub-commands do: handing NULL to printf("%s") is
+	 * undefined behaviour.
+	 */
+	p = path_config(mem_ctx);
+	if (p == NULL) {
+		D_ERR("Memory allocation error\n");
+		return 1;
+	}
+
+	printf("%s\n", p);
+
+	return 0;
+}
+
+/*
+ * "pidfile" command: print the pidfile path for the daemon named by the
+ * single argument.  Wrong argument count prints usage and returns EINVAL;
+ * a NULL path is reported and returns 1.
+ */
+static int path_tool_pidfile(TALLOC_CTX *mem_ctx,
+			     int argc,
+			     const char **argv,
+			     void *private_data)
+{
+	struct path_tool_context *tool;
+	char *pidfile;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc != 1) {
+		cmdline_usage(tool->cmdline, "pidfile");
+		return EINVAL;
+	}
+
+	pidfile = path_pidfile(mem_ctx, argv[0]);
+	if (pidfile != NULL) {
+		printf("%s\n", pidfile);
+		return 0;
+	}
+
+	D_ERR("Memory allocation error\n");
+	return 1;
+}
+
+/*
+ * "socket" command: print the socket path for the daemon named by the
+ * single argument.  Wrong argument count prints usage and returns EINVAL;
+ * a NULL path is reported and returns 1.
+ */
+static int path_tool_socket(TALLOC_CTX *mem_ctx,
+			    int argc,
+			    const char **argv,
+			    void *private_data)
+{
+	struct path_tool_context *tool;
+	char *sockpath;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc != 1) {
+		cmdline_usage(tool->cmdline, "socket");
+		return EINVAL;
+	}
+
+	sockpath = path_socket(mem_ctx, argv[0]);
+	if (sockpath != NULL) {
+		printf("%s\n", sockpath);
+		return 0;
+	}
+
+	D_ERR("Memory allocation error\n");
+	return 1;
+}
+
+/*
+ * "datadir" command: print the value of path_datadir().  Accepts no
+ * arguments; otherwise usage is printed and EINVAL returned.
+ */
+static int path_tool_datadir(TALLOC_CTX *mem_ctx,
+			     int argc,
+			     const char **argv,
+			     void *private_data)
+{
+	struct path_tool_context *tool;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc == 0) {
+		printf("%s\n", path_datadir());
+		return 0;
+	}
+
+	cmdline_usage(tool->cmdline, "datadir");
+	return EINVAL;
+}
+
+/*
+ * "datadir append" command: print the result of path_datadir_append()
+ * for the single <path> argument.  Wrong argument count prints usage and
+ * returns EINVAL; a NULL result is reported and returns 1.
+ */
+static int path_tool_datadir_append(TALLOC_CTX *mem_ctx,
+				    int argc,
+				    const char **argv,
+				    void *private_data)
+{
+	struct path_tool_context *tool;
+	char *full_path;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc != 1) {
+		cmdline_usage(tool->cmdline, "datadir append");
+		return EINVAL;
+	}
+
+	full_path = path_datadir_append(mem_ctx, argv[0]);
+	if (full_path != NULL) {
+		printf("%s\n", full_path);
+		return 0;
+	}
+
+	D_ERR("Memory allocation error\n");
+	return 1;
+}
+
+/*
+ * "etcdir" command: print the value of path_etcdir().  Accepts no
+ * arguments; otherwise usage is printed and EINVAL returned.
+ */
+static int path_tool_etcdir(TALLOC_CTX *mem_ctx,
+			    int argc,
+			    const char **argv,
+			    void *private_data)
+{
+	struct path_tool_context *tool;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc == 0) {
+		printf("%s\n", path_etcdir());
+		return 0;
+	}
+
+	cmdline_usage(tool->cmdline, "etcdir");
+	return EINVAL;
+}
+
+/*
+ * "etcdir append" command: print the result of path_etcdir_append() for
+ * the single <path> argument.  Wrong argument count prints usage and
+ * returns EINVAL; a NULL result is reported and returns 1.
+ */
+static int path_tool_etcdir_append(TALLOC_CTX *mem_ctx,
+				   int argc,
+				   const char **argv,
+				   void *private_data)
+{
+	struct path_tool_context *tool;
+	char *full_path;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc != 1) {
+		cmdline_usage(tool->cmdline, "etcdir append");
+		return EINVAL;
+	}
+
+	full_path = path_etcdir_append(mem_ctx, argv[0]);
+	if (full_path != NULL) {
+		printf("%s\n", full_path);
+		return 0;
+	}
+
+	D_ERR("Memory allocation error\n");
+	return 1;
+}
+
+/*
+ * "rundir" command: print the value of path_rundir().  Accepts no
+ * arguments; otherwise usage is printed and EINVAL returned.
+ */
+static int path_tool_rundir(TALLOC_CTX *mem_ctx,
+			    int argc,
+			    const char **argv,
+			    void *private_data)
+{
+	struct path_tool_context *tool;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc == 0) {
+		printf("%s\n", path_rundir());
+		return 0;
+	}
+
+	cmdline_usage(tool->cmdline, "rundir");
+	return EINVAL;
+}
+
+/*
+ * "rundir append" command: print the result of path_rundir_append() for
+ * the single <path> argument.  Wrong argument count prints usage and
+ * returns EINVAL; a NULL result is reported and returns 1.
+ */
+static int path_tool_rundir_append(TALLOC_CTX *mem_ctx,
+				   int argc,
+				   const char **argv,
+				   void *private_data)
+{
+	struct path_tool_context *tool;
+	char *full_path;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc != 1) {
+		cmdline_usage(tool->cmdline, "rundir append");
+		return EINVAL;
+	}
+
+	full_path = path_rundir_append(mem_ctx, argv[0]);
+	if (full_path != NULL) {
+		printf("%s\n", full_path);
+		return 0;
+	}
+
+	D_ERR("Memory allocation error\n");
+	return 1;
+}
+
+/*
+ * "vardir" command: print the value of path_vardir().  Accepts no
+ * arguments; otherwise usage is printed and EINVAL returned.
+ */
+static int path_tool_vardir(TALLOC_CTX *mem_ctx,
+			    int argc,
+			    const char **argv,
+			    void *private_data)
+{
+	struct path_tool_context *tool;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc == 0) {
+		printf("%s\n", path_vardir());
+		return 0;
+	}
+
+	cmdline_usage(tool->cmdline, "vardir");
+	return EINVAL;
+}
+
+/*
+ * "vardir append" command: print the result of path_vardir_append() for
+ * the single <path> argument.  Wrong argument count prints usage and
+ * returns EINVAL; a NULL result is reported and returns 1.
+ */
+static int path_tool_vardir_append(TALLOC_CTX *mem_ctx,
+				   int argc,
+				   const char **argv,
+				   void *private_data)
+{
+	struct path_tool_context *tool;
+	char *full_path;
+
+	tool = talloc_get_type_abort(private_data, struct path_tool_context);
+
+	if (argc != 1) {
+		cmdline_usage(tool->cmdline, "vardir append");
+		return EINVAL;
+	}
+
+	full_path = path_vardir_append(mem_ctx, argv[0]);
+	if (full_path != NULL) {
+		printf("%s\n", full_path);
+		return 0;
+	}
+
+	D_ERR("Memory allocation error\n");
+	return 1;
+}
+
+/*
+ * Command table passed to cmdline_init() by path_tool_init().
+ *
+ * Each entry lists, in order: the command name, its handler function, a
+ * one-line description and an argument synopsis (NULL for the commands
+ * whose handlers accept no arguments).
+ *
+ * NOTE(review): every "<dir> append" entry is placed before the plain
+ * "<dir>" entry.  This ordering may matter to how cmdline matches command
+ * names -- confirm against cmdline.c before reordering.
+ */
+struct cmdline_command path_commands[] = {
+ { "config", path_tool_config,
+ "Get path of CTDB config file", NULL },
+ { "pidfile", path_tool_pidfile,
+ "Get path of CTDB daemon pidfile", "<daemon>" },
+ { "socket", path_tool_socket,
+ "Get path of CTDB daemon socket", "<daemon>" },
+ { "datadir append", path_tool_datadir_append,
+ "Get path relative to CTDB DATADIR", "<path>" },
+ { "datadir", path_tool_datadir,
+ "Get path of CTDB DATADIR", NULL },
+ { "etcdir append", path_tool_etcdir_append,
+ "Get path relative to CTDB ETCDIR", "<path>" },
+ { "etcdir", path_tool_etcdir,
+ "Get path of CTDB ETCDIR", NULL },
+ { "rundir append", path_tool_rundir_append,
+ "Get path relative to CTDB RUNDIR", "<path>" },
+ { "rundir", path_tool_rundir,
+ "Get path of CTDB RUNDIR", NULL },
+ { "vardir append", path_tool_vardir_append,
+ "Get path relative to CTDB VARDIR", "<path>" },
+ { "vardir", path_tool_vardir,
+ "Get path of CTDB VARDIR", NULL },
+ CMDLINE_TABLEEND
+};
+
+/*
+ * Allocate a path tool context on mem_ctx, initialise its cmdline with
+ * the path_commands table and parse argc/argv.
+ *
+ * Returns 0 and stores the context in *result on success.  Returns ENOMEM
+ * if the context cannot be allocated, otherwise the error from
+ * cmdline_init() or cmdline_parse(); in those cases the context is freed
+ * and *result is not written.  A parse failure also prints general usage.
+ */
+int path_tool_init(TALLOC_CTX *mem_ctx,
+		   const char *prog,
+		   struct poptOption *options,
+		   int argc,
+		   const char **argv,
+		   bool parse_options,
+		   struct path_tool_context **result)
+{
+	struct path_tool_context *tool;
+	int ret;
+
+	tool = talloc_zero(mem_ctx, struct path_tool_context);
+	if (tool == NULL) {
+		D_ERR("Memory allocation error\n");
+		return ENOMEM;
+	}
+
+	ret = cmdline_init(tool,
+			   prog,
+			   options,
+			   NULL,
+			   path_commands,
+			   &tool->cmdline);
+	if (ret != 0) {
+		D_ERR("Failed to initialize cmdline, ret=%d\n", ret);
+		goto fail;
+	}
+
+	ret = cmdline_parse(tool->cmdline, argc, argv, parse_options);
+	if (ret != 0) {
+		cmdline_usage(tool->cmdline, NULL);
+		goto fail;
+	}
+
+	*result = tool;
+	return 0;
+
+fail:
+	talloc_free(tool);
+	return ret;
+}
+
+/*
+ * Run the parsed command via cmdline_run(), passing the tool context as
+ * the handlers' private data.  Returns cmdline_run()'s return value;
+ * *result is passed through to cmdline_run().
+ */
+int path_tool_run(struct path_tool_context *ctx, int *result)
+{
+	int ret;
+
+	ret = cmdline_run(ctx->cmdline, ctx, result);
+
+	return ret;
+}
+
+#ifdef CTDB_PATH_TOOL
+
+/*
+ * Entry point of the standalone "ctdb-path" tool: parse the command line,
+ * set up stderr logging at error level, run the selected command and exit
+ * with its result (1 on any failure).
+ */
+int main(int argc, const char **argv)
+{
+	TALLOC_CTX *top;
+	struct path_tool_context *tool;
+	int ret, result;
+
+	top = talloc_new(NULL);
+	if (top == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	ret = path_tool_init(top, "ctdb-path", NULL, argc, argv, true, &tool);
+	if (ret != 0) {
+		talloc_free(top);
+		exit(1);
+	}
+
+	setup_logging("ctdb-path", DEBUG_STDERR);
+	debuglevel_set(DEBUG_ERR);
+
+	if (path_tool_run(tool, &result) != 0) {
+		/* The command could not be run at all; report failure */
+		result = 1;
+	}
+
+	talloc_free(top);
+	exit(result);
+}
+
+#endif /* CTDB_PATH_TOOL */
diff --git a/ctdb/common/path_tool.h b/ctdb/common/path_tool.h
new file mode 100644
index 0000000..bc6ea62
--- /dev/null
+++ b/ctdb/common/path_tool.h
@@ -0,0 +1,38 @@
+/*
+ path tool
+
+ Copyright (C) Amitay Isaacs 2018
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_PATH_TOOL__
+#define __CTDB_PATH_TOOL__
+
+#include <talloc.h>
+#include <popt.h>
+
+struct path_tool_context;
+
+int path_tool_init(TALLOC_CTX *mem_ctx,
+ const char *prog,
+ struct poptOption *options,
+ int argc,
+ const char **argv,
+ bool parse_options,
+ struct path_tool_context **result);
+
+int path_tool_run(struct path_tool_context *ctx, int *result);
+
+#endif /* __CTDB_PATH_TOOL__ */
diff --git a/ctdb/common/pidfile.c b/ctdb/common/pidfile.c
new file mode 100644
index 0000000..47589f4
--- /dev/null
+++ b/ctdb/common/pidfile.c
@@ -0,0 +1,85 @@
+/*
+ Create and remove pidfile
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <talloc.h>
+
+#include "lib/util/blocking.h"
+#include "lib/util/pidfile.h"
+
+#include "common/pidfile.h"
+
+/* Book-keeping for a pidfile created by pidfile_context_create(). */
+struct pidfile_context {
+ /* Copy of the pidfile path, owned by this context */
+ const char *pidfile;
+ /* Descriptor returned by pidfile_path_create() */
+ int fd;
+ /* pid of the process that created the context (see destructor) */
+ pid_t pid;
+};
+
+static int pidfile_context_destructor(struct pidfile_context *pid_ctx);
+
+/*
+ * Create a pidfile and a context that owns it.
+ *
+ * A pidfile_context is allocated on mem_ctx, a copy of the path and the
+ * calling process's pid are recorded in it, the pidfile is created with
+ * pidfile_path_create() and a destructor is installed on the context.
+ *
+ * Returns 0 and stores the context in *result on success.  On failure
+ * returns ENOMEM or the error from pidfile_path_create(); the partially
+ * built context is freed and *result is left untouched.
+ */
+int pidfile_context_create(TALLOC_CTX *mem_ctx, const char *pidfile,
+			   struct pidfile_context **result)
+{
+	struct pidfile_context *pid_ctx;
+	int fd, ret = 0;
+
+	pid_ctx = talloc_zero(mem_ctx, struct pidfile_context);
+	if (pid_ctx == NULL) {
+		return ENOMEM;
+	}
+
+	pid_ctx->pidfile = talloc_strdup(pid_ctx, pidfile);
+	if (pid_ctx->pidfile == NULL) {
+		ret = ENOMEM;
+		goto fail;
+	}
+
+	pid_ctx->pid = getpid();
+
+	ret = pidfile_path_create(pid_ctx->pidfile, &fd, NULL);
+	if (ret != 0) {
+		/*
+		 * Release the context instead of leaking it on mem_ctx.
+		 * The destructor is not installed yet, so this cannot
+		 * unlink a pidfile we failed to create.
+		 */
+		goto fail;
+	}
+
+	pid_ctx->fd = fd;
+
+	talloc_set_destructor(pid_ctx, pidfile_context_destructor);
+
+	*result = pid_ctx;
+	return 0;
+
+fail:
+	talloc_free(pid_ctx);
+	return ret;
+}
+
+/*
+ * Talloc destructor: remove the pidfile and close its descriptor, but
+ * only in the process whose pid was recorded when the context was
+ * created.  Any other process leaves the file alone.  Always returns 0.
+ */
+static int pidfile_context_destructor(struct pidfile_context *pid_ctx)
+{
+	if (pid_ctx->pid == getpid()) {
+		(void) unlink(pid_ctx->pidfile);
+
+		pidfile_fd_close(pid_ctx->fd);
+	}
+
+	return 0;
+}
diff --git a/ctdb/common/pidfile.h b/ctdb/common/pidfile.h
new file mode 100644
index 0000000..bc4e3a7
--- /dev/null
+++ b/ctdb/common/pidfile.h
@@ -0,0 +1,51 @@
+/*
+ Create and remove pidfile
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_PIDFILE_H__
+#define __CTDB_PIDFILE_H__
+
+#include <talloc.h>
+
+/**
+ * @file pidfile.h
+ *
+ * @brief Routines to manage PID file
+ */
+
+/**
+ * @brief Abstract struct to store pidfile details
+ */
+struct pidfile_context;
+
+/**
+ * @brief Create a PID file
+ *
+ * This creates a PID file, locks it, and writes PID.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] pidfile Path of PID file
+ * @param[out] result Pidfile context
+ * @return 0 on success, errno on failure
+ *
+ * Freeing the pidfile_context will delete the pidfile.
+ */
+int pidfile_context_create(TALLOC_CTX *mem_ctx, const char *pidfile,
+ struct pidfile_context **result);
+
+#endif /* __CTDB_PIDFILE_H__ */
diff --git a/ctdb/common/pkt_read.c b/ctdb/common/pkt_read.c
new file mode 100644
index 0000000..212ace5
--- /dev/null
+++ b/ctdb/common/pkt_read.c
@@ -0,0 +1,190 @@
+/*
+ Reading packets using fixed and dynamic buffer
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* This is similar to the read_packet abstraction. The main difference is that
+ * the tevent fd event is created only once.
+ */
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/tevent_unix.h"
+
+#include "pkt_read.h"
+
+/*
+ * Read a packet using fixed buffer
+ */
+
+/* Per-request state for pkt_read_send/handler/recv. */
+struct pkt_read_state {
+ /* Descriptor that pkt_read_handler() calls read() on */
+ int fd;
+ /* Buffer being filled: caller-supplied or talloc'd on this state */
+ uint8_t *buf;
+ /* Capacity of buf in bytes */
+ size_t buflen;
+ /* nread: bytes read so far; total: bytes currently expected */
+ size_t nread, total;
+ /* True while buf is the caller-supplied buffer */
+ bool use_fixed;
+ /*
+  * Called once total bytes have been read: returns the number of
+  * additional bytes needed, 0 if the packet is complete, or -1 if the
+  * packet is invalid.  May be NULL.
+  */
+ ssize_t (*more)(uint8_t *buf, size_t buflen, void *private_data);
+ /* Opaque pointer handed to more() */
+ void *private_data;
+};
+
+/*
+ * Create a request to read a packet from fd.
+ *
+ * The first "initial" bytes are read before "more" (if not NULL) is asked
+ * whether further data is needed.  The caller-supplied buffer is used
+ * only if it is non-NULL and can hold at least "initial" bytes; otherwise
+ * a buffer of "initial" bytes is allocated on the request state.
+ *
+ * No fd event is registered here; the caller drives the request by
+ * calling pkt_read_handler().  Returns NULL on allocation failure.
+ */
+struct tevent_req *pkt_read_send(TALLOC_CTX *mem_ctx,
+				 struct tevent_context *ev,
+				 int fd, size_t initial,
+				 uint8_t *buf, size_t buflen,
+				 ssize_t (*more)(uint8_t *buf,
+						 size_t buflen,
+						 void *private_data),
+				 void *private_data)
+{
+	struct tevent_req *req;
+	struct pkt_read_state *state;
+
+	req = tevent_req_create(mem_ctx, &state, struct pkt_read_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->fd = fd;
+
+	/*
+	 * A fixed buffer smaller than "initial" must not be used: the
+	 * handler reads up to "total" (== initial) bytes into buf, which
+	 * would overrun it.  Fall back to a dynamic buffer in that case.
+	 */
+	if (buf == NULL || buflen == 0 || buflen < initial) {
+		state->use_fixed = false;
+		state->buf = talloc_array(state, uint8_t, initial);
+		if (state->buf == NULL) {
+			talloc_free(req);
+			return NULL;
+		}
+		state->buflen = initial;
+	} else {
+		state->use_fixed = true;
+		state->buf = buf;
+		state->buflen = buflen;
+	}
+
+	state->nread = 0;
+	state->total = initial;
+
+	state->more = more;
+	state->private_data = private_data;
+
+	return req;
+}
+
+/*
+ * Read more data for the packet request "req".
+ *
+ * Performs one read() of at most (total - nread) bytes.  EINTR and short
+ * reads simply return so the caller can invoke the handler again.  A read
+ * error fails the request with errno, end-of-file with EPIPE.
+ *
+ * Once "total" bytes are available the "more" callback decides what
+ * happens next: no callback or a return of 0 completes the request, -1
+ * fails it with EIO, and a positive value extends "total" by that many
+ * bytes.  The buffer is only replaced by (or grown as) a talloc'd buffer
+ * when the extended packet no longer fits in the current one.
+ */
+void pkt_read_handler(struct tevent_context *ev, struct tevent_fd *fde,
+		      uint16_t flags, struct tevent_req *req)
+{
+	struct pkt_read_state *state = tevent_req_data(
+		req, struct pkt_read_state);
+	ssize_t nread, more;
+	uint8_t *tmp;
+
+	nread = read(state->fd, state->buf + state->nread,
+		     state->total - state->nread);
+	if ((nread == -1) && (errno == EINTR)) {
+		/* retry */
+		return;
+	}
+	if (nread == -1) {
+		tevent_req_error(req, errno);
+		return;
+	}
+	if (nread == 0) {
+		/* fd closed */
+		tevent_req_error(req, EPIPE);
+		return;
+	}
+
+	state->nread += nread;
+	if (state->nread < state->total) {
+		/* come back later */
+		return;
+	}
+
+	/* Check if "more" asks for more data */
+	if (state->more == NULL) {
+		tevent_req_done(req);
+		return;
+	}
+
+	more = state->more(state->buf, state->nread, state->private_data);
+	if (more == -1) {
+		/* invalid packet */
+		tevent_req_error(req, EIO);
+		return;
+	}
+	if (more == 0) {
+		tevent_req_done(req);
+		return;
+	}
+
+	if (state->total + more < state->total) {
+		/* int wrapped */
+		tevent_req_error(req, EMSGSIZE);
+		return;
+	}
+
+	/*
+	 * A packet that fills the buffer exactly still fits, so compare
+	 * with <= rather than <; otherwise an exact fit would needlessly
+	 * abandon the fixed buffer for an allocation.
+	 */
+	if (state->total + more <= state->buflen) {
+		/* continue using the current buffer */
+		state->total += more;
+		return;
+	}
+
+	if (state->use_fixed) {
+		/* switch to dynamic buffer, keeping what was read so far */
+		tmp = talloc_array(state, uint8_t, state->total + more);
+		if (tevent_req_nomem(tmp, req)) {
+			return;
+		}
+
+		memcpy(tmp, state->buf, state->total);
+		state->use_fixed = false;
+	} else {
+		tmp = talloc_realloc(state, state->buf, uint8_t,
+				     state->total + more);
+		if (tevent_req_nomem(tmp, req)) {
+			return;
+		}
+	}
+
+	state->buf = tmp;
+	state->buflen = state->total + more;
+	state->total += more;
+}
+
+/*
+ * Collect the result of a packet read request.
+ *
+ * Returns -1 (with the error reported through perrno by
+ * tevent_req_is_unix_error()) if the request failed.  Otherwise returns
+ * the packet size and stores the buffer in *pbuf: a dynamically allocated
+ * buffer is moved under mem_ctx and *free_buf is set to true; the fixed
+ * buffer is returned as-is with *free_buf set to false.
+ */
+ssize_t pkt_read_recv(struct tevent_req *req, TALLOC_CTX *mem_ctx,
+		      uint8_t **pbuf, bool *free_buf, int *perrno)
+{
+	struct pkt_read_state *state;
+	bool dynamic;
+
+	state = tevent_req_data(req, struct pkt_read_state);
+
+	if (tevent_req_is_unix_error(req, perrno)) {
+		return -1;
+	}
+
+	dynamic = !state->use_fixed;
+	if (dynamic) {
+		*pbuf = talloc_steal(mem_ctx, state->buf);
+	} else {
+		*pbuf = state->buf;
+	}
+	*free_buf = dynamic;
+
+	return state->total;
+}
diff --git a/ctdb/common/pkt_read.h b/ctdb/common/pkt_read.h
new file mode 100644
index 0000000..25d4a51
--- /dev/null
+++ b/ctdb/common/pkt_read.h
@@ -0,0 +1,98 @@
+/*
+ API for reading packets using fixed and dynamic buffer
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_PKT_READ_H__
+#define __CTDB_PKT_READ_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+/**
+ * @file pkt_read.h
+ *
+ * @brief Read a packet using fixed size buffer or allocated memory.
+ *
+ * CTDB communication uses lots of small packets. This abstraction avoids the
+ * need to allocate memory for small packets. Only if the received packet is
+ * larger than the fixed memory buffer, use talloc to allocate memory.
+ */
+
+/**
+ * @brief Start async computation to read a packet
+ *
+ * This returns a tevent request to read a packet from given fd. The fd
+ * should be nonblocking. Freeing this request will free all the memory
+ * associated with the request.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] fd The non-blocking file/socket descriptor to read from
+ * @param[in] initial Initial amount of data to read
+ * @param[in] buf The static buffer to read data in
+ * @param[in] buflen The size of the static buffer
+ * @param[in] more The function to check if the bytes read forms a packet
+ * @param[in] private_data Private data to pass to more function
+ * @return new tevent request or NULL on failure
+ */
+struct tevent_req *pkt_read_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ int fd, size_t initial,
+ uint8_t *buf, size_t buflen,
+ ssize_t (*more)(uint8_t *buf,
+ size_t buflen,
+ void *private_data),
+ void *private_data);
+
+/**
+ * @brief Function to actually read data from the socket
+ *
+ * This function should be called, when tevent fd event is triggered. This
+ * function has the syntax of tevent_fd_handler_t. The private_data for this
+ * function is the tevent request created by pkt_read_send function.
+ *
+ * @param[in] ev Tevent context
+ * @param[in] fde Tevent fd context
+ * @param[in] flags Tevent fd flags
+ * @param[in] req The active tevent request
+ */
+void pkt_read_handler(struct tevent_context *ev, struct tevent_fd *fde,
+ uint16_t flags, struct tevent_req *req);
+
+/**
+ * @brief Retrieve a packet
+ *
+ * This function returns the pkt read from fd.
+ *
+ * @param[in] req Tevent request
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] pbuf The pointer to the buffer
+ * @param[out] free_buf Boolean to indicate that caller should free buffer
+ * @param[out] perrno errno in case of failure
+ * @return the size of the pkt, or -1 on failure
+ *
+ * If the pkt data is dynamically allocated, then it is moved under the
+ * specified talloc memory context and free_buf is set to true. It is the
+ * responsibility of the caller to free the memory returned.
+ *
+ * If the pkt data is stored in the fixed buffer, then free_buf is set to false.
+ */
+ssize_t pkt_read_recv(struct tevent_req *req, TALLOC_CTX *mem_ctx,
+ uint8_t **pbuf, bool *free_buf, int *perrno);
+
+#endif /* __CTDB_PKT_READ_H__ */
diff --git a/ctdb/common/pkt_write.c b/ctdb/common/pkt_write.c
new file mode 100644
index 0000000..b1c1730
--- /dev/null
+++ b/ctdb/common/pkt_write.c
@@ -0,0 +1,101 @@
+/*
+ Write a packet
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/tevent_unix.h"
+
+#include "pkt_write.h"
+
+/*
+ * Write a packet
+ */
+
+/* Per-request state for pkt_write_send/handler/recv. */
+struct pkt_write_state {
+ /* Descriptor that pkt_write_handler() calls write() on */
+ int fd;
+ /* Caller-supplied data; the pointer is stored, the data is not copied */
+ uint8_t *buf;
+ /* buflen: total bytes to write; offset: bytes written so far */
+ size_t buflen, offset;
+};
+
+/*
+ * Create a request to write buflen bytes from buf to fd.
+ *
+ * Only the buf pointer is stored, so the data must stay valid until the
+ * request finishes.  No fd event is registered here; the caller drives
+ * the request by calling pkt_write_handler().  Returns NULL if the
+ * request cannot be created.
+ */
+struct tevent_req *pkt_write_send(TALLOC_CTX *mem_ctx,
+				  struct tevent_context *ev,
+				  int fd, uint8_t *buf, size_t buflen)
+{
+	struct pkt_write_state *state;
+	struct tevent_req *req;
+
+	req = tevent_req_create(mem_ctx, &state, struct pkt_write_state);
+	if (req != NULL) {
+		state->fd = fd;
+		state->buf = buf;
+		state->buflen = buflen;
+		state->offset = 0;
+	}
+
+	return req;
+}
+
+/*
+ * Write more of the packet for request "req".
+ *
+ * Performs one write() of the remaining bytes.  EINTR, a zero-byte write
+ * and a partial write all simply return so the caller can invoke the
+ * handler again.  Any other write error fails the request with errno; the
+ * request completes once offset reaches buflen.
+ */
+void pkt_write_handler(struct tevent_context *ev, struct tevent_fd *fde,
+		       uint16_t flags, struct tevent_req *req)
+{
+	struct pkt_write_state *state;
+	ssize_t nwritten;
+
+	state = tevent_req_data(req, struct pkt_write_state);
+
+	nwritten = write(state->fd, state->buf + state->offset,
+			 state->buflen - state->offset);
+	if (nwritten == -1) {
+		if (errno == EINTR) {
+			/* retry */
+			return;
+		}
+		tevent_req_error(req, errno);
+		return;
+	}
+	if (nwritten == 0) {
+		/* retry */
+		return;
+	}
+
+	state->offset += nwritten;
+	if (state->offset >= state->buflen) {
+		tevent_req_done(req);
+	}
+	/* otherwise come back later for the rest */
+}
+
+/*
+ * Collect the result of a packet write request: -1 (with the error
+ * reported through perrno by tevent_req_is_unix_error()) on failure,
+ * otherwise the number of bytes written.
+ */
+ssize_t pkt_write_recv(struct tevent_req *req, int *perrno)
+{
+	struct pkt_write_state *state;
+
+	state = tevent_req_data(req, struct pkt_write_state);
+
+	if (tevent_req_is_unix_error(req, perrno)) {
+		return -1;
+	}
+
+	return state->offset;
+}
diff --git a/ctdb/common/pkt_write.h b/ctdb/common/pkt_write.h
new file mode 100644
index 0000000..19d8045
--- /dev/null
+++ b/ctdb/common/pkt_write.h
@@ -0,0 +1,79 @@
+/*
+ API for writing a packet
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_PKT_WRITE_H__
+#define __CTDB_PKT_WRITE_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+/**
+ * @file pkt_write.h
+ *
+ * @brief Write a packet.
+ *
+ * Write a complete packet with possibly multiple system calls.
+ */
+
+/**
+ * @brief Start async computation to write a packet
+ *
+ * This returns a tevent request to write a packet to given fd. The fd
+ * should be nonblocking. Freeing this request will free all the memory
+ * associated with the request.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] fd The non-blocking file/socket descriptor to write to
+ * @param[in] buf The data
+ * @param[in] buflen The size of the data
+ * @return new tevent request or NULL on failure
+ */
+struct tevent_req *pkt_write_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ int fd, uint8_t *buf, size_t buflen);
+
+/**
+ * @brief Function to actually write data to the socket
+ *
+ * This function should be called, when tevent fd event is triggered
+ * for TEVENT_FD_WRITE event. This function has the syntax of
+ * tevent_fd_handler_t. The private_data for this function is the tevent
+ * request created by pkt_write_send function.
+ *
+ * @param[in] ev Tevent context
+ * @param[in] fde Tevent fd context
+ * @param[in] flags Tevent fd flags
+ * @param[in] req The active tevent request
+ */
+void pkt_write_handler(struct tevent_context *ev, struct tevent_fd *fde,
+ uint16_t flags, struct tevent_req *req);
+
+/**
+ * @brief Packet is sent
+ *
+ * This function returns the number of bytes written.
+ *
+ * @param[in] req Tevent request
+ * @param[out] perrno errno in case of failure
+ * @return the number of bytes written, or -1 on failure
+ */
+ssize_t pkt_write_recv(struct tevent_req *req, int *perrno);
+
+#endif /* __CTDB_PKT_WRITE_H__ */
diff --git a/ctdb/common/rb_tree.c b/ctdb/common/rb_tree.c
new file mode 100644
index 0000000..8e13dff
--- /dev/null
+++ b/ctdb/common/rb_tree.c
@@ -0,0 +1,1101 @@
+/*
+ a talloc based red-black tree
+
+ Copyright (C) Ronnie Sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+
+#include "lib/util/debug.h"
+
+#include "common/logging.h"
+#include "common/rb_tree.h"
+
+/*
+ * Allocation check used by this file: if p evaluates to NULL, log a
+ * critical message naming the expression and the source location and
+ * terminate the process with exit status 10.
+ */
+#define NO_MEMORY_FATAL(p) do { if (!(p)) { \
+ DEBUG(DEBUG_CRIT,("Out of memory for %s at %s\n", #p, __location__)); \
+ exit(10); \
+ }} while (0)
+
+
+/*
+ * Prepare the subtree rooted at node for bulk destruction.
+ *
+ * The per-node destructor is cleared so that freeing a node does not
+ * try to unlink it from the tree, then both children are processed and
+ * finally the node itself is re-parented onto mem_ctx.
+ */
+static void
+tree_destructor_traverse_node(TALLOC_CTX *mem_ctx, trbt_node_t *node)
+{
+	trbt_node_t *l = node->left;
+	trbt_node_t *r = node->right;
+
+	talloc_set_destructor(node, NULL);
+
+	if (l != NULL) {
+		tree_destructor_traverse_node(mem_ctx, l);
+	}
+	if (r != NULL) {
+		tree_destructor_traverse_node(mem_ctx, r);
+	}
+
+	talloc_steal(mem_ctx, node);
+}
+
+/*
+  talloc destructor for a tree: release every node still linked into it.
+  Always returns 0.
+ */
+static int tree_destructor(trbt_tree_t *tree)
+{
+	TALLOC_CTX *scratch;
+
+	/* nothing to do for a missing or empty tree */
+	if (tree == NULL || tree->root == NULL) {
+		return 0;
+	}
+
+	/* The regular node destructor removes nodes one by one from the
+	   tree. Since the whole tree is going away we do not care whether
+	   it stays consistent or balanced, so instead strip the destructor
+	   from every node, move the nodes onto a temporary context and
+	   free that context in one go.
+	*/
+	scratch = talloc_new(NULL);
+	tree_destructor_traverse_node(scratch, tree->root);
+	talloc_free(scratch);
+
+	return 0;
+}
+
+
+/* create a red black tree
+ *
+ * memctx is the talloc parent of the new tree, flags is stored verbatim
+ * in tree->flags (see TRBT_AUTOFREE). Allocation failure is fatal.
+ */
+trbt_tree_t *
+trbt_create(TALLOC_CTX *memctx, uint32_t flags)
+{
+	trbt_tree_t *tree = talloc_zero(memctx, trbt_tree_t);
+
+	NO_MEMORY_FATAL(tree);
+
+	tree->flags = flags;
+
+	/* When the tree is freed the destructor walks all entries and
+	   releases the nodes. Only the tree and its nodes are freed,
+	   not the data stored in the tree.
+	*/
+	talloc_set_destructor(tree, tree_destructor);
+
+	return tree;
+}
+
+/* Return the parent link of node; node must not be NULL. */
+static inline trbt_node_t *
+trbt_parent(trbt_node_t *node)
+{
+	trbt_node_t *up = node->parent;
+
+	return up;
+}
+
+/* Return the parent of node's parent, or NULL if node has no parent. */
+static inline trbt_node_t *
+trbt_grandparent(trbt_node_t *node)
+{
+	trbt_node_t *up = trbt_parent(node);
+
+	return (up != NULL) ? up->parent : NULL;
+}
+
+/*
+ * Return the sibling of node's parent (which may itself be NULL), or
+ * NULL if node has no parent or no grandparent.
+ */
+static inline trbt_node_t *
+trbt_uncle(trbt_node_t *node)
+{
+	trbt_node_t *up = trbt_parent(node);
+	trbt_node_t *top;
+
+	if (up == NULL) {
+		return NULL;
+	}
+
+	top = trbt_parent(up);
+	if (top == NULL) {
+		return NULL;
+	}
+
+	return (top->left == up) ? top->right : top->left;
+}
+
+
+static inline void trbt_insert_case1(trbt_tree_t *tree, trbt_node_t *node);
+static inline void trbt_insert_case2(trbt_tree_t *tree, trbt_node_t *node);
+
+/*
+ * Rotate left around node: node's right child takes node's place under
+ * node's former parent (or becomes the tree root), node becomes the left
+ * child of that node, and the former left subtree of the right child
+ * becomes node's new right subtree.
+ * node->right is dereferenced unconditionally, so it must not be NULL.
+ */
+static inline void
+trbt_rotate_left(trbt_node_t *node)
+{
+ trbt_tree_t *tree = node->tree;
+
+ /* hook the right child into node's old position */
+ if(node->parent){
+ if(node->parent->left==node){
+ node->parent->left=node->right;
+ } else {
+ node->parent->right=node->right;
+ }
+ } else {
+ tree->root=node->right;
+ }
+ node->right->parent=node->parent;
+ node->parent=node->right;
+ /* the old right child's left subtree moves under node */
+ node->right=node->right->left;
+ if(node->right){
+ node->right->parent=node;
+ }
+ node->parent->left=node;
+}
+
+/*
+ * Rotate right around node: node's left child takes node's place under
+ * node's former parent (or becomes the tree root), node becomes the
+ * right child of that node, and the former right subtree of the left
+ * child becomes node's new left subtree.
+ * node->left is dereferenced unconditionally, so it must not be NULL.
+ */
+static inline void
+trbt_rotate_right(trbt_node_t *node)
+{
+ trbt_tree_t *tree = node->tree;
+
+ /* hook the left child into node's old position */
+ if(node->parent){
+ if(node->parent->left==node){
+ node->parent->left=node->left;
+ } else {
+ node->parent->right=node->left;
+ }
+ } else {
+ tree->root=node->left;
+ }
+ node->left->parent=node->parent;
+ node->parent=node->left;
+ /* the old left child's right subtree moves under node */
+ node->left=node->left->right;
+ if(node->left){
+ node->left->parent=node;
+ }
+ node->parent->right=node;
+}
+
+/* NULL nodes are black by definition */
+static inline int trbt_get_color(trbt_node_t *node)
+{
+	return (node != NULL) ? node->rb_color : TRBT_BLACK;
+}
+/* Color of node's left child; a NULL node or missing child is black. */
+static inline int trbt_get_color_left(trbt_node_t *node)
+{
+	if (node != NULL && node->left != NULL) {
+		return node->left->rb_color;
+	}
+	return TRBT_BLACK;
+}
+/* Color of node's right child; a NULL node or missing child is black. */
+static inline int trbt_get_color_right(trbt_node_t *node)
+{
+	if (node != NULL && node->right != NULL) {
+		return node->right->rb_color;
+	}
+	return TRBT_BLACK;
+}
+/* setting the color of a NULL node is a nop */
+static inline void trbt_set_color(trbt_node_t *node, int color)
+{
+	if (node != NULL) {
+		node->rb_color = color;
+	}
+}
+/* Set the color of node's left child; nop if node or the child is NULL. */
+static inline void trbt_set_color_left(trbt_node_t *node, int color)
+{
+	trbt_node_t *kid = (node != NULL) ? node->left : NULL;
+
+	if (kid != NULL) {
+		kid->rb_color = color;
+	}
+}
+/* Set the color of node's right child; nop if node or the child is NULL. */
+static inline void trbt_set_color_right(trbt_node_t *node, int color)
+{
+	trbt_node_t *kid = (node != NULL) ? node->right : NULL;
+
+	if (kid != NULL) {
+		kid->rb_color = color;
+	}
+}
+
+/*
+ * Insert fix-up, final step: color the parent black and the grandparent
+ * red, then rotate the grandparent - to the right when node and parent
+ * are both left children, to the left otherwise.
+ * Both parent and grandparent are dereferenced and must be non-NULL.
+ * The tree argument is not used here.
+ */
+static inline void
+trbt_insert_case5(trbt_tree_t *tree, trbt_node_t *node)
+{
+ trbt_node_t *grandparent;
+ trbt_node_t *parent;
+
+ parent=trbt_parent(node);
+ grandparent=trbt_parent(parent);
+ parent->rb_color=TRBT_BLACK;
+ grandparent->rb_color=TRBT_RED;
+ if( (node==parent->left) && (parent==grandparent->left) ){
+ trbt_rotate_right(grandparent);
+ } else {
+ trbt_rotate_left(grandparent);
+ }
+}
+
+/*
+ * Insert fix-up: if node and its parent are on opposite sides
+ * (right child of a left child, or left child of a right child), rotate
+ * the parent so that both end up on the same side, then continue with
+ * case 5 on the node that is now the lower one (the former parent).
+ * Does nothing if there is no grandparent.
+ */
+static inline void
+trbt_insert_case4(trbt_tree_t *tree, trbt_node_t *node)
+{
+ trbt_node_t *grandparent;
+ trbt_node_t *parent;
+
+ parent=trbt_parent(node);
+ grandparent=trbt_parent(parent);
+ if(!grandparent){
+ return;
+ }
+ if( (node==parent->right) && (parent==grandparent->left) ){
+ trbt_rotate_left(parent);
+ /* after the rotation the old parent is node's left child */
+ node=node->left;
+ } else if( (node==parent->left) && (parent==grandparent->right) ){
+ trbt_rotate_right(parent);
+ /* after the rotation the old parent is node's right child */
+ node=node->right;
+ }
+ trbt_insert_case5(tree, node);
+}
+
+/*
+ * Insert fix-up: if the uncle exists and is red, color parent and uncle
+ * black and the grandparent red, then restart the fix-up at the
+ * grandparent (case 1). Otherwise continue with case 4.
+ */
+static inline void
+trbt_insert_case3(trbt_tree_t *tree, trbt_node_t *node)
+{
+ trbt_node_t *grandparent;
+ trbt_node_t *parent;
+ trbt_node_t *uncle;
+
+ uncle=trbt_uncle(node);
+ if(uncle && (uncle->rb_color==TRBT_RED)){
+ parent=trbt_parent(node);
+ parent->rb_color=TRBT_BLACK;
+ uncle->rb_color=TRBT_BLACK;
+ /* a non-NULL uncle implies that the grandparent exists */
+ grandparent=trbt_grandparent(node);
+ grandparent->rb_color=TRBT_RED;
+ trbt_insert_case1(tree, grandparent);
+ } else {
+ trbt_insert_case4(tree, node);
+ }
+}
+
+/*
+ * Insert fix-up: nothing to do when the parent is black, otherwise
+ * continue with case 3.
+ */
+static inline void
+trbt_insert_case2(trbt_tree_t *tree, trbt_node_t *node)
+{
+	/* parent is always non-NULL here */
+	if (trbt_parent(node)->rb_color != TRBT_BLACK) {
+		trbt_insert_case3(tree, node);
+	}
+}
+
+/*
+ * Insert fix-up entry point: a node without a parent is the root and is
+ * simply colored black, anything else continues with case 2.
+ */
+static inline void
+trbt_insert_case1(trbt_tree_t *tree, trbt_node_t *node)
+{
+	if (trbt_parent(node) != NULL) {
+		trbt_insert_case2(tree, node);
+		return;
+	}
+
+	node->rb_color = TRBT_BLACK;
+}
+
+/*
+ * Return the other child of node's parent (may be NULL), or NULL if
+ * node has no parent.
+ */
+static inline trbt_node_t *
+trbt_sibling(trbt_node_t *node)
+{
+	trbt_node_t *up = trbt_parent(node);
+
+	if (up == NULL) {
+		return NULL;
+	}
+
+	return (up->left == node) ? up->right : up->left;
+}
+
+/*
+ * Delete fix-up, final step: the sibling takes over the parent's color,
+ * the parent becomes black, the sibling's child on the far side from
+ * node becomes black and the parent is rotated towards node.
+ * node->parent is dereferenced and must be non-NULL.
+ */
+static inline void
+trbt_delete_case6(trbt_node_t *node)
+{
+ trbt_node_t *sibling, *parent;
+
+ sibling = trbt_sibling(node);
+ parent = trbt_parent(node);
+
+ trbt_set_color(sibling, parent->rb_color);
+ trbt_set_color(parent, TRBT_BLACK);
+ if (node == parent->left) {
+ trbt_set_color_right(sibling, TRBT_BLACK);
+ trbt_rotate_left(parent);
+ } else {
+ trbt_set_color_left(sibling, TRBT_BLACK);
+ trbt_rotate_right(parent);
+ }
+}
+
+
+/*
+ * Delete fix-up: if the sibling is black and only its child on the near
+ * side of node is red, recolor (sibling red, that child black) and
+ * rotate the sibling away from node. In every case processing then
+ * continues with case 6.
+ */
+static inline void
+trbt_delete_case5(trbt_node_t *node)
+{
+ trbt_node_t *parent, *sibling;
+
+ parent = trbt_parent(node);
+ sibling = trbt_sibling(node);
+ /* node is a left child: near child of the sibling is its left one */
+ if ( (node == parent->left)
+ &&(trbt_get_color(sibling) == TRBT_BLACK)
+ &&(trbt_get_color_left(sibling) == TRBT_RED)
+ &&(trbt_get_color_right(sibling) == TRBT_BLACK) ){
+ trbt_set_color(sibling, TRBT_RED);
+ trbt_set_color_left(sibling, TRBT_BLACK);
+ trbt_rotate_right(sibling);
+ trbt_delete_case6(node);
+ return;
+ }
+ /* mirror image: node is a right child */
+ if ( (node == parent->right)
+ &&(trbt_get_color(sibling) == TRBT_BLACK)
+ &&(trbt_get_color_right(sibling) == TRBT_RED)
+ &&(trbt_get_color_left(sibling) == TRBT_BLACK) ){
+ trbt_set_color(sibling, TRBT_RED);
+ trbt_set_color_right(sibling, TRBT_BLACK);
+ trbt_rotate_left(sibling);
+ trbt_delete_case6(node);
+ return;
+ }
+
+ trbt_delete_case6(node);
+}
+
+/*
+ * Delete fix-up: if the parent is red while the sibling and both of the
+ * sibling's children are black, swap the colors of sibling and parent
+ * (sibling red, parent black) and stop. Otherwise continue with case 5.
+ */
+static inline void
+trbt_delete_case4(trbt_node_t *node)
+{
+ trbt_node_t *sibling;
+
+ sibling = trbt_sibling(node);
+ if ( (trbt_get_color(node->parent) == TRBT_RED)
+ &&(trbt_get_color(sibling) == TRBT_BLACK)
+ &&(trbt_get_color_left(sibling) == TRBT_BLACK)
+ &&(trbt_get_color_right(sibling) == TRBT_BLACK) ){
+ trbt_set_color(sibling, TRBT_RED);
+ trbt_set_color(node->parent, TRBT_BLACK);
+ } else {
+ trbt_delete_case5(node);
+ }
+}
+
+static void trbt_delete_case1(trbt_node_t *node);
+
+/*
+ * Delete fix-up: if the parent, the sibling and both of the sibling's
+ * children are all black, color the sibling red and restart the fix-up
+ * one level up at the parent (case 1). Otherwise continue with case 4.
+ */
+static inline void
+trbt_delete_case3(trbt_node_t *node)
+{
+ trbt_node_t *sibling;
+
+ sibling = trbt_sibling(node);
+ if ( (trbt_get_color(node->parent) == TRBT_BLACK)
+ &&(trbt_get_color(sibling) == TRBT_BLACK)
+ &&(trbt_get_color_left(sibling) == TRBT_BLACK)
+ &&(trbt_get_color_right(sibling) == TRBT_BLACK) ){
+ trbt_set_color(sibling, TRBT_RED);
+ trbt_delete_case1(node->parent);
+ } else {
+ trbt_delete_case4(node);
+ }
+}
+
+/*
+ * Delete fix-up: if the sibling is red, color the parent red and the
+ * sibling black and rotate the parent towards node. In every case
+ * processing then continues with case 3.
+ * Called only when node has a parent (see trbt_delete_case1).
+ */
+static inline void
+trbt_delete_case2(trbt_node_t *node)
+{
+ trbt_node_t *sibling;
+
+ sibling = trbt_sibling(node);
+ if (trbt_get_color(sibling) == TRBT_RED) {
+ trbt_set_color(node->parent, TRBT_RED);
+ trbt_set_color(sibling, TRBT_BLACK);
+ if (node == node->parent->left) {
+ trbt_rotate_left(node->parent);
+ } else {
+ trbt_rotate_right(node->parent);
+ }
+ }
+ trbt_delete_case3(node);
+}
+
+/*
+ * Delete fix-up entry point: nothing to do when node has no parent
+ * (it is the root), otherwise continue with case 2.
+ */
+static void
+trbt_delete_case1(trbt_node_t *node)
+{
+	if (node->parent != NULL) {
+		trbt_delete_case2(node);
+	}
+}
+
+/*
+ * Remove node from its tree and rebalance the tree.
+ *
+ * from_destructor is true when this is called from node_destructor(),
+ * i.e. the memory of node is already being released by talloc and must
+ * not be freed here. It is false for an explicit delete (see
+ * trbt_delete32()), in which case the node that was unlinked is freed
+ * here. The data a node points to is never freed.
+ *
+ * If the tree has TRBT_AUTOFREE set and becomes empty, the tree itself
+ * is freed as well.
+ */
+static void
+delete_node(trbt_node_t *node, bool from_destructor)
+{
+	trbt_node_t *parent, *child, dc;
+	trbt_node_t *temp = NULL;
+	/* node may be freed below, so remember the tree for the
+	   TRBT_AUTOFREE check at the end instead of reading node->tree
+	   from freed memory */
+	trbt_tree_t *tree = node->tree;
+
+	/* This node has two child nodes, then just copy the content
+	   from the next smaller node with this node and delete the
+	   predecessor instead.
+	   The predecessor is guaranteed to have at most one child
+	   node since its right arm must be NULL
+	   (It must be NULL since we are its successor and we are above
+	    it in the tree)
+	 */
+	if (node->left != NULL && node->right != NULL) {
+		/* This node has two children, just copy the data */
+		/* find the predecessor */
+		temp = node->left;
+
+		while (temp->right != NULL) {
+			temp = temp->right;
+		}
+
+		/* move the predecessor data and key into the node to
+		   be deleted.
+		 */
+		node->key32 = temp->key32;
+		node->data = temp->data;
+		/* now we let node hang off the new data */
+		talloc_steal(node->data, node);
+
+		temp->data = NULL;
+		temp->key32 = -1;
+		/* then delete the temp node.
+		   this node is guaranteed to have at least one leaf
+		   child */
+		delete_node(temp, from_destructor);
+
+		if (!from_destructor) {
+			/* Explicit delete: the recursive call has already
+			   unlinked and freed the predecessor. node stays
+			   in the tree and now carries the predecessor's
+			   key and data, so it must not be freed. The tree
+			   still contains node, hence no AUTOFREE check is
+			   needed either.
+			 */
+			return;
+		}
+		goto finished;
+	}
+
+
+	/* There is at most one child to this node to be deleted */
+	child = node->left;
+	if (node->right) {
+		child = node->right;
+	}
+
+	/* If the node to be deleted did not have any child at all we
+	   create a temporary dummy node for the child and mark it black.
+	   Once the delete of the node is finished, we remove this dummy
+	   node, which is simple to do since it is guaranteed that it will
+	   still not have any children after the delete operation.
+	   This is because we don't represent the leaf-nodes as actual nodes
+	   in this implementation.
+	 */
+	if (!child) {
+		child = &dc;
+		child->tree = tree;
+		child->left=NULL;
+		child->right=NULL;
+		child->rb_color=TRBT_BLACK;
+		child->data=NULL;
+	}
+
+	/* replace node with child */
+	parent = trbt_parent(node);
+	if (parent) {
+		if (parent->left == node) {
+			parent->left = child;
+		} else {
+			parent->right = child;
+		}
+	} else {
+		tree->root = child;
+	}
+	child->parent = node->parent;
+
+
+	/* removing a black node changes the black height: either
+	   recolor a red child or run the rebalancing cases */
+	if (node->rb_color == TRBT_BLACK) {
+		if (trbt_get_color(child) == TRBT_RED) {
+			child->rb_color = TRBT_BLACK;
+		} else {
+			trbt_delete_case1(child);
+		}
+	}
+
+	/* If we had to create a temporary dummy node to represent a black
+	   leaf child we now have to delete it.
+	   This is simple since this dummy node originally had no children
+	   and we are guaranteed that it will also not have any children
+	   after the node has been deleted and any possible rotations
+	   have occurred.
+
+	   The only special case is if this was the last node of the tree
+	   in which case we have to reset the root to NULL as well.
+	   Otherwise it is enough to just unlink the child from its new
+	   parent.
+	 */
+	if (child == &dc) {
+		if (child->parent == NULL) {
+			tree->root = NULL;
+		} else if (child == child->parent->left) {
+			child->parent->left = NULL;
+		} else {
+			child->parent->right = NULL;
+		}
+	}
+
+finished:
+	if (!from_destructor) {
+		/* node has already been unlinked above. Clear its
+		   destructor first, otherwise talloc_free() would call
+		   node_destructor() and run delete_node() a second time
+		   on a node that is no longer part of the tree.
+		 */
+		talloc_set_destructor(node, NULL);
+		talloc_free(node);
+	}
+
+	/* if we came from a destructor and temp!=NULL this means we
+	   did the node-swap but now the tree still contains the old
+	   node which is being freed by the destructor. Put temp into
+	   the position of node so that the tree no longer refers to
+	   node.
+	 */
+	if (from_destructor && temp) {
+		temp->key32 = node->key32;
+		temp->rb_color = node->rb_color;
+
+		temp->data = node->data;
+		talloc_steal(temp->data, temp);
+
+		temp->parent = node->parent;
+		if (temp->parent) {
+			if (temp->parent->left == node) {
+				temp->parent->left = temp;
+			} else {
+				temp->parent->right = temp;
+			}
+		}
+
+		temp->left = node->left;
+		if (temp->left) {
+			temp->left->parent = temp;
+		}
+		temp->right = node->right;
+		if (temp->right) {
+			temp->right->parent = temp;
+		}
+
+		if (tree->root == node) {
+			tree->root = temp;
+		}
+	}
+
+	if ( (tree->flags & TRBT_AUTOFREE)
+	&&   (tree->root == NULL) ) {
+		talloc_free(tree);
+	}
+
+	return;
+}
+
+/*
+  talloc destructor for a node: unlink the node from its tree.
+  Always returns 0.
+ */
+static int node_destructor(trbt_node_t *node)
+{
+	const bool from_destructor = true;
+
+	delete_node(node, from_destructor);
+	return 0;
+}
+
+/*
+ * Allocate a black, childless node for key/data below parent.
+ * The node is not linked into parent; the caller does that.
+ * Allocation failure is fatal.
+ */
+static inline trbt_node_t *
+trbt_create_node(trbt_tree_t *tree, trbt_node_t *parent, uint32_t key, void *data)
+{
+	trbt_node_t *node = talloc_zero(tree, trbt_node_t);
+
+	NO_MEMORY_FATAL(node);
+
+	node->key32 = key;
+	node->data = data;
+	node->rb_color = TRBT_BLACK;
+	node->tree = tree;
+	node->parent = parent;
+	node->left = NULL;
+	node->right = NULL;
+
+	/* Hang the node off data so that the node is released, and via
+	   its destructor removed from the tree, when data is freed.
+	*/
+	talloc_steal(data, node);
+	talloc_set_destructor(node, node_destructor);
+
+	return node;
+}
+
+/* insert a new node in the tree.
+   if there is already a node with a matching key in the tree
+   its data is replaced with the new data and a pointer to the old data
+   is returned in case the caller wants to take any special action.
+   returns NULL when a new node was created.
+ */
+void *
+trbt_insert32(trbt_tree_t *tree, uint32_t key, void *data)
+{
+	trbt_node_t *cur = tree->root;
+
+	/* empty tree: the new node becomes the root */
+	if (cur == NULL) {
+		tree->root = trbt_create_node(tree, NULL, key, data);
+		return NULL;
+	}
+
+	/* walk down until we hit the key or an empty child slot */
+	for (;;) {
+		trbt_node_t **slot;
+
+		if (key == cur->key32) {
+			void *previous = cur->data;
+
+			cur->data = data;
+			/* the node is now owned by the new data so that
+			   it is freed when the new data is released
+			*/
+			talloc_steal(cur->data, cur);
+
+			return previous;
+		}
+
+		slot = (key < cur->key32) ? &cur->left : &cur->right;
+		if (*slot == NULL) {
+			*slot = trbt_create_node(tree, cur, key, data);
+			cur = *slot;
+			break;
+		}
+		cur = *slot;
+	}
+
+	/* cur is the newly created leaf: color it red and rebalance */
+	cur->rb_color = TRBT_RED;
+	trbt_insert_case1(tree, cur);
+	return NULL;
+}
+
+/* Return the data stored under key, or NULL if the key is not present. */
+void *
+trbt_lookup32(trbt_tree_t *tree, uint32_t key)
+{
+	trbt_node_t *cur = tree->root;
+
+	while (cur != NULL && cur->key32 != key) {
+		cur = (key < cur->key32) ? cur->left : cur->right;
+	}
+
+	return (cur != NULL) ? cur->data : NULL;
+}
+
+
+/* This deletes the node stored under key from the tree, if any.
+   Note that this does not release the data that the node points to
+*/
+void
+trbt_delete32(trbt_tree_t *tree, uint32_t key)
+{
+	trbt_node_t *cur = tree->root;
+
+	while (cur != NULL && cur->key32 != key) {
+		cur = (key < cur->key32) ? cur->left : cur->right;
+	}
+
+	if (cur != NULL) {
+		delete_node(cur, false);
+	}
+}
+
+
+/*
+ * Insert or update the entry for key, obtaining the data from callback.
+ *
+ * For a new key callback(param, NULL) is called and its return value is
+ * stored in a new node. For an existing key callback(param, old data)
+ * is called and its return value replaces the stored data. In both
+ * cases the node is re-parented onto the returned data.
+ */
+void
+trbt_insert32_callback(trbt_tree_t *tree, uint32_t key, void *(*callback)(void *param, void *data), void *param)
+{
+	trbt_node_t *cur = tree->root;
+
+	/* empty tree: the new node becomes the root */
+	if (cur == NULL) {
+		void *fresh = callback(param, NULL);
+
+		tree->root = trbt_create_node(tree, NULL, key, fresh);
+		return;
+	}
+
+	/* walk down until we hit the key or an empty child slot */
+	for (;;) {
+		trbt_node_t **slot;
+
+		if (key == cur->key32) {
+			/* this node already exists, replace its data */
+			cur->data = callback(param, cur->data);
+			talloc_steal(cur->data, cur);
+			return;
+		}
+
+		slot = (key < cur->key32) ? &cur->left : &cur->right;
+		if (*slot == NULL) {
+			void *fresh = callback(param, NULL);
+
+			*slot = trbt_create_node(tree, cur, key, fresh);
+			cur = *slot;
+			break;
+		}
+		cur = *slot;
+	}
+
+	/* cur is the newly created leaf: color it red and rebalance */
+	cur->rb_color = TRBT_RED;
+	trbt_insert_case1(tree, cur);
+}
+
+
+/*
+ * State handed through trbt_insert32_callback() to
+ * array_insert_callback() while inserting with an array key.
+ */
+struct trbt_array_param {
+ void *(*callback)(void *param, void *data); /* the user's callback */
+ void *param; /* the user's callback parameter */
+ uint32_t keylen; /* number of key elements still to be consumed */
+ uint32_t *key; /* the remaining key elements */
+ trbt_tree_t *tree; /* tree of the current level, talloc parent of a new subtree */
+};
+/*
+ * Callback used by trbt_insertarray32_callback() for each key element.
+ *
+ * p is a struct trbt_array_param, data is what is currently stored for
+ * the key element (NULL if nothing). Returns the value to store: the
+ * result of the user's callback at the last key element, otherwise the
+ * (possibly newly created) subtree for the next level.
+ */
+static void *array_insert_callback(void *p, void *data)
+{
+	struct trbt_array_param *ap = (struct trbt_array_param *)p;
+	trbt_tree_t *subtree;
+
+	/* the whole key has been consumed: hand over to the user's
+	   callback with the user's parameter
+	*/
+	if (ap->keylen == 0) {
+		return ap->callback(ap->param, data);
+	}
+
+	/* keylen is not zero yet so there is another level below us.
+	   Reuse the subtree stored for this path or, if there is none,
+	   create one hanging off the current tree. It is set to autofree
+	   so that it is freed when its last node has been released.
+	*/
+	subtree = (data != NULL)
+		? (trbt_tree_t *)data
+		: trbt_create(ap->tree, TRBT_AUTOFREE);
+
+	trbt_insertarray32_callback(subtree, ap->keylen, ap->key, ap->callback, ap->param);
+
+	/* return the subtree so that our caller stores it for this key
+	   element
+	*/
+	return subtree;
+}
+
+
+
+/* insert into the tree using an array of uint32 as a key
+ *
+ * key[0] selects the entry in tree; the remaining keylen-1 elements
+ * are consumed level by level through array_insert_callback(), which
+ * finally calls cb(pm, data) for the last element.
+ * A keylen of 0 is not a valid key and is ignored.
+ */
+void
+trbt_insertarray32_callback(trbt_tree_t *tree, uint32_t keylen, uint32_t *key, void *(*cb)(void *param, void *data), void *pm)
+{
+	struct trbt_array_param tap;
+
+	/* keylen-1 below would wrap around for an empty key and key[0]
+	   would be read out of bounds
+	*/
+	if (keylen < 1) {
+		return;
+	}
+
+	/* keylen-1 and key[1] since the call to insert32 will consume the
+	   first part of the key.
+	*/
+	tap.callback= cb;
+	tap.param = pm;
+	tap.keylen = keylen-1;
+	tap.key = &key[1];
+	tap.tree = tree;
+
+	trbt_insert32_callback(tree, key[0], array_insert_callback, &tap);
+}
+
+/* lookup the tree using an array of uint32 as a key
+ *
+ * Every key element but the last selects the subtree of the next level,
+ * the last element selects the data that is returned.
+ * Returns NULL if any part of the key does not exist, if tree is NULL
+ * or if keylen is 0.
+ */
+void *
+trbt_lookuparray32(trbt_tree_t *tree, uint32_t keylen, uint32_t *key)
+{
+	/* an empty key can not match anything; without this check
+	   keylen-1 would wrap around
+	*/
+	if (keylen < 1) {
+		return NULL;
+	}
+
+	/* descend through the subtrees for all but the last key element */
+	while (tree != NULL && keylen > 1) {
+		tree = trbt_lookup32(tree, key[0]);
+		key++;
+		keylen--;
+	}
+
+	if (tree == NULL) {
+		/* the key does not exist, return NULL */
+		return NULL;
+	}
+
+	/* the last key element is a regular lookup */
+	return trbt_lookup32(tree, key[0]);
+}
+
+
+/* traverse a tree starting at node
+ *
+ * In-order walk: left subtree, node, right subtree. keylen is the
+ * number of key levels below this one: with 0 the callback is called on
+ * node->data, otherwise node->data is traversed as a subtree.
+ * The walk stops at the first non-zero result, which is returned.
+ */
+static int
+trbt_traversearray32_node(trbt_node_t *node, uint32_t keylen,
+	int (*callback)(void *param, void *data),
+	void *param)
+{
+	/* both links are read before anything is called
+	   NOTE(review): presumably so the walk survives node being
+	   removed from within the callback - confirm
+	*/
+	trbt_node_t *l = node->left;
+	trbt_node_t *r = node->right;
+	int rc;
+
+	if (l != NULL) {
+		rc = trbt_traversearray32_node(l, keylen, callback, param);
+		if (rc != 0) {
+			return rc;
+		}
+	}
+
+	if (keylen == 0) {
+		rc = callback(param, node->data);
+	} else {
+		rc = trbt_traversearray32(node->data, keylen, callback, param);
+	}
+	if (rc != 0) {
+		return rc;
+	}
+
+	if (r != NULL) {
+		return trbt_traversearray32_node(r, keylen, callback, param);
+	}
+
+	return 0;
+}
+
+
+/* traverse the tree using an array of uint32 as a key
+ *
+ * keylen is the number of key levels of the tree. callback is called
+ * for the data of every entry at the last level; a non-zero return
+ * value from callback aborts the traverse and is returned.
+ * Returns 0 when the traverse completed, including for a NULL or empty
+ * tree and for a keylen of 0.
+ */
+int
+trbt_traversearray32(trbt_tree_t *tree, uint32_t keylen,
+	int (*callback)(void *param, void *data),
+	void *param)
+{
+	/* there is nothing to visit with an empty key; without this
+	   check keylen-1 below would wrap around
+	*/
+	if (keylen < 1) {
+		return 0;
+	}
+
+	if (tree == NULL || tree->root == NULL) {
+		return 0;
+	}
+
+	return trbt_traversearray32_node(tree->root, keylen-1, callback, param);
+}
+
+
+/* this function will return the data of the first (leftmost) node in a
+   tree where the key is an array of uint32_t.
+   returns NULL if keylen is 0 or if a tree on the way is NULL or empty.
+*/
+void *
+trbt_findfirstarray32(trbt_tree_t *tree, uint32_t keylen)
+{
+	while (keylen >= 1 && tree != NULL) {
+		trbt_node_t *cur = tree->root;
+
+		if (cur == NULL) {
+			return NULL;
+		}
+
+		/* the first node of this level is the leftmost one */
+		while (cur->left != NULL) {
+			cur = cur->left;
+		}
+
+		/* last level: we found our node so return the data */
+		if (keylen == 1) {
+			return cur->data;
+		}
+
+		/* otherwise the data is the tree of the next level */
+		tree = cur->data;
+		keylen--;
+	}
+
+	return NULL;
+}
+
+
+#ifdef TEST_RB_TREE
+/*
+ * Debug helper: print the subtree rooted at node in key order, one node
+ * per line, prefixed according to its depth (levels).
+ */
+static void printtree(trbt_node_t *node, int levels)
+{
+	int i;
+
+	if (node == NULL) {
+		return;
+	}
+	printtree(node->left, levels+1);
+
+	for (i = 0; i < levels; i++) {
+		printf(" ");
+	}
+	/* key32 is unsigned and %p requires void pointers */
+	printf("key:%u COLOR:%s (node:%p parent:%p left:%p right:%p)\n",
+	       (unsigned int)node->key32,
+	       node->rb_color==TRBT_BLACK?"BLACK":"RED",
+	       (void *)node, (void *)node->parent,
+	       (void *)node->left, (void *)node->right);
+
+	printtree(node->right, levels+1);
+	printf("\n");
+}
+
+/* Debug helper: print the whole tree, or a note if it is empty. */
+void print_tree(trbt_tree_t *tree)
+{
+	trbt_node_t *root = tree->root;
+
+	if (root == NULL) {
+		printf("tree is empty\n");
+		return;
+	}
+	printf("---\n");
+	printtree(root->left, 1);
+	/* key32 is unsigned and %p requires void pointers */
+	printf("root node key:%u COLOR:%s (node:%p left:%p right:%p)\n",
+	       (unsigned int)root->key32,
+	       root->rb_color==TRBT_BLACK?"BLACK":"RED",
+	       (void *)root, (void *)root->left, (void *)root->right);
+	printtree(root->right, 1);
+	printf("===\n");
+}
+
+/*
+ * Manual stress test: endlessly insert and delete random keys in the
+ * range 0..19 and print the tree after every step. Does not return
+ * (the loop only ends if cnt wraps around to 0).
+ */
+void
+test_tree(void)
+{
+	trbt_tree_t *tree;
+	int i;
+	int cnt=0;
+
+	tree=trbt_create(talloc_new(NULL), 0);
+#if 0
+	for(i=0;i<10;i++){
+		printf("adding node %i\n",i);
+		trbt_insert32(tree, i, NULL);
+		print_tree(tree);
+	}
+	printf("deleting node %i\n",3);
+	trbt_delete32(tree, 3);
+	print_tree(tree);
+	for(i=0;i<10;i++){
+		printf("deleting node %i\n",i);
+		trbt_delete32(tree, i);
+		print_tree(tree);
+	}
+exit(0);
+#endif
+	while(++cnt){
+		printf("iteration : %d\n",cnt);
+		i=random()%20;
+		printf("adding node %i\n",i);
+		trbt_insert32(tree, i, NULL);
+		print_tree(tree);
+
+		i=random()%20;
+		printf("deleting node %i\n",i);
+		trbt_delete32(tree, i);
+		print_tree(tree);
+	}
+
+}
+
+#endif /* TEST_RB_TREE */
diff --git a/ctdb/common/rb_tree.h b/ctdb/common/rb_tree.h
new file mode 100644
index 0000000..59e7ccc
--- /dev/null
+++ b/ctdb/common/rb_tree.h
@@ -0,0 +1,90 @@
+/*
+ a talloc based red-black tree
+
+ Copyright (C) Ronnie Sahlberg 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _RB_TREE_H
+#define _RB_TREE_H
+
+#define TRBT_RED 0x00
+#define TRBT_BLACK 0x01
+/* A single node of the red-black tree */
+typedef struct trbt_node {
+ struct trbt_tree *tree; /* the tree this node belongs to */
+ struct trbt_node *parent; /* NULL for the root node */
+ struct trbt_node *left; /* subtree with smaller keys, or NULL */
+ struct trbt_node *right; /* subtree with larger keys, or NULL */
+ uint32_t rb_color; /* TRBT_RED or TRBT_BLACK */
+ uint32_t key32; /* the key of this node */
+ void *data; /* the data stored for the key */
+} trbt_node_t;
+
+/* A red-black tree keyed by uint32_t */
+typedef struct trbt_tree {
+ trbt_node_t *root; /* NULL while the tree is empty */
+/* automatically free the tree when the last node has been deleted */
+#define TRBT_AUTOFREE 0x00000001
+ uint32_t flags; /* as passed to trbt_create(), e.g. TRBT_AUTOFREE */
+} trbt_tree_t;
+
+
+
+/* Create a RB tree */
+trbt_tree_t *trbt_create(TALLOC_CTX *memctx, uint32_t flags);
+
+/* Lookup a node in the tree and return a pointer to data or NULL */
+void *trbt_lookup32(trbt_tree_t *tree, uint32_t key);
+
+/* Insert a new node into the tree. If there was already a node with this
+ key the pointer to the previous data is returned, otherwise NULL.
+ The tree node is talloc_steal()ed onto the inserted data, so the node
+ is removed from the tree when the data is freed.
+*/
+void *trbt_insert32(trbt_tree_t *tree, uint32_t key, void *data);
+
+/* Insert a new node into the tree.
+ If this is a new node:
+ callback is called with data==NULL and param=param
+ the value returned from the callback is stored in the new node and
+ the node is talloc_steal()ed onto that value.
+ If a node already exists for this key then:
+ callback is called with data==existing data and param=param
+ the returned value replaces the stored data and the node is
+ talloc_steal()ed onto it.
+*/
+void trbt_insert32_callback(trbt_tree_t *tree, uint32_t key, void *(*callback)(void *param, void *data), void *param);
+
+/* Delete a node from the tree. The data the node points to is not freed. */
+void trbt_delete32(trbt_tree_t *tree, uint32_t key);
+
+
+/* insert into the tree with a key based on an array of uint32 */
+void trbt_insertarray32_callback(trbt_tree_t *tree, uint32_t keylen, uint32_t *key, void *(*callback)(void *param, void *data), void *param);
+
+/* Lookup a node in the tree with a key based on an array of uint32
+ and return a pointer to data or NULL */
+void *trbt_lookuparray32(trbt_tree_t *tree, uint32_t keylen, uint32_t *key);
+
+/* Traverse a tree with a key based on an array of uint32
+ returns 0 if traverse completed
+ !0 if the traverse was aborted
+
+ If the callback returns !0 the traverse will be aborted
+*/
+int trbt_traversearray32(trbt_tree_t *tree, uint32_t keylen, int (*callback)(void *param, void *data), void *param);
+
+/* Lookup the first node in the tree with a key based on an array of uint32
+ and return a pointer to data or NULL */
+void *trbt_findfirstarray32(trbt_tree_t *tree, uint32_t keylen);
+
+#endif /* _RB_TREE_H */
diff --git a/ctdb/common/reqid.c b/ctdb/common/reqid.c
new file mode 100644
index 0000000..0e651cf
--- /dev/null
+++ b/ctdb/common/reqid.c
@@ -0,0 +1,89 @@
+/*
+ ctdb request id handling code
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+
+#include "lib/util/idtree.h"
+#include "reqid.h"
+
+/* State of a request id database */
+struct reqid_context {
+ struct idr_context *idr; /* maps request ids to the stored private data */
+ uint32_t lastid; /* last id handed out; starts as the start_id of reqid_init() */
+};
+
+/*
+ * Create a request id database below mem_ctx and return it in *result.
+ * A start_id of 0 or less is replaced by 1.
+ * Returns 0 on success and ENOMEM if an allocation fails, in which case
+ * *result is left untouched.
+ */
+int reqid_init(TALLOC_CTX *mem_ctx, int start_id,
+	       struct reqid_context **result)
+{
+	struct reqid_context *ctx;
+
+	ctx = talloc_zero(mem_ctx, struct reqid_context);
+	if (ctx == NULL) {
+		return ENOMEM;
+	}
+
+	ctx->idr = idr_init(ctx);
+	if (ctx->idr == NULL) {
+		talloc_free(ctx);
+		return ENOMEM;
+	}
+
+	ctx->lastid = (start_id > 0) ? start_id : 1;
+
+	*result = ctx;
+	return 0;
+}
+
+/*
+ * Allocate a new request id for private_data.
+ * An id above the last one handed out is tried first; if that fails the
+ * allocation is retried without the lower bound.
+ * Returns the new id, or REQID_INVALID if no id could be allocated.
+ */
+uint32_t reqid_new(struct reqid_context *reqid_ctx, void *private_data)
+{
+	int new_id;
+
+	new_id = idr_get_new_above(reqid_ctx->idr, private_data,
+				   reqid_ctx->lastid+1, INT_MAX);
+	if (new_id < 0) {
+		/* reqid wrapped */
+		new_id = idr_get_new(reqid_ctx->idr, private_data, INT_MAX);
+	}
+
+	if (new_id == -1) {
+		return REQID_INVALID;
+	}
+
+	reqid_ctx->lastid = new_id;
+	return new_id;
+}
+
+/* Return whatever idr_find() yields for reqid in the database. */
+void *_reqid_find(struct reqid_context *reqid_ctx, uint32_t reqid)
+{
+	void *found = idr_find(reqid_ctx->idr, reqid);
+
+	return found;
+}
+
+/*
+ * Remove reqid from the database.
+ * Returns 0 on success, ENOENT if idr_remove() reports a failure.
+ */
+int reqid_remove(struct reqid_context *reqid_ctx, uint32_t reqid)
+{
+	if (idr_remove(reqid_ctx->idr, reqid) < 0) {
+		return ENOENT;
+	}
+
+	return 0;
+}
diff --git a/ctdb/common/reqid.h b/ctdb/common/reqid.h
new file mode 100644
index 0000000..d6d3936
--- /dev/null
+++ b/ctdb/common/reqid.h
@@ -0,0 +1,89 @@
+/*
+ Request id database
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_REQID_H__
+#define __CTDB_REQID_H__
+
+#include <talloc.h>
+
+/**
+ * @file reqid.h
+ *
+ * @brief Request id database
+ *
+ * CTDB tracks messages using request id. CTDB stores client state for each
+ * request id to process the replies correctly.
+ */
+
+/**
+ * @brief Abstract struct to store request id database
+ */
+struct reqid_context;
+
+#define REQID_INVALID 0xffffffff
+
+/**
+ * @brief Initialize request id database
+ *
+ * This returns a new request id context. Freeing this context will free
+ * all the memory associated with request id database.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] start_id The initial id
+ * @param[out] result The new reqid_context structure
+ * @return 0 on success, errno on failure
+ */
+int reqid_init(TALLOC_CTX *mem_ctx, int start_id,
+ struct reqid_context **result);
+
+/**
+ * @brief Generate new request id and associate given data with the request id
+ *
+ * @param[in] reqid_ctx The request id context
+ * @param[in] private_data The state to associate with new request id
+ * @return new request id, REQID_INVALID on failure
+ */
+uint32_t reqid_new(struct reqid_context *reqid_ctx, void *private_data);
+
+#ifdef DOXYGEN
+/**
+ * @brief Fetch the data associated with the request id
+ *
+ * @param[in] reqid_ctx The request id context
+ * @param[in] reqid The request id
+ * @param[in] type The data type of the stored data
+ * @return the data stored for the reqid, NULL on failure
+ */
+type *reqid_find(struct reqid_context *reqid_ctx, uint32_t reqid, #type);
+#else
+/*
+ * _reqid_find() is the untyped lookup. Callers use the reqid_find()
+ * macro, which passes the result through talloc_check_name() with the
+ * stringified type and casts it to (type *).
+ */
+void *_reqid_find(struct reqid_context *reqid_ctx, uint32_t reqid);
+#define reqid_find(ctx, reqid, type) \
+ (type *)talloc_check_name(_reqid_find(ctx, reqid), #type)
+#endif
+
+/**
+ * @brief Remove the data associated with the request id
+ *
+ * @param[in] reqid_ctx The request id context
+ * @param[in] reqid The request id
+ * @return 0 on success, errno on failure
+ */
+int reqid_remove(struct reqid_context *reqid_ctx, uint32_t reqid);
+
+#endif /* __CTDB_REQID_H__ */
diff --git a/ctdb/common/run_event.c b/ctdb/common/run_event.c
new file mode 100644
index 0000000..d283664
--- /dev/null
+++ b/ctdb/common/run_event.c
@@ -0,0 +1,829 @@
+/*
+ Run scripts in a directory with specific event arguments
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/dir.h"
+#include "system/glob.h"
+#include "system/wait.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/tevent_unix.h"
+#include "lib/util/debug.h"
+
+#include "common/logging.h"
+#include "common/run_proc.h"
+#include "common/event_script.h"
+
+#include "common/run_event.h"
+
+/*
+ * Utility functions
+ */
+
+/*
+ * Build the list of scripts found in script_dir.
+ *
+ * On success *out is a new run_event_script_list allocated on mem_ctx,
+ * or NULL when the directory contains no scripts.  Scripts reported as
+ * not enabled start out with a summary of -ENOEXEC.
+ *
+ * Returns 0 on success, or the error from event_script_get_list() /
+ * ENOMEM on failure.
+ */
+static int get_script_list(TALLOC_CTX *mem_ctx,
+			   const char *script_dir,
+			   struct run_event_script_list **out)
+{
+	struct event_script_list *found;
+	struct run_event_script_list *list;
+	unsigned int idx;
+	int ret;
+
+	ret = event_script_get_list(mem_ctx, script_dir, &found);
+	if (ret == ENOENT) {
+		D_WARNING("event script dir %s removed\n", script_dir);
+		return ret;
+	}
+	if (ret != 0) {
+		D_WARNING("failed to get script list for %s, ret=%d\n",
+			  script_dir, ret);
+		return ret;
+	}
+
+	if (found->num_scripts == 0) {
+		*out = NULL;
+		ret = 0;
+		goto out;
+	}
+
+	list = talloc_zero(mem_ctx, struct run_event_script_list);
+	if (list == NULL) {
+		ret = ENOMEM;
+		goto out;
+	}
+
+	list->num_scripts = found->num_scripts;
+	list->script = talloc_zero_array(list,
+					 struct run_event_script,
+					 list->num_scripts);
+	if (list->script == NULL) {
+		talloc_free(list);
+		ret = ENOMEM;
+		goto out;
+	}
+
+	for (idx = 0; idx < found->num_scripts; idx++) {
+		struct event_script *src = found->script[idx];
+		struct run_event_script *dst = &list->script[idx];
+
+		/* Take ownership of the name before the source list goes */
+		dst->name = talloc_steal(list->script, src->name);
+
+		if (! src->enabled) {
+			dst->summary = -ENOEXEC;
+		}
+	}
+
+	*out = list;
+	ret = 0;
+
+out:
+	talloc_free(found);
+	return ret;
+}
+
+/*
+ * Build the NULL-terminated argv used to run every script of an event.
+ *
+ * argv[0] is left NULL (the caller stores the script name there),
+ * argv[1] is event_str, and the following entries are copies of the
+ * space separated tokens of arg_str, if arg_str is not NULL.
+ *
+ * Returns 0 and sets *out on success.  On allocation failure returns
+ * ENOMEM and frees the partially built array.
+ */
+static int script_args(TALLOC_CTX *mem_ctx, const char *event_str,
+		       const char *arg_str, const char ***out)
+{
+	const char **argv;
+	size_t argc;
+	size_t len;
+
+	/* Preallocate argv array to avoid reallocation. */
+	len = 8;
+	argv = talloc_array(mem_ctx, const char *, len);
+	if (argv == NULL) {
+		return ENOMEM;
+	}
+
+	argv[0] = NULL; /* script name */
+	argv[1] = event_str;
+	argc = 2;
+
+	if (arg_str != NULL) {
+		char *str, *t, *tok;
+
+		/* strtok() modifies its input, so work on a private copy */
+		str = talloc_strdup(argv, arg_str);
+		if (str == NULL) {
+			talloc_free(argv);
+			return ENOMEM;
+		}
+
+		t = str;
+		while ((tok = strtok(t, " ")) != NULL) {
+			argv[argc] = talloc_strdup(argv, tok);
+			if (argv[argc] == NULL) {
+				talloc_free(argv);
+				return ENOMEM;
+			}
+			argc += 1;
+			if (argc >= len) {
+				const char **tmp;
+
+				/*
+				 * Keep the old pointer until the resize
+				 * succeeds so it can be freed on failure.
+				 */
+				tmp = talloc_realloc(mem_ctx, argv,
+						     const char *, len + 8);
+				if (tmp == NULL) {
+					talloc_free(argv);
+					return ENOMEM;
+				}
+				argv = tmp;
+				len += 8;
+			}
+			t = NULL;
+		}
+
+		talloc_free(str);
+	}
+
+	/* The loop above always leaves room for the terminator */
+	argv[argc] = NULL;
+	/* argc += 1 */
+
+	*out = argv;
+	return 0;
+}
+
+struct run_event_context { /* State shared by all events run from one script directory */
+ struct run_proc_context *run_proc_ctx; /* passed to run_proc_send() for scripts and the debug program */
+ const char *script_dir; /* copy of the directory given to run_event_init() */
+ const char *debug_prog; /* copy of the debug program path, NULL if none was given */
+ bool debug_running; /* true while a debug program started by run_debug_send() is active */
+
+ struct tevent_queue *queue; /* run_event requests are added here by run_event_send() */
+ struct tevent_req *current_req; /* request whose scripts are running, NULL if none */
+ bool monitor_running; /* true when current_req is a "monitor" event */
+};
+
+
+/*
+ * Create a run_event context for the scripts in script_dir.
+ *
+ * script_dir must exist and be a directory.  debug_prog is optional.
+ * Returns 0 and sets *out on success; on failure returns the errno
+ * from stat(), ENOTDIR or ENOMEM and leaves *out untouched.
+ */
+int run_event_init(TALLOC_CTX *mem_ctx, struct run_proc_context *run_proc_ctx,
+		   const char *script_dir, const char *debug_prog,
+		   struct run_event_context **out)
+{
+	struct run_event_context *ctx;
+	struct stat sbuf;
+	int err;
+
+	ctx = talloc_zero(mem_ctx, struct run_event_context);
+	if (ctx == NULL) {
+		return ENOMEM;
+	}
+
+	ctx->run_proc_ctx = run_proc_ctx;
+	ctx->debug_running = false;
+	ctx->monitor_running = false;
+
+	if (stat(script_dir, &sbuf) != 0) {
+		/* Save errno before anything else can change it */
+		err = errno;
+		goto fail;
+	}
+
+	if (! S_ISDIR(sbuf.st_mode)) {
+		err = ENOTDIR;
+		goto fail;
+	}
+
+	err = ENOMEM;
+
+	ctx->script_dir = talloc_strdup(ctx, script_dir);
+	if (ctx->script_dir == NULL) {
+		goto fail;
+	}
+
+	if (debug_prog != NULL) {
+		ctx->debug_prog = talloc_strdup(ctx, debug_prog);
+		if (ctx->debug_prog == NULL) {
+			goto fail;
+		}
+	}
+
+	ctx->queue = tevent_queue_create(ctx, "run event queue");
+	if (ctx->queue == NULL) {
+		goto fail;
+	}
+
+	*out = ctx;
+	return 0;
+
+fail:
+	talloc_free(ctx);
+	return err;
+}
+
+static struct run_proc_context *
+run_event_run_proc_context(struct run_event_context *run_ctx) /* Accessor for the run_proc context stored by run_event_init() */
+{
+ return run_ctx->run_proc_ctx;
+}
+
+static const char *run_event_script_dir(struct run_event_context *run_ctx) /* Accessor for the script directory path */
+{
+ return run_ctx->script_dir;
+}
+
+static const char *run_event_debug_prog(struct run_event_context *run_ctx) /* Accessor for the debug program path; NULL if none was configured */
+{
+ return run_ctx->debug_prog;
+}
+
+static struct tevent_queue *run_event_queue(struct run_event_context *run_ctx) /* Accessor for the queue that run_event requests are added to */
+{
+ return run_ctx->queue;
+}
+
+static void run_event_start_running(struct run_event_context *run_ctx, /* Record req as the event currently running */
+ struct tevent_req *req, bool is_monitor)
+{
+ run_ctx->current_req = req;
+ run_ctx->monitor_running = is_monitor; /* remembered so run_event_send() can tell a monitor event from others */
+}
+
+static void run_event_stop_running(struct run_event_context *run_ctx) /* Forget the currently running event */
+{
+ run_ctx->current_req = NULL;
+ run_ctx->monitor_running = false;
+}
+
+static struct tevent_req *run_event_get_running( /* Return the running request (NULL if none) */
+ struct run_event_context *run_ctx,
+ bool *is_monitor) /* out: whether the running request is a monitor event */
+{
+ *is_monitor = run_ctx->monitor_running;
+ return run_ctx->current_req;
+}
+
+/*
+ * Reduce the run_proc result of a script to a single summary value:
+ *   -EINTR        if a signal was recorded,
+ *   -ENOEXEC      if the recorded error is EACCES,
+ *   -err          for any other recorded error,
+ *   exit status   otherwise.
+ */
+static int run_event_script_status(struct run_event_script *script)
+{
+	if (script->result.sig > 0) {
+		return -EINTR;
+	}
+
+	if (script->result.err <= 0) {
+		return script->result.status;
+	}
+
+	/* Map EACCES to ENOEXEC */
+	return (script->result.err == EACCES) ? -ENOEXEC
+					      : -script->result.err;
+}
+
+/*
+ * List the scripts in the script directory of run_ctx.
+ *
+ * On success *output is a new list allocated on mem_ctx, or NULL when
+ * the directory contains no scripts.  Scripts reported as not enabled
+ * carry a summary of -ENOEXEC.
+ *
+ * Returns 0 on success, the error from event_script_get_list() or
+ * ENOMEM on failure.  The temporary list is released on every path.
+ */
+int run_event_list(struct run_event_context *run_ctx,
+		   TALLOC_CTX *mem_ctx,
+		   struct run_event_script_list **output)
+{
+	struct event_script_list *s_list = NULL;
+	struct run_event_script_list *script_list = NULL;
+	unsigned int i;
+	int ret;
+
+	ret = event_script_get_list(mem_ctx,
+				    run_event_script_dir(run_ctx),
+				    &s_list);
+	if (ret != 0) {
+		return ret;
+	}
+
+	if (s_list->num_scripts == 0) {
+		*output = NULL;
+		talloc_free(s_list);
+		return 0;
+	}
+
+	script_list = talloc_zero(mem_ctx, struct run_event_script_list);
+	if (script_list == NULL) {
+		/* Do not leave s_list hanging off mem_ctx */
+		talloc_free(s_list);
+		return ENOMEM;
+	}
+
+	script_list->num_scripts = s_list->num_scripts;
+	script_list->script = talloc_zero_array(script_list,
+						struct run_event_script,
+						script_list->num_scripts);
+	if (script_list->script == NULL) {
+		talloc_free(s_list);
+		talloc_free(script_list);
+		return ENOMEM;
+	}
+
+	for (i = 0; i < s_list->num_scripts; i++) {
+		struct event_script *s = s_list->script[i];
+		struct run_event_script *script = &script_list->script[i];
+
+		/* Re-parent the name so it survives freeing s_list */
+		script->name = talloc_steal(script_list->script, s->name);
+
+		if (! s->enabled) {
+			script->summary = -ENOEXEC;
+		}
+	}
+
+	talloc_free(s_list);
+	*output = script_list;
+	return 0;
+}
+
+int run_event_script_enable(struct run_event_context *run_ctx, /* Enable script_name in the context's script directory */
+ const char *script_name)
+{
+ return event_script_chmod(run_event_script_dir(run_ctx), /* result of event_script_chmod() is returned unchanged */
+ script_name,
+ true); /* true selects "enable" */
+}
+
+int run_event_script_disable(struct run_event_context *run_ctx, /* Disable script_name in the context's script directory */
+ const char *script_name)
+{
+ return event_script_chmod(run_event_script_dir(run_ctx), /* result of event_script_chmod() is returned unchanged */
+ script_name,
+ false); /* false selects "disable" */
+}
+
+/*
+ * Run debug program to diagnose hung scripts
+ */
+
+/*
+ * Build the argv for the debug program: { path, "<pid>", event_str, NULL }.
+ *
+ * path and event_str are referenced, not copied; only the pid string is
+ * allocated (as a child of the array).  Returns 0 and sets *out on
+ * success, ENOMEM on allocation failure.
+ */
+static int debug_args(TALLOC_CTX *mem_ctx, const char *path,
+		      const char *event_str, pid_t pid, const char ***out)
+{
+	const char **args;
+	char *pid_str;
+
+	args = talloc_array(mem_ctx, const char *, 4);
+	if (args == NULL) {
+		return ENOMEM;
+	}
+
+	pid_str = talloc_asprintf(args, "%d", pid);
+	if (pid_str == NULL) {
+		talloc_free(args);
+		return ENOMEM;
+	}
+
+	args[0] = path;
+	args[1] = pid_str;
+	args[2] = event_str;
+	args[3] = NULL;
+
+	*out = args;
+	return 0;
+}
+
+/*
+ * Log output at loglevel, one log message per line of output, each
+ * prefixed with "log_prefix: ".  If the working copy cannot be
+ * allocated the whole output is logged as a single message.
+ */
+static void debug_log(int loglevel, const char *output, const char *log_prefix)
+{
+	char *copy, *cur;
+
+	/* strtok() writes into its argument, so split a private copy */
+	copy = strdup(output);
+	if (copy == NULL) {
+		DEBUG(loglevel, ("%s: %s\n", log_prefix, output));
+		return;
+	}
+
+	for (cur = strtok(copy, "\n");
+	     cur != NULL;
+	     cur = strtok(NULL, "\n")) {
+		DEBUG(loglevel, ("%s: %s\n", log_prefix, cur));
+	}
+
+	free(copy);
+}
+
+struct run_debug_state { /* Request state for run_debug_send() */
+ struct run_event_context *run_ctx; /* owner of the debug_running flag */
+ pid_t pid; /* pid passed to the debug program; run_debug_done() sends SIGTERM to -pid */
+};
+
+static void run_debug_done(struct tevent_req *subreq);
+
+/*
+ * Start the configured debug program for a script identified by pid.
+ *
+ * The request completes immediately, without running anything, when
+ * no debug program is configured, when one is already running, or
+ * when pid is -1.  Otherwise the program is started through
+ * run_proc_send() with no timeout and debug_running is set until
+ * run_debug_done() is called.
+ */
+static struct tevent_req *run_debug_send(TALLOC_CTX *mem_ctx,
+					 struct tevent_context *ev,
+					 struct run_event_context *run_ctx,
+					 const char *event_str, pid_t pid)
+{
+	struct tevent_req *req, *proc_req;
+	struct run_debug_state *state;
+	const char **args;
+	const char *prog;
+	int ret;
+
+	req = tevent_req_create(mem_ctx, &state, struct run_debug_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->run_ctx = run_ctx;
+	state->pid = pid;
+
+	prog = run_event_debug_prog(run_ctx);
+	if (prog == NULL || run_ctx->debug_running) {
+		/* Nothing to run, or a debug run is already in progress */
+		tevent_req_done(req);
+		return tevent_req_post(req, ev);
+	}
+
+	if (pid == -1) {
+		D_DEBUG("Event script terminated, nothing to debug\n");
+		tevent_req_done(req);
+		return tevent_req_post(req, ev);
+	}
+
+	ret = debug_args(state, prog, event_str, pid, &args);
+	if (ret != 0) {
+		D_ERR("debug_args() failed\n");
+		tevent_req_error(req, ret);
+		return tevent_req_post(req, ev);
+	}
+
+	D_DEBUG("Running debug %s with args \"%s %s\"\n",
+		prog, args[1], args[2]);
+
+	proc_req = run_proc_send(state, ev,
+				 run_event_run_proc_context(run_ctx),
+				 prog, args, -1, tevent_timeval_zero());
+	if (tevent_req_nomem(proc_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(proc_req, run_debug_done, req);
+
+	run_ctx->debug_running = true;
+
+	/* The argument vector is not needed once run_proc_send() returned */
+	talloc_free(args);
+	return req;
+}
+
+/*
+ * Completion callback for the debug program.
+ *
+ * Clears debug_running, logs whatever the program printed, sends
+ * SIGTERM to the process group of the debugged script and completes
+ * the run_debug request.  A failure to run the program is only logged.
+ */
+static void run_debug_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct run_debug_state *state = tevent_req_data(
+		req, struct run_debug_state);
+	/*
+	 * run_proc_recv() returns early on failure without storing to
+	 * its output parameters, so both must start out defined.
+	 */
+	char *output = NULL;
+	int ret = 0;
+	bool status;
+
+	state->run_ctx->debug_running = false;
+
+	status = run_proc_recv(subreq, &ret, NULL, NULL, state, &output);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		D_ERR("Running debug failed, ret=%d\n", ret);
+	}
+
+	/* Log output */
+	if (output != NULL) {
+		debug_log(DEBUG_ERR, output, "event_debug");
+		talloc_free(output);
+	}
+
+	/* Negative pid: signal the whole process group of the script */
+	kill(-state->pid, SIGTERM);
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the result of run_debug_send().
+ *
+ * Returns true on success.  On failure returns false and, if perr is
+ * not NULL, stores the error code in *perr.
+ */
+static bool run_debug_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (! tevent_req_is_unix_error(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/*
+ * Run a single event
+ */
+
+struct run_event_state { /* Request state for run_event_send() */
+ struct tevent_context *ev;
+ struct run_event_context *run_ctx;
+ const char *event_str; /* copy of the event name; argv[1] of every script */
+ const char *arg_str; /* copy of the optional argument string, NULL if none */
+ struct timeval timeout; /* passed to run_proc_send() for each script */
+ bool continue_on_failure; /* keep running the remaining scripts after one fails */
+
+ struct run_event_script_list *script_list; /* scripts and their results; handed to the caller by run_event_recv() */
+ const char **argv; /* built by script_args(); argv[0] is rewritten for each script */
+ struct tevent_req *script_subreq; /* run_proc request of the script in progress, NULL if none */
+ unsigned int index; /* index in script_list of the script in progress */
+ bool cancelled; /* set by run_event_cancel() */
+};
+
+static void run_event_cancel(struct tevent_req *req);
+static void run_event_trigger(struct tevent_req *req, void *private_data);
+static struct tevent_req *run_event_run_script(struct tevent_req *req);
+static void run_event_next_script(struct tevent_req *subreq);
+static void run_event_debug(struct tevent_req *req, pid_t pid);
+static void run_event_debug_done(struct tevent_req *subreq);
+
+/*
+ * Queue an event to be run against all scripts in the script directory.
+ *
+ * event_str and arg_str are copied.  The request is added to the
+ * context's queue and started by run_event_trigger(), subject to the
+ * monitor rules below.
+ */
+struct tevent_req *run_event_send(TALLOC_CTX *mem_ctx,
+				  struct tevent_context *ev,
+				  struct run_event_context *run_ctx,
+				  const char *event_str,
+				  const char *arg_str,
+				  struct timeval timeout,
+				  bool continue_on_failure)
+{
+	struct tevent_req *req, *running;
+	struct run_event_state *state;
+	bool running_is_monitor;
+
+	req = tevent_req_create(mem_ctx, &state, struct run_event_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->ev = ev;
+	state->run_ctx = run_ctx;
+	state->timeout = timeout;
+	state->continue_on_failure = continue_on_failure;
+	state->cancelled = false;
+
+	state->event_str = talloc_strdup(state, event_str);
+	if (tevent_req_nomem(state->event_str, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	if (arg_str != NULL) {
+		state->arg_str = talloc_strdup(state, arg_str);
+		if (tevent_req_nomem(state->arg_str, req)) {
+			return tevent_req_post(req, ev);
+		}
+	}
+
+	/* Placeholder list so a summary can be reported without scripts */
+	state->script_list = talloc_zero(state, struct run_event_script_list);
+	if (tevent_req_nomem(state->script_list, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	/*
+	 * A running monitor event gives way to any new event: cancel it
+	 * and queue the new one.
+	 *
+	 * A running non-monitor event wins over a new monitor event:
+	 * the new monitor event completes at once with -ECANCELED.
+	 *
+	 * In every other case the new event is simply queued.
+	 */
+	running = run_event_get_running(run_ctx, &running_is_monitor);
+	if (running != NULL && running_is_monitor) {
+		run_event_cancel(running);
+	} else if (running != NULL && strcmp(event_str, "monitor") == 0) {
+		state->script_list->summary = -ECANCELED;
+		tevent_req_done(req);
+		return tevent_req_post(req, ev);
+	}
+
+	if (! tevent_queue_add(run_event_queue(run_ctx), ev, req,
+			       run_event_trigger, NULL)) {
+		tevent_req_error(req, ENOMEM);
+		return tevent_req_post(req, ev);
+	}
+
+	return req;
+}
+
+static void run_event_cancel(struct tevent_req *req) /* Cancel a running event: report -ECANCELED and complete req */
+{
+ struct run_event_state *state = tevent_req_data(
+ req, struct run_event_state);
+
+ run_event_stop_running(state->run_ctx); /* the context no longer refers to req */
+
+ state->script_list->summary = -ECANCELED;
+ state->cancelled = true;
+
+ TALLOC_FREE(state->script_subreq); /* drop the in-progress run_proc request, if any */
+
+ tevent_req_done(req); /* the cancelled event still completes successfully; the summary carries the cancellation */
+}
+
+/*
+ * Queue trigger: start running the scripts for a queued event.
+ *
+ * Reads the current script list, builds the argument vector, starts
+ * the first script and records req as the running event.  With no
+ * scripts the request completes immediately.  private_data is unused.
+ */
+static void run_event_trigger(struct tevent_req *req, void *private_data)
+{
+	struct run_event_state *state = tevent_req_data(
+		req, struct run_event_state);
+	struct run_event_script_list *scripts;
+	struct tevent_req *proc_req;
+	const char *args_for_log;
+	int ret;
+
+	args_for_log = (state->arg_str != NULL) ? state->arg_str : "(null)";
+	D_DEBUG("Running event %s with args \"%s\"\n", state->event_str,
+		args_for_log);
+
+	ret = get_script_list(state,
+			      run_event_script_dir(state->run_ctx),
+			      &scripts);
+	if (ret != 0) {
+		D_ERR("get_script_list() failed, ret=%d\n", ret);
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	/* No scripts */
+	if (scripts == NULL || scripts->num_scripts == 0) {
+		tevent_req_done(req);
+		return;
+	}
+
+	/* Replace the placeholder list created by run_event_send() */
+	talloc_free(state->script_list);
+	state->script_list = scripts;
+
+	ret = script_args(state, state->event_str, state->arg_str,
+			  &state->argv);
+	if (ret != 0) {
+		D_ERR("script_args() failed, ret=%d\n", ret);
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	state->index = 0;
+
+	proc_req = run_event_run_script(req);
+	if (tevent_req_nomem(proc_req, req)) {
+		return;
+	}
+	tevent_req_set_callback(proc_req, run_event_next_script, req);
+	state->script_subreq = proc_req;
+
+	run_event_start_running(state->run_ctx, req,
+				strcmp(state->event_str, "monitor") == 0);
+}
+
+/*
+ * Start the script at state->index.
+ *
+ * The script is run as "<script_dir>/<name>.script" with argv[0] set
+ * to the script name, and its begin time is recorded.  Returns the
+ * run_proc request, or NULL if the path or the request could not be
+ * allocated.
+ */
+static struct tevent_req *run_event_run_script(struct tevent_req *req)
+{
+	struct run_event_state *state = tevent_req_data(
+		req, struct run_event_state);
+	struct run_event_script *cur = &state->script_list->script[state->index];
+	const char *dir = run_event_script_dir(state->run_ctx);
+	struct tevent_req *proc_req;
+	char *script_path;
+
+	script_path = talloc_asprintf(state, "%s/%s.script", dir, cur->name);
+	if (script_path == NULL) {
+		return NULL;
+	}
+
+	state->argv[0] = cur->name;
+	cur->begin = tevent_timeval_current();
+
+	D_DEBUG("Running %s with args \"%s %s\"\n",
+		script_path, state->argv[0], state->argv[1]);
+
+	proc_req = run_proc_send(state, state->ev,
+				 run_event_run_proc_context(state->run_ctx),
+				 script_path, state->argv, -1, state->timeout);
+
+	/* The path is only needed while starting the process */
+	talloc_free(script_path);
+
+	return proc_req;
+}
+
+/*
+ * Completion callback for one script: record its result and either
+ * finish the event or start the next script.
+ *
+ * The event finishes when run_proc fails, when a script fails and
+ * continue_on_failure is not set, or when all scripts have run.  On
+ * every path that finishes the event the context is told that nothing
+ * is running any more, so it never keeps a pointer to a completed
+ * request.
+ */
+static void run_event_next_script(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct run_event_state *state = tevent_req_data(
+		req, struct run_event_state);
+	struct run_event_script *script;
+	pid_t pid;
+	int ret;
+	bool status;
+
+	script = &state->script_list->script[state->index];
+	script->end = tevent_timeval_current();
+
+	status = run_proc_recv(subreq, &ret, &script->result, &pid,
+			       state->script_list, &script->output);
+	TALLOC_FREE(subreq);
+	state->script_subreq = NULL;
+	if (! status) {
+		D_ERR("run_proc failed for %s, ret=%d\n", script->name, ret);
+		run_event_stop_running(state->run_ctx);
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	if (state->cancelled) {
+		return;
+	}
+
+	/* Log output */
+	if (script->output != NULL) {
+		debug_log(DEBUG_ERR, script->output, script->name);
+	}
+
+	D_DEBUG("Script %s finished sig=%d, err=%d, status=%d\n",
+		script->name, script->result.sig, script->result.err,
+		script->result.status);
+
+
+	/* If a script fails, stop running */
+	script->summary = run_event_script_status(script);
+	if (script->summary != 0 && script->summary != -ENOEXEC) {
+		state->script_list->summary = script->summary;
+
+		if (! state->continue_on_failure) {
+			/* Report only the scripts that were actually run */
+			state->script_list->num_scripts = state->index + 1;
+
+			if (script->summary == -ETIMEDOUT && pid != -1) {
+				run_event_debug(req, pid);
+			}
+			D_NOTICE("%s event %s\n", state->event_str,
+				 (script->summary == -ETIMEDOUT) ?
+				  "timed out" :
+				  "failed");
+			run_event_stop_running(state->run_ctx);
+			tevent_req_done(req);
+			return;
+		}
+	}
+
+	state->index += 1;
+
+	/* All scripts executed */
+	if (state->index >= state->script_list->num_scripts) {
+		run_event_stop_running(state->run_ctx);
+		tevent_req_done(req);
+		return;
+	}
+
+	subreq = run_event_run_script(req);
+	if (subreq == NULL) {
+		/*
+		 * The request is about to fail; make sure the context
+		 * does not keep it as the current request.
+		 */
+		run_event_stop_running(state->run_ctx);
+	}
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, run_event_next_script, req);
+
+	state->script_subreq = subreq;
+}
+
+static void run_event_debug(struct tevent_req *req, pid_t pid) /* Start the debug program for the script with the given pid */
+{
+ struct run_event_state *state = tevent_req_data(
+ req, struct run_event_state);
+ struct tevent_req *subreq;
+
+ /* Debug program is run with run_ctx, not req, as the talloc parent */
+ subreq = run_debug_send(state->run_ctx, state->ev, state->run_ctx,
+ state->event_str, pid);
+ if (subreq == NULL) {
+ /* If run debug fails, it's not an error */
+ D_NOTICE("Failed to run event debug\n");
+ return;
+ }
+ tevent_req_set_callback(subreq, run_event_debug_done, NULL); /* no callback data: completion only logs */
+}
+
+/*
+ * Completion callback for the debug request started by
+ * run_event_debug(): release the request and log a failure, if any.
+ */
+static void run_event_debug_done(struct tevent_req *subreq)
+{
+	int err = 0;
+	bool ok;
+
+	ok = run_debug_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+	if (ok) {
+		return;
+	}
+
+	D_NOTICE("run_debug() failed, ret=%d\n", err);
+}
+
+/*
+ * Collect the result of run_event_send().
+ *
+ * On failure returns false and, if perr is not NULL, stores the error
+ * code in *perr.  On success returns true and, if script_list is not
+ * NULL, moves the list of scripts and their results onto mem_ctx.
+ */
+bool run_event_recv(struct tevent_req *req, int *perr,
+		    TALLOC_CTX *mem_ctx,
+		    struct run_event_script_list **script_list)
+{
+	struct run_event_state *state = tevent_req_data(
+		req, struct run_event_state);
+	int err;
+
+	if (tevent_req_is_unix_error(req, &err)) {
+		if (perr != NULL) {
+			*perr = err;
+		}
+		return false;
+	}
+
+	if (script_list == NULL) {
+		return true;
+	}
+
+	*script_list = talloc_steal(mem_ctx, state->script_list);
+	return true;
+}
+
diff --git a/ctdb/common/run_event.h b/ctdb/common/run_event.h
new file mode 100644
index 0000000..f53bca3
--- /dev/null
+++ b/ctdb/common/run_event.h
@@ -0,0 +1,150 @@
+/*
+ Run scripts in a directory with specific event arguments
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_RUN_EVENT_H__
+#define __CTDB_RUN_EVENT_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "common/run_proc.h"
+
+/**
+ * @file run_event.h
+ *
+ * @brief Run scripts in a directory with specific event arguments.
+ *
+ * This abstraction allows one to execute multiple scripts in a directory
+ * (specified by script_dir) with given event and arguments.
+ *
+ * At one time, only one event can be run. Multiple run_event calls
+ * will cause events to be queued up. They will be run sequentially.
+ *
+ * A "monitor" event is special and has special semantics.
+ *
+ * If a monitor event is running and another event is scheduled, the
+ * currently running monitor event is cancelled.
+ *
+ * If an event (not monitor) is running and monitor event is scheduled,
+ * then the monitor event will be cancelled immediately.
+ */
+
+/**
+ * @brief The run event context
+ */
+struct run_event_context;
+
+struct run_event_script { /* Result of running one script */
+ char *name; /* script name, without directory or ".script" suffix */
+ struct timeval begin, end; /* times recorded just before starting and on completion */
+ struct run_proc_result result; /* raw signal / error / exit status from run_proc */
+ int summary; /* 0 on success, negative errno-style value or exit status otherwise; -ENOEXEC for disabled scripts */
+ char *output; /* captured output of the script, may be NULL */
+};
+
+struct run_event_script_list { /* Results for all scripts of one event */
+ uint32_t num_scripts; /* number of valid entries in script */
+ struct run_event_script *script; /* array of per-script results */
+ int summary; /* summary of a failed script, or -ECANCELED if the event was cancelled; 0 otherwise */
+};
+
+
+/**
+ * @brief Initialize the context for running events
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] run_proc_ctx Run_proc context used to run the scripts
+ * @param[in] script_dir Directory containing script to run
+ * @param[in] debug_prog Path of a program to run if a script hangs
+ * @param[out] result New run_event context
+ * @return 0 on success, errno on error
+ */
+int run_event_init(TALLOC_CTX *mem_ctx, struct run_proc_context *run_proc_ctx,
+ const char *script_dir, const char *debug_prog,
+ struct run_event_context **result);
+
+/**
+ * @brief Get a list of scripts
+ *
+ * @param[in] run_ctx Run_event context
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] output List of valid scripts
+ * @return 0 on success, errno on failure
+ */
+int run_event_list(struct run_event_context *run_ctx,
+ TALLOC_CTX *mem_ctx,
+ struct run_event_script_list **output);
+
+/**
+ * @brief Enable a script
+ *
+ * @param[in] run_ctx Run_event context
+ * @param[in] script_name Name of the script to enable
+ * @return 0 on success, errno on failure
+ */
+int run_event_script_enable(struct run_event_context *run_ctx,
+ const char *script_name);
+
+/**
+ * @brief Disable a script
+ *
+ * @param[in] run_ctx Run_event context
+ * @param[in] script_name Name of the script to disable
+ * @return 0 on success, errno on failure
+ */
+int run_event_script_disable(struct run_event_context *run_ctx,
+ const char *script_name);
+
+/**
+ * @brief Async computation start to run an event
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] run_ctx Run_event context
+ * @param[in] event_str The event argument to the script
+ * @param[in] arg_str Event arguments to the script
+ * @param[in] timeout How long to wait for execution
+ * @param[in] continue_on_failure Whether to continue to run events on failure
+ * @return new tevent request, or NULL on failure
+ *
+ * arg_str contains optional arguments for an event.
+ */
+struct tevent_req *run_event_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct run_event_context *run_ctx,
+ const char *event_str,
+ const char *arg_str,
+ struct timeval timeout,
+ bool continue_on_failure);
+
+/**
+ * @brief Async computation end to run an event
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] output List of scripts executed and their status
+ * @return true on success, false on failure
+ */
+bool run_event_recv(struct tevent_req *req, int *perr,
+ TALLOC_CTX *mem_ctx,
+ struct run_event_script_list **output);
+
+#endif /* __CTDB_RUN_EVENT_H__ */
+
diff --git a/ctdb/common/run_proc.c b/ctdb/common/run_proc.c
new file mode 100644
index 0000000..84bc343
--- /dev/null
+++ b/ctdb/common/run_proc.c
@@ -0,0 +1,503 @@
+/*
+ Run a child process and collect the output
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/wait.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/tevent_unix.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/blocking.h"
+#include "lib/util/dlinklist.h"
+
+#include "common/run_proc.h"
+
+/*
+ * Process abstraction
+ */
+
+struct run_proc_context;
+
+struct proc_context { /* One child process tracked by a run_proc_context */
+ struct proc_context *prev, *next; /* links for the run_proc_context plist */
+
+ pid_t pid; /* child pid, -1 when not started or already reaped/killed */
+
+ int fd; /* read end of the pipe carrying the child's stdout/stderr, -1 when closed */
+ struct tevent_fd *fde; /* read event on fd */
+
+ char *output; /* NUL-terminated output collected so far, NULL if none */
+ struct run_proc_result result; /* filled in by the SIGCHLD handler */
+
+ struct tevent_req *req; /* run_proc request waiting for this process, NULL if none */
+};
+
+static int proc_destructor(struct proc_context *proc);
+
+/*
+ * Allocate a proc_context on mem_ctx with no process and no pipe
+ * attached (pid and fd are -1) and install its destructor.
+ * run_ctx is currently unused.  Returns NULL on allocation failure.
+ */
+static struct proc_context *proc_new(TALLOC_CTX *mem_ctx,
+				     struct run_proc_context *run_ctx)
+{
+	struct proc_context *p = talloc_zero(mem_ctx, struct proc_context);
+
+	if (p != NULL) {
+		p->pid = -1;
+		p->fd = -1;
+		talloc_set_destructor(p, proc_destructor);
+	}
+
+	return p;
+}
+
+static void run_proc_kill(struct tevent_req *req);
+
+static int proc_destructor(struct proc_context *proc) /* talloc destructor: complete any waiting request and kill the child */
+{
+ if (proc->req != NULL) {
+ run_proc_kill(proc->req); /* completes the request with result.sig = SIGKILL */
+ }
+
+ talloc_free(proc->fde);
+ if (proc->pid != -1) {
+ kill(-proc->pid, SIGKILL); /* negative pid: the child's whole process group */
+ }
+
+ return 0;
+}
+
+static void proc_read_handler(struct tevent_context *ev,
+ struct tevent_fd *fde, uint16_t flags,
+ void *private_data);
+
+/*
+ * Fork and exec path with argv, capturing stdout and stderr.
+ *
+ * The child gets the write end of a pipe as stdout and stderr,
+ * stdin_fd as stdin (unless it is -1) and its own process group.  If
+ * anything fails in the child before or during exec, it exits with
+ * 64 + errno so the parent can tell setup errors from script exit codes.
+ *
+ * In the parent, proc->pid is set and the read end of the pipe is
+ * watched via proc_read_handler().  Returns 0 on success or an errno
+ * value on failure; on failure proc->fd is left at its previous value.
+ */
+static int proc_start(struct proc_context *proc, struct tevent_context *ev,
+		      const char *path, const char **argv, int stdin_fd)
+{
+	int fd[2];
+	int ret;
+
+	ret = pipe(fd);
+	if (ret != 0) {
+		/* pipe() returns -1; the caller expects an errno value */
+		return errno;
+	}
+
+	proc->pid = fork();
+	if (proc->pid == -1) {
+		ret = errno;
+		close(fd[0]);
+		close(fd[1]);
+		return ret;
+	}
+
+	if (proc->pid == 0) {
+		/* Child */
+		close(fd[0]);
+
+		ret = dup2(fd[1], STDOUT_FILENO);
+		if (ret == -1) {
+			exit(64 + errno);
+		}
+		ret = dup2(fd[1], STDERR_FILENO);
+		if (ret == -1) {
+			exit(64 + errno);
+		}
+
+		close(fd[1]);
+
+		if (stdin_fd != -1) {
+			ret = dup2(stdin_fd, STDIN_FILENO);
+			if (ret == -1) {
+				exit(64 + errno);
+			}
+		}
+
+		/* Own process group, so kill(-pid, ...) reaches descendants */
+		ret = setpgid(0, 0);
+		if (ret != 0) {
+			exit(64 + errno);
+		}
+
+		ret = execv(path, discard_const(argv));
+		if (ret != 0) {
+			exit(64 + errno);
+		}
+
+		exit(64 + ENOEXEC);
+	}
+
+	/* Parent */
+	close(fd[1]);
+
+	proc->fde = tevent_add_fd(ev, proc, fd[0], TEVENT_FD_READ,
+				  proc_read_handler, proc);
+	if (proc->fde == NULL) {
+		close(fd[0]);
+		return ENOMEM;
+	}
+
+	/* Publish the descriptor only once it is known to stay open */
+	proc->fd = fd[0];
+	tevent_fd_set_auto_close(proc->fde);
+
+	return 0;
+}
+
+/*
+ * Read event on the child's output pipe.
+ *
+ * Appends whatever is available to proc->output, keeping it
+ * NUL-terminated.  When no data is pending the pipe is treated as
+ * closed and the fd event is released.  On error the child's process
+ * group is killed, the fd event is released, and output collected so
+ * far is kept.
+ */
+static void proc_read_handler(struct tevent_context *ev,
+			      struct tevent_fd *fde, uint16_t flags,
+			      void *private_data)
+{
+	struct proc_context *proc = talloc_get_type_abort(
+		private_data, struct proc_context);
+	size_t offset;
+	ssize_t nread;
+	char *buf;
+	int len = 0;
+	int ret;
+
+	ret = ioctl(proc->fd, FIONREAD, &len);
+	if (ret != 0) {
+		goto fail;
+	}
+
+	if (len == 0) {
+		/* pipe closed */
+		goto close;
+	}
+
+	offset = (proc->output == NULL) ? 0 : strlen(proc->output);
+
+	/* Resize via a temporary so earlier output survives a failure */
+	buf = talloc_realloc(proc, proc->output, char, offset+len+1);
+	if (buf == NULL) {
+		goto fail;
+	}
+	proc->output = buf;
+
+	nread = sys_read(proc->fd, proc->output + offset, len);
+	if (nread == -1) {
+		/* The new tail is uninitialized; keep a valid string */
+		proc->output[offset] = '\0';
+		goto fail;
+	}
+	proc->output[offset+nread] = '\0';
+	return;
+
+fail:
+	if (proc->pid != -1) {
+		kill(-proc->pid, SIGKILL);
+		proc->pid = -1;
+	}
+close:
+	TALLOC_FREE(proc->fde);
+	proc->fd = -1;
+}
+
+
+/*
+ * Run proc abstraction
+ */
+
+struct run_proc_context { /* State shared by all processes started through run_proc_send() */
+ struct tevent_context *ev;
+ struct tevent_signal *se; /* SIGCHLD handler registered by run_proc_init() */
+ struct proc_context *plist; /* list of tracked child processes */
+};
+
+static void run_proc_signal_handler(struct tevent_context *ev,
+ struct tevent_signal *se,
+ int signum, int count, void *siginfo,
+ void *private_data);
+static int run_proc_context_destructor(struct run_proc_context *run_ctx);
+static void run_proc_done(struct tevent_req *req);
+
+/*
+ * Create a run_proc context on mem_ctx and register its SIGCHLD
+ * handler on ev.  Returns 0 and sets *result on success, ENOMEM on
+ * failure.
+ */
+int run_proc_init(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		  struct run_proc_context **result)
+{
+	struct run_proc_context *ctx;
+
+	ctx = talloc_zero(mem_ctx, struct run_proc_context);
+	if (ctx == NULL) {
+		return ENOMEM;
+	}
+
+	ctx->ev = ev;
+	ctx->se = tevent_add_signal(ev, ctx, SIGCHLD, 0,
+				    run_proc_signal_handler, ctx);
+	if (ctx->se != NULL) {
+		/* The destructor takes care of processes still tracked */
+		talloc_set_destructor(ctx, run_proc_context_destructor);
+		*result = ctx;
+		return 0;
+	}
+
+	talloc_free(ctx);
+	return ENOMEM;
+}
+
+/*
+ * SIGCHLD handler: reap every child that has terminated.
+ *
+ * For each reaped pid that belongs to a tracked process, the exit
+ * information is stored in proc->result, any remaining pipe data is
+ * read, the process is removed from the list and the waiting request
+ * (if any) is completed; a process without a waiting request is freed.
+ * Children not found in the list are reaped and ignored.
+ */
+static void run_proc_signal_handler(struct tevent_context *ev,
+				    struct tevent_signal *se,
+				    int signum, int count, void *siginfo,
+				    void *private_data)
+{
+	struct run_proc_context *run_ctx = talloc_get_type_abort(
+		private_data, struct run_proc_context);
+	struct proc_context *proc;
+	pid_t pid;
+	int status;
+
+	/* waitpid() returns 0 when nothing is left to reap, -1 on error */
+	while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
+		for (proc = run_ctx->plist; proc != NULL; proc = proc->next) {
+			if (proc->pid == pid) {
+				break;
+			}
+		}
+
+		if (proc == NULL) {
+			/* unknown process */
+			continue;
+		}
+
+		/* Mark the process as terminated */
+		proc->pid = -1;
+
+		/* Update process status */
+		if (WIFEXITED(status)) {
+			int pstatus = WEXITSTATUS(status);
+
+			/*
+			 * Exit codes from 64 up to 254 are treated as
+			 * 64 + errno, as used by the child in
+			 * proc_start() when setup or exec fails.
+			 */
+			if (pstatus >= 64 && pstatus < 255) {
+				proc->result.err = pstatus-64;
+			} else {
+				proc->result.status = pstatus;
+			}
+		} else if (WIFSIGNALED(status)) {
+			proc->result.sig = WTERMSIG(status);
+		}
+
+		/* Confirm that all data has been read from the pipe */
+		if (proc->fd != -1) {
+			proc_read_handler(ev, proc->fde, 0, proc);
+			TALLOC_FREE(proc->fde);
+			proc->fd = -1;
+		}
+
+		DLIST_REMOVE(run_ctx->plist, proc);
+
+		/* Active run_proc request */
+		if (proc->req != NULL) {
+			run_proc_done(proc->req);
+		} else {
+			talloc_free(proc);
+		}
+	}
+}
+
+/*
+ * talloc destructor for the run_proc context: stop handling SIGCHLD
+ * and free every tracked process.  Freeing a process runs
+ * proc_destructor(), which kills it and completes its request.
+ */
+static int run_proc_context_destructor(struct run_proc_context *run_ctx)
+{
+	/* Get rid of signal handler */
+	TALLOC_FREE(run_ctx->se);
+
+	/* Kill any pending processes */
+	while (run_ctx->plist != NULL) {
+		struct proc_context *head = run_ctx->plist;
+
+		DLIST_REMOVE(run_ctx->plist, head);
+		talloc_free(head);
+	}
+
+	return 0;
+}
+
+struct run_proc_state { /* Request state for run_proc_send() */
+ struct tevent_context *ev;
+ struct run_proc_context *run_ctx;
+ struct proc_context *proc; /* the child process; set to NULL by run_proc_kill() */
+
+ struct run_proc_result result; /* returned by run_proc_recv() */
+ char *output; /* captured output moved here on completion or timeout */
+ pid_t pid; /* -1 unless the request timed out, then the pid of the still tracked child */
+};
+
+static int run_proc_state_destructor(struct run_proc_state *state);
+static void run_proc_timedout(struct tevent_req *subreq);
+
+/*
+ * Start path with argv as a child process and wait for it to finish.
+ *
+ * If path cannot be stat()ed or lacks the owner execute bit, the
+ * request completes successfully straight away with result.err set
+ * to the errno value or EACCES.  Otherwise the process is started and
+ * the request completes from the SIGCHLD handler, or with
+ * result.err = ETIMEDOUT once a non-zero timeout expires.
+ *
+ * If the process cannot be started, the request fails and the
+ * half-initialized process record is released immediately, so nothing
+ * refers to the request after it has been failed.
+ */
+struct tevent_req *run_proc_send(TALLOC_CTX *mem_ctx,
+				 struct tevent_context *ev,
+				 struct run_proc_context *run_ctx,
+				 const char *path, const char **argv,
+				 int stdin_fd, struct timeval timeout)
+{
+	struct tevent_req *req;
+	struct run_proc_state *state;
+	struct stat st;
+	int ret;
+
+	req = tevent_req_create(mem_ctx, &state, struct run_proc_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->ev = ev;
+	state->run_ctx = run_ctx;
+	state->pid = -1;
+
+	ret = stat(path, &st);
+	if (ret != 0) {
+		state->result.err = errno;
+		tevent_req_done(req);
+		return tevent_req_post(req, ev);
+	}
+
+	if (! (st.st_mode & S_IXUSR)) {
+		state->result.err = EACCES;
+		tevent_req_done(req);
+		return tevent_req_post(req, ev);
+	}
+
+	state->proc = proc_new(run_ctx, run_ctx);
+	if (tevent_req_nomem(state->proc, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	state->proc->req = req;
+	DLIST_ADD(run_ctx->plist, state->proc);
+
+	ret = proc_start(state->proc, ev, path, argv, stdin_fd);
+	if (ret != 0) {
+		/*
+		 * Detach the request first so proc_destructor() does
+		 * not complete it through run_proc_kill(), then drop
+		 * the record; the destructor kills the child if fork
+		 * had already succeeded.
+		 */
+		state->proc->req = NULL;
+		DLIST_REMOVE(run_ctx->plist, state->proc);
+		TALLOC_FREE(state->proc);
+
+		tevent_req_error(req, ret);
+		return tevent_req_post(req, ev);
+	}
+
+	talloc_set_destructor(state, run_proc_state_destructor);
+
+	if (! tevent_timeval_is_zero(&timeout)) {
+		struct tevent_req *subreq;
+
+		subreq = tevent_wakeup_send(state, ev, timeout);
+		if (tevent_req_nomem(subreq, req)) {
+			return tevent_req_post(req, ev);
+		}
+		tevent_req_set_callback(subreq, run_proc_timedout, req);
+	}
+
+	return req;
+}
+
+/*
+ * talloc destructor for the request state.
+ *
+ * If the process is still attached to this request, it is detached,
+ * removed from the list and freed (which kills it).  A process whose
+ * req pointer was already cleared -- after completion or timeout -- is
+ * left alone.
+ */
+static int run_proc_state_destructor(struct run_proc_state *state)
+{
+	struct proc_context *proc = state->proc;
+
+	/* Do not get rid of the child process if timeout has occurred */
+	if (proc == NULL || proc->req == NULL) {
+		return 0;
+	}
+
+	proc->req = NULL;
+	DLIST_REMOVE(state->run_ctx->plist, proc);
+	TALLOC_FREE(state->proc);
+
+	return 0;
+}
+
+static void run_proc_done(struct tevent_req *req) /* Complete req after its child has been reaped */
+{
+ struct run_proc_state *state = tevent_req_data(
+ req, struct run_proc_state);
+
+ state->proc->req = NULL; /* detach, so destructors do not act on this request again */
+
+ state->result = state->proc->result;
+ if (state->proc->output != NULL) {
+ state->output = talloc_move(state, &state->proc->output);
+ }
+ talloc_steal(state, state->proc); /* the process record is now freed together with the request state */
+
+ tevent_req_done(req);
+}
+
+static void run_proc_kill(struct tevent_req *req) /* Complete req on behalf of a process that is being destroyed */
+{
+ struct run_proc_state *state = tevent_req_data(
+ req, struct run_proc_state);
+
+ state->proc->req = NULL;
+ state->proc = NULL; /* called from proc_destructor(): the record is going away */
+
+ state->result.sig = SIGKILL; /* reported as if the child was killed */
+
+ tevent_req_done(req);
+}
+
+static void run_proc_timedout(struct tevent_req *subreq) /* Timer callback: the child did not finish within the timeout */
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct run_proc_state *state = tevent_req_data(
+ req, struct run_proc_state);
+ bool status;
+
+ state->proc->req = NULL; /* detach: run_proc_state_destructor() then leaves the child running */
+
+ status = tevent_wakeup_recv(subreq);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ tevent_req_error(req, EIO);
+ return;
+ }
+
+ state->result.err = ETIMEDOUT;
+ if (state->proc->output != NULL) {
+ state->output = talloc_move(state, &state->proc->output); /* hand over what was captured so far */
+ }
+ state->pid = state->proc->pid; /* reported through run_proc_recv() */
+
+ tevent_req_done(req);
+}
+
+/*
+ * Collect the result of run_proc_send().
+ *
+ * On failure returns false, stores the error in *perr if perr is not
+ * NULL, and leaves result, pid and output untouched.  On success
+ * returns true and fills in whichever of result, pid and output are
+ * not NULL; output is moved onto mem_ctx and may be NULL.
+ */
+bool run_proc_recv(struct tevent_req *req, int *perr,
+		   struct run_proc_result *result, pid_t *pid,
+		   TALLOC_CTX *mem_ctx, char **output)
+{
+	struct run_proc_state *state = tevent_req_data(
+		req, struct run_proc_state);
+	int err;
+
+	if (tevent_req_is_unix_error(req, &err)) {
+		if (perr != NULL) {
+			*perr = err;
+		}
+		return false;
+	}
+
+	if (result != NULL) {
+		*result = state->result;
+	}
+	if (pid != NULL) {
+		*pid = state->pid;
+	}
+	if (output != NULL) {
+		*output = talloc_move(mem_ctx, &state->output);
+	}
+
+	return true;
+}
diff --git a/ctdb/common/run_proc.h b/ctdb/common/run_proc.h
new file mode 100644
index 0000000..7b06dad
--- /dev/null
+++ b/ctdb/common/run_proc.h
@@ -0,0 +1,100 @@
+/*
+ Run a child process and collect the output
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_RUN_PROC_H__
+#define __CTDB_RUN_PROC_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+/**
+ * @file run_proc.h
+ *
+ * @brief Run a process and capture the output
+ *
+ * This abstraction allows one to execute scripts with arguments.
+ */
+
+/**
+ * @brief The run process context
+ */
+struct run_proc_context;
+
+/**
+ * @brief The exit status structure
+ *
+ * If the process is terminated due to a signal, sig is set.
+ * If the process is terminated due to an error, err is set.
+ * If the process terminates normally, status is set.
+ */
+struct run_proc_result {
+ int sig; /* set when the process was terminated by a signal (SIGKILL on the kill path) */
+ int err; /* set when the run ended with an error (ETIMEDOUT on the timeout path) */
+ int status; /* set when the process terminated normally */
+};
+
+/**
+ * @brief Initialize the context for running processes
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[out] result New run_proc context
+ * @return 0 on success, errno on error
+ */
+int run_proc_init(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ struct run_proc_context **result);
+
+/**
+ * @brief Async computation start to run an executable
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] run_ctx Run_proc context
+ * @param[in] prog The path to the executable
+ * @param[in] argv Arguments to the executable
+ * @param[in] stdin_fd Assign stdin_fd as stdin for the process, -1 if not
+ * @param[in] timeout How long to wait for execution
+ * @return new tevent request, or NULL on failure
+ *
+ * argv must include program name as argv[0] and must be null terminated.
+ */
+struct tevent_req *run_proc_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct run_proc_context *run_ctx,
+ const char *prog, const char **argv,
+ int stdin_fd, struct timeval timeout);
+
+/**
+ * @brief Async computation end to run an executable
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @param[out] result The exit status of the executable
+ * @param[out] pid The pid of the child process (still running)
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] output The output from the executable (stdout + stderr)
+ * @return true on success, false on failure
+ *
+ * The returned pid is -1 if the process has terminated.
+ */
+bool run_proc_recv(struct tevent_req *req, int *perr,
+ struct run_proc_result *result, pid_t *pid,
+ TALLOC_CTX *mem_ctx, char **output);
+
+#endif /* __CTDB_RUN_PROC_H__ */
diff --git a/ctdb/common/sock_client.c b/ctdb/common/sock_client.c
new file mode 100644
index 0000000..75f471f
--- /dev/null
+++ b/ctdb/common/sock_client.c
@@ -0,0 +1,334 @@
+/*
+ A client based on unix domain socket
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/time.h"
+#include "lib/util/tevent_unix.h"
+
+#include "common/logging.h"
+#include "common/reqid.h"
+#include "common/comm.h"
+#include "common/sock_client.h"
+
+struct sock_client_context { /* state of one client connection to a unix domain socket */
+ struct sock_client_proto_funcs *funcs; /* protocol marshalling callbacks (request_push/reply_pull/reply_reqid) */
+ void *private_data; /* opaque pointer handed to the funcs callbacks */
+
+ void (*disconnect_callback)(void *private_data); /* optional; called by the dead handler instead of exit(1) */
+ void *disconnect_data; /* argument for disconnect_callback */
+
+ int fd; /* connected socket; closed by the destructor */
+ struct comm_context *comm; /* comm layer set up on fd */
+ struct reqid_context *idr; /* request id registry used to match replies to requests */
+};
+
+/*
+ * connect to a unix domain socket
+ */
+
+/*
+ * Open a stream socket and connect it to the unix domain socket at
+ * sockpath.
+ *
+ * Returns the connected descriptor, or -1 if the path does not fit in
+ * sun_path or socket()/connect() fails (an error is logged in each case).
+ */
+static int socket_connect(const char *sockpath)
+{
+	struct sockaddr_un addr;
+	size_t pathlen;
+	int fd;
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+
+	/* strlcpy returns the source length, so >= size means truncation */
+	pathlen = strlcpy(addr.sun_path, sockpath, sizeof(addr.sun_path));
+	if (pathlen >= sizeof(addr.sun_path)) {
+		D_ERR("socket path too long: %s\n", sockpath);
+		return -1;
+	}
+
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd == -1) {
+		D_ERR("socket create failed - %s\n", sockpath);
+		return -1;
+	}
+
+	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) != 0) {
+		D_ERR("socket connect failed - %s\n", sockpath);
+		close(fd);
+		return -1;
+	}
+
+	return fd;
+}
+
+/*
+ * Socket client
+ */
+
+static int sock_client_context_destructor(struct sock_client_context *sockc);
+static void sock_client_read_handler(uint8_t *buf, size_t buflen,
+ void *private_data);
+static void sock_client_dead_handler(void *private_data);
+
+static void sock_client_msg_reply(struct sock_client_context *sockc,
+ uint8_t *buf, size_t buflen);
+
+/*
+ * Create a socket client connected to sockpath.
+ *
+ * All three marshalling callbacks in funcs are mandatory.  Returns 0 and
+ * stores the new context in *result on success; EINVAL for bad arguments,
+ * ENOMEM, EIO if the connection cannot be made, or the error returned by
+ * comm_setup()/reqid_init().
+ */
+int sock_client_setup(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+		      const char *sockpath,
+		      struct sock_client_proto_funcs *funcs,
+		      void *private_data,
+		      struct sock_client_context **result)
+{
+	struct sock_client_context *sockc;
+	int ret;
+
+	if (sockpath == NULL) {
+		return EINVAL;
+	}
+	if (funcs == NULL) {
+		return EINVAL;
+	}
+	if (funcs->request_push == NULL ||
+	    funcs->reply_pull == NULL ||
+	    funcs->reply_reqid == NULL) {
+		return EINVAL;
+	}
+
+	sockc = talloc_zero(mem_ctx, struct sock_client_context);
+	if (sockc == NULL) {
+		return ENOMEM;
+	}
+
+	sockc->funcs = funcs;
+	sockc->private_data = private_data;
+
+	sockc->fd = socket_connect(sockpath);
+	if (sockc->fd == -1) {
+		talloc_free(sockc);
+		return EIO;
+	}
+
+	ret = comm_setup(sockc, ev, sockc->fd,
+			 sock_client_read_handler, sockc,
+			 sock_client_dead_handler, sockc,
+			 &sockc->comm);
+	if (ret != 0) {
+		D_ERR("comm_setup() failed, ret=%d\n", ret);
+		goto fail;
+	}
+
+	ret = reqid_init(sockc, INT_MAX-200, &sockc->idr);
+	if (ret != 0) {
+		D_ERR("reqid_init() failed, ret=%d\n", ret);
+		goto fail;
+	}
+
+	/* Only a fully built context gets the destructor that closes fd */
+	talloc_set_destructor(sockc, sock_client_context_destructor);
+
+	*result = sockc;
+	return 0;
+
+fail:
+	/* No destructor installed yet, so the socket is closed by hand */
+	close(sockc->fd);
+	talloc_free(sockc);
+	return ret;
+}
+
+/*
+ * Talloc destructor for the client context: free the comm context, then
+ * close the socket if it is still open.
+ */
+static int sock_client_context_destructor(struct sock_client_context *sockc)
+{
+	TALLOC_FREE(sockc->comm);
+
+	if (sockc->fd == -1) {
+		return 0;
+	}
+
+	close(sockc->fd);
+	sockc->fd = -1;
+	return 0;
+}
+
+
+/*
+ * comm read callback: a complete packet has arrived, treat it as a reply.
+ */
+static void sock_client_read_handler(uint8_t *buf, size_t buflen,
+				     void *private_data)
+{
+	struct sock_client_context *sockc;
+
+	sockc = talloc_get_type_abort(private_data,
+				      struct sock_client_context);
+
+	sock_client_msg_reply(sockc, buf, buflen);
+}
+
+/*
+ * comm dead callback: the connection to the daemon is gone.
+ *
+ * Without a registered disconnect callback the process exits with
+ * status 1.  Otherwise the callback is invoked and the client context
+ * is freed.
+ */
+static void sock_client_dead_handler(void *private_data)
+{
+	struct sock_client_context *sockc;
+
+	sockc = talloc_get_type_abort(private_data,
+				      struct sock_client_context);
+
+	if (sockc->disconnect_callback == NULL) {
+		D_NOTICE("connection to daemon closed, exiting\n");
+		exit(1);
+	}
+
+	sockc->disconnect_callback(sockc->disconnect_data);
+	talloc_free(sockc);
+}
+
+/*
+ * Register the function (and its argument) to be called when the
+ * connection to the daemon is lost.
+ */
+void sock_client_set_disconnect_callback(struct sock_client_context *sockc,
+					 sock_client_callback_func_t callback,
+					 void *private_data)
+{
+	sockc->disconnect_data = private_data;
+	sockc->disconnect_callback = callback;
+}
+
+
+struct sock_client_msg_state { /* per-request state of sock_client_msg_send/recv */
+ struct sock_client_context *sockc; /* client the request was sent on */
+ uint32_t reqid; /* id from reqid_new(); removed again by the state destructor */
+ struct tevent_req *req; /* the request itself, completed from sock_client_msg_reply() */
+ void *reply; /* reply produced by reply_pull(), handed out by sock_client_msg_recv() */
+};
+
+static int sock_client_msg_state_destructor(
+ struct sock_client_msg_state *state);
+static void sock_client_msg_done(struct tevent_req *subreq);
+
+/*
+ * Start sending a request to the daemon.
+ *
+ * Allocates a request id, marshals the request with request_push() and
+ * queues it on the comm context.  A non-zero timeout becomes the end time
+ * of the request.  The request completes when the matching reply arrives
+ * (see sock_client_msg_reply) or on error/timeout.
+ *
+ * Returns NULL if the request or its id cannot be allocated.
+ */
+struct tevent_req *sock_client_msg_send(TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct sock_client_context *sockc,
+					struct timeval timeout,
+					void *request)
+{
+	struct sock_client_msg_state *state;
+	struct tevent_req *req;
+	struct tevent_req *subreq;
+	uint8_t *pkt;
+	size_t pktlen;
+	int ret;
+
+	req = tevent_req_create(mem_ctx, &state, struct sock_client_msg_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->sockc = sockc;
+
+	state->reqid = reqid_new(sockc->idr, state);
+	if (state->reqid == REQID_INVALID) {
+		talloc_free(req);
+		return NULL;
+	}
+
+	state->req = req;
+
+	/* From now on freeing the state releases the request id */
+	talloc_set_destructor(state, sock_client_msg_state_destructor);
+
+	ret = sockc->funcs->request_push(request, state->reqid, state,
+					 &pkt, &pktlen, sockc->private_data);
+	if (ret != 0) {
+		tevent_req_error(req, ret);
+		return tevent_req_post(req, ev);
+	}
+
+	subreq = comm_write_send(state, ev, sockc->comm, pkt, pktlen);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, sock_client_msg_done, req);
+
+	if (! timeval_is_zero(&timeout) &&
+	    ! tevent_req_set_endtime(req, ev, timeout)) {
+		return tevent_req_post(req, ev);
+	}
+
+	return req;
+}
+
+/*
+ * Talloc destructor for the request state: give the request id back.
+ */
+static int sock_client_msg_state_destructor(
+					struct sock_client_msg_state *state)
+{
+	struct sock_client_context *sockc = state->sockc;
+
+	reqid_remove(sockc->idr, state->reqid);
+
+	return 0;
+}
+
+/*
+ * Write completion: a failed write fails the request; a successful one
+ * leaves it pending until the reply arrives or the end time is reached.
+ */
+static void sock_client_msg_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req;
+	bool written;
+	int ret;
+
+	req = tevent_req_callback_data(subreq, struct tevent_req);
+
+	written = comm_write_recv(subreq, &ret);
+	TALLOC_FREE(subreq);
+	if (written) {
+		/* wait for the reply or timeout */
+		return;
+	}
+
+	tevent_req_error(req, ret);
+}
+
+/*
+ * Route a received packet to the request waiting for it.
+ *
+ * The request id is extracted with reply_reqid() and looked up in the id
+ * registry.  Packets that cannot be parsed are logged and dropped; packets
+ * with no matching pending request are dropped silently.  Otherwise the
+ * payload is unmarshalled with reply_pull() and the request is completed
+ * (or failed with the reply_pull() error).
+ */
+static void sock_client_msg_reply(struct sock_client_context *sockc,
+				  uint8_t *buf, size_t buflen)
+{
+	struct sock_client_msg_state *state;
+	uint32_t reqid;
+	int ret;
+
+	ret = sockc->funcs->reply_reqid(buf, buflen, &reqid,
+					sockc->private_data);
+	if (ret != 0) {
+		D_WARNING("Invalid packet received, ret=%d\n", ret);
+		return;
+	}
+
+	state = reqid_find(sockc->idr, reqid, struct sock_client_msg_state);
+	if (state == NULL || reqid != state->reqid) {
+		return;
+	}
+
+	ret = sockc->funcs->reply_pull(buf, buflen, state, &state->reply,
+				       sockc->private_data);
+	if (ret == 0) {
+		tevent_req_done(state->req);
+		return;
+	}
+
+	tevent_req_error(state->req, ret);
+}
+
+/*
+ * Collect the result of sock_client_msg_send().
+ *
+ * On failure returns false and stores the unix error in *perr (if given).
+ * On success returns true; if reply is non-NULL it is treated as a
+ * pointer to a pointer and receives the reply, re-parented to mem_ctx.
+ */
+bool sock_client_msg_recv(struct tevent_req *req, int *perr,
+			  TALLOC_CTX *mem_ctx, void *reply)
+{
+	struct sock_client_msg_state *state;
+	int err;
+
+	state = tevent_req_data(req, struct sock_client_msg_state);
+
+	if (tevent_req_is_unix_error(req, &err)) {
+		if (perr != NULL) {
+			*perr = err;
+		}
+		return false;
+	}
+
+	if (reply == NULL) {
+		return true;
+	}
+
+	*(void **)reply = talloc_steal(mem_ctx, state->reply);
+	return true;
+}
diff --git a/ctdb/common/sock_client.h b/ctdb/common/sock_client.h
new file mode 100644
index 0000000..49a0a52
--- /dev/null
+++ b/ctdb/common/sock_client.h
@@ -0,0 +1,129 @@
+/*
+ A client based on unix domain socket
+
+ Copyright (C) Amitay Isaacs 2017
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_SOCK_CLIENT_H__
+#define __CTDB_SOCK_CLIENT_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+/**
+ * @file sock_client.h
+ *
+ * @brief A framework for a client based on unix-domain sockets.
+ *
+ * This abstraction allows one to build clients that communicate using
+ * unix-domain sockets. It takes care of the common boilerplate.
+ */
+
+/**
+ * @brief The abstract socket client context
+ */
+struct sock_client_context;
+
+/**
+ * @brief callback function
+ *
+ * This function can be registered to be called in case daemon goes away.
+ */
+typedef void (*sock_client_callback_func_t)(void *private_data);
+
+/**
+ * @brief Protocol marshalling functions
+ *
+ * The typical protocol packet will have a header and a payload.
+ * Header will contain at least 2 fields: length and reqid
+ *
+ * request_push() is called when the request packet needs to be marshalled
+ *
+ * reply_pull() is called to unmarshall data into a reply packet
+ *
+ * reply_reqid() is called to extract request id from a reply packet
+ */
+struct sock_client_proto_funcs {
+ int (*request_push)(void *request, uint32_t reqid, /* marshal request with the given reqid into *buf / *buflen */
+ TALLOC_CTX *mem_ctx,
+ uint8_t **buf, size_t *buflen,
+ void *private_data); /* a non-zero return is used as the error code of the send request */
+
+ int (*reply_pull)(uint8_t *buf, size_t buflen, /* unmarshal a received packet into *reply */
+ TALLOC_CTX *mem_ctx, void **reply,
+ void *private_data); /* a non-zero return fails the pending request with that code */
+
+ int (*reply_reqid)(uint8_t *buf, size_t buflen, /* extract the request id from a received packet */
+ uint32_t *reqid, void *private_data); /* a non-zero return makes the client drop the packet */
+};
+
+/**
+ * @brief Create a new socket client
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] sockpath Unix domain socket path
+ * @param[in] funcs Protocol marshalling functions
+ * @param[in] private_data Private data for protocol functions
+ * @param[out] result New socket client context
+ * @return 0 on success, errno on failure
+ */
+int sock_client_setup(TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+ const char *sockpath,
+ struct sock_client_proto_funcs *funcs,
+ void *private_data,
+ struct sock_client_context **result);
+
+/**
+ * @brief Register a callback in case of client disconnection
+ *
+ * @param[in] sockc Socket client context
+ * @param[in] callback Callback function
+ * @param[in] private_data Private data for callback function
+ */
+void sock_client_set_disconnect_callback(struct sock_client_context *sockc,
+ sock_client_callback_func_t callback,
+ void *private_data);
+
+/**
+ * @brief Async computation to send data to the daemon
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] sockc The socket client context
+ * @param[in] timeout How long to wait for
+ * @param[in] request Request packet to be sent
+ * @return new tevent request, or NULL on failure
+ */
+struct tevent_req *sock_client_msg_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct sock_client_context *sockc,
+ struct timeval timeout,
+ void *request);
+
+/**
+ * @brief Async computation end to send data to the daemon
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] reply Reply received from server
+ * @return true on success, false on failure
+ */
+bool sock_client_msg_recv(struct tevent_req *req, int *perr,
+ TALLOC_CTX *mem_ctx, void *reply);
+
+#endif /* __CTDB_SOCK_CLIENT_H__ */
diff --git a/ctdb/common/sock_daemon.c b/ctdb/common/sock_daemon.c
new file mode 100644
index 0000000..e31a364
--- /dev/null
+++ b/ctdb/common/sock_daemon.c
@@ -0,0 +1,1100 @@
+/*
+ A server based on unix domain socket
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+#include "system/wait.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/async_req/async_sock.h"
+#include "lib/util/debug.h"
+#include "lib/util/blocking.h"
+#include "lib/util/dlinklist.h"
+#include "lib/util/tevent_unix.h"
+#include "lib/util/become_daemon.h"
+#include "lib/util/sys_rw.h"
+
+#include "common/logging.h"
+#include "common/reqid.h"
+#include "common/comm.h"
+#include "common/pidfile.h"
+#include "common/system.h"
+#include "common/sock_daemon.h"
+
+struct sock_socket { /* one listening unix domain socket of the daemon */
+ struct sock_socket *prev, *next; /* links for sock_daemon_context.socket_list */
+
+ const char *sockpath; /* talloc copy of the socket path; unlinked by the destructor */
+ struct sock_socket_funcs *funcs; /* per-socket callbacks; read_send/read_recv are mandatory */
+ void *private_data; /* opaque pointer handed to the funcs callbacks */
+
+ int fd; /* listening descriptor, -1 until sock_socket_start_send() creates it */
+ struct tevent_req *req; /* the running sock_socket_start request, freed by the destructor */
+};
+
+struct sock_client { /* list entry for one accepted client connection */
+ struct sock_client *prev, *next; /* links for sock_socket_start_state.client_list */
+
+ struct tevent_req *req; /* the sock_socket_start request whose list holds this entry */
+ struct sock_client_context *client_ctx; /* per-connection context, freed by the client destructor */
+};
+
+struct sock_client_context { /* per-connection state on the daemon side */
+ struct tevent_context *ev; /* event context passed to read_send() */
+ struct sock_socket *sock; /* listening socket this client connected to */
+ int fd; /* accepted client descriptor; closed by the destructor */
+ struct comm_context *comm; /* comm layer set up on fd */
+
+ struct sock_client *client; /* owning list entry; freed by the destructor */
+};
+
+struct sock_daemon_context { /* top-level state of a socket daemon */
+ struct sock_daemon_funcs *funcs; /* optional daemon callbacks (startup, reconfigure, ...) */
+ void *private_data; /* opaque pointer handed to the funcs callbacks */
+
+ struct pidfile_context *pid_ctx; /* set by sock_daemon_run_send() when a pidfile is requested */
+ struct sock_socket *socket_list; /* sockets added with sock_daemon_add_unix() */
+ int startup_fd; /* -1 unless set via sock_daemon_set_startup_fd() */
+};
+
+/*
+ * Process a single client
+ */
+
+static void sock_client_read_handler(uint8_t *buf, size_t buflen,
+ void *private_data);
+static void sock_client_read_done(struct tevent_req *subreq);
+static void sock_client_dead_handler(void *private_data);
+static int sock_client_context_destructor(
+ struct sock_client_context *client_ctx);
+
+/*
+ * Create the per-connection context for a freshly accepted client.
+ *
+ * Sets up the comm layer on client_fd and, when the socket has a
+ * connect() hook, asks it whether the connection is acceptable.
+ *
+ * This function owns client_fd once called: every failure path closes it
+ * here, and on success the context destructor closes it.
+ *
+ * Returns 0 and stores the context in *result on success.  Returns ENOMEM
+ * or the comm_setup() error on setup failure, and ECONNREFUSED when the
+ * connect() hook rejects the client.  Success is never reported without
+ * *result being set, so the caller cannot end up tracking a client that
+ * has no context.
+ */
+static int sock_client_context_init(TALLOC_CTX *mem_ctx,
+				    struct tevent_context *ev,
+				    struct sock_socket *sock,
+				    int client_fd,
+				    struct sock_client *client,
+				    struct sock_client_context **result)
+{
+	struct sock_client_context *client_ctx;
+	int ret;
+
+	client_ctx = talloc_zero(mem_ctx, struct sock_client_context);
+	if (client_ctx == NULL) {
+		close(client_fd);
+		return ENOMEM;
+	}
+
+	client_ctx->ev = ev;
+	client_ctx->sock = sock;
+	client_ctx->fd = client_fd;
+	client_ctx->client = client;
+
+	ret = comm_setup(client_ctx, ev, client_fd,
+			 sock_client_read_handler, client_ctx,
+			 sock_client_dead_handler, client_ctx,
+			 &client_ctx->comm);
+	if (ret != 0) {
+		/* The destructor is not installed yet: close fd by hand */
+		talloc_free(client_ctx);
+		close(client_fd);
+		return ret;
+	}
+
+	if (sock->funcs->connect != NULL) {
+		/*
+		 * Looking up the peer pid is best effort; pre-set pid so
+		 * the hook never sees an indeterminate value.
+		 */
+		pid_t pid = -1;
+		bool status;
+
+		(void) ctdb_get_peer_pid(client_fd, &pid);
+
+		status = sock->funcs->connect(client_ctx,
+					      pid,
+					      sock->private_data);
+		if (! status) {
+			talloc_free(client_ctx);
+			close(client_fd);
+			return ECONNREFUSED;
+		}
+	}
+
+	talloc_set_destructor(client_ctx, sock_client_context_destructor);
+
+	*result = client_ctx;
+	return 0;
+}
+
+/*
+ * comm read callback: hand a complete packet from the client to the
+ * socket's read_send() hook.  If no request can be started, the client
+ * context is freed.
+ */
+static void sock_client_read_handler(uint8_t *buf, size_t buflen,
+				     void *private_data)
+{
+	struct sock_client_context *client_ctx;
+	struct sock_socket *sock;
+	struct tevent_req *subreq;
+
+	client_ctx = talloc_get_type_abort(private_data,
+					   struct sock_client_context);
+	sock = client_ctx->sock;
+
+	subreq = sock->funcs->read_send(client_ctx, client_ctx->ev,
+					client_ctx, buf, buflen,
+					sock->private_data);
+	if (subreq != NULL) {
+		tevent_req_set_callback(subreq, sock_client_read_done,
+					client_ctx);
+		return;
+	}
+
+	talloc_free(client_ctx);
+}
+
+/*
+ * Completion of a read_send() request for one client packet.
+ *
+ * The finished request is released as soon as its result has been
+ * collected; otherwise every processed packet would stay allocated until
+ * the client goes away.  A failed request drops the client.
+ */
+static void sock_client_read_done(struct tevent_req *subreq)
+{
+	struct sock_client_context *client_ctx = tevent_req_callback_data(
+		subreq, struct sock_client_context);
+	struct sock_socket *sock = client_ctx->sock;
+	int ret;
+	bool status;
+
+	status = sock->funcs->read_recv(subreq, &ret);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		D_ERR("client read failed with ret=%d\n", ret);
+		talloc_free(client_ctx);
+	}
+}
+
+/*
+ * comm dead callback: the client connection has gone away.  Give the
+ * socket's optional disconnect() hook a chance to run, then free the
+ * client context.
+ */
+static void sock_client_dead_handler(void *private_data)
+{
+	struct sock_client_context *client_ctx;
+	struct sock_socket *sock;
+
+	client_ctx = talloc_get_type_abort(private_data,
+					   struct sock_client_context);
+	sock = client_ctx->sock;
+
+	if (sock->funcs->disconnect != NULL) {
+		sock->funcs->disconnect(client_ctx, sock->private_data);
+	}
+
+	talloc_free(client_ctx);
+}
+
+/*
+ * Talloc destructor for a client context: free the owning list entry and
+ * the comm context (in that order), then close the client socket if it is
+ * still open.
+ */
+static int sock_client_context_destructor(
+				struct sock_client_context *client_ctx)
+{
+	TALLOC_FREE(client_ctx->client);
+	TALLOC_FREE(client_ctx->comm);
+
+	if (client_ctx->fd == -1) {
+		return 0;
+	}
+
+	close(client_ctx->fd);
+	client_ctx->fd = -1;
+	return 0;
+}
+
+/*
+ * Process a single listening socket
+ */
+
+/*
+ * Create a non-blocking unix domain stream socket bound to sockpath and
+ * put it into listening state (backlog 10).
+ *
+ * If remove_before_use is set, any existing file at sockpath is unlinked
+ * before bind().  Returns the listening descriptor, or -1 after logging
+ * which step failed.
+ */
+static int socket_setup(const char *sockpath, bool remove_before_use)
+{
+	struct sockaddr_un addr;
+	size_t pathlen;
+	int fd;
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+
+	/* strlcpy returns the source length, so >= size means truncation */
+	pathlen = strlcpy(addr.sun_path, sockpath, sizeof(addr.sun_path));
+	if (pathlen >= sizeof(addr.sun_path)) {
+		D_ERR("socket path too long: %s\n", sockpath);
+		return -1;
+	}
+
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd == -1) {
+		D_ERR("socket create failed - %s\n", sockpath);
+		return -1;
+	}
+
+	if (set_blocking(fd, false) != 0) {
+		D_ERR("socket set nonblocking failed - %s\n", sockpath);
+		goto fail;
+	}
+
+	if (remove_before_use) {
+		unlink(sockpath);
+	}
+
+	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) != 0) {
+		D_ERR("socket bind failed - %s\n", sockpath);
+		goto fail;
+	}
+
+	if (listen(fd, 10) != 0) {
+		D_ERR("socket listen failed - %s\n", sockpath);
+		goto fail;
+	}
+
+	D_NOTICE("listening on %s\n", sockpath);
+	return fd;
+
+fail:
+	close(fd);
+	return -1;
+}
+
+static int sock_socket_destructor(struct sock_socket *sock);
+
+/*
+ * Allocate the description of a listening socket.  Nothing is opened
+ * here; fd stays -1 until the socket is started.
+ *
+ * funcs must provide read_send and read_recv.  Returns 0 and stores the
+ * new object in *result, or EINVAL / ENOMEM.
+ */
+static int sock_socket_init(TALLOC_CTX *mem_ctx, const char *sockpath,
+			    struct sock_socket_funcs *funcs,
+			    void *private_data,
+			    struct sock_socket **result)
+{
+	struct sock_socket *sock;
+
+	if (funcs == NULL ||
+	    funcs->read_send == NULL ||
+	    funcs->read_recv == NULL) {
+		return EINVAL;
+	}
+
+	sock = talloc_zero(mem_ctx, struct sock_socket);
+	if (sock == NULL) {
+		return ENOMEM;
+	}
+
+	sock->sockpath = talloc_strdup(sock, sockpath);
+	if (sock->sockpath == NULL) {
+		talloc_free(sock);
+		return ENOMEM;
+	}
+
+	sock->fd = -1;
+	sock->funcs = funcs;
+	sock->private_data = private_data;
+
+	talloc_set_destructor(sock, sock_socket_destructor);
+
+	*result = sock;
+	return 0;
+}
+
+/*
+ * Talloc destructor for a listening socket: free the running start
+ * request, close the listening descriptor if open, and remove the socket
+ * path from the filesystem.
+ */
+static int sock_socket_destructor(struct sock_socket *sock)
+{
+	TALLOC_FREE(sock->req);
+
+	if (sock->fd != -1) {
+		close(sock->fd);
+		sock->fd = -1;
+	}
+
+	/* Attempted regardless of whether the socket was ever opened */
+	unlink(sock->sockpath);
+
+	return 0;
+}
+
+
+struct sock_socket_start_state { /* state of the long-running "listen and accept" request */
+ struct tevent_context *ev; /* event context used for accept and client handling */
+ struct sock_socket *sock; /* the socket being listened on */
+
+ struct sock_client *client_list; /* currently tracked clients; all freed by the state destructor */
+};
+
+static int sock_socket_start_state_destructor(
+ struct sock_socket_start_state *state);
+static void sock_socket_start_new_client(struct tevent_req *subreq);
+static int sock_socket_start_client_destructor(struct sock_client *client);
+
+/*
+ * Start listening on a socket.
+ *
+ * Creates the listening descriptor and queues the first accept.  The
+ * returned request stays pending while the socket is being served; it
+ * fails with EIO if the socket cannot be set up.
+ */
+static struct tevent_req *sock_socket_start_send(TALLOC_CTX *mem_ctx,
+						 struct tevent_context *ev,
+						 struct sock_socket *sock,
+						 bool remove_before_use)
+{
+	struct sock_socket_start_state *state;
+	struct tevent_req *req;
+	struct tevent_req *subreq;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct sock_socket_start_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->sock = sock;
+	state->ev = ev;
+
+	sock->fd = socket_setup(sock->sockpath, remove_before_use);
+	if (sock->fd == -1) {
+		tevent_req_error(req, EIO);
+		return tevent_req_post(req, ev);
+	}
+
+	/* Clients accepted from now on are cleaned up with the state */
+	talloc_set_destructor(state, sock_socket_start_state_destructor);
+
+	subreq = accept_send(state, ev, sock->fd);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, sock_socket_start_new_client, req);
+
+	sock->req = req;
+	return req;
+}
+
+/*
+ * Talloc destructor for the start state: free every tracked client.
+ * The loop terminates because each client's own destructor removes it
+ * from client_list.
+ */
+static int sock_socket_start_state_destructor(
+				struct sock_socket_start_state *state)
+{
+	struct sock_client *client;
+
+	for (;;) {
+		client = state->client_list;
+		if (client == NULL) {
+			break;
+		}
+		talloc_free(client);
+	}
+
+	return 0;
+}
+
+/*
+ * accept() completion handler for a listening socket.
+ *
+ * Collects the accepted descriptor, immediately queues the next accept,
+ * and then creates the tracking entry and per-connection context for the
+ * new client.  A failed accept is logged and otherwise ignored.
+ *
+ * If the next accept cannot be queued the listening request fails; the
+ * descriptor that was just accepted is closed in that case so it does not
+ * leak.
+ */
+static void sock_socket_start_new_client(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct sock_socket_start_state *state = tevent_req_data(
+		req, struct sock_socket_start_state);
+	struct sock_client *client;
+	int client_fd, ret;
+
+	client_fd = accept_recv(subreq, NULL, NULL, &ret);
+	TALLOC_FREE(subreq);
+	if (client_fd == -1) {
+		D_ERR("failed to accept new connection\n");
+	}
+
+	/* Keep accepting, whatever happened to this connection */
+	subreq = accept_send(state, state->ev, state->sock->fd);
+	if (tevent_req_nomem(subreq, req)) {
+		if (client_fd != -1) {
+			close(client_fd);
+		}
+		return;
+	}
+	tevent_req_set_callback(subreq, sock_socket_start_new_client, req);
+
+	if (client_fd == -1) {
+		return;
+	}
+
+	client = talloc_zero(state, struct sock_client);
+	if (tevent_req_nomem(client, req)) {
+		close(client_fd);
+		return;
+	}
+
+	client->req = req;
+
+	ret = sock_client_context_init(client, state->ev, state->sock,
+				       client_fd, client, &client->client_ctx);
+	if (ret != 0) {
+		/* Not linked into client_list yet, just drop the entry */
+		talloc_free(client);
+		return;
+	}
+
+	/* The destructor unlinks the entry, so install it before linking */
+	talloc_set_destructor(client, sock_socket_start_client_destructor);
+	DLIST_ADD(state->client_list, client);
+}
+
+/*
+ * Talloc destructor for a client list entry: unlink it from the owning
+ * request's client list and free its connection context.
+ */
+static int sock_socket_start_client_destructor(struct sock_client *client)
+{
+	struct sock_socket_start_state *state;
+
+	state = tevent_req_data(client->req, struct sock_socket_start_state);
+
+	DLIST_REMOVE(state->client_list, client);
+	TALLOC_FREE(client->client_ctx);
+
+	return 0;
+}
+
+/*
+ * Collect the result of a finished sock_socket_start request.
+ *
+ * The socket's back-pointer to the request is cleared first.  On failure
+ * returns false with the unix error in *perr (if given).  On success
+ * returns true and, if requested, hands the socket path to the caller,
+ * re-parented to mem_ctx.
+ */
+static bool sock_socket_start_recv(struct tevent_req *req, int *perr,
+				   TALLOC_CTX *mem_ctx, const char **sockpath)
+{
+	struct sock_socket_start_state *state;
+	int err;
+
+	state = tevent_req_data(req, struct sock_socket_start_state);
+	state->sock->req = NULL;
+
+	if (tevent_req_is_unix_error(req, &err)) {
+		if (perr != NULL) {
+			*perr = err;
+		}
+		return false;
+	}
+
+	if (sockpath == NULL) {
+		return true;
+	}
+
+	*sockpath = talloc_steal(mem_ctx, state->sock->sockpath);
+	return true;
+}
+
+/*
+ * Send message to a client
+ */
+
+/*
+ * Start writing buf to a client.  This is a thin wrapper that forwards to
+ * comm_write_send() on the client's comm context.
+ */
+struct tevent_req *sock_socket_write_send(TALLOC_CTX *mem_ctx,
+					  struct tevent_context *ev,
+					  struct sock_client_context *client_ctx,
+					  uint8_t *buf, size_t buflen)
+{
+	return comm_write_send(mem_ctx, ev, client_ctx->comm, buf, buflen);
+}
+
+/*
+ * Collect the result of sock_socket_write_send().  On failure the error
+ * from comm_write_recv() is stored in *perr, if perr is given.
+ */
+bool sock_socket_write_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (comm_write_recv(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/*
+ * Socket daemon
+ */
+
+/*
+ * Allocate a socket daemon context and initialise logging for it.
+ *
+ * Returns 0 and stores the context in *out on success.  Returns ENOMEM if
+ * the context cannot be allocated, or the logging_init() error; in the
+ * latter case a message is written to stderr and the half-built context
+ * is freed instead of being left on mem_ctx.
+ */
+int sock_daemon_setup(TALLOC_CTX *mem_ctx, const char *daemon_name,
+		      const char *logging, const char *debug_level,
+		      struct sock_daemon_funcs *funcs,
+		      void *private_data,
+		      struct sock_daemon_context **out)
+{
+	struct sock_daemon_context *sockd;
+	int ret;
+
+	sockd = talloc_zero(mem_ctx, struct sock_daemon_context);
+	if (sockd == NULL) {
+		return ENOMEM;
+	}
+
+	sockd->funcs = funcs;
+	sockd->private_data = private_data;
+	sockd->startup_fd = -1;
+
+	ret = logging_init(sockd, logging, debug_level, daemon_name);
+	if (ret != 0) {
+		fprintf(stderr,
+			"Failed to initialize logging, logging=%s, debug=%s\n",
+			logging, debug_level);
+		/* Nobody else holds a reference to sockd yet */
+		talloc_free(sockd);
+		return ret;
+	}
+
+	*out = sockd;
+	return 0;
+}
+
+/*
+ * Register a unix domain socket with the daemon.  The socket is only
+ * described and added to the daemon's socket list here.
+ *
+ * Returns 0 on success or the error from sock_socket_init().
+ */
+int sock_daemon_add_unix(struct sock_daemon_context *sockd,
+			 const char *sockpath,
+			 struct sock_socket_funcs *funcs,
+			 void *private_data)
+{
+	struct sock_socket *sock;
+	int ret;
+
+	ret = sock_socket_init(sockd, sockpath, funcs, private_data, &sock);
+	if (ret == 0) {
+		DLIST_ADD(sockd->socket_list, sock);
+	}
+
+	return ret;
+}
+
+/*
+ * Remember fd as the daemon's startup descriptor.  The descriptor is
+ * marked close-on-exec first; if that fails it is not recorded and false
+ * is returned.
+ */
+bool sock_daemon_set_startup_fd(struct sock_daemon_context *sockd, int fd)
+{
+	if (set_close_on_exec(fd)) {
+		sockd->startup_fd = fd;
+		return true;
+	}
+
+	D_ERR("Failed to set close-on-exec on startup fd\n");
+	return false;
+}
+
+/*
+ * Run socket daemon
+ */
+
+struct sock_daemon_run_state { /* state of the sock_daemon_run request */
+ struct tevent_context *ev; /* event context the daemon runs on */
+ struct sock_daemon_context *sockd; /* daemon being run */
+ pid_t pid_watch; /* pid passed by the caller; values > 1 arm the periodic watch_pid wakeup */
+
+ int fd; /* initialised to -1 in sock_daemon_run_send(); NOTE(review): its use is outside this chunk */
+ int exit_code; /* set to EINTR when SIGINT/SIGTERM triggers shutdown */
+};
+
+static void sock_daemon_run_started(struct tevent_req *subreq);
+static void sock_daemon_run_startup_done(struct tevent_req *subreq);
+static void sock_daemon_run_signal_handler(struct tevent_context *ev,
+ struct tevent_signal *se,
+ int signum, int count, void *siginfo,
+ void *private_data);
+static void sock_daemon_run_reconfigure(struct tevent_req *req);
+static void sock_daemon_run_reconfigure_done(struct tevent_req *subreq);
+static void sock_daemon_run_reopen_logs(struct tevent_req *req);
+static void sock_daemon_run_reopen_logs_done(struct tevent_req *subreq);
+static void sock_daemon_run_shutdown(struct tevent_req *req);
+static void sock_daemon_run_shutdown_done(struct tevent_req *subreq);
+static void sock_daemon_run_exit(struct tevent_req *req);
+static bool sock_daemon_run_socket_listen(struct tevent_req *req);
+static void sock_daemon_run_socket_fail(struct tevent_req *subreq);
+static void sock_daemon_run_watch_pid(struct tevent_req *subreq);
+static void sock_daemon_run_wait(struct tevent_req *req);
+static void sock_daemon_run_wait_done(struct tevent_req *subreq);
+static void sock_daemon_startup_notify(struct sock_daemon_context *sockd);
+
+/*
+ * Start running the daemon.
+ *
+ * Daemonises via become_daemon(), creates the pidfile if one is
+ * requested (failure is reported as EEXIST), schedules the startup step
+ * with an immediate wakeup, installs handlers for SIGHUP, SIGUSR1, SIGINT
+ * and SIGTERM, and, when pid_watch > 1, arms a one second wakeup for the
+ * pid watcher.
+ *
+ * Returns NULL only if the request itself cannot be allocated; all other
+ * failures are reported through the returned (posted) request.
+ */
+struct tevent_req *sock_daemon_run_send(TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct sock_daemon_context *sockd,
+					const char *pidfile,
+					bool do_fork, bool create_session,
+					pid_t pid_watch)
+{
+	static const int handled_signals[] = {
+		SIGHUP, SIGUSR1, SIGINT, SIGTERM
+	};
+	struct sock_daemon_run_state *state;
+	struct tevent_req *req;
+	struct tevent_req *subreq;
+	struct tevent_signal *se;
+	size_t i;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct sock_daemon_run_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	become_daemon(do_fork, !create_session, false);
+
+	if (pidfile != NULL &&
+	    pidfile_context_create(sockd, pidfile, &sockd->pid_ctx) != 0) {
+		tevent_req_error(req, EEXIST);
+		return tevent_req_post(req, ev);
+	}
+
+	state->ev = ev;
+	state->sockd = sockd;
+	state->pid_watch = pid_watch;
+	state->fd = -1;
+
+	/* Run the startup step from the event loop, not from here */
+	subreq = tevent_wakeup_send(state, ev,
+				    tevent_timeval_current_ofs(0, 0));
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, sock_daemon_run_started, req);
+
+	for (i = 0;
+	     i < sizeof(handled_signals) / sizeof(handled_signals[0]);
+	     i++) {
+		se = tevent_add_signal(ev, state, handled_signals[i], 0,
+				       sock_daemon_run_signal_handler, req);
+		if (tevent_req_nomem(se, req)) {
+			return tevent_req_post(req, ev);
+		}
+	}
+
+	if (pid_watch > 1) {
+		subreq = tevent_wakeup_send(state, ev,
+					    tevent_timeval_current_ofs(1,0));
+		if (tevent_req_nomem(subreq, req)) {
+			return tevent_req_post(req, ev);
+		}
+		tevent_req_set_callback(subreq, sock_daemon_run_watch_pid,
+					req);
+	}
+
+	return req;
+}
+
+/*
+ * First step once the event loop is running.
+ *
+ * If the daemon provides the async startup_send/startup_recv pair, that
+ * is started and the rest happens in sock_daemon_run_startup_done().
+ * Otherwise the optional synchronous startup() hook is run, and then the
+ * sockets are put into listening state, the wait step is started and the
+ * startup notification is sent.
+ */
+static void sock_daemon_run_started(struct tevent_req *subreq)
+{
+	struct tevent_req *req;
+	struct sock_daemon_run_state *state;
+	struct sock_daemon_context *sockd;
+	struct sock_daemon_funcs *funcs;
+	bool status;
+
+	req = tevent_req_callback_data(subreq, struct tevent_req);
+	state = tevent_req_data(req, struct sock_daemon_run_state);
+	sockd = state->sockd;
+	funcs = sockd->funcs;
+
+	status = tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		tevent_req_error(req, EIO);
+		return;
+	}
+
+	D_NOTICE("daemon started, pid=%u\n", getpid());
+
+	/* The async startup pair takes precedence over startup() */
+	if (funcs != NULL &&
+	    funcs->startup_send != NULL &&
+	    funcs->startup_recv != NULL) {
+		subreq = funcs->startup_send(state, state->ev,
+					     sockd->private_data);
+		if (tevent_req_nomem(subreq, req)) {
+			return;
+		}
+		tevent_req_set_callback(subreq, sock_daemon_run_startup_done,
+					req);
+		return;
+	}
+
+	if (funcs != NULL && funcs->startup != NULL) {
+		int ret = funcs->startup(sockd->private_data);
+
+		if (ret != 0) {
+			D_ERR("startup failed, ret=%d\n", ret);
+			tevent_req_error(req, EIO);
+			return;
+		}
+
+		D_NOTICE("startup completed successfully\n");
+	}
+
+	if (! sock_daemon_run_socket_listen(req)) {
+		return;
+	}
+
+	sock_daemon_run_wait(req);
+	sock_daemon_startup_notify(sockd);
+}
+
+/*
+ * Completion callback for the async startup hook (startup_send/startup_recv).
+ *
+ * On failure the daemon request fails with EIO.  On success the sockets are
+ * started, the optional wait computation is kicked off and the startup fd
+ * (if any) is notified.
+ */
+static void sock_daemon_run_startup_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	struct sock_daemon_context *sockd = state->sockd;
+	/*
+	 * Initialised so the error log below never prints an indeterminate
+	 * value when startup_recv() fails without filling in the error.
+	 */
+	int ret = 0;
+	bool status;
+
+	status = sockd->funcs->startup_recv(subreq, &ret);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		D_ERR("startup failed, ret=%d\n", ret);
+		tevent_req_error(req, EIO);
+		return;
+	}
+
+	D_NOTICE("startup completed successfully\n");
+
+	status = sock_daemon_run_socket_listen(req);
+	if (! status) {
+		return;
+	}
+	sock_daemon_run_wait(req);
+
+	sock_daemon_startup_notify(sockd);
+}
+
+/*
+ * tevent signal handler shared by all signals the daemon listens for.
+ *
+ * SIGUSR1 triggers a reconfigure, SIGHUP reopens the logs, and SIGINT or
+ * SIGTERM start the shutdown sequence with EINTR as the exit code.  Any
+ * other signal number is only logged.
+ */
+static void sock_daemon_run_signal_handler(struct tevent_context *ev,
+					   struct tevent_signal *se,
+					   int signum, int count, void *siginfo,
+					   void *private_data)
+{
+	struct tevent_req *req = talloc_get_type_abort(
+		private_data, struct tevent_req);
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+
+	D_NOTICE("Received signal %d\n", signum);
+
+	switch (signum) {
+	case SIGUSR1:
+		sock_daemon_run_reconfigure(req);
+		break;
+
+	case SIGHUP:
+		sock_daemon_run_reopen_logs(req);
+		break;
+
+	case SIGINT:
+	case SIGTERM:
+		state->exit_code = EINTR;
+		sock_daemon_run_shutdown(req);
+		break;
+
+	default:
+		break;
+	}
+}
+
+/*
+ * Run the user supplied reconfigure hook.
+ *
+ * The async pair reconfigure_send()/reconfigure_recv() is used when both are
+ * set; otherwise the sync reconfigure() hook is called if present.  A failing
+ * hook is only logged.  Failure to create the async sub-request is reported
+ * on the daemon request through tevent_req_nomem().
+ */
+static void sock_daemon_run_reconfigure(struct tevent_req *req)
+{
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	struct sock_daemon_context *sockd = state->sockd;
+	struct tevent_req *subreq;
+	int rc;
+
+	if (sockd->funcs == NULL) {
+		return;
+	}
+
+	if (sockd->funcs->reconfigure_send != NULL &&
+	    sockd->funcs->reconfigure_recv != NULL) {
+		subreq = sockd->funcs->reconfigure_send(
+			state, state->ev, sockd->private_data);
+		if (tevent_req_nomem(subreq, req)) {
+			return;
+		}
+		tevent_req_set_callback(
+			subreq, sock_daemon_run_reconfigure_done, req);
+		return;
+	}
+
+	if (sockd->funcs->reconfigure == NULL) {
+		return;
+	}
+
+	rc = sockd->funcs->reconfigure(sockd->private_data);
+	if (rc != 0) {
+		D_ERR("reconfigure failed, ret=%d\n", rc);
+		return;
+	}
+
+	D_NOTICE("reconfigure completed successfully\n");
+}
+
+/*
+ * Completion callback for the async reconfigure hook.
+ *
+ * The outcome is only logged; the daemon request itself is left untouched,
+ * so the daemon keeps running whether or not the reconfigure succeeded.
+ */
+static void sock_daemon_run_reconfigure_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	struct sock_daemon_context *sockd = state->sockd;
+	/*
+	 * Initialised so the error log below never prints an indeterminate
+	 * value when reconfigure_recv() fails without filling in the error.
+	 */
+	int ret = 0;
+	bool status;
+
+	status = sockd->funcs->reconfigure_recv(subreq, &ret);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		D_ERR("reconfigure failed, ret=%d\n", ret);
+		return;
+	}
+
+	D_NOTICE("reconfigure completed successfully\n");
+}
+
+/*
+ * Run the user supplied reopen_logs hook.
+ *
+ * The async pair reopen_logs_send()/reopen_logs_recv() is used when both are
+ * set; otherwise the sync reopen_logs() hook is called if present.  A failing
+ * hook is only logged.  Failure to create the async sub-request is reported
+ * on the daemon request through tevent_req_nomem().
+ */
+static void sock_daemon_run_reopen_logs(struct tevent_req *req)
+{
+	struct tevent_req *subreq;
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	struct sock_daemon_context *sockd = state->sockd;
+
+	if (sockd->funcs != NULL && sockd->funcs->reopen_logs_send != NULL &&
+	    sockd->funcs->reopen_logs_recv != NULL) {
+		subreq = sockd->funcs->reopen_logs_send(state, state->ev,
+							sockd->private_data);
+		if (tevent_req_nomem(subreq, req)) {
+			return;
+		}
+		tevent_req_set_callback(subreq,
+					sock_daemon_run_reopen_logs_done, req);
+		return;
+	}
+
+	if (sockd->funcs != NULL && sockd->funcs->reopen_logs != NULL) {
+		int ret;
+
+		ret = sockd->funcs->reopen_logs(sockd->private_data);
+		if (ret != 0) {
+			/* Same wording as the async path's failure message */
+			D_ERR("reopen logs failed, ret=%d\n", ret);
+			return;
+		}
+
+		D_NOTICE("reopen logs completed successfully\n");
+	}
+}
+
+/*
+ * Completion callback for the async reopen_logs hook.
+ *
+ * The outcome is only logged; the daemon request itself is left untouched,
+ * so the daemon keeps running whether or not reopening the logs succeeded.
+ */
+static void sock_daemon_run_reopen_logs_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	struct sock_daemon_context *sockd = state->sockd;
+	/*
+	 * Initialised so the error log below never prints an indeterminate
+	 * value when reopen_logs_recv() fails without filling in the error.
+	 */
+	int ret = 0;
+	bool status;
+
+	status = sockd->funcs->reopen_logs_recv(subreq, &ret);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		D_ERR("reopen logs failed, ret=%d\n", ret);
+		return;
+	}
+
+	D_NOTICE("reopen logs completed successfully\n");
+}
+
+/*
+ * Begin shutting the daemon down.
+ *
+ * Every socket is unlinked from the daemon's socket list and freed, then the
+ * user supplied shutdown hook is run.  The async pair is used when both
+ * shutdown_send() and shutdown_recv() are set; if shutdown_send() cannot
+ * create a request the daemon exits straight away.  Otherwise the sync
+ * shutdown() hook (if any) is called before exiting.
+ */
+static void sock_daemon_run_shutdown(struct tevent_req *req)
+{
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	struct sock_daemon_context *sockd = state->sockd;
+	struct tevent_req *subreq;
+	struct sock_socket *sock;
+
+	D_NOTICE("Shutting down\n");
+
+	while (sockd->socket_list != NULL) {
+		sock = sockd->socket_list;
+		DLIST_REMOVE(sockd->socket_list, sock);
+		TALLOC_FREE(sock);
+	}
+
+	if (sockd->funcs != NULL) {
+		if (sockd->funcs->shutdown_send != NULL &&
+		    sockd->funcs->shutdown_recv != NULL) {
+			subreq = sockd->funcs->shutdown_send(
+				state, state->ev, sockd->private_data);
+			if (subreq == NULL) {
+				/* No async shutdown possible, just exit */
+				sock_daemon_run_exit(req);
+				return;
+			}
+			tevent_req_set_callback(
+				subreq, sock_daemon_run_shutdown_done, req);
+			return;
+		}
+
+		if (sockd->funcs->shutdown != NULL) {
+			sockd->funcs->shutdown(sockd->private_data);
+		}
+	}
+
+	sock_daemon_run_exit(req);
+}
+
+/*
+ * Completion callback for the async shutdown hook: collect the result via
+ * shutdown_recv(), release the sub-request and finish the daemon request.
+ */
+static void sock_daemon_run_shutdown_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+
+	state->sockd->funcs->shutdown_recv(subreq);
+	TALLOC_FREE(subreq);
+
+	sock_daemon_run_exit(req);
+}
+
+/*
+ * Finish the daemon request.
+ *
+ * The pidfile context is freed first.  The request then completes
+ * successfully when the recorded exit code is 0, and with that exit code
+ * as the error otherwise.
+ */
+static void sock_daemon_run_exit(struct tevent_req *req)
+{
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+
+	TALLOC_FREE(state->sockd->pid_ctx);
+
+	if (state->exit_code != 0) {
+		tevent_req_error(req, state->exit_code);
+		return;
+	}
+
+	tevent_req_done(req);
+}
+
+/*
+ * Start a request for every socket registered with the daemon.
+ *
+ * remove_before_use is passed as true to sock_socket_start_send() when the
+ * daemon has a pidfile context.  Each per-socket request reports back to
+ * sock_daemon_run_socket_fail() when it finishes.
+ *
+ * Returns false if a sub-request could not be created (the daemon request
+ * has then been failed by tevent_req_nomem()), true otherwise.
+ */
+static bool sock_daemon_run_socket_listen(struct tevent_req *req)
+{
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	struct sock_daemon_context *sockd = state->sockd;
+	bool remove_before_use = (sockd->pid_ctx != NULL);
+	struct sock_socket *sock = sockd->socket_list;
+
+	while (sock != NULL) {
+		struct tevent_req *subreq;
+
+		subreq = sock_socket_start_send(state, state->ev, sock,
+						remove_before_use);
+		if (tevent_req_nomem(subreq, req)) {
+			return false;
+		}
+		tevent_req_set_callback(subreq, sock_daemon_run_socket_fail,
+					req);
+
+		sock = sock->next;
+	}
+
+	return true;
+}
+
+/*
+ * Completion callback for a per-socket request started by
+ * sock_daemon_run_socket_listen().
+ *
+ * The exit code becomes the reported error when the request failed and 0
+ * otherwise; in both cases the daemon then shuts down.
+ */
+static void sock_daemon_run_socket_fail(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	const char *sockpath = "INVALID";
+	int err = 0;
+	bool ok;
+
+	ok = sock_socket_start_recv(subreq, &err, state, &sockpath);
+	TALLOC_FREE(subreq);
+
+	if (ok) {
+		state->exit_code = 0;
+	} else {
+		D_ERR("socket %s closed unexpectedly\n", sockpath);
+		state->exit_code = err;
+	}
+
+	sock_daemon_run_shutdown(req);
+}
+
+/*
+ * Periodic check that the watched process still exists.
+ *
+ * The process is probed with kill(pid, 0).  ESRCH means it is gone, so the
+ * daemon shuts down with ESRCH as the exit code.  Any other probe failure is
+ * logged and the check is retried.  The next check is scheduled 5 seconds
+ * later with this function as the callback again.
+ */
+static void sock_daemon_run_watch_pid(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	bool ok;
+
+	ok = tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+	if (! ok) {
+		tevent_req_error(req, EIO);
+		return;
+	}
+
+	if (kill(state->pid_watch, 0) == -1) {
+		if (errno == ESRCH) {
+			D_ERR("PID %d gone away, exiting\n", state->pid_watch);
+			state->exit_code = ESRCH;
+			sock_daemon_run_shutdown(req);
+			return;
+		}
+
+		D_ERR("Failed to check PID status %d, ret=%d\n",
+		      state->pid_watch, errno);
+	}
+
+	subreq = tevent_wakeup_send(state, state->ev,
+				    tevent_timeval_current_ofs(5,0));
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, sock_daemon_run_watch_pid, req);
+}
+
+/*
+ * Start the optional user supplied wait computation.
+ *
+ * Nothing happens unless both wait_send() and wait_recv() are provided.
+ * When the wait request finishes, sock_daemon_run_wait_done() is called.
+ */
+static void sock_daemon_run_wait(struct tevent_req *req)
+{
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	struct sock_daemon_context *sockd = state->sockd;
+	struct tevent_req *subreq;
+
+	if (sockd->funcs == NULL ||
+	    sockd->funcs->wait_send == NULL ||
+	    sockd->funcs->wait_recv == NULL) {
+		return;
+	}
+
+	subreq = sockd->funcs->wait_send(state, state->ev,
+					 sockd->private_data);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, sock_daemon_run_wait_done, req);
+}
+
+/*
+ * Completion callback for the wait computation.
+ *
+ * The exit code is the error reported by wait_recv() on failure and 0 on
+ * success; the daemon then shuts down.
+ */
+static void sock_daemon_run_wait_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct sock_daemon_run_state *state = tevent_req_data(
+		req, struct sock_daemon_run_state);
+	int err = 0;
+	bool ok;
+
+	ok = state->sockd->funcs->wait_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+
+	state->exit_code = ok ? 0 : err;
+
+	sock_daemon_run_shutdown(req);
+}
+
+/*
+ * Report successful startup by writing an unsigned int 0 to the startup fd.
+ *
+ * Does nothing when no startup fd is set (-1).  A short or failed write is
+ * only logged as a warning.
+ */
+static void sock_daemon_startup_notify(struct sock_daemon_context *sockd)
+{
+	unsigned int zero = 0;
+	ssize_t nwritten;
+
+	if (sockd->startup_fd == -1) {
+		return;
+	}
+
+	nwritten = sys_write(sockd->startup_fd, &zero, sizeof(zero));
+	if (nwritten != sizeof(zero)) {
+		D_WARNING("Failed to write zero to pipe FD\n");
+	}
+}
+
+/*
+ * Collect the result of sock_daemon_run_send().
+ *
+ * Returns true on success.  On failure returns false and, when perr is not
+ * NULL, stores the unix error code in *perr.
+ */
+bool sock_daemon_run_recv(struct tevent_req *req, int *perr)
+{
+	int err;
+
+	if (! tevent_req_is_unix_error(req, &err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = err;
+	}
+	return false;
+}
+
+/*
+ * Synchronous wrapper around sock_daemon_run_send()/sock_daemon_run_recv().
+ *
+ * Creates the daemon request on the event context, polls it and returns 0 on
+ * success or the error code on failure.  ENOMEM is returned if the request
+ * cannot be created.
+ */
+int sock_daemon_run(struct tevent_context *ev,
+		    struct sock_daemon_context *sockd,
+		    const char *pidfile,
+		    bool do_fork, bool create_session,
+		    pid_t pid_watch)
+{
+	struct tevent_req *req;
+	int err;
+	bool ok;
+
+	req = sock_daemon_run_send(ev, ev, sockd,
+				   pidfile, do_fork, create_session, pid_watch);
+	if (req == NULL) {
+		return ENOMEM;
+	}
+
+	tevent_req_poll(req, ev);
+
+	ok = sock_daemon_run_recv(req, &err);
+	TALLOC_FREE(req);
+
+	return ok ? 0 : err;
+}
diff --git a/ctdb/common/sock_daemon.h b/ctdb/common/sock_daemon.h
new file mode 100644
index 0000000..85ed961
--- /dev/null
+++ b/ctdb/common/sock_daemon.h
@@ -0,0 +1,283 @@
+/*
+ A server based on unix domain socket
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_SOCK_DAEMON_H__
+#define __CTDB_SOCK_DAEMON_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "common/logging.h"
+
+/**
+ * @file sock_daemon.h
+ *
+ * @brief A framework for a server based on unix-domain sockets.
+ *
+ * This abstraction allows one to build simple servers that communicate using
+ * unix-domain sockets. It takes care of the common boilerplate.
+ */
+
+/**
+ * @brief The abstract socket daemon context
+ */
+struct sock_daemon_context;
+
+/**
+ * @brief The abstract socket client context
+ */
+struct sock_client_context;
+
+/**
+ * @brief The callback routines called during daemon life cycle
+ *
+ * startup() is called when the daemon starts running
+ * either via sock_daemon_run() or via sock_daemon_run_send()
+ * startup() should return 0 for success, non-zero value on failure
+ * On failure, sock_daemon_run() will return error.
+ *
+ * startup_send()/startup_recv() is the async version of startup()
+ *
+ * reconfigure() is called when the daemon receives SIGUSR1
+ * reconfigure() should return 0 for success, non-zero value on failure
+ * On failure, sock_daemon_run() will continue to run.
+ *
+ * reconfigure_send()/reconfigure_recv() is the async version of reconfigure()
+ *
+ * reopen_logs() is called when the daemon receives SIGHUP
+ * reopen_logs() should return 0 for success, non-zero value on failure
+ * On failure, sock_daemon_run() will continue to run.
+ *
+ * reopen_logs_send()/reopen_logs_recv() is the async version of reopen_logs()
+ *
+ * shutdown() is called when process receives SIGINT or SIGTERM or
+ * when wait computation has finished
+ *
+ * shutdown_send()/shutdown_recv() is the async version of shutdown()
+ *
+ * Please note that only one (sync or async) version of these functions
+ * will be called. If both versions are defined, then only async function
+ * will be called.
+ *
+ * wait_send() starts the async computation to keep running the daemon
+ * wait_recv() ends the async computation to keep running the daemon
+ *
+ * If wait_send()/wait_recv() is NULL, then daemon will keep running forever.
+ * If wait_send() returns req, then when req is over, daemon will shutdown.
+ */
+struct sock_daemon_funcs {
+ /* Startup: sync hook, followed by the async send/recv pair */
+ int (*startup)(void *private_data);
+
+ struct tevent_req * (*startup_send)(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ void *private_data);
+ bool (*startup_recv)(struct tevent_req *req, int *perr);
+
+ /* Reconfigure (SIGUSR1): sync hook, then the async pair */
+ int (*reconfigure)(void *private_data);
+
+ struct tevent_req * (*reconfigure_send)(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ void *private_data);
+ bool (*reconfigure_recv)(struct tevent_req *req, int *perr);
+
+ /* Reopen logs (SIGHUP): sync hook, then the async pair */
+ int (*reopen_logs)(void *private_data);
+
+ struct tevent_req * (*reopen_logs_send)(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ void *private_data);
+ bool (*reopen_logs_recv)(struct tevent_req *req, int *perr);
+
+ /* Shutdown: sync hook, then the async pair; neither reports an error */
+ void (*shutdown)(void *private_data);
+
+ struct tevent_req * (*shutdown_send)(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ void *private_data);
+ void (*shutdown_recv)(struct tevent_req *req);
+
+ /* Wait computation: async only; the daemon shuts down when it ends */
+ struct tevent_req * (*wait_send)(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ void *private_data);
+ bool (*wait_recv)(struct tevent_req *req, int *perr);
+};
+
+/**
+ * @brief The callback routines called for a unix-domain socket
+ *
+ * connect() is called when there is a new connection
+ *
+ * @param[in] client The new socket client context
+ * @param[in] pid The pid of the new client process, or -1 if unknown
+ * @param[in] private_data Private data set with the socket
+ * @return true if connection should be accepted, false otherwise
+ *
+ *
+ * disconnect() is called when client closes connection
+ *
+ * @param[in] client The socket client context
+ * @param[in] private_data Private data associated with the socket
+ *
+ *
+ * read_send() starts the async computation to process data on the socket
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client The socket client context
+ * @param[in] buf Data received from the client
+ * @param[in] buflen Length of the data
+ * @param[in] private_data Private data associated with the socket
+ * @return new tevent request, or NULL on failure
+ *
+ *
+ * read_recv() ends the async computation to process data on the socket
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ *
+ */
+struct sock_socket_funcs {
+ /* Called for a new connection; return false to reject it */
+ bool (*connect)(struct sock_client_context *client,
+ pid_t pid,
+ void *private_data);
+ /* Called when the client closes the connection */
+ void (*disconnect)(struct sock_client_context *client,
+ void *private_data);
+
+ /* Async pair that processes data received from the client */
+ struct tevent_req * (*read_send)(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct sock_client_context *client,
+ uint8_t *buf, size_t buflen,
+ void *private_data);
+ bool (*read_recv)(struct tevent_req *req, int *perr);
+};
+
+/**
+ * @brief Async computation to send data to the client
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] client The socket client context
+ * @param[in] buf Data to be sent to the client
+ * @param[in] buflen Length of the data
+ * @return new tevent request, or NULL on failure
+ */
+struct tevent_req *sock_socket_write_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct sock_client_context *client,
+ uint8_t *buf, size_t buflen);
+
+/**
+ * @brief Async computation end to send data to client
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool sock_socket_write_recv(struct tevent_req *req, int *perr);
+
+/**
+ * @brief Create a new socket daemon
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] daemon_name Name of the daemon, used for logging
+ * @param[in] logging Logging setup string
+ * @param[in] debug_level Debug level to log at
+ * @param[in] funcs Socket daemon callback routines
+ * @param[in] private_data Private data associated with callback routines
+ * @param[out] result New socket daemon context
+ * @return 0 on success, errno on failure
+ */
+int sock_daemon_setup(TALLOC_CTX *mem_ctx, const char *daemon_name,
+ const char *logging, const char *debug_level,
+ struct sock_daemon_funcs *funcs,
+ void *private_data,
+ struct sock_daemon_context **result);
+
+/**
+ * @brief Create and listen to the unix domain socket
+ *
+ * @param[in] sockd Socket daemon context
+ * @param[in] sockpath Unix domain socket path
+ * @param[in] funcs socket callback routines
+ * @param[in] private_data Private data associated with callback routines
+ * @return 0 on success, errno on failure
+ */
+int sock_daemon_add_unix(struct sock_daemon_context *sockd,
+ const char *sockpath,
+ struct sock_socket_funcs *funcs,
+ void *private_data);
+
+/**
+ * @brief Set file descriptor for indicating startup success
+ *
+ * On successful completion, 0 (unsigned int) will be written to the fd.
+ *
+ * @param[in] sockd Socket daemon context
+ * @param[in] fd File descriptor
+ * @return true on success, false on error
+ */
+bool sock_daemon_set_startup_fd(struct sock_daemon_context *sockd, int fd);
+
+/**
+ * @brief Async computation start to run a socket daemon
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] sockd The socket daemon context
+ * @param[in] pidfile PID file to create, NULL if no PID file required
+ * @param[in] do_fork Whether the daemon should fork on startup
+ * @param[in] create_session Whether the daemon should create a new session
+ * @param[in] pid_watch PID to watch. If PID goes away, shutdown.
+ * @return new tevent request, NULL on failure
+ */
+struct tevent_req *sock_daemon_run_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ struct sock_daemon_context *sockd,
+ const char *pidfile,
+ bool do_fork, bool create_session,
+ pid_t pid_watch);
+
+/**
+ * @brief Async computation end to run a socket daemon
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool sock_daemon_run_recv(struct tevent_req *req, int *perr);
+
+/**
+ * @brief Sync way to start a daemon
+ *
+ * @param[in] ev Tevent context
+ * @param[in] sockd The socket daemon context
+ * @param[in] pidfile PID file to create, NULL if no PID file required
+ * @param[in] do_fork Whether the daemon should fork on startup
+ * @param[in] create_session Whether the daemon should create a new session
+ * @param[in] pid_watch PID to watch. If PID goes away, shutdown.
+ * @return 0 on success, errno on failure
+ *
+ * This call will return only on shutdown of the daemon
+ */
+int sock_daemon_run(struct tevent_context *ev,
+ struct sock_daemon_context *sockd,
+ const char *pidfile,
+ bool do_fork, bool create_session,
+ pid_t pid_watch);
+
+#endif /* __CTDB_SOCK_DAEMON_H__ */
diff --git a/ctdb/common/sock_io.c b/ctdb/common/sock_io.c
new file mode 100644
index 0000000..81e82c5
--- /dev/null
+++ b/ctdb/common/sock_io.c
@@ -0,0 +1,328 @@
+/*
+ Generic Unix-domain Socket I/O
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/sys_rw.h"
+#include "lib/util/debug.h"
+#include "lib/util/blocking.h"
+
+#include "common/logging.h"
+#include "common/sock_io.h"
+
+/*
+ * Remove a stale socket file.
+ *
+ * Returns true when the file was removed (logged as a warning) or did not
+ * exist in the first place, false when unlink() failed for any other reason.
+ */
+bool sock_clean(const char *sockpath)
+{
+	if (unlink(sockpath) == 0) {
+		D_WARNING("Removed stale socket %s\n", sockpath);
+		return true;
+	}
+
+	if (errno == ENOENT) {
+		/* Nothing to clean up */
+		return true;
+	}
+
+	D_ERR("Failed to remove stale socket %s\n", sockpath);
+	return false;
+}
+
+/*
+ * Connect to the unix-domain stream socket at sockpath.
+ *
+ * Returns the connected file descriptor, or -1 if sockpath is NULL, does not
+ * fit into sun_path, or socket()/connect() fails.  Failures are logged.
+ */
+int sock_connect(const char *sockpath)
+{
+	struct sockaddr_un addr;
+	size_t pathlen;
+	int fd;
+
+	if (sockpath == NULL) {
+		D_ERR("Invalid socket path\n");
+		return -1;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+
+	/* strlcpy() returns the source length, so >= size means truncation */
+	pathlen = strlcpy(addr.sun_path, sockpath, sizeof(addr.sun_path));
+	if (pathlen >= sizeof(addr.sun_path)) {
+		D_ERR("Socket path too long, len=%zu\n", strlen(sockpath));
+		return -1;
+	}
+
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd == -1) {
+		D_ERR("socket() failed, errno=%d\n", errno);
+		return -1;
+	}
+
+	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+		D_ERR("connect() failed, errno=%d\n", errno);
+		close(fd);
+		return -1;
+	}
+
+	return fd;
+}
+
+/*
+ * Per-descriptor I/O state: buffered packet reads plus a queue of writes.
+ */
+struct sock_queue {
+ struct tevent_context *ev;
+ /* Called with each complete packet, or with (NULL, 0) on failure */
+ sock_queue_callback_fn_t callback;
+ void *private_data;
+ int fd;
+
+ /* Immediate event used to process further packets already buffered */
+ struct tevent_immediate *im;
+ /* Outgoing writes queued by sock_queue_write() */
+ struct tevent_queue *queue;
+ /* Read event on fd; NULL while fd is -1 */
+ struct tevent_fd *fde;
+ /* Read buffer; unprocessed data occupies buf[begin .. end) */
+ uint8_t *buf;
+ size_t buflen, begin, end;
+};
+
+/*
+ * The reserved talloc headers, SOCK_QUEUE_OBJ_COUNT,
+ * and the pre-allocated pool-memory SOCK_QUEUE_POOL_SIZE,
+ * are used for the sub-objects queue->im, queue->queue, queue->fde
+ * and queue->buf.
+ * If the memory allocating sub-objects of struct sock_queue change,
+ * those values need to be adjusted.
+ */
+#define SOCK_QUEUE_OBJ_COUNT 4
+#define SOCK_QUEUE_POOL_SIZE 2048
+
+static bool sock_queue_set_fd(struct sock_queue *queue, int fd);
+static void sock_queue_handler(struct tevent_context *ev,
+ struct tevent_fd *fde, uint16_t flags,
+ void *private_data);
+static void sock_queue_process(struct sock_queue *queue);
+static void sock_queue_process_event(struct tevent_context *ev,
+ struct tevent_immediate *im,
+ void *private_data);
+
+/*
+ * Create a sock_queue for fd.
+ *
+ * The queue is allocated as a talloc pooled object under mem_ctx, together
+ * with its immediate event and its outgoing tevent queue.  callback and
+ * private_data are stored for delivering packets and failures.
+ *
+ * Returns the new queue, or NULL if any allocation or the fd setup fails.
+ */
+struct sock_queue *sock_queue_setup(TALLOC_CTX *mem_ctx,
+				    struct tevent_context *ev,
+				    int fd,
+				    sock_queue_callback_fn_t callback,
+				    void *private_data)
+{
+	struct sock_queue *q;
+
+	q = talloc_pooled_object(mem_ctx, struct sock_queue,
+				 SOCK_QUEUE_OBJ_COUNT, SOCK_QUEUE_POOL_SIZE);
+	if (q == NULL) {
+		return NULL;
+	}
+	memset(q, 0, sizeof(struct sock_queue));
+
+	q->ev = ev;
+	q->callback = callback;
+	q->private_data = private_data;
+
+	q->im = tevent_create_immediate(q);
+	if (q->im == NULL) {
+		goto fail;
+	}
+
+	q->queue = tevent_queue_create(q, "out-queue");
+	if (q->queue == NULL) {
+		goto fail;
+	}
+
+	if (! sock_queue_set_fd(q, fd)) {
+		goto fail;
+	}
+
+	return q;
+
+fail:
+	talloc_free(q);
+	return NULL;
+}
+
+/*
+ * Attach fd to the queue, dropping any previous fd event.
+ *
+ * For fd == -1 only the old event is released.  Otherwise the descriptor is
+ * switched to non-blocking mode and a read event with auto-close is
+ * registered for it.
+ *
+ * Returns false if the descriptor cannot be made non-blocking or the event
+ * cannot be created, true otherwise.
+ */
+static bool sock_queue_set_fd(struct sock_queue *queue, int fd)
+{
+	TALLOC_FREE(queue->fde);
+	queue->fd = fd;
+
+	if (fd == -1) {
+		return true;
+	}
+
+	if (set_blocking(fd, false) != 0) {
+		return false;
+	}
+
+	queue->fde = tevent_add_fd(queue->ev, queue, fd,
+				   TEVENT_FD_READ,
+				   sock_queue_handler, queue);
+	if (queue->fde == NULL) {
+		return false;
+	}
+	tevent_fd_set_auto_close(queue->fde);
+
+	return true;
+}
+
+/*
+ * Read handler for the queue's descriptor.
+ *
+ * FIONREAD gives the number of pending bytes.  Zero pending bytes is treated
+ * as the descriptor having been closed.  Otherwise the read buffer is grown
+ * as needed, the data is appended and sock_queue_process() is called to
+ * deliver any complete packet.
+ *
+ * On failure the callback is invoked with (NULL, 0).
+ */
+static void sock_queue_handler(struct tevent_context *ev,
+			       struct tevent_fd *fde, uint16_t flags,
+			       void *private_data)
+{
+	struct sock_queue *queue = talloc_get_type_abort(
+		private_data, struct sock_queue);
+	int ret, num_ready;
+	ssize_t nread;
+
+	ret = ioctl(queue->fd, FIONREAD, &num_ready);
+	if (ret != 0) {
+		/* Ignore */
+		return;
+	}
+
+	if (num_ready == 0) {
+		/* descriptor has been closed */
+		goto fail;
+	}
+
+	if ((size_t)num_ready > queue->buflen - queue->end) {
+		size_t new_len = queue->end + num_ready;
+		uint8_t *new_buf;
+
+		/*
+		 * Reallocate into a temporary so that a failure leaves
+		 * buf/buflen/begin/end consistent; a packet processing
+		 * immediate may still be pending on the old buffer.
+		 */
+		new_buf = talloc_realloc_size(queue, queue->buf, new_len);
+		if (new_buf == NULL) {
+			goto fail;
+		}
+		queue->buf = new_buf;
+		queue->buflen = new_len;
+	}
+
+	nread = sys_read(queue->fd, queue->buf + queue->end, num_ready);
+	if (nread < 0) {
+		goto fail;
+	}
+	queue->end += nread;
+
+	sock_queue_process(queue);
+	return;
+
+fail:
+	queue->callback(NULL, 0, queue->private_data);
+}
+
+/*
+ * Deliver at most one complete packet from the read buffer.
+ *
+ * A packet starts with a uint32_t holding its total size.  Nothing happens
+ * until the size field and then the whole packet have been buffered.  A size
+ * of 0 is invalid and reported via callback(NULL, 0).
+ *
+ * After delivery, any remaining data is handled from an immediate event;
+ * if the buffer is fully consumed it is released and the offsets are reset.
+ */
+static void sock_queue_process(struct sock_queue *queue)
+{
+	uint32_t pkt_size;
+
+	if ((queue->end - queue->begin) < sizeof(uint32_t)) {
+		/* not enough data */
+		return;
+	}
+
+	/*
+	 * begin can be any byte offset once a packet has been consumed, so
+	 * copy the size out rather than dereferencing a possibly misaligned
+	 * uint32_t pointer.
+	 */
+	memcpy(&pkt_size, queue->buf + queue->begin, sizeof(pkt_size));
+	if (pkt_size == 0) {
+		D_ERR("Invalid packet of length 0\n");
+		queue->callback(NULL, 0, queue->private_data);
+		return;
+	}
+
+	if ((queue->end - queue->begin) < pkt_size) {
+		/* not enough data */
+		return;
+	}
+
+	queue->callback(queue->buf + queue->begin, pkt_size,
+			queue->private_data);
+	queue->begin += pkt_size;
+
+	if (queue->begin < queue->end) {
+		/* more data to be processed */
+		tevent_schedule_immediate(queue->im, queue->ev,
+					  sock_queue_process_event, queue);
+	} else {
+		TALLOC_FREE(queue->buf);
+		queue->buflen = 0;
+		queue->begin = 0;
+		queue->end = 0;
+	}
+}
+
+/*
+ * Immediate event handler: continue processing packets that are already
+ * sitting in the read buffer.
+ */
+static void sock_queue_process_event(struct tevent_context *ev,
+				     struct tevent_immediate *im,
+				     void *private_data)
+{
+	struct sock_queue *q;
+
+	q = talloc_get_type_abort(private_data, struct sock_queue);
+	sock_queue_process(q);
+}
+
+/*
+ * State of one queued write: the caller's buffer (stored by pointer, not
+ * copied) and its length.
+ */
+struct sock_queue_write_state {
+ uint8_t *pkt;
+ uint32_t pkt_size;
+};
+
+static void sock_queue_trigger(struct tevent_req *req, void *private_data);
+
+/*
+ * Queue buf for writing to the queue's descriptor.
+ *
+ * The buffer is referenced, not copied, and is written later when the entry
+ * is triggered from the outgoing tevent queue.
+ *
+ * Returns 0 once the write is queued, -1 if buflen is INT32_MAX or larger or
+ * if an allocation fails.
+ */
+int sock_queue_write(struct sock_queue *queue, uint8_t *buf, size_t buflen)
+{
+	struct sock_queue_write_state *state;
+	struct tevent_queue_entry *entry;
+	struct tevent_req *req;
+
+	if (buflen >= INT32_MAX) {
+		return -1;
+	}
+
+	req = tevent_req_create(queue, &state, struct sock_queue_write_state);
+	if (req == NULL) {
+		return -1;
+	}
+
+	state->pkt = buf;
+	state->pkt_size = (uint32_t)buflen;
+
+	entry = tevent_queue_add_entry(queue->queue, queue->ev, req,
+				       sock_queue_trigger, queue);
+	if (entry != NULL) {
+		return 0;
+	}
+
+	talloc_free(req);
+	return -1;
+}
+
+/*
+ * Queue trigger: write out one queued packet.
+ *
+ * sys_write() is called at least once and then repeatedly until the whole
+ * packet has been written.  A write error is reported via callback(NULL, 0)
+ * and the request is left as it is.  On success the request is marked done
+ * and freed.
+ */
+static void sock_queue_trigger(struct tevent_req *req, void *private_data)
+{
+	struct sock_queue *queue = talloc_get_type_abort(
+		private_data, struct sock_queue);
+	struct sock_queue_write_state *state = tevent_req_data(
+		req, struct sock_queue_write_state);
+	size_t written = 0;
+
+	for (;;) {
+		ssize_t n;
+
+		n = sys_write(queue->fd, state->pkt + written,
+			      state->pkt_size - written);
+		if (n < 0) {
+			queue->callback(NULL, 0, queue->private_data);
+			return;
+		}
+		written += n;
+
+		if (written >= state->pkt_size) {
+			break;
+		}
+	}
+
+	tevent_req_done(req);
+	talloc_free(req);
+}
diff --git a/ctdb/common/sock_io.h b/ctdb/common/sock_io.h
new file mode 100644
index 0000000..8b6e4eb
--- /dev/null
+++ b/ctdb/common/sock_io.h
@@ -0,0 +1,39 @@
+/*
+ Generic Socket I/O
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_SOCK_IO_H__
+#define __CTDB_SOCK_IO_H__
+
+/*
+ * Callback used by a sock_queue.  It is called with a complete packet
+ * (buf, buflen), or with buf == NULL and buflen == 0 on failure.
+ */
+typedef void (*sock_queue_callback_fn_t)(uint8_t *buf, size_t buflen,
+ void *private_data);
+
+/* Opaque queue handling packet reads and queued writes on a descriptor */
+struct sock_queue;
+
+/*
+ * Remove a stale socket file.  Returns true if it was removed or did not
+ * exist, false on any other unlink failure.
+ */
+bool sock_clean(const char *sockpath);
+/*
+ * Connect to the unix-domain stream socket at sockpath.  Returns the
+ * connected descriptor, or -1 on failure.
+ */
+int sock_connect(const char *sockpath);
+
+/*
+ * Create a queue for fd under mem_ctx.  callback is invoked with
+ * private_data for every complete packet and on failure.  Returns NULL on
+ * failure.
+ */
+struct sock_queue *sock_queue_setup(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ int fd,
+ sock_queue_callback_fn_t callback,
+ void *private_data);
+
+/*
+ * Queue buf (referenced, not copied) for writing.  Returns 0 on success,
+ * -1 on failure.
+ */
+int sock_queue_write(struct sock_queue *queue, uint8_t *buf, size_t buflen);
+
+#endif /* __CTDB_SOCK_IO_H__ */
diff --git a/ctdb/common/srvid.c b/ctdb/common/srvid.c
new file mode 100644
index 0000000..3304994
--- /dev/null
+++ b/ctdb/common/srvid.c
@@ -0,0 +1,280 @@
+/*
+ Message handler database based on srvid
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+
+#include <tdb.h>
+
+#include "lib/util/dlinklist.h"
+#include "common/db_hash.h"
+#include "common/srvid.h"
+
+struct srvid_handler_list;
+
+struct srvid_context { /* Top-level state of the srvid handler database */
+ struct db_hash_context *dh; /* hash db set up by srvid_init(); maps a srvid to a pointer to its handler list */
+ struct srvid_handler_list *list; /* head of the list of all per-srvid handler lists */
+};
+
+struct srvid_handler { /* One registered handler for a srvid */
+ struct srvid_handler *prev, *next; /* links within the owning list's handler chain */
+ struct srvid_handler_list *list; /* per-srvid list this handler belongs to */
+ srvid_handler_fn handler; /* callback invoked by srvid_dispatch() */
+ void *private_data; /* passed to handler; also what deregister/exists match on */
+};
+
+struct srvid_handler_list { /* All handlers registered for a single srvid */
+ struct srvid_handler_list *prev, *next; /* links within srvid_context.list */
+ struct srvid_context *srv; /* context this list belongs to */
+ uint64_t srvid; /* srvid the handlers are registered for (the db key) */
+ struct srvid_handler *h; /* head of the handler chain */
+};
+
+
+/*
+ * Allocate a srvid context on mem_ctx and set up its hash database.
+ *
+ * On success the new context is stored in *result and 0 is returned.
+ * Returns ENOMEM if the context cannot be allocated, otherwise the
+ * error reported by db_hash_init(); *result is untouched on failure.
+ */
+int srvid_init(TALLOC_CTX *mem_ctx, struct srvid_context **result)
+{
+	struct srvid_context *ctx = talloc_zero(mem_ctx, struct srvid_context);
+	int err;
+
+	if (ctx == NULL) {
+		return ENOMEM;
+	}
+
+	err = db_hash_init(ctx, "messagedb", 8192, DB_HASH_SIMPLE, &ctx->dh);
+	if (err == 0) {
+		*result = ctx;
+		return 0;
+	}
+
+	talloc_free(ctx);
+	return err;
+}
+
+/*
+ * Wrapper functions to insert/delete/fetch srvid_handler_list
+ */
+
+/* Store the pointer value of list in the hash db, keyed by srvid */
+static int srvid_insert(struct srvid_context *srv, uint64_t srvid,
+			struct srvid_handler_list *list)
+{
+	uint8_t *key = (uint8_t *)&srvid;
+	uint8_t *value = (uint8_t *)&list;
+
+	return db_hash_insert(srv->dh, key, sizeof(srvid),
+			      value, sizeof(list));
+}
+
+/* Drop the hash db record keyed by srvid */
+static int srvid_delete(struct srvid_context *srv, uint64_t srvid)
+{
+	uint8_t *key = (uint8_t *)&srvid;
+
+	return db_hash_delete(srv->dh, key, sizeof(srvid));
+}
+
+/*
+ * db_hash_fetch() callback: the record is expected to hold exactly one
+ * list pointer, which is copied out through private_data.  Any other
+ * record length is reported as EIO.
+ */
+static int srvid_fetch_parser(uint8_t *keybuf, size_t keylen,
+			      uint8_t *databuf, size_t datalen,
+			      void *private_data)
+{
+	struct srvid_handler_list **out = private_data;
+
+	if (datalen == sizeof(struct srvid_handler_list *)) {
+		*out = *(struct srvid_handler_list **)databuf;
+		return 0;
+	}
+
+	return EIO;
+}
+
+/* Look up the handler list stored for srvid; *list is set on success */
+static int srvid_fetch(struct srvid_context *srv, uint64_t srvid,
+		       struct srvid_handler_list **list)
+{
+	uint8_t *key = (uint8_t *)&srvid;
+
+	return db_hash_fetch(srv->dh, key, sizeof(srvid),
+			     srvid_fetch_parser, list);
+}
+
+/*
+ * talloc destructor for a handler: unlink it from its list and, if
+ * that leaves the list without handlers, free the list too.
+ */
+static int srvid_handler_destructor(struct srvid_handler *h)
+{
+	struct srvid_handler_list *owner = h->list;
+
+	DLIST_REMOVE(owner->h, h);
+	if (owner->h != NULL) {
+		return 0;
+	}
+
+	talloc_free(owner);
+	return 0;
+}
+
+/*
+ * talloc destructor for a handler list: free every handler still on
+ * it, delete the db record for its srvid and unlink the list from the
+ * context.
+ */
+static int srvid_handler_list_destructor(struct srvid_handler_list *list)
+{
+	for (;;) {
+		struct srvid_handler *head = list->h;
+
+		if (head == NULL) {
+			break;
+		}
+		/* Unlink before freeing */
+		DLIST_REMOVE(list->h, head);
+		TALLOC_FREE(head);
+	}
+
+	srvid_delete(list->srv, list->srvid);
+	DLIST_REMOVE(list->srv->list, list);
+	return 0;
+}
+
+/*
+ * Register handler/private_data for srvid.
+ *
+ * The handler record is allocated on mem_ctx.  If no list exists yet
+ * for srvid, one is allocated on srv and inserted into the db first.
+ *
+ * Returns 0 on success, EINVAL if srv is NULL, ENOMEM on allocation
+ * failure, or the error from inserting the new list into the db.
+ */
+int srvid_register(struct srvid_context *srv, TALLOC_CTX *mem_ctx,
+		   uint64_t srvid, srvid_handler_fn handler,
+		   void *private_data)
+{
+	struct srvid_handler_list *list = NULL;
+	struct srvid_handler *entry;
+	int err;
+
+	if (srv == NULL) {
+		return EINVAL;
+	}
+
+	entry = talloc_zero(mem_ctx, struct srvid_handler);
+	if (entry == NULL) {
+		return ENOMEM;
+	}
+	entry->handler = handler;
+	entry->private_data = private_data;
+
+	if (srvid_fetch(srv, srvid, &list) != 0) {
+		/* srvid not yet registered: create and publish its list */
+		list = talloc_zero(srv, struct srvid_handler_list);
+		if (list == NULL) {
+			err = ENOMEM;
+			goto fail;
+		}
+		list->srv = srv;
+		list->srvid = srvid;
+
+		err = srvid_insert(srv, srvid, list);
+		if (err != 0) {
+			goto fail;
+		}
+
+		DLIST_ADD(srv->list, list);
+		talloc_set_destructor(list, srvid_handler_list_destructor);
+	}
+
+	entry->list = list;
+	DLIST_ADD(list->h, entry);
+	talloc_set_destructor(entry, srvid_handler_destructor);
+	return 0;
+
+fail:
+	/* No destructors are installed yet on this path */
+	talloc_free(entry);
+	TALLOC_FREE(list);
+	return err;
+}
+
+/*
+ * Remove the first handler registered for srvid whose private_data
+ * matches.
+ *
+ * Returns 0 on success, the lookup error if srvid has no list, or
+ * ENOENT if no handler on the list matches private_data.
+ */
+int srvid_deregister(struct srvid_context *srv, uint64_t srvid,
+		     void *private_data)
+{
+	struct srvid_handler_list *list;
+	struct srvid_handler *cur;
+	int err;
+
+	err = srvid_fetch(srv, srvid, &list);
+	if (err != 0) {
+		return err;
+	}
+
+	cur = list->h;
+	while (cur != NULL && cur->private_data != private_data) {
+		cur = cur->next;
+	}
+	if (cur == NULL) {
+		return ENOENT;
+	}
+
+	/* The handler's destructor takes care of unlinking */
+	talloc_free(cur);
+	return 0;
+}
+
+/*
+ * Check for a registration on srvid.
+ *
+ * With private_data == NULL any handler counts; otherwise a handler
+ * with matching private_data is required.  Returns 0 if found, the
+ * lookup error if srvid has no list, and ENOENT otherwise.
+ */
+int srvid_exists(struct srvid_context *srv, uint64_t srvid, void *private_data)
+{
+	struct srvid_handler_list *list;
+	struct srvid_handler *cur;
+	int err;
+
+	err = srvid_fetch(srv, srvid, &list);
+	if (err != 0) {
+		return err;
+	}
+
+	cur = list->h;
+	if (cur == NULL) {
+		return ENOENT;
+	}
+	if (private_data == NULL) {
+		return 0;
+	}
+
+	while (cur != NULL) {
+		if (cur->private_data == private_data) {
+			return 0;
+		}
+		cur = cur->next;
+	}
+
+	return ENOENT;
+}
+
+/*
+ * Call every handler on the list stored under lookup_srvid, passing
+ * msg_srvid and data to each.  Returns the result of the list lookup.
+ *
+ * The next pointer is read before a handler runs, so a handler may
+ * free its own registration from inside the callback.
+ */
+static int srvid_call_handlers(struct srvid_context *srv,
+			       uint64_t lookup_srvid, uint64_t msg_srvid,
+			       TDB_DATA data)
+{
+	struct srvid_handler_list *list;
+	struct srvid_handler *h, *next;
+	int ret;
+
+	ret = srvid_fetch(srv, lookup_srvid, &list);
+	if (ret != 0) {
+		return ret;
+	}
+
+	for (h = list->h; h != NULL; h = next) {
+		next = h->next;
+		h->handler(msg_srvid, data, h->private_data);
+	}
+
+	return 0;
+}
+
+/*
+ * Send a message to the handlers registered for srvid and, unless
+ * srvid_all is 0, also to those registered for srvid_all.  Handlers
+ * always receive srvid (not srvid_all) as their first argument.
+ *
+ * Returns the result of the last lookup performed: that of srvid when
+ * srvid_all is 0, otherwise that of srvid_all.
+ */
+int srvid_dispatch(struct srvid_context *srv, uint64_t srvid,
+		   uint64_t srvid_all, TDB_DATA data)
+{
+	int ret;
+
+	ret = srvid_call_handlers(srv, srvid, srvid, data);
+	if (srvid_all == 0) {
+		return ret;
+	}
+
+	return srvid_call_handlers(srv, srvid_all, srvid, data);
+}
diff --git a/ctdb/common/srvid.h b/ctdb/common/srvid.h
new file mode 100644
index 0000000..c0c2b30
--- /dev/null
+++ b/ctdb/common/srvid.h
@@ -0,0 +1,121 @@
+/*
+ Message handler database based on srvid
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_SRVID_H__
+#define __CTDB_SRVID_H__
+
+#include <talloc.h>
+#include <tdb.h>
+
+/**
+ * @file srvid.h
+ *
+ * @brief Database of message handlers based on srvid
+ *
+ * CTDB can be used to send messages between clients across nodes using
+ * CTDB_REQ_MESSAGE. Clients register for messages based on srvid. CTDB itself
+ * uses a small set of srvid messages. A large range (2^56) of srvid messages
+ * is reserved for Samba.
+ */
+
+/**
+ * @brief Message handler function
+ *
+ * To receive messages for a specific srvid, register a message handler function
+ * for the srvid.
+ */
+typedef void (*srvid_handler_fn)(uint64_t srvid, TDB_DATA data,
+ void *private_data);
+
+/**
+ * @brief Abstract struct to store srvid message handler database
+ */
+struct srvid_context;
+
+/**
+ * @brief Initialize srvid message handler database
+ *
+ * This returns a new srvid message handler database context. Freeing
+ * this context will free all the memory associated with the hash table.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] result The new srvid_context structure
+ * @return 0 on success, errno on failure
+ */
+int srvid_init(TALLOC_CTX *mem_ctx, struct srvid_context **result);
+
+/**
+ * @brief Register a message handler for a srvid
+ *
+ * The message handler is allocated using the specified talloc context. Freeing
+ * this talloc context, removes the message handler.
+ *
+ * @param[in] srv The srvid message handler database context
+ * @param[in] mem_ctx Talloc memory context for message handler
+ * @param[in] srvid The srvid
+ * @param[in] handler The message handler function for srvid
+ * @param[in] private_data Private data for message handler function
+ * @return 0 on success, errno on failure
+ */
+int srvid_register(struct srvid_context *srv, TALLOC_CTX *mem_ctx,
+ uint64_t srvid, srvid_handler_fn handler,
+ void *private_data);
+
+/**
+ * @brief Unregister a message handler for a srvid
+ *
+ * @param[in] srv The srvid message handler database context
+ * @param[in] srvid The srvid
+ * @param[in] private_data Private data of message handler function
+ * @return 0 on success, errno on failure
+ */
+int srvid_deregister(struct srvid_context *srv, uint64_t srvid,
+ void *private_data);
+
+/**
+ * @brief Check if any message handler is registered for srvid
+ *
+ * If private_data is NULL, then check if there is any registration
+ * for the specified srvid. If private_data is not NULL, then check for
+ * a registration that matches the specified private data.
+ *
+ * @param[in] srv The srvid message handler database context
+ * @param[in] srvid The srvid
+ * @param[in] private_data Private data
+ * @return 0 on success, errno on failure
+ */
+int srvid_exists(struct srvid_context *srv, uint64_t srvid,
+ void *private_data);
+
+/**
+ * @brief Call message handlers for given srvid
+ *
+ * @param[in] srv The srvid message handler database context
+ * @param[in] srvid The srvid
+ * @param[in] srvid_all The srvid that gets all messages
+ * @param[in] data The data passed to each message handler
+ * @return 0 on success, errno on failure
+ *
+ * If srvid_all passed is 0, the message is not sent to message handlers
+ * registered with special srvid to receive all messages.
+ */
+int srvid_dispatch(struct srvid_context *srv, uint64_t srvid,
+ uint64_t srvid_all, TDB_DATA data);
+
+#endif /* __CTDB_SRVID_H__ */
diff --git a/ctdb/common/system.c b/ctdb/common/system.c
new file mode 100644
index 0000000..05a9564
--- /dev/null
+++ b/ctdb/common/system.c
@@ -0,0 +1,237 @@
+/*
+ common system utilities
+
+ Copyright (C) Amitay Isaacs 2014
+ Copyright (C) Martin Schwenke 2014
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/shmem.h"
+#include "system/network.h"
+
+#include <talloc.h>
+#include <libgen.h>
+
+#include "lib/util/debug.h"
+
+#include "protocol/protocol.h"
+
+#include "common/logging.h"
+#include "common/system.h"
+
+#ifdef HAVE_SCHED_H
+#include <sched.h>
+#endif
+
+#ifdef HAVE_PROCINFO_H
+#include <procinfo.h>
+#endif
+
+/*
+ * Try to switch the calling process to real-time scheduling.
+ *
+ * AIX builds use thread_setsched() with SCHED_RR; other builds use
+ * sched_setscheduler() with SCHED_FIFO at priority 1.  Returns true
+ * only if the scheduler was changed.  If neither API was detected at
+ * build time a critical message is logged and false is returned.
+ */
+bool set_scheduler(void)
+{
+#ifdef _AIX_
+#ifdef HAVE_THREAD_SETSCHED
+	struct thrdentry64 te;
+	tid64_t ti = 0ULL;
+
+	if (getthrds64(getpid(), &te, sizeof(te), &ti, 1) != 1) {
+		DEBUG(DEBUG_ERR, ("Unable to get thread information\n"));
+		return false;
+	}
+
+	if (thread_setsched(te.ti_tid, 0, SCHED_RR) != -1) {
+		return true;
+	}
+
+	DEBUG(DEBUG_ERR, ("Unable to set scheduler to SCHED_RR (%s)\n",
+			  strerror(errno)));
+	return false;
+#endif
+#else /* no AIX */
+#ifdef HAVE_SCHED_SETSCHEDULER
+	struct sched_param p;
+
+	p.sched_priority = 1;
+
+	if (sched_setscheduler(0, SCHED_FIFO, &p) != -1) {
+		return true;
+	}
+
+	DEBUG(DEBUG_CRIT,("Unable to set scheduler to SCHED_FIFO (%s)\n",
+			  strerror(errno)));
+	return false;
+#endif
+#endif
+	DEBUG(DEBUG_CRIT,("No way to set real-time priority.\n"));
+	return false;
+}
+
+/*
+ * Reset scheduler from real-time to normal scheduling (SCHED_OTHER).
+ *
+ * Failures are only logged.  On AIX the thread id comes from
+ * getthrds64(); if that lookup fails there is no valid thread id to
+ * act on, so the function returns instead of passing an uninitialised
+ * id to thread_setsched().
+ */
+void reset_scheduler(void)
+{
+#ifdef _AIX_
+#ifdef HAVE_THREAD_SETSCHED
+	struct thrdentry64 te;
+	tid64_t ti;
+
+	ti = 0ULL;
+	if (getthrds64(getpid(), &te, sizeof(te), &ti, 1) != 1) {
+		DEBUG(DEBUG_ERR, ("Unable to get thread information\n"));
+		return;
+	}
+	if (thread_setsched(te.ti_tid, 0, SCHED_OTHER) == -1) {
+		DEBUG(DEBUG_ERR, ("Unable to set scheduler to SCHED_OTHER\n"));
+	}
+#endif
+#else /* no AIX */
+#ifdef HAVE_SCHED_SETSCHEDULER
+	struct sched_param p;
+
+	p.sched_priority = 0;
+	if (sched_setscheduler(0, SCHED_OTHER, &p) == -1) {
+		DEBUG(DEBUG_ERR, ("Unable to set scheduler to SCHED_OTHER\n"));
+	}
+#endif
+#endif
+}
+
+/*
+ * Lock the process's current pages into memory.
+ *
+ * Nothing is locked when valgrinding is true, when not running as
+ * root, or when the build lacks mlockall() (or is for AIX).
+ *
+ * We don't lock future pages here; it would increase the chance that
+ * we'd fail to mmap later on.
+ */
+void lockdown_memory(bool valgrinding)
+{
+#if defined(HAVE_MLOCKALL) && !defined(_AIX_)
+	/* Extra stack, please! */
+	char dummy[10000];
+	memset(dummy, 0, sizeof(dummy));
+
+	if (valgrinding) {
+		return;
+	}
+
+	/* Ignore when running in local daemons mode */
+	if (getuid() != 0) {
+		return;
+	}
+
+	/* Avoid compiler optimizing out dummy. */
+	mlock(dummy, sizeof(dummy));
+	if (mlockall(MCL_CURRENT) != 0) {
+		/* The original format string had a stray "'" after %s */
+		DEBUG(DEBUG_WARNING,("Failed to lockdown memory: %s\n",
+				     strerror(errno)));
+	}
+#endif
+}
+
+/*
+ * Block until kill(pid, 0) fails with ESRCH, probing every 5 seconds.
+ * Any other kill() failure is treated as "still running".
+ */
+void ctdb_wait_for_process_to_exit(pid_t pid)
+{
+	for (;;) {
+		if (kill(pid, 0) != 0 && errno == ESRCH) {
+			return;
+		}
+		sleep(5);
+	}
+}
+
+#ifdef HAVE_IF_NAMEINDEX
+
+/*
+ * Return true if an interface named iface appears in the list
+ * returned by if_nameindex().  Returns false (after logging) if the
+ * list cannot be retrieved.
+ *
+ * The scan stops at the first entry with a zero index or a NULL name,
+ * so a NULL name is never handed to strcmp().
+ */
+bool ctdb_sys_check_iface_exists(const char *iface)
+{
+	struct if_nameindex *ifnis, *ifni;
+	bool found = false;
+
+	ifnis = if_nameindex();
+	if (ifnis == NULL) {
+		DBG_ERR("Failed to retrieve interface list\n");
+		return false;
+	}
+
+	for (ifni = ifnis;
+	     ifni->if_index != 0 && ifni->if_name != NULL;
+	     ifni++) {
+		if (strcmp(iface, ifni->if_name) == 0) {
+			found = true;
+			break;
+		}
+	}
+
+	if_freenameindex(ifnis);
+
+	return found;
+}
+
+#else /* HAVE_IF_NAMEINDEX */
+
+bool ctdb_sys_check_iface_exists(const char *iface)
+{
+	/* Not implemented: Interface always considered present */
+	return true;
+}
+
+#endif /* HAVE_IF_NAMEINDEX */
+
+#ifdef HAVE_PEERCRED
+
+/*
+ * Query the peer credentials of socket fd via SO_PEERCRED.
+ * On success *peer_pid is the peer's pid; on failure it is -1.
+ * Returns the getsockopt() result.
+ */
+int ctdb_get_peer_pid(const int fd, pid_t *peer_pid)
+{
+	struct ucred cr;
+	socklen_t crl = sizeof(cr);
+	int ret = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cr, &crl);
+
+	*peer_pid = (ret == 0) ? cr.pid : -1;
+	return ret;
+}
+
+#else /* HAVE_PEERCRED */
+
+#ifdef _AIX_
+
+/*
+ * AIX variant: same contract, using SO_PEERID and
+ * struct peercred_struct.
+ */
+int ctdb_get_peer_pid(const int fd, pid_t *peer_pid)
+{
+	struct peercred_struct cr;
+	socklen_t crl = sizeof(cr);
+	int ret = getsockopt(fd, SOL_SOCKET, SO_PEERID, &cr, &crl);
+
+	*peer_pid = (ret == 0) ? cr.pid : -1;
+	return ret;
+}
+
+#else /* _AIX_ */
+
+/* Fallback: not implemented; sets *peer_pid to -1, returns ENOSYS */
+int ctdb_get_peer_pid(const int fd, pid_t *peer_pid)
+{
+	*peer_pid = -1;
+	return ENOSYS;
+}
+
+#endif /* _AIX_ */
+
+#endif /* HAVE_PEERCRED */
diff --git a/ctdb/common/system.h b/ctdb/common/system.h
new file mode 100644
index 0000000..042e7cc
--- /dev/null
+++ b/ctdb/common/system.h
@@ -0,0 +1,37 @@
+/*
+ System specific code
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_SYSTEM_H__
+#define __CTDB_SYSTEM_H__
+
+#include <talloc.h>
+
+/* From system.c */
+
+bool set_scheduler(void);
+void reset_scheduler(void);
+
+void lockdown_memory(bool valgrinding);
+
+void ctdb_wait_for_process_to_exit(pid_t pid);
+
+bool ctdb_sys_check_iface_exists(const char *iface);
+int ctdb_get_peer_pid(const int fd, pid_t *peer_pid);
+
+#endif /* __CTDB_SYSTEM_H__ */
diff --git a/ctdb/common/system_socket.c b/ctdb/common/system_socket.c
new file mode 100644
index 0000000..273b9c3
--- /dev/null
+++ b/ctdb/common/system_socket.c
@@ -0,0 +1,1168 @@
+/*
+ ctdb system specific code to manage raw sockets on linux
+
+ Copyright (C) Ronnie Sahlberg 2007
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Marc Dequènes (Duck) 2009
+ Copyright (C) Volker Lendecke 2012
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+/*
+ * Use BSD struct tcphdr field names for portability. Modern glibc
+ * makes them available by default via <netinet/tcp.h> but older glibc
+ * requires __FAVOR_BSD to be defined.
+ *
+ * __FAVOR_BSD is normally defined in <features.h> if _DEFAULT_SOURCE
+ * (new) or _BSD_SOURCE (now deprecated) is set and _GNU_SOURCE is not
+ * set. Including "replace.h" above causes <features.h> to be
+ * indirectly included and this will not set __FAVOR_BSD because
+ * _GNU_SOURCE is set in Samba's "config.h" (which is included by
+ * "replace.h").
+ *
+ * Therefore, set __FAVOR_BSD by hand below.
+ */
+#define __FAVOR_BSD 1
+#include "system/network.h"
+
+#ifdef HAVE_NETINET_IF_ETHER_H
+#include <netinet/if_ether.h>
+#endif
+#ifdef HAVE_NETINET_IP6_H
+#include <netinet/ip6.h>
+#endif
+#ifdef HAVE_NETINET_ICMP6_H
+#include <netinet/icmp6.h>
+#endif
+#ifdef HAVE_LINUX_IF_PACKET_H
+#include <linux/if_packet.h>
+#endif
+
+#ifndef ETHERTYPE_IP6
+#define ETHERTYPE_IP6 0x86dd
+#endif
+
+#include "lib/util/debug.h"
+#include "lib/util/blocking.h"
+
+#include "protocol/protocol.h"
+
+#include "common/logging.h"
+#include "common/system_socket.h"
+
+/*
+ * Sum n bytes as big-endian (network order) 16-bit words.
+ *
+ * An odd trailing byte is treated as the high byte of a final word
+ * whose low byte is zero.  The bytes are combined with shifts rather
+ * than ntohs(), so the result does not depend on host byte order (the
+ * previous ntohs(*data) put the trailing byte in the low half on
+ * big-endian hosts).
+ *
+ * Returns the unfolded 32-bit sum; callers fold the carries.
+ */
+static uint32_t uint16_checksum(uint8_t *data, size_t n)
+{
+	uint32_t sum = 0;
+
+	for (; n >= 2; data += 2, n -= 2) {
+		sum += ((uint32_t)data[0] << 8) | (uint32_t)data[1];
+	}
+	if (n == 1) {
+		sum += (uint32_t)data[0] << 8;
+	}
+	return sum;
+}
+
+/*
+ * See if the given IP is currently on an interface.
+ *
+ * Works on a copy of *_addr with the port cleared and tries to bind a
+ * throw-away TCP socket to it; returns true only if bind() succeeds.
+ */
+bool ctdb_sys_have_ip(ctdb_sock_addr *_addr)
+{
+	ctdb_sock_addr probe = *_addr;
+	socklen_t probe_len;
+	int fd;
+	int rc;
+
+	if (probe.sa.sa_family == AF_INET) {
+		probe.ip.sin_port = 0;
+		probe_len = sizeof(struct sockaddr_in);
+	} else if (probe.sa.sa_family == AF_INET6) {
+		probe.ip6.sin6_port = 0;
+		probe_len = sizeof(struct sockaddr_in6);
+	} else {
+		probe_len = 0;
+	}
+
+	fd = socket(probe.sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
+	if (fd == -1) {
+		return false;
+	}
+
+	rc = bind(fd, (struct sockaddr *)&probe, probe_len);
+	close(fd);
+
+	return (rc == 0);
+}
+
+/*
+ * Checksum over n bytes at data plus the IPv4 pseudo-header fields
+ * taken from ip (source, destination, protocol) and the length n.
+ *
+ * The folded sum is passed through htons() and complemented; a result
+ * of 0 is returned as 0xFFFF.
+ */
+static uint16_t ip_checksum(uint8_t *data, size_t n, struct ip *ip)
+{
+	uint32_t acc;
+	uint16_t csum;
+	int i;
+
+	acc = uint16_checksum(data, n);
+	acc += uint16_checksum((uint8_t *)&ip->ip_src, sizeof(ip->ip_src));
+	acc += uint16_checksum((uint8_t *)&ip->ip_dst, sizeof(ip->ip_dst));
+	acc += ip->ip_p + n;
+
+	/* Fold the carries back in, twice */
+	for (i = 0; i < 2; i++) {
+		acc = (acc & 0xFFFF) + (acc >> 16);
+	}
+
+	csum = htons((uint16_t)acc);
+	csum = (uint16_t)~csum;
+
+	return (csum != 0) ? csum : 0xFFFF;
+}
+
+/*
+ * Checksum over n bytes at data plus an IPv6 pseudo-header built from
+ * ip6: source, destination, the length n and the next-header value.
+ *
+ * The folded sum is passed through htons() and complemented; a result
+ * of 0 is returned as 0xFFFF.
+ */
+static uint16_t ip6_checksum(uint8_t *data, size_t n, struct ip6_hdr *ip6)
+{
+	uint16_t pseudo[3];
+	uint32_t nlen = htonl(n);
+	uint32_t acc;
+	uint16_t csum;
+	int i;
+
+	/* Length as two 16-bit halves, then the 8-bit next header */
+	pseudo[0] = nlen & UINT16_MAX;
+	pseudo[1] = (nlen >> 16) & UINT16_MAX;
+	pseudo[2] = htons(ip6->ip6_nxt);
+
+	acc = uint16_checksum((uint8_t *)&ip6->ip6_src, 16);
+	acc += uint16_checksum((uint8_t *)&ip6->ip6_dst, 16);
+	acc += uint16_checksum((uint8_t *)pseudo, sizeof(pseudo));
+	acc += uint16_checksum(data, n);
+
+	/* Fold the carries back in, twice */
+	for (i = 0; i < 2; i++) {
+		acc = (acc & 0xFFFF) + (acc >> 16);
+	}
+
+	csum = htons((uint16_t)acc);
+	csum = (uint16_t)~csum;
+
+	return (csum != 0) ? csum : 0xFFFF;
+}
+
+/*
+ * Send gratuitous ARP request/reply or IPv6 neighbor advertisement
+ */
+
+#ifdef HAVE_PACKETSOCKET
+
+/*
+ * Create IPv4 ARP requests/replies or IPv6 neighbour advertisement
+ * packets
+ */
+
+/*
+ * Packet sizes.  Each expansion is fully parenthesised so the macros
+ * stay correct inside larger expressions (e.g. 2 * ARP_STRUCT_SIZE).
+ * The *_BUFFER_SIZE variants are never smaller than 64 bytes.
+ */
+#define ARP_STRUCT_SIZE (sizeof(struct ether_header) + \
+			 sizeof(struct ether_arp))
+
+#define IP6_NA_STRUCT_SIZE (sizeof(struct ether_header) + \
+			    sizeof(struct ip6_hdr) + \
+			    sizeof(struct nd_neighbor_advert) + \
+			    sizeof(struct nd_opt_hdr) + \
+			    sizeof(struct ether_addr))
+
+#define ARP_BUFFER_SIZE (MAX(ARP_STRUCT_SIZE, 64))
+
+#define IP6_NA_BUFFER_SIZE (MAX(IP6_NA_STRUCT_SIZE, 64))
+
+/*
+ * Build an Ethernet broadcast frame carrying an ARP request
+ * (reply == false) or ARP reply (reply == true) for addr, sent from
+ * hwaddr.
+ *
+ * Sender and target protocol address are both addr.  The target
+ * hardware address is zero in a request and hwaddr in a reply.
+ *
+ * On success *ether_dhost points at the destination MAC inside buffer
+ * and *len is ARP_BUFFER_SIZE.  Returns EINVAL if addr is not AF_INET
+ * and EMSGSIZE if buffer is too small.
+ */
+static int arp_build(uint8_t *buffer,
+		     size_t buflen,
+		     const struct sockaddr_in *addr,
+		     const struct ether_addr *hwaddr,
+		     bool reply,
+		     struct ether_addr **ether_dhost,
+		     size_t *len)
+{
+	const size_t pktlen = ARP_BUFFER_SIZE;
+	struct ether_header *eth;
+	struct ether_arp *arp;
+
+	if (addr->sin_family != AF_INET) {
+		return EINVAL;
+	}
+	if (buflen < pktlen) {
+		return EMSGSIZE;
+	}
+
+	memset(buffer, 0, pktlen);
+
+	/* Ethernet header: broadcast destination */
+	eth = (struct ether_header *)buffer;
+	memset(eth->ether_dhost, 0xff, ETH_ALEN);
+	memcpy(eth->ether_shost, hwaddr, ETH_ALEN);
+	eth->ether_type = htons(ETHERTYPE_ARP);
+
+	/* ARP payload follows the Ethernet header directly */
+	arp = (struct ether_arp *)(buffer + sizeof(struct ether_header));
+	arp->ea_hdr.ar_hrd = htons(ARPHRD_ETHER);
+	arp->ea_hdr.ar_pro = htons(ETH_P_IP);
+	arp->ea_hdr.ar_hln = ETH_ALEN;
+	arp->ea_hdr.ar_pln = sizeof(arp->arp_spa);
+	arp->ea_hdr.ar_op = reply ? htons(ARPOP_REPLY) : htons(ARPOP_REQUEST);
+
+	memcpy(arp->arp_sha, hwaddr, ETH_ALEN);
+	memcpy(arp->arp_spa, &addr->sin_addr, sizeof(arp->arp_spa));
+	if (reply) {
+		memcpy(arp->arp_tha, hwaddr, ETH_ALEN);
+	} else {
+		memset(arp->arp_tha, 0, ETH_ALEN);
+	}
+	memcpy(arp->arp_tpa, &addr->sin_addr, sizeof(arp->arp_tpa));
+
+	*ether_dhost = (struct ether_addr *)eth->ether_dhost;
+	*len = pktlen;
+	return 0;
+}
+
+/*
+ * Build an Ethernet frame carrying an ICMPv6 neighbour advertisement
+ * for addr with a target link-layer address option of hwaddr.
+ *
+ * The frame goes to Ethernet multicast 33:33:00:00:00:01 and IPv6
+ * destination ff02::1, with the override flag set.
+ *
+ * On success *ether_dhost points at the destination MAC inside buffer
+ * and *len is IP6_NA_BUFFER_SIZE.  Returns EINVAL if addr is not
+ * AF_INET6, EMSGSIZE if buffer is too small and EIO if the multicast
+ * address cannot be parsed.
+ */
+static int ip6_na_build(uint8_t *buffer,
+			size_t buflen,
+			const struct sockaddr_in6 *addr,
+			const struct ether_addr *hwaddr,
+			struct ether_addr **ether_dhost,
+			size_t *len)
+{
+	const size_t pktlen = IP6_NA_BUFFER_SIZE;
+	const size_t payload_len = sizeof(struct nd_neighbor_advert) +
+				   sizeof(struct nd_opt_hdr) +
+				   ETH_ALEN;
+	uint8_t *cursor = buffer;
+	struct ether_header *eth;
+	struct ip6_hdr *ip6;
+	struct nd_neighbor_advert *na;
+	struct nd_opt_hdr *opt;
+	struct ether_addr *lladdr;
+
+	if (addr->sin6_family != AF_INET6) {
+		return EINVAL;
+	}
+	if (buflen < pktlen) {
+		return EMSGSIZE;
+	}
+
+	memset(buffer, 0, pktlen);
+
+	/* The headers are laid out back to back */
+	eth = (struct ether_header *)cursor;
+	cursor += sizeof(struct ether_header);
+	ip6 = (struct ip6_hdr *)cursor;
+	cursor += sizeof(struct ip6_hdr);
+	na = (struct nd_neighbor_advert *)cursor;
+	cursor += sizeof(struct nd_neighbor_advert);
+	opt = (struct nd_opt_hdr *)cursor;
+	cursor += sizeof(struct nd_opt_hdr);
+	lladdr = (struct ether_addr *)cursor;
+
+	/*
+	 * Ethernet multicast: 33:33:00:00:00:01 (see RFC2464,
+	 * section 7) - bytes 2..4 rely on the memset above
+	 */
+	eth->ether_dhost[0] = 0x33;
+	eth->ether_dhost[1] = 0x33;
+	eth->ether_dhost[5] = 0x01;
+	memcpy(eth->ether_shost, hwaddr, ETH_ALEN);
+	eth->ether_type = htons(ETHERTYPE_IP6);
+
+	ip6->ip6_vfc = 6 << 4;
+	ip6->ip6_plen = htons(payload_len);
+	ip6->ip6_nxt = IPPROTO_ICMPV6;
+	ip6->ip6_hlim = 255;
+	ip6->ip6_src = addr->sin6_addr;
+	/* all-nodes multicast */
+	if (inet_pton(AF_INET6, "ff02::1", &ip6->ip6_dst) != 1) {
+		return EIO;
+	}
+
+	na->nd_na_type = ND_NEIGHBOR_ADVERT;
+	na->nd_na_code = 0;
+	na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE;
+	na->nd_na_target = addr->sin6_addr;
+
+	/* Option: Target link-layer address */
+	opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
+	opt->nd_opt_len = 1; /* multiple of 8 octets */
+	memcpy(lladdr, hwaddr, ETH_ALEN);
+
+	na->nd_na_cksum = ip6_checksum((uint8_t *)na,
+				       ntohs(ip6->ip6_plen),
+				       ip6);
+
+	*ether_dhost = (struct ether_addr *)eth->ether_dhost;
+	*len = pktlen;
+	return 0;
+}
+
+/*
+ * Announce addr on interface iface over a packet socket: for IPv4 an
+ * ARP request followed by an ARP reply, for IPv6 a neighbour
+ * advertisement.  Loopback interfaces are skipped without error.
+ *
+ * Returns 0 on success (or when skipped), otherwise an errno value.
+ */
+int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
+{
+	struct ifreq ifr = {
+		.ifr_ifru = {
+			.ifru_flags = 0
+		},
+	};
+	struct ifreq if_hwaddr = {
+		.ifr_ifru = {
+			.ifru_flags = 0
+		},
+	};
+	struct sockaddr_ll sall = {0};
+	uint8_t buffer[MAX(ARP_BUFFER_SIZE, IP6_NA_BUFFER_SIZE)];
+	struct ether_addr *hwaddr = NULL;
+	struct ether_addr *ether_dhost = NULL;
+	size_t len = 0;
+	int ret = 0;
+	int pass;
+	int s;
+
+	s = socket(AF_PACKET, SOCK_RAW, 0);
+	if (s == -1) {
+		ret = errno;
+		DBG_ERR("Failed to open raw socket\n");
+		return ret;
+	}
+	DBG_DEBUG("Created SOCKET FD:%d for sending arp\n", s);
+
+	/* Find interface */
+	strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
+	if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
+		ret = errno;
+		DBG_ERR("Interface '%s' not found\n", iface);
+		goto done;
+	}
+
+	/* Get MAC address */
+	strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
+	if (ioctl(s, SIOCGIFHWADDR, &if_hwaddr) < 0) {
+		ret = errno;
+		DBG_ERR("ioctl failed\n");
+		goto done;
+	}
+	if (if_hwaddr.ifr_hwaddr.sa_family == ARPHRD_LOOPBACK) {
+		ret = 0;
+		D_DEBUG("Ignoring loopback arp request\n");
+		goto done;
+	}
+	if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) {
+		ret = EINVAL;
+		DBG_ERR("Not an ethernet address family (0x%x)\n",
+			if_hwaddr.ifr_hwaddr.sa_family);
+		goto done;
+	}
+
+	/* Set up most of destination address structure */
+	sall.sll_family = AF_PACKET;
+	sall.sll_halen = sizeof(struct ether_addr);
+	sall.sll_protocol = htons(ETH_P_ALL);
+	sall.sll_ifindex = ifr.ifr_ifindex;
+
+	/* For clarity */
+	hwaddr = (struct ether_addr *)if_hwaddr.ifr_hwaddr.sa_data;
+
+	switch (addr->ip.sin_family) {
+	case AF_INET:
+		/* Pass 0: gratuitous ARP request; pass 1: unsolicited reply */
+		for (pass = 0; pass < 2; pass++) {
+			bool reply = (pass == 1);
+
+			ret = arp_build(buffer,
+					sizeof(buffer),
+					&addr->ip,
+					hwaddr,
+					reply,
+					&ether_dhost,
+					&len);
+			if (ret != 0) {
+				if (reply) {
+					DBG_ERR("Failed to build ARP reply\n");
+				} else {
+					DBG_ERR("Failed to build ARP request\n");
+				}
+				goto done;
+			}
+
+			memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
+
+			ret = sendto(s,
+				     buffer,
+				     len,
+				     0,
+				     (struct sockaddr *)&sall,
+				     sizeof(sall));
+			if (ret < 0) {
+				ret = errno;
+				DBG_ERR("Failed sendto\n");
+				goto done;
+			}
+		}
+		ret = 0;
+		break;
+
+	case AF_INET6:
+		ret = ip6_na_build(buffer,
+				   sizeof(buffer),
+				   &addr->ip6,
+				   hwaddr,
+				   &ether_dhost,
+				   &len);
+		if (ret != 0) {
+			DBG_ERR("Failed to build IPv6 neighbor advertisement\n");
+			goto done;
+		}
+
+		memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
+
+		ret = sendto(s,
+			     buffer,
+			     len,
+			     0,
+			     (struct sockaddr *)&sall,
+			     sizeof(sall));
+		if (ret < 0) {
+			ret = errno;
+			DBG_ERR("Failed sendto\n");
+			goto done;
+		}
+		ret = 0;
+		break;
+
+	default:
+		ret = EINVAL;
+		DBG_ERR("Not an ipv4/ipv6 address (family is %u)\n",
+			addr->ip.sin_family);
+		break;
+	}
+
+done:
+	/* Single exit: the socket is closed exactly once on every path */
+	close(s);
+	return ret;
+}
+
+#else /* HAVE_PACKETSOCKET */
+
+int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
+{
+ /* Not implemented: this variant is built when HAVE_PACKETSOCKET is not defined; both arguments are ignored */
+ return ENOSYS;
+}
+
+#endif /* HAVE_PACKETSOCKET */
+
+
+/*
+ * Sizes of a bare IPv4/IPv6 header followed by a TCP header.  The
+ * expansions are parenthesised so the macros stay correct inside
+ * larger expressions.
+ */
+#define IP4_TCP_BUFFER_SIZE (sizeof(struct ip) + \
+			     sizeof(struct tcphdr))
+
+#define IP6_TCP_BUFFER_SIZE (sizeof(struct ip6_hdr) + \
+			     sizeof(struct tcphdr))
+
+/*
+ * Build an IPv4 header followed by a TCP header (no options, no
+ * payload) in buf, from src to dst.
+ *
+ * The segment always has ACK set and additionally RST if rst is
+ * non-zero.  seq, ack and the ports are stored as passed in; no
+ * byte-order conversion is applied to them here.
+ *
+ * On success *len is the packet length and 0 is returned.  Returns
+ * EMSGSIZE if buf is too small or if the packed header struct does
+ * not have the expected size.
+ */
+static int tcp4_build(uint8_t *buf,
+		      size_t buflen,
+		      const struct sockaddr_in *src,
+		      const struct sockaddr_in *dst,
+		      uint32_t seq,
+		      uint32_t ack,
+		      int rst,
+		      size_t *len)
+{
+	size_t l = IP4_TCP_BUFFER_SIZE;
+	struct {
+		struct ip ip;
+		struct tcphdr tcp;
+	} *ip4pkt;
+
+	/* Guard against padding between the two headers */
+	if (l != sizeof(*ip4pkt)) {
+		return EMSGSIZE;
+	}
+
+	if (buflen < l) {
+		return EMSGSIZE;
+	}
+
+	ip4pkt = (void *)buf;
+	memset(ip4pkt, 0, l);
+
+	ip4pkt->ip.ip_v = 4;
+	ip4pkt->ip.ip_hl = sizeof(ip4pkt->ip)/sizeof(uint32_t);
+	/*
+	 * Total length is the size of the whole packet, i.e. of the
+	 * pointed-to struct, not of the pointer variable.
+	 */
+	ip4pkt->ip.ip_len = htons(sizeof(*ip4pkt));
+	ip4pkt->ip.ip_ttl = 255;
+	ip4pkt->ip.ip_p = IPPROTO_TCP;
+	ip4pkt->ip.ip_src.s_addr = src->sin_addr.s_addr;
+	ip4pkt->ip.ip_dst.s_addr = dst->sin_addr.s_addr;
+	ip4pkt->ip.ip_sum = 0;
+
+	ip4pkt->tcp.th_sport = src->sin_port;
+	ip4pkt->tcp.th_dport = dst->sin_port;
+	ip4pkt->tcp.th_seq = seq;
+	ip4pkt->tcp.th_ack = ack;
+	ip4pkt->tcp.th_flags = 0;
+	ip4pkt->tcp.th_flags |= TH_ACK;
+	if (rst) {
+		ip4pkt->tcp.th_flags |= TH_RST;
+	}
+	ip4pkt->tcp.th_off = sizeof(ip4pkt->tcp)/sizeof(uint32_t);
+	/* this makes it easier to spot in a sniffer */
+	ip4pkt->tcp.th_win = htons(1234);
+	ip4pkt->tcp.th_sum = ip_checksum((uint8_t *)&ip4pkt->tcp,
+					 sizeof(ip4pkt->tcp),
+					 &ip4pkt->ip);
+
+	*len = l;
+	return 0;
+}
+
+/*
+ * Build an IPv6 header followed by a TCP header (no options, no
+ * payload) in buf, from src to dst.
+ *
+ * The segment always has ACK set and additionally RST if rst is
+ * non-zero.  seq, ack and the ports are stored as passed in.
+ *
+ * On success *len is the packet length and 0 is returned.  Returns
+ * EMSGSIZE if buf is too small or if the packed header struct does
+ * not have the expected size.
+ */
+static int tcp6_build(uint8_t *buf,
+		      size_t buflen,
+		      const struct sockaddr_in6 *src,
+		      const struct sockaddr_in6 *dst,
+		      uint32_t seq,
+		      uint32_t ack,
+		      int rst,
+		      size_t *len)
+{
+	const size_t pktlen = IP6_TCP_BUFFER_SIZE;
+	struct {
+		struct ip6_hdr ip6;
+		struct tcphdr tcp;
+	} *ip6pkt;
+	struct ip6_hdr *hdr;
+	struct tcphdr *tcp;
+
+	if (pktlen != sizeof(*ip6pkt) || buflen < pktlen) {
+		return EMSGSIZE;
+	}
+
+	ip6pkt = (void *)buf;
+	memset(ip6pkt, 0, pktlen);
+	hdr = &ip6pkt->ip6;
+	tcp = &ip6pkt->tcp;
+
+	hdr->ip6_vfc = 6 << 4;
+	hdr->ip6_plen = htons(sizeof(struct tcphdr));
+	hdr->ip6_nxt = IPPROTO_TCP;
+	hdr->ip6_hlim = 64;
+	hdr->ip6_src = src->sin6_addr;
+	hdr->ip6_dst = dst->sin6_addr;
+
+	tcp->th_sport = src->sin6_port;
+	tcp->th_dport = dst->sin6_port;
+	tcp->th_seq = seq;
+	tcp->th_ack = ack;
+	tcp->th_flags = rst ? (TH_ACK | TH_RST) : TH_ACK;
+	tcp->th_off = sizeof(*tcp)/sizeof(uint32_t);
+	/* this makes it easier to spot in a sniffer */
+	tcp->th_win = htons(1234);
+	tcp->th_sum = ip6_checksum((uint8_t *)tcp, sizeof(*tcp), hdr);
+
+	*len = pktlen;
+	return 0;
+}
+
+/*
+ * Send a TCP segment from the specified IP/port (src) to the
+ * specified destination IP/port (dest) over a raw socket.
+ *
+ * This is used to trigger the receiving host into sending its own ACK,
+ * which should trigger early detection of TCP reset by the client
+ * after IP takeover.
+ *
+ * It can also be used to send RST segments (if rst is true), provided
+ * correct seq and ack numbers are given.
+ *
+ * Returns 0 on success, the packet-build error if the segment cannot
+ * be built, and -1 for any other failure.
+ */
+int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
+		      const ctdb_sock_addr *src,
+		      uint32_t seq,
+		      uint32_t ack,
+		      int rst)
+{
+	uint8_t buf[MAX(IP4_TCP_BUFFER_SIZE, IP6_TCP_BUFFER_SIZE)];
+	struct sockaddr_in6 tmpdest = { 0 };
+	const struct sockaddr *to = NULL;
+	socklen_t tolen = 0;
+	uint32_t one = 1;
+	size_t len = 0;
+	int saved_errno;
+	int ret;
+	int s;
+
+	/* Per-family part: build the packet and open the socket */
+	switch (src->ip.sin_family) {
+	case AF_INET:
+		ret = tcp4_build(buf,
+				 sizeof(buf),
+				 &src->ip,
+				 &dest->ip,
+				 seq,
+				 ack,
+				 rst,
+				 &len);
+		if (ret != 0) {
+			DBG_ERR("Failed to build TCP packet (%d)\n", ret);
+			return ret;
+		}
+
+		/* open a raw socket to send this segment from */
+		s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
+		if (s == -1) {
+			DBG_ERR("Failed to open raw socket (%s)\n",
+				strerror(errno));
+			return -1;
+		}
+
+		ret = setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one));
+		if (ret != 0) {
+			DBG_ERR("Failed to setup IP headers (%s)\n",
+				strerror(errno));
+			close(s);
+			return -1;
+		}
+
+		to = (const struct sockaddr *)&dest->ip;
+		tolen = sizeof(dest->ip);
+		break;
+
+	case AF_INET6:
+		ret = tcp6_build(buf,
+				 sizeof(buf),
+				 &src->ip6,
+				 &dest->ip6,
+				 seq,
+				 ack,
+				 rst,
+				 &len);
+		if (ret != 0) {
+			DBG_ERR("Failed to build TCP packet (%d)\n", ret);
+			return ret;
+		}
+
+		s = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
+		if (s == -1) {
+			DBG_ERR("Failed to open sending socket\n");
+			return -1;
+		}
+
+		/*
+		 * sendto() on an IPv6 raw socket requires the port to
+		 * be either 0 or a protocol value
+		 */
+		tmpdest = dest->ip6;
+		tmpdest.sin6_port = 0;
+
+		to = (const struct sockaddr *)&tmpdest;
+		tolen = sizeof(tmpdest);
+		break;
+
+	default:
+		DBG_ERR("Not an ipv4/v6 address\n");
+		return -1;
+	}
+
+	/* Common part: send, then close before looking at the result */
+	ret = sendto(s, buf, len, 0, to, tolen);
+	saved_errno = errno;
+	close(s);
+
+	if (ret == -1) {
+		D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
+		return -1;
+	}
+	if ((size_t)ret != len) {
+		DBG_ERR("Failed sendto - didn't send full packet\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Extract TCP connection details from a captured IPv4 packet.
+ *
+ * ip_pkt points at the IPv4 header and pktlen is the number of bytes
+ * available from there.  On success the source and destination
+ * address/port are stored in src and dst, and the acknowledgement
+ * number, sequence number, window and RST flag are stored via
+ * ack_seq, seq, window and rst.  Ports, numbers and window are
+ * copied as found in the packet, without byte order conversion.
+ * rst and window may be NULL if not wanted.
+ *
+ * Returns 0 on success, EMSGSIZE if the packet is too short, ENOMSG
+ * if the packet is not an unfragmented IPv4 TCP packet with a valid
+ * header length.
+ */
+static int tcp4_extract(const uint8_t *ip_pkt,
+			size_t pktlen,
+			struct sockaddr_in *src,
+			struct sockaddr_in *dst,
+			uint32_t *ack_seq,
+			uint32_t *seq,
+			int *rst,
+			uint16_t *window)
+{
+	const struct ip *ip;
+	const struct tcphdr *tcp;
+	size_t ip_hdr_len;
+
+	if (pktlen < sizeof(struct ip)) {
+		return EMSGSIZE;
+	}
+
+	ip = (const struct ip *)ip_pkt;
+
+	/* IPv4 only */
+	if (ip->ip_v != 4) {
+		return ENOMSG;
+	}
+	/*
+	 * The header length field counts 32-bit words and can't be
+	 * smaller than the fixed header.  Without this check a
+	 * malformed packet would have its IP header interpreted as
+	 * the TCP header.
+	 */
+	ip_hdr_len = ip->ip_hl * sizeof(uint32_t);
+	if (ip_hdr_len < sizeof(struct ip)) {
+		return ENOMSG;
+	}
+	/* Don't look at fragments */
+	if ((ntohs(ip->ip_off)&0x1fff) != 0) {
+		return ENOMSG;
+	}
+	/* TCP only */
+	if (ip->ip_p != IPPROTO_TCP) {
+		return ENOMSG;
+	}
+
+	/* Ensure there is enough of the packet to gather required fields */
+	if (pktlen < ip_hdr_len + offsetof(struct tcphdr, th_sum)) {
+		return EMSGSIZE;
+	}
+
+	tcp = (const struct tcphdr *)(ip_pkt + ip_hdr_len);
+
+	src->sin_family = AF_INET;
+	src->sin_addr.s_addr = ip->ip_src.s_addr;
+	src->sin_port = tcp->th_sport;
+
+	dst->sin_family = AF_INET;
+	dst->sin_addr.s_addr = ip->ip_dst.s_addr;
+	dst->sin_port = tcp->th_dport;
+
+	*ack_seq = tcp->th_ack;
+	*seq = tcp->th_seq;
+	if (window != NULL) {
+		*window = tcp->th_win;
+	}
+	if (rst != NULL) {
+		*rst = tcp->th_flags & TH_RST;
+	}
+
+	return 0;
+}
+
+/*
+ * Extract TCP connection details from a captured IPv6 packet.
+ *
+ * ip_pkt points at the IPv6 header and pktlen is the number of bytes
+ * available from there.  Only packets where TCP directly follows the
+ * fixed IPv6 header are accepted.  Values are copied as found in the
+ * packet, without byte order conversion.  rst and window may be NULL
+ * if not wanted.
+ *
+ * Returns 0 on success, EMSGSIZE if the packet is too short, ENOMSG
+ * if it is not IPv6 or the next header is not TCP.
+ */
+static int tcp6_extract(const uint8_t *ip_pkt,
+			size_t pktlen,
+			struct sockaddr_in6 *src,
+			struct sockaddr_in6 *dst,
+			uint32_t *ack_seq,
+			uint32_t *seq,
+			int *rst,
+			uint16_t *window)
+{
+	const size_t needed =
+		sizeof(struct ip6_hdr) + offsetof(struct tcphdr, th_sum);
+	const struct ip6_hdr *hdr6 = (const struct ip6_hdr *)ip_pkt;
+	const struct tcphdr *th =
+		(const struct tcphdr *)(ip_pkt + sizeof(struct ip6_hdr));
+
+	/* Ensure there is enough of the packet to gather required fields */
+	if (pktlen < needed) {
+		return EMSGSIZE;
+	}
+
+	/* IPv6 carrying TCP only */
+	if ((hdr6->ip6_vfc >> 4) != 6 || hdr6->ip6_nxt != IPPROTO_TCP) {
+		return ENOMSG;
+	}
+
+	src->sin6_family = AF_INET6;
+	src->sin6_port = th->th_sport;
+	src->sin6_addr = hdr6->ip6_src;
+
+	dst->sin6_family = AF_INET6;
+	dst->sin6_port = th->th_dport;
+	dst->sin6_addr = hdr6->ip6_dst;
+
+	*ack_seq = th->th_ack;
+	*seq = th->th_seq;
+
+	/* Optional outputs */
+	if (window != NULL) {
+		*window = th->th_win;
+	}
+	if (rst != NULL) {
+		*rst = th->th_flags & TH_RST;
+	}
+
+	return 0;
+}
+
+/*
+ * Packet capture
+ *
+ * If AF_PACKET is available then use a raw socket otherwise use pcap.
+ * wscript has checked to make sure that pcap is available if needed.
+ */
+
+#if defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP)
+
+/*
+ * Open a non-blocking AF_PACKET raw socket that captures all
+ * traffic.
+ *
+ * iface and private_data are not used by this implementation.
+ * Returns the socket on success, -1 on failure.
+ */
+int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
+{
+	int fd;
+
+	/* Open a socket to capture all traffic */
+	fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+	if (fd == -1) {
+		DBG_ERR("Failed to open raw socket\n");
+		return -1;
+	}
+
+	DBG_DEBUG("Opened raw socket for TCP tickle capture (fd=%d)\n", fd);
+
+	if (set_blocking(fd, false) != 0) {
+		DBG_ERR("Failed to set socket non-blocking (%s)\n",
+			strerror(errno));
+		close(fd);
+		return -1;
+	}
+
+	/* The result of this call is deliberately not checked */
+	set_close_on_exec(fd);
+
+	return fd;
+}
+
+/*
+ * Do any additional cleanup required when closing a capture socket.
+ *
+ * Nothing is needed for a raw socket, so this always returns 0.
+ * Note that the socket itself is closed automatically in the caller.
+ */
+int ctdb_sys_close_capture_socket(void *private_data)
+{
+	/* No private state is kept for AF_PACKET capture */
+	(void)private_data;
+
+	return 0;
+}
+
+
+/*
+ * Called when the raw socket becomes readable.
+ *
+ * Reads one frame from socket s and, if it is an Ethernet frame
+ * carrying IPv4 or IPv6, extracts the TCP details into src, dst,
+ * ack_seq, seq, rst and window (see tcp4_extract()/tcp6_extract()).
+ * private_data is not used by this implementation.
+ *
+ * Returns 0 on success, the errno from recv() on read failure,
+ * EMSGSIZE if the frame is too short, ENOMSG if the frame is not of
+ * interest.
+ */
+int ctdb_sys_read_tcp_packet(int s, void *private_data,
+			     ctdb_sock_addr *src,
+			     ctdb_sock_addr *dst,
+			     uint32_t *ack_seq,
+			     uint32_t *seq,
+			     int *rst,
+			     uint16_t *window)
+{
+	ssize_t nread;
+	uint8_t pkt[100]; /* Large enough for simple ACK/RST packets */
+	const struct ether_header *eth;
+	size_t pktlen;
+
+	nread = recv(s, pkt, sizeof(pkt), MSG_TRUNC);
+	if (nread == -1) {
+		return errno;
+	}
+
+	/*
+	 * With MSG_TRUNC the return value is the real length of the
+	 * frame, which can be larger than the buffer.  Only the bytes
+	 * actually stored in pkt may be examined.
+	 */
+	pktlen = (size_t)nread;
+	if (pktlen > sizeof(pkt)) {
+		pktlen = sizeof(pkt);
+	}
+	if (pktlen < sizeof(*eth)) {
+		return EMSGSIZE;
+	}
+
+	ZERO_STRUCTP(src);
+	ZERO_STRUCTP(dst);
+
+	/* Ethernet */
+	eth = (const struct ether_header *)pkt;
+
+	/* we want either IPv4 or IPv6 */
+	switch (ntohs(eth->ether_type)) {
+	case ETHERTYPE_IP:
+		return tcp4_extract(pkt + sizeof(struct ether_header),
+				    pktlen - sizeof(struct ether_header),
+				    &src->ip,
+				    &dst->ip,
+				    ack_seq,
+				    seq,
+				    rst,
+				    window);
+	case ETHERTYPE_IP6:
+		return tcp6_extract(pkt + sizeof(struct ether_header),
+				    pktlen - sizeof(struct ether_header),
+				    &src->ip6,
+				    &dst->ip6,
+				    ack_seq,
+				    seq,
+				    rst,
+				    window);
+	default:
+		break;
+	}
+
+	return ENOMSG;
+}
+
+#else /* defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP) */
+
+#include <pcap.h>
+
+/*
+ * Assume this exists if pcap.h exists - it has been around for a
+ * while
+ */
+#include <pcap/sll.h>
+
+/*
+ * Open a pcap capture on iface.
+ *
+ * The capture uses a snap length of 100 bytes, is not promiscuous
+ * and only accepts the link types DLT_EN10MB, DLT_LINUX_SLL and
+ * (if available) DLT_LINUX_SLL2.
+ *
+ * On success the pcap handle is stored in *private_data and a file
+ * descriptor that can be used to wait for packets is returned.  On
+ * failure -1 is returned, the pcap handle is closed and
+ * *private_data is not modified.
+ */
+int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
+{
+	char errbuf[PCAP_ERRBUF_SIZE];
+	pcap_t *pt;
+	int pcap_packet_type;
+	const char *t = NULL;
+	int fd;
+	int ret;
+
+	pt = pcap_create(iface, errbuf);
+	if (pt == NULL) {
+		DBG_ERR("Failed to open pcap capture device %s (%s)\n",
+			iface,
+			errbuf);
+		return -1;
+	}
+	/*
+	 * pcap isn't very clear about defaults...
+	 */
+	ret = pcap_set_snaplen(pt, 100);
+	if (ret < 0) {
+		DBG_ERR("Failed to set snaplen for pcap capture\n");
+		goto fail;
+	}
+	ret = pcap_set_promisc(pt, 0);
+	if (ret < 0) {
+		DBG_ERR("Failed to unset promiscuous mode for pcap capture\n");
+		goto fail;
+	}
+	ret = pcap_set_timeout(pt, 0);
+	if (ret < 0) {
+		DBG_ERR("Failed to set timeout for pcap capture\n");
+		goto fail;
+	}
+#ifdef HAVE_PCAP_SET_IMMEDIATE_MODE
+	ret = pcap_set_immediate_mode(pt, 1);
+	if (ret < 0) {
+		DBG_ERR("Failed to set immediate mode for pcap capture\n");
+		goto fail;
+	}
+#endif
+	ret = pcap_activate(pt);
+	if (ret < 0) {
+		DBG_ERR("Failed to activate pcap capture\n");
+		goto fail;
+	}
+
+	pcap_packet_type = pcap_datalink(pt);
+	switch (pcap_packet_type) {
+	case DLT_EN10MB:
+		t = "DLT_EN10MB";
+		break;
+	case DLT_LINUX_SLL:
+		t = "DLT_LINUX_SLL";
+		break;
+#ifdef DLT_LINUX_SLL2
+	case DLT_LINUX_SLL2:
+		t = "DLT_LINUX_SLL2";
+		break;
+#endif /* DLT_LINUX_SLL2 */
+	default:
+		DBG_ERR("Unknown pcap packet type %d\n", pcap_packet_type);
+		goto fail;
+	}
+
+	/*
+	 * Without a selectable file descriptor the capture can't be
+	 * used.  Fail here so the handle is closed instead of being
+	 * leaked behind a -1 return value.
+	 */
+	fd = pcap_get_selectable_fd(pt);
+	if (fd < 0) {
+		DBG_ERR("Failed to get selectable fd for pcap capture\n");
+		goto fail;
+	}
+	DBG_DEBUG("Opened pcap capture for TCP tickle (type=%s, fd=%d)\n",
+		  t,
+		  fd);
+
+	*((pcap_t **)private_data) = pt;
+	return fd;
+
+fail:
+	pcap_close(pt);
+	return -1;
+}
+
+/*
+ * Close the pcap handle that was stored in private_data by
+ * ctdb_sys_open_capture_socket().
+ *
+ * A NULL handle is ignored.  Always returns 0.
+ */
+int ctdb_sys_close_capture_socket(void *private_data)
+{
+	pcap_t *pt = (pcap_t *)private_data;
+
+	/* Nothing to close if the capture was never opened */
+	if (pt != NULL) {
+		pcap_close(pt);
+	}
+	return 0;
+}
+
+/*
+ * Called when the pcap file descriptor becomes readable.
+ *
+ * Fetches the next packet from the pcap handle in private_data (s is
+ * not used), strips the link-layer header according to the pcap link
+ * type and extracts the TCP details into src, dst, ack_seq, seq, rst
+ * and window (see tcp4_extract()/tcp6_extract()).
+ *
+ * Returns 0 on success, ENOMSG if no packet is available, EMSGSIZE
+ * if the captured data is shorter than the link-layer header,
+ * EPROTONOSUPPORT for unsupported link or hardware types, EPROTO for
+ * packets that are neither IPv4 nor IPv6, or the error from the
+ * extract function.
+ */
+int ctdb_sys_read_tcp_packet(int s,
+			     void *private_data,
+			     ctdb_sock_addr *src,
+			     ctdb_sock_addr *dst,
+			     uint32_t *ack_seq,
+			     uint32_t *seq,
+			     int *rst,
+			     uint16_t *window)
+{
+	int ret;
+	struct pcap_pkthdr pkthdr;
+	const u_char *buffer;
+	pcap_t *pt = (pcap_t *)private_data;
+	int pcap_packet_type;
+	uint16_t ether_type;
+	size_t ll_hdr_len;
+	size_t caplen;
+
+	buffer=pcap_next(pt, &pkthdr);
+	if (buffer==NULL) {
+		return ENOMSG;
+	}
+	/* Only this many bytes are valid in buffer */
+	caplen = (size_t)pkthdr.caplen;
+
+	ZERO_STRUCTP(src);
+	ZERO_STRUCTP(dst);
+
+	/*
+	 * In each case below the captured length is checked before
+	 * the link-layer header is read.  This also guarantees that
+	 * caplen - ll_hdr_len can't wrap around further down.
+	 */
+	pcap_packet_type = pcap_datalink(pt);
+	switch (pcap_packet_type) {
+	case DLT_EN10MB: {
+		const struct ether_header *eth =
+			(const struct ether_header *)buffer;
+		ll_hdr_len = sizeof(struct ether_header);
+		if (caplen < ll_hdr_len) {
+			return EMSGSIZE;
+		}
+		ether_type = ntohs(eth->ether_type);
+		break;
+	}
+	case DLT_LINUX_SLL: {
+		const struct sll_header *sll =
+			(const struct sll_header *)buffer;
+		uint16_t arphrd_type;
+		ll_hdr_len = SLL_HDR_LEN;
+		if (caplen < ll_hdr_len) {
+			return EMSGSIZE;
+		}
+		arphrd_type = ntohs(sll->sll_hatype);
+		switch (arphrd_type) {
+		case ARPHRD_ETHER:
+		case ARPHRD_INFINIBAND:
+			break;
+		default:
+			DBG_DEBUG("SLL: Unknown arphrd_type %"PRIu16"\n",
+				  arphrd_type);
+			return EPROTONOSUPPORT;
+		}
+		ether_type = ntohs(sll->sll_protocol);
+		break;
+	}
+#ifdef DLT_LINUX_SLL2
+	case DLT_LINUX_SLL2: {
+		const struct sll2_header *sll2 =
+			(const struct sll2_header *)buffer;
+		uint16_t arphrd_type;
+		ll_hdr_len = SLL2_HDR_LEN;
+		if (caplen < ll_hdr_len) {
+			return EMSGSIZE;
+		}
+		arphrd_type = ntohs(sll2->sll2_hatype);
+		switch (arphrd_type) {
+		case ARPHRD_ETHER:
+		case ARPHRD_INFINIBAND:
+			break;
+		default:
+			DBG_DEBUG("SLL2: Unknown arphrd_type %"PRIu16"\n",
+				  arphrd_type);
+			return EPROTONOSUPPORT;
+		}
+		ether_type = ntohs(sll2->sll2_protocol);
+		break;
+	}
+#endif /* DLT_LINUX_SLL2 */
+	default:
+		DBG_DEBUG("Unknown pcap packet type %d\n", pcap_packet_type);
+		return EPROTONOSUPPORT;
+	}
+
+	switch (ether_type) {
+	case ETHERTYPE_IP:
+		ret = tcp4_extract(buffer + ll_hdr_len,
+				   caplen - ll_hdr_len,
+				   &src->ip,
+				   &dst->ip,
+				   ack_seq,
+				   seq,
+				   rst,
+				   window);
+		break;
+	case ETHERTYPE_IP6:
+		ret = tcp6_extract(buffer + ll_hdr_len,
+				   caplen - ll_hdr_len,
+				   &src->ip6,
+				   &dst->ip6,
+				   ack_seq,
+				   seq,
+				   rst,
+				   window);
+		break;
+	case ETHERTYPE_ARP:
+		/* Silently ignore ARP packets */
+		return EPROTO;
+	default:
+		DBG_DEBUG("Unknown ether type %"PRIu16"\n", ether_type);
+		return EPROTO;
+	}
+
+	return ret;
+}
+
+#endif /* defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP) */
diff --git a/ctdb/common/system_socket.h b/ctdb/common/system_socket.h
new file mode 100644
index 0000000..065c53c
--- /dev/null
+++ b/ctdb/common/system_socket.h
@@ -0,0 +1,46 @@
+/*
+ System specific network code
+
+ Copyright (C) Amitay Isaacs 2015
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_SYSTEM_SOCKET_H__
+#define __CTDB_SYSTEM_SOCKET_H__
+
+#include "protocol/protocol.h"
+
+bool ctdb_sys_have_ip(ctdb_sock_addr *addr);
+
+int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface);
+
+/*
+ * Send a TCP segment with ACK set (and RST, if rst is non-zero) from
+ * src to dest using a raw socket.  Returns 0 on success, non-zero on
+ * failure.
+ */
+int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
+ const ctdb_sock_addr *src,
+ uint32_t seq,
+ uint32_t ack,
+ int rst);
+
+/*
+ * Packet capture, implemented with either an AF_PACKET raw socket or
+ * pcap.
+ *
+ * ctdb_sys_open_capture_socket() returns a file descriptor to wait
+ * on, or -1 on failure.  Only the pcap implementation uses iface and
+ * stores a handle in *private_data.
+ *
+ * ctdb_sys_close_capture_socket() does any additional cleanup, the
+ * file descriptor itself is closed by the caller.
+ *
+ * ctdb_sys_read_tcp_packet() reads one captured packet and extracts
+ * addresses, ports, sequence numbers, RST flag and window.  rst and
+ * window may be NULL.  It returns 0 on success or an errno value.
+ */
+int ctdb_sys_open_capture_socket(const char *iface, void **private_data);
+int ctdb_sys_close_capture_socket(void *private_data);
+int ctdb_sys_read_tcp_packet(int s,
+ void *private_data,
+ ctdb_sock_addr *src,
+ ctdb_sock_addr *dst,
+ uint32_t *ack_seq,
+ uint32_t *seq,
+ int *rst,
+ uint16_t *window);
+
+#endif /* __CTDB_SYSTEM_SOCKET_H__ */
diff --git a/ctdb/common/tmon.c b/ctdb/common/tmon.c
new file mode 100644
index 0000000..04bad1f
--- /dev/null
+++ b/ctdb/common/tmon.c
@@ -0,0 +1,602 @@
+/*
+ Trivial FD monitoring
+
+ Copyright (C) Martin Schwenke & Amitay Isaacs, DataDirect Networks 2022
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <ctype.h>
+
+#include "lib/util/blocking.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/tevent_unix.h"
+#include "lib/util/util.h"
+#include "lib/util/smb_strtox.h"
+
+#include "lib/async_req/async_sock.h"
+
+#include "common/tmon.h"
+
+
+/*
+ * Packet types of the monitoring protocol.
+ *
+ * EXIT and ERRNO packets are handled internally when read, all other
+ * types are passed to the read callback.
+ */
+enum tmon_message_type {
+ TMON_MSG_EXIT = 1,
+ TMON_MSG_ERRNO,
+ TMON_MSG_PING,
+ TMON_MSG_ASCII,
+ TMON_MSG_CUSTOM,
+};
+
+/* A decoded packet: message type plus a 16-bit value */
+struct tmon_pkt {
+ enum tmon_message_type type;
+ uint16_t val;
+};
+
+/*
+ * Wire format of a packet: type in bytes 0-1, value in bytes 2-3,
+ * both 16-bit in network byte order.
+ */
+struct tmon_buf {
+ uint8_t data[4];
+};
+
+/*
+ * Encode pkt into its 4 byte wire format: type followed by value,
+ * each as a 16-bit integer in network byte order.
+ */
+static void tmon_packet_push(struct tmon_pkt *pkt,
+			     struct tmon_buf *buf)
+{
+	const uint16_t wire_type = htons(pkt->type);
+	const uint16_t wire_val = htons(pkt->val);
+
+	memcpy(&buf->data[0], &wire_type, sizeof(wire_type));
+	memcpy(&buf->data[2], &wire_val, sizeof(wire_val));
+}
+
+/*
+ * Decode the 4 byte wire format in buf into pkt, converting type and
+ * value from network byte order.  No validation is done here.
+ */
+static void tmon_packet_pull(struct tmon_buf *buf,
+			     struct tmon_pkt *pkt)
+{
+	uint16_t wire_type;
+	uint16_t wire_val;
+
+	memcpy(&wire_type, &buf->data[0], sizeof(wire_type));
+	memcpy(&wire_val, &buf->data[2], sizeof(wire_val));
+
+	pkt->type = ntohs(wire_type);
+	pkt->val = ntohs(wire_val);
+}
+
+/*
+ * Encode pkt and write it to fd.
+ *
+ * Returns 0 if the write did not fail, otherwise the errno from the
+ * write.
+ */
+static int tmon_packet_write(int fd, struct tmon_pkt *pkt)
+{
+	struct tmon_buf wire;
+
+	tmon_packet_push(pkt, &wire);
+
+	if (sys_write(fd, wire.data, sizeof(wire.data)) == -1) {
+		return errno;
+	}
+
+	return 0;
+}
+
+/* Fill in pkt as an exit packet (value 0).  Always returns true. */
+bool tmon_set_exit(struct tmon_pkt *pkt)
+{
+	pkt->type = TMON_MSG_EXIT;
+	pkt->val = 0;
+
+	return true;
+}
+
+/*
+ * Fill in pkt as an errno packet carrying err.
+ *
+ * err must be positive and fit into 16 bits, otherwise false is
+ * returned and pkt is left untouched.
+ */
+bool tmon_set_errno(struct tmon_pkt *pkt, int err)
+{
+	const bool in_range = (err > 0 && err <= UINT16_MAX);
+
+	if (!in_range) {
+		return false;
+	}
+
+	pkt->type = TMON_MSG_ERRNO;
+	pkt->val = (uint16_t)err;
+
+	return true;
+}
+
+/* Fill in pkt as a ping packet (value 0).  Always returns true. */
+bool tmon_set_ping(struct tmon_pkt *pkt)
+{
+	pkt->type = TMON_MSG_PING;
+	pkt->val = 0;
+
+	return true;
+}
+
+/*
+ * Fill in pkt as an ASCII packet carrying character c.
+ *
+ * Returns false, leaving pkt untouched, if c is not an ASCII
+ * character.
+ */
+bool tmon_set_ascii(struct tmon_pkt *pkt, char c)
+{
+	if (isascii(c)) {
+		pkt->type = TMON_MSG_ASCII;
+		pkt->val = (uint16_t)c;
+		return true;
+	}
+
+	return false;
+}
+
+/* Fill in pkt as a custom packet carrying val.  Always returns true. */
+bool tmon_set_custom(struct tmon_pkt *pkt, uint16_t val)
+{
+	pkt->type = TMON_MSG_CUSTOM;
+	pkt->val = val;
+
+	return true;
+}
+
+/* Check that pkt is a valid exit packet: correct type and value 0 */
+static bool tmon_parse_exit(struct tmon_pkt *pkt)
+{
+	return pkt->type == TMON_MSG_EXIT && pkt->val == 0;
+}
+
+/*
+ * If pkt is an errno packet then store its value in *err and return
+ * true.  Otherwise return false without touching *err.
+ */
+static bool tmon_parse_errno(struct tmon_pkt *pkt, int *err)
+{
+	if (pkt->type == TMON_MSG_ERRNO) {
+		*err= (int)pkt->val;
+		return true;
+	}
+
+	return false;
+}
+
+/* Check that pkt is a valid ping packet: correct type and value 0 */
+bool tmon_parse_ping(struct tmon_pkt *pkt)
+{
+	return pkt->type == TMON_MSG_PING && pkt->val == 0;
+}
+
+/*
+ * If pkt is an ASCII packet containing an ASCII value then store the
+ * character in *c and return true.  Otherwise return false without
+ * touching *c.
+ */
+bool tmon_parse_ascii(struct tmon_pkt *pkt, char *c)
+{
+	const bool valid = (pkt->type == TMON_MSG_ASCII) &&
+			   isascii((int)pkt->val);
+
+	if (valid) {
+		*c = (char)pkt->val;
+	}
+
+	return valid;
+}
+
+/*
+ * If pkt is a custom packet then store its value in *val and return
+ * true.  Otherwise return false without touching *val.
+ */
+bool tmon_parse_custom(struct tmon_pkt *pkt, uint16_t *val)
+{
+	if (pkt->type == TMON_MSG_CUSTOM) {
+		*val = pkt->val;
+		return true;
+	}
+
+	return false;
+}
+
+/* State of a monitoring request created by tmon_send() */
+struct tmon_state {
+ /* Monitored file descriptor, set to -1 after a failed write */
+ int fd;
+ /* TMON_FD_READ and/or TMON_FD_WRITE */
+ int direction;
+ struct tevent_context *ev;
+ /* NOTE(review): not referenced by the code in this file - confirm */
+ bool monitor_close;
+ /* Seconds between calls to write_callback, 0 means never */
+ unsigned long write_interval;
+ /* Seconds without a packet before timing out, 0 means no timeout */
+ unsigned long read_timeout;
+ /* Copy of the caller's callbacks, all NULL if none were given */
+ struct tmon_actions actions;
+ /* Pending read timeout, if any */
+ struct tevent_timer *timer;
+ /* Passed to every callback */
+ void *private_data;
+};
+
+/* Handlers set up by tmon_send(), defined further down */
+static void tmon_readable(struct tevent_req *subreq);
+static bool tmon_set_timeout(struct tevent_req *req,
+ struct tevent_context *ev);
+static void tmon_timedout(struct tevent_context *ev,
+ struct tevent_timer *te,
+ struct timeval now,
+ void *private_data);
+static void tmon_write_loop(struct tevent_req *subreq);
+
+/*
+ * Start monitoring fd.
+ *
+ * direction says whether fd can be read, written or both.  If
+ * readable, fd is watched for packets and for close.  If read_timeout
+ * is non-zero then the request times out when no packet arrives for
+ * that many seconds.  If write_interval is non-zero then
+ * write_callback is called every write_interval seconds to produce a
+ * packet to send.
+ *
+ * The request fails with EINVAL if actions contains read callbacks
+ * or a read timeout is given for a non-readable fd, if a write
+ * callback or write interval is given for a non-writeable fd, or if
+ * a write interval is given without a write callback.
+ *
+ * Returns the new request or NULL if it can't be allocated.  Other
+ * errors are reported via tmon_recv().
+ */
+struct tevent_req *tmon_send(TALLOC_CTX *mem_ctx,
+			     struct tevent_context *ev,
+			     int fd,
+			     int direction,
+			     unsigned long read_timeout,
+			     unsigned long write_interval,
+			     struct tmon_actions *actions,
+			     void *private_data)
+{
+	struct tevent_req *req, *subreq;
+	struct tmon_state *state;
+	bool status;
+
+	req = tevent_req_create(mem_ctx, &state, struct tmon_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	if (actions != NULL) {
+		/* If FD isn't readable then read actions are invalid */
+		if (!(direction & TMON_FD_READ) &&
+		    (actions->timeout_callback != NULL ||
+		     actions->read_callback != NULL ||
+		     read_timeout != 0)) {
+			tevent_req_error(req, EINVAL);
+			return tevent_req_post(req, ev);
+		}
+		/* If FD isn't writeable then write actions are invalid */
+		if (!(direction & TMON_FD_WRITE) &&
+		    (actions->write_callback != NULL ||
+		     write_interval != 0)) {
+			tevent_req_error(req, EINVAL);
+			return tevent_req_post(req, ev);
+		}
+	}
+
+	/*
+	 * Can't specify write interval without a callback, the write
+	 * loop would call a NULL pointer.  This has to look at the
+	 * arguments because state has not been filled in yet.
+	 */
+	if (write_interval != 0 &&
+	    (actions == NULL || actions->write_callback == NULL)) {
+		tevent_req_error(req, EINVAL);
+		return tevent_req_post(req, ev);
+	}
+
+	state->fd = fd;
+	state->direction = direction;
+	state->ev = ev;
+	state->write_interval = write_interval;
+	state->read_timeout = read_timeout;
+	state->private_data = private_data;
+
+	if (actions != NULL) {
+		state->actions = *actions;
+	}
+
+	status = set_close_on_exec(fd);
+	if (!status) {
+		tevent_req_error(req, errno);
+		return tevent_req_post(req, ev);
+	}
+
+	if (direction & TMON_FD_READ) {
+		subreq = wait_for_read_send(state, ev, fd, true);
+		if (tevent_req_nomem(subreq, req)) {
+			return tevent_req_post(req, ev);
+		}
+		tevent_req_set_callback(subreq, tmon_readable, req);
+	}
+
+	if (state->read_timeout != 0) {
+		status = tmon_set_timeout(req, state->ev);
+		if (!status) {
+			/* tmon_set_timeout() has already failed req */
+			return tevent_req_post(req, ev);
+		}
+	}
+
+	if (state->write_interval != 0) {
+		subreq = tevent_wakeup_send(
+			state,
+			state->ev,
+			tevent_timeval_current_ofs(state->write_interval, 0));
+		if (tevent_req_nomem(subreq, req)) {
+			return tevent_req_post(req, state->ev);
+		}
+		tevent_req_set_callback(subreq, tmon_write_loop, req);
+	}
+
+	return req;
+}
+
+/*
+ * Called when the monitored fd becomes readable or waiting for it
+ * fails.
+ *
+ * If waiting fails with EPIPE then close_callback, if set, decides
+ * the result.  Otherwise one packet is read: exit and errno packets
+ * complete the request, all other packets are passed to
+ * read_callback.  If the callback returns 0 then monitoring
+ * continues and the read timeout is restarted.
+ */
+static void tmon_readable(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct tmon_state *state = tevent_req_data( req, struct tmon_state);
+	struct tmon_buf buf;
+	struct tmon_pkt pkt;
+	ssize_t nread;
+	bool status;
+	int err;
+	int ret;
+
+	status = wait_for_read_recv(subreq, &ret);
+	TALLOC_FREE(subreq);
+	if (!status) {
+		if (ret == EPIPE && state->actions.close_callback != NULL) {
+			ret = state->actions.close_callback(state->private_data);
+			if (ret == TMON_STATUS_EXIT) {
+				ret = 0;
+			}
+		}
+		if (ret == 0) {
+			tevent_req_done(req);
+		} else {
+			tevent_req_error(req, ret);
+		}
+		return;
+	}
+
+	nread = sys_read(state->fd, buf.data, sizeof(buf.data));
+	if (nread == -1) {
+		tevent_req_error(req, errno);
+		return;
+	}
+	if (nread == 0) {
+		/* Can't happen, treat like EPIPE, above */
+		tevent_req_error(req, EPIPE);
+		return;
+	}
+	if (nread != sizeof(buf.data)) {
+		tevent_req_error(req, EPROTO);
+		return;
+	}
+
+	tmon_packet_pull(&buf, &pkt);
+
+	/* Exit and errno packets are never passed to the read callback */
+	switch (pkt.type) {
+	case TMON_MSG_EXIT:
+		status = tmon_parse_exit(&pkt);
+		if (!status) {
+			tevent_req_error(req, EPROTO);
+			return;
+		}
+		tevent_req_done(req);
+		return;
+	case TMON_MSG_ERRNO:
+		status = tmon_parse_errno(&pkt, &err);
+		if (!status) {
+			err = EPROTO;
+		}
+		tevent_req_error(req, err);
+		return;
+	default:
+		break;
+	}
+
+	if (state->actions.read_callback == NULL) {
+		/* Shouldn't happen, other end should not write */
+		tevent_req_error(req, EIO);
+		return;
+	}
+	ret = state->actions.read_callback(state->private_data, &pkt);
+	if (ret == TMON_STATUS_EXIT) {
+		tevent_req_done(req);
+		return;
+	}
+	if (ret != 0) {
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	subreq = wait_for_read_send(state, state->ev, state->fd, true);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, tmon_readable, req);
+
+	/* Reset read timeout */
+	if (state->read_timeout != 0) {
+		status = tmon_set_timeout(req, state->ev);
+		if (!status) {
+			/*
+			 * tmon_set_timeout() has already failed the
+			 * request, which may have run the caller's
+			 * callback and freed req.  Completing the
+			 * request a second time here would be a
+			 * use-after-free, so just return.
+			 */
+			return;
+		}
+	}
+}
+
+/*
+ * (Re)start the read timeout.
+ *
+ * Any existing timer is dropped and a new one is scheduled
+ * read_timeout seconds from now.  Returns false if the timer can't
+ * be allocated, in which case the request has already been marked as
+ * failed.
+ */
+static bool tmon_set_timeout(struct tevent_req *req,
+			     struct tevent_context *ev)
+{
+	struct tmon_state *state = tevent_req_data(req, struct tmon_state);
+	struct timeval deadline;
+
+	deadline = tevent_timeval_current_ofs(state->read_timeout, 0);
+
+	TALLOC_FREE(state->timer);
+	state->timer = tevent_add_timer(ev, req, deadline, tmon_timedout, req);
+
+	return !tevent_req_nomem(state->timer, req);
+}
+
+/*
+ * Timer handler, called when no packet was read within read_timeout
+ * seconds.
+ *
+ * The request fails with ETIMEDOUT unless timeout_callback is set,
+ * in which case the callback's return value decides the result: 0 or
+ * TMON_STATUS_EXIT for success, anything else is the error.
+ */
+static void tmon_timedout(struct tevent_context *ev,
+			  struct tevent_timer *te,
+			  struct timeval now,
+			  void *private_data)
+{
+	struct tevent_req *req = talloc_get_type_abort(
+		private_data, struct tevent_req);
+	struct tmon_state *state = tevent_req_data(req, struct tmon_state);
+	int result = ETIMEDOUT;
+
+	TALLOC_FREE(state->timer);
+
+	if (state->actions.timeout_callback != NULL) {
+		result = state->actions.timeout_callback(state->private_data);
+		if (result == TMON_STATUS_EXIT) {
+			result = 0;
+		}
+	}
+
+	if (result != 0) {
+		tevent_req_error(req, result);
+		return;
+	}
+
+	tevent_req_done(req);
+}
+
+/*
+ * Called every write_interval seconds.
+ *
+ * Asks write_callback for a packet.  The callback can end the
+ * request successfully (TMON_STATUS_EXIT), skip this round
+ * (TMON_STATUS_SKIP), fail the request (any other non-zero value) or
+ * provide a packet to be written (0).  Unless the request has
+ * completed, the next wakeup is scheduled.
+ */
+static void tmon_write_loop(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct tmon_state *state = tevent_req_data(req, struct tmon_state);
+	struct timeval next;
+	struct tmon_pkt pkt;
+	int ret;
+
+	/* The result of the wakeup is ignored */
+	(void)tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+
+	ret = state->actions.write_callback(state->private_data, &pkt);
+	if (ret == TMON_STATUS_EXIT) {
+		tevent_req_done(req);
+		return;
+	}
+
+	if (ret != TMON_STATUS_SKIP) {
+		if (ret != 0) {
+			tevent_req_error(req, ret);
+			return;
+		}
+
+		/* On failure tmon_write() has completed the request */
+		if (!tmon_write(req, &pkt)) {
+			return;
+		}
+	}
+
+	/* Schedule the next round */
+	next = tevent_timeval_current_ofs(state->write_interval, 0);
+	subreq = tevent_wakeup_send(state, state->ev, next);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, tmon_write_loop, req);
+}
+
+/*
+ * Write pkt to the file descriptor monitored by req.
+ *
+ * Returns true if the packet was written.  Returns false if:
+ *
+ * - a previous write failed - nothing is done
+ * - the file descriptor is not writeable - the request fails with
+ *   EINVAL
+ * - the write fails - the request is completed.  For EPIPE the
+ *   result is decided by close_callback, if set, otherwise the
+ *   request fails with the write error.
+ */
+bool tmon_write(struct tevent_req *req, struct tmon_pkt *pkt)
+{
+	struct tmon_state *state = tevent_req_data(
+		req, struct tmon_state);
+	int ret;
+
+	if (state->fd == -1) {
+		return false;
+	}
+
+	if (!(state->direction & TMON_FD_WRITE)) {
+		tevent_req_error(req, EINVAL);
+		return false;
+	}
+
+	ret = tmon_packet_write(state->fd, pkt);
+	if (ret == 0) {
+		return true;
+	}
+
+	if (ret == EPIPE && state->actions.close_callback != NULL) {
+		ret = state->actions.close_callback(state->private_data);
+		if (ret == TMON_STATUS_EXIT) {
+			ret = 0;
+		}
+	}
+
+	/*
+	 * Mark the file descriptor as unusable before completing the
+	 * request.  Completion can run the caller's callback, which
+	 * may free req and state, so state must not be touched
+	 * afterwards.
+	 */
+	state->fd = -1;
+
+	if (ret == 0) {
+		tevent_req_done(req);
+	} else {
+		tevent_req_error(req, ret);
+	}
+
+	return false;
+}
+
+/*
+ * Fetch the result of a monitoring request.
+ *
+ * Returns true on success.  On failure false is returned and the
+ * error is stored in *perr.
+ */
+bool tmon_recv(struct tevent_req *req, int *perr)
+{
+	return !tevent_req_is_unix_error(req, perr);
+}
+
+/* Write callback for ping monitoring: always send a ping packet */
+static int ping_writer(void *private_data, struct tmon_pkt *pkt)
+{
+	(void)tmon_set_ping(pkt);
+
+	return 0;
+}
+
+/*
+ * Read callback for ping monitoring: anything other than a valid
+ * ping packet is a protocol error.
+ */
+static int ping_reader(void *private_data, struct tmon_pkt *pkt)
+{
+	/* Only expect pings */
+	return tmon_parse_ping(pkt) ? 0 : EPROTO;
+}
+
+/*
+ * Start ping monitoring on fd.
+ *
+ * Pings are sent every interval seconds if fd is writeable and
+ * interval is non-zero.  Incoming packets are required to be pings
+ * if fd is readable and timeout is non-zero.  Everything else is
+ * handled by tmon_send(), with no private data.
+ */
+struct tevent_req *tmon_ping_send(TALLOC_CTX *mem_ctx,
+				  struct tevent_context *ev,
+				  int fd,
+				  int direction,
+				  unsigned long timeout,
+				  unsigned long interval)
+{
+	const bool send_pings = (direction & TMON_FD_WRITE) && interval != 0;
+	const bool expect_pings = (direction & TMON_FD_READ) && timeout != 0;
+	struct tmon_actions actions = {
+		.write_callback = send_pings ? ping_writer : NULL,
+		.read_callback = expect_pings ? ping_reader : NULL,
+	};
+
+	return tmon_send(mem_ctx,
+			 ev,
+			 fd,
+			 direction,
+			 timeout,
+			 interval,
+			 &actions,
+			 NULL);
+}
+
+/*
+ * Fetch the result of ping monitoring, see tmon_recv().
+ */
+bool tmon_ping_recv(struct tevent_req *req, int *perr)
+{
+	return tmon_recv(req, perr);
+}
diff --git a/ctdb/common/tmon.h b/ctdb/common/tmon.h
new file mode 100644
index 0000000..7cbfbbd
--- /dev/null
+++ b/ctdb/common/tmon.h
@@ -0,0 +1,218 @@
+/*
+ Trivial FD monitoring
+
+ Copyright (C) Martin Schwenke & Amitay Isaacs, DataDirect Networks 2022
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_TMON_H__
+#define __CTDB_TMON_H__
+
+#include <talloc.h>
+#include <tevent.h>
+
+/**
+ * @file tmon.h
+ *
+ * @brief Interprocess file descriptor (pipe and socketpair) monitoring
+ *
+ * Assumes 2 processes connected by a pipe(2) or a socketpair(2). A
+ * simple protocol is defined to allow sending various types of status
+ * information. When a pipe(2) is used the reader can monitor for
+ * close and read packets, while the sender can write packets. When a
+ * socketpair(2) is used then both ends can monitor for close, and
+ * read and write packets. A read timeout can be specified,
+ * terminating the computation if no packets are received.
+ *
+ * A simplified interface is provided to monitor for close and allow
+ * sending/monitoring of one-way ping packets. A ping timeout occurs
+ * when one end is expecting pings but none are received during the
+ * timeout interval - no response is sent to pings, they merely reset
+ * a timer on the receiving end.
+ */
+
+struct tmon_pkt;
+
+/*
+ * Callbacks used by tmon_send().  All are optional (NULL) and all
+ * are passed the private_data given to tmon_send().
+ */
+struct tmon_actions {
+ /* Called every write_interval seconds to fill in a packet to send */
+ int (*write_callback)(void *private_data, struct tmon_pkt *pkt);
+ /* Called on read timeout, return value replaces ETIMEDOUT */
+ int (*timeout_callback)(void *private_data);
+ /* Called for each packet read, except exit and errno packets */
+ int (*read_callback)(void *private_data, struct tmon_pkt *pkt);
+ /* Called on EPIPE, return value replaces EPIPE */
+ int (*close_callback)(void *private_data);
+};
+
+/*
+ * Return value from write_callback() and read_callback() to cause the
+ * computation to exit successfully. For consistency this can also be
+ * used with timeout_callback() and close_callback().
+ *
+ * The special return values are negative, so they are distinct from
+ * 0 (continue) and from the positive errno values used for errors.
+ */
+#define TMON_STATUS_EXIT (-1)
+
+/* Return value from write_callback() to skip write */
+#define TMON_STATUS_SKIP (-2)
+
+/* For direction, below */
+#define TMON_FD_READ 0x1
+#define TMON_FD_WRITE 0x2
+#define TMON_FD_BOTH (TMON_FD_READ | TMON_FD_WRITE)
+
+/**
+ * @brief Async computation to start FD monitoring
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] fd File descriptor for "this" end of pipe/socketpair
+ * @param[in] direction Read, write or both - for sanity checking
+ * @param[in] read_timeout Seconds to trigger timeout when no packets received
+ * @param[in] write_interval Seconds to trigger write_callback
+ * @param[in] actions struct containing callbacks
+ * @param[in] private_data Passed to callbacks
+ * @return new tevent request or NULL on failure
+ *
+ * @note read_timeout implies monitor_close
+ *
+ * @note The computation will complete when:
+ *
+ * - The writing end closes (e.g. writer process terminates) - EPIPE
+ * - read_timeout is non-zero and timeout occurs - ETIMEDOUT
+ * - Packets received with no read_callback defined - EIO
+ * - Invalid or unexpected packet received - EPROTO
+ * - File descriptor readable but no bytes to read - EPIPE
+ * - Invalid combination of direction, callbacks, timeouts - EINVAL
+ * - An unexpected error occurs - other
+ *
+ * @note action callbacks return an int that can be used to trigger
+ * other errors or override an error. For example:
+ *
+ * - write_callback() can return non-zero errno, causing an error
+ * - close_callback() can return zero, overriding the default EPIPE error
+ * - timeout_callback() can return something other than ETIMEDOUT
+ * - read_callback() can return EPROTO for unexpected packet types
+ *
+ * Reading of exit and errno packets is handled internally (read
+ * callback is never called). Write callback can return special
+ * value TMON_STATUS_SKIP to avoid sending any data.
+ */
+struct tevent_req *tmon_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ int fd,
+ int direction,
+ unsigned long read_timeout,
+ unsigned long write_interval,
+ struct tmon_actions *actions,
+ void *private_data);
+
+/**
+ * @brief Async computation to end FD monitoring
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool tmon_recv(struct tevent_req *req, int *perr);
+
+/**
+ * @brief Fill in an exit packet
+ *
+ * @param[in,out] pkt An exit packet
+ * @return true on success, false on failure
+ *
+ * @note The current implementation always succeeds
+ */
+bool tmon_set_exit(struct tmon_pkt *pkt);
+/**
+ * @brief Fill in an errno packet
+ *
+ * @param[in,out] pkt An errno packet
+ * @param[in] err An errno to send in packet
+ * @return true on success, false on failure
+ *
+ * @note Fails if err is not positive or does not fit into 16 bits
+ */
+bool tmon_set_errno(struct tmon_pkt *pkt, int err);
+/**
+ * @brief Fill in a ping packet
+ *
+ * @param[in,out] pkt A ping packet
+ * @return true on success, false on failure
+ *
+ * @note The current implementation always succeeds
+ */
+bool tmon_set_ping(struct tmon_pkt *pkt);
+/**
+ * @brief Fill in an ASCII packet
+ *
+ * @param[in,out] pkt An ASCII packet
+ * @param[in] c An ASCII character to send in packet
+ * @return true on success, false on failure
+ *
+ * @note Fails if c is not an ASCII character
+ */
+bool tmon_set_ascii(struct tmon_pkt *pkt, char c);
+/**
+ * @brief Fill in a custom packet
+ *
+ * @param[in,out] pkt A custom packet
+ * @param[in] val A uint16_t to send in a custom packet
+ * @return true on success, false on failure
+ *
+ * @note The current implementation always succeeds
+ */
+bool tmon_set_custom(struct tmon_pkt *pkt, uint16_t val);
+
+/**
+ * @brief Validate a ping packet
+ *
+ * @param[in] pkt A ping packet
+ * @return true on success, false on failure
+ *
+ * @note A valid ping packet has the ping type and a value of 0
+ */
+bool tmon_parse_ping(struct tmon_pkt *pkt);
+
+/**
+ * @brief Validate ASCII packet and parse out character
+ *
+ * @param[in] pkt An ASCII packet
+ * @param[out] c An ASCII character value from packet
+ * @return true on success, false on failure
+ *
+ * @note On failure c is not modified
+ */
+bool tmon_parse_ascii(struct tmon_pkt *pkt, char *c);
+
+/**
+ * @brief Validate custom packet and parse out value
+ *
+ * @param[in] pkt A custom packet
+ * @param[out] val A uint16_t value from packet
+ * @return true on success, false on failure
+ *
+ * @note On failure val is not modified
+ */
+bool tmon_parse_custom(struct tmon_pkt *pkt, uint16_t *val);
+
+/**
+ * @brief Write a packet
+ *
+ * @param[in] req Tevent request created by tmon_send
+ * @param[in] pkt Packet to write
+ * @return true on success, false on failure
+ *
+ * @note If the file descriptor is not writeable (EINVAL) or the write
+ * fails then the request is completed before false is returned.
+ * Once a write has failed, further calls return false without
+ * doing anything.
+ */
+bool tmon_write(struct tevent_req *req, struct tmon_pkt *pkt);
+
+/**
+ * @brief Async computation to start ping monitoring
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[in] ev Tevent context
+ * @param[in] fd File descriptor for "this" end of pipe/socketpair
+ * @param[in] direction Read, write or both - for sanity checking
+ * @param[in] timeout Timeout for pings on receiving end
+ * @param[in] interval Send a ping packet every interval seconds
+ * @return new tevent request or NULL on failure
+ *
+ * @note Pings are only sent if direction includes TMON_FD_WRITE and
+ * interval is non-zero.  Received packets are only checked to be
+ * pings if direction includes TMON_FD_READ and timeout is non-zero.
+ */
+struct tevent_req *tmon_ping_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ int fd,
+ int direction,
+ unsigned long timeout,
+ unsigned long interval);
+
+/**
+ * @brief Async computation to end ping monitoring
+ *
+ * @param[in] req Tevent request
+ * @param[out] perr errno in case of failure
+ * @return true on success, false on failure
+ */
+bool tmon_ping_recv(struct tevent_req *req, int *perr);
+
+#endif /* __CTDB_TMON_H__ */
diff --git a/ctdb/common/tunable.c b/ctdb/common/tunable.c
new file mode 100644
index 0000000..f366f23
--- /dev/null
+++ b/ctdb/common/tunable.c
@@ -0,0 +1,401 @@
+/*
+ Tunables utilities
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/filesys.h"
+#include "system/locale.h"
+#include "system/network.h"
+
+#include <talloc.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/smb_strtox.h"
+#include "lib/util/tini.h"
+
+#include "protocol/protocol.h"
+
+#include "common/tunable.h"
+
+/*
+ * Table of all known tunables
+ *
+ * Each entry gives the tunable's name, its default value, whether the
+ * tunable is obsolete, and the byte offset of its storage inside
+ * struct ctdb_tunable_list (accessed as a uint32_t by the functions
+ * below).  The table is terminated by a sentinel entry whose label is
+ * NULL; every loop in this file stops at that entry.
+ *
+ * Nothing in this file modifies the table, so it is declared const.
+ */
+static const struct {
+	const char *label;	/* tunable name, NULL in the sentinel */
+	uint32_t value;		/* default value */
+	bool obsolete;		/* true if the tunable is obsolete */
+	size_t offset;		/* offset within struct ctdb_tunable_list */
+} tunable_map[] = {
+	{ "MaxRedirectCount", 3, true,
+		offsetof(struct ctdb_tunable_list, max_redirect_count) },
+	{ "SeqnumInterval", 1000, false,
+		offsetof(struct ctdb_tunable_list, seqnum_interval) },
+	{ "ControlTimeout", 60, false,
+		offsetof(struct ctdb_tunable_list, control_timeout) },
+	{ "TraverseTimeout", 20, false,
+		offsetof(struct ctdb_tunable_list, traverse_timeout) },
+	{ "KeepaliveInterval", 5, false,
+		offsetof(struct ctdb_tunable_list, keepalive_interval) },
+	{ "KeepaliveLimit", 5, false,
+		offsetof(struct ctdb_tunable_list, keepalive_limit) },
+	{ "RecoverTimeout", 30, false,
+		offsetof(struct ctdb_tunable_list, recover_timeout) },
+	{ "RecoverInterval", 1, false,
+		offsetof(struct ctdb_tunable_list, recover_interval) },
+	{ "ElectionTimeout", 3, false,
+		offsetof(struct ctdb_tunable_list, election_timeout) },
+	{ "TakeoverTimeout", 9, false,
+		offsetof(struct ctdb_tunable_list, takeover_timeout) },
+	{ "MonitorInterval", 15, false,
+		offsetof(struct ctdb_tunable_list, monitor_interval) },
+	{ "TickleUpdateInterval", 20, false,
+		offsetof(struct ctdb_tunable_list, tickle_update_interval) },
+	{ "EventScriptTimeout", 30, false,
+		offsetof(struct ctdb_tunable_list, script_timeout) },
+	{ "MonitorTimeoutCount", 20, false,
+		offsetof(struct ctdb_tunable_list, monitor_timeout_count) },
+	{ "EventScriptUnhealthyOnTimeout", 0, true,
+		offsetof(struct ctdb_tunable_list, script_unhealthy_on_timeout) },
+	{ "RecoveryGracePeriod", 120, false,
+		offsetof(struct ctdb_tunable_list, recovery_grace_period) },
+	{ "RecoveryBanPeriod", 300, false,
+		offsetof(struct ctdb_tunable_list, recovery_ban_period) },
+	{ "DatabaseHashSize", 100001, false,
+		offsetof(struct ctdb_tunable_list, database_hash_size) },
+	{ "DatabaseMaxDead", 5, false,
+		offsetof(struct ctdb_tunable_list, database_max_dead) },
+	{ "RerecoveryTimeout", 10, false,
+		offsetof(struct ctdb_tunable_list, rerecovery_timeout) },
+	{ "EnableBans", 1, false,
+		offsetof(struct ctdb_tunable_list, enable_bans) },
+	{ "DeterministicIPs", 0, true,
+		offsetof(struct ctdb_tunable_list, deterministic_public_ips) },
+	{ "LCP2PublicIPs", 1, true,
+		offsetof(struct ctdb_tunable_list, lcp2_public_ip_assignment) },
+	{ "ReclockPingPeriod", 60, true,
+		offsetof(struct ctdb_tunable_list, reclock_ping_period) },
+	{ "NoIPFailback", 0, false,
+		offsetof(struct ctdb_tunable_list, no_ip_failback) },
+	{ "DisableIPFailover", 0, true,
+		offsetof(struct ctdb_tunable_list, disable_ip_failover) },
+	{ "VerboseMemoryNames", 0, false,
+		offsetof(struct ctdb_tunable_list, verbose_memory_names) },
+	{ "RecdPingTimeout", 60, false,
+		offsetof(struct ctdb_tunable_list, recd_ping_timeout) },
+	{ "RecdFailCount", 10, false,
+		offsetof(struct ctdb_tunable_list, recd_ping_failcount) },
+	{ "LogLatencyMs", 0, false,
+		offsetof(struct ctdb_tunable_list, log_latency_ms) },
+	{ "RecLockLatencyMs", 1000, false,
+		offsetof(struct ctdb_tunable_list, reclock_latency_ms) },
+	{ "RecoveryDropAllIPs", 120, false,
+		offsetof(struct ctdb_tunable_list, recovery_drop_all_ips) },
+	{ "VerifyRecoveryLock", 1, true,
+		offsetof(struct ctdb_tunable_list, verify_recovery_lock) },
+	{ "VacuumInterval", 10, false,
+		offsetof(struct ctdb_tunable_list, vacuum_interval) },
+	{ "VacuumMaxRunTime", 120, false,
+		offsetof(struct ctdb_tunable_list, vacuum_max_run_time) },
+	{ "RepackLimit", 10*1000, false,
+		offsetof(struct ctdb_tunable_list, repack_limit) },
+	{ "VacuumLimit", 5*1000, true,
+		offsetof(struct ctdb_tunable_list, vacuum_limit) },
+	{ "VacuumFastPathCount", 60, false,
+		offsetof(struct ctdb_tunable_list, vacuum_fast_path_count) },
+	{ "MaxQueueDropMsg", 1000*1000, false,
+		offsetof(struct ctdb_tunable_list, max_queue_depth_drop_msg) },
+	{ "AllowUnhealthyDBRead", 0, false,
+		offsetof(struct ctdb_tunable_list, allow_unhealthy_db_read) },
+	{ "StatHistoryInterval", 1, false,
+		offsetof(struct ctdb_tunable_list, stat_history_interval) },
+	{ "DeferredAttachTO", 120, false,
+		offsetof(struct ctdb_tunable_list, deferred_attach_timeout) },
+	{ "AllowClientDBAttach", 1, false,
+		offsetof(struct ctdb_tunable_list, allow_client_db_attach) },
+	{ "RecoverPDBBySeqNum", 1, true,
+		offsetof(struct ctdb_tunable_list, recover_pdb_by_seqnum) },
+	{ "DeferredRebalanceOnNodeAdd", 300, true,
+		offsetof(struct ctdb_tunable_list, deferred_rebalance_on_node_add) },
+	{ "FetchCollapse", 1, false,
+		offsetof(struct ctdb_tunable_list, fetch_collapse) },
+	{ "HopcountMakeSticky", 50, false,
+		offsetof(struct ctdb_tunable_list, hopcount_make_sticky) },
+	{ "StickyDuration", 600, false,
+		offsetof(struct ctdb_tunable_list, sticky_duration) },
+	{ "StickyPindown", 200, false,
+		offsetof(struct ctdb_tunable_list, sticky_pindown) },
+	{ "NoIPTakeover", 0, false,
+		offsetof(struct ctdb_tunable_list, no_ip_takeover) },
+	{ "DBRecordCountWarn", 100*1000, false,
+		offsetof(struct ctdb_tunable_list, db_record_count_warn) },
+	{ "DBRecordSizeWarn", 10*1000*1000, false,
+		offsetof(struct ctdb_tunable_list, db_record_size_warn) },
+	{ "DBSizeWarn", 100*1000*1000, false,
+		offsetof(struct ctdb_tunable_list, db_size_warn) },
+	{ "PullDBPreallocation", 10*1024*1024, false,
+		offsetof(struct ctdb_tunable_list, pulldb_preallocation_size) },
+	{ "NoIPHostOnAllDisabled", 1, true,
+		offsetof(struct ctdb_tunable_list, no_ip_host_on_all_disabled) },
+	{ "Samba3AvoidDeadlocks", 0, true,
+		offsetof(struct ctdb_tunable_list, samba3_hack) },
+	{ "TDBMutexEnabled", 1, true,
+		offsetof(struct ctdb_tunable_list, mutex_enabled) },
+	{ "LockProcessesPerDB", 200, false,
+		offsetof(struct ctdb_tunable_list, lock_processes_per_db) },
+	{ "RecBufferSizeLimit", 1000*1000, false,
+		offsetof(struct ctdb_tunable_list, rec_buffer_size_limit) },
+	{ "QueueBufferSize", 1024, false,
+		offsetof(struct ctdb_tunable_list, queue_buffer_size) },
+	{ "IPAllocAlgorithm", 2, false,
+		offsetof(struct ctdb_tunable_list, ip_alloc_algorithm) },
+	{ "AllowMixedVersions", 0, false,
+		offsetof(struct ctdb_tunable_list, allow_mixed_versions) },
+	/* Sentinel: label is NULL */
+	{ .obsolete = true, }
+};
+
+/*
+ * Reset tun_list to the built-in defaults
+ *
+ * For every entry of tunable_map, up to the NULL-label sentinel, the
+ * entry's default value is stored in the uint32_t located at the
+ * entry's offset within tun_list.  Obsolete tunables are reset too.
+ */
+void ctdb_tunable_set_defaults(struct ctdb_tunable_list *tun_list)
+{
+	uint8_t *base = (uint8_t *)tun_list;
+	int idx = 0;
+
+	while (tunable_map[idx].label != NULL) {
+		uint32_t *slot = (uint32_t *)(base + tunable_map[idx].offset);
+
+		*slot = tunable_map[idx].value;
+		idx++;
+	}
+}
+
+/*
+ * Look up the current value of a tunable by name
+ *
+ * The name is matched case-insensitively against tunable_map.  On a
+ * match the uint32_t at the entry's offset within tun_list is copied
+ * to *value and true is returned.  If no entry matches, *value is left
+ * untouched and false is returned.
+ */
+bool ctdb_tunable_get_value(struct ctdb_tunable_list *tun_list,
+			    const char *tunable_str, uint32_t *value)
+{
+	uint8_t *base = (uint8_t *)tun_list;
+	int idx;
+
+	for (idx = 0; tunable_map[idx].label != NULL; idx++) {
+		uint32_t *slot;
+
+		if (strcasecmp(tunable_map[idx].label, tunable_str) != 0) {
+			continue;
+		}
+
+		slot = (uint32_t *)(base + tunable_map[idx].offset);
+		*value = *slot;
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Set a tunable by name
+ *
+ * The name is matched case-insensitively against tunable_map.  On a
+ * match, value is stored in the uint32_t at the entry's offset within
+ * tun_list and true is returned; if obsolete is not NULL it receives
+ * the entry's obsolete flag.  The value is stored even when the
+ * tunable is obsolete.  If no entry matches, nothing is written
+ * (including *obsolete) and false is returned.
+ */
+bool ctdb_tunable_set_value(struct ctdb_tunable_list *tun_list,
+			    const char *tunable_str, uint32_t value,
+			    bool *obsolete)
+{
+	uint8_t *base = (uint8_t *)tun_list;
+	int idx;
+
+	for (idx = 0; tunable_map[idx].label != NULL; idx++) {
+		uint32_t *slot;
+
+		if (strcasecmp(tunable_map[idx].label, tunable_str) != 0) {
+			continue;
+		}
+
+		slot = (uint32_t *)(base + tunable_map[idx].offset);
+		*slot = value;
+		if (obsolete != NULL) {
+			*obsolete = tunable_map[idx].obsolete;
+		}
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Build a list of the names of all non-obsolete tunables
+ *
+ * The returned list is allocated on mem_ctx; the name array and each
+ * name string are talloc children of the list, so freeing the list
+ * frees everything.  Returns NULL if any allocation fails.
+ */
+struct ctdb_var_list *ctdb_tunable_names(TALLOC_CTX *mem_ctx)
+{
+	struct ctdb_var_list *names;
+	int idx;
+
+	names = talloc_zero(mem_ctx, struct ctdb_var_list);
+	if (names == NULL) {
+		return NULL;
+	}
+
+	for (idx = 0; tunable_map[idx].label != NULL; idx++) {
+		const char *label = tunable_map[idx].label;
+
+		if (tunable_map[idx].obsolete) {
+			continue;
+		}
+
+		/* Grow the array by one slot for this name */
+		names->var = talloc_realloc(names, names->var, const char *,
+					    names->count + 1);
+		if (names->var == NULL) {
+			/* The old array is a child of names, freed here */
+			TALLOC_FREE(names);
+			return NULL;
+		}
+
+		names->var[names->count] = talloc_strdup(names, label);
+		if (names->var[names->count] == NULL) {
+			TALLOC_FREE(names);
+			return NULL;
+		}
+
+		names->count += 1;
+	}
+
+	return names;
+}
+
+/*
+ * Build a ':'-separated string of the names of all non-obsolete
+ * tunables
+ *
+ * The string is allocated on mem_ctx.  Every name is preceded by ':'
+ * (so the result starts with ':') and there is no trailing ':'.  If
+ * there are no non-obsolete tunables the result is the empty string.
+ * Returns NULL if any allocation fails; in that case nothing is left
+ * allocated on mem_ctx.
+ */
+char *ctdb_tunable_names_to_string(TALLOC_CTX *mem_ctx)
+{
+	char *str = NULL;
+	int i;
+
+	str = talloc_strdup(mem_ctx, ":");
+	if (str == NULL) {
+		return NULL;
+	}
+
+	for (i=0; tunable_map[i].label != NULL; i++) {
+		char *tmp;
+
+		if (tunable_map[i].obsolete) {
+			continue;
+		}
+
+		/*
+		 * Append via a temporary: if the append fails, str is
+		 * still a valid allocation and would otherwise linger
+		 * on mem_ctx once the only pointer to it is overwritten
+		 * with NULL.
+		 */
+		tmp = talloc_asprintf_append(str, "%s:",
+					     tunable_map[i].label);
+		if (tmp == NULL) {
+			TALLOC_FREE(str);
+			return NULL;
+		}
+		str = tmp;
+	}
+
+	/* Remove the last ':' */
+	str[strlen(str)-1] = '\0';
+
+	return str;
+}
+
+/*
+ * State shared between ctdb_tunable_load_file() and the tini_parse()
+ * callbacks tunable_section() and tunable_option()
+ */
+struct tunable_load_state {
+ struct ctdb_tunable_list *tun_list; /* tunables being filled in */
+ bool status; /* set to false by a callback when a line is rejected */
+ const char *func; /* function name used as prefix in error messages */
+};
+
+/*
+ * Section callback handed to tini_parse() by ctdb_tunable_load_file()
+ *
+ * Sections are not supported in a tunables file, so the section is
+ * logged as an error and the load is marked as failed.  True is still
+ * returned so that parsing carries on and later errors get reported.
+ */
+static bool tunable_section(const char *section, void *private_data)
+{
+	struct tunable_load_state *load = private_data;
+
+	load->status = false;
+	D_ERR("%s: Invalid line for section [%s] - sections not supported \n",
+	      load->func,
+	      section);
+
+	return true;
+}
+
+/*
+ * Option callback handed to tini_parse() by ctdb_tunable_load_file()
+ *
+ * Converts value to a number and stores it in the tunable called name.
+ * A line is rejected, with an error logged and state->status set to
+ * false, when:
+ *  - the value is empty,
+ *  - the value is not a complete, valid number or does not fit in the
+ *    uint32_t used to store tunables,
+ *  - the tunable name is unknown, or
+ *  - the tunable is obsolete (its value is still stored).
+ *
+ * True is always returned so that parsing continues and every bad
+ * line in the file gets reported.
+ */
+static bool tunable_option(const char *name,
+			   const char *value,
+			   void *private_data)
+{
+	struct tunable_load_state *state =
+		(struct tunable_load_state *)private_data;
+	unsigned long num;
+	bool obsolete;
+	bool ok;
+	int ret;
+
+	if (value[0] == '\0') {
+		D_ERR("%s: Invalid line containing \"%s\"\n", state->func, name);
+		state->status = false;
+		return true;
+	}
+
+	num = smb_strtoul(value, NULL, 0, &ret, SMB_STR_FULL_STR_CONV);
+	/*
+	 * Tunables are stored as uint32_t.  Where unsigned long is
+	 * wider than that, a value that does not survive the round
+	 * trip through uint32_t would be silently truncated by the
+	 * cast below, so treat it as invalid.
+	 */
+	if (ret != 0 || (uint32_t)num != num) {
+		D_ERR("%s: Invalid value \"%s\" for tunable \"%s\"\n",
+		      state->func,
+		      value,
+		      name);
+		state->status = false;
+		return true;
+	}
+
+	ok = ctdb_tunable_set_value(state->tun_list,
+				    name,
+				    (uint32_t)num,
+				    &obsolete);
+	if (!ok) {
+		D_ERR("%s: Unknown tunable \"%s\"\n", state->func, name);
+		state->status = false;
+		return true;
+	}
+	if (obsolete) {
+		D_ERR("%s: Obsolete tunable \"%s\"\n", state->func, name);
+		state->status = false;
+		return true;
+	}
+
+	return true;
+}
+
+/*
+ * Load tunables from a file into tun_list
+ *
+ * tun_list is first reset to the defaults.  A file that does not exist
+ * is not an error: the defaults are kept and true is returned.  False
+ * is returned if the file can not be opened for any other reason, if
+ * tini_parse() fails, or if a callback rejected any line.
+ *
+ * mem_ctx is not used by this function.
+ */
+bool ctdb_tunable_load_file(TALLOC_CTX *mem_ctx,
+			    struct ctdb_tunable_list *tun_list,
+			    const char *file)
+{
+	struct tunable_load_state state = {
+		.tun_list = tun_list,
+		.status = true,
+		.func = __FUNCTION__,
+	};
+	FILE *stream;
+	bool parsed;
+
+	ctdb_tunable_set_defaults(tun_list);
+
+	stream = fopen(file, "r");
+	if (stream == NULL) {
+		if (errno != ENOENT) {
+			DBG_ERR("Failed to open %s\n", file);
+			return false;
+		}
+
+		/* The file is optional */
+		return true;
+	}
+
+	D_NOTICE("Loading tunables from %s\n", file);
+	/*
+	 * Passing allow_empty_value=true looks odd but is deliberate:
+	 * with false, a tunable given without an equals sign or value
+	 * counts as empty and is silently skipped.  With true, the
+	 * empty value reaches tunable_option(), which reports it.
+	 *
+	 * Both callbacks return true and record failure in
+	 * state.status instead, so a single pass reports every bad
+	 * tunable or value in an otherwise well-formed file rather
+	 * than stopping at the first one.
+	 */
+	parsed = tini_parse(stream,
+			    true,
+			    tunable_section,
+			    tunable_option,
+			    &state);
+
+	fclose(stream);
+
+	if (!parsed) {
+		DBG_ERR("Syntax error\n");
+		return false;
+	}
+
+	return state.status;
+}
diff --git a/ctdb/common/tunable.h b/ctdb/common/tunable.h
new file mode 100644
index 0000000..89f99f1
--- /dev/null
+++ b/ctdb/common/tunable.h
@@ -0,0 +1,35 @@
+/*
+ Tunable utilities
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_TUNABLE_H__
+#define __CTDB_TUNABLE_H__
+
+/**
+ * @brief Reset all tunables to their built-in default values
+ *
+ * Obsolete tunables are reset as well.
+ *
+ * @param[out] tun_list Tunables structure to fill in
+ */
+void ctdb_tunable_set_defaults(struct ctdb_tunable_list *tun_list);
+
+/**
+ * @brief Get the current value of a tunable by name
+ *
+ * The name is compared case-insensitively.
+ *
+ * @param[in] tun_list Tunables structure to read from
+ * @param[in] tunable_str Name of the tunable
+ * @param[out] value Value of the tunable, untouched if the name is unknown
+ * @return true if the tunable is known, false otherwise
+ */
+bool ctdb_tunable_get_value(struct ctdb_tunable_list *tun_list,
+ const char *tunable_str, uint32_t *value);
+
+/**
+ * @brief Set the value of a tunable by name
+ *
+ * The name is compared case-insensitively.  The value is stored even
+ * if the tunable is obsolete.
+ *
+ * @param[in,out] tun_list Tunables structure to update
+ * @param[in] tunable_str Name of the tunable
+ * @param[in] value New value
+ * @param[out] obsolete If not NULL, set to whether the tunable is
+ *                      obsolete; untouched if the name is unknown
+ * @return true if the tunable is known, false otherwise
+ */
+bool ctdb_tunable_set_value(struct ctdb_tunable_list *tun_list,
+ const char *tunable_str, uint32_t value,
+ bool *obsolete);
+
+/**
+ * @brief Get the names of all non-obsolete tunables as a list
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @return list allocated on mem_ctx, NULL on allocation failure
+ */
+struct ctdb_var_list *ctdb_tunable_names(TALLOC_CTX *mem_ctx);
+
+/**
+ * @brief Get the names of all non-obsolete tunables as a string
+ *
+ * Each name is preceded by ':', so the string starts with ':' and has
+ * no trailing ':'.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @return string allocated on mem_ctx, NULL on allocation failure
+ */
+char *ctdb_tunable_names_to_string(TALLOC_CTX *mem_ctx);
+
+/**
+ * @brief Load tunables from a file
+ *
+ * tun_list is reset to the defaults before the file is read.  A file
+ * that does not exist is not an error.  Sections, empty or invalid
+ * values, unknown tunables and obsolete tunables are all logged and
+ * cause the load to fail.
+ *
+ * @param[in] mem_ctx Talloc memory context (not used by the current
+ *                    implementation)
+ * @param[out] tun_list Tunables structure to fill in
+ * @param[in] file Path of the tunables file
+ * @return true on success, false on failure
+ */
+bool ctdb_tunable_load_file(TALLOC_CTX *mem_ctx,
+ struct ctdb_tunable_list *tun_list,
+ const char *file);
+
+#endif /* __CTDB_TUNABLE_H__ */