diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 17:47:29 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 17:47:29 +0000 |
commit | 4f5791ebd03eaec1c7da0865a383175b05102712 (patch) | |
tree | 8ce7b00f7a76baa386372422adebbe64510812d4 /ctdb/common | |
parent | Initial commit. (diff) | |
download | samba-upstream.tar.xz samba-upstream.zip |
Adding upstream version 2:4.17.12+dfsg.upstream/2%4.17.12+dfsgupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ctdb/common')
58 files changed, 17126 insertions, 0 deletions
diff --git a/ctdb/common/cmdline.c b/ctdb/common/cmdline.c new file mode 100644 index 0000000..ce368a9 --- /dev/null +++ b/ctdb/common/cmdline.c @@ -0,0 +1,598 @@ +/* + Command line processing + + Copyright (C) Amitay Isaacs 2018 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" + +#include <popt.h> +#include <talloc.h> +#include <tevent.h> + +#include "lib/util/debug.h" + +#include "common/cmdline.h" + +#define CMDLINE_MAX_LEN 80 + +struct cmdline_section { + const char *name; + struct cmdline_command *commands; +}; + +struct cmdline_context { + const char *prog; + struct poptOption *options; + struct cmdline_section *section; + int num_sections; + size_t max_len; + poptContext pc; + int argc, arg0; + const char **argv; + struct cmdline_command *match_cmd; +}; + +static bool cmdline_show_help = false; + +static void cmdline_popt_help(poptContext pc, + enum poptCallbackReason reason, + struct poptOption *key, + const char *arg, + void *data) +{ + if (key->shortName == 'h') { + cmdline_show_help = true; + } +} + +struct poptOption cmdline_help_options[] = { + { NULL, '\0', POPT_ARG_CALLBACK, cmdline_popt_help, 0, NULL, NULL }, + { "help", 'h', 0, NULL, 'h', "Show this help message", NULL }, + POPT_TABLEEND +}; + +#define CMDLINE_HELP_OPTIONS \ + { NULL, '\0', POPT_ARG_INCLUDE_TABLE, cmdline_help_options, \ + 0, "Help Options:", NULL } + +static bool 
cmdline_option_check(struct poptOption *option) +{ + if (option->longName == NULL) { + D_ERR("Option has no long name\n"); + return false; + } + + if (option->argInfo != POPT_ARG_STRING && + option->argInfo != POPT_ARG_INT && + option->argInfo != POPT_ARG_LONG && + option->argInfo != POPT_ARG_VAL && + option->argInfo != POPT_ARG_FLOAT && + option->argInfo != POPT_ARG_DOUBLE) { + D_ERR("Option '%s' has unsupported type\n", option->longName); + return false; + } + + if (option->arg == NULL) { + D_ERR("Option '%s' has invalid arg\n", option->longName); + return false; + } + + if (option->descrip == NULL) { + D_ERR("Option '%s' has no help msg\n", option->longName); + return false; + } + + return true; +} + +static bool cmdline_options_check(struct poptOption *options) +{ + int i; + bool ok; + + if (options == NULL) { + return true; + } + + i = 0; + while (options[i].longName != NULL || options[i].shortName != '\0') { + ok = cmdline_option_check(&options[i]); + if (!ok) { + return false; + } + i++; + } + + return true; +} + +static int cmdline_options_define(TALLOC_CTX *mem_ctx, + struct poptOption *user_options, + struct poptOption **result) +{ + struct poptOption *options; + int count, i; + + count = (user_options == NULL ? 
2 : 3); + + options = talloc_array(mem_ctx, struct poptOption, count); + if (options == NULL) { + return ENOMEM; + } + + i = 0; + options[i++] = (struct poptOption) CMDLINE_HELP_OPTIONS; + if (user_options != NULL) { + options[i++] = (struct poptOption) { + .argInfo = POPT_ARG_INCLUDE_TABLE, + .arg = user_options, + .descrip = "Options:", + }; + } + options[i++] = (struct poptOption) POPT_TABLEEND; + + *result = options; + return 0; +} + +static bool cmdline_command_check(struct cmdline_command *cmd, size_t *max_len) +{ + size_t len; + + if (cmd->name == NULL) { + return false; + } + + if (cmd->fn == NULL) { + D_ERR("Command '%s' has no implementation function\n", + cmd->name); + return false; + } + + if (cmd->msg_help == NULL) { + D_ERR("Command '%s' has no help msg\n", cmd->name); + return false; + } + + len = strlen(cmd->name); + if (cmd->msg_args != NULL) { + len += strlen(cmd->msg_args); + } + if (len > CMDLINE_MAX_LEN) { + D_ERR("Command '%s' is too long (%zu)\n", cmd->name, len); + return false; + } + + if (len > *max_len) { + *max_len = len; + } + + len = strlen(cmd->msg_help); + if (len > CMDLINE_MAX_LEN) { + D_ERR("Command '%s' help too long (%zu)\n", cmd->name, len); + return false; + } + + return true; +} + +static bool cmdline_commands_check(struct cmdline_command *commands, + size_t *max_len) +{ + int i; + bool ok; + + if (commands == NULL) { + return false; + } + + for (i=0; commands[i].name != NULL; i++) { + ok = cmdline_command_check(&commands[i], max_len); + if (!ok) { + return false; + } + } + + return true; +} + +static int cmdline_context_destructor(struct cmdline_context *cmdline); + +static int cmdline_section_add(struct cmdline_context *cmdline, + const char *name, + struct cmdline_command *commands) +{ + struct cmdline_section *section; + size_t max_len = 0; + bool ok; + + ok = cmdline_commands_check(commands, &max_len); + if (!ok) { + return EINVAL; + } + + section = talloc_realloc(cmdline, + cmdline->section, + struct cmdline_section, + 
cmdline->num_sections + 1); + if (section == NULL) { + return ENOMEM; + } + + section[cmdline->num_sections] = (struct cmdline_section) { + .name = name, + .commands = commands, + }; + + if (max_len > cmdline->max_len) { + cmdline->max_len = max_len; + } + + cmdline->section = section; + cmdline->num_sections += 1; + + return 0; +} + +int cmdline_init(TALLOC_CTX *mem_ctx, + const char *prog, + struct poptOption *options, + const char *name, + struct cmdline_command *commands, + struct cmdline_context **result) +{ + struct cmdline_context *cmdline; + int ret; + bool ok; + + if (prog == NULL) { + return EINVAL; + } + + ok = cmdline_options_check(options); + if (!ok) { + return EINVAL; + } + + cmdline = talloc_zero(mem_ctx, struct cmdline_context); + if (cmdline == NULL) { + return ENOMEM; + } + + cmdline->prog = talloc_strdup(cmdline, prog); + if (cmdline->prog == NULL) { + talloc_free(cmdline); + return ENOMEM; + } + + ret = cmdline_options_define(cmdline, options, &cmdline->options); + if (ret != 0) { + talloc_free(cmdline); + return ret; + } + + ret = cmdline_section_add(cmdline, name, commands); + if (ret != 0) { + talloc_free(cmdline); + return ret; + } + + cmdline->argc = 1; + cmdline->argv = talloc_array(cmdline, const char *, 2); + if (cmdline->argv == NULL) { + talloc_free(cmdline); + return ENOMEM; + } + cmdline->argv[0] = cmdline->prog; + cmdline->argv[1] = NULL; + + /* Dummy popt context for generating help */ + cmdline->pc = poptGetContext(cmdline->prog, + cmdline->argc, + cmdline->argv, + cmdline->options, + 0); + if (cmdline->pc == NULL) { + talloc_free(cmdline); + return ENOMEM; + } + + talloc_set_destructor(cmdline, cmdline_context_destructor); + + *result = cmdline; + return 0; +} + +static int cmdline_context_destructor(struct cmdline_context *cmdline) +{ + if (cmdline->pc != NULL) { + poptFreeContext(cmdline->pc); + } + + return 0; +} + +int cmdline_add(struct cmdline_context *cmdline, + const char *name, + struct cmdline_command *commands) +{ + 
return cmdline_section_add(cmdline, name, commands); +} + +static int cmdline_parse_options(struct cmdline_context *cmdline, + int argc, + const char **argv) +{ + int opt; + + if (cmdline->pc != NULL) { + poptFreeContext(cmdline->pc); + } + + cmdline->pc = poptGetContext(cmdline->prog, + argc, + argv, + cmdline->options, + 0); + if (cmdline->pc == NULL) { + return ENOMEM; + } + + while ((opt = poptGetNextOpt(cmdline->pc)) != -1) { + D_ERR("Invalid option %s: %s\n", + poptBadOption(cmdline->pc, 0), + poptStrerror(opt)); + return EINVAL; + } + + /* Set up remaining arguments for commands */ + cmdline->argc = 0; + cmdline->argv = poptGetArgs(cmdline->pc); + if (cmdline->argv != NULL) { + while (cmdline->argv[cmdline->argc] != NULL) { + cmdline->argc++; + } + } + + return 0; +} + +static int cmdline_match_section(struct cmdline_context *cmdline, + struct cmdline_section *section) +{ + int i; + + for (i=0; section->commands[i].name != NULL; i++) { + struct cmdline_command *cmd; + char name[CMDLINE_MAX_LEN+1]; + size_t len; + char *t, *str; + int n = 0; + bool match = false; + + cmd = &section->commands[i]; + len = strlcpy(name, cmd->name, sizeof(name)); + if (len >= sizeof(name)) { + D_ERR("Skipping long command '%s'\n", cmd->name); + continue; + } + + str = name; + while ((t = strtok(str, " ")) != NULL) { + if (n >= cmdline->argc) { + match = false; + break; + } + if (cmdline->argv[n] == NULL) { + match = false; + break; + } + if (strcmp(cmdline->argv[n], t) == 0) { + match = true; + cmdline->arg0 = n+1; + } else { + match = false; + break; + } + + n += 1; + str = NULL; + } + + if (match) { + cmdline->match_cmd = cmd; + return 0; + } + } + + cmdline->match_cmd = NULL; + return ENOENT; +} + +static int cmdline_match(struct cmdline_context *cmdline) +{ + int i, ret = ENOENT; + + if (cmdline->argc == 0 || cmdline->argv == NULL) { + cmdline->match_cmd = NULL; + return EINVAL; + } + + for (i=0; i<cmdline->num_sections; i++) { + ret = cmdline_match_section(cmdline, 
&cmdline->section[i]); + if (ret == 0) { + break; + } + } + + return ret; +} + +int cmdline_parse(struct cmdline_context *cmdline, + int argc, + const char **argv, + bool parse_options) +{ + int ret; + + if (argc < 2) { + cmdline_usage(cmdline, NULL); + return EINVAL; + } + + cmdline_show_help = false; + + if (parse_options) { + ret = cmdline_parse_options(cmdline, argc, argv); + if (ret != 0) { + cmdline_usage(cmdline, NULL); + return ret; + } + } else { + cmdline->argc = argc; + cmdline->argv = argv; + } + + ret = cmdline_match(cmdline); + + if (ret != 0 || cmdline_show_help) { + const char *name = NULL; + + if (cmdline->match_cmd != NULL) { + name = cmdline->match_cmd->name; + } + + cmdline_usage(cmdline, name); + + if (cmdline_show_help) { + ret = EAGAIN; + } + } + + return ret; +} + +static void cmdline_usage_command(struct cmdline_context *cmdline, + struct cmdline_command *cmd, + bool print_all) +{ + size_t len; + + len = strlen(cmd->name); + + printf(" %s ", cmd->name); + if (print_all) { + printf("%-*s", + (int)(cmdline->max_len-len), + cmd->msg_args == NULL ? "" : cmd->msg_args); + } else { + printf("%s", cmd->msg_args == NULL ? 
"" : cmd->msg_args); + } + printf(" %s\n", cmd->msg_help); +} + +static void cmdline_usage_section(struct cmdline_context *cmdline, + struct cmdline_section *section) +{ + int i; + + printf("\n"); + + if (section->name != NULL) { + printf("%s ", section->name); + } + printf("Commands:\n"); + for (i=0; section->commands[i].name != NULL; i++) { + cmdline_usage_command(cmdline, &section->commands[i], true); + + } +} + +static void cmdline_usage_full(struct cmdline_context *cmdline) +{ + int i; + + poptSetOtherOptionHelp(cmdline->pc, "[<options>] <command> [<args>]"); + poptPrintHelp(cmdline->pc, stdout, 0); + + for (i=0; i<cmdline->num_sections; i++) { + cmdline_usage_section(cmdline, &cmdline->section[i]); + } +} + +void cmdline_usage(struct cmdline_context *cmdline, const char *cmd_name) +{ + struct cmdline_command *cmd = NULL; + int i, j; + + if (cmd_name == NULL) { + cmdline_usage_full(cmdline); + return; + } + + for (j=0; j<cmdline->num_sections; j++) { + struct cmdline_section *section = &cmdline->section[j]; + + for (i=0; section->commands[i].name != NULL; i++) { + if (strcmp(section->commands[i].name, cmd_name) == 0) { + cmd = &section->commands[i]; + break; + } + } + } + + if (cmd == NULL) { + cmdline_usage_full(cmdline); + return; + } + + poptSetOtherOptionHelp(cmdline->pc, "<command> [<args>]"); + poptPrintUsage(cmdline->pc, stdout, 0); + + printf("\n"); + cmdline_usage_command(cmdline, cmd, false); +} + +int cmdline_run(struct cmdline_context *cmdline, + void *private_data, + int *result) +{ + struct cmdline_command *cmd = cmdline->match_cmd; + TALLOC_CTX *tmp_ctx; + int ret; + + if (cmd == NULL) { + return ENOENT; + } + + tmp_ctx = talloc_new(cmdline); + if (tmp_ctx == NULL) { + return ENOMEM; + } + + ret = cmd->fn(tmp_ctx, + cmdline->argc - cmdline->arg0, + &cmdline->argv[cmdline->arg0], + private_data); + + talloc_free(tmp_ctx); + + if (result != NULL) { + *result = ret; + } + return 0; +} diff --git a/ctdb/common/cmdline.h b/ctdb/common/cmdline.h new file mode 
100644 index 0000000..b9a128c --- /dev/null +++ b/ctdb/common/cmdline.h @@ -0,0 +1,163 @@ +/* + Command line processing + + Copyright (C) Amitay Isaacs 2018 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_CMDLINE_H__ +#define __CTDB_CMDLINE_H__ + +#include <popt.h> +#include <talloc.h> + +/** + * @file cmdline.h + * + * @brief Command-line handling with options and commands + * + * This abstraction encapsulates the boiler-plate for parsing options, + * commands and arguments on command-line. + * + * Options handling is done using popt. + */ + +/** + * @brief Abstract data structure holding command-line configuration + */ +struct cmdline_context; + +/** + * @brief A command definition structure + * + * @name is the name of the command + * @fn is the implementation of the command + * @msg_help is the help message describing command + * @msg_args is the help message describing arguments + * + * A command name can be a single word or multiple words separated with spaces. + * + * An implementation function should return 0 on success and non-zero value + * on failure. This value is returned as result in @cmdline_run. 
+ */ +struct cmdline_command { + const char *name; + int (*fn)(TALLOC_CTX *mem_ctx, + int argc, + const char **argv, + void *private_data); + const char *msg_help; + const char *msg_args; +}; + +/** + * @brief convenience macro to define the end of commands list + * + * Here is an example of defining commands list. + * + * struct cmdline_command commands[] = { + * { "command1", command1_func, "Run command1", NULL }, + * { "command2", command2_func, "Run command2", "<filename>" }, + * CMDLINE_TABLEEND + * }; + */ +#define CMDLINE_TABLEEND { NULL, NULL, NULL, NULL } + +/** + * @brief Initialize cmdline abstraction + * + * If there are no options, options can be NULL. + * + * Help options (--help, -h) are automatically added to the options. + * + * @param[in] mem_ctx Talloc memory context + * @param[in] prog Program name + * @param[in] options Command-line options + * @param[in] section Name of section grouping specified commands + * @param[in] commands Commands array + * @param[out] result New cmdline context + * @return 0 on success, errno on failure + * + * Freeing cmdline context will free up all the resources. + */ +int cmdline_init(TALLOC_CTX *mem_ctx, + const char *prog, + struct poptOption *options, + const char *section, + struct cmdline_command *commands, + struct cmdline_context **result); + + +/** + * @brief Add command line section/commands + * + * @param[in] cmdline Cmdline context + * @param[in] section Name of section grouping specified commands + * @param[in] commands Commands array + * @return 0 on success, errno on failure + */ +int cmdline_add(struct cmdline_context *cmdline, + const char *section, + struct cmdline_command *commands); + +/** + * @brief Parse command line options and commands/arguments + * + * This function parses the arguments to process options and commands. + * + * This function should be passed the arguments to main() and parse_options + * should be set to true. 
If cmdline is used for handling second-level + * commands, then parse_options should be set to false. + * + * If argv does not match any command, then ENOENT is returned. + * + * @param[in] cmdline Cmdline context + * @param[in] argc Number of arguments + * @param[in] argv Arguments array + * @param[in] parse_options Whether to parse for options + * @return 0 on success, errno on failure + */ +int cmdline_parse(struct cmdline_context *cmdline, + int argc, + const char **argv, + bool parse_options); + +/** + * @brief Execute the function for the command matched by @cmdline_parse + * + * @param[in] cmdline Cmdline context + * @param[in] private_data Private data for implementation function + * @param[out] result Return value from the implementation function + * @return 0 on success, errno on failure + * + * If help options are specified, then detailed help will be printed and + * the return value will be EAGAIN. + */ +int cmdline_run(struct cmdline_context *cmdline, + void *private_data, + int *result); + +/** + * @brief Print usage help message to stdout + * + * @param[in] cmdline Cmdline context + * @param[in] command Command string + * + * If command is NULL, then full help is printed. + * If command is specified, then compact help is printed. + */ +void cmdline_usage(struct cmdline_context *cmdline, const char *command); + +#endif /* __CTDB_CMDLINE_H__ */ diff --git a/ctdb/common/comm.c b/ctdb/common/comm.c new file mode 100644 index 0000000..12f4970 --- /dev/null +++ b/ctdb/common/comm.c @@ -0,0 +1,427 @@ +/* + Communication endpoint implementation + + Copyright (C) Amitay Isaacs 2015 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" +#include "system/network.h" +#include "system/filesys.h" + +#include <talloc.h> +#include <tdb.h> + +#include "lib/util/blocking.h" +#include "lib/util/tevent_unix.h" + +#include "pkt_read.h" +#include "pkt_write.h" +#include "comm.h" + +/* + * Communication endpoint around a socket + */ + +#define SMALL_PKT_SIZE 1024 + +struct comm_context { + int fd; + comm_read_handler_fn read_handler; + void *read_private_data; + comm_dead_handler_fn dead_handler; + void *dead_private_data; + uint8_t small_pkt[SMALL_PKT_SIZE]; + struct tevent_req *read_req, *write_req; + struct tevent_fd *fde; + struct tevent_queue *queue; +}; + +static void comm_fd_handler(struct tevent_context *ev, + struct tevent_fd *fde, + uint16_t flags, void *private_data); +static struct tevent_req *comm_read_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct comm_context *comm, + uint8_t *buf, size_t buflen); +static void comm_read_failed(struct tevent_req *req); + + +int comm_setup(TALLOC_CTX *mem_ctx, struct tevent_context *ev, int fd, + comm_read_handler_fn read_handler, void *read_private_data, + comm_dead_handler_fn dead_handler, void *dead_private_data, + struct comm_context **result) +{ + struct comm_context *comm; + int ret; + + if (fd < 0) { + return EINVAL; + } + + if (dead_handler == NULL) { + return EINVAL; + } + + /* Socket queue relies on non-blocking sockets. 
*/ + ret = set_blocking(fd, false); + if (ret == -1) { + return EIO; + } + + comm = talloc_zero(mem_ctx, struct comm_context); + if (comm == NULL) { + return ENOMEM; + } + + comm->fd = fd; + comm->read_handler = read_handler; + comm->read_private_data = read_private_data; + comm->dead_handler = dead_handler; + comm->dead_private_data = dead_private_data; + + comm->queue = tevent_queue_create(comm, "comm write queue"); + if (comm->queue == NULL) { + goto fail; + } + + /* Set up to write packets */ + comm->fde = tevent_add_fd(ev, comm, fd, TEVENT_FD_READ, + comm_fd_handler, comm); + if (comm->fde == NULL) { + goto fail; + } + + /* Set up to read packets */ + if (read_handler != NULL) { + struct tevent_req *req; + + req = comm_read_send(comm, ev, comm, comm->small_pkt, + SMALL_PKT_SIZE); + if (req == NULL) { + goto fail; + } + + tevent_req_set_callback(req, comm_read_failed, comm); + comm->read_req = req; + } + + *result = comm; + return 0; + +fail: + talloc_free(comm); + return ENOMEM; +} + + +/* + * Read packets + */ + +struct comm_read_state { + struct tevent_context *ev; + struct comm_context *comm; + uint8_t *buf; + size_t buflen; + struct tevent_req *subreq; +}; + +static ssize_t comm_read_more(uint8_t *buf, size_t buflen, void *private_data); +static void comm_read_done(struct tevent_req *subreq); + +static struct tevent_req *comm_read_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct comm_context *comm, + uint8_t *buf, size_t buflen) +{ + struct tevent_req *req, *subreq; + struct comm_read_state *state; + + req = tevent_req_create(mem_ctx, &state, struct comm_read_state); + if (req == NULL) { + return NULL; + } + + state->ev = ev; + state->comm = comm; + state->buf = buf; + state->buflen = buflen; + + subreq = pkt_read_send(state, state->ev, comm->fd, sizeof(uint32_t), + state->buf, state->buflen, + comm_read_more, NULL); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + state->subreq = subreq; + + 
tevent_req_set_callback(subreq, comm_read_done, req); + return req; +} + +static ssize_t comm_read_more(uint8_t *buf, size_t buflen, void *private_data) +{ + uint32_t packet_len; + + if (buflen < sizeof(uint32_t)) { + return sizeof(uint32_t) - buflen; + } + + packet_len = *(uint32_t *)buf; + + return packet_len - buflen; +} + +static void comm_read_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct comm_read_state *state = tevent_req_data( + req, struct comm_read_state); + struct comm_context *comm = state->comm; + ssize_t nread; + uint8_t *buf; + bool free_buf; + int err = 0; + + nread = pkt_read_recv(subreq, state, &buf, &free_buf, &err); + TALLOC_FREE(subreq); + state->subreq = NULL; + if (nread == -1) { + tevent_req_error(req, err); + return; + } + + comm->read_handler(buf, nread, comm->read_private_data); + + if (free_buf) { + talloc_free(buf); + } + + subreq = pkt_read_send(state, state->ev, comm->fd, sizeof(uint32_t), + state->buf, state->buflen, + comm_read_more, NULL); + if (tevent_req_nomem(subreq, req)) { + return; + } + state->subreq = subreq; + + tevent_req_set_callback(subreq, comm_read_done, req); +} + +static void comm_read_recv(struct tevent_req *req, int *perr) +{ + int err; + + if (tevent_req_is_unix_error(req, &err)) { + if (perr != NULL) { + *perr = err; + } + } +} + +static void comm_read_failed(struct tevent_req *req) +{ + struct comm_context *comm = tevent_req_callback_data( + req, struct comm_context); + + comm_read_recv(req, NULL); + TALLOC_FREE(req); + comm->read_req = NULL; + if (comm->dead_handler != NULL) { + comm->dead_handler(comm->dead_private_data); + } +} + + +/* + * Write packets + */ + +struct comm_write_entry { + struct comm_context *comm; + struct tevent_queue_entry *qentry; + struct tevent_req *req; +}; + +struct comm_write_state { + struct tevent_context *ev; + struct comm_context *comm; + struct comm_write_entry *entry; + struct tevent_req *subreq; 
+ uint8_t *buf; + size_t buflen, nwritten; +}; + +static int comm_write_entry_destructor(struct comm_write_entry *entry); +static void comm_write_trigger(struct tevent_req *req, void *private_data); +static void comm_write_done(struct tevent_req *subreq); + +struct tevent_req *comm_write_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct comm_context *comm, + uint8_t *buf, size_t buflen) +{ + struct tevent_req *req; + struct comm_write_state *state; + struct comm_write_entry *entry; + + req = tevent_req_create(mem_ctx, &state, struct comm_write_state); + if (req == NULL) { + return NULL; + } + + state->ev = ev; + state->comm = comm; + state->buf = buf; + state->buflen = buflen; + + entry = talloc_zero(state, struct comm_write_entry); + if (tevent_req_nomem(entry, req)) { + return tevent_req_post(req, ev); + } + + entry->comm = comm; + entry->req = req; + entry->qentry = tevent_queue_add_entry(comm->queue, ev, req, + comm_write_trigger, NULL); + if (tevent_req_nomem(entry->qentry, req)) { + return tevent_req_post(req, ev); + } + + state->entry = entry; + talloc_set_destructor(entry, comm_write_entry_destructor); + + return req; +} + +static int comm_write_entry_destructor(struct comm_write_entry *entry) +{ + struct comm_context *comm = entry->comm; + + if (comm->write_req == entry->req) { + comm->write_req = NULL; + TEVENT_FD_NOT_WRITEABLE(comm->fde); + } + + TALLOC_FREE(entry->qentry); + return 0; +} + +static void comm_write_trigger(struct tevent_req *req, void *private_data) +{ + struct comm_write_state *state = tevent_req_data( + req, struct comm_write_state); + struct comm_context *comm = state->comm; + struct tevent_req *subreq; + + comm->write_req = req; + + subreq = pkt_write_send(state, state->ev, comm->fd, + state->buf, state->buflen); + if (tevent_req_nomem(subreq, req)) { + return; + } + + state->subreq = subreq; + tevent_req_set_callback(subreq, comm_write_done, req); + TEVENT_FD_WRITEABLE(comm->fde); +} + +static void 
comm_write_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct comm_write_state *state = tevent_req_data( + req, struct comm_write_state); + struct comm_context *comm = state->comm; + ssize_t nwritten; + int err = 0; + + TEVENT_FD_NOT_WRITEABLE(comm->fde); + nwritten = pkt_write_recv(subreq, &err); + TALLOC_FREE(subreq); + state->subreq = NULL; + comm->write_req = NULL; + if (nwritten == -1) { + if (err == EPIPE) { + comm->dead_handler(comm->dead_private_data); + } + tevent_req_error(req, err); + return; + } + + state->nwritten = nwritten; + state->entry->qentry = NULL; + TALLOC_FREE(state->entry); + tevent_req_done(req); +} + +bool comm_write_recv(struct tevent_req *req, int *perr) +{ + struct comm_write_state *state = tevent_req_data( + req, struct comm_write_state); + int err; + + if (tevent_req_is_unix_error(req, &err)) { + if (perr != NULL) { + *perr = err; + } + return false; + } + + if (state->nwritten != state->buflen) { + *perr = EIO; + return false; + } + + *perr = 0; + return true; +} + +static void comm_fd_handler(struct tevent_context *ev, + struct tevent_fd *fde, + uint16_t flags, void *private_data) +{ + struct comm_context *comm = talloc_get_type_abort( + private_data, struct comm_context); + + if (flags & TEVENT_FD_READ) { + struct comm_read_state *read_state; + + if (comm->read_req == NULL) { + /* This should never happen */ + abort(); + } + + read_state = tevent_req_data(comm->read_req, + struct comm_read_state); + pkt_read_handler(ev, fde, flags, read_state->subreq); + } + + if (flags & TEVENT_FD_WRITE) { + struct comm_write_state *write_state; + + if (comm->write_req == NULL) { + TEVENT_FD_NOT_WRITEABLE(comm->fde); + return; + } + + write_state = tevent_req_data(comm->write_req, + struct comm_write_state); + pkt_write_handler(ev, fde, flags, write_state->subreq); + } +} diff --git a/ctdb/common/comm.h b/ctdb/common/comm.h new file mode 100644 index 0000000..e11d38e --- 
/dev/null +++ b/ctdb/common/comm.h @@ -0,0 +1,101 @@ +/* + Communication endpoint API + + Copyright (C) Amitay Isaacs 2015 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_COMM_H__ +#define __CTDB_COMM_H__ + +#include <talloc.h> +#include <tevent.h> + +/** + * @file comm.h + * + * @brief Communication over a socket or file descriptor + * + * This abstraction is a wrapper around a socket or file descriptor to + * send/receive complete packets. + */ + +/** + * @brief Packet handler function + * + * This function is registered while setting up communication endpoint. Any + * time packets are read, this function is called. + */ +typedef void (*comm_read_handler_fn)(uint8_t *buf, size_t buflen, + void *private_data); + +/** + * @brief Communication endpoint dead handler function + * + * This function is called when the communication endpoint is closed. + */ +typedef void (*comm_dead_handler_fn)(void *private_data); + +/** + * @brief Abstract struct to store communication endpoint details + */ +struct comm_context; + +/** + * @brief Initialize the communication endpoint + * + * This returns a new communication context. Freeing this context will free all + * memory associated with it. 
+ * + * @param[in] mem_ctx Talloc memory context + * @param[in] ev Tevent context + * @param[in] fd The socket or file descriptor + * @param[in] read_handler The packet handler function + * @param[in] read_private_data Private data for read handler function + * @param[in] dead_handler The communication dead handler function + * @param[in] dead_private_data Private data for dead handler function + * @param[out] result The new comm_context structure + * @return 0 on success, errno on failure + */ +int comm_setup(TALLOC_CTX *mem_ctx, struct tevent_context *ev, int fd, + comm_read_handler_fn read_handler, void *read_private_data, + comm_dead_handler_fn dead_handler, void *dead_private_data, + struct comm_context **result); + +/** + * @brief Async computation start to send a packet + * + * @param[in] mem_ctx Talloc memory context + * @param[in] ev Tevent context + * @param[in] comm Communication context + * @param[in] buf The packet data + * @param[in] buflen The size of the packet + * @return new tevent request, or NULL on failure + */ +struct tevent_req *comm_write_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct comm_context *comm, + uint8_t *buf, size_t buflen); + +/** + * @brief Async computation end to send a packet + * + * @param[in] req Tevent request + * @param[out] perr errno in case of failure + * @return true on success, false on failure + */ +bool comm_write_recv(struct tevent_req *req, int *perr); + +#endif /* __CTDB_COMM_H__ */ diff --git a/ctdb/common/common.h b/ctdb/common/common.h new file mode 100644 index 0000000..c50b52a --- /dev/null +++ b/ctdb/common/common.h @@ -0,0 +1,159 @@ +/* + ctdb database library + + Copyright (C) Amitay Isaacs 2015 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_COMMON_H__ +#define __CTDB_COMMON_H__ + +#include "lib/util/attr.h" + +/* From common/ctdb_io.c */ + +typedef void (*ctdb_queue_cb_fn_t)(uint8_t *data, size_t length, + void *private_data); + +uint32_t ctdb_queue_length(struct ctdb_queue *queue); + +int ctdb_queue_send(struct ctdb_queue *queue, uint8_t *data, uint32_t length); + +int ctdb_queue_set_fd(struct ctdb_queue *queue, int fd); + +struct ctdb_queue *ctdb_queue_setup(struct ctdb_context *ctdb, + TALLOC_CTX *mem_ctx, int fd, int alignment, + ctdb_queue_cb_fn_t callback, + void *private_data, const char *fmt, ...) + PRINTF_ATTRIBUTE(7,8); + +/* From common/ctdb_ltdb.c */ + +int ctdb_db_tdb_flags(uint8_t db_flags, bool with_valgrind, bool with_mutex); + +struct ctdb_db_context *ctdb_db_handle(struct ctdb_context *ctdb, + const char *name); + +bool ctdb_db_persistent(struct ctdb_db_context *ctdb_db); +bool ctdb_db_replicated(struct ctdb_db_context *ctdb_db); +bool ctdb_db_volatile(struct ctdb_db_context *ctdb_db); + +bool ctdb_db_readonly(struct ctdb_db_context *ctdb_db); +void ctdb_db_set_readonly(struct ctdb_db_context *ctdb_db); +void ctdb_db_reset_readonly(struct ctdb_db_context *ctdb_db); + +bool ctdb_db_sticky(struct ctdb_db_context *ctdb_db); +void ctdb_db_set_sticky(struct ctdb_db_context *ctdb_db); + +uint32_t ctdb_lmaster(struct ctdb_context *ctdb, const TDB_DATA *key); + +int ctdb_ltdb_fetch(struct ctdb_db_context *ctdb_db, + TDB_DATA key, struct ctdb_ltdb_header *header, + TALLOC_CTX *mem_ctx, TDB_DATA *data); + +int ctdb_ltdb_store(struct ctdb_db_context *ctdb_db, TDB_DATA key, + struct 
ctdb_ltdb_header *header, TDB_DATA data); + +int ctdb_ltdb_lock(struct ctdb_db_context *ctdb_db, TDB_DATA key); + +int ctdb_ltdb_unlock(struct ctdb_db_context *ctdb_db, TDB_DATA key); + +int ctdb_ltdb_delete(struct ctdb_db_context *ctdb_db, TDB_DATA key); + +int ctdb_trackingdb_add_pnn(struct ctdb_context *ctdb, TDB_DATA *data, uint32_t pnn); + +typedef void (*ctdb_trackingdb_cb)(struct ctdb_context *ctdb, uint32_t pnn, + void *private_data); + +void ctdb_trackingdb_traverse(struct ctdb_context *ctdb, TDB_DATA data, + ctdb_trackingdb_cb cb, void *private_data); + +int ctdb_null_func(struct ctdb_call_info *call); + +int ctdb_fetch_func(struct ctdb_call_info *call); + +int ctdb_fetch_with_header_func(struct ctdb_call_info *call); + +/* from common/ctdb_util.c */ + +const char *ctdb_errstr(struct ctdb_context *ctdb); + +void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...) + PRINTF_ATTRIBUTE(2,3); + +void ctdb_fatal(struct ctdb_context *ctdb, const char *msg) _NORETURN_; + +void ctdb_die(struct ctdb_context *ctdb, const char *msg) _NORETURN_; + +bool ctdb_set_helper(const char *type, char *helper, size_t size, + const char *envvar, + const char *dir, const char *file); + +int ctdb_parse_address(TALLOC_CTX *mem_ctx, const char *str, + ctdb_sock_addr *address); + +bool ctdb_same_address(ctdb_sock_addr *a1, ctdb_sock_addr *a2); + +uint32_t ctdb_hash(const TDB_DATA *key); + +struct ctdb_rec_data_old *ctdb_marshall_record(TALLOC_CTX *mem_ctx, + uint32_t reqid, + TDB_DATA key, + struct ctdb_ltdb_header *header, + TDB_DATA data); + +struct ctdb_marshall_buffer *ctdb_marshall_add(TALLOC_CTX *mem_ctx, + struct ctdb_marshall_buffer *m, + uint32_t db_id, + uint32_t reqid, + TDB_DATA key, + struct ctdb_ltdb_header *header, + TDB_DATA data); + +TDB_DATA ctdb_marshall_finish(struct ctdb_marshall_buffer *m); + +struct ctdb_rec_data_old *ctdb_marshall_loop_next( + struct ctdb_marshall_buffer *m, + struct ctdb_rec_data_old *r, + uint32_t *reqid, + struct 
ctdb_ltdb_header *header, + TDB_DATA *key, TDB_DATA *data); + +void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip); + +bool ctdb_same_ip(const ctdb_sock_addr *tip1, const ctdb_sock_addr *tip2); + +bool ctdb_same_sockaddr(const ctdb_sock_addr *ip1, const ctdb_sock_addr *ip2); + +char *ctdb_addr_to_str(ctdb_sock_addr *addr); + +unsigned ctdb_addr_to_port(ctdb_sock_addr *addr); + +struct ctdb_node_map_old *ctdb_read_nodes_file(TALLOC_CTX *mem_ctx, + const char *nlist); + +struct ctdb_node_map_old *ctdb_node_list_to_map(struct ctdb_node **nodes, + uint32_t num_nodes, + TALLOC_CTX *mem_ctx); + +const char *runstate_to_string(enum ctdb_runstate runstate); + +enum ctdb_runstate runstate_from_string(const char *label); + +void ctdb_set_runstate(struct ctdb_context *ctdb, enum ctdb_runstate runstate); + +uint32_t *ctdb_key_to_idkey(TALLOC_CTX *mem_ctx, TDB_DATA key); + +#endif /* __CTDB_COMMON_H__ */ diff --git a/ctdb/common/conf.c b/ctdb/common/conf.c new file mode 100644 index 0000000..e849ff4 --- /dev/null +++ b/ctdb/common/conf.c @@ -0,0 +1,1391 @@ +/* + Configuration file handling on top of tini + + Copyright (C) Amitay Isaacs 2017 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "replace.h" +#include "system/locale.h" + +#include <talloc.h> + +#include "lib/util/dlinklist.h" +#include "lib/util/tini.h" +#include "lib/util/debug.h" + +#include "common/conf.h" + +struct conf_value { + enum conf_type type; + union { + const char *string; + int integer; + bool boolean; + } data; +}; + +union conf_pointer { + const char **string; + int *integer; + bool *boolean; +}; + +struct conf_option { + struct conf_option *prev, *next; + + const char *name; + enum conf_type type; + void *validate; + + struct conf_value default_value; + bool default_set; + + struct conf_value *value, *new_value; + union conf_pointer ptr; + bool temporary_modified; +}; + +struct conf_section { + struct conf_section *prev, *next; + + const char *name; + conf_validate_section_fn validate; + struct conf_option *option; +}; + +struct conf_context { + const char *filename; + struct conf_section *section; + bool define_failed; + bool ignore_unknown; + bool reload; + bool validation_active; +}; + +/* + * Functions related to conf_value + */ + +static int string_to_string(TALLOC_CTX *mem_ctx, + const char *str, + const char **str_val) +{ + char *t; + + if (str == NULL) { + return EINVAL; + } + + t = talloc_strdup(mem_ctx, str); + if (t == NULL) { + return ENOMEM; + } + + *str_val = t; + return 0; +} + +/* + * Convert a string to a non-negative int. + * + * The base is auto-detected by strtol() (base argument 0). Returns 0 and + * stores the result in *int_val on success. Returns EINVAL if str is NULL, + * contains no digits, has characters after the number, or the value is + * negative or does not fit in an int. + */ +static int string_to_integer(const char *str, int *int_val) +{ + long t; + char *endptr = NULL; + + if (str == NULL) { + return EINVAL; + } + + /* strtol() reports overflow only through errno, so clear it first */ + errno = 0; + t = strtol(str, &endptr, 0); + + /* + * Reject input with no digits (endptr == str), input with trailing + * characters after the number, and values strtol() could not + * represent. + */ + if (errno != 0 || endptr == str || *endptr != '\0') { + return EINVAL; + } + + /* Only non-negative values that fit in an int are accepted */ + if (t < 0 || t > INT_MAX) { + return EINVAL; + } + + *int_val = (int)t; + return 0; +} + +/* + * Convert a string to a boolean. + * + * Accepts "true"/"yes" and "false"/"no", compared case-insensitively. + * Returns 0 and stores the result in *bool_val on success, EINVAL for NULL + * or any other string. + */ +static int string_to_boolean(const char *str, bool *bool_val) +{ + /* Same NULL handling as string_to_string()/string_to_integer() */ + if (str == NULL) { + return EINVAL; + } + + if (strcasecmp(str, "true") == 0 || strcasecmp(str, "yes") == 0) { + *bool_val = true; + return 0; + } + + if (strcasecmp(str, "false") == 0 || strcasecmp(str, "no") == 0) { + *bool_val = false; + return 0; + } + + return EINVAL; +} + +static int conf_value_from_string(TALLOC_CTX 
*mem_ctx, + const char *str, + struct conf_value *value) +{ + int ret; + + switch (value->type) { + case CONF_STRING: + ret = string_to_string(mem_ctx, str, &value->data.string); + break; + + case CONF_INTEGER: + ret = string_to_integer(str, &value->data.integer); + break; + + case CONF_BOOLEAN: + ret = string_to_boolean(str, &value->data.boolean); + break; + + default: + return EINVAL; + } + + return ret; +} + +static bool conf_value_compare(struct conf_value *old, struct conf_value *new) +{ + if (old == NULL || new == NULL) { + return false; + } + + if (old->type != new->type) { + return false; + } + + switch (old->type) { + case CONF_STRING: + if (old->data.string == NULL && new->data.string == NULL) { + return true; + } + if (old->data.string != NULL && new->data.string != NULL) { + if (strcmp(old->data.string, new->data.string) == 0) { + return true; + } + } + break; + + case CONF_INTEGER: + if (old->data.integer == new->data.integer) { + return true; + } + break; + + case CONF_BOOLEAN: + if (old->data.boolean == new->data.boolean) { + return true; + } + break; + } + + return false; +} + +static int conf_value_copy(TALLOC_CTX *mem_ctx, + struct conf_value *src, + struct conf_value *dst) +{ + if (src->type != dst->type) { + return EINVAL; + } + + switch (src->type) { + case CONF_STRING: + if (dst->data.string != NULL) { + talloc_free(discard_const(dst->data.string)); + } + if (src->data.string == NULL) { + dst->data.string = NULL; + } else { + dst->data.string = talloc_strdup( + mem_ctx, src->data.string); + if (dst->data.string == NULL) { + return ENOMEM; + } + } + break; + + case CONF_INTEGER: + dst->data.integer = src->data.integer; + break; + + case CONF_BOOLEAN: + dst->data.boolean = src->data.boolean; + break; + + default: + return EINVAL; + } + + return 0; +} + +static void conf_value_dump(const char *key, + struct conf_value *value, + bool is_default, + bool is_temporary, + FILE *fp) +{ + if ((value->type == CONF_STRING && value->data.string == NULL) || 
+ is_default) { + fprintf(fp, "\t# %s = ", key); + } else { + fprintf(fp, "\t%s = ", key); + } + + switch (value->type) { + case CONF_STRING: + if (value->data.string != NULL) { + fprintf(fp, "%s", value->data.string); + } + break; + + case CONF_INTEGER: + fprintf(fp, "%d", value->data.integer); + break; + + case CONF_BOOLEAN: + fprintf(fp, "%s", (value->data.boolean ? "true" : "false")); + break; + } + + if (is_temporary) { + fprintf(fp, " # temporary"); + } + + fprintf(fp, "\n"); +} + +/* + * Functions related to conf_option + */ + +static struct conf_option *conf_option_find(struct conf_section *s, + const char *key) +{ + struct conf_option *opt; + + for (opt = s->option; opt != NULL; opt = opt->next) { + if (strcmp(opt->name, key) == 0) { + return opt; + } + } + + return NULL; +} + +static void conf_option_set_ptr_value(struct conf_option *opt) +{ + switch (opt->type) { + case CONF_STRING: + if (opt->ptr.string != NULL) { + *(opt->ptr.string) = opt->value->data.string; + } + break; + + case CONF_INTEGER: + if (opt->ptr.integer != NULL) { + *(opt->ptr.integer) = opt->value->data.integer; + } + break; + + case CONF_BOOLEAN: + if (opt->ptr.boolean != NULL) { + *(opt->ptr.boolean) = opt->value->data.boolean; + } + break; + } +} + +static void conf_option_default(struct conf_option *opt); + +static int conf_option_add(struct conf_section *s, + const char *key, + enum conf_type type, + void *validate, + struct conf_option **popt) +{ + struct conf_option *opt; + + opt = conf_option_find(s, key); + if (opt != NULL) { + D_ERR("conf: option \"%s\" already exists\n", key); + return EEXIST; + } + + opt = talloc_zero(s, struct conf_option); + if (opt == NULL) { + return ENOMEM; + } + + opt->name = talloc_strdup(opt, key); + if (opt->name == NULL) { + talloc_free(opt); + return ENOMEM; + } + + opt->type = type; + opt->validate = validate; + + DLIST_ADD_END(s->option, opt); + + if (popt != NULL) { + *popt = opt; + } + + return 0; +} + +static int 
conf_option_set_default(struct conf_option *opt, + struct conf_value *default_value) +{ + int ret; + + opt->default_value.type = opt->type; + + ret = conf_value_copy(opt, default_value, &opt->default_value); + if (ret != 0) { + return ret; + } + + opt->default_set = true; + opt->temporary_modified = false; + + return 0; +} + +static void conf_option_set_ptr(struct conf_option *opt, + union conf_pointer *ptr) +{ + opt->ptr = *ptr; +} + +static bool conf_option_validate_string(struct conf_option *opt, + struct conf_value *value, + enum conf_update_mode mode) +{ + conf_validate_string_option_fn validate = + (conf_validate_string_option_fn)opt->validate; + + return validate(opt->name, + opt->value->data.string, + value->data.string, + mode); +} + +static bool conf_option_validate_integer(struct conf_option *opt, + struct conf_value *value, + enum conf_update_mode mode) +{ + conf_validate_integer_option_fn validate = + (conf_validate_integer_option_fn)opt->validate; + + return validate(opt->name, + opt->value->data.integer, + value->data.integer, + mode); +} + +static bool conf_option_validate_boolean(struct conf_option *opt, + struct conf_value *value, + enum conf_update_mode mode) +{ + conf_validate_boolean_option_fn validate = + (conf_validate_boolean_option_fn)opt->validate; + + return validate(opt->name, + opt->value->data.boolean, + value->data.boolean, + mode); +} + +/* + * Run the option's type-specific validation callback on a proposed value. + * + * Returns true when no callback is registered or the callback accepts the + * value. Returns false when the callback rejects the value or the option + * type is not one of the known conf_type values. + */ +static bool conf_option_validate(struct conf_option *opt, + struct conf_value *value, + enum conf_update_mode mode) +{ + bool ok; + + if (opt->validate == NULL) { + return true; + } + + switch (opt->type) { + case CONF_STRING: + ok = conf_option_validate_string(opt, value, mode); + break; + + case CONF_INTEGER: + ok = conf_option_validate_integer(opt, value, mode); + break; + + case CONF_BOOLEAN: + ok = conf_option_validate_boolean(opt, value, mode); + break; + + default: + /* + * An unknown type must fail validation. Storing an errno + * value here would convert to true in the bool return. + */ + ok = false; + break; + } + + return ok; +} + +static bool conf_option_same_value(struct conf_option *opt, + struct conf_value 
*new_value) +{ + return conf_value_compare(opt->value, new_value); +} + +static int conf_option_new_value(struct conf_option *opt, + struct conf_value *new_value, + enum conf_update_mode mode) +{ + int ret; + bool ok; + + if (opt->new_value != &opt->default_value) { + TALLOC_FREE(opt->new_value); + } + + if (new_value == &opt->default_value) { + /* + * This happens only during load/reload. Set the value to + * default value, so if the config option is dropped from + * config file, then it get's reset to default. + */ + opt->new_value = &opt->default_value; + } else { + ok = conf_option_validate(opt, new_value, mode); + if (!ok) { + D_ERR("conf: validation for option \"%s\" failed\n", + opt->name); + return EINVAL; + } + + opt->new_value = talloc_zero(opt, struct conf_value); + if (opt->new_value == NULL) { + return ENOMEM; + } + + opt->new_value->type = opt->value->type; + ret = conf_value_copy(opt, new_value, opt->new_value); + if (ret != 0) { + return ret; + } + } + + conf_option_set_ptr_value(opt); + + if (new_value != &opt->default_value) { + if (mode == CONF_MODE_API) { + opt->temporary_modified = true; + } else { + opt->temporary_modified = false; + } + } + + return 0; +} + +static int conf_option_new_default_value(struct conf_option *opt, + enum conf_update_mode mode) +{ + return conf_option_new_value(opt, &opt->default_value, mode); +} + +static void conf_option_default(struct conf_option *opt) +{ + if (! 
opt->default_set) { + return; + } + + if (opt->value != &opt->default_value) { + TALLOC_FREE(opt->value); + } + + opt->value = &opt->default_value; + conf_option_set_ptr_value(opt); +} + +static void conf_option_reset(struct conf_option *opt) +{ + if (opt->new_value != &opt->default_value) { + TALLOC_FREE(opt->new_value); + } + + conf_option_set_ptr_value(opt); +} + +static void conf_option_update(struct conf_option *opt) +{ + if (opt->new_value == NULL) { + return; + } + + if (opt->value != &opt->default_value) { + TALLOC_FREE(opt->value); + } + + opt->value = opt->new_value; + opt->new_value = NULL; + + conf_option_set_ptr_value(opt); +} + +static void conf_option_reset_temporary(struct conf_option *opt) +{ + opt->temporary_modified = false; +} + +static bool conf_option_is_default(struct conf_option *opt) +{ + return (opt->value == &opt->default_value); +} + +static void conf_option_dump(struct conf_option *opt, FILE *fp) +{ + bool is_default; + + is_default = conf_option_is_default(opt); + + conf_value_dump(opt->name, + opt->value, + is_default, + opt->temporary_modified, + fp); +} + +/* + * Functions related to conf_section + */ + +static struct conf_section *conf_section_find(struct conf_context *conf, + const char *section) +{ + struct conf_section *s; + + for (s = conf->section; s != NULL; s = s->next) { + if (strcasecmp(s->name, section) == 0) { + return s; + } + } + + return NULL; +} + +static int conf_section_add(struct conf_context *conf, + const char *section, + conf_validate_section_fn validate) +{ + struct conf_section *s; + + s = conf_section_find(conf, section); + if (s != NULL) { + return EEXIST; + } + + s = talloc_zero(conf, struct conf_section); + if (s == NULL) { + return ENOMEM; + } + + s->name = talloc_strdup(s, section); + if (s->name == NULL) { + talloc_free(s); + return ENOMEM; + } + + s->validate = validate; + + DLIST_ADD_END(conf->section, s); + return 0; +} + +static bool conf_section_validate(struct conf_context *conf, + struct 
conf_section *s, + enum conf_update_mode mode) +{ + bool ok; + + if (s->validate == NULL) { + return true; + } + + ok = s->validate(conf, s->name, mode); + if (!ok) { + D_ERR("conf: validation for section [%s] failed\n", s->name); + } + + return ok; +} + +static void conf_section_dump(struct conf_section *s, FILE *fp) +{ + fprintf(fp, "[%s]\n", s->name); +} + +/* + * Functions related to conf_context + */ + +static void conf_all_default(struct conf_context *conf) +{ + struct conf_section *s; + struct conf_option *opt; + + for (s = conf->section; s != NULL; s = s->next) { + for (opt = s->option; opt != NULL; opt = opt->next) { + conf_option_default(opt); + } + } +} + +static int conf_all_temporary_default(struct conf_context *conf, + enum conf_update_mode mode) +{ + struct conf_section *s; + struct conf_option *opt; + int ret; + + for (s = conf->section; s != NULL; s = s->next) { + for (opt = s->option; opt != NULL; opt = opt->next) { + ret = conf_option_new_default_value(opt, mode); + if (ret != 0) { + return ret; + } + } + } + + return 0; +} + +static void conf_all_reset(struct conf_context *conf) +{ + struct conf_section *s; + struct conf_option *opt; + + for (s = conf->section; s != NULL; s = s->next) { + for (opt = s->option; opt != NULL; opt = opt->next) { + conf_option_reset(opt); + } + } +} + +static void conf_all_update(struct conf_context *conf) +{ + struct conf_section *s; + struct conf_option *opt; + + for (s = conf->section; s != NULL; s = s->next) { + for (opt = s->option; opt != NULL; opt = opt->next) { + conf_option_update(opt); + conf_option_reset_temporary(opt); + } + } +} + +/* + * API functions + */ + +int conf_init(TALLOC_CTX *mem_ctx, struct conf_context **result) +{ + struct conf_context *conf; + + conf = talloc_zero(mem_ctx, struct conf_context); + if (conf == NULL) { + return ENOMEM; + } + + conf->define_failed = false; + + *result = conf; + return 0; +} + +void conf_define_section(struct conf_context *conf, + const char *section, + 
conf_validate_section_fn validate) +{ + int ret; + + if (conf->define_failed) { + return; + } + + if (section == NULL) { + conf->define_failed = true; + return; + } + + ret = conf_section_add(conf, section, validate); + if (ret != 0) { + conf->define_failed = true; + return; + } +} + +static struct conf_option *conf_define(struct conf_context *conf, + const char *section, + const char *key, + enum conf_type type, + conf_validate_string_option_fn validate) +{ + struct conf_section *s; + struct conf_option *opt; + int ret; + + s = conf_section_find(conf, section); + if (s == NULL) { + D_ERR("conf: unknown section [%s]\n", section); + return NULL; + } + + if (key == NULL) { + D_ERR("conf: option name null in section [%s]\n", section); + return NULL; + } + + ret = conf_option_add(s, key, type, validate, &opt); + if (ret != 0) { + return NULL; + } + + return opt; +} + +static void conf_define_post(struct conf_context *conf, + struct conf_option *opt, + struct conf_value *default_value) +{ + int ret; + + ret = conf_option_set_default(opt, default_value); + if (ret != 0) { + conf->define_failed = true; + return; + } + + conf_option_default(opt); +} + +void conf_define_string(struct conf_context *conf, + const char *section, + const char *key, + const char *default_str_val, + conf_validate_string_option_fn validate) +{ + struct conf_option *opt; + struct conf_value default_value; + + if (! conf_valid(conf)) { + return; + } + + opt = conf_define(conf, section, key, CONF_STRING, validate); + if (opt == NULL) { + conf->define_failed = true; + return; + } + + default_value.type = CONF_STRING; + default_value.data.string = default_str_val; + + conf_define_post(conf, opt, &default_value); +} + +void conf_define_integer(struct conf_context *conf, + const char *section, + const char *key, + const int default_int_val, + conf_validate_integer_option_fn validate) +{ + struct conf_option *opt; + struct conf_value default_value; + + if (! 
conf_valid(conf)) { + return; + } + + opt = conf_define(conf, section, key, CONF_INTEGER, (void *)validate); + if (opt == NULL) { + conf->define_failed = true; + return; + } + + default_value.type = CONF_INTEGER; + default_value.data.integer = default_int_val; + + conf_define_post(conf, opt, &default_value); +} + + +void conf_define_boolean(struct conf_context *conf, + const char *section, + const char *key, + const bool default_bool_val, + conf_validate_boolean_option_fn validate) +{ + struct conf_option *opt; + struct conf_value default_value; + + if (! conf_valid(conf)) { + return; + } + + opt = conf_define(conf, section, key, CONF_BOOLEAN, (void *)validate); + if (opt == NULL) { + conf->define_failed = true; + return; + } + + default_value.type = CONF_BOOLEAN; + default_value.data.boolean = default_bool_val; + + conf_define_post(conf, opt, &default_value); +} + +static struct conf_option *_conf_option(struct conf_context *conf, + const char *section, + const char *key) +{ + struct conf_section *s; + struct conf_option *opt; + + s = conf_section_find(conf, section); + if (s == NULL) { + return NULL; + } + + opt = conf_option_find(s, key); + return opt; +} + +void conf_assign_string_pointer(struct conf_context *conf, + const char *section, + const char *key, + const char **str_ptr) +{ + struct conf_option *opt; + union conf_pointer ptr; + + opt = _conf_option(conf, section, key); + if (opt == NULL) { + D_ERR("conf: unknown option [%s] -> \"%s\"\n", section, key); + conf->define_failed = true; + return; + } + + if (opt->type != CONF_STRING) { + conf->define_failed = true; + return; + } + + ptr.string = str_ptr; + conf_option_set_ptr(opt, &ptr); + conf_option_set_ptr_value(opt); +} + +void conf_assign_integer_pointer(struct conf_context *conf, + const char *section, + const char *key, + int *int_ptr) +{ + struct conf_option *opt; + union conf_pointer ptr; + + opt = _conf_option(conf, section, key); + if (opt == NULL) { + D_ERR("conf: unknown option [%s] -> 
\"%s\"\n", section, key); + conf->define_failed = true; + return; + } + + if (opt->type != CONF_INTEGER) { + conf->define_failed = true; + return; + } + + ptr.integer = int_ptr; + conf_option_set_ptr(opt, &ptr); + conf_option_set_ptr_value(opt); +} + +void conf_assign_boolean_pointer(struct conf_context *conf, + const char *section, + const char *key, + bool *bool_ptr) +{ + struct conf_option *opt; + union conf_pointer ptr; + + opt = _conf_option(conf, section, key); + if (opt == NULL) { + D_ERR("conf: unknown option [%s] -> \"%s\"\n", section, key); + conf->define_failed = true; + return; + } + + if (opt->type != CONF_BOOLEAN) { + conf->define_failed = true; + return; + } + + ptr.boolean = bool_ptr; + conf_option_set_ptr(opt, &ptr); + conf_option_set_ptr_value(opt); +} + +bool conf_query(struct conf_context *conf, + const char *section, + const char *key, + enum conf_type *type) +{ + struct conf_section *s; + struct conf_option *opt; + + if (! conf_valid(conf)) { + return false; + } + + s = conf_section_find(conf, section); + if (s == NULL) { + return false; + } + + opt = conf_option_find(s, key); + if (opt == NULL) { + return false; + } + + if (type != NULL) { + *type = opt->type; + } + return true; +} + +bool conf_valid(struct conf_context *conf) +{ + if (conf->define_failed) { + return false; + } + + return true; +} + +void conf_set_defaults(struct conf_context *conf) +{ + conf_all_default(conf); +} + +struct conf_load_state { + struct conf_context *conf; + struct conf_section *s; + enum conf_update_mode mode; + int err; +}; + +static bool conf_load_section(const char *section, void *private_data); +static bool conf_load_option(const char *name, + const char *value_str, + void *private_data); + +static int conf_load_internal(struct conf_context *conf) +{ + struct conf_load_state state; + FILE *fp; + int ret; + bool ok; + + state = (struct conf_load_state) { + .conf = conf, + .mode = (conf->reload ? 
CONF_MODE_RELOAD : CONF_MODE_LOAD), + }; + + ret = conf_all_temporary_default(conf, state.mode); + if (ret != 0) { + return ret; + } + + fp = fopen(conf->filename, "r"); + if (fp == NULL) { + return errno; + } + + ok = tini_parse(fp, + false, + conf_load_section, + conf_load_option, + &state); + fclose(fp); + if (!ok) { + goto fail; + } + + /* Process the last section */ + if (state.s != NULL) { + ok = conf_section_validate(conf, state.s, state.mode); + if (!ok) { + state.err = EINVAL; + goto fail; + } + } + + if (state.err != 0) { + goto fail; + } + + conf_all_update(conf); + return 0; + +fail: + conf_all_reset(conf); + /* + * If tini_parse() failed for a reason the callbacks did not record + * (state.err is still 0), do not report the failed load as success. + */ + if (state.err == 0) { + state.err = EINVAL; + } + return state.err; +} + +static bool conf_load_section(const char *section, void *private_data) +{ + struct conf_load_state *state = + (struct conf_load_state *)private_data; + bool ok; + + if (state->s != NULL) { + ok = conf_section_validate(state->conf, state->s, state->mode); + if (!ok) { + state->err = EINVAL; + return true; + } + } + + state->s = conf_section_find(state->conf, section); + if (state->s == NULL) { + if (state->conf->ignore_unknown) { + D_DEBUG("conf: ignoring unknown section [%s]\n", + section); + } else { + D_ERR("conf: unknown section [%s]\n", section); + state->err = EINVAL; + return true; + } + } + + return true; +} + +static bool conf_load_option(const char *name, + const char *value_str, + void *private_data) +{ + struct conf_load_state *state = + (struct conf_load_state *)private_data; + struct conf_option *opt; + TALLOC_CTX *tmp_ctx; + struct conf_value value; + int ret; + bool ok; + + if (state->s == NULL) { + if (state->conf->ignore_unknown) { + D_DEBUG("conf: unknown section for option \"%s\"\n", + name); + return true; + } else { + D_ERR("conf: unknown section for option \"%s\"\n", + name); + state->err = EINVAL; + return true; + } + } + + opt = conf_option_find(state->s, name); + if (opt == NULL) { + if (state->conf->ignore_unknown) { + D_DEBUG("conf: unknown option [%s] -> \"%s\"\n", + state->s->name, + name); + 
return true; + } else { + D_ERR("conf: unknown option [%s] -> \"%s\"\n", + state->s->name, + name); + state->err = EINVAL; + return true; + } + } + + if (strlen(value_str) == 0) { + D_ERR("conf: empty value [%s] -> \"%s\"\n", + state->s->name, + name); + state->err = EINVAL; + return true; + } + + tmp_ctx = talloc_new(state->conf); + if (tmp_ctx == NULL) { + state->err = ENOMEM; + return false; + } + + value.type = opt->type; + ret = conf_value_from_string(tmp_ctx, value_str, &value); + if (ret != 0) { + D_ERR("conf: invalid value [%s] -> \"%s\" = \"%s\"\n", + state->s->name, + name, + value_str); + talloc_free(tmp_ctx); + state->err = ret; + return true; + } + + ok = conf_option_same_value(opt, &value); + if (ok) { + goto done; + } + + ret = conf_option_new_value(opt, &value, state->mode); + if (ret != 0) { + talloc_free(tmp_ctx); + state->err = ret; + return true; + } + +done: + talloc_free(tmp_ctx); + return true; + +} + +int conf_load(struct conf_context *conf, + const char *filename, + bool ignore_unknown) +{ + conf->filename = talloc_strdup(conf, filename); + if (conf->filename == NULL) { + return ENOMEM; + } + + conf->ignore_unknown = ignore_unknown; + + D_NOTICE("Reading config file %s\n", filename); + + return conf_load_internal(conf); +} + +int conf_reload(struct conf_context *conf) +{ + int ret; + + if (conf->filename == NULL) { + return EPERM; + } + + D_NOTICE("Re-reading config file %s\n", conf->filename); + + conf->reload = true; + ret = conf_load_internal(conf); + conf->reload = false; + + return ret; +} + +static int conf_set(struct conf_context *conf, + const char *section, + const char *key, + struct conf_value *value) +{ + struct conf_section *s; + struct conf_option *opt; + int ret; + bool ok; + + s = conf_section_find(conf, section); + if (s == NULL) { + return EINVAL; + } + + opt = conf_option_find(s, key); + if (opt == NULL) { + return EINVAL; + } + + if (opt->type != value->type) { + return EINVAL; + } + + ok = conf_option_same_value(opt, 
value); + if (ok) { + return 0; + } + + ret = conf_option_new_value(opt, value, CONF_MODE_API); + if (ret != 0) { + conf_option_reset(opt); + return ret; + } + + ok = conf_section_validate(conf, s, CONF_MODE_API); + if (!ok) { + conf_option_reset(opt); + return EINVAL; + } + + conf_option_update(opt); + return 0; +} + +int conf_set_string(struct conf_context *conf, + const char *section, + const char *key, + const char *str_val) +{ + struct conf_value value; + + value.type = CONF_STRING; + value.data.string = str_val; + + return conf_set(conf, section, key, &value); +} + +int conf_set_integer(struct conf_context *conf, + const char *section, + const char *key, + int int_val) +{ + struct conf_value value; + + value.type = CONF_INTEGER; + value.data.integer = int_val; + + return conf_set(conf, section, key, &value); +} + +int conf_set_boolean(struct conf_context *conf, + const char *section, + const char *key, + bool bool_val) +{ + struct conf_value value; + + value.type = CONF_BOOLEAN; + value.data.boolean = bool_val; + + return conf_set(conf, section, key, &value); +} + +static int conf_get(struct conf_context *conf, + const char *section, + const char *key, + enum conf_type type, + const struct conf_value **value, + bool *is_default) +{ + struct conf_section *s; + struct conf_option *opt; + + s = conf_section_find(conf, section); + if (s == NULL) { + return EINVAL; + } + + opt = conf_option_find(s, key); + if (opt == NULL) { + return EINVAL; + } + + if (opt->type != type) { + return EINVAL; + } + + *value = opt->value; + if (is_default != NULL) { + *is_default = conf_option_is_default(opt); + } + + return 0; +} + +int conf_get_string(struct conf_context *conf, + const char *section, + const char *key, + const char **str_val, + bool *is_default) +{ + const struct conf_value *value; + int ret; + + ret = conf_get(conf, section, key, CONF_STRING, &value, is_default); + if (ret != 0) { + return ret; + } + + *str_val = value->data.string; + return 0; +} + +int 
conf_get_integer(struct conf_context *conf, + const char *section, + const char *key, + int *int_val, + bool *is_default) +{ + const struct conf_value *value; + int ret; + + ret = conf_get(conf, section, key, CONF_INTEGER, &value, is_default); + if (ret != 0) { + return ret; + } + + *int_val = value->data.integer; + return 0; +} + +int conf_get_boolean(struct conf_context *conf, + const char *section, + const char *key, + bool *bool_val, + bool *is_default) +{ + const struct conf_value *value; + int ret; + + ret = conf_get(conf, section, key, CONF_BOOLEAN, &value, is_default); + if (ret != 0) { + return ret; + } + + *bool_val = value->data.boolean; + return 0; +} + +void conf_dump(struct conf_context *conf, FILE *fp) +{ + struct conf_section *s; + struct conf_option *opt; + + for (s = conf->section; s != NULL; s = s->next) { + conf_section_dump(s, fp); + for (opt = s->option; opt != NULL; opt = opt->next) { + conf_option_dump(opt, fp); + } + } +} diff --git a/ctdb/common/conf.h b/ctdb/common/conf.h new file mode 100644 index 0000000..6b152c1 --- /dev/null +++ b/ctdb/common/conf.h @@ -0,0 +1,473 @@ +/* + Configuration file handling on top of tini + + Copyright (C) Amitay Isaacs 2017 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+*/
+
+#ifndef __CTDB_CONF_H__
+#define __CTDB_CONF_H__
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <talloc.h>
+
+/**
+ * @file conf.h
+ *
+ * @brief Configuration file handling with sections and key-value pairs
+ *
+ * CTDB settings can be written in a configuration file ctdb.conf (similar to
+ * samba's smb.conf). Various daemons and tools will consult the configuration
+ * file for runtime settings.
+ *
+ * The configuration will be organized in sections depending on various
+ * components. Each section will have various configuration options in the form
+ * of key-value pairs.
+ *
+ * [section1]
+ * key1 = value1
+ * ...
+ *
+ * [section2]
+ * key2 = value2
+ * ...
+ *
+ * ...
+ *
+ */
+
+/**
+ * @brief Abstract data structure holding the configuration options
+ */
+struct conf_context;
+
+/**
+ * @brief configuration option update mode
+ *
+ * When the value of a configuration option is changed, the update mode is
+ * set appropriately.
+ *
+ * CONF_MODE_API - value modified using set functions
+ * CONF_MODE_LOAD - value modified via conf_load
+ * CONF_MODE_RELOAD - value modified via conf_reload
+ */
+enum conf_update_mode {
+	CONF_MODE_API,
+	CONF_MODE_LOAD,
+	CONF_MODE_RELOAD,
+};
+
+/**
+ * @brief configuration option type
+ */
+enum conf_type {
+	CONF_STRING,
+	CONF_INTEGER,
+	CONF_BOOLEAN,
+};
+
+/**
+ * @brief Configuration section validation function
+ *
+ * Check if all the configuration options are consistent with each other
+ */
+typedef bool (*conf_validate_section_fn)(struct conf_context *conf,
+					 const char *section,
+					 enum conf_update_mode mode);
+
+/**
+ * @brief Configuration option validation function for string
+ *
+ * Check if a configuration option value is valid
+ */
+typedef bool (*conf_validate_string_option_fn)(const char *key,
+					       const char *old_value,
+					       const char *new_value,
+					       enum conf_update_mode mode);
+
+/**
+ * @brief Configuration option validation function for integer
+ *
+ * Check if a configuration option value is valid
+ 
*/
+typedef bool (*conf_validate_integer_option_fn)(const char *key,
+						int old_value,
+						int new_value,
+						enum conf_update_mode mode);
+
+/**
+ * @brief Configuration option validation function for boolean
+ *
+ * Check if a configuration option value is valid
+ */
+typedef bool (*conf_validate_boolean_option_fn)(const char *key,
+						bool old_value,
+						bool new_value,
+						enum conf_update_mode mode);
+
+/**
+ * @brief Initialize configuration option database
+ *
+ * This returns a new configuration options context. Freeing this context will
+ * free up all the memory associated with the configuration options.
+ *
+ * @param[in] mem_ctx Talloc memory context
+ * @param[out] result The new configuration options context
+ * @return 0 on success, errno on failure
+ */
+int conf_init(TALLOC_CTX *mem_ctx, struct conf_context **result);
+
+/**
+ * @brief Define a section for organizing configuration options
+ *
+ * This function creates a section to organize configuration options. The
+ * section names are case-insensitive and are always stored in lower case.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] validate The validation function for configuration options
+ */
+void conf_define_section(struct conf_context *conf,
+			 const char *section,
+			 conf_validate_section_fn validate);
+
+/**
+ * @brief Define a configuration option which has a string value
+ *
+ * This function adds a new configuration option organized under a given
+ * section. Configuration options are case-insensitive and are always stored
+ * in lower case.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[in] default_value The default value for the configuration option
+ * @param[in] validate The validation function for the configuration option
+ */
+void conf_define_string(struct conf_context *conf,
+			const char *section,
+			const char *key,
+			const char *default_value,
+			conf_validate_string_option_fn validate);
+
+/**
+ * @brief Define a configuration option which has an integer value
+ *
+ * This function adds a new configuration option organized under a given
+ * section. Configuration options are case-insensitive and are always stored
+ * in lower case.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[in] default_value The default value for the configuration option
+ * @param[in] validate The validation function for the configuration option
+ */
+void conf_define_integer(struct conf_context *conf,
+			 const char *section,
+			 const char *key,
+			 const int default_value,
+			 conf_validate_integer_option_fn validate);
+
+/**
+ * @brief Define a configuration option which has a boolean value
+ *
+ * This function adds a new configuration option organized under a given
+ * section. Configuration options are case-insensitive and are always stored
+ * in lower case.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[in] default_value The default value for the configuration option
+ * @param[in] validate The validation function for the configuration option
+ */
+void conf_define_boolean(struct conf_context *conf,
+			 const char *section,
+			 const char *key,
+			 const bool default_value,
+			 conf_validate_boolean_option_fn validate);
+
+/**
+ * @brief Assign user-accessible pointer for string option
+ *
+ * This pointer can be used for accessing the value of configuration option
+ * directly without requiring a function call.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[in] ptr User-accessible pointer to the value
+ */
+void conf_assign_string_pointer(struct conf_context *conf,
+				const char *section,
+				const char *key,
+				const char **ptr);
+
+/**
+ * @brief Assign user-accessible pointer for integer option
+ *
+ * This pointer can be used for accessing the value of configuration option
+ * directly without requiring a function call.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[in] ptr User-accessible pointer to the value
+ */
+void conf_assign_integer_pointer(struct conf_context *conf,
+				 const char *section,
+				 const char *key,
+				 int *ptr);
+
+/**
+ * @brief Assign user-accessible pointer for boolean option
+ *
+ * This pointer can be used for accessing the value of configuration option
+ * directly without requiring a function call.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[in] ptr User-accessible pointer to the value
+ */
+void conf_assign_boolean_pointer(struct conf_context *conf,
+				 const char *section,
+				 const char *key,
+				 bool *ptr);
+
+/**
+ * @brief Query a configuration option
+ *
+ * This function checks if a configuration option is defined or not.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of the section
+ * @param[in] key The name of the configuration option
+ * @param[out] type The type of the configuration option
+ * @return true on success, false if section/option is not defined
+ */
+bool conf_query(struct conf_context *conf,
+		const char *section,
+		const char *key,
+		enum conf_type *type);
+
+/**
+ * @brief Check if the defined configuration options are valid
+ *
+ * This function must be called after creating configuration options
+ * to confirm that all the option definitions are valid.
+ *
+ * @param[in] conf The configuration options context
+ * @return true on success, false on failure
+ */
+bool conf_valid(struct conf_context *conf);
+
+/**
+ * @brief Set the default values for all configuration options
+ *
+ * This function resets all the configuration options to their default values.
+ *
+ * @param[in] conf The configuration options context
+ */
+void conf_set_defaults(struct conf_context *conf);
+
+/**
+ * @brief Load the values of configuration options from a file
+ *
+ * This function will update the values of the configuration options from those
+ * specified in a file. This function will fail in case it encounters an
+ * undefined option. Any sections which are not defined, will be ignored.
+ *
+ * This function will call validation function (if specified) before updating
+ * the value of a configuration option. After updating all the values for a
+ * section, the validation for section (if specified) will be called. If any
+ * of the validation functions return error, then all the configuration
+ * options will be reset to their previous values.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] filename The configuration file
+ * @param[in] ignore_unknown Whether unknown config options should be ignored
+ * @return 0 on success, errno on failure
+ */
+int conf_load(struct conf_context *conf,
+	      const char *filename,
+	      bool ignore_unknown);
+
+/**
+ * @brief Reload the values for configuration options
+ *
+ * This function will re-load the values of the configuration options. This
+ * function can be called only after a successful call to conf_load().
+ *
+ * @see conf_load
+ *
+ * @param[in] conf The configuration options context
+ * @return 0 on success, errno on failure.
+ */
+int conf_reload(struct conf_context *conf);
+
+/**
+ * @brief Set the string value of a configuration option
+ *
+ * This function can be used to update the value of a configuration option.
+ * This will call the validation function for that option (if defined) and
+ * the section validation function (if defined).
+ *
+ * If a user-defined storage pointer is provided, then the value of a
+ * configuration option should not be changed via that pointer.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of a section
+ * @param[in] key The name of a configuration option
+ * @param[in] str_val The string value
+ * @return 0 on success, errno in case of failure
+ */
+int conf_set_string(struct conf_context *conf,
+		    const char *section,
+		    const char *key,
+		    const char *str_val);
+
+/**
+ * @brief Set the integer value of a configuration option
+ *
+ * This function can be used to update the value of a configuration option.
+ * This will call the validation function for that option (if defined) and
+ * the section validation function (if defined).
+ *
+ * If a user-defined storage pointer is provided, then the value of a
+ * configuration option should not be changed via that pointer.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of a section
+ * @param[in] key The name of a configuration option
+ * @param[in] int_val The integer value
+ * @return 0 on success, errno in case of failure
+ */
+int conf_set_integer(struct conf_context *conf,
+		     const char *section,
+		     const char *key,
+		     int int_val);
+
+/**
+ * @brief Set the boolean value of a configuration option
+ *
+ * This function can be used to update the value of a configuration option.
+ * This will call the validation function for that option (if defined) and
+ * the section validation function (if defined).
+ *
+ * If a user-defined storage pointer is provided, then the value of a
+ * configuration option should not be changed via that pointer.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of a section
+ * @param[in] key The name of a configuration option
+ * @param[in] bool_val The boolean value
+ * @return 0 on success, errno in case of failure
+ */
+int conf_set_boolean(struct conf_context *conf,
+		     const char *section,
+		     const char *key,
+		     bool bool_val);
+
+/**
+ * @brief Get the string value of a configuration option
+ *
+ * This function can be used to fetch the current value of a configuration
+ * option.
+ *
+ * If a user-defined storage pointer is provided, then the value of a
+ * configuration option can be accessed directly via that pointer.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of a section
+ * @param[in] key The name of a configuration option
+ * @param[out] str_val The string value of the configuration option
+ * @param[out] is_default True if the value is the default value; may be
+ *                        NULL if the caller does not need this information
+ * @return 0 on success, errno in case of failure
+ */
+int conf_get_string(struct conf_context *conf,
+		    const char *section,
+		    const char *key,
+		    const char **str_val,
+		    bool *is_default);
+
+/**
+ * @brief Get the integer value of a configuration option
+ *
+ * This function can be used to fetch the current value of a configuration
+ * option.
+ *
+ * If a user-defined storage pointer is provided, then the value of a
+ * configuration option can be accessed directly via that pointer.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of a section
+ * @param[in] key The name of a configuration option
+ * @param[out] int_val The integer value of the configuration option
+ * @param[out] is_default True if the value is the default value; may be
+ *                        NULL if the caller does not need this information
+ * @return 0 on success, errno in case of failure
+ */
+int conf_get_integer(struct conf_context *conf,
+		     const char *section,
+		     const char *key,
+		     int *int_val,
+		     bool *is_default);
+
+/**
+ * @brief Get the boolean value of a configuration option
+ *
+ * This function can be used to fetch the current value of a configuration
+ * option.
+ *
+ * If a user-defined storage pointer is provided, then the value of a
+ * configuration option can be accessed directly via that pointer.
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] section The name of a section
+ * @param[in] key The name of a configuration option
+ * @param[out] bool_val The boolean value of the configuration option
+ * @param[out] is_default True if the value is the default value; may be
+ *                        NULL if the caller does not need this information
+ * @return 0 on success, errno in case of failure
+ */
+int conf_get_boolean(struct conf_context *conf,
+		     const char *section,
+		     const char *key,
+		     bool *bool_val,
+		     bool *is_default);
+
+/**
+ * @brief Dump the configuration to a file
+ *
+ * All the configuration options are dumped with their current values.
+ * If an option has a default value, then it is commented.
+ *
+ * Here is a sample output:
+ *
+ * [section1]
+ * key1 = value1
+ * key2 = value2
+ * # key3 = default_value3
+ * [section2]
+ * key4 = value4
+ *
+ * @param[in] conf The configuration options context
+ * @param[in] fp File pointer
+ */
+void conf_dump(struct conf_context *conf, FILE *fp);
+
+#endif /* __CTDB_CONF_H__ */
diff --git a/ctdb/common/conf_tool.c b/ctdb/common/conf_tool.c
new file mode 100644
index 0000000..2d0543d
--- /dev/null
+++ b/ctdb/common/conf_tool.c
@@ -0,0 +1,321 @@
+/*
+   Config options tool
+
+   Copyright (C) Amitay Isaacs 2018
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+
+#include <talloc.h>
+
+#include "lib/util/debug.h"
+
+#include "common/logging.h"
+#include "common/cmdline.h"
+#include "common/conf.h"
+#include "common/path.h"
+
+#include "common/logging_conf.h"
+#include "cluster/cluster_conf.h"
+#include "database/database_conf.h"
+#include "event/event_conf.h"
+#include "failover/failover_conf.h"
+#include "server/legacy_conf.h"
+
+#include "common/conf_tool.h"
+
+struct conf_tool_context {
+	struct cmdline_context *cmdline;
+	const char *conf_file;	/* set by conf_tool_run() from path_config() */
+	struct conf_context *conf;	/* set by conf_tool_run() via conf_init() */
+};
+/*
+ * "dump" command: takes no arguments.  Loads the config file with
+ * ignore_unknown = true (an ENOENT result from conf_load() is tolerated)
+ * and writes every option to stdout with conf_dump().
+ */
+static int conf_tool_dump(TALLOC_CTX *mem_ctx,
+			  int argc,
+			  const char **argv,
+			  void *private_data)
+{
+	struct conf_tool_context *ctx = talloc_get_type_abort(
+		private_data, struct conf_tool_context);
+	int ret;
+
+	if (argc != 0) {
+		cmdline_usage(ctx->cmdline, "dump");
+		return EINVAL;
+	}
+
+	ret = conf_load(ctx->conf, ctx->conf_file, true);
+	if (ret != 0 && ret != ENOENT) {
+		D_ERR("Failed to load config file %s\n", ctx->conf_file);
+		return ret;
+	}
+
+	conf_dump(ctx->conf, stdout);
+	return 0;
+}
+/*
+ * "get" command: expects exactly <section> <key>.
+ *
+ * The option must already be defined (checked with conf_query() before
+ * the file is loaded), otherwise ENOENT is returned.  The file is then
+ * loaded (ENOENT from conf_load() is tolerated) and the option's current
+ * value is printed on one line: strings as-is (a NULL string prints as an
+ * empty line), integers in decimal, booleans as "true"/"false".
+ */
+static int conf_tool_get(TALLOC_CTX *mem_ctx,
+			 int argc,
+			 const char **argv,
+			 void *private_data)
+{
+	struct conf_tool_context *ctx = talloc_get_type_abort(
+		private_data, struct conf_tool_context);
+	const char *section, *option;
+	enum conf_type type;
+	int ret;
+	bool ok;
+	const char *s_val = NULL;
+	int i_val;
+	bool b_val;
+
+	if (argc != 2) {
+		cmdline_usage(ctx->cmdline, "get");
+		return EINVAL;
+	}
+
+	section = argv[0];
+	option = argv[1];
+
+	ok = conf_query(ctx->conf, section, option, &type);
+	if (!ok) {
+		D_ERR("Configuration option [%s] -> \"%s\" not defined\n",
+		      section, option);
+		return ENOENT;
+	}
+
+	ret = conf_load(ctx->conf, ctx->conf_file, true);
+	if (ret != 0 && ret != ENOENT) {
+		D_ERR("Failed to load config file %s\n", ctx->conf_file);
+		return ret;
+	}
+
+	switch (type) {
+	case CONF_STRING:
+		ret = conf_get_string(ctx->conf,
+				      section,
+				      option,
+				      &s_val,
+				      NULL);
+		break;
+
+	case CONF_INTEGER:
+		ret = conf_get_integer(ctx->conf,
+				       section,
+				       option,
+				       &i_val,
+				       NULL);
+		break;
+
+	case CONF_BOOLEAN:
+		ret = conf_get_boolean(ctx->conf,
+				       section,
+				       option,
+				       &b_val,
+				       NULL);
+		break;
+
+	default:
+		D_ERR("Unknown configuration option type\n");
+		return EINVAL;
+	}
+
+	if (ret != 0) {
+		D_ERR("Failed to get configuration option value\n");
+		return ret;
+	}
+
+	switch (type) {
+	case CONF_STRING:
+		printf("%s\n", s_val == NULL ? "" : s_val);
+		break;
+
+	case CONF_INTEGER:
+		printf("%d\n", i_val);
+		break;
+
+	case CONF_BOOLEAN:
+		printf("%s\n", b_val ? "true" : "false");
+		break;
+	}
+
+	return 0;
+}
+/*
+ * "validate" command: takes no arguments.  Loads the config file with
+ * ignore_unknown = false and, unlike "dump" and "get", treats every
+ * non-zero conf_load() result (ENOENT included) as a failure.
+ */
+static int conf_tool_validate(TALLOC_CTX *mem_ctx,
+			      int argc,
+			      const char **argv,
+			      void *private_data)
+{
+	struct conf_tool_context *ctx = talloc_get_type_abort(
+		private_data, struct conf_tool_context);
+	int ret;
+
+	if (argc != 0) {
+		cmdline_usage(ctx->cmdline, "validate");
+		return EINVAL;
+	}
+
+	ret = conf_load(ctx->conf, ctx->conf_file, false);
+	if (ret != 0) {
+		D_ERR("Failed to load config file %s\n", ctx->conf_file);
+		return ret;
+	}
+
+	return 0;
+}
+/* Sub-commands handed to cmdline_init() by conf_tool_init() */
+struct cmdline_command conf_commands[] = {
+	{ "dump", conf_tool_dump,
+		"Dump configuration", NULL },
+	{ "get", conf_tool_get,
+		"Get a config value", "<section> <key>" },
+	{ "validate", conf_tool_validate,
+		"Validate configuration file", NULL },
+	CMDLINE_TABLEEND
+};
+/*
+ * Allocate a tool context on mem_ctx, register conf_commands with
+ * cmdline_init() and parse argc/argv.
+ *
+ * On success returns 0 and stores the context in *result.  On failure the
+ * context is freed, *result is left untouched and the error is returned
+ * (ENOMEM, or whatever cmdline_init()/cmdline_parse() returned; usage is
+ * printed when parsing fails).
+ */
+int conf_tool_init(TALLOC_CTX *mem_ctx,
+		   const char *prog,
+		   struct poptOption *options,
+		   int argc,
+		   const char **argv,
+		   bool parse_options,
+		   struct conf_tool_context **result)
+{
+	struct conf_tool_context *ctx;
+	int ret;
+
+	ctx = talloc_zero(mem_ctx, struct conf_tool_context);
+	if (ctx == NULL) {
+		D_ERR("Memory allocation error\n");
+		return ENOMEM;
+	}
+
+	ret = cmdline_init(ctx,
+			   prog,
+			   options,
+			   NULL,
+			   conf_commands,
+			   &ctx->cmdline);
+	if (ret != 0) {
+		D_ERR("Failed to initialize cmdline, ret=%d\n", ret);
+		talloc_free(ctx);
+		return ret;
+	}
+
+	ret = cmdline_parse(ctx->cmdline, argc, argv, parse_options);
+	if (ret != 0) {
+		cmdline_usage(ctx->cmdline, NULL);
+		talloc_free(ctx);
+		return ret;
+	}
+
+	*result = ctx;
+	return 0;
+}
+/*
+ * Run the command selected during conf_tool_init().
+ *
+ * Resolves the config file path, creates the conf context, lets every
+ * subsystem define its sections/options, checks the definitions with
+ * conf_valid() and finally dispatches through cmdline_run(), which
+ * receives "result" to report the command's own result.
+ *
+ * Returns 0 or an errno value; *result is only meaningful when
+ * cmdline_run() was reached.
+ */
+int conf_tool_run(struct conf_tool_context *ctx, int *result)
+{
+	int ret;
+
+	ctx->conf_file = path_config(ctx);
+	if (ctx->conf_file == NULL) {
+		D_ERR("Memory allocation error\n");
+		return ENOMEM;
+	}
+
+	ret = conf_init(ctx, &ctx->conf);
+	if (ret != 0) {
+		D_ERR("Failed to initialize config\n");
+		return ret;
+	}
+
+	/* Call functions to initialize config sections/variables */
+	logging_conf_init(ctx->conf, NULL);
+	cluster_conf_init(ctx->conf);
+	database_conf_init(ctx->conf);
+	event_conf_init(ctx->conf);
+	failover_conf_init(ctx->conf);
+	legacy_conf_init(ctx->conf);
+
+	if (! conf_valid(ctx->conf)) {
+		D_ERR("Failed to define configuration options\n");
+		return EINVAL;
+	}
+
+	ret = cmdline_run(ctx->cmdline, ctx, result);
+	return ret;
+}
+
+#ifdef CTDB_CONF_TOOL
+
+static struct {
+	const char *debug;
+} conf_data = {
+	.debug = "ERROR",
+};
+
+struct poptOption conf_options[] = {
+	POPT_AUTOHELP
+	{ "debug", 'd', POPT_ARG_STRING, &conf_data.debug, 0,
+		"debug level", "ERROR|WARNING|NOTICE|INFO|DEBUG" },
+	POPT_TABLEEND
+};
+/*
+ * Stand-alone "ctdb-config" entry point.  Exits with 1 if initialisation
+ * or conf_tool_run() fails, otherwise with the result reported by the
+ * command.  An unparsable --debug value falls back to DEBUG_ERR.
+ */
+int main(int argc, const char **argv)
+{
+	TALLOC_CTX *mem_ctx;
+	struct conf_tool_context *ctx;
+	int ret, result;
+	int level;
+	bool ok;
+
+	mem_ctx = talloc_new(NULL);
+	if (mem_ctx == NULL) {
+		fprintf(stderr, "Memory allocation error\n");
+		exit(1);
+	}
+
+	ret = conf_tool_init(mem_ctx,
+			     "ctdb-config",
+			     conf_options,
+			     argc,
+			     argv,
+			     true,
+			     &ctx);
+	if (ret != 0) {
+		talloc_free(mem_ctx);
+		exit(1);
+	}
+
+	setup_logging("ctdb-config", DEBUG_STDERR);
+	ok = debug_level_parse(conf_data.debug, &level);
+	if (!ok) {
+		level = DEBUG_ERR;
+	}
+	debuglevel_set(level);
+
+	ret = conf_tool_run(ctx, &result);
+	if (ret != 0) {
+		result = 1;
+	}
+
+	talloc_free(mem_ctx);
+	exit(result);
+}
+
+#endif /* CTDB_CONF_TOOL */
diff --git 
a/ctdb/common/conf_tool.h b/ctdb/common/conf_tool.h
new file mode 100644
index 0000000..c77419f
--- /dev/null
+++ b/ctdb/common/conf_tool.h
@@ -0,0 +1,39 @@
+/*
+   Config options tool
+
+   Copyright (C) Amitay Isaacs 2018
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __CTDB_CONF_TOOL_H__
+#define __CTDB_CONF_TOOL_H__
+
+#include <stdbool.h>
+#include <popt.h>
+#include <talloc.h>
+
+struct conf_tool_context;
+/**
+ * @brief Create a config tool context and parse the command line
+ *
+ * @param[in] mem_ctx Talloc memory context that owns the new context
+ * @param[in] prog Program name passed on to the command line parser
+ * @param[in] options popt option table
+ * @param[in] argc Number of command line arguments
+ * @param[in] argv Command line arguments
+ * @param[in] parse_options Passed through to cmdline_parse()
+ * @param[out] result The new config tool context (set only on success)
+ * @return 0 on success, errno on failure
+ */
+int conf_tool_init(TALLOC_CTX *mem_ctx,
+		   const char *prog,
+		   struct poptOption *options,
+		   int argc,
+		   const char **argv,
+		   bool parse_options,
+		   struct conf_tool_context **result);
+/**
+ * @brief Run the command selected on the command line
+ *
+ * @param[in] ctx Context returned by conf_tool_init()
+ * @param[out] result Result of the executed command
+ * @return 0 on success, errno on failure
+ */
+int conf_tool_run(struct conf_tool_context *ctx, int *result);
+
+#endif /* __CTDB_CONF_TOOL_H__ */
diff --git a/ctdb/common/ctdb_io.c b/ctdb/common/ctdb_io.c
new file mode 100644
index 0000000..bf8bc73
--- /dev/null
+++ b/ctdb/common/ctdb_io.c
@@ -0,0 +1,498 @@
+/*
+   ctdb database library
+   Utility functions to read/write blobs of data from a file descriptor
+   and handle the case where we might need multiple read/writes to get all the
+   data.
+
+   Copyright (C) Andrew Tridgell  2006
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <tdb.h>
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/dlinklist.h"
+#include "lib/util/debug.h"
+#include "lib/util/sys_rw.h"
+
+#include "ctdb_private.h"
+#include "ctdb_client.h"
+
+#include "common/logging.h"
+#include "common/common.h"
+
+/* structures for packet queueing, used only within this file */
+struct ctdb_buffer {
+	uint8_t *data;		/* talloc'ed storage, NULL when not allocated */
+	uint32_t length;	/* bytes of unprocessed data held in the buffer */
+	uint32_t size;		/* allocated size of data */
+	uint32_t offset;	/* where the unprocessed data starts in data */
+};
+
+struct ctdb_queue_pkt {
+	struct ctdb_queue_pkt *next, *prev;
+	uint8_t *data;		/* next byte to write; points into buf */
+	uint32_t length;	/* bytes still to be written */
+	uint32_t full_length;	/* packet length when it was queued by the caller */
+	uint8_t buf[];
+};
+
+struct ctdb_queue {
+	struct ctdb_context *ctdb;
+	struct tevent_immediate *im;	/* used to defer processing / dead-queue handling */
+	struct ctdb_buffer buffer; /* input buffer */
+	struct ctdb_queue_pkt *out_queue, *out_queue_tail;
+	uint32_t out_queue_length;	/* number of packets on out_queue */
+	struct tevent_fd *fde;
+	int fd;				/* -1 once the queue is dead */
+	size_t alignment;		/* if non-zero, sent lengths are rounded up with this */
+	void *private_data;		/* passed back to callback */
+	ctdb_queue_cb_fn_t callback;	/* called per packet, or with (NULL, 0) on failure */
+	TALLOC_CTX *data_pool;		/* parent for packets handed to callback */
+	const char *name;
+	uint32_t buffer_size;		/* initial allocation size for buffer.data */
+};
+
+
+/* Number of packets currently waiting on the output queue */
+uint32_t ctdb_queue_length(struct ctdb_queue *queue)
+{
+	return queue->out_queue_length;
+}
+
+static void queue_process(struct ctdb_queue *queue);
+/* Immediate-event trampoline: private_data is the queue to process */
+static void queue_process_event(struct tevent_context *ev, struct tevent_immediate *im,
+				void *private_data)
+{
+	struct ctdb_queue *queue = talloc_get_type(private_data, struct ctdb_queue);
+
+	queue_process(queue);
+}
+
+/*
+ * This function is used to process data in queue buffer.
+ *
+ * Queue callback function can end up freeing the queue, there should not be a
+ * loop processing packets from queue buffer. Instead set up a timed event for
+ * immediate run to process remaining packets from buffer.
+ *
+ * At most one packet is handed to the callback per call.  The callback is
+ * invoked last, after all queue state has been updated, and is called with
+ * (NULL, 0) when the buffered data is found to be invalid.
+ */
+static void queue_process(struct ctdb_queue *queue)
+{
+	uint32_t pkt_size;
+	uint8_t *data = NULL;
+
+	if (queue->buffer.length < sizeof(pkt_size)) {
+		return;
+	}
+
+	/*
+	 * Did we at least read the size into the buffer?
+	 *
+	 * buffer.offset advances by the size of each packet taken from the
+	 * wire, so the length field is not guaranteed to be suitably aligned
+	 * for a direct uint32_t load; copy it out instead.
+	 */
+	memcpy(&pkt_size,
+	       queue->buffer.data + queue->buffer.offset,
+	       sizeof(pkt_size));
+	if (pkt_size == 0) {
+		DEBUG(DEBUG_CRIT, ("Invalid packet of length 0\n"));
+		goto failed;
+	}
+
+	/* the buffer doesn't contain the full packet, return to get the rest */
+	if (queue->buffer.length < pkt_size) {
+		return;
+	}
+
+	/* Extract complete packet */
+	data = talloc_memdup(queue->data_pool,
+			     queue->buffer.data + queue->buffer.offset,
+			     pkt_size);
+
+	if (data == NULL) {
+		D_ERR("read error alloc failed for %u\n", pkt_size);
+		return;
+	}
+
+	queue->buffer.offset += pkt_size;
+	queue->buffer.length -= pkt_size;
+
+	if (queue->buffer.offset < pkt_size ||
+	    queue->buffer.offset > queue->buffer.size) {
+		D_ERR("buffer offset overflow\n");
+		/* The copy is never handed to the callback, do not leak it */
+		TALLOC_FREE(data);
+		TALLOC_FREE(queue->buffer.data);
+		memset(&queue->buffer, 0, sizeof(queue->buffer));
+		goto failed;
+	}
+
+	if (queue->buffer.length > 0) {
+		/* There is more data to be processed, schedule an event */
+		tevent_schedule_immediate(queue->im, queue->ctdb->ev,
+					  queue_process_event, queue);
+	} else {
+		/* Buffer drained: release it if it grew beyond the default */
+		if (queue->buffer.size > queue->buffer_size) {
+			TALLOC_FREE(queue->buffer.data);
+			queue->buffer.size = 0;
+		}
+		queue->buffer.offset = 0;
+	}
+
+	/* It is the responsibility of the callback to free 'data' */
+	queue->callback(data, pkt_size, queue->private_data);
+	return;
+
+failed:
+	queue->callback(NULL, 0, queue->private_data);
+}
+
+/*
+  called when an incoming connection is readable
+  This function MUST be safe for reentry via the queue callback!
+*/
+static void queue_io_read(struct ctdb_queue *queue)
+{
+	int num_ready = 0;
+	uint32_t pkt_size = 0;
+	uint32_t start_offset;
+	ssize_t nread;
+	uint8_t *data;
+
+	/* check how much data is available on the socket for immediately
+	   guaranteed nonblocking access.
+	   as long as we are careful never to try to read more than this
+	   we know all reads will be successful and will neither block
+	   nor fail with a "data not available right now" error
+	*/
+	if (ioctl(queue->fd, FIONREAD, &num_ready) != 0) {
+		return;
+	}
+	if (num_ready == 0) {
+		/* the descriptor has been closed */
+		goto failed;
+	}
+
+	if (queue->buffer.data == NULL) {
+		/* starting fresh, allocate buf to read data */
+		queue->buffer.data = talloc_size(queue, queue->buffer_size);
+		if (queue->buffer.data == NULL) {
+			/* report the size that was actually requested */
+			DEBUG(DEBUG_ERR, ("read error alloc failed for %u\n",
+					  (unsigned)queue->buffer_size));
+			goto failed;
+		}
+		queue->buffer.size = queue->buffer_size;
+		goto data_read;
+	}
+
+	if (sizeof(pkt_size) > queue->buffer.length) {
+		/* data read is not sufficient to gather message size */
+		goto buffer_shift;
+	}
+
+	/*
+	 * buffer.offset is not necessarily a multiple of 4, so copy the
+	 * length field out rather than loading it through a cast pointer.
+	 */
+	memcpy(&pkt_size,
+	       queue->buffer.data + queue->buffer.offset,
+	       sizeof(pkt_size));
+	if (pkt_size > queue->buffer.size) {
+		data = talloc_realloc_size(queue,
+					   queue->buffer.data,
+					   pkt_size);
+		if (data == NULL) {
+			DBG_ERR("read error realloc failed for %u\n", pkt_size);
+			goto failed;
+		}
+		queue->buffer.data = data;
+		queue->buffer.size = pkt_size;
+		/* fall through here as we might need to move the data as well */
+	}
+
+buffer_shift:
+	if (sizeof(pkt_size) > queue->buffer.size - queue->buffer.offset ||
+	    pkt_size > queue->buffer.size - queue->buffer.offset) {
+		/* Either the offset has progressed too far to host at least
+		 * the size information or the remaining space in the buffer
+		 * is not sufficient for the full message.
+		 * Therefore, move the data and try again.
+		 */
+		memmove(queue->buffer.data,
+			queue->buffer.data + queue->buffer.offset,
+			queue->buffer.length);
+		queue->buffer.offset = 0;
+	}
+
+data_read:
+	/* first free byte in the buffer; guard against uint32_t wrap-around */
+	start_offset = queue->buffer.length + queue->buffer.offset;
+	if (start_offset < queue->buffer.length) {
+		DBG_ERR("Buffer overflow\n");
+		goto failed;
+	}
+	if (start_offset > queue->buffer.size) {
+		DBG_ERR("Buffer overflow\n");
+		goto failed;
+	}
+
+	/* never read more than fits behind the data already buffered */
+	num_ready = MIN(num_ready, queue->buffer.size - start_offset);
+
+	if (num_ready > 0) {
+		nread = sys_read(queue->fd,
+				 queue->buffer.data +
+					queue->buffer.offset +
+					queue->buffer.length,
+				 num_ready);
+		if (nread <= 0) {
+			DEBUG(DEBUG_ERR, ("read error nread=%d\n", (int)nread));
+			goto failed;
+		}
+		queue->buffer.length += nread;
+	}
+
+	queue_process(queue);
+	return;
+
+failed:
+	queue->callback(NULL, 0, queue->private_data);
+}
+
+
+/* used when an event triggers a dead queue */
+static void queue_dead(struct tevent_context *ev, struct tevent_immediate *im,
+		       void *private_data)
+{
+	struct ctdb_queue *queue = talloc_get_type(private_data, struct ctdb_queue);
+	queue->callback(NULL, 0, queue->private_data);
+}
+
+
+/*
+  called when an incoming connection is writeable
+*/
+static void queue_io_write(struct ctdb_queue *queue)
+{
+	while (queue->out_queue) {
+		struct ctdb_queue_pkt *pkt = queue->out_queue;
+		ssize_t n;
+		if (queue->ctdb->flags & CTDB_FLAG_TORTURE) {
+			n = write(queue->fd, pkt->data, 1);
+		} else {
+			n = write(queue->fd, pkt->data, pkt->length);
+		}
+
+		if (n == -1 && errno != EAGAIN && errno != EWOULDBLOCK) {
+			if (pkt->length != pkt->full_length) {
+				/* partial packet sent - we have to drop it */
+				DLIST_REMOVE(queue->out_queue, pkt);
+				queue->out_queue_length--;
+				talloc_free(pkt);
+			}
+			TALLOC_FREE(queue->fde);
+			queue->fd = -1;
+			tevent_schedule_immediate(queue->im, queue->ctdb->ev,
+						  queue_dead, queue);
+			return;
+		}
+		if (n <= 0) return;
+
+		if (n != pkt->length) {
+			pkt->length -= n;
+			pkt->data += n;
+			return;
+		}
+
+		
DLIST_REMOVE(queue->out_queue, pkt);
+		queue->out_queue_length--;
+		talloc_free(pkt);
+	}
+
+	TEVENT_FD_NOT_WRITEABLE(queue->fde);
+}
+
+/*
+  called when an incoming connection is readable or writeable
+
+  Readable takes priority: when TEVENT_FD_READ is set only the read path
+  runs for this event, otherwise the write path runs.
+*/
+static void queue_io_handler(struct tevent_context *ev, struct tevent_fd *fde,
+			     uint16_t flags, void *private_data)
+{
+	struct ctdb_queue *queue = talloc_get_type(private_data, struct ctdb_queue);
+
+	if (flags & TEVENT_FD_READ) {
+		queue_io_read(queue);
+	} else {
+		queue_io_write(queue);
+	}
+}
+
+
+/*
+  queue a packet for sending
+
+  When the queue has an alignment, the first 32 bits of 'data' are
+  overwritten with the rounded-up length and the bytes between 'length'
+  and the rounded-up length are zeroed in the caller's buffer.
+  NOTE(review): this assumes the caller's buffer has room for the padding -
+  confirm against the packet allocator.
+
+  0 is also returned when the queue has no valid fd and when the immediate
+  write fails; in the latter case the dead queue is reported later through
+  the queue callback.
+*/
+int ctdb_queue_send(struct ctdb_queue *queue, uint8_t *data, uint32_t length)
+{
+	struct ctdb_req_header *hdr = (struct ctdb_req_header *)data;
+	struct ctdb_queue_pkt *pkt;
+	uint32_t length2, full_length;
+
+	/* If the queue does not have valid fd, no point queueing a packet */
+	if (queue->fd == -1) {
+		return 0;
+	}
+
+	if (queue->alignment) {
+		/* enforce the length and alignment rules from the tcp packet allocator */
+		length2 = (length+(queue->alignment-1)) & ~(queue->alignment-1);
+		*(uint32_t *)data = length2;
+	} else {
+		length2 = length;
+	}
+
+	if (length2 != length) {
+		memset(data+length, 0, length2-length);
+	}
+
+	full_length = length2;
+
+	/* if the queue is empty then try an immediate write, avoiding
+	   queue overhead. This relies on non-blocking sockets */
+	if (queue->out_queue == NULL && queue->fd != -1 &&
+	    !(queue->ctdb->flags & CTDB_FLAG_TORTURE)) {
+		ssize_t n = write(queue->fd, data, length2);
+		if (n == -1 && errno != EAGAIN && errno != EWOULDBLOCK) {
+			TALLOC_FREE(queue->fde);
+			queue->fd = -1;
+			tevent_schedule_immediate(queue->im, queue->ctdb->ev,
+						  queue_dead, queue);
+			/* yes, we report success, as the dead node is
+			   handled via a separate event */
+			return 0;
+		}
+		if (n > 0) {
+			data += n;
+			length2 -= n;
+		}
+		if (length2 == 0) return 0;
+	}
+
+	pkt = talloc_size(
+		queue, offsetof(struct ctdb_queue_pkt, buf) + length2);
+	CTDB_NO_MEMORY(queue->ctdb, pkt);
+	talloc_set_name_const(pkt, "struct ctdb_queue_pkt");
+
+	pkt->data = pkt->buf;
+	memcpy(pkt->data, data, length2);
+
+	pkt->length = length2;
+	pkt->full_length = full_length;
+
+	if (queue->out_queue == NULL && queue->fd != -1) {
+		TEVENT_FD_WRITEABLE(queue->fde);
+	}
+
+	DLIST_ADD_END(queue->out_queue, pkt);
+
+	queue->out_queue_length++;
+
+	if (queue->ctdb->tunable.verbose_memory_names != 0) {
+		switch (hdr->operation) {
+		case CTDB_REQ_CONTROL: {
+			struct ctdb_req_control_old *c = (struct ctdb_req_control_old *)hdr;
+			talloc_set_name(pkt, "ctdb_queue_pkt: %s control opcode=%u srvid=%llu datalen=%u",
+					queue->name, (unsigned)c->opcode, (unsigned long long)c->srvid, (unsigned)c->datalen);
+			break;
+		}
+		case CTDB_REQ_MESSAGE: {
+			struct ctdb_req_message_old *m = (struct ctdb_req_message_old *)hdr;
+			talloc_set_name(pkt, "ctdb_queue_pkt: %s message srvid=%llu datalen=%u",
+					queue->name, (unsigned long long)m->srvid, (unsigned)m->datalen);
+			break;
+		}
+		default:
+			talloc_set_name(pkt, "ctdb_queue_pkt: %s operation=%u length=%u src=%u dest=%u",
+					queue->name, (unsigned)hdr->operation, (unsigned)hdr->length,
+					(unsigned)hdr->srcnode, (unsigned)hdr->destnode);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+
+/*
+  setup the fd used by the queue
+ */
+int ctdb_queue_set_fd(struct ctdb_queue *queue, int fd)
+{
+	queue->fd 
= fd; + TALLOC_FREE(queue->fde); + + if (fd != -1) { + queue->fde = tevent_add_fd(queue->ctdb->ev, queue, fd, + TEVENT_FD_READ, + queue_io_handler, queue); + if (queue->fde == NULL) { + return -1; + } + tevent_fd_set_auto_close(queue->fde); + + if (queue->out_queue) { + TEVENT_FD_WRITEABLE(queue->fde); + } + } + + return 0; +} + +/* + setup a packet queue on a socket + */ +struct ctdb_queue *ctdb_queue_setup(struct ctdb_context *ctdb, + TALLOC_CTX *mem_ctx, int fd, int alignment, + ctdb_queue_cb_fn_t callback, + void *private_data, const char *fmt, ...) +{ + struct ctdb_queue *queue; + va_list ap; + + queue = talloc_zero(mem_ctx, struct ctdb_queue); + CTDB_NO_MEMORY_NULL(ctdb, queue); + va_start(ap, fmt); + queue->name = talloc_vasprintf(mem_ctx, fmt, ap); + va_end(ap); + CTDB_NO_MEMORY_NULL(ctdb, queue->name); + + queue->im= tevent_create_immediate(queue); + CTDB_NO_MEMORY_NULL(ctdb, queue->im); + + queue->ctdb = ctdb; + queue->fd = fd; + queue->alignment = alignment; + queue->private_data = private_data; + queue->callback = callback; + if (fd != -1) { + if (ctdb_queue_set_fd(queue, fd) != 0) { + talloc_free(queue); + return NULL; + } + } + + queue->buffer_size = ctdb->tunable.queue_buffer_size; + /* In client code, ctdb->tunable is not initialized. + * This does not affect recovery daemon. 
+ */ + if (queue->buffer_size == 0) { + queue->buffer_size = 1024; + } + + queue->data_pool = talloc_pool(queue, queue->buffer_size); + if (queue->data_pool == NULL) { + TALLOC_FREE(queue); + return NULL; + } + + return queue; +} diff --git a/ctdb/common/ctdb_ltdb.c b/ctdb/common/ctdb_ltdb.c new file mode 100644 index 0000000..0b79ab4 --- /dev/null +++ b/ctdb/common/ctdb_ltdb.c @@ -0,0 +1,430 @@ +/* + ctdb ltdb code + + Copyright (C) Andrew Tridgell 2006 + Copyright (C) Ronnie sahlberg 2011 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "replace.h" +#include "system/network.h" +#include "system/filesys.h" + +#include <tdb.h> + +#include "lib/tdb_wrap/tdb_wrap.h" +#include "lib/util/dlinklist.h" +#include "lib/util/debug.h" + +#include "ctdb_private.h" + +#include "common/common.h" +#include "common/logging.h" + + +/* + * Calculate tdb flags based on databse type + */ +int ctdb_db_tdb_flags(uint8_t db_flags, bool with_valgrind, bool with_mutex) +{ + int tdb_flags = 0; + + if (db_flags & CTDB_DB_FLAGS_PERSISTENT) { + tdb_flags = TDB_DEFAULT; + + } else if (db_flags & CTDB_DB_FLAGS_REPLICATED) { + tdb_flags = TDB_NOSYNC | + TDB_CLEAR_IF_FIRST | + TDB_INCOMPATIBLE_HASH; + + } else { + tdb_flags = TDB_NOSYNC | + TDB_CLEAR_IF_FIRST | + TDB_INCOMPATIBLE_HASH; + +#ifdef TDB_MUTEX_LOCKING + if (with_mutex && tdb_runtime_check_for_robust_mutexes()) { + tdb_flags |= TDB_MUTEX_LOCKING; + } +#endif + + } + + tdb_flags |= TDB_DISALLOW_NESTING; + if (with_valgrind) { + tdb_flags |= TDB_NOMMAP; + } + + return tdb_flags; +} + +/* + find an attached ctdb_db handle given a name + */ +struct ctdb_db_context *ctdb_db_handle(struct ctdb_context *ctdb, const char *name) +{ + struct ctdb_db_context *tmp_db; + for (tmp_db=ctdb->db_list;tmp_db;tmp_db=tmp_db->next) { + if (strcmp(name, tmp_db->db_name) == 0) { + return tmp_db; + } + } + return NULL; +} + +bool ctdb_db_persistent(struct ctdb_db_context *ctdb_db) +{ + if (ctdb_db->db_flags & CTDB_DB_FLAGS_PERSISTENT) { + return true; + } + return false; +} + +bool ctdb_db_replicated(struct ctdb_db_context *ctdb_db) +{ + if (ctdb_db->db_flags & CTDB_DB_FLAGS_REPLICATED) { + return true; + } + return false; +} + +bool ctdb_db_volatile(struct ctdb_db_context *ctdb_db) +{ + if ((ctdb_db->db_flags & CTDB_DB_FLAGS_PERSISTENT) || + (ctdb_db->db_flags & CTDB_DB_FLAGS_REPLICATED)) { + return false; + } + return true; +} + +bool ctdb_db_readonly(struct ctdb_db_context *ctdb_db) +{ + if (ctdb_db->db_flags & CTDB_DB_FLAGS_READONLY) { + return true; + } + return false; +} 
+ +void ctdb_db_set_readonly(struct ctdb_db_context *ctdb_db) +{ + ctdb_db->db_flags |= CTDB_DB_FLAGS_READONLY; +} + +void ctdb_db_reset_readonly(struct ctdb_db_context *ctdb_db) +{ + ctdb_db->db_flags &= ~CTDB_DB_FLAGS_READONLY; +} + +bool ctdb_db_sticky(struct ctdb_db_context *ctdb_db) +{ + if (ctdb_db->db_flags & CTDB_DB_FLAGS_STICKY) { + return true; + } + return false; +} + +void ctdb_db_set_sticky(struct ctdb_db_context *ctdb_db) +{ + ctdb_db->db_flags |= CTDB_DB_FLAGS_STICKY; +} + +/* + return the lmaster given a key +*/ +uint32_t ctdb_lmaster(struct ctdb_context *ctdb, const TDB_DATA *key) +{ + uint32_t idx, lmaster; + + idx = ctdb_hash(key) % ctdb->vnn_map->size; + lmaster = ctdb->vnn_map->map[idx]; + + return lmaster; +} + + +/* + construct an initial header for a record with no ltdb header yet +*/ +static void ltdb_initial_header(struct ctdb_db_context *ctdb_db, + TDB_DATA key, + struct ctdb_ltdb_header *header) +{ + ZERO_STRUCTP(header); + /* initial dmaster is the lmaster */ + header->dmaster = ctdb_lmaster(ctdb_db->ctdb, &key); + header->flags = CTDB_REC_FLAG_AUTOMATIC; +} + +struct ctdb_ltdb_fetch_state { + struct ctdb_ltdb_header *header; + TALLOC_CTX *mem_ctx; + TDB_DATA *data; + int ret; + bool found; +}; + +static int ctdb_ltdb_fetch_fn(TDB_DATA key, TDB_DATA data, void *private_data) +{ + struct ctdb_ltdb_fetch_state *state = private_data; + struct ctdb_ltdb_header *header = state->header; + TDB_DATA *dstdata = state->data; + + if (data.dsize < sizeof(*header)) { + return 0; + } + + state->found = true; + memcpy(header, data.dptr, sizeof(*header)); + + if (dstdata != NULL) { + dstdata->dsize = data.dsize - sizeof(struct ctdb_ltdb_header); + dstdata->dptr = talloc_memdup( + state->mem_ctx, + data.dptr + sizeof(struct ctdb_ltdb_header), + dstdata->dsize); + if (dstdata->dptr == NULL) { + state->ret = -1; + } + } + + return 0; +} + +/* + fetch a record from the ltdb, separating out the header information + and returning the body of the record. 
A valid (initial) header is + returned if the record is not present +*/ +int ctdb_ltdb_fetch(struct ctdb_db_context *ctdb_db, + TDB_DATA key, struct ctdb_ltdb_header *header, + TALLOC_CTX *mem_ctx, TDB_DATA *data) +{ + struct ctdb_context *ctdb = ctdb_db->ctdb; + struct ctdb_ltdb_fetch_state state = { + .header = header, + .mem_ctx = mem_ctx, + .data = data, + .found = false, + }; + int ret; + + ret = tdb_parse_record( + ctdb_db->ltdb->tdb, key, ctdb_ltdb_fetch_fn, &state); + + if (ret == -1) { + enum TDB_ERROR err = tdb_error(ctdb_db->ltdb->tdb); + if (err != TDB_ERR_NOEXIST) { + return -1; + } + } + + if (state.ret != 0) { + DBG_DEBUG("ctdb_ltdb_fetch_fn failed\n"); + return state.ret; + } + + if (state.found) { + return 0; + } + + if (data != NULL) { + *data = tdb_null; + } + + if (ctdb->vnn_map == NULL) { + /* called from the client */ + header->dmaster = (uint32_t)-1; + return -1; + } + + ltdb_initial_header(ctdb_db, key, header); + if (ctdb_db_persistent(ctdb_db) || + header->dmaster == ctdb_db->ctdb->pnn) { + + ret = ctdb_ltdb_store(ctdb_db, key, header, tdb_null); + if (ret != 0) { + DBG_NOTICE("failed to store initial header\n"); + } + } + + return 0; +} + +/* + write a record to a normal database +*/ +int ctdb_ltdb_store(struct ctdb_db_context *ctdb_db, TDB_DATA key, + struct ctdb_ltdb_header *header, TDB_DATA data) +{ + struct ctdb_context *ctdb = ctdb_db->ctdb; + TDB_DATA rec[2]; + uint32_t hsize = sizeof(struct ctdb_ltdb_header); + int ret; + + if (ctdb_db->ctdb_ltdb_store_fn) { + return ctdb_db->ctdb_ltdb_store_fn(ctdb_db, key, header, data); + } + + if (ctdb->flags & CTDB_FLAG_TORTURE) { + TDB_DATA old; + struct ctdb_ltdb_header *h2; + + old = tdb_fetch(ctdb_db->ltdb->tdb, key); + h2 = (struct ctdb_ltdb_header *)old.dptr; + if (old.dptr != NULL && old.dsize >= hsize && + h2->rsn > header->rsn) { + DEBUG(DEBUG_ERR, + ("RSN regression! 
%"PRIu64" %"PRIu64"\n", + h2->rsn, header->rsn)); + } + if (old.dptr != NULL) { + free(old.dptr); + } + } + + rec[0].dsize = hsize; + rec[0].dptr = (uint8_t *)header; + + rec[1].dsize = data.dsize; + rec[1].dptr = data.dptr; + + ret = tdb_storev(ctdb_db->ltdb->tdb, key, rec, 2, TDB_REPLACE); + if (ret != 0) { + DEBUG(DEBUG_ERR, (__location__ " Failed to store dynamic data\n")); + } + + return ret; +} + +/* + lock a record in the ltdb, given a key + */ +int ctdb_ltdb_lock(struct ctdb_db_context *ctdb_db, TDB_DATA key) +{ + return tdb_chainlock(ctdb_db->ltdb->tdb, key); +} + +/* + unlock a record in the ltdb, given a key + */ +int ctdb_ltdb_unlock(struct ctdb_db_context *ctdb_db, TDB_DATA key) +{ + int ret = tdb_chainunlock(ctdb_db->ltdb->tdb, key); + if (ret != 0) { + DEBUG(DEBUG_ERR,("tdb_chainunlock failed on db %s [%s]\n", ctdb_db->db_name, tdb_errorstr(ctdb_db->ltdb->tdb))); + } + return ret; +} + + +/* + delete a record from a normal database +*/ +int ctdb_ltdb_delete(struct ctdb_db_context *ctdb_db, TDB_DATA key) +{ + if (! 
ctdb_db_volatile(ctdb_db)) { + DEBUG(DEBUG_WARNING, + ("Ignored deletion of empty record from " + "non-volatile database\n")); + return 0; + } + if (tdb_delete(ctdb_db->ltdb->tdb, key) != 0) { + DEBUG(DEBUG_ERR,("Failed to delete empty record.")); + return -1; + } + return 0; +} + +int ctdb_trackingdb_add_pnn(struct ctdb_context *ctdb, TDB_DATA *data, uint32_t pnn) +{ + unsigned int byte_pos = pnn / 8; + unsigned char bit_mask = 1 << (pnn % 8); + + if (byte_pos + 1 > data->dsize) { + char *buf; + + buf = malloc(byte_pos + 1); + memset(buf, 0, byte_pos + 1); + if (buf == NULL) { + DEBUG(DEBUG_ERR, ("Out of memory when allocating buffer of %d bytes for trackingdb\n", byte_pos + 1)); + return -1; + } + if (data->dptr != NULL) { + memcpy(buf, data->dptr, data->dsize); + free(data->dptr); + } + data->dptr = (uint8_t *)buf; + data->dsize = byte_pos + 1; + } + + data->dptr[byte_pos] |= bit_mask; + return 0; +} + +void ctdb_trackingdb_traverse(struct ctdb_context *ctdb, TDB_DATA data, ctdb_trackingdb_cb cb, void *private_data) +{ + unsigned int i; + + for(i = 0; i < data.dsize; i++) { + unsigned int j; + + for (j=0; j<8; j++) { + int mask = 1<<j; + + if (data.dptr[i] & mask) { + cb(ctdb, i * 8 + j, private_data); + } + } + } +} + +/* + this is the dummy null procedure that all databases support +*/ +int ctdb_null_func(struct ctdb_call_info *call) +{ + return 0; +} + +/* + this is a plain fetch procedure that all databases support +*/ +int ctdb_fetch_func(struct ctdb_call_info *call) +{ + call->reply_data = &call->record_data; + return 0; +} + +/* + this is a plain fetch procedure that all databases support + this returns the full record including the ltdb header +*/ +int ctdb_fetch_with_header_func(struct ctdb_call_info *call) +{ + call->reply_data = talloc(call, TDB_DATA); + if (call->reply_data == NULL) { + return -1; + } + call->reply_data->dsize = sizeof(struct ctdb_ltdb_header) + call->record_data.dsize; + call->reply_data->dptr = talloc_size(call->reply_data, 
call->reply_data->dsize); + if (call->reply_data->dptr == NULL) { + return -1; + } + memcpy(call->reply_data->dptr, call->header, sizeof(struct ctdb_ltdb_header)); + memcpy(&call->reply_data->dptr[sizeof(struct ctdb_ltdb_header)], call->record_data.dptr, call->record_data.dsize); + + return 0; +} + diff --git a/ctdb/common/ctdb_util.c b/ctdb/common/ctdb_util.c new file mode 100644 index 0000000..3f8fff9 --- /dev/null +++ b/ctdb/common/ctdb_util.c @@ -0,0 +1,674 @@ +/* + ctdb utility code + + Copyright (C) Andrew Tridgell 2006 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" +#include "system/network.h" +#include "system/filesys.h" +#include "system/wait.h" + +#include <tdb.h> + +#include "lib/util/debug.h" +#include "lib/util/samba_util.h" + +#include "ctdb_private.h" + +#include "protocol/protocol_util.h" + +#include "common/reqid.h" +#include "common/system.h" +#include "common/common.h" +#include "common/logging.h" + +/* + return error string for last error +*/ +const char *ctdb_errstr(struct ctdb_context *ctdb) +{ + return ctdb->err_msg; +} + + +/* + remember an error message +*/ +void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...) 
+{ + va_list ap; + talloc_free(ctdb->err_msg); + va_start(ap, fmt); + ctdb->err_msg = talloc_vasprintf(ctdb, fmt, ap); + DEBUG(DEBUG_ERR,("ctdb error: %s\n", ctdb->err_msg)); + va_end(ap); +} + +/* + a fatal internal error occurred - no hope for recovery +*/ +void ctdb_fatal(struct ctdb_context *ctdb, const char *msg) +{ + DEBUG(DEBUG_ALERT,("ctdb fatal error: %s\n", msg)); + abort(); +} + +/* + like ctdb_fatal() but a core/backtrace would not be useful +*/ +void ctdb_die(struct ctdb_context *ctdb, const char *msg) +{ + DEBUG(DEBUG_ALERT,("ctdb exiting with error: %s\n", msg)); + exit(1); +} + +/* Set the path of a helper program from envvar, falling back to + * dir/file if envvar unset. type is a string to print in log + * messages. helper is assumed to point to a statically allocated + * array of size bytes, initialised to "". If file is NULL don't fall + * back if envvar is unset. If dir is NULL and envvar is unset (but + * file is not NULL) then this is an error. Returns true if helper is + * set, either previously or this time. 
*/ +bool ctdb_set_helper(const char *type, char *helper, size_t size, + const char *envvar, + const char *dir, const char *file) +{ + const char *t; + struct stat st; + + if (helper[0] != '\0') { + /* Already set */ + return true; + } + + t = getenv(envvar); + if (t != NULL) { + if (strlen(t) >= size) { + DEBUG(DEBUG_ERR, + ("Unable to set %s - path too long\n", type)); + return false; + } + + strncpy(helper, t, size); + } else if (file == NULL) { + return false; + } else if (dir == NULL) { + DEBUG(DEBUG_ERR, + ("Unable to set %s - dir is NULL\n", type)); + return false; + } else { + int ret; + + ret = snprintf(helper, size, "%s/%s", dir, file); + if (ret < 0 || (size_t)ret >= size) { + DEBUG(DEBUG_ERR, + ("Unable to set %s - path too long\n", type)); + return false; + } + } + + if (stat(helper, &st) != 0) { + DEBUG(DEBUG_ERR, + ("Unable to set %s \"%s\" - %s\n", + type, helper, strerror(errno))); + return false; + } + if (!(st.st_mode & S_IXUSR)) { + DEBUG(DEBUG_ERR, + ("Unable to set %s \"%s\" - not executable\n", + type, helper)); + return false; + } + + DEBUG(DEBUG_NOTICE, + ("Set %s to \"%s\"\n", type, helper)); + return true; +} + +/* + parse a IP:port pair +*/ +int ctdb_parse_address(TALLOC_CTX *mem_ctx, const char *str, + ctdb_sock_addr *address) +{ + struct servent *se; + int port; + int ret; + + setservent(0); + se = getservbyname("ctdb", "tcp"); + endservent(); + + if (se == NULL) { + port = CTDB_PORT; + } else { + port = ntohs(se->s_port); + } + + ret = ctdb_sock_addr_from_string(str, address, false); + if (ret != 0) { + return -1; + } + ctdb_sock_addr_set_port(address, port); + + return 0; +} + + +/* + check if two addresses are the same +*/ +bool ctdb_same_address(ctdb_sock_addr *a1, ctdb_sock_addr *a2) +{ + return ctdb_same_ip(a1, a2) && + ctdb_addr_to_port(a1) == ctdb_addr_to_port(a2); +} + + +/* + hash function for mapping data to a VNN - taken from tdb +*/ +uint32_t ctdb_hash(const TDB_DATA *key) +{ + return tdb_jenkins_hash(discard_const(key)); 
+} + + +static uint32_t ctdb_marshall_record_size(TDB_DATA key, + struct ctdb_ltdb_header *header, + TDB_DATA data) +{ + return offsetof(struct ctdb_rec_data_old, data) + key.dsize + + data.dsize + (header ? sizeof(*header) : 0); +} + +static void ctdb_marshall_record_copy(struct ctdb_rec_data_old *rec, + uint32_t reqid, + TDB_DATA key, + struct ctdb_ltdb_header *header, + TDB_DATA data, + uint32_t length) +{ + uint32_t offset; + + rec->length = length; + rec->reqid = reqid; + rec->keylen = key.dsize; + memcpy(&rec->data[0], key.dptr, key.dsize); + offset = key.dsize; + + if (header) { + rec->datalen = data.dsize + sizeof(*header); + memcpy(&rec->data[offset], header, sizeof(*header)); + offset += sizeof(*header); + } else { + rec->datalen = data.dsize; + } + memcpy(&rec->data[offset], data.dptr, data.dsize); +} + +/* + form a ctdb_rec_data record from a key/data pair + + note that header may be NULL. If not NULL then it is included in the data portion + of the record + */ +struct ctdb_rec_data_old *ctdb_marshall_record(TALLOC_CTX *mem_ctx, + uint32_t reqid, + TDB_DATA key, + struct ctdb_ltdb_header *header, + TDB_DATA data) +{ + size_t length; + struct ctdb_rec_data_old *d; + + length = ctdb_marshall_record_size(key, header, data); + + d = (struct ctdb_rec_data_old *)talloc_size(mem_ctx, length); + if (d == NULL) { + return NULL; + } + + ctdb_marshall_record_copy(d, reqid, key, header, data, length); + return d; +} + + +/* helper function for marshalling multiple records */ +struct ctdb_marshall_buffer *ctdb_marshall_add(TALLOC_CTX *mem_ctx, + struct ctdb_marshall_buffer *m, + uint32_t db_id, + uint32_t reqid, + TDB_DATA key, + struct ctdb_ltdb_header *header, + TDB_DATA data) +{ + struct ctdb_rec_data_old *r; + struct ctdb_marshall_buffer *m2; + uint32_t length, offset; + + length = ctdb_marshall_record_size(key, header, data); + + if (m == NULL) { + offset = offsetof(struct ctdb_marshall_buffer, data); + m2 = talloc_zero_size(mem_ctx, offset + length); + } else 
{ + offset = talloc_get_size(m); + m2 = talloc_realloc_size(mem_ctx, m, offset + length); + } + if (m2 == NULL) { + TALLOC_FREE(m); + return NULL; + } + + if (m == NULL) { + m2->db_id = db_id; + } + + r = (struct ctdb_rec_data_old *)((uint8_t *)m2 + offset); + ctdb_marshall_record_copy(r, reqid, key, header, data, length); + m2->count++; + + return m2; +} + +/* we've finished marshalling, return a data blob with the marshalled records */ +TDB_DATA ctdb_marshall_finish(struct ctdb_marshall_buffer *m) +{ + TDB_DATA data; + data.dptr = (uint8_t *)m; + data.dsize = talloc_get_size(m); + return data; +} + +/* + loop over a marshalling buffer + + - pass r==NULL to start + - loop the number of times indicated by m->count +*/ +struct ctdb_rec_data_old *ctdb_marshall_loop_next( + struct ctdb_marshall_buffer *m, + struct ctdb_rec_data_old *r, + uint32_t *reqid, + struct ctdb_ltdb_header *header, + TDB_DATA *key, TDB_DATA *data) +{ + if (r == NULL) { + r = (struct ctdb_rec_data_old *)&m->data[0]; + } else { + r = (struct ctdb_rec_data_old *)(r->length + (uint8_t *)r); + } + + if (reqid != NULL) { + *reqid = r->reqid; + } + + if (key != NULL) { + key->dptr = &r->data[0]; + key->dsize = r->keylen; + } + if (data != NULL) { + data->dptr = &r->data[r->keylen]; + data->dsize = r->datalen; + if (header != NULL) { + data->dptr += sizeof(*header); + data->dsize -= sizeof(*header); + } + } + + if (header != NULL) { + if (r->datalen < sizeof(*header)) { + return NULL; + } + memcpy(header, &r->data[r->keylen], sizeof(*header)); + } + + return r; +} + +/* + This is used to canonicalize a ctdb_sock_addr structure. 
+*/ +void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip) +{ + ZERO_STRUCTP(cip); + + if (ip->sa.sa_family == AF_INET6) { + const char prefix[12] = { 0,0,0,0,0,0,0,0,0,0,0xff,0xff }; + if (memcmp(&ip->ip6.sin6_addr, prefix, sizeof(prefix)) == 0) { + /* Copy IPv4-mapped IPv6 addresses as IPv4 */ + cip->ip.sin_family = AF_INET; +#ifdef HAVE_SOCK_SIN_LEN + cip->ip.sin_len = sizeof(ctdb_sock_addr); +#endif + cip->ip.sin_port = ip->ip6.sin6_port; + memcpy(&cip->ip.sin_addr, + &ip->ip6.sin6_addr.s6_addr[12], + sizeof(cip->ip.sin_addr)); + } else { + cip->ip6.sin6_family = AF_INET6; +#ifdef HAVE_SOCK_SIN6_LEN + cip->ip6.sin6_len = sizeof(ctdb_sock_addr); +#endif + cip->ip6.sin6_port = ip->ip6.sin6_port; + memcpy(&cip->ip6.sin6_addr, + &ip->ip6.sin6_addr, + sizeof(cip->ip6.sin6_addr)); + } + + return; + } + + if (ip->sa.sa_family == AF_INET) { + cip->ip.sin_family = AF_INET; +#ifdef HAVE_SOCK_SIN_LEN + cip->ip.sin_len = sizeof(ctdb_sock_addr); +#endif + cip->ip.sin_port = ip->ip.sin_port; + memcpy(&cip->ip.sin_addr, + &ip->ip.sin_addr, + sizeof(ip->ip.sin_addr)); + + return; + } +} + +bool ctdb_same_ip(const ctdb_sock_addr *tip1, const ctdb_sock_addr *tip2) +{ + ctdb_sock_addr ip1, ip2; + + ctdb_canonicalize_ip(tip1, &ip1); + ctdb_canonicalize_ip(tip2, &ip2); + + if (ip1.sa.sa_family != ip2.sa.sa_family) { + return false; + } + + switch (ip1.sa.sa_family) { + case AF_INET: + return ip1.ip.sin_addr.s_addr == ip2.ip.sin_addr.s_addr; + case AF_INET6: + return !memcmp(&ip1.ip6.sin6_addr.s6_addr[0], + &ip2.ip6.sin6_addr.s6_addr[0], + 16); + default: + DEBUG(DEBUG_ERR, (__location__ " CRITICAL Can not compare sockaddr structures of type %u\n", ip1.sa.sa_family)); + return false; + } + + return true; +} + +/* + compare two ctdb_sock_addr structures + */ +bool ctdb_same_sockaddr(const ctdb_sock_addr *ip1, const ctdb_sock_addr *ip2) +{ + return ctdb_same_ip(ip1, ip2) && ip1->ip.sin_port == ip2->ip.sin_port; +} + +char *ctdb_addr_to_str(ctdb_sock_addr *addr) +{ + 
static char cip[128] = ""; + + switch (addr->sa.sa_family) { + case AF_INET: + inet_ntop(addr->ip.sin_family, &addr->ip.sin_addr, cip, sizeof(cip)); + break; + case AF_INET6: + inet_ntop(addr->ip6.sin6_family, &addr->ip6.sin6_addr, cip, sizeof(cip)); + break; + default: + DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family %u\n", addr->sa.sa_family)); + } + + return cip; +} + +unsigned ctdb_addr_to_port(ctdb_sock_addr *addr) +{ + switch (addr->sa.sa_family) { + case AF_INET: + return ntohs(addr->ip.sin_port); + break; + case AF_INET6: + return ntohs(addr->ip6.sin6_port); + break; + default: + DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family %u\n", addr->sa.sa_family)); + } + + return 0; +} + +/* Add a node to a node map with given address and flags */ +static bool node_map_add(TALLOC_CTX *mem_ctx, + const char *nstr, uint32_t flags, + struct ctdb_node_map_old **node_map) +{ + ctdb_sock_addr addr; + uint32_t num; + size_t s; + struct ctdb_node_and_flags *n; + + /* Might as well do this before trying to allocate memory */ + if (ctdb_parse_address(mem_ctx, nstr, &addr) == -1) { + return false; + } + + num = (*node_map)->num + 1; + s = offsetof(struct ctdb_node_map_old, nodes) + + num * sizeof(struct ctdb_node_and_flags); + *node_map = talloc_realloc_size(mem_ctx, *node_map, s); + if (*node_map == NULL) { + DEBUG(DEBUG_ERR, (__location__ " Out of memory\n")); + return false; + } + + n = &(*node_map)->nodes[(*node_map)->num]; + n->addr = addr; + n->pnn = (*node_map)->num; + n->flags = flags; + + (*node_map)->num++; + + return true; +} + +/* Read a nodes file into a node map */ +struct ctdb_node_map_old *ctdb_read_nodes_file(TALLOC_CTX *mem_ctx, + const char *nlist) +{ + char **lines; + int nlines; + int i; + struct ctdb_node_map_old *ret; + + /* Allocate node map header */ + ret = talloc_zero_size(mem_ctx, offsetof(struct ctdb_node_map_old, nodes)); + if (ret == NULL) { + DEBUG(DEBUG_ERR, (__location__ " Out of memory\n")); + return false; + } + + lines = 
file_lines_load(nlist, &nlines, 0, mem_ctx); + if (lines == NULL) { + DEBUG(DEBUG_ERR, ("Failed to read nodes file \"%s\"\n", nlist)); + return false; + } + while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) { + nlines--; + } + + for (i=0; i < nlines; i++) { + char *node; + uint32_t flags; + size_t len; + + node = lines[i]; + /* strip leading spaces */ + while((*node == ' ') || (*node == '\t')) { + node++; + } + + len = strlen(node); + + while ((len > 1) && + ((node[len-1] == ' ') || (node[len-1] == '\t'))) + { + node[len-1] = '\0'; + len--; + } + + if (len == 0) { + continue; + } + if (*node == '#') { + /* A "deleted" node is a node that is + commented out in the nodes file. This is + used instead of removing a line, which + would cause subsequent nodes to change + their PNN. */ + flags = NODE_FLAGS_DELETED; + node = discard_const("0.0.0.0"); + } else { + flags = 0; + } + if (!node_map_add(mem_ctx, node, flags, &ret)) { + talloc_free(lines); + TALLOC_FREE(ret); + return NULL; + } + } + + talloc_free(lines); + return ret; +} + +struct ctdb_node_map_old * +ctdb_node_list_to_map(struct ctdb_node **nodes, uint32_t num_nodes, + TALLOC_CTX *mem_ctx) +{ + uint32_t i; + size_t size; + struct ctdb_node_map_old *node_map; + + size = offsetof(struct ctdb_node_map_old, nodes) + + num_nodes * sizeof(struct ctdb_node_and_flags); + node_map = (struct ctdb_node_map_old *)talloc_zero_size(mem_ctx, size); + if (node_map == NULL) { + DEBUG(DEBUG_ERR, + (__location__ " Failed to allocate nodemap array\n")); + return NULL; + } + + node_map->num = num_nodes; + for (i=0; i<num_nodes; i++) { + node_map->nodes[i].addr = nodes[i]->address; + node_map->nodes[i].pnn = nodes[i]->pnn; + node_map->nodes[i].flags = nodes[i]->flags; + } + + return node_map; +} + +const char *ctdb_eventscript_call_names[] = { + "init", + "setup", + "startup", + "startrecovery", + "recovered", + "takeip", + "releaseip", + "stopped", + "monitor", + "status", + "shutdown", + "reload", + "updateip", + 
"ipreallocated" +}; + +/* Runstate handling */ +static struct { + enum ctdb_runstate runstate; + const char * label; +} runstate_map[] = { + { CTDB_RUNSTATE_UNKNOWN, "UNKNOWN" }, + { CTDB_RUNSTATE_INIT, "INIT" }, + { CTDB_RUNSTATE_SETUP, "SETUP" }, + { CTDB_RUNSTATE_FIRST_RECOVERY, "FIRST_RECOVERY" }, + { CTDB_RUNSTATE_STARTUP, "STARTUP" }, + { CTDB_RUNSTATE_RUNNING, "RUNNING" }, + { CTDB_RUNSTATE_SHUTDOWN, "SHUTDOWN" }, + { -1, NULL }, +}; + +const char *runstate_to_string(enum ctdb_runstate runstate) +{ + int i; + for (i=0; runstate_map[i].label != NULL ; i++) { + if (runstate_map[i].runstate == runstate) { + return runstate_map[i].label; + } + } + + return runstate_map[0].label; +} + +enum ctdb_runstate runstate_from_string(const char *label) +{ + int i; + for (i=0; runstate_map[i].label != NULL; i++) { + if (strcasecmp(runstate_map[i].label, label) == 0) { + return runstate_map[i].runstate; + } + } + + return CTDB_RUNSTATE_UNKNOWN; +} + +void ctdb_set_runstate(struct ctdb_context *ctdb, enum ctdb_runstate runstate) +{ + DEBUG(DEBUG_NOTICE,("Set runstate to %s (%d)\n", + runstate_to_string(runstate), runstate)); + + if (runstate <= ctdb->runstate) { + ctdb_fatal(ctdb, "runstate must always increase"); + } + + ctdb->runstate = runstate; +} + +/* Convert arbitrary data to 4-byte boundary padded uint32 array */ +uint32_t *ctdb_key_to_idkey(TALLOC_CTX *mem_ctx, TDB_DATA key) +{ + uint32_t idkey_size, *k; + + idkey_size = 1 + (key.dsize + sizeof(uint32_t)-1) / sizeof(uint32_t); + + k = talloc_zero_array(mem_ctx, uint32_t, idkey_size); + if (k == NULL) { + return NULL; + } + + k[0] = idkey_size; + memcpy(&k[1], key.dptr, key.dsize); + + return k; +} diff --git a/ctdb/common/db_hash.c b/ctdb/common/db_hash.c new file mode 100644 index 0000000..8dd62c4 --- /dev/null +++ b/ctdb/common/db_hash.c @@ -0,0 +1,295 @@ +/* + Using tdb as a hash table + + Copyright (C) Amitay Isaacs 2015 + + This program is free software; you can redistribute it and/or modify + it under the 
terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" +#include "system/filesys.h" + +#include <talloc.h> +#include <tdb.h> + +#include "common/db_hash.h" + +struct db_hash_context { + struct tdb_context *db; +}; + + +static int db_hash_destructor(struct db_hash_context *dh) +{ + if (dh->db != NULL) { + tdb_close(dh->db); + dh->db = NULL; + } + return 0; +} + +int db_hash_init(TALLOC_CTX *mem_ctx, const char *name, int hash_size, + enum db_hash_type type, struct db_hash_context **result) +{ + struct db_hash_context *dh; + int tdb_flags = TDB_INTERNAL | TDB_DISALLOW_NESTING; + + dh = talloc_zero(mem_ctx, struct db_hash_context); + if (dh == NULL) { + return ENOMEM; + } + + if (type == DB_HASH_COMPLEX) { + tdb_flags |= TDB_INCOMPATIBLE_HASH; + } + + dh->db = tdb_open(name, hash_size, tdb_flags, O_RDWR|O_CREAT, 0); + if (dh->db == NULL) { + talloc_free(dh); + return ENOMEM; + } + + talloc_set_destructor(dh, db_hash_destructor); + *result = dh; + return 0; +} + +static int db_hash_map_tdb_error(struct db_hash_context *dh) +{ + enum TDB_ERROR tdb_err; + int ret; + + tdb_err = tdb_error(dh->db); + switch (tdb_err) { + case TDB_SUCCESS: + ret = 0; break; + case TDB_ERR_OOM: + ret = ENOMEM; break; + case TDB_ERR_EXISTS: + ret = EEXIST; break; + case TDB_ERR_NOEXIST: + ret = ENOENT; break; + case TDB_ERR_EINVAL: + ret = EINVAL; break; + default: + ret = EIO; break; + } + return ret; +} + +int db_hash_insert(struct db_hash_context *dh, uint8_t *keybuf, size_t 
keylen, + uint8_t *databuf, size_t datalen) +{ + TDB_DATA key, data; + int ret; + + if (dh == NULL) { + return EINVAL; + } + + key.dptr = keybuf; + key.dsize = keylen; + + data.dptr = databuf; + data.dsize = datalen; + + ret = tdb_store(dh->db, key, data, TDB_INSERT); + if (ret != 0) { + ret = db_hash_map_tdb_error(dh); + } + return ret; +} + +int db_hash_add(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen, + uint8_t *databuf, size_t datalen) +{ + TDB_DATA key, data; + int ret; + + if (dh == NULL) { + return EINVAL; + } + + key.dptr = keybuf; + key.dsize = keylen; + + data.dptr = databuf; + data.dsize = datalen; + + ret = tdb_store(dh->db, key, data, TDB_REPLACE); + if (ret != 0) { + ret = db_hash_map_tdb_error(dh); + } + return ret; +} + +int db_hash_delete(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen) +{ + TDB_DATA key; + int ret; + + key.dptr = keybuf; + key.dsize = keylen; + + if (dh == NULL) { + return EINVAL; + } + + ret = tdb_delete(dh->db, key); + if (ret != 0) { + ret = db_hash_map_tdb_error(dh); + } + return ret; +} + +struct db_hash_fetch_state { + db_hash_record_parser_fn parser; + void *private_data; +}; + +static int db_hash_fetch_parser(TDB_DATA key, TDB_DATA data, void *private_data) +{ + struct db_hash_fetch_state *state = + (struct db_hash_fetch_state *)private_data; + int ret; + + ret = state->parser(key.dptr, key.dsize, data.dptr, data.dsize, + state->private_data); + return ret; +} + +int db_hash_fetch(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen, + db_hash_record_parser_fn parser, void *private_data) +{ + struct db_hash_fetch_state state; + TDB_DATA key; + int ret; + + if (dh == NULL || parser == NULL) { + return EINVAL; + } + + state.parser = parser; + state.private_data = private_data; + + key.dptr = keybuf; + key.dsize = keylen; + + ret = tdb_parse_record(dh->db, key, db_hash_fetch_parser, &state); + if (ret == -1) { + return ENOENT; + } + return ret; +} + +int db_hash_exists(struct db_hash_context 
*dh, uint8_t *keybuf, size_t keylen) +{ + TDB_DATA key; + int ret; + + if (dh == NULL) { + return EINVAL; + } + + key.dptr = keybuf; + key.dsize = keylen; + + ret = tdb_exists(dh->db, key); + if (ret == 1) { + /* Key found */ + ret = 0; + } else { + ret = db_hash_map_tdb_error(dh); + if (ret == 0) { + ret = ENOENT; + } + } + return ret; +} + +struct db_hash_traverse_state { + db_hash_record_parser_fn parser; + void *private_data; +}; + +static int db_hash_traverse_parser(struct tdb_context *tdb, + TDB_DATA key, TDB_DATA data, + void *private_data) +{ + struct db_hash_traverse_state *state = + (struct db_hash_traverse_state *)private_data; + + return state->parser(key.dptr, key.dsize, data.dptr, data.dsize, + state->private_data); +} + +int db_hash_traverse(struct db_hash_context *dh, + db_hash_record_parser_fn parser, void *private_data, + int *count) +{ + struct db_hash_traverse_state state; + int ret; + + if (dh == NULL) { + return EINVAL; + } + + /* Special case, for counting records */ + if (parser == NULL) { + ret = tdb_traverse_read(dh->db, NULL, NULL); + } else { + state.parser = parser; + state.private_data = private_data; + + ret = tdb_traverse_read(dh->db, db_hash_traverse_parser, &state); + } + + if (ret == -1) { + ret = db_hash_map_tdb_error(dh); + } else { + if (count != NULL) { + *count = ret; + } + ret = 0; + } + + return ret; +} + +int db_hash_traverse_update(struct db_hash_context *dh, + db_hash_record_parser_fn parser, + void *private_data, int *count) +{ + struct db_hash_traverse_state state; + int ret; + + if (dh == NULL || parser == NULL) { + return EINVAL; + } + + state.parser = parser; + state.private_data = private_data; + + ret = tdb_traverse(dh->db, db_hash_traverse_parser, &state); + if (ret == -1) { + ret = db_hash_map_tdb_error(dh); + } else { + if (count != NULL) { + *count = ret; + } + ret = 0; + } + + return ret; +} diff --git a/ctdb/common/db_hash.h b/ctdb/common/db_hash.h new file mode 100644 index 0000000..67e2b85 --- /dev/null 
+++ b/ctdb/common/db_hash.h @@ -0,0 +1,174 @@ +/* + Using tdb as a hash table + + Copyright (C) Amitay Isaacs 2015 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_DB_HASH_H__ +#define __CTDB_DB_HASH_H__ + +#include <talloc.h> +#include <tdb.h> + +/** + * @file db_hash.h + * + * @brief Use tdb database as a hash table + * + * This uses in-memory tdb databases to create a fixed sized hash table. + */ + +/** + * @brief Hash type to indicate the hashing function to use. + * + * DB_HASH_SIMPLE uses default hashing function + * DB_HASH_COMPLEX uses jenkins hashing function + */ +enum db_hash_type { + DB_HASH_SIMPLE, + DB_HASH_COMPLEX, +}; + +/** + * @brief Parser callback function called when fetching a record + * + * This function is called when fetching a record. This function should + * not modify key and data arguments. + * + * The function should return 0 on success and errno on error. + */ +typedef int (*db_hash_record_parser_fn)(uint8_t *keybuf, size_t keylen, + uint8_t *databuf, size_t datalen, + void *private_data); + +/** + * @brief Abstract structure representing tdb hash table + */ +struct db_hash_context; + +/** + * @brief Initialize tdb hash table + * + * This returns a new tdb hash table context which is a talloc context. Freeing + * this context will free all the memory associated with the hash table. 
+ * + * @param[in] mem_ctx Talloc memory context + * @param[in] name The name for the hash table + * @param[in] hash_size The size of the hash table + * @param[in] type The type of hashing function to use + * @param[out] result The new db_hash_context structure + * @return 0 on success, errno on failure + */ +int db_hash_init(TALLOC_CTX *mem_ctx, const char *name, int hash_size, + enum db_hash_type type, struct db_hash_context **result); + +/** + * @brief Insert a record into the hash table + * + * The key and data can be any binary data. Insert only if the record does not + * exist. If the record already exists, return error. + * + * @param[in] dh The tdb hash table context + * @param[in] keybuf The key buffer + * @param[in] keylen The key length + * @param[in] databuf The data buffer + * @param[in] datalen The data length + * @return 0 on success, errno on failure + */ +int db_hash_insert(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen, + uint8_t *databuf, size_t datalen); + +/** + * @brief Add a record into the hash table + * + * The key and data can be any binary data. If the record does not exist, + * insert the record. If the record already exists, replace the record. + * + * @param[in] dh The tdb hash table context + * @param[in] keybuf The key buffer + * @param[in] keylen The key length + * @param[in] databuf The data buffer + * @param[in] datalen The data length + * @return 0 on success, errno on failure + */ +int db_hash_add(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen, + uint8_t *databuf, size_t datalen); +/** + * @brief Delete a record from the hash table + * + * @param[in] dh The tdb hash table context + * @param[in] keybuf The key buffer + * @param[in] keylen The key length + * @return 0 on success, errno on failure + */ +int db_hash_delete(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen); + +/** + * @brief Fetch a record from the hash table + * + * The key and data can be any binary data. 
+ * + * @param[in] dh The tdb hash table context + * @param[in] keybuf The key buffer + * @param[in] keylen The key length + * @param[in] parser Function called when the matching record is found + * @param[in] private_data Private data to parser function + * @return 0 on success, errno on failure + */ +int db_hash_fetch(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen, + db_hash_record_parser_fn parser, void *private_data); + +/** + * @brief Check if a record exists in the hash table + * + * @param[in] dh The tdb hash table context + * @param[in] keybuf The key buffer + * @param[in] keylen The key length + * @return 0 if the record exists, errno on failure + */ +int db_hash_exists(struct db_hash_context *dh, uint8_t *keybuf, size_t keylen); + +/** + * @brief Traverse the database without modification + * + * The parser function should return non-zero value to stop traverse. + * + * @param[in] dh The tdb hash table context + * @param[in] parser Function called for each record + * @param[in] private_data Private data to parser function + * @param[out] count Number of records traversed + * @return 0 on success, errno on failure + */ +int db_hash_traverse(struct db_hash_context *dh, + db_hash_record_parser_fn parser, void *private_data, + int *count); + +/** + * @brief Traverse the database for modifications + * + * The parser function should return non-zero value to stop traverse. 
+ * + * @param[in] dh The tdb hash table context + * @param[in] parser Function called for each record + * @param[in] private_data Private data to parser function + * @param[out] count Number of records traversed + * @return 0 on success, errno on failure + */ +int db_hash_traverse_update(struct db_hash_context *dh, + db_hash_record_parser_fn parser, + void *private_data, int *count); + +#endif /* __CTDB_DB_HASH_H__ */ diff --git a/ctdb/common/event_script.c b/ctdb/common/event_script.c new file mode 100644 index 0000000..edd607f --- /dev/null +++ b/ctdb/common/event_script.c @@ -0,0 +1,247 @@ +/* + Low level event script handling + + Copyright (C) Amitay Isaacs 2017 + Copyright (C) Martin Schwenke 2018 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "replace.h" +#include "system/filesys.h" +#include "system/dir.h" +#include "system/glob.h" + +#include <talloc.h> + +#include "common/event_script.h" + +static int script_filter(const struct dirent *de) +{ + int ret; + + /* Match a script pattern */ + ret = fnmatch("[0-9][0-9].*.script", de->d_name, 0); + if (ret == 0) { + return 1; + } + + return 0; +} + +int event_script_get_list(TALLOC_CTX *mem_ctx, + const char *script_dir, + struct event_script_list **out) +{ + struct dirent **namelist = NULL; + struct event_script_list *script_list = NULL; + size_t ds_len; + int count, ret; + int i; + + count = scandir(script_dir, &namelist, script_filter, alphasort); + if (count == -1) { + ret = errno; + goto done; + } + + script_list = talloc_zero(mem_ctx, struct event_script_list); + if (script_list == NULL) { + goto nomem; + } + + if (count == 0) { + ret = 0; + *out = script_list; + goto done; + } + + script_list->num_scripts = count; + script_list->script = talloc_zero_array(script_list, + struct event_script *, + count); + if (script_list->script == NULL) { + goto nomem; + } + + ds_len = strlen(".script"); + for (i = 0; i < count; i++) { + struct event_script *s; + struct stat statbuf; + + s = talloc_zero(script_list->script, struct event_script); + if (s == NULL) { + goto nomem; + } + + script_list->script[i] = s; + + s->name = talloc_strndup(script_list->script, + namelist[i]->d_name, + strlen(namelist[i]->d_name) - ds_len); + if (s->name == NULL) { + goto nomem; + } + + s->path = talloc_asprintf(script_list->script, + "%s/%s", + script_dir, + namelist[i]->d_name); + if (s->path == NULL) { + goto nomem; + } + + ret = stat(s->path, &statbuf); + if (ret == 0) { + /* + * If ret != 0 this is either a dangling + * symlink or it has just disappeared. Either + * way, it isn't executable. See the note + * below about things that have disappeared. 
+ */ + if (statbuf.st_mode & S_IXUSR) { + s->enabled = true; + } + } + } + + *out = script_list; + ret = 0; + goto done; + +nomem: + ret = ENOMEM; + talloc_free(script_list); + +done: + if (namelist != NULL && count != -1) { + for (i=0; i<count; i++) { + free(namelist[i]); + } + free(namelist); + } + + return ret; +} + +int event_script_chmod(const char *script_dir, + const char *script_name, + bool enable) +{ + const char *dot_script = ".script"; + size_t ds_len = strlen(dot_script); + size_t sn_len = strlen(script_name); + DIR *dirp; + struct dirent *de; + char buf[PATH_MAX]; + const char *script_file; + int ret, new_mode; + char filename[PATH_MAX]; + struct stat st; + bool found; + ino_t found_inode; + int fd = -1; + + /* Allow script_name to already have ".script" suffix */ + if (sn_len > ds_len && + strcmp(&script_name[sn_len - ds_len], dot_script) == 0) { + script_file = script_name; + } else { + ret = snprintf(buf, sizeof(buf), "%s.script", script_name); + if (ret < 0 || (size_t)ret >= sizeof(buf)) { + return ENAMETOOLONG; + } + script_file = buf; + } + + dirp = opendir(script_dir); + if (dirp == NULL) { + return errno; + } + + found = false; + while ((de = readdir(dirp)) != NULL) { + if (strcmp(de->d_name, script_file) == 0) { + /* check for valid script names */ + ret = script_filter(de); + if (ret == 0) { + closedir(dirp); + return EINVAL; + } + + found = true; + found_inode = de->d_ino; + break; + } + } + closedir(dirp); + + if (! 
found) { + return ENOENT; + } + + ret = snprintf(filename, + sizeof(filename), + "%s/%s", + script_dir, + script_file); + if (ret < 0 || (size_t)ret >= sizeof(filename)) { + return ENAMETOOLONG; + } + + fd = open(filename, O_RDWR); + if (fd == -1) { + ret = errno; + goto done; + } + + ret = fstat(fd, &st); + if (ret != 0) { + ret = errno; + goto done; + } + + /* + * If the directory entry inode number doesn't match the one + * returned by fstat() then this is probably a symlink, so the + * caller should not be calling this function. Note that this + * is a cheap sanity check to catch most programming errors. + * This doesn't cost any extra system calls but can still miss + * the unlikely case where the symlink is to a file on a + * different filesystem with the same inode number as the + * symlink. + */ + if (found && found_inode != st.st_ino) { + ret = EINVAL; + goto done; + } + + if (enable) { + new_mode = st.st_mode | (S_IXUSR | S_IXGRP | S_IXOTH); + } else { + new_mode = st.st_mode & ~(S_IXUSR | S_IXGRP | S_IXOTH); + } + + ret = fchmod(fd, new_mode); + if (ret != 0) { + ret = errno; + goto done; + } + +done: + if (fd != -1) { + close(fd); + } + return ret; +} diff --git a/ctdb/common/event_script.h b/ctdb/common/event_script.h new file mode 100644 index 0000000..bf5a8fd --- /dev/null +++ b/ctdb/common/event_script.h @@ -0,0 +1,72 @@ +/* + Low level event script handling + + Copyright (C) Amitay Isaacs 2017 + Copyright (C) Martin Schwenke 2018 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_SCRIPT_H__ +#define __CTDB_SCRIPT_H__ + +#include "replace.h" +#include "system/filesys.h" + +#include <talloc.h> + +/** + * @file event_script.h + * + * @brief Script listing and manipulation + */ + + +struct event_script { + char *name; + char *path; + bool enabled; +}; + +struct event_script_list { + unsigned int num_scripts; + struct event_script **script; +}; + + +/** + * @brief Retrieve a list of scripts + * + * @param[in] mem_ctx Talloc memory context + * @param[in] script_dir Directory containing scripts + * @param[out] out List of scripts + * @return 0 on success, errno on failure + */ +int event_script_get_list(TALLOC_CTX *mem_ctx, + const char *script_dir, + struct event_script_list **out); + +/** + * @brief Make a script executable or not executable + * + * @param[in] script_dir Directory containing script + * @param[in] script_name Name of the script to enable or disable + * @param[in] executable True if script should be made executable + * @return 0 on success, errno on failure + */ +int event_script_chmod(const char *script_dir, + const char *script_name, + bool executable); + +#endif /* __CTDB_SCRIPT_H__ */ diff --git a/ctdb/common/hash_count.c b/ctdb/common/hash_count.c new file mode 100644 index 0000000..f845016 --- /dev/null +++ b/ctdb/common/hash_count.c @@ -0,0 +1,219 @@ +/* + Using hash table for counting events + + Copyright (C) Amitay Isaacs 2017 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" +#include "system/filesys.h" +#include "system/time.h" + +#include <tdb.h> + +#include "lib/util/time.h" + +#include "common/db_hash.h" +#include "common/hash_count.h" + +struct hash_count_value { + struct timeval update_time; + uint64_t counter; +}; + +struct hash_count_context { + struct db_hash_context *dh; + struct timeval update_interval; + hash_count_update_handler_fn handler; + void *private_data; +}; + +/* + * Initialise hash count map + */ +int hash_count_init(TALLOC_CTX *mem_ctx, struct timeval update_interval, + hash_count_update_handler_fn handler, void *private_data, + struct hash_count_context **result) +{ + struct hash_count_context *hcount; + int ret; + + if (handler == NULL) { + return EINVAL; + } + + hcount = talloc_zero(mem_ctx, struct hash_count_context); + if (hcount == NULL) { + return ENOMEM; + } + + ret = db_hash_init(hcount, "hash_count_db", 8192, DB_HASH_COMPLEX, + &hcount->dh); + if (ret != 0) { + talloc_free(hcount); + return ret; + } + + hcount->update_interval = update_interval; + hcount->handler = handler; + hcount->private_data = private_data; + + *result = hcount; + return 0; +} + +static int hash_count_fetch_parser(uint8_t *keybuf, size_t keylen, + uint8_t *databuf, size_t datalen, + void *private_data) +{ + struct hash_count_value *value = + (struct hash_count_value *)private_data; + + if (datalen != sizeof(struct hash_count_value)) { + return EIO; + } + + *value = *(struct hash_count_value *)databuf; + return 0; +} + +static int hash_count_fetch(struct hash_count_context *hcount, TDB_DATA key, + struct hash_count_value *value) +{ + return db_hash_fetch(hcount->dh, key.dptr, key.dsize, + hash_count_fetch_parser, value); +} + +static int hash_count_insert(struct hash_count_context *hcount, TDB_DATA key, + struct 
hash_count_value *value) +{ + return db_hash_insert(hcount->dh, key.dptr, key.dsize, + (uint8_t *)value, + sizeof(struct hash_count_value)); +} + +static int hash_count_update(struct hash_count_context *hcount, TDB_DATA key, + struct hash_count_value *value) +{ + return db_hash_add(hcount->dh, key.dptr, key.dsize, + (uint8_t *)value, sizeof(struct hash_count_value)); +} + +int hash_count_increment(struct hash_count_context *hcount, TDB_DATA key) +{ + struct hash_count_value value; + struct timeval current_time = timeval_current(); + int ret; + + if (hcount == NULL) { + return EINVAL; + } + + ret = hash_count_fetch(hcount, key, &value); + if (ret == 0) { + struct timeval tmp_t; + + tmp_t = timeval_sum(&value.update_time, + &hcount->update_interval); + if (timeval_compare(&current_time, &tmp_t) < 0) { + value.counter += 1; + } else { + value.update_time = current_time; + value.counter = 1; + } + + hcount->handler(key, value.counter, hcount->private_data); + ret = hash_count_update(hcount, key, &value); + + } else if (ret == ENOENT) { + value.update_time = current_time; + value.counter = 1; + + hcount->handler(key, value.counter, hcount->private_data); + ret = hash_count_insert(hcount, key, &value); + } + + return ret; +} + +static struct timeval timeval_subtract(const struct timeval *tv1, + const struct timeval *tv2) +{ + struct timeval tv = *tv1; + const unsigned int million = 1000000; + + if (tv.tv_sec > 1) { + tv.tv_sec -= 1; + tv.tv_usec += million; + } else { + return tv; + } + + tv.tv_sec -= tv2->tv_sec; + tv.tv_usec -= tv2->tv_usec; + + tv.tv_sec += tv.tv_usec / million; + tv.tv_usec = tv.tv_usec % million; + + return tv; +} + +struct hash_count_expire_state { + struct db_hash_context *dh; + struct timeval last_time; + int count; +}; + +static int hash_count_expire_parser(uint8_t *keybuf, size_t keylen, + uint8_t *databuf, size_t datalen, + void *private_data) +{ + struct hash_count_expire_state *state = + (struct hash_count_expire_state *)private_data; + struct 
hash_count_value *value; + int ret = 0; + + if (datalen != sizeof(struct hash_count_value)) { + return EIO; + } + + value = (struct hash_count_value *)databuf; + if (timeval_compare(&value->update_time, &state->last_time) < 0) { + ret = db_hash_delete(state->dh, keybuf, keylen); + if (ret == 0) { + state->count += 1; + } + } + + return ret; +} + +void hash_count_expire(struct hash_count_context *hcount, int *delete_count) +{ + struct timeval current_time = timeval_current(); + struct hash_count_expire_state state; + + state.dh = hcount->dh; + state.last_time = timeval_subtract(&current_time, + &hcount->update_interval); + state.count = 0; + + (void) db_hash_traverse_update(hcount->dh, hash_count_expire_parser, + &state, NULL); + + if (delete_count != NULL) { + *delete_count = state.count; + } +} diff --git a/ctdb/common/hash_count.h b/ctdb/common/hash_count.h new file mode 100644 index 0000000..f14c82c --- /dev/null +++ b/ctdb/common/hash_count.h @@ -0,0 +1,94 @@ +/* + Using hash table for counting events + + Copyright (C) Amitay Isaacs 2017 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_HASH_COUNT_H__ +#define __CTDB_HASH_COUNT_H__ + +/** + * @file hash_count.h + * + * @brief Count key-based events for specified interval + * + * This can be used to measure the rate of events based on any interval. + * For example, number of occurrences per second. 
+ */ + +/** + * @brief Handler callback function called when counter is incremented + * + * This function is called every time a counter is incremented for a key. + * The counter argument is the number of times the increment function is + * called during a count interval. + * + * This function should not modify key and data arguments. + */ +typedef void (*hash_count_update_handler_fn)(TDB_DATA key, uint64_t counter, + void *private_data); + +/** + * @brief Abstract structure representing hash based counting + */ +struct hash_count_context; + +/** + * @brief Initialize hash counting + * + * This returns a new hash count context which is a talloc context. Freeing + * this context will free all the memory associated with hash count. + * + * @param[in] mem_ctx Talloc memory context + * @param[in] count_interval The time interval for counting events + * @param[in] handler Function called when counter is incremented + * @param[in] private_data Private data to handler function + * @param[out] result The new hash_count structure + * @return 0 on success, errno on failure + */ +int hash_count_init(TALLOC_CTX *mem_ctx, struct timeval count_interval, + hash_count_update_handler_fn handler, void *private_data, + struct hash_count_context **result); + +/** + * @brief Increment a counter for a key + * + * The first time this is called for a key, the corresponding counter is set + * to 1 and the start time is noted. All subsequent calls made during the + * count_interval (used in initializing the context) will increment the + * corresponding counter for the key. After the count_interval has elapsed, + * the counter will be reset to 1. + * + * @param[in] hcount The hash count context + * @param[in] key The key for which counter is updated + * @return 0 on success, errno on failure + * + * This will result in a callback function being called. 
+ */ +int hash_count_increment(struct hash_count_context *hcount, TDB_DATA key); + +/** + * @brief Remove keys for which count interval has elapsed + * + * This function is used to clean the database of keys for which there are + * no recent events. + * + * @param[in] hcount The hash count context + * @param[out] delete_count The number of keys deleted + */ +void hash_count_expire(struct hash_count_context *hcount, int *delete_count); + +#endif /* __CTDB_HASH_COUNT_H__ */ diff --git a/ctdb/common/line.c b/ctdb/common/line.c new file mode 100644 index 0000000..c4c6726 --- /dev/null +++ b/ctdb/common/line.c @@ -0,0 +1,145 @@ +/* + Line based I/O over fds + + Copyright (C) Amitay Isaacs 2018 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "replace.h" + +#include <talloc.h> + +#include "lib/util/sys_rw.h" + +#include "common/line.h" + +struct line_read_state { + line_process_fn_t callback; + void *private_data; + char *buf; + size_t hint, len, offset; + int num_lines; +}; + +static bool line_read_one(char *buf, size_t start, size_t len, size_t *pos) +{ + size_t i; + + for (i=start; i<len; i++) { + if (buf[i] == '\n' || buf[i] == '\0') { + *pos = i; + return true; + } + } + + return false; +} + +static int line_read_process(struct line_read_state *state) +{ + size_t start = 0; + size_t pos = 0; + + while (1) { + int ret; + bool ok; + + ok = line_read_one(state->buf, start, state->offset, &pos); + if (! ok) { + break; + } + + state->buf[pos] = '\0'; + state->num_lines += 1; + + ret = state->callback(state->buf + start, state->private_data); + if (ret != 0) { + return ret; + } + + start = pos+1; + } + + if (pos > 0) { + if (pos+1 < state->offset) { + memmove(state->buf, + state->buf + pos+1, + state->offset - (pos+1)); + } + state->offset -= (pos+1); + } + + return 0; +} + +int line_read(int fd, + size_t length, + TALLOC_CTX *mem_ctx, + line_process_fn_t callback, + void *private_data, + int *num_lines) +{ + struct line_read_state state; + + if (length < 32) { + length = 32; + } + + state = (struct line_read_state) { + .callback = callback, + .private_data = private_data, + .hint = length, + }; + + while (1) { + ssize_t n; + int ret; + + if (state.offset == state.len) { + state.len += state.hint; + state.buf = talloc_realloc_size(mem_ctx, + state.buf, + state.len); + if (state.buf == NULL) { + return ENOMEM; + } + } + + n = sys_read(fd, + state.buf + state.offset, + state.len - state.offset); + if (n < 0) { + return errno; + } + if (n == 0) { + break; + } + + state.offset += n; + + ret = line_read_process(&state); + if (ret != 0) { + if (num_lines != NULL) { + *num_lines = state.num_lines; + } + return ret; + } + } + + if (num_lines != NULL) { + *num_lines = state.num_lines; + } + return 
0; +} diff --git a/ctdb/common/line.h b/ctdb/common/line.h new file mode 100644 index 0000000..6b67f1e --- /dev/null +++ b/ctdb/common/line.h @@ -0,0 +1,62 @@ +/* + Line based I/O over fds + + Copyright (C) Amitay Isaacs 2018 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_LINE_H__ +#define __CTDB_LINE_H__ + +#include <talloc.h> + +/** + * @file line.h + * + * @brief Line based I/O over pipes and sockets + */ + +/** + * @brief The callback routine called to process a line + * + * @param[in] line The line read + * @param[in] private_data Private data for callback + * @return 0 to continue processing lines, non-zero to stop reading + */ +typedef int (*line_process_fn_t)(char *line, void *private_data); + +/** + * @brief Read a line (terminated by \n or \0) + * + * If there is any read error on fd, then errno will be returned. + * If callback function returns a non-zero value, then that value will be + * returned. 
+ * + * @param[in] fd The file descriptor + * @param[in] length The expected length of a line (this is only a hint) + * @param[in] mem_ctx Talloc memory context + * @param[in] callback Callback function called when a line is read + * @param[in] private_data Private data for callback + * @param[out] num_lines Number of lines read so far + * @return 0 on success, errno on failure + */ +int line_read(int fd, + size_t length, + TALLOC_CTX *mem_ctx, + line_process_fn_t callback, + void *private_data, + int *num_lines); + +#endif /* __CTDB_LINE_H__ */ diff --git a/ctdb/common/logging.c b/ctdb/common/logging.c new file mode 100644 index 0000000..3aa5ca9 --- /dev/null +++ b/ctdb/common/logging.c @@ -0,0 +1,745 @@ +/* + Logging utilities + + Copyright (C) Andrew Tridgell 2008 + Copyright (C) Martin Schwenke 2014 + Copyright (C) Amitay Isaacs 2015 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "replace.h" +#include "system/network.h" +#include "system/locale.h" +#include "system/time.h" +#include "system/filesys.h" +#include "system/syslog.h" +#include "system/dir.h" + +#include "lib/util/time_basic.h" +#include "lib/util/sys_rw.h" +#include "lib/util/debug.h" +#include "lib/util/blocking.h" +#include "lib/util/samba_util.h" /* get_myname() */ + +#include "common/logging.h" + +struct { + int log_level; + const char *log_string; +} log_string_map[] = { + { DEBUG_ERR, "ERROR" }, + { DEBUG_WARNING, "WARNING" }, + { 2, "WARNING" }, + { DEBUG_NOTICE, "NOTICE" }, + { 4, "NOTICE" }, + { DEBUG_INFO, "INFO" }, + { 6, "INFO" }, + { 7, "INFO" }, + { 8, "INFO" }, + { 9, "INFO" }, + { DEBUG_DEBUG, "DEBUG" }, +}; + +bool debug_level_parse(const char *log_string, int *log_level) +{ + size_t i; + + if (log_string == NULL) { + return false; + } + + if (isdigit(log_string[0])) { + int level = atoi(log_string); + + if (level >= 0 && (size_t)level < ARRAY_SIZE(log_string_map)) { + *log_level = level; + return true; + } + return false; + } + + for (i=0; i<ARRAY_SIZE(log_string_map); i++) { + if (strncasecmp(log_string_map[i].log_string, + log_string, strlen(log_string)) == 0) { + *log_level = log_string_map[i].log_level; + return true; + } + } + + return false; +} + +const char *debug_level_to_string(int log_level) +{ + size_t i; + + for (i=0; i < ARRAY_SIZE(log_string_map); i++) { + if (log_string_map[i].log_level == log_level) { + return log_string_map[i].log_string; + } + } + return "UNKNOWN"; +} + +int debug_level_from_string(const char *log_string) +{ + bool found; + int log_level; + + found = debug_level_parse(log_string, &log_level); + if (found) { + return log_level; + } + + /* Default debug level */ + return DEBUG_ERR; +} + +/* + * file logging backend + */ + +static bool file_log_validate(const char *option) +{ + char *t, *dir; + struct stat st; + int ret; + + if (option == NULL || strcmp(option, "-") == 0) { + return true; + } + + t = 
strdup(option); + if (t == NULL) { + return false; + } + + dir = dirname(t); + + ret = stat(dir, &st); + free(t); + if (ret != 0) { + return false; + } + + if (! S_ISDIR(st.st_mode)) { + return false; + } + + return true; +} + +static int file_log_setup(TALLOC_CTX *mem_ctx, + const char *option, + const char *app_name) +{ + struct debug_settings settings = { + .debug_syslog_format = true, + .debug_hires_timestamp = true, + .debug_no_stderr_redirect = true, + }; + const char *t = NULL; + + if (option == NULL || strcmp(option, "-") == 0) { + /* + * Logging to stderr is the default and has already + * been done in logging init + */ + return 0; + } + + /* + * Support logging of fake hostname in local daemons. This + * hostname is basename(getenv(CTDB_BASE)). + */ + t = getenv("CTDB_TEST_MODE"); + if (t != NULL) { + t = getenv("CTDB_BASE"); + if (t != NULL) { + const char *p = strrchr(t, '/'); + if (p != NULL) { + p++; + if (p[0] == '\0') { + p = "unknown"; + } + } else { + p = t; + } + + debug_set_hostname(p); + } + } + + debug_set_settings(&settings, "file", 0, false); + debug_set_logfile(option); + setup_logging(app_name, DEBUG_FILE); + + return 0; +} + +/* + * syslog logging backend + */ + +/* Copied from lib/util/debug.c */ +static int debug_level_to_priority(int level) +{ + /* + * map debug levels to syslog() priorities + */ + static const int priority_map[] = { + LOG_ERR, /* 0 */ + LOG_WARNING, /* 1 */ + LOG_NOTICE, /* 2 */ + LOG_NOTICE, /* 3 */ + LOG_NOTICE, /* 4 */ + LOG_NOTICE, /* 5 */ + LOG_INFO, /* 6 */ + LOG_INFO, /* 7 */ + LOG_INFO, /* 8 */ + LOG_INFO, /* 9 */ + }; + int priority; + + if ((size_t)level >= ARRAY_SIZE(priority_map) || level < 0) { + priority = LOG_DEBUG; + } else { + priority = priority_map[level]; + } + return priority; +} + +struct syslog_log_state { + int fd; + const char *app_name; + const char *hostname; + int (*format)(int dbglevel, struct syslog_log_state *state, + const char *str, char *buf, int bsize); + /* RFC3164 says: The total 
length of the packet MUST be 1024 + bytes or less. */ + char buffer[1024]; + unsigned int dropped_count; +}; + +/* Format messages as per RFC3164 + * + * It appears that some syslog daemon implementations do not allow a + * hostname when messages are sent via a Unix domain socket, so omit + * it. Similarly, syslogd on FreeBSD does not understand the hostname + * part of the header, even when logging via UDP. Note that most + * implementations will log messages against "localhost" when logging + * via UDP. A timestamp could be sent but rsyslogd on Linux limits + * the timestamp logged to the precision that was received on + * /dev/log. It seems sane to send degenerate RFC3164 messages + * without a header at all, so that the daemon will generate high + * resolution timestamps if configured. + */ +static int format_rfc3164(int dbglevel, struct syslog_log_state *state, + const char *str, char *buf, int bsize) +{ + int pri; + int len; + + pri = LOG_DAEMON | debug_level_to_priority(dbglevel); + len = snprintf(buf, bsize, "<%d>%s[%u]: %s", + pri, state->app_name, getpid(), str); + buf[bsize-1] = '\0'; + len = MIN(len, bsize - 1); + + return len; +} + +/* Format messages as per RFC5424 + * + * <165>1 2003-08-24T05:14:15.000003-07:00 192.0.2.1 + * myproc 8710 - - %% It's time to make the do-nuts. + */ +static int format_rfc5424(int dbglevel, struct syslog_log_state *state, + const char *str, char *buf, int bsize) +{ + int pri; + struct timeval tv; + struct timeval_buf tvbuf; + int len, s; + + /* Header */ + pri = LOG_DAEMON | debug_level_to_priority(dbglevel); + GetTimeOfDay(&tv); + len = snprintf(buf, bsize, + "<%d>1 %s %s %s %u - - ", + pri, timeval_str_buf(&tv, true, true, &tvbuf), + state->hostname, state->app_name, getpid()); + /* A truncated header is not useful... 
*/ + if (len >= bsize) { + return -1; + } + + /* Message */ + s = snprintf(&buf[len], bsize - len, "%s", str); + buf[bsize-1] = '\0'; + len = MIN(len + s, bsize - 1); + + return len; +} + +static void syslog_log(void *private_data, int level, const char *msg) +{ + syslog(debug_level_to_priority(level), "%s", msg); +} + +static int syslog_log_sock_maybe(struct syslog_log_state *state, + int level, const char *msg) +{ + int n; + ssize_t ret; + + n = state->format(level, state, msg, state->buffer, + sizeof(state->buffer)); + if (n == -1) { + return E2BIG; + } + + do { + ret = write(state->fd, state->buffer, n); + } while (ret == -1 && errno == EINTR); + + if (ret == -1) { + return errno; + } + + return 0; + +} +static void syslog_log_sock(void *private_data, int level, const char *msg) +{ + struct syslog_log_state *state = talloc_get_type_abort( + private_data, struct syslog_log_state); + int ret; + + if (state->dropped_count > 0) { + char t[64] = { 0 }; + snprintf(t, sizeof(t), + "[Dropped %u log messages]\n", + state->dropped_count); + t[sizeof(t)-1] = '\0'; + ret = syslog_log_sock_maybe(state, level, t); + if (ret == EAGAIN || ret == EWOULDBLOCK) { + state->dropped_count++; + /* + * If above failed then actually drop the + * message that would be logged below, since + * it would have been dropped anyway and it is + * also likely to fail. Falling through and + * attempting to log the message also means + * that the dropped message count will be + * logged out of order. 
+ */ + return; + } + if (ret != 0) { + /* Silent failure on any other error */ + return; + } + state->dropped_count = 0; + } + + ret = syslog_log_sock_maybe(state, level, msg); + if (ret == EAGAIN || ret == EWOULDBLOCK) { + state->dropped_count++; + } +} + +static int syslog_log_setup_syslog(TALLOC_CTX *mem_ctx, const char *app_name) +{ + openlog(app_name, LOG_PID, LOG_DAEMON); + + debug_set_callback(NULL, syslog_log); + + return 0; +} + +static int syslog_log_state_destructor(struct syslog_log_state *state) +{ + if (state->fd != -1) { + close(state->fd); + state->fd = -1; + } + return 0; +} + +static int syslog_log_setup_common(TALLOC_CTX *mem_ctx, const char *app_name, + struct syslog_log_state **result) +{ + struct syslog_log_state *state; + + state = talloc_zero(mem_ctx, struct syslog_log_state); + if (state == NULL) { + return ENOMEM; + } + + state->fd = -1; + state->app_name = app_name; + talloc_set_destructor(state, syslog_log_state_destructor); + + *result = state; + return 0; +} + +#ifdef _PATH_LOG +static int syslog_log_setup_nonblocking(TALLOC_CTX *mem_ctx, + const char *app_name) +{ + struct syslog_log_state *state = NULL; + struct sockaddr_un dest; + int ret; + + ret = syslog_log_setup_common(mem_ctx, app_name, &state); + if (ret != 0) { + return ret; + } + + state->fd = socket(AF_UNIX, SOCK_DGRAM, 0); + if (state->fd == -1) { + int save_errno = errno; + talloc_free(state); + return save_errno; + } + + dest.sun_family = AF_UNIX; + strncpy(dest.sun_path, _PATH_LOG, sizeof(dest.sun_path)-1); + ret = connect(state->fd, + (struct sockaddr *)&dest, sizeof(dest)); + if (ret == -1) { + int save_errno = errno; + talloc_free(state); + return save_errno; + } + + ret = set_blocking(state->fd, false); + if (ret != 0) { + int save_errno = errno; + talloc_free(state); + return save_errno; + } + + if (! 
set_close_on_exec(state->fd)) { + int save_errno = errno; + talloc_free(state); + return save_errno; + } + + state->hostname = NULL; /* Make this explicit */ + state->format = format_rfc3164; + + debug_set_callback(state, syslog_log_sock); + + return 0; +} +#endif /* _PATH_LOG */ + +static int syslog_log_setup_udp(TALLOC_CTX *mem_ctx, const char *app_name, + bool rfc5424) +{ + struct syslog_log_state *state = NULL; + struct sockaddr_in dest; + int ret; + + ret = syslog_log_setup_common(mem_ctx, app_name, &state); + if (ret != 0) { + return ret; + } + + state->fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (state->fd == -1) { + int save_errno = errno; + talloc_free(state); + return save_errno; + } + + dest.sin_family = AF_INET; + dest.sin_port = htons(514); + dest.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + ret = connect(state->fd, + (struct sockaddr *)&dest, sizeof(dest)); + if (ret == -1) { + int save_errno = errno; + talloc_free(state); + return save_errno; + } + + if (! set_close_on_exec(state->fd)) { + int save_errno = errno; + talloc_free(state); + return save_errno; + } + + state->hostname = get_myname(state); + if (state->hostname == NULL) { + /* Use a fallback instead of failing initialisation */ + state->hostname = "localhost"; + } + if (rfc5424) { + state->format = format_rfc5424; + } else { + state->format = format_rfc3164; + } + + debug_set_callback(state, syslog_log_sock); + + return 0; +} + +static bool syslog_log_validate(const char *option) +{ + if (option == NULL) { + return true; +#ifdef _PATH_LOG + } else if (strcmp(option, "nonblocking") == 0) { + return true; +#endif + } else if (strcmp(option, "udp") == 0) { + return true; + } else if (strcmp(option, "udp-rfc5424") == 0) { + return true; + } + + return false; +} + +static int syslog_log_setup(TALLOC_CTX *mem_ctx, const char *option, + const char *app_name) +{ + if (option == NULL) { + return syslog_log_setup_syslog(mem_ctx, app_name); +#ifdef _PATH_LOG + } else if (strcmp(option, 
"nonblocking") == 0) { + return syslog_log_setup_nonblocking(mem_ctx, app_name); +#endif + } else if (strcmp(option, "udp") == 0) { + return syslog_log_setup_udp(mem_ctx, app_name, false); + } else if (strcmp(option, "udp-rfc5424") == 0) { + return syslog_log_setup_udp(mem_ctx, app_name, true); + } + + return EINVAL; +} + +struct log_backend { + const char *name; + bool (*validate)(const char *option); + int (*setup)(TALLOC_CTX *mem_ctx, + const char *option, + const char *app_name); +}; + +static struct log_backend log_backend[] = { + { + .name = "file", + .validate = file_log_validate, + .setup = file_log_setup, + }, + { + .name = "syslog", + .validate = syslog_log_validate, + .setup = syslog_log_setup, + }, +}; + +static int log_backend_parse(TALLOC_CTX *mem_ctx, + const char *logging, + struct log_backend **backend, + char **backend_option) +{ + struct log_backend *b = NULL; + char *t, *name, *option; + size_t i; + + t = talloc_strdup(mem_ctx, logging); + if (t == NULL) { + return ENOMEM; + } + + name = strtok(t, ":"); + if (name == NULL) { + talloc_free(t); + return EINVAL; + } + option = strtok(NULL, ":"); + + for (i=0; i<ARRAY_SIZE(log_backend); i++) { + if (strcmp(log_backend[i].name, name) == 0) { + b = &log_backend[i]; + } + } + + if (b == NULL) { + talloc_free(t); + return ENOENT; + } + + *backend = b; + if (option != NULL) { + *backend_option = talloc_strdup(mem_ctx, option); + if (*backend_option == NULL) { + talloc_free(t); + return ENOMEM; + } + } else { + *backend_option = NULL; + } + + talloc_free(t); + return 0; +} + +bool logging_validate(const char *logging) +{ + TALLOC_CTX *tmp_ctx; + struct log_backend *backend; + char *option; + int ret; + bool status; + + tmp_ctx = talloc_new(NULL); + if (tmp_ctx == NULL) { + return false; + } + + ret = log_backend_parse(tmp_ctx, logging, &backend, &option); + if (ret != 0) { + talloc_free(tmp_ctx); + return false; + } + + status = backend->validate(option); + talloc_free(tmp_ctx); + return status; +} + +/* 
Initialise logging */ +int logging_init(TALLOC_CTX *mem_ctx, const char *logging, + const char *debug_level, const char *app_name) +{ + struct log_backend *backend = NULL; + char *option = NULL; + int level; + int ret; + + setup_logging(app_name, DEBUG_DEFAULT_STDERR); + + if (debug_level == NULL) { + debug_level = getenv("CTDB_DEBUGLEVEL"); + } + if (! debug_level_parse(debug_level, &level)) { + return EINVAL; + } + debuglevel_set(level); + + if (logging == NULL) { + logging = getenv("CTDB_LOGGING"); + } + if (logging == NULL || logging[0] == '\0') { + return EINVAL; + } + + ret = log_backend_parse(mem_ctx, logging, &backend, &option); + if (ret != 0) { + if (ret == ENOENT) { + fprintf(stderr, "Invalid logging option \'%s\'\n", + logging); + } + talloc_free(option); + return ret; + } + + ret = backend->setup(mem_ctx, option, app_name); + talloc_free(option); + return ret; +} + +bool logging_reopen_logs(void) +{ + bool status; + + status = reopen_logs_internal(); + + return status; +} + +struct logging_reopen_logs_data { + void (*hook)(void *private_data); + void *private_data; +}; + +static void logging_sig_hup_handler(struct tevent_context *ev, + struct tevent_signal *se, + int signum, + int count, + void *dont_care, + void *private_data) +{ + bool status; + + if (private_data != NULL) { + struct logging_reopen_logs_data *data = talloc_get_type_abort( + private_data, struct logging_reopen_logs_data); + + if (data->hook != NULL) { + data->hook(data->private_data); + } + } + + status = logging_reopen_logs(); + if (!status) { + D_WARNING("Failed to reopen logs\n"); + return; + } + + D_NOTICE("Reopened logs\n"); + +} + +bool logging_setup_sighup_handler(struct tevent_context *ev, + TALLOC_CTX *talloc_ctx, + void (*hook)(void *private_data), + void *private_data) +{ + struct logging_reopen_logs_data *data = NULL; + struct tevent_signal *se; + + if (hook != NULL) { + data = talloc(talloc_ctx, struct logging_reopen_logs_data); + if (data == NULL) { + return false; + } + 
+ data->hook = hook; + data->private_data = private_data; + } + + + se = tevent_add_signal(ev, + talloc_ctx, + SIGHUP, + 0, + logging_sig_hup_handler, + data); + if (se == NULL) { + talloc_free(data); + return false; + } + + return true; +} diff --git a/ctdb/common/logging.h b/ctdb/common/logging.h new file mode 100644 index 0000000..542b4a3 --- /dev/null +++ b/ctdb/common/logging.h @@ -0,0 +1,51 @@ +/* + Logging utilities + + Copyright (C) Amitay Isaacs 2015 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef __CTDB_LOGGING_H__ +#define __CTDB_LOGGING_H__ + +#include <talloc.h> +#include <tevent.h> +#include "lib/util/debug.h" + +#define DEBUG_ERR DBGLVL_ERR +#define DEBUG_WARNING DBGLVL_WARNING +#define DEBUG_NOTICE DBGLVL_NOTICE +#define DEBUG_INFO DBGLVL_INFO +#define DEBUG_DEBUG DBGLVL_DEBUG + +/* These are used in many places, so define them here to avoid churn */ +#define DEBUG_ALERT DEBUG_ERR +#define DEBUG_CRIT DEBUG_ERR + +bool debug_level_parse(const char *log_string, int *log_level); +const char *debug_level_to_string(int log_level); +int debug_level_from_string(const char *log_string); + +bool logging_validate(const char *logging); +int logging_init(TALLOC_CTX *mem_ctx, const char *logging, + const char *debuglevel, const char *app_name); + +bool logging_reopen_logs(void); +bool logging_setup_sighup_handler(struct tevent_context *ev, + TALLOC_CTX *talloc_ctx, + void (*hook)(void *private_data), + void *private_data); + +#endif /* __CTDB_LOGGING_H__ */ diff --git a/ctdb/common/logging_conf.c b/ctdb/common/logging_conf.c new file mode 100644 index 0000000..1cd929e --- /dev/null +++ b/ctdb/common/logging_conf.c @@ -0,0 +1,127 @@ +/* + CTDB logging config handling + + Copyright (C) Martin Schwenke 2017 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "replace.h" + +#include <talloc.h> + +#include "common/conf.h" +#include "common/logging.h" +#include "common/logging_conf.h" + +#define LOGGING_LOCATION_DEFAULT "file:" LOGDIR "/log.ctdb" +#define LOGGING_LOG_LEVEL_DEFAULT "ERROR" + +static bool logging_conf_validate_log_level(const char *key, + const char *old_loglevel, + const char *new_loglevel, + enum conf_update_mode mode) +{ + int log_level; + bool ok; + + ok = debug_level_parse(new_loglevel, &log_level); + if (!ok) { + return false; + } + + return true; +} + +static bool logging_conf_validate_location(const char *key, + const char *old_location, + const char *new_location, + enum conf_update_mode mode) +{ + bool ok; + + ok = logging_validate(new_location); + if (!ok) { + return false; + } + + if (mode == CONF_MODE_RELOAD && + strcmp(old_location, new_location) != 0) { + D_WARNING("Ignoring update of %s config option \"%s\"\n", + LOGGING_CONF_SECTION, key); + return false; + } + + return true; +} + +void logging_conf_init(struct conf_context *conf, + const char *default_log_level) +{ + const char *log_level; + + log_level = (default_log_level == NULL) ? 
+ LOGGING_LOG_LEVEL_DEFAULT : + default_log_level; + + conf_define_section(conf, LOGGING_CONF_SECTION, NULL); + + conf_define_string(conf, + LOGGING_CONF_SECTION, + LOGGING_CONF_LOCATION, + LOGGING_LOCATION_DEFAULT, + logging_conf_validate_location); + + conf_define_string(conf, + LOGGING_CONF_SECTION, + LOGGING_CONF_LOG_LEVEL, + log_level, + logging_conf_validate_log_level); +} + +const char *logging_conf_location(struct conf_context *conf) +{ + const char *out = NULL; + int ret; + + ret = conf_get_string(conf, + LOGGING_CONF_SECTION, + LOGGING_CONF_LOCATION, + &out, + NULL); + if (ret != 0) { + /* Can't really happen, but return default */ + return LOGGING_LOCATION_DEFAULT; + } + + return out; +} + +const char *logging_conf_log_level(struct conf_context *conf) +{ + const char *out = NULL; + int ret; + + ret = conf_get_string(conf, + LOGGING_CONF_SECTION, + LOGGING_CONF_LOG_LEVEL, + &out, + NULL); + if (ret != 0) { + /* Can't really happen, but return default */ + return LOGGING_LOG_LEVEL_DEFAULT; + } + + return out; +} diff --git a/ctdb/common/logging_conf.h b/ctdb/common/logging_conf.h new file mode 100644 index 0000000..fab478d --- /dev/null +++ b/ctdb/common/logging_conf.h @@ -0,0 +1,36 @@ +/* + CTDB logging config handling + + Copyright (C) Martin Schwenke 2017 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef __LOGGING_CONF_H__ +#define __LOGGING_CONF_H__ + +#include "common/conf.h" + +#define LOGGING_CONF_SECTION "logging" + +#define LOGGING_CONF_LOCATION "location" +#define LOGGING_CONF_LOG_LEVEL "log level" + +void logging_conf_init(struct conf_context *conf, + const char *default_log_level); + +const char *logging_conf_location(struct conf_context *conf); +const char *logging_conf_log_level(struct conf_context *conf); + +#endif /* __LOGGING_CONF_H__ */ diff --git a/ctdb/common/path.c b/ctdb/common/path.c new file mode 100644 index 0000000..ea3b08f --- /dev/null +++ b/ctdb/common/path.c @@ -0,0 +1,211 @@ +/* + Construct runtime paths + + Copyright (C) Amitay Isaacs 2018 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "replace.h" +#include "system/filesys.h" + +#include "lib/util/debug.h" + +#include "common/path.h" + +#define CTDB_CONFIG_FILE "ctdb.conf" + +struct { + char *basedir; + char datadir[PATH_MAX]; + char etcdir[PATH_MAX]; + char rundir[PATH_MAX]; + char vardir[PATH_MAX]; + bool test_mode; + bool basedir_set; + bool datadir_set; + bool etcdir_set; + bool rundir_set; + bool vardir_set; +} ctdb_paths = { + .datadir = CTDB_DATADIR, + .etcdir = CTDB_ETCDIR, + .rundir = CTDB_RUNDIR, + .vardir = CTDB_VARDIR, +}; + +static void path_set_basedir(void) +{ + const char *t; + + t = getenv("CTDB_TEST_MODE"); + if (t == NULL) { + goto done; + } + + ctdb_paths.test_mode = true; + + ctdb_paths.basedir = getenv("CTDB_BASE"); + if (ctdb_paths.basedir == NULL) { + D_ERR("Broken CTDB setup, CTDB_BASE not set in test mode\n"); + abort(); + } + +done: + ctdb_paths.basedir_set = true; +} + +static bool path_construct(char *path, const char *subdir) +{ + char p[PATH_MAX]; + int len; + + if (! ctdb_paths.basedir_set) { + path_set_basedir(); + } + + if (! ctdb_paths.test_mode) { + return true; + } + + if (subdir == NULL) { + len = snprintf(p, sizeof(p), "%s", ctdb_paths.basedir); + } else { + len = snprintf(p, + sizeof(p), + "%s/%s", + ctdb_paths.basedir, + subdir); + } + + if ((size_t)len >= sizeof(p)) { + return false; + } + + strncpy(path, p, PATH_MAX); + return true; +} + +const char *path_datadir(void) +{ + bool ok; + + if (! ctdb_paths.datadir_set) { + ok = path_construct(ctdb_paths.datadir, "share"); + if (!ok) { + D_ERR("Failed to construct DATADIR\n"); + } else { + ctdb_paths.datadir_set = true; + } + } + + return ctdb_paths.datadir; +} + +const char *path_etcdir(void) +{ + bool ok; + + if (! ctdb_paths.etcdir_set) { + ok = path_construct(ctdb_paths.etcdir, NULL); + if (!ok) { + D_ERR("Failed to construct ETCDIR\n"); + } else { + ctdb_paths.etcdir_set = true; + } + } + + return ctdb_paths.etcdir; +} + +const char *path_rundir(void) +{ + bool ok; + + if (! 
ctdb_paths.rundir_set) { + ok = path_construct(ctdb_paths.rundir, "run"); + if (!ok) { + D_ERR("Failed to construct RUNDIR\n"); + } else { + ctdb_paths.rundir_set = true; + } + } + + return ctdb_paths.rundir; +} + +const char *path_vardir(void) +{ + bool ok; + + if (! ctdb_paths.vardir_set) { + ok = path_construct(ctdb_paths.vardir, "var"); + if (!ok) { + D_ERR("Failed to construct VARDIR\n"); + } else { + ctdb_paths.vardir_set = true; + } + } + + return ctdb_paths.vardir; +} + +char *path_datadir_append(TALLOC_CTX *mem_ctx, const char *path) +{ + return talloc_asprintf(mem_ctx, "%s/%s", path_datadir(), path); +} + +char *path_etcdir_append(TALLOC_CTX *mem_ctx, const char *path) +{ + return talloc_asprintf(mem_ctx, "%s/%s", path_etcdir(), path); +} + +char *path_rundir_append(TALLOC_CTX *mem_ctx, const char *path) +{ + return talloc_asprintf(mem_ctx, "%s/%s", path_rundir(), path); +} + +char *path_vardir_append(TALLOC_CTX *mem_ctx, const char *path) +{ + return talloc_asprintf(mem_ctx, "%s/%s", path_vardir(), path); +} + +char *path_config(TALLOC_CTX *mem_ctx) +{ + return path_etcdir_append(mem_ctx, CTDB_CONFIG_FILE); +} + +char *path_socket(TALLOC_CTX *mem_ctx, const char *daemon) +{ + if (strcmp(daemon, "ctdbd") == 0) { + const char *t = getenv("CTDB_SOCKET"); + + if (t != NULL) { + return talloc_strdup(mem_ctx, t); + } + } + + return talloc_asprintf(mem_ctx, + "%s/%s.socket", + path_rundir(), + daemon); +} + +char *path_pidfile(TALLOC_CTX *mem_ctx, const char *daemon) +{ + return talloc_asprintf(mem_ctx, + "%s/%s.pid", + path_rundir(), + daemon); +} diff --git a/ctdb/common/path.h b/ctdb/common/path.h new file mode 100644 index 0000000..dcc6c20 --- /dev/null +++ b/ctdb/common/path.h @@ -0,0 +1,39 @@ +/* + Construct runtime paths + + Copyright (C) Amitay Isaacs 2018 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the 
License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_PATH_H__ +#define __CTDB_PATH_H__ + +#include <talloc.h> + +const char *path_datadir(void); +const char *path_etcdir(void); +const char *path_rundir(void); +const char *path_vardir(void); + +char *path_datadir_append(TALLOC_CTX *mem_ctx, const char *path); +char *path_etcdir_append(TALLOC_CTX *mem_ctx, const char *path); +char *path_rundir_append(TALLOC_CTX *mem_ctx, const char *path); +char *path_vardir_append(TALLOC_CTX *mem_ctx, const char *path); + +char *path_config(TALLOC_CTX *mem_ctx); +char *path_socket(TALLOC_CTX *mem_ctx, const char *daemon); +char *path_pidfile(TALLOC_CTX *mem_ctx, const char *daemon); + +#endif /* __CTDB_PATH_H__ */ diff --git a/ctdb/common/path_tool.c b/ctdb/common/path_tool.c new file mode 100644 index 0000000..44d29b6 --- /dev/null +++ b/ctdb/common/path_tool.c @@ -0,0 +1,384 @@ +/* + path tool + + Copyright (C) Amitay Isaacs 2018 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "replace.h" + +#include <talloc.h> + +#include "lib/util/debug.h" + +#include "common/logging.h" +#include "common/cmdline.h" +#include "common/path.h" +#include "common/path_tool.h" + +struct path_tool_context { + struct cmdline_context *cmdline; +}; + +static int path_tool_config(TALLOC_CTX *mem_ctx, + int argc, + const char **argv, + void *private_data) +{ + struct path_tool_context *ctx = talloc_get_type_abort( + private_data, struct path_tool_context); + + if (argc != 0) { + cmdline_usage(ctx->cmdline, "config"); + return EINVAL; + } + + printf("%s\n", path_config(mem_ctx)); + + return 0; +} + +static int path_tool_pidfile(TALLOC_CTX *mem_ctx, + int argc, + const char **argv, + void *private_data) +{ + struct path_tool_context *ctx = talloc_get_type_abort( + private_data, struct path_tool_context); + char *p; + + if (argc != 1) { + cmdline_usage(ctx->cmdline, "pidfile"); + return EINVAL; + } + + p = path_pidfile(mem_ctx, argv[0]); + if (p == NULL) { + D_ERR("Memory allocation error\n"); + return 1; + } + + printf("%s\n", p); + + return 0; +} + +static int path_tool_socket(TALLOC_CTX *mem_ctx, + int argc, + const char **argv, + void *private_data) +{ + struct path_tool_context *ctx = talloc_get_type_abort( + private_data, struct path_tool_context); + char *p; + + if (argc != 1) { + cmdline_usage(ctx->cmdline, "socket"); + return EINVAL; + } + + p = path_socket(mem_ctx, argv[0]); + if (p == NULL) { + D_ERR("Memory allocation error\n"); + return 1; + } + + printf("%s\n", p); + + return 0; +} + +static int path_tool_datadir(TALLOC_CTX *mem_ctx, + int argc, + const char **argv, + void *private_data) +{ + struct path_tool_context *ctx = talloc_get_type_abort( + private_data, struct path_tool_context); + + if (argc != 0) { + cmdline_usage(ctx->cmdline, "datadir"); + return EINVAL; + } + + printf("%s\n", path_datadir()); + + return 0; +} + +static int path_tool_datadir_append(TALLOC_CTX *mem_ctx, + int argc, + const char **argv, + void *private_data) 
+{ + struct path_tool_context *ctx = talloc_get_type_abort( + private_data, struct path_tool_context); + char *p; + + if (argc != 1) { + cmdline_usage(ctx->cmdline, "datadir append"); + return EINVAL; + } + + p = path_datadir_append(mem_ctx, argv[0]); + if (p == NULL) { + D_ERR("Memory allocation error\n"); + return 1; + } + + printf("%s\n", p); + + return 0; +} + +static int path_tool_etcdir(TALLOC_CTX *mem_ctx, + int argc, + const char **argv, + void *private_data) +{ + struct path_tool_context *ctx = talloc_get_type_abort( + private_data, struct path_tool_context); + + if (argc != 0) { + cmdline_usage(ctx->cmdline, "etcdir"); + return EINVAL; + } + + printf("%s\n", path_etcdir()); + + return 0; +} + +static int path_tool_etcdir_append(TALLOC_CTX *mem_ctx, + int argc, + const char **argv, + void *private_data) +{ + struct path_tool_context *ctx = talloc_get_type_abort( + private_data, struct path_tool_context); + char *p; + + if (argc != 1) { + cmdline_usage(ctx->cmdline, "etcdir append"); + return EINVAL; + } + + p = path_etcdir_append(mem_ctx, argv[0]); + if (p == NULL) { + D_ERR("Memory allocation error\n"); + return 1; + } + + printf("%s\n", p); + + return 0; +} + +static int path_tool_rundir(TALLOC_CTX *mem_ctx, + int argc, + const char **argv, + void *private_data) +{ + struct path_tool_context *ctx = talloc_get_type_abort( + private_data, struct path_tool_context); + + if (argc != 0) { + cmdline_usage(ctx->cmdline, "rundir"); + return EINVAL; + } + + printf("%s\n", path_rundir()); + + return 0; +} + +static int path_tool_rundir_append(TALLOC_CTX *mem_ctx, + int argc, + const char **argv, + void *private_data) +{ + struct path_tool_context *ctx = talloc_get_type_abort( + private_data, struct path_tool_context); + char *p; + + if (argc != 1) { + cmdline_usage(ctx->cmdline, "rundir append"); + return EINVAL; + } + + p = path_rundir_append(mem_ctx, argv[0]); + if (p == NULL) { + D_ERR("Memory allocation error\n"); + return 1; + } + + printf("%s\n", p); + + 
return 0; +} + +static int path_tool_vardir(TALLOC_CTX *mem_ctx, + int argc, + const char **argv, + void *private_data) +{ + struct path_tool_context *ctx = talloc_get_type_abort( + private_data, struct path_tool_context); + + if (argc != 0) { + cmdline_usage(ctx->cmdline, "vardir"); + return EINVAL; + } + + printf("%s\n", path_vardir()); + + return 0; +} + +static int path_tool_vardir_append(TALLOC_CTX *mem_ctx, + int argc, + const char **argv, + void *private_data) +{ + struct path_tool_context *ctx = talloc_get_type_abort( + private_data, struct path_tool_context); + char *p; + + if (argc != 1) { + cmdline_usage(ctx->cmdline, "vardir append"); + return EINVAL; + } + + p = path_vardir_append(mem_ctx, argv[0]); + if (p == NULL) { + D_ERR("Memory allocation error\n"); + return 1; + } + + printf("%s\n", p); + + return 0; +} + +struct cmdline_command path_commands[] = { + { "config", path_tool_config, + "Get path of CTDB config file", NULL }, + { "pidfile", path_tool_pidfile, + "Get path of CTDB daemon pidfile", "<daemon>" }, + { "socket", path_tool_socket, + "Get path of CTDB daemon socket", "<daemon>" }, + { "datadir append", path_tool_datadir_append, + "Get path relative to CTDB DATADIR", "<path>" }, + { "datadir", path_tool_datadir, + "Get path of CTDB DATADIR", NULL }, + { "etcdir append", path_tool_etcdir_append, + "Get path relative to CTDB ETCDIR", "<path>" }, + { "etcdir", path_tool_etcdir, + "Get path of CTDB ETCDIR", NULL }, + { "rundir append", path_tool_rundir_append, + "Get path relative to CTDB RUNDIR", "<path>" }, + { "rundir", path_tool_rundir, + "Get path of CTDB RUNDIR", NULL }, + { "vardir append", path_tool_vardir_append, + "Get path relative to CTDB VARDIR", "<path>" }, + { "vardir", path_tool_vardir, + "Get path of CTDB VARDIR", NULL }, + CMDLINE_TABLEEND +}; + +int path_tool_init(TALLOC_CTX *mem_ctx, + const char *prog, + struct poptOption *options, + int argc, + const char **argv, + bool parse_options, + struct path_tool_context **result) +{ 
+ struct path_tool_context *ctx; + int ret; + + ctx = talloc_zero(mem_ctx, struct path_tool_context); + if (ctx == NULL) { + D_ERR("Memory allocation error\n"); + return ENOMEM; + } + + ret = cmdline_init(ctx, + prog, + options, + NULL, + path_commands, + &ctx->cmdline); + if (ret != 0) { + D_ERR("Failed to initialize cmdline, ret=%d\n", ret); + talloc_free(ctx); + return ret; + } + + ret = cmdline_parse(ctx->cmdline, argc, argv, parse_options); + if (ret != 0) { + cmdline_usage(ctx->cmdline, NULL); + talloc_free(ctx); + return ret; + } + + *result = ctx; + return 0; +} + +int path_tool_run(struct path_tool_context *ctx, int *result) +{ + return cmdline_run(ctx->cmdline, ctx, result); +} + +#ifdef CTDB_PATH_TOOL + +int main(int argc, const char **argv) +{ + TALLOC_CTX *mem_ctx; + struct path_tool_context *ctx; + int ret, result; + + mem_ctx = talloc_new(NULL); + if (mem_ctx == NULL) { + fprintf(stderr, "Memory allocation error\n"); + exit(1); + } + + ret = path_tool_init(mem_ctx, + "ctdb-path", + NULL, + argc, + argv, + true, + &ctx); + if (ret != 0) { + talloc_free(mem_ctx); + exit(1); + } + + setup_logging("ctdb-path", DEBUG_STDERR); + debuglevel_set(DEBUG_ERR); + + ret = path_tool_run(ctx, &result); + if (ret != 0) { + result = 1; + } + + talloc_free(mem_ctx); + exit(result); +} + +#endif /* CTDB_PATH_TOOL */ diff --git a/ctdb/common/path_tool.h b/ctdb/common/path_tool.h new file mode 100644 index 0000000..bc6ea62 --- /dev/null +++ b/ctdb/common/path_tool.h @@ -0,0 +1,38 @@ +/* + path tool + + Copyright (C) Amitay Isaacs 2018 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_PATH_TOOL__ +#define __CTDB_PATH_TOOL__ + +#include <talloc.h> +#include <popt.h> + +struct path_tool_context; + +int path_tool_init(TALLOC_CTX *mem_ctx, + const char *prog, + struct poptOption *options, + int argc, + const char **argv, + bool parse_options, + struct path_tool_context **result); + +int path_tool_run(struct path_tool_context *ctx, int *result); + +#endif /* __CTDB_PATH_TOOL__ */ diff --git a/ctdb/common/pidfile.c b/ctdb/common/pidfile.c new file mode 100644 index 0000000..47589f4 --- /dev/null +++ b/ctdb/common/pidfile.c @@ -0,0 +1,85 @@ +/* + Create and remove pidfile + + Copyright (C) Amitay Isaacs 2016 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "replace.h" +#include "system/filesys.h" + +#include <talloc.h> + +#include "lib/util/blocking.h" +#include "lib/util/pidfile.h" + +#include "common/pidfile.h" + +struct pidfile_context { + const char *pidfile; + int fd; + pid_t pid; +}; + +static int pidfile_context_destructor(struct pidfile_context *pid_ctx); + +int pidfile_context_create(TALLOC_CTX *mem_ctx, const char *pidfile, + struct pidfile_context **result) +{ + struct pidfile_context *pid_ctx; + int fd, ret = 0; + + pid_ctx = talloc_zero(mem_ctx, struct pidfile_context); + if (pid_ctx == NULL) { + return ENOMEM; + } + + pid_ctx->pidfile = talloc_strdup(pid_ctx, pidfile); + if (pid_ctx->pidfile == NULL) { + ret = ENOMEM; + goto fail; + } + + pid_ctx->pid = getpid(); + + ret = pidfile_path_create(pid_ctx->pidfile, &fd, NULL); + if (ret != 0) { + return ret; + } + + pid_ctx->fd = fd; + + talloc_set_destructor(pid_ctx, pidfile_context_destructor); + + *result = pid_ctx; + return 0; + +fail: + talloc_free(pid_ctx); + return ret; +} + +static int pidfile_context_destructor(struct pidfile_context *pid_ctx) +{ + if (getpid() != pid_ctx->pid) { + return 0; + } + + (void) unlink(pid_ctx->pidfile); + + pidfile_fd_close(pid_ctx->fd); + + return 0; +} diff --git a/ctdb/common/pidfile.h b/ctdb/common/pidfile.h new file mode 100644 index 0000000..bc4e3a7 --- /dev/null +++ b/ctdb/common/pidfile.h @@ -0,0 +1,51 @@ +/* + Create and remove pidfile + + Copyright (C) Amitay Isaacs 2016 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_PIDFILE_H__ +#define __CTDB_PIDFILE_H__ + +#include <talloc.h> + +/** + * @file pidfile.h + * + * @brief Routines to manage PID file + */ + +/** + * @brief Abstract struct to store pidfile details + */ +struct pidfile_context; + +/** + * @brief Create a PID file + * + * This creates a PID file, locks it, and writes PID. + * + * @param[in] mem_ctx Talloc memory context + * @param[in] pidfile Path of PID file + * @param[out] result Pidfile context + * @return 0 on success, errno on failure + * + * Freeing the pidfile_context will delete the pidfile. + */ +int pidfile_context_create(TALLOC_CTX *mem_ctx, const char *pidfile, + struct pidfile_context **result); + +#endif /* __CTDB_PIDFILE_H__ */ diff --git a/ctdb/common/pkt_read.c b/ctdb/common/pkt_read.c new file mode 100644 index 0000000..212ace5 --- /dev/null +++ b/ctdb/common/pkt_read.c @@ -0,0 +1,190 @@ +/* + Reading packets using fixed and dynamic buffer + + Copyright (C) Amitay Isaacs 2015 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +/* This is similar to read_packet abstraction. The main difference is that + * tevent fd event is created only once. 
+ */ + +#include "replace.h" +#include "system/network.h" + +#include <talloc.h> +#include <tevent.h> + +#include "lib/util/tevent_unix.h" + +#include "pkt_read.h" + +/* + * Read a packet using fixed buffer + */ + +struct pkt_read_state { + int fd; + uint8_t *buf; + size_t buflen; + size_t nread, total; + bool use_fixed; + ssize_t (*more)(uint8_t *buf, size_t buflen, void *private_data); + void *private_data; +}; + +struct tevent_req *pkt_read_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + int fd, size_t initial, + uint8_t *buf, size_t buflen, + ssize_t (*more)(uint8_t *buf, + size_t buflen, + void *private_data), + void *private_data) +{ + struct tevent_req *req; + struct pkt_read_state *state; + + req = tevent_req_create(mem_ctx, &state, struct pkt_read_state); + if (req == NULL) { + return NULL; + } + + state->fd = fd; + + if (buf == NULL || buflen == 0) { + state->use_fixed = false; + state->buf = talloc_array(state, uint8_t, initial); + if (state->buf == NULL) { + talloc_free(req); + return NULL; + } + state->buflen = initial; + } else { + state->use_fixed = true; + state->buf = buf; + state->buflen = buflen; + } + + state->nread = 0; + state->total = initial; + + state->more = more; + state->private_data = private_data; + + return req; +} + +void pkt_read_handler(struct tevent_context *ev, struct tevent_fd *fde, + uint16_t flags, struct tevent_req *req) +{ + struct pkt_read_state *state = tevent_req_data( + req, struct pkt_read_state); + ssize_t nread, more; + uint8_t *tmp; + + nread = read(state->fd, state->buf + state->nread, + state->total - state->nread); + if ((nread == -1) && (errno == EINTR)) { + /* retry */ + return; + } + if (nread == -1) { + tevent_req_error(req, errno); + return; + } + if (nread == 0) { + /* fd closed */ + tevent_req_error(req, EPIPE); + return; + } + + state->nread += nread; + if (state->nread < state->total) { + /* come back later */ + return; + } + + /* Check if "more" asks for more data */ + if (state->more == NULL) { 
+ tevent_req_done(req); + return; + } + + more = state->more(state->buf, state->nread, state->private_data); + if (more == -1) { + /* invalid packet */ + tevent_req_error(req, EIO); + return; + } + if (more == 0) { + tevent_req_done(req); + return; + } + + if (state->total + more < state->total) { + /* int wrapped */ + tevent_req_error(req, EMSGSIZE); + return; + } + + if (state->total + more < state->buflen) { + /* continue using fixed buffer */ + state->total += more; + return; + } + + if (state->use_fixed) { + /* switch to dynamic buffer */ + tmp = talloc_array(state, uint8_t, state->total + more); + if (tevent_req_nomem(tmp, req)) { + return; + } + + memcpy(tmp, state->buf, state->total); + state->use_fixed = false; + } else { + tmp = talloc_realloc(state, state->buf, uint8_t, + state->total + more); + if (tevent_req_nomem(tmp, req)) { + return; + } + } + + state->buf = tmp; + state->buflen = state->total + more; + state->total += more; +} + +ssize_t pkt_read_recv(struct tevent_req *req, TALLOC_CTX *mem_ctx, + uint8_t **pbuf, bool *free_buf, int *perrno) +{ + struct pkt_read_state *state = tevent_req_data( + req, struct pkt_read_state); + + if (tevent_req_is_unix_error(req, perrno)) { + return -1; + } + + if (state->use_fixed) { + *pbuf = state->buf; + *free_buf = false; + } else { + *pbuf = talloc_steal(mem_ctx, state->buf); + *free_buf = true; + } + + return state->total; +} diff --git a/ctdb/common/pkt_read.h b/ctdb/common/pkt_read.h new file mode 100644 index 0000000..25d4a51 --- /dev/null +++ b/ctdb/common/pkt_read.h @@ -0,0 +1,98 @@ +/* + API for reading packets using fixed and dynamic buffer + + Copyright (C) Amitay Isaacs 2015 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_PKT_READ_H__ +#define __CTDB_PKT_READ_H__ + +#include <talloc.h> +#include <tevent.h> + +/** + * @file pkt_read.h + * + * @brief Read a packet using fixed size buffer or allocated memory. + * + * CTDB communication uses lots of small packets. This abstraction avoids the + * need to allocate memory for small packets. Only if the received packet is + * larger than the fixed memory buffer, use talloc to allocate memory. + */ + +/** + * @brief Start async computation to read a packet + * + * This returns a tevent request to read a packet from given fd. The fd + * should be nonblocking. Freeing this request will free all the memory + * associated with the request. + * + * @param[in] mem_ctx Talloc memory context + * @param[in] ev Tevent context + * @param[in] fd The non-blocking file/socket descriptor to read from + * @param[in] initial Initial amount of data to read + * @param[in] buf The static buffer to read data in + * @param[in] buflen The size of the static buffer + * @param[in] more The function to check if the bytes read forms a packet + * @param[in] private_data Private data to pass to more function + * @return new tevent request or NULL on failure + */ +struct tevent_req *pkt_read_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + int fd, size_t initial, + uint8_t *buf, size_t buflen, + ssize_t (*more)(uint8_t *buf, + size_t buflen, + void *private_data), + void *private_data); + +/** + * @brief Function to actually read data from the socket + * + * This function should be called, when tevent fd event is triggered. 
This + * function has the syntax of tevent_fd_handler_t. The private_data for this + * function is the tevent request created by pkt_read_send function. + * + * @param[in] ev Tevent context + * @param[in] fde Tevent fd context + * @param[in] flags Tevent fd flags + * @param[in] req The active tevent request + */ +void pkt_read_handler(struct tevent_context *ev, struct tevent_fd *fde, + uint16_t flags, struct tevent_req *req); + +/** + * @brief Retrieve a packet + * + * This function returns the pkt read from fd. + * + * @param[in] req Tevent request + * @param[in] mem_ctx Talloc memory context + * @param[out] pbuf The pointer to the buffer + * @param[out] free_buf Boolean to indicate that caller should free buffer + * @param[out] perrno errno in case of failure + * @return the size of the pkt, or -1 on failure + * + * If the pkt data is dynamically allocated, then it is moved under the + * specified talloc memory context and free_buf is set to true. It is the + * responsibility of the caller to free the memory returned. + * + * If the pkt data is stored in the fixed buffer, then free_buf is set to false. + */ +ssize_t pkt_read_recv(struct tevent_req *req, TALLOC_CTX *mem_ctx, + uint8_t **pbuf, bool *free_buf, int *perrno); + +#endif /* __CTDB_PKT_READ_H__ */ diff --git a/ctdb/common/pkt_write.c b/ctdb/common/pkt_write.c new file mode 100644 index 0000000..b1c1730 --- /dev/null +++ b/ctdb/common/pkt_write.c @@ -0,0 +1,101 @@ +/* + Write a packet + + Copyright (C) Amitay Isaacs 2015 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" +#include "system/network.h" + +#include <talloc.h> +#include <tevent.h> + +#include "lib/util/tevent_unix.h" + +#include "pkt_write.h" + +/* + * Write a packet + */ + +struct pkt_write_state { + int fd; + uint8_t *buf; + size_t buflen, offset; +}; + +struct tevent_req *pkt_write_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + int fd, uint8_t *buf, size_t buflen) +{ + struct tevent_req *req; + struct pkt_write_state *state; + + req = tevent_req_create(mem_ctx, &state, struct pkt_write_state); + if (req == NULL) { + return NULL; + } + + state->fd = fd; + state->buf = buf; + state->buflen = buflen; + state->offset = 0; + + return req; +} + +void pkt_write_handler(struct tevent_context *ev, struct tevent_fd *fde, + uint16_t flags, struct tevent_req *req) +{ + struct pkt_write_state *state = tevent_req_data( + req, struct pkt_write_state); + ssize_t nwritten; + + nwritten = write(state->fd, state->buf + state->offset, + state->buflen - state->offset); + if ((nwritten == -1) && (errno == EINTR)) { + /* retry */ + return; + } + if (nwritten == -1) { + tevent_req_error(req, errno); + return; + } + if (nwritten == 0) { + /* retry */ + return; + } + + state->offset += nwritten; + if (state->offset < state->buflen) { + /* come back later */ + return; + } + + tevent_req_done(req); +} + +ssize_t pkt_write_recv(struct tevent_req *req, int *perrno) +{ + struct pkt_write_state *state = tevent_req_data( + req, struct pkt_write_state); + + if (tevent_req_is_unix_error(req, perrno)) { + return -1; + } + + return state->offset; +} diff --git a/ctdb/common/pkt_write.h b/ctdb/common/pkt_write.h new file mode 100644 index 0000000..19d8045 --- /dev/null +++ b/ctdb/common/pkt_write.h @@ -0,0 +1,79 @@ +/* + API for writing a packet + + Copyright (C) Amitay 
Isaacs 2015 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_PKT_WRITE_H__ +#define __CTDB_PKT_WRITE_H__ + +#include <talloc.h> +#include <tevent.h> + +/** + * @file pkt_write.h + * + * @brief Write a packet. + * + * Write a complete packet with possibly multiple system calls. + */ + +/** + * @brief Start async computation to write a packet + * + * This returns a tevent request to write a packet to given fd. The fd + * should be nonblocking. Freeing this request will free all the memory + * associated with the request. + * + * @param[in] mem_ctx Talloc memory context + * @param[in] ev Tevent context + * @param[in] fd The non-blocking file/socket descriptor to write to + * @param[in] buf The data + * @param[in] buflen The size of the data + * @return new tevent request or NULL on failure + */ +struct tevent_req *pkt_write_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + int fd, uint8_t *buf, size_t buflen); + +/** + * @brief Function to actually write data to the socket + * + * This function should be called, when tevent fd event is triggered + * for TEVENT_FD_WRITE event. This function has the syntax of + * tevent_fd_handler_t. The private_data for this function is the tevent + * request created by pkt_write_send function. 
+ * + * @param[in] ev Tevent context + * @param[in] fde Tevent fd context + * @param[in] flags Tevent fd flags + * @param[in] req The active tevent request + */ +void pkt_write_handler(struct tevent_context *ev, struct tevent_fd *fde, + uint16_t flags, struct tevent_req *req); + +/** + * @brief Packet is sent + * + * This function returns the number of bytes written. + * + * @param[in] req Tevent request + * @param[out] perrno errno in case of failure + * @return the number of bytes written, or -1 on failure + */ +ssize_t pkt_write_recv(struct tevent_req *req, int *perrno); + +#endif /* __CTDB_PKT_WRITE_H__ */ diff --git a/ctdb/common/rb_tree.c b/ctdb/common/rb_tree.c new file mode 100644 index 0000000..0965623 --- /dev/null +++ b/ctdb/common/rb_tree.c @@ -0,0 +1,1101 @@ +/* + a talloc based red-black tree + + Copyright (C) Ronnie Sahlberg 2007 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "replace.h" + +#include <talloc.h> + +#include "lib/util/debug.h" + +#include "common/logging.h" +#include "common/rb_tree.h" + +#define NO_MEMORY_FATAL(p) do { if (!(p)) { \ + DEBUG(DEBUG_CRIT,("Out of memory for %s at %s\n", #p, __location__)); \ + exit(10); \ + }} while (0) + + +static void +tree_destructor_traverse_node(TALLOC_CTX *mem_ctx, trbt_node_t *node) +{ + talloc_set_destructor(node, NULL); + if (node->left) { + tree_destructor_traverse_node(mem_ctx, node->left); + } + if (node->right) { + tree_destructor_traverse_node(mem_ctx, node->right); + } + talloc_steal(mem_ctx, node); +} + +/* + destroy a tree and remove all its nodes + */ +static int tree_destructor(trbt_tree_t *tree) +{ + TALLOC_CTX *tmp_ctx; + trbt_node_t *node; + + if (tree == NULL) { + return 0; + } + + node=tree->root; + if (node == NULL) { + return 0; + } + + /* traverse the tree and remove the node destructor and steal + the node to the temporary context. + we don't want to use the existing destructor for the node + since that will remove the nodes one by one from the tree. + since the entire tree will be completely destroyed we don't care + if it is inconsistent or unbalanced while freeing the + individual nodes + */ + tmp_ctx = talloc_new(NULL); + tree_destructor_traverse_node(tmp_ctx, node); + talloc_free(tmp_ctx); + + return 0; +} + + +/* create a red black tree */ +trbt_tree_t * +trbt_create(TALLOC_CTX *memctx, uint32_t flags) +{ + trbt_tree_t *tree; + + tree = talloc_zero(memctx, trbt_tree_t); + NO_MEMORY_FATAL(tree); + + /* If the tree is freed, we must walk over all entries and steal the + node from the stored data pointer and release the node. + Note, when we free the tree we only free the tree and not any of + the data stored in the tree. 
+ */ + talloc_set_destructor(tree, tree_destructor); + tree->flags = flags; + + return tree; +} + +static inline trbt_node_t * +trbt_parent(trbt_node_t *node) +{ + return node->parent; +} + +static inline trbt_node_t * +trbt_grandparent(trbt_node_t *node) +{ + trbt_node_t *parent; + + parent=trbt_parent(node); + if(parent){ + return parent->parent; + } + return NULL; +} + +static inline trbt_node_t * +trbt_uncle(trbt_node_t *node) +{ + trbt_node_t *parent, *grandparent; + + parent=trbt_parent(node); + if(!parent){ + return NULL; + } + grandparent=trbt_parent(parent); + if(!grandparent){ + return NULL; + } + if(parent==grandparent->left){ + return grandparent->right; + } + return grandparent->left; +} + + +static inline void trbt_insert_case1(trbt_tree_t *tree, trbt_node_t *node); +static inline void trbt_insert_case2(trbt_tree_t *tree, trbt_node_t *node); + +static inline void +trbt_rotate_left(trbt_node_t *node) +{ + trbt_tree_t *tree = node->tree; + + if(node->parent){ + if(node->parent->left==node){ + node->parent->left=node->right; + } else { + node->parent->right=node->right; + } + } else { + tree->root=node->right; + } + node->right->parent=node->parent; + node->parent=node->right; + node->right=node->right->left; + if(node->right){ + node->right->parent=node; + } + node->parent->left=node; +} + +static inline void +trbt_rotate_right(trbt_node_t *node) +{ + trbt_tree_t *tree = node->tree; + + if(node->parent){ + if(node->parent->left==node){ + node->parent->left=node->left; + } else { + node->parent->right=node->left; + } + } else { + tree->root=node->left; + } + node->left->parent=node->parent; + node->parent=node->left; + node->left=node->left->right; + if(node->left){ + node->left->parent=node; + } + node->parent->right=node; +} + +/* NULL nodes are black by definition */ +static inline int trbt_get_color(trbt_node_t *node) +{ + if (node==NULL) { + return TRBT_BLACK; + } + return node->rb_color; +} +static inline int trbt_get_color_left(trbt_node_t *node) 
+{ + if (node==NULL) { + return TRBT_BLACK; + } + if (node->left==NULL) { + return TRBT_BLACK; + } + return node->left->rb_color; +} +static inline int trbt_get_color_right(trbt_node_t *node) +{ + if (node==NULL) { + return TRBT_BLACK; + } + if (node->right==NULL) { + return TRBT_BLACK; + } + return node->right->rb_color; +} +/* setting a NULL node to black is a nop */ +static inline void trbt_set_color(trbt_node_t *node, int color) +{ + if (node == NULL) { + return; + } + node->rb_color = color; +} +static inline void trbt_set_color_left(trbt_node_t *node, int color) +{ + if (node == NULL || node->left == NULL) { + return; + } + node->left->rb_color = color; +} +static inline void trbt_set_color_right(trbt_node_t *node, int color) +{ + if (node == NULL || node->right == NULL) { + return; + } + node->right->rb_color = color; +} + +static inline void +trbt_insert_case5(trbt_tree_t *tree, trbt_node_t *node) +{ + trbt_node_t *grandparent; + trbt_node_t *parent; + + parent=trbt_parent(node); + grandparent=trbt_parent(parent); + parent->rb_color=TRBT_BLACK; + grandparent->rb_color=TRBT_RED; + if( (node==parent->left) && (parent==grandparent->left) ){ + trbt_rotate_right(grandparent); + } else { + trbt_rotate_left(grandparent); + } +} + +static inline void +trbt_insert_case4(trbt_tree_t *tree, trbt_node_t *node) +{ + trbt_node_t *grandparent; + trbt_node_t *parent; + + parent=trbt_parent(node); + grandparent=trbt_parent(parent); + if(!grandparent){ + return; + } + if( (node==parent->right) && (parent==grandparent->left) ){ + trbt_rotate_left(parent); + node=node->left; + } else if( (node==parent->left) && (parent==grandparent->right) ){ + trbt_rotate_right(parent); + node=node->right; + } + trbt_insert_case5(tree, node); +} + +static inline void +trbt_insert_case3(trbt_tree_t *tree, trbt_node_t *node) +{ + trbt_node_t *grandparent; + trbt_node_t *parent; + trbt_node_t *uncle; + + uncle=trbt_uncle(node); + if(uncle && (uncle->rb_color==TRBT_RED)){ + 
parent=trbt_parent(node); + parent->rb_color=TRBT_BLACK; + uncle->rb_color=TRBT_BLACK; + grandparent=trbt_grandparent(node); + grandparent->rb_color=TRBT_RED; + trbt_insert_case1(tree, grandparent); + } else { + trbt_insert_case4(tree, node); + } +} + +static inline void +trbt_insert_case2(trbt_tree_t *tree, trbt_node_t *node) +{ + trbt_node_t *parent; + + parent=trbt_parent(node); + /* parent is always non-NULL here */ + if(parent->rb_color==TRBT_BLACK){ + return; + } + trbt_insert_case3(tree, node); +} + +static inline void +trbt_insert_case1(trbt_tree_t *tree, trbt_node_t *node) +{ + trbt_node_t *parent; + + parent=trbt_parent(node); + if(!parent){ + node->rb_color=TRBT_BLACK; + return; + } + trbt_insert_case2(tree, node); +} + +static inline trbt_node_t * +trbt_sibling(trbt_node_t *node) +{ + trbt_node_t *parent; + + parent=trbt_parent(node); + if(!parent){ + return NULL; + } + + if (node == parent->left) { + return parent->right; + } else { + return parent->left; + } +} + +static inline void +trbt_delete_case6(trbt_node_t *node) +{ + trbt_node_t *sibling, *parent; + + sibling = trbt_sibling(node); + parent = trbt_parent(node); + + trbt_set_color(sibling, parent->rb_color); + trbt_set_color(parent, TRBT_BLACK); + if (node == parent->left) { + trbt_set_color_right(sibling, TRBT_BLACK); + trbt_rotate_left(parent); + } else { + trbt_set_color_left(sibling, TRBT_BLACK); + trbt_rotate_right(parent); + } +} + + +static inline void +trbt_delete_case5(trbt_node_t *node) +{ + trbt_node_t *parent, *sibling; + + parent = trbt_parent(node); + sibling = trbt_sibling(node); + if ( (node == parent->left) + &&(trbt_get_color(sibling) == TRBT_BLACK) + &&(trbt_get_color_left(sibling) == TRBT_RED) + &&(trbt_get_color_right(sibling) == TRBT_BLACK) ){ + trbt_set_color(sibling, TRBT_RED); + trbt_set_color_left(sibling, TRBT_BLACK); + trbt_rotate_right(sibling); + trbt_delete_case6(node); + return; + } + if ( (node == parent->right) + &&(trbt_get_color(sibling) == TRBT_BLACK) + 
&&(trbt_get_color_right(sibling) == TRBT_RED) + &&(trbt_get_color_left(sibling) == TRBT_BLACK) ){ + trbt_set_color(sibling, TRBT_RED); + trbt_set_color_right(sibling, TRBT_BLACK); + trbt_rotate_left(sibling); + trbt_delete_case6(node); + return; + } + + trbt_delete_case6(node); +} + +static inline void +trbt_delete_case4(trbt_node_t *node) +{ + trbt_node_t *sibling; + + sibling = trbt_sibling(node); + if ( (trbt_get_color(node->parent) == TRBT_RED) + &&(trbt_get_color(sibling) == TRBT_BLACK) + &&(trbt_get_color_left(sibling) == TRBT_BLACK) + &&(trbt_get_color_right(sibling) == TRBT_BLACK) ){ + trbt_set_color(sibling, TRBT_RED); + trbt_set_color(node->parent, TRBT_BLACK); + } else { + trbt_delete_case5(node); + } +} + +static void trbt_delete_case1(trbt_node_t *node); + +static inline void +trbt_delete_case3(trbt_node_t *node) +{ + trbt_node_t *sibling; + + sibling = trbt_sibling(node); + if ( (trbt_get_color(node->parent) == TRBT_BLACK) + &&(trbt_get_color(sibling) == TRBT_BLACK) + &&(trbt_get_color_left(sibling) == TRBT_BLACK) + &&(trbt_get_color_right(sibling) == TRBT_BLACK) ){ + trbt_set_color(sibling, TRBT_RED); + trbt_delete_case1(node->parent); + } else { + trbt_delete_case4(node); + } +} + +static inline void +trbt_delete_case2(trbt_node_t *node) +{ + trbt_node_t *sibling; + + sibling = trbt_sibling(node); + if (trbt_get_color(sibling) == TRBT_RED) { + trbt_set_color(node->parent, TRBT_RED); + trbt_set_color(sibling, TRBT_BLACK); + if (node == node->parent->left) { + trbt_rotate_left(node->parent); + } else { + trbt_rotate_right(node->parent); + } + } + trbt_delete_case3(node); +} + +static void +trbt_delete_case1(trbt_node_t *node) +{ + if (!node->parent) { + return; + } else { + trbt_delete_case2(node); + } +} + +static void +delete_node(trbt_node_t *node, bool from_destructor) +{ + trbt_node_t *parent, *child, dc; + trbt_node_t *temp = NULL; + + /* This node has two child nodes, then just copy the content + from the next smaller node with this node and 
delete the + predecessor instead. + The predecessor is guaranteed to have at most one child + node since its right arm must be NULL + (It must be NULL since we are its successor and we are above + it in the tree) + */ + if (node->left != NULL && node->right != NULL) { + /* This node has two children, just copy the data */ + /* find the predecessor */ + temp = node->left; + + while (temp->right != NULL) { + temp = temp->right; + } + + /* swap the predecessor data and key with the node to + be deleted. + */ + node->key32 = temp->key32; + node->data = temp->data; + /* now we let node hang off the new data */ + talloc_steal(node->data, node); + + temp->data = NULL; + temp->key32 = -1; + /* then delete the temp node. + this node is guaranteed to have at least one leaf + child */ + delete_node(temp, from_destructor); + goto finished; + } + + + /* There is at most one child to this node to be deleted */ + child = node->left; + if (node->right) { + child = node->right; + } + + /* If the node to be deleted did not have any child at all we + create a temporary dummy node for the child and mark it black. + Once the delete of the node is finished, we remove this dummy + node, which is simple to do since it is guaranteed that it will + still not have any children after the delete operation. + This is because we don't represent the leaf-nodes as actual nodes + in this implementation. 
+ */ + if (!child) { + child = &dc; + child->tree = node->tree; + child->left=NULL; + child->right=NULL; + child->rb_color=TRBT_BLACK; + child->data=NULL; + } + + /* replace node with child */ + parent = trbt_parent(node); + if (parent) { + if (parent->left == node) { + parent->left = child; + } else { + parent->right = child; + } + } else { + node->tree->root = child; + } + child->parent = node->parent; + + + if (node->rb_color == TRBT_BLACK) { + if (trbt_get_color(child) == TRBT_RED) { + child->rb_color = TRBT_BLACK; + } else { + trbt_delete_case1(child); + } + } + + /* If we had to create a temporary dummy node to represent a black + leaf child we now have to delete it. + This is simple since this dummy node originally had no children + and we are guaranteed that it will also not have any children + after the node has been deleted and any possible rotations + have occurred. + + The only special case is if this was the last node of the tree + in which case we have to reset the root to NULL as well. + Otherwise it is enough to just unlink the child from its new + parent. + */ + if (child == &dc) { + if (child->parent == NULL) { + node->tree->root = NULL; + } else if (child == child->parent->left) { + child->parent->left = NULL; + } else { + child->parent->right = NULL; + } + } + +finished: + if (!from_destructor) { + talloc_free(node); + } + + /* if we came from a destructor and temp!=NULL this means we + did the node-swap but now the tree still contains the old + node which was freed in the destructor. Not good. 
+ */ + if (from_destructor && temp) { + temp->key32 = node->key32; + temp->rb_color = node->rb_color; + + temp->data = node->data; + talloc_steal(temp->data, temp); + + temp->parent = node->parent; + if (temp->parent) { + if (temp->parent->left == node) { + temp->parent->left = temp; + } else { + temp->parent->right = temp; + } + } + + temp->left = node->left; + if (temp->left) { + temp->left->parent = temp; + } + temp->right = node->right; + if (temp->right) { + temp->right->parent = temp; + } + + if (temp->tree->root == node) { + temp->tree->root = temp; + } + } + + if ( (node->tree->flags & TRBT_AUTOFREE) + && (node->tree->root == NULL) ) { + talloc_free(node->tree); + } + + return; +} + +/* + destroy a node and remove it from its tree + */ +static int node_destructor(trbt_node_t *node) +{ + delete_node(node, true); + + return 0; +} + +static inline trbt_node_t * +trbt_create_node(trbt_tree_t *tree, trbt_node_t *parent, uint32_t key, void *data) +{ + trbt_node_t *node; + + node=talloc_zero(tree, trbt_node_t); + NO_MEMORY_FATAL(node); + + node->tree=tree; + node->rb_color=TRBT_BLACK; + node->parent=parent; + node->left=NULL; + node->right=NULL; + node->key32=key; + node->data = data; + + /* let this node hang off data so that it is removed when + data is freed + */ + talloc_steal(data, node); + talloc_set_destructor(node, node_destructor); + + return node; +} + +/* insert a new node in the tree. + if there is already a node with a matching key in the tree + we replace it with the new data and return a pointer to the old data + in case the caller wants to take any special action + */ +void * +trbt_insert32(trbt_tree_t *tree, uint32_t key, void *data) +{ + trbt_node_t *node; + + node=tree->root; + + /* is this the first node ?*/ + if(!node){ + node = trbt_create_node(tree, NULL, key, data); + + tree->root=node; + return NULL; + } + + /* it was not the new root so walk the tree until we find where to + * insert this new leaf. 
+ */ + while(1){ + /* this node already exists, replace data and return the + old data + */ + if(key==node->key32){ + void *old_data; + + old_data = node->data; + node->data = data; + /* Let the node now be owned by the new data + so the node is freed when the enw data is released + */ + talloc_steal(node->data, node); + + return old_data; + } + if(key<node->key32) { + if(!node->left){ + /* new node to the left */ + trbt_node_t *new_node; + + new_node = trbt_create_node(tree, node, key, data); + node->left=new_node; + node=new_node; + + break; + } + node=node->left; + continue; + } + if(key>node->key32) { + if(!node->right){ + /* new node to the right */ + trbt_node_t *new_node; + + new_node = trbt_create_node(tree, node, key, data); + node->right=new_node; + node=new_node; + break; + } + node=node->right; + continue; + } + } + + /* node will now point to the newly created node */ + node->rb_color=TRBT_RED; + trbt_insert_case1(tree, node); + return NULL; +} + +void * +trbt_lookup32(trbt_tree_t *tree, uint32_t key) +{ + trbt_node_t *node; + + node=tree->root; + + while(node){ + if(key==node->key32){ + return node->data; + } + if(key<node->key32){ + node=node->left; + continue; + } + if(key>node->key32){ + node=node->right; + continue; + } + } + return NULL; +} + + +/* This deletes a node from the tree. 
+ Note that this does not release the data that the node points to +*/ +void +trbt_delete32(trbt_tree_t *tree, uint32_t key) +{ + trbt_node_t *node; + + node=tree->root; + + while(node){ + if(key==node->key32){ + delete_node(node, false); + return; + } + if(key<node->key32){ + node=node->left; + continue; + } + if(key>node->key32){ + node=node->right; + continue; + } + } +} + + +void +trbt_insert32_callback(trbt_tree_t *tree, uint32_t key, void *(*callback)(void *param, void *data), void *param) +{ + trbt_node_t *node; + + node=tree->root; + + /* is this the first node ?*/ + if(!node){ + node = trbt_create_node(tree, NULL, key, + callback(param, NULL)); + + tree->root=node; + return; + } + + /* it was not the new root so walk the tree until we find where to + * insert this new leaf. + */ + while(1){ + /* this node already exists, replace it + */ + if(key==node->key32){ + node->data = callback(param, node->data); + talloc_steal(node->data, node); + + return; + } + if(key<node->key32) { + if(!node->left){ + /* new node to the left */ + trbt_node_t *new_node; + + new_node = trbt_create_node(tree, node, key, + callback(param, NULL)); + node->left=new_node; + node=new_node; + + break; + } + node=node->left; + continue; + } + if(key>node->key32) { + if(!node->right){ + /* new node to the right */ + trbt_node_t *new_node; + + new_node = trbt_create_node(tree, node, key, + callback(param, NULL)); + node->right=new_node; + node=new_node; + break; + } + node=node->right; + continue; + } + } + + /* node will now point to the newly created node */ + node->rb_color=TRBT_RED; + trbt_insert_case1(tree, node); + return; +} + + +struct trbt_array_param { + void *(*callback)(void *param, void *data); + void *param; + uint32_t keylen; + uint32_t *key; + trbt_tree_t *tree; +}; +static void *array_insert_callback(void *p, void *data) +{ + struct trbt_array_param *param = (struct trbt_array_param *)p; + trbt_tree_t *tree = NULL; + + + /* if keylen has reached 0 we are done and can call 
the users + callback function with the users parameters + */ + if (param->keylen == 0) { + return param->callback(param->param, data); + } + + + /* keylen is not zero yes so we must create/process more subtrees */ + /* if data is NULL this means we did not yet have a subtree here + and we must create one. + */ + if (data == NULL) { + /* create a new subtree and hang it off our current tree + set it to autofree so that the tree is freed when + the last node in it has been released. + */ + tree = trbt_create(param->tree, TRBT_AUTOFREE); + } else { + /* we already have a subtree for this path */ + tree = (trbt_tree_t *)data; + } + + trbt_insertarray32_callback(tree, param->keylen, param->key, param->callback, param->param); + + /* now return either the old tree we got in *data or the new tree + we created to our caller so he can update his pointer in his + tree to point to our subtree + */ + return tree; +} + + + +/* insert into the tree using an array of uint32 as a key */ +void +trbt_insertarray32_callback(trbt_tree_t *tree, uint32_t keylen, uint32_t *key, void *(*cb)(void *param, void *data), void *pm) +{ + struct trbt_array_param tap; + + /* keylen-1 and key[1] since the call to insert32 will consume the + first part of the key. 
+ */ + tap.callback= cb; + tap.param = pm; + tap.keylen = keylen-1; + tap.key = &key[1]; + tap.tree = tree; + + trbt_insert32_callback(tree, key[0], array_insert_callback, &tap); +} + +/* lookup the tree using an array of uint32 as a key */ +void * +trbt_lookuparray32(trbt_tree_t *tree, uint32_t keylen, uint32_t *key) +{ + /* if keylen is 1 we can do a regular lookup and return this to the + user + */ + if (keylen == 1) { + return trbt_lookup32(tree, key[0]); + } + + /* we need to lookup the next subtree */ + tree = trbt_lookup32(tree, key[0]); + if (tree == NULL) { + /* the key does not exist, return NULL */ + return NULL; + } + + /* now lookup the next part of the key in our new tree */ + return trbt_lookuparray32(tree, keylen-1, &key[1]); +} + + +/* traverse a tree starting at node */ +static int +trbt_traversearray32_node(trbt_node_t *node, uint32_t keylen, + int (*callback)(void *param, void *data), + void *param) +{ + trbt_node_t *left = node->left; + trbt_node_t *right = node->right; + + if (left) { + int ret; + ret = trbt_traversearray32_node(left, keylen, callback, param); + if (ret != 0) { + return ret; + } + } + + /* this is the smallest node in this subtree + if keylen is 0 this means we can just call the callback + otherwise we must pull the next subtree and traverse that one as well + */ + if (keylen == 0) { + int ret; + + ret = callback(param, node->data); + if (ret != 0) { + return ret; + } + } else { + int ret; + + ret = trbt_traversearray32(node->data, keylen, callback, param); + if (ret != 0) { + return ret; + } + } + + if (right) { + int ret; + + ret = trbt_traversearray32_node(right, keylen, callback, param); + if (ret != 0) { + return ret; + } + } + + return 0; +} + + +/* traverse the tree using an array of uint32 as a key */ +int +trbt_traversearray32(trbt_tree_t *tree, uint32_t keylen, + int (*callback)(void *param, void *data), + void *param) +{ + trbt_node_t *node; + + if (tree == NULL) { + return 0; + } + + node=tree->root; + if (node == 
NULL) { + return 0; + } + + return trbt_traversearray32_node(node, keylen-1, callback, param); +} + + +/* this function will return the first node in a tree where + the key is an array of uint32_t +*/ +void * +trbt_findfirstarray32(trbt_tree_t *tree, uint32_t keylen) +{ + trbt_node_t *node; + + if (keylen < 1) { + return NULL; + } + + if (tree == NULL) { + return NULL; + } + + node=tree->root; + if (node == NULL) { + return NULL; + } + + while (node->left) { + node = node->left; + } + + /* we found our node so return the data */ + if (keylen == 1) { + return node->data; + } + + /* we are still traversing subtrees so find the first node in the + next level of trees + */ + return trbt_findfirstarray32(node->data, keylen-1); +} + + +#ifdef TEST_RB_TREE +static void printtree(trbt_node_t *node, int levels) +{ + int i; + if(node==NULL)return; + printtree(node->left, levels+1); + + for(i=0;i<levels;i++)printf(" "); + printf("key:%d COLOR:%s (node:%p parent:%p left:%p right:%p)\n",node->key32,node->rb_color==TRBT_BLACK?"BLACK":"RED", node, node->parent, node->left, node->right); + + printtree(node->right, levels+1); + printf("\n"); +} + +void print_tree(trbt_tree_t *tree) +{ + if(tree->root==NULL){ + printf("tree is empty\n"); + return; + } + printf("---\n"); + printtree(tree->root->left, 1); + printf("root node key:%d COLOR:%s (node:%p left:%p right:%p)\n",tree->root->key32,tree->root->rb_color==TRBT_BLACK?"BLACK":"RED", tree->root, tree->root->left, tree->root->right); + printtree(tree->root->right, 1); + printf("===\n"); +} + +void +test_tree(void) +{ + trbt_tree_t *tree; + char *str; + int i, ret; + int NUM=15; + int cnt=0; + + tree=trbt_create(talloc_new(NULL), 0); +#if 0 + for(i=0;i<10;i++){ + printf("adding node %i\n",i); + trbt_insert32(tree, i, NULL); + print_tree(tree); + } + printf("deleting node %i\n",3); + trbt_delete32(tree, 3); + print_tree(tree); + for(i=0;i<10;i++){ + printf("deleting node %i\n",i); + trbt_delete32(tree, i); + print_tree(tree); + } 
+exit(0); +#endif + while(++cnt){ + int i; + printf("iteration : %d\n",cnt); + i=random()%20; + printf("adding node %i\n",i); + trbt_insert32(tree, i, NULL); + print_tree(tree); + + i=random()%20; + printf("deleting node %i\n",i); + trbt_delete32(tree, i); + print_tree(tree); + } + +} + +#endif /* TEST_RB_TREE */ diff --git a/ctdb/common/rb_tree.h b/ctdb/common/rb_tree.h new file mode 100644 index 0000000..b5ddbb2 --- /dev/null +++ b/ctdb/common/rb_tree.h @@ -0,0 +1,90 @@ +/* + a talloc based red-black tree + + Copyright (C) Ronnie Sahlberg 2007 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef _RB_TREE_H +#define _RB_TREE_H + +#define TRBT_RED 0x00 +#define TRBT_BLACK 0x01 +typedef struct trbt_node { + struct trbt_tree *tree; + struct trbt_node *parent; + struct trbt_node *left; + struct trbt_node *right; + uint32_t rb_color; + uint32_t key32; + void *data; +} trbt_node_t; + +typedef struct trbt_tree { + trbt_node_t *root; +/* automatically free the tree when the last node has been deleted */ +#define TRBT_AUTOFREE 0x00000001 + uint32_t flags; +} trbt_tree_t; + + + +/* Create a RB tree */ +trbt_tree_t *trbt_create(TALLOC_CTX *memctx, uint32_t flags); + +/* Lookup a node in the tree and return a pointer to data or NULL */ +void *trbt_lookup32(trbt_tree_t *tree, uint32_t key); + +/* Insert a new node into the tree. 
If there was already a node with this + key the pointer to the previous data is returned. + The tree will talloc_steal() the data inserted into the tree . +*/ +void *trbt_insert32(trbt_tree_t *tree, uint32_t key, void *data); + +/* Insert a new node into the tree. + If this is a new node: + callback is called with data==NULL and param=param + the returned value from the callback is talloc_stolen and inserted in the + tree. + If a node already exists for this key then: + callback is called with data==existing data and param=param + the returned calue is talloc_stolen and inserted in the tree +*/ +void trbt_insert32_callback(trbt_tree_t *tree, uint32_t key, void *(*callback)(void *param, void *data), void *param); + +/* Delete a node from the tree and free all data associated with it */ +void trbt_delete32(trbt_tree_t *tree, uint32_t key); + + +/* insert into the tree with a key based on an array of uint32 */ +void trbt_insertarray32_callback(trbt_tree_t *tree, uint32_t keylen, uint32_t *key, void *(*callback)(void *param, void *data), void *param); + +/* Lookup a node in the tree with a key based on an array of uint32 + and return a pointer to data or NULL */ +void *trbt_lookuparray32(trbt_tree_t *tree, uint32_t keylen, uint32_t *key); + +/* Traverse a tree with a key based on an array of uint32 + returns 0 if traverse completed + !0 if the traverse was aborted + + If the callback returns !0 the traverse will be aborted +*/ +int trbt_traversearray32(trbt_tree_t *tree, uint32_t keylen, int (*callback)(void *param, void *data), void *param); + +/* Lookup the first node in the tree with a key based on an array of uint32 + and return a pointer to data or NULL */ +void *trbt_findfirstarray32(trbt_tree_t *tree, uint32_t keylen); + +#endif /* _RB_TREE_H */ diff --git a/ctdb/common/reqid.c b/ctdb/common/reqid.c new file mode 100644 index 0000000..0e651cf --- /dev/null +++ b/ctdb/common/reqid.c @@ -0,0 +1,89 @@ +/* + ctdb request id handling code + + Copyright (C) Amitay 
Isaacs 2015 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" + +#include <talloc.h> + +#include "lib/util/idtree.h" +#include "reqid.h" + +struct reqid_context { + struct idr_context *idr; + uint32_t lastid; +}; + +int reqid_init(TALLOC_CTX *mem_ctx, int start_id, + struct reqid_context **result) +{ + struct reqid_context *reqid_ctx; + + reqid_ctx = talloc_zero(mem_ctx, struct reqid_context); + if (reqid_ctx == NULL) { + return ENOMEM; + } + + reqid_ctx->idr = idr_init(reqid_ctx); + if (reqid_ctx->idr == NULL) { + talloc_free(reqid_ctx); + return ENOMEM; + } + + if (start_id <= 0) { + start_id = 1; + } + reqid_ctx->lastid = start_id; + + *result = reqid_ctx; + return 0; +} + +uint32_t reqid_new(struct reqid_context *reqid_ctx, void *private_data) +{ + int id; + + id = idr_get_new_above(reqid_ctx->idr, private_data, + reqid_ctx->lastid+1, INT_MAX); + if (id < 0) { + /* reqid wrapped */ + id = idr_get_new(reqid_ctx->idr, private_data, INT_MAX); + } + if (id == -1) { + return REQID_INVALID; + } + + reqid_ctx->lastid = id; + return id; +} + +void *_reqid_find(struct reqid_context *reqid_ctx, uint32_t reqid) +{ + return idr_find(reqid_ctx->idr, reqid); +} + +int reqid_remove(struct reqid_context *reqid_ctx, uint32_t reqid) +{ + int ret; + + ret = idr_remove(reqid_ctx->idr, reqid); + if (ret < 0) { + return ENOENT; + } + return 0; +} diff --git a/ctdb/common/reqid.h 
b/ctdb/common/reqid.h new file mode 100644 index 0000000..736e5b3 --- /dev/null +++ b/ctdb/common/reqid.h @@ -0,0 +1,89 @@ +/* + Request id database + + Copyright (C) Amitay Isaacs 2015 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_REQID_H__ +#define __CTDB_REQID_H__ + +#include <talloc.h> + +/** + * @file reqid.h + * + * @brief Request id database + * + * CTDB tracks messsages using request id. CTDB stores client state for each + * request id to process the replies correctly. + */ + +/** + * @brief Abstract struct to store request id database + */ +struct reqid_context; + +#define REQID_INVALID 0xffffffff + +/** + * @brief Initialize request id database + * + * This returns a new request id context. Freeing this context will free + * all the memory associated with request id database. 
+ * + * @param[in] mem_ctx Talloc memory context + * @param[in] start_id The initial id + * @param[out] result The new talloc_context structure + * @return 0 on success, errno on failure + */ +int reqid_init(TALLOC_CTX *mem_ctx, int start_id, + struct reqid_context **result); + +/** + * @brief Generate new request id and associate given data with the request id + * + * @param[in] reqid_ctx The request id context + * @param[in] private_data The state to associate with new request id + * @return new request id, REQID_INVALID on failure + */ +uint32_t reqid_new(struct reqid_context *reqid_ctx, void *private_data); + +#ifdef DOXYGEN +/** + * @brief Fetch the data associated with the request id + * + * @param[in] reqid_ctx The request id context + * @param[in] reqid The request id + * @param[in] type The data type of the stored data + * @return the data stored for the reqid, NULL on failure + */ +type *reqid_find(struct reqid_context *reqid_ctx, uint32_t reqid, #type); +#else +void *_reqid_find(struct reqid_context *reqid_ctx, uint32_t reqid); +#define reqid_find(ctx, reqid, type) \ + (type *)talloc_check_name(_reqid_find(ctx, reqid), #type) +#endif + +/** + * @brief Remove the data associated with the request id + * + * @param[in] reqid_ctx The request id context + * @param[in] reqid The request id + * @return 0 on success, errno on failure + */ +int reqid_remove(struct reqid_context *reqid_ctx, uint32_t reqid); + +#endif /* __CTDB_REQID_H__ */ diff --git a/ctdb/common/run_event.c b/ctdb/common/run_event.c new file mode 100644 index 0000000..d283664 --- /dev/null +++ b/ctdb/common/run_event.c @@ -0,0 +1,829 @@ +/* + Run scripts in a directory with specific event arguments + + Copyright (C) Amitay Isaacs 2017 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" +#include "system/filesys.h" +#include "system/dir.h" +#include "system/glob.h" +#include "system/wait.h" + +#include <talloc.h> +#include <tevent.h> + +#include "lib/util/tevent_unix.h" +#include "lib/util/debug.h" + +#include "common/logging.h" +#include "common/run_proc.h" +#include "common/event_script.h" + +#include "common/run_event.h" + +/* + * Utility functions + */ + +static int get_script_list(TALLOC_CTX *mem_ctx, + const char *script_dir, + struct run_event_script_list **out) +{ + struct event_script_list *s_list; + struct run_event_script_list *script_list; + unsigned int i; + int ret; + + ret = event_script_get_list(mem_ctx, script_dir, &s_list); + if (ret != 0) { + if (ret == ENOENT) { + D_WARNING("event script dir %s removed\n", script_dir); + } else { + D_WARNING("failed to get script list for %s, ret=%d\n", + script_dir, ret); + } + return ret; + } + + if (s_list->num_scripts == 0) { + *out = NULL; + talloc_free(s_list); + return 0; + } + + script_list = talloc_zero(mem_ctx, struct run_event_script_list); + if (script_list == NULL) { + talloc_free(s_list); + return ENOMEM; + } + + script_list->num_scripts = s_list->num_scripts; + script_list->script = talloc_zero_array(script_list, + struct run_event_script, + script_list->num_scripts); + if (script_list->script == NULL) { + talloc_free(s_list); + talloc_free(script_list); + return ENOMEM; + } + + for (i = 0; i < s_list->num_scripts; i++) { + struct event_script *s = s_list->script[i]; + struct run_event_script *script = &script_list->script[i]; + + script->name = 
talloc_steal(script_list->script, s->name); + + if (! s->enabled) { + script->summary = -ENOEXEC; + } + } + + talloc_free(s_list); + *out = script_list; + return 0; +} + +static int script_args(TALLOC_CTX *mem_ctx, const char *event_str, + const char *arg_str, const char ***out) +{ + const char **argv; + size_t argc; + size_t len; + + /* Preallocate argv array to avoid reallocation. */ + len = 8; + argv = talloc_array(mem_ctx, const char *, len); + if (argv == NULL) { + return ENOMEM; + } + + argv[0] = NULL; /* script name */ + argv[1] = event_str; + argc = 2; + + if (arg_str != NULL) { + char *str, *t, *tok; + + str = talloc_strdup(argv, arg_str); + if (str == NULL) { + return ENOMEM; + } + + t = str; + while ((tok = strtok(t, " ")) != NULL) { + argv[argc] = talloc_strdup(argv, tok); + if (argv[argc] == NULL) { + talloc_free(argv); + return ENOMEM; + } + argc += 1; + if (argc >= len) { + argv = talloc_realloc(mem_ctx, argv, + const char *, len + 8); + if (argv == NULL) { + return ENOMEM; + } + len += 8; + } + t = NULL; + } + + talloc_free(str); + } + + argv[argc] = NULL; + /* argc += 1 */ + + *out = argv; + return 0; +} + +struct run_event_context { + struct run_proc_context *run_proc_ctx; + const char *script_dir; + const char *debug_prog; + bool debug_running; + + struct tevent_queue *queue; + struct tevent_req *current_req; + bool monitor_running; +}; + + +int run_event_init(TALLOC_CTX *mem_ctx, struct run_proc_context *run_proc_ctx, + const char *script_dir, const char *debug_prog, + struct run_event_context **out) +{ + struct run_event_context *run_ctx; + struct stat st; + int ret; + + run_ctx = talloc_zero(mem_ctx, struct run_event_context); + if (run_ctx == NULL) { + return ENOMEM; + } + + run_ctx->run_proc_ctx = run_proc_ctx; + + ret = stat(script_dir, &st); + if (ret != 0) { + ret = errno; + talloc_free(run_ctx); + return ret; + } + + if (! 
S_ISDIR(st.st_mode)) { + talloc_free(run_ctx); + return ENOTDIR; + } + + run_ctx->script_dir = talloc_strdup(run_ctx, script_dir); + if (run_ctx->script_dir == NULL) { + talloc_free(run_ctx); + return ENOMEM; + } + + if (debug_prog != NULL) { + run_ctx->debug_prog = talloc_strdup(run_ctx, debug_prog); + if (run_ctx->debug_prog == NULL) { + talloc_free(run_ctx); + return ENOMEM; + } + } + + run_ctx->debug_running = false; + + run_ctx->queue = tevent_queue_create(run_ctx, "run event queue"); + if (run_ctx->queue == NULL) { + talloc_free(run_ctx); + return ENOMEM; + } + + run_ctx->monitor_running = false; + + *out = run_ctx; + return 0; +} + +static struct run_proc_context * +run_event_run_proc_context(struct run_event_context *run_ctx) +{ + return run_ctx->run_proc_ctx; +} + +static const char *run_event_script_dir(struct run_event_context *run_ctx) +{ + return run_ctx->script_dir; +} + +static const char *run_event_debug_prog(struct run_event_context *run_ctx) +{ + return run_ctx->debug_prog; +} + +static struct tevent_queue *run_event_queue(struct run_event_context *run_ctx) +{ + return run_ctx->queue; +} + +static void run_event_start_running(struct run_event_context *run_ctx, + struct tevent_req *req, bool is_monitor) +{ + run_ctx->current_req = req; + run_ctx->monitor_running = is_monitor; +} + +static void run_event_stop_running(struct run_event_context *run_ctx) +{ + run_ctx->current_req = NULL; + run_ctx->monitor_running = false; +} + +static struct tevent_req *run_event_get_running( + struct run_event_context *run_ctx, + bool *is_monitor) +{ + *is_monitor = run_ctx->monitor_running; + return run_ctx->current_req; +} + +static int run_event_script_status(struct run_event_script *script) +{ + int ret; + + if (script->result.sig > 0) { + ret = -EINTR; + } else if (script->result.err > 0) { + if (script->result.err == EACCES) { + /* Map EACCESS to ENOEXEC */ + ret = -ENOEXEC; + } else { + ret = -script->result.err; + } + } else { + ret = script->result.status; + 
} + + return ret; +} + +int run_event_list(struct run_event_context *run_ctx, + TALLOC_CTX *mem_ctx, + struct run_event_script_list **output) +{ + struct event_script_list *s_list = NULL; + struct run_event_script_list *script_list = NULL; + unsigned int i; + int ret; + + ret = event_script_get_list(mem_ctx, + run_event_script_dir(run_ctx), + &s_list); + if (ret != 0) { + return ret; + } + + if (s_list->num_scripts == 0) { + *output = NULL; + talloc_free(s_list); + return 0; + } + + script_list = talloc_zero(mem_ctx, struct run_event_script_list); + if (script_list == NULL) { + return ENOMEM; + } + + script_list->num_scripts = s_list->num_scripts; + script_list->script = talloc_zero_array(script_list, + struct run_event_script, + script_list->num_scripts); + if (script_list->script == NULL) { + talloc_free(s_list); + talloc_free(script_list); + return ENOMEM; + } + + for (i=0; i < s_list->num_scripts; i++) { + struct event_script *s = s_list->script[i]; + struct run_event_script *script = &script_list->script[i]; + + script->name = talloc_steal(script_list->script, s->name); + + if (! 
s->enabled) { + script->summary = -ENOEXEC; + } + } + + + talloc_free(s_list); + *output = script_list; + return 0; +} + +int run_event_script_enable(struct run_event_context *run_ctx, + const char *script_name) +{ + return event_script_chmod(run_event_script_dir(run_ctx), + script_name, + true); +} + +int run_event_script_disable(struct run_event_context *run_ctx, + const char *script_name) +{ + return event_script_chmod(run_event_script_dir(run_ctx), + script_name, + false); +} + +/* + * Run debug program to diagnose hung scripts + */ + +static int debug_args(TALLOC_CTX *mem_ctx, const char *path, + const char *event_str, pid_t pid, const char ***out) +{ + const char **argv; + + argv = talloc_array(mem_ctx, const char *, 4); + if (argv == NULL) { + return ENOMEM; + } + + argv[0] = path; + argv[1] = talloc_asprintf(argv, "%d", pid); + argv[2] = event_str; + if (argv[1] == NULL) { + talloc_free(argv); + return ENOMEM; + } + argv[3] = NULL; + + *out = argv; + return 0; +} + +static void debug_log(int loglevel, const char *output, const char *log_prefix) +{ + char *line, *s; + + s = strdup(output); + if (s == NULL) { + DEBUG(loglevel, ("%s: %s\n", log_prefix, output)); + return; + } + + line = strtok(s, "\n"); + while (line != NULL) { + DEBUG(loglevel, ("%s: %s\n", log_prefix, line)); + line = strtok(NULL, "\n"); + } + free(s); +} + +struct run_debug_state { + struct run_event_context *run_ctx; + pid_t pid; +}; + +static void run_debug_done(struct tevent_req *subreq); + +static struct tevent_req *run_debug_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct run_event_context *run_ctx, + const char *event_str, pid_t pid) +{ + struct tevent_req *req, *subreq; + struct run_debug_state *state; + const char **argv; + const char *debug_prog; + int ret; + + req = tevent_req_create(mem_ctx, &state, struct run_debug_state); + if (req == NULL) { + return NULL; + } + + state->run_ctx = run_ctx; + state->pid = pid; + + debug_prog = run_event_debug_prog(run_ctx); + if 
(debug_prog == NULL) { + tevent_req_done(req); + return tevent_req_post(req, ev); + } + + if (run_ctx->debug_running) { + tevent_req_done(req); + return tevent_req_post(req, ev); + } + + if (pid == -1) { + D_DEBUG("Event script terminated, nothing to debug\n"); + tevent_req_done(req); + return tevent_req_post(req, ev); + } + + ret = debug_args(state, debug_prog, event_str, pid, &argv); + if (ret != 0) { + D_ERR("debug_args() failed\n"); + tevent_req_error(req, ret); + return tevent_req_post(req, ev); + } + + D_DEBUG("Running debug %s with args \"%s %s\"\n", + debug_prog, argv[1], argv[2]); + + subreq = run_proc_send(state, ev, run_event_run_proc_context(run_ctx), + debug_prog, argv, -1, tevent_timeval_zero()); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, run_debug_done, req); + + run_ctx->debug_running = true; + + talloc_free(argv); + return req; +} + +static void run_debug_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct run_debug_state *state = tevent_req_data( + req, struct run_debug_state); + char *output; + int ret; + bool status; + + state->run_ctx->debug_running = false; + + status = run_proc_recv(subreq, &ret, NULL, NULL, state, &output); + TALLOC_FREE(subreq); + if (! 
status) { + D_ERR("Running debug failed, ret=%d\n", ret); + } + + /* Log output */ + if (output != NULL) { + debug_log(DEBUG_ERR, output, "event_debug"); + talloc_free(output); + } + + kill(-state->pid, SIGTERM); + tevent_req_done(req); +} + +static bool run_debug_recv(struct tevent_req *req, int *perr) +{ + int ret; + + if (tevent_req_is_unix_error(req, &ret)) { + if (perr != NULL) { + *perr = ret; + } + return false; + } + + return true; +} + +/* + * Run a single event + */ + +struct run_event_state { + struct tevent_context *ev; + struct run_event_context *run_ctx; + const char *event_str; + const char *arg_str; + struct timeval timeout; + bool continue_on_failure; + + struct run_event_script_list *script_list; + const char **argv; + struct tevent_req *script_subreq; + unsigned int index; + bool cancelled; +}; + +static void run_event_cancel(struct tevent_req *req); +static void run_event_trigger(struct tevent_req *req, void *private_data); +static struct tevent_req *run_event_run_script(struct tevent_req *req); +static void run_event_next_script(struct tevent_req *subreq); +static void run_event_debug(struct tevent_req *req, pid_t pid); +static void run_event_debug_done(struct tevent_req *subreq); + +struct tevent_req *run_event_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct run_event_context *run_ctx, + const char *event_str, + const char *arg_str, + struct timeval timeout, + bool continue_on_failure) +{ + struct tevent_req *req, *current_req; + struct run_event_state *state; + bool monitor_running, status; + + req = tevent_req_create(mem_ctx, &state, struct run_event_state); + if (req == NULL) { + return NULL; + } + + state->ev = ev; + state->run_ctx = run_ctx; + state->event_str = talloc_strdup(state, event_str); + if (tevent_req_nomem(state->event_str, req)) { + return tevent_req_post(req, ev); + } + if (arg_str != NULL) { + state->arg_str = talloc_strdup(state, arg_str); + if (tevent_req_nomem(state->arg_str, req)) { + return 
tevent_req_post(req, ev); + } + } + state->timeout = timeout; + state->continue_on_failure = continue_on_failure; + state->cancelled = false; + + state->script_list = talloc_zero(state, struct run_event_script_list); + if (tevent_req_nomem(state->script_list, req)) { + return tevent_req_post(req, ev); + } + + /* + * If monitor event is running, + * cancel the running monitor event and run new event + * + * If any other event is running, + * if new event is monitor, cancel that event + * else add new event to the queue + */ + + current_req = run_event_get_running(run_ctx, &monitor_running); + if (current_req != NULL) { + if (monitor_running) { + run_event_cancel(current_req); + } else if (strcmp(event_str, "monitor") == 0) { + state->script_list->summary = -ECANCELED; + tevent_req_done(req); + return tevent_req_post(req, ev); + } + } + + status = tevent_queue_add(run_event_queue(run_ctx), ev, req, + run_event_trigger, NULL); + if (! status) { + tevent_req_error(req, ENOMEM); + return tevent_req_post(req, ev); + } + + return req; +} + +static void run_event_cancel(struct tevent_req *req) +{ + struct run_event_state *state = tevent_req_data( + req, struct run_event_state); + + run_event_stop_running(state->run_ctx); + + state->script_list->summary = -ECANCELED; + state->cancelled = true; + + TALLOC_FREE(state->script_subreq); + + tevent_req_done(req); +} + +static void run_event_trigger(struct tevent_req *req, void *private_data) +{ + struct tevent_req *subreq; + struct run_event_state *state = tevent_req_data( + req, struct run_event_state); + struct run_event_script_list *script_list; + int ret; + bool is_monitor = false; + + D_DEBUG("Running event %s with args \"%s\"\n", state->event_str, + state->arg_str == NULL ? 
"(null)" : state->arg_str); + + ret = get_script_list(state, + run_event_script_dir(state->run_ctx), + &script_list); + if (ret != 0) { + D_ERR("get_script_list() failed, ret=%d\n", ret); + tevent_req_error(req, ret); + return; + } + + /* No scripts */ + if (script_list == NULL || script_list->num_scripts == 0) { + tevent_req_done(req); + return; + } + + talloc_free(state->script_list); + state->script_list = script_list; + + ret = script_args(state, state->event_str, state->arg_str, + &state->argv); + if (ret != 0) { + D_ERR("script_args() failed, ret=%d\n", ret); + tevent_req_error(req, ret); + return; + } + + state->index = 0; + + subreq = run_event_run_script(req); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, run_event_next_script, req); + + state->script_subreq = subreq; + + if (strcmp(state->event_str, "monitor") == 0) { + is_monitor = true; + } + run_event_start_running(state->run_ctx, req, is_monitor); +} + +static struct tevent_req *run_event_run_script(struct tevent_req *req) +{ + struct run_event_state *state = tevent_req_data( + req, struct run_event_state); + struct run_event_script *script; + struct tevent_req *subreq; + char *path; + + script = &state->script_list->script[state->index]; + + path = talloc_asprintf(state, "%s/%s.script", + run_event_script_dir(state->run_ctx), + script->name); + if (path == NULL) { + return NULL; + } + + state->argv[0] = script->name; + script->begin = tevent_timeval_current(); + + D_DEBUG("Running %s with args \"%s %s\"\n", + path, state->argv[0], state->argv[1]); + + subreq = run_proc_send(state, state->ev, + run_event_run_proc_context(state->run_ctx), + path, state->argv, -1, state->timeout); + + talloc_free(path); + + return subreq; +} + +static void run_event_next_script(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct run_event_state *state = tevent_req_data( + req, struct run_event_state); + struct 
run_event_script *script; + pid_t pid; + int ret; + bool status; + + script = &state->script_list->script[state->index]; + script->end = tevent_timeval_current(); + + status = run_proc_recv(subreq, &ret, &script->result, &pid, + state->script_list, &script->output); + TALLOC_FREE(subreq); + state->script_subreq = NULL; + if (! status) { + D_ERR("run_proc failed for %s, ret=%d\n", script->name, ret); + run_event_stop_running(state->run_ctx); + tevent_req_error(req, ret); + return; + } + + if (state->cancelled) { + return; + } + + /* Log output */ + if (script->output != NULL) { + debug_log(DEBUG_ERR, script->output, script->name); + } + + D_DEBUG("Script %s finished sig=%d, err=%d, status=%d\n", + script->name, script->result.sig, script->result.err, + script->result.status); + + + /* If a script fails, stop running */ + script->summary = run_event_script_status(script); + if (script->summary != 0 && script->summary != -ENOEXEC) { + state->script_list->summary = script->summary; + + if (! state->continue_on_failure) { + state->script_list->num_scripts = state->index + 1; + + if (script->summary == -ETIMEDOUT && pid != -1) { + run_event_debug(req, pid); + } + D_NOTICE("%s event %s\n", state->event_str, + (script->summary == -ETIMEDOUT) ? 
+ "timed out" : + "failed"); + run_event_stop_running(state->run_ctx); + tevent_req_done(req); + return; + } + } + + state->index += 1; + + /* All scripts executed */ + if (state->index >= state->script_list->num_scripts) { + run_event_stop_running(state->run_ctx); + tevent_req_done(req); + return; + } + + subreq = run_event_run_script(req); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, run_event_next_script, req); + + state->script_subreq = subreq; +} + +static void run_event_debug(struct tevent_req *req, pid_t pid) +{ + struct run_event_state *state = tevent_req_data( + req, struct run_event_state); + struct tevent_req *subreq; + + /* Debug script is run with ectx as the memory context */ + subreq = run_debug_send(state->run_ctx, state->ev, state->run_ctx, + state->event_str, pid); + if (subreq == NULL) { + /* If run debug fails, it's not an error */ + D_NOTICE("Failed to run event debug\n"); + return; + } + tevent_req_set_callback(subreq, run_event_debug_done, NULL); +} + +static void run_event_debug_done(struct tevent_req *subreq) +{ + int ret = 0; + bool status; + + status = run_debug_recv(subreq, &ret); + TALLOC_FREE(subreq); + if (! 
status) { + D_NOTICE("run_debug() failed, ret=%d\n", ret); + } +} + +bool run_event_recv(struct tevent_req *req, int *perr, + TALLOC_CTX *mem_ctx, + struct run_event_script_list **script_list) +{ + struct run_event_state *state = tevent_req_data( + req, struct run_event_state); + int ret; + + if (tevent_req_is_unix_error(req, &ret)) { + if (perr != NULL) { + *perr = ret; + } + return false; + } + + if (script_list != NULL) { + *script_list = talloc_steal(mem_ctx, state->script_list); + } + return true; +} + diff --git a/ctdb/common/run_event.h b/ctdb/common/run_event.h new file mode 100644 index 0000000..f53bca3 --- /dev/null +++ b/ctdb/common/run_event.h @@ -0,0 +1,150 @@ +/* + Run scripts in a directory with specific event arguments + + Copyright (C) Amitay Isaacs 2017 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_RUN_EVENT_H__ +#define __CTDB_RUN_EVENT_H__ + +#include <talloc.h> +#include <tevent.h> + +#include "common/run_proc.h" + +/** + * @file run_event.h + * + * @brief Run scripts in a directory with specific event arguments. + * + * This abstraction allows one to execute multiple scripts in a directory + * (specified by script_dir) with given event and arguments. + * + * At one time, only one event can be run. Multiple run_event calls + * will cause events to be queued up. They will be run sequentially. 
+ * + * A "monitor" event is special and has special semantics. + * + * If a monitor event is running and another event is scheduled, the + * currently running monitor event is cancelled. + * + * If an event (not monitor) is running and monitor event is scheduled, + * then the monitor event will be cancelled immediately. + */ + +/** + * @brief The run event context + */ +struct run_event_context; + +struct run_event_script { + char *name; + struct timeval begin, end; + struct run_proc_result result; + int summary; + char *output; +}; + +struct run_event_script_list { + uint32_t num_scripts; + struct run_event_script *script; + int summary; +}; + + +/** + * @brief Initialize the context for running events + * + * @param[in] mem_ctx Talloc memory context + * @param[in] run_proc_ctx Run_proc context + * @param[in] script_dir Directory containing script to run + * @param[in] debug_prog Path of a program to run if a script hangs + * @param[out] result New run_event context + * @return 0 on success, errno on error + */ +int run_event_init(TALLOC_CTX *mem_ctx, struct run_proc_context *run_proc_ctx, + const char *script_dir, const char *debug_prog, + struct run_event_context **result); + +/** + * @brief Get a list of scripts + * + * @param[in] run_ctx Run_event context + * @param[in] mem_ctx Talloc memory context + * @param[out] output List of valid scripts + * @return 0 on success, errno on failure + */ +int run_event_list(struct run_event_context *run_ctx, + TALLOC_CTX *mem_ctx, + struct run_event_script_list **output); + +/** + * @brief Enable a script + * + * @param[in] run_ctx Run_event context + * @param[in] script_name Name of the script to enable + * @return 0 on success, errno on failure + */ +int run_event_script_enable(struct run_event_context *run_ctx, + const char *script_name); + +/** + * @brief Disable a script + * + * @param[in] run_ctx Run_event context + * @param[in] script_name Name of the script to disable + * @return 0 on success, errno on failure + */ +int 
run_event_script_disable(struct run_event_context *run_ctx, + const char *script_name); + +/** + * @brief Async computation start to run an event + * + * @param[in] mem_ctx Talloc memory context + * @param[in] ev Tevent context + * @param[in] run_ctx Run_event context + * @param[in] event_str The event argument to the script + * @param[in] arg_str Event arguments to the script + * @param[in] timeout How long to wait for execution + * @param[in] continue_on_failure Whether to continue to run events on failure + * @return new tevent request, or NULL on failure + * + * arg_str contains optional arguments for an event. + */ +struct tevent_req *run_event_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct run_event_context *run_ctx, + const char *event_str, + const char *arg_str, + struct timeval timeout, + bool continue_on_failure); + +/** + * @brief Async computation end to run an event + * + * @param[in] req Tevent request + * @param[out] perr errno in case of failure + * @param[in] mem_ctx Talloc memory context + * @param[out] output List of scripts executed and their status + * @return true on success, false on failure + */ +bool run_event_recv(struct tevent_req *req, int *perr, + TALLOC_CTX *mem_ctx, + struct run_event_script_list **output); + +#endif /* __CTDB_RUN_EVENT_H__ */ + diff --git a/ctdb/common/run_proc.c b/ctdb/common/run_proc.c new file mode 100644 index 0000000..84bc343 --- /dev/null +++ b/ctdb/common/run_proc.c @@ -0,0 +1,503 @@ +/* + Run a child process and collect the output + + Copyright (C) Amitay Isaacs 2016 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" +#include "system/filesys.h" +#include "system/wait.h" + +#include <talloc.h> +#include <tevent.h> + +#include "lib/util/tevent_unix.h" +#include "lib/util/sys_rw.h" +#include "lib/util/blocking.h" +#include "lib/util/dlinklist.h" + +#include "common/run_proc.h" + +/* + * Process abstraction + */ + +struct run_proc_context; + +struct proc_context { + struct proc_context *prev, *next; + + pid_t pid; + + int fd; + struct tevent_fd *fde; + + char *output; + struct run_proc_result result; + + struct tevent_req *req; +}; + +static int proc_destructor(struct proc_context *proc); + +static struct proc_context *proc_new(TALLOC_CTX *mem_ctx, + struct run_proc_context *run_ctx) +{ + struct proc_context *proc; + + proc = talloc_zero(mem_ctx, struct proc_context); + if (proc == NULL) { + return NULL; + } + + proc->pid = -1; + proc->fd = -1; + + talloc_set_destructor(proc, proc_destructor); + + return proc; +} + +static void run_proc_kill(struct tevent_req *req); + +static int proc_destructor(struct proc_context *proc) +{ + if (proc->req != NULL) { + run_proc_kill(proc->req); + } + + talloc_free(proc->fde); + if (proc->pid != -1) { + kill(-proc->pid, SIGKILL); + } + + return 0; +} + +static void proc_read_handler(struct tevent_context *ev, + struct tevent_fd *fde, uint16_t flags, + void *private_data); + +static int proc_start(struct proc_context *proc, struct tevent_context *ev, + const char *path, const char **argv, int stdin_fd) +{ + int fd[2]; + int ret; + + ret = pipe(fd); + if (ret != 0) { + return ret; + } + + proc->pid = fork(); + if (proc->pid == -1) { + ret = 
errno; + close(fd[0]); + close(fd[1]); + return ret; + } + + if (proc->pid == 0) { + close(fd[0]); + + ret = dup2(fd[1], STDOUT_FILENO); + if (ret == -1) { + exit(64 + errno); + } + ret = dup2(fd[1], STDERR_FILENO); + if (ret == -1) { + exit(64 + errno); + } + + close(fd[1]); + + if (stdin_fd != -1) { + ret = dup2(stdin_fd, STDIN_FILENO); + if (ret == -1) { + exit(64 + errno); + } + } + + ret = setpgid(0, 0); + if (ret != 0) { + exit(64 + errno); + } + + ret = execv(path, discard_const(argv)); + if (ret != 0) { + exit(64 + errno); + } + + exit(64 + ENOEXEC); + } + + close(fd[1]); + + proc->fd = fd[0]; + proc->fde = tevent_add_fd(ev, proc, fd[0], TEVENT_FD_READ, + proc_read_handler, proc); + if (proc->fde == NULL) { + close(fd[0]); + return ENOMEM; + } + + tevent_fd_set_auto_close(proc->fde); + + return 0; +} + +static void proc_read_handler(struct tevent_context *ev, + struct tevent_fd *fde, uint16_t flags, + void *private_data) +{ + struct proc_context *proc = talloc_get_type_abort( + private_data, struct proc_context); + size_t offset; + ssize_t nread; + int len = 0; + int ret; + + ret = ioctl(proc->fd, FIONREAD, &len); + if (ret != 0) { + goto fail; + } + + if (len == 0) { + /* pipe closed */ + goto close; + } + + offset = (proc->output == NULL) ? 
0 : strlen(proc->output); + + proc->output = talloc_realloc(proc, proc->output, char, offset+len+1); + if (proc->output == NULL) { + goto fail; + } + + nread = sys_read(proc->fd, proc->output + offset, len); + if (nread == -1) { + goto fail; + } + proc->output[offset+nread] = '\0'; + return; + +fail: + if (proc->pid != -1) { + kill(-proc->pid, SIGKILL); + proc->pid = -1; + } +close: + TALLOC_FREE(proc->fde); + proc->fd = -1; +} + + +/* + * Run proc abstraction + */ + +struct run_proc_context { + struct tevent_context *ev; + struct tevent_signal *se; + struct proc_context *plist; +}; + +static void run_proc_signal_handler(struct tevent_context *ev, + struct tevent_signal *se, + int signum, int count, void *siginfo, + void *private_data); +static int run_proc_context_destructor(struct run_proc_context *run_ctx); +static void run_proc_done(struct tevent_req *req); + +int run_proc_init(TALLOC_CTX *mem_ctx, struct tevent_context *ev, + struct run_proc_context **result) +{ + struct run_proc_context *run_ctx; + + run_ctx = talloc_zero(mem_ctx, struct run_proc_context); + if (run_ctx == NULL) { + return ENOMEM; + } + + run_ctx->ev = ev; + run_ctx->se = tevent_add_signal(ev, run_ctx, SIGCHLD, 0, + run_proc_signal_handler, run_ctx); + if (run_ctx->se == NULL) { + talloc_free(run_ctx); + return ENOMEM; + } + + talloc_set_destructor(run_ctx, run_proc_context_destructor); + + *result = run_ctx; + return 0; +} + +static void run_proc_signal_handler(struct tevent_context *ev, + struct tevent_signal *se, + int signum, int count, void *siginfo, + void *private_data) +{ + struct run_proc_context *run_ctx = talloc_get_type_abort( + private_data, struct run_proc_context); + struct proc_context *proc; + pid_t pid = -1; + int status; + +again: + pid = waitpid(-1, &status, WNOHANG); + if (pid == -1) { + return; + } + + if (pid == 0) { + return; + } + + for (proc = run_ctx->plist; proc != NULL; proc = proc->next) { + if (proc->pid == pid) { + break; + } + } + + if (proc == NULL) { + /* 
unknown process */ + goto again; + } + + /* Mark the process as terminated */ + proc->pid = -1; + + /* Update process status */ + if (WIFEXITED(status)) { + int pstatus = WEXITSTATUS(status); + if (WIFSIGNALED(status)) { + proc->result.sig = WTERMSIG(status); + } else if (pstatus >= 64 && pstatus < 255) { + proc->result.err = pstatus-64; + } else { + proc->result.status = pstatus; + } + } else if (WIFSIGNALED(status)) { + proc->result.sig = WTERMSIG(status); + } + + /* Confirm that all data has been read from the pipe */ + if (proc->fd != -1) { + proc_read_handler(ev, proc->fde, 0, proc); + TALLOC_FREE(proc->fde); + proc->fd = -1; + } + + DLIST_REMOVE(run_ctx->plist, proc); + + /* Active run_proc request */ + if (proc->req != NULL) { + run_proc_done(proc->req); + } else { + talloc_free(proc); + } + + goto again; +} + +static int run_proc_context_destructor(struct run_proc_context *run_ctx) +{ + struct proc_context *proc; + + /* Get rid of signal handler */ + TALLOC_FREE(run_ctx->se); + + /* Kill any pending processes */ + while ((proc = run_ctx->plist) != NULL) { + DLIST_REMOVE(run_ctx->plist, proc); + talloc_free(proc); + } + + return 0; +} + +struct run_proc_state { + struct tevent_context *ev; + struct run_proc_context *run_ctx; + struct proc_context *proc; + + struct run_proc_result result; + char *output; + pid_t pid; +}; + +static int run_proc_state_destructor(struct run_proc_state *state); +static void run_proc_timedout(struct tevent_req *subreq); + +struct tevent_req *run_proc_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct run_proc_context *run_ctx, + const char *path, const char **argv, + int stdin_fd, struct timeval timeout) +{ + struct tevent_req *req; + struct run_proc_state *state; + struct stat st; + int ret; + + req = tevent_req_create(mem_ctx, &state, struct run_proc_state); + if (req == NULL) { + return NULL; + } + + state->ev = ev; + state->run_ctx = run_ctx; + state->pid = -1; + + ret = stat(path, &st); + if (ret != 0) { + 
state->result.err = errno; + tevent_req_done(req); + return tevent_req_post(req, ev); + } + + if (! (st.st_mode & S_IXUSR)) { + state->result.err = EACCES; + tevent_req_done(req); + return tevent_req_post(req, ev); + } + + state->proc = proc_new(run_ctx, run_ctx); + if (tevent_req_nomem(state->proc, req)) { + return tevent_req_post(req, ev); + } + + state->proc->req = req; + DLIST_ADD(run_ctx->plist, state->proc); + + ret = proc_start(state->proc, ev, path, argv, stdin_fd); + if (ret != 0) { + tevent_req_error(req, ret); + return tevent_req_post(req, ev); + } + + talloc_set_destructor(state, run_proc_state_destructor); + + if (! tevent_timeval_is_zero(&timeout)) { + struct tevent_req *subreq; + + subreq = tevent_wakeup_send(state, ev, timeout); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, run_proc_timedout, req); + } + + return req; +} + +static int run_proc_state_destructor(struct run_proc_state *state) +{ + /* Do not get rid of the child process if timeout has occurred */ + if ((state->proc != NULL) && (state->proc->req != NULL)) { + state->proc->req = NULL; + DLIST_REMOVE(state->run_ctx->plist, state->proc); + TALLOC_FREE(state->proc); + } + + return 0; +} + +static void run_proc_done(struct tevent_req *req) +{ + struct run_proc_state *state = tevent_req_data( + req, struct run_proc_state); + + state->proc->req = NULL; + + state->result = state->proc->result; + if (state->proc->output != NULL) { + state->output = talloc_move(state, &state->proc->output); + } + talloc_steal(state, state->proc); + + tevent_req_done(req); +} + +static void run_proc_kill(struct tevent_req *req) +{ + struct run_proc_state *state = tevent_req_data( + req, struct run_proc_state); + + state->proc->req = NULL; + state->proc = NULL; + + state->result.sig = SIGKILL; + + tevent_req_done(req); +} + +static void run_proc_timedout(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, 
struct tevent_req); + struct run_proc_state *state = tevent_req_data( + req, struct run_proc_state); + bool status; + + state->proc->req = NULL; + + status = tevent_wakeup_recv(subreq); + TALLOC_FREE(subreq); + if (! status) { + tevent_req_error(req, EIO); + return; + } + + state->result.err = ETIMEDOUT; + if (state->proc->output != NULL) { + state->output = talloc_move(state, &state->proc->output); + } + state->pid = state->proc->pid; + + tevent_req_done(req); +} + +bool run_proc_recv(struct tevent_req *req, int *perr, + struct run_proc_result *result, pid_t *pid, + TALLOC_CTX *mem_ctx, char **output) +{ + struct run_proc_state *state = tevent_req_data( + req, struct run_proc_state); + int ret; + + if (tevent_req_is_unix_error(req, &ret)) { + if (perr != NULL) { + *perr = ret; + } + return false; + } + + if (result != NULL) { + *result = state->result; + } + + if (pid != NULL) { + *pid = state->pid; + } + + if (output != NULL) { + *output = talloc_move(mem_ctx, &state->output); + } + + return true; +} diff --git a/ctdb/common/run_proc.h b/ctdb/common/run_proc.h new file mode 100644 index 0000000..7b06dad --- /dev/null +++ b/ctdb/common/run_proc.h @@ -0,0 +1,100 @@ +/* + Run a child process and collect the output + + Copyright (C) Amitay Isaacs 2016 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef __CTDB_RUN_PROC_H__ +#define __CTDB_RUN_PROC_H__ + +#include <talloc.h> +#include <tevent.h> + +/** + * @file run_proc.h + * + * @brief Run a process and capture the output + * + * This abstraction allows one to execute scripts with arguments. + */ + +/** + * @brief The run process context + */ +struct run_proc_context; + +/** + * @brief The exit status structure + * + * If the process is terminated due to a signal, sig is set. + * If the process is terminated due to an error, err is set. + * If the process terminates normally, status is set. + */ +struct run_proc_result { + int sig; + int err; + int status; +}; + +/** + * @brief Initialize the context for running processes + * + * @param[in] mem_ctx Talloc memory context + * @param[in] ev Tevent context + * @param[out] result New run_proc context + * @return 0 on success, errno on error + */ +int run_proc_init(TALLOC_CTX *mem_ctx, struct tevent_context *ev, + struct run_proc_context **result); + +/** + * @brief Async computation start to run an executable + * + * @param[in] mem_ctx Talloc memory context + * @param[in] ev Tevent context + * @param[in] run_ctx Run_proc context + * @param[in] prog The path to the executable + * @param[in] argv Arguments to the executable + * @param[in] stdin_fd Assign stdin_fd as stdin for the process, -1 if not + * @param[in] timeout How long to wait for execution + * @return new tevent request, or NULL on failure + * + * argv must include program name as argv[0] and must be null terminated. 
+ */ +struct tevent_req *run_proc_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct run_proc_context *run_ctx, + const char *prog, const char **argv, + int stdin_fd, struct timeval timeout); + +/** + * @brief Async computation end to run an executable + * + * @param[in] req Tevent request + * @param[out] perr errno in case of failure + * @param[out] result The exit status of the executable + * @param[out] pid The pid of the child process (still running) + * @param[in] mem_ctx Talloc memory context + * @param[out] output The output from the executable (stdout + stderr) + * @return true on success, false on failure + * + * The returned pid is -1 if the process has terminated. + */ +bool run_proc_recv(struct tevent_req *req, int *perr, + struct run_proc_result *result, pid_t *pid, + TALLOC_CTX *mem_ctx, char **output); + +#endif /* __CTDB_RUN_PROC_H__ */ diff --git a/ctdb/common/sock_client.c b/ctdb/common/sock_client.c new file mode 100644 index 0000000..75f471f --- /dev/null +++ b/ctdb/common/sock_client.c @@ -0,0 +1,334 @@ +/* + A client based on unix domain socket + + Copyright (C) Amitay Isaacs 2017 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "replace.h" +#include "system/filesys.h" +#include "system/network.h" + +#include <talloc.h> +#include <tevent.h> + +#include "lib/util/debug.h" +#include "lib/util/time.h" +#include "lib/util/tevent_unix.h" + +#include "common/logging.h" +#include "common/reqid.h" +#include "common/comm.h" +#include "common/sock_client.h" + +struct sock_client_context { + struct sock_client_proto_funcs *funcs; + void *private_data; + + void (*disconnect_callback)(void *private_data); + void *disconnect_data; + + int fd; + struct comm_context *comm; + struct reqid_context *idr; +}; + +/* + * connect to a unix domain socket + */ + +static int socket_connect(const char *sockpath) +{ + struct sockaddr_un addr; + size_t len; + int fd, ret; + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + + len = strlcpy(addr.sun_path, sockpath, sizeof(addr.sun_path)); + if (len >= sizeof(addr.sun_path)) { + D_ERR("socket path too long: %s\n", sockpath); + return -1; + } + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd == -1) { + D_ERR("socket create failed - %s\n", sockpath); + return -1; + } + + ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr)); + if (ret != 0) { + D_ERR("socket connect failed - %s\n", sockpath); + close(fd); + return -1; + } + + return fd; +} + +/* + * Socket client + */ + +static int sock_client_context_destructor(struct sock_client_context *sockc); +static void sock_client_read_handler(uint8_t *buf, size_t buflen, + void *private_data); +static void sock_client_dead_handler(void *private_data); + +static void sock_client_msg_reply(struct sock_client_context *sockc, + uint8_t *buf, size_t buflen); + +int sock_client_setup(TALLOC_CTX *mem_ctx, struct tevent_context *ev, + const char *sockpath, + struct sock_client_proto_funcs *funcs, + void *private_data, + struct sock_client_context **result) +{ + struct sock_client_context *sockc; + int ret; + + if (sockpath == NULL) { + return EINVAL; + } + + if (funcs == NULL || funcs->request_push == 
NULL || + funcs->reply_pull == NULL || funcs->reply_reqid == NULL) { + return EINVAL; + } + + sockc = talloc_zero(mem_ctx, struct sock_client_context); + if (sockc == NULL) { + return ENOMEM; + } + + sockc->funcs = funcs; + sockc->private_data = private_data; + + sockc->fd = socket_connect(sockpath); + if (sockc->fd == -1) { + talloc_free(sockc); + return EIO; + } + + ret = comm_setup(sockc, ev, sockc->fd, + sock_client_read_handler, sockc, + sock_client_dead_handler, sockc, + &sockc->comm); + if (ret != 0) { + D_ERR("comm_setup() failed, ret=%d\n", ret); + close(sockc->fd); + talloc_free(sockc); + return ret; + } + + ret = reqid_init(sockc, INT_MAX-200, &sockc->idr); + if (ret != 0) { + D_ERR("reqid_init() failed, ret=%d\n", ret); + close(sockc->fd); + talloc_free(sockc); + return ret; + } + + talloc_set_destructor(sockc, sock_client_context_destructor); + + *result = sockc; + return 0; +} + +static int sock_client_context_destructor(struct sock_client_context *sockc) +{ + TALLOC_FREE(sockc->comm); + if (sockc->fd != -1) { + close(sockc->fd); + sockc->fd = -1; + } + return 0; +} + + +static void sock_client_read_handler(uint8_t *buf, size_t buflen, + void *private_data) +{ + struct sock_client_context *sockc = talloc_get_type_abort( + private_data, struct sock_client_context); + + sock_client_msg_reply(sockc, buf, buflen); +} + +static void sock_client_dead_handler(void *private_data) +{ + struct sock_client_context *sockc = talloc_get_type_abort( + private_data, struct sock_client_context); + + if (sockc->disconnect_callback != NULL) { + sockc->disconnect_callback(sockc->disconnect_data); + talloc_free(sockc); + return; + } + + D_NOTICE("connection to daemon closed, exiting\n"); + exit(1); +} + +void sock_client_set_disconnect_callback(struct sock_client_context *sockc, + sock_client_callback_func_t callback, + void *private_data) +{ + sockc->disconnect_callback = callback; + sockc->disconnect_data = private_data; +} + + +struct sock_client_msg_state { + struct 
sock_client_context *sockc; + uint32_t reqid; + struct tevent_req *req; + void *reply; +}; + +static int sock_client_msg_state_destructor( + struct sock_client_msg_state *state); +static void sock_client_msg_done(struct tevent_req *subreq); + +struct tevent_req *sock_client_msg_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct sock_client_context *sockc, + struct timeval timeout, + void *request) +{ + struct tevent_req *req, *subreq; + struct sock_client_msg_state *state; + uint8_t *buf; + size_t buflen; + int ret; + + req = tevent_req_create(mem_ctx, &state, struct sock_client_msg_state); + if (req == NULL) { + return NULL; + } + + state->sockc = sockc; + + state->reqid = reqid_new(sockc->idr, state); + if (state->reqid == REQID_INVALID) { + talloc_free(req); + return NULL; + } + + state->req = req; + + talloc_set_destructor(state, sock_client_msg_state_destructor); + + ret = sockc->funcs->request_push(request, state->reqid, state, + &buf, &buflen, sockc->private_data); + if (ret != 0) { + tevent_req_error(req, ret); + return tevent_req_post(req, ev); + } + + subreq = comm_write_send(state, ev, sockc->comm, buf, buflen); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, sock_client_msg_done, req); + + if (! timeval_is_zero(&timeout)) { + if (!tevent_req_set_endtime(req, ev, timeout)) { + return tevent_req_post(req, ev); + } + } + + return req; +} + +static int sock_client_msg_state_destructor( + struct sock_client_msg_state *state) +{ + reqid_remove(state->sockc->idr, state->reqid); + return 0; +} + +static void sock_client_msg_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + int ret; + bool status; + + status = comm_write_recv(subreq, &ret); + TALLOC_FREE(subreq); + if (! 
status) { + tevent_req_error(req, ret); + return; + } + + /* wait for the reply or timeout */ +} + +static void sock_client_msg_reply(struct sock_client_context *sockc, + uint8_t *buf, size_t buflen) +{ + struct sock_client_msg_state *state; + uint32_t reqid; + int ret; + + ret = sockc->funcs->reply_reqid(buf, buflen, &reqid, + sockc->private_data); + if (ret != 0) { + D_WARNING("Invalid packet received, ret=%d\n", ret); + return; + } + + state = reqid_find(sockc->idr, reqid, struct sock_client_msg_state); + if (state == NULL) { + return; + } + + if (reqid != state->reqid) { + return; + } + + ret = sockc->funcs->reply_pull(buf, buflen, state, &state->reply, + sockc->private_data); + if (ret != 0) { + tevent_req_error(state->req, ret); + return; + } + + tevent_req_done(state->req); +} + +bool sock_client_msg_recv(struct tevent_req *req, int *perr, + TALLOC_CTX *mem_ctx, void *reply) +{ + struct sock_client_msg_state *state = tevent_req_data( + req, struct sock_client_msg_state); + int ret; + + if (tevent_req_is_unix_error(req, &ret)) { + if (perr != NULL) { + *perr = ret; + } + return false; + } + + if (reply != NULL) { + *(void **)reply = talloc_steal(mem_ctx, state->reply); + } + + return true; +} diff --git a/ctdb/common/sock_client.h b/ctdb/common/sock_client.h new file mode 100644 index 0000000..d661c85 --- /dev/null +++ b/ctdb/common/sock_client.h @@ -0,0 +1,129 @@ +/* + A client based on unix domain socket + + Copyright (C) Amitay Isaacs 2017 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
/**
 * @brief The abstract socket client context
 */
struct sock_client_context;
 * @param[in] request Request packet to be sent
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" +#include "system/filesys.h" +#include "system/network.h" +#include "system/wait.h" + +#include <talloc.h> +#include <tevent.h> + +#include "lib/async_req/async_sock.h" +#include "lib/util/debug.h" +#include "lib/util/blocking.h" +#include "lib/util/dlinklist.h" +#include "lib/util/tevent_unix.h" +#include "lib/util/become_daemon.h" +#include "lib/util/sys_rw.h" + +#include "common/logging.h" +#include "common/reqid.h" +#include "common/comm.h" +#include "common/pidfile.h" +#include "common/system.h" +#include "common/sock_daemon.h" + +struct sock_socket { + struct sock_socket *prev, *next; + + const char *sockpath; + struct sock_socket_funcs *funcs; + void *private_data; + + int fd; + struct tevent_req *req; +}; + +struct sock_client { + struct sock_client *prev, *next; + + struct tevent_req *req; + struct sock_client_context *client_ctx; +}; + +struct sock_client_context { + struct tevent_context *ev; + struct sock_socket *sock; + int fd; + struct comm_context *comm; + + struct sock_client *client; +}; + +struct sock_daemon_context { + struct sock_daemon_funcs *funcs; + void *private_data; + + struct pidfile_context *pid_ctx; + struct sock_socket *socket_list; + int startup_fd; +}; + +/* + * Process a single client + */ + +static void sock_client_read_handler(uint8_t *buf, size_t buflen, + void *private_data); +static void sock_client_read_done(struct tevent_req *subreq); +static void sock_client_dead_handler(void *private_data); +static int sock_client_context_destructor( + struct sock_client_context *client_ctx); + +static int 
sock_client_context_init(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct sock_socket *sock, + int client_fd, + struct sock_client *client, + struct sock_client_context **result) +{ + struct sock_client_context *client_ctx; + int ret; + + client_ctx = talloc_zero(mem_ctx, struct sock_client_context); + if (client_ctx == NULL) { + return ENOMEM; + } + + client_ctx->ev = ev; + client_ctx->sock = sock; + client_ctx->fd = client_fd; + client_ctx->client = client; + + ret = comm_setup(client_ctx, ev, client_fd, + sock_client_read_handler, client_ctx, + sock_client_dead_handler, client_ctx, + &client_ctx->comm); + if (ret != 0) { + talloc_free(client_ctx); + return ret; + } + + if (sock->funcs->connect != NULL) { + pid_t pid; + bool status; + + (void) ctdb_get_peer_pid(client_fd, &pid); + + status = sock->funcs->connect(client_ctx, + pid, + sock->private_data); + if (! status) { + talloc_free(client_ctx); + close(client_fd); + return 0; + } + } + + talloc_set_destructor(client_ctx, sock_client_context_destructor); + + *result = client_ctx; + return 0; +} + +static void sock_client_read_handler(uint8_t *buf, size_t buflen, + void *private_data) +{ + struct sock_client_context *client_ctx = talloc_get_type_abort( + private_data, struct sock_client_context); + struct sock_socket *sock = client_ctx->sock; + struct tevent_req *subreq; + + subreq = sock->funcs->read_send(client_ctx, client_ctx->ev, + client_ctx, buf, buflen, + sock->private_data); + if (subreq == NULL) { + talloc_free(client_ctx); + return; + } + tevent_req_set_callback(subreq, sock_client_read_done, client_ctx); +} + +static void sock_client_read_done(struct tevent_req *subreq) +{ + struct sock_client_context *client_ctx = tevent_req_callback_data( + subreq, struct sock_client_context); + struct sock_socket *sock = client_ctx->sock; + int ret; + bool status; + + status = sock->funcs->read_recv(subreq, &ret); + if (! 
/*
 * Create a non-blocking unix-domain stream socket bound to sockpath
 * and put it into listening state (backlog 10).
 *
 * If remove_before_use is true, sockpath is unlinked before bind().
 *
 * Returns the listening fd, or -1 on failure (the reason is logged and
 * any socket created so far is closed).
 */
static int socket_setup(const char *sockpath, bool remove_before_use)
{
	struct sockaddr_un addr;
	int fd;

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;

	if (strlcpy(addr.sun_path, sockpath, sizeof(addr.sun_path)) >=
	    sizeof(addr.sun_path)) {
		D_ERR("socket path too long: %s\n", sockpath);
		return -1;
	}

	fd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (fd == -1) {
		D_ERR("socket create failed - %s\n", sockpath);
		return -1;
	}

	if (set_blocking(fd, false) != 0) {
		D_ERR("socket set nonblocking failed - %s\n", sockpath);
		goto fail;
	}

	if (remove_before_use) {
		unlink(sockpath);
	}

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) != 0) {
		D_ERR("socket bind failed - %s\n", sockpath);
		goto fail;
	}

	if (listen(fd, 10) != 0) {
		D_ERR("socket listen failed - %s\n", sockpath);
		goto fail;
	}

	D_NOTICE("listening on %s\n", sockpath);
	return fd;

fail:
	close(fd);
	return -1;
}
sock_socket **result) +{ + struct sock_socket *sock; + + if (funcs == NULL) { + return EINVAL; + } + if (funcs->read_send == NULL || funcs->read_recv == NULL) { + return EINVAL; + } + + sock = talloc_zero(mem_ctx, struct sock_socket); + if (sock == NULL) { + return ENOMEM; + } + + sock->sockpath = talloc_strdup(sock, sockpath); + if (sock->sockpath == NULL) { + talloc_free(sock); + return ENOMEM; + } + sock->funcs = funcs; + sock->private_data = private_data; + sock->fd = -1; + + talloc_set_destructor(sock, sock_socket_destructor); + + *result = sock; + return 0; +} + +static int sock_socket_destructor(struct sock_socket *sock) +{ + TALLOC_FREE(sock->req); + + if (sock->fd != -1) { + close(sock->fd); + sock->fd = -1; + } + + unlink(sock->sockpath); + return 0; +} + + +struct sock_socket_start_state { + struct tevent_context *ev; + struct sock_socket *sock; + + struct sock_client *client_list; +}; + +static int sock_socket_start_state_destructor( + struct sock_socket_start_state *state); +static void sock_socket_start_new_client(struct tevent_req *subreq); +static int sock_socket_start_client_destructor(struct sock_client *client); + +static struct tevent_req *sock_socket_start_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct sock_socket *sock, + bool remove_before_use) +{ + struct tevent_req *req, *subreq; + struct sock_socket_start_state *state; + + req = tevent_req_create(mem_ctx, &state, + struct sock_socket_start_state); + if (req == NULL) { + return NULL; + } + + state->ev = ev; + state->sock = sock; + + sock->fd = socket_setup(sock->sockpath, remove_before_use); + if (sock->fd == -1) { + tevent_req_error(req, EIO); + return tevent_req_post(req, ev); + } + + talloc_set_destructor(state, sock_socket_start_state_destructor); + + subreq = accept_send(state, ev, sock->fd); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, sock_socket_start_new_client, req); + + sock->req = req; + + return 
req; +} + +static int sock_socket_start_state_destructor( + struct sock_socket_start_state *state) +{ + struct sock_client *client; + + while ((client = state->client_list) != NULL) { + talloc_free(client); + } + + return 0; +} + +static void sock_socket_start_new_client(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct sock_socket_start_state *state = tevent_req_data( + req, struct sock_socket_start_state); + struct sock_client *client; + int client_fd, ret; + + client_fd = accept_recv(subreq, NULL, NULL, &ret); + TALLOC_FREE(subreq); + if (client_fd == -1) { + D_ERR("failed to accept new connection\n"); + } + + subreq = accept_send(state, state->ev, state->sock->fd); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, sock_socket_start_new_client, req); + + if (client_fd == -1) { + return; + } + + client = talloc_zero(state, struct sock_client); + if (tevent_req_nomem(client, req)) { + close(client_fd); + return; + } + + client->req = req; + + ret = sock_client_context_init(client, state->ev, state->sock, + client_fd, client, &client->client_ctx); + if (ret != 0) { + talloc_free(client); + return; + } + + talloc_set_destructor(client, sock_socket_start_client_destructor); + DLIST_ADD(state->client_list, client); +} + +static int sock_socket_start_client_destructor(struct sock_client *client) +{ + struct sock_socket_start_state *state = tevent_req_data( + client->req, struct sock_socket_start_state); + + DLIST_REMOVE(state->client_list, client); + TALLOC_FREE(client->client_ctx); + + return 0; +} + +static bool sock_socket_start_recv(struct tevent_req *req, int *perr, + TALLOC_CTX *mem_ctx, const char **sockpath) +{ + struct sock_socket_start_state *state = tevent_req_data( + req, struct sock_socket_start_state); + int ret; + + state->sock->req = NULL; + + if (tevent_req_is_unix_error(req, &ret)) { + if (perr != NULL) { + *perr = ret; + } + return false; + } 
+ + if (sockpath != NULL) { + *sockpath = talloc_steal(mem_ctx, state->sock->sockpath); + } + + return true; +} + +/* + * Send message to a client + */ + +struct tevent_req *sock_socket_write_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct sock_client_context *client_ctx, + uint8_t *buf, size_t buflen) +{ + struct tevent_req *req; + + req = comm_write_send(mem_ctx, ev, client_ctx->comm, buf, buflen); + + return req; +} + +bool sock_socket_write_recv(struct tevent_req *req, int *perr) +{ + int ret; + bool status; + + status = comm_write_recv(req, &ret); + if (! status) { + if (perr != NULL) { + *perr = ret; + } + } + + return status; +} + +/* + * Socket daemon + */ + +int sock_daemon_setup(TALLOC_CTX *mem_ctx, const char *daemon_name, + const char *logging, const char *debug_level, + struct sock_daemon_funcs *funcs, + void *private_data, + struct sock_daemon_context **out) +{ + struct sock_daemon_context *sockd; + int ret; + + sockd = talloc_zero(mem_ctx, struct sock_daemon_context); + if (sockd == NULL) { + return ENOMEM; + } + + sockd->funcs = funcs; + sockd->private_data = private_data; + sockd->startup_fd = -1; + + ret = logging_init(sockd, logging, debug_level, daemon_name); + if (ret != 0) { + fprintf(stderr, + "Failed to initialize logging, logging=%s, debug=%s\n", + logging, debug_level); + return ret; + } + + *out = sockd; + return 0; +} + +int sock_daemon_add_unix(struct sock_daemon_context *sockd, + const char *sockpath, + struct sock_socket_funcs *funcs, + void *private_data) +{ + struct sock_socket *sock; + int ret; + + ret = sock_socket_init(sockd, sockpath, funcs, private_data, &sock); + if (ret != 0) { + return ret; + } + + + DLIST_ADD(sockd->socket_list, sock); + return 0; +} + +bool sock_daemon_set_startup_fd(struct sock_daemon_context *sockd, int fd) +{ + if (! 
set_close_on_exec(fd)) { + D_ERR("Failed to set close-on-exec on startup fd\n"); + return false; + } + + sockd->startup_fd = fd; + return true; +} + +/* + * Run socket daemon + */ + +struct sock_daemon_run_state { + struct tevent_context *ev; + struct sock_daemon_context *sockd; + pid_t pid_watch; + + int fd; + int exit_code; +}; + +static void sock_daemon_run_started(struct tevent_req *subreq); +static void sock_daemon_run_startup_done(struct tevent_req *subreq); +static void sock_daemon_run_signal_handler(struct tevent_context *ev, + struct tevent_signal *se, + int signum, int count, void *siginfo, + void *private_data); +static void sock_daemon_run_reconfigure(struct tevent_req *req); +static void sock_daemon_run_reconfigure_done(struct tevent_req *subreq); +static void sock_daemon_run_reopen_logs(struct tevent_req *req); +static void sock_daemon_run_reopen_logs_done(struct tevent_req *subreq); +static void sock_daemon_run_shutdown(struct tevent_req *req); +static void sock_daemon_run_shutdown_done(struct tevent_req *subreq); +static void sock_daemon_run_exit(struct tevent_req *req); +static bool sock_daemon_run_socket_listen(struct tevent_req *req); +static void sock_daemon_run_socket_fail(struct tevent_req *subreq); +static void sock_daemon_run_watch_pid(struct tevent_req *subreq); +static void sock_daemon_run_wait(struct tevent_req *req); +static void sock_daemon_run_wait_done(struct tevent_req *subreq); +static void sock_daemon_startup_notify(struct sock_daemon_context *sockd); + +struct tevent_req *sock_daemon_run_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct sock_daemon_context *sockd, + const char *pidfile, + bool do_fork, bool create_session, + pid_t pid_watch) +{ + struct tevent_req *req, *subreq; + struct sock_daemon_run_state *state; + struct tevent_signal *se; + + req = tevent_req_create(mem_ctx, &state, + struct sock_daemon_run_state); + if (req == NULL) { + return NULL; + } + + become_daemon(do_fork, !create_session, false); + + 
if (pidfile != NULL) { + int ret = pidfile_context_create(sockd, pidfile, + &sockd->pid_ctx); + if (ret != 0) { + tevent_req_error(req, EEXIST); + return tevent_req_post(req, ev); + } + } + + state->ev = ev; + state->sockd = sockd; + state->pid_watch = pid_watch; + state->fd = -1; + + subreq = tevent_wakeup_send(state, ev, + tevent_timeval_current_ofs(0, 0)); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, sock_daemon_run_started, req); + + se = tevent_add_signal(ev, state, SIGHUP, 0, + sock_daemon_run_signal_handler, req); + if (tevent_req_nomem(se, req)) { + return tevent_req_post(req, ev); + } + + se = tevent_add_signal(ev, state, SIGUSR1, 0, + sock_daemon_run_signal_handler, req); + if (tevent_req_nomem(se, req)) { + return tevent_req_post(req, ev); + } + + se = tevent_add_signal(ev, state, SIGINT, 0, + sock_daemon_run_signal_handler, req); + if (tevent_req_nomem(se, req)) { + return tevent_req_post(req, ev); + } + + se = tevent_add_signal(ev, state, SIGTERM, 0, + sock_daemon_run_signal_handler, req); + if (tevent_req_nomem(se, req)) { + return tevent_req_post(req, ev); + } + + if (pid_watch > 1) { + subreq = tevent_wakeup_send(state, ev, + tevent_timeval_current_ofs(1,0)); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, sock_daemon_run_watch_pid, + req); + } + + return req; +} + +static void sock_daemon_run_started(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct sock_daemon_run_state *state = tevent_req_data( + req, struct sock_daemon_run_state); + struct sock_daemon_context *sockd = state->sockd; + bool status; + + status = tevent_wakeup_recv(subreq); + TALLOC_FREE(subreq); + if (! 
status) { + tevent_req_error(req, EIO); + return; + } + + D_NOTICE("daemon started, pid=%u\n", getpid()); + + if (sockd->funcs != NULL && sockd->funcs->startup_send != NULL && + sockd->funcs->startup_recv != NULL) { + subreq = sockd->funcs->startup_send(state, state->ev, + sockd->private_data); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, sock_daemon_run_startup_done, + req); + return; + } + + if (sockd->funcs != NULL && sockd->funcs->startup != NULL) { + int ret; + + ret = sockd->funcs->startup(sockd->private_data); + if (ret != 0) { + D_ERR("startup failed, ret=%d\n", ret); + tevent_req_error(req, EIO); + return; + } + + D_NOTICE("startup completed successfully\n"); + } + + status = sock_daemon_run_socket_listen(req); + if (! status) { + return; + } + sock_daemon_run_wait(req); + + sock_daemon_startup_notify(sockd); +} + +static void sock_daemon_run_startup_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct sock_daemon_run_state *state = tevent_req_data( + req, struct sock_daemon_run_state); + struct sock_daemon_context *sockd = state->sockd; + int ret; + bool status; + + status = sockd->funcs->startup_recv(subreq, &ret); + TALLOC_FREE(subreq); + if (! status) { + D_ERR("startup failed, ret=%d\n", ret); + tevent_req_error(req, EIO); + return; + } + + D_NOTICE("startup completed successfully\n"); + + status = sock_daemon_run_socket_listen(req); + if (! 
status) { + return; + } + sock_daemon_run_wait(req); + + sock_daemon_startup_notify(sockd); +} + +static void sock_daemon_run_signal_handler(struct tevent_context *ev, + struct tevent_signal *se, + int signum, int count, void *siginfo, + void *private_data) +{ + struct tevent_req *req = talloc_get_type_abort( + private_data, struct tevent_req); + struct sock_daemon_run_state *state = tevent_req_data( + req, struct sock_daemon_run_state); + + D_NOTICE("Received signal %d\n", signum); + + if (signum == SIGUSR1) { + sock_daemon_run_reconfigure(req); + return; + } + + if (signum == SIGHUP) { + sock_daemon_run_reopen_logs(req); + return; + } + + if (signum == SIGINT || signum == SIGTERM) { + state->exit_code = EINTR; + sock_daemon_run_shutdown(req); + } +} + +static void sock_daemon_run_reconfigure(struct tevent_req *req) +{ + struct tevent_req *subreq; + struct sock_daemon_run_state *state = tevent_req_data( + req, struct sock_daemon_run_state); + struct sock_daemon_context *sockd = state->sockd; + + if (sockd->funcs != NULL && sockd->funcs->reconfigure_send != NULL && + sockd->funcs->reconfigure_recv != NULL) { + subreq = sockd->funcs->reconfigure_send(state, state->ev, + sockd->private_data); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, + sock_daemon_run_reconfigure_done, req); + return; + } + + if (sockd->funcs != NULL && sockd->funcs->reconfigure != NULL) { + int ret; + + ret = sockd->funcs->reconfigure(sockd->private_data); + if (ret != 0) { + D_ERR("reconfigure failed, ret=%d\n", ret); + return; + } + + D_NOTICE("reconfigure completed successfully\n"); + } +} + +static void sock_daemon_run_reconfigure_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct sock_daemon_run_state *state = tevent_req_data( + req, struct sock_daemon_run_state); + struct sock_daemon_context *sockd = state->sockd; + int ret; + bool status; + + status = 
sockd->funcs->reconfigure_recv(subreq, &ret); + TALLOC_FREE(subreq); + if (! status) { + D_ERR("reconfigure failed, ret=%d\n", ret); + return; + } + + D_NOTICE("reconfigure completed successfully\n"); +} + +static void sock_daemon_run_reopen_logs(struct tevent_req *req) +{ + struct tevent_req *subreq; + struct sock_daemon_run_state *state = tevent_req_data( + req, struct sock_daemon_run_state); + struct sock_daemon_context *sockd = state->sockd; + + if (sockd->funcs != NULL && sockd->funcs->reopen_logs_send != NULL && + sockd->funcs->reopen_logs_recv != NULL) { + subreq = sockd->funcs->reopen_logs_send(state, state->ev, + sockd->private_data); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, + sock_daemon_run_reopen_logs_done, req); + return; + } + + if (sockd->funcs != NULL && sockd->funcs->reopen_logs != NULL) { + int ret; + + ret = sockd->funcs->reopen_logs(sockd->private_data); + if (ret != 0) { + D_ERR("reopen logs, ret=%d\n", ret); + return; + } + + D_NOTICE("reopen logs completed successfully\n"); + } +} + +static void sock_daemon_run_reopen_logs_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct sock_daemon_run_state *state = tevent_req_data( + req, struct sock_daemon_run_state); + struct sock_daemon_context *sockd = state->sockd; + int ret; + bool status; + + status = sockd->funcs->reopen_logs_recv(subreq, &ret); + TALLOC_FREE(subreq); + if (! 
status) { + D_ERR("reopen logs failed, ret=%d\n", ret); + return; + } + + D_NOTICE("reopen logs completed successfully\n"); +} + +static void sock_daemon_run_shutdown(struct tevent_req *req) +{ + struct tevent_req *subreq; + struct sock_daemon_run_state *state = tevent_req_data( + req, struct sock_daemon_run_state); + struct sock_daemon_context *sockd = state->sockd; + struct sock_socket *sock; + + D_NOTICE("Shutting down\n"); + + while ((sock = sockd->socket_list) != NULL) { + DLIST_REMOVE(sockd->socket_list, sock); + TALLOC_FREE(sock); + } + + if (sockd->funcs != NULL && sockd->funcs->shutdown_send != NULL && + sockd->funcs->shutdown_recv != NULL) { + subreq = sockd->funcs->shutdown_send(state, state->ev, + sockd->private_data); + if (subreq == NULL) { + sock_daemon_run_exit(req); + return; + } + tevent_req_set_callback(subreq, sock_daemon_run_shutdown_done, + req); + return; + } + + if (sockd->funcs != NULL && sockd->funcs->shutdown != NULL) { + sockd->funcs->shutdown(sockd->private_data); + } + + sock_daemon_run_exit(req); +} + +static void sock_daemon_run_shutdown_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct sock_daemon_run_state *state = tevent_req_data( + req, struct sock_daemon_run_state); + struct sock_daemon_context *sockd = state->sockd; + + sockd->funcs->shutdown_recv(subreq); + TALLOC_FREE(subreq); + + sock_daemon_run_exit(req); +} + +static void sock_daemon_run_exit(struct tevent_req *req) +{ + struct sock_daemon_run_state *state = tevent_req_data( + req, struct sock_daemon_run_state); + struct sock_daemon_context *sockd = state->sockd; + + TALLOC_FREE(sockd->pid_ctx); + + if (state->exit_code == 0) { + tevent_req_done(req); + } else { + tevent_req_error(req, state->exit_code); + } +} + +static bool sock_daemon_run_socket_listen(struct tevent_req *req) +{ + struct tevent_req *subreq; + struct sock_daemon_run_state *state = tevent_req_data( + req, struct 
sock_daemon_run_state); + struct sock_daemon_context *sockd = state->sockd; + struct sock_socket *sock; + bool remove_before_use = false; + + if (sockd->pid_ctx != NULL) { + remove_before_use = true; + } + for (sock = sockd->socket_list; sock != NULL; sock = sock->next) { + subreq = sock_socket_start_send(state, state->ev, sock, + remove_before_use); + if (tevent_req_nomem(subreq, req)) { + return false; + } + tevent_req_set_callback(subreq, sock_daemon_run_socket_fail, + req); + } + + return true; +} + +static void sock_daemon_run_socket_fail(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct sock_daemon_run_state *state = tevent_req_data( + req, struct sock_daemon_run_state); + const char *sockpath = "INVALID"; + int ret = 0; + bool status; + + status = sock_socket_start_recv(subreq, &ret, state, &sockpath); + TALLOC_FREE(subreq); + if (! status) { + D_ERR("socket %s closed unexpectedly\n", sockpath); + state->exit_code = ret; + } else { + state->exit_code = 0; + } + + sock_daemon_run_shutdown(req); +} + +static void sock_daemon_run_watch_pid(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct sock_daemon_run_state *state = tevent_req_data( + req, struct sock_daemon_run_state); + int ret; + bool status; + + status = tevent_wakeup_recv(subreq); + TALLOC_FREE(subreq); + if (! 
status) { + tevent_req_error(req, EIO); + return; + } + + ret = kill(state->pid_watch, 0); + if (ret == -1) { + if (errno == ESRCH) { + D_ERR("PID %d gone away, exiting\n", state->pid_watch); + state->exit_code = ESRCH; + sock_daemon_run_shutdown(req); + return; + } else { + D_ERR("Failed to check PID status %d, ret=%d\n", + state->pid_watch, errno); + } + } + + subreq = tevent_wakeup_send(state, state->ev, + tevent_timeval_current_ofs(5,0)); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, sock_daemon_run_watch_pid, req); +} + +static void sock_daemon_run_wait(struct tevent_req *req) +{ + struct tevent_req *subreq; + struct sock_daemon_run_state *state = tevent_req_data( + req, struct sock_daemon_run_state); + struct sock_daemon_context *sockd = state->sockd; + + if (sockd->funcs != NULL && sockd->funcs->wait_send != NULL && + sockd->funcs->wait_recv != NULL) { + subreq = sockd->funcs->wait_send(state, state->ev, + sockd->private_data); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, sock_daemon_run_wait_done, + req); + } +} + +static void sock_daemon_run_wait_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct sock_daemon_run_state *state = tevent_req_data( + req, struct sock_daemon_run_state); + struct sock_daemon_context *sockd = state->sockd; + int ret = 0; + bool status; + + status = sockd->funcs->wait_recv(subreq, &ret); + TALLOC_FREE(subreq); + if (! 
/*
 * Synchronous wrapper around sock_daemon_run_send()/_recv().
 *
 * Runs the event loop on ev until the run request finishes.
 *
 * Returns 0 on success, ENOMEM if the request cannot be created, or
 * the error reported by sock_daemon_run_recv().
 */
int sock_daemon_run(struct tevent_context *ev,
		    struct sock_daemon_context *sockd,
		    const char *pidfile,
		    bool do_fork, bool create_session,
		    pid_t pid_watch)
{
	struct tevent_req *run_req;
	int err;
	bool ok;

	run_req = sock_daemon_run_send(ev, ev, sockd, pidfile,
				       do_fork, create_session, pid_watch);
	if (run_req == NULL) {
		return ENOMEM;
	}

	tevent_req_poll(run_req, ev);

	ok = sock_daemon_run_recv(run_req, &err);
	TALLOC_FREE(run_req);

	return ok ? 0 : err;
}
+ + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_SOCK_DAEMON_H__ +#define __CTDB_SOCK_DAEMON_H__ + +#include <talloc.h> +#include <tevent.h> + +#include "common/logging.h" + +/** + * @file sock_daemon.h + * + * @brief A framework for a server based on unix-domain sockets. + * + * This abstraction allows one to build simple servers that communicate using + * unix-domain sockets. It takes care of the common boilerplate. + */ + +/** + * @brief The abstract socket daemon context + */ +struct sock_daemon_context; + +/** + * @brief The abstract socket client context + */ +struct sock_client_context; + +/** + * @brief The callback routines called during daemon life cycle + * + * startup() is called when the daemon starts running + * either via sock_daemon_run() or via sock_daemon_run_send() + * startup() should return 0 for success, non-zero value on failure + * On failure, sock_daemon_run() will return error. + * + * startup_send()/startup_recv() is the async version of startup() + * + * reconfigure() is called when the daemon receives SIGUSR1 + * reconfigure() should return 0 for success, non-zero value on failure + * On failure, sock_daemon_run() will continue to run. + * + * reconfigure_send()/reconfigure_recv() is the async version of reconfigure() + * + * reopen_logs() is called when the daemon receives SIGHUP + * reopen_logs() should return 0 for success, non-zero value on failure + * On failure, sock_daemon_run() will continue to run. + * + * reopen_logs_send()/reopen_logs_recv() is the async version of reopen_logs() + * + * shutdown() is called when process receives SIGINT or SIGTERM or + * when wait computation has finished + * + * shutdown_send()/shutdown_recv() is the async version of shutdown() + * + * Please note that only one (sync or async) version of these functions + * will be called. 
If both versions are defined, then only async function + * will be called. + * + * wait_send() starts the async computation to keep running the daemon + * wait_recv() ends the async computation to keep running the daemon + * + * If wait_send()/wait_recv() is NULL, then daemon will keep running forever. + * If wait_send() returns req, then when req is over, daemon will shutdown. + */ +struct sock_daemon_funcs { + int (*startup)(void *private_data); + + struct tevent_req * (*startup_send)(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + void *private_data); + bool (*startup_recv)(struct tevent_req *req, int *perr); + + int (*reconfigure)(void *private_data); + + struct tevent_req * (*reconfigure_send)(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + void *private_data); + bool (*reconfigure_recv)(struct tevent_req *req, int *perr); + + int (*reopen_logs)(void *private_data); + + struct tevent_req * (*reopen_logs_send)(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + void *private_data); + bool (*reopen_logs_recv)(struct tevent_req *req, int *perr); + + void (*shutdown)(void *private_data); + + struct tevent_req * (*shutdown_send)(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + void *private_data); + void (*shutdown_recv)(struct tevent_req *req); + + struct tevent_req * (*wait_send)(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + void *private_data); + bool (*wait_recv)(struct tevent_req *req, int *perr); +}; + +/** + * @brief The callback routines called for an unix-domain socket + * + * connect() is called when there is a new connection + * + * @param[in] client The new socket client context + * @param[in] pid The pid of the new client process, or -1 if unknown + * @param[in] private_data Private data set with the socket + * @return true if connection should be accepted, false otherwise + * + * + * disconnect() is called when client closes connection + * + * @param[in] client The socket client context + * @param[in] private_data Private data 
associated with the socket + * + * + * read_send() starts the async computation to process data on the socket + * + * @param[in] mem_ctx Talloc memory context + * @param[in] ev Tevent context + * @param[in] client The socket client context + * @param[in] buf Data received from the client + * @param[in] buflen Length of the data + * @param[in] private_data Private data associated with the socket + * @return new tevent request, or NULL on failure + * + * + * read_recv() ends the async computation to process data on the socket + * + * @param[in] req Tevent request + * @param[out] perr errno in case of failure + * @return true on success, false on failure + * + */ +struct sock_socket_funcs { + bool (*connect)(struct sock_client_context *client, + pid_t pid, + void *private_data); + void (*disconnect)(struct sock_client_context *client, + void *private_data); + + struct tevent_req * (*read_send)(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct sock_client_context *client, + uint8_t *buf, size_t buflen, + void *private_data); + bool (*read_recv)(struct tevent_req *req, int *perr); +}; + +/** + * @brief Async computation to send data to the client + * + * @param[in] mem_ctx Talloc memory context + * @param[in] ev Tevent context + * @param[in] client The socket client context + * @param[in] buf Data to be sent to the client + * @param[in] buflen Length of the data + * @return new tevent request, or NULL on failure + */ +struct tevent_req *sock_socket_write_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct sock_client_context *client, + uint8_t *buf, size_t buflen); + +/** + * @brief Async computation end to send data to client + * + * @param[in] req Tevent request + * @param[out] perr errno in case of failure + * @return true on success, false on failure + */ +bool sock_socket_write_recv(struct tevent_req *req, int *perr); + +/** + * @brief Create a new socket daemon + * + * @param[in] mem_ctx Talloc memory context + * @param[in] daemon_name Name of 
the daemon, used for logging + * @param[in] logging Logging setup string + * @param[in] debug_level Debug level to log at + * @param[in] funcs Socket daemon callback routines + * @param[in] private_data Private data associated with callback routines + * @param[out] result New socket daemon context + * @return 0 on success, errno on failure + */ +int sock_daemon_setup(TALLOC_CTX *mem_ctx, const char *daemon_name, + const char *logging, const char *debug_level, + struct sock_daemon_funcs *funcs, + void *private_data, + struct sock_daemon_context **result); + +/** + * @brief Create and listen to the unix domain socket + * + * @param[in] sockd Socket daemon context + * @param[in] sockpath Unix domain socket path + * @param[in] funcs socket callback routines + * @param[in] private_data Private data associated with callback routines + * @return 0 on success, errno on failure + */ +int sock_daemon_add_unix(struct sock_daemon_context *sockd, + const char *sockpath, + struct sock_socket_funcs *funcs, + void *private_data); + +/** + * @brief Set file descriptor for indicating startup success + * + * On successful completion, 0 (unsigned int) will be written to the fd. + * + * @param[in] sockd Socket daemon context + * @param[in] fd File descriptor + * @return true on success, false on error + */ +bool sock_daemon_set_startup_fd(struct sock_daemon_context *sockd, int fd); + +/** + * @brief Async computation start to run a socket daemon + * + * @param[in] mem_ctx Talloc memory context + * @param[in] ev Tevent context + * @param[in] sockd The socket daemon context + * @param[in] pidfile PID file to create, NULL if no PID file required + * @param[in] do_fork Whether the daemon should fork on startup + * @param[in] create_session Whether the daemon should create a new session + * @param[in] pid_watch PID to watch. If PID goes away, shutdown. 
+ * @return new tevent request, NULL on failure + */ +struct tevent_req *sock_daemon_run_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct sock_daemon_context *sockd, + const char *pidfile, + bool do_fork, bool create_session, + pid_t pid_watch); + +/** + * @brief Async computation end to run a socket daemon + * + * @param[in] req Tevent request + * @param[out] perr errno in case of failure + * @return true on success, false on failure + */ +bool sock_daemon_run_recv(struct tevent_req *req, int *perr); + +/** + * @brief Sync way to start a daemon + * + * @param[in] ev Tevent context + * @param[in] sockd The socket daemon context + * @param[in] pidfile PID file to create, NULL if no PID file required + * @param[in] do_fork Whether the daemon should fork on startup + * @param[in] create_session Whether the daemon should create a new session + * @param[in] pid_watch PID to watch. If PID goes away, shutdown. + * @return 0 on success, errno on failure + * + * This call will return only on shutdown of the daemon + */ +int sock_daemon_run(struct tevent_context *ev, + struct sock_daemon_context *sockd, + const char *pidfile, + bool do_fork, bool create_session, + pid_t pid_watch); + +#endif /* __CTDB_SOCK_DAEMON_H__ */ diff --git a/ctdb/common/sock_io.c b/ctdb/common/sock_io.c new file mode 100644 index 0000000..81e82c5 --- /dev/null +++ b/ctdb/common/sock_io.c @@ -0,0 +1,328 @@ +/* + Generic Unix-domain Socket I/O + + Copyright (C) Amitay Isaacs 2016 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" +#include "system/filesys.h" +#include "system/network.h" + +#include <talloc.h> +#include <tevent.h> + +#include "lib/util/sys_rw.h" +#include "lib/util/debug.h" +#include "lib/util/blocking.h" + +#include "common/logging.h" +#include "common/sock_io.h" + +bool sock_clean(const char *sockpath) +{ + int ret; + + ret = unlink(sockpath); + if (ret == 0) { + D_WARNING("Removed stale socket %s\n", sockpath); + } else if (errno != ENOENT) { + D_ERR("Failed to remove stale socket %s\n", sockpath); + return false; + } + + return true; +} + +int sock_connect(const char *sockpath) +{ + struct sockaddr_un addr; + size_t len; + int fd, ret; + + if (sockpath == NULL) { + D_ERR("Invalid socket path\n"); + return -1; + } + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + len = strlcpy(addr.sun_path, sockpath, sizeof(addr.sun_path)); + if (len >= sizeof(addr.sun_path)) { + D_ERR("Socket path too long, len=%zu\n", strlen(sockpath)); + return -1; + } + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd == -1) { + D_ERR("socket() failed, errno=%d\n", errno); + return -1; + } + + ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr)); + if (ret == -1) { + D_ERR("connect() failed, errno=%d\n", errno); + close(fd); + return -1; + } + + return fd; +} + +struct sock_queue { + struct tevent_context *ev; + sock_queue_callback_fn_t callback; + void *private_data; + int fd; + + struct tevent_immediate *im; + struct tevent_queue *queue; + struct tevent_fd *fde; + uint8_t *buf; + size_t buflen, begin, end; +}; + +/* + * The reserved talloc headers, SOCK_QUEUE_OBJ_COUNT, + * and the pre-allocated pool-memory SOCK_QUEUE_POOL_SIZE, + * are used for the sub-objects queue->im, queue->queue, queue->fde + * and queue->buf. 
+ * If the memory allocating sub-objects of struct sock_queue change, + * those values need to be adjusted. + */ +#define SOCK_QUEUE_OBJ_COUNT 4 +#define SOCK_QUEUE_POOL_SIZE 2048 + +static bool sock_queue_set_fd(struct sock_queue *queue, int fd); +static void sock_queue_handler(struct tevent_context *ev, + struct tevent_fd *fde, uint16_t flags, + void *private_data); +static void sock_queue_process(struct sock_queue *queue); +static void sock_queue_process_event(struct tevent_context *ev, + struct tevent_immediate *im, + void *private_data); + +struct sock_queue *sock_queue_setup(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + int fd, + sock_queue_callback_fn_t callback, + void *private_data) +{ + struct sock_queue *queue; + + queue = talloc_pooled_object(mem_ctx, struct sock_queue, + SOCK_QUEUE_OBJ_COUNT, SOCK_QUEUE_POOL_SIZE); + if (queue == NULL) { + return NULL; + } + memset(queue, 0, sizeof(struct sock_queue)); + + queue->ev = ev; + queue->callback = callback; + queue->private_data = private_data; + + queue->im = tevent_create_immediate(queue); + if (queue->im == NULL) { + talloc_free(queue); + return NULL; + } + + queue->queue = tevent_queue_create(queue, "out-queue"); + if (queue->queue == NULL) { + talloc_free(queue); + return NULL; + } + + if (! 
sock_queue_set_fd(queue, fd)) { + talloc_free(queue); + return NULL; + } + + return queue; +} + +static bool sock_queue_set_fd(struct sock_queue *queue, int fd) +{ + TALLOC_FREE(queue->fde); + queue->fd = fd; + + if (fd != -1) { + int ret; + + ret = set_blocking(fd, false); + if (ret != 0) { + return false; + } + + queue->fde = tevent_add_fd(queue->ev, queue, fd, + TEVENT_FD_READ, + sock_queue_handler, queue); + if (queue->fde == NULL) { + return false; + } + tevent_fd_set_auto_close(queue->fde); + } + + return true; +} + +static void sock_queue_handler(struct tevent_context *ev, + struct tevent_fd *fde, uint16_t flags, + void *private_data) +{ + struct sock_queue *queue = talloc_get_type_abort( + private_data, struct sock_queue); + int ret, num_ready; + ssize_t nread; + + ret = ioctl(queue->fd, FIONREAD, &num_ready); + if (ret != 0) { + /* Ignore */ + return; + } + + if (num_ready == 0) { + /* descriptor has been closed */ + goto fail; + } + + if ((size_t)num_ready > queue->buflen - queue->end) { + queue->buf = talloc_realloc_size(queue, queue->buf, + queue->end + num_ready); + if (queue->buf == NULL) { + goto fail; + } + queue->buflen = queue->end + num_ready; + } + + nread = sys_read(queue->fd, queue->buf + queue->end, num_ready); + if (nread < 0) { + goto fail; + } + queue->end += nread; + + sock_queue_process(queue); + return; + +fail: + queue->callback(NULL, 0, queue->private_data); +} + +static void sock_queue_process(struct sock_queue *queue) +{ + uint32_t pkt_size; + + if ((queue->end - queue->begin) < sizeof(uint32_t)) { + /* not enough data */ + return; + } + + pkt_size = *(uint32_t *)(queue->buf + queue->begin); + if (pkt_size == 0) { + D_ERR("Invalid packet of length 0\n"); + queue->callback(NULL, 0, queue->private_data); + return; + } + + if ((queue->end - queue->begin) < pkt_size) { + /* not enough data */ + return; + } + + queue->callback(queue->buf + queue->begin, pkt_size, + queue->private_data); + queue->begin += pkt_size; + + if (queue->begin < 
queue->end) { + /* more data to be processed */ + tevent_schedule_immediate(queue->im, queue->ev, + sock_queue_process_event, queue); + } else { + TALLOC_FREE(queue->buf); + queue->buflen = 0; + queue->begin = 0; + queue->end = 0; + } +} + +static void sock_queue_process_event(struct tevent_context *ev, + struct tevent_immediate *im, + void *private_data) +{ + struct sock_queue *queue = talloc_get_type_abort( + private_data, struct sock_queue); + + sock_queue_process(queue); +} + +struct sock_queue_write_state { + uint8_t *pkt; + uint32_t pkt_size; +}; + +static void sock_queue_trigger(struct tevent_req *req, void *private_data); + +int sock_queue_write(struct sock_queue *queue, uint8_t *buf, size_t buflen) +{ + struct tevent_req *req; + struct sock_queue_write_state *state; + struct tevent_queue_entry *qentry; + + if (buflen >= INT32_MAX) { + return -1; + } + + req = tevent_req_create(queue, &state, struct sock_queue_write_state); + if (req == NULL) { + return -1; + } + + state->pkt = buf; + state->pkt_size = (uint32_t)buflen; + + qentry = tevent_queue_add_entry(queue->queue, queue->ev, req, + sock_queue_trigger, queue); + if (qentry == NULL) { + talloc_free(req); + return -1; + } + + return 0; +} + +static void sock_queue_trigger(struct tevent_req *req, void *private_data) +{ + struct sock_queue *queue = talloc_get_type_abort( + private_data, struct sock_queue); + struct sock_queue_write_state *state = tevent_req_data( + req, struct sock_queue_write_state); + size_t offset = 0; + + do { + ssize_t nwritten; + + nwritten = sys_write(queue->fd, state->pkt + offset, + state->pkt_size - offset); + if (nwritten < 0) { + queue->callback(NULL, 0, queue->private_data); + return; + } + offset += nwritten; + + } while (offset < state->pkt_size); + + tevent_req_done(req); + talloc_free(req); +} diff --git a/ctdb/common/sock_io.h b/ctdb/common/sock_io.h new file mode 100644 index 0000000..8b6e4eb --- /dev/null +++ b/ctdb/common/sock_io.h @@ -0,0 +1,39 @@ +/* + Generic Socket 
I/O + + Copyright (C) Amitay Isaacs 2016 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_SOCK_IO_H__ +#define __CTDB_SOCK_IO_H__ + +typedef void (*sock_queue_callback_fn_t)(uint8_t *buf, size_t buflen, + void *private_data); + +struct sock_queue; + +bool sock_clean(const char *sockpath); +int sock_connect(const char *sockpath); + +struct sock_queue *sock_queue_setup(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + int fd, + sock_queue_callback_fn_t callback, + void *private_data); + +int sock_queue_write(struct sock_queue *queue, uint8_t *buf, size_t buflen); + +#endif /* __CTDB_SOCK_IO_H__ */ diff --git a/ctdb/common/srvid.c b/ctdb/common/srvid.c new file mode 100644 index 0000000..3304994 --- /dev/null +++ b/ctdb/common/srvid.c @@ -0,0 +1,280 @@ +/* + Message handler database based on srvid + + Copyright (C) Amitay Isaacs 2015 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" +#include "system/filesys.h" + +#include <tdb.h> + +#include "lib/util/dlinklist.h" +#include "common/db_hash.h" +#include "common/srvid.h" + +struct srvid_handler_list; + +struct srvid_context { + struct db_hash_context *dh; + struct srvid_handler_list *list; +}; + +struct srvid_handler { + struct srvid_handler *prev, *next; + struct srvid_handler_list *list; + srvid_handler_fn handler; + void *private_data; +}; + +struct srvid_handler_list { + struct srvid_handler_list *prev, *next; + struct srvid_context *srv; + uint64_t srvid; + struct srvid_handler *h; +}; + + +/* + * Initialise message srvid context and database + */ +int srvid_init(TALLOC_CTX *mem_ctx, struct srvid_context **result) +{ + struct srvid_context *srv; + int ret; + + srv = talloc_zero(mem_ctx, struct srvid_context); + if (srv == NULL) { + return ENOMEM; + } + + ret = db_hash_init(srv, "messagedb", 8192, DB_HASH_SIMPLE, &srv->dh); + if (ret != 0) { + talloc_free(srv); + return ret; + } + + *result = srv; + return 0; +} + +/* + * Wrapper functions to insert/delete/fetch srvid_hander_list + */ + +static int srvid_insert(struct srvid_context *srv, uint64_t srvid, + struct srvid_handler_list *list) +{ + return db_hash_insert(srv->dh, (uint8_t *)&srvid, sizeof(uint64_t), + (uint8_t *)&list, sizeof(list)); +} + +static int srvid_delete(struct srvid_context *srv, uint64_t srvid) +{ + return db_hash_delete(srv->dh, (uint8_t *)&srvid, sizeof(uint64_t)); +} + +static int srvid_fetch_parser(uint8_t *keybuf, size_t keylen, + uint8_t *databuf, size_t datalen, + void *private_data) +{ + struct srvid_handler_list **list = + (struct srvid_handler_list **)private_data; + + if (datalen != sizeof(*list)) { + return EIO; + } + + *list = *(struct srvid_handler_list **)databuf; + return 0; +} + +static int srvid_fetch(struct srvid_context *srv, uint64_t 
srvid, + struct srvid_handler_list **list) +{ + return db_hash_fetch(srv->dh, (uint8_t *)&srvid, sizeof(uint64_t), + srvid_fetch_parser, list); +} + +/* + * When a handler is freed, remove it from the list + */ +static int srvid_handler_destructor(struct srvid_handler *h) +{ + struct srvid_handler_list *list = h->list; + + DLIST_REMOVE(list->h, h); + if (list->h == NULL) { + talloc_free(list); + } + return 0; +} + +/* + * When a list is freed, remove all handlers and remove db entry + */ +static int srvid_handler_list_destructor(struct srvid_handler_list *list) +{ + struct srvid_handler *h; + + while (list->h != NULL) { + h = list->h; + DLIST_REMOVE(list->h, h); + TALLOC_FREE(h); + } + + srvid_delete(list->srv, list->srvid); + DLIST_REMOVE(list->srv->list, list); + return 0; +} + +/* + * Register a message handler + */ +int srvid_register(struct srvid_context *srv, TALLOC_CTX *mem_ctx, + uint64_t srvid, srvid_handler_fn handler, + void *private_data) +{ + struct srvid_handler_list *list; + struct srvid_handler *h; + int ret; + + if (srv == NULL) { + return EINVAL; + } + + h = talloc_zero(mem_ctx, struct srvid_handler); + if (h == NULL) { + return ENOMEM; + } + + h->handler = handler; + h->private_data = private_data; + + ret = srvid_fetch(srv, srvid, &list); + if (ret != 0) { + /* srvid not yet registered */ + list = talloc_zero(srv, struct srvid_handler_list); + if (list == NULL) { + talloc_free(h); + return ENOMEM; + } + + list->srv = srv; + list->srvid = srvid; + + ret = srvid_insert(srv, srvid, list); + if (ret != 0) { + talloc_free(h); + talloc_free(list); + return ret; + } + + DLIST_ADD(srv->list, list); + talloc_set_destructor(list, srvid_handler_list_destructor); + } + + h->list = list; + DLIST_ADD(list->h, h); + talloc_set_destructor(h, srvid_handler_destructor); + return 0; +} + +/* + * Deregister a message handler + */ +int srvid_deregister(struct srvid_context *srv, uint64_t srvid, + void *private_data) +{ + struct srvid_handler_list *list; + struct 
srvid_handler *h; + int ret; + + ret = srvid_fetch(srv, srvid, &list); + if (ret != 0) { + return ret; + } + + for (h = list->h; h != NULL; h = h->next) { + if (h->private_data == private_data) { + talloc_free(h); + return 0; + } + } + + return ENOENT; +} + +/* + * Check if a message handler exists + */ +int srvid_exists(struct srvid_context *srv, uint64_t srvid, void *private_data) +{ + struct srvid_handler_list *list; + struct srvid_handler *h; + int ret; + + ret = srvid_fetch(srv, srvid, &list); + if (ret != 0) { + return ret; + } + if (list->h == NULL) { + return ENOENT; + } + + if (private_data != NULL) { + for (h = list->h; h != NULL; h = h->next) { + if (h->private_data == private_data) { + return 0; + } + } + + return ENOENT; + } + + return 0; +} + +/* + * Send a message to registered srvid and srvid_all + */ +int srvid_dispatch(struct srvid_context *srv, uint64_t srvid, + uint64_t srvid_all, TDB_DATA data) +{ + struct srvid_handler_list *list; + struct srvid_handler *h; + int ret; + + ret = srvid_fetch(srv, srvid, &list); + if (ret == 0) { + for (h = list->h; h != NULL; h = h->next) { + h->handler(srvid, data, h->private_data); + } + } + + if (srvid_all == 0) { + return ret; + } + + ret = srvid_fetch(srv, srvid_all, &list); + if (ret == 0) { + for (h = list->h; h != NULL; h = h->next) { + h->handler(srvid, data, h->private_data); + } + } + + return ret; +} diff --git a/ctdb/common/srvid.h b/ctdb/common/srvid.h new file mode 100644 index 0000000..c0c2b30 --- /dev/null +++ b/ctdb/common/srvid.h @@ -0,0 +1,121 @@ +/* + Message handler database based on srvid + + Copyright (C) Amitay Isaacs 2015 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_SRVID_H__ +#define __CTDB_SRVID_H__ + +#include <talloc.h> +#include <tdb.h> + +/** + * @file srvid.h + * + * @brief Database of message handlers based on srvid + * + * CTDB can be used to send messages between clients across nodes using + * CTDB_REQ_MESSAGE. Clients register for messages based on srvid. CTDB itself + * uses a small set of srvid messages. A large range (2^56) of srvid messages + * is reserved for Samba. + */ + +/** + * @brief Message handler function + * + * To receive messages for a specific srvid, register a message handler function + * for the srvid. + */ +typedef void (*srvid_handler_fn)(uint64_t srvid, TDB_DATA data, + void *private_data); + +/** + * @brief Abstract struct to store srvid message handler database + */ +struct srvid_context; + +/** + * @brief Initialize srvid message handler database + * + * This returns a new srvid message handler database context. Freeing + * this context will free all the memory associated with the hash table. + * + * @param[in] mem_ctx Talloc memory context + * @param[out] result The new srvid_context structure + * @return 0 on success, errno on failure + */ +int srvid_init(TALLOC_CTX *mem_ctx, struct srvid_context **result); + +/** + * @brief Register a message handler for a srvid + * + * The message handler is allocated using the specified talloc context. Freeing + * this talloc context removes the message handler. 
+ * + * @param[in] srv The srvid message handler database context + * @param[in] mem_ctx Talloc memory context for message handler + * @param[in] srvid The srvid + * @param[in] handler The message handler function for srvid + * @param[in] private_data Private data for message handler function + * @return 0 on success, errno on failure + */ +int srvid_register(struct srvid_context *srv, TALLOC_CTX *mem_ctx, + uint64_t srvid, srvid_handler_fn handler, + void *private_data); + +/** + * @brief Unregister a message handler for a srvid + * + * @param[in] srv The srvid message handler database context + * @param[in] srvid The srvid + * @param[in] private_data Private data of message handler function + * @return 0 on success, errno on failure + */ +int srvid_deregister(struct srvid_context *srv, uint64_t srvid, + void *private_data); + +/** + * @brief Check if any message handler is registered for srvid + * + * If private_data is NULL, then check if there is any registration + * for the specified srvid. If private_data is not NULL, then check for + * registration that matches the specified private data. + * + * @param[in] srv The srvid message handler database context + * @param[in] srvid The srvid + * @param[in] private_data Private data + * @return 0 on success, errno on failure + */ +int srvid_exists(struct srvid_context *srv, uint64_t srvid, + void *private_data); + +/** + * @brief Call message handlers for given srvid + * + * @param[in] srv The srvid message handler database context + * @param[in] srvid The srvid + * @param[in] srvid_all The srvid that gets all messages + * @param[in] data The data passed to each message handler + * @return 0 on success, errno on failure + * + * If srvid_all passed is 0, the message is not sent to message handlers + * registered with special srvid to receive all messages. 
+ */ +int srvid_dispatch(struct srvid_context *srv, uint64_t srvid, + uint64_t srvid_all, TDB_DATA data); + +#endif /* __CTDB_SRVID_H__ */ diff --git a/ctdb/common/system.c b/ctdb/common/system.c new file mode 100644 index 0000000..08dc682 --- /dev/null +++ b/ctdb/common/system.c @@ -0,0 +1,237 @@ +/* + common system utilities + + Copyright (C) Amitay Isaacs 2014 + Copyright (C) Martin Schwenke 2014 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "replace.h" +#include "system/filesys.h" +#include "system/shmem.h" +#include "system/network.h" + +#include <talloc.h> +#include <libgen.h> + +#include "lib/util/debug.h" + +#include "protocol/protocol.h" + +#include "common/logging.h" +#include "common/system.h" + +#ifdef HAVE_SCHED_H +#include <sched.h> +#endif + +#ifdef HAVE_PROCINFO_H +#include <procinfo.h> +#endif + +/* + if possible, make this task real time + */ +bool set_scheduler(void) +{ +#ifdef _AIX_ +#ifdef HAVE_THREAD_SETSCHED + struct thrdentry64 te; + tid64_t ti; + + ti = 0ULL; + if (getthrds64(getpid(), &te, sizeof(te), &ti, 1) != 1) { + DEBUG(DEBUG_ERR, ("Unable to get thread information\n")); + return false; + } + + if (thread_setsched(te.ti_tid, 0, SCHED_RR) == -1) { + DEBUG(DEBUG_ERR, ("Unable to set scheduler to SCHED_RR (%s)\n", + strerror(errno))); + return false; + } else { + return true; + } +#endif +#else /* no AIX */ +#ifdef HAVE_SCHED_SETSCHEDULER + struct sched_param p; + + p.sched_priority = 1; + + if (sched_setscheduler(0, SCHED_FIFO, &p) == -1) { + DEBUG(DEBUG_CRIT,("Unable to set scheduler to SCHED_FIFO (%s)\n", + strerror(errno))); + return false; + } else { + return true; + } +#endif +#endif + DEBUG(DEBUG_CRIT,("No way to set real-time priority.\n")); + return false; +} + +/* + reset scheduler from real-time to normal scheduling + */ +void reset_scheduler(void) +{ +#ifdef _AIX_ +#ifdef HAVE_THREAD_SETSCHED + struct thrdentry64 te; + tid64_t ti; + + ti = 0ULL; + if (getthrds64(getpid(), &te, sizeof(te), &ti, 1) != 1) { + DEBUG(DEBUG_ERR, ("Unable to get thread information\n")); + } + if (thread_setsched(te.ti_tid, 0, SCHED_OTHER) == -1) { + DEBUG(DEBUG_ERR, ("Unable to set scheduler to SCHED_OTHER\n")); + } +#endif +#else /* no AIX */ +#ifdef HAVE_SCHED_SETSCHEDULER + struct sched_param p; + + p.sched_priority = 0; + if (sched_setscheduler(0, SCHED_OTHER, &p) == -1) { + DEBUG(DEBUG_ERR, ("Unable to set scheduler to SCHED_OTHER\n")); + } +#endif +#endif +} + +/* we 
don't lock future pages here; it would increase the chance that + * we'd fail to mmap later on. */ +void lockdown_memory(bool valgrinding) +{ +#if defined(HAVE_MLOCKALL) && !defined(_AIX_) + /* Extra stack, please! */ + char dummy[10000]; + memset(dummy, 0, sizeof(dummy)); + + if (valgrinding) { + return; + } + + /* Ignore when running in local daemons mode */ + if (getuid() != 0) { + return; + } + + /* Avoid compiler optimizing out dummy. */ + mlock(dummy, sizeof(dummy)); + if (mlockall(MCL_CURRENT) != 0) { + DEBUG(DEBUG_WARNING,("Failed to lockdown memory: %s'\n", + strerror(errno))); + } +#endif +} + +void ctdb_wait_for_process_to_exit(pid_t pid) +{ + while (kill(pid, 0) == 0 || errno != ESRCH) { + sleep(5); + } +} + +#ifdef HAVE_IF_NAMEINDEX + +bool ctdb_sys_check_iface_exists(const char *iface) +{ + struct if_nameindex *ifnis, *ifni; + bool found = false; + + ifnis = if_nameindex(); + if (ifnis == NULL) { + DBG_ERR("Failed to retrieve inteface list\n"); + return false; + } + + for (ifni = ifnis; + ifni->if_index != 0 || ifni->if_name != NULL; + ifni++) { + int cmp = strcmp(iface, ifni->if_name); + if (cmp == 0) { + found = true; + goto done; + } + } + +done: + if_freenameindex(ifnis); + + return found; +} + +#else /* HAVE_IF_NAMEINDEX */ + +bool ctdb_sys_check_iface_exists(const char *iface) +{ + /* Not implemented: Interface always considered present */ + return true; +} + +#endif /* HAVE_IF_NAMEINDEX */ + +#ifdef HAVE_PEERCRED + +int ctdb_get_peer_pid(const int fd, pid_t *peer_pid) +{ + struct ucred cr; + socklen_t crl = sizeof(struct ucred); + int ret; + + ret = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cr, &crl); + if (ret == 0) { + *peer_pid = cr.pid; + } else { + *peer_pid = -1; + } + return ret; +} + +#else /* HAVE_PEERCRED */ + +#ifdef _AIX_ + +int ctdb_get_peer_pid(const int fd, pid_t *peer_pid) +{ + struct peercred_struct cr; + socklen_t crl = sizeof(struct peercred_struct); + int ret; + + ret = getsockopt(fd, SOL_SOCKET, SO_PEERID, &cr, &crl); + if 
/*
 * Fallback used when neither the SO_PEERCRED nor the AIX SO_PEERID
 * variant is compiled in.
 *
 * fd is not examined.  *peer_pid is set to -1 and ENOSYS is returned.
 */
int ctdb_get_peer_pid(const int fd, pid_t *peer_pid)
{
	const pid_t unknown_pid = -1;

	/* Not implemented */
	*peer_pid = unknown_pid;

	return ENOSYS;
}
+*/ + +#ifndef __CTDB_SYSTEM_H__ +#define __CTDB_SYSTEM_H__ + +#include <talloc.h> + +/* From system_util.c */ + +bool set_scheduler(void); +void reset_scheduler(void); + +void lockdown_memory(bool valgrinding); + +void ctdb_wait_for_process_to_exit(pid_t pid); + +bool ctdb_sys_check_iface_exists(const char *iface); +int ctdb_get_peer_pid(const int fd, pid_t *peer_pid); + +#endif /* __CTDB_SYSTEM_H__ */ diff --git a/ctdb/common/system_socket.c b/ctdb/common/system_socket.c new file mode 100644 index 0000000..273b9c3 --- /dev/null +++ b/ctdb/common/system_socket.c @@ -0,0 +1,1168 @@ +/* + ctdb system specific code to manage raw sockets on linux + + Copyright (C) Ronnie Sahlberg 2007 + Copyright (C) Andrew Tridgell 2007 + Copyright (C) Marc Dequènes (Duck) 2009 + Copyright (C) Volker Lendecke 2012 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" + +/* + * Use BSD struct tcphdr field names for portability. Modern glibc + * makes them available by default via <netinet/tcp.h> but older glibc + * requires __FAVOR_BSD to be defined. + * + * __FAVOR_BSD is normally defined in <features.h> if _DEFAULT_SOURCE + * (new) or _BSD_SOURCE (now deprecated) is set and _GNU_SOURCE is not + * set. 
/*
 * Sum the 16-bit network order words in n bytes of data
 *
 * Each pair of bytes is read as one network byte order 16-bit word
 * and added to the total.  If n is odd then the final byte is taken
 * as the high-order byte of a word whose low-order byte is zero.
 *
 * Returns the unfolded 32-bit sum.  Folding the carries and taking
 * the complement is left to the caller.
 */
static uint32_t uint16_checksum(uint8_t *data, size_t n)
{
	uint32_t sum = 0;
	uint16_t value;

	while (n >= 2) {
		/* memcpy() because data need not be 16-bit aligned */
		memcpy(&value, data, 2);
		sum += (uint32_t)ntohs(value);
		data += 2;
		n -= 2;
	}
	if (n == 1) {
		/*
		 * Pad the trailing byte with a zero low-order byte.
		 * The shift does not depend on host byte order, whereas
		 * ntohs() on a single byte only produces this value on
		 * little-endian hosts.
		 */
		sum += (uint32_t)*data << 8;
	}
	return sum;
}
/*
 * Checksum for an upper-layer packet carried over IPv6
 *
 * Sums the source and destination addresses from ip6, a pseudo
 * header holding the 32-bit length n in network order followed by
 * the next header value, and the n bytes at data.  The total is
 * folded to 16 bits and complemented.  A result of 0 is returned as
 * 0xFFFF.
 */
static uint16_t ip6_checksum(uint8_t *data, size_t n, struct ip6_hdr *ip6)
{
	const uint32_t len_n = htonl(n);
	uint16_t pseudo[3] = {
		len_n & UINT16_MAX,
		(len_n >> 16) & UINT16_MAX,
		/* ip6_nxt is only 8 bits, so fits comfortably into a uint16_t */
		htons(ip6->ip6_nxt),
	};
	uint32_t total;
	uint16_t result;
	int i;

	total = uint16_checksum((uint8_t *)&ip6->ip6_src, 16);
	total += uint16_checksum((uint8_t *)&ip6->ip6_dst, 16);
	total += uint16_checksum((uint8_t *)pseudo, sizeof(pseudo));
	total += uint16_checksum(data, n);

	/* Fold the carries back in, twice in case the first fold carries */
	for (i = 0; i < 2; i++) {
		total = (total & 0xFFFF) + (total >> 16);
	}

	result = htons(total);
	result = ~result;

	return (result == 0) ? 0xFFFF : result;
}
(buflen < l) { + return EMSGSIZE; + } + + memset(buffer, 0 , l); + + eh = (struct ether_header *)buffer; + memset(eh->ether_dhost, 0xff, ETH_ALEN); + memcpy(eh->ether_shost, hwaddr, ETH_ALEN); + eh->ether_type = htons(ETHERTYPE_ARP); + + ea = (struct ether_arp *)(buffer + sizeof(struct ether_header)); + ah = &ea->ea_hdr; + ah->ar_hrd = htons(ARPHRD_ETHER); + ah->ar_pro = htons(ETH_P_IP); + ah->ar_hln = ETH_ALEN; + ah->ar_pln = sizeof(ea->arp_spa); + + if (! reply) { + ah->ar_op = htons(ARPOP_REQUEST); + memcpy(ea->arp_sha, hwaddr, ETH_ALEN); + memcpy(ea->arp_spa, &addr->sin_addr, sizeof(ea->arp_spa)); + memset(ea->arp_tha, 0, ETH_ALEN); + memcpy(ea->arp_tpa, &addr->sin_addr, sizeof(ea->arp_tpa)); + } else { + ah->ar_op = htons(ARPOP_REPLY); + memcpy(ea->arp_sha, hwaddr, ETH_ALEN); + memcpy(ea->arp_spa, &addr->sin_addr, sizeof(ea->arp_spa)); + memcpy(ea->arp_tha, hwaddr, ETH_ALEN); + memcpy(ea->arp_tpa, &addr->sin_addr, sizeof(ea->arp_tpa)); + } + + *ether_dhost = (struct ether_addr *)eh->ether_dhost; + *len = l; + return 0; +} + +static int ip6_na_build(uint8_t *buffer, + size_t buflen, + const struct sockaddr_in6 *addr, + const struct ether_addr *hwaddr, + struct ether_addr **ether_dhost, + size_t *len) +{ + size_t l = IP6_NA_BUFFER_SIZE; + struct ether_header *eh; + struct ip6_hdr *ip6; + struct nd_neighbor_advert *nd_na; + struct nd_opt_hdr *nd_oh; + struct ether_addr *ea; + int ret; + + if (addr->sin6_family != AF_INET6) { + return EINVAL; + } + + if (buflen < l) { + return EMSGSIZE; + } + + memset(buffer, 0 , l); + + eh = (struct ether_header *)buffer; + /* + * Ethernet multicast: 33:33:00:00:00:01 (see RFC2464, + * section 7) - note memset 0 above! 
+ */ + eh->ether_dhost[0] = 0x33; + eh->ether_dhost[1] = 0x33; + eh->ether_dhost[5] = 0x01; + memcpy(eh->ether_shost, hwaddr, ETH_ALEN); + eh->ether_type = htons(ETHERTYPE_IP6); + + ip6 = (struct ip6_hdr *)(buffer + sizeof(struct ether_header)); + ip6->ip6_vfc = 6 << 4; + ip6->ip6_plen = htons(sizeof(struct nd_neighbor_advert) + + sizeof(struct nd_opt_hdr) + + ETH_ALEN); + ip6->ip6_nxt = IPPROTO_ICMPV6; + ip6->ip6_hlim = 255; + ip6->ip6_src = addr->sin6_addr; + /* all-nodes multicast */ + + ret = inet_pton(AF_INET6, "ff02::1", &ip6->ip6_dst); + if (ret != 1) { + return EIO; + } + + nd_na = (struct nd_neighbor_advert *)(buffer + + sizeof(struct ether_header) + + sizeof(struct ip6_hdr)); + nd_na->nd_na_type = ND_NEIGHBOR_ADVERT; + nd_na->nd_na_code = 0; + nd_na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE; + nd_na->nd_na_target = addr->sin6_addr; + + /* Option: Target link-layer address */ + nd_oh = (struct nd_opt_hdr *)(buffer + + sizeof(struct ether_header) + + sizeof(struct ip6_hdr) + + sizeof(struct nd_neighbor_advert)); + nd_oh->nd_opt_type = ND_OPT_TARGET_LINKADDR; + nd_oh->nd_opt_len = 1; /* multiple of 8 octets */ + + ea = (struct ether_addr *)(buffer + + sizeof(struct ether_header) + + sizeof(struct ip6_hdr) + + sizeof(struct nd_neighbor_advert) + + sizeof(struct nd_opt_hdr)); + memcpy(ea, hwaddr, ETH_ALEN); + + nd_na->nd_na_cksum = ip6_checksum((uint8_t *)nd_na, + ntohs(ip6->ip6_plen), + ip6); + + *ether_dhost = (struct ether_addr *)eh->ether_dhost; + *len = l; + return 0; +} + +int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface) +{ + int s; + struct sockaddr_ll sall = {0}; + struct ifreq if_hwaddr = { + .ifr_ifru = { + .ifru_flags = 0 + }, + }; + uint8_t buffer[MAX(ARP_BUFFER_SIZE, IP6_NA_BUFFER_SIZE)]; + struct ifreq ifr = { + .ifr_ifru = { + .ifru_flags = 0 + }, + }; + struct ether_addr *hwaddr = NULL; + struct ether_addr *ether_dhost = NULL; + size_t len = 0; + int ret = 0; + + s = socket(AF_PACKET, SOCK_RAW, 0); + if (s == -1) { + ret = 
errno; + DBG_ERR("Failed to open raw socket\n"); + return ret; + } + DBG_DEBUG("Created SOCKET FD:%d for sending arp\n", s); + + /* Find interface */ + strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name)); + if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) { + ret = errno; + DBG_ERR("Interface '%s' not found\n", iface); + goto fail; + } + + /* Get MAC address */ + strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name)); + ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr); + if ( ret < 0 ) { + ret = errno; + DBG_ERR("ioctl failed\n"); + goto fail; + } + if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) { + ret = 0; + D_DEBUG("Ignoring loopback arp request\n"); + goto fail; + } + if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) { + ret = EINVAL; + DBG_ERR("Not an ethernet address family (0x%x)\n", + if_hwaddr.ifr_hwaddr.sa_family); + goto fail;; + } + + /* Set up most of destination address structure */ + sall.sll_family = AF_PACKET; + sall.sll_halen = sizeof(struct ether_addr); + sall.sll_protocol = htons(ETH_P_ALL); + sall.sll_ifindex = ifr.ifr_ifindex; + + /* For clarity */ + hwaddr = (struct ether_addr *)if_hwaddr.ifr_hwaddr.sa_data; + + switch (addr->ip.sin_family) { + case AF_INET: + /* Send gratuitous ARP */ + ret = arp_build(buffer, + sizeof(buffer), + &addr->ip, + hwaddr, + false, + ðer_dhost, + &len); + if (ret != 0) { + DBG_ERR("Failed to build ARP request\n"); + goto fail; + } + + memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen); + + ret = sendto(s, + buffer, + len, + 0, + (struct sockaddr *)&sall, + sizeof(sall)); + if (ret < 0 ) { + ret = errno; + DBG_ERR("Failed sendto\n"); + goto fail; + } + + /* Send unsolicited ARP reply */ + ret = arp_build(buffer, + sizeof(buffer), + &addr->ip, + hwaddr, + true, + ðer_dhost, + &len); + if (ret != 0) { + DBG_ERR("Failed to build ARP reply\n"); + goto fail; + } + + memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen); + + ret = sendto(s, + buffer, + len, + 0, + (struct sockaddr *)&sall, + sizeof(sall)); + if (ret 
< 0 ) { + ret = errno; + DBG_ERR("Failed sendto\n"); + goto fail; + } + + close(s); + break; + + case AF_INET6: + ret = ip6_na_build(buffer, + sizeof(buffer), + &addr->ip6, + hwaddr, + ðer_dhost, + &len); + if (ret != 0) { + DBG_ERR("Failed to build IPv6 neighbor advertisement\n"); + goto fail; + } + + memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen); + + ret = sendto(s, + buffer, + len, + 0, + (struct sockaddr *)&sall, + sizeof(sall)); + if (ret < 0 ) { + ret = errno; + DBG_ERR("Failed sendto\n"); + goto fail; + } + + close(s); + break; + + default: + ret = EINVAL; + DBG_ERR("Not an ipv4/ipv6 address (family is %u)\n", + addr->ip.sin_family); + goto fail; + } + + return 0; + +fail: + close(s); + return ret; +} + +#else /* HAVE_PACKETSOCKET */ + +int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface) +{ + /* Not implemented */ + return ENOSYS; +} + +#endif /* HAVE_PACKETSOCKET */ + + +#define IP4_TCP_BUFFER_SIZE sizeof(struct ip) + \ + sizeof(struct tcphdr) + +#define IP6_TCP_BUFFER_SIZE sizeof(struct ip6_hdr) + \ + sizeof(struct tcphdr) + +static int tcp4_build(uint8_t *buf, + size_t buflen, + const struct sockaddr_in *src, + const struct sockaddr_in *dst, + uint32_t seq, + uint32_t ack, + int rst, + size_t *len) +{ + size_t l = IP4_TCP_BUFFER_SIZE; + struct { + struct ip ip; + struct tcphdr tcp; + } *ip4pkt; + + if (l != sizeof(*ip4pkt)) { + return EMSGSIZE; + } + + if (buflen < l) { + return EMSGSIZE; + } + + ip4pkt = (void *)buf; + memset(ip4pkt, 0, l); + + ip4pkt->ip.ip_v = 4; + ip4pkt->ip.ip_hl = sizeof(ip4pkt->ip)/sizeof(uint32_t); + ip4pkt->ip.ip_len = htons(sizeof(ip4pkt)); + ip4pkt->ip.ip_ttl = 255; + ip4pkt->ip.ip_p = IPPROTO_TCP; + ip4pkt->ip.ip_src.s_addr = src->sin_addr.s_addr; + ip4pkt->ip.ip_dst.s_addr = dst->sin_addr.s_addr; + ip4pkt->ip.ip_sum = 0; + + ip4pkt->tcp.th_sport = src->sin_port; + ip4pkt->tcp.th_dport = dst->sin_port; + ip4pkt->tcp.th_seq = seq; + ip4pkt->tcp.th_ack = ack; + ip4pkt->tcp.th_flags = 0; + 
ip4pkt->tcp.th_flags |= TH_ACK; + if (rst) { + ip4pkt->tcp.th_flags |= TH_RST; + } + ip4pkt->tcp.th_off = sizeof(ip4pkt->tcp)/sizeof(uint32_t); + /* this makes it easier to spot in a sniffer */ + ip4pkt->tcp.th_win = htons(1234); + ip4pkt->tcp.th_sum = ip_checksum((uint8_t *)&ip4pkt->tcp, + sizeof(ip4pkt->tcp), + &ip4pkt->ip); + + *len = l; + return 0; +} + +static int tcp6_build(uint8_t *buf, + size_t buflen, + const struct sockaddr_in6 *src, + const struct sockaddr_in6 *dst, + uint32_t seq, + uint32_t ack, + int rst, + size_t *len) +{ + size_t l = IP6_TCP_BUFFER_SIZE; + struct { + struct ip6_hdr ip6; + struct tcphdr tcp; + } *ip6pkt; + + if (l != sizeof(*ip6pkt)) { + return EMSGSIZE; + } + + if (buflen < l) { + return EMSGSIZE; + } + + ip6pkt = (void *)buf; + memset(ip6pkt, 0, l); + + ip6pkt->ip6.ip6_vfc = 6 << 4; + ip6pkt->ip6.ip6_plen = htons(sizeof(struct tcphdr)); + ip6pkt->ip6.ip6_nxt = IPPROTO_TCP; + ip6pkt->ip6.ip6_hlim = 64; + ip6pkt->ip6.ip6_src = src->sin6_addr; + ip6pkt->ip6.ip6_dst = dst->sin6_addr; + + ip6pkt->tcp.th_sport = src->sin6_port; + ip6pkt->tcp.th_dport = dst->sin6_port; + ip6pkt->tcp.th_seq = seq; + ip6pkt->tcp.th_ack = ack; + ip6pkt->tcp.th_flags = 0; + ip6pkt->tcp.th_flags |= TH_ACK; + if (rst) { + ip6pkt->tcp.th_flags |= TH_RST; + } + ip6pkt->tcp.th_off = sizeof(ip6pkt->tcp)/sizeof(uint32_t); + /* this makes it easier to spot in a sniffer */ + ip6pkt->tcp.th_win = htons(1234); + ip6pkt->tcp.th_sum = ip6_checksum((uint8_t *)&ip6pkt->tcp, + sizeof(ip6pkt->tcp), + &ip6pkt->ip6); + + *len = l; + return 0; +} + +/* + * Send tcp segment from the specified IP/port to the specified + * destination IP/port. + * + * This is used to trigger the receiving host into sending its own ACK, + * which should trigger early detection of TCP reset by the client + * after IP takeover + * + * This can also be used to send RST segments (if rst is true) and also + * if correct seq and ack numbers are provided. 
+ */ +int ctdb_sys_send_tcp(const ctdb_sock_addr *dest, + const ctdb_sock_addr *src, + uint32_t seq, + uint32_t ack, + int rst) +{ + uint8_t buf[MAX(IP4_TCP_BUFFER_SIZE, IP6_TCP_BUFFER_SIZE)]; + size_t len = 0; + int ret; + int s; + uint32_t one = 1; + struct sockaddr_in6 tmpdest = { 0 }; + int saved_errno; + + switch (src->ip.sin_family) { + case AF_INET: + ret = tcp4_build(buf, + sizeof(buf), + &src->ip, + &dest->ip, + seq, + ack, + rst, + &len); + if (ret != 0) { + DBG_ERR("Failed to build TCP packet (%d)\n", ret); + return ret; + } + + /* open a raw socket to send this segment from */ + s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); + if (s == -1) { + DBG_ERR("Failed to open raw socket (%s)\n", + strerror(errno)); + return -1; + } + + ret = setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one)); + if (ret != 0) { + DBG_ERR("Failed to setup IP headers (%s)\n", + strerror(errno)); + close(s); + return -1; + } + + ret = sendto(s, + buf, + len, + 0, + (const struct sockaddr *)&dest->ip, + sizeof(dest->ip)); + saved_errno = errno; + close(s); + if (ret == -1) { + D_ERR("Failed sendto (%s)\n", strerror(saved_errno)); + return -1; + } + if ((size_t)ret != len) { + DBG_ERR("Failed sendto - didn't send full packet\n"); + return -1; + } + break; + + case AF_INET6: + ret = tcp6_build(buf, + sizeof(buf), + &src->ip6, + &dest->ip6, + seq, + ack, + rst, + &len); + if (ret != 0) { + DBG_ERR("Failed to build TCP packet (%d)\n", ret); + return ret; + } + + s = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW); + if (s == -1) { + DBG_ERR("Failed to open sending socket\n"); + return -1; + + } + /* + * sendto() on an IPv6 raw socket requires the port to + * be either 0 or a protocol value + */ + tmpdest = dest->ip6; + tmpdest.sin6_port = 0; + + ret = sendto(s, + buf, + len, + 0, + (const struct sockaddr *)&tmpdest, + sizeof(tmpdest)); + saved_errno = errno; + close(s); + if (ret == -1) { + D_ERR("Failed sendto (%s)\n", strerror(saved_errno)); + return -1; + } + if ((size_t)ret != len) { + 
/*
 * Extract connection details from an IPv4 TCP packet
 *
 * ip_pkt points to pktlen bytes starting at the IPv4 header.
 *
 * On success 0 is returned, src and dst are filled in with the
 * addresses and ports, and *ack_seq and *seq are set.  If window is
 * not NULL then *window is set to the TCP window.  If rst is not NULL
 * then *rst is non-zero when the RST flag is set.  Ports, sequence
 * numbers and the window are copied without byte order conversion.
 *
 * Returns EMSGSIZE if the packet is too short to contain the
 * required fields.  Returns ENOMSG if the packet is not IPv4, is a
 * fragment other than the first, is not TCP or has an IP header
 * length smaller than the fixed IPv4 header.
 */
static int tcp4_extract(const uint8_t *ip_pkt,
			size_t pktlen,
			struct sockaddr_in *src,
			struct sockaddr_in *dst,
			uint32_t *ack_seq,
			uint32_t *seq,
			int *rst,
			uint16_t *window)
{
	const struct ip *ip;
	const struct tcphdr *tcp;
	size_t ip_hdr_len;

	if (pktlen < sizeof(struct ip)) {
		return EMSGSIZE;
	}

	ip = (const struct ip *)ip_pkt;

	/* IPv4 only */
	if (ip->ip_v != 4) {
		return ENOMSG;
	}
	/* Don't look at fragments */
	if ((ntohs(ip->ip_off)&0x1fff) != 0) {
		return ENOMSG;
	}
	/* TCP only */
	if (ip->ip_p != IPPROTO_TCP) {
		return ENOMSG;
	}

	/*
	 * A header length below the fixed header size is malformed: the
	 * TCP header would be read from inside the IP header
	 */
	ip_hdr_len = ip->ip_hl * sizeof(uint32_t);
	if (ip_hdr_len < sizeof(struct ip)) {
		return ENOMSG;
	}

	/* Ensure there is enough of the packet to gather required fields */
	if (pktlen < ip_hdr_len + offsetof(struct tcphdr, th_sum)) {
		return EMSGSIZE;
	}

	tcp = (const struct tcphdr *)(ip_pkt + ip_hdr_len);

	src->sin_family = AF_INET;
	src->sin_addr.s_addr = ip->ip_src.s_addr;
	src->sin_port = tcp->th_sport;

	dst->sin_family = AF_INET;
	dst->sin_addr.s_addr = ip->ip_dst.s_addr;
	dst->sin_port = tcp->th_dport;

	*ack_seq = tcp->th_ack;
	*seq = tcp->th_seq;
	if (window != NULL) {
		*window = tcp->th_win;
	}
	if (rst != NULL) {
		*rst = tcp->th_flags & TH_RST;
	}

	return 0;
}
/*
 * Open a raw packet socket for capturing traffic
 *
 * The socket is made non-blocking and close-on-exec.  This variant
 * uses neither iface nor private_data.
 *
 * Returns the socket file descriptor, or -1 on failure.
 */
int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
{
	/* Open a socket to capture all traffic */
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

	if (fd == -1) {
		DBG_ERR("Failed to open raw socket\n");
		return -1;
	}

	DBG_DEBUG("Opened raw socket for TCP tickle capture (fd=%d)\n", fd);

	if (set_blocking(fd, false) != 0) {
		DBG_ERR("Failed to set socket non-blocking (%s)\n",
			strerror(errno));
		close(fd);
		return -1;
	}

	set_close_on_exec(fd);

	return fd;
}
+ */ +int ctdb_sys_close_capture_socket(void *private_data) +{ + return 0; +} + + +/* + * called when the raw socket becomes readable + */ +int ctdb_sys_read_tcp_packet(int s, void *private_data, + ctdb_sock_addr *src, + ctdb_sock_addr *dst, + uint32_t *ack_seq, + uint32_t *seq, + int *rst, + uint16_t *window) +{ + ssize_t nread; + uint8_t pkt[100]; /* Large enough for simple ACK/RST packets */ + struct ether_header *eth; + int ret; + + nread = recv(s, pkt, sizeof(pkt), MSG_TRUNC); + if (nread == -1) { + return errno; + } + if ((size_t)nread < sizeof(*eth)) { + return EMSGSIZE; + } + + ZERO_STRUCTP(src); + ZERO_STRUCTP(dst); + + /* Ethernet */ + eth = (struct ether_header *)pkt; + + /* we want either IPv4 or IPv6 */ + if (ntohs(eth->ether_type) == ETHERTYPE_IP) { + ret = tcp4_extract(pkt + sizeof(struct ether_header), + (size_t)nread - sizeof(struct ether_header), + &src->ip, + &dst->ip, + ack_seq, + seq, + rst, + window); + return ret; + + } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) { + ret = tcp6_extract(pkt + sizeof(struct ether_header), + (size_t)nread - sizeof(struct ether_header), + &src->ip6, + &dst->ip6, + ack_seq, + seq, + rst, + window); + return ret; + } + + return ENOMSG; +} + +#else /* defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP) */ + +#include <pcap.h> + +/* + * Assume this exists if pcap.h exists - it has been around for a + * while + */ +#include <pcap/sll.h> + +int ctdb_sys_open_capture_socket(const char *iface, void **private_data) +{ + char errbuf[PCAP_ERRBUF_SIZE]; + pcap_t *pt; + int pcap_packet_type; + const char *t = NULL; + int fd; + int ret; + + pt = pcap_create(iface, errbuf); + if (pt == NULL) { + DBG_ERR("Failed to open pcap capture device %s (%s)\n", + iface, + errbuf); + return -1; + } + /* + * pcap isn't very clear about defaults... 
+ */ + ret = pcap_set_snaplen(pt, 100); + if (ret < 0) { + DBG_ERR("Failed to set snaplen for pcap capture\n"); + goto fail; + } + ret = pcap_set_promisc(pt, 0); + if (ret < 0) { + DBG_ERR("Failed to unset promiscuous mode for pcap capture\n"); + goto fail; + } + ret = pcap_set_timeout(pt, 0); + if (ret < 0) { + DBG_ERR("Failed to set timeout for pcap capture\n"); + goto fail; + } +#ifdef HAVE_PCAP_SET_IMMEDIATE_MODE + ret = pcap_set_immediate_mode(pt, 1); + if (ret < 0) { + DBG_ERR("Failed to set immediate mode for pcap capture\n"); + goto fail; + } +#endif + ret = pcap_activate(pt); + if (ret < 0) { + DBG_ERR("Failed to activate pcap capture\n"); + goto fail; + } + + pcap_packet_type = pcap_datalink(pt); + switch (pcap_packet_type) { + case DLT_EN10MB: + t = "DLT_EN10MB"; + break; + case DLT_LINUX_SLL: + t = "DLT_LINUX_SLL"; + break; +#ifdef DLT_LINUX_SLL2 + case DLT_LINUX_SLL2: + t = "DLT_LINUX_SLL2"; + break; +#endif /* DLT_LINUX_SLL2 */ + default: + DBG_ERR("Unknown pcap packet type %d\n", pcap_packet_type); + goto fail; + } + + fd = pcap_get_selectable_fd(pt); + DBG_DEBUG("Opened pcap capture for TCP tickle (type=%s, fd=%d)\n", + t, + fd); + + *((pcap_t **)private_data) = pt; + return fd; + +fail: + pcap_close(pt); + return -1; +} + +int ctdb_sys_close_capture_socket(void *private_data) +{ + pcap_t *pt = (pcap_t *)private_data; + pcap_close(pt); + return 0; +} + +int ctdb_sys_read_tcp_packet(int s, + void *private_data, + ctdb_sock_addr *src, + ctdb_sock_addr *dst, + uint32_t *ack_seq, + uint32_t *seq, + int *rst, + uint16_t *window) +{ + int ret; + struct pcap_pkthdr pkthdr; + const u_char *buffer; + pcap_t *pt = (pcap_t *)private_data; + int pcap_packet_type; + uint16_t ether_type; + size_t ll_hdr_len; + + buffer=pcap_next(pt, &pkthdr); + if (buffer==NULL) { + return ENOMSG; + } + + ZERO_STRUCTP(src); + ZERO_STRUCTP(dst); + + pcap_packet_type = pcap_datalink(pt); + switch (pcap_packet_type) { + case DLT_EN10MB: { + const struct ether_header *eth = + (const 
struct ether_header *)buffer; + ether_type = ntohs(eth->ether_type); + ll_hdr_len = sizeof(struct ether_header); + break; + } + case DLT_LINUX_SLL: { + const struct sll_header *sll = + (const struct sll_header *)buffer; + uint16_t arphrd_type = ntohs(sll->sll_hatype); + switch (arphrd_type) { + case ARPHRD_ETHER: + case ARPHRD_INFINIBAND: + break; + default: + DBG_DEBUG("SLL: Unknown arphrd_type %"PRIu16"\n", + arphrd_type); + return EPROTONOSUPPORT; + } + ether_type = ntohs(sll->sll_protocol); + ll_hdr_len = SLL_HDR_LEN; + break; + } +#ifdef DLT_LINUX_SLL2 + case DLT_LINUX_SLL2: { + const struct sll2_header *sll2 = + (const struct sll2_header *)buffer; + uint16_t arphrd_type = ntohs(sll2->sll2_hatype); + switch (arphrd_type) { + case ARPHRD_ETHER: + case ARPHRD_INFINIBAND: + break; + default: + DBG_DEBUG("SLL2: Unknown arphrd_type %"PRIu16"\n", + arphrd_type); + return EPROTONOSUPPORT; + } + ether_type = ntohs(sll2->sll2_protocol); + ll_hdr_len = SLL2_HDR_LEN; + break; + } +#endif /* DLT_LINUX_SLL2 */ + default: + DBG_DEBUG("Unknown pcap packet type %d\n", pcap_packet_type); + return EPROTONOSUPPORT; + } + + switch (ether_type) { + case ETHERTYPE_IP: + ret = tcp4_extract(buffer + ll_hdr_len, + (size_t)pkthdr.caplen - ll_hdr_len, + &src->ip, + &dst->ip, + ack_seq, + seq, + rst, + window); + break; + case ETHERTYPE_IP6: + ret = tcp6_extract(buffer + ll_hdr_len, + (size_t)pkthdr.caplen - ll_hdr_len, + &src->ip6, + &dst->ip6, + ack_seq, + seq, + rst, + window); + break; + case ETHERTYPE_ARP: + /* Silently ignore ARP packets */ + return EPROTO; + default: + DBG_DEBUG("Unknown ether type %"PRIu16"\n", ether_type); + return EPROTO; + } + + return ret; +} + +#endif /* defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP) */ diff --git a/ctdb/common/system_socket.h b/ctdb/common/system_socket.h new file mode 100644 index 0000000..065c53c --- /dev/null +++ b/ctdb/common/system_socket.h @@ -0,0 +1,46 @@ +/* + System specific network code + + Copyright (C) Amitay Isaacs 2015 + + 
This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_SYSTEM_SOCKET_H__ +#define __CTDB_SYSTEM_SOCKET_H__ + +#include "protocol/protocol.h" + +bool ctdb_sys_have_ip(ctdb_sock_addr *addr); + +int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface); + +int ctdb_sys_send_tcp(const ctdb_sock_addr *dest, + const ctdb_sock_addr *src, + uint32_t seq, + uint32_t ack, + int rst); + +int ctdb_sys_open_capture_socket(const char *iface, void **private_data); +int ctdb_sys_close_capture_socket(void *private_data); +int ctdb_sys_read_tcp_packet(int s, + void *private_data, + ctdb_sock_addr *src, + ctdb_sock_addr *dst, + uint32_t *ack_seq, + uint32_t *seq, + int *rst, + uint16_t *window); + +#endif /* __CTDB_SYSTEM_SOCKET_H__ */ diff --git a/ctdb/common/tmon.c b/ctdb/common/tmon.c new file mode 100644 index 0000000..04bad1f --- /dev/null +++ b/ctdb/common/tmon.c @@ -0,0 +1,602 @@ +/* + Trivial FD monitoring + + Copyright (C) Martin Schwenke & Amitay Isaacs, DataDirect Networks 2022 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" + +#include <ctype.h> + +#include "lib/util/blocking.h" +#include "lib/util/sys_rw.h" +#include "lib/util/tevent_unix.h" +#include "lib/util/util.h" +#include "lib/util/smb_strtox.h" + +#include "lib/async_req/async_sock.h" + +#include "common/tmon.h" + + +enum tmon_message_type { + TMON_MSG_EXIT = 1, + TMON_MSG_ERRNO, + TMON_MSG_PING, + TMON_MSG_ASCII, + TMON_MSG_CUSTOM, +}; + +struct tmon_pkt { + enum tmon_message_type type; + uint16_t val; +}; + +struct tmon_buf { + uint8_t data[4]; +}; + +static void tmon_packet_push(struct tmon_pkt *pkt, + struct tmon_buf *buf) +{ + uint16_t type_n, val_n; + + type_n = htons(pkt->type); + val_n = htons(pkt->val); + memcpy(&buf->data[0], &type_n, 2); + memcpy(&buf->data[2], &val_n, 2); +} + +static void tmon_packet_pull(struct tmon_buf *buf, + struct tmon_pkt *pkt) +{ + uint16_t type_n, val_n; + + memcpy(&type_n, &buf->data[0], 2); + memcpy(&val_n, &buf->data[2], 2); + + pkt->type = ntohs(type_n); + pkt->val = ntohs(val_n); +} + +static int tmon_packet_write(int fd, struct tmon_pkt *pkt) +{ + struct tmon_buf buf; + ssize_t n; + + tmon_packet_push(pkt, &buf); + + n = sys_write(fd, &buf.data[0], sizeof(buf.data)); + if (n == -1) { + return errno; + } + return 0; +} + +bool tmon_set_exit(struct tmon_pkt *pkt) +{ + *pkt = (struct tmon_pkt) { + .type = TMON_MSG_EXIT, + }; + + return true; +} + +bool tmon_set_errno(struct tmon_pkt *pkt, int err) +{ + if (err <= 0 || err > UINT16_MAX) { + return false; + } + + *pkt = (struct tmon_pkt) { + .type = TMON_MSG_ERRNO, + .val = (uint16_t)err, + }; + + return true; +} + +bool 
tmon_set_ping(struct tmon_pkt *pkt) +{ + *pkt = (struct tmon_pkt) { + .type = TMON_MSG_PING, + }; + + return true; +} + +bool tmon_set_ascii(struct tmon_pkt *pkt, char c) +{ + if (!isascii(c)) { + return false; + } + + *pkt = (struct tmon_pkt) { + .type = TMON_MSG_ASCII, + .val = (uint16_t)c, + }; + + return true; +} + +bool tmon_set_custom(struct tmon_pkt *pkt, uint16_t val) +{ + *pkt = (struct tmon_pkt) { + .type = TMON_MSG_CUSTOM, + .val = val, + }; + + return true; +} + +static bool tmon_parse_exit(struct tmon_pkt *pkt) +{ + if (pkt->type != TMON_MSG_EXIT) { + return false; + } + if (pkt->val != 0) { + return false; + } + + return true; +} + +static bool tmon_parse_errno(struct tmon_pkt *pkt, int *err) +{ + if (pkt->type != TMON_MSG_ERRNO) { + return false; + } + *err= (int)pkt->val; + + return true; +} + +bool tmon_parse_ping(struct tmon_pkt *pkt) +{ + if (pkt->type != TMON_MSG_PING) { + return false; + } + if (pkt->val != 0) { + return false; + } + + return true; +} + +bool tmon_parse_ascii(struct tmon_pkt *pkt, char *c) +{ + if (pkt->type != TMON_MSG_ASCII) { + return false; + } + if (!isascii((int)pkt->val)) { + return false; + } + *c = (char)pkt->val; + + return true; +} + +bool tmon_parse_custom(struct tmon_pkt *pkt, uint16_t *val) +{ + if (pkt->type != TMON_MSG_CUSTOM) { + return false; + } + *val = pkt->val; + + return true; +} + +struct tmon_state { + int fd; + int direction; + struct tevent_context *ev; + bool monitor_close; + unsigned long write_interval; + unsigned long read_timeout; + struct tmon_actions actions; + struct tevent_timer *timer; + void *private_data; +}; + +static void tmon_readable(struct tevent_req *subreq); +static bool tmon_set_timeout(struct tevent_req *req, + struct tevent_context *ev); +static void tmon_timedout(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval now, + void *private_data); +static void tmon_write_loop(struct tevent_req *subreq); + +struct tevent_req *tmon_send(TALLOC_CTX *mem_ctx, + struct 
tevent_context *ev, + int fd, + int direction, + unsigned long read_timeout, + unsigned long write_interval, + struct tmon_actions *actions, + void *private_data) +{ + struct tevent_req *req, *subreq; + struct tmon_state *state; + bool status; + + req = tevent_req_create(mem_ctx, &state, struct tmon_state); + if (req == NULL) { + return NULL; + } + + if (actions != NULL) { + /* If FD isn't readable then read actions are invalid */ + if (!(direction & TMON_FD_READ) && + (actions->timeout_callback != NULL || + actions->read_callback != NULL || + read_timeout != 0)) { + tevent_req_error(req, EINVAL); + return tevent_req_post(req, ev); + } + /* If FD isn't writeable then write actions are invalid */ + if (!(direction & TMON_FD_WRITE) && + (actions->write_callback != NULL || + write_interval != 0)) { + tevent_req_error(req, EINVAL); + return tevent_req_post(req, ev); + } + } + + /* + * Can't specify write interval without a callback. Check the + * arguments rather than state, which has not been filled in + * yet, and cover actions == NULL, otherwise tmon_write_loop() + * would call a NULL write_callback. + */ + if (write_interval != 0 && + (actions == NULL || actions->write_callback == NULL)) { + tevent_req_error(req, EINVAL); + return tevent_req_post(req, ev); + } + + state->fd = fd; + state->direction = direction; + state->ev = ev; + state->write_interval = write_interval; + state->read_timeout = read_timeout; + state->private_data = private_data; + + if (actions != NULL) { + state->actions = *actions; + } + + status = set_close_on_exec(fd); + if (!status) { + tevent_req_error(req, errno); + return tevent_req_post(req, ev); + } + + if (direction & TMON_FD_READ) { + subreq = wait_for_read_send(state, ev, fd, true); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, tmon_readable, req); + } + + if (state->read_timeout != 0) { + status = tmon_set_timeout(req, state->ev); + if (!status) { + tevent_req_error(req, ENOMEM); + return tevent_req_post(req, ev); + } + } + + if (state->write_interval != 0) { + subreq = tevent_wakeup_send( + state, + state->ev, +
tevent_timeval_current_ofs(state->write_interval, 0)); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, state->ev); + } + tevent_req_set_callback(subreq, tmon_write_loop, req); + } + + return req; +} + +static void tmon_readable(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct tmon_state *state = tevent_req_data( req, struct tmon_state); + struct tmon_buf buf; + struct tmon_pkt pkt; + ssize_t nread; + bool status; + int err; + int ret; + + status = wait_for_read_recv(subreq, &ret); + TALLOC_FREE(subreq); + if (!status) { + if (ret == EPIPE && state->actions.close_callback != NULL) { + ret = state->actions.close_callback(state->private_data); + if (ret == TMON_STATUS_EXIT) { + ret = 0; + } + } + if (ret == 0) { + tevent_req_done(req); + } else { + tevent_req_error(req, ret); + } + return; + } + + nread = sys_read(state->fd, buf.data, sizeof(buf.data)); + if (nread == -1) { + tevent_req_error(req, errno); + return; + } + if (nread == 0) { + /* Can't happen, treat like EPIPE, above */ + tevent_req_error(req, EPIPE); + return; + } + if (nread != sizeof(buf.data)) { + tevent_req_error(req, EPROTO); + return; + } + + tmon_packet_pull(&buf, &pkt); + + switch (pkt.type) { + case TMON_MSG_EXIT: + status = tmon_parse_exit(&pkt); + if (!status) { + tevent_req_error(req, EPROTO); + return; + } + tevent_req_done(req); + return; + case TMON_MSG_ERRNO: + status = tmon_parse_errno(&pkt, &err); + if (!status) { + err = EPROTO; + } + tevent_req_error(req, err); + return; + default: + break; + } + + if (state->actions.read_callback == NULL) { + /* Shouldn't happen, other end should not write */ + tevent_req_error(req, EIO); + return; + } + ret = state->actions.read_callback(state->private_data, &pkt); + if (ret == TMON_STATUS_EXIT) { + tevent_req_done(req); + return; + } + if (ret != 0) { + tevent_req_error(req, ret); + return; + } + + subreq = wait_for_read_send(state, state->ev, state->fd, 
true); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, tmon_readable, req); + + /* Reset read timeout */ + if (state->read_timeout != 0) { + status = tmon_set_timeout(req, state->ev); + if (!status) { + tevent_req_error(req, ENOMEM); + return; + } + } +} + +static bool tmon_set_timeout(struct tevent_req *req, + struct tevent_context *ev) +{ + struct tmon_state *state = tevent_req_data( + req, struct tmon_state); + struct timeval endtime = + tevent_timeval_current_ofs(state->read_timeout, 0); + + TALLOC_FREE(state->timer); + + state->timer = tevent_add_timer(ev, req, endtime, tmon_timedout, req); + if (tevent_req_nomem(state->timer, req)) { + return false; + } + + return true; +} + +static void tmon_timedout(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval now, + void *private_data) +{ + struct tevent_req *req = talloc_get_type_abort( + private_data, struct tevent_req); + struct tmon_state *state = tevent_req_data(req, struct tmon_state); + int ret; + + TALLOC_FREE(state->timer); + + if (state->actions.timeout_callback != NULL) { + ret = state->actions.timeout_callback(state->private_data); + if (ret == TMON_STATUS_EXIT) { + ret = 0; + } + } else { + ret = ETIMEDOUT; + } + + if (ret == 0) { + tevent_req_done(req); + } else { + tevent_req_error(req, ret); + } +} + +static void tmon_write_loop(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct tmon_state *state = tevent_req_data( + req, struct tmon_state); + struct tmon_pkt pkt; + int ret; + bool status; + + status = tevent_wakeup_recv(subreq); + TALLOC_FREE(subreq); + if (!status) { + /* Ignore error */ + } + + ret = state->actions.write_callback(state->private_data, &pkt); + if (ret == TMON_STATUS_EXIT) { + tevent_req_done(req); + return; + } + if (ret == TMON_STATUS_SKIP) { + goto done; + } + if (ret != 0) { + tevent_req_error(req, ret); + return; + } + + status = tmon_write(req, 
&pkt); + if (!status) { + return; + } + +done: + subreq = tevent_wakeup_send( + state, + state->ev, + tevent_timeval_current_ofs(state->write_interval, 0)); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, tmon_write_loop, req); +} + +bool tmon_write(struct tevent_req *req, struct tmon_pkt *pkt) +{ + struct tmon_state *state = tevent_req_data( + req, struct tmon_state); + int ret; + + if (state->fd == -1) { + return false; + } + + if (!(state->direction & TMON_FD_WRITE)) { + tevent_req_error(req, EINVAL); + return false; + } + + ret = tmon_packet_write(state->fd, pkt); + if (ret != 0) { + if (ret == EPIPE && state->actions.close_callback != NULL) { + ret = state->actions.close_callback(state->private_data); + if (ret == TMON_STATUS_EXIT) { + ret = 0; + } + } + + if (ret == 0) { + tevent_req_done(req); + } else { + tevent_req_error(req, ret); + } + state->fd = -1; + return false; + } + + return true; +} + +bool tmon_recv(struct tevent_req *req, int *perr) +{ + if (tevent_req_is_unix_error(req, perr)) { + return false; + } + + return true; +} + +static int ping_writer(void *private_data, struct tmon_pkt *pkt) +{ + tmon_set_ping(pkt); + + return 0; +} + +static int ping_reader(void *private_data, struct tmon_pkt *pkt) +{ + bool status; + + /* Only expect pings */ + status = tmon_parse_ping(pkt); + if (!status) { + return EPROTO; + } + + return 0; +} + +struct tevent_req *tmon_ping_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + int fd, + int direction, + unsigned long timeout, + unsigned long interval) +{ + struct tevent_req *req; + struct tmon_actions actions = { + .write_callback = NULL, + }; + + if ((direction & TMON_FD_WRITE) && interval != 0) { + actions.write_callback = ping_writer; + } + if ((direction & TMON_FD_READ) && timeout != 0) { + actions.read_callback = ping_reader; + } + + req = tmon_send(mem_ctx, + ev, + fd, + direction, + timeout, + interval, + &actions, + NULL); + return req; +} + +bool 
tmon_ping_recv(struct tevent_req *req, int *perr) +{ + bool status; + + status = tmon_recv(req, perr); + + return status; +} diff --git a/ctdb/common/tmon.h b/ctdb/common/tmon.h new file mode 100644 index 0000000..1d315a9 --- /dev/null +++ b/ctdb/common/tmon.h @@ -0,0 +1,218 @@ +/* + Trivial FD monitoring + + Copyright (C) Martin Schwenke & Amitay Isaacs, DataDirect Networks 2022 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_TMON_H__ +#define __CTDB_TMON_H__ + +#include <talloc.h> +#include <tevent.h> + +/** + * @file tmon.h + * + * @brief Interprocess file descriptor (pipe and socketpair) monitoring + * + * Assumes 2 processes connected by a pipe(2) or a socketpair(2). A + * simple protocol is defined to allow sending various types of status + * information. When a pipe(2) is used the reader can monitor for + * close and read packets, while the sender can write packets. When a + * socketpair(2) is used then both ends can monitor for close, and + * read and write packets. A read timeout can be specified, + * terminating the computation if no packets are received. + * + * A simplified interface is provided to monitor for close and allow + * sending/monitoring of one-way ping packets. 
A ping timeout occurs + * when one end is expecting pings but none are received during the + * timeout interval - no response is sent to pings, they merely reset + * a timer on the receiving end. + */ + +struct tmon_pkt; + +struct tmon_actions { + int (*write_callback)(void *private_data, struct tmon_pkt *pkt); + int (*timeout_callback)(void *private_data); + int (*read_callback)(void *private_data, struct tmon_pkt *pkt); + int (*close_callback)(void *private_data); +}; + +/* + * Return value from write_callback() and read_callback() to cause the + * computation to exit successfully. For consistency this can also be + * used with timeout_callback() and close_callback(). + */ +#define TMON_STATUS_EXIT (-1) + +/* Return value from write_callback() to skip write */ +#define TMON_STATUS_SKIP (-2) + +/* For direction, below */ +#define TMON_FD_READ 0x1 +#define TMON_FD_WRITE 0x2 +#define TMON_FD_BOTH (TMON_FD_READ | TMON_FD_WRITE) + +/** + * @brief Async computation to start FD monitoring + * + * @param[in] mem_ctx Talloc memory context + * @param[in] ev Tevent context + * @param[in] fd File descriptor for "this" end of pipe/socketpair + * @param[in] direction Read, write or both - for sanity checking + * @param[in] read_timeout Seconds to trigger timeout when no packets received + * @param[in] write_interval Seconds to trigger write_callback + * @param[in] actions struct containing callbacks + * @param[in] private_data Passed to callbacks + * @return new tevent request or NULL on failure + * + * @note read_timeout implies monitor_close + * + * @note The computation will complete when: + * + * - The writing end closes (e.g. 
writer process terminates) - EPIPE + * - read_timeout is non-zero and timeout occurs - ETIMEDOUT + * - Packets received with no read_callback defined - EIO + * - Invalid or unexpected packet received - EPROTO + * - File descriptor readable but no bytes to read - error: EPIPE + * - Invalid combination of direction, callbacks, timeouts: EINVAL + * - An unexpected error occurs - other + * + * @note action callbacks return an int that can be used to trigger + * other errors or override an error. For example: + * + * - write_callback() can return non-zero errno, causing an error + * - close_callback() can return zero, overriding the default EPIPE error + * - timeout_callback() can return something other than ETIMEDOUT + * - read_callback() can return EPROTO for unexpected packet types + * + * Reading of exit and errno packets is handled internally (read + * callback is never called). Write callback can return special + * value TMON_STATUS_SKIP to avoid sending any data. + */ +struct tevent_req *tmon_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + int fd, + int direction, + unsigned long read_timeout, + unsigned long write_interval, + struct tmon_actions *actions, + void *private_data); + +/** + * @brief Async computation to end FD monitoring + * + * @param[in] req Tevent request + * @param[out] perr errno in case of failure + * @return true on success, false on failure + */ +bool tmon_recv(struct tevent_req *req, int *perr); + +/** + * @brief Fill in an exit packet + * + * @param[in,out] pkt An exit packet + * @return true on success, false on failure + */ +bool tmon_set_exit(struct tmon_pkt *pkt); +/** + * @brief Fill in an errno packet + * + * @param[in,out] pkt An errno packet + * @param[in] err An errno to send in packet + * @return true on success, false on failure + */ +bool tmon_set_errno(struct tmon_pkt *pkt, int err); +/** + * @brief Fill in a ping packet + * + * @param[in,out] pkt A ping packet + * @return true on success, false on failure + */ +bool 
tmon_set_ping(struct tmon_pkt *pkt); +/** + * @brief Fill in an ASCII packet + * + * @param[in,out] pkt An ASCII packet + * @param[in] c An ASCII character to send in packet + * @return true on success, false on failure + */ +bool tmon_set_ascii(struct tmon_pkt *pkt, char c); +/** + * @brief Fill in a custom packet + * + * @param[in,out] pkt A custom packet + * @param[in] val A uint16_t to send in a custom packet + * @return true on success, false on failure + */ +bool tmon_set_custom(struct tmon_pkt *pkt, uint16_t val); + +/** + * @brief Validate a ping packet + * + * @param[in] pkt A ping packet + * @return true on success, false on failure + */ +bool tmon_parse_ping(struct tmon_pkt *pkt); + +/** + * @brief Validate ASCII packet and parse out character + * + * @param[in] pkt An ASCII packet + * @param[out] c An ASCII character value from packet + * @return true on success, false on failure + */ +bool tmon_parse_ascii(struct tmon_pkt *pkt, char *c); + +/** + * @brief Validate custom packet and parse out value + * + * @param[in] pkt A custom packet + * @param[out] val A uint16_t value from packet + * @return true on success, false on failure + */ +bool tmon_parse_custom(struct tmon_pkt *pkt, uint16_t *val); + +/** + * @brief Write a packet + * + * @param[in] req Tevent request created by tmon_send + * @param[in] pkt Packet to write + * @return true on success, false on failure + * + * @note If the request was not created with TMON_FD_WRITE, or if the + * write itself fails, the request is completed (done or error) before + * false is returned. Once a write has failed, later calls return + * false without touching the request. + */ +bool tmon_write(struct tevent_req *req, struct tmon_pkt *pkt); + +/** + * @brief Async computation to start ping monitoring + * + * @param[in] mem_ctx Talloc memory context + * @param[in] ev Tevent context + * @param[in] fd File descriptor for "this" end of pipe/socketpair + * @param[in] direction Read, write or both - for sanity checking + * @param[in] timeout Timeout for pings on receiving end + * @param[in] interval Send a ping packet every interval seconds + * @return new tevent request or NULL on failure + */ +struct tevent_req *tmon_ping_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + int fd, + int direction, + unsigned
long timeout, + unsigned long interval); + +bool tmon_ping_recv(struct tevent_req *req, int *perr); + +#endif /* __CTDB_TMON_H__ */ diff --git a/ctdb/common/tunable.c b/ctdb/common/tunable.c new file mode 100644 index 0000000..f366f23 --- /dev/null +++ b/ctdb/common/tunable.c @@ -0,0 +1,401 @@ +/* + Tunables utilities + + Copyright (C) Amitay Isaacs 2016 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" +#include "system/filesys.h" +#include "system/locale.h" +#include "system/network.h" + +#include <talloc.h> + +#include "lib/util/debug.h" +#include "lib/util/smb_strtox.h" +#include "lib/util/tini.h" + +#include "protocol/protocol.h" + +#include "common/tunable.h" + +static struct { + const char *label; + uint32_t value; + bool obsolete; + size_t offset; +} tunable_map[] = { + { "MaxRedirectCount", 3, true, + offsetof(struct ctdb_tunable_list, max_redirect_count) }, + { "SeqnumInterval", 1000, false, + offsetof(struct ctdb_tunable_list, seqnum_interval) }, + { "ControlTimeout", 60, false, + offsetof(struct ctdb_tunable_list, control_timeout) }, + { "TraverseTimeout", 20, false, + offsetof(struct ctdb_tunable_list, traverse_timeout) }, + { "KeepaliveInterval", 5, false, + offsetof(struct ctdb_tunable_list, keepalive_interval) }, + { "KeepaliveLimit", 5, false, + offsetof(struct ctdb_tunable_list, keepalive_limit) }, + { "RecoverTimeout", 30, false, + 
offsetof(struct ctdb_tunable_list, recover_timeout) }, + { "RecoverInterval", 1, false, + offsetof(struct ctdb_tunable_list, recover_interval) }, + { "ElectionTimeout", 3, false, + offsetof(struct ctdb_tunable_list, election_timeout) }, + { "TakeoverTimeout", 9, false, + offsetof(struct ctdb_tunable_list, takeover_timeout) }, + { "MonitorInterval", 15, false, + offsetof(struct ctdb_tunable_list, monitor_interval) }, + { "TickleUpdateInterval", 20, false, + offsetof(struct ctdb_tunable_list, tickle_update_interval) }, + { "EventScriptTimeout", 30, false, + offsetof(struct ctdb_tunable_list, script_timeout) }, + { "MonitorTimeoutCount", 20, false, + offsetof(struct ctdb_tunable_list, monitor_timeout_count) }, + { "EventScriptUnhealthyOnTimeout", 0, true, + offsetof(struct ctdb_tunable_list, script_unhealthy_on_timeout) }, + { "RecoveryGracePeriod", 120, false, + offsetof(struct ctdb_tunable_list, recovery_grace_period) }, + { "RecoveryBanPeriod", 300, false, + offsetof(struct ctdb_tunable_list, recovery_ban_period) }, + { "DatabaseHashSize", 100001, false, + offsetof(struct ctdb_tunable_list, database_hash_size) }, + { "DatabaseMaxDead", 5, false, + offsetof(struct ctdb_tunable_list, database_max_dead) }, + { "RerecoveryTimeout", 10, false, + offsetof(struct ctdb_tunable_list, rerecovery_timeout) }, + { "EnableBans", 1, false, + offsetof(struct ctdb_tunable_list, enable_bans) }, + { "DeterministicIPs", 0, true, + offsetof(struct ctdb_tunable_list, deterministic_public_ips) }, + { "LCP2PublicIPs", 1, true, + offsetof(struct ctdb_tunable_list, lcp2_public_ip_assignment) }, + { "ReclockPingPeriod", 60, true, + offsetof(struct ctdb_tunable_list, reclock_ping_period) }, + { "NoIPFailback", 0, false, + offsetof(struct ctdb_tunable_list, no_ip_failback) }, + { "DisableIPFailover", 0, true, + offsetof(struct ctdb_tunable_list, disable_ip_failover) }, + { "VerboseMemoryNames", 0, false, + offsetof(struct ctdb_tunable_list, verbose_memory_names) }, + { "RecdPingTimeout", 60, 
false, + offsetof(struct ctdb_tunable_list, recd_ping_timeout) }, + { "RecdFailCount", 10, false, + offsetof(struct ctdb_tunable_list, recd_ping_failcount) }, + { "LogLatencyMs", 0, false, + offsetof(struct ctdb_tunable_list, log_latency_ms) }, + { "RecLockLatencyMs", 1000, false, + offsetof(struct ctdb_tunable_list, reclock_latency_ms) }, + { "RecoveryDropAllIPs", 120, false, + offsetof(struct ctdb_tunable_list, recovery_drop_all_ips) }, + { "VerifyRecoveryLock", 1, true, + offsetof(struct ctdb_tunable_list, verify_recovery_lock) }, + { "VacuumInterval", 10, false, + offsetof(struct ctdb_tunable_list, vacuum_interval) }, + { "VacuumMaxRunTime", 120, false, + offsetof(struct ctdb_tunable_list, vacuum_max_run_time) }, + { "RepackLimit", 10*1000, false, + offsetof(struct ctdb_tunable_list, repack_limit) }, + { "VacuumLimit", 5*1000, true, + offsetof(struct ctdb_tunable_list, vacuum_limit) }, + { "VacuumFastPathCount", 60, false, + offsetof(struct ctdb_tunable_list, vacuum_fast_path_count) }, + { "MaxQueueDropMsg", 1000*1000, false, + offsetof(struct ctdb_tunable_list, max_queue_depth_drop_msg) }, + { "AllowUnhealthyDBRead", 0, false, + offsetof(struct ctdb_tunable_list, allow_unhealthy_db_read) }, + { "StatHistoryInterval", 1, false, + offsetof(struct ctdb_tunable_list, stat_history_interval) }, + { "DeferredAttachTO", 120, false, + offsetof(struct ctdb_tunable_list, deferred_attach_timeout) }, + { "AllowClientDBAttach", 1, false, + offsetof(struct ctdb_tunable_list, allow_client_db_attach) }, + { "RecoverPDBBySeqNum", 1, true, + offsetof(struct ctdb_tunable_list, recover_pdb_by_seqnum) }, + { "DeferredRebalanceOnNodeAdd", 300, true, + offsetof(struct ctdb_tunable_list, deferred_rebalance_on_node_add) }, + { "FetchCollapse", 1, false, + offsetof(struct ctdb_tunable_list, fetch_collapse) }, + { "HopcountMakeSticky", 50, false, + offsetof(struct ctdb_tunable_list, hopcount_make_sticky) }, + { "StickyDuration", 600, false, + offsetof(struct ctdb_tunable_list, 
sticky_duration) }, + { "StickyPindown", 200, false, + offsetof(struct ctdb_tunable_list, sticky_pindown) }, + { "NoIPTakeover", 0, false, + offsetof(struct ctdb_tunable_list, no_ip_takeover) }, + { "DBRecordCountWarn", 100*1000, false, + offsetof(struct ctdb_tunable_list, db_record_count_warn) }, + { "DBRecordSizeWarn", 10*1000*1000, false, + offsetof(struct ctdb_tunable_list, db_record_size_warn) }, + { "DBSizeWarn", 100*1000*1000, false, + offsetof(struct ctdb_tunable_list, db_size_warn) }, + { "PullDBPreallocation", 10*1024*1024, false, + offsetof(struct ctdb_tunable_list, pulldb_preallocation_size) }, + { "NoIPHostOnAllDisabled", 1, true, + offsetof(struct ctdb_tunable_list, no_ip_host_on_all_disabled) }, + { "Samba3AvoidDeadlocks", 0, true, + offsetof(struct ctdb_tunable_list, samba3_hack) }, + { "TDBMutexEnabled", 1, true, + offsetof(struct ctdb_tunable_list, mutex_enabled) }, + { "LockProcessesPerDB", 200, false, + offsetof(struct ctdb_tunable_list, lock_processes_per_db) }, + { "RecBufferSizeLimit", 1000*1000, false, + offsetof(struct ctdb_tunable_list, rec_buffer_size_limit) }, + { "QueueBufferSize", 1024, false, + offsetof(struct ctdb_tunable_list, queue_buffer_size) }, + { "IPAllocAlgorithm", 2, false, + offsetof(struct ctdb_tunable_list, ip_alloc_algorithm) }, + { "AllowMixedVersions", 0, false, + offsetof(struct ctdb_tunable_list, allow_mixed_versions) }, + { .obsolete = true, } +}; + +void ctdb_tunable_set_defaults(struct ctdb_tunable_list *tun_list) +{ + int i; + + for (i=0; tunable_map[i].label != NULL; i++) { + size_t offset = tunable_map[i].offset; + uint32_t value = tunable_map[i].value; + uint32_t *value_ptr; + + value_ptr = (uint32_t *)((uint8_t *)tun_list + offset); + *value_ptr = value; + } +} + +bool ctdb_tunable_get_value(struct ctdb_tunable_list *tun_list, + const char *tunable_str, uint32_t *value) +{ + int i; + + for (i=0; tunable_map[i].label != NULL; i++) { + if (strcasecmp(tunable_map[i].label, tunable_str) == 0) { + uint32_t 
*value_ptr; + + value_ptr = (uint32_t *)((uint8_t *)tun_list + + tunable_map[i].offset); + *value = *value_ptr; + return true; + } + } + + return false; +} + +bool ctdb_tunable_set_value(struct ctdb_tunable_list *tun_list, + const char *tunable_str, uint32_t value, + bool *obsolete) +{ + int i; + + for (i=0; tunable_map[i].label != NULL; i++) { + if (strcasecmp(tunable_map[i].label, tunable_str) == 0) { + uint32_t *value_ptr; + + value_ptr = (uint32_t *)((uint8_t *)tun_list + + tunable_map[i].offset); + *value_ptr = value; + if (obsolete != NULL) { + *obsolete = tunable_map[i].obsolete; + } + return true; + } + } + + return false; +} + +struct ctdb_var_list *ctdb_tunable_names(TALLOC_CTX *mem_ctx) +{ + struct ctdb_var_list *list; + int i; + + list = talloc_zero(mem_ctx, struct ctdb_var_list); + if (list == NULL) { + return NULL; + } + + for (i=0; tunable_map[i].label != NULL; i++) { + if (tunable_map[i].obsolete) { + continue; + } + + list->var = talloc_realloc(list, list->var, const char *, + list->count + 1); + if (list->var == NULL) { + goto fail; + } + + list->var[list->count] = talloc_strdup(list, + tunable_map[i].label); + if (list->var[list->count] == NULL) { + goto fail; + } + + list->count += 1; + } + + return list; + +fail: + TALLOC_FREE(list); + return NULL; +} + +char *ctdb_tunable_names_to_string(TALLOC_CTX *mem_ctx) +{ + char *str = NULL; + int i; + + str = talloc_strdup(mem_ctx, ":"); + if (str == NULL) { + return NULL; + } + + for (i=0; tunable_map[i].label != NULL; i++) { + if (tunable_map[i].obsolete) { + continue; + } + + str = talloc_asprintf_append(str, "%s:", + tunable_map[i].label); + if (str == NULL) { + return NULL; + } + } + + /* Remove the last ':' */ + str[strlen(str)-1] = '\0'; + + return str; +} + +struct tunable_load_state { + struct ctdb_tunable_list *tun_list; + bool status; + const char *func; +}; + +static bool tunable_section(const char *section, void *private_data) +{ + struct tunable_load_state *state = + (struct 
tunable_load_state *)private_data; + + D_ERR("%s: Invalid line for section [%s] - sections not supported \n", + state->func, + section); + state->status = false; + + return true; +} + +/* + * tini option callback: parse value as an unsigned number and store it + * in the tunable called name. Always returns true so that parsing + * continues; any problem is logged and recorded in state->status. + */ +static bool tunable_option(const char *name, + const char *value, + void *private_data) +{ + struct tunable_load_state *state = + (struct tunable_load_state *)private_data; + unsigned long num; + bool obsolete; + bool ok; + int ret; + + if (value[0] == '\0') { + D_ERR("%s: Invalid line containing \"%s\"\n", state->func, name); + state->status = false; + return true; + } + + num = smb_strtoul(value, NULL, 0, &ret, SMB_STR_FULL_STR_CONV); + /* + * Tunables are uint32_t: also reject values that the cast below + * would silently truncate where unsigned long is wider. + */ + if (ret != 0 || num > UINT32_MAX) { + D_ERR("%s: Invalid value \"%s\" for tunable \"%s\"\n", + state->func, + value, + name); + state->status = false; + return true; + } + + ok = ctdb_tunable_set_value(state->tun_list, + name, + (uint32_t)num, + &obsolete); + if (!ok) { + D_ERR("%s: Unknown tunable \"%s\"\n", state->func, name); + state->status = false; + return true; + } + if (obsolete) { + D_ERR("%s: Obsolete tunable \"%s\"\n", state->func, name); + state->status = false; + return true; + } + + return true; +} + +bool ctdb_tunable_load_file(TALLOC_CTX *mem_ctx, + struct ctdb_tunable_list *tun_list, + const char *file) +{ + struct tunable_load_state state = { + .tun_list = tun_list, + .status = true, + .func = __FUNCTION__, + }; + FILE *fp; + bool status; + + ctdb_tunable_set_defaults(tun_list); + + fp = fopen(file, "r"); + if (fp == NULL) { + if (errno == ENOENT) { + /* Doesn't need to exist */ + return true; + } + + DBG_ERR("Failed to open %s\n", file); + return false; + } + + D_NOTICE("Loading tunables from %s\n", file); + /* + * allow_empty_value=true is somewhat counter-intuitive. + * However, if allow_empty_value=false then a tunable with no + * equals or value is regarded as empty and is simply ignored. + * Use true so an "empty value" can be caught in + * tunable_option().
+ * + * tunable_section() and tunable_option() return true while + * setting state.status=false, allowing all possible errors + * with tunables and values to be reported. This helps to + * avoid a potential game of whack-a-mole in a well-formed + * file with multiple minor errors. + */ + status = tini_parse(fp, true, tunable_section, tunable_option, &state); + + fclose(fp); + + if (!status) { + DBG_ERR("Syntax error\n"); + } + + return status && state.status; +} diff --git a/ctdb/common/tunable.h b/ctdb/common/tunable.h new file mode 100644 index 0000000..89f99f1 --- /dev/null +++ b/ctdb/common/tunable.h @@ -0,0 +1,35 @@ +/* + Tunable utilities + + Copyright (C) Amitay Isaacs 2016 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef __CTDB_TUNABLE_H__ +#define __CTDB_TUNABLE_H__ + +void ctdb_tunable_set_defaults(struct ctdb_tunable_list *tun_list); +bool ctdb_tunable_get_value(struct ctdb_tunable_list *tun_list, + const char *tunable_str, uint32_t *value); +bool ctdb_tunable_set_value(struct ctdb_tunable_list *tun_list, + const char *tunable_str, uint32_t value, + bool *obsolete); +struct ctdb_var_list *ctdb_tunable_names(TALLOC_CTX *mem_ctx); +char *ctdb_tunable_names_to_string(TALLOC_CTX *mem_ctx); +bool ctdb_tunable_load_file(TALLOC_CTX *mem_ctx, + struct ctdb_tunable_list *tun_list, + const char *file); + +#endif /* __CTDB_TUNABLE_H__ */ |