diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 06:50:17 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 06:50:17 +0000 |
commit | 86ed03f8adee56c050c73018537371c230a664a6 (patch) | |
tree | eae3d04cdf1c49848e5a671327ab38297f4acb0d /agents/kdump/fence_kdump.c | |
parent | Initial commit. (diff) | |
download | fence-agents-86ed03f8adee56c050c73018537371c230a664a6.tar.xz fence-agents-86ed03f8adee56c050c73018537371c230a664a6.zip |
Adding upstream version 4.12.1. (refs: upstream/4.12.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'agents/kdump/fence_kdump.c')
-rw-r--r-- | agents/kdump/fence_kdump.c | 592 |
1 files changed, 592 insertions, 0 deletions
diff --git a/agents/kdump/fence_kdump.c b/agents/kdump/fence_kdump.c new file mode 100644 index 0000000..eda1559 --- /dev/null +++ b/agents/kdump/fence_kdump.c @@ -0,0 +1,592 @@ +/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- + * + * Copyright (c) Ryan O'Hara (rohara@redhat.com) + * Copyright (c) Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include <syslog.h> +#include <ctype.h> +#include <errno.h> +#include <netdb.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> + +#include "options.h" +#include "message.h" +#include "version.h" + +static int verbose = 0; + +#define log_debug(lvl, fmt, args...) \ +do { \ + if (lvl <= verbose) { \ + fprintf (stdout, "[debug]: " fmt, ##args); \ + syslog (LOG_INFO, fmt, ##args); \ + } \ +} while (0); + +#define log_error(lvl, fmt, args...) 
\ +do { \ + if (lvl <= verbose) { \ + fprintf (stderr, "[error]: " fmt, ##args); \ + syslog (LOG_ERR, fmt, ##args); \ + } \ +} while (0); + +static int +trim (char *str) +{ + char *p; + int len; + + if (!str) return (0); + + len = strlen (str); + + while (len--) { + if (isspace (str[len])) { + str[len] = 0; + } else { + break; + } + } + + for (p = str; *p && isspace (*p); p++); + + memmove (str, p, strlen (p) + 1); + + return (strlen (str)); +} + +static int +do_action_monitor (void) +{ + const char cmdline_path[] = "/proc/cmdline"; + FILE *procFile; + size_t sz = 0; + char *lines = NULL; + int result = 1; + + procFile = fopen(cmdline_path, "r"); + + if (procFile == NULL) { + log_error (0, "Unable to open file %s (%s)\n", cmdline_path, strerror (errno)); + return 1; + } + + while (!feof (procFile)) { + ssize_t rv = getline (&lines, &sz, procFile); + if ((rv != -1) && (strstr(lines, "crashkernel=") != NULL)) { + result = 0; + } + } + + free (lines); + fclose (procFile); + + return result; +} + +static int +do_action_off (const fence_kdump_opts_t *opts) +{ + int error; + fd_set rfds; + fence_kdump_msg_t msg; + fence_kdump_node_t *node; + struct timeval timeout; + struct addrinfo hints; + fence_kdump_node_t *check_node; + char addr[NI_MAXHOST]; + char port[NI_MAXSERV]; + struct sockaddr_storage ss; + socklen_t size = sizeof (ss); + + if (list_empty (&opts->nodes)) { + return (1); + } else { + node = list_first_entry (&opts->nodes, fence_kdump_node_t, list); + } + + timeout.tv_sec = opts->timeout; + timeout.tv_usec = 0; + + FD_ZERO (&rfds); + FD_SET (node->socket, &rfds); + + // create listening socket + memset (&hints, 0, sizeof (hints)); + + hints.ai_family = opts->family; + hints.ai_socktype = SOCK_DGRAM; + hints.ai_protocol = IPPROTO_UDP; + hints.ai_flags = AI_NUMERICSERV; + + hints.ai_family = node->info->ai_family; + hints.ai_flags |= AI_PASSIVE; + + freeaddrinfo (node->info); + + node->info = NULL; + error = getaddrinfo (NULL, node->port, &hints, &node->info); + 
if (error != 0) { + log_error (2, "getaddrinfo (%s)\n", gai_strerror (error)); + free_node (node); + return (1); + } + + error = bind (node->socket, node->info->ai_addr, node->info->ai_addrlen); + if (error != 0) { + log_error (2, "bind (%s)\n", strerror (errno)); + free_node (node); + return (1); + } + + list_for_each_entry (check_node, &opts->nodes, list) { + log_debug (0, "waiting for message from '%s'\n", check_node->addr); + if (node->info->ai_family != check_node->info->ai_family) { + log_error (0, "mixing IPv4 and IPv6 nodes is not supported\n"); + return (1); + } + } + + for (;;) { + error = select (node->socket + 1, &rfds, NULL, NULL, &timeout); + if (error < 0) { + log_error (2, "select (%s)\n", strerror (errno)); + break; + } + if (error == 0) { + log_debug (0, "timeout after %d seconds\n", opts->timeout); + break; + } + + error = recvfrom (node->socket, &msg, sizeof (msg), 0, (struct sockaddr *) &ss, &size); + if (error < 0) { + log_error (2, "recvfrom (%s)\n", strerror (errno)); + continue; + } + + error = getnameinfo ((struct sockaddr *) &ss, size, + addr, sizeof (addr), + port, sizeof (port), + NI_NUMERICHOST | NI_NUMERICSERV); + if (error != 0) { + log_error (2, "getnameinfo (%s)\n", gai_strerror (error)); + continue; + } + + if (msg.magic != FENCE_KDUMP_MAGIC) { + log_debug (1, "invalid magic number '0x%X'\n", msg.magic); + continue; + } + + // check if we have matched messages from any known node + list_for_each_entry (check_node, &opts->nodes, list) { + error = strcasecmp (check_node->addr, addr); + if (error == 0 ) { + switch (msg.version) { + case FENCE_KDUMP_MSGV1: + log_debug (0, "received valid message from '%s'\n", addr); + return (0); + default: + log_debug (1, "invalid message version '0x%X'\n", msg.version); + continue; + } + } + } + log_debug (1, "discard message from '%s'\n", addr); + + } + + return (1); +} + +static int +do_action_metadata (const char *self) +{ + fprintf (stdout, "<?xml version=\"1.0\" ?>\n"); + fprintf (stdout, 
"<resource-agent name=\"%s\"", basename (self)); + fprintf (stdout, " shortdesc=\"fencing agent for use with kdump crash recovery service\">\n"); + fprintf (stdout, "<longdesc>"); + fprintf (stdout, "fence_kdump is an I/O fencing agent to be used with the kdump\n" + "crash recovery service. When the fence_kdump agent is invoked,\n" + "it will listen for a message from the failed node that acknowledges\n" + "that the failed node is executing the kdump crash kernel.\n" + "Note that fence_kdump is not a replacement for traditional\n" + "fencing methods. The fence_kdump agent can only detect that a\n" + "node has entered the kdump crash recovery service. This allows the\n" + "kdump crash recovery service complete without being preempted by\n" + "traditional power fencing methods.\n\n" + "Note: the \"off\" action listen for message from failed node that\n" + "acknowledges node has entered kdump crash recovery service. If a valid\n" + "message is received from the failed node, the node is considered to be\n" + "fenced and the agent returns success. Failure to receive a valid\n" + "message from the failed node in the given timeout period results in\n" + "fencing failure. When multiple node names/IP addresses are specified\n" + "a single valid message is sufficient for success. This is useful when\n" + "single node can send message via several different IP addresses.\n"); + fprintf (stdout, "</longdesc>\n"); + fprintf (stdout, "<vendor-url>http://www.kernel.org/pub/linux/utils/kernel/kexec/</vendor-url>\n"); + + fprintf (stdout, "<parameters>\n"); + + fprintf (stdout, "\t<parameter name=\"nodename\" unique=\"0\" required=\"0\">\n"); + fprintf (stdout, "\t\t<getopt mixed=\"-n, --nodename=NODE[,NODE...]\" />\n"); + fprintf (stdout, "\t\t<content type=\"string\" />\n"); + fprintf (stdout, "\t\t<shortdesc lang=\"en\">%s</shortdesc>\n", + "List of names or IP addresses of node to be fenced. This option is\n" + "required for the \"off\" action. 
Multiple values separated by commas\n" + "can be specified. All values must be of same IP network family." ); + fprintf (stdout, "\t</parameter>\n"); + + fprintf (stdout, "\t<parameter name=\"ipport\" unique=\"0\" required=\"0\">\n"); + fprintf (stdout, "\t\t<getopt mixed=\"-p, --ipport=PORT\" />\n"); + fprintf (stdout, "\t\t<content type=\"string\" default=\"7410\" />\n"); + fprintf (stdout, "\t\t<shortdesc lang=\"en\">%s</shortdesc>\n", + "IP port number that the fence_kdump agent will use to listen for\n" + "messages."); + fprintf (stdout, "\t</parameter>\n"); + + fprintf (stdout, "\t<parameter name=\"family\" unique=\"0\" required=\"0\">\n"); + fprintf (stdout, "\t\t<getopt mixed=\"-f, --family=FAMILY\" />\n"); + fprintf (stdout, "\t\t<content type=\"string\" default=\"auto\" />\n"); + fprintf (stdout, "\t\t<shortdesc lang=\"en\">%s</shortdesc>\n", + "IP network family. Force the fence_kdump agent to use a specific\n" + "family. The value for FAMILY can be \"auto\", \"ipv4\", or\n" + "\"ipv6\"."); + fprintf (stdout, "\t</parameter>\n"); + + fprintf (stdout, "\t<parameter name=\"action\" unique=\"0\" required=\"0\">\n"); + fprintf (stdout, "\t\t<getopt mixed=\"-o, --action=ACTION\" />\n"); + fprintf (stdout, "\t\t<content type=\"string\" default=\"off\" />\n"); + fprintf (stdout, "\t\t<shortdesc lang=\"en\">%s</shortdesc>\n", + "Fencing action to perform. The value for ACTION can be either\n" + "\"off\" or \"metadata\"."); + fprintf (stdout, "\t</parameter>\n"); + + fprintf (stdout, "\t<parameter name=\"timeout\" unique=\"0\" required=\"0\">\n"); + fprintf (stdout, "\t\t<getopt mixed=\"-t, --timeout=TIMEOUT\" />\n"); + fprintf (stdout, "\t\t<content type=\"string\" default=\"60\" />\n"); + fprintf (stdout, "\t\t<shortdesc lang=\"en\">%s</shortdesc>\n", + "Number of seconds to wait for message from failed node. 
If no message\n" + "is received within TIMEOUT seconds, the fence_kdump agent\n" + "returns failure."); + fprintf (stdout, "\t</parameter>\n"); + + fprintf (stdout, "\t<parameter name=\"verbose\" unique=\"0\" required=\"0\">\n"); + fprintf (stdout, "\t\t<getopt mixed=\"-v, --verbose\" />\n"); + fprintf (stdout, "\t\t<content type=\"boolean\" />\n"); + fprintf (stdout, "\t\t<shortdesc lang=\"en\">%s</shortdesc>\n", + "Print verbose output"); + fprintf (stdout, "\t</parameter>\n"); + + fprintf (stdout, "\t<parameter name=\"version\" unique=\"0\" required=\"0\">\n"); + fprintf (stdout, "\t\t<getopt mixed=\"-V, --version\" />\n"); + fprintf (stdout, "\t\t<content type=\"boolean\" />\n"); + fprintf (stdout, "\t\t<shortdesc lang=\"en\">%s</shortdesc>\n", + "Print version"); + fprintf (stdout, "\t</parameter>\n"); + + fprintf (stdout, "\t<parameter name=\"usage\" unique=\"0\" required=\"0\">\n"); + fprintf (stdout, "\t\t<getopt mixed=\"-h, --help\" />\n"); + fprintf (stdout, "\t\t<content type=\"boolean\" />\n"); + fprintf (stdout, "\t\t<shortdesc lang=\"en\">%s</shortdesc>\n", + "Print usage"); + fprintf (stdout, "\t</parameter>\n"); + + fprintf (stdout, "</parameters>\n"); + + fprintf (stdout, "<actions>\n"); + fprintf (stdout, "\t<action name=\"off\" />\n"); + fprintf (stdout, "\t<action name=\"monitor\" />\n"); + fprintf (stdout, "\t<action name=\"metadata\" />\n"); + fprintf (stdout, "\t<action name=\"validate-all\" />\n"); + fprintf (stdout, "</actions>\n"); + + fprintf (stdout, "</resource-agent>\n"); + + return (0); +} + +static void +print_usage (const char *self) +{ + fprintf (stdout, "Usage: %s [options]\n", basename (self)); + fprintf (stdout, "\n"); + fprintf (stdout, "Options:\n"); + fprintf (stdout, "\n"); + fprintf (stdout, "%s\n", + " -n, --nodename=NODE[,NODE...]List of names or IP addresses of node to be fenced"); + fprintf (stdout, "%s\n", + " -p, --ipport=PORT IP port number (default: 7410)"); + fprintf (stdout, "%s\n", + " -f, --family=FAMILY Network 
family: ([auto], ipv4, ipv6)"); + fprintf (stdout, "%s\n", + " -o, --action=ACTION Fencing action: ([off], monitor, metadata, validate-all)"); + fprintf (stdout, "%s\n", + " -t, --timeout=TIMEOUT Timeout in seconds (default: 60)"); + fprintf (stdout, "%s\n", + " -v, --verbose Print verbose output"); + fprintf (stdout, "%s\n", + " -V, --version Print version"); + fprintf (stdout, "%s\n", + " -h, --help Print usage"); + fprintf (stdout, "\n"); + + return; +} + +static int +get_options_node (fence_kdump_opts_t *opts) +{ + int error; + struct addrinfo hints; + fence_kdump_node_t *node; + + node = malloc (sizeof (fence_kdump_node_t)); + if (!node) { + log_error (2, "malloc (%s)\n", strerror (errno)); + return (1); + } + + memset (node, 0, sizeof (fence_kdump_node_t)); + memset (&hints, 0, sizeof (hints)); + + hints.ai_family = opts->family; + hints.ai_socktype = SOCK_DGRAM; + hints.ai_protocol = IPPROTO_UDP; + hints.ai_flags = AI_NUMERICSERV; + + strncpy (node->name, opts->nodename, sizeof (node->name) - 1); + snprintf (node->port, sizeof (node->port), "%d", opts->ipport); + + node->info = NULL; + error = getaddrinfo (node->name, node->port, &hints, &node->info); + if (error != 0) { + log_error (2, "getaddrinfo (%s)\n", gai_strerror (error)); + free_node (node); + return (1); + } + + error = getnameinfo (node->info->ai_addr, node->info->ai_addrlen, + node->addr, sizeof (node->addr), + node->port, sizeof (node->port), + NI_NUMERICHOST | NI_NUMERICSERV); + if (error != 0) { + log_error (2, "getnameinfo (%s)\n", gai_strerror (error)); + free_node (node); + return (1); + } + + node->socket = socket (node->info->ai_family, + node->info->ai_socktype, + node->info->ai_protocol); + if (node->socket < 0) { + log_error (2, "socket (%s)\n", strerror (errno)); + free_node (node); + return (1); + } + + list_add_tail (&node->list, &opts->nodes); + + return (0); +} + +static void +get_options (int argc, char **argv, fence_kdump_opts_t *opts) +{ + int opt; + + struct option options[] = 
{ + { "nodename", required_argument, NULL, 'n' }, + { "ipport", required_argument, NULL, 'p' }, + { "family", required_argument, NULL, 'f' }, + { "action", required_argument, NULL, 'o' }, + { "timeout", required_argument, NULL, 't' }, + { "verbose", optional_argument, NULL, 'v' }, + { "version", no_argument, NULL, 'V' }, + { "help", no_argument, NULL, 'h' }, + { 0, 0, 0, 0 } + }; + + while ((opt = getopt_long (argc, argv, "n:p:f:o:t:v::Vh", options, NULL)) != EOF) { + switch (opt) { + case 'n': + set_option_nodename (opts, optarg); + break; + case 'p': + set_option_ipport (opts, optarg); + break; + case 'f': + set_option_family (opts, optarg); + break; + case 'o': + set_option_action (opts, optarg); + break; + case 't': + set_option_timeout (opts, optarg); + break; + case 'v': + set_option_verbose (opts, optarg); + break; + case 'V': + print_version (argv[0]); + exit (0); + case 'h': + print_usage (argv[0]); + exit (0); + default: + print_usage (argv[0]); + exit (1); + } + } + + verbose = opts->verbose; + + return; +} + +static void +get_options_stdin (fence_kdump_opts_t *opts) +{ + char buf[1024]; + char *opt; + char *arg; + + while (fgets (buf, sizeof (buf), stdin) != 0) { + if (trim (buf) == 0) { + continue; + } + if (buf[0] == '#') { + continue; + } + + opt = buf; + + if ((arg = strchr (opt, '=')) != 0) { + *arg = 0; + arg += 1; + } else { + continue; + } + + if (!strcasecmp (opt, "nodename")) { + set_option_nodename (opts, arg); + continue; + } + if (!strcasecmp (opt, "ipport")) { + set_option_ipport (opts, arg); + continue; + } + if (!strcasecmp (opt, "family")) { + set_option_family (opts, arg); + continue; + } + if (!strcasecmp (opt, "action")) { + set_option_action (opts, arg); + continue; + } + if (!strcasecmp (opt, "timeout")) { + set_option_timeout (opts, arg); + continue; + } + if (!strcasecmp (opt, "verbose")) { + set_option_verbose (opts, arg); + continue; + } + } + + verbose = opts->verbose; + + return; +} + +int +main (int argc, char **argv) +{ + 
int error = 1; + fence_kdump_opts_t opts; + char *ptr; + char *node_list; + + init_options (&opts); + + if (argc > 1) { + get_options (argc, argv, &opts); + } else { + get_options_stdin (&opts); + } + + openlog ("fence_kdump", LOG_CONS|LOG_PID, LOG_DAEMON); + + if (opts.action == FENCE_KDUMP_ACTION_OFF) { + if (opts.nodename == NULL) { + log_error (0, "action 'off' requires nodename\n"); + exit (1); + } + node_list = (char *)malloc(strlen(opts.nodename)+1); + + strcpy(node_list, opts.nodename); //make local copy of nodename on which we can safely iterate + // iterate through node_list + for (ptr = strtok(node_list, ","); ptr != NULL; ptr = strtok(NULL, ",")) { + set_option_nodename (&opts, ptr); //overwrite nodename for next function + if (get_options_node (&opts) != 0) { + log_error (0, "failed to get node '%s'\n", opts.nodename); + exit (1); + } + } + free(node_list); + } + + if (verbose != 0) { + //clear nodename to avoid showing just last nodename here + free(opts.nodename); + opts.nodename = NULL; + print_options (&opts); + } + + switch (opts.action) { + case FENCE_KDUMP_ACTION_OFF: + error = do_action_off (&opts); + break; + case FENCE_KDUMP_ACTION_METADATA: + error = do_action_metadata (argv[0]); + break; + case FENCE_KDUMP_ACTION_MONITOR: + error = do_action_monitor (); + break; + case FENCE_KDUMP_ACTION_VALIDATE: + error = 0; + break; + default: + break; + } + + free_options (&opts); + + return (error); +} |