diff options
Diffstat (limited to 'ctdb/server/ctdb_takeover.c')
-rw-r--r-- | ctdb/server/ctdb_takeover.c | 2653 |
1 files changed, 2653 insertions, 0 deletions
diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c new file mode 100644 index 0000000..4d2d041 --- /dev/null +++ b/ctdb/server/ctdb_takeover.c @@ -0,0 +1,2653 @@ +/* + ctdb ip takeover code + + Copyright (C) Ronnie Sahlberg 2007 + Copyright (C) Andrew Tridgell 2007 + Copyright (C) Martin Schwenke 2011 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ +#include "replace.h" +#include "system/network.h" +#include "system/filesys.h" +#include "system/time.h" +#include "system/wait.h" + +#include <talloc.h> +#include <tevent.h> + +#include "lib/util/dlinklist.h" +#include "lib/util/debug.h" +#include "lib/util/samba_util.h" +#include "lib/util/sys_rw.h" +#include "lib/util/util_process.h" + +#include "protocol/protocol_util.h" + +#include "ctdb_private.h" +#include "ctdb_client.h" + +#include "common/reqid.h" +#include "common/system.h" +#include "common/system_socket.h" +#include "common/common.h" +#include "common/logging.h" + +#include "server/ctdb_config.h" + +#include "server/ipalloc.h" + +#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0) + +#define CTDB_ARP_INTERVAL 1 +#define CTDB_ARP_REPEAT 3 + +struct ctdb_interface { + struct ctdb_interface *prev, *next; + const char *name; + bool link_up; + uint32_t references; +}; + +struct vnn_interface { + struct vnn_interface *prev, *next; + struct ctdb_interface *iface; +}; + +/* state associated with a public ip address */ +struct ctdb_vnn { + struct ctdb_vnn *prev, *next; + + struct ctdb_interface *iface; + struct vnn_interface *ifaces; + ctdb_sock_addr public_address; + uint8_t public_netmask_bits; + + /* + * The node number that is serving this public address - set + * to CTDB_UNKNOWN_PNN if node is serving it + */ + uint32_t pnn; + + /* List of clients to tickle for this public address */ + struct ctdb_tcp_array *tcp_array; + + /* whether we need to update the other nodes with changes to our list + of connected clients */ + bool tcp_update_needed; + + /* a context to hang sending gratious arp events off */ + TALLOC_CTX *takeover_ctx; + + /* Set to true any time an update to this VNN is in flight. + This helps to avoid races. */ + bool update_in_flight; + + /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP + * address then this flag is set. It will be deleted in the + * release IP callback. */ + bool delete_pending; +}; + +static const char *iface_string(const struct ctdb_interface *iface) +{ + return (iface != NULL ? iface->name : "__none__"); +} + +static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn) +{ + return iface_string(vnn->iface); +} + +static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb, + const char *iface); + +static struct ctdb_interface * +ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface) +{ + struct ctdb_interface *i; + + if (strlen(iface) > CTDB_IFACE_SIZE) { + DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface)); + return NULL; + } + + /* Verify that we don't have an entry for this ip yet */ + i = ctdb_find_iface(ctdb, iface); + if (i != NULL) { + return i; + } + + /* create a new structure for this interface */ + i = talloc_zero(ctdb, struct ctdb_interface); + if (i == NULL) { + DEBUG(DEBUG_ERR, (__location__ " out of memory\n")); + return NULL; + } + i->name = talloc_strdup(i, iface); + if (i->name == NULL) { + DEBUG(DEBUG_ERR, (__location__ " out of memory\n")); + talloc_free(i); + return NULL; + } + + i->link_up = true; + + DLIST_ADD(ctdb->ifaces, i); + + return i; +} + +static bool vnn_has_interface(struct ctdb_vnn *vnn, + const struct ctdb_interface *iface) +{ + struct vnn_interface *i; + + for (i = vnn->ifaces; i != NULL; i = i->next) { + if (iface == i->iface) { + return true; + } + } + + return false; +} + +/* If any interfaces now have no possible IPs then delete them. This + * implementation is naive (i.e. simple) rather than clever + * (i.e. complex). Given that this is run on delip and that operation + * is rare, this doesn't need to be efficient - it needs to be + * foolproof. One alternative is reference counting, where the logic + * is distributed and can, therefore, be broken in multiple places. + * Another alternative is to build a red-black tree of interfaces that + * can have addresses (by walking ctdb->vnn once) and then walking + * ctdb->ifaces once and deleting those not in the tree. Let's go to + * one of those if the naive implementation causes problems... :-) + */ +static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb, + struct ctdb_vnn *vnn) +{ + struct ctdb_interface *i, *next; + + /* For each interface, check if there's an IP using it. */ + for (i = ctdb->ifaces; i != NULL; i = next) { + struct ctdb_vnn *tv; + bool found; + next = i->next; + + /* Only consider interfaces named in the given VNN. */ + if (!vnn_has_interface(vnn, i)) { + continue; + } + + /* Search for a vnn with this interface. */ + found = false; + for (tv=ctdb->vnn; tv; tv=tv->next) { + if (vnn_has_interface(tv, i)) { + found = true; + break; + } + } + + if (!found) { + /* None of the VNNs are using this interface. */ + DLIST_REMOVE(ctdb->ifaces, i); + talloc_free(i); + } + } +} + + +static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb, + const char *iface) +{ + struct ctdb_interface *i; + + for (i=ctdb->ifaces;i;i=i->next) { + if (strcmp(i->name, iface) == 0) { + return i; + } + } + + return NULL; +} + +static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb, + struct ctdb_vnn *vnn) +{ + struct vnn_interface *i; + struct ctdb_interface *cur = NULL; + struct ctdb_interface *best = NULL; + + for (i = vnn->ifaces; i != NULL; i = i->next) { + + cur = i->iface; + + if (!cur->link_up) { + continue; + } + + if (best == NULL) { + best = cur; + continue; + } + + if (cur->references < best->references) { + best = cur; + continue; + } + } + + return best; +} + +static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb, + struct ctdb_vnn *vnn) +{ + struct ctdb_interface *best = NULL; + + if (vnn->iface) { + DEBUG(DEBUG_INFO, (__location__ " public address '%s' " + "still assigned to iface '%s'\n", + ctdb_addr_to_str(&vnn->public_address), + ctdb_vnn_iface_string(vnn))); + return 0; + } + + best = ctdb_vnn_best_iface(ctdb, vnn); + if (best == NULL) { + DEBUG(DEBUG_ERR, (__location__ " public address '%s' " + "cannot assign to iface any iface\n", + ctdb_addr_to_str(&vnn->public_address))); + return -1; + } + + vnn->iface = best; + best->references++; + vnn->pnn = ctdb->pnn; + + DEBUG(DEBUG_INFO, (__location__ " public address '%s' " + "now assigned to iface '%s' refs[%d]\n", + ctdb_addr_to_str(&vnn->public_address), + ctdb_vnn_iface_string(vnn), + best->references)); + return 0; +} + +static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb, + struct ctdb_vnn *vnn) +{ + DEBUG(DEBUG_INFO, (__location__ " public address '%s' " + "now unassigned (old iface '%s' refs[%d])\n", + ctdb_addr_to_str(&vnn->public_address), + ctdb_vnn_iface_string(vnn), + vnn->iface?vnn->iface->references:0)); + if (vnn->iface) { + vnn->iface->references--; + } + vnn->iface = NULL; + if (vnn->pnn == ctdb->pnn) { + vnn->pnn = CTDB_UNKNOWN_PNN; + } +} + +static bool ctdb_vnn_available(struct ctdb_context *ctdb, + struct ctdb_vnn *vnn) +{ + uint32_t flags; + struct vnn_interface *i; + + /* Nodes that are not RUNNING can not host IPs */ + if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) { + return false; + } + + flags = ctdb->nodes[ctdb->pnn]->flags; + if ((flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED)) != 0) { + return false; + } + + if (vnn->delete_pending) { + return false; + } + + if (vnn->iface && vnn->iface->link_up) { + return true; + } + + for (i = vnn->ifaces; i != NULL; i = i->next) { + if (i->iface->link_up) { + return true; + } + } + + return false; +} + +struct ctdb_takeover_arp { + struct ctdb_context *ctdb; + uint32_t count; + ctdb_sock_addr addr; + struct ctdb_tcp_array *tcparray; + struct ctdb_vnn *vnn; +}; + + +/* + lists of tcp endpoints + */ +struct ctdb_tcp_list { + struct ctdb_tcp_list *prev, *next; + struct ctdb_connection connection; +}; + +/* + list of clients to kill on IP release + */ +struct ctdb_client_ip { + struct ctdb_client_ip *prev, *next; + struct ctdb_context *ctdb; + ctdb_sock_addr addr; + uint32_t client_id; +}; + + +/* + send a gratuitous arp + */ +static void ctdb_control_send_arp(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval t, void *private_data) +{ + struct ctdb_takeover_arp *arp = talloc_get_type(private_data, + struct ctdb_takeover_arp); + int ret; + struct ctdb_tcp_array *tcparray; + const char *iface; + + /* IP address might have been released between sends */ + if (arp->vnn->iface == NULL) { + DBG_INFO("Cancelling ARP send for released IP %s\n", + ctdb_addr_to_str(&arp->vnn->public_address)); + talloc_free(arp); + return; + } + + iface = ctdb_vnn_iface_string(arp->vnn); + ret = ctdb_sys_send_arp(&arp->addr, iface); + if (ret != 0) { + DBG_ERR("Failed to send ARP on interface %s: %s\n", + iface, strerror(ret)); + } + + tcparray = arp->tcparray; + if (tcparray) { + unsigned int i; + + for (i=0;i<tcparray->num;i++) { + struct ctdb_connection *tcon; + char buf[128]; + + tcon = &tcparray->connections[i]; + ret = ctdb_connection_to_buf(buf, + sizeof(buf), + tcon, + false, + " -> "); + if (ret != 0) { + strlcpy(buf, "UNKNOWN", sizeof(buf)); + } + D_INFO("Send TCP tickle ACK: %s\n", buf); + ret = ctdb_sys_send_tcp( + &tcon->src, + &tcon->dst, + 0, 0, 0); + if (ret != 0) { + DBG_ERR("Failed to send TCP tickle ACK: %s\n", + buf); + } + } + } + + arp->count++; + + if (arp->count == CTDB_ARP_REPEAT) { + talloc_free(arp); + return; + } + + tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx, + timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), + ctdb_control_send_arp, arp); +} + +static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb, + struct ctdb_vnn *vnn) +{ + struct ctdb_takeover_arp *arp; + struct ctdb_tcp_array *tcparray; + + if (!vnn->takeover_ctx) { + vnn->takeover_ctx = talloc_new(vnn); + if (!vnn->takeover_ctx) { + return -1; + } + } + + arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp); + if (!arp) { + return -1; + } + + arp->ctdb = ctdb; + arp->addr = vnn->public_address; + arp->vnn = vnn; + + tcparray = vnn->tcp_array; + if (tcparray) { + /* add all of the known tcp connections for this IP to the + list of tcp connections to send tickle acks for */ + arp->tcparray = talloc_steal(arp, tcparray); + + vnn->tcp_array = NULL; + vnn->tcp_update_needed = true; + } + + tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx, + timeval_zero(), ctdb_control_send_arp, arp); + + return 0; +} + +struct ctdb_do_takeip_state { + struct ctdb_req_control_old *c; + struct ctdb_vnn *vnn; +}; + +/* + called when takeip event finishes + */ +static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status, + void *private_data) +{ + struct ctdb_do_takeip_state *state = + talloc_get_type(private_data, struct ctdb_do_takeip_state); + int32_t ret; + TDB_DATA data; + + if (status != 0) { + if (status == -ETIMEDOUT) { + ctdb_ban_self(ctdb); + } + DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n", + ctdb_addr_to_str(&state->vnn->public_address), + ctdb_vnn_iface_string(state->vnn))); + ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL); + + talloc_free(state); + return; + } + + if (ctdb->do_checkpublicip) { + + ret = ctdb_announce_vnn_iface(ctdb, state->vnn); + if (ret != 0) { + ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL); + talloc_free(state); + return; + } + + } + + data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address); + data.dsize = strlen((char *)data.dptr) + 1; + DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr)); + + ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data); + + + /* the control succeeded */ + ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL); + talloc_free(state); + return; +} + +static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state) +{ + state->vnn->update_in_flight = false; + return 0; +} + +/* + take over an ip address + */ +static int32_t ctdb_do_takeip(struct ctdb_context *ctdb, + struct ctdb_req_control_old *c, + struct ctdb_vnn *vnn) +{ + int ret; + struct ctdb_do_takeip_state *state; + + if (vnn->update_in_flight) { + DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected " + "update for this IP already in flight\n", + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits)); + return -1; + } + + ret = ctdb_vnn_assign_iface(ctdb, vnn); + if (ret != 0) { + DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to " + "assign a usable interface\n", + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits)); + return -1; + } + + state = talloc(vnn, struct ctdb_do_takeip_state); + CTDB_NO_MEMORY(ctdb, state); + + state->c = NULL; + state->vnn = vnn; + + vnn->update_in_flight = true; + talloc_set_destructor(state, ctdb_takeip_destructor); + + DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n", + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits, + ctdb_vnn_iface_string(vnn))); + + ret = ctdb_event_script_callback(ctdb, + state, + ctdb_do_takeip_callback, + state, + CTDB_EVENT_TAKE_IP, + "%s %s %u", + ctdb_vnn_iface_string(vnn), + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits); + + if (ret != 0) { + DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n", + ctdb_addr_to_str(&vnn->public_address), + ctdb_vnn_iface_string(vnn))); + talloc_free(state); + return -1; + } + + state->c = talloc_steal(ctdb, c); + return 0; +} + +struct ctdb_do_updateip_state { + struct ctdb_req_control_old *c; + struct ctdb_interface *old; + struct ctdb_vnn *vnn; +}; + +/* + called when updateip event finishes + */ +static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status, + void *private_data) +{ + struct ctdb_do_updateip_state *state = + talloc_get_type(private_data, struct ctdb_do_updateip_state); + + if (status != 0) { + if (status == -ETIMEDOUT) { + ctdb_ban_self(ctdb); + } + DEBUG(DEBUG_ERR, + ("Failed update of IP %s from interface %s to %s\n", + ctdb_addr_to_str(&state->vnn->public_address), + iface_string(state->old), + ctdb_vnn_iface_string(state->vnn))); + + /* + * All we can do is reset the old interface + * and let the next run fix it + */ + ctdb_vnn_unassign_iface(ctdb, state->vnn); + state->vnn->iface = state->old; + state->vnn->iface->references++; + + ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL); + talloc_free(state); + return; + } + + /* the control succeeded */ + ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL); + talloc_free(state); + return; +} + +static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state) +{ + state->vnn->update_in_flight = false; + return 0; +} + +/* + update (move) an ip address + */ +static int32_t ctdb_do_updateip(struct ctdb_context *ctdb, + struct ctdb_req_control_old *c, + struct ctdb_vnn *vnn) +{ + int ret; + struct ctdb_do_updateip_state *state; + struct ctdb_interface *old = vnn->iface; + const char *old_name = iface_string(old); + const char *new_name; + + if (vnn->update_in_flight) { + DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected " + "update for this IP already in flight\n", + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits)); + return -1; + } + + ctdb_vnn_unassign_iface(ctdb, vnn); + ret = ctdb_vnn_assign_iface(ctdb, vnn); + if (ret != 0) { + DEBUG(DEBUG_ERR,("Update of IP %s/%u failed to " + "assign a usable interface (old iface '%s')\n", + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits, + old_name)); + return -1; + } + + if (old == vnn->iface) { + /* A benign update from one interface onto itself. + * no need to run the eventscripts in this case, just return + * success. + */ + ctdb_request_control_reply(ctdb, c, NULL, 0, NULL); + return 0; + } + + state = talloc(vnn, struct ctdb_do_updateip_state); + CTDB_NO_MEMORY(ctdb, state); + + state->c = NULL; + state->old = old; + state->vnn = vnn; + + vnn->update_in_flight = true; + talloc_set_destructor(state, ctdb_updateip_destructor); + + new_name = ctdb_vnn_iface_string(vnn); + DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from " + "interface %s to %s\n", + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits, + old_name, + new_name)); + + ret = ctdb_event_script_callback(ctdb, + state, + ctdb_do_updateip_callback, + state, + CTDB_EVENT_UPDATE_IP, + "%s %s %s %u", + old_name, + new_name, + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits); + if (ret != 0) { + DEBUG(DEBUG_ERR, + ("Failed update IP %s from interface %s to %s\n", + ctdb_addr_to_str(&vnn->public_address), + old_name, new_name)); + talloc_free(state); + return -1; + } + + state->c = talloc_steal(ctdb, c); + return 0; +} + +/* + Find the vnn of the node that has a public ip address + returns -1 if the address is not known as a public address + */ +static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr) +{ + struct ctdb_vnn *vnn; + + for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { + if (ctdb_same_ip(&vnn->public_address, addr)) { + return vnn; + } + } + + return NULL; +} + +/* + take over an ip address + */ +int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, + struct ctdb_req_control_old *c, + TDB_DATA indata, + bool *async_reply) +{ + int ret; + struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr; + struct ctdb_vnn *vnn; + bool have_ip = false; + bool do_updateip = false; + bool do_takeip = false; + struct ctdb_interface *best_iface = NULL; + + if (pip->pnn != ctdb->pnn) { + DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' " + "with pnn %d, but we're node %d\n", + ctdb_addr_to_str(&pip->addr), + pip->pnn, ctdb->pnn)); + return -1; + } + + /* update out vnn list */ + vnn = find_public_ip_vnn(ctdb, &pip->addr); + if (vnn == NULL) { + DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n", + ctdb_addr_to_str(&pip->addr))); + return 0; + } + + if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) { + have_ip = ctdb_sys_have_ip(&pip->addr); + } + best_iface = ctdb_vnn_best_iface(ctdb, vnn); + if (best_iface == NULL) { + DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find" + "a usable interface (old %s, have_ip %d)\n", + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits, + ctdb_vnn_iface_string(vnn), + have_ip)); + return -1; + } + + if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != CTDB_UNKNOWN_PNN) { + DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, " + "and we have it on iface[%s], but it was assigned to node %d" + "and we are node %d, banning ourself\n", + ctdb_addr_to_str(&vnn->public_address), + ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn)); + ctdb_ban_self(ctdb); + return -1; + } + + if (vnn->pnn == CTDB_UNKNOWN_PNN && have_ip) { + /* This will cause connections to be reset and + * reestablished. However, this is a very unusual + * situation and doing this will completely repair the + * inconsistency in the VNN. + */ + DEBUG(DEBUG_WARNING, + (__location__ + " Doing updateip for IP %s already on an interface\n", + ctdb_addr_to_str(&vnn->public_address))); + do_updateip = true; + } + + if (vnn->iface) { + if (vnn->iface != best_iface) { + if (!vnn->iface->link_up) { + do_updateip = true; + } else if (vnn->iface->references > (best_iface->references + 1)) { + /* only move when the rebalance gains something */ + do_updateip = true; + } + } + } + + if (!have_ip) { + if (do_updateip) { + ctdb_vnn_unassign_iface(ctdb, vnn); + do_updateip = false; + } + do_takeip = true; + } + + if (do_takeip) { + ret = ctdb_do_takeip(ctdb, c, vnn); + if (ret != 0) { + return -1; + } + } else if (do_updateip) { + ret = ctdb_do_updateip(ctdb, c, vnn); + if (ret != 0) { + return -1; + } + } else { + /* + * The interface is up and the kernel known the ip + * => do nothing + */ + DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n", + ctdb_addr_to_str(&pip->addr), + vnn->public_netmask_bits, + ctdb_vnn_iface_string(vnn))); + return 0; + } + + /* tell ctdb_control.c that we will be replying asynchronously */ + *async_reply = true; + + return 0; +} + +static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn) +{ + DLIST_REMOVE(ctdb->vnn, vnn); + ctdb_vnn_unassign_iface(ctdb, vnn); + ctdb_remove_orphaned_ifaces(ctdb, vnn); + talloc_free(vnn); +} + +static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb, + struct ctdb_vnn *vnn, + ctdb_sock_addr *addr) +{ + TDB_DATA data; + + /* Send a message to all clients of this node telling them + * that the cluster has been reconfigured and they should + * close any connections on this IP address + */ + data.dptr = (uint8_t *)ctdb_addr_to_str(addr); + data.dsize = strlen((char *)data.dptr)+1; + DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr)); + ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data); + + ctdb_vnn_unassign_iface(ctdb, vnn); + + /* Process the IP if it has been marked for deletion */ + if (vnn->delete_pending) { + do_delete_ip(ctdb, vnn); + return NULL; + } + + return vnn; +} + +struct release_ip_callback_state { + struct ctdb_req_control_old *c; + ctdb_sock_addr *addr; + struct ctdb_vnn *vnn; + uint32_t target_pnn; +}; + +/* + called when releaseip event finishes + */ +static void release_ip_callback(struct ctdb_context *ctdb, int status, + void *private_data) +{ + struct release_ip_callback_state *state = + talloc_get_type(private_data, struct release_ip_callback_state); + + if (status == -ETIMEDOUT) { + ctdb_ban_self(ctdb); + } + + if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) { + if (ctdb_sys_have_ip(state->addr)) { + DEBUG(DEBUG_ERR, + ("IP %s still hosted during release IP callback, failing\n", + ctdb_addr_to_str(state->addr))); + ctdb_request_control_reply(ctdb, state->c, + NULL, -1, NULL); + talloc_free(state); + return; + } + } + + state->vnn->pnn = state->target_pnn; + state->vnn = release_ip_post(ctdb, state->vnn, state->addr); + + /* the control succeeded */ + ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL); + talloc_free(state); +} + +static int ctdb_releaseip_destructor(struct release_ip_callback_state *state) +{ + if (state->vnn != NULL) { + state->vnn->update_in_flight = false; + } + return 0; +} + +/* + release an ip address + */ +int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, + struct ctdb_req_control_old *c, + TDB_DATA indata, + bool *async_reply) +{ + int ret; + struct release_ip_callback_state *state; + struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr; + struct ctdb_vnn *vnn; + const char *iface; + + /* update our vnn list */ + vnn = find_public_ip_vnn(ctdb, &pip->addr); + if (vnn == NULL) { + DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n", + ctdb_addr_to_str(&pip->addr))); + return 0; + } + + /* stop any previous arps */ + talloc_free(vnn->takeover_ctx); + vnn->takeover_ctx = NULL; + + /* RELEASE_IP controls are sent to all nodes that should not + * be hosting a particular IP. This serves 2 purposes. The + * first is to help resolve any inconsistencies. If a node + * does unexpectly host an IP then it will be released. The + * 2nd is to use a "redundant release" to tell non-takeover + * nodes where an IP is moving to. This is how "ctdb ip" can + * report the (likely) location of an IP by only asking the + * local node. Redundant releases need to update the PNN but + * are otherwise ignored. + */ + if (ctdb_config.failover_disabled == 0 && ctdb->do_checkpublicip) { + if (!ctdb_sys_have_ip(&pip->addr)) { + DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n", + ctdb_addr_to_str(&pip->addr), + vnn->public_netmask_bits, + ctdb_vnn_iface_string(vnn))); + vnn->pnn = pip->pnn; + ctdb_vnn_unassign_iface(ctdb, vnn); + return 0; + } + } else { + if (vnn->iface == NULL) { + DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n", + ctdb_addr_to_str(&pip->addr), + vnn->public_netmask_bits)); + vnn->pnn = pip->pnn; + return 0; + } + } + + /* There is a potential race between take_ip and us because we + * update the VNN via a callback that run when the + * eventscripts have been run. Avoid the race by allowing one + * update to be in flight at a time. + */ + if (vnn->update_in_flight) { + DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected " + "update for this IP already in flight\n", + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits)); + return -1; + } + + iface = ctdb_vnn_iface_string(vnn); + + DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n", + ctdb_addr_to_str(&pip->addr), + vnn->public_netmask_bits, + iface, + pip->pnn)); + + state = talloc(ctdb, struct release_ip_callback_state); + if (state == NULL) { + ctdb_set_error(ctdb, "Out of memory at %s:%d", + __FILE__, __LINE__); + return -1; + } + + state->c = NULL; + state->addr = talloc(state, ctdb_sock_addr); + if (state->addr == NULL) { + ctdb_set_error(ctdb, "Out of memory at %s:%d", + __FILE__, __LINE__); + talloc_free(state); + return -1; + } + *state->addr = pip->addr; + state->target_pnn = pip->pnn; + state->vnn = vnn; + + vnn->update_in_flight = true; + talloc_set_destructor(state, ctdb_releaseip_destructor); + + ret = ctdb_event_script_callback(ctdb, + state, release_ip_callback, state, + CTDB_EVENT_RELEASE_IP, + "%s %s %u", + iface, + ctdb_addr_to_str(&pip->addr), + vnn->public_netmask_bits); + if (ret != 0) { + DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n", + ctdb_addr_to_str(&pip->addr), + ctdb_vnn_iface_string(vnn))); + talloc_free(state); + return -1; + } + + /* tell the control that we will be reply asynchronously */ + *async_reply = true; + state->c = talloc_steal(state, c); + return 0; +} + +static int ctdb_add_public_address(struct ctdb_context *ctdb, + ctdb_sock_addr *addr, + unsigned mask, const char *ifaces, + bool check_address) +{ + struct ctdb_vnn *vnn; + char *tmp; + const char *iface; + + /* Verify that we don't have an entry for this IP yet */ + for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) { + if (ctdb_same_sockaddr(addr, &vnn->public_address)) { + D_ERR("Duplicate public IP address '%s'\n", + ctdb_addr_to_str(addr)); + return -1; + } + } + + /* Create a new VNN structure for this IP address */ + vnn = talloc_zero(ctdb, struct ctdb_vnn); + if (vnn == NULL) { + DBG_ERR("Memory allocation error\n"); + return -1; + } + tmp = talloc_strdup(vnn, ifaces); + if (tmp == NULL) { + DBG_ERR("Memory allocation error\n"); + talloc_free(vnn); + return -1; + } + for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) { + struct vnn_interface *vnn_iface; + struct ctdb_interface *i; + + if (!ctdb_sys_check_iface_exists(iface)) { + D_ERR("Unknown interface %s for public address %s\n", + iface, + ctdb_addr_to_str(addr)); + talloc_free(vnn); + return -1; + } + + i = ctdb_add_local_iface(ctdb, iface); + if (i == NULL) { + D_ERR("Failed to add interface '%s' " + "for public address %s\n", + iface, + ctdb_addr_to_str(addr)); + talloc_free(vnn); + return -1; + } + + vnn_iface = talloc_zero(vnn, struct vnn_interface); + if (vnn_iface == NULL) { + DBG_ERR("Memory allocation error\n"); + talloc_free(vnn); + return -1; + } + + vnn_iface->iface = i; + DLIST_ADD_END(vnn->ifaces, vnn_iface); + } + talloc_free(tmp); + vnn->public_address = *addr; + vnn->public_netmask_bits = mask; + vnn->pnn = -1; + + DLIST_ADD(ctdb->vnn, vnn); + + return 0; +} + +/* + setup the public address lists from a file +*/ +int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses) +{ + bool ok; + char **lines; + int nlines; + int i; + + /* If no public addresses file given then try the default */ + if (ctdb->public_addresses_file == NULL) { + const char *b = getenv("CTDB_BASE"); + if (b == NULL) { + DBG_ERR("CTDB_BASE not set\n"); + return -1; + } + ctdb->public_addresses_file = talloc_asprintf( + ctdb, "%s/%s", b, "public_addresses"); + if (ctdb->public_addresses_file == NULL) { + DBG_ERR("Out of memory\n"); + return -1; + } + } + + /* If the file doesn't exist then warn and do nothing */ + ok = file_exist(ctdb->public_addresses_file); + if (!ok) { + D_WARNING("Not loading public addresses, no file %s\n", + ctdb->public_addresses_file); + return 0; + } + + lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb); + if (lines == NULL) { + ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file); + return -1; + } + while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) { + nlines--; + } + + for (i=0;i<nlines;i++) { + unsigned mask; + ctdb_sock_addr addr; + const char *addrstr; + const char *ifaces; + char *tok, *line; + int ret; + + line = lines[i]; + while ((*line == ' ') || (*line == '\t')) { + line++; + } + if (*line == '#') { + continue; + } + if (strcmp(line, "") == 0) { + continue; + } + tok = strtok(line, " \t"); + addrstr = tok; + + tok = strtok(NULL, " \t"); + if (tok == NULL) { + D_ERR("No interface specified at line %u " + "of public addresses file\n", i+1); + talloc_free(lines); + return -1; + } + ifaces = tok; + + if (addrstr == NULL) { + D_ERR("Badly formed line %u in public address list\n", + i+1); + talloc_free(lines); + return -1; + } + + ret = ctdb_sock_addr_mask_from_string(addrstr, &addr, &mask); + if (ret != 0) { + D_ERR("Badly formed line %u in public address list\n", + i+1); + talloc_free(lines); + return -1; + } + + if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) { + DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1)); + talloc_free(lines); + return -1; + } + } + + + D_NOTICE("Loaded public addresses from %s\n", + ctdb->public_addresses_file); + + talloc_free(lines); + return 0; +} + +/* + destroy a ctdb_client_ip structure + */ +static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip) +{ + DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n", + ctdb_addr_to_str(&ip->addr), + ntohs(ip->addr.ip.sin_port), + ip->client_id)); + + DLIST_REMOVE(ip->ctdb->client_ip_list, ip); + return 0; +} + +/* + called by a client to inform us of a TCP connection that it is managing + that should tickled with an ACK when IP takeover is done + */ +int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id, + TDB_DATA indata) +{ + struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client); + struct ctdb_connection *tcp_sock = NULL; + struct ctdb_tcp_list *tcp; + struct ctdb_connection t; + int ret; + TDB_DATA data; + struct ctdb_client_ip *ip; + struct ctdb_vnn *vnn; + ctdb_sock_addr src_addr; + ctdb_sock_addr dst_addr; + + /* If we don't have public IPs, tickles are useless */ + if (ctdb->vnn == NULL) { + return 0; + } + + tcp_sock = (struct ctdb_connection *)indata.dptr; + + src_addr = tcp_sock->src; + ctdb_canonicalize_ip(&src_addr, &tcp_sock->src); + ZERO_STRUCT(src_addr); + memcpy(&src_addr, &tcp_sock->src, sizeof(src_addr)); + + dst_addr = tcp_sock->dst; + ctdb_canonicalize_ip(&dst_addr, &tcp_sock->dst); + ZERO_STRUCT(dst_addr); + memcpy(&dst_addr, &tcp_sock->dst, sizeof(dst_addr)); + + vnn = find_public_ip_vnn(ctdb, &dst_addr); + if (vnn == NULL) { + char *src_addr_str = NULL; + char *dst_addr_str = NULL; + + switch (dst_addr.sa.sa_family) { + case AF_INET: + if (ntohl(dst_addr.ip.sin_addr.s_addr) == INADDR_LOOPBACK) { + /* ignore ... */ + return 0; + } + break; + case AF_INET6: + break; + default: + DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", + dst_addr.sa.sa_family)); + return 0; + } + + src_addr_str = ctdb_sock_addr_to_string(client, &src_addr, false); + dst_addr_str = ctdb_sock_addr_to_string(client, &dst_addr, false); + DEBUG(DEBUG_ERR,( + "Could not register TCP connection from " + "%s to %s (not a public address) (port %u) " + "(client_id %u pid %u).\n", + src_addr_str, + dst_addr_str, + ctdb_sock_addr_port(&dst_addr), + client_id, client->pid)); + TALLOC_FREE(src_addr_str); + TALLOC_FREE(dst_addr_str); + return 0; + } + + if (vnn->pnn != ctdb->pnn) { + DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n", + ctdb_addr_to_str(&dst_addr), + client_id, client->pid)); + /* failing this call will tell smbd to die */ + return -1; + } + + ip = talloc(client, struct ctdb_client_ip); + CTDB_NO_MEMORY(ctdb, ip); + + ip->ctdb = ctdb; + ip->addr = dst_addr; + ip->client_id = client_id; + talloc_set_destructor(ip, ctdb_client_ip_destructor); + DLIST_ADD(ctdb->client_ip_list, ip); + + tcp = talloc(client, struct ctdb_tcp_list); + CTDB_NO_MEMORY(ctdb, tcp); + + tcp->connection.src = tcp_sock->src; + tcp->connection.dst = tcp_sock->dst; + + DLIST_ADD(client->tcp_list, tcp); + + t.src = tcp_sock->src; + t.dst = tcp_sock->dst; + + data.dptr = (uint8_t *)&t; + data.dsize = sizeof(t); + + switch (dst_addr.sa.sa_family) { + case AF_INET: + DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n", + (unsigned)ntohs(tcp_sock->dst.ip.sin_port), + ctdb_addr_to_str(&tcp_sock->src), + (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid)); + break; + case AF_INET6: + DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n", + (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port), + ctdb_addr_to_str(&tcp_sock->src), + (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid)); + break; + default: + DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", + dst_addr.sa.sa_family)); + } + + + /* tell all nodes about this tcp connection */ + ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, + CTDB_CONTROL_TCP_ADD, + 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL); + if (ret != 0) { + DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n")); + return -1; + } + + return 0; +} + +/* + find a tcp address on a list + */ +static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, + struct ctdb_connection *tcp) +{ + unsigned int i; + + if (array == NULL) { + return NULL; + } + + for (i=0;i<array->num;i++) { + if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) && + ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) { + return &array->connections[i]; + } + } + return NULL; +} + + + +/* + called by a daemon to inform us of a TCP connection that one of its + clients managing that should tickled with an ACK when IP takeover is + done + */ +int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed) +{ + struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr; + struct ctdb_tcp_array *tcparray; + struct ctdb_connection tcp; + struct ctdb_vnn *vnn; + + /* If we don't have public IPs, tickles are useless */ + if (ctdb->vnn == NULL) { + return 0; + } + + vnn = find_public_ip_vnn(ctdb, &p->dst); + if (vnn == NULL) { + DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n", + ctdb_addr_to_str(&p->dst))); + + return -1; + } + + + tcparray = vnn->tcp_array; + + /* If this is the first tickle */ + if (tcparray == NULL) { + tcparray = talloc(vnn, struct ctdb_tcp_array); + CTDB_NO_MEMORY(ctdb, tcparray); + vnn->tcp_array = tcparray; + + tcparray->num = 0; + tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection)); + CTDB_NO_MEMORY(ctdb, tcparray->connections); + + tcparray->connections[tcparray->num].src = p->src; + tcparray->connections[tcparray->num].dst = p->dst; + tcparray->num++; + + if (tcp_update_needed) { + vnn->tcp_update_needed = true; + } + return 0; + } + + + /* Do we already have this tickle ?*/ + tcp.src = p->src; + tcp.dst = p->dst; + if (ctdb_tcp_find(tcparray, &tcp) != NULL) { + DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n", + ctdb_addr_to_str(&tcp.dst), + ntohs(tcp.dst.ip.sin_port), + vnn->pnn)); + return 0; + } + + /* A new tickle, we must add it to the array */ + tcparray->connections = talloc_realloc(tcparray, tcparray->connections, + struct ctdb_connection, + tcparray->num+1); + CTDB_NO_MEMORY(ctdb, tcparray->connections); + + tcparray->connections[tcparray->num].src = p->src; + tcparray->connections[tcparray->num].dst = p->dst; + tcparray->num++; + + DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n", + ctdb_addr_to_str(&tcp.dst), + ntohs(tcp.dst.ip.sin_port), + vnn->pnn)); + + if (tcp_update_needed) { + vnn->tcp_update_needed = true; + } + + return 0; +} + + +static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn) +{ + struct ctdb_connection *tcpp; + + if (vnn == NULL) { + return; + } + + /* if the array is empty we cant remove it + and we don't need to do anything + */ + if (vnn->tcp_array == NULL) { + DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist (array is empty) %s:%u\n", + ctdb_addr_to_str(&conn->dst), + ntohs(conn->dst.ip.sin_port))); + return; + } + + + /* See if we know this connection + if we don't know this connection then we don't need to do anything + */ + tcpp = ctdb_tcp_find(vnn->tcp_array, conn); + if (tcpp == NULL) { + DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist %s:%u\n", + ctdb_addr_to_str(&conn->dst), + ntohs(conn->dst.ip.sin_port))); + return; + } + + + /* We need to remove this entry from the array. + Instead of allocating a new array and copying data to it + we cheat and just copy the last entry in the existing array + to the entry that is to be removed and just shring the + ->num field + */ + *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1]; + vnn->tcp_array->num--; + + /* If we deleted the last entry we also need to remove the entire array + */ + if (vnn->tcp_array->num == 0) { + talloc_free(vnn->tcp_array); + vnn->tcp_array = NULL; + } + + vnn->tcp_update_needed = true; + + DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n", + ctdb_addr_to_str(&conn->src), + ntohs(conn->src.ip.sin_port))); +} + + +/* + called by a daemon to inform us of a TCP connection that one of its + clients used are no longer needed in the tickle database + */ +int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata) +{ + struct ctdb_vnn *vnn; + struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr; + + /* If we don't have public IPs, tickles are useless */ + if (ctdb->vnn == NULL) { + return 0; + } + + vnn = find_public_ip_vnn(ctdb, &conn->dst); + if (vnn == NULL) { + DEBUG(DEBUG_ERR, + (__location__ " unable to find public address %s\n", + ctdb_addr_to_str(&conn->dst))); + return 0; + } + + ctdb_remove_connection(vnn, conn); + + return 0; +} + + +static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb, + bool force); + +/* + Called when another daemon starts - causes all tickles for all + public addresses we are serving to be sent to the new node on the + next check. This actually causes the tickles to be sent to the + other node immediately. In case there is an error, the periodic + timer will send the updates on timer event. This is simple and + doesn't require careful error handling. + */ +int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn) +{ + DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n", + (unsigned long) pnn)); + + ctdb_send_set_tcp_tickles_for_all(ctdb, true); + return 0; +} + + +/* + called when a client structure goes away - hook to remove + elements from the tcp_list in all daemons + */ +void ctdb_takeover_client_destructor_hook(struct ctdb_client *client) +{ + while (client->tcp_list) { + struct ctdb_vnn *vnn; + struct ctdb_tcp_list *tcp = client->tcp_list; + struct ctdb_connection *conn = &tcp->connection; + + DLIST_REMOVE(client->tcp_list, tcp); + + vnn = find_public_ip_vnn(client->ctdb, + &conn->dst); + if (vnn == NULL) { + DEBUG(DEBUG_ERR, + (__location__ " unable to find public address %s\n", + ctdb_addr_to_str(&conn->dst))); + continue; + } + + /* If the IP address is hosted on this node then + * remove the connection. */ + if (vnn->pnn == client->ctdb->pnn) { + ctdb_remove_connection(vnn, conn); + } + + /* Otherwise this function has been called because the + * server IP address has been released to another node + * and the client has exited. This means that we + * should not delete the connection information. The + * takeover node processes connections too. */ + } +} + + +void ctdb_release_all_ips(struct ctdb_context *ctdb) +{ + struct ctdb_vnn *vnn, *next; + int count = 0; + + if (ctdb_config.failover_disabled == 1) { + return; + } + + for (vnn = ctdb->vnn; vnn != NULL; vnn = next) { + /* vnn can be freed below in release_ip_post() */ + next = vnn->next; + + if (!ctdb_sys_have_ip(&vnn->public_address)) { + ctdb_vnn_unassign_iface(ctdb, vnn); + continue; + } + + /* Don't allow multiple releases at once. Some code, + * particularly ctdb_tickle_sentenced_connections() is + * not re-entrant */ + if (vnn->update_in_flight) { + DEBUG(DEBUG_WARNING, + (__location__ + " Not releasing IP %s/%u on interface %s, an update is already in progress\n", + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits, + ctdb_vnn_iface_string(vnn))); + continue; + } + vnn->update_in_flight = true; + + DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n", + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits, + ctdb_vnn_iface_string(vnn))); + + ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u", + ctdb_vnn_iface_string(vnn), + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits); + /* releaseip timeouts are converted to success, so to + * detect failures just check if the IP address is + * still there... + */ + if (ctdb_sys_have_ip(&vnn->public_address)) { + DEBUG(DEBUG_ERR, + (__location__ + " IP address %s not released\n", + ctdb_addr_to_str(&vnn->public_address))); + vnn->update_in_flight = false; + continue; + } + + vnn = release_ip_post(ctdb, vnn, &vnn->public_address); + if (vnn != NULL) { + vnn->update_in_flight = false; + } + count++; + } + + DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count)); +} + + +/* + get list of public IPs + */ +int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, + struct ctdb_req_control_old *c, TDB_DATA *outdata) +{ + int i, num, len; + struct ctdb_public_ip_list_old *ips; + struct ctdb_vnn *vnn; + bool only_available = false; + + if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) { + only_available = true; + } + + /* count how many public ip structures we have */ + num = 0; + for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { + num++; + } + + len = offsetof(struct ctdb_public_ip_list_old, ips) + + num*sizeof(struct ctdb_public_ip); + ips = talloc_zero_size(outdata, len); + CTDB_NO_MEMORY(ctdb, ips); + + i = 0; + for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { + if (only_available && !ctdb_vnn_available(ctdb, vnn)) { + continue; + } + ips->ips[i].pnn = vnn->pnn; + ips->ips[i].addr = vnn->public_address; + i++; + } + ips->num = i; + len = offsetof(struct ctdb_public_ip_list_old, ips) + + i*sizeof(struct ctdb_public_ip); + + outdata->dsize = len; + outdata->dptr = (uint8_t *)ips; + + return 0; +} + + +int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb, + struct ctdb_req_control_old *c, + TDB_DATA indata, + TDB_DATA *outdata) +{ + int i, num, len; + ctdb_sock_addr *addr; + struct ctdb_public_ip_info_old *info; + struct ctdb_vnn *vnn; + struct vnn_interface *iface; + + addr = (ctdb_sock_addr *)indata.dptr; + + vnn = find_public_ip_vnn(ctdb, addr); + if (vnn == NULL) { + DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, " + "'%s'not a public address\n", + ctdb_addr_to_str(addr))); + return -1; + } + + /* count how many public ip structures we have */ + num = 0; + for (iface = vnn->ifaces; iface != NULL; iface = iface->next) { + num++; + } + + len = offsetof(struct ctdb_public_ip_info_old, ifaces) + + num*sizeof(struct ctdb_iface); + info = talloc_zero_size(outdata, len); + CTDB_NO_MEMORY(ctdb, info); + + info->ip.addr = vnn->public_address; + info->ip.pnn = vnn->pnn; + info->active_idx = 0xFFFFFFFF; + + i = 0; + for (iface = vnn->ifaces; iface != NULL; iface = iface->next) { + struct ctdb_interface *cur; + + cur = iface->iface; + if (vnn->iface == cur) { + info->active_idx = i; + } + strncpy(info->ifaces[i].name, cur->name, + sizeof(info->ifaces[i].name)); + info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0'; + info->ifaces[i].link_state = cur->link_up; + info->ifaces[i].references = cur->references; + + i++; + } + info->num = i; + len = offsetof(struct ctdb_public_ip_info_old, ifaces) + + i*sizeof(struct ctdb_iface); + + outdata->dsize = len; + outdata->dptr = (uint8_t *)info; + + return 0; +} + +int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb, + struct ctdb_req_control_old *c, + TDB_DATA *outdata) +{ + int i, num, len; + struct ctdb_iface_list_old *ifaces; + struct ctdb_interface *cur; + + /* count how many public ip structures we have */ + num = 0; + for (cur=ctdb->ifaces;cur;cur=cur->next) { + num++; + } + + len = offsetof(struct ctdb_iface_list_old, ifaces) + + num*sizeof(struct ctdb_iface); + ifaces = talloc_zero_size(outdata, len); + CTDB_NO_MEMORY(ctdb, ifaces); + + i = 0; + for (cur=ctdb->ifaces;cur;cur=cur->next) { + strncpy(ifaces->ifaces[i].name, cur->name, + sizeof(ifaces->ifaces[i].name)); + ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0'; + ifaces->ifaces[i].link_state = cur->link_up; + ifaces->ifaces[i].references = cur->references; + i++; + } + ifaces->num = i; + len = offsetof(struct ctdb_iface_list_old, ifaces) + + i*sizeof(struct ctdb_iface); + + outdata->dsize = len; + outdata->dptr = (uint8_t *)ifaces; + + return 0; +} + +int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb, + struct ctdb_req_control_old *c, + TDB_DATA indata) +{ + struct ctdb_iface *info; + struct ctdb_interface *iface; + bool link_up = false; + + info = (struct ctdb_iface *)indata.dptr; + + if (info->name[CTDB_IFACE_SIZE] != '\0') { + int len = strnlen(info->name, CTDB_IFACE_SIZE); + DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n", + len, len, info->name)); + return -1; + } + + switch (info->link_state) { + case 0: + link_up = false; + break; + case 1: + link_up = true; + break; + default: + DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n", + (unsigned int)info->link_state)); + return -1; + } + + if (info->references != 0) { + DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n", + (unsigned int)info->references)); + return -1; + } + + iface = ctdb_find_iface(ctdb, info->name); + if (iface == NULL) { + return -1; + } + + if (link_up == iface->link_up) { + return 0; + } + + DEBUG(DEBUG_ERR, + ("iface[%s] has changed it's link status %s => %s\n", + iface->name, + iface->link_up?"up":"down", + link_up?"up":"down")); + + iface->link_up = link_up; + return 0; +} + + +/* + called by a daemon to inform us of the entire list of TCP tickles for + a particular public address. + this control should only be sent by the node that is currently serving + that public address. + */ +int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata) +{ + struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr; + struct ctdb_tcp_array *tcparray; + struct ctdb_vnn *vnn; + + /* We must at least have tickles.num or else we cant verify the size + of the received data blob + */ + if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) { + DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n")); + return -1; + } + + /* verify that the size of data matches what we expect */ + if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections) + + sizeof(struct ctdb_connection) * list->num) { + DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n")); + return -1; + } + + DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n", + ctdb_addr_to_str(&list->addr))); + + vnn = find_public_ip_vnn(ctdb, &list->addr); + if (vnn == NULL) { + DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", + ctdb_addr_to_str(&list->addr))); + + return 1; + } + + if (vnn->pnn == ctdb->pnn) { + DEBUG(DEBUG_INFO, + ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n", + ctdb_addr_to_str(&list->addr))); + return 0; + } + + /* remove any old ticklelist we might have */ + talloc_free(vnn->tcp_array); + vnn->tcp_array = NULL; + + tcparray = talloc(vnn, struct ctdb_tcp_array); + CTDB_NO_MEMORY(ctdb, tcparray); + + tcparray->num = list->num; + + tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num); + CTDB_NO_MEMORY(ctdb, tcparray->connections); + + memcpy(tcparray->connections, &list->connections[0], + sizeof(struct ctdb_connection)*tcparray->num); + + /* We now have a new fresh tickle list array for this vnn */ + vnn->tcp_array = tcparray; + + return 0; +} + +/* + called to return the full list of tickles for the puclic address associated + with the provided vnn + */ +int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata) +{ + ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr; + struct ctdb_tickle_list_old *list; + struct ctdb_tcp_array *tcparray; + unsigned int num, i; + struct ctdb_vnn *vnn; + unsigned port; + + vnn = find_public_ip_vnn(ctdb, addr); + if (vnn == NULL) { + DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", + ctdb_addr_to_str(addr))); + + return 1; + } + + port = ctdb_addr_to_port(addr); + + tcparray = vnn->tcp_array; + num = 0; + if (tcparray != NULL) { + if (port == 0) { + /* All connections */ + num = tcparray->num; + } else { + /* Count connections for port */ + for (i = 0; i < tcparray->num; i++) { + if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) { + num++; + } + } + } + } + + outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections) + + sizeof(struct ctdb_connection) * num; + + outdata->dptr = talloc_size(outdata, outdata->dsize); + CTDB_NO_MEMORY(ctdb, outdata->dptr); + list = (struct ctdb_tickle_list_old *)outdata->dptr; + + list->addr = *addr; + list->num = num; + + if (num == 0) { + return 0; + } + + num = 0; + for (i = 0; i < tcparray->num; i++) { + if (port == 0 || \ + port == ctdb_addr_to_port(&tcparray->connections[i].dst)) { + list->connections[num] = tcparray->connections[i]; + num++; + } + } + + return 0; +} + + +/* + set the list of all tcp tickles for a public address + */ +static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb, + ctdb_sock_addr *addr, + struct ctdb_tcp_array *tcparray) +{ + int ret, num; + TDB_DATA data; + struct ctdb_tickle_list_old *list; + + if (tcparray) { + num = tcparray->num; + } else { + num = 0; + } + + data.dsize = offsetof(struct ctdb_tickle_list_old, connections) + + sizeof(struct ctdb_connection) * num; + data.dptr = talloc_size(ctdb, data.dsize); + CTDB_NO_MEMORY(ctdb, data.dptr); + + list = (struct ctdb_tickle_list_old *)data.dptr; + list->addr = *addr; + list->num = num; + if (tcparray) { + memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num); + } + + ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0, + CTDB_CONTROL_SET_TCP_TICKLE_LIST, + 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL); + if (ret != 0) { + DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n")); + return -1; + } + + talloc_free(data.dptr); + + return ret; +} + +static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb, + bool force) +{ + struct ctdb_vnn *vnn; + int ret; + + for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) { + /* we only send out updates for public addresses that + we have taken over + */ + if (ctdb->pnn != vnn->pnn) { + continue; + } + + /* We only send out the updates if we need to */ + if (!force && !vnn->tcp_update_needed) { + continue; + } + + ret = ctdb_send_set_tcp_tickles_for_ip(ctdb, + &vnn->public_address, + vnn->tcp_array); + if (ret != 0) { + D_ERR("Failed to send the tickle update for ip %s\n", + ctdb_addr_to_str(&vnn->public_address)); + vnn->tcp_update_needed = true; + } else { + D_INFO("Sent tickle update for ip %s\n", + ctdb_addr_to_str(&vnn->public_address)); + vnn->tcp_update_needed = false; + } + } + +} + +/* + perform tickle updates if required + */ +static void ctdb_update_tcp_tickles(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval t, void *private_data) +{ + struct ctdb_context *ctdb = talloc_get_type( + private_data, struct ctdb_context); + + ctdb_send_set_tcp_tickles_for_all(ctdb, false); + + tevent_add_timer(ctdb->ev, ctdb->tickle_update_context, + timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), + ctdb_update_tcp_tickles, ctdb); +} + +/* + start periodic update of tcp tickles + */ +void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb) +{ + ctdb->tickle_update_context = talloc_new(ctdb); + + tevent_add_timer(ctdb->ev, ctdb->tickle_update_context, + timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), + ctdb_update_tcp_tickles, ctdb); +} + + + + +struct control_gratious_arp { + struct ctdb_context *ctdb; + ctdb_sock_addr addr; + const char *iface; + int count; +}; + +/* + send a control_gratuitous arp + */ +static void send_gratious_arp(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval t, void *private_data) +{ + int ret; + struct control_gratious_arp *arp = talloc_get_type(private_data, + struct control_gratious_arp); + + ret = ctdb_sys_send_arp(&arp->addr, arp->iface); + if (ret != 0) { + DBG_ERR("Failed to send gratuitous ARP on iface %s: %s\n", + arp->iface, strerror(ret)); + } + + + arp->count++; + if (arp->count == CTDB_ARP_REPEAT) { + talloc_free(arp); + return; + } + + tevent_add_timer(arp->ctdb->ev, arp, + timeval_current_ofs(CTDB_ARP_INTERVAL, 0), + send_gratious_arp, arp); +} + + +/* + send a gratious arp + */ +int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata) +{ + struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr; + struct control_gratious_arp *arp; + + /* verify the size of indata */ + if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) { + DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", + (unsigned)indata.dsize, + (unsigned)offsetof(struct ctdb_addr_info_old, iface))); + return -1; + } + if (indata.dsize != + ( offsetof(struct ctdb_addr_info_old, iface) + + gratious_arp->len ) ){ + + DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes " + "but should be %u bytes\n", + (unsigned)indata.dsize, + (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len))); + return -1; + } + + + arp = talloc(ctdb, struct control_gratious_arp); + CTDB_NO_MEMORY(ctdb, arp); + + arp->ctdb = ctdb; + arp->addr = gratious_arp->addr; + arp->iface = talloc_strdup(arp, gratious_arp->iface); + CTDB_NO_MEMORY(ctdb, arp->iface); + arp->count = 0; + + tevent_add_timer(arp->ctdb->ev, arp, + timeval_zero(), send_gratious_arp, arp); + + return 0; +} + +int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata) +{ + struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr; + int ret; + + /* verify the size of indata */ + if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) { + DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n")); + return -1; + } + if (indata.dsize != + ( offsetof(struct ctdb_addr_info_old, iface) + + pub->len ) ){ + + DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes " + "but should be %u bytes\n", + (unsigned)indata.dsize, + (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len))); + return -1; + } + + DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr))); + + ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true); + + if (ret != 0) { + DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n")); + return -1; + } + + return 0; +} + +int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata) +{ + struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr; + struct ctdb_vnn *vnn; + + /* verify the size of indata */ + if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) { + DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n")); + return -1; + } + if (indata.dsize != + ( offsetof(struct ctdb_addr_info_old, iface) + + pub->len ) ){ + + DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes " + "but should be %u bytes\n", + (unsigned)indata.dsize, + (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len))); + return -1; + } + + DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr))); + + /* walk over all public addresses until we find a match */ + for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { + if (ctdb_same_ip(&vnn->public_address, &pub->addr)) { + if (vnn->pnn == ctdb->pnn) { + /* This IP is currently being hosted. + * Defer the deletion until the next + * takeover run. "ctdb reloadips" will + * always cause a takeover run. "ctdb + * delip" will now need an explicit + * "ctdb ipreallocated" afterwards. */ + vnn->delete_pending = true; + } else { + /* This IP is not hosted on the + * current node so just delete it + * now. */ + do_delete_ip(ctdb, vnn); + } + + return 0; + } + } + + DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n", + ctdb_addr_to_str(&pub->addr))); + return -1; +} + + +struct ipreallocated_callback_state { + struct ctdb_req_control_old *c; +}; + +static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb, + int status, void *p) +{ + struct ipreallocated_callback_state *state = + talloc_get_type(p, struct ipreallocated_callback_state); + + if (status != 0) { + DEBUG(DEBUG_ERR, + (" \"ipreallocated\" event script failed (status %d)\n", + status)); + if (status == -ETIMEDOUT) { + ctdb_ban_self(ctdb); + } + } + + ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL); + talloc_free(state); +} + +/* A control to run the ipreallocated event */ +int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb, + struct ctdb_req_control_old *c, + bool *async_reply) +{ + int ret; + struct ipreallocated_callback_state *state; + + state = talloc(ctdb, struct ipreallocated_callback_state); + CTDB_NO_MEMORY(ctdb, state); + + DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n")); + + ret = ctdb_event_script_callback(ctdb, state, + ctdb_ipreallocated_callback, state, + CTDB_EVENT_IPREALLOCATED, + "%s", ""); + + if (ret != 0) { + DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n")); + talloc_free(state); + return -1; + } + + /* tell the control that we will be reply asynchronously */ + state->c = talloc_steal(state, c); + *async_reply = true; + + return 0; +} + + +struct ctdb_reloadips_handle { + struct ctdb_context *ctdb; + struct ctdb_req_control_old *c; + int status; + int fd[2]; + pid_t child; + struct tevent_fd *fde; +}; + +static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h) +{ + if (h == h->ctdb->reload_ips) { + h->ctdb->reload_ips = NULL; + } + if (h->c != NULL) { + ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL); + h->c = NULL; + } + ctdb_kill(h->ctdb, h->child, SIGKILL); + return 0; +} + +static void ctdb_reloadips_timeout_event(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval t, void *private_data) +{ + struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle); + + talloc_free(h); +} + +static void ctdb_reloadips_child_handler(struct tevent_context *ev, + struct tevent_fd *fde, + uint16_t flags, void *private_data) +{ + struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle); + + char res; + int ret; + + ret = sys_read(h->fd[0], &res, 1); + if (ret < 1 || res != 0) { + DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n")); + res = 1; + } + h->status = res; + + talloc_free(h); +} + +static int ctdb_reloadips_child(struct ctdb_context *ctdb) +{ + TALLOC_CTX *mem_ctx = talloc_new(NULL); + struct ctdb_public_ip_list_old *ips; + struct ctdb_vnn *vnn; + struct client_async_data *async_data; + struct timeval timeout; + TDB_DATA data; + struct ctdb_client_control_state *state; + bool first_add; + unsigned int i; + int ret; + + CTDB_NO_MEMORY(ctdb, mem_ctx); + + /* Read IPs from local node */ + ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(), + CTDB_CURRENT_NODE, mem_ctx, &ips); + if (ret != 0) { + DEBUG(DEBUG_ERR, + ("Unable to fetch public IPs from local node\n")); + talloc_free(mem_ctx); + return -1; + } + + /* Read IPs file - this is safe since this is a child process */ + ctdb->vnn = NULL; + if (ctdb_set_public_addresses(ctdb, false) != 0) { + DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n")); + talloc_free(mem_ctx); + return -1; + } + + async_data = talloc_zero(mem_ctx, struct client_async_data); + CTDB_NO_MEMORY(ctdb, async_data); + + /* Compare IPs between node and file for IPs to be deleted */ + for (i = 0; i < ips->num; i++) { + /* */ + for (vnn = ctdb->vnn; vnn; vnn = vnn->next) { + if (ctdb_same_ip(&vnn->public_address, + &ips->ips[i].addr)) { + /* IP is still in file */ + break; + } + } + + if (vnn == NULL) { + /* Delete IP ips->ips[i] */ + struct ctdb_addr_info_old *pub; + + DEBUG(DEBUG_NOTICE, + ("IP %s no longer configured, deleting it\n", + ctdb_addr_to_str(&ips->ips[i].addr))); + + pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old); + CTDB_NO_MEMORY(ctdb, pub); + + pub->addr = ips->ips[i].addr; + pub->mask = 0; + pub->len = 0; + + timeout = TAKEOVER_TIMEOUT(); + + data.dsize = offsetof(struct ctdb_addr_info_old, + iface) + pub->len; + data.dptr = (uint8_t *)pub; + + state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0, + CTDB_CONTROL_DEL_PUBLIC_IP, + 0, data, async_data, + &timeout, NULL); + if (state == NULL) { + DEBUG(DEBUG_ERR, + (__location__ + " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n")); + goto failed; + } + + ctdb_client_async_add(async_data, state); + } + } + + /* Compare IPs between node and file for IPs to be added */ + first_add = true; + for (vnn = ctdb->vnn; vnn; vnn = vnn->next) { + for (i = 0; i < ips->num; i++) { + if (ctdb_same_ip(&vnn->public_address, + &ips->ips[i].addr)) { + /* IP already on node */ + break; + } + } + if (i == ips->num) { + /* Add IP ips->ips[i] */ + struct ctdb_addr_info_old *pub; + const char *ifaces = NULL; + uint32_t len; + struct vnn_interface *iface = NULL; + + DEBUG(DEBUG_NOTICE, + ("New IP %s configured, adding it\n", + ctdb_addr_to_str(&vnn->public_address))); + if (first_add) { + uint32_t pnn = ctdb_get_pnn(ctdb); + + data.dsize = sizeof(pnn); + data.dptr = (uint8_t *)&pnn; + + ret = ctdb_client_send_message( + ctdb, + CTDB_BROADCAST_CONNECTED, + CTDB_SRVID_REBALANCE_NODE, + data); + if (ret != 0) { + DEBUG(DEBUG_WARNING, + ("Failed to send message to force node reallocation - IPs may be unbalanced\n")); + } + + first_add = false; + } + + ifaces = vnn->ifaces->iface->name; + iface = vnn->ifaces->next; + while (iface != NULL) { + ifaces = talloc_asprintf(vnn, "%s,%s", ifaces, + iface->iface->name); + iface = iface->next; + } + + len = strlen(ifaces) + 1; + pub = talloc_zero_size(mem_ctx, + offsetof(struct ctdb_addr_info_old, iface) + len); + CTDB_NO_MEMORY(ctdb, pub); + + pub->addr = vnn->public_address; + pub->mask = vnn->public_netmask_bits; + pub->len = len; + memcpy(&pub->iface[0], ifaces, pub->len); + + timeout = TAKEOVER_TIMEOUT(); + + data.dsize = offsetof(struct ctdb_addr_info_old, + iface) + pub->len; + data.dptr = (uint8_t *)pub; + + state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0, + CTDB_CONTROL_ADD_PUBLIC_IP, + 0, data, async_data, + &timeout, NULL); + if (state == NULL) { + DEBUG(DEBUG_ERR, + (__location__ + " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n")); + goto failed; + } + + ctdb_client_async_add(async_data, state); + } + } + + if (ctdb_client_async_wait(ctdb, async_data) != 0) { + DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n")); + goto failed; + } + + talloc_free(mem_ctx); + return 0; + +failed: + talloc_free(mem_ctx); + return -1; +} + +/* This control is sent to force the node to re-read the public addresses file + and drop any addresses we should nnot longer host, and add new addresses + that we are now able to host +*/ +int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply) +{ + struct ctdb_reloadips_handle *h; + pid_t parent = getpid(); + + if (ctdb->reload_ips != NULL) { + talloc_free(ctdb->reload_ips); + ctdb->reload_ips = NULL; + } + + h = talloc(ctdb, struct ctdb_reloadips_handle); + CTDB_NO_MEMORY(ctdb, h); + h->ctdb = ctdb; + h->c = NULL; + h->status = -1; + + if (pipe(h->fd) == -1) { + DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n")); + talloc_free(h); + return -1; + } + + h->child = ctdb_fork(ctdb); + if (h->child == (pid_t)-1) { + DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n")); + close(h->fd[0]); + close(h->fd[1]); + talloc_free(h); + return -1; + } + + /* child process */ + if (h->child == 0) { + signed char res = 0; + + close(h->fd[0]); + + prctl_set_comment("ctdb_reloadips"); + if (switch_from_server_to_client(ctdb) != 0) { + DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n")); + res = -1; + } else { + res = ctdb_reloadips_child(ctdb); + if (res != 0) { + DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n")); + } + } + + sys_write(h->fd[1], &res, 1); + ctdb_wait_for_process_to_exit(parent); + _exit(0); + } + + h->c = talloc_steal(h, c); + + close(h->fd[1]); + set_close_on_exec(h->fd[0]); + + talloc_set_destructor(h, ctdb_reloadips_destructor); + + + h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ, + ctdb_reloadips_child_handler, (void *)h); + tevent_fd_set_auto_close(h->fde); + + tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0), + ctdb_reloadips_timeout_event, h); + + /* we reply later */ + *async_reply = true; + return 0; +} |