summaryrefslogtreecommitdiffstats
path: root/ctdb/server/ipalloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'ctdb/server/ipalloc.c')
-rw-r--r--ctdb/server/ipalloc.c284
1 files changed, 284 insertions, 0 deletions
diff --git a/ctdb/server/ipalloc.c b/ctdb/server/ipalloc.c
new file mode 100644
index 0000000..7f49364
--- /dev/null
+++ b/ctdb/server/ipalloc.c
@@ -0,0 +1,284 @@
+/*
+ ctdb ip takeover code
+
+ Copyright (C) Ronnie Sahlberg 2007
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Martin Schwenke 2011
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+
+#include <talloc.h>
+
+#include "lib/util/debug.h"
+
+#include "common/logging.h"
+#include "common/rb_tree.h"
+
+#include "protocol/protocol_util.h"
+
+#include "server/ipalloc_private.h"
+
+/* Initialise main ipalloc state and sub-structures */
+struct ipalloc_state *
+ipalloc_state_init(TALLOC_CTX *mem_ctx,
+ uint32_t num_nodes,
+ enum ipalloc_algorithm algorithm,
+ bool no_ip_takeover,
+ bool no_ip_failback,
+ uint32_t *force_rebalance_nodes)
+{
+ struct ipalloc_state *ipalloc_state =
+ talloc_zero(mem_ctx, struct ipalloc_state);
+ if (ipalloc_state == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+ return NULL;
+ }
+
+ ipalloc_state->num = num_nodes;
+
+ ipalloc_state->algorithm = algorithm;
+ ipalloc_state->no_ip_takeover = no_ip_takeover;
+ ipalloc_state->no_ip_failback = no_ip_failback;
+ ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
+
+ return ipalloc_state;
+}
+
+static void *add_ip_callback(void *parm, void *data)
+{
+ struct public_ip_list *this_ip = parm;
+ struct public_ip_list *prev_ip = data;
+
+ if (prev_ip == NULL) {
+ return parm;
+ }
+ if (this_ip->pnn == CTDB_UNKNOWN_PNN) {
+ this_ip->pnn = prev_ip->pnn;
+ }
+
+ return parm;
+}
+
+static int getips_count_callback(void *param, void *data)
+{
+ struct public_ip_list **ip_list = (struct public_ip_list **)param;
+ struct public_ip_list *new_ip = (struct public_ip_list *)data;
+
+ new_ip->next = *ip_list;
+ *ip_list = new_ip;
+ return 0;
+}
+
+/* Nodes only know about those public addresses that they are
+ * configured to serve and no individual node has a full list of all
+ * public addresses configured across the cluster. Therefore, a
+ * merged list of all public addresses needs to be built so that IP
+ * allocation can be done. */
+static struct public_ip_list *
+create_merged_ip_list(struct ipalloc_state *ipalloc_state)
+{
+ unsigned int i, j;
+ struct public_ip_list *ip_list;
+ struct ctdb_public_ip_list *public_ips;
+ struct trbt_tree *ip_tree;
+ int ret;
+
+ ip_tree = trbt_create(ipalloc_state, 0);
+
+ if (ipalloc_state->known_public_ips == NULL) {
+ DEBUG(DEBUG_ERR, ("Known public IPs not set\n"));
+ return NULL;
+ }
+
+ for (i=0; i < ipalloc_state->num; i++) {
+
+ public_ips = &ipalloc_state->known_public_ips[i];
+
+ for (j=0; j < public_ips->num; j++) {
+ struct public_ip_list *tmp_ip;
+
+ /* This is returned as part of ip_list */
+ tmp_ip = talloc_zero(ipalloc_state, struct public_ip_list);
+ if (tmp_ip == NULL) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " out of memory\n"));
+ talloc_free(ip_tree);
+ return NULL;
+ }
+
+ /* Do not use information about IP addresses hosted
+ * on other nodes, it may not be accurate */
+ if (public_ips->ip[j].pnn == i) {
+ tmp_ip->pnn = public_ips->ip[j].pnn;
+ } else {
+ tmp_ip->pnn = CTDB_UNKNOWN_PNN;
+ }
+ tmp_ip->addr = public_ips->ip[j].addr;
+ tmp_ip->next = NULL;
+
+ trbt_insertarray32_callback(ip_tree,
+ IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
+ add_ip_callback,
+ tmp_ip);
+ }
+ }
+
+ ip_list = NULL;
+ ret = trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
+ if (ret != 0) {
+ DBG_ERR("Error traversing the IP tree.\n");
+ }
+
+ talloc_free(ip_tree);
+
+ return ip_list;
+}
+
+static bool populate_bitmap(struct ipalloc_state *ipalloc_state)
+{
+ struct public_ip_list *ip = NULL;
+ unsigned int i, j;
+
+ for (ip = ipalloc_state->all_ips; ip != NULL; ip = ip->next) {
+
+ ip->known_on = bitmap_talloc(ip, ipalloc_state->num);
+ if (ip->known_on == NULL) {
+ return false;
+ }
+
+ ip->available_on = bitmap_talloc(ip, ipalloc_state->num);
+ if (ip->available_on == NULL) {
+ return false;
+ }
+
+ for (i = 0; i < ipalloc_state->num; i++) {
+ struct ctdb_public_ip_list *known =
+ &ipalloc_state->known_public_ips[i];
+ struct ctdb_public_ip_list *avail =
+ &ipalloc_state->available_public_ips[i];
+
+ /* Check to see if "ip" is available on node "i" */
+ for (j = 0; j < avail->num; j++) {
+ if (ctdb_sock_addr_same_ip(
+ &ip->addr, &avail->ip[j].addr)) {
+ bitmap_set(ip->available_on, i);
+ break;
+ }
+ }
+
+ /* Optimisation: available => known */
+ if (bitmap_query(ip->available_on, i)) {
+ bitmap_set(ip->known_on, i);
+ continue;
+ }
+
+ /* Check to see if "ip" is known on node "i" */
+ for (j = 0; j < known->num; j++) {
+ if (ctdb_sock_addr_same_ip(
+ &ip->addr, &known->ip[j].addr)) {
+ bitmap_set(ip->known_on, i);
+ break;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+void ipalloc_set_public_ips(struct ipalloc_state *ipalloc_state,
+ struct ctdb_public_ip_list *known_ips,
+ struct ctdb_public_ip_list *available_ips)
+{
+ ipalloc_state->available_public_ips = available_ips;
+ ipalloc_state->known_public_ips = known_ips;
+}
+
+/* This can only return false if there are no available IPs *and*
+ * there are no IP addresses currently allocated. If the latter is
+ * true then the cluster can clearly host IPs... just not necessarily
+ * right now... */
+bool ipalloc_can_host_ips(struct ipalloc_state *ipalloc_state)
+{
+ unsigned int i;
+ bool have_ips = false;
+
+ for (i=0; i < ipalloc_state->num; i++) {
+ struct ctdb_public_ip_list *ips =
+ ipalloc_state->known_public_ips;
+ if (ips[i].num != 0) {
+ unsigned int j;
+ have_ips = true;
+ /* Succeed if an address is hosted on node i */
+ for (j=0; j < ips[i].num; j++) {
+ if (ips[i].ip[j].pnn == i) {
+ return true;
+ }
+ }
+ }
+ }
+
+ if (! have_ips) {
+ return false;
+ }
+
+ /* At this point there are known addresses but none are
+ * hosted. Need to check if cluster can now host some
+ * addresses.
+ */
+ for (i=0; i < ipalloc_state->num; i++) {
+ if (ipalloc_state->available_public_ips[i].num != 0) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* The calculation part of the IP allocation algorithm. */
+struct public_ip_list *ipalloc(struct ipalloc_state *ipalloc_state)
+{
+ bool ret = false;
+
+ ipalloc_state->all_ips = create_merged_ip_list(ipalloc_state);
+ if (ipalloc_state->all_ips == NULL) {
+ return NULL;
+ }
+
+ if (!populate_bitmap(ipalloc_state)) {
+ return NULL;
+ }
+
+ switch (ipalloc_state->algorithm) {
+ case IPALLOC_LCP2:
+ ret = ipalloc_lcp2(ipalloc_state);
+ break;
+ case IPALLOC_DETERMINISTIC:
+ ret = ipalloc_deterministic(ipalloc_state);
+ break;
+ case IPALLOC_NONDETERMINISTIC:
+ ret = ipalloc_nondeterministic(ipalloc_state);
+ break;
+ }
+
+ /* at this point ->pnn is the node which will own each IP
+ or CTDB_UNKNOWN_PNN if there is no node that can cover this ip
+ */
+
+ return (ret ? ipalloc_state->all_ips : NULL);
+}