summaryrefslogtreecommitdiff
path: root/ctdb/server/ctdb_takeover_helper.c
diff options
context:
space:
mode:
author Martin Schwenke <martin@meltin.net> 2016-11-10 16:47:38 +1100
committer Amitay Isaacs <amitay@samba.org> 2016-12-19 04:07:08 +0100
commit 8d9b41d42772e8d9fb56548f06f0602d926db6d2 (patch)
tree ff16c5a1741c61df963610d51484bf730d19506d /ctdb/server/ctdb_takeover_helper.c
parent 605347faf6fc8f05f945b38a0d351efe378a18d2 (diff)
download samba-8d9b41d42772e8d9fb56548f06f0602d926db6d2.tar.gz
ctdb-takeover: Add takeover helper
Signed-off-by: Martin Schwenke <martin@meltin.net> Reviewed-by: Amitay Isaacs <amitay@gmail.com>
Diffstat (limited to 'ctdb/server/ctdb_takeover_helper.c')
-rw-r--r-- ctdb/server/ctdb_takeover_helper.c 1206
1 files changed, 1206 insertions, 0 deletions
diff --git a/ctdb/server/ctdb_takeover_helper.c b/ctdb/server/ctdb_takeover_helper.c
new file mode 100644
index 00000000000..847a49d2734
--- /dev/null
+++ b/ctdb/server/ctdb_takeover_helper.c
@@ -0,0 +1,1206 @@
+/*
+ CTDB IP takeover helper
+
+ Copyright (C) Martin Schwenke 2016
+
+ Based on ctdb_recovery_helper.c
+ Copyright (C) Amitay Isaacs 2015
+
+ and ctdb_takeover.c
+ Copyright (C) Ronnie Sahlberg 2007
+ Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Martin Schwenke 2011
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "replace.h"
+#include "system/network.h"
+#include "system/filesys.h"
+
+#include <popt.h>
+#include <talloc.h>
+#include <tevent.h>
+
+#include "lib/util/debug.h"
+#include "lib/util/strv.h"
+#include "lib/util/strv_util.h"
+#include "lib/util/sys_rw.h"
+#include "lib/util/time.h"
+#include "lib/util/tevent_unix.h"
+
+#include "protocol/protocol.h"
+#include "protocol/protocol_api.h"
+#include "client/client.h"
+
+#include "common/logging.h"
+
+#include "server/ipalloc.h"
+
+static int takeover_timeout = 9; /* seconds used by TIMEOUT(); starts at 9 and is overwritten from the takeover_timeout tunable in takeover_tunables_done() */
+
+#define TIMEOUT() timeval_current_ofs(takeover_timeout, 0) /* expands to timeval_current_ofs() with takeover_timeout seconds, re-evaluated at every use */
+
+/*
+ * Utility functions
+ */
+
+/*
+ * Shared "recv" step for requests that carry no result data.
+ *
+ * Returns true when req did not finish with a unix error.  Otherwise
+ * returns false and, if perr is not NULL, stores the error in *perr.
+ */
+static bool generic_recv(struct tevent_req *req, int *perr)
+{
+	int unix_err;
+
+	if (! tevent_req_is_unix_error(req, &unix_err)) {
+		return true;
+	}
+
+	if (perr != NULL) {
+		*perr = unix_err;
+	}
+
+	return false;
+}
+
+/*
+ * Translate the ip_alloc_algorithm tunable into an ipalloc algorithm:
+ * 0 selects deterministic, 1 selects non-deterministic, and 2 or any
+ * other value selects LCP2.
+ */
+static enum ipalloc_algorithm
+determine_algorithm(const struct ctdb_tunable_list *tunables)
+{
+	if (tunables->ip_alloc_algorithm == 0) {
+		return IPALLOC_DETERMINISTIC;
+	}
+
+	if (tunables->ip_alloc_algorithm == 1) {
+		return IPALLOC_NONDETERMINISTIC;
+	}
+
+	/* 2, and every unrecognised value, means LCP2 */
+	return IPALLOC_LCP2;
+}
+
+/**********************************************************************/
+
+struct get_public_ips_state { /* request state shared by get_public_ips_send(), get_public_ips_done() and get_public_ips_recv() */
+ struct tevent_context *ev; /* event context; not read by any get_public_ips_* function in this file */
+ struct ctdb_client_context *client; /* client connection; not read by any get_public_ips_* function in this file */
+ uint32_t *pnns; /* PNNs of the nodes being queried; the caller's pointer is stored, not copied */
+ int count; /* number of entries in pnns */
+ struct ctdb_public_ip_list *ips; /* result array filled by get_public_ips_done(), written at index PNN; handed to the caller by get_public_ips_recv() */
+};
+
+static void get_public_ips_done(struct tevent_req *subreq);
+
+/*
+ * Start fetching public IP addresses from a set of nodes.
+ *
+ * A GET_PUBLIC_IPS control is sent to each of the count nodes listed
+ * in pnns, with the TIMEOUT() timeout.  available_only is passed
+ * straight through to ctdb_req_control_get_public_ips().
+ *
+ * The pnns pointer is stored in the request state rather than copied,
+ * so the array has to remain valid until the request has completed.
+ *
+ * Returns NULL if the request itself cannot be allocated; otherwise
+ * returns the request, whose result is collected with
+ * get_public_ips_recv().
+ */
+static struct tevent_req *get_public_ips_send(
+				TALLOC_CTX *mem_ctx,
+				struct tevent_context *ev,
+				struct ctdb_client_context *client,
+				uint32_t *pnns,
+				int count,
+				bool available_only)
+{
+	struct tevent_req *req, *subreq;
+	struct get_public_ips_state *state;
+	struct ctdb_req_control request;
+
+	req = tevent_req_create(mem_ctx, &state, struct get_public_ips_state);
+	if (req == NULL) {
+		/* There is no request to post, so just report failure */
+		return NULL;
+	}
+
+	state->ev = ev;
+	state->client = client;
+	state->pnns = pnns;
+	state->count = count;
+	state->ips = NULL;
+
+	ctdb_req_control_get_public_ips(&request, available_only);
+	/* Parent the sub-request on our own state, as the other *_send()
+	 * functions in this file do, so it cannot outlive the request. */
+	subreq = ctdb_client_control_multi_send(state, ev, client,
+						state->pnns,
+						state->count,
+						TIMEOUT(), &request);
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, get_public_ips_done, req);
+
+	return req;
+}
+
+/*
+ * Completion callback for the GET_PUBLIC_IPS multi-control.
+ *
+ * On failure the error is logged and the request fails with the error
+ * returned by ctdb_client_control_multi_recv().  On success each
+ * node's reply is parsed and stored in state->ips at the index given
+ * by that node's PNN; a reply that cannot be parsed fails the request
+ * with EIO.
+ */
+static void get_public_ips_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct get_public_ips_state *state = tevent_req_data(
+		req, struct get_public_ips_state);
+	struct ctdb_reply_control **reply;
+	int *err_list;
+	size_t num_slots;
+	int ret, i;
+	bool status;
+
+	status = ctdb_client_control_multi_recv(subreq, &ret, state, &err_list,
+						&reply);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		int ret2;
+		uint32_t pnn;
+
+		ret2 = ctdb_client_control_multi_error(state->pnns,
+						       state->count,
+						       err_list, &pnn);
+		if (ret2 != 0) {
+			D_ERR("control GET_PUBLIC_IPS failed on "
+			      "node %u, ret=%d\n", pnn, ret2);
+		} else {
+			D_ERR("control GET_PUBLIC_IPS failed, "
+			      "ret=%d\n", ret);
+		}
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	/* The result array is indexed by PNN, not by position in
+	 * state->pnns, so it needs room for the highest PNN queried.
+	 * Sizing it by state->count overflows whenever the PNNs are not
+	 * exactly 0..count-1.
+	 *
+	 * NOTE(review): a consumer that indexes this array up to the
+	 * total number of nodes needs that count passed in here; it is
+	 * not available in this request's state.
+	 */
+	num_slots = 0;
+	for (i = 0; i < state->count; i++) {
+		size_t needed = (size_t)state->pnns[i] + 1;
+
+		if (needed > num_slots) {
+			num_slots = needed;
+		}
+	}
+
+	state->ips = talloc_zero_array(state, struct ctdb_public_ip_list,
+				       num_slots);
+	if (tevent_req_nomem(state->ips, req)) {
+		return;
+	}
+
+	for (i = 0; i < state->count; i++) {
+		uint32_t pnn;
+		struct ctdb_public_ip_list *ips;
+
+		pnn = state->pnns[i];
+		ret = ctdb_reply_control_get_public_ips(reply[i], state->ips,
+							&ips);
+		if (ret != 0) {
+			D_ERR("control GET_PUBLIC_IPS failed on "
+			      "node %u\n", pnn);
+			tevent_req_error(req, EIO);
+			return;
+		}
+		state->ips[pnn] = *ips;
+	}
+
+	talloc_free(reply);
+
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the result of get_public_ips_send().
+ *
+ * If the request failed, returns false and stores the error in *perr
+ * (when perr is not NULL).  Otherwise moves the result array onto
+ * mem_ctx with talloc_steal(), stores it in *ips and returns true.
+ */
+static bool get_public_ips_recv(struct tevent_req *req, int *perr,
+				TALLOC_CTX *mem_ctx,
+				struct ctdb_public_ip_list **ips)
+{
+	struct get_public_ips_state *state;
+	int unix_err;
+
+	if (tevent_req_is_unix_error(req, &unix_err)) {
+		if (perr != NULL) {
+			*perr = unix_err;
+		}
+		return false;
+	}
+
+	state = tevent_req_data(req, struct get_public_ips_state);
+	*ips = talloc_steal(mem_ctx, state->ips);
+
+	return true;
+}
+
+/**********************************************************************/
+
+struct release_ip_state { /* state of one release_ip_send() request, shared by all of its per-IP sub-requests */
+ int num_sent; /* number of per-IP multi-controls started by release_ip_send() */
+ int num_replies; /* number of those that have completed; incremented in release_ip_done() */
+ int num_fails; /* number of those that completed with a failure */
+ int err_any; /* most recently recorded error; passed to tevent_req_error() when num_fails > 0 */
+ uint32_t *ban_credits; /* caller's counter array indexed by PNN; incremented for each node that fails a RELEASE_IP */
+};
+
+struct release_ip_one_state { /* callback data for the RELEASE_IP multi-control of a single IP address */
+ struct tevent_req *req; /* the enclosing release_ip_send() request */
+ uint32_t *pnns; /* nodes the RELEASE_IP was sent to: every given node except the one that should host the IP */
+ int count; /* number of entries used in pnns */
+ const char *ip_str; /* string form of the address from ctdb_sock_addr_to_string(), used in log messages */
+};
+
+static void release_ip_done(struct tevent_req *subreq);
+
+/*
+ * Start sending RELEASE_IP controls for every address in all_ips.
+ *
+ * For each address, one multi-control is sent to all nodes in pnns
+ * (count entries) other than the node the address is assigned to.
+ * timeout is passed unchanged to every control, so it is shared by
+ * all of them.  ban_credits is an array indexed by PNN that
+ * release_ip_done() increments for nodes that fail.
+ *
+ * Returns NULL if the request cannot be allocated.  The request
+ * completes once every control has been answered, or immediately if
+ * there was nothing to send.  Collect the result with
+ * release_ip_recv().
+ */
+static struct tevent_req *release_ip_send(TALLOC_CTX *mem_ctx,
+					  struct tevent_context *ev,
+					  struct ctdb_client_context *client,
+					  uint32_t *pnns,
+					  int count,
+					  struct timeval timeout,
+					  struct public_ip_list *all_ips,
+					  uint32_t *ban_credits)
+{
+	struct tevent_req *req, *subreq;
+	struct release_ip_state *state;
+	struct ctdb_req_control request;
+	struct public_ip_list *tmp_ip;
+
+	req = tevent_req_create(mem_ctx, &state, struct release_ip_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->num_sent = 0;
+	state->num_replies = 0;
+	state->num_fails = 0;
+	state->ban_credits = ban_credits;
+
+	/* Send a RELEASE_IP to all nodes that should not be hosting
+	 * each IP.  For each IP, all but one of these will be
+	 * redundant.  However, the redundant ones are used to tell
+	 * nodes which node should be hosting the IP so that commands
+	 * like "ctdb ip" can display a particular node's idea of who
+	 * is hosting what. */
+	for (tmp_ip = all_ips; tmp_ip != NULL; tmp_ip = tmp_ip->next) {
+		struct release_ip_one_state *substate;
+		struct ctdb_public_ip ip;
+		int i;
+
+		substate = talloc_zero(state, struct release_ip_one_state);
+		if (tevent_req_nomem(substate, req)) {
+			return tevent_req_post(req, ev);
+		}
+
+		substate->pnns = talloc_zero_array(substate, uint32_t, count);
+		if (tevent_req_nomem(substate->pnns, req)) {
+			return tevent_req_post(req, ev);
+		}
+
+		substate->count = 0;
+		substate->req = req;
+
+		substate->ip_str = ctdb_sock_addr_to_string(substate,
+							    &tmp_ip->addr);
+		if (tevent_req_nomem(substate->ip_str, req)) {
+			return tevent_req_post(req, ev);
+		}
+
+		for (i = 0; i < count; i++) {
+			uint32_t pnn = pnns[i];
+			/* If pnn is not the node that should be
+			 * hosting the IP then add it to the list of
+			 * nodes that need to do a release. */
+			if (tmp_ip->pnn != pnn) {
+				substate->pnns[substate->count] = pnn;
+				substate->count++;
+			}
+		}
+
+		if (substate->count == 0) {
+			/* No node has to release this address, so do
+			 * not issue a control with an empty node list */
+			TALLOC_FREE(substate);
+			continue;
+		}
+
+		ip.pnn = tmp_ip->pnn;
+		ip.addr = tmp_ip->addr;
+		ctdb_req_control_release_ip(&request, &ip);
+		subreq = ctdb_client_control_multi_send(state, ev, client,
+							substate->pnns,
+							substate->count,
+							timeout,/* cumulative */
+							&request);
+		if (tevent_req_nomem(subreq, req)) {
+			return tevent_req_post(req, ev);
+		}
+		tevent_req_set_callback(subreq, release_ip_done, substate);
+
+		state->num_sent++;
+	}
+
+	/* None sent, finished...  Without this the request would never
+	 * complete, because completion is only signalled from
+	 * release_ip_done(). */
+	if (state->num_sent == 0) {
+		tevent_req_done(req);
+		return tevent_req_post(req, ev);
+	}
+
+	return req;
+}
+
+/*
+ * Completion callback for the RELEASE_IP multi-control of one address.
+ *
+ * Logs the outcome, adds a ban credit for every node that reported an
+ * error and records the failure in the shared release_ip_state.  Once
+ * the last outstanding control has been answered, the enclosing
+ * request is completed: with state->err_any if any control failed,
+ * otherwise successfully.
+ */
+static void release_ip_done(struct tevent_req *subreq)
+{
+	struct release_ip_one_state *substate = tevent_req_callback_data(
+		subreq, struct release_ip_one_state);
+	struct tevent_req *req = substate->req;
+	struct release_ip_state *state = tevent_req_data(
+		req, struct release_ip_state);
+	int ret, i;
+	/* Start from NULL so a failure that supplies no per-node list
+	 * is detected rather than dereferencing an indeterminate
+	 * pointer. */
+	int *err_list = NULL;
+	bool status, found_errors;
+
+	status = ctdb_client_control_multi_recv(subreq, &ret, state,
+						&err_list, NULL);
+	TALLOC_FREE(subreq);
+
+	if (status) {
+		D_INFO("RELEASE_IP %s succeeded on %d nodes\n",
+		       substate->ip_str, substate->count);
+		goto done;
+	}
+
+	/* Get some clear error messages out of err_list and count
+	 * banning credits
+	 */
+	found_errors = false;
+	if (err_list != NULL) {
+		for (i = 0; i < substate->count; i++) {
+			int err = err_list[i];
+			if (err != 0) {
+				uint32_t pnn = substate->pnns[i];
+
+				D_ERR("RELEASE_IP %s failed on node %u, "
+				      "ret=%d\n", substate->ip_str, pnn, err);
+
+				state->ban_credits[pnn]++;
+				state->err_any = err;
+				found_errors = true;
+			}
+		}
+	}
+	if (! found_errors) {
+		D_ERR("RELEASE_IP %s internal error, ret=%d\n",
+		      substate->ip_str, ret);
+		state->err_any = EIO;
+	}
+
+	state->num_fails++;
+
+done:
+	/* The list was placed on state by the recv call above; release
+	 * it now instead of keeping one per address until state goes. */
+	TALLOC_FREE(err_list);
+	talloc_free(substate);
+
+	state->num_replies++;
+
+	if (state->num_replies < state->num_sent) {
+		/* Not all replies received, don't go further */
+		return;
+	}
+
+	if (state->num_fails > 0) {
+		tevent_req_error(req, state->err_any);
+		return;
+	}
+
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the outcome of release_ip_send(): true on success, otherwise
+ * false with the error stored in *perr when perr is not NULL.
+ */
+static bool release_ip_recv(struct tevent_req *req, int *perr)
+{
+	bool succeeded = generic_recv(req, perr);
+
+	return succeeded;
+}
+
+/**********************************************************************/
+
+struct take_ip_state { /* state of one take_ip_send() request, shared by all of its per-IP sub-requests */
+ int num_sent; /* number of TAKEOVER_IP controls started by take_ip_send() */
+ int num_replies; /* number of those that have completed; incremented in take_ip_done() */
+ int num_fails; /* number of those that failed */
+ int err_any; /* most recently recorded error; passed to tevent_req_error() when num_fails > 0 */
+ uint32_t *ban_credits; /* caller's counter array indexed by PNN; incremented for a node whose TAKEOVER_IP fails */
+};
+
+struct take_ip_one_state { /* callback data for the TAKEOVER_IP control of a single IP address */
+ struct tevent_req *req; /* the enclosing take_ip_send() request */
+ uint32_t pnn; /* node the TAKEOVER_IP was sent to */
+ const char *ip_str; /* string form of the address from ctdb_sock_addr_to_string(), used in log messages */
+};
+
+static void take_ip_done(struct tevent_req *subreq);
+
+/*
+ * Start sending TAKEOVER_IP controls for the addresses in all_ips.
+ *
+ * One control is sent per address, to the node the address is
+ * assigned to.  Addresses whose pnn is CTDB_UNKNOWN_PNN are skipped.
+ * timeout is passed unchanged to every control, so it is shared by
+ * all of them.  ban_credits is an array indexed by PNN that
+ * take_ip_done() increments for nodes that fail.
+ *
+ * Returns NULL if the request cannot be allocated.  The request
+ * completes once every control has been answered, or immediately if
+ * there was nothing to send.  Collect the result with take_ip_recv().
+ */
+static struct tevent_req *take_ip_send(TALLOC_CTX *mem_ctx,
+				       struct tevent_context *ev,
+				       struct ctdb_client_context *client,
+				       struct timeval timeout,
+				       struct public_ip_list *all_ips,
+				       uint32_t *ban_credits)
+{
+	struct tevent_req *req, *subreq;
+	struct take_ip_state *state;
+	struct ctdb_req_control request;
+	struct public_ip_list *tmp_ip;
+
+	req = tevent_req_create(mem_ctx, &state, struct take_ip_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->num_sent = 0;
+	state->num_replies = 0;
+	state->num_fails = 0;
+	state->ban_credits = ban_credits;
+
+	/* For each IP, send a TAKEOVER_IP to the node that should be
+	 * hosting it.  Many of these will often be redundant (since
+	 * the allocation won't have changed) but they can be useful
+	 * to recover from inconsistencies. */
+	for (tmp_ip = all_ips; tmp_ip != NULL; tmp_ip = tmp_ip->next) {
+		struct take_ip_one_state *substate;
+		struct ctdb_public_ip ip;
+
+		/* Use the named constant, as the rest of this file
+		 * does, rather than comparing an unsigned PNN to -1 */
+		if (tmp_ip->pnn == CTDB_UNKNOWN_PNN) {
+			/* IP will be unassigned */
+			continue;
+		}
+
+		substate = talloc_zero(state, struct take_ip_one_state);
+		if (tevent_req_nomem(substate, req)) {
+			return tevent_req_post(req, ev);
+		}
+
+		substate->req = req;
+		substate->pnn = tmp_ip->pnn;
+
+		substate->ip_str = ctdb_sock_addr_to_string(substate,
+							    &tmp_ip->addr);
+		if (tevent_req_nomem(substate->ip_str, req)) {
+			return tevent_req_post(req, ev);
+		}
+
+		ip.pnn = tmp_ip->pnn;
+		ip.addr = tmp_ip->addr;
+		ctdb_req_control_takeover_ip(&request, &ip);
+		subreq = ctdb_client_control_send(
+				state, ev, client, tmp_ip->pnn,
+				timeout, /* cumulative */
+				&request);
+		if (tevent_req_nomem(subreq, req)) {
+			return tevent_req_post(req, ev);
+		}
+		tevent_req_set_callback(subreq, take_ip_done, substate);
+
+		state->num_sent++;
+	}
+
+	/* None sent, finished... */
+	if (state->num_sent == 0) {
+		tevent_req_done(req);
+		return tevent_req_post(req, ev);
+	}
+
+	return req;
+}
+
+/*
+ * Completion callback for the TAKEOVER_IP control of one address.
+ *
+ * A control that could not be received, or whose reply carries a
+ * non-zero result, counts as a failure: the target node gets a ban
+ * credit and the error is recorded.  Once the last outstanding
+ * control has been answered, the enclosing request is completed.
+ */
+static void take_ip_done(struct tevent_req *subreq)
+{
+	struct take_ip_one_state *one = tevent_req_callback_data(
+		subreq, struct take_ip_one_state);
+	struct tevent_req *req = one->req;
+	struct take_ip_state *state = tevent_req_data(
+		req, struct take_ip_state);
+	struct ctdb_reply_control *reply;
+	int ret = 0;
+	bool succeeded;
+
+	succeeded = ctdb_client_control_recv(subreq, &ret, state, &reply);
+	TALLOC_FREE(subreq);
+
+	if (! succeeded) {
+		D_ERR("TAKEOVER_IP %s failed to node %u, ret=%d\n",
+		      one->ip_str, one->pnn, ret);
+	} else {
+		ret = ctdb_reply_control_takeover_ip(reply);
+		if (ret != 0) {
+			D_ERR("TAKEOVER_IP %s failed on node %u, ret=%d\n",
+			      one->ip_str, one->pnn, ret);
+			succeeded = false;
+		}
+	}
+
+	if (succeeded) {
+		D_INFO("TAKEOVER_IP %s succeeded on node %u\n",
+		       one->ip_str, one->pnn);
+	} else {
+		state->ban_credits[one->pnn]++;
+		state->num_fails++;
+		state->err_any = ret;
+	}
+
+	talloc_free(one);
+
+	state->num_replies++;
+
+	/* Keep waiting until every control has been answered */
+	if (state->num_replies < state->num_sent) {
+		return;
+	}
+
+	if (state->num_fails > 0) {
+		tevent_req_error(req, state->err_any);
+		return;
+	}
+
+	tevent_req_done(req);
+}
+
+/*
+ * Collect the outcome of take_ip_send(): true on success, otherwise
+ * false with the error stored in *perr when perr is not NULL.
+ */
+static bool take_ip_recv(struct tevent_req *req, int *perr)
+{
+	bool succeeded = generic_recv(req, perr);
+
+	return succeeded;
+}
+
+/**********************************************************************/
+
+struct ipreallocated_state { /* state of one ipreallocated_send() request */
+ uint32_t *pnns; /* nodes the IPREALLOCATED control is sent to; the caller's pointer is stored, not copied */
+ int count; /* number of entries in pnns */
+ uint32_t *ban_credits; /* caller's counter array indexed by PNN; incremented for each node that fails IPREALLOCATED */
+};
+
+static void ipreallocated_done(struct tevent_req *subreq);
+
+/*
+ * Start sending an IPREALLOCATED control to the count nodes listed in
+ * pnns, using the given timeout.  ban_credits is an array indexed by
+ * PNN that ipreallocated_done() increments for nodes that fail.
+ *
+ * Returns NULL if the request cannot be allocated.  Collect the
+ * result with ipreallocated_recv().
+ */
+static struct tevent_req *ipreallocated_send(TALLOC_CTX *mem_ctx,
+					     struct tevent_context *ev,
+					     struct ctdb_client_context *client,
+					     uint32_t *pnns,
+					     int count,
+					     struct timeval timeout,
+					     uint32_t *ban_credits)
+{
+	struct ipreallocated_state *state;
+	struct ctdb_req_control ctl;
+	struct tevent_req *req;
+	struct tevent_req *multi;
+
+	req = tevent_req_create(mem_ctx, &state, struct ipreallocated_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	/* Remembered for error reporting in ipreallocated_done() */
+	state->ban_credits = ban_credits;
+	state->count = count;
+	state->pnns = pnns;
+
+	ctdb_req_control_ipreallocated(&ctl);
+	multi = ctdb_client_control_multi_send(state, ev, client,
+					       pnns, count,
+					       timeout, /* cumulative */
+					       &ctl);
+	if (tevent_req_nomem(multi, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(multi, ipreallocated_done, req);
+
+	return req;
+}
+
+/*
+ * Completion callback for the IPREALLOCATED multi-control.
+ *
+ * On success the request is completed.  On failure every node that
+ * reported an error is logged and given a ban credit, and the request
+ * fails with the error returned by ctdb_client_control_multi_recv().
+ */
+static void ipreallocated_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct ipreallocated_state *state = tevent_req_data(
+		req, struct ipreallocated_state);
+	int *err_list = NULL;
+	int ret, i;
+	bool status, found_errors;
+
+	status = ctdb_client_control_multi_recv(subreq, &ret, state,
+						&err_list, NULL);
+	TALLOC_FREE(subreq);
+
+	if (status) {
+		D_INFO("IPREALLOCATED succeeded on %d nodes\n", state->count);
+		tevent_req_done(req);
+		return;
+	}
+
+	/* Get some clear error messages out of err_list and count
+	 * banning credits.  err_list starts out NULL, so only walk it
+	 * if the recv call actually supplied one.
+	 */
+	found_errors = false;
+	if (err_list != NULL) {
+		for (i = 0; i < state->count; i++) {
+			int err = err_list[i];
+			if (err != 0) {
+				uint32_t pnn = state->pnns[i];
+
+				D_ERR("IPREALLOCATED failed on node %u, "
+				      "ret=%d\n", pnn, err);
+
+				state->ban_credits[pnn]++;
+				found_errors = true;
+			}
+		}
+	}
+
+	if (! found_errors) {
+		D_ERR("IPREALLOCATED internal error, ret=%d\n", ret);
+	}
+
+	tevent_req_error(req, ret);
+}
+
+/*
+ * Collect the outcome of ipreallocated_send(): true on success,
+ * otherwise false with the error stored in *perr when perr is not NULL.
+ */
+static bool ipreallocated_recv(struct tevent_req *req, int *perr)
+{
+	bool succeeded = generic_recv(req, perr);
+
+	return succeeded;
+}
+
+/**********************************************************************/
+
+/*
+ * Recalculate the allocation of public IPs to nodes and have the
+ * nodes host their allocated addresses.
+ *
+ * - Get tunables
+ * - Get nodemap
+ * - Initialise IP allocation state. Pass:
+ * + algorithm to be used;
+ * + various tunables (NoIPTakeover, NoIPFailback, NoIPHostOnAllDisabled)
+ * + list of nodes to force rebalance (internal structure, currently
+ * no way to fetch, only used by LCP2 for nodes that have had new
+ * IP addresses added).
+ * - Set IP flags for IP allocation based on node map
+ * - Retrieve known and available IP addresses (done separately so
+ * values can be faked in unit testing)
+ * - Use ipalloc_set_public_ips() to set known and available IP
+ * addresses for allocation
+ * - If cluster can't host IP addresses then jump to IPREALLOCATED
+ * - Run IP allocation algorithm
+ * - Send RELEASE_IP to all nodes for IPs they should not host
+ * - Send TAKEOVER_IP to each node for the IPs it should host
+ * - Send IPREALLOCATED to all nodes
+ */
+
+struct takeover_state { /* state of one takeover_send() request, carried through every stage of the takeover run */
+ struct tevent_context *ev; /* event context given to takeover_send() */
+ struct ctdb_client_context *client; /* client connection given to takeover_send() */
+ struct timeval timeout; /* deadline passed to the RELEASE_IP, TAKEOVER_IP and IPREALLOCATED stages; set to 3 * takeover_timeout from "now" */
+ int num_nodes; /* nodemap->num from the GET_NODEMAP reply */
+ uint32_t *pnns_connected; /* filled by list_of_connected_nodes(); targets of RELEASE_IP and IPREALLOCATED */
+ int num_connected; /* number of entries in pnns_connected */
+ uint32_t *pnns_active; /* filled by list_of_active_nodes(); nodes asked for their public IPs */
+ int num_active; /* number of entries in pnns_active */
+ uint32_t destnode; /* ctdb_client_pnn(client); target of GET_ALL_TUNABLES and GET_NODEMAP */
+ uint32_t *force_rebalance_nodes; /* passed unchanged from takeover_send() to ipalloc_state_init() */
+ struct ctdb_tunable_list *tun_list; /* tunables parsed from the GET_ALL_TUNABLES reply */
+ struct ipalloc_state *ipalloc_state; /* created by ipalloc_state_init(); left unset when disable_ip_failover is non-zero */
+ struct ctdb_public_ip_list *known_ips; /* result of get_public_ips with available_only == false */
+ struct public_ip_list *all_ips; /* allocation returned by ipalloc() */
+ uint32_t *ban_credits; /* num_nodes zeroed counters indexed by PNN; see takeover_failed() */
+};
+
+static void takeover_tunables_done(struct tevent_req *subreq);
+static void takeover_nodemap_done(struct tevent_req *subreq);
+static void takeover_known_ips_done(struct tevent_req *subreq);
+static void takeover_avail_ips_done(struct tevent_req *subreq);
+static void takeover_release_ip_done(struct tevent_req *subreq);
+static void takeover_take_ip_done(struct tevent_req *subreq);
+static void takeover_ipreallocated(struct tevent_req *req);
+static void takeover_ipreallocated_done(struct tevent_req *subreq);
+static void takeover_failed(struct tevent_req *subreq, int ret);
+static void takeover_failed_done(struct tevent_req *subreq);
+
+/*
+ * Start a takeover run.
+ *
+ * Records the event context, client and force_rebalance_nodes in the
+ * request state and sends GET_ALL_TUNABLES to the node returned by
+ * ctdb_client_pnn(client); the remaining stages are driven from the
+ * completion callbacks.
+ *
+ * Returns NULL if the request cannot be allocated.  Collect the
+ * result with takeover_recv().
+ */
+static struct tevent_req *takeover_send(TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct ctdb_client_context *client,
+					uint32_t *force_rebalance_nodes)
+{
+	struct takeover_state *state;
+	struct ctdb_req_control tunables_ctl;
+	struct tevent_req *req;
+	struct tevent_req *ctl_req;
+
+	req = tevent_req_create(mem_ctx, &state, struct takeover_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->client = client;
+	state->ev = ev;
+	state->destnode = ctdb_client_pnn(client);
+	state->force_rebalance_nodes = force_rebalance_nodes;
+
+	/* First stage: fetch the tunables from the local node */
+	ctdb_req_control_get_all_tunables(&tunables_ctl);
+	ctl_req = ctdb_client_control_send(state, state->ev, state->client,
+					   state->destnode, TIMEOUT(),
+					   &tunables_ctl);
+	if (tevent_req_nomem(ctl_req, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(ctl_req, takeover_tunables_done, req);
+
+	return req;
+}
+
+/*
+ * GET_ALL_TUNABLES has completed.
+ *
+ * Parses the tunables into state->tun_list, replaces the global
+ * takeover_timeout with the takeover_timeout tunable and moves on to
+ * fetching the node map from state->destnode.
+ */
+static void takeover_tunables_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct takeover_state *state = tevent_req_data(
+		req, struct takeover_state);
+	struct ctdb_req_control nodemap_ctl;
+	struct ctdb_reply_control *reply;
+	bool received;
+	int ret;
+
+	received = ctdb_client_control_recv(subreq, &ret, state, &reply);
+	TALLOC_FREE(subreq);
+	if (! received) {
+		D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	ret = ctdb_reply_control_get_all_tunables(reply, state,
+						  &state->tun_list);
+	if (ret != 0) {
+		D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	talloc_free(reply);
+
+	/* Every later TIMEOUT() uses the configured value */
+	takeover_timeout = state->tun_list->takeover_timeout;
+
+	ctdb_req_control_get_nodemap(&nodemap_ctl);
+	subreq = ctdb_client_control_send(state, state->ev, state->client,
+					  state->destnode, TIMEOUT(),
+					  &nodemap_ctl);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, takeover_nodemap_done, req);
+}
+
+/*
+ * GET_NODEMAP has completed.
+ *
+ * Builds the lists of connected and active nodes, sets the default
+ * timeout and allocates the ban credit counters.  If the
+ * disable_ip_failover tunable is set the run jumps straight to
+ * IPREALLOCATED; otherwise the IP allocation state is initialised and
+ * the known public IPs are requested from the active nodes.
+ */
+static void takeover_nodemap_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct takeover_state *state = tevent_req_data(
+		req, struct takeover_state);
+	struct ctdb_node_map *node_map;
+	struct ctdb_reply_control *reply;
+	bool no_takeover, no_failback, no_host_on_all_disabled;
+	bool received;
+	int ret;
+
+	received = ctdb_client_control_recv(subreq, &ret, state, &reply);
+	TALLOC_FREE(subreq);
+	if (! received) {
+		D_ERR("control GET_NODEMAP failed to node %u, ret=%d\n",
+		      state->destnode, ret);
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	ret = ctdb_reply_control_get_nodemap(reply, state, &node_map);
+	if (ret != 0) {
+		D_ERR("control GET_NODEMAP failed, ret=%d\n", ret);
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	state->num_nodes = node_map->num;
+
+	state->num_connected = list_of_connected_nodes(node_map,
+						       CTDB_UNKNOWN_PNN,
+						       state,
+						       &state->pnns_connected);
+	if (state->num_connected <= 0) {
+		tevent_req_error(req, ENOMEM);
+		return;
+	}
+
+	state->num_active = list_of_active_nodes(node_map,
+						 CTDB_UNKNOWN_PNN,
+						 state,
+						 &state->pnns_active);
+	if (state->num_active <= 0) {
+		tevent_req_error(req, ENOMEM);
+		return;
+	}
+
+	/* Default timeout, used when the run jumps early to
+	 * IPREALLOCATED.  The factor of 3 is explained where the
+	 * timeout is reset in takeover_avail_ips_done().
+	 */
+	state->timeout = timeval_current_ofs(3 * takeover_timeout, 0);
+
+	state->ban_credits = talloc_zero_array(state, uint32_t,
+					       state->num_nodes);
+	if (tevent_req_nomem(state->ban_credits, req)) {
+		return;
+	}
+
+	if (state->tun_list->disable_ip_failover != 0) {
+		/* IP failover is completely disabled so just send out
+		 * ipreallocated event.
+		 */
+		takeover_ipreallocated(req);
+		return;
+	}
+
+	no_takeover = (state->tun_list->no_ip_takeover != 0);
+	no_failback = (state->tun_list->no_ip_failback != 0);
+	no_host_on_all_disabled =
+		(state->tun_list->no_ip_host_on_all_disabled != 0);
+
+	state->ipalloc_state = ipalloc_state_init(
+					state, state->num_nodes,
+					determine_algorithm(state->tun_list),
+					no_takeover,
+					no_failback,
+					no_host_on_all_disabled,
+					state->force_rebalance_nodes);
+	if (tevent_req_nomem(state->ipalloc_state, req)) {
+		return;
+	}
+
+	ipalloc_set_node_flags(state->ipalloc_state, node_map);
+
+	/* available_only == false: ask for the known addresses first */
+	subreq = get_public_ips_send(state, state->ev, state->client,
+				     state->pnns_active, state->num_active,
+				     false);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+
+	tevent_req_set_callback(subreq, takeover_known_ips_done, req);
+}
+
+/*
+ * The known public IPs have been fetched.
+ *
+ * Stores them in state->known_ips and requests the available public
+ * IPs (available_only == true) from the same active nodes.
+ */
+static void takeover_known_ips_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct takeover_state *state = tevent_req_data(
+		req, struct takeover_state);
+	bool fetched;
+	int err;
+
+	fetched = get_public_ips_recv(subreq, &err, state, &state->known_ips);
+	TALLOC_FREE(subreq);
+
+	if (! fetched) {
+		D_ERR("Failed to fetch known public IPs\n");
+		tevent_req_error(req, err);
+		return;
+	}
+
+	subreq = get_public_ips_send(state, state->ev, state->client,
+				     state->pnns_active, state->num_active,
+				     true);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+
+	tevent_req_set_callback(subreq, takeover_avail_ips_done, req);
+}
+
+/*
+ * The available public IPs have been fetched.
+ *
+ * Hands the known and available addresses to the allocation state.
+ * If no node can host addresses the run jumps to IPREALLOCATED;
+ * otherwise the allocation is calculated and the RELEASE_IP stage is
+ * started.
+ */
+static void takeover_avail_ips_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct takeover_state *state = tevent_req_data(
+		req, struct takeover_state);
+	struct ctdb_public_ip_list *avail_ips;
+	bool fetched;
+	int err;
+
+	fetched = get_public_ips_recv(subreq, &err, state, &avail_ips);
+	TALLOC_FREE(subreq);
+
+	if (! fetched) {
+		D_ERR("Failed to fetch available public IPs\n");
+		tevent_req_error(req, err);
+		return;
+	}
+
+	ipalloc_set_public_ips(state->ipalloc_state,
+			       state->known_ips, avail_ips);
+
+	if (! ipalloc_can_host_ips(state->ipalloc_state)) {
+		D_NOTICE("No nodes available to host public IPs yet\n");
+		takeover_ipreallocated(req);
+		return;
+	}
+
+	/* Calculate the new assignment of addresses to nodes */
+	state->all_ips = ipalloc(state->ipalloc_state);
+	if (tevent_req_nomem(state->all_ips, req)) {
+		return;
+	}
+
+	/* RELEASE_IP, TAKEOVER_IP and IPREALLOCATED each notionally
+	 * get TakeoverTimeout seconds.  RELEASE_IP can need longer
+	 * because of TCP connection killing, so a single cumulative
+	 * timeout of TakeoverTimeout * 3 seconds is used across all 3
+	 * stages instead.  No explicit expiry checks are needed
+	 * before each stage because tevent is smart enough to fire
+	 * the timeouts even if they are in the past.  The timeout is
+	 * initialised here so that it explicitly covers those stages
+	 * and, in particular, not the time taken by ipalloc().
+	 */
+	state->timeout = timeval_current_ofs(3 * takeover_timeout, 0);
+
+	subreq = release_ip_send(state, state->ev, state->client,
+				 state->pnns_connected, state->num_connected,
+				 state->timeout, state->all_ips,
+				 state->ban_credits);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, takeover_release_ip_done, req);
+}
+
+/*
+ * The RELEASE_IP stage has completed.
+ *
+ * A failure is handed to takeover_failed(); otherwise the TAKEOVER_IP
+ * stage is started with the same timeout and allocation.
+ */
+static void takeover_release_ip_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct takeover_state *state = tevent_req_data(
+		req, struct takeover_state);
+	bool released;
+	int err;
+
+	released = release_ip_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+
+	if (! released) {
+		takeover_failed(req, err);
+		return;
+	}
+
+	/* Releases are finished, so the takeovers can go out */
+	subreq = take_ip_send(state, state->ev, state->client,
+			      state->timeout, state->all_ips,
+			      state->ban_credits);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, takeover_take_ip_done, req);
+}
+
+/*
+ * The TAKEOVER_IP stage has completed.
+ *
+ * A failure is handed to takeover_failed(); otherwise the run moves
+ * on to IPREALLOCATED.
+ */
+static void takeover_take_ip_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	int err = 0;
+	bool taken;
+
+	taken = take_ip_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+
+	if (taken) {
+		takeover_ipreallocated(req);
+		return;
+	}
+
+	takeover_failed(req, err);
+}
+
+/*
+ * Start the IPREALLOCATED stage: send the control to all connected
+ * nodes using the timeout stored in the takeover state.
+ */
+static void takeover_ipreallocated(struct tevent_req *req)
+{
+	struct takeover_state *state = tevent_req_data(
+		req, struct takeover_state);
+	struct tevent_req *realloc_req;
+
+	realloc_req = ipreallocated_send(state, state->ev, state->client,
+					 state->pnns_connected,
+					 state->num_connected,
+					 state->timeout,
+					 state->ban_credits);
+	if (tevent_req_nomem(realloc_req, req)) {
+		return;
+	}
+	tevent_req_set_callback(realloc_req, takeover_ipreallocated_done, req);
+}
+
+/*
+ * The IPREALLOCATED stage has completed.
+ *
+ * A failure is handed to takeover_failed(); otherwise the whole
+ * takeover request is marked as done.
+ */
+static void takeover_ipreallocated_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	bool reallocated;
+	int err;
+
+	reallocated = ipreallocated_recv(subreq, &err);
+	TALLOC_FREE(subreq);
+
+	if (reallocated) {
+		tevent_req_done(req);
+		return;
+	}
+
+	takeover_failed(req, err);
+}
+
+struct takeover_failed_state { /* callback data for the banning message sent by takeover_failed() */
+ struct tevent_req *req; /* the takeover request to fail once the message has been handled */
+ int ret; /* the original error, reported by takeover_failed_done() */
+};
+
+/*
+ * Fail the takeover request with ret, first assigning banning credits
+ * to the worst offender.
+ *
+ * If the enable_bans tunable is zero, or no node has collected any
+ * ban credits, the request is failed immediately.  Otherwise a
+ * CTDB_SRVID_BANNING message naming the node with the most credits
+ * (the lowest PNN wins a tie) is sent to the local node, and the
+ * request is failed with ret from takeover_failed_done().
+ */
+static void takeover_failed(struct tevent_req *req, int ret)
+{
+	struct takeover_state *state = tevent_req_data(
+		req, struct takeover_state);
+	struct tevent_req *subreq;
+	uint32_t max_pnn = CTDB_UNKNOWN_PNN;
+	/* Same type as the counters, so the comparison below does not
+	 * mix signed and unsigned values */
+	uint32_t max_credits = 0;
+	int pnn;
+
+	/* Check that bans are enabled */
+	if (state->tun_list->enable_bans == 0) {
+		tevent_req_error(req, ret);
+		return;
+	}
+
+	for (pnn = 0; pnn < state->num_nodes; pnn++) {
+		if (state->ban_credits[pnn] > max_credits) {
+			max_pnn = (uint32_t)pnn;
+			max_credits = state->ban_credits[pnn];
+		}
+	}
+
+	if (max_credits > 0) {
+		struct ctdb_req_message message;
+		struct takeover_failed_state *substate;
+
+		D_WARNING("Assigning banning credits to node %u\n", max_pnn);
+
+		substate = talloc_zero(state, struct takeover_failed_state);
+		if (tevent_req_nomem(substate, req)) {
+			return;
+		}
+		substate->req = req;
+		substate->ret = ret;
+
+		message.srvid = CTDB_SRVID_BANNING;
+		message.data.pnn = max_pnn;
+
+		subreq = ctdb_client_message_send(
+				state, state->ev, state->client,
+				ctdb_client_pnn(state->client),
+				&message);
+		if (subreq == NULL) {
+			D_ERR("failed to assign banning credits\n");
+			/* No callback will ever consume substate */
+			talloc_free(substate);
+			tevent_req_error(req, ret);
+			return;
+		}
+		tevent_req_set_callback(subreq, takeover_failed_done, substate);
+	} else {
+		tevent_req_error(req, ret);
+	}
+}
+
+/*
+ * The banning message sent by takeover_failed() has been handled.
+ *
+ * A failure to deliver it is only logged.  Either way the takeover
+ * request is failed with the original error saved in the callback
+ * data.
+ */
+static void takeover_failed_done(struct tevent_req *subreq)
+{
+	struct takeover_failed_state *failed = tevent_req_callback_data(
+		subreq, struct takeover_failed_state);
+	struct tevent_req *req = failed->req;
+	int orig_err = failed->ret;
+	int msg_err;
+	bool delivered;
+
+	delivered = ctdb_client_message_recv(subreq, &msg_err);
+	TALLOC_FREE(subreq);
+	if (! delivered) {
+		D_ERR("failed to assign banning credits, ret=%d\n", msg_err);
+	}
+
+	talloc_free(failed);
+	tevent_req_error(req, orig_err);
+}
+
+/*
+ * Collect the outcome of takeover_send().
+ *
+ * When perr is not NULL, *perr receives the error the request failed
+ * with, or 0 if it succeeded.  Writing 0 on success means the caller
+ * does not depend on whatever value *perr held before the call.
+ */
+static void takeover_recv(struct tevent_req *req, int *perr)
+{
+	if (! generic_recv(req, perr)) {
+		/* generic_recv() has already stored the error */
+		return;
+	}
+
+	if (perr != NULL) {
+		*perr = 0;
+	}
+}
+
+/*
+ * Parse a comma-separated list of node numbers.
+ *
+ * Returns an array allocated on mem_ctx holding one uint32_t per list
+ * element, in order.  Returns NULL if memory cannot be allocated or
+ * if any element is not a plain unsigned decimal number that fits in
+ * a uint32_t.
+ */
+static uint32_t *parse_node_list(TALLOC_CTX *mem_ctx, const char* s)
+{
+	char *strv = NULL;
+	int num, i, ret;
+	char *t;
+	uint32_t *nodes;
+
+	ret = strv_split(mem_ctx, &strv, s, ",");
+	if (ret != 0) {
+		D_ERR("out of memory\n");
+		return NULL;
+	}
+
+	num = strv_count(strv);
+
+	nodes = talloc_array(mem_ctx, uint32_t, num);
+	if (nodes == NULL) {
+		D_ERR("out of memory\n");
+		talloc_free(strv);
+		return NULL;
+	}
+
+	t = NULL;
+	for (i = 0; i < num; i++) {
+		const char *p;
+		char *end = NULL;
+		unsigned long value;
+
+		t = strv_next(strv, t);
+
+		/* strtoul() silently negates a value with a leading
+		 * '-', so reject that before converting */
+		p = t;
+		while (*p == ' ' || *p == '\t') {
+			p++;
+		}
+
+		errno = 0;
+		value = strtoul(t, &end, 10);
+		if (*p == '-' || end == t || *end != '\0' ||
+		    errno != 0 || value > UINT32_MAX) {
+			D_ERR("invalid node number \"%s\"\n", t);
+			talloc_free(nodes);
+			talloc_free(strv);
+			return NULL;
+		}
+
+		nodes[i] = (uint32_t)value;
+	}
+
+	/* Only the numeric array is handed back to the caller */
+	talloc_free(strv);
+
+	return nodes;
+}
+
+/* Print the command line synopsis for progname to stderr. */
+static void usage(const char *progname)
+{
+	static const char synopsis[] =
+		"\nUsage: %s <output-fd> <ctdb-socket-path> [<force-rebalance-nodes>]\n";
+
+	fprintf(stderr, synopsis, progname);
+}
+
+/*
+ * Arguments - write fd, socket path, optional comma-separated list of
+ * nodes to force rebalance
+ *
+ * Runs one takeover and writes the resulting int status (0 on success)
+ * to the write fd; the same value is used as the exit status.  Wrong
+ * usage, or a node list that cannot be parsed, exits with status 1
+ * without writing anything.
+ */
+int main(int argc, const char *argv[])
+{
+	int write_fd;
+	const char *sockpath;
+	TALLOC_CTX *mem_ctx;
+	struct tevent_context *ev;
+	struct ctdb_client_context *client;
+	int ret;
+	struct tevent_req *req;
+	uint32_t *force_rebalance_nodes = NULL;
+
+	if (argc < 3 || argc > 4) {
+		usage(argv[0]);
+		exit(1);
+	}
+
+	write_fd = atoi(argv[1]);
+	sockpath = argv[2];
+
+	mem_ctx = talloc_new(NULL);
+	if (mem_ctx == NULL) {
+		fprintf(stderr, "talloc_new() failed\n");
+		ret = ENOMEM;
+		goto done;
+	}
+
+	if (argc == 4) {
+		force_rebalance_nodes = parse_node_list(mem_ctx, argv[3]);
+		if (force_rebalance_nodes == NULL) {
+			usage(argv[0]);
+			exit(1);
+		}
+	}
+
+	logging_init(mem_ctx, NULL, NULL, "ctdb-takeover");
+
+	ev = tevent_context_init(mem_ctx);
+	if (ev == NULL) {
+		D_ERR("tevent_context_init() failed\n");
+		ret = ENOMEM;
+		goto done;
+	}
+
+	ret = ctdb_client_init(mem_ctx, ev, sockpath, &client);
+	if (ret != 0) {
+		D_ERR("ctdb_client_init() failed, ret=%d\n", ret);
+		goto done;
+	}
+
+	req = takeover_send(mem_ctx, ev, client, force_rebalance_nodes);
+	if (req == NULL) {
+		D_ERR("takeover_send() failed\n");
+		ret = 1;
+		goto done;
+	}
+
+	if (! tevent_req_poll(req, ev)) {
+		D_ERR("tevent_req_poll() failed\n");
+		ret = 1;
+		goto done;
+	}
+
+	/* Start from success explicitly, so the status reported below
+	 * does not depend on the value left behind by
+	 * ctdb_client_init(). */
+	ret = 0;
+	takeover_recv(req, &ret);
+	TALLOC_FREE(req);
+	if (ret != 0) {
+		D_ERR("takeover run failed, ret=%d\n", ret);
+	}
+
+done:
+	sys_write_v(write_fd, &ret, sizeof(ret));
+
+	talloc_free(mem_ctx);
+	return ret;
+}