/*
   CTDB IP takeover helper

   Copyright (C) Martin Schwenke  2016

   Based on ctdb_recovery_helper.c
   Copyright (C) Amitay Isaacs  2015

   and ctdb_takeover.c
   Copyright (C) Ronnie Sahlberg  2007
   Copyright (C) Andrew Tridgell  2007
   Copyright (C) Martin Schwenke  2011

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/

#include "replace.h"
#include "system/network.h"
#include "system/filesys.h"

/*
 * NOTE(review): the names of these three system headers were missing
 * from the reviewed copy of this file.  talloc.h and tevent.h are
 * required by the code below; popt.h is assumed - confirm.
 */
#include <popt.h>
#include <talloc.h>
#include <tevent.h>

#include "lib/util/debug.h"
#include "lib/util/strv.h"
#include "lib/util/strv_util.h"
#include "lib/util/sys_rw.h"
#include "lib/util/time.h"
#include "lib/util/tevent_unix.h"

#include "protocol/protocol.h"
#include "protocol/protocol_api.h"
#include "protocol/protocol_util.h"
#include "client/client.h"

#include "common/logging.h"

#include "server/ipalloc.h"

/*
 * Per-control timeout in seconds.  Starts at 9 and is overwritten with
 * the takeover_timeout tunable once the tunables have been fetched
 * (see takeover_tunables_done()).
 */
static int takeover_timeout = 9;

#define TIMEOUT() timeval_current_ofs(takeover_timeout, 0)

/*
 * Utility functions
 */

/*
 * Generic receive function for requests that carry no result.
 *
 * Returns false and stores the error in *perr (if perr is not NULL)
 * when the request failed.  Returns true otherwise, in which case
 * *perr is left untouched.
 */
static bool generic_recv(struct tevent_req *req, int *perr)
{
	int err;

	if (tevent_req_is_unix_error(req, &err)) {
		if (perr != NULL) {
			*perr = err;
		}
		return false;
	}

	return true;
}

/*
 * Map the ip_alloc_algorithm tunable to an IP allocation algorithm.
 * Any value other than 0 or 1 selects LCP2.
 */
static enum ipalloc_algorithm
determine_algorithm(const struct ctdb_tunable_list *tunables)
{
	switch (tunables->ip_alloc_algorithm) {
	case 0:
		return IPALLOC_DETERMINISTIC;
	case 1:
		return IPALLOC_NONDETERMINISTIC;
	case 2:
	default:
		return IPALLOC_LCP2;
	}
}

/**********************************************************************/

struct get_public_ips_state {
	uint32_t *pnns;
	int count;
	struct ctdb_public_ip_list *ips;
	uint32_t *ban_credits;
};

static void get_public_ips_done(struct tevent_req *subreq);

/*
 * Send GET_PUBLIC_IPS to the count nodes listed in pnns.
 *
 * The result is an array of num_nodes IP lists indexed by PNN; entries
 * for nodes that were not asked stay zeroed.  ban_credits is indexed by
 * PNN and is incremented for each node whose control fails.  If count
 * is 0 the request completes immediately with an all-zero array.
 */
static struct tevent_req *get_public_ips_send(
				TALLOC_CTX *mem_ctx,
				struct tevent_context *ev,
				struct ctdb_client_context *client,
				uint32_t *pnns,
				int count, int num_nodes,
				uint32_t *ban_credits,
				bool available_only)
{
	struct tevent_req *req, *subreq;
	struct get_public_ips_state *state;
	struct ctdb_req_control request;

	req = tevent_req_create(mem_ctx, &state, struct get_public_ips_state);
	if (req == NULL) {
		return NULL;
	}

	state->pnns = pnns;
	state->count = count;
	state->ban_credits = ban_credits;

	state->ips = talloc_zero_array(state,
				       struct ctdb_public_ip_list,
				       num_nodes);
	if (tevent_req_nomem(state->ips, req)) {
		return tevent_req_post(req, ev);
	}

	/* Short circuit if no nodes being asked for IPs */
	if (state->count == 0) {
		tevent_req_done(req);
		return tevent_req_post(req, ev);
	}

	ctdb_req_control_get_public_ips(&request, available_only);
	/* Parent the subrequest to state, like every other stage here */
	subreq = ctdb_client_control_multi_send(state, ev, client,
						state->pnns,
						state->count,
						TIMEOUT(), &request);
	if (tevent_req_nomem(subreq, req)) {
		return tevent_req_post(req, ev);
	}
	tevent_req_set_callback(subreq, get_public_ips_done, req);

	return req;
}

/*
 * Completion callback for GET_PUBLIC_IPS.
 *
 * On a transport-level failure each node with a non-zero entry in the
 * error list gets a ban credit and the request fails with the error
 * reported by the multi-control.  Otherwise each reply is parsed into
 * state->ips[pnn]; any parse failure gives that node a ban credit and
 * the request fails with EIO.
 */
static void get_public_ips_done(struct tevent_req *subreq)
{
	struct tevent_req *req = tevent_req_callback_data(
		subreq, struct tevent_req);
	struct get_public_ips_state *state = tevent_req_data(
		req, struct get_public_ips_state);
	struct ctdb_reply_control **reply = NULL;
	int *err_list = NULL;
	int ret, i;
	bool status, found_errors;

	status = ctdb_client_control_multi_recv(subreq, &ret, state, &err_list,
						&reply);
	TALLOC_FREE(subreq);
	if (! status) {
		/* err_list may be unavailable, so never index a NULL list */
		for (i = 0; err_list != NULL && i < state->count; i++) {
			if (err_list[i] != 0) {
				uint32_t pnn = state->pnns[i];

				D_ERR("control GET_PUBLIC_IPS failed on "
				      "node %u, ret=%d\n", pnn, err_list[i]);

				state->ban_credits[pnn]++;
			}
		}

		tevent_req_error(req, ret);
		return;
	}

	found_errors = false;
	for (i = 0; i < state->count; i++) {
		uint32_t pnn;
		struct ctdb_public_ip_list *ips;

		pnn = state->pnns[i];

		ret = ctdb_reply_control_get_public_ips(reply[i], state->ips,
							&ips);
		if (ret != 0) {
			D_ERR("control GET_PUBLIC_IPS failed on "
			      "node %u\n", pnn);
			state->ban_credits[pnn]++;
			found_errors = true;
			continue;
		}

		D_INFO("Fetched public IPs from node %u\n", pnn);
		state->ips[pnn] = *ips;
	}

	/* Replies are no longer needed on either path */
	talloc_free(reply);

	if (found_errors) {
		tevent_req_error(req, EIO);
		return;
	}

	tevent_req_done(req);
}

/*
 * Receive the result of get_public_ips_send().
 *
 * On success the IP list array is moved to mem_ctx and returned in
 * *ips.  On failure returns false and stores the error in *perr (if
 * perr is not NULL).
 */
static bool get_public_ips_recv(struct tevent_req *req, int *perr,
				TALLOC_CTX *mem_ctx,
				struct ctdb_public_ip_list **ips)
{
	struct get_public_ips_state *state = tevent_req_data(
		req, struct get_public_ips_state);
	int err;

	if (tevent_req_is_unix_error(req, &err)) {
		if (perr != NULL) {
			*perr = err;
		}
		return false;
	}

	*ips = talloc_steal(mem_ctx, state->ips);

	return true;
}

/**********************************************************************/

struct release_ip_state {
	int num_sent;
	int num_replies;
	int num_fails;
	int err_any;
	uint32_t *ban_credits;
};

struct release_ip_one_state {
	struct tevent_req *req;
	uint32_t *pnns;
	int count;
	const char *ip_str;
};

static void release_ip_done(struct tevent_req *subreq);

/*
 * For every address in all_ips, send RELEASE_IP to each of the given
 * nodes that knows the address but is not its assigned host.
 *
 * One multi-control is sent per address; the request completes once
 * all of them have replied.  timeout is an absolute deadline shared by
 * all of the controls.  ban_credits is indexed by PNN.
 */
static struct tevent_req *release_ip_send(TALLOC_CTX *mem_ctx,
					  struct tevent_context *ev,
					  struct ctdb_client_context *client,
					  uint32_t *pnns,
					  int count,
					  struct timeval timeout,
					  struct public_ip_list *all_ips,
					  uint32_t *ban_credits)
{
	struct tevent_req *req, *subreq;
	struct release_ip_state *state;
	struct ctdb_req_control request;
	struct public_ip_list *tmp_ip;

	req = tevent_req_create(mem_ctx, &state, struct release_ip_state);
	if (req == NULL) {
		return NULL;
	}

	state->num_sent = 0;
	state->num_replies = 0;
	state->num_fails = 0;
	state->ban_credits = ban_credits;

	/* Send a RELEASE_IP to all nodes that should not be hosting
	 * each IP.  For each IP, all but one of these will be
	 * redundant.  However, the redundant ones are used to tell
	 * nodes which node should be hosting the IP so that commands
	 * like "ctdb ip" can display a particular node's idea of who
	 * is hosting what. */
	for (tmp_ip = all_ips; tmp_ip != NULL; tmp_ip = tmp_ip->next) {
		struct release_ip_one_state *substate;
		struct ctdb_public_ip ip;
		int i;

		substate = talloc_zero(state, struct release_ip_one_state);
		if (tevent_req_nomem(substate, req)) {
			return tevent_req_post(req, ev);
		}

		substate->pnns = talloc_zero_array(substate, uint32_t, count);
		if (tevent_req_nomem(substate->pnns, req)) {
			return tevent_req_post(req, ev);
		}

		substate->count = 0;
		substate->req = req;

		substate->ip_str = ctdb_sock_addr_to_string(substate,
							    &tmp_ip->addr,
							    false);
		if (tevent_req_nomem(substate->ip_str, req)) {
			return tevent_req_post(req, ev);
		}

		for (i = 0; i < count; i++) {
			uint32_t pnn = pnns[i];

			/* Skip this node if IP is not known */
			if (! bitmap_query(tmp_ip->known_on, pnn)) {
				continue;
			}

			/* If pnn is not the node that should be
			 * hosting the IP then add it to the list of
			 * nodes that need to do a release. */
			if (tmp_ip->pnn != pnn) {
				substate->pnns[substate->count] = pnn;
				substate->count++;
			}
		}

		if (substate->count == 0) {
			/* No releases to send for this address... */
			TALLOC_FREE(substate);
			continue;
		}

		ip.pnn = tmp_ip->pnn;
		ip.addr = tmp_ip->addr;
		ctdb_req_control_release_ip(&request, &ip);
		subreq = ctdb_client_control_multi_send(state, ev, client,
							substate->pnns,
							substate->count,
							timeout,/* cumulative */
							&request);
		if (tevent_req_nomem(subreq, req)) {
			return tevent_req_post(req, ev);
		}
		tevent_req_set_callback(subreq, release_ip_done, substate);

		state->num_sent++;
	}

	/* None sent, finished... */
	if (state->num_sent == 0) {
		tevent_req_done(req);
		return tevent_req_post(req, ev);
	}

	return req;
}

/*
 * Completion callback for the RELEASE_IP multi-control of one address.
 *
 * Failing nodes get a ban credit.  The parent request is finished only
 * when replies for every address have arrived; it fails with the last
 * recorded error if any address failed.
 */
static void release_ip_done(struct tevent_req *subreq)
{
	struct release_ip_one_state *substate = tevent_req_callback_data(
		subreq, struct release_ip_one_state);
	struct tevent_req *req = substate->req;
	struct release_ip_state *state = tevent_req_data(
		req, struct release_ip_state);
	int ret, i;
	int *err_list = NULL;
	bool status, found_errors;

	status = ctdb_client_control_multi_recv(subreq, &ret, state,
						&err_list, NULL);
	TALLOC_FREE(subreq);

	if (status) {
		D_INFO("RELEASE_IP %s succeeded on %d nodes\n",
		       substate->ip_str, substate->count);
		goto done;
	}

	/* Get some clear error messages out of err_list and count
	 * banning credits.  err_list may be unavailable, so never
	 * index a NULL list. */
	found_errors = false;
	for (i = 0; err_list != NULL && i < substate->count; i++) {
		int err = err_list[i];
		if (err != 0) {
			uint32_t pnn = substate->pnns[i];

			D_ERR("RELEASE_IP %s failed on node %u, "
			      "ret=%d\n", substate->ip_str, pnn, err);

			state->ban_credits[pnn]++;
			state->err_any = err;
			found_errors = true;
		}
	}
	if (! found_errors) {
		D_ERR("RELEASE_IP %s internal error, ret=%d\n",
		      substate->ip_str, ret);
		state->err_any = EIO;
	}

	state->num_fails++;

done:
	talloc_free(substate);

	state->num_replies++;

	if (state->num_replies < state->num_sent) {
		/* Not all replies received, don't go further */
		return;
	}

	if (state->num_fails > 0) {
		tevent_req_error(req, state->err_any);
		return;
	}

	tevent_req_done(req);
}

/* Receive the result of release_ip_send(); see generic_recv(). */
static bool release_ip_recv(struct tevent_req *req, int *perr)
{
	return generic_recv(req, perr);
}

/**********************************************************************/

struct take_ip_state {
	int num_sent;
	int num_replies;
	int num_fails;
	int err_any;
	uint32_t *ban_credits;
};

struct take_ip_one_state {
	struct tevent_req *req;
	uint32_t pnn;
	const char *ip_str;
};

static void take_ip_done(struct tevent_req *subreq);

/*
 * For every assigned address in all_ips, send TAKEOVER_IP to the node
 * that should host it.  Unassigned addresses are skipped.
 *
 * The request completes once every control has replied.  timeout is an
 * absolute deadline shared by all of the controls.  ban_credits is
 * indexed by PNN.
 */
static struct tevent_req *take_ip_send(TALLOC_CTX *mem_ctx,
				       struct tevent_context *ev,
				       struct ctdb_client_context *client,
				       struct timeval timeout,
				       struct public_ip_list *all_ips,
				       uint32_t *ban_credits)
{
	struct tevent_req *req, *subreq;
	struct take_ip_state *state;
	struct ctdb_req_control request;
	struct public_ip_list *tmp_ip;

	req = tevent_req_create(mem_ctx, &state, struct take_ip_state);
	if (req == NULL) {
		return NULL;
	}

	state->num_sent = 0;
	state->num_replies = 0;
	state->num_fails = 0;
	state->ban_credits = ban_credits;

	/* For each IP, send a TAKEOVER_IP to the node that should be
	 * hosting it.  Many of these will often be redundant (since
	 * the allocation won't have changed) but they can be useful
	 * to recover from inconsistencies. */
	for (tmp_ip = all_ips; tmp_ip != NULL; tmp_ip = tmp_ip->next) {
		struct take_ip_one_state *substate;
		struct ctdb_public_ip ip;

		if (tmp_ip->pnn == CTDB_UNKNOWN_PNN) {
			/* IP will be unassigned */
			continue;
		}

		substate = talloc_zero(state, struct take_ip_one_state);
		if (tevent_req_nomem(substate, req)) {
			return tevent_req_post(req, ev);
		}

		substate->req = req;
		substate->pnn = tmp_ip->pnn;

		substate->ip_str = ctdb_sock_addr_to_string(substate,
							    &tmp_ip->addr,
							    false);
		if (tevent_req_nomem(substate->ip_str, req)) {
			return tevent_req_post(req, ev);
		}

		ip.pnn = tmp_ip->pnn;
		ip.addr = tmp_ip->addr;
		ctdb_req_control_takeover_ip(&request, &ip);
		subreq = ctdb_client_control_send(
					state, ev, client, tmp_ip->pnn,
					timeout, /* cumulative */
					&request);
		if (tevent_req_nomem(subreq, req)) {
			return tevent_req_post(req, ev);
		}
		tevent_req_set_callback(subreq, take_ip_done, substate);

		state->num_sent++;
	}

	/* None sent, finished... */
	if (state->num_sent == 0) {
		tevent_req_done(req);
		return tevent_req_post(req, ev);
	}

	return req;
}

/*
 * Completion callback for one TAKEOVER_IP control.
 *
 * A failure (either sending the control or in the reply) gives the
 * target node a ban credit.  The parent request is finished only when
 * every control has replied; it fails with the last recorded error if
 * any control failed.
 */
static void take_ip_done(struct tevent_req *subreq)
{
	struct take_ip_one_state *substate = tevent_req_callback_data(
		subreq, struct take_ip_one_state);
	struct tevent_req *req = substate->req;
	struct ctdb_reply_control *reply = NULL;
	struct take_ip_state *state = tevent_req_data(
		req, struct take_ip_state);
	int ret = 0;
	bool status;

	status = ctdb_client_control_recv(subreq, &ret, state, &reply);
	TALLOC_FREE(subreq);

	if (! status) {
		D_ERR("TAKEOVER_IP %s failed to node %u, ret=%d\n",
		      substate->ip_str, substate->pnn, ret);
		goto fail;
	}

	ret = ctdb_reply_control_takeover_ip(reply);
	if (ret != 0) {
		D_ERR("TAKEOVER_IP %s failed on node %u, ret=%d\n",
		      substate->ip_str, substate->pnn, ret);
		goto fail;
	}

	D_INFO("TAKEOVER_IP %s succeeded on node %u\n",
	       substate->ip_str, substate->pnn);
	goto done;

fail:
	state->ban_credits[substate->pnn]++;
	state->num_fails++;
	state->err_any = ret;

done:
	talloc_free(substate);

	state->num_replies++;

	if (state->num_replies < state->num_sent) {
		/* Not all replies received, don't go further */
		return;
	}

	if (state->num_fails > 0) {
		tevent_req_error(req, state->err_any);
		return;
	}

	tevent_req_done(req);
}

/* Receive the result of take_ip_send(); see generic_recv(). */
static bool take_ip_recv(struct tevent_req *req, int *perr)
{
	return generic_recv(req, perr);
}

/**********************************************************************/

struct ipreallocated_state {
	uint32_t *pnns;
	int count;
	uint32_t *ban_credits;
};

static void ipreallocated_done(struct tevent_req *subreq);

/*
 * Send IPREALLOCATED to the count nodes listed in pnns.
 *
 * timeout is an absolute deadline.  ban_credits is indexed by PNN and
 * is incremented for each node on which the control fails.
 */
static struct tevent_req *ipreallocated_send(TALLOC_CTX *mem_ctx,
					     struct tevent_context *ev,
					     struct ctdb_client_context *client,
					     uint32_t *pnns,
					     int count,
					     struct timeval timeout,
					     uint32_t *ban_credits)
{
	struct tevent_req *req, *subreq;
	struct ipreallocated_state *state;
	struct ctdb_req_control request;

	req = tevent_req_create(mem_ctx, &state, struct ipreallocated_state);
	if (req == NULL) {
		return NULL;
	}

	state->pnns = pnns;
	state->count = count;
	state->ban_credits = ban_credits;

	ctdb_req_control_ipreallocated(&request);
	subreq = ctdb_client_control_multi_send(state, ev, client,
						pnns, count,
						timeout, /* cumulative */
						&request);
	if (tevent_req_nomem(subreq, req)) {
		return tevent_req_post(req, ev);
	}
	tevent_req_set_callback(subreq, ipreallocated_done, req);

	return req;
}

/*
 * Completion callback for IPREALLOCATED.  On failure, failing nodes
 * get a ban credit and the request fails with the error reported by
 * the multi-control.
 */
static void ipreallocated_done(struct tevent_req *subreq)
{
	struct tevent_req *req = tevent_req_callback_data(
		subreq, struct tevent_req);
	struct ipreallocated_state *state = tevent_req_data(
		req, struct ipreallocated_state);
	int *err_list = NULL;
	int ret, i;
	bool status, found_errors;

	status = ctdb_client_control_multi_recv(subreq, &ret, state,
						&err_list, NULL);
	TALLOC_FREE(subreq);

	if (status) {
		D_INFO("IPREALLOCATED succeeded on %d nodes\n", state->count);
		tevent_req_done(req);
		return;
	}

	/* Get some clear error messages out of err_list and count
	 * banning credits.  err_list may be unavailable, so never
	 * index a NULL list. */
	found_errors = false;
	for (i = 0; err_list != NULL && i < state->count; i++) {
		int err = err_list[i];
		if (err != 0) {
			uint32_t pnn = state->pnns[i];

			D_ERR("IPREALLOCATED failed on node %u, ret=%d\n",
			      pnn, err);

			state->ban_credits[pnn]++;
			found_errors = true;
		}
	}
	if (! found_errors) {
		D_ERR("IPREALLOCATED internal error, ret=%d\n", ret);
	}

	tevent_req_error(req, ret);
}

/* Receive the result of ipreallocated_send(); see generic_recv(). */
static bool ipreallocated_recv(struct tevent_req *req, int *perr)
{
	return generic_recv(req, perr);
}

/**********************************************************************/

/*
 * Recalculate the allocation of public IPs to nodes and have the
 * nodes host their allocated addresses.
 *
 * - Get tunables
 * - Get nodemap
 * - Initialise IP allocation state.  Pass:
 *   + algorithm to be used;
 *   + various tunables (NoIPTakeover, NoIPFailback, NoIPHostOnAllDisabled)
 *   + list of nodes to force rebalance (internal structure, currently
 *     no way to fetch, only used by LCP2 for nodes that have had new
 *     IP addresses added).
 * - Set IP flags for IP allocation based on node map
 * - Retrieve known and available IP addresses (done separately so
 *   values can be faked in unit testing)
 * - Use ipalloc_set_public_ips() to set known and available IP
 *   addresses for allocation
 * - If cluster can't host IP addresses then jump to IPREALLOCATED
 * - Run IP allocation algorithm
 * - Send RELEASE_IP to all nodes for IPs they should not host
 * - Send TAKEOVER_IP to all nodes for IPs they should host
 * - Send IPREALLOCATED to all nodes
 */

struct takeover_state {
	struct tevent_context *ev;
	struct ctdb_client_context *client;
	struct timeval timeout;
	int num_nodes;
	uint32_t *pnns_connected;
	int num_connected;
	uint32_t *pnns_active;
	int num_active;
	uint32_t destnode;
	uint32_t *force_rebalance_nodes;
	struct ctdb_tunable_list *tun_list;
	struct ipalloc_state *ipalloc_state;
	struct ctdb_public_ip_list *known_ips;
	struct public_ip_list *all_ips;
	uint32_t *ban_credits;
};

static void takeover_tunables_done(struct tevent_req *subreq);
static void takeover_nodemap_done(struct tevent_req *subreq);
static void takeover_known_ips_done(struct tevent_req *subreq);
static void takeover_avail_ips_done(struct tevent_req *subreq);
static void takeover_release_ip_done(struct tevent_req *subreq);
static void takeover_take_ip_done(struct tevent_req *subreq);
static void takeover_ipreallocated(struct tevent_req *req);
static void takeover_ipreallocated_done(struct tevent_req *subreq);
static void takeover_failed(struct tevent_req *req, int ret);
static void takeover_failed_done(struct tevent_req *subreq);

/*
 * Start a takeover run.  The first step fetches all tunables from the
 * node this client is connected to; later steps are chained from the
 * completion callbacks.
 */
static struct tevent_req *takeover_send(TALLOC_CTX *mem_ctx,
					struct tevent_context *ev,
					struct ctdb_client_context *client,
					uint32_t *force_rebalance_nodes)
{
	struct tevent_req *req, *subreq;
	struct takeover_state *state;
	struct ctdb_req_control request;

	req = tevent_req_create(mem_ctx, &state, struct takeover_state);
	if (req == NULL) {
		return NULL;
	}

	state->ev = ev;
	state->client = client;
	state->force_rebalance_nodes = force_rebalance_nodes;
	state->destnode = ctdb_client_pnn(client);

	ctdb_req_control_get_all_tunables(&request);
	subreq = ctdb_client_control_send(state, state->ev, state->client,
					  state->destnode, TIMEOUT(),
					  &request);
	if (tevent_req_nomem(subreq, req)) {
		return tevent_req_post(req, ev);
	}
	tevent_req_set_callback(subreq, takeover_tunables_done, req);

	return req;
}

/*
 * Tunables fetched: adopt the takeover_timeout tunable as the control
 * timeout and fetch the node map.
 */
static void takeover_tunables_done(struct tevent_req *subreq)
{
	struct tevent_req *req = tevent_req_callback_data(
		subreq, struct tevent_req);
	struct takeover_state *state = tevent_req_data(
		req, struct takeover_state);
	struct ctdb_reply_control *reply;
	struct ctdb_req_control request;
	int ret;
	bool status;

	status = ctdb_client_control_recv(subreq, &ret, state, &reply);
	TALLOC_FREE(subreq);
	if (! status) {
		D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
		tevent_req_error(req, ret);
		return;
	}

	ret = ctdb_reply_control_get_all_tunables(reply, state,
						  &state->tun_list);
	if (ret != 0) {
		D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
		tevent_req_error(req, ret);
		return;
	}

	talloc_free(reply);

	takeover_timeout = state->tun_list->takeover_timeout;

	ctdb_req_control_get_nodemap(&request);
	subreq = ctdb_client_control_send(state, state->ev, state->client,
					  state->destnode, TIMEOUT(),
					  &request);
	if (tevent_req_nomem(subreq, req)) {
		return;
	}
	tevent_req_set_callback(subreq, takeover_nodemap_done, req);
}

/*
 * Node map fetched: build the connected/active node lists, allocate
 * the ban credit array and either jump straight to IPREALLOCATED (IP
 * failover disabled) or initialise IP allocation and fetch the known
 * public IPs from all connected nodes.
 */
static void takeover_nodemap_done(struct tevent_req *subreq)
{
	struct tevent_req *req = tevent_req_callback_data(
		subreq, struct tevent_req);
	struct takeover_state *state = tevent_req_data(
		req, struct takeover_state);
	struct ctdb_reply_control *reply;
	bool status;
	int ret;
	struct ctdb_node_map *nodemap;

	status = ctdb_client_control_recv(subreq, &ret, state, &reply);
	TALLOC_FREE(subreq);
	if (! status) {
		D_ERR("control GET_NODEMAP failed to node %u, ret=%d\n",
		      state->destnode, ret);
		tevent_req_error(req, ret);
		return;
	}

	ret = ctdb_reply_control_get_nodemap(reply, state, &nodemap);
	if (ret != 0) {
		D_ERR("control GET_NODEMAP failed, ret=%d\n", ret);
		tevent_req_error(req, ret);
		return;
	}

	state->num_nodes = nodemap->num;

	state->num_connected = list_of_connected_nodes(nodemap,
						       CTDB_UNKNOWN_PNN,
						       state,
						       &state->pnns_connected);
	if (state->num_connected <= 0) {
		tevent_req_error(req, ENOMEM);
		return;
	}

	state->num_active = list_of_active_nodes(nodemap,
						 CTDB_UNKNOWN_PNN,
						 state,
						 &state->pnns_active);
	if (state->num_active <= 0) {
		tevent_req_error(req, ENOMEM);
		return;
	}

	/* Default timeout for early jump to IPREALLOCATED.  See below
	 * for explanation of 3 times... */
	state->timeout = timeval_current_ofs(3 * takeover_timeout, 0);

	state->ban_credits = talloc_zero_array(state, uint32_t,
					       state->num_nodes);
	if (tevent_req_nomem(state->ban_credits, req)) {
		return;
	}

	if (state->tun_list->disable_ip_failover != 0) {
		/* IP failover is completely disabled so just send out
		 * ipreallocated event. */
		takeover_ipreallocated(req);
		return;
	}

	state->ipalloc_state =
		ipalloc_state_init(
			state, state->num_nodes,
			determine_algorithm(state->tun_list),
			(state->tun_list->no_ip_takeover != 0),
			(state->tun_list->no_ip_failback != 0),
			(state->tun_list->no_ip_host_on_all_disabled != 0),
			state->force_rebalance_nodes);
	if (tevent_req_nomem(state->ipalloc_state, req)) {
		return;
	}

	ipalloc_set_node_flags(state->ipalloc_state, nodemap);

	subreq = get_public_ips_send(state, state->ev, state->client,
				     state->pnns_connected,
				     state->num_connected,
				     state->num_nodes, state->ban_credits,
				     false);
	if (tevent_req_nomem(subreq, req)) {
		return;
	}

	tevent_req_set_callback(subreq, takeover_known_ips_done, req);
}

/*
 * Known IPs fetched: ask the active nodes that have at least one known
 * IP for their available IPs.
 */
static void takeover_known_ips_done(struct tevent_req *subreq)
{
	struct tevent_req *req = tevent_req_callback_data(
		subreq, struct tevent_req);
	struct takeover_state *state = tevent_req_data(
		req, struct takeover_state);
	int ret;
	bool status;
	uint32_t *pnns = NULL;
	int count, i;

	status = get_public_ips_recv(subreq, &ret, state, &state->known_ips);
	TALLOC_FREE(subreq);

	if (! status) {
		D_ERR("Failed to fetch known public IPs\n");
		takeover_failed(req, ret);
		return;
	}

	/* Get available IPs from active nodes that actually have known IPs */

	pnns = talloc_zero_array(state, uint32_t, state->num_active);
	if (tevent_req_nomem(pnns, req)) {
		return;
	}

	count = 0;
	for (i = 0; i < state->num_active; i++) {
		uint32_t pnn = state->pnns_active[i];

		/* If pnn has IPs then fetch available IPs from it */
		if (state->known_ips[pnn].num > 0) {
			pnns[count] = pnn;
			count++;
		}
	}

	subreq = get_public_ips_send(state, state->ev, state->client,
				     pnns, count,
				     state->num_nodes, state->ban_credits,
				     true);
	if (tevent_req_nomem(subreq, req)) {
		return;
	}

	tevent_req_set_callback(subreq, takeover_avail_ips_done, req);
}

/*
 * Available IPs fetched: run the IP allocation and start the
 * RELEASE_IP stage, or jump to IPREALLOCATED if no node can host IPs.
 */
static void takeover_avail_ips_done(struct tevent_req *subreq)
{
	struct tevent_req *req = tevent_req_callback_data(
		subreq, struct tevent_req);
	struct takeover_state *state = tevent_req_data(
		req, struct takeover_state);
	bool status;
	int ret;
	struct ctdb_public_ip_list *available_ips;

	status = get_public_ips_recv(subreq, &ret, state, &available_ips);
	TALLOC_FREE(subreq);

	if (! status) {
		D_ERR("Failed to fetch available public IPs\n");
		takeover_failed(req, ret);
		return;
	}

	ipalloc_set_public_ips(state->ipalloc_state,
			       state->known_ips, available_ips);

	if (! ipalloc_can_host_ips(state->ipalloc_state)) {
		D_NOTICE("No nodes available to host public IPs yet\n");
		takeover_ipreallocated(req);
		return;
	}

	/* Do the IP reassignment calculations */
	state->all_ips = ipalloc(state->ipalloc_state);
	if (tevent_req_nomem(state->all_ips, req)) {
		return;
	}

	/* Each of the following stages (RELEASE_IP, TAKEOVER_IP,
	 * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
	 * seconds.  However, RELEASE_IP can take longer due to TCP
	 * connection killing, so sometimes needs more time.
	 * Therefore, use a cumulative timeout of TakeoverTimeout * 3
	 * seconds across all 3 stages.  No explicit expiry checks are
	 * needed before each stage because tevent is smart enough to
	 * fire the timeouts even if they are in the past.  Initialise
	 * this here so it explicitly covers the stages we're
	 * interested in but, in particular, not the time taken by the
	 * ipalloc().
	 */
	state->timeout = timeval_current_ofs(3 * takeover_timeout, 0);

	subreq = release_ip_send(state, state->ev, state->client,
				 state->pnns_connected, state->num_connected,
				 state->timeout, state->all_ips,
				 state->ban_credits);
	if (tevent_req_nomem(subreq, req)) {
		return;
	}
	tevent_req_set_callback(subreq, takeover_release_ip_done, req);
}

/* RELEASE_IP stage finished: start the TAKEOVER_IP stage. */
static void takeover_release_ip_done(struct tevent_req *subreq)
{
	struct tevent_req *req = tevent_req_callback_data(
		subreq, struct tevent_req);
	struct takeover_state *state = tevent_req_data(
		req, struct takeover_state);
	int ret;
	bool status;

	status = release_ip_recv(subreq, &ret);
	TALLOC_FREE(subreq);

	if (! status) {
		takeover_failed(req, ret);
		return;
	}

	/* All released, now for takeovers */

	subreq = take_ip_send(state, state->ev, state->client,
			      state->timeout, state->all_ips,
			      state->ban_credits);
	if (tevent_req_nomem(subreq, req)) {
		return;
	}
	tevent_req_set_callback(subreq, takeover_take_ip_done, req);
}

/* TAKEOVER_IP stage finished: start the IPREALLOCATED stage. */
static void takeover_take_ip_done(struct tevent_req *subreq)
{
	struct tevent_req *req = tevent_req_callback_data(
		subreq, struct tevent_req);
	int ret = 0;
	bool status;

	status = take_ip_recv(subreq, &ret);
	TALLOC_FREE(subreq);

	if (! status) {
		takeover_failed(req, ret);
		return;
	}

	takeover_ipreallocated(req);
}

/* Send IPREALLOCATED to all connected nodes. */
static void takeover_ipreallocated(struct tevent_req *req)
{
	struct takeover_state *state = tevent_req_data(
		req, struct takeover_state);
	struct tevent_req *subreq;

	subreq = ipreallocated_send(state, state->ev, state->client,
				    state->pnns_connected,
				    state->num_connected,
				    state->timeout,
				    state->ban_credits);
	if (tevent_req_nomem(subreq, req)) {
		return;
	}
	tevent_req_set_callback(subreq, takeover_ipreallocated_done, req);
}

/* IPREALLOCATED stage finished: the takeover run is complete. */
static void takeover_ipreallocated_done(struct tevent_req *subreq)
{
	struct tevent_req *req = tevent_req_callback_data(
		subreq, struct tevent_req);
	int ret;
	bool status;

	status = ipreallocated_recv(subreq, &ret);
	TALLOC_FREE(subreq);

	if (! status) {
		takeover_failed(req, ret);
		return;
	}

	tevent_req_done(req);
}

struct takeover_failed_state {
	struct tevent_req *req;
	int ret;
};

/*
 * Fail the takeover run with error ret.
 *
 * If bans are enabled and some node has collected ban credits, first
 * send a CTDB_SRVID_BANNING message naming the node with the most
 * credits (the lowest PNN wins a tie) to the node this client is
 * connected to.  The request is then failed from
 * takeover_failed_done().
 */
static void takeover_failed(struct tevent_req *req, int ret)
{
	struct takeover_state *state = tevent_req_data(
		req, struct takeover_state);
	struct tevent_req *subreq;
	uint32_t max_pnn = CTDB_UNKNOWN_PNN;
	uint32_t max_credits = 0;
	int pnn;

	/* Check that bans are enabled */
	if (state->tun_list->enable_bans == 0) {
		tevent_req_error(req, ret);
		return;
	}

	for (pnn = 0; pnn < state->num_nodes; pnn++) {
		if (state->ban_credits[pnn] > max_credits) {
			max_pnn = (uint32_t)pnn;
			max_credits = state->ban_credits[pnn];
		}
	}

	if (max_credits > 0) {
		struct ctdb_req_message message;
		struct takeover_failed_state *substate;

		D_WARNING("Assigning banning credits to node %u\n", max_pnn);

		substate = talloc_zero(state, struct takeover_failed_state);
		if (tevent_req_nomem(substate, req)) {
			return;
		}
		substate->req = req;
		substate->ret = ret;

		message.srvid = CTDB_SRVID_BANNING;
		message.data.pnn = max_pnn;

		subreq = ctdb_client_message_send(
			state, state->ev, state->client,
			ctdb_client_pnn(state->client),
			&message);
		if (subreq == NULL) {
			D_ERR("failed to assign banning credits\n");
			talloc_free(substate);
			tevent_req_error(req, ret);
			return;
		}
		tevent_req_set_callback(subreq, takeover_failed_done, substate);
	} else {
		tevent_req_error(req, ret);
	}
}

/*
 * Banning message sent (or failed to send): fail the takeover run with
 * the error that was saved when the failure was first detected.
 */
static void takeover_failed_done(struct tevent_req *subreq)
{
	struct takeover_failed_state *substate = tevent_req_callback_data(
		subreq, struct takeover_failed_state);
	struct tevent_req *req = substate->req;
	int ret;
	bool status;

	status = ctdb_client_message_recv(subreq, &ret);
	TALLOC_FREE(subreq);
	if (! status) {
		D_ERR("failed to assign banning credits, ret=%d\n", ret);
	}

	ret = substate->ret;
	talloc_free(substate);
	tevent_req_error(req, ret);
}

/*
 * Receive the result of takeover_send().  *perr is written only if the
 * run failed, so callers must initialise it beforehand.
 */
static void takeover_recv(struct tevent_req *req, int *perr)
{
	generic_recv(req, perr);
}

/*
 * Parse a comma-separated list of node numbers into a talloc'd array.
 *
 * Returns NULL if memory allocation fails or if any entry is not a
 * plain decimal number that fits in 32 bits.
 */
static uint32_t *parse_node_list(TALLOC_CTX *mem_ctx, const char *s)
{
	char *strv = NULL;
	int num, i, ret;
	char *t;
	uint32_t *nodes;

	ret = strv_split(mem_ctx, &strv, s, ",");
	if (ret != 0) {
		D_ERR("out of memory\n");
		return NULL;
	}

	num = strv_count(strv);

	nodes = talloc_array(mem_ctx, uint32_t, num);
	if (nodes == NULL) {
		D_ERR("out of memory\n");
		return NULL;
	}

	t = NULL;
	for (i = 0; i < num; i++) {
		char *end = NULL;
		unsigned long val;

		t = strv_next(strv, t);
		if (t == NULL) {
			D_ERR("invalid node list\n");
			talloc_free(nodes);
			return NULL;
		}

		/* strtoul() instead of atoi() so that garbage is rejected
		 * rather than silently turned into a node number */
		errno = 0;
		val = strtoul(t, &end, 10);
		if (errno != 0 || end == t || *end != '\0' ||
		    val > UINT32_MAX) {
			D_ERR("invalid node number \"%s\"\n", t);
			talloc_free(nodes);
			return NULL;
		}
		nodes[i] = (uint32_t)val;
	}

	return nodes;
}

/* Print a usage message to stderr. */
static void usage(const char *progname)
{
	fprintf(stderr,
		"\nUsage: %s <output-fd> <ctdb-socket-path> "
		"[<force-rebalance-nodes>]\n",
		progname);
}

/*
 * Arguments - write fd, socket path, optional comma-separated list of
 * nodes to force rebalance
 *
 * The final status (0 on success) is written to the write fd as an int
 * and is also used as the exit code.
 */
int main(int argc, const char *argv[])
{
	int write_fd;
	const char *sockpath;
	TALLOC_CTX *mem_ctx;
	struct tevent_context *ev;
	struct ctdb_client_context *client;
	int ret;
	struct tevent_req *req;
	uint32_t *force_rebalance_nodes = NULL;

	if (argc < 3 || argc > 4) {
		usage(argv[0]);
		exit(1);
	}

	write_fd = atoi(argv[1]);
	sockpath = argv[2];

	mem_ctx = talloc_new(NULL);
	if (mem_ctx == NULL) {
		fprintf(stderr, "talloc_new() failed\n");
		ret = ENOMEM;
		goto done;
	}

	if (argc == 4) {
		force_rebalance_nodes = parse_node_list(mem_ctx, argv[3]);
		if (force_rebalance_nodes == NULL) {
			usage(argv[0]);
			ret = EINVAL;
			goto done;
		}
	}

	ret = logging_init(mem_ctx, NULL, NULL, "ctdb-takeover");
	if (ret != 0) {
		fprintf(stderr,
			"ctdb-takeover: Unable to initialize logging\n");
		goto done;
	}

	ev = tevent_context_init(mem_ctx);
	if (ev == NULL) {
		D_ERR("tevent_context_init() failed\n");
		ret = ENOMEM;
		goto done;
	}

	ret = ctdb_client_init(mem_ctx, ev, sockpath, &client);
	if (ret != 0) {
		D_ERR("ctdb_client_init() failed, ret=%d\n", ret);
		goto done;
	}

	req = takeover_send(mem_ctx, ev, client, force_rebalance_nodes);
	if (req == NULL) {
		D_ERR("takeover_send() failed\n");
		ret = 1;
		goto done;
	}

	if (! tevent_req_poll(req, ev)) {
		D_ERR("tevent_req_poll() failed\n");
		ret = 1;
		goto done;
	}

	/* takeover_recv() only writes ret on failure */
	ret = 0;
	takeover_recv(req, &ret);
	TALLOC_FREE(req);
	if (ret != 0) {
		D_ERR("takeover run failed, ret=%d\n", ret);
	}

done:
	/* Best-effort status report; the exit code carries it as well */
	sys_write_v(write_fd, &ret, sizeof(ret));

	talloc_free(mem_ctx);
	return ret;
}