diff options
author | Ronnie Sahlberg <ronniesahlberg@gmail.com> | 2008-12-02 13:26:30 +1100 |
---|---|---|
committer | Ronnie Sahlberg <ronniesahlberg@gmail.com> | 2008-12-02 13:26:30 +1100 |
commit | edb7241c05c2d11b73d5db0b13460dc363e90919 (patch) | |
tree | 06cd18e39bd331c5175951d68abb6471c554b189 | |
parent | 7592a97d163ffd6fceb15ff57f5067a67626ba7d (diff) | |
download | samba-edb7241c05c2d11b73d5db0b13460dc363e90919.tar.gz |
redesign how reloadnodes is implemented.
modify the transport methods to allow restarting individual connections
and to set up destructors properly.
only tear down/set-up tcp connections to nodes removed from the cluster
or nodes added to the cluster.
Leave tcp connections to unchanged nodes connected.
make "ctdb reloadnodes" explicitly cause a recovery of the cluster once
the files have been reloaded
(This used to be ctdb commit d1057ed6de7de9f2a64d8fa012c52647e89b515b)
-rw-r--r-- | ctdb/include/ctdb_private.h | 3 | ||||
-rw-r--r-- | ctdb/server/ctdb_recover.c | 29 | ||||
-rw-r--r-- | ctdb/tcp/tcp_connect.c | 5 | ||||
-rw-r--r-- | ctdb/tcp/tcp_init.c | 48 | ||||
-rw-r--r-- | ctdb/tools/ctdb.c | 3 |
5 files changed, 71 insertions, 17 deletions
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 19f85f2e11e..c40ffbdaf93 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -236,8 +236,9 @@ struct ctdb_node { */ struct ctdb_methods { int (*initialise)(struct ctdb_context *); /* initialise transport structures */ - int (*start)(struct ctdb_context *); /* start protocol processing */ + int (*start)(struct ctdb_context *); /* start the transport */ int (*add_node)(struct ctdb_node *); /* setup a new node */ + int (*connect_node)(struct ctdb_node *); /* connect to node */ int (*queue_pkt)(struct ctdb_node *, uint8_t *data, uint32_t length); void *(*allocate_pkt)(TALLOC_CTX *mem_ctx, size_t ); void (*shutdown)(struct ctdb_context *); /* shutdown transport */ diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c index 9be2eb2a3cf..8bed9e6aea7 100644 --- a/ctdb/server/ctdb_recover.c +++ b/ctdb/server/ctdb_recover.c @@ -213,20 +213,43 @@ static void ctdb_reload_nodes_event(struct event_context *ev, struct timed_event *te, struct timeval t, void *private_data) { - int i; - + int i, num_nodes; struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); + TALLOC_CTX *tmp_ctx; + struct ctdb_node **nodes; + + tmp_ctx = talloc_new(ctdb); + + /* steal the old nodes file for a while */ + talloc_steal(tmp_ctx, ctdb->nodes); + nodes = ctdb->nodes; + ctdb->nodes = NULL; + num_nodes = ctdb->num_nodes; + ctdb->num_nodes = 0; + /* load the new nodes file */ ctdb_load_nodes_file(ctdb); for (i=0; i<ctdb->num_nodes; i++) { + /* keep any identical pre-existing nodes and connections */ + if ((i < num_nodes) && ctdb_same_address(&ctdb->nodes[i]->address, &nodes[i]->address)) { + talloc_free(ctdb->nodes[i]); + ctdb->nodes[i] = talloc_steal(ctdb->nodes, nodes[i]); + continue; + } + + /* any new or different nodes must be added */ if (ctdb->methods->add_node(ctdb->nodes[i]) != 0) { DEBUG(DEBUG_CRIT, (__location__ " methods->add_node failed at %d\n", i)); 
ctdb_fatal(ctdb, "failed to add node. shutting down\n"); } + if (ctdb->methods->connect_node(ctdb->nodes[i]) != 0) { + DEBUG(DEBUG_CRIT, (__location__ " methods->add_connect failed at %d\n", i)); + ctdb_fatal(ctdb, "failed to connect to node. shutting down\n"); + } } - ctdb->methods->start(ctdb); + talloc_free(tmp_ctx); return; } diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index 0e892affe91..bd8af57e08a 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -47,6 +47,7 @@ void ctdb_tcp_stop_connection(struct ctdb_node *node) /* called when a complete packet has come in - should not happen on this socket + unless the other side closes the connection with RST or FIN */ void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data) { @@ -59,7 +60,8 @@ void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data) } ctdb_tcp_stop_connection(node); - tnode->connect_te = event_add_timed(node->ctdb->ev, tnode, timeval_zero(), + tnode->connect_te = event_add_timed(node->ctdb->ev, tnode, + timeval_current_ofs(3, 0), ctdb_tcp_node_connect, node); } @@ -149,6 +151,7 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te, return; } +DEBUG(DEBUG_ERR,("create socket...\n")); tnode->fd = socket(sock_out.sa.sa_family, SOCK_STREAM, IPPROTO_TCP); set_nonblocking(tnode->fd); set_close_on_exec(tnode->fd); diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c index 8b33efeb067..737bd8e4a54 100644 --- a/ctdb/tcp/tcp_init.c +++ b/ctdb/tcp/tcp_init.c @@ -25,6 +25,17 @@ #include "../include/ctdb_private.h" #include "ctdb_tcp.h" +static int tnode_destructor(struct ctdb_tcp_node *tnode) +{ + struct ctdb_node *node = talloc_find_parent_bytype(tnode, struct ctdb_node); + + if (tnode->fd != -1) { + close(tnode->fd); + tnode->fd = -1; + } + + return 0; +} /* initialise tcp portion of a ctdb node @@ -37,6 +48,7 @@ static int ctdb_tcp_add_node(struct ctdb_node *node) tnode->fd = -1; node->private_data = tnode; + 
talloc_set_destructor(tnode, tnode_destructor); tnode->out_queue = ctdb_queue_setup(node->ctdb, node, tnode->fd, CTDB_TCP_ALIGNMENT, ctdb_tcp_tnode_cb, node); @@ -70,21 +82,18 @@ static int ctdb_tcp_initialise(struct ctdb_context *ctdb) /* start the protocol going */ -static int ctdb_tcp_start(struct ctdb_context *ctdb) +static int ctdb_tcp_connect_node(struct ctdb_node *node) { - int i; + struct ctdb_context *ctdb = node->ctdb; + struct ctdb_tcp_node *tnode = talloc_get_type( + node->private_data, struct ctdb_tcp_node); - /* startup connections to the other servers - will happen on + /* startup connection to the other server - will happen on next event loop */ - for (i=0;i<ctdb->num_nodes;i++) { - struct ctdb_node *node = *(ctdb->nodes + i); - struct ctdb_tcp_node *tnode = talloc_get_type( - node->private_data, struct ctdb_tcp_node); - if (!ctdb_same_address(&ctdb->address, &node->address)) { - tnode->connect_te = event_add_timed(ctdb->ev, tnode, - timeval_zero(), - ctdb_tcp_node_connect, node); - } + if (!ctdb_same_address(&ctdb->address, &node->address)) { + tnode->connect_te = event_add_timed(ctdb->ev, tnode, + timeval_zero(), + ctdb_tcp_node_connect, node); } return 0; @@ -119,6 +128,20 @@ static void ctdb_tcp_shutdown(struct ctdb_context *ctdb) ctdb->private_data = NULL; } +/* + start the transport +*/ +static int ctdb_tcp_start(struct ctdb_context *ctdb) +{ + int i; + + for (i=0; i<ctdb->num_nodes; i++) { + ctdb_tcp_connect_node(ctdb->nodes[i]); + } + + return 0; +} + /* transport packet allocator - allows transport to control memory for packets @@ -138,6 +161,7 @@ static const struct ctdb_methods ctdb_tcp_methods = { .start = ctdb_tcp_start, .queue_pkt = ctdb_tcp_queue_pkt, .add_node = ctdb_tcp_add_node, + .connect_node = ctdb_tcp_connect_node, .allocate_pkt = ctdb_tcp_allocate_pkt, .shutdown = ctdb_tcp_shutdown, .restart = ctdb_tcp_restart, diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c index 63fba20c64c..c9656fe9794 100644 --- a/ctdb/tools/ctdb.c +++ 
b/ctdb/tools/ctdb.c @@ -2406,6 +2406,9 @@ static int control_reload_nodes_file(struct ctdb_context *ctdb, int argc, const DEBUG(DEBUG_ERR, ("ERROR: Failed to reload nodes file on node %u. You MUST fix that node manually!\n", mypnn)); } + /* initiate a recovery */ + control_recover(ctdb, argc, argv); + return 0; } |