summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRonnie Sahlberg <ronniesahlberg@gmail.com>2008-12-02 13:26:30 +1100
committerRonnie Sahlberg <ronniesahlberg@gmail.com>2008-12-02 13:26:30 +1100
commitedb7241c05c2d11b73d5db0b13460dc363e90919 (patch)
tree06cd18e39bd331c5175951d68abb6471c554b189
parent7592a97d163ffd6fceb15ff57f5067a67626ba7d (diff)
downloadsamba-edb7241c05c2d11b73d5db0b13460dc363e90919.tar.gz
redesign how reloadnodes is implemented.
Modify the transport methods to allow restarting individual connections and to set up destructors properly. Only tear down/set up tcp connections to nodes removed from the cluster or nodes added to the cluster. Leave tcp connections to unchanged nodes connected. Make "ctdb reloadnodes" explicitly cause a recovery of the cluster once the files have been reloaded. (This used to be ctdb commit d1057ed6de7de9f2a64d8fa012c52647e89b515b)
-rw-r--r--ctdb/include/ctdb_private.h3
-rw-r--r--ctdb/server/ctdb_recover.c29
-rw-r--r--ctdb/tcp/tcp_connect.c5
-rw-r--r--ctdb/tcp/tcp_init.c48
-rw-r--r--ctdb/tools/ctdb.c3
5 files changed, 71 insertions, 17 deletions
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h
index 19f85f2e11e..c40ffbdaf93 100644
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -236,8 +236,9 @@ struct ctdb_node {
*/
struct ctdb_methods {
int (*initialise)(struct ctdb_context *); /* initialise transport structures */
- int (*start)(struct ctdb_context *); /* start protocol processing */
+ int (*start)(struct ctdb_context *); /* start the transport */
int (*add_node)(struct ctdb_node *); /* setup a new node */
+ int (*connect_node)(struct ctdb_node *); /* connect to node */
int (*queue_pkt)(struct ctdb_node *, uint8_t *data, uint32_t length);
void *(*allocate_pkt)(TALLOC_CTX *mem_ctx, size_t );
void (*shutdown)(struct ctdb_context *); /* shutdown transport */
diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c
index 9be2eb2a3cf..8bed9e6aea7 100644
--- a/ctdb/server/ctdb_recover.c
+++ b/ctdb/server/ctdb_recover.c
@@ -213,20 +213,43 @@ static void
ctdb_reload_nodes_event(struct event_context *ev, struct timed_event *te,
struct timeval t, void *private_data)
{
- int i;
-
+ int i, num_nodes;
struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+ TALLOC_CTX *tmp_ctx;
+ struct ctdb_node **nodes;
+
+ tmp_ctx = talloc_new(ctdb);
+
+ /* steal the old nodes file for a while */
+ talloc_steal(tmp_ctx, ctdb->nodes);
+ nodes = ctdb->nodes;
+ ctdb->nodes = NULL;
+ num_nodes = ctdb->num_nodes;
+ ctdb->num_nodes = 0;
+ /* load the new nodes file */
ctdb_load_nodes_file(ctdb);
for (i=0; i<ctdb->num_nodes; i++) {
+ /* keep any identical pre-existing nodes and connections */
+ if ((i < num_nodes) && ctdb_same_address(&ctdb->nodes[i]->address, &nodes[i]->address)) {
+ talloc_free(ctdb->nodes[i]);
+ ctdb->nodes[i] = talloc_steal(ctdb->nodes, nodes[i]);
+ continue;
+ }
+
+ /* any new or different nodes must be added */
if (ctdb->methods->add_node(ctdb->nodes[i]) != 0) {
DEBUG(DEBUG_CRIT, (__location__ " methods->add_node failed at %d\n", i));
ctdb_fatal(ctdb, "failed to add node. shutting down\n");
}
+ if (ctdb->methods->connect_node(ctdb->nodes[i]) != 0) {
+ DEBUG(DEBUG_CRIT, (__location__ " methods->add_connect failed at %d\n", i));
+ ctdb_fatal(ctdb, "failed to connect to node. shutting down\n");
+ }
}
- ctdb->methods->start(ctdb);
+ talloc_free(tmp_ctx);
return;
}
diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c
index 0e892affe91..bd8af57e08a 100644
--- a/ctdb/tcp/tcp_connect.c
+++ b/ctdb/tcp/tcp_connect.c
@@ -47,6 +47,7 @@ void ctdb_tcp_stop_connection(struct ctdb_node *node)
/*
called when a complete packet has come in - should not happen on this socket
+ unless the other side closes the connection with RST or FIN
*/
void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data)
{
@@ -59,7 +60,8 @@ void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data)
}
ctdb_tcp_stop_connection(node);
- tnode->connect_te = event_add_timed(node->ctdb->ev, tnode, timeval_zero(),
+ tnode->connect_te = event_add_timed(node->ctdb->ev, tnode,
+ timeval_current_ofs(3, 0),
ctdb_tcp_node_connect, node);
}
@@ -149,6 +151,7 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te,
return;
}
+DEBUG(DEBUG_ERR,("create socket...\n"));
tnode->fd = socket(sock_out.sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
set_nonblocking(tnode->fd);
set_close_on_exec(tnode->fd);
diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c
index 8b33efeb067..737bd8e4a54 100644
--- a/ctdb/tcp/tcp_init.c
+++ b/ctdb/tcp/tcp_init.c
@@ -25,6 +25,17 @@
#include "../include/ctdb_private.h"
#include "ctdb_tcp.h"
+static int tnode_destructor(struct ctdb_tcp_node *tnode)
+{
+ struct ctdb_node *node = talloc_find_parent_bytype(tnode, struct ctdb_node);
+
+ if (tnode->fd != -1) {
+ close(tnode->fd);
+ tnode->fd = -1;
+ }
+
+ return 0;
+}
/*
initialise tcp portion of a ctdb node
@@ -37,6 +48,7 @@ static int ctdb_tcp_add_node(struct ctdb_node *node)
tnode->fd = -1;
node->private_data = tnode;
+ talloc_set_destructor(tnode, tnode_destructor);
tnode->out_queue = ctdb_queue_setup(node->ctdb, node, tnode->fd, CTDB_TCP_ALIGNMENT,
ctdb_tcp_tnode_cb, node);
@@ -70,21 +82,18 @@ static int ctdb_tcp_initialise(struct ctdb_context *ctdb)
/*
start the protocol going
*/
-static int ctdb_tcp_start(struct ctdb_context *ctdb)
+static int ctdb_tcp_connect_node(struct ctdb_node *node)
{
- int i;
+ struct ctdb_context *ctdb = node->ctdb;
+ struct ctdb_tcp_node *tnode = talloc_get_type(
+ node->private_data, struct ctdb_tcp_node);
- /* startup connections to the other servers - will happen on
+ /* startup connection to the other server - will happen on
next event loop */
- for (i=0;i<ctdb->num_nodes;i++) {
- struct ctdb_node *node = *(ctdb->nodes + i);
- struct ctdb_tcp_node *tnode = talloc_get_type(
- node->private_data, struct ctdb_tcp_node);
- if (!ctdb_same_address(&ctdb->address, &node->address)) {
- tnode->connect_te = event_add_timed(ctdb->ev, tnode,
- timeval_zero(),
- ctdb_tcp_node_connect, node);
- }
+ if (!ctdb_same_address(&ctdb->address, &node->address)) {
+ tnode->connect_te = event_add_timed(ctdb->ev, tnode,
+ timeval_zero(),
+ ctdb_tcp_node_connect, node);
}
return 0;
@@ -119,6 +128,20 @@ static void ctdb_tcp_shutdown(struct ctdb_context *ctdb)
ctdb->private_data = NULL;
}
+/*
+ start the transport
+*/
+static int ctdb_tcp_start(struct ctdb_context *ctdb)
+{
+ int i;
+
+ for (i=0; i<ctdb->num_nodes; i++) {
+ ctdb_tcp_connect_node(ctdb->nodes[i]);
+ }
+
+ return 0;
+}
+
/*
transport packet allocator - allows transport to control memory for packets
@@ -138,6 +161,7 @@ static const struct ctdb_methods ctdb_tcp_methods = {
.start = ctdb_tcp_start,
.queue_pkt = ctdb_tcp_queue_pkt,
.add_node = ctdb_tcp_add_node,
+ .connect_node = ctdb_tcp_connect_node,
.allocate_pkt = ctdb_tcp_allocate_pkt,
.shutdown = ctdb_tcp_shutdown,
.restart = ctdb_tcp_restart,
diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c
index 63fba20c64c..c9656fe9794 100644
--- a/ctdb/tools/ctdb.c
+++ b/ctdb/tools/ctdb.c
@@ -2406,6 +2406,9 @@ static int control_reload_nodes_file(struct ctdb_context *ctdb, int argc, const
DEBUG(DEBUG_ERR, ("ERROR: Failed to reload nodes file on node %u. You MUST fix that node manually!\n", mypnn));
}
+ /* initiate a recovery */
+ control_recover(ctdb, argc, argv);
+
return 0;
}