summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRonnie Sahlberg <sahlberg@ronnie>2007-10-19 08:58:30 +1000
committerRonnie Sahlberg <sahlberg@ronnie>2007-10-19 08:58:30 +1000
commitd1ba047b7f8748ffa22bdbea29e4515e8be97d8b (patch)
tree1f5ea21d2b3f3b6a38f21eb5a42aa42c71a33979
parent755511d28df7d0f16abfc37e9c5a607e075fc7fe (diff)
downloadsamba-d1ba047b7f8748ffa22bdbea29e4515e8be97d8b.tar.gz
add a new transport method so that when a node is marked as dead, we
shut down and restart the transport. otherwise, if we use the tcp transport, the tcp connection might try to retransmit the queued data during the time the node is unavailable. this, together with the exponential backoff for tcp, means that the tcp connection quickly reaches the maximum backoff rto, which is often 60 or 120 seconds. this would mean that it could take up to 60/120 seconds before the tcp layer detects that the connection is dead and has to be reestablished. (This used to be ctdb commit 0256db470879ce556b0f00070f7ebeaf37e529ab)
-rw-r--r--ctdb/include/ctdb_private.h1
-rw-r--r--ctdb/server/ctdb_server.c5
-rw-r--r--ctdb/tcp/tcp_init.c23
3 files changed, 28 insertions, 1 deletions
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h
index 3298106d629..4eeb17ec8de 100644
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -207,6 +207,7 @@ struct ctdb_methods {
int (*queue_pkt)(struct ctdb_node *, uint8_t *data, uint32_t length);
void *(*allocate_pkt)(TALLOC_CTX *mem_ctx, size_t );
void (*shutdown)(struct ctdb_context *); /* shutdown transport */
+ void (*restart)(struct ctdb_node *); /* stop and restart the connection */
};
/*
diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c
index 5cdc2a5deaf..dddf90753bd 100644
--- a/ctdb/server/ctdb_server.c
+++ b/ctdb/server/ctdb_server.c
@@ -338,9 +338,12 @@ void ctdb_node_dead(struct ctdb_node *node)
node->flags |= NODE_FLAGS_DISCONNECTED | NODE_FLAGS_UNHEALTHY;
node->rx_cnt = 0;
node->dead_count = 0;
- DEBUG(1,("%s: node %s is dead: %u connected\n",
+
+ DEBUG(0,("%s: node %s is dead: %u connected\n",
node->ctdb->name, node->name, node->ctdb->num_connected));
ctdb_daemon_cancel_controls(node->ctdb, node);
+
+ node->ctdb->methods->restart(node);
}
/*
diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c
index f5d4e4c1d6b..98a4c493f2e 100644
--- a/ctdb/tcp/tcp_init.c
+++ b/ctdb/tcp/tcp_init.c
@@ -88,6 +88,28 @@ static int ctdb_tcp_start(struct ctdb_context *ctdb)
return 0;
}
+/*
+ tear down the tcp connection to a node that has been marked dead and
+ schedule a reconnect attempt with a zero timeout
+*/
+static void ctdb_tcp_restart(struct ctdb_node *node)
+{
+ struct ctdb_tcp_node *tnode = talloc_get_type(
+ node->private_data, struct ctdb_tcp_node);
+
+ DEBUG(0,("Tearing down connection to dead node :%d\n", node->pnn));
+
+ if (tnode->fd != -1) { /* only close a descriptor that is actually open */
+ close(tnode->fd);
+ tnode->fd = -1;
+ }
+
+ ctdb_queue_set_fd(tnode->out_queue, -1); /* outgoing queue no longer has an fd */
+
+ event_add_timed(node->ctdb->ev, tnode, timeval_zero(),
+ ctdb_tcp_node_connect, node);
+}
+
/*
shutdown the transport
@@ -121,6 +143,7 @@ static const struct ctdb_methods ctdb_tcp_methods = {
.add_node = ctdb_tcp_add_node,
.allocate_pkt = ctdb_tcp_allocate_pkt,
.shutdown = ctdb_tcp_shutdown,
+ .restart = ctdb_tcp_restart,
};
/*