diff options
author | Ronnie Sahlberg <sahlberg@ronnie> | 2007-10-19 08:58:30 +1000 |
---|---|---|
committer | Ronnie Sahlberg <sahlberg@ronnie> | 2007-10-19 08:58:30 +1000 |
commit | d1ba047b7f8748ffa22bdbea29e4515e8be97d8b (patch) | |
tree | 1f5ea21d2b3f3b6a38f21eb5a42aa42c71a33979 | |
parent | 755511d28df7d0f16abfc37e9c5a607e075fc7fe (diff) | |
download | samba-d1ba047b7f8748ffa22bdbea29e4515e8be97d8b.tar.gz |
Add a new transport method so that when a node is marked as dead, we
shut down and restart the transport.
Otherwise, if we use the tcp transport, the tcp connection might try to
retransmit the queued data during the time the node is unavailable.
This, together with the exponential backoff for tcp, means that the tcp
connection quickly reaches the maximum backoff RTO (retransmission
timeout), which is often 60 or 120 seconds. This would mean that it could
take up to 60/120 seconds before the tcp layer detects that the
connection is dead and it has to be reestablished.
(This used to be ctdb commit 0256db470879ce556b0f00070f7ebeaf37e529ab)
-rw-r--r-- | ctdb/include/ctdb_private.h | 1 | ||||
-rw-r--r-- | ctdb/server/ctdb_server.c | 5 | ||||
-rw-r--r-- | ctdb/tcp/tcp_init.c | 23 |
3 files changed, 28 insertions, 1 deletions
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 3298106d629..4eeb17ec8de 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -207,6 +207,7 @@ struct ctdb_methods { int (*queue_pkt)(struct ctdb_node *, uint8_t *data, uint32_t length); void *(*allocate_pkt)(TALLOC_CTX *mem_ctx, size_t ); void (*shutdown)(struct ctdb_context *); /* shutdown transport */ + void (*restart)(struct ctdb_node *); /* stop and restart the connection */ }; /* diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c index 5cdc2a5deaf..dddf90753bd 100644 --- a/ctdb/server/ctdb_server.c +++ b/ctdb/server/ctdb_server.c @@ -338,9 +338,12 @@ void ctdb_node_dead(struct ctdb_node *node) node->flags |= NODE_FLAGS_DISCONNECTED | NODE_FLAGS_UNHEALTHY; node->rx_cnt = 0; node->dead_count = 0; - DEBUG(1,("%s: node %s is dead: %u connected\n", + + DEBUG(0,("%s: node %s is dead: %u connected\n", node->ctdb->name, node->name, node->ctdb->num_connected)); ctdb_daemon_cancel_controls(node->ctdb, node); + + node->ctdb->methods->restart(node); } /* diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c index f5d4e4c1d6b..98a4c493f2e 100644 --- a/ctdb/tcp/tcp_init.c +++ b/ctdb/tcp/tcp_init.c @@ -88,6 +88,28 @@ static int ctdb_tcp_start(struct ctdb_context *ctdb) return 0; } +/* + shutdown and try to restart a connection to a node after it has been + disconnected +*/ +static void ctdb_tcp_restart(struct ctdb_node *node) +{ + struct ctdb_tcp_node *tnode = talloc_get_type( + node->private_data, struct ctdb_tcp_node); + + DEBUG(0,("Tearing down connection to dead node :%d\n", node->pnn)); + + if (tnode->fd == -1) { + close(tnode->fd); + tnode->fd = -1; + } + + ctdb_queue_set_fd(tnode->out_queue, -1); + + event_add_timed(node->ctdb->ev, tnode, timeval_zero(), + ctdb_tcp_node_connect, node); +} + /* shutdown the transport @@ -121,6 +143,7 @@ static const struct ctdb_methods ctdb_tcp_methods = { .add_node = ctdb_tcp_add_node, .allocate_pkt = 
ctdb_tcp_allocate_pkt, .shutdown = ctdb_tcp_shutdown, + .restart = ctdb_tcp_restart, }; /* |