summaryrefslogtreecommitdiff
path: root/ctdb/server/ctdb_call.c
diff options
context:
space:
mode:
authorAmitay Isaacs <amitay@gmail.com>2017-05-18 11:50:09 +1000
committerAmitay Isaacs <amitay@samba.org>2017-05-24 17:03:28 +0200
commitf5f05a644dadc0b1858c99c5f1f5af1ef80f3a28 (patch)
tree02ffe4f90267cf4b0d5d92da005df6e700e19e73 /ctdb/server/ctdb_call.c
parenta50b25d0ebbe731a766f8d2ce1924b34d6041668 (diff)
downloadsamba-f5f05a644dadc0b1858c99c5f1f5af1ef80f3a28.tar.gz
ctdb-readonly: Avoid a tight loop waiting for revoke to complete
BUG: https://bugzilla.samba.org/show_bug.cgi?id=12697 During revoking readonly delegations, if one of the nodes disappears, then there is no point re-trying revoking readonly delegation immedately. The database needs to be recovered before the revoke operation can succeed. However, if the revoke is successful, then all the write requests need to be processed immediately before the read-only requests. This avoids starving write requests, in case there are read-only requests coming from other nodes. In deferred_call_destructor, the result of revoke is not available and deferred calls cannot be correctly ordered. To correctly order the deferred calls, process them in revokechild_destructor where the result of revoke is known. Signed-off-by: Amitay Isaacs <amitay@gmail.com> Reviewed-by: Martin Schwenke <martin@meltin.net>
Diffstat (limited to 'ctdb/server/ctdb_call.c')
-rw-r--r--ctdb/server/ctdb_call.c91
1 files changed, 58 insertions, 33 deletions
diff --git a/ctdb/server/ctdb_call.c b/ctdb/server/ctdb_call.c
index ed0817db1cb..fa82b49b556 100644
--- a/ctdb/server/ctdb_call.c
+++ b/ctdb/server/ctdb_call.c
@@ -1571,6 +1571,7 @@ void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode)
struct revokechild_deferred_call {
+ struct revokechild_deferred_call *prev, *next;
struct ctdb_context *ctdb;
struct ctdb_req_header *hdr;
deferred_requeue_fn fn;
@@ -1586,48 +1587,31 @@ struct revokechild_handle {
int fd[2];
pid_t child;
TDB_DATA key;
-};
-
-struct revokechild_requeue_handle {
- struct ctdb_context *ctdb;
- struct ctdb_req_header *hdr;
- deferred_requeue_fn fn;
- void *ctx;
+ struct revokechild_deferred_call *deferred_call_list;
};
static void deferred_call_requeue(struct tevent_context *ev,
struct tevent_timer *te,
struct timeval t, void *private_data)
{
- struct revokechild_requeue_handle *requeue_handle = talloc_get_type(private_data, struct revokechild_requeue_handle);
+ struct revokechild_deferred_call *dlist = talloc_get_type_abort(
+ private_data, struct revokechild_deferred_call);
- requeue_handle->fn(requeue_handle->ctx, requeue_handle->hdr);
- talloc_free(requeue_handle);
-}
+ while (dlist != NULL) {
+ struct revokechild_deferred_call *dcall = dlist;
-static int deferred_call_destructor(struct revokechild_deferred_call *deferred_call)
-{
- struct ctdb_context *ctdb = deferred_call->ctdb;
- struct revokechild_requeue_handle *requeue_handle = talloc(ctdb, struct revokechild_requeue_handle);
- struct ctdb_req_call_old *c = (struct ctdb_req_call_old *)deferred_call->hdr;
-
- requeue_handle->ctdb = ctdb;
- requeue_handle->hdr = deferred_call->hdr;
- requeue_handle->fn = deferred_call->fn;
- requeue_handle->ctx = deferred_call->ctx;
- talloc_steal(requeue_handle, requeue_handle->hdr);
-
- /* when revoking, any READONLY requests have 1 second grace to let read/write finish first */
- tevent_add_timer(ctdb->ev, requeue_handle,
- timeval_current_ofs(c->flags & CTDB_WANT_READONLY ? 1 : 0, 0),
- deferred_call_requeue, requeue_handle);
-
- return 0;
+ DLIST_REMOVE(dlist, dcall);
+ dcall->fn(dcall->ctx, dcall->hdr);
+ talloc_free(dcall);
+ }
}
static int revokechild_destructor(struct revokechild_handle *rc)
{
+ struct revokechild_deferred_call *now_list = NULL;
+ struct revokechild_deferred_call *delay_list = NULL;
+
if (rc->fde != NULL) {
talloc_free(rc->fde);
}
@@ -1641,6 +1625,48 @@ static int revokechild_destructor(struct revokechild_handle *rc)
ctdb_kill(rc->ctdb, rc->child, SIGKILL);
DLIST_REMOVE(rc->ctdb_db->revokechild_active, rc);
+
+ while (rc->deferred_call_list != NULL) {
+ struct revokechild_deferred_call *dcall;
+
+ dcall = rc->deferred_call_list;
+ DLIST_REMOVE(rc->deferred_call_list, dcall);
+
+ /* If revoke is successful, then first process all the calls
+ * that need write access, and delay readonly requests by 1
+ * second grace.
+ *
+ * If revoke is unsuccessful, most likely because of node
+ * failure, delay all the pending requests, so database can
+ * be recovered.
+ */
+
+ if (rc->status == 0) {
+ struct ctdb_req_call_old *c;
+
+ c = (struct ctdb_req_call_old *)dcall->hdr;
+ if (c->flags & CTDB_WANT_READONLY) {
+ DLIST_ADD(delay_list, dcall);
+ } else {
+ DLIST_ADD(now_list, dcall);
+ }
+ } else {
+ DLIST_ADD(delay_list, dcall);
+ }
+ }
+
+ if (now_list != NULL) {
+ tevent_add_timer(rc->ctdb->ev, rc->ctdb_db,
+ tevent_timeval_current_ofs(0, 0),
+ deferred_call_requeue, now_list);
+ }
+
+ if (delay_list != NULL) {
+ tevent_add_timer(rc->ctdb->ev, rc->ctdb_db,
+ tevent_timeval_current_ofs(1, 0),
+ deferred_call_requeue, delay_list);
+ }
+
return 0;
}
@@ -1918,19 +1944,18 @@ int ctdb_add_revoke_deferred_call(struct ctdb_context *ctdb, struct ctdb_db_cont
return -1;
}
- deferred_call = talloc(rc, struct revokechild_deferred_call);
+ deferred_call = talloc(ctdb_db, struct revokechild_deferred_call);
if (deferred_call == NULL) {
DEBUG(DEBUG_ERR,("Failed to allocate deferred call structure for revoking record\n"));
return -1;
}
deferred_call->ctdb = ctdb;
- deferred_call->hdr = hdr;
+ deferred_call->hdr = talloc_steal(deferred_call, hdr);
deferred_call->fn = fn;
deferred_call->ctx = call_context;
- talloc_set_destructor(deferred_call, deferred_call_destructor);
- talloc_steal(deferred_call, hdr);
+ DLIST_ADD(rc->deferred_call_list, deferred_call);
return 0;
}