diff options
author | Martin Schwenke <martin@meltin.net> | 2016-01-19 20:33:58 +1100 |
---|---|---|
committer | Amitay Isaacs <amitay@samba.org> | 2016-04-28 09:39:16 +0200 |
commit | 64d557200ed63e1ff21cd0078e86957b689eff7e (patch) | |
tree | 9e4100d5d4e05ae0c46d58847a13632293b9d7ec /ctdb/server/ctdb_recover.c | |
parent | 0b0b954ff23149655640571801e2a3f572ebeadc (diff) | |
download | samba-64d557200ed63e1ff21cd0078e86957b689eff7e.tar.gz |
ctdb-recovery: Reimplement ctdb_recovery_lock() using ctdb_cluster_mutex()
Replace the file descriptor for the recovery lock in the CTDB context
with the cluster mutex handle, where non-NULL means locked.
Attempting to take the recovery lock is now asynchronous and no longer
blocks the recovery daemon.
Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
Diffstat (limited to 'ctdb/server/ctdb_recover.c')
-rw-r--r-- | ctdb/server/ctdb_recover.c | 111 |
1 files changed, 71 insertions, 40 deletions
diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c
index 8314388930c..f58f9a6da2a 100644
--- a/ctdb/server/ctdb_recover.c
+++ b/ctdb/server/ctdb_recover.c
@@ -1137,60 +1137,91 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
 
 bool ctdb_recovery_have_lock(struct ctdb_context *ctdb)
 {
-	return ctdb->recovery_lock_fd != -1;
+	return (ctdb->recovery_lock_handle != NULL);
 }
 
-/*
-  try and get the recovery lock in shared storage - should only work
-  on the recovery master recovery daemon. Anywhere else is a bug
- */
-bool ctdb_recovery_lock(struct ctdb_context *ctdb)
+/* Result of a recovery lock attempt, filled in by hold_reclock_handler() */
+struct hold_reclock_state {
+	bool done;
+	char status;
+};
+
+/*
+  Cluster mutex handler for ctdb_recovery_lock(): status '0' means
+  the lock was taken and h is kept as the recovery lock handle; any
+  other status is logged and h is freed.  The outcome is always
+  recorded in the hold_reclock_state passed as private_data.
+ */
+static void hold_reclock_handler(struct ctdb_context *ctdb,
+				 char status,
+				 double latency,
+				 struct ctdb_cluster_mutex_handle *h,
+				 void *private_data)
 {
-	struct flock lock;
+	struct hold_reclock_state *s =
+		(struct hold_reclock_state *) private_data;
+
+	switch (status) {
+	case '0':
+		ctdb->recovery_lock_handle = h;
+		break;
 
-	ctdb->recovery_lock_fd = open(ctdb->recovery_lock_file,
-				      O_RDWR|O_CREAT, 0600);
-	if (ctdb->recovery_lock_fd == -1) {
+	case '1':
 		DEBUG(DEBUG_ERR,
-		      ("ctdb_recovery_lock: Unable to open %s - (%s)\n",
-		       ctdb->recovery_lock_file, strerror(errno)));
-		return false;
+		      ("Unable to take recovery lock - contention\n"));
+		talloc_free(h);
+		break;
+
+	default:
+		DEBUG(DEBUG_ERR, ("ERROR: when taking recovery lock\n"));
+		talloc_free(h);
 	}
 
-	set_close_on_exec(ctdb->recovery_lock_fd);
-
-	lock.l_type = F_WRLCK;
-	lock.l_whence = SEEK_SET;
-	lock.l_start = 0;
-	lock.l_len = 1;
-	lock.l_pid = 0;
-
-	if (fcntl(ctdb->recovery_lock_fd, F_SETLK, &lock) != 0) {
-		int saved_errno = errno;
-		close(ctdb->recovery_lock_fd);
-		ctdb->recovery_lock_fd = -1;
-		/* Fail silently on these errors, since they indicate
-		 * lock contention, but log an error for any other
-		 * failure. */
-		if (saved_errno != EACCES &&
-		    saved_errno != EAGAIN) {
-			DEBUG(DEBUG_ERR,("ctdb_recovery_lock: Failed to get "
-					 "recovery lock on '%s' - (%s)\n",
-					 ctdb->recovery_lock_file,
-					 strerror(saved_errno)));
-		}
-		return false;
+	s->done = true;
+	s->status = status;
+}
+
+/*
+  Try to take the recovery lock, running the event loop until the
+  handler reports a result.  Returns true only if the lock was taken.
+ */
+bool ctdb_recovery_lock(struct ctdb_context *ctdb)
+{
+	struct ctdb_cluster_mutex_handle *h;
+	struct hold_reclock_state s = {
+		.done = false,
+		.status = '0',
+	};
+
+	h = ctdb_cluster_mutex(ctdb, 0);
+	if (h == NULL) {
+		/* bool function: -1 would be reported as success */
+		return false;
 	}
 
-	return true;
+	h->handler = hold_reclock_handler;
+	h->private_data = &s;
+
+	while (!s.done) {
+		tevent_loop_once(ctdb->ev);
+	}
+
+	if (s.status != '0') {
+		/* The handler has already freed h - do not touch it */
+		return false;
+	}
+
+	/* s is on the stack: do not leave a pointer to it in the handle */
+	h->private_data = NULL;
+
+	return true;
 }
 
 void ctdb_recovery_unlock(struct ctdb_context *ctdb)
 {
-	if (ctdb->recovery_lock_fd != -1) {
+	if (ctdb->recovery_lock_handle != NULL) {
 		DEBUG(DEBUG_NOTICE, ("Releasing recovery lock\n"));
-		close(ctdb->recovery_lock_fd);
-		ctdb->recovery_lock_fd = -1;
+		TALLOC_FREE(ctdb->recovery_lock_handle);
 	}
 }
 