summaryrefslogtreecommitdiff
path: root/ctdb/server/ctdb_recover.c
diff options
context:
space:
mode:
author: Martin Schwenke <martin@meltin.net> 2016-01-19 20:33:58 +1100
committer: Amitay Isaacs <amitay@samba.org> 2016-04-28 09:39:16 +0200
commit: 64d557200ed63e1ff21cd0078e86957b689eff7e (patch)
tree: 9e4100d5d4e05ae0c46d58847a13632293b9d7ec /ctdb/server/ctdb_recover.c
parent: 0b0b954ff23149655640571801e2a3f572ebeadc (diff)
download: samba-64d557200ed63e1ff21cd0078e86957b689eff7e.tar.gz
ctdb-recovery: Reimplement ctdb_recovery_lock() using ctdb_cluster_mutex()
Replace the file descriptor for the recovery lock in the CTDB context with the cluster mutex handle, where non-NULL means locked. Attempting to take the recovery lock is now asynchronous and no longer blocks the recovery daemon. Signed-off-by: Martin Schwenke <martin@meltin.net> Reviewed-by: Amitay Isaacs <amitay@gmail.com>
Diffstat (limited to 'ctdb/server/ctdb_recover.c')
-rw-r--r--ctdb/server/ctdb_recover.c93
1 files changed, 53 insertions, 40 deletions
diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c
index 8314388930c..f58f9a6da2a 100644
--- a/ctdb/server/ctdb_recover.c
+++ b/ctdb/server/ctdb_recover.c
@@ -1137,60 +1137,73 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
bool ctdb_recovery_have_lock(struct ctdb_context *ctdb)
{
- return ctdb->recovery_lock_fd != -1;
+ return (ctdb->recovery_lock_handle != NULL);
}
-/*
- try and get the recovery lock in shared storage - should only work
- on the recovery master recovery daemon. Anywhere else is a bug
- */
-bool ctdb_recovery_lock(struct ctdb_context *ctdb)
+struct hold_reclock_state {
+ bool done;
+ char status;
+};
+
+static void hold_reclock_handler(struct ctdb_context *ctdb,
+ char status,
+ double latency,
+ struct ctdb_cluster_mutex_handle *h,
+ void *private_data)
{
- struct flock lock;
+ struct hold_reclock_state *s =
+ (struct hold_reclock_state *) private_data;
+
+ switch (status) {
+ case '0':
+ ctdb->recovery_lock_handle = h;
+ break;
- ctdb->recovery_lock_fd = open(ctdb->recovery_lock_file,
- O_RDWR|O_CREAT, 0600);
- if (ctdb->recovery_lock_fd == -1) {
+ case '1':
DEBUG(DEBUG_ERR,
- ("ctdb_recovery_lock: Unable to open %s - (%s)\n",
- ctdb->recovery_lock_file, strerror(errno)));
- return false;
+ ("Unable to take recovery lock - contention\n"));
+ talloc_free(h);
+ break;
+
+ default:
+ DEBUG(DEBUG_ERR, ("ERROR: when taking recovery lock\n"));
+ talloc_free(h);
}
- set_close_on_exec(ctdb->recovery_lock_fd);
-
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 1;
- lock.l_pid = 0;
-
- if (fcntl(ctdb->recovery_lock_fd, F_SETLK, &lock) != 0) {
- int saved_errno = errno;
- close(ctdb->recovery_lock_fd);
- ctdb->recovery_lock_fd = -1;
- /* Fail silently on these errors, since they indicate
- * lock contention, but log an error for any other
- * failure. */
- if (saved_errno != EACCES &&
- saved_errno != EAGAIN) {
- DEBUG(DEBUG_ERR,("ctdb_recovery_lock: Failed to get "
- "recovery lock on '%s' - (%s)\n",
- ctdb->recovery_lock_file,
- strerror(saved_errno)));
- }
- return false;
+ s->done = true;
+ s->status = status;
+}
+
+bool ctdb_recovery_lock(struct ctdb_context *ctdb)
+{
+ struct ctdb_cluster_mutex_handle *h;
+ struct hold_reclock_state s = {
+ .done = false,
+ .status = '0',
+ };
+
+ h = ctdb_cluster_mutex(ctdb, 0);
+ if (h == NULL) {
+ return false; /* not -1: that converts to true in a bool function */
}
- return true;
+ h->handler = hold_reclock_handler;
+ h->private_data = &s;
+
+ while (!s.done) { /* hold_reclock_handler sets s.done and s.status */
+ tevent_loop_once(ctdb->ev);
+ }
+ if (s.status == '0') { /* otherwise the handler already freed h */
+ h->private_data = NULL; /* s is about to go out of scope */
+ }
+ return (s.status == '0');
}
void ctdb_recovery_unlock(struct ctdb_context *ctdb)
{
- if (ctdb->recovery_lock_fd != -1) {
+ if (ctdb->recovery_lock_handle != NULL) {
DEBUG(DEBUG_NOTICE, ("Releasing recovery lock\n"));
- close(ctdb->recovery_lock_fd);
- ctdb->recovery_lock_fd = -1;
+ TALLOC_FREE(ctdb->recovery_lock_handle);
}
}