ctdb-recoverd: Use race for cluster lock as election when lock is enabled

If the cluster is partitioned then nodes in one partition can not take the lock anyway, so election is pointless. It just introduces unnecessary corner cases. Instead just race for the lock. When a node notices a lack of leader and notifies other nodes of an election via an unknown leader broadcast, the cluster lock election is hooked into this broadcast. The test needs to be updated because losing the cluster lock can now result in a leadership change. Signed-off-by: Martin Schwenke <martin@meltin.net> Reviewed-by: Amitay Isaacs <amitay@gmail.com>
author: Martin Schwenke <martin@meltin.net> 2020-03-18 15:14:39 +1100
committer: Martin Schwenke <martins@samba.org> 2022-01-17 10:21:33 +0000
commit: 73555e8248aff683b6cb3a02262a66ab52f2c665 (patch)
tree: 58a11768369bf2c80e5abaf38360d2d4b18c96ed /ctdb
parent: 938d64c8ff3d1776c2d5959714c4c11eba7278c4 (diff)
download: samba-73555e8248aff683b6cb3a02262a66ab52f2c665.tar.gz
2 files changed, 48 insertions, 2 deletions
diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c
index 51c4693c974..c2a48a07b4e 100644
--- a/ctdb/server/ctdb_recoverd.c
+++ b/ctdb/server/ctdb_recoverd.c
@@ -1831,6 +1831,37 @@ static void election_handler(uint64_t srvid, TDB_DATA data, void *private_data)
 	return;
 }
 
+static void cluster_lock_election(struct ctdb_recoverd *rec)
+{
+	bool ok;
+	/* Race for the cluster lock; on success this node becomes leader */
+	if (!this_node_can_be_leader(rec)) {
+		if (cluster_lock_held(rec)) {
+			cluster_lock_release(rec);
+		}
+		return;
+	}
+
+	/*
+	 * Lock is already held: return without releasing it and then
+	 * racing to retake it.  This provides stability.
+	 */
+	if (cluster_lock_held(rec)) {
+		return;
+	}
+
+	rec->leader = CTDB_UNKNOWN_PNN;
+	rec->election_in_progress = true;
+
+	ok = cluster_lock_take(rec);
+	if (ok) {
+		rec->leader = rec->pnn;
+		D_WARNING("Took cluster lock, leader=%"PRIu32"\n", rec->leader);
+	}
+
+	rec->election_in_progress = false;
+}
+
 /*
   force the start of the election process
  */
@@ -1848,6 +1879,11 @@ static void force_election(struct ctdb_recoverd *rec)
 		return;
 	}
 
+	if (cluster_lock_enabled(rec)) {
+		cluster_lock_election(rec);
+		return;
+	}
+
 	talloc_free(rec->election_timeout);
 	rec->election_in_progress = true;
 	rec->election_timeout = tevent_add_timer(
@@ -2007,12 +2043,23 @@ static void leader_handler(uint64_t srvid, TDB_DATA data, void *private_data)
 	}
 
 	if (pnn == CTDB_UNKNOWN_PNN) {
+		bool was_election_in_progress = rec->election_in_progress;
+
 		/*
 		 * Leader broadcast timeout was cancelled above - stop
 		 * main loop from restarting it until election is
 		 * complete
 		 */
 		rec->election_in_progress = true;
+
+		/*
+		 * This is the only notification for a cluster lock
+		 * election, so handle it here...
+		 */
+		if (cluster_lock_enabled(rec) && !was_election_in_progress) {
+			cluster_lock_election(rec);
+		}
+
 		return;
 	}
 
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh
index 36b72818f24..35363d11f1d 100755
--- a/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh
+++ b/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh
@@ -82,8 +82,7 @@ leader_old="$leader"
 leader_get "$test_node"
 
 if [ "$leader" != "$leader_old" ] ; then
-	ctdb_test_fail \
-		"BAD: Leader has changed to node ${leader}"
+	echo "OK: Leader has changed to node ${leader}" # not a failure: losing the lock may change leader
 fi
 echo "GOOD: Leader is still node ${leader}"
 echo
author	Martin Schwenke <martin@meltin.net>	2020-03-18 15:14:39 +1100
committer	Martin Schwenke <martins@samba.org>	2022-01-17 10:21:33 +0000
commit	73555e8248aff683b6cb3a02262a66ab52f2c665 (patch)
tree	58a11768369bf2c80e5abaf38360d2d4b18c96ed /ctdb
parent	938d64c8ff3d1776c2d5959714c4c11eba7278c4 (diff)
download	samba-73555e8248aff683b6cb3a02262a66ab52f2c665.tar.gz