ctdb-tests: Add recovery record resurrection test for volatile databases

Ensure that deleted records and vacuumed records are not resurrected from recently inactive nodes.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=13641

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>

(cherry picked from commit dcc9935995a5a7b40df64653a605d1af89075bd1)
author: Martin Schwenke <martin@meltin.net> 2018-09-24 16:17:19 +1000
committer: Karolin Seeger <kseeger@samba.org> 2018-10-10 15:51:56 +0200
commit: 4e0071465b59d1f3c1bf60532ff2666927bf3c83 (patch)
tree: 026128b882d179984ffd73917dfe1109a2260c25 /ctdb
parent: d8087c060389ff017c307f5e3249019bbabaa8f1 (diff)
download: samba-4e0071465b59d1f3c1bf60532ff2666927bf3c83.tar.gz
1 files changed, 84 insertions, 0 deletions
diff --git a/ctdb/tests/simple/69_recovery_resurrect_deleted.sh b/ctdb/tests/simple/69_recovery_resurrect_deleted.sh
new file mode 100755
index 00000000000..95e79fdd491
--- /dev/null
+++ b/ctdb/tests/simple/69_recovery_resurrect_deleted.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+# Print the one-line summary of what this test verifies.
+test_info ()
+{
+    printf '%s\n' \
+        "Ensure recovery doesn't resurrect deleted records from recently inactive nodes"
+}
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"  # presumably defines the helpers used below (ctdb_test_init, try_command_on_node, ...) - TODO confirm
+
+ctdb_test_init "$@"
+
+set -e  # from here on, an unhandled command failure aborts the test
+
+cluster_is_healthy
+
+# Reset configuration
+ctdb_restart_when_done
+
+testdb="rec_test.tdb"  # scratch database; attached and wiped below before use
+
+echo "Getting list of nodes..."
+try_command_on_node -v any "onnode -pq all ctdb pnn | grep '^[0-9][0-9]*$'"  # grep keeps only all-digit lines; the result is read from $out
+
+first=$(echo "$out" | sed -n -e '1p')  # line 1 of $out: the node that is stopped and later continued
+second=$(echo "$out" | sed -n -e '2p')  # line 2 of $out: the node that performs the delete
+notfirst=$(echo "$out" | tail -n +2)  # lines 2..end of $out: every listed node except $first
+
+echo "Create/wipe test database ${testdb}"
+try_command_on_node $first $CTDB attach "$testdb"
+try_command_on_node $first $CTDB wipedb "$testdb"
+
+echo "store key(test1) data(value1)"
+try_command_on_node $first $CTDB writekey "$testdb" test1 value1
+
+echo "Migrate key(test1) to all nodes"
+try_command_on_node all $CTDB readkey "$testdb" test1
+
+echo "Stop node ${first}"
+try_command_on_node $first $CTDB stop
+wait_until_node_has_status $first stopped  # the delete below must happen while $first is stopped
+
+echo "Delete key(test1)"
+try_command_on_node $second $CTDB deletekey "$testdb" test1
+
+# Return 0 only when, for every node in $notfirst, the cattdb output
+# for $testdb has no line starting with "key("; return 1 otherwise.
+database_has_zero_records ()
+{
+	local n
+	for n in $notfirst ; do
+		try_command_on_node $n $CTDB cattdb "$testdb"
+		! grep -q '^key(' <<<"$out" || return 1
+	done
+
+	return 0
+}
+
+echo "Get vacuum interval"
+try_command_on_node -v $second $CTDB getvar VacuumInterval
+vacuum_interval="${out#* = }"  # drop everything up to and including the first " = ", keeping the value
+
+echo "Wait until vacuuming deletes the record on active nodes"
+# Why 4?  Steps are:
+# 1. Original node processes delete queue, asks lmaster to fetch
+# 2. lmaster recoverd fetches
+# 3. lmaster processes delete queue
+# If vacuuming is just missed then need an extra interval.
+wait_until $((vacuum_interval * 4)) database_has_zero_records  # presumably retries the check until it succeeds or the timeout (first argument) expires - TODO confirm
+
+echo "Continue node ${first}"
+try_command_on_node $first $CTDB continue
+wait_until_node_has_status $first notstopped  # $first is active again; its old copy of test1 must not come back
+
+echo "Get database contents"
+try_command_on_node -v $first $CTDB catdb "$testdb"  # catdb here (clustered dump per ctdb(1)); cattdb is used in database_has_zero_records
+
+if echo "$out" | grep -q '^key(' ; then  # any line starting "key(" means a record exists again
+	echo "BAD: Deleted record has been resurrected"
+	exit 1
+fi
+
+echo "GOOD: Deleted record is still gone"
author	Martin Schwenke <martin@meltin.net>	2018-09-24 16:17:19 +1000
committer	Karolin Seeger <kseeger@samba.org>	2018-10-10 15:51:56 +0200
commit	4e0071465b59d1f3c1bf60532ff2666927bf3c83 (patch)
tree	026128b882d179984ffd73917dfe1109a2260c25 /ctdb
parent	d8087c060389ff017c307f5e3249019bbabaa8f1 (diff)
download	samba-4e0071465b59d1f3c1bf60532ff2666927bf3c83.tar.gz