From 9ad9300d032b5e0aba5af4f7affd81600c9f8a5d Mon Sep 17 00:00:00 2001 From: Alan Conway Date: Fri, 11 Jan 2013 20:34:19 +0000 Subject: QPID-4516: Sporadic failure in ha_tests test_failover_send_receive Several fixes were required in the code to correct this problem: - Missing break statement in switch. - Remove unused function HaBroker::resetMembership - Abort connection of timed-out backups so they can attempt to reconnect. - New primary resets membership before allowing backups to connect. - Test for and ignore double-promotion. - HaBroker: dynamic logPrefix() shows status. Made status atomic for efficient access for log messages. - Update primary status in membership. git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk@1432273 13f79535-47bb-0310-9956-ffa450edef68 --- qpid/cpp/src/tests/ha_tests.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'qpid/cpp/src/tests') diff --git a/qpid/cpp/src/tests/ha_tests.py b/qpid/cpp/src/tests/ha_tests.py index fdbd8a153b..1725c594de 100755 --- a/qpid/cpp/src/tests/ha_tests.py +++ b/qpid/cpp/src/tests/ha_tests.py @@ -1068,13 +1068,15 @@ class RecoveryTests(HaBrokerTest): l = LogLevel(ERROR) # Hide expected WARNING log messages from failover. try: # We don't want backups to time out for this test, set long timeout. - cluster = HaCluster(self, 4, args=["--ha-backup-timeout=100000"]); + cluster = HaCluster(self, 4, args=["--ha-backup-timeout=120"]); # Wait for the primary to be ready cluster[0].wait_status("active") + for b in cluster[1:4]: b.wait_status("ready") # Create a queue before the failure. s1 = cluster.connect(0).session().sender("q1;{create:always}") for b in cluster: b.wait_backup("q1") for i in xrange(100): s1.send(str(i)) + # Kill primary and 2 backups cluster[3].wait_status("ready") for i in [0,1,2]: cluster.kill(i, False) @@ -1091,14 +1093,16 @@ class RecoveryTests(HaBrokerTest): s2 = cluster.connect(3).session().sender("q2;{create:always}") # Verify that messages sent are not completed - for i in xrange(100,200): s1.send(str(i), sync=False); s2.send(str(i), sync=False) + for i in xrange(100,200): + s1.send(str(i), sync=False); + s2.send(str(i), sync=False) assertSyncTimeout(s1) self.assertEqual(s1.unsettled(), 100) assertSyncTimeout(s2) self.assertEqual(s2.unsettled(), 100) # Verify we can receive even if sending is on hold: - cluster[3].assert_browse("q1", [str(i) for i in range(100)+range(100,200)]) + cluster[3].assert_browse("q1", [str(i) for i in range(200)]) # Restart backups, verify queues are released only when both backups are up cluster.restart(1) @@ -1106,11 +1110,10 @@ class RecoveryTests(HaBrokerTest): self.assertEqual(s1.unsettled(), 100) assertSyncTimeout(s2) self.assertEqual(s2.unsettled(), 100) - self.assertEqual(cluster[3].ha_status(), "recovering") cluster.restart(2) # Verify everything is up to date and active - def settled(sender): sender.sync(); return sender.unsettled() == 0; + def settled(sender): sender.sync(timeout=1); return sender.unsettled() == 0; assert retry(lambda: settled(s1)), "Unsetttled=%s"%(s1.unsettled()) assert retry(lambda: settled(s2)), "Unsetttled=%s"%(s2.unsettled()) cluster[1].assert_browse_backup("q1", [str(i) for i in range(100)+range(100,200)]) -- cgit v1.2.1