From 913c1dff25da868988b337e04f5cf5067bea3590 Mon Sep 17 00:00:00 2001 From: Alan Conway Date: Thu, 4 Jul 2013 15:30:19 +0000 Subject: QPID-4944: HA Sporadic failure: test_failover_send_receive Test failing if run as: ha_tests.py -DDURATION=2 AssertionError: Stalled test0 waiting for 248, sent 1228 The problem was a missing call to notify() when a ReplicatingSubscription skipped a message. That resulted in very long (>1s) delays between skipped messages, which caused the test to time out. Changes: - ReplicatingSubscription::deliver: call notify() to keep consumer active. - Re-enable test_failover_send_receive. - Increase default credit for replicating subscription to match qpid-send. - Rename ReplicatingSubscription::unacked to unready for clearer meaning. git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk@1499789 13f79535-47bb-0310-9956-ffa450edef68 --- qpid/cpp/src/tests/brokertest.py | 3 ++- qpid/cpp/src/tests/ha_tests.py | 10 ++++------ 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'qpid/cpp/src/tests') diff --git a/qpid/cpp/src/tests/brokertest.py b/qpid/cpp/src/tests/brokertest.py index 220c5f4367..15372b312d 100644 --- a/qpid/cpp/src/tests/brokertest.py +++ b/qpid/cpp/src/tests/brokertest.py @@ -578,7 +578,7 @@ class NumberedReceiver(Thread): """ def __init__(self, broker, sender=None, queue="test-queue", connection_options=RECONNECT_OPTIONS, - failover_updates=True, url=None): + failover_updates=True, url=None, args=[]): """ sender: enable flow control. Call sender.received(n) for each message received. 
""" @@ -591,6 +591,7 @@ class NumberedReceiver(Thread): "--forever" ] if failover_updates: cmd += [ "--failover-updates" ] + cmd += args self.receiver = self.test.popen( cmd, expect=EXPECT_RUNNING, stdout=PIPE) self.lock = Lock() diff --git a/qpid/cpp/src/tests/ha_tests.py b/qpid/cpp/src/tests/ha_tests.py index 2235e87dc5..f3360658d3 100755 --- a/qpid/cpp/src/tests/ha_tests.py +++ b/qpid/cpp/src/tests/ha_tests.py @@ -927,9 +927,7 @@ class LongTests(HaBrokerTest): if d: return float(d)*60 else: return 3 # Default is to be quick - # FIXME aconway 2013-06-27: skip this test pending a fix for - # https://issues.apache.org/jira/browse/QPID-4944 - def skip_test_failover_send_receive(self): + def test_failover_send_receive(self): """Test failover with continuous send-receive""" brokers = HaCluster(self, 3) @@ -937,7 +935,7 @@ class LongTests(HaBrokerTest): n = 10 senders = [NumberedSender(brokers[0], url=brokers.url, max_depth=1024, failover_updates=False, - queue="test%s"%(i)) for i in xrange(n)] + queue="test%s"%(i)) for i in xrange(n)] receivers = [NumberedReceiver(brokers[0], url=brokers.url, sender=senders[i], failover_updates=False, queue="test%s"%(i)) for i in xrange(n)] @@ -966,7 +964,7 @@ class LongTests(HaBrokerTest): # one or two backups are running, for s in senders: s.sender.assert_running() for r in receivers: r.receiver.assert_running() - checkpoint = [ r.received+100 for r in receivers ] + checkpoint = [ r.received+10 for r in receivers ] victim = random.choice([0,1,2,primary]) # Give the primary a better chance. if victim == primary: # Don't kill primary till it is active and the next @@ -982,7 +980,7 @@ class LongTests(HaBrokerTest): # Make sure we are not stalled map(wait_passed, receivers, checkpoint) # Run another checkpoint to ensure things work in this configuration - checkpoint = [ r.received+100 for r in receivers ] + checkpoint = [ r.received+10 for r in receivers ] map(wait_passed, receivers, checkpoint) i += 1 except: -- cgit v1.2.1