From a347fd1b7adc8289812e05f98c2d0f85953c62b2 Mon Sep 17 00:00:00 2001 From: Michael Goulish Date: Tue, 15 Jun 2010 14:07:53 +0000 Subject: This change is a partial fix for the problem of long cluster failovers. This change addresses part of that problem: long newbie-broker catchup times. At this time, it looks as though there are two distinct mechanisms causing these long catchup times. They roughly divide into a population of times less than 20 seconds, and a population of much longer times (up to hundreds of seconds). This fix addresses only the shorter times. In a test of 25 failovers before and after the change, it reduced those times by an average of nearly 50%. A T-test on the times indicates that the likelihood of the observed change happening randomly is less than 2%. git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk@954895 13f79535-47bb-0310-9956-ffa450edef68 --- qpid/cpp/src/qpid/cluster/Cluster.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'qpid/cpp/src') diff --git a/qpid/cpp/src/qpid/cluster/Cluster.cpp b/qpid/cpp/src/qpid/cluster/Cluster.cpp index 6b9fceccd9..9ca6fbf2bf 100644 --- a/qpid/cpp/src/qpid/cluster/Cluster.cpp +++ b/qpid/cpp/src/qpid/cluster/Cluster.cpp @@ -444,8 +444,10 @@ void Cluster::deliveredEvent(const Event& e) { EventFrame ef(e, e.getFrame()); // Stop the deliverEventQueue on update offers. // This preserves the connection decoder fragments for an update. + // Only do this for the two brokers that are directly involved in this + // offer: the one making the offer, or the one receiving it. const ClusterUpdateOfferBody* offer = castUpdateOffer(ef.frame.getBody()); - if (offer) { + if (offer && ( e.getMemberId() == self || MemberId(offer->getUpdatee()) == self) ) { QPID_LOG(info, *this << " stall for update offer from " << e.getMemberId() << " to " << MemberId(offer->getUpdatee())); deliverEventQueue.stop(); -- cgit v1.2.1