summary | refs | log | tree | commit | diff
path: root/qpid/java/bdbstore/src
diff options
context:
space:
mode:
author: Keith Wall <kwall@apache.org> 2014-11-13 16:24:06 +0000
committer: Keith Wall <kwall@apache.org> 2014-11-13 16:24:06 +0000
commit: 6a46b152b4ca9d63c99cef6daa6e0d16dd9cb95e (patch)
tree: 80ee21c4efe204582f762dbaaec1f8924d1f6349 /qpid/java/bdbstore/src
parent: ff1ad8e3929aeecd5e3dcf6b8d039b4fb935b968 (diff)
download: qpid-python-6a46b152b4ca9d63c99cef6daa6e0d16dd9cb95e.tar.gz
QPID-6225: [Java Broker] Reduce the frequency with which the failure to ping a remote node is reported
git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk@1639380 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'qpid/java/bdbstore/src')
-rw-r--r-- qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java | 42
1 files changed, 37 insertions, 5 deletions
diff --git a/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java b/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java
index 387742fbff..52a7e28f31 100644
--- a/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java
+++ b/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java
@@ -1496,6 +1496,8 @@ public class ReplicatedEnvironmentFacade implements EnvironmentFacade, StateChan
private class RemoteNodeStateLearner implements Callable<Void>
{
+ private static final long TIMEOUT_WARN_GAP = 1000 * 60 * 5;
+ private final Map<ReplicationNode, Long> _currentlyTimedOutNodes = new HashMap<>();
private Map<String, ReplicatedEnvironment.State> _previousGroupState = Collections.emptyMap();
private boolean _previousDesignatedPrimary;
private int _previousElectableGroupOverride;
@@ -1624,7 +1626,7 @@ public class ReplicatedEnvironmentFacade implements EnvironmentFacade, StateChan
private Map<ReplicationNode, NodeState> discoverNodeStates(Collection<ReplicationNode> electableNodes)
{
final Map<ReplicationNode, NodeState> nodeStates = new HashMap<ReplicationNode, NodeState>();
- Set<Future<Void>> futures = new HashSet<Future<Void>>();
+ Map<ReplicationNode, Future<Void>> futureMap = new HashMap<ReplicationNode, Future<Void>>();
for (final ReplicationNode node : electableNodes)
{
@@ -1649,14 +1651,24 @@ public class ReplicatedEnvironmentFacade implements EnvironmentFacade, StateChan
return null;
}
});
- futures.add(future);
+ futureMap.put(node, future);
}
- for (Future<Void> future : futures)
+ boolean atLeastOneNodeTimesOut = false;
+
+ for (Map.Entry<ReplicationNode, Future<Void>> entry : futureMap.entrySet())
{
+ ReplicationNode node = entry.getKey();
+ String nodeName = node.getName();
+ Future<Void> future = entry.getValue();
try
{
future.get(_remoteNodeMonitorInterval, TimeUnit.MILLISECONDS);
+ if (_currentlyTimedOutNodes.remove(node) != null)
+ {
+ LOGGER.warn("Node '" + nodeName + "' from group " + _configuration.getGroupName()
+ + " is responding again.");
+ }
}
catch (InterruptedException e)
{
@@ -1664,14 +1676,34 @@ public class ReplicatedEnvironmentFacade implements EnvironmentFacade, StateChan
}
catch (ExecutionException e)
{
- LOGGER.warn("Cannot update node state for group " + _configuration.getGroupName(), e.getCause());
+ LOGGER.warn("Cannot determine state for node '" + nodeName + "' from group "
+ + _configuration.getGroupName(), e.getCause());
}
catch (TimeoutException e)
{
- LOGGER.warn("Timeout whilst updating node state for group " + _configuration.getGroupName());
+ atLeastOneNodeTimesOut = true;
+ if (! _currentlyTimedOutNodes.containsKey(node))
+ {
+ LOGGER.warn("Timeout whilst determining state for node '" + nodeName + "' from group "
+ + _configuration.getGroupName());
+ _currentlyTimedOutNodes.put(node, System.currentTimeMillis());
+ }
+ else if (_currentlyTimedOutNodes.get(node) > (System.currentTimeMillis() + TIMEOUT_WARN_GAP))
+ {
+ LOGGER.warn("Node '" + nodeName + "' from group "
+ + _configuration.getGroupName()
+ + " is still timing out.");
+ _currentlyTimedOutNodes.put(node, System.currentTimeMillis());
+ }
+
future.cancel(true);
}
}
+
+ if (!atLeastOneNodeTimesOut)
+ {
+ _currentlyTimedOutNodes.clear();
+ }
return nodeStates;
}