From 6a46b152b4ca9d63c99cef6daa6e0d16dd9cb95e Mon Sep 17 00:00:00 2001 From: Keith Wall Date: Thu, 13 Nov 2014 16:24:06 +0000 Subject: QPID-6225: [Java Broker] Reduce the frequency with which the failure to ping a remote node is reported git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk@1639380 13f79535-47bb-0310-9956-ffa450edef68 --- .../replication/ReplicatedEnvironmentFacade.java | 42 +++++++++++++++++++--- 1 file changed, 37 insertions(+), 5 deletions(-) (limited to 'qpid/java/bdbstore') diff --git a/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java b/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java index 387742fbff..52a7e28f31 100644 --- a/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java +++ b/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java @@ -1496,6 +1496,8 @@ public class ReplicatedEnvironmentFacade implements EnvironmentFacade, StateChan private class RemoteNodeStateLearner implements Callable { + private static final long TIMEOUT_WARN_GAP = 1000 * 60 * 5; + private final Map _currentlyTimedOutNodes = new HashMap<>(); private Map _previousGroupState = Collections.emptyMap(); private boolean _previousDesignatedPrimary; private int _previousElectableGroupOverride; @@ -1624,7 +1626,7 @@ public class ReplicatedEnvironmentFacade implements EnvironmentFacade, StateChan private Map discoverNodeStates(Collection electableNodes) { final Map nodeStates = new HashMap(); - Set> futures = new HashSet>(); + Map> futureMap = new HashMap>(); for (final ReplicationNode node : electableNodes) { @@ -1649,14 +1651,24 @@ public class ReplicatedEnvironmentFacade implements EnvironmentFacade, StateChan return null; } }); - futures.add(future); + futureMap.put(node, future); } - for (Future future : 
futures) + boolean atLeastOneNodeTimesOut = false; + + for (Map.Entry> entry : futureMap.entrySet()) { + ReplicationNode node = entry.getKey(); + String nodeName = node.getName(); + Future future = entry.getValue(); try { future.get(_remoteNodeMonitorInterval, TimeUnit.MILLISECONDS); + if (_currentlyTimedOutNodes.remove(node) != null) + { + LOGGER.warn("Node '" + nodeName + "' from group " + _configuration.getGroupName() + + " is responding again."); + } } catch (InterruptedException e) { @@ -1664,14 +1676,34 @@ public class ReplicatedEnvironmentFacade implements EnvironmentFacade, StateChan } catch (ExecutionException e) { - LOGGER.warn("Cannot update node state for group " + _configuration.getGroupName(), e.getCause()); + LOGGER.warn("Cannot determine state for node '" + nodeName + "' from group " + + _configuration.getGroupName(), e.getCause()); } catch (TimeoutException e) { - LOGGER.warn("Timeout whilst updating node state for group " + _configuration.getGroupName()); + atLeastOneNodeTimesOut = true; + if (! _currentlyTimedOutNodes.containsKey(node)) + { + LOGGER.warn("Timeout whilst determining state for node '" + nodeName + "' from group " + + _configuration.getGroupName()); + _currentlyTimedOutNodes.put(node, System.currentTimeMillis()); + } + else if (_currentlyTimedOutNodes.get(node) > (System.currentTimeMillis() + TIMEOUT_WARN_GAP)) + { + LOGGER.warn("Node '" + nodeName + "' from group " + + _configuration.getGroupName() + + " is still timing out."); + _currentlyTimedOutNodes.put(node, System.currentTimeMillis()); + } + future.cancel(true); } } + + if (!atLeastOneNodeTimesOut) + { + _currentlyTimedOutNodes.clear(); + } return nodeStates; } -- cgit v1.2.1