diff options
| author | Keith Wall <kwall@apache.org> | 2014-11-13 16:24:06 +0000 |
|---|---|---|
| committer | Keith Wall <kwall@apache.org> | 2014-11-13 16:24:06 +0000 |
| commit | 6a46b152b4ca9d63c99cef6daa6e0d16dd9cb95e (patch) | |
| tree | 80ee21c4efe204582f762dbaaec1f8924d1f6349 /qpid/java/bdbstore/src | |
| parent | ff1ad8e3929aeecd5e3dcf6b8d039b4fb935b968 (diff) | |
| download | qpid-python-6a46b152b4ca9d63c99cef6daa6e0d16dd9cb95e.tar.gz | |
QPID-6225: [Java Broker] Reduce the frequency with which the failure to ping a remote node is reported
git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk@1639380 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'qpid/java/bdbstore/src')
| -rw-r--r-- | qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java | 42 |
1 files changed, 37 insertions, 5 deletions
diff --git a/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java b/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java index 387742fbff..52a7e28f31 100644 --- a/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java +++ b/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java @@ -1496,6 +1496,8 @@ public class ReplicatedEnvironmentFacade implements EnvironmentFacade, StateChan private class RemoteNodeStateLearner implements Callable<Void> { + private static final long TIMEOUT_WARN_GAP = 1000 * 60 * 5; + private final Map<ReplicationNode, Long> _currentlyTimedOutNodes = new HashMap<>(); private Map<String, ReplicatedEnvironment.State> _previousGroupState = Collections.emptyMap(); private boolean _previousDesignatedPrimary; private int _previousElectableGroupOverride; @@ -1624,7 +1626,7 @@ public class ReplicatedEnvironmentFacade implements EnvironmentFacade, StateChan private Map<ReplicationNode, NodeState> discoverNodeStates(Collection<ReplicationNode> electableNodes) { final Map<ReplicationNode, NodeState> nodeStates = new HashMap<ReplicationNode, NodeState>(); - Set<Future<Void>> futures = new HashSet<Future<Void>>(); + Map<ReplicationNode, Future<Void>> futureMap = new HashMap<ReplicationNode, Future<Void>>(); for (final ReplicationNode node : electableNodes) { @@ -1649,14 +1651,24 @@ public class ReplicatedEnvironmentFacade implements EnvironmentFacade, StateChan return null; } }); - futures.add(future); + futureMap.put(node, future); } - for (Future<Void> future : futures) + boolean atLeastOneNodeTimesOut = false; + + for (Map.Entry<ReplicationNode, Future<Void>> entry : futureMap.entrySet()) { + ReplicationNode node = entry.getKey(); + String nodeName = node.getName(); + Future<Void> future = 
entry.getValue(); try { future.get(_remoteNodeMonitorInterval, TimeUnit.MILLISECONDS); + if (_currentlyTimedOutNodes.remove(node) != null) + { + LOGGER.warn("Node '" + nodeName + "' from group " + _configuration.getGroupName() + + " is responding again."); + } } catch (InterruptedException e) { @@ -1664,14 +1676,34 @@ public class ReplicatedEnvironmentFacade implements EnvironmentFacade, StateChan } catch (ExecutionException e) { - LOGGER.warn("Cannot update node state for group " + _configuration.getGroupName(), e.getCause()); + LOGGER.warn("Cannot determine state for node '" + nodeName + "' from group " + + _configuration.getGroupName(), e.getCause()); } catch (TimeoutException e) { - LOGGER.warn("Timeout whilst updating node state for group " + _configuration.getGroupName()); + atLeastOneNodeTimesOut = true; + if (! _currentlyTimedOutNodes.containsKey(node)) + { + LOGGER.warn("Timeout whilst determining state for node '" + nodeName + "' from group " + + _configuration.getGroupName()); + _currentlyTimedOutNodes.put(node, System.currentTimeMillis()); + } + else if (_currentlyTimedOutNodes.get(node) > (System.currentTimeMillis() + TIMEOUT_WARN_GAP)) + { + LOGGER.warn("Node '" + nodeName + "' from group " + + _configuration.getGroupName() + + " is still timing out."); + _currentlyTimedOutNodes.put(node, System.currentTimeMillis()); + } + future.cancel(true); } } + + if (!atLeastOneNodeTimesOut) + { + _currentlyTimedOutNodes.clear(); + } return nodeStates; } |
