summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCommander Dishwasher <roy.antman@gmail.com>2019-09-30 10:23:06 -0400
committerDana Powers <dana.powers@gmail.com>2019-09-30 07:23:06 -0700
commit298cb0dbef58f6bb267235911b6ca86039bf8cda (patch)
tree0d9686680bf2488151047b2af716044337021b1b
parent0f929bd866f1526fc5d18068c31903f1ae3393d2 (diff)
downloadkafka-python-298cb0dbef58f6bb267235911b6ca86039bf8cda.tar.gz
Issue #1780 - Consumer hang indefinitely in fetcher._retrieve_offsets() due to topic deletion while rebalancing (#1782)
-rw-r--r--kafka/consumer/fetcher.py28
-rw-r--r--kafka/coordinator/consumer.py6
-rw-r--r--test/test_fetcher.py4
3 files changed, 26 insertions, 12 deletions
diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py
index 1c8ac51..f781d4c 100644
--- a/kafka/consumer/fetcher.py
+++ b/kafka/consumer/fetcher.py
@@ -235,14 +235,16 @@ class Fetcher(six.Iterator):
log.debug("Resetting offset for partition %s to %s offset.",
partition, strategy)
offsets = self._retrieve_offsets({partition: timestamp})
- if partition not in offsets:
- raise NoOffsetForPartitionError(partition)
- offset = offsets[partition][0]
- # we might lose the assignment while fetching the offset,
- # so check it is still active
- if self._subscriptions.is_assigned(partition):
- self._subscriptions.seek(partition, offset)
+ if partition in offsets:
+ offset = offsets[partition][0]
+
+ # we might lose the assignment while fetching the offset,
+ # so check it is still active
+ if self._subscriptions.is_assigned(partition):
+ self._subscriptions.seek(partition, offset)
+ else:
+ log.debug("Could not find offset for partition %s since it is probably deleted" % (partition,))
def _retrieve_offsets(self, timestamps, timeout_ms=float("inf")):
"""Fetch offset for each partition passed in ``timestamps`` map.
@@ -267,6 +269,9 @@ class Fetcher(six.Iterator):
start_time = time.time()
remaining_ms = timeout_ms
while remaining_ms > 0:
+ if not timestamps:
+ return {}
+
future = self._send_offset_requests(timestamps)
self._client.poll(future=future, timeout_ms=remaining_ms)
@@ -283,6 +288,15 @@ class Fetcher(six.Iterator):
if future.exception.invalid_metadata:
refresh_future = self._client.cluster.request_update()
self._client.poll(future=refresh_future, timeout_ms=remaining_ms)
+
+ # Issue #1780
+            # Recheck partition existence after a successful metadata refresh
+ if refresh_future.succeeded() and isinstance(future.exception, Errors.StaleMetadata):
+ log.debug("Stale metadata was raised, and we now have an updated metadata. Rechecking partition existance")
+ unknown_partition = future.exception.args[0] # TopicPartition from StaleMetadata
+ if not self._client.cluster.leader_for_partition(unknown_partition):
+ log.debug("Removed partition %s from offsets retrieval" % (unknown_partition, ))
+ timestamps.pop(unknown_partition)
else:
time.sleep(self.config['retry_backoff_ms'] / 1000.0)
diff --git a/kafka/coordinator/consumer.py b/kafka/coordinator/consumer.py
index 9d6f4eb..9b7a3cd 100644
--- a/kafka/coordinator/consumer.py
+++ b/kafka/coordinator/consumer.py
@@ -225,7 +225,11 @@ class ConsumerCoordinator(BaseCoordinator):
self._subscription.needs_fetch_committed_offsets = True
# update partition assignment
- self._subscription.assign_from_subscribed(assignment.partitions())
+ try:
+ self._subscription.assign_from_subscribed(assignment.partitions())
+ except ValueError as e:
+ log.warning("%s. Probably due to a deleted topic. Requesting Re-join" % e)
+ self.request_rejoin()
# give the assignor a chance to update internal state
# based on the received assignment
diff --git a/test/test_fetcher.py b/test/test_fetcher.py
index a3eea09..b61a0f0 100644
--- a/test/test_fetcher.py
+++ b/test/test_fetcher.py
@@ -138,10 +138,6 @@ def test__reset_offset(fetcher, mocker):
fetcher._subscriptions.need_offset_reset(tp)
mocked = mocker.patch.object(fetcher, '_retrieve_offsets')
- mocked.return_value = {}
- with pytest.raises(NoOffsetForPartitionError):
- fetcher._reset_offset(tp)
-
mocked.return_value = {tp: (1001, None)}
fetcher._reset_offset(tp)
assert not fetcher._subscriptions.assignment[tp].awaiting_reset