diff options
author | Commander Dishwasher <roy.antman@gmail.com> | 2019-09-30 10:23:06 -0400 |
---|---|---|
committer | Dana Powers <dana.powers@gmail.com> | 2019-09-30 07:23:06 -0700 |
commit | 298cb0dbef58f6bb267235911b6ca86039bf8cda (patch) | |
tree | 0d9686680bf2488151047b2af716044337021b1b | |
parent | 0f929bd866f1526fc5d18068c31903f1ae3393d2 (diff) | |
download | kafka-python-298cb0dbef58f6bb267235911b6ca86039bf8cda.tar.gz |
Issue #1780 - Consumer hang indefinitely in fetcher._retrieve_offsets() due to topic deletion while rebalancing (#1782)
-rw-r--r-- | kafka/consumer/fetcher.py | 28 | ||||
-rw-r--r-- | kafka/coordinator/consumer.py | 6 | ||||
-rw-r--r-- | test/test_fetcher.py | 4 |
3 files changed, 26 insertions, 12 deletions
diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py index 1c8ac51..f781d4c 100644 --- a/kafka/consumer/fetcher.py +++ b/kafka/consumer/fetcher.py @@ -235,14 +235,16 @@ class Fetcher(six.Iterator): log.debug("Resetting offset for partition %s to %s offset.", partition, strategy) offsets = self._retrieve_offsets({partition: timestamp}) - if partition not in offsets: - raise NoOffsetForPartitionError(partition) - offset = offsets[partition][0] - # we might lose the assignment while fetching the offset, - # so check it is still active - if self._subscriptions.is_assigned(partition): - self._subscriptions.seek(partition, offset) + if partition in offsets: + offset = offsets[partition][0] + + # we might lose the assignment while fetching the offset, + # so check it is still active + if self._subscriptions.is_assigned(partition): + self._subscriptions.seek(partition, offset) + else: + log.debug("Could not find offset for partition %s since it is probably deleted" % (partition,)) def _retrieve_offsets(self, timestamps, timeout_ms=float("inf")): """Fetch offset for each partition passed in ``timestamps`` map. @@ -267,6 +269,9 @@ class Fetcher(six.Iterator): start_time = time.time() remaining_ms = timeout_ms while remaining_ms > 0: + if not timestamps: + return {} + future = self._send_offset_requests(timestamps) self._client.poll(future=future, timeout_ms=remaining_ms) @@ -283,6 +288,15 @@ class Fetcher(six.Iterator): if future.exception.invalid_metadata: refresh_future = self._client.cluster.request_update() self._client.poll(future=refresh_future, timeout_ms=remaining_ms) + + # Issue #1780 + # Recheck partition existance after after a successful metadata refresh + if refresh_future.succeeded() and isinstance(future.exception, Errors.StaleMetadata): + log.debug("Stale metadata was raised, and we now have an updated metadata. Rechecking partition existance") + unknown_partition = future.exception.args[0] # TopicPartition from StaleMetadata + if not self._client.cluster.leader_for_partition(unknown_partition): + log.debug("Removed partition %s from offsets retrieval" % (unknown_partition, )) + timestamps.pop(unknown_partition) else: time.sleep(self.config['retry_backoff_ms'] / 1000.0) diff --git a/kafka/coordinator/consumer.py b/kafka/coordinator/consumer.py index 9d6f4eb..9b7a3cd 100644 --- a/kafka/coordinator/consumer.py +++ b/kafka/coordinator/consumer.py @@ -225,7 +225,11 @@ class ConsumerCoordinator(BaseCoordinator): self._subscription.needs_fetch_committed_offsets = True # update partition assignment - self._subscription.assign_from_subscribed(assignment.partitions()) + try: + self._subscription.assign_from_subscribed(assignment.partitions()) + except ValueError as e: + log.warning("%s. Probably due to a deleted topic. Requesting Re-join" % e) + self.request_rejoin() # give the assignor a chance to update internal state # based on the received assignment diff --git a/test/test_fetcher.py b/test/test_fetcher.py index a3eea09..b61a0f0 100644 --- a/test/test_fetcher.py +++ b/test/test_fetcher.py @@ -138,10 +138,6 @@ def test__reset_offset(fetcher, mocker): fetcher._subscriptions.need_offset_reset(tp) mocked = mocker.patch.object(fetcher, '_retrieve_offsets') - mocked.return_value = {}
- with pytest.raises(NoOffsetForPartitionError): - fetcher._reset_offset(tp) - mocked.return_value = {tp: (1001, None)} fetcher._reset_offset(tp) assert not fetcher._subscriptions.assignment[tp].awaiting_reset |