author     Dana Powers <dana.powers@gmail.com>  2016-07-14 23:57:04 -0700
committer  Dana Powers <dana.powers@gmail.com>  2016-07-14 23:57:04 -0700
commit     ad13500cd1276b71bd88fbe3836d7982a6bf1ce3 (patch)
tree       102f7c3250b4c995a1e29135fb78aebce59d3b49
parent     ed6098c272f90edb375b3ac39c2556338c810a35 (diff)
download   kafka-python-ad13500cd1276b71bd88fbe3836d7982a6bf1ce3.tar.gz
Add skip_double_compressed_messages option to KafkaConsumer (unrecurse_unpack_message_set)
-rw-r--r--  kafka/consumer/fetcher.py  12
-rw-r--r--  kafka/consumer/group.py     8
2 files changed, 20 insertions, 0 deletions
diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py
index 62e28d6..34ff4cb 100644
--- a/kafka/consumer/fetcher.py
+++ b/kafka/consumer/fetcher.py
@@ -39,6 +39,7 @@ class Fetcher(six.Iterator):
'fetch_max_wait_ms': 500,
'max_partition_fetch_bytes': 1048576,
'check_crcs': True,
+ 'skip_double_compressed_messages': False,
'iterator_refetch_records': 1, # undocumented -- interface may change
'api_version': (0, 8, 0),
}
@@ -71,6 +72,13 @@ class Fetcher(six.Iterator):
consumed. This ensures no on-the-wire or on-disk corruption to
the messages occurred. This check adds some overhead, so it may
be disabled in cases seeking extreme performance. Default: True
+ skip_double_compressed_messages (bool): A bug in KafkaProducer <= 1.2.4
+ caused some messages to be corrupted via double-compression.
+ By default, the fetcher will return these messages as a compressed
+ blob of bytes with a single offset, i.e. how the message was
+ actually published to the cluster. If you prefer to have the
+ fetcher automatically detect corrupt messages and skip them,
+ set this option to True. Default: False.
"""
self.config = copy.copy(self.DEFAULT_CONFIG)
for key in self.config:
@@ -368,6 +376,10 @@ class Fetcher(six.Iterator):
' double-compressed. This should not'
' happen -- check your producers!',
tp, offset)
+ if self.config['skip_double_compressed_messages']:
+ log.warning('Skipping double-compressed message at'
+ ' %s %d', tp, offset)
+ continue
if msg.magic > 0:
last_offset, _, _ = inner_mset[-1]
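
For orientation, the hunk above lands inside the fetcher's message-set unpacking loop: when a compressed wrapper message turns out to contain messages that are themselves compressed, the fetcher logs a warning and, with the new option enabled, drops the corrupt wrapper instead of returning it. Below is a minimal runnable sketch of that control flow; FakeMessage, unpack, and their signatures are illustrative stand-ins, not kafka-python internals.

import logging

log = logging.getLogger(__name__)

class FakeMessage(object):
    """Illustrative stand-in for a Kafka wire message (not a real API)."""
    def __init__(self, value, inner=None):
        self.value = value
        self._inner = inner or []  # list of (offset, size, FakeMessage)

    def is_compressed(self):
        return bool(self._inner)

    def decompress(self):
        return self._inner

def unpack(message_set, tp, skip_double_compressed=False):
    """Mirror the skip logic added above for double-compressed wrappers."""
    records = []
    for offset, size, msg in message_set:
        if not msg.is_compressed():
            records.append((offset, size, msg))
            continue
        inner_mset = msg.decompress()
        # A correct producer compresses exactly once, so the inner
        # messages should never themselves be compressed.
        if inner_mset and inner_mset[0][-1].is_compressed():
            log.warning('MessageSet at %s offset %d appears'
                        ' double-compressed -- check your producers!',
                        tp, offset)
            if skip_double_compressed:
                log.warning('Skipping double-compressed message at'
                            ' %s %d', tp, offset)
                continue  # drop the corrupt wrapper entirely
        records.extend(inner_mset)
    return records
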
diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py
index 8509999..7fe509a 100644
--- a/kafka/consumer/group.py
+++ b/kafka/consumer/group.py
@@ -123,6 +123,13 @@ class KafkaConsumer(six.Iterator):
consumer_timeout_ms (int): number of milliseconds to block during
message iteration before raising StopIteration (i.e., ending the
iterator). Default -1 (block forever).
+ skip_double_compressed_messages (bool): A bug in KafkaProducer <= 1.2.4
+ caused some messages to be corrupted via double-compression.
+ By default, the fetcher will return these messages as a compressed
+ blob of bytes with a single offset, i.e. how the message was
+ actually published to the cluster. If you prefer to have the
+ fetcher automatically detect corrupt messages and skip them,
+ set this option to True. Default: False.
security_protocol (str): Protocol used to communicate with brokers.
Valid values are: PLAINTEXT, SSL. Default: PLAINTEXT.
ssl_context (ssl.SSLContext): pre-configured SSLContext for wrapping
@@ -189,6 +196,7 @@ class KafkaConsumer(six.Iterator):
'send_buffer_bytes': None,
'receive_buffer_bytes': None,
'consumer_timeout_ms': -1,
+ 'skip_double_compressed_messages': False,
'security_protocol': 'PLAINTEXT',
'ssl_context': None,
'ssl_check_hostname': True,
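
End to end, the new setting is consumed like any other KafkaConsumer keyword argument. A short usage sketch, assuming a placeholder topic name and broker address:

from kafka import KafkaConsumer

# Opt in to skipping messages that a buggy producer (<= 1.2.4)
# double-compressed; the default (False) returns them as the raw
# compressed blob they were published as.
consumer = KafkaConsumer(
    'my-topic',                          # placeholder topic name
    bootstrap_servers='localhost:9092',  # placeholder broker address
    skip_double_compressed_messages=True,
)

for message in consumer:
    print(message.topic, message.offset, message.value)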