author     Dana Powers <dana.powers@gmail.com>   2016-05-22 10:28:56 -0700
committer  Dana Powers <dana.powers@gmail.com>   2016-05-22 10:28:56 -0700
commit     7941a2ac7ec6663f08c6291d92746eae9f792916 (patch)
tree       f3b75dcea569e28f1685500af53bff34514374b9
parent     92f859d8da5c3f35ab3738ef2725fff05b6cf57f (diff)
parent     aa5bde6ac382966395f8f1466c46d55cf28c2cce (diff)
download   kafka-python-7941a2ac7ec6663f08c6291d92746eae9f792916.tar.gz
Merge pull request #693 from dpkp/message_format_v1
Message format v1 (KIP-31 / KIP-32)
-rw-r--r--  kafka/consumer/fetcher.py              24
-rw-r--r--  kafka/producer/buffer.py                6
-rw-r--r--  kafka/producer/future.py               18
-rw-r--r--  kafka/producer/kafka.py                15
-rw-r--r--  kafka/producer/record_accumulator.py   32
-rw-r--r--  kafka/producer/sender.py                9
-rw-r--r--  kafka/protocol/legacy.py                8
-rw-r--r--  kafka/protocol/message.py             132
-rw-r--r--  kafka/protocol/types.py                47
-rw-r--r--  test/test_consumer_group.py            26
-rw-r--r--  test/test_protocol.py                 984
-rw-r--r--  test/test_protocol_legacy.py          848
12 files changed, 1188 insertions, 961 deletions
diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py
index 3a5e37e..bf59775 100644
--- a/kafka/consumer/fetcher.py
+++ b/kafka/consumer/fetcher.py
@@ -19,7 +19,7 @@
 log = logging.getLogger(__name__)

 ConsumerRecord = collections.namedtuple("ConsumerRecord",
-    ["topic", "partition", "offset", "key", "value"])
+    ["topic", "partition", "offset", "timestamp", "timestamp_type", "key", "value"])


 class NoOffsetForPartitionError(Errors.KafkaError):
@@ -351,17 +351,33 @@
                      position)
         return dict(drained)

-    def _unpack_message_set(self, tp, messages):
+    def _unpack_message_set(self, tp, messages, relative_offset=0):
         try:
             for offset, size, msg in messages:
                 if self.config['check_crcs'] and not msg.validate_crc():
                     raise Errors.InvalidMessageError(msg)
                 elif msg.is_compressed():
-                    for record in self._unpack_message_set(tp, msg.decompress()):
+                    mset = msg.decompress()
+                    # new format uses relative offsets for compressed messages
+                    if msg.magic > 0:
+                        last_offset, _, _ = mset[-1]
+                        relative = offset - last_offset
+                    else:
+                        relative = 0
+                    for record in self._unpack_message_set(tp, mset, relative):
                         yield record
                 else:
+                    # Message v1 adds timestamp
+                    if msg.magic > 0:
+                        timestamp = msg.timestamp
+                        timestamp_type = msg.timestamp_type
+                    else:
+                        timestamp = timestamp_type = None
                     key, value = self._deserialize(msg)
-                    yield ConsumerRecord(tp.topic, tp.partition, offset, key, value)
+                    yield ConsumerRecord(tp.topic, tp.partition,
+                                         offset + relative_offset,
+                                         timestamp, timestamp_type,
+                                         key, value)

         # If unpacking raises StopIteration, it is erroneously
         # caught by the generator. We want all exceptions to be raised
         # back to the user. See Issue 545
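The decompression change above is the consumer half of KIP-31: with message format v1, the inner messages of a compressed wrapper carry relative offsets 0..N-1, and the wrapper message carries the absolute log offset of the *last* inner message. A minimal standalone sketch of that arithmetic (the offsets here are invented for illustration, not taken from the library):

```python
# Toy illustration of KIP-31 relative-offset recovery.
def absolute_offsets(wrapper_offset, inner_offsets, magic):
    """Map inner-message offsets to absolute log offsets.

    v0 (magic=0): inner messages already carry absolute offsets.
    v1 (magic=1): inner messages carry 0..N-1; the wrapper carries
    the absolute offset of the last inner message.
    """
    if magic == 0:
        return inner_offsets
    base = wrapper_offset - inner_offsets[-1]
    return [base + o for o in inner_offsets]

assert absolute_offsets(104, [0, 1, 2], magic=1) == [102, 103, 104]
assert absolute_offsets(104, [102, 103, 104], magic=0) == [102, 103, 104]
```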
diff --git a/kafka/producer/buffer.py b/kafka/producer/buffer.py
index b2ac747..ba9b5db 100644
--- a/kafka/producer/buffer.py
+++ b/kafka/producer/buffer.py
@@ -29,7 +29,7 @@ class MessageSetBuffer(object):
         'snappy': (has_snappy, snappy_encode, Message.CODEC_SNAPPY),
         'lz4': (has_lz4, lz4_encode, Message.CODEC_LZ4),
     }
-    def __init__(self, buf, batch_size, compression_type=None):
+    def __init__(self, buf, batch_size, compression_type=None, message_version=0):
         if compression_type is not None:
             assert compression_type in self._COMPRESSORS, 'Unrecognized compression type'
             checker, encoder, attributes = self._COMPRESSORS[compression_type]
@@ -40,6 +40,7 @@
             self._compressor = None
             self._compression_attributes = None

+        self._message_version = message_version
         self._buffer = buf
         # Init MessageSetSize to 0 -- update on close
         self._buffer.seek(0)
@@ -85,7 +86,8 @@
             # TODO: avoid copies with bytearray / memoryview
             self._buffer.seek(4)
             msg = Message(self._compressor(self._buffer.read()),
-                          attributes=self._compression_attributes)
+                          attributes=self._compression_attributes,
+                          magic=self._message_version)
             encoded = msg.encode()
             self._buffer.seek(4)
             self._buffer.write(Int64.encode(0)) # offset 0 for wrapper msg
diff --git a/kafka/producer/future.py b/kafka/producer/future.py
index 35520d8..acf4255 100644
--- a/kafka/producer/future.py
+++ b/kafka/producer/future.py
@@ -29,16 +29,21 @@ class FutureProduceResult(Future):


 class FutureRecordMetadata(Future):
-    def __init__(self, produce_future, relative_offset):
+    def __init__(self, produce_future, relative_offset, timestamp_ms):
         super(FutureRecordMetadata, self).__init__()
         self._produce_future = produce_future
         self.relative_offset = relative_offset
+        self.timestamp_ms = timestamp_ms
         produce_future.add_callback(self._produce_success)
         produce_future.add_errback(self.failure)

-    def _produce_success(self, base_offset):
+    def _produce_success(self, offset_and_timestamp):
+        base_offset, timestamp_ms = offset_and_timestamp
+        if timestamp_ms is None:
+            timestamp_ms = self.timestamp_ms
         self.success(RecordMetadata(self._produce_future.topic_partition,
-                                    base_offset, self.relative_offset))
+                                    base_offset, timestamp_ms,
+                                    self.relative_offset))

     def get(self, timeout=None):
         if not self.is_done and not self._produce_future.await(timeout):
@@ -51,12 +56,13 @@


 class RecordMetadata(collections.namedtuple(
-        'RecordMetadata', 'topic partition topic_partition offset')):
-    def __new__(cls, tp, base_offset, relative_offset=None):
+        'RecordMetadata', 'topic partition topic_partition offset timestamp')):
+    def __new__(cls, tp, base_offset, timestamp, relative_offset=None):
         offset = base_offset
         if relative_offset is not None and base_offset != -1:
             offset += relative_offset
-        return super(RecordMetadata, cls).__new__(cls, tp.topic, tp.partition, tp, offset)
+        return super(RecordMetadata, cls).__new__(cls, tp.topic, tp.partition,
+                                                  tp, offset, timestamp)

     def __str__(self):
         return 'RecordMetadata(topic=%s, partition=%s, offset=%s)' % (
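The two-element success payload above is what lets RecordMetadata report a timestamp under either broker configuration (KIP-32): a broker configured for LogAppendTime stamps the batch itself, while a CreateTime broker returns no timestamp and the client keeps the one it attached at append time. A small sketch of the fallback rule from `_produce_success`, with the harness around it invented for illustration:

```python
# Minimal sketch of the timestamp fallback, assuming (as the diff does) that
# a CreateTime broker reports no timestamp of its own (None here).
def resolve_timestamp(broker_timestamp_ms, client_timestamp_ms):
    if broker_timestamp_ms is None:
        return client_timestamp_ms  # CreateTime: producer-side value wins
    return broker_timestamp_ms      # LogAppendTime: broker-assigned value wins

assert resolve_timestamp(None, 1463938136000) == 1463938136000
assert resolve_timestamp(1463938137000, 1463938136000) == 1463938137000
```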
diff --git a/kafka/producer/kafka.py b/kafka/producer/kafka.py
index 7e8f625..7aa24b3 100644
--- a/kafka/producer/kafka.py
+++ b/kafka/producer/kafka.py
@@ -347,7 +347,7 @@
         max_wait = self.config['max_block_ms'] / 1000.0
         return self._wait_on_metadata(topic, max_wait)

-    def send(self, topic, value=None, key=None, partition=None):
+    def send(self, topic, value=None, key=None, partition=None, timestamp_ms=None):
         """Publish a message to a topic.

         Arguments:
@@ -368,6 +368,8 @@
                 partition (but if key is None, partition is chosen randomly).
                 Must be type bytes, or be serializable to bytes via configured
                 key_serializer.
+            timestamp_ms (int, optional): epoch milliseconds (from Jan 1 1970 UTC)
+                to use as the message timestamp. Defaults to current time.

         Returns:
             FutureRecordMetadata: resolves to RecordMetadata
@@ -396,8 +398,11 @@
             self._ensure_valid_record_size(message_size)

             tp = TopicPartition(topic, partition)
+            if timestamp_ms is None:
+                timestamp_ms = int(time.time() * 1000)
             log.debug("Sending (key=%s value=%s) to %s", key, value, tp)
-            result = self._accumulator.append(tp, key_bytes, value_bytes,
+            result = self._accumulator.append(tp, timestamp_ms,
+                                              key_bytes, value_bytes,
                                               self.config['max_block_ms'])
             future, batch_is_full, new_batch_created = result
             if batch_is_full or new_batch_created:
@@ -416,8 +421,10 @@
         except Exception as e:
             log.debug("Exception occurred during message send: %s", e)
             return FutureRecordMetadata(
-                FutureProduceResult(TopicPartition(topic, partition)),
-                -1).failure(e)
+                FutureProduceResult(
+                    TopicPartition(topic, partition)),
+                -1, None
+            ).failure(e)

     def flush(self, timeout=None):
         """
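For reference, a hedged usage sketch of the new `timestamp_ms` parameter. It assumes a reachable broker at localhost:9092 running Kafka >= 0.10 (so message format v1 is in play) and a topic named my-topic:

```python
import time
from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers='localhost:9092')

# Explicit CreateTime; omitting timestamp_ms defaults to int(time.time() * 1000)
future = producer.send('my-topic', value=b'payload', key=b'key',
                       timestamp_ms=int(time.time() * 1000))

# RecordMetadata now carries a timestamp field alongside topic/partition/offset
metadata = future.get(timeout=10)
print(metadata.topic, metadata.partition, metadata.offset, metadata.timestamp)
```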
diff --git a/kafka/producer/record_accumulator.py b/kafka/producer/record_accumulator.py
index 9eb0e95..4434b18 100644
--- a/kafka/producer/record_accumulator.py
+++ b/kafka/producer/record_accumulator.py
@@ -36,7 +36,7 @@ class AtomicInteger(object):


 class RecordBatch(object):
-    def __init__(self, tp, records):
+    def __init__(self, tp, records, message_version=0):
         self.record_count = 0
         #self.max_record_size = 0 # for metrics only
         now = time.time()
@@ -46,22 +46,25 @@
         self.last_attempt = now
         self.last_append = now
         self.records = records
+        self.message_version = message_version
         self.topic_partition = tp
         self.produce_future = FutureProduceResult(tp)
         self._retry = False

-    def try_append(self, key, value):
+    def try_append(self, timestamp_ms, key, value):
         if not self.records.has_room_for(key, value):
             return None

-        self.records.append(self.record_count, Message(value, key=key))
+        msg = Message(value, key=key, magic=self.message_version)
+        self.records.append(self.record_count, msg)
         # self.max_record_size = max(self.max_record_size, Record.record_size(key, value)) # for metrics only
         self.last_append = time.time()
-        future = FutureRecordMetadata(self.produce_future, self.record_count)
+        future = FutureRecordMetadata(self.produce_future, self.record_count,
+                                      timestamp_ms)
         self.record_count += 1
         return future

-    def done(self, base_offset=None, exception=None):
+    def done(self, base_offset=None, timestamp_ms=None, exception=None):
         log.debug("Produced messages to topic-partition %s with base offset"
                   " %s and error %s.", self.topic_partition, base_offset,
                   exception) # trace
@@ -69,7 +72,7 @@
             log.warning('Batch is already closed -- ignoring batch.done()')
             return
         elif exception is None:
-            self.produce_future.success(base_offset)
+            self.produce_future.success((base_offset, timestamp_ms))
         else:
             self.produce_future.failure(exception)

@@ -78,7 +81,7 @@
         if ((self.records.is_full() and request_timeout_ms < since_append_ms) or
            (request_timeout_ms < (since_append_ms + linger_ms))):
             self.records.close()
-            self.done(-1, Errors.KafkaTimeoutError(
+            self.done(-1, None, Errors.KafkaTimeoutError(
                 "Batch containing %s record(s) expired due to timeout while"
                 " requesting metadata from brokers for %s", self.record_count,
                 self.topic_partition))
@@ -137,6 +140,7 @@
         'compression_type': None,
         'linger_ms': 0,
         'retry_backoff_ms': 100,
+        'message_version': 0,
     }

     def __init__(self, **configs):
@@ -155,7 +159,7 @@
                                        self.config['batch_size'])
         self._incomplete = IncompleteRecordBatches()

-    def append(self, tp, key, value, max_time_to_block_ms):
+    def append(self, tp, timestamp_ms, key, value, max_time_to_block_ms):
         """Add a record to the accumulator, return the append result.

         The append result will contain the future metadata, and flag for
@@ -164,6 +168,7 @@
         Arguments:
             tp (TopicPartition): The topic/partition to which this record is
                 being sent
+            timestamp_ms (int): The timestamp of the record (epoch ms)
             key (bytes): The key for the record
             value (bytes): The value for the record
             max_time_to_block_ms (int): The maximum time in milliseconds to
@@ -188,7 +193,7 @@
             dq = self._batches[tp]
             if dq:
                 last = dq[-1]
-                future = last.try_append(key, value)
+                future = last.try_append(timestamp_ms, key, value)
                 if future is not None:
                     batch_is_full = len(dq) > 1 or last.records.is_full()
                     return future, batch_is_full, False
@@ -211,7 +216,7 @@
             if dq:
                 last = dq[-1]
-                future = last.try_append(key, value)
+                future = last.try_append(timestamp_ms, key, value)
                 if future is not None:
                     # Somebody else found us a batch, return the one we
                     # waited for! Hopefully this doesn't happen often...
@@ -220,9 +225,10 @@
                     return future, batch_is_full, False

             records = MessageSetBuffer(buf, self.config['batch_size'],
-                                       self.config['compression_type'])
-            batch = RecordBatch(tp, records)
-            future = batch.try_append(key, value)
+                                       self.config['compression_type'],
+                                       self.config['message_version'])
+            batch = RecordBatch(tp, records, self.config['message_version'])
+            future = batch.try_append(timestamp_ms, key, value)
             if not future:
                 raise Exception()
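Because `RecordBatch.done()` now resolves the produce future with a `(base_offset, timestamp_ms)` tuple instead of a bare offset, anything subscribed to that future has to unpack two values. A self-contained toy of that contract (ToyFuture is a stand-in written for this example, not kafka.future.Future):

```python
class ToyFuture(object):
    """Just enough of a future to show the new two-field success payload."""
    def __init__(self):
        self._callbacks = []

    def add_callback(self, f):
        self._callbacks.append(f)

    def success(self, value):
        for f in self._callbacks:
            f(value)

def on_success(offset_and_timestamp):
    base_offset, timestamp_ms = offset_and_timestamp  # two fields now
    print('base_offset=%s timestamp_ms=%s' % (base_offset, timestamp_ms))

produce_future = ToyFuture()
produce_future.add_callback(on_success)
produce_future.success((42, None))  # None: broker left the CreateTime untouched
```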
diff --git a/kafka/producer/sender.py b/kafka/producer/sender.py
index bf7c163..9c36c9b 100644
--- a/kafka/producer/sender.py
+++ b/kafka/producer/sender.py
@@ -163,7 +163,7 @@
     def _failed_produce(self, batches, node_id, error):
         log.debug("Error sending produce request to node %d: %s", node_id, error) # trace
         for batch in batches:
-            self._complete_batch(batch, error, -1)
+            self._complete_batch(batch, error, -1, None)

     def _handle_produce_response(self, batches, response):
         """Handle a produce response."""
@@ -183,15 +183,16 @@
         else:
             # this is the acks = 0 case, just complete all requests
             for batch in batches:
-                self._complete_batch(batch, None, -1)
+                self._complete_batch(batch, None, -1, None)

-    def _complete_batch(self, batch, error, base_offset):
+    def _complete_batch(self, batch, error, base_offset, timestamp_ms=None):
         """Complete or retry the given batch of records.

         Arguments:
             batch (RecordBatch): The record batch
             error (Exception): The error (or None if none)
             base_offset (int): The base offset assigned to the records if successful
+            timestamp_ms (int, optional): The timestamp returned by the broker for this batch
         """
         # Standardize no-error to None
         if error is Errors.NoError:
@@ -210,7 +211,7 @@
             error = error(batch.topic_partition.topic)

         # tell the user the result of their request
-        batch.done(base_offset, error)
+        batch.done(base_offset, timestamp_ms, error)
         self._accumulator.deallocate(batch)

         if getattr(error, 'invalid_metadata', False):
diff --git a/kafka/protocol/legacy.py b/kafka/protocol/legacy.py
index cd100d6..6ab2511 100644
--- a/kafka/protocol/legacy.py
+++ b/kafka/protocol/legacy.py
@@ -143,9 +143,11 @@
                 topic, [(
                     partition,
-                    [(0, 0, kafka.protocol.message.Message(msg.value, key=msg.key,
-                                                           magic=msg.magic,
-                                                           attributes=msg.attributes))
+                    [(0,
+                      kafka.protocol.message.Message(
+                          msg.value, key=msg.key,
+                          magic=msg.magic, attributes=msg.attributes
+                      ).encode())
                      for msg in payload.messages])
                 for partition, payload in topic_payloads.items()])
             for topic, topic_payloads in group_by_topic_and_partition(payloads).items()])
diff --git a/kafka/protocol/message.py b/kafka/protocol/message.py
index ae261bf..473ca56 100644
--- a/kafka/protocol/message.py
+++ b/kafka/protocol/message.py
@@ -1,4 +1,5 @@
 import io
+import time

 from ..codec import (has_gzip, has_snappy, has_lz4,
                      gzip_decode, snappy_decode, lz4_decode)
@@ -11,22 +12,39 @@

 class Message(Struct):
-    SCHEMA = Schema(
-        ('crc', Int32),
-        ('magic', Int8),
-        ('attributes', Int8),
-        ('key', Bytes),
-        ('value', Bytes)
-    )
-    CODEC_MASK = 0x03
+    SCHEMAS = [
+        Schema(
+            ('crc', Int32),
+            ('magic', Int8),
+            ('attributes', Int8),
+            ('key', Bytes),
+            ('value', Bytes)),
+        Schema(
+            ('crc', Int32),
+            ('magic', Int8),
+            ('attributes', Int8),
+            ('timestamp', Int64),
+            ('key', Bytes),
+            ('value', Bytes)),
+    ]
+    SCHEMA = SCHEMAS[1]
+    CODEC_MASK = 0x07
     CODEC_GZIP = 0x01
     CODEC_SNAPPY = 0x02
     CODEC_LZ4 = 0x03
-    HEADER_SIZE = 14 # crc(4), magic(1), attributes(1), key+value size(4*2)
+    TIMESTAMP_TYPE_MASK = 0x08
+    HEADER_SIZE = 22 # crc(4), magic(1), attributes(1), timestamp(8), key+value size(4*2)

-    def __init__(self, value, key=None, magic=0, attributes=0, crc=0):
+    def __init__(self, value, key=None, magic=0, attributes=0, crc=0,
+                 timestamp=None):
         assert value is None or isinstance(value, bytes), 'value must be bytes'
         assert key is None or isinstance(key, bytes), 'key must be bytes'
+        assert magic > 0 or timestamp is None, 'timestamp not supported in v0'
+
+        # Default timestamp to now for v1 messages
+        if magic > 0 and timestamp is None:
+            timestamp = int(time.time() * 1000)
+        self.timestamp = timestamp
         self.crc = crc
         self.magic = magic
         self.attributes = attributes
@@ -34,22 +52,48 @@
         self.value = value
         self.encode = self._encode_self

+    @property
+    def timestamp_type(self):
+        """0 for CreateTime; 1 for LogAppendTime; None if unsupported.
+
+        Value is determined by broker; produced messages should always set to 0
+        Requires Kafka >= 0.10 / message version >= 1
+        """
+        if self.magic == 0:
+            return None
+        return self.attributes & self.TIMESTAMP_TYPE_MASK
+
     def _encode_self(self, recalc_crc=True):
-        message = Message.SCHEMA.encode(
-            (self.crc, self.magic, self.attributes, self.key, self.value)
-        )
+        version = self.magic
+        if version == 1:
+            fields = (self.crc, self.magic, self.attributes, self.timestamp, self.key, self.value)
+        elif version == 0:
+            fields = (self.crc, self.magic, self.attributes, self.key, self.value)
+        else:
+            raise ValueError('Unrecognized message version: %s' % version)
+        message = Message.SCHEMAS[version].encode(fields)
         if not recalc_crc:
             return message
         self.crc = crc32(message[4:])
-        return self.SCHEMA.fields[0].encode(self.crc) + message[4:]
+        crc_field = self.SCHEMAS[version].fields[0]
+        return crc_field.encode(self.crc) + message[4:]

     @classmethod
     def decode(cls, data):
         if isinstance(data, bytes):
             data = io.BytesIO(data)
-        fields = [field.decode(data) for field in cls.SCHEMA.fields]
-        return cls(fields[4], key=fields[3],
-                   magic=fields[1], attributes=fields[2], crc=fields[0])
+        # Partial decode required to determine message version
+        base_fields = cls.SCHEMAS[0].fields[0:3]
+        crc, magic, attributes = [field.decode(data) for field in base_fields]
+        remaining = cls.SCHEMAS[magic].fields[3:]
+        fields = [field.decode(data) for field in remaining]
+        if magic == 1:
+            timestamp = fields[0]
+        else:
+            timestamp = None
+        return cls(fields[-1], key=fields[-2],
+                   magic=magic, attributes=attributes, crc=crc,
+                   timestamp=timestamp)

     def validate_crc(self):
         raw_msg = self._encode_self(recalc_crc=False)
@@ -90,8 +134,7 @@ class PartialMessage(bytes):
 class MessageSet(AbstractType):
     ITEM = Schema(
         ('offset', Int64),
-        ('message_size', Int32),
-        ('message', Message.SCHEMA)
+        ('message', Bytes)
     )
     HEADER_SIZE = 12 # offset + message_size

@@ -105,20 +148,13 @@
             return items.read(size + 4)

         encoded_values = []
-        for (offset, message_size, message) in items:
-            if isinstance(message, Message):
-                encoded_message = message.encode()
-            else:
-                encoded_message = cls.ITEM.fields[2].encode(message)
-            if recalc_message_size:
-                message_size = len(encoded_message)
-            encoded_values.append(cls.ITEM.fields[0].encode(offset))
-            encoded_values.append(cls.ITEM.fields[1].encode(message_size))
-            encoded_values.append(encoded_message)
+        for (offset, message) in items:
+            encoded_values.append(Int64.encode(offset))
+            encoded_values.append(Bytes.encode(message))
         encoded = b''.join(encoded_values)
         if not size:
             return encoded
-        return Int32.encode(len(encoded)) + encoded
+        return Bytes.encode(encoded)

     @classmethod
     def decode(cls, data, bytes_to_read=None):
@@ -131,30 +167,18 @@
             bytes_to_read = Int32.decode(data)

         items = []
-        # We need at least 8 + 4 + 14 bytes to read offset + message size + message
-        # (14 bytes is a message w/ null key and null value)
-        while bytes_to_read >= 26:
-            offset = Int64.decode(data)
-            bytes_to_read -= 8
-
-            message_size = Int32.decode(data)
-            bytes_to_read -= 4
-
-            # if FetchRequest max_bytes is smaller than the available message set
-            # the server returns partial data for the final message
-            if message_size > bytes_to_read:
+        # if FetchRequest max_bytes is smaller than the available message set
+        # the server returns partial data for the final message
+        while bytes_to_read:
+            try:
+                offset = Int64.decode(data)
+                msg_bytes = Bytes.decode(data)
+                bytes_to_read -= 8 + 4 + len(msg_bytes)
+                items.append((offset, len(msg_bytes), Message.decode(msg_bytes)))
+            except ValueError:
+                # PartialMessage to signal that max_bytes may be too small
+                items.append((None, None, PartialMessage()))
                 break
-
-            message = Message.decode(data)
-            bytes_to_read -= message_size
-
-            items.append((offset, message_size, message))
-
-        # If any bytes are left over, clear them from the buffer
-        # and append a PartialMessage to signal that max_bytes may be too small
-        if bytes_to_read:
-            items.append((None, None, PartialMessage(data.read(bytes_to_read))))
-
         return items

     @classmethod
@@ -164,4 +188,4 @@
             decoded = cls.decode(messages)
             messages.seek(offset)
             messages = decoded
-        return '[' + ', '.join([cls.ITEM.repr(m) for m in messages]) + ']'
+        return str([cls.ITEM.repr(m) for m in messages])
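To make the two schemas concrete, here is a hand-rolled sketch of the v1 wire layout that `SCHEMAS[1]` models: crc(4) | magic(1) | attributes(1) | timestamp(8) | key(bytes) | value(bytes), with the CRC covering everything after itself. It is checked against the CRC constant from the new `test_encode_message_v1` below; `binascii.crc32` is assumed here to match `kafka.util.crc32` for this positive value:

```python
import struct
from binascii import crc32

def encode_v1(value, key, timestamp_ms, attributes=0):
    # Kafka encodes a null key/value as length -1
    def _bytes(b):
        if b is None:
            return struct.pack('>i', -1)
        return struct.pack('>i', len(b)) + b
    # magic=1, attributes, timestamp, key, value -- the span the CRC covers
    body = struct.pack('>bbq', 1, attributes, timestamp_ms) + _bytes(key) + _bytes(value)
    return struct.pack('>I', crc32(body) & 0xffffffff) + body

encoded = encode_v1(b'test', b'key', 1234)
assert len(encoded) == 22 + 3 + 4  # HEADER_SIZE plus key and value payloads
assert encoded[:4] == struct.pack('>i', 1331087195)  # CRC from test_encode_message_v1
```

Note also the two attribute masks: the codec now occupies the low three bits (CODEC_MASK 0x07), and bit 0x08 carries the broker-assigned timestamp type.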
diff --git a/kafka/protocol/types.py b/kafka/protocol/types.py
index 01799bb..18aaca1 100644
--- a/kafka/protocol/types.py
+++ b/kafka/protocol/types.py
@@ -1,52 +1,63 @@
 from __future__ import absolute_import

-from struct import pack, unpack
+from struct import pack, unpack, error

 from .abstract import AbstractType


+def _pack(f, value):
+    try:
+        return pack(f, value)
+    except error:
+        raise ValueError(error)
+
+
+def _unpack(f, data):
+    try:
+        (value,) = unpack(f, data)
+        return value
+    except error:
+        raise ValueError(error)
+
+
 class Int8(AbstractType):
     @classmethod
     def encode(cls, value):
-        return pack('>b', value)
+        return _pack('>b', value)

     @classmethod
     def decode(cls, data):
-        (value,) = unpack('>b', data.read(1))
-        return value
+        return _unpack('>b', data.read(1))


 class Int16(AbstractType):
     @classmethod
     def encode(cls, value):
-        return pack('>h', value)
+        return _pack('>h', value)

     @classmethod
     def decode(cls, data):
-        (value,) = unpack('>h', data.read(2))
-        return value
+        return _unpack('>h', data.read(2))


 class Int32(AbstractType):
     @classmethod
     def encode(cls, value):
-        return pack('>i', value)
+        return _pack('>i', value)

     @classmethod
     def decode(cls, data):
-        (value,) = unpack('>i', data.read(4))
-        return value
+        return _unpack('>i', data.read(4))


 class Int64(AbstractType):
     @classmethod
     def encode(cls, value):
-        return pack('>q', value)
+        return _pack('>q', value)

     @classmethod
     def decode(cls, data):
-        (value,) = unpack('>q', data.read(8))
-        return value
+        return _unpack('>q', data.read(8))


 class String(AbstractType):
@@ -63,7 +74,10 @@
         length = Int16.decode(data)
         if length < 0:
             return None
-        return data.read(length).decode(self.encoding)
+        value = data.read(length)
+        if len(value) != length:
+            raise ValueError('Buffer underrun decoding string')
+        return value.decode(self.encoding)


 class Bytes(AbstractType):
@@ -79,7 +93,10 @@
         length = Int32.decode(data)
         if length < 0:
             return None
-        return data.read(length)
+        value = data.read(length)
+        if len(value) != length:
+            raise ValueError('Buffer underrun decoding Bytes')
+        return value


 class Schema(AbstractType):
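The new length checks exist because `BytesIO.read(n)` silently returns fewer than n bytes at the end of the buffer, so a truncated fetch response used to yield corrupt values rather than an error. A library-independent demonstration of the underrun condition that now raises ValueError (the buffer contents are invented for the example):

```python
import io
import struct

buf = io.BytesIO(struct.pack('>i', 10) + b'only4')  # claims 10 bytes, holds 5
length = struct.unpack('>i', buf.read(4))[0]
value = buf.read(length)
assert len(value) != length  # silent underrun -- the case that now raises

# The patched decoders turn this into ValueError('Buffer underrun ...'),
# which MessageSet.decode catches to emit a PartialMessage marker.
```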
diff --git a/test/test_consumer_group.py b/test/test_consumer_group.py
index d8a0041..04ed9bb 100644
--- a/test/test_consumer_group.py
+++ b/test/test_consumer_group.py
@@ -87,21 +87,21 @@ def test_group(kafka_broker, topic):
                 elif not consumers[c].assignment():
                     break
+
+            # If all consumers exist and have an assignment
+            else:
+
                 # Verify all consumers are in the same generation
-                generations = set()
-                for consumer in six.itervalues(consumers):
-                    generations.add(consumer._coordinator.generation)
-                if len(generations) != 1:
+                # then log state and break while loop
+                generations = set([consumer._coordinator.generation
+                                   for consumer in list(consumers.values())])
+
+                if len(generations) == 1:
+                    for c, consumer in list(consumers.items()):
+                        logging.info("[%s] %s %s: %s", c,
+                                     consumer._coordinator.generation,
+                                     consumer._coordinator.member_id,
+                                     consumer.assignment())
                     break
-
-            # If all checks passed, log state and break while loop
-            else:
-                for c in range(num_consumers):
-                    logging.info("[%s] %s %s: %s", c,
-                                 consumers[c]._coordinator.generation,
-                                 consumers[c]._coordinator.member_id,
-                                 consumers[c].assignment())
-                break

         assert time.time() < timeout, "timeout waiting for assignments"

         group_assignment = set()
diff --git a/test/test_protocol.py b/test/test_protocol.py
index d705e3a..247fcc3 100644
--- a/test/test_protocol.py
+++ b/test/test_protocol.py
@@ -1,848 +1,146 @@
 #pylint: skip-file
-from contextlib import contextmanager
 import struct

+import pytest
 import six
-from mock import patch, sentinel
-from . import unittest
-
-from kafka.codec import has_snappy, gzip_decode, snappy_decode
-from kafka.errors import (
-    ChecksumError, KafkaUnavailableError, UnsupportedCodecError,
-    ConsumerFetchSizeTooSmall, ProtocolError)
-from kafka.protocol import (
-    ATTRIBUTE_CODEC_MASK, CODEC_NONE, CODEC_GZIP, CODEC_SNAPPY, KafkaProtocol,
-    create_message, create_gzip_message, create_snappy_message,
-    create_message_set)
-from kafka.structs import (
-    OffsetRequestPayload, OffsetResponsePayload,
-    OffsetCommitRequestPayload, OffsetCommitResponsePayload,
-    OffsetFetchRequestPayload, OffsetFetchResponsePayload,
-    ProduceRequestPayload, ProduceResponsePayload,
-    FetchRequestPayload, FetchResponsePayload,
-    Message, OffsetAndMessage, BrokerMetadata, ConsumerMetadataResponse)
-
-
-class TestProtocol(unittest.TestCase):
-    def test_create_message(self):
-        payload = "test"
-        key = "key"
-        msg = create_message(payload, key)
-        self.assertEqual(msg.magic, 0)
-        self.assertEqual(msg.attributes, 0)
-        self.assertEqual(msg.key, key)
-        self.assertEqual(msg.value, payload)
-
-    def test_create_gzip(self):
-        payloads = [(b"v1", None), (b"v2", None)]
-        msg = create_gzip_message(payloads)
-        self.assertEqual(msg.magic, 0)
-        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_GZIP)
-        self.assertEqual(msg.key, None)
-        # Need to decode to check since gzipped payload is non-deterministic
-        decoded = gzip_decode(msg.value)
-        expect = b"".join([
-            struct.pack(">q", 0),  # MsgSet offset
-            struct.pack(">i", 16),  # MsgSet size
-            struct.pack(">i", 1285512130),  # CRC
-            struct.pack(">bb", 0, 0),  # Magic, flags
-            struct.pack(">i", -1),  # -1 indicates a null key
-            struct.pack(">i", 2),  # Msg length (bytes)
-            b"v1",  # Message contents
-
-            struct.pack(">q", 0),  # MsgSet offset
-            struct.pack(">i", 16),  # MsgSet size
-            struct.pack(">i", -711587208),  # CRC
-            struct.pack(">bb", 0, 0),  # Magic, flags
-            struct.pack(">i", -1),  # -1 indicates a null key
-            struct.pack(">i", 2),  # Msg length (bytes)
-            b"v2",  # Message contents
-        ])
-
-        self.assertEqual(decoded, expect)
-
-    def test_create_gzip_keyed(self):
-        payloads = [(b"v1", b"k1"), (b"v2", b"k2")]
-        msg = create_gzip_message(payloads)
-        self.assertEqual(msg.magic, 0)
-        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_GZIP)
-        self.assertEqual(msg.key, None)
-        # Need to decode to check since gzipped payload is non-deterministic
-        decoded = gzip_decode(msg.value)
-        expect = b"".join([
-            struct.pack(">q", 0),  # MsgSet Offset
-            struct.pack(">i", 18),  # Msg Size
-            struct.pack(">i", 1474775406),  # CRC
-            struct.pack(">bb", 0, 0),  # Magic, flags
-            struct.pack(">i", 2),  # Length of key
-            b"k1",  # Key
-            struct.pack(">i", 2),  # Length of value
-            b"v1",  # Value
-
-            struct.pack(">q", 0),  # MsgSet Offset
-            struct.pack(">i", 18),  # Msg Size
-            struct.pack(">i", -16383415),  # CRC
-            struct.pack(">bb", 0, 0),  # Magic, flags
-            struct.pack(">i", 2),  # Length of key
-            b"k2",  # Key
-            struct.pack(">i", 2),  # Length of value
-            b"v2",  # Value
-        ])
-
-        self.assertEqual(decoded, expect)
-
-    @unittest.skipUnless(has_snappy(), "Snappy not available")
-    def test_create_snappy(self):
-        payloads = [(b"v1", None), (b"v2", None)]
-        msg = create_snappy_message(payloads)
-        self.assertEqual(msg.magic, 0)
-        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
-        self.assertEqual(msg.key, None)
-        decoded = snappy_decode(msg.value)
-        expect = b"".join([
-            struct.pack(">q", 0),  # MsgSet offset
-            struct.pack(">i", 16),  # MsgSet size
-            struct.pack(">i", 1285512130),  # CRC
-            struct.pack(">bb", 0, 0),  # Magic, flags
-            struct.pack(">i", -1),  # -1 indicates a null key
-            struct.pack(">i", 2),  # Msg length (bytes)
-            b"v1",  # Message contents
-
-            struct.pack(">q", 0),  # MsgSet offset
-            struct.pack(">i", 16),  # MsgSet size
-            struct.pack(">i", -711587208),  # CRC
-            struct.pack(">bb", 0, 0),  # Magic, flags
-            struct.pack(">i", -1),  # -1 indicates a null key
-            struct.pack(">i", 2),  # Msg length (bytes)
-            b"v2",  # Message contents
-        ])
-
-        self.assertEqual(decoded, expect)
-
-    @unittest.skipUnless(has_snappy(), "Snappy not available")
-    def test_create_snappy_keyed(self):
-        payloads = [(b"v1", b"k1"), (b"v2", b"k2")]
-        msg = create_snappy_message(payloads)
-        self.assertEqual(msg.magic, 0)
-        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
-        self.assertEqual(msg.key, None)
-        decoded = snappy_decode(msg.value)
-        expect = b"".join([
-            struct.pack(">q", 0),  # MsgSet Offset
-            struct.pack(">i", 18),  # Msg Size
-            struct.pack(">i", 1474775406),  # CRC
-            struct.pack(">bb", 0, 0),  # Magic, flags
-            struct.pack(">i", 2),  # Length of key
-            b"k1",  # Key
-            struct.pack(">i", 2),  # Length of value
-            b"v1",  # Value
-
-            struct.pack(">q", 0),  # MsgSet Offset
-            struct.pack(">i", 18),  # Msg Size
-            struct.pack(">i", -16383415),  # CRC
-            struct.pack(">bb", 0, 0),  # Magic, flags
-            struct.pack(">i", 2),  # Length of key
-            b"k2",  # Key
-            struct.pack(">i", 2),  # Length of value
-            b"v2",  # Value
-        ])
-
-        self.assertEqual(decoded, expect)
-
-    def test_encode_message_header(self):
-        expect = b"".join([
-            struct.pack(">h", 10),  # API Key
-            struct.pack(">h", 0),  # API Version
-            struct.pack(">i", 4),  # Correlation Id
-            struct.pack(">h", len("client3")),  # Length of clientId
-            b"client3",  # ClientId
-        ])
-
-        encoded = KafkaProtocol._encode_message_header(b"client3", 4, 10)
-        self.assertEqual(encoded, expect)
-
-    def test_encode_message(self):
-        message = create_message(b"test", b"key")
-        encoded = KafkaProtocol._encode_message(message)
-        expect = b"".join([
-            struct.pack(">i", -1427009701),  # CRC
-            struct.pack(">bb", 0, 0),  # Magic, flags
-            struct.pack(">i", 3),  # Length of key
-            b"key",  # key
-            struct.pack(">i", 4),  # Length of value
-            b"test",  # value
-        ])
-
-        self.assertEqual(encoded, expect)
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_decode_message(self):
-        encoded = b"".join([
-            struct.pack(">i", -1427009701),  # CRC
-            struct.pack(">bb", 0, 0),  # Magic, flags
-            struct.pack(">i", 3),  # Length of key
-            b"key",  # key
-            struct.pack(">i", 4),  # Length of value
-            b"test",  # value
-        ])
-
-        offset = 10
-        (returned_offset, decoded_message) = list(KafkaProtocol._decode_message(encoded, offset))[0]
-
-        self.assertEqual(returned_offset, offset)
-        self.assertEqual(decoded_message, create_message(b"test", b"key"))
-
-    def test_encode_message_failure(self):
-        with self.assertRaises(ProtocolError):
-            KafkaProtocol._encode_message(Message(1, 0, "key", "test"))
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_encode_message_set(self):
-        message_set = [
-            create_message(b"v1", b"k1"),
-            create_message(b"v2", b"k2")
-        ]
-
-        encoded = KafkaProtocol._encode_message_set(message_set)
-        expect = b"".join([
-            struct.pack(">q", 0),  # MsgSet Offset
-            struct.pack(">i", 18),  # Msg Size
-            struct.pack(">i", 1474775406),  # CRC
-            struct.pack(">bb", 0, 0),  # Magic, flags
-            struct.pack(">i", 2),  # Length of key
-            b"k1",  # Key
-            struct.pack(">i", 2),  # Length of value
-            b"v1",  # Value
-
-            struct.pack(">q", 0),  # MsgSet Offset
-            struct.pack(">i", 18),  # Msg Size
-            struct.pack(">i", -16383415),  # CRC
-            struct.pack(">bb", 0, 0),  # Magic, flags
-            struct.pack(">i", 2),  # Length of key
-            b"k2",  # Key
-            struct.pack(">i", 2),  # Length of value
-            b"v2",  # Value
-        ])
-
-        self.assertEqual(encoded, expect)
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_decode_message_set(self):
-        encoded = b"".join([
-            struct.pack(">q", 0),  # MsgSet Offset
-            struct.pack(">i", 18),  # Msg Size
-            struct.pack(">i", 1474775406),  # CRC
-            struct.pack(">bb", 0, 0),  # Magic, flags
-            struct.pack(">i", 2),  # Length of key
-            b"k1",  # Key
-            struct.pack(">i", 2),  # Length of value
-            b"v1",  # Value
-
-            struct.pack(">q", 1),  # MsgSet Offset
-            struct.pack(">i", 18),  # Msg Size
-            struct.pack(">i", -16383415),  # CRC
-            struct.pack(">bb", 0, 0),  # Magic, flags
-            struct.pack(">i", 2),  # Length of key
-            b"k2",  # Key
-            struct.pack(">i", 2),  # Length of value
-            b"v2",  # Value
-        ])
-
-        msgs = list(KafkaProtocol._decode_message_set_iter(encoded))
-        self.assertEqual(len(msgs), 2)
-        msg1, msg2 = msgs
-
-        returned_offset1, decoded_message1 = msg1
-        returned_offset2, decoded_message2 = msg2
-
-        self.assertEqual(returned_offset1, 0)
-        self.assertEqual(decoded_message1, create_message(b"v1", b"k1"))
-
-        self.assertEqual(returned_offset2, 1)
-        self.assertEqual(decoded_message2, create_message(b"v2", b"k2"))
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_decode_message_gzip(self):
-        gzip_encoded = (b'\xc0\x11\xb2\xf0\x00\x01\xff\xff\xff\xff\x00\x00\x000'
-                        b'\x1f\x8b\x08\x00\xa1\xc1\xc5R\x02\xffc`\x80\x03\x01'
-                        b'\x9f\xf9\xd1\x87\x18\x18\xfe\x03\x01\x90\xc7Tf\xc8'
-                        b'\x80$wu\x1aW\x05\x92\x9c\x11\x00z\xc0h\x888\x00\x00'
-                        b'\x00')
-        offset = 11
-        messages = list(KafkaProtocol._decode_message(gzip_encoded, offset))
-
-        self.assertEqual(len(messages), 2)
-        msg1, msg2 = messages
-
-        returned_offset1, decoded_message1 = msg1
-        self.assertEqual(returned_offset1, 0)
-        self.assertEqual(decoded_message1, create_message(b"v1"))
-
-        returned_offset2, decoded_message2 = msg2
-        self.assertEqual(returned_offset2, 0)
-        self.assertEqual(decoded_message2, create_message(b"v2"))
-
-    @unittest.skip('needs updating for new protocol classes')
-    @unittest.skipUnless(has_snappy(), "Snappy not available")
-    def test_decode_message_snappy(self):
-        snappy_encoded = (b'\xec\x80\xa1\x95\x00\x02\xff\xff\xff\xff\x00\x00'
-                          b'\x00,8\x00\x00\x19\x01@\x10L\x9f[\xc2\x00\x00\xff'
-                          b'\xff\xff\xff\x00\x00\x00\x02v1\x19\x1bD\x00\x10\xd5'
-                          b'\x96\nx\x00\x00\xff\xff\xff\xff\x00\x00\x00\x02v2')
-        offset = 11
-        messages = list(KafkaProtocol._decode_message(snappy_encoded, offset))
-        self.assertEqual(len(messages), 2)
-
-        msg1, msg2 = messages
-
-        returned_offset1, decoded_message1 = msg1
-        self.assertEqual(returned_offset1, 0)
-        self.assertEqual(decoded_message1, create_message(b"v1"))
-
-        returned_offset2, decoded_message2 = msg2
-        self.assertEqual(returned_offset2, 0)
-        self.assertEqual(decoded_message2, create_message(b"v2"))
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_decode_message_checksum_error(self):
-        invalid_encoded_message = b"This is not a valid encoded message"
-        iter = KafkaProtocol._decode_message(invalid_encoded_message, 0)
-        self.assertRaises(ChecksumError, list, iter)
-
-    # NOTE: The error handling in _decode_message_set_iter() is questionable.
-    # If it's modified, the next two tests might need to be fixed.
-    @unittest.skip('needs updating for new protocol classes')
-    def test_decode_message_set_fetch_size_too_small(self):
-        with self.assertRaises(ConsumerFetchSizeTooSmall):
-            list(KafkaProtocol._decode_message_set_iter('a'))
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_decode_message_set_stop_iteration(self):
-        encoded = b"".join([
-            struct.pack(">q", 0),  # MsgSet Offset
-            struct.pack(">i", 18),  # Msg Size
-            struct.pack(">i", 1474775406),  # CRC
-            struct.pack(">bb", 0, 0),  # Magic, flags
-            struct.pack(">i", 2),  # Length of key
-            b"k1",  # Key
-            struct.pack(">i", 2),  # Length of value
-            b"v1",  # Value
-
-            struct.pack(">q", 1),  # MsgSet Offset
-            struct.pack(">i", 18),  # Msg Size
-            struct.pack(">i", -16383415),  # CRC
-            struct.pack(">bb", 0, 0),  # Magic, flags
-            struct.pack(">i", 2),  # Length of key
-            b"k2",  # Key
-            struct.pack(">i", 2),  # Length of value
-            b"v2",  # Value
-            b"@1$%(Y!",  # Random padding
-        ])
-
-        msgs = MessageSet.decode(io.BytesIO(encoded))
-        self.assertEqual(len(msgs), 2)
-        msg1, msg2 = msgs
-
-        returned_offset1, msg_size1, decoded_message1 = msg1
-        returned_offset2, msg_size2, decoded_message2 = msg2
-
-        self.assertEqual(returned_offset1, 0)
-        self.assertEqual(decoded_message1.value, b"v1")
-        self.assertEqual(decoded_message1.key, b"k1")
-
-        self.assertEqual(returned_offset2, 1)
-        self.assertEqual(decoded_message2.value, b"v2")
-        self.assertEqual(decoded_message2.key, b"k2")
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_encode_produce_request(self):
-        requests = [
-            ProduceRequestPayload("topic1", 0, [
-                kafka.protocol.message.Message(b"a"),
-                kafka.protocol.message.Message(b"b")
-            ]),
-            ProduceRequestPayload("topic2", 1, [
-                kafka.protocol.message.Message(b"c")
-            ])
-        ]
-
-        msg_a_binary = KafkaProtocol._encode_message(create_message(b"a"))
-        msg_b_binary = KafkaProtocol._encode_message(create_message(b"b"))
-        msg_c_binary = KafkaProtocol._encode_message(create_message(b"c"))
-
-        header = b"".join([
-            struct.pack('>i', 0x94),  # The length of the message overall
-            struct.pack('>h', 0),  # Msg Header, Message type = Produce
-            struct.pack('>h', 0),  # Msg Header, API version
-            struct.pack('>i', 2),  # Msg Header, Correlation ID
-            struct.pack('>h7s', 7, b"client1"),  # Msg Header, The client ID
-            struct.pack('>h', 2),  # Num acks required
-            struct.pack('>i', 100),  # Request Timeout
-            struct.pack('>i', 2),  # The number of requests
-        ])
-
-        total_len = len(msg_a_binary) + len(msg_b_binary)
-        topic1 = b"".join([
-            struct.pack('>h6s', 6, b'topic1'),  # The topic1
-            struct.pack('>i', 1),  # One message set
-            struct.pack('>i', 0),  # Partition 0
-            struct.pack('>i', total_len + 24),  # Size of the incoming message set
-            struct.pack('>q', 0),  # No offset specified
-            struct.pack('>i', len(msg_a_binary)),  # Length of message
-            msg_a_binary,  # Actual message
-            struct.pack('>q', 0),  # No offset specified
-            struct.pack('>i', len(msg_b_binary)),  # Length of message
-            msg_b_binary,  # Actual message
-        ])
-
-        topic2 = b"".join([
-            struct.pack('>h6s', 6, b'topic2'),  # The topic1
-            struct.pack('>i', 1),  # One message set
-            struct.pack('>i', 1),  # Partition 1
-            struct.pack('>i', len(msg_c_binary) + 12),  # Size of the incoming message set
-            struct.pack('>q', 0),  # No offset specified
-            struct.pack('>i', len(msg_c_binary)),  # Length of message
-            msg_c_binary,  # Actual message
-        ])
-
-        expected1 = b"".join([ header, topic1, topic2 ])
-        expected2 = b"".join([ header, topic2, topic1 ])
-
-        encoded = KafkaProtocol.encode_produce_request(b"client1", 2, requests, 2, 100)
-        self.assertIn(encoded, [ expected1, expected2 ])
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_decode_produce_response(self):
-        t1 = b"topic1"
-        t2 = b"topic2"
-        _long = int
-        if six.PY2:
-            _long = long
-        encoded = struct.pack('>iih%dsiihqihqh%dsiihq' % (len(t1), len(t2)),
-                              2, 2, len(t1), t1, 2, 0, 0, _long(10), 1, 1, _long(20),
-                              len(t2), t2, 1, 0, 0, _long(30))
-        responses = list(KafkaProtocol.decode_produce_response(encoded))
-        self.assertEqual(responses,
-                         [ProduceResponse(t1, 0, 0, _long(10)),
-                          ProduceResponse(t1, 1, 1, _long(20)),
-                          ProduceResponse(t2, 0, 0, _long(30))])
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_encode_fetch_request(self):
-        requests = [
-            FetchRequest(b"topic1", 0, 10, 1024),
-            FetchRequest(b"topic2", 1, 20, 100),
-        ]
-
-        header = b"".join([
-            struct.pack('>i', 89),  # The length of the message overall
-            struct.pack('>h', 1),  # Msg Header, Message type = Fetch
-            struct.pack('>h', 0),  # Msg Header, API version
-            struct.pack('>i', 3),  # Msg Header, Correlation ID
-            struct.pack('>h7s', 7, b"client1"),  # Msg Header, The client ID
-            struct.pack('>i', -1),  # Replica Id
-            struct.pack('>i', 2),  # Max wait time
-            struct.pack('>i', 100),  # Min bytes
-            struct.pack('>i', 2),  # Num requests
-        ])
-
-        topic1 = b"".join([
-            struct.pack('>h6s', 6, b'topic1'),  # Topic
-            struct.pack('>i', 1),  # Num Payloads
-            struct.pack('>i', 0),  # Partition 0
-            struct.pack('>q', 10),  # Offset
-            struct.pack('>i', 1024),  # Max Bytes
-        ])
-
-        topic2 = b"".join([
-            struct.pack('>h6s', 6, b'topic2'),  # Topic
-            struct.pack('>i', 1),  # Num Payloads
-            struct.pack('>i', 1),  # Partition 0
-            struct.pack('>q', 20),  # Offset
-            struct.pack('>i', 100),  # Max Bytes
-        ])
-
-        expected1 = b"".join([ header, topic1, topic2 ])
-        expected2 = b"".join([ header, topic2, topic1 ])
-
-        encoded = KafkaProtocol.encode_fetch_request(b"client1", 3, requests, 2, 100)
-        self.assertIn(encoded, [ expected1, expected2 ])
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_decode_fetch_response(self):
-        t1 = b"topic1"
-        t2 = b"topic2"
-        msgs = [create_message(msg)
-                for msg in [b"message1", b"hi", b"boo", b"foo", b"so fun!"]]
-        ms1 = KafkaProtocol._encode_message_set([msgs[0], msgs[1]])
-        ms2 = KafkaProtocol._encode_message_set([msgs[2]])
-        ms3 = KafkaProtocol._encode_message_set([msgs[3], msgs[4]])
-
-        encoded = struct.pack('>iih%dsiihqi%dsihqi%dsh%dsiihqi%ds' %
-                              (len(t1), len(ms1), len(ms2), len(t2), len(ms3)),
-                              4, 2, len(t1), t1, 2, 0, 0, 10, len(ms1), ms1, 1,
-                              1, 20, len(ms2), ms2, len(t2), t2, 1, 0, 0, 30,
-                              len(ms3), ms3)
-
-        responses = list(KafkaProtocol.decode_fetch_response(encoded))
-        def expand_messages(response):
-            return FetchResponsePayload(response.topic, response.partition,
-                                        response.error, response.highwaterMark,
-                                        list(response.messages))
-
-        expanded_responses = list(map(expand_messages, responses))
-        expect = [FetchResponsePayload(t1, 0, 0, 10, [OffsetAndMessage(0, msgs[0]),
-                                                      OffsetAndMessage(0, msgs[1])]),
-                  FetchResponsePayload(t1, 1, 1, 20, [OffsetAndMessage(0, msgs[2])]),
-                  FetchResponsePayload(t2, 0, 0, 30, [OffsetAndMessage(0, msgs[3]),
-                                                      OffsetAndMessage(0, msgs[4])])]
-        self.assertEqual(expanded_responses, expect)
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_encode_metadata_request_no_topics(self):
-        expected = b"".join([
-            struct.pack(">i", 17),  # Total length of the request
-            struct.pack('>h', 3),  # API key metadata fetch
-            struct.pack('>h', 0),  # API version
-            struct.pack('>i', 4),  # Correlation ID
-            struct.pack('>h3s', 3, b"cid"),  # The client ID
-            struct.pack('>i', 0),  # No topics, give all the data!
-        ])
-
-        encoded = KafkaProtocol.encode_metadata_request(b"cid", 4)
-
-        self.assertEqual(encoded, expected)
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_encode_metadata_request_with_topics(self):
-        expected = b"".join([
-            struct.pack(">i", 25),  # Total length of the request
-            struct.pack('>h', 3),  # API key metadata fetch
-            struct.pack('>h', 0),  # API version
-            struct.pack('>i', 4),  # Correlation ID
-            struct.pack('>h3s', 3, b"cid"),  # The client ID
-            struct.pack('>i', 2),  # Number of topics in the request
-            struct.pack('>h2s', 2, b"t1"),  # Topic "t1"
-            struct.pack('>h2s', 2, b"t2"),  # Topic "t2"
-        ])
-
-        encoded = KafkaProtocol.encode_metadata_request(b"cid", 4, [b"t1", b"t2"])
-
-        self.assertEqual(encoded, expected)
-
-    def _create_encoded_metadata_response(self, brokers, topics):
-        encoded = []
-        encoded.append(struct.pack('>ii', 3, len(brokers)))
-        for broker in brokers:
-            encoded.append(struct.pack('>ih%dsi' % len(broker.host),
-                                       broker.nodeId, len(broker.host),
-                                       broker.host, broker.port))
-
-        encoded.append(struct.pack('>i', len(topics)))
-        for topic in topics:
-            encoded.append(struct.pack('>hh%dsi' % len(topic.topic),
-                                       topic.error, len(topic.topic),
-                                       topic.topic, len(topic.partitions)))
-            for metadata in topic.partitions:
-                encoded.append(struct.pack('>hiii', metadata.error,
-                                           metadata.partition, metadata.leader,
-                                           len(metadata.replicas)))
-                if len(metadata.replicas) > 0:
-                    encoded.append(struct.pack('>%di' % len(metadata.replicas),
-                                               *metadata.replicas))
-
-                encoded.append(struct.pack('>i', len(metadata.isr)))
-                if len(metadata.isr) > 0:
-                    encoded.append(struct.pack('>%di' % len(metadata.isr),
-                                               *metadata.isr))
-        return b''.join(encoded)
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_decode_metadata_response(self):
-        node_brokers = [
-            BrokerMetadata(0, b"brokers1.kafka.rdio.com", 1000),
-            BrokerMetadata(1, b"brokers1.kafka.rdio.com", 1001),
-            BrokerMetadata(3, b"brokers2.kafka.rdio.com", 1000)
-        ]
-
-        '''
-        topic_partitions = [
-            TopicMetadata(b"topic1", 0, [
-                PartitionMetadata(b"topic1", 0, 1, (0, 2), (2,), 0),
-                PartitionMetadata(b"topic1", 1, 3, (0, 1), (0, 1), 1)
-            ]),
-            TopicMetadata(b"topic2", 1, [
-                PartitionMetadata(b"topic2", 0, 0, (), (), 0),
-            ]),
-        ]
-        encoded = self._create_encoded_metadata_response(node_brokers,
-                                                         topic_partitions)
-        decoded = KafkaProtocol.decode_metadata_response(encoded)
-        self.assertEqual(decoded, (node_brokers, topic_partitions))
-        '''
-
-    def test_encode_consumer_metadata_request(self):
-        expected = b"".join([
-            struct.pack(">i", 17),  # Total length of the request
-            struct.pack('>h', 10),  # API key consumer metadata
-            struct.pack('>h', 0),  # API version
-            struct.pack('>i', 4),  # Correlation ID
-            struct.pack('>h3s', 3, b"cid"),  # The client ID
-            struct.pack('>h2s', 2, b"g1"),  # Group "g1"
-        ])
-
-        encoded = KafkaProtocol.encode_consumer_metadata_request(b"cid", 4, b"g1")
-
-        self.assertEqual(encoded, expected)
-
-    def test_decode_consumer_metadata_response(self):
-        encoded = b"".join([
-            struct.pack(">i", 42),  # Correlation ID
-            struct.pack(">h", 0),  # No Error
-            struct.pack(">i", 1),  # Broker ID
-            struct.pack(">h23s", 23, b"brokers1.kafka.rdio.com"),  # Broker Host
-            struct.pack(">i", 1000),  # Broker Port
-        ])
-
-        results = KafkaProtocol.decode_consumer_metadata_response(encoded)
-        self.assertEqual(results,
-            ConsumerMetadataResponse(error = 0, nodeId = 1, host = b'brokers1.kafka.rdio.com', port = 1000)
-        )
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_encode_offset_request(self):
-        expected = b"".join([
-            struct.pack(">i", 21),  # Total length of the request
-            struct.pack('>h', 2),  # Message type = offset fetch
-            struct.pack('>h', 0),  # API version
-            struct.pack('>i', 4),  # Correlation ID
-            struct.pack('>h3s', 3, b"cid"),  # The client ID
-            struct.pack('>i', -1),  # Replica Id
-            struct.pack('>i', 0),  # No topic/partitions
-        ])
-
-        encoded = KafkaProtocol.encode_offset_request(b"cid", 4)
-
-        self.assertEqual(encoded, expected)
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_encode_offset_request__no_payload(self):
-        expected = b"".join([
-            struct.pack(">i", 65),  # Total length of the request
-
-            struct.pack('>h', 2),  # Message type = offset fetch
-            struct.pack('>h', 0),  # API version
-            struct.pack('>i', 4),  # Correlation ID
-            struct.pack('>h3s', 3, b"cid"),  # The client ID
-            struct.pack('>i', -1),  # Replica Id
-            struct.pack('>i', 1),  # Num topics
-            struct.pack(">h6s", 6, b"topic1"),  # Topic for the request
-            struct.pack(">i", 2),  # Two partitions
-
-            struct.pack(">i", 3),  # Partition 3
-            struct.pack(">q", -1),  # No time offset
-            struct.pack(">i", 1),  # One offset requested
-
-            struct.pack(">i", 4),  # Partition 3
-            struct.pack(">q", -1),  # No time offset
-            struct.pack(">i", 1),  # One offset requested
-        ])
-
-        encoded = KafkaProtocol.encode_offset_request(b"cid", 4, [
-            OffsetRequest(b'topic1', 3, -1, 1),
-            OffsetRequest(b'topic1', 4, -1, 1),
-        ])
-
-        self.assertEqual(encoded, expected)
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_decode_offset_response(self):
-        encoded = b"".join([
-            struct.pack(">i", 42),  # Correlation ID
-            struct.pack(">i", 1),  # One topics
-            struct.pack(">h6s", 6, b"topic1"),  # First topic
-            struct.pack(">i", 2),  # Two partitions
-
-            struct.pack(">i", 2),  # Partition 2
-            struct.pack(">h", 0),  # No error
-            struct.pack(">i", 1),  # One offset
-            struct.pack(">q", 4),  # Offset 4
-
-            struct.pack(">i", 4),  # Partition 4
-            struct.pack(">h", 0),  # No error
-            struct.pack(">i", 1),  # One offset
-            struct.pack(">q", 8),  # Offset 8
-        ])
-
-        results = KafkaProtocol.decode_offset_response(encoded)
-        self.assertEqual(set(results), set([
-            OffsetResponse(topic = b'topic1', partition = 2, error = 0, offsets=(4,)),
-            OffsetResponse(topic = b'topic1', partition = 4, error = 0, offsets=(8,)),
-        ]))
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_encode_offset_commit_request(self):
-        header = b"".join([
-            struct.pack('>i', 99),  # Total message length
-
-            struct.pack('>h', 8),  # Message type = offset commit
-            struct.pack('>h', 0),  # API version
-            struct.pack('>i', 42),  # Correlation ID
-            struct.pack('>h9s', 9, b"client_id"),  # The client ID
-            struct.pack('>h8s', 8, b"group_id"),  # The group to commit for
-            struct.pack('>i', 2),  # Num topics
-        ])
-
-        topic1 = b"".join([
-            struct.pack(">h6s", 6, b"topic1"),  # Topic for the request
-            struct.pack(">i", 2),  # Two partitions
-            struct.pack(">i", 0),  # Partition 0
-            struct.pack(">q", 123),  # Offset 123
-            struct.pack(">h", -1),  # Null metadata
-            struct.pack(">i", 1),  # Partition 1
-            struct.pack(">q", 234),  # Offset 234
-            struct.pack(">h", -1),  # Null metadata
-        ])
-
-        topic2 = b"".join([
-            struct.pack(">h6s", 6, b"topic2"),  # Topic for the request
-            struct.pack(">i", 1),  # One partition
-            struct.pack(">i", 2),  # Partition 2
-            struct.pack(">q", 345),  # Offset 345
-            struct.pack(">h", -1),  # Null metadata
-        ])
-
-        expected1 = b"".join([ header, topic1, topic2 ])
-        expected2 = b"".join([ header, topic2, topic1 ])
-
-        encoded = KafkaProtocol.encode_offset_commit_request(b"client_id", 42, b"group_id", [
-            OffsetCommitRequest(b"topic1", 0, 123, None),
-            OffsetCommitRequest(b"topic1", 1, 234, None),
-            OffsetCommitRequest(b"topic2", 2, 345, None),
-        ])
-
-        self.assertIn(encoded, [ expected1, expected2 ])
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_decode_offset_commit_response(self):
-        encoded = b"".join([
-            struct.pack(">i", 42),  # Correlation ID
-            struct.pack(">i", 1),  # One topic
-            struct.pack(">h6s", 6, b"topic1"),  # First topic
-            struct.pack(">i", 2),  # Two partitions
-
-            struct.pack(">i", 2),  # Partition 2
-            struct.pack(">h", 0),  # No error
-
-            struct.pack(">i", 4),  # Partition 4
-            struct.pack(">h", 0),  # No error
-        ])
-
-        results = KafkaProtocol.decode_offset_commit_response(encoded)
-        self.assertEqual(set(results), set([
-            OffsetCommitResponse(topic = b'topic1', partition = 2, error = 0),
-            OffsetCommitResponse(topic = b'topic1', partition = 4, error = 0),
-        ]))
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_encode_offset_fetch_request(self):
-        header = b"".join([
-            struct.pack('>i', 69),  # Total message length
-            struct.pack('>h', 9),  # Message type = offset fetch
-            struct.pack('>h', 0),  # API version
-            struct.pack('>i', 42),  # Correlation ID
-            struct.pack('>h9s', 9, b"client_id"),  # The client ID
-            struct.pack('>h8s', 8, b"group_id"),  # The group to commit for
-            struct.pack('>i', 2),  # Num topics
-        ])
-
-        topic1 = b"".join([
-            struct.pack(">h6s", 6, b"topic1"),  # Topic for the request
-            struct.pack(">i", 2),  # Two partitions
-            struct.pack(">i", 0),  # Partition 0
-            struct.pack(">i", 1),  # Partition 1
-        ])
-
-        topic2 = b"".join([
-            struct.pack(">h6s", 6, b"topic2"),  # Topic for the request
-            struct.pack(">i", 1),  # One partitions
-            struct.pack(">i", 2),  # Partition 2
-        ])
-
-        expected1 = b"".join([ header, topic1, topic2 ])
-        expected2 = b"".join([ header, topic2, topic1 ])
-
-        encoded = KafkaProtocol.encode_offset_fetch_request(b"client_id", 42, b"group_id", [
-            OffsetFetchRequest(b"topic1", 0),
-            OffsetFetchRequest(b"topic1", 1),
-            OffsetFetchRequest(b"topic2", 2),
-        ])
-
-        self.assertIn(encoded, [ expected1, expected2 ])
-
-    @unittest.skip('needs updating for new protocol classes')
-    def test_decode_offset_fetch_response(self):
-        encoded = b"".join([
-            struct.pack(">i", 42),  # Correlation ID
-            struct.pack(">i", 1),  # One topics
-            struct.pack(">h6s", 6, b"topic1"),  # First topic
-            struct.pack(">i", 2),  # Two partitions
-
-            struct.pack(">i", 2),  # Partition 2
-            struct.pack(">q", 4),  # Offset 4
-            struct.pack(">h4s", 4, b"meta"),  # Metadata
-            struct.pack(">h", 0),  # No error
-
-            struct.pack(">i", 4),  # Partition 4
-            struct.pack(">q", 8),  # Offset 8
-            struct.pack(">h4s", 4, b"meta"),  # Metadata
-            struct.pack(">h", 0),  # No error
-        ])
-
-        results = KafkaProtocol.decode_offset_fetch_response(encoded)
-        self.assertEqual(set(results), set([
-            OffsetFetchResponse(topic = b'topic1', partition = 2, offset = 4, error = 0, metadata = b"meta"),
-            OffsetFetchResponse(topic = b'topic1', partition = 4, offset = 8, error = 0, metadata = b"meta"),
-        ]))
-
-    @contextmanager
-    def mock_create_message_fns(self):
-        import kafka.protocol
-        with patch.object(kafka.protocol.legacy, "create_message",
-                          return_value=sentinel.message):
-            with patch.object(kafka.protocol.legacy, "create_gzip_message",
-                              return_value=sentinel.gzip_message):
-                with patch.object(kafka.protocol.legacy, "create_snappy_message",
-                                  return_value=sentinel.snappy_message):
-                    yield
-
-    def test_create_message_set(self):
-        messages = [(1, "k1"), (2, "k2"), (3, "k3")]
-
-        # Default codec is CODEC_NONE. Expect list of regular messages.
-        expect = [sentinel.message] * len(messages)
-        with self.mock_create_message_fns():
-            message_set = create_message_set(messages)
-        self.assertEqual(message_set, expect)
-
-        # CODEC_NONE: Expect list of regular messages.
-        expect = [sentinel.message] * len(messages)
-        with self.mock_create_message_fns():
-            message_set = create_message_set(messages, CODEC_NONE)
-        self.assertEqual(message_set, expect)
-
-        # CODEC_GZIP: Expect list of one gzip-encoded message.
-        expect = [sentinel.gzip_message]
-        with self.mock_create_message_fns():
-            message_set = create_message_set(messages, CODEC_GZIP)
-        self.assertEqual(message_set, expect)
-
-        # CODEC_SNAPPY: Expect list of one snappy-encoded message.
-        expect = [sentinel.snappy_message]
-        with self.mock_create_message_fns():
-            message_set = create_message_set(messages, CODEC_SNAPPY)
-        self.assertEqual(message_set, expect)
-
-        # Unknown codec should raise UnsupportedCodecError.
-        with self.assertRaises(UnsupportedCodecError):
-            create_message_set(messages, -1)
+from kafka.protocol.api import RequestHeader
+from kafka.protocol.commit import GroupCoordinatorRequest
+from kafka.protocol.message import Message, MessageSet
+
+
+def test_create_message():
+    payload = b'test'
+    key = b'key'
+    msg = Message(payload, key=key)
+    assert msg.magic == 0
+    assert msg.attributes == 0
+    assert msg.key == key
+    assert msg.value == payload
+
+
+def test_encode_message_v0():
+    message = Message(b'test', key=b'key')
+    encoded = message.encode()
+    expect = b''.join([
+        struct.pack('>i', -1427009701),  # CRC
+        struct.pack('>bb', 0, 0),  # Magic, flags
+        struct.pack('>i', 3),  # Length of key
+        b'key',  # key
+        struct.pack('>i', 4),  # Length of value
+        b'test',  # value
+    ])
+    assert encoded == expect
+
+
+def test_encode_message_v1():
+    message = Message(b'test', key=b'key', magic=1, timestamp=1234)
+    encoded = message.encode()
+    expect = b''.join([
+        struct.pack('>i', 1331087195),  # CRC
+        struct.pack('>bb', 1, 0),  # Magic, flags
+        struct.pack('>q', 1234),  # Timestamp
+        struct.pack('>i', 3),  # Length of key
+        b'key',  # key
+        struct.pack('>i', 4),  # Length of value
+        b'test',  # value
+    ])
+    assert encoded == expect
+
+
+def test_decode_message():
+    encoded = b''.join([
+        struct.pack('>i', -1427009701),  # CRC
+        struct.pack('>bb', 0, 0),  # Magic, flags
+        struct.pack('>i', 3),  # Length of key
+        b'key',  # key
+        struct.pack('>i', 4),  # Length of value
+        b'test',  # value
+    ])
+    decoded_message = Message.decode(encoded)
+    msg = Message(b'test', key=b'key')
+    msg.encode()  # crc is recalculated during encoding
+    assert decoded_message == msg
+
+
+def test_encode_message_set():
+    messages = [
+        Message(b'v1', key=b'k1'),
+        Message(b'v2', key=b'k2')
+    ]
+    encoded = MessageSet.encode([(0, msg.encode())
+                                 for msg in messages],
+                                size=False)
+    expect = b''.join([
+        struct.pack('>q', 0),  # MsgSet Offset
+        struct.pack('>i', 18),  # Msg Size
+        struct.pack('>i', 1474775406),  # CRC
+        struct.pack('>bb', 0, 0),  # Magic, flags
+        struct.pack('>i', 2),  # Length of key
+        b'k1',  # Key
+        struct.pack('>i', 2),  # Length of value
+        b'v1',  # Value
+
+        struct.pack('>q', 0),  # MsgSet Offset
+        struct.pack('>i', 18),  # Msg Size
+        struct.pack('>i', -16383415),  # CRC
+        struct.pack('>bb', 0, 0),  # Magic, flags
+        struct.pack('>i', 2),  # Length of key
+        b'k2',  # Key
+        struct.pack('>i', 2),  # Length of value
+        b'v2',  # Value
+    ])
+    assert encoded == expect
+
+
+def test_decode_message_set():
+    encoded = b''.join([
+        struct.pack('>q', 0),  # MsgSet Offset
+        struct.pack('>i', 18),  # Msg Size
+        struct.pack('>i', 1474775406),  # CRC
+        struct.pack('>bb', 0, 0),  # Magic, flags
+        struct.pack('>i', 2),  # Length of key
+        b'k1',  # Key
+        struct.pack('>i', 2),  # Length of value
+        b'v1',  # Value
+
+        struct.pack('>q', 1),  # MsgSet Offset
+        struct.pack('>i', 18),  # Msg Size
+        struct.pack('>i', -16383415),  # CRC
+        struct.pack('>bb', 0, 0),  # Magic, flags
+        struct.pack('>i', 2),  # Length of key
+        b'k2',  # Key
+        struct.pack('>i', 2),  # Length of value
+        b'v2',  # Value
+    ])
+
+    msgs = MessageSet.decode(encoded, bytes_to_read=len(encoded))
+    assert len(msgs) == 2
+    msg1, msg2 = msgs
+
+    returned_offset1, message1_size, decoded_message1 = msg1
+    returned_offset2, message2_size, decoded_message2 = msg2
+
+    assert returned_offset1 == 0
+    message1 = Message(b'v1', key=b'k1')
+    message1.encode()
+    assert decoded_message1 == message1
+
+    assert returned_offset2 == 1
+    message2 = Message(b'v2', key=b'k2')
+    message2.encode()
+    assert decoded_message2 == message2
+
+
+def test_encode_message_header():
+    expect = b''.join([
+        struct.pack('>h', 10),  # API Key
+        struct.pack('>h', 0),  # API Version
+        struct.pack('>i', 4),  # Correlation Id
+        struct.pack('>h', len('client3')),  # Length of clientId
+        b'client3',  # ClientId
+    ])
+
+    req = GroupCoordinatorRequest[0]('foo')
+    header = RequestHeader(req, correlation_id=4, client_id='client3')
+    assert header.encode() == expect
+
+
+def test_encode_message_header():
+    expect = b''.join([
+        struct.pack('>h', 10),              # API Key
+        struct.pack('>h', 0),               # API Version
+        struct.pack('>i', 4),               # Correlation Id
+        struct.pack('>h', len('client3')),  # Length of clientId
+        b'client3',                         # ClientId
+    ])
+
+    req = GroupCoordinatorRequest[0]('foo')
+    header = RequestHeader(req, correlation_id=4, client_id='client3')
+    assert header.encode() == expect
diff --git a/test/test_protocol_legacy.py b/test/test_protocol_legacy.py
new file mode 100644
index 0000000..d705e3a
--- /dev/null
+++ b/test/test_protocol_legacy.py
@@ -0,0 +1,848 @@
+#pylint: skip-file
+from contextlib import contextmanager
+import struct
+
+import six
+from mock import patch, sentinel
+from . import unittest
+
+from kafka.codec import has_snappy, gzip_decode, snappy_decode
+from kafka.errors import (
+    ChecksumError, KafkaUnavailableError, UnsupportedCodecError,
+    ConsumerFetchSizeTooSmall, ProtocolError)
+from kafka.protocol import (
+    ATTRIBUTE_CODEC_MASK, CODEC_NONE, CODEC_GZIP, CODEC_SNAPPY, KafkaProtocol,
+    create_message, create_gzip_message, create_snappy_message,
+    create_message_set)
+from kafka.structs import (
+    OffsetRequestPayload, OffsetResponsePayload,
+    OffsetCommitRequestPayload, OffsetCommitResponsePayload,
+    OffsetFetchRequestPayload, OffsetFetchResponsePayload,
+    ProduceRequestPayload, ProduceResponsePayload,
+    FetchRequestPayload, FetchResponsePayload,
+    Message, OffsetAndMessage, BrokerMetadata, ConsumerMetadataResponse)
+
+
+class TestProtocol(unittest.TestCase):
+    def test_create_message(self):
+        payload = "test"
+        key = "key"
+        msg = create_message(payload, key)
+        self.assertEqual(msg.magic, 0)
+        self.assertEqual(msg.attributes, 0)
+        self.assertEqual(msg.key, key)
+        self.assertEqual(msg.value, payload)
+
+    def test_create_gzip(self):
+        payloads = [(b"v1", None), (b"v2", None)]
+        msg = create_gzip_message(payloads)
+        self.assertEqual(msg.magic, 0)
+        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_GZIP)
+        self.assertEqual(msg.key, None)
+        # Need to decode to check since gzipped payload is non-deterministic
+        decoded = gzip_decode(msg.value)
+        expect = b"".join([
+            struct.pack(">q", 0),           # MsgSet offset
+            struct.pack(">i", 16),          # MsgSet size
+            struct.pack(">i", 1285512130),  # CRC
+            struct.pack(">bb", 0, 0),       # Magic, flags
+            struct.pack(">i", -1),          # -1 indicates a null key
+            struct.pack(">i", 2),           # Msg length (bytes)
+            b"v1",                          # Message contents
+
+            struct.pack(">q", 0),           # MsgSet offset
+            struct.pack(">i", 16),          # MsgSet size
+            struct.pack(">i", -711587208),  # CRC
+            struct.pack(">bb", 0, 0),       # Magic, flags
+            struct.pack(">i", -1),          # -1 indicates a null key
+            struct.pack(">i", 2),           # Msg length (bytes)
+            b"v2",                          # Message contents
+        ])
+
+        self.assertEqual(decoded, expect)
+
+    def test_create_gzip_keyed(self):
+        payloads = [(b"v1", b"k1"), (b"v2", b"k2")]
+        msg = create_gzip_message(payloads)
+        self.assertEqual(msg.magic, 0)
+        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_GZIP)
+        self.assertEqual(msg.key, None)
+        # Need to decode to check since gzipped payload is non-deterministic
+        decoded = gzip_decode(msg.value)
+        expect = b"".join([
+            struct.pack(">q", 0),           # MsgSet Offset
+            struct.pack(">i", 18),          # Msg Size
+            struct.pack(">i", 1474775406),  # CRC
+            struct.pack(">bb", 0, 0),       # Magic, flags
+            struct.pack(">i", 2),           # Length of key
+            b"k1",                          # Key
+            struct.pack(">i", 2),           # Length of value
+            b"v1",                          # Value
+
+            struct.pack(">q", 0),           # MsgSet Offset
+            struct.pack(">i", 18),          # Msg Size
+            struct.pack(">i", -16383415),   # CRC
+            struct.pack(">bb", 0, 0),       # Magic, flags
+            struct.pack(">i", 2),           # Length of key
+            b"k2",                          # Key
+            struct.pack(">i", 2),           # Length of value
+            b"v2",                          # Value
+        ])
+
+        self.assertEqual(decoded, expect)
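Editorial note (not part of the patch): create_gzip_message builds a "wrapper" message, which is the shape these gzip tests decode back out: the inner message set is encoded normally, compressed, and stored as the null-key value of an outer v0 message whose low attribute bits carry the codec. A rough standalone sketch of that framing, using only the stdlib; helper names are illustrative, not kafka-python API:

import binascii
import gzip
import io
import struct

CODEC_GZIP_BIT = 0x01  # low attribute bits select the codec

def encode_v0(value, key=None, attributes=0):
    def field(b):
        return struct.pack('>i', -1) if b is None else struct.pack('>i', len(b)) + b
    body = struct.pack('>bb', 0, attributes) + field(key) + field(value)
    return struct.pack('>I', binascii.crc32(body) & 0xffffffff) + body

def gzip_wrapper(encoded_messages):
    # inner set: (offset, size, message) triplets, offset 0 for every entry
    inner = b''.join(struct.pack('>qi', 0, len(m)) + m for m in encoded_messages)
    buf = io.BytesIO()
    with gzip.GzipFile(fileobj=buf, mode='wb') as f:
        f.write(inner)
    return encode_v0(buf.getvalue(), attributes=CODEC_GZIP_BIT)

wrapper = gzip_wrapper([encode_v0(b'v1'), encode_v0(b'v2')])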
+    @unittest.skipUnless(has_snappy(), "Snappy not available")
+    def test_create_snappy(self):
+        payloads = [(b"v1", None), (b"v2", None)]
+        msg = create_snappy_message(payloads)
+        self.assertEqual(msg.magic, 0)
+        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
+        self.assertEqual(msg.key, None)
+        decoded = snappy_decode(msg.value)
+        expect = b"".join([
+            struct.pack(">q", 0),           # MsgSet offset
+            struct.pack(">i", 16),          # MsgSet size
+            struct.pack(">i", 1285512130),  # CRC
+            struct.pack(">bb", 0, 0),       # Magic, flags
+            struct.pack(">i", -1),          # -1 indicates a null key
+            struct.pack(">i", 2),           # Msg length (bytes)
+            b"v1",                          # Message contents
+
+            struct.pack(">q", 0),           # MsgSet offset
+            struct.pack(">i", 16),          # MsgSet size
+            struct.pack(">i", -711587208),  # CRC
+            struct.pack(">bb", 0, 0),       # Magic, flags
+            struct.pack(">i", -1),          # -1 indicates a null key
+            struct.pack(">i", 2),           # Msg length (bytes)
+            b"v2",                          # Message contents
+        ])
+
+        self.assertEqual(decoded, expect)
+
+    @unittest.skipUnless(has_snappy(), "Snappy not available")
+    def test_create_snappy_keyed(self):
+        payloads = [(b"v1", b"k1"), (b"v2", b"k2")]
+        msg = create_snappy_message(payloads)
+        self.assertEqual(msg.magic, 0)
+        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
+        self.assertEqual(msg.key, None)
+        decoded = snappy_decode(msg.value)
+        expect = b"".join([
+            struct.pack(">q", 0),           # MsgSet Offset
+            struct.pack(">i", 18),          # Msg Size
+            struct.pack(">i", 1474775406),  # CRC
+            struct.pack(">bb", 0, 0),       # Magic, flags
+            struct.pack(">i", 2),           # Length of key
+            b"k1",                          # Key
+            struct.pack(">i", 2),           # Length of value
+            b"v1",                          # Value
+
+            struct.pack(">q", 0),           # MsgSet Offset
+            struct.pack(">i", 18),          # Msg Size
+            struct.pack(">i", -16383415),   # CRC
+            struct.pack(">bb", 0, 0),       # Magic, flags
+            struct.pack(">i", 2),           # Length of key
+            b"k2",                          # Key
+            struct.pack(">i", 2),           # Length of value
+            b"v2",                          # Value
+        ])
+
+        self.assertEqual(decoded, expect)
+
+    def test_encode_message_header(self):
+        expect = b"".join([
+            struct.pack(">h", 10),              # API Key
+            struct.pack(">h", 0),               # API Version
+            struct.pack(">i", 4),               # Correlation Id
+            struct.pack(">h", len("client3")),  # Length of clientId
+            b"client3",                         # ClientId
+        ])
+
+        encoded = KafkaProtocol._encode_message_header(b"client3", 4, 10)
+        self.assertEqual(encoded, expect)
+
+    def test_encode_message(self):
+        message = create_message(b"test", b"key")
+        encoded = KafkaProtocol._encode_message(message)
+        expect = b"".join([
+            struct.pack(">i", -1427009701),  # CRC
+            struct.pack(">bb", 0, 0),        # Magic, flags
+            struct.pack(">i", 3),            # Length of key
+            b"key",                          # key
+            struct.pack(">i", 4),            # Length of value
+            b"test",                         # value
+        ])
+
+        self.assertEqual(encoded, expect)
+
+    @unittest.skip('needs updating for new protocol classes')
+    def test_decode_message(self):
+        encoded = b"".join([
+            struct.pack(">i", -1427009701),  # CRC
+            struct.pack(">bb", 0, 0),        # Magic, flags
+            struct.pack(">i", 3),            # Length of key
+            b"key",                          # key
+            struct.pack(">i", 4),            # Length of value
+            b"test",                         # value
+        ])
+
+        offset = 10
+        (returned_offset, decoded_message) = list(KafkaProtocol._decode_message(encoded, offset))[0]
+
+        self.assertEqual(returned_offset, offset)
+        self.assertEqual(decoded_message, create_message(b"test", b"key"))
+    def test_encode_message_failure(self):
+        with self.assertRaises(ProtocolError):
+            KafkaProtocol._encode_message(Message(1, 0, "key", "test"))
+
+    @unittest.skip('needs updating for new protocol classes')
+    def test_encode_message_set(self):
+        message_set = [
+            create_message(b"v1", b"k1"),
+            create_message(b"v2", b"k2")
+        ]
+
+        encoded = KafkaProtocol._encode_message_set(message_set)
+        expect = b"".join([
+            struct.pack(">q", 0),           # MsgSet Offset
+            struct.pack(">i", 18),          # Msg Size
+            struct.pack(">i", 1474775406),  # CRC
+            struct.pack(">bb", 0, 0),       # Magic, flags
+            struct.pack(">i", 2),           # Length of key
+            b"k1",                          # Key
+            struct.pack(">i", 2),           # Length of value
+            b"v1",                          # Value
+
+            struct.pack(">q", 0),           # MsgSet Offset
+            struct.pack(">i", 18),          # Msg Size
+            struct.pack(">i", -16383415),   # CRC
+            struct.pack(">bb", 0, 0),       # Magic, flags
+            struct.pack(">i", 2),           # Length of key
+            b"k2",                          # Key
+            struct.pack(">i", 2),           # Length of value
+            b"v2",                          # Value
+        ])
+
+        self.assertEqual(encoded, expect)
+
+    @unittest.skip('needs updating for new protocol classes')
+    def test_decode_message_set(self):
+        encoded = b"".join([
+            struct.pack(">q", 0),           # MsgSet Offset
+            struct.pack(">i", 18),          # Msg Size
+            struct.pack(">i", 1474775406),  # CRC
+            struct.pack(">bb", 0, 0),       # Magic, flags
+            struct.pack(">i", 2),           # Length of key
+            b"k1",                          # Key
+            struct.pack(">i", 2),           # Length of value
+            b"v1",                          # Value
+
+            struct.pack(">q", 1),           # MsgSet Offset
+            struct.pack(">i", 18),          # Msg Size
+            struct.pack(">i", -16383415),   # CRC
+            struct.pack(">bb", 0, 0),       # Magic, flags
+            struct.pack(">i", 2),           # Length of key
+            b"k2",                          # Key
+            struct.pack(">i", 2),           # Length of value
+            b"v2",                          # Value
+        ])
+
+        msgs = list(KafkaProtocol._decode_message_set_iter(encoded))
+        self.assertEqual(len(msgs), 2)
+        msg1, msg2 = msgs
+
+        returned_offset1, decoded_message1 = msg1
+        returned_offset2, decoded_message2 = msg2
+
+        self.assertEqual(returned_offset1, 0)
+        self.assertEqual(decoded_message1, create_message(b"v1", b"k1"))
+
+        self.assertEqual(returned_offset2, 1)
+        self.assertEqual(decoded_message2, create_message(b"v2", b"k2"))
+
+    @unittest.skip('needs updating for new protocol classes')
+    def test_decode_message_gzip(self):
+        gzip_encoded = (b'\xc0\x11\xb2\xf0\x00\x01\xff\xff\xff\xff\x00\x00\x000'
+                        b'\x1f\x8b\x08\x00\xa1\xc1\xc5R\x02\xffc`\x80\x03\x01'
+                        b'\x9f\xf9\xd1\x87\x18\x18\xfe\x03\x01\x90\xc7Tf\xc8'
+                        b'\x80$wu\x1aW\x05\x92\x9c\x11\x00z\xc0h\x888\x00\x00'
+                        b'\x00')
+        offset = 11
+        messages = list(KafkaProtocol._decode_message(gzip_encoded, offset))
+
+        self.assertEqual(len(messages), 2)
+        msg1, msg2 = messages
+
+        returned_offset1, decoded_message1 = msg1
+        self.assertEqual(returned_offset1, 0)
+        self.assertEqual(decoded_message1, create_message(b"v1"))
+
+        returned_offset2, decoded_message2 = msg2
+        self.assertEqual(returned_offset2, 0)
+        self.assertEqual(decoded_message2, create_message(b"v2"))
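Editorial note (not part of the patch): the decompression tests above exercise the same walk that any message-set reader performs: consume (offset, size, message) triplets, and when a message's codec bits are set, treat its value as a nested, compressed message set. A minimal Python 3 sketch of that loop, assuming gzip-only compression and null-key v0 wrapper messages; names are illustrative:

import gzip
import struct

def iter_message_set(data):
    """Yield (offset, size, message_bytes), expanding gzip wrappers."""
    pos = 0
    while pos + 12 <= len(data):
        offset, size = struct.unpack_from('>qi', data, pos)
        if pos + 12 + size > len(data):
            break  # partial trailing bytes are ignored, as in the tests
        msg = data[pos + 12:pos + 12 + size]
        attributes = msg[5]  # crc(4) + magic(1), then the attribute byte
        if attributes & 0x03:  # codec bits set: value holds a nested set
            # null key (length -1), so the value length sits at byte offset 10
            (vlen,) = struct.unpack_from('>i', msg, 10)
            nested = gzip.decompress(msg[14:14 + vlen])
            for item in iter_message_set(nested):
                yield item
        else:
            yield offset, size, msg
        pos += 12 + size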
+    @unittest.skip('needs updating for new protocol classes')
+    @unittest.skipUnless(has_snappy(), "Snappy not available")
+    def test_decode_message_snappy(self):
+        snappy_encoded = (b'\xec\x80\xa1\x95\x00\x02\xff\xff\xff\xff\x00\x00'
+                          b'\x00,8\x00\x00\x19\x01@\x10L\x9f[\xc2\x00\x00\xff'
+                          b'\xff\xff\xff\x00\x00\x00\x02v1\x19\x1bD\x00\x10\xd5'
+                          b'\x96\nx\x00\x00\xff\xff\xff\xff\x00\x00\x00\x02v2')
+        offset = 11
+        messages = list(KafkaProtocol._decode_message(snappy_encoded, offset))
+        self.assertEqual(len(messages), 2)
+
+        msg1, msg2 = messages
+
+        returned_offset1, decoded_message1 = msg1
+        self.assertEqual(returned_offset1, 0)
+        self.assertEqual(decoded_message1, create_message(b"v1"))
+
+        returned_offset2, decoded_message2 = msg2
+        self.assertEqual(returned_offset2, 0)
+        self.assertEqual(decoded_message2, create_message(b"v2"))
+
+    @unittest.skip('needs updating for new protocol classes')
+    def test_decode_message_checksum_error(self):
+        invalid_encoded_message = b"This is not a valid encoded message"
+        iter = KafkaProtocol._decode_message(invalid_encoded_message, 0)
+        self.assertRaises(ChecksumError, list, iter)
+
+    # NOTE: The error handling in _decode_message_set_iter() is questionable.
+    # If it's modified, the next two tests might need to be fixed.
+    @unittest.skip('needs updating for new protocol classes')
+    def test_decode_message_set_fetch_size_too_small(self):
+        with self.assertRaises(ConsumerFetchSizeTooSmall):
+            list(KafkaProtocol._decode_message_set_iter('a'))
+
+    @unittest.skip('needs updating for new protocol classes')
+    def test_decode_message_set_stop_iteration(self):
+        encoded = b"".join([
+            struct.pack(">q", 0),           # MsgSet Offset
+            struct.pack(">i", 18),          # Msg Size
+            struct.pack(">i", 1474775406),  # CRC
+            struct.pack(">bb", 0, 0),       # Magic, flags
+            struct.pack(">i", 2),           # Length of key
+            b"k1",                          # Key
+            struct.pack(">i", 2),           # Length of value
+            b"v1",                          # Value
+
+            struct.pack(">q", 1),           # MsgSet Offset
+            struct.pack(">i", 18),          # Msg Size
+            struct.pack(">i", -16383415),   # CRC
+            struct.pack(">bb", 0, 0),       # Magic, flags
+            struct.pack(">i", 2),           # Length of key
+            b"k2",                          # Key
+            struct.pack(">i", 2),           # Length of value
+            b"v2",                          # Value
+            b"@1$%(Y!",                     # Random padding
+        ])
+
+        # Trailing bytes that do not form a complete message are dropped
+        msgs = list(KafkaProtocol._decode_message_set_iter(encoded))
+        self.assertEqual(len(msgs), 2)
+        msg1, msg2 = msgs
+
+        returned_offset1, decoded_message1 = msg1
+        returned_offset2, decoded_message2 = msg2
+
+        self.assertEqual(returned_offset1, 0)
+        self.assertEqual(decoded_message1.value, b"v1")
+        self.assertEqual(decoded_message1.key, b"k1")
+
+        self.assertEqual(returned_offset2, 1)
+        self.assertEqual(decoded_message2.value, b"v2")
+        self.assertEqual(decoded_message2.key, b"k2")
+    @unittest.skip('needs updating for new protocol classes')
+    def test_encode_produce_request(self):
+        requests = [
+            ProduceRequestPayload("topic1", 0, [
+                kafka.protocol.message.Message(b"a"),
+                kafka.protocol.message.Message(b"b")
+            ]),
+            ProduceRequestPayload("topic2", 1, [
+                kafka.protocol.message.Message(b"c")
+            ])
+        ]
+
+        msg_a_binary = KafkaProtocol._encode_message(create_message(b"a"))
+        msg_b_binary = KafkaProtocol._encode_message(create_message(b"b"))
+        msg_c_binary = KafkaProtocol._encode_message(create_message(b"c"))
+
+        header = b"".join([
+            struct.pack('>i', 0x94),             # The length of the message overall
+            struct.pack('>h', 0),                # Msg Header, Message type = Produce
+            struct.pack('>h', 0),                # Msg Header, API version
+            struct.pack('>i', 2),                # Msg Header, Correlation ID
+            struct.pack('>h7s', 7, b"client1"),  # Msg Header, The client ID
+            struct.pack('>h', 2),                # Num acks required
+            struct.pack('>i', 100),              # Request Timeout
+            struct.pack('>i', 2),                # The number of requests
+        ])
+
+        total_len = len(msg_a_binary) + len(msg_b_binary)
+        topic1 = b"".join([
+            struct.pack('>h6s', 6, b'topic1'),     # The topic1
+            struct.pack('>i', 1),                  # One message set
+            struct.pack('>i', 0),                  # Partition 0
+            struct.pack('>i', total_len + 24),     # Size of the incoming message set
+            struct.pack('>q', 0),                  # No offset specified
+            struct.pack('>i', len(msg_a_binary)),  # Length of message
+            msg_a_binary,                          # Actual message
+            struct.pack('>q', 0),                  # No offset specified
+            struct.pack('>i', len(msg_b_binary)),  # Length of message
+            msg_b_binary,                          # Actual message
+        ])
+
+        topic2 = b"".join([
+            struct.pack('>h6s', 6, b'topic2'),          # The topic2
+            struct.pack('>i', 1),                       # One message set
+            struct.pack('>i', 1),                       # Partition 1
+            struct.pack('>i', len(msg_c_binary) + 12),  # Size of the incoming message set
+            struct.pack('>q', 0),                       # No offset specified
+            struct.pack('>i', len(msg_c_binary)),       # Length of message
+            msg_c_binary,                               # Actual message
+        ])
+
+        expected1 = b"".join([ header, topic1, topic2 ])
+        expected2 = b"".join([ header, topic2, topic1 ])
+
+        encoded = KafkaProtocol.encode_produce_request(b"client1", 2, requests, 2, 100)
+        self.assertIn(encoded, [ expected1, expected2 ])
+
+    @unittest.skip('needs updating for new protocol classes')
+    def test_decode_produce_response(self):
+        t1 = b"topic1"
+        t2 = b"topic2"
+        _long = int
+        if six.PY2:
+            _long = long
+        encoded = struct.pack('>iih%dsiihqihqh%dsiihq' % (len(t1), len(t2)),
+                              2, 2, len(t1), t1, 2, 0, 0, _long(10), 1, 1, _long(20),
+                              len(t2), t2, 1, 0, 0, _long(30))
+        responses = list(KafkaProtocol.decode_produce_response(encoded))
+        self.assertEqual(responses,
+                         [ProduceResponse(t1, 0, 0, _long(10)),
+                          ProduceResponse(t1, 1, 1, _long(20)),
+                          ProduceResponse(t2, 0, 0, _long(30))])
+
+    @unittest.skip('needs updating for new protocol classes')
+    def test_encode_fetch_request(self):
+        requests = [
+            FetchRequest(b"topic1", 0, 10, 1024),
+            FetchRequest(b"topic2", 1, 20, 100),
+        ]
+
+        header = b"".join([
+            struct.pack('>i', 89),               # The length of the message overall
+            struct.pack('>h', 1),                # Msg Header, Message type = Fetch
+            struct.pack('>h', 0),                # Msg Header, API version
+            struct.pack('>i', 3),                # Msg Header, Correlation ID
+            struct.pack('>h7s', 7, b"client1"),  # Msg Header, The client ID
+            struct.pack('>i', -1),               # Replica Id
+            struct.pack('>i', 2),                # Max wait time
+            struct.pack('>i', 100),              # Min bytes
+            struct.pack('>i', 2),                # Num requests
+        ])
+
+        topic1 = b"".join([
+            struct.pack('>h6s', 6, b'topic1'),  # Topic
+            struct.pack('>i', 1),               # Num Payloads
+            struct.pack('>i', 0),               # Partition 0
+            struct.pack('>q', 10),              # Offset
+            struct.pack('>i', 1024),            # Max Bytes
+        ])
+
+        topic2 = b"".join([
+            struct.pack('>h6s', 6, b'topic2'),  # Topic
+            struct.pack('>i', 1),               # Num Payloads
+            struct.pack('>i', 1),               # Partition 1
+            struct.pack('>q', 20),              # Offset
+            struct.pack('>i', 100),             # Max Bytes
+        ])
+
+        expected1 = b"".join([ header, topic1, topic2 ])
+        expected2 = b"".join([ header, topic2, topic1 ])
+
+        encoded = KafkaProtocol.encode_fetch_request(b"client1", 3, requests, 2, 100)
+        self.assertIn(encoded, [ expected1, expected2 ])
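Editorial note (not part of the patch): every legacy request above shares one frame, a 4-byte size (excluding itself), then api key, api version, correlation id, and a length-prefixed client id, followed by the api-specific payload. A minimal sketch of that framing with an illustrative helper name:

import struct

def frame_request(api_key, api_version, correlation_id, client_id, payload):
    header = struct.pack('>hhih', api_key, api_version, correlation_id,
                         len(client_id)) + client_id
    body = header + payload
    return struct.pack('>i', len(body)) + body

# e.g. a metadata request for all topics (api key 3), matching the
# test_encode_metadata_request_no_topics vector below
raw = frame_request(3, 0, 4, b'cid', struct.pack('>i', 0))
assert len(raw) == 4 + 17  # size field of 17, plus the size field itself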
+    @unittest.skip('needs updating for new protocol classes')
+    def test_decode_fetch_response(self):
+        t1 = b"topic1"
+        t2 = b"topic2"
+        msgs = [create_message(msg)
+                for msg in [b"message1", b"hi", b"boo", b"foo", b"so fun!"]]
+        ms1 = KafkaProtocol._encode_message_set([msgs[0], msgs[1]])
+        ms2 = KafkaProtocol._encode_message_set([msgs[2]])
+        ms3 = KafkaProtocol._encode_message_set([msgs[3], msgs[4]])
+
+        encoded = struct.pack('>iih%dsiihqi%dsihqi%dsh%dsiihqi%ds' %
+                              (len(t1), len(ms1), len(ms2), len(t2), len(ms3)),
+                              4, 2, len(t1), t1, 2, 0, 0, 10, len(ms1), ms1, 1,
+                              1, 20, len(ms2), ms2, len(t2), t2, 1, 0, 0, 30,
+                              len(ms3), ms3)
+
+        responses = list(KafkaProtocol.decode_fetch_response(encoded))
+        def expand_messages(response):
+            return FetchResponsePayload(response.topic, response.partition,
+                                        response.error, response.highwaterMark,
+                                        list(response.messages))
+
+        expanded_responses = list(map(expand_messages, responses))
+        expect = [FetchResponsePayload(t1, 0, 0, 10, [OffsetAndMessage(0, msgs[0]),
+                                                      OffsetAndMessage(0, msgs[1])]),
+                  FetchResponsePayload(t1, 1, 1, 20, [OffsetAndMessage(0, msgs[2])]),
+                  FetchResponsePayload(t2, 0, 0, 30, [OffsetAndMessage(0, msgs[3]),
+                                                      OffsetAndMessage(0, msgs[4])])]
+        self.assertEqual(expanded_responses, expect)
+
+    @unittest.skip('needs updating for new protocol classes')
+    def test_encode_metadata_request_no_topics(self):
+        expected = b"".join([
+            struct.pack(">i", 17),           # Total length of the request
+            struct.pack('>h', 3),            # API key metadata fetch
+            struct.pack('>h', 0),            # API version
+            struct.pack('>i', 4),            # Correlation ID
+            struct.pack('>h3s', 3, b"cid"),  # The client ID
+            struct.pack('>i', 0),            # No topics, give all the data!
+        ])
+
+        encoded = KafkaProtocol.encode_metadata_request(b"cid", 4)
+
+        self.assertEqual(encoded, expected)
+
+    @unittest.skip('needs updating for new protocol classes')
+    def test_encode_metadata_request_with_topics(self):
+        expected = b"".join([
+            struct.pack(">i", 25),           # Total length of the request
+            struct.pack('>h', 3),            # API key metadata fetch
+            struct.pack('>h', 0),            # API version
+            struct.pack('>i', 4),            # Correlation ID
+            struct.pack('>h3s', 3, b"cid"),  # The client ID
+            struct.pack('>i', 2),            # Number of topics in the request
+            struct.pack('>h2s', 2, b"t1"),   # Topic "t1"
+            struct.pack('>h2s', 2, b"t2"),   # Topic "t2"
+        ])
+
+        encoded = KafkaProtocol.encode_metadata_request(b"cid", 4, [b"t1", b"t2"])
+
+        self.assertEqual(encoded, expected)
+
+    def _create_encoded_metadata_response(self, brokers, topics):
+        encoded = []
+        encoded.append(struct.pack('>ii', 3, len(brokers)))
+        for broker in brokers:
+            encoded.append(struct.pack('>ih%dsi' % len(broker.host),
+                                       broker.nodeId, len(broker.host),
+                                       broker.host, broker.port))
+
+        encoded.append(struct.pack('>i', len(topics)))
+        for topic in topics:
+            encoded.append(struct.pack('>hh%dsi' % len(topic.topic),
+                                       topic.error, len(topic.topic),
+                                       topic.topic, len(topic.partitions)))
+            for metadata in topic.partitions:
+                encoded.append(struct.pack('>hiii', metadata.error,
+                                           metadata.partition, metadata.leader,
+                                           len(metadata.replicas)))
+                if len(metadata.replicas) > 0:
+                    encoded.append(struct.pack('>%di' % len(metadata.replicas),
+                                               *metadata.replicas))
+
+                encoded.append(struct.pack('>i', len(metadata.isr)))
+                if len(metadata.isr) > 0:
+                    encoded.append(struct.pack('>%di' % len(metadata.isr),
+                                               *metadata.isr))
+        return b''.join(encoded)
+
+    @unittest.skip('needs updating for new protocol classes')
+    def test_decode_metadata_response(self):
+        node_brokers = [
+            BrokerMetadata(0, b"brokers1.kafka.rdio.com", 1000),
+            BrokerMetadata(1, b"brokers1.kafka.rdio.com", 1001),
+            BrokerMetadata(3, b"brokers2.kafka.rdio.com", 1000)
+        ]
+
+        '''
+        topic_partitions = [
+            TopicMetadata(b"topic1", 0, [
+                PartitionMetadata(b"topic1", 0, 1, (0, 2), (2,), 0),
+                PartitionMetadata(b"topic1", 1, 3, (0, 1), (0, 1), 1)
+            ]),
+            TopicMetadata(b"topic2", 1, [
+                PartitionMetadata(b"topic2", 0, 0, (), (), 0),
+            ]),
+        ]
+        encoded = self._create_encoded_metadata_response(node_brokers,
+                                                         topic_partitions)
+        decoded = KafkaProtocol.decode_metadata_response(encoded)
+        self.assertEqual(decoded, (node_brokers, topic_partitions))
+        '''
+    def test_encode_consumer_metadata_request(self):
+        expected = b"".join([
+            struct.pack(">i", 17),           # Total length of the request
+            struct.pack('>h', 10),           # API key consumer metadata
+            struct.pack('>h', 0),            # API version
+            struct.pack('>i', 4),            # Correlation ID
+            struct.pack('>h3s', 3, b"cid"),  # The client ID
+            struct.pack('>h2s', 2, b"g1"),   # Group "g1"
+        ])
+
+        encoded = KafkaProtocol.encode_consumer_metadata_request(b"cid", 4, b"g1")
+
+        self.assertEqual(encoded, expected)
+
+    def test_decode_consumer_metadata_response(self):
+        encoded = b"".join([
+            struct.pack(">i", 42),                                 # Correlation ID
+            struct.pack(">h", 0),                                  # No Error
+            struct.pack(">i", 1),                                  # Broker ID
+            struct.pack(">h23s", 23, b"brokers1.kafka.rdio.com"),  # Broker Host
+            struct.pack(">i", 1000),                               # Broker Port
+        ])
+
+        results = KafkaProtocol.decode_consumer_metadata_response(encoded)
+        self.assertEqual(results,
+            ConsumerMetadataResponse(error = 0, nodeId = 1, host = b'brokers1.kafka.rdio.com', port = 1000)
+        )
+
+    @unittest.skip('needs updating for new protocol classes')
+    def test_encode_offset_request(self):
+        expected = b"".join([
+            struct.pack(">i", 21),           # Total length of the request
+            struct.pack('>h', 2),            # Message type = offset fetch
+            struct.pack('>h', 0),            # API version
+            struct.pack('>i', 4),            # Correlation ID
+            struct.pack('>h3s', 3, b"cid"),  # The client ID
+            struct.pack('>i', -1),           # Replica Id
+            struct.pack('>i', 0),            # No topic/partitions
+        ])
+
+        encoded = KafkaProtocol.encode_offset_request(b"cid", 4)
+
+        self.assertEqual(encoded, expected)
+
+    @unittest.skip('needs updating for new protocol classes')
+    def test_encode_offset_request__no_payload(self):
+        expected = b"".join([
+            struct.pack(">i", 65),              # Total length of the request
+
+            struct.pack('>h', 2),               # Message type = offset fetch
+            struct.pack('>h', 0),               # API version
+            struct.pack('>i', 4),               # Correlation ID
+            struct.pack('>h3s', 3, b"cid"),     # The client ID
+            struct.pack('>i', -1),              # Replica Id
+            struct.pack('>i', 1),               # Num topics
+            struct.pack(">h6s", 6, b"topic1"),  # Topic for the request
+            struct.pack(">i", 2),               # Two partitions
+
+            struct.pack(">i", 3),               # Partition 3
+            struct.pack(">q", -1),              # No time offset
+            struct.pack(">i", 1),               # One offset requested
+
+            struct.pack(">i", 4),               # Partition 4
+            struct.pack(">q", -1),              # No time offset
+            struct.pack(">i", 1),               # One offset requested
+        ])
+
+        encoded = KafkaProtocol.encode_offset_request(b"cid", 4, [
+            OffsetRequest(b'topic1', 3, -1, 1),
+            OffsetRequest(b'topic1', 4, -1, 1),
+        ])
+
+        self.assertEqual(encoded, expected)
+
+    @unittest.skip('needs updating for new protocol classes')
+    def test_decode_offset_response(self):
+        encoded = b"".join([
+            struct.pack(">i", 42),              # Correlation ID
+            struct.pack(">i", 1),               # One topic
+            struct.pack(">h6s", 6, b"topic1"),  # First topic
+            struct.pack(">i", 2),               # Two partitions
+
+            struct.pack(">i", 2),               # Partition 2
+            struct.pack(">h", 0),               # No error
+            struct.pack(">i", 1),               # One offset
+            struct.pack(">q", 4),               # Offset 4
+
+            struct.pack(">i", 4),               # Partition 4
+            struct.pack(">h", 0),               # No error
+            struct.pack(">i", 1),               # One offset
+            struct.pack(">q", 8),               # Offset 8
+        ])
+
+        results = KafkaProtocol.decode_offset_response(encoded)
+        self.assertEqual(set(results), set([
+            OffsetResponse(topic = b'topic1', partition = 2, error = 0, offsets=(4,)),
+            OffsetResponse(topic = b'topic1', partition = 4, error = 0, offsets=(8,)),
+        ]))
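Editorial note (not part of the patch): the decode tests above all walk the same nested layout, a correlation id, then an array of topics, each holding an array of partitions. A standalone sketch of that cursor-based walk for the offset response shape, using struct.unpack_from; the function name is illustrative:

import struct

def decode_offset_response(data):
    correlation_id, num_topics = struct.unpack_from('>ii', data, 0)
    pos = 8
    results = []
    for _ in range(num_topics):
        (tlen,) = struct.unpack_from('>h', data, pos); pos += 2
        topic = data[pos:pos + tlen]; pos += tlen
        (num_parts,) = struct.unpack_from('>i', data, pos); pos += 4
        for _ in range(num_parts):
            partition, error, num_offsets = struct.unpack_from('>ihi', data, pos)
            pos += 10
            offsets = struct.unpack_from('>%dq' % num_offsets, data, pos)
            pos += 8 * num_offsets
            results.append((topic, partition, error, offsets))
    return correlation_id, results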
struct.pack(">h6s", 6, b"topic1"), # Topic for the request + struct.pack(">i", 2), # Two partitions + struct.pack(">i", 0), # Partition 0 + struct.pack(">q", 123), # Offset 123 + struct.pack(">h", -1), # Null metadata + struct.pack(">i", 1), # Partition 1 + struct.pack(">q", 234), # Offset 234 + struct.pack(">h", -1), # Null metadata + ]) + + topic2 = b"".join([ + struct.pack(">h6s", 6, b"topic2"), # Topic for the request + struct.pack(">i", 1), # One partition + struct.pack(">i", 2), # Partition 2 + struct.pack(">q", 345), # Offset 345 + struct.pack(">h", -1), # Null metadata + ]) + + expected1 = b"".join([ header, topic1, topic2 ]) + expected2 = b"".join([ header, topic2, topic1 ]) + + encoded = KafkaProtocol.encode_offset_commit_request(b"client_id", 42, b"group_id", [ + OffsetCommitRequest(b"topic1", 0, 123, None), + OffsetCommitRequest(b"topic1", 1, 234, None), + OffsetCommitRequest(b"topic2", 2, 345, None), + ]) + + self.assertIn(encoded, [ expected1, expected2 ]) + + @unittest.skip('needs updating for new protocol classes') + def test_decode_offset_commit_response(self): + encoded = b"".join([ + struct.pack(">i", 42), # Correlation ID + struct.pack(">i", 1), # One topic + struct.pack(">h6s", 6, b"topic1"),# First topic + struct.pack(">i", 2), # Two partitions + + struct.pack(">i", 2), # Partition 2 + struct.pack(">h", 0), # No error + + struct.pack(">i", 4), # Partition 4 + struct.pack(">h", 0), # No error + ]) + + results = KafkaProtocol.decode_offset_commit_response(encoded) + self.assertEqual(set(results), set([ + OffsetCommitResponse(topic = b'topic1', partition = 2, error = 0), + OffsetCommitResponse(topic = b'topic1', partition = 4, error = 0), + ])) + + @unittest.skip('needs updating for new protocol classes') + def test_encode_offset_fetch_request(self): + header = b"".join([ + struct.pack('>i', 69), # Total message length + struct.pack('>h', 9), # Message type = offset fetch + struct.pack('>h', 0), # API version + struct.pack('>i', 42), # Correlation ID + struct.pack('>h9s', 9, b"client_id"),# The client ID + struct.pack('>h8s', 8, b"group_id"), # The group to commit for + struct.pack('>i', 2), # Num topics + ]) + + topic1 = b"".join([ + struct.pack(">h6s", 6, b"topic1"), # Topic for the request + struct.pack(">i", 2), # Two partitions + struct.pack(">i", 0), # Partition 0 + struct.pack(">i", 1), # Partition 1 + ]) + + topic2 = b"".join([ + struct.pack(">h6s", 6, b"topic2"), # Topic for the request + struct.pack(">i", 1), # One partitions + struct.pack(">i", 2), # Partition 2 + ]) + + expected1 = b"".join([ header, topic1, topic2 ]) + expected2 = b"".join([ header, topic2, topic1 ]) + + encoded = KafkaProtocol.encode_offset_fetch_request(b"client_id", 42, b"group_id", [ + OffsetFetchRequest(b"topic1", 0), + OffsetFetchRequest(b"topic1", 1), + OffsetFetchRequest(b"topic2", 2), + ]) + + self.assertIn(encoded, [ expected1, expected2 ]) + + @unittest.skip('needs updating for new protocol classes') + def test_decode_offset_fetch_response(self): + encoded = b"".join([ + struct.pack(">i", 42), # Correlation ID + struct.pack(">i", 1), # One topics + struct.pack(">h6s", 6, b"topic1"),# First topic + struct.pack(">i", 2), # Two partitions + + struct.pack(">i", 2), # Partition 2 + struct.pack(">q", 4), # Offset 4 + struct.pack(">h4s", 4, b"meta"), # Metadata + struct.pack(">h", 0), # No error + + struct.pack(">i", 4), # Partition 4 + struct.pack(">q", 8), # Offset 8 + struct.pack(">h4s", 4, b"meta"), # Metadata + struct.pack(">h", 0), # No error + ]) + + results = 
+    @unittest.skip('needs updating for new protocol classes')
+    def test_decode_offset_fetch_response(self):
+        encoded = b"".join([
+            struct.pack(">i", 42),              # Correlation ID
+            struct.pack(">i", 1),               # One topic
+            struct.pack(">h6s", 6, b"topic1"),  # First topic
+            struct.pack(">i", 2),               # Two partitions
+
+            struct.pack(">i", 2),               # Partition 2
+            struct.pack(">q", 4),               # Offset 4
+            struct.pack(">h4s", 4, b"meta"),    # Metadata
+            struct.pack(">h", 0),               # No error
+
+            struct.pack(">i", 4),               # Partition 4
+            struct.pack(">q", 8),               # Offset 8
+            struct.pack(">h4s", 4, b"meta"),    # Metadata
+            struct.pack(">h", 0),               # No error
+        ])
+
+        results = KafkaProtocol.decode_offset_fetch_response(encoded)
+        self.assertEqual(set(results), set([
+            OffsetFetchResponse(topic = b'topic1', partition = 2, offset = 4, error = 0, metadata = b"meta"),
+            OffsetFetchResponse(topic = b'topic1', partition = 4, offset = 8, error = 0, metadata = b"meta"),
+        ]))
+
+    @contextmanager
+    def mock_create_message_fns(self):
+        import kafka.protocol
+        with patch.object(kafka.protocol.legacy, "create_message",
+                          return_value=sentinel.message):
+            with patch.object(kafka.protocol.legacy, "create_gzip_message",
+                              return_value=sentinel.gzip_message):
+                with patch.object(kafka.protocol.legacy, "create_snappy_message",
+                                  return_value=sentinel.snappy_message):
+                    yield
+
+    def test_create_message_set(self):
+        messages = [(1, "k1"), (2, "k2"), (3, "k3")]
+
+        # Default codec is CODEC_NONE. Expect list of regular messages.
+        expect = [sentinel.message] * len(messages)
+        with self.mock_create_message_fns():
+            message_set = create_message_set(messages)
+        self.assertEqual(message_set, expect)
+
+        # CODEC_NONE: Expect list of regular messages.
+        expect = [sentinel.message] * len(messages)
+        with self.mock_create_message_fns():
+            message_set = create_message_set(messages, CODEC_NONE)
+        self.assertEqual(message_set, expect)
+
+        # CODEC_GZIP: Expect list of one gzip-encoded message.
+        expect = [sentinel.gzip_message]
+        with self.mock_create_message_fns():
+            message_set = create_message_set(messages, CODEC_GZIP)
+        self.assertEqual(message_set, expect)
+
+        # CODEC_SNAPPY: Expect list of one snappy-encoded message.
+        expect = [sentinel.snappy_message]
+        with self.mock_create_message_fns():
+            message_set = create_message_set(messages, CODEC_SNAPPY)
+        self.assertEqual(message_set, expect)
+
+        # Unknown codec should raise UnsupportedCodecError.
+        with self.assertRaises(UnsupportedCodecError):
+            create_message_set(messages, -1)
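Editorial note (not part of the patch): the last test pins down the dispatch behavior of create_message_set: CODEC_NONE maps each (value, key) pair to its own plain message, gzip and snappy collapse the whole batch into a single wrapper message, and anything else raises. A rough sketch of that shape, with placeholder constructors standing in for the mocked create_* functions:

CODEC_NONE, CODEC_GZIP, CODEC_SNAPPY = 0x00, 0x01, 0x02

def create_message_set_sketch(pairs, codec=CODEC_NONE):
    if codec == CODEC_NONE:
        return [('message', value, key) for value, key in pairs]
    elif codec == CODEC_GZIP:
        return [('gzip_message', pairs)]   # one wrapper for the whole batch
    elif codec == CODEC_SNAPPY:
        return [('snappy_message', pairs)]  # likewise, one wrapper
    raise ValueError('Codec 0x%02x is unsupported' % codec)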