author    Mahendra M <mahendra.m@gmail.com>  2013-05-29 14:20:58 +0530
committer David Arthur <mumrah@gmail.com>    2013-05-29 16:38:39 -0400
commit    2c257eeb1f02748840a8f4535d8d2a88ef5235f2 (patch)
tree      73ff852a845bdb947f0820ea1ff64c7b72436c9b
parent    97962d4af3c0e57a549f432a7604f8ba0c5914fd (diff)
download  kafka-python-2c257eeb1f02748840a8f4535d8d2a88ef5235f2.tar.gz
PEP8-ify most of the files
consumer.py and conn.py will be done later after pending merges
-rw-r--r--  kafka/__init__.py  |   2
-rw-r--r--  kafka/codec.py     |   8
-rw-r--r--  kafka/common.py    |  45
-rw-r--r--  kafka/producer.py  |   4
-rw-r--r--  kafka/protocol.py  | 231
-rw-r--r--  kafka/queue.py     |  96
-rw-r--r--  kafka/util.py      |  51
7 files changed, 309 insertions, 128 deletions
diff --git a/kafka/__init__.py b/kafka/__init__.py
index 5198d2f..d229169 100644
--- a/kafka/__init__.py
+++ b/kafka/__init__.py
@@ -4,7 +4,7 @@ __author__ = 'David Arthur'
__license__ = 'Apache License 2.0'
__copyright__ = 'Copyright 2012, David Arthur under Apache License, v2.0'
-from kafka.client import KafkaClient
+from kafka.client import KafkaClient
from kafka.conn import KafkaConnection
from kafka.protocol import (
create_message, create_gzip_message, create_snappy_message
diff --git a/kafka/codec.py b/kafka/codec.py
index 83f3c0b..c7d3992 100644
--- a/kafka/codec.py
+++ b/kafka/codec.py
@@ -6,10 +6,11 @@ log = logging.getLogger("kafka.codec")
try:
import snappy
- hasSnappy=True
+ hasSnappy = True
except ImportError:
log.warn("Snappy codec not available")
- hasSnappy=False
+ hasSnappy = False
+
def gzip_encode(payload):
buf = StringIO()
@@ -21,6 +22,7 @@ def gzip_encode(payload):
buf.close()
return out
+
def gzip_decode(payload):
buf = StringIO(payload)
f = gzip.GzipFile(fileobj=buf, mode='r')
@@ -29,11 +31,13 @@ def gzip_decode(payload):
buf.close()
return out
+
def snappy_encode(payload):
if not hasSnappy:
raise NotImplementedError("Snappy codec not available")
return snappy.compress(payload)
+
def snappy_decode(payload):
if not hasSnappy:
raise NotImplementedError("Snappy codec not available")
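As a usage note (not part of this commit): the codec helpers above are symmetric, and the snappy pair guards on the hasSnappy flag set at import time. A minimal round-trip sketch, assuming Python 2 str payloads as in the StringIO-based code:

    # Round-trip a payload through the gzip helpers; the snappy pair
    # works the same way, but only when the optional `snappy` package
    # imported successfully (tracked by the hasSnappy flag).
    from kafka.codec import gzip_encode, gzip_decode, hasSnappy

    payload = "hello kafka"
    assert gzip_decode(gzip_encode(payload)) == payload

    if hasSnappy:
        from kafka.codec import snappy_encode, snappy_decode
        assert snappy_decode(snappy_encode(payload)) == payload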
diff --git a/kafka/common.py b/kafka/common.py
index 447684f..9aab8fc 100644
--- a/kafka/common.py
+++ b/kafka/common.py
@@ -5,26 +5,49 @@ from collections import namedtuple
###############
# Request payloads
-ProduceRequest = namedtuple("ProduceRequest", ["topic", "partition", "messages"])
-FetchRequest = namedtuple("FetchRequest", ["topic", "partition", "offset", "max_bytes"])
-OffsetRequest = namedtuple("OffsetRequest", ["topic", "partition", "time", "max_offsets"])
-OffsetCommitRequest = namedtuple("OffsetCommitRequest", ["topic", "partition", "offset", "metadata"])
+ProduceRequest = namedtuple("ProduceRequest",
+ ["topic", "partition", "messages"])
+
+FetchRequest = namedtuple("FetchRequest",
+ ["topic", "partition", "offset", "max_bytes"])
+
+OffsetRequest = namedtuple("OffsetRequest",
+ ["topic", "partition", "time", "max_offsets"])
+
+OffsetCommitRequest = namedtuple("OffsetCommitRequest",
+ ["topic", "partition", "offset", "metadata"])
+
OffsetFetchRequest = namedtuple("OffsetFetchRequest", ["topic", "partition"])
# Response payloads
-ProduceResponse = namedtuple("ProduceResponse", ["topic", "partition", "error", "offset"])
-FetchResponse = namedtuple("FetchResponse", ["topic", "partition", "error", "highwaterMark", "messages"])
-OffsetResponse = namedtuple("OffsetResponse", ["topic", "partition", "error", "offsets"])
-OffsetCommitResponse = namedtuple("OffsetCommitResponse", ["topic", "partition", "error"])
-OffsetFetchResponse = namedtuple("OffsetFetchResponse", ["topic", "partition", "offset", "metadata", "error"])
+ProduceResponse = namedtuple("ProduceResponse",
+ ["topic", "partition", "error", "offset"])
+
+FetchResponse = namedtuple("FetchResponse", ["topic", "partition", "error",
+ "highwaterMark", "messages"])
+
+OffsetResponse = namedtuple("OffsetResponse",
+ ["topic", "partition", "error", "offsets"])
+
+OffsetCommitResponse = namedtuple("OffsetCommitResponse",
+ ["topic", "partition", "error"])
+
+OffsetFetchResponse = namedtuple("OffsetFetchResponse",
+ ["topic", "partition", "offset",
+ "metadata", "error"])
+
BrokerMetadata = namedtuple("BrokerMetadata", ["nodeId", "host", "port"])
-PartitionMetadata = namedtuple("PartitionMetadata", ["topic", "partition", "leader", "replicas", "isr"])
-# Other useful structs
+PartitionMetadata = namedtuple("PartitionMetadata",
+ ["topic", "partition", "leader",
+ "replicas", "isr"])
+
+# Other useful structs
OffsetAndMessage = namedtuple("OffsetAndMessage", ["offset", "message"])
Message = namedtuple("Message", ["magic", "attributes", "key", "value"])
TopicAndPartition = namedtuple("TopicAndPartition", ["topic", "partition"])
+
class ErrorMapping(object):
# Many of these are not actually used by the client
UNKNOWN = -1
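A brief aside on the structs above (a sketch, not from the commit): each request and response type is a plain namedtuple with no behaviour, so client code builds and reads them by field name. The topic and partition values here are made up:

    # Request payloads are inert namedtuples; no methods, just fields.
    from kafka.common import ProduceRequest, FetchRequest

    produce = ProduceRequest(topic="test-topic", partition=0, messages=[])
    fetch = FetchRequest(topic="test-topic", partition=0,
                         offset=0, max_bytes=4096)
    assert produce.topic == fetch.topic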
diff --git a/kafka/producer.py b/kafka/producer.py
index 47e690b..589eb11 100644
--- a/kafka/producer.py
+++ b/kafka/producer.py
@@ -6,6 +6,7 @@ from kafka.protocol import create_message
log = logging.getLogger("kafka")
+
class SimpleProducer(object):
"""
A simple, round-robin producer. Each message goes to exactly one partition
@@ -18,6 +19,7 @@ class SimpleProducer(object):
def send_messages(self, *msg):
req = ProduceRequest(self.topic, self.next_partition.next(),
- messages=[create_message(m) for m in msg])
+ messages=[create_message(m) for m in msg])
+
resp = self.client.send_produce_request([req])[0]
assert resp.error == 0
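For context, a hedged sketch of the producer in use. The constructor arguments are assumed from the attributes send_messages touches (a client, a topic, and a round-robin partition iterator), and the broker address is hypothetical:

    # Assumed usage of SimpleProducer as of this commit; each message
    # goes to exactly one partition, chosen round-robin.
    from kafka.client import KafkaClient
    from kafka.producer import SimpleProducer

    client = KafkaClient("localhost", 9092)   # hypothetical broker
    producer = SimpleProducer(client, "test-topic")
    producer.send_messages("msg 1", "msg 2")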
diff --git a/kafka/protocol.py b/kafka/protocol.py
index 94a7f2a..6bd5c73 100644
--- a/kafka/protocol.py
+++ b/kafka/protocol.py
@@ -18,10 +18,12 @@ from kafka.util import (
log = logging.getLogger("kafka")
+
class KafkaProtocol(object):
"""
- Class to encapsulate all of the protocol encoding/decoding. This class does not
- have any state associated with it, it is purely for organization.
+ Class to encapsulate all of the protocol encoding/decoding.
+ This class does not have any state associated with it, it is purely
+ for organization.
"""
PRODUCE_KEY = 0
FETCH_KEY = 1
@@ -44,18 +46,18 @@ class KafkaProtocol(object):
"""
Encode the common request envelope
"""
- return struct.pack('>hhih%ds' % len(client_id),
+ return struct.pack('>hhih%ds' % len(client_id),
request_key, # ApiKey
0, # ApiVersion
correlation_id, # CorrelationId
- len(client_id), #
+ len(client_id),
client_id) # ClientId
@classmethod
def _encode_message_set(cls, messages):
"""
- Encode a MessageSet. Unlike other arrays in the protocol, MessageSets are
- not length-prefixed
+ Encode a MessageSet. Unlike other arrays in the protocol,
+ MessageSets are not length-prefixed
Format
======
@@ -66,7 +68,8 @@ class KafkaProtocol(object):
message_set = ""
for message in messages:
encoded_message = KafkaProtocol._encode_message(message)
- message_set += struct.pack('>qi%ds' % len(encoded_message), 0, len(encoded_message), encoded_message)
+ message_set += struct.pack('>qi%ds' % len(encoded_message), 0,
+ len(encoded_message), encoded_message)
return message_set
@classmethod
@@ -74,10 +77,10 @@ class KafkaProtocol(object):
"""
Encode a single message.
- The magic number of a message is a format version number. The only supported
- magic number right now is zero
+ The magic number of a message is a format version number.
+ The only supported magic number right now is zero
- Format
+ Format
======
Message => Crc MagicByte Attributes Key Value
Crc => int32
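The Format block is cut short by the hunk boundary; the rest of the layout can be inferred from _decode_message below ('>iBB' header, then int-length-prefixed key and value, with the CRC covering everything after itself). A hand-rolled packing sketch on that assumption:

    # Pack a magic-0 message by hand, mirroring what _decode_message
    # reads back: crc int32, magic int8, attributes int8, then two
    # int32-length-prefixed byte strings (key, value; -1 means null).
    import struct
    import zlib

    def pack_message(key, value, magic=0, attributes=0):
        def int_string(s):
            if s is None:
                return struct.pack('>i', -1)
            return struct.pack('>i%ds' % len(s), len(s), s)
        body = (struct.pack('>BB', magic, attributes) +
                int_string(key) + int_string(value))
        return struct.pack('>i', zlib.crc32(body)) + body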
@@ -96,24 +99,27 @@ class KafkaProtocol(object):
raise Exception("Unexpected magic number: %d" % message.magic)
return msg
-
@classmethod
def _decode_message_set_iter(cls, data):
"""
Iteratively decode a MessageSet
- Reads repeated elements of (offset, message), calling decode_message to decode a
- single message. Since compressed messages contain futher MessageSets, these two methods
- have been decoupled so that they may recurse easily.
+ Reads repeated elements of (offset, message), calling decode_message
+ to decode a single message. Since compressed messages contain further
+ MessageSets, these two methods have been decoupled so that they may
+ recurse easily.
"""
cur = 0
while cur < len(data):
try:
((offset, ), cur) = relative_unpack('>q', data, cur)
(msg, cur) = read_int_string(data, cur)
- for (offset, message) in KafkaProtocol._decode_message(msg, offset):
+ for (offset, message) in KafkaProtocol._decode_message(msg,
+ offset):
yield OffsetAndMessage(offset, message)
- except BufferUnderflowError: # If we get a partial read of a message, stop
+
+ except BufferUnderflowError:
+ # If we get a partial read of a message, stop
raise StopIteration()
@classmethod
@@ -121,9 +127,10 @@ class KafkaProtocol(object):
"""
Decode a single Message
- The only caller of this method is decode_message_set_iter. They are decoupled to
- support nested messages (compressed MessageSets). The offset is actually read from
- decode_message_set_iter (it is part of the MessageSet payload).
+ The only caller of this method is decode_message_set_iter.
+ They are decoupled to support nested messages (compressed MessageSets).
+ The offset is actually read from decode_message_set_iter (it is part
+ of the MessageSet payload).
"""
((crc, magic, att), cur) = relative_unpack('>iBB', data, 0)
if crc != zlib.crc32(data[4:]):
@@ -131,23 +138,29 @@ class KafkaProtocol(object):
(key, cur) = read_int_string(data, cur)
(value, cur) = read_int_string(data, cur)
- if att & KafkaProtocol.ATTRIBUTE_CODEC_MASK == KafkaProtocol.CODEC_NONE:
+
+ codec = att & KafkaProtocol.ATTRIBUTE_CODEC_MASK
+
+ if codec == KafkaProtocol.CODEC_NONE:
yield (offset, Message(magic, att, key, value))
- elif att & KafkaProtocol.ATTRIBUTE_CODEC_MASK == KafkaProtocol.CODEC_GZIP:
+
+ elif codec == KafkaProtocol.CODEC_GZIP:
gz = gzip_decode(value)
- for (offset, message) in KafkaProtocol._decode_message_set_iter(gz):
- yield (offset, message)
- elif att & KafkaProtocol.ATTRIBUTE_CODEC_MASK == KafkaProtocol.CODEC_SNAPPY:
+ for (offset, msg) in KafkaProtocol._decode_message_set_iter(gz):
+ yield (offset, msg)
+
+ elif codec == KafkaProtocol.CODEC_SNAPPY:
snp = snappy_decode(value)
- for (offset, message) in KafkaProtocol._decode_message_set_iter(snp):
- yield (offset, message)
+ for (offset, msg) in KafkaProtocol._decode_message_set_iter(snp):
+ yield (offset, msg)
##################
# Public API #
##################
@classmethod
- def encode_produce_request(cls, client_id, correlation_id, payloads=[], acks=1, timeout=1000):
+ def encode_produce_request(cls, client_id, correlation_id,
+ payloads=None, acks=1, timeout=1000):
"""
Encode some ProduceRequest structs
@@ -161,39 +174,53 @@ class KafkaProtocol(object):
1: written to disk by the leader
2+: waits for this many number of replicas to sync
-1: waits for all replicas to be in sync
- timeout: Maximum time the server will wait for acks from replicas. This is _not_ a socket timeout
+ timeout: Maximum time the server will wait for acks from replicas.
+ This is _not_ a socket timeout
"""
+ payloads = [] if payloads is None else payloads
grouped_payloads = group_by_topic_and_partition(payloads)
- message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.PRODUCE_KEY)
+
+ message = cls._encode_message_header(client_id, correlation_id,
+ KafkaProtocol.PRODUCE_KEY)
+
message += struct.pack('>hii', acks, timeout, len(grouped_payloads))
+
for topic, topic_payloads in grouped_payloads.items():
- message += struct.pack('>h%dsi' % len(topic), len(topic), topic, len(topic_payloads))
+ message += struct.pack('>h%dsi' % len(topic),
+ len(topic), topic, len(topic_payloads))
+
for partition, payload in topic_payloads.items():
- message_set = KafkaProtocol._encode_message_set(payload.messages)
- message += struct.pack('>ii%ds' % len(message_set), partition, len(message_set), message_set)
+ msg_set = KafkaProtocol._encode_message_set(payload.messages)
+ message += struct.pack('>ii%ds' % len(msg_set), partition,
+ len(msg_set), msg_set)
+
return struct.pack('>i%ds' % len(message), len(message), message)
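A sketch of the encoder above in use (names are illustrative; the signature is as shown in this hunk). acks=1 waits only for the leader's disk write, and timeout is the broker-side wait for replica acks, not a socket timeout:

    # Encode a single ProduceRequest into its wire format.
    from kafka.common import ProduceRequest
    from kafka.protocol import KafkaProtocol, create_message

    req = ProduceRequest("test-topic", 0, [create_message("hello")])
    wire = KafkaProtocol.encode_produce_request(
        client_id="kafka-python", correlation_id=1,
        payloads=[req], acks=1, timeout=1000)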
@classmethod
def decode_produce_response(cls, data):
"""
- Decode bytes to a ProduceResponse
+ Decode bytes to a ProduceResponse
Params
======
data: bytes to decode
"""
((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)
+
for i in range(num_topics):
((strlen,), cur) = relative_unpack('>h', data, cur)
- topic = data[cur:cur+strlen]
+ topic = data[cur:cur + strlen]
cur += strlen
((num_partitions,), cur) = relative_unpack('>i', data, cur)
for i in range(num_partitions):
- ((partition, error, offset), cur) = relative_unpack('>ihq', data, cur)
+ ((partition, error, offset), cur) = relative_unpack('>ihq',
+ data, cur)
+
yield ProduceResponse(topic, partition, error, offset)
@classmethod
- def encode_fetch_request(cls, client_id, correlation_id, payloads=[], max_wait_time=100, min_bytes=4096):
+ def encode_fetch_request(cls, client_id, correlation_id, payloads=None,
+ max_wait_time=100, min_bytes=4096):
"""
Encodes some FetchRequest structs
@@ -203,17 +230,27 @@ class KafkaProtocol(object):
correlation_id: string
payloads: list of FetchRequest
max_wait_time: int, how long to block waiting on min_bytes of data
- min_bytes: int, the minimum number of bytes to accumulate before returning the response
+ min_bytes: int, the minimum number of bytes to accumulate before
+ returning the response
"""
-
+
+ payloads = [] if payloads is None else payloads
grouped_payloads = group_by_topic_and_partition(payloads)
- message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.FETCH_KEY)
- message += struct.pack('>iiii', -1, max_wait_time, min_bytes, len(grouped_payloads)) # -1 is the replica id
+
+ message = cls._encode_message_header(client_id, correlation_id,
+ KafkaProtocol.FETCH_KEY)
+
+ # -1 is the replica id
+ message += struct.pack('>iiii', -1, max_wait_time, min_bytes,
+ len(grouped_payloads))
+
for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic)
message += struct.pack('>i', len(topic_payloads))
for partition, payload in topic_payloads.items():
- message += struct.pack('>iqi', partition, payload.offset, payload.max_bytes)
+ message += struct.pack('>iqi', partition, payload.offset,
+ payload.max_bytes)
+
return struct.pack('>i%ds' % len(message), len(message), message)
@classmethod
@@ -226,25 +263,41 @@ class KafkaProtocol(object):
data: bytes to decode
"""
((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)
+
for i in range(num_topics):
(topic, cur) = read_short_string(data, cur)
((num_partitions,), cur) = relative_unpack('>i', data, cur)
+
for i in range(num_partitions):
- ((partition, error, highwater_mark_offset), cur) = relative_unpack('>ihq', data, cur)
+ ((partition, error, highwater_mark_offset), cur) = \
+ relative_unpack('>ihq', data, cur)
+
(message_set, cur) = read_int_string(data, cur)
- yield FetchResponse(topic, partition, error, highwater_mark_offset,
+
+ yield FetchResponse(
+ topic, partition, error,
+ highwater_mark_offset,
KafkaProtocol._decode_message_set_iter(message_set))
@classmethod
- def encode_offset_request(cls, client_id, correlation_id, payloads=[]):
+ def encode_offset_request(cls, client_id, correlation_id, payloads=None):
+ payloads = [] if payloads is None else payloads
grouped_payloads = group_by_topic_and_partition(payloads)
- message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.OFFSET_KEY)
- message += struct.pack('>ii', -1, len(grouped_payloads)) # -1 is the replica id
+
+ message = cls._encode_message_header(client_id, correlation_id,
+ KafkaProtocol.OFFSET_KEY)
+
+ # -1 is the replica id
+ message += struct.pack('>ii', -1, len(grouped_payloads))
+
for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic)
message += struct.pack('>i', len(topic_payloads))
+
for partition, payload in topic_payloads.items():
- message += struct.pack('>iqi', partition, payload.time, payload.max_offsets)
+ message += struct.pack('>iqi', partition, payload.time,
+ payload.max_offsets)
+
return struct.pack('>i%ds' % len(message), len(message), message)
@classmethod
@@ -257,19 +310,24 @@ class KafkaProtocol(object):
data: bytes to decode
"""
((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0)
+
for i in range(num_topics):
(topic, cur) = read_short_string(data, cur)
((num_partitions,), cur) = relative_unpack('>i', data, cur)
+
for i in range(num_partitions):
- ((partition, error, num_offsets,), cur) = relative_unpack('>ihi', data, cur)
+ ((partition, error, num_offsets,), cur) = \
+ relative_unpack('>ihi', data, cur)
+
offsets = []
for j in range(num_offsets):
((offset,), cur) = relative_unpack('>q', data, cur)
offsets.append(offset)
+
yield OffsetResponse(topic, partition, error, tuple(offsets))
@classmethod
- def encode_metadata_request(cls, client_id, correlation_id, topics=[]):
+ def encode_metadata_request(cls, client_id, correlation_id, topics=None):
"""
Encode a MetadataRequest
@@ -279,10 +337,15 @@ class KafkaProtocol(object):
correlation_id: string
topics: list of strings
"""
- message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.METADATA_KEY)
+ topics = [] if topics is None else topics
+ message = cls._encode_message_header(client_id, correlation_id,
+ KafkaProtocol.METADATA_KEY)
+
message += struct.pack('>i', len(topics))
+
for topic in topics:
message += struct.pack('>h%ds' % len(topic), len(topic), topic)
+
return write_int_string(message)
@classmethod
@@ -307,22 +370,34 @@ class KafkaProtocol(object):
# Topic info
((num_topics,), cur) = relative_unpack('>i', data, cur)
topicMetadata = {}
+
for i in range(num_topics):
((topicError,), cur) = relative_unpack('>h', data, cur)
(topicName, cur) = read_short_string(data, cur)
((num_partitions,), cur) = relative_unpack('>i', data, cur)
partitionMetadata = {}
+
for j in range(num_partitions):
- ((partitionErrorCode, partition, leader, numReplicas), cur) = relative_unpack('>hiii', data, cur)
- (replicas, cur) = relative_unpack('>%di' % numReplicas, data, cur)
+ ((partitionErrorCode, partition, leader, numReplicas), cur) = \
+ relative_unpack('>hiii', data, cur)
+
+ (replicas, cur) = relative_unpack('>%di' % numReplicas,
+ data, cur)
+
((numIsr,), cur) = relative_unpack('>i', data, cur)
(isr, cur) = relative_unpack('>%di' % numIsr, data, cur)
- partitionMetadata[partition] = PartitionMetadata(topicName, partition, leader, replicas, isr)
+
+ partitionMetadata[partition] = \
+ PartitionMetadata(topicName, partition, leader,
+ replicas, isr)
+
topicMetadata[topicName] = partitionMetadata
+
return (brokers, topicMetadata)
@classmethod
- def encode_offset_commit_request(cls, client_id, correlation_id, group, payloads):
+ def encode_offset_commit_request(cls, client_id, correlation_id,
+ group, payloads):
"""
Encode some OffsetCommitRequest structs
@@ -333,16 +408,21 @@ class KafkaProtocol(object):
group: string, the consumer group you are committing offsets for
payloads: list of OffsetCommitRequest
"""
- grouped_payloads= group_by_topic_and_partition(payloads)
- message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.OFFSET_COMMIT_KEY)
+ grouped_payloads = group_by_topic_and_partition(payloads)
+
+ message = cls._encode_message_header(client_id, correlation_id,
+ KafkaProtocol.OFFSET_COMMIT_KEY)
message += write_short_string(group)
message += struct.pack('>i', len(grouped_payloads))
+
for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic)
message += struct.pack('>i', len(topic_payloads))
+
for partition, payload in topic_payloads.items():
message += struct.pack('>iq', partition, payload.offset)
message += write_short_string(payload.metadata)
+
return struct.pack('>i%ds' % len(message), len(message), message)
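Similarly for offset commits (a sketch; the group and topic names are hypothetical): the group string is written once up front, then the payloads grouped by topic and partition:

    # Commit one offset for a consumer group.
    from kafka.common import OffsetCommitRequest
    from kafka.protocol import KafkaProtocol

    req = OffsetCommitRequest("test-topic", 0, offset=42, metadata="")
    wire = KafkaProtocol.encode_offset_commit_request(
        client_id="kafka-python", correlation_id=2,
        group="my-group", payloads=[req])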
@classmethod
@@ -357,15 +437,18 @@ class KafkaProtocol(object):
((correlation_id,), cur) = relative_unpack('>i', data, 0)
(client_id, cur) = read_short_string(data, cur)
((num_topics,), cur) = relative_unpack('>i', data, cur)
+
for i in xrange(num_topics):
(topic, cur) = read_short_string(data, cur)
((num_partitions,), cur) = relative_unpack('>i', data, cur)
+
for i in xrange(num_partitions):
((partition, error), cur) = relative_unpack('>ih', data, cur)
yield OffsetCommitResponse(topic, partition, error)
@classmethod
- def encode_offset_fetch_request(cls, client_id, correlation_id, group, payloads):
+ def encode_offset_fetch_request(cls, client_id, correlation_id,
+ group, payloads):
"""
Encode some OffsetFetchRequest structs
@@ -377,14 +460,19 @@ class KafkaProtocol(object):
payloads: list of OffsetFetchRequest
"""
grouped_payloads = group_by_topic_and_partition(payloads)
- message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.OFFSET_FETCH_KEY)
+ message = cls._encode_message_header(client_id, correlation_id,
+ KafkaProtocol.OFFSET_FETCH_KEY)
+
message += write_short_string(group)
message += struct.pack('>i', len(grouped_payloads))
+
for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic)
message += struct.pack('>i', len(topic_payloads))
+
for partition, payload in topic_payloads.items():
message += struct.pack('>i', partition)
+
return struct.pack('>i%ds' % len(message), len(message), message)
@classmethod
@@ -400,14 +488,19 @@ class KafkaProtocol(object):
((correlation_id,), cur) = relative_unpack('>i', data, 0)
(client_id, cur) = read_short_string(data, cur)
((num_topics,), cur) = relative_unpack('>i', data, cur)
+
for i in range(num_topics):
(topic, cur) = read_short_string(data, cur)
((num_partitions,), cur) = relative_unpack('>i', data, cur)
+
for i in range(num_partitions):
((partition, offset), cur) = relative_unpack('>iq', data, cur)
(metadata, cur) = read_short_string(data, cur)
((error,), cur) = relative_unpack('>h', data, cur)
- yield OffsetFetchResponse(topic, partition, offset, metadata, error)
+
+ yield OffsetFetchResponse(topic, partition, offset,
+ metadata, error)
+
def create_message(payload, key=None):
"""
@@ -420,6 +513,7 @@ def create_message(payload, key=None):
"""
return Message(0, 0, key, payload)
+
def create_gzip_message(payloads, key=None):
"""
Construct a Gzipped Message containing multiple Messages
@@ -433,9 +527,13 @@ def create_gzip_message(payloads, key=None):
key: bytes, a key used for partition routing (optional)
"""
message_set = KafkaProtocol._encode_message_set(
- [create_message(payload) for payload in payloads])
- gzipped = gzip_encode(message_set)
- return Message(0, 0x00 | (KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_GZIP), key, gzipped)
+ [create_message(payload) for payload in payloads])
+
+ gzipped = gzip_encode(message_set)
+ codec = KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_GZIP
+
+ return Message(0, 0x00 | codec, key, gzipped)
+
def create_snappy_message(payloads, key=None):
"""
@@ -450,6 +548,9 @@ def create_snappy_message(payloads, key=None):
key: bytes, a key used for partition routing (optional)
"""
message_set = KafkaProtocol._encode_message_set(
- [create_message(payload) for payload in payloads])
- snapped = snappy_encode(message_set)
- return Message(0, 0x00 | (KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_SNAPPY), key, snapped)
+ [create_message(payload) for payload in payloads])
+
+ snapped = snappy_encode(message_set)
+ codec = KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_SNAPPY
+
+ return Message(0, 0x00 | codec, key, snapped)
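The codec factoring at the end of this file keeps the codec id in the low bits of the attribute byte. A sketch of reading it back (the mask's numeric value is not shown in this diff, so it is only assumed to isolate those bits):

    # The attributes byte of a compressed message carries the codec id
    # in the bits selected by ATTRIBUTE_CODEC_MASK.
    from kafka.protocol import KafkaProtocol, create_gzip_message

    msg = create_gzip_message(["payload 1", "payload 2"])
    codec = msg.attributes & KafkaProtocol.ATTRIBUTE_CODEC_MASK
    assert codec == KafkaProtocol.CODEC_GZIP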
diff --git a/kafka/queue.py b/kafka/queue.py
index 6fe9eaa..3bd7dca 100644
--- a/kafka/queue.py
+++ b/kafka/queue.py
@@ -10,8 +10,10 @@ log = logging.getLogger("kafka")
raise NotImplementedError("Still need to refactor this class")
+
class KafkaConsumerProcess(Process):
- def __init__(self, client, topic, partition, out_queue, barrier, consumer_fetch_size=1024, consumer_sleep=200):
+ def __init__(self, client, topic, partition, out_queue, barrier,
+ consumer_fetch_size=1024, consumer_sleep=200):
self.client = copy(client)
self.topic = topic
self.partition = partition
@@ -23,29 +25,40 @@ class KafkaConsumerProcess(Process):
Process.__init__(self)
def __str__(self):
- return "[KafkaConsumerProcess: topic=%s, partition=%s, sleep=%s]" % (
- self.topic, self.partition, self.consumer_sleep)
+ return "[KafkaConsumerProcess: topic=%s, partition=%s, sleep=%s]" % \
+ (self.topic, self.partition, self.consumer_sleep)
def run(self):
self.barrier.wait()
log.info("Starting %s" % self)
- fetchRequest = FetchRequest(self.topic, self.partition, offset=0, size=self.consumer_fetch_size)
+ fetchRequest = FetchRequest(self.topic, self.partition,
+ offset=0, size=self.consumer_fetch_size)
+
while True:
- if self.barrier.is_set() == False:
+ if self.barrier.is_set() is False:
log.info("Shutdown %s" % self)
self.client.close()
break
+
lastOffset = fetchRequest.offset
(messages, fetchRequest) = self.client.get_message_set(fetchRequest)
+
if fetchRequest.offset == lastOffset:
- log.debug("No more data for this partition, sleeping a bit (200ms)")
+ log.debug("No more data for this partition, "
+ "sleeping a bit (200ms)")
time.sleep(self.consumer_sleep)
continue
+
for message in messages:
self.out_queue.put(message)
+
class KafkaProducerProcess(Process):
- def __init__(self, client, topic, in_queue, barrier, producer_flush_buffer=500, producer_flush_timeout=2000, producer_timeout=100):
+ def __init__(self, client, topic, in_queue, barrier,
+ producer_flush_buffer=500,
+ producer_flush_timeout=2000,
+ producer_timeout=100):
+
self.client = copy(client)
self.topic = topic
self.in_queue = in_queue
@@ -57,8 +70,10 @@ class KafkaProducerProcess(Process):
Process.__init__(self)
def __str__(self):
- return "[KafkaProducerProcess: topic=%s, flush_buffer=%s, flush_timeout=%s, timeout=%s]" % (
- self.topic, self.producer_flush_buffer, self.producer_flush_timeout, self.producer_timeout)
+ return "[KafkaProducerProcess: topic=%s, flush_buffer=%s, \
+ flush_timeout=%s, timeout=%s]" % (
+ self.topic, self.producer_flush_buffer,
+ self.producer_flush_timeout, self.producer_timeout)
def run(self):
self.barrier.wait()
@@ -67,36 +82,47 @@ class KafkaProducerProcess(Process):
last_produce = time.time()
def flush(messages):
- self.client.send_message_set(ProduceRequest(self.topic, -1, messages))
+ self.client.send_message_set(ProduceRequest(self.topic, -1,
+ messages))
del messages[:]
while True:
- if self.barrier.is_set() == False:
+ if self.barrier.is_set() is False:
log.info("Shutdown %s, flushing messages" % self)
flush(messages)
self.client.close()
break
+
if len(messages) > self.producer_flush_buffer:
- log.debug("Message count threashold reached. Flushing messages")
+ log.debug("Message count threshold reached. Flushing messages")
flush(messages)
last_produce = time.time()
+
elif (time.time() - last_produce) > self.producer_flush_timeout:
log.debug("Producer timeout reached. Flushing messages")
flush(messages)
last_produce = time.time()
+
try:
- messages.append(KafkaClient.create_message(self.in_queue.get(True, self.producer_timeout)))
+ msg = KafkaClient.create_message(self.in_queue.get(True,
+ self.producer_timeout))
+ messages.append(msg)
+
except Empty:
continue
+
class KafkaQueue(object):
- def __init__(self, client, topic, partitions, producer_config={}, consumer_config={}):
+ def __init__(self, client, topic, partitions,
+ producer_config=None, consumer_config=None):
"""
- KafkaQueue a Queue-like object backed by a Kafka producer and some number of consumers
+ KafkaQueue a Queue-like object backed by a Kafka producer and some
+ number of consumers
- Messages are eagerly loaded by the consumer in batches of size consumer_fetch_size.
- Messages are buffered in the producer thread until producer_flush_timeout or
- producer_flush_buffer is reached.
+ Messages are eagerly loaded by the consumer in batches of size
+ consumer_fetch_size.
+ Messages are buffered in the producer thread until
+ producer_flush_timeout or producer_flush_buffer is reached.
Params
======
@@ -108,21 +134,26 @@ class KafkaQueue(object):
Consumer Config
===============
- consumer_fetch_size: int, number of bytes to fetch in one call to Kafka. Default
- is 1024
- consumer_sleep: int, time in milliseconds a consumer should sleep when it reaches
- the end of a partition. Default is 200
+ consumer_fetch_size: int, number of bytes to fetch in one call
+ to Kafka. Default is 1024
+ consumer_sleep: int, time in milliseconds a consumer should sleep
+ when it reaches the end of a partition. Default is 200
Producer Config
===============
- producer_timeout: int, time in milliseconds a producer should wait for messages to
- enqueue for producing. Default is 100
- producer_flush_timeout: int, time in milliseconds a producer should allow messages
- to accumulate before sending to Kafka. Default is 2000
- producer_flush_buffer: int, number of messages a producer should allow to accumulate.
- Default is 500
-
+ producer_timeout: int, time in milliseconds a producer should
+ wait for messages to enqueue for producing.
+ Default is 100
+ producer_flush_timeout: int, time in milliseconds a producer
+ should allow messages to accumulate before
+ sending to Kafka. Default is 2000
+ producer_flush_buffer: int, number of messages a producer should
+ allow to accumulate. Default is 500
+
"""
+ producer_config = {} if producer_config is None else producer_config
+ consumer_config = {} if consumer_config is None else consumer_config
+
self.in_queue = Queue()
self.out_queue = Queue()
self.consumers = []
@@ -130,12 +161,15 @@ class KafkaQueue(object):
# Initialize and start consumer threads
for partition in partitions:
- consumer = KafkaConsumerProcess(client, topic, partition, self.in_queue, self.barrier, **consumer_config)
+ consumer = KafkaConsumerProcess(client, topic, partition,
+ self.in_queue, self.barrier,
+ **consumer_config)
consumer.start()
self.consumers.append(consumer)
# Initialize and start producer thread
- self.producer = KafkaProducerProcess(client, topic, self.out_queue, self.barrier, **producer_config)
+ self.producer = KafkaProducerProcess(client, topic, self.out_queue,
+ self.barrier, **producer_config)
self.producer.start()
# Trigger everything to start
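The producer_config=None / consumer_config=None change above is the same fix applied to the payloads=[] defaults throughout protocol.py; a minimal illustration of the bug the old mutable defaults invite:

    # Why `def f(items=[])` was replaced with `def f(items=None)`:
    # a mutable default is created once and shared across calls.
    def bad(items=[]):
        items.append(1)
        return items

    assert bad() == [1]
    assert bad() == [1, 1]   # state leaked from the first call

    # The pattern this commit adopts instead:
    def good(items=None):
        items = [] if items is None else items
        items.append(1)
        return items

    assert good() == [1]
    assert good() == [1]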
diff --git a/kafka/util.py b/kafka/util.py
index 8c02cb2..10bf838 100644
--- a/kafka/util.py
+++ b/kafka/util.py
@@ -3,64 +3,81 @@ from itertools import groupby
import struct
from threading import Timer
+
def write_int_string(s):
if s is None:
return struct.pack('>i', -1)
else:
return struct.pack('>i%ds' % len(s), len(s), s)
+
def write_short_string(s):
if s is None:
return struct.pack('>h', -1)
else:
return struct.pack('>h%ds' % len(s), len(s), s)
+
def read_short_string(data, cur):
- if len(data) < cur+2:
+ if len(data) < cur + 2:
raise BufferUnderflowError("Not enough data left")
- (strLen,) = struct.unpack('>h', data[cur:cur+2])
+
+ (strLen,) = struct.unpack('>h', data[cur:cur + 2])
if strLen == -1:
- return (None, cur+2)
+ return (None, cur + 2)
+
cur += 2
- if len(data) < cur+strLen:
+ if len(data) < cur + strLen:
raise BufferUnderflowError("Not enough data left")
- out = data[cur:cur+strLen]
- return (out, cur+strLen)
+
+ out = data[cur:cur + strLen]
+ return (out, cur + strLen)
+
def read_int_string(data, cur):
- if len(data) < cur+4:
+ if len(data) < cur + 4:
raise BufferUnderflowError("Not enough data left")
- (strLen,) = struct.unpack('>i', data[cur:cur+4])
+
+ (strLen,) = struct.unpack('>i', data[cur:cur + 4])
if strLen == -1:
- return (None, cur+4)
+ return (None, cur + 4)
+
cur += 4
- if len(data) < cur+strLen:
+ if len(data) < cur + strLen:
raise BufferUnderflowError("Not enough data left")
- out = data[cur:cur+strLen]
- return (out, cur+strLen)
+
+ out = data[cur:cur + strLen]
+ return (out, cur + strLen)
+
def relative_unpack(fmt, data, cur):
size = struct.calcsize(fmt)
- if len(data) < cur+size:
+ if len(data) < cur + size:
raise BufferUnderflowError("Not enough data left")
- out = struct.unpack(fmt, data[cur:cur+size])
- return (out, cur+size)
+
+ out = struct.unpack(fmt, data[cur:cur + size])
+ return (out, cur + size)
+
def group_by_topic_and_partition(tuples):
out = defaultdict(dict)
for t in tuples:
out[t.topic][t.partition] = t
- return out
+ return out
+
class BufferUnderflowError(Exception):
pass
+
class ChecksumError(Exception):
pass
+
class ReentrantTimer(object):
"""
- A timer that can be restarted, unlike threading.Timer (although this uses threading.Timer)
+ A timer that can be restarted, unlike threading.Timer
+ (although this uses threading.Timer)
t: timer interval in milliseconds
fn: a callable to invoke
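To close, a round trip through the short-string helpers above (a sketch, not from the commit); a length of -1 encodes None:

    # write_short_string/read_short_string are symmetric; -1 length
    # marks a null string.
    from kafka.util import write_short_string, read_short_string

    buf = write_short_string("hello") + write_short_string(None)
    (s1, cur) = read_short_string(buf, 0)    # ("hello", 7)
    (s2, cur) = read_short_string(buf, cur)  # (None, 9)
    assert s1 == "hello" and s2 is None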