From 3a9f74e79c3d1912d8c0c1ad4d1478c611caba0a Mon Sep 17 00:00:00 2001 From: Naoki INADA Date: Wed, 10 Jun 2009 10:58:09 +0900 Subject: Make msgpack package instead of module. --- python/msgpack/_msgpack.pyx | 202 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 python/msgpack/_msgpack.pyx (limited to 'python/msgpack/_msgpack.pyx') diff --git a/python/msgpack/_msgpack.pyx b/python/msgpack/_msgpack.pyx new file mode 100644 index 0000000..f24f403 --- /dev/null +++ b/python/msgpack/_msgpack.pyx @@ -0,0 +1,202 @@ +# coding: utf-8 + +from cStringIO import StringIO + +cdef extern from "Python.h": + ctypedef char* const_char_ptr "const char*" + ctypedef struct PyObject + cdef object PyString_FromStringAndSize(const_char_ptr b, Py_ssize_t len) + +cdef extern from "stdlib.h": + void* malloc(int) + void free(void*) + +cdef extern from "string.h": + int memcpy(char*dst, char*src, unsigned int size) + +cdef extern from "pack.h": + ctypedef int (*msgpack_packer_write)(void* data, const_char_ptr buf, unsigned int len) + + struct msgpack_packer: + void *data + msgpack_packer_write callback + + void msgpack_packer_init(msgpack_packer* pk, void* data, msgpack_packer_write callback) + void msgpack_pack_int(msgpack_packer* pk, int d) + void msgpack_pack_nil(msgpack_packer* pk) + void msgpack_pack_true(msgpack_packer* pk) + void msgpack_pack_false(msgpack_packer* pk) + void msgpack_pack_long_long(msgpack_packer* pk, long long d) + void msgpack_pack_double(msgpack_packer* pk, double d) + void msgpack_pack_array(msgpack_packer* pk, size_t l) + void msgpack_pack_map(msgpack_packer* pk, size_t l) + void msgpack_pack_raw(msgpack_packer* pk, size_t l) + void msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) + + +cdef int BUFF_SIZE=2*1024 + +cdef class Packer: + """Packer that pack data into strm. + + strm must have `write(bytes)` method. + size specifies local buffer size. + """ + cdef char* buff + cdef unsigned int length + cdef unsigned int allocated + cdef msgpack_packer pk + cdef object strm + + def __init__(self, strm, int size=0): + if size <= 0: + size = BUFF_SIZE + + self.strm = strm + self.buff = malloc(size) + self.allocated = size + self.length = 0 + + msgpack_packer_init(&self.pk, self, _packer_write) + + def __del__(self): + free(self.buff); + + def flush(self): + """Flash local buffer and output stream if it has 'flush()' method.""" + if self.length > 0: + self.strm.write(PyString_FromStringAndSize(self.buff, self.length)) + self.length = 0 + if hasattr(self.strm, 'flush'): + self.strm.flush() + + def pack_list(self, len): + """Start packing sequential objects. + + Example: + + packer.pack_list(2) + packer.pack('foo') + packer.pack('bar') + + This code is same as below code: + + packer.pack(['foo', 'bar']) + """ + msgpack_pack_array(&self.pk, len) + + def pack_dict(self, len): + """Start packing key-value objects. + + Example: + + packer.pack_dict(1) + packer.pack('foo') + packer.pack('bar') + + This code is same as below code: + + packer.pack({'foo', 'bar'}) + """ + msgpack_pack_map(&self.pk, len) + + cdef __pack(self, object o): + cdef long long intval + cdef double fval + cdef char* rawval + + if o is None: + msgpack_pack_nil(&self.pk) + elif o is True: + msgpack_pack_true(&self.pk) + elif o is False: + msgpack_pack_false(&self.pk) + elif isinstance(o, long): + intval = o + msgpack_pack_long_long(&self.pk, intval) + elif isinstance(o, int): + intval = o + msgpack_pack_long_long(&self.pk, intval) + elif isinstance(o, float): + fval = 9 + msgpack_pack_double(&self.pk, fval) + elif isinstance(o, str): + rawval = o + msgpack_pack_raw(&self.pk, len(o)) + msgpack_pack_raw_body(&self.pk, rawval, len(o)) + elif isinstance(o, unicode): + o = o.encode('utf-8') + rawval = o + msgpack_pack_raw(&self.pk, len(o)) + msgpack_pack_raw_body(&self.pk, rawval, len(o)) + elif isinstance(o, dict): + msgpack_pack_map(&self.pk, len(o)) + for k,v in o.iteritems(): + self.pack(k) + self.pack(v) + elif isinstance(o, tuple) or isinstance(o, list): + msgpack_pack_array(&self.pk, len(o)) + for v in o: + self.pack(v) + else: + # TODO: Serialize with defalt() like simplejson. + raise TypeError, "can't serialize %r" % (o,) + + def pack(self, obj, flush=True): + self.__pack(obj) + if flush: + self.flush() + +cdef int _packer_write(Packer packer, const_char_ptr b, unsigned int l): + if packer.length + l > packer.allocated: + if packer.length > 0: + packer.strm.write(PyString_FromStringAndSize(packer.buff, packer.length)) + if l > 64: + packer.strm.write(PyString_FromStringAndSize(b, l)) + packer.length = 0 + else: + memcpy(packer.buff, b, l) + packer.length = l + else: + memcpy(packer.buff + packer.length, b, l) + packer.length += l + return 0 + +def pack(object o, object stream): + packer = Packer(stream) + packer.pack(o) + packer.flush() + +def packs(object o): + buf = StringIO() + packer = Packer(buf) + packer.pack(o) + packer.flush() + return buf.getvalue() + +cdef extern from "unpack.h": + ctypedef struct template_context: + pass + int template_execute(template_context* ctx, const_char_ptr data, + size_t len, size_t* off) + void template_init(template_context* ctx) + object template_data(template_context* ctx) + + +def unpacks(object packed_bytes): + """Unpack packed_bytes to object. Returns unpacked object.""" + cdef const_char_ptr p = packed_bytes + cdef template_context ctx + cdef size_t off = 0 + template_init(&ctx) + template_execute(&ctx, p, len(packed_bytes), &off) + return template_data(&ctx) + +def unpack(object stream): + """unpack from stream.""" + packed = stream.read() + return unpacks(packed) + +cdef class Unpacker: + """Do nothing. This function is for symmetric to Packer""" + unpack = staticmethod(unpacks) -- cgit v1.2.1 From e814986b4ec017ab124dcf16496a6fefa65a9876 Mon Sep 17 00:00:00 2001 From: Naoki INADA Date: Wed, 10 Jun 2009 13:33:42 +0900 Subject: Fix document miss. --- python/msgpack/_msgpack.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'python/msgpack/_msgpack.pyx') diff --git a/python/msgpack/_msgpack.pyx b/python/msgpack/_msgpack.pyx index f24f403..7c07cde 100644 --- a/python/msgpack/_msgpack.pyx +++ b/python/msgpack/_msgpack.pyx @@ -79,7 +79,7 @@ cdef class Packer: packer.pack('foo') packer.pack('bar') - This code is same as below code: + This is same to: packer.pack(['foo', 'bar']) """ @@ -94,9 +94,9 @@ cdef class Packer: packer.pack('foo') packer.pack('bar') - This code is same as below code: + This is same to: - packer.pack({'foo', 'bar'}) + packer.pack({'foo': 'bar'}) """ msgpack_pack_map(&self.pk, len) -- cgit v1.2.1 From 3c3df3133c6f6320a03ebcaef546ab9c2c53154d Mon Sep 17 00:00:00 2001 From: Naoki INADA Date: Fri, 26 Jun 2009 14:10:20 +0900 Subject: Implement streaming deserializer. --- python/msgpack/_msgpack.pyx | 162 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 147 insertions(+), 15 deletions(-) (limited to 'python/msgpack/_msgpack.pyx') diff --git a/python/msgpack/_msgpack.pyx b/python/msgpack/_msgpack.pyx index cbdcfc5..c37f8ba 100644 --- a/python/msgpack/_msgpack.pyx +++ b/python/msgpack/_msgpack.pyx @@ -6,13 +6,16 @@ cdef extern from "Python.h": ctypedef char* const_char_ptr "const char*" ctypedef struct PyObject cdef object PyString_FromStringAndSize(const_char_ptr b, Py_ssize_t len) + char* PyString_AsString(object o) cdef extern from "stdlib.h": - void* malloc(int) + void* malloc(size_t) + void* realloc(void*, size_t) void free(void*) cdef extern from "string.h": - int memcpy(char*dst, char*src, unsigned int size) + void* memcpy(char* dst, char* src, size_t size) + void* memmove(char* dst, char* src, size_t size) cdef extern from "pack.h": ctypedef int (*msgpack_packer_write)(void* data, const_char_ptr buf, unsigned int len) @@ -34,8 +37,6 @@ cdef extern from "pack.h": void msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) -cdef int BUFF_SIZE=2*1024 - cdef class Packer: """Packer that pack data into strm. @@ -48,10 +49,7 @@ cdef class Packer: cdef msgpack_packer pk cdef object strm - def __init__(self, strm, int size=0): - if size <= 0: - size = BUFF_SIZE - + def __init__(self, strm, int size=4*1024): self.strm = strm self.buff = malloc(size) self.allocated = size @@ -147,6 +145,8 @@ cdef class Packer: if flush: self.flush() + close = flush + cdef int _packer_write(Packer packer, const_char_ptr b, unsigned int l): if packer.length + l > packer.allocated: if packer.length > 0: @@ -163,20 +163,28 @@ cdef int _packer_write(Packer packer, const_char_ptr b, unsigned int l): return 0 def pack(object o, object stream): + u"""pack o and write to stream).""" packer = Packer(stream) packer.pack(o) packer.flush() -def packs(object o): +def packb(object o): + u"""pack o and return packed bytes.""" buf = StringIO() packer = Packer(buf) packer.pack(o) packer.flush() return buf.getvalue() +packs = packb + cdef extern from "unpack.h": ctypedef struct template_context: - pass + PyObject* obj + size_t count + unsigned int ct + PyObject* key + int template_execute(template_context* ctx, const_char_ptr data, size_t len, size_t* off) void template_init(template_context* ctx) @@ -188,15 +196,139 @@ def unpacks(object packed_bytes): cdef const_char_ptr p = packed_bytes cdef template_context ctx cdef size_t off = 0 + cdef int ret template_init(&ctx) - template_execute(&ctx, p, len(packed_bytes), &off) - return template_data(&ctx) + ret = template_execute(&ctx, p, len(packed_bytes), &off) + if ret == 1: + return template_data(&ctx) + else: + return None def unpack(object stream): """unpack from stream.""" packed = stream.read() return unpacks(packed) -cdef class Unpacker: - """Do nothing. This function is for symmetric to Packer""" - unpack = staticmethod(unpacks) +cdef class UnpackIterator(object): + cdef object unpacker + + def __init__(self, unpacker): + self.unpacker = unpacker + + def __next__(self): + return self.unpacker.unpack() + + def __iter__(self): + return self + +cdef class Unpacker(object): + """Unpacker(file_like=None, read_size=4096) + + Streaming unpacker. + file_like must have read(n) method. + read_size is used like file_like.read(read_size) + + If file_like is None, you can feed() bytes. feed() is useful + for unpack from non-blocking stream. + + exsample 1: + unpacker = Unpacker(afile) + for o in unpacker: + do_something(o) + + example 2: + unpacker = Unpacker() + while 1: + buf = astream.read() + unpacker.feed(buf) + for o in unpacker: + do_something(o) + """ + + cdef template_context ctx + cdef char* buf + cdef size_t buf_size, buf_head, buf_tail + cdef object file_like + cdef int read_size + cdef object waiting_bytes + + def __init__(self, file_like=None, int read_size=4096): + self.file_like = file_like + self.read_size = read_size + self.waiting_bytes = [] + self.buf = malloc(read_size) + self.buf_size = read_size + self.buf_head = 0 + self.buf_tail = 0 + template_init(&self.ctx) + + def feed(self, next_bytes): + if not isinstance(next_bytes, str): + raise ValueError, "Argument must be bytes object" + self.waiting_bytes.append(next_bytes) + + cdef append_buffer(self): + cdef char* buf = self.buf + cdef Py_ssize_t tail = self.buf_tail + cdef Py_ssize_t l + + for b in self.waiting_bytes: + l = len(b) + memcpy(buf + tail, PyString_AsString(b), l) + tail += l + self.buf_tail = tail + del self.waiting_bytes[:] + + # prepare self.buf + cdef fill_buffer(self): + cdef Py_ssize_t add_size + + if self.file_like is not None: + self.waiting_bytes.append(self.file_like.read(self.read_size)) + + if not self.waiting_bytes: + return + + add_size = 0 + for b in self.waiting_bytes: + add_size += len(b) + + cdef char* buf = self.buf + cdef size_t head = self.buf_head + cdef size_t tail = self.buf_tail + cdef size_t size = self.buf_size + + if self.buf_tail + add_size <= self.buf_size: + # do nothing. + pass + if self.buf_tail - self.buf_head + add_size < self.buf_size: + # move to front. + memmove(buf, buf + head, tail - head) + tail -= head + head = 0 + else: + # expand buffer + size = tail + add_size + buf = realloc(buf, size) + + self.buf = buf + self.buf_head = head + self.buf_tail = tail + self.buf_size = size + + self.append_buffer() + + cpdef unpack(self): + """unpack one object""" + cdef int ret + self.fill_buffer() + ret = template_execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) + if ret == 1: + return template_data(&self.ctx) + elif ret == 0: + raise StopIteration, "No more unpack data." + else: + raise ValueError, "Unpack failed." + + def __iter__(self): + return UnpackIterator(self) -- cgit v1.2.1 From 1b07b61c048f749c7a5e505617b0e59974f81077 Mon Sep 17 00:00:00 2001 From: Naoki INADA Date: Sun, 28 Jun 2009 21:24:02 +0900 Subject: Ues more suitable type when packing. --- python/msgpack/_msgpack.pyx | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'python/msgpack/_msgpack.pyx') diff --git a/python/msgpack/_msgpack.pyx b/python/msgpack/_msgpack.pyx index c37f8ba..cde5313 100644 --- a/python/msgpack/_msgpack.pyx +++ b/python/msgpack/_msgpack.pyx @@ -7,6 +7,8 @@ cdef extern from "Python.h": ctypedef struct PyObject cdef object PyString_FromStringAndSize(const_char_ptr b, Py_ssize_t len) char* PyString_AsString(object o) + int PyMapping_Check(object o) + int PySequence_Check(object o) cdef extern from "stdlib.h": void* malloc(size_t) @@ -37,7 +39,7 @@ cdef extern from "pack.h": void msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) -cdef class Packer: +cdef class Packer(object): """Packer that pack data into strm. strm must have `write(bytes)` method. @@ -99,7 +101,8 @@ cdef class Packer: msgpack_pack_map(&self.pk, len) cdef __pack(self, object o): - cdef long long intval + cdef long long llval + cdef long longval cdef double fval cdef char* rawval @@ -110,11 +113,11 @@ cdef class Packer: elif o is False: msgpack_pack_false(&self.pk) elif isinstance(o, long): - intval = o - msgpack_pack_long_long(&self.pk, intval) + llval = o + msgpack_pack_long_long(&self.pk, llval) elif isinstance(o, int): - intval = o - msgpack_pack_long_long(&self.pk, intval) + longval = o + msgpack_pack_long_long(&self.pk, longval) elif isinstance(o, float): fval = o msgpack_pack_double(&self.pk, fval) @@ -127,12 +130,12 @@ cdef class Packer: rawval = o msgpack_pack_raw(&self.pk, len(o)) msgpack_pack_raw_body(&self.pk, rawval, len(o)) - elif isinstance(o, dict): + elif PyMapping_Check(o): msgpack_pack_map(&self.pk, len(o)) for k,v in o.iteritems(): self.pack(k) self.pack(v) - elif isinstance(o, tuple) or isinstance(o, list): + elif PySequence_Check(o): msgpack_pack_array(&self.pk, len(o)) for v in o: self.pack(v) -- cgit v1.2.1 From 257270c1ebc5bb6ac6c60e47130e2a577ffb6714 Mon Sep 17 00:00:00 2001 From: Naoki INADA Date: Mon, 29 Jun 2009 08:23:49 +0900 Subject: Refactor packing code. --- python/msgpack/_msgpack.pyx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'python/msgpack/_msgpack.pyx') diff --git a/python/msgpack/_msgpack.pyx b/python/msgpack/_msgpack.pyx index cde5313..279fdf5 100644 --- a/python/msgpack/_msgpack.pyx +++ b/python/msgpack/_msgpack.pyx @@ -31,6 +31,7 @@ cdef extern from "pack.h": void msgpack_pack_nil(msgpack_packer* pk) void msgpack_pack_true(msgpack_packer* pk) void msgpack_pack_false(msgpack_packer* pk) + void msgpack_pack_long(msgpack_packer* pk, long d) void msgpack_pack_long_long(msgpack_packer* pk, long long d) void msgpack_pack_double(msgpack_packer* pk, double d) void msgpack_pack_array(msgpack_packer* pk, size_t l) @@ -60,7 +61,7 @@ cdef class Packer(object): msgpack_packer_init(&self.pk, self, _packer_write) def __del__(self): - free(self.buff); + free(self.buff) def flush(self): """Flash local buffer and output stream if it has 'flush()' method.""" @@ -117,7 +118,7 @@ cdef class Packer(object): msgpack_pack_long_long(&self.pk, llval) elif isinstance(o, int): longval = o - msgpack_pack_long_long(&self.pk, longval) + msgpack_pack_long(&self.pk, longval) elif isinstance(o, float): fval = o msgpack_pack_double(&self.pk, fval) @@ -133,12 +134,12 @@ cdef class Packer(object): elif PyMapping_Check(o): msgpack_pack_map(&self.pk, len(o)) for k,v in o.iteritems(): - self.pack(k) - self.pack(v) + self.__pack(k) + self.__pack(v) elif PySequence_Check(o): msgpack_pack_array(&self.pk, len(o)) for v in o: - self.pack(v) + self.__pack(v) else: # TODO: Serialize with defalt() like simplejson. raise TypeError, "can't serialize %r" % (o,) @@ -154,7 +155,7 @@ cdef int _packer_write(Packer packer, const_char_ptr b, unsigned int l): if packer.length + l > packer.allocated: if packer.length > 0: packer.strm.write(PyString_FromStringAndSize(packer.buff, packer.length)) - if l > 64: + if l > packer.allocated/4: packer.strm.write(PyString_FromStringAndSize(b, l)) packer.length = 0 else: @@ -176,7 +177,6 @@ def packb(object o): buf = StringIO() packer = Packer(buf) packer.pack(o) - packer.flush() return buf.getvalue() packs = packb -- cgit v1.2.1 From 9015bd4ecfe7c30aef47d7c3c6fbc6fd4219eae0 Mon Sep 17 00:00:00 2001 From: Naoki INADA Date: Mon, 29 Jun 2009 10:09:04 +0900 Subject: Fix error on packing unsigned long long. --- python/msgpack/_msgpack.pyx | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) (limited to 'python/msgpack/_msgpack.pyx') diff --git a/python/msgpack/_msgpack.pyx b/python/msgpack/_msgpack.pyx index 279fdf5..e1c497b 100644 --- a/python/msgpack/_msgpack.pyx +++ b/python/msgpack/_msgpack.pyx @@ -9,6 +9,8 @@ cdef extern from "Python.h": char* PyString_AsString(object o) int PyMapping_Check(object o) int PySequence_Check(object o) + long long PyLong_AsLongLong(object o) + unsigned long long PyLong_AsUnsignedLongLong(object o) cdef extern from "stdlib.h": void* malloc(size_t) @@ -27,17 +29,18 @@ cdef extern from "pack.h": msgpack_packer_write callback void msgpack_packer_init(msgpack_packer* pk, void* data, msgpack_packer_write callback) - void msgpack_pack_int(msgpack_packer* pk, int d) - void msgpack_pack_nil(msgpack_packer* pk) - void msgpack_pack_true(msgpack_packer* pk) - void msgpack_pack_false(msgpack_packer* pk) - void msgpack_pack_long(msgpack_packer* pk, long d) - void msgpack_pack_long_long(msgpack_packer* pk, long long d) - void msgpack_pack_double(msgpack_packer* pk, double d) - void msgpack_pack_array(msgpack_packer* pk, size_t l) - void msgpack_pack_map(msgpack_packer* pk, size_t l) - void msgpack_pack_raw(msgpack_packer* pk, size_t l) - void msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) + int msgpack_pack_int(msgpack_packer* pk, int d) + int msgpack_pack_nil(msgpack_packer* pk) + int msgpack_pack_true(msgpack_packer* pk) + int msgpack_pack_false(msgpack_packer* pk) + int msgpack_pack_long(msgpack_packer* pk, long d) + int msgpack_pack_long_long(msgpack_packer* pk, long long d) + int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) + int msgpack_pack_double(msgpack_packer* pk, double d) + int msgpack_pack_array(msgpack_packer* pk, size_t l) + int msgpack_pack_map(msgpack_packer* pk, size_t l) + int msgpack_pack_raw(msgpack_packer* pk, size_t l) + int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) cdef class Packer(object): @@ -103,6 +106,7 @@ cdef class Packer(object): cdef __pack(self, object o): cdef long long llval + cdef unsigned long long ullval cdef long longval cdef double fval cdef char* rawval @@ -114,8 +118,12 @@ cdef class Packer(object): elif o is False: msgpack_pack_false(&self.pk) elif isinstance(o, long): - llval = o - msgpack_pack_long_long(&self.pk, llval) + if o > 0: + ullval = PyLong_AsUnsignedLongLong(o) + msgpack_pack_unsigned_long_long(&self.pk, ullval) + else: + llval = PyLong_AsLongLong(o) + msgpack_pack_long_long(&self.pk, llval) elif isinstance(o, int): longval = o msgpack_pack_long(&self.pk, longval) -- cgit v1.2.1 From d4317fdc853c7dbe3bc57718dc009cb8b0a2b351 Mon Sep 17 00:00:00 2001 From: Naoki INADA Date: Tue, 30 Jun 2009 23:03:33 +0900 Subject: Some optimization on packing. --- python/msgpack/_msgpack.pyx | 69 +++++++++++++++------------------------------ 1 file changed, 23 insertions(+), 46 deletions(-) (limited to 'python/msgpack/_msgpack.pyx') diff --git a/python/msgpack/_msgpack.pyx b/python/msgpack/_msgpack.pyx index e1c497b..26222aa 100644 --- a/python/msgpack/_msgpack.pyx +++ b/python/msgpack/_msgpack.pyx @@ -1,16 +1,23 @@ # coding: utf-8 -from cStringIO import StringIO +import cStringIO cdef extern from "Python.h": ctypedef char* const_char_ptr "const char*" ctypedef struct PyObject cdef object PyString_FromStringAndSize(const_char_ptr b, Py_ssize_t len) + PyObject* Py_True + PyObject* Py_False char* PyString_AsString(object o) int PyMapping_Check(object o) int PySequence_Check(object o) long long PyLong_AsLongLong(object o) unsigned long long PyLong_AsUnsignedLongLong(object o) + int PyLong_Check(object o) + int PyInt_Check(object o) + int PyFloat_Check(object o) + int PyString_Check(object o) + int PyUnicode_Check(object o) cdef extern from "stdlib.h": void* malloc(size_t) @@ -22,13 +29,9 @@ cdef extern from "string.h": void* memmove(char* dst, char* src, size_t size) cdef extern from "pack.h": - ctypedef int (*msgpack_packer_write)(void* data, const_char_ptr buf, unsigned int len) - struct msgpack_packer: - void *data - msgpack_packer_write callback + PyObject* writer - void msgpack_packer_init(msgpack_packer* pk, void* data, msgpack_packer_write callback) int msgpack_pack_int(msgpack_packer* pk, int d) int msgpack_pack_nil(msgpack_packer* pk) int msgpack_pack_true(msgpack_packer* pk) @@ -49,28 +52,17 @@ cdef class Packer(object): strm must have `write(bytes)` method. size specifies local buffer size. """ - cdef char* buff - cdef unsigned int length - cdef unsigned int allocated cdef msgpack_packer pk cdef object strm + cdef object writer - def __init__(self, strm, int size=4*1024): - self.strm = strm - self.buff = malloc(size) - self.allocated = size - self.length = 0 - - msgpack_packer_init(&self.pk, self, _packer_write) - - def __del__(self): - free(self.buff) + def __init__(self, strm_, int size=4*1024): + self.strm = strm_ + self.writer = strm_.write + self.pk.writer = self.writer def flush(self): """Flash local buffer and output stream if it has 'flush()' method.""" - if self.length > 0: - self.strm.write(PyString_FromStringAndSize(self.buff, self.length)) - self.length = 0 if hasattr(self.strm, 'flush'): self.strm.flush() @@ -113,28 +105,28 @@ cdef class Packer(object): if o is None: msgpack_pack_nil(&self.pk) - elif o is True: + elif o == Py_True: msgpack_pack_true(&self.pk) - elif o is False: + elif o == Py_False: msgpack_pack_false(&self.pk) - elif isinstance(o, long): + elif PyLong_Check(o): if o > 0: ullval = PyLong_AsUnsignedLongLong(o) msgpack_pack_unsigned_long_long(&self.pk, ullval) else: llval = PyLong_AsLongLong(o) msgpack_pack_long_long(&self.pk, llval) - elif isinstance(o, int): + elif PyInt_Check(o): longval = o msgpack_pack_long(&self.pk, longval) - elif isinstance(o, float): + elif PyFloat_Check(o): fval = o msgpack_pack_double(&self.pk, fval) - elif isinstance(o, str): + elif PyString_Check(o): rawval = o msgpack_pack_raw(&self.pk, len(o)) msgpack_pack_raw_body(&self.pk, rawval, len(o)) - elif isinstance(o, unicode): + elif PyUnicode_Check(o): o = o.encode('utf-8') rawval = o msgpack_pack_raw(&self.pk, len(o)) @@ -152,28 +144,13 @@ cdef class Packer(object): # TODO: Serialize with defalt() like simplejson. raise TypeError, "can't serialize %r" % (o,) - def pack(self, obj, flush=True): + def pack(self, object obj, flush=True): self.__pack(obj) if flush: self.flush() close = flush -cdef int _packer_write(Packer packer, const_char_ptr b, unsigned int l): - if packer.length + l > packer.allocated: - if packer.length > 0: - packer.strm.write(PyString_FromStringAndSize(packer.buff, packer.length)) - if l > packer.allocated/4: - packer.strm.write(PyString_FromStringAndSize(b, l)) - packer.length = 0 - else: - memcpy(packer.buff, b, l) - packer.length = l - else: - memcpy(packer.buff + packer.length, b, l) - packer.length += l - return 0 - def pack(object o, object stream): u"""pack o and write to stream).""" packer = Packer(stream) @@ -182,7 +159,7 @@ def pack(object o, object stream): def packb(object o): u"""pack o and return packed bytes.""" - buf = StringIO() + buf = cStringIO.StringIO() packer = Packer(buf) packer.pack(o) return buf.getvalue() -- cgit v1.2.1 From 03942a1b9020b00bf47e93b7d5ec606e8160c054 Mon Sep 17 00:00:00 2001 From: Naoki INADA Date: Wed, 1 Jul 2009 00:57:46 +0900 Subject: Major speedup on packing. --- python/msgpack/_msgpack.pyx | 60 ++++++++++++++++----------------------------- 1 file changed, 21 insertions(+), 39 deletions(-) (limited to 'python/msgpack/_msgpack.pyx') diff --git a/python/msgpack/_msgpack.pyx b/python/msgpack/_msgpack.pyx index 26222aa..9d766c5 100644 --- a/python/msgpack/_msgpack.pyx +++ b/python/msgpack/_msgpack.pyx @@ -18,6 +18,7 @@ cdef extern from "Python.h": int PyFloat_Check(object o) int PyString_Check(object o) int PyUnicode_Check(object o) + object PyBuffer_FromMemory(const_char_ptr b, Py_ssize_t len) cdef extern from "stdlib.h": void* malloc(size_t) @@ -30,7 +31,9 @@ cdef extern from "string.h": cdef extern from "pack.h": struct msgpack_packer: - PyObject* writer + char* buf + size_t length + size_t buf_size int msgpack_pack_int(msgpack_packer* pk, int d) int msgpack_pack_nil(msgpack_packer* pk) @@ -56,45 +59,16 @@ cdef class Packer(object): cdef object strm cdef object writer - def __init__(self, strm_, int size=4*1024): - self.strm = strm_ - self.writer = strm_.write - self.pk.writer = self.writer + def __init__(self, strm): + self.strm = strm - def flush(self): - """Flash local buffer and output stream if it has 'flush()' method.""" - if hasattr(self.strm, 'flush'): - self.strm.flush() - - def pack_list(self, len): - """Start packing sequential objects. - - Example: - - packer.pack_list(2) - packer.pack('foo') - packer.pack('bar') - - This is same to: - - packer.pack(['foo', 'bar']) - """ - msgpack_pack_array(&self.pk, len) + cdef int buf_size = 1024*1024 + self.pk.buf = malloc(buf_size); + self.pk.buf_size = buf_size + self.pk.length = 0 - def pack_dict(self, len): - """Start packing key-value objects. - - Example: - - packer.pack_dict(1) - packer.pack('foo') - packer.pack('bar') - - This is same to: - - packer.pack({'foo': 'bar'}) - """ - msgpack_pack_map(&self.pk, len) + def __del__(self): + free(self.pk.buf); cdef __pack(self, object o): cdef long long llval @@ -144,11 +118,19 @@ cdef class Packer(object): # TODO: Serialize with defalt() like simplejson. raise TypeError, "can't serialize %r" % (o,) - def pack(self, object obj, flush=True): + def pack(self, object obj, object flush=True): self.__pack(obj) + buf = PyBuffer_FromMemory(self.pk.buf, self.pk.length) + self.pk.length = 0 + self.strm.write(buf) if flush: self.flush() + def flush(self): + """Flash local buffer and output stream if it has 'flush()' method.""" + if hasattr(self.strm, 'flush'): + self.strm.flush() + close = flush def pack(object o, object stream): -- cgit v1.2.1 From 78db826a75d9f091f2175ace5f2cd9cb81c0f115 Mon Sep 17 00:00:00 2001 From: Naoki INADA Date: Wed, 1 Jul 2009 20:55:24 +0900 Subject: Fix memory leak. Remove stream packing feature. Add errorcheck in packing. --- python/msgpack/_msgpack.pyx | 135 +++++++++++++++++++++++--------------------- 1 file changed, 72 insertions(+), 63 deletions(-) (limited to 'python/msgpack/_msgpack.pyx') diff --git a/python/msgpack/_msgpack.pyx b/python/msgpack/_msgpack.pyx index 9d766c5..c455394 100644 --- a/python/msgpack/_msgpack.pyx +++ b/python/msgpack/_msgpack.pyx @@ -5,20 +5,22 @@ import cStringIO cdef extern from "Python.h": ctypedef char* const_char_ptr "const char*" ctypedef struct PyObject + cdef object PyString_FromStringAndSize(const_char_ptr b, Py_ssize_t len) - PyObject* Py_True - PyObject* Py_False - char* PyString_AsString(object o) - int PyMapping_Check(object o) - int PySequence_Check(object o) - long long PyLong_AsLongLong(object o) - unsigned long long PyLong_AsUnsignedLongLong(object o) - int PyLong_Check(object o) - int PyInt_Check(object o) - int PyFloat_Check(object o) - int PyString_Check(object o) - int PyUnicode_Check(object o) - object PyBuffer_FromMemory(const_char_ptr b, Py_ssize_t len) + cdef PyObject* Py_True + cdef PyObject* Py_False + + cdef char* PyString_AsString(object o) + cdef long long PyLong_AsLongLong(object o) + cdef unsigned long long PyLong_AsUnsignedLongLong(object o) + + cdef int PyMapping_Check(object o) + cdef int PySequence_Check(object o) + cdef int PyLong_Check(object o) + cdef int PyInt_Check(object o) + cdef int PyFloat_Check(object o) + cdef int PyString_Check(object o) + cdef int PyUnicode_Check(object o) cdef extern from "stdlib.h": void* malloc(size_t) @@ -50,101 +52,101 @@ cdef extern from "pack.h": cdef class Packer(object): - """Packer that pack data into strm. + """MessagePack Packer + + usage: - strm must have `write(bytes)` method. - size specifies local buffer size. + packer = Packer() + astream.write(packer.pack(a)) + astream.write(packer.pack(b)) """ cdef msgpack_packer pk - cdef object strm - cdef object writer - - def __init__(self, strm): - self.strm = strm + def __cinit__(self): cdef int buf_size = 1024*1024 self.pk.buf = malloc(buf_size); self.pk.buf_size = buf_size self.pk.length = 0 - def __del__(self): + def __dealloc__(self): free(self.pk.buf); - cdef __pack(self, object o): + cdef int __pack(self, object o): cdef long long llval cdef unsigned long long ullval cdef long longval cdef double fval cdef char* rawval + cdef int ret if o is None: - msgpack_pack_nil(&self.pk) + ret = msgpack_pack_nil(&self.pk) elif o == Py_True: - msgpack_pack_true(&self.pk) + ret = msgpack_pack_true(&self.pk) elif o == Py_False: - msgpack_pack_false(&self.pk) + ret = msgpack_pack_false(&self.pk) elif PyLong_Check(o): if o > 0: ullval = PyLong_AsUnsignedLongLong(o) - msgpack_pack_unsigned_long_long(&self.pk, ullval) + ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) else: llval = PyLong_AsLongLong(o) - msgpack_pack_long_long(&self.pk, llval) + ret = msgpack_pack_long_long(&self.pk, llval) elif PyInt_Check(o): longval = o - msgpack_pack_long(&self.pk, longval) + ret = msgpack_pack_long(&self.pk, longval) elif PyFloat_Check(o): fval = o - msgpack_pack_double(&self.pk, fval) + ret = msgpack_pack_double(&self.pk, fval) elif PyString_Check(o): rawval = o - msgpack_pack_raw(&self.pk, len(o)) - msgpack_pack_raw_body(&self.pk, rawval, len(o)) + ret = msgpack_pack_raw(&self.pk, len(o)) + if ret == 0: + ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) elif PyUnicode_Check(o): o = o.encode('utf-8') rawval = o - msgpack_pack_raw(&self.pk, len(o)) - msgpack_pack_raw_body(&self.pk, rawval, len(o)) + ret = msgpack_pack_raw(&self.pk, len(o)) + if ret == 0: + ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) elif PyMapping_Check(o): - msgpack_pack_map(&self.pk, len(o)) - for k,v in o.iteritems(): - self.__pack(k) - self.__pack(v) + ret = msgpack_pack_map(&self.pk, len(o)) + if ret == 0: + for k,v in o.iteritems(): + ret = self.__pack(k) + if ret != 0: break + ret = self.__pack(v) + if ret != 0: break elif PySequence_Check(o): - msgpack_pack_array(&self.pk, len(o)) - for v in o: - self.__pack(v) + ret = msgpack_pack_array(&self.pk, len(o)) + if ret == 0: + for v in o: + ret = self.__pack(v) + if ret != 0: break else: # TODO: Serialize with defalt() like simplejson. raise TypeError, "can't serialize %r" % (o,) + return ret - def pack(self, object obj, object flush=True): - self.__pack(obj) - buf = PyBuffer_FromMemory(self.pk.buf, self.pk.length) + def pack(self, object obj): + cdef int ret + ret = self.__pack(obj) + if ret: + raise TypeError + buf = PyString_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 - self.strm.write(buf) - if flush: - self.flush() - - def flush(self): - """Flash local buffer and output stream if it has 'flush()' method.""" - if hasattr(self.strm, 'flush'): - self.strm.flush() + return buf - close = flush def pack(object o, object stream): - u"""pack o and write to stream).""" - packer = Packer(stream) - packer.pack(o) - packer.flush() + """pack a object `o` and write it to stream).""" + packer = Packer() + stream.write(packer.pack(o)) def packb(object o): - u"""pack o and return packed bytes.""" - buf = cStringIO.StringIO() - packer = Packer(buf) - packer.pack(o) - return buf.getvalue() + """pack o and return packed bytes.""" + packer = Packer() + return packer.pack(o) packs = packb @@ -222,7 +224,14 @@ cdef class Unpacker(object): cdef int read_size cdef object waiting_bytes - def __init__(self, file_like=None, int read_size=4096): + def __cinit__(self): + self.buf = NULL + + def __dealloc__(self): + if self.buf: + free(self.buf); + + def __init__(self, file_like=None, int read_size=1024*1024): self.file_like = file_like self.read_size = read_size self.waiting_bytes = [] -- cgit v1.2.1 From 900785e1aa2e031a4496f8c2a30bb95d0c950b9b Mon Sep 17 00:00:00 2001 From: Naoki INADA Date: Sun, 12 Jul 2009 09:29:11 +0900 Subject: Clean up --- python/msgpack/_msgpack.pyx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'python/msgpack/_msgpack.pyx') diff --git a/python/msgpack/_msgpack.pyx b/python/msgpack/_msgpack.pyx index c455394..b407b92 100644 --- a/python/msgpack/_msgpack.pyx +++ b/python/msgpack/_msgpack.pyx @@ -163,7 +163,7 @@ cdef extern from "unpack.h": object template_data(template_context* ctx) -def unpacks(object packed_bytes): +def unpackb(object packed_bytes): """Unpack packed_bytes to object. Returns unpacked object.""" cdef const_char_ptr p = packed_bytes cdef template_context ctx @@ -176,10 +176,12 @@ def unpacks(object packed_bytes): else: return None +unpacks = unpackb + def unpack(object stream): """unpack from stream.""" packed = stream.read() - return unpacks(packed) + return unpackb(packed) cdef class UnpackIterator(object): cdef object unpacker -- cgit v1.2.1 From e5c49dae13b26f2155dffffbc4e9915badbcecfb Mon Sep 17 00:00:00 2001 From: Naoki INADA Date: Sun, 12 Jul 2009 20:02:21 +0900 Subject: Release 0.1.0 --- python/msgpack/_msgpack.pyx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'python/msgpack/_msgpack.pyx') diff --git a/python/msgpack/_msgpack.pyx b/python/msgpack/_msgpack.pyx index b407b92..7990a18 100644 --- a/python/msgpack/_msgpack.pyx +++ b/python/msgpack/_msgpack.pyx @@ -139,7 +139,7 @@ cdef class Packer(object): def pack(object o, object stream): - """pack a object `o` and write it to stream).""" + """pack an object `o` and write it to stream).""" packer = Packer() stream.write(packer.pack(o)) @@ -164,7 +164,7 @@ cdef extern from "unpack.h": def unpackb(object packed_bytes): - """Unpack packed_bytes to object. Returns unpacked object.""" + """Unpack packed_bytes to object. Returns an unpacked object.""" cdef const_char_ptr p = packed_bytes cdef template_context ctx cdef size_t off = 0 @@ -179,7 +179,7 @@ def unpackb(object packed_bytes): unpacks = unpackb def unpack(object stream): - """unpack from stream.""" + """unpack an object from stream.""" packed = stream.read() return unpackb(packed) @@ -196,14 +196,14 @@ cdef class UnpackIterator(object): return self cdef class Unpacker(object): - """Unpacker(file_like=None, read_size=4096) + """Unpacker(file_like=None, read_size=1024*1024) Streaming unpacker. file_like must have read(n) method. read_size is used like file_like.read(read_size) - If file_like is None, you can feed() bytes. feed() is useful - for unpack from non-blocking stream. + If file_like is None, you can ``feed()`` bytes. ``feed()`` is + useful for unpacking from non-blocking stream. exsample 1: unpacker = Unpacker(afile) -- cgit v1.2.1