diff options
-rw-r--r-- | ChangeLog.rst | 9 | ||||
-rw-r--r-- | msgpack/_msgpack.pyx | 138 | ||||
-rw-r--r-- | msgpack/unpack.h | 30 | ||||
-rw-r--r-- | msgpack/unpack_template.h | 24 | ||||
-rw-r--r-- | test/test_obj.py | 10 | ||||
-rw-r--r-- | test/test_pack.py | 7 | ||||
-rw-r--r-- | test/test_sequnpack.py | 15 |
7 files changed, 143 insertions, 90 deletions
diff --git a/ChangeLog.rst b/ChangeLog.rst index f49b577..6a4d27b 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,12 @@ +0.3.0 +===== +:release date: in development + +Changes +------- +* Add ``.skip()`` method to ``Unpacker`` (thanks to jnothman) + + 0.2.3 ======= :release date: in development diff --git a/msgpack/_msgpack.pyx b/msgpack/_msgpack.pyx index 44a50ae..dca6237 100644 --- a/msgpack/_msgpack.pyx +++ b/msgpack/_msgpack.pyx @@ -201,6 +201,7 @@ cdef extern from "unpack.h": ctypedef struct msgpack_user: bint use_list PyObject* object_hook + bint has_pairs_hook # call object_hook with k-v pairs PyObject* list_hook char *encoding char *unicode_errors @@ -213,13 +214,54 @@ cdef extern from "unpack.h": PyObject* key int template_execute(template_context* ctx, const_char_ptr data, - size_t len, size_t* off) except -1 + size_t len, size_t* off, bint construct) except -1 void template_init(template_context* ctx) object template_data(template_context* ctx) +cdef inline init_ctx(template_context *ctx, object object_hook, object object_pairs_hook, object list_hook, bint use_list, encoding, unicode_errors): + template_init(ctx) + ctx.user.use_list = use_list + ctx.user.object_hook = ctx.user.list_hook = <PyObject*>NULL + + if object_hook is not None and object_pairs_hook is not None: + raise ValueError("object_pairs_hook and object_hook are mutually exclusive.") + + if object_hook is not None: + if not PyCallable_Check(object_hook): + raise TypeError("object_hook must be a callable.") + ctx.user.object_hook = <PyObject*>object_hook + + if object_pairs_hook is None: + ctx.user.has_pairs_hook = False + else: + if not PyCallable_Check(object_pairs_hook): + raise TypeError("object_pairs_hook must be a callable.") + ctx.user.object_hook = <PyObject*>object_pairs_hook + ctx.user.has_pairs_hook = True + + if list_hook is not None: + if not PyCallable_Check(list_hook): + raise TypeError("list_hook must be a callable.") + ctx.user.list_hook = <PyObject*>list_hook + + if encoding is None: + ctx.user.encoding = NULL + ctx.user.unicode_errors = NULL + else: + if isinstance(encoding, unicode): + _bencoding = encoding.encode('ascii') + else: + _bencoding = encoding + ctx.user.encoding = PyBytes_AsString(_bencoding) + if isinstance(unicode_errors, unicode): + _berrors = unicode_errors.encode('ascii') + else: + _berrors = unicode_errors + ctx.user.unicode_errors = PyBytes_AsString(_berrors) def unpackb(object packed, object object_hook=None, object list_hook=None, use_list=None, encoding=None, unicode_errors="strict", + object_pairs_hook=None, ): """Unpack packed_bytes to object. Returns an unpacked object. @@ -234,39 +276,11 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, PyObject_AsReadBuffer(packed, <const_void_ptr*>&buf, &buf_len) - if encoding is None: - enc = NULL - err = NULL - else: - if isinstance(encoding, unicode): - bencoding = encoding.encode('ascii') - else: - bencoding = encoding - if isinstance(unicode_errors, unicode): - berrors = unicode_errors.encode('ascii') - else: - berrors = unicode_errors - enc = PyBytes_AsString(bencoding) - err = PyBytes_AsString(berrors) - - template_init(&ctx) if use_list is None: warnings.warn("Set use_list explicitly.", category=DeprecationWarning, stacklevel=1) - ctx.user.use_list = 0 - else: - ctx.user.use_list = use_list - ctx.user.object_hook = ctx.user.list_hook = NULL - ctx.user.encoding = <const_char_ptr>enc - ctx.user.unicode_errors = <const_char_ptr>err - if object_hook is not None: - if not PyCallable_Check(object_hook): - raise TypeError("object_hook must be a callable.") - ctx.user.object_hook = <PyObject*>object_hook - if list_hook is not None: - if not PyCallable_Check(list_hook): - raise TypeError("list_hook must be a callable.") - ctx.user.list_hook = <PyObject*>list_hook - ret = template_execute(&ctx, buf, buf_len, &off) + use_list = 0 + init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, use_list, encoding, unicode_errors) + ret = template_execute(&ctx, buf, buf_len, &off, 1) if ret == 1: obj = template_data(&ctx) if off < buf_len: @@ -278,6 +292,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, def unpack(object stream, object object_hook=None, object list_hook=None, use_list=None, encoding=None, unicode_errors="strict", + object_pairs_hook=None, ): """Unpack an object from `stream`. @@ -287,7 +302,7 @@ def unpack(object stream, object object_hook=None, object list_hook=None, warnings.warn("Set use_list explicitly.", category=DeprecationWarning, stacklevel=1) use_list = 0 return unpackb(stream.read(), use_list=use_list, - object_hook=object_hook, list_hook=list_hook, + object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook, encoding=encoding, unicode_errors=unicode_errors, ) @@ -307,7 +322,10 @@ cdef class Unpacker(object): Otherwise, it is deserialized to Python tuple. `object_hook` is same to simplejson. If it is not None, it should be callable - and Unpacker calls it when deserializing key-value. + and Unpacker calls it with a dict argument after deserializing a map. + + `object_pairs_hook` is same to simplejson. If it is not None, it should be callable + and Unpacker calls it with a list of key-value pairs after deserializing a map. `encoding` is encoding used for decoding msgpack bytes. If it is None (default), msgpack bytes is deserialized to Python bytes. @@ -357,9 +375,8 @@ cdef class Unpacker(object): self.buf = NULL def __init__(self, file_like=None, Py_ssize_t read_size=0, use_list=None, - object object_hook=None, object list_hook=None, + object object_hook=None, object object_pairs_hook=None, object list_hook=None, encoding=None, unicode_errors='strict', int max_buffer_size=0, - object object_pairs_hook=None, ): if use_list is None: warnings.warn("Set use_list explicitly.", category=DeprecationWarning, stacklevel=1) @@ -384,31 +401,7 @@ cdef class Unpacker(object): self.buf_size = read_size self.buf_head = 0 self.buf_tail = 0 - template_init(&self.ctx) - self.ctx.user.use_list = use_list - self.ctx.user.object_hook = self.ctx.user.list_hook = <PyObject*>NULL - if object_hook is not None: - if not PyCallable_Check(object_hook): - raise TypeError("object_hook must be a callable.") - self.ctx.user.object_hook = <PyObject*>object_hook - if list_hook is not None: - if not PyCallable_Check(list_hook): - raise TypeError("list_hook must be a callable.") - self.ctx.user.list_hook = <PyObject*>list_hook - if encoding is None: - self.ctx.user.encoding = NULL - self.ctx.user.unicode_errors = NULL - else: - if isinstance(encoding, unicode): - self._bencoding = encoding.encode('ascii') - else: - self._bencoding = encoding - self.ctx.user.encoding = PyBytes_AsString(self._bencoding) - if isinstance(unicode_errors, unicode): - self._berrors = unicode_errors.encode('ascii') - else: - self._berrors = unicode_errors - self.ctx.user.unicode_errors = PyBytes_AsString(self._berrors) + init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, use_list, encoding, unicode_errors) def feed(self, object next_bytes): cdef char* buf @@ -469,15 +462,18 @@ cdef class Unpacker(object): else: self.file_like = None - cpdef unpack(self): - """unpack one object""" + cdef object _unpack(self, bint construct): cdef int ret + cdef object obj while 1: - ret = template_execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) + ret = template_execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head, construct) if ret == 1: - o = template_data(&self.ctx) + if construct: + obj = template_data(&self.ctx) + else: + obj = None template_init(&self.ctx) - return o + return obj elif ret == 0: if self.file_like is not None: self.read_from_file() @@ -486,11 +482,19 @@ cdef class Unpacker(object): else: raise ValueError("Unpack failed: error = %d" % (ret,)) + def unpack(self): + """unpack one object""" + return self._unpack(1) + + def skip(self): + """read and ignore one object, returning None""" + return self._unpack(0) + def __iter__(self): return self def __next__(self): - return self.unpack() + return self._unpack(1) # for debug. #def _buf(self): diff --git a/msgpack/unpack.h b/msgpack/unpack.h index a106f9c..7064a1b 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -22,6 +22,7 @@ typedef struct unpack_user { int use_list; PyObject *object_hook; + bool has_pairs_hook; PyObject *list_hook; const char *encoding; const char *unicode_errors; @@ -160,9 +161,7 @@ static inline int template_callback_array_item(unpack_user* u, unsigned int curr static inline int template_callback_array_end(unpack_user* u, msgpack_unpack_object* c) { if (u->list_hook) { - PyObject *arglist = Py_BuildValue("(O)", *c); - PyObject *new_c = PyEval_CallObject(u->list_hook, arglist); - Py_DECREF(arglist); + PyObject *new_c = PyEval_CallFunction(u->list_hook, "(O)", *c); Py_DECREF(*c); *c = new_c; } @@ -171,16 +170,31 @@ static inline int template_callback_array_end(unpack_user* u, msgpack_unpack_obj static inline int template_callback_map(unpack_user* u, unsigned int n, msgpack_unpack_object* o) { - PyObject *p = PyDict_New(); + PyObject *p; + if (u->has_pairs_hook) { + p = PyList_New(n); // Or use tuple? + } + else { + p = PyDict_New(); + } if (!p) return -1; *o = p; return 0; } -static inline int template_callback_map_item(unpack_user* u, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v) +static inline int template_callback_map_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v) { - if (PyDict_SetItem(*c, k, v) == 0) { + if (u->has_pairs_hook) { + msgpack_unpack_object item = PyTuple_Pack(2, k, v); + if (!item) + return -1; + Py_DECREF(k); + Py_DECREF(v); + PyList_SET_ITEM(*c, current, item); + return 0; + } + else if (PyDict_SetItem(*c, k, v) == 0) { Py_DECREF(k); Py_DECREF(v); return 0; @@ -191,9 +205,7 @@ static inline int template_callback_map_item(unpack_user* u, msgpack_unpack_obje static inline int template_callback_map_end(unpack_user* u, msgpack_unpack_object* c) { if (u->object_hook) { - PyObject *arglist = Py_BuildValue("(O)", *c); - PyObject *new_c = PyEval_CallObject(u->object_hook, arglist); - Py_DECREF(arglist); + PyObject *new_c = PyEval_CallFunction(u->object_hook, "(O)", *c); Py_DECREF(*c); *c = new_c; } diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index b844a24..6080a51 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -95,7 +95,7 @@ msgpack_unpack_func(msgpack_unpack_object, _data)(msgpack_unpack_struct(_context } -msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off) +msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off, int construct) { assert(len >= *off); @@ -117,14 +117,17 @@ msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const c int ret; +#define construct_cb(name) \ + construct && msgpack_unpack_callback(name) + #define push_simple_value(func) \ - if(msgpack_unpack_callback(func)(user, &obj) < 0) { goto _failed; } \ + if(construct_cb(func)(user, &obj) < 0) { goto _failed; } \ goto _push #define push_fixed_value(func, arg) \ - if(msgpack_unpack_callback(func)(user, arg, &obj) < 0) { goto _failed; } \ + if(construct_cb(func)(user, arg, &obj) < 0) { goto _failed; } \ goto _push #define push_variable_value(func, base, pos, len) \ - if(msgpack_unpack_callback(func)(user, \ + if(construct_cb(func)(user, \ (const char*)base, (const char*)pos, len, &obj) < 0) { goto _failed; } \ goto _push @@ -140,9 +143,9 @@ msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const c #define start_container(func, count_, ct_) \ if(top >= MSGPACK_EMBED_STACK_SIZE) { goto _failed; } /* FIXME */ \ - if(msgpack_unpack_callback(func)(user, count_, &stack[top].obj) < 0) { goto _failed; } \ + if(construct_cb(func)(user, count_, &stack[top].obj) < 0) { goto _failed; } \ if((count_) == 0) { obj = stack[top].obj; \ - msgpack_unpack_callback(func##_end)(user, &obj); \ + construct_cb(func##_end)(user, &obj); \ goto _push; } \ stack[top].ct = ct_; \ stack[top].size = count_; \ @@ -340,10 +343,10 @@ _push: c = &stack[top-1]; switch(c->ct) { case CT_ARRAY_ITEM: - if(msgpack_unpack_callback(_array_item)(user, c->count, &c->obj, obj) < 0) { goto _failed; } + if(construct_cb(_array_item)(user, c->count, &c->obj, obj) < 0) { goto _failed; } if(++c->count == c->size) { obj = c->obj; - msgpack_unpack_callback(_array_end)(user, &obj); + construct_cb(_array_end)(user, &obj); --top; /*printf("stack pop %d\n", top);*/ goto _push; @@ -354,10 +357,10 @@ _push: c->ct = CT_MAP_VALUE; goto _header_again; case CT_MAP_VALUE: - if(msgpack_unpack_callback(_map_item)(user, &c->obj, c->map_key, obj) < 0) { goto _failed; } + if(construct_cb(_map_item)(user, c->count, &c->obj, c->map_key, obj) < 0) { goto _failed; } if(++c->count == c->size) { obj = c->obj; - msgpack_unpack_callback(_map_end)(user, &obj); + construct_cb(_map_end)(user, &obj); --top; /*printf("stack pop %d\n", top);*/ goto _push; @@ -399,6 +402,7 @@ _end: *off = p - (const unsigned char*)data; return ret; +#undef construct_cb } diff --git a/test/test_obj.py b/test/test_obj.py index d809093..12a149f 100644 --- a/test/test_obj.py +++ b/test/test_obj.py @@ -26,6 +26,16 @@ def test_decode_hook(): unpacked = unpackb(packed, object_hook=_decode_complex, use_list=1) eq_(unpacked[1], 1+2j) +def test_decode_pairs_hook(): + packed = packb([3, {1: 2, 3: 4}]) + prod_sum = 1 * 2 + 3 * 4 + unpacked = unpackb(packed, object_pairs_hook=lambda l: sum(k * v for k, v in l)) + eq_(unpacked[1], prod_sum) + +@raises(ValueError) +def test_only_one_obj_hook(): + unpackb(b'', object_hook=lambda x: x, object_pairs_hook=lambda x: x) + @raises(ValueError) def test_bad_hook(): packed = packb([3, 1+2j], default=lambda o: o) diff --git a/test/test_pack.py b/test/test_pack.py index 9bd2b32..9009d35 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -110,10 +110,9 @@ def test_odict(): seq = [(b'one', 1), (b'two', 2), (b'three', 3), (b'four', 4)] od = odict(seq) assert_equal(unpackb(packb(od), use_list=1), dict(seq)) - # After object_pairs_hook is implemented. - #def pair_hook(seq): - # return seq - #assert_equal(unpackb(packb(od), object_pairs_hook=pair_hook), seq) + def pair_hook(seq): + return seq + assert_equal(unpackb(packb(od), object_pairs_hook=pair_hook), seq) if __name__ == '__main__': diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index 21fc3be..dac36a8 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -28,6 +28,21 @@ def test_foobar(): k += 1 assert k == len(b'foobar') +def test_foobar_skip(): + unpacker = Unpacker(read_size=3, use_list=1) + unpacker.feed(b'foobar') + assert unpacker.unpack() == ord(b'f') + unpacker.skip() + assert unpacker.unpack() == ord(b'o') + unpacker.skip() + assert unpacker.unpack() == ord(b'a') + unpacker.skip() + try: + o = unpacker.unpack() + assert 0, "should raise exception" + except StopIteration: + assert 1, "ok" + def test_maxbuffersize(): nose.tools.assert_raises(ValueError, Unpacker, read_size=5, max_buffer_size=3) unpacker = Unpacker(read_size=3, max_buffer_size=3, use_list=1) |