diff options
author | INADA Naoki <inada-n@klab.com> | 2013-10-20 15:08:31 +0900 |
---|---|---|
committer | INADA Naoki <inada-n@klab.com> | 2013-10-20 15:08:31 +0900 |
commit | 27f0cba8a5f36393517ee85d2c339847b76e0c6b (patch) | |
tree | 470bc84905240c82d2e1f4e710d55a288661f635 | |
parent | 7123341ca89a9a3afee8521cc16a1a419ea8871e (diff) | |
parent | 6386481024ec045d9ef991a2c975902276812508 (diff) | |
download | msgpack-python-27f0cba8a5f36393517ee85d2c339847b76e0c6b.tar.gz |
Merge branch 'master' of https://github.com/antocuni/msgpack-python into newspec
Conflicts:
msgpack/fallback.py
msgpack/unpack.h
msgpack/unpack_define.h
msgpack/unpack_template.h
-rw-r--r-- | README.rst | 8 | ||||
-rw-r--r-- | msgpack/_packer.pyx | 12 | ||||
-rw-r--r-- | msgpack/_unpacker.pyx | 39 | ||||
-rw-r--r-- | msgpack/pack.h | 2 | ||||
-rw-r--r-- | msgpack/pack_template.h | 60 | ||||
-rw-r--r-- | msgpack/unpack.h | 18 | ||||
-rw-r--r-- | msgpack/unpack_define.h | 14 | ||||
-rw-r--r-- | msgpack/unpack_template.h | 69 | ||||
-rw-r--r-- | setup.py | 1 | ||||
-rw-r--r-- | test/test_extension.py | 64 | ||||
-rw-r--r-- | test/test_sequnpack.py | 15 |
11 files changed, 261 insertions, 41 deletions
@@ -140,6 +140,14 @@ It is also possible to pack/unpack custom data types. Here is an example for ``object_pairs_hook`` callback may instead be used to receive a list of key-value pairs. +Extended types +^^^^^^^^^^^^^^^ + +It is also possible to pack/unpack custom data types using the msgpack feature +of "extended types". For example, msgpack-pypy uses it to provide very fast serialization of int/float lists on top of PyPy (experimental for now): + +https://bitbucket.org/antocuni/msgpack-pypy/src/default/msgpack_pypy.py + Advanced unpacking control ^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 7082445..df670ed 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -5,6 +5,7 @@ from cpython cimport * from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * +from libc.stdint cimport int8_t from msgpack.exceptions import PackValueError @@ -29,6 +30,7 @@ cdef extern from "pack.h": int msgpack_pack_raw(msgpack_packer* pk, size_t l) int msgpack_pack_bin(msgpack_packer* pk, size_t l) int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) + int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l) cdef int DEFAULT_RECURSE_LIMIT=511 @@ -183,6 +185,9 @@ cdef class Packer(object): for v in o: ret = self._pack(v, nest_limit-1) if ret != 0: break + elif self.handle_unknown_type(o): + # it means that obj was succesfully packed, so we are done + return 0 elif self._default: o = self._default(o) ret = self._pack(o, nest_limit-1) @@ -202,6 +207,13 @@ cdef class Packer(object): self.pk.length = 0 return buf + def handle_unknown_type(self, obj): + return None + + def pack_extended_type(self, typecode, data): + msgpack_pack_ext(&self.pk, typecode, len(data)) + msgpack_pack_raw_body(&self.pk, data, len(data)) + def pack_array_header(self, size_t size): cdef int ret = msgpack_pack_array(&self.pk, size) if ret == -1: diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 7b0c8a6..cf30670 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -24,6 +24,7 @@ cdef extern from "unpack.h": PyObject* object_hook bint has_pairs_hook # call object_hook with k-v pairs PyObject* list_hook + PyObject* ext_type_hook char *encoding char *unicode_errors @@ -45,6 +46,7 @@ cdef extern from "unpack.h": cdef inline init_ctx(unpack_context *ctx, object object_hook, object object_pairs_hook, object list_hook, + object ext_type_hook, bint use_list, char* encoding, char* unicode_errors): unpack_init(ctx) ctx.user.use_list = use_list @@ -71,9 +73,17 @@ cdef inline init_ctx(unpack_context *ctx, raise TypeError("list_hook must be a callable.") ctx.user.list_hook = <PyObject*>list_hook + if ext_type_hook is not None: + if not PyCallable_Check(ext_type_hook): + raise TypeError("ext_type_hook must be a callable.") + ctx.user.ext_type_hook = <PyObject*>ext_type_hook + ctx.user.encoding = encoding ctx.user.unicode_errors = unicode_errors +def default_read_extended_type(typecode, data): + raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode) + def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=1, encoding=None, unicode_errors="strict", object_pairs_hook=None, @@ -106,7 +116,8 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, unicode_errors = unicode_errors.encode('ascii') cerr = PyBytes_AsString(unicode_errors) - init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr) + init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, default_read_extended_type, + use_list, cenc, cerr) ret = unpack_construct(&ctx, buf, buf_len, &off) if ret == 1: obj = unpack_data(&ctx) @@ -248,7 +259,10 @@ cdef class Unpacker(object): self.unicode_errors = unicode_errors cerr = PyBytes_AsString(self.unicode_errors) - init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr) + ext_type_hook = self.read_extended_type + Py_INCREF(ext_type_hook) + init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, + ext_type_hook, use_list, cenc, cerr) def feed(self, object next_bytes): """Append `next_bytes` to internal buffer.""" @@ -358,6 +372,24 @@ cdef class Unpacker(object): """ return self._unpack(unpack_construct, write_bytes) + def unpack_one(self, object write_bytes=None): + """ + unpack one object + + If write_bytes is not None, it will be called with parts of the raw + message as it is unpacked. + + Raises `UnpackValueError` if there are no more bytes to unpack. + Raises ``ExtraData`` if there are still bytes left after the unpacking. + """ + try: + result = self.unpack() + except OutOfData: + raise UnpackValueError("Data is not enough") + if self.buf_head < self.buf_tail: + raise ExtraData(result, self.buf[self.buf_head:]) + return result + def skip(self, object write_bytes=None): """ read and ignore one object, returning None @@ -385,6 +417,9 @@ cdef class Unpacker(object): """ return self._unpack(read_map_header, write_bytes) + def read_extended_type(self, typecode, data): + return default_read_extended_type(typecode, data) + def __iter__(self): return self diff --git a/msgpack/pack.h b/msgpack/pack.h index 001a0c1..a71c87b 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -71,6 +71,8 @@ static inline int msgpack_pack_raw(msgpack_packer* pk, size_t l); static inline int msgpack_pack_bin(msgpack_packer* pk, size_t l); static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l); +static inline int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l); + static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l) { char* buf = pk->buf; diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h index d228d7a..0fe9514 100644 --- a/msgpack/pack_template.h +++ b/msgpack/pack_template.h @@ -708,6 +708,66 @@ static inline int msgpack_pack_raw_body(msgpack_packer* x, const void* b, size_t msgpack_pack_append_buffer(x, (const unsigned char*)b, l); } +/* + * Ext + */ + +static inline int msgpack_pack_ext(msgpack_packer* x, int8_t typecode, size_t l) +{ + if (l == 1) { + unsigned char buf[2]; + buf[0] = 0xd4; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 2) { + unsigned char buf[2]; + buf[0] = 0xd5; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 4) { + unsigned char buf[2]; + buf[0] = 0xd6; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 8) { + unsigned char buf[2]; + buf[0] = 0xd7; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 16) { + unsigned char buf[2]; + buf[0] = 0xd8; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l < 256) { + unsigned char buf[3]; + buf[0] = 0xc7; + buf[1] = l; + buf[2] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 3); + } else if(l < 65536) { + unsigned char buf[4]; + buf[0] = 0xc8; + _msgpack_store16(&buf[1], (uint16_t)l); + buf[3] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 4); + } else { + unsigned char buf[6]; + buf[0] = 0xc9; + _msgpack_store32(&buf[1], (uint32_t)l); + buf[5] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 6); + } + +} + + + #undef msgpack_pack_append_buffer #undef TAKE8_8 diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 03c735e..327a524 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -24,6 +24,7 @@ typedef struct unpack_user { PyObject *object_hook; bool has_pairs_hook; PyObject *list_hook; + PyObject *ext_type_hook; const char *encoding; const char *unicode_errors; } unpack_user; @@ -235,4 +236,21 @@ static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* return 0; } +static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, + unsigned int lenght, msgpack_unpack_object* o) +{ + PyObject *py; + int8_t typecode = (int8_t)*pos++; + if (!u->ext_type_hook) { + PyErr_SetString(PyExc_AssertionError, "u->ext_type_hook cannot be NULL"); + return -1; + } + // lenght also includes the typecode, so the actual data is lenght-1 + py = PyEval_CallFunction(u->ext_type_hook, "(is#)", typecode, pos, lenght-1); + if (!py) + return -1; + *o = py; + return 0; +} + #include "unpack_template.h" diff --git a/msgpack/unpack_define.h b/msgpack/unpack_define.h index 0b14f52..2ee92b5 100644 --- a/msgpack/unpack_define.h +++ b/msgpack/unpack_define.h @@ -45,10 +45,11 @@ typedef enum { CS_BIN_8 = 0x04, CS_BIN_16 = 0x05, CS_BIN_32 = 0x06, - //CS_ = 0x07, - //CS_ = 0x08, - //CS_ = 0x09, + CS_EXT_8 = 0x07, + CS_EXT_16 = 0x08, + CS_EXT_32 = 0x09, + CS_FLOAT = 0x0a, CS_DOUBLE = 0x0b, CS_UINT_8 = 0x0c, @@ -60,6 +61,12 @@ typedef enum { CS_INT_32 = 0x12, CS_INT_64 = 0x13, + //CS_FIXEXT1 = 0x14, + //CS_FIXEXT2 = 0x15, + //CS_FIXEXT4 = 0x16, + //CS_FIXEXT8 = 0x17, + //CS_FIXEXT16 = 0x18, + CS_RAW_8 = 0x19, CS_RAW_16 = 0x1a, CS_RAW_32 = 0x1b, @@ -70,6 +77,7 @@ typedef enum { ACS_RAW_VALUE, ACS_BIN_VALUE, + ACS_EXT_VALUE, } msgpack_unpack_state; diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index 25229ac..1a709ec 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -184,9 +184,15 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l push_simple_value(_false); case 0xc3: // true push_simple_value(_true); - //case 0xc7: - //case 0xc8: - //case 0xc9: + //case 0xc4: + //case 0xc5: + //case 0xc6: + case 0xc7: // ext 8 + again_fixed_trail(NEXT_CS(p), 1); + case 0xc8: // ext 16 + again_fixed_trail(NEXT_CS(p), 2); + case 0xc9: // ext 32 + again_fixed_trail(NEXT_CS(p), 4); case 0xca: // float case 0xcb: // double case 0xcc: // unsigned int 8 @@ -198,15 +204,16 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l case 0xd2: // signed int 32 case 0xd3: // signed int 64 again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03)); - case 0xc4: // bin 8 - case 0xc5: // bin 16 - case 0xc6: // bin 32 - //case 0xd4: - //case 0xd5: - //case 0xd6: // big integer 16 - //case 0xd7: // big integer 32 - //case 0xd8: // big float 16 - case 0xd9: // raw 8 + case 0xd4: // fixext 1 + case 0xd5: // fixext 2 + case 0xd6: // fixext 4 + case 0xd7: // fixext 8 + again_fixed_trail_if_zero(ACS_EXT_VALUE, + (1 << (((unsigned int)*p) & 0x03))+1, + _ext_zero); + case 0xd8: // fixext 16 + again_fixed_trail_if_zero(ACS_EXT_VALUE, 16+1, _ext_zero); + //case 0xd9: case 0xda: // raw 16 case 0xdb: // raw 32 case 0xdc: // array 16 @@ -237,8 +244,16 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l if((size_t)(pe - p) < trail) { goto _out; } n = p; p += trail - 1; switch(cs) { - //case CS_ - //case CS_ + case CS_EXT_8: + again_fixed_trail_if_zero(ACS_EXT_VALUE, *(uint8_t*)n+1, _ext_zero); + case CS_EXT_16: + again_fixed_trail_if_zero(ACS_EXT_VALUE, + _msgpack_load16(uint16_t,n)+1, + _ext_zero); + case CS_EXT_32: + again_fixed_trail_if_zero(ACS_EXT_VALUE, + _msgpack_load32(uint32_t,n)+1, + _ext_zero); case CS_FLOAT: { union { uint32_t i; float f; } mem; mem.i = _msgpack_load32(uint32_t,n); @@ -269,26 +284,6 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l case CS_INT_64: push_fixed_value(_int64, _msgpack_load64(int64_t,n)); - //case CS_ - //case CS_ - //case CS_BIG_INT_16: - // again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load16(uint16_t,n), _big_int_zero); - //case CS_BIG_INT_32: - // again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load32(uint32_t,n), _big_int_zero); - //case ACS_BIG_INT_VALUE: - //_big_int_zero: - // // FIXME - // push_variable_value(_big_int, data, n, trail); - - //case CS_BIG_FLOAT_16: - // again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load16(uint16_t,n), _big_float_zero); - //case CS_BIG_FLOAT_32: - // again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load32(uint32_t,n), _big_float_zero); - //case ACS_BIG_FLOAT_VALUE: - //_big_float_zero: - // // FIXME - // push_variable_value(_big_float, data, n, trail); - case CS_BIN_8: again_fixed_trail_if_zero(ACS_BIN_VALUE, *(uint8_t*)n, _bin_zero); case CS_BIN_16: @@ -309,6 +304,10 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l _raw_zero: push_variable_value(_raw, data, n, trail); + case ACS_EXT_VALUE: + _ext_zero: + push_variable_value(_ext, data, n, trail); + case CS_ARRAY_16: start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM); case CS_ARRAY_32: @@ -320,7 +319,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l case CS_MAP_32: /* FIXME security guard */ start_container(_map, _msgpack_load32(uint32_t,n), CT_MAP_KEY); - + default: goto _failed; } @@ -92,6 +92,7 @@ if not hasattr(sys, 'pypy_version_info'): libraries=libraries, include_dirs=['.'], define_macros=macros, + extra_compile_args=['-O0'], )) del libraries, macros diff --git a/test/test_extension.py b/test/test_extension.py new file mode 100644 index 0000000..94117e1 --- /dev/null +++ b/test/test_extension.py @@ -0,0 +1,64 @@ +import py +import array +import struct +import msgpack + +def test_pack_extended_type(): + def p(s): + packer = msgpack.Packer() + packer.pack_extended_type(0x42, s) + return packer.bytes() + assert p('A') == '\xd4\x42A' # fixext 1 + assert p('AB') == '\xd5\x42AB' # fixext 2 + assert p('ABCD') == '\xd6\x42ABCD' # fixext 4 + assert p('ABCDEFGH') == '\xd7\x42ABCDEFGH' # fixext 8 + assert p('A'*16) == '\xd8\x42' + 'A'*16 # fixext 16 + assert p('ABC') == '\xc7\x03\x42ABC' # ext 8 + assert p('A'*0x0123) == '\xc8\x01\x23\x42' + 'A'*0x0123 # ext 16 + assert p('A'*0x00012345) == '\xc9\x00\x01\x23\x45\x42' + 'A'*0x00012345 # ext 32 + +def test_unpack_extended_type(): + class MyUnpacker(msgpack.Unpacker): + def read_extended_type(self, typecode, data): + return (typecode, data) + + def u(s): + unpacker = MyUnpacker() + unpacker.feed(s) + return unpacker.unpack_one() + + assert u('\xd4\x42A') == (0x42, 'A') # fixext 1 + assert u('\xd5\x42AB') == (0x42, 'AB') # fixext 2 + assert u('\xd6\x42ABCD') == (0x42, 'ABCD') # fixext 4 + assert u('\xd7\x42ABCDEFGH') == (0x42, 'ABCDEFGH') # fixext 8 + assert u('\xd8\x42' + 'A'*16) == (0x42, 'A'*16) # fixext 16 + assert u('\xc7\x03\x42ABC') == (0x42, 'ABC') # ext 8 + assert (u('\xc8\x01\x23\x42' + 'A'*0x0123) == + (0x42, 'A'*0x0123)) # ext 16 + assert (u('\xc9\x00\x01\x23\x45\x42' + 'A'*0x00012345) == + (0x42, 'A'*0x00012345)) # ext 32 + + +def test_extension_type(): + class MyPacker(msgpack.Packer): + def handle_unknown_type(self, obj): + if isinstance(obj, array.array): + typecode = 123 # application specific typecode + data = obj.tostring() + self.pack_extended_type(typecode, data) + return True + + class MyUnpacker(msgpack.Unpacker): + def read_extended_type(self, typecode, data): + assert typecode == 123 + obj = array.array('d') + obj.fromstring(data) + return obj + + obj = [42, 'hello', array.array('d', [1.1, 2.2, 3.3])] + packer = MyPacker() + unpacker = MyUnpacker(None) + s = packer.pack(obj) + unpacker.feed(s) + obj2 = unpacker.unpack_one() + assert obj == obj2 diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index 9db14ca..abc447a 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -1,9 +1,10 @@ #!/usr/bin/env python # coding: utf-8 +import py import six from msgpack import Unpacker, BufferFull -from msgpack.exceptions import OutOfData +from msgpack.exceptions import OutOfData, ExtraData, UnpackValueError from pytest import raises @@ -85,3 +86,15 @@ def test_readbytes(): assert unpacker.unpack() == ord(b'a') assert unpacker.unpack() == ord(b'r') +def test_unpack_one(): + unpacker = Unpacker() + unpacker.feed('\xda\x00\x03abc') + assert unpacker.unpack_one() == 'abc' + # + unpacker = Unpacker() + unpacker.feed('\xda\x00\x03abcd') + py.test.raises(ExtraData, "unpacker.unpack_one()") + # + unpacker = Unpacker() + unpacker.feed('\xda\x00\x03ab') + py.test.raises(UnpackValueError, "unpacker.unpack_one()") |