From d61097511a1caa0e3bc5a70c1d2d92f448bd5025 Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Tue, 15 Oct 2013 16:59:43 +0200 Subject: add support for extended types: you can now pack/unpack custom python objects by subclassing Packer and Unpacker --- msgpack/fallback.py | 131 +++++++++++++++++++++++++++++++------------------ test/test_extension.py | 24 +++++++++ 2 files changed, 108 insertions(+), 47 deletions(-) create mode 100644 test/test_extension.py diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 8f9d646..b7f455b 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -58,54 +58,9 @@ TYPE_ARRAY = 1 TYPE_MAP = 2 TYPE_RAW = 3 -DEFAULT_RECURSE_LIMIT=511 - -def pack(o, stream, **kwargs): - """ - Pack object `o` and write it to `stream` - - See :class:`Packer` for options. - """ - packer = Packer(**kwargs) - stream.write(packer.pack(o)) - -def packb(o, **kwargs): - """ - Pack object `o` and return packed bytes +EXTENDED_TYPE = 1000 - See :class:`Packer` for options. - """ - return Packer(**kwargs).pack(o) - -def unpack(stream, **kwargs): - """ - Unpack an object from `stream`. - - Raises `ExtraData` when `packed` contains extra bytes. - See :class:`Unpacker` for options. - """ - unpacker = Unpacker(stream, **kwargs) - ret = unpacker._fb_unpack() - if unpacker._fb_got_extradata(): - raise ExtraData(ret, unpacker._fb_get_extradata()) - return ret - -def unpackb(packed, **kwargs): - """ - Unpack an object from `packed`. - - Raises `ExtraData` when `packed` contains extra bytes. - See :class:`Unpacker` for options. 
- """ - unpacker = Unpacker(None, **kwargs) - unpacker.feed(packed) - try: - ret = unpacker._fb_unpack() - except OutOfData: - raise UnpackValueError("Data is not enough.") - if unpacker._fb_got_extradata(): - raise ExtraData(ret, unpacker._fb_get_extradata()) - return ret +DEFAULT_RECURSE_LIMIT=511 class Unpacker(object): """ @@ -334,6 +289,9 @@ class Unpacker(object): elif b == 0xdf: n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] typ = TYPE_MAP + elif b == 0xc9: + n, typ = struct.unpack(">Ib", self._fb_read(5, write_bytes)) + typ += EXTENDED_TYPE else: raise UnpackValueError("Unknown header: 0x%x" % b) return typ, n, obj @@ -390,6 +348,10 @@ class Unpacker(object): if self._encoding is not None: obj = obj.decode(self._encoding, self._unicode_errors) return obj + if typ >= EXTENDED_TYPE: + typ -= EXTENDED_TYPE + data = self._fb_read(n, write_bytes) + return self.handle_extended_type(typ, data) assert typ == TYPE_IMMEDIATE return obj @@ -411,6 +373,9 @@ class Unpacker(object): self._fb_consume() return ret + def handle_extended_type(self, typecode, data): + raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode) + def read_array_header(self, write_bytes=None): ret = self._fb_unpack(EX_READ_ARRAY_HEADER, write_bytes) self._fb_consume() @@ -521,10 +486,33 @@ class Packer(object): if isinstance(obj, dict): return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj), nest_limit - 1) + if self.pack_extended_type(obj): + # it means that obj was succesfully handled by + # handle_extended_type, so we are done + return if self._default is not None: return self._pack(self._default(obj), nest_limit - 1) raise TypeError("Cannot serialize %r" % obj) + def pack_extended_type(self, obj): + res = self.handle_extended_type(obj) + if res is None: + return False + fmt, typecode, data = res + # for now we support only this. 
We should add support for the other + # fixext/ext formats + assert fmt == "ext 32" + assert 0 <= typecode <= 127 + N = len(data) + self._buffer.write(struct.pack('>BIB', 0xc9, N, typecode)) + self._buffer.write(data) + return True + + def handle_extended_type(self, obj): + # by default we don't support any extended type. This can be + # overridden by subclasses + return None + def pack(self, obj): self._pack(obj) ret = self._buffer.getvalue() @@ -590,3 +578,52 @@ class Packer(object): def reset(self): self._buffer = StringIO() + + +def pack(o, stream, Packer=Packer, **kwargs): + """ + Pack object `o` and write it to `stream` + + See :class:`Packer` for options. + """ + packer = Packer(**kwargs) + stream.write(packer.pack(o)) + +def packb(o, Packer=Packer, **kwargs): + """ + Pack object `o` and return packed bytes + + See :class:`Packer` for options. + """ + return Packer(**kwargs).pack(o) + +def unpack(stream, Unpacker=Unpacker, **kwargs): + """ + Unpack an object from `stream`. + + Raises `ExtraData` when `packed` contains extra bytes. + See :class:`Unpacker` for options. + """ + unpacker = Unpacker(stream, **kwargs) + ret = unpacker._fb_unpack() + if unpacker._fb_got_extradata(): + raise ExtraData(ret, unpacker._fb_get_extradata()) + return ret + +def unpackb(packed, Unpacker=Unpacker, **kwargs): + """ + Unpack an object from `packed`. + + Raises `ExtraData` when `packed` contains extra bytes. + See :class:`Unpacker` for options. 
+ """ + unpacker = Unpacker(None, **kwargs) + unpacker.feed(packed) + try: + ret = unpacker._fb_unpack() + except OutOfData: + raise UnpackValueError("Data is not enough.") + if unpacker._fb_got_extradata(): + raise ExtraData(ret, unpacker._fb_get_extradata()) + return ret + diff --git a/test/test_extension.py b/test/test_extension.py new file mode 100644 index 0000000..45e6027 --- /dev/null +++ b/test/test_extension.py @@ -0,0 +1,24 @@ +import array +import msgpack + +def test_extension_type(): + class MyPacker(msgpack.Packer): + def handle_extended_type(self, obj): + if isinstance(obj, array.array): + fmt = "ext 32" + typecode = 123 # application specific typecode + data = obj.tostring() + return fmt, typecode, data + + class MyUnpacker(msgpack.Unpacker): + def handle_extended_type(self, typecode, data): + assert typecode == 123 + obj = array.array('d') + obj.fromstring(data) + return obj + + obj = [42, 'hello', array.array('d', [1.1, 2.2, 3.3])] + s = msgpack.packb(obj, MyPacker) + obj2 = msgpack.unpackb(s, MyUnpacker) + assert obj == obj2 + -- cgit v1.2.1 From 5529dfe59660f3c2fc5058e6fa42b24fe764a255 Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Fri, 18 Oct 2013 14:38:52 +0200 Subject: kill some duplicate code from unpack/unpackb and move the logic to Unpacker.unpack_one. By doing this we no longer need to make the module-level pack/unpack parametric on the class, because they contain no logic at all --- msgpack/fallback.py | 95 +++++++++++++++++++++++++------------------------- test/test_extension.py | 8 +++-- 2 files changed, 52 insertions(+), 51 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index b7f455b..2c79482 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -62,6 +62,44 @@ EXTENDED_TYPE = 1000 DEFAULT_RECURSE_LIMIT=511 +def pack(o, stream, **kwargs): + """ + Pack object `o` and write it to `stream` + + See :class:`Packer` for options. 
+ """ + packer = Packer(**kwargs) + stream.write(packer.pack(o)) + +def packb(o, **kwargs): + """ + Pack object `o` and return packed bytes + + See :class:`Packer` for options. + """ + return Packer(**kwargs).pack(o) + +def unpack(stream, **kwargs): + """ + Unpack an object from `stream`. + + Raises `ExtraData` when `packed` contains extra bytes. + See :class:`Unpacker` for options. + """ + unpacker = Unpacker(stream, **kwargs) + return unpacker.unpack_one() + +def unpackb(packed, **kwargs): + """ + Unpack an object from `packed`. + + Raises `ExtraData` when `packed` contains extra bytes. + See :class:`Unpacker` for options. + """ + unpacker = Unpacker(None, **kwargs) + unpacker.feed(packed) + return unpacker.unpack_one() + class Unpacker(object): """ Streaming unpacker. @@ -149,6 +187,15 @@ class Unpacker(object): raise ValueError("object_pairs_hook and object_hook are mutually " "exclusive") + def unpack_one(self): + try: + ret = self._fb_unpack() + except OutOfData: + raise UnpackValueError("Data is not enough.") + if self._fb_got_extradata(): + raise ExtraData(ret, self._fb_get_extradata()) + return ret + def feed(self, next_bytes): if isinstance(next_bytes, array.array): next_bytes = next_bytes.tostring() @@ -579,51 +626,3 @@ class Packer(object): def reset(self): self._buffer = StringIO() - -def pack(o, stream, Packer=Packer, **kwargs): - """ - Pack object `o` and write it to `stream` - - See :class:`Packer` for options. - """ - packer = Packer(**kwargs) - stream.write(packer.pack(o)) - -def packb(o, Packer=Packer, **kwargs): - """ - Pack object `o` and return packed bytes - - See :class:`Packer` for options. - """ - return Packer(**kwargs).pack(o) - -def unpack(stream, Unpacker=Unpacker, **kwargs): - """ - Unpack an object from `stream`. - - Raises `ExtraData` when `packed` contains extra bytes. - See :class:`Unpacker` for options. 
- """ - unpacker = Unpacker(stream, **kwargs) - ret = unpacker._fb_unpack() - if unpacker._fb_got_extradata(): - raise ExtraData(ret, unpacker._fb_get_extradata()) - return ret - -def unpackb(packed, Unpacker=Unpacker, **kwargs): - """ - Unpack an object from `packed`. - - Raises `ExtraData` when `packed` contains extra bytes. - See :class:`Unpacker` for options. - """ - unpacker = Unpacker(None, **kwargs) - unpacker.feed(packed) - try: - ret = unpacker._fb_unpack() - except OutOfData: - raise UnpackValueError("Data is not enough.") - if unpacker._fb_got_extradata(): - raise ExtraData(ret, unpacker._fb_get_extradata()) - return ret - diff --git a/test/test_extension.py b/test/test_extension.py index 45e6027..0a9c14f 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -18,7 +18,9 @@ def test_extension_type(): return obj obj = [42, 'hello', array.array('d', [1.1, 2.2, 3.3])] - s = msgpack.packb(obj, MyPacker) - obj2 = msgpack.unpackb(s, MyUnpacker) + packer = MyPacker() + unpacker = MyUnpacker(None) + s = packer.pack(obj) + unpacker.feed(s) + obj2 = unpacker.unpack_one() assert obj == obj2 - -- cgit v1.2.1 From 522c4bfc7993c296b78df9c9c91aac5fd40ae8e0 Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Fri, 18 Oct 2013 15:03:58 +0200 Subject: slightly change to API --- msgpack/fallback.py | 26 ++++++++++---------------- test/test_extension.py | 7 ++++--- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 2c79482..101bd0f 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -398,7 +398,7 @@ class Unpacker(object): if typ >= EXTENDED_TYPE: typ -= EXTENDED_TYPE data = self._fb_read(n, write_bytes) - return self.handle_extended_type(typ, data) + return self.read_extended_type(typ, data) assert typ == TYPE_IMMEDIATE return obj @@ -420,7 +420,7 @@ class Unpacker(object): self._fb_consume() return ret - def handle_extended_type(self, typecode, data): + def read_extended_type(self, typecode, 
data): raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode) def read_array_header(self, write_bytes=None): @@ -533,19 +533,19 @@ class Packer(object): if isinstance(obj, dict): return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj), nest_limit - 1) - if self.pack_extended_type(obj): - # it means that obj was succesfully handled by - # handle_extended_type, so we are done + if self.handle_unknown_type(obj): + # it means that obj was succesfully packed, so we are done return if self._default is not None: return self._pack(self._default(obj), nest_limit - 1) raise TypeError("Cannot serialize %r" % obj) - def pack_extended_type(self, obj): - res = self.handle_extended_type(obj) - if res is None: - return False - fmt, typecode, data = res + def handle_unknown_type(self, obj): + # by default we don't support any extended type. This can be + # overridden by subclasses + return None + + def pack_extended_type(self, fmt, typecode, data): # for now we support only this. We should add support for the other # fixext/ext formats assert fmt == "ext 32" @@ -553,12 +553,6 @@ class Packer(object): N = len(data) self._buffer.write(struct.pack('>BIB', 0xc9, N, typecode)) self._buffer.write(data) - return True - - def handle_extended_type(self, obj): - # by default we don't support any extended type. 
This can be - # overridden by subclasses - return None def pack(self, obj): self._pack(obj) diff --git a/test/test_extension.py b/test/test_extension.py index 0a9c14f..0b26f8e 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -3,15 +3,16 @@ import msgpack def test_extension_type(): class MyPacker(msgpack.Packer): - def handle_extended_type(self, obj): + def handle_unknown_type(self, obj): if isinstance(obj, array.array): fmt = "ext 32" typecode = 123 # application specific typecode data = obj.tostring() - return fmt, typecode, data + self.pack_extended_type(fmt, typecode, data) + return True class MyUnpacker(msgpack.Unpacker): - def handle_extended_type(self, typecode, data): + def read_extended_type(self, typecode, data): assert typecode == 123 obj = array.array('d') obj.fromstring(data) -- cgit v1.2.1 From c727440ba5fe2f77d6cc03171ad7c193a3f481ee Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Fri, 18 Oct 2013 15:45:50 +0200 Subject: automatically find the best format to encode extended types --- msgpack/fallback.py | 28 ++++++++++++++++++++++------ test/test_extension.py | 18 ++++++++++++++++-- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 101bd0f..f984dcd 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -545,13 +545,29 @@ class Packer(object): # overridden by subclasses return None - def pack_extended_type(self, fmt, typecode, data): - # for now we support only this. 
We should add support for the other - # fixext/ext formats - assert fmt == "ext 32" + def pack_extended_type(self, typecode, data): assert 0 <= typecode <= 127 - N = len(data) - self._buffer.write(struct.pack('>BIB', 0xc9, N, typecode)) + n = len(data) + if n == 1: + header = struct.pack(">BB", 0xd4, typecode) # fixext 1 + elif n == 2: + header = struct.pack(">BB", 0xd5, typecode) # fixext 2 + elif n == 4: + header = struct.pack(">BB", 0xd6, typecode) # fixext 4 + elif n == 8: + header = struct.pack(">BB", 0xd7, typecode) # fixext 8 + elif n == 16: + header = struct.pack(">BB", 0xd8, typecode) # fixext 16 + elif n <= 2**8-1: + header = struct.pack(">BBB", 0xc7, n, typecode) # ext 8 + elif n <= 2**16-1: + header = struct.pack(">BHB", 0xc8, n, typecode) # ext 16 + elif n <= 2**32-1: + header = struct.pack(">BIB", 0xc9, n, typecode) # ext 32 + else: + raise PackValueError("ext data too large") + # + self._buffer.write(header) self._buffer.write(data) def pack(self, obj): diff --git a/test/test_extension.py b/test/test_extension.py index 0b26f8e..1908fa2 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -1,14 +1,28 @@ import array +import struct import msgpack +def test_pack_extended_type(): + def p(s): + packer = msgpack.Packer() + packer.pack_extended_type(0x42, s) + return packer._buffer.getvalue() + assert p('A') == '\xd4\x42A' # fixext 1 + assert p('AB') == '\xd5\x42AB' # fixext 2 + assert p('ABCD') == '\xd6\x42ABCD' # fixext 4 + assert p('ABCDEFGH') == '\xd7\x42ABCDEFGH' # fixext 8 + assert p('A'*16) == '\xd8\x42' + 'A'*16 # fixext 16 + assert p('ABC') == '\xc7\x03\x42ABC' # ext 8 + assert p('A'*0x0123) == '\xc8\x01\x23\x42' + 'A'*0x0123 # ext 16 + assert p('A'*0x00012345) == '\xc9\x00\x01\x23\x45\x42' + 'A'*0x00012345 # ext 32 + def test_extension_type(): class MyPacker(msgpack.Packer): def handle_unknown_type(self, obj): if isinstance(obj, array.array): - fmt = "ext 32" typecode = 123 # application specific typecode data = obj.tostring() - 
self.pack_extended_type(fmt, typecode, data) + self.pack_extended_type(typecode, data) return True class MyUnpacker(msgpack.Unpacker): -- cgit v1.2.1 From afa28fb2051cb00f03c83e020745e1eb238ff4ac Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Fri, 18 Oct 2013 15:54:12 +0200 Subject: add support to unpack all ext formats --- msgpack/fallback.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index f984dcd..c272420 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -336,7 +336,33 @@ class Unpacker(object): elif b == 0xdf: n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] typ = TYPE_MAP - elif b == 0xc9: + elif b == 0xd4: # fixext 1 + typ = struct.unpack(">B", self._fb_read(1, write_bytes))[0] + n = 1 + typ += EXTENDED_TYPE + elif b == 0xd5: # fixext 2 + typ = struct.unpack(">B", self._fb_read(1, write_bytes))[0] + n = 2 + typ += EXTENDED_TYPE + elif b == 0xd6: # fixext 4 + typ = struct.unpack(">B", self._fb_read(1, write_bytes))[0] + n = 4 + typ += EXTENDED_TYPE + elif b == 0xd7: # fixext 8 + typ = struct.unpack(">B", self._fb_read(1, write_bytes))[0] + n = 8 + typ += EXTENDED_TYPE + elif b == 0xd8: # fixext 16 + typ = struct.unpack(">B", self._fb_read(1, write_bytes))[0] + n = 16 + typ += EXTENDED_TYPE + elif b == 0xc7: # ext 8 + n, typ = struct.unpack(">Bb", self._fb_read(2, write_bytes)) + typ += EXTENDED_TYPE + elif b == 0xc8: # ext 16 + n, typ = struct.unpack(">Hb", self._fb_read(3, write_bytes)) + typ += EXTENDED_TYPE + elif b == 0xc9: # ext 32 n, typ = struct.unpack(">Ib", self._fb_read(5, write_bytes)) typ += EXTENDED_TYPE else: -- cgit v1.2.1 From 5467515065b95496b9f5b9d842ffc73c9ccb806e Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Fri, 18 Oct 2013 17:33:54 +0200 Subject: implement Packer.pack_extended_type also in the cython version of the code --- msgpack/_packer.pyx | 6 +++++ msgpack/pack.h | 2 ++ msgpack/pack_template.h | 60 
+++++++++++++++++++++++++++++++++++++++++++++++++ test/test_extension.py | 2 +- 4 files changed, 69 insertions(+), 1 deletion(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 6289192..985559c 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -5,6 +5,7 @@ from cpython cimport * from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * +from libc.stdint cimport int8_t from msgpack.exceptions import PackValueError @@ -27,6 +28,7 @@ cdef extern from "pack.h": int msgpack_pack_map(msgpack_packer* pk, size_t l) int msgpack_pack_raw(msgpack_packer* pk, size_t l) int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) + int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l) cdef int DEFAULT_RECURSE_LIMIT=511 @@ -193,6 +195,10 @@ cdef class Packer(object): self.pk.length = 0 return buf + def pack_extended_type(self, typecode, data): + msgpack_pack_ext(&self.pk, typecode, len(data)) + msgpack_pack_raw_body(&self.pk, data, len(data)) + def pack_array_header(self, size_t size): cdef int ret = msgpack_pack_array(&self.pk, size) if ret == -1: diff --git a/msgpack/pack.h b/msgpack/pack.h index 1539991..08fdd82 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -70,6 +70,8 @@ static inline int msgpack_pack_map(msgpack_packer* pk, unsigned int n); static inline int msgpack_pack_raw(msgpack_packer* pk, size_t l); static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l); +static inline int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l); + static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l) { char* buf = pk->buf; diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h index 9e00d7e..ac9815f 100644 --- a/msgpack/pack_template.h +++ b/msgpack/pack_template.h @@ -683,6 +683,66 @@ static inline int msgpack_pack_raw_body(msgpack_packer* x, const void* b, size_t msgpack_pack_append_buffer(x, (const unsigned char*)b, l); } +/* + * 
Ext + */ + +static inline int msgpack_pack_ext(msgpack_packer* x, int8_t typecode, size_t l) +{ + if (l == 1) { + unsigned char buf[2]; + buf[0] = 0xd4; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 2) { + unsigned char buf[2]; + buf[0] = 0xd5; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 4) { + unsigned char buf[2]; + buf[0] = 0xd6; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 8) { + unsigned char buf[2]; + buf[0] = 0xd7; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 16) { + unsigned char buf[2]; + buf[0] = 0xd8; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l < 256) { + unsigned char buf[3]; + buf[0] = 0xc7; + buf[1] = l; + buf[2] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 3); + } else if(l < 65536) { + unsigned char buf[4]; + buf[0] = 0xc8; + _msgpack_store16(&buf[1], (uint16_t)l); + buf[3] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 4); + } else { + unsigned char buf[6]; + buf[0] = 0xc9; + _msgpack_store32(&buf[1], (uint32_t)l); + buf[5] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 6); + } + +} + + + #undef msgpack_pack_append_buffer #undef TAKE8_8 diff --git a/test/test_extension.py b/test/test_extension.py index 1908fa2..9ec1153 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -6,7 +6,7 @@ def test_pack_extended_type(): def p(s): packer = msgpack.Packer() packer.pack_extended_type(0x42, s) - return packer._buffer.getvalue() + return packer.bytes() assert p('A') == '\xd4\x42A' # fixext 1 assert p('AB') == '\xd5\x42AB' # fixext 2 assert p('ABCD') == '\xd6\x42ABCD' # fixext 4 -- cgit v1.2.1 From a7485eccb2e5fcebbd76612a658f2e18bdebe745 Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Fri, 18 Oct 2013 17:46:42 +0200 Subject: add the hook for 
unknown types also to the cython Packer --- msgpack/_packer.pyx | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 985559c..f2a0f76 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -176,6 +176,9 @@ cdef class Packer(object): for v in o: ret = self._pack(v, nest_limit-1) if ret != 0: break + elif self.handle_unknown_type(o): + # it means that obj was succesfully packed, so we are done + return 0 elif self._default: o = self._default(o) ret = self._pack(o, nest_limit-1) @@ -195,6 +198,9 @@ cdef class Packer(object): self.pk.length = 0 return buf + def handle_unknown_type(self, obj): + return None + def pack_extended_type(self, typecode, data): msgpack_pack_ext(&self.pk, typecode, len(data)) msgpack_pack_raw_body(&self.pk, data, len(data)) -- cgit v1.2.1 From ff858387d37d37ec4472f6b6ac7010d8f2b0744f Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Sat, 19 Oct 2013 01:49:03 +0200 Subject: implement unpack_one also for the cython version, and add a test for it --- msgpack/_unpacker.pyx | 18 ++++++++++++++++++ test/test_sequnpack.py | 15 ++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 1f4dd85..e05b9ed 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -359,6 +359,24 @@ cdef class Unpacker(object): """ return self._unpack(unpack_construct, write_bytes) + def unpack_one(self, object write_bytes=None): + """ + unpack one object + + If write_bytes is not None, it will be called with parts of the raw + message as it is unpacked. + + Raises `UnpackValueError` if there are no more bytes to unpack. + Raises ``ExtraData`` if there are still bytes left after the unpacking. 
+ """ + try: + result = self.unpack() + except OutOfData: + raise UnpackValueError("Data is not enough") + if self.buf_head < self.buf_tail: + raise ExtraData(result, self.buf[self.buf_head:]) + return result + def skip(self, object write_bytes=None): """ read and ignore one object, returning None diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index 9db14ca..abc447a 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -1,9 +1,10 @@ #!/usr/bin/env python # coding: utf-8 +import py import six from msgpack import Unpacker, BufferFull -from msgpack.exceptions import OutOfData +from msgpack.exceptions import OutOfData, ExtraData, UnpackValueError from pytest import raises @@ -85,3 +86,15 @@ def test_readbytes(): assert unpacker.unpack() == ord(b'a') assert unpacker.unpack() == ord(b'r') +def test_unpack_one(): + unpacker = Unpacker() + unpacker.feed('\xda\x00\x03abc') + assert unpacker.unpack_one() == 'abc' + # + unpacker = Unpacker() + unpacker.feed('\xda\x00\x03abcd') + py.test.raises(ExtraData, "unpacker.unpack_one()") + # + unpacker = Unpacker() + unpacker.feed('\xda\x00\x03ab') + py.test.raises(UnpackValueError, "unpacker.unpack_one()") -- cgit v1.2.1 From 985d4c1496d8c9186079ebc4e42aee319e67c385 Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Sat, 19 Oct 2013 11:34:28 +0200 Subject: add a test for unpacking extended types --- test/test_extension.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/test/test_extension.py b/test/test_extension.py index 9ec1153..96944a3 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -16,6 +16,28 @@ def test_pack_extended_type(): assert p('A'*0x0123) == '\xc8\x01\x23\x42' + 'A'*0x0123 # ext 16 assert p('A'*0x00012345) == '\xc9\x00\x01\x23\x45\x42' + 'A'*0x00012345 # ext 32 +def test_unpack_extended_type(): + class MyUnpacker(msgpack.Unpacker): + def read_extended_type(self, typecode, data): + return (typecode, data) + + def u(s): + unpacker = MyUnpacker() + 
unpacker.feed(s) + return unpacker.unpack_one() + + assert u('\xd4\x42A') == (0x42, 'A') # fixext 1 + assert u('\xd5\x42AB') == (0x42, 'AB') # fixext 2 + assert u('\xd6\x42ABCD') == (0x42, 'ABCD') # fixext 4 + assert u('\xd7\x42ABCDEFGH') == (0x42, 'ABCDEFGH') # fixext 8 + assert u('\xd8\x42' + 'A'*16) == (0x42, 'A'*16) # fixext 16 + assert u('\xc7\x03\x42ABC') == (0x42, 'ABC') # ext 8 + assert (u('\xc8\x01\x23\x42' + 'A'*0x0123) == + (0x42, 'A'*0x0123)) # ext 16 + assert (u('\xc9\x00\x01\x23\x45\x42' + 'A'*0x00012345) == + (0x42, 'A'*0x00012345)) # ext 32 + + def test_extension_type(): class MyPacker(msgpack.Packer): def handle_unknown_type(self, obj): -- cgit v1.2.1 From 56dd1650a42a454027ba335b494100a9f211758e Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Sat, 19 Oct 2013 17:27:16 +0200 Subject: implement unpacking for all the fixext formats --- msgpack/_unpacker.pyx | 21 +++++++++++++++++++-- msgpack/unpack.h | 18 ++++++++++++++++++ msgpack/unpack_define.h | 14 ++++++++------ msgpack/unpack_template.h | 22 +++++++++++++++------- setup.py | 1 + test/test_extension.py | 1 + 6 files changed, 62 insertions(+), 15 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index e05b9ed..6500ef7 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -25,6 +25,7 @@ cdef extern from "unpack.h": PyObject* object_hook bint has_pairs_hook # call object_hook with k-v pairs PyObject* list_hook + PyObject* ext_type_hook char *encoding char *unicode_errors @@ -46,6 +47,7 @@ cdef extern from "unpack.h": cdef inline init_ctx(unpack_context *ctx, object object_hook, object object_pairs_hook, object list_hook, + object ext_type_hook, bint use_list, char* encoding, char* unicode_errors): unpack_init(ctx) ctx.user.use_list = use_list @@ -72,9 +74,17 @@ cdef inline init_ctx(unpack_context *ctx, raise TypeError("list_hook must be a callable.") ctx.user.list_hook = list_hook + if ext_type_hook is not None: + if not PyCallable_Check(ext_type_hook): + raise
TypeError("ext_type_hook must be a callable.") + ctx.user.ext_type_hook = ext_type_hook + ctx.user.encoding = encoding ctx.user.unicode_errors = unicode_errors +def default_read_extended_type(typecode, data): + raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode) + def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=1, encoding=None, unicode_errors="strict", object_pairs_hook=None, @@ -107,7 +117,8 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, unicode_errors = unicode_errors.encode('ascii') cerr = PyBytes_AsString(unicode_errors) - init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr) + init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, default_read_extended_type, + use_list, cenc, cerr) ret = unpack_construct(&ctx, buf, buf_len, &off) if ret == 1: obj = unpack_data(&ctx) @@ -249,7 +260,10 @@ cdef class Unpacker(object): self.unicode_errors = unicode_errors cerr = PyBytes_AsString(self.unicode_errors) - init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr) + ext_type_hook = self.read_extended_type + Py_INCREF(ext_type_hook) + init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, + ext_type_hook, use_list, cenc, cerr) def feed(self, object next_bytes): """Append `next_bytes` to internal buffer.""" @@ -404,6 +418,9 @@ cdef class Unpacker(object): """ return self._unpack(read_map_header, write_bytes) + def read_extended_type(self, typecode, data): + return default_read_extended_type(typecode, data) + def __iter__(self): return self diff --git a/msgpack/unpack.h b/msgpack/unpack.h index baeed1f..97ebd3f 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -24,6 +24,7 @@ typedef struct unpack_user { PyObject *object_hook; bool has_pairs_hook; PyObject *list_hook; + PyObject *ext_type_hook; const char *encoding; const char *unicode_errors; } unpack_user; @@ -226,4 +227,21 @@ static inline int 
unpack_callback_raw(unpack_user* u, const char* b, const char* return 0; } +static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, + unsigned int lenght, msgpack_unpack_object* o) +{ + PyObject *py; + int8_t typecode = (int8_t)*pos++; + if (!u->ext_type_hook) { + PyErr_SetString(PyExc_AssertionError, "u->ext_type_hook cannot be NULL"); + return -1; + } + // lenght also includes the typecode, so the actual data is lenght-1 + py = PyEval_CallFunction(u->ext_type_hook, "(is#)", typecode, pos, lenght-1); + if (!py) + return -1; + *o = py; + return 0; +} + #include "unpack_template.h" diff --git a/msgpack/unpack_define.h b/msgpack/unpack_define.h index c81b990..986fa91 100644 --- a/msgpack/unpack_define.h +++ b/msgpack/unpack_define.h @@ -59,12 +59,12 @@ typedef enum { CS_INT_32 = 0x12, CS_INT_64 = 0x13, - //CS_ = 0x14, - //CS_ = 0x15, - //CS_BIG_INT_16 = 0x16, - //CS_BIG_INT_32 = 0x17, - //CS_BIG_FLOAT_16 = 0x18, - //CS_BIG_FLOAT_32 = 0x19, + CS_FIXEXT1 = 0x14, + CS_FIXEXT2 = 0x15, + CS_FIXEXT4 = 0x16, + CS_FIXEXT8 = 0x17, + CS_FIXEXT16 = 0x18, + CS_RAW_16 = 0x1a, CS_RAW_32 = 0x1b, CS_ARRAY_16 = 0x1c, @@ -75,6 +75,8 @@ typedef enum { //ACS_BIG_INT_VALUE, //ACS_BIG_FLOAT_VALUE, ACS_RAW_VALUE, + ACS_EXT_VALUE, + } msgpack_unpack_state; diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index 29ac935..b051075 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -202,12 +202,16 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l case 0xd2: // signed int 32 case 0xd3: // signed int 64 again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03)); - //case 0xd4: - //case 0xd5: - //case 0xd6: // big integer 16 - //case 0xd7: // big integer 32 - //case 0xd8: // big float 16 - //case 0xd9: // big float 32 + case 0xd4: // fixext 1 + case 0xd5: // fixext 2 + case 0xd6: // fixext 4 + case 0xd7: // fixext 8 + again_fixed_trail_if_zero(ACS_EXT_VALUE, + (1 << (((unsigned int)*p) & 
0x03))+1, + _ext_zero); + case 0xd8: // fixext 16 + again_fixed_trail_if_zero(ACS_EXT_VALUE, 16+1, _ext_zero); + //case 0xd9: case 0xda: // raw 16 case 0xdb: // raw 32 case 0xdc: // array 16 @@ -298,6 +302,10 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l _raw_zero: push_variable_value(_raw, data, n, trail); + case ACS_EXT_VALUE: + _ext_zero: + push_variable_value(_ext, data, n, trail); + case CS_ARRAY_16: start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM); case CS_ARRAY_32: @@ -309,7 +317,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l case CS_MAP_32: /* FIXME security guard */ start_container(_map, _msgpack_load32(uint32_t,n), CT_MAP_KEY); - + default: goto _failed; } diff --git a/setup.py b/setup.py index 1055a61..83ae79f 100644 --- a/setup.py +++ b/setup.py @@ -92,6 +92,7 @@ if not hasattr(sys, 'pypy_version_info'): libraries=libraries, include_dirs=['.'], define_macros=macros, + extra_compile_args=['-O0'], )) del libraries, macros diff --git a/test/test_extension.py b/test/test_extension.py index 96944a3..94117e1 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -1,3 +1,4 @@ +import py import array import struct import msgpack -- cgit v1.2.1 From c9b97f078854dda02dc9404ee9d88ca5e16fb493 Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Sat, 19 Oct 2013 18:04:30 +0200 Subject: implement unpacking of ext 8,16,32 --- msgpack/unpack_define.h | 17 ++++++++--------- msgpack/unpack_template.h | 21 ++++++++++++++++----- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/msgpack/unpack_define.h b/msgpack/unpack_define.h index 986fa91..17c287e 100644 --- a/msgpack/unpack_define.h +++ b/msgpack/unpack_define.h @@ -44,10 +44,9 @@ typedef enum { //CS_ = 0x04, //CS_ = 0x05, //CS_ = 0x06, - //CS_ = 0x07, - - //CS_ = 0x08, - //CS_ = 0x09, + CS_EXT_8 = 0x07, + CS_EXT_16 = 0x08, + CS_EXT_32 = 0x09, CS_FLOAT = 0x0a, CS_DOUBLE = 0x0b, CS_UINT_8 = 0x0c, @@ 
-59,11 +58,11 @@ typedef enum { CS_INT_32 = 0x12, CS_INT_64 = 0x13, - CS_FIXEXT1 = 0x14, - CS_FIXEXT2 = 0x15, - CS_FIXEXT4 = 0x16, - CS_FIXEXT8 = 0x17, - CS_FIXEXT16 = 0x18, + //CS_FIXEXT1 = 0x14, + //CS_FIXEXT2 = 0x15, + //CS_FIXEXT4 = 0x16, + //CS_FIXEXT8 = 0x17, + //CS_FIXEXT16 = 0x18, CS_RAW_16 = 0x1a, CS_RAW_32 = 0x1b, diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index b051075..0c6af0e 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -188,9 +188,12 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l //case 0xc4: //case 0xc5: //case 0xc6: - //case 0xc7: - //case 0xc8: - //case 0xc9: + case 0xc7: // ext 8 + again_fixed_trail(NEXT_CS(p), 1); + case 0xc8: // ext 16 + again_fixed_trail(NEXT_CS(p), 2); + case 0xc9: // ext 32 + again_fixed_trail(NEXT_CS(p), 4); case 0xca: // float case 0xcb: // double case 0xcc: // unsigned int 8 @@ -242,8 +245,16 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l if((size_t)(pe - p) < trail) { goto _out; } n = p; p += trail - 1; switch(cs) { - //case CS_ - //case CS_ + case CS_EXT_8: + again_fixed_trail_if_zero(ACS_EXT_VALUE, *(uint8_t*)n+1, _ext_zero); + case CS_EXT_16: + again_fixed_trail_if_zero(ACS_EXT_VALUE, + _msgpack_load16(uint16_t,n)+1, + _ext_zero); + case CS_EXT_32: + again_fixed_trail_if_zero(ACS_EXT_VALUE, + _msgpack_load32(uint32_t,n)+1, + _ext_zero); case CS_FLOAT: { union { uint32_t i; float f; } mem; mem.i = _msgpack_load32(uint32_t,n); -- cgit v1.2.1 From 6386481024ec045d9ef991a2c975902276812508 Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Sat, 19 Oct 2013 18:43:16 +0200 Subject: add a note in the README --- README.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.rst b/README.rst index 294cd63..600d7f7 100644 --- a/README.rst +++ b/README.rst @@ -140,6 +140,14 @@ It is also possible to pack/unpack custom data types. 
Here is an example for ``object_pairs_hook`` callback may instead be used to receive a list of key-value pairs. +Extended types +^^^^^^^^^^^^^^^ + +It is also possible to pack/unpack custom data types using the msgpack feature +of "extended types". For example, msgpack-pypy uses it to provide very fast serialization of int/float lists on top of PyPy (experimental for now): + +https://bitbucket.org/antocuni/msgpack-pypy/src/default/msgpack_pypy.py + Advanced unpacking control ^^^^^^^^^^^^^^^^^^^^^^^^^^ -- cgit v1.2.1