From d61097511a1caa0e3bc5a70c1d2d92f448bd5025 Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Tue, 15 Oct 2013 16:59:43 +0200 Subject: add support for extended types: you can now pack/unpack custom python objects by subclassing Packer and Unpacker --- msgpack/fallback.py | 131 +++++++++++++++++++++++++++++++------------------ test/test_extension.py | 24 +++++++++ 2 files changed, 108 insertions(+), 47 deletions(-) create mode 100644 test/test_extension.py diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 8f9d646..b7f455b 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -58,54 +58,9 @@ TYPE_ARRAY = 1 TYPE_MAP = 2 TYPE_RAW = 3 -DEFAULT_RECURSE_LIMIT=511 - -def pack(o, stream, **kwargs): - """ - Pack object `o` and write it to `stream` - - See :class:`Packer` for options. - """ - packer = Packer(**kwargs) - stream.write(packer.pack(o)) - -def packb(o, **kwargs): - """ - Pack object `o` and return packed bytes +EXTENDED_TYPE = 1000 - See :class:`Packer` for options. - """ - return Packer(**kwargs).pack(o) - -def unpack(stream, **kwargs): - """ - Unpack an object from `stream`. - - Raises `ExtraData` when `packed` contains extra bytes. - See :class:`Unpacker` for options. - """ - unpacker = Unpacker(stream, **kwargs) - ret = unpacker._fb_unpack() - if unpacker._fb_got_extradata(): - raise ExtraData(ret, unpacker._fb_get_extradata()) - return ret - -def unpackb(packed, **kwargs): - """ - Unpack an object from `packed`. - - Raises `ExtraData` when `packed` contains extra bytes. - See :class:`Unpacker` for options. - """ - unpacker = Unpacker(None, **kwargs) - unpacker.feed(packed) - try: - ret = unpacker._fb_unpack() - except OutOfData: - raise UnpackValueError("Data is not enough.") - if unpacker._fb_got_extradata(): - raise ExtraData(ret, unpacker._fb_get_extradata()) - return ret +DEFAULT_RECURSE_LIMIT=511 class Unpacker(object): """ @@ -334,6 +289,9 @@ class Unpacker(object): elif b == 0xdf: n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] typ = TYPE_MAP + elif b == 0xc9: + n, typ = struct.unpack(">Ib", self._fb_read(5, write_bytes)) + typ += EXTENDED_TYPE else: raise UnpackValueError("Unknown header: 0x%x" % b) return typ, n, obj @@ -390,6 +348,10 @@ class Unpacker(object): if self._encoding is not None: obj = obj.decode(self._encoding, self._unicode_errors) return obj + if typ >= EXTENDED_TYPE: + typ -= EXTENDED_TYPE + data = self._fb_read(n, write_bytes) + return self.handle_extended_type(typ, data) assert typ == TYPE_IMMEDIATE return obj @@ -411,6 +373,9 @@ class Unpacker(object): self._fb_consume() return ret + def handle_extended_type(self, typecode, data): + raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode) + def read_array_header(self, write_bytes=None): ret = self._fb_unpack(EX_READ_ARRAY_HEADER, write_bytes) self._fb_consume() @@ -521,10 +486,33 @@ class Packer(object): if isinstance(obj, dict): return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj), nest_limit - 1) + if self.pack_extended_type(obj): + # it means that obj was succesfully handled by + # handle_extended_type, so we are done + return if self._default is not None: return self._pack(self._default(obj), nest_limit - 1) raise TypeError("Cannot serialize %r" % obj) + def pack_extended_type(self, obj): + res = self.handle_extended_type(obj) + if res is None: + return False + fmt, typecode, data = res + # for now we support only this. We should add support for the other + # fixext/ext formats + assert fmt == "ext 32" + assert 0 <= typecode <= 127 + N = len(data) + self._buffer.write(struct.pack('>BIB', 0xc9, N, typecode)) + self._buffer.write(data) + return True + + def handle_extended_type(self, obj): + # by default we don't support any extended type. This can be + # overridden by subclasses + return None + def pack(self, obj): self._pack(obj) ret = self._buffer.getvalue() @@ -590,3 +578,52 @@ class Packer(object): def reset(self): self._buffer = StringIO() + + +def pack(o, stream, Packer=Packer, **kwargs): + """ + Pack object `o` and write it to `stream` + + See :class:`Packer` for options. + """ + packer = Packer(**kwargs) + stream.write(packer.pack(o)) + +def packb(o, Packer=Packer, **kwargs): + """ + Pack object `o` and return packed bytes + + See :class:`Packer` for options. + """ + return Packer(**kwargs).pack(o) + +def unpack(stream, Unpacker=Unpacker, **kwargs): + """ + Unpack an object from `stream`. + + Raises `ExtraData` when `packed` contains extra bytes. + See :class:`Unpacker` for options. + """ + unpacker = Unpacker(stream, **kwargs) + ret = unpacker._fb_unpack() + if unpacker._fb_got_extradata(): + raise ExtraData(ret, unpacker._fb_get_extradata()) + return ret + +def unpackb(packed, Unpacker=Unpacker, **kwargs): + """ + Unpack an object from `packed`. + + Raises `ExtraData` when `packed` contains extra bytes. + See :class:`Unpacker` for options. + """ + unpacker = Unpacker(None, **kwargs) + unpacker.feed(packed) + try: + ret = unpacker._fb_unpack() + except OutOfData: + raise UnpackValueError("Data is not enough.") + if unpacker._fb_got_extradata(): + raise ExtraData(ret, unpacker._fb_get_extradata()) + return ret + diff --git a/test/test_extension.py b/test/test_extension.py new file mode 100644 index 0000000..45e6027 --- /dev/null +++ b/test/test_extension.py @@ -0,0 +1,24 @@ +import array +import msgpack + +def test_extension_type(): + class MyPacker(msgpack.Packer): + def handle_extended_type(self, obj): + if isinstance(obj, array.array): + fmt = "ext 32" + typecode = 123 # application specific typecode + data = obj.tostring() + return fmt, typecode, data + + class MyUnpacker(msgpack.Unpacker): + def handle_extended_type(self, typecode, data): + assert typecode == 123 + obj = array.array('d') + obj.fromstring(data) + return obj + + obj = [42, 'hello', array.array('d', [1.1, 2.2, 3.3])] + s = msgpack.packb(obj, MyPacker) + obj2 = msgpack.unpackb(s, MyUnpacker) + assert obj == obj2 + -- cgit v1.2.1 From 5529dfe59660f3c2fc5058e6fa42b24fe764a255 Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Fri, 18 Oct 2013 14:38:52 +0200 Subject: kill some duplicate code from unpack/unpackb and move the logic to Unpacker.unpack_one. By doing this we no longer need to make the module-level pack/unpack parametric on the class, because they contain no logic at all --- msgpack/fallback.py | 95 +++++++++++++++++++++++++------------------------- test/test_extension.py | 8 +++-- 2 files changed, 52 insertions(+), 51 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index b7f455b..2c79482 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -62,6 +62,44 @@ EXTENDED_TYPE = 1000 DEFAULT_RECURSE_LIMIT=511 +def pack(o, stream, **kwargs): + """ + Pack object `o` and write it to `stream` + + See :class:`Packer` for options. + """ + packer = Packer(**kwargs) + stream.write(packer.pack(o)) + +def packb(o, **kwargs): + """ + Pack object `o` and return packed bytes + + See :class:`Packer` for options. + """ + return Packer(**kwargs).pack(o) + +def unpack(stream, **kwargs): + """ + Unpack an object from `stream`. + + Raises `ExtraData` when `packed` contains extra bytes. + See :class:`Unpacker` for options. + """ + unpacker = Unpacker(stream, **kwargs) + return unpacker.unpack_one() + +def unpackb(packed, **kwargs): + """ + Unpack an object from `packed`. + + Raises `ExtraData` when `packed` contains extra bytes. + See :class:`Unpacker` for options. + """ + unpacker = Unpacker(None, **kwargs) + unpacker.feed(packed) + return unpacker.unpack_one() + class Unpacker(object): """ Streaming unpacker. @@ -149,6 +187,15 @@ class Unpacker(object): raise ValueError("object_pairs_hook and object_hook are mutually " "exclusive") + def unpack_one(self): + try: + ret = self._fb_unpack() + except OutOfData: + raise UnpackValueError("Data is not enough.") + if self._fb_got_extradata(): + raise ExtraData(ret, self._fb_get_extradata()) + return ret + def feed(self, next_bytes): if isinstance(next_bytes, array.array): next_bytes = next_bytes.tostring() @@ -579,51 +626,3 @@ class Packer(object): def reset(self): self._buffer = StringIO() - -def pack(o, stream, Packer=Packer, **kwargs): - """ - Pack object `o` and write it to `stream` - - See :class:`Packer` for options. - """ - packer = Packer(**kwargs) - stream.write(packer.pack(o)) - -def packb(o, Packer=Packer, **kwargs): - """ - Pack object `o` and return packed bytes - - See :class:`Packer` for options. - """ - return Packer(**kwargs).pack(o) - -def unpack(stream, Unpacker=Unpacker, **kwargs): - """ - Unpack an object from `stream`. - - Raises `ExtraData` when `packed` contains extra bytes. - See :class:`Unpacker` for options. - """ - unpacker = Unpacker(stream, **kwargs) - ret = unpacker._fb_unpack() - if unpacker._fb_got_extradata(): - raise ExtraData(ret, unpacker._fb_get_extradata()) - return ret - -def unpackb(packed, Unpacker=Unpacker, **kwargs): - """ - Unpack an object from `packed`. - - Raises `ExtraData` when `packed` contains extra bytes. - See :class:`Unpacker` for options. - """ - unpacker = Unpacker(None, **kwargs) - unpacker.feed(packed) - try: - ret = unpacker._fb_unpack() - except OutOfData: - raise UnpackValueError("Data is not enough.") - if unpacker._fb_got_extradata(): - raise ExtraData(ret, unpacker._fb_get_extradata()) - return ret - diff --git a/test/test_extension.py b/test/test_extension.py index 45e6027..0a9c14f 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -18,7 +18,9 @@ def test_extension_type(): return obj obj = [42, 'hello', array.array('d', [1.1, 2.2, 3.3])] - s = msgpack.packb(obj, MyPacker) - obj2 = msgpack.unpackb(s, MyUnpacker) + packer = MyPacker() + unpacker = MyUnpacker(None) + s = packer.pack(obj) + unpacker.feed(s) + obj2 = unpacker.unpack_one() assert obj == obj2 - -- cgit v1.2.1 From 522c4bfc7993c296b78df9c9c91aac5fd40ae8e0 Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Fri, 18 Oct 2013 15:03:58 +0200 Subject: slightly change to API --- msgpack/fallback.py | 26 ++++++++++---------------- test/test_extension.py | 7 ++++--- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 2c79482..101bd0f 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -398,7 +398,7 @@ class Unpacker(object): if typ >= EXTENDED_TYPE: typ -= EXTENDED_TYPE data = self._fb_read(n, write_bytes) - return self.handle_extended_type(typ, data) + return self.read_extended_type(typ, data) assert typ == TYPE_IMMEDIATE return obj @@ -420,7 +420,7 @@ class Unpacker(object): self._fb_consume() return ret - def handle_extended_type(self, typecode, data): + def read_extended_type(self, typecode, data): raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode) def read_array_header(self, write_bytes=None): @@ -533,19 +533,19 @@ class Packer(object): if isinstance(obj, dict): return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj), nest_limit - 1) - if self.pack_extended_type(obj): - # it means that obj was succesfully handled by - # handle_extended_type, so we are done + if self.handle_unknown_type(obj): + # it means that obj was succesfully packed, so we are done return if self._default is not None: return self._pack(self._default(obj), nest_limit - 1) raise TypeError("Cannot serialize %r" % obj) - def pack_extended_type(self, obj): - res = self.handle_extended_type(obj) - if res is None: - return False - fmt, typecode, data = res + def handle_unknown_type(self, obj): + # by default we don't support any extended type. This can be + # overridden by subclasses + return None + + def pack_extended_type(self, fmt, typecode, data): # for now we support only this. We should add support for the other # fixext/ext formats assert fmt == "ext 32" @@ -553,12 +553,6 @@ class Packer(object): N = len(data) self._buffer.write(struct.pack('>BIB', 0xc9, N, typecode)) self._buffer.write(data) - return True - - def handle_extended_type(self, obj): - # by default we don't support any extended type. This can be - # overridden by subclasses - return None def pack(self, obj): self._pack(obj) diff --git a/test/test_extension.py b/test/test_extension.py index 0a9c14f..0b26f8e 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -3,15 +3,16 @@ import msgpack def test_extension_type(): class MyPacker(msgpack.Packer): - def handle_extended_type(self, obj): + def handle_unknown_type(self, obj): if isinstance(obj, array.array): fmt = "ext 32" typecode = 123 # application specific typecode data = obj.tostring() - return fmt, typecode, data + self.pack_extended_type(fmt, typecode, data) + return True class MyUnpacker(msgpack.Unpacker): - def handle_extended_type(self, typecode, data): + def read_extended_type(self, typecode, data): assert typecode == 123 obj = array.array('d') obj.fromstring(data) -- cgit v1.2.1 From c727440ba5fe2f77d6cc03171ad7c193a3f481ee Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Fri, 18 Oct 2013 15:45:50 +0200 Subject: automatically find the best format to encode extended types --- msgpack/fallback.py | 28 ++++++++++++++++++++++------ test/test_extension.py | 18 ++++++++++++++++-- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 101bd0f..f984dcd 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -545,13 +545,29 @@ class Packer(object): # overridden by subclasses return None - def pack_extended_type(self, fmt, typecode, data): - # for now we support only this. We should add support for the other - # fixext/ext formats - assert fmt == "ext 32" + def pack_extended_type(self, typecode, data): assert 0 <= typecode <= 127 - N = len(data) - self._buffer.write(struct.pack('>BIB', 0xc9, N, typecode)) + n = len(data) + if n == 1: + header = struct.pack(">BB", 0xd4, typecode) # fixext 1 + elif n == 2: + header = struct.pack(">BB", 0xd5, typecode) # fixext 2 + elif n == 4: + header = struct.pack(">BB", 0xd6, typecode) # fixext 4 + elif n == 8: + header = struct.pack(">BB", 0xd7, typecode) # fixext 8 + elif n == 16: + header = struct.pack(">BB", 0xd8, typecode) # fixext 16 + elif n <= 2**8-1: + header = struct.pack(">BBB", 0xc7, n, typecode) # ext 8 + elif n <= 2**16-1: + header = struct.pack(">BHB", 0xc8, n, typecode) # ext 16 + elif n <= 2**32-1: + header = struct.pack(">BIB", 0xc9, n, typecode) # ext 32 + else: + raise PackValueError("ext data too large") + # + self._buffer.write(header) self._buffer.write(data) def pack(self, obj): diff --git a/test/test_extension.py b/test/test_extension.py index 0b26f8e..1908fa2 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -1,14 +1,28 @@ import array +import struct import msgpack +def test_pack_extended_type(): + def p(s): + packer = msgpack.Packer() + packer.pack_extended_type(0x42, s) + return packer._buffer.getvalue() + assert p('A') == '\xd4\x42A' # fixext 1 + assert p('AB') == '\xd5\x42AB' # fixext 2 + assert p('ABCD') == '\xd6\x42ABCD' # fixext 4 + assert p('ABCDEFGH') == '\xd7\x42ABCDEFGH' # fixext 8 + assert p('A'*16) == '\xd8\x42' + 'A'*16 # fixext 16 + assert p('ABC') == '\xc7\x03\x42ABC' # ext 8 + assert p('A'*0x0123) == '\xc8\x01\x23\x42' + 'A'*0x0123 # ext 16 + assert p('A'*0x00012345) == '\xc9\x00\x01\x23\x45\x42' + 'A'*0x00012345 # ext 32 + def test_extension_type(): class MyPacker(msgpack.Packer): def handle_unknown_type(self, obj): if isinstance(obj, array.array): - fmt = "ext 32" typecode = 123 # application specific typecode data = obj.tostring() - self.pack_extended_type(fmt, typecode, data) + self.pack_extended_type(typecode, data) return True class MyUnpacker(msgpack.Unpacker): -- cgit v1.2.1 From afa28fb2051cb00f03c83e020745e1eb238ff4ac Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Fri, 18 Oct 2013 15:54:12 +0200 Subject: add support to unpack all ext formats --- msgpack/fallback.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index f984dcd..c272420 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -336,7 +336,33 @@ class Unpacker(object): elif b == 0xdf: n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] typ = TYPE_MAP - elif b == 0xc9: + elif b == 0xd4: # fixext 1 + typ = struct.unpack(">B", self._fb_read(1, write_bytes))[0] + n = 1 + typ += EXTENDED_TYPE + elif b == 0xd5: # fixext 2 + typ = struct.unpack(">B", self._fb_read(1, write_bytes))[0] + n = 2 + typ += EXTENDED_TYPE + elif b == 0xd6: # fixext 4 + typ = struct.unpack(">B", self._fb_read(1, write_bytes))[0] + n = 4 + typ += EXTENDED_TYPE + elif b == 0xd7: # fixext 8 + typ = struct.unpack(">B", self._fb_read(1, write_bytes))[0] + n = 8 + typ += EXTENDED_TYPE + elif b == 0xd8: # fixext 16 + typ = struct.unpack(">B", self._fb_read(1, write_bytes))[0] + n = 16 + typ += EXTENDED_TYPE + elif b == 0xc7: # ext 8 + n, typ = struct.unpack(">Bb", self._fb_read(2, write_bytes)) + typ += EXTENDED_TYPE + elif b == 0xc8: # ext 16 + n, typ = struct.unpack(">Hb", self._fb_read(3, write_bytes)) + typ += EXTENDED_TYPE + elif b == 0xc9: # ext 32 n, typ = struct.unpack(">Ib", self._fb_read(5, write_bytes)) typ += EXTENDED_TYPE else: -- cgit v1.2.1 From 5467515065b95496b9f5b9d842ffc73c9ccb806e Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Fri, 18 Oct 2013 17:33:54 +0200 Subject: implement Packer.pack_extended_type also in the cython version of the code --- msgpack/_packer.pyx | 6 +++++ msgpack/pack.h | 2 ++ msgpack/pack_template.h | 60 +++++++++++++++++++++++++++++++++++++++++++++++++ test/test_extension.py | 2 +- 4 files changed, 69 insertions(+), 1 deletion(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 6289192..985559c 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -5,6 +5,7 @@ from cpython cimport * from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * +from libc.stdint cimport int8_t from msgpack.exceptions import PackValueError @@ -27,6 +28,7 @@ cdef extern from "pack.h": int msgpack_pack_map(msgpack_packer* pk, size_t l) int msgpack_pack_raw(msgpack_packer* pk, size_t l) int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) + int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l) cdef int DEFAULT_RECURSE_LIMIT=511 @@ -193,6 +195,10 @@ cdef class Packer(object): self.pk.length = 0 return buf + def pack_extended_type(self, typecode, data): + msgpack_pack_ext(&self.pk, typecode, len(data)) + msgpack_pack_raw_body(&self.pk, data, len(data)) + def pack_array_header(self, size_t size): cdef int ret = msgpack_pack_array(&self.pk, size) if ret == -1: diff --git a/msgpack/pack.h b/msgpack/pack.h index 1539991..08fdd82 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -70,6 +70,8 @@ static inline int msgpack_pack_map(msgpack_packer* pk, unsigned int n); static inline int msgpack_pack_raw(msgpack_packer* pk, size_t l); static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l); +static inline int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l); + static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l) { char* buf = pk->buf; diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h index 9e00d7e..ac9815f 100644 --- a/msgpack/pack_template.h +++ b/msgpack/pack_template.h @@ -683,6 +683,66 @@ static inline int msgpack_pack_raw_body(msgpack_packer* x, const void* b, size_t msgpack_pack_append_buffer(x, (const unsigned char*)b, l); } +/* + * Ext + */ + +static inline int msgpack_pack_ext(msgpack_packer* x, int8_t typecode, size_t l) +{ + if (l == 1) { + unsigned char buf[2]; + buf[0] = 0xd4; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 2) { + unsigned char buf[2]; + buf[0] = 0xd5; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 4) { + unsigned char buf[2]; + buf[0] = 0xd6; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 8) { + unsigned char buf[2]; + buf[0] = 0xd7; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 16) { + unsigned char buf[2]; + buf[0] = 0xd8; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l < 256) { + unsigned char buf[3]; + buf[0] = 0xc7; + buf[1] = l; + buf[2] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 3); + } else if(l < 65536) { + unsigned char buf[4]; + buf[0] = 0xc8; + _msgpack_store16(&buf[1], (uint16_t)l); + buf[3] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 4); + } else { + unsigned char buf[6]; + buf[0] = 0xc9; + _msgpack_store32(&buf[1], (uint32_t)l); + buf[5] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 6); + } + +} + + + #undef msgpack_pack_append_buffer #undef TAKE8_8 diff --git a/test/test_extension.py b/test/test_extension.py index 1908fa2..9ec1153 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -6,7 +6,7 @@ def test_pack_extended_type(): def p(s): packer = msgpack.Packer() packer.pack_extended_type(0x42, s) - return packer._buffer.getvalue() + return packer.bytes() assert p('A') == '\xd4\x42A' # fixext 1 assert p('AB') == '\xd5\x42AB' # fixext 2 assert p('ABCD') == '\xd6\x42ABCD' # fixext 4 -- cgit v1.2.1 From a7485eccb2e5fcebbd76612a658f2e18bdebe745 Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Fri, 18 Oct 2013 17:46:42 +0200 Subject: add the hook for unknown types also to the cython Packer --- msgpack/_packer.pyx | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 985559c..f2a0f76 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -176,6 +176,9 @@ cdef class Packer(object): for v in o: ret = self._pack(v, nest_limit-1) if ret != 0: break + elif self.handle_unknown_type(o): + # it means that obj was succesfully packed, so we are done + return 0 elif self._default: o = self._default(o) ret = self._pack(o, nest_limit-1) @@ -195,6 +198,9 @@ cdef class Packer(object): self.pk.length = 0 return buf + def handle_unknown_type(self, obj): + return None + def pack_extended_type(self, typecode, data): msgpack_pack_ext(&self.pk, typecode, len(data)) msgpack_pack_raw_body(&self.pk, data, len(data)) -- cgit v1.2.1 From ff858387d37d37ec4472f6b6ac7010d8f2b0744f Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Sat, 19 Oct 2013 01:49:03 +0200 Subject: implement unpack_one also for the cython version, and add a test for it --- msgpack/_unpacker.pyx | 18 ++++++++++++++++++ test/test_sequnpack.py | 15 ++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 1f4dd85..e05b9ed 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -359,6 +359,24 @@ cdef class Unpacker(object): """ return self._unpack(unpack_construct, write_bytes) + def unpack_one(self, object write_bytes=None): + """ + unpack one object + + If write_bytes is not None, it will be called with parts of the raw + message as it is unpacked. + + Raises `UnpackValueError` if there are no more bytes to unpack. + Raises ``ExtraData`` if there are still bytes left after the unpacking. + """ + try: + result = self.unpack() + except OutOfData: + raise UnpackValueError("Data is not enough") + if self.buf_head < self.buf_tail: + raise ExtraData(result, self.buf[self.buf_head:]) + return result + def skip(self, object write_bytes=None): """ read and ignore one object, returning None diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index 9db14ca..abc447a 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -1,9 +1,10 @@ #!/usr/bin/env python # coding: utf-8 +import py import six from msgpack import Unpacker, BufferFull -from msgpack.exceptions import OutOfData +from msgpack.exceptions import OutOfData, ExtraData, UnpackValueError from pytest import raises @@ -85,3 +86,15 @@ def test_readbytes(): assert unpacker.unpack() == ord(b'a') assert unpacker.unpack() == ord(b'r') +def test_unpack_one(): + unpacker = Unpacker() + unpacker.feed('\xda\x00\x03abc') + assert unpacker.unpack_one() == 'abc' + # + unpacker = Unpacker() + unpacker.feed('\xda\x00\x03abcd') + py.test.raises(ExtraData, "unpacker.unpack_one()") + # + unpacker = Unpacker() + unpacker.feed('\xda\x00\x03ab') + py.test.raises(UnpackValueError, "unpacker.unpack_one()") -- cgit v1.2.1 From 985d4c1496d8c9186079ebc4e42aee319e67c385 Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Sat, 19 Oct 2013 11:34:28 +0200 Subject: add a test for unpacking extended types --- test/test_extension.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/test/test_extension.py b/test/test_extension.py index 9ec1153..96944a3 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -16,6 +16,28 @@ def test_pack_extended_type(): assert p('A'*0x0123) == '\xc8\x01\x23\x42' + 'A'*0x0123 # ext 16 assert p('A'*0x00012345) == '\xc9\x00\x01\x23\x45\x42' + 'A'*0x00012345 # ext 32 +def test_unpack_extended_type(): + class MyUnpacker(msgpack.Unpacker): + def read_extended_type(self, typecode, data): + return (typecode, data) + + def u(s): + unpacker = MyUnpacker() + unpacker.feed(s) + return unpacker.unpack_one() + + assert u('\xd4\x42A') == (0x42, 'A') # fixext 1 + assert u('\xd5\x42AB') == (0x42, 'AB') # fixext 2 + assert u('\xd6\x42ABCD') == (0x42, 'ABCD') # fixext 4 + assert u('\xd7\x42ABCDEFGH') == (0x42, 'ABCDEFGH') # fixext 8 + assert u('\xd8\x42' + 'A'*16) == (0x42, 'A'*16) # fixext 16 + assert u('\xc7\x03\x42ABC') == (0x42, 'ABC') # ext 8 + assert (u('\xc8\x01\x23\x42' + 'A'*0x0123) == + (0x42, 'A'*0x0123)) # ext 16 + assert (u('\xc9\x00\x01\x23\x45\x42' + 'A'*0x00012345) == + (0x42, 'A'*0x00012345)) # ext 32 + + def test_extension_type(): class MyPacker(msgpack.Packer): def handle_unknown_type(self, obj): -- cgit v1.2.1 From 56dd1650a42a454027ba335b494100a9f211758e Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Sat, 19 Oct 2013 17:27:16 +0200 Subject: implement unpacking for all the fixtext formats --- msgpack/_unpacker.pyx | 21 +++++++++++++++++++-- msgpack/unpack.h | 18 ++++++++++++++++++ msgpack/unpack_define.h | 14 ++++++++------ msgpack/unpack_template.h | 22 +++++++++++++++------- setup.py | 1 + test/test_extension.py | 1 + 6 files changed, 62 insertions(+), 15 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index e05b9ed..6500ef7 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -25,6 +25,7 @@ cdef extern from "unpack.h": PyObject* object_hook bint has_pairs_hook # call object_hook with k-v pairs PyObject* list_hook + PyObject* ext_type_hook char *encoding char *unicode_errors @@ -46,6 +47,7 @@ cdef extern from "unpack.h": cdef inline init_ctx(unpack_context *ctx, object object_hook, object object_pairs_hook, object list_hook, + object ext_type_hook, bint use_list, char* encoding, char* unicode_errors): unpack_init(ctx) ctx.user.use_list = use_list @@ -72,9 +74,17 @@ cdef inline init_ctx(unpack_context *ctx, raise TypeError("list_hook must be a callable.") ctx.user.list_hook = list_hook + if ext_type_hook is not None: + if not PyCallable_Check(ext_type_hook): + raise TypeError("ext_type_hook must be a callable.") + ctx.user.ext_type_hook = ext_type_hook + ctx.user.encoding = encoding ctx.user.unicode_errors = unicode_errors +def default_read_extended_type(typecode, data): + raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode) + def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=1, encoding=None, unicode_errors="strict", object_pairs_hook=None, @@ -107,7 +117,8 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, unicode_errors = unicode_errors.encode('ascii') cerr = PyBytes_AsString(unicode_errors) - init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr) + init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, default_read_extended_type, + use_list, cenc, cerr) ret = unpack_construct(&ctx, buf, buf_len, &off) if ret == 1: obj = unpack_data(&ctx) @@ -249,7 +260,10 @@ cdef class Unpacker(object): self.unicode_errors = unicode_errors cerr = PyBytes_AsString(self.unicode_errors) - init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr) + ext_type_hook = self.read_extended_type + Py_INCREF(ext_type_hook) + init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, + ext_type_hook, use_list, cenc, cerr) def feed(self, object next_bytes): """Append `next_bytes` to internal buffer.""" @@ -404,6 +418,9 @@ cdef class Unpacker(object): """ return self._unpack(read_map_header, write_bytes) + def read_extended_type(self, typecode, data): + return default_read_extended_type(typecode, data) + def __iter__(self): return self diff --git a/msgpack/unpack.h b/msgpack/unpack.h index baeed1f..97ebd3f 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -24,6 +24,7 @@ typedef struct unpack_user { PyObject *object_hook; bool has_pairs_hook; PyObject *list_hook; + PyObject *ext_type_hook; const char *encoding; const char *unicode_errors; } unpack_user; @@ -226,4 +227,21 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* return 0; } +static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, + unsigned int lenght, msgpack_unpack_object* o) +{ + PyObject *py; + int8_t typecode = (int8_t)*pos++; + if (!u->ext_type_hook) { + PyErr_SetString(PyExc_AssertionError, "u->ext_type_hook cannot be NULL"); + return -1; + } + // lenght also includes the typecode, so the actual data is lenght-1 + py = PyEval_CallFunction(u->ext_type_hook, "(is#)", typecode, pos, lenght-1); + if (!py) + return -1; + *o = py; + return 0; +} + #include "unpack_template.h" diff --git a/msgpack/unpack_define.h b/msgpack/unpack_define.h index c81b990..986fa91 100644 --- a/msgpack/unpack_define.h +++ b/msgpack/unpack_define.h @@ -59,12 +59,12 @@ typedef enum { CS_INT_32 = 0x12, CS_INT_64 = 0x13, - //CS_ = 0x14, - //CS_ = 0x15, - //CS_BIG_INT_16 = 0x16, - //CS_BIG_INT_32 = 0x17, - //CS_BIG_FLOAT_16 = 0x18, - //CS_BIG_FLOAT_32 = 0x19, + CS_FIXEXT1 = 0x14, + CS_FIXEXT2 = 0x15, + CS_FIXEXT4 = 0x16, + CS_FIXEXT8 = 0x17, + CS_FIXEXT16 = 0x18, + CS_RAW_16 = 0x1a, CS_RAW_32 = 0x1b, CS_ARRAY_16 = 0x1c, @@ -75,6 +75,8 @@ typedef enum { //ACS_BIG_INT_VALUE, //ACS_BIG_FLOAT_VALUE, ACS_RAW_VALUE, + ACS_EXT_VALUE, + } msgpack_unpack_state; diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index 29ac935..b051075 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -202,12 +202,16 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l case 0xd2: // signed int 32 case 0xd3: // signed int 64 again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03)); - //case 0xd4: - //case 0xd5: - //case 0xd6: // big integer 16 - //case 0xd7: // big integer 32 - //case 0xd8: // big float 16 - //case 0xd9: // big float 32 + case 0xd4: // fixext 1 + case 0xd5: // fixext 2 + case 0xd6: // fixext 4 + case 0xd7: // fixext 8 + again_fixed_trail_if_zero(ACS_EXT_VALUE, + (1 << (((unsigned int)*p) & 0x03))+1, + _ext_zero); + case 0xd8: // fixext 16 + again_fixed_trail_if_zero(ACS_EXT_VALUE, 16+1, _ext_zero); + //case 0xd9: case 0xda: // raw 16 case 0xdb: // raw 32 case 0xdc: // array 16 @@ -298,6 +302,10 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l _raw_zero: push_variable_value(_raw, data, n, trail); + case ACS_EXT_VALUE: + _ext_zero: + push_variable_value(_ext, data, n, trail); + case CS_ARRAY_16: start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM); case CS_ARRAY_32: @@ -309,7 +317,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l case CS_MAP_32: /* FIXME security guard */ start_container(_map, _msgpack_load32(uint32_t,n), CT_MAP_KEY); - + default: goto _failed; } diff --git a/setup.py b/setup.py index 1055a61..83ae79f 100644 --- a/setup.py +++ b/setup.py @@ -92,6 +92,7 @@ if not hasattr(sys, 'pypy_version_info'): libraries=libraries, include_dirs=['.'], define_macros=macros, + extra_compile_args=['-O0'], )) del libraries, macros diff --git a/test/test_extension.py b/test/test_extension.py index 96944a3..94117e1 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -1,3 +1,4 @@ +import py import array import struct import msgpack -- cgit v1.2.1 From c9b97f078854dda02dc9404ee9d88ca5e16fb493 Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Sat, 19 Oct 2013 18:04:30 +0200 Subject: implement unpacking of ext 8,16,32 --- msgpack/unpack_define.h | 17 ++++++++--------- msgpack/unpack_template.h | 21 ++++++++++++++++----- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/msgpack/unpack_define.h b/msgpack/unpack_define.h index 986fa91..17c287e 100644 --- a/msgpack/unpack_define.h +++ b/msgpack/unpack_define.h @@ -44,10 +44,9 @@ typedef enum { //CS_ = 0x04, //CS_ = 0x05, //CS_ = 0x06, - //CS_ = 0x07, - - //CS_ = 0x08, - //CS_ = 0x09, + CS_EXT_8 = 0x07, + CS_EXT_16 = 0x08, + CS_EXT_32 = 0x09, CS_FLOAT = 0x0a, CS_DOUBLE = 0x0b, CS_UINT_8 = 0x0c, @@ -59,11 +58,11 @@ typedef enum { CS_INT_32 = 0x12, CS_INT_64 = 0x13, - CS_FIXEXT1 = 0x14, - CS_FIXEXT2 = 0x15, - CS_FIXEXT4 = 0x16, - CS_FIXEXT8 = 0x17, - CS_FIXEXT16 = 0x18, + //CS_FIXEXT1 = 0x14, + //CS_FIXEXT2 = 0x15, + //CS_FIXEXT4 = 0x16, + //CS_FIXEXT8 = 0x17, + //CS_FIXEXT16 = 0x18, CS_RAW_16 = 0x1a, CS_RAW_32 = 0x1b, diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index b051075..0c6af0e 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -188,9 +188,12 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l //case 0xc4: //case 0xc5: //case 0xc6: - //case 0xc7: - //case 0xc8: - //case 0xc9: + case 0xc7: // ext 8 + again_fixed_trail(NEXT_CS(p), 1); + case 0xc8: // ext 16 + again_fixed_trail(NEXT_CS(p), 2); + case 0xc9: // ext 32 + again_fixed_trail(NEXT_CS(p), 4); case 0xca: // float case 0xcb: // double case 0xcc: // unsigned int 8 @@ -242,8 +245,16 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l if((size_t)(pe - p) < trail) { goto _out; } n = p; p += trail - 1; switch(cs) { - //case CS_ - //case CS_ + case CS_EXT_8: + again_fixed_trail_if_zero(ACS_EXT_VALUE, *(uint8_t*)n+1, _ext_zero); + case CS_EXT_16: + again_fixed_trail_if_zero(ACS_EXT_VALUE, + _msgpack_load16(uint16_t,n)+1, + _ext_zero); + case CS_EXT_32: + again_fixed_trail_if_zero(ACS_EXT_VALUE, + _msgpack_load32(uint32_t,n)+1, + _ext_zero); case CS_FLOAT: { union { uint32_t i; float f; } mem; mem.i = _msgpack_load32(uint32_t,n); -- cgit v1.2.1 From 6386481024ec045d9ef991a2c975902276812508 Mon Sep 17 00:00:00 2001 From: Antonio Cuni Date: Sat, 19 Oct 2013 18:43:16 +0200 Subject: add a note in the README --- README.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.rst b/README.rst index 294cd63..600d7f7 100644 --- a/README.rst +++ b/README.rst @@ -140,6 +140,14 @@ It is also possible to pack/unpack custom data types. Here is an example for ``object_pairs_hook`` callback may instead be used to receive a list of key-value pairs. +Extended types +^^^^^^^^^^^^^^^ + +It is also possible to pack/unpack custom data types using the msgpack feature +of "extended types". For example, msgpack-pypy uses it to provide very fast serialization of int/float lists on top of PyPy (experimental for now): + +https://bitbucket.org/antocuni/msgpack-pypy/src/default/msgpack_pypy.py + Advanced unpacking control ^^^^^^^^^^^^^^^^^^^^^^^^^^ -- cgit v1.2.1 From aa68c9b8330b130d600b22ec47d5c3841499b536 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 20 Oct 2013 15:40:20 +0900 Subject: fallback: Support pack_ext_type. --- msgpack/_packer.pyx | 2 +- msgpack/_unpacker.pyx | 2 -- msgpack/fallback.py | 46 +++++++++++++++++++++++++++++++++++++++------- test/test_extension.py | 26 +++++++++++++------------- 4 files changed, 53 insertions(+), 23 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index df670ed..f033263 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -210,7 +210,7 @@ cdef class Packer(object): def handle_unknown_type(self, obj): return None - def pack_extended_type(self, typecode, data): + def pack_ext_type(self, typecode, data): msgpack_pack_ext(&self.pk, typecode, len(data)) msgpack_pack_raw_body(&self.pk, data, len(data)) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index cf30670..b0e66db 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -32,8 +32,6 @@ cdef extern from "unpack.h": msgpack_user user PyObject* obj size_t count - unsigned int ct - PyObject* key ctypedef int (*execute_fn)(unpack_context* ctx, const char* data, size_t len, size_t* off) except? -1 diff --git a/msgpack/fallback.py b/msgpack/fallback.py index dfaaa54..0b29700 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -42,11 +42,11 @@ else: newlist_hint = lambda size: [] from msgpack.exceptions import ( - BufferFull, - OutOfData, - UnpackValueError, - PackValueError, - ExtraData) + BufferFull, + OutOfData, + UnpackValueError, + PackValueError, + ExtraData) from msgpack import ExtType @@ -65,6 +65,7 @@ TYPE_EXT = 5 DEFAULT_RECURSE_LIMIT = 511 + def unpack(stream, **kwargs): """ Unpack an object from `stream`. @@ -78,6 +79,7 @@ def unpack(stream, **kwargs): raise ExtraData(ret, unpacker._fb_get_extradata()) return ret + def unpackb(packed, **kwargs): """ Unpack an object from `packed`. @@ -95,6 +97,7 @@ def unpackb(packed, **kwargs): raise ExtraData(ret, unpacker._fb_get_extradata()) return ret + class Unpacker(object): """ Streaming unpacker. @@ -548,8 +551,8 @@ class Packer(object): if isinstance(obj, Unicode): if self._encoding is None: raise TypeError( - "Can't encode unicode string: " - "no encoding is specified") + "Can't encode unicode string: " + "no encoding is specified") obj = obj.encode(self._encoding, self._unicode_errors) n = len(obj) if n <= 0x1f: @@ -616,6 +619,35 @@ class Packer(object): self._buffer = StringIO(ret) return ret + def pack_ext_type(self, typecode, data): + if not isinstance(typecode, int): + raise TypeError("typecode must have int type.") + if not 0 <= typecode <= 127: + raise ValueError("typecode should be 0-127") + if not isinstance(data, bytes): + raise TypeError("data must have bytes type") + L = len(data) + if L > 0xffffffff: + raise ValueError("Too large data") + if L == 1: + self._buffer.write(b'\xd4') + elif L == 2: + self._buffer.write(b'\xd5') + elif L == 4: + self._buffer.write(b'\xd6') + elif L == 8: + self._buffer.write(b'\xd7') + elif L == 16: + self._buffer.write(b'\xd8') + elif L <= 0xff: + self._buffer.write(b'\xc7' + struct.pack('B', L)) + elif L <= 0xffff: + self._buffer.write(b'\xc8' + struct.pack('>H', L)) + else: + self._buffer.write(b'\xc9' + struct.pack('>I', L)) + self._buffer.write(struct.pack('B', typecode)) + self._buffer.write(data) + def _fb_pack_array_header(self, n): if n <= 0x0f: return self._buffer.write(struct.pack('B', 0x90 + n)) diff --git a/test/test_extension.py b/test/test_extension.py index 94117e1..f2fa363 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -1,21 +1,21 @@ -import py import array -import struct import msgpack -def test_pack_extended_type(): + +def test_pack_ext_type(): def p(s): packer = msgpack.Packer() - packer.pack_extended_type(0x42, s) + packer.pack_ext_type(0x42, s) return packer.bytes() - assert p('A') == '\xd4\x42A' # fixext 1 - assert p('AB') == '\xd5\x42AB' # fixext 2 - assert p('ABCD') == '\xd6\x42ABCD' # fixext 4 - assert p('ABCDEFGH') == '\xd7\x42ABCDEFGH' # fixext 8 - assert p('A'*16) == '\xd8\x42' + 'A'*16 # fixext 16 - assert p('ABC') == '\xc7\x03\x42ABC' # ext 8 - assert p('A'*0x0123) == '\xc8\x01\x23\x42' + 'A'*0x0123 # ext 16 - assert p('A'*0x00012345) == '\xc9\x00\x01\x23\x45\x42' + 'A'*0x00012345 # ext 32 + assert p(b'A') == b'\xd4\x42A' # fixext 1 + assert p(b'AB') == b'\xd5\x42AB' # fixext 2 + assert p(b'ABCD') == b'\xd6\x42ABCD' # fixext 4 + assert p(b'ABCDEFGH') == b'\xd7\x42ABCDEFGH' # fixext 8 + assert p(b'A'*16) == b'\xd8\x42' + 'A'*16 # fixext 16 + assert p(b'ABC') == b'\xc7\x03\x42ABC' # ext 8 + assert p(b'A'*0x0123) == b'\xc8\x01\x23\x42' + b'A'*0x0123 # ext 16 + assert p(b'A'*0x00012345) == b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345 # ext 32 + def test_unpack_extended_type(): class MyUnpacker(msgpack.Unpacker): @@ -45,7 +45,7 @@ def test_extension_type(): if isinstance(obj, array.array): typecode = 123 # application specific typecode data = obj.tostring() - self.pack_extended_type(typecode, data) + self.pack_ext_type(typecode, data) return True class MyUnpacker(msgpack.Unpacker): -- cgit v1.2.1 From 96bcd76f49afd00f5b7def1ff7cfd002a7fa477d Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 20 Oct 2013 20:28:32 +0900 Subject: Packing ExtType and some cleanup --- msgpack/__init__.py | 2 +- msgpack/_packer.pyx | 150 +++++++++++++++++++++++++----------------------- msgpack/_unpacker.pyx | 49 +++++----------- msgpack/pack_template.h | 3 +- msgpack/unpack.h | 10 ++-- test/test_extension.py | 75 +++++++++++------------- test/test_sequnpack.py | 13 ----- 7 files changed, 132 insertions(+), 170 deletions(-) diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 79107b6..a7b47b1 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -26,6 +26,7 @@ def pack(o, stream, **kwargs): packer = Packer(**kwargs) stream.write(packer.pack(o)) + def packb(o, **kwargs): """ Pack object `o` and return packed bytes @@ -40,4 +41,3 @@ loads = unpackb dump = pack dumps = packb - diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index f033263..f63667c 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -8,6 +8,8 @@ from libc.limits cimport * from libc.stdint cimport int8_t from msgpack.exceptions import PackValueError +from msgpack import ExtType + cdef extern from "pack.h": struct msgpack_packer: @@ -120,80 +122,87 @@ cdef class Packer(object): cdef int ret cdef dict d cdef size_t L + cdef int default_used = 0 if nest_limit < 0: raise PackValueError("recursion limit exceeded.") - if o is None: - ret = msgpack_pack_nil(&self.pk) - elif isinstance(o, bool): - if o: - ret = msgpack_pack_true(&self.pk) - else: - ret = msgpack_pack_false(&self.pk) - elif PyLong_Check(o): - if o > 0: - ullval = o - ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) - else: - llval = o - ret = msgpack_pack_long_long(&self.pk, llval) - elif PyInt_Check(o): - longval = o - ret = msgpack_pack_long(&self.pk, longval) - elif PyFloat_Check(o): - if self.use_float: - fval = o - ret = msgpack_pack_float(&self.pk, fval) - else: - dval = o - ret = msgpack_pack_double(&self.pk, dval) - elif PyBytes_Check(o): - rawval = o - L = len(o) - ret = msgpack_pack_bin(&self.pk, L) - if ret == 0: + while True: + if o is None: + ret = msgpack_pack_nil(&self.pk) + elif isinstance(o, bool): + if o: + ret = msgpack_pack_true(&self.pk) + else: + ret = msgpack_pack_false(&self.pk) + elif PyLong_Check(o): + if o > 0: + ullval = o + ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) + else: + llval = o + ret = msgpack_pack_long_long(&self.pk, llval) + elif PyInt_Check(o): + longval = o + ret = msgpack_pack_long(&self.pk, longval) + elif PyFloat_Check(o): + if self.use_float: + fval = o + ret = msgpack_pack_float(&self.pk, fval) + else: + dval = o + ret = msgpack_pack_double(&self.pk, dval) + elif PyBytes_Check(o): + rawval = o + L = len(o) + ret = msgpack_pack_bin(&self.pk, L) + if ret == 0: + ret = msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyUnicode_Check(o): + if not self.encoding: + raise TypeError("Can't encode unicode string: no encoding is specified") + o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) + rawval = o + ret = msgpack_pack_raw(&self.pk, len(o)) + if ret == 0: + ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) + elif PyDict_CheckExact(o): + d = o + ret = msgpack_pack_map(&self.pk, len(d)) + if ret == 0: + for k, v in d.iteritems(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif PyDict_Check(o): + ret = msgpack_pack_map(&self.pk, len(o)) + if ret == 0: + for k, v in o.items(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif isinstance(o, ExtType): + # This should be before Tuple because ExtType is namedtuple. + longval = o[0] + rawval = o[1] + L = len(o[1]) + ret = msgpack_pack_ext(&self.pk, longval, L) ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyUnicode_Check(o): - if not self.encoding: - raise TypeError("Can't encode unicode string: no encoding is specified") - o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) - rawval = o - ret = msgpack_pack_raw(&self.pk, len(o)) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) - elif PyDict_CheckExact(o): - d = o - ret = msgpack_pack_map(&self.pk, len(d)) - if ret == 0: - for k, v in d.iteritems(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif PyDict_Check(o): - ret = msgpack_pack_map(&self.pk, len(o)) - if ret == 0: - for k, v in o.items(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif PyTuple_Check(o) or PyList_Check(o): - ret = msgpack_pack_array(&self.pk, len(o)) - if ret == 0: - for v in o: - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif self.handle_unknown_type(o): - # it means that obj was succesfully packed, so we are done - return 0 - elif self._default: - o = self._default(o) - ret = self._pack(o, nest_limit-1) - else: - raise TypeError("can't serialize %r" % (o,)) - return ret + elif PyTuple_Check(o) or PyList_Check(o): + ret = msgpack_pack_array(&self.pk, len(o)) + if ret == 0: + for v in o: + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif not default_used and self._default: + o = self._default(o) + default_used = 1 + continue + else: + raise TypeError("can't serialize %r" % (o,)) + return ret cpdef pack(self, object obj): cdef int ret @@ -207,9 +216,6 @@ cdef class Packer(object): self.pk.length = 0 return buf - def handle_unknown_type(self, obj): - return None - def pack_ext_type(self, typecode, data): msgpack_pack_ext(&self.pk, typecode, len(data)) msgpack_pack_raw_body(&self.pk, data, len(data)) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index b0e66db..d5aa46e 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -16,6 +16,7 @@ from msgpack.exceptions import ( UnpackValueError, ExtraData, ) +from msgpack import ExtType cdef extern from "unpack.h": @@ -24,7 +25,7 @@ cdef extern from "unpack.h": PyObject* object_hook bint has_pairs_hook # call object_hook with k-v pairs PyObject* list_hook - PyObject* ext_type_hook + PyObject* ext_hook char *encoding char *unicode_errors @@ -43,8 +44,8 @@ cdef extern from "unpack.h": object unpack_data(unpack_context* ctx) cdef inline init_ctx(unpack_context *ctx, - object object_hook, object object_pairs_hook, object list_hook, - object ext_type_hook, + object object_hook, object object_pairs_hook, + object list_hook, object ext_hook, bint use_list, char* encoding, char* unicode_errors): unpack_init(ctx) ctx.user.use_list = use_list @@ -71,10 +72,10 @@ cdef inline init_ctx(unpack_context *ctx, raise TypeError("list_hook must be a callable.") ctx.user.list_hook = list_hook - if ext_type_hook is not None: - if not PyCallable_Check(ext_type_hook): - raise TypeError("ext_type_hook must be a callable.") - ctx.user.ext_type_hook = ext_type_hook + if ext_hook is not None: + if not PyCallable_Check(ext_hook): + raise TypeError("ext_hook must be a callable.") + ctx.user.ext_hook = ext_hook ctx.user.encoding = encoding ctx.user.unicode_errors = unicode_errors @@ -84,8 +85,7 @@ def default_read_extended_type(typecode, data): def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=1, encoding=None, unicode_errors="strict", - object_pairs_hook=None, - ): + object_pairs_hook=None, ext_hook=ExtType): """ Unpack packed_bytes to object. Returns an unpacked object. @@ -114,8 +114,8 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, unicode_errors = unicode_errors.encode('ascii') cerr = PyBytes_AsString(unicode_errors) - init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, default_read_extended_type, - use_list, cenc, cerr) + init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, + use_list, cenc, cerr) ret = unpack_construct(&ctx, buf, buf_len, &off) if ret == 1: obj = unpack_data(&ctx) @@ -220,7 +220,7 @@ cdef class Unpacker(object): def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1, object object_hook=None, object object_pairs_hook=None, object list_hook=None, str encoding=None, str unicode_errors='strict', int max_buffer_size=0, - ): + object ext_hook=ExtType): cdef char *cenc=NULL, *cerr=NULL self.file_like = file_like @@ -257,10 +257,8 @@ cdef class Unpacker(object): self.unicode_errors = unicode_errors cerr = PyBytes_AsString(self.unicode_errors) - ext_type_hook = self.read_extended_type - Py_INCREF(ext_type_hook) init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, - ext_type_hook, use_list, cenc, cerr) + ext_hook, use_list, cenc, cerr) def feed(self, object next_bytes): """Append `next_bytes` to internal buffer.""" @@ -370,24 +368,6 @@ cdef class Unpacker(object): """ return self._unpack(unpack_construct, write_bytes) - def unpack_one(self, object write_bytes=None): - """ - unpack one object - - If write_bytes is not None, it will be called with parts of the raw - message as it is unpacked. - - Raises `UnpackValueError` if there are no more bytes to unpack. - Raises ``ExtraData`` if there are still bytes left after the unpacking. - """ - try: - result = self.unpack() - except OutOfData: - raise UnpackValueError("Data is not enough") - if self.buf_head < self.buf_tail: - raise ExtraData(result, self.buf[self.buf_head:]) - return result - def skip(self, object write_bytes=None): """ read and ignore one object, returning None @@ -415,9 +395,6 @@ cdef class Unpacker(object): """ return self._unpack(read_map_header, write_bytes) - def read_extended_type(self, typecode, data): - return default_read_extended_type(typecode, data) - def __iter__(self): return self diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h index 0fe9514..8b91619 100644 --- a/msgpack/pack_template.h +++ b/msgpack/pack_template.h @@ -687,7 +687,7 @@ static inline int msgpack_pack_raw(msgpack_packer* x, size_t l) static inline int msgpack_pack_bin(msgpack_packer *x, size_t l) { if (!x->use_bin_type) { - return msgpack_pack_raw(x, l) + return msgpack_pack_raw(x, l); } if (l < 256) { unsigned char buf[2] = {0xc4, (unsigned char)l}; @@ -711,7 +711,6 @@ static inline int msgpack_pack_raw_body(msgpack_packer* x, const void* b, size_t /* * Ext */ - static inline int msgpack_pack_ext(msgpack_packer* x, int8_t typecode, size_t l) { if (l == 1) { diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 327a524..3c09747 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -24,7 +24,7 @@ typedef struct unpack_user { PyObject *object_hook; bool has_pairs_hook; PyObject *list_hook; - PyObject *ext_type_hook; + PyObject *ext_hook; const char *encoding; const char *unicode_errors; } unpack_user; @@ -241,12 +241,12 @@ static inline int unpack_callback_ext(unpack_user* u, const char* base, const ch { PyObject *py; int8_t typecode = (int8_t)*pos++; - if (!u->ext_type_hook) { - PyErr_SetString(PyExc_AssertionError, "u->ext_type_hook cannot be NULL"); + if (!u->ext_hook) { + PyErr_SetString(PyExc_AssertionError, "u->ext_hook cannot be NULL"); return -1; } - // lenght also includes the typecode, so the actual data is lenght-1 - py = PyEval_CallFunction(u->ext_type_hook, "(is#)", typecode, pos, lenght-1); + // length also includes the typecode, so the actual data is lenght-1 + py = PyEval_CallFunction(u->ext_hook, "(is#)", typecode, pos, lenght-1); if (!py) return -1; *o = py; diff --git a/test/test_extension.py b/test/test_extension.py index f2fa363..2f85ce3 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -1,5 +1,7 @@ +from __future__ import print_function import array import msgpack +from msgpack import ExtType def test_pack_ext_type(): @@ -11,54 +13,45 @@ def test_pack_ext_type(): assert p(b'AB') == b'\xd5\x42AB' # fixext 2 assert p(b'ABCD') == b'\xd6\x42ABCD' # fixext 4 assert p(b'ABCDEFGH') == b'\xd7\x42ABCDEFGH' # fixext 8 - assert p(b'A'*16) == b'\xd8\x42' + 'A'*16 # fixext 16 + assert p(b'A'*16) == b'\xd8\x42' + b'A'*16 # fixext 16 assert p(b'ABC') == b'\xc7\x03\x42ABC' # ext 8 assert p(b'A'*0x0123) == b'\xc8\x01\x23\x42' + b'A'*0x0123 # ext 16 assert p(b'A'*0x00012345) == b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345 # ext 32 -def test_unpack_extended_type(): - class MyUnpacker(msgpack.Unpacker): - def read_extended_type(self, typecode, data): - return (typecode, data) +def test_unpack_ext_type(): + def check(b, expected): + assert msgpack.unpackb(b) == expected - def u(s): - unpacker = MyUnpacker() - unpacker.feed(s) - return unpacker.unpack_one() - - assert u('\xd4\x42A') == (0x42, 'A') # fixext 1 - assert u('\xd5\x42AB') == (0x42, 'AB') # fixext 2 - assert u('\xd6\x42ABCD') == (0x42, 'ABCD') # fixext 4 - assert u('\xd7\x42ABCDEFGH') == (0x42, 'ABCDEFGH') # fixext 8 - assert u('\xd8\x42' + 'A'*16) == (0x42, 'A'*16) # fixext 16 - assert u('\xc7\x03\x42ABC') == (0x42, 'ABC') # ext 8 - assert (u('\xc8\x01\x23\x42' + 'A'*0x0123) == - (0x42, 'A'*0x0123)) # ext 16 - assert (u('\xc9\x00\x01\x23\x45\x42' + 'A'*0x00012345) == - (0x42, 'A'*0x00012345)) # ext 32 + check(b'\xd4\x42A', ExtType(0x42, b'A')) # fixext 1 + check(b'\xd5\x42AB', ExtType(0x42, b'AB')) # fixext 2 + check(b'\xd6\x42ABCD', ExtType(0x42, b'ABCD')) # fixext 4 + check(b'\xd7\x42ABCDEFGH', ExtType(0x42, b'ABCDEFGH')) # fixext 8 + check(b'\xd8\x42' + b'A'*16, ExtType(0x42, b'A'*16)) # fixext 16 + check(b'\xc7\x03\x42ABC', ExtType(0x42, b'ABC')) # ext 8 + check(b'\xc8\x01\x23\x42' + b'A'*0x0123, + ExtType(0x42, b'A'*0x0123)) # ext 16 + check(b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345, + ExtType(0x42, b'A'*0x00012345)) # ext 32 def test_extension_type(): - class MyPacker(msgpack.Packer): - def handle_unknown_type(self, obj): - if isinstance(obj, array.array): - typecode = 123 # application specific typecode - data = obj.tostring() - self.pack_ext_type(typecode, data) - return True - - class MyUnpacker(msgpack.Unpacker): - def read_extended_type(self, typecode, data): - assert typecode == 123 - obj = array.array('d') - obj.fromstring(data) - return obj - - obj = [42, 'hello', array.array('d', [1.1, 2.2, 3.3])] - packer = MyPacker() - unpacker = MyUnpacker(None) - s = packer.pack(obj) - unpacker.feed(s) - obj2 = unpacker.unpack_one() + def default(obj): + print('default called', obj) + if isinstance(obj, array.array): + typecode = 123 # application specific typecode + data = obj.tostring() + return ExtType(typecode, data) + raise TypeError("Unknwon type object %r" % (obj,)) + + def ext_hook(code, data): + print('ext_hook called', code, data) + assert code == 123 + obj = array.array('d') + obj.fromstring(data) + return obj + + obj = [42, b'hello', array.array('d', [1.1, 2.2, 3.3])] + s = msgpack.packb(obj, default=default) + obj2 = msgpack.unpackb(s, ext_hook=ext_hook) assert obj == obj2 diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index abc447a..af66b78 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -85,16 +85,3 @@ def test_readbytes(): assert unpacker.read_bytes(3) == b'oob' assert unpacker.unpack() == ord(b'a') assert unpacker.unpack() == ord(b'r') - -def test_unpack_one(): - unpacker = Unpacker() - unpacker.feed('\xda\x00\x03abc') - assert unpacker.unpack_one() == 'abc' - # - unpacker = Unpacker() - unpacker.feed('\xda\x00\x03abcd') - py.test.raises(ExtraData, "unpacker.unpack_one()") - # - unpacker = Unpacker() - unpacker.feed('\xda\x00\x03ab') - py.test.raises(UnpackValueError, "unpacker.unpack_one()") -- cgit v1.2.1 From 822cce823cfea8e9f7625598a125897718b4ab58 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 20 Oct 2013 22:59:27 +0900 Subject: Support unpacking new types. --- msgpack/unpack.h | 10 +++++++--- msgpack/unpack_define.h | 1 - msgpack/unpack_template.h | 15 +++++++++------ test/test_obj.py | 2 +- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 3c09747..c733b24 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -157,7 +157,7 @@ static inline int unpack_callback_array_item(unpack_user* u, unsigned int curren static inline int unpack_callback_array_end(unpack_user* u, msgpack_unpack_object* c) { if (u->list_hook) { - PyObject *new_c = PyEval_CallFunction(u->list_hook, "(O)", *c); + PyObject *new_c = PyObject_CallFunction(u->list_hook, "(O)", *c); if (!new_c) return -1; Py_DECREF(*c); @@ -203,7 +203,7 @@ static inline int unpack_callback_map_item(unpack_user* u, unsigned int current, static inline int unpack_callback_map_end(unpack_user* u, msgpack_unpack_object* c) { if (u->object_hook) { - PyObject *new_c = PyEval_CallFunction(u->object_hook, "(O)", *c); + PyObject *new_c = PyObject_CallFunction(u->object_hook, "(O)", *c); if (!new_c) return -1; @@ -246,7 +246,11 @@ static inline int unpack_callback_ext(unpack_user* u, const char* base, const ch return -1; } // length also includes the typecode, so the actual data is lenght-1 - py = PyEval_CallFunction(u->ext_hook, "(is#)", typecode, pos, lenght-1); +#if PY_MAJOR_VERSION == 2 + py = PyObject_CallFunction(u->ext_hook, "(is#)", typecode, pos, lenght-1); +#else + py = PyObject_CallFunction(u->ext_hook, "(iy#)", typecode, pos, lenght-1); +#endif if (!py) return -1; *o = py; diff --git a/msgpack/unpack_define.h b/msgpack/unpack_define.h index 2ee92b5..0dd708d 100644 --- a/msgpack/unpack_define.h +++ b/msgpack/unpack_define.h @@ -93,4 +93,3 @@ typedef enum { #endif #endif /* msgpack/unpack_define.h */ - diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index 1a709ec..7646896 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -178,15 +178,17 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l switch(*p) { case 0xc0: // nil push_simple_value(_nil); - //case 0xc1: // string - // again_terminal_trail(NEXT_CS(p), p+1); + //case 0xc1: // never used case 0xc2: // false push_simple_value(_false); case 0xc3: // true push_simple_value(_true); - //case 0xc4: - //case 0xc5: - //case 0xc6: + case 0xc4: // bin 8 + again_fixed_trail(NEXT_CS(p), 1); + case 0xc5: // bin 16 + again_fixed_trail(NEXT_CS(p), 2); + case 0xc6: // bin 32 + again_fixed_trail(NEXT_CS(p), 4); case 0xc7: // ext 8 again_fixed_trail(NEXT_CS(p), 1); case 0xc8: // ext 16 @@ -213,7 +215,8 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l _ext_zero); case 0xd8: // fixext 16 again_fixed_trail_if_zero(ACS_EXT_VALUE, 16+1, _ext_zero); - //case 0xd9: + case 0xd9: // str 8 + again_fixed_trail(NEXT_CS(p), 1); case 0xda: // raw 16 case 0xdb: // raw 32 case 0xdc: // array 16 diff --git a/test/test_obj.py b/test/test_obj.py index fbf610c..9083218 100644 --- a/test/test_obj.py +++ b/test/test_obj.py @@ -35,7 +35,7 @@ def test_only_one_obj_hook(): unpackb(b'', object_hook=lambda x: x, object_pairs_hook=lambda x: x) def test_bad_hook(): - with raises(ValueError): + with raises(TypeError): packed = packb([3, 1+2j], default=lambda o: o) unpacked = unpackb(packed, use_list=1) -- cgit v1.2.1 From 0d5c58bd517caddd6b62a8931f6833e2a3add283 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 20 Oct 2013 23:06:02 +0900 Subject: cleanup --- msgpack/unpack_template.h | 2 +- setup.py | 1 - test/test_sequnpack.py | 3 +-- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index 7646896..d34eced 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -322,7 +322,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l case CS_MAP_32: /* FIXME security guard */ start_container(_map, _msgpack_load32(uint32_t,n), CT_MAP_KEY); - + default: goto _failed; } diff --git a/setup.py b/setup.py index 83ae79f..1055a61 100644 --- a/setup.py +++ b/setup.py @@ -92,7 +92,6 @@ if not hasattr(sys, 'pypy_version_info'): libraries=libraries, include_dirs=['.'], define_macros=macros, - extra_compile_args=['-O0'], )) del libraries, macros diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index af66b78..f541207 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -1,10 +1,9 @@ #!/usr/bin/env python # coding: utf-8 -import py import six from msgpack import Unpacker, BufferFull -from msgpack.exceptions import OutOfData, ExtraData, UnpackValueError +from msgpack.exceptions import OutOfData from pytest import raises -- cgit v1.2.1 From 84dc99c894be82b7a8c3708a3554888a6133b33b Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 20 Oct 2013 23:27:32 +0900 Subject: Add ext_type example to README. --- README.rst | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 600d7f7..99fb923 100644 --- a/README.rst +++ b/README.rst @@ -143,10 +143,27 @@ key-value pairs. Extended types ^^^^^^^^^^^^^^^ -It is also possible to pack/unpack custom data types using the msgpack feature -of "extended types". For example, msgpack-pypy uses it to provide very fast serialization of int/float lists on top of PyPy (experimental for now): - -https://bitbucket.org/antocuni/msgpack-pypy/src/default/msgpack_pypy.py +It is also possible to pack/unpack custom data types using the msgpack 2.0 feature. + + >>> import msgpack + >>> import array + >>> def default(obj): + ... if isinstance(obj, array.array) and obj.typecode == 'd': + ... return msgpack.ExtType(42, obj.tostring()) + ... raise TypeError("Unknown type: %r" % (obj,)) + ... + >>> def ext_hook(code, data): + ... if code == 42: + ... a = array.array('d') + ... a.fromstring(data) + ... return a + ... return ExtType(code, data) + ... + >>> data = array.array('d', [1.2, 3.4]) + >>> packed = msgpack.packb(data, default=default) + >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook) + >>> data == unpacked + True Advanced unpacking control -- cgit v1.2.1 From cb789596787592f4ec6bf7dcc0c646e8976b3f16 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 21 Oct 2013 00:01:47 +0900 Subject: Update README. --- README.rst | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 99fb923..c51e518 100644 --- a/README.rst +++ b/README.rst @@ -3,8 +3,8 @@ MessagePack for Python ======================= :author: INADA Naoki -:version: 0.3.0 -:date: 2012-12-07 +:version: 0.4.0 +:date: 2013-10-21 .. image:: https://secure.travis-ci.org/msgpack/msgpack-python.png :target: https://travis-ci.org/#!/msgpack/msgpack-python @@ -39,8 +39,40 @@ amd64. Windows SDK is recommanded way to build amd64 msgpack without any fee.) Without extension, using pure python implementation on CPython runs slowly. +Notes +----- + +Note for msgpack 2.0 support +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +msgpack 2.0 adds two types: *bin* and *ext*. + +*raw* was bytes or string type like Python 2's ``str``. +To distinguish string and bytes, msgpack 2.0 adds *bin*. +It is non-string binary like Python 3's ``bytes``. + +To use *bin* type for packing ``bytes``, pass ``use_bin_type=True`` to +packer argument. + + >>> import msgpack + >>> packed = msgpack.packb([b'spam', u'egg'], use_bin_type=True) + >>> msgpack.unpackb(packed, encoding='utf-8') + ['spam', u'egg'] + +You shoud use it carefully. When you use ``use_bin_type=True``, packed +binary can be unpacked by unpackers supporting msgpack-2.0. + +To use *ext* type, pass ``msgpack.ExtType`` object to packer. + + >>> import msgpack + >>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy')) + >>> msgpack.unpackb(packed) + ExtType(code=42, data='xyzzy') + +You can use it with ``default`` and ``ext_hook``. See below. + Note for msgpack 0.2.x users ----------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The msgpack 0.3 have some incompatible changes. -- cgit v1.2.1 From 37c2ad63af8a6e5cb6944f80d931fedbc6b49e7d Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 21 Oct 2013 00:29:05 +0900 Subject: Add tests and bugfix. --- msgpack/pack_template.h | 3 ++- msgpack/unpack.h | 4 ++-- test/test_newspec.py | 23 +++++++++++++++++++++-- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h index 8b91619..2879bbd 100644 --- a/msgpack/pack_template.h +++ b/msgpack/pack_template.h @@ -705,7 +705,8 @@ static inline int msgpack_pack_bin(msgpack_packer *x, size_t l) static inline int msgpack_pack_raw_body(msgpack_packer* x, const void* b, size_t l) { - msgpack_pack_append_buffer(x, (const unsigned char*)b, l); + if (l > 0) msgpack_pack_append_buffer(x, (const unsigned char*)b, l); + return 0; } /* diff --git a/msgpack/unpack.h b/msgpack/unpack.h index c733b24..aced40b 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -157,7 +157,7 @@ static inline int unpack_callback_array_item(unpack_user* u, unsigned int curren static inline int unpack_callback_array_end(unpack_user* u, msgpack_unpack_object* c) { if (u->list_hook) { - PyObject *new_c = PyObject_CallFunction(u->list_hook, "(O)", *c); + PyObject *new_c = PyObject_CallFunctionObjArgs(u->list_hook, *c, NULL); if (!new_c) return -1; Py_DECREF(*c); @@ -203,7 +203,7 @@ static inline int unpack_callback_map_item(unpack_user* u, unsigned int current, static inline int unpack_callback_map_end(unpack_user* u, msgpack_unpack_object* c) { if (u->object_hook) { - PyObject *new_c = PyObject_CallFunction(u->object_hook, "(O)", *c); + PyObject *new_c = PyObject_CallFunctionObjArgs(u->object_hook, *c, NULL); if (!new_c) return -1; diff --git a/test/test_newspec.py b/test/test_newspec.py index 8bc2cfe..ab05029 100644 --- a/test/test_newspec.py +++ b/test/test_newspec.py @@ -1,6 +1,6 @@ # coding: utf-8 -from msgpack import packb, unpackb +from msgpack import packb, unpackb, ExtType def test_str8(): @@ -66,4 +66,23 @@ def test_bin32(): assert b[5:] == data assert unpackb(b) == data - +def test_ext(): + def check(ext, packed): + assert packb(ext) == packed + assert unpackb(packed) == ext + check(ExtType(0x42, b'Z'), b'\xd4\x42Z') # fixext 1 + check(ExtType(0x42, b'ZZ'), b'\xd5\x42ZZ') # fixext 2 + check(ExtType(0x42, b'Z'*4), b'\xd6\x42' + b'Z'*4) # fixext 4 + check(ExtType(0x42, b'Z'*8), b'\xd7\x42' + b'Z'*8) # fixext 8 + check(ExtType(0x42, b'Z'*16), b'\xd8\x42' + b'Z'*16) # fixext 16 + # ext 8 + check(ExtType(0x42, b''), b'\xc7\x00\x42') + check(ExtType(0x42, b'Z'*255), b'\xc7\xff\x42' + b'Z'*255) + # ext 16 + check(ExtType(0x42, b'Z'*256), b'\xc8\x01\x00\x42' + b'Z'*256) + check(ExtType(0x42, b'Z'*0xffff), b'\xc8\xff\xff\x42' + b'Z'*0xffff) + # ext 32 + check(ExtType(0x42, b'Z'*0x10000), b'\xc9\x00\x01\x00\x00\x42' + b'Z'*0x10000) + # needs large memory + #check(ExtType(0x42, b'Z'*0xffffffff), + # b'\xc9\xff\xff\xff\xff\x42' + b'Z'*0xffffffff) -- cgit v1.2.1 From e3fee4db5fbf1ead4a98fff6c8843574480c3c2a Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 21 Oct 2013 00:59:22 +0900 Subject: fallback: support packing ExtType --- msgpack/__init__.py | 26 +++++++- msgpack/_packer.pyx | 5 +- msgpack/fallback.py | 181 ++++++++++++++++++++++++++++++---------------------- 3 files changed, 131 insertions(+), 81 deletions(-) diff --git a/msgpack/__init__.py b/msgpack/__init__.py index a7b47b1..56a0b36 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -2,9 +2,31 @@ from msgpack._version import version from msgpack.exceptions import * -from collections import namedtuple -ExtType = namedtuple('ExtType', 'code data') +class ExtType(object): + __slots__ = ('code', 'data') + + def __init__(self, code, data): + if not isinstance(code, int): + raise TypeError("code must be int") + if not isinstance(data, bytes): + raise TypeError("data must be bytes") + if not 0 <= code <= 127: + raise ValueError("code must be 0~127") + self.code = code + self.data = data + + def __eq__(self, other): + if not isinstance(other, ExtType): + return NotImplemented + return self.code == other.code and self.data == other.data + + def __hash__(self): + return self.code ^ hash(self.data) + + def __repr__(self): + return "msgpack.ExtType(%r, %r)" % (self.code, self.data) + import os if os.environ.get('MSGPACK_PUREPYTHON'): diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index f63667c..f2d058e 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -37,7 +37,6 @@ cdef extern from "pack.h": cdef int DEFAULT_RECURSE_LIMIT=511 - cdef class Packer(object): """ MessagePack Packer @@ -185,8 +184,8 @@ cdef class Packer(object): if ret != 0: break elif isinstance(o, ExtType): # This should be before Tuple because ExtType is namedtuple. - longval = o[0] - rawval = o[1] + longval = o.code + rawval = o.data L = len(o[1]) ret = msgpack_pack_ext(&self.pk, longval, L) ret = msgpack_pack_raw_body(&self.pk, rawval, L) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 0b29700..bf5b1c2 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -506,82 +506,111 @@ class Packer(object): self._default = default def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance): - if nest_limit < 0: - raise PackValueError("recursion limit exceeded") - if obj is None: - return self._buffer.write(b"\xc0") - if isinstance(obj, bool): - if obj: - return self._buffer.write(b"\xc3") - return self._buffer.write(b"\xc2") - if isinstance(obj, int_types): - if 0 <= obj < 0x80: - return self._buffer.write(struct.pack("B", obj)) - if -0x20 <= obj < 0: - return self._buffer.write(struct.pack("b", obj)) - if 0x80 <= obj <= 0xff: - return self._buffer.write(struct.pack("BB", 0xcc, obj)) - if -0x80 <= obj < 0: - return self._buffer.write(struct.pack(">Bb", 0xd0, obj)) - if 0xff < obj <= 0xffff: - return self._buffer.write(struct.pack(">BH", 0xcd, obj)) - if -0x8000 <= obj < -0x80: - return self._buffer.write(struct.pack(">Bh", 0xd1, obj)) - if 0xffff < obj <= 0xffffffff: - return self._buffer.write(struct.pack(">BI", 0xce, obj)) - if -0x80000000 <= obj < -0x8000: - return self._buffer.write(struct.pack(">Bi", 0xd2, obj)) - if 0xffffffff < obj <= 0xffffffffffffffff: - return self._buffer.write(struct.pack(">BQ", 0xcf, obj)) - if -0x8000000000000000 <= obj < -0x80000000: - return self._buffer.write(struct.pack(">Bq", 0xd3, obj)) - raise PackValueError("Integer value out of range") - if self._use_bin_type and isinstance(obj, bytes): - n = len(obj) - if n <= 0xff: - self._buffer.write(struct.pack('>BB', 0xc4, n)) - elif n <= 0xffff: - self._buffer.write(struct.pack(">BH", 0xc5, n)) - elif n <= 0xffffffff: - self._buffer.write(struct.pack(">BI", 0xc6, n)) - else: - raise PackValueError("Bytes is too large") - return self._buffer.write(obj) - if isinstance(obj, (Unicode, bytes)): - if isinstance(obj, Unicode): - if self._encoding is None: - raise TypeError( - "Can't encode unicode string: " - "no encoding is specified") - obj = obj.encode(self._encoding, self._unicode_errors) - n = len(obj) - if n <= 0x1f: - self._buffer.write(struct.pack('B', 0xa0 + n)) - elif self._use_bin_type and n <= 0xff: - self._buffer.write(struct.pack('>BB', 0xd9, n)) - elif n <= 0xffff: - self._buffer.write(struct.pack(">BH", 0xda, n)) - elif n <= 0xffffffff: - self._buffer.write(struct.pack(">BI", 0xdb, n)) - else: - raise PackValueError("String is too large") - return self._buffer.write(obj) - if isinstance(obj, float): - if self._use_float: - return self._buffer.write(struct.pack(">Bf", 0xca, obj)) - return self._buffer.write(struct.pack(">Bd", 0xcb, obj)) - if isinstance(obj, (list, tuple)): - n = len(obj) - self._fb_pack_array_header(n) - for i in xrange(n): - self._pack(obj[i], nest_limit - 1) - return - if isinstance(obj, dict): - return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj), - nest_limit - 1) - if self._default is not None: - return self._pack(self._default(obj), nest_limit - 1) - raise TypeError("Cannot serialize %r" % obj) + default_used = False + while True: + if nest_limit < 0: + raise PackValueError("recursion limit exceeded") + if obj is None: + return self._buffer.write(b"\xc0") + if isinstance(obj, bool): + if obj: + return self._buffer.write(b"\xc3") + return self._buffer.write(b"\xc2") + if isinstance(obj, int_types): + if 0 <= obj < 0x80: + return self._buffer.write(struct.pack("B", obj)) + if -0x20 <= obj < 0: + return self._buffer.write(struct.pack("b", obj)) + if 0x80 <= obj <= 0xff: + return self._buffer.write(struct.pack("BB", 0xcc, obj)) + if -0x80 <= obj < 0: + return self._buffer.write(struct.pack(">Bb", 0xd0, obj)) + if 0xff < obj <= 0xffff: + return self._buffer.write(struct.pack(">BH", 0xcd, obj)) + if -0x8000 <= obj < -0x80: + return self._buffer.write(struct.pack(">Bh", 0xd1, obj)) + if 0xffff < obj <= 0xffffffff: + return self._buffer.write(struct.pack(">BI", 0xce, obj)) + if -0x80000000 <= obj < -0x8000: + return self._buffer.write(struct.pack(">Bi", 0xd2, obj)) + if 0xffffffff < obj <= 0xffffffffffffffff: + return self._buffer.write(struct.pack(">BQ", 0xcf, obj)) + if -0x8000000000000000 <= obj < -0x80000000: + return self._buffer.write(struct.pack(">Bq", 0xd3, obj)) + raise PackValueError("Integer value out of range") + if self._use_bin_type and isinstance(obj, bytes): + n = len(obj) + if n <= 0xff: + self._buffer.write(struct.pack('>BB', 0xc4, n)) + elif n <= 0xffff: + self._buffer.write(struct.pack(">BH", 0xc5, n)) + elif n <= 0xffffffff: + self._buffer.write(struct.pack(">BI", 0xc6, n)) + else: + raise PackValueError("Bytes is too large") + return self._buffer.write(obj) + if isinstance(obj, (Unicode, bytes)): + if isinstance(obj, Unicode): + if self._encoding is None: + raise TypeError( + "Can't encode unicode string: " + "no encoding is specified") + obj = obj.encode(self._encoding, self._unicode_errors) + n = len(obj) + if n <= 0x1f: + self._buffer.write(struct.pack('B', 0xa0 + n)) + elif self._use_bin_type and n <= 0xff: + self._buffer.write(struct.pack('>BB', 0xd9, n)) + elif n <= 0xffff: + self._buffer.write(struct.pack(">BH", 0xda, n)) + elif n <= 0xffffffff: + self._buffer.write(struct.pack(">BI", 0xdb, n)) + else: + raise PackValueError("String is too large") + return self._buffer.write(obj) + if isinstance(obj, float): + if self._use_float: + return self._buffer.write(struct.pack(">Bf", 0xca, obj)) + return self._buffer.write(struct.pack(">Bd", 0xcb, obj)) + if isinstance(obj, ExtType): + code = obj.code + data = obj.data + assert isinstance(code, int) + assert isinstance(data, bytes) + L = len(data) + if L == 1: + self._buffer.write(b'\xd4') + elif L == 2: + self._buffer.write(b'\xd5') + elif L == 4: + self._buffer.write(b'\xd6') + elif L == 8: + self._buffer.write(b'\xd7') + elif L == 16: + self._buffer.write(b'\xd8') + elif L <= 0xff: + self._buffer.write(struct.pack(">BB", 0xc7, L)) + elif L <= 0xffff: + self._buffer.write(struct.pack(">BH", 0xc8, L)) + else: + self._buffer.write(struct.pack(">BI", 0xc9, L)) + self._buffer.write(struct.pack("b", code)) + self._buffer.write(data) + return + if isinstance(obj, (list, tuple)): + n = len(obj) + self._fb_pack_array_header(n) + for i in xrange(n): + self._pack(obj[i], nest_limit - 1) + return + if isinstance(obj, dict): + return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj), + nest_limit - 1) + if not default_used and self._default is not None: + obj = self._default(obj) + default_used = 1 + continue + raise TypeError("Cannot serialize %r" % obj) def pack(self, obj): self._pack(obj) -- cgit v1.2.1 From d84a403bc0bbbb36c4a5833e00269eef6c4a91ae Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 21 Oct 2013 01:12:57 +0900 Subject: fix bugs. --- msgpack/__init__.py | 20 ++++---------------- msgpack/_packer.pyx | 2 +- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 56a0b36..a958025 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -2,30 +2,18 @@ from msgpack._version import version from msgpack.exceptions import * +from collections import namedtuple -class ExtType(object): - __slots__ = ('code', 'data') - def __init__(self, code, data): +class ExtType(namedtuple('ExtType', 'code data')): + def __new__(cls, code, data): if not isinstance(code, int): raise TypeError("code must be int") if not isinstance(data, bytes): raise TypeError("data must be bytes") if not 0 <= code <= 127: raise ValueError("code must be 0~127") - self.code = code - self.data = data - - def __eq__(self, other): - if not isinstance(other, ExtType): - return NotImplemented - return self.code == other.code and self.data == other.data - - def __hash__(self): - return self.code ^ hash(self.data) - - def __repr__(self): - return "msgpack.ExtType(%r, %r)" % (self.code, self.data) + return super(ExtType, cls).__new__(cls, code, data) import os diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index f2d058e..f261f08 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -186,7 +186,7 @@ cdef class Packer(object): # This should be before Tuple because ExtType is namedtuple. longval = o.code rawval = o.data - L = len(o[1]) + L = len(o.data) ret = msgpack_pack_ext(&self.pk, longval, L) ret = msgpack_pack_raw_body(&self.pk, rawval, L) elif PyTuple_Check(o) or PyList_Check(o): -- cgit v1.2.1