diff options
-rw-r--r-- | README.rst | 63 | ||||
-rw-r--r-- | msgpack/__init__.py | 14 | ||||
-rw-r--r-- | msgpack/_packer.pyx | 151 | ||||
-rw-r--r-- | msgpack/_unpacker.pyx | 26 | ||||
-rw-r--r-- | msgpack/fallback.py | 219 | ||||
-rw-r--r-- | msgpack/pack.h | 2 | ||||
-rw-r--r-- | msgpack/pack_template.h | 64 | ||||
-rw-r--r-- | msgpack/unpack.h | 26 | ||||
-rw-r--r-- | msgpack/unpack_define.h | 15 | ||||
-rw-r--r-- | msgpack/unpack_template.h | 74 | ||||
-rw-r--r-- | test/test_extension.py | 57 | ||||
-rw-r--r-- | test/test_newspec.py | 23 | ||||
-rw-r--r-- | test/test_obj.py | 2 | ||||
-rw-r--r-- | test/test_sequnpack.py | 1 |
14 files changed, 530 insertions, 207 deletions
@@ -3,8 +3,8 @@ MessagePack for Python ======================= :author: INADA Naoki -:version: 0.3.0 -:date: 2012-12-07 +:version: 0.4.0 +:date: 2013-10-21 .. image:: https://secure.travis-ci.org/msgpack/msgpack-python.png :target: https://travis-ci.org/#!/msgpack/msgpack-python @@ -39,8 +39,40 @@ amd64. Windows SDK is recommanded way to build amd64 msgpack without any fee.) Without extension, using pure python implementation on CPython runs slowly. +Notes +----- + +Note for msgpack 2.0 support +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +msgpack 2.0 adds two types: *bin* and *ext*. + +*raw* was bytes or string type like Python 2's ``str``. +To distinguish string and bytes, msgpack 2.0 adds *bin*. +It is non-string binary like Python 3's ``bytes``. + +To use *bin* type for packing ``bytes``, pass ``use_bin_type=True`` to +packer argument. + + >>> import msgpack + >>> packed = msgpack.packb([b'spam', u'egg'], use_bin_type=True) + >>> msgpack.unpackb(packed, encoding='utf-8') + ['spam', u'egg'] + +You shoud use it carefully. When you use ``use_bin_type=True``, packed +binary can be unpacked by unpackers supporting msgpack-2.0. + +To use *ext* type, pass ``msgpack.ExtType`` object to packer. + + >>> import msgpack + >>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy')) + >>> msgpack.unpackb(packed) + ExtType(code=42, data='xyzzy') + +You can use it with ``default`` and ``ext_hook``. See below. + Note for msgpack 0.2.x users ----------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The msgpack 0.3 have some incompatible changes. @@ -140,6 +172,31 @@ It is also possible to pack/unpack custom data types. Here is an example for ``object_pairs_hook`` callback may instead be used to receive a list of key-value pairs. +Extended types +^^^^^^^^^^^^^^^ + +It is also possible to pack/unpack custom data types using the msgpack 2.0 feature. + + >>> import msgpack + >>> import array + >>> def default(obj): + ... if isinstance(obj, array.array) and obj.typecode == 'd': + ... return msgpack.ExtType(42, obj.tostring()) + ... raise TypeError("Unknown type: %r" % (obj,)) + ... + >>> def ext_hook(code, data): + ... if code == 42: + ... a = array.array('d') + ... a.fromstring(data) + ... return a + ... return ExtType(code, data) + ... + >>> data = array.array('d', [1.2, 3.4]) + >>> packed = msgpack.packb(data, default=default) + >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook) + >>> data == unpacked + True + Advanced unpacking control ^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 79107b6..a958025 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -4,7 +4,17 @@ from msgpack.exceptions import * from collections import namedtuple -ExtType = namedtuple('ExtType', 'code data') + +class ExtType(namedtuple('ExtType', 'code data')): + def __new__(cls, code, data): + if not isinstance(code, int): + raise TypeError("code must be int") + if not isinstance(data, bytes): + raise TypeError("data must be bytes") + if not 0 <= code <= 127: + raise ValueError("code must be 0~127") + return super(ExtType, cls).__new__(cls, code, data) + import os if os.environ.get('MSGPACK_PUREPYTHON'): @@ -26,6 +36,7 @@ def pack(o, stream, **kwargs): packer = Packer(**kwargs) stream.write(packer.pack(o)) + def packb(o, **kwargs): """ Pack object `o` and return packed bytes @@ -40,4 +51,3 @@ loads = unpackb dump = pack dumps = packb - diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 7082445..f261f08 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -5,8 +5,11 @@ from cpython cimport * from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * +from libc.stdint cimport int8_t from msgpack.exceptions import PackValueError +from msgpack import ExtType + cdef extern from "pack.h": struct msgpack_packer: @@ -29,11 +32,11 @@ cdef extern from "pack.h": int msgpack_pack_raw(msgpack_packer* pk, size_t l) int msgpack_pack_bin(msgpack_packer* pk, size_t l) int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) + int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l) cdef int DEFAULT_RECURSE_LIMIT=511 - cdef class Packer(object): """ MessagePack Packer @@ -118,77 +121,87 @@ cdef class Packer(object): cdef int ret cdef dict d cdef size_t L + cdef int default_used = 0 if nest_limit < 0: raise PackValueError("recursion limit exceeded.") - if o is None: - ret = msgpack_pack_nil(&self.pk) - elif isinstance(o, bool): - if o: - ret = msgpack_pack_true(&self.pk) - else: - ret = msgpack_pack_false(&self.pk) - elif PyLong_Check(o): - if o > 0: - ullval = o - ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) - else: - llval = o - ret = msgpack_pack_long_long(&self.pk, llval) - elif PyInt_Check(o): - longval = o - ret = msgpack_pack_long(&self.pk, longval) - elif PyFloat_Check(o): - if self.use_float: - fval = o - ret = msgpack_pack_float(&self.pk, fval) - else: - dval = o - ret = msgpack_pack_double(&self.pk, dval) - elif PyBytes_Check(o): - rawval = o - L = len(o) - ret = msgpack_pack_bin(&self.pk, L) - if ret == 0: + while True: + if o is None: + ret = msgpack_pack_nil(&self.pk) + elif isinstance(o, bool): + if o: + ret = msgpack_pack_true(&self.pk) + else: + ret = msgpack_pack_false(&self.pk) + elif PyLong_Check(o): + if o > 0: + ullval = o + ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) + else: + llval = o + ret = msgpack_pack_long_long(&self.pk, llval) + elif PyInt_Check(o): + longval = o + ret = msgpack_pack_long(&self.pk, longval) + elif PyFloat_Check(o): + if self.use_float: + fval = o + ret = msgpack_pack_float(&self.pk, fval) + else: + dval = o + ret = msgpack_pack_double(&self.pk, dval) + elif PyBytes_Check(o): + rawval = o + L = len(o) + ret = msgpack_pack_bin(&self.pk, L) + if ret == 0: + ret = msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyUnicode_Check(o): + if not self.encoding: + raise TypeError("Can't encode unicode string: no encoding is specified") + o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) + rawval = o + ret = msgpack_pack_raw(&self.pk, len(o)) + if ret == 0: + ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) + elif PyDict_CheckExact(o): + d = <dict>o + ret = msgpack_pack_map(&self.pk, len(d)) + if ret == 0: + for k, v in d.iteritems(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif PyDict_Check(o): + ret = msgpack_pack_map(&self.pk, len(o)) + if ret == 0: + for k, v in o.items(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif isinstance(o, ExtType): + # This should be before Tuple because ExtType is namedtuple. + longval = o.code + rawval = o.data + L = len(o.data) + ret = msgpack_pack_ext(&self.pk, longval, L) ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyUnicode_Check(o): - if not self.encoding: - raise TypeError("Can't encode unicode string: no encoding is specified") - o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) - rawval = o - ret = msgpack_pack_raw(&self.pk, len(o)) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) - elif PyDict_CheckExact(o): - d = <dict>o - ret = msgpack_pack_map(&self.pk, len(d)) - if ret == 0: - for k, v in d.iteritems(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif PyDict_Check(o): - ret = msgpack_pack_map(&self.pk, len(o)) - if ret == 0: - for k, v in o.items(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif PyTuple_Check(o) or PyList_Check(o): - ret = msgpack_pack_array(&self.pk, len(o)) - if ret == 0: - for v in o: - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif self._default: - o = self._default(o) - ret = self._pack(o, nest_limit-1) - else: - raise TypeError("can't serialize %r" % (o,)) - return ret + elif PyTuple_Check(o) or PyList_Check(o): + ret = msgpack_pack_array(&self.pk, len(o)) + if ret == 0: + for v in o: + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif not default_used and self._default: + o = self._default(o) + default_used = 1 + continue + else: + raise TypeError("can't serialize %r" % (o,)) + return ret cpdef pack(self, object obj): cdef int ret @@ -202,6 +215,10 @@ cdef class Packer(object): self.pk.length = 0 return buf + def pack_ext_type(self, typecode, data): + msgpack_pack_ext(&self.pk, typecode, len(data)) + msgpack_pack_raw_body(&self.pk, data, len(data)) + def pack_array_header(self, size_t size): cdef int ret = msgpack_pack_array(&self.pk, size) if ret == -1: diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 7b0c8a6..d5aa46e 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -16,6 +16,7 @@ from msgpack.exceptions import ( UnpackValueError, ExtraData, ) +from msgpack import ExtType cdef extern from "unpack.h": @@ -24,6 +25,7 @@ cdef extern from "unpack.h": PyObject* object_hook bint has_pairs_hook # call object_hook with k-v pairs PyObject* list_hook + PyObject* ext_hook char *encoding char *unicode_errors @@ -31,8 +33,6 @@ cdef extern from "unpack.h": msgpack_user user PyObject* obj size_t count - unsigned int ct - PyObject* key ctypedef int (*execute_fn)(unpack_context* ctx, const char* data, size_t len, size_t* off) except? -1 @@ -44,7 +44,8 @@ cdef extern from "unpack.h": object unpack_data(unpack_context* ctx) cdef inline init_ctx(unpack_context *ctx, - object object_hook, object object_pairs_hook, object list_hook, + object object_hook, object object_pairs_hook, + object list_hook, object ext_hook, bint use_list, char* encoding, char* unicode_errors): unpack_init(ctx) ctx.user.use_list = use_list @@ -71,13 +72,20 @@ cdef inline init_ctx(unpack_context *ctx, raise TypeError("list_hook must be a callable.") ctx.user.list_hook = <PyObject*>list_hook + if ext_hook is not None: + if not PyCallable_Check(ext_hook): + raise TypeError("ext_hook must be a callable.") + ctx.user.ext_hook = <PyObject*>ext_hook + ctx.user.encoding = encoding ctx.user.unicode_errors = unicode_errors +def default_read_extended_type(typecode, data): + raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode) + def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=1, encoding=None, unicode_errors="strict", - object_pairs_hook=None, - ): + object_pairs_hook=None, ext_hook=ExtType): """ Unpack packed_bytes to object. Returns an unpacked object. @@ -106,7 +114,8 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, unicode_errors = unicode_errors.encode('ascii') cerr = PyBytes_AsString(unicode_errors) - init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr) + init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, + use_list, cenc, cerr) ret = unpack_construct(&ctx, buf, buf_len, &off) if ret == 1: obj = unpack_data(&ctx) @@ -211,7 +220,7 @@ cdef class Unpacker(object): def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1, object object_hook=None, object object_pairs_hook=None, object list_hook=None, str encoding=None, str unicode_errors='strict', int max_buffer_size=0, - ): + object ext_hook=ExtType): cdef char *cenc=NULL, *cerr=NULL self.file_like = file_like @@ -248,7 +257,8 @@ cdef class Unpacker(object): self.unicode_errors = unicode_errors cerr = PyBytes_AsString(self.unicode_errors) - init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr) + init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, + ext_hook, use_list, cenc, cerr) def feed(self, object next_bytes): """Append `next_bytes` to internal buffer.""" diff --git a/msgpack/fallback.py b/msgpack/fallback.py index dfaaa54..bf5b1c2 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -42,11 +42,11 @@ else: newlist_hint = lambda size: [] from msgpack.exceptions import ( - BufferFull, - OutOfData, - UnpackValueError, - PackValueError, - ExtraData) + BufferFull, + OutOfData, + UnpackValueError, + PackValueError, + ExtraData) from msgpack import ExtType @@ -65,6 +65,7 @@ TYPE_EXT = 5 DEFAULT_RECURSE_LIMIT = 511 + def unpack(stream, **kwargs): """ Unpack an object from `stream`. @@ -78,6 +79,7 @@ def unpack(stream, **kwargs): raise ExtraData(ret, unpacker._fb_get_extradata()) return ret + def unpackb(packed, **kwargs): """ Unpack an object from `packed`. @@ -95,6 +97,7 @@ def unpackb(packed, **kwargs): raise ExtraData(ret, unpacker._fb_get_extradata()) return ret + class Unpacker(object): """ Streaming unpacker. @@ -503,82 +506,111 @@ class Packer(object): self._default = default def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance): - if nest_limit < 0: - raise PackValueError("recursion limit exceeded") - if obj is None: - return self._buffer.write(b"\xc0") - if isinstance(obj, bool): - if obj: - return self._buffer.write(b"\xc3") - return self._buffer.write(b"\xc2") - if isinstance(obj, int_types): - if 0 <= obj < 0x80: - return self._buffer.write(struct.pack("B", obj)) - if -0x20 <= obj < 0: - return self._buffer.write(struct.pack("b", obj)) - if 0x80 <= obj <= 0xff: - return self._buffer.write(struct.pack("BB", 0xcc, obj)) - if -0x80 <= obj < 0: - return self._buffer.write(struct.pack(">Bb", 0xd0, obj)) - if 0xff < obj <= 0xffff: - return self._buffer.write(struct.pack(">BH", 0xcd, obj)) - if -0x8000 <= obj < -0x80: - return self._buffer.write(struct.pack(">Bh", 0xd1, obj)) - if 0xffff < obj <= 0xffffffff: - return self._buffer.write(struct.pack(">BI", 0xce, obj)) - if -0x80000000 <= obj < -0x8000: - return self._buffer.write(struct.pack(">Bi", 0xd2, obj)) - if 0xffffffff < obj <= 0xffffffffffffffff: - return self._buffer.write(struct.pack(">BQ", 0xcf, obj)) - if -0x8000000000000000 <= obj < -0x80000000: - return self._buffer.write(struct.pack(">Bq", 0xd3, obj)) - raise PackValueError("Integer value out of range") - if self._use_bin_type and isinstance(obj, bytes): - n = len(obj) - if n <= 0xff: - self._buffer.write(struct.pack('>BB', 0xc4, n)) - elif n <= 0xffff: - self._buffer.write(struct.pack(">BH", 0xc5, n)) - elif n <= 0xffffffff: - self._buffer.write(struct.pack(">BI", 0xc6, n)) - else: - raise PackValueError("Bytes is too large") - return self._buffer.write(obj) - if isinstance(obj, (Unicode, bytes)): - if isinstance(obj, Unicode): - if self._encoding is None: - raise TypeError( + default_used = False + while True: + if nest_limit < 0: + raise PackValueError("recursion limit exceeded") + if obj is None: + return self._buffer.write(b"\xc0") + if isinstance(obj, bool): + if obj: + return self._buffer.write(b"\xc3") + return self._buffer.write(b"\xc2") + if isinstance(obj, int_types): + if 0 <= obj < 0x80: + return self._buffer.write(struct.pack("B", obj)) + if -0x20 <= obj < 0: + return self._buffer.write(struct.pack("b", obj)) + if 0x80 <= obj <= 0xff: + return self._buffer.write(struct.pack("BB", 0xcc, obj)) + if -0x80 <= obj < 0: + return self._buffer.write(struct.pack(">Bb", 0xd0, obj)) + if 0xff < obj <= 0xffff: + return self._buffer.write(struct.pack(">BH", 0xcd, obj)) + if -0x8000 <= obj < -0x80: + return self._buffer.write(struct.pack(">Bh", 0xd1, obj)) + if 0xffff < obj <= 0xffffffff: + return self._buffer.write(struct.pack(">BI", 0xce, obj)) + if -0x80000000 <= obj < -0x8000: + return self._buffer.write(struct.pack(">Bi", 0xd2, obj)) + if 0xffffffff < obj <= 0xffffffffffffffff: + return self._buffer.write(struct.pack(">BQ", 0xcf, obj)) + if -0x8000000000000000 <= obj < -0x80000000: + return self._buffer.write(struct.pack(">Bq", 0xd3, obj)) + raise PackValueError("Integer value out of range") + if self._use_bin_type and isinstance(obj, bytes): + n = len(obj) + if n <= 0xff: + self._buffer.write(struct.pack('>BB', 0xc4, n)) + elif n <= 0xffff: + self._buffer.write(struct.pack(">BH", 0xc5, n)) + elif n <= 0xffffffff: + self._buffer.write(struct.pack(">BI", 0xc6, n)) + else: + raise PackValueError("Bytes is too large") + return self._buffer.write(obj) + if isinstance(obj, (Unicode, bytes)): + if isinstance(obj, Unicode): + if self._encoding is None: + raise TypeError( "Can't encode unicode string: " "no encoding is specified") - obj = obj.encode(self._encoding, self._unicode_errors) - n = len(obj) - if n <= 0x1f: - self._buffer.write(struct.pack('B', 0xa0 + n)) - elif self._use_bin_type and n <= 0xff: - self._buffer.write(struct.pack('>BB', 0xd9, n)) - elif n <= 0xffff: - self._buffer.write(struct.pack(">BH", 0xda, n)) - elif n <= 0xffffffff: - self._buffer.write(struct.pack(">BI", 0xdb, n)) - else: - raise PackValueError("String is too large") - return self._buffer.write(obj) - if isinstance(obj, float): - if self._use_float: - return self._buffer.write(struct.pack(">Bf", 0xca, obj)) - return self._buffer.write(struct.pack(">Bd", 0xcb, obj)) - if isinstance(obj, (list, tuple)): - n = len(obj) - self._fb_pack_array_header(n) - for i in xrange(n): - self._pack(obj[i], nest_limit - 1) - return - if isinstance(obj, dict): - return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj), - nest_limit - 1) - if self._default is not None: - return self._pack(self._default(obj), nest_limit - 1) - raise TypeError("Cannot serialize %r" % obj) + obj = obj.encode(self._encoding, self._unicode_errors) + n = len(obj) + if n <= 0x1f: + self._buffer.write(struct.pack('B', 0xa0 + n)) + elif self._use_bin_type and n <= 0xff: + self._buffer.write(struct.pack('>BB', 0xd9, n)) + elif n <= 0xffff: + self._buffer.write(struct.pack(">BH", 0xda, n)) + elif n <= 0xffffffff: + self._buffer.write(struct.pack(">BI", 0xdb, n)) + else: + raise PackValueError("String is too large") + return self._buffer.write(obj) + if isinstance(obj, float): + if self._use_float: + return self._buffer.write(struct.pack(">Bf", 0xca, obj)) + return self._buffer.write(struct.pack(">Bd", 0xcb, obj)) + if isinstance(obj, ExtType): + code = obj.code + data = obj.data + assert isinstance(code, int) + assert isinstance(data, bytes) + L = len(data) + if L == 1: + self._buffer.write(b'\xd4') + elif L == 2: + self._buffer.write(b'\xd5') + elif L == 4: + self._buffer.write(b'\xd6') + elif L == 8: + self._buffer.write(b'\xd7') + elif L == 16: + self._buffer.write(b'\xd8') + elif L <= 0xff: + self._buffer.write(struct.pack(">BB", 0xc7, L)) + elif L <= 0xffff: + self._buffer.write(struct.pack(">BH", 0xc8, L)) + else: + self._buffer.write(struct.pack(">BI", 0xc9, L)) + self._buffer.write(struct.pack("b", code)) + self._buffer.write(data) + return + if isinstance(obj, (list, tuple)): + n = len(obj) + self._fb_pack_array_header(n) + for i in xrange(n): + self._pack(obj[i], nest_limit - 1) + return + if isinstance(obj, dict): + return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj), + nest_limit - 1) + if not default_used and self._default is not None: + obj = self._default(obj) + default_used = 1 + continue + raise TypeError("Cannot serialize %r" % obj) def pack(self, obj): self._pack(obj) @@ -616,6 +648,35 @@ class Packer(object): self._buffer = StringIO(ret) return ret + def pack_ext_type(self, typecode, data): + if not isinstance(typecode, int): + raise TypeError("typecode must have int type.") + if not 0 <= typecode <= 127: + raise ValueError("typecode should be 0-127") + if not isinstance(data, bytes): + raise TypeError("data must have bytes type") + L = len(data) + if L > 0xffffffff: + raise ValueError("Too large data") + if L == 1: + self._buffer.write(b'\xd4') + elif L == 2: + self._buffer.write(b'\xd5') + elif L == 4: + self._buffer.write(b'\xd6') + elif L == 8: + self._buffer.write(b'\xd7') + elif L == 16: + self._buffer.write(b'\xd8') + elif L <= 0xff: + self._buffer.write(b'\xc7' + struct.pack('B', L)) + elif L <= 0xffff: + self._buffer.write(b'\xc8' + struct.pack('>H', L)) + else: + self._buffer.write(b'\xc9' + struct.pack('>I', L)) + self._buffer.write(struct.pack('B', typecode)) + self._buffer.write(data) + def _fb_pack_array_header(self, n): if n <= 0x0f: return self._buffer.write(struct.pack('B', 0x90 + n)) diff --git a/msgpack/pack.h b/msgpack/pack.h index 001a0c1..a71c87b 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -71,6 +71,8 @@ static inline int msgpack_pack_raw(msgpack_packer* pk, size_t l); static inline int msgpack_pack_bin(msgpack_packer* pk, size_t l); static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l); +static inline int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l); + static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l) { char* buf = pk->buf; diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h index d228d7a..2879bbd 100644 --- a/msgpack/pack_template.h +++ b/msgpack/pack_template.h @@ -687,7 +687,7 @@ static inline int msgpack_pack_raw(msgpack_packer* x, size_t l) static inline int msgpack_pack_bin(msgpack_packer *x, size_t l) { if (!x->use_bin_type) { - return msgpack_pack_raw(x, l) + return msgpack_pack_raw(x, l); } if (l < 256) { unsigned char buf[2] = {0xc4, (unsigned char)l}; @@ -705,9 +705,69 @@ static inline int msgpack_pack_bin(msgpack_packer *x, size_t l) static inline int msgpack_pack_raw_body(msgpack_packer* x, const void* b, size_t l) { - msgpack_pack_append_buffer(x, (const unsigned char*)b, l); + if (l > 0) msgpack_pack_append_buffer(x, (const unsigned char*)b, l); + return 0; } +/* + * Ext + */ +static inline int msgpack_pack_ext(msgpack_packer* x, int8_t typecode, size_t l) +{ + if (l == 1) { + unsigned char buf[2]; + buf[0] = 0xd4; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 2) { + unsigned char buf[2]; + buf[0] = 0xd5; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 4) { + unsigned char buf[2]; + buf[0] = 0xd6; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 8) { + unsigned char buf[2]; + buf[0] = 0xd7; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 16) { + unsigned char buf[2]; + buf[0] = 0xd8; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l < 256) { + unsigned char buf[3]; + buf[0] = 0xc7; + buf[1] = l; + buf[2] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 3); + } else if(l < 65536) { + unsigned char buf[4]; + buf[0] = 0xc8; + _msgpack_store16(&buf[1], (uint16_t)l); + buf[3] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 4); + } else { + unsigned char buf[6]; + buf[0] = 0xc9; + _msgpack_store32(&buf[1], (uint32_t)l); + buf[5] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 6); + } + +} + + + #undef msgpack_pack_append_buffer #undef TAKE8_8 diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 03c735e..aced40b 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -24,6 +24,7 @@ typedef struct unpack_user { PyObject *object_hook; bool has_pairs_hook; PyObject *list_hook; + PyObject *ext_hook; const char *encoding; const char *unicode_errors; } unpack_user; @@ -156,7 +157,7 @@ static inline int unpack_callback_array_item(unpack_user* u, unsigned int curren static inline int unpack_callback_array_end(unpack_user* u, msgpack_unpack_object* c) { if (u->list_hook) { - PyObject *new_c = PyEval_CallFunction(u->list_hook, "(O)", *c); + PyObject *new_c = PyObject_CallFunctionObjArgs(u->list_hook, *c, NULL); if (!new_c) return -1; Py_DECREF(*c); @@ -202,7 +203,7 @@ static inline int unpack_callback_map_item(unpack_user* u, unsigned int current, static inline int unpack_callback_map_end(unpack_user* u, msgpack_unpack_object* c) { if (u->object_hook) { - PyObject *new_c = PyEval_CallFunction(u->object_hook, "(O)", *c); + PyObject *new_c = PyObject_CallFunctionObjArgs(u->object_hook, *c, NULL); if (!new_c) return -1; @@ -235,4 +236,25 @@ static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* return 0; } +static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, + unsigned int lenght, msgpack_unpack_object* o) +{ + PyObject *py; + int8_t typecode = (int8_t)*pos++; + if (!u->ext_hook) { + PyErr_SetString(PyExc_AssertionError, "u->ext_hook cannot be NULL"); + return -1; + } + // length also includes the typecode, so the actual data is lenght-1 +#if PY_MAJOR_VERSION == 2 + py = PyObject_CallFunction(u->ext_hook, "(is#)", typecode, pos, lenght-1); +#else + py = PyObject_CallFunction(u->ext_hook, "(iy#)", typecode, pos, lenght-1); +#endif + if (!py) + return -1; + *o = py; + return 0; +} + #include "unpack_template.h" diff --git a/msgpack/unpack_define.h b/msgpack/unpack_define.h index 0b14f52..0dd708d 100644 --- a/msgpack/unpack_define.h +++ b/msgpack/unpack_define.h @@ -45,10 +45,11 @@ typedef enum { CS_BIN_8 = 0x04, CS_BIN_16 = 0x05, CS_BIN_32 = 0x06, - //CS_ = 0x07, - //CS_ = 0x08, - //CS_ = 0x09, + CS_EXT_8 = 0x07, + CS_EXT_16 = 0x08, + CS_EXT_32 = 0x09, + CS_FLOAT = 0x0a, CS_DOUBLE = 0x0b, CS_UINT_8 = 0x0c, @@ -60,6 +61,12 @@ typedef enum { CS_INT_32 = 0x12, CS_INT_64 = 0x13, + //CS_FIXEXT1 = 0x14, + //CS_FIXEXT2 = 0x15, + //CS_FIXEXT4 = 0x16, + //CS_FIXEXT8 = 0x17, + //CS_FIXEXT16 = 0x18, + CS_RAW_8 = 0x19, CS_RAW_16 = 0x1a, CS_RAW_32 = 0x1b, @@ -70,6 +77,7 @@ typedef enum { ACS_RAW_VALUE, ACS_BIN_VALUE, + ACS_EXT_VALUE, } msgpack_unpack_state; @@ -85,4 +93,3 @@ typedef enum { #endif #endif /* msgpack/unpack_define.h */ - diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index 25229ac..d34eced 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -178,15 +178,23 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l switch(*p) { case 0xc0: // nil push_simple_value(_nil); - //case 0xc1: // string - // again_terminal_trail(NEXT_CS(p), p+1); + //case 0xc1: // never used case 0xc2: // false push_simple_value(_false); case 0xc3: // true push_simple_value(_true); - //case 0xc7: - //case 0xc8: - //case 0xc9: + case 0xc4: // bin 8 + again_fixed_trail(NEXT_CS(p), 1); + case 0xc5: // bin 16 + again_fixed_trail(NEXT_CS(p), 2); + case 0xc6: // bin 32 + again_fixed_trail(NEXT_CS(p), 4); + case 0xc7: // ext 8 + again_fixed_trail(NEXT_CS(p), 1); + case 0xc8: // ext 16 + again_fixed_trail(NEXT_CS(p), 2); + case 0xc9: // ext 32 + again_fixed_trail(NEXT_CS(p), 4); case 0xca: // float case 0xcb: // double case 0xcc: // unsigned int 8 @@ -198,15 +206,17 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l case 0xd2: // signed int 32 case 0xd3: // signed int 64 again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03)); - case 0xc4: // bin 8 - case 0xc5: // bin 16 - case 0xc6: // bin 32 - //case 0xd4: - //case 0xd5: - //case 0xd6: // big integer 16 - //case 0xd7: // big integer 32 - //case 0xd8: // big float 16 - case 0xd9: // raw 8 + case 0xd4: // fixext 1 + case 0xd5: // fixext 2 + case 0xd6: // fixext 4 + case 0xd7: // fixext 8 + again_fixed_trail_if_zero(ACS_EXT_VALUE, + (1 << (((unsigned int)*p) & 0x03))+1, + _ext_zero); + case 0xd8: // fixext 16 + again_fixed_trail_if_zero(ACS_EXT_VALUE, 16+1, _ext_zero); + case 0xd9: // str 8 + again_fixed_trail(NEXT_CS(p), 1); case 0xda: // raw 16 case 0xdb: // raw 32 case 0xdc: // array 16 @@ -237,8 +247,16 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l if((size_t)(pe - p) < trail) { goto _out; } n = p; p += trail - 1; switch(cs) { - //case CS_ - //case CS_ + case CS_EXT_8: + again_fixed_trail_if_zero(ACS_EXT_VALUE, *(uint8_t*)n+1, _ext_zero); + case CS_EXT_16: + again_fixed_trail_if_zero(ACS_EXT_VALUE, + _msgpack_load16(uint16_t,n)+1, + _ext_zero); + case CS_EXT_32: + again_fixed_trail_if_zero(ACS_EXT_VALUE, + _msgpack_load32(uint32_t,n)+1, + _ext_zero); case CS_FLOAT: { union { uint32_t i; float f; } mem; mem.i = _msgpack_load32(uint32_t,n); @@ -269,26 +287,6 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l case CS_INT_64: push_fixed_value(_int64, _msgpack_load64(int64_t,n)); - //case CS_ - //case CS_ - //case CS_BIG_INT_16: - // again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load16(uint16_t,n), _big_int_zero); - //case CS_BIG_INT_32: - // again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load32(uint32_t,n), _big_int_zero); - //case ACS_BIG_INT_VALUE: - //_big_int_zero: - // // FIXME - // push_variable_value(_big_int, data, n, trail); - - //case CS_BIG_FLOAT_16: - // again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load16(uint16_t,n), _big_float_zero); - //case CS_BIG_FLOAT_32: - // again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load32(uint32_t,n), _big_float_zero); - //case ACS_BIG_FLOAT_VALUE: - //_big_float_zero: - // // FIXME - // push_variable_value(_big_float, data, n, trail); - case CS_BIN_8: again_fixed_trail_if_zero(ACS_BIN_VALUE, *(uint8_t*)n, _bin_zero); case CS_BIN_16: @@ -309,6 +307,10 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l _raw_zero: push_variable_value(_raw, data, n, trail); + case ACS_EXT_VALUE: + _ext_zero: + push_variable_value(_ext, data, n, trail); + case CS_ARRAY_16: start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM); case CS_ARRAY_32: diff --git a/test/test_extension.py b/test/test_extension.py new file mode 100644 index 0000000..2f85ce3 --- /dev/null +++ b/test/test_extension.py @@ -0,0 +1,57 @@ +from __future__ import print_function +import array +import msgpack +from msgpack import ExtType + + +def test_pack_ext_type(): + def p(s): + packer = msgpack.Packer() + packer.pack_ext_type(0x42, s) + return packer.bytes() + assert p(b'A') == b'\xd4\x42A' # fixext 1 + assert p(b'AB') == b'\xd5\x42AB' # fixext 2 + assert p(b'ABCD') == b'\xd6\x42ABCD' # fixext 4 + assert p(b'ABCDEFGH') == b'\xd7\x42ABCDEFGH' # fixext 8 + assert p(b'A'*16) == b'\xd8\x42' + b'A'*16 # fixext 16 + assert p(b'ABC') == b'\xc7\x03\x42ABC' # ext 8 + assert p(b'A'*0x0123) == b'\xc8\x01\x23\x42' + b'A'*0x0123 # ext 16 + assert p(b'A'*0x00012345) == b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345 # ext 32 + + +def test_unpack_ext_type(): + def check(b, expected): + assert msgpack.unpackb(b) == expected + + check(b'\xd4\x42A', ExtType(0x42, b'A')) # fixext 1 + check(b'\xd5\x42AB', ExtType(0x42, b'AB')) # fixext 2 + check(b'\xd6\x42ABCD', ExtType(0x42, b'ABCD')) # fixext 4 + check(b'\xd7\x42ABCDEFGH', ExtType(0x42, b'ABCDEFGH')) # fixext 8 + check(b'\xd8\x42' + b'A'*16, ExtType(0x42, b'A'*16)) # fixext 16 + check(b'\xc7\x03\x42ABC', ExtType(0x42, b'ABC')) # ext 8 + check(b'\xc8\x01\x23\x42' + b'A'*0x0123, + ExtType(0x42, b'A'*0x0123)) # ext 16 + check(b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345, + ExtType(0x42, b'A'*0x00012345)) # ext 32 + + +def test_extension_type(): + def default(obj): + print('default called', obj) + if isinstance(obj, array.array): + typecode = 123 # application specific typecode + data = obj.tostring() + return ExtType(typecode, data) + raise TypeError("Unknwon type object %r" % (obj,)) + + def ext_hook(code, data): + print('ext_hook called', code, data) + assert code == 123 + obj = array.array('d') + obj.fromstring(data) + return obj + + obj = [42, b'hello', array.array('d', [1.1, 2.2, 3.3])] + s = msgpack.packb(obj, default=default) + obj2 = msgpack.unpackb(s, ext_hook=ext_hook) + assert obj == obj2 diff --git a/test/test_newspec.py b/test/test_newspec.py index 8bc2cfe..ab05029 100644 --- a/test/test_newspec.py +++ b/test/test_newspec.py @@ -1,6 +1,6 @@ # coding: utf-8 -from msgpack import packb, unpackb +from msgpack import packb, unpackb, ExtType def test_str8(): @@ -66,4 +66,23 @@ def test_bin32(): assert b[5:] == data assert unpackb(b) == data - +def test_ext(): + def check(ext, packed): + assert packb(ext) == packed + assert unpackb(packed) == ext + check(ExtType(0x42, b'Z'), b'\xd4\x42Z') # fixext 1 + check(ExtType(0x42, b'ZZ'), b'\xd5\x42ZZ') # fixext 2 + check(ExtType(0x42, b'Z'*4), b'\xd6\x42' + b'Z'*4) # fixext 4 + check(ExtType(0x42, b'Z'*8), b'\xd7\x42' + b'Z'*8) # fixext 8 + check(ExtType(0x42, b'Z'*16), b'\xd8\x42' + b'Z'*16) # fixext 16 + # ext 8 + check(ExtType(0x42, b''), b'\xc7\x00\x42') + check(ExtType(0x42, b'Z'*255), b'\xc7\xff\x42' + b'Z'*255) + # ext 16 + check(ExtType(0x42, b'Z'*256), b'\xc8\x01\x00\x42' + b'Z'*256) + check(ExtType(0x42, b'Z'*0xffff), b'\xc8\xff\xff\x42' + b'Z'*0xffff) + # ext 32 + check(ExtType(0x42, b'Z'*0x10000), b'\xc9\x00\x01\x00\x00\x42' + b'Z'*0x10000) + # needs large memory + #check(ExtType(0x42, b'Z'*0xffffffff), + # b'\xc9\xff\xff\xff\xff\x42' + b'Z'*0xffffffff) diff --git a/test/test_obj.py b/test/test_obj.py index fbf610c..9083218 100644 --- a/test/test_obj.py +++ b/test/test_obj.py @@ -35,7 +35,7 @@ def test_only_one_obj_hook(): unpackb(b'', object_hook=lambda x: x, object_pairs_hook=lambda x: x) def test_bad_hook(): - with raises(ValueError): + with raises(TypeError): packed = packb([3, 1+2j], default=lambda o: o) unpacked = unpackb(packed, use_list=1) diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index 9db14ca..f541207 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -84,4 +84,3 @@ def test_readbytes(): assert unpacker.read_bytes(3) == b'oob' assert unpacker.unpack() == ord(b'a') assert unpacker.unpack() == ord(b'r') - |