summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorINADA Naoki <songofacandy@gmail.com>2013-10-20 09:18:50 -0700
committerINADA Naoki <songofacandy@gmail.com>2013-10-20 09:18:50 -0700
commite802abebf18f0126aaad0c7bdf8ba0042cb4602d (patch)
tree67e86c2d28bce248448c8bb92ba196c1381c5d12
parentec0691fb2c7ca28eb4544b98dcb5e59933233997 (diff)
parentd84a403bc0bbbb36c4a5833e00269eef6c4a91ae (diff)
downloadmsgpack-python-e802abebf18f0126aaad0c7bdf8ba0042cb4602d.tar.gz
Merge pull request #79 from msgpack/newspec
[WIP] Newspec stage 2.
-rw-r--r--README.rst63
-rw-r--r--msgpack/__init__.py14
-rw-r--r--msgpack/_packer.pyx151
-rw-r--r--msgpack/_unpacker.pyx26
-rw-r--r--msgpack/fallback.py219
-rw-r--r--msgpack/pack.h2
-rw-r--r--msgpack/pack_template.h64
-rw-r--r--msgpack/unpack.h26
-rw-r--r--msgpack/unpack_define.h15
-rw-r--r--msgpack/unpack_template.h74
-rw-r--r--test/test_extension.py57
-rw-r--r--test/test_newspec.py23
-rw-r--r--test/test_obj.py2
-rw-r--r--test/test_sequnpack.py1
14 files changed, 530 insertions, 207 deletions
diff --git a/README.rst b/README.rst
index 294cd63..c51e518 100644
--- a/README.rst
+++ b/README.rst
@@ -3,8 +3,8 @@ MessagePack for Python
=======================
:author: INADA Naoki
-:version: 0.3.0
-:date: 2012-12-07
+:version: 0.4.0
+:date: 2013-10-21
.. image:: https://secure.travis-ci.org/msgpack/msgpack-python.png
:target: https://travis-ci.org/#!/msgpack/msgpack-python
@@ -39,8 +39,40 @@ amd64. Windows SDK is recommanded way to build amd64 msgpack without any fee.)
Without extension, using pure python implementation on CPython runs slowly.
+Notes
+-----
+
+Note for msgpack 2.0 support
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+msgpack 2.0 adds two types: *bin* and *ext*.
+
+*raw* was bytes or string type like Python 2's ``str``.
+To distinguish string and bytes, msgpack 2.0 adds *bin*.
+It is non-string binary like Python 3's ``bytes``.
+
+To use *bin* type for packing ``bytes``, pass ``use_bin_type=True`` to
+the packer.
+
+ >>> import msgpack
+ >>> packed = msgpack.packb([b'spam', u'egg'], use_bin_type=True)
+ >>> msgpack.unpackb(packed, encoding='utf-8')
+ ['spam', u'egg']
+
+You should use it carefully. When you use ``use_bin_type=True``, packed
+binary can be unpacked only by unpackers supporting msgpack-2.0.
+
+To use *ext* type, pass a ``msgpack.ExtType`` object to the packer.
+
+ >>> import msgpack
+ >>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy'))
+ >>> msgpack.unpackb(packed)
+ ExtType(code=42, data='xyzzy')
+
+You can use it with ``default`` and ``ext_hook``. See below.
+
Note for msgpack 0.2.x users
-----------------------------
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The msgpack 0.3 have some incompatible changes.
@@ -140,6 +172,31 @@ It is also possible to pack/unpack custom data types. Here is an example for
``object_pairs_hook`` callback may instead be used to receive a list of
key-value pairs.
+Extended types
+^^^^^^^^^^^^^^^
+
+It is also possible to pack/unpack custom data types using the msgpack 2.0 feature.
+
+ >>> import msgpack
+ >>> import array
+ >>> def default(obj):
+ ... if isinstance(obj, array.array) and obj.typecode == 'd':
+ ... return msgpack.ExtType(42, obj.tostring())
+ ... raise TypeError("Unknown type: %r" % (obj,))
+ ...
+ >>> def ext_hook(code, data):
+ ... if code == 42:
+ ... a = array.array('d')
+ ... a.fromstring(data)
+ ... return a
+ ... return msgpack.ExtType(code, data)
+ ...
+ >>> data = array.array('d', [1.2, 3.4])
+ >>> packed = msgpack.packb(data, default=default)
+ >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook)
+ >>> data == unpacked
+ True
+
Advanced unpacking control
^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/msgpack/__init__.py b/msgpack/__init__.py
index 79107b6..a958025 100644
--- a/msgpack/__init__.py
+++ b/msgpack/__init__.py
@@ -4,7 +4,17 @@ from msgpack.exceptions import *
from collections import namedtuple
-ExtType = namedtuple('ExtType', 'code data')
+
+class ExtType(namedtuple('ExtType', 'code data')):
+ def __new__(cls, code, data):
+ if not isinstance(code, int):
+ raise TypeError("code must be int")
+ if not isinstance(data, bytes):
+ raise TypeError("data must be bytes")
+ if not 0 <= code <= 127:
+ raise ValueError("code must be 0~127")
+ return super(ExtType, cls).__new__(cls, code, data)
+
import os
if os.environ.get('MSGPACK_PUREPYTHON'):
@@ -26,6 +36,7 @@ def pack(o, stream, **kwargs):
packer = Packer(**kwargs)
stream.write(packer.pack(o))
+
def packb(o, **kwargs):
"""
Pack object `o` and return packed bytes
@@ -40,4 +51,3 @@ loads = unpackb
dump = pack
dumps = packb
-
diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx
index 7082445..f261f08 100644
--- a/msgpack/_packer.pyx
+++ b/msgpack/_packer.pyx
@@ -5,8 +5,11 @@ from cpython cimport *
from libc.stdlib cimport *
from libc.string cimport *
from libc.limits cimport *
+from libc.stdint cimport int8_t
from msgpack.exceptions import PackValueError
+from msgpack import ExtType
+
cdef extern from "pack.h":
struct msgpack_packer:
@@ -29,11 +32,11 @@ cdef extern from "pack.h":
int msgpack_pack_raw(msgpack_packer* pk, size_t l)
int msgpack_pack_bin(msgpack_packer* pk, size_t l)
int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l)
+ int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l)
cdef int DEFAULT_RECURSE_LIMIT=511
-
cdef class Packer(object):
"""
MessagePack Packer
@@ -118,77 +121,87 @@ cdef class Packer(object):
cdef int ret
cdef dict d
cdef size_t L
+ cdef int default_used = 0
if nest_limit < 0:
raise PackValueError("recursion limit exceeded.")
- if o is None:
- ret = msgpack_pack_nil(&self.pk)
- elif isinstance(o, bool):
- if o:
- ret = msgpack_pack_true(&self.pk)
- else:
- ret = msgpack_pack_false(&self.pk)
- elif PyLong_Check(o):
- if o > 0:
- ullval = o
- ret = msgpack_pack_unsigned_long_long(&self.pk, ullval)
- else:
- llval = o
- ret = msgpack_pack_long_long(&self.pk, llval)
- elif PyInt_Check(o):
- longval = o
- ret = msgpack_pack_long(&self.pk, longval)
- elif PyFloat_Check(o):
- if self.use_float:
- fval = o
- ret = msgpack_pack_float(&self.pk, fval)
- else:
- dval = o
- ret = msgpack_pack_double(&self.pk, dval)
- elif PyBytes_Check(o):
- rawval = o
- L = len(o)
- ret = msgpack_pack_bin(&self.pk, L)
- if ret == 0:
+ while True:
+ if o is None:
+ ret = msgpack_pack_nil(&self.pk)
+ elif isinstance(o, bool):
+ if o:
+ ret = msgpack_pack_true(&self.pk)
+ else:
+ ret = msgpack_pack_false(&self.pk)
+ elif PyLong_Check(o):
+ if o > 0:
+ ullval = o
+ ret = msgpack_pack_unsigned_long_long(&self.pk, ullval)
+ else:
+ llval = o
+ ret = msgpack_pack_long_long(&self.pk, llval)
+ elif PyInt_Check(o):
+ longval = o
+ ret = msgpack_pack_long(&self.pk, longval)
+ elif PyFloat_Check(o):
+ if self.use_float:
+ fval = o
+ ret = msgpack_pack_float(&self.pk, fval)
+ else:
+ dval = o
+ ret = msgpack_pack_double(&self.pk, dval)
+ elif PyBytes_Check(o):
+ rawval = o
+ L = len(o)
+ ret = msgpack_pack_bin(&self.pk, L)
+ if ret == 0:
+ ret = msgpack_pack_raw_body(&self.pk, rawval, L)
+ elif PyUnicode_Check(o):
+ if not self.encoding:
+ raise TypeError("Can't encode unicode string: no encoding is specified")
+ o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
+ rawval = o
+ ret = msgpack_pack_raw(&self.pk, len(o))
+ if ret == 0:
+ ret = msgpack_pack_raw_body(&self.pk, rawval, len(o))
+ elif PyDict_CheckExact(o):
+ d = <dict>o
+ ret = msgpack_pack_map(&self.pk, len(d))
+ if ret == 0:
+ for k, v in d.iteritems():
+ ret = self._pack(k, nest_limit-1)
+ if ret != 0: break
+ ret = self._pack(v, nest_limit-1)
+ if ret != 0: break
+ elif PyDict_Check(o):
+ ret = msgpack_pack_map(&self.pk, len(o))
+ if ret == 0:
+ for k, v in o.items():
+ ret = self._pack(k, nest_limit-1)
+ if ret != 0: break
+ ret = self._pack(v, nest_limit-1)
+ if ret != 0: break
+ elif isinstance(o, ExtType):
+ # This should be before Tuple because ExtType is namedtuple.
+ longval = o.code
+ rawval = o.data
+ L = len(o.data)
+ ret = msgpack_pack_ext(&self.pk, longval, L)
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
- elif PyUnicode_Check(o):
- if not self.encoding:
- raise TypeError("Can't encode unicode string: no encoding is specified")
- o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
- rawval = o
- ret = msgpack_pack_raw(&self.pk, len(o))
- if ret == 0:
- ret = msgpack_pack_raw_body(&self.pk, rawval, len(o))
- elif PyDict_CheckExact(o):
- d = <dict>o
- ret = msgpack_pack_map(&self.pk, len(d))
- if ret == 0:
- for k, v in d.iteritems():
- ret = self._pack(k, nest_limit-1)
- if ret != 0: break
- ret = self._pack(v, nest_limit-1)
- if ret != 0: break
- elif PyDict_Check(o):
- ret = msgpack_pack_map(&self.pk, len(o))
- if ret == 0:
- for k, v in o.items():
- ret = self._pack(k, nest_limit-1)
- if ret != 0: break
- ret = self._pack(v, nest_limit-1)
- if ret != 0: break
- elif PyTuple_Check(o) or PyList_Check(o):
- ret = msgpack_pack_array(&self.pk, len(o))
- if ret == 0:
- for v in o:
- ret = self._pack(v, nest_limit-1)
- if ret != 0: break
- elif self._default:
- o = self._default(o)
- ret = self._pack(o, nest_limit-1)
- else:
- raise TypeError("can't serialize %r" % (o,))
- return ret
+ elif PyTuple_Check(o) or PyList_Check(o):
+ ret = msgpack_pack_array(&self.pk, len(o))
+ if ret == 0:
+ for v in o:
+ ret = self._pack(v, nest_limit-1)
+ if ret != 0: break
+ elif not default_used and self._default:
+ o = self._default(o)
+ default_used = 1
+ continue
+ else:
+ raise TypeError("can't serialize %r" % (o,))
+ return ret
cpdef pack(self, object obj):
cdef int ret
@@ -202,6 +215,10 @@ cdef class Packer(object):
self.pk.length = 0
return buf
+ def pack_ext_type(self, typecode, data):
+ msgpack_pack_ext(&self.pk, typecode, len(data))
+ msgpack_pack_raw_body(&self.pk, data, len(data))
+
def pack_array_header(self, size_t size):
cdef int ret = msgpack_pack_array(&self.pk, size)
if ret == -1:
diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx
index 7b0c8a6..d5aa46e 100644
--- a/msgpack/_unpacker.pyx
+++ b/msgpack/_unpacker.pyx
@@ -16,6 +16,7 @@ from msgpack.exceptions import (
UnpackValueError,
ExtraData,
)
+from msgpack import ExtType
cdef extern from "unpack.h":
@@ -24,6 +25,7 @@ cdef extern from "unpack.h":
PyObject* object_hook
bint has_pairs_hook # call object_hook with k-v pairs
PyObject* list_hook
+ PyObject* ext_hook
char *encoding
char *unicode_errors
@@ -31,8 +33,6 @@ cdef extern from "unpack.h":
msgpack_user user
PyObject* obj
size_t count
- unsigned int ct
- PyObject* key
ctypedef int (*execute_fn)(unpack_context* ctx, const char* data,
size_t len, size_t* off) except? -1
@@ -44,7 +44,8 @@ cdef extern from "unpack.h":
object unpack_data(unpack_context* ctx)
cdef inline init_ctx(unpack_context *ctx,
- object object_hook, object object_pairs_hook, object list_hook,
+ object object_hook, object object_pairs_hook,
+ object list_hook, object ext_hook,
bint use_list, char* encoding, char* unicode_errors):
unpack_init(ctx)
ctx.user.use_list = use_list
@@ -71,13 +72,20 @@ cdef inline init_ctx(unpack_context *ctx,
raise TypeError("list_hook must be a callable.")
ctx.user.list_hook = <PyObject*>list_hook
+ if ext_hook is not None:
+ if not PyCallable_Check(ext_hook):
+ raise TypeError("ext_hook must be a callable.")
+ ctx.user.ext_hook = <PyObject*>ext_hook
+
ctx.user.encoding = encoding
ctx.user.unicode_errors = unicode_errors
+def default_read_extended_type(typecode, data):
+ raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode)
+
def unpackb(object packed, object object_hook=None, object list_hook=None,
bint use_list=1, encoding=None, unicode_errors="strict",
- object_pairs_hook=None,
- ):
+ object_pairs_hook=None, ext_hook=ExtType):
"""
Unpack packed_bytes to object. Returns an unpacked object.
@@ -106,7 +114,8 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
unicode_errors = unicode_errors.encode('ascii')
cerr = PyBytes_AsString(unicode_errors)
- init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr)
+ init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
+ use_list, cenc, cerr)
ret = unpack_construct(&ctx, buf, buf_len, &off)
if ret == 1:
obj = unpack_data(&ctx)
@@ -211,7 +220,7 @@ cdef class Unpacker(object):
def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1,
object object_hook=None, object object_pairs_hook=None, object list_hook=None,
str encoding=None, str unicode_errors='strict', int max_buffer_size=0,
- ):
+ object ext_hook=ExtType):
cdef char *cenc=NULL, *cerr=NULL
self.file_like = file_like
@@ -248,7 +257,8 @@ cdef class Unpacker(object):
self.unicode_errors = unicode_errors
cerr = PyBytes_AsString(self.unicode_errors)
- init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr)
+ init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
+ ext_hook, use_list, cenc, cerr)
def feed(self, object next_bytes):
"""Append `next_bytes` to internal buffer."""
diff --git a/msgpack/fallback.py b/msgpack/fallback.py
index dfaaa54..bf5b1c2 100644
--- a/msgpack/fallback.py
+++ b/msgpack/fallback.py
@@ -42,11 +42,11 @@ else:
newlist_hint = lambda size: []
from msgpack.exceptions import (
- BufferFull,
- OutOfData,
- UnpackValueError,
- PackValueError,
- ExtraData)
+ BufferFull,
+ OutOfData,
+ UnpackValueError,
+ PackValueError,
+ ExtraData)
from msgpack import ExtType
@@ -65,6 +65,7 @@ TYPE_EXT = 5
DEFAULT_RECURSE_LIMIT = 511
+
def unpack(stream, **kwargs):
"""
Unpack an object from `stream`.
@@ -78,6 +79,7 @@ def unpack(stream, **kwargs):
raise ExtraData(ret, unpacker._fb_get_extradata())
return ret
+
def unpackb(packed, **kwargs):
"""
Unpack an object from `packed`.
@@ -95,6 +97,7 @@ def unpackb(packed, **kwargs):
raise ExtraData(ret, unpacker._fb_get_extradata())
return ret
+
class Unpacker(object):
"""
Streaming unpacker.
@@ -503,82 +506,111 @@ class Packer(object):
self._default = default
def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance):
- if nest_limit < 0:
- raise PackValueError("recursion limit exceeded")
- if obj is None:
- return self._buffer.write(b"\xc0")
- if isinstance(obj, bool):
- if obj:
- return self._buffer.write(b"\xc3")
- return self._buffer.write(b"\xc2")
- if isinstance(obj, int_types):
- if 0 <= obj < 0x80:
- return self._buffer.write(struct.pack("B", obj))
- if -0x20 <= obj < 0:
- return self._buffer.write(struct.pack("b", obj))
- if 0x80 <= obj <= 0xff:
- return self._buffer.write(struct.pack("BB", 0xcc, obj))
- if -0x80 <= obj < 0:
- return self._buffer.write(struct.pack(">Bb", 0xd0, obj))
- if 0xff < obj <= 0xffff:
- return self._buffer.write(struct.pack(">BH", 0xcd, obj))
- if -0x8000 <= obj < -0x80:
- return self._buffer.write(struct.pack(">Bh", 0xd1, obj))
- if 0xffff < obj <= 0xffffffff:
- return self._buffer.write(struct.pack(">BI", 0xce, obj))
- if -0x80000000 <= obj < -0x8000:
- return self._buffer.write(struct.pack(">Bi", 0xd2, obj))
- if 0xffffffff < obj <= 0xffffffffffffffff:
- return self._buffer.write(struct.pack(">BQ", 0xcf, obj))
- if -0x8000000000000000 <= obj < -0x80000000:
- return self._buffer.write(struct.pack(">Bq", 0xd3, obj))
- raise PackValueError("Integer value out of range")
- if self._use_bin_type and isinstance(obj, bytes):
- n = len(obj)
- if n <= 0xff:
- self._buffer.write(struct.pack('>BB', 0xc4, n))
- elif n <= 0xffff:
- self._buffer.write(struct.pack(">BH", 0xc5, n))
- elif n <= 0xffffffff:
- self._buffer.write(struct.pack(">BI", 0xc6, n))
- else:
- raise PackValueError("Bytes is too large")
- return self._buffer.write(obj)
- if isinstance(obj, (Unicode, bytes)):
- if isinstance(obj, Unicode):
- if self._encoding is None:
- raise TypeError(
+ default_used = False
+ while True:
+ if nest_limit < 0:
+ raise PackValueError("recursion limit exceeded")
+ if obj is None:
+ return self._buffer.write(b"\xc0")
+ if isinstance(obj, bool):
+ if obj:
+ return self._buffer.write(b"\xc3")
+ return self._buffer.write(b"\xc2")
+ if isinstance(obj, int_types):
+ if 0 <= obj < 0x80:
+ return self._buffer.write(struct.pack("B", obj))
+ if -0x20 <= obj < 0:
+ return self._buffer.write(struct.pack("b", obj))
+ if 0x80 <= obj <= 0xff:
+ return self._buffer.write(struct.pack("BB", 0xcc, obj))
+ if -0x80 <= obj < 0:
+ return self._buffer.write(struct.pack(">Bb", 0xd0, obj))
+ if 0xff < obj <= 0xffff:
+ return self._buffer.write(struct.pack(">BH", 0xcd, obj))
+ if -0x8000 <= obj < -0x80:
+ return self._buffer.write(struct.pack(">Bh", 0xd1, obj))
+ if 0xffff < obj <= 0xffffffff:
+ return self._buffer.write(struct.pack(">BI", 0xce, obj))
+ if -0x80000000 <= obj < -0x8000:
+ return self._buffer.write(struct.pack(">Bi", 0xd2, obj))
+ if 0xffffffff < obj <= 0xffffffffffffffff:
+ return self._buffer.write(struct.pack(">BQ", 0xcf, obj))
+ if -0x8000000000000000 <= obj < -0x80000000:
+ return self._buffer.write(struct.pack(">Bq", 0xd3, obj))
+ raise PackValueError("Integer value out of range")
+ if self._use_bin_type and isinstance(obj, bytes):
+ n = len(obj)
+ if n <= 0xff:
+ self._buffer.write(struct.pack('>BB', 0xc4, n))
+ elif n <= 0xffff:
+ self._buffer.write(struct.pack(">BH", 0xc5, n))
+ elif n <= 0xffffffff:
+ self._buffer.write(struct.pack(">BI", 0xc6, n))
+ else:
+ raise PackValueError("Bytes is too large")
+ return self._buffer.write(obj)
+ if isinstance(obj, (Unicode, bytes)):
+ if isinstance(obj, Unicode):
+ if self._encoding is None:
+ raise TypeError(
"Can't encode unicode string: "
"no encoding is specified")
- obj = obj.encode(self._encoding, self._unicode_errors)
- n = len(obj)
- if n <= 0x1f:
- self._buffer.write(struct.pack('B', 0xa0 + n))
- elif self._use_bin_type and n <= 0xff:
- self._buffer.write(struct.pack('>BB', 0xd9, n))
- elif n <= 0xffff:
- self._buffer.write(struct.pack(">BH", 0xda, n))
- elif n <= 0xffffffff:
- self._buffer.write(struct.pack(">BI", 0xdb, n))
- else:
- raise PackValueError("String is too large")
- return self._buffer.write(obj)
- if isinstance(obj, float):
- if self._use_float:
- return self._buffer.write(struct.pack(">Bf", 0xca, obj))
- return self._buffer.write(struct.pack(">Bd", 0xcb, obj))
- if isinstance(obj, (list, tuple)):
- n = len(obj)
- self._fb_pack_array_header(n)
- for i in xrange(n):
- self._pack(obj[i], nest_limit - 1)
- return
- if isinstance(obj, dict):
- return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj),
- nest_limit - 1)
- if self._default is not None:
- return self._pack(self._default(obj), nest_limit - 1)
- raise TypeError("Cannot serialize %r" % obj)
+ obj = obj.encode(self._encoding, self._unicode_errors)
+ n = len(obj)
+ if n <= 0x1f:
+ self._buffer.write(struct.pack('B', 0xa0 + n))
+ elif self._use_bin_type and n <= 0xff:
+ self._buffer.write(struct.pack('>BB', 0xd9, n))
+ elif n <= 0xffff:
+ self._buffer.write(struct.pack(">BH", 0xda, n))
+ elif n <= 0xffffffff:
+ self._buffer.write(struct.pack(">BI", 0xdb, n))
+ else:
+ raise PackValueError("String is too large")
+ return self._buffer.write(obj)
+ if isinstance(obj, float):
+ if self._use_float:
+ return self._buffer.write(struct.pack(">Bf", 0xca, obj))
+ return self._buffer.write(struct.pack(">Bd", 0xcb, obj))
+ if isinstance(obj, ExtType):
+ code = obj.code
+ data = obj.data
+ assert isinstance(code, int)
+ assert isinstance(data, bytes)
+ L = len(data)
+ if L == 1:
+ self._buffer.write(b'\xd4')
+ elif L == 2:
+ self._buffer.write(b'\xd5')
+ elif L == 4:
+ self._buffer.write(b'\xd6')
+ elif L == 8:
+ self._buffer.write(b'\xd7')
+ elif L == 16:
+ self._buffer.write(b'\xd8')
+ elif L <= 0xff:
+ self._buffer.write(struct.pack(">BB", 0xc7, L))
+ elif L <= 0xffff:
+ self._buffer.write(struct.pack(">BH", 0xc8, L))
+ else:
+ self._buffer.write(struct.pack(">BI", 0xc9, L))
+ self._buffer.write(struct.pack("b", code))
+ self._buffer.write(data)
+ return
+ if isinstance(obj, (list, tuple)):
+ n = len(obj)
+ self._fb_pack_array_header(n)
+ for i in xrange(n):
+ self._pack(obj[i], nest_limit - 1)
+ return
+ if isinstance(obj, dict):
+ return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj),
+ nest_limit - 1)
+ if not default_used and self._default is not None:
+ obj = self._default(obj)
+ default_used = 1
+ continue
+ raise TypeError("Cannot serialize %r" % obj)
def pack(self, obj):
self._pack(obj)
@@ -616,6 +648,35 @@ class Packer(object):
self._buffer = StringIO(ret)
return ret
+ def pack_ext_type(self, typecode, data):
+ if not isinstance(typecode, int):
+ raise TypeError("typecode must have int type.")
+ if not 0 <= typecode <= 127:
+ raise ValueError("typecode should be 0-127")
+ if not isinstance(data, bytes):
+ raise TypeError("data must have bytes type")
+ L = len(data)
+ if L > 0xffffffff:
+ raise ValueError("Too large data")
+ if L == 1:
+ self._buffer.write(b'\xd4')
+ elif L == 2:
+ self._buffer.write(b'\xd5')
+ elif L == 4:
+ self._buffer.write(b'\xd6')
+ elif L == 8:
+ self._buffer.write(b'\xd7')
+ elif L == 16:
+ self._buffer.write(b'\xd8')
+ elif L <= 0xff:
+ self._buffer.write(b'\xc7' + struct.pack('B', L))
+ elif L <= 0xffff:
+ self._buffer.write(b'\xc8' + struct.pack('>H', L))
+ else:
+ self._buffer.write(b'\xc9' + struct.pack('>I', L))
+ self._buffer.write(struct.pack('B', typecode))
+ self._buffer.write(data)
+
def _fb_pack_array_header(self, n):
if n <= 0x0f:
return self._buffer.write(struct.pack('B', 0x90 + n))
diff --git a/msgpack/pack.h b/msgpack/pack.h
index 001a0c1..a71c87b 100644
--- a/msgpack/pack.h
+++ b/msgpack/pack.h
@@ -71,6 +71,8 @@ static inline int msgpack_pack_raw(msgpack_packer* pk, size_t l);
static inline int msgpack_pack_bin(msgpack_packer* pk, size_t l);
static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l);
+static inline int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l);
+
static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l)
{
char* buf = pk->buf;
diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h
index d228d7a..2879bbd 100644
--- a/msgpack/pack_template.h
+++ b/msgpack/pack_template.h
@@ -687,7 +687,7 @@ static inline int msgpack_pack_raw(msgpack_packer* x, size_t l)
static inline int msgpack_pack_bin(msgpack_packer *x, size_t l)
{
if (!x->use_bin_type) {
- return msgpack_pack_raw(x, l)
+ return msgpack_pack_raw(x, l);
}
if (l < 256) {
unsigned char buf[2] = {0xc4, (unsigned char)l};
@@ -705,9 +705,69 @@ static inline int msgpack_pack_bin(msgpack_packer *x, size_t l)
static inline int msgpack_pack_raw_body(msgpack_packer* x, const void* b, size_t l)
{
- msgpack_pack_append_buffer(x, (const unsigned char*)b, l);
+ if (l > 0) msgpack_pack_append_buffer(x, (const unsigned char*)b, l);
+ return 0;
}
+/*
+ * Ext
+ */
+static inline int msgpack_pack_ext(msgpack_packer* x, int8_t typecode, size_t l)
+{
+ if (l == 1) {
+ unsigned char buf[2];
+ buf[0] = 0xd4;
+ buf[1] = (unsigned char)typecode;
+ msgpack_pack_append_buffer(x, buf, 2);
+ }
+ else if(l == 2) {
+ unsigned char buf[2];
+ buf[0] = 0xd5;
+ buf[1] = (unsigned char)typecode;
+ msgpack_pack_append_buffer(x, buf, 2);
+ }
+ else if(l == 4) {
+ unsigned char buf[2];
+ buf[0] = 0xd6;
+ buf[1] = (unsigned char)typecode;
+ msgpack_pack_append_buffer(x, buf, 2);
+ }
+ else if(l == 8) {
+ unsigned char buf[2];
+ buf[0] = 0xd7;
+ buf[1] = (unsigned char)typecode;
+ msgpack_pack_append_buffer(x, buf, 2);
+ }
+ else if(l == 16) {
+ unsigned char buf[2];
+ buf[0] = 0xd8;
+ buf[1] = (unsigned char)typecode;
+ msgpack_pack_append_buffer(x, buf, 2);
+ }
+ else if(l < 256) {
+ unsigned char buf[3];
+ buf[0] = 0xc7;
+ buf[1] = l;
+ buf[2] = (unsigned char)typecode;
+ msgpack_pack_append_buffer(x, buf, 3);
+ } else if(l < 65536) {
+ unsigned char buf[4];
+ buf[0] = 0xc8;
+ _msgpack_store16(&buf[1], (uint16_t)l);
+ buf[3] = (unsigned char)typecode;
+ msgpack_pack_append_buffer(x, buf, 4);
+ } else {
+ unsigned char buf[6];
+ buf[0] = 0xc9;
+ _msgpack_store32(&buf[1], (uint32_t)l);
+ buf[5] = (unsigned char)typecode;
+ msgpack_pack_append_buffer(x, buf, 6);
+ }
+
+}
+
+
+
#undef msgpack_pack_append_buffer
#undef TAKE8_8
diff --git a/msgpack/unpack.h b/msgpack/unpack.h
index 03c735e..aced40b 100644
--- a/msgpack/unpack.h
+++ b/msgpack/unpack.h
@@ -24,6 +24,7 @@ typedef struct unpack_user {
PyObject *object_hook;
bool has_pairs_hook;
PyObject *list_hook;
+ PyObject *ext_hook;
const char *encoding;
const char *unicode_errors;
} unpack_user;
@@ -156,7 +157,7 @@ static inline int unpack_callback_array_item(unpack_user* u, unsigned int curren
static inline int unpack_callback_array_end(unpack_user* u, msgpack_unpack_object* c)
{
if (u->list_hook) {
- PyObject *new_c = PyEval_CallFunction(u->list_hook, "(O)", *c);
+ PyObject *new_c = PyObject_CallFunctionObjArgs(u->list_hook, *c, NULL);
if (!new_c)
return -1;
Py_DECREF(*c);
@@ -202,7 +203,7 @@ static inline int unpack_callback_map_item(unpack_user* u, unsigned int current,
static inline int unpack_callback_map_end(unpack_user* u, msgpack_unpack_object* c)
{
if (u->object_hook) {
- PyObject *new_c = PyEval_CallFunction(u->object_hook, "(O)", *c);
+ PyObject *new_c = PyObject_CallFunctionObjArgs(u->object_hook, *c, NULL);
if (!new_c)
return -1;
@@ -235,4 +236,25 @@ static inline int unpack_callback_bin(unpack_user* u, const char* b, const char*
return 0;
}
+static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos,
+ unsigned int lenght, msgpack_unpack_object* o)
+{
+ PyObject *py;
+ int8_t typecode = (int8_t)*pos++;
+ if (!u->ext_hook) {
+ PyErr_SetString(PyExc_AssertionError, "u->ext_hook cannot be NULL");
+ return -1;
+ }
+ // length also includes the typecode, so the actual data is lenght-1
+#if PY_MAJOR_VERSION == 2
+ py = PyObject_CallFunction(u->ext_hook, "(is#)", typecode, pos, lenght-1);
+#else
+ py = PyObject_CallFunction(u->ext_hook, "(iy#)", typecode, pos, lenght-1);
+#endif
+ if (!py)
+ return -1;
+ *o = py;
+ return 0;
+}
+
#include "unpack_template.h"
diff --git a/msgpack/unpack_define.h b/msgpack/unpack_define.h
index 0b14f52..0dd708d 100644
--- a/msgpack/unpack_define.h
+++ b/msgpack/unpack_define.h
@@ -45,10 +45,11 @@ typedef enum {
CS_BIN_8 = 0x04,
CS_BIN_16 = 0x05,
CS_BIN_32 = 0x06,
- //CS_ = 0x07,
- //CS_ = 0x08,
- //CS_ = 0x09,
+ CS_EXT_8 = 0x07,
+ CS_EXT_16 = 0x08,
+ CS_EXT_32 = 0x09,
+
CS_FLOAT = 0x0a,
CS_DOUBLE = 0x0b,
CS_UINT_8 = 0x0c,
@@ -60,6 +61,12 @@ typedef enum {
CS_INT_32 = 0x12,
CS_INT_64 = 0x13,
+ //CS_FIXEXT1 = 0x14,
+ //CS_FIXEXT2 = 0x15,
+ //CS_FIXEXT4 = 0x16,
+ //CS_FIXEXT8 = 0x17,
+ //CS_FIXEXT16 = 0x18,
+
CS_RAW_8 = 0x19,
CS_RAW_16 = 0x1a,
CS_RAW_32 = 0x1b,
@@ -70,6 +77,7 @@ typedef enum {
ACS_RAW_VALUE,
ACS_BIN_VALUE,
+ ACS_EXT_VALUE,
} msgpack_unpack_state;
@@ -85,4 +93,3 @@ typedef enum {
#endif
#endif /* msgpack/unpack_define.h */
-
diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h
index 25229ac..d34eced 100644
--- a/msgpack/unpack_template.h
+++ b/msgpack/unpack_template.h
@@ -178,15 +178,23 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
switch(*p) {
case 0xc0: // nil
push_simple_value(_nil);
- //case 0xc1: // string
- // again_terminal_trail(NEXT_CS(p), p+1);
+ //case 0xc1: // never used
case 0xc2: // false
push_simple_value(_false);
case 0xc3: // true
push_simple_value(_true);
- //case 0xc7:
- //case 0xc8:
- //case 0xc9:
+ case 0xc4: // bin 8
+ again_fixed_trail(NEXT_CS(p), 1);
+ case 0xc5: // bin 16
+ again_fixed_trail(NEXT_CS(p), 2);
+ case 0xc6: // bin 32
+ again_fixed_trail(NEXT_CS(p), 4);
+ case 0xc7: // ext 8
+ again_fixed_trail(NEXT_CS(p), 1);
+ case 0xc8: // ext 16
+ again_fixed_trail(NEXT_CS(p), 2);
+ case 0xc9: // ext 32
+ again_fixed_trail(NEXT_CS(p), 4);
case 0xca: // float
case 0xcb: // double
case 0xcc: // unsigned int 8
@@ -198,15 +206,17 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
case 0xd2: // signed int 32
case 0xd3: // signed int 64
again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03));
- case 0xc4: // bin 8
- case 0xc5: // bin 16
- case 0xc6: // bin 32
- //case 0xd4:
- //case 0xd5:
- //case 0xd6: // big integer 16
- //case 0xd7: // big integer 32
- //case 0xd8: // big float 16
- case 0xd9: // raw 8
+ case 0xd4: // fixext 1
+ case 0xd5: // fixext 2
+ case 0xd6: // fixext 4
+ case 0xd7: // fixext 8
+ again_fixed_trail_if_zero(ACS_EXT_VALUE,
+ (1 << (((unsigned int)*p) & 0x03))+1,
+ _ext_zero);
+ case 0xd8: // fixext 16
+ again_fixed_trail_if_zero(ACS_EXT_VALUE, 16+1, _ext_zero);
+ case 0xd9: // str 8
+ again_fixed_trail(NEXT_CS(p), 1);
case 0xda: // raw 16
case 0xdb: // raw 32
case 0xdc: // array 16
@@ -237,8 +247,16 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
if((size_t)(pe - p) < trail) { goto _out; }
n = p; p += trail - 1;
switch(cs) {
- //case CS_
- //case CS_
+ case CS_EXT_8:
+ again_fixed_trail_if_zero(ACS_EXT_VALUE, *(uint8_t*)n+1, _ext_zero);
+ case CS_EXT_16:
+ again_fixed_trail_if_zero(ACS_EXT_VALUE,
+ _msgpack_load16(uint16_t,n)+1,
+ _ext_zero);
+ case CS_EXT_32:
+ again_fixed_trail_if_zero(ACS_EXT_VALUE,
+ _msgpack_load32(uint32_t,n)+1,
+ _ext_zero);
case CS_FLOAT: {
union { uint32_t i; float f; } mem;
mem.i = _msgpack_load32(uint32_t,n);
@@ -269,26 +287,6 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
case CS_INT_64:
push_fixed_value(_int64, _msgpack_load64(int64_t,n));
- //case CS_
- //case CS_
- //case CS_BIG_INT_16:
- // again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load16(uint16_t,n), _big_int_zero);
- //case CS_BIG_INT_32:
- // again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load32(uint32_t,n), _big_int_zero);
- //case ACS_BIG_INT_VALUE:
- //_big_int_zero:
- // // FIXME
- // push_variable_value(_big_int, data, n, trail);
-
- //case CS_BIG_FLOAT_16:
- // again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load16(uint16_t,n), _big_float_zero);
- //case CS_BIG_FLOAT_32:
- // again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load32(uint32_t,n), _big_float_zero);
- //case ACS_BIG_FLOAT_VALUE:
- //_big_float_zero:
- // // FIXME
- // push_variable_value(_big_float, data, n, trail);
-
case CS_BIN_8:
again_fixed_trail_if_zero(ACS_BIN_VALUE, *(uint8_t*)n, _bin_zero);
case CS_BIN_16:
@@ -309,6 +307,10 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
_raw_zero:
push_variable_value(_raw, data, n, trail);
+ case ACS_EXT_VALUE:
+ _ext_zero:
+ push_variable_value(_ext, data, n, trail);
+
case CS_ARRAY_16:
start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM);
case CS_ARRAY_32:
diff --git a/test/test_extension.py b/test/test_extension.py
new file mode 100644
index 0000000..2f85ce3
--- /dev/null
+++ b/test/test_extension.py
@@ -0,0 +1,57 @@
+from __future__ import print_function
+import array
+import msgpack
+from msgpack import ExtType
+
+
+def test_pack_ext_type():
+ def p(s):
+ packer = msgpack.Packer()
+ packer.pack_ext_type(0x42, s)
+ return packer.bytes()
+ assert p(b'A') == b'\xd4\x42A' # fixext 1
+ assert p(b'AB') == b'\xd5\x42AB' # fixext 2
+ assert p(b'ABCD') == b'\xd6\x42ABCD' # fixext 4
+ assert p(b'ABCDEFGH') == b'\xd7\x42ABCDEFGH' # fixext 8
+ assert p(b'A'*16) == b'\xd8\x42' + b'A'*16 # fixext 16
+ assert p(b'ABC') == b'\xc7\x03\x42ABC' # ext 8
+ assert p(b'A'*0x0123) == b'\xc8\x01\x23\x42' + b'A'*0x0123 # ext 16
+ assert p(b'A'*0x00012345) == b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345 # ext 32
+
+
+def test_unpack_ext_type():
+ def check(b, expected):
+ assert msgpack.unpackb(b) == expected
+
+ check(b'\xd4\x42A', ExtType(0x42, b'A')) # fixext 1
+ check(b'\xd5\x42AB', ExtType(0x42, b'AB')) # fixext 2
+ check(b'\xd6\x42ABCD', ExtType(0x42, b'ABCD')) # fixext 4
+ check(b'\xd7\x42ABCDEFGH', ExtType(0x42, b'ABCDEFGH')) # fixext 8
+ check(b'\xd8\x42' + b'A'*16, ExtType(0x42, b'A'*16)) # fixext 16
+ check(b'\xc7\x03\x42ABC', ExtType(0x42, b'ABC')) # ext 8
+ check(b'\xc8\x01\x23\x42' + b'A'*0x0123,
+ ExtType(0x42, b'A'*0x0123)) # ext 16
+ check(b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345,
+ ExtType(0x42, b'A'*0x00012345)) # ext 32
+
+
+def test_extension_type():
+ def default(obj):
+ print('default called', obj)
+ if isinstance(obj, array.array):
+ typecode = 123 # application specific typecode
+ data = obj.tostring()
+ return ExtType(typecode, data)
+ raise TypeError("Unknwon type object %r" % (obj,))
+
+ def ext_hook(code, data):
+ print('ext_hook called', code, data)
+ assert code == 123
+ obj = array.array('d')
+ obj.fromstring(data)
+ return obj
+
+ obj = [42, b'hello', array.array('d', [1.1, 2.2, 3.3])]
+ s = msgpack.packb(obj, default=default)
+ obj2 = msgpack.unpackb(s, ext_hook=ext_hook)
+ assert obj == obj2
diff --git a/test/test_newspec.py b/test/test_newspec.py
index 8bc2cfe..ab05029 100644
--- a/test/test_newspec.py
+++ b/test/test_newspec.py
@@ -1,6 +1,6 @@
# coding: utf-8
-from msgpack import packb, unpackb
+from msgpack import packb, unpackb, ExtType
def test_str8():
@@ -66,4 +66,23 @@ def test_bin32():
assert b[5:] == data
assert unpackb(b) == data
-
+def test_ext():
+ def check(ext, packed):
+ assert packb(ext) == packed
+ assert unpackb(packed) == ext
+ check(ExtType(0x42, b'Z'), b'\xd4\x42Z') # fixext 1
+ check(ExtType(0x42, b'ZZ'), b'\xd5\x42ZZ') # fixext 2
+ check(ExtType(0x42, b'Z'*4), b'\xd6\x42' + b'Z'*4) # fixext 4
+ check(ExtType(0x42, b'Z'*8), b'\xd7\x42' + b'Z'*8) # fixext 8
+ check(ExtType(0x42, b'Z'*16), b'\xd8\x42' + b'Z'*16) # fixext 16
+ # ext 8
+ check(ExtType(0x42, b''), b'\xc7\x00\x42')
+ check(ExtType(0x42, b'Z'*255), b'\xc7\xff\x42' + b'Z'*255)
+ # ext 16
+ check(ExtType(0x42, b'Z'*256), b'\xc8\x01\x00\x42' + b'Z'*256)
+ check(ExtType(0x42, b'Z'*0xffff), b'\xc8\xff\xff\x42' + b'Z'*0xffff)
+ # ext 32
+ check(ExtType(0x42, b'Z'*0x10000), b'\xc9\x00\x01\x00\x00\x42' + b'Z'*0x10000)
+ # needs large memory
+ #check(ExtType(0x42, b'Z'*0xffffffff),
+ # b'\xc9\xff\xff\xff\xff\x42' + b'Z'*0xffffffff)
diff --git a/test/test_obj.py b/test/test_obj.py
index fbf610c..9083218 100644
--- a/test/test_obj.py
+++ b/test/test_obj.py
@@ -35,7 +35,7 @@ def test_only_one_obj_hook():
unpackb(b'', object_hook=lambda x: x, object_pairs_hook=lambda x: x)
def test_bad_hook():
- with raises(ValueError):
+ with raises(TypeError):
packed = packb([3, 1+2j], default=lambda o: o)
unpacked = unpackb(packed, use_list=1)
diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py
index 9db14ca..f541207 100644
--- a/test/test_sequnpack.py
+++ b/test/test_sequnpack.py
@@ -84,4 +84,3 @@ def test_readbytes():
assert unpacker.read_bytes(3) == b'oob'
assert unpacker.unpack() == ord(b'a')
assert unpacker.unpack() == ord(b'r')
-