summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorINADA Naoki <inada-n@klab.com>2013-10-20 15:08:31 +0900
committerINADA Naoki <inada-n@klab.com>2013-10-20 15:08:31 +0900
commit27f0cba8a5f36393517ee85d2c339847b76e0c6b (patch)
tree470bc84905240c82d2e1f4e710d55a288661f635
parent7123341ca89a9a3afee8521cc16a1a419ea8871e (diff)
parent6386481024ec045d9ef991a2c975902276812508 (diff)
downloadmsgpack-python-27f0cba8a5f36393517ee85d2c339847b76e0c6b.tar.gz
Merge branch 'master' of https://github.com/antocuni/msgpack-python into newspec
Conflicts: msgpack/fallback.py msgpack/unpack.h msgpack/unpack_define.h msgpack/unpack_template.h
-rw-r--r--README.rst8
-rw-r--r--msgpack/_packer.pyx12
-rw-r--r--msgpack/_unpacker.pyx39
-rw-r--r--msgpack/pack.h2
-rw-r--r--msgpack/pack_template.h60
-rw-r--r--msgpack/unpack.h18
-rw-r--r--msgpack/unpack_define.h14
-rw-r--r--msgpack/unpack_template.h69
-rw-r--r--setup.py1
-rw-r--r--test/test_extension.py64
-rw-r--r--test/test_sequnpack.py15
11 files changed, 261 insertions, 41 deletions
diff --git a/README.rst b/README.rst
index 294cd63..600d7f7 100644
--- a/README.rst
+++ b/README.rst
@@ -140,6 +140,14 @@ It is also possible to pack/unpack custom data types. Here is an example for
``object_pairs_hook`` callback may instead be used to receive a list of
key-value pairs.
+Extended types
+^^^^^^^^^^^^^^^
+
+It is also possible to pack/unpack custom data types using the msgpack feature
+of "extended types". For example, msgpack-pypy uses it to provide very fast serialization of int/float lists on top of PyPy (experimental for now):
+
+https://bitbucket.org/antocuni/msgpack-pypy/src/default/msgpack_pypy.py
+
Advanced unpacking control
^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx
index 7082445..df670ed 100644
--- a/msgpack/_packer.pyx
+++ b/msgpack/_packer.pyx
@@ -5,6 +5,7 @@ from cpython cimport *
from libc.stdlib cimport *
from libc.string cimport *
from libc.limits cimport *
+from libc.stdint cimport int8_t
from msgpack.exceptions import PackValueError
@@ -29,6 +30,7 @@ cdef extern from "pack.h":
int msgpack_pack_raw(msgpack_packer* pk, size_t l)
int msgpack_pack_bin(msgpack_packer* pk, size_t l)
int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l)
+ int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l)
cdef int DEFAULT_RECURSE_LIMIT=511
@@ -183,6 +185,9 @@ cdef class Packer(object):
for v in o:
ret = self._pack(v, nest_limit-1)
if ret != 0: break
+ elif self.handle_unknown_type(o):
+ # it means that obj was successfully packed, so we are done
+ return 0
elif self._default:
o = self._default(o)
ret = self._pack(o, nest_limit-1)
@@ -202,6 +207,13 @@ cdef class Packer(object):
self.pk.length = 0
return buf
+ def handle_unknown_type(self, obj):
+ # Extension hook for objects the packer cannot serialize natively.
+ # The packing code treats a truthy return value as "obj has already been
+ # written" and stops there; this default returns None (falsy), so the
+ # packer moves on to its remaining fallbacks. Subclasses may override it
+ # and emit the object themselves, e.g. via pack_extended_type().
+ return None
+
+    def pack_extended_type(self, typecode, data):
+        """Write one msgpack "ext" value through this packer.
+
+        The ext header (format byte, payload length where required, and
+        `typecode`) is emitted first, followed by the raw bytes of `data`.
+        """
+        payload_size = len(data)
+        msgpack_pack_ext(&self.pk, typecode, payload_size)
+        msgpack_pack_raw_body(&self.pk, data, payload_size)
+
def pack_array_header(self, size_t size):
cdef int ret = msgpack_pack_array(&self.pk, size)
if ret == -1:
diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx
index 7b0c8a6..cf30670 100644
--- a/msgpack/_unpacker.pyx
+++ b/msgpack/_unpacker.pyx
@@ -24,6 +24,7 @@ cdef extern from "unpack.h":
PyObject* object_hook
bint has_pairs_hook # call object_hook with k-v pairs
PyObject* list_hook
+ PyObject* ext_type_hook
char *encoding
char *unicode_errors
@@ -45,6 +46,7 @@ cdef extern from "unpack.h":
cdef inline init_ctx(unpack_context *ctx,
object object_hook, object object_pairs_hook, object list_hook,
+ object ext_type_hook,
bint use_list, char* encoding, char* unicode_errors):
unpack_init(ctx)
ctx.user.use_list = use_list
@@ -71,9 +73,17 @@ cdef inline init_ctx(unpack_context *ctx,
raise TypeError("list_hook must be a callable.")
ctx.user.list_hook = <PyObject*>list_hook
+ if ext_type_hook is not None:
+ if not PyCallable_Check(ext_type_hook):
+ raise TypeError("ext_type_hook must be a callable.")
+ ctx.user.ext_type_hook = <PyObject*>ext_type_hook
+
ctx.user.encoding = encoding
ctx.user.unicode_errors = unicode_errors
+def default_read_extended_type(typecode, data):
+ # Fallback handler for msgpack "ext" values: it never decodes anything and
+ # always raises NotImplementedError naming the offending typecode.
+ # `data` (the ext payload) is accepted only to match the hook signature.
+ raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode)
+
def unpackb(object packed, object object_hook=None, object list_hook=None,
bint use_list=1, encoding=None, unicode_errors="strict",
object_pairs_hook=None,
@@ -106,7 +116,8 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
unicode_errors = unicode_errors.encode('ascii')
cerr = PyBytes_AsString(unicode_errors)
- init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr)
+ init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, default_read_extended_type,
+ use_list, cenc, cerr)
ret = unpack_construct(&ctx, buf, buf_len, &off)
if ret == 1:
obj = unpack_data(&ctx)
@@ -248,7 +259,10 @@ cdef class Unpacker(object):
self.unicode_errors = unicode_errors
cerr = PyBytes_AsString(self.unicode_errors)
- init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr)
+ ext_type_hook = self.read_extended_type
+ Py_INCREF(ext_type_hook)
+ init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
+ ext_type_hook, use_list, cenc, cerr)
def feed(self, object next_bytes):
"""Append `next_bytes` to internal buffer."""
@@ -358,6 +372,24 @@ cdef class Unpacker(object):
"""
return self._unpack(unpack_construct, write_bytes)
+    def unpack_one(self, object write_bytes=None):
+        """
+        Unpack exactly one object and require that it uses up all buffered data.
+
+        If write_bytes is not None, it will be called with parts of the raw
+        message as it is unpacked.
+
+        Raises ``UnpackValueError`` if there are not enough bytes to unpack.
+        Raises ``ExtraData`` if there are still bytes left after the unpacking.
+        """
+        try:
+            # Forward write_bytes so the callback documented above is actually
+            # invoked; previously the argument was accepted but dropped.
+            result = self._unpack(unpack_construct, write_bytes)
+        except OutOfData:
+            raise UnpackValueError("Data is not enough")
+        if self.buf_head < self.buf_tail:
+            # Bound the slice at buf_tail: the check above treats
+            # [buf_head, buf_tail) as the unread region, so an open-ended
+            # slice could report bytes that are not part of the fed data.
+            raise ExtraData(result, self.buf[self.buf_head:self.buf_tail])
+        return result
+
def skip(self, object write_bytes=None):
"""
read and ignore one object, returning None
@@ -385,6 +417,9 @@ cdef class Unpacker(object):
"""
return self._unpack(read_map_header, write_bytes)
+ def read_extended_type(self, typecode, data):
+ # Called with the typecode and payload of each msgpack "ext" value that is
+ # unpacked; whatever it returns becomes the unpacked object.
+ # This default delegates to default_read_extended_type(), which raises
+ # NotImplementedError; subclasses override it to decode their own types.
+ return default_read_extended_type(typecode, data)
+
def __iter__(self):
return self
diff --git a/msgpack/pack.h b/msgpack/pack.h
index 001a0c1..a71c87b 100644
--- a/msgpack/pack.h
+++ b/msgpack/pack.h
@@ -71,6 +71,8 @@ static inline int msgpack_pack_raw(msgpack_packer* pk, size_t l);
static inline int msgpack_pack_bin(msgpack_packer* pk, size_t l);
static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l);
+static inline int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l);
+
static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l)
{
char* buf = pk->buf;
diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h
index d228d7a..0fe9514 100644
--- a/msgpack/pack_template.h
+++ b/msgpack/pack_template.h
@@ -708,6 +708,66 @@ static inline int msgpack_pack_raw_body(msgpack_packer* x, const void* b, size_t
msgpack_pack_append_buffer(x, (const unsigned char*)b, l);
}
+/*
+ * Ext
+ */
+
+static inline int msgpack_pack_ext(msgpack_packer* x, int8_t typecode, size_t l)
+{
+    /*
+     * Emit the header of an "ext" value for a payload of `l` bytes: the
+     * format byte, the payload length where the format carries one, and the
+     * application-defined typecode. No payload bytes are written here.
+     */
+    unsigned char head[6];
+    size_t head_len;
+    unsigned char fixext = 0;
+
+    /* Payload sizes 1, 2, 4, 8 and 16 have dedicated "fixext" format bytes. */
+    switch (l) {
+    case 1:  fixext = 0xd4; break;
+    case 2:  fixext = 0xd5; break;
+    case 4:  fixext = 0xd6; break;
+    case 8:  fixext = 0xd7; break;
+    case 16: fixext = 0xd8; break;
+    default: break;
+    }
+
+    if (fixext != 0) {
+        /* fixext: the size is implied, only the typecode follows */
+        head[0] = fixext;
+        head[1] = (unsigned char)typecode;
+        head_len = 2;
+    } else if (l < 256) {
+        /* ext 8: one length byte */
+        head[0] = 0xc7;
+        head[1] = (unsigned char)l;
+        head[2] = (unsigned char)typecode;
+        head_len = 3;
+    } else if (l < 65536) {
+        /* ext 16: two length bytes */
+        head[0] = 0xc8;
+        _msgpack_store16(&head[1], (uint16_t)l);
+        head[3] = (unsigned char)typecode;
+        head_len = 4;
+    } else {
+        /*
+         * ext 32: four length bytes.
+         * NOTE(review): the cast truncates lengths that do not fit in
+         * 32 bits -- confirm callers never pass such sizes.
+         */
+        head[0] = 0xc9;
+        _msgpack_store32(&head[1], (uint32_t)l);
+        head[5] = (unsigned char)typecode;
+        head_len = 6;
+    }
+
+    msgpack_pack_append_buffer(x, head, head_len);
+}
+
+
+
#undef msgpack_pack_append_buffer
#undef TAKE8_8
diff --git a/msgpack/unpack.h b/msgpack/unpack.h
index 03c735e..327a524 100644
--- a/msgpack/unpack.h
+++ b/msgpack/unpack.h
@@ -24,6 +24,7 @@ typedef struct unpack_user {
PyObject *object_hook;
bool has_pairs_hook;
PyObject *list_hook;
+ PyObject *ext_type_hook;
const char *encoding;
const char *unicode_errors;
} unpack_user;
@@ -235,4 +236,21 @@ static inline int unpack_callback_bin(unpack_user* u, const char* b, const char*
return 0;
}
+static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos,
+                                      unsigned int lenght, msgpack_unpack_object* o)
+{
+    /*
+     * Decode one "ext" value by calling u->ext_type_hook(typecode, data) and
+     * storing the object it returns in *o. `pos` points at the typecode
+     * byte, which is immediately followed by the payload. `base` is unused.
+     * Returns 0 on success, -1 on failure.
+     */
+    PyObject *py;
+    int8_t typecode = (int8_t)*pos++;
+    if (!u->ext_type_hook) {
+        PyErr_SetString(PyExc_AssertionError, "u->ext_type_hook cannot be NULL");
+        return -1;
+    }
+    /*
+     * `lenght` also counts the typecode byte, so the payload is lenght-1
+     * bytes long. The payload is arbitrary binary data: on Python 3 the
+     * "s#" format would decode it as UTF-8 into a str (and fail on
+     * non-UTF-8 bytes), so "y#" is used there to hand the hook a bytes
+     * object. On Python 2 "s#" already yields a byte string.
+     */
+#if PY_MAJOR_VERSION >= 3
+    py = PyEval_CallFunction(u->ext_type_hook, "(iy#)", typecode, pos, lenght-1);
+#else
+    py = PyEval_CallFunction(u->ext_type_hook, "(is#)", typecode, pos, lenght-1);
+#endif
+    if (!py)
+        return -1;  /* the hook call failed; propagate its error */
+    *o = py;
+    return 0;
+}
+
#include "unpack_template.h"
diff --git a/msgpack/unpack_define.h b/msgpack/unpack_define.h
index 0b14f52..2ee92b5 100644
--- a/msgpack/unpack_define.h
+++ b/msgpack/unpack_define.h
@@ -45,10 +45,11 @@ typedef enum {
CS_BIN_8 = 0x04,
CS_BIN_16 = 0x05,
CS_BIN_32 = 0x06,
- //CS_ = 0x07,
- //CS_ = 0x08,
- //CS_ = 0x09,
+ CS_EXT_8 = 0x07,
+ CS_EXT_16 = 0x08,
+ CS_EXT_32 = 0x09,
+
CS_FLOAT = 0x0a,
CS_DOUBLE = 0x0b,
CS_UINT_8 = 0x0c,
@@ -60,6 +61,12 @@ typedef enum {
CS_INT_32 = 0x12,
CS_INT_64 = 0x13,
+ //CS_FIXEXT1 = 0x14,
+ //CS_FIXEXT2 = 0x15,
+ //CS_FIXEXT4 = 0x16,
+ //CS_FIXEXT8 = 0x17,
+ //CS_FIXEXT16 = 0x18,
+
CS_RAW_8 = 0x19,
CS_RAW_16 = 0x1a,
CS_RAW_32 = 0x1b,
@@ -70,6 +77,7 @@ typedef enum {
ACS_RAW_VALUE,
ACS_BIN_VALUE,
+ ACS_EXT_VALUE,
} msgpack_unpack_state;
diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h
index 25229ac..1a709ec 100644
--- a/msgpack/unpack_template.h
+++ b/msgpack/unpack_template.h
@@ -184,9 +184,15 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
push_simple_value(_false);
case 0xc3: // true
push_simple_value(_true);
- //case 0xc7:
- //case 0xc8:
- //case 0xc9:
+ //case 0xc4:
+ //case 0xc5:
+ //case 0xc6:
+ case 0xc7: // ext 8
+ again_fixed_trail(NEXT_CS(p), 1);
+ case 0xc8: // ext 16
+ again_fixed_trail(NEXT_CS(p), 2);
+ case 0xc9: // ext 32
+ again_fixed_trail(NEXT_CS(p), 4);
case 0xca: // float
case 0xcb: // double
case 0xcc: // unsigned int 8
@@ -198,15 +204,16 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
case 0xd2: // signed int 32
case 0xd3: // signed int 64
again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03));
- case 0xc4: // bin 8
- case 0xc5: // bin 16
- case 0xc6: // bin 32
- //case 0xd4:
- //case 0xd5:
- //case 0xd6: // big integer 16
- //case 0xd7: // big integer 32
- //case 0xd8: // big float 16
- case 0xd9: // raw 8
+ case 0xd4: // fixext 1
+ case 0xd5: // fixext 2
+ case 0xd6: // fixext 4
+ case 0xd7: // fixext 8
+ again_fixed_trail_if_zero(ACS_EXT_VALUE,
+ (1 << (((unsigned int)*p) & 0x03))+1,
+ _ext_zero);
+ case 0xd8: // fixext 16
+ again_fixed_trail_if_zero(ACS_EXT_VALUE, 16+1, _ext_zero);
+ //case 0xd9:
case 0xda: // raw 16
case 0xdb: // raw 32
case 0xdc: // array 16
@@ -237,8 +244,16 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
if((size_t)(pe - p) < trail) { goto _out; }
n = p; p += trail - 1;
switch(cs) {
- //case CS_
- //case CS_
+ case CS_EXT_8:
+ again_fixed_trail_if_zero(ACS_EXT_VALUE, *(uint8_t*)n+1, _ext_zero);
+ case CS_EXT_16:
+ again_fixed_trail_if_zero(ACS_EXT_VALUE,
+ _msgpack_load16(uint16_t,n)+1,
+ _ext_zero);
+ case CS_EXT_32:
+ again_fixed_trail_if_zero(ACS_EXT_VALUE,
+ _msgpack_load32(uint32_t,n)+1,
+ _ext_zero);
case CS_FLOAT: {
union { uint32_t i; float f; } mem;
mem.i = _msgpack_load32(uint32_t,n);
@@ -269,26 +284,6 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
case CS_INT_64:
push_fixed_value(_int64, _msgpack_load64(int64_t,n));
- //case CS_
- //case CS_
- //case CS_BIG_INT_16:
- // again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load16(uint16_t,n), _big_int_zero);
- //case CS_BIG_INT_32:
- // again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load32(uint32_t,n), _big_int_zero);
- //case ACS_BIG_INT_VALUE:
- //_big_int_zero:
- // // FIXME
- // push_variable_value(_big_int, data, n, trail);
-
- //case CS_BIG_FLOAT_16:
- // again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load16(uint16_t,n), _big_float_zero);
- //case CS_BIG_FLOAT_32:
- // again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load32(uint32_t,n), _big_float_zero);
- //case ACS_BIG_FLOAT_VALUE:
- //_big_float_zero:
- // // FIXME
- // push_variable_value(_big_float, data, n, trail);
-
case CS_BIN_8:
again_fixed_trail_if_zero(ACS_BIN_VALUE, *(uint8_t*)n, _bin_zero);
case CS_BIN_16:
@@ -309,6 +304,10 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
_raw_zero:
push_variable_value(_raw, data, n, trail);
+ case ACS_EXT_VALUE:
+ _ext_zero:
+ push_variable_value(_ext, data, n, trail);
+
case CS_ARRAY_16:
start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM);
case CS_ARRAY_32:
@@ -320,7 +319,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
case CS_MAP_32:
/* FIXME security guard */
start_container(_map, _msgpack_load32(uint32_t,n), CT_MAP_KEY);
-
+
default:
goto _failed;
}
diff --git a/setup.py b/setup.py
index 1055a61..83ae79f 100644
--- a/setup.py
+++ b/setup.py
@@ -92,6 +92,7 @@ if not hasattr(sys, 'pypy_version_info'):
libraries=libraries,
include_dirs=['.'],
define_macros=macros,
+ extra_compile_args=['-O0'],
))
del libraries, macros
diff --git a/test/test_extension.py b/test/test_extension.py
new file mode 100644
index 0000000..94117e1
--- /dev/null
+++ b/test/test_extension.py
@@ -0,0 +1,64 @@
+import py
+import array
+import struct
+import msgpack
+
+def test_pack_extended_type():
+    """Check the exact bytes pack_extended_type() emits for each ext format."""
+    def p(s):
+        packer = msgpack.Packer()
+        packer.pack_extended_type(0x42, s)
+        return packer.bytes()
+    # Bytes literals are used throughout: the packer consumes and produces
+    # binary data, and b'...' means the same thing on Python 2 and Python 3.
+    assert p(b'A') == b'\xd4\x42A' # fixext 1
+    assert p(b'AB') == b'\xd5\x42AB' # fixext 2
+    assert p(b'ABCD') == b'\xd6\x42ABCD' # fixext 4
+    assert p(b'ABCDEFGH') == b'\xd7\x42ABCDEFGH' # fixext 8
+    assert p(b'A'*16) == b'\xd8\x42' + b'A'*16 # fixext 16
+    assert p(b'ABC') == b'\xc7\x03\x42ABC' # ext 8
+    assert p(b'A'*0x0123) == b'\xc8\x01\x23\x42' + b'A'*0x0123 # ext 16
+    assert p(b'A'*0x00012345) == b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345 # ext 32
+
+def test_unpack_extended_type():
+    """Check that every ext format is decoded into (typecode, payload)."""
+    class MyUnpacker(msgpack.Unpacker):
+        def read_extended_type(self, typecode, data):
+            return (typecode, data)
+
+    def u(s):
+        unpacker = MyUnpacker()
+        unpacker.feed(s)
+        return unpacker.unpack_one()
+
+    # Inputs and expected payloads are bytes literals so the test reads the
+    # same on Python 2 and Python 3.
+    assert u(b'\xd4\x42A') == (0x42, b'A') # fixext 1
+    assert u(b'\xd5\x42AB') == (0x42, b'AB') # fixext 2
+    assert u(b'\xd6\x42ABCD') == (0x42, b'ABCD') # fixext 4
+    assert u(b'\xd7\x42ABCDEFGH') == (0x42, b'ABCDEFGH') # fixext 8
+    assert u(b'\xd8\x42' + b'A'*16) == (0x42, b'A'*16) # fixext 16
+    assert u(b'\xc7\x03\x42ABC') == (0x42, b'ABC') # ext 8
+    assert (u(b'\xc8\x01\x23\x42' + b'A'*0x0123) ==
+            (0x42, b'A'*0x0123)) # ext 16
+    assert (u(b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345) ==
+            (0x42, b'A'*0x00012345)) # ext 32
+
+
+
+
+def test_extension_type():
+    """Round-trip an array.array through a custom ext typecode."""
+    class MyPacker(msgpack.Packer):
+        def handle_unknown_type(self, obj):
+            if not isinstance(obj, array.array):
+                return False
+            typecode = 123 # application specific typecode
+            # tobytes() is the Python 3 spelling; fall back to tostring()
+            # where it does not exist.
+            try:
+                data = obj.tobytes()
+            except AttributeError:
+                data = obj.tostring()
+            self.pack_extended_type(typecode, data)
+            return True
+
+    class MyUnpacker(msgpack.Unpacker):
+        def read_extended_type(self, typecode, data):
+            assert typecode == 123
+            obj = array.array('d')
+            # frombytes() is the Python 3 spelling; fall back to fromstring()
+            # where it does not exist.
+            try:
+                obj.frombytes(data)
+            except AttributeError:
+                obj.fromstring(data)
+            return obj
+
+    # b'hello' rather than 'hello' so the expected value has the same type
+    # on Python 2 and Python 3.
+    obj = [42, b'hello', array.array('d', [1.1, 2.2, 3.3])]
+    packer = MyPacker()
+    unpacker = MyUnpacker(None)
+    s = packer.pack(obj)
+    unpacker.feed(s)
+    obj2 = unpacker.unpack_one()
+    assert obj == obj2
diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py
index 9db14ca..abc447a 100644
--- a/test/test_sequnpack.py
+++ b/test/test_sequnpack.py
@@ -1,9 +1,10 @@
#!/usr/bin/env python
# coding: utf-8
+import py
import six
from msgpack import Unpacker, BufferFull
-from msgpack.exceptions import OutOfData
+from msgpack.exceptions import OutOfData, ExtraData, UnpackValueError
from pytest import raises
@@ -85,3 +86,15 @@ def test_readbytes():
assert unpacker.unpack() == ord(b'a')
assert unpacker.unpack() == ord(b'r')
+def test_unpack_one():
+    """unpack_one() must accept exactly one complete message, nothing more or less."""
+    # Exactly one message: returned as-is.
+    unpacker = Unpacker()
+    unpacker.feed(b'\xda\x00\x03abc')
+    assert unpacker.unpack_one() == b'abc'
+    # One message plus a trailing byte: ExtraData.
+    unpacker = Unpacker()
+    unpacker.feed(b'\xda\x00\x03abcd')
+    raises(ExtraData, unpacker.unpack_one)
+    # Truncated message: UnpackValueError.
+    unpacker = Unpacker()
+    unpacker.feed(b'\xda\x00\x03ab')
+    raises(UnpackValueError, unpacker.unpack_one)