summaryrefslogtreecommitdiff
path: root/msgpack/_unpacker.pyx
diff options
context:
space:
mode:
authorINADA Naoki <methane@users.noreply.github.com>2018-01-11 17:02:41 +0900
committerGitHub <noreply@github.com>2018-01-11 17:02:41 +0900
commit5534d0c7af0114db3d27f7b96c82a7fe22ce1e40 (patch)
tree57d598860b021b723074504bd84391852b376eb9 /msgpack/_unpacker.pyx
parent50ea49c86f5aaff8bb1cd37778b50b13df83ba8f (diff)
downloadmsgpack-python-5534d0c7af0114db3d27f7b96c82a7fe22ce1e40.tar.gz
Add raw_as_bytes option to Unpacker. (#265)
Diffstat (limited to 'msgpack/_unpacker.pyx')
-rw-r--r--msgpack/_unpacker.pyx81
1 files changed, 52 insertions, 29 deletions
diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx
index 564749e..b796d04 100644
--- a/msgpack/_unpacker.pyx
+++ b/msgpack/_unpacker.pyx
@@ -43,8 +43,9 @@ from msgpack import ExtType
cdef extern from "unpack.h":
ctypedef struct msgpack_user:
bint use_list
- PyObject* object_hook
+ bint raw_as_bytes
bint has_pairs_hook # call object_hook with k-v pairs
+ PyObject* object_hook
PyObject* list_hook
PyObject* ext_hook
char *encoding
@@ -73,12 +74,14 @@ cdef extern from "unpack.h":
cdef inline init_ctx(unpack_context *ctx,
object object_hook, object object_pairs_hook,
object list_hook, object ext_hook,
- bint use_list, char* encoding, char* unicode_errors,
+ bint use_list, bint raw_as_bytes,
+ char* encoding, char* unicode_errors,
Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
Py_ssize_t max_array_len, Py_ssize_t max_map_len,
Py_ssize_t max_ext_len):
unpack_init(ctx)
ctx.user.use_list = use_list
+ ctx.user.raw_as_bytes = raw_as_bytes
ctx.user.object_hook = ctx.user.list_hook = <PyObject*>NULL
ctx.user.max_str_len = max_str_len
ctx.user.max_bin_len = max_bin_len
@@ -155,7 +158,8 @@ cdef inline int get_data_from_buffer(object obj,
return 1
def unpackb(object packed, object object_hook=None, object list_hook=None,
- bint use_list=1, encoding=None, unicode_errors="strict",
+ bint use_list=True, bint raw_as_bytes=True,
+ encoding=None, unicode_errors="strict",
object_pairs_hook=None, ext_hook=ExtType,
Py_ssize_t max_str_len=2147483647, # 2**32-1
Py_ssize_t max_bin_len=2147483647,
@@ -180,21 +184,26 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
cdef char* cerr = NULL
cdef int new_protocol = 0
- get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol)
+ if encoding is not None:
+ PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1)
+ if isinstance(encoding, unicode):
+ encoding = encoding.encode('ascii')
+ elif not isinstance(encoding, bytes):
+ raise TypeError("encoding should be bytes or unicode")
+ cenc = PyBytes_AsString(encoding)
+
+ if unicode_errors is not None:
+ PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1)
+ if isinstance(unicode_errors, unicode):
+ unicode_errors = unicode_errors.encode('ascii')
+ elif not isinstance(unicode_errors, bytes):
+ raise TypeError("unicode_errors should be bytes or unicode")
+ cerr = PyBytes_AsString(unicode_errors)
+ get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol)
try:
- if encoding is not None:
- if isinstance(encoding, unicode):
- encoding = encoding.encode('ascii')
- cenc = PyBytes_AsString(encoding)
-
- if unicode_errors is not None:
- if isinstance(unicode_errors, unicode):
- unicode_errors = unicode_errors.encode('ascii')
- cerr = PyBytes_AsString(unicode_errors)
-
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
- use_list, cenc, cerr,
+ use_list, raw_as_bytes, cenc, cerr,
max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
ret = unpack_construct(&ctx, buf, buf_len, &off)
finally:
@@ -252,6 +261,16 @@ cdef class Unpacker(object):
If true, unpack msgpack array to Python list.
Otherwise, unpack to Python tuple. (default: True)
+ :param bool raw_as_bytes:
+ If true, unpack msgpack raw to Python bytes (default).
+ Otherwise, unpack to Python str (or unicode on Python 2) by decoding
+ with UTF-8 encoding (recommended).
+ Currently, the default is true, but it will be changed to false in
+ near future. So you must specify it explicitly for keeping backward
+ compatibility.
+
+ *encoding* option which is deprecated overrides this option.
+
:param callable object_hook:
When specified, it should be callable.
Unpacker calls it with a dict argument after unpacking msgpack map.
@@ -262,14 +281,6 @@ cdef class Unpacker(object):
Unpacker calls it with a list of key-value pairs after unpacking msgpack map.
(See also simplejson)
- :param str encoding:
- Encoding used for decoding msgpack raw.
- If it is None (default), msgpack raw is deserialized to Python bytes.
-
- :param str unicode_errors:
- Used for decoding msgpack raw with *encoding*.
- (default: `'strict'`)
-
:param int max_buffer_size:
Limits size of data waiting unpacked. 0 means system's INT_MAX (default).
Raises `BufferFull` exception when it is insufficient.
@@ -287,16 +298,25 @@ cdef class Unpacker(object):
:param int max_map_len:
Limits max length of map. (default: 2**31-1)
+ :param str encoding:
+ Deprecated, use raw_as_bytes instead.
+ Encoding used for decoding msgpack raw.
+ If it is None (default), msgpack raw is deserialized to Python bytes.
+
+ :param str unicode_errors:
+ Deprecated. Used for decoding msgpack raw with *encoding*.
+ (default: `'strict'`)
+
- example of streaming deserialize from file-like object::
+ Example of streaming deserialize from file-like object::
- unpacker = Unpacker(file_like)
+ unpacker = Unpacker(file_like, raw_as_bytes=False)
for o in unpacker:
process(o)
- example of streaming deserialize from socket::
+ Example of streaming deserialize from socket::
- unpacker = Unpacker()
+ unpacker = Unpacker(raw_as_bytes=False)
while True:
buf = sock.recv(1024**2)
if not buf:
@@ -324,7 +344,8 @@ cdef class Unpacker(object):
PyMem_Free(self.buf)
self.buf = NULL
- def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1,
+ def __init__(self, file_like=None, Py_ssize_t read_size=0,
+ bint use_list=True, bint raw_as_bytes=True,
object object_hook=None, object object_pairs_hook=None, object list_hook=None,
encoding=None, unicode_errors='strict', int max_buffer_size=0,
object ext_hook=ExtType,
@@ -363,6 +384,7 @@ cdef class Unpacker(object):
self.stream_offset = 0
if encoding is not None:
+ PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1)
if isinstance(encoding, unicode):
self.encoding = encoding.encode('ascii')
elif isinstance(encoding, bytes):
@@ -372,6 +394,7 @@ cdef class Unpacker(object):
cenc = PyBytes_AsString(self.encoding)
if unicode_errors is not None:
+ PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1)
if isinstance(unicode_errors, unicode):
self.unicode_errors = unicode_errors.encode('ascii')
elif isinstance(unicode_errors, bytes):
@@ -381,7 +404,7 @@ cdef class Unpacker(object):
cerr = PyBytes_AsString(self.unicode_errors)
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
- ext_hook, use_list, cenc, cerr,
+ ext_hook, use_list, raw_as_bytes, cenc, cerr,
max_str_len, max_bin_len, max_array_len,
max_map_len, max_ext_len)