diff options
author | INADA Naoki <methane@users.noreply.github.com> | 2018-01-11 17:02:41 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-01-11 17:02:41 +0900 |
commit | 5534d0c7af0114db3d27f7b96c82a7fe22ce1e40 (patch) | |
tree | 57d598860b021b723074504bd84391852b376eb9 /msgpack/_unpacker.pyx | |
parent | 50ea49c86f5aaff8bb1cd37778b50b13df83ba8f (diff) | |
download | msgpack-python-5534d0c7af0114db3d27f7b96c82a7fe22ce1e40.tar.gz |
Add raw_as_bytes option to Unpacker. (#265)
Diffstat (limited to 'msgpack/_unpacker.pyx')
-rw-r--r-- | msgpack/_unpacker.pyx | 81 |
1 files changed, 52 insertions, 29 deletions
diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 564749e..b796d04 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -43,8 +43,9 @@ from msgpack import ExtType cdef extern from "unpack.h": ctypedef struct msgpack_user: bint use_list - PyObject* object_hook + bint raw_as_bytes bint has_pairs_hook # call object_hook with k-v pairs + PyObject* object_hook PyObject* list_hook PyObject* ext_hook char *encoding @@ -73,12 +74,14 @@ cdef extern from "unpack.h": cdef inline init_ctx(unpack_context *ctx, object object_hook, object object_pairs_hook, object list_hook, object ext_hook, - bint use_list, char* encoding, char* unicode_errors, + bint use_list, bint raw_as_bytes, + char* encoding, char* unicode_errors, Py_ssize_t max_str_len, Py_ssize_t max_bin_len, Py_ssize_t max_array_len, Py_ssize_t max_map_len, Py_ssize_t max_ext_len): unpack_init(ctx) ctx.user.use_list = use_list + ctx.user.raw_as_bytes = raw_as_bytes ctx.user.object_hook = ctx.user.list_hook = <PyObject*>NULL ctx.user.max_str_len = max_str_len ctx.user.max_bin_len = max_bin_len @@ -155,7 +158,8 @@ cdef inline int get_data_from_buffer(object obj, return 1 def unpackb(object packed, object object_hook=None, object list_hook=None, - bint use_list=1, encoding=None, unicode_errors="strict", + bint use_list=True, bint raw_as_bytes=True, + encoding=None, unicode_errors="strict", object_pairs_hook=None, ext_hook=ExtType, Py_ssize_t max_str_len=2147483647, # 2**32-1 Py_ssize_t max_bin_len=2147483647, @@ -180,21 +184,26 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, cdef char* cerr = NULL cdef int new_protocol = 0 - get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol) + if encoding is not None: + PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1) + if isinstance(encoding, unicode): + encoding = encoding.encode('ascii') + elif not isinstance(encoding, bytes): + raise TypeError("encoding should be bytes or unicode") + cenc = PyBytes_AsString(encoding) + + if unicode_errors is not None: + PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1) + if isinstance(unicode_errors, unicode): + unicode_errors = unicode_errors.encode('ascii') + elif not isinstance(unicode_errors, bytes): + raise TypeError("unicode_errors should be bytes or unicode") + cerr = PyBytes_AsString(unicode_errors) + get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol) try: - if encoding is not None: - if isinstance(encoding, unicode): - encoding = encoding.encode('ascii') - cenc = PyBytes_AsString(encoding) - - if unicode_errors is not None: - if isinstance(unicode_errors, unicode): - unicode_errors = unicode_errors.encode('ascii') - cerr = PyBytes_AsString(unicode_errors) - init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, - use_list, cenc, cerr, + use_list, raw_as_bytes, cenc, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) ret = unpack_construct(&ctx, buf, buf_len, &off) finally: @@ -252,6 +261,16 @@ cdef class Unpacker(object): If true, unpack msgpack array to Python list. Otherwise, unpack to Python tuple. (default: True) + :param bool raw_as_bytes: + If true, unpack msgpack raw to Python bytes (default). + Otherwise, unpack to Python str (or unicode on Python 2) by decoding + with UTF-8 encoding (recommended). + Currently, the default is true, but it will be changed to false in + near future. So you must specify it explicitly for keeping backward + compatibility. + + *encoding* option which is deprecated overrides this option. + :param callable object_hook: When specified, it should be callable. Unpacker calls it with a dict argument after unpacking msgpack map. @@ -262,14 +281,6 @@ cdef class Unpacker(object): Unpacker calls it with a list of key-value pairs after unpacking msgpack map. (See also simplejson) - :param str encoding: - Encoding used for decoding msgpack raw. - If it is None (default), msgpack raw is deserialized to Python bytes. - - :param str unicode_errors: - Used for decoding msgpack raw with *encoding*. - (default: `'strict'`) - :param int max_buffer_size: Limits size of data waiting unpacked. 0 means system's INT_MAX (default). Raises `BufferFull` exception when it is insufficient. @@ -287,16 +298,25 @@ cdef class Unpacker(object): :param int max_map_len: Limits max length of map. (default: 2**31-1) + :param str encoding: + Deprecated, use raw_as_bytes instead. + Encoding used for decoding msgpack raw. + If it is None (default), msgpack raw is deserialized to Python bytes. + + :param str unicode_errors: + Deprecated. Used for decoding msgpack raw with *encoding*. + (default: `'strict'`) + - example of streaming deserialize from file-like object:: + Example of streaming deserialize from file-like object:: - unpacker = Unpacker(file_like) + unpacker = Unpacker(file_like, raw_as_bytes=False) for o in unpacker: process(o) - example of streaming deserialize from socket:: + Example of streaming deserialize from socket:: - unpacker = Unpacker() + unpacker = Unpacker(raw_as_bytes=False) while True: buf = sock.recv(1024**2) if not buf: @@ -324,7 +344,8 @@ cdef class Unpacker(object): PyMem_Free(self.buf) self.buf = NULL - def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1, + def __init__(self, file_like=None, Py_ssize_t read_size=0, + bint use_list=True, bint raw_as_bytes=True, object object_hook=None, object object_pairs_hook=None, object list_hook=None, encoding=None, unicode_errors='strict', int max_buffer_size=0, object ext_hook=ExtType, @@ -363,6 +384,7 @@ cdef class Unpacker(object): self.stream_offset = 0 if encoding is not None: + PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1) if isinstance(encoding, unicode): self.encoding = encoding.encode('ascii') elif isinstance(encoding, bytes): @@ -372,6 +394,7 @@ cdef class Unpacker(object): cenc = PyBytes_AsString(self.encoding) if unicode_errors is not None: + PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1) if isinstance(unicode_errors, unicode): self.unicode_errors = unicode_errors.encode('ascii') elif isinstance(unicode_errors, bytes): @@ -381,7 +404,7 @@ cdef class Unpacker(object): cerr = PyBytes_AsString(self.unicode_errors) init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, - ext_hook, use_list, cenc, cerr, + ext_hook, use_list, raw_as_bytes, cenc, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) |