author     INADA Naoki <songofacandy@gmail.com>    2012-07-13 21:28:16 +0900
committer  INADA Naoki <songofacandy@gmail.com>    2012-07-13 21:28:16 +0900
commit     7b1167044b17572126fc69b89eb6fa9c0a5afb91 (patch)
tree       8fc6615daebf9c7d1efe75543c47313196cb783f
parent     e133c7fd27a515feba7691d6ad37cc9d58dc5b42 (diff)
download   msgpack-python-7b1167044b17572126fc69b89eb6fa9c0a5afb91.tar.gz
Add max_buffer_size to Unpacker.
-rw-r--r--  ChangeLog.rst        | 16
-rw-r--r--  msgpack/_msgpack.pyx | 71
2 files changed, 73 insertions, 14 deletions
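A minimal sketch of the new knob in use (not part of the commit itself). It assumes the extension re-exports `Unpacker` and the new `BufferFull` exception at the package level; `untrusted_chunk`, `drop_peer`, and `do_something` are placeholder names::

    import msgpack

    # Cap buffered-but-not-yet-unpacked data at 1 MiB;
    # max_buffer_size=0 (the default) keeps the old unlimited behaviour.
    unpacker = msgpack.Unpacker(max_buffer_size=1024**2)

    try:
        unpacker.feed(untrusted_chunk)  # bytes received from an untrusted peer
    except msgpack.BufferFull:
        drop_peer()  # refuse to buffer more than the cap
    else:
        for obj in unpacker:
            do_something(obj)

Feeding data that would push the pending region past the cap raises `BufferFull` before any oversized allocation happens, which is the point of the patch.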
diff --git a/ChangeLog.rst b/ChangeLog.rst
index e255e19..e86e3bd 100644
--- a/ChangeLog.rst
+++ b/ChangeLog.rst
@@ -1,3 +1,19 @@
+0.2.1
+=======
+:release date: NOT RELEASED YET
+
+Changes
+-------
+* Add ``max_buffer_size`` parameter to Unpacker. It limits the internal buffer
+  size and allows unpacking data from an untrusted source safely.
+
+* Unpacker's buffer reallocation algorithm is less greedy now. It may be slower
+  in rare cases, but is more memory efficient and never allocates more than ``max_buffer_size``.
+
+Bugs fixed
+----------
+
+
 0.2.0
 =======
 :release date: 2012-06-27
diff --git a/msgpack/_msgpack.pyx b/msgpack/_msgpack.pyx
index 526c003..96abb42 100644
--- a/msgpack/_msgpack.pyx
+++ b/msgpack/_msgpack.pyx
@@ -10,6 +10,9 @@ cdef extern from "Python.h":
 from libc.stdlib cimport *
 from libc.string cimport *
+from libc.limits cimport *
+
+
 import gc
 _gc_disable = gc.disable
 _gc_enable = gc.enable
@@ -35,6 +38,11 @@ cdef extern from "pack.h":
 cdef int DEFAULT_RECURSE_LIMIT=511
+
+class BufferFull(Exception):
+    pass
+
+
 cdef class Packer(object):
     """MessagePack Packer
@@ -193,7 +201,9 @@ cdef extern from "unpack.h":
     object template_data(template_context* ctx)

-def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=0, encoding=None, unicode_errors="strict"):
+def unpackb(object packed, object object_hook=None, object list_hook=None,
+        bint use_list=0, encoding=None, unicode_errors="strict",
+        ):
     """ Unpack packed_bytes to object. Returns an unpacked object."""
     cdef template_context ctx
@@ -243,12 +253,16 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, bint
     return None

-def unpack(object stream, object object_hook=None, object list_hook=None, bint use_list=0, encoding=None, unicode_errors="strict"):
+def unpack(object stream, object object_hook=None, object list_hook=None,
+        bint use_list=0, encoding=None, unicode_errors="strict",
+        ):
     """ unpack an object from stream. """
     return unpackb(stream.read(), use_list=use_list,
-            object_hook=object_hook, list_hook=list_hook, encoding=encoding, unicode_errors=unicode_errors)
+            object_hook=object_hook, list_hook=list_hook,
+            encoding=encoding, unicode_errors=unicode_errors,
+            )

 cdef class Unpacker(object):
     """
@@ -259,7 +273,7 @@ cdef class Unpacker(object):
     When `Unpacker` initialized with `file_like`, unpacker reads serialized data
     from it and `.feed()` method is not usable.

-    `read_size` is used as `file_like.read(read_size)`. (default: 1M)
+    `read_size` is used as `file_like.read(read_size)`. (default: 1024**2)

     If `use_list` is true, msgpack list is deserialized to Python list.
     Otherwise, it is deserialized to Python tuple. (default: False)
@@ -272,11 +286,24 @@ cdef class Unpacker(object):
     `unicode_errors` is used for decoding bytes.

-    example::
+    `max_buffer_size` limits the size of data waiting to be unpacked. 0 means
+    unlimited (default).
+    Raises a `BufferFull` exception when it is insufficient.
+    You should set this parameter when unpacking data from an untrusted source.
+
+    example of streaming deserialize from file-like object::
+
+        unpacker = Unpacker(file_like)
+        for o in unpacker:
+            do_something(o)
+
+    example of streaming deserialize from socket::

         unpacker = Unpacker()
         while 1:
-            buf = astream.read()
+            buf = sock.recv(1024**2)
+            if not buf:
+                break
             unpacker.feed(buf)
             for o in unpacker:
                 do_something(o)
@@ -293,6 +320,7 @@ cdef class Unpacker(object):
     cdef object _berrors
     cdef char *encoding
     cdef char *unicode_errors
+    cdef size_t max_buffer_size

     def __cinit__(self):
         self.buf = NULL
@@ -303,7 +331,7 @@ cdef class Unpacker(object):
     def __init__(self, file_like=None, Py_ssize_t read_size=1024*1024, bint use_list=0,
                  object object_hook=None, object list_hook=None,
-                 encoding=None, unicode_errors='strict'):
+                 encoding=None, unicode_errors='strict', int max_buffer_size=0):
         self.use_list = use_list
         self.file_like = file_like
         if file_like:
@@ -314,6 +342,10 @@ cdef class Unpacker(object):
         self.buf = <char*>malloc(read_size)
         if self.buf == NULL:
             raise MemoryError("Unable to allocate internal buffer.")
+        if max_buffer_size:
+            self.max_buffer_size = max_buffer_size
+        else:
+            self.max_buffer_size = INT_MAX
         self.buf_size = read_size
         self.buf_head = 0
         self.buf_tail = 0
@@ -355,28 +387,36 @@ cdef class Unpacker(object):
     cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len):
         cdef:
             char* buf = self.buf
+            char* new_buf
             size_t head = self.buf_head
             size_t tail = self.buf_tail
             size_t buf_size = self.buf_size
             size_t new_size

         if tail + _buf_len > buf_size:
-            if ((tail - head) + _buf_len)*2 < buf_size:
+            if ((tail - head) + _buf_len) <= buf_size:
                 # move to front.
                 memmove(buf, buf + head, tail - head)
                 tail -= head
                 head = 0
             else:
                 # expand buffer.
-                new_size = tail + _buf_len
-                if new_size < buf_size*2:
-                    new_size = buf_size*2
-                buf = <char*>realloc(buf, new_size)
-                if buf == NULL:
+                new_size = (tail - head) + _buf_len
+                if new_size > self.max_buffer_size:
+                    raise BufferFull
+                new_size = min(new_size*2, self.max_buffer_size)
+                new_buf = <char*>malloc(new_size)
+                if new_buf == NULL:
                     # self.buf still holds old buffer and will be freed during
                     # obj destruction
                     raise MemoryError("Unable to enlarge internal buffer.")
+                memcpy(new_buf, buf + head, tail - head)
+                free(buf)
+
+                buf = new_buf
                 buf_size = new_size
+                tail -= head
+                head = 0

         memcpy(buf + tail, <char*>(_buf), _buf_len)
         self.buf = buf
@@ -387,7 +427,10 @@ cdef class Unpacker(object):
     # prepare self.buf from file_like
     cdef fill_buffer(self):
         if self.file_like is not None:
-            next_bytes = self.file_like_read(self.read_size)
+            next_bytes = self.file_like_read(
+                    max(self.read_size,
+                        self.max_buffer_size - (self.buf_tail - self.buf_head)
+                        ))
             if next_bytes:
                 self.append_buffer(PyBytes_AsString(next_bytes),
                                    PyBytes_Size(next_bytes))
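The heart of the patch is the rewritten `append_buffer` hunk above: when an incoming chunk no longer fits, the live region is first compacted to the front of the existing buffer, and the buffer only grows (to at most twice the required size, capped at `max_buffer_size`) when compaction is not enough. A pure-Python sketch of that strategy, under illustrative names that are not part of the module::

    class BufferFull(Exception):
        pass

    def append(buf, head, tail, chunk, max_buffer_size):
        """Return (buf, head, tail) after appending chunk.

        buf is a bytearray; head/tail delimit the not-yet-consumed data.
        max_buffer_size is the hard cap (the Cython __init__ substitutes
        INT_MAX when the caller passes 0, i.e. "unlimited").
        """
        needed = (tail - head) + len(chunk)
        if tail + len(chunk) > len(buf):
            if needed <= len(buf):
                # Move live data to the front instead of growing.
                buf[:tail - head] = buf[head:tail]
            else:
                # Expand, but never past max_buffer_size.
                if needed > max_buffer_size:
                    raise BufferFull
                new_buf = bytearray(min(needed * 2, max_buffer_size))
                new_buf[:tail - head] = buf[head:tail]
                buf = new_buf
            tail -= head
            head = 0
        buf[tail:tail + len(chunk)] = chunk
        return buf, head, tail + len(chunk)

Compacting first means a consumer that keeps up with the producer never forces a reallocation, and the Cython version allocates a fresh buffer with `malloc`/`memcpy` rather than `realloc` so that, as the in-diff comment notes, the old buffer stays owned by the object and is still freed if the allocation fails.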