author    INADA Naoki <songofacandy@gmail.com>  2012-07-13 21:28:16 +0900
committer INADA Naoki <songofacandy@gmail.com>  2012-07-13 21:28:16 +0900
commit    7b1167044b17572126fc69b89eb6fa9c0a5afb91 (patch)
tree      8fc6615daebf9c7d1efe75543c47313196cb783f
parent    e133c7fd27a515feba7691d6ad37cc9d58dc5b42 (diff)
download  msgpack-python-7b1167044b17572126fc69b89eb6fa9c0a5afb91.tar.gz
Add max_buffer_size to Unpacker.
-rw-r--r--  ChangeLog.rst         16
-rw-r--r--  msgpack/_msgpack.pyx  71
2 files changed, 73 insertions, 14 deletions
diff --git a/ChangeLog.rst b/ChangeLog.rst
index e255e19..e86e3bd 100644
--- a/ChangeLog.rst
+++ b/ChangeLog.rst
@@ -1,3 +1,19 @@
+0.2.1
+=======
+:release date: NOT RELEASED YET
+
+Changes
+-------
+* Add ``max_buffer_size`` parameter to Unpacker. It limits the internal buffer
+  size and allows unpacking data from an untrusted source safely.
+
+* Unpacker's buffer reallocation algorithm is less greedy now. It may cause a
+  performance decrease in rare cases, but it is more memory efficient and
+  never allocates more than ``max_buffer_size``.
+
+Bugs fixed
+----------
+
+
0.2.0
=======
:release date: 2012-06-27
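
The guard the changelog describes kicks in when buffered-but-unparsed bytes
would force the internal buffer past the cap. A minimal sketch (assuming the
``msgpack`` package exports ``Unpacker`` and ``BufferFull`` as this commit
defines them; the small ``read_size`` forces an early reallocation)::

    import msgpack

    # Start with a tiny initial buffer and cap growth at 1 KiB.
    unpacker = msgpack.Unpacker(read_size=16, max_buffer_size=1024)
    try:
        # 2 KiB of pending, unparsed data cannot fit under the cap.
        unpacker.feed(b"\x00" * 2048)
    except msgpack.BufferFull:
        print("refused to buffer more than max_buffer_size bytes")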
diff --git a/msgpack/_msgpack.pyx b/msgpack/_msgpack.pyx
index 526c003..96abb42 100644
--- a/msgpack/_msgpack.pyx
+++ b/msgpack/_msgpack.pyx
@@ -10,6 +10,9 @@ cdef extern from "Python.h":
from libc.stdlib cimport *
from libc.string cimport *
+from libc.limits cimport *
+
+
import gc
_gc_disable = gc.disable
_gc_enable = gc.enable
@@ -35,6 +38,11 @@ cdef extern from "pack.h":
cdef int DEFAULT_RECURSE_LIMIT=511
+
+class BufferFull(Exception):
+    pass
+
+
cdef class Packer(object):
    """MessagePack Packer
@@ -193,7 +201,9 @@ cdef extern from "unpack.h":
    object template_data(template_context* ctx)

-def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=0, encoding=None, unicode_errors="strict"):
+def unpackb(object packed, object object_hook=None, object list_hook=None,
+            bint use_list=0, encoding=None, unicode_errors="strict",
+            ):
    """
    Unpack packed_bytes to object. Returns an unpacked object."""
    cdef template_context ctx
@@ -243,12 +253,16 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, bint
        return None

-def unpack(object stream, object object_hook=None, object list_hook=None, bint use_list=0, encoding=None, unicode_errors="strict"):
+def unpack(object stream, object object_hook=None, object list_hook=None,
+           bint use_list=0, encoding=None, unicode_errors="strict",
+           ):
    """
    unpack an object from stream.
    """
    return unpackb(stream.read(), use_list=use_list,
-                   object_hook=object_hook, list_hook=list_hook, encoding=encoding, unicode_errors=unicode_errors)
+                   object_hook=object_hook, list_hook=list_hook,
+                   encoding=encoding, unicode_errors=unicode_errors,
+                   )
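
For context, the two one-shot helpers whose signatures are merely being
rewrapped above round-trip like this (a sketch; ``packb`` is untouched by
this commit)::

    import msgpack

    payload = msgpack.packb([1, 2, 3])
    # use_list=1 gives a Python list; the default (0) would give a tuple.
    assert msgpack.unpackb(payload, use_list=1) == [1, 2, 3]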
cdef class Unpacker(object):
"""
@@ -259,7 +273,7 @@ cdef class Unpacker(object):
    When `Unpacker` is initialized with `file_like`, the unpacker reads serialized
    data from it and the `.feed()` method is not usable.

-    `read_size` is used as `file_like.read(read_size)`. (default: 1M)
+    `read_size` is used as `file_like.read(read_size)`. (default: 1024**2)

    If `use_list` is true, a msgpack list is deserialized to a Python list.
    Otherwise, it is deserialized to a Python tuple. (default: False)
@@ -272,11 +286,24 @@ cdef class Unpacker(object):
    `unicode_errors` is used for decoding bytes.

-    example::
+    `max_buffer_size` limits the size of data waiting to be unpacked. 0 means
+    unlimited (default).
+    Raises a `BufferFull` exception when the buffer is insufficient.
+    You should set this parameter when unpacking data from an untrusted source.
+
+    example of streaming deserialize from file-like object::
+
+        unpacker = Unpacker(file_like)
+        for o in unpacker:
+            do_something(o)
+
+    example of streaming deserialize from socket::

        unpacker = Unpacker()
        while 1:
-            buf = astream.read()
+            buf = sock.recv(1024**2)
+            if not buf:
+                break
            unpacker.feed(buf)
            for o in unpacker:
                do_something(o)
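
To keep the socket loop above safe against a peer that sends faster than we
parse, drain the unpacker after every feed so pending bytes stay under the
cap. A hedged sketch (``sock`` and ``do_something`` are placeholders, not part
of this commit)::

    import msgpack

    def stream_objects(sock, do_something, max_pending=1024 * 1024):
        # Cap pending bytes so a hostile peer cannot exhaust memory.
        unpacker = msgpack.Unpacker(max_buffer_size=max_pending)
        while True:
            buf = sock.recv(64 * 1024)
            if not buf:
                break
            unpacker.feed(buf)      # may raise msgpack.BufferFull
            for obj in unpacker:    # consume everything parsable so far
                do_something(obj)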
@@ -293,6 +320,7 @@ cdef class Unpacker(object):
    cdef object _berrors
    cdef char *encoding
    cdef char *unicode_errors
+    cdef size_t max_buffer_size

    def __cinit__(self):
        self.buf = NULL
@@ -303,7 +331,7 @@ cdef class Unpacker(object):
    def __init__(self, file_like=None, Py_ssize_t read_size=1024*1024, bint use_list=0,
                 object object_hook=None, object list_hook=None,
-                 encoding=None, unicode_errors='strict'):
+                 encoding=None, unicode_errors='strict', int max_buffer_size=0):
        self.use_list = use_list
        self.file_like = file_like
        if file_like:
@@ -314,6 +342,10 @@ cdef class Unpacker(object):
        self.buf = <char*>malloc(read_size)
        if self.buf == NULL:
            raise MemoryError("Unable to allocate internal buffer.")
+        if max_buffer_size:
+            self.max_buffer_size = max_buffer_size
+        else:
+            self.max_buffer_size = INT_MAX
        self.buf_size = read_size
        self.buf_head = 0
        self.buf_tail = 0
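
The defaulting above just treats 0 as "no practical limit". In plain Python
the rule is (``INT_MAX`` as in C's ``limits.h``, typically 2**31 - 1)::

    INT_MAX = 2**31 - 1  # C INT_MAX on common platforms

    def effective_max(max_buffer_size=0):
        # 0 (the default) means "unlimited", which internally becomes INT_MAX.
        return max_buffer_size if max_buffer_size else INT_MAX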
@@ -355,28 +387,36 @@ cdef class Unpacker(object):
    cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len):
        cdef:
            char* buf = self.buf
+            char* new_buf
            size_t head = self.buf_head
            size_t tail = self.buf_tail
            size_t buf_size = self.buf_size
            size_t new_size

        if tail + _buf_len > buf_size:
-            if ((tail - head) + _buf_len)*2 < buf_size:
+            if ((tail - head) + _buf_len) <= buf_size:
                # move to front.
                memmove(buf, buf + head, tail - head)
                tail -= head
                head = 0
            else:
                # expand buffer.
-                new_size = tail + _buf_len
-                if new_size < buf_size*2:
-                    new_size = buf_size*2
-                buf = <char*>realloc(buf, new_size)
-                if buf == NULL:
+                new_size = (tail-head) + _buf_len
+                if new_size > self.max_buffer_size:
+                    raise BufferFull
+                new_size = min(new_size*2, self.max_buffer_size)
+                new_buf = <char*>malloc(new_size)
+                if new_buf == NULL:
                    # self.buf still holds old buffer and will be freed during
                    # obj destruction
                    raise MemoryError("Unable to enlarge internal buffer.")
+                memcpy(new_buf, buf + head, tail - head)
+                free(buf)
+
+                buf = new_buf
                buf_size = new_size
+                tail -= head
+                head = 0

        memcpy(buf + tail, <char*>(_buf), _buf_len)
        self.buf = buf
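
The new policy reads: compact in place when the live bytes plus the incoming
chunk fit in the current allocation; otherwise allocate at most twice the
needed size, never exceeding ``max_buffer_size``. A pure-Python model of that
decision (illustrative only; a ``bytearray`` stands in for the C buffer)::

    class BufferFull(Exception):
        pass

    def append_buffer(buf, head, tail, capacity, chunk, max_buffer_size):
        """Model of the Cython logic; returns (buf, head, tail, capacity)."""
        if tail + len(chunk) > capacity:
            live = tail - head
            if live + len(chunk) <= capacity:
                # move to front: reclaim the space consumed before `head`.
                buf[:live] = buf[head:tail]
                head, tail = 0, live
            else:
                # expand: fail first if even the minimum size exceeds the cap.
                needed = live + len(chunk)
                if needed > max_buffer_size:
                    raise BufferFull
                capacity = min(needed * 2, max_buffer_size)
                new_buf = bytearray(capacity)
                new_buf[:live] = buf[head:tail]
                buf, head, tail = new_buf, 0, live
        buf[tail:tail + len(chunk)] = chunk
        return buf, head, tail + len(chunk), capacity

    buf, head, tail, cap = append_buffer(bytearray(16), 0, 0, 16,
                                         b"x" * 24, max_buffer_size=64)
    assert (tail - head, cap) == (24, 48)   # grew to min(24*2, 64)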
@@ -387,7 +427,10 @@ cdef class Unpacker(object):
    # prepare self.buf from file_like
    cdef fill_buffer(self):
        if self.file_like is not None:
-            next_bytes = self.file_like_read(self.read_size)
+            next_bytes = self.file_like_read(
+                    min(self.read_size,
+                        self.max_buffer_size - (self.buf_tail - self.buf_head)
+                        ))
            if next_bytes:
                self.append_buffer(PyBytes_AsString(next_bytes),
                                   PyBytes_Size(next_bytes))
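
The clamp keeps any single read from overshooting the cap: the unpacker never
asks the file-like object for more than the space left under
``max_buffer_size``. The same arithmetic in plain Python (hypothetical helper,
for illustration)::

    import io

    def bounded_read(file_like, read_size, max_buffer_size, pending):
        # `pending` is buf_tail - buf_head: bytes buffered but not yet parsed.
        return file_like.read(min(read_size, max_buffer_size - pending))

    chunk = bounded_read(io.BytesIO(b"x" * 100), read_size=64,
                         max_buffer_size=80, pending=40)
    assert len(chunk) == 40  # clamped to the 40 bytes of remaining headroom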