summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Armin Rigo <arigo@tunes.org>, 2016-04-16 23:28:14 +0200
committer: Armin Rigo <arigo@tunes.org>, 2016-04-16 23:28:14 +0200
commit: 4b765c16d4f60f46084abd74341e79267be751d0 (patch)
tree: dc5b35a54a20ec09fb4622d7251ba7783e7a60be
parent: 745ca71e9cda78085c3df98e9e526d34719e8100 (diff)
download: cffi-4b765c16d4f60f46084abd74341e79267be751d0.tar.gz
Remove again ffi.rawstring(), and implement instead ffi.unpack().
Pre-documentation notes: (hi Amaury :-)

* ffi.unpack(<cdata 'char'>, n) == ffi.buffer(<cdata 'char'>, n)[:]
  but I hope it is a little bit more natural

* ffi.unpack(<cdata 'wchar_t'>, n): this is the original motivation,
  because it has no previous equivalent

* ffi.unpack(<cdata 'int'>, n) == list(<cdata 'int'>[0:n])
  but should be much faster on CPython
-rw-r--r-- c/_cffi_backend.c | 121
-rw-r--r-- c/ffi_obj.c | 25
-rw-r--r-- c/test_c.py | 51
-rw-r--r-- cffi/api.py | 23
-rw-r--r-- testing/cffi0/test_ffi_backend.py | 10
-rw-r--r-- testing/cffi1/test_ffi_obj.py | 13
6 files changed, 175 insertions, 68 deletions
diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c
index 288e5b5..24b0f7a 100644
--- a/c/_cffi_backend.c
+++ b/c/_cffi_backend.c
@@ -5582,37 +5582,118 @@ static PyObject *b_string(PyObject *self, PyObject *args, PyObject *kwds)
return NULL;
}
-static PyObject *b_rawstring(PyObject *self, PyObject *arg)
+static PyObject *b_unpack(PyObject *self, PyObject *args, PyObject *kwds)
{
CDataObject *cd;
CTypeDescrObject *ctitem;
- Py_ssize_t length;
+ Py_ssize_t i, length, itemsize, best_alignment;
+ PyObject *result;
+ char *src;
+ int casenum;
+ static char *keywords[] = {"cdata", "length", NULL};
- if (!CData_Check(arg)) {
- PyErr_SetString(PyExc_TypeError, "expected a 'cdata' object");
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!n:unpack", keywords,
+ &CData_Type, &cd, &length))
return NULL;
- }
- cd = (CDataObject *)arg;
+
ctitem = cd->c_type->ct_itemdescr;
- if ((cd->c_type->ct_flags & CT_ARRAY) &&
- (ctitem->ct_flags & (CT_PRIMITIVE_CHAR |
- CT_PRIMITIVE_SIGNED |
- CT_PRIMITIVE_UNSIGNED))) {
- length = get_array_length(cd);
+ if (!(cd->c_type->ct_flags & (CT_ARRAY|CT_POINTER)) ||
+ !(ctitem->ct_flags & CT_PRIMITIVE_ANY)) {
+ PyErr_Format(PyExc_TypeError,
+ "expected a pointer to a primitive type, got '%s'",
+ cd->c_type->ct_name);
+ return NULL;
+ }
+ if (length < 0) {
+ PyErr_SetString(PyExc_ValueError, "'length' cannot be negative");
+ return NULL;
+ }
+ if (cd->c_data == NULL) {
+ PyObject *s = cdata_repr(cd);
+ if (s != NULL) {
+ PyErr_Format(PyExc_RuntimeError,
+ "cannot use unpack() on %s",
+ PyText_AS_UTF8(s));
+ Py_DECREF(s);
+ }
+ return NULL;
+ }
+
+ /* byte- and unicode strings */
+ if (ctitem->ct_flags & CT_PRIMITIVE_CHAR) {
if (ctitem->ct_size == sizeof(char))
return PyBytes_FromStringAndSize(cd->c_data, length);
#ifdef HAVE_WCHAR_H
- else if (ctitem->ct_flags & CT_PRIMITIVE_CHAR) {
- assert(ctitem->ct_size == sizeof(wchar_t));
+ else if (ctitem->ct_size == sizeof(wchar_t))
return _my_PyUnicode_FromWideChar((wchar_t *)cd->c_data, length);
- }
#endif
}
- PyErr_Format(PyExc_TypeError,
- "expected a 'char[]' or 'uint8_t[]' or 'int8_t[]' "
- "or 'wchar_t[]', got '%s'",
- cd->c_type->ct_name);
- return NULL;
+
+ /* else, the result is a list. This implementation should be
+ equivalent to, but on CPython much faster than, 'list(p[0:length])'.
+ */
+ result = PyList_New(length); if (result == NULL) return NULL;
+
+ src = cd->c_data;
+ itemsize = ctitem->ct_size;
+ best_alignment = ctitem->ct_length;
+
+ casenum = -1;
+ if ((best_alignment & (best_alignment - 1)) == 0 &&
+ (((uintptr_t)src) & (best_alignment - 1)) == 0) {
+ /* Source data is fully aligned; we can directly read without
+ memcpy(). The unaligned case is expected to be rare; in
+ this situation it is ok to fall back to the general
+ convert_to_object() in the loop. For now we also use this
+ fall-back for types that are too large.
+ */
+ if (ctitem->ct_flags & CT_PRIMITIVE_SIGNED) {
+ if (itemsize == sizeof(long)) casenum = 3;
+ else if (itemsize == sizeof(int)) casenum = 2;
+ else if (itemsize == sizeof(short)) casenum = 1;
+ else if (itemsize == sizeof(signed char)) casenum = 0;
+ }
+ else if (ctitem->ct_flags & CT_PRIMITIVE_UNSIGNED) {
+ /* Note: we never pick case 6 if sizeof(int) == sizeof(long),
+ so that case 6 below can assume that the 'unsigned int' result
+ would always fit in a 'signed long'. */
+ if (itemsize == sizeof(unsigned long)) casenum = 7;
+ else if (itemsize == sizeof(unsigned int)) casenum = 6;
+ else if (itemsize == sizeof(unsigned short)) casenum = 5;
+ else if (itemsize == sizeof(unsigned char)) casenum = 4;
+ }
+ else if (ctitem->ct_flags & CT_PRIMITIVE_FLOAT) {
+ if (itemsize == sizeof(double)) casenum = 9;
+ else if (itemsize == sizeof(float)) casenum = 8;
+ }
+ }
+
+ for (i = 0; i < length; i++) {
+ PyObject *x;
+ switch (casenum) {
+ /* general case */
+ default: x = convert_to_object(src, ctitem); break;
+
+ /* special cases for performance only */
+ case 0: x = PyInt_FromLong(*(signed char *)src); break;
+ case 1: x = PyInt_FromLong(*(short *)src); break;
+ case 2: x = PyInt_FromLong(*(int *)src); break;
+ case 3: x = PyInt_FromLong(*(long *)src); break;
+ case 4: x = PyInt_FromLong(*(unsigned char *)src); break;
+ case 5: x = PyInt_FromLong(*(unsigned short *)src); break;
+ case 6: x = PyInt_FromLong((long)*(unsigned int *)src); break;
+ case 7: x = PyLong_FromUnsignedLong(*(unsigned long *)src); break;
+ case 8: x = PyFloat_FromDouble(*(float *)src); break;
+ case 9: x = PyFloat_FromDouble(*(double *)src); break;
+ }
+ if (x == NULL) {
+ Py_DECREF(result);
+ return NULL;
+ }
+ PyList_SET_ITEM(result, i, x);
+ src += itemsize;
+ }
+ return result;
}
static PyObject *b_buffer(PyObject *self, PyObject *args, PyObject *kwds)
@@ -6258,7 +6339,7 @@ static PyMethodDef FFIBackendMethods[] = {
{"rawaddressof", b_rawaddressof, METH_VARARGS},
{"getcname", b_getcname, METH_VARARGS},
{"string", (PyCFunction)b_string, METH_VARARGS | METH_KEYWORDS},
- {"rawstring", b_rawstring, METH_O},
+ {"unpack", (PyCFunction)b_unpack, METH_VARARGS | METH_KEYWORDS},
{"buffer", (PyCFunction)b_buffer, METH_VARARGS | METH_KEYWORDS},
{"get_errno", b_get_errno, METH_NOARGS},
{"set_errno", b_set_errno, METH_O},
diff --git a/c/ffi_obj.c b/c/ffi_obj.c
index dcb72b6..ff6f946 100644
--- a/c/ffi_obj.c
+++ b/c/ffi_obj.c
@@ -459,18 +459,21 @@ PyDoc_STRVAR(ffi_string_doc,
#define ffi_string b_string /* ffi_string() => b_string()
from _cffi_backend.c */
-PyDoc_STRVAR(ffi_rawstring_doc,
-"Convert a cdata that is an array of 'char' or 'wchar_t' to\n"
-"a byte or unicode string. Unlike ffi.string(), it does not stop\n"
-"at the first null.\n"
+PyDoc_STRVAR(ffi_unpack_doc,
+"Unpack an array of primitive C data of the given length,\n"
+"returning a Python string/unicode/list.\n"
"\n"
-"Note that if you have a pointer and an explicit length, you\n"
-"can use 'p[0:length]' to make an array view. This is similar to\n"
-"the construct 'list(p[0:length])', which returns a list of chars/\n"
-"unichars/ints/floats.");
+"If 'cdata' is a pointer to 'char', returns a byte string.\n"
+"Unlike ffi.string(), it does not stop at the first null.\n"
+"\n"
+"If 'cdata' is a pointer to 'wchar_t', returns a unicode string.\n"
+"'length' is measured in wchar_t's; it is not the size in bytes.\n"
+"\n"
+"If 'cdata' is a pointer to some other integer or floating-point\n"
+"type, returns a list of 'length' integers or floats.");
-#define ffi_rawstring b_rawstring /* ffi_rawstring() => b_rawstring()
- from _cffi_backend.c */
+#define ffi_unpack b_unpack /* ffi_unpack() => b_unpack()
+ from _cffi_backend.c */
PyDoc_STRVAR(ffi_buffer_doc,
"Return a read-write buffer object that references the raw C data\n"
@@ -1103,10 +1106,10 @@ static PyMethodDef ffi_methods[] = {
{"new_allocator",(PyCFunction)ffi_new_allocator,METH_VKW,ffi_new_allocator_doc},
{"new_handle", (PyCFunction)ffi_new_handle, METH_O, ffi_new_handle_doc},
{"offsetof", (PyCFunction)ffi_offsetof, METH_VARARGS, ffi_offsetof_doc},
- {"rawstring", (PyCFunction)ffi_rawstring, METH_O, ffi_rawstring_doc},
{"sizeof", (PyCFunction)ffi_sizeof, METH_O, ffi_sizeof_doc},
{"string", (PyCFunction)ffi_string, METH_VKW, ffi_string_doc},
{"typeof", (PyCFunction)ffi_typeof, METH_O, ffi_typeof_doc},
+ {"unpack", (PyCFunction)ffi_unpack, METH_VKW, ffi_unpack_doc},
{NULL}
};
diff --git a/c/test_c.py b/c/test_c.py
index c7cf0e8..e44d430 100644
--- a/c/test_c.py
+++ b/c/test_c.py
@@ -3526,21 +3526,48 @@ def test_get_common_types():
_get_common_types(d)
assert d['bool'] == '_Bool'
-def test_rawstring():
+def test_unpack():
BChar = new_primitive_type("char")
BArray = new_array_type(new_pointer_type(BChar), 10) # char[10]
p = newp(BArray, b"abc\x00def")
- assert rawstring(p) == b"abc\x00def\x00\x00\x00"
- assert rawstring(p[1:6]) == b"bc\x00de"
+ p0 = p
+ assert unpack(p, 10) == b"abc\x00def\x00\x00\x00"
+ assert unpack(p+1, 5) == b"bc\x00de"
BWChar = new_primitive_type("wchar_t")
BArray = new_array_type(new_pointer_type(BWChar), 10) # wchar_t[10]
p = newp(BArray, u"abc\x00def")
- assert rawstring(p) == u"abc\x00def\x00\x00\x00"
- assert rawstring(p[1:6]) == u"bc\x00de"
- BChar = new_primitive_type("uint8_t")
- BArray = new_array_type(new_pointer_type(BChar), 10) # uint8_t[10]
- p = newp(BArray, [65 + i for i in range(10)])
- assert rawstring(p) == b"ABCDEFGHIJ"
- #
- py.test.raises(TypeError, rawstring, "foobar")
- py.test.raises(TypeError, rawstring, p + 1)
+ assert unpack(p, 10) == u"abc\x00def\x00\x00\x00"
+
+ for typename, samples in [
+ ("uint8_t", [0, 2**8-1]),
+ ("uint16_t", [0, 2**16-1]),
+ ("uint32_t", [0, 2**32-1]),
+ ("uint64_t", [0, 2**64-1]),
+ ("int8_t", [-2**7, 2**7-1]),
+ ("int16_t", [-2**15, 2**15-1]),
+ ("int32_t", [-2**31, 2**31-1]),
+ ("int64_t", [-2**63, 2**63-1]),
+ ("_Bool", [0, 1]),
+ ("float", [0.0, 10.5]),
+ ("double", [12.34, 56.78]),
+ ]:
+ BItem = new_primitive_type(typename)
+ BArray = new_array_type(new_pointer_type(BItem), 10)
+ p = newp(BArray, samples)
+ result = unpack(p, len(samples))
+ assert result == samples
+ for i in range(len(samples)):
+ assert result[i] == p[i] and type(result[i]) is type(p[i])
+ #
+ BInt = new_primitive_type("int")
+ py.test.raises(TypeError, unpack, p)
+ py.test.raises(TypeError, unpack, b"foobar", 6)
+ py.test.raises(TypeError, unpack, cast(BInt, 42), 1)
+ BFunc = new_function_type((BInt, BInt), BInt, False)
+ py.test.raises(TypeError, unpack, cast(new_pointer_type(BFunc), 42), 1)
+ #
+ py.test.raises(RuntimeError, unpack, cast(new_pointer_type(BChar), 0), 0)
+ py.test.raises(RuntimeError, unpack, cast(new_pointer_type(BChar), 0), 10)
+ #
+ py.test.raises(ValueError, unpack, p0, -1)
+ py.test.raises(ValueError, unpack, p, -1)
diff --git a/cffi/api.py b/cffi/api.py
index 41aea9b..7c6ad28 100644
--- a/cffi/api.py
+++ b/cffi/api.py
@@ -299,17 +299,20 @@ class FFI(object):
"""
return self._backend.string(cdata, maxlen)
- def rawstring(self, cdata):
- """Convert a cdata that is an array of 'char' or 'wchar_t' to
- a byte or unicode string. Unlike ffi.string(), it does not stop
- at the first null.
-
- Note that if you have a pointer and an explicit length, you
- can use 'p[0:length]' to make an array view. This is similar to
- the construct 'list(p[0:length])', which returns a list of chars/
- unichars/ints/floats.
+ def unpack(self, cdata, length):
+ """Unpack an array of primitive C data of the given length,
+ returning a Python string/unicode/list.
+
+ If 'cdata' is a pointer to 'char', returns a byte string.
+ Unlike ffi.string(), it does not stop at the first null.
+
+ If 'cdata' is a pointer to 'wchar_t', returns a unicode string.
+ 'length' is measured in wchar_t's; it is not the size in bytes.
+
+ If 'cdata' is a pointer to some other integer or floating-point
+ type, returns a list of 'length' integers or floats.
"""
- return self._backend.rawstring(cdata)
+ return self._backend.unpack(cdata, length)
def buffer(self, cdata, size=-1):
"""Return a read-write buffer object that references the raw C data
diff --git a/testing/cffi0/test_ffi_backend.py b/testing/cffi0/test_ffi_backend.py
index 6841c23..b96bae4 100644
--- a/testing/cffi0/test_ffi_backend.py
+++ b/testing/cffi0/test_ffi_backend.py
@@ -473,11 +473,9 @@ class TestBitfield:
['a', 'cc', 'ccc'],
['aa', 'aaa', 'g'])
- def test_rawstring(self):
+ def test_unpack(self):
ffi = FFI()
p = ffi.new("char[]", b"abc\x00def")
- assert ffi.rawstring(p) == b"abc\x00def\x00"
- assert ffi.rawstring(p[1:6]) == b"bc\x00de"
- p = ffi.new("wchar_t[]", u"abc\x00def")
- assert ffi.rawstring(p) == u"abc\x00def\x00"
- assert ffi.rawstring(p[1:6]) == u"bc\x00de"
+ assert ffi.unpack(p+1, 7) == b"bc\x00def\x00"
+ p = ffi.new("int[]", [-123456789])
+ assert ffi.unpack(p, 1) == [-123456789]
diff --git a/testing/cffi1/test_ffi_obj.py b/testing/cffi1/test_ffi_obj.py
index 8d96ac5..30b93f1 100644
--- a/testing/cffi1/test_ffi_obj.py
+++ b/testing/cffi1/test_ffi_obj.py
@@ -496,14 +496,9 @@ def test_init_once_multithread_failure():
time.sleep(0.51)
assert seen == ['init!', 'oops'] * 3
-def test_rawstring():
+def test_unpack():
ffi = _cffi1_backend.FFI()
p = ffi.new("char[]", b"abc\x00def")
- assert ffi.rawstring(p) == b"abc\x00def\x00"
- assert ffi.rawstring(p[1:6]) == b"bc\x00de"
- p = ffi.new("wchar_t[]", u"abc\x00def")
- assert ffi.rawstring(p) == u"abc\x00def\x00"
- assert ffi.rawstring(p[1:6]) == u"bc\x00de"
- #
- py.test.raises(TypeError, ffi.rawstring, "foobar")
- py.test.raises(TypeError, ffi.rawstring, p + 1)
+ assert ffi.unpack(p+1, 7) == b"bc\x00def\x00"
+ p = ffi.new("int[]", [-123456789])
+ assert ffi.unpack(p, 1) == [-123456789]