diff options
author | Armin Rigo <arigo@tunes.org> | 2016-04-16 23:28:14 +0200 |
---|---|---|
committer | Armin Rigo <arigo@tunes.org> | 2016-04-16 23:28:14 +0200 |
commit | 4b765c16d4f60f46084abd74341e79267be751d0 (patch) | |
tree | dc5b35a54a20ec09fb4622d7251ba7783e7a60be | |
parent | 745ca71e9cda78085c3df98e9e526d34719e8100 (diff) | |
download | cffi-4b765c16d4f60f46084abd74341e79267be751d0.tar.gz |
Remove ffi.rawstring() again, and implement ffi.unpack() instead.
Pre-documentation notes: (hi Amaury :-)
* ffi.unpack(<cdata 'char'>, n) == ffi.buffer(<cdata 'char'>, n)[:]
  but I hope it is a little bit more natural
* ffi.unpack(<cdata 'wchar_t'>, n): this is the original motivation,
  because it has no previous equivalent
* ffi.unpack(<cdata 'int'>, n) == list(<cdata 'int'>[0:n])
  but should be much faster on CPython
-rw-r--r-- | c/_cffi_backend.c | 121 | ||||
-rw-r--r-- | c/ffi_obj.c | 25 | ||||
-rw-r--r-- | c/test_c.py | 51 | ||||
-rw-r--r-- | cffi/api.py | 23 | ||||
-rw-r--r-- | testing/cffi0/test_ffi_backend.py | 10 | ||||
-rw-r--r-- | testing/cffi1/test_ffi_obj.py | 13 |
6 files changed, 175 insertions, 68 deletions
diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c index 288e5b5..24b0f7a 100644 --- a/c/_cffi_backend.c +++ b/c/_cffi_backend.c @@ -5582,37 +5582,118 @@ static PyObject *b_string(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } -static PyObject *b_rawstring(PyObject *self, PyObject *arg) +static PyObject *b_unpack(PyObject *self, PyObject *args, PyObject *kwds) { CDataObject *cd; CTypeDescrObject *ctitem; - Py_ssize_t length; + Py_ssize_t i, length, itemsize, best_alignment; + PyObject *result; + char *src; + int casenum; + static char *keywords[] = {"cdata", "length", NULL}; - if (!CData_Check(arg)) { - PyErr_SetString(PyExc_TypeError, "expected a 'cdata' object"); + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!n:unpack", keywords, + &CData_Type, &cd, &length)) return NULL; - } - cd = (CDataObject *)arg; + ctitem = cd->c_type->ct_itemdescr; - if ((cd->c_type->ct_flags & CT_ARRAY) && - (ctitem->ct_flags & (CT_PRIMITIVE_CHAR | - CT_PRIMITIVE_SIGNED | - CT_PRIMITIVE_UNSIGNED))) { - length = get_array_length(cd); + if (!(cd->c_type->ct_flags & (CT_ARRAY|CT_POINTER)) || + !(ctitem->ct_flags & CT_PRIMITIVE_ANY)) { + PyErr_Format(PyExc_TypeError, + "expected a pointer to a primitive type, got '%s'", + cd->c_type->ct_name); + return NULL; + } + if (length < 0) { + PyErr_SetString(PyExc_ValueError, "'length' cannot be negative"); + return NULL; + } + if (cd->c_data == NULL) { + PyObject *s = cdata_repr(cd); + if (s != NULL) { + PyErr_Format(PyExc_RuntimeError, + "cannot use unpack() on %s", + PyText_AS_UTF8(s)); + Py_DECREF(s); + } + return NULL; + } + + /* byte- and unicode strings */ + if (ctitem->ct_flags & CT_PRIMITIVE_CHAR) { if (ctitem->ct_size == sizeof(char)) return PyBytes_FromStringAndSize(cd->c_data, length); #ifdef HAVE_WCHAR_H - else if (ctitem->ct_flags & CT_PRIMITIVE_CHAR) { - assert(ctitem->ct_size == sizeof(wchar_t)); + else if (ctitem->ct_size == sizeof(wchar_t)) return _my_PyUnicode_FromWideChar((wchar_t *)cd->c_data, length); - } 
#endif } - PyErr_Format(PyExc_TypeError, - "expected a 'char[]' or 'uint8_t[]' or 'int8_t[]' " - "or 'wchar_t[]', got '%s'", - cd->c_type->ct_name); - return NULL; + + /* else, the result is a list. This implementation should be + equivalent to, but on CPython much faster than, 'list(p[0:length])'. + */ + result = PyList_New(length); if (result == NULL) return NULL; + + src = cd->c_data; + itemsize = ctitem->ct_size; + best_alignment = ctitem->ct_length; + + casenum = -1; + if ((best_alignment & (best_alignment - 1)) == 0 && + (((uintptr_t)src) & (best_alignment - 1)) == 0) { + /* Source data is fully aligned; we can directly read without + memcpy(). The unaligned case is expected to be rare; in + this situation it is ok to fall back to the general + convert_to_object() in the loop. For now we also use this + fall-back for types that are too large. + */ + if (ctitem->ct_flags & CT_PRIMITIVE_SIGNED) { + if (itemsize == sizeof(long)) casenum = 3; + else if (itemsize == sizeof(int)) casenum = 2; + else if (itemsize == sizeof(short)) casenum = 1; + else if (itemsize == sizeof(signed char)) casenum = 0; + } + else if (ctitem->ct_flags & CT_PRIMITIVE_UNSIGNED) { + /* Note: we never pick case 6 if sizeof(int) == sizeof(long), + so that case 6 below can assume that the 'unsigned int' result + would always fit in a 'signed long'. 
*/ + if (itemsize == sizeof(unsigned long)) casenum = 7; + else if (itemsize == sizeof(unsigned int)) casenum = 6; + else if (itemsize == sizeof(unsigned short)) casenum = 5; + else if (itemsize == sizeof(unsigned char)) casenum = 4; + } + else if (ctitem->ct_flags & CT_PRIMITIVE_FLOAT) { + if (itemsize == sizeof(double)) casenum = 9; + else if (itemsize == sizeof(float)) casenum = 8; + } + } + + for (i = 0; i < length; i++) { + PyObject *x; + switch (casenum) { + /* general case */ + default: x = convert_to_object(src, ctitem); break; + + /* special cases for performance only */ + case 0: x = PyInt_FromLong(*(signed char *)src); break; + case 1: x = PyInt_FromLong(*(short *)src); break; + case 2: x = PyInt_FromLong(*(int *)src); break; + case 3: x = PyInt_FromLong(*(long *)src); break; + case 4: x = PyInt_FromLong(*(unsigned char *)src); break; + case 5: x = PyInt_FromLong(*(unsigned short *)src); break; + case 6: x = PyInt_FromLong((long)*(unsigned int *)src); break; + case 7: x = PyLong_FromUnsignedLong(*(unsigned long *)src); break; + case 8: x = PyFloat_FromDouble(*(float *)src); break; + case 9: x = PyFloat_FromDouble(*(double *)src); break; + } + if (x == NULL) { + Py_DECREF(result); + return NULL; + } + PyList_SET_ITEM(result, i, x); + src += itemsize; + } + return result; } static PyObject *b_buffer(PyObject *self, PyObject *args, PyObject *kwds) @@ -6258,7 +6339,7 @@ static PyMethodDef FFIBackendMethods[] = { {"rawaddressof", b_rawaddressof, METH_VARARGS}, {"getcname", b_getcname, METH_VARARGS}, {"string", (PyCFunction)b_string, METH_VARARGS | METH_KEYWORDS}, - {"rawstring", b_rawstring, METH_O}, + {"unpack", (PyCFunction)b_unpack, METH_VARARGS | METH_KEYWORDS}, {"buffer", (PyCFunction)b_buffer, METH_VARARGS | METH_KEYWORDS}, {"get_errno", b_get_errno, METH_NOARGS}, {"set_errno", b_set_errno, METH_O}, diff --git a/c/ffi_obj.c b/c/ffi_obj.c index dcb72b6..ff6f946 100644 --- a/c/ffi_obj.c +++ b/c/ffi_obj.c @@ -459,18 +459,21 @@ PyDoc_STRVAR(ffi_string_doc, 
#define ffi_string b_string /* ffi_string() => b_string() from _cffi_backend.c */ -PyDoc_STRVAR(ffi_rawstring_doc, -"Convert a cdata that is an array of 'char' or 'wchar_t' to\n" -"a byte or unicode string. Unlike ffi.string(), it does not stop\n" -"at the first null.\n" +PyDoc_STRVAR(ffi_unpack_doc, +"Unpack an array of primitive C data of the given length,\n" +"returning a Python string/unicode/list.\n" "\n" -"Note that if you have a pointer and an explicit length, you\n" -"can use 'p[0:length]' to make an array view. This is similar to\n" -"the construct 'list(p[0:length])', which returns a list of chars/\n" -"unichars/ints/floats."); +"If 'cdata' is a pointer to 'char', returns a byte string.\n" +"Unlike ffi.string(), it does not stop at the first null.\n" +"\n" +"If 'cdata' is a pointer to 'wchar_t', returns a unicode string.\n" +"'length' is measured in wchar_t's; it is not the size in bytes.\n" +"\n" +"If 'cdata' is a pointer to some other integer or floating-point\n" +"type, returns a list of 'length' integers or floats."); -#define ffi_rawstring b_rawstring /* ffi_rawstring() => b_rawstring() - from _cffi_backend.c */ +#define ffi_unpack b_unpack /* ffi_unpack() => b_unpack() + from _cffi_backend.c */ PyDoc_STRVAR(ffi_buffer_doc, "Return a read-write buffer object that references the raw C data\n" @@ -1103,10 +1106,10 @@ static PyMethodDef ffi_methods[] = { {"new_allocator",(PyCFunction)ffi_new_allocator,METH_VKW,ffi_new_allocator_doc}, {"new_handle", (PyCFunction)ffi_new_handle, METH_O, ffi_new_handle_doc}, {"offsetof", (PyCFunction)ffi_offsetof, METH_VARARGS, ffi_offsetof_doc}, - {"rawstring", (PyCFunction)ffi_rawstring, METH_O, ffi_rawstring_doc}, {"sizeof", (PyCFunction)ffi_sizeof, METH_O, ffi_sizeof_doc}, {"string", (PyCFunction)ffi_string, METH_VKW, ffi_string_doc}, {"typeof", (PyCFunction)ffi_typeof, METH_O, ffi_typeof_doc}, + {"unpack", (PyCFunction)ffi_unpack, METH_VKW, ffi_unpack_doc}, {NULL} }; diff --git a/c/test_c.py b/c/test_c.py index 
c7cf0e8..e44d430 100644 --- a/c/test_c.py +++ b/c/test_c.py @@ -3526,21 +3526,48 @@ def test_get_common_types(): _get_common_types(d) assert d['bool'] == '_Bool' -def test_rawstring(): +def test_unpack(): BChar = new_primitive_type("char") BArray = new_array_type(new_pointer_type(BChar), 10) # char[10] p = newp(BArray, b"abc\x00def") - assert rawstring(p) == b"abc\x00def\x00\x00\x00" - assert rawstring(p[1:6]) == b"bc\x00de" + p0 = p + assert unpack(p, 10) == b"abc\x00def\x00\x00\x00" + assert unpack(p+1, 5) == b"bc\x00de" BWChar = new_primitive_type("wchar_t") BArray = new_array_type(new_pointer_type(BWChar), 10) # wchar_t[10] p = newp(BArray, u"abc\x00def") - assert rawstring(p) == u"abc\x00def\x00\x00\x00" - assert rawstring(p[1:6]) == u"bc\x00de" - BChar = new_primitive_type("uint8_t") - BArray = new_array_type(new_pointer_type(BChar), 10) # uint8_t[10] - p = newp(BArray, [65 + i for i in range(10)]) - assert rawstring(p) == b"ABCDEFGHIJ" - # - py.test.raises(TypeError, rawstring, "foobar") - py.test.raises(TypeError, rawstring, p + 1) + assert unpack(p, 10) == u"abc\x00def\x00\x00\x00" + + for typename, samples in [ + ("uint8_t", [0, 2**8-1]), + ("uint16_t", [0, 2**16-1]), + ("uint32_t", [0, 2**32-1]), + ("uint64_t", [0, 2**64-1]), + ("int8_t", [-2**7, 2**7-1]), + ("int16_t", [-2**15, 2**15-1]), + ("int32_t", [-2**31, 2**31-1]), + ("int64_t", [-2**63, 2**63-1]), + ("_Bool", [0, 1]), + ("float", [0.0, 10.5]), + ("double", [12.34, 56.78]), + ]: + BItem = new_primitive_type(typename) + BArray = new_array_type(new_pointer_type(BItem), 10) + p = newp(BArray, samples) + result = unpack(p, len(samples)) + assert result == samples + for i in range(len(samples)): + assert result[i] == p[i] and type(result[i]) is type(p[i]) + # + BInt = new_primitive_type("int") + py.test.raises(TypeError, unpack, p) + py.test.raises(TypeError, unpack, b"foobar", 6) + py.test.raises(TypeError, unpack, cast(BInt, 42), 1) + BFunc = new_function_type((BInt, BInt), BInt, False) + 
py.test.raises(TypeError, unpack, cast(new_pointer_type(BFunc), 42), 1) + # + py.test.raises(RuntimeError, unpack, cast(new_pointer_type(BChar), 0), 0) + py.test.raises(RuntimeError, unpack, cast(new_pointer_type(BChar), 0), 10) + # + py.test.raises(ValueError, unpack, p0, -1) + py.test.raises(ValueError, unpack, p, -1) diff --git a/cffi/api.py b/cffi/api.py index 41aea9b..7c6ad28 100644 --- a/cffi/api.py +++ b/cffi/api.py @@ -299,17 +299,20 @@ class FFI(object): """ return self._backend.string(cdata, maxlen) - def rawstring(self, cdata): - """Convert a cdata that is an array of 'char' or 'wchar_t' to - a byte or unicode string. Unlike ffi.string(), it does not stop - at the first null. - - Note that if you have a pointer and an explicit length, you - can use 'p[0:length]' to make an array view. This is similar to - the construct 'list(p[0:length])', which returns a list of chars/ - unichars/ints/floats. + def unpack(self, cdata, length): + """Unpack an array of primitive C data of the given length, + returning a Python string/unicode/list. + + If 'cdata' is a pointer to 'char', returns a byte string. + Unlike ffi.string(), it does not stop at the first null. + + If 'cdata' is a pointer to 'wchar_t', returns a unicode string. + 'length' is measured in wchar_t's; it is not the size in bytes. + + If 'cdata' is a pointer to some other integer or floating-point + type, returns a list of 'length' integers or floats. 
""" - return self._backend.rawstring(cdata) + return self._backend.unpack(cdata, length) def buffer(self, cdata, size=-1): """Return a read-write buffer object that references the raw C data diff --git a/testing/cffi0/test_ffi_backend.py b/testing/cffi0/test_ffi_backend.py index 6841c23..b96bae4 100644 --- a/testing/cffi0/test_ffi_backend.py +++ b/testing/cffi0/test_ffi_backend.py @@ -473,11 +473,9 @@ class TestBitfield: ['a', 'cc', 'ccc'], ['aa', 'aaa', 'g']) - def test_rawstring(self): + def test_unpack(self): ffi = FFI() p = ffi.new("char[]", b"abc\x00def") - assert ffi.rawstring(p) == b"abc\x00def\x00" - assert ffi.rawstring(p[1:6]) == b"bc\x00de" - p = ffi.new("wchar_t[]", u"abc\x00def") - assert ffi.rawstring(p) == u"abc\x00def\x00" - assert ffi.rawstring(p[1:6]) == u"bc\x00de" + assert ffi.unpack(p+1, 7) == b"bc\x00def\x00" + p = ffi.new("int[]", [-123456789]) + assert ffi.unpack(p, 1) == [-123456789] diff --git a/testing/cffi1/test_ffi_obj.py b/testing/cffi1/test_ffi_obj.py index 8d96ac5..30b93f1 100644 --- a/testing/cffi1/test_ffi_obj.py +++ b/testing/cffi1/test_ffi_obj.py @@ -496,14 +496,9 @@ def test_init_once_multithread_failure(): time.sleep(0.51) assert seen == ['init!', 'oops'] * 3 -def test_rawstring(): +def test_unpack(): ffi = _cffi1_backend.FFI() p = ffi.new("char[]", b"abc\x00def") - assert ffi.rawstring(p) == b"abc\x00def\x00" - assert ffi.rawstring(p[1:6]) == b"bc\x00de" - p = ffi.new("wchar_t[]", u"abc\x00def") - assert ffi.rawstring(p) == u"abc\x00def\x00" - assert ffi.rawstring(p[1:6]) == u"bc\x00de" - # - py.test.raises(TypeError, ffi.rawstring, "foobar") - py.test.raises(TypeError, ffi.rawstring, p + 1) + assert ffi.unpack(p+1, 7) == b"bc\x00def\x00" + p = ffi.new("int[]", [-123456789]) + assert ffi.unpack(p, 1) == [-123456789] |