Remove ffi.rawstring() again, and implement ffi.unpack() instead.

Pre-documentation notes (hi Amaury :-)

* ffi.unpack(<cdata 'char'>, n) == ffi.buffer(<cdata 'char'>, n)[:], but I hope it is a little bit more natural
* ffi.unpack(<cdata 'wchar_t'>, n): this is the original motivation, because it has no previous equivalent
* ffi.unpack(<cdata 'int'>, n) == list(<cdata 'int'>[0:n]), but it should be much faster on CPython
author: Armin Rigo <arigo@tunes.org> 2016-04-16 23:28:14 +0200
committer: Armin Rigo <arigo@tunes.org> 2016-04-16 23:28:14 +0200
commit: 4b765c16d4f60f46084abd74341e79267be751d0 (patch)
tree: dc5b35a54a20ec09fb4622d7251ba7783e7a60be
parent: 745ca71e9cda78085c3df98e9e526d34719e8100 (diff)
download: cffi-4b765c16d4f60f46084abd74341e79267be751d0.tar.gz
6 files changed, 175 insertions, 68 deletions
diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c
index 288e5b5..24b0f7a 100644
--- a/c/_cffi_backend.c
+++ b/c/_cffi_backend.c
@@ -5582,37 +5582,118 @@ static PyObject *b_string(PyObject *self, PyObject *args, PyObject *kwds)
     return NULL;
 }
 
-static PyObject *b_rawstring(PyObject *self, PyObject *arg)
+static PyObject *b_unpack(PyObject *self, PyObject *args, PyObject *kwds)
 {
     CDataObject *cd;
     CTypeDescrObject *ctitem;
-    Py_ssize_t length;
+    Py_ssize_t i, length, itemsize, best_alignment;
+    PyObject *result;
+    char *src;
+    int casenum;
+    static char *keywords[] = {"cdata", "length", NULL};
 
-    if (!CData_Check(arg)) {
-        PyErr_SetString(PyExc_TypeError, "expected a 'cdata' object");
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!n:unpack", keywords,
+                                     &CData_Type, &cd, &length))
         return NULL;
-    }
-    cd = (CDataObject *)arg;
+
     ctitem = cd->c_type->ct_itemdescr;
-    if ((cd->c_type->ct_flags & CT_ARRAY) &&
-        (ctitem->ct_flags & (CT_PRIMITIVE_CHAR |
-                             CT_PRIMITIVE_SIGNED |
-                             CT_PRIMITIVE_UNSIGNED))) {
-        length = get_array_length(cd);
+    if (!(cd->c_type->ct_flags & (CT_ARRAY|CT_POINTER)) ||
+        !(ctitem->ct_flags & CT_PRIMITIVE_ANY)) {
+        PyErr_Format(PyExc_TypeError,
+                     "expected a pointer to a primitive type, got '%s'",
+                     cd->c_type->ct_name);
+        return NULL;
+    }
+    if (length < 0) {
+        PyErr_SetString(PyExc_ValueError, "'length' cannot be negative");
+        return NULL;
+    }
+    if (cd->c_data == NULL) {
+        PyObject *s = cdata_repr(cd);
+        if (s != NULL) {
+            PyErr_Format(PyExc_RuntimeError,
+                         "cannot use unpack() on %s",
+                         PyText_AS_UTF8(s));
+            Py_DECREF(s);
+        }
+        return NULL;
+    }
+
+    /* byte- and unicode strings */
+    if (ctitem->ct_flags & CT_PRIMITIVE_CHAR) {
         if (ctitem->ct_size == sizeof(char))
             return PyBytes_FromStringAndSize(cd->c_data, length);
 #ifdef HAVE_WCHAR_H
-        else if (ctitem->ct_flags & CT_PRIMITIVE_CHAR) {
-            assert(ctitem->ct_size == sizeof(wchar_t));
+        else if (ctitem->ct_size == sizeof(wchar_t))
             return _my_PyUnicode_FromWideChar((wchar_t *)cd->c_data, length);
-        }
 #endif
     }
-    PyErr_Format(PyExc_TypeError,
-                 "expected a 'char[]' or 'uint8_t[]' or 'int8_t[]' "
-                 "or 'wchar_t[]', got '%s'",
-                 cd->c_type->ct_name);
-    return NULL;
+
+    /* else, the result is a list.  This implementation should be
+       equivalent to, but on CPython much faster than, 'list(p[0:length])'.
+    */
+    result = PyList_New(length); if (result == NULL) return NULL;
+
+    src = cd->c_data;
+    itemsize = ctitem->ct_size;
+    best_alignment = ctitem->ct_length;
+
+    casenum = -1;
+    if ((best_alignment & (best_alignment - 1)) == 0 &&
+        (((uintptr_t)src) & (best_alignment - 1)) == 0) {
+        /* Source data is fully aligned; we can directly read without
+           memcpy().  The unaligned case is expected to be rare; in
+           this situation it is ok to fall back to the general
+           convert_to_object() in the loop.  For now we also use this
+           fall-back for types that are too large.
+        */
+        if (ctitem->ct_flags & CT_PRIMITIVE_SIGNED) {
+            if (itemsize == sizeof(long))             casenum = 3;
+            else if (itemsize == sizeof(int))         casenum = 2;
+            else if (itemsize == sizeof(short))       casenum = 1;
+            else if (itemsize == sizeof(signed char)) casenum = 0;
+        }
+        else if (ctitem->ct_flags & CT_PRIMITIVE_UNSIGNED) {
+            /* Note: we never pick case 6 if sizeof(int) == sizeof(long),
+               so that case 6 below can assume that the 'unsigned int' result
+               would always fit in a 'signed long'. */
+            if      (itemsize == sizeof(unsigned long))  casenum = 7;
+            else if (itemsize == sizeof(unsigned int))   casenum = 6;
+            else if (itemsize == sizeof(unsigned short)) casenum = 5;
+            else if (itemsize == sizeof(unsigned char))  casenum = 4;
+        }
+        else if (ctitem->ct_flags & CT_PRIMITIVE_FLOAT) {
+            if      (itemsize == sizeof(double)) casenum = 9;
+            else if (itemsize == sizeof(float))  casenum = 8;
+        }
+    }
+
+    for (i = 0; i < length; i++) {
+        PyObject *x;
+        switch (casenum) {
+            /* general case */
+        default: x = convert_to_object(src, ctitem); break;
+
+            /* special cases for performance only */
+        case 0: x = PyInt_FromLong(*(signed char *)src); break;
+        case 1: x = PyInt_FromLong(*(short *)src); break;
+        case 2: x = PyInt_FromLong(*(int *)src); break;
+        case 3: x = PyInt_FromLong(*(long *)src); break;
+        case 4: x = PyInt_FromLong(*(unsigned char *)src); break;
+        case 5: x = PyInt_FromLong(*(unsigned short *)src); break;
+        case 6: x = PyInt_FromLong((long)*(unsigned int *)src); break;
+        case 7: x = PyLong_FromUnsignedLong(*(unsigned long *)src); break;
+        case 8: x = PyFloat_FromDouble(*(float *)src); break;
+        case 9: x = PyFloat_FromDouble(*(double *)src); break;
+        }
+        if (x == NULL) {
+            Py_DECREF(result);
+            return NULL;
+        }
+        PyList_SET_ITEM(result, i, x);
+        src += itemsize;
+    }
+    return result;
 }
 
 static PyObject *b_buffer(PyObject *self, PyObject *args, PyObject *kwds)
@@ -6258,7 +6339,7 @@ static PyMethodDef FFIBackendMethods[] = {
     {"rawaddressof", b_rawaddressof, METH_VARARGS},
     {"getcname", b_getcname, METH_VARARGS},
     {"string", (PyCFunction)b_string, METH_VARARGS | METH_KEYWORDS},
-    {"rawstring", b_rawstring, METH_O},
+    {"unpack", (PyCFunction)b_unpack, METH_VARARGS | METH_KEYWORDS},
     {"buffer", (PyCFunction)b_buffer, METH_VARARGS | METH_KEYWORDS},
     {"get_errno", b_get_errno, METH_NOARGS},
     {"set_errno", b_set_errno, METH_O},
diff --git a/c/ffi_obj.c b/c/ffi_obj.c
index dcb72b6..ff6f946 100644
--- a/c/ffi_obj.c
+++ b/c/ffi_obj.c
@@ -459,18 +459,21 @@ PyDoc_STRVAR(ffi_string_doc,
 #define ffi_string  b_string     /* ffi_string() => b_string()
                                     from _cffi_backend.c */
 
-PyDoc_STRVAR(ffi_rawstring_doc,
-"Convert a cdata that is an array of 'char' or 'wchar_t' to\n"
-"a byte or unicode string.  Unlike ffi.string(), it does not stop\n"
-"at the first null.\n"
+PyDoc_STRVAR(ffi_unpack_doc,
+"Unpack an array of primitive C data of the given length,\n"
+"returning a Python string/unicode/list.\n"
 "\n"
-"Note that if you have a pointer and an explicit length, you\n"
-"can use 'p[0:length]' to make an array view.  This is similar to\n"
-"the construct 'list(p[0:length])', which returns a list of chars/\n"
-"unichars/ints/floats.");
+"If 'cdata' is a pointer to 'char', returns a byte string.\n"
+"Unlike ffi.string(), it does not stop at the first null.\n"
+"\n"
+"If 'cdata' is a pointer to 'wchar_t', returns a unicode string.\n"
+"'length' is measured in wchar_t's; it is not the size in bytes.\n"
+"\n"
+"If 'cdata' is a pointer to some other integer or floating-point\n"
+"type, returns a list of 'length' integers or floats.");
 
-#define ffi_rawstring  b_rawstring     /* ffi_rawstring() => b_rawstring()
-                                          from _cffi_backend.c */
+#define ffi_unpack  b_unpack     /* ffi_unpack() => b_unpack()
+                                    from _cffi_backend.c */
 
 PyDoc_STRVAR(ffi_buffer_doc,
 "Return a read-write buffer object that references the raw C data\n"
@@ -1103,10 +1106,10 @@ static PyMethodDef ffi_methods[] = {
 {"new_allocator",(PyCFunction)ffi_new_allocator,METH_VKW,ffi_new_allocator_doc},
  {"new_handle", (PyCFunction)ffi_new_handle, METH_O,       ffi_new_handle_doc},
  {"offsetof",   (PyCFunction)ffi_offsetof,   METH_VARARGS, ffi_offsetof_doc},
- {"rawstring",  (PyCFunction)ffi_rawstring,  METH_O,       ffi_rawstring_doc},
  {"sizeof",     (PyCFunction)ffi_sizeof,     METH_O,       ffi_sizeof_doc},
  {"string",     (PyCFunction)ffi_string,     METH_VKW,     ffi_string_doc},
  {"typeof",     (PyCFunction)ffi_typeof,     METH_O,       ffi_typeof_doc},
+ {"unpack",     (PyCFunction)ffi_unpack,     METH_VKW,     ffi_unpack_doc},
  {NULL}
 };
 
diff --git a/c/test_c.py b/c/test_c.py
index c7cf0e8..e44d430 100644
--- a/c/test_c.py
+++ b/c/test_c.py
@@ -3526,21 +3526,48 @@ def test_get_common_types():
     _get_common_types(d)
     assert d['bool'] == '_Bool'
 
-def test_rawstring():
+def test_unpack():
     BChar = new_primitive_type("char")
     BArray = new_array_type(new_pointer_type(BChar), 10)   # char[10]
     p = newp(BArray, b"abc\x00def")
-    assert rawstring(p) == b"abc\x00def\x00\x00\x00"
-    assert rawstring(p[1:6]) == b"bc\x00de"
+    p0 = p
+    assert unpack(p, 10) == b"abc\x00def\x00\x00\x00"
+    assert unpack(p+1, 5) == b"bc\x00de"
     BWChar = new_primitive_type("wchar_t")
     BArray = new_array_type(new_pointer_type(BWChar), 10)   # wchar_t[10]
     p = newp(BArray, u"abc\x00def")
-    assert rawstring(p) == u"abc\x00def\x00\x00\x00"
-    assert rawstring(p[1:6]) == u"bc\x00de"
-    BChar = new_primitive_type("uint8_t")
-    BArray = new_array_type(new_pointer_type(BChar), 10)   # uint8_t[10]
-    p = newp(BArray, [65 + i for i in range(10)])
-    assert rawstring(p) == b"ABCDEFGHIJ"
-    #
-    py.test.raises(TypeError, rawstring, "foobar")
-    py.test.raises(TypeError, rawstring, p + 1)
+    assert unpack(p, 10) == u"abc\x00def\x00\x00\x00"
+
+    for typename, samples in [
+            ("uint8_t",  [0, 2**8-1]),
+            ("uint16_t", [0, 2**16-1]),
+            ("uint32_t", [0, 2**32-1]),
+            ("uint64_t", [0, 2**64-1]),
+            ("int8_t",  [-2**7, 2**7-1]),
+            ("int16_t", [-2**15, 2**15-1]),
+            ("int32_t", [-2**31, 2**31-1]),
+            ("int64_t", [-2**63, 2**63-1]),
+            ("_Bool", [0, 1]),
+            ("float", [0.0, 10.5]),
+            ("double", [12.34, 56.78]),
+            ]:
+        BItem = new_primitive_type(typename)
+        BArray = new_array_type(new_pointer_type(BItem), 10)
+        p = newp(BArray, samples)
+        result = unpack(p, len(samples))
+        assert result == samples
+        for i in range(len(samples)):
+            assert result[i] == p[i] and type(result[i]) is type(p[i])
+    #
+    BInt = new_primitive_type("int")
+    py.test.raises(TypeError, unpack, p)
+    py.test.raises(TypeError, unpack, b"foobar", 6)
+    py.test.raises(TypeError, unpack, cast(BInt, 42), 1)
+    BFunc = new_function_type((BInt, BInt), BInt, False)
+    py.test.raises(TypeError, unpack, cast(new_pointer_type(BFunc), 42), 1)
+    #
+    py.test.raises(RuntimeError, unpack, cast(new_pointer_type(BChar), 0), 0)
+    py.test.raises(RuntimeError, unpack, cast(new_pointer_type(BChar), 0), 10)
+    #
+    py.test.raises(ValueError, unpack, p0, -1)
+    py.test.raises(ValueError, unpack, p, -1)
diff --git a/cffi/api.py b/cffi/api.py
index 41aea9b..7c6ad28 100644
--- a/cffi/api.py
+++ b/cffi/api.py
@@ -299,17 +299,20 @@ class FFI(object):
         """
         return self._backend.string(cdata, maxlen)
 
-    def rawstring(self, cdata):
-        """Convert a cdata that is an array of 'char' or 'wchar_t' to
-        a byte or unicode string.  Unlike ffi.string(), it does not stop
-        at the first null.
-
-        Note that if you have a pointer and an explicit length, you
-        can use 'p[0:length]' to make an array view.  This is similar to
-        the construct 'list(p[0:length])', which returns a list of chars/
-        unichars/ints/floats.
+    def unpack(self, cdata, length):
+        """Unpack an array of primitive C data of the given length,
+        returning a Python string/unicode/list.
+
+        If 'cdata' is a pointer to 'char', returns a byte string.
+        Unlike ffi.string(), it does not stop at the first null.
+
+        If 'cdata' is a pointer to 'wchar_t', returns a unicode string.
+        'length' is measured in wchar_t's; it is not the size in bytes.
+
+        If 'cdata' is a pointer to some other integer or floating-point
+        type, returns a list of 'length' integers or floats.
         """
-        return self._backend.rawstring(cdata)
+        return self._backend.unpack(cdata, length)
 
     def buffer(self, cdata, size=-1):
         """Return a read-write buffer object that references the raw C data
diff --git a/testing/cffi0/test_ffi_backend.py b/testing/cffi0/test_ffi_backend.py
index 6841c23..b96bae4 100644
--- a/testing/cffi0/test_ffi_backend.py
+++ b/testing/cffi0/test_ffi_backend.py
@@ -473,11 +473,9 @@ class TestBitfield:
                                     ['a', 'cc', 'ccc'],
                                     ['aa', 'aaa', 'g'])
 
-    def test_rawstring(self):
+    def test_unpack(self):
         ffi = FFI()
         p = ffi.new("char[]", b"abc\x00def")
-        assert ffi.rawstring(p) == b"abc\x00def\x00"
-        assert ffi.rawstring(p[1:6]) == b"bc\x00de"
-        p = ffi.new("wchar_t[]", u"abc\x00def")
-        assert ffi.rawstring(p) == u"abc\x00def\x00"
-        assert ffi.rawstring(p[1:6]) == u"bc\x00de"
+        assert ffi.unpack(p+1, 7) == b"bc\x00def\x00"
+        p = ffi.new("int[]", [-123456789])
+        assert ffi.unpack(p, 1) == [-123456789]
diff --git a/testing/cffi1/test_ffi_obj.py b/testing/cffi1/test_ffi_obj.py
index 8d96ac5..30b93f1 100644
--- a/testing/cffi1/test_ffi_obj.py
+++ b/testing/cffi1/test_ffi_obj.py
@@ -496,14 +496,9 @@ def test_init_once_multithread_failure():
         time.sleep(0.51)
     assert seen == ['init!', 'oops'] * 3
 
-def test_rawstring():
+def test_unpack():
     ffi = _cffi1_backend.FFI()
     p = ffi.new("char[]", b"abc\x00def")
-    assert ffi.rawstring(p) == b"abc\x00def\x00"
-    assert ffi.rawstring(p[1:6]) == b"bc\x00de"
-    p = ffi.new("wchar_t[]", u"abc\x00def")
-    assert ffi.rawstring(p) == u"abc\x00def\x00"
-    assert ffi.rawstring(p[1:6]) == u"bc\x00de"
-    #
-    py.test.raises(TypeError, ffi.rawstring, "foobar")
-    py.test.raises(TypeError, ffi.rawstring, p + 1)
+    assert ffi.unpack(p+1, 7) == b"bc\x00def\x00"
+    p = ffi.new("int[]", [-123456789])
+    assert ffi.unpack(p, 1) == [-123456789]
author	Armin Rigo <arigo@tunes.org>	2016-04-16 23:28:14 +0200
committer	Armin Rigo <arigo@tunes.org>	2016-04-16 23:28:14 +0200
commit	4b765c16d4f60f46084abd74341e79267be751d0 (patch)
tree	dc5b35a54a20ec09fb4622d7251ba7783e7a60be
parent	745ca71e9cda78085c3df98e9e526d34719e8100 (diff)
download	cffi-4b765c16d4f60f46084abd74341e79267be751d0.tar.gz