Optimize bytes.fromhex() and bytearray.fromhex()

Issue #25401: Optimize bytes.fromhex() and bytearray.fromhex(): they are now between 2x and 3.5x faster.

Changes:

* Use a fast path working on a char* string for ASCII strings
* Use a slow path for non-ASCII strings
* Replace the slow hex_digit_to_int() function with an O(1) lookup in the precomputed _PyLong_DigitValue table
* Use the _PyBytesWriter API to handle the buffer
* Add unit tests to check the error position in error messages
author: Victor Stinner <victor.stinner@gmail.com> 2015-10-14 11:25:33 +0200
committer: Victor Stinner <victor.stinner@gmail.com> 2015-10-14 11:25:33 +0200
commit: 2bf8993db966256d564d87865ceddf0e33c02500 (patch)
tree: 8b172dcec9ee6d9584c75ecc933b418b5210963b /Objects/bytearrayobject.c
parent: ebcf9edc05c03af38c01d8aeb05494b68169756c (diff)
download: cpython-git-2bf8993db966256d564d87865ceddf0e33c02500.tar.gz
1 files changed, 1 insertions, 42 deletions
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index e535bce8d7..b270fcccc6 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -2823,48 +2823,7 @@ static PyObject *
 bytearray_fromhex_impl(PyObject*cls, PyObject *string)
 /*[clinic end generated code: output=df3da60129b3700c input=907bbd2d34d9367a]*/
 {
-    PyObject *newbytes;
-    char *buf;
-    Py_ssize_t hexlen, byteslen, i, j;
-    int top, bot;
-    void *data;
-    unsigned int kind;
-
-    assert(PyUnicode_Check(string));
-    if (PyUnicode_READY(string))
-        return NULL;
-    kind = PyUnicode_KIND(string);
-    data = PyUnicode_DATA(string);
-    hexlen = PyUnicode_GET_LENGTH(string);
-
-    byteslen = hexlen/2; /* This overestimates if there are spaces */
-    newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
-    if (!newbytes)
-        return NULL;
-    buf = PyByteArray_AS_STRING(newbytes);
-    for (i = j = 0; i < hexlen; i += 2) {
-        /* skip over spaces in the input */
-        while (PyUnicode_READ(kind, data, i) == ' ')
-            i++;
-        if (i >= hexlen)
-            break;
-        top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
-        bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
-        if (top == -1 || bot == -1) {
-            PyErr_Format(PyExc_ValueError,
-                         "non-hexadecimal number found in "
-                         "fromhex() arg at position %zd", i);
-            goto error;
-        }
-        buf[j++] = (top << 4) + bot;
-    }
-    if (PyByteArray_Resize(newbytes, j) < 0)
-        goto error;
-    return newbytes;
-
-  error:
-    Py_DECREF(newbytes);
-    return NULL;
+    return _PyBytes_FromHex(string, 1);
 }
 
 PyDoc_STRVAR(hex__doc__,
author	Victor Stinner <victor.stinner@gmail.com>	2015-10-14 11:25:33 +0200
committer	Victor Stinner <victor.stinner@gmail.com>	2015-10-14 11:25:33 +0200
commit	2bf8993db966256d564d87865ceddf0e33c02500 (patch)
tree	8b172dcec9ee6d9584c75ecc933b418b5210963b /Objects/bytearrayobject.c
parent	ebcf9edc05c03af38c01d8aeb05494b68169756c (diff)
download	cpython-git-2bf8993db966256d564d87865ceddf0e33c02500.tar.gz