diff options
author | Eric Wieser <wieser.eric@gmail.com> | 2020-08-30 12:05:16 +0100 |
---|---|---|
committer | Eric Wieser <wieser.eric@gmail.com> | 2020-08-30 12:36:31 +0100 |
commit | 68cfbd8d2f41a793ec226369834e714b35a667d8 (patch) | |
tree | 5d28a802e6a1c5a0afc05a0f1cd5465505478e83 | |
parent | bbe2cca1925873dea538674b96b53b5cef0a148a (diff) | |
download | numpy-68cfbd8d2f41a793ec226369834e714b35a667d8.tar.gz |
MAINT: Use utf8 strings in more of datetime
This fixes an omission in a previous patch, which left `datetime_as_string` unable to accept `μs` as a unit.
This will also very slightly speed up most uses, since all but very unusual code passes in `str` rather than `bytes`.
-rw-r--r-- | numpy/core/src/multiarray/datetime_busday.c | 20 | ||||
-rw-r--r-- | numpy/core/src/multiarray/datetime_busdaycal.c | 25 | ||||
-rw-r--r-- | numpy/core/src/multiarray/datetime_strings.c | 43 | ||||
-rw-r--r-- | numpy/core/tests/test_datetime.py | 5 |
4 files changed, 49 insertions, 44 deletions
diff --git a/numpy/core/src/multiarray/datetime_busday.c b/numpy/core/src/multiarray/datetime_busday.c index d3cce8a37..5acba600a 100644 --- a/numpy/core/src/multiarray/datetime_busday.c +++ b/numpy/core/src/multiarray/datetime_busday.c @@ -834,24 +834,24 @@ static int PyArray_BusDayRollConverter(PyObject *roll_in, NPY_BUSDAY_ROLL *roll) { PyObject *obj = roll_in; - char *str; - Py_ssize_t len; - /* Make obj into an ASCII string */ - Py_INCREF(obj); - if (PyUnicode_Check(obj)) { - /* accept unicode input */ - PyObject *obj_str; - obj_str = PyUnicode_AsASCIIString(obj); + /* Make obj into an UTF8 string */ + if (PyBytes_Check(obj)) { + /* accept bytes input */ + PyObject *obj_str = PyUnicode_FromEncodedObject(obj, NULL, NULL); if (obj_str == NULL) { Py_DECREF(obj); return 0; } - Py_DECREF(obj); obj = obj_str; } + else { + Py_INCREF(obj); + } - if (PyBytes_AsStringAndSize(obj, &str, &len) < 0) { + Py_ssize_t len; + char const *str = PyUnicode_AsUTF8AndSize(obj, &len); + if (str == NULL) { Py_DECREF(obj); return 0; } diff --git a/numpy/core/src/multiarray/datetime_busdaycal.c b/numpy/core/src/multiarray/datetime_busdaycal.c index 2374eaa63..906e3b539 100644 --- a/numpy/core/src/multiarray/datetime_busdaycal.c +++ b/numpy/core/src/multiarray/datetime_busdaycal.c @@ -30,33 +30,32 @@ PyArray_WeekMaskConverter(PyObject *weekmask_in, npy_bool *weekmask) { PyObject *obj = weekmask_in; - /* Make obj into an ASCII string if it is UNICODE */ - Py_INCREF(obj); - if (PyUnicode_Check(obj)) { - /* accept unicode input */ - PyObject *obj_str; - obj_str = PyUnicode_AsASCIIString(obj); + /* Make obj into an UTF8 string */ + if (PyBytes_Check(obj)) { + /* accept bytes input */ + PyObject *obj_str = PyUnicode_FromEncodedObject(obj, NULL, NULL); if (obj_str == NULL) { Py_DECREF(obj); return 0; } - Py_DECREF(obj); obj = obj_str; } + else { + Py_INCREF(obj); + } + if (PyBytes_Check(obj)) { - char *str; Py_ssize_t len; - int i; - - if (PyBytes_AsStringAndSize(obj, &str, &len) < 0) { + char 
const *str = PyUnicode_AsUTF8AndSize(obj, &len); + if (str == NULL) { Py_DECREF(obj); return 0; } /* Length 7 is a string like "1111100" */ if (len == 7) { - for (i = 0; i < 7; ++i) { + for (int i = 0; i < 7; ++i) { switch(str[i]) { case '0': weekmask[i] = 0; @@ -75,7 +74,7 @@ PyArray_WeekMaskConverter(PyObject *weekmask_in, npy_bool *weekmask) general_weekmask_string: /* a string like "SatSun" or "Mon Tue Wed" */ memset(weekmask, 0, 7); - for (i = 0; i < len; i += 3) { + for (int i = 0; i < len; i += 3) { while (isspace(str[i])) ++i; diff --git a/numpy/core/src/multiarray/datetime_strings.c b/numpy/core/src/multiarray/datetime_strings.c index f847c7ea8..49c7e81eb 100644 --- a/numpy/core/src/multiarray/datetime_strings.c +++ b/numpy/core/src/multiarray/datetime_strings.c @@ -1385,21 +1385,23 @@ array_datetime_as_string(PyObject *NPY_UNUSED(self), PyObject *args, /* Parse the input unit if provided */ if (unit_in != NULL && unit_in != Py_None) { PyObject *strobj; - char *str = NULL; - Py_ssize_t len = 0; - if (PyUnicode_Check(unit_in)) { - strobj = PyUnicode_AsASCIIString(unit_in); - if (strobj == NULL) { - goto fail; + if (PyBytes_Check(unit_in)) { + /* accept bytes input */ + PyObject *obj_str = PyUnicode_FromEncodedObject(unit_in, NULL, NULL); + if (obj_str == NULL) { + return 0; } + strobj = obj_str; } else { + Py_INCREF(unit_in); strobj = unit_in; - Py_INCREF(strobj); } - if (PyBytes_AsStringAndSize(strobj, &str, &len) < 0) { + Py_ssize_t len = 0; + char *str = PyUnicode_AsUTF8AndSize(strobj, &len); + if (str == NULL) { Py_DECREF(strobj); goto fail; } @@ -1434,24 +1436,27 @@ array_datetime_as_string(PyObject *NPY_UNUSED(self), PyObject *args, /* Get the input time zone */ if (timezone_obj != NULL) { - /* Convert to ASCII if it's unicode */ - if (PyUnicode_Check(timezone_obj)) { - /* accept unicode input */ - PyObject *obj_str; - obj_str = PyUnicode_AsASCIIString(timezone_obj); + PyObject *strobj; + if (PyBytes_Check(timezone_obj)) { + /* accept bytes input */ + 
PyObject *obj_str = PyUnicode_FromEncodedObject(timezone_obj, NULL, NULL); if (obj_str == NULL) { goto fail; } - Py_DECREF(timezone_obj); - timezone_obj = obj_str; + strobj = obj_str; } + else { + Py_INCREF(timezone_obj); + strobj = unit_in; + } + + Py_SETREF(timezone_obj, strobj); /* Check for the supported string inputs */ - if (PyBytes_Check(timezone_obj)) { - char *str; + if (PyUnicode_Check(timezone_obj)) { Py_ssize_t len; - - if (PyBytes_AsStringAndSize(timezone_obj, &str, &len) < 0) { + char const *str = PyUnicode_AsUTF8AndSize(timezone_obj, &len); + if (str == NULL) { goto fail; } diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py index f725091c5..f58cf307a 100644 --- a/numpy/core/tests/test_datetime.py +++ b/numpy/core/tests/test_datetime.py @@ -1654,8 +1654,9 @@ class TestDateTime: '1959-10-13T12:34:56') assert_equal(np.datetime_as_string(np.datetime64(datetime, 'ms')), '1959-10-13T12:34:56.789') - assert_equal(np.datetime_as_string(np.datetime64(datetime, 'us')), - '1959-10-13T12:34:56.789012') + for us in ['us', 'μs', b'us']: # check non-ascii and bytes too + assert_equal(np.datetime_as_string(np.datetime64(datetime, us)), + '1959-10-13T12:34:56.789012') datetime = '1969-12-31T23:34:56.789012345678901234' |