summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Eric Wieser <wieser.eric@gmail.com> 2020-08-30 12:05:16 +0100
committer Eric Wieser <wieser.eric@gmail.com> 2020-08-30 12:36:31 +0100
commit 68cfbd8d2f41a793ec226369834e714b35a667d8 (patch)
tree 5d28a802e6a1c5a0afc05a0f1cd5465505478e83
parent bbe2cca1925873dea538674b96b53b5cef0a148a (diff)
download numpy-68cfbd8d2f41a793ec226369834e714b35a667d8.tar.gz
MAINT: Use utf8 strings in more of datetime
This fixes an omission in a previous patch, which did not allow `μs` as a unit in `datetime_as_string`. It will also very slightly speed up most uses, since all but very unusual code passes in `str` rather than `bytes`.
-rw-r--r-- numpy/core/src/multiarray/datetime_busday.c 20
-rw-r--r-- numpy/core/src/multiarray/datetime_busdaycal.c 25
-rw-r--r-- numpy/core/src/multiarray/datetime_strings.c 43
-rw-r--r-- numpy/core/tests/test_datetime.py 5
4 files changed, 49 insertions, 44 deletions
diff --git a/numpy/core/src/multiarray/datetime_busday.c b/numpy/core/src/multiarray/datetime_busday.c
index d3cce8a37..5acba600a 100644
--- a/numpy/core/src/multiarray/datetime_busday.c
+++ b/numpy/core/src/multiarray/datetime_busday.c
@@ -834,24 +834,24 @@ static int
PyArray_BusDayRollConverter(PyObject *roll_in, NPY_BUSDAY_ROLL *roll)
{
PyObject *obj = roll_in;
- char *str;
- Py_ssize_t len;
- /* Make obj into an ASCII string */
- Py_INCREF(obj);
- if (PyUnicode_Check(obj)) {
- /* accept unicode input */
- PyObject *obj_str;
- obj_str = PyUnicode_AsASCIIString(obj);
+ /* Make obj into an UTF8 string */
+ if (PyBytes_Check(obj)) {
+ /* accept bytes input */
+ PyObject *obj_str = PyUnicode_FromEncodedObject(obj, NULL, NULL);
if (obj_str == NULL) {
Py_DECREF(obj);
return 0;
}
- Py_DECREF(obj);
obj = obj_str;
}
+ else {
+ Py_INCREF(obj);
+ }
- if (PyBytes_AsStringAndSize(obj, &str, &len) < 0) {
+ Py_ssize_t len;
+ char const *str = PyUnicode_AsUTF8AndSize(obj, &len);
+ if (str == NULL) {
Py_DECREF(obj);
return 0;
}
diff --git a/numpy/core/src/multiarray/datetime_busdaycal.c b/numpy/core/src/multiarray/datetime_busdaycal.c
index 2374eaa63..906e3b539 100644
--- a/numpy/core/src/multiarray/datetime_busdaycal.c
+++ b/numpy/core/src/multiarray/datetime_busdaycal.c
@@ -30,33 +30,32 @@ PyArray_WeekMaskConverter(PyObject *weekmask_in, npy_bool *weekmask)
{
PyObject *obj = weekmask_in;
- /* Make obj into an ASCII string if it is UNICODE */
- Py_INCREF(obj);
- if (PyUnicode_Check(obj)) {
- /* accept unicode input */
- PyObject *obj_str;
- obj_str = PyUnicode_AsASCIIString(obj);
+ /* Make obj into an UTF8 string */
+ if (PyBytes_Check(obj)) {
+ /* accept bytes input */
+ PyObject *obj_str = PyUnicode_FromEncodedObject(obj, NULL, NULL);
if (obj_str == NULL) {
Py_DECREF(obj);
return 0;
}
- Py_DECREF(obj);
obj = obj_str;
}
+ else {
+ Py_INCREF(obj);
+ }
+
if (PyBytes_Check(obj)) {
- char *str;
Py_ssize_t len;
- int i;
-
- if (PyBytes_AsStringAndSize(obj, &str, &len) < 0) {
+ char const *str = PyUnicode_AsUTF8AndSize(obj, &len);
+ if (str == NULL) {
Py_DECREF(obj);
return 0;
}
/* Length 7 is a string like "1111100" */
if (len == 7) {
- for (i = 0; i < 7; ++i) {
+ for (int i = 0; i < 7; ++i) {
switch(str[i]) {
case '0':
weekmask[i] = 0;
@@ -75,7 +74,7 @@ PyArray_WeekMaskConverter(PyObject *weekmask_in, npy_bool *weekmask)
general_weekmask_string:
/* a string like "SatSun" or "Mon Tue Wed" */
memset(weekmask, 0, 7);
- for (i = 0; i < len; i += 3) {
+ for (int i = 0; i < len; i += 3) {
while (isspace(str[i]))
++i;
diff --git a/numpy/core/src/multiarray/datetime_strings.c b/numpy/core/src/multiarray/datetime_strings.c
index f847c7ea8..49c7e81eb 100644
--- a/numpy/core/src/multiarray/datetime_strings.c
+++ b/numpy/core/src/multiarray/datetime_strings.c
@@ -1385,21 +1385,23 @@ array_datetime_as_string(PyObject *NPY_UNUSED(self), PyObject *args,
/* Parse the input unit if provided */
if (unit_in != NULL && unit_in != Py_None) {
PyObject *strobj;
- char *str = NULL;
- Py_ssize_t len = 0;
- if (PyUnicode_Check(unit_in)) {
- strobj = PyUnicode_AsASCIIString(unit_in);
- if (strobj == NULL) {
- goto fail;
+ if (PyBytes_Check(unit_in)) {
+ /* accept bytes input */
+ PyObject *obj_str = PyUnicode_FromEncodedObject(unit_in, NULL, NULL);
+ if (obj_str == NULL) {
+ return 0;
}
+ strobj = obj_str;
}
else {
+ Py_INCREF(unit_in);
strobj = unit_in;
- Py_INCREF(strobj);
}
- if (PyBytes_AsStringAndSize(strobj, &str, &len) < 0) {
+ Py_ssize_t len = 0;
+ char *str = PyUnicode_AsUTF8AndSize(strobj, &len);
+ if (str == NULL) {
Py_DECREF(strobj);
goto fail;
}
@@ -1434,24 +1436,27 @@ array_datetime_as_string(PyObject *NPY_UNUSED(self), PyObject *args,
/* Get the input time zone */
if (timezone_obj != NULL) {
- /* Convert to ASCII if it's unicode */
- if (PyUnicode_Check(timezone_obj)) {
- /* accept unicode input */
- PyObject *obj_str;
- obj_str = PyUnicode_AsASCIIString(timezone_obj);
+ PyObject *strobj;
+ if (PyBytes_Check(timezone_obj)) {
+ /* accept bytes input */
+ PyObject *obj_str = PyUnicode_FromEncodedObject(timezone_obj, NULL, NULL);
if (obj_str == NULL) {
goto fail;
}
- Py_DECREF(timezone_obj);
- timezone_obj = obj_str;
+ strobj = obj_str;
}
+ else {
+ Py_INCREF(timezone_obj);
+ strobj = unit_in;
+ }
+
+ Py_SETREF(timezone_obj, strobj);
/* Check for the supported string inputs */
- if (PyBytes_Check(timezone_obj)) {
- char *str;
+ if (PyUnicode_Check(timezone_obj)) {
Py_ssize_t len;
-
- if (PyBytes_AsStringAndSize(timezone_obj, &str, &len) < 0) {
+ char const *str = PyUnicode_AsUTF8AndSize(timezone_obj, &len);
+ if (str == NULL) {
goto fail;
}
diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py
index f725091c5..f58cf307a 100644
--- a/numpy/core/tests/test_datetime.py
+++ b/numpy/core/tests/test_datetime.py
@@ -1654,8 +1654,9 @@ class TestDateTime:
'1959-10-13T12:34:56')
assert_equal(np.datetime_as_string(np.datetime64(datetime, 'ms')),
'1959-10-13T12:34:56.789')
- assert_equal(np.datetime_as_string(np.datetime64(datetime, 'us')),
- '1959-10-13T12:34:56.789012')
+ for us in ['us', 'μs', b'us']: # check non-ascii and bytes too
+ assert_equal(np.datetime_as_string(np.datetime64(datetime, us)),
+ '1959-10-13T12:34:56.789012')
datetime = '1969-12-31T23:34:56.789012345678901234'