diff options
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 64 |
1 files changed, 41 insertions, 23 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 18b9458721..276547ca48 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12309,31 +12309,22 @@ unicode_isnumeric_impl(PyObject *self) Py_RETURN_TRUE; } -int -PyUnicode_IsIdentifier(PyObject *self) +Py_ssize_t +_PyUnicode_ScanIdentifier(PyObject *self) { Py_ssize_t i; - int ready = PyUnicode_IS_READY(self); + if (PyUnicode_READY(self) == -1) + return -1; - Py_ssize_t len = ready ? PyUnicode_GET_LENGTH(self) : PyUnicode_GET_SIZE(self); + Py_ssize_t len = PyUnicode_GET_LENGTH(self); if (len == 0) { /* an empty string is not a valid identifier */ return 0; } - int kind = 0; - const void *data = NULL; - const wchar_t *wstr = NULL; - Py_UCS4 ch; - if (ready) { - kind = PyUnicode_KIND(self); - data = PyUnicode_DATA(self); - ch = PyUnicode_READ(kind, data, 0); - } - else { - wstr = _PyUnicode_WSTR(self); - ch = wstr[0]; - } + int kind = PyUnicode_KIND(self); + const void *data = PyUnicode_DATA(self); + Py_UCS4 ch = PyUnicode_READ(kind, data, 0); /* PEP 3131 says that the first character must be in XID_Start and subsequent characters in XID_Continue, and for the ASCII range, the 2.x rules apply (i.e @@ -12347,17 +12338,44 @@ PyUnicode_IsIdentifier(PyObject *self) } for (i = 1; i < len; i++) { - if (ready) { - ch = PyUnicode_READ(kind, data, i); + ch = PyUnicode_READ(kind, data, i); + if (!_PyUnicode_IsXidContinue(ch)) { + return i; } - else { - ch = wstr[i]; + } + return i; +} + +int +PyUnicode_IsIdentifier(PyObject *self) +{ + if (PyUnicode_IS_READY(self)) { + Py_ssize_t i = _PyUnicode_ScanIdentifier(self); + Py_ssize_t len = PyUnicode_GET_LENGTH(self); + /* an empty string is not a valid identifier */ + return len && i == len; + } + else { + Py_ssize_t i, len = PyUnicode_GET_SIZE(self); + if (len == 0) { + /* an empty string is not a valid identifier */ + return 0; } - if (!_PyUnicode_IsXidContinue(ch)) { + + const wchar_t *wstr = _PyUnicode_WSTR(self); + Py_UCS4 ch = wstr[0]; + if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) { return 0; } + + for (i = 1; i < len; i++) { + ch = wstr[i]; + if (!_PyUnicode_IsXidContinue(ch)) { + return 0; + } + } + return 1; } - return 1; } /*[clinic input] |