summary refs log tree commit diff
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 49
1 files changed, 37 insertions, 12 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 38fb3ffc5e..d6fc03e1ae 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6271,9 +6271,10 @@ PyUnicode_AsUTF16String(PyObject *unicode)
static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
PyObject *
-_PyUnicode_DecodeUnicodeEscape(const char *s,
+_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
Py_ssize_t size,
const char *errors,
+ Py_ssize_t *consumed,
const char **first_invalid_escape)
{
const char *starts = s;
@@ -6286,6 +6287,9 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
*first_invalid_escape = NULL;
if (size == 0) {
+ if (consumed) {
+ *consumed = 0;
+ }
_Py_RETURN_UNICODE_EMPTY();
}
/* Escaped strings will always be longer than the resulting
@@ -6336,7 +6340,7 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
/* \ - Escapes */
if (s >= end) {
message = "\\ at end of string";
- goto error;
+ goto incomplete;
}
c = (unsigned char) *s++;
@@ -6390,7 +6394,10 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
count = 8;
message = "truncated \\UXXXXXXXX escape";
hexescape:
- for (ch = 0; count && s < end; ++s, --count) {
+ for (ch = 0; count; ++s, --count) {
+ if (s >= end) {
+ goto incomplete;
+ }
c = (unsigned char)*s;
ch <<= 4;
if (c >= '0' && c <= '9') {
@@ -6403,12 +6410,9 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
ch += c - ('A' - 10);
}
else {
- break;
+ goto error;
}
}
- if (count) {
- goto error;
- }
/* when we get here, ch is a 32-bit unicode character */
if (ch > MAX_UNICODE) {
@@ -6435,14 +6439,20 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
}
message = "malformed \\N character escape";
- if (s < end && *s == '{') {
+ if (s >= end) {
+ goto incomplete;
+ }
+ if (*s == '{') {
const char *start = ++s;
size_t namelen;
/* look for the closing brace */
while (s < end && *s != '}')
s++;
+ if (s >= end) {
+ goto incomplete;
+ }
namelen = s - start;
- if (namelen && s < end) {
+ if (namelen) {
/* found a name. look it up in the unicode database */
s++;
ch = 0xffffffff; /* in case 'getcode' messes up */
@@ -6468,6 +6478,11 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
continue;
}
+ incomplete:
+ if (consumed) {
+ *consumed = startinpos;
+ break;
+ }
error:
endinpos = s-starts;
writer.min_length = end - s + writer.pos;
@@ -6496,12 +6511,14 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
}
PyObject *
-PyUnicode_DecodeUnicodeEscape(const char *s,
+_PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
Py_ssize_t size,
- const char *errors)
+ const char *errors,
+ Py_ssize_t *consumed)
{
const char *first_invalid_escape;
- PyObject *result = _PyUnicode_DecodeUnicodeEscape(s, size, errors,
+ PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors,
+ consumed,
&first_invalid_escape);
if (result == NULL)
return NULL;
@@ -6516,6 +6533,14 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
return result;
}
+PyObject *
+PyUnicode_DecodeUnicodeEscape(const char *s,
+ Py_ssize_t size,
+ const char *errors)
+{
+ return _PyUnicode_DecodeUnicodeEscapeStateful(s, size, errors, NULL);
+}
+
/* Return a Unicode-Escape string version of the Unicode object. */
PyObject *