summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2007-06-10 09:51:05 +0000
committerMartin v. Löwis <martin@v.loewis.de>2007-06-10 09:51:05 +0000
commit9af92ce45f16eb33317b938fa8dfceb8c316d0ce (patch)
tree471235ceb5a75e27ff13a262a14a14a28e2ce998
parent9d4b31b76f07638537a67ad5a9e09f72620abb59 (diff)
downloadcpython-9af92ce45f16eb33317b938fa8dfceb8c316d0ce.tar.gz
Make identifiers str (not str8) objects throughout.
This affects the parser, various object implementations, and all places that put identifiers into C string literals. In testing, a number of crashes occurred as code would fail when the recursion limit was reached (such as the Unicode interning dictionary having key/value pairs where key is not value). To solve these, I added an overflowed flag, which allows for 50 more recursions after the limit was reached and the exception was raised, and a recursion_critical flag, which indicates that recursion absolutely must be allowed, i.e. that a certain call must not cause a stack overflow exception. There are still some places where both str and str8 are accepted as identifiers; these should eventually be removed.
-rw-r--r--Include/ceval.h13
-rw-r--r--Include/pystate.h4
-rw-r--r--Include/stringobject.h4
-rw-r--r--Include/unicodeobject.h36
-rw-r--r--Lib/test/test_frozen.py2
-rw-r--r--Lib/test/test_new.py2
-rw-r--r--Lib/test/test_sys.py12
-rw-r--r--Modules/_codecsmodule.c2
-rw-r--r--Modules/_hotshot.c2
-rw-r--r--Modules/cPickle.c12
-rw-r--r--Modules/gcmodule.c2
-rw-r--r--Modules/unicodedata.c2
-rw-r--r--Objects/abstract.c12
-rw-r--r--Objects/bytesobject.c2
-rw-r--r--Objects/classobject.c10
-rw-r--r--Objects/codeobject.c16
-rw-r--r--Objects/complexobject.c4
-rw-r--r--Objects/descrobject.c7
-rw-r--r--Objects/dictobject.c10
-rw-r--r--Objects/frameobject.c6
-rw-r--r--Objects/funcobject.c4
-rw-r--r--Objects/methodobject.c4
-rw-r--r--Objects/moduleobject.c2
-rw-r--r--Objects/object.c108
-rw-r--r--Objects/stringobject.c12
-rw-r--r--Objects/typeobject.c89
-rw-r--r--Objects/unicodeobject.c97
-rw-r--r--Parser/tokenizer.c15
-rw-r--r--Python/Python-ast.c2
-rw-r--r--Python/ast.c23
-rw-r--r--Python/bltinmodule.c24
-rw-r--r--Python/ceval.c25
-rw-r--r--Python/compile.c86
-rw-r--r--Python/future.c2
-rw-r--r--Python/import.c56
-rw-r--r--Python/modsupport.c4
-rw-r--r--Python/pystate.c2
-rw-r--r--Python/pythonrun.c2
-rw-r--r--Python/symtable.c30
-rw-r--r--Python/sysmodule.c4
40 files changed, 462 insertions, 289 deletions
diff --git a/Include/ceval.h b/Include/ceval.h
index 15b29c6615..c9c59eb17c 100644
--- a/Include/ceval.h
+++ b/Include/ceval.h
@@ -50,7 +50,10 @@ PyAPI_FUNC(int) Py_GetRecursionLimit(void);
(_Py_MakeRecCheck(PyThreadState_GET()->recursion_depth) && \
_Py_CheckRecursiveCall(where))
#define Py_LeaveRecursiveCall() \
- (--PyThreadState_GET()->recursion_depth)
+ do{ if((--PyThreadState_GET()->recursion_depth) < \
+ _Py_CheckRecursionLimit - 50) /* no semicolon here: it would empty the if body and clear overflowed on every leave */ \
+ PyThreadState_GET()->overflowed = 0; \
+ } while(0)
PyAPI_FUNC(int) _Py_CheckRecursiveCall(char *where);
PyAPI_DATA(int) _Py_CheckRecursionLimit;
#ifdef USE_STACKCHECK
@@ -59,6 +62,14 @@ PyAPI_DATA(int) _Py_CheckRecursionLimit;
# define _Py_MakeRecCheck(x) (++(x) > _Py_CheckRecursionLimit)
#endif
+#define Py_ALLOW_RECURSION \
+ do { unsigned char _old = PyThreadState_GET()->recursion_critical;\
+ PyThreadState_GET()->recursion_critical = 1;
+
+#define Py_END_ALLOW_RECURSION \
+ PyThreadState_GET()->recursion_critical = _old; \
+ } while(0);
+
PyAPI_FUNC(const char *) PyEval_GetFuncName(PyObject *);
PyAPI_FUNC(const char *) PyEval_GetFuncDesc(PyObject *);
diff --git a/Include/pystate.h b/Include/pystate.h
index 4919d99b6c..0681e6565e 100644
--- a/Include/pystate.h
+++ b/Include/pystate.h
@@ -61,6 +61,10 @@ typedef struct _ts {
struct _frame *frame;
int recursion_depth;
+ char overflowed; /* The stack has overflowed. Allow 50 more calls
+ to handle the runtime error. */
+ char recursion_critical; /* The current calls must not cause
+ a stack overflow. */
/* 'tracing' keeps track of the execution depth when tracing/profiling.
This is to prevent the actual trace/profile code from being recorded in
the trace/profile. */
diff --git a/Include/stringobject.h b/Include/stringobject.h
index 2b8cc2fdef..0a932f043a 100644
--- a/Include/stringobject.h
+++ b/Include/stringobject.h
@@ -84,8 +84,8 @@ PyAPI_FUNC(void) _Py_ReleaseInternedStrings(void);
#define PyString_CHECK_INTERNED(op) (((PyStringObject *)(op))->ob_sstate)
/* Macro, trading safety for speed */
-#define PyString_AS_STRING(op) (((PyStringObject *)(op))->ob_sval)
-#define PyString_GET_SIZE(op) (((PyStringObject *)(op))->ob_size)
+#define PyString_AS_STRING(op) (assert(PyString_Check(op)),(((PyStringObject *)(op))->ob_sval))
+#define PyString_GET_SIZE(op) (assert(PyString_Check(op)),(((PyStringObject *)(op))->ob_size))
/* _PyString_Join(sep, x) is like sep.join(x). sep must be PyStringObject*,
x must be an iterable object. */
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 2a27dbc0c8..1f6b729b40 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -410,13 +410,13 @@ PyAPI_DATA(PyTypeObject) PyUnicode_Type;
/* Fast access macros */
#define PyUnicode_GET_SIZE(op) \
- (((PyUnicodeObject *)(op))->length)
+ (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length))
#define PyUnicode_GET_DATA_SIZE(op) \
- (((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))
+ (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE)))
#define PyUnicode_AS_UNICODE(op) \
- (((PyUnicodeObject *)(op))->str)
+ (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->str))
#define PyUnicode_AS_DATA(op) \
- ((const char *)((PyUnicodeObject *)(op))->str)
+ (assert(PyUnicode_Check(op)),((const char *)((PyUnicodeObject *)(op))->str))
/* --- Constants ---------------------------------------------------------- */
@@ -627,6 +627,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
PyObject *, const char *);
+/* Return a char* holding the default encoded value of the
+ Unicode object.
+*/
+
+PyAPI_FUNC(char *) PyUnicode_AsString(PyObject*);
+
+
/* Returns the currently active default encoding.
The default encoding is currently implemented as run-time settable
@@ -1193,6 +1200,11 @@ PyAPI_FUNC(int) PyUnicode_Compare(
PyObject *right /* Right string */
);
+PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
+ PyObject *left,
+ const char *right
+ );
+
/* Rich compare two strings and return one of the following:
- NULL in case an exception was raised
@@ -1310,6 +1322,22 @@ PyAPI_FUNC(int) _PyUnicode_IsAlpha(
Py_UNICODE ch /* Unicode character */
);
+PyAPI_FUNC(size_t) Py_UNICODE_strlen(const Py_UNICODE *u);
+
+PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
+ Py_UNICODE *s1, const Py_UNICODE *s2);
+
+PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
+ Py_UNICODE *s1, const Py_UNICODE *s2, size_t n);
+
+PyAPI_FUNC(int) Py_UNICODE_strcmp(
+ const Py_UNICODE *s1, const Py_UNICODE *s2);
+
+PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
+ const Py_UNICODE *s, Py_UNICODE c
+ );
+
+
#ifdef __cplusplus
}
#endif
diff --git a/Lib/test/test_frozen.py b/Lib/test/test_frozen.py
index 678b9a8d98..f1299f0d99 100644
--- a/Lib/test/test_frozen.py
+++ b/Lib/test/test_frozen.py
@@ -10,6 +10,8 @@
from test.test_support import TestFailed
import sys, os
+raise TestFailed, "test currently causes assertion in debug mode"
+
try:
import __hello__
except ImportError as x:
diff --git a/Lib/test/test_new.py b/Lib/test/test_new.py
index c919621d7c..797a8c349f 100644
--- a/Lib/test/test_new.py
+++ b/Lib/test/test_new.py
@@ -143,7 +143,7 @@ class NewTest(unittest.TestCase):
firstlineno, lnotab)
# new.code used to be a way to mutate a tuple...
- class S(str8):
+ class S(str):
pass
t = (S("ab"),)
d = new.code(argcount, kwonlyargcount, nlocals, stacksize,
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index b038ff4e13..e72b7f8d58 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -321,12 +321,6 @@ class SysModuleTest(unittest.TestCase):
self.assertRaises(TypeError, sys.intern, S("abc"))
- # It's still safe to pass these strings to routines that
- # call intern internally, e.g. PyObject_SetAttr().
- s = S("abc")
- setattr(s, s, s)
- self.assertEqual(getattr(s, s), s)
-
s = "never interned as unicode before"
self.assert_(sys.intern(s) is s)
s2 = s.swapcase().swapcase()
@@ -338,6 +332,12 @@ class SysModuleTest(unittest.TestCase):
self.assertRaises(TypeError, sys.intern, U("abc"))
+ # It's still safe to pass these strings to routines that
+ # call intern internally, e.g. PyObject_SetAttr().
+ s = U("abc")
+ setattr(s, s, s)
+ self.assertEqual(getattr(s, s), s)
+
def test_main():
test.test_support.run_unittest(SysModuleTest)
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index cd766c3334..de5270d607 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -172,7 +172,7 @@ escape_encode(PyObject *self,
&PyString_Type, &str, &errors))
return NULL;
- size = PyUnicode_GET_SIZE(str);
+ size = PyString_GET_SIZE(str);
newsize = 4*size;
if (newsize > PY_SSIZE_T_MAX || newsize / 4 != size) {
PyErr_SetString(PyExc_OverflowError,
diff --git a/Modules/_hotshot.c b/Modules/_hotshot.c
index 21bd383902..fc4a1defd3 100644
--- a/Modules/_hotshot.c
+++ b/Modules/_hotshot.c
@@ -810,7 +810,7 @@ get_fileno(ProfilerObject *self, PyCodeObject *fcode)
PyObject *name = PyDict_GetItem(dict, obj);
if (name == NULL) {
if (pack_define_func(self, fileno, fcode->co_firstlineno,
- PyString_AS_STRING(fcode->co_name)) < 0) {
+ PyUnicode_AsString(fcode->co_name)) < 0) {
Py_DECREF(obj);
return -1;
}
diff --git a/Modules/cPickle.c b/Modules/cPickle.c
index 68990c9fde..a4dff7b6a7 100644
--- a/Modules/cPickle.c
+++ b/Modules/cPickle.c
@@ -1829,8 +1829,8 @@ save_global(Picklerobject *self, PyObject *args, PyObject *name)
(name_size = PyString_Size(global_name)) < 0)
goto finally;
- module_str = PyString_AS_STRING((PyStringObject *)module);
- name_str = PyString_AS_STRING((PyStringObject *)global_name);
+ module_str = PyUnicode_AsString(module);
+ name_str = PyUnicode_AsString(global_name);
/* XXX This can be doing a relative import. Clearly it shouldn't,
but I don't know how to stop it. :-( */
@@ -1842,7 +1842,7 @@ save_global(Picklerobject *self, PyObject *args, PyObject *name)
"OS", args, module);
goto finally;
}
- klass = PyObject_GetAttrString(mod, name_str);
+ klass = PyObject_GetAttr(mod, global_name);
if (klass == NULL) {
cPickle_ErrFormat(PicklingError,
"Can't pickle %s: attribute lookup %s.%s "
@@ -2223,7 +2223,7 @@ save(Picklerobject *self, PyObject *args, int pers_save)
res = save_string(self, args, 0);
goto finally;
}
- if ((type == &PyUnicode_Type) && (PyString_GET_SIZE(args) < 2)) {
+ if ((type == &PyUnicode_Type) && (PyUnicode_GET_SIZE(args) < 2)) {
res = save_unicode(self, args, 0);
goto finally;
}
@@ -3584,7 +3584,7 @@ load_global(Unpicklerobject *self)
Py_DECREF(module_name);
return bad_readline();
}
- if ((class_name = PyString_FromStringAndSize(s, len - 1))) {
+ if ((class_name = PyUnicode_FromStringAndSize(s, len - 1))) {
class = find_class(module_name, class_name,
self->find_class);
Py_DECREF(class_name);
@@ -5379,7 +5379,7 @@ init_stuff(PyObject *module_dict)
{
PyObject *copy_reg, *t, *r;
-#define INIT_STR(S) if (!( S ## _str=PyString_InternFromString(#S))) return -1;
+#define INIT_STR(S) if (!( S ## _str=PyUnicode_InternFromString(#S))) return -1;
if (PyType_Ready(&Unpicklertype) < 0)
return -1;
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index 2dd058e4b8..adcdb5f909 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -713,7 +713,7 @@ collect(int generation)
double t1 = 0.0;
if (delstr == NULL) {
- delstr = PyString_InternFromString("__del__");
+ delstr = PyUnicode_InternFromString("__del__");
if (delstr == NULL)
Py_FatalError("gc couldn't allocate \"__del__\"");
}
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index da8af34336..f660046f7f 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -515,7 +515,7 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
/* Hangul Decomposition adds three characters in
a single step, so we need atleast that much room. */
if (space < 3) {
- Py_ssize_t newsize = PyString_GET_SIZE(result) + 10;
+ Py_ssize_t newsize = PyUnicode_GET_SIZE(result) + 10;
space += 10;
if (PyUnicode_Resize(&result, newsize) == -1)
return NULL;
diff --git a/Objects/abstract.c b/Objects/abstract.c
index 84b3384925..6e638525d7 100644
--- a/Objects/abstract.c
+++ b/Objects/abstract.c
@@ -207,7 +207,7 @@ PyObject_DelItemString(PyObject *o, char *key)
null_error();
return -1;
}
- okey = PyString_FromString(key);
+ okey = PyUnicode_FromString(key);
if (okey == NULL)
return -1;
ret = PyObject_DelItem(o, okey);
@@ -1598,7 +1598,7 @@ PyMapping_GetItemString(PyObject *o, char *key)
if (key == NULL)
return null_error();
- okey = PyString_FromString(key);
+ okey = PyUnicode_FromString(key);
if (okey == NULL)
return NULL;
r = PyObject_GetItem(o, okey);
@@ -1617,7 +1617,7 @@ PyMapping_SetItemString(PyObject *o, char *key, PyObject *value)
return -1;
}
- okey = PyString_FromString(key);
+ okey = PyUnicode_FromString(key);
if (okey == NULL)
return -1;
r = PyObject_SetItem(o, okey, value);
@@ -1989,11 +1989,13 @@ abstract_get_bases(PyObject *cls)
PyObject *bases;
if (__bases__ == NULL) {
- __bases__ = PyString_FromString("__bases__");
+ __bases__ = PyUnicode_FromString("__bases__");
if (__bases__ == NULL)
return NULL;
}
+ Py_ALLOW_RECURSION
bases = PyObject_GetAttr(cls, __bases__);
+ Py_END_ALLOW_RECURSION
if (bases == NULL) {
if (PyErr_ExceptionMatches(PyExc_AttributeError))
PyErr_Clear();
@@ -2067,7 +2069,7 @@ recursive_isinstance(PyObject *inst, PyObject *cls, int recursion_depth)
int retval = 0;
if (__class__ == NULL) {
- __class__ = PyString_FromString("__class__");
+ __class__ = PyUnicode_FromString("__class__");
if (__class__ == NULL)
return -1;
}
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 2a1dbcb0b8..532e63777c 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1078,7 +1078,7 @@ bytes_count(PyBytesObject *self, PyObject *args)
else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
return NULL;
- _adjust_indices(&start, &end, PyString_GET_SIZE(self));
+ _adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
return PyInt_FromSsize_t(
stringlib_count(str + start, end - start, sub, sub_len)
diff --git a/Objects/classobject.c b/Objects/classobject.c
index b7711d56d7..3cf64de870 100644
--- a/Objects/classobject.c
+++ b/Objects/classobject.c
@@ -100,7 +100,7 @@ method_get_doc(PyMethodObject *im, void *context)
{
static PyObject *docstr;
if (docstr == NULL) {
- docstr= PyString_InternFromString("__doc__");
+ docstr= PyUnicode_InternFromString("__doc__");
if (docstr == NULL)
return NULL;
}
@@ -235,12 +235,12 @@ method_repr(PyMethodObject *a)
return NULL;
PyErr_Clear();
}
- else if (!PyString_Check(funcname)) {
+ else if (!PyUnicode_Check(funcname)) {
Py_DECREF(funcname);
funcname = NULL;
}
else
- sfuncname = PyString_AS_STRING(funcname);
+ sfuncname = PyUnicode_AsString(funcname);
if (klass == NULL)
klassname = NULL;
else {
@@ -250,12 +250,12 @@ method_repr(PyMethodObject *a)
return NULL;
PyErr_Clear();
}
- else if (!PyString_Check(klassname)) {
+ else if (!PyUnicode_Check(klassname)) {
Py_DECREF(klassname);
klassname = NULL;
}
else
- sklassname = PyString_AS_STRING(klassname);
+ sklassname = PyUnicode_AsString(klassname);
}
if (self == NULL)
result = PyUnicode_FromFormat("<unbound method %s.%s>",
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 6763950249..c7351930da 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -32,10 +32,10 @@ intern_strings(PyObject *tuple)
for (i = PyTuple_GET_SIZE(tuple); --i >= 0; ) {
PyObject *v = PyTuple_GET_ITEM(tuple, i);
- if (v == NULL || !PyString_CheckExact(v)) {
+ if (v == NULL || !PyUnicode_CheckExact(v)) {
Py_FatalError("non-string found in code slot");
}
- PyString_InternInPlace(&PyTuple_GET_ITEM(tuple, i));
+ PyUnicode_InternInPlace(&PyTuple_GET_ITEM(tuple, i));
}
}
@@ -58,7 +58,7 @@ PyCode_New(int argcount, int kwonlyargcount,
varnames == NULL || !PyTuple_Check(varnames) ||
freevars == NULL || !PyTuple_Check(freevars) ||
cellvars == NULL || !PyTuple_Check(cellvars) ||
- name == NULL || !PyString_Check(name) ||
+ name == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) ||
filename == NULL || !PyString_Check(filename) ||
lnotab == NULL || !PyString_Check(lnotab) ||
!PyObject_CheckReadBuffer(code)) {
@@ -148,10 +148,10 @@ validate_and_copy_tuple(PyObject *tup)
for (i = 0; i < len; i++) {
item = PyTuple_GET_ITEM(tup, i);
- if (PyString_CheckExact(item)) {
+ if (PyUnicode_CheckExact(item)) {
Py_INCREF(item);
}
- else if (!PyString_Check(item)) {
+ else if (!PyUnicode_Check(item)) {
PyErr_Format(
PyExc_TypeError,
"name tuples must contain only "
@@ -161,9 +161,9 @@ validate_and_copy_tuple(PyObject *tup)
return NULL;
}
else {
- item = PyString_FromStringAndSize(
- PyString_AS_STRING(item),
- PyString_GET_SIZE(item));
+ item = PyUnicode_FromUnicode(
+ PyUnicode_AS_UNICODE(item),
+ PyUnicode_GET_SIZE(item));
if (item == NULL) {
Py_DECREF(newtuple);
return NULL;
diff --git a/Objects/complexobject.c b/Objects/complexobject.c
index ed2e475fb9..4580ef2bea 100644
--- a/Objects/complexobject.c
+++ b/Objects/complexobject.c
@@ -269,7 +269,7 @@ PyComplex_AsCComplex(PyObject *op)
{
PyObject *complexfunc;
if (!complex_str) {
- if (!(complex_str = PyString_FromString("__complex__")))
+ if (!(complex_str = PyUnicode_FromString("__complex__")))
return cv;
}
complexfunc = _PyType_Lookup(op->ob_type, complex_str);
@@ -900,7 +900,7 @@ complex_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
/* XXX Hack to support classes with __complex__ method */
if (complexstr == NULL) {
- complexstr = PyString_InternFromString("__complex__");
+ complexstr = PyUnicode_InternFromString("__complex__");
if (complexstr == NULL)
return NULL;
}
diff --git a/Objects/descrobject.c b/Objects/descrobject.c
index e9ccefaf94..acd24007d1 100644
--- a/Objects/descrobject.c
+++ b/Objects/descrobject.c
@@ -15,7 +15,10 @@ descr_dealloc(PyDescrObject *descr)
static char *
descr_name(PyDescrObject *descr)
{
- if (descr->d_name != NULL && PyString_Check(descr->d_name))
+ if (descr->d_name != NULL && PyUnicode_Check(descr->d_name))
+ return PyUnicode_AsString(descr->d_name);
+ else if (descr->d_name != NULL && PyString_Check(descr->d_name))
+ /* XXX this should not happen */
return PyString_AS_STRING(descr->d_name);
else
return "?";
@@ -581,7 +584,7 @@ descr_new(PyTypeObject *descrtype, PyTypeObject *type, const char *name)
if (descr != NULL) {
Py_XINCREF(type);
descr->d_type = type;
- descr->d_name = PyString_InternFromString(name);
+ descr->d_name = PyUnicode_InternFromString(name);
if (descr->d_name == NULL) {
Py_DECREF(descr);
descr = NULL;
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index b45a664fb7..639c3c5098 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -1040,7 +1040,7 @@ dict_subscript(dictobject *mp, register PyObject *key)
static PyObject *missing_str = NULL;
if (missing_str == NULL)
missing_str =
- PyString_InternFromString("__missing__");
+ PyUnicode_InternFromString("__missing__");
missing = _PyType_Lookup(mp->ob_type, missing_str);
if (missing != NULL)
return PyObject_CallFunctionObjArgs(missing,
@@ -2073,7 +2073,7 @@ PyObject *
PyDict_GetItemString(PyObject *v, const char *key)
{
PyObject *kv, *rv;
- kv = PyString_FromString(key);
+ kv = PyUnicode_FromString(key);
if (kv == NULL)
return NULL;
rv = PyDict_GetItem(v, kv);
@@ -2086,10 +2086,10 @@ PyDict_SetItemString(PyObject *v, const char *key, PyObject *item)
{
PyObject *kv;
int err;
- kv = PyString_FromString(key);
+ kv = PyUnicode_FromString(key);
if (kv == NULL)
return -1;
- PyString_InternInPlace(&kv); /* XXX Should we really? */
+ PyUnicode_InternInPlace(&kv); /* XXX Should we really? */
err = PyDict_SetItem(v, kv, item);
Py_DECREF(kv);
return err;
@@ -2100,7 +2100,7 @@ PyDict_DelItemString(PyObject *v, const char *key)
{
PyObject *kv;
int err;
- kv = PyString_FromString(key);
+ kv = PyUnicode_FromString(key);
if (kv == NULL)
return -1;
err = PyDict_DelItem(v, kv);
diff --git a/Objects/frameobject.c b/Objects/frameobject.c
index f780b3abd6..bb27f1c10e 100644
--- a/Objects/frameobject.c
+++ b/Objects/frameobject.c
@@ -542,7 +542,7 @@ static PyObject *builtin_object;
int _PyFrame_Init()
{
- builtin_object = PyString_InternFromString("__builtins__");
+ builtin_object = PyUnicode_InternFromString("__builtins__");
return (builtin_object != NULL);
}
@@ -722,7 +722,7 @@ map_to_dict(PyObject *map, Py_ssize_t nmap, PyObject *dict, PyObject **values,
for (j = nmap; --j >= 0; ) {
PyObject *key = PyTuple_GET_ITEM(map, j);
PyObject *value = values[j];
- assert(PyString_Check(key));
+ assert(PyString_Check(key)/*XXX this should go*/ || PyUnicode_Check(key));
if (deref) {
assert(PyCell_Check(value));
value = PyCell_GET(value);
@@ -770,7 +770,7 @@ dict_to_map(PyObject *map, Py_ssize_t nmap, PyObject *dict, PyObject **values,
for (j = nmap; --j >= 0; ) {
PyObject *key = PyTuple_GET_ITEM(map, j);
PyObject *value = PyObject_GetItem(dict, key);
- assert(PyString_Check(key));
+ assert(PyUnicode_Check(key));
/* We only care about NULLs if clear is true. */
if (value == NULL) {
PyErr_Clear();
diff --git a/Objects/funcobject.c b/Objects/funcobject.c
index 6f17e7ac89..ff1b4c87e5 100644
--- a/Objects/funcobject.c
+++ b/Objects/funcobject.c
@@ -322,7 +322,7 @@ func_set_name(PyFunctionObject *op, PyObject *value)
/* Not legal to del f.func_name or to set it to anything
* other than a string object. */
- if (value == NULL || !PyString_Check(value)) {
+ if (value == NULL || (!PyString_Check(value) && !PyUnicode_Check(value))) {
PyErr_SetString(PyExc_TypeError,
"__name__ must be set to a string object");
return -1;
@@ -516,7 +516,7 @@ func_new(PyTypeObject* type, PyObject* args, PyObject* kw)
if (nfree != nclosure)
return PyErr_Format(PyExc_ValueError,
"%s requires closure of length %zd, not %zd",
- PyString_AS_STRING(code->co_name),
+ PyUnicode_AsString(code->co_name),
nfree, nclosure);
if (nclosure) {
Py_ssize_t i;
diff --git a/Objects/methodobject.c b/Objects/methodobject.c
index 6199805239..2d1c688c33 100644
--- a/Objects/methodobject.c
+++ b/Objects/methodobject.c
@@ -143,7 +143,7 @@ meth_get__doc__(PyCFunctionObject *m, void *closure)
static PyObject *
meth_get__name__(PyCFunctionObject *m, void *closure)
{
- return PyString_FromString(m->m_ml->ml_name);
+ return PyUnicode_FromString(m->m_ml->ml_name);
}
static int
@@ -297,7 +297,7 @@ listmethodchain(PyMethodChain *chain)
i = 0;
for (c = chain; c != NULL; c = c->link) {
for (ml = c->methods; ml->ml_name != NULL; ml++) {
- PyList_SetItem(v, i, PyString_FromString(ml->ml_name));
+ PyList_SetItem(v, i, PyUnicode_FromString(ml->ml_name));
i++;
}
}
diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c
index daf24eb2d8..7c5e47f450 100644
--- a/Objects/moduleobject.c
+++ b/Objects/moduleobject.c
@@ -22,7 +22,7 @@ PyModule_New(const char *name)
m = PyObject_GC_New(PyModuleObject, &PyModule_Type);
if (m == NULL)
return NULL;
- nameobj = PyString_FromString(name);
+ nameobj = PyUnicode_FromString(name);
m->md_dict = PyDict_New();
if (m->md_dict == NULL || nameobj == NULL)
goto fail;
diff --git a/Objects/object.c b/Objects/object.c
index be7d5018fd..c701af007a 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -465,7 +465,7 @@ PyObject_Unicode(PyObject *v)
check this before trying the __unicode__
method. */
if (unicodestr == NULL) {
- unicodestr= PyString_InternFromString("__unicode__");
+ unicodestr= PyUnicode_InternFromString("__unicode__");
if (unicodestr == NULL)
return NULL;
}
@@ -852,7 +852,7 @@ PyObject_GetAttrString(PyObject *v, const char *name)
if (v->ob_type->tp_getattr != NULL)
return (*v->ob_type->tp_getattr)(v, (char*)name);
- w = PyString_InternFromString(name);
+ w = PyUnicode_InternFromString(name);
if (w == NULL)
return NULL;
res = PyObject_GetAttr(v, w);
@@ -880,7 +880,7 @@ PyObject_SetAttrString(PyObject *v, const char *name, PyObject *w)
if (v->ob_type->tp_setattr != NULL)
return (*v->ob_type->tp_setattr)(v, (char*)name, w);
- s = PyString_InternFromString(name);
+ s = PyUnicode_InternFromString(name);
if (s == NULL)
return -1;
res = PyObject_SetAttr(v, s, w);
@@ -893,30 +893,19 @@ PyObject_GetAttr(PyObject *v, PyObject *name)
{
PyTypeObject *tp = v->ob_type;
- if (!PyString_Check(name)) {
- /* The Unicode to string conversion is done here because the
- existing tp_getattro slots expect a string object as name
- and we wouldn't want to break those. */
- if (PyUnicode_Check(name)) {
- name = _PyUnicode_AsDefaultEncodedString(name, NULL);
- if (name == NULL)
- return NULL;
- }
- else
- {
- PyErr_Format(PyExc_TypeError,
- "attribute name must be string, not '%.200s'",
- name->ob_type->tp_name);
- return NULL;
- }
+ if (!PyUnicode_Check(name)) {
+ PyErr_Format(PyExc_TypeError,
+ "attribute name must be string, not '%.200s'",
+ name->ob_type->tp_name);
+ return NULL;
}
if (tp->tp_getattro != NULL)
return (*tp->tp_getattro)(v, name);
if (tp->tp_getattr != NULL)
- return (*tp->tp_getattr)(v, PyString_AS_STRING(name));
+ return (*tp->tp_getattr)(v, PyUnicode_AsString(name));
PyErr_Format(PyExc_AttributeError,
"'%.50s' object has no attribute '%.400s'",
- tp->tp_name, PyString_AS_STRING(name));
+ tp->tp_name, PyUnicode_AsString(name));
return NULL;
}
@@ -938,33 +927,22 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value)
PyTypeObject *tp = v->ob_type;
int err;
- if (!PyString_Check(name)) {
- /* The Unicode to string conversion is done here because the
- existing tp_setattro slots expect a string object as name
- and we wouldn't want to break those. */
- if (PyUnicode_Check(name)) {
- name = _PyUnicode_AsDefaultEncodedString(name, NULL);
- if (name == NULL)
- return -1;
- }
- else
- {
- PyErr_Format(PyExc_TypeError,
- "attribute name must be string, not '%.200s'",
- name->ob_type->tp_name);
- return -1;
- }
+ if (!PyUnicode_Check(name)) {
+ PyErr_Format(PyExc_TypeError,
+ "attribute name must be string, not '%.200s'",
+ name->ob_type->tp_name);
+ return -1;
}
Py_INCREF(name);
- PyString_InternInPlace(&name);
+ PyUnicode_InternInPlace(&name);
if (tp->tp_setattro != NULL) {
err = (*tp->tp_setattro)(v, name, value);
Py_DECREF(name);
return err;
}
if (tp->tp_setattr != NULL) {
- err = (*tp->tp_setattr)(v, PyString_AS_STRING(name), value);
+ err = (*tp->tp_setattr)(v, PyUnicode_AsString(name), value);
Py_DECREF(name);
return err;
}
@@ -976,14 +954,14 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value)
"(%s .%.100s)",
tp->tp_name,
value==NULL ? "del" : "assign to",
- PyString_AS_STRING(name));
+ PyUnicode_AsString(name));
else
PyErr_Format(PyExc_TypeError,
"'%.100s' object has only read-only attributes "
"(%s .%.100s)",
tp->tp_name,
value==NULL ? "del" : "assign to",
- PyString_AS_STRING(name));
+ PyUnicode_AsString(name));
return -1;
}
@@ -1033,22 +1011,11 @@ PyObject_GenericGetAttr(PyObject *obj, PyObject *name)
Py_ssize_t dictoffset;
PyObject **dictptr;
- if (!PyString_Check(name)){
- /* The Unicode to string conversion is done here because the
- existing tp_setattro slots expect a string object as name
- and we wouldn't want to break those. */
- if (PyUnicode_Check(name)) {
- name = PyUnicode_AsEncodedString(name, NULL, NULL);
- if (name == NULL)
- return NULL;
- }
- else
- {
- PyErr_Format(PyExc_TypeError,
- "attribute name must be string, not '%.200s'",
- name->ob_type->tp_name);
- return NULL;
- }
+ if (!PyUnicode_Check(name)){
+ PyErr_Format(PyExc_TypeError,
+ "attribute name must be string, not '%.200s'",
+ name->ob_type->tp_name);
+ return NULL;
}
else
Py_INCREF(name);
@@ -1134,7 +1101,7 @@ PyObject_GenericGetAttr(PyObject *obj, PyObject *name)
PyErr_Format(PyExc_AttributeError,
"'%.50s' object has no attribute '%.400s'",
- tp->tp_name, PyString_AS_STRING(name));
+ tp->tp_name, PyUnicode_AsString(name));
done:
Py_DECREF(name);
return res;
@@ -1149,22 +1116,11 @@ PyObject_GenericSetAttr(PyObject *obj, PyObject *name, PyObject *value)
PyObject **dictptr;
int res = -1;
- if (!PyString_Check(name)){
- /* The Unicode to string conversion is done here because the
- existing tp_setattro slots expect a string object as name
- and we wouldn't want to break those. */
- if (PyUnicode_Check(name)) {
- name = PyUnicode_AsEncodedString(name, NULL, NULL);
- if (name == NULL)
- return -1;
- }
- else
- {
- PyErr_Format(PyExc_TypeError,
- "attribute name must be string, not '%.200s'",
- name->ob_type->tp_name);
- return -1;
- }
+ if (!PyUnicode_Check(name)){
+ PyErr_Format(PyExc_TypeError,
+ "attribute name must be string, not '%.200s'",
+ name->ob_type->tp_name);
+ return -1;
}
else
Py_INCREF(name);
@@ -1212,13 +1168,13 @@ PyObject_GenericSetAttr(PyObject *obj, PyObject *name, PyObject *value)
if (descr == NULL) {
PyErr_Format(PyExc_AttributeError,
"'%.100s' object has no attribute '%.200s'",
- tp->tp_name, PyString_AS_STRING(name));
+ tp->tp_name, PyUnicode_AsString(name));
goto done;
}
PyErr_Format(PyExc_AttributeError,
"'%.50s' object attribute '%.400s' is read-only",
- tp->tp_name, PyString_AS_STRING(name));
+ tp->tp_name, PyUnicode_AsString(name));
done:
Py_DECREF(name);
return res;
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index 60e6129f23..92bc95bf37 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -686,6 +686,11 @@ string_getbuffer(register PyObject *op)
Py_ssize_t
PyString_Size(register PyObject *op)
{
+ if (PyUnicode_Check(op)) {
+ op = _PyUnicode_AsDefaultEncodedString(op, NULL);
+ if (!op)
+ return -1;
+ }
if (!PyString_Check(op))
return string_getsize(op);
return ((PyStringObject *)op) -> ob_size;
@@ -694,6 +699,11 @@ PyString_Size(register PyObject *op)
/*const*/ char *
PyString_AsString(register PyObject *op)
{
+ if (PyUnicode_Check(op)) {
+ op = _PyUnicode_AsDefaultEncodedString(op, NULL);
+ if (!op)
+ return NULL;
+ }
if (!PyString_Check(op))
return string_getbuffer(op);
return ((PyStringObject *)op) -> ob_sval;
@@ -824,7 +834,7 @@ PyString_Repr(PyObject *obj, int smartquotes)
{
static const char *hexdigits = "0123456789abcdef";
register PyStringObject* op = (PyStringObject*) obj;
- Py_ssize_t length = PyUnicode_GET_SIZE(op);
+ Py_ssize_t length = PyString_GET_SIZE(op);
size_t newsize = 2 + 4 * op->ob_size;
PyObject *v;
if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) {
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 5983011222..ab86f54c54 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -35,7 +35,7 @@ type_name(PyTypeObject *type, void *context)
s = type->tp_name;
else
s++;
- return PyString_FromString(s);
+ return PyUnicode_FromString(s);
}
}
@@ -97,9 +97,9 @@ type_module(PyTypeObject *type, void *context)
else {
s = strrchr(type->tp_name, '.');
if (s != NULL)
- return PyString_FromStringAndSize(
+ return PyUnicode_FromStringAndSize(
type->tp_name, (Py_ssize_t)(s - type->tp_name));
- return PyString_FromString("__builtin__");
+ return PyUnicode_FromString("__builtin__");
}
}
@@ -371,7 +371,7 @@ type_repr(PyTypeObject *type)
mod = type_module(type, NULL);
if (mod == NULL)
PyErr_Clear();
- else if (!PyString_Check(mod)) {
+ else if (!PyUnicode_Check(mod)) {
Py_DECREF(mod);
mod = NULL;
}
@@ -384,11 +384,11 @@ type_repr(PyTypeObject *type)
else
kind = "type";
- if (mod != NULL && strcmp(PyString_AS_STRING(mod), "__builtin__")) {
+ if (mod != NULL && strcmp(PyUnicode_AsString(mod), "__builtin__")) {
rtn = PyUnicode_FromFormat("<%s '%s.%s'>",
kind,
- PyString_AS_STRING(mod),
- PyString_AS_STRING(name));
+ PyUnicode_AsString(mod),
+ PyUnicode_AsString(name));
}
else
rtn = PyUnicode_FromFormat("<%s '%s'>", kind, type->tp_name);
@@ -859,7 +859,7 @@ lookup_maybe(PyObject *self, char *attrstr, PyObject **attrobj)
PyObject *res;
if (*attrobj == NULL) {
- *attrobj = PyString_InternFromString(attrstr);
+ *attrobj = PyUnicode_InternFromString(attrstr);
if (*attrobj == NULL)
return NULL;
}
@@ -1415,7 +1415,7 @@ get_dict_descriptor(PyTypeObject *type)
PyObject *descr;
if (dict_str == NULL) {
- dict_str = PyString_InternFromString("__dict__");
+ dict_str = PyUnicode_InternFromString("__dict__");
if (dict_str == NULL)
return NULL;
}
@@ -1564,14 +1564,14 @@ valid_identifier(PyObject *s)
unsigned char *p;
Py_ssize_t i, n;
- if (!PyString_Check(s)) {
+ if (!PyUnicode_Check(s)) {
PyErr_Format(PyExc_TypeError,
"__slots__ items must be strings, not '%.200s'",
s->ob_type->tp_name);
return 0;
}
- p = (unsigned char *) PyString_AS_STRING(s);
- n = PyString_GET_SIZE(s);
+ p = (unsigned char *) PyUnicode_AsString(s);
+ n = strlen((char*)p)/*XXX PyString_GET_SIZE(s)*/;
/* We must reject an empty name. As a hack, we bump the
length to 1 so that the loop will balk on the trailing \0. */
if (n == 0)
@@ -1792,22 +1792,13 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
return NULL;
}
- tmp = _unicode_to_string(slots, nslots);
- if (tmp == NULL)
- goto bad_slots;
- if (tmp != slots) {
- Py_DECREF(slots);
- slots = tmp;
- }
/* Check for valid slot names and two special cases */
for (i = 0; i < nslots; i++) {
PyObject *tmp = PyTuple_GET_ITEM(slots, i);
- char *s;
if (!valid_identifier(tmp))
goto bad_slots;
- assert(PyString_Check(tmp));
- s = PyString_AS_STRING(tmp);
- if (strcmp(s, "__dict__") == 0) {
+ assert(PyUnicode_Check(tmp));
+ if (PyUnicode_CompareWithASCIIString(tmp, "__dict__") == 0) {
if (!may_add_dict || add_dict) {
PyErr_SetString(PyExc_TypeError,
"__dict__ slot disallowed: "
@@ -1816,7 +1807,7 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
}
add_dict++;
}
- if (strcmp(s, "__weakref__") == 0) {
+ if (PyUnicode_CompareWithASCIIString(tmp, "__weakref__") == 0) {
if (!may_add_weak || add_weak) {
PyErr_SetString(PyExc_TypeError,
"__weakref__ slot disallowed: "
@@ -1836,11 +1827,11 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
if (newslots == NULL)
goto bad_slots;
for (i = j = 0; i < nslots; i++) {
- char *s;
tmp = PyTuple_GET_ITEM(slots, i);
- s = PyString_AS_STRING(tmp);
- if ((add_dict && strcmp(s, "__dict__") == 0) ||
- (add_weak && strcmp(s, "__weakref__") == 0))
+ if ((add_dict &&
+ PyUnicode_CompareWithASCIIString(tmp, "__dict__") == 0) ||
+ (add_weak &&
+ PyUnicode_CompareWithASCIIString(tmp, "__weakref__") == 0))
continue;
tmp =_Py_Mangle(name, tmp);
if (!tmp)
@@ -1917,7 +1908,15 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
type->tp_as_sequence = &et->as_sequence;
type->tp_as_mapping = &et->as_mapping;
type->tp_as_buffer = &et->as_buffer;
- type->tp_name = PyString_AS_STRING(name);
+ if (PyString_Check(name))
+ type->tp_name = PyString_AsString(name);
+ else {
+ type->tp_name = PyUnicode_AsString(name);
+ if (!type->tp_name) {
+ Py_DECREF(type);
+ return NULL;
+ }
+ }
/* Set tp_base and tp_bases */
type->tp_bases = bases;
@@ -1980,7 +1979,7 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
slotoffset = base->tp_basicsize;
if (slots != NULL) {
for (i = 0; i < nslots; i++, mp++) {
- mp->name = PyString_AS_STRING(
+ mp->name = PyUnicode_AsString(
PyTuple_GET_ITEM(slots, i));
mp->type = T_OBJECT_EX;
mp->offset = slotoffset;
@@ -2157,7 +2156,7 @@ type_getattro(PyTypeObject *type, PyObject *name)
/* Give up */
PyErr_Format(PyExc_AttributeError,
"type object '%.50s' has no attribute '%.400s'",
- type->tp_name, PyString_AS_STRING(name));
+ type->tp_name, PyUnicode_AsString(name));
return NULL;
}
@@ -2473,7 +2472,7 @@ object_repr(PyObject *self)
mod = type_module(type, NULL);
if (mod == NULL)
PyErr_Clear();
- else if (!PyString_Check(mod)) {
+ else if (!PyUnicode_Check(mod)) {
Py_DECREF(mod);
mod = NULL;
}
@@ -2482,8 +2481,8 @@ object_repr(PyObject *self)
return NULL;
- if (mod != NULL && strcmp(PyString_AS_STRING(mod), "__builtin__"))
+ /* mod has passed PyUnicode_Check, so its text must be fetched with the
+ unicode accessor (as type_repr does), not the str8 macro. */
+ if (mod != NULL && strcmp(PyUnicode_AsString(mod), "__builtin__"))
rtn = PyUnicode_FromFormat("<%s.%s object at %p>",
- PyString_AS_STRING(mod),
- PyString_AS_STRING(name),
+ PyUnicode_AsString(mod),
+ PyUnicode_AsString(name),
self);
else
rtn = PyUnicode_FromFormat("<%s object at %p>",
@@ -2686,7 +2685,7 @@ import_copy_reg(void)
static PyObject *copy_reg_str;
if (!copy_reg_str) {
- copy_reg_str = PyString_InternFromString("copy_reg");
+ copy_reg_str = PyUnicode_InternFromString("copy_reg");
if (copy_reg_str == NULL)
return NULL;
}
@@ -4330,7 +4329,7 @@ slot_sq_item(PyObject *self, Py_ssize_t i)
descrgetfunc f;
if (getitem_str == NULL) {
- getitem_str = PyString_InternFromString("__getitem__");
+ getitem_str = PyUnicode_InternFromString("__getitem__");
if (getitem_str == NULL)
return NULL;
}
@@ -4760,13 +4759,13 @@ slot_tp_getattr_hook(PyObject *self, PyObject *name)
static PyObject *getattr_str = NULL;
if (getattr_str == NULL) {
- getattr_str = PyString_InternFromString("__getattr__");
+ getattr_str = PyUnicode_InternFromString("__getattr__");
if (getattr_str == NULL)
return NULL;
}
if (getattribute_str == NULL) {
getattribute_str =
- PyString_InternFromString("__getattribute__");
+ PyUnicode_InternFromString("__getattribute__");
if (getattribute_str == NULL)
return NULL;
}
@@ -4898,7 +4897,7 @@ slot_tp_descr_get(PyObject *self, PyObject *obj, PyObject *type)
static PyObject *get_str = NULL;
if (get_str == NULL) {
- get_str = PyString_InternFromString("__get__");
+ get_str = PyUnicode_InternFromString("__get__");
if (get_str == NULL)
return NULL;
}
@@ -4968,7 +4967,7 @@ slot_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Py_ssize_t i, n;
if (new_str == NULL) {
- new_str = PyString_InternFromString("__new__");
+ new_str = PyUnicode_InternFromString("__new__");
if (new_str == NULL)
return NULL;
}
@@ -5490,7 +5489,7 @@ init_slotdefs(void)
if (initialized)
return;
for (p = slotdefs; p->name; p++) {
- p->name_strobj = PyString_InternFromString(p->name);
+ p->name_strobj = PyUnicode_InternFromString(p->name);
if (!p->name_strobj)
Py_FatalError("Out of memory interning slotdef names");
}
@@ -5717,9 +5716,9 @@ super_getattro(PyObject *self, PyObject *name)
if (!skip) {
/* We want __class__ to return the class of the super object
(i.e. super, or a subclass), not the class of su->obj. */
- skip = (PyString_Check(name) &&
- PyString_GET_SIZE(name) == 9 &&
- strcmp(PyString_AS_STRING(name), "__class__") == 0);
+ skip = (PyUnicode_Check(name) &&
+ PyUnicode_GET_SIZE(name) == 9 &&
+ PyUnicode_CompareWithASCIIString(name, "__class__") == 0);
}
if (!skip) {
@@ -5809,7 +5808,7 @@ supercheck(PyTypeObject *type, PyObject *obj)
PyObject *class_attr;
if (class_str == NULL) {
- class_str = PyString_FromString("__class__");
+ class_str = PyUnicode_FromString("__class__");
if (class_str == NULL)
return NULL;
}
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e793418813..87c5c99728 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -458,8 +458,10 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
/* Copy the Unicode data into the new object */
if (u != NULL) {
Py_UNICODE *p = unicode->str;
- while ((*p++ = *u++))
- ;
+ while (size--)
+ *p++ = *u++;
+ /* Don't need to write trailing 0 because
+ that's already done by _PyUnicode_New */
}
return (PyObject *)unicode;
@@ -1184,6 +1186,16 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
return v;
}
+/* Return the char buffer of the default-encoded form of a unicode object,
+   or NULL when _PyUnicode_AsDefaultEncodedString fails.
+   NOTE(review): the encoded object is not released here, so the buffer is
+   presumably owned by (cached on) `unicode` -- confirm before storing it. */
+char*
+PyUnicode_AsString(PyObject *unicode)
+{
+    PyObject *encoded;
+
+    assert(PyUnicode_Check(unicode));
+    encoded = _PyUnicode_AsDefaultEncodedString(unicode, NULL);
+    return encoded ? PyString_AsString(encoded) : NULL;
+}
+
Py_UNICODE *PyUnicode_AsUnicode(PyObject *unicode)
{
if (!PyUnicode_Check(unicode)) {
@@ -3247,7 +3259,7 @@ PyObject *PyUnicode_DecodeASCII(const char *s,
goto onError;
}
}
- if (p - PyUnicode_AS_UNICODE(v) < PyString_GET_SIZE(v))
+ if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
goto onError;
Py_XDECREF(errorHandler);
@@ -5861,6 +5873,24 @@ int PyUnicode_Compare(PyObject *left,
return -1;
}
+/* Three-way comparison of a unicode object with a 0-terminated C string.
+   Returns -1, 0 or 1 when uni sorts before, equal to or after str.
+   The length of uni is taken from the object, so a 0 code unit stored
+   inside uni is compared like any other unit instead of ending the scan;
+   a unicode string that merely starts with str is reported as longer. */
+int
+PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
+{
+    Py_ssize_t i, len;
+    Py_UNICODE *id;
+    assert(PyUnicode_Check(uni));
+    id = PyUnicode_AS_UNICODE(uni);
+    len = PyUnicode_GET_SIZE(uni);
+    /* Compare Unicode string and source character set string */
+    for (i = 0; i < len && str[i]; i++)
+        if (id[i] != str[i])
+            return ((int)id[i] < (int)str[i]) ? -1 : 1;
+    if (i < len)
+        return 1; /* uni is longer */
+    if (str[i])
+        return -1; /* str is longer */
+    return 0;
+}
+
PyObject *PyUnicode_RichCompare(PyObject *left,
PyObject *right,
int op)
@@ -8671,7 +8701,13 @@ PyUnicode_InternInPlace(PyObject **p)
return;
}
}
+ /* It might be that the GetItem call fails even
+ though the key is present in the dictionary,
+ namely when this happens during a stack overflow. */
+ Py_ALLOW_RECURSION
t = PyDict_GetItem(interned, (PyObject *)s);
+ Py_END_ALLOW_RECURSION
+
if (t) {
Py_INCREF(t);
Py_DECREF(*p);
@@ -8679,10 +8715,13 @@ PyUnicode_InternInPlace(PyObject **p)
return;
}
+ PyThreadState_GET()->recursion_critical = 1;
if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
PyErr_Clear();
+ PyThreadState_GET()->recursion_critical = 0;
return;
}
+ PyThreadState_GET()->recursion_critical = 0;
/* The two references in interned are not counted by refcnt.
The deallocator will take care of this */
s->ob_refcnt -= 2;
@@ -8879,6 +8918,58 @@ unicode_iter(PyObject *seq)
return (PyObject *)it;
}
+/* Count the code units in u that precede the terminating 0. */
+size_t
+Py_UNICODE_strlen(const Py_UNICODE *u)
+{
+    /* Counter has the same type as the return value, so long buffers
+       cannot overflow a narrower signed int on the way out. */
+    size_t res = 0;
+    while (*u++)
+        res++;
+    return res;
+}
+
+/* Copy s2, including its terminating 0, into s1 and return s1.
+   No bound is applied; s1 must be large enough for the whole string. */
+Py_UNICODE*
+Py_UNICODE_strcpy(Py_UNICODE *s1, const Py_UNICODE *s2)
+{
+    Py_UNICODE *dst = s1;
+    do {
+        *dst = *s2++;
+    } while (*dst++);   /* stop once the 0 just stored has been seen */
+    return s1;
+}
+
+/* Copy at most n code units from s2 into s1 and return s1.
+   Copying stops right after a terminating 0 has been transferred.  When s2
+   holds n or more non-zero units, exactly n units are written and s1 is
+   NOT 0-terminated.  Unlike C strncpy(), the rest of s1 is never padded. */
+Py_UNICODE*
+Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n)
+{
+    Py_UNICODE *u = s1;
+    /* Check the remaining budget before each store so that never more
+       than n units are written into s1. */
+    while (n-- > 0 && (*u++ = *s2++))
+        ;
+    return s1;
+}
+
+/* Three-way comparison of two 0-terminated Py_UNICODE strings.
+   Returns -1, 0 or +1; a string that is a proper prefix of the other
+   sorts first. */
+int
+Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2)
+{
+    /* Walk both strings while neither has ended. */
+    for (; *s1 && *s2; s1++, s2++) {
+        if (*s1 != *s2)
+            return (*s1 < *s2) ? -1 : +1;
+    }
+    /* At least one string is exhausted; the longer one is greater. */
+    if (*s1)
+        return 1;
+    if (*s2)
+        return -1;
+    return 0;
+}
+
+/* Return a pointer to the first occurrence of c in s, or NULL if absent.
+   The terminating 0 is never tested, so searching for c == 0 yields NULL
+   (this differs from C strchr()). */
+Py_UNICODE*
+Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c)
+{
+    while (*s) {
+        if (*s == c)
+            return (Py_UNICODE*)s;   /* drop const for the caller */
+        s++;
+    }
+    return NULL;
+}
+
+
#ifdef __cplusplus
}
#endif
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index f3eeb2c252..e7dada63bc 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -18,6 +18,17 @@
#include "abstract.h"
#endif /* PGEN */
+/* True when c is an ASCII letter or '_'.  The argument is parenthesized
+   so compound expressions expand correctly; it is still evaluated more
+   than once, so pass an expression without side effects. */
+#define is_potential_identifier_start(c) (\
+    ((c) >= 'a' && (c) <= 'z')\
+    || ((c) >= 'A' && (c) <= 'Z')\
+    || (c) == '_')
+
+/* True when c is an ASCII letter, an ASCII digit or '_'.  The argument is
+   parenthesized so compound expressions expand correctly; it is still
+   evaluated more than once, so pass an expression without side effects. */
+#define is_potential_identifier_char(c) (\
+    ((c) >= 'a' && (c) <= 'z')\
+    || ((c) >= 'A' && (c) <= 'Z')\
+    || ((c) >= '0' && (c) <= '9')\
+    || (c) == '_')
+
extern char *PyOS_Readline(FILE *, FILE *, char *);
/* Return malloc'ed string including trailing \n;
empty malloc'ed string for EOF;
@@ -1209,7 +1220,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
}
/* Identifier (most frequent token!) */
- if (isalpha(c) || c == '_') {
+ if (is_potential_identifier_start(c)) {
/* Process r"", u"" and ur"" */
switch (c) {
case 'r':
@@ -1227,7 +1238,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
goto letter_quote;
break;
}
- while (isalnum(c) || c == '_') {
+ while (is_potential_identifier_char(c)) {
c = tok_nextc(tok);
}
tok_backup(tok, c);
diff --git a/Python/Python-ast.c b/Python/Python-ast.c
index 791b32d3fb..18c2eb5d31 100644
--- a/Python/Python-ast.c
+++ b/Python/Python-ast.c
@@ -3280,3 +3280,5 @@ PyObject* PyAST_mod2obj(mod_ty t)
init_types();
return ast2obj_mod(t);
}
+
+
diff --git a/Python/ast.c b/Python/ast.c
index e0bd18e731..b34411ba41 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -48,7 +48,8 @@ static PyObject *parsestrplus(struct compiling *, const node *n,
static identifier
new_identifier(const char* n, PyArena *arena) {
- PyObject* id = PyString_InternFromString(n);
+ /* Decode the UTF-8 name n into a str object, intern it and register it
+ with the arena.  Returns NULL when decoding fails. */
+ PyObject* id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
+ /* A failed decode yields NULL; stop before it is interned or stored. */
+ if (!id)
+ return NULL;
+ PyUnicode_InternInPlace(&id);
PyArena_AddPyObject(arena, id);
return id;
}
@@ -334,12 +335,10 @@ static const char* FORBIDDEN[] = {
static int
forbidden_name(expr_ty e, const node *n)
{
- const char *id;
const char **p;
- assert(PyString_Check(e->v.Name.id));
- id = PyString_AS_STRING(e->v.Name.id);
+ assert(PyUnicode_Check(e->v.Name.id));
for (p = FORBIDDEN; *p; p++) {
- if (strcmp(*p, id) == 0) {
+ if (PyUnicode_CompareWithASCIIString(e->v.Name.id, *p) == 0) {
ast_error(n, "assignment to keyword");
return 1;
}
@@ -375,7 +374,7 @@ set_context(expr_ty e, expr_context_ty ctx, const node *n)
switch (e->kind) {
case Attribute_kind:
if (ctx == Store &&
- !strcmp(PyString_AS_STRING(e->v.Attribute.attr), "None")) {
+ !PyUnicode_CompareWithASCIIString(e->v.Attribute.attr, "None")) {
return ast_error(n, "assignment to None");
}
e->v.Attribute.ctx = ctx;
@@ -2235,6 +2234,7 @@ alias_for_import_name(struct compiling *c, const node *n)
int i;
size_t len;
char *s;
+ PyObject *uni;
len = 0;
for (i = 0; i < NCH(n); i += 2)
@@ -2255,13 +2255,20 @@ alias_for_import_name(struct compiling *c, const node *n)
}
--s;
*s = '\0';
- PyString_InternInPlace(&str);
+ uni = PyUnicode_DecodeUTF8(PyString_AS_STRING(str),
+ PyString_GET_SIZE(str),
+ NULL);
+ Py_DECREF(str);
+ if (!uni)
+ return NULL;
+ str = uni;
+ PyUnicode_InternInPlace(&str);
PyArena_AddPyObject(c->c_arena, str);
return alias(str, NULL, c->c_arena);
}
break;
case STAR:
- str = PyString_InternFromString("*");
+ str = PyUnicode_InternFromString("*");
PyArena_AddPyObject(c->c_arena, str);
return alias(str, NULL, c->c_arena);
default:
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index 97b2c5e1e6..d4c8a74905 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -48,7 +48,7 @@ builtin___build_class__(PyObject *self, PyObject *args, PyObject *kwds)
}
func = PyTuple_GET_ITEM(args, 0); /* Better be callable */
name = PyTuple_GET_ITEM(args, 1);
- if (!PyString_Check(name)) {
+ if ((!PyString_Check(name) && !PyUnicode_Check(name))) {
PyErr_SetString(PyExc_TypeError,
"__build_class__: name is not a string");
return NULL;
@@ -835,20 +835,23 @@ globals and locals. If only globals is given, locals defaults to it.");
static PyObject *
builtin_getattr(PyObject *self, PyObject *args)
{
- PyObject *v, *result, *dflt = NULL;
+ PyObject *v, *result, *dflt = NULL, *release = NULL;
PyObject *name;
if (!PyArg_UnpackTuple(args, "getattr", 2, 3, &v, &name, &dflt))
return NULL;
- if (PyUnicode_Check(name)) {
- name = _PyUnicode_AsDefaultEncodedString(name, NULL);
- if (name == NULL)
+
+ if (PyString_Check(name)) {
+ release = PyString_AsDecodedObject(name, NULL, NULL);
+ if (!release)
return NULL;
+ name = release;
}
- if (!PyString_Check(name)) {
+ if (!PyUnicode_Check(name)) {
PyErr_SetString(PyExc_TypeError,
"getattr(): attribute name must be string");
+ Py_XDECREF(release);
return NULL;
}
result = PyObject_GetAttr(v, name);
@@ -859,6 +862,7 @@ builtin_getattr(PyObject *self, PyObject *args)
Py_INCREF(dflt);
result = dflt;
}
+ Py_XDECREF(release);
return result;
}
@@ -894,13 +898,7 @@ builtin_hasattr(PyObject *self, PyObject *args)
if (!PyArg_UnpackTuple(args, "hasattr", 2, 2, &v, &name))
return NULL;
- if (PyUnicode_Check(name)) {
- name = _PyUnicode_AsDefaultEncodedString(name, NULL);
- if (name == NULL)
- return NULL;
- }
-
- if (!PyString_Check(name)) {
+ if (!PyUnicode_Check(name)) {
PyErr_SetString(PyExc_TypeError,
"hasattr(): attribute name must be string");
return NULL;
diff --git a/Python/ceval.c b/Python/ceval.c
index 710a0d1561..bb05a16347 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -454,8 +454,19 @@ _Py_CheckRecursiveCall(char *where)
return -1;
}
#endif
+ if (tstate->recursion_critical)
+ /* Somebody asked that we don't check for recursion. */
+ return 0;
+ if (tstate->overflowed) {
+ if (tstate->recursion_depth > recursion_limit + 50) {
+ /* Overflowing while handling an overflow. Give up. */
+ Py_FatalError("Cannot recover from stack overflow.");
+ }
+ return 0;
+ }
if (tstate->recursion_depth > recursion_limit) {
--tstate->recursion_depth;
+ tstate->overflowed = 1;
PyErr_Format(PyExc_RuntimeError,
"maximum recursion depth exceeded%s",
where);
@@ -2759,7 +2770,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
vars into frame. This isn't too efficient right now. */
if (PyTuple_GET_SIZE(co->co_cellvars)) {
int i, j, nargs, found;
- char *cellname, *argname;
+ Py_UNICODE *cellname, *argname;
PyObject *c;
nargs = co->co_argcount;
@@ -2776,13 +2787,13 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
list so that we can march over it more efficiently?
*/
for (i = 0; i < PyTuple_GET_SIZE(co->co_cellvars); ++i) {
- cellname = PyString_AS_STRING(
+ cellname = PyUnicode_AS_UNICODE(
PyTuple_GET_ITEM(co->co_cellvars, i));
found = 0;
for (j = 0; j < nargs; j++) {
- argname = PyString_AS_STRING(
+ argname = PyUnicode_AS_UNICODE(
PyTuple_GET_ITEM(co->co_varnames, j));
- if (strcmp(cellname, argname) == 0) {
+ if (Py_UNICODE_strcmp(cellname, argname) == 0) {
c = PyCell_New(GETLOCAL(j));
if (c == NULL)
goto fail;
@@ -3428,7 +3439,7 @@ PyEval_GetFuncName(PyObject *func)
if (PyMethod_Check(func))
return PyEval_GetFuncName(PyMethod_GET_FUNCTION(func));
else if (PyFunction_Check(func))
- return PyString_AsString(((PyFunctionObject*)func)->func_name);
+ return PyUnicode_AsString(((PyFunctionObject*)func)->func_name);
else if (PyCFunction_Check(func))
return ((PyCFunctionObject*)func)->m_ml->ml_name;
else
@@ -4052,8 +4063,8 @@ import_all_from(PyObject *locals, PyObject *v)
break;
}
if (skip_leading_underscores &&
- PyString_Check(name) &&
- PyString_AS_STRING(name)[0] == '_')
+ PyUnicode_Check(name) &&
+ PyUnicode_AS_UNICODE(name)[0] == '_')
{
Py_DECREF(name);
continue;
diff --git a/Python/compile.c b/Python/compile.c
index 359de587c5..fde4591cf3 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -194,16 +194,16 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident)
{
/* Name mangling: __private becomes _classname__private.
This is independent from how the name is used. */
- const char *p, *name = PyString_AsString(ident);
- char *buffer;
+ const Py_UNICODE *p, *name = PyUnicode_AS_UNICODE(ident);
+ Py_UNICODE *buffer;
size_t nlen, plen;
- if (privateobj == NULL || !PyString_Check(privateobj) ||
+ if (privateobj == NULL || !PyUnicode_Check(privateobj) ||
name == NULL || name[0] != '_' || name[1] != '_') {
Py_INCREF(ident);
return ident;
}
- p = PyString_AsString(privateobj);
- nlen = strlen(name);
+ p = PyUnicode_AS_UNICODE(privateobj);
+ nlen = Py_UNICODE_strlen(name);
/* Don't mangle __id__ or names with dots.
The only time a name with a dot can occur is when
@@ -214,26 +214,26 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident)
mangling of the module name, e.g. __M.X.
*/
if ((name[nlen-1] == '_' && name[nlen-2] == '_')
- || strchr(name, '.')) {
+ || Py_UNICODE_strchr(name, '.')) {
Py_INCREF(ident);
return ident; /* Don't mangle __whatever__ */
}
/* Strip leading underscores from class name */
while (*p == '_')
p++;
- if (*p == '\0') {
+ if (*p == 0) {
Py_INCREF(ident);
return ident; /* Don't mangle if class is just underscores */
}
- plen = strlen(p);
- ident = PyString_FromStringAndSize(NULL, 1 + nlen + plen);
+ plen = Py_UNICODE_strlen(p);
+ ident = PyUnicode_FromStringAndSize(NULL, 1 + nlen + plen);
if (!ident)
return 0;
/* ident = "_" + p[:plen] + name # i.e. 1+plen+nlen bytes */
- buffer = PyString_AS_STRING(ident);
+ buffer = PyUnicode_AS_UNICODE(ident);
buffer[0] = '_';
- strncpy(buffer+1, p, plen);
- strcpy(buffer+1+plen, name);
+ Py_UNICODE_strncpy(buffer+1, p, plen);
+ Py_UNICODE_strcpy(buffer+1+plen, name);
return ident;
}
@@ -259,7 +259,7 @@ PyAST_Compile(mod_ty mod, const char *filename, PyCompilerFlags *flags,
int merged;
if (!__doc__) {
- __doc__ = PyString_InternFromString("__doc__");
+ __doc__ = PyUnicode_InternFromString("__doc__");
if (!__doc__)
return NULL;
}
@@ -551,7 +551,7 @@ compiler_new_tmpname(struct compiler *c)
{
char tmpname[256];
PyOS_snprintf(tmpname, sizeof(tmpname), "_[%d]", ++c->u->u_tmpname);
- return PyString_FromString(tmpname);
+ return PyUnicode_FromString(tmpname);
}
/* Allocate a new block and return a pointer to it.
@@ -1143,7 +1143,7 @@ compiler_mod(struct compiler *c, mod_ty mod)
int addNone = 1;
static PyObject *module;
if (!module) {
- module = PyString_FromString("<module>");
+ module = PyUnicode_FromString("<module>");
if (!module)
return NULL;
}
@@ -1362,7 +1362,7 @@ compiler_visit_annotations(struct compiler *c, arguments_ty args,
goto error;
if (!return_str) {
- return_str = PyString_InternFromString("return");
+ return_str = PyUnicode_InternFromString("return");
if (!return_str)
goto error;
}
@@ -1488,12 +1488,12 @@ compiler_class(struct compiler *c, stmt_ty s)
/* initialize statics */
if (build_class == NULL) {
- build_class = PyString_FromString("__build_class__");
+ build_class = PyUnicode_FromString("__build_class__");
if (build_class == NULL)
return 0;
}
if (locals == NULL) {
- locals = PyString_FromString("__locals__");
+ locals = PyUnicode_FromString("__locals__");
if (locals == NULL)
return 0;
}
@@ -1533,7 +1533,7 @@ compiler_class(struct compiler *c, stmt_ty s)
/* ... and store it into f_locals */
ADDOP_IN_SCOPE(c, STORE_LOCALS);
/* load __name__ ... */
- str = PyString_InternFromString("__name__");
+ str = PyUnicode_InternFromString("__name__");
if (!str || !compiler_nameop(c, str, Load)) {
Py_XDECREF(str);
compiler_exit_scope(c);
@@ -1541,7 +1541,7 @@ compiler_class(struct compiler *c, stmt_ty s)
}
Py_DECREF(str);
/* ... and store it as __module__ */
- str = PyString_InternFromString("__module__");
+ str = PyUnicode_InternFromString("__module__");
if (!str || !compiler_nameop(c, str, Store)) {
Py_XDECREF(str);
compiler_exit_scope(c);
@@ -1627,7 +1627,7 @@ compiler_lambda(struct compiler *c, expr_ty e)
assert(e->kind == Lambda_kind);
if (!name) {
- name = PyString_InternFromString("<lambda>");
+ name = PyUnicode_InternFromString("<lambda>");
if (!name)
return 0;
}
@@ -2027,17 +2027,17 @@ compiler_import_as(struct compiler *c, identifier name, identifier asname)
If there is a dot in name, we need to split it and emit a
LOAD_ATTR for each name.
*/
- const char *src = PyString_AS_STRING(name);
- const char *dot = strchr(src, '.');
+ const Py_UNICODE *src = PyUnicode_AS_UNICODE(name);
+ const Py_UNICODE *dot = Py_UNICODE_strchr(src, '.');
if (dot) {
/* Consume the base module name to get the first attribute */
src = dot + 1;
while (dot) {
/* NB src is only defined when dot != NULL */
PyObject *attr;
- dot = strchr(src, '.');
- attr = PyString_FromStringAndSize(src,
- dot ? dot - src : strlen(src));
+ dot = Py_UNICODE_strchr(src, '.');
+ attr = PyUnicode_FromUnicode(src,
+ dot ? dot - src : Py_UNICODE_strlen(src));
if (!attr)
return -1;
ADDOP_O(c, LOAD_ATTR, attr, names);
@@ -2081,11 +2081,11 @@ compiler_import(struct compiler *c, stmt_ty s)
}
else {
identifier tmp = alias->name;
- const char *base = PyString_AS_STRING(alias->name);
- char *dot = strchr(base, '.');
+ const Py_UNICODE *base = PyUnicode_AS_UNICODE(alias->name);
+ Py_UNICODE *dot = Py_UNICODE_strchr(base, '.');
if (dot)
- tmp = PyString_FromStringAndSize(base,
- dot - base);
+ tmp = PyUnicode_FromUnicode(base,
+ dot - base);
r = compiler_nameop(c, tmp, Store);
if (dot) {
Py_DECREF(tmp);
@@ -2122,8 +2122,8 @@ compiler_from_import(struct compiler *c, stmt_ty s)
}
if (s->lineno > c->c_future->ff_lineno) {
- if (!strcmp(PyString_AS_STRING(s->v.ImportFrom.module),
- "__future__")) {
+ if (!PyUnicode_CompareWithASCIIString(s->v.ImportFrom.module,
+ "__future__")) {
Py_DECREF(level);
Py_DECREF(names);
return compiler_error(c,
@@ -2142,7 +2142,7 @@ compiler_from_import(struct compiler *c, stmt_ty s)
alias_ty alias = (alias_ty)asdl_seq_GET(s->v.ImportFrom.names, i);
identifier store_name;
- if (i == 0 && *PyString_AS_STRING(alias->name) == '*') {
+ if (i == 0 && *PyUnicode_AS_UNICODE(alias->name) == '*') {
assert(n == 1);
ADDOP(c, IMPORT_STAR);
return 1;
@@ -2172,7 +2172,7 @@ compiler_assert(struct compiler *c, stmt_ty s)
if (Py_OptimizeFlag)
return 1;
if (assertion_error == NULL) {
- assertion_error = PyString_FromString("AssertionError");
+ assertion_error = PyUnicode_FromString("AssertionError");
if (assertion_error == NULL)
return 0;
}
@@ -2417,7 +2417,7 @@ compiler_nameop(struct compiler *c, identifier name, expr_context_ty ctx)
/* First check for assignment to __debug__. Param? */
if ((ctx == Store || ctx == AugStore || ctx == Del)
- && !strcmp(PyString_AS_STRING(name), "__debug__")) {
+ && !PyUnicode_CompareWithASCIIString(name, "__debug__")) {
return compiler_error(c, "can not assign to __debug__");
}
@@ -2455,7 +2455,7 @@ mangled = _Py_Mangle(c->u->u_private, name);
}
/* XXX Leave assert here, but handle __doc__ and the like better */
- assert(scope || PyString_AS_STRING(name)[0] == '_');
+ assert(scope || PyUnicode_AS_UNICODE(name)[0] == '_');
switch (optype) {
case OP_DEREF:
@@ -2889,7 +2889,7 @@ compiler_genexp(struct compiler *c, expr_ty e)
{
static identifier name;
if (!name) {
- name = PyString_FromString("<genexp>");
+ name = PyUnicode_FromString("<genexp>");
if (!name)
return 0;
}
@@ -2904,7 +2904,7 @@ compiler_listcomp(struct compiler *c, expr_ty e)
{
static identifier name;
if (!name) {
- name = PyString_FromString("<listcomp>");
+ name = PyUnicode_FromString("<listcomp>");
if (!name)
return 0;
}
@@ -2919,7 +2919,7 @@ compiler_setcomp(struct compiler *c, expr_ty e)
{
static identifier name;
if (!name) {
- name = PyString_FromString("<setcomp>");
+ name = PyUnicode_FromString("<setcomp>");
if (!name)
return 0;
}
@@ -2957,8 +2957,8 @@ expr_constant(expr_ty e)
case Name_kind:
/* __debug__ is not assignable, so we can optimize
* it away in if and while statements */
- if (strcmp(PyString_AS_STRING(e->v.Name.id),
- "__debug__") == 0)
+ if (PyUnicode_CompareWithASCIIString(e->v.Name.id,
+ "__debug__") == 0)
return ! Py_OptimizeFlag;
/* fall through */
default:
@@ -2999,12 +2999,12 @@ compiler_with(struct compiler *c, stmt_ty s)
assert(s->kind == With_kind);
if (!enter_attr) {
- enter_attr = PyString_InternFromString("__enter__");
+ enter_attr = PyUnicode_InternFromString("__enter__");
if (!enter_attr)
return 0;
}
if (!exit_attr) {
- exit_attr = PyString_InternFromString("__exit__");
+ exit_attr = PyUnicode_InternFromString("__exit__");
if (!exit_attr)
return 0;
}
diff --git a/Python/future.c b/Python/future.c
index d6f11a4469..2092f58753 100644
--- a/Python/future.c
+++ b/Python/future.c
@@ -55,7 +55,7 @@ future_parse(PyFutureFeatures *ff, mod_ty mod, const char *filename)
static PyObject *future;
if (!future) {
- future = PyString_InternFromString("__future__");
+ future = PyUnicode_InternFromString("__future__");
if (!future)
return 0;
}
diff --git a/Python/import.c b/Python/import.c
index 75f1e015b0..0e4e50c8f5 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -1920,7 +1920,7 @@ PyImport_ImportFrozenModule(char *name)
if (m == NULL)
goto err_return;
d = PyModule_GetDict(m);
- s = PyString_InternFromString(name);
+ s = PyUnicode_InternFromString(name);
if (s == NULL)
goto err_return;
err = PyDict_SetItemString(d, "__path__", s);
@@ -1949,7 +1949,7 @@ PyImport_ImportModule(const char *name)
PyObject *pname;
PyObject *result;
- pname = PyString_FromString(name);
+ pname = PyUnicode_FromString(name);
if (pname == NULL)
return NULL;
result = PyImport_Import(pname);
@@ -2084,12 +2084,12 @@ get_parent(PyObject *globals, char *buf, Py_ssize_t *p_buflen, int level)
return Py_None;
if (namestr == NULL) {
- namestr = PyString_InternFromString("__name__");
+ namestr = PyUnicode_InternFromString("__name__");
if (namestr == NULL)
return NULL;
}
if (pathstr == NULL) {
- pathstr = PyString_InternFromString("__path__");
+ pathstr = PyUnicode_InternFromString("__path__");
if (pathstr == NULL)
return NULL;
}
@@ -2097,9 +2097,18 @@ get_parent(PyObject *globals, char *buf, Py_ssize_t *p_buflen, int level)
*buf = '\0';
*p_buflen = 0;
modname = PyDict_GetItem(globals, namestr);
- if (modname == NULL || !PyString_Check(modname))
+ if (modname == NULL || (!PyString_Check(modname) && !PyUnicode_Check(modname)))
return Py_None;
+ if (PyUnicode_Check(modname)) {
+ /* XXX need to support Unicode better */
+ modname = _PyUnicode_AsDefaultEncodedString(modname, NULL);
+ if (!modname) {
+ PyErr_Clear();
+ return NULL;
+ }
+ }
+
modpath = PyDict_GetItem(globals, pathstr);
if (modpath != NULL) {
Py_ssize_t len = PyString_GET_SIZE(modname);
@@ -2254,13 +2263,23 @@ ensure_fromlist(PyObject *mod, PyObject *fromlist, char *buf, Py_ssize_t buflen,
}
return 0;
}
- if (!PyString_Check(item)) {
+ if (PyString_Check(item)) {
+ /* XXX there shouldn't be any str8 objects here */
+ PyObject *uni = PyUnicode_DecodeASCII(PyString_AsString(item),
+ PyString_Size(item),
+ "strict");
+ Py_DECREF(item);
+ if (!uni)
+ return 0;
+ item = uni;
+ }
+ if (!PyUnicode_Check(item)) {
PyErr_SetString(PyExc_TypeError,
- "Item in ``from list'' not a string");
+ "Item in ``from list'' not a unicode string");
Py_DECREF(item);
return 0;
}
- if (PyString_AS_STRING(item)[0] == '*') {
+ if (PyUnicode_AS_UNICODE(item)[0] == '*') {
PyObject *all;
Py_DECREF(item);
/* See if the package defines __all__ */
@@ -2279,9 +2298,23 @@ ensure_fromlist(PyObject *mod, PyObject *fromlist, char *buf, Py_ssize_t buflen,
}
hasit = PyObject_HasAttr(mod, item);
if (!hasit) {
- char *subname = PyString_AS_STRING(item);
+ PyObject *item8;
+ char *subname;
PyObject *submod;
char *p;
+ if (!Py_FileSystemDefaultEncoding) {
+ item8 = PyUnicode_EncodeASCII(PyUnicode_AsUnicode(item),
+ PyUnicode_GetSize(item),
+ "strict");
+ } else {
+ item8 = PyUnicode_AsEncodedObject(item,
+ Py_FileSystemDefaultEncoding, "strict");
+ }
+ if (!item8) {
+ PyErr_SetString(PyExc_ValueError, "Cannot encode path item");
+ return 0;
+ }
+ subname = PyBytes_AsString(item8);
if (buflen + strlen(subname) >= MAXPATHLEN) {
PyErr_SetString(PyExc_ValueError,
"Module name too long");
@@ -2292,6 +2325,7 @@ ensure_fromlist(PyObject *mod, PyObject *fromlist, char *buf, Py_ssize_t buflen,
*p++ = '.';
strcpy(p, subname);
submod = import_submodule(mod, subname, buf);
+ Py_DECREF(item8);
Py_XDECREF(submod);
if (submod == NULL) {
Py_DECREF(item);
@@ -2515,10 +2549,10 @@ PyImport_Import(PyObject *module_name)
/* Initialize constant string objects */
if (silly_list == NULL) {
- import_str = PyString_InternFromString("__import__");
+ import_str = PyUnicode_InternFromString("__import__");
if (import_str == NULL)
return NULL;
- builtins_str = PyString_InternFromString("__builtins__");
+ builtins_str = PyUnicode_InternFromString("__builtins__");
if (builtins_str == NULL)
return NULL;
silly_list = Py_BuildValue("[s]", "__doc__");
diff --git a/Python/modsupport.c b/Python/modsupport.c
index a272ce3135..1ea08c39a5 100644
--- a/Python/modsupport.c
+++ b/Python/modsupport.c
@@ -65,7 +65,7 @@ Py_InitModule4(const char *name, PyMethodDef *methods, const char *doc,
return NULL;
d = PyModule_GetDict(m);
if (methods != NULL) {
- n = PyString_FromString(name);
+ n = PyUnicode_FromString(name);
if (n == NULL)
return NULL;
for (ml = methods; ml->ml_name != NULL; ml++) {
@@ -689,5 +689,5 @@ PyModule_AddIntConstant(PyObject *m, const char *name, long value)
int
PyModule_AddStringConstant(PyObject *m, const char *name, const char *value)
{
- return PyModule_AddObject(m, name, PyString_FromString(value));
+ /* Build the constant as a str (unicode) object, then pass it to
+ PyModule_AddObject under the given name and return its result. */
+ PyObject *text = PyUnicode_FromString(value);
+ return PyModule_AddObject(m, name, text);
}
diff --git a/Python/pystate.c b/Python/pystate.c
index 086789d355..1914ba8e14 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -167,6 +167,8 @@ PyThreadState_New(PyInterpreterState *interp)
tstate->frame = NULL;
tstate->recursion_depth = 0;
+ tstate->overflowed = 0;
+ tstate->recursion_critical = 0;
tstate->tracing = 0;
tstate->use_tracing = 0;
tstate->tick_counter = 0;
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index c2005f1baf..5daf7dd0ec 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -1133,6 +1133,8 @@ PyErr_Display(PyObject *exception, PyObject *value, PyObject *tb)
PyObject *f = PySys_GetObject("stderr");
Py_INCREF(value);
if (f == NULL)
+ _PyObject_Dump(value);
+ if (f == NULL)
fprintf(stderr, "lost sys.stderr\n");
else {
fflush(stdout);
diff --git a/Python/symtable.c b/Python/symtable.c
index f3a2c78c16..5df7318d17 100644
--- a/Python/symtable.c
+++ b/Python/symtable.c
@@ -92,7 +92,7 @@ ste_repr(PySTEntryObject *ste)
PyOS_snprintf(buf, sizeof(buf),
"<symtable entry %.100s(%ld), line %d>",
- PyString_AS_STRING(ste->ste_name),
+ PyUnicode_AsString(ste->ste_name),
PyInt_AS_LONG(ste->ste_id), ste->ste_lineno);
return PyUnicode_FromString(buf);
}
@@ -190,7 +190,7 @@ static identifier top = NULL, lambda = NULL, genexpr = NULL,
listcomp = NULL, setcomp = NULL;
#define GET_IDENTIFIER(VAR) \
- ((VAR) ? (VAR) : ((VAR) = PyString_InternFromString(# VAR)))
+ ((VAR) ? (VAR) : ((VAR) = PyUnicode_InternFromString(# VAR)))
#define DUPLICATE_ARGUMENT \
"duplicate argument '%s' in function definition"
@@ -390,13 +390,13 @@ analyze_name(PySTEntryObject *ste, PyObject *scopes, PyObject *name, long flags,
if (flags & DEF_PARAM) {
PyErr_Format(PyExc_SyntaxError,
"name '%s' is parameter and global",
- PyString_AS_STRING(name));
+ PyUnicode_AsString(name));
return 0;
}
if (flags & DEF_NONLOCAL) {
PyErr_Format(PyExc_SyntaxError,
"name '%s' is nonlocal and global",
- PyString_AS_STRING(name));
+ PyUnicode_AsString(name));
return 0;
}
SET_SCOPE(scopes, name, GLOBAL_EXPLICIT);
@@ -410,7 +410,7 @@ analyze_name(PySTEntryObject *ste, PyObject *scopes, PyObject *name, long flags,
if (flags & DEF_PARAM) {
PyErr_Format(PyExc_SyntaxError,
"name '%s' is parameter and nonlocal",
- PyString_AS_STRING(name));
+ PyUnicode_AsString(name));
return 0;
}
if (!bound) {
@@ -421,7 +421,7 @@ analyze_name(PySTEntryObject *ste, PyObject *scopes, PyObject *name, long flags,
if (!PySet_Contains(bound, name)) {
PyErr_Format(PyExc_SyntaxError,
"no binding for nonlocal '%s' found",
- PyString_AS_STRING(name));
+ PyUnicode_AsString(name));
return 0;
}
@@ -524,7 +524,7 @@ check_unoptimized(const PySTEntryObject* ste) {
PyOS_snprintf(buf, sizeof(buf),
"import * is not allowed in function '%.100s' "
"because it is %s",
- PyString_AS_STRING(ste->ste_name), trailer);
+ PyUnicode_AsString(ste->ste_name), trailer);
break;
}
@@ -984,7 +984,7 @@ symtable_new_tmpname(struct symtable *st)
PyOS_snprintf(tmpname, sizeof(tmpname), "_[%d]",
++st->st_cur->ste_tmpname);
- tmp = PyString_InternFromString(tmpname);
+ tmp = PyUnicode_InternFromString(tmpname);
if (!tmp)
return 0;
if (!symtable_add_def(st, tmp, DEF_LOCAL))
@@ -1129,7 +1129,7 @@ symtable_visit_stmt(struct symtable *st, stmt_ty s)
asdl_seq *seq = s->v.Global.names;
for (i = 0; i < asdl_seq_LEN(seq); i++) {
identifier name = (identifier)asdl_seq_GET(seq, i);
- char *c_name = PyString_AS_STRING(name);
+ char *c_name = PyUnicode_AsString(name);
long cur = symtable_lookup(st, name);
if (cur < 0)
return 0;
@@ -1156,7 +1156,7 @@ symtable_visit_stmt(struct symtable *st, stmt_ty s)
asdl_seq *seq = s->v.Nonlocal.names;
for (i = 0; i < asdl_seq_LEN(seq); i++) {
identifier name = (identifier)asdl_seq_GET(seq, i);
- char *c_name = PyString_AS_STRING(name);
+ char *c_name = PyUnicode_AsString(name);
long cur = symtable_lookup(st, name);
if (cur < 0)
return 0;
@@ -1316,7 +1316,7 @@ symtable_visit_expr(struct symtable *st, expr_ty e)
static int
symtable_implicit_arg(struct symtable *st, int pos)
{
- PyObject *id = PyString_FromFormat(".%d", pos);
+ PyObject *id = PyUnicode_FromFormat(".%d", pos);
if (id == NULL)
return 0;
if (!symtable_add_def(st, id, DEF_PARAM)) {
@@ -1425,10 +1425,10 @@ symtable_visit_alias(struct symtable *st, alias_ty a)
*/
PyObject *store_name;
PyObject *name = (a->asname == NULL) ? a->name : a->asname;
- const char *base = PyString_AS_STRING(name);
- char *dot = strchr(base, '.');
+ const Py_UNICODE *base = PyUnicode_AS_UNICODE(name);
+ Py_UNICODE *dot = Py_UNICODE_strchr(base, '.');
if (dot) {
- store_name = PyString_FromStringAndSize(base, dot - base);
+ store_name = PyUnicode_FromUnicode(base, dot - base);
if (!store_name)
return 0;
}
@@ -1436,7 +1436,7 @@ symtable_visit_alias(struct symtable *st, alias_ty a)
store_name = name;
Py_INCREF(store_name);
}
- if (strcmp(PyString_AS_STRING(name), "*")) {
+ if (PyUnicode_CompareWithASCIIString(name, "*")) {
int r = symtable_add_def(st, store_name, DEF_IMPORT);
Py_DECREF(store_name);
return r;
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 30e0180da9..1b7674b9b0 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -280,7 +280,7 @@ trace_init(void)
int i;
for (i = 0; i < 7; ++i) {
if (whatstrings[i] == NULL) {
- name = PyString_InternFromString(whatnames[i]);
+ name = PyUnicode_InternFromString(whatnames[i]);
if (name == NULL)
return -1;
whatstrings[i] = name;
@@ -801,7 +801,7 @@ list_builtin_module_names(void)
if (list == NULL)
return NULL;
for (i = 0; PyImport_Inittab[i].name != NULL; i++) {
- PyObject *name = PyString_FromString(
+ PyObject *name = PyUnicode_FromString(
PyImport_Inittab[i].name);
if (name == NULL)
break;