diff options
author | Benjamin Peterson <benjamin@python.org> | 2014-04-14 11:46:51 -0400 |
---|---|---|
committer | Benjamin Peterson <benjamin@python.org> | 2014-04-14 11:46:51 -0400 |
commit | 7a94319d434dec81571925b7a9d9312c08a8c483 (patch) | |
tree | eca86248ce52d5e8e73e63f27742cc0e2aa7767e /Modules/_elementtree.c | |
parent | d3f55143df3157922d225653c574aed1c86056ac (diff) | |
parent | dea0e854fbc25dada42f1d324362aba73c473caa (diff) | |
download | cpython-7a94319d434dec81571925b7a9d9312c08a8c483.tar.gz |
merge 3.2
Diffstat (limited to 'Modules/_elementtree.c')
-rw-r--r-- | Modules/_elementtree.c | 2386 |
1 files changed, 1485 insertions, 901 deletions
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 17d5d09962..f45893f15d 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -48,6 +48,7 @@ /* See http://www.python.org/psf/license for licensing details. */ #include "Python.h" +#include "structmember.h" #define VERSION "1.0.6" @@ -57,9 +58,6 @@ /* Leave defined to include the expat-based XMLParser type */ #define USE_EXPAT -/* Define to do all expat calls via pyexpat's embedded expat library */ -/* #define USE_PYEXPAT_CAPI */ - /* An element can hold this many children without extra memory allocations. */ #define STATIC_CHILDREN 4 @@ -70,7 +68,7 @@ helps if you have lots of leaf nodes with attributes). */ /* Also note that pymalloc always allocates blocks in multiples of - eight bytes. For the current version of cElementTree, this means + eight bytes. For the current C version of ElementTree, this means that the number of children should be an even number, at least on 32-bit platforms. */ @@ -94,39 +92,36 @@ do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0) #define LOCAL(type) static type #endif -/* compatibility macros */ -#if (PY_VERSION_HEX < 0x02060000) -#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt) -#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) -#endif - -#if (PY_VERSION_HEX < 0x02050000) -typedef int Py_ssize_t; -#define lenfunc inquiry -#endif - -#if (PY_VERSION_HEX < 0x02040000) -#define PyDict_CheckExact PyDict_Check - -#if !defined(Py_RETURN_NONE) -#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None -#endif -#endif - /* macros used to store 'join' flags in string object pointers. note that all use of text and tail as object pointers must be wrapped in JOIN_OBJ. see comments in the ElementObject definition for more info. */ #define JOIN_GET(p) ((Py_uintptr_t) (p) & 1) #define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag))) -#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1)) +#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1)) + +/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by + * reference since this function sets it to NULL. +*/ +static void _clear_joined_ptr(PyObject **p) +{ + if (*p) { + PyObject *tmp = JOIN_OBJ(*p); + *p = NULL; + Py_DECREF(tmp); + } +} + +/* Types defined by this extension */ +static PyTypeObject Element_Type; +static PyTypeObject ElementIter_Type; +static PyTypeObject TreeBuilder_Type; +static PyTypeObject XMLParser_Type; + /* glue functions (see the init function for details) */ static PyObject* elementtree_parseerror_obj; -static PyObject* elementtree_copyelement_obj; static PyObject* elementtree_deepcopy_obj; -static PyObject* elementtree_iter_obj; -static PyObject* elementtree_itertext_obj; static PyObject* elementpath_obj; /* helpers */ @@ -147,17 +142,11 @@ deepcopy(PyObject* object, PyObject* memo) return NULL; } - args = PyTuple_New(2); + args = PyTuple_Pack(2, object, memo); if (!args) return NULL; - - Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object); - Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo); - result = PyObject_CallObject(elementtree_deepcopy_obj, args); - Py_DECREF(args); - return result; } @@ -165,53 +154,30 @@ LOCAL(PyObject*) list_join(PyObject* list) { /* join list elements (destroying the list in the process) */ - PyObject* joiner; - PyObject* function; - PyObject* args; PyObject* result; - switch (PyList_GET_SIZE(list)) { - case 0: - Py_DECREF(list); - return PyBytes_FromString(""); - case 1: - result = PyList_GET_ITEM(list, 0); - Py_INCREF(result); - Py_DECREF(list); - return result; - } - - /* two or more elements: slice out a suitable separator from the - first member, and use that to join the entire list */ - - joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0); + joiner = PyUnicode_FromStringAndSize("", 0); if (!joiner) return NULL; - - function = PyObject_GetAttrString(joiner, "join"); - if (!function) { - Py_DECREF(joiner); - return NULL; - } - - args = PyTuple_New(1); - if (!args) - return NULL; - - PyTuple_SET_ITEM(args, 0, list); - - result = PyObject_CallObject(function, args); - - Py_DECREF(args); /* also removes list */ - Py_DECREF(function); + result = PyUnicode_Join(joiner, list); Py_DECREF(joiner); - + if (result) + Py_DECREF(list); return result; } +/* Is the given object an empty dictionary? +*/ +static int +is_empty_dict(PyObject *obj) +{ + return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0; +} + + /* -------------------------------------------------------------------- */ -/* the element type */ +/* the Element type */ typedef struct { @@ -226,7 +192,7 @@ typedef struct { PyObject* *children; PyObject* _children[STATIC_CHILDREN]; - + } ElementObjectExtra; typedef struct { @@ -249,17 +215,18 @@ typedef struct { ElementObjectExtra* extra; + PyObject *weakreflist; /* For tp_weaklistoffset */ + } ElementObject; -static PyTypeObject Element_Type; #define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type) /* -------------------------------------------------------------------- */ -/* element constructor and destructor */ +/* Element constructors and destructor */ LOCAL(int) -element_new_extra(ElementObject* self, PyObject* attrib) +create_extra(ElementObject* self, PyObject* attrib) { self->extra = PyObject_Malloc(sizeof(ElementObjectExtra)); if (!self->extra) @@ -279,47 +246,48 @@ element_new_extra(ElementObject* self, PyObject* attrib) } LOCAL(void) -element_dealloc_extra(ElementObject* self) +dealloc_extra(ElementObject* self) { + ElementObjectExtra *myextra; int i; - Py_DECREF(self->extra->attrib); + if (!self->extra) + return; + + /* Avoid DECREFs calling into this code again (cycles, etc.) + */ + myextra = self->extra; + self->extra = NULL; + + Py_DECREF(myextra->attrib); - for (i = 0; i < self->extra->length; i++) - Py_DECREF(self->extra->children[i]); + for (i = 0; i < myextra->length; i++) + Py_DECREF(myextra->children[i]); - if (self->extra->children != self->extra->_children) - PyObject_Free(self->extra->children); + if (myextra->children != myextra->_children) + PyObject_Free(myextra->children); - PyObject_Free(self->extra); + PyObject_Free(myextra); } +/* Convenience internal function to create new Element objects with the given + * tag and attributes. +*/ LOCAL(PyObject*) -element_new(PyObject* tag, PyObject* attrib) +create_new_element(PyObject* tag, PyObject* attrib) { ElementObject* self; - self = PyObject_New(ElementObject, &Element_Type); + self = PyObject_GC_New(ElementObject, &Element_Type); if (self == NULL) return NULL; - - /* use None for empty dictionaries */ - if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib)) - attrib = Py_None; - self->extra = NULL; - if (attrib != Py_None) { - - if (element_new_extra(self, attrib) < 0) { + if (attrib != Py_None && !is_empty_dict(attrib)) { + if (create_extra(self, attrib) < 0) { PyObject_Del(self); return NULL; } - - self->extra->length = 0; - self->extra->allocated = STATIC_CHILDREN; - self->extra->children = self->extra->_children; - } Py_INCREF(tag); @@ -331,11 +299,131 @@ element_new(PyObject* tag, PyObject* attrib) Py_INCREF(Py_None); self->tail = Py_None; - ALLOC(sizeof(ElementObject), "create element"); + self->weakreflist = NULL; + ALLOC(sizeof(ElementObject), "create element"); + PyObject_GC_Track(self); return (PyObject*) self; } +static PyObject * +element_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + ElementObject *e = (ElementObject *)type->tp_alloc(type, 0); + if (e != NULL) { + Py_INCREF(Py_None); + e->tag = Py_None; + + Py_INCREF(Py_None); + e->text = Py_None; + + Py_INCREF(Py_None); + e->tail = Py_None; + + e->extra = NULL; + e->weakreflist = NULL; + } + return (PyObject *)e; +} + +/* Helper function for extracting the attrib dictionary from a keywords dict. + * This is required by some constructors/functions in this module that can + * either accept attrib as a keyword argument or all attributes splashed + * directly into *kwds. + * + * Return a dictionary with the content of kwds merged into the content of + * attrib. If there is no attrib keyword, return a copy of kwds. + */ +static PyObject* +get_attrib_from_keywords(PyObject *kwds) +{ + PyObject *attrib_str = PyUnicode_FromString("attrib"); + PyObject *attrib = PyDict_GetItem(kwds, attrib_str); + + if (attrib) { + /* If attrib was found in kwds, copy its value and remove it from + * kwds + */ + if (!PyDict_Check(attrib)) { + Py_DECREF(attrib_str); + PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s", + Py_TYPE(attrib)->tp_name); + return NULL; + } + attrib = PyDict_Copy(attrib); + PyDict_DelItem(kwds, attrib_str); + } else { + attrib = PyDict_New(); + } + + Py_DECREF(attrib_str); + + if (attrib) + if (PyDict_Update(attrib, kwds) < 0) + return NULL; + return attrib; +} + +static int +element_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + PyObject *tag; + PyObject *tmp; + PyObject *attrib = NULL; + ElementObject *self_elem; + + if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib)) + return -1; + + if (attrib) { + /* attrib passed as positional arg */ + attrib = PyDict_Copy(attrib); + if (!attrib) + return -1; + if (kwds) { + if (PyDict_Update(attrib, kwds) < 0) { + Py_DECREF(attrib); + return -1; + } + } + } else if (kwds) { + /* have keywords args */ + attrib = get_attrib_from_keywords(kwds); + if (!attrib) + return -1; + } + + self_elem = (ElementObject *)self; + + if (attrib != NULL && !is_empty_dict(attrib)) { + if (create_extra(self_elem, attrib) < 0) { + Py_DECREF(attrib); + return -1; + } + } + + /* We own a reference to attrib here and it's no longer needed. */ + Py_XDECREF(attrib); + + /* Replace the objects already pointed to by tag, text and tail. */ + tmp = self_elem->tag; + Py_INCREF(tag); + self_elem->tag = tag; + Py_DECREF(tmp); + + tmp = self_elem->text; + Py_INCREF(Py_None); + self_elem->text = Py_None; + Py_DECREF(JOIN_OBJ(tmp)); + + tmp = self_elem->tail; + Py_INCREF(Py_None); + self_elem->tail = Py_None; + Py_DECREF(JOIN_OBJ(tmp)); + + return 0; +} + LOCAL(int) element_resize(ElementObject* self, int extra) { @@ -346,7 +434,7 @@ element_resize(ElementObject* self, int extra) elements. set an exception and return -1 if allocation failed */ if (!self->extra) - element_new_extra(self, NULL); + create_extra(self, NULL); size = self->extra->length + extra; @@ -354,14 +442,14 @@ element_resize(ElementObject* self, int extra) /* use Python 2.4's list growth strategy */ size = (size >> 3) + (size < 9 ? 3 : 6) + size; /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children" - * which needs at least 4 bytes. - * Although it's a false alarm always assume at least one child to + * which needs at least 4 bytes. + * Although it's a false alarm always assume at least one child to * be safe. */ size = size ? size : 1; if (self->extra->children != self->extra->_children) { /* Coverity CID #182 size_error: Allocating 1 bytes to pointer - * "children", which needs at least 4 bytes. Although it's a + * "children", which needs at least 4 bytes. Although it's a * false alarm always assume at least one child to be safe. */ children = PyObject_Realloc(self->extra->children, @@ -412,11 +500,11 @@ element_get_attrib(ElementObject* self) PyObject* res = self->extra->attrib; if (res == Py_None) { - Py_DECREF(res); /* create missing dictionary */ res = PyDict_New(); if (!res) return NULL; + Py_DECREF(Py_None); self->extra->attrib = res; } @@ -464,36 +552,7 @@ element_get_tail(ElementObject* self) } static PyObject* -element(PyObject* self, PyObject* args, PyObject* kw) -{ - PyObject* elem; - - PyObject* tag; - PyObject* attrib = NULL; - if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, - &PyDict_Type, &attrib)) - return NULL; - - if (attrib || kw) { - attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New(); - if (!attrib) - return NULL; - if (kw) - PyDict_Update(attrib, kw); - } else { - Py_INCREF(Py_None); - attrib = Py_None; - } - - elem = element_new(tag, attrib); - - Py_DECREF(attrib); - - return elem; -} - -static PyObject* -subelement(PyObject* self, PyObject* args, PyObject* kw) +subelement(PyObject *self, PyObject *args, PyObject *kwds) { PyObject* elem; @@ -505,18 +564,28 @@ subelement(PyObject* self, PyObject* args, PyObject* kw) &PyDict_Type, &attrib)) return NULL; - if (attrib || kw) { - attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New(); + if (attrib) { + /* attrib passed as positional arg */ + attrib = PyDict_Copy(attrib); + if (!attrib) + return NULL; + if (kwds) { + if (PyDict_Update(attrib, kwds) < 0) { + return NULL; + } + } + } else if (kwds) { + /* have keyword args */ + attrib = get_attrib_from_keywords(kwds); if (!attrib) return NULL; - if (kw) - PyDict_Update(attrib, kw); } else { + /* no attrib arg, no kwds, so no attribute */ Py_INCREF(Py_None); attrib = Py_None; } - elem = element_new(tag, attrib); + elem = create_new_element(tag, attrib); Py_DECREF(attrib); @@ -528,20 +597,51 @@ subelement(PyObject* self, PyObject* args, PyObject* kw) return elem; } +static int +element_gc_traverse(ElementObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->tag); + Py_VISIT(JOIN_OBJ(self->text)); + Py_VISIT(JOIN_OBJ(self->tail)); + + if (self->extra) { + int i; + Py_VISIT(self->extra->attrib); + + for (i = 0; i < self->extra->length; ++i) + Py_VISIT(self->extra->children[i]); + } + return 0; +} + +static int +element_gc_clear(ElementObject *self) +{ + Py_CLEAR(self->tag); + _clear_joined_ptr(&self->text); + _clear_joined_ptr(&self->tail); + + /* After dropping all references from extra, it's no longer valid anyway, + * so fully deallocate it. + */ + dealloc_extra(self); + return 0; +} + static void element_dealloc(ElementObject* self) { - if (self->extra) - element_dealloc_extra(self); + PyObject_GC_UnTrack(self); - /* discard attributes */ - Py_DECREF(self->tag); - Py_DECREF(JOIN_OBJ(self->text)); - Py_DECREF(JOIN_OBJ(self->tail)); + if (self->weakreflist != NULL) + PyObject_ClearWeakRefs((PyObject *) self); - RELEASE(sizeof(ElementObject), "destroy element"); + /* element_gc_clear clears all references and deallocates extra + */ + element_gc_clear(self); - PyObject_Del(self); + RELEASE(sizeof(ElementObject), "destroy element"); + Py_TYPE(self)->tp_free((PyObject *)self); } /* -------------------------------------------------------------------- */ @@ -561,15 +661,12 @@ element_append(ElementObject* self, PyObject* args) } static PyObject* -element_clear(ElementObject* self, PyObject* args) +element_clearmethod(ElementObject* self, PyObject* args) { if (!PyArg_ParseTuple(args, ":clear")) return NULL; - if (self->extra) { - element_dealloc_extra(self); - self->extra = NULL; - } + dealloc_extra(self); Py_INCREF(Py_None); Py_DECREF(JOIN_OBJ(self->text)); @@ -591,7 +688,7 @@ element_copy(ElementObject* self, PyObject* args) if (!PyArg_ParseTuple(args, ":__copy__")) return NULL; - element = (ElementObject*) element_new( + element = (ElementObject*) create_new_element( self->tag, (self->extra) ? self->extra->attrib : Py_None ); if (!element) @@ -606,7 +703,7 @@ element_copy(ElementObject* self, PyObject* args) Py_INCREF(JOIN_OBJ(element->tail)); if (self->extra) { - + if (element_resize(element, self->extra->length) < 0) { Py_DECREF(element); return NULL; @@ -618,7 +715,7 @@ element_copy(ElementObject* self, PyObject* args) } element->extra->length = self->extra->length; - + } return (PyObject*) element; @@ -654,14 +751,14 @@ element_deepcopy(ElementObject* self, PyObject* args) attrib = Py_None; } - element = (ElementObject*) element_new(tag, attrib); + element = (ElementObject*) create_new_element(tag, attrib); Py_DECREF(tag); Py_DECREF(attrib); if (!element) return NULL; - + text = deepcopy(JOIN_OBJ(self->text), memo); if (!text) goto error; @@ -675,7 +772,7 @@ element_deepcopy(ElementObject* self, PyObject* args) element->tail = JOIN_SET(tail, JOIN_GET(self->tail)); if (self->extra) { - + if (element_resize(element, self->extra->length) < 0) goto error; @@ -689,11 +786,11 @@ element_deepcopy(ElementObject* self, PyObject* args) } element->extra->length = self->extra->length; - + } /* add object to memo dictionary (so deepcopy won't visit it again) */ - id = PyLong_FromLong((Py_uintptr_t) self); + id = PyLong_FromSsize_t((Py_uintptr_t) self); if (!id) goto error; @@ -711,6 +808,186 @@ element_deepcopy(ElementObject* self, PyObject* args) return NULL; } +static PyObject* +element_sizeof(PyObject* myself, PyObject* args) +{ + ElementObject *self = (ElementObject*)myself; + Py_ssize_t result = sizeof(ElementObject); + if (self->extra) { + result += sizeof(ElementObjectExtra); + if (self->extra->children != self->extra->_children) + result += sizeof(PyObject*) * self->extra->allocated; + } + return PyLong_FromSsize_t(result); +} + +/* dict keys for getstate/setstate. */ +#define PICKLED_TAG "tag" +#define PICKLED_CHILDREN "_children" +#define PICKLED_ATTRIB "attrib" +#define PICKLED_TAIL "tail" +#define PICKLED_TEXT "text" + +/* __getstate__ returns a fabricated instance dict as in the pure-Python + * Element implementation, for interoperability/interchangeability. This + * makes the pure-Python implementation details an API, but (a) there aren't + * any unnecessary structures there; and (b) it buys compatibility with 3.2 + * pickles. See issue #16076. + */ +static PyObject * +element_getstate(ElementObject *self) +{ + int i, noattrib; + PyObject *instancedict = NULL, *children; + + /* Build a list of children. */ + children = PyList_New(self->extra ? self->extra->length : 0); + if (!children) + return NULL; + for (i = 0; i < PyList_GET_SIZE(children); i++) { + PyObject *child = self->extra->children[i]; + Py_INCREF(child); + PyList_SET_ITEM(children, i, child); + } + + /* Construct the state object. */ + noattrib = (self->extra == NULL || self->extra->attrib == Py_None); + if (noattrib) + instancedict = Py_BuildValue("{sOsOs{}sOsO}", + PICKLED_TAG, self->tag, + PICKLED_CHILDREN, children, + PICKLED_ATTRIB, + PICKLED_TEXT, JOIN_OBJ(self->text), + PICKLED_TAIL, JOIN_OBJ(self->tail)); + else + instancedict = Py_BuildValue("{sOsOsOsOsO}", + PICKLED_TAG, self->tag, + PICKLED_CHILDREN, children, + PICKLED_ATTRIB, self->extra->attrib, + PICKLED_TEXT, JOIN_OBJ(self->text), + PICKLED_TAIL, JOIN_OBJ(self->tail)); + if (instancedict) { + Py_DECREF(children); + return instancedict; + } + else { + for (i = 0; i < PyList_GET_SIZE(children); i++) + Py_DECREF(PyList_GET_ITEM(children, i)); + Py_DECREF(children); + + return NULL; + } +} + +static PyObject * +element_setstate_from_attributes(ElementObject *self, + PyObject *tag, + PyObject *attrib, + PyObject *text, + PyObject *tail, + PyObject *children) +{ + Py_ssize_t i, nchildren; + + if (!tag) { + PyErr_SetString(PyExc_TypeError, "tag may not be NULL"); + return NULL; + } + + Py_CLEAR(self->tag); + self->tag = tag; + Py_INCREF(self->tag); + + _clear_joined_ptr(&self->text); + self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None; + Py_INCREF(JOIN_OBJ(self->text)); + + _clear_joined_ptr(&self->tail); + self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None; + Py_INCREF(JOIN_OBJ(self->tail)); + + /* Handle ATTRIB and CHILDREN. */ + if (!children && !attrib) + Py_RETURN_NONE; + + /* Compute 'nchildren'. */ + if (children) { + if (!PyList_Check(children)) { + PyErr_SetString(PyExc_TypeError, "'_children' is not a list"); + return NULL; + } + nchildren = PyList_Size(children); + } + else { + nchildren = 0; + } + + /* Allocate 'extra'. */ + if (element_resize(self, nchildren)) { + return NULL; + } + assert(self->extra && self->extra->allocated >= nchildren); + + /* Copy children */ + for (i = 0; i < nchildren; i++) { + self->extra->children[i] = PyList_GET_ITEM(children, i); + Py_INCREF(self->extra->children[i]); + } + + self->extra->length = nchildren; + self->extra->allocated = nchildren; + + /* Stash attrib. */ + if (attrib) { + Py_CLEAR(self->extra->attrib); + self->extra->attrib = attrib; + Py_INCREF(attrib); + } + + Py_RETURN_NONE; +} + +/* __setstate__ for Element instance from the Python implementation. + * 'state' should be the instance dict. + */ +static PyObject * +element_setstate_from_Python(ElementObject *self, PyObject *state) +{ + static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT, + PICKLED_TAIL, PICKLED_CHILDREN, 0}; + PyObject *args; + PyObject *tag, *attrib, *text, *tail, *children; + PyObject *retval; + + tag = attrib = text = tail = children = NULL; + args = PyTuple_New(0); + if (!args) + return NULL; + + if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag, + &attrib, &text, &tail, &children)) + retval = element_setstate_from_attributes(self, tag, attrib, text, + tail, children); + else + retval = NULL; + + Py_DECREF(args); + return retval; +} + +static PyObject * +element_setstate(ElementObject *self, PyObject *state) +{ + if (!PyDict_CheckExact(state)) { + PyErr_Format(PyExc_TypeError, + "Don't know how to unpickle \"%.200R\" as an Element", + state); + return NULL; + } + else + return element_setstate_from_Python(self, state); +} + LOCAL(int) checkpath(PyObject* tag) { @@ -723,13 +1000,16 @@ checkpath(PyObject* tag) (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.') if (PyUnicode_Check(tag)) { - Py_UNICODE *p = PyUnicode_AS_UNICODE(tag); - for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) { - if (p[i] == '{') + const Py_ssize_t len = PyUnicode_GET_LENGTH(tag); + void *data = PyUnicode_DATA(tag); + unsigned int kind = PyUnicode_KIND(tag); + for (i = 0; i < len; i++) { + Py_UCS4 ch = PyUnicode_READ(kind, data, i); + if (ch == '{') check = 0; - else if (p[i] == '}') + else if (ch == '}') check = 1; - else if (check && PATHCHAR(p[i])) + else if (check && PATHCHAR(ch)) return 1; } return 0; @@ -772,6 +1052,15 @@ element_extend(ElementObject* self, PyObject* args) seqlen = PySequence_Size(seq); for (i = 0; i < seqlen; i++) { PyObject* element = PySequence_Fast_GET_ITEM(seq, i); + if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) { + Py_DECREF(seq); + PyErr_Format( + PyExc_TypeError, + "expected an Element, not \"%.200s\"", + Py_TYPE(element)->tp_name); + return NULL; + } + if (element_add_subelement(self, element) < 0) { Py_DECREF(seq); return NULL; @@ -784,23 +1073,27 @@ element_extend(ElementObject* self, PyObject* args) } static PyObject* -element_find(ElementObject* self, PyObject* args) +element_find(ElementObject *self, PyObject *args, PyObject *kwds) { int i; - PyObject* tag; PyObject* namespaces = Py_None; - if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces)) + static char *kwlist[] = {"path", "namespaces", 0}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist, + &tag, &namespaces)) return NULL; - if (checkpath(tag) || namespaces != Py_None) - return PyObject_CallMethod( - elementpath_obj, "find", "OOO", self, tag, namespaces + if (checkpath(tag) || namespaces != Py_None) { + _Py_IDENTIFIER(find); + return _PyObject_CallMethodId( + elementpath_obj, &PyId_find, "OOO", self, tag, namespaces ); + } if (!self->extra) Py_RETURN_NONE; - + for (i = 0; i < self->extra->length; i++) { PyObject* item = self->extra->children[i]; if (Element_CheckExact(item) && @@ -814,19 +1107,22 @@ element_find(ElementObject* self, PyObject* args) } static PyObject* -element_findtext(ElementObject* self, PyObject* args) +element_findtext(ElementObject *self, PyObject *args, PyObject *kwds) { int i; - PyObject* tag; PyObject* default_value = Py_None; PyObject* namespaces = Py_None; - if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces)) + _Py_IDENTIFIER(findtext); + static char *kwlist[] = {"path", "default", "namespaces", 0}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist, + &tag, &default_value, &namespaces)) return NULL; if (checkpath(tag) || namespaces != Py_None) - return PyObject_CallMethod( - elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces + return _PyObject_CallMethodId( + elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces ); if (!self->extra) { @@ -840,7 +1136,7 @@ element_findtext(ElementObject* self, PyObject* args) PyObject* text = element_get_text(item); if (text == Py_None) - return PyUnicode_FromString(""); + return PyUnicode_New(0, 0); Py_XINCREF(text); return text; } @@ -851,20 +1147,24 @@ element_findtext(ElementObject* self, PyObject* args) } static PyObject* -element_findall(ElementObject* self, PyObject* args) +element_findall(ElementObject *self, PyObject *args, PyObject *kwds) { int i; PyObject* out; - PyObject* tag; PyObject* namespaces = Py_None; - if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces)) + static char *kwlist[] = {"path", "namespaces", 0}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist, + &tag, &namespaces)) return NULL; - if (checkpath(tag) || namespaces != Py_None) - return PyObject_CallMethod( - elementpath_obj, "findall", "OOO", self, tag, namespaces + if (checkpath(tag) || namespaces != Py_None) { + _Py_IDENTIFIER(findall); + return _PyObject_CallMethodId( + elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces ); + } out = PyList_New(0); if (!out) @@ -888,26 +1188,33 @@ element_findall(ElementObject* self, PyObject* args) } static PyObject* -element_iterfind(ElementObject* self, PyObject* args) +element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds) { PyObject* tag; PyObject* namespaces = Py_None; - if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces)) + _Py_IDENTIFIER(iterfind); + static char *kwlist[] = {"path", "namespaces", 0}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist, + &tag, &namespaces)) return NULL; - return PyObject_CallMethod( - elementpath_obj, "iterfind", "OOO", self, tag, namespaces + return _PyObject_CallMethodId( + elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces ); } static PyObject* -element_get(ElementObject* self, PyObject* args) +element_get(ElementObject* self, PyObject* args, PyObject* kwds) { PyObject* value; + static char* kwlist[] = {"key", "default", 0}; PyObject* key; PyObject* default_value = Py_None; - if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value)) + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key, + &default_value)) return NULL; if (!self->extra || self->extra->attrib == Py_None) @@ -949,67 +1256,34 @@ element_getchildren(ElementObject* self, PyObject* args) return list; } -static PyObject* -element_iter(ElementObject* self, PyObject* args) -{ - PyObject* result; - - PyObject* tag = Py_None; - if (!PyArg_ParseTuple(args, "|O:iter", &tag)) - return NULL; - - if (!elementtree_iter_obj) { - PyErr_SetString( - PyExc_RuntimeError, - "iter helper not found" - ); - return NULL; - } - args = PyTuple_New(2); - if (!args) - return NULL; +static PyObject * +create_elementiter(ElementObject *self, PyObject *tag, int gettext); - Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self); - Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag); - result = PyObject_CallObject(elementtree_iter_obj, args); +static PyObject * +element_iter(ElementObject *self, PyObject *args, PyObject *kwds) +{ + PyObject* tag = Py_None; + static char* kwlist[] = {"tag", 0}; - Py_DECREF(args); + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag)) + return NULL; - return result; + return create_elementiter(self, tag, 0); } static PyObject* element_itertext(ElementObject* self, PyObject* args) { - PyObject* result; - if (!PyArg_ParseTuple(args, ":itertext")) return NULL; - if (!elementtree_itertext_obj) { - PyErr_SetString( - PyExc_RuntimeError, - "itertext helper not found" - ); - return NULL; - } - - args = PyTuple_New(1); - if (!args) - return NULL; - - Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self); - - result = PyObject_CallObject(elementtree_itertext_obj, args); - - Py_DECREF(args); - - return result; + return create_elementiter(self, Py_None, 1); } + static PyObject* element_getitem(PyObject* self_, Py_ssize_t index) { @@ -1039,7 +1313,7 @@ element_insert(ElementObject* self, PyObject* args) return NULL; if (!self->extra) - element_new_extra(self, NULL); + create_extra(self, NULL); if (index < 0) { index += self->extra->length; @@ -1110,7 +1384,7 @@ element_makeelement(PyObject* self, PyObject* args, PyObject* kw) if (!attrib) return NULL; - elem = element_new(tag, attrib); + elem = create_new_element(tag, attrib); Py_DECREF(attrib); @@ -1118,31 +1392,6 @@ element_makeelement(PyObject* self, PyObject* args, PyObject* kw) } static PyObject* -element_reduce(ElementObject* self, PyObject* args) -{ - if (!PyArg_ParseTuple(args, ":__reduce__")) - return NULL; - - /* Hack alert: This method is used to work around a __copy__ - problem on certain 2.3 and 2.4 versions. To save time and - simplify the code, we create the copy in here, and use a dummy - copyelement helper to trick the copy module into doing the - right thing. */ - - if (!elementtree_copyelement_obj) { - PyErr_SetString( - PyExc_RuntimeError, - "copyelement helper not found" - ); - return NULL; - } - - return Py_BuildValue( - "O(N)", elementtree_copyelement_obj, element_copy(self, args) - ); -} - -static PyObject* element_remove(ElementObject* self, PyObject* args) { int i; @@ -1189,7 +1438,10 @@ element_remove(ElementObject* self, PyObject* args) static PyObject* element_repr(ElementObject* self) { - return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self); + if (self->tag) + return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self); + else + return PyUnicode_FromFormat("<Element at %p>", self); } static PyObject* @@ -1203,7 +1455,7 @@ element_set(ElementObject* self, PyObject* args) return NULL; if (!self->extra) - element_new_extra(self, NULL); + create_extra(self, NULL); attrib = element_get_attrib(self); if (!attrib) @@ -1250,13 +1502,8 @@ element_subscr(PyObject* self_, PyObject* item) { ElementObject* self = (ElementObject*) self_; -#if (PY_VERSION_HEX < 0x02050000) - if (PyInt_Check(item) || PyLong_Check(item)) { - long i = PyInt_AsLong(item); -#else if (PyIndex_Check(item)) { Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); -#endif if (i == -1 && PyErr_Occurred()) { return NULL; @@ -1307,13 +1554,8 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) { ElementObject* self = (ElementObject*) self_; -#if (PY_VERSION_HEX < 0x02050000) - if (PyInt_Check(item) || PyLong_Check(item)) { - long i = PyInt_AsLong(item); -#else if (PyIndex_Check(item)) { Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); -#endif if (i == -1 && PyErr_Occurred()) { return -1; @@ -1329,7 +1571,7 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) PyObject* seq = NULL; if (!self->extra) - element_new_extra(self, NULL); + create_extra(self, NULL); if (PySlice_GetIndicesEx(item, self->extra->length, @@ -1337,9 +1579,74 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) return -1; } - if (value == NULL) - newlen = 0; + if (value == NULL) { + /* Delete slice */ + size_t cur; + Py_ssize_t i; + + if (slicelen <= 0) + return 0; + + /* Since we're deleting, the direction of the range doesn't matter, + * so for simplicity make it always ascending. + */ + if (step < 0) { + stop = start + 1; + start = stop + step * (slicelen - 1) - 1; + step = -step; + } + + assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *)); + + /* recycle is a list that will contain all the children + * scheduled for removal. + */ + if (!(recycle = PyList_New(slicelen))) { + PyErr_NoMemory(); + return -1; + } + + /* This loop walks over all the children that have to be deleted, + * with cur pointing at them. num_moved is the amount of children + * until the next deleted child that have to be "shifted down" to + * occupy the deleted's places. + * Note that in the ith iteration, shifting is done i+i places down + * because i children were already removed. + */ + for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) { + /* Compute how many children have to be moved, clipping at the + * list end. + */ + Py_ssize_t num_moved = step - 1; + if (cur + step >= (size_t)self->extra->length) { + num_moved = self->extra->length - cur - 1; + } + + PyList_SET_ITEM(recycle, i, self->extra->children[cur]); + + memmove( + self->extra->children + cur - i, + self->extra->children + cur + 1, + num_moved * sizeof(PyObject *)); + } + + /* Leftover "tail" after the last removed child */ + cur = start + (size_t)slicelen * step; + if (cur < (size_t)self->extra->length) { + memmove( + self->extra->children + cur - slicelen, + self->extra->children + cur, + (self->extra->length - cur) * sizeof(PyObject *)); + } + + self->extra->length -= slicelen; + + /* Discard the recycle list with all the deleted sub-elements */ + Py_XDECREF(recycle); + return 0; + } else { + /* A new slice is actually being assigned */ seq = PySequence_Fast(value, ""); if (!seq) { PyErr_Format( @@ -1354,19 +1661,13 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) if (step != 1 && newlen != slicelen) { PyErr_Format(PyExc_ValueError, -#if (PY_VERSION_HEX < 0x02050000) - "attempt to assign sequence of size %d " - "to extended slice of size %d", -#else "attempt to assign sequence of size %zd " "to extended slice of size %zd", -#endif newlen, slicelen ); return -1; } - /* Resize before creating the recycle bin, to prevent refleaks. */ if (newlen > slicelen) { if (element_resize(self, newlen - slicelen) < 0) { @@ -1431,25 +1732,25 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) static PyMethodDef element_methods[] = { - {"clear", (PyCFunction) element_clear, METH_VARARGS}, + {"clear", (PyCFunction) element_clearmethod, METH_VARARGS}, - {"get", (PyCFunction) element_get, METH_VARARGS}, + {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS}, {"set", (PyCFunction) element_set, METH_VARARGS}, - {"find", (PyCFunction) element_find, METH_VARARGS}, - {"findtext", (PyCFunction) element_findtext, METH_VARARGS}, - {"findall", (PyCFunction) element_findall, METH_VARARGS}, + {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS}, + {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS}, + {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS}, {"append", (PyCFunction) element_append, METH_VARARGS}, {"extend", (PyCFunction) element_extend, METH_VARARGS}, {"insert", (PyCFunction) element_insert, METH_VARARGS}, {"remove", (PyCFunction) element_remove, METH_VARARGS}, - {"iter", (PyCFunction) element_iter, METH_VARARGS}, + {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS}, {"itertext", (PyCFunction) element_itertext, METH_VARARGS}, - {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS}, + {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS}, - {"getiterator", (PyCFunction) element_iter, METH_VARARGS}, + {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS}, {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS}, {"items", (PyCFunction) element_items, METH_VARARGS}, @@ -1459,18 +1760,9 @@ static PyMethodDef element_methods[] = { {"__copy__", (PyCFunction) element_copy, METH_VARARGS}, {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS}, - - /* Some 2.3 and 2.4 versions do not handle the __copy__ method on - C objects correctly, so we have to fake it using a __reduce__- - based hack (see the element_reduce implementation above for - details). */ - - /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're - using a runtime test to figure out if we need to fake things - or now (see the init code below). The following entry is - enabled only if the hack is needed. */ - - {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS}, + {"__sizeof__", element_sizeof, METH_NOARGS}, + {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS}, + {"__setstate__", (PyCFunction)element_setstate, METH_O}, {NULL, NULL} }; @@ -1483,7 +1775,7 @@ element_getattro(ElementObject* self, PyObject* nameobj) if (PyUnicode_Check(nameobj)) name = _PyUnicode_AsString(nameobj); - + if (name == NULL) return NULL; @@ -1510,7 +1802,7 @@ element_getattro(ElementObject* self, PyObject* nameobj) } else if (strcmp(name, "attrib") == 0) { PyErr_Clear(); if (!self->extra) - element_new_extra(self, NULL); + create_extra(self, NULL); res = element_get_attrib(self); } @@ -1522,17 +1814,15 @@ element_getattro(ElementObject* self, PyObject* nameobj) } static int -element_setattr(ElementObject* self, const char* name, PyObject* value) +element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value) { - if (value == NULL) { - PyErr_SetString( - PyExc_AttributeError, - "can't delete element attributes" - ); - return -1; - } + char *name = ""; + if (PyUnicode_Check(nameobj)) + name = _PyUnicode_AsString(nameobj); - if (strcmp(name, "tag") == 0) { + if (name == NULL) { + return -1; + } else if (strcmp(name, "tag") == 0) { Py_DECREF(self->tag); self->tag = value; Py_INCREF(self->tag); @@ -1546,12 +1836,13 @@ element_setattr(ElementObject* self, const char* name, PyObject* value) Py_INCREF(self->tail); } else if (strcmp(name, "attrib") == 0) { if (!self->extra) - element_new_extra(self, NULL); + create_extra(self, NULL); Py_DECREF(self->extra->attrib); self->extra->attrib = value; Py_INCREF(self->extra->attrib); } else { - PyErr_SetString(PyExc_AttributeError, name); + PyErr_SetString(PyExc_AttributeError, + "Can't set arbitrary attributes on Element"); return -1; } @@ -1576,136 +1867,504 @@ static PyMappingMethods element_as_mapping = { static PyTypeObject Element_Type = { PyVarObject_HEAD_INIT(NULL, 0) - "Element", sizeof(ElementObject), 0, + "xml.etree.ElementTree.Element", sizeof(ElementObject), 0, /* methods */ - (destructor)element_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - (setattrfunc)element_setattr, /* tp_setattr */ - 0, /* tp_reserved */ - (reprfunc)element_repr, /* tp_repr */ - 0, /* tp_as_number */ - &element_as_sequence, /* tp_as_sequence */ - &element_as_mapping, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - (getattrofunc)element_getattro, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ - 0, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - element_methods, /* tp_methods */ - 0, /* tp_members */ + (destructor)element_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + (reprfunc)element_repr, /* tp_repr */ + 0, /* tp_as_number */ + &element_as_sequence, /* tp_as_sequence */ + &element_as_mapping, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + (getattrofunc)element_getattro, /* tp_getattro */ + (setattrofunc)element_setattro, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + /* tp_flags */ + 0, /* tp_doc */ + (traverseproc)element_gc_traverse, /* tp_traverse */ + (inquiry)element_gc_clear, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + element_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)element_init, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + element_new, /* tp_new */ + 0, /* tp_free */ }; +/******************************* Element iterator ****************************/ + +/* ElementIterObject represents the iteration state over an XML element in + * pre-order traversal. To keep track of which sub-element should be returned + * next, a stack of parents is maintained. This is a standard stack-based + * iterative pre-order traversal of a tree. + * The stack is managed using a single-linked list starting at parent_stack. + * Each stack node contains the saved parent to which we should return after + * the current one is exhausted, and the next child to examine in that parent. + */ +typedef struct ParentLocator_t { + ElementObject *parent; + Py_ssize_t child_index; + struct ParentLocator_t *next; +} ParentLocator; + +typedef struct { + PyObject_HEAD + ParentLocator *parent_stack; + ElementObject *root_element; + PyObject *sought_tag; + int root_done; + int gettext; +} ElementIterObject; + + +static void +elementiter_dealloc(ElementIterObject *it) +{ + ParentLocator *p = it->parent_stack; + while (p) { + ParentLocator *temp = p; + Py_XDECREF(p->parent); + p = p->next; + PyObject_Free(temp); + } + + Py_XDECREF(it->sought_tag); + Py_XDECREF(it->root_element); + + PyObject_GC_UnTrack(it); + PyObject_GC_Del(it); +} + +static int +elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg) +{ + ParentLocator *p = it->parent_stack; + while (p) { + Py_VISIT(p->parent); + p = p->next; + } + + Py_VISIT(it->root_element); + Py_VISIT(it->sought_tag); + return 0; +} + +/* Helper function for elementiter_next. Add a new parent to the parent stack. + */ +static ParentLocator * +parent_stack_push_new(ParentLocator *stack, ElementObject *parent) +{ + ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator)); + if (new_node) { + new_node->parent = parent; + Py_INCREF(parent); + new_node->child_index = 0; + new_node->next = stack; + } + return new_node; +} + +static PyObject * +elementiter_next(ElementIterObject *it) +{ + /* Sub-element iterator. + * + * A short note on gettext: this function serves both the iter() and + * itertext() methods to avoid code duplication. However, there are a few + * small differences in the way these iterations work. Namely: + * - itertext() only yields text from nodes that have it, and continues + * iterating when a node doesn't have text (so it doesn't return any + * node like iter()) + * - itertext() also has to handle tail, after finishing with all the + * children of a node. + */ + ElementObject *cur_parent; + Py_ssize_t child_index; + + while (1) { + /* Handle the case reached in the beginning and end of iteration, where + * the parent stack is empty. The root_done flag gives us indication + * whether we've just started iterating (so root_done is 0), in which + * case the root is returned. If root_done is 1 and we're here, the + * iterator is exhausted. + */ + if (!it->parent_stack->parent) { + if (it->root_done) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } else { + it->parent_stack = parent_stack_push_new(it->parent_stack, + it->root_element); + if (!it->parent_stack) { + PyErr_NoMemory(); + return NULL; + } + + it->root_done = 1; + if (it->sought_tag == Py_None || + PyObject_RichCompareBool(it->root_element->tag, + it->sought_tag, Py_EQ) == 1) { + if (it->gettext) { + PyObject *text = element_get_text(it->root_element); + if (!text) + return NULL; + if (PyObject_IsTrue(text)) { + Py_INCREF(text); + return text; + } + } else { + Py_INCREF(it->root_element); + return (PyObject *)it->root_element; + } + } + } + } + + /* See if there are children left to traverse in the current parent. If + * yes, visit the next child. If not, pop the stack and try again. + */ + cur_parent = it->parent_stack->parent; + child_index = it->parent_stack->child_index; + if (cur_parent->extra && child_index < cur_parent->extra->length) { + ElementObject *child = (ElementObject *) + cur_parent->extra->children[child_index]; + it->parent_stack->child_index++; + it->parent_stack = parent_stack_push_new(it->parent_stack, + child); + if (!it->parent_stack) { + PyErr_NoMemory(); + return NULL; + } + + if (it->gettext) { + PyObject *text = element_get_text(child); + if (!text) + return NULL; + if (PyObject_IsTrue(text)) { + Py_INCREF(text); + return text; + } + } else if (it->sought_tag == Py_None || + PyObject_RichCompareBool(child->tag, + it->sought_tag, Py_EQ) == 1) { + Py_INCREF(child); + return (PyObject *)child; + } + else + continue; + } + else { + PyObject *tail; + ParentLocator *next = it->parent_stack->next; + if (it->gettext) { + tail = element_get_tail(cur_parent); + if (!tail) + return NULL; + } + else + tail = Py_None; + Py_XDECREF(it->parent_stack->parent); + PyObject_Free(it->parent_stack); + it->parent_stack = next; + + /* Note that extra condition on it->parent_stack->parent here; + * this is because itertext() is supposed to only return *inner* + * text, not text following the element it began iteration with. + */ + if (it->parent_stack->parent && PyObject_IsTrue(tail)) { + Py_INCREF(tail); + return tail; + } + } + } + + return NULL; +} + + +static PyTypeObject ElementIter_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + /* Using the module's name since the pure-Python implementation does not + have such a type. */ + "_elementtree._element_iterator", /* tp_name */ + sizeof(ElementIterObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)elementiter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + 0, /* tp_doc */ + (traverseproc)elementiter_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)elementiter_next, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ +}; + + +static PyObject * +create_elementiter(ElementObject *self, PyObject *tag, int gettext) +{ + ElementIterObject *it; + PyObject *star = NULL; + + it = PyObject_GC_New(ElementIterObject, &ElementIter_Type); + if (!it) + return NULL; + if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) { + PyObject_GC_Del(it); + return NULL; + } + + it->parent_stack->parent = NULL; + it->parent_stack->child_index = 0; + it->parent_stack->next = NULL; + + if (PyUnicode_Check(tag)) + star = PyUnicode_FromString("*"); + else if (PyBytes_Check(tag)) + star = PyBytes_FromString("*"); + + if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1) + tag = Py_None; + + Py_XDECREF(star); + it->sought_tag = tag; + it->root_done = 0; + it->gettext = gettext; + it->root_element = self; + + Py_INCREF(self); + Py_INCREF(tag); + + PyObject_GC_Track(it); + return (PyObject *)it; +} + + /* ==================================================================== */ /* the tree builder type */ typedef struct { PyObject_HEAD - PyObject* root; /* root node (first created node) */ + PyObject *root; /* root node (first created node) */ - ElementObject* this; /* current node */ - ElementObject* last; /* most recently created node */ + PyObject *this; /* current node */ + PyObject *last; /* most recently created node */ - PyObject* data; /* data collector (string or list), or NULL */ + PyObject *data; /* data collector (string or list), or NULL */ - PyObject* stack; /* element stack */ - Py_ssize_t index; /* current stack size (0=empty) */ + PyObject *stack; /* element stack */ + Py_ssize_t index; /* current stack size (0 means empty) */ - /* element tracing */ - PyObject* events; /* list of events, or NULL if not collecting */ - PyObject* start_event_obj; /* event objects (NULL to ignore) */ - PyObject* end_event_obj; - PyObject* start_ns_event_obj; - PyObject* end_ns_event_obj; + PyObject *element_factory; + /* element tracing */ + PyObject *events; /* list of events, or NULL if not collecting */ + PyObject *start_event_obj; /* event objects (NULL to ignore) */ + PyObject *end_event_obj; + PyObject *start_ns_event_obj; + PyObject *end_ns_event_obj; } TreeBuilderObject; -static PyTypeObject TreeBuilder_Type; - #define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type) /* -------------------------------------------------------------------- */ /* constructor and destructor */ -LOCAL(PyObject*) -treebuilder_new(void) +static PyObject * +treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - TreeBuilderObject* self; + TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0); + if (t != NULL) { + t->root = NULL; - self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type); - if (self == NULL) - return NULL; - - self->root = NULL; + Py_INCREF(Py_None); + t->this = Py_None; + Py_INCREF(Py_None); + t->last = Py_None; + + t->data = NULL; + t->element_factory = NULL; + t->stack = PyList_New(20); + if (!t->stack) { + Py_DECREF(t->this); + Py_DECREF(t->last); + Py_DECREF((PyObject *) t); + return NULL; + } + t->index = 0; - Py_INCREF(Py_None); - self->this = (ElementObject*) Py_None; + t->events = NULL; + t->start_event_obj = t->end_event_obj = NULL; + t->start_ns_event_obj = t->end_ns_event_obj = NULL; + } + return (PyObject *)t; +} - Py_INCREF(Py_None); - self->last = (ElementObject*) Py_None; +static int +treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + static char *kwlist[] = {"element_factory", 0}; + PyObject *element_factory = NULL; + TreeBuilderObject *self_tb = (TreeBuilderObject *)self; + PyObject *tmp; - self->data = NULL; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist, + &element_factory)) { + return -1; + } - self->stack = PyList_New(20); - self->index = 0; + if (element_factory) { + Py_INCREF(element_factory); + tmp = self_tb->element_factory; + self_tb->element_factory = element_factory; + Py_XDECREF(tmp); + } - self->events = NULL; - self->start_event_obj = self->end_event_obj = NULL; - self->start_ns_event_obj = self->end_ns_event_obj = NULL; + return 0; +} - ALLOC(sizeof(TreeBuilderObject), "create treebuilder"); +static int +treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->root); + Py_VISIT(self->this); + Py_VISIT(self->last); + Py_VISIT(self->data); + Py_VISIT(self->stack); + Py_VISIT(self->element_factory); + return 0; +} - return (PyObject*) self; +static int +treebuilder_gc_clear(TreeBuilderObject *self) +{ + Py_CLEAR(self->end_ns_event_obj); + Py_CLEAR(self->start_ns_event_obj); + Py_CLEAR(self->end_event_obj); + Py_CLEAR(self->start_event_obj); + Py_CLEAR(self->events); + Py_CLEAR(self->stack); + Py_CLEAR(self->data); + Py_CLEAR(self->last); + Py_CLEAR(self->this); + Py_CLEAR(self->element_factory); + Py_CLEAR(self->root); + return 0; } -static PyObject* -treebuilder(PyObject* self_, PyObject* args) +static void +treebuilder_dealloc(TreeBuilderObject *self) { - if (!PyArg_ParseTuple(args, ":TreeBuilder")) - return NULL; - - return treebuilder_new(); + PyObject_GC_UnTrack(self); + treebuilder_gc_clear(self); + Py_TYPE(self)->tp_free((PyObject *)self); } -static void -treebuilder_dealloc(TreeBuilderObject* self) -{ - Py_XDECREF(self->end_ns_event_obj); - Py_XDECREF(self->start_ns_event_obj); - Py_XDECREF(self->end_event_obj); - Py_XDECREF(self->start_event_obj); - Py_XDECREF(self->events); - Py_DECREF(self->stack); - Py_XDECREF(self->data); - Py_DECREF(self->last); - Py_DECREF(self->this); - Py_XDECREF(self->root); +/* -------------------------------------------------------------------- */ +/* helpers for handling of arbitrary element-like objects */ - RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder"); +static int +treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data, + PyObject **dest, _Py_Identifier *name) +{ + if (Element_CheckExact(element)) { + Py_DECREF(JOIN_OBJ(*dest)); + *dest = JOIN_SET(data, PyList_CheckExact(data)); + return 0; + } + else { + PyObject *joined = list_join(data); + int r; + if (joined == NULL) + return -1; + r = _PyObject_SetAttrId(element, name, joined); + Py_DECREF(joined); + return r; + } +} - PyObject_Del(self); +/* These two functions steal a reference to data */ +static int +treebuilder_set_element_text(PyObject *element, PyObject *data) +{ + _Py_IDENTIFIER(text); + return treebuilder_set_element_text_or_tail( + element, data, &((ElementObject *) element)->text, &PyId_text); } -/* -------------------------------------------------------------------- */ -/* handlers */ +static int +treebuilder_set_element_tail(PyObject *element, PyObject *data) +{ + _Py_IDENTIFIER(tail); + return treebuilder_set_element_text_or_tail( + element, data, &((ElementObject *) element)->tail, &PyId_tail); +} -LOCAL(PyObject*) -treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding, - PyObject* standalone) +static int +treebuilder_add_subelement(PyObject *element, PyObject *child) { - Py_RETURN_NONE; + _Py_IDENTIFIER(append); + if (Element_CheckExact(element)) { + ElementObject *elem = (ElementObject *) element; + return element_add_subelement(elem, child); + } + else { + PyObject *res; + res = _PyObject_CallMethodId(element, &PyId_append, "O", child); + if (res == NULL) + return -1; + Py_DECREF(res); + return 0; + } } +/* -------------------------------------------------------------------- */ +/* handlers */ + LOCAL(PyObject*) treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, PyObject* attrib) @@ -1715,27 +2374,29 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, if (self->data) { if (self->this == self->last) { - Py_DECREF(JOIN_OBJ(self->last->text)); - self->last->text = JOIN_SET( - self->data, PyList_CheckExact(self->data) - ); - } else { - Py_DECREF(JOIN_OBJ(self->last->tail)); - self->last->tail = JOIN_SET( - self->data, PyList_CheckExact(self->data) - ); + if (treebuilder_set_element_text(self->last, self->data)) + return NULL; + } + else { + if (treebuilder_set_element_tail(self->last, self->data)) + return NULL; } self->data = NULL; } - node = element_new(tag, attrib); - if (!node) + if (self->element_factory && self->element_factory != Py_None) { + node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib); + } else { + node = create_new_element(tag, attrib); + } + if (!node) { return NULL; + } - this = (PyObject*) self->this; + this = self->this; if (this != Py_None) { - if (element_add_subelement((ElementObject*) this, node) < 0) + if (treebuilder_add_subelement(this, node) < 0) goto error; } else { if (self->root) { @@ -1761,19 +2422,17 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, Py_DECREF(this); Py_INCREF(node); - self->this = (ElementObject*) node; + self->this = node; Py_DECREF(self->last); Py_INCREF(node); - self->last = (ElementObject*) node; + self->last = node; if (self->start_event_obj) { PyObject* res; PyObject* action = self->start_event_obj; - res = PyTuple_New(2); + res = PyTuple_Pack(2, action, node); if (res) { - Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action); - Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node); PyList_Append(self->events, res); Py_DECREF(res); } else @@ -1791,7 +2450,7 @@ LOCAL(PyObject*) treebuilder_handle_data(TreeBuilderObject* self, PyObject* data) { if (!self->data) { - if (self->last == (ElementObject*) Py_None) { + if (self->last == Py_None) { /* ignore calls to data before the first call to start */ Py_RETURN_NONE; } @@ -1801,6 +2460,7 @@ treebuilder_handle_data(TreeBuilderObject* self, PyObject* data) /* more than one item; use a list to collect items */ if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 && PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) { + /* XXX this code path unused in Python 3? */ /* expat often generates single character data sections; handle the most common case by resizing the existing string... */ Py_ssize_t size = PyBytes_GET_SIZE(self->data); @@ -1830,15 +2490,11 @@ treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag) if (self->data) { if (self->this == self->last) { - Py_DECREF(JOIN_OBJ(self->last->text)); - self->last->text = JOIN_SET( - self->data, PyList_CheckExact(self->data) - ); + if (treebuilder_set_element_text(self->last, self->data)) + return NULL; } else { - Py_DECREF(JOIN_OBJ(self->last->tail)); - self->last->tail = JOIN_SET( - self->data, PyList_CheckExact(self->data) - ); + if (treebuilder_set_element_tail(self->last, self->data)) + return NULL; } self->data = NULL; } @@ -1858,17 +2514,15 @@ treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag) Py_DECREF(self->last); - self->last = (ElementObject*) self->this; - self->this = (ElementObject*) item; + self->last = self->this; + self->this = item; if (self->end_event_obj) { PyObject* res; PyObject* action = self->end_event_obj; PyObject* node = (PyObject*) self->last; - res = PyTuple_New(2); + res = PyTuple_Pack(2, action, node); if (res) { - Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action); - Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node); PyList_Append(self->events, res); Py_DECREF(res); } else @@ -1914,8 +2568,12 @@ treebuilder_handle_namespace(TreeBuilderObject* self, int start, PyTuple_SET_ITEM(res, 1, parcel); PyList_Append(self->events, res); Py_DECREF(res); - } else + } + else { + Py_DECREF(action); + Py_DECREF(parcel); PyErr_Clear(); /* FIXME: propagate error */ + } } /* -------------------------------------------------------------------- */ @@ -1977,55 +2635,54 @@ treebuilder_start(TreeBuilderObject* self, PyObject* args) return treebuilder_handle_start(self, tag, attrib); } -static PyObject* -treebuilder_xml(TreeBuilderObject* self, PyObject* args) -{ - PyObject* encoding; - PyObject* standalone; - if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone)) - return NULL; - - return treebuilder_handle_xml(self, encoding, standalone); -} - static PyMethodDef treebuilder_methods[] = { {"data", (PyCFunction) treebuilder_data, METH_VARARGS}, {"start", (PyCFunction) treebuilder_start, METH_VARARGS}, {"end", (PyCFunction) treebuilder_end, METH_VARARGS}, - {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS}, {"close", (PyCFunction) treebuilder_close, METH_VARARGS}, {NULL, NULL} }; static PyTypeObject TreeBuilder_Type = { PyVarObject_HEAD_INIT(NULL, 0) - "TreeBuilder", sizeof(TreeBuilderObject), 0, + "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0, /* methods */ - (destructor)treebuilder_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ - 0, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - treebuilder_methods, /* tp_methods */ - 0, /* tp_members */ + (destructor)treebuilder_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + /* tp_flags */ + 0, /* tp_doc */ + (traverseproc)treebuilder_gc_traverse, /* tp_traverse */ + (inquiry)treebuilder_gc_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + treebuilder_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)treebuilder_init, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + treebuilder_new, /* tp_new */ + 0, /* tp_free */ }; /* ==================================================================== */ @@ -2034,39 +2691,36 @@ static PyTypeObject TreeBuilder_Type = { #if defined(USE_EXPAT) #include "expat.h" - -#if defined(USE_PYEXPAT_CAPI) #include "pyexpat.h" -static struct PyExpat_CAPI* expat_capi; +static struct PyExpat_CAPI *expat_capi; #define EXPAT(func) (expat_capi->func) -#else -#define EXPAT(func) (XML_##func) -#endif + +static XML_Memory_Handling_Suite ExpatMemoryHandler = { + PyObject_Malloc, PyObject_Realloc, PyObject_Free}; typedef struct { PyObject_HEAD XML_Parser parser; - PyObject* target; - PyObject* entity; - - PyObject* names; + PyObject *target; + PyObject *entity; - PyObject* handle_xml; + PyObject *names; - PyObject* handle_start; - PyObject* handle_data; - PyObject* handle_end; + PyObject *handle_start; + PyObject *handle_data; + PyObject *handle_end; - PyObject* handle_comment; - PyObject* handle_pi; + PyObject *handle_comment; + PyObject *handle_pi; + PyObject *handle_doctype; - PyObject* handle_close; + PyObject *handle_close; } XMLParserObject; -static PyTypeObject XMLParser_Type; +#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type) /* helpers */ @@ -2076,7 +2730,7 @@ makeuniversal(XMLParserObject* self, const char* string) /* convert a UTF-8 tag/attribute name from the expat parser to a universal name string */ - int size = strlen(string); + Py_ssize_t size = (Py_ssize_t) strlen(string); PyObject* key; PyObject* value; @@ -2095,7 +2749,7 @@ makeuniversal(XMLParserObject* self, const char* string) PyObject* tag; char* p; - int i; + Py_ssize_t i; /* look for namespace separator */ for (i = 0; i < size; i++) @@ -2113,7 +2767,7 @@ makeuniversal(XMLParserObject* self, const char* string) Py_INCREF(key); tag = key; } - + /* decode universal name */ p = PyBytes_AS_STRING(tag); value = PyUnicode_DecodeUTF8(p, size, "strict"); @@ -2135,20 +2789,39 @@ makeuniversal(XMLParserObject* self, const char* string) return value; } +/* Set the ParseError exception with the given parameters. + * If message is not NULL, it's used as the error string. Otherwise, the + * message string is the default for the given error_code. +*/ static void -expat_set_error(const char* message, int line, int column) +expat_set_error(enum XML_Error error_code, int line, int column, char *message) { - PyObject *error; - PyObject *position; - char buffer[256]; + PyObject *errmsg, *error, *position, *code; - sprintf(buffer, "%.100s: line %d, column %d", message, line, column); + errmsg = PyUnicode_FromFormat("%s: line %d, column %d", + message ? message : EXPAT(ErrorString)(error_code), + line, column); + if (errmsg == NULL) + return; - error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer); + error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg); + Py_DECREF(errmsg); if (!error) return; - /* add position attribute */ + /* Add code and position attributes */ + code = PyLong_FromLong((long)error_code); + if (!code) { + Py_DECREF(error); + return; + } + if (PyObject_SetAttrString(error, "code", code) == -1) { + Py_DECREF(error); + Py_DECREF(code); + return; + } + Py_DECREF(code); + position = Py_BuildValue("(ii)", line, column); if (!position) { Py_DECREF(error); @@ -2200,9 +2873,10 @@ expat_default_handler(XMLParserObject* self, const XML_Char* data_in, char message[128] = "undefined entity "; strncat(message, data_in, data_len < 100?data_len:100); expat_set_error( - message, + XML_ERROR_UNDEFINED_ENTITY, EXPAT(GetErrorLineNumber)(self->parser), - EXPAT(GetErrorColumnNumber)(self->parser) + EXPAT(GetErrorColumnNumber)(self->parser), + message ); } @@ -2247,21 +2921,18 @@ expat_start_handler(XMLParserObject* self, const XML_Char* tag_in, attrib_in += 2; } } else { - Py_INCREF(Py_None); - attrib = Py_None; + /* Pass an empty dictionary on */ + attrib = PyDict_New(); + if (!attrib) + return; } - if (TreeBuilder_CheckExact(self->target)) + if (TreeBuilder_CheckExact(self->target)) { /* shortcut */ res = treebuilder_handle_start((TreeBuilderObject*) self->target, tag, attrib); + } else if (self->handle_start) { - if (attrib == Py_None) { - Py_DECREF(attrib); - attrib = PyDict_New(); - if (!attrib) - return; - } res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib); } else res = NULL; @@ -2326,7 +2997,10 @@ expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix, PyObject* sprefix = NULL; PyObject* suri = NULL; - suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict"); + if (uri) + suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict"); + else + suri = PyUnicode_FromString(""); if (!suri) return; @@ -2372,6 +3046,78 @@ expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in) } static void +expat_start_doctype_handler(XMLParserObject *self, + const XML_Char *doctype_name, + const XML_Char *sysid, + const XML_Char *pubid, + int has_internal_subset) +{ + PyObject *self_pyobj = (PyObject *)self; + PyObject *doctype_name_obj, *sysid_obj, *pubid_obj; + PyObject *parser_doctype = NULL; + PyObject *res = NULL; + + doctype_name_obj = makeuniversal(self, doctype_name); + if (!doctype_name_obj) + return; + + if (sysid) { + sysid_obj = makeuniversal(self, sysid); + if (!sysid_obj) { + Py_DECREF(doctype_name_obj); + return; + } + } else { + Py_INCREF(Py_None); + sysid_obj = Py_None; + } + + if (pubid) { + pubid_obj = makeuniversal(self, pubid); + if (!pubid_obj) { + Py_DECREF(doctype_name_obj); + Py_DECREF(sysid_obj); + return; + } + } else { + Py_INCREF(Py_None); + pubid_obj = Py_None; + } + + /* If the target has a handler for doctype, call it. */ + if (self->handle_doctype) { + res = PyObject_CallFunction(self->handle_doctype, "OOO", + doctype_name_obj, pubid_obj, sysid_obj); + Py_CLEAR(res); + } + + /* Now see if the parser itself has a doctype method. If yes and it's + * a subclass, call it but warn about deprecation. If it's not a subclass + * (i.e. vanilla XMLParser), do nothing. + */ + parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype"); + if (parser_doctype) { + if (!XMLParser_CheckExact(self_pyobj)) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "This method of XMLParser is deprecated. Define" + " doctype() method on the TreeBuilder target.", + 1) < 0) { + goto clear; + } + res = PyObject_CallFunction(parser_doctype, "OOO", + doctype_name_obj, pubid_obj, sysid_obj); + Py_CLEAR(res); + } + } + +clear: + Py_XDECREF(parser_doctype); + Py_DECREF(doctype_name_obj); + Py_DECREF(pubid_obj); + Py_DECREF(sysid_obj); +} + +static void expat_pi_handler(XMLParserObject* self, const XML_Char* target_in, const XML_Char* data_in) { @@ -2394,185 +3140,160 @@ expat_pi_handler(XMLParserObject* self, const XML_Char* target_in, } } -static int -expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name, - XML_Encoding *info) -{ - PyObject* u; - Py_UNICODE* p; - unsigned char s[256]; - int i; - - memset(info, 0, sizeof(XML_Encoding)); - - for (i = 0; i < 256; i++) - s[i] = i; - - u = PyUnicode_Decode((char*) s, 256, name, "replace"); - if (!u) - return XML_STATUS_ERROR; - - if (PyUnicode_GET_SIZE(u) != 256) { - Py_DECREF(u); - return XML_STATUS_ERROR; - } - - p = PyUnicode_AS_UNICODE(u); +/* -------------------------------------------------------------------- */ - for (i = 0; i < 256; i++) { - if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER) - info->map[i] = p[i]; - else - info->map[i] = -1; +static PyObject * +xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0); + if (self) { + self->parser = NULL; + self->target = self->entity = self->names = NULL; + self->handle_start = self->handle_data = self->handle_end = NULL; + self->handle_comment = self->handle_pi = self->handle_close = NULL; + self->handle_doctype = NULL; } - - Py_DECREF(u); - - return XML_STATUS_OK; + return (PyObject *)self; } -/* -------------------------------------------------------------------- */ -/* constructor and destructor */ - -static PyObject* -xmlparser(PyObject* self_, PyObject* args, PyObject* kw) +static int +xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds) { - XMLParserObject* self; - /* FIXME: does this need to be static? */ - static XML_Memory_Handling_Suite memory_handler; + XMLParserObject *self_xp = (XMLParserObject *)self; + PyObject *target = NULL, *html = NULL; + char *encoding = NULL; + static char *kwlist[] = {"html", "target", "encoding", 0}; - PyObject* target = NULL; - char* encoding = NULL; - static char* kwlist[] = { "target", "encoding", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist, - &target, &encoding)) - return NULL; - -#if defined(USE_PYEXPAT_CAPI) - if (!expat_capi) { - PyErr_SetString( - PyExc_RuntimeError, "cannot load dispatch table from pyexpat" - ); - return NULL; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist, + &html, &target, &encoding)) { + return -1; } -#endif - self = PyObject_New(XMLParserObject, &XMLParser_Type); - if (self == NULL) - return NULL; + self_xp->entity = PyDict_New(); + if (!self_xp->entity) + return -1; - self->entity = PyDict_New(); - if (!self->entity) { - PyObject_Del(self); - return NULL; - } - - self->names = PyDict_New(); - if (!self->names) { - PyObject_Del(self->entity); - PyObject_Del(self); - return NULL; + self_xp->names = PyDict_New(); + if (!self_xp->names) { + Py_CLEAR(self_xp->entity); + return -1; } - memory_handler.malloc_fcn = PyObject_Malloc; - memory_handler.realloc_fcn = PyObject_Realloc; - memory_handler.free_fcn = PyObject_Free; - - self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}"); - if (!self->parser) { - PyObject_Del(self->names); - PyObject_Del(self->entity); - PyObject_Del(self); + self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}"); + if (!self_xp->parser) { + Py_CLEAR(self_xp->entity); + Py_CLEAR(self_xp->names); PyErr_NoMemory(); - return NULL; + return -1; } - /* setup target handlers */ - if (!target) { - target = treebuilder_new(); + if (target) { + Py_INCREF(target); + } else { + target = treebuilder_new(&TreeBuilder_Type, NULL, NULL); if (!target) { - EXPAT(ParserFree)(self->parser); - PyObject_Del(self->names); - PyObject_Del(self->entity); - PyObject_Del(self); - return NULL; + Py_CLEAR(self_xp->entity); + Py_CLEAR(self_xp->names); + EXPAT(ParserFree)(self_xp->parser); + return -1; } - } else - Py_INCREF(target); - self->target = target; + } + self_xp->target = target; - self->handle_xml = PyObject_GetAttrString(target, "xml"); - self->handle_start = PyObject_GetAttrString(target, "start"); - self->handle_data = PyObject_GetAttrString(target, "data"); - self->handle_end = PyObject_GetAttrString(target, "end"); - self->handle_comment = PyObject_GetAttrString(target, "comment"); - self->handle_pi = PyObject_GetAttrString(target, "pi"); - self->handle_close = PyObject_GetAttrString(target, "close"); + self_xp->handle_start = PyObject_GetAttrString(target, "start"); + self_xp->handle_data = PyObject_GetAttrString(target, "data"); + self_xp->handle_end = PyObject_GetAttrString(target, "end"); + self_xp->handle_comment = PyObject_GetAttrString(target, "comment"); + self_xp->handle_pi = PyObject_GetAttrString(target, "pi"); + self_xp->handle_close = PyObject_GetAttrString(target, "close"); + self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype"); PyErr_Clear(); /* configure parser */ - EXPAT(SetUserData)(self->parser, self); + EXPAT(SetUserData)(self_xp->parser, self_xp); EXPAT(SetElementHandler)( - self->parser, + self_xp->parser, (XML_StartElementHandler) expat_start_handler, (XML_EndElementHandler) expat_end_handler ); EXPAT(SetDefaultHandlerExpand)( - self->parser, + self_xp->parser, (XML_DefaultHandler) expat_default_handler ); EXPAT(SetCharacterDataHandler)( - self->parser, + self_xp->parser, (XML_CharacterDataHandler) expat_data_handler ); - if (self->handle_comment) + if (self_xp->handle_comment) EXPAT(SetCommentHandler)( - self->parser, + self_xp->parser, (XML_CommentHandler) expat_comment_handler ); - if (self->handle_pi) + if (self_xp->handle_pi) EXPAT(SetProcessingInstructionHandler)( - self->parser, + self_xp->parser, (XML_ProcessingInstructionHandler) expat_pi_handler ); + EXPAT(SetStartDoctypeDeclHandler)( + self_xp->parser, + (XML_StartDoctypeDeclHandler) expat_start_doctype_handler + ); EXPAT(SetUnknownEncodingHandler)( - self->parser, - (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL + self_xp->parser, + EXPAT(DefaultUnknownEncodingHandler), NULL ); - ALLOC(sizeof(XMLParserObject), "create expatparser"); + return 0; +} - return (PyObject*) self; +static int +xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->handle_close); + Py_VISIT(self->handle_pi); + Py_VISIT(self->handle_comment); + Py_VISIT(self->handle_end); + Py_VISIT(self->handle_data); + Py_VISIT(self->handle_start); + + Py_VISIT(self->target); + Py_VISIT(self->entity); + Py_VISIT(self->names); + + return 0; } -static void -xmlparser_dealloc(XMLParserObject* self) +static int +xmlparser_gc_clear(XMLParserObject *self) { EXPAT(ParserFree)(self->parser); - Py_XDECREF(self->handle_close); - Py_XDECREF(self->handle_pi); - Py_XDECREF(self->handle_comment); - Py_XDECREF(self->handle_end); - Py_XDECREF(self->handle_data); - Py_XDECREF(self->handle_start); - Py_XDECREF(self->handle_xml); - - Py_DECREF(self->target); - Py_DECREF(self->entity); - Py_DECREF(self->names); + Py_CLEAR(self->handle_close); + Py_CLEAR(self->handle_pi); + Py_CLEAR(self->handle_comment); + Py_CLEAR(self->handle_end); + Py_CLEAR(self->handle_data); + Py_CLEAR(self->handle_start); + Py_CLEAR(self->handle_doctype); - RELEASE(sizeof(XMLParserObject), "destroy expatparser"); + Py_CLEAR(self->target); + Py_CLEAR(self->entity); + Py_CLEAR(self->names); - PyObject_Del(self); + return 0; } -/* -------------------------------------------------------------------- */ -/* methods (in alphabetical order) */ +static void +xmlparser_dealloc(XMLParserObject* self) +{ + PyObject_GC_UnTrack(self); + xmlparser_gc_clear(self); + Py_TYPE(self)->tp_free((PyObject *)self); +} LOCAL(PyObject*) -expat_parse(XMLParserObject* self, char* data, int data_len, int final) +expat_parse(XMLParserObject* self, const char* data, int data_len, int final) { int ok; @@ -2583,9 +3304,10 @@ expat_parse(XMLParserObject* self, char* data, int data_len, int final) if (!ok) { expat_set_error( - EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)), + EXPAT(GetErrorCode)(self->parser), EXPAT(GetErrorLineNumber)(self->parser), - EXPAT(GetErrorColumnNumber)(self->parser) + EXPAT(GetErrorColumnNumber)(self->parser), + NULL ); return NULL; } @@ -2617,16 +3339,37 @@ xmlparser_close(XMLParserObject* self, PyObject* args) } static PyObject* -xmlparser_feed(XMLParserObject* self, PyObject* args) +xmlparser_feed(XMLParserObject* self, PyObject* arg) { /* feed data to parser */ - char* data; - int data_len; - if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len)) - return NULL; - - return expat_parse(self, data, data_len, 0); + if (PyUnicode_Check(arg)) { + Py_ssize_t data_len; + const char *data = PyUnicode_AsUTF8AndSize(arg, &data_len); + if (data == NULL) + return NULL; + if (data_len > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, "size does not fit in an int"); + return NULL; + } + /* Explicitly set UTF-8 encoding. Return code ignored. */ + (void)EXPAT(SetEncoding)(self->parser, "utf-8"); + return expat_parse(self, data, (int)data_len, 0); + } + else { + Py_buffer view; + PyObject *res; + if (PyObject_GetBuffer(arg, &view, PyBUF_SIMPLE) < 0) + return NULL; + if (view.len > INT_MAX) { + PyBuffer_Release(&view); + PyErr_SetString(PyExc_OverflowError, "size does not fit in an int"); + return NULL; + } + res = expat_parse(self, view.buf, (int)view.len, 0); + PyBuffer_Release(&view); + return res; + } } static PyObject* @@ -2636,6 +3379,7 @@ xmlparser_parse(XMLParserObject* self, PyObject* args) PyObject* reader; PyObject* buffer; + PyObject* temp; PyObject* res; PyObject* fileobj; @@ -2645,7 +3389,7 @@ xmlparser_parse(XMLParserObject* self, PyObject* args) reader = PyObject_GetAttrString(fileobj, "read"); if (!reader) return NULL; - + /* read from open file object */ for (;;) { @@ -2657,7 +3401,22 @@ xmlparser_parse(XMLParserObject* self, PyObject* args) return NULL; } - if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) { + if (PyUnicode_CheckExact(buffer)) { + /* A unicode object is encoded into bytes using UTF-8 */ + if (PyUnicode_GET_SIZE(buffer) == 0) { + Py_DECREF(buffer); + break; + } + temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass"); + Py_DECREF(buffer); + if (!temp) { + /* Propagate exception from PyUnicode_AsEncodedString */ + Py_DECREF(reader); + return NULL; + } + buffer = temp; + } + else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) { Py_DECREF(buffer); break; } @@ -2689,7 +3448,13 @@ xmlparser_parse(XMLParserObject* self, PyObject* args) } static PyObject* -xmlparser_setevents(XMLParserObject* self, PyObject* args) +xmlparser_doctype(XMLParserObject *self, PyObject *args) +{ + Py_RETURN_NONE; +} + +static PyObject* +xmlparser_setevents(XMLParserObject *self, PyObject* args) { /* activate element event reporting */ @@ -2705,7 +3470,7 @@ xmlparser_setevents(XMLParserObject* self, PyObject* args) if (!TreeBuilder_CheckExact(self->target)) { PyErr_SetString( PyExc_TypeError, - "event handling only supported for cElementTree.Treebuilder " + "event handling only supported for ElementTree.TreeBuilder " "targets" ); return NULL; @@ -2718,10 +3483,10 @@ xmlparser_setevents(XMLParserObject* self, PyObject* args) target->events = events; /* clear out existing events */ - Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL; - Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL; - Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL; - Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL; + Py_CLEAR(target->start_event_obj); + Py_CLEAR(target->end_event_obj); + Py_CLEAR(target->start_ns_event_obj); + Py_CLEAR(target->end_ns_event_obj); if (event_set == Py_None) { /* default is "end" only */ @@ -2789,14 +3554,15 @@ xmlparser_setevents(XMLParserObject* self, PyObject* args) } static PyMethodDef xmlparser_methods[] = { - {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS}, + {"feed", (PyCFunction) xmlparser_feed, METH_O}, {"close", (PyCFunction) xmlparser_close, METH_VARARGS}, {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS}, {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS}, + {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS}, {NULL, NULL} }; -static PyObject* +static PyObject* xmlparser_getattro(XMLParserObject* self, PyObject* nameobj) { if (PyUnicode_Check(nameobj)) { @@ -2822,33 +3588,44 @@ xmlparser_getattro(XMLParserObject* self, PyObject* nameobj) static PyTypeObject XMLParser_Type = { PyVarObject_HEAD_INIT(NULL, 0) - "XMLParser", sizeof(XMLParserObject), 0, + "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0, /* methods */ - (destructor)xmlparser_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - (getattrofunc)xmlparser_getattro, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ - 0, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - xmlparser_methods, /* tp_methods */ - 0, /* tp_members */ + (destructor)xmlparser_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + (getattrofunc)xmlparser_getattro, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + /* tp_flags */ + 0, /* tp_doc */ + (traverseproc)xmlparser_gc_traverse, /* tp_traverse */ + (inquiry)xmlparser_gc_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + xmlparser_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)xmlparser_init, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + xmlparser_new, /* tp_new */ + 0, /* tp_free */ }; #endif @@ -2857,13 +3634,7 @@ static PyTypeObject XMLParser_Type = { /* python module interface */ static PyMethodDef _functions[] = { - {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS}, - {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS}, - {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS}, -#if defined(USE_EXPAT) - {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS}, - {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS}, -#endif + {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS}, {NULL, NULL} }; @@ -2883,11 +3654,11 @@ static struct PyModuleDef _elementtreemodule = { PyMODINIT_FUNC PyInit__elementtree(void) { - PyObject* m; - PyObject* g; - char* bootstrap; + PyObject *m, *temp; /* Initialize object types */ + if (PyType_Ready(&ElementIter_Type) < 0) + return NULL; if (PyType_Ready(&TreeBuilder_Type) < 0) return NULL; if (PyType_Ready(&Element_Type) < 0) @@ -2901,212 +3672,15 @@ PyInit__elementtree(void) if (!m) return NULL; - /* The code below requires that the module gets already added - to sys.modules. */ - PyDict_SetItemString(PyImport_GetModuleDict(), - _elementtreemodule.m_name, - m); - - /* python glue code */ - - g = PyDict_New(); - if (!g) - return NULL; - - PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins()); - - bootstrap = ( - - "from copy import copy, deepcopy\n" - - "try:\n" - " from xml.etree import ElementTree\n" - "except ImportError:\n" - " import ElementTree\n" - "ET = ElementTree\n" - "del ElementTree\n" - - "import _elementtree as cElementTree\n" - - "try:\n" /* check if copy works as is */ - " copy(cElementTree.Element('x'))\n" - "except:\n" - " def copyelement(elem):\n" - " return elem\n" - - "class CommentProxy:\n" - " def __call__(self, text=None):\n" - " element = cElementTree.Element(ET.Comment)\n" - " element.text = text\n" - " return element\n" - " def __eq__(self, other):\n" - " return ET.Comment == other\n" - "cElementTree.Comment = CommentProxy()\n" - - "class ElementTree(ET.ElementTree):\n" /* public */ - " def parse(self, source, parser=None):\n" - " close_source = False\n" - " if not hasattr(source, 'read'):\n" - " source = open(source, 'rb')\n" - " close_source = True\n" - " try:\n" - " if parser is not None:\n" - " while 1:\n" - " data = source.read(65536)\n" - " if not data:\n" - " break\n" - " parser.feed(data)\n" - " self._root = parser.close()\n" - " else:\n" - " parser = cElementTree.XMLParser()\n" - " self._root = parser._parse(source)\n" - " return self._root\n" - " finally:\n" - " if close_source:\n" - " source.close()\n" - "cElementTree.ElementTree = ElementTree\n" - - "def iter(node, tag=None):\n" /* helper */ - " if tag == '*':\n" - " tag = None\n" - " if tag is None or node.tag == tag:\n" - " yield node\n" - " for node in node:\n" - " for node in iter(node, tag):\n" - " yield node\n" - - "def itertext(node):\n" /* helper */ - " if node.text:\n" - " yield node.text\n" - " for e in node:\n" - " for s in e.itertext():\n" - " yield s\n" - " if e.tail:\n" - " yield e.tail\n" - - "def parse(source, parser=None):\n" /* public */ - " tree = ElementTree()\n" - " tree.parse(source, parser)\n" - " return tree\n" - "cElementTree.parse = parse\n" - - "class iterparse:\n" - " root = None\n" - " def __init__(self, file, events=None):\n" - " self._close_file = False\n" - " if not hasattr(file, 'read'):\n" - " file = open(file, 'rb')\n" - " self._close_file = True\n" - " self._file = file\n" - " self._events = []\n" - " self._index = 0\n" - " self._error = None\n" - " self.root = self._root = None\n" - " b = cElementTree.TreeBuilder()\n" - " self._parser = cElementTree.XMLParser(b)\n" - " self._parser._setevents(self._events, events)\n" - " def __next__(self):\n" - " while 1:\n" - " try:\n" - " item = self._events[self._index]\n" - " self._index += 1\n" - " return item\n" - " except IndexError:\n" - " pass\n" - " if self._error:\n" - " e = self._error\n" - " self._error = None\n" - " raise e\n" - " if self._parser is None:\n" - " self.root = self._root\n" - " if self._close_file:\n" - " self._file.close()\n" - " raise StopIteration\n" - " # load event buffer\n" - " del self._events[:]\n" - " self._index = 0\n" - " data = self._file.read(16384)\n" - " if data:\n" - " try:\n" - " self._parser.feed(data)\n" - " except SyntaxError as exc:\n" - " self._error = exc\n" - " else:\n" - " self._root = self._parser.close()\n" - " self._parser = None\n" - " def __iter__(self):\n" - " return self\n" - "cElementTree.iterparse = iterparse\n" - - "class PIProxy:\n" - " def __call__(self, target, text=None):\n" - " element = cElementTree.Element(ET.PI)\n" - " element.text = target\n" - " if text:\n" - " element.text = element.text + ' ' + text\n" - " return element\n" - " def __eq__(self, other):\n" - " return ET.PI == other\n" - "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n" - - "def XML(text):\n" /* public */ - " parser = cElementTree.XMLParser()\n" - " parser.feed(text)\n" - " return parser.close()\n" - "cElementTree.XML = cElementTree.fromstring = XML\n" - - "def XMLID(text):\n" /* public */ - " tree = XML(text)\n" - " ids = {}\n" - " for elem in tree.iter():\n" - " id = elem.get('id')\n" - " if id:\n" - " ids[id] = elem\n" - " return tree, ids\n" - "cElementTree.XMLID = XMLID\n" - - "try:\n" - " register_namespace = ET.register_namespace\n" - "except AttributeError:\n" - " def register_namespace(prefix, uri):\n" - " ET._namespace_map[uri] = prefix\n" - "cElementTree.register_namespace = register_namespace\n" - - "cElementTree.dump = ET.dump\n" - "cElementTree.ElementPath = ElementPath = ET.ElementPath\n" - "cElementTree.iselement = ET.iselement\n" - "cElementTree.QName = ET.QName\n" - "cElementTree.tostring = ET.tostring\n" - "cElementTree.fromstringlist = ET.fromstringlist\n" - "cElementTree.tostringlist = ET.tostringlist\n" - "cElementTree.VERSION = '" VERSION "'\n" - "cElementTree.__version__ = '" VERSION "'\n" - - ); - - if (!PyRun_String(bootstrap, Py_file_input, g, NULL)) - return NULL; - - elementpath_obj = PyDict_GetItemString(g, "ElementPath"); - - elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement"); - if (elementtree_copyelement_obj) { - /* reduce hack needed; enable reduce method */ - PyMethodDef* mp; - for (mp = element_methods; mp->ml_name; mp++) - if (mp->ml_meth == (PyCFunction) element_reduce) { - mp->ml_name = "__reduce__"; - break; - } - } else - PyErr_Clear(); + if (!(temp = PyImport_ImportModule("copy"))) + return NULL; + elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy"); + Py_XDECREF(temp); - elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy"); - elementtree_iter_obj = PyDict_GetItemString(g, "iter"); - elementtree_itertext_obj = PyDict_GetItemString(g, "itertext"); + if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath"))) + return NULL; -#if defined(USE_PYEXPAT_CAPI) - /* link against pyexpat, if possible */ + /* link against pyexpat */ expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0); if (expat_capi) { /* check that it's usable */ @@ -3122,13 +3696,23 @@ PyInit__elementtree(void) } else { return NULL; } -#endif elementtree_parseerror_obj = PyErr_NewException( - "cElementTree.ParseError", PyExc_SyntaxError, NULL + "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL ); Py_INCREF(elementtree_parseerror_obj); PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj); + Py_INCREF((PyObject *)&Element_Type); + PyModule_AddObject(m, "Element", (PyObject *)&Element_Type); + + Py_INCREF((PyObject *)&TreeBuilder_Type); + PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type); + +#if defined(USE_EXPAT) + Py_INCREF((PyObject *)&XMLParser_Type); + PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type); +#endif + return m; } |