diff options
| author | scoder <none@none> | 2009-03-21 16:01:01 +0100 |
|---|---|---|
| committer | scoder <none@none> | 2009-03-21 16:01:01 +0100 |
| commit | b28cc28cfc282e9b2c8df13203c33e4b7e3cd58e (patch) | |
| tree | 941acbf78dc4a7059a6521190b7a99ddeb595226 | |
| parent | eab87eac89589c1ae3a4708c0a9f12817aa29318 (diff) | |
| download | python-lxml-b28cc28cfc282e9b2c8df13203c33e4b7e3cd58e.tar.gz | |
[svn r4134] r5093@delle: sbehnel | 2009-03-21 15:54:31 +0100
support for standalone flag both in tree.docinfo and serialiser
--HG--
branch : trunk
| -rw-r--r-- | CHANGES.txt | 7 | ||||
| -rw-r--r-- | src/lxml/lxml.etree.pyx | 67 | ||||
| -rw-r--r-- | src/lxml/serializer.pxi | 24 | ||||
| -rw-r--r-- | src/lxml/tests/test_etree.py | 72 | ||||
| -rw-r--r-- | src/lxml/tree.pxd | 1 |
5 files changed, 153 insertions, 18 deletions
diff --git a/CHANGES.txt b/CHANGES.txt index 349adca4..dc73f851 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -5,6 +5,13 @@ lxml changelog 2.2 (?) ======= +Features added +-------------- + +* Support for ``standalone`` flag in XML declaration through + ``tree.docinfo.standalone`` and by passing ``standalone=True/False`` + on serialisation. + Bugs fixed ---------- diff --git a/src/lxml/lxml.etree.pyx b/src/lxml/lxml.etree.pyx index 0891f459..6ac64fec 100644 --- a/src/lxml/lxml.etree.pyx +++ b/src/lxml/lxml.etree.pyx @@ -317,6 +317,12 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: encoding = funicode(c_doc.encoding) return (version, encoding) + cdef isstandalone(self): + if self._c_doc.standalone == -1: + return None + else: + return <bint>(self._c_doc.standalone == 1) + cdef buildNewPrefix(self): if self._ns_counter < python.PyTuple_GET_SIZE(_PREFIX_CACHE): ns = python.PyTuple_GET_ITEM(_PREFIX_CACHE, self._ns_counter) @@ -441,6 +447,17 @@ cdef class DocInfo: xml_version, encoding = self._doc.getxmlinfo() return encoding + property standalone: + u"""Returns the standalone flag as declared by the document. The possible + values are True (``standalone='yes'``), False + (``standalone='no'`` or flag not provided in the declaration), + and None (unknown or no declaration found). Note that a + normal truth test on this value will always tell if the + ``standalone`` flag was set to ``'yes'`` or not. + """ + def __get__(self): + return self._doc.isstandalone() + property URL: u"The source URL of the document (or None if unknown)." def __get__(self): @@ -1622,10 +1639,10 @@ cdef public class _ElementTree [ type LxmlElementTreeType, def write(self, file, *, encoding=None, method=u"xml", pretty_print=False, xml_declaration=None, with_tail=True, - compression=0): + standalone=None, compression=0): u"""write(self, file, encoding=None, method="xml", pretty_print=False, xml_declaration=None, with_tail=True, - compression=0) + standalone=None, compression=0) Write the tree to a filename, file or file-like object. @@ -1634,9 +1651,14 @@ cdef public class _ElementTree [ type LxmlElementTreeType, The keyword argument 'method' selects the output method: 'xml' or 'html'. + Passing a boolean value to the ``standalone`` option will + output an XML declaration with the corresponding + ``standalone`` flag. + The ``compression`` option enables GZip compression level 1-9. """ cdef bint write_declaration + cdef int is_standalone self._assertHasRoot() # suppress decl. in default case (purely for ElementTree compatibility) if xml_declaration is not None: @@ -1650,10 +1672,19 @@ cdef public class _ElementTree [ type LxmlElementTreeType, encoding = encoding.upper() write_declaration = encoding not in \ (u'US-ASCII', u'ASCII', u'UTF8', u'UTF-8') + if standalone is None: + is_standalone = -1 + elif standalone: + write_declaration = 1 + is_standalone = 1 + else: + write_declaration = 1 + is_standalone = 0 if compression is None or compression < 0: compression = 0 _tofilelike(file, self._context_node, encoding, method, - write_declaration, 1, pretty_print, with_tail, compression) + write_declaration, 1, pretty_print, with_tail, + is_standalone, compression) def getpath(self, _Element element not None): u"""getpath(self, element) @@ -2537,9 +2568,11 @@ def dump(_Element elem not None, *, pretty_print=True, with_tail=True): _dumpToFile(sys.stdout, elem._c_node, pretty_print, with_tail) def tostring(element_or_tree, *, encoding=None, method=u"xml", - xml_declaration=None, pretty_print=False, with_tail=True): + xml_declaration=None, pretty_print=False, with_tail=True, + standalone=None): u"""tostring(element_or_tree, encoding=None, method="xml", - xml_declaration=None, pretty_print=False, with_tail=True) + xml_declaration=None, pretty_print=False, with_tail=True, + standalone=None) Serialize an element to an encoded string representation of its XML tree. @@ -2557,11 +2590,15 @@ def tostring(element_or_tree, *, encoding=None, method=u"xml", The keyword argument 'method' selects the output method: 'xml', 'html' or plain 'text'. + Passing a boolean value to the ``standalone`` option will output + an XML declaration with the corresponding ``standalone`` flag. + You can prevent the tail text of the element from being serialised by passing the boolean ``with_tail`` option. This has no impact on the tail text of children, which will always be serialised. """ cdef bint write_declaration + cdef int is_standalone if encoding is _unicode: if xml_declaration: raise ValueError, \ @@ -2575,14 +2612,23 @@ def tostring(element_or_tree, *, encoding=None, method=u"xml", write_declaration = xml_declaration if encoding is None: encoding = u'ASCII' + if standalone is None: + is_standalone = -1 + elif standalone: + write_declaration = 1 + is_standalone = 1 + else: + write_declaration = 1 + is_standalone = 0 if isinstance(element_or_tree, _Element): return _tostring(<_Element>element_or_tree, encoding, method, - write_declaration, 0, pretty_print, with_tail) + write_declaration, 0, pretty_print, with_tail, + is_standalone) elif isinstance(element_or_tree, _ElementTree): return _tostring((<_ElementTree>element_or_tree)._context_node, encoding, method, write_declaration, 1, pretty_print, - with_tail) + with_tail, is_standalone) else: raise TypeError, u"Type '%s' cannot be serialized." % \ python._fqtypename(element_or_tree) @@ -2601,7 +2647,7 @@ def tostringlist(element_or_tree, *args, **kwargs): def tounicode(element_or_tree, *, method=u"xml", pretty_print=False, with_tail=True): u"""tounicode(element_or_tree, method="xml", pretty_print=False, - with_tail=True) + with_tail=True) Serialize an element to the Python unicode representation of its XML tree. @@ -2623,10 +2669,11 @@ def tounicode(element_or_tree, *, method=u"xml", pretty_print=False, """ if isinstance(element_or_tree, _Element): return _tostring(<_Element>element_or_tree, _unicode, method, - 0, 0, pretty_print, with_tail) + 0, 0, pretty_print, with_tail, -1) elif isinstance(element_or_tree, _ElementTree): return _tostring((<_ElementTree>element_or_tree)._context_node, - _unicode, method, 0, 1, pretty_print, with_tail) + _unicode, method, 0, 1, pretty_print, with_tail, + -1) else: raise TypeError, u"Type '%s' cannot be serialized." % \ type(element_or_tree) diff --git a/src/lxml/serializer.pxi b/src/lxml/serializer.pxi index 576a4ce5..84a189d6 100644 --- a/src/lxml/serializer.pxi +++ b/src/lxml/serializer.pxi @@ -75,7 +75,7 @@ cdef _textToString(xmlNode* c_node, encoding, bint with_tail): cdef _tostring(_Element element, encoding, method, bint write_xml_declaration, bint write_complete_document, - bint pretty_print, bint with_tail): + bint pretty_print, bint with_tail, int standalone): u"""Serialize an element to an encoded string representation of its XML tree. """ @@ -111,7 +111,7 @@ cdef _tostring(_Element element, encoding, method, with nogil: _writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_method, write_xml_declaration, write_complete_document, - pretty_print, with_tail) + pretty_print, with_tail, standalone) tree.xmlOutputBufferFlush(c_buffer) if c_buffer.conv is not NULL: c_result_buffer = c_buffer.conv @@ -155,12 +155,13 @@ cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node, char* encoding, int c_method, bint write_xml_declaration, bint write_complete_document, - bint pretty_print, bint with_tail) nogil: + bint pretty_print, bint with_tail, + int standalone) nogil: cdef xmlDoc* c_doc cdef xmlNode* c_nsdecl_node c_doc = c_node.doc if write_xml_declaration and c_method == OUTPUT_METHOD_XML: - _writeDeclarationToBuffer(c_buffer, c_doc.version, encoding) + _writeDeclarationToBuffer(c_buffer, c_doc.version, encoding, standalone) # write internal DTD subset, preceding PIs/comments, etc. if write_complete_document: @@ -203,14 +204,20 @@ cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer, tree.xmlOutputBufferWrite(c_buffer, 1, "\n") cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer, - char* version, char* encoding) nogil: + char* version, char* encoding, + int standalone) nogil: if version is NULL: version = "1.0" tree.xmlOutputBufferWrite(c_buffer, 15, "<?xml version='") tree.xmlOutputBufferWriteString(c_buffer, version) tree.xmlOutputBufferWrite(c_buffer, 12, "' encoding='") tree.xmlOutputBufferWriteString(c_buffer, encoding) - tree.xmlOutputBufferWrite(c_buffer, 4, "'?>\n") + if standalone == 0: + tree.xmlOutputBufferWrite(c_buffer, 20, "' standalone='no'?>\n") + elif standalone == 1: + tree.xmlOutputBufferWrite(c_buffer, 21, "' standalone='yes'?>\n") + else: + tree.xmlOutputBufferWrite(c_buffer, 4, "'?>\n") cdef void _writeDtdToBuffer(tree.xmlOutputBuffer* c_buffer, xmlDoc* c_doc, char* c_root_name, @@ -351,7 +358,8 @@ cdef int _closeFilelikeWriter(void* ctxt): cdef _tofilelike(f, _Element element, encoding, method, bint write_xml_declaration, bint write_doctype, - bint pretty_print, bint with_tail, int compression): + bint pretty_print, bint with_tail, int standalone, + int compression): cdef python.PyThreadState* state = NULL cdef _FilelikeWriter writer cdef tree.xmlOutputBuffer* c_buffer @@ -408,7 +416,7 @@ cdef _tofilelike(f, _Element element, encoding, method, _writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_method, write_xml_declaration, write_doctype, - pretty_print, with_tail) + pretty_print, with_tail, standalone) error_result = c_buffer.error if error_result == xmlerror.XML_ERR_OK: error_result = tree.xmlOutputBufferClose(c_buffer) diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py index b4e0a811..d35be101 100644 --- a/src/lxml/tests/test_etree.py +++ b/src/lxml/tests/test_etree.py @@ -2313,6 +2313,78 @@ class ETreeOnlyTestCase(HelperTestCase): result = tostring(a, with_tail=True) self.assertEquals(result, _bytes("<a><b/>bTAIL<c/></a>aTAIL")) + def test_standalone(self): + tostring = self.etree.tostring + XML = self.etree.XML + ElementTree = self.etree.ElementTree + Element = self.etree.Element + + tree = Element("root").getroottree() + self.assertEquals(None, tree.docinfo.standalone) + + tree = XML(_bytes("<root/>")).getroottree() + self.assertEquals(None, tree.docinfo.standalone) + + tree = XML(_bytes( + "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>" + )).getroottree() + self.assertEquals(True, tree.docinfo.standalone) + + tree = XML(_bytes( + "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>" + )).getroottree() + self.assertEquals(False, tree.docinfo.standalone) + + def test_tostring_standalone(self): + tostring = self.etree.tostring + XML = self.etree.XML + ElementTree = self.etree.ElementTree + + root = XML(_bytes("<root/>")) + + tree = ElementTree(root) + self.assertEquals(None, tree.docinfo.standalone) + + result = tostring(root, xml_declaration=True, encoding="ASCII") + self.assertEquals(result, _bytes( + "<?xml version='1.0' encoding='ASCII'?>\n<root/>")) + + result = tostring(root, xml_declaration=True, encoding="ASCII", + standalone=True) + self.assertEquals(result, _bytes( + "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>")) + + tree = ElementTree(XML(result)) + self.assertEquals(True, tree.docinfo.standalone) + + result = tostring(root, xml_declaration=True, encoding="ASCII", + standalone=False) + self.assertEquals(result, _bytes( + "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>")) + + tree = ElementTree(XML(result)) + self.assertEquals(False, tree.docinfo.standalone) + + def test_tostring_standalone_in_out(self): + tostring = self.etree.tostring + XML = self.etree.XML + ElementTree = self.etree.ElementTree + + root = XML(_bytes( + "<?xml version='1.0' encoding='UTF-8' standalone='yes'?>\n<root/>")) + + tree = ElementTree(root) + self.assertEquals(True, tree.docinfo.standalone) + + result = tostring(root, xml_declaration=True, encoding="ASCII") + self.assertEquals(result, _bytes( + "<?xml version='1.0' encoding='ASCII'?>\n<root/>")) + + result = tostring(root, xml_declaration=True, encoding="ASCII", + standalone=True) + self.assertEquals(result, _bytes( + "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>")) + def test_tostring_method_text_encoding(self): tostring = self.etree.tostring Element = self.etree.Element diff --git a/src/lxml/tree.pxd b/src/lxml/tree.pxd index 78ca566f..521fbc5f 100644 --- a/src/lxml/tree.pxd +++ b/src/lxml/tree.pxd @@ -131,6 +131,7 @@ cdef extern from "libxml/tree.h": xmlDoc* doc xmlDict* dict xmlHashTable* ids + int standalone char* version char* encoding char* URL |
