summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: scoder <none@none> 2009-03-21 16:01:01 +0100
committer: scoder <none@none> 2009-03-21 16:01:01 +0100
commit: b28cc28cfc282e9b2c8df13203c33e4b7e3cd58e (patch)
tree: 941acbf78dc4a7059a6521190b7a99ddeb595226
parent: eab87eac89589c1ae3a4708c0a9f12817aa29318 (diff)
download: python-lxml-b28cc28cfc282e9b2c8df13203c33e4b7e3cd58e.tar.gz
[svn r4134] r5093@delle: sbehnel | 2009-03-21 15:54:31 +0100
support for standalone flag both in tree.docinfo and serialiser
--HG--
branch : trunk
-rw-r--r-- CHANGES.txt 7
-rw-r--r-- src/lxml/lxml.etree.pyx 67
-rw-r--r-- src/lxml/serializer.pxi 24
-rw-r--r-- src/lxml/tests/test_etree.py 72
-rw-r--r-- src/lxml/tree.pxd 1
5 files changed, 153 insertions, 18 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index 349adca4..dc73f851 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -5,6 +5,13 @@ lxml changelog
2.2 (?)
=======
+Features added
+--------------
+
+* Support for ``standalone`` flag in XML declaration through
+ ``tree.docinfo.standalone`` and by passing ``standalone=True/False``
+ on serialisation.
+
Bugs fixed
----------
diff --git a/src/lxml/lxml.etree.pyx b/src/lxml/lxml.etree.pyx
index 0891f459..6ac64fec 100644
--- a/src/lxml/lxml.etree.pyx
+++ b/src/lxml/lxml.etree.pyx
@@ -317,6 +317,12 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
encoding = funicode(c_doc.encoding)
return (version, encoding)
+ cdef isstandalone(self):
+ if self._c_doc.standalone == -1:
+ return None
+ else:
+ return <bint>(self._c_doc.standalone == 1)
+
cdef buildNewPrefix(self):
if self._ns_counter < python.PyTuple_GET_SIZE(_PREFIX_CACHE):
ns = python.PyTuple_GET_ITEM(_PREFIX_CACHE, self._ns_counter)
@@ -441,6 +447,17 @@ cdef class DocInfo:
xml_version, encoding = self._doc.getxmlinfo()
return encoding
+ property standalone:
+ u"""Returns the standalone flag as declared by the document. The possible
+ values are True (``standalone='yes'``), False
+ (``standalone='no'`` or flag not provided in the declaration),
+ and None (unknown or no declaration found). Note that a
+ normal truth test on this value will always tell if the
+ ``standalone`` flag was set to ``'yes'`` or not.
+ """
+ def __get__(self):
+ return self._doc.isstandalone()
+
property URL:
u"The source URL of the document (or None if unknown)."
def __get__(self):
@@ -1622,10 +1639,10 @@ cdef public class _ElementTree [ type LxmlElementTreeType,
def write(self, file, *, encoding=None, method=u"xml",
pretty_print=False, xml_declaration=None, with_tail=True,
- compression=0):
+ standalone=None, compression=0):
u"""write(self, file, encoding=None, method="xml",
pretty_print=False, xml_declaration=None, with_tail=True,
- compression=0)
+ standalone=None, compression=0)
Write the tree to a filename, file or file-like object.
@@ -1634,9 +1651,14 @@ cdef public class _ElementTree [ type LxmlElementTreeType,
The keyword argument 'method' selects the output method: 'xml' or
'html'.
+ Passing a boolean value to the ``standalone`` option will
+ output an XML declaration with the corresponding
+ ``standalone`` flag.
+
The ``compression`` option enables GZip compression level 1-9.
"""
cdef bint write_declaration
+ cdef int is_standalone
self._assertHasRoot()
# suppress decl. in default case (purely for ElementTree compatibility)
if xml_declaration is not None:
@@ -1650,10 +1672,19 @@ cdef public class _ElementTree [ type LxmlElementTreeType,
encoding = encoding.upper()
write_declaration = encoding not in \
(u'US-ASCII', u'ASCII', u'UTF8', u'UTF-8')
+ if standalone is None:
+ is_standalone = -1
+ elif standalone:
+ write_declaration = 1
+ is_standalone = 1
+ else:
+ write_declaration = 1
+ is_standalone = 0
if compression is None or compression < 0:
compression = 0
_tofilelike(file, self._context_node, encoding, method,
- write_declaration, 1, pretty_print, with_tail, compression)
+ write_declaration, 1, pretty_print, with_tail,
+ is_standalone, compression)
def getpath(self, _Element element not None):
u"""getpath(self, element)
@@ -2537,9 +2568,11 @@ def dump(_Element elem not None, *, pretty_print=True, with_tail=True):
_dumpToFile(sys.stdout, elem._c_node, pretty_print, with_tail)
def tostring(element_or_tree, *, encoding=None, method=u"xml",
- xml_declaration=None, pretty_print=False, with_tail=True):
+ xml_declaration=None, pretty_print=False, with_tail=True,
+ standalone=None):
u"""tostring(element_or_tree, encoding=None, method="xml",
- xml_declaration=None, pretty_print=False, with_tail=True)
+ xml_declaration=None, pretty_print=False, with_tail=True,
+ standalone=None)
Serialize an element to an encoded string representation of its XML
tree.
@@ -2557,11 +2590,15 @@ def tostring(element_or_tree, *, encoding=None, method=u"xml",
The keyword argument 'method' selects the output method: 'xml',
'html' or plain 'text'.
+ Passing a boolean value to the ``standalone`` option will output
+ an XML declaration with the corresponding ``standalone`` flag.
+
You can prevent the tail text of the element from being serialised
by passing the boolean ``with_tail`` option. This has no impact
on the tail text of children, which will always be serialised.
"""
cdef bint write_declaration
+ cdef int is_standalone
if encoding is _unicode:
if xml_declaration:
raise ValueError, \
@@ -2575,14 +2612,23 @@ def tostring(element_or_tree, *, encoding=None, method=u"xml",
write_declaration = xml_declaration
if encoding is None:
encoding = u'ASCII'
+ if standalone is None:
+ is_standalone = -1
+ elif standalone:
+ write_declaration = 1
+ is_standalone = 1
+ else:
+ write_declaration = 1
+ is_standalone = 0
if isinstance(element_or_tree, _Element):
return _tostring(<_Element>element_or_tree, encoding, method,
- write_declaration, 0, pretty_print, with_tail)
+ write_declaration, 0, pretty_print, with_tail,
+ is_standalone)
elif isinstance(element_or_tree, _ElementTree):
return _tostring((<_ElementTree>element_or_tree)._context_node,
encoding, method, write_declaration, 1, pretty_print,
- with_tail)
+ with_tail, is_standalone)
else:
raise TypeError, u"Type '%s' cannot be serialized." % \
python._fqtypename(element_or_tree)
@@ -2601,7 +2647,7 @@ def tostringlist(element_or_tree, *args, **kwargs):
def tounicode(element_or_tree, *, method=u"xml", pretty_print=False,
with_tail=True):
u"""tounicode(element_or_tree, method="xml", pretty_print=False,
- with_tail=True)
+ with_tail=True)
Serialize an element to the Python unicode representation of its XML
tree.
@@ -2623,10 +2669,11 @@ def tounicode(element_or_tree, *, method=u"xml", pretty_print=False,
"""
if isinstance(element_or_tree, _Element):
return _tostring(<_Element>element_or_tree, _unicode, method,
- 0, 0, pretty_print, with_tail)
+ 0, 0, pretty_print, with_tail, -1)
elif isinstance(element_or_tree, _ElementTree):
return _tostring((<_ElementTree>element_or_tree)._context_node,
- _unicode, method, 0, 1, pretty_print, with_tail)
+ _unicode, method, 0, 1, pretty_print, with_tail,
+ -1)
else:
raise TypeError, u"Type '%s' cannot be serialized." % \
type(element_or_tree)
diff --git a/src/lxml/serializer.pxi b/src/lxml/serializer.pxi
index 576a4ce5..84a189d6 100644
--- a/src/lxml/serializer.pxi
+++ b/src/lxml/serializer.pxi
@@ -75,7 +75,7 @@ cdef _textToString(xmlNode* c_node, encoding, bint with_tail):
cdef _tostring(_Element element, encoding, method,
bint write_xml_declaration, bint write_complete_document,
- bint pretty_print, bint with_tail):
+ bint pretty_print, bint with_tail, int standalone):
u"""Serialize an element to an encoded string representation of its XML
tree.
"""
@@ -111,7 +111,7 @@ cdef _tostring(_Element element, encoding, method,
with nogil:
_writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_method,
write_xml_declaration, write_complete_document,
- pretty_print, with_tail)
+ pretty_print, with_tail, standalone)
tree.xmlOutputBufferFlush(c_buffer)
if c_buffer.conv is not NULL:
c_result_buffer = c_buffer.conv
@@ -155,12 +155,13 @@ cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer,
xmlNode* c_node, char* encoding, int c_method,
bint write_xml_declaration,
bint write_complete_document,
- bint pretty_print, bint with_tail) nogil:
+ bint pretty_print, bint with_tail,
+ int standalone) nogil:
cdef xmlDoc* c_doc
cdef xmlNode* c_nsdecl_node
c_doc = c_node.doc
if write_xml_declaration and c_method == OUTPUT_METHOD_XML:
- _writeDeclarationToBuffer(c_buffer, c_doc.version, encoding)
+ _writeDeclarationToBuffer(c_buffer, c_doc.version, encoding, standalone)
# write internal DTD subset, preceding PIs/comments, etc.
if write_complete_document:
@@ -203,14 +204,20 @@ cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer,
tree.xmlOutputBufferWrite(c_buffer, 1, "\n")
cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
- char* version, char* encoding) nogil:
+ char* version, char* encoding,
+ int standalone) nogil:
if version is NULL:
version = "1.0"
tree.xmlOutputBufferWrite(c_buffer, 15, "<?xml version='")
tree.xmlOutputBufferWriteString(c_buffer, version)
tree.xmlOutputBufferWrite(c_buffer, 12, "' encoding='")
tree.xmlOutputBufferWriteString(c_buffer, encoding)
- tree.xmlOutputBufferWrite(c_buffer, 4, "'?>\n")
+ if standalone == 0:
+ tree.xmlOutputBufferWrite(c_buffer, 20, "' standalone='no'?>\n")
+ elif standalone == 1:
+ tree.xmlOutputBufferWrite(c_buffer, 21, "' standalone='yes'?>\n")
+ else:
+ tree.xmlOutputBufferWrite(c_buffer, 4, "'?>\n")
cdef void _writeDtdToBuffer(tree.xmlOutputBuffer* c_buffer,
xmlDoc* c_doc, char* c_root_name,
@@ -351,7 +358,8 @@ cdef int _closeFilelikeWriter(void* ctxt):
cdef _tofilelike(f, _Element element, encoding, method,
bint write_xml_declaration, bint write_doctype,
- bint pretty_print, bint with_tail, int compression):
+ bint pretty_print, bint with_tail, int standalone,
+ int compression):
cdef python.PyThreadState* state = NULL
cdef _FilelikeWriter writer
cdef tree.xmlOutputBuffer* c_buffer
@@ -408,7 +416,7 @@ cdef _tofilelike(f, _Element element, encoding, method,
_writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_method,
write_xml_declaration, write_doctype,
- pretty_print, with_tail)
+ pretty_print, with_tail, standalone)
error_result = c_buffer.error
if error_result == xmlerror.XML_ERR_OK:
error_result = tree.xmlOutputBufferClose(c_buffer)
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
index b4e0a811..d35be101 100644
--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
@@ -2313,6 +2313,78 @@ class ETreeOnlyTestCase(HelperTestCase):
result = tostring(a, with_tail=True)
self.assertEquals(result, _bytes("<a><b/>bTAIL<c/></a>aTAIL"))
+ def test_standalone(self):
+ tostring = self.etree.tostring
+ XML = self.etree.XML
+ ElementTree = self.etree.ElementTree
+ Element = self.etree.Element
+
+ tree = Element("root").getroottree()
+ self.assertEquals(None, tree.docinfo.standalone)
+
+ tree = XML(_bytes("<root/>")).getroottree()
+ self.assertEquals(None, tree.docinfo.standalone)
+
+ tree = XML(_bytes(
+ "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"
+ )).getroottree()
+ self.assertEquals(True, tree.docinfo.standalone)
+
+ tree = XML(_bytes(
+ "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"
+ )).getroottree()
+ self.assertEquals(False, tree.docinfo.standalone)
+
+ def test_tostring_standalone(self):
+ tostring = self.etree.tostring
+ XML = self.etree.XML
+ ElementTree = self.etree.ElementTree
+
+ root = XML(_bytes("<root/>"))
+
+ tree = ElementTree(root)
+ self.assertEquals(None, tree.docinfo.standalone)
+
+ result = tostring(root, xml_declaration=True, encoding="ASCII")
+ self.assertEquals(result, _bytes(
+ "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
+
+ result = tostring(root, xml_declaration=True, encoding="ASCII",
+ standalone=True)
+ self.assertEquals(result, _bytes(
+ "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
+
+ tree = ElementTree(XML(result))
+ self.assertEquals(True, tree.docinfo.standalone)
+
+ result = tostring(root, xml_declaration=True, encoding="ASCII",
+ standalone=False)
+ self.assertEquals(result, _bytes(
+ "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))
+
+ tree = ElementTree(XML(result))
+ self.assertEquals(False, tree.docinfo.standalone)
+
+ def test_tostring_standalone_in_out(self):
+ tostring = self.etree.tostring
+ XML = self.etree.XML
+ ElementTree = self.etree.ElementTree
+
+ root = XML(_bytes(
+ "<?xml version='1.0' encoding='UTF-8' standalone='yes'?>\n<root/>"))
+
+ tree = ElementTree(root)
+ self.assertEquals(True, tree.docinfo.standalone)
+
+ result = tostring(root, xml_declaration=True, encoding="ASCII")
+ self.assertEquals(result, _bytes(
+ "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
+
+ result = tostring(root, xml_declaration=True, encoding="ASCII",
+ standalone=True)
+ self.assertEquals(result, _bytes(
+ "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
+
def test_tostring_method_text_encoding(self):
tostring = self.etree.tostring
Element = self.etree.Element
diff --git a/src/lxml/tree.pxd b/src/lxml/tree.pxd
index 78ca566f..521fbc5f 100644
--- a/src/lxml/tree.pxd
+++ b/src/lxml/tree.pxd
@@ -131,6 +131,7 @@ cdef extern from "libxml/tree.h":
xmlDoc* doc
xmlDict* dict
xmlHashTable* ids
+ int standalone
char* version
char* encoding
char* URL