summary | refs | log | tree | commit | diff
path: root/src/lxml/apihelpers.pxi
diff options
context:
space:
mode:
author Stefan Behnel <stefan_ml@behnel.de> 2013-03-29 23:49:18 +0100
committer Stefan Behnel <stefan_ml@behnel.de> 2013-03-29 23:49:18 +0100
commit cbfedcd89ab2d2274a7d1266633ea7887524d76c (patch)
tree 6edb7bb44d96bb6bad5c5ddfabac83310fe25343 /src/lxml/apihelpers.pxi
parent 5b0b3d62077a3309350da20b7e89deb448ec1fe9 (diff)
download python-lxml-cbfedcd89ab2d2274a7d1266633ea7887524d76c.tar.gz
fix regex and add test for it
Diffstat (limited to 'src/lxml/apihelpers.pxi')
-rw-r--r-- src/lxml/apihelpers.pxi 4
1 files changed, 2 insertions, 2 deletions
diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
index 8d11f7db..870d72fc 100644
--- a/src/lxml/apihelpers.pxi
+++ b/src/lxml/apihelpers.pxi
@@ -577,7 +577,7 @@ cdef list _collectAttributes(xmlNode* c_node, int collecttype):
cdef object __RE_XML_ENCODING
__RE_XML_ENCODING = re.compile(
- ur'^(\s*<\?\s*xml[^>]+)\s+encoding\s*=\s*["\'][^"\']*["\']\s*', re.U)
+ ur'^(<\?xml[^>]+)\s+encoding\s*=\s*["\'][^"\']*["\'](\s*\?>|)', re.U)
cdef object __REPLACE_XML_ENCODING
__REPLACE_XML_ENCODING = __RE_XML_ENCODING.sub
@@ -587,7 +587,7 @@ __HAS_XML_ENCODING = __RE_XML_ENCODING.match
cdef object _stripEncodingDeclaration(object xml_string):
# this is a hack to remove the XML encoding declaration from unicode
- return __REPLACE_XML_ENCODING(ur'\g<1> ', xml_string)
+ return __REPLACE_XML_ENCODING(ur'\g<1>\g<2>', xml_string)
cdef bint _hasEncodingDeclaration(object xml_string):
# check if a (unicode) string has an XML encoding declaration