diff options
| author | Stefan Behnel <stefan_ml@behnel.de> | 2013-03-29 23:49:18 +0100 |
|---|---|---|
| committer | Stefan Behnel <stefan_ml@behnel.de> | 2013-03-29 23:49:18 +0100 |
| commit | cbfedcd89ab2d2274a7d1266633ea7887524d76c (patch) | |
| tree | 6edb7bb44d96bb6bad5c5ddfabac83310fe25343 /src/lxml/apihelpers.pxi | |
| parent | 5b0b3d62077a3309350da20b7e89deb448ec1fe9 (diff) | |
| download | python-lxml-cbfedcd89ab2d2274a7d1266633ea7887524d76c.tar.gz | |
fix regex and add test for it
Diffstat (limited to 'src/lxml/apihelpers.pxi')
| -rw-r--r-- | src/lxml/apihelpers.pxi | 4 |
1 file changed, 2 insertions, 2 deletions
```diff
diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
index 8d11f7db..870d72fc 100644
--- a/src/lxml/apihelpers.pxi
+++ b/src/lxml/apihelpers.pxi
@@ -577,7 +577,7 @@ cdef list _collectAttributes(xmlNode* c_node, int collecttype):
 
 cdef object __RE_XML_ENCODING
 __RE_XML_ENCODING = re.compile(
-    ur'^(\s*<\?\s*xml[^>]+)\s+encoding\s*=\s*["\'][^"\']*["\']\s*', re.U)
+    ur'^(<\?xml[^>]+)\s+encoding\s*=\s*["\'][^"\']*["\'](\s*\?>|)', re.U)
 
 cdef object __REPLACE_XML_ENCODING
 __REPLACE_XML_ENCODING = __RE_XML_ENCODING.sub
@@ -587,7 +587,7 @@ __HAS_XML_ENCODING = __RE_XML_ENCODING.match
 
 cdef object _stripEncodingDeclaration(object xml_string):
     # this is a hack to remove the XML encoding declaration from unicode
-    return __REPLACE_XML_ENCODING(ur'\g<1> ', xml_string)
+    return __REPLACE_XML_ENCODING(ur'\g<1>\g<2>', xml_string)
 
 cdef bint _hasEncodingDeclaration(object xml_string):
     # check if a (unicode) string has an XML encoding declaration
```
