summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Stefan Behnel <stefan_ml@behnel.de> 2013-04-27 15:14:53 +0200
committer Stefan Behnel <stefan_ml@behnel.de> 2013-04-27 15:14:53 +0200
commit 7698fac30c6f859a1150a79a67540fe3b3fd77d1 (patch)
tree 592200bfca26314de228da6a33eab305f77d3285
parent e6873f0ca287c89ac13d6a54f397bb6f721c5ed5 (diff)
download python-lxml-7698fac30c6f859a1150a79a67540fe3b3fd77d1.tar.gz
safely report IOErrors even in the face of unexpectedly encoded file names
--HG-- extra : rebase_source : e19f5f1b11ba54126f147b2a67110b2d9b66754d
-rw-r--r-- src/lxml/apihelpers.pxi 6
-rw-r--r-- src/lxml/parser.pxi 10
2 files changed, 14 insertions, 2 deletions
diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
index 3ddcf6fb..50026cd8 100644
--- a/src/lxml/apihelpers.pxi
+++ b/src/lxml/apihelpers.pxi
@@ -1384,7 +1384,11 @@ cdef object _encodeFilename(object filename):
cdef object _decodeFilename(const_xmlChar* c_path):
u"""Make the filename a unicode string if we are in Py3.
"""
- c_len = tree.xmlStrlen(c_path)
+ return _decodeFilenameWithLength(c_path, tree.xmlStrlen(c_path))
+
+cdef object _decodeFilenameWithLength(const_xmlChar* c_path, size_t c_len):
+ u"""Make the filename a unicode string if we are in Py3.
+ """
if _isFilePath(c_path):
try:
return python.PyUnicode_Decode(
diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi
index b606badc..c036284f 100644
--- a/src/lxml/parser.pxi
+++ b/src/lxml/parser.pxi
@@ -586,9 +586,17 @@ cdef int _raiseParseError(xmlparser.xmlParserCtxt* ctxt, filename,
_ErrorLog error_log) except 0:
if filename is not None and \
ctxt.lastError.domain == xmlerror.XML_FROM_IO:
+ if isinstance(filename, bytes):
+ filename = _decodeFilenameWithLength(
+ <bytes>filename, len(<bytes>filename))
if ctxt.lastError.message is not NULL:
+ try:
+ message = (ctxt.lastError.message).decode('utf-8')
+ except UnicodeDecodeError:
+ # the filename may be in there => play safe
+ message = (ctxt.lastError.message).decode('iso8859-1')
message = u"Error reading file '%s': %s" % (
- filename, (ctxt.lastError.message).strip())
+ filename, message.strip())
else:
message = u"Error reading '%s'" % filename
raise IOError, message