diff options
author | Stefan Behnel <stefan_ml@behnel.de> | 2013-04-27 15:14:53 +0200 |
---|---|---|
committer | Stefan Behnel <stefan_ml@behnel.de> | 2013-04-27 15:14:53 +0200 |
commit | 7698fac30c6f859a1150a79a67540fe3b3fd77d1 (patch) | |
tree | 592200bfca26314de228da6a33eab305f77d3285 | |
parent | e6873f0ca287c89ac13d6a54f397bb6f721c5ed5 (diff) | |
download | python-lxml-7698fac30c6f859a1150a79a67540fe3b3fd77d1.tar.gz |
safely report IOErrors even in the face of unexpectedly encoded file names
--HG--
extra : rebase_source : e19f5f1b11ba54126f147b2a67110b2d9b66754d
-rw-r--r-- | src/lxml/apihelpers.pxi | 6 |
-rw-r--r-- | src/lxml/parser.pxi | 10 |
2 files changed, 14 insertions, 2 deletions
```diff
diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
index 3ddcf6fb..50026cd8 100644
--- a/src/lxml/apihelpers.pxi
+++ b/src/lxml/apihelpers.pxi
@@ -1384,7 +1384,11 @@ cdef object _encodeFilename(object filename):
 cdef object _decodeFilename(const_xmlChar* c_path):
     u"""Make the filename a unicode string if we are in Py3.
     """
-    c_len = tree.xmlStrlen(c_path)
+    return _decodeFilenameWithLength(c_path, tree.xmlStrlen(c_path))
+
+cdef object _decodeFilenameWithLength(const_xmlChar* c_path, size_t c_len):
+    u"""Make the filename a unicode string if we are in Py3.
+    """
     if _isFilePath(c_path):
         try:
             return python.PyUnicode_Decode(
diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi
index b606badc..c036284f 100644
--- a/src/lxml/parser.pxi
+++ b/src/lxml/parser.pxi
@@ -586,9 +586,17 @@ cdef int _raiseParseError(xmlparser.xmlParserCtxt* ctxt, filename,
                           _ErrorLog error_log) except 0:
     if filename is not None and \
            ctxt.lastError.domain == xmlerror.XML_FROM_IO:
+        if isinstance(filename, bytes):
+            filename = _decodeFilenameWithLength(
+                <bytes>filename, len(<bytes>filename))
         if ctxt.lastError.message is not NULL:
+            try:
+                message = (ctxt.lastError.message).decode('utf-8')
+            except UnicodeDecodeError:
+                # the filename may be in there => play safe
+                message = (ctxt.lastError.message).decode('iso8859-1')
             message = u"Error reading file '%s': %s" % (
-                filename, (ctxt.lastError.message).strip())
+                filename, message.strip())
         else:
             message = u"Error reading '%s'" % filename
         raise IOError, message
```