From c62b41f3df3e3ee98cc605a64b90152d3a9c631e Mon Sep 17 00:00:00 2001 From: Giampaolo Rodola Date: Fri, 22 Jan 2016 10:37:39 +0100 Subject: expose https://github.com/fbenkstein and fix name() / cmdline() encoding errors on linux / py3 --- CREDITS | 4 ++++ HISTORY.rst | 14 ++++++++++++++ docs/index.rst | 14 +++++++++++++- psutil/__init__.py | 9 ++++++++- psutil/_pslinux.py | 19 +++++++++++++------ test/test_psutil.py | 30 +++++++++++++++--------------- 6 files changed, 67 insertions(+), 23 deletions(-) diff --git a/CREDITS b/CREDITS index c5e5be93..b600dea8 100644 --- a/CREDITS +++ b/CREDITS @@ -354,3 +354,7 @@ I: 688 N: Syohei YOSHIDA W: https://github.com/syohex I: 730 + +N: Frank Benkstein +W: https://github.com/fbenkstein +I: 732, 733 diff --git a/HISTORY.rst b/HISTORY.rst index f39478d6..f514d104 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,5 +1,19 @@ Bug tracker at https://github.com/giampaolo/psutil/issues +3.5.0 - XXXX-XX-XX +================== + +**Enhancements** + +- #733: exposed a new ENCODING_ERRORS_HANDLER constant for dealing with + encoding errors on Python 3. + + +**Bug fixes** + +- #733: [Linux] process name() and exe() can fail on Python 3 if string + contains non-UTF8 characters. (patch by Frank Benkstein) + 3.4.2 - 2016-01-20 ================== diff --git a/docs/index.rst b/docs/index.rst index 8b900b7e..83b78694 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1284,7 +1284,7 @@ Popen class Constants ========= -.. _const-pstatus: +.. _const-procfs_path: .. data:: PROCFS_PATH The path of the /proc filesystem on Linux and Solaris (defaults to "/proc"). @@ -1296,6 +1296,18 @@ Constants .. versionadded:: 3.2.3 .. versionchanged:: 3.4.2 also available on Solaris. +.. _const-encoding_errors_handler: +.. data:: ENCODING_ERRORS_HANDLER + + Dictates how to handle encoding and decoding errors (for instance when + reading files in /proc via `open <https://docs.python.org/3/library/functions.html#open>`__). + This is only used in Python 3 (Python 2 ignores this constant). 
+ By default this is set to `'surrogateescape'`. See + `here <https://docs.python.org/3/library/codecs.html#error-handlers>`__ for + a complete list of available error handlers. + + .. versionadded:: 3.5.0 + .. _const-pstatus: .. data:: STATUS_RUNNING STATUS_SLEEPING diff --git a/psutil/__init__.py b/psutil/__init__.py index d46e034e..f3425de7 100644 --- a/psutil/__init__.py +++ b/psutil/__init__.py @@ -141,6 +141,12 @@ else: # pragma: no cover raise NotImplementedError('platform %s is not supported' % sys.platform) +# Dictates how to handle encoding and decoding errors (with open()) +# on Python 3. This is public API and it will be retrieved from _ps*.py +# modules via sys.modules. +ENCODING_ERRORS_HANDLER = 'surrogateescape' + + __all__ = [ # exceptions "Error", "NoSuchProcess", "ZombieProcess", "AccessDenied", @@ -155,6 +161,7 @@ __all__ = [ "CONN_LAST_ACK", "CONN_LISTEN", "CONN_CLOSING", "CONN_NONE", "AF_LINK", "NIC_DUPLEX_FULL", "NIC_DUPLEX_HALF", "NIC_DUPLEX_UNKNOWN", + "ENCODING_ERRORS_HANDLER", # classes "Process", "Popen", # functions @@ -168,7 +175,7 @@ __all__ = [ ] __all__.extend(_psplatform.__extra__all__) __author__ = "Giampaolo Rodola'" -__version__ = "3.4.2" +__version__ = "3.5.0" version_info = tuple([int(num) for num in __version__.split('.')]) AF_LINK = _psplatform.AF_LINK _TOTAL_PHYMEM = None diff --git a/psutil/_pslinux.py b/psutil/_pslinux.py index 243f1626..f96bcc95 100644 --- a/psutil/_pslinux.py +++ b/psutil/_pslinux.py @@ -139,11 +139,16 @@ def open_binary(fname, **kwargs): def open_text(fname, **kwargs): - """On Python 3 opens a file in text mode by using fs encoding. + """On Python 3 opens a file in text mode by using fs encoding and + a proper en/decoding errors handler. On Python 2 this is just an alias for open(name, 'rt'). 
""" - if PY3 and 'encoding' not in kwargs: - kwargs['encoding'] = FS_ENCODING + if PY3: + # See: + # https://github.com/giampaolo/psutil/issues/675 + # https://github.com/giampaolo/psutil/pull/733 + kwargs.setdefault('encoding', FS_ENCODING) + kwargs.setdefault('errors', get_encoding_errors_handler()) return open(fname, "rt", **kwargs) @@ -151,6 +156,10 @@ def get_procfs_path(): return sys.modules['psutil'].PROCFS_PATH +def get_encoding_errors_handler(): + return sys.modules['psutil'].ENCODING_ERRORS_HANDLER + + def readlink(path): """Wrapper around os.readlink().""" assert isinstance(path, basestring), path @@ -600,9 +609,7 @@ class Connections: def process_unix(self, file, family, inodes, filter_pid=None): """Parse /proc/net/unix files.""" - # see: https://github.com/giampaolo/psutil/issues/675 - kw = dict(errors='replace') if PY3 else dict() - with open_text(file, buffering=BIGGER_FILE_BUFFERING, **kw) as f: + with open_text(file, buffering=BIGGER_FILE_BUFFERING) as f: f.readline() # skip the first line for line in f: tokens = line.split() diff --git a/test/test_psutil.py b/test/test_psutil.py index f9592ec0..0a817f59 100644 --- a/test/test_psutil.py +++ b/test/test_psutil.py @@ -20,7 +20,6 @@ import atexit import collections import contextlib import datetime -import distutils.spawn import errno import functools import json @@ -543,8 +542,8 @@ if WINDOWS: return (wv[0], wv[1], sp) -# In Python 3 paths are unicode objects by default. Surrogate escapes are used -# to handle non-character data. +# In Python 3 paths are unicode objects by default. Surrogate escapes +# are used to handle non-character data. def encode_path(path): if PY3: return path.encode(sys.getfilesystemencoding(), @@ -3253,18 +3252,21 @@ class TestUnicode(unittest.TestCase): class TestNonUnicode(unittest.TestCase): - "Test handling of non-utf8 data." 
+ """Test handling of non-utf8 data.""" @classmethod def setUpClass(cls): - cls.temp_directory = tempfile.mkdtemp(suffix=b"") + if PY3: + # Fix around https://bugs.python.org/issue24230 + cls.temp_directory = tempfile.mkdtemp().encode('utf8') + else: + cls.temp_directory = tempfile.mkdtemp(suffix=b"") - # Return an executable that runs until we close its stdin + # Return an executable that runs until we close its stdin. if WINDOWS: - cls.test_executable = distutils.spawn.find_executable("cmd.exe") + cls.test_executable = which("cmd.exe") else: - assert POSIX - cls.test_executable = "/bin/cat" + cls.test_executable = which("cat") @classmethod def tearDownClass(cls): @@ -3310,8 +3312,6 @@ class TestNonUnicode(unittest.TestCase): cmd = [self.test_executable] if WINDOWS: cmd.extend(["/K", "type \xc0\x80"]) - else: - cmd.extend([b"\xc0\x80", b"-"]) subp = get_test_subprocess(cmd=cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, @@ -3342,10 +3342,10 @@ class TestNonUnicode(unittest.TestCase): test_script = os.path.join(self.temp_directory, b"test.py") with open(test_script, "wt") as f: f.write(textwrap.dedent(r""" - import sys, os - with open(%r, "wb") as f1, open(__file__, "rb") as f2: - sys.stdin.read() - """ % funny_file)) + import sys + with open(%r, "wb") as f1, open(__file__, "rb") as f2: + sys.stdin.read() + """ % funny_file)) self.addCleanup(safe_remove, test_script) subp = get_test_subprocess(cmd=[PYTHON, decode_path(test_script)], stdin=subprocess.PIPE, -- cgit v1.2.1