diff options
author | Thomas Kluyver <takowl@gmail.com> | 2015-09-09 12:31:45 +0100 |
---|---|---|
committer | Thomas Kluyver <takowl@gmail.com> | 2015-09-09 12:31:45 +0100 |
commit | 82e38877c9806fd8afd534ed31f911ed570c2c40 (patch) | |
tree | ba53b0176b82fce5d8d5b88e0ee95803641e0f50 | |
parent | 32f8ea68c68a8b67a82308cadf0b6f4ed24d27e2 (diff) | |
parent | e067741654f4d6c5498bdb11a2d703640dc32758 (diff) | |
download | pexpect-git-82e38877c9806fd8afd534ed31f911ed570c2c40.tar.gz |
Merge pull request #182 from takluyver/reintegrate-unicode
'Unicode mode' in spawn class
-rw-r--r-- | doc/api/pexpect.rst | 32 | ||||
-rw-r--r-- | pexpect/__init__.py | 36 | ||||
-rw-r--r-- | pexpect/async.py | 2 | ||||
-rw-r--r-- | pexpect/pty_spawn.py | 45 | ||||
-rw-r--r-- | pexpect/spawnbase.py | 125 |
5 files changed, 111 insertions, 129 deletions
diff --git a/doc/api/pexpect.rst b/doc/api/pexpect.rst index 565f0ef..79bbcef 100644 --- a/doc/api/pexpect.rst +++ b/doc/api/pexpect.rst @@ -36,9 +36,9 @@ spawn class .. note:: - With a :class:`spawn` instance, the log files should be open for - writing binary data. With a :class:`spawnu` instance, they should - be open for writing unicode text. + With :class:`spawn` in bytes mode, the log files should be open for + writing binary data. In unicode mode, they should + be open for writing unicode text. See :ref:`unicode`. Controlling the child process ````````````````````````````` @@ -69,31 +69,35 @@ Controlling the child process Handling unicode ```````````````` -For backwards compatibility, :class:`spawn` can handle some Unicode: its -send methods will encode arbitrary unicode as UTF-8 before sending it to the -child process, and its expect methods can accept ascii-only unicode strings. -However, for a proper unicode API to a subprocess, use this subclass: +By default, :class:`spawn` is a bytes interface: its read methods return bytes, +and its write/send and expect methods expect bytes. If you pass the *encoding* +parameter to the constructor, it will instead act as a unicode interface: +strings you send will be encoded using that encoding, and bytes received will +be decoded before returning them to you. In this mode, patterns for +:meth:`~spawn.expect` and :meth:`~spawn.expect_exact` should also be unicode. + +.. versionchanged:: 4.0 -.. autoclass:: spawnu - :show-inheritance: + :class:`spawn` provides both the bytes and unicode interfaces. In Pexpect + 3.x, the unicode interface was provided by a separate ``spawnu`` class. -There is also a :func:`runu` function, the unicode counterpart to :func:`run`. +For backwards compatibility, some Unicode is allowed in bytes mode: the +send methods will encode arbitrary unicode as UTF-8 before sending it to the +child process, and its expect methods can accept ascii-only unicode strings. .. note:: Unicode handling with pexpect works the same way on Python 2 and 3, despite the difference in names. I.e.: - - :class:`spawn` works with ``str`` on Python 2, and :class:`bytes` on Python 3, - - :class:`spawnu` works with ``unicode`` on Python 2, and :class:`str` on Python 3. + - Bytes mode works with ``str`` on Python 2, and :class:`bytes` on Python 3, + - Unicode mode works with ``unicode`` on Python 2, and :class:`str` on Python 3. run function ------------ .. autofunction:: run -.. autofunction:: runu - Exceptions ---------- diff --git a/pexpect/__init__.py b/pexpect/__init__.py index 4b153f4..db5be16 100644 --- a/pexpect/__init__.py +++ b/pexpect/__init__.py @@ -77,7 +77,7 @@ __all__ = ['ExceptionPexpect', 'EOF', 'TIMEOUT', 'spawn', 'spawnu', 'run', 'runu 'which', 'split_command_line', '__version__', '__revision__'] def run(command, timeout=30, withexitstatus=False, events=None, - extra_args=None, logfile=None, cwd=None, env=None): + extra_args=None, logfile=None, cwd=None, env=None, **kwargs): ''' This function runs the given command; waits for it to finish; then @@ -159,29 +159,16 @@ def run(command, timeout=30, withexitstatus=False, events=None, sent to the child. 'extra_args' is not used by directly run(). It provides a way to pass data to a callback function through run() through the locals dictionary passed to a callback. - ''' - return _run(command, timeout=timeout, withexitstatus=withexitstatus, - events=events, extra_args=extra_args, logfile=logfile, cwd=cwd, - env=env, _spawn=spawn) - -def runu(command, timeout=30, withexitstatus=False, events=None, - extra_args=None, logfile=None, cwd=None, env=None, **kwargs): - """This offers the same interface as :func:`run`, but using unicode. - Like :class:`spawnu`, you can pass ``encoding`` and ``errors`` parameters, - which will be used for both input and output. - """ - return _run(command, timeout=timeout, withexitstatus=withexitstatus, - events=events, extra_args=extra_args, logfile=logfile, cwd=cwd, - env=env, _spawn=spawnu, **kwargs) - -def _run(command, timeout, withexitstatus, events, extra_args, logfile, cwd, - env, _spawn, **kwargs): + Like :class:`spawn`, passing *encoding* will make it work with unicode + instead of bytes. You can pass *codec_errors* to control how errors in + encoding and decoding are handled. + ''' if timeout == -1: - child = _spawn(command, maxread=2000, logfile=logfile, cwd=cwd, env=env, + child = spawn(command, maxread=2000, logfile=logfile, cwd=cwd, env=env, **kwargs) else: - child = _spawn(command, timeout=timeout, maxread=2000, logfile=logfile, + child = spawn(command, timeout=timeout, maxread=2000, logfile=logfile, cwd=cwd, env=env, **kwargs) if isinstance(events, list): patterns= [x for x,y in events] @@ -232,4 +219,13 @@ def _run(command, timeout, withexitstatus, events, extra_args, logfile, cwd, else: return child_result +def runu(command, timeout=30, withexitstatus=False, events=None, + extra_args=None, logfile=None, cwd=None, env=None, **kwargs): + """Deprecated: pass encoding to run() instead. + """ + kwargs.setdefault('encoding', 'utf-8') + return run(command, timeout=timeout, withexitstatus=withexitstatus, + events=events, extra_args=extra_args, logfile=logfile, cwd=cwd, + env=env, **kwargs) + # vim: set shiftround expandtab tabstop=4 shiftwidth=4 ft=python autoindent : diff --git a/pexpect/async.py b/pexpect/async.py index 5e5e9ee..ad75994 100644 --- a/pexpect/async.py +++ b/pexpect/async.py @@ -37,7 +37,7 @@ class PatternWaiter(asyncio.Protocol): def data_received(self, data): spawn = self.expecter.spawn - s = spawn._coerce_read_string(data) + s = spawn._decoder.decode(data) spawn._log(s, 'read') if self.fut.done(): diff --git a/pexpect/pty_spawn.py b/pexpect/pty_spawn.py index fe2cc0c..cdbb54f 100644 --- a/pexpect/pty_spawn.py +++ b/pexpect/pty_spawn.py @@ -14,7 +14,7 @@ import ptyprocess from ptyprocess.ptyprocess import use_native_pty_fork from .exceptions import ExceptionPexpect, EOF, TIMEOUT -from .spawnbase import SpawnBase, SpawnBaseUnicode +from .spawnbase import SpawnBase from .utils import which, split_command_line @contextmanager @@ -30,14 +30,14 @@ PY3 = (sys.version_info[0] >= 3) class spawn(SpawnBase): '''This is the main class interface for Pexpect. Use this class to start and control child applications. ''' - ptyprocess_class = ptyprocess.PtyProcess # This is purely informational now - changing it has no effect use_native_pty_fork = use_native_pty_fork def __init__(self, command, args=[], timeout=30, maxread=2000, searchwindowsize=None, logfile=None, cwd=None, env=None, - ignore_sighup=True, echo=True, preexec_fn=None): + ignore_sighup=True, echo=True, preexec_fn=None, + encoding=None, codec_errors='strict'): '''This is the constructor. The command parameter may be a string that includes a command and any arguments to the command. For example:: @@ -172,7 +172,7 @@ class spawn(SpawnBase): signal handlers. ''' super(spawn, self).__init__(timeout=timeout, maxread=maxread, searchwindowsize=searchwindowsize, - logfile=logfile) + logfile=logfile, encoding=encoding, codec_errors=codec_errors) self.STDIN_FILENO = pty.STDIN_FILENO self.STDOUT_FILENO = pty.STDOUT_FILENO self.STDERR_FILENO = pty.STDERR_FILENO @@ -277,7 +277,7 @@ class spawn(SpawnBase): preexec_fn() kwargs['preexec_fn'] = preexec_wrapper - self.ptyproc = self.ptyprocess_class.spawn(self.args, env=self.env, + self.ptyproc = ptyprocess.PtyProcess.spawn(self.args, env=self.env, cwd=self.cwd, **kwargs) self.pid = self.ptyproc.pid @@ -503,10 +503,8 @@ class spawn(SpawnBase): s = self._coerce_send_string(s) self._log(s, 'send') - return self._send(s) - - def _send(self, s): - return os.write(self.child_fd, s) + b = self._encoder.encode(s, final=False) + return os.write(self.child_fd, b) def sendline(self, s=''): '''Wraps send(), sending string ``s`` to child process, with @@ -519,9 +517,11 @@ class spawn(SpawnBase): n = n + self.send(self.linesep) return n - def _log_control(self, byte): + def _log_control(self, s): """Write control characters to the appropriate log files""" - self._log(byte, 'send') + if self.encoding is not None: + s = s.decode(self.encoding, 'replace') + self._log(s, 'send') def sendcontrol(self, char): '''Helper method that wraps send() with mnemonic access for sending control @@ -805,22 +805,7 @@ class spawn(SpawnBase): # this actually is an exception. raise - -class spawnu(SpawnBaseUnicode, spawn): - """Works like spawn, but accepts and returns unicode strings. - - Extra parameters: - - :param encoding: The encoding to use for communications (default: 'utf-8') - :param errors: How to handle encoding/decoding errors; one of 'strict' - (the default), 'ignore', or 'replace', as described - for :meth:`~bytes.decode` and :meth:`~str.encode`. - """ - ptyprocess_class = ptyprocess.PtyProcessUnicode - - def _send(self, s): - return os.write(self.child_fd, s.encode(self.encoding, self.errors)) - - def _log_control(self, byte): - s = byte.decode(self.encoding, 'replace') - self._log(s, 'send') +def spawnu(*args, **kwargs): + """Deprecated: pass encoding to spawn() instead.""" + kwargs.setdefault('encoding', 'utf-8') + return spawn(*args, **kwargs) diff --git a/pexpect/spawnbase.py b/pexpect/spawnbase.py index d79c5c0..9fd2e18 100644 --- a/pexpect/spawnbase.py +++ b/pexpect/spawnbase.py @@ -7,35 +7,30 @@ from .exceptions import ExceptionPexpect, EOF, TIMEOUT from .expect import Expecter, searcher_string, searcher_re PY3 = (sys.version_info[0] >= 3) +text_type = str if PY3 else unicode + +class _NullCoder(object): + """Pass bytes through unchanged.""" + @staticmethod + def encode(b, final=False): + return b + + @staticmethod + def decode(b, final=False): + return b class SpawnBase(object): """A base class providing the backwards-compatible spawn API for Pexpect. - This should not be instantiated directly: use :class:`pexpect.spawn` or :class:`pexpect.fdpexpect.fdspawn`.""" - string_type = bytes - if PY3: - allowed_string_types = (bytes, str) - linesep = os.linesep.encode('ascii') - crlf = '\r\n'.encode('ascii') - - @staticmethod - def write_to_stdout(b): - try: - return sys.stdout.buffer.write(b) - except AttributeError: - # If stdout has been replaced, it may not have .buffer - return sys.stdout.write(b.decode('ascii', 'replace')) - else: - allowed_string_types = (basestring,) # analysis:ignore - linesep = os.linesep - crlf = '\r\n' - write_to_stdout = sys.stdout.write - + This should not be instantiated directly: use :class:`pexpect.spawn` or + :class:`pexpect.fdpexpect.fdspawn`. + """ encoding = None pid = None flag_eof = False - def __init__(self, timeout=30, maxread=2000, searchwindowsize=None, logfile=None): + def __init__(self, timeout=30, maxread=2000, searchwindowsize=None, + logfile=None, encoding=None, codec_errors='strict'): self.stdin = sys.stdin self.stdout = sys.stdout self.stderr = sys.stderr @@ -63,7 +58,7 @@ class SpawnBase(object): # max bytes to read at one time into buffer self.maxread = maxread # This is the read buffer. See maxread. - self.buffer = self.string_type() + self.buffer = bytes() if (encoding is None) else text_type() # Data before searchwindowsize point is preserved, but not searched. self.searchwindowsize = searchwindowsize # Delay used before sending data to child. Time in seconds. @@ -79,6 +74,42 @@ class SpawnBase(object): self.name = '<' + repr(self) + '>' self.closed = True + # Unicode interface + self.encoding = encoding + self.codec_errors = codec_errors + if encoding is None: + # bytes mode (accepts some unicode for backwards compatibility) + self._encoder = self._decoder = _NullCoder() + self.string_type = bytes + self.crlf = b'\r\n' + if PY3: + self.allowed_string_types = (bytes, str) + self.linesep = os.linesep.encode('ascii') + def write_to_stdout(b): + try: + return sys.stdout.buffer.write(b) + except AttributeError: + # If stdout has been replaced, it may not have .buffer + return sys.stdout.write(b.decode('ascii', 'replace')) + self.write_to_stdout = write_to_stdout + else: + self.allowed_string_types = (basestring,) # analysis:ignore + self.linesep = os.linesep + self.write_to_stdout = sys.stdout.write + else: + # unicode mode + self._encoder = codecs.getincrementalencoder(encoding)(codec_errors) + self._decoder = codecs.getincrementaldecoder(encoding)(codec_errors) + self.string_type = text_type + self.crlf = u'\r\n' + self.allowed_string_types = (text_type, ) + if PY3: + self.linesep = os.linesep + else: + self.linesep = os.linesep.decode('ascii') + # This can handle unicode in both Python 2 and 3 + self.write_to_stdout = sys.stdout.write + def _log(self, s, direction): if self.logfile is not None: self.logfile.write(s) @@ -88,22 +119,19 @@ class SpawnBase(object): second_log.write(s) second_log.flush() - @staticmethod - def _coerce_expect_string(s): - if not isinstance(s, bytes): + # For backwards compatibility, in bytes mode (when encoding is None) + # unicode is accepted for send and expect. Unicode mode is strictly unicode + # only. + def _coerce_expect_string(self, s): + if self.encoding is None and not isinstance(s, bytes): return s.encode('ascii') return s - @staticmethod - def _coerce_send_string(s): - if not isinstance(s, bytes): + def _coerce_send_string(self, s): + if self.encoding is None and not isinstance(s, bytes): return s.encode('utf-8') return s - @staticmethod - def _coerce_read_string(s): - return s - def read_nonblocking(self, size=1, timeout=None): """This reads data from the file descriptor. @@ -125,7 +153,7 @@ class SpawnBase(object): self.flag_eof = True raise EOF('End Of File (EOF). Empty string style platform.') - s = self._coerce_read_string(s) + s = self._decoder.decode(s, final=False) self._log(s, 'read') return s @@ -451,34 +479,3 @@ class SpawnBase(object): # We rely on subclasses to implement close(). If they don't, it's not # clear what a context manager should do. self.close() - -class SpawnBaseUnicode(SpawnBase): - if PY3: - string_type = str - allowed_string_types = (str, ) - linesep = os.linesep - crlf = '\r\n' - else: - string_type = unicode - allowed_string_types = (unicode, ) - linesep = os.linesep.decode('ascii') - crlf = '\r\n'.decode('ascii') - # This can handle unicode in both Python 2 and 3 - write_to_stdout = sys.stdout.write - - def __init__(self, *args, **kwargs): - self.encoding = kwargs.pop('encoding', 'utf-8') - self.errors = kwargs.pop('errors', 'strict') - self._decoder = codecs.getincrementaldecoder(self.encoding)(errors=self.errors) - super(SpawnBaseUnicode, self).__init__(*args, **kwargs) - - @staticmethod - def _coerce_expect_string(s): - return s - - @staticmethod - def _coerce_send_string(s): - return s - - def _coerce_read_string(self, s): - return self._decoder.decode(s, final=False)
\ No newline at end of file |