summary refs log tree commit diff
diff options
context:
space:
mode:
author Thomas Kluyver <takowl@gmail.com> 2015-09-09 12:31:45 +0100
committer Thomas Kluyver <takowl@gmail.com> 2015-09-09 12:31:45 +0100
commit 82e38877c9806fd8afd534ed31f911ed570c2c40 (patch)
tree ba53b0176b82fce5d8d5b88e0ee95803641e0f50
parent 32f8ea68c68a8b67a82308cadf0b6f4ed24d27e2 (diff)
parent e067741654f4d6c5498bdb11a2d703640dc32758 (diff)
download pexpect-git-82e38877c9806fd8afd534ed31f911ed570c2c40.tar.gz
Merge pull request #182 from takluyver/reintegrate-unicode
'Unicode mode' in spawn class
-rw-r--r-- doc/api/pexpect.rst 32
-rw-r--r-- pexpect/__init__.py 36
-rw-r--r-- pexpect/async.py 2
-rw-r--r-- pexpect/pty_spawn.py 45
-rw-r--r-- pexpect/spawnbase.py 125
5 files changed, 111 insertions, 129 deletions
diff --git a/doc/api/pexpect.rst b/doc/api/pexpect.rst
index 565f0ef..79bbcef 100644
--- a/doc/api/pexpect.rst
+++ b/doc/api/pexpect.rst
@@ -36,9 +36,9 @@ spawn class
.. note::
- With a :class:`spawn` instance, the log files should be open for
- writing binary data. With a :class:`spawnu` instance, they should
- be open for writing unicode text.
+ With :class:`spawn` in bytes mode, the log files should be open for
+ writing binary data. In unicode mode, they should
+ be open for writing unicode text. See :ref:`unicode`.
Controlling the child process
`````````````````````````````
@@ -69,31 +69,35 @@ Controlling the child process
Handling unicode
````````````````
-For backwards compatibility, :class:`spawn` can handle some Unicode: its
-send methods will encode arbitrary unicode as UTF-8 before sending it to the
-child process, and its expect methods can accept ascii-only unicode strings.
-However, for a proper unicode API to a subprocess, use this subclass:
+By default, :class:`spawn` is a bytes interface: its read methods return bytes,
+and its write/send and expect methods expect bytes. If you pass the *encoding*
+parameter to the constructor, it will instead act as a unicode interface:
+strings you send will be encoded using that encoding, and bytes received will
+be decoded before returning them to you. In this mode, patterns for
+:meth:`~spawn.expect` and :meth:`~spawn.expect_exact` should also be unicode.
+
+.. versionchanged:: 4.0
-.. autoclass:: spawnu
- :show-inheritance:
+ :class:`spawn` provides both the bytes and unicode interfaces. In Pexpect
+ 3.x, the unicode interface was provided by a separate ``spawnu`` class.
-There is also a :func:`runu` function, the unicode counterpart to :func:`run`.
+For backwards compatibility, some Unicode is allowed in bytes mode: the
+send methods will encode arbitrary unicode as UTF-8 before sending it to the
+child process, and its expect methods can accept ascii-only unicode strings.
.. note::
Unicode handling with pexpect works the same way on Python 2 and 3, despite
the difference in names. I.e.:
- - :class:`spawn` works with ``str`` on Python 2, and :class:`bytes` on Python 3,
- - :class:`spawnu` works with ``unicode`` on Python 2, and :class:`str` on Python 3.
+ - Bytes mode works with ``str`` on Python 2, and :class:`bytes` on Python 3,
+ - Unicode mode works with ``unicode`` on Python 2, and :class:`str` on Python 3.
run function
------------
.. autofunction:: run
-.. autofunction:: runu
-
Exceptions
----------
diff --git a/pexpect/__init__.py b/pexpect/__init__.py
index 4b153f4..db5be16 100644
--- a/pexpect/__init__.py
+++ b/pexpect/__init__.py
@@ -77,7 +77,7 @@ __all__ = ['ExceptionPexpect', 'EOF', 'TIMEOUT', 'spawn', 'spawnu', 'run', 'runu
'which', 'split_command_line', '__version__', '__revision__']
def run(command, timeout=30, withexitstatus=False, events=None,
- extra_args=None, logfile=None, cwd=None, env=None):
+ extra_args=None, logfile=None, cwd=None, env=None, **kwargs):
'''
This function runs the given command; waits for it to finish; then
@@ -159,29 +159,16 @@ def run(command, timeout=30, withexitstatus=False, events=None,
sent to the child. 'extra_args' is not used by directly run(). It provides
a way to pass data to a callback function through run() through the locals
dictionary passed to a callback.
- '''
- return _run(command, timeout=timeout, withexitstatus=withexitstatus,
- events=events, extra_args=extra_args, logfile=logfile, cwd=cwd,
- env=env, _spawn=spawn)
-
-def runu(command, timeout=30, withexitstatus=False, events=None,
- extra_args=None, logfile=None, cwd=None, env=None, **kwargs):
- """This offers the same interface as :func:`run`, but using unicode.
- Like :class:`spawnu`, you can pass ``encoding`` and ``errors`` parameters,
- which will be used for both input and output.
- """
- return _run(command, timeout=timeout, withexitstatus=withexitstatus,
- events=events, extra_args=extra_args, logfile=logfile, cwd=cwd,
- env=env, _spawn=spawnu, **kwargs)
-
-def _run(command, timeout, withexitstatus, events, extra_args, logfile, cwd,
- env, _spawn, **kwargs):
+ Like :class:`spawn`, passing *encoding* will make it work with unicode
+ instead of bytes. You can pass *codec_errors* to control how errors in
+ encoding and decoding are handled.
+ '''
if timeout == -1:
- child = _spawn(command, maxread=2000, logfile=logfile, cwd=cwd, env=env,
+ child = spawn(command, maxread=2000, logfile=logfile, cwd=cwd, env=env,
**kwargs)
else:
- child = _spawn(command, timeout=timeout, maxread=2000, logfile=logfile,
+ child = spawn(command, timeout=timeout, maxread=2000, logfile=logfile,
cwd=cwd, env=env, **kwargs)
if isinstance(events, list):
patterns= [x for x,y in events]
@@ -232,4 +219,13 @@ def _run(command, timeout, withexitstatus, events, extra_args, logfile, cwd,
else:
return child_result
+def runu(command, timeout=30, withexitstatus=False, events=None,
+ extra_args=None, logfile=None, cwd=None, env=None, **kwargs):
+ """Deprecated: pass encoding to run() instead.
+ """
+ kwargs.setdefault('encoding', 'utf-8')
+ return run(command, timeout=timeout, withexitstatus=withexitstatus,
+ events=events, extra_args=extra_args, logfile=logfile, cwd=cwd,
+ env=env, **kwargs)
+
# vim: set shiftround expandtab tabstop=4 shiftwidth=4 ft=python autoindent :
diff --git a/pexpect/async.py b/pexpect/async.py
index 5e5e9ee..ad75994 100644
--- a/pexpect/async.py
+++ b/pexpect/async.py
@@ -37,7 +37,7 @@ class PatternWaiter(asyncio.Protocol):
def data_received(self, data):
spawn = self.expecter.spawn
- s = spawn._coerce_read_string(data)
+ s = spawn._decoder.decode(data)
spawn._log(s, 'read')
if self.fut.done():
diff --git a/pexpect/pty_spawn.py b/pexpect/pty_spawn.py
index fe2cc0c..cdbb54f 100644
--- a/pexpect/pty_spawn.py
+++ b/pexpect/pty_spawn.py
@@ -14,7 +14,7 @@ import ptyprocess
from ptyprocess.ptyprocess import use_native_pty_fork
from .exceptions import ExceptionPexpect, EOF, TIMEOUT
-from .spawnbase import SpawnBase, SpawnBaseUnicode
+from .spawnbase import SpawnBase
from .utils import which, split_command_line
@contextmanager
@@ -30,14 +30,14 @@ PY3 = (sys.version_info[0] >= 3)
class spawn(SpawnBase):
'''This is the main class interface for Pexpect. Use this class to start
and control child applications. '''
- ptyprocess_class = ptyprocess.PtyProcess
# This is purely informational now - changing it has no effect
use_native_pty_fork = use_native_pty_fork
def __init__(self, command, args=[], timeout=30, maxread=2000,
searchwindowsize=None, logfile=None, cwd=None, env=None,
- ignore_sighup=True, echo=True, preexec_fn=None):
+ ignore_sighup=True, echo=True, preexec_fn=None,
+ encoding=None, codec_errors='strict'):
'''This is the constructor. The command parameter may be a string that
includes a command and any arguments to the command. For example::
@@ -172,7 +172,7 @@ class spawn(SpawnBase):
signal handlers.
'''
super(spawn, self).__init__(timeout=timeout, maxread=maxread, searchwindowsize=searchwindowsize,
- logfile=logfile)
+ logfile=logfile, encoding=encoding, codec_errors=codec_errors)
self.STDIN_FILENO = pty.STDIN_FILENO
self.STDOUT_FILENO = pty.STDOUT_FILENO
self.STDERR_FILENO = pty.STDERR_FILENO
@@ -277,7 +277,7 @@ class spawn(SpawnBase):
preexec_fn()
kwargs['preexec_fn'] = preexec_wrapper
- self.ptyproc = self.ptyprocess_class.spawn(self.args, env=self.env,
+ self.ptyproc = ptyprocess.PtyProcess.spawn(self.args, env=self.env,
cwd=self.cwd, **kwargs)
self.pid = self.ptyproc.pid
@@ -503,10 +503,8 @@ class spawn(SpawnBase):
s = self._coerce_send_string(s)
self._log(s, 'send')
- return self._send(s)
-
- def _send(self, s):
- return os.write(self.child_fd, s)
+ b = self._encoder.encode(s, final=False)
+ return os.write(self.child_fd, b)
def sendline(self, s=''):
'''Wraps send(), sending string ``s`` to child process, with
@@ -519,9 +517,11 @@ class spawn(SpawnBase):
n = n + self.send(self.linesep)
return n
- def _log_control(self, byte):
+ def _log_control(self, s):
"""Write control characters to the appropriate log files"""
- self._log(byte, 'send')
+ if self.encoding is not None:
+ s = s.decode(self.encoding, 'replace')
+ self._log(s, 'send')
def sendcontrol(self, char):
'''Helper method that wraps send() with mnemonic access for sending control
@@ -805,22 +805,7 @@ class spawn(SpawnBase):
# this actually is an exception.
raise
-
-class spawnu(SpawnBaseUnicode, spawn):
- """Works like spawn, but accepts and returns unicode strings.
-
- Extra parameters:
-
- :param encoding: The encoding to use for communications (default: 'utf-8')
- :param errors: How to handle encoding/decoding errors; one of 'strict'
- (the default), 'ignore', or 'replace', as described
- for :meth:`~bytes.decode` and :meth:`~str.encode`.
- """
- ptyprocess_class = ptyprocess.PtyProcessUnicode
-
- def _send(self, s):
- return os.write(self.child_fd, s.encode(self.encoding, self.errors))
-
- def _log_control(self, byte):
- s = byte.decode(self.encoding, 'replace')
- self._log(s, 'send')
+def spawnu(*args, **kwargs):
+ """Deprecated: pass encoding to spawn() instead."""
+ kwargs.setdefault('encoding', 'utf-8')
+ return spawn(*args, **kwargs)
diff --git a/pexpect/spawnbase.py b/pexpect/spawnbase.py
index d79c5c0..9fd2e18 100644
--- a/pexpect/spawnbase.py
+++ b/pexpect/spawnbase.py
@@ -7,35 +7,30 @@ from .exceptions import ExceptionPexpect, EOF, TIMEOUT
from .expect import Expecter, searcher_string, searcher_re
PY3 = (sys.version_info[0] >= 3)
+text_type = str if PY3 else unicode
+
+class _NullCoder(object):
+ """Pass bytes through unchanged."""
+ @staticmethod
+ def encode(b, final=False):
+ return b
+
+ @staticmethod
+ def decode(b, final=False):
+ return b
class SpawnBase(object):
"""A base class providing the backwards-compatible spawn API for Pexpect.
- This should not be instantiated directly: use :class:`pexpect.spawn` or :class:`pexpect.fdpexpect.fdspawn`."""
- string_type = bytes
- if PY3:
- allowed_string_types = (bytes, str)
- linesep = os.linesep.encode('ascii')
- crlf = '\r\n'.encode('ascii')
-
- @staticmethod
- def write_to_stdout(b):
- try:
- return sys.stdout.buffer.write(b)
- except AttributeError:
- # If stdout has been replaced, it may not have .buffer
- return sys.stdout.write(b.decode('ascii', 'replace'))
- else:
- allowed_string_types = (basestring,) # analysis:ignore
- linesep = os.linesep
- crlf = '\r\n'
- write_to_stdout = sys.stdout.write
-
+ This should not be instantiated directly: use :class:`pexpect.spawn` or
+ :class:`pexpect.fdpexpect.fdspawn`.
+ """
encoding = None
pid = None
flag_eof = False
- def __init__(self, timeout=30, maxread=2000, searchwindowsize=None, logfile=None):
+ def __init__(self, timeout=30, maxread=2000, searchwindowsize=None,
+ logfile=None, encoding=None, codec_errors='strict'):
self.stdin = sys.stdin
self.stdout = sys.stdout
self.stderr = sys.stderr
@@ -63,7 +58,7 @@ class SpawnBase(object):
# max bytes to read at one time into buffer
self.maxread = maxread
# This is the read buffer. See maxread.
- self.buffer = self.string_type()
+ self.buffer = bytes() if (encoding is None) else text_type()
# Data before searchwindowsize point is preserved, but not searched.
self.searchwindowsize = searchwindowsize
# Delay used before sending data to child. Time in seconds.
@@ -79,6 +74,42 @@ class SpawnBase(object):
self.name = '<' + repr(self) + '>'
self.closed = True
+ # Unicode interface
+ self.encoding = encoding
+ self.codec_errors = codec_errors
+ if encoding is None:
+ # bytes mode (accepts some unicode for backwards compatibility)
+ self._encoder = self._decoder = _NullCoder()
+ self.string_type = bytes
+ self.crlf = b'\r\n'
+ if PY3:
+ self.allowed_string_types = (bytes, str)
+ self.linesep = os.linesep.encode('ascii')
+ def write_to_stdout(b):
+ try:
+ return sys.stdout.buffer.write(b)
+ except AttributeError:
+ # If stdout has been replaced, it may not have .buffer
+ return sys.stdout.write(b.decode('ascii', 'replace'))
+ self.write_to_stdout = write_to_stdout
+ else:
+ self.allowed_string_types = (basestring,) # analysis:ignore
+ self.linesep = os.linesep
+ self.write_to_stdout = sys.stdout.write
+ else:
+ # unicode mode
+ self._encoder = codecs.getincrementalencoder(encoding)(codec_errors)
+ self._decoder = codecs.getincrementaldecoder(encoding)(codec_errors)
+ self.string_type = text_type
+ self.crlf = u'\r\n'
+ self.allowed_string_types = (text_type, )
+ if PY3:
+ self.linesep = os.linesep
+ else:
+ self.linesep = os.linesep.decode('ascii')
+ # This can handle unicode in both Python 2 and 3
+ self.write_to_stdout = sys.stdout.write
+
def _log(self, s, direction):
if self.logfile is not None:
self.logfile.write(s)
@@ -88,22 +119,19 @@ class SpawnBase(object):
second_log.write(s)
second_log.flush()
- @staticmethod
- def _coerce_expect_string(s):
- if not isinstance(s, bytes):
+ # For backwards compatibility, in bytes mode (when encoding is None)
+ # unicode is accepted for send and expect. Unicode mode is strictly unicode
+ # only.
+ def _coerce_expect_string(self, s):
+ if self.encoding is None and not isinstance(s, bytes):
return s.encode('ascii')
return s
- @staticmethod
- def _coerce_send_string(s):
- if not isinstance(s, bytes):
+ def _coerce_send_string(self, s):
+ if self.encoding is None and not isinstance(s, bytes):
return s.encode('utf-8')
return s
- @staticmethod
- def _coerce_read_string(s):
- return s
-
def read_nonblocking(self, size=1, timeout=None):
"""This reads data from the file descriptor.
@@ -125,7 +153,7 @@ class SpawnBase(object):
self.flag_eof = True
raise EOF('End Of File (EOF). Empty string style platform.')
- s = self._coerce_read_string(s)
+ s = self._decoder.decode(s, final=False)
self._log(s, 'read')
return s
@@ -451,34 +479,3 @@ class SpawnBase(object):
# We rely on subclasses to implement close(). If they don't, it's not
# clear what a context manager should do.
self.close()
-
-class SpawnBaseUnicode(SpawnBase):
- if PY3:
- string_type = str
- allowed_string_types = (str, )
- linesep = os.linesep
- crlf = '\r\n'
- else:
- string_type = unicode
- allowed_string_types = (unicode, )
- linesep = os.linesep.decode('ascii')
- crlf = '\r\n'.decode('ascii')
- # This can handle unicode in both Python 2 and 3
- write_to_stdout = sys.stdout.write
-
- def __init__(self, *args, **kwargs):
- self.encoding = kwargs.pop('encoding', 'utf-8')
- self.errors = kwargs.pop('errors', 'strict')
- self._decoder = codecs.getincrementaldecoder(self.encoding)(errors=self.errors)
- super(SpawnBaseUnicode, self).__init__(*args, **kwargs)
-
- @staticmethod
- def _coerce_expect_string(s):
- return s
-
- @staticmethod
- def _coerce_send_string(s):
- return s
-
- def _coerce_read_string(self, s):
- return self._decoder.decode(s, final=False)
\ No newline at end of file