| author | Guido van Rossum <guido@python.org> | 2007-11-06 21:34:58 +0000 |
|---|---|---|
| committer | Guido van Rossum <guido@python.org> | 2007-11-06 21:34:58 +0000 |
| commit | 98297ee7815939b124156e438b22bd652d67b5db (patch) | |
| tree | a9d239ebd87c73af2571ab48003984c4e18e27e5 /Lib | |
| parent | a19f80c6df2df5e8a5d0cff37131097835ef971e (diff) | |
| download | cpython-git-98297ee7815939b124156e438b22bd652d67b5db.tar.gz | |
Merging the py3k-pep3137 branch back into the py3k branch.
No detailed change log; just check out the change log for the py3k-pep3137
branch. The most obvious changes:
- str8 renamed to bytes (PyString at the C level);
- bytes renamed to buffer (PyBytes at the C level);
- PyString and PyUnicode are no longer compatible.
I.e. we now have an immutable bytes type and a mutable bytes type.
The behavior of PyString was modified quite a bit, to make it more
bytes-like. Some changes are still on the to-do list.
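To make the renaming concrete: after this merge, `bytes` is the immutable type and `buffer` is the mutable one. The interim `buffer` name did not survive to Python 3.0 final (it shipped as `bytearray`), so the minimal sketch below uses `bytearray` to stay runnable on a current interpreter; it illustrates the immutable/mutable split described above, not the exact post-merge API.

```python
# Minimal sketch of the immutable/mutable split described in the commit message.
# The mutable type was named `buffer` at merge time; it later shipped as
# `bytearray`, which is used here so the example runs on a modern interpreter.
frozen = b"abc"               # immutable bytes
mutable = bytearray(b"abc")   # mutable counterpart (called `buffer` in this branch)

mutable[0] = ord(b"x")        # in-place mutation is allowed
mutable.extend(b"yz")
assert bytes(mutable) == b"xbcyz"

try:
    frozen[0] = ord(b"x")     # immutable: item assignment raises TypeError
except TypeError:
    pass
```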
Diffstat (limited to 'Lib')
77 files changed, 984 insertions, 880 deletions
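A recurring mechanical change in the diff below follows from the third bullet of the commit message: with PyString and PyUnicode no longer compatible, `str(some_bytes)` produces a repr-style text string rather than decoded text, so call sites switch to an explicit `.decode(...)` (see the `hexlify`, `base64`, and `gettext` hunks). A minimal sketch of the difference, using current Python 3 behaviour as a stand-in for the post-merge semantics:

```python
from binascii import hexlify

raw = hexlify(b"\x01\xff")        # b'01ff', a bytes object

# str() of bytes no longer yields the decoded text; it yields the repr.
assert str(raw) == "b'01ff'"

# The pattern applied throughout the diff: decode explicitly instead.
assert raw.decode("ascii") == "01ff"
```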
diff --git a/Lib/_abcoll.py b/Lib/_abcoll.py index 8f630bf166..ec3e2f838c 100644 --- a/Lib/_abcoll.py +++ b/Lib/_abcoll.py @@ -489,7 +489,7 @@ class Sequence(metaclass=ABCMeta): Sequence.register(tuple) Sequence.register(str) -Sequence.register(str8) +Sequence.register(bytes) Sequence.register(memoryview) diff --git a/Lib/base64.py b/Lib/base64.py index e100e0fab0..18beffcda9 100755 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -27,10 +27,13 @@ __all__ = [ ] +bytes_buffer = (bytes, buffer) # Types acceptable as binary data + + def _translate(s, altchars): - if not isinstance(s, bytes): + if not isinstance(s, bytes_buffer): raise TypeError("expected bytes, not %s" % s.__class__.__name__) - translation = bytes(range(256)) + translation = buffer(range(256)) for k, v in altchars.items(): translation[ord(k)] = v[0] return s.translate(translation) @@ -49,12 +52,12 @@ def b64encode(s, altchars=None): The encoded byte string is returned. """ - if not isinstance(s, bytes): + if not isinstance(s, bytes_buffer): s = bytes(s, "ascii") # Strip off the trailing newline encoded = binascii.b2a_base64(s)[:-1] if altchars is not None: - if not isinstance(altchars, bytes): + if not isinstance(altchars, bytes_buffer): altchars = bytes(altchars, "ascii") assert len(altchars) == 2, repr(altchars) return _translate(encoded, {'+': altchars[0:1], '/': altchars[1:2]}) @@ -72,10 +75,10 @@ def b64decode(s, altchars=None): s were incorrectly padded or if there are non-alphabet characters present in the string. """ - if not isinstance(s, bytes): + if not isinstance(s, bytes_buffer): s = bytes(s) if altchars is not None: - if not isinstance(altchars, bytes): + if not isinstance(altchars, bytes_buffer): altchars = bytes(altchars, "ascii") assert len(altchars) == 2, repr(altchars) s = _translate(s, {chr(altchars[0]): b'+', chr(altchars[1]): b'/'}) @@ -144,7 +147,7 @@ def b32encode(s): s is the byte string to encode. The encoded byte string is returned. """ - if not isinstance(s, bytes): + if not isinstance(s, bytes_buffer): s = bytes(s) quanta, leftover = divmod(len(s), 5) # Pad the last quantum with zero bits if necessary @@ -201,7 +204,7 @@ def b32decode(s, casefold=False, map01=None): the input is incorrectly padded or if there are non-alphabet characters present in the input. """ - if not isinstance(s, bytes): + if not isinstance(s, bytes_buffer): s = bytes(s) quanta, leftover = divmod(len(s), 8) if leftover: @@ -210,12 +213,12 @@ def b32decode(s, casefold=False, map01=None): # False, or the character to map the digit 1 (one) to. It should be # either L (el) or I (eye). if map01: - if not isinstance(map01, bytes): + if not isinstance(map01, bytes_buffer): map01 = bytes(map01) assert len(map01) == 1, repr(map01) - s = _translate(s, {'0': b'O', '1': map01}) + s = _translate(s, {b'0': b'O', b'1': map01}) if casefold: - s = bytes(str(s, "ascii").upper(), "ascii") + s = s.upper() # Strip off pad characters from the right. We need to count the pad # characters because this will tell us how many null bytes to remove from # the end of the decoded string. @@ -266,7 +269,7 @@ def b16encode(s): s is the byte string to encode. The encoded byte string is returned. """ - return bytes(str(binascii.hexlify(s), "ascii").upper(), "ascii") + return binascii.hexlify(s).upper() def b16decode(s, casefold=False): @@ -280,10 +283,10 @@ def b16decode(s, casefold=False): s were incorrectly padded or if there are non-alphabet characters present in the string. 
""" - if not isinstance(s, bytes): + if not isinstance(s, bytes_buffer): s = bytes(s) if casefold: - s = bytes(str(s, "ascii").upper(), "ascii") + s = s.upper() if re.search('[^0-9A-F]', s): raise binascii.Error('Non-base16 digit found') return binascii.unhexlify(s) @@ -327,7 +330,7 @@ def encodestring(s): Argument and return value are bytes. """ - if not isinstance(s, bytes): + if not isinstance(s, bytes_buffer): raise TypeError("expected bytes, not %s" % s.__class__.__name__) pieces = [] for i in range(0, len(s), MAXBINSIZE): @@ -341,7 +344,7 @@ def decodestring(s): Argument and return value are bytes. """ - if not isinstance(s, bytes): + if not isinstance(s, bytes_buffer): raise TypeError("expected bytes, not %s" % s.__class__.__name__) return binascii.a2b_base64(s) diff --git a/Lib/copy.py b/Lib/copy.py index fa75daa544..1a14f0ed7f 100644 --- a/Lib/copy.py +++ b/Lib/copy.py @@ -187,7 +187,7 @@ try: d[complex] = _deepcopy_atomic except NameError: pass -d[str8] = _deepcopy_atomic +d[bytes] = _deepcopy_atomic d[str] = _deepcopy_atomic try: d[types.CodeType] = _deepcopy_atomic diff --git a/Lib/ctypes/test/test_array_in_pointer.py b/Lib/ctypes/test/test_array_in_pointer.py index 2b939f0e10..6bed1f1e67 100644 --- a/Lib/ctypes/test/test_array_in_pointer.py +++ b/Lib/ctypes/test/test_array_in_pointer.py @@ -6,7 +6,7 @@ import re def dump(obj): # helper function to dump memory contents in hex, with a hyphen # between the bytes. - h = str(hexlify(memoryview(obj))) + h = hexlify(memoryview(obj)).decode() return re.sub(r"(..)", r"\1-", h)[:-1] diff --git a/Lib/ctypes/test/test_byteswap.py b/Lib/ctypes/test/test_byteswap.py index dab97224fe..67fa44b630 100644 --- a/Lib/ctypes/test/test_byteswap.py +++ b/Lib/ctypes/test/test_byteswap.py @@ -4,7 +4,7 @@ from binascii import hexlify from ctypes import * def bin(s): - return str(hexlify(memoryview(s))).upper() + return hexlify(memoryview(s)).decode().upper() # Each *simple* type that supports different byte orders has an # __ctype_be__ attribute that specifies the same type in BIG ENDIAN diff --git a/Lib/ctypes/test/test_slicing.py b/Lib/ctypes/test/test_slicing.py index 28e66da733..4974302620 100644 --- a/Lib/ctypes/test/test_slicing.py +++ b/Lib/ctypes/test/test_slicing.py @@ -115,7 +115,7 @@ class SlicesTestCase(unittest.TestCase): dll.my_strdup.errcheck = errcheck try: res = dll.my_strdup(s) - self.failUnlessEqual(res, str(s)) + self.failUnlessEqual(res, s.decode()) finally: del dll.my_strdup.errcheck diff --git a/Lib/dumbdbm.py b/Lib/dumbdbm.py index b47b7bd4b2..78e4999b89 100644 --- a/Lib/dumbdbm.py +++ b/Lib/dumbdbm.py @@ -163,7 +163,7 @@ class _Database(UserDict.DictMixin): if not isinstance(key, bytes): raise TypeError("keys must be bytes") key = key.decode("latin-1") # hashable bytes - if not isinstance(val, (str8, bytes)): + if not isinstance(val, (buffer, bytes)): raise TypeError("values must be byte strings") if key not in self._index: self._addkey(key, self._addval(val)) diff --git a/Lib/email/base64mime.py b/Lib/email/base64mime.py index 369bf10789..cff558e805 100644 --- a/Lib/email/base64mime.py +++ b/Lib/email/base64mime.py @@ -70,7 +70,7 @@ def header_encode(header_bytes, charset='iso-8859-1'): # Return empty headers unchanged if not header_bytes: return str(header_bytes) - encoded = b64encode(header_bytes) + encoded = b64encode(header_bytes).decode("ascii") return '=?%s?b?%s?=' % (charset, encoded) @@ -93,7 +93,7 @@ def body_encode(s, maxlinelen=76, eol=NL): for i in range(0, len(s), max_unencoded): # BAW: should encode() inherit 
b2a_base64()'s dubious behavior in # adding a newline to the encoded string? - enc = str(b2a_base64(s[i:i + max_unencoded])) + enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii") if enc.endswith(NL) and eol != NL: enc = enc[:-1] + eol encvec.append(enc) diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index 74a3c9d397..c544004fea 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -2448,9 +2448,7 @@ Here's the message body def test_crlf_separation(self): eq = self.assertEqual - # XXX When Guido fixes TextIOWrapper.read() to act just like - # .readlines(), open this in 'rb' mode with newlines='\n'. - with openfile('msg_26.txt', mode='rb') as fp: + with openfile('msg_26.txt', newline='\n') as fp: msg = Parser().parse(fp) eq(len(msg.get_payload()), 2) part1 = msg.get_payload(0) diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py index 87e57455a5..d72eae9328 100644 --- a/Lib/encodings/__init__.py +++ b/Lib/encodings/__init__.py @@ -52,7 +52,7 @@ def normalize_encoding(encoding): non-ASCII characters, these must be Latin-1 compatible. """ - if isinstance(encoding, str8): + if isinstance(encoding, bytes): encoding = str(encoding, "ascii") chars = [] punct = False diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py index b81e5fa158..30f507a34e 100644 --- a/Lib/encodings/idna.py +++ b/Lib/encodings/idna.py @@ -151,9 +151,9 @@ class Codec(codecs.Codec): raise UnicodeError("unsupported error handling "+errors) if not input: - return b"", 0 + return b'', 0 - result = b"" + result = buffer() labels = dots.split(input) if labels and not labels[-1]: trailing_dot = b'.' @@ -165,7 +165,7 @@ class Codec(codecs.Codec): # Join with U+002E result.extend(b'.') result.extend(ToASCII(label)) - return result+trailing_dot, len(input) + return bytes(result+trailing_dot), len(input) def decode(self, input, errors='strict'): @@ -216,7 +216,7 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder): if labels: trailing_dot = b'.' 
- result = b"" + result = buffer() size = 0 for label in labels: if size: @@ -228,7 +228,7 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder): result += trailing_dot size += len(trailing_dot) - return (result, size) + return (bytes(result), size) class IncrementalDecoder(codecs.BufferedIncrementalDecoder): def _buffer_decode(self, input, errors, final): diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py index f08c807cd9..56e6958b2c 100644 --- a/Lib/encodings/punycode.py +++ b/Lib/encodings/punycode.py @@ -10,7 +10,7 @@ import codecs def segregate(str): """3.1 Basic code point segregation""" - base = b"" + base = buffer() extended = set() for c in str: if ord(c) < 128: @@ -18,7 +18,7 @@ def segregate(str): else: extended.add(c) extended = sorted(extended) - return (base, extended) + return bytes(base), extended def selective_len(str, max): """Return the length of str, considering only characters below max.""" @@ -78,13 +78,13 @@ def T(j, bias): digits = b"abcdefghijklmnopqrstuvwxyz0123456789" def generate_generalized_integer(N, bias): """3.3 Generalized variable-length integers""" - result = b"" + result = buffer() j = 0 while 1: t = T(j, bias) if N < t: result.append(digits[N]) - return result + return bytes(result) result.append(digits[t + ((N - t) % (36 - t))]) N = (N - t) // (36 - t) j += 1 @@ -107,13 +107,13 @@ def adapt(delta, first, numchars): def generate_integers(baselen, deltas): """3.4 Bias adaptation""" # Punycode parameters: initial bias = 72, damp = 700, skew = 38 - result = b"" + result = buffer() bias = 72 for points, delta in enumerate(deltas): s = generate_generalized_integer(delta, bias) result.extend(s) bias = adapt(delta, points==0, baselen+points+1) - return result + return bytes(result) def punycode_encode(text): base, extended = segregate(text) diff --git a/Lib/gettext.py b/Lib/gettext.py index 8ff0a80ab6..be24f1dc62 100644 --- a/Lib/gettext.py +++ b/Lib/gettext.py @@ -292,7 +292,7 @@ class GNUTranslations(NullTranslations): # Catalog description lastk = k = None for b_item in tmsg.split('\n'.encode("ascii")): - item = str(b_item).strip() + item = b_item.decode().strip() if not item: continue if ':' in item: diff --git a/Lib/httplib.py b/Lib/httplib.py index e891883504..dc8bd6bae9 100644 --- a/Lib/httplib.py +++ b/Lib/httplib.py @@ -827,6 +827,7 @@ class HTTPConnection: if self.port == HTTP_PORT: self.putheader('Host', host_enc) else: + host_enc = host_enc.decode("ascii") self.putheader('Host', "%s:%s" % (host_enc, self.port)) # note: we are assuming that clients will not attempt to set these @@ -860,8 +861,12 @@ class HTTPConnection: if self.__state != _CS_REQ_STARTED: raise CannotSendHeader() - header = '%s: %s' % (header, value) - self._output(header.encode('ascii')) + if hasattr(header, 'encode'): + header = header.encode('ascii') + if hasattr(value, 'encode'): + value = value.encode('ascii') + header = header + b': ' + value + self._output(header) def endheaders(self): """Indicate that the last header line has been sent to the server.""" diff --git a/Lib/idlelib/OutputWindow.py b/Lib/idlelib/OutputWindow.py index ac1361bfdd..42aa77e2bb 100644 --- a/Lib/idlelib/OutputWindow.py +++ b/Lib/idlelib/OutputWindow.py @@ -35,7 +35,7 @@ class OutputWindow(EditorWindow): # Act as output file def write(self, s, tags=(), mark="insert"): - if isinstance(s, (bytes, str8)): + if isinstance(s, (bytes, bytes)): s = s.decode(IOBinding.encoding, "replace") self.text.insert(mark, s, tags) self.text.see(mark) @@ -391,7 +391,7 @@ class 
IOBase(metaclass=abc.ABCMeta): return 1 if limit is None: limit = -1 - res = bytes() + res = buffer() while limit < 0 or len(res) < limit: b = self.read(nreadahead()) if not b: @@ -399,7 +399,7 @@ class IOBase(metaclass=abc.ABCMeta): res += b if res.endswith(b"\n"): break - return res + return bytes(res) def __iter__(self): self._checkClosed() @@ -454,20 +454,20 @@ class RawIOBase(IOBase): n = -1 if n < 0: return self.readall() - b = bytes(n.__index__()) + b = buffer(n.__index__()) n = self.readinto(b) del b[n:] - return b + return bytes(b) def readall(self): """readall() -> bytes. Read until EOF, using multiple read() call.""" - res = bytes() + res = buffer() while True: data = self.read(DEFAULT_BUFFER_SIZE) if not data: break res += data - return res + return bytes(res) def readinto(self, b: bytes) -> int: """readinto(b: bytes) -> int. Read up to len(b) bytes into b. @@ -655,14 +655,14 @@ class BytesIO(BufferedIOBase): # XXX More docs def __init__(self, initial_bytes=None): - buffer = b"" + buf = buffer() if initial_bytes is not None: - buffer += initial_bytes - self._buffer = buffer + buf += initial_bytes + self._buffer = buf self._pos = 0 def getvalue(self): - return self._buffer + return bytes(self._buffer) def read(self, n=None): if n is None: @@ -672,7 +672,7 @@ class BytesIO(BufferedIOBase): newpos = min(len(self._buffer), self._pos + n) b = self._buffer[self._pos : newpos] self._pos = newpos - return b + return bytes(b) def read1(self, n): return self.read(n) @@ -819,7 +819,7 @@ class BufferedWriter(_BufferedIOMixin): self.max_buffer_size = (2*buffer_size if max_buffer_size is None else max_buffer_size) - self._write_buf = b"" + self._write_buf = buffer() def write(self, b): if self.closed: @@ -1186,7 +1186,7 @@ class TextIOWrapper(TextIOBase): try: decoder.setstate((b"", decoder_state)) n = 0 - bb = bytes(1) + bb = buffer(1) for i, bb[0] in enumerate(readahead): n += len(decoder.decode(bb)) if n >= needed: @@ -1266,7 +1266,9 @@ class TextIOWrapper(TextIOBase): return line def readline(self, limit=None): - if limit is not None: + if limit is None: + limit = -1 + if limit >= 0: # XXX Hack to support limit argument, for backwards compatibility line = self.readline() if len(line) <= limit: diff --git a/Lib/mailbox.py b/Lib/mailbox.py index a37bec9138..13e3eb7efa 100755 --- a/Lib/mailbox.py +++ b/Lib/mailbox.py @@ -333,7 +333,7 @@ class Maildir(Mailbox): def get_file(self, key): """Return a file-like representation or raise a KeyError.""" - f = open(os.path.join(self._path, self._lookup(key)), 'rb') + f = open(os.path.join(self._path, self._lookup(key)), 'r') return _ProxyFile(f) def iterkeys(self): @@ -936,7 +936,7 @@ class MH(Mailbox): def get_file(self, key): """Return a file-like representation or raise a KeyError.""" try: - f = open(os.path.join(self._path, str(key)), 'rb') + f = open(os.path.join(self._path, str(key)), 'r') except IOError as e: if e.errno == errno.ENOENT: raise KeyError('No message with key: %s' % key) @@ -1762,11 +1762,11 @@ class _ProxyFile: def read(self, size=None): """Read bytes.""" - return str(self._read(size, self._file.read)) + return self._read(size, self._file.read) def readline(self, size=None): """Read a line.""" - return str(self._read(size, self._file.readline)) + return self._read(size, self._file.readline) def readlines(self, sizehint=None): """Read multiple lines.""" diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py index cc5ad19be0..c345a33711 100644 --- a/Lib/modulefinder.py +++ b/Lib/modulefinder.py @@ -17,12 +17,12 @@ else: READ_MODE = 
"r" # XXX Clean up once str8's cstor matches bytes. -LOAD_CONST = str8([dis.opname.index('LOAD_CONST')]) -IMPORT_NAME = str8([dis.opname.index('IMPORT_NAME')]) -STORE_NAME = str8([dis.opname.index('STORE_NAME')]) -STORE_GLOBAL = str8([dis.opname.index('STORE_GLOBAL')]) +LOAD_CONST = bytes([dis.opname.index('LOAD_CONST')]) +IMPORT_NAME = bytes([dis.opname.index('IMPORT_NAME')]) +STORE_NAME = bytes([dis.opname.index('STORE_NAME')]) +STORE_GLOBAL = bytes([dis.opname.index('STORE_GLOBAL')]) STORE_OPS = [STORE_NAME, STORE_GLOBAL] -HAVE_ARGUMENT = str8([dis.HAVE_ARGUMENT]) +HAVE_ARGUMENT = bytes([dis.HAVE_ARGUMENT]) # Modulefinder does a good job at simulating Python's, but it can not # handle __path__ modifications packages make at runtime. Therefore there @@ -368,7 +368,7 @@ class ModuleFinder: consts = co.co_consts LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME while code: - c = str8([code[0]]) + c = bytes([code[0]]) if c in STORE_OPS: oparg, = unpack('<H', code[1:3]) yield "store", (names[oparg],) diff --git a/Lib/pickle.py b/Lib/pickle.py index 18ad2100b5..d7bf24e69e 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -38,6 +38,9 @@ import codecs __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", "Unpickler", "dump", "dumps", "load", "loads"] +# Shortcut for use in isinstance testing +bytes_types = (bytes, buffer, memoryview) + # These are purely informational; no code uses these. format_version = "2.0" # File format version we write compatible_formats = ["1.0", # Original protocol 0 @@ -499,10 +502,10 @@ class Pickler: else: self.write(BINSTRING + pack("<i", n) + bytes(obj)) else: - # Strip leading 's' due to repr() of str8() returning s'...' - self.write(STRING + repr(obj).lstrip("s").encode("ascii") + b'\n') + # Strip leading 'b' due to repr() of bytes() returning b'...' 
+ self.write(STRING + repr(obj).lstrip("b").encode("ascii") + b'\n') self.memoize(obj) - dispatch[str8] = save_string + dispatch[bytes] = save_string def save_unicode(self, obj, pack=struct.pack): if self.bin: @@ -804,7 +807,7 @@ class Unpickler: key = read(1) if not key: raise EOFError - assert isinstance(key, bytes) + assert isinstance(key, bytes_types) dispatch[key[0]](self) except _Stop as stopinst: return stopinst.value @@ -906,7 +909,8 @@ class Unpickler: dispatch[BINFLOAT[0]] = load_binfloat def load_string(self): - rep = self.readline()[:-1] + orig = self.readline() + rep = orig[:-1] for q in (b'"', b"'"): # double or single quote if rep.startswith(q): if not rep.endswith(q): @@ -914,13 +918,13 @@ class Unpickler: rep = rep[len(q):-len(q)] break else: - raise ValueError("insecure string pickle") - self.append(str(codecs.escape_decode(rep)[0], "latin-1")) + raise ValueError("insecure string pickle: %r" % orig) + self.append(codecs.escape_decode(rep)[0]) dispatch[STRING[0]] = load_string def load_binstring(self): len = mloads(b'i' + self.read(4)) - self.append(str(self.read(len), "latin-1")) + self.append(self.read(len)) dispatch[BINSTRING[0]] = load_binstring def load_unicode(self): @@ -934,7 +938,7 @@ class Unpickler: def load_short_binstring(self): len = ord(self.read(1)) - self.append(str(self.read(len), "latin-1")) + self.append(bytes(self.read(len))) dispatch[SHORT_BINSTRING[0]] = load_short_binstring def load_tuple(self): @@ -1063,9 +1067,9 @@ class Unpickler: def find_class(self, module, name): # Subclasses may override this - if isinstance(module, bytes): + if isinstance(module, bytes_types): module = module.decode("utf-8") - if isinstance(name, bytes): + if isinstance(name, bytes_types): name = name.decode("utf-8") __import__(module) mod = sys.modules[module] @@ -1099,7 +1103,7 @@ class Unpickler: dispatch[DUP[0]] = load_dup def load_get(self): - self.append(self.memo[str(self.readline())[:-1]]) + self.append(self.memo[self.readline()[:-1].decode("ascii")]) dispatch[GET[0]] = load_get def load_binget(self): @@ -1113,7 +1117,7 @@ class Unpickler: dispatch[LONG_BINGET[0]] = load_long_binget def load_put(self): - self.memo[str(self.readline()[:-1])] = self.stack[-1] + self.memo[self.readline()[:-1].decode("ascii")] = self.stack[-1] dispatch[PUT[0]] = load_put def load_binput(self): @@ -1298,7 +1302,7 @@ def dumps(obj, protocol=None): f = io.BytesIO() Pickler(f, protocol).dump(obj) res = f.getvalue() - assert isinstance(res, bytes) + assert isinstance(res, bytes_types) return res def load(file): diff --git a/Lib/pickletools.py b/Lib/pickletools.py index b1337c41ce..af84c1f8b9 100644 --- a/Lib/pickletools.py +++ b/Lib/pickletools.py @@ -11,11 +11,15 @@ dis(pickle, out=None, memo=None, indentlevel=4) ''' import codecs +import pickle +import re __all__ = ['dis', 'genops', ] +bytes_types = pickle.bytes_types + # Other ideas: # # - A pickle verifier: read a pickle and check it exhaustively for @@ -307,7 +311,7 @@ def read_stringnl(f, decode=True, stripquotes=True): raise ValueError("no string quotes around %r" % data) if decode: - data = str(codecs.escape_decode(data)[0]) + data = codecs.escape_decode(data)[0].decode("ascii") return data stringnl = ArgumentDescriptor( @@ -321,7 +325,7 @@ stringnl = ArgumentDescriptor( """) def read_stringnl_noescape(f): - return read_stringnl(f, decode=False, stripquotes=False) + return read_stringnl(f, stripquotes=False) stringnl_noescape = ArgumentDescriptor( name='stringnl_noescape', @@ -744,14 +748,14 @@ pyfloat = StackObject( doc="A Python 
float object.") pystring = StackObject( - name='str', - obtype=str, - doc="A Python string object.") + name='bytes', + obtype=bytes, + doc="A Python bytes object.") pyunicode = StackObject( - name='unicode', + name='str', obtype=str, - doc="A Python Unicode string object.") + doc="A Python string object.") pynone = StackObject( name="None", @@ -1735,7 +1739,6 @@ for d in opcodes: del d def assure_pickle_consistency(verbose=False): - import pickle, re copy = code2op.copy() for name in pickle.__all__: @@ -1803,7 +1806,7 @@ def genops(pickle): to query its current position) pos is None. """ - if isinstance(pickle, bytes): + if isinstance(pickle, bytes_types): import io pickle = io.BytesIO(pickle) @@ -1978,7 +1981,7 @@ class _Example: _dis_test = r""" >>> import pickle ->>> x = [1, 2, (3, 4), {str8(b'abc'): "def"}] +>>> x = [1, 2, (3, 4), {bytes(b'abc'): "def"}] >>> pkl = pickle.dumps(x, 0) >>> dis(pkl) 0: ( MARK diff --git a/Lib/plat-mac/aepack.py b/Lib/plat-mac/aepack.py index 3caf2f5e00..e958b85e9a 100644 --- a/Lib/plat-mac/aepack.py +++ b/Lib/plat-mac/aepack.py @@ -98,7 +98,7 @@ def pack(x, forcetype = None): return AE.AECreateDesc(b'long', struct.pack('l', x)) if isinstance(x, float): return AE.AECreateDesc(b'doub', struct.pack('d', x)) - if isinstance(x, (bytes, str8)): + if isinstance(x, (bytes, buffer)): return AE.AECreateDesc(b'TEXT', x) if isinstance(x, str): # See http://developer.apple.com/documentation/Carbon/Reference/Apple_Event_Manager/Reference/reference.html#//apple_ref/doc/constant_group/typeUnicodeText diff --git a/Lib/plat-mac/aetypes.py b/Lib/plat-mac/aetypes.py index cf6e3b940c..d29ea975e0 100644 --- a/Lib/plat-mac/aetypes.py +++ b/Lib/plat-mac/aetypes.py @@ -22,7 +22,18 @@ def _four_char_code(four_chars): four_chars must contain only ASCII characters. """ - return ("%-4.4s" % str(four_chars)).encode("latin-1") + if isinstance(four_chars, (bytes, buffer)): + b = bytes(four_chars[:4]) + n = len(b) + if n < 4: + b += b' ' * (4 - n) + return b + else: + s = str(four_chars)[:4] + n = len(s) + if n < 4: + s += ' ' * (4 - n) + return bytes(s, "latin-1") # MacRoman? 
class Unknown: """An uninterpreted AE object""" @@ -47,7 +58,7 @@ class Enum: return "Enum(%r)" % (self.enum,) def __str__(self): - return self.enum.strip(b' ') + return self.enum.decode("latin-1").strip(" ") def __aepack__(self): return pack(self.enum, typeEnumeration) @@ -559,7 +570,7 @@ class DelayedComponentItem: return "selector for element %s of %s"%(self.__class__.__name__, str(self.fr)) template = """ -class %s(ComponentItem): want = '%s' +class %s(ComponentItem): want = %r """ exec(template % ("Text", b'text')) diff --git a/Lib/plat-mac/plistlib.py b/Lib/plat-mac/plistlib.py index e0e01ffbb5..72dfa2e414 100644 --- a/Lib/plat-mac/plistlib.py +++ b/Lib/plat-mac/plistlib.py @@ -164,7 +164,7 @@ class DumbXMLWriter: def simpleElement(self, element, value=None): if value is not None: - value = _escapeAndEncode(value) + value = _escape(value) self.writeln("<%s>%s</%s>" % (element, value, element)) else: self.writeln("<%s/>" % element) @@ -207,7 +207,7 @@ _controlCharPat = re.compile( r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f" r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]") -def _escapeAndEncode(text): +def _escape(text): m = _controlCharPat.search(text) if m is not None: raise ValueError("strings can't contains control characters; " @@ -217,7 +217,7 @@ def _escapeAndEncode(text): text = text.replace("&", "&") # escape '&' text = text.replace("<", "<") # escape '<' text = text.replace(">", ">") # escape '>' - return text.encode("utf-8") # encode as UTF-8 + return text PLISTHEADER = b"""\ diff --git a/Lib/sqlite3/dbapi2.py b/Lib/sqlite3/dbapi2.py index 52fb4ae4f7..d051f0432f 100644 --- a/Lib/sqlite3/dbapi2.py +++ b/Lib/sqlite3/dbapi2.py @@ -60,13 +60,13 @@ def register_adapters_and_converters(): return val.isoformat(" ") def convert_date(val): - return datetime.date(*map(int, val.split("-"))) + return datetime.date(*map(int, val.split(b"-"))) def convert_timestamp(val): - datepart, timepart = val.split(" ") - year, month, day = map(int, datepart.split("-")) - timepart_full = timepart.split(".") - hours, minutes, seconds = map(int, timepart_full[0].split(":")) + datepart, timepart = val.split(b" ") + year, month, day = map(int, datepart.split(b"-")) + timepart_full = timepart.split(b".") + hours, minutes, seconds = map(int, timepart_full[0].split(b":")) if len(timepart_full) == 2: microseconds = int(timepart_full[1]) else: diff --git a/Lib/sqlite3/test/factory.py b/Lib/sqlite3/test/factory.py index f20848fc81..a9a828fc12 100644 --- a/Lib/sqlite3/test/factory.py +++ b/Lib/sqlite3/test/factory.py @@ -163,8 +163,8 @@ class TextFactoryTests(unittest.TestCase): germany = "Deutchland" a_row = self.con.execute("select ?", (austria,)).fetchone() d_row = self.con.execute("select ?", (germany,)).fetchone() - self.failUnless(type(a_row[0]) == str, "type of non-ASCII row must be unicode") - self.failUnless(type(d_row[0]) == str8, "type of ASCII-only row must be str8") + self.failUnless(type(a_row[0]) == str, "type of non-ASCII row must be str") + self.failUnless(type(d_row[0]) == str, "type of ASCII-only row must be str") def tearDown(self): self.con.close() diff --git a/Lib/sqlite3/test/types.py b/Lib/sqlite3/test/types.py index 4ff948d6cc..8845e0cb59 100644 --- a/Lib/sqlite3/test/types.py +++ b/Lib/sqlite3/test/types.py @@ -62,11 +62,12 @@ class SqliteTypeTests(unittest.TestCase): self.failUnlessEqual(row[0], val) def CheckBlob(self): - val = memoryview(b"Guglhupf") + sample = b"Guglhupf" + val = memoryview(sample) self.cur.execute("insert into test(b) values (?)", 
(val,)) self.cur.execute("select b from test") row = self.cur.fetchone() - self.failUnlessEqual(row[0], val) + self.failUnlessEqual(row[0], sample) def CheckUnicodeExecute(self): self.cur.execute("select 'Österreich'") @@ -76,8 +77,8 @@ class SqliteTypeTests(unittest.TestCase): class DeclTypesTests(unittest.TestCase): class Foo: def __init__(self, _val): - if isinstance(_val, str8): - # sqlite3 always calls __init__ with a str8 created from a + if isinstance(_val, bytes): + # sqlite3 always calls __init__ with a bytes created from a # UTF-8 string when __conform__ was used to store the object. _val = _val.decode('utf8') self.val = _val @@ -207,11 +208,12 @@ class DeclTypesTests(unittest.TestCase): def CheckBlob(self): # default - val = memoryview(b"Guglhupf") + sample = b"Guglhupf" + val = memoryview(sample) self.cur.execute("insert into test(bin) values (?)", (val,)) self.cur.execute("select bin from test") row = self.cur.fetchone() - self.failUnlessEqual(row[0], val) + self.failUnlessEqual(row[0], sample) class ColNamesTests(unittest.TestCase): def setUp(self): @@ -219,13 +221,11 @@ class ColNamesTests(unittest.TestCase): self.cur = self.con.cursor() self.cur.execute("create table test(x foo)") - sqlite.converters["FOO"] = lambda x: "[%s]" % x - sqlite.converters["BAR"] = lambda x: "<%s>" % x + sqlite.converters["BAR"] = lambda x: b"<" + x + b">" sqlite.converters["EXC"] = lambda x: 5/0 sqlite.converters["B1B1"] = lambda x: "MARKER" def tearDown(self): - del sqlite.converters["FOO"] del sqlite.converters["BAR"] del sqlite.converters["EXC"] del sqlite.converters["B1B1"] @@ -252,14 +252,14 @@ class ColNamesTests(unittest.TestCase): self.cur.execute("insert into test(x) values (?)", ("xxx",)) self.cur.execute('select x as "x [bar]" from test') val = self.cur.fetchone()[0] - self.failUnlessEqual(val, "<xxx>") + self.failUnlessEqual(val, b"<xxx>") # Check if the stripping of colnames works. Everything after the first # whitespace should be stripped. 
self.failUnlessEqual(self.cur.description[0][0], "x") def CheckCaseInConverterName(self): - self.cur.execute("""select 'other' as "x [b1b1]\"""") + self.cur.execute("select 'other' as \"x [b1b1]\"") val = self.cur.fetchone()[0] self.failUnlessEqual(val, "MARKER") diff --git a/Lib/sqlite3/test/userfunctions.py b/Lib/sqlite3/test/userfunctions.py index 994057e945..dc3a709acd 100644 --- a/Lib/sqlite3/test/userfunctions.py +++ b/Lib/sqlite3/test/userfunctions.py @@ -198,7 +198,7 @@ class FunctionTests(unittest.TestCase): cur.execute("select returnblob()") val = cur.fetchone()[0] self.failUnlessEqual(type(val), bytes) - self.failUnlessEqual(val, memoryview(b"blob")) + self.failUnlessEqual(val, b"blob") def CheckFuncException(self): cur = self.con.cursor() diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index bf3e23ff03..fa0b8aaf82 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -192,14 +192,14 @@ class Tokenizer: char = self.string[self.index:self.index+1] # Special case for the str8, since indexing returns a integer # XXX This is only needed for test_bug_926075 in test_re.py - if isinstance(self.string, str8): + if isinstance(self.string, bytes): char = chr(char) if char == "\\": try: c = self.string[self.index + 1] except IndexError: raise error("bogus escape (end of line)") - if isinstance(self.string, str8): + if isinstance(self.string, bytes): char = chr(c) char = char + c self.index = self.index + len(char) diff --git a/Lib/string.py b/Lib/string.py index 03179fbb44..6117ac06e5 100644 --- a/Lib/string.py +++ b/Lib/string.py @@ -41,7 +41,7 @@ def capwords(s, sep=None): # Construct a translation map for bytes.translate -def maketrans(frm, to): +def maketrans(frm: bytes, to: bytes) -> bytes: """maketrans(frm, to) -> bytes Return a translation table (a bytes object of length 256) @@ -53,10 +53,10 @@ def maketrans(frm, to): raise ValueError("maketrans arguments must have same length") if not (isinstance(frm, bytes) and isinstance(to, bytes)): raise TypeError("maketrans arguments must be bytes objects") - L = bytes(range(256)) + L = buffer(range(256)) for i, c in enumerate(frm): L[c] = to[i] - return L + return bytes(L) #################################################################### diff --git a/Lib/struct.py b/Lib/struct.py index 10085b713e..45f6729a17 100644 --- a/Lib/struct.py +++ b/Lib/struct.py @@ -26,8 +26,6 @@ Whitespace between formats is ignored. The variable struct.error is an exception raised on errors. """ -# XXX Move the bytes and str8 casts into the _struct module - __version__ = '3.0' @@ -36,7 +34,9 @@ from _struct import Struct as _Struct, error class Struct(_Struct): def __init__(self, fmt): if isinstance(fmt, str): - fmt = str8(fmt, 'latin1') + fmt = bytes(fmt, 'ascii') + elif isinstance(fmt, buffer): + fmt = bytes(fmt) _Struct.__init__(self, fmt) _MAXCACHE = 100 diff --git a/Lib/subprocess.py b/Lib/subprocess.py index 6eb93857a2..d134c3a45e 100644 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -552,10 +552,9 @@ class Popen(object): self.stderr = io.TextIOWrapper(self.stderr) - def _translate_newlines(self, data): - data = data.replace(b"\r\n", b"\n") - data = data.replace(b"\r", b"\n") - return str(data) + def _translate_newlines(self, data, encoding): + data = data.replace(b"\r\n", b"\n").replace(b"\r", b"\n") + return data.decode(encoding) def __del__(self, sys=sys): @@ -825,16 +824,6 @@ class Popen(object): if stderr is not None: stderr = stderr[0] - # Translate newlines, if requested. 
We cannot let the file - # object do the translation: It is based on stdio, which is - # impossible to combine with select (unless forcing no - # buffering). - if self.universal_newlines: - if stdout is not None: - stdout = self._translate_newlines(stdout) - if stderr is not None: - stderr = self._translate_newlines(stderr) - self.wait() return (stdout, stderr) @@ -960,7 +949,8 @@ class Popen(object): os.close(p2cread) if c2pwrite is not None and c2pwrite not in (p2cread, 1): os.close(c2pwrite) - if errwrite is not None and errwrite not in (p2cread, c2pwrite, 2): + if (errwrite is not None and + errwrite not in (p2cread, c2pwrite, 2)): os.close(errwrite) # Close all other fds, if asked for @@ -1046,8 +1036,7 @@ class Popen(object): if self.stdin: if isinstance(input, str): # Unicode input = input.encode("utf-8") # XXX What else? - if not isinstance(input, (bytes, str8)): - input = bytes(input) + input = bytes(input) read_set = [] write_set = [] stdout = None # Return @@ -1072,6 +1061,9 @@ class Popen(object): while read_set or write_set: rlist, wlist, xlist = select.select(read_set, write_set, []) + # XXX Rewrite these to use non-blocking I/O on the + # file objects; they are no longer using C stdio! + if self.stdin in wlist: # When select has indicated that the file is writable, # we can write up to PIPE_BUF bytes without risk @@ -1099,19 +1091,19 @@ class Popen(object): # All data exchanged. Translate lists into strings. if stdout is not None: - stdout = b''.join(stdout) + stdout = b"".join(stdout) if stderr is not None: - stderr = b''.join(stderr) + stderr = b"".join(stderr) - # Translate newlines, if requested. We cannot let the file - # object do the translation: It is based on stdio, which is - # impossible to combine with select (unless forcing no - # buffering). + # Translate newlines, if requested. + # This also turns bytes into strings. if self.universal_newlines: if stdout is not None: - stdout = self._translate_newlines(stdout) + stdout = self._translate_newlines(stdout, + self.stdout.encoding) if stderr is not None: - stderr = self._translate_newlines(stderr) + stderr = self._translate_newlines(stderr, + self.stderr.encoding) self.wait() return (stdout, stderr) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 4864d97d0d..aef8f940c6 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -30,13 +30,12 @@ """ __version__ = "$Revision$" -# $Source$ version = "0.9.0" -__author__ = "Lars Gust\xe4bel (lars@gustaebel.de)" +__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)" __date__ = "$Date$" __cvsid__ = "$Id$" -__credits__ = "Gustavo Niemeyer, Niels Gust\xe4bel, Richard Townsend." +__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend." #--------- # Imports @@ -223,7 +222,7 @@ def itn(n, digits=8, format=DEFAULT_FORMAT): # this could raise OverflowError. n = struct.unpack("L", struct.pack("l", n))[0] - s = b"" + s = buffer() for i in range(digits - 1): s.insert(0, n & 0o377) n >>= 8 diff --git a/Lib/tempfile.py b/Lib/tempfile.py index 85d58e6518..d725a9d0b6 100644 --- a/Lib/tempfile.py +++ b/Lib/tempfile.py @@ -497,7 +497,7 @@ class SpooledTemporaryFile: else: # Setting newline="\n" avoids newline translation; # this is important because otherwise on Windows we'd - # get double newline translation upon rollover(). + # hget double newline translation upon rollover(). 
self._file = _io.StringIO(encoding=encoding, newline="\n") self._max_size = max_size self._rolled = False diff --git a/Lib/test/buffer_tests.py b/Lib/test/buffer_tests.py index 01ac3c52e2..db27759d79 100644 --- a/Lib/test/buffer_tests.py +++ b/Lib/test/buffer_tests.py @@ -1,11 +1,11 @@ -# Tests that work for both str8 (bytes) and bytes (buffer) objects. +# Tests that work for both bytes and buffer objects. # See PEP 3137. import struct import sys class MixinBytesBufferCommonTests(object): - """Tests that work for both str8 (bytes) and bytes (buffer) objects. + """Tests that work for both bytes and buffer objects. See PEP 3137. """ diff --git a/Lib/test/exception_hierarchy.txt b/Lib/test/exception_hierarchy.txt index 3714a4115a..965252ca07 100644 --- a/Lib/test/exception_hierarchy.txt +++ b/Lib/test/exception_hierarchy.txt @@ -44,5 +44,6 @@ BaseException +-- SyntaxWarning +-- UserWarning +-- FutureWarning - +-- ImportWarning - +-- UnicodeWarning + +-- ImportWarning + +-- UnicodeWarning + +-- BytesWarning diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 0b1cf5a057..6db3572d3a 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -5,6 +5,8 @@ import copy_reg from test.test_support import TestFailed, TESTFN, run_with_locale +from pickle import bytes_types + # Tests that try a number of pickle protocols should have a # for proto in protocols: # kind of outer loop. @@ -87,149 +89,137 @@ class use_metaclass(object, metaclass=metaclass): # DATA0 .. DATA2 are the pickles we expect under the various protocols, for # the object returned by create_data(). -# break into multiple strings to avoid confusing font-lock-mode -DATA0 = b"""(lp1 -I0 -aL1L -aF2 -ac__builtin__ -complex -p2 -""" + \ -b"""(F3 -F0 -tRp3 -aI1 -aI-1 -aI255 -aI-255 -aI-256 -aI65535 -aI-65535 -aI-65536 -aI2147483647 -aI-2147483647 -aI-2147483648 -a""" + \ -b"""(S'abc' -p4 -g4 -""" + \ -b"""(i__main__ -C -p5 -""" + \ -b"""(dp6 -S'foo' -p7 -I1 -sS'bar' -p8 -I2 -sbg5 -tp9 -ag9 -aI5 -a. -""" - -# Disassembly of DATA0. +DATA0 = ( + b'(lp0\nL0\naL1\naF2.0\nac' + b'__builtin__\ncomplex\n' + b'p1\n(F3.0\nF0.0\ntp2\nRp' + b'3\naL1\naL-1\naL255\naL-' + b'255\naL-256\naL65535\na' + b'L-65535\naL-65536\naL2' + b'147483647\naL-2147483' + b'647\naL-2147483648\na(' + b'Vabc\np4\ng4\nccopy_reg' + b'\n_reconstructor\np5\n(' + b'c__main__\nC\np6\nc__bu' + b'iltin__\nobject\np7\nNt' + b'p8\nRp9\n(dp10\nVfoo\np1' + b'1\nL1\nsVbar\np12\nL2\nsb' + b'g9\ntp13\nag13\naL5\na.' 
+) + +# Disassembly of DATA0 DATA0_DIS = """\ 0: ( MARK 1: l LIST (MARK at 0) - 2: p PUT 1 - 5: I INT 0 + 2: p PUT 0 + 5: L LONG 0 8: a APPEND - 9: L LONG 1L - 13: a APPEND - 14: F FLOAT 2.0 - 17: a APPEND - 18: c GLOBAL '__builtin__ complex' - 39: p PUT 2 - 42: ( MARK - 43: F FLOAT 3.0 - 46: F FLOAT 0.0 - 49: t TUPLE (MARK at 42) - 50: R REDUCE - 51: p PUT 3 - 54: a APPEND - 55: I INT 1 - 58: a APPEND - 59: I INT -1 - 63: a APPEND - 64: I INT 255 - 69: a APPEND - 70: I INT -255 - 76: a APPEND - 77: I INT -256 - 83: a APPEND - 84: I INT 65535 + 9: L LONG 1 + 12: a APPEND + 13: F FLOAT 2.0 + 18: a APPEND + 19: c GLOBAL '__builtin__ complex' + 40: p PUT 1 + 43: ( MARK + 44: F FLOAT 3.0 + 49: F FLOAT 0.0 + 54: t TUPLE (MARK at 43) + 55: p PUT 2 + 58: R REDUCE + 59: p PUT 3 + 62: a APPEND + 63: L LONG 1 + 66: a APPEND + 67: L LONG -1 + 71: a APPEND + 72: L LONG 255 + 77: a APPEND + 78: L LONG -255 + 84: a APPEND + 85: L LONG -256 91: a APPEND - 92: I INT -65535 - 100: a APPEND - 101: I INT -65536 - 109: a APPEND - 110: I INT 2147483647 - 122: a APPEND - 123: I INT -2147483647 - 136: a APPEND - 137: I INT -2147483648 - 150: a APPEND - 151: ( MARK - 152: S STRING 'abc' - 159: p PUT 4 - 162: g GET 4 - 165: ( MARK - 166: i INST '__main__ C' (MARK at 165) - 178: p PUT 5 - 181: ( MARK - 182: d DICT (MARK at 181) - 183: p PUT 6 - 186: S STRING 'foo' - 193: p PUT 7 - 196: I INT 1 - 199: s SETITEM - 200: S STRING 'bar' - 207: p PUT 8 - 210: I INT 2 - 213: s SETITEM - 214: b BUILD - 215: g GET 5 - 218: t TUPLE (MARK at 151) - 219: p PUT 9 - 222: a APPEND - 223: g GET 9 - 226: a APPEND - 227: I INT 5 - 230: a APPEND - 231: . STOP + 92: L LONG 65535 + 99: a APPEND + 100: L LONG -65535 + 108: a APPEND + 109: L LONG -65536 + 117: a APPEND + 118: L LONG 2147483647 + 130: a APPEND + 131: L LONG -2147483647 + 144: a APPEND + 145: L LONG -2147483648 + 158: a APPEND + 159: ( MARK + 160: V UNICODE 'abc' + 165: p PUT 4 + 168: g GET 4 + 171: c GLOBAL 'copy_reg _reconstructor' + 196: p PUT 5 + 199: ( MARK + 200: c GLOBAL '__main__ C' + 212: p PUT 6 + 215: c GLOBAL '__builtin__ object' + 235: p PUT 7 + 238: N NONE + 239: t TUPLE (MARK at 199) + 240: p PUT 8 + 243: R REDUCE + 244: p PUT 9 + 247: ( MARK + 248: d DICT (MARK at 247) + 249: p PUT 10 + 253: V UNICODE 'foo' + 258: p PUT 11 + 262: L LONG 1 + 265: s SETITEM + 266: V UNICODE 'bar' + 271: p PUT 12 + 275: L LONG 2 + 278: s SETITEM + 279: b BUILD + 280: g GET 9 + 283: t TUPLE (MARK at 159) + 284: p PUT 13 + 288: a APPEND + 289: g GET 13 + 293: a APPEND + 294: L LONG 5 + 297: a APPEND + 298: . STOP highest protocol among opcodes = 0 """ -DATA1 = (b']q\x01(K\x00L1L\nG@\x00\x00\x00\x00\x00\x00\x00' - b'c__builtin__\ncomplex\nq\x02(G@\x08\x00\x00\x00\x00\x00' - b'\x00G\x00\x00\x00\x00\x00\x00\x00\x00tRq\x03K\x01J\xff\xff' - b'\xff\xffK\xffJ\x01\xff\xff\xffJ\x00\xff\xff\xffM\xff\xff' - b'J\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff\xff\xff\x7fJ\x01\x00' - b'\x00\x80J\x00\x00\x00\x80(U\x03abcq\x04h\x04(c__main__\n' - b'C\nq\x05oq\x06}q\x07(U\x03fooq\x08K\x01U\x03barq\tK\x02ubh' - b'\x06tq\nh\nK\x05e.' - ) - -# Disassembly of DATA1. 
+DATA1 = ( + b']q\x00(K\x00K\x01G@\x00\x00\x00\x00\x00\x00\x00c__' + b'builtin__\ncomplex\nq\x01' + b'(G@\x08\x00\x00\x00\x00\x00\x00G\x00\x00\x00\x00\x00\x00\x00\x00t' + b'q\x02Rq\x03K\x01J\xff\xff\xff\xffK\xffJ\x01\xff\xff\xffJ' + b'\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff\xff' + b'\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00\x80(X\x03\x00\x00\x00ab' + b'cq\x04h\x04ccopy_reg\n_reco' + b'nstructor\nq\x05(c__main' + b'__\nC\nq\x06c__builtin__\n' + b'object\nq\x07Ntq\x08Rq\t}q\n(' + b'X\x03\x00\x00\x00fooq\x0bK\x01X\x03\x00\x00\x00bar' + b'q\x0cK\x02ubh\ttq\rh\rK\x05e.' +) + +# Disassembly of DATA1 DATA1_DIS = """\ 0: ] EMPTY_LIST - 1: q BINPUT 1 + 1: q BINPUT 0 3: ( MARK 4: K BININT1 0 - 6: L LONG 1L - 10: G BINFLOAT 2.0 - 19: c GLOBAL '__builtin__ complex' - 40: q BINPUT 2 - 42: ( MARK - 43: G BINFLOAT 3.0 - 52: G BINFLOAT 0.0 - 61: t TUPLE (MARK at 42) + 6: K BININT1 1 + 8: G BINFLOAT 2.0 + 17: c GLOBAL '__builtin__ complex' + 38: q BINPUT 1 + 40: ( MARK + 41: G BINFLOAT 3.0 + 50: G BINFLOAT 0.0 + 59: t TUPLE (MARK at 40) + 60: q BINPUT 2 62: R REDUCE 63: q BINPUT 3 65: K BININT1 1 @@ -244,97 +234,110 @@ DATA1_DIS = """\ 102: J BININT -2147483647 107: J BININT -2147483648 112: ( MARK - 113: U SHORT_BINSTRING 'abc' - 118: q BINPUT 4 - 120: h BINGET 4 - 122: ( MARK - 123: c GLOBAL '__main__ C' - 135: q BINPUT 5 - 137: o OBJ (MARK at 122) - 138: q BINPUT 6 - 140: } EMPTY_DICT - 141: q BINPUT 7 - 143: ( MARK - 144: U SHORT_BINSTRING 'foo' - 149: q BINPUT 8 - 151: K BININT1 1 - 153: U SHORT_BINSTRING 'bar' - 158: q BINPUT 9 - 160: K BININT1 2 - 162: u SETITEMS (MARK at 143) - 163: b BUILD - 164: h BINGET 6 - 166: t TUPLE (MARK at 112) - 167: q BINPUT 10 - 169: h BINGET 10 - 171: K BININT1 5 - 173: e APPENDS (MARK at 3) - 174: . STOP + 113: X BINUNICODE 'abc' + 121: q BINPUT 4 + 123: h BINGET 4 + 125: c GLOBAL 'copy_reg _reconstructor' + 150: q BINPUT 5 + 152: ( MARK + 153: c GLOBAL '__main__ C' + 165: q BINPUT 6 + 167: c GLOBAL '__builtin__ object' + 187: q BINPUT 7 + 189: N NONE + 190: t TUPLE (MARK at 152) + 191: q BINPUT 8 + 193: R REDUCE + 194: q BINPUT 9 + 196: } EMPTY_DICT + 197: q BINPUT 10 + 199: ( MARK + 200: X BINUNICODE 'foo' + 208: q BINPUT 11 + 210: K BININT1 1 + 212: X BINUNICODE 'bar' + 220: q BINPUT 12 + 222: K BININT1 2 + 224: u SETITEMS (MARK at 199) + 225: b BUILD + 226: h BINGET 9 + 228: t TUPLE (MARK at 112) + 229: q BINPUT 13 + 231: h BINGET 13 + 233: K BININT1 5 + 235: e APPENDS (MARK at 3) + 236: . STOP highest protocol among opcodes = 1 """ -DATA2 = (b'\x80\x02]q\x01(K\x00\x8a\x01\x01G@\x00\x00\x00\x00\x00\x00\x00' - b'c__builtin__\ncomplex\nq\x02G@\x08\x00\x00\x00\x00\x00\x00G\x00' - b'\x00\x00\x00\x00\x00\x00\x00\x86Rq\x03K\x01J\xff\xff\xff\xffK' - b'\xffJ\x01\xff\xff\xffJ\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xff' - b'J\x00\x00\xff\xffJ\xff\xff\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00' - b'\x80(U\x03abcq\x04h\x04(c__main__\nC\nq\x05oq\x06}q\x07(U\x03foo' - b'q\x08K\x01U\x03barq\tK\x02ubh\x06tq\nh\nK\x05e.') - -# Disassembly of DATA2. 
+DATA2 = ( + b'\x80\x02]q\x00(K\x00K\x01G@\x00\x00\x00\x00\x00\x00\x00c' + b'__builtin__\ncomplex\n' + b'q\x01G@\x08\x00\x00\x00\x00\x00\x00G\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x86q\x02Rq\x03K\x01J\xff\xff\xff\xffK\xffJ\x01\xff\xff\xff' + b'J\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff' + b'\xff\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00\x80(X\x03\x00\x00\x00a' + b'bcq\x04h\x04c__main__\nC\nq\x05' + b')\x81q\x06}q\x07(X\x03\x00\x00\x00fooq\x08K\x01' + b'X\x03\x00\x00\x00barq\tK\x02ubh\x06tq\nh' + b'\nK\x05e.' +) + +# Disassembly of DATA2 DATA2_DIS = """\ 0: \x80 PROTO 2 2: ] EMPTY_LIST - 3: q BINPUT 1 + 3: q BINPUT 0 5: ( MARK 6: K BININT1 0 - 8: \x8a LONG1 1L - 11: G BINFLOAT 2.0 - 20: c GLOBAL '__builtin__ complex' - 41: q BINPUT 2 - 43: G BINFLOAT 3.0 - 52: G BINFLOAT 0.0 - 61: \x86 TUPLE2 - 62: R REDUCE - 63: q BINPUT 3 - 65: K BININT1 1 - 67: J BININT -1 - 72: K BININT1 255 - 74: J BININT -255 - 79: J BININT -256 - 84: M BININT2 65535 - 87: J BININT -65535 - 92: J BININT -65536 - 97: J BININT 2147483647 - 102: J BININT -2147483647 - 107: J BININT -2147483648 - 112: ( MARK - 113: U SHORT_BINSTRING 'abc' - 118: q BINPUT 4 - 120: h BINGET 4 - 122: ( MARK - 123: c GLOBAL '__main__ C' - 135: q BINPUT 5 - 137: o OBJ (MARK at 122) - 138: q BINPUT 6 - 140: } EMPTY_DICT - 141: q BINPUT 7 - 143: ( MARK - 144: U SHORT_BINSTRING 'foo' - 149: q BINPUT 8 - 151: K BININT1 1 - 153: U SHORT_BINSTRING 'bar' - 158: q BINPUT 9 - 160: K BININT1 2 - 162: u SETITEMS (MARK at 143) - 163: b BUILD - 164: h BINGET 6 - 166: t TUPLE (MARK at 112) - 167: q BINPUT 10 - 169: h BINGET 10 - 171: K BININT1 5 - 173: e APPENDS (MARK at 5) - 174: . STOP + 8: K BININT1 1 + 10: G BINFLOAT 2.0 + 19: c GLOBAL '__builtin__ complex' + 40: q BINPUT 1 + 42: G BINFLOAT 3.0 + 51: G BINFLOAT 0.0 + 60: \x86 TUPLE2 + 61: q BINPUT 2 + 63: R REDUCE + 64: q BINPUT 3 + 66: K BININT1 1 + 68: J BININT -1 + 73: K BININT1 255 + 75: J BININT -255 + 80: J BININT -256 + 85: M BININT2 65535 + 88: J BININT -65535 + 93: J BININT -65536 + 98: J BININT 2147483647 + 103: J BININT -2147483647 + 108: J BININT -2147483648 + 113: ( MARK + 114: X BINUNICODE 'abc' + 122: q BINPUT 4 + 124: h BINGET 4 + 126: c GLOBAL '__main__ C' + 138: q BINPUT 5 + 140: ) EMPTY_TUPLE + 141: \x81 NEWOBJ + 142: q BINPUT 6 + 144: } EMPTY_DICT + 145: q BINPUT 7 + 147: ( MARK + 148: X BINUNICODE 'foo' + 156: q BINPUT 8 + 158: K BININT1 1 + 160: X BINUNICODE 'bar' + 168: q BINPUT 9 + 170: K BININT1 2 + 172: u SETITEMS (MARK at 147) + 173: b BUILD + 174: h BINGET 6 + 176: t TUPLE (MARK at 113) + 177: q BINPUT 10 + 179: h BINGET 10 + 181: K BININT1 5 + 183: e APPENDS (MARK at 5) + 184: . 
STOP highest protocol among opcodes = 2 """ @@ -393,11 +396,14 @@ class AbstractPickleTests(unittest.TestCase): got = self.loads(s) self.assertEqual(expected, got) - def test_load_from_canned_string(self): - expected = self._testdata - for canned in DATA0, DATA1, DATA2: - got = self.loads(canned) - self.assertEqual(expected, got) + def test_load_from_data0(self): + self.assertEqual(self._testdata, self.loads(DATA0)) + + def test_load_from_data1(self): + self.assertEqual(self._testdata, self.loads(DATA1)) + + def test_load_from_data2(self): + self.assertEqual(self._testdata, self.loads(DATA2)) # There are gratuitous differences between pickles produced by # pickle and cPickle, largely because cPickle starts PUT indices at @@ -762,7 +768,7 @@ class AbstractPickleTests(unittest.TestCase): x = dict.fromkeys(range(n)) for proto in protocols: s = self.dumps(x, proto) - assert isinstance(s, bytes) + assert isinstance(s, bytes_types) y = self.loads(s) self.assertEqual(x, y) num_setitems = count_opcode(pickle.SETITEMS, s) @@ -996,3 +1002,21 @@ class AbstractPersistentPicklerTests(unittest.TestCase): self.assertEqual(self.loads(self.dumps(L, 1)), L) self.assertEqual(self.id_count, 5) self.assertEqual(self.load_count, 5) + +if __name__ == "__main__": + # Print some stuff that can be used to rewrite DATA{0,1,2} + from pickletools import dis + x = create_data() + for i in range(3): + p = pickle.dumps(x, i) + print("DATA{0} = (".format(i)) + for j in range(0, len(p), 20): + b = bytes(p[j:j+20]) + print(" {0!r}".format(b)) + print(")") + print() + print("# Disassembly of DATA{0}".format(i)) + print("DATA{0}_DIS = \"\"\"\\".format(i)) + dis(p) + print("\"\"\"") + print() diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py index d612022738..cd92511af5 100755 --- a/Lib/test/regrtest.py +++ b/Lib/test/regrtest.py @@ -278,6 +278,9 @@ def main(tests=None, testdir=None, verbose=0, quiet=False, generate=False, huntrleaks[1] = int(huntrleaks[1]) if len(huntrleaks) == 2 or not huntrleaks[2]: huntrleaks[2:] = ["reflog.txt"] + # Avoid false positives due to the character cache in + # stringobject.c filling slowly with random data + warm_char_cache() elif o in ('-M', '--memlimit'): test_support.set_memlimit(a) elif o in ('-u', '--use'): @@ -357,9 +360,9 @@ def main(tests=None, testdir=None, verbose=0, quiet=False, generate=False, # Strip .py extensions. if args: - args = map(removepy, args) + args = list(map(removepy, args)) if tests: - tests = map(removepy, tests) + tests = list(map(removepy, tests)) stdtests = STDTESTS[:] nottests = NOTTESTS.copy() @@ -768,6 +771,11 @@ def dash_R_cleanup(fs, ps, pic, abcs): # Collect cyclic trash. gc.collect() +def warm_char_cache(): + s = bytes(range(256)) + for i in range(256): + s[i:i+1] + def reportdiff(expected, output): import difflib print("*" * 70) diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 9da062ea58..a789515fb3 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -558,10 +558,10 @@ class CommonTest(BaseTest): a = self.type2test('DNSSEC') b = self.type2test('') for c in a: - # Special case for the str8, since indexing returns a integer - # XXX Maybe it would be a good idea to seperate str8's tests... - if self.type2test == str8: - c = chr(c) +## # Special case for the str8, since indexing returns a integer +## # XXX Maybe it would be a good idea to seperate str8's tests... 
+## if self.type2test == str8: +## c = chr(c) b += c hash(b) self.assertEqual(hash(a), hash(b)) @@ -992,14 +992,14 @@ class MixinStrUnicodeUserStringTest: self.checkequal('abc', 'a', 'join', ('abc',)) self.checkequal('z', 'a', 'join', UserList(['z'])) self.checkequal('a.b.c', '.', 'join', ['a', 'b', 'c']) - self.checkequal('a.b.3', '.', 'join', ['a', 'b', 3]) + self.assertRaises(TypeError, '.'.join, ['a', 'b', 3]) for i in [5, 25, 125]: self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join', ['a' * i] * i) self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join', ('a' * i,) * i) - self.checkequal(str(BadSeq1()), ' ', 'join', BadSeq1()) + #self.checkequal(str(BadSeq1()), ' ', 'join', BadSeq1()) self.checkequal('a b c', ' ', 'join', BadSeq2()) self.checkraises(TypeError, ' ', 'join') @@ -1147,16 +1147,16 @@ class MixinStrUnicodeTest: s2 = "".join([s1]) self.assert_(s1 is s2) - elif t is str8: - s1 = subclass("abcd") - s2 = "".join([s1]) - self.assert_(s1 is not s2) - self.assert_(type(s2) is str) # promotes! +## elif t is str8: +## s1 = subclass("abcd") +## s2 = "".join([s1]) +## self.assert_(s1 is not s2) +## self.assert_(type(s2) is str) # promotes! - s1 = t("abcd") - s2 = "".join([s1]) - self.assert_(s1 is not s2) - self.assert_(type(s2) is str) # promotes! +## s1 = t("abcd") +## s2 = "".join([s1]) +## self.assert_(s1 is not s2) +## self.assert_(type(s2) is str) # promotes! else: self.fail("unexpected type for MixinStrUnicodeTest %r" % t) diff --git a/Lib/test/test_asynchat.py b/Lib/test/test_asynchat.py index 78f58683e2..83b09720db 100644 --- a/Lib/test/test_asynchat.py +++ b/Lib/test/test_asynchat.py @@ -105,17 +105,17 @@ class TestAsynchat(unittest.TestCase): def test_line_terminator1(self): # test one-character terminator for l in (1,2,3): - self.line_terminator_check(b'\n', l) + self.line_terminator_check('\n', l) def test_line_terminator2(self): # test two-character terminator for l in (1,2,3): - self.line_terminator_check(b'\r\n', l) + self.line_terminator_check('\r\n', l) def test_line_terminator3(self): # test three-character terminator for l in (1,2,3): - self.line_terminator_check(b'qqq', l) + self.line_terminator_check('qqq', l) def numeric_terminator_check(self, termlen): # Try reading a fixed number of bytes diff --git a/Lib/test/test_asyncore.py b/Lib/test/test_asyncore.py index 33c2fb2636..6dc73adb86 100644 --- a/Lib/test/test_asyncore.py +++ b/Lib/test/test_asyncore.py @@ -70,7 +70,6 @@ def capture_server(evt, buf): r, w, e = select.select([conn], [], []) if r: data = conn.recv(10) - assert isinstance(data, bytes) # keep everything except for the newline terminator buf.write(data.replace(b'\n', b'')) if b'\n' in data: diff --git a/Lib/test/test_audioop.py b/Lib/test/test_audioop.py index 194d78391d..fada40ce36 100644 --- a/Lib/test/test_audioop.py +++ b/Lib/test/test_audioop.py @@ -87,7 +87,7 @@ def testadd(data): print('add') data2 = [] for d in data: - str = bytes(len(d)) + str = buffer(len(d)) for i,b in enumerate(d): str[i] = 2*b data2.append(str) @@ -177,7 +177,7 @@ def testmul(data): print('mul') data2 = [] for d in data: - str = bytes(len(d)) + str = buffer(len(d)) for i,b in enumerate(d): str[i] = 2*b data2.append(str) @@ -207,7 +207,7 @@ def testreverse(data): def testtomono(data): if verbose: print('tomono') - data2 = b'' + data2 = buffer() for d in data[0]: data2.append(d) data2.append(d) @@ -218,7 +218,7 @@ def testtomono(data): def testtostereo(data): if verbose: print('tostereo') - data2 = b'' + data2 = buffer() for d in data[0]: data2.append(d) 
data2.append(d) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 9229f384ad..fa13563daa 100755 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -56,7 +56,7 @@ class BinASCIITest(unittest.TestCase): a = binascii.b2a_base64(b) lines.append(a) - fillers = bytes() + fillers = buffer() valid = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/" for i in range(256): if i not in valid: @@ -64,7 +64,7 @@ class BinASCIITest(unittest.TestCase): def addnoise(line): noise = fillers ratio = len(line) // len(noise) - res = bytes() + res = buffer() while line and noise: if len(line) // len(noise) > ratio: c, line = line[0], line[1:] @@ -72,7 +72,7 @@ class BinASCIITest(unittest.TestCase): c, noise = noise[0], noise[1:] res.append(c) return res + noise + line - res = bytes() + res = buffer() for line in map(addnoise, lines): b = binascii.a2b_base64(line) res += b diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 9670be0556..4f843289ac 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -580,8 +580,7 @@ class BuiltinTest(unittest.TestCase): self.assertEqual(hash(1), hash(1)) self.assertEqual(hash(1), hash(1.0)) hash('spam') - self.assertEqual(hash('spam'), hash(str8(b'spam'))) # remove str8() - # when b"" is immutable + self.assertEqual(hash('spam'), hash(b'spam')) hash((0,1,2,3)) def f(): pass self.assertRaises(TypeError, hash, []) diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 932fa448be..b3c13b3b39 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -1,4 +1,9 @@ -"""Unit tests for the bytes type.""" +"""Unit tests for the bytes and buffer types. + +XXX This is a mess. Common tests should be moved to buffer_tests.py, +which itself ought to be unified with string_tests.py (and the latter +should be modernized). 
+""" import os import re @@ -7,6 +12,7 @@ import copy import pickle import tempfile import unittest +import warnings import test.test_support import test.string_tests import test.buffer_tests @@ -14,13 +20,19 @@ import test.buffer_tests class BytesTest(unittest.TestCase): + def setUp(self): + self.warning_filters = warnings.filters[:] + + def tearDown(self): + warnings.filters = self.warning_filters + def test_basics(self): - b = bytes() - self.assertEqual(type(b), bytes) - self.assertEqual(b.__class__, bytes) + b = buffer() + self.assertEqual(type(b), buffer) + self.assertEqual(b.__class__, buffer) def test_empty_sequence(self): - b = bytes() + b = buffer() self.assertEqual(len(b), 0) self.assertRaises(IndexError, lambda: b[0]) self.assertRaises(IndexError, lambda: b[1]) @@ -36,7 +48,7 @@ class BytesTest(unittest.TestCase): def test_from_list(self): ints = list(range(256)) - b = bytes(i for i in ints) + b = buffer(i for i in ints) self.assertEqual(len(b), 256) self.assertEqual(list(b), ints) @@ -46,44 +58,57 @@ class BytesTest(unittest.TestCase): self.i = i def __index__(self): return self.i - b = bytes([C(), C(1), C(254), C(255)]) + b = buffer([C(), C(1), C(254), C(255)]) self.assertEqual(list(b), [0, 1, 254, 255]) - self.assertRaises(ValueError, bytes, [C(-1)]) - self.assertRaises(ValueError, bytes, [C(256)]) + self.assertRaises(ValueError, buffer, [C(-1)]) + self.assertRaises(ValueError, buffer, [C(256)]) + + def test_from_ssize(self): + self.assertEqual(buffer(0), b'') + self.assertEqual(buffer(1), b'\x00') + self.assertEqual(buffer(5), b'\x00\x00\x00\x00\x00') + self.assertRaises(ValueError, buffer, -1) + + self.assertEqual(buffer('0', 'ascii'), b'0') + self.assertEqual(buffer(b'0'), b'0') def test_constructor_type_errors(self): - self.assertRaises(TypeError, bytes, 0.0) + self.assertRaises(TypeError, buffer, 0.0) class C: pass - self.assertRaises(TypeError, bytes, ["0"]) - self.assertRaises(TypeError, bytes, [0.0]) - self.assertRaises(TypeError, bytes, [None]) - self.assertRaises(TypeError, bytes, [C()]) + self.assertRaises(TypeError, buffer, ["0"]) + self.assertRaises(TypeError, buffer, [0.0]) + self.assertRaises(TypeError, buffer, [None]) + self.assertRaises(TypeError, buffer, [C()]) def test_constructor_value_errors(self): - self.assertRaises(ValueError, bytes, [-1]) - self.assertRaises(ValueError, bytes, [-sys.maxint]) - self.assertRaises(ValueError, bytes, [-sys.maxint-1]) - self.assertRaises(ValueError, bytes, [-sys.maxint-2]) - self.assertRaises(ValueError, bytes, [-10**100]) - self.assertRaises(ValueError, bytes, [256]) - self.assertRaises(ValueError, bytes, [257]) - self.assertRaises(ValueError, bytes, [sys.maxint]) - self.assertRaises(ValueError, bytes, [sys.maxint+1]) - self.assertRaises(ValueError, bytes, [10**100]) - - def test_repr(self): - self.assertEqual(repr(bytes()), "b''") - self.assertEqual(repr(bytes([0])), "b'\\x00'") - self.assertEqual(repr(bytes([0, 1, 254, 255])), - "b'\\x00\\x01\\xfe\\xff'") - self.assertEqual(repr(b"abc"), "b'abc'") - self.assertEqual(repr(b"'"), "b'\\''") + self.assertRaises(ValueError, buffer, [-1]) + self.assertRaises(ValueError, buffer, [-sys.maxint]) + self.assertRaises(ValueError, buffer, [-sys.maxint-1]) + self.assertRaises(ValueError, buffer, [-sys.maxint-2]) + self.assertRaises(ValueError, buffer, [-10**100]) + self.assertRaises(ValueError, buffer, [256]) + self.assertRaises(ValueError, buffer, [257]) + self.assertRaises(ValueError, buffer, [sys.maxint]) + self.assertRaises(ValueError, buffer, [sys.maxint+1]) + 
self.assertRaises(ValueError, buffer, [10**100]) + + def test_repr_str(self): + warnings.simplefilter('ignore', BytesWarning) + for f in str, repr: + self.assertEqual(f(buffer()), "buffer(b'')") + self.assertEqual(f(buffer([0])), "buffer(b'\\x00')") + self.assertEqual(f(buffer([0, 1, 254, 255])), + "buffer(b'\\x00\\x01\\xfe\\xff')") + self.assertEqual(f(b"abc"), "b'abc'") + self.assertEqual(f(b"'"), '''b"'"''') + self.assertEqual(f(b"'\""), r"""b'\'"'""") + def test_compare(self): - b1 = bytes([1, 2, 3]) - b2 = bytes([1, 2, 3]) - b3 = bytes([1, 3]) + b1 = buffer([1, 2, 3]) + b2 = buffer([1, 2, 3]) + b3 = buffer([1, 3]) self.assertEqual(b1, b2) self.failUnless(b2 != b3) @@ -103,54 +128,58 @@ class BytesTest(unittest.TestCase): self.failIf(b3 < b2) self.failIf(b3 <= b2) - def test_compare_to_str(self): - self.assertEqual(b"abc" == str8(b"abc"), True) - self.assertEqual(b"ab" != str8(b"abc"), True) - self.assertEqual(b"ab" <= str8(b"abc"), True) - self.assertEqual(b"ab" < str8(b"abc"), True) - self.assertEqual(b"abc" >= str8(b"ab"), True) - self.assertEqual(b"abc" > str8(b"ab"), True) - - self.assertEqual(b"abc" != str8(b"abc"), False) - self.assertEqual(b"ab" == str8(b"abc"), False) - self.assertEqual(b"ab" > str8(b"abc"), False) - self.assertEqual(b"ab" >= str8(b"abc"), False) - self.assertEqual(b"abc" < str8(b"ab"), False) - self.assertEqual(b"abc" <= str8(b"ab"), False) - - self.assertEqual(str8(b"abc") == b"abc", True) - self.assertEqual(str8(b"ab") != b"abc", True) - self.assertEqual(str8(b"ab") <= b"abc", True) - self.assertEqual(str8(b"ab") < b"abc", True) - self.assertEqual(str8(b"abc") >= b"ab", True) - self.assertEqual(str8(b"abc") > b"ab", True) - - self.assertEqual(str8(b"abc") != b"abc", False) - self.assertEqual(str8(b"ab") == b"abc", False) - self.assertEqual(str8(b"ab") > b"abc", False) - self.assertEqual(str8(b"ab") >= b"abc", False) - self.assertEqual(str8(b"abc") < b"ab", False) - self.assertEqual(str8(b"abc") <= b"ab", False) + def test_compare_bytes_to_buffer(self): + self.assertEqual(b"abc" == bytes(b"abc"), True) + self.assertEqual(b"ab" != bytes(b"abc"), True) + self.assertEqual(b"ab" <= bytes(b"abc"), True) + self.assertEqual(b"ab" < bytes(b"abc"), True) + self.assertEqual(b"abc" >= bytes(b"ab"), True) + self.assertEqual(b"abc" > bytes(b"ab"), True) + + self.assertEqual(b"abc" != bytes(b"abc"), False) + self.assertEqual(b"ab" == bytes(b"abc"), False) + self.assertEqual(b"ab" > bytes(b"abc"), False) + self.assertEqual(b"ab" >= bytes(b"abc"), False) + self.assertEqual(b"abc" < bytes(b"ab"), False) + self.assertEqual(b"abc" <= bytes(b"ab"), False) + + self.assertEqual(bytes(b"abc") == b"abc", True) + self.assertEqual(bytes(b"ab") != b"abc", True) + self.assertEqual(bytes(b"ab") <= b"abc", True) + self.assertEqual(bytes(b"ab") < b"abc", True) + self.assertEqual(bytes(b"abc") >= b"ab", True) + self.assertEqual(bytes(b"abc") > b"ab", True) + + self.assertEqual(bytes(b"abc") != b"abc", False) + self.assertEqual(bytes(b"ab") == b"abc", False) + self.assertEqual(bytes(b"ab") > b"abc", False) + self.assertEqual(bytes(b"ab") >= b"abc", False) + self.assertEqual(bytes(b"abc") < b"ab", False) + self.assertEqual(bytes(b"abc") <= b"ab", False) + def test_compare_to_str(self): + warnings.simplefilter('ignore', BytesWarning) # Byte comparisons with unicode should always fail! 
# Test this for all expected byte orders and Unicode character sizes self.assertEqual(b"\0a\0b\0c" == "abc", False) self.assertEqual(b"\0\0\0a\0\0\0b\0\0\0c" == "abc", False) self.assertEqual(b"a\0b\0c\0" == "abc", False) self.assertEqual(b"a\0\0\0b\0\0\0c\0\0\0" == "abc", False) - self.assertEqual(bytes() == str(), False) - self.assertEqual(bytes() != str(), True) + self.assertEqual(buffer() == str(), False) + self.assertEqual(buffer() != str(), True) def test_nohash(self): - self.assertRaises(TypeError, hash, bytes()) + self.assertRaises(TypeError, hash, buffer()) def test_doc(self): + self.failUnless(buffer.__doc__ != None) + self.failUnless(buffer.__doc__.startswith("buffer("), buffer.__doc__) self.failUnless(bytes.__doc__ != None) - self.failUnless(bytes.__doc__.startswith("bytes(")) + self.failUnless(bytes.__doc__.startswith("bytes("), bytes.__doc__) def test_buffer_api(self): short_sample = b"Hello world\n" - sample = short_sample + b"x"*(20 - len(short_sample)) + sample = short_sample + b"\0"*(20 - len(short_sample)) tfn = tempfile.mktemp() try: # Prepare @@ -158,7 +187,7 @@ class BytesTest(unittest.TestCase): f.write(short_sample) # Test readinto with open(tfn, "rb") as f: - b = b"x"*20 + b = buffer(20) n = f.readinto(b) self.assertEqual(n, len(short_sample)) self.assertEqual(list(b), list(sample)) @@ -176,25 +205,25 @@ class BytesTest(unittest.TestCase): def test_reversed(self): input = list(map(ord, "Hello")) - b = bytes(input) + b = buffer(input) output = list(reversed(b)) input.reverse() self.assertEqual(output, input) def test_reverse(self): - b = b'hello' + b = buffer(b'hello') self.assertEqual(b.reverse(), None) self.assertEqual(b, b'olleh') - b = b'hello1' # test even number of items + b = buffer(b'hello1') # test even number of items b.reverse() self.assertEqual(b, b'1olleh') - b = bytes() + b = buffer() b.reverse() self.assertFalse(b) def test_getslice(self): def by(s): - return bytes(map(ord, s)) + return buffer(map(ord, s)) b = by("Hello, world") self.assertEqual(b[:5], by("Hello")) @@ -215,33 +244,33 @@ class BytesTest(unittest.TestCase): def test_extended_getslice(self): # Test extended slicing by comparing with list slicing. 
L = list(range(255)) - b = bytes(L) + b = buffer(L) indices = (0, None, 1, 3, 19, 100, -1, -2, -31, -100) for start in indices: for stop in indices: # Skip step 0 (invalid) for step in indices[1:]: - self.assertEqual(b[start:stop:step], bytes(L[start:stop:step])) + self.assertEqual(b[start:stop:step], buffer(L[start:stop:step])) def test_regexps(self): def by(s): - return bytes(map(ord, s)) + return buffer(map(ord, s)) b = by("Hello, world") self.assertEqual(re.findall(r"\w+", b), [by("Hello"), by("world")]) def test_setitem(self): - b = bytes([1, 2, 3]) + b = buffer([1, 2, 3]) b[1] = 100 - self.assertEqual(b, bytes([1, 100, 3])) + self.assertEqual(b, buffer([1, 100, 3])) b[-1] = 200 - self.assertEqual(b, bytes([1, 100, 200])) + self.assertEqual(b, buffer([1, 100, 200])) class C: def __init__(self, i=0): self.i = i def __index__(self): return self.i b[0] = C(10) - self.assertEqual(b, bytes([10, 100, 200])) + self.assertEqual(b, buffer([10, 100, 200])) try: b[3] = 0 self.fail("Didn't raise IndexError") @@ -269,35 +298,35 @@ class BytesTest(unittest.TestCase): pass def test_delitem(self): - b = bytes(range(10)) + b = buffer(range(10)) del b[0] - self.assertEqual(b, bytes(range(1, 10))) + self.assertEqual(b, buffer(range(1, 10))) del b[-1] - self.assertEqual(b, bytes(range(1, 9))) + self.assertEqual(b, buffer(range(1, 9))) del b[4] - self.assertEqual(b, bytes([1, 2, 3, 4, 6, 7, 8])) + self.assertEqual(b, buffer([1, 2, 3, 4, 6, 7, 8])) def test_setslice(self): - b = bytes(range(10)) + b = buffer(range(10)) self.assertEqual(list(b), list(range(10))) - b[0:5] = bytes([1, 1, 1, 1, 1]) - self.assertEqual(b, bytes([1, 1, 1, 1, 1, 5, 6, 7, 8, 9])) + b[0:5] = buffer([1, 1, 1, 1, 1]) + self.assertEqual(b, buffer([1, 1, 1, 1, 1, 5, 6, 7, 8, 9])) del b[0:-5] - self.assertEqual(b, bytes([5, 6, 7, 8, 9])) + self.assertEqual(b, buffer([5, 6, 7, 8, 9])) - b[0:0] = bytes([0, 1, 2, 3, 4]) - self.assertEqual(b, bytes(range(10))) + b[0:0] = buffer([0, 1, 2, 3, 4]) + self.assertEqual(b, buffer(range(10))) - b[-7:-3] = bytes([100, 101]) - self.assertEqual(b, bytes([0, 1, 2, 100, 101, 7, 8, 9])) + b[-7:-3] = buffer([100, 101]) + self.assertEqual(b, buffer([0, 1, 2, 100, 101, 7, 8, 9])) b[3:5] = [3, 4, 5, 6] - self.assertEqual(b, bytes(range(10))) + self.assertEqual(b, buffer(range(10))) b[3:0] = [42, 42, 42] - self.assertEqual(b, bytes([0, 1, 2, 42, 42, 42, 3, 4, 5, 6, 7, 8, 9])) + self.assertEqual(b, buffer([0, 1, 2, 42, 42, 42, 3, 4, 5, 6, 7, 8, 9])) def test_extended_set_del_slice(self): indices = (0, None, 1, 3, 19, 300, -1, -2, -31, -300) @@ -306,93 +335,96 @@ class BytesTest(unittest.TestCase): # Skip invalid step 0 for step in indices[1:]: L = list(range(255)) - b = bytes(L) + b = buffer(L) # Make sure we have a slice of exactly the right length, # but with different data. data = L[start:stop:step] data.reverse() L[start:stop:step] = data b[start:stop:step] = data - self.assertEquals(b, bytes(L)) + self.assertEquals(b, buffer(L)) del L[start:stop:step] del b[start:stop:step] - self.assertEquals(b, bytes(L)) + self.assertEquals(b, buffer(L)) def test_setslice_trap(self): # This test verifies that we correctly handle assigning self # to a slice of self (the old Lambert Meertens trap). 
- b = bytes(range(256)) + b = buffer(range(256)) b[8:] = b - self.assertEqual(b, bytes(list(range(8)) + list(range(256)))) + self.assertEqual(b, buffer(list(range(8)) + list(range(256)))) def test_encoding(self): sample = "Hello world\n\u1234\u5678\u9abc\udef0" for enc in ("utf8", "utf16"): - b = bytes(sample, enc) - self.assertEqual(b, bytes(sample.encode(enc))) - self.assertRaises(UnicodeEncodeError, bytes, sample, "latin1") - b = bytes(sample, "latin1", "ignore") - self.assertEqual(b, bytes(sample[:-4], "utf-8")) + b = buffer(sample, enc) + self.assertEqual(b, buffer(sample.encode(enc))) + self.assertRaises(UnicodeEncodeError, buffer, sample, "latin1") + b = buffer(sample, "latin1", "ignore") + self.assertEqual(b, buffer(sample[:-4], "utf-8")) def test_decode(self): sample = "Hello world\n\u1234\u5678\u9abc\def0\def0" for enc in ("utf8", "utf16"): - b = bytes(sample, enc) + b = buffer(sample, enc) self.assertEqual(b.decode(enc), sample) sample = "Hello world\n\x80\x81\xfe\xff" - b = bytes(sample, "latin1") + b = buffer(sample, "latin1") self.assertRaises(UnicodeDecodeError, b.decode, "utf8") self.assertEqual(b.decode("utf8", "ignore"), "Hello world\n") def test_from_buffer(self): - sample = str8(b"Hello world\n\x80\x81\xfe\xff") + sample = bytes(b"Hello world\n\x80\x81\xfe\xff") buf = memoryview(sample) - b = bytes(buf) - self.assertEqual(b, bytes(sample)) + b = buffer(buf) + self.assertEqual(b, buffer(sample)) def test_to_str(self): - sample = "Hello world\n\x80\x81\xfe\xff" - b = bytes(sample, "utf-8") - self.assertEqual(str(b), sample) + warnings.simplefilter('ignore', BytesWarning) + self.assertEqual(str(b''), "b''") + self.assertEqual(str(b'x'), "b'x'") + self.assertEqual(str(b'\x80'), "b'\\x80'") def test_from_int(self): - b = bytes(0) - self.assertEqual(b, bytes()) - b = bytes(10) - self.assertEqual(b, bytes([0]*10)) - b = bytes(10000) - self.assertEqual(b, bytes([0]*10000)) + b = buffer(0) + self.assertEqual(b, buffer()) + b = buffer(10) + self.assertEqual(b, buffer([0]*10)) + b = buffer(10000) + self.assertEqual(b, buffer([0]*10000)) def test_concat(self): b1 = b"abc" b2 = b"def" self.assertEqual(b1 + b2, b"abcdef") - self.assertEqual(b1 + str8(b"def"), b"abcdef") - self.assertEqual(str8(b"def") + b1, b"defabc") + self.assertEqual(b1 + bytes(b"def"), b"abcdef") + self.assertEqual(bytes(b"def") + b1, b"defabc") self.assertRaises(TypeError, lambda: b1 + "def") self.assertRaises(TypeError, lambda: "abc" + b2) def test_repeat(self): - b = b"abc" - self.assertEqual(b * 3, b"abcabcabc") - self.assertEqual(b * 0, bytes()) - self.assertEqual(b * -1, bytes()) - self.assertRaises(TypeError, lambda: b * 3.14) - self.assertRaises(TypeError, lambda: 3.14 * b) - self.assertRaises(MemoryError, lambda: b * sys.maxint) + for b in b"abc", buffer(b"abc"): + self.assertEqual(b * 3, b"abcabcabc") + self.assertEqual(b * 0, b"") + self.assertEqual(b * -1, b"") + self.assertRaises(TypeError, lambda: b * 3.14) + self.assertRaises(TypeError, lambda: 3.14 * b) + # XXX Shouldn't bytes and buffer agree on what to raise? 
+ self.assertRaises((OverflowError, MemoryError), + lambda: b * sys.maxint) def test_repeat_1char(self): - self.assertEqual(b'x'*100, bytes([ord('x')]*100)) + self.assertEqual(b'x'*100, buffer([ord('x')]*100)) def test_iconcat(self): - b = b"abc" + b = buffer(b"abc") b1 = b b += b"def" self.assertEqual(b, b"abcdef") self.assertEqual(b, b1) self.failUnless(b is b1) - b += str8(b"xyz") + b += b"xyz" self.assertEqual(b, b"abcdefxyz") try: b += "" @@ -402,7 +434,7 @@ class BytesTest(unittest.TestCase): self.fail("bytes += unicode didn't raise TypeError") def test_irepeat(self): - b = b"abc" + b = buffer(b"abc") b1 = b b *= 3 self.assertEqual(b, b"abcabcabc") @@ -410,38 +442,39 @@ class BytesTest(unittest.TestCase): self.failUnless(b is b1) def test_irepeat_1char(self): - b = b"x" + b = buffer(b"x") b1 = b b *= 100 - self.assertEqual(b, bytes([ord("x")]*100)) + self.assertEqual(b, b"x"*100) self.assertEqual(b, b1) self.failUnless(b is b1) def test_contains(self): - b = b"abc" - self.failUnless(ord('a') in b) - self.failUnless(int(ord('a')) in b) - self.failIf(200 in b) - self.failIf(200 in b) - self.assertRaises(ValueError, lambda: 300 in b) - self.assertRaises(ValueError, lambda: -1 in b) - self.assertRaises(TypeError, lambda: None in b) - self.assertRaises(TypeError, lambda: float(ord('a')) in b) - self.assertRaises(TypeError, lambda: "a" in b) - self.failUnless(b"" in b) - self.failUnless(b"a" in b) - self.failUnless(b"b" in b) - self.failUnless(b"c" in b) - self.failUnless(b"ab" in b) - self.failUnless(b"bc" in b) - self.failUnless(b"abc" in b) - self.failIf(b"ac" in b) - self.failIf(b"d" in b) - self.failIf(b"dab" in b) - self.failIf(b"abd" in b) + for b in b"abc", buffer(b"abc"): + self.failUnless(ord('a') in b) + self.failUnless(int(ord('a')) in b) + self.failIf(200 in b) + self.failIf(200 in b) + self.assertRaises(ValueError, lambda: 300 in b) + self.assertRaises(ValueError, lambda: -1 in b) + self.assertRaises(TypeError, lambda: None in b) + self.assertRaises(TypeError, lambda: float(ord('a')) in b) + self.assertRaises(TypeError, lambda: "a" in b) + for f in bytes, buffer: + self.failUnless(f(b"") in b) + self.failUnless(f(b"a") in b) + self.failUnless(f(b"b") in b) + self.failUnless(f(b"c") in b) + self.failUnless(f(b"ab") in b) + self.failUnless(f(b"bc") in b) + self.failUnless(f(b"abc") in b) + self.failIf(f(b"ac") in b) + self.failIf(f(b"d") in b) + self.failIf(f(b"dab") in b) + self.failIf(f(b"abd") in b) def test_alloc(self): - b = bytes() + b = buffer() alloc = b.__alloc__() self.assert_(alloc >= 0) seq = [alloc] @@ -453,23 +486,23 @@ class BytesTest(unittest.TestCase): seq.append(alloc) def test_fromhex(self): - self.assertRaises(TypeError, bytes.fromhex) - self.assertRaises(TypeError, bytes.fromhex, 1) - self.assertEquals(bytes.fromhex(''), bytes()) - b = bytes([0x1a, 0x2b, 0x30]) - self.assertEquals(bytes.fromhex('1a2B30'), b) - self.assertEquals(bytes.fromhex(' 1A 2B 30 '), b) - self.assertEquals(bytes.fromhex('0000'), b'\0\0') - self.assertRaises(TypeError, bytes.fromhex, b'1B') - self.assertRaises(ValueError, bytes.fromhex, 'a') - self.assertRaises(ValueError, bytes.fromhex, 'rt') - self.assertRaises(ValueError, bytes.fromhex, '1a b cd') - self.assertRaises(ValueError, bytes.fromhex, '\x00') - self.assertRaises(ValueError, bytes.fromhex, '12 \x00 34') + self.assertRaises(TypeError, buffer.fromhex) + self.assertRaises(TypeError, buffer.fromhex, 1) + self.assertEquals(buffer.fromhex(''), buffer()) + b = buffer([0x1a, 0x2b, 0x30]) + self.assertEquals(buffer.fromhex('1a2B30'), 
b) + self.assertEquals(buffer.fromhex(' 1A 2B 30 '), b) + self.assertEquals(buffer.fromhex('0000'), b'\0\0') + self.assertRaises(TypeError, buffer.fromhex, b'1B') + self.assertRaises(ValueError, buffer.fromhex, 'a') + self.assertRaises(ValueError, buffer.fromhex, 'rt') + self.assertRaises(ValueError, buffer.fromhex, '1a b cd') + self.assertRaises(ValueError, buffer.fromhex, '\x00') + self.assertRaises(ValueError, buffer.fromhex, '12 \x00 34') def test_join(self): - self.assertEqual(b"".join([]), bytes()) - self.assertEqual(b"".join([bytes()]), bytes()) + self.assertEqual(b"".join([]), b"") + self.assertEqual(b"".join([b""]), b"") for lst in [[b"abc"], [b"a", b"bc"], [b"ab", b"c"], [b"a", b"b", b"c"]]: self.assertEqual(b"".join(lst), b"abc") self.assertEqual(b"".join(tuple(lst)), b"abc") @@ -485,20 +518,20 @@ class BytesTest(unittest.TestCase): (br"\xaa\x00\000\200", r"\xaa\x00\000\200"), ] for b, s in tests: - self.assertEqual(b, bytes(s, 'latin-1')) + self.assertEqual(b, buffer(s, 'latin-1')) for c in range(128, 256): self.assertRaises(SyntaxError, eval, 'b"%s"' % chr(c)) def test_extend(self): orig = b'hello' - a = bytes(orig) + a = buffer(orig) a.extend(a) self.assertEqual(a, orig + orig) self.assertEqual(a[5:], orig) def test_remove(self): - b = b'hello' + b = buffer(b'hello') b.remove(ord('l')) self.assertEqual(b, b'helo') b.remove(ord('l')) @@ -513,15 +546,15 @@ class BytesTest(unittest.TestCase): self.assertRaises(TypeError, lambda: b.remove(b'e')) def test_pop(self): - b = b'world' + b = buffer(b'world') self.assertEqual(b.pop(), ord('d')) self.assertEqual(b.pop(0), ord('w')) self.assertEqual(b.pop(-2), ord('r')) self.assertRaises(IndexError, lambda: b.pop(10)) - self.assertRaises(OverflowError, lambda: bytes().pop()) + self.assertRaises(OverflowError, lambda: buffer().pop()) def test_nosort(self): - self.assertRaises(AttributeError, lambda: bytes().sort()) + self.assertRaises(AttributeError, lambda: buffer().sort()) def test_index(self): b = b'parrot' @@ -537,17 +570,17 @@ class BytesTest(unittest.TestCase): self.assertEqual(b.count(b'w'), 0) def test_append(self): - b = b'hell' + b = buffer(b'hell') b.append(ord('o')) self.assertEqual(b, b'hello') self.assertEqual(b.append(100), None) - b = bytes() + b = buffer() b.append(ord('A')) self.assertEqual(len(b), 1) self.assertRaises(TypeError, lambda: b.append(b'o')) def test_insert(self): - b = b'msssspp' + b = buffer(b'msssspp') b.insert(1, ord('i')) b.insert(4, ord('i')) b.insert(-2, ord('i')) @@ -557,7 +590,7 @@ class BytesTest(unittest.TestCase): def test_startswith(self): b = b'hello' - self.assertFalse(bytes().startswith(b"anything")) + self.assertFalse(buffer().startswith(b"anything")) self.assertTrue(b.startswith(b"hello")) self.assertTrue(b.startswith(b"hel")) self.assertTrue(b.startswith(b"h")) @@ -566,7 +599,7 @@ class BytesTest(unittest.TestCase): def test_endswith(self): b = b'hello' - self.assertFalse(bytes().endswith(b"anything")) + self.assertFalse(buffer().endswith(b"anything")) self.assertTrue(b.endswith(b"hello")) self.assertTrue(b.endswith(b"llo")) self.assertTrue(b.endswith(b"o")) @@ -612,7 +645,7 @@ class BytesTest(unittest.TestCase): def test_translate(self): b = b'hello' - rosetta = bytes(range(0, 256)) + rosetta = buffer(range(0, 256)) rosetta[ord('o')] = ord('e') c = b.translate(rosetta, b'l') self.assertEqual(b, b'hello') @@ -658,10 +691,10 @@ class BytesTest(unittest.TestCase): self.assertEqual(b' a bb c '.rsplit(None,2), [b' a', b'bb', b'c']) self.assertEqual(b' a bb c '.rsplit(None, 3), [b'a', b'bb', 
b'c']) - def test_rplit_buffer(self): + def test_rsplit_buffer(self): self.assertEqual(b'a b'.rsplit(memoryview(b' ')), [b'a', b'b']) - def test_rplit_string_error(self): + def test_rsplit_string_error(self): self.assertRaises(TypeError, b'a b'.rsplit, ' ') def test_partition(self): @@ -727,6 +760,28 @@ class BytesTest(unittest.TestCase): self.assertEqual([ord(b[i:i+1]) for i in range(len(b))], [0, 65, 127, 128, 255]) + def test_partition_buffer_doesnt_share_nullstring(self): + a, b, c = buffer(b"x").partition(b"y") + self.assertEqual(b, b"") + self.assertEqual(c, b"") + self.assert_(b is not c) + b += b"!" + self.assertEqual(c, b"") + a, b, c = buffer(b"x").partition(b"y") + self.assertEqual(b, b"") + self.assertEqual(c, b"") + # Same for rpartition + b, c, a = buffer(b"x").rpartition(b"y") + self.assertEqual(b, b"") + self.assertEqual(c, b"") + self.assert_(b is not c) + b += b"!" + self.assertEqual(c, b"") + c, b, a = buffer(b"x").rpartition(b"y") + self.assertEqual(b, b"") + self.assertEqual(c, b"") + + # Optimizations: # __iter__? (optimization) # __reversed__? (optimization) @@ -745,7 +800,7 @@ class BytesTest(unittest.TestCase): class BufferPEP3137Test(unittest.TestCase, test.buffer_tests.MixinBytesBufferCommonTests): def marshal(self, x): - return bytes(x) + return buffer(x) # TODO this should become: #return buffer(x) # once the bytes -> buffer and str8 -> bytes rename happens @@ -763,7 +818,7 @@ class BufferPEP3137Test(unittest.TestCase, class BytesAsStringTest(test.string_tests.BaseTest): - type2test = bytes + type2test = buffer def fixtype(self, obj): if isinstance(obj, str): @@ -783,17 +838,17 @@ class BytesAsStringTest(test.string_tests.BaseTest): pass -class BytesSubclass(bytes): +class BufferSubclass(buffer): pass -class BytesSubclassTest(unittest.TestCase): +class BufferSubclassTest(unittest.TestCase): def test_basic(self): - self.assert_(issubclass(BytesSubclass, bytes)) - self.assert_(isinstance(BytesSubclass(), bytes)) + self.assert_(issubclass(BufferSubclass, buffer)) + self.assert_(isinstance(BufferSubclass(), buffer)) a, b = b"abcd", b"efgh" - _a, _b = BytesSubclass(a), BytesSubclass(b) + _a, _b = BufferSubclass(a), BufferSubclass(b) # test comparison operators with subclass instances self.assert_(_a == _a) @@ -816,19 +871,19 @@ class BytesSubclassTest(unittest.TestCase): # Make sure join returns a NEW object for single item sequences # involving a subclass. # Make sure that it is of the appropriate type. 
- s1 = BytesSubclass(b"abcd") - s2 = b"".join([s1]) + s1 = BufferSubclass(b"abcd") + s2 = buffer().join([s1]) self.assert_(s1 is not s2) - self.assert_(type(s2) is bytes) + self.assert_(type(s2) is buffer, type(s2)) # Test reverse, calling join on subclass s3 = s1.join([b"abcd"]) - self.assert_(type(s3) is bytes) + self.assert_(type(s3) is buffer) def test_pickle(self): - a = BytesSubclass(b"abcd") + a = BufferSubclass(b"abcd") a.x = 10 - a.y = BytesSubclass(b"efgh") + a.y = BufferSubclass(b"efgh") for proto in range(pickle.HIGHEST_PROTOCOL): b = pickle.loads(pickle.dumps(a, proto)) self.assertNotEqual(id(a), id(b)) @@ -839,9 +894,9 @@ class BytesSubclassTest(unittest.TestCase): self.assertEqual(type(a.y), type(b.y)) def test_copy(self): - a = BytesSubclass(b"abcd") + a = BufferSubclass(b"abcd") a.x = 10 - a.y = BytesSubclass(b"efgh") + a.y = BufferSubclass(b"efgh") for copy_method in (copy.copy, copy.deepcopy): b = copy_method(a) self.assertNotEqual(id(a), id(b)) @@ -852,9 +907,9 @@ class BytesSubclassTest(unittest.TestCase): self.assertEqual(type(a.y), type(b.y)) def test_init_override(self): - class subclass(bytes): + class subclass(buffer): def __init__(self, newarg=1, *args, **kwargs): - bytes.__init__(self, *args, **kwargs) + buffer.__init__(self, *args, **kwargs) x = subclass(4, source=b"abcd") self.assertEqual(x, b"abcd") x = subclass(newarg=4, source=b"abcd") @@ -864,7 +919,7 @@ class BytesSubclassTest(unittest.TestCase): def test_main(): test.test_support.run_unittest(BytesTest) test.test_support.run_unittest(BytesAsStringTest) - test.test_support.run_unittest(BytesSubclassTest) + test.test_support.run_unittest(BufferSubclassTest) test.test_support.run_unittest(BufferPEP3137Test) if __name__ == "__main__": diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py index 2233f841ba..39bf19c329 100644 --- a/Lib/test/test_bz2.py +++ b/Lib/test/test_bz2.py @@ -160,12 +160,12 @@ class BZ2FileTest(BaseTest): def testWriteMethodsOnReadOnlyFile(self): bz2f = BZ2File(self.filename, "w") - bz2f.write("abc") + bz2f.write(b"abc") bz2f.close() bz2f = BZ2File(self.filename, "r") - self.assertRaises(IOError, bz2f.write, "a") - self.assertRaises(IOError, bz2f.writelines, ["a"]) + self.assertRaises(IOError, bz2f.write, b"a") + self.assertRaises(IOError, bz2f.writelines, [b"a"]) def testSeekForward(self): # "Test BZ2File.seek(150, 0)" @@ -307,7 +307,7 @@ class BZ2DecompressorTest(BaseTest): # "Calling BZ2Decompressor.decompress() after EOS must raise EOFError" bz2d = BZ2Decompressor() text = bz2d.decompress(self.DATA) - self.assertRaises(EOFError, bz2d.decompress, "anything") + self.assertRaises(EOFError, bz2d.decompress, b"anything") class FuncTest(BaseTest): diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index 9cf43a5b72..218bfc53ed 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -33,13 +33,13 @@ class BadObjectUnicodeEncodeError(UnicodeEncodeError): # A UnicodeDecodeError object without an end attribute class NoEndUnicodeDecodeError(UnicodeDecodeError): def __init__(self): - UnicodeDecodeError.__init__(self, "ascii", b"", 0, 1, "bad") + UnicodeDecodeError.__init__(self, "ascii", buffer(b""), 0, 1, "bad") del self.end # A UnicodeDecodeError object with a bad object attribute class BadObjectUnicodeDecodeError(UnicodeDecodeError): def __init__(self): - UnicodeDecodeError.__init__(self, "ascii", b"", 0, 1, "bad") + UnicodeDecodeError.__init__(self, "ascii", buffer(b""), 0, 1, "bad") self.object = [] # A UnicodeTranslateError object 
without a start attribute @@ -181,7 +181,7 @@ class CodecCallbackTest(unittest.TestCase): # mapped through the encoding again. This means, that # to be able to use e.g. the "replace" handler, the # charmap has to have a mapping for "?". - charmap = dict((ord(c), str8(2*c.upper(), 'ascii')) for c in "abcdefgh") + charmap = dict((ord(c), bytes(2*c.upper(), 'ascii')) for c in "abcdefgh") sin = "abc" sout = b"AABBCC" self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout) @@ -189,7 +189,7 @@ class CodecCallbackTest(unittest.TestCase): sin = "abcA" self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap) - charmap[ord("?")] = str8(b"XYZ") + charmap[ord("?")] = b"XYZ" sin = "abcDEF" sout = b"AABBCCXYZXYZXYZ" self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout) @@ -309,7 +309,7 @@ class CodecCallbackTest(unittest.TestCase): # check with one argument too much self.assertRaises(TypeError, exctype, *(args + ["too much"])) # check with one argument of the wrong type - wrongargs = [ "spam", str8(b"eggs"), b"spam", 42, 1.0, None ] + wrongargs = [ "spam", b"eggs", b"spam", 42, 1.0, None ] for i in range(len(args)): for wrongarg in wrongargs: if type(wrongarg) is type(args[i]): @@ -363,12 +363,12 @@ class CodecCallbackTest(unittest.TestCase): def test_unicodedecodeerror(self): self.check_exceptionobjectargs( UnicodeDecodeError, - ["ascii", b"g\xfcrk", 1, 2, "ouch"], + ["ascii", buffer(b"g\xfcrk"), 1, 2, "ouch"], "'ascii' codec can't decode byte 0xfc in position 1: ouch" ) self.check_exceptionobjectargs( UnicodeDecodeError, - ["ascii", b"g\xfcrk", 1, 3, "ouch"], + ["ascii", buffer(b"g\xfcrk"), 1, 3, "ouch"], "'ascii' codec can't decode bytes in position 1-2: ouch" ) @@ -442,7 +442,7 @@ class CodecCallbackTest(unittest.TestCase): ) self.assertEquals( codecs.ignore_errors( - UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch")), + UnicodeDecodeError("ascii", buffer(b"\xff"), 0, 1, "ouch")), ("", 1) ) self.assertEquals( @@ -482,7 +482,7 @@ class CodecCallbackTest(unittest.TestCase): ) self.assertEquals( codecs.replace_errors( - UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch")), + UnicodeDecodeError("ascii", buffer(b"\xff"), 0, 1, "ouch")), ("\ufffd", 1) ) self.assertEquals( @@ -508,7 +508,7 @@ class CodecCallbackTest(unittest.TestCase): self.assertRaises( TypeError, codecs.xmlcharrefreplace_errors, - UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch") + UnicodeDecodeError("ascii", buffer(b"\xff"), 0, 1, "ouch") ) self.assertRaises( TypeError, @@ -542,7 +542,7 @@ class CodecCallbackTest(unittest.TestCase): self.assertRaises( TypeError, codecs.backslashreplace_errors, - UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch") + UnicodeDecodeError("ascii", buffer(b"\xff"), 0, 1, "ouch") ) self.assertRaises( TypeError, diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 22db2ca20f..5833c6db18 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -802,7 +802,7 @@ class UnicodeInternalTest(unittest.TestCase): if sys.maxunicode > 0xffff: codecs.register_error("UnicodeInternalTest", codecs.ignore_errors) decoder = codecs.getdecoder("unicode_internal") - ab = "ab".encode("unicode_internal") + ab = "ab".encode("unicode_internal").decode() ignored = decoder(bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]), "ascii"), "UnicodeInternalTest") @@ -1265,7 +1265,9 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling): encodedresult = b"" for c in s: writer.write(c) - encodedresult += q.read() + chunk = q.read() + 
self.assert_(type(chunk) is bytes, type(chunk)) + encodedresult += chunk q = Queue(b"") reader = codecs.getreader(encoding)(q) decodedresult = "" diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py index f92eb6f4cb..86b47ded4d 100644 --- a/Lib/test/test_collections.py +++ b/Lib/test/test_collections.py @@ -91,7 +91,7 @@ class TestOneTrickPonyABCs(unittest.TestCase): def test_Hashable(self): # Check some non-hashables - non_samples = [bytes(), list(), set(), dict()] + non_samples = [buffer(), list(), set(), dict()] for x in non_samples: self.failIf(isinstance(x, Hashable), repr(x)) self.failIf(issubclass(type(x), Hashable), repr(type(x))) @@ -100,7 +100,7 @@ class TestOneTrickPonyABCs(unittest.TestCase): int(), float(), complex(), str(), tuple(), frozenset(), - int, list, object, type, + int, list, object, type, bytes() ] for x in samples: self.failUnless(isinstance(x, Hashable), repr(x)) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 1d54953426..e7ffc02b7d 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -157,7 +157,7 @@ if 1: s256 = "".join(["\n"] * 256 + ["spam"]) co = compile(s256, 'fn', 'exec') self.assertEqual(co.co_firstlineno, 257) - self.assertEqual(co.co_lnotab, str8()) + self.assertEqual(co.co_lnotab, bytes()) def test_literals_with_leading_zeroes(self): for arg in ["077787", "0xj", "0x.", "0e", "090000000000000", diff --git a/Lib/test/test_datetime.py b/Lib/test/test_datetime.py index b1a1b38802..6e5b990ef9 100644 --- a/Lib/test/test_datetime.py +++ b/Lib/test/test_datetime.py @@ -1111,7 +1111,7 @@ class TestDate(HarmlessMixedComparison, unittest.TestCase): # This shouldn't blow up because of the month byte alone. If # the implementation changes to do more-careful checking, it may # blow up because other fields are insane. 
- self.theclass(bytes(base[:2] + chr(ord_byte) + base[3:], "ascii")) + self.theclass(buffer(base[:2] + chr(ord_byte) + base[3:], "ascii")) ############################################################################# # datetime tests diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 67ae239b68..961369f757 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -3145,7 +3145,7 @@ def str_of_str_subclass(): class octetstring(str): def __str__(self): - return str(binascii.b2a_hex(self)) + return binascii.b2a_hex(self).decode("ascii") def __repr__(self): return self + " repr" diff --git a/Lib/test/test_dumbdbm.py b/Lib/test/test_dumbdbm.py index 3553c1961d..9a2cb682a0 100644 --- a/Lib/test/test_dumbdbm.py +++ b/Lib/test/test_dumbdbm.py @@ -115,7 +115,7 @@ class DumbDBMTestCase(unittest.TestCase): # Mangle the file by changing the line separator to Windows or Unix data = io.open(_fname + '.dir', 'rb').read() - if os.linesep == b'\n': + if os.linesep == '\n': data = data.replace(b'\n', b'\r\n') else: data = data.replace(b'\r\n', b'\n') diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index d2a21913a4..c405ac98f6 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -253,6 +253,12 @@ class ExceptionTests(unittest.TestCase): 'ordinal not in range'), 'encoding' : 'ascii', 'object' : 'a', 'start' : 0, 'reason' : 'ordinal not in range'}), + (UnicodeDecodeError, ('ascii', buffer(b'\xff'), 0, 1, + 'ordinal not in range'), + {'args' : ('ascii', buffer(b'\xff'), 0, 1, + 'ordinal not in range'), + 'encoding' : 'ascii', 'object' : b'\xff', + 'start' : 0, 'reason' : 'ordinal not in range'}), (UnicodeDecodeError, ('ascii', b'\xff', 0, 1, 'ordinal not in range'), {'args' : ('ascii', b'\xff', 0, 1, @@ -278,7 +284,7 @@ class ExceptionTests(unittest.TestCase): try: e = exc(*args) except: - print("\nexc=%r, args=%r" % (exc, args)) + print("\nexc=%r, args=%r" % (exc, args), file=sys.stderr) raise else: # Verify module name diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 4360c54069..ca5e537b79 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -40,14 +40,14 @@ class FormatFunctionsTestCase(unittest.TestCase): 'chicken', 'unknown') BE_DOUBLE_INF = b'\x7f\xf0\x00\x00\x00\x00\x00\x00' -LE_DOUBLE_INF = bytes(reversed(BE_DOUBLE_INF)) +LE_DOUBLE_INF = bytes(reversed(buffer(BE_DOUBLE_INF))) BE_DOUBLE_NAN = b'\x7f\xf8\x00\x00\x00\x00\x00\x00' -LE_DOUBLE_NAN = bytes(reversed(BE_DOUBLE_NAN)) +LE_DOUBLE_NAN = bytes(reversed(buffer(BE_DOUBLE_NAN))) BE_FLOAT_INF = b'\x7f\x80\x00\x00' -LE_FLOAT_INF = bytes(reversed(BE_FLOAT_INF)) +LE_FLOAT_INF = bytes(reversed(buffer(BE_FLOAT_INF))) BE_FLOAT_NAN = b'\x7f\xc0\x00\x00' -LE_FLOAT_NAN = bytes(reversed(BE_FLOAT_NAN)) +LE_FLOAT_NAN = bytes(reversed(buffer(BE_FLOAT_NAN))) # on non-IEEE platforms, attempting to unpack a bit pattern # representing an infinity or a NaN should raise an exception. 
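A minimal sketch of the bytes/buffer behaviour the test changes above pin down (hashability, item assignment, append); it assumes a released Python 3, where the mutable type is spelled bytearray rather than buffer:

# Sketch of the split exercised by the tests above: immutable, hashable
# bytes vs. a mutable counterpart (buffer in this branch, bytearray in
# released Python 3).

immutable = b"spam"
mutable = bytearray(b"spam")

hash(immutable)              # bytes stay hashable, as in test_builtin above
try:
    hash(mutable)            # the mutable type is unhashable, as in test_nohash
except TypeError:
    pass

mutable[0] = ord("S")        # item assignment only works on the mutable type
mutable.append(ord("!"))     # as do append/extend/remove
assert mutable == b"Spam!"   # comparisons against bytes literals still hold

try:
    immutable[0] = ord("S")  # bytes rejects in-place modification
except TypeError:
    pass
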
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 2216a9998e..7e5a8a5c95 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -157,7 +157,8 @@ class BasicTest(TestCase): sock = FakeSocket(body) conn.sock = sock conn.request('GET', '/foo', body) - self.assertTrue(sock.data.startswith(expected)) + self.assertTrue(sock.data.startswith(expected), '%r != %r' % + (sock.data[:len(expected)], expected)) class OfflineTest(TestCase): def test_responses(self): diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 9d4163edc1..6091e89497 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -88,7 +88,7 @@ class IOTest(unittest.TestCase): self.assertEqual(f.tell(), 6) self.assertEqual(f.seek(-1, 1), 5) self.assertEqual(f.tell(), 5) - self.assertEqual(f.write(str8(b" world\n\n\n")), 9) + self.assertEqual(f.write(buffer(b" world\n\n\n")), 9) self.assertEqual(f.seek(0), 0) self.assertEqual(f.write(b"h"), 1) self.assertEqual(f.seek(-1, 2), 13) @@ -99,6 +99,7 @@ class IOTest(unittest.TestCase): def read_ops(self, f, buffered=False): data = f.read(5) self.assertEqual(data, b"hello") + data = buffer(data) self.assertEqual(f.readinto(data), 5) self.assertEqual(data, b" worl") self.assertEqual(f.readinto(data), 2) @@ -107,11 +108,11 @@ class IOTest(unittest.TestCase): self.assertEqual(f.seek(0), 0) self.assertEqual(f.read(20), b"hello world\n") self.assertEqual(f.read(1), b"") - self.assertEqual(f.readinto(b"x"), 0) + self.assertEqual(f.readinto(buffer(b"x")), 0) self.assertEqual(f.seek(-6, 2), 6) self.assertEqual(f.read(5), b"world") self.assertEqual(f.read(0), b"") - self.assertEqual(f.readinto(b""), 0) + self.assertEqual(f.readinto(buffer()), 0) self.assertEqual(f.seek(-6, 1), 5) self.assertEqual(f.read(5), b" worl") self.assertEqual(f.tell(), 10) @@ -687,7 +688,7 @@ class TextIOWrapperTest(unittest.TestCase): f.close() f = io.open(test_support.TESTFN, "r", encoding="utf-8") s = f.read(prefix_size) - self.assertEquals(s, str(prefix)) + self.assertEquals(s, str(prefix, "ascii")) self.assertEquals(f.tell(), prefix_size) self.assertEquals(f.readline(), u_suffix) diff --git a/Lib/test/test_mailbox.py b/Lib/test/test_mailbox.py index 4345be715f..a7142552bb 100644 --- a/Lib/test/test_mailbox.py +++ b/Lib/test/test_mailbox.py @@ -168,9 +168,11 @@ class TestMailbox(TestBase): # Get file representations of messages key0 = self._box.add(self._template % 0) key1 = self._box.add(_sample_message) - self.assertEqual(self._box.get_file(key0).read().replace(os.linesep, '\n'), + data0 = self._box.get_file(key0).read() + data1 = self._box.get_file(key1).read() + self.assertEqual(data0.replace(os.linesep, '\n'), self._template % 0) - self.assertEqual(self._box.get_file(key1).read().replace(os.linesep, '\n'), + self.assertEqual(data1.replace(os.linesep, '\n'), _sample_message) def test_iterkeys(self): @@ -1488,69 +1490,73 @@ class TestProxyFileBase(TestBase): def _test_read(self, proxy): # Read by byte proxy.seek(0) - self.assertEqual(proxy.read(), 'bar') + self.assertEqual(proxy.read(), b'bar') proxy.seek(1) - self.assertEqual(proxy.read(), 'ar') + self.assertEqual(proxy.read(), b'ar') proxy.seek(0) - self.assertEqual(proxy.read(2), 'ba') + self.assertEqual(proxy.read(2), b'ba') proxy.seek(1) - self.assertEqual(proxy.read(-1), 'ar') + self.assertEqual(proxy.read(-1), b'ar') proxy.seek(2) - self.assertEqual(proxy.read(1000), 'r') + self.assertEqual(proxy.read(1000), b'r') def _test_readline(self, proxy): # Read by line + linesep = os.linesep.encode() proxy.seek(0) - 
self.assertEqual(proxy.readline(), 'foo' + os.linesep) - self.assertEqual(proxy.readline(), 'bar' + os.linesep) - self.assertEqual(proxy.readline(), 'fred' + os.linesep) - self.assertEqual(proxy.readline(), 'bob') + self.assertEqual(proxy.readline(), b'foo' + linesep) + self.assertEqual(proxy.readline(), b'bar' + linesep) + self.assertEqual(proxy.readline(), b'fred' + linesep) + self.assertEqual(proxy.readline(), b'bob') proxy.seek(2) - self.assertEqual(proxy.readline(), 'o' + os.linesep) + self.assertEqual(proxy.readline(), b'o' + linesep) proxy.seek(6 + 2 * len(os.linesep)) - self.assertEqual(proxy.readline(), 'fred' + os.linesep) + self.assertEqual(proxy.readline(), b'fred' + linesep) proxy.seek(6 + 2 * len(os.linesep)) - self.assertEqual(proxy.readline(2), 'fr') - self.assertEqual(proxy.readline(-10), 'ed' + os.linesep) + self.assertEqual(proxy.readline(2), b'fr') + self.assertEqual(proxy.readline(-10), b'ed' + linesep) def _test_readlines(self, proxy): # Read multiple lines + linesep = os.linesep.encode() proxy.seek(0) - self.assertEqual(proxy.readlines(), ['foo' + os.linesep, - 'bar' + os.linesep, - 'fred' + os.linesep, 'bob']) + self.assertEqual(proxy.readlines(), [b'foo' + linesep, + b'bar' + linesep, + b'fred' + linesep, b'bob']) proxy.seek(0) - self.assertEqual(proxy.readlines(2), ['foo' + os.linesep]) - proxy.seek(3 + len(os.linesep)) - self.assertEqual(proxy.readlines(4 + len(os.linesep)), - ['bar' + os.linesep, 'fred' + os.linesep]) + self.assertEqual(proxy.readlines(2), [b'foo' + linesep]) + proxy.seek(3 + len(linesep)) + self.assertEqual(proxy.readlines(4 + len(linesep)), + [b'bar' + linesep, b'fred' + linesep]) proxy.seek(3) - self.assertEqual(proxy.readlines(1000), [os.linesep, 'bar' + os.linesep, - 'fred' + os.linesep, 'bob']) + self.assertEqual(proxy.readlines(1000), [linesep, b'bar' + linesep, + b'fred' + linesep, b'bob']) def _test_iteration(self, proxy): # Iterate by line + linesep = os.linesep.encode() proxy.seek(0) iterator = iter(proxy) - self.assertEqual(next(iterator), 'foo' + os.linesep) - self.assertEqual(next(iterator), 'bar' + os.linesep) - self.assertEqual(next(iterator), 'fred' + os.linesep) - self.assertEqual(next(iterator), 'bob') + self.assertEqual(next(iterator), b'foo' + linesep) + self.assertEqual(next(iterator), b'bar' + linesep) + self.assertEqual(next(iterator), b'fred' + linesep) + self.assertEqual(next(iterator), b'bob') self.assertRaises(StopIteration, next, iterator) def _test_seek_and_tell(self, proxy): # Seek and use tell to check position + linesep = os.linesep.encode() proxy.seek(3) self.assertEqual(proxy.tell(), 3) - self.assertEqual(proxy.read(len(os.linesep)), os.linesep) + self.assertEqual(proxy.read(len(linesep)), linesep) proxy.seek(2, 1) - self.assertEqual(proxy.read(1 + len(os.linesep)), 'r' + os.linesep) - proxy.seek(-3 - len(os.linesep), 2) - self.assertEqual(proxy.read(3), 'bar') + self.assertEqual(proxy.read(1 + len(linesep)), b'r' + linesep) + proxy.seek(-3 - len(linesep), 2) + self.assertEqual(proxy.read(3), b'bar') proxy.seek(2, 0) - self.assertEqual(proxy.read(), 'o' + os.linesep + 'bar' + os.linesep) + self.assertEqual(proxy.read(), b'o' + linesep + b'bar' + linesep) proxy.seek(100) - self.assertEqual(proxy.read(), '') + self.failIf(proxy.read()) def _test_close(self, proxy): # Close a file diff --git a/Lib/test/test_marshal.py b/Lib/test/test_marshal.py index 3e44886d68..1e3520f97a 100644 --- a/Lib/test/test_marshal.py +++ b/Lib/test/test_marshal.py @@ -39,7 +39,7 @@ class IntTestCase(unittest.TestCase, HelperMixin): # 
we're running the test on a 32-bit box, of course. def to_little_endian_string(value, nbytes): - b = bytes() + b = buffer() for i in range(nbytes): b.append(value & 0xff) value >>= 8 diff --git a/Lib/test/test_mmap.py b/Lib/test/test_mmap.py index 974fde2ee3..3d30109f5f 100644 --- a/Lib/test/test_mmap.py +++ b/Lib/test/test_mmap.py @@ -39,15 +39,15 @@ class MmapTests(unittest.TestCase): self.assertEqual(len(m), 2*PAGESIZE) - self.assertEqual(m[0], b'\0') + self.assertEqual(m[0], 0) self.assertEqual(m[0:3], b'\0\0\0') # Modify the file's content - m[0] = b'3' + m[0] = b'3'[0] m[PAGESIZE +3: PAGESIZE +3+3] = b'bar' # Check that the modification worked - self.assertEqual(m[0], b'3') + self.assertEqual(m[0], b'3'[0]) self.assertEqual(m[0:3], b'3\0\0') self.assertEqual(m[PAGESIZE-1 : PAGESIZE + 7], b'\0foobar\0') @@ -297,11 +297,11 @@ class MmapTests(unittest.TestCase): # anonymous mmap.mmap(-1, PAGE) m = mmap.mmap(-1, PAGESIZE) for x in range(PAGESIZE): - self.assertEqual(m[x], b'\0', "anonymously mmap'ed contents should be zero") + self.assertEqual(m[x], 0, + "anonymously mmap'ed contents should be zero") - b = bytes(1) for x in range(PAGESIZE): - b[0] = x & 255 + b = x & 0xff m[x] = b self.assertEqual(m[x], b) diff --git a/Lib/test/test_multibytecodec_support.py b/Lib/test/test_multibytecodec_support.py index e51be04143..957b9fc9ec 100644 --- a/Lib/test/test_multibytecodec_support.py +++ b/Lib/test/test_multibytecodec_support.py @@ -52,6 +52,10 @@ class TestBase: func = self.encode if expected: result = func(source, scheme)[0] + if func is self.decode: + self.assert_(type(result) is str, type(result)) + else: + self.assert_(type(result) is bytes, type(result)) self.assertEqual(result, expected) else: self.assertRaises(UnicodeError, func, source, scheme) diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index 11254f4d8e..f54381fa54 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -10,6 +10,9 @@ from test.pickletester import AbstractPersistentPicklerTests class PickleTests(AbstractPickleTests, AbstractPickleModuleTests): + module = pickle + error = KeyError + def dumps(self, arg, proto=0, fast=0): # Ignore fast return pickle.dumps(arg, proto) @@ -18,9 +21,6 @@ class PickleTests(AbstractPickleTests, AbstractPickleModuleTests): # Ignore fast return pickle.loads(buf) - module = pickle - error = KeyError - class PicklerTests(AbstractPickleTests): error = KeyError diff --git a/Lib/test/test_posix.py b/Lib/test/test_posix.py index 3569453f17..efd5fb0748 100644 --- a/Lib/test/test_posix.py +++ b/Lib/test/test_posix.py @@ -193,6 +193,11 @@ class PosixTester(unittest.TestCase): if hasattr(st, 'st_flags'): posix.lchflags(test_support.TESTFN, st.st_flags) + def test_environ(self): + for k, v in posix.environ.items(): + self.assertEqual(type(k), str) + self.assertEqual(type(v), str) + def test_main(): test_support.run_unittest(PosixTester) diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index d2b56431d5..db6a97d80e 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -96,12 +96,12 @@ simple_err(struct.pack, 'iii', 3) simple_err(struct.pack, 'i', 3, 3, 3) simple_err(struct.pack, 'i', 'foo') simple_err(struct.pack, 'P', 'foo') -simple_err(struct.unpack, 'd', 'flap') +simple_err(struct.unpack, 'd', b'flap') s = struct.pack('ii', 1, 2) simple_err(struct.unpack, 'iii', s) simple_err(struct.unpack, 'i', s) -c = str8(b'a') +c = b'a' b = 1 h = 255 i = 65535 @@ -184,9 +184,9 @@ for fmt, arg, big, lil, asy in tests: xfmt, n, len(res))) rev = 
struct.unpack(xfmt, res)[0] if isinstance(arg, str): - # Strings are returned as str8 since you can't know the encoding of + # Strings are returned as bytes since you can't know the encoding of # the string when packed. - arg = str8(arg, 'latin1') + arg = bytes(arg, 'latin1') if rev != arg and not asy: raise TestFailed("unpack(%r, %r) -> (%r,) # expected (%r,)" % ( fmt, res, rev, arg)) @@ -428,14 +428,14 @@ for args in [("bB", 1), def test_p_code(): for code, input, expected, expectedback in [ - ('p','abc', '\x00', str8()), - ('1p', 'abc', '\x00', str8()), - ('2p', 'abc', '\x01a', str8(b'a')), - ('3p', 'abc', '\x02ab', str8(b'ab')), - ('4p', 'abc', '\x03abc', str8(b'abc')), - ('5p', 'abc', '\x03abc\x00', str8(b'abc')), - ('6p', 'abc', '\x03abc\x00\x00', str8(b'abc')), - ('1000p', 'x'*1000, '\xff' + 'x'*999, str8(b'x'*255))]: + ('p','abc', '\x00', b''), + ('1p', 'abc', '\x00', b''), + ('2p', 'abc', '\x01a', b'a'), + ('3p', 'abc', '\x02ab', b'ab'), + ('4p', 'abc', '\x03abc', b'abc'), + ('5p', 'abc', '\x03abc\x00', b'abc'), + ('6p', 'abc', '\x03abc\x00\x00', b'abc'), + ('1000p', 'x'*1000, '\xff' + 'x'*999, b'x'*255)]: expected = bytes(expected, "latin-1") got = struct.pack(code, input) if got != expected: @@ -560,26 +560,26 @@ def test_unpack_from(): test_string = b'abcd01234' fmt = '4s' s = struct.Struct(fmt) - for cls in (str, str8, bytes): # XXX + memoryview + for cls in (buffer, bytes): if verbose: print("test_unpack_from using", cls.__name__) data = cls(test_string) - if not isinstance(data, (str8, bytes)): - bytes_data = str8(data, 'latin1') + if not isinstance(data, (buffer, bytes)): + bytes_data = bytes(data, 'latin1') else: bytes_data = data - vereq(s.unpack_from(data), (str8(b'abcd'),)) - vereq(s.unpack_from(data, 2), (str8(b'cd01'),)) - vereq(s.unpack_from(data, 4), (str8(b'0123'),)) + vereq(s.unpack_from(data), (b'abcd',)) + vereq(s.unpack_from(data, 2), (b'cd01',)) + vereq(s.unpack_from(data, 4), (b'0123',)) for i in range(6): vereq(s.unpack_from(data, i), (bytes_data[i:i+4],)) for i in range(6, len(test_string) + 1): simple_err(s.unpack_from, data, i) - for cls in (str, str8, bytes): # XXX + memoryview + for cls in (buffer, bytes): data = cls(test_string) - vereq(struct.unpack_from(fmt, data), (str8(b'abcd'),)) - vereq(struct.unpack_from(fmt, data, 2), (str8(b'cd01'),)) - vereq(struct.unpack_from(fmt, data, 4), (str8(b'0123'),)) + vereq(struct.unpack_from(fmt, data), (b'abcd',)) + vereq(struct.unpack_from(fmt, data, 2), (b'cd01',)) + vereq(struct.unpack_from(fmt, data, 4), (b'0123',)) for i in range(6): vereq(struct.unpack_from(fmt, data, i), (bytes_data[i:i+4],)) for i in range(6, len(test_string) + 1): diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 39a889ddaf..806791b9f1 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -24,7 +24,8 @@ else: # shutdown time. That frustrates tests trying to check stderr produced # from a spawned Python process. 
def remove_stderr_debug_decorations(stderr): - return re.sub(r"\[\d+ refs\]\r?\n?$", "", str(stderr)) + return re.sub("\[\d+ refs\]\r?\n?$", "", stderr.decode()).encode() + #return re.sub(r"\[\d+ refs\]\r?\n?$", "", stderr) class ProcessTestCase(unittest.TestCase): def setUp(self): @@ -77,9 +78,9 @@ class ProcessTestCase(unittest.TestCase): newenv = os.environ.copy() newenv["FRUIT"] = "banana" rc = subprocess.call([sys.executable, "-c", - 'import sys, os;' \ - 'sys.exit(os.getenv("FRUIT")=="banana")'], - env=newenv) + 'import sys, os;' + 'sys.exit(os.getenv("FRUIT")=="banana")'], + env=newenv) self.assertEqual(rc, 1) def test_stdin_none(self): @@ -180,7 +181,7 @@ class ProcessTestCase(unittest.TestCase): 'import sys; sys.stderr.write("strawberry")'], stderr=subprocess.PIPE) self.assertEqual(remove_stderr_debug_decorations(p.stderr.read()), - "strawberry") + b"strawberry") def test_stderr_filedes(self): # stderr is set to open file descriptor @@ -192,7 +193,7 @@ class ProcessTestCase(unittest.TestCase): p.wait() os.lseek(d, 0, 0) self.assertEqual(remove_stderr_debug_decorations(os.read(d, 1024)), - "strawberry") + b"strawberry") def test_stderr_fileobj(self): # stderr is set to open file object @@ -203,36 +204,36 @@ class ProcessTestCase(unittest.TestCase): p.wait() tf.seek(0) self.assertEqual(remove_stderr_debug_decorations(tf.read()), - "strawberry") + b"strawberry") def test_stdout_stderr_pipe(self): # capture stdout and stderr to the same pipe p = subprocess.Popen([sys.executable, "-c", - 'import sys;' \ - 'sys.stdout.write("apple");' \ - 'sys.stdout.flush();' \ - 'sys.stderr.write("orange")'], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) + 'import sys;' + 'sys.stdout.write("apple");' + 'sys.stdout.flush();' + 'sys.stderr.write("orange")'], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) output = p.stdout.read() stripped = remove_stderr_debug_decorations(output) - self.assertEqual(stripped, "appleorange") + self.assertEqual(stripped, b"appleorange") def test_stdout_stderr_file(self): # capture stdout and stderr to the same open file tf = tempfile.TemporaryFile() p = subprocess.Popen([sys.executable, "-c", - 'import sys;' \ - 'sys.stdout.write("apple");' \ - 'sys.stdout.flush();' \ - 'sys.stderr.write("orange")'], - stdout=tf, - stderr=tf) + 'import sys;' + 'sys.stdout.write("apple");' + 'sys.stdout.flush();' + 'sys.stderr.write("orange")'], + stdout=tf, + stderr=tf) p.wait() tf.seek(0) output = tf.read() stripped = remove_stderr_debug_decorations(output) - self.assertEqual(stripped, "appleorange") + self.assertEqual(stripped, b"appleorange") def test_stdout_filedes_of_stdout(self): # stdout is set to 1 (#1531862). 
@@ -249,10 +250,10 @@ class ProcessTestCase(unittest.TestCase): tmpdir = os.getcwd() os.chdir(cwd) p = subprocess.Popen([sys.executable, "-c", - 'import sys,os;' \ - 'sys.stdout.write(os.getcwd())'], - stdout=subprocess.PIPE, - cwd=tmpdir) + 'import sys,os;' + 'sys.stdout.write(os.getcwd())'], + stdout=subprocess.PIPE, + cwd=tmpdir) normcase = os.path.normcase self.assertEqual(normcase(p.stdout.read().decode("utf-8")), normcase(tmpdir)) @@ -261,15 +262,16 @@ class ProcessTestCase(unittest.TestCase): newenv = os.environ.copy() newenv["FRUIT"] = "orange" p = subprocess.Popen([sys.executable, "-c", - 'import sys,os;' \ - 'sys.stdout.write(os.getenv("FRUIT"))'], - stdout=subprocess.PIPE, - env=newenv) + 'import sys,os;' + 'sys.stdout.write(os.getenv("FRUIT"))'], + stdout=subprocess.PIPE, + env=newenv) self.assertEqual(p.stdout.read(), b"orange") def test_communicate_stdin(self): p = subprocess.Popen([sys.executable, "-c", - 'import sys; sys.exit(sys.stdin.read() == "pear")'], + 'import sys;' + 'sys.exit(sys.stdin.read() == "pear")'], stdin=subprocess.PIPE) p.communicate(b"pear") self.assertEqual(p.returncode, 1) @@ -294,16 +296,16 @@ class ProcessTestCase(unittest.TestCase): def test_communicate(self): p = subprocess.Popen([sys.executable, "-c", - 'import sys,os;' \ - 'sys.stderr.write("pineapple");' \ - 'sys.stdout.write(sys.stdin.read())'], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + 'import sys,os;' + 'sys.stderr.write("pineapple");' + 'sys.stdout.write(sys.stdin.read())'], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) (stdout, stderr) = p.communicate("banana") self.assertEqual(stdout, b"banana") self.assertEqual(remove_stderr_debug_decorations(stderr), - "pineapple") + b"pineapple") def test_communicate_returns(self): # communicate() should return None if no redirection is active @@ -325,13 +327,13 @@ class ProcessTestCase(unittest.TestCase): os.close(x) os.close(y) p = subprocess.Popen([sys.executable, "-c", - 'import sys,os;' - 'sys.stdout.write(sys.stdin.read(47));' \ - 'sys.stderr.write("xyz"*%d);' \ - 'sys.stdout.write(sys.stdin.read())' % pipe_buf], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + 'import sys,os;' + 'sys.stdout.write(sys.stdin.read(47));' + 'sys.stderr.write("xyz"*%d);' + 'sys.stdout.write(sys.stdin.read())' % pipe_buf], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) string_to_write = b"abc"*pipe_buf (stdout, stderr) = p.communicate(string_to_write) self.assertEqual(stdout, string_to_write) @@ -339,68 +341,69 @@ class ProcessTestCase(unittest.TestCase): def test_writes_before_communicate(self): # stdin.write before communicate() p = subprocess.Popen([sys.executable, "-c", - 'import sys,os;' \ - 'sys.stdout.write(sys.stdin.read())'], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + 'import sys,os;' + 'sys.stdout.write(sys.stdin.read())'], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) p.stdin.write(b"banana") (stdout, stderr) = p.communicate(b"split") self.assertEqual(stdout, b"bananasplit") - self.assertEqual(remove_stderr_debug_decorations(stderr), "") + self.assertEqual(remove_stderr_debug_decorations(stderr), b"") def test_universal_newlines(self): p = subprocess.Popen([sys.executable, "-c", - 'import sys,os;' + SETBINARY + - 'sys.stdout.write("line1\\n");' - 'sys.stdout.flush();' - 'sys.stdout.write("line2\\n");' - 'sys.stdout.flush();' - 'sys.stdout.write("line3\\r\\n");' - 
'sys.stdout.flush();' - 'sys.stdout.write("line4\\r");' - 'sys.stdout.flush();' - 'sys.stdout.write("\\nline5");' - 'sys.stdout.flush();' - 'sys.stdout.write("\\nline6");'], - stdout=subprocess.PIPE, - universal_newlines=1) + 'import sys,os;' + SETBINARY + + 'sys.stdout.write("line1\\n");' + 'sys.stdout.flush();' + 'sys.stdout.write("line2\\n");' + 'sys.stdout.flush();' + 'sys.stdout.write("line3\\r\\n");' + 'sys.stdout.flush();' + 'sys.stdout.write("line4\\r");' + 'sys.stdout.flush();' + 'sys.stdout.write("\\nline5");' + 'sys.stdout.flush();' + 'sys.stdout.write("\\nline6");'], + stdout=subprocess.PIPE, + universal_newlines=1) stdout = p.stdout.read() self.assertEqual(stdout, "line1\nline2\nline3\nline4\nline5\nline6") def test_universal_newlines_communicate(self): # universal newlines through communicate() p = subprocess.Popen([sys.executable, "-c", - 'import sys,os;' + SETBINARY + - 'sys.stdout.write("line1\\n");' - 'sys.stdout.flush();' - 'sys.stdout.write("line2\\n");' - 'sys.stdout.flush();' - 'sys.stdout.write("line3\\r\\n");' - 'sys.stdout.flush();' - 'sys.stdout.write("line4\\r");' - 'sys.stdout.flush();' - 'sys.stdout.write("\\nline5");' - 'sys.stdout.flush();' - 'sys.stdout.write("\\nline6");'], - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - universal_newlines=1) + 'import sys,os;' + SETBINARY + + 'sys.stdout.write("line1\\n");' + 'sys.stdout.flush();' + 'sys.stdout.write("line2\\n");' + 'sys.stdout.flush();' + 'sys.stdout.write("line3\\r\\n");' + 'sys.stdout.flush();' + 'sys.stdout.write("line4\\r");' + 'sys.stdout.flush();' + 'sys.stdout.write("\\nline5");' + 'sys.stdout.flush();' + 'sys.stdout.write("\\nline6");'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + universal_newlines=1) (stdout, stderr) = p.communicate() self.assertEqual(stdout, "line1\nline2\nline3\nline4\nline5\nline6") def test_no_leaking(self): # Make sure we leak no resources - if not hasattr(test_support, "is_resource_enabled") \ - or test_support.is_resource_enabled("subprocess") and not mswindows: + if (not hasattr(test_support, "is_resource_enabled") or + test_support.is_resource_enabled("subprocess") and not mswindows): max_handles = 1026 # too much for most UNIX systems else: max_handles = 65 for i in range(max_handles): p = subprocess.Popen([sys.executable, "-c", - "import sys;sys.stdout.write(sys.stdin.read())"], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + "import sys;" + "sys.stdout.write(sys.stdin.read())"], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) data = p.communicate("lime")[0] self.assertEqual(data, b"lime") @@ -516,10 +519,11 @@ class ProcessTestCase(unittest.TestCase): def test_preexec(self): # preexec function p = subprocess.Popen([sys.executable, "-c", - 'import sys,os;' \ - 'sys.stdout.write(os.getenv("FRUIT"))'], - stdout=subprocess.PIPE, - preexec_fn=lambda: os.putenv("FRUIT", "apple")) + 'import sys,os;' + 'sys.stdout.write(os.getenv("FRUIT"))'], + stdout=subprocess.PIPE, + preexec_fn=lambda: os.putenv("FRUIT", + "apple")) self.assertEqual(p.stdout.read(), b"apple") def test_args_string(self): @@ -654,4 +658,4 @@ def test_main(): test_support.reap_children() if __name__ == "__main__": - test_main() + unittest.main() # XXX test_main() diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 8741830694..ad7082eae3 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -300,7 +300,7 @@ class SysModuleTest(unittest.TestCase): def test_intern(self): self.assertRaises(TypeError, sys.intern) - s = 
str8(b"never interned before") + s = "never interned before" self.assert_(sys.intern(s) is s) s2 = s.swapcase().swapcase() self.assert_(sys.intern(s2) is s) @@ -310,28 +310,11 @@ class SysModuleTest(unittest.TestCase): # We don't want them in the interned dict and if they aren't # actually interned, we don't want to create the appearance # that they are by allowing intern() to succeeed. - class S(str8): + class S(str): def __hash__(self): return 123 - self.assertRaises(TypeError, sys.intern, S(b"abc")) - - s = "never interned as unicode before" - self.assert_(sys.intern(s) is s) - s2 = s.swapcase().swapcase() - self.assert_(sys.intern(s2) is s) - - class U(str): - def __hash__(self): - return 123 - - self.assertRaises(TypeError, sys.intern, U("abc")) - - # It's still safe to pass these strings to routines that - # call intern internally, e.g. PyObject_SetAttr(). - s = U("abc") - setattr(s, s, s) - self.assertEqual(getattr(s, s), s) + self.assertRaises(TypeError, sys.intern, S("abc")) def test_main(): diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 49708458b8..d53317fae2 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -6,7 +6,11 @@ Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. """#" -import unittest, sys, struct, codecs, new +import codecs +import struct +import sys +import unittest +import warnings from test import test_support, string_tests # Error handling (bad decoder return) @@ -34,6 +38,12 @@ class UnicodeTest( ): type2test = str + def setUp(self): + self.warning_filters = warnings.filters[:] + + def tearDown(self): + warnings.filters = self.warning_filters + def checkequalnofix(self, result, object, methodname, *args): method = getattr(object, methodname) realresult = method(*args) @@ -192,8 +202,10 @@ class UnicodeTest( self.checkequalnofix('a b c d', ' ', 'join', ['a', 'b', 'c', 'd']) self.checkequalnofix('abcd', '', 'join', ('a', 'b', 'c', 'd')) self.checkequalnofix('w x y z', ' ', 'join', string_tests.Sequence('wxyz')) - self.checkequalnofix('1 2 foo', ' ', 'join', [1, 2, MyWrapper('foo')]) - self.checkraises(TypeError, ' ', 'join', [1, 2, 3, bytes()]) + self.checkraises(TypeError, ' ', 'join', ['1', '2', MyWrapper('foo')]) + self.checkraises(TypeError, ' ', 'join', ['1', '2', '3', bytes()]) + self.checkraises(TypeError, ' ', 'join', [1, 2, 3]) + self.checkraises(TypeError, ' ', 'join', ['1', '2', 3]) def test_replace(self): string_tests.CommonTest.test_replace(self) @@ -202,9 +214,12 @@ class UnicodeTest( self.checkequalnofix('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1) self.assertRaises(TypeError, 'replace'.replace, "r", 42) - def test_str8_comparison(self): - self.assertEqual('abc' == str8(b'abc'), False) - self.assertEqual('abc' != str8(b'abc'), True) + def test_bytes_comparison(self): + warnings.simplefilter('ignore', BytesWarning) + self.assertEqual('abc' == b'abc', False) + self.assertEqual('abc' != b'abc', True) + self.assertEqual('abc' == buffer(b'abc'), False) + self.assertEqual('abc' != buffer(b'abc'), True) def test_comparison(self): # Comparisons: @@ -661,16 +676,6 @@ class UnicodeTest( 'strings are converted to unicode' ) - class UnicodeCompat: - def __init__(self, x): - self.x = x - def __unicode__(self): - return self.x - - self.assertEqual( - str(UnicodeCompat('__unicode__ compatible objects are recognized')), - '__unicode__ compatible objects are recognized') - class StringCompat: def __init__(self, x): self.x = x @@ -688,14 +693,6 @@ class 
UnicodeTest( self.assertEqual(str(o), 'unicode(obj) is compatible to str()') self.assertEqual(str(o), 'unicode(obj) is compatible to str()') - # %-formatting and .__unicode__() - self.assertEqual('%s' % - UnicodeCompat("u'%s' % obj uses obj.__unicode__()"), - "u'%s' % obj uses obj.__unicode__()") - self.assertEqual('%s' % - UnicodeCompat("u'%s' % obj falls back to obj.__str__()"), - "u'%s' % obj falls back to obj.__str__()") - for obj in (123, 123.45, 123): self.assertEqual(str(obj), str(str(obj))) @@ -970,48 +967,46 @@ class UnicodeTest( return "foo" class Foo1: - def __unicode__(self): + def __str__(self): return "foo" class Foo2(object): - def __unicode__(self): + def __str__(self): return "foo" class Foo3(object): - def __unicode__(self): + def __str__(self): return "foo" class Foo4(str): - def __unicode__(self): + def __str__(self): return "foo" class Foo5(str): - def __unicode__(self): + def __str__(self): return "foo" class Foo6(str): def __str__(self): return "foos" - def __unicode__(self): + def __str__(self): return "foou" class Foo7(str): def __str__(self): return "foos" - def __unicode__(self): + def __str__(self): return "foou" class Foo8(str): def __new__(cls, content=""): return str.__new__(cls, 2*content) - def __unicode__(self): + def __str__(self): return self class Foo9(str): def __str__(self): - return "string" - def __unicode__(self): return "not unicode" self.assertEqual(str(Foo0()), "foo") diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py index ff2dcf5756..ba97e5d923 100644 --- a/Lib/test/test_unicodedata.py +++ b/Lib/test/test_unicodedata.py @@ -176,7 +176,8 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): def test_east_asian_width(self): eaw = self.db.east_asian_width - self.assertRaises(TypeError, eaw, str8(b'a')) + self.assertRaises(TypeError, eaw, b'a') + self.assertRaises(TypeError, eaw, buffer()) self.assertRaises(TypeError, eaw, '') self.assertRaises(TypeError, eaw, 'ra') self.assertEqual(eaw('\x1e'), 'N') diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 5cbc652208..393e99711e 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -999,7 +999,8 @@ class HandlerTests(unittest.TestCase): self.assertEqual(len(http_handler.requests), 2) self.assertFalse(http_handler.requests[0].has_header(auth_header)) userpass = bytes('%s:%s' % (user, password), "ascii") - auth_hdr_value = 'Basic ' + str(base64.encodestring(userpass)).strip() + auth_hdr_value = ('Basic ' + + base64.encodestring(userpass).strip().decode()) self.assertEqual(http_handler.requests[1].get_header(auth_header), auth_hdr_value) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 4788d3a045..1a6b7f33bc 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -184,7 +184,7 @@ def parseliteral(): >>> print(ET.tostring(element)) <html><body>text</body></html> >>> print(repr(ET.tostring(element, "ascii"))) - b'<?xml version=\'1.0\' encoding=\'ascii\'?>\n<html><body>text</body></html>' + b"<?xml version='1.0' encoding='ascii'?>\n<html><body>text</body></html>" >>> _, ids = ET.XMLID("<html><body>text</body></html>") >>> len(ids) 0 diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py index 86f1853084..c8eec40bec 100644 --- a/Lib/test/test_xml_etree_c.py +++ b/Lib/test/test_xml_etree_c.py @@ -176,7 +176,7 @@ def parseliteral(): >>> print(ET.tostring(element)) <html><body>text</body></html> >>> print(repr(ET.tostring(element, "ascii"))) - b'<?xml version=\'1.0\' 
encoding=\'ascii\'?>\n<html><body>text</body></html>' + b"<?xml version='1.0' encoding='ascii'?>\n<html><body>text</body></html>" >>> _, ids = ET.XMLID("<html><body>text</body></html>") >>> len(ids) 0 diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py index 58935b7f03..cb20222de6 100644 --- a/Lib/test/test_zipimport.py +++ b/Lib/test/test_zipimport.py @@ -153,7 +153,7 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase): def testBadMagic(self): # make pyc magic word invalid, forcing loading from .py - badmagic_pyc = bytes(test_pyc) + badmagic_pyc = buffer(test_pyc) badmagic_pyc[0] ^= 0x04 # flip an arbitrary bit files = {TESTMOD + ".py": (NOW, test_src), TESTMOD + pyc_ext: (NOW, badmagic_pyc)} @@ -161,7 +161,7 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase): def testBadMagic2(self): # make pyc magic word invalid, causing an ImportError - badmagic_pyc = bytes(test_pyc) + badmagic_pyc = buffer(test_pyc) badmagic_pyc[0] ^= 0x04 # flip an arbitrary bit files = {TESTMOD + pyc_ext: (NOW, badmagic_pyc)} try: @@ -172,7 +172,7 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase): self.fail("expected ImportError; import from bad pyc") def testBadMTime(self): - badtime_pyc = bytes(test_pyc) + badtime_pyc = buffer(test_pyc) badtime_pyc[7] ^= 0x02 # flip the second bit -- not the first as that one # isn't stored in the .py's mtime in the zip archive. files = {TESTMOD + ".py": (NOW, test_src), diff --git a/Lib/test/testcodec.py b/Lib/test/testcodec.py index 0db18c17ca..77f408b2dc 100644 --- a/Lib/test/testcodec.py +++ b/Lib/test/testcodec.py @@ -36,7 +36,7 @@ def getregentry(): decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x78: "abc", # 1-n decoding mapping - str8(b"abc"): 0x0078,# 1-n encoding mapping + b"abc": 0x0078,# 1-n encoding mapping 0x01: None, # decoding mapping to <undefined> 0x79: "", # decoding mapping to <remove character> }) diff --git a/Lib/urllib.py b/Lib/urllib.py index b2542fce24..81a8cd69e4 100644 --- a/Lib/urllib.py +++ b/Lib/urllib.py @@ -925,22 +925,14 @@ class addinfourl(addbase): # unquote('abc%20def') -> 'abc def' # quote('abc def') -> 'abc%20def') -try: - str -except NameError: - def _is_unicode(x): - return 0 -else: - def _is_unicode(x): - return isinstance(x, str) - def toBytes(url): """toBytes(u"URL") --> 'URL'.""" # Most URL schemes require ASCII. If that changes, the conversion - # can be relaxed - if _is_unicode(url): + # can be relaxed. + # XXX get rid of toBytes() + if isinstance(url, str): try: - url = url.encode("ASCII") + url = url.encode("ASCII").decode() except UnicodeError: raise UnicodeError("URL " + repr(url) + " contains non-ASCII characters") @@ -1203,7 +1195,7 @@ def urlencode(query,doseq=0): if isinstance(v, str): v = quote_plus(v) l.append(k + '=' + v) - elif _is_unicode(v): + elif isinstance(v, str): # is there a reasonable way to convert to ASCII? 
# encode generates a string, but "replace" or "ignore" # lose information and "strict" can raise UnicodeError diff --git a/Lib/urllib2.py b/Lib/urllib2.py index d7679fc5eb..fb2c3033bd 100644 --- a/Lib/urllib2.py +++ b/Lib/urllib2.py @@ -802,7 +802,7 @@ class AbstractBasicAuthHandler: user, pw = self.passwd.find_user_password(realm, host) if pw is not None: raw = "%s:%s" % (user, pw) - auth = 'Basic %s' % str(base64.b64encode(raw)).strip() + auth = 'Basic %s' % base64.b64encode(raw).strip().decode() if req.headers.get(self.auth_header, None) == auth: return None req.add_header(self.auth_header, auth) diff --git a/Lib/uuid.py b/Lib/uuid.py index 74d4a7a22b..06115c70ba 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -234,7 +234,7 @@ class UUID(object): @property def bytes(self): - bytes = b'' + bytes = buffer() for shift in range(0, 128, 8): bytes.insert(0, (self.int >> shift) & 0xff) return bytes @@ -548,7 +548,7 @@ def uuid4(): return UUID(bytes=os.urandom(16), version=4) except: import random - bytes = [chr(random.randrange(256)) for i in range(16)] + bytes = bytes_(random.randrange(256) for i in range(16)) return UUID(bytes=bytes, version=4) def uuid5(namespace, name): diff --git a/Lib/xmlrpclib.py b/Lib/xmlrpclib.py index efb0a7b48f..da96420311 100644 --- a/Lib/xmlrpclib.py +++ b/Lib/xmlrpclib.py @@ -622,7 +622,7 @@ class Marshaller: write("<value><string>") write(escape(value)) write("</string></value>\n") - dispatch[str8] = dump_string + dispatch[bytes] = dump_string def dump_unicode(self, value, write, escape=escape): write("<value><string>") diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 088f4a06a4..97f639d57c 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -678,7 +678,7 @@ class ZipFile: print(centdir) filename = fp.read(centdir[_CD_FILENAME_LENGTH]) # Create ZipInfo instance to store file information - x = ZipInfo(str(filename)) + x = ZipInfo(filename.decode("utf-8")) x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) total = (total + centdir[_CD_FILENAME_LENGTH] |
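A pattern repeated throughout the hunks above is the removal of implicit str/bytes coercion: tests now compare subprocess output against bytes literals, and code that builds header strings from base64 output (urllib2.py, test_urllib2.py) gains an explicit .decode(). A minimal sketch of the resulting idiom, written for present-day Python 3 and using made-up credentials rather than anything from the patch:

    import base64

    # Hypothetical example, not part of the patch: base64.b64encode() accepts and
    # returns bytes, so text must be encoded first and the result decoded again
    # before it can be spliced into a str header value.
    user, password = "user", "secret"                  # assumed sample values
    raw = ("%s:%s" % (user, password)).encode("ascii")
    auth = "Basic " + base64.b64encode(raw).strip().decode("ascii")
    print(auth)                                        # -> Basic dXNlcjpzZWNyZXQ=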