#7475: add (un)transform methods to bytes/bytearray and str, and add back the Python 2 codecs that can be used with them.

author: Georg Brandl <georg@python.org> 2010-12-02 18:06:51 +0000
committer: Georg Brandl <georg@python.org> 2010-12-02 18:06:51 +0000
commit: 02524629f39bb70f4ea00ab8e64d694e08719227 (patch)
tree: de93598f38e1b4d84eaa743b31df1a79a21c957c /Lib/codecs.py
parent: de0ab5eab31c9ea9a628718778b0dc57df0d8470 (diff)
download: cpython-git-02524629f39bb70f4ea00ab8e64d694e08719227.tar.gz
1 file changed, 13 insertions, 10 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py
index f6c2448168..b150d64d53 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -396,6 +396,8 @@ class StreamWriter(Codec):
 
 class StreamReader(Codec):
 
+    charbuffertype = str
+
     def __init__(self, stream, errors='strict'):
 
         """ Creates a StreamReader instance.
@@ -417,9 +419,8 @@ class StreamReader(Codec):
         self.stream = stream
         self.errors = errors
         self.bytebuffer = b""
-        # For str->str decoding this will stay a str
-        # For str->unicode decoding the first read will promote it to unicode
-        self.charbuffer = ""
+        self._empty_charbuffer = self.charbuffertype()
+        self.charbuffer = self._empty_charbuffer
         self.linebuffer = None
 
     def decode(self, input, errors='strict'):
@@ -455,7 +456,7 @@ class StreamReader(Codec):
         """
         # If we have lines cached, first merge them back into characters
         if self.linebuffer:
-            self.charbuffer = "".join(self.linebuffer)
+            self.charbuffer = self._empty_charbuffer.join(self.linebuffer)
             self.linebuffer = None
 
         # read until we get the required number of characters (if available)
@@ -498,7 +499,7 @@ class StreamReader(Codec):
         if chars < 0:
             # Return everything we've got
             result = self.charbuffer
-            self.charbuffer = ""
+            self.charbuffer = self._empty_charbuffer
         else:
             # Return the first chars characters
             result = self.charbuffer[:chars]
@@ -529,7 +530,7 @@ class StreamReader(Codec):
             return line
 
         readsize = size or 72
-        line = ""
+        line = self._empty_charbuffer
         # If size is given, we call read() only once
         while True:
             data = self.read(readsize, firstline=True)
@@ -537,7 +538,8 @@ class StreamReader(Codec):
                 # If we're at a "\r" read one extra character (which might
                 # be a "\n") to get a proper line ending. If the stream is
                 # temporarily exhausted we return the wrong line ending.
-                if data.endswith("\r"):
+                if (isinstance(data, str) and data.endswith("\r")) or \
+                   (isinstance(data, bytes) and data.endswith(b"\r")):
                     data += self.read(size=1, chars=1)
 
             line += data
@@ -563,7 +565,8 @@ class StreamReader(Codec):
                 line0withoutend = lines[0].splitlines(False)[0]
                 if line0withend != line0withoutend: # We really have a line end
                     # Put the rest back together and keep it until the next call
-                    self.charbuffer = "".join(lines[1:]) + self.charbuffer
+                    self.charbuffer = self._empty_charbuffer.join(lines[1:]) + \
+                                      self.charbuffer
                     if keepends:
                         line = line0withend
                     else:
@@ -574,7 +577,7 @@ class StreamReader(Codec):
                 if line and not keepends:
                     line = line.splitlines(False)[0]
                 break
-            if readsize<8000:
+            if readsize < 8000:
                 readsize *= 2
         return line
 
@@ -603,7 +606,7 @@ class StreamReader(Codec):
 
         """
         self.bytebuffer = b""
-        self.charbuffer = ""
+        self.charbuffer = self._empty_charbuffer
         self.linebuffer = None
 
     def seek(self, offset, whence=0):
author	Georg Brandl <georg@python.org>	2010-12-02 18:06:51 +0000
committer	Georg Brandl <georg@python.org>	2010-12-02 18:06:51 +0000
commit	02524629f39bb70f4ea00ab8e64d694e08719227 (patch)
tree	de93598f38e1b4d84eaa743b31df1a79a21c957c /Lib/codecs.py
parent	de0ab5eab31c9ea9a628718778b0dc57df0d8470 (diff)
download	cpython-git-02524629f39bb70f4ea00ab8e64d694e08719227.tar.gz