Patch #1436130: codecs.lookup() now returns a CodecInfo object (a subclass

of tuple) that provides incremental decoders and encoders (a way to use stateful codecs without the stream API). Functions codecs.getincrementaldecoder() and codecs.getincrementalencoder() have been added.
author: Walter Dörwald <walter@livinglogic.de> 2006-03-15 11:35:15 +0000
committer: Walter Dörwald <walter@livinglogic.de> 2006-03-15 11:35:15 +0000
commit: 3f980a10690922d8ed0c46fb9b8f756a275d0a8a (patch)
tree: c730d5cbc4691f49e2375c1717707ac10e050f40 /Lib/codecs.py
parent: fa4e011e4a0ce1fba35e6bef1c254576fd50b20f (diff)
download: cpython-3f980a10690922d8ed0c46fb9b8f756a275d0a8a.tar.gz
1 files changed, 172 insertions, 11 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py
index 6895a228fe..28856c714e 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -73,6 +73,23 @@ BOM64_BE = BOM_UTF32_BE
 
 ### Codec base classes (defining the API)
 
+class CodecInfo(tuple):
+
+    def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
+        incrementalencoder=None, incrementaldecoder=None, name=None):
+        self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
+        self.name = name
+        self.encode = encode
+        self.decode = decode
+        self.incrementalencoder = incrementalencoder
+        self.incrementaldecoder = incrementaldecoder
+        self.streamwriter = streamwriter
+        self.streamreader = streamreader
+        return self
+
+    def __repr__(self):
+        return "<%s.%s object for encoding %s at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, id(self))
+
 class Codec:
 
     """ Defines the interface for stateless encoders/decoders.
@@ -137,6 +154,88 @@ class Codec:
         """
         raise NotImplementedError
 
+class IncrementalEncoder(object):
+    """
+    A IncrementalEncoder encodes an input in multiple steps. The input can be
+    passed piece by piece to the encode() method. The IncrementalEncoder remembers
+    the state of the Encoding process between calls to encode().
+    """
+    def __init__(self, errors='strict'):
+        """
+        Creates a IncrementalEncoder instance.
+
+        The IncrementalEncoder may use different error handling schemes by
+        providing the errors keyword argument. See the module docstring
+        for a list of possible values.
+        """
+        self.errors = errors
+        self.buffer = ""
+
+    def encode(self, input, final=False):
+        """
+        Encodes input and returns the resulting object.
+        """
+        raise NotImplementedError
+
+    def reset(self):
+        """
+        Resets the encoder to the initial state.
+        """
+
+class IncrementalDecoder(object):
+    """
+    An IncrementalDecoder decodes an input in multiple steps. The input can be
+    passed piece by piece to the decode() method. The IncrementalDecoder
+    remembers the state of the decoding process between calls to decode().
+    """
+    def __init__(self, errors='strict'):
+        """
+        Creates a IncrementalDecoder instance.
+
+        The IncrementalDecoder may use different error handling schemes by
+        providing the errors keyword argument. See the module docstring
+        for a list of possible values.
+        """
+        self.errors = errors
+
+    def decode(self, input, final=False):
+        """
+        Decodes input and returns the resulting object.
+        """
+        raise NotImplementedError
+
+    def reset(self):
+        """
+        Resets the decoder to the initial state.
+        """
+
+class BufferedIncrementalDecoder(IncrementalDecoder):
+    """
+    This subclass of IncrementalDecoder can be used as the baseclass for an
+    incremental decoder if the decoder must be able to handle incomplete byte
+    sequences.
+    """
+    def __init__(self, errors='strict'):
+        IncrementalDecoder.__init__(self, errors)
+        self.buffer = "" # undecoded input that is kept between calls to decode()
+
+    def _buffer_decode(self, input, errors, final):
+        # Overwrite this method in subclasses: It must decode input
+        # and return an (output, length consumed) tuple
+        raise NotImplementedError
+
+    def decode(self, input, final=False):
+        # decode input (taking the buffer into account)
+        data = self.buffer + input
+        (result, consumed) = self._buffer_decode(data, self.errors, final)
+        # keep undecoded input until the next call
+        self.buffer = data[consumed:]
+        return result
+
+    def reset(self):
+        IncrementalDecoder.reset(self)
+        self.bytebuffer = ""
+
 #
 # The StreamWriter and StreamReader class provide generic working
 # interfaces which can be used to implement new encoding submodules
@@ -666,8 +765,8 @@ def open(filename, mode='rb', encoding=None, errors='strict', buffering=1):
     file = __builtin__.open(filename, mode, buffering)
     if encoding is None:
         return file
-    (e, d, sr, sw) = lookup(encoding)
-    srw = StreamReaderWriter(file, sr, sw, errors)
+    info = lookup(encoding)
+    srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors)
     # Add attributes to simplify introspection
     srw.encoding = encoding
     return srw
@@ -699,11 +798,9 @@ def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):
     """
     if file_encoding is None:
         file_encoding = data_encoding
-    encode, decode = lookup(data_encoding)[:2]
-    Reader, Writer = lookup(file_encoding)[2:]
-    sr = StreamRecoder(file,
-                       encode, decode, Reader, Writer,
-                       errors)
+    info = lookup(data_encoding)
+    sr = StreamRecoder(file, info.encode, info.decode,
+                       info.streamreader, info.streamwriter, errors)
     # Add attributes to simplify introspection
     sr.data_encoding = data_encoding
     sr.file_encoding = file_encoding
@@ -719,7 +816,7 @@ def getencoder(encoding):
         Raises a LookupError in case the encoding cannot be found.
 
     """
-    return lookup(encoding)[0]
+    return lookup(encoding).encode
 
 def getdecoder(encoding):
 
@@ -729,7 +826,35 @@ def getdecoder(encoding):
         Raises a LookupError in case the encoding cannot be found.
 
     """
-    return lookup(encoding)[1]
+    return lookup(encoding).decode
+
+def getincrementalencoder(encoding):
+
+    """ Lookup up the codec for the given encoding and return
+        its IncrementalEncoder class or factory function.
+
+        Raises a LookupError in case the encoding cannot be found
+        or the codecs doesn't provide an incremental encoder.
+
+    """
+    encoder = lookup(encoding).incrementalencoder
+    if encoder is None:
+        raise LookupError(encoding)
+    return encoder
+
+def getincrementaldecoder(encoding):
+
+    """ Lookup up the codec for the given encoding and return
+        its IncrementalDecoder class or factory function.
+
+        Raises a LookupError in case the encoding cannot be found
+        or the codecs doesn't provide an incremental decoder.
+
+    """
+    decoder = lookup(encoding).incrementaldecoder
+    if decoder is None:
+        raise LookupError(encoding)
+    return decoder
 
 def getreader(encoding):
 
@@ -739,7 +864,7 @@ def getreader(encoding):
         Raises a LookupError in case the encoding cannot be found.
 
     """
-    return lookup(encoding)[2]
+    return lookup(encoding).streamreader
 
 def getwriter(encoding):
 
@@ -749,7 +874,43 @@ def getwriter(encoding):
         Raises a LookupError in case the encoding cannot be found.
 
     """
-    return lookup(encoding)[3]
+    return lookup(encoding).streamwriter
+
+def iterencode(iterator, encoding, errors='strict', **kwargs):
+    """
+    Encoding iterator.
+
+    Encodes the input strings from the iterator using a IncrementalEncoder.
+
+    errors and kwargs are passed through to the IncrementalEncoder
+    constructor.
+    """
+    encoder = getincrementalencoder(encoding)(errors, **kwargs)
+    for input in iterator:
+        output = encoder.encode(input)
+        if output:
+            yield output
+    output = encoder.encode("", True)
+    if output:
+        yield output
+
+def iterdecode(iterator, encoding, errors='strict', **kwargs):
+    """
+    Decoding iterator.
+
+    Decodes the input strings from the iterator using a IncrementalDecoder.
+
+    errors and kwargs are passed through to the IncrementalDecoder
+    constructor.
+    """
+    decoder = getincrementaldecoder(encoding)(errors, **kwargs)
+    for input in iterator:
+        output = decoder.decode(input)
+        if output:
+            yield output
+    output = decoder.decode("", True)
+    if output:
+        yield output
 
 ### Helpers for charmap-based codecs
author	Walter Dörwald <walter@livinglogic.de>	2006-03-15 11:35:15 +0000
committer	Walter Dörwald <walter@livinglogic.de>	2006-03-15 11:35:15 +0000
commit	3f980a10690922d8ed0c46fb9b8f756a275d0a8a (patch)
tree	c730d5cbc4691f49e2375c1717707ac10e050f40 /Lib/codecs.py
parent	fa4e011e4a0ce1fba35e6bef1c254576fd50b20f (diff)
download	cpython-3f980a10690922d8ed0c46fb9b8f756a275d0a8a.tar.gz