summaryrefslogtreecommitdiff
path: root/Lib/codecs.py
diff options
context:
space:
mode:
authorWalter Dörwald <walter@livinglogic.de>2006-03-15 11:35:15 +0000
committerWalter Dörwald <walter@livinglogic.de>2006-03-15 11:35:15 +0000
commit3f980a10690922d8ed0c46fb9b8f756a275d0a8a (patch)
treec730d5cbc4691f49e2375c1717707ac10e050f40 /Lib/codecs.py
parentfa4e011e4a0ce1fba35e6bef1c254576fd50b20f (diff)
downloadcpython-3f980a10690922d8ed0c46fb9b8f756a275d0a8a.tar.gz
Patch #1436130: codecs.lookup() now returns a CodecInfo object (a subclass
of tuple) that provides incremental decoders and encoders (a way to use stateful codecs without the stream API). Functions codecs.getincrementaldecoder() and codecs.getincrementalencoder() have been added.
Diffstat (limited to 'Lib/codecs.py')
-rw-r--r--Lib/codecs.py183
1 files changed, 172 insertions, 11 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py
index 6895a228fe..28856c714e 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -73,6 +73,23 @@ BOM64_BE = BOM_UTF32_BE
### Codec base classes (defining the API)
+class CodecInfo(tuple):
+
+ def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
+ incrementalencoder=None, incrementaldecoder=None, name=None):
+ self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
+ self.name = name
+ self.encode = encode
+ self.decode = decode
+ self.incrementalencoder = incrementalencoder
+ self.incrementaldecoder = incrementaldecoder
+ self.streamwriter = streamwriter
+ self.streamreader = streamreader
+ return self
+
+ def __repr__(self):
+ return "<%s.%s object for encoding %s at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, id(self))
+
class Codec:
""" Defines the interface for stateless encoders/decoders.
@@ -137,6 +154,88 @@ class Codec:
"""
raise NotImplementedError
+class IncrementalEncoder(object):
+ """
+ A IncrementalEncoder encodes an input in multiple steps. The input can be
+ passed piece by piece to the encode() method. The IncrementalEncoder remembers
+ the state of the Encoding process between calls to encode().
+ """
+ def __init__(self, errors='strict'):
+ """
+ Creates a IncrementalEncoder instance.
+
+ The IncrementalEncoder may use different error handling schemes by
+ providing the errors keyword argument. See the module docstring
+ for a list of possible values.
+ """
+ self.errors = errors
+ self.buffer = ""
+
+ def encode(self, input, final=False):
+ """
+ Encodes input and returns the resulting object.
+ """
+ raise NotImplementedError
+
+ def reset(self):
+ """
+ Resets the encoder to the initial state.
+ """
+
+class IncrementalDecoder(object):
+ """
+ An IncrementalDecoder decodes an input in multiple steps. The input can be
+ passed piece by piece to the decode() method. The IncrementalDecoder
+ remembers the state of the decoding process between calls to decode().
+ """
+ def __init__(self, errors='strict'):
+ """
+ Creates a IncrementalDecoder instance.
+
+ The IncrementalDecoder may use different error handling schemes by
+ providing the errors keyword argument. See the module docstring
+ for a list of possible values.
+ """
+ self.errors = errors
+
+ def decode(self, input, final=False):
+ """
+ Decodes input and returns the resulting object.
+ """
+ raise NotImplementedError
+
+ def reset(self):
+ """
+ Resets the decoder to the initial state.
+ """
+
+class BufferedIncrementalDecoder(IncrementalDecoder):
+ """
+ This subclass of IncrementalDecoder can be used as the baseclass for an
+ incremental decoder if the decoder must be able to handle incomplete byte
+ sequences.
+ """
+ def __init__(self, errors='strict'):
+ IncrementalDecoder.__init__(self, errors)
+ self.buffer = "" # undecoded input that is kept between calls to decode()
+
+ def _buffer_decode(self, input, errors, final):
+ # Overwrite this method in subclasses: It must decode input
+ # and return an (output, length consumed) tuple
+ raise NotImplementedError
+
+ def decode(self, input, final=False):
+ # decode input (taking the buffer into account)
+ data = self.buffer + input
+ (result, consumed) = self._buffer_decode(data, self.errors, final)
+ # keep undecoded input until the next call
+ self.buffer = data[consumed:]
+ return result
+
+ def reset(self):
+ IncrementalDecoder.reset(self)
+ self.bytebuffer = ""
+
#
# The StreamWriter and StreamReader class provide generic working
# interfaces which can be used to implement new encoding submodules
@@ -666,8 +765,8 @@ def open(filename, mode='rb', encoding=None, errors='strict', buffering=1):
file = __builtin__.open(filename, mode, buffering)
if encoding is None:
return file
- (e, d, sr, sw) = lookup(encoding)
- srw = StreamReaderWriter(file, sr, sw, errors)
+ info = lookup(encoding)
+ srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors)
# Add attributes to simplify introspection
srw.encoding = encoding
return srw
@@ -699,11 +798,9 @@ def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):
"""
if file_encoding is None:
file_encoding = data_encoding
- encode, decode = lookup(data_encoding)[:2]
- Reader, Writer = lookup(file_encoding)[2:]
- sr = StreamRecoder(file,
- encode, decode, Reader, Writer,
- errors)
+ info = lookup(data_encoding)
+ sr = StreamRecoder(file, info.encode, info.decode,
+ info.streamreader, info.streamwriter, errors)
# Add attributes to simplify introspection
sr.data_encoding = data_encoding
sr.file_encoding = file_encoding
@@ -719,7 +816,7 @@ def getencoder(encoding):
Raises a LookupError in case the encoding cannot be found.
"""
- return lookup(encoding)[0]
+ return lookup(encoding).encode
def getdecoder(encoding):
@@ -729,7 +826,35 @@ def getdecoder(encoding):
Raises a LookupError in case the encoding cannot be found.
"""
- return lookup(encoding)[1]
+ return lookup(encoding).decode
+
+def getincrementalencoder(encoding):
+
+ """ Lookup up the codec for the given encoding and return
+ its IncrementalEncoder class or factory function.
+
+ Raises a LookupError in case the encoding cannot be found
+ or the codecs doesn't provide an incremental encoder.
+
+ """
+ encoder = lookup(encoding).incrementalencoder
+ if encoder is None:
+ raise LookupError(encoding)
+ return encoder
+
+def getincrementaldecoder(encoding):
+
+ """ Lookup up the codec for the given encoding and return
+ its IncrementalDecoder class or factory function.
+
+ Raises a LookupError in case the encoding cannot be found
+ or the codecs doesn't provide an incremental decoder.
+
+ """
+ decoder = lookup(encoding).incrementaldecoder
+ if decoder is None:
+ raise LookupError(encoding)
+ return decoder
def getreader(encoding):
@@ -739,7 +864,7 @@ def getreader(encoding):
Raises a LookupError in case the encoding cannot be found.
"""
- return lookup(encoding)[2]
+ return lookup(encoding).streamreader
def getwriter(encoding):
@@ -749,7 +874,43 @@ def getwriter(encoding):
Raises a LookupError in case the encoding cannot be found.
"""
- return lookup(encoding)[3]
+ return lookup(encoding).streamwriter
+
+def iterencode(iterator, encoding, errors='strict', **kwargs):
+ """
+ Encoding iterator.
+
+ Encodes the input strings from the iterator using a IncrementalEncoder.
+
+ errors and kwargs are passed through to the IncrementalEncoder
+ constructor.
+ """
+ encoder = getincrementalencoder(encoding)(errors, **kwargs)
+ for input in iterator:
+ output = encoder.encode(input)
+ if output:
+ yield output
+ output = encoder.encode("", True)
+ if output:
+ yield output
+
+def iterdecode(iterator, encoding, errors='strict', **kwargs):
+ """
+ Decoding iterator.
+
+ Decodes the input strings from the iterator using a IncrementalDecoder.
+
+ errors and kwargs are passed through to the IncrementalDecoder
+ constructor.
+ """
+ decoder = getincrementaldecoder(encoding)(errors, **kwargs)
+ for input in iterator:
+ output = decoder.decode(input)
+ if output:
+ yield output
+ output = decoder.decode("", True)
+ if output:
+ yield output
### Helpers for charmap-based codecs