From a977329b6fb0e4c95cabb9043794de69b27a1099 Mon Sep 17 00:00:00 2001 From: Thomas Wouters Date: Fri, 21 Apr 2006 09:43:23 +0000 Subject: Merge part of the trunk changes into the p3yk branch. This merges from 43030 (branch-creation time) up to 43067. 43068 and 43069 contain a little swapping action between re.py and sre.py, and this mightily confuses svn merge, so later changes are going in separately. This merge should break no additional tests. The last-merged revision is going in a 'last_merge' property on '.' (the branch directory.) Arbitrarily chosen, really; if there's a BCP for this, I couldn't find it, but we can easily change it afterwards ;) --- Lib/encodings/utf_8_sig.py | 47 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) (limited to 'Lib/encodings/utf_8_sig.py') diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py index fa437e6929..cd14ab0765 100644 --- a/Lib/encodings/utf_8_sig.py +++ b/Lib/encodings/utf_8_sig.py @@ -22,6 +22,42 @@ def decode(input, errors='strict'): (output, consumed) = codecs.utf_8_decode(input, errors, True) return (output, consumed+prefix) +class IncrementalEncoder(codecs.IncrementalEncoder): + def __init__(self, errors='strict'): + codecs.IncrementalEncoder.__init__(self, errors) + self.first = True + + def encode(self, input, final=False): + if self.first: + self.first = False + return codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0] + else: + return codecs.utf_8_encode(input, errors)[0] + + def reset(self): + codecs.IncrementalEncoder.reset(self) + self.first = True + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + def __init__(self, errors='strict'): + codecs.BufferedIncrementalDecoder.__init__(self, errors) + self.first = True + + def _buffer_decode(self, input, errors, final): + if self.first and codecs.BOM_UTF8.startswith(input): # might be a BOM + if len(input) < 3: + # not enough data to decide if this really is a BOM + # => try again on the next call + return (u"", 0) + (output, consumed) = codecs.utf_8_decode(input[3:], errors, final) + self.first = False + return (output, consumed+3) + return codecs.utf_8_decode(input, errors, final) + + def reset(self): + codecs.BufferedIncrementalDecoder.reset(self) + self.first = True + class StreamWriter(codecs.StreamWriter): def reset(self): codecs.StreamWriter.reset(self) @@ -53,5 +89,12 @@ class StreamReader(codecs.StreamReader): ### encodings module API def getregentry(): - - return (encode,decode,StreamReader,StreamWriter) + return codecs.CodecInfo( + name='utf-8-sig', + encode=encode, + decode=decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) -- cgit v1.2.1