From a977329b6fb0e4c95cabb9043794de69b27a1099 Mon Sep 17 00:00:00 2001
From: Thomas Wouters <thomas@python.org>
Date: Fri, 21 Apr 2006 09:43:23 +0000
Subject: Merge part of the trunk changes into the p3yk branch. This merges
 from 43030 (branch-creation time) up to 43067. 43068 and 43069 contain a
 little swapping action between re.py and sre.py, and this mightily confuses
 svn merge, so later changes are going in separately.

This merge should break no additional tests.

The last-merged revision is going in a 'last_merge' property on '.' (the
branch directory.) Arbitrarily chosen, really; if there's a BCP for this, I
couldn't find it, but we can easily change it afterwards ;)
---
 Lib/encodings/utf_8_sig.py | 47 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

(limited to 'Lib/encodings/utf_8_sig.py')

diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py
index fa437e6929..cd14ab0765 100644
--- a/Lib/encodings/utf_8_sig.py
+++ b/Lib/encodings/utf_8_sig.py
@@ -22,6 +22,42 @@ def decode(input, errors='strict'):
     (output, consumed) = codecs.utf_8_decode(input, errors, True)
     return (output, consumed+prefix)
 
+class IncrementalEncoder(codecs.IncrementalEncoder):  # UTF-8 with a BOM before the first chunk
+    def __init__(self, errors='strict'):
+        codecs.IncrementalEncoder.__init__(self, errors)  # base class keeps it as self.errors
+        self.first = True  # True until the BOM has been emitted
+
+    def encode(self, input, final=False):  # returns the encoded chunk; `final` is not used
+        if self.first:
+            self.first = False
+            return codecs.BOM_UTF8 + codecs.utf_8_encode(input, self.errors)[0]
+        else:  # no bare `errors` name exists in this scope, hence self.errors
+            return codecs.utf_8_encode(input, self.errors)[0]
+
+    def reset(self):
+        codecs.IncrementalEncoder.reset(self)
+        self.first = True  # emit the BOM again on the next encode() call
+
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):  # UTF-8, skipping one leading BOM
+    def __init__(self, errors='strict'):
+        codecs.BufferedIncrementalDecoder.__init__(self, errors)
+        self.first = True  # True while the start of the stream is still undecided
+
+    def _buffer_decode(self, input, errors, final):  # returns (text, bytes consumed)
+        if self.first:
+            if len(input) < 3 and codecs.BOM_UTF8.startswith(input):
+                # might be a BOM, but too short to tell => try again on the next call
+                return (u"", 0)
+            self.first = False  # decided now; a later EF BB BF is data, not a signature
+            if input[:3] == codecs.BOM_UTF8:
+                (output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
+                return (output, consumed+3)  # count the skipped BOM bytes as consumed
+        return codecs.utf_8_decode(input, errors, final)
+
+    def reset(self):
+        codecs.BufferedIncrementalDecoder.reset(self)
+        self.first = True  # look for a BOM again at the start of the next input
+
 class StreamWriter(codecs.StreamWriter):
     def reset(self):
         codecs.StreamWriter.reset(self)
@@ -53,5 +89,12 @@ class StreamReader(codecs.StreamReader):
 ### encodings module API
 
 def getregentry():
-
-    return (encode,decode,StreamReader,StreamWriter)
+    return codecs.CodecInfo(  # replaces the old positional 4-tuple with named fields
+        name='utf-8-sig',
+        encode=encode,
+        decode=decode,
+        incrementalencoder=IncrementalEncoder,  # class added earlier in this patch
+        incrementaldecoder=IncrementalDecoder,  # class added earlier in this patch
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
-- 
cgit v1.2.1