diff options
author | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2010-03-18 22:27:53 +0000 |
---|---|---|
committer | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2010-03-18 22:27:53 +0000 |
commit | 6625dcc441b468f22e6b4a35221b907fa23fc633 (patch) | |
tree | 01e77fe55041f098878b992b0fbfcd8c4cba4d79 /docutils/io.py | |
parent | d0f8bcaf1fcc1cb7999022c3fc114b6c2f54eec1 (diff) | |
download | docutils-6625dcc441b468f22e6b4a35221b907fa23fc633.tar.gz |
Fix input/output for py3k.
In py3k, decoding/encoding of text files is done automatically during
read/write operations and the write() method fails with encoded (i.e.
binary) strings. For performance and universal newline support, we
use the built-in encoding mechanism instead of the "homegrown" one if
py3k is detected; as a consequence, io.FileOutput.write() returns a
Unicode string (the correct argument type for file.write() in py3k).
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@6269 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils/io.py')
-rw-r--r-- | docutils/io.py | 44 |
1 files changed, 31 insertions, 13 deletions
diff --git a/docutils/io.py b/docutils/io.py index 9523142fd..66f22a15d 100644 --- a/docutils/io.py +++ b/docutils/io.py @@ -15,6 +15,7 @@ try: except: pass import re +import codecs from docutils import TransformSpec from docutils._compat import b @@ -89,11 +90,6 @@ class Input(TransformSpec): # data that *IS* UTF-8: encodings = ['utf-8'] try: - # for Python 2.2 compatibility - encodings.append(locale.nl_langinfo(locale.CODESET)) - except: - pass - try: encodings.append(locale.getlocale()[1]) except: pass @@ -127,10 +123,10 @@ class Input(TransformSpec): coding_slug = re.compile(b("coding[:=]\s*([-\w.]+)")) """Encoding declaration pattern.""" - byte_order_marks = ((b('\xef\xbb\xbf'), 'utf-8'), - (b('\xfe\xff'), 'utf-16-be'), - (b('\xff\xfe'), 'utf-16-le'),) - """Sequence of (start_bytes, encoding) tuples to for encoding detection. + byte_order_marks = ((codecs.BOM_UTF8, 'utf-8'), # actually 'utf-8-sig' + (codecs.BOM_UTF16_BE, 'utf-16-be'), + (codecs.BOM_UTF16_LE, 'utf-16-le'),) + """Sequence of (start_bytes, encoding) tuples for encoding detection. The first bytes of input data are checked against the start_bytes strings. A match indicates the given encoding.""" @@ -227,8 +223,15 @@ class FileInput(Input): self.handle_io_errors = handle_io_errors if source is None: if source_path: + # Specify encoding in Python 3 + if sys.version_info >= (3,0): + kwargs = {'encoding': self.encoding, + 'errors': self.error_handler} + else: + kwargs = {} + try: - self.source = open(source_path, mode) + self.source = open(source_path, mode, **kwargs) except IOError, error: if not handle_io_errors: raise @@ -310,8 +313,17 @@ class FileOutput(Output): pass def open(self): + # Specify encoding in Python 3. + # (Do not use binary mode ('wb') as this prevents the + # conversion of newlines to the system specific default.) 
+ if sys.version_info >= (3,0): + kwargs = {'encoding': self.encoding, + 'errors': self.error_handler} + else: + kwargs = {} + try: - self.destination = open(self.destination_path, 'w') + self.destination = open(self.destination_path, 'w', **kwargs) except IOError, error: if not self.handle_io_errors: raise @@ -323,8 +335,14 @@ class FileOutput(Output): self.opened = 1 def write(self, data): - """Encode `data`, write it to a single file, and return it.""" - output = self.encode(data) + """Encode `data`, write it to a single file, and return it. + + In Python 3, a (unicode) String is returned. + """ + if sys.version_info >= (3,0): + output = data # in py3k, write expects a (Unicode) string + else: + output = self.encode(data) if not self.opened: self.open() try: |