Fix handling of UTF-16 encoded source without trailing newline.

After auto-detection of the encoding, decoding a UTF-16 encoded source with a BOM failed. The newline normalization in `docutils.io.FileInput.read()` produced invalid UTF-16 because it added a single byte (a binary ASCII newline) to the still-undecoded data. Postponing the newline normalization until after the decoding step solves this problem.

git-svn-id: https://svn.code.sf.net/p/docutils/code/trunk/docutils@9099 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
author: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2022-07-04 21:06:38 +0000
committer: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2022-07-04 21:06:38 +0000
commit: 350808ec2c71144c51d0b2219edd9d889d3d2cf7 (patch)
tree: c790ff68be64e5f8cff94fb7050905c4a85d9ce1 /docutils
parent: 5d9022ac0e6215adb53f462e9ef89998106f22fb (diff)
download: docutils-350808ec2c71144c51d0b2219edd9d889d3d2cf7.tar.gz
1 files changed, 3 insertions, 5 deletions
diff --git a/docutils/io.py b/docutils/io.py
index 9eac03128..4cbf4d7e6 100644
--- a/docutils/io.py
+++ b/docutils/io.py
@@ -383,8 +383,6 @@ class FileInput(Input):
             if self.source is sys.stdin:
                 # read as binary data to circumvent auto-decoding
                 data = self.source.buffer.read()
-                # normalize newlines
-                data = b'\n'.join(data.splitlines()+[b''])
             else:
                 data = self.source.read()
         except (UnicodeError, LookupError):
@@ -393,14 +391,14 @@ class FileInput(Input):
                 b_source = open(self.source_path, 'rb')
                 data = b_source.read()
                 b_source.close()
-                # normalize newlines
-                data = b'\n'.join(data.splitlines()+[b''])
             else:
                 raise
         finally:
             if self.autoclose:
                 self.close()
-        return self.decode(data)
+        data = self.decode(data)
+        # normalise newlines
+        return '\n'.join(data.splitlines()+[''])
 
     def readlines(self):
         """
author	milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>	2022-07-04 21:06:38 +0000
committer	milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>	2022-07-04 21:06:38 +0000
commit	350808ec2c71144c51d0b2219edd9d889d3d2cf7 (patch)
tree	c790ff68be64e5f8cff94fb7050905c4a85d9ce1 /docutils
parent	5d9022ac0e6215adb53f462e9ef89998106f22fb (diff)
download	docutils-350808ec2c71144c51d0b2219edd9d889d3d2cf7.tar.gz