summary | refs | log | tree | commit | diff
path: root/docutils
diff options
context:
space:
mode:
author: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2022-07-04 21:06:38 +0000
committer: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2022-07-04 21:06:38 +0000
commit: 350808ec2c71144c51d0b2219edd9d889d3d2cf7 (patch)
tree: c790ff68be64e5f8cff94fb7050905c4a85d9ce1 /docutils
parent: 5d9022ac0e6215adb53f462e9ef89998106f22fb (diff)
download: docutils-350808ec2c71144c51d0b2219edd9d889d3d2cf7.tar.gz
Fix handling of UTF-16 encoded source without trailing newline.
Decoding a UTF-16 encoded source with BOM failed after auto-detection of the encoding. The newline normalization in `docutils.io.FileInput.read()` was applied to the raw bytes and produced invalid UTF-16 because it added a single byte (a binary ASCII newline). Postponing the newline normalization until after the decoding step solves this problem. git-svn-id: https://svn.code.sf.net/p/docutils/code/trunk/docutils@9099 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils')
-rw-r--r-- docutils/io.py | 8
1 files changed, 3 insertions, 5 deletions
diff --git a/docutils/io.py b/docutils/io.py
index 9eac03128..4cbf4d7e6 100644
--- a/docutils/io.py
+++ b/docutils/io.py
@@ -383,8 +383,6 @@ class FileInput(Input):
if self.source is sys.stdin:
# read as binary data to circumvent auto-decoding
data = self.source.buffer.read()
- # normalize newlines
- data = b'\n'.join(data.splitlines()+[b''])
else:
data = self.source.read()
except (UnicodeError, LookupError):
@@ -393,14 +391,14 @@ class FileInput(Input):
b_source = open(self.source_path, 'rb')
data = b_source.read()
b_source.close()
- # normalize newlines
- data = b'\n'.join(data.splitlines()+[b''])
else:
raise
finally:
if self.autoclose:
self.close()
- return self.decode(data)
+ data = self.decode(data)
+ # normalise newlines
+ return '\n'.join(data.splitlines()+[''])
def readlines(self):
"""