diff options
| author | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2022-07-04 21:06:38 +0000 |
|---|---|---|
| committer | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2022-07-04 21:06:38 +0000 |
| commit | 350808ec2c71144c51d0b2219edd9d889d3d2cf7 (patch) | |
| tree | c790ff68be64e5f8cff94fb7050905c4a85d9ce1 /docutils | |
| parent | 5d9022ac0e6215adb53f462e9ef89998106f22fb (diff) | |
| download | docutils-350808ec2c71144c51d0b2219edd9d889d3d2cf7.tar.gz | |
Fix handling of UTF-16 encoded source without trailing newline.
Decoding a UTF-16 encoded source with BOM after auto-detection of the
encoding failed.
The newline normalization in `docutils.io.FileInput.read()`
produced invalid UTF-16 because it added one byte
(binary ASCII newline).
Postponing the newline normalization until after the decoding step solves
this problem.
git-svn-id: https://svn.code.sf.net/p/docutils/code/trunk/docutils@9099 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils')
| -rw-r--r-- | docutils/io.py | 8 |
1 files changed, 3 insertions, 5 deletions
diff --git a/docutils/io.py b/docutils/io.py index 9eac03128..4cbf4d7e6 100644 --- a/docutils/io.py +++ b/docutils/io.py @@ -383,8 +383,6 @@ class FileInput(Input): if self.source is sys.stdin: # read as binary data to circumvent auto-decoding data = self.source.buffer.read() - # normalize newlines - data = b'\n'.join(data.splitlines()+[b'']) else: data = self.source.read() except (UnicodeError, LookupError): @@ -393,14 +391,14 @@ class FileInput(Input): b_source = open(self.source_path, 'rb') data = b_source.read() b_source.close() - # normalize newlines - data = b'\n'.join(data.splitlines()+[b'']) else: raise finally: if self.autoclose: self.close() - return self.decode(data) + data = self.decode(data) + # normalise newlines + return '\n'.join(data.splitlines()+['']) def readlines(self): """ |
