From fc59cfc7455fd34ab1e293b4e5ac54cffbc7502f Mon Sep 17 00:00:00 2001 From: milde Date: Mon, 7 Nov 2022 11:00:43 +0000 Subject: Simplify and expand tests for handling the encoding of included files. Use a simpler sample file for signed UTF-16 input. Test encoding auto-detection to ensure it is in sync with encoding handling for the main document. git-svn-id: https://svn.code.sf.net/p/docutils/code/trunk@9220 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- .../test_rst/test_directives/test_include.py | 40 +++++++++++---- .../test_rst/test_directives/test_raw.py | 42 +++++++++------- .../test_rst/test_directives/test_tables.py | 58 ++++++++++++++++++++++ 3 files changed, 114 insertions(+), 26 deletions(-) (limited to 'docutils') diff --git a/docutils/test/test_parsers/test_rst/test_directives/test_include.py b/docutils/test/test_parsers/test_rst/test_directives/test_include.py index 2e1c41b86..32f3f88cd 100755 --- a/docutils/test/test_parsers/test_rst/test_directives/test_include.py +++ b/docutils/test/test_parsers/test_rst/test_directives/test_include.py @@ -55,8 +55,8 @@ include15 = mydir('includes/include15.txt') include16 = mydir('includes/include16.txt') include_literal = mydir('include_literal.txt') include_md = mydir('include.md') -utf_16_file = mydir('utf-16.csv') -utf_16_error_str = ("UnicodeDecodeError: 'ascii' codec can't decode byte 0xfe " +utf_16_file = 'data/utf-16-le-sig.txt' +utf_16_error_str = ("UnicodeDecodeError: 'ascii' codec can't decode byte 0xff " "in position 0: ordinal not in range(128)") nonexistent = os.path.join(os.path.dirname(parsers.rst.states.__file__), 'include', 'nonexistent') @@ -498,17 +498,39 @@ Encoding: .. include:: %s :encoding: utf-16 """ % reldir(utf_16_file), -b"""\ +"""\ Encoding: - "Treat", "Quantity", "Description" - "Albatr\xb0\xdf", 2.99, "\xa1On a \\u03c3\\u03c4\\u03b9\\u03ba!" - "Crunchy Frog", 1.49, "If we took the b\xf6nes out, it wouldn\\u2019t be - crunchy, now would it?" - "Gannet Ripple", 1.99, "\xbfOn a \\u03c3\\u03c4\\u03b9\\u03ba?" -""".decode('raw_unicode_escape')], + Grüße +"""], +["""\ +Default encoding: auto-determine (here via BOM). + +.. include:: %s +""" % reldir(utf_16_file), +"""\ + + + Default encoding: auto-determine (here via BOM). + + Grüße +"""], +["""\ +Default encoding: auto-determine (via encoding declaration). + +.. include:: data/latin2.txt +""", +"""\ + + + Default encoding: auto-determine (via encoding declaration). + + -*- encoding: latin2 -*- + + škoda +"""], ["""\ Include file is UTF-16-encoded, and is not valid ASCII. diff --git a/docutils/test/test_parsers/test_rst/test_directives/test_raw.py b/docutils/test/test_parsers/test_rst/test_directives/test_raw.py index a37224c53..bb1c87e79 100755 --- a/docutils/test/test_parsers/test_rst/test_directives/test_raw.py +++ b/docutils/test/test_parsers/test_rst/test_directives/test_raw.py @@ -23,9 +23,8 @@ def suite(): mydir = 'test_parsers/test_rst/test_directives/' raw1 = os.path.join(mydir, 'raw1.txt') -utf_16_file = os.path.join(mydir, 'utf-16.csv') -utf_16_file_rel = utils.relative_path(None, utf_16_file) -utf_16_error_str = ("UnicodeDecodeError: 'ascii' codec can't decode byte 0xfe " +utf_16_file = 'data/utf-16-le-sig.txt' +utf_16_error_str = ("UnicodeDecodeError: 'ascii' codec can't decode byte 0xff " "in position 0: ordinal not in range(128)") totest = {} @@ -94,25 +93,34 @@ totest['raw'] = [ """], ["""\ .. raw:: html - :file: %s + :file: data/utf-16-le-sig.txt :encoding: utf-16 -""" % utf_16_file_rel, -b"""\ +""", +"""\ - - "Treat", "Quantity", "Description" - "Albatr\xb0\xdf", 2.99, "\xa1On a \\u03c3\\u03c4\\u03b9\\u03ba!" - "Crunchy Frog", 1.49, "If we took the b\xf6nes out, it wouldn\\u2019t be - crunchy, now would it?" - "Gannet Ripple", 1.99, "\xbfOn a \\u03c3\\u03c4\\u03b9\\u03ba?" -""".decode('raw_unicode_escape') % utf_16_file_rel], + + Grüße +"""], +["""\ +Default encoding: auto-determine (here via BOM). + +.. raw:: html + :file: data/utf-16-le-sig.txt +""", +"""\ + + + Default encoding: auto-determine (here via BOM). + + Grüße +"""], ["""\ Raw input file is UTF-16-encoded, and is not valid ASCII. .. raw:: html - :file: %s + :file: data/utf-16-le-sig.txt :encoding: ascii -""" % utf_16_file_rel, +""", """\ @@ -123,9 +131,9 @@ Raw input file is UTF-16-encoded, and is not valid ASCII. %s .. raw:: html - :file: %s + :file: data/utf-16-le-sig.txt :encoding: ascii -""" % (utf_16_error_str, utf_16_file_rel)], +""" % utf_16_error_str], ["""\ .. raw:: html :encoding: utf-8 diff --git a/docutils/test/test_parsers/test_rst/test_directives/test_tables.py b/docutils/test/test_parsers/test_rst/test_directives/test_tables.py index a8a73ced2..dc6b065fe 100755 --- a/docutils/test/test_parsers/test_rst/test_directives/test_tables.py +++ b/docutils/test/test_parsers/test_rst/test_directives/test_tables.py @@ -1165,6 +1165,64 @@ bad_encoding_result \u00bfOn a \u03c3\u03c4\u03b9\u03ba? """], ["""\ +.. csv-table:: auto encoding + :file: %s + :header-rows: 1 +""" % utf_16_csv, +"""\ + + + + auto encoding + <tgroup cols="3"> + <colspec colwidth="33"> + <colspec colwidth="33"> + <colspec colwidth="33"> + <thead> + <row> + <entry> + <paragraph> + Treat + <entry> + <paragraph> + Quantity + <entry> + <paragraph> + Description + <tbody> + <row> + <entry> + <paragraph> + Albatr\u00b0\u00df + <entry> + <paragraph> + 2.99 + <entry> + <paragraph> + \u00a1On a \u03c3\u03c4\u03b9\u03ba! + <row> + <entry> + <paragraph> + Crunchy Frog + <entry> + <paragraph> + 1.49 + <entry> + <paragraph> + If we took the b\u00f6nes out, it wouldn\u2019t be + crunchy, now would it? + <row> + <entry> + <paragraph> + Gannet Ripple + <entry> + <paragraph> + 1.99 + <entry> + <paragraph> + \u00bfOn a \u03c3\u03c4\u03b9\u03ba? +"""], +["""\ .. csv-table:: no CSV data :file: %s """ % empty_txt, -- cgit v1.2.1