From fc59cfc7455fd34ab1e293b4e5ac54cffbc7502f Mon Sep 17 00:00:00 2001
From: milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>
Date: Mon, 7 Nov 2022 11:00:43 +0000
Subject: Simplify and expand tests for handling the encoding of included
 files.

Use a simpler sample file for UTF-16 input with a signature (BOM).

Test encoding auto-detection (via BOM or encoding declaration) for included
files to ensure it is in sync with encoding handling for the main document.

git-svn-id: https://svn.code.sf.net/p/docutils/code/trunk@9220 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
---
 .../test_rst/test_directives/test_include.py       | 40 +++++++++++----
 .../test_rst/test_directives/test_raw.py           | 42 +++++++++-------
 .../test_rst/test_directives/test_tables.py        | 58 ++++++++++++++++++++++
 3 files changed, 114 insertions(+), 26 deletions(-)

(limited to 'docutils')
diff --git a/docutils/test/test_parsers/test_rst/test_directives/test_include.py b/docutils/test/test_parsers/test_rst/test_directives/test_include.py
index 2e1c41b86..32f3f88cd 100755
--- a/docutils/test/test_parsers/test_rst/test_directives/test_include.py
+++ b/docutils/test/test_parsers/test_rst/test_directives/test_include.py
@@ -55,8 +55,8 @@ include15 = mydir('includes/include15.txt')
 include16 = mydir('includes/include16.txt')
 include_literal = mydir('include_literal.txt')
 include_md = mydir('include.md')
-utf_16_file = mydir('utf-16.csv')
-utf_16_error_str = ("UnicodeDecodeError: 'ascii' codec can't decode byte 0xfe "
+utf_16_file = 'data/utf-16-le-sig.txt'
+utf_16_error_str = ("UnicodeDecodeError: 'ascii' codec can't decode byte 0xff "
                     "in position 0: ordinal not in range(128)")
 nonexistent = os.path.join(os.path.dirname(parsers.rst.states.__file__),
                            'include', 'nonexistent')
@@ -498,17 +498,39 @@ Encoding:
 .. include:: %s
    :encoding: utf-16
 """ % reldir(utf_16_file),
-b"""\
+"""\
 <document source="test data">
     <paragraph>
         Encoding:
     <paragraph>
-        "Treat", "Quantity", "Description"
-        "Albatr\xb0\xdf", 2.99, "\xa1On a \\u03c3\\u03c4\\u03b9\\u03ba!"
-        "Crunchy Frog", 1.49, "If we took the b\xf6nes out, it wouldn\\u2019t be
-        crunchy, now would it?"
-        "Gannet Ripple", 1.99, "\xbfOn a \\u03c3\\u03c4\\u03b9\\u03ba?"
-""".decode('raw_unicode_escape')],
+        Grüße
+"""],
+["""\
+Default encoding: auto-determine (here via BOM).
+
+.. include:: %s
+""" % reldir(utf_16_file),
+"""\
+<document source="test data">
+    <paragraph>
+        Default encoding: auto-determine (here via BOM).
+    <paragraph>
+        Grüße
+"""],
+["""\
+Default encoding: auto-determine (via encoding declaration).
+
+.. include:: data/latin2.txt
+""",
+"""\
+<document source="test data">
+    <paragraph>
+        Default encoding: auto-determine (via encoding declaration).
+    <comment xml:space="preserve">
+        -*- encoding: latin2 -*-
+    <paragraph>
+        škoda
+"""],
 ["""\
 Include file is UTF-16-encoded, and is not valid ASCII.
 
diff --git a/docutils/test/test_parsers/test_rst/test_directives/test_raw.py b/docutils/test/test_parsers/test_rst/test_directives/test_raw.py
index a37224c53..bb1c87e79 100755
--- a/docutils/test/test_parsers/test_rst/test_directives/test_raw.py
+++ b/docutils/test/test_parsers/test_rst/test_directives/test_raw.py
@@ -23,9 +23,8 @@ def suite():
 
 mydir = 'test_parsers/test_rst/test_directives/'
 raw1 = os.path.join(mydir, 'raw1.txt')
-utf_16_file = os.path.join(mydir, 'utf-16.csv')
-utf_16_file_rel = utils.relative_path(None, utf_16_file)
-utf_16_error_str = ("UnicodeDecodeError: 'ascii' codec can't decode byte 0xfe "
+utf_16_file = 'data/utf-16-le-sig.txt'
+utf_16_error_str = ("UnicodeDecodeError: 'ascii' codec can't decode byte 0xff "
                     "in position 0: ordinal not in range(128)")
 
 totest = {}
@@ -94,25 +93,34 @@ totest['raw'] = [
 """],
 ["""\
 .. raw:: html
-   :file: %s
+   :file: data/utf-16-le-sig.txt
    :encoding: utf-16
-""" % utf_16_file_rel,
-b"""\
+""",
+"""\
 <document source="test data">
-    <raw format="html" source="%s" xml:space="preserve">
-        "Treat", "Quantity", "Description"
-        "Albatr\xb0\xdf", 2.99, "\xa1On a \\u03c3\\u03c4\\u03b9\\u03ba!"
-        "Crunchy Frog", 1.49, "If we took the b\xf6nes out, it wouldn\\u2019t be
-        crunchy, now would it?"
-        "Gannet Ripple", 1.99, "\xbfOn a \\u03c3\\u03c4\\u03b9\\u03ba?"
-""".decode('raw_unicode_escape') % utf_16_file_rel],
+    <raw format="html" source="data/utf-16-le-sig.txt" xml:space="preserve">
+        Grüße
+"""],
+["""\
+Default encoding: auto-determine (here via BOM).
+
+.. raw:: html
+   :file: data/utf-16-le-sig.txt
+""",
+"""\
+<document source="test data">
+    <paragraph>
+        Default encoding: auto-determine (here via BOM).
+    <raw format="html" source="data/utf-16-le-sig.txt" xml:space="preserve">
+        Grüße
+"""],
 ["""\
 Raw input file is UTF-16-encoded, and is not valid ASCII.
 
 .. raw:: html
-   :file: %s
+   :file: data/utf-16-le-sig.txt
    :encoding: ascii
-""" % utf_16_file_rel,
+""",
 """\
 <document source="test data">
     <paragraph>
@@ -123,9 +131,9 @@ Raw input file is UTF-16-encoded, and is not valid ASCII.
             %s
         <literal_block xml:space="preserve">
             .. raw:: html
-               :file: %s
+               :file: data/utf-16-le-sig.txt
                :encoding: ascii
-""" % (utf_16_error_str, utf_16_file_rel)],
+""" % utf_16_error_str],
 ["""\
 .. raw:: html
    :encoding: utf-8
diff --git a/docutils/test/test_parsers/test_rst/test_directives/test_tables.py b/docutils/test/test_parsers/test_rst/test_directives/test_tables.py
index a8a73ced2..dc6b065fe 100755
--- a/docutils/test/test_parsers/test_rst/test_directives/test_tables.py
+++ b/docutils/test/test_parsers/test_rst/test_directives/test_tables.py
@@ -1165,6 +1165,64 @@ bad_encoding_result
                             \u00bfOn a \u03c3\u03c4\u03b9\u03ba?
 """],
 ["""\
+.. csv-table:: auto encoding
+   :file: %s
+   :header-rows: 1
+""" % utf_16_csv,
+"""\
+<document source="test data">
+    <table>
+        <title>
+            auto encoding
+        <tgroup cols="3">
+            <colspec colwidth="33">
+            <colspec colwidth="33">
+            <colspec colwidth="33">
+            <thead>
+                <row>
+                    <entry>
+                        <paragraph>
+                            Treat
+                    <entry>
+                        <paragraph>
+                            Quantity
+                    <entry>
+                        <paragraph>
+                            Description
+            <tbody>
+                <row>
+                    <entry>
+                        <paragraph>
+                            Albatr\u00b0\u00df
+                    <entry>
+                        <paragraph>
+                            2.99
+                    <entry>
+                        <paragraph>
+                            \u00a1On a \u03c3\u03c4\u03b9\u03ba!
+                <row>
+                    <entry>
+                        <paragraph>
+                            Crunchy Frog
+                    <entry>
+                        <paragraph>
+                            1.49
+                    <entry>
+                        <paragraph>
+                            If we took the b\u00f6nes out, it wouldn\u2019t be
+                            crunchy, now would it?
+                <row>
+                    <entry>
+                        <paragraph>
+                            Gannet Ripple
+                    <entry>
+                        <paragraph>
+                            1.99
+                    <entry>
+                        <paragraph>
+                            \u00bfOn a \u03c3\u03c4\u03b9\u03ba?
+"""],
+["""\
 .. csv-table:: no CSV data
    :file: %s
 """ % empty_txt,
-- 
cgit v1.2.1