summaryrefslogtreecommitdiff
path: root/Lib/test/test_difflib.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_difflib.py')
-rw-r--r--Lib/test/test_difflib.py182
1 files changed, 181 insertions, 1 deletions
diff --git a/Lib/test/test_difflib.py b/Lib/test/test_difflib.py
index 0ba8f0e05b..ab9debf8e2 100644
--- a/Lib/test/test_difflib.py
+++ b/Lib/test/test_difflib.py
@@ -107,6 +107,20 @@ patch914575_to1 = """
5. Flat is better than nested.
"""
+patch914575_nonascii_from1 = """
+ 1. Beautiful is beTTer than ugly.
+ 2. Explicit is better than ımplıcıt.
+ 3. Simple is better than complex.
+ 4. Complex is better than complicated.
+"""
+
+patch914575_nonascii_to1 = """
+ 1. Beautiful is better than ügly.
+ 3. Sımple is better than complex.
+ 4. Complicated is better than cömplex.
+ 5. Flat is better than nested.
+"""
+
patch914575_from2 = """
\t\tLine 1: preceeded by from:[tt] to:[ssss]
\t\tLine 2: preceeded by from:[sstt] to:[sssst]
@@ -223,6 +237,27 @@ class TestSFpatches(unittest.TestCase):
new = [(i%2 and "K:%d" or "V:B:%d") % i for i in range(limit*2)]
difflib.SequenceMatcher(None, old, new).get_opcodes()
+ def test_make_file_default_charset(self):
+ html_diff = difflib.HtmlDiff()
+ output = html_diff.make_file(patch914575_from1.splitlines(),
+ patch914575_to1.splitlines())
+ self.assertIn('content="text/html; charset=utf-8"', output)
+
+ def test_make_file_iso88591_charset(self):
+ html_diff = difflib.HtmlDiff()
+ output = html_diff.make_file(patch914575_from1.splitlines(),
+ patch914575_to1.splitlines(),
+ charset='iso-8859-1')
+ self.assertIn('content="text/html; charset=iso-8859-1"', output)
+
+ def test_make_file_usascii_charset_with_nonascii_input(self):
+ html_diff = difflib.HtmlDiff()
+ output = html_diff.make_file(patch914575_nonascii_from1.splitlines(),
+ patch914575_nonascii_to1.splitlines(),
+ charset='us-ascii')
+ self.assertIn('content="text/html; charset=us-ascii"', output)
+ self.assertIn('ımplıcıt', output)
+
class TestOutputFormat(unittest.TestCase):
def test_tab_delimiter(self):
@@ -287,12 +322,157 @@ class TestOutputFormat(unittest.TestCase):
self.assertEqual(fmt(0,0), '0')
+class TestBytes(unittest.TestCase):
+ # don't really care about the content of the output, just the fact
+ # that it's bytes and we don't crash
+ def check(self, diff):
+ diff = list(diff) # trigger exceptions first
+ for line in diff:
+ self.assertIsInstance(
+ line, bytes,
+ "all lines of diff should be bytes, but got: %r" % line)
+
+ def test_byte_content(self):
+ # if we receive byte strings, we return byte strings
+ a = [b'hello', b'andr\xe9'] # iso-8859-1 bytes
+ b = [b'hello', b'andr\xc3\xa9'] # utf-8 bytes
+
+ unified = difflib.unified_diff
+ context = difflib.context_diff
+
+ check = self.check
+ check(difflib.diff_bytes(unified, a, a))
+ check(difflib.diff_bytes(unified, a, b))
+
+ # now with filenames (content and filenames are all bytes!)
+ check(difflib.diff_bytes(unified, a, a, b'a', b'a'))
+ check(difflib.diff_bytes(unified, a, b, b'a', b'b'))
+
+ # and with filenames and dates
+ check(difflib.diff_bytes(unified, a, a, b'a', b'a', b'2005', b'2013'))
+ check(difflib.diff_bytes(unified, a, b, b'a', b'b', b'2005', b'2013'))
+
+ # same all over again, with context diff
+ check(difflib.diff_bytes(context, a, a))
+ check(difflib.diff_bytes(context, a, b))
+ check(difflib.diff_bytes(context, a, a, b'a', b'a'))
+ check(difflib.diff_bytes(context, a, b, b'a', b'b'))
+ check(difflib.diff_bytes(context, a, a, b'a', b'a', b'2005', b'2013'))
+ check(difflib.diff_bytes(context, a, b, b'a', b'b', b'2005', b'2013'))
+
+ def test_byte_filenames(self):
+ # somebody renamed a file from ISO-8859-2 to UTF-8
+ fna = b'\xb3odz.txt' # "łodz.txt"
+ fnb = b'\xc5\x82odz.txt'
+
+ # they transcoded the content at the same time
+ a = [b'\xa3odz is a city in Poland.']
+ b = [b'\xc5\x81odz is a city in Poland.']
+
+ check = self.check
+ unified = difflib.unified_diff
+ context = difflib.context_diff
+ check(difflib.diff_bytes(unified, a, b, fna, fnb))
+ check(difflib.diff_bytes(context, a, b, fna, fnb))
+
+ def assertDiff(expect, actual):
+ # do not compare expect and equal as lists, because unittest
+ # uses difflib to report difference between lists
+ actual = list(actual)
+ self.assertEqual(len(expect), len(actual))
+ for e, a in zip(expect, actual):
+ self.assertEqual(e, a)
+
+ expect = [
+ b'--- \xb3odz.txt',
+ b'+++ \xc5\x82odz.txt',
+ b'@@ -1 +1 @@',
+ b'-\xa3odz is a city in Poland.',
+ b'+\xc5\x81odz is a city in Poland.',
+ ]
+ actual = difflib.diff_bytes(unified, a, b, fna, fnb, lineterm=b'')
+ assertDiff(expect, actual)
+
+ # with dates (plain ASCII)
+ datea = b'2005-03-18'
+ dateb = b'2005-03-19'
+ check(difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb))
+ check(difflib.diff_bytes(context, a, b, fna, fnb, datea, dateb))
+
+ expect = [
+ # note the mixed encodings here: this is deeply wrong by every
+ # tenet of Unicode, but it doesn't crash, it's parseable by
+ # patch, and it's how UNIX(tm) diff behaves
+ b'--- \xb3odz.txt\t2005-03-18',
+ b'+++ \xc5\x82odz.txt\t2005-03-19',
+ b'@@ -1 +1 @@',
+ b'-\xa3odz is a city in Poland.',
+ b'+\xc5\x81odz is a city in Poland.',
+ ]
+ actual = difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb,
+ lineterm=b'')
+ assertDiff(expect, actual)
+
+ def test_mixed_types_content(self):
+ # type of input content must be consistent: all str or all bytes
+ a = [b'hello']
+ b = ['hello']
+
+ unified = difflib.unified_diff
+ context = difflib.context_diff
+
+ expect = "lines to compare must be str, not bytes (b'hello')"
+ self._assert_type_error(expect, unified, a, b)
+ self._assert_type_error(expect, unified, b, a)
+ self._assert_type_error(expect, context, a, b)
+ self._assert_type_error(expect, context, b, a)
+
+ expect = "all arguments must be bytes, not str ('hello')"
+ self._assert_type_error(expect, difflib.diff_bytes, unified, a, b)
+ self._assert_type_error(expect, difflib.diff_bytes, unified, b, a)
+ self._assert_type_error(expect, difflib.diff_bytes, context, a, b)
+ self._assert_type_error(expect, difflib.diff_bytes, context, b, a)
+
+ def test_mixed_types_filenames(self):
+ # cannot pass filenames as bytes if content is str (this may not be
+ # the right behaviour, but at least the test demonstrates how
+ # things work)
+ a = ['hello\n']
+ b = ['ohell\n']
+ fna = b'ol\xe9.txt' # filename transcoded from ISO-8859-1
+ fnb = b'ol\xc3a9.txt' # to UTF-8
+ self._assert_type_error(
+ "all arguments must be str, not: b'ol\\xe9.txt'",
+ difflib.unified_diff, a, b, fna, fnb)
+
+ def test_mixed_types_dates(self):
+ # type of dates must be consistent with type of contents
+ a = [b'foo\n']
+ b = [b'bar\n']
+ datea = '1 fév'
+ dateb = '3 fév'
+ self._assert_type_error(
+ "all arguments must be bytes, not str ('1 fév')",
+ difflib.diff_bytes, difflib.unified_diff,
+ a, b, b'a', b'b', datea, dateb)
+
+ # if input is str, non-ASCII dates are fine
+ a = ['foo\n']
+ b = ['bar\n']
+ list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb))
+
+ def _assert_type_error(self, msg, generator, *args):
+ with self.assertRaises(TypeError) as ctx:
+ list(generator(*args))
+ self.assertEqual(msg, str(ctx.exception))
+
+
def test_main():
difflib.HtmlDiff._default_prefix = 0
Doctests = doctest.DocTestSuite(difflib)
run_unittest(
TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
- TestOutputFormat, Doctests)
+ TestOutputFormat, TestBytes, Doctests)
if __name__ == '__main__':
test_main()