summaryrefslogtreecommitdiff
path: root/Lib/test/test_tokenize.py
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2014-01-09 18:36:09 +0200
committerSerhiy Storchaka <storchaka@gmail.com>2014-01-09 18:36:09 +0200
commit768c16ce0273a74fa846cc388753280b17b02cfc (patch)
treed2fc7f94a08fb20f882e3e0b299a59fea1251aa8 /Lib/test/test_tokenize.py
parent21e7d4cd5eb5a1ee153baf4c7915db80e6ca59e1 (diff)
downloadcpython-git-768c16ce0273a74fa846cc388753280b17b02cfc.tar.gz
Issue #18960: Fix bugs with Python source code encoding in the second line.
* The first line of Python script could be executed twice when the source encoding (not equal to 'utf-8') was specified on the second line. * Now the source encoding declaration on the second line isn't effective if the first line contains anything except a comment. * As a consequence, 'python -x' works now again with files with the source encoding declarations specified on the second line, and can be used again to make Python batch files on Windows. * The tokenize module now ignores the source encoding declaration on the second line if the first line contains anything except a comment. * IDLE now ignores the source encoding declaration on the second line if the first line contains anything except a comment. * 2to3 and the findnocoding.py script now ignore the source encoding declaration on the second line if the first line contains anything except a comment.
Diffstat (limited to 'Lib/test/test_tokenize.py')
-rw-r--r--Lib/test/test_tokenize.py33
1 files changed, 33 insertions, 0 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 17650855eb..6ed859707f 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -885,6 +885,39 @@ class TestDetectEncoding(TestCase):
readline = self.get_readline(lines)
self.assertRaises(SyntaxError, detect_encoding, readline)
+ def test_cookie_second_line_noncommented_first_line(self):
+ lines = (
+ b"print('\xc2\xa3')\n",
+ b'# vim: set fileencoding=iso8859-15 :\n',
+ b"print('\xe2\x82\xac')\n"
+ )
+ encoding, consumed_lines = detect_encoding(self.get_readline(lines))
+ self.assertEqual(encoding, 'utf-8')
+ expected = [b"print('\xc2\xa3')\n"]
+ self.assertEqual(consumed_lines, expected)
+
+ def test_cookie_second_line_commented_first_line(self):
+ lines = (
+ b"#print('\xc2\xa3')\n",
+ b'# vim: set fileencoding=iso8859-15 :\n',
+ b"print('\xe2\x82\xac')\n"
+ )
+ encoding, consumed_lines = detect_encoding(self.get_readline(lines))
+ self.assertEqual(encoding, 'iso8859-15')
+ expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
+ self.assertEqual(consumed_lines, expected)
+
+ def test_cookie_second_line_empty_first_line(self):
+ lines = (
+ b'\n',
+ b'# vim: set fileencoding=iso8859-15 :\n',
+ b"print('\xe2\x82\xac')\n"
+ )
+ encoding, consumed_lines = detect_encoding(self.get_readline(lines))
+ self.assertEqual(encoding, 'iso8859-15')
+ expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
+ self.assertEqual(consumed_lines, expected)
+
def test_latin1_normalization(self):
# See get_normal_name() in tokenizer.c.
encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",