Merge pull request #1031 from srittau/encoding

Make source-encoding (coding cookie) detection follow PEP 263 more closely
author: Timothy Edmund Crosley <timothy.crosley@gmail.com> 2019-10-09 23:47:17 -0700
committer: GitHub <noreply@github.com> 2019-10-09 23:47:17 -0700
commit: 73c34bf262f1a3531fdc7c3c1c0ceede9549ec08 (patch)
tree: 28e5dd475145f522ae23304eed8f3e5c9461b4d7
parent: ca031619b00560e35b160df5db802c90ec374db5 (diff)
parent: 51ac7a455f93897f4e6ba6b8678329030257bf9f (diff)
download: isort-73c34bf262f1a3531fdc7c3c1c0ceede9549ec08.tar.gz
2 files changed, 25 insertions, 3 deletions
diff --git a/isort/compat.py b/isort/compat.py
index 0af5d44a..9e3e6e52 100644
--- a/isort/compat.py
+++ b/isort/compat.py
@@ -12,17 +12,17 @@ from isort.isort import _SortImports
 
 def determine_file_encoding(file_path: Path, default: str = "utf-8") -> str:
     # see https://www.python.org/dev/peps/pep-0263/
-    pattern = re.compile(br"coding[:=]\s*([-\w.]+)")
+    pattern = re.compile(br"^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")
 
     coding = default
     with file_path.open("rb") as f:
         for line_number, line in enumerate(f, 1):
+            if line_number > 2:
+                break
             groups = re.findall(pattern, line)
             if groups:
                 coding = groups[0].decode("ascii")
                 break
-            if line_number > 2:
-                break
 
     return coding
 
diff --git a/test_isort.py b/test_isort.py
index 1a833aca..c7585889 100644
--- a/test_isort.py
+++ b/test_isort.py
@@ -2071,6 +2071,28 @@ def test_other_file_encodings(tmpdir) -> None:
         )
 
 
+def test_encoding_not_in_comment(tmpdir) -> None:
+    """Test that 'encoding' not in a comment is ignored"""
+    tmp_fname = tmpdir.join("test_encoding.py")
+    file_contents = "class Foo\n    coding: latin1\n\ns = u'ã'\n".format("utf8")
+    tmp_fname.write_binary(file_contents.encode("utf8"))
+    assert (
+        SortImports(file_path=str(tmp_fname), settings_path=os.getcwd()).output
+        == file_contents
+    )
+
+
+def test_encoding_not_in_first_two_lines(tmpdir) -> None:
+    """Test that 'encoding' not in the first two lines is ignored"""
+    tmp_fname = tmpdir.join("test_encoding.py")
+    file_contents = "\n\n# -*- coding: latin1\n\ns = u'ã'\n".format("utf8")
+    tmp_fname.write_binary(file_contents.encode("utf8"))
+    assert (
+        SortImports(file_path=str(tmp_fname), settings_path=os.getcwd()).output
+        == file_contents
+    )
+
+
 def test_comment_at_top_of_file() -> None:
     """Test to ensure isort correctly handles top of file comments"""
     test_input = (
author	Timothy Edmund Crosley <timothy.crosley@gmail.com>	2019-10-09 23:47:17 -0700
committer	GitHub <noreply@github.com>	2019-10-09 23:47:17 -0700
commit	73c34bf262f1a3531fdc7c3c1c0ceede9549ec08 (patch)
tree	28e5dd475145f522ae23304eed8f3e5c9461b4d7
parent	ca031619b00560e35b160df5db802c90ec374db5 (diff)
parent	51ac7a455f93897f4e6ba6b8678329030257bf9f (diff)
download	isort-73c34bf262f1a3531fdc7c3c1c0ceede9549ec08.tar.gz