Implemented #1487: Improved handling of encoding errors.

author: Timothy Crosley <timothy.crosley@gmail.com> 2020-10-03 01:01:49 -0700
committer: Timothy Crosley <timothy.crosley@gmail.com> 2020-10-03 01:01:49 -0700
commit: dbeaaa63611c19b942261ec0115c57290c8f8719 (patch)
tree: 57adb2612bd0e589033072908b3549c743786758
parent: 6d556bd206b1165b2b6d674c7e4fe0ad77946551 (diff)
download: isort-dbeaaa63611c19b942261ec0115c57290c8f8719.tar.gz
6 files changed, 91 insertions, 9 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 150256ea..e7fb1aab 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ Find out more about isort's release policy [here](https://pycqa.github.io/isort/
   - Implemented #1494: Default to sorting imports within `.pxd` files.
   - Implemented #1502: Improved float-to-top behavior when there is an existing import section present at top-of-file.
   - Implemented #1511: Support for easily seeing all files isort will be ran against using `isort . --show-files`.
+  - Implemented #1487: Improved handling of encoding errors.
   - Improved handling of unsupported configuration option errors (see #1475).
   - Fixed #1463: Better interactive documentation for future option.
   - Fixed #1461: Quiet config option not respected by file API in some circumstances.
@@ -16,7 +17,7 @@ Find out more about isort's release policy [here](https://pycqa.github.io/isort/
   - Fixed #1492: --check does not work with stdin source.
   - Fixed #1499: isort gets confused by single line, multi-line style comments when using float-to-top.
 Potentially breaking changes:
-  - Fixed #1486: "Google" profile is not quite Google style.
+  - Fixed #1486: "Google" profile is not quite Google style.  
 
   Goal Zero: (Tickets related to aspirational goal of achieving 0 regressions for remaining 5.0.0 lifespan):
   - Implemented #1472: Full testing of stdin CLI Options
diff --git a/isort/exceptions.py b/isort/exceptions.py
index 265928e9..b98454a2 100644
--- a/isort/exceptions.py
+++ b/isort/exceptions.py
@@ -1,5 +1,6 @@
 """All isort specific exception classes should be defined here"""
-from typing import Any, Dict
+from pathlib import Path
+from typing import Any, Dict, Union
 
 from .profiles import profiles
 
@@ -157,3 +158,14 @@ class UnsupportedSettings(ISortError):
             "https://pycqa.github.io/isort/docs/configuration/options/.\n"
         )
         self.unsupported_settings = unsupported_settings
+
+
+class UnsupportedEncoding(ISortError):
+    """Raised when isort encounters an encoding error while trying to read a file"""
+
+    def __init__(
+        self,
+        filename: Union[str, Path],
+    ):
+        super().__init__(f"Unknown or unsupported encoding in {filename}")
+        self.filename = filename
diff --git a/isort/io.py b/isort/io.py
index a0357347..7ff2807d 100644
--- a/isort/io.py
+++ b/isort/io.py
@@ -4,7 +4,9 @@ import tokenize
 from contextlib import contextmanager
 from io import BytesIO, StringIO, TextIOWrapper
 from pathlib import Path
-from typing import Iterator, NamedTuple, TextIO, Union
+from typing import Callable, Iterator, NamedTuple, TextIO, Union
+
+from isort.exceptions import UnsupportedEncoding
 
 _ENCODING_PATTERN = re.compile(br"^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")
 
@@ -15,8 +17,15 @@ class File(NamedTuple):
     encoding: str
 
     @staticmethod
+    def detect_encoding(filename: str, readline: Callable[[], bytes]):
+        try:
+            return tokenize.detect_encoding(readline)[0]
+        except Exception:
+            raise UnsupportedEncoding(filename)
+
+    @staticmethod
     def from_contents(contents: str, filename: str) -> "File":
-        encoding, _ = tokenize.detect_encoding(BytesIO(contents.encode("utf-8")).readline)
+        encoding = File.detect_encoding(filename, BytesIO(contents.encode("utf-8")).readline)
         return File(StringIO(contents), path=Path(filename).resolve(), encoding=encoding)
 
     @property
@@ -30,7 +39,7 @@ class File(NamedTuple):
         """
         buffer = open(filename, "rb")
         try:
-            encoding, _ = tokenize.detect_encoding(buffer.readline)
+            encoding = File.detect_encoding(filename, buffer.readline)
             buffer.seek(0)
             text = TextIOWrapper(buffer, encoding, line_buffering=True, newline="")
             text.mode = "r"  # type: ignore
diff --git a/isort/main.py b/isort/main.py
index c518a92d..841caca5 100644
--- a/isort/main.py
+++ b/isort/main.py
@@ -10,7 +10,7 @@ from typing import Any, Dict, Iterable, Iterator, List, Optional, Sequence, Set
 from warnings import warn
 
 from . import __version__, api, sections
-from .exceptions import FileSkipped
+from .exceptions import FileSkipped, UnsupportedEncoding
 from .format import create_terminal_printer
 from .logo import ASCII_ART
 from .profiles import profiles
@@ -67,9 +67,10 @@ Visit https://pycqa.github.io/isort/ for complete information about how to use i
 
 
 class SortAttempt:
-    def __init__(self, incorrectly_sorted: bool, skipped: bool) -> None:
+    def __init__(self, incorrectly_sorted: bool, skipped: bool, supported_encoding: bool) -> None:
         self.incorrectly_sorted = incorrectly_sorted
         self.skipped = skipped
+        self.supported_encoding = supported_encoding
 
 
 def sort_imports(
@@ -88,7 +89,7 @@ def sort_imports(
                 incorrectly_sorted = not api.check_file(file_name, config=config, **kwargs)
             except FileSkipped:
                 skipped = True
-            return SortAttempt(incorrectly_sorted, skipped)
+            return SortAttempt(incorrectly_sorted, skipped, True)
         else:
             try:
                 incorrectly_sorted = not api.sort_file(
@@ -100,10 +101,14 @@ def sort_imports(
                 )
             except FileSkipped:
                 skipped = True
-            return SortAttempt(incorrectly_sorted, skipped)
+            return SortAttempt(incorrectly_sorted, skipped, True)
     except (OSError, ValueError) as error:
         warn(f"Unable to parse file {file_name} due to {error}")
         return None
+    except UnsupportedEncoding:
+        if config.verbose:
+            warn(f"Encoding not supported for {file_name}")
+        return SortAttempt(incorrectly_sorted, skipped, False)
     except Exception:
         printer = create_terminal_printer(color=config.color_output)
         printer.error(
@@ -852,6 +857,7 @@ def main(argv: Optional[Sequence[str]] = None, stdin: Optional[TextIOWrapper] =
     remapped_deprecated_args = config_dict.pop("remapped_deprecated_args", False)
     wrong_sorted_files = False
     all_attempt_broken = False
+    no_valid_encodings = False
 
     if "src_paths" in config_dict:
         config_dict["src_paths"] = {
@@ -901,6 +907,7 @@ def main(argv: Optional[Sequence[str]] = None, stdin: Optional[TextIOWrapper] =
             return
         num_skipped = 0
         num_broken = 0
+        num_invalid_encoding = 0
         if config.verbose:
             print(ASCII_ART)
 
@@ -934,6 +941,7 @@ def main(argv: Optional[Sequence[str]] = None, stdin: Optional[TextIOWrapper] =
 
         # If any files passed in are missing considered as error, should be removed
         is_no_attempt = True
+        any_encoding_valid = False
         for sort_attempt in attempt_iterator:
             if not sort_attempt:
                 continue  # pragma: no cover - shouldn't happen, satisfies type constraint
@@ -944,6 +952,12 @@ def main(argv: Optional[Sequence[str]] = None, stdin: Optional[TextIOWrapper] =
                 num_skipped += (
                     1  # pragma: no cover - shouldn't happen, due to skip in iter_source_code
                 )
+
+            if not sort_attempt.supported_encoding:
+                num_invalid_encoding += 1
+            else:
+                any_encoding_valid = True
+
             is_no_attempt = False
 
         num_skipped += len(skipped)
@@ -965,6 +979,8 @@ def main(argv: Optional[Sequence[str]] = None, stdin: Optional[TextIOWrapper] =
 
         if num_broken > 0 and is_no_attempt:
             all_attempt_broken = True
+        if num_invalid_encoding > 0 and not any_encoding_valid:
+            no_valid_encodings = True
 
     if not config.quiet and (remapped_deprecated_args or deprecated_flags):
         if remapped_deprecated_args:
@@ -988,6 +1004,11 @@ def main(argv: Optional[Sequence[str]] = None, stdin: Optional[TextIOWrapper] =
     if all_attempt_broken:
         sys.exit(1)
 
+    if no_valid_encodings:
+        printer = create_terminal_printer(color=config.color_output)
+        printer.error("No valid encodings.")
+        sys.exit(1)
+
 
 if __name__ == "__main__":
     main()
diff --git a/tests/unit/test_exceptions.py b/tests/unit/test_exceptions.py
index bb6ee2a3..d9eae8bf 100644
--- a/tests/unit/test_exceptions.py
+++ b/tests/unit/test_exceptions.py
@@ -90,3 +90,11 @@ class TestUnsupportedSettings(TestISortError):
 
     def test_variables(self):
         assert self.instance.unsupported_settings == {"apply": {"value": "true", "source": "/"}}
+
+
+class TestUnsupportedEncoding(TestISortError):
+    def setup_class(self):
+        self.instance = exceptions.UnsupportedEncoding("file.py")
+
+    def test_variables(self):
+        assert self.instance.filename == "file.py"
diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py
index ef837b61..28da37eb 100644
--- a/tests/unit/test_main.py
+++ b/tests/unit/test_main.py
@@ -719,3 +719,34 @@ import os
 import sys
 """
     )
+
+
+def test_unsupported_encodings(tmpdir, capsys):
+    tmp_file = tmpdir.join("file.py")
+    # fmt: off
+    tmp_file.write(
+        u'''
+# [syntax-error]\
+# -*- coding: IBO-8859-1 -*-
+""" check correct unknown encoding declaration
+"""
+__revision__ = 'יייי'
+'''
+    )
+    # fmt: on
+
+    # should throw an error if only unsupported encoding provided
+    with pytest.raises(SystemExit):
+        main.main([str(tmp_file)])
+    out, error = capsys.readouterr()
+
+    assert "No valid encodings." in error
+
+    # should not throw an error if at least one valid encoding found
+    normal_file = tmpdir.join("file1.py")
+    normal_file.write("import os\nimport sys")
+
+    main.main([str(tmp_file), str(normal_file), "--verbose"])
+    out, error = capsys.readouterr()
+
+    assert not error
author	Timothy Crosley <timothy.crosley@gmail.com>	2020-10-03 01:01:49 -0700
committer	Timothy Crosley <timothy.crosley@gmail.com>	2020-10-03 01:01:49 -0700
commit	dbeaaa63611c19b942261ec0115c57290c8f8719 (patch)
tree	57adb2612bd0e589033072908b3549c743786758
parent	6d556bd206b1165b2b6d674c7e4fe0ad77946551 (diff)
download	isort-dbeaaa63611c19b942261ec0115c57290c8f8719.tar.gz