summaryrefslogtreecommitdiff
path: root/isort/io.py
blob: bf99348266b70cfab304027c120d3685546e2257 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""Defines any IO utilities used by isort"""
import locale
import re
from pathlib import Path
from typing import NamedTuple, Tuple

from .exceptions import UnableToDetermineEncoding

# Matches an encoding declaration comment (see PEP 263) at the start of a bytes line:
# optional leading spaces/tabs/form feeds, a "#", any text, then "coding:" or "coding="
# followed by optional spaces/tabs and the encoding name, which is captured as group 1.
_ENCODING_PATTERN = re.compile(br"^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")


class File(NamedTuple):
    """Text contents bundled with the resolved path and encoding they belong to."""

    contents: str
    path: Path
    encoding: str

    @staticmethod
    def read(filename: str) -> "File":
        """Build a ``File`` by reading *filename* from disk.

        The path is resolved first; the text and the encoding that decoded it
        both come from ``_read_file_contents``.
        """
        resolved = Path(filename).resolve()
        text, detected_encoding = _read_file_contents(resolved)
        return File(contents=text, path=resolved, encoding=detected_encoding)

    @staticmethod
    def from_contents(contents: str, filename: str) -> "File":
        """Build a ``File`` from text already in memory.

        *filename* is only resolved into a path; the encoding is derived from
        *contents* via ``_determine_content_encoding``.
        """
        resolved = Path(filename).resolve()
        detected_encoding = _determine_content_encoding(contents)
        return File(contents, path=resolved, encoding=detected_encoding)

    @property
    def extension(self):
        """The path's suffix with its leading dot(s) stripped ("" when there is none)."""
        suffix = self.path.suffix
        return suffix.lstrip(".")


def _determine_stream_encoding(stream, default: str = "utf-8") -> str:
    """Return the encoding declared in the first two lines of *stream*.

    *stream* is iterated line by line and each line is matched against
    ``_ENCODING_PATTERN`` (a bytes pattern). The first declared encoding name
    found is returned as text; *default* is returned when neither of the first
    two lines declares one.
    """
    for index, raw_line in enumerate(stream):
        # Only the first two lines may carry the declaration.
        if index >= 2:
            break
        declaration = _ENCODING_PATTERN.match(raw_line)
        if declaration is not None:
            return declaration.group(1).decode("ascii")

    return default


def _determine_content_encoding(content: str, default: str = "utf-8"):
    """Return the encoding declared inside in-memory text *content*.

    The text is encoded with *default* and split on newlines so it can be
    scanned by ``_determine_stream_encoding``; *default* is also the result
    when no declaration is found.
    """
    encoded_lines = content.encode(default).split(b"\n")
    return _determine_stream_encoding(encoded_lines, default=default)


def _determine_file_encoding(file_path: Path, default: str = "utf-8") -> str:
    """Return the encoding declared in the file at *file_path*.

    The file is opened in binary mode and handed to
    ``_determine_stream_encoding``; *default* is returned when the file
    declares no encoding.
    """
    # see https://www.python.org/dev/peps/pep-0263/
    with file_path.open("rb") as binary_stream:
        detected = _determine_stream_encoding(binary_stream, default=default)
    return detected


def _read_file_contents(file_path: Path) -> Tuple[str, str]:
    """Read *file_path* as text and report which encoding decoded it.

    The encoding returned by ``_determine_file_encoding`` is tried first, then
    the system's preferred encoding. An attempt is abandoned when the bytes
    cannot be decoded, or when the encoding name is not a codec Python knows
    (the declaration pattern accepts arbitrary names, so this can happen).

    Returns:
        A ``(contents, encoding)`` tuple. The file is opened with
        ``newline=""``, so line endings are returned untranslated.

    Raises:
        UnableToDetermineEncoding: if neither encoding could read the file.
    """
    encoding = _determine_file_encoding(file_path)
    # Default encoding for open(mode='r') on the system
    fallback_encoding = locale.getpreferredencoding(False)

    for candidate in (encoding, fallback_encoding):
        try:
            with file_path.open(encoding=candidate, newline="") as file_to_import_sort:
                return file_to_import_sort.read(), candidate
        except (UnicodeDecodeError, LookupError):
            # UnicodeDecodeError: wrong encoding for these bytes.
            # LookupError: open() does not recognise the encoding name at all.
            continue

    raise UnableToDetermineEncoding(file_path, encoding, fallback_encoding)