| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
 | #!/usr/bin/env python3
"""\
List python source files.
There are three functions to check whether a file is a Python source, listed
here with increasing complexity:
- has_python_ext() checks whether a file name ends in '.py[w]'.
- look_like_python() checks whether the file is not binary and either has
  the '.py[w]' extension or the first line contains the word 'python'.
- can_be_compiled() checks whether the file can be compiled by compile().
The file also must be of appropriate size - not bigger than a megabyte.
walk_python_files() recursively lists all Python files under the given directories.
"""
__author__ = "Oleg Broytmann, Georg Brandl"
__all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"]
import os, re
binary_re = re.compile(br'[\x00-\x08\x0E-\x1F\x7F]')
debug = False
def print_debug(msg):
    if debug: print(msg)
def _open(fullpath):
    try:
        size = os.stat(fullpath).st_size
    except OSError as err: # Permission denied - ignore the file
        print_debug("%s: permission denied: %s" % (fullpath, err))
        return None
    if size > 1024*1024: # too big
        print_debug("%s: the file is too big: %d bytes" % (fullpath, size))
        return None
    try:
        return open(fullpath, "rb")
    except IOError as err: # Access denied, or a special file - ignore it
        print_debug("%s: access denied: %s" % (fullpath, err))
        return None
def has_python_ext(fullpath):
    return fullpath.endswith(".py") or fullpath.endswith(".pyw")
def looks_like_python(fullpath):
    infile = _open(fullpath)
    if infile is None:
        return False
    with infile:
        line = infile.readline()
    if binary_re.search(line):
        # file appears to be binary
        print_debug("%s: appears to be binary" % fullpath)
        return False
    if fullpath.endswith(".py") or fullpath.endswith(".pyw"):
        return True
    elif b"python" in line:
        # disguised Python script (e.g. CGI)
        return True
    return False
def can_be_compiled(fullpath):
    infile = _open(fullpath)
    if infile is None:
        return False
    with infile:
        code = infile.read()
    try:
        compile(code, fullpath, "exec")
    except Exception as err:
        print_debug("%s: cannot compile: %s" % (fullpath, err))
        return False
    return True
def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
    """\
    Recursively yield all Python source files below the given paths.
    paths: a list of files and/or directories to be checked.
    is_python: a function that takes a file name and checks whether it is a
               Python source file
    exclude_dirs: a list of directory base names that should be excluded in
                  the search
    """
    if exclude_dirs is None:
        exclude_dirs=[]
    for path in paths:
        print_debug("testing: %s" % path)
        if os.path.isfile(path):
            if is_python(path):
                yield path
        elif os.path.isdir(path):
            print_debug("    it is a directory")
            for dirpath, dirnames, filenames in os.walk(path):
                for exclude in exclude_dirs:
                    if exclude in dirnames:
                        dirnames.remove(exclude)
                for filename in filenames:
                    fullpath = os.path.join(dirpath, filename)
                    print_debug("testing: %s" % fullpath)
                    if is_python(fullpath):
                        yield fullpath
        else:
            print_debug("    unknown type")
if __name__ == "__main__":
    # Two simple examples/tests
    for fullpath in walk_python_files(['.']):
        print(fullpath)
    print("----------")
    for fullpath in walk_python_files(['.'], is_python=can_be_compiled):
        print(fullpath)
 |