| field | value | date |
|---|---|---|
| author | Giampaolo Rodola <g.rodola@gmail.com> | 2017-05-12 18:59:05 +0200 |
| committer | Giampaolo Rodola <g.rodola@gmail.com> | 2017-05-12 18:59:05 +0200 |
| commit | bb6ffa8b7f2a0865c8ecda05c98f51794fcfef50 (patch) | |
| tree | 873798ec55ab305e591d2a6077ab5603b026eae3 /scripts | |
| parent | e988ae62abf8ea588046312f4935121643691ef7 (diff) | |
| download | psutil-bb6ffa8b7f2a0865c8ecda05c98f51794fcfef50.tar.gz | |
broken links: also inspect C and H files
Diffstat (limited to 'scripts')
| -rwxr-xr-x | scripts/internal/check_broken_links.py | 46 |
1 file changed, 39 insertions, 7 deletions
diff --git a/scripts/internal/check_broken_links.py b/scripts/internal/check_broken_links.py
index 3d1766b3..0ae2b323 100755
--- a/scripts/internal/check_broken_links.py
+++ b/scripts/internal/check_broken_links.py
@@ -55,7 +55,7 @@ HERE = os.path.abspath(os.path.dirname(__file__))
 REGEX = re.compile(
     r'(?:http|ftp|https)?://'
     r'(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
-REQUEST_TIMEOUT = 10
+REQUEST_TIMEOUT = 15
 # There are some status codes sent by websites on HEAD request.
 # Like 503 by Microsoft, and 401 by Apple
 # They need to be sent GET request
@@ -87,27 +87,26 @@ def find_urls(s):
     return list(set([sanitize_url(x) for x in matches]))
 
 
-def parse_rst(filename):
+def parse_rst(fname):
     """Look for links in a .rst file."""
-    with open(filename) as f:
+    with open(fname) as f:
         text = f.read()
     urls = find_urls(text)
     # HISTORY file has a lot of dead links.
-    if filename == 'HISTORY.rst' and urls:
+    if fname == 'HISTORY.rst' and urls:
         urls = [
             x for x in urls if not
             x.startswith('https://github.com/giampaolo/psutil/issues')]
     return urls
 
 
-def parse_py(filename):
+def parse_py(fname):
     """Look for links in a .py file."""
-    with open(filename) as f:
+    with open(fname) as f:
         lines = f.readlines()
     urls = set()
     for i, line in enumerate(lines):
         for url in find_urls(line):
-            url = urls[0]
             # comment block
             if line.lstrip().startswith('# '):
                 subidx = i + 1
@@ -122,12 +121,45 @@ def parse_py(filename):
     return list(urls)
 
 
+def parse_c(fname):
+    """Look for links in a .c or .h file."""
+    with open(fname) as f:
+        lines = f.readlines()
+    urls = set()
+    for i, line in enumerate(lines):
+        for url in find_urls(line):
+            # comment block //
+            if line.lstrip().startswith('// '):
+                subidx = i + 1
+                while True:
+                    nextline = lines[subidx].strip()
+                    if re.match('^// .+', nextline):
+                        url += nextline[2:].strip()
+                    else:
+                        break
+                    subidx += 1
+            # comment block /*
+            elif line.lstrip().startswith('* '):
+                subidx = i + 1
+                while True:
+                    nextline = lines[subidx].strip()
+                    if re.match(r'^\* .+', nextline):
+                        url += nextline[1:].strip()
+                    else:
+                        break
+                    subidx += 1
+            urls.add(url)
+    return list(urls)
+
+
 def get_urls(fname):
     """Extracts all URLs available in specified fname."""
     if fname.endswith('.rst'):
         return parse_rst(fname)
     elif fname.endswith('.py'):
         return parse_py(fname)
+    elif fname.endswith('.c') or fname.endswith('.h'):
+        return parse_c(fname)
     else:
         return []
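The core trick in the new `parse_c` is the continuation loop: when a URL inside a C comment wraps onto the next comment line, the tail is glued back on before the URL is recorded. The sketch below is a minimal, self-contained approximation, not the patched script itself: `URL_RE` is a simplified stand-in for the script's `REGEX`, and `join_c_comment_url` is a hypothetical helper mirroring the patch's `//` branch (the `/* ... */` branch works the same way on a leading `* `).

```python
import re

# Simplified stand-in for the script's REGEX (the real one is broader).
URL_RE = re.compile(r'(?:http|ftp|https)?://[^\s\'"<>]+')


def join_c_comment_url(lines, i, url):
    """Hypothetical helper: extend a URL found on lines[i] with the
    contents of any directly following '// ' comment lines, mirroring
    the while-loop added by this patch."""
    subidx = i + 1
    while subidx < len(lines):  # bounds check; the patch omits this
        nextline = lines[subidx].strip()
        if re.match('^// .+', nextline):
            url += nextline[2:].strip()
        else:
            break
        subidx += 1
    return url


# Made-up C source: one URL wrapped across two '//' comment lines.
c_source = """\
// See https://example.com/very/long
// /path/to/page.html
int main(void) { return 0; }
""".splitlines()

urls = set()
for i, line in enumerate(c_source):
    for url in URL_RE.findall(line):
        if line.lstrip().startswith('// '):
            url = join_c_comment_url(c_source, i, url)
        urls.add(url)

print(urls)  # {'https://example.com/very/long/path/to/page.html'}
```

Running this prints the wrapped URL reassembled into one string. Note that the patched code indexes `lines[subidx]` inside `while True` without a bounds check, so a comment continuation reaching end-of-file could raise `IndexError`; the sketch guards against that.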
