author    Giampaolo Rodola <g.rodola@gmail.com>  2017-05-12 18:59:05 +0200
committer Giampaolo Rodola <g.rodola@gmail.com>  2017-05-12 18:59:05 +0200
commit    bb6ffa8b7f2a0865c8ecda05c98f51794fcfef50 (patch)
tree      873798ec55ab305e591d2a6077ab5603b026eae3 /scripts
parent    e988ae62abf8ea588046312f4935121643691ef7 (diff)
download  psutil-bb6ffa8b7f2a0865c8ecda05c98f51794fcfef50.tar.gz
broken links: also inspect C and H files
Diffstat (limited to 'scripts')
-rwxr-xr-x  scripts/internal/check_broken_links.py | 46
1 file changed, 39 insertions(+), 7 deletions(-)
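As a rough, non-authoritative illustration of what the checker looks for, the snippet below runs the URL regex from the first hunk over a made-up source line (REGEX is copied verbatim from the hunk; the sample line is invented):

import re

REGEX = re.compile(
    r'(?:http|ftp|https)?://'
    r'(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')

line = "# see https://github.com/giampaolo/psutil/issues/1000 for details"
print(REGEX.findall(line))
# -> ['https://github.com/giampaolo/psutil/issues/1000']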
diff --git a/scripts/internal/check_broken_links.py b/scripts/internal/check_broken_links.py
index 3d1766b3..0ae2b323 100755
--- a/scripts/internal/check_broken_links.py
+++ b/scripts/internal/check_broken_links.py
@@ -55,7 +55,7 @@ HERE = os.path.abspath(os.path.dirname(__file__))
 REGEX = re.compile(
     r'(?:http|ftp|https)?://'
     r'(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
-REQUEST_TIMEOUT = 10
+REQUEST_TIMEOUT = 15
 # There are some status codes sent by websites on HEAD request.
 # Like 503 by Microsoft, and 401 by Apple
 # They need to be sent GET request
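The comment above refers to the script's HEAD-then-GET fallback. A hedged sketch of that idea follows; it is not taken from this file, and the function name url_is_alive, the constant RETRIES_FOR_GET, and the use of the requests library are assumptions:

import requests

REQUEST_TIMEOUT = 15
RETRIES_FOR_GET = {401, 503}   # assumed set of "HEAD not welcome" status codes

def url_is_alive(url):
    # try the cheap HEAD first; some servers reject it with 401/503
    resp = requests.head(url, timeout=REQUEST_TIMEOUT)
    if resp.status_code in RETRIES_FOR_GET:
        # retry with a full GET before declaring the link broken
        resp = requests.get(url, timeout=REQUEST_TIMEOUT)
    return resp.status_code < 400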
@@ -87,27 +87,26 @@ def find_urls(s):
     return list(set([sanitize_url(x) for x in matches]))


-def parse_rst(filename):
+def parse_rst(fname):
     """Look for links in a .rst file."""
-    with open(filename) as f:
+    with open(fname) as f:
         text = f.read()
     urls = find_urls(text)
     # HISTORY file has a lot of dead links.
-    if filename == 'HISTORY.rst' and urls:
+    if fname == 'HISTORY.rst' and urls:
         urls = [
             x for x in urls if
             not x.startswith('https://github.com/giampaolo/psutil/issues')]
     return urls
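A quick illustration of the HISTORY.rst filter above, with two hypothetical URLs in place of what find_urls() would return:

urls = ['https://github.com/giampaolo/psutil/issues/1110',
        'https://docs.python.org/3/library/os.html']
urls = [x for x in urls if
        not x.startswith('https://github.com/giampaolo/psutil/issues')]
print(urls)   # only the docs.python.org link is kept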
-def parse_py(filename):
+def parse_py(fname):
     """Look for links in a .py file."""
-    with open(filename) as f:
+    with open(fname) as f:
         lines = f.readlines()
     urls = set()
     for i, line in enumerate(lines):
         for url in find_urls(line):
-            url = urls[0]
             # comment block
             if line.lstrip().startswith('# '):
                 subidx = i + 1
@@ -122,12 +121,45 @@ def parse_py(filename):
     return list(urls)
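parse_py() (whose continuation-handling body is elided from this hunk) glues together URLs that wrap across consecutive '# ' comment lines. A standalone sketch of that idea, using a hypothetical wrapped URL rather than the script's actual loop:

import re

lines = ["# https://docs.python.org/3/library/\n",
         "# os.path.html\n"]
url = "https://docs.python.org/3/library/"
subidx = 1
while subidx < len(lines):
    nextline = lines[subidx].strip()
    if re.match(r'^# .+', nextline):
        # append the continuation text, minus the comment marker
        url += nextline[1:].strip()
        subidx += 1
    else:
        break
print(url)   # https://docs.python.org/3/library/os.path.html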
+def parse_c(fname):
+    """Look for links in a .c / .h file."""
+    with open(fname) as f:
+        lines = f.readlines()
+    urls = set()
+    for i, line in enumerate(lines):
+        for url in find_urls(line):
+            # comment block //
+            if line.lstrip().startswith('// '):
+                subidx = i + 1
+                while True:
+                    nextline = lines[subidx].strip()
+                    if re.match(r'^// .+', nextline):
+                        url += nextline[2:].strip()
+                    else:
+                        break
+                    subidx += 1
+            # comment block /*
+            elif line.lstrip().startswith('* '):
+                subidx = i + 1
+                while True:
+                    nextline = lines[subidx].strip()
+                    if re.match(r'^\* .+', nextline):
+                        url += nextline[1:].strip()
+                    else:
+                        break
+                    subidx += 1
+            urls.add(url)
+    return list(urls)
+
+
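The new parse_c() stitches URLs that continue across '//' or '* ' comment lines. A minimal sketch of the '*' branch on a made-up C comment (simplified; the real function uses find_urls() and handles both comment styles):

import re

lines = ["/*\n",
         " * See https://msdn.microsoft.com/en-us/library/\n",
         " * windows/desktop/aa366770(v=vs.85).aspx\n",
         " */\n"]
url = "https://msdn.microsoft.com/en-us/library/"
subidx = 2
while subidx < len(lines):
    nextline = lines[subidx].strip()
    if re.match(r'^\* .+', nextline):
        # append the continuation text, minus the leading '*'
        url += nextline[1:].strip()
        subidx += 1
    else:
        break
print(url)
# -> https://msdn.microsoft.com/en-us/library/windows/desktop/aa366770(v=vs.85).aspx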
 def get_urls(fname):
     """Extracts all URLs available in specified fname."""
     if fname.endswith('.rst'):
         return parse_rst(fname)
     elif fname.endswith('.py'):
         return parse_py(fname)
+    elif fname.endswith('.c') or fname.endswith('.h'):
+        return parse_c(fname)
     else:
         return []
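How get_urls() might be driven over a checkout; the os.walk loop below is an assumption for illustration, not the script's actual main routine:

import os

all_urls = {}
for root, _dirs, files in os.walk('.'):
    for name in files:
        path = os.path.join(root, name)
        found = get_urls(path)   # returns [] for extensions it does not handle
        if found:
            all_urls[path] = found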