diff options
| author | Himanshu Shekhar <himanshushekharb16@gmail.com> | 2017-04-30 02:22:22 +0530 |
|---|---|---|
| committer | Himanshu Shekhar <himanshushekharb16@gmail.com> | 2017-04-30 02:22:22 +0530 |
| commit | 4592acd7f2bce250814af960e8c6d6f0c5b1368a (patch) | |
| tree | 4cdb597e2e068f99f07a38c1d8b309324cbc527a /scripts | |
| parent | b220c3bcad5e674146a8e5257b666d1699d61259 (diff) | |
| download | psutil-4592acd7f2bce250814af960e8c6d6f0c5b1368a.tar.gz | |
handle some special error codes if not 200
Diffstat (limited to 'scripts')
| -rwxr-xr-x | scripts/internal/check_broken_links.py | 11 |
1 files changed, 10 insertions, 1 deletions
```diff
diff --git a/scripts/internal/check_broken_links.py b/scripts/internal/check_broken_links.py
index 8690981d..7e54c4cd 100755
--- a/scripts/internal/check_broken_links.py
+++ b/scripts/internal/check_broken_links.py
@@ -52,6 +52,11 @@ HERE = os.path.abspath(os.path.dirname(__file__))
 REGEX = r'(?:http|ftp|https)?://' \
         r'(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
 
+# There are some status codes sent by websites on HEAD request.
+# Like 503 by Microsoft, and 401 by Apple
+# They need to be sent GET request
+RETRY_STATUSES = [503, 401, 403]
+
 
 def get_urls(filename):
     """Extracts all URLs available in specified filename
@@ -69,7 +74,7 @@ def get_urls(filename):
     # correct urls which are between < and/or >
     i = 0
     while i < len(urls):
-        urls[i] = re.sub("[\*<>]", '', urls[i])
+        urls[i] = re.sub("[\*<>\(\)\)]", '', urls[i])
         i += 1
 
     return urls
@@ -82,6 +87,10 @@ def validate_url(url):
     """
     try:
         res = requests.head(url)
+        # some websites deny 503, like Microsoft
+        # and some send 401, like Apple, observations
+        if (not res.ok) and (res.status_code in RETRY_STATUSES):
+            res = requests.get(url)
         return res.ok
     except requests.exceptions.RequestException:
         return False
```
