summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
author Himanshu Shekhar <himanshushekharb16@gmail.com> 2017-04-30 02:22:22 +0530
committer Himanshu Shekhar <himanshushekharb16@gmail.com> 2017-04-30 02:22:22 +0530
commit 4592acd7f2bce250814af960e8c6d6f0c5b1368a (patch)
tree 4cdb597e2e068f99f07a38c1d8b309324cbc527a /scripts
parent b220c3bcad5e674146a8e5257b666d1699d61259 (diff)
download psutil-4592acd7f2bce250814af960e8c6d6f0c5b1368a.tar.gz
handle some special error codes if not 200
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/internal/check_broken_links.py 11
1 files changed, 10 insertions, 1 deletions
diff --git a/scripts/internal/check_broken_links.py b/scripts/internal/check_broken_links.py
index 8690981d..7e54c4cd 100755
--- a/scripts/internal/check_broken_links.py
+++ b/scripts/internal/check_broken_links.py
@@ -52,6 +52,11 @@ HERE = os.path.abspath(os.path.dirname(__file__))
REGEX = r'(?:http|ftp|https)?://' \
r'(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
+# There are some status codes sent by websites on HEAD request.
+# Like 503 by Microsoft, and 401 by Apple
+# They need to be sent GET request
+RETRY_STATUSES = [503, 401, 403]
+
def get_urls(filename):
"""Extracts all URLs available in specified filename
@@ -69,7 +74,7 @@ def get_urls(filename):
# correct urls which are between < and/or >
i = 0
while i < len(urls):
- urls[i] = re.sub("[\*<>]", '', urls[i])
+ urls[i] = re.sub("[\*<>\(\)\)]", '', urls[i])
i += 1
return urls
@@ -82,6 +87,10 @@ def validate_url(url):
"""
try:
res = requests.head(url)
+ # some websites deny 503, like Microsoft
+ # and some send 401, like Apple, observations
+ if (not res.ok) and (res.status_code in RETRY_STATUSES):
+ res = requests.get(url)
return res.ok
except requests.exceptions.RequestException:
return False