diff options
author | Giampaolo Rodola <g.rodola@gmail.com> | 2017-05-12 15:30:35 +0200 |
---|---|---|
committer | Giampaolo Rodola <g.rodola@gmail.com> | 2017-05-12 15:30:35 +0200 |
commit | 3775929b7b6dc802abd32028f17585f82fbb12be (patch) | |
tree | 8913bec4fb75f382a65b42a44bedefb5fb22dabe | |
parent | ec6fbeb15cf5bf12352fde6ebcfbe3f74529f336 (diff) | |
download | psutil-3775929b7b6dc802abd32028f17585f82fbb12be.tar.gz |
faster regex
-rwxr-xr-x | scripts/internal/check_broken_links.py | 6 |
1 files changed, 3 insertions, 3 deletions
```diff
diff --git a/scripts/internal/check_broken_links.py b/scripts/internal/check_broken_links.py
index 3cc78ec8..cd9875da 100755
--- a/scripts/internal/check_broken_links.py
+++ b/scripts/internal/check_broken_links.py
@@ -41,11 +41,11 @@ Author: Himanshu Shekhar <https://github.com/himanshub16> (2017)
 
 from __future__ import print_function
 
+import concurrent.futures
 import os
 import re
 import sys
 import traceback
-import concurrent.futures
 
 import requests
 
@@ -60,10 +60,10 @@ REQUEST_TIMEOUT = 30
 RETRY_STATUSES = [503, 401, 403]
 
 
-def get_urls_rst(filename):
+def get_urls_rst(filename, _regex=re.compile(REGEX)):
     with open(filename) as f:
         text = f.read()
-    urls = re.findall(REGEX, text)
+    urls = _regex.findall(text)
     # remove duplicates, list for sets are not iterable
     urls = list(set(urls))
     # correct urls which are between < and/or >
```