From 3775929b7b6dc802abd32028f17585f82fbb12be Mon Sep 17 00:00:00 2001 From: Giampaolo Rodola Date: Fri, 12 May 2017 15:30:35 +0200 Subject: faster regex --- scripts/internal/check_broken_links.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/internal/check_broken_links.py b/scripts/internal/check_broken_links.py index 3cc78ec8..cd9875da 100755 --- a/scripts/internal/check_broken_links.py +++ b/scripts/internal/check_broken_links.py @@ -41,11 +41,11 @@ Author: Himanshu Shekhar (2017) from __future__ import print_function +import concurrent.futures import os import re import sys import traceback -import concurrent.futures import requests @@ -60,10 +60,10 @@ REQUEST_TIMEOUT = 30 RETRY_STATUSES = [503, 401, 403] -def get_urls_rst(filename): +def get_urls_rst(filename, _regex=re.compile(REGEX)): with open(filename) as f: text = f.read() - urls = re.findall(REGEX, text) + urls = _regex.findall(text) # remove duplicates, list for sets are not iterable urls = list(set(urls)) # correct urls which are between < and/or > -- cgit v1.2.1