summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
author: molsonkiko <46202915+molsonkiko@users.noreply.github.com> 2023-04-25 11:46:56 -0700
committer: GitHub <noreply@github.com> 2023-04-25 20:46:56 +0200
commit: 46cc9556224ecba0fddd5f78be74289a5ffd64b4 (patch)
tree: ec6a4c69006f9f00a241aeb70262263a1732fafd /numpy
parent: fe5472fa4eae131ff9646d7c980c6c4081c10386 (diff)
download: numpy-46cc9556224ecba0fddd5f78be74289a5ffd64b4.tar.gz
TST: Remove crackfortran.nameargspattern time test that failed randomly (#23662)
Also made the threshold for rejecting a regex as too slow much more lenient: 200 ms should be enough time even for a slow CPU on a bad day, while a bad regex should still fail with near certainty.
Diffstat (limited to 'numpy')
-rw-r--r-- numpy/f2py/tests/test_crackfortran.py 40
1 files changed, 15 insertions, 25 deletions
diff --git a/numpy/f2py/tests/test_crackfortran.py b/numpy/f2py/tests/test_crackfortran.py
index 23965087d..39555df05 100644
--- a/numpy/f2py/tests/test_crackfortran.py
+++ b/numpy/f2py/tests/test_crackfortran.py
@@ -290,36 +290,26 @@ class TestNameArgsPatternBacktracking:
def test_nameargspattern_backtracking(self, adversary):
'''address ReDOS vulnerability:
https://github.com/numpy/numpy/issues/23338'''
- last_median = 0.
- trials_per_count = 128
+ trials_per_batch = 12
+ batches_per_regex = 4
start_reps, end_reps = 15, 25
- times_median_doubled = 0
for ii in range(start_reps, end_reps):
repeated_adversary = adversary * ii
- times = []
- for _ in range(trials_per_count):
- t0 = time.perf_counter()
- mtch = nameargspattern.search(repeated_adversary)
- times.append(time.perf_counter() - t0)
- # We should use a measure of time that's resilient to outliers.
- # Times jump around a lot due to the CPU's scheduler.
- median = np.median(times)
+ # test times in small batches.
+ # this gives us more chances to catch a bad regex
+ # while still catching it before too long if it is bad
+ for _ in range(batches_per_regex):
+ times = []
+ for _ in range(trials_per_batch):
+ t0 = time.perf_counter()
+ mtch = nameargspattern.search(repeated_adversary)
+ times.append(time.perf_counter() - t0)
+ # our pattern should be much faster than 0.2s per search
+ # it's unlikely that a bad regex will pass even on fast CPUs
+ assert np.median(times) < 0.2
assert not mtch
# if the adversary is capped with @)@, it becomes acceptable
# according to the old version of the regex.
# that should still be true.
good_version_of_adversary = repeated_adversary + '@)@'
- assert nameargspattern.search(good_version_of_adversary)
- if ii > start_reps:
- # the hallmark of exponentially catastrophic backtracking
- # is that runtime doubles for every added instance of
- # the problematic pattern.
- times_median_doubled += median > 2 * last_median
- # also try to rule out non-exponential but still bad cases
- # arbitrarily, we should set a hard limit of 10ms as too slow
- assert median < trials_per_count * 0.01
- last_median = median
- # we accept that maybe the median might double once, due to
- # the CPU scheduler acting weird or whatever. More than that
- # seems suspicious.
- assert times_median_doubled < 2 \ No newline at end of file
+ assert nameargspattern.search(good_version_of_adversary) \ No newline at end of file