diff options
| author | molsonkiko <46202915+molsonkiko@users.noreply.github.com> | 2023-04-25 11:46:56 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-04-25 20:46:56 +0200 |
| commit | 46cc9556224ecba0fddd5f78be74289a5ffd64b4 (patch) | |
| tree | ec6a4c69006f9f00a241aeb70262263a1732fafd /numpy | |
| parent | fe5472fa4eae131ff9646d7c980c6c4081c10386 (diff) | |
| download | numpy-46cc9556224ecba0fddd5f78be74289a5ffd64b4.tar.gz | |
TST: Remove crackfortran.nameargspattern time test that failed randomly (#23662)
Also made the threshold for rejecting a regex as too slow
much more lenient.
200 ms should be enough time even for a bad CPU on a bad day,
while a bad regex should still fail with near certainty.
Diffstat (limited to 'numpy')
| -rw-r--r-- | numpy/f2py/tests/test_crackfortran.py | 40 |
1 files changed, 15 insertions, 25 deletions
diff --git a/numpy/f2py/tests/test_crackfortran.py b/numpy/f2py/tests/test_crackfortran.py index 23965087d..39555df05 100644 --- a/numpy/f2py/tests/test_crackfortran.py +++ b/numpy/f2py/tests/test_crackfortran.py @@ -290,36 +290,26 @@ class TestNameArgsPatternBacktracking: def test_nameargspattern_backtracking(self, adversary): '''address ReDOS vulnerability: https://github.com/numpy/numpy/issues/23338''' - last_median = 0. - trials_per_count = 128 + trials_per_batch = 12 + batches_per_regex = 4 start_reps, end_reps = 15, 25 - times_median_doubled = 0 for ii in range(start_reps, end_reps): repeated_adversary = adversary * ii - times = [] - for _ in range(trials_per_count): - t0 = time.perf_counter() - mtch = nameargspattern.search(repeated_adversary) - times.append(time.perf_counter() - t0) - # We should use a measure of time that's resilient to outliers. - # Times jump around a lot due to the CPU's scheduler. - median = np.median(times) + # test times in small batches. + # this gives us more chances to catch a bad regex + # while still catching it before too long if it is bad + for _ in range(batches_per_regex): + times = [] + for _ in range(trials_per_batch): + t0 = time.perf_counter() + mtch = nameargspattern.search(repeated_adversary) + times.append(time.perf_counter() - t0) + # our pattern should be much faster than 0.2s per search + # it's unlikely that a bad regex will pass even on fast CPUs + assert np.median(times) < 0.2 assert not mtch # if the adversary is capped with @)@, it becomes acceptable # according to the old version of the regex. # that should still be true. good_version_of_adversary = repeated_adversary + '@)@' - assert nameargspattern.search(good_version_of_adversary) - if ii > start_reps: - # the hallmark of exponentially catastrophic backtracking - # is that runtime doubles for every added instance of - # the problematic pattern. 
- times_median_doubled += median > 2 * last_median - # also try to rule out non-exponential but still bad cases - # arbitrarily, we should set a hard limit of 10ms as too slow - assert median < trials_per_count * 0.01 - last_median = median - # we accept that maybe the median might double once, due to - # the CPU scheduler acting weird or whatever. More than that - # seems suspicious. - assert times_median_doubled < 2
\ No newline at end of file + assert nameargspattern.search(good_version_of_adversary)
\ No newline at end of file |
