From 73a85c4e1da42db28e3de57c868d24a089b8d277 Mon Sep 17 00:00:00 2001 From: Dennis Sweeney <36520290+sweeneyde@users.noreply.github.com> Date: Sun, 28 Feb 2021 13:20:50 -0500 Subject: bpo-41972: Use the two-way algorithm for string searching (GH-22904) Implement an enhanced variant of Crochemore and Perrin's Two-Way string searching algorithm, which reduces worst-case time from quadratic (the product of the string and pattern lengths) to linear. This applies to forward searches (like``find``, ``index``, ``replace``); the algorithm for reverse searches (like ``rfind``) is not changed. Co-authored-by: Tim Peters --- Lib/test/string_tests.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'Lib/test/string_tests.py') diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 527f505c01..840d7bb755 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -5,6 +5,7 @@ Common tests shared by test_unicode, test_userstring and test_bytes. import unittest, string, sys, struct from test import support from collections import UserList +import random class Sequence: def __init__(self, seq='wxyz'): self.seq = seq @@ -317,6 +318,44 @@ class BaseTest: else: self.checkraises(TypeError, 'hello', 'rindex', 42) + def test_find_periodic_pattern(self): + """Cover the special path for periodic patterns.""" + def reference_find(p, s): + for i in range(len(s)): + if s.startswith(p, i): + return i + return -1 + + rr = random.randrange + choices = random.choices + for _ in range(1000): + p0 = ''.join(choices('abcde', k=rr(10))) * rr(10, 20) + p = p0[:len(p0) - rr(10)] # pop off some characters + left = ''.join(choices('abcdef', k=rr(2000))) + right = ''.join(choices('abcdef', k=rr(2000))) + text = left + p + right + with self.subTest(p=p, text=text): + self.checkequal(reference_find(p, text), + text, 'find', p) + + def test_find_shift_table_overflow(self): + """When the table of 8-bit shifts overflows.""" + N = 2**8 + 100 + + # first check the periodic case + # here, the shift for 'b' is N + 1. + pattern1 = 'a' * N + 'b' + 'a' * N + text1 = 'babbaa' * N + pattern1 + self.checkequal(len(text1)-len(pattern1), + text1, 'find', pattern1) + + # now check the non-periodic case + # here, the shift for 'd' is 3*(N+1)+1 + pattern2 = 'ddd' + 'abc' * N + "eee" + text2 = pattern2[:-1] + "ddeede" * 2 * N + pattern2 + "de" * N + self.checkequal(len(text2) - N*len("de") - len(pattern2), + text2, 'find', pattern2) + def test_lower(self): self.checkequal('hello', 'HeLLo', 'lower') self.checkequal('hello', 'hello', 'lower') -- cgit v1.2.1