summary | refs | log | tree | commit | diff
path: root/Lib/sre_compile.py
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2017-05-14 08:32:33 +0300
committer: GitHub <noreply@github.com> 2017-05-14 08:32:33 +0300
commit: 821a9d146bc04a1bc1a9807962990a1f59d692b8 (patch)
tree: e981ba61ef49d7bcd83474cefe76ee3f18a6dc3f /Lib/sre_compile.py
parent: cbddf58c797f850a5b06f317a4bb7ab69c6e9715 (diff)
download: cpython-git-821a9d146bc04a1bc1a9807962990a1f59d692b8.tar.gz
bpo-30340: Enhanced regular expressions optimization. (#1542)
This increased the performance of matching some patterns up to 25 times.
Diffstat (limited to 'Lib/sre_compile.py')
-rw-r--r-- Lib/sre_compile.py 15
1 files changed, 10 insertions, 5 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index cebecb93c0..aeb89bcc7b 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -20,6 +20,7 @@ _LITERAL_CODES = {LITERAL, NOT_LITERAL}
_REPEATING_CODES = {REPEAT, MIN_REPEAT, MAX_REPEAT}
_SUCCESS_CODES = {SUCCESS, FAILURE}
_ASSERT_CODES = {ASSERT, ASSERT_NOT}
+_UNIT_CODES = _LITERAL_CODES | {ANY, IN}
# Sets of lowercase characters which have the same uppercase.
_equivalences = (
@@ -125,7 +126,7 @@ def _compile(code, pattern, flags):
elif op in REPEATING_CODES:
if flags & SRE_FLAG_TEMPLATE:
raise error("internal: unsupported template operator %r" % (op,))
- elif _simple(av) and op is not REPEAT:
+ if _simple(av[2]):
if op is MAX_REPEAT:
emit(REPEAT_ONE)
else:
@@ -404,10 +405,14 @@ def _bytes_to_codes(b):
assert len(a) * a.itemsize == len(b)
return a.tolist()
-def _simple(av):
- # check if av is a "simple" operator
- lo, hi = av[2].getwidth()
- return lo == hi == 1 and av[2][0][0] != SUBPATTERN
+def _simple(p):
+ # check if this subpattern is a "simple" operator
+ if len(p) != 1:
+ return False
+ op, av = p[0]
+ if op is SUBPATTERN:
+ return av[0] is None and _simple(av[-1])
+ return op in _UNIT_CODES
def _generate_overlap_table(prefix):
"""