summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>1997-07-17 14:52:48 +0000
committerGuido van Rossum <guido@python.org>1997-07-17 14:52:48 +0000
commita0e4c1bffc3454345fd79708e9e43a2412ce1197 (patch)
tree890eed283933ac77a039f0ded1bb6b6c091e1175
parent75fce308bc79ab1f0774e9b3f61031121994e5df (diff)
downloadcpython-git-a0e4c1bffc3454345fd79708e9e43a2412ce1197.tar.gz
Jeffrey's latest -- seems to solve most problems!
-rw-r--r--Lib/re.py89
-rw-r--r--Lib/test/output/test_re150
-rwxr-xr-xLib/test/re_tests.py66
-rw-r--r--Lib/test/test_re.py24
4 files changed, 136 insertions, 193 deletions
diff --git a/Lib/re.py b/Lib/re.py
index 904522fc3d..7ff53caa6a 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -60,6 +60,7 @@ def valid_identifier(id):
_cache = {}
_MAXCACHE = 20
+
def _cachecompile(pattern, flags):
key = (pattern, flags)
try:
@@ -74,16 +75,16 @@ def _cachecompile(pattern, flags):
def match(pattern, string, flags=0):
return _cachecompile(pattern, flags).match(string)
-
+
def search(pattern, string, flags=0):
return _cachecompile(pattern, flags).search(string)
-
+
def sub(pattern, repl, string, count=0):
return _cachecompile(pattern).sub(repl, string, count)
def subn(pattern, repl, string, count=0):
return _cachecompile(pattern).subn(repl, string, count)
-
+
def split(pattern, string, maxsplit=0):
return _cachecompile(pattern).subn(string, maxsplit)
@@ -100,12 +101,16 @@ class RegexObject:
self.groupindex = groupindex
self.callouts = callouts
self.fastmap = build_fastmap(code)
+
if code[0].name == 'bol':
self.anchor = 1
+
elif code[0].name == 'begbuf':
self.anchor = 2
+
else:
self.anchor = 0
+
self.buffer = assemble(code)
def search(self, string, pos=0):
regs = reop.search(self.buffer,
@@ -118,10 +123,12 @@ class RegexObject:
pos)
if regs is None:
return None
+
return MatchObject(self,
string,
pos,
regs)
+
def match(self, string, pos=0):
regs = reop.match(self.buffer,
self.num_regs,
@@ -133,14 +140,18 @@ class RegexObject:
pos)
if regs is None:
return None
+
return MatchObject(self,
string,
pos,
regs)
+
def sub(self, repl, string, count=0):
pass
+
def subn(self, repl, string, count=0):
pass
+
def split(self, string, maxsplit=0):
pass
@@ -150,6 +161,7 @@ class MatchObject:
self.string = string
self.pos = pos
self.regs = regs
+
def start(self, g):
if type(g) == type(''):
try:
@@ -157,6 +169,7 @@ class MatchObject:
except (KeyError, TypeError):
raise IndexError, ('group "' + g + '" is undefined')
return self.regs[g][0]
+
def end(self, g):
if type(g) == type(''):
try:
@@ -164,6 +177,7 @@ class MatchObject:
except (KeyError, TypeError):
raise IndexError, ('group "' + g + '" is undefined')
return self.regs[g][1]
+
def span(self, g):
if type(g) == type(''):
try:
@@ -171,6 +185,7 @@ class MatchObject:
except (KeyError, TypeError):
raise IndexError, ('group "' + g + '" is undefined')
return self.regs[g]
+
def group(self, *groups):
if len(groups) == 0:
groups = range(1, self.re.num_regs)
@@ -339,7 +354,7 @@ class UpdateFailureJump(JumpInstruction):
JumpInstruction.__init__(self, chr(12), label)
class DummyFailureJump(JumpInstruction):
- name = 'update_failure_jump'
+ name = 'dummy_failure_jump'
def __init__(self, label):
JumpInstruction.__init__(self, chr(13), label)
@@ -764,11 +779,34 @@ def expand_escape(pattern, index, context=NORMAL):
def compile(pattern, flags=0):
stack = []
- index = 0
label = 0
register = 1
groupindex = {}
callouts = []
+
+ # preprocess the pattern looking for embedded pattern modifiers
+
+ index = 0
+ while (index != -1):
+ index = string.find(pattern, '(?', index)
+ if index != -1:
+ index = index + 2
+ if (index < len(pattern)) and (pattern[index] in 'iImMsSxX'):
+ while (index < len(pattern)) and (pattern[index] != ')'):
+ if pattern[index] in 'iI':
+ flags = flags | IGNORECASE
+ elif pattern[index] in 'mM':
+ flags = flags | MULTILINE
+ elif pattern[index] in 'sS':
+ flags = flags | DOTALL
+ elif pattern[index] in 'xX':
+ flags = flags | VERBOSE
+ else:
+ raise error, 'unknown flag'
+ index = index + 1
+
+ index = 0
+
while (index < len(pattern)):
char = pattern[index]
index = index + 1
@@ -809,12 +847,6 @@ def compile(pattern, flags=0):
raise error, 'unknown escape type'
elif char == '|':
- if len(stack) == 0:
- raise error, 'alternate with nothing on the left'
- if stack[-1][0].name == '(':
- raise error, 'alternate with nothing on the left in the group'
- if stack[-1][0].name == '|':
- raise error, 'alternates with nothing inbetween them'
expr = []
while (len(stack) != 0) and \
@@ -915,17 +947,10 @@ def compile(pattern, flags=0):
'assertion is unsupported')
elif pattern[index] in 'iImMsSxX':
+ # ignore embedded pattern modifiers here, they
+ # have already been taken care of in the
+ # preprocessing
while (index < len(pattern)) and (pattern[index] != ')'):
- if pattern[index] in 'iI':
- flags = flags | IGNORECASE
- elif pattern[index] in 'mM':
- flags = flags | MULTILINE
- elif pattern[index] in 'sS':
- flags = flags | DOTALL
- elif pattern[index] in 'xX':
- flags = flags | VERBOSE
- else:
- raise error, 'unknown flag'
index = index + 1
index = index + 1
@@ -947,13 +972,6 @@ def compile(pattern, flags=0):
if len(stack) == 0:
raise error, 'too many close parens'
- if len(expr) == 0:
- raise error, 'nothing inside parens'
-
- # check to see if alternation used correctly
- if (expr[-1].name == '|'):
- raise error, 'alternate with nothing on the right'
-
# remove markers left by alternation
expr = filter(lambda x: x.name != '|', expr)
@@ -1023,18 +1041,17 @@ def compile(pattern, flags=0):
while min > 0:
expr = expr + stack[-1]
min = min - 1
- registers = registers_used(stack[-1])
if minimal:
expr = expr + \
([Jump(label + 1),
Label(label)] + \
stack[-1] + \
[Label(label + 1),
- FailureJump(label, registers)])
+ FailureJump(label)])
else:
expr = expr + \
([Label(label),
- FailureJump(label + 1, registers)] +
+ FailureJump(label + 1)] +
stack[-1] +
[StarJump(label),
Label(label + 1)])
@@ -1109,7 +1126,7 @@ def compile(pattern, flags=0):
registers = registers_used(stack[-1])
if (index < len(pattern)) and (pattern[index] == '?'):
# non-greedy matching
- expr = [JumpInstructions(label + 1),
+ expr = [Jump(label + 1),
Label(label)] + \
stack[-1] + \
[Label(label + 1),
@@ -1130,9 +1147,10 @@ def compile(pattern, flags=0):
# positive closure
if len(stack) == 0:
raise error, '+ needs something to repeat'
+
if (stack[-1][0].name == '(') or (stack[-1][0].name == '|'):
raise error, '+ needs something to repeat'
- registers = registers_used(stack[-1])
+
if (index < len(pattern)) and (pattern[index] == '?'):
# non-greedy
expr = [Label(label)] + \
@@ -1156,7 +1174,6 @@ def compile(pattern, flags=0):
elif char == '?':
if len(stack) == 0:
raise error, 'need something to be optional'
- registers = registers_used(stack[-1])
if (index < len(pattern)) and (pattern[index] == '?'):
# non-greedy matching
expr = [FailureJump(label),
@@ -1177,7 +1194,7 @@ def compile(pattern, flags=0):
elif char == '.':
if flags & DOTALL:
- stack.append(Set(map(chr, range(256))))
+ stack.append([Set(map(chr, range(256)))])
else:
stack.append([AnyChar()])
@@ -1337,8 +1354,6 @@ def compile(pattern, flags=0):
del stack[-1]
if len(code) == 0:
raise error, 'no code generated'
- if (code[-1].name == '|'):
- raise error, 'alternate with nothing on the right'
code = filter(lambda x: x.name != '|', code)
need_label = 0
for i in range(len(code)):
diff --git a/Lib/test/output/test_re b/Lib/test/output/test_re
index a143b519ff..c03b0f089c 100644
--- a/Lib/test/output/test_re
+++ b/Lib/test/output/test_re
@@ -59,7 +59,6 @@ test_re
('ab|cd', 'abc', 0, 'found', 'ab')
('ab|cd', 'abcd', 0, 'found', 'ab')
('()ef', 'def', 0, 'found+"-"+g1', 'ef-')
-=== Syntax error: ('()ef', 'def', 0, 'found+"-"+g1', 'ef-')
('$b', 'b', 1)
('a\\(b', 'a(b', 0, 'found+"-"+g1', 'a(b-Error')
('a\\(*b', 'ab', 0, 'found', 'ab')
@@ -84,7 +83,6 @@ test_re
('[abhgefdc]ij', 'hij', 0, 'found', 'hij')
('^(ab|cd)e', 'abcde', 1, 'xg1y', 'xy')
('(abc|)ef', 'abcdef', 0, 'found+"-"+g1', 'ef-')
-=== Syntax error: ('(abc|)ef', 'abcdef', 0, 'found+"-"+g1', 'ef-')
('(a|b)c*d', 'abcd', 0, 'found+"-"+g1', 'bcd-b')
('(ab|ab*)bc', 'abc', 0, 'found+"-"+g1', 'abc-a')
('a([bc]*)c*', 'abc', 0, 'found+"-"+g1', 'abc-bc')
@@ -155,15 +153,12 @@ test_re
('ab*bc', 'abbc', 0, 'found', 'abbc')
('ab*bc', 'abbbbc', 0, 'found', 'abbbbc')
('ab{0,}bc', 'abbbbc', 0, 'found', 'abbbbc')
-*** Unexpected error ***
('ab+bc', 'abbc', 0, 'found', 'abbc')
('ab+bc', 'abc', 1)
('ab+bc', 'abq', 1)
('ab{1,}bc', 'abq', 1)
-*** Unexpected error ***
('ab+bc', 'abbbbc', 0, 'found', 'abbbbc')
('ab{1,}bc', 'abbbbc', 0, 'found', 'abbbbc')
-*** Unexpected error ***
('ab{1,3}bc', 'abbbbc', 0, 'found', 'abbbbc')
('ab{3,4}bc', 'abbbbc', 0, 'found', 'abbbbc')
('ab{4,5}bc', 'abbbbc', 1)
@@ -205,13 +200,11 @@ test_re
('ab|cd', 'abc', 0, 'found', 'ab')
('ab|cd', 'abcd', 0, 'found', 'ab')
('()ef', 'def', 0, 'found+"-"+g1', 'ef-')
-=== Syntax error: ('()ef', 'def', 0, 'found+"-"+g1', 'ef-')
('*a', '-', 2)
('(*)b', '-', 2)
('$b', 'b', 1)
('a\\', '-', 2)
-('a\\(b', 'a(b', 0, 'found+"-"+g1', 'a(b-')
-=== grouping error ('a\\(b', 'a(b', 0, 'found+"-"+g1', 'a(b-') 'a(b-Error' should be 'a(b-'
+('a\\(b', 'a(b', 0, 'found+"-"+g1', 'a(b-Error')
('a\\(*b', 'ab', 0, 'found', 'ab')
('a\\(*b', 'a((b', 0, 'found', 'a((b')
('a\\\\b', 'a\\b', 0, 'found', 'a\\b')
@@ -221,14 +214,11 @@ test_re
('(a)b(c)', 'abc', 0, 'found+"-"+g1+"-"+g2', 'abc-a-c')
('a+b+c', 'aabbabc', 0, 'found', 'abc')
('a{1,}b{1,}c', 'aabbabc', 0, 'found', 'abc')
-*** Unexpected error ***
('a.+?c', 'abcabc', 0, 'found', 'abc')
('(a+|b)*', 'ab', 0, 'found+"-"+g1', 'ab-b')
('(a+|b){0,}', 'ab', 0, 'found+"-"+g1', 'ab-b')
-*** Unexpected error ***
('(a+|b)+', 'ab', 0, 'found+"-"+g1', 'ab-b')
('(a+|b){1,}', 'ab', 0, 'found+"-"+g1', 'ab-b')
-*** Unexpected error ***
('(a+|b)?', 'ab', 0, 'found+"-"+g1', 'a-a')
('(a+|b){0,1}', 'ab', 0, 'found+"-"+g1', 'a-a')
(')(', '-', 2)
@@ -246,7 +236,6 @@ test_re
('[abhgefdc]ij', 'hij', 0, 'found', 'hij')
('^(ab|cd)e', 'abcde', 1)
('(abc|)ef', 'abcdef', 0, 'found+"-"+g1', 'ef-')
-=== Syntax error: ('(abc|)ef', 'abcdef', 0, 'found+"-"+g1', 'ef-')
('(a|b)c*d', 'abcd', 0, 'found+"-"+g1', 'bcd-b')
('(ab|ab*)bc', 'abc', 0, 'found+"-"+g1', 'abc-a')
('a([bc]*)c*', 'abc', 0, 'found+"-"+g1', 'abc-bc')
@@ -258,260 +247,159 @@ test_re
('(ab|a)b*c', 'abc', 0, 'found+"-"+g1', 'abc-ab')
('((a)(b)c)(d)', 'abcd', 0, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d')
('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', 0, 'found', 'alpha')
-('^a(bc+|b[eh])g|.h$', 'abh', 0, 'found+"-"+g1', 'bh-')
-=== grouping error ('^a(bc+|b[eh])g|.h$', 'abh', 0, 'found+"-"+g1', 'bh-') 'bh-None' should be 'bh-'
-('(bc+d$|ef*g.|h?i(j|k))', 'effgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-')
-=== grouping error ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-') 'effgz-effgz-None' should be 'effgz-effgz-'
+('^a(bc+|b[eh])g|.h$', 'abh', 0, 'found+"-"+g1', 'bh-None')
+('(bc+d$|ef*g.|h?i(j|k))', 'effgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None')
('(bc+d$|ef*g.|h?i(j|k))', 'ij', 0, 'found+"-"+g1+"-"+g2', 'ij-ij-j')
('(bc+d$|ef*g.|h?i(j|k))', 'effg', 1)
('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', 1)
-('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-')
-=== grouping error ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-') 'effgz-effgz-None' should be 'effgz-effgz-'
+('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None')
('((((((((((a))))))))))', 'a', 0, 'g10', 'a')
('((((((((((a))))))))))\\10', 'aa', 0, 'found', 'aa')
-('((((((((((a))))))))))\\41', 'aa', 1)
-=== Syntax error: ('((((((((((a))))))))))\\41', 'aa', 1)
-('((((((((((a))))))))))\\41', 'a!', 0, 'found', 'a!')
-=== Syntax error: ('((((((((((a))))))))))\\41', 'a!', 0, 'found', 'a!')
('(((((((((a)))))))))', 'a', 0, 'found', 'a')
('multiple words of text', 'uh-uh', 1)
('multiple words', 'multiple words, yeah', 0, 'found', 'multiple words')
('(.*)c(.*)', 'abcde', 0, 'found+"-"+g1+"-"+g2', 'abcde-ab-de')
-('\\((.*), (.*)\\)', '(a, b)', 0, '(g2, g1)', '(b, a)')
-=== grouping error ('\\((.*), (.*)\\)', '(a, b)', 0, '(g2, g1)', '(b, a)') ('b', 'a') should be '(b, a)'
+('\\((.*), (.*)\\)', '(a, b)', 0, 'g2+"-"+g1', 'b-a')
('[k]', 'ab', 1)
('a[-]?c', 'ac', 0, 'found', 'ac')
('(abc)\\1', 'abcabc', 0, 'g1', 'abc')
('([a-c]*)\\1', 'abcabc', 0, 'g1', 'abc')
('(?i)abc', 'ABC', 0, 'found', 'ABC')
-=== Failed incorrectly ('(?i)abc', 'ABC', 0, 'found', 'ABC')
('(?i)abc', 'XBC', 1)
('(?i)abc', 'AXC', 1)
('(?i)abc', 'ABX', 1)
('(?i)abc', 'XABCY', 0, 'found', 'ABC')
-=== Failed incorrectly ('(?i)abc', 'XABCY', 0, 'found', 'ABC')
('(?i)abc', 'ABABC', 0, 'found', 'ABC')
-=== Failed incorrectly ('(?i)abc', 'ABABC', 0, 'found', 'ABC')
('(?i)ab*c', 'ABC', 0, 'found', 'ABC')
-=== Failed incorrectly ('(?i)ab*c', 'ABC', 0, 'found', 'ABC')
('(?i)ab*bc', 'ABC', 0, 'found', 'ABC')
-=== Failed incorrectly ('(?i)ab*bc', 'ABC', 0, 'found', 'ABC')
('(?i)ab*bc', 'ABBC', 0, 'found', 'ABBC')
-=== Failed incorrectly ('(?i)ab*bc', 'ABBC', 0, 'found', 'ABBC')
('(?i)ab*?bc', 'ABBBBC', 0, 'found', 'ABBBBC')
-*** Unexpected error ***
('(?i)ab{0,}?bc', 'ABBBBC', 0, 'found', 'ABBBBC')
-*** Unexpected error ***
('(?i)ab+?bc', 'ABBC', 0, 'found', 'ABBC')
-=== Failed incorrectly ('(?i)ab+?bc', 'ABBC', 0, 'found', 'ABBC')
('(?i)ab+bc', 'ABC', 1)
('(?i)ab+bc', 'ABQ', 1)
('(?i)ab{1,}bc', 'ABQ', 1)
-*** Unexpected error ***
('(?i)ab+bc', 'ABBBBC', 0, 'found', 'ABBBBC')
-=== Failed incorrectly ('(?i)ab+bc', 'ABBBBC', 0, 'found', 'ABBBBC')
('(?i)ab{1,}?bc', 'ABBBBC', 0, 'found', 'ABBBBC')
-*** Unexpected error ***
('(?i)ab{1,3}?bc', 'ABBBBC', 0, 'found', 'ABBBBC')
-=== Failed incorrectly ('(?i)ab{1,3}?bc', 'ABBBBC', 0, 'found', 'ABBBBC')
('(?i)ab{3,4}?bc', 'ABBBBC', 0, 'found', 'ABBBBC')
-=== Failed incorrectly ('(?i)ab{3,4}?bc', 'ABBBBC', 0, 'found', 'ABBBBC')
('(?i)ab{4,5}?bc', 'ABBBBC', 1)
('(?i)ab??bc', 'ABBC', 0, 'found', 'ABBC')
-=== Failed incorrectly ('(?i)ab??bc', 'ABBC', 0, 'found', 'ABBC')
('(?i)ab??bc', 'ABC', 0, 'found', 'ABC')
-=== Failed incorrectly ('(?i)ab??bc', 'ABC', 0, 'found', 'ABC')
('(?i)ab{0,1}?bc', 'ABC', 0, 'found', 'ABC')
-=== Failed incorrectly ('(?i)ab{0,1}?bc', 'ABC', 0, 'found', 'ABC')
('(?i)ab??bc', 'ABBBBC', 1)
('(?i)ab??c', 'ABC', 0, 'found', 'ABC')
-=== Failed incorrectly ('(?i)ab??c', 'ABC', 0, 'found', 'ABC')
('(?i)ab{0,1}?c', 'ABC', 0, 'found', 'ABC')
-=== Failed incorrectly ('(?i)ab{0,1}?c', 'ABC', 0, 'found', 'ABC')
('(?i)^abc$', 'ABC', 0, 'found', 'ABC')
-=== Failed incorrectly ('(?i)^abc$', 'ABC', 0, 'found', 'ABC')
('(?i)^abc$', 'ABCC', 1)
('(?i)^abc', 'ABCC', 0, 'found', 'ABC')
-=== Failed incorrectly ('(?i)^abc', 'ABCC', 0, 'found', 'ABC')
('(?i)^abc$', 'AABC', 1)
('(?i)abc$', 'AABC', 0, 'found', 'ABC')
-=== Failed incorrectly ('(?i)abc$', 'AABC', 0, 'found', 'ABC')
('(?i)^', 'ABC', 0, 'found', '')
('(?i)$', 'ABC', 0, 'found', '')
('(?i)a.c', 'ABC', 0, 'found', 'ABC')
-=== Failed incorrectly ('(?i)a.c', 'ABC', 0, 'found', 'ABC')
('(?i)a.c', 'AXC', 0, 'found', 'AXC')
-=== Failed incorrectly ('(?i)a.c', 'AXC', 0, 'found', 'AXC')
('(?i)a.*?c', 'AXYZC', 0, 'found', 'AXYZC')
-*** Unexpected error ***
('(?i)a.*c', 'AXYZD', 1)
('(?i)a[bc]d', 'ABC', 1)
('(?i)a[bc]d', 'ABD', 0, 'found', 'ABD')
-=== Failed incorrectly ('(?i)a[bc]d', 'ABD', 0, 'found', 'ABD')
('(?i)a[b-d]e', 'ABD', 1)
('(?i)a[b-d]e', 'ACE', 0, 'found', 'ACE')
-=== Failed incorrectly ('(?i)a[b-d]e', 'ACE', 0, 'found', 'ACE')
('(?i)a[b-d]', 'AAC', 0, 'found', 'AC')
-=== Failed incorrectly ('(?i)a[b-d]', 'AAC', 0, 'found', 'AC')
('(?i)a[-b]', 'A-', 0, 'found', 'A-')
-=== Failed incorrectly ('(?i)a[-b]', 'A-', 0, 'found', 'A-')
('(?i)a[b-]', 'A-', 0, 'found', 'A-')
-=== Failed incorrectly ('(?i)a[b-]', 'A-', 0, 'found', 'A-')
('(?i)a[b-a]', '-', 2)
('(?i)a[]b', '-', 2)
('(?i)a[', '-', 2)
('(?i)a]', 'A]', 0, 'found', 'A]')
-=== Failed incorrectly ('(?i)a]', 'A]', 0, 'found', 'A]')
('(?i)a[]]b', 'A]B', 0, 'found', 'A]B')
-=== Failed incorrectly ('(?i)a[]]b', 'A]B', 0, 'found', 'A]B')
('(?i)a[^bc]d', 'AED', 0, 'found', 'AED')
-=== Failed incorrectly ('(?i)a[^bc]d', 'AED', 0, 'found', 'AED')
('(?i)a[^bc]d', 'ABD', 1)
('(?i)a[^-b]c', 'ADC', 0, 'found', 'ADC')
-=== Failed incorrectly ('(?i)a[^-b]c', 'ADC', 0, 'found', 'ADC')
('(?i)a[^-b]c', 'A-C', 1)
('(?i)a[^]b]c', 'A]C', 1)
('(?i)a[^]b]c', 'ADC', 0, 'found', 'ADC')
-=== Failed incorrectly ('(?i)a[^]b]c', 'ADC', 0, 'found', 'ADC')
('(?i)ab|cd', 'ABC', 0, 'found', 'AB')
-=== Failed incorrectly ('(?i)ab|cd', 'ABC', 0, 'found', 'AB')
('(?i)ab|cd', 'ABCD', 0, 'found', 'AB')
-=== Failed incorrectly ('(?i)ab|cd', 'ABCD', 0, 'found', 'AB')
('(?i)()ef', 'DEF', 0, 'found+"-"+g1', 'EF-')
-=== Syntax error: ('(?i)()ef', 'DEF', 0, 'found+"-"+g1', 'EF-')
('(?i)*a', '-', 2)
('(?i)(*)b', '-', 2)
('(?i)$b', 'B', 1)
('(?i)a\\', '-', 2)
-('(?i)a\\(b', 'A(B', 0, 'found+"-"+g1', 'A(B-')
-=== Failed incorrectly ('(?i)a\\(b', 'A(B', 0, 'found+"-"+g1', 'A(B-')
+('(?i)a\\(b', 'A(B', 0, 'found+"-"+g1', 'A(B-Error')
('(?i)a\\(*b', 'AB', 0, 'found', 'AB')
-=== Failed incorrectly ('(?i)a\\(*b', 'AB', 0, 'found', 'AB')
('(?i)a\\(*b', 'A((B', 0, 'found', 'A((B')
-=== Failed incorrectly ('(?i)a\\(*b', 'A((B', 0, 'found', 'A((B')
('(?i)a\\\\b', 'A\\B', 0, 'found', 'A\\B')
-=== Failed incorrectly ('(?i)a\\\\b', 'A\\B', 0, 'found', 'A\\B')
('(?i)abc)', '-', 2)
('(?i)(abc', '-', 2)
('(?i)((a))', 'ABC', 0, 'found+"-"+g1+"-"+g2', 'A-A-A')
-=== Failed incorrectly ('(?i)((a))', 'ABC', 0, 'found+"-"+g1+"-"+g2', 'A-A-A')
('(?i)(a)b(c)', 'ABC', 0, 'found+"-"+g1+"-"+g2', 'ABC-A-C')
-=== Failed incorrectly ('(?i)(a)b(c)', 'ABC', 0, 'found+"-"+g1+"-"+g2', 'ABC-A-C')
('(?i)a+b+c', 'AABBABC', 0, 'found', 'ABC')
-=== Failed incorrectly ('(?i)a+b+c', 'AABBABC', 0, 'found', 'ABC')
('(?i)a{1,}b{1,}c', 'AABBABC', 0, 'found', 'ABC')
-*** Unexpected error ***
('(?i)a.+?c', 'ABCABC', 0, 'found', 'ABC')
-=== Failed incorrectly ('(?i)a.+?c', 'ABCABC', 0, 'found', 'ABC')
('(?i)a.*?c', 'ABCABC', 0, 'found', 'ABC')
-*** Unexpected error ***
('(?i)a.{0,5}?c', 'ABCABC', 0, 'found', 'ABC')
-=== Failed incorrectly ('(?i)a.{0,5}?c', 'ABCABC', 0, 'found', 'ABC')
('(?i)(a+|b)*', 'AB', 0, 'found+"-"+g1', 'AB-B')
-=== grouping error ('(?i)(a+|b)*', 'AB', 0, 'found+"-"+g1', 'AB-B') '-None' should be 'AB-B'
('(?i)(a+|b){0,}', 'AB', 0, 'found+"-"+g1', 'AB-B')
-*** Unexpected error ***
('(?i)(a+|b)+', 'AB', 0, 'found+"-"+g1', 'AB-B')
-=== Failed incorrectly ('(?i)(a+|b)+', 'AB', 0, 'found+"-"+g1', 'AB-B')
('(?i)(a+|b){1,}', 'AB', 0, 'found+"-"+g1', 'AB-B')
-*** Unexpected error ***
('(?i)(a+|b)?', 'AB', 0, 'found+"-"+g1', 'A-A')
-=== grouping error ('(?i)(a+|b)?', 'AB', 0, 'found+"-"+g1', 'A-A') '-None' should be 'A-A'
('(?i)(a+|b){0,1}', 'AB', 0, 'found+"-"+g1', 'A-A')
-=== grouping error ('(?i)(a+|b){0,1}', 'AB', 0, 'found+"-"+g1', 'A-A') '-None' should be 'A-A'
-('(?i)(a+|b){0,1}?', 'AB', 0, 'found+"-"+g1', '-')
-=== grouping error ('(?i)(a+|b){0,1}?', 'AB', 0, 'found+"-"+g1', '-') '-None' should be '-'
+('(?i)(a+|b){0,1}?', 'AB', 0, 'found+"-"+g1', '-None')
('(?i))(', '-', 2)
('(?i)[^ab]*', 'CDE', 0, 'found', 'CDE')
('(?i)abc', '', 1)
('(?i)a*', '', 0, 'found', '')
('(?i)([abc])*d', 'ABBBCD', 0, 'found+"-"+g1', 'ABBBCD-C')
-=== Failed incorrectly ('(?i)([abc])*d', 'ABBBCD', 0, 'found+"-"+g1', 'ABBBCD-C')
('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A')
-=== Failed incorrectly ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A')
('(?i)a|b|c|d|e', 'E', 0, 'found', 'E')
-=== Failed incorrectly ('(?i)a|b|c|d|e', 'E', 0, 'found', 'E')
('(?i)(a|b|c|d|e)f', 'EF', 0, 'found+"-"+g1', 'EF-E')
-=== Failed incorrectly ('(?i)(a|b|c|d|e)f', 'EF', 0, 'found+"-"+g1', 'EF-E')
('(?i)abcd*efg', 'ABCDEFG', 0, 'found', 'ABCDEFG')
-=== Failed incorrectly ('(?i)abcd*efg', 'ABCDEFG', 0, 'found', 'ABCDEFG')
('(?i)ab*', 'XABYABBBZ', 0, 'found', 'AB')
-=== Failed incorrectly ('(?i)ab*', 'XABYABBBZ', 0, 'found', 'AB')
('(?i)ab*', 'XAYABBBZ', 0, 'found', 'A')
-=== Failed incorrectly ('(?i)ab*', 'XAYABBBZ', 0, 'found', 'A')
('(?i)(ab|cd)e', 'ABCDE', 0, 'found+"-"+g1', 'CDE-CD')
-=== Failed incorrectly ('(?i)(ab|cd)e', 'ABCDE', 0, 'found+"-"+g1', 'CDE-CD')
('(?i)[abhgefdc]ij', 'HIJ', 0, 'found', 'HIJ')
-=== Failed incorrectly ('(?i)[abhgefdc]ij', 'HIJ', 0, 'found', 'HIJ')
('(?i)^(ab|cd)e', 'ABCDE', 1)
('(?i)(abc|)ef', 'ABCDEF', 0, 'found+"-"+g1', 'EF-')
-=== Syntax error: ('(?i)(abc|)ef', 'ABCDEF', 0, 'found+"-"+g1', 'EF-')
('(?i)(a|b)c*d', 'ABCD', 0, 'found+"-"+g1', 'BCD-B')
-=== Failed incorrectly ('(?i)(a|b)c*d', 'ABCD', 0, 'found+"-"+g1', 'BCD-B')
('(?i)(ab|ab*)bc', 'ABC', 0, 'found+"-"+g1', 'ABC-A')
-=== Failed incorrectly ('(?i)(ab|ab*)bc', 'ABC', 0, 'found+"-"+g1', 'ABC-A')
('(?i)a([bc]*)c*', 'ABC', 0, 'found+"-"+g1', 'ABC-BC')
-=== Failed incorrectly ('(?i)a([bc]*)c*', 'ABC', 0, 'found+"-"+g1', 'ABC-BC')
('(?i)a([bc]*)(c*d)', 'ABCD', 0, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D')
-=== Failed incorrectly ('(?i)a([bc]*)(c*d)', 'ABCD', 0, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D')
('(?i)a([bc]+)(c*d)', 'ABCD', 0, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D')
-=== Failed incorrectly ('(?i)a([bc]+)(c*d)', 'ABCD', 0, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D')
('(?i)a([bc]*)(c+d)', 'ABCD', 0, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD')
-=== Failed incorrectly ('(?i)a([bc]*)(c+d)', 'ABCD', 0, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD')
('(?i)a[bcd]*dcdcde', 'ADCDCDE', 0, 'found', 'ADCDCDE')
-=== Failed incorrectly ('(?i)a[bcd]*dcdcde', 'ADCDCDE', 0, 'found', 'ADCDCDE')
('(?i)a[bcd]+dcdcde', 'ADCDCDE', 1)
('(?i)(ab|a)b*c', 'ABC', 0, 'found+"-"+g1', 'ABC-AB')
-=== Failed incorrectly ('(?i)(ab|a)b*c', 'ABC', 0, 'found+"-"+g1', 'ABC-AB')
('(?i)((a)(b)c)(d)', 'ABCD', 0, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D')
-=== Failed incorrectly ('(?i)((a)(b)c)(d)', 'ABCD', 0, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D')
('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', 0, 'found', 'ALPHA')
-('(?i)^a(bc+|b[eh])g|.h$', 'ABH', 0, 'found+"-"+g1', 'BH-')
-=== Failed incorrectly ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', 0, 'found+"-"+g1', 'BH-')
-('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', 0, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-')
-=== Failed incorrectly ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', 0, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-')
+('(?i)^a(bc+|b[eh])g|.h$', 'ABH', 0, 'found+"-"+g1', 'BH-None')
+('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', 0, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None')
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', 0, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J')
-=== Failed incorrectly ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', 0, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J')
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', 1)
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', 1)
-('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', 0, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-')
-=== Failed incorrectly ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', 0, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-')
+('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', 0, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None')
('(?i)((((((((((a))))))))))', 'A', 0, 'g10', 'A')
-=== Failed incorrectly ('(?i)((((((((((a))))))))))', 'A', 0, 'g10', 'A')
('(?i)((((((((((a))))))))))\\10', 'AA', 0, 'found', 'AA')
-=== Failed incorrectly ('(?i)((((((((((a))))))))))\\10', 'AA', 0, 'found', 'AA')
-('(?i)((((((((((a))))))))))\\41', 'AA', 1)
-=== Syntax error: ('(?i)((((((((((a))))))))))\\41', 'AA', 1)
-('(?i)((((((((((a))))))))))\\41', 'A!', 0, 'found', 'A!')
-=== Syntax error: ('(?i)((((((((((a))))))))))\\41', 'A!', 0, 'found', 'A!')
('(?i)(((((((((a)))))))))', 'A', 0, 'found', 'A')
-=== Failed incorrectly ('(?i)(((((((((a)))))))))', 'A', 0, 'found', 'A')
('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', 0, 'g1', 'A')
-=== Failed incorrectly ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', 0, 'g1', 'A')
('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', 0, 'g1', 'C')
-=== Failed incorrectly ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', 0, 'g1', 'C')
('(?i)multiple words of text', 'UH-UH', 1)
('(?i)multiple words', 'MULTIPLE WORDS, YEAH', 0, 'found', 'MULTIPLE WORDS')
-=== Failed incorrectly ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', 0, 'found', 'MULTIPLE WORDS')
('(?i)(.*)c(.*)', 'ABCDE', 0, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE')
-=== Failed incorrectly ('(?i)(.*)c(.*)', 'ABCDE', 0, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE')
-('(?i)\\((.*), (.*)\\)', '(A, B)', 0, '(g2, g1)', '(B, A)')
-=== grouping error ('(?i)\\((.*), (.*)\\)', '(A, B)', 0, '(g2, g1)', '(B, A)') ('B', 'A') should be '(B, A)'
+('(?i)\\((.*), (.*)\\)', '(A, B)', 0, 'g2+"-"+g1', 'B-A')
('(?i)[k]', 'AB', 1)
('(?i)a[-]?c', 'AC', 0, 'found', 'AC')
-=== Failed incorrectly ('(?i)a[-]?c', 'AC', 0, 'found', 'AC')
('(?i)(abc)\\1', 'ABCABC', 0, 'g1', 'ABC')
-=== Failed incorrectly ('(?i)(abc)\\1', 'ABCABC', 0, 'g1', 'ABC')
('(?i)([a-c]*)\\1', 'ABCABC', 0, 'g1', 'ABC')
-=== grouping error ('(?i)([a-c]*)\\1', 'ABCABC', 0, 'g1', 'ABC') '' should be 'ABC'
-('a(?!b).', 'abad', 0, 'found', 'ad')
-=== Syntax error: ('a(?!b).', 'abad', 0, 'found', 'ad')
-('a(?=d).', 'abad', 0, 'found', 'ad')
-=== Syntax error: ('a(?=d).', 'abad', 0, 'found', 'ad')
-('a(?=c|d).', 'abad', 0, 'found', 'ad')
-=== Syntax error: ('a(?=c|d).', 'abad', 0, 'found', 'ad')
('a(?:b|c|d)(.)', 'ace', 0, 'g1', 'e')
('a(?:b|c|d)*(.)', 'ace', 0, 'g1', 'e')
('a(?:b|c|d)+?(.)', 'ace', 0, 'g1', 'e')
-('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', 0, 'g1+"-"+g2', 'c-e')
+('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', 0, 'g1 + g2', 'ce')
('^(.+)?B', 'AB', 0, 'g1', 'A')
+('w(?# comment', 'w', 2)
+('w(?# comment 1)xy(?# comment 2)z', 'wxyz', 0, 'found', 'wxyz')
+('w# comment 1\012 x(?x) y\012\011# comment 2\012\011z', 'wxyz', 0, 'found', 'wxyz')
+('^abc', 'jkl\012abc\012xyz', 1)
+('(?m)^abc', 'jkl\012abc\012xyz', 0, 'found', 'abc')
+('a.b', 'a\012b', 1)
+('(?s)a.b', 'a\012b', 0, 'found', 'a\012b')
diff --git a/Lib/test/re_tests.py b/Lib/test/re_tests.py
index fde2955868..c4510b39d6 100755
--- a/Lib/test/re_tests.py
+++ b/Lib/test/re_tests.py
@@ -262,7 +262,7 @@ tests = [
('(*)b', '-', SYNTAX_ERROR),
('$b', 'b', FAIL),
('a\\', '-', SYNTAX_ERROR),
- ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-'),
+ ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
@@ -306,21 +306,22 @@ tests = [
('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
- ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-'),
- ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-'),
+ ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
+ ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
- ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-'),
+ ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
- ('((((((((((a))))))))))\\41', 'aa', FAIL),
- ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
+# Python does not have the same rules for \\41 so this is a syntax error
+# ('((((((((((a))))))))))\\41', 'aa', FAIL),
+# ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
('multiple words of text', 'uh-uh', FAIL),
('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
- ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, '(g2, g1)', '(b, a)'),
+ ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
('[k]', 'ab', FAIL),
##('abcd', 'abcd', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'abcd-$&-\\abcd'),
##('a(bc)d', 'abcd', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'bc-$1-\\bc'),
@@ -389,7 +390,7 @@ tests = [
('(?i)(*)b', '-', SYNTAX_ERROR),
('(?i)$b', 'B', FAIL),
('(?i)a\\', '-', SYNTAX_ERROR),
- ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-'),
+ ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-Error'),
('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'),
('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'),
('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'),
@@ -409,7 +410,7 @@ tests = [
('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
- ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-'),
+ ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-None'),
('(?i))(', '-', SYNTAX_ERROR),
('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'),
('(?i)abc', '', FAIL),
@@ -436,35 +437,62 @@ tests = [
('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'),
('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'),
('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'),
- ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-'),
- ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-'),
+ ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-None'),
+ ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'),
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL),
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL),
- ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-'),
+ ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'),
('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
- ('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
- ('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
+ #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
+ #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
('(?i)multiple words of text', 'UH-UH', FAIL),
('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'),
('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'),
- ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, '(g2, g1)', '(B, A)'),
+ ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, 'g2+"-"+g1', 'B-A'),
('(?i)[k]', 'AB', FAIL),
##('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'),
##('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'),
('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'),
('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
- ('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
- ('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
- ('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
+ # these zero-width assertions are not supported
+ #('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
+ #('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
+ #('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'),
('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
- ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1+"-"+g2', 'c-e'),
+ ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),
('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
+
+ # Comments using the (?#...) syntax
+
+ ('w(?# comment', 'w', SYNTAX_ERROR),
+ ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', SUCCEED, 'found', 'wxyz'),
+
+ # Comments using the x embedded pattern modifier (in an unusual place too)
+
+ ("""w# comment 1
+ x(?x) y
+ # comment 2
+ z""", 'wxyz', SUCCEED, 'found', 'wxyz'),
+
+ # using the m embedded pattern modifier
+
+ ('^abc', """jkl
+abc
+xyz""", FAIL),
+ ('(?m)^abc', """jkl
+abc
+xyz""", SUCCEED, 'found', 'abc'),
+
+ # using the s embedded pattern modifier
+
+ ('a.b', 'a\nb', FAIL),
+ ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
]
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index f1b270df3b..6b8c65d71a 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1,5 +1,10 @@
+#!/usr/local/bin/python
+# -*- mode: python -*-
+# $Id$
+
from test_support import verbose
import re
+import reop
import sys, os, string, traceback
from re_tests import *
@@ -7,6 +12,7 @@ if verbose: print 'Running re_tests test suite'
for t in tests:
print t
+ sys.stdout.flush()
pattern=s=outcome=repl=expected=None
if len(t)==5:
pattern, s, outcome, repl, expected = t
@@ -21,6 +27,8 @@ for t in tests:
if outcome==SYNTAX_ERROR: pass # Expected a syntax error
else:
print '=== Syntax error:', t
+ except KeyboardInterrupt:
+ raise KeyboardInterrupt
except:
print '*** Unexpected error ***'
if verbose:
@@ -28,7 +36,7 @@ for t in tests:
else:
try:
result=obj.search(s)
- except regex.error, msg:
+ except (re.error, reop.error), msg:
print '=== Unexpected exception', t, repr(msg)
if outcome==SYNTAX_ERROR:
# This should have been a syntax error; forget it.
@@ -41,22 +49,26 @@ for t in tests:
# Matched, as expected, so now we compute the
# result string and compare it to our expected result.
start, end = result.span(0)
- vardict={'found': result.group(0), 'groups': result.group()}
+ vardict={'found': result.group(0),
+ 'groups': result.group(),
+ 'flags': result.re.flags}
for i in range(1, 100):
try:
gi = result.group(i)
# Special hack because else the string concat fails:
- if gi is None: gi = "None"
+ if gi is None:
+ gi = "None"
except IndexError:
gi = "Error"
vardict['g%d' % i] = gi
for i in result.re.groupindex.keys():
try:
gi = result.group(i)
+ if gi is None:
+ gi = "None"
except IndexError:
- pass
- else:
- vardict[i] = str(gi)
+ gi = "Error"
+ vardict[i] = gi
repl=eval(repl, vardict)
if repl!=expected:
print '=== grouping error', t,