diff options
author | Guido van Rossum <guido@python.org> | 1997-07-17 14:52:48 +0000 |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1997-07-17 14:52:48 +0000 |
commit | a0e4c1bffc3454345fd79708e9e43a2412ce1197 (patch) | |
tree | 890eed283933ac77a039f0ded1bb6b6c091e1175 | |
parent | 75fce308bc79ab1f0774e9b3f61031121994e5df (diff) | |
download | cpython-git-a0e4c1bffc3454345fd79708e9e43a2412ce1197.tar.gz |
Jeffrey's latest -- seems to solve most problems!
-rw-r--r-- | Lib/re.py | 89 | ||||
-rw-r--r-- | Lib/test/output/test_re | 150 | ||||
-rwxr-xr-x | Lib/test/re_tests.py | 66 | ||||
-rw-r--r-- | Lib/test/test_re.py | 24 |
4 files changed, 136 insertions, 193 deletions
@@ -60,6 +60,7 @@ def valid_identifier(id): _cache = {} _MAXCACHE = 20 + def _cachecompile(pattern, flags): key = (pattern, flags) try: @@ -74,16 +75,16 @@ def _cachecompile(pattern, flags): def match(pattern, string, flags=0): return _cachecompile(pattern, flags).match(string) - + def search(pattern, string, flags=0): return _cachecompile(pattern, flags).search(string) - + def sub(pattern, repl, string, count=0): return _cachecompile(pattern).sub(repl, string, count) def subn(pattern, repl, string, count=0): return _cachecompile(pattern).subn(repl, string, count) - + def split(pattern, string, maxsplit=0): return _cachecompile(pattern).subn(string, maxsplit) @@ -100,12 +101,16 @@ class RegexObject: self.groupindex = groupindex self.callouts = callouts self.fastmap = build_fastmap(code) + if code[0].name == 'bol': self.anchor = 1 + elif code[0].name == 'begbuf': self.anchor = 2 + else: self.anchor = 0 + self.buffer = assemble(code) def search(self, string, pos=0): regs = reop.search(self.buffer, @@ -118,10 +123,12 @@ class RegexObject: pos) if regs is None: return None + return MatchObject(self, string, pos, regs) + def match(self, string, pos=0): regs = reop.match(self.buffer, self.num_regs, @@ -133,14 +140,18 @@ class RegexObject: pos) if regs is None: return None + return MatchObject(self, string, pos, regs) + def sub(self, repl, string, count=0): pass + def subn(self, repl, string, count=0): pass + def split(self, string, maxsplit=0): pass @@ -150,6 +161,7 @@ class MatchObject: self.string = string self.pos = pos self.regs = regs + def start(self, g): if type(g) == type(''): try: @@ -157,6 +169,7 @@ class MatchObject: except (KeyError, TypeError): raise IndexError, ('group "' + g + '" is undefined') return self.regs[g][0] + def end(self, g): if type(g) == type(''): try: @@ -164,6 +177,7 @@ class MatchObject: except (KeyError, TypeError): raise IndexError, ('group "' + g + '" is undefined') return self.regs[g][1] + def span(self, g): if type(g) == type(''): try: @@ -171,6 +185,7 @@ class MatchObject: except (KeyError, TypeError): raise IndexError, ('group "' + g + '" is undefined') return self.regs[g] + def group(self, *groups): if len(groups) == 0: groups = range(1, self.re.num_regs) @@ -339,7 +354,7 @@ class UpdateFailureJump(JumpInstruction): JumpInstruction.__init__(self, chr(12), label) class DummyFailureJump(JumpInstruction): - name = 'update_failure_jump' + name = 'dummy_failure_jump' def __init__(self, label): JumpInstruction.__init__(self, chr(13), label) @@ -764,11 +779,34 @@ def expand_escape(pattern, index, context=NORMAL): def compile(pattern, flags=0): stack = [] - index = 0 label = 0 register = 1 groupindex = {} callouts = [] + + # preprocess the pattern looking for embedded pattern modifiers + + index = 0 + while (index != -1): + index = string.find(pattern, '(?', index) + if index != -1: + index = index + 2 + if (index < len(pattern)) and (pattern[index] in 'iImMsSxX'): + while (index < len(pattern)) and (pattern[index] != ')'): + if pattern[index] in 'iI': + flags = flags | IGNORECASE + elif pattern[index] in 'mM': + flags = flags | MULTILINE + elif pattern[index] in 'sS': + flags = flags | DOTALL + elif pattern[index] in 'xX': + flags = flags | VERBOSE + else: + raise error, 'unknown flag' + index = index + 1 + + index = 0 + while (index < len(pattern)): char = pattern[index] index = index + 1 @@ -809,12 +847,6 @@ def compile(pattern, flags=0): raise error, 'unknown escape type' elif char == '|': - if len(stack) == 0: - raise error, 'alternate with nothing on the left' - if stack[-1][0].name == '(': - raise error, 'alternate with nothing on the left in the group' - if stack[-1][0].name == '|': - raise error, 'alternates with nothing inbetween them' expr = [] while (len(stack) != 0) and \ @@ -915,17 +947,10 @@ def compile(pattern, flags=0): 'assertion is unsupported') elif pattern[index] in 'iImMsSxX': + # ignore embedded pattern modifiers here, they + # have already been taken care of in the + # preprocessing while (index < len(pattern)) and (pattern[index] != ')'): - if pattern[index] in 'iI': - flags = flags | IGNORECASE - elif pattern[index] in 'mM': - flags = flags | MULTILINE - elif pattern[index] in 'sS': - flags = flags | DOTALL - elif pattern[index] in 'xX': - flags = flags | VERBOSE - else: - raise error, 'unknown flag' index = index + 1 index = index + 1 @@ -947,13 +972,6 @@ def compile(pattern, flags=0): if len(stack) == 0: raise error, 'too many close parens' - if len(expr) == 0: - raise error, 'nothing inside parens' - - # check to see if alternation used correctly - if (expr[-1].name == '|'): - raise error, 'alternate with nothing on the right' - # remove markers left by alternation expr = filter(lambda x: x.name != '|', expr) @@ -1023,18 +1041,17 @@ def compile(pattern, flags=0): while min > 0: expr = expr + stack[-1] min = min - 1 - registers = registers_used(stack[-1]) if minimal: expr = expr + \ ([Jump(label + 1), Label(label)] + \ stack[-1] + \ [Label(label + 1), - FailureJump(label, registers)]) + FailureJump(label)]) else: expr = expr + \ ([Label(label), - FailureJump(label + 1, registers)] + + FailureJump(label + 1)] + stack[-1] + [StarJump(label), Label(label + 1)]) @@ -1109,7 +1126,7 @@ def compile(pattern, flags=0): registers = registers_used(stack[-1]) if (index < len(pattern)) and (pattern[index] == '?'): # non-greedy matching - expr = [JumpInstructions(label + 1), + expr = [Jump(label + 1), Label(label)] + \ stack[-1] + \ [Label(label + 1), @@ -1130,9 +1147,10 @@ def compile(pattern, flags=0): # positive closure if len(stack) == 0: raise error, '+ needs something to repeat' + if (stack[-1][0].name == '(') or (stack[-1][0].name == '|'): raise error, '+ needs something to repeat' - registers = registers_used(stack[-1]) + if (index < len(pattern)) and (pattern[index] == '?'): # non-greedy expr = [Label(label)] + \ @@ -1156,7 +1174,6 @@ def compile(pattern, flags=0): elif char == '?': if len(stack) == 0: raise error, 'need something to be optional' - registers = registers_used(stack[-1]) if (index < len(pattern)) and (pattern[index] == '?'): # non-greedy matching expr = [FailureJump(label), @@ -1177,7 +1194,7 @@ def compile(pattern, flags=0): elif char == '.': if flags & DOTALL: - stack.append(Set(map(chr, range(256)))) + stack.append([Set(map(chr, range(256)))]) else: stack.append([AnyChar()]) @@ -1337,8 +1354,6 @@ def compile(pattern, flags=0): del stack[-1] if len(code) == 0: raise error, 'no code generated' - if (code[-1].name == '|'): - raise error, 'alternate with nothing on the right' code = filter(lambda x: x.name != '|', code) need_label = 0 for i in range(len(code)): diff --git a/Lib/test/output/test_re b/Lib/test/output/test_re index a143b519ff..c03b0f089c 100644 --- a/Lib/test/output/test_re +++ b/Lib/test/output/test_re @@ -59,7 +59,6 @@ test_re ('ab|cd', 'abc', 0, 'found', 'ab') ('ab|cd', 'abcd', 0, 'found', 'ab') ('()ef', 'def', 0, 'found+"-"+g1', 'ef-') -=== Syntax error: ('()ef', 'def', 0, 'found+"-"+g1', 'ef-') ('$b', 'b', 1) ('a\\(b', 'a(b', 0, 'found+"-"+g1', 'a(b-Error') ('a\\(*b', 'ab', 0, 'found', 'ab') @@ -84,7 +83,6 @@ test_re ('[abhgefdc]ij', 'hij', 0, 'found', 'hij') ('^(ab|cd)e', 'abcde', 1, 'xg1y', 'xy') ('(abc|)ef', 'abcdef', 0, 'found+"-"+g1', 'ef-') -=== Syntax error: ('(abc|)ef', 'abcdef', 0, 'found+"-"+g1', 'ef-') ('(a|b)c*d', 'abcd', 0, 'found+"-"+g1', 'bcd-b') ('(ab|ab*)bc', 'abc', 0, 'found+"-"+g1', 'abc-a') ('a([bc]*)c*', 'abc', 0, 'found+"-"+g1', 'abc-bc') @@ -155,15 +153,12 @@ test_re ('ab*bc', 'abbc', 0, 'found', 'abbc') ('ab*bc', 'abbbbc', 0, 'found', 'abbbbc') ('ab{0,}bc', 'abbbbc', 0, 'found', 'abbbbc') -*** Unexpected error *** ('ab+bc', 'abbc', 0, 'found', 'abbc') ('ab+bc', 'abc', 1) ('ab+bc', 'abq', 1) ('ab{1,}bc', 'abq', 1) -*** Unexpected error *** ('ab+bc', 'abbbbc', 0, 'found', 'abbbbc') ('ab{1,}bc', 'abbbbc', 0, 'found', 'abbbbc') -*** Unexpected error *** ('ab{1,3}bc', 'abbbbc', 0, 'found', 'abbbbc') ('ab{3,4}bc', 'abbbbc', 0, 'found', 'abbbbc') ('ab{4,5}bc', 'abbbbc', 1) @@ -205,13 +200,11 @@ test_re ('ab|cd', 'abc', 0, 'found', 'ab') ('ab|cd', 'abcd', 0, 'found', 'ab') ('()ef', 'def', 0, 'found+"-"+g1', 'ef-') -=== Syntax error: ('()ef', 'def', 0, 'found+"-"+g1', 'ef-') ('*a', '-', 2) ('(*)b', '-', 2) ('$b', 'b', 1) ('a\\', '-', 2) -('a\\(b', 'a(b', 0, 'found+"-"+g1', 'a(b-') -=== grouping error ('a\\(b', 'a(b', 0, 'found+"-"+g1', 'a(b-') 'a(b-Error' should be 'a(b-' +('a\\(b', 'a(b', 0, 'found+"-"+g1', 'a(b-Error') ('a\\(*b', 'ab', 0, 'found', 'ab') ('a\\(*b', 'a((b', 0, 'found', 'a((b') ('a\\\\b', 'a\\b', 0, 'found', 'a\\b') @@ -221,14 +214,11 @@ test_re ('(a)b(c)', 'abc', 0, 'found+"-"+g1+"-"+g2', 'abc-a-c') ('a+b+c', 'aabbabc', 0, 'found', 'abc') ('a{1,}b{1,}c', 'aabbabc', 0, 'found', 'abc') -*** Unexpected error *** ('a.+?c', 'abcabc', 0, 'found', 'abc') ('(a+|b)*', 'ab', 0, 'found+"-"+g1', 'ab-b') ('(a+|b){0,}', 'ab', 0, 'found+"-"+g1', 'ab-b') -*** Unexpected error *** ('(a+|b)+', 'ab', 0, 'found+"-"+g1', 'ab-b') ('(a+|b){1,}', 'ab', 0, 'found+"-"+g1', 'ab-b') -*** Unexpected error *** ('(a+|b)?', 'ab', 0, 'found+"-"+g1', 'a-a') ('(a+|b){0,1}', 'ab', 0, 'found+"-"+g1', 'a-a') (')(', '-', 2) @@ -246,7 +236,6 @@ test_re ('[abhgefdc]ij', 'hij', 0, 'found', 'hij') ('^(ab|cd)e', 'abcde', 1) ('(abc|)ef', 'abcdef', 0, 'found+"-"+g1', 'ef-') -=== Syntax error: ('(abc|)ef', 'abcdef', 0, 'found+"-"+g1', 'ef-') ('(a|b)c*d', 'abcd', 0, 'found+"-"+g1', 'bcd-b') ('(ab|ab*)bc', 'abc', 0, 'found+"-"+g1', 'abc-a') ('a([bc]*)c*', 'abc', 0, 'found+"-"+g1', 'abc-bc') @@ -258,260 +247,159 @@ test_re ('(ab|a)b*c', 'abc', 0, 'found+"-"+g1', 'abc-ab') ('((a)(b)c)(d)', 'abcd', 0, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d') ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', 0, 'found', 'alpha') -('^a(bc+|b[eh])g|.h$', 'abh', 0, 'found+"-"+g1', 'bh-') -=== grouping error ('^a(bc+|b[eh])g|.h$', 'abh', 0, 'found+"-"+g1', 'bh-') 'bh-None' should be 'bh-' -('(bc+d$|ef*g.|h?i(j|k))', 'effgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-') -=== grouping error ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-') 'effgz-effgz-None' should be 'effgz-effgz-' +('^a(bc+|b[eh])g|.h$', 'abh', 0, 'found+"-"+g1', 'bh-None') +('(bc+d$|ef*g.|h?i(j|k))', 'effgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None') ('(bc+d$|ef*g.|h?i(j|k))', 'ij', 0, 'found+"-"+g1+"-"+g2', 'ij-ij-j') ('(bc+d$|ef*g.|h?i(j|k))', 'effg', 1) ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', 1) -('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-') -=== grouping error ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-') 'effgz-effgz-None' should be 'effgz-effgz-' +('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None') ('((((((((((a))))))))))', 'a', 0, 'g10', 'a') ('((((((((((a))))))))))\\10', 'aa', 0, 'found', 'aa') -('((((((((((a))))))))))\\41', 'aa', 1) -=== Syntax error: ('((((((((((a))))))))))\\41', 'aa', 1) -('((((((((((a))))))))))\\41', 'a!', 0, 'found', 'a!') -=== Syntax error: ('((((((((((a))))))))))\\41', 'a!', 0, 'found', 'a!') ('(((((((((a)))))))))', 'a', 0, 'found', 'a') ('multiple words of text', 'uh-uh', 1) ('multiple words', 'multiple words, yeah', 0, 'found', 'multiple words') ('(.*)c(.*)', 'abcde', 0, 'found+"-"+g1+"-"+g2', 'abcde-ab-de') -('\\((.*), (.*)\\)', '(a, b)', 0, '(g2, g1)', '(b, a)') -=== grouping error ('\\((.*), (.*)\\)', '(a, b)', 0, '(g2, g1)', '(b, a)') ('b', 'a') should be '(b, a)' +('\\((.*), (.*)\\)', '(a, b)', 0, 'g2+"-"+g1', 'b-a') ('[k]', 'ab', 1) ('a[-]?c', 'ac', 0, 'found', 'ac') ('(abc)\\1', 'abcabc', 0, 'g1', 'abc') ('([a-c]*)\\1', 'abcabc', 0, 'g1', 'abc') ('(?i)abc', 'ABC', 0, 'found', 'ABC') -=== Failed incorrectly ('(?i)abc', 'ABC', 0, 'found', 'ABC') ('(?i)abc', 'XBC', 1) ('(?i)abc', 'AXC', 1) ('(?i)abc', 'ABX', 1) ('(?i)abc', 'XABCY', 0, 'found', 'ABC') -=== Failed incorrectly ('(?i)abc', 'XABCY', 0, 'found', 'ABC') ('(?i)abc', 'ABABC', 0, 'found', 'ABC') -=== Failed incorrectly ('(?i)abc', 'ABABC', 0, 'found', 'ABC') ('(?i)ab*c', 'ABC', 0, 'found', 'ABC') -=== Failed incorrectly ('(?i)ab*c', 'ABC', 0, 'found', 'ABC') ('(?i)ab*bc', 'ABC', 0, 'found', 'ABC') -=== Failed incorrectly ('(?i)ab*bc', 'ABC', 0, 'found', 'ABC') ('(?i)ab*bc', 'ABBC', 0, 'found', 'ABBC') -=== Failed incorrectly ('(?i)ab*bc', 'ABBC', 0, 'found', 'ABBC') ('(?i)ab*?bc', 'ABBBBC', 0, 'found', 'ABBBBC') -*** Unexpected error *** ('(?i)ab{0,}?bc', 'ABBBBC', 0, 'found', 'ABBBBC') -*** Unexpected error *** ('(?i)ab+?bc', 'ABBC', 0, 'found', 'ABBC') -=== Failed incorrectly ('(?i)ab+?bc', 'ABBC', 0, 'found', 'ABBC') ('(?i)ab+bc', 'ABC', 1) ('(?i)ab+bc', 'ABQ', 1) ('(?i)ab{1,}bc', 'ABQ', 1) -*** Unexpected error *** ('(?i)ab+bc', 'ABBBBC', 0, 'found', 'ABBBBC') -=== Failed incorrectly ('(?i)ab+bc', 'ABBBBC', 0, 'found', 'ABBBBC') ('(?i)ab{1,}?bc', 'ABBBBC', 0, 'found', 'ABBBBC') -*** Unexpected error *** ('(?i)ab{1,3}?bc', 'ABBBBC', 0, 'found', 'ABBBBC') -=== Failed incorrectly ('(?i)ab{1,3}?bc', 'ABBBBC', 0, 'found', 'ABBBBC') ('(?i)ab{3,4}?bc', 'ABBBBC', 0, 'found', 'ABBBBC') -=== Failed incorrectly ('(?i)ab{3,4}?bc', 'ABBBBC', 0, 'found', 'ABBBBC') ('(?i)ab{4,5}?bc', 'ABBBBC', 1) ('(?i)ab??bc', 'ABBC', 0, 'found', 'ABBC') -=== Failed incorrectly ('(?i)ab??bc', 'ABBC', 0, 'found', 'ABBC') ('(?i)ab??bc', 'ABC', 0, 'found', 'ABC') -=== Failed incorrectly ('(?i)ab??bc', 'ABC', 0, 'found', 'ABC') ('(?i)ab{0,1}?bc', 'ABC', 0, 'found', 'ABC') -=== Failed incorrectly ('(?i)ab{0,1}?bc', 'ABC', 0, 'found', 'ABC') ('(?i)ab??bc', 'ABBBBC', 1) ('(?i)ab??c', 'ABC', 0, 'found', 'ABC') -=== Failed incorrectly ('(?i)ab??c', 'ABC', 0, 'found', 'ABC') ('(?i)ab{0,1}?c', 'ABC', 0, 'found', 'ABC') -=== Failed incorrectly ('(?i)ab{0,1}?c', 'ABC', 0, 'found', 'ABC') ('(?i)^abc$', 'ABC', 0, 'found', 'ABC') -=== Failed incorrectly ('(?i)^abc$', 'ABC', 0, 'found', 'ABC') ('(?i)^abc$', 'ABCC', 1) ('(?i)^abc', 'ABCC', 0, 'found', 'ABC') -=== Failed incorrectly ('(?i)^abc', 'ABCC', 0, 'found', 'ABC') ('(?i)^abc$', 'AABC', 1) ('(?i)abc$', 'AABC', 0, 'found', 'ABC') -=== Failed incorrectly ('(?i)abc$', 'AABC', 0, 'found', 'ABC') ('(?i)^', 'ABC', 0, 'found', '') ('(?i)$', 'ABC', 0, 'found', '') ('(?i)a.c', 'ABC', 0, 'found', 'ABC') -=== Failed incorrectly ('(?i)a.c', 'ABC', 0, 'found', 'ABC') ('(?i)a.c', 'AXC', 0, 'found', 'AXC') -=== Failed incorrectly ('(?i)a.c', 'AXC', 0, 'found', 'AXC') ('(?i)a.*?c', 'AXYZC', 0, 'found', 'AXYZC') -*** Unexpected error *** ('(?i)a.*c', 'AXYZD', 1) ('(?i)a[bc]d', 'ABC', 1) ('(?i)a[bc]d', 'ABD', 0, 'found', 'ABD') -=== Failed incorrectly ('(?i)a[bc]d', 'ABD', 0, 'found', 'ABD') ('(?i)a[b-d]e', 'ABD', 1) ('(?i)a[b-d]e', 'ACE', 0, 'found', 'ACE') -=== Failed incorrectly ('(?i)a[b-d]e', 'ACE', 0, 'found', 'ACE') ('(?i)a[b-d]', 'AAC', 0, 'found', 'AC') -=== Failed incorrectly ('(?i)a[b-d]', 'AAC', 0, 'found', 'AC') ('(?i)a[-b]', 'A-', 0, 'found', 'A-') -=== Failed incorrectly ('(?i)a[-b]', 'A-', 0, 'found', 'A-') ('(?i)a[b-]', 'A-', 0, 'found', 'A-') -=== Failed incorrectly ('(?i)a[b-]', 'A-', 0, 'found', 'A-') ('(?i)a[b-a]', '-', 2) ('(?i)a[]b', '-', 2) ('(?i)a[', '-', 2) ('(?i)a]', 'A]', 0, 'found', 'A]') -=== Failed incorrectly ('(?i)a]', 'A]', 0, 'found', 'A]') ('(?i)a[]]b', 'A]B', 0, 'found', 'A]B') -=== Failed incorrectly ('(?i)a[]]b', 'A]B', 0, 'found', 'A]B') ('(?i)a[^bc]d', 'AED', 0, 'found', 'AED') -=== Failed incorrectly ('(?i)a[^bc]d', 'AED', 0, 'found', 'AED') ('(?i)a[^bc]d', 'ABD', 1) ('(?i)a[^-b]c', 'ADC', 0, 'found', 'ADC') -=== Failed incorrectly ('(?i)a[^-b]c', 'ADC', 0, 'found', 'ADC') ('(?i)a[^-b]c', 'A-C', 1) ('(?i)a[^]b]c', 'A]C', 1) ('(?i)a[^]b]c', 'ADC', 0, 'found', 'ADC') -=== Failed incorrectly ('(?i)a[^]b]c', 'ADC', 0, 'found', 'ADC') ('(?i)ab|cd', 'ABC', 0, 'found', 'AB') -=== Failed incorrectly ('(?i)ab|cd', 'ABC', 0, 'found', 'AB') ('(?i)ab|cd', 'ABCD', 0, 'found', 'AB') -=== Failed incorrectly ('(?i)ab|cd', 'ABCD', 0, 'found', 'AB') ('(?i)()ef', 'DEF', 0, 'found+"-"+g1', 'EF-') -=== Syntax error: ('(?i)()ef', 'DEF', 0, 'found+"-"+g1', 'EF-') ('(?i)*a', '-', 2) ('(?i)(*)b', '-', 2) ('(?i)$b', 'B', 1) ('(?i)a\\', '-', 2) -('(?i)a\\(b', 'A(B', 0, 'found+"-"+g1', 'A(B-') -=== Failed incorrectly ('(?i)a\\(b', 'A(B', 0, 'found+"-"+g1', 'A(B-') +('(?i)a\\(b', 'A(B', 0, 'found+"-"+g1', 'A(B-Error') ('(?i)a\\(*b', 'AB', 0, 'found', 'AB') -=== Failed incorrectly ('(?i)a\\(*b', 'AB', 0, 'found', 'AB') ('(?i)a\\(*b', 'A((B', 0, 'found', 'A((B') -=== Failed incorrectly ('(?i)a\\(*b', 'A((B', 0, 'found', 'A((B') ('(?i)a\\\\b', 'A\\B', 0, 'found', 'A\\B') -=== Failed incorrectly ('(?i)a\\\\b', 'A\\B', 0, 'found', 'A\\B') ('(?i)abc)', '-', 2) ('(?i)(abc', '-', 2) ('(?i)((a))', 'ABC', 0, 'found+"-"+g1+"-"+g2', 'A-A-A') -=== Failed incorrectly ('(?i)((a))', 'ABC', 0, 'found+"-"+g1+"-"+g2', 'A-A-A') ('(?i)(a)b(c)', 'ABC', 0, 'found+"-"+g1+"-"+g2', 'ABC-A-C') -=== Failed incorrectly ('(?i)(a)b(c)', 'ABC', 0, 'found+"-"+g1+"-"+g2', 'ABC-A-C') ('(?i)a+b+c', 'AABBABC', 0, 'found', 'ABC') -=== Failed incorrectly ('(?i)a+b+c', 'AABBABC', 0, 'found', 'ABC') ('(?i)a{1,}b{1,}c', 'AABBABC', 0, 'found', 'ABC') -*** Unexpected error *** ('(?i)a.+?c', 'ABCABC', 0, 'found', 'ABC') -=== Failed incorrectly ('(?i)a.+?c', 'ABCABC', 0, 'found', 'ABC') ('(?i)a.*?c', 'ABCABC', 0, 'found', 'ABC') -*** Unexpected error *** ('(?i)a.{0,5}?c', 'ABCABC', 0, 'found', 'ABC') -=== Failed incorrectly ('(?i)a.{0,5}?c', 'ABCABC', 0, 'found', 'ABC') ('(?i)(a+|b)*', 'AB', 0, 'found+"-"+g1', 'AB-B') -=== grouping error ('(?i)(a+|b)*', 'AB', 0, 'found+"-"+g1', 'AB-B') '-None' should be 'AB-B' ('(?i)(a+|b){0,}', 'AB', 0, 'found+"-"+g1', 'AB-B') -*** Unexpected error *** ('(?i)(a+|b)+', 'AB', 0, 'found+"-"+g1', 'AB-B') -=== Failed incorrectly ('(?i)(a+|b)+', 'AB', 0, 'found+"-"+g1', 'AB-B') ('(?i)(a+|b){1,}', 'AB', 0, 'found+"-"+g1', 'AB-B') -*** Unexpected error *** ('(?i)(a+|b)?', 'AB', 0, 'found+"-"+g1', 'A-A') -=== grouping error ('(?i)(a+|b)?', 'AB', 0, 'found+"-"+g1', 'A-A') '-None' should be 'A-A' ('(?i)(a+|b){0,1}', 'AB', 0, 'found+"-"+g1', 'A-A') -=== grouping error ('(?i)(a+|b){0,1}', 'AB', 0, 'found+"-"+g1', 'A-A') '-None' should be 'A-A' -('(?i)(a+|b){0,1}?', 'AB', 0, 'found+"-"+g1', '-') -=== grouping error ('(?i)(a+|b){0,1}?', 'AB', 0, 'found+"-"+g1', '-') '-None' should be '-' +('(?i)(a+|b){0,1}?', 'AB', 0, 'found+"-"+g1', '-None') ('(?i))(', '-', 2) ('(?i)[^ab]*', 'CDE', 0, 'found', 'CDE') ('(?i)abc', '', 1) ('(?i)a*', '', 0, 'found', '') ('(?i)([abc])*d', 'ABBBCD', 0, 'found+"-"+g1', 'ABBBCD-C') -=== Failed incorrectly ('(?i)([abc])*d', 'ABBBCD', 0, 'found+"-"+g1', 'ABBBCD-C') ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A') -=== Failed incorrectly ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A') ('(?i)a|b|c|d|e', 'E', 0, 'found', 'E') -=== Failed incorrectly ('(?i)a|b|c|d|e', 'E', 0, 'found', 'E') ('(?i)(a|b|c|d|e)f', 'EF', 0, 'found+"-"+g1', 'EF-E') -=== Failed incorrectly ('(?i)(a|b|c|d|e)f', 'EF', 0, 'found+"-"+g1', 'EF-E') ('(?i)abcd*efg', 'ABCDEFG', 0, 'found', 'ABCDEFG') -=== Failed incorrectly ('(?i)abcd*efg', 'ABCDEFG', 0, 'found', 'ABCDEFG') ('(?i)ab*', 'XABYABBBZ', 0, 'found', 'AB') -=== Failed incorrectly ('(?i)ab*', 'XABYABBBZ', 0, 'found', 'AB') ('(?i)ab*', 'XAYABBBZ', 0, 'found', 'A') -=== Failed incorrectly ('(?i)ab*', 'XAYABBBZ', 0, 'found', 'A') ('(?i)(ab|cd)e', 'ABCDE', 0, 'found+"-"+g1', 'CDE-CD') -=== Failed incorrectly ('(?i)(ab|cd)e', 'ABCDE', 0, 'found+"-"+g1', 'CDE-CD') ('(?i)[abhgefdc]ij', 'HIJ', 0, 'found', 'HIJ') -=== Failed incorrectly ('(?i)[abhgefdc]ij', 'HIJ', 0, 'found', 'HIJ') ('(?i)^(ab|cd)e', 'ABCDE', 1) ('(?i)(abc|)ef', 'ABCDEF', 0, 'found+"-"+g1', 'EF-') -=== Syntax error: ('(?i)(abc|)ef', 'ABCDEF', 0, 'found+"-"+g1', 'EF-') ('(?i)(a|b)c*d', 'ABCD', 0, 'found+"-"+g1', 'BCD-B') -=== Failed incorrectly ('(?i)(a|b)c*d', 'ABCD', 0, 'found+"-"+g1', 'BCD-B') ('(?i)(ab|ab*)bc', 'ABC', 0, 'found+"-"+g1', 'ABC-A') -=== Failed incorrectly ('(?i)(ab|ab*)bc', 'ABC', 0, 'found+"-"+g1', 'ABC-A') ('(?i)a([bc]*)c*', 'ABC', 0, 'found+"-"+g1', 'ABC-BC') -=== Failed incorrectly ('(?i)a([bc]*)c*', 'ABC', 0, 'found+"-"+g1', 'ABC-BC') ('(?i)a([bc]*)(c*d)', 'ABCD', 0, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D') -=== Failed incorrectly ('(?i)a([bc]*)(c*d)', 'ABCD', 0, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D') ('(?i)a([bc]+)(c*d)', 'ABCD', 0, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D') -=== Failed incorrectly ('(?i)a([bc]+)(c*d)', 'ABCD', 0, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D') ('(?i)a([bc]*)(c+d)', 'ABCD', 0, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD') -=== Failed incorrectly ('(?i)a([bc]*)(c+d)', 'ABCD', 0, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD') ('(?i)a[bcd]*dcdcde', 'ADCDCDE', 0, 'found', 'ADCDCDE') -=== Failed incorrectly ('(?i)a[bcd]*dcdcde', 'ADCDCDE', 0, 'found', 'ADCDCDE') ('(?i)a[bcd]+dcdcde', 'ADCDCDE', 1) ('(?i)(ab|a)b*c', 'ABC', 0, 'found+"-"+g1', 'ABC-AB') -=== Failed incorrectly ('(?i)(ab|a)b*c', 'ABC', 0, 'found+"-"+g1', 'ABC-AB') ('(?i)((a)(b)c)(d)', 'ABCD', 0, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D') -=== Failed incorrectly ('(?i)((a)(b)c)(d)', 'ABCD', 0, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D') ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', 0, 'found', 'ALPHA') -('(?i)^a(bc+|b[eh])g|.h$', 'ABH', 0, 'found+"-"+g1', 'BH-') -=== Failed incorrectly ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', 0, 'found+"-"+g1', 'BH-') -('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', 0, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-') -=== Failed incorrectly ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', 0, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-') +('(?i)^a(bc+|b[eh])g|.h$', 'ABH', 0, 'found+"-"+g1', 'BH-None') +('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', 0, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None') ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', 0, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J') -=== Failed incorrectly ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', 0, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J') ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', 1) ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', 1) -('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', 0, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-') -=== Failed incorrectly ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', 0, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-') +('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', 0, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None') ('(?i)((((((((((a))))))))))', 'A', 0, 'g10', 'A') -=== Failed incorrectly ('(?i)((((((((((a))))))))))', 'A', 0, 'g10', 'A') ('(?i)((((((((((a))))))))))\\10', 'AA', 0, 'found', 'AA') -=== Failed incorrectly ('(?i)((((((((((a))))))))))\\10', 'AA', 0, 'found', 'AA') -('(?i)((((((((((a))))))))))\\41', 'AA', 1) -=== Syntax error: ('(?i)((((((((((a))))))))))\\41', 'AA', 1) -('(?i)((((((((((a))))))))))\\41', 'A!', 0, 'found', 'A!') -=== Syntax error: ('(?i)((((((((((a))))))))))\\41', 'A!', 0, 'found', 'A!') ('(?i)(((((((((a)))))))))', 'A', 0, 'found', 'A') -=== Failed incorrectly ('(?i)(((((((((a)))))))))', 'A', 0, 'found', 'A') ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', 0, 'g1', 'A') -=== Failed incorrectly ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', 0, 'g1', 'A') ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', 0, 'g1', 'C') -=== Failed incorrectly ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', 0, 'g1', 'C') ('(?i)multiple words of text', 'UH-UH', 1) ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', 0, 'found', 'MULTIPLE WORDS') -=== Failed incorrectly ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', 0, 'found', 'MULTIPLE WORDS') ('(?i)(.*)c(.*)', 'ABCDE', 0, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE') -=== Failed incorrectly ('(?i)(.*)c(.*)', 'ABCDE', 0, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE') -('(?i)\\((.*), (.*)\\)', '(A, B)', 0, '(g2, g1)', '(B, A)') -=== grouping error ('(?i)\\((.*), (.*)\\)', '(A, B)', 0, '(g2, g1)', '(B, A)') ('B', 'A') should be '(B, A)' +('(?i)\\((.*), (.*)\\)', '(A, B)', 0, 'g2+"-"+g1', 'B-A') ('(?i)[k]', 'AB', 1) ('(?i)a[-]?c', 'AC', 0, 'found', 'AC') -=== Failed incorrectly ('(?i)a[-]?c', 'AC', 0, 'found', 'AC') ('(?i)(abc)\\1', 'ABCABC', 0, 'g1', 'ABC') -=== Failed incorrectly ('(?i)(abc)\\1', 'ABCABC', 0, 'g1', 'ABC') ('(?i)([a-c]*)\\1', 'ABCABC', 0, 'g1', 'ABC') -=== grouping error ('(?i)([a-c]*)\\1', 'ABCABC', 0, 'g1', 'ABC') '' should be 'ABC' -('a(?!b).', 'abad', 0, 'found', 'ad') -=== Syntax error: ('a(?!b).', 'abad', 0, 'found', 'ad') -('a(?=d).', 'abad', 0, 'found', 'ad') -=== Syntax error: ('a(?=d).', 'abad', 0, 'found', 'ad') -('a(?=c|d).', 'abad', 0, 'found', 'ad') -=== Syntax error: ('a(?=c|d).', 'abad', 0, 'found', 'ad') ('a(?:b|c|d)(.)', 'ace', 0, 'g1', 'e') ('a(?:b|c|d)*(.)', 'ace', 0, 'g1', 'e') ('a(?:b|c|d)+?(.)', 'ace', 0, 'g1', 'e') -('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', 0, 'g1+"-"+g2', 'c-e') +('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', 0, 'g1 + g2', 'ce') ('^(.+)?B', 'AB', 0, 'g1', 'A') +('w(?# comment', 'w', 2) +('w(?# comment 1)xy(?# comment 2)z', 'wxyz', 0, 'found', 'wxyz') +('w# comment 1\012 x(?x) y\012\011# comment 2\012\011z', 'wxyz', 0, 'found', 'wxyz') +('^abc', 'jkl\012abc\012xyz', 1) +('(?m)^abc', 'jkl\012abc\012xyz', 0, 'found', 'abc') +('a.b', 'a\012b', 1) +('(?s)a.b', 'a\012b', 0, 'found', 'a\012b') diff --git a/Lib/test/re_tests.py b/Lib/test/re_tests.py index fde2955868..c4510b39d6 100755 --- a/Lib/test/re_tests.py +++ b/Lib/test/re_tests.py @@ -262,7 +262,7 @@ tests = [ ('(*)b', '-', SYNTAX_ERROR), ('$b', 'b', FAIL), ('a\\', '-', SYNTAX_ERROR), - ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-'), + ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'), ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'), ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'), ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'), @@ -306,21 +306,22 @@ tests = [ ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'), ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'), ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'), - ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-'), - ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-'), + ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'), + ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'), ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'), ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL), ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL), - ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-'), + ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'), ('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'), ('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'), - ('((((((((((a))))))))))\\41', 'aa', FAIL), - ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'), +# Python does not have the same rules for \\41 so this is a syntax error +# ('((((((((((a))))))))))\\41', 'aa', FAIL), +# ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'), ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'), ('multiple words of text', 'uh-uh', FAIL), ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'), ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'), - ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, '(g2, g1)', '(b, a)'), + ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'), ('[k]', 'ab', FAIL), ##('abcd', 'abcd', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'abcd-$&-\\abcd'), ##('a(bc)d', 'abcd', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'bc-$1-\\bc'), @@ -389,7 +390,7 @@ tests = [ ('(?i)(*)b', '-', SYNTAX_ERROR), ('(?i)$b', 'B', FAIL), ('(?i)a\\', '-', SYNTAX_ERROR), - ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-'), + ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-Error'), ('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'), ('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'), ('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'), @@ -409,7 +410,7 @@ tests = [ ('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'), ('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'), ('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'), - ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-'), + ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-None'), ('(?i))(', '-', SYNTAX_ERROR), ('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'), ('(?i)abc', '', FAIL), @@ -436,35 +437,62 @@ tests = [ ('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'), ('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'), ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'), - ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-'), - ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-'), + ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-None'), + ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'), ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'), ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL), ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL), - ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-'), + ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'), ('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'), ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'), - ('(?i)((((((((((a))))))))))\\41', 'AA', FAIL), - ('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'), + #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL), + #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'), ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'), ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'), ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'), ('(?i)multiple words of text', 'UH-UH', FAIL), ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'), ('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'), - ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, '(g2, g1)', '(B, A)'), + ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, 'g2+"-"+g1', 'B-A'), ('(?i)[k]', 'AB', FAIL), ##('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'), ##('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'), ('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'), ('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'), ('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'), - ('a(?!b).', 'abad', SUCCEED, 'found', 'ad'), - ('a(?=d).', 'abad', SUCCEED, 'found', 'ad'), - ('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'), + # these zero-width assertions are not supported + #('a(?!b).', 'abad', SUCCEED, 'found', 'ad'), + #('a(?=d).', 'abad', SUCCEED, 'found', 'ad'), + #('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'), ('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'), ('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'), ('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'), - ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1+"-"+g2', 'c-e'), + ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'), ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'), + + # Comments using the (?#...) syntax + + ('w(?# comment', 'w', SYNTAX_ERROR), + ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', SUCCEED, 'found', 'wxyz'), + + # Comments using the x embedded pattern modifier (in an unusual place too) + + ("""w# comment 1 + x(?x) y + # comment 2 + z""", 'wxyz', SUCCEED, 'found', 'wxyz'), + + # using the m embedded pattern modifier + + ('^abc', """jkl +abc +xyz""", FAIL), + ('(?m)^abc', """jkl +abc +xyz""", SUCCEED, 'found', 'abc'), + + # using the s embedded pattern modifier + + ('a.b', 'a\nb', FAIL), + ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'), ] diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index f1b270df3b..6b8c65d71a 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1,5 +1,10 @@ +#!/usr/local/bin/python +# -*- mode: python -*- +# $Id$ + from test_support import verbose import re +import reop import sys, os, string, traceback from re_tests import * @@ -7,6 +12,7 @@ if verbose: print 'Running re_tests test suite' for t in tests: print t + sys.stdout.flush() pattern=s=outcome=repl=expected=None if len(t)==5: pattern, s, outcome, repl, expected = t @@ -21,6 +27,8 @@ for t in tests: if outcome==SYNTAX_ERROR: pass # Expected a syntax error else: print '=== Syntax error:', t + except KeyboardInterrupt: + raise KeyboardInterrupt except: print '*** Unexpected error ***' if verbose: @@ -28,7 +36,7 @@ for t in tests: else: try: result=obj.search(s) - except regex.error, msg: + except (re.error, reop.error), msg: print '=== Unexpected exception', t, repr(msg) if outcome==SYNTAX_ERROR: # This should have been a syntax error; forget it. @@ -41,22 +49,26 @@ for t in tests: # Matched, as expected, so now we compute the # result string and compare it to our expected result. start, end = result.span(0) - vardict={'found': result.group(0), 'groups': result.group()} + vardict={'found': result.group(0), + 'groups': result.group(), + 'flags': result.re.flags} for i in range(1, 100): try: gi = result.group(i) # Special hack because else the string concat fails: - if gi is None: gi = "None" + if gi is None: + gi = "None" except IndexError: gi = "Error" vardict['g%d' % i] = gi for i in result.re.groupindex.keys(): try: gi = result.group(i) + if gi is None: + gi = "None" except IndexError: - pass - else: - vardict[i] = str(gi) + gi = "Error" + vardict[i] = gi repl=eval(repl, vardict) if repl!=expected: print '=== grouping error', t, |