diff options
Diffstat (limited to 'Lib/test/test_re.py')
-rw-r--r-- | Lib/test/test_re.py | 95 |
1 files changed, 81 insertions, 14 deletions
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index f8a564762f..1f739672a2 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1,13 +1,15 @@ from test.test_support import verbose, run_unittest, import_module import re from re import Scanner -import sys, os, traceback +import sys +import string +import traceback from weakref import proxy # Misc tests from Tim Peters' re.doc # WARNING: Don't change details in these tests if you don't know -# what you're doing. Some of these tests were carefuly modeled to +# what you're doing. Some of these tests were carefully modeled to # cover most of the code. import unittest @@ -429,17 +431,61 @@ class ReTests(unittest.TestCase): self.assertEqual(re.search("\s(b)", " b").group(1), "b") self.assertEqual(re.search("a\s", "a ").group(0), "a ") - def test_re_escape(self): - p="" - for i in range(0, 256): - p = p + chr(i) - self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None, - True) - self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1)) + def assertMatch(self, pattern, text, match=None, span=None, + matcher=re.match): + if match is None and span is None: + # the pattern matches the whole text + match = text + span = (0, len(text)) + elif match is None or span is None: + raise ValueError('If match is not None, span should be specified ' + '(and vice versa).') + m = matcher(pattern, text) + self.assertTrue(m) + self.assertEqual(m.group(), match) + self.assertEqual(m.span(), span) - pat=re.compile(re.escape(p)) - self.assertEqual(pat.match(p) is not None, True) - self.assertEqual(pat.match(p).span(), (0,256)) + def test_re_escape(self): + alnum_chars = string.ascii_letters + string.digits + p = u''.join(unichr(i) for i in range(256)) + for c in p: + if c in alnum_chars: + self.assertEqual(re.escape(c), c) + elif c == u'\x00': + self.assertEqual(re.escape(c), u'\\000') + else: + self.assertEqual(re.escape(c), u'\\' + c) + self.assertMatch(re.escape(c), c) + self.assertMatch(re.escape(p), p) + + def test_re_escape_byte(self): + alnum_chars = (string.ascii_letters + string.digits).encode('ascii') + p = ''.join(chr(i) for i in range(256)) + for b in p: + if b in alnum_chars: + self.assertEqual(re.escape(b), b) + elif b == b'\x00': + self.assertEqual(re.escape(b), b'\\000') + else: + self.assertEqual(re.escape(b), b'\\' + b) + self.assertMatch(re.escape(b), b) + self.assertMatch(re.escape(p), p) + + def test_re_escape_non_ascii(self): + s = u'xxx\u2620\u2620\u2620xxx' + s_escaped = re.escape(s) + self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx') + self.assertMatch(s_escaped, s) + self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s, + u'x\u2620\u2620\u2620x', (2, 7), re.search) + + def test_re_escape_non_ascii_bytes(self): + b = u'y\u2620y\u2620y'.encode('utf-8') + b_escaped = re.escape(b) + self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y') + self.assertMatch(b_escaped, b) + res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b) + self.assertEqual(len(res), 2) def test_pickling(self): import pickle @@ -603,7 +649,7 @@ class ReTests(unittest.TestCase): unicode except NameError: return # no problem if we have no unicode - self.assert_(re.compile('bug_926075') is not + self.assertTrue(re.compile('bug_926075') is not re.compile(eval("u'bug_926075'"))) def test_bug_931848(self): @@ -630,6 +676,27 @@ class ReTests(unittest.TestCase): self.assertEqual(iter.next().span(), (4, 4)) self.assertRaises(StopIteration, iter.next) + def test_bug_6561(self): + # '\d' should match characters in Unicode category 'Nd' + # (Number, Decimal Digit), but not those in 'Nl' (Number, + # Letter) or 'No' (Number, Other). + decimal_digits = [ + u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd' + u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd' + u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd' + ] + for x in decimal_digits: + self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x) + + not_decimal_digits = [ + u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl' + u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl' + u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No' + u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No' + ] + for x in not_decimal_digits: + self.assertIsNone(re.match('^\d$', x, re.UNICODE)) + def test_empty_array(self): # SF buf 1647541 import array @@ -691,7 +758,7 @@ class ReTests(unittest.TestCase): self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow]) def run_re_tests(): - from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR + from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR if verbose: print 'Running re_tests test suite' else: |