summaryrefslogtreecommitdiff
path: root/Lib/test/test_re.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_re.py')
-rw-r--r--Lib/test/test_re.py95
1 files changed, 81 insertions, 14 deletions
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index f8a564762f..1f739672a2 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1,13 +1,15 @@
from test.test_support import verbose, run_unittest, import_module
import re
from re import Scanner
-import sys, os, traceback
+import sys
+import string
+import traceback
from weakref import proxy
# Misc tests from Tim Peters' re.doc
# WARNING: Don't change details in these tests if you don't know
-# what you're doing. Some of these tests were carefuly modeled to
+# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
import unittest
@@ -429,17 +431,61 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.search("\s(b)", " b").group(1), "b")
self.assertEqual(re.search("a\s", "a ").group(0), "a ")
- def test_re_escape(self):
- p=""
- for i in range(0, 256):
- p = p + chr(i)
- self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
- True)
- self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
+ def assertMatch(self, pattern, text, match=None, span=None,
+ matcher=re.match):
+ if match is None and span is None:
+ # the pattern matches the whole text
+ match = text
+ span = (0, len(text))
+ elif match is None or span is None:
+ raise ValueError('If match is not None, span should be specified '
+ '(and vice versa).')
+ m = matcher(pattern, text)
+ self.assertTrue(m)
+ self.assertEqual(m.group(), match)
+ self.assertEqual(m.span(), span)
- pat=re.compile(re.escape(p))
- self.assertEqual(pat.match(p) is not None, True)
- self.assertEqual(pat.match(p).span(), (0,256))
+ def test_re_escape(self):
+ alnum_chars = string.ascii_letters + string.digits
+ p = u''.join(unichr(i) for i in range(256))
+ for c in p:
+ if c in alnum_chars:
+ self.assertEqual(re.escape(c), c)
+ elif c == u'\x00':
+ self.assertEqual(re.escape(c), u'\\000')
+ else:
+ self.assertEqual(re.escape(c), u'\\' + c)
+ self.assertMatch(re.escape(c), c)
+ self.assertMatch(re.escape(p), p)
+
+ def test_re_escape_byte(self):
+ alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
+ p = ''.join(chr(i) for i in range(256))
+ for b in p:
+ if b in alnum_chars:
+ self.assertEqual(re.escape(b), b)
+ elif b == b'\x00':
+ self.assertEqual(re.escape(b), b'\\000')
+ else:
+ self.assertEqual(re.escape(b), b'\\' + b)
+ self.assertMatch(re.escape(b), b)
+ self.assertMatch(re.escape(p), p)
+
+ def test_re_escape_non_ascii(self):
+ s = u'xxx\u2620\u2620\u2620xxx'
+ s_escaped = re.escape(s)
+ self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
+ self.assertMatch(s_escaped, s)
+ self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s,
+ u'x\u2620\u2620\u2620x', (2, 7), re.search)
+
+ def test_re_escape_non_ascii_bytes(self):
+ b = u'y\u2620y\u2620y'.encode('utf-8')
+ b_escaped = re.escape(b)
+ self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
+ self.assertMatch(b_escaped, b)
+ res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b)
+ self.assertEqual(len(res), 2)
def test_pickling(self):
import pickle
@@ -603,7 +649,7 @@ class ReTests(unittest.TestCase):
unicode
except NameError:
return # no problem if we have no unicode
- self.assert_(re.compile('bug_926075') is not
+ self.assertTrue(re.compile('bug_926075') is not
re.compile(eval("u'bug_926075'")))
def test_bug_931848(self):
@@ -630,6 +676,27 @@ class ReTests(unittest.TestCase):
self.assertEqual(iter.next().span(), (4, 4))
self.assertRaises(StopIteration, iter.next)
+ def test_bug_6561(self):
+ # '\d' should match characters in Unicode category 'Nd'
+ # (Number, Decimal Digit), but not those in 'Nl' (Number,
+ # Letter) or 'No' (Number, Other).
+ decimal_digits = [
+ u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
+ u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
+ u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
+ ]
+ for x in decimal_digits:
+ self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
+
+ not_decimal_digits = [
+ u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
+ u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
+ u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
+ u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
+ ]
+ for x in not_decimal_digits:
+ self.assertIsNone(re.match('^\d$', x, re.UNICODE))
+
def test_empty_array(self):
# SF buf 1647541
import array
@@ -691,7 +758,7 @@ class ReTests(unittest.TestCase):
self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
def run_re_tests():
- from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
+ from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
if verbose:
print 'Running re_tests test suite'
else: