diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2014-09-14 17:40:44 +0300 |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2014-09-14 17:40:44 +0300 |
commit | 7644ff1fb1860ffb5ce83342349f8b905dac633f (patch) | |
tree | fd105b733e3a2e77881c830bc1e30c96ecc5a178 /Lib/test/test_re.py | |
parent | ed5ea15bda1b0b47edf1354ee8a3e1e8bed0993c (diff) | |
download | cpython-git-7644ff1fb1860ffb5ce83342349f8b905dac633f.tar.gz |
The re tests now work again on Unicode-disabled builds.
Simplified the existing detection of Unicode-disabled builds.
Diffstat (limited to 'Lib/test/test_re.py')
-rw-r--r-- | Lib/test/test_re.py | 99 |
1 files changed, 52 insertions, 47 deletions
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 8c556f53fb..f285c6bed2 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1,6 +1,6 @@ from test.test_support import verbose, run_unittest, import_module from test.test_support import precisionbigmemtest, _2G, cpython_only -from test.test_support import captured_stdout +from test.test_support import captured_stdout, have_unicode, requires_unicode, u import re from re import Scanner import sre_constants @@ -86,6 +86,7 @@ class ReTests(unittest.TestCase): self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'), 'abc\ndef\n') + @requires_unicode def test_bug_1140(self): # re.sub(x, y, u'') should return u'', not '', and # re.sub(x, y, '') should return '', not u''. @@ -376,10 +377,11 @@ class ReTests(unittest.TestCase): "abcd abc bcd bx", re.LOCALE).group(1), "bx") self.assertEqual(re.search(r"\B(b.)\B", "abc bcd bc abxd", re.LOCALE).group(1), "bx") - self.assertEqual(re.search(r"\b(b.)\b", - "abcd abc bcd bx", re.UNICODE).group(1), "bx") - self.assertEqual(re.search(r"\B(b.)\B", - "abc bcd bc abxd", re.UNICODE).group(1), "bx") + if have_unicode: + self.assertEqual(re.search(r"\b(b.)\b", + "abcd abc bcd bx", re.UNICODE).group(1), "bx") + self.assertEqual(re.search(r"\B(b.)\B", + "abc bcd bc abxd", re.UNICODE).group(1), "bx") self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc") self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc") self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M)) @@ -394,8 +396,9 @@ class ReTests(unittest.TestCase): "1aa! a").group(0), "1aa! a") self.assertEqual(re.search(r"\d\D\w\W\s\S", "1aa! a", re.LOCALE).group(0), "1aa! a") - self.assertEqual(re.search(r"\d\D\w\W\s\S", - "1aa! a", re.UNICODE).group(0), "1aa! a") + if have_unicode: + self.assertEqual(re.search(r"\d\D\w\W\s\S", + "1aa! a", re.UNICODE).group(0), "1aa! 
a") def test_string_boundaries(self): # See http://bugs.python.org/issue10713 @@ -423,13 +426,14 @@ class ReTests(unittest.TestCase): # Can match around the whitespace. self.assertEqual(len(re.findall(r"\B", " ")), 2) + @requires_unicode def test_bigcharset(self): - self.assertEqual(re.match(u"([\u2222\u2223])", - u"\u2222").group(1), u"\u2222") - self.assertEqual(re.match(u"([\u2222\u2223])", - u"\u2222", re.UNICODE).group(1), u"\u2222") + self.assertEqual(re.match(u(r"([\u2222\u2223])"), + unichr(0x2222)).group(1), unichr(0x2222)) + self.assertEqual(re.match(u(r"([\u2222\u2223])"), + unichr(0x2222), re.UNICODE).group(1), unichr(0x2222)) r = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255))) - self.assertEqual(re.match(r, u"\uff01", re.UNICODE).group(), u"\uff01") + self.assertEqual(re.match(r, unichr(0xff01), re.UNICODE).group(), unichr(0xff01)) def test_big_codesize(self): # Issue #1160 @@ -476,7 +480,8 @@ class ReTests(unittest.TestCase): import _sre self.assertEqual(_sre.getlower(ord('A'), 0), ord('a')) self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a')) - self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a')) + if have_unicode: + self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a')) self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC") self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC") @@ -503,8 +508,9 @@ class ReTests(unittest.TestCase): self.assertEqual(m.group(), match) self.assertEqual(m.span(), span) + @requires_unicode def test_re_escape(self): - alnum_chars = string.ascii_letters + string.digits + alnum_chars = unicode(string.ascii_letters + string.digits) p = u''.join(unichr(i) for i in range(256)) for c in p: if c in alnum_chars: @@ -517,7 +523,7 @@ class ReTests(unittest.TestCase): self.assertMatch(re.escape(p), p) def test_re_escape_byte(self): - alnum_chars = (string.ascii_letters + string.digits).encode('ascii') + alnum_chars = string.ascii_letters + string.digits p = ''.join(chr(i) for i 
in range(256)) for b in p: if b in alnum_chars: @@ -529,20 +535,21 @@ class ReTests(unittest.TestCase): self.assertMatch(re.escape(b), b) self.assertMatch(re.escape(p), p) + @requires_unicode def test_re_escape_non_ascii(self): - s = u'xxx\u2620\u2620\u2620xxx' + s = u(r'xxx\u2620\u2620\u2620xxx') s_escaped = re.escape(s) - self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx') + self.assertEqual(s_escaped, u(r'xxx\\\u2620\\\u2620\\\u2620xxx')) self.assertMatch(s_escaped, s) - self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s, - u'x\u2620\u2620\u2620x', (2, 7), re.search) + self.assertMatch(u'.%s+.' % re.escape(unichr(0x2620)), s, + u(r'x\u2620\u2620\u2620x'), (2, 7), re.search) def test_re_escape_non_ascii_bytes(self): - b = u'y\u2620y\u2620y'.encode('utf-8') + b = b'y\xe2\x98\xa0y\xe2\x98\xa0y' b_escaped = re.escape(b) self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y') self.assertMatch(b_escaped, b) - res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b) + res = re.findall(re.escape(b'\xe2\x98\xa0'), b) self.assertEqual(len(res), 2) def test_pickling(self): @@ -621,8 +628,9 @@ class ReTests(unittest.TestCase): # non-recursive scheme was implemented. 
self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001) + @requires_unicode def test_bug_612074(self): - pat=u"["+re.escape(u"\u2039")+u"]" + pat=u"["+re.escape(unichr(0x2039))+u"]" self.assertEqual(re.compile(pat) and 1, 1) def test_stack_overflow(self): @@ -696,12 +704,9 @@ class ReTests(unittest.TestCase): self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(), ('a', None, None)) + @requires_unicode def test_bug_764548(self): # bug 764548, re.compile() barfs on str/unicode subclasses - try: - unicode - except NameError: - self.skipTest('no problem if we have no unicode') class my_unicode(unicode): pass pat = re.compile(my_unicode("abc")) self.assertIsNone(pat.match("xyz")) @@ -711,20 +716,14 @@ class ReTests(unittest.TestCase): self.assertEqual([item.group(0) for item in iter], [":", "::", ":::"]) + @requires_unicode def test_bug_926075(self): - try: - unicode - except NameError: - self.skipTest('no problem if we have no unicode') self.assertIsNot(re.compile('bug_926075'), - re.compile(eval("u'bug_926075'"))) + re.compile(u'bug_926075')) + @requires_unicode def test_bug_931848(self): - try: - unicode - except NameError: - self.skipTest('no problem if we have no unicode') - pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"') + pattern = u(r"[\u002E\u3002\uFF0E\uFF61]") self.assertEqual(re.compile(pattern).split("a.b.c"), ['a','b','c']) @@ -743,23 +742,24 @@ class ReTests(unittest.TestCase): self.assertEqual(iter.next().span(), (4, 4)) self.assertRaises(StopIteration, iter.next) + @requires_unicode def test_bug_6561(self): # '\d' should match characters in Unicode category 'Nd' # (Number, Decimal Digit), but not those in 'Nl' (Number, # Letter) or 'No' (Number, Other). 
decimal_digits = [ - u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd' - u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd' - u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd' + unichr(0x0037), # '\N{DIGIT SEVEN}', category 'Nd' + unichr(0x0e58), # '\N{THAI DIGIT SIX}', category 'Nd' + unichr(0xff10), # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd' ] for x in decimal_digits: self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x) not_decimal_digits = [ - u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl' - u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl' - u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No' - u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No' + unichr(0x2165), # '\N{ROMAN NUMERAL SIX}', category 'Nl' + unichr(0x3039), # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl' + unichr(0x2082), # '\N{SUBSCRIPT TWO}', category 'No' + unichr(0x32b4), # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No' ] for x in not_decimal_digits: self.assertIsNone(re.match('^\d$', x, re.UNICODE)) @@ -767,11 +767,15 @@ class ReTests(unittest.TestCase): def test_empty_array(self): # SF buf 1647541 import array - for typecode in 'cbBuhHiIlLfd': + typecodes = 'cbBhHiIlLfd' + if have_unicode: + typecodes += 'u' + for typecode in typecodes: a = array.array(typecode) self.assertIsNone(re.compile("bla").match(a)) self.assertEqual(re.compile("").match(a).groups(), ()) + @requires_unicode def test_inline_flags(self): # Bug #1700 upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow @@ -906,9 +910,10 @@ class ReTests(unittest.TestCase): pattern = '.' + reps + mod + 'yz' self.assertEqual(re.compile(pattern, re.S).findall('xyz'), ['xyz'], msg=pattern) - pattern = pattern.encode() - self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'), - [b'xyz'], msg=pattern) + if have_unicode: + pattern = unicode(pattern) + self.assertEqual(re.compile(pattern, re.S).findall(u'xyz'), + [u'xyz'], msg=pattern) def test_bug_2537(self): |