summary | refs | log | tree | commit | diff
path: root/Lib/test/test_re.py
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2014-09-14 17:40:44 +0300
committer Serhiy Storchaka <storchaka@gmail.com> 2014-09-14 17:40:44 +0300
commit 7644ff1fb1860ffb5ce83342349f8b905dac633f (patch)
tree fd105b733e3a2e77881c830bc1e30c96ecc5a178 /Lib/test/test_re.py
parent ed5ea15bda1b0b47edf1354ee8a3e1e8bed0993c (diff)
download cpython-git-7644ff1fb1860ffb5ce83342349f8b905dac633f.tar.gz
Now re tests work again on Unicode-disabled build.
Simplified existing detections of Unicode-disabled build.
Diffstat (limited to 'Lib/test/test_re.py')
-rw-r--r-- Lib/test/test_re.py 99
1 file changed, 52 insertions, 47 deletions
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 8c556f53fb..f285c6bed2 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1,6 +1,6 @@
from test.test_support import verbose, run_unittest, import_module
from test.test_support import precisionbigmemtest, _2G, cpython_only
-from test.test_support import captured_stdout
+from test.test_support import captured_stdout, have_unicode, requires_unicode, u
import re
from re import Scanner
import sre_constants
@@ -86,6 +86,7 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
'abc\ndef\n')
+ @requires_unicode
def test_bug_1140(self):
# re.sub(x, y, u'') should return u'', not '', and
# re.sub(x, y, '') should return '', not u''.
@@ -376,10 +377,11 @@ class ReTests(unittest.TestCase):
"abcd abc bcd bx", re.LOCALE).group(1), "bx")
self.assertEqual(re.search(r"\B(b.)\B",
"abc bcd bc abxd", re.LOCALE).group(1), "bx")
- self.assertEqual(re.search(r"\b(b.)\b",
- "abcd abc bcd bx", re.UNICODE).group(1), "bx")
- self.assertEqual(re.search(r"\B(b.)\B",
- "abc bcd bc abxd", re.UNICODE).group(1), "bx")
+ if have_unicode:
+ self.assertEqual(re.search(r"\b(b.)\b",
+ "abcd abc bcd bx", re.UNICODE).group(1), "bx")
+ self.assertEqual(re.search(r"\B(b.)\B",
+ "abc bcd bc abxd", re.UNICODE).group(1), "bx")
self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
@@ -394,8 +396,9 @@ class ReTests(unittest.TestCase):
"1aa! a").group(0), "1aa! a")
self.assertEqual(re.search(r"\d\D\w\W\s\S",
"1aa! a", re.LOCALE).group(0), "1aa! a")
- self.assertEqual(re.search(r"\d\D\w\W\s\S",
- "1aa! a", re.UNICODE).group(0), "1aa! a")
+ if have_unicode:
+ self.assertEqual(re.search(r"\d\D\w\W\s\S",
+ "1aa! a", re.UNICODE).group(0), "1aa! a")
def test_string_boundaries(self):
# See http://bugs.python.org/issue10713
@@ -423,13 +426,14 @@ class ReTests(unittest.TestCase):
# Can match around the whitespace.
self.assertEqual(len(re.findall(r"\B", " ")), 2)
+ @requires_unicode
def test_bigcharset(self):
- self.assertEqual(re.match(u"([\u2222\u2223])",
- u"\u2222").group(1), u"\u2222")
- self.assertEqual(re.match(u"([\u2222\u2223])",
- u"\u2222", re.UNICODE).group(1), u"\u2222")
+ self.assertEqual(re.match(u(r"([\u2222\u2223])"),
+ unichr(0x2222)).group(1), unichr(0x2222))
+ self.assertEqual(re.match(u(r"([\u2222\u2223])"),
+ unichr(0x2222), re.UNICODE).group(1), unichr(0x2222))
r = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255)))
- self.assertEqual(re.match(r, u"\uff01", re.UNICODE).group(), u"\uff01")
+ self.assertEqual(re.match(r, unichr(0xff01), re.UNICODE).group(), unichr(0xff01))
def test_big_codesize(self):
# Issue #1160
@@ -476,7 +480,8 @@ class ReTests(unittest.TestCase):
import _sre
self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
- self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
+ if have_unicode:
+ self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
@@ -503,8 +508,9 @@ class ReTests(unittest.TestCase):
self.assertEqual(m.group(), match)
self.assertEqual(m.span(), span)
+ @requires_unicode
def test_re_escape(self):
- alnum_chars = string.ascii_letters + string.digits
+ alnum_chars = unicode(string.ascii_letters + string.digits)
p = u''.join(unichr(i) for i in range(256))
for c in p:
if c in alnum_chars:
@@ -517,7 +523,7 @@ class ReTests(unittest.TestCase):
self.assertMatch(re.escape(p), p)
def test_re_escape_byte(self):
- alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
+ alnum_chars = string.ascii_letters + string.digits
p = ''.join(chr(i) for i in range(256))
for b in p:
if b in alnum_chars:
@@ -529,20 +535,21 @@ class ReTests(unittest.TestCase):
self.assertMatch(re.escape(b), b)
self.assertMatch(re.escape(p), p)
+ @requires_unicode
def test_re_escape_non_ascii(self):
- s = u'xxx\u2620\u2620\u2620xxx'
+ s = u(r'xxx\u2620\u2620\u2620xxx')
s_escaped = re.escape(s)
- self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
+ self.assertEqual(s_escaped, u(r'xxx\\\u2620\\\u2620\\\u2620xxx'))
self.assertMatch(s_escaped, s)
- self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s,
- u'x\u2620\u2620\u2620x', (2, 7), re.search)
+ self.assertMatch(u'.%s+.' % re.escape(unichr(0x2620)), s,
+ u(r'x\u2620\u2620\u2620x'), (2, 7), re.search)
def test_re_escape_non_ascii_bytes(self):
- b = u'y\u2620y\u2620y'.encode('utf-8')
+ b = b'y\xe2\x98\xa0y\xe2\x98\xa0y'
b_escaped = re.escape(b)
self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
self.assertMatch(b_escaped, b)
- res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b)
+ res = re.findall(re.escape(b'\xe2\x98\xa0'), b)
self.assertEqual(len(res), 2)
def test_pickling(self):
@@ -621,8 +628,9 @@ class ReTests(unittest.TestCase):
# non-recursive scheme was implemented.
self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
+ @requires_unicode
def test_bug_612074(self):
- pat=u"["+re.escape(u"\u2039")+u"]"
+ pat=u"["+re.escape(unichr(0x2039))+u"]"
self.assertEqual(re.compile(pat) and 1, 1)
def test_stack_overflow(self):
@@ -696,12 +704,9 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
('a', None, None))
+ @requires_unicode
def test_bug_764548(self):
# bug 764548, re.compile() barfs on str/unicode subclasses
- try:
- unicode
- except NameError:
- self.skipTest('no problem if we have no unicode')
class my_unicode(unicode): pass
pat = re.compile(my_unicode("abc"))
self.assertIsNone(pat.match("xyz"))
@@ -711,20 +716,14 @@ class ReTests(unittest.TestCase):
self.assertEqual([item.group(0) for item in iter],
[":", "::", ":::"])
+ @requires_unicode
def test_bug_926075(self):
- try:
- unicode
- except NameError:
- self.skipTest('no problem if we have no unicode')
self.assertIsNot(re.compile('bug_926075'),
- re.compile(eval("u'bug_926075'")))
+ re.compile(u'bug_926075'))
+ @requires_unicode
def test_bug_931848(self):
- try:
- unicode
- except NameError:
- self.skipTest('no problem if we have no unicode')
- pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
+ pattern = u(r"[\u002E\u3002\uFF0E\uFF61]")
self.assertEqual(re.compile(pattern).split("a.b.c"),
['a','b','c'])
@@ -743,23 +742,24 @@ class ReTests(unittest.TestCase):
self.assertEqual(iter.next().span(), (4, 4))
self.assertRaises(StopIteration, iter.next)
+ @requires_unicode
def test_bug_6561(self):
# '\d' should match characters in Unicode category 'Nd'
# (Number, Decimal Digit), but not those in 'Nl' (Number,
# Letter) or 'No' (Number, Other).
decimal_digits = [
- u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
- u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
- u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
+ unichr(0x0037), # '\N{DIGIT SEVEN}', category 'Nd'
+ unichr(0x0e58), # '\N{THAI DIGIT SIX}', category 'Nd'
+ unichr(0xff10), # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
]
for x in decimal_digits:
self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
not_decimal_digits = [
- u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
- u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
- u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
- u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
+ unichr(0x2165), # '\N{ROMAN NUMERAL SIX}', category 'Nl'
+ unichr(0x3039), # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
+ unichr(0x2082), # '\N{SUBSCRIPT TWO}', category 'No'
+ unichr(0x32b4), # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
]
for x in not_decimal_digits:
self.assertIsNone(re.match('^\d$', x, re.UNICODE))
@@ -767,11 +767,15 @@ class ReTests(unittest.TestCase):
def test_empty_array(self):
# SF bug 1647541
import array
- for typecode in 'cbBuhHiIlLfd':
+ typecodes = 'cbBhHiIlLfd'
+ if have_unicode:
+ typecodes += 'u'
+ for typecode in typecodes:
a = array.array(typecode)
self.assertIsNone(re.compile("bla").match(a))
self.assertEqual(re.compile("").match(a).groups(), ())
+ @requires_unicode
def test_inline_flags(self):
# Bug #1700
upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Below
@@ -906,9 +910,10 @@ class ReTests(unittest.TestCase):
pattern = '.' + reps + mod + 'yz'
self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
['xyz'], msg=pattern)
- pattern = pattern.encode()
- self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
- [b'xyz'], msg=pattern)
+ if have_unicode:
+ pattern = unicode(pattern)
+ self.assertEqual(re.compile(pattern, re.S).findall(u'xyz'),
+ [u'xyz'], msg=pattern)
def test_bug_2537(self):