From 898ff03e1e7925ecde3da66327d3cdc7e07625ba Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka
Date: Fri, 5 May 2017 08:53:40 +0300
Subject: bpo-30215: Make re.compile() locale agnostic. (#1361)

Compiled regular expression objects with the re.LOCALE flag no longer
depend on the locale at compile time. Only the locale at matching time
affects the result of matching.
---
 Lib/test/test_re.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'Lib/test/test_re.py')

diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index da5c953ced..7601dc88c7 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1730,6 +1730,38 @@ SUBPATTERN None 0 0
         self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
         self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
 
+    def test_locale_compiled(self):
+        oldlocale = locale.setlocale(locale.LC_CTYPE)
+        self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
+        for loc in 'en_US.iso88591', 'en_US.utf8':
+            try:
+                locale.setlocale(locale.LC_CTYPE, loc)
+            except locale.Error:
+                # Unsupported locale on this system
+                self.skipTest('test needs %s locale' % loc)
+
+        locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
+        p1 = re.compile(b'\xc5\xe5', re.L|re.I)
+        p2 = re.compile(b'[a\xc5][a\xe5]', re.L|re.I)
+        p3 = re.compile(b'[az\xc5][az\xe5]', re.L|re.I)
+        p4 = re.compile(b'[^\xc5][^\xe5]', re.L|re.I)
+        for p in p1, p2, p3:
+            self.assertTrue(p.match(b'\xc5\xe5'))
+            self.assertTrue(p.match(b'\xe5\xe5'))
+            self.assertTrue(p.match(b'\xc5\xc5'))
+        self.assertIsNone(p4.match(b'\xe5\xc5'))
+        self.assertIsNone(p4.match(b'\xe5\xe5'))
+        self.assertIsNone(p4.match(b'\xc5\xc5'))
+
+        locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
+        for p in p1, p2, p3:
+            self.assertTrue(p.match(b'\xc5\xe5'))
+            self.assertIsNone(p.match(b'\xe5\xe5'))
+            self.assertIsNone(p.match(b'\xc5\xc5'))
+        self.assertTrue(p4.match(b'\xe5\xc5'))
+        self.assertIsNone(p4.match(b'\xe5\xe5'))
+        self.assertIsNone(p4.match(b'\xc5\xc5'))
+
     def test_error(self):
         with self.assertRaises(re.error) as cm:
             re.compile('(\u20ac))')
-- 
cgit v1.2.1