diff options
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/re.py | 12 |
-rw-r--r-- | Lib/sre_compile.py | 24 |
-rw-r--r-- | Lib/sre_constants.py | 10 |
-rw-r--r-- | Lib/test/test_re.py | 32 |
4 files changed, 58 insertions, 20 deletions
@@ -268,9 +268,7 @@ _MAXCACHE = 512 def _compile(pattern, flags): # internal: compile pattern try: - p, loc = _cache[type(pattern), pattern, flags] - if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE): - return p + return _cache[type(pattern), pattern, flags] except KeyError: pass if isinstance(pattern, _pattern_type): @@ -284,13 +282,7 @@ def _compile(pattern, flags): if not (flags & DEBUG): if len(_cache) >= _MAXCACHE: _cache.clear() - if p.flags & LOCALE: - if not _locale: - return p - loc = _locale.setlocale(_locale.LC_CTYPE) - else: - loc = None - _cache[type(pattern), pattern, flags] = p, loc + _cache[type(pattern), pattern, flags] = p return p @functools.lru_cache(_MAXCACHE) diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index 2cc39007ac..d7ee4e8cb6 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -78,7 +78,13 @@ def _compile(code, pattern, flags): fixes = None for op, av in pattern: if op in LITERAL_CODES: - if flags & SRE_FLAG_IGNORECASE: + if not flags & SRE_FLAG_IGNORECASE: + emit(op) + emit(av) + elif flags & SRE_FLAG_LOCALE: + emit(OP_LOC_IGNORE[op]) + emit(av) + else: lo = _sre.getlower(av, flags) if fixes and lo in fixes: emit(IN_IGNORE) @@ -93,17 +99,17 @@ def _compile(code, pattern, flags): else: emit(OP_IGNORE[op]) emit(lo) - else: - emit(op) - emit(av) elif op is IN: - if flags & SRE_FLAG_IGNORECASE: - emit(OP_IGNORE[op]) - def fixup(literal, flags=flags): - return _sre.getlower(literal, flags) - else: + if not flags & SRE_FLAG_IGNORECASE: emit(op) fixup = None + elif flags & SRE_FLAG_LOCALE: + emit(IN_LOC_IGNORE) + fixup = None + else: + emit(IN_IGNORE) + def fixup(literal, flags=flags): + return _sre.getlower(literal, flags) skip = _len(code); emit(0) _compile_charset(av, flags, code, fixup, fixes) code[skip] = _len(code) - skip diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py index fc684ae96f..b0164312d0 100644 --- a/Lib/sre_constants.py +++ b/Lib/sre_constants.py @@ -13,7 +13,7 @@ # update when constants are 
added or removed -MAGIC = 20140917 +MAGIC = 20170530 from _sre import MAXREPEAT, MAXGROUPS @@ -87,6 +87,9 @@ OPCODES = _makecodes(""" SUBPATTERN MIN_REPEAT_ONE RANGE_IGNORE + LITERAL_LOC_IGNORE + NOT_LITERAL_LOC_IGNORE + IN_LOC_IGNORE MIN_REPEAT MAX_REPEAT """) @@ -124,6 +127,11 @@ OP_IGNORE = { RANGE: RANGE_IGNORE, } +OP_LOC_IGNORE = { + LITERAL: LITERAL_LOC_IGNORE, + NOT_LITERAL: NOT_LITERAL_LOC_IGNORE, +} + AT_MULTILINE = { AT_BEGINNING: AT_BEGINNING_LINE, AT_END: AT_END_LINE diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index da5c953ced..7601dc88c7 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1730,6 +1730,38 @@ SUBPATTERN None 0 0 self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5')) self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5')) + def test_locale_compiled(self): + oldlocale = locale.setlocale(locale.LC_CTYPE) + self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale) + for loc in 'en_US.iso88591', 'en_US.utf8': + try: + locale.setlocale(locale.LC_CTYPE, loc) + except locale.Error: + # Unsupported locale on this system + self.skipTest('test needs %s locale' % loc) + + locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591') + p1 = re.compile(b'\xc5\xe5', re.L|re.I) + p2 = re.compile(b'[a\xc5][a\xe5]', re.L|re.I) + p3 = re.compile(b'[az\xc5][az\xe5]', re.L|re.I) + p4 = re.compile(b'[^\xc5][^\xe5]', re.L|re.I) + for p in p1, p2, p3: + self.assertTrue(p.match(b'\xc5\xe5')) + self.assertTrue(p.match(b'\xe5\xe5')) + self.assertTrue(p.match(b'\xc5\xc5')) + self.assertIsNone(p4.match(b'\xe5\xc5')) + self.assertIsNone(p4.match(b'\xe5\xe5')) + self.assertIsNone(p4.match(b'\xc5\xc5')) + + locale.setlocale(locale.LC_CTYPE, 'en_US.utf8') + for p in p1, p2, p3: + self.assertTrue(p.match(b'\xc5\xe5')) + self.assertIsNone(p.match(b'\xe5\xe5')) + self.assertIsNone(p.match(b'\xc5\xc5')) + self.assertTrue(p4.match(b'\xe5\xc5')) + self.assertIsNone(p4.match(b'\xe5\xe5')) + self.assertIsNone(p4.match(b'\xc5\xc5')) + def test_error(self): with 
self.assertRaises(re.error) as cm: re.compile('(\u20ac))') |