From ab14088141ab749763e35b7a49e79c368940e12d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 11 Nov 2014 21:13:28 +0200 Subject: Minor code clean up and improvements in the re module. --- Lib/re.py | 2 +- Lib/sre_compile.py | 16 ++++++---------- Lib/sre_parse.py | 8 ++++---- Lib/test/test_re.py | 4 ++-- 4 files changed, 13 insertions(+), 17 deletions(-) diff --git a/Lib/re.py b/Lib/re.py index a4de5cc3ef..788fa6bd78 100644 --- a/Lib/re.py +++ b/Lib/re.py @@ -363,7 +363,7 @@ class Scanner: append = result.append match = self.scanner.scanner(string).match i = 0 - while 1: + while True: m = match() if not m: break diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index f5aef7a2e5..1241a01c3e 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -16,11 +16,6 @@ from sre_constants import * assert _sre.MAGIC == MAGIC, "SRE module mismatch" -if _sre.CODESIZE == 2: - MAXCODE = 65535 -else: - MAXCODE = 0xFFFFFFFF - _LITERAL_CODES = {LITERAL, NOT_LITERAL} _REPEATING_CODES = {REPEAT, MIN_REPEAT, MAX_REPEAT} _SUCCESS_CODES = {SUCCESS, FAILURE} @@ -191,7 +186,7 @@ def _compile(code, pattern, flags): emit(JUMP) tailappend(_len(code)); emit(0) code[skip] = _len(code) - skip - emit(0) # end of branch + emit(FAILURE) # end of branch for tail in tail: code[tail] = _len(code) - tail elif op is CATEGORY: @@ -374,6 +369,7 @@ def _optimize_charset(charset, fixup, fixes): return out _CODEBITS = _sre.CODESIZE * 8 +MAXCODE = (1 << _CODEBITS) - 1 _BITS_TRANS = b'0' + b'1' * 255 def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int): s = bits.translate(_BITS_TRANS)[::-1] @@ -477,9 +473,9 @@ def _compile_info(code, pattern, flags): elif op is IN: charset = av ## if prefix: -## print "*** PREFIX", prefix, prefix_skip +## print("*** PREFIX", prefix, prefix_skip) ## if charset: -## print "*** CHARSET", charset +## print("*** CHARSET", charset) # add an info block emit = code.append emit(INFO) @@ -489,9 +485,9 @@ def _compile_info(code, pattern, flags): if prefix: mask = SRE_INFO_PREFIX if len(prefix) == prefix_skip == len(pattern.data): - mask = mask + SRE_INFO_LITERAL + mask = mask | SRE_INFO_LITERAL elif charset: - mask = mask + SRE_INFO_CHARSET + mask = mask | SRE_INFO_CHARSET emit(mask) # pattern length if lo < MAXCODE: diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index aa2d64bb40..45411f89f1 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -103,18 +103,18 @@ class SubPattern: seqtypes = (tuple, list) for op, av in self.data: print(level*" " + str(op), end='') - if op == IN: + if op is IN: # member sublanguage print() for op, a in av: print((level+1)*" " + str(op), a) - elif op == BRANCH: + elif op is BRANCH: print() for i, a in enumerate(av[1]): if i: print(level*" " + "OR") a.dump(level+1) - elif op == GROUPREF_EXISTS: + elif op is GROUPREF_EXISTS: condgroup, item_yes, item_no = av print('', condgroup) item_yes.dump(level+1) @@ -607,7 +607,7 @@ def _parse(source, state): item = subpattern[-1:] else: item = None - if not item or (_len(item) == 1 and item[0][0] == AT): + if not item or (_len(item) == 1 and item[0][0] is AT): raise source.error("nothing to repeat", source.tell() - here + len(this)) if item[0][0] in _REPEATCODES: diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index b30abadd52..7bc1e935d4 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1101,8 +1101,8 @@ class ReTests(unittest.TestCase): def test_inline_flags(self): # Bug #1700 - upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow - lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow + upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below + lower_char = '\u1ea1' # Latin Small Letter A with Dot Below p = re.compile(upper_char, re.I | re.U) q = p.match(lower_char) -- cgit v1.2.1