diff options
author | Fredrik Lundh <fredrik@pythonware.com> | 2000-07-02 17:33:27 +0000 |
---|---|---|
committer | Fredrik Lundh <fredrik@pythonware.com> | 2000-07-02 17:33:27 +0000 |
commit | 7cafe4d7e466996d5fc32e871fe834e0e0c94282 (patch) | |
tree | dc3572d1d6bd95316c7a044cfd8639be014e3520 /Lib/sre.py | |
parent | b19948b7fb96cfc2ed69bb58f2205d1399f1f9f5 (diff) | |
download | cpython-git-7cafe4d7e466996d5fc32e871fe834e0e0c94282.tar.gz |
- actually enabled charset anchors in the engine (still not
used by the code generator)
- changed max repeat value in engine (to match earlier array fix)
- added experimental "which part matched?" mechanism to sre; see
http://hem.passagen.se/eff/2000_07_01_bot-archive.htm#416954
or python-dev for details.
Diffstat (limited to 'Lib/sre.py')
-rw-r--r-- | Lib/sre.py | 31 |
1 files changed, 31 insertions, 0 deletions
# --------------------------------------------------------------------
# experimental stuff (see python-dev discussions for details)

class Scanner:
    """Simple regex-driven tokenizer.

    ``lexicon`` is an indexable sequence of ``(phrase, action)`` pairs.
    ``phrase`` is a regular-expression source string.  ``action`` is one of:

    * a callable, invoked as ``action(scanner, matched_text)``; its return
      value is collected unless it is ``None``;
    * ``None``, meaning the matched text is silently skipped;
    * any other object, which is collected as-is.

    All phrases are combined into a single alternation, so earlier lexicon
    entries take precedence over later ones.

    Attributes:
        lexicon: the sequence passed to the constructor (looked up by index
            on every match).
        scanner: the compiled combined pattern.
        match: the match object for the token currently being handled; set
            just before a callable action is invoked.

    Note: each phrase is wrapped in an extra named group, so numeric
    backreferences inside a phrase refer to the combined pattern's group
    numbering, not to the phrase on its own.
    """

    # Prefix of the private named group wrapped around each phrase.
    _GROUP_PREFIX = "_scan_"

    def __init__(self, lexicon):
        # Imported locally so this block does not depend on the surrounding
        # module's import list.
        import re

        self.lexicon = lexicon
        self.match = None
        # Wrap every phrase in its own named group.  The wrapper is the
        # outermost group of its alternative and therefore the last one to
        # close, which makes ``m.lastgroup`` identify the alternative that
        # matched even when the phrase contains groups of its own.
        self._slot_by_group = {}
        parts = []
        for slot, (phrase, _action) in enumerate(lexicon):
            group = "%s%d" % (self._GROUP_PREFIX, slot)
            self._slot_by_group[group] = slot
            parts.append("(?P<%s>%s)" % (group, phrase))
        self.scanner = re.compile("|".join(parts))

    def scan(self, string):
        """Tokenize ``string`` from its start.

        Returns a ``(results, remainder)`` tuple: ``results`` is the list of
        collected action values and ``remainder`` is the unscanned tail of
        ``string`` (empty when the whole input was consumed).  Scanning
        stops at the first position where no phrase matches, or where the
        only match is empty (which would otherwise loop forever).
        """
        result = []
        append = result.append
        match = self.scanner.match
        slot_by_group = self._slot_by_group
        pos = 0
        while True:
            m = match(string, pos)
            if not m:
                break
            end = m.end()
            if end == pos:
                # Zero-width match: no progress is possible from here.
                break
            action = self.lexicon[slot_by_group[m.lastgroup]][1]
            if callable(action):
                # Expose the match object (not the matcher method) so the
                # action can inspect spans and subgroups.
                self.match = m
                action = action(self, m.group())
            if action is not None:
                append(action)
            pos = end
        return result, string[pos:]