summary | refs | log | tree | commit | diff
path: root/Lib/re.py
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 1997-07-17 22:39:13 +0000
committer: Guido van Rossum <guido@python.org> 1997-07-17 22:39:13 +0000
commit: 9e18ec7dc9e8452035085ea34d8f4dda43c8baed (patch)
tree: cc62ea42022b87ba6e94e98483ee7f18d29183e3 /Lib/re.py
parent: a4f1a78b6eaed4d5d2c609d2e3160ec64535d22a (diff)
download: cpython-git-9e18ec7dc9e8452035085ea34d8f4dda43c8baed.tar.gz
Correctly implement sub, subn, and split. Also correct and augment
the cache code.
Diffstat (limited to 'Lib/re.py')
-rw-r--r-- Lib/re.py 98
1 file changed, 88 insertions, 10 deletions
diff --git a/Lib/re.py b/Lib/re.py
index 8509b5f753..abc31c8cc4 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -40,7 +40,7 @@ def valid_identifier(id):
_cache = {}
_MAXCACHE = 20
-def _cachecompile(pattern, flags):
+def _cachecompile(pattern, flags=0):
key = (pattern, flags)
try:
return _cache[key]
@@ -59,13 +59,19 @@ def search(pattern, string, flags=0):
return _cachecompile(pattern, flags).search(string)
def sub(pattern, repl, string, count=0):
- return _cachecompile(pattern).sub(repl, string, count)
+ if type(pattern) == type(''):
+ pattern = _cachecompile(pattern)
+ return pattern.sub(repl, string, count)
def subn(pattern, repl, string, count=0):
- return _cachecompile(pattern).subn(repl, string, count)
+ if type(pattern) == type(''):
+ pattern = _cachecompile(pattern)
+ return pattern.subn(repl, string, count)
def split(pattern, string, maxsplit=0):
- return _cachecompile(pattern).subn(string, maxsplit)
+ if type(pattern) == type(''):
+ pattern = _cachecompile(pattern)
+ return pattern.split(string, maxsplit)
#
#
@@ -126,14 +132,86 @@ class RegexObject:
regs)
def sub(self, repl, string, count=0):
- pass
-
- def subn(self, repl, string, count=0):
- pass
+ return self.subn(repl, string, count)[0]
- def split(self, string, maxsplit=0):
- pass
+ def subn(self, repl, source, count=0):
+ if count < 0: raise error, "negative substibution count"
+ if count == 0: import sys; count = sys.maxint
+ if type(repl) == type(''):
+ if '\\' in repl:
+ repl = lambda m, r=repl: _expand(m, r)
+ else:
+ repl = lambda m, r=repl: r
+ n = 0 # Number of matches
+ pos = 0 # Where to start searching
+ lastmatch = -1 # End of last match
+ results = [] # Substrings making up the result
+ end = len(source)
+ while n < count and pos <= end:
+ m = self.search(source, pos)
+ if not m: break
+ i, j = m.span(0)
+ if i == j == lastmatch:
+ # Empty match adjacent to previous match
+ pos = pos+1
+ results.append(source[lastmatch:pos])
+ continue
+ if pos < i: results.append(source[pos:i])
+ results.append(repl(m))
+ pos = lastmatch = j
+ if i == j:
+ # Last match was empty; don't try here again
+ pos = pos+1
+ results.append(source[lastmatch:pos])
+ n = n+1
+ results.append(source[pos:])
+ return (string.join(results, ''), n)
+ def split(self, source, maxsplit=0):
+ if maxsplit < 0: raise error, "negative split count"
+ if maxsplit == 0: import sys; maxsplit = sys.maxint
+ n = 0
+ pos = 0
+ lastmatch = 0
+ results = []
+ end = len(source)
+ while n < maxsplit:
+ m = self.search(source, pos)
+ if not m: break
+ i, j = m.span(0)
+ if i == j:
+ # Empty match
+ if pos >= end: break
+ pos = pos+1
+ continue
+ results.append(source[lastmatch:i])
+ g = m.group()
+ if g:
+ results[len(results):] = list(g)
+ pos = lastmatch = j
+ results.append(source[lastmatch:])
+ return results
+
+def _expand(m, repl):
+ results = []
+ index = 0
+ size = len(repl)
+ while index < size:
+ found = string.find(repl, '\\', index)
+ if found < 0:
+ results.append(repl[index:])
+ break
+ if found > index:
+ results.append(repl[index:found])
+ escape_type, value, index = expand_escape(repl, found+1, REPLACEMENT)
+ if escape_type == CHAR:
+ results.append(value)
+ elif escape_type == MEMORY_REFERENCE:
+ results.append(m.group(value))
+ else:
+ raise error, "bad escape in replacement"
+ return string.join(results, '')
+
class MatchObject:
def __init__(self, re, string, pos, regs):
self.re = re