fixed character set description in docstring (SRE uses Python
strings, not C strings)

removed USE_PYTHON defines, and related sre.py helpers

skip calling the subx helper if the template is callable.
interestingly enough, this means that

    def callback(m):
        return literal
    result = pattern.sub(callback, string)

is much faster than

    result = pattern.sub(literal, string)
author: Fredrik Lundh <fredrik@pythonware.com> 2001-10-21 21:48:30 +0000
committer: Fredrik Lundh <fredrik@pythonware.com> 2001-10-21 21:48:30 +0000
commit: 020aa22bf70f3a34d058556d0008a49edbd588f1 (patch)
tree: 243b016c45e173f2ec3e342516df2401fb0638c1 /Lib/sre.py
parent: 943353ab628b2c67a98582a986f5a4d1104976cc (diff)
download: cpython-020aa22bf70f3a34d058556d0008a49edbd588f1.tar.gz
1 files changed, 15 insertions, 80 deletions
diff --git a/Lib/sre.py b/Lib/sre.py
index 701334e5db..e7517f598f 100644
--- a/Lib/sre.py
+++ b/Lib/sre.py
@@ -17,15 +17,13 @@
 r"""Support for regular expressions (RE).
 
 This module provides regular expression matching operations similar to
-those found in Perl. It's 8-bit clean: the strings being processed may
-contain both null bytes and characters whose high bit is set. Regular
-expression pattern strings may not contain null bytes, but can specify
-the null byte using the \\number notation. Characters with the high
-bit set may be included.
-
-Regular expressions can contain both special and ordinary
-characters. Most ordinary characters, like "A", "a", or "0", are the
-simplest regular expressions; they simply match themselves. You can
+those found in Perl.  It supports both 8-bit and Unicode strings; both
+the pattern and the strings being processed can contain null bytes and
+characters outside the US ASCII range.
+
+Regular expressions can contain both special and ordinary characters.
+Most ordinary characters, like "A", "a", or "0", are the simplest
+regular expressions; they simply match themselves.  You can
 concatenate ordinary characters, so last matches the string 'last'.
 
 The special characters are:
@@ -45,7 +43,7 @@ The special characters are:
     "|"      A|B, creates an RE that will match either A or B.
     (...)    Matches the RE inside the parentheses.
              The contents can be retrieved or matched later in the string.
-    (?iLmsx) Set the I, L, M, S, or X flag for the RE (see below).
+    (?iLmsux) Set the I, L, M, S, U, or X flag for the RE (see below).
     (?:...)  Non-grouping version of regular parentheses.
     (?P<name>...) The substring matched by the group is accessible by name.
     (?P=name)     Matches the text matched earlier by the group named name.
@@ -54,7 +52,7 @@ The special characters are:
     (?!...)  Matches if ... doesn't match next.
 
 The special sequences consist of "\\" and a character from the list
-below. If the ordinary character is not on the list, then the
+below.  If the ordinary character is not on the list, then the
 resulting RE will match the second character.
     \number  Matches the contents of the group of the same number.
     \A       Matches only at the start of the string.
@@ -246,76 +244,13 @@ def _expand(pattern, match, template):
 
 def _subx(pattern, template):
     # internal: pattern.sub/subn implementation helper
-    if callable(template):
-        filter = template
-    else:
-        template = _compile_repl(template, pattern)
-        if not template[0] and len(template[1]) == 1:
-            # literal replacement
-            filter = template[1][0]
-        else:
-            def filter(match, template=template):
-                return sre_parse.expand_template(template, match)
-    return filter
-
-def _sub(pattern, template, text, count=0):
-    # internal: pattern.sub implementation hook
-    # FIXME: not used in SRE 2.2.1 and later; will be removed soon
-    return _subn(pattern, template, text, count)[0]
-
-def _subn(pattern, template, text, count=0):
-    # internal: pattern.subn implementation hook
-    # FIXME: not used in SRE 2.2.1 and later; will be removed soon
-    filter = _subx(pattern, template)
-    if not callable(filter):
+    template = _compile_repl(template, pattern)
+    if not template[0] and len(template[1]) == 1:
         # literal replacement
-        def filter(match, literal=filter):
-            return literal
-    n = i = 0
-    s = []
-    append = s.append
-    c = pattern.scanner(text)
-    while not count or n < count:
-        m = c.search()
-        if not m:
-            break
-        b, e = m.span()
-        if i < b:
-            append(text[i:b])
-        elif i == b == e and n:
-            append(text[i:b])
-            continue # ignore empty match at previous position
-        append(filter(m))
-        i = e
-        n = n + 1
-    append(text[i:])
-    return _join(s, text[:0]), n
-
-def _split(pattern, text, maxsplit=0):
-    # internal: pattern.split implementation hook
-    # FIXME: not used in SRE 2.2.1 and later; will be removed soon
-    n = i = 0
-    s = []
-    append = s.append
-    extend = s.extend
-    c = pattern.scanner(text)
-    g = pattern.groups
-    while not maxsplit or n < maxsplit:
-        m = c.search()
-        if not m:
-            break
-        b, e = m.span()
-        if b == e:
-            if i >= len(text):
-                break
-            continue
-        append(text[i:b])
-        if g and b != e:
-            extend(list(m.groups()))
-        i = e
-        n = n + 1
-    append(text[i:])
-    return s
+        return template[1][0]
+    def filter(match, template=template):
+        return sre_parse.expand_template(template, match)
+    return filter
 
 # register myself for pickling
author	Fredrik Lundh <fredrik@pythonware.com>	2001-10-21 21:48:30 +0000
committer	Fredrik Lundh <fredrik@pythonware.com>	2001-10-21 21:48:30 +0000
commit	020aa22bf70f3a34d058556d0008a49edbd588f1 (patch)
tree	243b016c45e173f2ec3e342516df2401fb0638c1 /Lib/sre.py
parent	943353ab628b2c67a98582a986f5a4d1104976cc (diff)
download	cpython-020aa22bf70f3a34d058556d0008a49edbd588f1.tar.gz