summaryrefslogtreecommitdiff
path: root/Lib/re.py
diff options
context:
space:
mode:
author Guido van Rossum <guido@python.org> 1997-10-08 02:08:04 +0000
committer Guido van Rossum <guido@python.org> 1997-10-08 02:08:04 +0000
commit 5bc5b14f6d01e11a8e10c3e937e7e83eff556178 (patch)
tree 58e65e156db3e939856df4ef2faf8f17d52e024a /Lib/re.py
parent c3861078385ee8f546ae9d16d47b2f3447f3044d (diff)
download cpython-git-5bc5b14f6d01e11a8e10c3e937e7e83eff556178.tar.gz
Checking in AMK's latest installment.
Diffstat (limited to 'Lib/re.py')
-rw-r--r--Lib/re.py193
1 files changed, 0 insertions, 193 deletions
diff --git a/Lib/re.py b/Lib/re.py
index b08f8af4c6..420f6ed7e1 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -7,9 +7,6 @@ import sys
import string
from pcre import *
-[ NORMAL, CHARCLASS, REPLACEMENT ] = range(3)
-[ CHAR, MEMORY_REFERENCE, SYNTAX, NOT_SYNTAX, SET, WORD_BOUNDARY, NOT_WORD_BOUNDARY, BEGINNING_OF_BUFFER, END_OF_BUFFER ] = range(9)
-
#
# First, the public part of the interface:
#
@@ -231,199 +228,9 @@ def escape(pattern):
result.append(char)
return string.join(result, '')
-_idprog = None
-def valid_identifier(id):
- global _idprog
- if not _idprog:
- _idprog = compile(r"[a-zA-Z_]\w*$")
- if _idprog.match(id):
- return 1
- else:
- return 0
-
def compile(pattern, flags=0):
groupindex={}
code=pcre_compile(pattern, flags, groupindex)
return RegexObject(pattern, flags, code, groupindex)
-def _expand(m, repl):
- results = []
- index = 0
- size = len(repl)
- while index < size:
- found = string.find(repl, '\\', index)
- if found < 0:
- results.append(repl[index:])
- break
- if found > index:
- results.append(repl[index:found])
- escape_type, value, index = _expand_escape(repl, found+1, REPLACEMENT)
- if escape_type == CHAR:
- results.append(value)
- elif escape_type == MEMORY_REFERENCE:
- r = m.group(value)
- if r is None:
- raise error, ('group "' + str(value) + '" did not contribute '
- 'to the match')
- results.append(m.group(value))
- else:
- raise error, "bad escape in replacement"
- return string.join(results, '')
-
-def _expand_escape(pattern, index, context=NORMAL):
- if index >= len(pattern):
- raise error, 'escape ends too soon'
-
- elif pattern[index] == 't':
- return CHAR, chr(9), index + 1
-
- elif pattern[index] == 'n':
- return CHAR, chr(10), index + 1
-
- elif pattern[index] == 'v':
- return CHAR, chr(11), index + 1
-
- elif pattern[index] == 'r':
- return CHAR, chr(13), index + 1
-
- elif pattern[index] == 'f':
- return CHAR, chr(12), index + 1
-
- elif pattern[index] == 'a':
- return CHAR, chr(7), index + 1
-
- elif pattern[index] == 'x':
- # CAUTION: this is the Python rule, not the Perl rule!
- end = index + 1 # Skip over the 'x' character
- while (end < len(pattern)) and (pattern[end] in string.hexdigits):
- end = end + 1
- if end == index:
- raise error, "\\x must be followed by hex digit(s)"
- # let Python evaluate it, so we don't incorrectly 2nd-guess
- # what it's doing (and Python in turn passes it on to sscanf,
- # so that *it* doesn't incorrectly 2nd-guess what C does!)
- char = eval ('"' + pattern[index-1:end] + '"')
-# assert len(char) == 1
- return CHAR, char, end
-
- elif pattern[index] == 'b':
- if context != NORMAL:
- return CHAR, chr(8), index + 1
- else:
- return WORD_BOUNDARY, '', index + 1
-
- elif pattern[index] == 'B':
- if context != NORMAL:
- return CHAR, 'B', index + 1
- else:
- return NOT_WORD_BOUNDARY, '', index + 1
-
- elif pattern[index] == 'A':
- if context != NORMAL:
- return CHAR, 'A', index + 1
- else:
- return BEGINNING_OF_BUFFER, '', index + 1
-
- elif pattern[index] == 'Z':
- if context != NORMAL:
- return CHAR, 'Z', index + 1
- else:
- return END_OF_BUFFER, '', index + 1
-
- elif pattern[index] in 'GluLUQE':
- raise error, ('\\' + pattern[index] + ' is not allowed')
-
- elif pattern[index] == 'w':
- return CHAR, 'w', index + 1
-
- elif pattern[index] == 'W':
- return CHAR, 'W', index + 1
-
- elif pattern[index] == 's':
- return CHAR, 's', index + 1
-
- elif pattern[index] == 'S':
- return CHAR, 'S', index + 1
-
- elif pattern[index] == 'd':
- return CHAR, 'd', index + 1
-
- elif pattern[index] == 'D':
- return CHAR, 'D', index + 1
-
- elif pattern[index] in '0123456789':
-
- if pattern[index] == '0':
- if (index + 1 < len(pattern)) and \
- (pattern[index + 1] in string.octdigits):
- if (index + 2 < len(pattern)) and \
- (pattern[index + 2] in string.octdigits):
- value = string.atoi(pattern[index:index + 3], 8)
- index = index + 3
-
- else:
- value = string.atoi(pattern[index:index + 2], 8)
- index = index + 2
-
- else:
- value = 0
- index = index + 1
-
- if value > 255:
- raise error, 'octal value out of range'
-
- return CHAR, chr(value), index
-
- else:
- if (index + 1 < len(pattern)) and \
- (pattern[index + 1] in string.digits):
- if (index + 2 < len(pattern)) and \
- (pattern[index + 2] in string.octdigits) and \
- (pattern[index + 1] in string.octdigits) and \
- (pattern[index] in string.octdigits):
- value = string.atoi(pattern[index:index + 3], 8)
- if value > 255:
- raise error, 'octal value out of range'
-
- return CHAR, chr(value), index + 3
-
- else:
- value = string.atoi(pattern[index:index + 2])
- if (value < 1) or (value > 99):
- raise error, 'memory reference out of range'
-
- if context == CHARCLASS:
- raise error, ('cannot reference a register from '
- 'inside a character class')
- return MEMORY_REFERENCE, value, index + 2
-
- else:
- if context == CHARCLASS:
- raise error, ('cannot reference a register from '
- 'inside a character class')
-
- value = string.atoi(pattern[index])
- return MEMORY_REFERENCE, value, index + 1
-
- elif pattern[index] == 'g':
- if context != REPLACEMENT:
- return CHAR, 'g', index + 1
-
- index = index + 1
- if index >= len(pattern):
- raise error, 'unfinished symbolic reference'
- if pattern[index] != '<':
- raise error, 'missing < in symbolic reference'
-
- index = index + 1
- end = string.find(pattern, '>', index)
- if end == -1:
- raise error, 'unfinished symbolic reference'
- value = pattern[index:end]
- if not valid_identifier(value):
- raise error, 'illegal symbolic reference'
- return MEMORY_REFERENCE, value, end + 1
-
- else:
- return CHAR, pattern[index], index + 1