summary | refs | log | tree | commit | diff
path: root/Lib/sre_parse.py
diff options
context:
space:
mode:
author: Fredrik Lundh <fredrik@pythonware.com> 2001-01-14 21:00:44 +0000
committer: Fredrik Lundh <fredrik@pythonware.com> 2001-01-14 21:00:44 +0000
commit: 8efd1f1bf698db208389b99c5481f7678f72c376 (patch)
tree: e453c82ffda8c4e62e75d1d3ce4988842b2a4539 /Lib/sre_parse.py
parent: 9fc6be2d40ff4cd842ceacdfbafdfaa294497549 (diff)
download: cpython-8efd1f1bf698db208389b99c5481f7678f72c376.tar.gz
SRE: stricter pattern syntax checking (covers parts of bug #115900)
Diffstat (limited to 'Lib/sre_parse.py')
-rw-r--r-- Lib/sre_parse.py 24
1 file changed, 17 insertions, 7 deletions
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index a21fd61dc9..454e4779f8 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -8,6 +8,8 @@
# See the sre.py file for information on usage and redistribution.
#
+# XXX: show string offset and offending character for all errors
+
import string, sys
from sre_constants import *
@@ -410,11 +412,11 @@ def _parse(source, state):
else:
code2 = LITERAL, ord(this)
if code1[0] != LITERAL or code2[0] != LITERAL:
- raise error, "illegal range"
+ raise error, "bad character range"
lo = code1[1]
hi = code2[1]
if hi < lo:
- raise error, "illegal range"
+ raise error, "bad character range"
set.append((RANGE, (lo, hi)))
else:
if code1[0] is IN:
@@ -457,7 +459,8 @@ def _parse(source, state):
min = int(lo)
if hi:
max = int(hi)
- # XXX: <fl> check that hi >= lo ???
+ if max < min:
+ raise error, "bad repeat interval"
else:
raise error, "not supported"
# figure out which item to repeat
@@ -465,6 +468,8 @@ def _parse(source, state):
item = subpattern[-1:]
else:
raise error, "nothing to repeat"
+ if item[0][0] in (MIN_REPEAT, MAX_REPEAT):
+ raise error, "multiple repeat"
if source.match("?"):
subpattern[-1] = (MIN_REPEAT, (min, max, item))
else:
@@ -493,7 +498,7 @@ def _parse(source, state):
name = name + char
group = 1
if not isname(name):
- raise error, "illegal character in group name"
+ raise error, "bad character in group name"
elif source.match("="):
# named backreference
name = ""
@@ -505,7 +510,7 @@ def _parse(source, state):
break
name = name + char
if not isname(name):
- raise error, "illegal character in group name"
+ raise error, "bad character in group name"
gid = state.groupdict.get(name)
if gid is None:
raise error, "unknown group name"
@@ -547,6 +552,8 @@ def _parse(source, state):
continue
else:
# flags
+ if not FLAGS.has_key(source.next):
+ raise error, "unexpected end of pattern"
while FLAGS.has_key(source.next):
state.flags = state.flags | FLAGS[source.get()]
if group:
@@ -565,7 +572,9 @@ def _parse(source, state):
else:
while 1:
char = source.get()
- if char is None or char == ")":
+ if char is None:
+ raise error, "unexpected end of pattern"
+ if char == ")":
break
raise error, "unknown extension"
@@ -592,6 +601,7 @@ def parse(str, flags=0, pattern=None):
if pattern is None:
pattern = Pattern()
pattern.flags = flags
+ pattern.str = str
p = _parse_sub(source, pattern, 0)
@@ -639,7 +649,7 @@ def parse_template(source, pattern):
index = int(name)
except ValueError:
if not isname(name):
- raise error, "illegal character in group name"
+ raise error, "bad character in group name"
try:
index = pattern.groupindex[name]
except KeyError: