summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFredrik Lundh <fredrik@pythonware.com>2000-06-30 10:41:31 +0000
committerFredrik Lundh <fredrik@pythonware.com>2000-06-30 10:41:31 +0000
commit43b3b49b5ab486295baef3a35cd8e836f735c065 (patch)
tree3ca960c788dafde87799443f2087dc661dc52d48
parenta4657f736c34e4bb1802b40246028c5bec545fe6 (diff)
downloadcpython-git-43b3b49b5ab486295baef3a35cd8e836f735c065.tar.gz
- fixed lookahead assertions (#10, #11, #12)
- untabified sre_constants.py
-rw-r--r--Lib/sre_compile.py100
-rw-r--r--Lib/sre_constants.py32
-rw-r--r--Lib/sre_parse.py19
-rw-r--r--Lib/test/output/test_sre3
-rw-r--r--Modules/_sre.c25
-rw-r--r--Modules/sre_constants.h57
6 files changed, 146 insertions, 90 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index 9fdc8f395a..0829c00e27 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -26,52 +26,12 @@ def _compile(code, pattern, flags):
# internal: compile a (sub)pattern
emit = code.append
for op, av in pattern:
- if op is ANY:
- if flags & SRE_FLAG_DOTALL:
- emit(OPCODES[op])
- else:
- emit(OPCODES[CATEGORY])
- emit(CHCODES[CATEGORY_NOT_LINEBREAK])
- elif op in (SUCCESS, FAILURE):
- emit(OPCODES[op])
- elif op is AT:
- emit(OPCODES[op])
- if flags & SRE_FLAG_MULTILINE:
- emit(ATCODES[AT_MULTILINE[av]])
- else:
- emit(ATCODES[av])
- elif op is BRANCH:
- emit(OPCODES[op])
- tail = []
- for av in av[1]:
- skip = len(code); emit(0)
- _compile(code, av, flags)
- emit(OPCODES[JUMP])
- tail.append(len(code)); emit(0)
- code[skip] = len(code) - skip
- emit(0) # end of branch
- for tail in tail:
- code[tail] = len(code) - tail
- elif op is CALL:
- emit(OPCODES[op])
- skip = len(code); emit(0)
- _compile(code, av, flags)
- emit(OPCODES[SUCCESS])
- code[skip] = len(code) - skip
- elif op is CATEGORY:
- emit(OPCODES[op])
- if flags & SRE_FLAG_LOCALE:
- emit(CHCODES[CH_LOCALE[av]])
- elif flags & SRE_FLAG_UNICODE:
- emit(CHCODES[CH_UNICODE[av]])
- else:
- emit(CHCODES[av])
- elif op is GROUP:
+ if op in (LITERAL, NOT_LITERAL):
if flags & SRE_FLAG_IGNORECASE:
emit(OPCODES[OP_IGNORE[op]])
else:
emit(OPCODES[op])
- emit(av-1)
+ emit(ord(av))
elif op is IN:
if flags & SRE_FLAG_IGNORECASE:
emit(OPCODES[OP_IGNORE[op]])
@@ -101,15 +61,12 @@ def _compile(code, pattern, flags):
raise error, "internal: unsupported set operator"
emit(OPCODES[FAILURE])
code[skip] = len(code) - skip
- elif op in (LITERAL, NOT_LITERAL):
- if flags & SRE_FLAG_IGNORECASE:
- emit(OPCODES[OP_IGNORE[op]])
- else:
+ elif op is ANY:
+ if flags & SRE_FLAG_DOTALL:
emit(OPCODES[op])
- emit(ord(av))
- elif op is MARK:
- emit(OPCODES[op])
- emit(av)
+ else:
+ emit(OPCODES[CATEGORY])
+ emit(CHCODES[CATEGORY_NOT_LINEBREAK])
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
if flags & SRE_FLAG_TEMPLATE:
emit(OPCODES[REPEAT])
@@ -150,6 +107,49 @@ def _compile(code, pattern, flags):
if group:
emit(OPCODES[MARK])
emit((group-1)*2+1)
+ elif op in (SUCCESS, FAILURE):
+ emit(OPCODES[op])
+ elif op in (ASSERT, ASSERT_NOT, CALL):
+ emit(OPCODES[op])
+ skip = len(code); emit(0)
+ _compile(code, av, flags)
+ emit(OPCODES[SUCCESS])
+ code[skip] = len(code) - skip
+ elif op is AT:
+ emit(OPCODES[op])
+ if flags & SRE_FLAG_MULTILINE:
+ emit(ATCODES[AT_MULTILINE[av]])
+ else:
+ emit(ATCODES[av])
+ elif op is BRANCH:
+ emit(OPCODES[op])
+ tail = []
+ for av in av[1]:
+ skip = len(code); emit(0)
+ _compile(code, av, flags)
+ emit(OPCODES[JUMP])
+ tail.append(len(code)); emit(0)
+ code[skip] = len(code) - skip
+ emit(0) # end of branch
+ for tail in tail:
+ code[tail] = len(code) - tail
+ elif op is CATEGORY:
+ emit(OPCODES[op])
+ if flags & SRE_FLAG_LOCALE:
+ emit(CHCODES[CH_LOCALE[av]])
+ elif flags & SRE_FLAG_UNICODE:
+ emit(CHCODES[CH_UNICODE[av]])
+ else:
+ emit(CHCODES[av])
+ elif op is GROUP:
+ if flags & SRE_FLAG_IGNORECASE:
+ emit(OPCODES[OP_IGNORE[op]])
+ else:
+ emit(OPCODES[op])
+ emit(av-1)
+ elif op is MARK:
+ emit(OPCODES[op])
+ emit(av)
else:
raise ValueError, ("unsupported operand type", op)
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
index f5e7894e3b..45f4f482d2 100644
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -23,6 +23,7 @@ SUCCESS = "success"
ANY = "any"
ASSERT = "assert"
+ASSERT_NOT = "assert_not"
AT = "at"
BRANCH = "branch"
CALL = "call"
@@ -81,7 +82,7 @@ OPCODES = [
FAILURE, SUCCESS,
ANY,
- ASSERT,
+ ASSERT, ASSERT_NOT,
AT,
BRANCH,
CALL,
@@ -121,8 +122,8 @@ def makedict(list):
d = {}
i = 0
for item in list:
- d[item] = i
- i = i + 1
+ d[item] = i
+ i = i + 1
return d
OPCODES = makedict(OPCODES)
@@ -176,12 +177,27 @@ SRE_FLAG_VERBOSE = 64
if __name__ == "__main__":
import string
def dump(f, d, prefix):
- items = d.items()
- items.sort(lambda a, b: cmp(a[1], b[1]))
- for k, v in items:
- f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
+ items = d.items()
+ items.sort(lambda a, b: cmp(a[1], b[1]))
+ for k, v in items:
+ f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
f = open("sre_constants.h", "w")
- f.write("/* generated from sre_constants.py */\n")
+ f.write("""\
+/*
+ * Secret Labs' Regular Expression Engine
+ *
+ * regular expression matching engine
+ *
+ * NOTE: This file is generated by sre_constants.py. If you need
+ * to change anything in here, edit sre_constants.py and run it.
+ *
+ * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
+ *
+ * See the _sre.c file for information on usage and redistribution.
+ */
+
+""")
+
dump(f, OPCODES, "SRE_OP")
dump(f, ATCODES, "SRE")
dump(f, CHCODES, "SRE")
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index a6f3082a44..d3dbe00041 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -470,6 +470,25 @@ def _parse(source, state, flags=0):
if source.next is None or source.next == ")":
break
source.get()
+ elif source.next in ("=", "!"):
+ # lookahead assertions
+ char = source.get()
+ b = []
+ while 1:
+ p = _parse(source, state, flags)
+ if source.next == ")":
+ if b:
+ b.append(p)
+ p = _branch(state, b)
+ if char == "=":
+ subpattern.append((ASSERT, p))
+ else:
+ subpattern.append((ASSERT_NOT, p))
+ break
+ elif source.match("|"):
+ b.append(p)
+ else:
+ raise error, "pattern not properly closed"
else:
# flags
while FLAGS.has_key(source.next):
diff --git a/Lib/test/output/test_sre b/Lib/test/output/test_sre
index 75caa55c38..d3732b5214 100644
--- a/Lib/test/output/test_sre
+++ b/Lib/test/output/test_sre
@@ -6,7 +6,4 @@ test_support -- test failed re module cPickle
=== grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-trial/' should be 'd:msgs/tdir/sub1/-tdir/'
=== grouping error ('([abc])*bcd', 'abcd', 0, 'found+"-"+g1', 'abcd-a') 'abcd-c' should be 'abcd-a'
=== grouping error ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A') 'ABCD-C' should be 'ABCD-A'
-=== Syntax error: ('a(?!b).', 'abad', 0, 'found', 'ad')
-=== Syntax error: ('a(?=d).', 'abad', 0, 'found', 'ad')
-=== Syntax error: ('a(?=c|d).', 'abad', 0, 'found', 'ad')
=== Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 6fcd65ea73..22b6c7347c 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -20,6 +20,7 @@
* 00-06-28 fl fixed findall (0.9.1)
* 00-06-29 fl fixed split, added more scanner features (0.9.2)
* 00-06-30 fl tuning, fast search (0.9.3)
+ * 00-06-30 fl added assert (lookahead) primitives (0.9.4)
*
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
*
@@ -30,7 +31,7 @@
#ifndef SRE_RECURSIVE
-char copyright[] = " SRE 0.9.3 Copyright (c) 1997-2000 by Secret Labs AB ";
+char copyright[] = " SRE 0.9.4 Copyright (c) 1997-2000 by Secret Labs AB ";
#include "Python.h"
@@ -576,11 +577,10 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
pattern += pattern[0];
break;
-#if 0
- case SRE_OP_CALL:
- /* match subpattern, without backtracking */
+ case SRE_OP_ASSERT:
+ /* assert subpattern */
/* args: <skip> <pattern> */
- TRACE(("%8d: subpattern\n", PTR(ptr)));
+ TRACE(("%8d: assert subpattern\n", PTR(ptr)));
state->ptr = ptr;
i = SRE_MATCH(state, pattern + 1);
if (i < 0)
@@ -588,9 +588,20 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
if (!i)
goto failure;
pattern += pattern[0];
- ptr = state->ptr;
break;
-#endif
+
+ case SRE_OP_ASSERT_NOT:
+ /* assert not subpattern */
+ /* args: <skip> <pattern> */
+ TRACE(("%8d: assert not subpattern\n", PTR(ptr)));
+ state->ptr = ptr;
+ i = SRE_MATCH(state, pattern + 1);
+ if (i < 0)
+ return i;
+ if (i)
+ goto failure;
+ pattern += pattern[0];
+ break;
#if 0
case SRE_OP_MAX_REPEAT_ONE:
diff --git a/Modules/sre_constants.h b/Modules/sre_constants.h
index 01c8448816..2ec00bada4 100644
--- a/Modules/sre_constants.h
+++ b/Modules/sre_constants.h
@@ -1,29 +1,42 @@
-/* generated from sre_constants.py */
+/*
+ * Secret Labs' Regular Expression Engine
+ *
+ * regular expression matching engine
+ *
+ * NOTE: This file is generated by sre_constants.py. If you need
+ * to change anything in here, edit sre_constants.py and run it.
+ *
+ * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
+ *
+ * See the _sre.c file for information on usage and redistribution.
+ */
+
#define SRE_OP_FAILURE 0
#define SRE_OP_SUCCESS 1
#define SRE_OP_ANY 2
#define SRE_OP_ASSERT 3
-#define SRE_OP_AT 4
-#define SRE_OP_BRANCH 5
-#define SRE_OP_CALL 6
-#define SRE_OP_CATEGORY 7
-#define SRE_OP_GROUP 8
-#define SRE_OP_GROUP_IGNORE 9
-#define SRE_OP_IN 10
-#define SRE_OP_IN_IGNORE 11
-#define SRE_OP_INFO 12
-#define SRE_OP_JUMP 13
-#define SRE_OP_LITERAL 14
-#define SRE_OP_LITERAL_IGNORE 15
-#define SRE_OP_MARK 16
-#define SRE_OP_MAX_REPEAT 17
-#define SRE_OP_MAX_REPEAT_ONE 18
-#define SRE_OP_MIN_REPEAT 19
-#define SRE_OP_NOT_LITERAL 20
-#define SRE_OP_NOT_LITERAL_IGNORE 21
-#define SRE_OP_NEGATE 22
-#define SRE_OP_RANGE 23
-#define SRE_OP_REPEAT 24
+#define SRE_OP_ASSERT_NOT 4
+#define SRE_OP_AT 5
+#define SRE_OP_BRANCH 6
+#define SRE_OP_CALL 7
+#define SRE_OP_CATEGORY 8
+#define SRE_OP_GROUP 9
+#define SRE_OP_GROUP_IGNORE 10
+#define SRE_OP_IN 11
+#define SRE_OP_IN_IGNORE 12
+#define SRE_OP_INFO 13
+#define SRE_OP_JUMP 14
+#define SRE_OP_LITERAL 15
+#define SRE_OP_LITERAL_IGNORE 16
+#define SRE_OP_MARK 17
+#define SRE_OP_MAX_REPEAT 18
+#define SRE_OP_MAX_REPEAT_ONE 19
+#define SRE_OP_MIN_REPEAT 20
+#define SRE_OP_NOT_LITERAL 21
+#define SRE_OP_NOT_LITERAL_IGNORE 22
+#define SRE_OP_NEGATE 23
+#define SRE_OP_RANGE 24
+#define SRE_OP_REPEAT 25
#define SRE_AT_BEGINNING 0
#define SRE_AT_BEGINNING_LINE 1
#define SRE_AT_BOUNDARY 2