summary | refs | log | tree | commit | diff
path: root/regnodes.h
diff options
context:
space:
mode:
author: Yves Orton <demerphq@gmail.com> 2016-03-05 22:04:28 +0100
committer: Yves Orton <demerphq@gmail.com> 2016-03-06 14:06:08 +0100
commit: d5a00e4af6b155495be31a35728b8fef8e671ebe (patch)
tree: 562a578c47ebca3abd0f26130e8e9e75cb6702f6 /regnodes.h
parent: 5bd2d46ea3f06ba4e06c713635d5f83a331c4af0 (diff)
download: perl-d5a00e4af6b155495be31a35728b8fef8e671ebe.tar.gz
Unify GOSTART and GOSUB
GOSTART is a special case of GOSUB; by unifying them we can remove a lot of offset twiddling and other special casing, at pretty much no cost. GOSUB has 2 arguments, ARG() and ARG2L(), which are interpreted as a U32 and an I32 respectively. ARG() holds the "parno" we will recurse into. ARG2L() holds a signed offset to the relevant start node for the recursion. Prior to this patch the argument to GOSUB would always be >= 1, and unlike other parts of our logic we would not use 0 to represent "start/end" of pattern, as GOSTART would be used for "recurse to beginning of pattern". After this patch we use 0 to represent "start/end", and a lot of complexity "goes away" along with the GOSTART regops.
Diffstat (limited to 'regnodes.h')
-rw-r--r-- regnodes.h 76
1 files changed, 36 insertions, 40 deletions
diff --git a/regnodes.h b/regnodes.h
index f27abe0c7c..f820c5684e 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -6,8 +6,8 @@
/* Regops and State definitions */
-#define REGNODE_MAX 93
-#define REGMATCH_STATE_MAX 133
+#define REGNODE_MAX 92
+#define REGMATCH_STATE_MAX 132
#define END 0 /* 0000 End of program. */
#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
@@ -88,23 +88,22 @@
#define AHOCORASICK 74 /* 0x4a Aho Corasick stclass. flags==type */
#define AHOCORASICKC 75 /* 0x4b Same as AHOCORASICK, but with embedded charclass data */
#define GOSUB 76 /* 0x4c recurse to paren arg1 at (signed) ofs arg2 */
-#define GOSTART 77 /* 0x4d recurse to start of pattern */
-#define NGROUPP 78 /* 0x4e Whether the group matched. */
-#define INSUBP 79 /* 0x4f Whether we are in a specific recurse. */
-#define DEFINEP 80 /* 0x50 Never execute directly. */
-#define ENDLIKE 81 /* 0x51 Used only for the type field of verbs */
-#define OPFAIL 82 /* 0x52 Same as (?!), but with verb arg */
-#define ACCEPT 83 /* 0x53 Accepts the current matched string, with verbar */
-#define VERB 84 /* 0x54 Used only for the type field of verbs */
-#define PRUNE 85 /* 0x55 Pattern fails at this startpoint if no-backtracking through this */
-#define MARKPOINT 86 /* 0x56 Push the current location for rollback by cut. */
-#define SKIP 87 /* 0x57 On failure skip forward (to the mark) before retrying */
-#define COMMIT 88 /* 0x58 Pattern fails outright if backtracking through this */
-#define CUTGROUP 89 /* 0x59 On failure go to the next alternation in the group */
-#define KEEPS 90 /* 0x5a $& begins here. */
-#define LNBREAK 91 /* 0x5b generic newline pattern */
-#define OPTIMIZED 92 /* 0x5c Placeholder for dump. */
-#define PSEUDO 93 /* 0x5d Pseudo opcode for internal use. */
+#define NGROUPP 77 /* 0x4d Whether the group matched. */
+#define INSUBP 78 /* 0x4e Whether we are in a specific recurse. */
+#define DEFINEP 79 /* 0x4f Never execute directly. */
+#define ENDLIKE 80 /* 0x50 Used only for the type field of verbs */
+#define OPFAIL 81 /* 0x51 Same as (?!), but with verb arg */
+#define ACCEPT 82 /* 0x52 Accepts the current matched string, with verbar */
+#define VERB 83 /* 0x53 Used only for the type field of verbs */
+#define PRUNE 84 /* 0x54 Pattern fails at this startpoint if no-backtracking through this */
+#define MARKPOINT 85 /* 0x55 Push the current location for rollback by cut. */
+#define SKIP 86 /* 0x56 On failure skip forward (to the mark) before retrying */
+#define COMMIT 87 /* 0x57 Pattern fails outright if backtracking through this */
+#define CUTGROUP 88 /* 0x58 On failure go to the next alternation in the group */
+#define KEEPS 89 /* 0x59 $& begins here. */
+#define LNBREAK 90 /* 0x5a generic newline pattern */
+#define OPTIMIZED 91 /* 0x5b Placeholder for dump. */
+#define PSEUDO 92 /* 0x5c Pseudo opcode for internal use. */
/* ------------ States ------------- */
#define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */
#define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */
@@ -230,7 +229,6 @@ EXTCONST U8 PL_regkind[] = {
TRIE, /* AHOCORASICK */
TRIE, /* AHOCORASICKC */
GOSUB, /* GOSUB */
- GOSTART, /* GOSTART */
NGROUPP, /* NGROUPP */
INSUBP, /* INSUBP */
DEFINEP, /* DEFINEP */
@@ -373,7 +371,6 @@ static const U8 regarglen[] = {
EXTRA_SIZE(struct regnode_1), /* AHOCORASICK */
EXTRA_SIZE(struct regnode_charclass), /* AHOCORASICKC */
EXTRA_SIZE(struct regnode_2L), /* GOSUB */
- 0, /* GOSTART */
EXTRA_SIZE(struct regnode_1), /* NGROUPP */
EXTRA_SIZE(struct regnode_1), /* INSUBP */
EXTRA_SIZE(struct regnode_1), /* DEFINEP */
@@ -472,7 +469,6 @@ static const char reg_off_by_arg[] = {
0, /* AHOCORASICK */
0, /* AHOCORASICKC */
0, /* GOSUB */
- 0, /* GOSTART */
0, /* NGROUPP */
0, /* INSUBP */
0, /* DEFINEP */
@@ -577,23 +573,22 @@ EXTCONST char * const PL_reg_name[] = {
"AHOCORASICK", /* 0x4a */
"AHOCORASICKC", /* 0x4b */
"GOSUB", /* 0x4c */
- "GOSTART", /* 0x4d */
- "NGROUPP", /* 0x4e */
- "INSUBP", /* 0x4f */
- "DEFINEP", /* 0x50 */
- "ENDLIKE", /* 0x51 */
- "OPFAIL", /* 0x52 */
- "ACCEPT", /* 0x53 */
- "VERB", /* 0x54 */
- "PRUNE", /* 0x55 */
- "MARKPOINT", /* 0x56 */
- "SKIP", /* 0x57 */
- "COMMIT", /* 0x58 */
- "CUTGROUP", /* 0x59 */
- "KEEPS", /* 0x5a */
- "LNBREAK", /* 0x5b */
- "OPTIMIZED", /* 0x5c */
- "PSEUDO", /* 0x5d */
+ "NGROUPP", /* 0x4d */
+ "INSUBP", /* 0x4e */
+ "DEFINEP", /* 0x4f */
+ "ENDLIKE", /* 0x50 */
+ "OPFAIL", /* 0x51 */
+ "ACCEPT", /* 0x52 */
+ "VERB", /* 0x53 */
+ "PRUNE", /* 0x54 */
+ "MARKPOINT", /* 0x55 */
+ "SKIP", /* 0x56 */
+ "COMMIT", /* 0x57 */
+ "CUTGROUP", /* 0x58 */
+ "KEEPS", /* 0x59 */
+ "LNBREAK", /* 0x5a */
+ "OPTIMIZED", /* 0x5b */
+ "PSEUDO", /* 0x5c */
/* ------------ States ------------- */
"TRIE_next", /* REGNODE_MAX +0x01 */
"TRIE_next_fail", /* REGNODE_MAX +0x02 */
@@ -702,11 +697,12 @@ EXTCONST char * const PL_reg_intflags_name[] = {
"ANCH_MBOL", /* 0x00000400 - PREGf_ANCH_MBOL */
"ANCH_SBOL", /* 0x00000800 - PREGf_ANCH_SBOL */
"ANCH_GPOS", /* 0x00001000 - PREGf_ANCH_GPOS */
+ "RECURSE_SEEN", /* 0x00002000 - PREGf_RECURSE_SEEN */
};
#endif /* DOINIT */
#ifdef DEBUGGING
-# define REG_INTFLAGS_NAME_SIZE 12
+# define REG_INTFLAGS_NAME_SIZE 13
#endif
/* The following have no fixed length. U8 so we can do strchr() on it. */