summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2019-03-14 16:46:50 -0600
committerKarl Williamson <khw@cpan.org>2019-03-14 17:12:26 -0600
commitbf848a12528ab1e63a2f20da532eda498adbdca6 (patch)
tree500a9cfd5cf0b44d67180f05efc1df9be3460207
parent912b808cb4fcd596e07f77898c626f5567fbe994 (diff)
downloadperl-bf848a12528ab1e63a2f20da532eda498adbdca6.tar.gz
Add more checking for regnode offset overflowing
This is part of the ongoing failures in [perl #133921]. The bottom line cause is that there are generally 16 bits available for the address of the next regnode. On very large patterns, this may not be enough. When that happens, a long jump is used instead. What previous commits have done is to insert tests in a loop to detect that overflow isn't going to occur. But it turns out that there are other places where such overflow could occur. The real solution should be to detect overflow in the base level routine that would otherwise get things wrong. This entails making that routine be able to return failure. It turns out that another function is used under DEBUGGING, so that one must be changed as well. And the calls where it is possible for this to overflow are changed to look for failure return and proceed appropriately, which is to set a flag that we need to use long jumps, and restart the parse.
-rw-r--r--embed.fnc4
-rw-r--r--proto.h4
-rw-r--r--regcomp.c67
-rw-r--r--t/re/pat.t49
4 files changed, 99 insertions, 25 deletions
diff --git a/embed.fnc b/embed.fnc
index 517bcb577c..c1236e15ba 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2424,7 +2424,7 @@ Es |void |reginsert |NN RExC_state_t *pRExC_state \
|const U8 op \
|const regnode_offset operand \
|const U32 depth
-Es |void |regtail |NN RExC_state_t * pRExC_state \
+Es |bool |regtail |NN RExC_state_t * pRExC_state \
|NN const regnode_offset p \
|NN const regnode_offset val \
|const U32 depth
@@ -2556,7 +2556,7 @@ Es |void |dump_trie_interim_list|NN const struct _reg_trie_data *trie\
Es |void |dump_trie_interim_table|NN const struct _reg_trie_data *trie\
|NULLOK HV* widecharmap|NN AV *revcharmap\
|U32 next_alloc|U32 depth
-Es |U8 |regtail_study |NN RExC_state_t *pRExC_state \
+Es |bool |regtail_study |NN RExC_state_t *pRExC_state \
|NN regnode_offset p|NN const regnode_offset val|U32 depth
# endif
#endif
diff --git a/proto.h b/proto.h
index 936d344c32..49fcb89b7d 100644
--- a/proto.h
+++ b/proto.h
@@ -4434,7 +4434,7 @@ PERL_CALLCONV int Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...);
assert(fmt)
STATIC void S_regdump_extflags(pTHX_ const char *lead, const U32 flags);
STATIC void S_regdump_intflags(pTHX_ const char *lead, const U32 flags);
-STATIC U8 S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, const regnode_offset val, U32 depth);
+STATIC bool S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, const regnode_offset val, U32 depth);
#define PERL_ARGS_ASSERT_REGTAIL_STUDY \
assert(pRExC_state); assert(p); assert(val)
# endif
@@ -5569,7 +5569,7 @@ STATIC regnode_offset S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 o
STATIC regnode_offset S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth);
#define PERL_ARGS_ASSERT_REGPIECE \
assert(pRExC_state); assert(flagp)
-STATIC void S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode_offset p, const regnode_offset val, const U32 depth);
+STATIC bool S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode_offset p, const regnode_offset val, const U32 depth);
#define PERL_ARGS_ASSERT_REGTAIL \
assert(pRExC_state); assert(p); assert(val)
STATIC void S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, struct scan_data_t *data, SSize_t *minlenp, int is_inf);
diff --git a/regcomp.c b/regcomp.c
index 15d843e7c2..73e8a83b64 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -364,7 +364,6 @@ struct RExC_state_t {
} \
} STMT_END
-#define BRANCH_MAX_OFFSET U16_MAX
#define REQUIRE_BRANCHJ(flagp, restart_retval) \
STMT_START { \
RExC_use_BRANCHJ = 1; \
@@ -12070,7 +12069,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
RETURN_FAIL_ON_RESTART(flags, flagp);
FAIL2("panic: regbranch returned failure, flags=%#" UVxf, (UV) flags);
}
- REGTAIL(pRExC_state, lastbr, br); /* BRANCH -> BRANCH. */
+ if (! REGTAIL(pRExC_state, lastbr, br)) { /* BRANCH -> BRANCH. */
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
lastbr = br;
*flagp |= flags & (SPSTART | HASWIDTH | POSTPONED);
}
@@ -12141,7 +12142,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
(IV)(ender - lastbr)
);
);
- REGTAIL(pRExC_state, lastbr, ender);
+ if (! REGTAIL(pRExC_state, lastbr, ender)) {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
if (have_branch) {
char is_nothing= 1;
@@ -12152,9 +12155,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
for (br = REGNODE_p(ret); br; br = regnext(br)) {
const U8 op = PL_regkind[OP(br)];
if (op == BRANCH) {
- REGTAIL_STUDY(pRExC_state,
- REGNODE_OFFSET(NEXTOPER(br)),
- ender);
+ if (! REGTAIL_STUDY(pRExC_state,
+ REGNODE_OFFSET(NEXTOPER(br)),
+ ender))
+ {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
if ( OP(NEXTOPER(br)) != NOTHING
|| regnext(NEXTOPER(br)) != REGNODE_p(ender))
is_nothing= 0;
@@ -12221,7 +12227,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
Set_Node_Offset(REGNODE_p(ret), parse_start + 1);
FLAGS(REGNODE_p(ret)) = flag;
- REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL));
+ if (! REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL)))
+ {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
}
}
@@ -12315,16 +12324,14 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
/* FIXME adding one for every branch after the first is probably
* excessive now we have TRIE support. (hv) */
MARK_NAUGHTY(1);
- REGTAIL(pRExC_state, chain, latest);
+ if (! REGTAIL(pRExC_state, chain, latest)) {
+ /* XXX We could just redo this branch, but figuring out what
+ * bookkeeping needs to be reset is a pain, and it's likely
+ * that other branches that goto END will also be too large */
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
}
chain = latest;
- if ( chain > (SSize_t) BRANCH_MAX_OFFSET
- && ! RExC_use_BRANCHJ)
- {
- /* XXX We could just redo this branch, but figuring out what
- * bookkeeping needs to be reset is a pain */
- REQUIRE_BRANCHJ(flagp, 0);
- }
c++;
}
if (chain == 0) { /* Loop ran zero times. */
@@ -19627,10 +19634,13 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
}
/*
-- regtail - set the next-pointer at the end of a node chain of p to val.
+- regtail - set the next-pointer at the end of a node chain of p to val. If
+ that value won't fit in the space available, instead returns FALSE.
+ (Except asserts if we can't fit in the largest space the regex
+ engine is designed for.)
- SEE ALSO: regtail_study
*/
-STATIC void
+STATIC bool
S_regtail(pTHX_ RExC_state_t * pRExC_state,
const regnode_offset p,
const regnode_offset val,
@@ -19666,8 +19676,17 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
ARG_SET(REGNODE_p(scan), val - scan);
}
else {
+ if (val - scan > U16_MAX) {
+ /* Since not all callers check the return value, populate this with
+ * something that won't loop and will likely lead to a crash if
+ * execution continues */
+ NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
+ return FALSE;
+ }
NEXT_OFF(REGNODE_p(scan)) = val - scan;
}
+
+ return TRUE;
}
#ifdef DEBUGGING
@@ -19684,10 +19703,14 @@ that it is purely analytical.
Currently only used when in DEBUG mode. The macro REGTAIL_STUDY() is used
to control which is which.
+This used to return a value that was ignored. It was a problem that it is
+#ifdef'd to be another function that didn't return a value. khw has changed it
+so both currently return a pass/fail return.
+
*/
/* TODO: All four parms should be const */
-STATIC U8
+STATIC bool
S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
const regnode_offset val, U32 depth)
{
@@ -19711,7 +19734,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
bool unfolded_multi_char; /* Unexamined in this routine */
if (join_exact(pRExC_state, scan, &min,
&unfolded_multi_char, 1, REGNODE_p(val), depth+1))
- return EXACT;
+ return TRUE; /* Was return EXACT */
}
#endif
if ( exact ) {
@@ -19764,10 +19787,14 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
ARG_SET(REGNODE_p(scan), val - scan);
}
else {
+ if (val - scan > U16_MAX) {
+ NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
+ return FALSE;
+ }
NEXT_OFF(REGNODE_p(scan)) = val - scan;
}
- return exact;
+ return TRUE; /* Was 'return exact' */
}
#endif
diff --git a/t/re/pat.t b/t/re/pat.t
index 7bb215a1ea..c2b9d447a0 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -24,7 +24,7 @@ BEGIN {
skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
skip_all_without_unicode_tables();
-plan tests => 857; # Update this when adding/deleting tests.
+plan tests => 859; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -2008,6 +2008,53 @@ CODE
{ # [perl #133921], segfault
fresh_perl_is('qr0||ß+p00000F00000ù\Q00000ÿ00000x00000x0c0e0\Qx0\Qx0\x{0c!}\;\;î0\x
fresh_perl_is('|ß+W0ü0r0\Qx0\Qx0x0c0G00000000000000000O000000000x0x0x0c!}\;îçÿù\Q0 \x
+
+fresh_perl_is('s|ß+W0ü0f0\Qx0\Qx0x0c0G0xgive0000000000000O0h000x0 \xòÿÿÿ
+
+
+
+
+ ç
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+x{0c!}\;\;çÿ
+
+ fresh_perl_is('a aú
+
+
+
+
+
+ ç
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+x{1c!}\;\;îçÿp
}
} # End of sub run_tests