summaryrefslogtreecommitdiff
path: root/pp_hot.c
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2011-02-14 15:46:13 +0000
committerDavid Mitchell <davem@iabyn.com>2011-02-16 17:34:08 +0000
commit20be6587f85cec282e10810718c869dd958afe43 (patch)
treef80be6605c3f9d61ca41ac9d6c846c8701f4c2fb /pp_hot.c
parentc769ddc70796c6d56fa78ec22fb70caee961bcbf (diff)
downloadperl-20be6587f85cec282e10810718c869dd958afe43.tar.gz
fix many s/// tainting bugs
This is a re-implementation of the tainting code in pp_subst and pp_substcont. Although this fixes many bugs, because it's a de-novo rewrite of the tainting parts of the code in those two functions, it's quite possible that it breaks some existing tainting behaviour. It doesn't break any existing tests, although it turns out that this area was severely under-tested anyway. The main bugs that this commit fixes are as follows, where: T = a tainted value L = pattern tainted by locale (e.g. use locale; s/\w//) Happens both with and without 'use re taint' unless specified. Happens with all modifiers (/g, /r etc) unless explicitly mentioned. $1 unexpectedly untainted: s/T// T =~ s/// under use re 'taint' original string unexpectedly untainted: s/L//, s/L//g return value unexpectedly untainted: T =~ s///g under no re 'taint' s/L//g, s/L//r return value unexpectedly tainted: s/T// s//T/r under no re 'taint' T =~ s/// under use re 'taint' s//T/ under use re 'taint' Also, with /ge, the original string becomes tainted as soon as possible (usually in the second entry to the /e code block) rather than only at the end, in code like $orig =~ s/T/...code.../ge The rationale behind the taintedness of the return value of s/// (in the non /r case), is that a boolean value shouldn't be tainted. This corresponds to the general perl tainting policy that boolean ops don't return tainted values. On the other hand, when it returns an integer (number of matches), that should be tainted. A couple of notes about the old tainting code this replaces: firstly, several occurrences of the following were NOOPs, since rxtainted was U8 and the bit being ORed was > 256: rxtainted |= RX_MATCH_TAINTED(rx) secondly, removing a whole bunch of the following didn't make any existing tests fail: TAINT_IF(rxtainted & 1);
Diffstat (limited to 'pp_hot.c')
-rw-r--r--pp_hot.c69
1 files changed, 51 insertions, 18 deletions
diff --git a/pp_hot.c b/pp_hot.c
index 7f9a13c7b5..0fa5727a54 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -2071,7 +2071,7 @@ PP(pp_subst)
I32 maxiters;
register I32 i;
bool once;
- U8 rxtainted;
+ U8 rxtainted = 0; /* holds various SUBST_TAINT_* flag bits */
char *orig;
U8 r_flags;
register REGEXP *rx = PM_GETRE(pm);
@@ -2127,11 +2127,19 @@ PP(pp_subst)
s = SvPV_mutable(TARG, len);
if (!SvPOKp(TARG) || SvTYPE(TARG) == SVt_PVGV)
force_on_match = 1;
- rxtainted = ((RX_EXTFLAGS(rx) & RXf_TAINTED) ||
- (PL_tainted && (pm->op_pmflags & PMf_RETAINT)));
- if (PL_tainted)
- rxtainted |= 2;
- TAINT_NOT;
+
+ /* only replace once? */
+ once = !(rpm->op_pmflags & PMf_GLOBAL);
+
+ if (PL_tainting) {
+ rxtainted = (
+ (SvTAINTED(TARG) ? SUBST_TAINT_STR : 0)
+ | ((RX_EXTFLAGS(rx) & RXf_TAINTED) ? SUBST_TAINT_PAT : 0)
+ | ((pm->op_pmflags & PMf_RETAINT) ? SUBST_TAINT_RETAINT : 0)
+ | ((once && !(rpm->op_pmflags & PMf_NONDESTRUCT))
+ ? SUBST_TAINT_BOOLRET : 0));
+ TAINT_NOT;
+ }
RX_MATCH_UTF8_set(rx, DO_UTF8(TARG));
@@ -2173,12 +2181,12 @@ PP(pp_subst)
*/
}
- /* only replace once? */
- once = !(rpm->op_pmflags & PMf_GLOBAL);
matched = CALLREGEXEC(rx, s, strend, orig, 0, TARG, NULL,
r_flags | REXEC_CHECKED);
/* known replacement string? */
if (dstr) {
+ if (SvTAINTED(dstr))
+ rxtainted |= SUBST_TAINT_REPL;
/* Upgrade the source if the replacement is utf8 but the source is not,
* but only if it matched; see
@@ -2250,7 +2258,8 @@ PP(pp_subst)
PL_curpm = pm;
SvSCREAM_off(TARG); /* disable possible screamer */
if (once) {
- rxtainted |= RX_MATCH_TAINTED(rx);
+ if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */
+ rxtainted |= SUBST_TAINT_PAT;
m = orig + RX_OFFS(rx)[0].start;
d = orig + RX_OFFS(rx)[0].end;
s = orig;
@@ -2283,7 +2292,6 @@ PP(pp_subst)
else {
sv_chop(TARG, d);
}
- TAINT_IF(rxtainted & 1);
SPAGAIN;
PUSHs(rpm->op_pmflags & PMf_NONDESTRUCT ? TARG : &PL_sv_yes);
}
@@ -2291,7 +2299,8 @@ PP(pp_subst)
do {
if (iters++ > maxiters)
DIE(aTHX_ "Substitution loop");
- rxtainted |= RX_MATCH_TAINTED(rx);
+ if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */
+ rxtainted |= SUBST_TAINT_PAT;
m = RX_OFFS(rx)[0].start + orig;
if ((i = m - s)) {
if (s != d)
@@ -2312,7 +2321,6 @@ PP(pp_subst)
SvCUR_set(TARG, d - SvPVX_const(TARG) + i);
Move(s, d, i+1, char); /* include the NUL */
}
- TAINT_IF(rxtainted & 1);
SPAGAIN;
if (rpm->op_pmflags & PMf_NONDESTRUCT)
PUSHs(TARG);
@@ -2329,13 +2337,19 @@ PP(pp_subst)
#ifdef PERL_OLD_COPY_ON_WRITE
have_a_cow:
#endif
- rxtainted |= RX_MATCH_TAINTED(rx);
+ if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */
+ rxtainted |= SUBST_TAINT_PAT;
dstr = newSVpvn_utf8(m, s-m, DO_UTF8(TARG));
SAVEFREESV(dstr);
PL_curpm = pm;
if (!c) {
register PERL_CONTEXT *cx;
SPAGAIN;
+ /* note that a whole bunch of local vars are saved here for
+ * use by pp_substcont: here's a list of them in case you're
+ * searching for places in this sub that uses a particular var:
+ * iters maxiters r_flags oldsave rxtainted orig dstr targ
+ * s m strend rx once */
PUSHSUBST(cx);
RETURNOP(cPMOP->op_pmreplrootu.op_pmreplroot);
}
@@ -2343,7 +2357,8 @@ PP(pp_subst)
do {
if (iters++ > maxiters)
DIE(aTHX_ "Substitution loop");
- rxtainted |= RX_MATCH_TAINTED(rx);
+ if (RX_MATCH_TAINTED(rx))
+ rxtainted |= SUBST_TAINT_PAT;
if (RX_MATCH_COPIED(rx) && RX_SUBBEG(rx) != orig) {
m = s;
s = orig;
@@ -2387,7 +2402,6 @@ PP(pp_subst)
doutf8 |= DO_UTF8(dstr);
SvPV_set(dstr, NULL);
- TAINT_IF(rxtainted & 1);
SPAGAIN;
if (rpm->op_pmflags & PMf_NONDESTRUCT)
PUSHs(TARG);
@@ -2397,9 +2411,28 @@ PP(pp_subst)
(void)SvPOK_only_UTF8(TARG);
if (doutf8)
SvUTF8_on(TARG);
- TAINT_IF(rxtainted);
- SvSETMAGIC(TARG);
- SvTAINT(TARG);
+
+ if (PL_tainting) {
+ if ((rxtainted & SUBST_TAINT_PAT) ||
+ ((rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_RETAINT)) ==
+ (SUBST_TAINT_STR|SUBST_TAINT_RETAINT))
+ )
+ (RX_MATCH_TAINTED_on(rx)); /* taint $1 et al */
+
+ if (!(rxtainted & SUBST_TAINT_BOOLRET)
+ && (rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_PAT))
+ )
+ SvTAINTED_on(TOPs); /* taint return value */
+ else
+ SvTAINTED_off(TOPs); /* may have got tainted earlier */
+
+ /* needed for mg_set below */
+ PL_tainted =
+ cBOOL(rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_PAT|SUBST_TAINT_REPL));
+ SvTAINT(TARG);
+ }
+ SvSETMAGIC(TARG); /* PL_tainted must be correctly set for this mg_set */
+ TAINT_NOT;
LEAVE_SCOPE(oldsave);
RETURN;
}