diff options
author | David Mitchell <davem@iabyn.com> | 2011-02-14 15:46:13 +0000 |
---|---|---|
committer | David Mitchell <davem@iabyn.com> | 2011-02-16 17:34:08 +0000 |
commit | 20be6587f85cec282e10810718c869dd958afe43 (patch) | |
tree | f80be6605c3f9d61ca41ac9d6c846c8701f4c2fb /pp_hot.c | |
parent | c769ddc70796c6d56fa78ec22fb70caee961bcbf (diff) | |
download | perl-20be6587f85cec282e10810718c869dd958afe43.tar.gz |
fix many s/// tainting bugs
This is a re-implementation of the tainting code in pp_subst and
pp_substcont. Although this fixes many bugs, because it's a de novo rewrite
of the tainting parts of the code in those two functions, it's quite
possible that it breaks some existing tainting behaviour. It doesn't break
any existing tests, although it turns out that this area was severely
under-tested anyway.
The main bugs that this commit fixes are as follows, where:
T = a tainted value
L = pattern tainted by locale (e.g. use locale; s/\w//)
Happens both with and without 'use re taint' unless specified.
Happens with all modifiers (/g, /r etc) unless explicitly mentioned.
$1 unexpectedly untainted:
s/T//
T =~ s/// under use re 'taint'
original string unexpectedly untainted:
s/L//, s/L//g
return value unexpectedly untainted:
T =~ s///g under no re 'taint'
s/L//g, s/L//r
return value unexpectedly tainted:
s/T//
s//T/r under no re 'taint'
T =~ s/// under use re 'taint'
s//T/ under use re 'taint'
Also, with /ge, the original string becomes tainted as soon as possible
(usually in the second entry to the /e code block) rather than only at the
end, in code like
$orig =~ s/T/...code.../ge
The rationale behind the taintedness of the return value of s/// (in the
non-/r case) is that a boolean value shouldn't be tainted. This
corresponds to the general perl tainting policy that boolean ops don't
return tainted values. On the other hand, when it returns an integer
(number of matches), that should be tainted.
A couple of notes about the old tainting code this replaces: firstly, several
occurrences of the following were NOOPs, since rxtainted was U8 and the bit
being ORed was >= 256:
rxtainted |= RX_MATCH_TAINTED(rx)
secondly, removing a whole bunch of the following didn't make any
existing tests fail:
TAINT_IF(rxtainted & 1);
Diffstat (limited to 'pp_hot.c')
-rw-r--r-- | pp_hot.c | 69 |
1 files changed, 51 insertions, 18 deletions
@@ -2071,7 +2071,7 @@ PP(pp_subst) I32 maxiters; register I32 i; bool once; - U8 rxtainted; + U8 rxtainted = 0; /* holds various SUBST_TAINT_* flag bits */ char *orig; U8 r_flags; register REGEXP *rx = PM_GETRE(pm); @@ -2127,11 +2127,19 @@ PP(pp_subst) s = SvPV_mutable(TARG, len); if (!SvPOKp(TARG) || SvTYPE(TARG) == SVt_PVGV) force_on_match = 1; - rxtainted = ((RX_EXTFLAGS(rx) & RXf_TAINTED) || - (PL_tainted && (pm->op_pmflags & PMf_RETAINT))); - if (PL_tainted) - rxtainted |= 2; - TAINT_NOT; + + /* only replace once? */ + once = !(rpm->op_pmflags & PMf_GLOBAL); + + if (PL_tainting) { + rxtainted = ( + (SvTAINTED(TARG) ? SUBST_TAINT_STR : 0) + | ((RX_EXTFLAGS(rx) & RXf_TAINTED) ? SUBST_TAINT_PAT : 0) + | ((pm->op_pmflags & PMf_RETAINT) ? SUBST_TAINT_RETAINT : 0) + | ((once && !(rpm->op_pmflags & PMf_NONDESTRUCT)) + ? SUBST_TAINT_BOOLRET : 0)); + TAINT_NOT; + } RX_MATCH_UTF8_set(rx, DO_UTF8(TARG)); @@ -2173,12 +2181,12 @@ PP(pp_subst) */ } - /* only replace once? */ - once = !(rpm->op_pmflags & PMf_GLOBAL); matched = CALLREGEXEC(rx, s, strend, orig, 0, TARG, NULL, r_flags | REXEC_CHECKED); /* known replacement string? */ if (dstr) { + if (SvTAINTED(dstr)) + rxtainted |= SUBST_TAINT_REPL; /* Upgrade the source if the replacement is utf8 but the source is not, * but only if it matched; see @@ -2250,7 +2258,8 @@ PP(pp_subst) PL_curpm = pm; SvSCREAM_off(TARG); /* disable possible screamer */ if (once) { - rxtainted |= RX_MATCH_TAINTED(rx); + if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */ + rxtainted |= SUBST_TAINT_PAT; m = orig + RX_OFFS(rx)[0].start; d = orig + RX_OFFS(rx)[0].end; s = orig; @@ -2283,7 +2292,6 @@ PP(pp_subst) else { sv_chop(TARG, d); } - TAINT_IF(rxtainted & 1); SPAGAIN; PUSHs(rpm->op_pmflags & PMf_NONDESTRUCT ? 
TARG : &PL_sv_yes); } @@ -2291,7 +2299,8 @@ PP(pp_subst) do { if (iters++ > maxiters) DIE(aTHX_ "Substitution loop"); - rxtainted |= RX_MATCH_TAINTED(rx); + if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */ + rxtainted |= SUBST_TAINT_PAT; m = RX_OFFS(rx)[0].start + orig; if ((i = m - s)) { if (s != d) @@ -2312,7 +2321,6 @@ PP(pp_subst) SvCUR_set(TARG, d - SvPVX_const(TARG) + i); Move(s, d, i+1, char); /* include the NUL */ } - TAINT_IF(rxtainted & 1); SPAGAIN; if (rpm->op_pmflags & PMf_NONDESTRUCT) PUSHs(TARG); @@ -2329,13 +2337,19 @@ PP(pp_subst) #ifdef PERL_OLD_COPY_ON_WRITE have_a_cow: #endif - rxtainted |= RX_MATCH_TAINTED(rx); + if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */ + rxtainted |= SUBST_TAINT_PAT; dstr = newSVpvn_utf8(m, s-m, DO_UTF8(TARG)); SAVEFREESV(dstr); PL_curpm = pm; if (!c) { register PERL_CONTEXT *cx; SPAGAIN; + /* note that a whole bunch of local vars are saved here for + * use by pp_substcont: here's a list of them in case you're + * searching for places in this sub that uses a particular var: + * iters maxiters r_flags oldsave rxtainted orig dstr targ + * s m strend rx once */ PUSHSUBST(cx); RETURNOP(cPMOP->op_pmreplrootu.op_pmreplroot); } @@ -2343,7 +2357,8 @@ PP(pp_subst) do { if (iters++ > maxiters) DIE(aTHX_ "Substitution loop"); - rxtainted |= RX_MATCH_TAINTED(rx); + if (RX_MATCH_TAINTED(rx)) + rxtainted |= SUBST_TAINT_PAT; if (RX_MATCH_COPIED(rx) && RX_SUBBEG(rx) != orig) { m = s; s = orig; @@ -2387,7 +2402,6 @@ PP(pp_subst) doutf8 |= DO_UTF8(dstr); SvPV_set(dstr, NULL); - TAINT_IF(rxtainted & 1); SPAGAIN; if (rpm->op_pmflags & PMf_NONDESTRUCT) PUSHs(TARG); @@ -2397,9 +2411,28 @@ PP(pp_subst) (void)SvPOK_only_UTF8(TARG); if (doutf8) SvUTF8_on(TARG); - TAINT_IF(rxtainted); - SvSETMAGIC(TARG); - SvTAINT(TARG); + + if (PL_tainting) { + if ((rxtainted & SUBST_TAINT_PAT) || + ((rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_RETAINT)) == + (SUBST_TAINT_STR|SUBST_TAINT_RETAINT)) + ) + (RX_MATCH_TAINTED_on(rx)); 
/* taint $1 et al */ + + if (!(rxtainted & SUBST_TAINT_BOOLRET) + && (rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_PAT)) + ) + SvTAINTED_on(TOPs); /* taint return value */ + else + SvTAINTED_off(TOPs); /* may have got tainted earlier */ + + /* needed for mg_set below */ + PL_tainted = + cBOOL(rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_PAT|SUBST_TAINT_REPL)); + SvTAINT(TARG); + } + SvSETMAGIC(TARG); /* PL_tainted must be correctly set for this mg_set */ + TAINT_NOT; LEAVE_SCOPE(oldsave); RETURN; } |