diff options
-rw-r--r-- | ext/XS-APItest/APItest.pm | 2 | ||||
-rw-r--r-- | ext/XS-APItest/numeric.xs | 2 | ||||
-rw-r--r-- | mg.c | 2 | ||||
-rw-r--r-- | numeric.c | 74 | ||||
-rw-r--r-- | perl.c | 3 | ||||
-rw-r--r-- | pod/perlclib.pod | 3 | ||||
-rw-r--r-- | regcomp.c | 15 | ||||
-rw-r--r-- | t/porting/known_pod_issues.dat | 2 | ||||
-rw-r--r-- | utf8.c | 2 | ||||
-rw-r--r-- | util.c | 6 |
10 files changed, 72 insertions, 39 deletions
diff --git a/ext/XS-APItest/APItest.pm b/ext/XS-APItest/APItest.pm index 07ff377dcf..61531fc97a 100644 --- a/ext/XS-APItest/APItest.pm +++ b/ext/XS-APItest/APItest.pm @@ -5,7 +5,7 @@ use strict; use warnings; use Carp; -our $VERSION = '0.98'; +our $VERSION = '0.99'; require XSLoader; diff --git a/ext/XS-APItest/numeric.xs b/ext/XS-APItest/numeric.xs index fac81ba3e0..847eb75d7c 100644 --- a/ext/XS-APItest/numeric.xs +++ b/ext/XS-APItest/numeric.xs @@ -40,7 +40,7 @@ grok_atoUV(number, endsv) const char *pv = SvPV(number, len); UV value = 0xdeadbeef; bool result; - const char* endptr = NULL; + const char* endptr = pv + len; PPCODE: EXTEND(SP,2); if (endsv == &PL_sv_undef) { @@ -3170,7 +3170,7 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) { const char *p = SvPV_const(sv, len); Groups_t *gary = NULL; - const char* endptr; + const char* endptr = p + len; UV uv; #ifdef _SC_NGROUPS_MAX int maxgrp = sysconf(_SC_NGROUPS_MAX); @@ -1049,31 +1049,39 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags) } /* -grok_atoUV +=for apidoc grok_atoUV -grok_atoUV parses a C-style zero-byte terminated string, looking for -a decimal unsigned integer. +parse a string, looking for a decimal unsigned integer. -Returns the unsigned integer, if a valid value can be parsed -from the beginning of the string. +On entry, C<pv> points to the beginning of the string; +C<valptr> points to a UV that will receive the converted value, if found; +C<endptr> is either NULL or points to a variable that points to one byte +beyond the point in C<pv> that this routine should examine. +If C<endptr> is NULL, C<pv> is assumed to be NUL-terminated. -Accepts only the decimal digits '0'..'9'. +Returns FALSE if C<pv> doesn't represent a valid unsigned integer value (with +no leading zeros). Otherwise it returns TRUE, and sets C<*valptr> to that +value. -As opposed to atoi or strtol, grok_atoUV does NOT allow optional -leading whitespace, or negative inputs. If such features are -required, the calling code needs to explicitly implement those. +If you constrain the portion of C<pv> that is looked at by this function (by +passing a non-NULL C<endptr>), and if the intial bytes of that portion form a +valid value, it will return TRUE, setting C<*endptr> to the byte following the +final digit of the value. But if there is no constraint at what's looked at, +all of C<pv> must be valid in order for TRUE to be returned. -Returns true if a valid value could be parsed. In that case, valptr -is set to the parsed value, and endptr (if provided) is set to point -to the character after the last digit. +The only characters this accepts are the decimal digits '0'..'9'. -Returns false otherwise. This can happen if a) there is a leading zero -followed by another digit; b) the digits would overflow a UV; or c) -there are trailing non-digits AND endptr is not provided. +As opposed to L<atoi(3)> or L<strtol(3)>, C<grok_atoUV> does NOT allow optional +leading whitespace, nor negative inputs. If such features are required, the +calling code needs to explicitly implement those. -Background: atoi has severe problems with illegal inputs, it cannot be +Note that this function returns FALSE for inputs that would overflow a UV, +or have leading zeros. Thus a single C<0> is accepted, but not C<00> nor +C<01>, C<002>, I<etc>. + +Background: C<atoi> has severe problems with illegal inputs, it cannot be used for incremental parsing, and therefore should be avoided -atoi and strtol are also affected by locale settings, which can also be +C<atoi> and C<strtol> are also affected by locale settings, which can also be seen as a bug (global state controlled by user environment). */ @@ -1088,15 +1096,27 @@ Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr) PERL_ARGS_ASSERT_GROK_ATOUV; - eptr = endptr ? endptr : &end2; - if (isDIGIT(*s)) { + if (endptr) { + eptr = endptr; + } + else { + end2 = s + strlen(s); + eptr = &end2; + } + + if ( *eptr <= s + || ! isDIGIT(*s)) + { + return FALSE; + } + /* Single-digit inputs are quite common. */ val = *s++ - '0'; - if (isDIGIT(*s)) { + if (s < *eptr && isDIGIT(*s)) { /* Fail on extra leading zeros. */ if (val == 0) return FALSE; - while (isDIGIT(*s)) { + while (s < *eptr && isDIGIT(*s)) { /* This could be unrolled like in grok_number(), but * the expected uses of this are not speed-needy, and * unlikely to need full 64-bitness. */ @@ -1109,12 +1129,14 @@ Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr) } } } + if (endptr == NULL) { + if (*s) { + return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */ + } + } + else { + *endptr = s; } - if (s == pv) - return FALSE; - if (endptr == NULL && *s) - return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */ - *eptr = s; *valptr = val; return TRUE; } @@ -3351,7 +3351,7 @@ Perl_get_debug_opts(pTHX_ const char **s, bool givehelp) } } else if (isDIGIT(**s)) { - const char* e; + const char* e = *s + strlen(*s); if (grok_atoUV(*s, &uv, &e)) *s = e; for (; isWORDCHAR(**s); (*s)++) ; @@ -3946,6 +3946,7 @@ S_open_script(pTHX_ const char *scriptname, bool dosearch, bool *suidscript) UV uv; /* if find_script() returns, it returns a malloc()-ed value */ scriptname = PL_origfilename = find_script(scriptname, dosearch, NULL, 1); + s = scriptname + strlen(scriptname); if (strBEGINs(scriptname, "/dev/fd/") && isDIGIT(scriptname[8]) diff --git a/pod/perlclib.pod b/pod/perlclib.pod index 1e6cf779a0..176f8548ee 100644 --- a/pod/perlclib.pod +++ b/pod/perlclib.pod @@ -211,7 +211,8 @@ C<toUPPER_uni>, as described in L<perlapi/Character case changing>.) Typical use is to do range checks on C<uv> before casting: - int i; UV uv; char* end_ptr; + int i; UV uv; + char* end_ptr = input_end; if (grok_atoUV(input, &uv, &end_ptr) && uv <= INT_MAX) i = (int)uv; @@ -11247,6 +11247,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) RExC_parse++; is_neg = TRUE; } + endptr = RExC_end; if (grok_atoUV(RExC_parse, &unum, &endptr) && unum <= I32_MAX ) { @@ -11485,6 +11486,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) } else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) { UV uv; + endptr = RExC_end; if (grok_atoUV(RExC_parse, &uv, &endptr) && uv <= I32_MAX ) { @@ -11520,6 +11522,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) /* (?(1)...) */ char c; UV uv; + endptr = RExC_end; if (grok_atoUV(RExC_parse, &uv, &endptr) && uv <= I32_MAX ) { @@ -12029,6 +12032,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) maxpos = next; RExC_parse++; if (isDIGIT(*RExC_parse)) { + endptr = RExC_end; if (!grok_atoUV(RExC_parse, &uv, &endptr)) vFAIL("Invalid quantifier in {,}"); if (uv >= REG_INFTY) @@ -12042,6 +12046,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) else maxpos = RExC_parse; if (isDIGIT(*maxpos)) { + endptr = RExC_end; if (!grok_atoUV(maxpos, &uv, &endptr)) vFAIL("Invalid quantifier in {,}"); if (uv >= REG_INFTY) @@ -12799,9 +12804,9 @@ S_new_regcurly(const char *s, const char *e) * in which case return I32_MAX (rather than possibly 32-bit wrapping) */ static I32 -S_backref_value(char *p) +S_backref_value(char *p, char *e) { - const char* endptr; + const char* endptr = e; UV val; if (grok_atoUV(p, &val, &endptr) && val <= I32_MAX) return (I32)val; @@ -13347,7 +13352,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) if (RExC_parse >= RExC_end) { goto unterminated_g; } - num = S_backref_value(RExC_parse); + num = S_backref_value(RExC_parse, RExC_end); if (num == 0) vFAIL("Reference to invalid group 0"); else if (num == I32_MAX) { @@ -13365,7 +13370,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) } } else { - num = S_backref_value(RExC_parse); + num = S_backref_value(RExC_parse, RExC_end); /* bare \NNN might be backref or octal - if it is larger * than or equal RExC_npar then it is assumed to be an * octal escape. Note RExC_npar is +1 from the actual @@ -13742,7 +13747,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) /* NOTE, RExC_npar is 1 more than the actual number of * parens we have seen so far, hence the < RExC_npar below. */ - if ( !isDIGIT(p[1]) || S_backref_value(p) < RExC_npar) + if ( !isDIGIT(p[1]) || S_backref_value(p, RExC_end) < RExC_npar) { /* Not to be treated as an octal constant, go find backref */ --p; diff --git a/t/porting/known_pod_issues.dat b/t/porting/known_pod_issues.dat index e89d5c02a1..9237c57df2 100644 --- a/t/porting/known_pod_issues.dat +++ b/t/porting/known_pod_issues.dat @@ -23,6 +23,7 @@ Apache::SmallProf Archive::Extract Array::Base atan2(3) +atoi(3) Attribute::Constant autobox B::Generate @@ -283,6 +284,7 @@ strftime(3) strictures String::Base String::Scanf +strtol(3) Switch tar(1) Template::Declare @@ -5201,6 +5201,7 @@ Perl__swash_to_invlist(pTHX_ SV* const swash) /* Get the 0th element, which is needed to setup the inversion list * */ while (isSPACE(*l)) l++; + after_atou = (char *) lend; if (!grok_atoUV((const char *)l, &element0, &after_atou)) { Perl_croak(aTHX_ "panic: Expecting a valid 0th element for" " inversion list"); @@ -5217,6 +5218,7 @@ Perl__swash_to_invlist(pTHX_ SV* const swash) " elements than available", elements); } while (isSPACE(*l)) l++; + after_atou = (char *) lend; if (!grok_atoUV((const char *)l, other_elements_ptr++, &after_atou)) { @@ -4320,7 +4320,7 @@ Perl_parse_unicode_opts(pTHX_ const char **popt) if (*p) { if (isDIGIT(*p)) { - const char* endptr; + const char* endptr = p + strlen(p); UV uv; if (grok_atoUV(p, &uv, &endptr) && uv <= U32_MAX) { opt = (U32)uv; @@ -4707,7 +4707,7 @@ S_mem_log_common(enum mem_log_type mlt, const UV n, * timeval. */ { STRLEN len; - const char* endptr; + const char* endptr = pmlenv + stren(pmlenv); int fd; UV uv; if (grok_atoUV(pmlenv, &uv, &endptr) /* Ignore endptr. */ @@ -5989,7 +5989,7 @@ static const char* atos_parse(const char* p, * The matched regular expression is roughly "\(.*:\d+\)\s*$" */ const char* source_number_start; const char* source_name_end; - const char* source_line_end; + const char* source_line_end = start; const char* close_paren; UV uv; |