summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ext/XS-APItest/APItest.pm2
-rw-r--r--ext/XS-APItest/numeric.xs2
-rw-r--r--mg.c2
-rw-r--r--numeric.c74
-rw-r--r--perl.c3
-rw-r--r--pod/perlclib.pod3
-rw-r--r--regcomp.c15
-rw-r--r--t/porting/known_pod_issues.dat2
-rw-r--r--utf8.c2
-rw-r--r--util.c6
10 files changed, 72 insertions, 39 deletions
diff --git a/ext/XS-APItest/APItest.pm b/ext/XS-APItest/APItest.pm
index 07ff377dcf..61531fc97a 100644
--- a/ext/XS-APItest/APItest.pm
+++ b/ext/XS-APItest/APItest.pm
@@ -5,7 +5,7 @@ use strict;
use warnings;
use Carp;
-our $VERSION = '0.98';
+our $VERSION = '0.99';
require XSLoader;
diff --git a/ext/XS-APItest/numeric.xs b/ext/XS-APItest/numeric.xs
index fac81ba3e0..847eb75d7c 100644
--- a/ext/XS-APItest/numeric.xs
+++ b/ext/XS-APItest/numeric.xs
@@ -40,7 +40,7 @@ grok_atoUV(number, endsv)
const char *pv = SvPV(number, len);
UV value = 0xdeadbeef;
bool result;
- const char* endptr = NULL;
+ const char* endptr = pv + len;
PPCODE:
EXTEND(SP,2);
if (endsv == &PL_sv_undef) {
diff --git a/mg.c b/mg.c
index c03bf257ff..b022d63442 100644
--- a/mg.c
+++ b/mg.c
@@ -3170,7 +3170,7 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg)
{
const char *p = SvPV_const(sv, len);
Groups_t *gary = NULL;
- const char* endptr;
+ const char* endptr = p + len;
UV uv;
#ifdef _SC_NGROUPS_MAX
int maxgrp = sysconf(_SC_NGROUPS_MAX);
diff --git a/numeric.c b/numeric.c
index 99531ef921..e71ab39293 100644
--- a/numeric.c
+++ b/numeric.c
@@ -1049,31 +1049,39 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
}
/*
-grok_atoUV
+=for apidoc grok_atoUV
-grok_atoUV parses a C-style zero-byte terminated string, looking for
-a decimal unsigned integer.
+parse a string, looking for a decimal unsigned integer.
-Returns the unsigned integer, if a valid value can be parsed
-from the beginning of the string.
+On entry, C<pv> points to the beginning of the string;
+C<valptr> points to a UV that will receive the converted value, if found;
+C<endptr> is either NULL or points to a variable that points to one byte
+beyond the point in C<pv> that this routine should examine.
+If C<endptr> is NULL, C<pv> is assumed to be NUL-terminated.
-Accepts only the decimal digits '0'..'9'.
+Returns FALSE if C<pv> doesn't represent a valid unsigned integer value (with
+no leading zeros). Otherwise it returns TRUE, and sets C<*valptr> to that
+value.
-As opposed to atoi or strtol, grok_atoUV does NOT allow optional
-leading whitespace, or negative inputs. If such features are
-required, the calling code needs to explicitly implement those.
+If you constrain the portion of C<pv> that is looked at by this function (by
+passing a non-NULL C<endptr>), and if the intial bytes of that portion form a
+valid value, it will return TRUE, setting C<*endptr> to the byte following the
+final digit of the value. But if there is no constraint at what's looked at,
+all of C<pv> must be valid in order for TRUE to be returned.
-Returns true if a valid value could be parsed. In that case, valptr
-is set to the parsed value, and endptr (if provided) is set to point
-to the character after the last digit.
+The only characters this accepts are the decimal digits '0'..'9'.
-Returns false otherwise. This can happen if a) there is a leading zero
-followed by another digit; b) the digits would overflow a UV; or c)
-there are trailing non-digits AND endptr is not provided.
+As opposed to L<atoi(3)> or L<strtol(3)>, C<grok_atoUV> does NOT allow optional
+leading whitespace, nor negative inputs. If such features are required, the
+calling code needs to explicitly implement those.
-Background: atoi has severe problems with illegal inputs, it cannot be
+Note that this function returns FALSE for inputs that would overflow a UV,
+or have leading zeros. Thus a single C<0> is accepted, but not C<00> nor
+C<01>, C<002>, I<etc>.
+
+Background: C<atoi> has severe problems with illegal inputs, it cannot be
used for incremental parsing, and therefore should be avoided
-atoi and strtol are also affected by locale settings, which can also be
+C<atoi> and C<strtol> are also affected by locale settings, which can also be
seen as a bug (global state controlled by user environment).
*/
@@ -1088,15 +1096,27 @@ Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr)
PERL_ARGS_ASSERT_GROK_ATOUV;
- eptr = endptr ? endptr : &end2;
- if (isDIGIT(*s)) {
+ if (endptr) {
+ eptr = endptr;
+ }
+ else {
+ end2 = s + strlen(s);
+ eptr = &end2;
+ }
+
+ if ( *eptr <= s
+ || ! isDIGIT(*s))
+ {
+ return FALSE;
+ }
+
/* Single-digit inputs are quite common. */
val = *s++ - '0';
- if (isDIGIT(*s)) {
+ if (s < *eptr && isDIGIT(*s)) {
/* Fail on extra leading zeros. */
if (val == 0)
return FALSE;
- while (isDIGIT(*s)) {
+ while (s < *eptr && isDIGIT(*s)) {
/* This could be unrolled like in grok_number(), but
* the expected uses of this are not speed-needy, and
* unlikely to need full 64-bitness. */
@@ -1109,12 +1129,14 @@ Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr)
}
}
}
+ if (endptr == NULL) {
+ if (*s) {
+ return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */
+ }
+ }
+ else {
+ *endptr = s;
}
- if (s == pv)
- return FALSE;
- if (endptr == NULL && *s)
- return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */
- *eptr = s;
*valptr = val;
return TRUE;
}
diff --git a/perl.c b/perl.c
index e6dfa8dc05..86376e30da 100644
--- a/perl.c
+++ b/perl.c
@@ -3351,7 +3351,7 @@ Perl_get_debug_opts(pTHX_ const char **s, bool givehelp)
}
}
else if (isDIGIT(**s)) {
- const char* e;
+ const char* e = *s + strlen(*s);
if (grok_atoUV(*s, &uv, &e))
*s = e;
for (; isWORDCHAR(**s); (*s)++) ;
@@ -3946,6 +3946,7 @@ S_open_script(pTHX_ const char *scriptname, bool dosearch, bool *suidscript)
UV uv;
/* if find_script() returns, it returns a malloc()-ed value */
scriptname = PL_origfilename = find_script(scriptname, dosearch, NULL, 1);
+ s = scriptname + strlen(scriptname);
if (strBEGINs(scriptname, "/dev/fd/")
&& isDIGIT(scriptname[8])
diff --git a/pod/perlclib.pod b/pod/perlclib.pod
index 1e6cf779a0..176f8548ee 100644
--- a/pod/perlclib.pod
+++ b/pod/perlclib.pod
@@ -211,7 +211,8 @@ C<toUPPER_uni>, as described in L<perlapi/Character case changing>.)
Typical use is to do range checks on C<uv> before casting:
- int i; UV uv; char* end_ptr;
+ int i; UV uv;
+ char* end_ptr = input_end;
if (grok_atoUV(input, &uv, &end_ptr)
&& uv <= INT_MAX)
i = (int)uv;
diff --git a/regcomp.c b/regcomp.c
index f694ff7f8b..2d9dd137c4 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -11247,6 +11247,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
RExC_parse++;
is_neg = TRUE;
}
+ endptr = RExC_end;
if (grok_atoUV(RExC_parse, &unum, &endptr)
&& unum <= I32_MAX
) {
@@ -11485,6 +11486,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
}
else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
UV uv;
+ endptr = RExC_end;
if (grok_atoUV(RExC_parse, &uv, &endptr)
&& uv <= I32_MAX
) {
@@ -11520,6 +11522,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
/* (?(1)...) */
char c;
UV uv;
+ endptr = RExC_end;
if (grok_atoUV(RExC_parse, &uv, &endptr)
&& uv <= I32_MAX
) {
@@ -12029,6 +12032,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
maxpos = next;
RExC_parse++;
if (isDIGIT(*RExC_parse)) {
+ endptr = RExC_end;
if (!grok_atoUV(RExC_parse, &uv, &endptr))
vFAIL("Invalid quantifier in {,}");
if (uv >= REG_INFTY)
@@ -12042,6 +12046,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
else
maxpos = RExC_parse;
if (isDIGIT(*maxpos)) {
+ endptr = RExC_end;
if (!grok_atoUV(maxpos, &uv, &endptr))
vFAIL("Invalid quantifier in {,}");
if (uv >= REG_INFTY)
@@ -12799,9 +12804,9 @@ S_new_regcurly(const char *s, const char *e)
* in which case return I32_MAX (rather than possibly 32-bit wrapping) */
static I32
-S_backref_value(char *p)
+S_backref_value(char *p, char *e)
{
- const char* endptr;
+ const char* endptr = e;
UV val;
if (grok_atoUV(p, &val, &endptr) && val <= I32_MAX)
return (I32)val;
@@ -13347,7 +13352,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
if (RExC_parse >= RExC_end) {
goto unterminated_g;
}
- num = S_backref_value(RExC_parse);
+ num = S_backref_value(RExC_parse, RExC_end);
if (num == 0)
vFAIL("Reference to invalid group 0");
else if (num == I32_MAX) {
@@ -13365,7 +13370,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
}
}
else {
- num = S_backref_value(RExC_parse);
+ num = S_backref_value(RExC_parse, RExC_end);
/* bare \NNN might be backref or octal - if it is larger
* than or equal RExC_npar then it is assumed to be an
* octal escape. Note RExC_npar is +1 from the actual
@@ -13742,7 +13747,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
/* NOTE, RExC_npar is 1 more than the actual number of
* parens we have seen so far, hence the < RExC_npar below. */
- if ( !isDIGIT(p[1]) || S_backref_value(p) < RExC_npar)
+ if ( !isDIGIT(p[1]) || S_backref_value(p, RExC_end) < RExC_npar)
{ /* Not to be treated as an octal constant, go
find backref */
--p;
diff --git a/t/porting/known_pod_issues.dat b/t/porting/known_pod_issues.dat
index e89d5c02a1..9237c57df2 100644
--- a/t/porting/known_pod_issues.dat
+++ b/t/porting/known_pod_issues.dat
@@ -23,6 +23,7 @@ Apache::SmallProf
Archive::Extract
Array::Base
atan2(3)
+atoi(3)
Attribute::Constant
autobox
B::Generate
@@ -283,6 +284,7 @@ strftime(3)
strictures
String::Base
String::Scanf
+strtol(3)
Switch
tar(1)
Template::Declare
diff --git a/utf8.c b/utf8.c
index ec67b08d50..406fb8bdde 100644
--- a/utf8.c
+++ b/utf8.c
@@ -5201,6 +5201,7 @@ Perl__swash_to_invlist(pTHX_ SV* const swash)
/* Get the 0th element, which is needed to setup the inversion list
* */
while (isSPACE(*l)) l++;
+ after_atou = (char *) lend;
if (!grok_atoUV((const char *)l, &element0, &after_atou)) {
Perl_croak(aTHX_ "panic: Expecting a valid 0th element for"
" inversion list");
@@ -5217,6 +5218,7 @@ Perl__swash_to_invlist(pTHX_ SV* const swash)
" elements than available", elements);
}
while (isSPACE(*l)) l++;
+ after_atou = (char *) lend;
if (!grok_atoUV((const char *)l, other_elements_ptr++,
&after_atou))
{
diff --git a/util.c b/util.c
index 647f53307d..7282dd9cfe 100644
--- a/util.c
+++ b/util.c
@@ -4320,7 +4320,7 @@ Perl_parse_unicode_opts(pTHX_ const char **popt)
if (*p) {
if (isDIGIT(*p)) {
- const char* endptr;
+ const char* endptr = p + strlen(p);
UV uv;
if (grok_atoUV(p, &uv, &endptr) && uv <= U32_MAX) {
opt = (U32)uv;
@@ -4707,7 +4707,7 @@ S_mem_log_common(enum mem_log_type mlt, const UV n,
* timeval. */
{
STRLEN len;
- const char* endptr;
+ const char* endptr = pmlenv + stren(pmlenv);
int fd;
UV uv;
if (grok_atoUV(pmlenv, &uv, &endptr) /* Ignore endptr. */
@@ -5989,7 +5989,7 @@ static const char* atos_parse(const char* p,
* The matched regular expression is roughly "\(.*:\d+\)\s*$" */
const char* source_number_start;
const char* source_name_end;
- const char* source_line_end;
+ const char* source_line_end = start;
const char* close_paren;
UV uv;