From 3cf67f58ce8e42f9ce8d7be45936eedf79751b46 Mon Sep 17 00:00:00 2001 From: "Andrew J. Schorr" Date: Tue, 9 Aug 2016 11:33:27 -0400 Subject: If a strnum integer has a non-standard string representation, do not accept it as an integer array subscript. --- ChangeLog | 12 +++++++++++ awk.h | 8 +++++++ int_array.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 85 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9ac5be64..c3da0195 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2016-08-09 Andrew J. Schorr + + * awk.h: Add a comment explaining the NUMINT flag in more detail. + * int_array.c (standard_integer_string): New function to test whether + a string matches what would be produced by sprintf("%ld", <value>). + (is_integer): Fix bug -- if NUMBER was set, then the function was + accepting strnum values with nonstandard string representations. We + now call standard_integer_string to check that the string looks OK. + Also added ifdef'ed code to simplify the function by relying upon + force_number to parse the string, but this is disabled due to possible + negative performance impact. + 2016-08-01 Arnold D. Robbins * README, NEWS: Mark DJGPP port as unsupported. diff --git a/awk.h b/awk.h index 7288e20f..ff622898 100644 --- a/awk.h +++ b/awk.h @@ -420,6 +420,14 @@ typedef struct exp_node { * and add WSTRCUR to the flags so that we don't have to do the * conversion more than once. * + * The NUMINT flag may be used with a value of any type -- NUMBER, + * STRING, or STRNUM. It indicates that the string representation + * equals the result of sprintf("%ld", <numeric value>). So, for + * example, NUMINT should NOT be set if it's a strnum or string value + * where the string is " 1" or "01" or "+1" or "1.0" or "0.1E1". This + * is a hint to indicate that an integer array optimization may be + * used when this value appears as a subscript. + * * We hope that the rest of the flags are self-explanatory. 
:-) */ # define STRING 0x0002 /* assigned as string */ diff --git a/int_array.c b/int_array.c index a8de3d55..1fa32bd7 100644 --- a/int_array.c +++ b/int_array.c @@ -78,27 +78,86 @@ int_array_init(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED) return & success_node; } +/* + * standard_integer_string -- check whether the string matches what + * sprintf("%ld", <value>) would produce. This is accomplished by accepting + * only strings that look like /^0$/ or /^-?[1-9][0-9]*$/. This should be + * faster than comparing vs. the results of actually calling sprintf. + */ + +static bool +standard_integer_string(const char *s, size_t len) +{ + const char *end; + + if (len == 0) + return false; + if (*s == '0' && len == 1) + return true; + end = s + len; + /* ignore leading minus sign */ + if (*s == '-' && ++s == end) + return false; + /* check first char is [1-9] */ + if (*s < '1' || *s > '9') + return false; + while (++s < end) { + if (*s < '0' || *s > '9') + return false; + } + return true; +} + /* is_integer --- check if subscript is an integer */ NODE ** is_integer(NODE *symbol, NODE *subs) { +#ifndef CHECK_INTEGER_USING_FORCE_NUMBER long l; +#endif AWKNUM d; + if ((subs->flags & NUMINT) != 0) + /* quick exit */ + return & success_node; + if (subs == Nnull_string || do_mpfr) return NULL; - if ((subs->flags & NUMINT) != 0) - return & success_node; +#ifdef CHECK_INTEGER_USING_FORCE_NUMBER + /* + * This approach is much simpler, because we remove all of the strtol + * logic below. But this may be slower in some usage cases. 
+ */ + if ((subs->flags & NUMCUR) == 0) { + str2number(subs); - if ((subs->flags & NUMBER) != 0) { + /* check again in case force_number set NUMINT */ + if ((subs->flags & NUMINT) != 0) + return & success_node; + } +#else /* CHECK_INTEGER_USING_FORCE_NUMBER */ + if ((subs->flags & NUMCUR) != 0) { +#endif /* CHECK_INTEGER_USING_FORCE_NUMBER */ d = subs->numbr; if (d <= INT32_MAX && d >= INT32_MIN && d == (int32_t) d) { - subs->flags |= NUMINT; - return & success_node; + /* + * the numeric value is an integer, but we must + * protect against strings that cannot be generated + * from sprintf("%ld", <subscript value>). This can happen + * with strnum or string values. We could skip this + * check for pure NUMBER values, but unfortunately the + * code does not currently distinguish between NUMBER + * and strnum values. + */ + if ((subs->flags & STRCUR) == 0 || standard_integer_string(subs->stptr, subs->stlen)) { + subs->flags |= NUMINT; + return & success_node; + } } return NULL; +#ifndef CHECK_INTEGER_USING_FORCE_NUMBER } /* a[3]=1; print "3" in a -- true @@ -151,6 +210,7 @@ is_integer(NODE *symbol, NODE *subs) } } return NULL; +#endif /* CHECK_INTEGER_USING_FORCE_NUMBER */ } -- cgit v1.2.1