summary refs log tree commit diff
diff options
context:
space:
mode:
author Andrew J. Schorr <aschorr@telemetry-investments.com> 2016-08-09 11:33:27 -0400
committer Andrew J. Schorr <aschorr@telemetry-investments.com> 2016-08-09 11:33:27 -0400
commit 3cf67f58ce8e42f9ce8d7be45936eedf79751b46 (patch)
tree 449f8c9cc41797a23eb9502b4ed521525a43c183
parent 560cf6d52f978e360e9b2a10f7ddd9a0197e0b31 (diff)
download gawk-3cf67f58ce8e42f9ce8d7be45936eedf79751b46.tar.gz
If a strnum integer has a non-standard string representation, do not accept it as an integer array subscript.
-rw-r--r-- ChangeLog 12
-rw-r--r-- awk.h 8
-rw-r--r-- int_array.c 70
3 files changed, 85 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index 9ac5be64..c3da0195 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2016-08-09 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * awk.h: Add a comment explaining the NUMINT flag in more detail.
+ * int_array.c (standard_integer_string): New function to test whether
+ a string matches what would be produced by sprintf("%ld", <value>).
+ (is_integer): Fix bug -- if NUMBER was set, then the function was
+ accepting strnum values with nonstandard string representations. We
+ now call standard_integer_string to check that the string looks OK.
+ Also added ifdef'ed code to simplify the function by relying upon
+ force_number to parse the string, but this is disabled due to possible
+ negative performance impact.
+
2016-08-01 Arnold D. Robbins <arnold@skeeve.com>
* README, NEWS: Mark DJGPP port as unsupported.
diff --git a/awk.h b/awk.h
index 7288e20f..ff622898 100644
--- a/awk.h
+++ b/awk.h
@@ -420,6 +420,14 @@ typedef struct exp_node {
* and add WSTRCUR to the flags so that we don't have to do the
* conversion more than once.
*
+ * The NUMINT flag may be used with a value of any type -- NUMBER,
+ * STRING, or STRNUM. It indicates that the string representation
+ * equals the result of sprintf("%ld", <numeric value>). So, for
+ * example, NUMINT should NOT be set if it's a strnum or string value
+ * where the string is " 1" or "01" or "+1" or "1.0" or "0.1E1". This
+ * is a hint to indicate that an integer array optimization may be
+ * used when this value appears as a subscript.
+ *
* We hope that the rest of the flags are self-explanatory. :-)
*/
# define STRING 0x0002 /* assigned as string */
diff --git a/int_array.c b/int_array.c
index a8de3d55..1fa32bd7 100644
--- a/int_array.c
+++ b/int_array.c
@@ -78,27 +78,86 @@ int_array_init(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
return & success_node;
}
+/*
+ * standard_integer_string -- check whether the string matches what
+ * sprintf("%ld", <value>) would produce. This is accomplished by accepting
+ * only strings that look like /^0$/ or /^-?[1-9][0-9]*$/. This should be
+ * faster than comparing vs. the results of actually calling sprintf.
+ *
+ * s is the candidate text and len is its length in bytes; only the first
+ * len bytes are examined, so s does not need to be NUL-terminated.
+ *
+ * Returns true only for the canonical decimal forms described above. The
+ * empty string, a lone "-", "-0", and anything containing a leading '+',
+ * leading zeros, whitespace, or any other non-digit character is rejected.
+ * No range check is made here: a digit string of any length is accepted.
+ */
+
+static bool
+standard_integer_string(const char *s, size_t len)
+{
+ const char *end;
+
+ if (len == 0) /* the empty string is never an integer */
+ return false;
+ if (*s == '0' && len == 1) /* exactly "0" is the only form allowed to start with a zero */
+ return true;
+ end = s + len; /* one past the last byte that may be read */
+ /* skip an optional leading minus sign; a lone "-" is not a number */
+ if (*s == '-' && ++s == end)
+ return false;
+ /* check first char is [1-9]; this also rejects "-0" and leading zeros */
+ if (*s < '1' || *s > '9')
+ return false;
+ while (++s < end) { /* every remaining byte must be a decimal digit */
+ if (*s < '0' || *s > '9')
+ return false;
+ }
+ return true;
+}
+
/* is_integer --- check if subscript is an integer */
NODE **
is_integer(NODE *symbol, NODE *subs)
{
+#ifndef CHECK_INTEGER_USING_FORCE_NUMBER
long l;
+#endif
AWKNUM d;
+ if ((subs->flags & NUMINT) != 0)
+ /* quick exit */
+ return & success_node;
+
if (subs == Nnull_string || do_mpfr)
return NULL;
- if ((subs->flags & NUMINT) != 0)
- return & success_node;
+#ifdef CHECK_INTEGER_USING_FORCE_NUMBER
+ /*
+ * This approach is much simpler, because we remove all of the strtol
+ * logic below. But this may be slower in some usage cases.
+ */
+ if ((subs->flags & NUMCUR) == 0) {
+ str2number(subs);
- if ((subs->flags & NUMBER) != 0) {
+ /* check again in case force_number set NUMINT */
+ if ((subs->flags & NUMINT) != 0)
+ return & success_node;
+ }
+#else /* CHECK_INTEGER_USING_FORCE_NUMBER */
+ if ((subs->flags & NUMCUR) != 0) {
+#endif /* CHECK_INTEGER_USING_FORCE_NUMBER */
d = subs->numbr;
if (d <= INT32_MAX && d >= INT32_MIN && d == (int32_t) d) {
- subs->flags |= NUMINT;
- return & success_node;
+ /*
+ * the numeric value is an integer, but we must
+ * protect against strings that cannot be generated
+ * from sprintf("%ld", <subscript>). This can happen
+ * with strnum or string values. We could skip this
+ * check for pure NUMBER values, but unfortunately the
+ * code does not currently distinguish between NUMBER
+ * and strnum values.
+ */
+ if ((subs->flags & STRCUR) == 0 || standard_integer_string(subs->stptr, subs->stlen)) {
+ subs->flags |= NUMINT;
+ return & success_node;
+ }
}
return NULL;
+#ifndef CHECK_INTEGER_USING_FORCE_NUMBER
}
/* a[3]=1; print "3" in a -- true
@@ -151,6 +210,7 @@ is_integer(NODE *symbol, NODE *subs)
}
}
return NULL;
+#endif /* CHECK_INTEGER_USING_FORCE_NUMBER */
}