Use UV instead of Size_t.

A strong reason for using UV is the promised custom codepoints, which go beyond 32 bits. The overflow logic didn't work if Size_t was 32-bit but UV was 64-bit. Steal the battle-proven logic from grok_number_flags(). numeric.xs and grok.t were not right on 32-bit either. Add comments.
author: Jarkko Hietaniemi <jhi@iki.fi> 2014-07-23 10:38:50 -0400
committer: Jarkko Hietaniemi <jhi@iki.fi> 2014-07-24 09:08:14 -0400
commit: 75feedba47600d94d18d49dbcbdf46393b6c6cc5 (patch)
tree: fafed80ec3d9e71aaeadaacff83099aad527e77f /numeric.c
parent: 4ba3adde25c4edf2e470f13677632b6d2f9e2fcd (diff)
download: perl-75feedba47600d94d18d49dbcbdf46393b6c6cc5.tar.gz
1 files changed, 27 insertions, 32 deletions
diff --git a/numeric.c b/numeric.c
index 66e4e75815..d3eaa60f25 100644
--- a/numeric.c
+++ b/numeric.c
@@ -804,7 +804,7 @@ leading whitespace, or negative inputs.  If such features are
 required, the calling code needs to explicitly implement those.
 
 If a valid value cannot be parsed, returns either zero (if non-digits
-are met before any digits) or Size_t_MAX (if the value overflows).
+are met before any digits) or UV_MAX (if the value overflows).
 
 Note that extraneous leading zeros also count as an overflow
 (meaning that only "0" is the zero).
@@ -825,51 +825,46 @@ seen as a bug (global state controlled by user environment).
 =cut
 */
 
-Size_t
+UV
 Perl_grok_atou(const char *pv, const char** endptr)
 {
     const char* s = pv;
     const char** eptr;
     const char* end2; /* Used in case endptr is NULL. */
-    /* With Size_t_size of 8 or 4 this works out to be the start plus
-     * either 20 or 10.  When 128 or 256-bit systems became reality,
-     * this overshoots (should get 39, 78, but gets 40, 80). */
-    const char* maxend = s + 10 * (Size_t_size / 4);
-    Size_t val = 0; /* The return value. */
+    UV val = 0; /* The return value. */
+    const UV max_div_10 = UV_MAX / 10;
+    const UV max_mod_10 = UV_MAX % 10;
 
     PERL_ARGS_ASSERT_GROK_ATOU;
 
     eptr = endptr ? endptr : &end2;
-    if (isDIGIT(*s) && !isDIGIT(*(s + 1))) {
-        /* Single-digit inputs are quite common cases, and in addition
-         * the case of zero ("0") here simplifies the decoding loop:
-         * not having to think whether "000" or "000123" are valid
-         * (now they are invalid). */
+    if (isDIGIT(*s)) {
+        /* Single-digit inputs are quite common. */
         val = *s++ - '0';
-    } else {
-        Size_t tmp = 0; /* Temporary accumulator. */
-
-        while (s < maxend && *s) {
-            /* This could be unrolled like in grok_number(), but
-             * the expected uses of this are not speed-needy, and
-             * unlikely to need full 64-bitness. */
-            if (isDIGIT(*s)) {
-                int digit = *s++ - '0';
-                tmp = tmp * 10 + digit;
-                if (tmp > val) { /* This implictly rejects leading zeros. */
-                    val = tmp;
-                } else { /* Overflow. */
+        if (isDIGIT(*s)) {
+            /* Extra leading zeros cause overflow. */
+            if (val == 0) {
+                *eptr = NULL;
+                return UV_MAX;
+            }
+            while (isDIGIT(*s)) {
+                /* This could be unrolled like in grok_number(), but
+                 * the expected uses of this are not speed-needy, and
+                 * unlikely to need full 64-bitness. */
+                U8 digit = *s++ - '0';
+                if (val < max_div_10 ||
+                    (val == max_div_10 && digit <= max_mod_10)) {
+                    val = val * 10 + digit;
+                } else {
                     *eptr = NULL;
-                    return Size_t_MAX;
+                    return UV_MAX;
                 }
-            } else {
-                break;
             }
         }
-        if (s == pv) {
-            *eptr = NULL; /* If no progress, failed to parse anything. */
-            return 0;
-        }
+    }
+    if (s == pv) {
+        *eptr = NULL; /* If no progress, failed to parse anything. */
+        return 0;
     }
     if (endptr == NULL && *s) {
         return 0; /* If endptr is NULL, no trailing non-digits allowed. */
author	Jarkko Hietaniemi <jhi@iki.fi>	2014-07-23 10:38:50 -0400
committer	Jarkko Hietaniemi <jhi@iki.fi>	2014-07-24 09:08:14 -0400
commit	75feedba47600d94d18d49dbcbdf46393b6c6cc5 (patch)
tree	fafed80ec3d9e71aaeadaacff83099aad527e77f /numeric.c
parent	4ba3adde25c4edf2e470f13677632b6d2f9e2fcd (diff)
download	perl-75feedba47600d94d18d49dbcbdf46393b6c6cc5.tar.gz