summaryrefslogtreecommitdiff
path: root/numeric.c
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2014-08-23 22:49:04 -0400
committerJarkko Hietaniemi <jhi@iki.fi>2014-08-24 22:31:49 -0400
commit8c12dc63d1f47b22b69bbcfe5c21dcadb14c5397 (patch)
tree49a27c8473749b45b7f71b3f4b4f2264b15772f6 /numeric.c
parentb2bea0a9cc3975a6d0df1ebfa82cd24df518b9b0 (diff)
downloadperl-8c12dc63d1f47b22b69bbcfe5c21dcadb14c5397.tar.gz
More robust inf/nan recognition and generation.
Drop INFNAN_PEEK, premature optimization and hard to get right (it basically imitates unrolled first half of grok_infnan). Just keep grok_infan fast. (There is one spot in grok_number_flags() where we peek at the next byte to avoid wasted work.) If falling back (from not having NV_INF/NV_NAN) to the native strtod (or similar), fake the input based on the grok_infnan result. Add last-resort ways to generate inf/nan. Recognize explicit unary plus, like "+Inf", and "INFINITE". In tests use cmp_ok(), fix typos, add tests.
Diffstat (limited to 'numeric.c')
-rw-r--r--numeric.c145
1 files changed, 84 insertions, 61 deletions
diff --git a/numeric.c b/numeric.c
index daaec06d0e..355980acea 100644
--- a/numeric.c
+++ b/numeric.c
@@ -586,16 +586,6 @@ Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
return grok_number_flags(pv, len, valuep, 0);
}
-/* Peek ahead to see whether this could be Inf/NaN/qNaN/snan/1.#INF */
-#define INFNAN_PEEK(s, send) \
- (s < send && \
- ((isALPHA_FOLD_EQ(*s, 'I') || isALPHA_FOLD_EQ(*s, 'N')) || \
- ((s + 4) < send && \
- (isALPHA_FOLD_EQ(*s, 'Q') || isALPHA_FOLD_EQ(*s, 'S')) && \
- isALPHA_FOLD_EQ(s[1], 'N')) || \
- ((s + 5) < send && \
- (*s == '1' && ((s[1] == '.' && s[2] == '#') || s[1] == '#')))))
-
/*
=for apidoc grok_infnan
@@ -623,7 +613,10 @@ Perl_grok_infnan(const char** sp, const char* send)
PERL_ARGS_ASSERT_GROK_INFNAN;
- if (*s == '-') {
+ if (*s == '+') {
+ s++; if (s == send) return 0;
+ }
+ else if (*s == '-') {
flags |= IS_NUMBER_NEG; /* Yes, -NaN happens. Incorrect but happens. */
s++; if (s == send) return 0;
}
@@ -650,8 +643,11 @@ Perl_grok_infnan(const char** sp, const char* send)
s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return 0;
s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return 0;
- /* XXX maybe also grok "infinite"? */
- s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return 0;
+ s++; if (s == send ||
+ /* allow either Infinity or Infinite */
+ (isALPHA_FOLD_NE(*s, 'Y') &&
+ isALPHA_FOLD_NE(*s, 'E')))
+ return 0;
s++;
} else if (*s)
return 0;
@@ -681,10 +677,11 @@ Perl_grok_infnan(const char** sp, const char* send)
flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
- /* NaN can be followed by various stuff since there are
- * multiple different NaN values, and some implementations
- * output the "payload" values, e.g. NaN123, NAN(abc),
- * some implementation just have weird stuff like NaN%. */
+ /* NaN can be followed by various stuff (NaNQ, NaNS), but
+ * there are also multiple different NaN values, and some
+ * implementations output the "payload" values,
+ * e.g. NaN123, NAN(abc), while some implementations just
+ * have weird stuff like NaN%. */
s = send;
}
else
@@ -707,8 +704,6 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
const char * const send = pv + len;
const char *d;
int numtype = 0;
- int sawinf = 0;
- int sawnan = 0;
PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS;
@@ -727,10 +722,10 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
return 0;
/* The first digit (after optional sign): note that might
- * also point to "infinity" or "nan". */
+ * also point to "infinity" or "nan", or "1.#INF". */
d = s;
- /* next must be digit or the radix separator or beginning of infinity */
+ /* next must be digit or the radix separator or beginning of infinity/nan */
if (isDIGIT(*s)) {
/* UVs are at least 32 bits, so the first 9 decimal digits cannot
overflow. */
@@ -841,30 +836,8 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
else
return 0;
}
- else {
- if (INFNAN_PEEK(d, send)) {
- int infnan = Perl_grok_infnan(&d, send);
- if ((infnan & IS_NUMBER_INFINITY)) {
- numtype |= infnan;
- sawinf = 1;
- }
- else if ((infnan & IS_NUMBER_NAN)) {
- numtype |= infnan;
- sawnan = 1;
- }
- else
- return 0;
- s = d;
- }
- }
- if (sawinf) {
- /* Keep the sign for infinity. */
- numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
- } else if (sawnan) {
- numtype &= IS_NUMBER_NEG; /* Clear sign for nan. */
- numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
- } else if (s < send) {
+ if (s < send) {
/* we can have an optional exponent part */
if (isALPHA_FOLD_EQ(*s, 'e')) {
s++;
@@ -894,6 +867,18 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
*valuep = 0;
return IS_NUMBER_IN_UV;
}
+ /* We could be e.g. at "Inf" or "NaN", or at the "#" of "1.#INF". */
+ if ((s + 2 < send) && strchr("inqs#", toFOLD(*s))) {
+ /* Really detect inf/nan. Start at d, not s, since the above
+ * code might have already consumed the "1." or "1". */
+ int infnan = Perl_grok_infnan(&d, send);
+ if ((infnan & IS_NUMBER_INFINITY)) {
+ return (numtype | infnan); /* Keep sign for infinity. */
+ }
+ else if ((infnan & IS_NUMBER_NAN)) {
+ return (numtype | infnan) & ~IS_NUMBER_NEG; /* Clear sign for nan. */
+ }
+ }
else if (flags & PERL_SCAN_TRAILING) {
return numtype | IS_NUMBER_TRAILING;
}
@@ -1174,30 +1159,68 @@ Perl_my_atof2(pTHX_ const char* orig, NV* value)
{
const char *p0 = negative ? s - 1 : s;
const char *p = p0;
-#if defined(NV_INF) && defined(NV_NAN)
- int infnan_flags = grok_infnan(&p, send);
- if (infnan_flags && p != p0) {
- if ((infnan_flags & IS_NUMBER_INFINITY)) {
- *value = (infnan_flags & IS_NUMBER_NEG) ? -NV_INF: NV_INF;
+ int infnan = grok_infnan(&p, send);
+ if (infnan && p != p0) {
+ /* If we can generate inf/nan directly, let's do so. */
+#ifdef NV_INF
+ if ((infnan & IS_NUMBER_INFINITY)) {
+ *value = (infnan & IS_NUMBER_NEG) ? -NV_INF: NV_INF;
return (char*)p;
}
- else if ((infnan_flags & IS_NUMBER_NAN)) {
+#endif
+#ifdef NV_NAN
+ if ((infnan & IS_NUMBER_NAN)) {
*value = NV_NAN;
return (char*)p;
}
- }
-#elif defined(HAS_STRTOD)
- if (INFNAN_PEEK(s, send)) {
- /* The native strtod() may not get all the possible
- * inf/nan strings INFNAN_PEEK() recognizes. */
- char* endp;
- NV nv = Perl_strtod(p, &endp);
- if (p != endp) {
- *value = nv;
- return endp;
+#endif
+#ifdef Perl_strtod
+ /* If still here, we didn't have either NV_INF or INV_NAN,
+ * and can try falling back to native strtod/strtold.
+ *
+ * The native interface might not recognize all the possible
+ * inf/nan strings Perl recognizes. What we can try
+ * is to try faking the input. We will try inf/-inf/nan
+ * as the most promising/portable input. */
+ {
+ const char* fake = NULL;
+ char* endp;
+ NV nv;
+ if ((infnan & IS_NUMBER_INFINITY)) {
+ fake = ((infnan & IS_NUMBER_NEG)) ? "-inf" : "inf";
+ }
+ else if ((infnan & IS_NUMBER_NAN)) {
+ fake = "nan";
+ }
+ assert(fake);
+ nv = Perl_strtod(fake, &endp);
+ if (fake != endp) {
+ if ((infnan & IS_NUMBER_INFINITY)) {
+#ifdef Perl_isinf
+ if (Perl_isinf(nv))
+ *value = nv;
+#else
+ /* last resort, may generate SIGFPE */
+ *value = Perl_exp((NV)1e9);
+ if ((infnan & IS_NUMBER_NEG))
+ *value = -*value;
+#endif
+ return (char*)p; /* p, not endp */
+ }
+ else if ((infnan & IS_NUMBER_NAN)) {
+#ifdef Perl_isnan
+ if (Perl_isnan(nv))
+ *value = nv;
+#else
+ /* last resort, may generate SIGFPE */
+ *value = Perl_log((NV)-1.0);
+#endif
+ return (char*)p; /* p, not endp */
+ }
+ }
}
+#endif /* #ifdef Perl_strtod */
}
-#endif
}
/* we accumulate digits into an integer; when this becomes too