summary | refs | log | tree | commit | diff
path: root/Python
diff options
context:
space:
mode:
author: Mark Dickinson <dickinsm@gmail.com> 2013-11-26 16:38:25 +0000
committer: Mark Dickinson <dickinsm@gmail.com> 2013-11-26 16:38:25 +0000
commit: 1604464c18612dbdadffa5202402895075f599ba (patch)
tree: 2db85d7c3bcb4db07e97cdc304439c05953b09fb /Python
parent: 29515db5a6986220278f3b7719fda1e50d16f054 (diff)
download: cpython-1604464c18612dbdadffa5202402895075f599ba.tar.gz
Issue #19638: Raise ValueError instead of crashing when converting billion character strings to float.
Diffstat (limited to 'Python')
-rw-r--r-- Python/dtoa.c 65
1 files changed, 48 insertions, 17 deletions
diff --git a/Python/dtoa.c b/Python/dtoa.c
index 44dc01f1d5..73e23af010 100644
--- a/Python/dtoa.c
+++ b/Python/dtoa.c
@@ -204,7 +204,24 @@ typedef union { double d; ULong L[2]; } U;
MAX_ABS_EXP in absolute value get truncated to +-MAX_ABS_EXP. MAX_ABS_EXP
should fit into an int. */
#ifndef MAX_ABS_EXP
-#define MAX_ABS_EXP 19999U
+#define MAX_ABS_EXP 1100000000U
+#endif
+/* Bound on length of pieces of input strings in _Py_dg_strtod; specifically,
+ this is used to bound the total number of digits ignoring leading zeros and
+ the number of digits that follow the decimal point. Ideally, MAX_DIGITS
+ should satisfy MAX_DIGITS + 400 < MAX_ABS_EXP; that ensures that the
+ exponent clipping in _Py_dg_strtod can't affect the value of the output. */
+#ifndef MAX_DIGITS
+#define MAX_DIGITS 1000000000U
+#endif
+
+/* Guard against trying to use the above values on unusual platforms with ints
+ * of width less than 32 bits. */
+#if MAX_ABS_EXP > INT_MAX
+#error "MAX_ABS_EXP should fit in an int"
+#endif
+#if MAX_DIGITS > INT_MAX
+#error "MAX_DIGITS should fit in an int"
#endif
/* The following definition of Storeinc is appropriate for MIPS processors.
@@ -1498,6 +1515,7 @@ _Py_dg_strtod(const char *s00, char **se)
Long L;
BCinfo bc;
Bigint *bb, *bb1, *bd, *bd0, *bs, *delta;
+ size_t ndigits, fraclen;
dval(&rv) = 0.;
@@ -1520,39 +1538,52 @@ _Py_dg_strtod(const char *s00, char **se)
c = *++s;
lz = s != s1;
- /* Point s0 at the first nonzero digit (if any). nd0 will be the position
- of the point relative to s0. nd will be the total number of digits
- ignoring leading zeros. */
+ /* Point s0 at the first nonzero digit (if any). fraclen will be the
+ number of digits between the decimal point and the end of the
+ digit string. ndigits will be the total number of digits ignoring
+ leading zeros. */
s0 = s1 = s;
while ('0' <= c && c <= '9')
c = *++s;
- nd0 = nd = s - s1;
+ ndigits = s - s1;
+ fraclen = 0;
/* Parse decimal point and following digits. */
if (c == '.') {
c = *++s;
- if (!nd) {
+ if (!ndigits) {
s1 = s;
while (c == '0')
c = *++s;
lz = lz || s != s1;
- nd0 -= s - s1;
+ fraclen += (s - s1);
s0 = s;
}
s1 = s;
while ('0' <= c && c <= '9')
c = *++s;
- nd += s - s1;
+ ndigits += s - s1;
+ fraclen += s - s1;
+ }
+
+ /* Now lz is true if and only if there were leading zero digits, and
+ ndigits gives the total number of digits ignoring leading zeros. A
+ valid input must have at least one digit. */
+ if (!ndigits && !lz) {
+ if (se)
+ *se = (char *)s00;
+ goto parse_error;
}
- /* Now lz is true if and only if there were leading zero digits, and nd
- gives the total number of digits ignoring leading zeros. A valid input
- must have at least one digit. */
- if (!nd && !lz) {
+ /* Range check ndigits and fraclen to make sure that they, and values
+ computed with them, can safely fit in an int. */
+ if (ndigits > MAX_DIGITS || fraclen > MAX_DIGITS) {
if (se)
*se = (char *)s00;
goto parse_error;
}
+ nd = (int)ndigits;
+ nd0 = (int)ndigits - (int)fraclen;
/* Parse exponent. */
e = 0;
@@ -1886,20 +1917,20 @@ _Py_dg_strtod(const char *s00, char **se)
bd2++;
/* At this stage bd5 - bb5 == e == bd2 - bb2 + bbe, bb2 - bs2 == 1,
- and bs == 1, so:
+ and bs == 1, so:
tdv == bd * 10**e = bd * 2**(bbe - bb2 + bd2) * 5**(bd5 - bb5)
srv == bb * 2**bbe = bb * 2**(bbe - bb2 + bb2)
- 0.5 ulp(srv) == 2**(bbe-1) = bs * 2**(bbe - bb2 + bs2)
+ 0.5 ulp(srv) == 2**(bbe-1) = bs * 2**(bbe - bb2 + bs2)
- It follows that:
+ It follows that:
M * tdv = bd * 2**bd2 * 5**bd5
M * srv = bb * 2**bb2 * 5**bb5
M * 0.5 ulp(srv) = bs * 2**bs2 * 5**bb5
- for some constant M. (Actually, M == 2**(bb2 - bbe) * 5**bb5, but
- this fact is not needed below.)
+ for some constant M. (Actually, M == 2**(bb2 - bbe) * 5**bb5, but
+ this fact is not needed below.)
*/
/* Remove factor of 2**i, where i = min(bb2, bd2, bs2). */