summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Eggert <eggert@cs.ucla.edu>2018-10-10 23:17:18 -0700
committerPaul Eggert <eggert@cs.ucla.edu>2018-10-10 23:19:42 -0700
commit5bd8cfc14d4b0c78c07e65a583f42a10c4cbc06d (patch)
treec51b2a470f62679f5e5c3f6f55d90aa0082d0133
parentfd3a48fcd8bb212ec12b9b10a79de0ae605ee93b (diff)
downloademacs-5bd8cfc14d4b0c78c07e65a583f42a10c4cbc06d.tar.gz
Fix mishandling of symbols that look like numbers
* src/bignum.c (make_neg_biguint): New function. * src/lread.c (read1): Do not mishandle an unquoted symbol with name equal to something like "1\0x", i.e., a string of numeric form followed by a NUL byte. Formerly these symbols were misread as numbers. (string_to_number): Change last argument from an integer flag to a pointer to the length. This lets the caller figure out how much of the prefix was used. All callers changed. Add a fast path if the integer (sans sign) fits in uintmax_t. Update comments and simplify now that bignums are present. * src/print.c (print_object): Fix quoting of symbols that look like numbers, by relying on string_to_number for the tricky cases rather than trying to redo its logic, incorrectly. For example, (read (prin1-to-string '\1e+NaN)) formerly returned "1e+NaN", which was wrong: a backslash is needed in the output to prevent it from being read as a NaN. Escape NO_BREAK_SPACE too, since lread.c treats it like SPACE. * test/src/print-tests.el (print-read-roundtrip): Add tests illustrating the abovementioned bugs.
-rw-r--r--src/bignum.c10
-rw-r--r--src/data.c2
-rw-r--r--src/lisp.h5
-rw-r--r--src/lread.c100
-rw-r--r--src/print.c49
-rw-r--r--src/process.c7
-rw-r--r--test/src/print-tests.el16
7 files changed, 99 insertions, 90 deletions
diff --git a/src/bignum.c b/src/bignum.c
index 0ab8de3ab7a..e3db0377a53 100644
--- a/src/bignum.c
+++ b/src/bignum.c
@@ -117,6 +117,16 @@ make_biguint (uintmax_t n)
return make_bignum ();
}
+/* Return a Lisp integer equal to -N, which must not be in fixnum range. */
+Lisp_Object
+make_neg_biguint (uintmax_t n)
+{
+ eassert (-MOST_NEGATIVE_FIXNUM < n);
+ mpz_set_uintmax (mpz[0], n);
+ mpz_neg (mpz[0], mpz[0]);
+ return make_bignum ();
+}
+
/* Return a Lisp integer with value taken from mpz[0].
Set mpz[0] to a junk value. */
Lisp_Object
diff --git a/src/data.c b/src/data.c
index 5f1d059512d..538081e5c9b 100644
--- a/src/data.c
+++ b/src/data.c
@@ -2796,7 +2796,7 @@ If the base used is not 10, STRING is always parsed as an integer. */)
while (*p == ' ' || *p == '\t')
p++;
- Lisp_Object val = string_to_number (p, b, S2N_IGNORE_TRAILING);
+ Lisp_Object val = string_to_number (p, b, 0);
return NILP (val) ? make_fixnum (0) : val;
}
diff --git a/src/lisp.h b/src/lisp.h
index 2c20b483cad..5ecc48b025c 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2506,7 +2506,7 @@ INTEGERP (Lisp_Object x)
return FIXNUMP (x) || BIGNUMP (x);
}
-/* Return a Lisp integer with value taken from n. */
+/* Return a Lisp integer with value taken from N. */
INLINE Lisp_Object
make_int (intmax_t n)
{
@@ -3329,6 +3329,7 @@ extern ptrdiff_t bignum_bufsize (Lisp_Object, int);
extern ptrdiff_t bignum_to_c_string (char *, ptrdiff_t, Lisp_Object, int);
extern Lisp_Object bignum_to_string (Lisp_Object, int);
extern Lisp_Object make_bignum_str (char const *, int);
+extern Lisp_Object make_neg_biguint (uintmax_t);
extern Lisp_Object double_to_integer (double);
/* Converthe integer NUM to *N. Return true if successful, false
@@ -3839,7 +3840,7 @@ LOADHIST_ATTACH (Lisp_Object x)
extern int openp (Lisp_Object, Lisp_Object, Lisp_Object,
Lisp_Object *, Lisp_Object, bool);
enum { S2N_IGNORE_TRAILING = 1 };
-extern Lisp_Object string_to_number (char const *, int, int);
+extern Lisp_Object string_to_number (char const *, int, ptrdiff_t *);
extern void map_obarray (Lisp_Object, void (*) (Lisp_Object, Lisp_Object),
Lisp_Object);
extern void dir_warning (const char *, Lisp_Object);
diff --git a/src/lread.c b/src/lread.c
index 73e38d89954..62616cb6819 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -2354,12 +2354,14 @@ character_name_to_code (char const *name, ptrdiff_t name_len)
{
/* For "U+XXXX", pass the leading '+' to string_to_number to reject
monstrosities like "U+-0000". */
+ ptrdiff_t len = name_len - 1;
Lisp_Object code
= (name[0] == 'U' && name[1] == '+'
- ? string_to_number (name + 1, 16, 0)
+ ? string_to_number (name + 1, 16, &len)
: call2 (Qchar_from_name, make_unibyte_string (name, name_len), Qt));
if (! RANGED_FIXNUMP (0, code, MAX_UNICODE_CHAR)
+ || len != name_len - 1
|| char_surrogate_p (XFIXNUM (code)))
{
AUTO_STRING (format, "\\N{%s}");
@@ -3531,12 +3533,14 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list)
|| strchr ("\"';()[]#`,", c) == NULL));
*p = 0;
+ ptrdiff_t nbytes = p - read_buffer;
UNREAD (c);
if (!quoted && !uninterned_symbol)
{
- Lisp_Object result = string_to_number (read_buffer, 10, 0);
- if (! NILP (result))
+ ptrdiff_t len;
+ Lisp_Object result = string_to_number (read_buffer, 10, &len);
+ if (! NILP (result) && len == nbytes)
return unbind_to (count, result);
}
if (!quoted && multibyte)
@@ -3548,7 +3552,6 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list)
}
{
Lisp_Object result;
- ptrdiff_t nbytes = p - read_buffer;
ptrdiff_t nchars
= (multibyte
? multibyte_chars_in_text ((unsigned char *) read_buffer,
@@ -3700,18 +3703,18 @@ substitute_in_interval (INTERVAL interval, void *arg)
}
-/* Convert STRING to a number, assuming base BASE. When STRING has
- floating point syntax and BASE is 10, return a nearest float. When
- STRING has integer syntax, return a fixnum if the integer fits, or
- else a bignum. Otherwise, return nil. If FLAGS &
- S2N_IGNORE_TRAILING is nonzero, consider just the longest prefix of
- STRING that has valid syntax. */
+/* Convert the initial prefix of STRING to a number, assuming base BASE.
+ If the prefix has floating point syntax and BASE is 10, return a
+ nearest float; otherwise, if the prefix has integer syntax, return
+ the integer; otherwise, return nil. If PLEN, set *PLEN to the
+ length of the numeric prefix if there is one, otherwise *PLEN is
+ unspecified. */
Lisp_Object
-string_to_number (char const *string, int base, int flags)
+string_to_number (char const *string, int base, ptrdiff_t *plen)
{
char const *cp = string;
- bool float_syntax = 0;
+ bool float_syntax = false;
double value = 0;
/* Negate the value ourselves. This treats 0, NaNs, and infinity properly on
@@ -3797,49 +3800,46 @@ string_to_number (char const *string, int base, int flags)
|| (state & ~INTOVERFLOW) == (LEAD_INT|E_EXP));
}
- /* Return nil if the number uses invalid syntax. If FLAGS &
- S2N_IGNORE_TRAILING, accept any prefix that matches. Otherwise,
- the entire string must match. */
- if (! (flags & S2N_IGNORE_TRAILING
- ? ((state & LEAD_INT) != 0 || float_syntax)
- : (!*cp && ((state & ~(INTOVERFLOW | DOT_CHAR)) == LEAD_INT
- || float_syntax))))
- return Qnil;
+ if (plen)
+ *plen = cp - string;
- /* If the number uses integer and not float syntax, and is in C-language
- range, use its value, preferably as a fixnum. */
- if (leading_digit >= 0 && ! float_syntax)
+ /* Return a float if the number uses float syntax. */
+ if (float_syntax)
{
- if ((state & INTOVERFLOW) == 0
- && n <= (negative ? -MOST_NEGATIVE_FIXNUM : MOST_POSITIVE_FIXNUM))
- {
- EMACS_INT signed_n = n;
- return make_fixnum (negative ? -signed_n : signed_n);
- }
-
- /* Trim any leading "+" and trailing nondigits, then convert to
- bignum. */
- string += positive;
- if (!*after_digits)
- return make_bignum_str (string, base);
- ptrdiff_t trimmed_len = after_digits - string;
- USE_SAFE_ALLOCA;
- char *trimmed = SAFE_ALLOCA (trimmed_len + 1);
- memcpy (trimmed, string, trimmed_len);
- trimmed[trimmed_len] = '\0';
- Lisp_Object result = make_bignum_str (trimmed, base);
- SAFE_FREE ();
- return result;
+ /* Convert to floating point, unless the value is already known
+ because it is infinite or a NaN. */
+ if (! value)
+ value = atof (string + signedp);
+ return make_float (negative ? -value : value);
}
- /* Either the number uses float syntax, or it does not fit into a fixnum.
- Convert it from string to floating point, unless the value is already
- known because it is an infinity, a NAN, or its absolute value fits in
- uintmax_t. */
- if (! value)
- value = atof (string + signedp);
+ /* Return nil if the number uses invalid syntax. */
+ if (! (state & LEAD_INT))
+ return Qnil;
+
+ /* Fast path if the integer (san sign) fits in uintmax_t. */
+ if (! (state & INTOVERFLOW))
+ {
+ if (!negative)
+ return make_uint (n);
+ if (-MOST_NEGATIVE_FIXNUM < n)
+ return make_neg_biguint (n);
+ EMACS_INT signed_n = n;
+ return make_fixnum (-signed_n);
+ }
- return make_float (negative ? -value : value);
+ /* Trim any leading "+" and trailing nondigits, then return a bignum. */
+ string += positive;
+ if (!*after_digits)
+ return make_bignum_str (string, base);
+ ptrdiff_t trimmed_len = after_digits - string;
+ USE_SAFE_ALLOCA;
+ char *trimmed = SAFE_ALLOCA (trimmed_len + 1);
+ memcpy (trimmed, string, trimmed_len);
+ trimmed[trimmed_len] = '\0';
+ Lisp_Object result = make_bignum_str (trimmed, base);
+ SAFE_FREE ();
+ return result;
}
diff --git a/src/print.c b/src/print.c
index c0c90bc7e9a..d15ff97b00c 100644
--- a/src/print.c
+++ b/src/print.c
@@ -1993,39 +1993,17 @@ print_object (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag)
case Lisp_Symbol:
{
- bool confusing;
- unsigned char *p = SDATA (SYMBOL_NAME (obj));
- unsigned char *end = p + SBYTES (SYMBOL_NAME (obj));
- int c;
- ptrdiff_t i, i_byte;
- ptrdiff_t size_byte;
- Lisp_Object name;
-
- name = SYMBOL_NAME (obj);
-
- if (p != end && (*p == '-' || *p == '+')) p++;
- if (p == end)
- confusing = 0;
- /* If symbol name begins with a digit, and ends with a digit,
- and contains nothing but digits and `e', it could be treated
- as a number. So set CONFUSING.
-
- Symbols that contain periods could also be taken as numbers,
- but periods are always escaped, so we don't have to worry
- about them here. */
- else if (*p >= '0' && *p <= '9'
- && end[-1] >= '0' && end[-1] <= '9')
- {
- while (p != end && ((*p >= '0' && *p <= '9')
- /* Needed for \2e10. */
- || *p == 'e' || *p == 'E'))
- p++;
- confusing = (end == p);
- }
- else
- confusing = 0;
-
- size_byte = SBYTES (name);
+ Lisp_Object name = SYMBOL_NAME (obj);
+ ptrdiff_t size_byte = SBYTES (name);
+
+ /* Set CONFUSING if NAME looks like a number, calling
+ string_to_number for non-obvious cases. */
+ char *p = SSDATA (name);
+ bool signedp = *p == '-' || *p == '+';
+ ptrdiff_t len;
+ bool confusing = ((c_isdigit (p[signedp]) || p[signedp] == '.')
+ && !NILP (string_to_number (p, 10, &len))
+ && len == size_byte);
if (! NILP (Vprint_gensym)
&& !SYMBOL_INTERNED_IN_INITIAL_OBARRAY_P (obj))
@@ -2036,10 +2014,12 @@ print_object (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag)
break;
}
- for (i = 0, i_byte = 0; i_byte < size_byte;)
+ ptrdiff_t i = 0;
+ for (ptrdiff_t i_byte = 0; i_byte < size_byte; )
{
/* Here, we must convert each multi-byte form to the
corresponding character code before handing it to PRINTCHAR. */
+ int c;
FETCH_STRING_CHAR_ADVANCE (c, name, i, i_byte);
maybe_quit ();
@@ -2049,6 +2029,7 @@ print_object (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag)
|| c == ';' || c == '#' || c == '(' || c == ')'
|| c == ',' || c == '.' || c == '`'
|| c == '[' || c == ']' || c == '?' || c <= 040
+ || c == NO_BREAK_SPACE
|| confusing
|| (i == 1 && confusable_symbol_character_p (c)))
{
diff --git a/src/process.c b/src/process.c
index a9638dfc2df..6cda4f27acc 100644
--- a/src/process.c
+++ b/src/process.c
@@ -6852,7 +6852,12 @@ SIGCODE may be an integer, or a symbol whose name is a signal name. */)
{
Lisp_Object tem = Fget_process (process);
if (NILP (tem))
- tem = string_to_number (SSDATA (process), 10, 0);
+ {
+ ptrdiff_t len;
+ tem = string_to_number (SSDATA (process), 10, &len);
+ if (NILP (tem) || len != SBYTES (process))
+ return Qnil;
+ }
process = tem;
}
else if (!NUMBERP (process))
diff --git a/test/src/print-tests.el b/test/src/print-tests.el
index 091f1aa1afb..78e769f50e9 100644
--- a/test/src/print-tests.el
+++ b/test/src/print-tests.el
@@ -95,8 +95,20 @@ otherwise, use a different charset."
"--------\n"))))
(ert-deftest print-read-roundtrip ()
- (let ((sym '\’bar))
- (should (eq (read (prin1-to-string sym)) sym))))
+ (let ((syms (list '## '& '* '+ '- '/ '0E '0e '< '= '> 'E 'E0 'NaN '\"
+ '\# '\#x0 '\' '\'\' '\( '\) '\+00 '\, '\-0 '\. '\.0
+ '\0 '\0.0 '\0E0 '\0e0 '\1E+ '\1E+NaN '\1e+ '\1e+NaN
+ '\; '\? '\[ '\\ '\] '\` '_ 'a 'e 'e0 'x
+ '{ '| '} '~ : '\’ '\’bar
+ (intern "\t") (intern "\n") (intern " ")
+ (intern "\N{NO-BREAK SPACE}")
+ (intern "\N{ZERO WIDTH SPACE}")
+ (intern "\0"))))
+ (dolist (sym syms)
+ (should (eq (read (prin1-to-string sym)) sym))
+ (dolist (sym1 syms)
+ (let ((sym2 (intern (concat (symbol-name sym) (symbol-name sym1)))))
+ (should (eq (read (prin1-to-string sym2)) sym2)))))))
(ert-deftest print-bignum ()
(let* ((str "999999999999999999999999999999999")