From 198ae2b90eb2e7b41965223967bbb8971da950d5 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Wed, 1 Jun 2011 23:17:35 -0700 Subject: * lisp.h: Include <limits.h>, as it'll be useful in later changes. * character.c, data.c, editfns.c, insdel.c, intervals.c: Don't include <limits.h>, since lisp.h does. --- src/ChangeLog | 4 ++++ src/character.c | 2 +- src/data.c | 2 -- src/editfns.c | 1 - src/insdel.c | 2 -- src/intervals.c | 2 +- src/lisp.h | 2 ++ 7 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 1c97b0ac9f5..708e08135be 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,5 +1,9 @@ 2011-06-02 Paul Eggert + * lisp.h: Include , as it'll useful in later changes. + * character.c, data.c, editfns.c, insdel.c, intervals.c: + Don't include , since lisp.h does. + Don't assume time_t can fit into int. * buffer.h (struct buffer.modtime): Now time_t, not int. * fileio.c (Fvisited_file_modtime): No need for time_t cast now. diff --git a/src/character.c b/src/character.c index 34e69da9cc5..4aa1b75cd8c 100644 --- a/src/character.c +++ b/src/character.c @@ -35,7 +35,7 @@ along with GNU Emacs. If not, see . */ #include #include -#include + #include "lisp.h" #include "character.h" #include "buffer.h" diff --git a/src/data.c b/src/data.c index 78bd454056d..522f0156ebd 100644 --- a/src/data.c +++ b/src/data.c @@ -23,8 +23,6 @@ along with GNU Emacs. If not, see . */ #include #include -#include - #include "lisp.h" #include "puresize.h" #include "character.h" diff --git a/src/editfns.c b/src/editfns.c index 8b48355fbfa..bfdc891140d 100644 --- a/src/editfns.c +++ b/src/editfns.c @@ -47,7 +47,6 @@ along with GNU Emacs. If not, see . */ #include #include #include -#include #include #include diff --git a/src/insdel.c b/src/insdel.c index c0cccc65d6a..d695278284b 100644 --- a/src/insdel.c +++ b/src/insdel.c @@ -21,8 +21,6 @@ along with GNU Emacs. If not, see . 
*/ #include #include -#include - #include "lisp.h" #include "intervals.h" #include "buffer.h" diff --git a/src/intervals.c b/src/intervals.c index f9e9c864e13..2cebc9af585 100644 --- a/src/intervals.c +++ b/src/intervals.c @@ -39,7 +39,7 @@ along with GNU Emacs. If not, see . */ #include #include -#include + #include "lisp.h" #include "intervals.h" #include "buffer.h" diff --git a/src/lisp.h b/src/lisp.h index 8a504e8eb86..6ef2c4211f2 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -24,6 +24,8 @@ along with GNU Emacs. If not, see . */ #include #include +#include + /* Use the configure flag --enable-checking[=LIST] to enable various types of run time checks for Lisp objects. */ -- cgit v1.2.1 From 76118f10b098e0cf03050b277401b8646dce5706 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Wed, 1 Jun 2011 23:21:13 -0700 Subject: * dired.c (Ffile_attributes): Don't assume EMACS_INT has >32 bits. --- src/ChangeLog | 2 ++ src/dired.c | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 708e08135be..44c9db28d58 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,5 +1,7 @@ 2011-06-02 Paul Eggert + * dired.c (Ffile_attributes): Don't assume EMACS_INT has >32 bits. + * lisp.h: Include , as it'll useful in later changes. * character.c, data.c, editfns.c, insdel.c, intervals.c: Don't include , since lisp.h does. diff --git a/src/dired.c b/src/dired.c index 60d7bc64974..1e587353f6d 100644 --- a/src/dired.c +++ b/src/dired.c @@ -1013,12 +1013,11 @@ so last access time will always be midnight of that day. */) The code on the next line avoids a compiler warning on systems where st_ino is 32 bit wide. (bug#766). 
*/ EMACS_INT high_ino = s.st_ino >> 31 >> 1; - EMACS_INT low_ino = s.st_ino & 0xffffffff; values[10] = Fcons (make_number (high_ino >> 8), Fcons (make_number (((high_ino & 0xff) << 16) - + (low_ino >> 16)), - make_number (low_ino & 0xffff))); + + (s.st_ino >> 16 & 0xffff)), + make_number (s.st_ino & 0xffff))); } /* Likewise for device. */ -- cgit v1.2.1 From 6f9028d2dc58431d00319b4918af325c507b854c Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Wed, 1 Jun 2011 23:23:20 -0700 Subject: * fileio.c (Fverify_visited_file_modtime): Avoid time overflow if b->modtime has its maximal value. --- src/ChangeLog | 3 +++ src/fileio.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ChangeLog b/src/ChangeLog index 44c9db28d58..0d84500e78d 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,5 +1,8 @@ 2011-06-02 Paul Eggert + * fileio.c (Fverify_visited_file_modtime): Avoid time overflow + if b->modtime has its maximal value. + * dired.c (Ffile_attributes): Don't assume EMACS_INT has >32 bits. * lisp.h: Include , as it'll useful in later changes. diff --git a/src/fileio.c b/src/fileio.c index 94894b97a6e..48dac80a39f 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -4960,7 +4960,7 @@ See Info node `(elisp)Modification Time' for more details. */) if ((st.st_mtime == b->modtime /* If both are positive, accept them if they are off by one second. */ || (st.st_mtime > 0 && b->modtime > 0 - && (st.st_mtime == b->modtime + 1 + && (st.st_mtime - 1 == b->modtime || st.st_mtime == b->modtime - 1))) && (st.st_size == b->modtime_size || b->modtime_size < 0)) -- cgit v1.2.1 From 47de45b793c19b81914434b12476d3e6b5e9342e Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Wed, 1 Jun 2011 23:29:14 -0700 Subject: * lisp.h (WIDE_EMACS_INT): Now defaults to 1. 
--- src/ChangeLog | 2 ++ src/lisp.h | 9 +++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 0d84500e78d..3f5d9e19c1e 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,5 +1,7 @@ 2011-06-02 Paul Eggert + * lisp.h (WIDE_EMACS_INT): Now defaults to 1. + * fileio.c (Fverify_visited_file_modtime): Avoid time overflow if b->modtime has its maximal value. diff --git a/src/lisp.h b/src/lisp.h index 6ef2c4211f2..7f3f76a8e1e 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -36,13 +36,14 @@ extern void check_cons_list (void); #define CHECK_CONS_LIST() ((void) 0) #endif -/* Temporarily disable wider-than-pointer integers until they're tested more. - Build with CFLAGS='-DWIDE_EMACS_INT' to try them out. */ -/* #undef WIDE_EMACS_INT */ +/* To disable wider-than-pointer integers, build with -DWIDE_EMACS_INT=0. */ +#ifndef WIDE_EMACS_INT +#define WIDE_EMACS_INT 1 +#endif /* These are default choices for the types to use. */ #ifndef EMACS_INT -# if BITS_PER_LONG < BITS_PER_LONG_LONG && defined WIDE_EMACS_INT +# if BITS_PER_LONG < BITS_PER_LONG_LONG && WIDE_EMACS_INT # define EMACS_INT long long # define BITS_PER_EMACS_INT BITS_PER_LONG_LONG # define pI "ll" -- cgit v1.2.1 From 9ca05932715b387d963f7dd490a2b78a67ad200a Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 3 Jun 2011 11:22:12 -0700 Subject: * xselect.c: Use 'unsigned' more consistently. (selection_data_to_lisp_data, lisp_data_to_selection_data): Use 'unsigned' consistently when computing sizes of unsigned objects. --- src/ChangeLog | 4 ++++ src/xselect.c | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index ff36dd33102..2b81f66d8b6 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,5 +1,9 @@ 2011-06-03 Paul Eggert + * xselect.c: Use 'unsigned' more consistently. + (selection_data_to_lisp_data, lisp_data_to_selection_data): + Use 'unsigned' consistently when computing sizes of unsigned objects. 
+ * fileio.c (Fverify_visited_file_modtime): Avoid time overflow if b->modtime has its maximal value. diff --git a/src/xselect.c b/src/xselect.c index 73ef4abc0a4..ca2b1812a61 100644 --- a/src/xselect.c +++ b/src/xselect.c @@ -1651,9 +1651,9 @@ selection_data_to_lisp_data (Display *display, const unsigned char *data, If the number is 32 bits and won't fit in a Lisp_Int, convert it to a cons of integers, 16 bits in each half. */ - else if (format == 32 && size == sizeof (int)) + else if (format == 32 && size == sizeof (unsigned int)) return long_to_cons (((unsigned int *) data) [0]); - else if (format == 16 && size == sizeof (short)) + else if (format == 16 && size == sizeof (unsigned short)) return make_number ((int) (((unsigned short *) data) [0])); /* Convert any other kind of data to a vector of numbers, represented @@ -1753,8 +1753,8 @@ lisp_data_to_selection_data (Display *display, Lisp_Object obj, { *format_ret = 32; *size_ret = 1; - *data_ret = (unsigned char *) xmalloc (sizeof (long) + 1); - (*data_ret) [sizeof (long)] = 0; + *data_ret = (unsigned char *) xmalloc (sizeof (unsigned long) + 1); + (*data_ret) [sizeof (unsigned long)] = 0; (*(unsigned long **) data_ret) [0] = cons_to_long (obj); if (NILP (type)) type = QINTEGER; } -- cgit v1.2.1 From 3c9d8b1ef814e03b5490c3ab3e28f2867ee9cf51 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 3 Jun 2011 11:42:04 -0700 Subject: * NEWS: 62-bit integers are typical now. --- etc/ChangeLog | 4 ++++ etc/NEWS | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/etc/ChangeLog b/etc/ChangeLog index 4586628e296..43c0300fb1e 100644 --- a/etc/ChangeLog +++ b/etc/ChangeLog @@ -1,3 +1,7 @@ +2011-06-03 Paul Eggert + + * NEWS: 62-bit integers are typical now. + 2011-05-24 Leo Liu * NEWS: Mention the new primitive sha1 and the removal of sha1.el. 
diff --git a/etc/NEWS b/etc/NEWS index 25fd259eb9a..26581202284 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -902,6 +902,14 @@ of function value which looks like (closure ENV ARGS &rest BODY). *** New function `special-variable-p' to check whether a variable is declared as dynamically bound. +** Emacs integers have a wider range on typical 32-bit hosts. +Previously, they were limited to a 30-bit range (-2**29 .. 2**29-1). +Now, they are limited to a 62-bit range (-2**61 .. 2**61-1), the +same as on 64-bit hosts. This increased range comes from the Emacs +interpreter using 64-bit native integer types that are available +on typical modern 32-bit platforms. Older 32-bit hosts that lack +64-bit integers have the same 30-bit range as before. + ** pre/post-command-hook are not reset to nil upon error. Instead, the offending function is removed. -- cgit v1.2.1 From da908fa9a718745d426af0f69f88324b3023b7e4 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 3 Jun 2011 11:42:59 -0700 Subject: Fix doc for machines with wider system times such as time_t. On such machines, it's now safe to assume that EMACS_INT is as wide as the system times, so that shifting right by 16 will result in an integer that always fits in EMACS_INT. * dired.c (Ffile_attributes): Document large inode number handling. * termhooks.h: Fix comment for large time stamp handling. --- src/ChangeLog | 7 +++++++ src/dired.c | 10 +++++----- src/termhooks.h | 4 ++-- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 490f5428e3a..857600fda0c 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,5 +1,12 @@ 2011-06-03 Paul Eggert + Fix doc for machines with wider system times such as time_t. + On such machines, it's now safe to assume that EMACS_INT is as + wide as the system times, so that shifting right by 16 will + result in an integer that always fits in EMACS_INT. + * dired.c (Ffile_attributes): Document large inode number handling. 
+ * termhooks.h: Fix comment for large time stamp handling. + * lisp.h (WIDE_EMACS_INT): Now defaults to 1. * xselect.c: Use 'unsigned' more consistently. diff --git a/src/dired.c b/src/dired.c index 1e587353f6d..0fe2ead56ef 100644 --- a/src/dired.c +++ b/src/dired.c @@ -901,10 +901,10 @@ Elements of the attribute list are: 8. File modes, as a string of ten letters or dashes as in ls -l. 9. t if file's gid would change if file were deleted and recreated. 10. inode number. If inode number is larger than what Emacs integer - can hold, but still fits into a 32-bit number, this is a cons cell + can hold, but all but the bottom 16 bits still fits, this is a cons cell containing two integers: first the high part, then the low 16 bits. - If the inode number is wider than 32 bits, this is of the form - (HIGH MIDDLE . LOW): first the high 24 bits, then middle 24 bits, + If the inode number is still wider, this is of the form + (HIGH MIDDLE . LOW): first the high bits, then the middle 24 bits, and finally the low 16 bits. 11. Filesystem device number. If it is larger than what the Emacs integer can hold, this is a cons cell, similar to the inode number. @@ -1008,8 +1008,8 @@ so last access time will always be midnight of that day. */) make_number ((EMACS_INT)(s.st_ino & 0xffff))); else { - /* To allow inode numbers beyond 32 bits, separate into 2 24-bit - high parts and a 16-bit bottom part. + /* To allow inode numbers beyond what INTEGER_TO_CONS can handle, + separate into 2 24-bit high parts and a 16-bit bottom part. The code on the next line avoids a compiler warning on systems where st_ino is 32 bit wide. (bug#766). */ EMACS_INT high_ino = s.st_ino >> 31 >> 1; diff --git a/src/termhooks.h b/src/termhooks.h index 6a58517a85a..81583e79d78 100644 --- a/src/termhooks.h +++ b/src/termhooks.h @@ -342,8 +342,8 @@ struct terminal SELECTION-VALUE is the value that emacs owns for that selection. It may be any kind of Lisp object. 
SELECTION-TIMESTAMP is the time at which emacs began owning this - selection, as a cons of two 16-bit numbers (making a 32 bit - time.) + selection, as an Emacs integer; or if that doesn't fit, as a + cons of two 16-bit integers (making a 32 bit time.) FRAME is the frame for which we made the selection. If there is an entry in this alist, then it can be assumed that Emacs owns that selection. -- cgit v1.2.1 From b047e7acb5d18ccabb7548e3e7d79eba711008bf Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 3 Jun 2011 11:47:14 -0700 Subject: Document wide integers better. * buffers.texi (Buffers): * files.texi (Visiting): Default buffer maximum is now 2 EiB typically. --- doc/emacs/ChangeLog | 6 ++++++ doc/emacs/buffers.texi | 7 +++++-- doc/emacs/files.texi | 3 ++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/doc/emacs/ChangeLog b/doc/emacs/ChangeLog index 89a78263e94..3b61ca6ccba 100644 --- a/doc/emacs/ChangeLog +++ b/doc/emacs/ChangeLog @@ -1,3 +1,9 @@ +2011-06-02 Paul Eggert + + Document wide integers better. + * buffers.texi (Buffers): + * files.texi (Visiting): Default buffer maximum is now 2 EiB typically. + 2011-05-28 Chong Yidong * custom.texi (Hooks): Reorganize. Mention Prog mode. diff --git a/doc/emacs/buffers.texi b/doc/emacs/buffers.texi index ae0d85f249b..9463b02464d 100644 --- a/doc/emacs/buffers.texi +++ b/doc/emacs/buffers.texi @@ -43,8 +43,11 @@ can be different from the value in other buffers. @xref{Locals}. A buffer's size cannot be larger than some maximum, which is defined by the largest buffer position representable by the @dfn{Emacs integer} data type. This is because Emacs tracks buffer positions -using that data type. For 32-bit machines, the largest buffer size is -512 megabytes. +using that data type. For most machines, the maximum buffer size +enforced by the data types is @math{2^61 - 2} bytes, or about 2 EiB. +For some older machines, the maximum is @math{2^29 - 2} bytes, or +about 512 MiB. 
Buffer sizes are also limited by the size of Emacs's +virtual memory. @menu * Select Buffer:: Creating a new buffer or reselecting an old one. diff --git a/doc/emacs/files.texi b/doc/emacs/files.texi index 40bd065610c..793a11e62ed 100644 --- a/doc/emacs/files.texi +++ b/doc/emacs/files.texi @@ -209,7 +209,8 @@ to reread it. about 10 megabytes), Emacs asks you for confirmation first. You can answer @kbd{y} to proceed with visiting the file. Note, however, that Emacs cannot visit files that are larger than the maximum Emacs buffer -size, which is around 512 megabytes on 32-bit machines +size, which is limited by the amount of memory Emacs can allocate +and by the integers that Emacs can represent (@pxref{Buffers}). If you try, Emacs will display an error message saying that the maximum buffer size has been exceeded. -- cgit v1.2.1 From be14b9ab109c8deb5745dc47cbc471e97be06486 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 3 Jun 2011 11:49:33 -0700 Subject: Document wide integers better. * files.texi (File Attributes): Document ino_t values better. * numbers.texi (Integer Basics, Integer Basics, Arithmetic Operations): (Bitwise Operations): * objects.texi (Integer Type): Integers are typically 62 bits now. * os.texi (Time Conversion): Document time_t values better. --- doc/lispref/ChangeLog | 9 +++ doc/lispref/files.texi | 11 ++-- doc/lispref/numbers.texi | 154 +++++++++++++++++++++++------------------------ doc/lispref/objects.texi | 25 ++++---- doc/lispref/os.texi | 8 +-- 5 files changed, 108 insertions(+), 99 deletions(-) diff --git a/doc/lispref/ChangeLog b/doc/lispref/ChangeLog index 83cee10f899..16d175c338b 100644 --- a/doc/lispref/ChangeLog +++ b/doc/lispref/ChangeLog @@ -1,3 +1,12 @@ +2011-06-03 Paul Eggert + + Document wide integers better. + * files.texi (File Attributes): Document ino_t values better. 
+ * numbers.texi (Integer Basics, Integer Basics, Arithmetic Operations): + (Bitwise Operations): + * objects.texi (Integer Type): Integers are typically 62 bits now. + * os.texi (Time Conversion): Document time_t values better. + 2011-05-31 Lars Magne Ingebrigtsen * processes.texi (Process Information): Document diff --git a/doc/lispref/files.texi b/doc/lispref/files.texi index 72f39f681ae..ed282349573 100644 --- a/doc/lispref/files.texi +++ b/doc/lispref/files.texi @@ -1236,12 +1236,13 @@ deleted and recreated; @code{nil} otherwise. @item The file's inode number. If possible, this is an integer. If the -inode number is too large to be represented as an integer in Emacs -Lisp, but still fits into a 32-bit integer, then the value has the -form @code{(@var{high} . @var{low})}, where @var{low} holds the low 16 -bits. If the inode is wider than 32 bits, the value is of the form +inode number @math{N} is too large to be represented as an integer in +Emacs Lisp, but @math{N / 2^16} is representable, then the value has +the form @code{(@var{high} . @var{low})}, where @var{high} holds the +high bits (i.e., excluding the low-order bits) and @var{low} the low +16 bits. If the inode number is even larger, the value is of the form @code{(@var{high} @var{middle} . @var{low})}, where @code{high} holds -the high 24 bits, @var{middle} the next 24 bits, and @var{low} the low +the high bits, @var{middle} the next 24 bits, and @var{low} the low 16 bits. @item diff --git a/doc/lispref/numbers.texi b/doc/lispref/numbers.texi index 2c73a03a26c..ff057c22254 100644 --- a/doc/lispref/numbers.texi +++ b/doc/lispref/numbers.texi @@ -36,22 +36,24 @@ exact; they have a fixed, limited amount of precision. @section Integer Basics The range of values for an integer depends on the machine. 
The -minimum range is @minus{}536870912 to 536870911 (30 bits; i.e., +typical range is @minus{}2305843009213693952 to 2305843009213693951 +(62 bits; i.e., @ifnottex --2**29 +-2**61 @end ifnottex @tex -@math{-2^{29}} +@math{-2^{61}} @end tex to @ifnottex -2**29 - 1), +2**61 - 1) @end ifnottex @tex -@math{2^{29}-1}), +@math{2^{61}-1}) @end tex -but some machines may provide a wider range. Many examples in this -chapter assume an integer has 30 bits. +but some older machines provide only 30 bits. Many examples in this +chapter assume that an integer has 62 bits and that floating point +numbers are IEEE double precision. @cindex overflow The Lisp reader reads an integer as a sequence of digits with optional @@ -63,7 +65,8 @@ Emacs range is treated as a floating-point number. 1. ; @r{The integer 1.} +1 ; @r{Also the integer 1.} -1 ; @r{The integer @minus{}1.} - 1073741825 ; @r{The floating point number 1073741825.0.} + 4611686018427387904 + ; @r{The floating point number 4.611686018427388e+18.} 0 ; @r{The integer 0.} -0 ; @r{The integer 0.} @end example @@ -94,25 +97,21 @@ from 2 to 36. For example: bitwise operators (@pxref{Bitwise Operations}), it is often helpful to view the numbers in their binary form. - In 30-bit binary, the decimal integer 5 looks like this: + In 62-bit binary, the decimal integer 5 looks like this: @example -00 0000 0000 0000 0000 0000 0000 0101 +0000...000101 (62 bits total) @end example -@noindent -(We have inserted spaces between groups of 4 bits, and two spaces -between groups of 8 bits, to make the binary integer easier to read.) - The integer @minus{}1 looks like this: @example -11 1111 1111 1111 1111 1111 1111 1111 +1111...111111 (62 bits total) @end example @noindent @cindex two's complement -@minus{}1 is represented as 30 ones. (This is called @dfn{two's +@minus{}1 is represented as 62 ones. (This is called @dfn{two's complement} notation.) 
The negative integer, @minus{}5, is creating by subtracting 4 from @@ -120,24 +119,24 @@ complement} notation.) @minus{}5 looks like this: @example -11 1111 1111 1111 1111 1111 1111 1011 +1111...111011 (62 bits total) @end example - In this implementation, the largest 30-bit binary integer value is -536,870,911 in decimal. In binary, it looks like this: + In this implementation, the largest 62-bit binary integer value is +2,305,843,009,213,693,951 in decimal. In binary, it looks like this: @example -01 1111 1111 1111 1111 1111 1111 1111 +0111...111111 (62 bits total) @end example Since the arithmetic functions do not check whether integers go -outside their range, when you add 1 to 536,870,911, the value is the -negative integer @minus{}536,870,912: +outside their range, when you add 1 to 2,305,843,009,213,693,951, the value is the +negative integer @minus{}2,305,843,009,213,693,952: @example -(+ 1 536870911) - @result{} -536870912 - @result{} 10 0000 0000 0000 0000 0000 0000 0000 +(+ 1 2305843009213693951) + @result{} -2305843009213693952 + @result{} 1000...000000 (62 bits total) @end example Many of the functions described in this chapter accept markers for @@ -508,8 +507,8 @@ commonly used. if any argument is floating. It is important to note that in Emacs Lisp, arithmetic functions -do not check for overflow. Thus @code{(1+ 268435455)} may evaluate to -@minus{}268435456, depending on your hardware. +do not check for overflow. Thus @code{(1+ 2305843009213693951)} may +evaluate to @minus{}2305843009213693952, depending on your hardware. @defun 1+ number-or-marker This function returns @var{number-or-marker} plus 1. @@ -829,19 +828,19 @@ value of a positive integer by two, rounding downward. The function @code{lsh}, like all Emacs Lisp arithmetic functions, does not check for overflow, so shifting left can discard significant bits and change the sign of the number. 
For example, left shifting -536,870,911 produces @minus{}2 on a 30-bit machine: +2,305,843,009,213,693,951 produces @minus{}2 on a typical machine: @example -(lsh 536870911 1) ; @r{left shift} +(lsh 2305843009213693951 1) ; @r{left shift} @result{} -2 @end example -In binary, in the 30-bit implementation, the argument looks like this: +In binary, in the 62-bit implementation, the argument looks like this: @example @group -;; @r{Decimal 536,870,911} -01 1111 1111 1111 1111 1111 1111 1111 +;; @r{Decimal 2,305,843,009,213,693,951} +0111...111111 (62 bits total) @end group @end example @@ -851,7 +850,7 @@ which becomes the following when left shifted: @example @group ;; @r{Decimal @minus{}2} -11 1111 1111 1111 1111 1111 1111 1110 +1111...111110 (62 bits total) @end group @end example @end defun @@ -874,9 +873,9 @@ looks like this: @group (ash -6 -1) @result{} -3 ;; @r{Decimal @minus{}6 becomes decimal @minus{}3.} -11 1111 1111 1111 1111 1111 1111 1010 +1111...111010 (62 bits total) @result{} -11 1111 1111 1111 1111 1111 1111 1101 +1111...111101 (62 bits total) @end group @end example @@ -885,11 +884,11 @@ In contrast, shifting the pattern of bits one place to the right with @example @group -(lsh -6 -1) @result{} 536870909 -;; @r{Decimal @minus{}6 becomes decimal 536,870,909.} -11 1111 1111 1111 1111 1111 1111 1010 +(lsh -6 -1) @result{} 2305843009213693949 +;; @r{Decimal @minus{}6 becomes decimal 2,305,843,009,213,693,949.} +1111...111010 (62 bits total) @result{} -01 1111 1111 1111 1111 1111 1111 1101 +0111...111101 (62 bits total) @end group @end example @@ -899,34 +898,35 @@ Here are other examples: @c with smallbook but not with regular book! 
--rjc 16mar92 @smallexample @group - ; @r{ 30-bit binary values} + ; @r{ 62-bit binary values} -(lsh 5 2) ; 5 = @r{00 0000 0000 0000 0000 0000 0000 0101} - @result{} 20 ; = @r{00 0000 0000 0000 0000 0000 0001 0100} +(lsh 5 2) ; 5 = @r{0000...000101} + @result{} 20 ; = @r{0000...010100} @end group @group (ash 5 2) @result{} 20 -(lsh -5 2) ; -5 = @r{11 1111 1111 1111 1111 1111 1111 1011} - @result{} -20 ; = @r{11 1111 1111 1111 1111 1111 1110 1100} +(lsh -5 2) ; -5 = @r{1111...111011} + @result{} -20 ; = @r{1111...101100} (ash -5 2) @result{} -20 @end group @group -(lsh 5 -2) ; 5 = @r{00 0000 0000 0000 0000 0000 0000 0101} - @result{} 1 ; = @r{00 0000 0000 0000 0000 0000 0000 0001} +(lsh 5 -2) ; 5 = @r{0000...000101} + @result{} 1 ; = @r{0000...000001} @end group @group (ash 5 -2) @result{} 1 @end group @group -(lsh -5 -2) ; -5 = @r{11 1111 1111 1111 1111 1111 1111 1011} - @result{} 268435454 ; = @r{00 0111 1111 1111 1111 1111 1111 1110} +(lsh -5 -2) ; -5 = @r{1111...111011} + @result{} 1152921504606846974 + ; = @r{0011...111110} @end group @group -(ash -5 -2) ; -5 = @r{11 1111 1111 1111 1111 1111 1111 1011} - @result{} -2 ; = @r{11 1111 1111 1111 1111 1111 1111 1110} +(ash -5 -2) ; -5 = @r{1111...111011} + @result{} -2 ; = @r{1111...111110} @end group @end smallexample @end defun @@ -961,23 +961,23 @@ because its binary representation consists entirely of ones. 
If @smallexample @group - ; @r{ 30-bit binary values} + ; @r{ 62-bit binary values} -(logand 14 13) ; 14 = @r{00 0000 0000 0000 0000 0000 0000 1110} - ; 13 = @r{00 0000 0000 0000 0000 0000 0000 1101} - @result{} 12 ; 12 = @r{00 0000 0000 0000 0000 0000 0000 1100} +(logand 14 13) ; 14 = @r{0000...001110} + ; 13 = @r{0000...001101} + @result{} 12 ; 12 = @r{0000...001100} @end group @group -(logand 14 13 4) ; 14 = @r{00 0000 0000 0000 0000 0000 0000 1110} - ; 13 = @r{00 0000 0000 0000 0000 0000 0000 1101} - ; 4 = @r{00 0000 0000 0000 0000 0000 0000 0100} - @result{} 4 ; 4 = @r{00 0000 0000 0000 0000 0000 0000 0100} +(logand 14 13 4) ; 14 = @r{0000...001110} + ; 13 = @r{0000...001101} + ; 4 = @r{0000...000100} + @result{} 4 ; 4 = @r{0000...000100} @end group @group (logand) - @result{} -1 ; -1 = @r{11 1111 1111 1111 1111 1111 1111 1111} + @result{} -1 ; -1 = @r{1111...111111} @end group @end smallexample @end defun @@ -991,18 +991,18 @@ passed just one argument, it returns that argument. @smallexample @group - ; @r{ 30-bit binary values} + ; @r{ 62-bit binary values} -(logior 12 5) ; 12 = @r{00 0000 0000 0000 0000 0000 0000 1100} - ; 5 = @r{00 0000 0000 0000 0000 0000 0000 0101} - @result{} 13 ; 13 = @r{00 0000 0000 0000 0000 0000 0000 1101} +(logior 12 5) ; 12 = @r{0000...001100} + ; 5 = @r{0000...000101} + @result{} 13 ; 13 = @r{0000...001101} @end group @group -(logior 12 5 7) ; 12 = @r{00 0000 0000 0000 0000 0000 0000 1100} - ; 5 = @r{00 0000 0000 0000 0000 0000 0000 0101} - ; 7 = @r{00 0000 0000 0000 0000 0000 0000 0111} - @result{} 15 ; 15 = @r{00 0000 0000 0000 0000 0000 0000 1111} +(logior 12 5 7) ; 12 = @r{0000...001100} + ; 5 = @r{0000...000101} + ; 7 = @r{0000...000111} + @result{} 15 ; 15 = @r{0000...001111} @end group @end smallexample @end defun @@ -1016,18 +1016,18 @@ result is 0, which is an identity element for this operation. 
If @smallexample @group - ; @r{ 30-bit binary values} + ; @r{ 62-bit binary values} -(logxor 12 5) ; 12 = @r{00 0000 0000 0000 0000 0000 0000 1100} - ; 5 = @r{00 0000 0000 0000 0000 0000 0000 0101} - @result{} 9 ; 9 = @r{00 0000 0000 0000 0000 0000 0000 1001} +(logxor 12 5) ; 12 = @r{0000...001100} + ; 5 = @r{0000...000101} + @result{} 9 ; 9 = @r{0000...001001} @end group @group -(logxor 12 5 7) ; 12 = @r{00 0000 0000 0000 0000 0000 0000 1100} - ; 5 = @r{00 0000 0000 0000 0000 0000 0000 0101} - ; 7 = @r{00 0000 0000 0000 0000 0000 0000 0111} - @result{} 14 ; 14 = @r{00 0000 0000 0000 0000 0000 0000 1110} +(logxor 12 5 7) ; 12 = @r{0000...001100} + ; 5 = @r{0000...000101} + ; 7 = @r{0000...000111} + @result{} 14 ; 14 = @r{0000...001110} @end group @end smallexample @end defun @@ -1040,9 +1040,9 @@ bit is one in the result if, and only if, the @var{n}th bit is zero in @example (lognot 5) @result{} -6 -;; 5 = @r{00 0000 0000 0000 0000 0000 0000 0101} +;; 5 = @r{0000...000101} (62 bits total) ;; @r{becomes} -;; -6 = @r{11 1111 1111 1111 1111 1111 1111 1010} +;; -6 = @r{1111...111010} (62 bits total) @end example @end defun diff --git a/doc/lispref/objects.texi b/doc/lispref/objects.texi index c58d54f13fc..d5aa51098e7 100644 --- a/doc/lispref/objects.texi +++ b/doc/lispref/objects.texi @@ -164,25 +164,25 @@ latter are unique to Emacs Lisp. @node Integer Type @subsection Integer Type - The range of values for integers in Emacs Lisp is @minus{}536870912 to -536870911 (30 bits; i.e., + The range of values for integers in Emacs Lisp is +@minus{}2305843009213693952 to 2305843009213693951 (62 bits; i.e., @ifnottex --2**29 +-2**61 @end ifnottex @tex -@math{-2^{29}} +@math{-2^{61}} @end tex to @ifnottex -2**29 - 1) +2**61 - 1) @end ifnottex @tex -@math{2^{29}-1}) +@math{2^{61}-1}) @end tex -on most machines. (Some machines may provide a wider range.) It is -important to note that the Emacs Lisp arithmetic functions do not check -for overflow. 
Thus @code{(1+ 536870911)} is @minus{}536870912 on most -machines. +on most machines. Some machines may provide a narrower or wider +range; all machines provide at least 30 bits. Emacs Lisp arithmetic +functions do not check for overflow. Thus @code{(1+ +2305843009213693951)} is @minus{}2305843009213693952 on most machines. The read syntax for integers is a sequence of (base ten) digits with an optional sign at the beginning and an optional period at the end. The @@ -195,7 +195,6 @@ leading @samp{+} or a final @samp{.}. 1 ; @r{The integer 1.} 1. ; @r{Also the integer 1.} +1 ; @r{Also the integer 1.} -1073741825 ; @r{Also the integer 1 on a 30-bit implementation.} @end group @end example @@ -203,8 +202,8 @@ leading @samp{+} or a final @samp{.}. As a special exception, if a sequence of digits specifies an integer too large or too small to be a valid integer object, the Lisp reader reads it as a floating-point number (@pxref{Floating Point Type}). -For instance, on most machines @code{536870912} is read as the -floating-point number @code{536870912.0}. +For instance, on most machines @code{2305843009213693952} is read as the +floating-point number @code{2.305843009213694e+18}. @xref{Numbers}, for more information. diff --git a/doc/lispref/os.texi b/doc/lispref/os.texi index b226d676462..5f422065c5b 100644 --- a/doc/lispref/os.texi +++ b/doc/lispref/os.texi @@ -1193,11 +1193,11 @@ to calendrical information and vice versa. You can get time values from the functions @code{current-time} (@pxref{Time of Day}) and @code{file-attributes} (@pxref{Definition of file-attributes}). - Many operating systems are limited to time values that contain 32 bits + Many 32-bit operating systems are limited to time values that contain 32 bits of information; these systems typically handle only the times from -1901-12-13 20:45:52 UTC through 2038-01-19 03:14:07 UTC. However, some -operating systems have larger time values, and can represent times far -in the past or future. 
+1901-12-13 20:45:52 UTC through 2038-01-19 03:14:07 UTC. However, 64-bit +and some 32-bit operating systems have larger time values, and can +represent times far in the past or future. Time conversion functions always use the Gregorian calendar, even for dates before the Gregorian calendar was introduced. Year numbers -- cgit v1.2.1 From 201f31ae3de0b747b47863b93d6f6a747c36c960 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 3 Jun 2011 12:02:25 -0700 Subject: Check for overflow when converting integer to cons and back. * charset.c (Fdefine_charset_internal, Fdecode_char): Use cons_to_unsigned to catch overflow. (Fencode_char): Use INTEGER_TO_CONS. * composite.h (LGLYPH_CODE): Use cons_to_unsigned. (LGLYPH_SET_CODE): Use INTEGER_TO_CONS. * data.c (long_to_cons, cons_to_long): Remove. (cons_to_unsigned, cons_to_signed): New functions. These signal an error for invalid or out-of-range values. * dired.c (Ffile_attributes): Use INTEGER_TO_CONS. * fileio.c (Fset_visited_file_modtime): Use CONS_TO_INTEGER. * font.c (Ffont_variation_glyphs): * fontset.c (Finternal_char_font): Use INTEGER_TO_CONS. * lisp.h (INTEGER_TO_CONS, CONS_TO_INTEGER): New macros. (cons_to_signed, cons_to_unsigned): New decls. (long_to_cons, cons_to_long): Remove decls. * undo.c (record_first_change): Use INTEGER_TO_CONS. (Fprimitive_undo): Use CONS_TO_INTEGER. * xfns.c (Fx_window_property): Likewise. * xselect.c (x_own_selection, selection_data_to_lisp_data): Use INTEGER_TO_CONS. (x_handle_selection_request, x_handle_selection_clear) (x_get_foreign_selection, Fx_disown_selection_internal) (Fx_get_atom_name, x_send_client_event): Use CONS_TO_INTEGER. (lisp_data_to_selection_data): Use cons_to_unsigned. (x_fill_property_data): Use cons_to_signed. Report values out of range. 
--- src/ChangeLog | 27 +++++++++++++++ src/charset.c | 38 +++------------------ src/composite.h | 16 ++------- src/data.c | 100 +++++++++++++++++++++++++++++++++++++++++++------------- src/dired.c | 39 +++------------------- src/fileio.c | 2 +- src/font.c | 10 +----- src/fontset.c | 8 +---- src/lisp.h | 28 ++++++++++++++-- src/undo.c | 15 +++------ src/xfns.c | 15 ++------- src/xselect.c | 87 ++++++++++++++++++++---------------------------- 12 files changed, 189 insertions(+), 196 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 857600fda0c..20308d40ab0 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,5 +1,32 @@ 2011-06-03 Paul Eggert + Check for overflow when converting integer to cons and back. + * charset.c (Fdefine_charset_internal, Fdecode_char): + Use cons_to_unsigned to catch overflow. + (Fencode_char): Use INTEGER_TO_CONS. + * composite.h (LGLYPH_CODE): Use cons_to_unsigned. + (LGLYPH_SET_CODE): Use INTEGER_TO_CONS. + * data.c (long_to_cons, cons_to_long): Remove. + (cons_to_unsigned, cons_to_signed): New functions. + These signal an error for invalid or out-of-range values. + * dired.c (Ffile_attributes): Use INTEGER_TO_CONS. + * fileio.c (Fset_visited_file_modtime): Use CONS_TO_INTEGER. + * font.c (Ffont_variation_glyphs): + * fontset.c (Finternal_char_font): Use INTEGER_TO_CONS. + * lisp.h (INTEGER_TO_CONS, CONS_TO_INTEGER): New macros. + (cons_to_signed, cons_to_unsigned): New decls. + (long_to_cons, cons_to_long): Remove decls. + * undo.c (record_first_change): Use INTEGER_TO_CONS. + (Fprimitive_undo): Use CONS_TO_INTEGER. + * xfns.c (Fx_window_property): Likewise. + * xselect.c (x_own_selection, selection_data_to_lisp_data): + Use INTEGER_TO_CONS. + (x_handle_selection_request, x_handle_selection_clear) + (x_get_foreign_selection, Fx_disown_selection_internal) + (Fx_get_atom_name, x_send_client_event): Use CONS_TO_INTEGER. + (lisp_data_to_selection_data): Use cons_to_unsigned. + (x_fill_property_data): Use cons_to_signed. 
Report values out of range. + Fix doc for machines with wider system times such as time_t. On such machines, it's now safe to assume that EMACS_INT is as wide as the system times, so that shifting right by 16 will diff --git a/src/charset.c b/src/charset.c index bfebe02f52e..770e98c99e1 100644 --- a/src/charset.c +++ b/src/charset.c @@ -932,17 +932,8 @@ usage: (define-charset-internal ...) */) val = args[charset_arg_min_code]; if (! NILP (val)) { - unsigned code; + unsigned code = cons_to_unsigned (val, UINT_MAX); - if (INTEGERP (val)) - code = XINT (val); - else - { - CHECK_CONS (val); - CHECK_NUMBER_CAR (val); - CHECK_NUMBER_CDR (val); - code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val))); - } if (code < charset.min_code || code > charset.max_code) args_out_of_range_3 (make_number (charset.min_code), @@ -954,17 +945,8 @@ usage: (define-charset-internal ...) */) val = args[charset_arg_max_code]; if (! NILP (val)) { - unsigned code; + unsigned code = cons_to_unsigned (val, UINT_MAX); - if (INTEGERP (val)) - code = XINT (val); - else - { - CHECK_CONS (val); - CHECK_NUMBER_CAR (val); - CHECK_NUMBER_CDR (val); - code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val))); - } if (code < charset.min_code || code > charset.max_code) args_out_of_range_3 (make_number (charset.min_code), @@ -1865,17 +1847,7 @@ and CODE-POINT to a character. Currently not supported and just ignored. */) struct charset *charsetp; CHECK_CHARSET_GET_ID (charset, id); - if (CONSP (code_point)) - { - CHECK_NATNUM_CAR (code_point); - CHECK_NATNUM_CDR (code_point); - code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point))); - } - else - { - CHECK_NATNUM (code_point); - code = XINT (code_point); - } + code = cons_to_unsigned (code_point, UINT_MAX); charsetp = CHARSET_FROM_ID (id); c = DECODE_CHAR (charsetp, code); return (c >= 0 ? make_number (c) : Qnil); @@ -1900,9 +1872,7 @@ code-point in CCS. Currently not supported and just ignored. 
*/) code = ENCODE_CHAR (charsetp, XINT (ch)); if (code == CHARSET_INVALID_CODE (charsetp)) return Qnil; - if (code > 0x7FFFFFF) - return Fcons (make_number (code >> 16), make_number (code & 0xFFFF)); - return make_number (code); + return INTEGER_TO_CONS (code); } diff --git a/src/composite.h b/src/composite.h index cc8ca10a139..0f81911f0b0 100644 --- a/src/composite.h +++ b/src/composite.h @@ -265,10 +265,7 @@ enum lglyph_indices #define LGLYPH_CODE(g) \ (NILP (AREF ((g), LGLYPH_IX_CODE)) \ ? FONT_INVALID_CODE \ - : CONSP (AREF ((g), LGLYPH_IX_CODE)) \ - ? ((XFASTINT (XCAR (AREF ((g), LGLYPH_IX_CODE))) << 16) \ - | (XFASTINT (XCDR (AREF ((g), LGLYPH_IX_CODE))))) \ - : XFASTINT (AREF ((g), LGLYPH_IX_CODE))) + : cons_to_unsigned (AREF (g, LGLYPH_IX_CODE), TYPE_MAXIMUM (unsigned))) #define LGLYPH_WIDTH(g) XINT (AREF ((g), LGLYPH_IX_WIDTH)) #define LGLYPH_LBEARING(g) XINT (AREF ((g), LGLYPH_IX_LBEARING)) #define LGLYPH_RBEARING(g) XINT (AREF ((g), LGLYPH_IX_RBEARING)) @@ -280,15 +277,8 @@ enum lglyph_indices #define LGLYPH_SET_CHAR(g, val) ASET ((g), LGLYPH_IX_CHAR, make_number (val)) /* Callers must assure that VAL is not negative! */ #define LGLYPH_SET_CODE(g, val) \ - do { \ - if (val == FONT_INVALID_CODE) \ - ASET ((g), LGLYPH_IX_CODE, Qnil); \ - else if ((EMACS_INT)val > MOST_POSITIVE_FIXNUM) \ - ASET ((g), LGLYPH_IX_CODE, Fcons (make_number ((val) >> 16), \ - make_number ((val) & 0xFFFF))); \ - else \ - ASET ((g), LGLYPH_IX_CODE, make_number (val)); \ - } while (0) + ASET (g, LGLYPH_IX_CODE, \ + val == FONT_INVALID_CODE ? Qnil : INTEGER_TO_CONS (val)) #define LGLYPH_SET_WIDTH(g, val) ASET ((g), LGLYPH_IX_WIDTH, make_number (val)) #define LGLYPH_SET_LBEARING(g, val) ASET ((g), LGLYPH_IX_LBEARING, make_number (val)) diff --git a/src/data.c b/src/data.c index 522f0156ebd..408234f25cb 100644 --- a/src/data.c +++ b/src/data.c @@ -2324,33 +2324,89 @@ DEFUN ("zerop", Fzerop, Szerop, 1, 1, 0, return Qnil; } -/* Convert between long values and pairs of Lisp integers. 
- Note that long_to_cons returns a single Lisp integer - when the value fits in one. */ +/* Convert the cons-of-integers, integer, or float value C to an + unsigned value with maximum value MAX. Signal an error if C does not + have a valid format or is out of range. */ +uintmax_t +cons_to_unsigned (Lisp_Object c, uintmax_t max) +{ + int valid = 0; + uintmax_t val IF_LINT (= 0); + if (INTEGERP (c)) + { + valid = 0 <= XINT (c); + val = XINT (c); + } + else if (FLOATP (c)) + { + double d = XFLOAT_DATA (c); + if (0 <= d + && d < (max == UINTMAX_MAX ? (double) UINTMAX_MAX + 1 : max + 1)) + { + val = d; + valid = 1; + } + } + else if (CONSP (c)) + { + Lisp_Object top = XCAR (c); + Lisp_Object bot = XCDR (c); + if (CONSP (bot)) + bot = XCAR (bot); + if (NATNUMP (top) && XFASTINT (top) <= UINTMAX_MAX >> 16 && NATNUMP (bot)) + { + uintmax_t utop = XFASTINT (top); + val = (utop << 16) | XFASTINT (bot); + valid = 1; + } + } -Lisp_Object -long_to_cons (long unsigned int i) -{ - unsigned long top = i >> 16; - unsigned int bot = i & 0xFFFF; - if (top == 0) - return make_number (bot); - if (top == (unsigned long)-1 >> 16) - return Fcons (make_number (-1), make_number (bot)); - return Fcons (make_number (top), make_number (bot)); + if (! (valid && val <= max)) + error ("Not an in-range integer, float, or cons of integers"); + return val; } -unsigned long -cons_to_long (Lisp_Object c) +/* Convert the cons-of-integers, integer, or float value C to a signed + value with extrema MIN and MAX. Signal an error if C does not have + a valid format or is out of range. 
*/ +intmax_t +cons_to_signed (Lisp_Object c, intmax_t min, intmax_t max) { - Lisp_Object top, bot; + int valid = 0; + intmax_t val IF_LINT (= 0); if (INTEGERP (c)) - return XINT (c); - top = XCAR (c); - bot = XCDR (c); - if (CONSP (bot)) - bot = XCAR (bot); - return ((XINT (top) << 16) | XINT (bot)); + { + val = XINT (c); + valid = 1; + } + else if (FLOATP (c)) + { + double d = XFLOAT_DATA (c); + if (min <= d + && d < (max == INTMAX_MAX ? (double) INTMAX_MAX + 1 : max + 1)) + { + val = d; + valid = 1; + } + } + else if (CONSP (c)) + { + Lisp_Object top = XCAR (c); + Lisp_Object bot = XCDR (c); + if (CONSP (bot)) + bot = XCAR (bot); + if (INTEGERP (top) && INTMAX_MIN >> 16 <= XINT (top) + && XINT (top) <= INTMAX_MAX >> 16 && INTEGERP (bot)) + { + intmax_t itop = XINT (top); + val = (itop << 16) | XINT (bot); + valid = 1; + } + } + + if (! (valid && min <= val && val <= max)) + error ("Not an in-range integer, float, or cons of integers"); + return val; } DEFUN ("number-to-string", Fnumber_to_string, Snumber_to_string, 1, 1, 0, diff --git a/src/dired.c b/src/dired.c index 0fe2ead56ef..3bf4fd9a023 100644 --- a/src/dired.c +++ b/src/dired.c @@ -900,12 +900,9 @@ Elements of the attribute list are: This is a floating point number if the size is too large for an integer. 8. File modes, as a string of ten letters or dashes as in ls -l. 9. t if file's gid would change if file were deleted and recreated. -10. inode number. If inode number is larger than what Emacs integer - can hold, but all but the bottom 16 bits still fits, this is a cons cell - containing two integers: first the high part, then the low 16 bits. - If the inode number is still wider, this is of the form - (HIGH MIDDLE . LOW): first the high bits, then the middle 24 bits, - and finally the low 16 bits. +10. inode number. If it is larger than what the Emacs integer + can hold, this is a cons cell containing two integers: first the + high part, then the low 16 bits. 11. Filesystem device number. 
If it is larger than what the Emacs integer can hold, this is a cons cell, similar to the inode number. @@ -998,34 +995,8 @@ so last access time will always be midnight of that day. */) #else /* file gid will be egid */ values[9] = (s.st_gid != getegid ()) ? Qt : Qnil; #endif /* not BSD4_2 */ - if (!FIXNUM_OVERFLOW_P (s.st_ino)) - /* Keep the most common cases as integers. */ - values[10] = make_number (s.st_ino); - else if (!FIXNUM_OVERFLOW_P (s.st_ino >> 16)) - /* To allow inode numbers larger than VALBITS, separate the bottom - 16 bits. */ - values[10] = Fcons (make_number ((EMACS_INT)(s.st_ino >> 16)), - make_number ((EMACS_INT)(s.st_ino & 0xffff))); - else - { - /* To allow inode numbers beyond what INTEGER_TO_CONS can handle, - separate into 2 24-bit high parts and a 16-bit bottom part. - The code on the next line avoids a compiler warning on - systems where st_ino is 32 bit wide. (bug#766). */ - EMACS_INT high_ino = s.st_ino >> 31 >> 1; - - values[10] = Fcons (make_number (high_ino >> 8), - Fcons (make_number (((high_ino & 0xff) << 16) - + (s.st_ino >> 16 & 0xffff)), - make_number (s.st_ino & 0xffff))); - } - - /* Likewise for device. 
*/ - if (FIXNUM_OVERFLOW_P (s.st_dev)) - values[11] = Fcons (make_number (s.st_dev >> 16), - make_number (s.st_dev & 0xffff)); - else - values[11] = make_number (s.st_dev); + values[10] = INTEGER_TO_CONS (s.st_ino); + values[11] = INTEGER_TO_CONS (s.st_dev); return Flist (sizeof(values) / sizeof(values[0]), values); } diff --git a/src/fileio.c b/src/fileio.c index 48dac80a39f..2f7716d5b54 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -5005,7 +5005,7 @@ An argument specifies the modification time value to use { if (!NILP (time_list)) { - current_buffer->modtime = cons_to_long (time_list); + CONS_TO_INTEGER (time_list, time_t, current_buffer->modtime); current_buffer->modtime_size = -1; } else diff --git a/src/font.c b/src/font.c index 398198324a4..326c9d80e44 100644 --- a/src/font.c +++ b/src/font.c @@ -4388,16 +4388,8 @@ where for (i = 0; i < 255; i++) if (variations[i]) { - Lisp_Object code; int vs = (i < 16 ? 0xFE00 + i : 0xE0100 + (i - 16)); - /* Stops GCC whining about limited range of data type. */ - EMACS_INT var = variations[i]; - - if (var > MOST_POSITIVE_FIXNUM) - code = Fcons (make_number ((variations[i]) >> 16), - make_number ((variations[i]) & 0xFFFF)); - else - code = make_number (variations[i]); + Lisp_Object code = INTEGER_TO_CONS (variations[i]); val = Fcons (Fcons (make_number (vs), code), val); } return val; diff --git a/src/fontset.c b/src/fontset.c index 46637b53b3e..fec3c56b036 100644 --- a/src/fontset.c +++ b/src/fontset.c @@ -1859,17 +1859,11 @@ DEFUN ("internal-char-font", Finternal_char_font, Sinternal_char_font, 1, 2, 0, { unsigned code = face->font->driver->encode_char (face->font, c); Lisp_Object font_object; - /* Assignment to EMACS_INT stops GCC whining about limited range - of data type. 
*/ EMACS_INT cod = code; if (code == FONT_INVALID_CODE) return Qnil; XSETFONT (font_object, face->font); - if (cod <= MOST_POSITIVE_FIXNUM) - return Fcons (font_object, make_number (code)); - return Fcons (font_object, Fcons (make_number (code >> 16), - make_number (code & 0xFFFF))); + return Fcons (font_object, INTEGER_TO_CONS (code)); } return Qnil; } diff --git a/src/lisp.h b/src/lisp.h index e694bbcc58e..1defda151ae 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -2405,9 +2405,33 @@ EXFUN (Fadd1, 1); EXFUN (Fsub1, 1); EXFUN (Fmake_variable_buffer_local, 1); +/* Convert the integer I to an Emacs representation, either the integer + itself, or a cons of two integers, or if all else fails a float. + The float might lose information; this happens only in extreme cases + such as 32-bit EMACS_INT and 64-bit time_t with outlandish time values, + and these aren't worth complicating the interface. + + I should not have side effects. */ +#define INTEGER_TO_CONS(i) \ + (! FIXNUM_OVERFLOW_P (i) \ + ? make_number (i) \ + : ! ((FIXNUM_OVERFLOW_P (INTMAX_MIN >> 16) \ + || FIXNUM_OVERFLOW_P (UINTMAX_MAX >> 16)) \ + && FIXNUM_OVERFLOW_P ((i) >> 16)) \ + ? Fcons (make_number ((i) >> 16), make_number ((i) & 0xffff)) \ + : make_float (i)) + +/* Convert the Emacs representation CONS back to an integer of type + TYPE, storing the result in the variable VAR. Signal an error if CONS + is not a valid representation or is out of range for TYPE. */ +#define CONS_TO_INTEGER(cons, type, var) \ + (TYPE_SIGNED (type) \ + ?
((var) = cons_to_signed (cons, TYPE_MINIMUM (type), TYPE_MAXIMUM (type))) \ + : ((var) = cons_to_unsigned (cons, TYPE_MAXIMUM (type)))) +extern intmax_t cons_to_signed (Lisp_Object, intmax_t, intmax_t); +extern uintmax_t cons_to_unsigned (Lisp_Object, uintmax_t); + extern struct Lisp_Symbol *indirect_variable (struct Lisp_Symbol *); -extern Lisp_Object long_to_cons (unsigned long); -extern unsigned long cons_to_long (Lisp_Object); extern void args_out_of_range (Lisp_Object, Lisp_Object) NO_RETURN; extern void args_out_of_range_3 (Lisp_Object, Lisp_Object, Lisp_Object) NO_RETURN; diff --git a/src/undo.c b/src/undo.c index 142960545a7..e7e9ae5632e 100644 --- a/src/undo.c +++ b/src/undo.c @@ -212,7 +212,6 @@ record_change (EMACS_INT beg, EMACS_INT length) void record_first_change (void) { - Lisp_Object high, low; struct buffer *base_buffer = current_buffer; if (EQ (BVAR (current_buffer, undo_list), Qt)) @@ -225,9 +224,9 @@ record_first_change (void) if (base_buffer->base_buffer) base_buffer = base_buffer->base_buffer; - XSETFASTINT (high, (base_buffer->modtime >> 16) & 0xffff); - XSETFASTINT (low, base_buffer->modtime & 0xffff); - BVAR (current_buffer, undo_list) = Fcons (Fcons (Qt, Fcons (high, low)), BVAR (current_buffer, undo_list)); + BVAR (current_buffer, undo_list) = + Fcons (Fcons (Qt, INTEGER_TO_CONS (base_buffer->modtime)), + BVAR (current_buffer, undo_list)); } /* Record a change in property PROP (whose old value was VAL) @@ -499,13 +498,9 @@ Return what remains of the list. */) if (EQ (car, Qt)) { /* Element (t high . low) records previous modtime. 
*/ - Lisp_Object high, low; - time_t mod_time; struct buffer *base_buffer = current_buffer; - - high = Fcar (cdr); - low = Fcdr (cdr); - mod_time = (XFASTINT (high) << 16) + XFASTINT (low); + time_t mod_time; + CONS_TO_INTEGER (cdr, time_t, mod_time); if (current_buffer->base_buffer) base_buffer = current_buffer->base_buffer; diff --git a/src/xfns.c b/src/xfns.c index f3dc493ff85..1b425f602d9 100644 --- a/src/xfns.c +++ b/src/xfns.c @@ -4295,18 +4295,9 @@ no value of TYPE (always string in the MS Windows case). */) if (! NILP (source)) { - if (NUMBERP (source)) - { - if (FLOATP (source)) - target_window = (Window) XFLOAT (source); - else - target_window = XFASTINT (source); - - if (target_window == 0) - target_window = FRAME_X_DISPLAY_INFO (f)->root_window; - } - else if (CONSP (source)) - target_window = cons_to_long (source); + CONS_TO_INTEGER (source, Window, target_window); + if (! target_window) + target_window = FRAME_X_DISPLAY_INFO (f)->root_window; } BLOCK_INPUT; diff --git a/src/xselect.c b/src/xselect.c index ca2b1812a61..7545a44b1bc 100644 --- a/src/xselect.c +++ b/src/xselect.c @@ -335,7 +335,7 @@ x_own_selection (Lisp_Object selection_name, Lisp_Object selection_value, Lisp_Object prev_value; selection_data = list4 (selection_name, selection_value, - long_to_cons (timestamp), frame); + INTEGER_TO_CONS (timestamp), frame); prev_value = LOCAL_SELECTION (selection_name, dpyinfo); dpyinfo->terminal->Vselection_alist @@ -419,7 +419,7 @@ x_get_local_selection (Lisp_Object selection_symbol, Lisp_Object target_type, || INTEGERP (check) || NILP (value)) return value; - /* Check for a value that cons_to_long could handle. */ + /* Check for a value that CONS_TO_INTEGER could handle. */ else if (CONSP (check) && INTEGERP (XCAR (check)) && (INTEGERP (XCDR (check)) @@ -782,8 +782,8 @@ x_handle_selection_request (struct input_event *event) if (NILP (local_selection_data)) goto DONE; /* Decline requests issued prior to our acquiring the selection. 
*/ - local_selection_time - = (Time) cons_to_long (XCAR (XCDR (XCDR (local_selection_data)))); + CONS_TO_INTEGER (XCAR (XCDR (XCDR (local_selection_data))), + Time, local_selection_time); if (SELECTION_EVENT_TIME (event) != CurrentTime && local_selection_time > SELECTION_EVENT_TIME (event)) goto DONE; @@ -950,8 +950,8 @@ x_handle_selection_clear (struct input_event *event) /* Well, we already believe that we don't own it, so that's just fine. */ if (NILP (local_selection_data)) return; - local_selection_time = (Time) - cons_to_long (XCAR (XCDR (XCDR (local_selection_data)))); + CONS_TO_INTEGER (XCAR (XCDR (XCDR (local_selection_data))), + Time, local_selection_time); /* We have reasserted the selection since this SelectionClear was generated, so we can disregard it. */ @@ -1213,16 +1213,7 @@ x_get_foreign_selection (Lisp_Object selection_symbol, Lisp_Object target_type, return Qnil; if (! NILP (time_stamp)) - { - if (CONSP (time_stamp)) - requestor_time = (Time) cons_to_long (time_stamp); - else if (INTEGERP (time_stamp)) - requestor_time = (Time) XUINT (time_stamp); - else if (FLOATP (time_stamp)) - requestor_time = (Time) XFLOAT_DATA (time_stamp); - else - error ("TIME_STAMP must be cons or number"); - } + CONS_TO_INTEGER (time_stamp, Time, requestor_time); BLOCK_INPUT; @@ -1652,7 +1643,7 @@ selection_data_to_lisp_data (Display *display, const unsigned char *data, convert it to a cons of integers, 16 bits in each half. 
*/ else if (format == 32 && size == sizeof (unsigned int)) - return long_to_cons (((unsigned int *) data) [0]); + return INTEGER_TO_CONS (((unsigned int *) data) [0]); else if (format == 16 && size == sizeof (unsigned short)) return make_number ((int) (((unsigned short *) data) [0])); @@ -1678,7 +1669,7 @@ selection_data_to_lisp_data (Display *display, const unsigned char *data, for (i = 0; i < size / 4; i++) { unsigned int j = ((unsigned int *) data) [i]; - Faset (v, make_number (i), long_to_cons (j)); + Faset (v, make_number (i), INTEGER_TO_CONS (j)); } return v; } @@ -1755,7 +1746,7 @@ lisp_data_to_selection_data (Display *display, Lisp_Object obj, *size_ret = 1; *data_ret = (unsigned char *) xmalloc (sizeof (unsigned long) + 1); (*data_ret) [sizeof (unsigned long)] = 0; - (*(unsigned long **) data_ret) [0] = cons_to_long (obj); + (*(unsigned long **) data_ret) [0] = cons_to_unsigned (obj, ULONG_MAX); if (NILP (type)) type = QINTEGER; } else if (VECTORP (obj)) @@ -1803,11 +1794,11 @@ lisp_data_to_selection_data (Display *display, Lisp_Object obj, *data_ret = (unsigned char *) xmalloc (*size_ret * data_size); for (i = 0; i < *size_ret; i++) if (*format_ret == 32) - (*((unsigned long **) data_ret)) [i] - = cons_to_long (XVECTOR (obj)->contents [i]); + (*((unsigned long **) data_ret)) [i] = + cons_to_unsigned (XVECTOR (obj)->contents [i], ULONG_MAX); else - (*((unsigned short **) data_ret)) [i] - = (unsigned short) cons_to_long (XVECTOR (obj)->contents [i]); + (*((unsigned short **) data_ret)) [i] = + cons_to_unsigned (XVECTOR (obj)->contents [i], USHRT_MAX); } } else @@ -2025,8 +2016,10 @@ frame's display, or the first available X display. */) selection_atom = symbol_to_x_atom (dpyinfo, selection); BLOCK_INPUT; - timestamp = (NILP (time_object) ? 
last_event_timestamp - : cons_to_long (time_object)); + if (NILP (time_object)) + timestamp = last_event_timestamp; + else + CONS_TO_INTEGER (time_object, Time, timestamp); XSetSelectionOwner (dpyinfo->display, selection_atom, None, timestamp); UNBLOCK_INPUT; @@ -2232,12 +2225,8 @@ x_fill_property_data (Display *dpy, Lisp_Object data, void *ret, int format) { Lisp_Object o = XCAR (iter); - if (INTEGERP (o)) - val = (long) XFASTINT (o); - else if (FLOATP (o)) - val = (long) XFLOAT_DATA (o); - else if (CONSP (o)) - val = (long) cons_to_long (o); + if (INTEGERP (o) || FLOATP (o) || CONSP (o)) + val = cons_to_signed (o, LONG_MIN, LONG_MAX); else if (STRINGP (o)) { BLOCK_INPUT; @@ -2248,9 +2237,19 @@ x_fill_property_data (Display *dpy, Lisp_Object data, void *ret, int format) error ("Wrong type, must be string, number or cons"); if (format == 8) - *d08++ = (char) val; + { + if (CHAR_MIN <= val && val <= CHAR_MAX) + *d08++ = val; + else + error ("Out of 'char' range"); + } else if (format == 16) - *d16++ = (short) val; + { + if (SHRT_MIN <= val && val <= SHRT_MAX) + *d16++ = val; + else + error ("Out of 'short' range"); + } else *d32++ = val; } @@ -2334,14 +2333,7 @@ If the value is 0 or the atom is not known, return the empty string. */) Atom atom; int had_errors; - if (INTEGERP (value)) - atom = (Atom) XUINT (value); - else if (FLOATP (value)) - atom = (Atom) XFLOAT_DATA (value); - else if (CONSP (value)) - atom = (Atom) cons_to_long (value); - else - error ("Wrong type, value must be number or cons"); + CONS_TO_INTEGER (value, Atom, atom); BLOCK_INPUT; x_catch_errors (dpy); @@ -2531,17 +2523,8 @@ x_send_client_event (Lisp_Object display, Lisp_Object dest, Lisp_Object from, At else error ("DEST as a string must be one of PointerWindow or InputFocus"); } - else if (INTEGERP (dest)) - wdest = (Window) XFASTINT (dest); - else if (FLOATP (dest)) - wdest = (Window) XFLOAT_DATA (dest); - else if (CONSP (dest)) - { - if (! NUMBERP (XCAR (dest)) || ! 
NUMBERP (XCDR (dest))) - error ("Both car and cdr for DEST must be numbers"); - else - wdest = (Window) cons_to_long (dest); - } + else if (INTEGERP (dest) || FLOATP (dest) || CONSP (dest)) + CONS_TO_INTEGER (dest, Window, wdest); else error ("DEST must be a frame, nil, string, number or cons"); -- cgit v1.2.1 From 2d365993f18598a29e1327937a5408669c18dc0a Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 3 Jun 2011 12:04:41 -0700 Subject: Document wide integers better. * files.texi (File Attributes): ino_t values no longer map to anything larger than a single cons. --- doc/lispref/ChangeLog | 2 ++ doc/lispref/files.texi | 10 +++------- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/doc/lispref/ChangeLog b/doc/lispref/ChangeLog index 16d175c338b..a22b86cf36b 100644 --- a/doc/lispref/ChangeLog +++ b/doc/lispref/ChangeLog @@ -2,6 +2,8 @@ Document wide integers better. * files.texi (File Attributes): Document ino_t values better. + ino_t values no longer map to anything larger than a single cons. + * numbers.texi (Integer Basics, Integer Basics, Arithmetic Operations): (Bitwise Operations): * objects.texi (Integer Type): Integers are typically 62 bits now. diff --git a/doc/lispref/files.texi b/doc/lispref/files.texi index ed282349573..51c27ecc42f 100644 --- a/doc/lispref/files.texi +++ b/doc/lispref/files.texi @@ -1237,13 +1237,9 @@ deleted and recreated; @code{nil} otherwise. @item The file's inode number. If possible, this is an integer. If the inode number @math{N} is too large to be represented as an integer in -Emacs Lisp, but @math{N / 2^16} is representable, then the value has -the form @code{(@var{high} . @var{low})}, where @var{high} holds the -high bits (i.e., excluding the low-order bits) and @var{low} the low -16 bits. If the inode number is even larger, the value is of the form -@code{(@var{high} @var{middle} . @var{low})}, where @code{high} holds -the high bits, @var{middle} the next 24 bits, and @var{low} the low -16 bits. 
+Emacs Lisp, then the value has the form @code{(@var{high} +. @var{low})}, where @var{high} holds the high bits (i.e., all but the +low 16 bits) and @var{low} the low 16 bits. @item The filesystem number of the device that the file is on. Depending on -- cgit v1.2.1 From aa76b908750df2d882a3a7767133928435b0fe85 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 3 Jun 2011 13:14:12 -0700 Subject: * xselect.c: Include <limits.h>. --- src/ChangeLog | 3 ++- src/xselect.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ChangeLog b/src/ChangeLog index 20308d40ab0..de227b45256 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -19,7 +19,8 @@ * undo.c (record_first_change): Use INTEGER_TO_CONS. (Fprimitive_undo): Use CONS_TO_INTEGER. * xfns.c (Fx_window_property): Likewise. - * xselect.c (x_own_selection, selection_data_to_lisp_data): + * xselect.c: Include <limits.h>. + (x_own_selection, selection_data_to_lisp_data): Use INTEGER_TO_CONS. (x_handle_selection_request, x_handle_selection_clear) (x_get_foreign_selection, Fx_disown_selection_internal) diff --git a/src/xselect.c b/src/xselect.c index 7545a44b1bc..ae73aa2d6a6 100644 --- a/src/xselect.c +++ b/src/xselect.c @@ -20,6 +20,7 @@ along with GNU Emacs. If not, see . */ /* Rewritten by jwz */ #include +#include <limits.h> #include /* termhooks.h needs this */ #include -- cgit v1.2.1 From 4751effbbce465109c28dddec695f1d34640c675 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 3 Jun 2011 16:21:13 -0700 Subject: * buffers.texi (Buffers): Correct the size limit. --- doc/emacs/buffers.texi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/emacs/buffers.texi b/doc/emacs/buffers.texi index 9463b02464d..ea48be48bf1 100644 --- a/doc/emacs/buffers.texi +++ b/doc/emacs/buffers.texi @@ -43,8 +43,9 @@ can be different from the value in other buffers. @xref{Locals}.
A buffer's size cannot be larger than some maximum, which is defined by the largest buffer position representable by the @dfn{Emacs integer} data type. This is because Emacs tracks buffer positions -using that data type. For most machines, the maximum buffer size +using that data type. For 64-bit machines, the maximum buffer size enforced by the data types is @math{2^61 - 2} bytes, or about 2 EiB. +For most 32-bit machines, the maximum is @math{2^31 - 1} bytes, or about 2 GiB. For some older machines, the maximum is @math{2^29 - 2} bytes, or about 512 MiB. Buffer sizes are also limited by the size of Emacs's virtual memory. -- cgit v1.2.1 From c6c3615fb23e438701da089110b645fc771f8087 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 3 Jun 2011 19:02:36 -0700 Subject: Use ptrdiff_t, not int, for sizes. * image.c (slurp_file): Switch from int to ptrdiff_t. All uses changed. (slurp_file, svg_load): Check that file size fits in both size_t (for malloc) and ptrdiff_t (for sanity and safety). --- src/ChangeLog | 8 ++++++++ src/image.c | 21 ++++++++++----------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index de227b45256..8333bf2dedd 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,11 @@ +2011-06-04 Paul Eggert + + Use ptrdiff_t, not int, for sizes. + * image.c (slurp_file): Switch from int to ptrdiff_t. + All uses changed. + (slurp_file, svg_load): Check that file size fits in both + size_t (for malloc) and ptrdiff_t (for sanity and safety). + 2011-06-03 Paul Eggert Check for overflow when converting integer to cons and back. diff --git a/src/image.c b/src/image.c index 26542bf27e7..ffc4f633c7a 100644 --- a/src/image.c +++ b/src/image.c @@ -2112,9 +2112,6 @@ x_put_x_image (struct frame *f, XImagePtr ximg, Pixmap pixmap, int width, int he File Handling ***********************************************************************/ -static unsigned char *slurp_file (char *, int *); - - /* Find image file FILE. 
Look in data-directory/images, then x-bitmap-file-path. Value is the encoded full name of the file found, or nil if not found. */ @@ -2151,7 +2148,7 @@ x_find_image_file (Lisp_Object file) occurred. *SIZE is set to the size of the file. */ static unsigned char * -slurp_file (char *file, int *size) +slurp_file (char *file, ptrdiff_t *size) { FILE *fp = NULL; unsigned char *buf = NULL; @@ -2159,6 +2156,7 @@ slurp_file (char *file, int *size) if (stat (file, &st) == 0 && (fp = fopen (file, "rb")) != NULL + && 0 <= st.st_size && st.st_size <= min (PTRDIFF_MAX, SIZE_MAX) && (buf = (unsigned char *) xmalloc (st.st_size), fread (buf, 1, st.st_size, fp) == st.st_size)) { @@ -2814,7 +2812,7 @@ xbm_load (struct frame *f, struct image *img) { Lisp_Object file; unsigned char *contents; - int size; + ptrdiff_t size; file = x_find_image_file (file_name); if (!STRINGP (file)) @@ -4039,7 +4037,7 @@ xpm_load (struct frame *f, { Lisp_Object file; unsigned char *contents; - int size; + ptrdiff_t size; file = x_find_image_file (file_name); if (!STRINGP (file)) @@ -5021,6 +5019,7 @@ pbm_read_file (file, size) if (stat (SDATA (file), &st) == 0 && (fp = fopen (SDATA (file), "rb")) != NULL + && 0 <= st.st_size && st.st_size <= min (PTRDIFF_MAX, SIZE_MAX) && (buf = (char *) xmalloc (st.st_size), fread (buf, 1, st.st_size, fp) == st.st_size)) { @@ -5055,7 +5054,7 @@ pbm_load (struct frame *f, struct image *img) enum {PBM_MONO, PBM_GRAY, PBM_COLOR} type; unsigned char *contents = NULL; unsigned char *end, *p; - int size; + ptrdiff_t size; specified_file = image_spec_value (img->spec, QCfile, NULL); @@ -7869,7 +7868,7 @@ static int svg_image_p (Lisp_Object object); static int svg_load (struct frame *f, struct image *img); static int svg_load_image (struct frame *, struct image *, - unsigned char *, unsigned int); + unsigned char *, ptrdiff_t); /* The symbol `svg' identifying images of this type. 
*/ @@ -8047,7 +8046,7 @@ svg_load (struct frame *f, struct image *img) { Lisp_Object file; unsigned char *contents; - int size; + ptrdiff_t size; file = x_find_image_file (file_name); if (!STRINGP (file)) @@ -8074,7 +8073,7 @@ svg_load (struct frame *f, struct image *img) Lisp_Object data; data = image_spec_value (img->spec, QCdata, NULL); - if (!STRINGP (data)) + if (! (STRINGP (data) && SBYTES (data) <= min (PTRDIFF_MAX, SIZE_MAX))) { image_error ("Invalid image data `%s'", data, Qnil); return 0; @@ -8096,7 +8095,7 @@ static int svg_load_image (struct frame *f, /* Pointer to emacs frame structure. */ struct image *img, /* Pointer to emacs image structure. */ unsigned char *contents, /* String containing the SVG XML data to be parsed. */ - unsigned int size) /* Size of data in bytes. */ + ptrdiff_t size) /* Size of data in bytes. */ { RsvgHandle *rsvg_handle; RsvgDimensionData dimension_data; -- cgit v1.2.1 From edaa182249601b0f7ee36f9863243b6919943982 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 3 Jun 2011 19:49:51 -0700 Subject: Check for buffer and string overflow more precisely. * buffer.h (BUF_BYTES_MAX): New macro. * lisp.h (STRING_BYTES_MAX): New macro. * alloc.c (Fmake_string): * character.c (string_escape_byte8): * coding.c (coding_alloc_by_realloc): * doprnt.c (doprnt): * editfns.c (Fformat): * eval.c (verror): Use STRING_BYTES_MAX, not MOST_POSITIVE_FIXNUM, since they may not be the same number. * editfns.c (Finsert_char): * fileio.c (Finsert_file_contents): Likewise for BUF_BYTES_MAX. 
--- src/ChangeLog | 15 +++++++++++++++ src/alloc.c | 2 +- src/buffer.h | 5 +++++ src/character.c | 4 ++-- src/coding.c | 4 ++-- src/doprnt.c | 4 ++-- src/editfns.c | 4 ++-- src/eval.c | 2 +- src/fileio.c | 2 +- src/lisp.h | 6 ++++++ 10 files changed, 37 insertions(+), 11 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 8333bf2dedd..374790b51f6 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,5 +1,20 @@ 2011-06-04 Paul Eggert + Check for buffer and string overflow more precisely. + * buffer.h (BUF_BYTES_MAX): New macro. + * lisp.h (STRING_BYTES_MAX): New macro. + * alloc.c (Fmake_string): + * character.c (string_escape_byte8): + * coding.c (coding_alloc_by_realloc): + * doprnt.c (doprnt): + * editfns.c (Fformat): + * eval.c (verror): + Use STRING_BYTES_MAX, not MOST_POSITIVE_FIXNUM, + since they may not be the same number. + * editfns.c (Finsert_char): + * fileio.c (Finsert_file_contents): + Likewise for BUF_BYTES_MAX. + Use ptrdiff_t, not int, for sizes. * image.c (slurp_file): Switch from int to ptrdiff_t. All uses changed. diff --git a/src/alloc.c b/src/alloc.c index 0c18fca1755..d20ee1e59e3 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -2205,7 +2205,7 @@ INIT must be an integer that represents a character. */) int len = CHAR_STRING (c, str); EMACS_INT string_len = XINT (length); - if (string_len > MOST_POSITIVE_FIXNUM / len) + if (string_len > STRING_BYTES_MAX / len) string_overflow (); nbytes = len * string_len; val = make_uninit_multibyte_string (string_len, nbytes); diff --git a/src/buffer.h b/src/buffer.h index 8c64a24e804..3c91bdfe570 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -306,6 +306,11 @@ do \ } \ while (0) +/* Maximum number of bytes in a buffer. + A buffer cannot contain more bytes than a 1-origin fixnum can represent, + nor can it be so large that C pointer arithmetic stops working. 
*/ +#define BUF_BYTES_MAX min (MOST_POSITIVE_FIXNUM - 1, min (SIZE_MAX, PTRDIFF_MAX)) + /* Return the address of byte position N in current buffer. */ #define BYTE_POS_ADDR(n) \ diff --git a/src/character.c b/src/character.c index 4aa1b75cd8c..cff00f44660 100644 --- a/src/character.c +++ b/src/character.c @@ -838,7 +838,7 @@ string_escape_byte8 (Lisp_Object string) if (multibyte) { if ((MOST_POSITIVE_FIXNUM - nchars) / 3 < byte8_count - || (MOST_POSITIVE_FIXNUM - nbytes) / 2 < byte8_count) + || (STRING_BYTES_MAX - nbytes) / 2 < byte8_count) string_overflow (); /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */ @@ -847,7 +847,7 @@ string_escape_byte8 (Lisp_Object string) } else { - if ((MOST_POSITIVE_FIXNUM - nchars) / 3 < byte8_count) + if ((STRING_BYTES_MAX - nchars) / 3 < byte8_count) string_overflow (); /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */ diff --git a/src/coding.c b/src/coding.c index 6ccaf354c74..64e8e41a5a1 100644 --- a/src/coding.c +++ b/src/coding.c @@ -1071,8 +1071,8 @@ coding_set_destination (struct coding_system *coding) static void coding_alloc_by_realloc (struct coding_system *coding, EMACS_INT bytes) { - if (coding->dst_bytes >= MOST_POSITIVE_FIXNUM - bytes) - error ("Maximum size of buffer or string exceeded"); + if (STRING_BYTES_MAX - coding->dst_bytes < bytes) + string_overflow (); coding->destination = (unsigned char *) xrealloc (coding->destination, coding->dst_bytes + bytes); coding->dst_bytes += bytes; diff --git a/src/doprnt.c b/src/doprnt.c index d2abc119912..5ca3ea89be6 100644 --- a/src/doprnt.c +++ b/src/doprnt.c @@ -329,7 +329,7 @@ doprnt (char *buffer, register size_t bufsize, const char *format, minlen = atoi (&fmtcpy[1]); string = va_arg (ap, char *); tem = strlen (string); - if (tem > MOST_POSITIVE_FIXNUM) + if (tem > STRING_BYTES_MAX) error ("String for %%s or %%S format is too long"); width = strwidth (string, tem); goto doit1; @@ -338,7 +338,7 @@ doprnt (char *buffer, register size_t bufsize, 
const char *format, doit: /* Coming here means STRING contains ASCII only. */ tem = strlen (string); - if (tem > MOST_POSITIVE_FIXNUM) + if (tem > STRING_BYTES_MAX) error ("Format width or precision too large"); width = tem; doit1: diff --git a/src/editfns.c b/src/editfns.c index b18a35fe295..b7951c45fac 100644 --- a/src/editfns.c +++ b/src/editfns.c @@ -2341,7 +2341,7 @@ from adjoining text, if those properties are sticky. */) len = CHAR_STRING (XFASTINT (character), str); else str[0] = XFASTINT (character), len = 1; - if (MOST_POSITIVE_FIXNUM / len < XINT (count)) + if (BUF_BYTES_MAX / len < XINT (count)) error ("Maximum buffer size would be exceeded"); n = XINT (count) * len; if (n <= 0) @@ -3588,7 +3588,7 @@ usage: (format STRING &rest OBJECTS) */) char initial_buffer[4000]; char *buf = initial_buffer; EMACS_INT bufsize = sizeof initial_buffer; - EMACS_INT max_bufsize = min (MOST_POSITIVE_FIXNUM + 1, SIZE_MAX); + EMACS_INT max_bufsize = STRING_BYTES_MAX + 1; char *p; Lisp_Object buf_save_value IF_LINT (= {0}); register char *format, *end, *format_start; diff --git a/src/eval.c b/src/eval.c index f8bc0a9f6aa..ef5abac17ae 100644 --- a/src/eval.c +++ b/src/eval.c @@ -1994,7 +1994,7 @@ verror (const char *m, va_list ap) { char buf[4000]; size_t size = sizeof buf; - size_t size_max = min (MOST_POSITIVE_FIXNUM + 1, SIZE_MAX); + size_t size_max = STRING_BYTES_MAX + 1; size_t mlen = strlen (m); char *buffer = buf; size_t used; diff --git a/src/fileio.c b/src/fileio.c index 2f7716d5b54..d9bc28d8c37 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -3248,7 +3248,7 @@ variable `last-coding-system-used' to the coding system actually used. */) /* Check whether the size is too large or negative, which can happen on a platform that allows file sizes greater than the maximum off_t value. */ if (! not_regular - && ! (0 <= st.st_size && st.st_size <= MOST_POSITIVE_FIXNUM)) + && ! 
(0 <= st.st_size && st.st_size <= BUF_BYTES_MAX)) error ("Maximum buffer size exceeded"); /* Prevent redisplay optimizations. */ diff --git a/src/lisp.h b/src/lisp.h index 1defda151ae..ad4614c7b16 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -766,6 +766,12 @@ extern EMACS_INT string_bytes (struct Lisp_String *); #endif /* not GC_CHECK_STRING_BYTES */ +/* A string cannot contain more bytes than a fixnum can represent, + nor can it be so long that C pointer arithmetic stops working on + the string plus a terminating null. */ +#define STRING_BYTES_MAX \ + min (MOST_POSITIVE_FIXNUM, min (SIZE_MAX, PTRDIFF_MAX) - 1) + /* Mark STR as a unibyte string. */ #define STRING_SET_UNIBYTE(STR) \ do { if (EQ (STR, empty_multibyte_string)) \ -- cgit v1.2.1 From cad02d3b8074b286b5c2796294c477cd2056bcc1 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 3 Jun 2011 20:24:42 -0700 Subject: * image.c (svg_load): Omit needless test. --- src/ChangeLog | 4 ++-- src/image.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 374790b51f6..ead20c31f7f 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -18,8 +18,8 @@ Use ptrdiff_t, not int, for sizes. * image.c (slurp_file): Switch from int to ptrdiff_t. All uses changed. - (slurp_file, svg_load): Check that file size fits in both - size_t (for malloc) and ptrdiff_t (for sanity and safety). + (slurp_file): Check that file size fits in both size_t (for + malloc) and ptrdiff_t (for sanity and safety). 2011-06-03 Paul Eggert diff --git a/src/image.c b/src/image.c index ffc4f633c7a..a179568cb85 100644 --- a/src/image.c +++ b/src/image.c @@ -8073,7 +8073,7 @@ svg_load (struct frame *f, struct image *img) Lisp_Object data; data = image_spec_value (img->spec, QCdata, NULL); - if (! (STRINGP (data) && SBYTES (data) <= min (PTRDIFF_MAX, SIZE_MAX))) + if (!STRINGP (data)) { image_error ("Invalid image data `%s'", data, Qnil); return 0; -- cgit v1.2.1