diff options
Diffstat (limited to 'src/lread.c')
-rw-r--r-- | src/lread.c | 714 |
1 files changed, 429 insertions, 285 deletions
diff --git a/src/lread.c b/src/lread.c index b0262ebea79..439b7e0b24c 100644 --- a/src/lread.c +++ b/src/lread.c @@ -29,7 +29,9 @@ Boston, MA 02111-1307, USA. */ #include "lisp.h" #include "intervals.h" #include "buffer.h" +#include "character.h" #include "charset.h" +#include "coding.h" #include <epaths.h> #include "commands.h" #include "keyboard.h" @@ -86,6 +88,12 @@ Lisp_Object Qascii_character, Qload, Qload_file_name; Lisp_Object Qbackquote, Qcomma, Qcomma_at, Qcomma_dot, Qfunction; Lisp_Object Qinhibit_file_name_operation; +/* Used instead of Qget_file_char while loading *.elc files compiled + by Emacs 21 or older. */ +static Lisp_Object Qget_emacs_mule_file_char; + +static Lisp_Object Qload_force_doc_strings; + extern Lisp_Object Qevent_symbol_element_mask; extern Lisp_Object Qfile_exists_p; @@ -129,6 +137,11 @@ static int load_force_doc_strings; /* Nonzero means read should convert strings to unibyte. */ static int load_convert_to_unibyte; +/* Nonzero means READCHAR should read bytes one by one (not character) + when READCHARFUN is Qget_file_char or Qget_emacs_mule_file_char. + This is set to 1 by read1 temporarily while handling #@NUMBER. */ +static int load_each_byte; + /* Function to use for loading an Emacs lisp source file (not compiled) instead of readevalloop. */ Lisp_Object Vload_source_file_function; @@ -157,9 +170,6 @@ static int read_from_string_index; static int read_from_string_index_byte; static int read_from_string_limit; -/* Number of bytes left to read in the buffer character - that `readchar' has already advanced over. */ -static int readchar_backlog; /* Number of characters read in the current call to Fread or Fread_from_string. */ static int readchar_count; @@ -203,7 +213,9 @@ int load_dangerous_libraries; static Lisp_Object Vbytecomp_version_regexp; -static void to_multibyte P_ ((char **, char **, int *)); +static int read_emacs_mule_char P_ ((int, int (*) (int, Lisp_Object), + Lisp_Object)); + static void readevalloop P_ ((Lisp_Object, FILE*, Lisp_Object, Lisp_Object (*) (), int, Lisp_Object, Lisp_Object, @@ -212,29 +224,41 @@ static Lisp_Object load_unwind P_ ((Lisp_Object)); static Lisp_Object load_descriptor_unwind P_ ((Lisp_Object)); +/* Functions that read one byte from the current source READCHARFUN + or unreads one byte. If the integer argument C is -1, it returns + one read byte, or -1 when there's no more byte in the source. If C + is 0 or positive, it unreads C, and the return value is not + interesting. */ + +static int readbyte_for_lambda P_ ((int, Lisp_Object)); +static int readbyte_from_file P_ ((int, Lisp_Object)); +static int readbyte_from_string P_ ((int, Lisp_Object)); + /* Handle unreading and rereading of characters. Write READCHAR to read a character, UNREAD(c) to unread c to be read again. - The READCHAR and UNREAD macros are meant for reading/unreading a - byte code; they do not handle multibyte characters. The caller - should manage them if necessary. - - [ Actually that seems to be a lie; READCHAR will definitely read - multibyte characters from buffer sources, at least. Is the - comment just out of date? - -- Colin Walters <walters@gnu.org>, 22 May 2002 16:36:50 -0400 ] - */ + These macros correctly read/unread multibyte characters. */ #define READCHAR readchar (readcharfun) #define UNREAD(c) unreadchar (readcharfun, c) +/* When READCHARFUN is Qget_file_char, Qget_emacs_mule_file_char, + Qlambda, or a cons, we use this to keep an unread character because + a file stream can't handle multibyte-char unreading. The value -1 + means that there's no unread character. */ +static int unread_char; + static int readchar (readcharfun) Lisp_Object readcharfun; { Lisp_Object tem; register int c; + int (*readbyte) P_ ((int, Lisp_Object)); + unsigned char buf[MAX_MULTIBYTE_LENGTH]; + int i, len; + int emacs_mule_encoding = 0; readchar_count++; @@ -243,21 +267,10 @@ readchar (readcharfun) register struct buffer *inbuffer = XBUFFER (readcharfun); int pt_byte = BUF_PT_BYTE (inbuffer); - int orig_pt_byte = pt_byte; - - if (readchar_backlog > 0) - /* We get the address of the byte just passed, - which is the last byte of the character. - The other bytes in this character are consecutive with it, - because the gap can't be in the middle of a character. */ - return *(BUF_BYTE_ADDRESS (inbuffer, BUF_PT_BYTE (inbuffer) - 1) - - --readchar_backlog); if (pt_byte >= BUF_ZV_BYTE (inbuffer)) return -1; - readchar_backlog = -1; - if (! NILP (inbuffer->enable_multibyte_characters)) { /* Fetch the character code from the buffer. */ @@ -268,6 +281,8 @@ readchar (readcharfun) else { c = BUF_FETCH_BYTE (inbuffer, pt_byte); + if (! ASCII_BYTE_P (c)) + c = BYTE8_TO_CHAR (c); pt_byte++; } SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte); @@ -279,21 +294,10 @@ readchar (readcharfun) register struct buffer *inbuffer = XMARKER (readcharfun)->buffer; int bytepos = marker_byte_position (readcharfun); - int orig_bytepos = bytepos; - - if (readchar_backlog > 0) - /* We get the address of the byte just passed, - which is the last byte of the character. - The other bytes in this character are consecutive with it, - because the gap can't be in the middle of a character. */ - return *(BUF_BYTE_ADDRESS (inbuffer, XMARKER (readcharfun)->bytepos - 1) - - --readchar_backlog); if (bytepos >= BUF_ZV_BYTE (inbuffer)) return -1; - readchar_backlog = -1; - if (! NILP (inbuffer->enable_multibyte_characters)) { /* Fetch the character code from the buffer. */ @@ -304,6 +308,8 @@ readchar (readcharfun) else { c = BUF_FETCH_BYTE (inbuffer, bytepos); + if (! ASCII_BYTE_P (c)) + c = BYTE8_TO_CHAR (c); bytepos++; } @@ -314,21 +320,15 @@ readchar (readcharfun) } if (EQ (readcharfun, Qlambda)) - return read_bytecode_char (0); + { + readbyte = readbyte_for_lambda; + goto read_multibyte; + } if (EQ (readcharfun, Qget_file_char)) { - c = getc (instream); -#ifdef EINTR - /* Interrupted reads have been observed while reading over the network */ - while (c == EOF && ferror (instream) && errno == EINTR) - { - QUIT; - clearerr (instream); - c = getc (instream); - } -#endif - return c; + readbyte = readbyte_from_file; + goto read_multibyte; } if (STRINGP (readcharfun)) @@ -343,11 +343,59 @@ readchar (readcharfun) return c; } + if (CONSP (readcharfun)) + { + /* This is the case that read_vector is reading from a unibyte + string that contains a byte sequence previously skipped + because of #@NUMBER. The car part of readcharfun is that + string, and the cdr part is a value of readcharfun given to + read_vector. */ + readbyte = readbyte_from_string; + if (EQ (XCDR (readcharfun), Qget_emacs_mule_file_char)) + emacs_mule_encoding = 1; + goto read_multibyte; + } + + if (EQ (readcharfun, Qget_emacs_mule_file_char)) + { + readbyte = readbyte_from_file; + emacs_mule_encoding = 1; + goto read_multibyte; + } + tem = call0 (readcharfun); if (NILP (tem)) return -1; return XINT (tem); + + read_multibyte: + if (unread_char >= 0) + { + c = unread_char; + unread_char = -1; + return c; + } + c = (*readbyte) (-1, readcharfun); + if (c < 0 || ASCII_BYTE_P (c) || load_each_byte) + return c; + if (emacs_mule_encoding) + return read_emacs_mule_char (c, readbyte, readcharfun); + i = 0; + buf[i++] = c; + len = BYTES_BY_CHAR_HEAD (c); + while (i < len) + { + c = (*readbyte) (-1, readcharfun); + if (c < 0 || ! TRAILING_CODE_P (c)) + { + while (--i > 1) + (*readbyte) (buf[i], readcharfun); + return BYTE8_TO_CHAR (buf[0]); + } + buf[i++] = c; + } + return STRING_CHAR (buf, i); } /* Unread the character C in the way appropriate for the stream READCHARFUN. @@ -368,36 +416,26 @@ unreadchar (readcharfun, c) struct buffer *b = XBUFFER (readcharfun); int bytepos = BUF_PT_BYTE (b); - if (readchar_backlog >= 0) - readchar_backlog++; + BUF_PT (b)--; + if (! NILP (b->enable_multibyte_characters)) + BUF_DEC_POS (b, bytepos); else - { - BUF_PT (b)--; - if (! NILP (b->enable_multibyte_characters)) - BUF_DEC_POS (b, bytepos); - else - bytepos--; + bytepos--; - BUF_PT_BYTE (b) = bytepos; - } + BUF_PT_BYTE (b) = bytepos; } else if (MARKERP (readcharfun)) { struct buffer *b = XMARKER (readcharfun)->buffer; int bytepos = XMARKER (readcharfun)->bytepos; - if (readchar_backlog >= 0) - readchar_backlog++; + XMARKER (readcharfun)->charpos--; + if (! NILP (b->enable_multibyte_characters)) + BUF_DEC_POS (b, bytepos); else - { - XMARKER (readcharfun)->charpos--; - if (! NILP (b->enable_multibyte_characters)) - BUF_DEC_POS (b, bytepos); - else - bytepos--; + bytepos--; - XMARKER (readcharfun)->bytepos = bytepos; - } + XMARKER (readcharfun)->bytepos = bytepos; } else if (STRINGP (readcharfun)) { @@ -405,14 +443,152 @@ unreadchar (readcharfun, c) read_from_string_index_byte = string_char_to_byte (readcharfun, read_from_string_index); } + else if (CONSP (readcharfun)) + { + unread_char = c; + } else if (EQ (readcharfun, Qlambda)) - read_bytecode_char (1); - else if (EQ (readcharfun, Qget_file_char)) - ungetc (c, instream); + { + unread_char = c; + } + else if (EQ (readcharfun, Qget_file_char) + || EQ (readcharfun, Qget_emacs_mule_file_char)) + { + if (load_each_byte) + ungetc (c, instream); + else + unread_char = c; + } else call1 (readcharfun, make_number (c)); } +static int +readbyte_for_lambda (c, readcharfun) + int c; + Lisp_Object readcharfun; +{ + return read_bytecode_char (c >= 0); +} + + +static int +readbyte_from_file (c, readcharfun) + int c; + Lisp_Object readcharfun; +{ + if (c >= 0) + { + ungetc (c, instream); + return 0; + } + + c = getc (instream); +#ifdef EINTR + /* Interrupted reads have been observed while reading over the network */ + while (c == EOF && ferror (instream) && errno == EINTR) + { + QUIT; + clearerr (instream); + c = getc (instream); + } +#endif + return (c == EOF ? -1 : c); +} + +static int +readbyte_from_string (c, readcharfun) + int c; + Lisp_Object readcharfun; +{ + Lisp_Object string = XCAR (readcharfun); + + if (c >= 0) + { + read_from_string_index--; + read_from_string_index_byte + = string_char_to_byte (string, read_from_string_index); + } + + if (read_from_string_index >= read_from_string_limit) + c = -1; + else + FETCH_STRING_CHAR_ADVANCE (c, string, + read_from_string_index, + read_from_string_index_byte); + return c; +} + + +/* Read one non-ASCII character from INSTREAM. The character is + encoded in `emacs-mule' and the first byte is already read in + C. */ + +extern char emacs_mule_bytes[256]; + +static int +read_emacs_mule_char (c, readbyte, readcharfun) + int c; + int (*readbyte) P_ ((int, Lisp_Object)); + Lisp_Object readcharfun; +{ + /* Emacs-mule coding uses at most 4-byte for one character. */ + unsigned char buf[4]; + int len = emacs_mule_bytes[c]; + struct charset *charset; + int i; + unsigned code; + + if (len == 1) + /* C is not a valid leading-code of `emacs-mule'. */ + return BYTE8_TO_CHAR (c); + + i = 0; + buf[i++] = c; + while (i < len) + { + c = (*readbyte) (-1, readcharfun); + if (c < 0xA0) + { + while (--i > 1) + (*readbyte) (buf[i], readcharfun); + return BYTE8_TO_CHAR (buf[0]); + } + buf[i++] = c; + } + + if (len == 2) + { + charset = emacs_mule_charset[buf[0]]; + code = buf[1] & 0x7F; + } + else if (len == 3) + { + if (buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_11 + || buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_12) + { + charset = emacs_mule_charset[buf[1]]; + code = buf[2] & 0x7F; + } + else + { + charset = emacs_mule_charset[buf[0]]; + code = ((buf[1] << 8) | buf[2]) & 0x7F7F; + } + } + else + { + charset = emacs_mule_charset[buf[1]]; + code = ((buf[2] << 8) | buf[3]) & 0x7F7F; + } + c = DECODE_CHAR (charset, code); + if (c < 0) + Fsignal (Qinvalid_read_syntax, + Fcons (build_string ("invalid multibyte form"), Qnil)); + return c; +} + + static Lisp_Object read_internal_start P_ ((Lisp_Object, Lisp_Object, Lisp_Object)); static Lisp_Object read0 P_ ((Lisp_Object)); @@ -420,7 +596,6 @@ static Lisp_Object read1 P_ ((Lisp_Object, int *, int)); static Lisp_Object read_list P_ ((int, Lisp_Object)); static Lisp_Object read_vector P_ ((Lisp_Object, int)); -static int read_multibyte P_ ((int, Lisp_Object)); static Lisp_Object substitute_object_recurse P_ ((Lisp_Object, Lisp_Object, Lisp_Object)); @@ -595,11 +770,11 @@ DEFUN ("get-file-char", Fget_file_char, Sget_file_char, 0, 0, 0, -/* Value is non-zero if the file asswociated with file descriptor FD - is a compiled Lisp file that's safe to load. Only files compiled - with Emacs are safe to load. Files compiled with XEmacs can lead - to a crash in Fbyte_code because of an incompatible change in the - byte compiler. */ +/* Value is a version number of byte compiled code if the file + asswociated with file descriptor FD is a compiled Lisp file that's + safe to load. Only files compiled with Emacs are safe to load. + Files compiled with XEmacs can lead to a crash in Fbyte_code + because of an incompatible change in the byte compiler. */ static int safe_to_load_p (fd) @@ -608,6 +783,7 @@ safe_to_load_p (fd) char buf[512]; int nbytes, i; int safe_p = 1; + int version = 1; /* Read the first few bytes from the file, and look for a line specifying the byte compiler version used. */ @@ -617,15 +793,18 @@ safe_to_load_p (fd) buf[nbytes] = '\0'; /* Skip to the next newline, skipping over the initial `ELC' - with NUL bytes following it. */ + with NUL bytes following it, but note the version. */ for (i = 0; i < nbytes && buf[i] != '\n'; ++i) - ; + if (i == 4) + version = buf[i]; - if (i < nbytes - && fast_c_string_match_ignore_case (Vbytecomp_version_regexp, + if (i == nbytes + || fast_c_string_match_ignore_case (Vbytecomp_version_regexp, buf + i) < 0) safe_p = 0; } + if (safe_p) + safe_p = version; lseek (fd, 0, SEEK_SET); return safe_p; @@ -685,6 +864,8 @@ Return t if file exists. */) Lisp_Object handler; int safe_p = 1; char *fmode = "r"; + int version; + #ifdef DOS_NT fmode = "rt"; #endif /* DOS_NT */ @@ -800,8 +981,10 @@ Return t if file exists. */) Vloads_in_progress = Fcons (found, Vloads_in_progress); } + version = -1; if (!bcmp (SDATA (found) + SBYTES (found) - 4, - ".elc", 4)) + ".elc", 4) + || (version = safe_to_load_p (fd)) > 0) /* Load .elc files directly, but not when they are remote and have no handler! */ { @@ -810,7 +993,8 @@ Return t if file exists. */) struct stat s1, s2; int result; - if (!safe_to_load_p (fd)) + if (version < 0 + && ! (version = safe_to_load_p (fd))) { safe_p = 0; if (!load_dangerous_libraries) @@ -913,8 +1097,17 @@ Return t if file exists. */) load_descriptor_list = Fcons (make_number (fileno (stream)), load_descriptor_list); load_in_progress++; - readevalloop (Qget_file_char, stream, file, Feval, - 0, Qnil, Qnil, Qnil, Qnil); + if (! version || version >= 22) + readevalloop (Qget_file_char, stream, file, Feval, + 0, Qnil, Qnil, Qnil, Qnil); + else + { + /* We can't handle a file which was compiled with + byte-compile-dynamic by older version of Emacs. */ + specbind (Qload_force_doc_strings, Qt); + readevalloop (Qget_emacs_mule_file_char, stream, file, Feval, + 0, Qnil, Qnil, Qnil, Qnil); + } unbind_to (count, Qnil); /* Run any load-hooks for this file. */ @@ -1324,8 +1517,6 @@ readevalloop (readcharfun, stream, sourcename, evalfun, record_unwind_protect (readevalloop_1, load_convert_to_unibyte ? Qt : Qnil); load_convert_to_unibyte = !NILP (unibyte); - readchar_backlog = -1; - GCPRO1 (sourcename); LOADHIST_ATTACH (sourcename); @@ -1546,7 +1737,6 @@ read_internal_start (stream, start, end) { Lisp_Object retval; - readchar_backlog = -1; readchar_count = 0; new_backquote_flag = 0; read_objects = Qnil; @@ -1554,17 +1744,25 @@ read_internal_start (stream, start, end) || EQ (Vread_with_symbol_positions, stream)) Vread_symbol_positions_list = Qnil; - if (STRINGP (stream)) + if (STRINGP (stream) + || ((CONSP (stream) && STRINGP (XCAR (stream))))) { int startval, endval; + Lisp_Object string; + + if (STRINGP (stream)) + string = stream; + else + string = XCAR (stream); + if (NILP (end)) - endval = SCHARS (stream); + endval = SCHARS (string); else { CHECK_NUMBER (end); endval = XINT (end); - if (endval < 0 || endval > SCHARS (stream)) - args_out_of_range (stream, end); + if (endval < 0 || endval > SCHARS (string)) + args_out_of_range (string, end); } if (NILP (start)) @@ -1574,10 +1772,10 @@ read_internal_start (stream, start, end) CHECK_NUMBER (start); startval = XINT (start); if (startval < 0 || startval > endval) - args_out_of_range (stream, start); + args_out_of_range (string, start); } read_from_string_index = startval; - read_from_string_index_byte = string_char_to_byte (stream, startval); + read_from_string_index_byte = string_char_to_byte (string, startval); read_from_string_limit = endval; } @@ -1610,56 +1808,16 @@ read0 (readcharfun) static int read_buffer_size; static char *read_buffer; -/* Read multibyte form and return it as a character. C is a first - byte of multibyte form, and rest of them are read from - READCHARFUN. */ - -static int -read_multibyte (c, readcharfun) - register int c; - Lisp_Object readcharfun; -{ - /* We need the actual character code of this multibyte - characters. */ - unsigned char str[MAX_MULTIBYTE_LENGTH]; - int len = 0; - int bytes; - - if (c < 0) - return c; - - str[len++] = c; - while ((c = READCHAR) >= 0xA0 - && len < MAX_MULTIBYTE_LENGTH) - { - str[len++] = c; - readchar_count--; - } - UNREAD (c); - if (UNIBYTE_STR_AS_MULTIBYTE_P (str, len, bytes)) - return STRING_CHAR (str, len); - /* The byte sequence is not valid as multibyte. Unread all bytes - but the first one, and return the first byte. */ - while (--len > 0) - UNREAD (str[len]); - return str[0]; -} - /* Read a \-escape sequence, assuming we already read the `\'. - If the escape sequence forces unibyte, store 1 into *BYTEREP. - If the escape sequence forces multibyte, store 2 into *BYTEREP. - Otherwise store 0 into *BYTEREP. */ + If the escape sequence forces unibyte, return eight-bit char. */ static int -read_escape (readcharfun, stringp, byterep) +read_escape (readcharfun, stringp) Lisp_Object readcharfun; int stringp; - int *byterep; { register int c = READCHAR; - *byterep = 0; - switch (c) { case -1: @@ -1696,7 +1854,7 @@ read_escape (readcharfun, stringp, byterep) error ("Invalid escape character syntax"); c = READCHAR; if (c == '\\') - c = read_escape (readcharfun, 0, byterep); + c = read_escape (readcharfun, 0); return c | meta_modifier; case 'S': @@ -1705,7 +1863,7 @@ read_escape (readcharfun, stringp, byterep) error ("Invalid escape character syntax"); c = READCHAR; if (c == '\\') - c = read_escape (readcharfun, 0, byterep); + c = read_escape (readcharfun, 0); return c | shift_modifier; case 'H': @@ -1714,7 +1872,7 @@ read_escape (readcharfun, stringp, byterep) error ("Invalid escape character syntax"); c = READCHAR; if (c == '\\') - c = read_escape (readcharfun, 0, byterep); + c = read_escape (readcharfun, 0); return c | hyper_modifier; case 'A': @@ -1723,7 +1881,7 @@ read_escape (readcharfun, stringp, byterep) error ("Invalid escape character syntax"); c = READCHAR; if (c == '\\') - c = read_escape (readcharfun, 0, byterep); + c = read_escape (readcharfun, 0); return c | alt_modifier; case 's': @@ -1736,7 +1894,7 @@ read_escape (readcharfun, stringp, byterep) } c = READCHAR; if (c == '\\') - c = read_escape (readcharfun, 0, byterep); + c = read_escape (readcharfun, 0); return c | super_modifier; case 'C': @@ -1746,7 +1904,7 @@ read_escape (readcharfun, stringp, byterep) case '^': c = READCHAR; if (c == '\\') - c = read_escape (readcharfun, 0, byterep); + c = read_escape (readcharfun, 0); if ((c & ~CHAR_MODIFIER_MASK) == '?') return 0177 | (c & CHAR_MODIFIER_MASK); else if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK))) @@ -1786,7 +1944,8 @@ read_escape (readcharfun, stringp, byterep) } } - *byterep = 1; + if (i >= 0x80 && i < 0x100) + i = BYTE8_TO_CHAR (i); return i; } @@ -1794,6 +1953,7 @@ read_escape (readcharfun, stringp, byterep) /* A hex escape, as in ANSI C. */ { int i = 0; + int count = 0; while (1) { c = READCHAR; @@ -1816,15 +1976,15 @@ read_escape (readcharfun, stringp, byterep) UNREAD (c); break; } + count++; } - *byterep = 2; + if (count < 3 && i >= 0x80) + return BYTE8_TO_CHAR (i); return i; } default: - if (BASE_LEADING_CODE_P (c)) - c = read_multibyte (c, readcharfun); return c; } } @@ -1896,43 +2056,6 @@ read_integer (readcharfun, radix) } -/* Convert unibyte text in read_buffer to multibyte. - - Initially, *P is a pointer after the end of the unibyte text, and - the pointer *END points after the end of read_buffer. - - If read_buffer doesn't have enough room to hold the result - of the conversion, reallocate it and adjust *P and *END. - - At the end, make *P point after the result of the conversion, and - return in *NCHARS the number of characters in the converted - text. */ - -static void -to_multibyte (p, end, nchars) - char **p, **end; - int *nchars; -{ - int nbytes; - - parse_str_as_multibyte (read_buffer, *p - read_buffer, &nbytes, nchars); - if (read_buffer_size < 2 * nbytes) - { - int offset = *p - read_buffer; - read_buffer_size = 2 * max (read_buffer_size, nbytes); - read_buffer = (char *) xrealloc (read_buffer, read_buffer_size); - *p = read_buffer + offset; - *end = read_buffer + read_buffer_size; - } - - if (nbytes != *nchars) - nbytes = str_as_multibyte (read_buffer, read_buffer_size, - *p - read_buffer, nchars); - - *p = read_buffer + nbytes; -} - - /* If the next token is ')' or ']' or '.', we store that character in *PCH and the return value is not interesting. Else, we store zero in *PCH and we read and return one lisp object. @@ -1949,6 +2072,7 @@ read1 (readcharfun, pch, first_in_list) int uninterned_symbol = 0; *pch = 0; + load_each_byte = 0; retry: @@ -1980,11 +2104,9 @@ read1 (readcharfun, pch, first_in_list) { Lisp_Object tmp; tmp = read_vector (readcharfun, 0); - if (XVECTOR (tmp)->size < CHAR_TABLE_STANDARD_SLOTS - || XVECTOR (tmp)->size > CHAR_TABLE_STANDARD_SLOTS + 10) + if (XVECTOR (tmp)->size < VECSIZE (struct Lisp_Char_Table)) error ("Invalid size char-table"); XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp)); - XCHAR_TABLE (tmp)->top = Qt; return tmp; } else if (c == '^') @@ -1993,11 +2115,18 @@ read1 (readcharfun, pch, first_in_list) if (c == '[') { Lisp_Object tmp; + int depth, size; + tmp = read_vector (readcharfun, 0); - if (XVECTOR (tmp)->size != SUB_CHAR_TABLE_STANDARD_SLOTS) + if (!INTEGERP (AREF (tmp, 0))) + error ("Invalid depth in char-table"); + depth = XINT (AREF (tmp, 0)); + if (depth < 1 || depth > 3) + error ("Invalid depth in char-table"); + size = XVECTOR (tmp)->size - 2; + if (chartab_size [depth] != size) error ("Invalid size char-table"); - XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp)); - XCHAR_TABLE (tmp)->top = Qnil; + XSETSUB_CHAR_TABLE (tmp, XSUB_CHAR_TABLE (tmp)); return tmp; } Fsignal (Qinvalid_read_syntax, @@ -2019,12 +2148,14 @@ read1 (readcharfun, pch, first_in_list) UNREAD (c); tmp = read1 (readcharfun, pch, first_in_list); - if (size_in_chars != SCHARS (tmp) - /* We used to print 1 char too many - when the number of bits was a multiple of 8. - Accept such input in case it came from an old version. */ - && ! (XFASTINT (length) - == (SCHARS (tmp) - 1) * BOOL_VECTOR_BITS_PER_CHAR)) + if (STRING_MULTIBYTE (tmp) + || (size_in_chars != SCHARS (tmp) + /* We used to print 1 char too many + when the number of bits was a multiple of 8. + Accept such input in case it came from an old + version. */ + && ! (XFASTINT (length) + == (SCHARS (tmp) - 1) * BOOL_VECTOR_BITS_PER_CHAR))) Fsignal (Qinvalid_read_syntax, Fcons (make_string ("#&...", 5), Qnil)); @@ -2090,6 +2221,7 @@ read1 (readcharfun, pch, first_in_list) { int i, nskip = 0; + load_each_byte = 1; /* Read a decimal integer. */ while ((c = READCHAR) >= 0 && c >= '0' && c <= '9') @@ -2100,7 +2232,9 @@ read1 (readcharfun, pch, first_in_list) if (c >= 0) UNREAD (c); - if (load_force_doc_strings && EQ (readcharfun, Qget_file_char)) + if (load_force_doc_strings + && (EQ (readcharfun, Qget_file_char) + || EQ (readcharfun, Qget_emacs_mule_file_char))) { /* If we are supposed to force doc strings into core right now, record the last string that we skipped, @@ -2152,6 +2286,7 @@ read1 (readcharfun, pch, first_in_list) c = READCHAR; } + load_each_byte = 0; goto retry; } if (c == '!') @@ -2281,7 +2416,7 @@ read1 (readcharfun, pch, first_in_list) case '?': { - int discard; + int modifiers; int next_char; int ok; @@ -2297,9 +2432,12 @@ read1 (readcharfun, pch, first_in_list) return make_number (c); if (c == '\\') - c = read_escape (readcharfun, 0, &discard); - else if (BASE_LEADING_CODE_P (c)) - c = read_multibyte (c, readcharfun); + c = read_escape (readcharfun, 0); + modifiers = c & CHAR_MODIFIER_MASK; + c &= ~CHAR_MODIFIER_MASK; + if (CHAR_BYTE8_P (c)) + c = CHAR_TO_BYTE8 (c); + c |= modifiers; next_char = READCHAR; if (next_char == '.') @@ -2334,14 +2472,12 @@ read1 (readcharfun, pch, first_in_list) char *p = read_buffer; char *end = read_buffer + read_buffer_size; register int c; - /* 1 if we saw an escape sequence specifying - a multibyte character, or a multibyte character. */ + /* Nonzero if we saw an escape sequence specifying + a multibyte character. */ int force_multibyte = 0; - /* 1 if we saw an escape sequence specifying + /* Nonzero if we saw an escape sequence specifying a single-byte character. */ int force_singlebyte = 0; - /* 1 if read_buffer contains multibyte text now. */ - int is_multibyte = 0; int cancel = 0; int nchars = 0; @@ -2359,9 +2495,9 @@ read1 (readcharfun, pch, first_in_list) if (c == '\\') { - int byterep; + int modifiers; - c = read_escape (readcharfun, 1, &byterep); + c = read_escape (readcharfun, 1); /* C is -1 if \ newline has just been seen */ if (c == -1) @@ -2371,50 +2507,55 @@ read1 (readcharfun, pch, first_in_list) continue; } - if (byterep == 1) + modifiers = c & CHAR_MODIFIER_MASK; + c = c & ~CHAR_MODIFIER_MASK; + + if (CHAR_BYTE8_P (c)) force_singlebyte = 1; - else if (byterep == 2) + else if (! ASCII_CHAR_P (c)) force_multibyte = 1; - } - - /* A character that must be multibyte forces multibyte. */ - if (! SINGLE_BYTE_CHAR_P (c & ~CHAR_MODIFIER_MASK)) - force_multibyte = 1; + else /* i.e. ASCII_CHAR_P (c) */ + { + /* Allow `\C- ' and `\C-?'. */ + if (modifiers == CHAR_CTL) + { + if (c == ' ') + c = 0, modifiers = 0; + else if (c == '?') + c = 127, modifiers = 0; + } + if (modifiers & CHAR_SHIFT) + { + /* Shift modifier is valid only with [A-Za-z]. */ + if (c >= 'A' && c <= 'Z') + modifiers &= ~CHAR_SHIFT; + else if (c >= 'a' && c <= 'z') + c -= ('a' - 'A'), modifiers &= ~CHAR_SHIFT; + } + + if (modifiers & CHAR_META) + { + /* Move the meta bit to the right place for a + string. */ + modifiers &= ~CHAR_META; + c = BYTE8_TO_CHAR (c | 0x80); + force_singlebyte = 1; + } + } - /* If we just discovered the need to be multibyte, - convert the text accumulated thus far. */ - if (force_multibyte && ! is_multibyte) - { - is_multibyte = 1; - to_multibyte (&p, &end, &nchars); + /* Any modifiers remaining are invalid. */ + if (modifiers) + error ("Invalid modifier in string"); + p += CHAR_STRING (c, (unsigned char *) p); } - - /* Allow `\C- ' and `\C-?'. */ - if (c == (CHAR_CTL | ' ')) - c = 0; - else if (c == (CHAR_CTL | '?')) - c = 127; - - if (c & CHAR_SHIFT) + else { - /* Shift modifier is valid only with [A-Za-z]. */ - if ((c & 0377) >= 'A' && (c & 0377) <= 'Z') - c &= ~CHAR_SHIFT; - else if ((c & 0377) >= 'a' && (c & 0377) <= 'z') - c = (c & ~CHAR_SHIFT) - ('a' - 'A'); + p += CHAR_STRING (c, (unsigned char *) p); + if (CHAR_BYTE8_P (c)) + force_singlebyte = 1; + else if (! ASCII_CHAR_P (c)) + force_multibyte = 1; } - - if (c & CHAR_META) - /* Move the meta bit to the right place for a string. */ - c = (c & ~CHAR_META) | 0x80; - if (c & CHAR_MODIFIER_MASK) - error ("Invalid modifier in string"); - - if (is_multibyte) - p += CHAR_STRING (c, p); - else - *p++ = c; - nchars++; } @@ -2427,37 +2568,16 @@ read1 (readcharfun, pch, first_in_list) if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel) return make_number (0); - if (is_multibyte || force_singlebyte) + if (force_multibyte) + /* READ_BUFFER already contains valid multibyte forms. */ ; - else if (load_convert_to_unibyte) + else if (force_singlebyte) { - Lisp_Object string; - to_multibyte (&p, &end, &nchars); - if (p - read_buffer != nchars) - { - string = make_multibyte_string (read_buffer, nchars, - p - read_buffer); - return Fstring_make_unibyte (string); - } - /* We can make a unibyte string directly. */ - is_multibyte = 0; - } - else if (EQ (readcharfun, Qget_file_char) - || EQ (readcharfun, Qlambda)) - { - /* Nowadays, reading directly from a file is used only for - compiled Emacs Lisp files, and those always use the - Emacs internal encoding. Meanwhile, Qlambda is used - for reading dynamic byte code (compiled with - byte-compile-dynamic = t). So make the string multibyte - if the string contains any multibyte sequences. - (to_multibyte is a no-op if not.) */ - to_multibyte (&p, &end, &nchars); - is_multibyte = (p - read_buffer) != nchars; + nchars = str_as_unibyte (read_buffer, p - read_buffer); + p = read_buffer + nchars; } else - /* In all other cases, if we read these bytes as - separate characters, treat them as separate characters now. */ + /* Otherwise, READ_BUFFER contains only ASCII. */ ; /* We want readchar_count to be the number of characters, not @@ -2467,9 +2587,11 @@ read1 (readcharfun, pch, first_in_list) /* readchar_count -= (p - read_buffer) - nchars; */ if (read_pure) return make_pure_string (read_buffer, nchars, p - read_buffer, - is_multibyte); + (force_multibyte + || (p - read_buffer != nchars))); return make_specified_string (read_buffer, nchars, p - read_buffer, - is_multibyte); + (force_multibyte + || (p - read_buffer != nchars))); } case '.': @@ -2524,11 +2646,7 @@ read1 (readcharfun, pch, first_in_list) quoted = 1; } - if (! SINGLE_BYTE_CHAR_P (c)) - p += CHAR_STRING (c, p); - else - *p++ = c; - + p += CHAR_STRING (c, p); c = READCHAR; } @@ -2562,6 +2680,8 @@ read1 (readcharfun, pch, first_in_list) { if (p1[-1] == '.') p1[-1] = '\0'; + /* Fixme: if we have strtol, use that, and check + for overflow. */ if (sizeof (int) == sizeof (EMACS_INT)) XSETINT (val, atoi (read_buffer)); else if (sizeof (long) == sizeof (EMACS_INT)) @@ -2865,7 +2985,7 @@ read_vector (readcharfun, bytecodeflag) STRING_SET_CHARS (bytestr, SBYTES (bytestr)); STRING_SET_UNIBYTE (bytestr); - item = Fread (bytestr); + item = Fread (Fcons (bytestr, readcharfun)); if (!CONSP (item)) error ("invalid byte code"); @@ -2878,6 +2998,15 @@ read_vector (readcharfun, bytecodeflag) /* Now handle the bytecode slot. */ ptr[COMPILED_BYTECODE] = read_pure ? Fpurecopy (bytestr) : bytestr; } + else if (i == COMPILED_DOC_STRING + && STRINGP (item) + && ! STRING_MULTIBYTE (item)) + { + if (EQ (readcharfun, Qget_emacs_mule_file_char)) + item = Fdecode_coding_string (item, Qemacs_mule, Qnil, Qnil); + else + item = Fstring_as_multibyte (item); + } } ptr[i] = read_pure ? Fpurecopy (item) : item; otem = XCONS (tem); @@ -2975,7 +3104,15 @@ read_list (flag, readcharfun) if (doc_reference == 2) { /* Get a doc string from the file we are loading. - If it's in saved_doc_string, get it from there. */ + If it's in saved_doc_string, get it from there. + + Here, we don't know if the string is a + bytecode string or a doc string. As a + bytecode string must be unibyte, we always + return a unibyte string. If it is actually a + doc string, caller must make it + multibyte. */ + int pos = XINT (XCDR (val)); /* Position is negative for user variables. */ if (pos < 0) pos = -pos; @@ -3007,8 +3144,8 @@ read_list (flag, readcharfun) saved_doc_string[to++] = c; } - return make_string (saved_doc_string + start, - to - start); + return make_unibyte_string (saved_doc_string + start, + to - start); } /* Look in prev_saved_doc_string the same way. */ else if (pos >= prev_saved_doc_string_position @@ -3039,11 +3176,12 @@ read_list (flag, readcharfun) prev_saved_doc_string[to++] = c; } - return make_string (prev_saved_doc_string + start, - to - start); + return make_unibyte_string (prev_saved_doc_string + + start, + to - start); } else - return get_doc_string (val, 0, 0); + return get_doc_string (val, 1, 0); } return val; @@ -3961,6 +4099,12 @@ to load. See also `load-dangerous-libraries'. */); Qget_file_char = intern ("get-file-char"); staticpro (&Qget_file_char); + Qget_emacs_mule_file_char = intern ("get-emacs-mule-file-char"); + staticpro (&Qget_emacs_mule_file_char); + + Qload_force_doc_strings = intern ("load-force-doc-strings"); + staticpro (&Qload_force_doc_strings); + Qbackquote = intern ("`"); staticpro (&Qbackquote); Qcomma = intern (","); |