diff options
Diffstat (limited to 'src/coding.c')
-rw-r--r-- | src/coding.c | 137 |
1 files changed, 70 insertions, 67 deletions
diff --git a/src/coding.c b/src/coding.c index b0a9f6ef4cb..e4b52f6db48 100644 --- a/src/coding.c +++ b/src/coding.c @@ -642,15 +642,6 @@ static enum coding_category coding_priorities[coding_category_max]; Nth coding category. */ static struct coding_system coding_categories[coding_category_max]; -/*** Commonly used macros and functions ***/ - -#ifndef min -#define min(a, b) ((a) < (b) ? (a) : (b)) -#endif -#ifndef max -#define max(a, b) ((a) > (b) ? (a) : (b)) -#endif - /* Encode a flag that can be nil, something else, or t as -1, 0, 1. */ static int @@ -690,6 +681,14 @@ CHECK_NATNUM_CDR (Lisp_Object x) XSETCDR (x, tmp); } +/* True if CODING's destination can be grown. */ + +static bool +growable_destination (struct coding_system *coding) +{ + return STRINGP (coding->dst_object) || BUFFERP (coding->dst_object); +} + /* Safely get one byte from the source text pointed by SRC which ends at SRC_END, and set C to that byte. If there are not enough bytes @@ -1485,8 +1484,7 @@ decode_coding_utf_8 (struct coding_system *coding) src = src_base; consumed_chars = consumed_chars_base; ONE_MORE_BYTE (c); - *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); - coding->errors++; + *charbuf++ = ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c); } no_more_source: @@ -1685,7 +1683,6 @@ decode_coding_utf_16 (struct coding_system *coding) /* The first two bytes are not BOM. Treat them as bytes for a normal character. */ src = src_base; - coding->errors++; } CODING_UTF_16_BOM (coding) = utf_without_bom; } @@ -1725,7 +1722,7 @@ decode_coding_utf_16 (struct coding_system *coding) ONE_MORE_BYTE (c2); if (c2 < 0) { - *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1); + *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1); *charbuf++ = -c2; continue; } @@ -1742,7 +1739,6 @@ decode_coding_utf_16 (struct coding_system *coding) c1 = surrogate & 0xFF, c2 = surrogate >> 8; *charbuf++ = c1; *charbuf++ = c2; - coding->errors++; if (UTF_16_HIGH_SURROGATE_P (c)) CODING_UTF_16_SURROGATE (coding) = surrogate = c; else @@ -2108,7 +2104,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src, case 1: code = c; - charset_ID = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit; + charset_ID = ASCII_CHAR_P (code) ? charset_ascii : charset_eight_bit; break; default: @@ -2596,9 +2592,8 @@ decode_coding_emacs_mule (struct coding_system *coding) src = src_base; consumed_chars = consumed_chars_base; ONE_MORE_BYTE (c); - *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); + *charbuf++ = ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c); char_offset++; - coding->errors++; } no_more_source: @@ -3591,7 +3586,7 @@ decode_coding_iso_2022 (struct coding_system *coding) if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0) { - *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1); + *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1); char_offset++; CODING_ISO_EXTSEGMENT_LEN (coding)--; continue; @@ -3618,7 +3613,7 @@ decode_coding_iso_2022 (struct coding_system *coding) } else { - *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1); + *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1); char_offset++; } continue; @@ -3992,7 +3987,7 @@ decode_coding_iso_2022 (struct coding_system *coding) MAYBE_FINISH_COMPOSITION (); for (; src_base < src; src_base++, char_offset++) { - if (ASCII_BYTE_P (*src_base)) + if (ASCII_CHAR_P (*src_base)) *charbuf++ = *src_base; else *charbuf++ = BYTE8_TO_CHAR (*src_base); @@ -4022,9 +4017,8 @@ decode_coding_iso_2022 (struct coding_system *coding) src = src_base; consumed_chars = consumed_chars_base; ONE_MORE_BYTE (c); - *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); + *charbuf++ = c < 0 ? -c : ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c); char_offset++; - coding->errors++; /* Reset the invocation and designation status to the safest one; i.e. designate ASCII to the graphic register 0, and invoke that register to the graphic plane 0. This typically @@ -4855,7 +4849,6 @@ decode_coding_sjis (struct coding_system *coding) ONE_MORE_BYTE (c); *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c); char_offset++; - coding->errors++; } no_more_source: @@ -4951,7 +4944,6 @@ decode_coding_big5 (struct coding_system *coding) ONE_MORE_BYTE (c); *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c); char_offset++; - coding->errors++; } no_more_source: @@ -5658,9 +5650,8 @@ decode_coding_charset (struct coding_system *coding) src = src_base; consumed_chars = consumed_chars_base; ONE_MORE_BYTE (c); - *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); + *charbuf++ = c < 0 ? -c : ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c); char_offset++; - coding->errors++; } no_more_source: @@ -6893,6 +6884,11 @@ decode_eol (struct coding_system *coding) } +/* MAX_LOOKUP's maximum value. MAX_LOOKUP is an int and so cannot + exceed INT_MAX. Also, MAX_LOOKUP is multiplied by sizeof (int) for + alloca, so it cannot exceed MAX_ALLOCA / sizeof (int). */ +enum { MAX_LOOKUP_MAX = min (INT_MAX, MAX_ALLOCA / sizeof (int)) }; + /* Return a translation table (or list of them) from coding system attribute vector ATTRS for encoding (if ENCODEP) or decoding (if not ENCODEP). */ @@ -6945,7 +6941,7 @@ get_translation_table (Lisp_Object attrs, bool encodep, int *max_lookup) { val = XCHAR_TABLE (translation_table)->extras[1]; if (NATNUMP (val) && *max_lookup < XFASTINT (val)) - *max_lookup = XFASTINT (val); + *max_lookup = min (XFASTINT (val), MAX_LOOKUP_MAX); } else if (CONSP (translation_table)) { @@ -6957,7 +6953,7 @@ get_translation_table (Lisp_Object attrs, bool encodep, int *max_lookup) { Lisp_Object tailval = XCHAR_TABLE (XCAR (tail))->extras[1]; if (NATNUMP (tailval) && *max_lookup < XFASTINT (tailval)) - *max_lookup = XFASTINT (tailval); + *max_lookup = min (XFASTINT (tailval), MAX_LOOKUP_MAX); } } } @@ -7040,8 +7036,10 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table, int *buf = coding->charbuf; int *buf_end = buf + coding->charbuf_used; - if (EQ (coding->src_object, coding->dst_object)) + if (EQ (coding->src_object, coding->dst_object) + && ! NILP (coding->dst_object)) { + eassert (growable_destination (coding)); coding_set_source (coding); dst_end = ((unsigned char *) coding->source) + coding->consumed; } @@ -7080,6 +7078,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table, if ((dst_end - dst) / MAX_MULTIBYTE_LENGTH < to_nchars) { + eassert (growable_destination (coding)); if (((min (PTRDIFF_MAX, SIZE_MAX) - (buf_end - buf)) / MAX_MULTIBYTE_LENGTH) < to_nchars) @@ -7124,7 +7123,10 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table, const unsigned char *src_end = src + coding->consumed; if (EQ (coding->dst_object, coding->src_object)) - dst_end = (unsigned char *) src; + { + eassert (growable_destination (coding)); + dst_end = (unsigned char *) src; + } if (coding->src_multibyte != coding->dst_multibyte) { if (coding->src_multibyte) @@ -7140,6 +7142,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table, ONE_MORE_BYTE (c); if (dst == dst_end) { + eassert (growable_destination (coding)); if (EQ (coding->src_object, coding->dst_object)) dst_end = (unsigned char *) src; if (dst == dst_end) @@ -7170,6 +7173,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table, if (dst >= dst_end - 1) { + eassert (growable_destination (coding)); if (EQ (coding->src_object, coding->dst_object)) dst_end = (unsigned char *) src; if (dst >= dst_end - 1) @@ -7283,16 +7287,20 @@ produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos) coding->dst_object); } +#define MAX_CHARBUF_SIZE 0x4000 +/* How many units decoding functions expect in coding->charbuf at + most. Currently, decode_coding_emacs_mule expects the following + size, and that is the largest value. */ +#define MAX_CHARBUF_EXTRA_SIZE ((MAX_ANNOTATION_LENGTH * 3) + 1) -#define CHARBUF_SIZE 0x4000 - -#define ALLOC_CONVERSION_WORK_AREA(coding) \ - do { \ - coding->charbuf = SAFE_ALLOCA (CHARBUF_SIZE * sizeof (int)); \ - coding->charbuf_size = CHARBUF_SIZE; \ +#define ALLOC_CONVERSION_WORK_AREA(coding, size) \ + do { \ + ptrdiff_t units = min ((size) + MAX_CHARBUF_EXTRA_SIZE, \ + MAX_CHARBUF_SIZE); \ + coding->charbuf = SAFE_ALLOCA (units * sizeof (int)); \ + coding->charbuf_size = units; \ } while (0) - static void produce_annotation (struct coding_system *coding, ptrdiff_t pos) { @@ -7389,9 +7397,8 @@ decode_coding (struct coding_system *coding) coding->produced = coding->produced_char = 0; coding->chars_at_source = 0; record_conversion_result (coding, CODING_RESULT_SUCCESS); - coding->errors = 0; - ALLOC_CONVERSION_WORK_AREA (coding); + ALLOC_CONVERSION_WORK_AREA (coding, coding->src_bytes); attrs = CODING_ID_ATTRS (coding->id); translation_table = get_translation_table (attrs, 0, NULL); @@ -7785,9 +7792,8 @@ encode_coding (struct coding_system *coding) coding->consumed = coding->consumed_char = 0; coding->produced = coding->produced_char = 0; record_conversion_result (coding, CODING_RESULT_SUCCESS); - coding->errors = 0; - ALLOC_CONVERSION_WORK_AREA (coding); + ALLOC_CONVERSION_WORK_AREA (coding, coding->src_chars); if (coding->encoder == encode_coding_ccl) { @@ -8461,11 +8467,11 @@ from_unicode (Lisp_Object str) } Lisp_Object -from_unicode_buffer (const wchar_t* wstr) +from_unicode_buffer (const wchar_t *wstr) { return from_unicode ( make_unibyte_string ( - (char*) wstr, + (char *) wstr, /* we get one of the two final 0 bytes for free. */ 1 + sizeof (wchar_t) * wcslen (wstr))); } @@ -9049,13 +9055,13 @@ DEFUN ("find-coding-systems-region-internal", p = pbeg = BYTE_POS_ADDR (start_byte); pend = p + (end_byte - start_byte); - while (p < pend && ASCII_BYTE_P (*p)) p++; - while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--; + while (p < pend && ASCII_CHAR_P (*p)) p++; + while (p < pend && ASCII_CHAR_P (*(pend - 1))) pend--; work_table = Fmake_char_table (Qnil, Qnil); while (p < pend) { - if (ASCII_BYTE_P (*p)) + if (ASCII_CHAR_P (*p)) p++; else { @@ -9109,8 +9115,7 @@ DEFUN ("find-coding-systems-region-internal", DEFUN ("unencodable-char-position", Funencodable_char_position, Sunencodable_char_position, 3, 5, 0, - doc: /* -Return position of first un-encodable character in a region. + doc: /* Return position of first un-encodable character in a region. START and END specify the region and CODING-SYSTEM specifies the encoding to check. Return nil if CODING-SYSTEM does encode the region. @@ -9120,8 +9125,9 @@ list of positions. If optional 5th argument STRING is non-nil, it is a string to search for un-encodable characters. In that case, START and END are indexes -to the string. */) - (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object count, Lisp_Object string) +to the string and treated as in `substring'. */) + (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, + Lisp_Object count, Lisp_Object string) { EMACS_INT n; struct coding_system coding; @@ -9158,12 +9164,7 @@ to the string. */) else { CHECK_STRING (string); - CHECK_NATNUM (start); - CHECK_NATNUM (end); - if (! (XINT (start) <= XINT (end) && XINT (end) <= SCHARS (string))) - args_out_of_range_3 (string, start, end); - from = XINT (start); - to = XINT (end); + validate_subarray (string, start, end, SCHARS (string), &from, &to); if (! STRING_MULTIBYTE (string)) return Qnil; p = SDATA (string) + string_char_to_byte (string, from); @@ -9187,7 +9188,7 @@ to the string. */) int c; if (ascii_compatible) - while (p < stop && ASCII_BYTE_P (*p)) + while (p < stop && ASCII_CHAR_P (*p)) p++, from++; if (p >= stop) { @@ -9303,12 +9304,12 @@ is nil. */) p = pbeg = BYTE_POS_ADDR (start_byte); pend = p + (end_byte - start_byte); - while (p < pend && ASCII_BYTE_P (*p)) p++, pos++; - while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--; + while (p < pend && ASCII_CHAR_P (*p)) p++, pos++; + while (p < pend && ASCII_CHAR_P (*(pend - 1))) pend--; while (p < pend) { - if (ASCII_BYTE_P (*p)) + if (ASCII_CHAR_P (*p)) p++; else { @@ -9616,7 +9617,7 @@ Return the corresponding character. */) CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec); attrs = AREF (spec, 0); - if (ASCII_BYTE_P (ch) + if (ASCII_CHAR_P (ch) && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) return code; @@ -9697,7 +9698,7 @@ Return the corresponding character. */) CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec); attrs = AREF (spec, 0); - if (ASCII_BYTE_P (ch) + if (ASCII_CHAR_P (ch) && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) return code; @@ -9758,7 +9759,7 @@ DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_intern doc: /* Internal use only. */) (Lisp_Object coding_system, Lisp_Object terminal) { - struct terminal *term = get_terminal (terminal, 1); + struct terminal *term = decode_live_terminal (terminal); struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (term); CHECK_SYMBOL (coding_system); setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding); @@ -9799,7 +9800,7 @@ frame's terminal device. */) (Lisp_Object terminal) { struct coding_system *terminal_coding - = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1)); + = TERMINAL_TERMINAL_CODING (decode_live_terminal (terminal)); Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id); /* For backward compatibility, return nil if it is `undecided'. */ @@ -9811,7 +9812,7 @@ DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_intern doc: /* Internal use only. */) (Lisp_Object coding_system, Lisp_Object terminal) { - struct terminal *t = get_terminal (terminal, 1); + struct terminal *t = decode_live_terminal (terminal); CHECK_SYMBOL (coding_system); if (NILP (coding_system)) coding_system = Qno_conversion; @@ -9830,7 +9831,7 @@ DEFUN ("keyboard-coding-system", (Lisp_Object terminal) { return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING - (get_terminal (terminal, 1))->id); + (decode_live_terminal (terminal))->id); } @@ -10040,7 +10041,8 @@ make_subsidiaries (Lisp_Object base) { Lisp_Object subsidiaries; ptrdiff_t base_name_len = SBYTES (SYMBOL_NAME (base)); - char *buf = alloca (base_name_len + 6); + USE_SAFE_ALLOCA; + char *buf = SAFE_ALLOCA (base_name_len + 6); int i; memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len); @@ -10050,6 +10052,7 @@ make_subsidiaries (Lisp_Object base) strcpy (buf + base_name_len, suffixes[i]); ASET (subsidiaries, i, intern (buf)); } + SAFE_FREE (); return subsidiaries; } |