diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2011-09-24 18:22:30 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2011-09-24 18:22:30 -0700 |
commit | 3c7649c1859d6252444044fd64c7b27d8e487f68 (patch) | |
tree | c36db71bf71b503000c025d9586fb962e88cecb9 /src/charset.c | |
parent | 7c85f529fd12a8cb12eb85944877d0a52b6380a7 (diff) | |
download | emacs-3c7649c1859d6252444044fd64c7b27d8e487f68.tar.gz |
* charset.c: Integer overflow fixes.
Don't rely on undefined behavior with signed left shift overflow.
Don't assume unsigned int fits into fixnum, or that fixnum fits
into unsigned int. Don't require max_code to be a valid fixnum;
that's not true for gb10830 4-byte on a 32-bit host. Allow
invalid_code to be a cons, for the same reason. Require code_offset
to be a character. Avoid int overflow if max_char is close
to INT_MAX.
(CODE_POINT_TO_INDEX): On 32-bit hosts, return int, not unsigned;
this is intended anyway and avoids some undefined behavior.
(load_charset_map): Pass unsigned, not int, as 2nd arg of
INDEX_TO_CODE_POINT, as that's what it expects.
(Funify_charset, encode_char): Don't stuff unsigned vals into int vars.
Diffstat (limited to 'src/charset.c')
-rw-r--r-- | src/charset.c | 61 |
1 files changed, 31 insertions, 30 deletions
diff --git a/src/charset.c b/src/charset.c index 9d58d29d05c..2451c55e92a 100644 --- a/src/charset.c +++ b/src/charset.c @@ -118,24 +118,25 @@ int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL]; #define CODE_POINT_TO_INDEX(charset, code) \ ((charset)->code_linear_p \ - ? (code) - (charset)->min_code \ + ? (int) ((code) - (charset)->min_code) \ : (((charset)->code_space_mask[(code) >> 24] & 0x8) \ && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \ && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \ && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \ - ? (((((code) >> 24) - (charset)->code_space[12]) \ - * (charset)->code_space[11]) \ - + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \ - * (charset)->code_space[7]) \ - + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \ - * (charset)->code_space[3]) \ - + (((code) & 0xFF) - (charset)->code_space[0]) \ - - ((charset)->char_index_offset)) \ + ? (int) (((((code) >> 24) - (charset)->code_space[12]) \ + * (charset)->code_space[11]) \ + + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \ + * (charset)->code_space[7]) \ + + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \ + * (charset)->code_space[3]) \ + + (((code) & 0xFF) - (charset)->code_space[0]) \ + - ((charset)->char_index_offset)) \ : -1) -/* Convert the character index IDX to code-point CODE for CHARSET. - It is assumed that IDX is in a valid range. */ +/* Return the code-point for the character index IDX in CHARSET. + IDX should be an unsigned int variable in a valid range (which is + always in nonnegative int range too). IDX contains garbage afterwards. */ #define INDEX_TO_CODE_POINT(charset, idx) \ ((charset)->code_linear_p \ @@ -363,7 +364,8 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries, && CHARSET_COMPACT_CODES_P (charset)) for (; from_index < lim_index; from_index++, from_c++) { - unsigned code = INDEX_TO_CODE_POINT (charset, from_index); + unsigned code = from_index; + code = INDEX_TO_CODE_POINT (charset, code); if (NILP (CHAR_TABLE_REF (table, from_c))) CHAR_TABLE_SET (table, from_c, make_number (code)); @@ -923,11 +925,11 @@ usage: (define-charset-internal ...) */) charset.min_code = (charset.code_space[0] | (charset.code_space[4] << 8) | (charset.code_space[8] << 16) - | (charset.code_space[12] << 24)); + | ((unsigned) charset.code_space[12] << 24)); charset.max_code = (charset.code_space[1] | (charset.code_space[5] << 8) | (charset.code_space[9] << 16) - | (charset.code_space[13] << 24)); + | ((unsigned) charset.code_space[13] << 24)); charset.char_index_offset = 0; val = args[charset_arg_min_code]; @@ -937,8 +939,8 @@ usage: (define-charset-internal ...) */) if (code < charset.min_code || code > charset.max_code) - args_out_of_range_3 (make_number (charset.min_code), - make_number (charset.max_code), val); + args_out_of_range_3 (make_fixnum_or_float (charset.min_code), + make_fixnum_or_float (charset.max_code), val); charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code); charset.min_code = code; } @@ -950,8 +952,8 @@ usage: (define-charset-internal ...) */) if (code < charset.min_code || code > charset.max_code) - args_out_of_range_3 (make_number (charset.min_code), - make_number (charset.max_code), val); + args_out_of_range_3 (make_fixnum_or_float (charset.min_code), + make_fixnum_or_float (charset.max_code), val); charset.max_code = code; } @@ -964,14 +966,14 @@ usage: (define-charset-internal ...) */) charset.invalid_code = 0; else { - if (charset.max_code < min (UINT_MAX, MOST_POSITIVE_FIXNUM)) + if (charset.max_code < UINT_MAX) charset.invalid_code = charset.max_code + 1; else error ("Attribute :invalid-code must be specified"); } } else - charset.invalid_code = XFASTINT (val); + charset.invalid_code = cons_to_unsigned (val, UINT_MAX); val = args[charset_arg_iso_final]; if (NILP (val)) @@ -1015,17 +1017,17 @@ usage: (define-charset-internal ...) */) if (! NILP (args[charset_arg_code_offset])) { val = args[charset_arg_code_offset]; - CHECK_TYPE_RANGED_INTEGER (int, val); + CHECK_CHARACTER (val); charset.method = CHARSET_METHOD_OFFSET; charset.code_offset = XINT (val); - i = CODE_POINT_TO_INDEX (&charset, charset.min_code); - charset.min_char = i + charset.code_offset; i = CODE_POINT_TO_INDEX (&charset, charset.max_code); - charset.max_char = i + charset.code_offset; - if (charset.max_char > MAX_CHAR) + if (MAX_CHAR - charset.code_offset < i) error ("Unsupported max char: %d", charset.max_char); + charset.max_char = i + charset.code_offset; + i = CODE_POINT_TO_INDEX (&charset, charset.min_code); + charset.min_char = i + charset.code_offset; i = (charset.min_char >> 7) << 7; for (; i < 0x10000 && i <= charset.max_char; i += 128) @@ -1385,8 +1387,8 @@ Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */) } else if (CHAR_TABLE_P (Vchar_unify_table)) { - int min_code = CHARSET_MIN_CODE (cs); - int max_code = CHARSET_MAX_CODE (cs); + unsigned min_code = CHARSET_MIN_CODE (cs); + unsigned max_code = CHARSET_MAX_CODE (cs); int min_char = DECODE_CHAR (cs, min_code); int max_char = DECODE_CHAR (cs, max_code); @@ -1830,9 +1832,8 @@ encode_char (struct charset *charset, int c) } else /* method == CHARSET_METHOD_OFFSET */ { - int code_index = c - CHARSET_CODE_OFFSET (charset); - - code = INDEX_TO_CODE_POINT (charset, code_index); + code = c - CHARSET_CODE_OFFSET (charset); + code = INDEX_TO_CODE_POINT (charset, code); } return code; |