summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/.gdbinit25
-rw-r--r--src/ChangeLog5
-rw-r--r--src/ChangeLog.unicode3866
-rw-r--r--src/Makefile.in232
-rw-r--r--src/abbrev.c14
-rw-r--r--src/alloc.c48
-rw-r--r--src/buffer.c42
-rw-r--r--src/buffer.h23
-rw-r--r--src/bytecode.c17
-rw-r--r--src/callproc.c202
-rw-r--r--src/casefiddle.c222
-rw-r--r--src/casetab.c88
-rw-r--r--src/category.c220
-rw-r--r--src/category.h23
-rw-r--r--src/ccl.c556
-rw-r--r--src/ccl.h33
-rw-r--r--src/character.c998
-rw-r--r--src/character.h662
-rw-r--r--src/charset.c3230
-rw-r--r--src/charset.h1312
-rw-r--r--src/chartab.c975
-rw-r--r--src/cmds.c21
-rw-r--r--src/coding.c12563
-rw-r--r--src/coding.h986
-rw-r--r--src/composite.c148
-rw-r--r--src/composite.h54
-rw-r--r--src/config.in12
-rw-r--r--src/data.c174
-rw-r--r--src/dired.c1
-rw-r--r--src/dispextern.h64
-rw-r--r--src/dispnew.c16
-rw-r--r--src/disptab.h10
-rw-r--r--src/doc.c2
-rw-r--r--src/doprnt.c2
-rw-r--r--src/dosfns.c2
-rw-r--r--src/editfns.c183
-rw-r--r--src/emacs.c33
-rw-r--r--src/fileio.c578
-rw-r--r--src/filelock.c2
-rw-r--r--src/fns.c850
-rw-r--r--src/font.c3357
-rw-r--r--src/font.h509
-rw-r--r--src/fontset.c2417
-rw-r--r--src/fontset.h79
-rw-r--r--src/frame.c64
-rw-r--r--src/frame.h18
-rw-r--r--src/fringe.c4
-rw-r--r--src/ftfont.c912
-rw-r--r--src/ftxfont.c349
-rw-r--r--src/indent.c6
-rw-r--r--src/insdel.c59
-rw-r--r--src/intervals.c2
-rw-r--r--src/intervals.h2
-rw-r--r--src/keyboard.c19
-rw-r--r--src/keymap.c456
-rw-r--r--src/lisp.h256
-rw-r--r--src/lread.c728
-rw-r--r--src/macfns.c8
-rw-r--r--src/macgui.h2
-rw-r--r--src/macterm.c135
-rw-r--r--src/makefile.w32-in68
-rw-r--r--src/marker.c2
-rw-r--r--src/minibuf.c29
-rw-r--r--src/msdos.c10
-rw-r--r--src/print.c154
-rw-r--r--src/process.c148
-rw-r--r--src/regex.c547
-rw-r--r--src/regex.h10
-rw-r--r--src/search.c180
-rw-r--r--src/syntax.c996
-rw-r--r--src/syntax.h43
-rw-r--r--src/term.c162
-rw-r--r--src/w16select.c2
-rw-r--r--src/w32bdf.c6
-rw-r--r--src/w32console.c2
-rw-r--r--src/w32fns.c173
-rw-r--r--src/w32select.c165
-rw-r--r--src/w32term.c307
-rw-r--r--src/w32term.h2
-rw-r--r--src/window.c8
-rw-r--r--src/xdisp.c684
-rw-r--r--src/xfaces.c1064
-rw-r--r--src/xfns.c205
-rw-r--r--src/xfont.c851
-rw-r--r--src/xftfont.c545
-rw-r--r--src/xmenu.c2
-rw-r--r--src/xterm.c674
-rw-r--r--src/xterm.h11
88 files changed, 30853 insertions, 14073 deletions
diff --git a/src/.gdbinit b/src/.gdbinit
index 564b3762bd7..60730536ae9 100644
--- a/src/.gdbinit
+++ b/src/.gdbinit
@@ -731,7 +731,7 @@ define xchartable
print (struct Lisp_Char_Table *) $ptr
printf "Purpose: "
xprintsym $->purpose
- printf " %d extra slots", ($->size & 0x1ff) - 388
+ printf " %d extra slots", ($->size & 0x1ff) - 68
echo \n
end
document xchartable
@@ -968,6 +968,29 @@ document xprintsym
Print argument as a symbol.
end
+define xcoding
+ set $tmp = (struct Lisp_Hash_Table *) ((Vcoding_system_hash_table & $valmask) | gdb_data_seg_bits)
+ set $tmp = (struct Lisp_Vector *) (($tmp->key_and_value & $valmask) | gdb_data_seg_bits)
+ set $name = $tmp->contents[$arg0 * 2]
+ print $name
+ pr
+ print $tmp->contents[$arg0 * 2 + 1]
+ pr
+end
+document xcoding
+ Print the name and attributes of coding system that has ID (argument).
+end
+
+define xcharset
+ set $tmp = (struct Lisp_Hash_Table *) ((Vcharset_hash_table & $valmask) | gdb_data_seg_bits)
+ set $tmp = (struct Lisp_Vector *) (($tmp->key_and_value & $valmask) | gdb_data_seg_bits)
+ p $tmp->contents[$arg0->hash_index * 2]
+ pr
+end
+document xcharset
+ Print the name of charset that has ID (argument).
+end
+
define xbacktrace
set $bt = backtrace_list
while $bt
diff --git a/src/ChangeLog b/src/ChangeLog
index 70d8f0b9548..1ea2005e0c4 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -9554,11 +9554,6 @@
* regex.c (re_error_msgid): Add an entry for REG_ERANGEX.
(regex_compile): Return REG_ERANGEX if appropriate.
-2004-10-22 Kenichi Handa <handa@m17n.org>
-
- * editfns.c (Ftranslate_region_internal): New function.
- (syms_of_editfns): Defsubr it.
-
2004-10-22 Jan Dj,Ad(Brv <jan.h.d@swipnet.se>
* xfns.c (xic_create_xfontset): Initialize missing_list to NULL.
diff --git a/src/ChangeLog.unicode b/src/ChangeLog.unicode
new file mode 100644
index 00000000000..7af2aa0784c
--- /dev/null
+++ b/src/ChangeLog.unicode
@@ -0,0 +1,3866 @@
+2006-07-18 Miles Bader <miles@gnu.org>
+
+ * character.h (CHECK_CHARACTER): Redefine in terms of CHECK_TYPE.
+
+2006-07-14 Kenichi Handa <handa@m17n.org>
+
+ * font.h (LGLYPH_XOFF, LGLYPH_YOFF, LGLYPH_WIDTH, LGLYPH_WADJUST)
+ (LGLYPH_SET_WIDTH): Adjusted for the change of LGLYPH format.
+ (LGLYPH_ADJUSTMENT, LGLYPH_SET_ADJUSTMENT): New macros.
+
+ * font.c (font_merge_old_spec): Treat '*' in foundry as a wild
+ card.
+ (DEVICE_DELTA): Fix typo.
+ (font_otf_gpos): Adjusted for the change of LGLYPH format.
+ (font_prepare_composition): Likewise.
+
+ * xterm.c (x_draw_composite_glyph_string_foreground): Adjusted for
+ the change of LGLYPH format.
+
+2006-07-07 Kenichi Handa <handa@m17n.org>
+
+ * ftfont.c (ftfont_list): Fix typo.
+ (ftfont_build_basic_charsets): Don't include letters with
+ diactrics.
+
+2006-07-09 Jan Dj,Ad(Brv <jan.h.d@swipnet.se>
+
+ * xfaces.c (realize_non_ascii_face): Set face->extra to NULL.
+
+ * xftfont.c (xftfont_done_face): Call XftDrawDestroy only if
+ xftface_info is non-NULL.
+
+2006-07-07 Kenichi Handa <handa@m17n.org>
+
+ * ftfont.c (ftfont_list): Fix typo.
+ (ftfont_build_basic_charsets): Don't include letters with
+ diactrics.
+
+2006-07-05 Jan Dj,Ad(Brv <jan.h.d@swipnet.se>
+
+ * ftfont.c (ftfont_list): Move misplaced #endif
+
+2006-07-05 Kenichi Handa <handa@m17n.org>
+
+ * ftfont.c (ftfont_list): Pay attention to the case that
+ FC_CAPABILITY is not defined.
+
+2006-07-03 Kenichi Handa <handa@m17n.org>
+
+ * xftfont.c (xftfont_open): Set charset related members to -1.
+
+ * ftfont.c (ftfont_list): Handle QCotf property. Handling of
+ QCname fixed.
+ (ftfont_open): Set charset related members to -1.
+
+ * fontset.c (Votf_script_alist): New variable.
+ (syms_of_fontset): Initialize it.
+ (fontset_font): Delete unused variable.
+
+ * fontset.h (Votf_script_alist): Extern it.
+
+ * font.c (font_find_for_lface): Code optimized.
+
+ * font.h (font_close_object, font_merge_old_spec): Extern them.
+
+2006-06-28 Kenichi Handa <handa@m17n.org>
+
+ * font.c (QCscalable, Qc, Qm, Qp, Qd): New variables.
+ (syms_of_font): Initialize them.
+ (font_pixel_size): Allow float value in dpi.
+ (font_prop_validate_type): Deleted.
+ (font_prop_validate_symbol, font_prop_validate_style): Argument
+ changed. Caller changed.
+ (font_prop_validate_non_neg): Renamed from
+ font_prop_validate_size.
+ (font_prop_validate_extra): Deleted.
+ (font_prop_validate_spacing): New function.
+ (font_property_table): Add elements for all known properties.
+ (get_font_prop_index): Renamed from check_font_prop_name. New
+ argument FROM. Caller changed.
+ (font_prop_validate): Validate all known properties.
+ (font_put_extra): Argument force deleted. Caller changed.
+ (font_expand_wildcards): Make it static. Fix the way of shrinking
+ the possible range.
+ (font_parse_xlfd): Arguemnt merge deleted. Fix handling of RESX,
+ RESY, SPACING, and AVGWIDTH. Don't validate property values here.
+ Caller changed.
+ (font_unparse_xlfd): Handle dpi, spacing, and scalable properties.
+ (font_parse_fcname): Arguemnt merge deleted. Fix parsing of point
+ size. Don't validate properties values here. Caller changed.
+ (font_unparse_fcname): Handle dpi, spacing, and scalable
+ properties.
+ (font_open_by_name): Delete unused variable.
+ (Ffont_spec): Likewise. Validate property values.
+ (Ffont_match_p): New function.
+
+ * font.h (QCscalable): Extern it.
+ (font_parse_xlfd, font_parse_fcname): Prototype adjusted.
+
+ * ftfont.c (ftfont_list): Handle properties dpi, spacing, and
+ scalable.
+
+ * xfont.c (xfont_query_font): Adjusted for the change of
+ font_parse_xlfd.
+ (xfont_list_pattern): New function.
+ (xfont_list): Use xfont_list_pattern.
+
+ * xftfont.c (xftfont_prepare_face): Cancel previous change.
+ (xftfont_done_face): Likewise.
+
+2006-06-26 Kenichi Handa <handa@m17n.org>
+
+ * font.h (Flist_fonts): EXFUN it.
+
+2006-06-25 Jason Rumney <jasonr@gnu.org>
+
+ * w32term.c (w32_initialize): Add back smoothing_type and
+ smoothing_enabled definitions.
+
+2006-06-23 Kenichi Handa <handa@m17n.org>
+
+ * xterm.c (x_draw_glyph_string) [USE_FONT_BACKEND]: Check
+ s->face->font on determining underline position.
+
+2006-06-21 Kenichi Handa <handa@m17n.org>
+
+ * font.c (font_parse_xlfd): Fix generating of CHARSET_REGISTRY field.
+ (font_has_char): Accept font-object too.
+ (font_find_for_lface): Try at first with a size specified in face.
+
+ * xftfont.c (xftfont_prepare_face): Make non-ascii face share
+ face->extra with ascii face.
+ (xftfont_done_face): Don't free face->extra of non-ascii face.
+
+2006-06-20 Kenichi Handa <handa@m17n.org>
+
+ * frame.c (x_set_font) [USE_FONT_BACKEND]: Fix argument to
+ font_open_by_name.
+
+2006-06-19 Kenichi Handa <handa@m17n.org>
+
+ * font.h (QCspacing, QCdpi): Extern them.
+ (enum font_spacing): New enum.
+ (FONT_PIXEL_SIZE_QUANTUM): New macro.
+
+ * font.c (POINT_TO_PIXEL): Don't divice POINT by 10.
+ (QCspacing, QCdpi): New variables.
+ (syms_of_font): Initialize them.
+ (font_pixel_size): New function.
+ (font_put_extra): New function.
+ (font_parse_xlfd): Fix handling of font size. Add QCdpi property
+ in FONT_EXTRA.
+ (font_parse_fcname): Handle enumenrated values (e.g. bold). Fix
+ handling font size. Add QCname property that contains only
+ unknown properties.
+ (font_score): Change argument. Caller changed. Pay attention to
+ FONT_PIXEL_SIZE_QUANTUM.
+ (font_sort_entites): Fix handling of font size.
+ (font_list_entities): Likewise.
+ (font_find_for_lface): Likewise.
+ (font_open_for_lface): Likewise.
+ (font_open_by_name): Likewise.
+ (Ffont_spec): Add QCname property that contains only unknown
+ properties.
+
+ * ftfont.c (ftfont_list): Use assq_no_quit, not Fassq. Don't
+ include weight in listing pattern, instead check weight of each
+ listed font. Don't include scalable in pattern. Pay attention to
+ FONT_PIXEL_SIZE_QUANTUM.
+
+2006-06-19 Kenichi Handa <handa@m17n.org>
+
+ * lread.c (read_escape): Fix the code synched with HEAD.
+
+ * font.c (font_parse_fcname): Fix parsing of point-size.
+ (font_unparse_fcname): Produce symbolic names for style
+ properties.
+ (font_list_entities): Handle float size correctly.
+ (font_open_by_name): Prefer `normal' property values if the name
+ doesn't specify them.
+
+ * fontset.c (Finternal_char_font): Use font_get_name, not
+ Ffont_xlfd_name.
+
+ * ftfont.c (ftfont_pattern_entity): Use the numeric value 100 for
+ FC_WEIGHT_REGULAR. Exclude FC_SIZE and FC_PIXEL_SIZE from listing
+ pattern. Don't force scalable.
+
+ * xftfont.c (xftfont_open): For generating a name, start from
+ 96-byte buffer.
+
+2006-06-16 Jan Dj,Ad(Brv <jan.h.d@swipnet.se>
+
+ * frame.h (x_new_fontset2): Fix prototype.
+
+2006-06-16 Kenichi Handa <handa@m17n.org>
+
+ * font.h (struct font_driver): Member parse_name deleted.
+ (font_match_p, font_get_spec, font_parse_fcname)
+ (font_unparse_fcname): Extern them.
+ (font_get_name): Prototype adjusted.
+
+ * font.c (XLFD_SMALLNUM_MASK): Delete this macro.
+ (XLFD_LARGENUM_MASK): Delete XLFD_ENCODING_MASK from it.
+ (font_expand_wildcards): Fix handling ENCODING field. Avoid
+ unnecessary checks for weight, slant, and swidth.
+ (font_parse_fcname): New function.
+ (font_unparse_fcname): New function.
+ (font_parse_name): New function.
+ (font_match_p): New function.
+ (font_get_name): Return value changed to Lisp string.
+ (font_get_spec): New function.
+ (Qunspecified, Qignore_defface): Don't extern them.
+ (font_find_for_lface): Assume that LFACE is fully specified.
+ (font_load_for_face): If lface[LFACE_FONT_INDEX] is an font
+ object, use it for FACE.
+ (font_open_by_name): Call Ffont_spec with QCname prop. Don't call
+ driver->parse_name.
+ (Ffont_spec): Call font_parse_name, not font_parse_xlfd.
+
+ * fontset.h (new_fontset_from_font) [USE_FONT_BACKEND]: Prototype
+ adjusted.
+
+ * fontset.c (new_fontset_from_font) [USE_FONT_BACKEND]: Argument F
+ deleted. Don't call Fnew_fontset. Instead, directly call
+ make_fontset.
+
+ * frame.h (x_new_fontset2) [USE_FONT_BACKEND]: Prototype adjusted.
+
+ * frame.c (x_set_font) [USE_FONT_BACKEND]: Adjusted for the change
+ of x_new_fontset2.
+
+ * ftfont.c (Qmonospace, Qsans_serif, Qserif, Qmono, Qsans)
+ (Qsans__serif): New variables.
+ (ftfont_generic_family_list): New variable.
+ (syms_of_ftfont): Initialize the above variables.
+ (ftfont_pattern_entity): Argument NAME deleted.
+ (ftfont_list_generic_family): New function.
+ (ftfont_parse_name): Delete this function.
+ (ftfont_list): Try generic family only when FcFontList found no
+ font.
+ (ftfont_list_family): Fix args to FcObjectSetBuild.
+
+ * xfaces.c (check_lface_attrs) [USE_FONT_BACKEND]: Accept font
+ object in attrs[LFACE_FONT_INDEX].
+ (set_lface_from_font_name): Cancel all changes for font-backend.
+ (set_lface_from_font_and_fontset) [USE_FONT_BACKEND]: New
+ function.
+ (Finternal_set_lisp_face_attribute) [USE_FONT_BACKEND]: Accept a
+ font object in QCfont attribute.
+ (set_font_frame_param) [USE_FONT_BACKEND]: Likewise.
+ (realize_default_face) [USE_FONT_BACKEND]: Call
+ set_lface_from_font_and_fontset.
+
+ * xfns.c (x_default_font_parameter) [USE_FONT_BACKEND]: Try also
+ "fixed", and signal error here if no suitable font was found.
+
+ * xfont.c (xfont_parse_name): Delete this function.
+
+ * xftfont.c (xftfont_open): Change coding style of error
+ handling. Generate fontconfig's fontname pattern.
+
+ * xterm.h (struct x_output) [USE_FONT_BACKEND]: New member fontp.
+ (FRAME_FONT_OBJECT) [USE_FONT_BACKEND]: New macro.
+
+ * xterm.c (x_new_fontset2) [USE_FONT_BACKEND]: Change arguments.
+ Both args FONTSET and FONT_OBJECT must be existing ones.
+
+2006-06-16 YAMAMOTO Mitsuharu <mituharu@math.s.chiba-u.ac.jp>
+
+ * macterm.c (mac_set_unicode_keystroke_event): Don't use MAKE_CHAR.
+
+2006-06-14 Kenichi Handa <handa@m17n.org>
+
+ * xfont.c (xfont_open, xfont_encode_char): Fix typo.
+
+ * font.h (struct font): Fix typo.
+
+ * font.c (enum xlfd_field_index): Rename XLFD_XXX_SIZE_INDEX to
+ XLFD_XXX_INDEX.
+ (enum xlfd_field_mask): New enum.
+ (intern_font_field): Argument changed. Caller changed. If digits
+ are followed by non-digits, return a symbol.
+ (font_expand_wildcards): New function.
+ (font_parse_xlfd): Fix wildcard handling.
+ (Ffont_spec): If :name is specified, reflect the info in the other
+ properties.
+
+ * ftfont.c (ftfont_pattern_entity): Fix typo.
+ (ftfont_list): Enforce FC_LANG in PATTERN to cancel the effect of
+ locale.
+
+2006-06-09 Kenichi Handa <handa@m17n.org>
+
+ * font.h (Qiso8859_1, Qiso10646_1, Qunicode_bmp): Extern them.
+
+ * font.c (Qiso8859_1, Qiso10646_1, Qunicode_bmp): Moved from
+ ftfont.c.
+ (font_unparse_xlfd): Fix argument type declaration. Append "*" if
+ registry doesn't specify encoding part.
+ (font_find_for_lface): Pay attention to LFACE_FONT_INDEX.
+ (font_open_by_name): At first try parsing the name.
+ (syms_of_font): Declare Qiso8859_1, Qiso10646_1, and Qunicode_bmp
+ as Lisp symbols.
+
+ * fontset.c (reorder_font_vector): Pay attention to the case that
+ the 3rd element of font_def is nil.
+ (fontset_font): For the default fontset, append one more fontset
+ elements for a script-based font specification. Don't add script
+ attribute on finding a font.
+ (new_fontset_from_font): Unconditionally set FONTSET_ASCII to the
+ font name.
+ (fontset_ascii_font): If a font can't be opened, return nil.
+
+ * ftfont.c (Qiso8859_1, Qiso10646_1, Qunicode_bmp): Moved to
+ font.c.
+ (ftfont_pattern_entity): New function.
+ (ftfont_get_cache): Assume that freetype_font_cache is already
+ initialized.
+ (ftfont_list): Handle the case that a file is specified in font
+ name. Use ftfont_pattern_entity to generate entities.
+ (ftfont_has_char): Check if the pattern contains FC_CHARSET.
+ (syms_of_ftfont): Initialize freetype_font_cache.
+
+ * xftfont.c (xftfont_open): Make the font name fontconfig's
+ style. Add BLOCK_INPUT and UNBLOCK_INPUT.
+ (xftfont_close): Free font->font.name if not NULL.
+
+ * xfont.c (xfont_list): If script is specified for a font, return
+ null_vector.
+ (xfont_list_family): Declare argument type.
+
+ * xfaces.c (set_lface_from_font_name): If a font doesn't have a
+ name, set LFACE_FONT (lface) to nil.
+
+ * xterm.c (x_new_fontset2): If an ASCII font couldn't be loaded,
+ return Qnil.
+
+2006-06-08 Jason Rumney <jasonr@gnu.org>
+
+ * w32term.c (w32_initialize): Manually sync 2006-06-05 change from
+ HEAD.
+
+2006-06-08 Kenichi Handa <handa@m17n.org>
+
+ * emacs.c (main): Check -enable-font-backend arg after the check
+ of -nl.
+ (standard_args): Add "-enable-font-backend".
+
+ * coding.c (Ffind_operation_coding_system): Sync with HEAD.
+
+ * callproc.c (Fcall_process): Sync with HEAD.
+
+ * coding.h (CODING_REQUIRE_ENCODING): Comment sync with HEAD.
+
+2006-06-07 Kenichi Handa <handa@m17n.org>
+
+ * xftfont.c (xftfont_default_fid): Set fid_known to 1.
+ (struct xftdraw_list, xftdraw_list): Delete them.
+ (register_xftdraw, check_xftdraw): Delete them.
+ (xftfont_prepare_face): Don't call register_xftdraw.
+ (xftfont_done_face): Don't call check_xftdraw.
+ (xftfont_draw): Get backroudn color only when with_background is
+ nonzero.
+
+ * xfont.c (xfont_encode_char): Fix calculation of char2b.
+
+2006-06-06 Kenichi Handa <handa@m17n.org>
+
+ These changes are for the new font handling codes.
+
+ * Makefile.in (ALL_CFLAGS): Add @FREETYPE_CFLAGS@,
+ @FONTCONFIG_CFLAGS@, and @LIBOTF_CFLAGS@.
+ (LIB_X11_LIB): If HAVE_XFT is defined, set to @XFT_LIBS@.
+ (FONTSRC, FONTOBJ): New variables.
+ (obj): Add $(FONTOBJ).
+ (SOME_MACHINE_OBJECTS): Lib_X11_Lib.
+ (LIBES): Add @FREETYPE_LIBS@, @FONTCONFIG_LIBS@, and
+ @LIBOTF_LIBS@.
+ (font.o, ftfont.o, xfont.o, xftfont.o, ftxfont.o): New targets.
+ (fontset.o, xdisp.o, xfaces.o, xfns.o, xterm.o): Depends on
+ $(FONTSRC).
+
+ * font.h, font.c, xfont.c, ftfont.c, xftfont.c, ftxfont.c: New
+ files.
+
+ * character.h (Vscript_representative_chars): Extern it.
+
+ * character.c (Vscript_representative_chars): New variable.
+ (syms_of_character): Declare it as a Lisp variable.
+
+ * composite.c (get_composition_id) [USE_FONT_BACKEND]: If
+ enable_font_backend is nonzero, accept the composition method
+ COMPOSITION_WITH_GLYPH_STRING.
+
+ * composite.h (enum composition_method) [USE_FONT_BACKEND]: New
+ enumeration COMPOSITION_WITH_GLYPH_STRING.
+
+ * config.in: Re-generated.
+
+ * dispextern.h (struct glyph_string) [USE_FONT_BACKEND]: New
+ members clip_x, clip_y, clip_width, and clip_height.
+ (struct face) [USE_FONT_BACKEND]: New members font_info and extra.
+
+ * emacs.c (main) [USE_FONT_BACKEND]: Handle arg
+ --enable-font-backend. Call syms_of_font.
+
+ * fns.c (assoc_no_quit): New function.
+
+ * fontset.h (FONT_INFO_FROM_FACE): New macro.
+ (face_for_font, new_fontset_from_font)
+ (fontset_ascii_font) [USE_FONT_BACKEND]: Extern them.
+
+ * fontset.c [USE_FONT_BACKEND]: Include "font.h".
+ (fontset_font, fontset_ascii, face_for_char)
+ (make_fontset_for_ascii_face, Ffont_info)
+ (Finternal_char_font) [USE_FONT_BACKEND]: If enable_font_backend
+ is nonzero, use font-backend mechanism.
+ (find_font_encoding): Make it non-static.
+ (new_fontset_from_font, fontset_ascii_font) [USE_FONT_BACKEND]:
+ New functions.
+
+ * frame.h (struct frame): New members resx and resy.
+ (struct frame) [USE_FONT_BACKEND]: New member font_driver_list.
+ (x_new_fontset2) [USE_FONT_BACKEND]: Extern it.
+
+ * frame.c [USE_FONT_BACKEND]: Include "font.h".
+ (make_frame, x_set_font) [USE_FONT_BACKEND]: Use font-backend
+ mechanism.
+
+ * lisp.h (assoc_no_quit): Extern it.
+
+ * xdisp.c: If USE_FONT_BACKEND is defined, include "font.h".
+ Through out the file, use FONT_INFO_FROM_FACE instead of
+ FONT_INFO_FROM_ID, use get_per_char_metric instead of
+ rif->per_char_metric.
+ (handle_composition_prop) [USE_FONT_BACKEND]: If the composition
+ method is COMPOSITION_WITH_GLYPH_STRING, just set it->c to ' '.
+ (get_glyph_face_and_encoding, fill_composite_glyph_string)
+ (get_char_face_and_encoding, BUILD_COMPOSITE_GLYPH_STRING)
+ (x_produce_glyphs) [USE_FONT_BACKEND]: If enable_font_backend is
+ nonzero, use font-backend mechanism.
+ (get_per_char_metric): New function.
+
+ * xfaces.c [USE_FONT_BACKEND]: Include "font.h".
+ (set_lface_from_font_name)
+ (set_font_frame_param, free_realized_face)
+ (prepare_face_for_display, clear_face_gcs)
+ (Finternal_set_font_selection_order, realize_x_face)
+ [USE_FONT_BACKEND]: If enable_font_backend is nonzero, use
+ font-backend mechanism.
+ (clear_face_cache) [USE_FONT_BACKEND]: Don't call
+ clear_font_table.
+ (load_face_font) [USE_FONT_BACKEND]: Abort.
+ (face_symbolic_value, face_symbolic_weight, face_symbolic_slant)
+ (face_symbolic_swidth, face_for_font) [USE_FONT_BACKEND]: New
+ functions.
+
+ * xfns.c [USE_FONT_BACKEND]: Include "font.h".
+ (x_default_font_parameter) [USE_FONT_BACKEND]: New function.
+ (Fx_create_frame) [USE_FONT_BACKEND]: If enable_font_backend is
+ nonzero, register all available font drivers. Call
+ x_default_font_parameter for deciding a font.
+ (x_create_tip_frame) [USE_FONT_BACKEND]: Likewise.
+
+ * xterm.c [USE_FONT_BACKEND]: Include "font.h".
+ (x_set_mouse_face_gc, x_set_glyph_string_clipping)
+ (x_set_glyph_string_clipping_exactly)
+ (x_compute_glyph_string_overhangs)
+ (x_draw_glyph_string_foreground)
+ (x_draw_composite_glyph_string_foreground, x_draw_glyph_string)
+ (x_free_frame_resources) [USE_FONT_BACKEND]: If
+ enable_font_backend is nonzero, use font-backend mechanism.
+ (x_new_fontset2) [USE_FONT_BACKEND]: New function.
+
+2006-05-15 Kenichi Handa <handa@m17n.org>
+
+ * coding.h (system_eol_type): Fix synching with HEAD.
+
+ * coding.c (system_eol_type): Sync with HEAD.
+ (coding_inherit_eol_type): If PARENT is nil, inherit from
+ system_eol_type.
+ (syms_of_coding): Initialize system_eol_type.
+
+ * callproc.c (Fcall_process): Sync with HEAD.
+
+ * process.c (setup_process_coding_systems): Fix synching with
+ HEAD.
+ (read_process_output): Likewise.
+ (Fset_process_coding_system): Inherit system's eol format if
+ necessary.
+
+ * fileio.c (choose_write_coding_system): Fix synching with HEAD.
+
+ * keymap.c (push_key_description): Fix synching with HEAD.
+
+2006-05-02 YAMAMOTO Mitsuharu <mituharu@math.s.chiba-u.ac.jp>
+
+ * macgui.h (USE_ATSUI): Don't enable on emacs-unicode-2 branch.
+
+2006-04-07 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (decode_eol): Pay attention to buffer relocation in
+ del_range_2.
+ (decode_coding): Call decode_eol before restoring undo_list.
+
+2006-03-20 Kenichi Handa <handa@m17n.org>
+
+ * charset.c (Fdefine_charset_internal): Fix setting of
+ emacs_mule_bytes.
+
+2006-03-14 Kenichi Handa <handa@m17n.org>
+
+ * keyboard.c (read_char): Check if C is a character or not before
+ looking up Vkeyboard_translate_table.
+
+2006-03-10 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION): Fix
+ condition to terminate the loop.
+
+2006-03-09 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (produce_composition): Compare charbuf[i] instead of
+ args[i] against 0.
+ (Fterminal_coding_system): Use EQ to compare Lisp objects.
+
+2006-03-07 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (DECODE_COMPOSITION_START): If the source is short, set
+ coding->result to CODING_RESULT_INSUFFICIENT_SRC.
+ (decode_coding_gap): Set CODING_MODE_LAST_BLOCK after the call of
+ detect_coding.
+ (emacs_mule_char): Handle old style (Emacs 20) component character
+ of a composition.
+ (DECODE_EMACS_MULE_COMPOSITION_RULE_20): Fix parsing a composition
+ rule.
+ (DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION): Likewise.
+ (decode_coding_emacs_mule): Handle invalid bytes correctly.
+
+2006-03-04 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (encode_coding_ccl): Allocate destination dynamically
+ when necessary.
+
+2006-03-03 Kenichi Handa <handa@m17n.org>
+
+ * ccl.c (Fccl_execute_on_string): Fix the condition of terminating
+ the loop. When quitted, show a proper error message.
+
+2006-03-02 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (decode_coding): Fix previous change.
+
+ * xterm.c (x_set_glyph_string_clipping_exactly): Set
+ src->clip_head and src->clip_tail temporarily instead of src->hl.
+
+ * ccl.c (CCL_WRITE_STRING): Handle a flag bit for multibyte
+ character sequence.
+ (Fccl_execute_on_string): Use ASET, not XSET.
+
+2006-03-01 Kenichi Handa <handa@m17n.org>
+
+ * search.c (search_buffer): Fix handling of "\\" in a trivial
+ regexp.
+
+2006-02-28 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (decode_coding): Fix the condition of terminating the
+ decoding loop.
+
+2006-02-27 Kenichi Handa <handa@m17n.org>
+
+ * data.c (Faset): On setting a character bigger than 255 in a
+ unibyte string, signal an error instead of make the string
+ multibyte.
+
+2006-02-22 Kenichi Handa <handa@m17n.org>
+
+ * charset.c (map_charset_chars): Fix for ascii-compatible charset
+ made by a mapping table.
+
+2006-02-21 Kenichi Handa <handa@m17n.org>
+
+ * xdisp.c (fill_composite_glyph_string): Check s->face is NULL or
+ not.
+ (BUILD_COMPOSITE_GLYPH_STRING): If C is TAB, set s->face to NULL.
+ (x_produce_glyphs): If CH is TAB, set cmp->offsets properly.
+
+ * xterm.c (x_draw_composite_glyph_string_foreground): Check
+ s->face is NULL or not.
+
+2006-02-20 Kenichi Handa <handa@m17n.org>
+
+ * xterm.c (x_set_glyph_string_clipping_exactly): New function.
+ (x_draw_glyph_string): Fix drawing of right_overhang and
+ left_overhang around/on cursor.
+
+ * xdisp.c (draw_glyphs): Fix inclusion of right_overwriting
+ glyphs.
+
+ * term.c (produce_glyphs): Sync to HEAD.
+
+2006-02-15 Kenichi Handa <handa@m17n.org>
+
+ * xdisp.c (x_produce_glyphs): Handle composition with TAB.
+
+2006-02-05 Kenichi Handa <handa@m17n.org>
+
+ * coding.c: Cancel incorrect synching with HEAD.
+
+2006-02-03 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (Fdefine_coding_system_internal): Avoid a duplicated
+ element in Vcoding_system_alist.
+ (Fdefine_coding_system_alias): Likewise.
+
+2006-01-19 Kenichi Handa <handa@m17n.org>
+
+ * xterm.c (handle_one_xevent): Handle keysyms 0x1000000..0x10000FF.
+
+ * coding.c: Sync to HEAD for handling autoload-coding-system.
+ (Qcoding_system_define_form): New variable.
+ (syms_of_coding): Intern and staticpro it.
+ (Fcoding_system_p): Check Qcoding_system_define_form.
+ (Fcheck_coding_system): Try to autoload the definition of
+ CODING-SYSTEM.
+
+ * coding.h (CODING_SYSTEM_P): If ID is not available, call
+ Fcoding_system_p.
+ (CHECK_CODING_SYSTEM): If ID is not available, call
+ Fcheck_coding_system.
+ (CHECK_CODING_SYSTEM_GET_SPEC): Try also Fcheck_coding_system.
+ (CHECK_CODING_SYSTEM_GET_ID): Likewise.
+
+2006-01-17 Kenichi Handa <handa@m17n.org>
+
+ * xterm.c (handle_one_xevent): Delete unnecessary code inserted by
+ sync with HEAD.
+
+ * coding.c (code_conversion_restore): GCPRO arg.
+
+2005-12-28 Kenichi Handa <handa@m17n.org>
+
+ * character.c (lisp_string_width): Check multibyteness of STRING.
+
+2005-10-18 YAMAMOTO Mitsuharu <mituharu@math.s.chiba-u.ac.jp>
+
+ * macterm.c (mac_encode_char): Call ccl_driver with the last arg
+ Qnil. Use JIS_TO_SJIS instead of ENCODE_SJIS.
+ (decode_mac_font_name): Use decode_coding_c_string instead of
+ decode_coding.
+ (x_load_font): Initialize fontp->fontset to -1. Set
+ fontp->encoding_type.
+
+2005-10-17 Kenichi Handa <handa@m17n.org>
+
+ * search.c (search_buffer): Give up BM search on case-fold-search
+ if one of a target character has a case-equivalence of different
+ byte length even if that target charcter is an ASCII.
+ (simple_search): Fix culculation of byte length of matched text.
+ (boyer_moore): Fix handling of case-equivalent multibyte
+ characters.
+
+2005-10-15 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (decode_coding): Fix handling of invalid bytes.
+
+2005-10-06 Kenichi Handa <handa@m17n.org>
+
+ * xterm.c (handle_one_xevent): Handle keysyms directly mapped to
+ Unicode characters.
+
+2005-09-23 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (encode_coding_object): If a pre-write-conversion
+ function makes a new buffer, kill it.
+
+2005-07-29 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (QCascii_compatible_p): New variable.
+ (syms_of_coding): Initialize it.
+ (ONE_MORE_BYTE): Decrement `src' before calling string_char.
+ (ONE_MORE_BYTE_NO_CHECK): Likewise.
+ (record_conversion_result): Add `default:' case.
+ (coding_charset_list): Delete unused variable `coding_type'.
+ (Fdefine_coding_system_internal): Add `ascii-compatible-p'
+ property in the plist of the coding system.
+ (Fcoding_system_put): Check QCascii_compatible_p.
+
+2005-06-09 Kenichi Handa <handa@m17n.org>
+
+ * xdisp.c (get_next_display_element): Sync with the change in
+ HEAD (2005-06-08).
+
+2005-06-06 Kenichi Handa <handa@m17n.org>
+
+ * callproc.c (Fcall_process): Sync with the change in
+ HEAD (2005-06-04).
+
+2005-06-05 Miles Bader <miles@gnu.org>
+
+ * xfaces.c (Finternal_lisp_face_equal_p): Restore previously
+ removed calculation of frame `f', as it's now used.
+
+2005-05-22 YAMAMOTO Mitsuharu <mituharu@math.s.chiba-u.ac.jp>
+
+ * macterm.c (x_font_name_to_mac_font_name): Sync with trunk
+ for the case that does not require code conversion.
+
+2005-05-11 Kenichi Handa <handa@m17n.org>
+
+ * Makefile.in (shortlisp): Cancel previous change.
+ (RUN_TEMACS): Include "-nl" if HAVE_SHM is defined.
+ (emacs${EXEEXT}): Run $(RUN_TEMACS) unconditionally.
+ (UNIDATA): New variable.
+ (${lispsource}international/charprop.el): Depends on ${UNIDATA}.
+ (bootstrap-emacs${EXEEXT}): Depends on charprop.el. Run
+ $(RUN_TEMACS) unconditionally.
+
+2005-05-10 Kenichi Handa <handa@m17n.org>
+
+ * Makefile.in (shortlisp): Add ../lisp/international/charprop.el.
+ (temacs${EXEEXT}): Build charprop.el if necessary.
+ (admindir): New variable.
+ ($(lispsource)international/charprop.el): New target.
+
+2005-05-04 Miles Bader <miles@gnu.org>
+
+ * character.c (chars-in-region): Obsolete function removed.
+ (syms_of_character): Remove its initialization.
+
+2005-04-28 Benjamin Riefenstahl <b.riefenstahl@turtle-trading.net>
+
+ * w32select.c (validate_coding_system)
+ (setup_windows_coding_system): New functions.
+ (convert_to_handle_as_coded, Fw32_get_clipboard_data): Use
+ setup_windows_coding_system.
+ (setup_config, Fw32_get_clipboard_data): Use
+ validate_coding_system.
+ (Fx_selection_exists): Move call to setup_config to a place
+ were signals are allowed.
+
+ * lisp.h (Fcoding_system_base, Fcoding_system_eol_type)
+ (Fcheck_coding_system): Add declarations.
+
+2005-04-28 Kenichi Handa <handa@m17n.org>
+
+ * s/ms-w32.h (STDC_HEADERS): Sync with the change in
+ HEAD (2005-04-23).
+
+2005-04-25 Kenichi Handa <handa@m17n.org>
+
+ * charset.c (load_charset_map_from_vector): Fix for the first
+ iteration.
+
+2005-04-22 YAMAMOTO Mitsuharu <mituharu@math.s.chiba-u.ac.jp>
+
+ * macfns.c (Fx_create_frame, x_create_tip_frame): Pass Lisp
+ string as the second argument for x_new_fontset.
+
+2005-04-18 Kenichi Handa <handa@m17n.org>
+
+ * fns.c (Fstring_as_multibyte): Fix the change for syncing with
+ CVS head.
+
+2005-04-09 Kenichi Handa <handa@m17n.org>
+
+ * search.c (search_buffer): Fix the change for syncing with CVS
+ head.
+ (search_buffer): Likewise.
+
+2005-03-31 Kenichi Handa <handa@m17n.org>
+
+ * xdisp.c (get_next_display_element): Sync with CVS head.
+
+2005-03-29 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (decode_coding_object): Use safe_call1 instead of call1.
+ (encode_coding_object): Use safe_call instead of call2.
+
+2005-03-14 Kenichi Handa <handa@m17n.org>
+
+ * fontset.c (set_default_ascii_font): Fix the change for
+ syncing with CVS head.
+
+2005-01-30 Kenichi Handa <handa@m17n.org>
+
+ * fontset.c (Fset_fontset_font): Check family element of a given
+ vector.
+
+ * Makefile.in (lisp): Include charprop.el.
+
+2005-01-17 YAMAMOTO Mitsuharu <mituharu@math.s.chiba-u.ac.jp>
+
+ * macfns.c (Fx_create_frame, x_create_tip_frame): Fix crash.
+ Not sure if it's unnecessary.
+
+2005-01-16 Steven Tamm <steventamm@mac.com>
+
+ * macfns.c (Fx_create_frame, x_create_tip_frame): ifdef'd out
+ some possibly unnecessary fontset checking code that crashed
+ when creating a new frame
+
+2005-01-17 Kenichi Handa <handa@m17n.org>
+
+ * xfaces.c (merge_faces): Fix argument to lookup_derived_face and
+ lookup_face.
+
+ * xdisp.c (Fformat_mode_line): Fix argument to lookup_named_face.
+
+ * fringe.c (draw_fringe_bitmap_1): Fix argument to
+ lookup_named_face.
+
+2004-12-25 Kenichi Handa <handa@m17n.org>
+
+ * xdisp.c (get_next_display_element): Sync to the change in HEAD
+ on 2004-12-21.
+
+2004-12-11 Kenichi Handa <handa@m17n.org>
+
+ * search.c: Sync to the change in HEAD on 2004-11-19, 20.
+
+ * w32console.c: Sync to the change in HEAD on 2004-12-01.
+
+ * coding.c: Cancel the change done in HEAD on 2004-11-30.
+ (coding_charset_list): New function.
+
+ * coding.h (coding_charset_list): Extern it.
+
+ * term.c: Sync to the change in HEAD on 2004-11-30.
+
+2004-12-09 Kenichi Handa <handa@m17n.org>
+
+ * fontset.c (Fset_fontset_font): Call find_font_encoding with
+ concatenation of family and registry.
+
+2004-12-06 Kenichi Handa <handa@m17n.org>
+
+ * character.h (BYTE8_STRING): Fix typo.
+
+ * editfns.c (Ftranslate_region_internal): Don't convert unibyte
+ string to multibyte (sync to HEAD).
+
+ * casefiddle.c (casify_region): Handle changes in byte-length
+ using replace_range_2 (sync to HEAD).
+
+2004-11-24 Andreas Schwab <schwab@suse.de>
+
+ * chartab.c (map_char_table): GCPRO table and arg.
+
+2004-10-29 Kenichi Handa <handa@m17n.org>
+
+ * syntax.c (skip_syntaxes): Return lispy 0 (not nil) if point is
+ already at limit.
+
+2004-10-23 Kenichi Handa <handa@m17n.org>
+
+ * fontset.c (fs_load_font): Use fast_string_match_ignore_case
+ instead of fast_c_string_match_ignore_case.
+ (find_font_encoding): Argument changed to Lisp_Object. Use
+ fast_string_match_ignore_case instead of
+ fast_c_string_match_ignore_case. Caller changed.
+
+2004-10-15 Kenichi Handa <handa@m17n.org>
+
+ * xdisp.c (get_next_display_element): In unibyte case, decide to
+ display in octal form by checking a chacter by
+ UNIBYTE_CHAR_HAS_MULTIBYTE_P.
+
+ * charset.c (Fset_unibyte_charset): Setup
+ unibyte_has_multibyte_table.
+
+ * character.c (unibyte_has_multibyte_table): New variable.
+
+ * character.h (unibyte_has_multibyte_table): Extern it.
+ (UNIBYTE_CHAR_HAS_MULTIBYTE_P): New macro.
+
+2004-10-14 Kenichi Handa <handa@m17n.org>
+
+ * callproc.c (Fcall_process): Fix merging of 2004-10-13 change.
+
+2004-10-13 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (encode_coding_iso_2022): Fix handling of charset
+ annotation.
+
+2004-10-12 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (setup_coding_system): If coding_system is nil, use
+ Qundecided.
+ (Fterminal_coding_system): Return nil if terminal coding system is
+ `undecided'.
+ (syms_of_coding): Define coding-system `undecided' here. Setup
+ terminal_coding as `undecided'.
+
+2004-10-04 Kenichi Handa <handa@m17n.org>
+
+ * xdisp.c (message_dolog, set_message_1): Call
+ unibyte_char_to_multibyte with arg type int.
+
+ * fileio.c (Fsubstitute_in_file_name): Fix previous change.
+
+ * lread.c (read1): Fix reading of a char-table.
+
+ * print.c (print_object): Include sub char-table in cicularities
+ detection.
+
+2004-10-01 Kenichi Handa <handa@m17n.org>
+
+ * keymap.c (where_is_internal_2): Fix for the case that KEY is a
+ cons. Append the found sequences in car of ARGS instead of
+ prepending.
+
+2004-09-28 Kenichi Handa <handa@m17n.org>
+
+ * fileio.c (report_file_error): Make a unibyte string from
+ strerror (errorno).
+ (Fsubstitute_in_file_name): Fix the arg to
+ unibyte_char_to_multibyte. It is evaluated twice.
+
+2004-09-19 Kenichi Handa <handa@m17n.org>
+
+ * charset.h (CHAR_CHARSET): Shortcut for ASCII case.
+
+2004-09-14 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (detect_coding): Fix previous change.
+
+2004-09-13 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (detect_coding_utf_16): Don't set detect_info->found if
+ BOM is not found.
+ (detect_coding): Optimization for ISO-2022 when no 8-bit data is
+ found.
+ (detect_coding_system): Likewise.
+
+2004-09-01 Jason Rumney <jasonr@gnu.org>
+
+ * w32fns.c (x_to_w32_font): Update to use new coding struct.
+
+2004-08-17 Kenichi Handa <handa@m17n.org>
+
+ * charset.c (Fdeclare_equiv_charset): Fix handing of CHARS.
+ (Fiso_charset): Likewise.
+
+2004-08-03 Steven Tamm <steventamm@mac.com>
+
+ * macterm.c (mac_encode_char): Add charset argument and update
+ to use encoding_type
+ (x_new_font,x_new_fontset): Merge in changes from xterm.c;
+ switch to pure fontset
+ (decode_mac_font_name): Temporarily remove decoding
+ (x_font_name_to_mac_font_name): Temporarily remove encoding
+ (x_load_font): Temporarily remove encoding
+
+2004-06-30 Kenichi Handa <handa@m17n.org>
+
+ * xfaces.c (Fface_font): If frame is not on a window system,
+ ignore CHARACTER arg. If HAVE_WINDOW_SYSTEM is not defined, don't
+ refer to face->font.
+ (split_font_name_into_vector, build_font_name_from_vector)
+ (lookup_non_ascii_face, realize_non_ascii_face): Define them only
+ whne HAVE_WINDOW_SYSTEM is defined.
+
+2004-05-29 Kenichi Handa <handa@m17n.org>
+
+ * xdisp.c (BUILD_GLYPH_STRINGS): Check if s is NULL.
+ (x_produce_glyphs): Fix setting of members of cmp in case
+ cmp->glyph_len is zero,
+
+ * fontset.c (Fset_fontset_font): Docstring fixed.
+ (Ffontset_info): Make it backward compatible. New arg ALL.
+
+2004-05-11 Kim F. Storm <storm@cua.dk>
+
+ * process.c (read_process_output): Grow decoding_buf when needed;
+ this could cause a crash in allocate_string and compact_small_strings.
+
+2004-04-29 Kenichi Handa <handa@m17n.org>
+
+ * fileio.c (WRITE_BUF_SIZE): This macro deleted.
+ (e_write): Fix previous change.
+
+2004-04-28 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (setup_coding_system): Set coding->common_flags
+ correctly for raw-text.
+ (consume_chars): On encoding unibyte text by raw-text, don't check
+ multibyte form.
+ (encode_coding): On encoding by raw-text, never use translation
+ tables.
+
+ * fileio.c (e_write): Short cut for the case of no encoding.
+
+2004-04-20 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (detect_coding): Delete unused variables.
+ (detect_coding_system): Likewise.
+
+2004-04-18 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (encode_coding_utf_8): Fix handling of raw-byte char.
+ (consume_chars): Fix handling of 8-bit bytes in unibyte source.
+
+2004-04-14 Kenichi Handa <handa@m17n.org>
+
+ Sync all files to HEAD.
+
+2004-04-14 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (Ffind_coding_systems_region_internal): Include
+ raw-text and no-conversion in the result.
+
+ * fontset.h: Sync to HEAD.
+
+ * fontset.c: Sync to HEAD.
+
+2004-04-14 Kenichi Handa <handa@m17n.org>
+
+ * fontset.c (find_font_encoding): Return `ascii' for unknown
+ encoding.
+ (load_font_get_repertory): Delete unnecessary check of ENCODING of
+ FONT_DEF.
+ (font_def_arg, add_arg, from_arg, to_arg): New args.
+ (set_fontset_font): Argument changed.
+ (Fset_fontset_font): Fix for the case that TARGET is a script
+ name and charset name.
+ (new_fontset_from_font_name): Fix argument to Fnew_fontset.
+
+2004-04-13 Kenichi Handa <handa@m17n.org>
+
+ * fontset.c (fontset_font): Renamed from fontset_face. Return
+ value changed.
+ (face_suitable_for_char_p): Adjusted for the change of
+ fontset_font.
+ (face_for_char): Likewise.
+ (make_fontset_for_ascii_face): Fix setting of the fontset element
+ for ASCII.
+ (Finternal_char_font): Use fontset_font instead of FACE_FOR_CHAR
+ to get a font name.
+ (Ffontset_info): Adjusted for the change of fontset_font.
+
+ * composite.c: Sync to HEAD.
+
+ * search.c: Sync to HEAD.
+
+ * coding.c: Sync to HEAD.
+ (emacs_mule_char): Check invalid code more regidly.
+
+ * coding.h: Sync to HEAD.
+
+ * charset.c: Sync to HEAD.
+
+ * charset.h: Sync to HEAD.
+
+ * character.h (LEADING_CODE_LATIN_1_MIN)
+ (LEADING_CODE_LATIN_1_MAX): Delete these macros.
+
+2004-04-08 Kenichi Handa <handa@m17n.org>
+
+ * category.h: Sync to HEAD.
+
+ * category.c: Sync to HEAD.
+
+ * syntax.h: Sync to HEAD.
+
+ * syntax.c: Sync to HEAD.
+
+ * regex.h: Sync to HEAD.
+
+ * regex.c: Sync to HEAD.
+
+2004-04-07 Kenichi Handa <handa@m17n.org>
+
+ * editfns.c: Sync to HEAD.
+ (check_translation): New function.
+ (Ftranslate_region_internal): Handle M:N mapping.
+
+2004-04-06 Kenichi Handa <handa@m17n.org>
+
+ * xfaces.c (xlfd_point_size): Set font->numeric[XLFD_PIXEL_SIZE].
+
+2004-03-30 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (DECODE_DESIGNATION): Set chars_96 to -1 instead of
+ goto invalid_code.
+ (decode_coding_iso_2022): Fix handling of invalid designation.
+
+ * fileio.c (Finsert_file_contents): Be sure to call unbind_to
+ after calling code_conversion_save.
+
+2004-03-11 Kenichi Handa <handa@m17n.org>
+
+ * xdisp.c (handle_auto_composed_prop): Fix Lisp_Object/int mixup.
+
+ * print.c (print_prune_string_charset): Fix Lisp_Object/int mixup.
+
+ * fontset.c: Include "intervals.h".
+ (fontset_face): Fix comparing of Lisp_Objects.
+ (free_face_fontset): Fix Lisp_Object/int mixup.
+ (new_fontset_from_font_name): Likewise.
+
+ * editfns.c (Ftranslate_region_internal): Fix Lisp_Object/int mixup.
+
+ * coding.c: Add many prototypes for static functions.
+ (get_translation_table): Allow max_lookup to be NULL.
+ (decode_coding): Call get_translation_table with max_lookup NULL.
+ (Ffind_coding_systems_region_internal): Likewise.
+ (Funencodable_char_position, Fcheck_coding_systems_region):
+ Likewise.
+
+2004-03-11 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (get_translation_table): Declare it as Lisp_Object.
+ (LOOKUP_TRANSLATION_TABLE): New macro.
+ (produce_chars): Use LOOKUP_TRANSLATION_TABLE instead of
+ CHAR_TABLE_REF.
+ (consume_chars): Likewise.
+
+2004-03-11 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (MAX_ANNOTATION_LENGTH): Adjusted for the change of
+ annotation data format.
+ (ADD_ANNOTATION_DATA, ADD_COMPOSITION_DATA, ADD_CHARSET_DATA):
+ Change arguments FROM and TO to single argument NCHARS. Caller
+ changed.
+ (decode_coding_utf_8): Pay attention to coding->charbuf_used.
+ (decode_coding_utf_16, decode_coding_emacs_mule)
+ (decode_coding_iso_2022, decode_coding_sjis, decode_coding_big5)
+ (decode_coding_ccl, decode_coding_charset): Likewise.
+ (get_translation): New function.
+ (produce_chars): New arguments translation_table and last_block.
+ Translate characters here. Return number of carryover chars.
+ Caller changed.
+ (produce_composition): New argument pos. Caller changed.
+ Adjusted for the change of annotation data format.
+ (produce_charset, produce_annotation): Likewise.
+ (decode_coding, encode_coding): Don't call translate_chars.
+ (consume_chars): New arg translation_table. Caller changed.
+ (translate_chars): Deleted.
+ (syms_of_coding): Make translation-table's number of extra slots
+ 2.
+
+2004-03-09 Kenichi Handa <handa@m17n.org>
+
+ * search.c (simple_search): Fix setting this_pos_byte in backward
+ search.
+
+ * coding.c (detect_coding_emacs_mule): Fix counting of encoded
+ byte sequence.
+ (detect_coding_ccl): Fix setting of the variable valids.
+
+2004-03-04 Kenichi Handa <handa@m17n.org>
+
+ * xterm.c (x_list_fonts): Fix the detection of an auto-scaled font.
+
+ * coding.c (decode_coding_utf_16): Fix handling of surrogate pair.
+
+ * editfns.c (Ftranslate_region_internal): Renamed from
+ Ftranslate_region. Accept a char-table in TABLE.
+ (syms_of_editfns): Defsubr Stranslate_region_internal.
+
+ * xfaces.c (set_lface_from_font_name): If a font is specified for
+ a frame, generate a fontset from the font.
+ (build_scalable_font_name): If the scalable font is requested for
+ a specific size, don't change that size.
+ (try_font_list): Try a scalable font also in the case that a
+ pattern string is specified,
+
+
+2004-03-03 Kenichi Handa <handa@m17n.org>
+
+ * xfaces.c (Fface_font): New optional arg CHARACTER.
+
+2004-02-17 Kenichi Handa <handa@m17n.org>
+
+ * charset.h (CHARSET_OFFSET): New macro.
+
+2004-02-13 Kenichi Handa <handa@m17n.org>
+
+ * xterm.c (x_get_font_repertory): Fix for non-Unicode-bmp charset.
+
+ * fontset.c (fontset_face): Handle the case that repertory is a
+ char-table.
+ (find_font_encoding): Return nil for unknown encoding.
+ (Fset_fontset_font): Ignore a font of unknown encoding.
+
+2004-02-09 Kenichi Handa <handa@m17n.org>
+
+ * keymap.c (describe_vector): Handle default value of a char
+ table.
+
+ * fontset.c (fontset_face): Handle fallback fonts correctly.
+ (Ffontset_info): Return infomation about fallback fonts.
+
+2004-02-06 Kenichi Handa <handa@m17n.org>
+
+ * fontset.c (FONTSET_DEFAULT): New macro.
+ (FONTSET_ADD): Handle the case that range is nil.
+ (fontset_add): Likewise.
+ (Fset_fontset_font): Change the 2nd arg name to TARGET, and handle
+ the case that it is nil.
+ (dump_fontset): Call FONTSET_DEFAULT, not FONTSET_FALLBACK.
+ (syms_of_fontset): Set char-table-extra-slots property of fontset
+ to 9.
+
+ * charset.h (CHAR_CHARSET_P): Fix for the case that the method is
+ subset or superset.
+
+2004-01-30 Kenichi Handa <handa@m17n.org>
+
+ * emacs.c (main): Call init_charset after syms_of_XXX.
+
+ * charset.c (Vcharset_map_directory): Deleted.
+ (Vcharset_map_path): New variable
+ (load_charset_map_from_file): Use Vcharset_map_path instead.
+ (init_charset): Initialize Vcharset_map_path.
+ (syms_of_charset): Delete declaration of "charset-map-directory",
+ add declaration of "charset-map-path".
+
+2004-01-29 Kenichi Handa <handa@m17n.org>
+
+ * fns.c (string_char_to_byte): Optimize for ASCII only string.
+ (string_byte_to_char): Likewise.
+
+ * fileio.c (Finsert_file_contents): Avoid detecting a code twice.
+
+ * coding.c (detect_coding_iso_2022): Fix handling of SS2 and SS3.
+ (detect_coding): Treat '\0' as normal ASCII byte..
+ (detect_coding_system): Likewise.
+
+2004-01-27 Kenichi Handa <handa@m17n.org>
+
+ * coding.h (SJIS_TO_JIS2, JIS_TO_SJIS2): New macros.
+
+ * coding.c (QCmnemonic, QCdefalut_char)
+ (QCdecode_translation_table, QCencode_translation_table)
+ (QCpost_read_conversion, QCpre_write_conversion): New variables.
+ (get_translation_table): Return a list of translation tables if
+ necessary.
+ (decode_coding): Call get_translation_table with ENCODEP 0.
+ (char_encodable_p): If translation_table is non-nil, always call
+ translate_char.
+ (Fdefine_coding_system_internal): Accept list of translation
+ tables as :encode-translation-table and :decode-translation-table.
+ (Fcoding_system_put): New function.
+ (syms_of_coding): Declare new symbols. Defsubr
+ Scoding_system_put.
+ (decode_coding_sjis): Handle 4th charset (typically JISX0212).
+ (encode_coding_sjis): Likewise.
+
+ * charset.c (map_charset_chars): Fix arg to map_charset_chars in
+ when the charset is superset type.
+
+ * character.c (translate_char): Accept list of translation tables.
+
+2004-01-25 Kenichi Handa <handa@m17n.org>
+
+ * coding.h (enum coding_attr_index): New member
+ coding_attr_trans_tbl.
+ (CODING_ATTR_TRANS_TBL): New macro.
+
+ * coding.c (get_translation_table): New function.
+ (translate_chars): Fix the bug of skipping annotation data.
+ (decode_coding): Utilze get_translation_table.
+ (encode_coding): Likewise.
+ (char_encodable_p): Translate char if necessary.
+ (Funencodable_char_position): Likewise.
+ (Ffind_coding_systems_region_internal): Setup translation table
+ for encode in a coding system attribute vector in advance.
+ (Fcheck_coding_systems_region): Likewise.
+ (Fdefine_coding_system_internal): Allow a symbol as translation
+ table. For shift-jis type coding system, allow 4th charset.
+
+2004-01-24 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (decode_coding_sjis): Check the first byte rigidly.
+
+ * xdisp.c (get_next_display_element): Pass -1 as POS to
+ FACE_FOR_CHAR if displaying a C-string.
+
+2004-01-23 Kenichi Handa <handa@m17n.org>
+
+ * composite.c (get_composition_id): Handle xoff and yoff in a
+ composition rule.
+
+ * composite.h (COMPOSITION_DECODE_RULE): New arg xoff and yoff.
+ (struct composition): New member lbearing and rbearing.
+
+ * xdisp.c (move_it_to): Optimize for the case (op & MOVE_TO_Y).
+ (x_get_glyph_overhangs): Handle a composition glyph.
+ (x_produce_glyphs): Setup lbearing and rbreaing for a composition
+ glyph.
+
+ * xterm.c (x_compute_glyph_string_overhangs): Handle also a
+ composition glyph.
+
+2004-01-18 Kenichi Handa <handa@m17n.org>
+
+ * print.c: Include charset.h.
+ (Vprint_charset_text_property): New variable.
+ (Qdefault): Extern it.
+ (PRINT_STRING_NON_CHARSET_FOUND)
+ (PRINT_STRING_UNSAFE_CHARSET_FOUND): New macros.
+ (print_check_string_result): New variable.
+ (print_check_string_charset_prop): New function.
+ (print_prune_charset_plist): New variable.
+ (print_prune_string_charset): New function.
+ (print_object): Call print_prune_string_charset if
+ Vprint_charset_text_property is not t.
+ (print_interval): Print nothing if itnerval->plist is nil.
+ (syms_of_print): Declare Vprint_charset_text_property as a lisp
+ variable. Init and staticpro print_prune_charset_plist.
+
+2004-01-15 Kenichi Handa <handa@m17n.org>
+
+ * fontset.c (new_fontset_from_font_name): Use the specified font
+ for all characters in the new fontset.
+
+ * macterm.c (x_set_mouse_face_gc): Call FACE_FOR_CHAR with POS and
+ OBJECT args.
+
+ * xdisp.c (x_produce_glyphs): Call FACE_FOR_CHAR with POS and
+ OBJECT args for composition too.
+
+ * w32term.c (x_set_mouse_face_gc): Call FACE_FOR_CHAR with POS and
+ OBJECT args.
+
+2004-01-13 Kenichi Handa <handa@m17n.org>
+
+ * dispextern.h (FACE_FOR_CHAR): New args POS and OBJECT.
+
+ * fontset.c (reorder_font_vector): Adjusted for the change of
+ FONT_DEF format.
+ (fontset_face): New arg id. Caller changed.
+ (face_for_char): New args pos and object.
+ (make_fontset_for_ascii_face): Adjusted for the change of FONT_DEF
+ format.n
+ (fs_query_fontset): Check NAME by Fassoc too.
+ (Fset_fontset_font): Allow non-XLFD font name.
+ (Ffontset_info): Adjusted for the change of FONT_DEF format.
+
+ * fontset.h (face_for_char): Prototype adjusted.
+
+ * xdisp.c (face_before_or_after_it_pos): Call FACE_FOR_CHAR with
+ POS and OBJECT args.
+ (get_next_display_element): Likewise.
+ (append_space): Likewise.
+ (extend_face_to_end_of_line): Likewise.
+ (get_char_face_and_encoding): Likewise.
+ (BUILD_COMPOSITE_GLYPH_STRING): Likewise.
+ (x_produce_glyphs): Likewise.
+
+ * xfaces.c (compute_char_face): Call FACE_FOR_CHAR with
+ POS and OBJECT args.
+
+ * xterm.c (x_set_mouse_face_gc): Call FACE_FOR_CHAR with
+ POS and OBJECT args.
+
+2004-01-03 Jason Rumney <jasonr@gnu.org>
+
+ * w32select.c (Fw32_set_clipboard_data): Avoid potential realloc
+ of GlobalAlloc'ed memory.
+
+2003-12-29 Kenichi Handa <handa@m17n.org>
+
+ * ccl.c (Fccl_execute_on_string): Fix the condition of loop.
+
+ * charset.h (charset_table_used): Delete extern.
+
+ * charset.c (charset_table_used): Make it static.
+ (map_charset_chars): Fix args to c_function with.
+
+ * chartab.c (map_sub_char_table_for_charset): Fix args to
+ c_function with.
+
+ * coding.h (enum coding_result_code): Delete
+ CODING_RESULT_INSUFFICIENT_CMP, add CODING_RESULT_INVALID_SRC.
+
+ * coding.c (Qinsufficient_source, Qinconsistent_eol)
+ (Qinvalid_source, Qinterrupted, Qinsufficient_memory): New
+ variables.
+ (Vlast_code_conversion_error): New variables.
+ (syms_of_coding): DEFSYM or DEFVAR_LISP them.
+ (ONE_MORE_BYTE): Record error if any instead of signaling an
+ error. If non-ASCII multibyte char is found, return the negative
+ value of the code. All callers changed to check it.
+ (ONE_MORE_BYTE_NO_CHECK): Likewise.
+ (record_conversion_result): New function. All codes setting
+ coding->result are changed to call this function.
+ (detect_coding_utf_8): Don't use the local variable incomplete.
+ (decode_coding_utf_8): Likewise.
+ (emacs_mule_char): Change the second arg to `const'.
+ (detect_coding_emacs_mule): Don't use the local variable
+ incomplete.
+ (detect_coding_sjis): Likewise.
+ (detect_coding_big5): Likewise.
+ (decode_coding): Fix of flushing out unprocessed data.
+ (make_conversion_work_buffer): Fix making of a work buffer.
+ (decode_coding_object): Return coding->dst_object;
+
+ * fontset.c (set_fontset_font): Fix args.
+
+ * lisp.h (CHARACTERBITS): Define as 22.
+
+ * process.c (send_process): Be sure to set coding->src_multibyte.
+
+ * xdisp.c (handle_auto_composed_prop): Fix setting of limit.
+
+2003-12-02 Kenichi Handa <handa@m17n.org>
+
+ * xdisp.c (handle_auto_composed_prop): Give limit to
+ Fnext_single_char_property_change.
+
+2003-12-02 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (detect_coding): Fix previous change.
+ (detect_coding_system): Likewise.
+
+2003-12-02 Kenichi Handa <handa@m17n.org>
+
+ * composite.c (syms_of_composite): Don't make the compostion hash
+ table week.
+
+ * fontset.c (Fset_fontset_font): Fix docstring.
+
+ * lisp.h (detect_coding_system): Adjust prototype.
+
+ * fileio.c (kill_workbuf_unwind): Delete this function.
+ (Finsert_file_contents): Adjust the call of detect_coding_system.
+ Get conversion_buffer by code_conversion_save. Use the macor
+ CODING_MAY_REQUIRE_DECODING. After decoding, update
+ coding_system.
+
+ * coding.h (make_conversion_work_buffer): Delete extern.
+ (code_conversion_save): Extern it.
+
+ * coding.c (enum iso_code_class_type): Delete ISO_carriage_return.
+ (CODING_GET_INFO): Delete argument eol_type. Callers changed.
+ (decode_coding_utf_8): Don't do eol converion.
+ (detect_coding_utf_16): Check coding->src_chars, not
+ coding->src_bytes. Add heuristics for those that have no
+ signature.
+ (decode_coding_emacs_mule): Don't do eol converion.
+ (decode_coding_iso_2022): Likewise.
+ (decode_coding_sjis): Likewise.
+ (decode_coding_big5): Likewise.
+ (decode_coding_charset): Likewise.
+ (adjust_coding_eol_type): Return a new coding system.
+ (detect_coding): Don't detect eol. Fix for utf-16 detection.
+ (decode_eol): In case of CRLF->LF conversion, use del_range_2 on
+ each change.
+ (decode_coding): Pay attention to undo_list. Do eol convesion for
+ all types of coding-systems (if necessary).
+ (Vcode_conversion_work_buf_list): Delete it.
+ (Vcode_conversion_reused_workbuf): Renamed from
+ Vcode_conversion_reused_work_buf.
+ (Vcode_conversion_workbuf_name): New variable.
+ (reused_workbuf_in_use): New variable.
+ (make_conversion_work_buffer): Delete the arg DEPTH.
+ (code_conversion_restore): Argument changed to cons.
+ (code_conversion_save): Delete the argument BUFFER. Callers
+ changed.
+ (detect_coding_system): New argument src_chars. Callers changed.
+ Fix for utf-16 detection.
+ (init_coding_once): Don't use ISO_carriage_return.
+ (syms_of_coding): Initialized Vcode_conversion_workbuf_name and
+ reused_workbuf_in_use.
+
+2003-11-24 Kenichi Handa <handa@m17n.org>
+
+ * keymap.c (store_in_keymap): Pay attention to the case that idx
+ is a cons specifying a character range.
+
+ * coding.c (Fdefine_coding_system_internal): Fix previous change.
+
+2003-11-23 Kenichi Handa <handa@m17n.org>
+
+ * xdisp.c (handle_auto_composed_prop): Fix the case of returning
+ HANDLED_RECOMPUTE_PROPS.
+
+ * coding.c (Fdefine_coding_system_internal): Fix checking of
+ ascii compatibility.
+
+2003-11-22 Kenichi Handa <handa@m17n.org>
+
+ * charset.c (find_charsets_in_text): Delete unused locale
+ variable.
+ (Fset_charset_priority): Update Vemacs_mule_charset_list too.
+
+ * coding.c (encode_coding_emacs_mule): Emit bytes with MSB.
+ Resync charset_list to Vemacs_mule_charset_list.
+
+ * keymap.c (store_in_keymap): Pay attention to the case that idx
+ is a cons specifying a character range.
+
+2003-11-18 Kenichi Handa <handa@m17n.org>
+
+ * composite.c (update_compositions): Bind inhibit-read-only, etc
+ to t before calling remove-list-of-text-properties.
+
+ * print.c (print_object): Always print ASCII chars as is.
+
+2003-11-17 Kenichi Handa <handa@m17n.org>
+
+ * keymap.c (Fdefine_key): Fix handling of Lucid style event type
+ list.
+
+ * fns.c (Fmapconcat): Signal an error if SEQUENCE is a char table.
+ (Fmapcar): Likewise.
+ (Fmapc): Likewise.
+
+2003-11-15 Kenichi Handa <handa@m17n.org>
+
+ * syntax.c (skip_chars): Be sure to alloca char_ranges when
+ necessary.
+
+2003-11-14 Kenichi Handa <handa@m17n.org>
+
+ * xfaces.c (set_lface_from_font_name): Fix for the case that
+ FONTNAME is not fontset name.
+
+2003-11-13 Kenichi Handa <handa@m17n.org>
+
+ * fns.c (base64_encode_1): Fix previous change.
+
+2003-11-08 Kenichi Handa <handa@m17n.org>
+
+ * fontset.c (set_fontset_font): New function.
+ (Fset_fontset_font): If a font is specified for a charset, use
+ map_charset_chars to store the font spec in a fontset.
+
+2003-10-29 Kenichi Handa <handa@m17n.org>
+
+ * fontset.c (fontset_face): Create a fallback fontset on demand
+ (make_fontset): Don't create a fallback fontset here.
+ (free_face_fontset): Free a fallback fontset (if any) too.
+ (n_auto_fontsets): Delete this variable.
+ (auto_fontset_alist): New variable.
+ (new_fontset_from_font_name): Check auto_fontset_alist.
+ (dump_fontset) [FONTSET_DEBUG]: Fully re-written.
+ (Ffontset_list_all) [FONTSET_DEBUG]: New function.
+ (syms_of_fontset): Initialize and staticpro auto_fontset_alist.
+ Defsubr Sfontset_list_all.
+
+2003-10-24 Kenichi Handa <handa@m17n.org>
+
+ * xterm.c (x_list_fonts): Fix excluding of auto-scaled fonts.
+
+2003-10-23 Kenichi Handa <handa@m17n.org>
+
+ * fontset.c (Fnew_fontset): Check NAME more rigidly.
+
+2003-10-17 Kenichi Handa <handa@m17n.org>
+
+ * editfns.c (Fgoto_char): Fix docstring.
+
+2003-10-16 Kenichi Handa <handa@m17n.org>
+
+ * insdel.c (insert_from_gap): Adjust intervals correctly.
+
+2003-10-12 Jason Rumney <jasonr@gnu.org>
+
+ * w32term.c (GLYPHSET, WCRANGE): Define if system headers don't.
+ (pfnGetFontUnicodeRanges): New dynamically loaded function.
+ (w32_initialize): Try to load it.
+ (x_get_font_repertory): Use it if available.
+ (w32_encode_char): Add shortcut for unicode output.
+
+ * w32fns.c (w32_load_system_font): Default charset to -1.
+ (x_to_w32_charset): Match all fonts for unicode.
+ (w32_to_x_charset): New parameter matching. Don't return partial
+ or wildcard charsets.
+ (w32_to_all_x_charsets): Don't return partial or wildcard charsets.
+ (w32_codepage_for_font): Return CP_UNICODE for unicode.
+ (w32_to_x_font): Match charset to real charset.
+ (enum_font_cb2): Always list unicode versions.
+
+ * makefile.w32-in (temacs): Increase EMHEAP.
+
+2003-10-11 Jason Rumney <jasonr@gnu.org>
+
+ * w32term.c (w32_encode_char): New charset parameter.
+ font_info.encoding becomes encoding_type.
+ (x_get_font_repertory): New function. Warning: stub only!
+ (x_new_font): Return quickly if font already set.
+ (x_new_fontset): fontsetname parameter is Lisp_Object.
+ Use new fs_query_fontset. Try new_fontset_from_font_name. Use
+ fontset_name for return value.
+
+ * w32term.h: Declare x_get_font_repertory.
+
+ * w32select.c (Fw32_set_clipboard_data): Use string_x_string_p in
+ place of find_charset_in_text. Use encode_coding_object in place
+ of encode_coding.
+ (Fw32_get_clipboard_data): Use decode_coding_c_string in place of
+ decode_coding.
+
+ * w32fns.c (Fx_create_frame, x_create_tip_frame): Use new version
+ of x_new_fontset.
+ (w32_load_system_font): Initialize charset as unicode.
+ font_info.encoding becomes encoding_type.
+ (w32_to_x_font): Use decode_coding_c_string in place of
+ decode_coding.
+ (x_to_w32_font): Use encode_coding_object in place of
+ encode_coding.
+ (syms_of_w32fns): Set get_font_repertory_func.
+
+ * w32console.c: Include character.h. Use terminal_encode_buffer
+ from term.c.
+ (write_glyphs): Use new version of encode_terminal_code. Use
+ encode_coding_object in place of encode_coding.
+
+ * w32bdf.c (w32_load_bdf_font): Clear font_info before filling.
+ encoding becomes encoding_type.
+
+ * term.c (terminal_encode_buffer): Make externally visible.
+
+ * makefile.w32-in: Add character.h dependancies.
+ (character.o, chartab.o): New targets.
+
+2003-10-10 Kenichi Handa <handa@m17n.org>
+
+ * fileio.c (Finsert_file_contents) [DOS_NT]: Use the macro
+ CODING_ID_EOL_TYPE..
+
+2003-10-07 Andreas Schwab <schwab@suse.de>
+
+ * coding.c (produce_chars): Revert last change.
+
+2003-10-06 Kenichi Handa <handa@m17n.org>
+
+ * charset.h (charset_unicode): Extern it.
+
+ * charset.c (string_xstring_p): Check by (C >= 0x100).
+ (find_charsets_in_text): Format of the arc CHARSETS changed. New
+ arg MULTIBYTE.
+ (Ffind_charset_region, Ffind_charset_string): Adjusted for the
+ change of find_charsets_in_text.
+ (Fsplit_char): Fix doc. Never return unknown.
+
+ * chartab.c (char_table_translate): Use CHARACTERP, not INETEGERP.
+
+ * coding.c (Fdefine_coding_system_alias): Update
+ Vcoding_system_list.
+
+ * fontset.c (load_font_get_repertory): Pay attention to the case
+ that ENCODING of a font is specified by a char-table.
+
+ * xterm.c (x_get_font_repertory): Handle the case that the
+ encoding of font is other than Unicode.
+
+2003-10-02 Kenichi Handa <handa@m17n.org>
+
+ * term.c (encode_terminal_code): Don't handle glyph-table. Check
+ if a character is encodable by the terminal coding system. If
+ not, produces proper number of `?'s. Update
+ terminal_encode_buffer and terminal_encode_buf_size if necessary.
+ (produce_glyphs): Check by CHAR_BYTE8_P, not SINGLE_BYTE_CHAR_P.
+
+2003-10-01 Kenichi Handa <handa@m17n.org>
+
+ * term.c (terminal_encode_buffer, terminal_encode_buf_size): New
+ variables.
+ (encode_terminal_code): Argument changed. Encode multiple
+ characters at once. Store the result of encoding in
+ terminal_encode_buffer.
+ (write_glyphs): Adjusted for the change of encode_terminal_code.
+ (insert_glyphs): Likewise.
+ (term_init): Initialize terminal_encode_buffer and
+ terminal_encode_buf_size.
+
+ * coding.c (consume_chars): If coding->src_object is nil, don't
+ check annotation.
+
+2003-09-30 Kenichi Handa <handa@m17n.org>
+
+ * character.c (char_string): Use ASCII_CHAR_P instead of
+ SINGLE_BYTE_CHAR_P.
+
+2003-09-30 Kenichi Handa <handa@m17n.org>
+
+ * xdisp.c (handle_auto_composed_prop): Check if the last
+ characters of auto-composed region is newly composed with the
+ following characters.
+ (handle_composition_prop): Fix checking of point being inside
+ composition.
+
+2003-09-26 Kenichi Handa <handa@m17n.org>
+
+ * fns.c (concat): Don't change multibyteness of the result by
+ concatenating an 8-bit character.
+
+ * data.c (Faset): Check newelt by CHECK_CHARACTER. Don't change
+ multibyteness of the result when newelt is an 8-bit character.
+
+2003-09-29 Dave Love <fx@gnu.org>
+
+ * xmenu.c (find_and_call_menu_selection): Make menu_bar_items_used
+ EMACS_INT.
+
+ * xfns.c (DefaultDepthOfScreen, x_encode_text): Remove unused vars.
+
+ * xfaces.c (face_numeric_value): Declare dim size_t.
+ (Finternal_lisp_face_equal_p): Remove unused f.
+
+ * xdisp.c (BUILD_CHAR_GLYPH_STRINGS, display_and_set_cursor)
+ (MATRIX_ROW): Remove unused vars.
+ (draw_glyphs, x_insert_glyphs, fast_find_position)
+ (fast_find_position, fast_find_string_pos): Use EMACS_INT for
+ byte/char counts.
+
+ * regex.c (regex_compile): Remove unused var.
+
+ * minibuf.c (Fminibuffer_complete_word): Remove unused var.
+
+ * keymap.c (Fset_keymap_parent, map_keymap, Fcopy_keymap)
+ (Faccessible_keymaps, where_is_internal): Remove unused vars.
+
+ * keyboard.c (cancel_hourglass_unwind): Return Qnil.
+
+ * frame.c (frame_name_fnn_p): Make len EMACS_INT.
+
+ * fileio.c (Fwrite_region): Remove unused var.
+
+ * dispnew.c (adjust_frame_glyphs_for_frame_redisplay)
+ (adjust_frame_glyphs_for_window_redisplay): Remove unused ch_dim.
+
+ * composite.c (Fremove_list_of_text_properties): Declare.
+
+ * coding.c (inhibit_pre_post_conversion): Removed (unused).
+ (alloc_destination, produce_chars): Use EMACS_INT for byte/char
+ counts.
+ (coding_inherit_eol_type): Remove unused attrs.
+ (detect_coding): Cast arg of detect_eol.
+
+ * charset.c (syms_of_charset): Remove unused var p.
+ (find_charsets_in_text, Ffind_charset_region): Use EMACS_INT for
+ byte/char counts.
+
+ * casetab.c (set_case_table): Remove unused var.
+
+ * window.c (Fdisplay_buffer, Fframe_selected_window): Remove
+ unsued vars.
+
+2003-09-26 Dave Love <fx@gnu.org>
+
+ * xterm.c (x_bitmap_mask): Declare.
+
+2003-09-17 Dave Love <fx@gnu.org>
+
+ * xterm.c (x_term_init): Fix type error.
+
+ * lisp.h: Add Funibyte_char_to_multibyte.
+
+ * coding.c (Fread_coding_system): Fix arg of XSETSTRING.
+ (Fset_coding_system_priority): Doc fix.
+
+ * alloc.c: Sync with HEAD version.
+
+ * ccl.c (ccl_driver): Fix arg of CHARACTERP.
+
+ * indent.c (check_composition): Make start and end EMACS_INT.
+
+ * character.c (lisp_string_width): Make ignore and end EMACS_INT.
+
+ * xdisp.c (handle_composition_prop, check_point_in_composition):
+ Make buffer positions EMACS_INT.
+
+ * composite.c (find_composition, run_composition_function)
+ (update_compositions, Ffind_composition_internal): Make buffer
+ positions EMACS_INT.
+
+ * composite.h (find_composition, update_compositions): Make
+ position args EMACS_INT.
+
+ * keyboard.c (adjust_point_for_property): Make beg and end
+ EMACS_INT.
+
+ * intervals.c (get_property_and_range)
+ * intervals.h (get_property_and_range): Make start and end EMACS_INT.
+
+ * unexalpha.c: Don't include varargs.h.
+
+2003-09-16 Dave Love <fx@gnu.org>
+
+ * coding.h (ENCODE_UTF_8): New.
+
+ * Makefile.in (gtkutil.o): Depend on coding.h.
+
+ * coding.c (Fset_coding_system_priority): Doc fix.
+
+2003-09-16 Kenichi Handa <handa@m17n.org>
+
+ * fileio.c (Finsert_file_contents): Call setup_coding_system in
+ the case of auto saving.
+
+2003-09-10 Andreas Schwab <schwab@suse.de>
+
+ * chartab.c (map_char_table): Protect `range' from GC.
+ (map_char_table_for_charset): Likewise.
+
+2003-07-09 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (decode_coding_sjis): Check bytes more rigidly.
+
+2003-06-26 Kenichi Handa <handa@m17n.org>
+
+ * fileio.c (choose_write_coding_system): Return a decided coding
+ system.
+ (Fwrite_region): Set Vlast_coding_system_used to the return value
+ of choose_write_coding_system.
+
+2003-06-06 Kenichi Handa <handa@m17n.org>
+
+ * charset.c (Fset_charset_priority): Pay attention to duplicated
+ arguments.
+
+ * coding.c (QCcategory): New variable.
+ (syms_of_coding): Defsym it. Set all elements of
+ Vcoding_category_table and their symbol values.
+ (Fset_coding_system_priority): Doc fix. Update symbol qvalues of
+ coding-category-XXX, and coding-category-list.
+ (Fdefine_coding_system_internal): Add category in the plist.
+
+2003-06-05 Kenichi Handa <handa@m17n.org>
+
+ * callproc.c (Fcall_process): Handle carryover correctly.
+
+ * coding.c (decode_coding_iso_2022): Fix handling of invalid
+ bytes.
+ (raw_text_coding_system): Check NILP (coding_system).
+ (coding_inherit_eol_type): Check NILP (coding_system) and
+ NILP (parent).
+ (consume_chars): Fix for the case of raw-text.
+
+ * process.c (read_process_output): Handle carryover correctly.
+
+2003-06-02 Dave Love <fx@gnu.org>
+
+ * regex.c (re_search_2): Fix last change.
+
+2003-05-30 Kenichi Handa <handa@m17n.org>
+
+ * regex.c (GET_CHAR_BEFORE_2): Check multibyte, not
+ target_multibyte. Even in a unibyte case, return a converted
+ multibyte char.
+ (GET_CHAR_AFTER): New macro.
+ (PATFETCH): Translate via multibyte char.
+ (HANDLE_UNIBYTE_RANGE): Delete this macro.
+ (SETUP_MULTIBYTE_RANGE): New macro.
+ (regex_compile): Setup compiled code so that its multibyteness
+ matches that of a target. Fix the handling of "[X-YZ]" using
+ SETUP_MULTIBYTE_RANGE.
+ (analyse_first) <charset>: For filling fastmap for all multibyte
+ characters, don't check by BASE_LEADING_CODE_P.
+ (re_search_2): Don't check RE_TARGET_MULTIBYTE_P (bufp). It is
+ the same as RE_MULTIBYTE_P (bufp) now.
+ (mutually_exclusive_p): Check by (! multibyte ||
+ IS_REAL_ASCII (c)).
+ (TARGET_CHAR_AND_LENGTH): Delete this macro.
+ (TRANSLATE_VIA_MULTIBYTE): New macro.
+ (re_match_2_internal): Don't check RE_TARGET_MULTIBYTE_P (bufp).
+ It is the same as RE_MULTIBYTE_P (bufp) now.
+ <exactn>: Translate via multibyte.
+ <anychar>: Fetch a character by RE_STRING_CHAR_AND_LENGTH. Don't
+ translate it.
+ <charset, charset_not>: Fetch a character by
+ RE_STRING_CHAR_AND_LENGTH. Translate via multibyte.
+ <duplicate>: Call bcmp_translate with the last arg `multibyte'.
+ <wordbound, notwordbound, wordbeg, wordend, syntaxspec,
+ notsyntaxspec, categoryspec, notcategoryspec> Fetch a character
+ by GET_CHAR_AFTER.
+ (bcmp_translate): Likewise.
+
+ * search.c (compile_pattern): Check the member target_multibyte,
+ not the member multibyte of buf.
+
+ * lread.c (read1): While reading a string, set force_singlebyte
+ and force_multibyte correctly.
+
+ * charset.c (Fset_unibyte_charset): Fix setting up of
+ unibyte_to_multibyte_table.
+ (init_charset_once): Likewise.
+
+2003-05-29 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (setup_coding_system): If coding has
+ post-read-conversion or pre-write-conversion, set
+ CODING_REQUIRE_DECODING_MASK and CODING_REQUIRE_ENCODING_MASK
+ respectively.
+ (decode_coding_gap): Run post-read-conversion if any.
+
+ * fileio.c (Finsert_file_contents): Even if we read into a
+ unibyte buffer, check if we must decode the result or not.
+
+2003-05-29 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (make_conversion_work_buffer): Change the work buffer
+ name to the same one as that of Emacs 21.
+
+2003-05-28 Kenichi Handa <handa@m17n.org>
+
+ * coding.h (make_conversion_work_buffer): Prototype adjusted.
+ (code_conversion_restore): Don't extern it.
+
+ * coding.c (detected_mask): Delete unused variable.
+ (decode_coding_iso_2022): Pay attention to the byte sequence of
+ CTEXT extended segment, and retain those bytes as is.
+ (decode_coding_ccl): Delete unused variable `valids'.
+ (setup_coding_system): Delete unused variable `category'.
+ (consume_chars): Delete unused variable `category'. Make it work
+ for non-multibyte case.
+ (make_conversion_work_buffer): Argument changed.
+ (saved_coding): Delete unused variable.
+ (code_conversion_restore): Don't check saved_coding->destination.
+ (code_conversion_save): New function.
+ (decode_coding_gap, encode_coding_gap): Call code_conversion_save
+ instead of record_unwind_protect.
+ (decode_coding_object, encode_coding_object): Likewise. Recover
+ PT.
+ (detect_coding_system): Delete unused variable `mask'.
+ (Fdefine_coding_system_internal): Delete unsed vaiable id;
+
+ * fileio.c (kill_workbuf_unwind): New function.
+ (Finsert_file_contents): On replacing, call
+ make_conversion_work_buffer with correct args, and call
+ record_unwind_protect with the first arg kill_workbuf_unwind.
+
+ * lisp.h (Fgenerate_new_buffer_name): EXFUN it.
+
+2003-05-20 Kenichi Handa <handa@m17n.org>
+
+ * fontset.c (BASE_FONTSET_P): Check FONTSET_BASE, not
+ FONTSET_NAME.
+ (fontset_add): Fix for the case that TO is less than TO1.
+ (Ffontset_info): Don't use fallback fontset on checking the
+ default fontset.
+ (dump_fontset): New function for debugging.
+
+ * coding.c (Fdefine_coding_system_internal): Fix for the case that
+ coding_type is Qcharset.
+
+2003-05-07 Kenichi Handa <handa@m17n.org>
+
+ * chartab.c (map_sub_char_table): New argument DEFAULT_VAL.
+ (map_char_table): Don't inherit the value from the parent on
+ initializing VAL. Adjusted for the above change.
+
+2003-05-06 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (Qsignature, Qendian): Delete these variables.
+ (syms_of_coding): Don't initialize them.
+ (CATEGORY_MASK_UTF_16_AUTO): New macro.
+ (detect_coding_utf_16): Add CATEGORY_MASK_UTF_16_AUTO in
+ detect_info->found.
+ (decode_coding_utf_16): Don't detect BOM here.
+ (encode_coding_utf_16): Produce BOM if CODING_UTF_16_BOM (coding)
+ is NOT utf_16_without_bom.
+ (setup_coding_system): For a coding system of type utf-16, check
+ if the attribute :endian is Qbig or not (not nil or not), and set
+ CODING_REQUIRE_DETECTION_MASK if BOM detection is required.
+ (detect_coding): If coding type is utf-16 and BOM detection is
+ required, detect it.
+ (Fdefine_coding_system_internal): For a coding system of type
+ utf-16, check if the attribute :endian is Qbig or not (not nil or
+ not).
+
+2003-05-06 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (coding_set_source): Fix for the case that the current
+ buffer is different from coding->src_object.
+ (decode_coding_object): Don't use the conversion work buffer if
+ DST_OBJECT is a buffer.
+
+2003-05-04 Dave Love <fx@gnu.org>
+
+ * lread.c (read_emacs_mule_char) [len==2]: Index
+ emacs_mule_charset correctly.
+
+2003-02-16 Dave Love <fx@gnu.org>
+
+ * coding.c (Qbig5, Vbig5_coding_system, CATEGORY_MASK_BIG5)
+ (detect_coding_big5, decode_coding_big5, encode_coding_big5)
+ (Fdecode_big5_char, Fencode_big5_char): Deleted. (Big5 no longer
+ treated specially.)
+ (setup_coding_system, coding_category, CATEGORY_MASK_ANY)
+ (detected_mask): Remove Big5 bits.
+
+2003-04-09 Kenichi Handa <handa@m17n.org>
+
+ The following changes are to make the font rescaling facility
+ compatible with Emacs 21.
+
+ * xfaces.c (Vface_font_rescale_alist): Renamed from
+ Vface_resizing_fonts.
+ (struct font_name): Rename member resizing_ratio to rescale_ratio.
+ (font_rescale_ratio): Renamed from font_resizing_ratio.
+ (split_font_name): Set font->rescale_ratio.
+ (better_font_p): Pay attention to font->rescale_ratio.
+ (build_scalable_font_name): Likewise. Change RESX, and RESY
+ fields.
+ (syms_of_xfaces): Declare Vface_font_rescale_alist as a Lisp
+ variable.
+
+2003-03-28 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig)
+ (Qutf_16_le): Remove these variables.
+ (syms_of_coding): Don't DEFSYM them.
+ (decode_coding_utf_16): Fix handling of BOM.
+ (encode_coding_utf_16): Fix handling of BOM.
+
+2003-03-14 Kenichi Handa <handa@m17n.org>
+
+ * fileio.c (Finsert_file_contents): On replacing, before decoding
+ the file into the work buffer, set point of the work buffer to the
+ end.
+
+2003-02-13 Dave Love <fx@gnu.org>
+
+ * coding.c (Fcheck_coding_systems_region): Fix type errors.
+
+2003-02-04 Dave Love <fx@gnu.org>
+
+ * xterm.c (XTread_socket): Check Lisp types for Vx_keysym_table
+ and fix C types.
+
+2003-01-31 Kenichi Handa <handa@m17n.org>
+
+ * xdisp.c (SKIP_GLYPHS): New macro.
+ (set_cursor_from_row): Pay attention to string display properties.
+
+ * category.c (copy_category_entry): Fix for the case that RANGE
+ is an integer.
+
+ * xterm.c (x_encode_char): Call ccl_driver with the last arg Qnil.
+
+ * w32term.c (w32_encode_char): Call ccl_driver with the last arg
+ Qnil.
+
+2003-01-30 Kenichi Handa <handa@m17n.org>
+
+ * charset.c (Fcharset_id_internal): New function.
+ (syms_of_charset): Defsubr it.
+
+ * coding.c (decode_coding_ccl, encode_coding_ccl): Call ccl_driver
+ with the last arg charset_list acquired from coding.
+ (Fdefine_coding_system_internal): For ccl-based coding system, fix
+ the attribute coding_attr_ccl_valids.
+
+ * coding.h (enum define_coding_ccl_arg_index): Set the first
+ member coding_arg_ccl_decoder to coding_arg_max.
+
+ * ccl.h (ccl_driver): Prototype adjusted.
+
+ * ccl.c (CCL_DECODE_CHAR, CCL_ENCODE_CHAR): New macros.
+ (ccl_driver): New arg CHARSET_LIST. Use the above macros instead
+ of DECODE_CAHR, ENCODE_CHAR, CHAR_CHARSET.
+ (Fccl_execute): Call ccl_driver with the last arg Qnil.
+ (Fccl_execute_on_string): Likewise.
+
+2003-01-11 Kenichi Handa <handa@m17n.org>
+
+ * charset.h (ENCODE_CHAR): If the method is SUBSET or SUPERSET,
+ call encode_char.
+
+ * charset.c (encode_char): Fix handling of methods SUBSET and
+ SUPERSET.
+
+ * xterm.c (x_new_fontset): Fix previous change.
+
+2003-01-10 Dave Love <fx@gnu.org>
+
+ * composite.c (syms_of_composite): Make composition_hash_table
+ weak.
+
+2003-01-10 Kenichi Handa <handa@m17n.org>
+
+ * dispextern.h (check_face_attributes, generate_ascii_font_name)
+ (font_name_registry): Don't extern them.
+ (split_font_name_into_vector, build_font_name_from_vector): Extern
+ them.
+
+ * fontset.h (Qfontset): Don't extern it.
+ (new_fontset_from_font_name): Extern it.
+
+ * fontset.c: Give 8 extra slots to fontset objects.
+ (Qfontset_info): New variable.
+ (syms_of_fontset): Defsym it.
+ (FONTSET_FALLBACK): New macro.
+ (fontset_face): Try also the default fontset.
+ (make_fontset): Realize a fallback fontset from the default
+ fontset.
+ (generate_ascii_font_name): Moved from xfaces.c. Rewritten by
+ using split_font_name_into_vector and build_font_name_from_vector.
+ (Fset_fontset_font): Access the elements of font_spec by enum
+ FONT_SPEC_INDEX. If font_spec is a string, extract the registry
+ name by using split_font_name_into_vector.
+ (Fnew_fontset): If no ASCII font is specified in FONTLIST,
+ generate a proper font name from the fontset name. Update
+ Vfontset_alias_alist.
+ (n_auto_fontsets): New variable.
+ (new_fontset_from_font_name): New function.
+ (Ffont_info): Store the information about fonts generated from the
+ default fontset in the first extra slot of the returned
+ char-table.
+
+ * xfaces.c (generate_ascii_font_name): Moved to fontset.c.
+ (font_name_registry): Function deleted.
+ (split_font_name_into_vector): New function.
+ (build_font_name_from_vector): New function.
+ (font_list): The argument REGISTRY is now a list of registry
+ names.
+ (choose_face_font): If we are choosing an ASCII font, and ATTRS
+ specifies an explicit font name, return the name as is. Make a
+ list of registy names.
+
+ * xfns.c (x_set_font, x_create_tip_frame): Adjusted to the change
+ of x_new_fontset.
+ (Fx_create_frame): Don't call x_new_fontset here. Just use
+ x_list_fonts to check the existence of fonts.
+
+ * xterm.h (x_new_fontset): Prototype adjusted.
+
+ * xterm.c (x_new_fontset): Change the arg FONTSETNAME to Lisp
+ string. Use new_fontset_from_font_name to create a fontset from a
+ font name.
+
+2003-01-07 Dave Love <fx@gnu.org>
+
+ * Makefile.in: Fix some dependencies.
+
+ * keymap.c (Fapropos_internal): Don't gcpro apropos_predicate but
+ set it to nil before returning.
+
+ * composite.c (update_compositions): Fix type error.
+
+ * syntax.c (skip_chars, skip_syntaxes): Fix type errors.
+
+2003-01-07 Kenichi Handa <handa@m17n.org>
+
+ * xterm.c (x_new_font): Optimize for the case that the font is
+ already set for the frame.
+
+2003-01-06 Kenichi Handa <handa@m17n.org>
+
+ * chartab.c (char_table_ascii): Check if the char table contents
+ is sub-char-table or not.
+ (char_table_set): Fix argument to char_table_ascii.
+ (char_table_set_range): Likewise.
+
+ * coding.c (CATEGORY_MASK_RAW_TEXT): New macro.
+ (detect_coding_utf_8, detect_coding_utf_16)
+ (detect_coding_emacs_mule, detect_coding_iso_2022)
+ (detect_coding_sjis, detect_coding_big5)
+ (detect_coding_ccl, detect_coding_charset): Change argument MASK
+ to DETECT_INFO. Update DETECT_INFO and return 1 if the byte
+ sequence is valid in this coding system. Callers changed.
+ (MAX_ANNOTATION_LENGTH): New macro.
+ (ADD_ANNOTATION_DATA): New macro.
+ (ADD_COMPOSITION_DATA): Argument changed. Callers changed. Call
+ ADD_ANNOTATION_DATA. The format of annotation data changed.
+ (ADD_CHARSET_DATA): New macro.
+ (emacs_mule_char): New argument ID. Callers changed.
+ (decode_coding_emacs_mule, decode_coding_iso_2022)
+ (decode_coding_sjis, decode_coding_big5, decode_coding_charset):
+ Produce charset annotation data in coding->charbuf.
+ (encode_coding_emacs_mule, encode_coding_iso_2022): Pay attention
+ to charset annotation data in coding->charbuf.
+ (setup_coding_system): Add CODING_ANNOTATE_CHARSET_MASK
+ coding->common_flags if the coding system is iso-2022 based and
+ uses designation.
+ (produce_composition): Adjusted for the new annotation data
+ format.
+ (produce_charset): New function.
+ (produce_annotation): Handle charset annotation.
+ (handle_composition_annotation, handle_charset_annotation): New
+ functions.
+ (consume_chars): Handle charset annotation. Utilize the above two
+ functions.
+ (encode_coding_object): If SRC_OBJECT and DST_OBJECT are the same
+ buffer, get the deleted text as a string and set
+ coding->src_object to that string.
+ (detect_coding, detect_coding_system): Use the new struct
+ coding_detection_info.
+
+ * coding.h (struct coding_detection_info): New structure.
+ (struct coding_system): Prototype of the member `detector'
+ adjusted.
+ (CODING_ANNOTATE_CHARSET_MASK): New macro.
+
+2003-01-06 Kenichi Handa <handa@m17n.org>
+
+ * insdel.c (insert_from_gap): Fix argument to offset_intervals.
+
+2003-01-03 Dave Love <fx@gnu.org>
+
+ * keymap.c (apropos_predicate, apropos_accumulate): Declare
+ static.
+ (Fapropos_internal): Don't gcpro apropos_accumulate. Set result
+ to new local and nullify apropos_accumulate before returning.
+ (syms_of_keymap): Staticpro and initialize apropos_accumulate.
+
+2002-12-05 Kenichi Handa <handa@m17n.org>
+
+ * charset.c (Fdefine_charset_internal): Setup charset.fast_map
+ correctly.
+
+2002-11-26 Dave Love <fx@gnu.org>
+
+ * fns.c (Flanginfo): Call synchronize_system_time_locale.
+
+2002-11-07 Kenichi Handa <handa@m17n.org>
+
+ The following changes are to make character composition happen
+ automatically on displaying.
+
+ * Makefile.in (lisp, shortlisp): Add composite.elc
+
+ * composite.h (Qauto_composed, Vauto_composition_function,
+ Qauto_composition_function): Extern them.
+
+ * composite.c (Vcomposition_function_table,
+ Qcomposition_function_table): Delete variables.
+ (Qauto_composed, Vauto_composition_function,
+ Qauto_composition_function): New variables.
+ (run_composition_function): Don't call
+ compose-chars-after-function.
+ (update_compositions): Clear `auto-composed' text property.
+ (compose_chars_in_text): Delete this function.
+ (syms_of_composite): Staticpro Qauto_composed and
+ Qauto_composition_function. Declare Vauto_composition_function as
+ a Lisp variable.
+
+ * dispextern.h (enum prop_idx): Add member AUTO_COMPOSED_PROP_IDX.
+
+ * xdisp.c (it_props): Add an entry for Qauto_composed.
+ (handle_auto_composed_prop): New function.
+
+ * xselect.c (selection_data_to_lisp_data): Don't call
+ compose_chars_in_text.
+
+2002-11-06 Dave Love <fx@gnu.org>
+
+ * keyboard.c (read_char): Modify checking around use of
+ Vkeyboard_translate_table.
+
+ * xterm.c (XTread_socket): Check Lisp types for Vx_keysym_table
+ and fix C types.
+
+2002-11-06 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (decode_coding_utf_8): When eol_type is Qdos, handle
+ the case that the last byte is '\r' correctly.
+ (decode_coding_emacs_mule): Likewise.
+ (decode_coding_iso_2022): Likewise.
+ (decode_coding_sjis): Likewise.
+ (decode_coding_big5): Likewise.
+ (decode_coding_charset): Likewise.
+ (produce_chars): Likewise.
+ (decode_coding): Flushing out the unprocessed data correctly.
+ (decode_coding_gap): Set CODING_MODE_LAST_BLOCK bit of
+ coding->mode.
+
+2002-10-31 Dave Love <fx@gnu.org>
+
+ * xterm.c (XTread_socket): Fix changes for defined keysyms. Add
+ XK_ISO... case.
+ (xaw_scroll_callback): Revert last change.
+
+2002-10-30 Kenichi Handa <handa@m17n.org>
+
+ * charset.c (Fset_charset_priority): Update
+ Viso_2022_charset_list.
+
+2002-10-29 Kenichi Handa <handa@m17n.org>
+
+ * xfaces.c (Vface_resizing_fonts): New variable.
+ (struct font_name): New member `resizing_ratio'.
+ (font_resizing_ratio): New function.
+ (split_font_name): Set font->resizing_ratio.
+ (better_font_p): Pay attention to font->resizing_ratio.
+ (build_scalable_font_name): Likewise. Don't change POINT_SIZE,
+ RESX, and RESY fields.
+ (try_alternative_families): Try scalable fonts if
+ Vscalable_fonts_allowed is not Qt.
+ (syms_of_xfaces): Declare Vface_resizing_fonts as a Lisp variable.
+
+2002-10-29 Dave Love <fx@gnu.org>
+
+ * xterm.c (xaw_scroll_callback): Cast correctly.
+
+2002-10-28 Dave Love <fx@gnu.org>
+
+ * keyboard.c (lispy_accent_codes, lispy_accent_keys): Extend.
+ (lispy_kana_keys): Comment out.
+ (make_lispy_event) [XK_kana_A]: Comment out.
+
+ * xterm.c (xaw_scroll_callback): Cast call_data.
+ (XTread_socket): Deal with ASCII keysyms.
+ (syms_of_xterm) <Vx_keysym_table>: Fix args of make_hash_table.
+
+2002-10-27 Dave Love <fx@gnu.org>
+
+ * xterm.c (Vx_keysym_table): New.
+ (syms_of_xterm): Initialize it.
+ (XTread_socket): Use it.
+ From head: Eliminate incorrect optimization that tried to avoid
+ decoding the output of X*LookupString.
+ (x_get_font_repertory): Delete charset declaration.
+
+2002-10-16 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (detect_coding): Fix previous change.
+ (detect_coding_charset): If only ASCII bytes are found, return 0.
+ (detect_coding_system): Fix previous change.
+ (Fdefine_coding_system_internal): Setup CODING_ATTR_ASCII_COMPAT
+ (attrs) correctly.
+
+2002-10-15 Dave Love <fx@gnu.org>
+
+ * coding.c (Fcheck_coding_system): Doc fix.
+
+ * editfns.c (Finsert_byte): Return a proper value.
+
+2002-10-14 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (decode_coding): Fix args to translate_chars. Pay
+ attention to Vstandard_translation_table_for_decode.
+ (encode_coding): Fix args to translate_chars. Pay attention to
+ Vstandard_translation_table_for_encode.
+
+ * data.c (Faset): Check NEWELT by ASCII_CHAR_P, not by
+ SINGLE_BYTE_CHAR_P.
+
+ * editfns.c (general_insert_function): Check VAL by ASCII_CHAR_P,
+ not by SINGLE_BYTE_CHAR_P.
+
+ * fns.c (concat): Check CH by ASCII_CHAR_P, not by
+ SINGLE_BYTE_CHAR_P.
+
+ * insdel.c (copy_text): Check C by ASCII_CHAR_P, not by
+ SINGLE_BYTE_CHAR_P.
+
+ * keymap.c (Ftext_char_description): Check C by ASCII_CHAR_P, not
+ by SINGLE_BYTE_CHAR_P.
+
+ * search.c (Freplace_match): Check C by ASCII_CHAR_P, not by
+ SINGLE_BYTE_CHAR_P.
+
+2002-10-14 Dave Love <fx@gnu.org>
+
+ * fns.c (Fstring_as_multibyte, Fstring_to_multibyte): Doc fix.
+
+2002-10-10 Dave Love <fx@gnu.org>
+
+ * fns.c (Flanginfo): Fix typo.
+
+ * unexelf.c (unexec): Make last change conditional on Irix 6.5.
+
+2002-10-10 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (detect_coding_utf_8): Check incomplete byte sequence.
+ Don't update *mask when correctly detected.
+ (detect_coding_utf_16): Likewise.
+ (detect_coding_emacs_mule): Likewise.
+ (detect_coding_iso_2022): Likewise.
+ (detect_coding_sjis): Likewise.
+ (detect_coding_big5): Likewise.
+ (detect_coding_ccl): Likewise.
+ (decode_coding_sjis): Fix decoding of katakana-jisx0201.
+ (detect_eol): Delete the argument CODING, and add the argument
+ CATEGORY.
+ (detect_coding): Adjusted for the changes above.
+ (detect_coding_system): Likewise.
+
+2002-10-09 Kenichi Handa <handa@m17n.org>
+
+ * character.c (char_string): Renamed from
+ char_string_with_unification. Pay attention to
+ CHAR_MODIFIER_MASK.
+ (string_char): Renamed from string_char.
+
+ * character.h (CHAR_STRING): Call char_string if C is greater than
+ MAX_3_BYTE_CHAR.
+ (CHAR_STRING_ADVANCE): Likewise.
+ (STRING_CHAR): Call string_char instead of
+ string_char_with_unification.
+ (STRING_CHAR_AND_LENGTH): Likewise.
+ (STRING_CHAR_ADVANCE): Likewise.
+
+2002-10-09 Dave Love <fx@gnu.org>
+
+ * coding.c (decode_coding_utf_8): Treat surrogates as invalid.
+
+2002-10-07 Kenichi Handa <handa@m17n.org>
+
+ * keymap.c (push_key_description): Pay attention to
+ force_multibyte.
+
+ * regex.c (re_search_2): Fix for the case of unibyte buffer.
+
+2002-10-06 Dave Love <fx@gnu.org>
+
+ * charset.c (define_charset_internal): Rename `supprementary'.
+
+ * Makefile.in (lisp, shortlisp): Remove latin-N.
+
+2002-10-05 Dave Love <fx@gnu.org>
+
+ * xfns.c (x_window, x_window): Use use_xim.
+
+ * xterm.c (use_xim): Initialize.
+ (xim_open_dpy, xim_initialize, xim_close_dpy): Use use_xim.
+ (x_term_init): Maybe set use_xim.
+
+ * xterm.h (use_xim) [HAVE_X_I18N]: Declare.
+
+2002-10-01 Kenichi Handa <handa@m17n.org>
+
+ * search.c (search_buffer): Fix case-fold-search of multibyte
+ characters.
+ (boyer_moore): Rename the last argument to char_high_bits.
+
+2002-09-27 Kenichi Handa <handa@m17n.org>
+
+ * xdisp.c (display_string): Fix for the case of zero width glyph.
+
+ * xfns.c (x_set_font): Change the error message of the case that
+ x_new_fontset returns Qt.
+
+ * xfaces.c (set_lface_from_font_name): Reject the default fontset.
+ (Finternal_set_lisp_face_attribute): Use signal_error for the
+ error of invalid fontset.
+
+ * xterm.c (x_new_fontset): If FONTSETNAME specifies the default
+ fontset, return Qt.
+
+2002-09-19 Kenichi Handa <handa@m17n.org>
+
+ * regex.c (re_search_2): Fix previous change.
+
+2002-09-18 Kenichi Handa <handa@m17n.org>
+
+ * syntax.c (skip_syntaxes): Fix previous change.
+
+2002-09-13 Kenichi Handa <handa@m17n.org>
+
+ * syntax.c (skip_chars): Fix previous change.
+ (skip_syntaxes): Fix previous change.
+
+2002-09-06 Dave Love <fx@gnu.org>
+
+ * config.in: Restore it.
+
+2002-09-05 Dave Love <fx@gnu.org>
+
+ * config.in: Removed (now auto-generated).
+
+ * s/usg5-4.h: Fix last change.
+
+ * unexelf.c (unexec): Make .got handling not SGI-specific.
+
+ * syntax.c (syms_of_syntax) <multibyte-syntax-as-symbol>: Doc fix.
+
+ * regex.c: Use `ifdef HAVE_ALLOCA_H', not `if HAVE_ALLOCA_H'.
+
+ * keyboard.c (read_key_sequence): Fix type error.
+
+ * buffer.c (Fset_buffer_multibyte, Fset_buffer_multibyte): Fix
+ type error.
+
+ * fontset.c (fontset_add): Return Lisp_Object.
+
+2002-09-03 Dave Love <fx@gnu.org>
+
+ * charset.h (charset_ordered_list_tick): Declare extern.
+
+2002-09-03 Kenichi Handa <handa@m17n.org>
+
+ The following changes (and some of 2002-08-20 changes of mine) are
+ for handling syntax, category, and case conversion for unibyte
+ characters by converting them to multibyte on the fly. With these
+ changes, we don't have to setup syntax and case tables for unibyte
+ characters in each language environment.
+
+ * abbrev.c (Fexpand_abbrev): Convert a unibyte character to
+ multibyte if necessary.
+
+ * bytecode.c (Fbyte_code): Likewise.
+
+ * character.h (LEADING_CODE_LATIN_1_MIN)
+ (LEADING_CODE_LATIN_1_MAX): New macros.
+ (unibyte_to_multibyte_table): Extern it.
+ (unibyte_char_to_multibyte): New macro.
+ (MAKE_CHAR_MULTIBYTE): Use unibyte_to_multibyte_table.
+ (CHAR_LEADING_CODE): New macro.
+ (FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE): New macro.
+
+ * character.c (unibyte_to_multibyte_table): New variable.
+ (unibyte_char_to_multibyte): Move to character.h and defined as
+ macro.
+ (multibyte_char_to_unibyte): If C is an eight-bit character,
+ convert it to the corresponding byte value.
+
+ * charset.c (Fset_unibyte_charset): If the dimension of CHARSET is
+ not 1, singals an error. Update the elements of
+ unibyte_to_multibyte_table.
+ (init_charset_once): Initialize unibyte_to_multibyte_table.
+ (syms_of_charset): Define the charset `iso-8859-1'.
+
+ * casefiddle.c (casify_object): Fix previous change.
+
+ * cmds.c (internal_self_insert): In a multibyte buffer, insert C
+ as is without converting it to unibyte. In a unibyte buffer,
+ convert C to multibyte before checking the syntax.
+
+ * lisp.h (unibyte_char_to_multibyte): Extern deleted.
+
+ * minibuf.c (Fminibuffer_complete_word): Use the macro
+ FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE.
+
+ * regex.h (struct re_pattern_buffer): New member target_multibyte.
+
+ * regex.c (RE_TARGET_MULTIBYTE_P): New macro.
+ (GET_CHAR_BEFORE_2): Check target_multibyte, not multibyte. If
+ that is zero, convert an eight-bit char to multibyte.
+ (MAKE_CHAR_MULTIBYTE, CHAR_LEADING_CODE): New dummy new macros for
+ non-emacs case.
+ (PATFETCH): Convert an eight-bit char to multibyte.
+ (HANDLE_UNIBYTE_RANGE): New macro.
+ (regex_compile): Setup the compiled pattern for multibyte chars
+ even if the given regex string is unibyte. Use PATFETCH_RAW
+ instead of PATFETCH in many places. To handle `charset'
+ specification of unibyte, call HANDLE_UNIBYTE_RANGE. Use bitmap
+ only for ASCII chars.
+ (analyse_first) <exactn>: Simplified because the compiled pattern
+ is multibyte.
+ <charset_not>: Setup fastmap from bitmap only for ASCII chars.
+ <charset>: Use CHAR_LEADING_CODE to get leading codes.
+ <categoryspec>: If multibyte, setup fastmap only for ASCII chars
+ here.
+ (re_compile_fastmap) [emacs]: Call analyse_first with the arg
+ multibyte always 1.
+ (re_search_2) In emacs, set the locale variable multibyte to 1,
+ otherwise to 0. New local variable target_multibyte. Check it
+ to decide the multibyteness of STR1 and STR2. If
+ target_multibyte is zero, convert unibyte chars to multibyte
+ before translating and checking fastmap.
+ (TARGET_CHAR_AND_LENGTH): New macro.
+ (re_match_2_internal): In emacs, set the locale variable multibyte
+ to 1, otherwise to 0. New local variable target_multibyte. Check
+ it to decide the multibyteness of STR1 and STR2. Use
+ TARGET_CHAR_AND_LENGTH to fetch a character from D.
+ <charset, charset_not>: If multibyte is nonzero, check fastmap
+ only for ASCII chars. Call bcmp_translate with
+ target_multibyte, not with multibyte.
+ <begline>: Declare the local variable C as `unsigned'.
+ (bcmp_translate): Change the last arg name to target_multibyte.
+
+ * search.c (compile_pattern_1): Don't adjust the multibyteness of
+ the regexp pattern and the matching target. Set cp->buf.multibyte
+ to the multibyteness of the regexp pattern. Set
+ cp->but.target_multibyte to the multibyteness of the matching
+ target.
+ (wordify): Use FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE instead of
+ FETCH_STRING_CHAR_ADVANCE.
+ (Freplace_match): Convert unibyte chars to multibyte.
+
+ * syntax.c (char_quoted): Use FETCH_CHAR_AS_MULTIBYTE to convert
+ unibyte chars to multibyte.
+ (back_comment): Likewise.
+ (scan_words): Likewise.
+ (skip_chars): The arg syntaxp is deleted, and the code for
+ handling syntaxes is moved to skip_syntaxes. Callers changed.
+ Fix the case that the multibyteness of STRING and the current
+ buffer doesn't match.
+ (skip_syntaxes): New function.
+ (SYNTAX_WITH_MULTIBYTE_CHECK): Check C by ASCII_CHAR_P, not by
+ SINGLE_BYTE_CHAR_P.
+ (Fforward_comment): Use FETCH_CHAR_AS_MULTIBYTE to convert unibyte
+ chars to multibyte.
+ (scan_lists): Likewise.
+ (Fbackward_prefix_chars): Likewise.
+ (scan_sexps_forward): Likewise.
+
+2002-08-23 Kenichi Handa <handa@m17n.org>
+
+ * xfaces.c (QCfontset): New variable.
+ (LFACE_FONTSET): New macro.
+ (check_lface_attrs): Check also LFACE_FONTSET_INDEX.
+ (set_lface_from_font_name): Setup LFACE_FONTSET (lface).
+ (Finternal_set_lisp_face_attribute): Handle QCfontset.
+ (Finternal_get_lisp_face_attribute): Likewise.
+ (lface_same_font_attributes_p): Fix checking of LFACE_FONT_INDEX,
+ check also LFACE_FONTSET_INDEX.
+ (face_fontset): Check attrs[LFACE_FONTSET_INDEX], not
+ attrs[LFACE_FONT_INDEX].
+ (syms_of_xfaces): Intern and staticpro QCfontset.
+
+ * dispextern.h (enum lface_attribute_index): New member
+ LFACE_FONTSET_INDEX.
+
+ * fns.c (base64_encode_1): Handle eight-bit chars correctly.
+
+2002-08-21 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (coding_set_destination): Fix coding->destination for
+ the case converting a region.
+ (encode_coding_utf_8): Encode eight-bit chars as single byte.
+ (encode_coding_object): Fix coding->dst_pos and
+ coding->dst_pos_byte for the case converting a region.
+
+ * insdel.c (insert_from_gap): Make it work even if PT != GTP.
+
+ * character.h (BYTE8_STRING): New macro.
+
+ * fns.c (base64_decode_1): Insert eight-bit chars correctly.
+
+2002-08-20 Kenichi Handa <handa@m17n.org>
+
+ * xdisp.c (get_next_display_element): Don't display unibyte 8-bit
+ characters by octal form.
+
+ * abbrev.c (Fexpand_abbrev): Fix for the multibyte case.
+
+ * buffer.h (_fetch_multibyte_char_len): Extern deleted.
+ (FETCH_MULTIBYTE_CHAR): Don't use _fetch_multibyte_char_len.
+ (BUF_FETCH_MULTIBYTE_CHAR): Likewise.
+ (FETCH_CHAR_AS_MULTIBYTE): New macro.
+
+ * casetab.c (set_canon, set_identity, shuffle): Simplified.
+
+ * casefiddle.c (casify_object): Simplified. Handle the case that
+ the case conversion change the byte length.
+ (casify_region): Likewise
+
+ * character.h (MAKE_CHAR_UNIBYTE, MAKE_CHAR_MULTIBYTE): New
+ macros.
+
+ * character.c (_fetch_multibyte_char_len): This variable deleted.
+ (syms_of_character): Setup Vprintable_chars.
+
+ * editfns.c (Fchar_equal): Fix for the unibyte case.
+ (Finsert_byte): New function.
+ (syms_of_editfns): Defsubr it.
+
+ * keyboard.c (read_key_sequence): Use ~CHAR_MODIFIER_MASK instead
+ of direct code 0x3ffff.
+
+ * search.c (Freplace_match): Fix for the unibyte case.
+
+2002-08-19 Kenichi Handa <handa@m17n.org>
+
+ * lread.c (safe_to_load_p): Fix the logic.
+
+ * syntax.c (scan_words): Don't treat characters belonging to
+ different scripts as constituting a word.
+
+ * editfns.c (Fformat): Use ASCII_CHAR_P, not SINGLE_BYTE_CHAR_P.
+
+ * fontset.c (Fset_fontset_font): Treat `ascii' as charset, not
+ script.
+
+ * emacs.c (main): In the case of --unibyte, instead of aborting on
+ finding non-empty buffer, make it unibyte.
+
+2002-08-18 Kenichi Handa <handa@m17n.org>
+
+ * xterm.c (x_new_fontset): Call `create-fontset-from-ascii-font'
+ to create a fontset.
+
+2002-08-18 Dave Love <fx@gnu.org>
+
+ * character.c (Funibyte_char_to_multibyte): Doc fix.
+
+ * xfns.c [HAVE_STDLIB_H]: Fix last change.
+
+2002-08-15 Kenichi Handa <handa@m17n.org>
+
+ * fontset.c (fontset_add): Make the type `int'.
+ (fontset_id_valid_p): Define it if FONTSET_DEBUG is defined.
+
+ * character.c (unibyte_char_to_multibyte): Refer to
+ charset_unibyte, not charset_primary.
+ (multibyte_char_to_unibyte): Likewise.
+ (Funibyte_char_to_multibyte): Likewise.
+
+ * charset.h: (charset_unibyte): Extern it instead of
+ charset_primary.
+
+ * charset.c (charset_unibyte): Renamed from charset_primary.
+ (Funibyte_charset): Renamed from Fprimary_charset.
+ (Fset_unibyte_charset): Renamed from Fset_primary_charset.
+ (syms_of_charset): Adjusted for the above changes.
+
+ * w32term.c (x_produce_glyphs): Use ASCII_CHAR_P, not
+ SINGLE_BYTE_CHAR_P. Fix the logic of handling non-ASCII char when
+ it->multibyte_p is zero.
+
+ * lisp.h (nonascii_insert_offset, Vnonascii_translation_table):
+ Extern deleted.
+
+2002-08-08 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (Fdefine_coding_system_internal): Fix category setting
+ for a coding system of type iso-2022.
+
+2002-08-02 Kenichi Handa <handa@m17n.org>
+
+ * fontset.h (FS_LOAD_FONT): Call fs_load_font with the arg CHARSET
+ -1.
+
+2002-08-01 Kenichi Handa <handa@m17n.org>
+
+ * syntax.c (Vnext_word_boundary_function_table): New variable.
+ (syms_of_syntax): Declare it as a Lisp variable.
+ (scan_words): Call functions in Vnext_word_boundary_function_table
+ if any.
+
+ * xterm.c (x_load_font): Initialize fontp->fontset to -1.
+
+ * fontset.c (fs_load_font): If fontp->charset is not negative,
+ return fontp without setting its members.
+
+2002-07-31 Dave Love <fx@gnu.org>
+
+ * config.in: Generated with autoheader.
+
+ * xfns.c [HAVE_STDLIB_H]: Change logic (instead of fixing typo).
+
+ * m/sparc.h (HAVE_ALLOCA): Delete.
+
+ * s/irix6-5.h: Don't include strings.h.
+ (bcopy, bzero, bcmp): Don't undef.
+
+ * s/irix6-0.h (bcopy, bzero, bcmp): Don't undef.
+
+ * s/usg5-4.h (NO_SIOCTL_H): Don't define.
+ (TIOCSIGSEND): Don't test IRIX6.
+ (bcopy, bzero, bcmp): Define conditionally.
+
+2002-07-31 Kenichi Handa <handa@m17n.org>
+
+ * buffer.c (Qas, Qmake, Qto): New variables.
+ (Fset_buffer_multibyte): New optional arg METHOD. Caller changed.
+ (syms_of_buffer): Intern and staticpro Qas, Qmake, and Qto.
+
+ * callproc.c (Fcall_process): Don't call insert_1_both directly if
+ we are inserting a process output into a multibyte buffer.
+
+ * character.h (CHAR_TO_BYTE8): If C is not eight-bit char, call
+ multibyte_char_to_unibyte.
+
+ * character.c (Funibyte_char_to_multibyte): If C can't be decoded
+ by the primary charset, make it eight-bit char.
+ (Fmultibyte_char_to_unibyte): Call CHAR_TO_BYTE8.
+
+ * charset.c: (charset_eight_bit, Qeight_bit_control): New
+ variables.
+ (charset_8_bit__control, charset_8_bit_graphic,
+ Qeight_bit_control, Qeight_bit_graphic): These variables deleted.
+ (define_charset_internal): New function.
+ (syms_of_charset): Call define_charset_internal for pre-defined
+ charsets.
+
+ * charset.h (charset_8_bit): Extern it.
+
+ * coding.c (make_conversion_work_buffer): Adjusted for the change
+ of Fset_buffer_multibyte.
+ (encode_coding_raw_text): Increment p0 in the loop.
+
+ * lisp.h (Fset_buffer_multibyte): Prototype adjusted.
+
+ * xdisp.c (setup_echo_area_for_printing, set_message_1): Adjusted
+ for the change of Fset_buffer_multibyte.
+
+ * fns.c (Fstring_to_multibyte): New function.
+ (syms_of_fns): Declare Fstring_to_multibyte as Lisp subroutine.
+
+2002-07-30 Dave Love <fx@gnu.org>
+
+ * xfns.c (x_put_x_image): Declare args.
+
+ * xfaces.c (font_name_registry, choose_face_font): Delete unused
+ vars.
+ (try_font_list): Declare an arg.
+
+ * xdisp.c (message2_nolog, set_message): Declare an arg.
+
+ * terminfo.c (tparam): Declare an arg. Use P_ to declare tparm.
+
+ * syntax.c (scan_sexps_forward): Declare an arg.
+
+ * scroll.c (calculate_scrolling, calculate_direct_scrolling):
+ Declare an arg.
+
+ * lisp.h (Fnew_fontset): Declare.
+
+ * keymap.c (push_key_description): Call CHARACTERP correctly.
+
+ * fontset.c (fontset_add): Declare args. Call make_number
+ correctly.
+ (face_for_char): Delete unused vars.
+ (Fset_fontset_font): Doc fix. Delete unused vars.
+
+ * doc.c (Fsubstitute_command_keys): Delete unused vars.
+
+ * composite.c (update_compositions): Declare arg.
+
+ * cm.c (calccost, cmgoto): Declare args.
+
+ * charset.c: Remove `emacs' conditional. Doc fixes.
+ (map_char_table_for_charset): Declare.
+
+ * character.c (syms_of_character) <translation-table-vector>: Doc
+ fix.
+
+ * ccl.c: Remove `emacs' conditional. Include hash table stuff
+ from trunk.
+
+2002-07-26 Kenichi Handa <handa@m17n.org>
+
+ The following changes are to allow specifying multiple font
+ patterns for a character range (specified by script or charset).
+
+ * Makefile.in (abbrev.o): Depend on syntax.h.
+ (xfaces.o): Depend on charset.h.
+
+ * alloc.c (Fmake_string): Use ASCII_CHAR_P, not
+ SINGLE_BYTE_CHAR_P.
+
+ * ccl.c (Fccl_execute_on_string): Add `const' to local variables.
+
+ * character.h (Vchar_script_table): Extern it.
+
+ * character.c (Vscript_alist): This variable deleted.
+ (Vchar_script_table, Qchar_script_table): New variable.
+ (syms_of_character): Declare Vchar_script_table as a lisp variable
+ and initialize it.
+
+ * chartab.c (Fmake_char_table): Doc fixed. If PURPOSE doesn't
+ have property char-table-extra-slots, make no extra slot.
+
+ * dispextern.h (struct face): Member `charset' deleted.
+ (FACE_SUITABLE_FOR_CHAR_P): Use ASCII_CHAR_P, not
+ SINGLE_BYTE_CHAR_P.
+ (FACE_FOR_CHAR): Likewise.
+ (choose_face_font, lookup_non_ascii_face, font_name_registry): Add
+ prototypes
+ (lookup_face, lookup_named_face, lookup_derived_face): Prototype
+ fixed.
+ (generate_ascii_font_name): Renamed from generate_ascii_font.
+
+ * fontset.h (get_font_repertory_func): New prototype.
+ (make_fontset_for_ascii_face, fs_load_font): Prototypes fixed.
+ (FS_LOAD_FONT): Call fs_load_font with the 3rd arg charset_ascii.
+
+ * fontset.c (Qprepend, Qappend): New variables.
+ (FONTSET_CHARSET_ALIST, FONTSET_FACE_ALIST): These macros deleted.
+ (FONTSET_NOFONT_FACE, FONTSET_REPERTORY): New macros.
+ (FONTSET_REF): Optimize if FONTSET is Vdefault_fontset.
+ (FONTSET_REF_AND_RANGE, FONTSET_ADD): New macros.
+ (fontset_ref_and_range, fontset_add, reorder_font_vector)
+ (load_font_get_repertory): New functions.
+ (fontset_set): This function deleted.
+ (fontset_face): New arg FACE. Return face ID, not face.
+ Completely re-written to handle new fontset structure. Caller
+ changed.
+ (free_face_fontset): Use ASET istead of AREF (X) = Y.
+ (face_for_char): Don't call lookup_face.
+ (make_fontset_for_ascii_face): New arg FACE.
+ (fs_load_font): New arg CHARSET_ID. Don't check
+ Vfont_encoding_alist here.
+ (find_font_encoding): New function.
+ (list_fontsets): Use STRINGP, not ! NILP.
+ (accumulate_script_ranges): New function.
+ (Fset_fontset_font, Fnew_fontset, Ffontset_info): Completely
+ re-written to handle new fontset structure.
+ (Ffontset_font): Return a copy of element.
+ (syms_of_fontset): Define symbols Qprepend and Qappend. Fix
+ docstring of font-encoding-alist.
+
+ * lisp.h (CHAR_TABLE_REF): Remove unnecessary check (IDX >= 0).
+ (Fset_fotset_font): Fix arguments to 5.
+
+ * msdos.c (XMenuActivate): Adjuted for the change of
+ lookup_derived_face.
+
+ * xdisp.c (message_dolog, set_message_1, extend_face_to_end_of_line):
+ Use ASCII_CHAR_P, not SINGLE_BYTE_CHAR_P.
+ (highlight_trailing_whitespace): Adjusted for the change of
+ lookup_named_face.
+
+ * xfaces.c: Include charset.h.
+ (load_face_font): Argument C deleted. Caller changed.
+ (generate_ascii_font_name): Renamed from generate_ascii_font.
+ (font_name_registry): New function.
+ (cache_face): Store ascii faces before non-ascii faces in buckets.
+ (lookup_face): Arguments C and BASE_FACE deleted. Caller changed.
+ Lookup only ascii faces.
+ (lookup_non_ascii_face): New function.
+ (lookup_named_face): Argument C deleted. Caller changed.
+ (lookup_derived_face): Argument C deleted. Caller changed.
+ (try_font_list): New arg PATTERN. Caller changed. If PATTERN is
+ a string, just call font_list with it.
+ (choose_face_font): Arguments FACE and C deleted. New arg
+ FONT_SPEC. Caller changed.
+ (realize_face): Arguments C and BASE_FACE deleted. Caller
+ (realize_x_face): Likewise.
+ (realize_non_ascii_face): New function.
+ (realize_x_face): Call load_face_font here.
+ (realize_tty_face): Argument C deleted. Caller changed.
+ (compute_char_face): If CH is not ascii, call FACE_FOR_CHAR to
+ get a face ID.
+ (dump_realized_face): Don't print charset of FACE.
+
+ * xfns.c (x_set_font): Always call x_new_fontset and
+ store_frame_parameter.
+ (Fx_create_frame): Call x_new_fontset, not x_new_font.
+ (syms_of_xfns): Set get_font_repertory_func to
+ x_get_font_repertory.
+
+ * xterm.h (x_get_font_repertory): Extern it.
+
+ * xterm.c (x_produce_glyphs): Use ASCII_CHAR_P, not
+ SINGLE_BYTE_CHAR_P. Fix the logic of handling non-ASCII char when
+ it->multibyte_p is zero.
+ (XTread_socket): Use ASCII_CHAR_P, not SINGLE_BYTE_CHAR_P.
+ (x_new_fontset): If FONTSETNAME doesn't match any existing
+ fontsets, create a new one.
+ (x_get_font_repertory): New function.
+
+2002-07-25 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (Ffind_coding_systems_region_internal): Detect an
+ ASCII only string correctly.
+
+ * lread.c (Fload): Don't load with Qload_force_doc_strings t if
+ version is 0.
+
+2002-07-24 Kenichi Handa <handa@m17n.org>
+
+ * lread.c: Include "coding.h".
+ (Qget_emacs_mule_file_char, Qload_force_doc_strings,
+ load_each_byte, unread_char): New variables.
+ (readchar_backlog): This variable deleted.
+ (readchar): Return a character unless load_each_byte is nonzero.
+ Handle the case that readcharfun is Qget_emacs_mule_file_char or a
+ cons. If unread_char is not -1, simply return it.
+ (unreadchar): Handle the case that readcharfun is
+ Qget_emacs_mule_file_char or a cons. Set unread_char if
+ necessary.
+ (read_multibyte): This function deleted.
+ (readbyte_for_lambda, readbyte_from_file, readbyte_from_string)
+ (read_emacs_mule_char): New functions.
+ (Fload): Even if the file doesn't have the extention ".elc", if
+ safe_to_load_p returns a positive version number, assume that the
+ file contains bytecompiled code. If the version is less than 22,
+ load the file while decoding multibyte sequences by emacs-mule.
+ (readevalloop): Don't use readchar_backlog.
+ (Fread): Likewise. Pay attention to the case that STREAM is a
+ cons.
+ (Fread_from_string): Pay attention to the case that STREAM is a
+ cons.
+ (read_escape): The arg BYTEREP deleted.
+ (read1): Set load_each_byte to 1 temporarily while handling
+ #@NUMBER. Don't call read_multibyte.
+ (read_vector): Call Fread with a cons. If readcharfun is
+ Qget_emacs_mule_file_char, decode the read string by emacs-mule.
+ (read_list): If doc_reference is 2, make the cdr part string as
+ unibyte.
+ (syms_of_lread): Intern and staticpro Qget_emacs_mule_file_char
+ and Qload_force_doc_strings.
+
+2002-07-23 Kenichi Handa <handa@m17n.org>
+
+ * xdisp.c (face_before_or_after_it_pos): Call
+ FETCH_MULTIBYTE_CHAR with byte postion, not char position.
+
+2002-07-22 Kenichi Handa <handa@m17n.org>
+
+ * character.h (TRAILING_CODE_P): New macro.
+ (MAYBE_UNIFY_CHAR): Adjusted for the change of Funify_charset.
+ (string_char_with_unification): Fix prototype.
+ (Vscript_alist): Extern it.
+
+ * character.c (Vscript_alist): New variable.
+ (string_char_with_unification): Add `const' to local variables.
+ (str_as_unibyte): Likewise.
+ (string_escape_byte8): Likewise.
+ (syms_of_character): Declare script-alist as a Lisp variable.
+
+ * charset.h (Vcharset_ordered_list): Extern it.
+ (charset_ordered_list_tick): Extern it.
+ (EMACS_MULE_LEADING_CODE_PRIVATE_11)
+ (EMACS_MULE_LEADING_CODE_PRIVATE_12)
+ (EMACS_MULE_LEADING_CODE_PRIVATE_21)
+ (EMACS_MULE_LEADING_CODE_PRIVATE_22): New macros
+ (Funify_charset): Adjusted for the change of Funify_charset.
+
+ * charset.c (charset_ordered_list_tick): New variable.
+ (Fdefine_charset_internal): Increment charset_ordered_list_tick.
+ (Funify_charset): New optional arg DEUNIFY. If it is non-nil,
+ deunify intead of unify a charset.
+ (string_xstring_p): Add `const' to local variables.
+ (find_charsets_in_text): Add `const' to arguemnts and local
+ variables.
+ (encode_char): Adjusted for the change of Funify_charset. Fix
+ detecting of invalid code.
+ (Fset_charset_priority): Increment charset_ordered_list_tick.
+ (Fmap_charset_chars): Fix handling of default value for FROM_CODE
+ and TO_CODE.
+
+ * coding.c (LEADING_CODE_PRIVATE_11, LEADING_CODE_PRIVATE_12)
+ (LEADING_CODE_PRIVATE_21, LEADING_CODE_PRIVATE_22): Macros
+ deleted. Callers changed to use
+ EMACS_MULE_LEADING_CODE_PRIVATE_11, etc.
+ (decode_coding_ccl): Add `const' to local variables.
+ (consume_chars): Likewise.
+ (Ffind_coding_systems_region_internal): Likewise.
+ (Fcheck_coding_systems_region): Likewise.
+
+ * print.c (print_object): Use octal form for printing the
+ contents of a bool vector.
+
+2002-07-18 Dave Love <fx@gnu.org>
+
+ * lread.c (Fload) <!load_dangerous_libraries>: Don't leak fd.
+ <version == 20>: Refuse to load.
+
+2002-07-17 Dave Love <fx@gnu.org>
+
+ * fns.c: Move coding.h.
+ (Qcodeset, Qdays, Qmonths): New.
+ (concat): Use CHARACTERP instead of INTERGERP.
+ (Flocale_codeset): Deleted.
+ (Flanginfo): New function.
+ (syms_of_fns): Changed accordingly.
+
+ * coding.c (adjust_coding_eol_type): Fix eol_type/eol_seen mixup.
+
+2002-07-16 Dave Love <fx@gnu.org>
+
+ * casetab.c (init_casetab_once, init_casetab_once): Fix
+ CHAR_TABLE_SET call.
+
+ * category.c (Fmodify_category_entry): Fix CATEGORY_MEMBER call.
+
+ * character.c (syms_of_character): Fix CHAR_TABLE_SET call.
+
+ * charset.c (Fmap_charset_chars): Check args. Convert Lisp types.
+ (load_charset_map, Fdeclare_equiv_charset, Fencode_char)
+ (Fset_charset_priority, syms_of_charset): Convert Lisp types.
+
+ * charset.h (CHECK_CHARSET_GET_ID): Use XINT on AREF result.
+
+ * coding.c (ENCODE_DESIGNATION, decode_eol)
+ (make_conversion_work_buffer, code_conversion_restore)
+ (Fdefine_coding_system_internal): Convert Lisp types.
+ (code_conversion_restore): Use EQ, not ==.
+ (Fencode_coding_string): Fix code_convert_string call.
+
+ * coding.h (code_convert_region): Fix prototype.
+
+ * dispextern.h (redraw_frame, redraw_garbaged_frames): Removed.
+
+ * fontset.c (fontset_ref, fontset_set, fs_load_font)
+ (Ffontset_info): Convert Lisp types.
+
+ * syntax.h (SYNTAX_ENTRY_INT): Don't use make_number.
+
+ * xterm.c (note_mouse_movement): Fix call of window_from_coordinates.
+
+ * xdisp.c (display_mode_element): Fix call of Fset_text_properties.
+
+ * chartab.c: Include "...h", not <...h> in some cases.
+
+ * callproc.c (Fcall_process): Remove unused variables.
+
+2002-07-12 Dave Love <fx@gnu.org>
+
+ * coding.c (Fset_coding_system_priority): Allow null arg list.
+
+2002-07-03 Dave Love <fx@gnu.org>
+
+ * minibuf.c (Fminibuffer_complete_word): Remove unused var.
+ (Fself_insert_and_exit): Use CHARACTERP.
+
+ * callproc.c (Fcall_process): Remove unused vars.
+
+ * xterm.c (XTread_socket): Add extra dead keysyms.
+
+ * xdisp.c (decode_mode_spec_coding): Use CHARACTERP.
+
+ * dispextern.h: Remove prototypes for redraw_frame,
+ redraw_garbaged_frames.
+
+ * cmds.c (Fself_insert_command): Use CHARACTERP.
+
+ * chartab.c (make_sub_char_table): Remove unused var.
+ (Fset_char_table_default, Fmap_char_table): Doc fix.
+
+ * keymap.c (access_keymap): Remove generic char code.
+ (push_key_description): Use CHARACTERP.
+
+2002-07-01 Dave Love <fx@gnu.org>
+
+ * charset.c: Doc fixes.
+ (Funify_charset): Extra checking.
+
+2002-06-24 Dave Love <fx@gnu.org>
+
+ * lread.c: Remove some unused variables.
+ (safe_to_load_p): If safe, return the magic number version byte.
+ (Fload): Maybe use load-with-code-conversion.
+
+2002-06-12 Kenichi Handa <handa@m17n.org>
+
+ * category.c (Fmodify_category_entry): Don't modify the contents
+ of category_set for characters out of the range. Avoid
+ unnecessary modification.
+
+ * character.h (MAYBE_UNIFY_CHAR): Adjusted for the change of
+ Vchar_unify_table. The default value of the table is now nil.
+
+ * character.c (syms_of_character): Setup Vchar_width_table for
+ eight-bit-control and raw-byte chars.
+
+ * charset.h (enum define_charset_arg_index): Delete
+ charset_arg_parents and add charset_arg_subset and
+ charset_arg_superset.
+ (enum charset_attr_index): Delete charset_parents and add
+ charset_subset and charset_superset.
+ (enum charset_method): Delete CHARSET_METHOD_INHERIT and add
+ CHARSET_METHOD_SUBSET and CHARSET_METHOD_SUPERSET.
+ (CHARSET_ATTR_PARENTS, CHARSET_PARENTS): Macros deleted.
+ (CHARSET_ATTR_SUBSET, CHARSET_ATTR_SUPERSET, CHARSET_SUBSET)
+ (CHARSET_SUPERSET): New macros.
+ (charset_work): Extern it.
+ (ENCODE_CHAR): Use charset_work.
+ (CHAR_CHARSET_P): Adjusted for the change of encoder format.
+ (map_charset_chars): Extern it.
+
+ * charset.c (load_charset_map): Set the default value of encoder
+ and deunifier char-tables to nil.
+ (map_charset_chars): Argument changed. Callers changed. Use
+ map_char_table_for_charset instead of map_char_table.
+ (Fmap_charset_chars): New optional args from_code and to_code.
+ (Fdefine_charset_internal): Adjusted for the change of
+ `define-charset' (:parents -> :subset or :superset).
+ (charset_work): New variable.
+ (encode_char): Adjusted for the change of
+ Fdefine_charset_internal.
+ (syms_of_charset): Likewise.
+ (Ffind_charset_string): Setup the vector `charsets' correctly.
+
+ * chartab.c (sub_char_table_ref_and_range): New arg defalt. Fix
+ the previous change.
+ (char_table_ref_and_range): Adjusted for the above change.
+ (map_sub_char_table_for_charset): New function.
+ (map_char_table_for_charset): New function.
+
+ * keymap.c (describe_vector): Handle a char-table directly here.
+ (describe_char_table): Deleted.
+
+ * lisp.h (map_charset_chars): Deleted.
+
+2002-06-11 Dave Love <fx@gnu.org>
+
+ * fns.c (count_combining): Comment out (unused).
+ (Flocale_codeset): New.
+ (syms_of_fns): Defsubr it.
+
+ * config.in (HAVE_PTY_H, HAVE_SIZE_T, HAVE_LANGINFO_CODESET): New.
+ (size_t): Removed.
+
+2002-06-06 Dave Love <fx@gnu.org>
+
+ * Makefile.in (chartab.o): Depend on charset.h
+
+2002-06-03 Kenichi Handa <handa@m17n.org>
+
+ * character.c (syms_of_character): Set the default value of
+ Vprintable_chars to Qnil.
+
+2002-05-31 Dave Love <fx@gnu.org>
+
+ * Makefile.in (lisp, shortlisp): Change indian.elc to indian.el.
+
+2002-05-31 Kenichi Handa <handa@m17n.org>
+
+ * charset.c (load_charset_map): Handle the case that from < to
+ correctly.
+
+ * coding.c (encode_coding_emacs_mule): Pay attention to raw-8-bit
+ chars.
+ (encode_coding_iso_2022): Likewise.
+ (encode_coding_sjis): Likewise.
+ (encode_coding_big5): Likewise.
+ (encode_coding_charset): Likewise.
+
+2002-05-30 Kenichi Handa <handa@m17n.org>
+
+ * Makefile.in (lisp): Change chinese.elc to chinese.el. They are
+ not bytecompiled now.
+ (shortlisp): Likewise.
+
+ * charset.c (charset_jisx0201_roman, charset_jisx0208_1978)
+ (charset_jisx0208): New variables.
+ (Fdefine_charset_internal): Setup them if appropriate.
+ (init_charset_once): Initialize them to -1.
+
+ * charset.h (charset_jisx0201_roman, charset_jisx0208_1978,
+ charset_jisx0208): Extern them.
+
+ * coding.c (CODING_ISO_FLAG_USE_ROMAN): New macro
+ (CODING_ISO_FLAG_USE_OLDJIS): New macro.
+ (CODING_ISO_FLAG_FULL_SUPPORT): Macro definition changed.
+ (setup_iso_safe_charsets): Fix arguemtns to Fassq.
+ (DECODE_DESIGNATION): Pay attention to CODING_ISO_FLAG_USE_ROMAN
+ and CODING_ISO_FLAG_USE_OLDJIS.
+ (ENCODE_ISO_CHARACTER_DIMENSION1): Likewise.
+ (ENCODE_ISO_CHARACTER_DIMENSION2): Likewise.
+ (encode_coding_iso_2022): Change the 1st arg to
+ ENCODE_ISO_CHARACTER to a variable.
+
+2002-05-29 Kenichi Handa <handa@m17n.org>
+
+ * charset.h (enum define_charset_arg_index): New enums
+ charset_arg_min_code and charset_arg_max_code.
+ (struct charset): New member char_index_offset.
+
+ * charset.c (CODE_POINT_TO_INDEX): Take charset->char_index_offset
+ into account.
+ (INDEX_TO_CODE_POINT): Likewise.
+ (Fdefine_charset_internal): Handle args[charset_arg_min_code] and
+ args[charset_arg_max_code]. Setup charset.char_index_offset.
+ (syms_of_charset): Fix args to Fdefine_charset_internal.
+
+2002-05-27 Dave Love <fx@gnu.org>
+
+ * coding.c (decode_coding_utf_8): Reject overlong sequences.
+
+2002-05-26 Dave Love <fx@gnu.org>
+
+ * coding.c: Doc fixes.
+ (Fcoding_system_aliases): Fix return value.
+ (Qmac): Remove (duplicated) definition.
+
+2002-05-25 Dave Love <fx@gnu.org>
+
+ * charset.c (Fcharset_priority_list, Fset_charset_priority): New
+ functions.
+
+ * character.c (Fstring): Doc fix.
+
+ * charset.c (Fdefine_charset_alias): Update Vcharset_list.
+
+ * fontset.c (Ffontset_info): Doc fix. Return charset names, not
+ ids.
+ (font-encoding-alist): Doc fix.
+
+2002-05-24 Dave Love <fx@gnu.org>
+
+ * term.c (costs_set): Declare static, non-initialized for pcc.
+ (encode_terminal_code): Remove ensued var.
+
+ * keyboard.c (kbd_buffer_store_event): Fix interrupt_signal decl
+ for K&R.
+
+ * xterm.c (xlwmenu_window_p): Fix prototype for K&R.
+
+ * coding.c (setup_iso_safe_charsets): Fix arg decl for K&R.
+ (suffixes): Moved out of make_subsidiaries for K&R.
+
+ * charset.c (map_charset_chars): Fix c_function declaration for
+ K&R.
+
+ * lisp.h (DEFUN) [!PROTOTYPES]: Remove spurious `args'.
+
+2002-05-23 Dave Love <fx@gnu.org>
+
+ * data.c (Fchar_or_string_p): Doc fix. Use CHARACTERP.
+
+ * category.c (Fmodify_category_entry): Doc fix. Remove unused
+ vars.
+
+2002-05-23 Yong Lu <lyongu@asia-infonet.com>
+
+ * charset.c (Fdefine_charset_internal): Fix argument to bzero.
+
+ * coding.c (Fdefine_coding_system_internal): Fix previous change.
+ (decode_coding_charset): Workaround for the bug of GCC 2.96.
+
+2002-05-23 Kenichi Handa <handa@m17n.org>
+
+ * Makefile.in (lisp): Change cyrillic.elc to cyrillic.el,
+ vietnamese.elc to vietnamese.el. They are not bytecompiled now.
+ (shortlisp): Likewise.
+
+2002-05-22 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (decode_coding_charset): Adjusted for the change of
+ Fdefine_coding_system_internal.
+ (Fdefine_coding_system_internal): For a coding system of
+ `charset' type, store a list of charset IDs in
+ `charset_attr_charset_valids' element of coding attributes.
+
+ * charset.c (Fmake_char): Fix previous change.
+
+2002-05-21 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (ONE_MORE_BYTE_NO_CHECK): Increment consumed_chars.
+ (emacs_mule_char): New arg src. Delete arg `composition'. Caller
+ changed. Handle 2-byte and 3-byte charsets correctly.
+ (DECODE_EMACS_MULE_COMPOSITION_RULE_20): Renamed from
+ DECODE_EMACS_MULE_COMPOSITION_RULE. Caller changed.
+ (DECODE_EMACS_MULE_COMPOSITION_RULE_21): New macro.
+ (DECODE_EMACS_MULE_21_COMPOSITION): Call
+ DECODE_EMACS_MULE_COMPOSITION_RULE_21. Produce correct annotation
+ sequence.
+ (decode_coding_emacs_mule): Handle composition correctly. Rewind
+ `src' and `consumed_chars' correctly before calling
+ emacs_mule_char.
+ (DECODE_COMPOSITION_START): Correctly handle the case of altchar
+ and alt&rule composition.
+ (decode_coding_iso_2022): Handle composition correctly.
+ (init_coding_once): Setup emacs_mule_bytes for private charsets.
+
+ * charset.c (Fdefine_charset_internal): Fix bug for the case of
+ re-defining a charset. If the charset has :emacs-mule-id, setup
+ emacs_mule_bytes.
+ (Fmake_char): If CODE1 is nil, use the minimum code of the
+ charset.
+
+2002-05-20 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (encode_coding_iso_2022): If coding requires safe
+ encoding, produce a character specified by
+ CODING_INHIBIT_CHARACTER_SUBSTITUTION.
+ (encode_coding_sjis): Likewise.
+ (encode_coding_big5): Likewise.
+ (encode_coding_charset): Likewise.
+
+2002-05-17 Dave Love <fx@gnu.org>
+
+ * xterm.c (XSetIMValues): Declare.
+
+ * process.c: Conditionally include sys/wait.h, pty.h.
+
+ * print.c (print_object): Fix print format for 64-bit
+ systems.
+
+ * keyboard.c (modify_event_symbol): Fix print format for 64-bit
+ systems.
+
+ * buffer.c (emacs_strerror): Declare.
+ (MMAP_ALLOCATED_P, mmap_enlarge, syms_of_buffer): Import changes
+ from trunk.
+
+ * fontset.c (Fclear_face_cache): Declare.
+ (accumulate_font_info): Commented-out (unused).
+ (face_for_char, Fset_fontset_font, Ffontset_info): Remove unused
+ variables.
+
+ * character.h (string_escape_byte8): Declare.
+
+ * charset.c (load_charset_map, load_charset_map_from_file): Remove
+ unused vars.
+ (Fdefine_charset_internal, Fsplit_char, syms_of_charset)
+ (Fmap_charset_chars): Doc fix.
+
+ * coding.c (Vchar_coding_system_table, Qchar_coding_system):
+ Removed.
+ (Fset_coding_system_priority, Fset_coding_system_priority)
+ (Fdefine_coding_system_internal): Doc fix.
+
+2002-05-16 Dave Love <fx@gnu.org>
+
+ * s/osf5-0.h (C_SWITCH_SYSTEM) [!__GNUC__]: Remove -nointrinsics.
+
+2002-05-16 Kenichi Handa <handa@m17n.org>
+
+ * character.c (string_escape_byte8): Make multibyte string with
+ correct size.
+
+ * charset.c (Fmake_char): Delete unnecessary code.
+
+2002-05-14 Kenichi Handa <handa@m17n.org>
+
+ * xfns.c (x_encode_text): Allocate coding.destination here, and
+ call encode_coding_object with dst_object Qnil.
+
+ * buffer.c (Fset_buffer_multibyte): Convert 8-bit bytes to
+ multibyte form correctly.
+
+ * fontset.c (fs_load_font): Check fontp->full_name (not fontname)
+ against Vfont_encoding_alist.
+
+ * coding.c (Fdecode_sjis_char): Fix typo (0x7F->0xFF). Fix the
+ handling of charset list.
+ (encode_coding_iso_2022): Setup coding->safe_charsets in advance.
+ (decode_coding_object): Move point to coding->dst_pos before
+ calling post-read-conversion function.
+ (encode_coding_object): Give correct arguments to
+ pre-write-conversion. Ignore the return value of
+ pre-write-conversion function. Pay attention to the case that
+ pre-write-conversion changes the current buffer. If dst_object is
+ Qt, even if coding->src_bytes is zero, allocate at least one byte
+ to coding->destination.
+
+ * coding.h (JIS_TO_SJIS): Fix typo (j1->s1, j2->s2).
+
+ * charset.c (Fmake_char): Make it more backward compatible.
+ (Fmap_charset_chars): Fix docstring.
+
+2002-05-13 Dave Love <fx@gnu.org>
+
+ * coding.c: Doc fixes.
+ (Fdefine_coding_system_alias): Use names, not symbols, in
+ coding-system-alist.
+
+2002-05-13 Kenichi Handa <handa@m17n.org>
+
+ * fontset.c (free_realized_fontsets): Call Fclear_face_cache instead
+ of calling free_realized_face.
+
+2002-05-10 Yong Lu <lyongu@asia-infonet.com>
+
+ * charset.c (load_charset_map): Fix previous change.
+ (read_hex): Don't treat SPC as a comment starter.
+ (decode_char): If CODE_POINT_TO_INDEX retruns -1, always return
+ -1.
+ (Fdecode_char): Fix typo.
+
+2002-05-10 Kenichi Handa <handa@m17n.org>
+
+ * charset.h (struct charset): New member `code_space_mask'.
+
+ * coding.c (coding_set_source): Delete the local variable
+ beg_byte.
+ (encode_coding_charset): Delete the local variable charset.
+ (Fdefine_coding_system_internal): Likewise.
+ (Fdefine_coding_system_internal): Setup
+ attrs[coding_attr_charset_valids] correctly.
+
+ * charset.c (CODE_POINT_TO_INDEX): Utilize `code_space_mask'
+ member to check if CODE is valid or not.
+ (Fdefine_charset_internal): Initialize `code_space_mask' member.
+ (encode_char): Before calling CODE_POINT_TO_INDEX, check if CODE
+ is within the range of charset->min_code and carset->max_code.
+
+2002-05-09 Dave Love <fx@gnu.org>
+
+ * syntax.h (syntax_temp) [!__GNUC__]: Declare.
+
+ * dispextern.h (generate_ascii_font): Fix return type.
+
+ * xfaces.c (generate_ascii_font): Fix arg declaration.
+
+ * coding.c (coding_inherit_eol_type)
+ (Fset_terminal_coding_system_internal)
+ (Fset_safe_terminal_coding_system_internal): Fix arg declarations.
+
+2002-05-08 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (decode_coding_charset, encode_coding_charset): Handle
+ multiple charsets correctly.
+
+2002-05-07 Kenichi Handa <handa@m17n.org>
+
+ * search.c (boyer_moore): Fix handling of mulitbyte character
+ translation.
+
+ * xdisp.c (display_mode_element): When the variable `elt' is
+ changed, update `this' and `lisp_string'.
+
+2002-05-07 Kenichi Handa <handa@m17n.org>
+
+ * buffer.c (Fset_buffer_multibyte): Fix 8-bit char handling.
+
+ * callproc.c (Fcall_process): Be sure to give the current buffer
+ to decode_coding_c_string. Update PT and PT_BYTE after the
+ insertion.
+
+ * charset.c (struct charset_map_entries): New struct.
+ (load_charset_map): Renamed from parse_charset_map. New args
+ entries and n_entries. Caller changed.
+ (load_charset_map_from_file): Renamed from load_charset_map.
+ Caller changed. New arg control_flag. Call load_charset_map at
+ the tail.
+ (load_charset_map_from_vector): New function.
+ (Fdefine_charset_internal): Setup charset.compact_codes_p.
+ (encode_char): If the charset is compact, change a character index
+ to a code point.
+
+ * coding.c (coding_alloc_by_making_gap): Check the case that the
+ source and destination are the same correctly.
+ (decode_coding_raw_text): Set coding->consumed_char and
+ coding->consumed to 0.
+ (produce_chars): If coding->chars_at_source is nonzero, update
+ coding->consumed_char and coding->consumed before calling
+ alloc_destination.
+ (Fdefine_coding_system_alias): Register ALIAS in
+ Vcoding_system_alist.
+ (syms_of_coding): Define `no-convesion' coding system at the tail.
+
+ * fileio.c (Finsert_file_contents): Set coding_system instead of
+ val. If the current buffer is multibyte, always call
+ decode_coding_gap.
+
+ * xfaces.c (try_font_list): Give higher priority to fontset's
+ family than face's family.
+
+2002-04-18 Kenichi Handa <handa@m17n.org>
+
+ * callproc.c (Fcall_process): Be sure to give the current buffer
+ to decode_coding_c_string.
+
+ * xfaces.c (try_font_list): Give a family specified in a fontset
+ higher priority than a family specified in a face.
+
+2002-04-09 Kenichi Handa <handa@m17n.org>
+
+ * fileio.c (Finsert_file_contents): Fix calculation of `inserted'.
+ Fix arguments to insert_from_buffer.
+
+ * xdisp.c (display_mode_element): Fix calculation of `bytepos'.
+
+2002-03-11 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (produce_chars): Set the variable `multibytep' correctly.
+ (decode_coding_gap): Set coding->dst_multibyte correctly.
+
+2002-03-07 Kenichi Handa <handa@m17n.org>
+
+ * coding.c (encode_coding_utf_8): Initialize produced_chars to 0.
+ (decode_coding_utf_16): Fix converting high and low bytes to
+ code-point.
+ (encode_coding_utf_16): Substitute coding->default_char for
+ non-Unicode characters.
+ (decode_coding): Don't call record_insert here.
+ (setup_coding_system): Initialize `surrogate' of
+ coding->spec.utf_16 to 0.
+ (EMIT_ONE_BYTE): Fix for multibyte case.
+
+ * insdel.c (insert_from_gap): Call record_insert.
+
+2002-03-04 Kenichi Handa <handa@m17n.org>
+
+ * casefiddle.c (casify_region): Fix multibyte case.
+
+ * character.c (c_string_width): Add return type `int'.
+ (char_string_with_unification): Arg ADVANCED deleted.
+
+ * character.h (CHAR_VALID_P): Don't call CHARACTERP.
+ (CHAR_STRING): Adjusted for the change of
+ char_string_with_unification.
+ (CHAR_STRING_ADVANCE): Make it do-while statement.
+
+ * chartab.c (sub_char_table_set_range): Optimized for the case
+ DEPTH == 3. Add workaround code for a GCC optimization bug.
+
+ * charset.c (parse_charset_map): Remove an unused variable.
+
+ * coding.c: Delete unused variables.
+
+ * fileio.c (Finsert_file_contents): Set coding_system to Qnil
+ earlier. If inserted is zero and the coding system doesn't
+ require flushing, don't call decode_coding_gap.
+
+ * syntax.h (SET_RAW_SYNTAX_ENTRY): Don't call make_number.
+
+2002-03-01 Kenichi Handa <handa@m17n.org>
+
+ The following changes are for using Unicode as an internal
+ character model, and use UTF-8 format for buffer/string
+ representation.
+
+ * .gdbinit (xchartable): Adjusted for the change of char table
+ structure.
+ (xsubchartable, xcoding, xcharset, xcurbuf): New commands.
+
+ * Makefile.in (obj): Add character.o and chartab.o.
+ (lisp, shortlisp): Remove utf-8.elc:
+ (*.o): For many files, change dependency on charset.h to
+ character.h, and add dependency on character.h.
+ (character.o, chartab.o): New targets.
+
+ * abbrev.c, bytecode.c, casefiddle.c, cmds.c, dispnew.c, doc.c,
+ doprnt.c, dosfns.c, frame.c, marker.c, minibuf.c, msdos.c,
+ w16select.c, w32bdf.c, w32console.c: Include "character.h" instead
+ of "charset.h".
+
+ * dired.c, filelock.c: Include "character.h".
+
+ * alloc.c: Include "character.h" instead of "charset.h".
+ (Fmake_char_table): Moved to chartab.c.
+ (make_sub_char_table): Likewise.
+ (syms_of_alloc): Remove defsubr for Smake_char_table.
+
+ * buffer.c: Include "character.h" instead of "charset.h", don't
+ include "coding.h".
+ (Fset_buffer_multibyte): Adjuted for UTF-8.
+
+ * buffer.h: EXFUN Fbuffer_live_p.
+
+ * callproc.c: Include "character.h" instead of "charset.h".
+ (Fcall_process): Big change for the new code-conversion APIs.
+
+ * casetab.c: Include "character.h" instead of "charset.h".
+ (set_canon, set_identity, shuffle): Adjusted for the new
+ map_char_table spec.
+ (init_casetab_once): Call CHAR_TABLE_SET instead of directly
+ accessing the char table structure.
+
+ * chartab.c: New file that implements char table.
+
+ * category.c: Include "character.h".
+ (copy_category_entry): New function.
+ (copy_category_table): Call map_char_table and copy_category_entry.
+ (Fmake_category_table): Initialize all top-vel slots.
+ (char_category_set): New function.
+ (modify_lower_category_set): Deleted.
+ (Fmodify_category_entry): Call char_table_ref_and_range.
+
+ * category.h (CATEGORY_SET): Just call char_category_set.
+
+ * ccl.c: Include "character.h".
+ (Qccl, Qcclp): New variables.
+ (CCL_WRITE_CHAR): Alway treat the arg CH as a character even if
+ it's less than 256.
+ (CCL_WRITE_MULTIBYTE_CHAR): Deleted.
+ (CCL_WRITE_STRING, CCL_READ_CHAR): Adjusted for the change of SRC
+ and DST type.
+ (ccl_driver): Types of arguments changed. Code adjusted for that.
+ (Fccl_execute, Fccl_execute_on_string): Adjusted for the change of
+ ccl_driver.
+ (syms_of_ccl): Intern and staticpro Qccl and Qcclp.
+
+ * ccl.h (struct ccl_program): Members eol_type and multibyte
+ deleted. New members src_multibyte, dst_multibyte, consumed, and
+ produced.
+ (struct ccl_spec): Members decoder and encoder deleted. New
+ memeber ccl.
+ (CODING_SPEC_CCL_PROGRAM): New macro.
+ (ccl_driver): Prototype updated.
+ (Qccl, Qcclp, Fccl_program_p): Extern them.
+ (CHECK_CCL_PROGRAM): New macro.
+
+ * character.c, character.h, chartab.c: New files.
+
+ * charset.c: Mostly re-written. Character and multibyte sequence
+ handling codes are moved to character.c.
+
+ * charset.h: Mostly re-written. Character and multibyte sequence
+ handling codes are moved to character.h.
+
+ * coding.c, coding.h: Mostly re-written.
+
+ * composite.c: Include "character.h" instead of "charset.h".
+ (CHAR_WIDTH): Moved to character.h.
+ (HASH_KEY, HASH_VALUE): Deleted.
+
+ * composite.h (enum composition_method): Order of enumeration
+ symbols changed.
+
+ * data.c: Include "character.h" instead of "charset.h".
+ (Faref): Call CHAR_TABLE_REF for a char table.
+ (Faset): Call CHAR_TABLE_SET for a char table.
+
+ * dispextern.h (free_realized_face, check_face_attribytes,
+ generate_ascii_font): Extern them.
+ (free_realized_multibyte_face): Extern deleted.
+
+ * disptab.h (DISP_CHAR_VECTOR): Adjusted for the change of char
+ table structure.
+
+ * editfns.c: Include "character.h" instead of "charset.h".
+ (Fchar_to_string): Always call CHAR_STRING.
+
+ * emacs.c (main): Call init_charset_once, init_charset,
+ syms_of_chartab, and syms_of_character.
+
+ * fileio.c: Include "character.h" instead of "charset.h".
+ (Finsert_file_contents): Big change for the new code-conversion
+ API.
+ (choose_write_coding_system): Likewise.
+ (Fwrite_region): Likewise.
+ (build_annotations_2): Deleted.
+ (e_write): Big change for the new code-conversion API.
+
+ * fns.c: Include "character.h" instead of "charset.h".
+ (copy_sub_char_table): Moved to chartab.c.
+ (Fcopy_sequence): Call copy_char_table for a char table.
+ (concat): Delete codes calling count_multibyte.
+ (string_char_to_byte): Adjusted for the new multibyte form.
+ (string_byte_to_char): Likewise.
+ (internal_equal): Adjusted for the change of char table structure.
+ (Fchar_table_subtype, Fchar_table_parent, Fset_char_table_parent,
+ Fchar_table_extra_slot, Fset_char_table_extra_slot,
+ Fchar_table_range, Fset_char_table_range, Fset_char_table_default,
+ char_table_translate, optimize_sub_char_table,
+ Foptimize_char_table, map_char_table, Fmap_char_table): Moved to
+ chartab.c.
+ (char_table_ref_and_index): Deleted.
+ (HASH_KEY, HASH_VALUE): Moved to lisp.h.
+ (Fmd5): Call preferred_coding_system instead of accessing
+ Vcoding_category_list. Adjusted for the new code-conversion API.
+ (syms_of_fns): Defsubr for char table related functions moved to
+ chartab.c.
+
+ * fontset.c: Mostly re-written.
+
+ * fontset.h (struct font_info): Type of the member encoding_type
+ changed.
+ (enum FONT_SPEC_INDEX): New enum.
+ (fontset_font_pattern, fs_load_font): Prototype updated.
+ (FS_LOAD_FONT): Adjusted for the change of fs_load_font.
+
+ * indent.c: Include "character.h" instead of "charset.h".
+ (MULTIBYTE_BYTES_WIDTH): Call CHAR_WIDTH instead of
+ WIDTH_BY_CHAR_HEAD.
+
+ * insdel.c: Include "character.h" instead of "charset.h".
+ (copy_text): Don't refer to Vnonascii_translation_table.
+ (insert_from_gap): New function.
+
+ * keyboard.c: Include "character.h" instead of "charset.h".
+ (command_loop_1): Never call direct_output_forward_char before
+ a non-ASCII character.
+ (read_char): If Vkeyboard_translate_table is a char table, always
+ translated a character.
+
+ * keymap.c: Include "character.h".
+ (store_in_keymap): Handle the case that IDX is a cons.
+ (Fdefine_key): Handle the case that KEY is a cons and the car part
+ is also a cons (range).
+ (push_key_description): Adjusted for the new character code.
+ (describe_vector): Call describe_char_table for a char table.
+ (describe_char_table): New function.
+
+ * keymap.h (describe_char_table): Extern it.
+
+ * lisp.h (enum pvec_type): New member PVEC_SUB_CHAR_TABLE.
+ (XSUB_CHAR_TABLE, XSETSUB_CHAR_TABLE): New macros.
+ (CHAR_TABLE_ORDINARY_SLOTS, CHAR_TABLE_SINGLE_BYTE_SLOTS,
+ SUB_CHAR_TABLE_ORDINARY_SLOTS, SUB_CHAR_TABLE_STANDARD_SLOTS):
+ Deleted.
+ (CHAR_TABLE_REF, CHAR_TABLE_SET): Adjusted for the new char table
+ structure.
+ (CHAR_TABLE_TRANSLATE): Just call char_table_translate.
+ (CHARTAB_SIZE_BITS_0, CHARTAB_SIZE_BITS_1, CHARTAB_SIZE_BITS_2,
+ CHARTAB_SIZE_BITS_3): New macros.
+ (chartab_size): Extern it.
+ (struct Lisp_Char_Table): Re-designed.
+ (struct Lisp_Sub_Char_Table): New structure.
+ (HASH_KEY, HASH_VALUE): Moved from fns.c.
+ (CHARACTERBITS): Defined as 22.
+ (GLYPH_MASK_FACE, GLYPH_MASK_CHAR): Adjusted for the above change.
+ (SUB_CHAR_TABLE_P): Check PVEC_CHAR_TABLE.
+ (GC_SUB_CHAR_TABLE_P): New macro.
+ (Fencode_coding_string, Fdecode_coding_string): EXFUN Updated.
+ (code_convert_string_norecord): Extern deleted.
+ (init_character_once, syms_of_character, init_charset,
+ syms_of_composite, Qeq, Fmakehash, insert_from_gap): Extern them.
+
+ * lread.c: Include "character.h".
+ (read_multibyte): New arg NBYTES.
+ (read_escape): The meaning of returned *BYTEREP changed.
+ (to_multibyte): Deleted.
+ (read1): Adjuted the handling of char table and string.
+
+ * print.c: Include "character.h" instead of "charset.h".
+ (print_string): Convert 8-bit raw bytes to octal form by
+ string_escape_byte8.
+ (print_object): Adjusted for the new multibyte form. Print 8-bit
+ raw bytes always in octal form. Handle sub char table correctly.
+
+ * process.c: Include "character.h" instead of "charset.h".
+ (read_process_output): Adjusted for the new code-conversion API.
+ (send_process): Likewise.
+
+ * puresize.h (BASE_PURESIZE): Increased.
+
+ * regex.c: Include "character.h" instead of "charset.h".
+ (BYTE8_TO_CHAR, CHAR_BYTE8_P) [not emacs]: New dummy macros.
+ (regex_compile): Accept a range whose starting and ending
+ character have different leading bytes.
+ (analyse_first): Adjusted for the above change.
+
+ * search.c: Include "character.h" instead of "charset.h".
+ (search_buffer, boyer_moore): Adjusted for the new multibyte form.
+ (Freplace_match): Adjusted for the change of
+ multibyte_char_to_unibyte.
+
+ * syntax.c: Include "character.h" instead of "charset.h".
+ (syntax_parent_lookup): Deleted.
+ (Fmodify_syntax_entry): Accept a cons as CHAR.
+ (skip_chars): Adjusted for the new multibyte form.
+ (init_syntax_once): Call char_table_set_range instead of directly
+ accessing the structure of a char table.
+
+ * syntax.h (SET_RAW_SYNTAX_ENTRY): Call CHAR_TABLE_SET.
+ (SYNTAX_ENTRY_FOLLOW_PARENT): Macro deleted.
+ (SET_RAW_SYNTAX_ENTRY_RANGE): New macro.
+ (SYNTAX_ENTRY_INT): Call CHAR_TABLE_REF.
+
+ * term.c: Include "buffer.h" and "character.h".
+ (encode_terminal_code): Adjusted for the new code-conversion API.
+ (write_glyphs): Likewise.
+ (produce_glyphs): Call CHAR_WIDTH instead of CHARSET_WIDTH.
+
+ * w32term.c (x_new_font): Adjusted for the change of FS_LOAD_FONT.
+
+ * xdisp.c: Include "character.h".
+ (get_next_display_element): Adjusted for the new multibyte form.
+ (disp_char_vector): Adjusted for the new char table structure.
+ (decode_mode_spec_coding): Adjusted for the new structure of
+ coding system.
+ (decode_mode_spec): Adjusted for the new code-conversion API.
+
+ * xfaces.c: Include "character.h" instead of "charset.h".
+ (load_face_font): Adjusted for the change of choose_face_font and
+ FS_LOAD_FONT.
+ (generate_ascii_font): New function.
+ (set_lface_from_font_name): Adjusted for the change of
+ FS_LOAD_FONT.
+ (set_font_frame_param): Adjusted for the change of
+ choose_face_font.
+ (free_realized_face): Make it public.
+ (free_realized_faces_for_fontset): Renamed from
+ free_realized_multibyte_face. Free also faces realized for ASCII.
+ (choose_face_font): Argments changed. Adjusted for the change of
+ fontset_font_pattern and FS_LOAD_FONT.
+
+ * xfns.c: Include "character.h".
+ (x_encode_text): Adjusted for the new code-conversion API.
+
+ * xselect.c: Don't include "charset.h".
+ (selection_data_to_lisp_data): Adjusted for the new code
+ covnersion API.
+
+ * xterm.c: Include "character.h".
+ (x_encode_char): New argument CHARSET. Caller changed.
+ (x_get_char_face_and_encoding): Call ENCODE_CHAR instead of
+ SPLIT_CHAR.
+ (x_get_glyph_face_and_encoding): Likewise.
+ (x_produce_glyphs): Don't check Vnonascii_translation_table Call
+ CHAR_WIDTH instead of CHARSET_WIDTH.
+ (XTread_socket): Adjusted for the new code-conversion API.
+ (x_new_font): Adjusted for the change of FS_LOAD_FONT.
+ (x_load_font): Adjusted for the change of struct font.
+
+;; Local Variables:
+;; coding: iso-2022-7bit
+;; End:
+
+ Copyright (C) 2002 Free Software Foundation, Inc.
+ Copying and distribution of this file, with or without modification,
+ are permitted provided the copyright notice and this notice are preserved.
+
+;;; arch-tag: 1bff38bd-2030-46ae-9d18-f15e6006b665
diff --git a/src/Makefile.in b/src/Makefile.in
index 18f276c00de..beee481d611 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -50,6 +50,7 @@ LIBOBJS = @LIBOBJS@
dot = .
dotdot = ${dot}${dot}
lispsource = ${srcdir}/$(dot)$(dot)/lisp/
+admindir = $(srcdir)/$(dot)$(dot)/admin/
libsrc = $(dot)$(dot)/lib-src/
etc = $(dot)$(dot)/etc/
oldXMenudir = $(dot)$(dot)/oldXMenu/
@@ -280,7 +281,7 @@ TOOLKIT_DEFINES =
/* C_SWITCH_X_SITE must come before C_SWITCH_X_MACHINE and C_SWITCH_X_SYSTEM
since it may have -I options that should override those two. */
-ALL_CFLAGS=-Demacs -DHAVE_CONFIG_H $(TOOLKIT_DEFINES) $(MYCPPFLAGS) -I. -I${srcdir} C_SWITCH_MACHINE C_SWITCH_SYSTEM C_SWITCH_SITE C_SWITCH_X_SITE C_SWITCH_X_MACHINE C_SWITCH_X_SYSTEM C_SWITCH_SYSTEM_TEMACS ${CFLAGS_SOUND} ${CFLAGS}
+ALL_CFLAGS=-Demacs -DHAVE_CONFIG_H $(TOOLKIT_DEFINES) $(MYCPPFLAGS) -I. -I${srcdir} C_SWITCH_MACHINE C_SWITCH_SYSTEM C_SWITCH_SITE C_SWITCH_X_SITE C_SWITCH_X_MACHINE C_SWITCH_X_SYSTEM C_SWITCH_SYSTEM_TEMACS ${CFLAGS_SOUND} ${CFLAGS} @FREETYPE_CFLAGS@ @FONTCONFIG_CFLAGS@ @LIBOTF_CFLAGS@
.c.o:
$(CC) -c $(CPPFLAGS) $(ALL_CFLAGS) $<
@@ -401,6 +402,11 @@ LIBXT=$(LIBW)
#endif
#endif /* not USE_X_TOOLKIT */
+#if HAVE_XFT
+#undef LIB_X11_LIB
+#define LIB_X11_LIB @XFT_LIBS@
+#endif /* HAVE_XFT */
+
#if HAVE_XPM
#ifndef LIBXPM
#define LIBXPM -lXpm
@@ -574,10 +580,27 @@ emacsapp = $(PWD)/$(mac)Emacs.app/
emacsappsrc = ${srcdir}/../mac/Emacs.app/
#endif
+#ifdef HAVE_WINDOW_SYSTEM
+#ifdef USE_FONT_BACKEND
+FONTSRC = font.h
+#ifdef HAVE_X_WINDOWS
+#if defined (HAVE_XFT)
+FONTOBJ = font.o xfont.o ftfont.o xftfont.o ftxfont.o
+#elif defined (HAVE_FREETYPE)
+FONTOBJ = font.o xfont.o ftfont.o ftxfont.o
+#else /* ! defined (HAVE_XFT) && ! defined (HAVE_FREETYPE) */
+FONTOBJ = font.o xfont.o
+#endif /* ! defined (HAVE_XFT) && ! defined (HAVE_FREETYPE) */
+#else /* ! HAVE_X_WINDOWS */
+FONTOBJ = font.o
+#endif /* ! HAVE_X_WINDOWS */
+#endif /* USE_FONT_BACKEND */
+#endif /* HAVE_WINDOW_SYSTEM */
+
/* lastfile must follow all files
whose initialized data areas should be dumped as pure by dump-emacs. */
obj= dispnew.o frame.o scroll.o xdisp.o $(XMENU_OBJ) window.o \
- charset.o coding.o category.o ccl.o \
+ charset.o coding.o category.o ccl.o character.o chartab.o \
cm.o term.o xfaces.o $(XOBJ) $(GTK_OBJ)\
emacs.o keyboard.o macros.o keymap.o sysdep.o \
buffer.o filelock.o insdel.o marker.o \
@@ -589,7 +612,7 @@ obj= dispnew.o frame.o scroll.o xdisp.o $(XMENU_OBJ) window.o \
process.o callproc.o \
region-cache.o sound.o atimer.o \
doprnt.o strftime.o intervals.o textprop.o composite.o md5.o \
- $(MSDOS_OBJ) $(MAC_OBJ) $(CYGWIN_OBJ)
+ $(MSDOS_OBJ) $(MAC_OBJ) $(CYGWIN_OBJ) $(FONTOBJ)
/* Object files used on some machine or other.
These go in the DOC file on all machines
@@ -598,7 +621,7 @@ SOME_MACHINE_OBJECTS = sunfns.o dosfns.o msdos.o \
xterm.o xfns.o xmenu.o xselect.o xrdb.o xsmfns.o fringe.o image.o \
mac.o macterm.o macfns.o macmenu.o macselect.o fontset.o \
w32.o w32bdf.o w32console.o w32fns.o w32heap.o w32inevt.o \
- w32menu.o w32proc.o w32reg.o w32select.o w32term.o w32xfns.o
+ w32menu.o w32proc.o w32reg.o w32select.o w32term.o w32xfns.o $(FONTOBJ)
#ifdef TERMINFO
@@ -722,6 +745,7 @@ lisp= \
${lispsource}buff-menu.elc \
${lispsource}button.elc \
${lispsource}emacs-lisp/byte-run.elc \
+ ${lispsource}composite.elc \
${lispsource}cus-face.elc \
${lispsource}cus-start.elc \
${lispsource}custom.elc \
@@ -749,20 +773,11 @@ lisp= \
${lispsource}international/mule-conf.el \
${lispsource}international/mule-cmds.elc \
${lispsource}international/characters.elc \
- ${lispsource}international/ucs-tables.elc \
- ${lispsource}international/utf-8.elc \
- ${lispsource}international/utf-16.elc \
- ${lispsource}international/latin-1.el \
- ${lispsource}international/latin-2.el \
- ${lispsource}international/latin-3.el \
- ${lispsource}international/latin-4.el \
- ${lispsource}international/latin-5.el \
- ${lispsource}international/latin-8.el \
- ${lispsource}international/latin-9.el \
+ ${lispsource}international/charprop.el \
${lispsource}case-table.elc \
- ${lispsource}language/chinese.elc \
- ${lispsource}language/cyrillic.elc \
- ${lispsource}language/indian.elc \
+ ${lispsource}language/chinese.el \
+ ${lispsource}language/cyrillic.el \
+ ${lispsource}language/indian.el \
${lispsource}language/devanagari.el \
${lispsource}language/kannada.el \
${lispsource}language/malayalam.el \
@@ -780,7 +795,7 @@ lisp= \
${lispsource}language/lao.el \
${lispsource}language/thai.el \
${lispsource}language/tibetan.elc \
- ${lispsource}language/vietnamese.elc \
+ ${lispsource}language/vietnamese.el \
${lispsource}language/misc-lang.el \
${lispsource}language/utf-8-lang.el \
${lispsource}language/georgian.el \
@@ -821,6 +836,7 @@ shortlisp= \
../lisp/buff-menu.elc \
../lisp/button.elc \
../lisp/emacs-lisp/byte-run.elc \
+ ../lisp/composite.elc \
../lisp/cus-face.elc \
../lisp/cus-start.elc \
../lisp/custom.elc \
@@ -846,20 +862,10 @@ shortlisp= \
../lisp/international/mule-conf.el \
../lisp/international/mule-cmds.elc \
../lisp/international/characters.elc \
- ../lisp/international/ucs-tables.elc \
- ../lisp/international/utf-8.elc \
- ../lisp/international/utf-16.elc \
- ../lisp/international/latin-1.el \
- ../lisp/international/latin-2.el \
- ../lisp/international/latin-3.el \
- ../lisp/international/latin-4.el \
- ../lisp/international/latin-5.el \
- ../lisp/international/latin-8.el \
- ../lisp/international/latin-9.el \
../lisp/case-table.elc \
- ../lisp/language/chinese.elc \
- ../lisp/language/cyrillic.elc \
- ../lisp/language/indian.elc \
+ ../lisp/language/chinese.el \
+ ../lisp/language/cyrillic.el \
+ ../lisp/language/indian.el \
../lisp/language/devanagari.el \
../lisp/language/kannada.el \
../lisp/language/malayalam.el \
@@ -877,7 +883,7 @@ shortlisp= \
../lisp/language/lao.el \
../lisp/language/thai.el \
../lisp/language/tibetan.elc \
- ../lisp/language/vietnamese.elc \
+ ../lisp/language/vietnamese.el \
../lisp/language/misc-lang.el \
../lisp/language/utf-8-lang.el \
../lisp/language/georgian.el \
@@ -931,7 +937,7 @@ SOME_MACHINE_LISP = ${dotdot}/lisp/mouse.elc \
LIBES = $(LOADLIBES) $(LIBS) $(LIBX) $(LIBSOUND) \
LIBS_SYSTEM LIBS_MACHINE LIBS_TERMCAP \
LIBS_DEBUG $(GETLOADAVG_LIBS) $(GNULIB_VAR) LIB_MATH LIB_STANDARD \
- $(GNULIB_VAR)
+ $(GNULIB_VAR) @FREETYPE_LIBS@ @FONTCONFIG_LIBS@ @LIBOTF_LIBS@
/* Enable recompilation of certain other files depending on system type. */
@@ -943,7 +949,11 @@ LIBES = $(LOADLIBES) $(LIBS) $(LIBX) $(LIBSOUND) \
#define OBJECTS_MACHINE
#endif
-RUN_TEMACS = ./temacs
+#ifdef HAVE_SHM
+RUN_TEMACS = `/bin/pwd`/temacs -nl
+#else
+RUN_TEMACS = `/bin/pwd`/temacs
+#endif
all: emacs${EXEEXT} OTHER_FILES
@@ -952,11 +962,7 @@ emacs${EXEEXT}: temacs${EXEEXT} ${etc}DOC ${lisp}
rm -f emacs${EXEEXT}
ln temacs${EXEEXT} emacs${EXEEXT}
#else
-#ifdef HAVE_SHM
- LC_ALL=C $(RUN_TEMACS) -nl -batch -l loadup dump
-#else /* ! defined (HAVE_SHM) */
LC_ALL=C $(RUN_TEMACS) -batch -l loadup dump
-#endif /* ! defined (HAVE_SHM) */
#endif /* ! defined (CANNOT_DUMP) */
-./emacs -q -batch -f list-load-path-shadows
@@ -976,6 +982,16 @@ ${etc}DOC: ${libsrc}make-docfile${EXEEXT} ${obj} ${shortlisp} ${SOME_MACHINE_LIS
${libsrc}make-docfile${EXEEXT}:
cd ${libsrc}; ${MAKE} ${MFLAGS} make-docfile${EXEEXT}
+#ifdef HAVE_UNIDATA
+UNIDATA=${admindir}unidata/UnicodeData.txt
+#endif
+
+${lispsource}international/charprop.el: ${UNIDATA}
+ RUNEMACS="$(RUN_TEMACS)"; \
+ cd ${admindir}unidata; \
+ $(MAKE) $(MFLAGS) \
+ RUNEMACS="$${RUNEMACS}" DSTDIR=${lispsource}international
+
/* Some systems define this to cause parallel Make-ing. */
#ifndef MAKE_PARALLEL
#define MAKE_PARALLEL
@@ -1082,71 +1098,80 @@ alloca.o: alloca.c blockinput.h atimer.h
it is so often changed in ways that do not require any recompilation
and so rarely changed in ways that do require any. */
-abbrev.o: abbrev.c buffer.h window.h dispextern.h commands.h charset.h \
+abbrev.o: abbrev.c buffer.h window.h dispextern.h commands.h character.h \
syntax.h $(config_h)
buffer.o: buffer.c buffer.h region-cache.h commands.h window.h \
- dispextern.h $(INTERVAL_SRC) blockinput.h atimer.h systime.h charset.h \
+ dispextern.h $(INTERVAL_SRC) blockinput.h atimer.h systime.h character.h \
$(config_h)
callint.o: callint.c window.h commands.h buffer.h keymap.h \
keyboard.h dispextern.h $(config_h)
callproc.o: callproc.c epaths.h buffer.h commands.h $(config_h) \
- process.h systty.h syssignal.h charset.h coding.h ccl.h msdos.h \
+ process.h systty.h syssignal.h character.h coding.h ccl.h msdos.h \
composite.h w32.h blockinput.h atimer.h systime.h
-casefiddle.o: casefiddle.c syntax.h commands.h buffer.h composite.h \
+casefiddle.o: casefiddle.c syntax.h commands.h buffer.h character.h \
+ composite.h \
charset.h keymap.h $(config_h)
casetab.o: casetab.c buffer.h $(config_h)
-category.o: category.c category.h buffer.h charset.h keymap.h $(config_h)
-ccl.o: ccl.c ccl.h charset.h coding.h $(config_h)
-charset.o: charset.c charset.h buffer.h coding.h composite.h disptab.h \
- $(config_h)
-coding.o: coding.c coding.h ccl.h buffer.h charset.h intervals.h composite.h \
+category.o: category.c category.h buffer.h charset.h keymap.h \
+ character.h $(config_h)
+ccl.o: ccl.c ccl.h charset.h character.h coding.h $(config_h)
+character.o: character.c character.h buffer.h charset.h composite.h disptab.h \
+ $(config.h)
+charset.o: charset.c charset.h character.h buffer.h coding.h composite.h \
+ disptab.h $(config_h)
+chartab.o: charset.h character.h $(config.h)
+coding.o: coding.c coding.h ccl.h buffer.h character.h charset.h intervals.h composite.h \
window.h dispextern.h $(config_h)
cm.o: cm.c cm.h termhooks.h $(config_h)
-cmds.o: cmds.c syntax.h buffer.h charset.h commands.h window.h $(config_h) \
+cmds.o: cmds.c syntax.h buffer.h character.h commands.h window.h $(config_h) \
msdos.h dispextern.h keyboard.h keymap.h
pre-crt0.o: pre-crt0.c
ecrt0.o: ecrt0.c $(config_h)
CRT0_COMPILE ${srcdir}/ecrt0.c
-dired.o: dired.c commands.h buffer.h $(config_h) charset.h coding.h regex.h \
- systime.h blockinput.h
+dired.o: dired.c commands.h buffer.h $(config_h) character.h charset.h \
+ coding.h regex.h systime.h blockinput.h
dispnew.o: dispnew.c systty.h systime.h commands.h process.h frame.h \
window.h buffer.h dispextern.h termchar.h termopts.h termhooks.h cm.h \
disptab.h indent.h intervals.h \
- xterm.h blockinput.h atimer.h charset.h msdos.h composite.h keyboard.h \
+ xterm.h blockinput.h atimer.h character.h msdos.h composite.h keyboard.h \
$(config_h)
-doc.o: doc.c $(config_h) epaths.h buffer.h keyboard.h keymap.h charset.h
-doprnt.o: doprnt.c charset.h $(config_h)
+doc.o: doc.c $(config_h) epaths.h buffer.h keyboard.h keymap.h character.h
+doprnt.o: doprnt.c character.h $(config_h)
dosfns.o: buffer.h termchar.h termhooks.h frame.h blockinput.h window.h \
msdos.h dosfns.h dispextern.h charset.h coding.h $(config_h)
-editfns.o: editfns.c window.h buffer.h systime.h $(INTERVAL_SRC) charset.h \
+editfns.o: editfns.c window.h buffer.h systime.h $(INTERVAL_SRC) character.h \
coding.h dispextern.h frame.h blockinput.h $(config_h)
emacs.o: emacs.c commands.h systty.h syssignal.h blockinput.h process.h \
termhooks.h buffer.h atimer.h systime.h $(INTERVAL_SRC) $(config_h) \
window.h dispextern.h keyboard.h keymap.h
-fileio.o: fileio.c window.h buffer.h systime.h $(INTERVAL_SRC) charset.h \
+fileio.o: fileio.c window.h buffer.h systime.h $(INTERVAL_SRC) character.h \
coding.h msdos.h dispextern.h blockinput.h $(config_h)
-filelock.o: filelock.c buffer.h charset.h coding.h systime.h epaths.h $(config_h)
+filelock.o: filelock.c buffer.h character.h charset.h coding.h systime.h \
+ epaths.h $(config_h)
filemode.o: filemode.c $(config_h)
frame.o: frame.c xterm.h window.h frame.h termhooks.h commands.h keyboard.h \
- blockinput.h atimer.h systime.h buffer.h charset.h fontset.h \
+ blockinput.h atimer.h systime.h buffer.h character.h fontset.h \
msdos.h dosfns.h dispextern.h w32term.h macterm.h $(config_h)
fringe.o: fringe.c dispextern.h frame.h window.h buffer.h $(config_h)
-fontset.o: dispextern.h fontset.h fontset.c ccl.h buffer.h charset.h frame.h \
- keyboard.h $(config_h)
+font.o: font.c dispextern.h frame.h ccl.h character.h charset.h font.h \
+ $(config_h)
+ftfont.o: dispextern.h frame.h character.h charset.h font.h $(config_h)
+fontset.o: dispextern.h fontset.h fontset.c ccl.h buffer.h character.h \
+ charset.h frame.h keyboard.h $(FONTSRC) $(config_h)
getloadavg.o: getloadavg.c $(config_h)
image.o: image.c frame.h window.h dispextern.h blockinput.h atimer.h \
systime.h xterm.h w32term.h w32gui.h macterm.h macgui.h $(config_h)
indent.o: indent.c frame.h window.h indent.h buffer.h $(config_h) termchar.h \
- termopts.h disptab.h region-cache.h charset.h composite.h dispextern.h \
- keyboard.h
-insdel.o: insdel.c window.h buffer.h $(INTERVAL_SRC) blockinput.h charset.h \
+ termopts.h disptab.h region-cache.h character.h category.h composite.h \
+ dispextern.h keyboard.h
+insdel.o: insdel.c window.h buffer.h $(INTERVAL_SRC) blockinput.h character.h \
dispextern.h atimer.h systime.h region-cache.h $(config_h)
-keyboard.o: keyboard.c termchar.h termhooks.h termopts.h buffer.h charset.h \
+keyboard.o: keyboard.c termchar.h termhooks.h termopts.h buffer.h character.h \
commands.h frame.h window.h macros.h disptab.h keyboard.h syssignal.h \
systty.h systime.h dispextern.h syntax.h $(INTERVAL_SRC) blockinput.h \
atimer.h xterm.h puresize.h msdos.h keymap.h w32term.h macterm.h $(config_h)
keymap.o: keymap.c buffer.h commands.h keyboard.h termhooks.h blockinput.h \
- atimer.h systime.h puresize.h charset.h intervals.h $(config_h)
+ atimer.h systime.h puresize.h character.h intervals.h $(config_h)
lastfile.o: lastfile.c $(config_h)
macros.o: macros.c window.h buffer.h commands.h macros.h keyboard.h \
dispextern.h $(config_h)
@@ -1154,34 +1179,36 @@ malloc.o: malloc.c $(config_h)
gmalloc.o: gmalloc.c $(config_h)
ralloc.o: ralloc.c $(config_h)
vm-limit.o: vm-limit.c mem-limits.h $(config_h)
-marker.o: marker.c buffer.h charset.h $(config_h)
+marker.o: marker.c buffer.h character.h $(config_h)
md5.o: md5.c md5.h $(config_h)
minibuf.o: minibuf.c syntax.h dispextern.h frame.h window.h keyboard.h \
- buffer.h commands.h charset.h msdos.h $(INTERVAL_SRC) keymap.h $(config_h)
+ buffer.h commands.h character.h msdos.h $(INTERVAL_SRC) keymap.h $(config_h)
mktime.o: mktime.c $(config_h)
msdos.o: msdos.c msdos.h dosfns.h systime.h termhooks.h dispextern.h frame.h \
- termopts.h termchar.h charset.h coding.h ccl.h disptab.h window.h \
+ termopts.h termchar.h character.h coding.h ccl.h disptab.h window.h \
keyboard.h intervals.h buffer.h commands.h blockinput.h $(config_h)
process.o: process.c process.h buffer.h window.h termhooks.h termopts.h \
commands.h syssignal.h systime.h systty.h syswait.h frame.h dispextern.h \
blockinput.h atimer.h charset.h coding.h ccl.h msdos.h composite.h \
keyboard.h $(config_h)
-regex.o: regex.c syntax.h buffer.h $(config_h) regex.h category.h charset.h
+regex.o: regex.c syntax.h buffer.h $(config_h) regex.h category.h character.h \
+ charset.h
region-cache.o: region-cache.c buffer.h region-cache.h $(config_h)
scroll.o: scroll.c termchar.h dispextern.h frame.h msdos.h keyboard.h \
$(config_h)
search.o: search.c regex.h commands.h buffer.h region-cache.h syntax.h \
- blockinput.h atimer.h systime.h category.h charset.h composite.h \
- $(INTERVAL_SRC) $(config_h)
+ blockinput.h atimer.h systime.h category.h character.h composite.h \
+ $(INTERVAL_SRC) \
+ $(config_h)
strftime.o: strftime.c $(config_h)
-syntax.o: syntax.c syntax.h buffer.h commands.h category.h charset.h \
+syntax.o: syntax.c syntax.h buffer.h commands.h category.h character.h \
composite.h keymap.h regex.h $(INTERVAL_SRC) $(config_h)
sysdep.o: sysdep.c syssignal.h systty.h systime.h syswait.h blockinput.h \
process.h dispextern.h termhooks.h termchar.h termopts.h \
frame.h atimer.h window.h msdos.h dosfns.h keyboard.h $(config_h)
term.o: term.c termchar.h termhooks.h termopts.h $(config_h) cm.h frame.h \
- disptab.h dispextern.h keyboard.h charset.h coding.h ccl.h msdos.h \
- window.h keymap.h
+ disptab.h dispextern.h keyboard.h character.h charset.h coding.h ccl.h \
+ msdos.h window.h keymap.h
termcap.o: termcap.c $(config_h)
terminfo.o: terminfo.c $(config_h)
tparam.o: tparam.c $(config_h)
@@ -1197,23 +1224,32 @@ widget.o: widget.c xterm.h frame.h dispextern.h widgetprv.h \
window.o: window.c indent.h commands.h frame.h window.h buffer.h termchar.h \
termhooks.h disptab.h keyboard.h dispextern.h msdos.h composite.h \
keymap.h blockinput.h $(INTERVAL_SRC) xterm.h w32term.h macterm.h $(config_h)
-xdisp.o: xdisp.c macros.h commands.h process.h indent.h buffer.h dispextern.h coding.h \
- termchar.h frame.h window.h disptab.h termhooks.h charset.h $(config_h) \
- keyboard.h $(INTERVAL_SRC) region-cache.h xterm.h w32term.h macterm.h \
- msdos.h composite.h fontset.h blockinput.h atimer.h systime.h keymap.h
-xfaces.o: xfaces.c dispextern.h frame.h xterm.h buffer.h blockinput.h \
- window.h charset.h msdos.h dosfns.h composite.h atimer.h systime.h \
- keyboard.h fontset.h w32term.h macterm.h $(INTERVAL_SRC) $(config_h)
+xdisp.o: xdisp.c macros.h commands.h process.h indent.h buffer.h dispextern.h \
+ coding.h termchar.h frame.h window.h disptab.h termhooks.h character.h \
+ charset.h keyboard.h $(INTERVAL_SRC) region-cache.h xterm.h w32term.h \
+ macterm.h $(config_h) msdos.h composite.h fontset.h blockinput.h atimer.h \
+ systime.h keymap.h $(FONTSRC)
+xfaces.o: xfaces.c dispextern.h frame.h xterm.h buffer.h blockinput.h \
+ window.h character.h charset.h msdos.h dosfns.h composite.h atimer.h \
+ systime.h keyboard.h fontset.h w32term.h macterm.h $(INTERVAL_SRC) \
+ $(FONTSRC) $(config_h)
xfns.o: xfns.c buffer.h frame.h window.h keyboard.h xterm.h dispextern.h \
$(srcdir)/../lwlib/lwlib.h blockinput.h atimer.h systime.h epaths.h \
- charset.h gtkutil.h $(config_h)
+ character.h charset.h coding.h gtkutil.h $(config_h) termhooks.h \
+ fontset.h $(FONTSRC)
+xfont.o: dispextern.h xterm.h frame.h blockinput.h character.h charset.h \
+ font.h $(config_h)
+xftfont.o: dispextern.h xterm.h frame.h blockinput.h character.h charset.h \
+ font.h $(config_h)
+ftxfont.o: dispextern.h xterm.h frame.h blockinput.h character.h charset.h \
+ font.h $(config_h)
xmenu.o: xmenu.c xterm.h termhooks.h window.h dispextern.h frame.h buffer.h \
- keyboard.h $(srcdir)/../lwlib/lwlib.h blockinput.h atimer.h systime.h \
- gtkutil.h msdos.h coding.h $(config_h)
+ charset.h keyboard.h $(srcdir)/../lwlib/lwlib.h blockinput.h atimer.h \
+ systime.h gtkutil.h msdos.h coding.h $(config_h)
xterm.o: xterm.c xterm.h termhooks.h termopts.h termchar.h window.h buffer.h \
- dispextern.h frame.h disptab.h blockinput.h atimer.h systime.h syssignal.h \
- keyboard.h gnu.h charset.h ccl.h fontset.h composite.h \
- coding.h process.h gtkutil.h $(config_h)
+ dispextern.h frame.h disptab.h blockinput.h atimer.h systime.h syssignal.h \
+ keyboard.h gnu.h character.h charset.h ccl.h fontset.h composite.h \
+ coding.h process.h gtkutil.h $(FONTSRC) $(config_h)
xselect.o: xselect.c process.h dispextern.h frame.h xterm.h blockinput.h \
buffer.h atimer.h systime.h $(config_h)
xrdb.o: xrdb.c $(config_h) epaths.h
@@ -1229,27 +1265,29 @@ atimer.o: atimer.c atimer.h systime.h $(config_h)
/* The files of Lisp proper */
alloc.o: alloc.c process.h frame.h window.h buffer.h puresize.h syssignal.h keyboard.h \
- blockinput.h atimer.h systime.h charset.h dispextern.h $(config_h) $(INTERVAL_SRC)
-bytecode.o: bytecode.c buffer.h syntax.h charset.h window.h dispextern.h \
+ blockinput.h atimer.h systime.h character.h dispextern.h $(config_h) \
+ $(INTERVAL_SRC)
+bytecode.o: bytecode.c buffer.h syntax.h character.h window.h dispextern.h \
frame.h xterm.h $(config_h)
-data.o: data.c buffer.h puresize.h charset.h syssignal.h keyboard.h frame.h $(config_h)
+data.o: data.c buffer.h puresize.h character.h syssignal.h keyboard.h frame.h \
+ $(config_h)
eval.o: eval.c commands.h keyboard.h blockinput.h atimer.h systime.h \
dispextern.h $(config_h)
floatfns.o: floatfns.c $(config_h)
-fns.o: fns.c commands.h $(config_h) frame.h buffer.h charset.h keyboard.h \
+fns.o: fns.c commands.h $(config_h) frame.h buffer.h character.h keyboard.h \
keymap.h frame.h window.h dispextern.h $(INTERVAL_SRC) coding.h md5.h \
blockinput.h xterm.h
-print.o: print.c process.h frame.h window.h buffer.h keyboard.h charset.h \
+print.o: print.c process.h frame.h window.h buffer.h keyboard.h character.h \
$(config_h) dispextern.h termchar.h $(INTERVAL_SRC) msdos.h composite.h
-lread.o: lread.c commands.h keyboard.h buffer.h epaths.h charset.h \
- $(config_h) $(INTERVAL_SRC) termhooks.h coding.h msdos.h
+lread.o: lread.c commands.h keyboard.h buffer.h epaths.h character.h \
+ charset.h $(config_h) $(INTERVAL_SRC) termhooks.h coding.h msdos.h
/* Text properties support */
textprop.o: textprop.c buffer.h window.h dispextern.h $(INTERVAL_SRC) \
$(config_h)
intervals.o: intervals.c buffer.h $(INTERVAL_SRC) keyboard.h puresize.h \
keymap.h $(config_h)
-composite.o: composite.c buffer.h charset.h $(INTERVAL_SRC) $(config_h)
+composite.o: composite.c buffer.h character.h $(INTERVAL_SRC) $(config_h)
/* System-specific programs to be made.
OTHER_FILES and OBJECTS_MACHINE
@@ -1371,14 +1409,10 @@ bootstrap: bootstrap-emacs${EXEEXT}
/* Dump an Emacs executable named bootstrap-emacs containing the
files from loadup.el in source form. */
-bootstrap-emacs${EXEEXT}: temacs${EXEEXT}
+bootstrap-emacs${EXEEXT}: temacs${EXEEXT} ${lispsource}international/charprop.el
#ifdef CANNOT_DUMP
ln temacs${EXEEXT} bootstrap-emacs${EXEEXT}
#else
-#ifdef HAVE_SHM
- $(RUN_TEMACS) -nl -batch -l loadup bootstrap
-#else /* ! defined (HAVE_SHM) */
$(RUN_TEMACS) --batch --load loadup bootstrap
-#endif /* ! defined (HAVE_SHM) */
mv -f emacs${EXEEXT} bootstrap-emacs${EXEEXT}
#endif /* ! defined (CANNOT_DUMP) */
diff --git a/src/abbrev.c b/src/abbrev.c
index e371797f139..f47a83b0bfe 100644
--- a/src/abbrev.c
+++ b/src/abbrev.c
@@ -27,7 +27,7 @@ Boston, MA 02110-1301, USA. */
#include "commands.h"
#include "buffer.h"
#include "window.h"
-#include "charset.h"
+#include "character.h"
#include "syntax.h"
/* An abbrev table is an obarray.
@@ -388,9 +388,15 @@ Returns the abbrev symbol, if expansion took place. */)
int pos = wordstart_byte;
/* Find the initial. */
- while (pos < PT_BYTE
- && SYNTAX (*BUF_BYTE_ADDRESS (current_buffer, pos)) != Sword)
- pos++;
+ if (multibyte)
+ while (pos < PT_BYTE
+ && SYNTAX (FETCH_MULTIBYTE_CHAR (pos)) != Sword)
+ INC_POS (pos);
+ else
+ while (pos < PT_BYTE
+ && (SYNTAX (*BUF_BYTE_ADDRESS (current_buffer, pos))
+ != Sword))
+ pos++;
/* Change just that. */
pos = BYTE_TO_CHAR (pos);
diff --git a/src/alloc.c b/src/alloc.c
index 4b3fa4d7e69..2cf3ff40e3b 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -53,7 +53,7 @@ Boston, MA 02110-1301, USA. */
#include "keyboard.h"
#include "frame.h"
#include "blockinput.h"
-#include "charset.h"
+#include "character.h"
#include "syssignal.h"
#include <setjmp.h>
@@ -2277,7 +2277,7 @@ INIT must be an integer that represents a character. */)
CHECK_NUMBER (init);
c = XINT (init);
- if (SINGLE_BYTE_CHAR_P (c))
+ if (ASCII_CHAR_P (c))
{
nbytes = XINT (length);
val = make_uninit_string (nbytes);
@@ -3067,49 +3067,6 @@ See also the function `vector'. */)
}
-DEFUN ("make-char-table", Fmake_char_table, Smake_char_table, 1, 2, 0,
- doc: /* Return a newly created char-table, with purpose PURPOSE.
-Each element is initialized to INIT, which defaults to nil.
-PURPOSE should be a symbol which has a `char-table-extra-slots' property.
-The property's value should be an integer between 0 and 10. */)
- (purpose, init)
- register Lisp_Object purpose, init;
-{
- Lisp_Object vector;
- Lisp_Object n;
- CHECK_SYMBOL (purpose);
- n = Fget (purpose, Qchar_table_extra_slots);
- CHECK_NUMBER (n);
- if (XINT (n) < 0 || XINT (n) > 10)
- args_out_of_range (n, Qnil);
- /* Add 2 to the size for the defalt and parent slots. */
- vector = Fmake_vector (make_number (CHAR_TABLE_STANDARD_SLOTS + XINT (n)),
- init);
- XCHAR_TABLE (vector)->top = Qt;
- XCHAR_TABLE (vector)->parent = Qnil;
- XCHAR_TABLE (vector)->purpose = purpose;
- XSETCHAR_TABLE (vector, XCHAR_TABLE (vector));
- return vector;
-}
-
-
-/* Return a newly created sub char table with slots initialized by INIT.
- Since a sub char table does not appear as a top level Emacs Lisp
- object, we don't need a Lisp interface to make it. */
-
-Lisp_Object
-make_sub_char_table (init)
- Lisp_Object init;
-{
- Lisp_Object vector
- = Fmake_vector (make_number (SUB_CHAR_TABLE_STANDARD_SLOTS), init);
- XCHAR_TABLE (vector)->top = Qnil;
- XCHAR_TABLE (vector)->defalt = Qnil;
- XSETCHAR_TABLE (vector, XCHAR_TABLE (vector));
- return vector;
-}
-
-
DEFUN ("vector", Fvector, Svector, 0, MANY, 0,
doc: /* Return a newly created vector with specified arguments as elements.
Any number of arguments, even zero arguments, are allowed.
@@ -6377,7 +6334,6 @@ The time is in seconds as a floating point value. */);
defsubr (&Smake_byte_code);
defsubr (&Smake_list);
defsubr (&Smake_vector);
- defsubr (&Smake_char_table);
defsubr (&Smake_string);
defsubr (&Smake_bool_vector);
defsubr (&Smake_symbol);
diff --git a/src/buffer.c b/src/buffer.c
index fcb842de83c..6115f727deb 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -42,7 +42,7 @@ extern int errno;
#include "window.h"
#include "commands.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "region-cache.h"
#include "indent.h"
#include "blockinput.h"
@@ -181,6 +181,7 @@ static struct Lisp_Overlay * copy_overlays P_ ((struct buffer *, struct Lisp_Ove
static void modify_overlay P_ ((struct buffer *, EMACS_INT, EMACS_INT));
static Lisp_Object buffer_lisp_local_variables P_ ((struct buffer *));
+extern char * emacs_strerror P_ ((int));
/* For debugging; temporary. See set_buffer_internal. */
/* Lisp_Object Qlisp_mode, Vcheck_symbol; */
@@ -2103,8 +2104,10 @@ DEFUN ("set-buffer-multibyte", Fset_buffer_multibyte, Sset_buffer_multibyte,
doc: /* Set the multibyte flag of the current buffer to FLAG.
If FLAG is t, this makes the buffer a multibyte buffer.
If FLAG is nil, this makes the buffer a single-byte buffer.
-The buffer contents remain unchanged as a sequence of bytes
-but the contents viewed as characters do change.
+In these cases, the buffer contents remain unchanged as a sequence of
+bytes but the contents viewed as characters do change.
+If FLAG is `to', this makes the buffer a multibyte buffer by changing
+all eight-bit bytes to eight-bit characters.
If the multibyte flag was really changed, undo information of the
current buffer is cleared. */)
(flag)
@@ -2176,11 +2179,11 @@ current buffer is cleared. */)
p = GAP_END_ADDR;
stop = Z;
}
- if (MULTIBYTE_STR_AS_UNIBYTE_P (p, bytes))
- p += bytes, pos += bytes;
- else
+ if (ASCII_BYTE_P (*p))
+ p++, pos++;
+ else if (CHAR_BYTE8_HEAD_P (*p))
{
- c = STRING_CHAR (p, stop - pos);
+ c = STRING_CHAR_AND_LENGTH (p, stop - pos, bytes);
/* Delete all bytes for this 8-bit character but the
last one, and change the last one to the charcter
code. */
@@ -2195,6 +2198,11 @@ current buffer is cleared. */)
zv -= bytes;
stop = Z;
}
+ else
+ {
+ bytes = BYTES_BY_CHAR_HEAD (*p);
+ p += bytes, pos += bytes;
+ }
}
if (narrowed)
Fnarrow_to_region (make_number (begv), make_number (zv));
@@ -2203,13 +2211,14 @@ current buffer is cleared. */)
{
int pt = PT;
int pos, stop;
- unsigned char *p;
+ unsigned char *p, *pend;
/* Be sure not to have a multibyte sequence striding over the GAP.
- Ex: We change this: "...abc\201 _GAP_ \241def..."
- to: "...abc _GAP_ \201\241def..." */
+ Ex: We change this: "...abc\302 _GAP_ \241def..."
+ to: "...abc _GAP_ \302\241def..." */
- if (GPT_BYTE > 1 && GPT_BYTE < Z_BYTE
+ if (EQ (flag, Qt)
+ && GPT_BYTE > 1 && GPT_BYTE < Z_BYTE
&& ! CHAR_HEAD_P (*(GAP_END_ADDR)))
{
unsigned char *p = GPT_ADDR - 1;
@@ -2228,6 +2237,7 @@ current buffer is cleared. */)
pos = BEG;
stop = GPT;
p = BEG_ADDR;
+ pend = GPT_ADDR;
while (1)
{
int bytes;
@@ -2237,16 +2247,21 @@ current buffer is cleared. */)
if (pos == Z)
break;
p = GAP_END_ADDR;
+ pend = Z_ADDR;
stop = Z;
}
- if (UNIBYTE_STR_AS_MULTIBYTE_P (p, stop - pos, bytes))
+ if (ASCII_BYTE_P (*p))
+ p++, pos++;
+ else if (EQ (flag, Qt) && (bytes = MULTIBYTE_LENGTH (p, pend)) > 0)
p += bytes, pos += bytes;
else
{
unsigned char tmp[MAX_MULTIBYTE_LENGTH];
+ int c;
- bytes = CHAR_STRING (*p, tmp);
+ c = BYTE8_TO_CHAR (*p);
+ bytes = CHAR_STRING (c, tmp);
*p = tmp[0];
TEMP_SET_PT_BOTH (pos + 1, pos + 1);
bytes--;
@@ -2260,6 +2275,7 @@ current buffer is cleared. */)
zv += bytes;
if (pos <= pt)
pt += bytes;
+ pend = Z_ADDR;
stop = Z;
}
}
diff --git a/src/buffer.h b/src/buffer.h
index efe0252453a..9d3ca6ea463 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -319,7 +319,6 @@ else
/* Variables used locally in FETCH_MULTIBYTE_CHAR. */
extern unsigned char *_fetch_multibyte_char_p;
-extern int _fetch_multibyte_char_len;
/* Return character code of multi-byte form at position POS. If POS
doesn't point the head of valid multi-byte form, only the byte at
@@ -327,10 +326,18 @@ extern int _fetch_multibyte_char_len;
#define FETCH_MULTIBYTE_CHAR(pos) \
(_fetch_multibyte_char_p = (((pos) >= GPT_BYTE ? GAP_SIZE : 0) \
- + (pos) + BEG_ADDR - BEG_BYTE), \
- _fetch_multibyte_char_len \
- = ((pos) >= GPT_BYTE ? ZV_BYTE : GPT_BYTE) - (pos), \
- STRING_CHAR (_fetch_multibyte_char_p, _fetch_multibyte_char_len))
+ + (pos) + BEG_ADDR - BEG_BYTE), \
+ STRING_CHAR (_fetch_multibyte_char_p, 0))
+
+/* Return character at position POS. If the current buffer is unibyte
+ and the character is not ASCII, make the returning character
+ multibyte. */
+
+#define FETCH_CHAR_AS_MULTIBYTE(pos) \
+ (!NILP (current_buffer->enable_multibyte_characters) \
+ ? FETCH_MULTIBYTE_CHAR ((pos)) \
+ : unibyte_char_to_multibyte (FETCH_BYTE ((pos))))
+
/* Macros for accessing a character or byte,
or converting between byte positions and addresses,
@@ -379,10 +386,7 @@ extern int _fetch_multibyte_char_len;
(_fetch_multibyte_char_p \
= (((pos) >= BUF_GPT_BYTE (buf) ? BUF_GAP_SIZE (buf) : 0) \
+ (pos) + BUF_BEG_ADDR (buf) - BEG_BYTE), \
- _fetch_multibyte_char_len \
- = (((pos) >= BUF_GPT_BYTE (buf) ? BUF_ZV_BYTE (buf) : BUF_GPT_BYTE (buf)) \
- - (pos)), \
- STRING_CHAR (_fetch_multibyte_char_p, _fetch_multibyte_char_len))
+ STRING_CHAR (_fetch_multibyte_char_p, 0))
/* Define the actual buffer data structures. */
@@ -856,6 +860,7 @@ extern void mmap_set_vars P_ ((int));
} \
} while (0)
+EXFUN (Fbuffer_live_p, 1);
EXFUN (Fbuffer_name, 1);
EXFUN (Fget_file_buffer, 1);
EXFUN (Fnext_overlay_change, 1);
diff --git a/src/bytecode.c b/src/bytecode.c
index 2facaa47062..3ee9b5576b4 100644
--- a/src/bytecode.c
+++ b/src/bytecode.c
@@ -37,7 +37,7 @@ by Hallvard:
#include <config.h>
#include "lisp.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "syntax.h"
#include "window.h"
@@ -1394,10 +1394,17 @@ If the third argument is incorrect, Emacs may crash. */)
break;
case Bchar_syntax:
- BEFORE_POTENTIAL_GC ();
- CHECK_NUMBER (TOP);
- AFTER_POTENTIAL_GC ();
- XSETFASTINT (TOP, syntax_code_spec[(int) SYNTAX (XINT (TOP))]);
+ {
+ int c;
+
+ BEFORE_POTENTIAL_GC ();
+ CHECK_CHARACTER (TOP);
+ AFTER_POTENTIAL_GC ();
+ c = XFASTINT (TOP);
+ if (NILP (current_buffer->enable_multibyte_characters))
+ MAKE_CHAR_MULTIBYTE (c);
+ XSETFASTINT (TOP, syntax_code_spec[(int) SYNTAX (c)]);
+ }
break;
case Bbuffer_substring:
diff --git a/src/callproc.c b/src/callproc.c
index 7b8ba8fea21..d8eebac08a6 100644
--- a/src/callproc.c
+++ b/src/callproc.c
@@ -75,7 +75,7 @@ extern int errno;
#include "lisp.h"
#include "commands.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "ccl.h"
#include "coding.h"
#include "composite.h"
@@ -273,6 +273,7 @@ usage: (call-process PROGRAM &optional INFILE BUFFER DISPLAY &rest ARGS) */)
if (nargs >= 5)
{
int must_encode = 0;
+ Lisp_Object coding_attrs;
for (i = 4; i < nargs; i++)
CHECK_STRING (args[i]);
@@ -298,11 +299,15 @@ usage: (call-process PROGRAM &optional INFILE BUFFER DISPLAY &rest ARGS) */)
else
val = Qnil;
}
+ val = coding_inherit_eol_type (val, Qnil);
setup_coding_system (Fcheck_coding_system (val), &argument_coding);
- if (argument_coding.common_flags & CODING_ASCII_INCOMPATIBLE_MASK)
- setup_coding_system (Qraw_text, &argument_coding);
- if (argument_coding.eol_type == CODING_EOL_UNDECIDED)
- argument_coding.eol_type = system_eol_type;
+ coding_attrs = CODING_ID_ATTRS (argument_coding.id);
+ if (NILP (CODING_ATTR_ASCII_COMPAT (coding_attrs)))
+ {
+ /* We should not use an ASCII incompatible coding system. */
+ val = raw_text_coding_system (val);
+ setup_coding_system (val, &argument_coding);
+ }
}
}
@@ -425,12 +430,8 @@ usage: (call-process PROGRAM &optional INFILE BUFFER DISPLAY &rest ARGS) */)
{
argument_coding.src_multibyte = STRING_MULTIBYTE (args[i]);
if (CODING_REQUIRE_ENCODING (&argument_coding))
- {
- /* We must encode this argument. */
- args[i] = encode_coding_string (args[i], &argument_coding, 1);
- if (argument_coding.type == coding_type_ccl)
- setup_ccl_program (&(argument_coding.spec.ccl.encoder), Qnil);
- }
+ /* We must encode this argument. */
+ args[i] = encode_coding_string (&argument_coding, args[i], 1);
new_argv[i - 3] = SDATA (args[i]);
}
UNGCPRO;
@@ -742,19 +743,15 @@ usage: (call-process PROGRAM &optional INFILE BUFFER DISPLAY &rest ARGS) */)
else
val = Qnil;
}
- setup_coding_system (Fcheck_coding_system (val), &process_coding);
+ Fcheck_coding_system (val);
/* In unibyte mode, character code conversion should not take
place but EOL conversion should. So, setup raw-text or one
of the subsidiary according to the information just setup. */
if (NILP (current_buffer->enable_multibyte_characters)
&& !NILP (val))
- setup_raw_text_coding_system (&process_coding);
+ val = raw_text_coding_system (val);
+ setup_coding_system (val, &process_coding);
}
- process_coding.src_multibyte = 0;
- process_coding.dst_multibyte
- = (BUFFERP (buffer)
- ? ! NILP (XBUFFER (buffer)->enable_multibyte_characters)
- : ! NILP (current_buffer->enable_multibyte_characters));
immediate_quit = 1;
QUIT;
@@ -766,12 +763,8 @@ usage: (call-process PROGRAM &optional INFILE BUFFER DISPLAY &rest ARGS) */)
int carryover = 0;
int display_on_the_fly = display_p;
struct coding_system saved_coding;
- int pt_orig = PT, pt_byte_orig = PT_BYTE;
- int inserted;
saved_coding = process_coding;
- if (process_coding.composing != COMPOSITION_DISABLED)
- coding_allocate_composition_data (&process_coding, PT);
while (1)
{
/* Repeatedly read until we've filled as much as possible
@@ -804,133 +797,49 @@ usage: (call-process PROGRAM &optional INFILE BUFFER DISPLAY &rest ARGS) */)
if (!NILP (buffer))
{
- if (! CODING_MAY_REQUIRE_DECODING (&process_coding))
+ if (NILP (current_buffer->enable_multibyte_characters)
+ && ! CODING_MAY_REQUIRE_DECODING (&process_coding))
insert_1_both (buf, nread, nread, 0, 1, 0);
else
{ /* We have to decode the input. */
- int size;
- char *decoding_buf;
-
- repeat_decoding:
- size = decoding_buffer_size (&process_coding, nread);
- decoding_buf = (char *) xmalloc (size);
-
- /* We can't use the macro CODING_REQUIRE_DETECTION
- because it always returns nonzero if the coding
- system requires EOL detection. Here, we have to
- check only whether or not the coding system
- requires text-encoding detection. */
- if (process_coding.type == coding_type_undecided)
- {
- detect_coding (&process_coding, buf, nread);
- if (process_coding.composing != COMPOSITION_DISABLED)
- /* We have not yet allocated the composition
- data because the coding type was undecided. */
- coding_allocate_composition_data (&process_coding, PT);
- }
- if (process_coding.cmp_data)
- process_coding.cmp_data->char_offset = PT;
-
- decode_coding (&process_coding, buf, decoding_buf,
- nread, size);
+ Lisp_Object curbuf;
+ XSETBUFFER (curbuf, current_buffer);
+ decode_coding_c_string (&process_coding, buf, nread,
+ curbuf);
if (display_on_the_fly
- && saved_coding.type == coding_type_undecided
- && process_coding.type != coding_type_undecided)
+ && CODING_REQUIRE_DETECTION (&saved_coding)
+ && ! CODING_REQUIRE_DETECTION (&process_coding))
{
/* We have detected some coding system. But,
there's a possibility that the detection was
- done by insufficient data. So, we try the code
- detection again with more data. */
- xfree (decoding_buf);
+ done by insufficient data. So, we give up
+ displaying on the fly. */
+ if (process_coding.produced > 0)
+ del_range_2 (process_coding.dst_pos,
+ process_coding.dst_pos_byte,
+ process_coding.dst_pos
+ + process_coding.produced_char,
+ process_coding.dst_pos_byte
+ + process_coding.produced, 0);
display_on_the_fly = 0;
process_coding = saved_coding;
carryover = nread;
/* This is to make the above condition always
fails in the future. */
- saved_coding.type = coding_type_no_conversion;
+ saved_coding.common_flags
+ &= ~CODING_REQUIRE_DETECTION_MASK;
continue;
}
- if (process_coding.produced > 0)
- insert_1_both (decoding_buf, process_coding.produced_char,
- process_coding.produced, 0, 1, 0);
- xfree (decoding_buf);
-
- if (process_coding.result == CODING_FINISH_INCONSISTENT_EOL)
- {
- Lisp_Object eol_type, coding;
-
- if (process_coding.eol_type == CODING_EOL_CR)
- {
- /* CRs have been replaced with LFs. Undo
- that in the text inserted above. */
- unsigned char *p;
-
- move_gap_both (PT, PT_BYTE);
-
- p = BYTE_POS_ADDR (pt_byte_orig);
- for (; p < GPT_ADDR; ++p)
- if (*p == '\n')
- *p = '\r';
- }
- else if (process_coding.eol_type == CODING_EOL_CRLF)
- {
- /* CR LFs have been replaced with LFs. Undo
- that by inserting CRs in front of LFs in
- the text inserted above. */
- EMACS_INT bytepos, old_pt, old_pt_byte, nCR;
-
- old_pt = PT;
- old_pt_byte = PT_BYTE;
- nCR = 0;
-
- for (bytepos = PT_BYTE - 1;
- bytepos >= pt_byte_orig;
- --bytepos)
- if (FETCH_BYTE (bytepos) == '\n')
- {
- EMACS_INT charpos = BYTE_TO_CHAR (bytepos);
- TEMP_SET_PT_BOTH (charpos, bytepos);
- insert_1_both ("\r", 1, 1, 0, 1, 0);
- ++nCR;
- }
-
- TEMP_SET_PT_BOTH (old_pt + nCR, old_pt_byte + nCR);
- }
-
- /* Set the coding system symbol to that for
- Unix-like EOL. */
- eol_type = Fget (saved_coding.symbol, Qeol_type);
- if (VECTORP (eol_type)
- && ASIZE (eol_type) == 3
- && SYMBOLP (AREF (eol_type, CODING_EOL_LF)))
- coding = AREF (eol_type, CODING_EOL_LF);
- else
- coding = saved_coding.symbol;
-
- process_coding.symbol = coding;
- process_coding.eol_type = CODING_EOL_LF;
- process_coding.mode
- &= ~CODING_MODE_INHIBIT_INCONSISTENT_EOL;
- }
-
- nread -= process_coding.consumed;
- carryover = nread;
+ TEMP_SET_PT_BOTH (PT + process_coding.produced_char,
+ PT_BYTE + process_coding.produced);
+ carryover = process_coding.carryover_bytes;
if (carryover > 0)
/* As CARRYOVER should not be that large, we had
better avoid overhead of bcopy. */
- BCOPY_SHORT (buf + process_coding.consumed, buf,
- carryover);
- if (process_coding.result == CODING_FINISH_INSUFFICIENT_CMP)
- {
- /* The decoding ended because of insufficient data
- area to record information about composition.
- We must try decoding with additional data area
- before reading more output for the process. */
- coding_allocate_composition_data (&process_coding, PT);
- goto repeat_decoding;
- }
+ BCOPY_SHORT (process_coding.carryover, buf,
+ process_coding.carryover_bytes);
}
}
@@ -961,33 +870,12 @@ usage: (call-process PROGRAM &optional INFILE BUFFER DISPLAY &rest ARGS) */)
}
give_up: ;
- if (!NILP (buffer)
- && process_coding.cmp_data)
- {
- coding_restore_composition (&process_coding, Fcurrent_buffer ());
- coding_free_composition_data (&process_coding);
- }
-
- {
- int post_read_count = SPECPDL_INDEX ();
-
- record_unwind_protect (save_excursion_restore, save_excursion_save ());
- inserted = PT - pt_orig;
- TEMP_SET_PT_BOTH (pt_orig, pt_byte_orig);
- if (SYMBOLP (process_coding.post_read_conversion)
- && !NILP (Ffboundp (process_coding.post_read_conversion)))
- call1 (process_coding.post_read_conversion, make_number (inserted));
-
- Vlast_coding_system_used = process_coding.symbol;
-
- /* If the caller required, let the buffer inherit the
- coding-system used to decode the process output. */
- if (inherit_process_coding_system)
- call1 (intern ("after-insert-file-set-buffer-file-coding-system"),
- make_number (total_read));
-
- unbind_to (post_read_count, Qnil);
- }
+ Vlast_coding_system_used = CODING_ID_NAME (process_coding.id);
+ /* If the caller required, let the buffer inherit the
+ coding-system used to decode the process output. */
+ if (inherit_process_coding_system)
+ call1 (intern ("after-insert-file-set-buffer-file-coding-system"),
+ make_number (total_read));
}
/* Wait for it to terminate, unless it already has. */
diff --git a/src/casefiddle.c b/src/casefiddle.c
index 0ad884310ed..ba9b3e3adf4 100644
--- a/src/casefiddle.c
+++ b/src/casefiddle.c
@@ -23,7 +23,7 @@ Boston, MA 02110-1301, USA. */
#include <config.h>
#include "lisp.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "commands.h"
#include "syntax.h"
#include "composite.h"
@@ -38,80 +38,90 @@ casify_object (flag, obj)
enum case_action flag;
Lisp_Object obj;
{
- register int i, c, len;
+ register int c, c1;
register int inword = flag == CASE_DOWN;
/* If the case table is flagged as modified, rescan it. */
if (NILP (XCHAR_TABLE (current_buffer->downcase_table)->extras[1]))
Fset_case_table (current_buffer->downcase_table);
- if (INTEGERP (obj))
+ while (1)
{
- int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
- | CHAR_SHIFT | CHAR_CTL | CHAR_META);
- int flags = XINT (obj) & flagbits;
-
- /* If the character has higher bits set
- above the flags, return it unchanged.
- It is not a real character. */
- if ((unsigned) XFASTINT (obj) > (unsigned) flagbits)
- return obj;
-
- c = DOWNCASE (XFASTINT (obj) & ~flagbits);
- if (inword)
- XSETFASTINT (obj, c | flags);
- else if (c == (XFASTINT (obj) & ~flagbits))
+ if (INTEGERP (obj))
{
- c = UPCASE1 ((XFASTINT (obj) & ~flagbits));
- XSETFASTINT (obj, c | flags);
+ int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
+ | CHAR_SHIFT | CHAR_CTL | CHAR_META);
+ int flags = XINT (obj) & flagbits;
+ int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
+
+ /* If the character has higher bits set
+ above the flags, return it unchanged.
+ It is not a real character. */
+ if ((unsigned) XFASTINT (obj) > (unsigned) flagbits)
+ return obj;
+
+ c1 = XFASTINT (obj) & ~flagbits;
+ if (! multibyte)
+ MAKE_CHAR_MULTIBYTE (c1);
+ c = DOWNCASE (c1);
+ if (inword)
+ XSETFASTINT (obj, c | flags);
+ else if (c == (XFASTINT (obj) & ~flagbits))
+ {
+ if (! inword)
+ c = UPCASE1 (c1);
+ if (! multibyte)
+ MAKE_CHAR_UNIBYTE (c);
+ XSETFASTINT (obj, c | flags);
+ }
+ return obj;
}
- return obj;
- }
- if (STRINGP (obj))
- {
- int multibyte = STRING_MULTIBYTE (obj);
- int n;
-
- obj = Fcopy_sequence (obj);
- len = SBYTES (obj);
-
- /* I counts bytes, and N counts chars. */
- for (i = n = 0; i < len; n++)
+ if (STRINGP (obj))
{
- int from_len = 1, to_len = 1;
-
- c = SREF (obj, i);
-
- if (multibyte && c >= 0x80)
- c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i, len -i, from_len);
- if (inword && flag != CASE_CAPITALIZE_UP)
- c = DOWNCASE (c);
- else if (!UPPERCASEP (c)
- && (!inword || flag != CASE_CAPITALIZE_UP))
- c = UPCASE1 (c);
- if ((ASCII_BYTE_P (c) && from_len == 1)
- || (! multibyte && SINGLE_BYTE_CHAR_P (c)))
- SSET (obj, i, c);
- else
+ int multibyte = STRING_MULTIBYTE (obj);
+ int i, i_byte, len;
+ int size = SCHARS (obj);
+
+ obj = Fcopy_sequence (obj);
+ for (i = i_byte = 0; i < size; i++, i_byte += len)
{
- to_len = CHAR_BYTES (c);
- if (from_len == to_len)
- CHAR_STRING (c, SDATA (obj) + i);
+ if (multibyte)
+ c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, 0, len);
else
{
- Faset (obj, make_number (n), make_number (c));
- len += to_len - from_len;
+ c = SREF (obj, i_byte);
+ len = 1;
+ MAKE_CHAR_MULTIBYTE (c);
+ }
+ c1 = c;
+ if (inword && flag != CASE_CAPITALIZE_UP)
+ c = DOWNCASE (c);
+ else if (!UPPERCASEP (c)
+ && (!inword || flag != CASE_CAPITALIZE_UP))
+ c = UPCASE1 (c1);
+ if ((int) flag >= (int) CASE_CAPITALIZE)
+ inword = (SYNTAX (c) == Sword);
+ if (c != c1)
+ {
+ if (! multibyte)
+ {
+ MAKE_CHAR_UNIBYTE (c);
+ SSET (obj, i_byte, c);
+ }
+ else if (ASCII_CHAR_P (c1) && ASCII_CHAR_P (c))
+ SSET (obj, i_byte, c);
+ else
+ {
+ Faset (obj, make_number (i), make_number (c));
+ i_byte += CHAR_BYTES (c) - len;
+ }
}
}
- if ((int) flag >= (int) CASE_CAPITALIZE)
- inword = SYNTAX (c) == Sword;
- i += to_len;
+ return obj;
}
- return obj;
+ obj = wrong_type_argument (Qchar_or_string_p, obj);
}
-
- wrong_type_argument (Qchar_or_string_p, obj);
}
DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
@@ -168,13 +178,14 @@ casify_region (flag, b, e)
enum case_action flag;
Lisp_Object b, e;
{
- register int i;
register int c;
register int inword = flag == CASE_DOWN;
register int multibyte = !NILP (current_buffer->enable_multibyte_characters);
int start, end;
int start_byte, end_byte;
int changed = 0;
+ int opoint = PT;
+ int opoint_byte = PT_BYTE;
if (EQ (b, e))
/* Not modifying because nothing marked */
@@ -192,85 +203,74 @@ casify_region (flag, b, e)
start_byte = CHAR_TO_BYTE (start);
end_byte = CHAR_TO_BYTE (end);
- for (i = start_byte; i < end_byte; i++, start++)
+ while (start < end)
{
- int c2;
- c = c2 = FETCH_BYTE (i);
- if (multibyte && c >= 0x80)
- /* A multibyte character can't be handled in this simple loop. */
- break;
+ int c2, len;
+
+ if (multibyte)
+ {
+ c = FETCH_MULTIBYTE_CHAR (start_byte);
+ len = CHAR_BYTES (c);
+ }
+ else
+ {
+ c = FETCH_BYTE (start_byte);
+ MAKE_CHAR_MULTIBYTE (c);
+ len = 1;
+ }
+ c2 = c;
if (inword && flag != CASE_CAPITALIZE_UP)
c = DOWNCASE (c);
else if (!UPPERCASEP (c)
&& (!inword || flag != CASE_CAPITALIZE_UP))
c = UPCASE1 (c);
- if (multibyte && c >= 0x80)
- /* A multibyte result character can't be handled in this
- simple loop. */
- break;
- FETCH_BYTE (i) = c;
- if (c != c2)
- changed = 1;
if ((int) flag >= (int) CASE_CAPITALIZE)
- inword = SYNTAX (c) == Sword && (inword || !SYNTAX_PREFIX (c));
- }
- if (i < end_byte)
- {
- /* The work is not yet finished because of a multibyte character
- just encountered. */
- int opoint = PT;
- int opoint_byte = PT_BYTE;
- int c2;
-
- while (start < end)
+ inword = ((SYNTAX (c) == Sword) && (inword || !SYNTAX_PREFIX (c)));
+ if (c != c2)
{
- if ((c = FETCH_BYTE (i)) >= 0x80)
- c = FETCH_MULTIBYTE_CHAR (i);
- c2 = c;
- if (inword && flag != CASE_CAPITALIZE_UP)
- c2 = DOWNCASE (c);
- else if (!UPPERCASEP (c)
- && (!inword || flag != CASE_CAPITALIZE_UP))
- c2 = UPCASE1 (c);
- if (c != c2)
+ changed = 1;
+ if (! multibyte)
+ {
+ MAKE_CHAR_UNIBYTE (c);
+ FETCH_BYTE (start_byte) = c;
+ }
+ else if (ASCII_CHAR_P (c2) && ASCII_CHAR_P (c))
+ FETCH_BYTE (start_byte) = c;
+ else
{
- int fromlen, tolen, j;
+ int tolen = CHAR_BYTES (c);
+ int j;
unsigned char str[MAX_MULTIBYTE_LENGTH];
- changed = 1;
- /* Handle the most likely case */
- if (c < 0400 && c2 < 0400)
- FETCH_BYTE (i) = c2;
- else if (fromlen = CHAR_STRING (c, str),
- tolen = CHAR_STRING (c2, str),
- fromlen == tolen)
+ CHAR_STRING (c, str);
+ if (len == tolen)
{
/* Length is unchanged. */
- for (j = 0; j < tolen; ++j)
- FETCH_BYTE (i + j) = str[j];
+ for (j = 0; j < len; ++j)
+ FETCH_BYTE (start_byte + j) = str[j];
}
else
{
/* Replace one character with the other,
keeping text properties the same. */
- replace_range_2 (start, i,
- start + 1, i + fromlen,
+ replace_range_2 (start, start_byte,
+ start + 1, start_byte + len,
str, 1, tolen,
- 1);
- if (opoint > start)
- opoint_byte += tolen - fromlen;
+ 0);
+ len = tolen;
}
}
- if ((int) flag >= (int) CASE_CAPITALIZE)
- inword = SYNTAX (c2) == Sword;
- INC_BOTH (start, i);
}
- TEMP_SET_PT_BOTH (opoint, opoint_byte);
+ start++;
+ start_byte += len;
}
- start = XFASTINT (b);
+ if (PT != opoint)
+ TEMP_SET_PT_BOTH (opoint, opoint_byte);
+
if (changed)
{
+ start = XFASTINT (b);
signal_after_change (start, end - start, end - start);
update_compositions (start, end, CHECK_ALL);
}
diff --git a/src/casetab.c b/src/casetab.c
index 5483f5663fa..15bf133a869 100644
--- a/src/casetab.c
+++ b/src/casetab.c
@@ -24,7 +24,7 @@ Boston, MA 02110-1301, USA. */
#include <config.h>
#include "lisp.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
Lisp_Object Qcase_table_p, Qcase_table;
Lisp_Object Vascii_downcase_table, Vascii_upcase_table;
@@ -126,7 +126,6 @@ set_case_table (table, standard)
int standard;
{
Lisp_Object up, canon, eqv;
- Lisp_Object indices[3];
check_case_table (table);
@@ -137,8 +136,8 @@ set_case_table (table, standard)
if (NILP (up))
{
up = Fmake_char_table (Qcase_table, Qnil);
- map_char_table (set_identity, Qnil, table, table, up, 0, indices);
- map_char_table (shuffle, Qnil, table, table, up, 0, indices);
+ map_char_table (set_identity, Qnil, table, up);
+ map_char_table (shuffle, Qnil, table, up);
XCHAR_TABLE (table)->extras[0] = up;
}
@@ -146,14 +145,14 @@ set_case_table (table, standard)
{
canon = Fmake_char_table (Qcase_table, Qnil);
XCHAR_TABLE (table)->extras[1] = canon;
- map_char_table (set_canon, Qnil, table, table, table, 0, indices);
+ map_char_table (set_canon, Qnil, table, table);
}
if (NILP (eqv))
{
eqv = Fmake_char_table (Qcase_table, Qnil);
- map_char_table (set_identity, Qnil, canon, canon, eqv, 0, indices);
- map_char_table (shuffle, Qnil, canon, canon, eqv, 0, indices);
+ map_char_table (set_identity, Qnil, canon, eqv);
+ map_char_table (shuffle, Qnil, canon, eqv);
XCHAR_TABLE (table)->extras[2] = eqv;
}
@@ -180,30 +179,45 @@ set_case_table (table, standard)
/* The following functions are called in map_char_table. */
-/* Set CANON char-table element for C to a translated ELT by UP and
- DOWN char-tables. This is done only when ELT is a character. The
- char-tables CANON, UP, and DOWN are in CASE_TABLE. */
+/* Set CANON char-table element for characters in RANGE to a
+ translated ELT by UP and DOWN char-tables. This is done only when
+ ELT is a character. The char-tables CANON, UP, and DOWN are in
+ CASE_TABLE. */
static void
-set_canon (case_table, c, elt)
- Lisp_Object case_table, c, elt;
+set_canon (case_table, range, elt)
+ Lisp_Object case_table, range, elt;
{
Lisp_Object up = XCHAR_TABLE (case_table)->extras[0];
Lisp_Object canon = XCHAR_TABLE (case_table)->extras[1];
if (NATNUMP (elt))
- Faset (canon, c, Faref (case_table, Faref (up, elt)));
+ Fset_char_table_range (canon, range, Faref (case_table, Faref (up, elt)));
}
-/* Set elements of char-table TABLE for C to C itself. This is done
- only when ELT is a character. This is called in map_char_table. */
+/* Set elements of char-table TABLE for C to C itself. C may be a
+ cons specifying a character range. In that case, set characters in
+ that range to themselves. This is done only when ELT is a
+ character. This is called in map_char_table. */
static void
set_identity (table, c, elt)
Lisp_Object table, c, elt;
{
if (NATNUMP (elt))
- Faset (table, c, c);
+ {
+ int from, to;
+
+ if (CONSP (c))
+ {
+ from = XINT (XCAR (c));
+ to = XINT (XCDR (c));
+ }
+ else
+ from = to = XINT (c);
+ for (; from <= to; from++)
+ CHAR_TABLE_SET (table, from, make_number (from));
+ }
}
/* Permute the elements of TABLE (which is initially an identity
@@ -215,11 +229,25 @@ static void
shuffle (table, c, elt)
Lisp_Object table, c, elt;
{
- if (NATNUMP (elt) && !EQ (c, elt))
+ if (NATNUMP (elt))
{
Lisp_Object tem = Faref (table, elt);
- Faset (table, elt, c);
- Faset (table, c, tem);
+ int from, to;
+
+ if (CONSP (c))
+ {
+ from = XINT (XCAR (c));
+ to = XINT (XCDR (c));
+ }
+ else
+ from = to = XINT (c);
+
+ for (; from <= to; from++)
+ if (from != XINT (elt))
+ {
+ Faset (table, elt, make_number (from));
+ Faset (table, make_number (from), tem);
+ }
}
}
@@ -244,22 +272,24 @@ init_casetab_once ()
Vascii_downcase_table = down;
XCHAR_TABLE (down)->purpose = Qcase_table;
- for (i = 0; i < CHAR_TABLE_SINGLE_BYTE_SLOTS; i++)
- XSETFASTINT (XCHAR_TABLE (down)->contents[i],
- (i >= 'A' && i <= 'Z') ? i + ('a' - 'A') : i);
+ for (i = 0; i < 128; i++)
+ {
+ int c = (i >= 'A' && i <= 'Z') ? i + ('a' - 'A') : i;
+ CHAR_TABLE_SET (down, i, make_number (c));
+ }
XCHAR_TABLE (down)->extras[1] = Fcopy_sequence (down);
up = Fmake_char_table (Qcase_table, Qnil);
XCHAR_TABLE (down)->extras[0] = up;
- for (i = 0; i < CHAR_TABLE_SINGLE_BYTE_SLOTS; i++)
- XSETFASTINT (XCHAR_TABLE (up)->contents[i],
- ((i >= 'A' && i <= 'Z')
- ? i + ('a' - 'A')
- : ((i >= 'a' && i <= 'z')
- ? i + ('A' - 'a')
- : i)));
+ for (i = 0; i < 128; i++)
+ {
+ int c = ((i >= 'A' && i <= 'Z') ? i + ('a' - 'A')
+ : ((i >= 'a' && i <= 'z') ? i + ('A' - 'a')
+ : i));;
+ CHAR_TABLE_SET (up, i, make_number (c));
+ }
XCHAR_TABLE (down)->extras[2] = Fcopy_sequence (up);
}
diff --git a/src/category.c b/src/category.c
index 6835d00d824..b9f80982ee7 100644
--- a/src/category.c
+++ b/src/category.c
@@ -3,6 +3,9 @@
Copyright (C) 1995, 1997, 1998, 1999
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -29,6 +32,7 @@ Boston, MA 02110-1301, USA. */
#include <ctype.h>
#include "lisp.h"
#include "buffer.h"
+#include "character.h"
#include "charset.h"
#include "category.h"
#include "keymap.h"
@@ -187,6 +191,18 @@ This is the one used for new buffers. */)
return Vstandard_category_table;
}
+
+static void
+copy_category_entry (table, c, val)
+ Lisp_Object table, c, val;
+{
+ val = Fcopy_sequence (val);
+ if (CONSP (c))
+ char_table_set_range (table, XINT (XCAR (c)), XINT (XCDR (c)), val);
+ else
+ char_table_set (table, XINT (c), val);
+}
+
/* Return a copy of category table TABLE. We can't simply use the
function copy-sequence because no contents should be shared between
the original and the copy. This function is called recursively by
@@ -196,44 +212,14 @@ Lisp_Object
copy_category_table (table)
Lisp_Object table;
{
- Lisp_Object tmp;
- int i, to;
+ table = copy_char_table (table);
- if (!NILP (XCHAR_TABLE (table)->top))
- {
- /* TABLE is a top level char table.
- At first, make a copy of tree structure of the table. */
- table = Fcopy_sequence (table);
-
- /* Then, copy elements for single byte characters one by one. */
- for (i = 0; i < CHAR_TABLE_SINGLE_BYTE_SLOTS; i++)
- if (!NILP (tmp = XCHAR_TABLE (table)->contents[i]))
- XCHAR_TABLE (table)->contents[i] = Fcopy_sequence (tmp);
- to = CHAR_TABLE_ORDINARY_SLOTS;
-
- /* Also copy the first (and sole) extra slot. It is a vector
- containing docstring of each category. */
- Fset_char_table_extra_slot
- (table, make_number (0),
- Fcopy_sequence (Fchar_table_extra_slot (table, make_number (0))));
- }
- else
- {
- i = 32;
- to = SUB_CHAR_TABLE_ORDINARY_SLOTS;
- }
-
- /* If the table has non-nil default value, copy it. */
- if (!NILP (tmp = XCHAR_TABLE (table)->defalt))
- XCHAR_TABLE (table)->defalt = Fcopy_sequence (tmp);
-
- /* At last, copy the remaining elements while paying attention to a
- sub char table. */
- for (; i < to; i++)
- if (!NILP (tmp = XCHAR_TABLE (table)->contents[i]))
- XCHAR_TABLE (table)->contents[i]
- = (SUB_CHAR_TABLE_P (tmp)
- ? copy_category_table (tmp) : Fcopy_sequence (tmp));
+ if (! NILP (XCHAR_TABLE (table)->defalt))
+ XCHAR_TABLE (table)->defalt
+ = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
+ XCHAR_TABLE (table)->extras[0]
+ = Fcopy_sequence (XCHAR_TABLE (table)->extras[0]);
+ map_char_table (copy_category_entry, Qnil, table, table);
return table;
}
@@ -259,9 +245,12 @@ DEFUN ("make-category-table", Fmake_category_table, Smake_category_table,
()
{
Lisp_Object val;
+ int i;
val = Fmake_char_table (Qcategory_table, Qnil);
XCHAR_TABLE (val)->defalt = MAKE_CATEGORY_SET;
+ for (i = 0; i < (1 << CHARTAB_SIZE_BITS_0); i++)
+ XCHAR_TABLE (val)->contents[i] = MAKE_CATEGORY_SET;
Fset_char_table_extra_slot (val, make_number (0),
Fmake_vector (make_number (95), Qnil));
return val;
@@ -283,6 +272,13 @@ Return TABLE. */)
}
+Lisp_Object
+char_category_set (c)
+ int c;
+{
+ return CHAR_TABLE_REF (current_buffer->category_table, c);
+}
+
DEFUN ("char-category-set", Fchar_category_set, Schar_category_set, 1, 1, 0,
doc: /* Return the category set of CHAR. */)
(ch)
@@ -315,34 +311,6 @@ The return value is a string containing those same categories. */)
return build_string (str);
}
-/* Modify all category sets stored under sub char-table TABLE so that
- they contain (SET_VALUE is t) or don't contain (SET_VALUE is nil)
- CATEGORY. */
-
-void
-modify_lower_category_set (table, category, set_value)
- Lisp_Object table, category, set_value;
-{
- Lisp_Object val;
- int i;
-
- val = XCHAR_TABLE (table)->defalt;
- if (!CATEGORY_SET_P (val))
- val = MAKE_CATEGORY_SET;
- SET_CATEGORY_SET (val, category, set_value);
- XCHAR_TABLE (table)->defalt = val;
-
- for (i = 32; i < SUB_CHAR_TABLE_ORDINARY_SLOTS; i++)
- {
- val = XCHAR_TABLE (table)->contents[i];
-
- if (CATEGORY_SET_P (val))
- SET_CATEGORY_SET (val, category, set_value);
- else if (SUB_CHAR_TABLE_P (val))
- modify_lower_category_set (val, category, set_value);
- }
-}
-
void
set_category_set (category_set, category, val)
Lisp_Object category_set, category, val;
@@ -362,113 +330,55 @@ DEFUN ("modify-category-entry", Fmodify_category_entry,
Smodify_category_entry, 2, 4, 0,
doc: /* Modify the category set of CHARACTER by adding CATEGORY to it.
The category is changed only for table TABLE, which defaults to
- the current buffer's category table.
+the current buffer's category table.
+CHARACTER can be either a single character or a cons representing the
+lower and upper ends of an inclusive character range to modify.
If optional fourth argument RESET is non-nil,
then delete CATEGORY from the category set instead of adding it. */)
(character, category, table, reset)
Lisp_Object character, category, table, reset;
{
- int c, charset, c1, c2;
Lisp_Object set_value; /* Actual value to be set in category sets. */
- Lisp_Object val, category_set;
+ Lisp_Object category_set;
+ int start, end;
+ int from, to;
- CHECK_NUMBER (character);
- c = XINT (character);
- CHECK_CATEGORY (category);
- table = check_category_table (table);
-
- if (NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
- error ("Undefined category: %c", XFASTINT (category));
-
- set_value = NILP (reset) ? Qt : Qnil;
-
- if (c < CHAR_TABLE_SINGLE_BYTE_SLOTS)
- {
- val = XCHAR_TABLE (table)->contents[c];
- if (!CATEGORY_SET_P (val))
- XCHAR_TABLE (table)->contents[c] = (val = MAKE_CATEGORY_SET);
- SET_CATEGORY_SET (val, category, set_value);
- return Qnil;
- }
-
- SPLIT_CHAR (c, charset, c1, c2);
-
- /* The top level table. */
- val = XCHAR_TABLE (table)->contents[charset + 128];
- if (CATEGORY_SET_P (val))
- category_set = val;
- else if (!SUB_CHAR_TABLE_P (val))
- {
- category_set = val = MAKE_CATEGORY_SET;
- XCHAR_TABLE (table)->contents[charset + 128] = category_set;
- }
-
- if (c1 <= 0)
+ if (INTEGERP (character))
{
- /* Only a charset is specified. */
- if (SUB_CHAR_TABLE_P (val))
- /* All characters in CHARSET should be the same as for having
- CATEGORY or not. */
- modify_lower_category_set (val, category, set_value);
- else
- SET_CATEGORY_SET (category_set, category, set_value);
- return Qnil;
+ CHECK_CHARACTER (character);
+ start = end = XFASTINT (character);
}
-
- /* The second level table. */
- if (!SUB_CHAR_TABLE_P (val))
+ else
{
- val = make_sub_char_table (Qnil);
- XCHAR_TABLE (table)->contents[charset + 128] = val;
- /* We must set default category set of CHARSET in `defalt' slot. */
- XCHAR_TABLE (val)->defalt = category_set;
+ CHECK_CONS (character);
+ CHECK_CHARACTER_CAR (character);
+ CHECK_CHARACTER_CDR (character);
+ start = XFASTINT (XCAR (character));
+ end = XFASTINT (XCDR (character));
}
- table = val;
- val = XCHAR_TABLE (table)->contents[c1];
- if (CATEGORY_SET_P (val))
- category_set = val;
- else if (!SUB_CHAR_TABLE_P (val))
- {
- category_set = val = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
- XCHAR_TABLE (table)->contents[c1] = category_set;
- }
+ CHECK_CATEGORY (category);
+ table = check_category_table (table);
- if (c2 <= 0)
- {
- if (SUB_CHAR_TABLE_P (val))
- /* All characters in C1 group of CHARSET should be the same as
- for CATEGORY. */
- modify_lower_category_set (val, category, set_value);
- else
- SET_CATEGORY_SET (category_set, category, set_value);
- return Qnil;
- }
+ if (NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
+ error ("Undefined category: %c", XFASTINT (category));
- /* The third (bottom) level table. */
- if (!SUB_CHAR_TABLE_P (val))
- {
- val = make_sub_char_table (Qnil);
- XCHAR_TABLE (table)->contents[c1] = val;
- /* We must set default category set of CHARSET and C1 in
- `defalt' slot. */
- XCHAR_TABLE (val)->defalt = category_set;
- }
- table = val;
+ set_value = NILP (reset) ? Qt : Qnil;
- val = XCHAR_TABLE (table)->contents[c2];
- if (CATEGORY_SET_P (val))
- category_set = val;
- else if (!SUB_CHAR_TABLE_P (val))
+ while (start <= end)
{
- category_set = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
- XCHAR_TABLE (table)->contents[c2] = category_set;
+ category_set = char_table_ref_and_range (table, start, &from, &to);
+ if (CATEGORY_MEMBER (XFASTINT (category), category_set) != NILP (reset))
+ {
+ category_set = Fcopy_sequence (category_set);
+ SET_CATEGORY_SET (category_set, category, set_value);
+ if (to > end)
+ char_table_set_range (table, start, end, category_set);
+ else
+ char_table_set_range (table, start, to, category_set);
+ }
+ start = to + 1;
}
- else
- /* This should never happen. */
- error ("Invalid category table");
-
- SET_CATEGORY_SET (category_set, category, set_value);
return Qnil;
}
diff --git a/src/category.h b/src/category.h
index 413505ece61..4799b7a5e3d 100644
--- a/src/category.h
+++ b/src/category.h
@@ -2,6 +2,9 @@
Copyright (C) 1995, 1998, 1999
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -92,21 +95,7 @@ extern Lisp_Object _temp_category_set;
#define Vstandard_category_table buffer_defaults.category_table
/* Return the category set of character C in the current category table. */
-#ifdef __GNUC__
-#define CATEGORY_SET(c) \
- ({ Lisp_Object table = current_buffer->category_table; \
- Lisp_Object temp; \
- if ((c) < CHAR_TABLE_SINGLE_BYTE_SLOTS) \
- while (NILP (temp = XCHAR_TABLE (table)->contents[(unsigned char) c]) \
- && NILP (temp = XCHAR_TABLE (table)->defalt)) \
- table = XCHAR_TABLE (table)->parent; \
- else \
- temp = Faref (table, make_number (c)); \
- temp; })
-#else
-#define CATEGORY_SET(c) \
- Faref (current_buffer->category_table, make_number (c))
-#endif
+#define CATEGORY_SET(c) char_category_set (c)
/* Return the doc string of CATEGORY in category table TABLE. */
#define CATEGORY_DOCSTRING(table, category) \
@@ -119,8 +108,8 @@ extern Lisp_Object _temp_category_set;
/* Return 1 if there is a word boundary between two word-constituent
characters C1 and C2 if they appear in this order, else return 0.
- There is no word boundary between two word-constituent ASCII
- characters. */
+ There is no word boundary between two word-constituent ASCII and
+ Latin-1 characters. */
#define WORD_BOUNDARY_P(c1, c2) \
(!(SINGLE_BYTE_CHAR_P (c1) && SINGLE_BYTE_CHAR_P (c2)) \
&& word_boundary_p (c1, c2))
diff --git a/src/ccl.c b/src/ccl.c
index 779755cf39e..74d0affa681 100644
--- a/src/ccl.c
+++ b/src/ccl.c
@@ -4,6 +4,9 @@
Copyright (C) 1995, 1997, 1998, 2003, 2004, 2005
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -27,10 +30,13 @@ Boston, MA 02110-1301, USA. */
#include <stdio.h>
#include "lisp.h"
+#include "character.h"
#include "charset.h"
#include "ccl.h"
#include "coding.h"
+Lisp_Object Qccl, Qcclp;
+
/* This contains all code conversion map available to CCL. */
Lisp_Object Vcode_conversion_map_vector;
@@ -66,6 +72,8 @@ Lisp_Object Vtranslation_hash_table_vector;
#define GET_HASH_TABLE(id) \
(XHASH_TABLE (XCDR(XVECTOR(Vtranslation_hash_table_vector)->contents[(id)])))
+extern int charset_unicode;
+
/* CCL (Code Conversion Language) is a simple language which has
operations on one input buffer, one output buffer, and 7 registers.
The syntax of CCL is described in `ccl.el'. Emacs Lisp function
@@ -198,10 +206,13 @@ Lisp_Object Vtranslation_hash_table_vector;
#define CCL_WriteStringJump 0x0A /* Write string and jump:
1:A--D--D--R--E--S--S-000XXXXX
2:LENGTH
- 3:0000STRIN[0]STRIN[1]STRIN[2]
+ 3:000MSTRIN[0]STRIN[1]STRIN[2]
...
------------------------------
- write_string (STRING, LENGTH);
+ if (M)
+ write_multibyte_string (STRING, LENGTH);
+ else
+ write_string (STRING, LENGTH);
IC += ADDRESS;
*/
@@ -308,13 +319,16 @@ Lisp_Object Vtranslation_hash_table_vector;
#define CCL_WriteConstString 0x14 /* Write a constant or a string:
1:CCCCCCCCCCCCCCCCCCCCrrrXXXXX
- [2:0000STRIN[0]STRIN[1]STRIN[2]]
+ [2:000MSTRIN[0]STRIN[1]STRIN[2]]
[...]
-----------------------------
if (!rrr)
write (CC..C)
else
- write_string (STRING, CC..C);
+ if (M)
+ write_multibyte_string (STRING, CC..C);
+ else
+ write_string (STRING, CC..C);
IC += (CC..C + 2) / 3;
*/
@@ -742,136 +756,87 @@ while(0)
/* Encode one character CH to multibyte form and write to the current
output buffer. If CH is less than 256, CH is written as is. */
-#define CCL_WRITE_CHAR(ch) \
- do { \
- int bytes = SINGLE_BYTE_CHAR_P (ch) ? 1: CHAR_BYTES (ch); \
- if (!dst) \
- CCL_INVALID_CMD; \
- else if (dst + bytes + extra_bytes < (dst_bytes ? dst_end : src)) \
- { \
- if (bytes == 1) \
- { \
- *dst++ = (ch); \
- if (extra_bytes && (ch) >= 0x80 && (ch) < 0xA0) \
- /* We may have to convert this eight-bit char to \
- multibyte form later. */ \
- extra_bytes++; \
- } \
- else if (CHAR_VALID_P (ch, 0)) \
- dst += CHAR_STRING (ch, dst); \
- else \
- CCL_INVALID_CMD; \
- } \
- else \
- CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
- } while (0)
-
-/* Encode one character CH to multibyte form and write to the current
- output buffer. The output bytes always forms a valid multibyte
- sequence. */
-#define CCL_WRITE_MULTIBYTE_CHAR(ch) \
- do { \
- int bytes = CHAR_BYTES (ch); \
- if (!dst) \
- CCL_INVALID_CMD; \
- else if (dst + bytes + extra_bytes < (dst_bytes ? dst_end : src)) \
- { \
- if (CHAR_VALID_P ((ch), 0)) \
- dst += CHAR_STRING ((ch), dst); \
- else \
- CCL_INVALID_CMD; \
- } \
- else \
- CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
+#define CCL_WRITE_CHAR(ch) \
+ do { \
+ if (! dst) \
+ CCL_INVALID_CMD; \
+ else if (dst < dst_end) \
+ *dst++ = (ch); \
+ else \
+ CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
} while (0)
/* Write a string at ccl_prog[IC] of length LEN to the current output
buffer. */
-#define CCL_WRITE_STRING(len) \
- do { \
- if (!dst) \
- CCL_INVALID_CMD; \
- else if (dst + len <= (dst_bytes ? dst_end : src)) \
- for (i = 0; i < len; i++) \
- *dst++ = ((XFASTINT (ccl_prog[ic + (i / 3)])) \
- >> ((2 - (i % 3)) * 8)) & 0xFF; \
- else \
- CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
- } while (0)
-
-/* Read one byte from the current input buffer into REGth register. */
-#define CCL_READ_CHAR(REG) \
- do { \
- if (!src) \
- CCL_INVALID_CMD; \
- else if (src < src_end) \
- { \
- REG = *src++; \
- if (REG == '\n' \
- && ccl->eol_type != CODING_EOL_LF) \
- { \
- /* We are encoding. */ \
- if (ccl->eol_type == CODING_EOL_CRLF) \
- { \
- if (ccl->cr_consumed) \
- ccl->cr_consumed = 0; \
- else \
- { \
- ccl->cr_consumed = 1; \
- REG = '\r'; \
- src--; \
- } \
- } \
- else \
- REG = '\r'; \
- } \
- if (REG == LEADING_CODE_8_BIT_CONTROL \
- && ccl->multibyte) \
- REG = *src++ - 0x20; \
- } \
- else if (ccl->last_block) \
- { \
- REG = -1; \
- ic = eof_ic; \
- goto ccl_repeat; \
- } \
- else \
- CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC); \
- } while (0)
-
-
-/* Set C to the character code made from CHARSET and CODE. This is
- like MAKE_CHAR but check the validity of CHARSET and CODE. If they
- are not valid, set C to (CODE & 0xFF) because that is usually the
- case that CCL_ReadMultibyteChar2 read an invalid code and it set
- CODE to that invalid byte. */
-
-#define CCL_MAKE_CHAR(charset, code, c) \
+#define CCL_WRITE_STRING(len) \
do { \
- if (charset == CHARSET_ASCII) \
- c = code & 0xFF; \
- else if (CHARSET_DEFINED_P (charset) \
- && (code & 0x7F) >= 32 \
- && (code < 256 || ((code >> 7) & 0x7F) >= 32)) \
+ int i; \
+ if (!dst) \
+ CCL_INVALID_CMD; \
+ else if (dst + len <= dst_end) \
{ \
- int c1 = code & 0x7F, c2 = 0; \
- \
- if (code >= 256) \
- c2 = c1, c1 = (code >> 7) & 0x7F; \
- c = MAKE_CHAR (charset, c1, c2); \
+ if (XFASTINT (ccl_prog[ic]) & 0x1000000) \
+ for (i = 0; i < len; i++) \
+ *dst++ = XFASTINT (ccl_prog[ic + i]) & 0xFFFFFF; \
+ else \
+ for (i = 0; i < len; i++) \
+ *dst++ = ((XFASTINT (ccl_prog[ic + (i / 3)])) \
+ >> ((2 - (i % 3)) * 8)) & 0xFF; \
} \
else \
- c = code & 0xFF; \
+ CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
} while (0)
+/* Read one byte from the current input buffer into Rth register. */
+#define CCL_READ_CHAR(r) \
+ do { \
+ if (! src) \
+ CCL_INVALID_CMD; \
+ else if (src < src_end) \
+ r = *src++; \
+ else if (ccl->last_block) \
+ { \
+ r = -1; \
+ ic = ccl->eof_ic; \
+ goto ccl_repeat; \
+ } \
+ else \
+ CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC); \
+ } while (0)
+
+/* Decode CODE by a charset whose id is ID. If ID is 0, return CODE
+ as is for backward compatibility. Assume that we can use the
+ variable `charset'. */
+
+#define CCL_DECODE_CHAR(id, code) \
+ ((id) == 0 ? (code) \
+ : (charset = CHARSET_FROM_ID ((id)), DECODE_CHAR (charset, (code))))
+
+/* Encode character C by some of charsets in CHARSET_LIST. Set ID to
+ the id of the used charset, ENCODED to the resulf of encoding.
+ Assume that we can use the variable `charset'. */
+
+#define CCL_ENCODE_CHAR(c, charset_list, id, encoded) \
+ do { \
+ unsigned code; \
+ \
+ charset = char_charset ((c), (charset_list), &code); \
+ if (! charset && ! NILP (charset_list)) \
+ charset = char_charset ((c), Qnil, &code); \
+ if (charset) \
+ { \
+ (id) = CHARSET_ID (charset); \
+ (encoded) = code; \
+ } \
+ } while (0)
-/* Execute CCL code on SRC_BYTES length text at SOURCE. The resulting
- text goes to a place pointed by DESTINATION, the length of which
- should not exceed DST_BYTES. The bytes actually processed is
- returned as *CONSUMED. The return value is the length of the
- resulting text. As a side effect, the contents of CCL registers
- are updated. If SOURCE or DESTINATION is NULL, only operations on
- registers are permitted. */
+/* Execute CCL code on characters at SOURCE (length SRC_SIZE). The
+ resulting text goes to a place pointed by DESTINATION, the length
+ of which should not exceed DST_SIZE. As a side effect, how many
+ characters are consumed and produced are recorded in CCL->consumed
+ and CCL->produced, and the contents of CCL registers are updated.
+ If SOURCE or DESTINATION is NULL, only operations on registers are
+ permitted. */
#ifdef CCL_DEBUG
#define CCL_DEBUG_BACKTRACE_LEN 256
@@ -896,36 +861,32 @@ struct ccl_prog_stack
/* For the moment, we only support depth 256 of stack. */
static struct ccl_prog_stack ccl_prog_stack_struct[256];
-int
-ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
+void
+ccl_driver (ccl, source, destination, src_size, dst_size, charset_list)
struct ccl_program *ccl;
- unsigned char *source, *destination;
- int src_bytes, dst_bytes;
- int *consumed;
+ int *source, *destination;
+ int src_size, dst_size;
+ Lisp_Object charset_list;
{
register int *reg = ccl->reg;
register int ic = ccl->ic;
register int code = 0, field1, field2;
register Lisp_Object *ccl_prog = ccl->prog;
- unsigned char *src = source, *src_end = src + src_bytes;
- unsigned char *dst = destination, *dst_end = dst + dst_bytes;
+ int *src = source, *src_end = src + src_size;
+ int *dst = destination, *dst_end = dst + dst_size;
int jump_address;
int i = 0, j, op;
int stack_idx = ccl->stack_idx;
/* Instruction counter of the current CCL code. */
int this_ic = 0;
- /* CCL_WRITE_CHAR will produce 8-bit code of range 0x80..0x9F. But,
- each of them will be converted to multibyte form of 2-byte
- sequence. For that conversion, we remember how many more bytes
- we must keep in DESTINATION in this variable. */
- int extra_bytes = ccl->eight_bit_control;
+ struct charset *charset;
int eof_ic = ccl->eof_ic;
int eof_hit = 0;
if (ic >= eof_ic)
ic = CCL_HEADER_MAIN;
- if (ccl->buf_magnification == 0) /* We can't produce any bytes. */
+ if (ccl->buf_magnification == 0) /* We can't read/produce any bytes. */
dst = NULL;
/* Set mapping stack pointer. */
@@ -950,8 +911,8 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
/* We can't just signal Qquit, instead break the loop as if
the whole data is processed. Don't reset Vquit_flag, it
must be handled later at a safer place. */
- if (consumed)
- src = source + src_bytes;
+ if (src)
+ src = source + src_size;
ccl->status = CCL_STAT_QUIT;
break;
}
@@ -1272,8 +1233,22 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
case CCL_LE: reg[rrr] = i <= j; break;
case CCL_GE: reg[rrr] = i >= j; break;
case CCL_NE: reg[rrr] = i != j; break;
- case CCL_DECODE_SJIS: DECODE_SJIS (i, j, reg[rrr], reg[7]); break;
- case CCL_ENCODE_SJIS: ENCODE_SJIS (i, j, reg[rrr], reg[7]); break;
+ case CCL_DECODE_SJIS:
+ {
+ i = (i << 8) | j;
+ SJIS_TO_JIS (i);
+ reg[rrr] = i >> 8;
+ reg[7] = i & 0xFF;
+ break;
+ }
+ case CCL_ENCODE_SJIS:
+ {
+ i = (i << 8) | j;
+ JIS_TO_SJIS (i);
+ reg[rrr] = i >> 8;
+ reg[7] = i & 0xFF;
+ break;
+ }
default: CCL_INVALID_CMD;
}
code &= 0x1F;
@@ -1293,166 +1268,29 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
case CCL_ReadMultibyteChar2:
if (!src)
CCL_INVALID_CMD;
-
- if (src >= src_end)
- {
- src++;
- goto ccl_read_multibyte_character_suspend;
- }
-
- if (!ccl->multibyte)
- {
- int bytes;
- if (!UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
- {
- reg[RRR] = CHARSET_8_BIT_CONTROL;
- reg[rrr] = *src++;
- break;
- }
- }
- i = *src++;
- if (i == '\n' && ccl->eol_type != CODING_EOL_LF)
- {
- /* We are encoding. */
- if (ccl->eol_type == CODING_EOL_CRLF)
- {
- if (ccl->cr_consumed)
- ccl->cr_consumed = 0;
- else
- {
- ccl->cr_consumed = 1;
- i = '\r';
- src--;
- }
- }
- else
- i = '\r';
- reg[rrr] = i;
- reg[RRR] = CHARSET_ASCII;
- }
- else if (i < 0x80)
- {
- /* ASCII */
- reg[rrr] = i;
- reg[RRR] = CHARSET_ASCII;
- }
- else if (i <= MAX_CHARSET_OFFICIAL_DIMENSION2)
- {
- int dimension = BYTES_BY_CHAR_HEAD (i) - 1;
-
- if (dimension == 0)
- {
- /* `i' is a leading code for an undefined charset. */
- reg[RRR] = CHARSET_8_BIT_GRAPHIC;
- reg[rrr] = i;
- }
- else if (src + dimension > src_end)
- goto ccl_read_multibyte_character_suspend;
- else
- {
- reg[RRR] = i;
- i = (*src++ & 0x7F);
- if (dimension == 1)
- reg[rrr] = i;
- else
- reg[rrr] = ((i << 7) | (*src++ & 0x7F));
- }
- }
- else if ((i == LEADING_CODE_PRIVATE_11)
- || (i == LEADING_CODE_PRIVATE_12))
- {
- if ((src + 1) >= src_end)
- goto ccl_read_multibyte_character_suspend;
- reg[RRR] = *src++;
- reg[rrr] = (*src++ & 0x7F);
- }
- else if ((i == LEADING_CODE_PRIVATE_21)
- || (i == LEADING_CODE_PRIVATE_22))
- {
- if ((src + 2) >= src_end)
- goto ccl_read_multibyte_character_suspend;
- reg[RRR] = *src++;
- i = (*src++ & 0x7F);
- reg[rrr] = ((i << 7) | (*src & 0x7F));
- src++;
- }
- else if (i == LEADING_CODE_8_BIT_CONTROL)
- {
- if (src >= src_end)
- goto ccl_read_multibyte_character_suspend;
- reg[RRR] = CHARSET_8_BIT_CONTROL;
- reg[rrr] = (*src++ - 0x20);
- }
- else if (i >= 0xA0)
- {
- reg[RRR] = CHARSET_8_BIT_GRAPHIC;
- reg[rrr] = i;
- }
- else
- {
- /* INVALID CODE. Return a single byte character. */
- reg[RRR] = CHARSET_ASCII;
- reg[rrr] = i;
- }
- break;
-
- ccl_read_multibyte_character_suspend:
- if (src <= src_end && !ccl->multibyte && ccl->last_block)
- {
- reg[RRR] = CHARSET_8_BIT_CONTROL;
- reg[rrr] = i;
- break;
- }
- src--;
- if (ccl->last_block)
- {
- ic = eof_ic;
- eof_hit = 1;
- goto ccl_repeat;
- }
- else
- CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC);
-
+ CCL_READ_CHAR (i);
+ CCL_ENCODE_CHAR (i, charset_list, reg[RRR], reg[rrr]);
break;
case CCL_WriteMultibyteChar2:
- i = reg[RRR]; /* charset */
- if (i == CHARSET_ASCII
- || i == CHARSET_8_BIT_CONTROL
- || i == CHARSET_8_BIT_GRAPHIC)
- i = reg[rrr] & 0xFF;
- else if (CHARSET_DIMENSION (i) == 1)
- i = ((i - 0x70) << 7) | (reg[rrr] & 0x7F);
- else if (i < MIN_CHARSET_PRIVATE_DIMENSION2)
- i = ((i - 0x8F) << 14) | reg[rrr];
- else
- i = ((i - 0xE0) << 14) | reg[rrr];
-
- CCL_WRITE_MULTIBYTE_CHAR (i);
-
+ if (! dst)
+ CCL_INVALID_CMD;
+ i = CCL_DECODE_CHAR (reg[RRR], reg[rrr]);
+ CCL_WRITE_CHAR (i);
break;
case CCL_TranslateCharacter:
- CCL_MAKE_CHAR (reg[RRR], reg[rrr], i);
- op = translate_char (GET_TRANSLATION_TABLE (reg[Rrr]),
- i, -1, 0, 0);
- SPLIT_CHAR (op, reg[RRR], i, j);
- if (j != -1)
- i = (i << 7) | j;
-
- reg[rrr] = i;
+ i = CCL_DECODE_CHAR (reg[RRR], reg[rrr]);
+ op = translate_char (GET_TRANSLATION_TABLE (reg[Rrr]), i);
+ CCL_ENCODE_CHAR (op, charset_list, reg[RRR], reg[rrr]);
break;
case CCL_TranslateCharacterConstTbl:
op = XINT (ccl_prog[ic]); /* table */
ic++;
- CCL_MAKE_CHAR (reg[RRR], reg[rrr], i);
- op = translate_char (GET_TRANSLATION_TABLE (op), i, -1, 0, 0);
- SPLIT_CHAR (op, reg[RRR], i, j);
- if (j != -1)
- i = (i << 7) | j;
-
- reg[rrr] = i;
+ i = CCL_DECODE_CHAR (reg[RRR], reg[rrr]);
+ op = translate_char (GET_TRANSLATION_TABLE (op), i);
+ CCL_ENCODE_CHAR (op, charset_list, reg[RRR], reg[rrr]);
break;
case CCL_LookupIntConstTbl:
@@ -1466,12 +1304,10 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
{
Lisp_Object opl;
opl = HASH_VALUE (h, op);
- if (!CHAR_VALID_P (XINT (opl), 0))
+ if (! CHARACTERP (opl))
CCL_INVALID_CMD;
- SPLIT_CHAR (XINT (opl), reg[RRR], i, j);
- if (j != -1)
- i = (i << 7) | j;
- reg[rrr] = i;
+ reg[RRR] = charset_unicode;
+ reg[rrr] = op;
reg[7] = 1; /* r7 true for success */
}
else
@@ -1482,7 +1318,7 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
case CCL_LookupCharConstTbl:
op = XINT (ccl_prog[ic]); /* table */
ic++;
- CCL_MAKE_CHAR (reg[RRR], reg[rrr], i);
+ i = CCL_DECODE_CHAR (reg[RRR], reg[rrr]);
{
struct Lisp_Hash_Table *h = GET_HASH_TABLE (op);
@@ -1916,10 +1752,10 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
}
msglen = strlen (msg);
- if (dst + msglen <= (dst_bytes ? dst_end : src))
+ if (dst + msglen <= dst_end)
{
- bcopy (msg, dst, msglen);
- dst += msglen;
+ for (i = 0; i < msglen; i++)
+ *dst++ = msg[i];
}
if (ccl->status == CCL_STAT_INVALID_CMD)
@@ -1945,10 +1781,8 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
ccl->ic = ic;
ccl->stack_idx = stack_idx;
ccl->prog = ccl_prog;
- ccl->eight_bit_control = (extra_bytes > 1);
- if (consumed)
- *consumed = src - source;
- return (dst ? dst - destination : 0);
+ ccl->consumed = src - source;
+ ccl->produced = dst - destination;
}
/* Resolve symbols in the specified CCL code (Lisp vector). This
@@ -2108,7 +1942,6 @@ setup_ccl_program (ccl, ccl_prog)
ccl->private_state = 0;
ccl->status = 0;
ccl->stack_idx = 0;
- ccl->eol_type = CODING_EOL_LF;
ccl->suppress_error = 0;
ccl->eight_bit_control = 0;
return 0;
@@ -2196,7 +2029,7 @@ programs. */)
? XINT (AREF (reg, i))
: 0);
- ccl_driver (&ccl, (unsigned char *)0, (unsigned char *)0, 0, 0, (int *)0);
+ ccl_driver (&ccl, NULL, NULL, 0, 0, Qnil);
QUIT;
if (ccl.status != CCL_STAT_SUCCESS)
error ("Error in CCL program at %dth code", ccl.ic);
@@ -2238,10 +2071,13 @@ usage: (ccl-execute-on-string CCL-PROGRAM STATUS STRING &optional CONTINUE UNIBY
{
Lisp_Object val;
struct ccl_program ccl;
- int i, produced;
+ int i;
int outbufsize;
- char *outbuf;
- struct gcpro gcpro1, gcpro2;
+ unsigned char *outbuf, *outp;
+ int str_chars, str_bytes;
+#define CCL_EXECUTE_BUF_SIZE 1024
+ int source[CCL_EXECUTE_BUF_SIZE], destination[CCL_EXECUTE_BUF_SIZE];
+ int consumed_chars, consumed_bytes, produced_chars;
if (setup_ccl_program (&ccl, ccl_prog) < 0)
error ("Invalid CCL program");
@@ -2251,7 +2087,8 @@ usage: (ccl-execute-on-string CCL-PROGRAM STATUS STRING &optional CONTINUE UNIBY
error ("Length of vector STATUS is not 9");
CHECK_STRING (str);
- GCPRO2 (status, str);
+ str_chars = SCHARS (str);
+ str_bytes = SBYTES (str);
for (i = 0; i < 8; i++)
{
@@ -2266,33 +2103,90 @@ usage: (ccl-execute-on-string CCL-PROGRAM STATUS STRING &optional CONTINUE UNIBY
if (ccl.ic < i && i < ccl.size)
ccl.ic = i;
}
- outbufsize = SBYTES (str) * ccl.buf_magnification + 256;
- outbuf = (char *) xmalloc (outbufsize);
- ccl.last_block = NILP (contin);
- ccl.multibyte = STRING_MULTIBYTE (str);
- produced = ccl_driver (&ccl, SDATA (str), outbuf,
- SBYTES (str), outbufsize, (int *) 0);
+
+ outbufsize = (ccl.buf_magnification
+ ? str_bytes * ccl.buf_magnification + 256
+ : str_bytes + 256);
+ outp = outbuf = (unsigned char *) xmalloc (outbufsize);
+
+ consumed_chars = consumed_bytes = 0;
+ produced_chars = 0;
+ while (1)
+ {
+ const unsigned char *p = SDATA (str) + consumed_bytes;
+ const unsigned char *endp = SDATA (str) + str_bytes;
+ int i = 0;
+ int *src, src_size;
+
+ if (endp - p == str_chars - consumed_chars)
+ while (i < CCL_EXECUTE_BUF_SIZE && p < endp)
+ source[i++] = *p++;
+ else
+ while (i < CCL_EXECUTE_BUF_SIZE && p < endp)
+ source[i++] = STRING_CHAR_ADVANCE (p);
+ consumed_chars += i;
+ consumed_bytes = p - SDATA (str);
+
+ if (consumed_bytes == str_bytes)
+ ccl.last_block = NILP (contin);
+ src = source;
+ src_size = i;
+ while (1)
+ {
+ ccl_driver (&ccl, src, destination, src_size, CCL_EXECUTE_BUF_SIZE,
+ Qnil);
+ produced_chars += ccl.produced;
+ if (NILP (unibyte_p))
+ {
+ if (outp - outbuf + MAX_MULTIBYTE_LENGTH * ccl.produced
+ > outbufsize)
+ {
+ int offset = outp - outbuf;
+ outbufsize += MAX_MULTIBYTE_LENGTH * ccl.produced;
+ outbuf = (unsigned char *) xrealloc (outbuf, outbufsize);
+ outp = outbuf + offset;
+ }
+ for (i = 0; i < ccl.produced; i++)
+ CHAR_STRING_ADVANCE (destination[i], outp);
+ }
+ else
+ {
+ if (outp - outbuf + ccl.produced > outbufsize)
+ {
+ int offset = outp - outbuf;
+ outbufsize += ccl.produced;
+ outbuf = (unsigned char *) xrealloc (outbuf, outbufsize);
+ outp = outbuf + offset;
+ }
+ for (i = 0; i < ccl.produced; i++)
+ *outp++ = destination[i];
+ }
+ src += ccl.consumed;
+ src_size -= ccl.consumed;
+ if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
+ break;
+ }
+
+ if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
+ || str_chars == consumed_chars)
+ break;
+ }
+
+ if (ccl.status == CCL_STAT_INVALID_CMD)
+ error ("Error in CCL program at %dth code", ccl.ic);
+ if (ccl.status == CCL_STAT_QUIT)
+ error ("CCL program interrupted at %dth code", ccl.ic);
+
for (i = 0; i < 8; i++)
ASET (status, i, make_number (ccl.reg[i]));
ASET (status, 8, make_number (ccl.ic));
- UNGCPRO;
if (NILP (unibyte_p))
- {
- int nchars;
-
- produced = str_as_multibyte (outbuf, outbufsize, produced, &nchars);
- val = make_multibyte_string (outbuf, nchars, produced);
- }
+ val = make_multibyte_string ((char *) outbuf, produced_chars,
+ outp - outbuf);
else
- val = make_unibyte_string (outbuf, produced);
+ val = make_unibyte_string ((char *) outbuf, produced_chars);
xfree (outbuf);
- QUIT;
- if (ccl.status == CCL_STAT_SUSPEND_BY_DST)
- error ("Output buffer for the CCL programs overflow");
- if (ccl.status != CCL_STAT_SUCCESS
- && ccl.status != CCL_STAT_SUSPEND_BY_SRC)
- error ("Error in CCL program at %dth code", ccl.ic);
return val;
}
@@ -2439,6 +2333,12 @@ syms_of_ccl ()
staticpro (&Vccl_program_table);
Vccl_program_table = Fmake_vector (make_number (32), Qnil);
+ Qccl = intern ("ccl");
+ staticpro (&Qccl);
+
+ Qcclp = intern ("cclp");
+ staticpro (&Qcclp);
+
Qccl_program = intern ("ccl-program");
staticpro (&Qccl_program);
diff --git a/src/ccl.h b/src/ccl.h
index 21a72df1d0b..96417e79fa5 100644
--- a/src/ccl.h
+++ b/src/ccl.h
@@ -2,6 +2,9 @@
Copyright (C) 1995, 1998, 2000
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -58,16 +61,14 @@ struct ccl_program {
many times bigger the output buffer
should be than the input buffer. */
int stack_idx; /* How deep the call of CCL_Call is nested. */
- int eol_type; /* When the CCL program is used for
- encoding by a coding system, set to
- the eol_type of the coding system.
- In other cases, always
- CODING_EOL_LF. */
- int multibyte; /* 1 if the source text is multibyte. */
+ int src_multibyte; /* 1 if the input buffer is multibyte. */
+ int dst_multibyte; /* 1 if the output buffer is multibyte. */
int cr_consumed; /* Flag for encoding DOS-like EOL
format when the CCL program is used
for encoding by a coding
system. */
+ int consumed;
+ int produced;
int suppress_error; /* If nonzero, don't insert error
message in the output. */
int eight_bit_control; /* If nonzero, ccl_driver counts all
@@ -81,13 +82,13 @@ struct ccl_program {
coding_system. */
struct ccl_spec {
- struct ccl_program decoder;
- struct ccl_program encoder;
- unsigned char valid_codes[256];
+ struct ccl_program ccl;
int cr_carryover; /* CR carryover flag. */
unsigned char eight_bit_carryover[MAX_MULTIBYTE_LENGTH];
};
+#define CODING_SPEC_CCL_PROGRAM(coding) ((coding)->spec.ccl.ccl)
+
/* Alist of fontname patterns vs corresponding CCL program. */
extern Lisp_Object Vfont_ccl_encoder_alist;
@@ -98,8 +99,8 @@ extern int setup_ccl_program P_ ((struct ccl_program *, Lisp_Object));
/* Check if CCL is updated or not. If not, re-setup members of CCL. */
extern int check_ccl_update P_ ((struct ccl_program *));
-extern int ccl_driver P_ ((struct ccl_program *, unsigned char *,
- unsigned char *, int, int, int *));
+extern void ccl_driver P_ ((struct ccl_program *, int *, int *, int, int,
+ Lisp_Object));
/* Vector of CCL program names vs corresponding program data. */
extern Lisp_Object Vccl_program_table;
@@ -108,6 +109,16 @@ extern Lisp_Object Vccl_program_table;
is an index for Vccl_protram_table. */
extern Lisp_Object Qccl_program_idx;
+extern Lisp_Object Qccl, Qcclp;
+
+EXFUN (Fccl_program_p, 1);
+
+#define CHECK_CCL_PROGRAM(x) \
+ do { \
+ if (NILP (Fccl_program_p (x))) \
+ x = wrong_type_argument (Qcclp, (x)); \
+ } while (0);
+
#endif /* EMACS_CCL_H */
/* arch-tag: 14681df7-876d-43de-bc71-6b78e23a4e3c
diff --git a/src/character.c b/src/character.c
new file mode 100644
index 00000000000..3d3e28bc7d2
--- /dev/null
+++ b/src/character.c
@@ -0,0 +1,998 @@
+/* Basic character support.
+ Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
+ Licensed to the Free Software Foundation.
+ Copyright (C) 2001, 2005, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2006
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs; see the file COPYING. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+/* At first, see the document in `character.h' to understand the code
+ in this file. */
+
+#ifdef emacs
+#include <config.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef emacs
+
+#include <sys/types.h>
+#include "lisp.h"
+#include "character.h"
+#include "buffer.h"
+#include "charset.h"
+#include "composite.h"
+#include "disptab.h"
+
+#else /* not emacs */
+
+#include "mulelib.h"
+
+#endif /* emacs */
+
+Lisp_Object Qcharacterp;
+
+/* Vector of translation table ever defined.
+ ID of a translation table is used to index this vector. */
+Lisp_Object Vtranslation_table_vector;
+
+/* A char-table for characters which may invoke auto-filling. */
+Lisp_Object Vauto_fill_chars;
+
+Lisp_Object Qauto_fill_chars;
+
+/* Char-table of information about which character to unify to which
+ Unicode character. */
+Lisp_Object Vchar_unify_table;
+
+/* A char-table. An element is non-nil iff the corresponding
+ character has a printable glyph. */
+Lisp_Object Vprintable_chars;
+
+/* A char-table. An elemnent is a column-width of the corresponding
+ character. */
+Lisp_Object Vchar_width_table;
+
+/* A char-table. An element is a symbol indicating the direction
+ property of corresponding character. */
+Lisp_Object Vchar_direction_table;
+
+/* Variable used locally in the macro FETCH_MULTIBYTE_CHAR. */
+unsigned char *_fetch_multibyte_char_p;
+
+/* Char table of scripts. */
+Lisp_Object Vchar_script_table;
+
+/* Alist of scripts vs representative characters. */
+Lisp_Object Vscript_representative_chars;
+
+static Lisp_Object Qchar_script_table;
+
+/* Mapping table from unibyte chars to multibyte chars. */
+int unibyte_to_multibyte_table[256];
+
+/* Nth element is 1 iff unibyte char N can be mapped to a multibyte
+ char. */
+char unibyte_has_multibyte_table[256];
+
+
+
+/* Store multibyte form of character C at P. If C has modifier bits,
+ handle them appropriately. */
+
+int
+char_string (c, p)
+ int c;
+ unsigned char *p;
+{
+ int bytes;
+
+ if (c & CHAR_MODIFIER_MASK)
+ {
+ /* As an non-ASCII character can't have modifier bits, we just
+ ignore the bits. */
+ if (ASCII_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
+ {
+ /* For Meta, Shift, and Control modifiers, we need special care. */
+ if (c & CHAR_META)
+ {
+ /* Move the meta bit to the right place for a string. */
+ c = (c & ~CHAR_META) | 0x80;
+ }
+ if (c & CHAR_SHIFT)
+ {
+ /* Shift modifier is valid only with [A-Za-z]. */
+ if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
+ c &= ~CHAR_SHIFT;
+ else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
+ c = (c & ~CHAR_SHIFT) - ('a' - 'A');
+ }
+ if (c & CHAR_CTL)
+ {
+ /* Simulate the code in lread.c. */
+ /* Allow `\C- ' and `\C-?'. */
+ if (c == (CHAR_CTL | ' '))
+ c = 0;
+ else if (c == (CHAR_CTL | '?'))
+ c = 127;
+ /* ASCII control chars are made from letters (both cases),
+ as well as the non-letters within 0100...0137. */
+ else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
+ c &= (037 | (~0177 & ~CHAR_CTL));
+ else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
+ c &= (037 | (~0177 & ~CHAR_CTL));
+ }
+ }
+
+ /* If C still has any modifier bits, just ignore it. */
+ c &= ~CHAR_MODIFIER_MASK;
+ }
+
+ MAYBE_UNIFY_CHAR (c);
+
+ if (c <= MAX_3_BYTE_CHAR)
+ {
+ bytes = CHAR_STRING (c, p);
+ }
+ else if (c <= MAX_4_BYTE_CHAR)
+ {
+ p[0] = (0xF0 | (c >> 18));
+ p[1] = (0x80 | ((c >> 12) & 0x3F));
+ p[2] = (0x80 | ((c >> 6) & 0x3F));
+ p[3] = (0x80 | (c & 0x3F));
+ bytes = 4;
+ }
+ else if (c <= MAX_5_BYTE_CHAR)
+ {
+ p[0] = 0xF8;
+ p[1] = (0x80 | ((c >> 18) & 0x0F));
+ p[2] = (0x80 | ((c >> 12) & 0x3F));
+ p[3] = (0x80 | ((c >> 6) & 0x3F));
+ p[4] = (0x80 | (c & 0x3F));
+ bytes = 5;
+ }
+ else
+ {
+ c = CHAR_TO_BYTE8 (c);
+ bytes = BYTE8_STRING (c, p);
+ }
+
+ return bytes;
+}
+
+
+/* Return a character whose multibyte form is at P. Set LEN is not
+ NULL, it must be a pointer to integer. In that case, set *LEN to
+ the byte length of the multibyte form. If ADVANCED is not NULL, is
+ must be a pointer to unsigned char. In that case, set *ADVANCED to
+ the ending address (i.e. the starting address of the next
+ character) of the multibyte form. */
+
+int
+string_char (p, advanced, len)
+ const unsigned char *p;
+ const unsigned char **advanced;
+ int *len;
+{
+ int c;
+ const unsigned char *saved_p = p;
+
+ if (*p < 0x80 || ! (*p & 0x20) || ! (*p & 0x10))
+ {
+ c = STRING_CHAR_ADVANCE (p);
+ }
+ else if (! (*p & 0x08))
+ {
+ c = ((((p)[0] & 0xF) << 18)
+ | (((p)[1] & 0x3F) << 12)
+ | (((p)[2] & 0x3F) << 6)
+ | ((p)[3] & 0x3F));
+ p += 4;
+ }
+ else
+ {
+ c = ((((p)[1] & 0x3F) << 18)
+ | (((p)[2] & 0x3F) << 12)
+ | (((p)[3] & 0x3F) << 6)
+ | ((p)[4] & 0x3F));
+ p += 5;
+ }
+
+ MAYBE_UNIFY_CHAR (c);
+
+ if (len)
+ *len = p - saved_p;
+ if (advanced)
+ *advanced = p;
+ return c;
+}
+
+
+/* Translate character C by translation table TABLE. If C is
+ negative, translate a character specified by CHARSET and CODE. If
+ no translation is found in TABLE, return the untranslated
+ character. If TABLE is a list, elements are char tables. In this
+ case, translace C by all tables. */
+
+int
+translate_char (table, c)
+ Lisp_Object table;
+ int c;
+{
+ if (CHAR_TABLE_P (table))
+ {
+ Lisp_Object ch;
+
+ ch = CHAR_TABLE_REF (table, c);
+ if (CHARACTERP (ch))
+ c = XINT (ch);
+ }
+ else
+ {
+ for (; CONSP (table); table = XCDR (table))
+ c = translate_char (XCAR (table), c);
+ }
+ return c;
+}
+
+/* Convert the multibyte character C to unibyte 8-bit character based
+ on the current value of charset_unibyte. If dimension of
+ charset_unibyte is more than one, return (C & 0xFF).
+
+ The argument REV_TBL is now ignored. It will be removed in the
+ future. */
+
+int
+multibyte_char_to_unibyte (c, rev_tbl)
+ int c;
+ Lisp_Object rev_tbl;
+{
+ struct charset *charset;
+ unsigned c1;
+
+ if (CHAR_BYTE8_P (c))
+ return CHAR_TO_BYTE8 (c);
+ charset = CHARSET_FROM_ID (charset_unibyte);
+ c1 = ENCODE_CHAR (charset, c);
+ return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
+}
+
+
+DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
+ doc: /* Return non-nil if OBJECT is a character. */)
+ (object, ignore)
+ Lisp_Object object, ignore;
+{
+ return (CHARACTERP (object) ? Qt : Qnil);
+}
+
+DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0,
+ doc: /* Return the character of the maximum code. */)
+ ()
+{
+ return make_number (MAX_CHAR);
+}
+
+DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
+ Sunibyte_char_to_multibyte, 1, 1, 0,
+ doc: /* Convert the unibyte character CH to multibyte character.
+The multibyte character is a result of decoding CH by
+the current unibyte charset (see `unibyte-charset'). */)
+ (ch)
+ Lisp_Object ch;
+{
+ int c;
+ struct charset *charset;
+
+ CHECK_CHARACTER (ch);
+ c = XFASTINT (ch);
+ if (c >= 0400)
+ error ("Invalid unibyte character: %d", c);
+ charset = CHARSET_FROM_ID (charset_unibyte);
+ c = DECODE_CHAR (charset, c);
+ if (c < 0)
+ c = BYTE8_TO_CHAR (XFASTINT (ch));
+ return make_number (c);
+}
+
+DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
+ Smultibyte_char_to_unibyte, 1, 1, 0,
+ doc: /* Convert the multibyte character CH to unibyte character.\n\
+The unibyte character is a result of encoding CH by
+the current primary charset (value of `charset-primary'). */)
+ (ch)
+ Lisp_Object ch;
+{
+ int c;
+
+ CHECK_CHARACTER (ch);
+ c = XFASTINT (ch);
+ c = CHAR_TO_BYTE8 (c);
+ return make_number (c);
+}
+
+DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
+ doc: /* Return 1 regardless of the argument CHAR.
+This is now an obsolete function. We keep it just for backward compatibility. */)
+ (ch)
+ Lisp_Object ch;
+{
+ CHECK_CHARACTER (ch);
+ return make_number (1);
+}
+
+DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
+ doc: /* Return width of CHAR when displayed in the current buffer.
+The width is measured by how many columns it occupies on the screen.
+Tab is taken to occupy `tab-width' columns. */)
+ (ch)
+ Lisp_Object ch;
+{
+ Lisp_Object disp;
+ int c, width;
+ struct Lisp_Char_Table *dp = buffer_display_table ();
+
+ CHECK_CHARACTER (ch);
+ c = XINT (ch);
+
+ /* Get the way the display table would display it. */
+ disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
+
+ if (VECTORP (disp))
+ width = ASIZE (disp);
+ else
+ width = CHAR_WIDTH (c);
+
+ return make_number (width);
+}
+
+/* Return width of string STR of length LEN when displayed in the
+ current buffer. The width is measured by how many columns it
+ occupies on the screen. If PRECISION > 0, return the width of
+ longest substring that doesn't exceed PRECISION, and set number of
+ characters and bytes of the substring in *NCHARS and *NBYTES
+ respectively. */
+
+int
+c_string_width (str, len, precision, nchars, nbytes)
+ const unsigned char *str;
+ int precision, *nchars, *nbytes;
+{
+ int i = 0, i_byte = 0;
+ int width = 0;
+ struct Lisp_Char_Table *dp = buffer_display_table ();
+
+ while (i_byte < len)
+ {
+ int bytes, thiswidth;
+ Lisp_Object val;
+ int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
+
+ if (dp)
+ {
+ val = DISP_CHAR_VECTOR (dp, c);
+ if (VECTORP (val))
+ thiswidth = XVECTOR (val)->size;
+ else
+ thiswidth = CHAR_WIDTH (c);
+ }
+ else
+ {
+ thiswidth = CHAR_WIDTH (c);
+ }
+
+ if (precision > 0
+ && (width + thiswidth > precision))
+ {
+ *nchars = i;
+ *nbytes = i_byte;
+ return width;
+ }
+ i++;
+ i_byte += bytes;
+ width += thiswidth;
+ }
+
+ if (precision > 0)
+ {
+ *nchars = i;
+ *nbytes = i_byte;
+ }
+
+ return width;
+}
+
+/* Return width of string STR of length LEN when displayed in the
+ current buffer. The width is measured by how many columns it
+ occupies on the screen. */
+
+int
+strwidth (str, len)
+ unsigned char *str;
+ int len;
+{
+ return c_string_width (str, len, -1, NULL, NULL);
+}
+
+/* Return width of Lisp string STRING when displayed in the current
+ buffer. The width is measured by how many columns it occupies on
+ the screen while paying attention to compositions. If PRECISION >
+ 0, return the width of longest substring that doesn't exceed
+ PRECISION, and set number of characters and bytes of the substring
+ in *NCHARS and *NBYTES respectively. */
+
+int
+lisp_string_width (string, precision, nchars, nbytes)
+ Lisp_Object string;
+ int precision, *nchars, *nbytes;
+{
+ int len = SCHARS (string);
+ /* This set multibyte to 0 even if STRING is multibyte when it
+ contains only ascii and eight-bit-graphic, but that's
+ intentional. */
+ int multibyte = len < SBYTES (string);
+ unsigned char *str = SDATA (string);
+ int i = 0, i_byte = 0;
+ int width = 0;
+ struct Lisp_Char_Table *dp = buffer_display_table ();
+
+ while (i < len)
+ {
+ int chars, bytes, thiswidth;
+ Lisp_Object val;
+ int cmp_id;
+ EMACS_INT ignore, end;
+
+ if (find_composition (i, -1, &ignore, &end, &val, string)
+ && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
+ >= 0))
+ {
+ thiswidth = composition_table[cmp_id]->width;
+ chars = end - i;
+ bytes = string_char_to_byte (string, end) - i_byte;
+ }
+ else
+ {
+ int c;
+
+ if (multibyte)
+ c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
+ else
+ c = str[i_byte], bytes = 1;
+ chars = 1;
+ if (dp)
+ {
+ val = DISP_CHAR_VECTOR (dp, c);
+ if (VECTORP (val))
+ thiswidth = XVECTOR (val)->size;
+ else
+ thiswidth = CHAR_WIDTH (c);
+ }
+ else
+ {
+ thiswidth = CHAR_WIDTH (c);
+ }
+ }
+
+ if (precision > 0
+ && (width + thiswidth > precision))
+ {
+ *nchars = i;
+ *nbytes = i_byte;
+ return width;
+ }
+ i += chars;
+ i_byte += bytes;
+ width += thiswidth;
+ }
+
+ if (precision > 0)
+ {
+ *nchars = i;
+ *nbytes = i_byte;
+ }
+
+ return width;
+}
+
+DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
+ doc: /* Return width of STRING when displayed in the current buffer.
+Width is measured by how many columns it occupies on the screen.
+When calculating width of a multibyte character in STRING,
+only the base leading-code is considered; the validity of
+the following bytes is not checked. Tabs in STRING are always
+taken to occupy `tab-width' columns. */)
+ (str)
+ Lisp_Object str;
+{
+ Lisp_Object val;
+
+ CHECK_STRING (str);
+ XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
+ return val;
+}
+
+DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
+ doc: /* Return the direction of CHAR.
+The returned value is 0 for left-to-right and 1 for right-to-left. */)
+ (ch)
+ Lisp_Object ch;
+{
+ int c;
+
+ CHECK_CHARACTER (ch);
+ c = XINT (ch);
+ return CHAR_TABLE_REF (Vchar_direction_table, c);
+}
+
+/* Return the number of characters in the NBYTES bytes at PTR.
+ This works by looking at the contents and checking for multibyte
+ sequences while assuming that there's no invalid sequence.
+ However, if the current buffer has enable-multibyte-characters =
+ nil, we treat each byte as a character. */
+
+int
+chars_in_text (ptr, nbytes)
+ const unsigned char *ptr;
+ int nbytes;
+{
+ /* current_buffer is null at early stages of Emacs initialization. */
+ if (current_buffer == 0
+ || NILP (current_buffer->enable_multibyte_characters))
+ return nbytes;
+
+ return multibyte_chars_in_text (ptr, nbytes);
+}
+
+/* Return the number of characters in the NBYTES bytes at PTR.
+ This works by looking at the contents and checking for multibyte
+ sequences while assuming that there's no invalid sequence. It
+ ignores enable-multibyte-characters. */
+
+int
+multibyte_chars_in_text (ptr, nbytes)
+ const unsigned char *ptr;
+ int nbytes;
+{
+ const unsigned char *endp = ptr + nbytes;
+ int chars = 0;
+
+ while (ptr < endp)
+ {
+ int len = MULTIBYTE_LENGTH (ptr, endp);
+
+ if (len == 0)
+ abort ();
+ ptr += len;
+ chars++;
+ }
+
+ return chars;
+}
+
+/* Parse unibyte text at STR of LEN bytes as a multibyte text, count
+ characters and bytes in it, and store them in *NCHARS and *NBYTES
+ respectively. On counting bytes, pay attention to that 8-bit
+ characters not constructing a valid multibyte sequence are
+ represented by 2-byte in a multibyte text. */
+
+void
+parse_str_as_multibyte (str, len, nchars, nbytes)
+ const unsigned char *str;
+ int len, *nchars, *nbytes;
+{
+ const unsigned char *endp = str + len;
+ int n, chars = 0, bytes = 0;
+
+ if (len >= MAX_MULTIBYTE_LENGTH)
+ {
+ const unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
+ while (str < adjusted_endp)
+ {
+ if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
+ str += n, bytes += n;
+ else
+ str++, bytes += 2;
+ chars++;
+ }
+ }
+ while (str < endp)
+ {
+ if ((n = MULTIBYTE_LENGTH (str, endp)) > 0)
+ str += n, bytes += n;
+ else
+ str++, bytes += 2;
+ chars++;
+ }
+
+ *nchars = chars;
+ *nbytes = bytes;
+ return;
+}
+
+/* Arrange unibyte text at STR of NBYTES bytes as a multibyte text.
+ It actually converts only such 8-bit characters that don't contruct
+ a multibyte sequence to multibyte forms of Latin-1 characters. If
+ NCHARS is nonzero, set *NCHARS to the number of characters in the
+ text. It is assured that we can use LEN bytes at STR as a work
+ area and that is enough. Return the number of bytes of the
+ resulting text. */
+
+int
+str_as_multibyte (str, len, nbytes, nchars)
+ unsigned char *str;
+ int len, nbytes, *nchars;
+{
+ unsigned char *p = str, *endp = str + nbytes;
+ unsigned char *to;
+ int chars = 0;
+ int n;
+
+ if (nbytes >= MAX_MULTIBYTE_LENGTH)
+ {
+ unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
+ while (p < adjusted_endp
+ && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
+ p += n, chars++;
+ }
+ while ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
+ p += n, chars++;
+ if (nchars)
+ *nchars = chars;
+ if (p == endp)
+ return nbytes;
+
+ to = p;
+ nbytes = endp - p;
+ endp = str + len;
+ safe_bcopy ((char *) p, (char *) (endp - nbytes), nbytes);
+ p = endp - nbytes;
+
+ if (nbytes >= MAX_MULTIBYTE_LENGTH)
+ {
+ unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
+ while (p < adjusted_endp)
+ {
+ if ((n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
+ {
+ while (n--)
+ *to++ = *p++;
+ }
+ else
+ {
+ int c = *p++;
+ c = BYTE8_TO_CHAR (c);
+ to += CHAR_STRING (c, to);
+ }
+ }
+ chars++;
+ }
+ while (p < endp)
+ {
+ if ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
+ {
+ while (n--)
+ *to++ = *p++;
+ }
+ else
+ {
+ int c = *p++;
+ c = BYTE8_TO_CHAR (c);
+ to += CHAR_STRING (c, to);
+ }
+ chars++;
+ }
+ if (nchars)
+ *nchars = chars;
+ return (to - str);
+}
+
+/* Parse unibyte string at STR of LEN bytes, and return the number of
+ bytes it may ocupy when converted to multibyte string by
+ `str_to_multibyte'. */
+
+int
+parse_str_to_multibyte (str, len)
+ unsigned char *str;
+ int len;
+{
+ unsigned char *endp = str + len;
+ int bytes;
+
+ for (bytes = 0; str < endp; str++)
+ bytes += (*str < 0x80) ? 1 : 2;
+ return bytes;
+}
+
+
+/* Convert unibyte text at STR of NBYTES bytes to a multibyte text
+ that contains the same single-byte characters. It actually
+ converts all 8-bit characters to multibyte forms. It is assured
+ that we can use LEN bytes at STR as a work area and that is
+ enough. */
+
+int
+str_to_multibyte (str, len, bytes)
+ unsigned char *str;
+ int len, bytes;
+{
+ unsigned char *p = str, *endp = str + bytes;
+ unsigned char *to;
+
+ while (p < endp && *p < 0x80) p++;
+ if (p == endp)
+ return bytes;
+ to = p;
+ bytes = endp - p;
+ endp = str + len;
+ safe_bcopy ((char *) p, (char *) (endp - bytes), bytes);
+ p = endp - bytes;
+ while (p < endp)
+ {
+ int c = *p++;
+
+ if (c >= 0x80)
+ c = BYTE8_TO_CHAR (c);
+ to += CHAR_STRING (c, to);
+ }
+ return (to - str);
+}
+
+/* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
+ actually converts characters in the range 0x80..0xFF to
+ unibyte. */
+
+int
+str_as_unibyte (str, bytes)
+ unsigned char *str;
+ int bytes;
+{
+ const unsigned char *p = str, *endp = str + bytes;
+ unsigned char *to;
+ int c, len;
+
+ while (p < endp)
+ {
+ c = *p;
+ len = BYTES_BY_CHAR_HEAD (c);
+ if (CHAR_BYTE8_HEAD_P (c))
+ break;
+ p += len;
+ }
+ to = str + (p - str);
+ while (p < endp)
+ {
+ c = *p;
+ len = BYTES_BY_CHAR_HEAD (c);
+ if (CHAR_BYTE8_HEAD_P (c))
+ {
+ c = STRING_CHAR_ADVANCE (p);
+ *to++ = CHAR_TO_BYTE8 (c);
+ }
+ else
+ {
+ while (len--) *to++ = *p++;
+ }
+ }
+ return (to - str);
+}
+
+int
+string_count_byte8 (string)
+ Lisp_Object string;
+{
+ int multibyte = STRING_MULTIBYTE (string);
+ int nbytes = SBYTES (string);
+ unsigned char *p = SDATA (string);
+ unsigned char *pend = p + nbytes;
+ int count = 0;
+ int c, len;
+
+ if (multibyte)
+ while (p < pend)
+ {
+ c = *p;
+ len = BYTES_BY_CHAR_HEAD (c);
+
+ if (CHAR_BYTE8_HEAD_P (c))
+ count++;
+ p += len;
+ }
+ else
+ while (p < pend)
+ {
+ if (*p++ >= 0x80)
+ count++;
+ }
+ return count;
+}
+
+
+Lisp_Object
+string_escape_byte8 (string)
+ Lisp_Object string;
+{
+ int nchars = SCHARS (string);
+ int nbytes = SBYTES (string);
+ int multibyte = STRING_MULTIBYTE (string);
+ int byte8_count;
+ const unsigned char *src, *src_end;
+ unsigned char *dst;
+ Lisp_Object val;
+ int c, len;
+
+ if (multibyte && nchars == nbytes)
+ return string;
+
+ byte8_count = string_count_byte8 (string);
+
+ if (byte8_count == 0)
+ return string;
+
+ if (multibyte)
+ /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */
+ val = make_uninit_multibyte_string (nchars + byte8_count * 3,
+ nbytes + byte8_count * 2);
+ else
+ /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */
+ val = make_uninit_string (nbytes + byte8_count * 3);
+
+ src = SDATA (string);
+ src_end = src + nbytes;
+ dst = SDATA (val);
+ if (multibyte)
+ while (src < src_end)
+ {
+ c = *src;
+ len = BYTES_BY_CHAR_HEAD (c);
+
+ if (CHAR_BYTE8_HEAD_P (c))
+ {
+ c = STRING_CHAR_ADVANCE (src);
+ c = CHAR_TO_BYTE8 (c);
+ sprintf ((char *) dst, "\\%03o", c);
+ dst += 4;
+ }
+ else
+ while (len--) *dst++ = *src++;
+ }
+ else
+ while (src < src_end)
+ {
+ c = *src++;
+ if (c >= 0x80)
+ {
+ sprintf ((char *) dst, "\\%03o", c);
+ dst += 4;
+ }
+ else
+ *dst++ = c;
+ }
+ return val;
+}
+
+
+DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
+ doc: /*
+Concatenate all the argument characters and make the result a string.
+usage: (string &rest CHARACTERS) */)
+ (n, args)
+ int n;
+ Lisp_Object *args;
+{
+ int i;
+ unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
+ unsigned char *p = buf;
+ int c;
+
+ for (i = 0; i < n; i++)
+ {
+ CHECK_CHARACTER (args[i]);
+ c = XINT (args[i]);
+ p += CHAR_STRING (c, p);
+ }
+
+ return make_string_from_bytes ((char *) buf, n, p - buf);
+}
+
+void
+init_character_once ()
+{
+}
+
+#ifdef emacs
+
+void
+syms_of_character ()
+{
+ DEFSYM (Qcharacterp, "characterp");
+ DEFSYM (Qauto_fill_chars, "auto-fill-chars");
+
+ staticpro (&Vchar_unify_table);
+ Vchar_unify_table = Qnil;
+
+ defsubr (&Smax_char);
+ defsubr (&Scharacterp);
+ defsubr (&Sunibyte_char_to_multibyte);
+ defsubr (&Smultibyte_char_to_unibyte);
+ defsubr (&Schar_bytes);
+ defsubr (&Schar_width);
+ defsubr (&Sstring_width);
+ defsubr (&Schar_direction);
+ defsubr (&Sstring);
+
+ DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
+ doc: /*
+Vector recording all translation tables ever defined.
+Each element is a pair (SYMBOL . TABLE) relating the table to the
+symbol naming it. The ID of a translation table is an index into this vector. */);
+ Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
+
+ DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
+ doc: /*
+A char-table for characters which invoke auto-filling.
+Such characters have value t in this table. */);
+ Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
+ CHAR_TABLE_SET (Vauto_fill_chars, ' ', Qt);
+ CHAR_TABLE_SET (Vauto_fill_chars, '\n', Qt);
+
+ DEFVAR_LISP ("char-width-table", &Vchar_width_table,
+ doc: /*
+A char-table for width (columns) of each character. */);
+ Vchar_width_table = Fmake_char_table (Qnil, make_number (1));
+ char_table_set_range (Vchar_width_table, 0x80, 0x9F, make_number (4));
+ char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR,
+ make_number (4));
+
+ DEFVAR_LISP ("char-direction-table", &Vchar_direction_table,
+ doc: /* A char-table for direction of each character. */);
+ Vchar_direction_table = Fmake_char_table (Qnil, make_number (1));
+
+ DEFVAR_LISP ("printable-chars", &Vprintable_chars,
+ doc: /* A char-table for each printable character. */);
+ Vprintable_chars = Fmake_char_table (Qnil, Qnil);
+ Fset_char_table_range (Vprintable_chars,
+ Fcons (make_number (32), make_number (126)), Qt);
+ Fset_char_table_range (Vprintable_chars,
+ Fcons (make_number (160),
+ make_number (MAX_5_BYTE_CHAR)), Qt);
+
+ DEFVAR_LISP ("char-script-table", &Vchar_script_table,
+ doc: /* Char table of script symbols.
+It has one extra slot whose value is a list of script symbols. */);
+
+ /* Intern this now in case it isn't already done.
+ Setting this variable twice is harmless.
+ But don't staticpro it here--that is done in alloc.c. */
+ Qchar_table_extra_slots = intern ("char-table-extra-slots");
+ DEFSYM (Qchar_script_table, "char-script-table");
+ Fput (Qchar_script_table, Qchar_table_extra_slots, make_number (1));
+ Vchar_script_table = Fmake_char_table (Qchar_script_table, Qnil);
+
+ DEFVAR_LISP ("script-representative-chars", &Vscript_representative_chars,
+ doc: /* Alist of scripts vs the representative characters. */);
+ Vscript_representative_chars = Qnil;
+}
+
+#endif /* emacs */
+
+/* arch-tag: b6665960-3c3d-4184-85cd-af4318197999
+ (do not change this comment) */
diff --git a/src/character.h b/src/character.h
new file mode 100644
index 00000000000..72823752d96
--- /dev/null
+++ b/src/character.h
@@ -0,0 +1,662 @@
+/* Header for multibyte character handler.
+ Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
+ Licensed to the Free Software Foundation.
+ Copyright (C) 2003, 2006
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs; see the file COPYING. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifndef EMACS_CHARACTER_H
+#define EMACS_CHARACTER_H
+
+/* character code 1st byte byte sequence
+ -------------- -------- -------------
+ 0-7F 00..7F 0xxxxxxx
+ 80-7FF C2..DF 110xxxxx 10xxxxxx
+ 800-FFFF E0..EF 1110xxxx 10xxxxxx 10xxxxxx
+ 10000-1FFFFF F0..F7 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ 200000-3FFF7F F8 11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ 3FFF80-3FFFFF C0..C1 1100000x 10xxxxxx (for eight-bit-char)
+ 400000-... invalid
+
+ invalid 1st byte 80..BF 10xxxxxx
+ F9..FF 11111xxx (xxx != 000)
+*/
+
+/* Maximum character code ((1 << CHARACTERBITS) - 1). */
+#define MAX_CHAR 0x3FFFFF
+
+/* Maximum Unicode character code. */
+#define MAX_UNICODE_CHAR 0x10FFFF
+
+/* Maximum N-byte character codes. */
+#define MAX_1_BYTE_CHAR 0x7F
+#define MAX_2_BYTE_CHAR 0x7FF
+#define MAX_3_BYTE_CHAR 0xFFFF
+#define MAX_4_BYTE_CHAR 0x1FFFFF
+#define MAX_5_BYTE_CHAR 0x3FFF7F
+
+/* Nonzero iff C is a character that corresponds to a raw 8-bit
+ byte. */
+#define CHAR_BYTE8_P(c) ((c) > MAX_5_BYTE_CHAR)
+
+/* Return the character code for raw 8-bit byte BYTE. */
+#define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00)
+
+/* Return the raw 8-bit byte for character C. */
+#define CHAR_TO_BYTE8(c) \
+ (CHAR_BYTE8_P (c) \
+ ? (c) - 0x3FFF00 \
+ : multibyte_char_to_unibyte (c, Qnil))
+
+/* Nonzero iff BYTE is the 1st byte of a multibyte form of a character
+ that corresponds to a raw 8-bit byte. */
+#define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1)
+
+/* Mapping table from unibyte chars to multibyte chars. */
+extern int unibyte_to_multibyte_table[256];
+
+/* Convert the unibyte character C to the corresponding multibyte
+ character. If C can't be converted, return C. */
+#define unibyte_char_to_multibyte(c) \
+ ((c) < 256 ? unibyte_to_multibyte_table[(c)] : (c))
+
+/* Nth element is 1 iff unibyte char N can be mapped to a multibyte
+ char. */
+extern char unibyte_has_multibyte_table[256];
+
+#define UNIBYTE_CHAR_HAS_MULTIBYTE_P(c) (unibyte_has_multibyte_table[(c)])
+
+/* If C is not ASCII, make it unibyte. */
+#define MAKE_CHAR_UNIBYTE(c) \
+ do { \
+ if (! ASCII_CHAR_P (c)) \
+ c = CHAR_TO_BYTE8 (c); \
+ } while (0)
+
+
+/* If C is not ASCII, make it multibyte. It assumes C < 256. */
+#define MAKE_CHAR_MULTIBYTE(c) ((c) = unibyte_to_multibyte_table[(c)])
+
+/* This is the maximum byte length of multibyte form. */
+#define MAX_MULTIBYTE_LENGTH 5
+
+/* Return a Lisp character whose character code is C. It assumes C is
+ a valid character code. */
+#define make_char(c) make_number (c)
+
+/* Nonzero iff C is an ASCII byte. */
+#define ASCII_BYTE_P(c) ((unsigned) (c) < 0x80)
+
+/* Nonzero iff X is a character. */
+#define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)
+
+/* Nonzero iff C is valid as a character code. GENERICP is not used
+ now. */
+#define CHAR_VALID_P(c, genericp) ((unsigned) (c) <= MAX_CHAR)
+
+/* Check if Lisp object X is a character or not. */
+#define CHECK_CHARACTER(x) \
+ CHECK_TYPE (CHARACTERP (x), Qcharacterp, x)
+
+#define CHECK_CHARACTER_CAR(x) \
+ do { \
+ Lisp_Object tmp = XCAR (x); \
+ CHECK_CHARACTER (tmp); \
+ XSETCAR ((x), tmp); \
+ } while (0)
+
+#define CHECK_CHARACTER_CDR(x) \
+ do { \
+ Lisp_Object tmp = XCDR (x); \
+ CHECK_CHARACTER (tmp); \
+ XSETCDR ((x), tmp); \
+ } while (0)
+
+/* Nonzero iff C is an ASCII character. */
+#define ASCII_CHAR_P(c) ((unsigned) (c) < 0x80)
+
+/* Nonzero iff C is a character of code less than 0x100. */
+#define SINGLE_BYTE_CHAR_P(c) ((unsigned) (c) < 0x100)
+
+/* Nonzero if character C has a printable glyph. */
+#define CHAR_PRINTABLE_P(c) \
+ (((c) >= 32 && ((c) < 127) \
+ || ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c)))))
+
+/* Return byte length of multibyte form for character C. */
+#define CHAR_BYTES(c) \
+ ( (c) <= MAX_1_BYTE_CHAR ? 1 \
+ : (c) <= MAX_2_BYTE_CHAR ? 2 \
+ : (c) <= MAX_3_BYTE_CHAR ? 3 \
+ : (c) <= MAX_4_BYTE_CHAR ? 4 \
+ : (c) <= MAX_5_BYTE_CHAR ? 5 \
+ : 2)
+
+
+/* Return the leading code of multibyte form of C. */
+#define CHAR_LEADING_CODE(c) \
+ ((c) <= MAX_1_BYTE_CHAR ? c \
+ : (c) <= MAX_2_BYTE_CHAR ? (0xC0 | ((c) >> 6)) \
+ : (c) <= MAX_3_BYTE_CHAR ? (0xE0 | ((c) >> 12)) \
+ : (c) <= MAX_4_BYTE_CHAR ? (0xF0 | ((c) >> 18)) \
+ : (c) <= MAX_5_BYTE_CHAR ? 0xF8 \
+ : (0xC0 | (((c) >> 6) & 0x01)))
+
+
+/* Store multibyte form of the character C in P. The caller should
+ allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
+ Returns the length of the multibyte form. */
+
+#define CHAR_STRING(c, p) \
+ ((unsigned) (c) <= MAX_1_BYTE_CHAR \
+ ? ((p)[0] = (c), \
+ 1) \
+ : (unsigned) (c) <= MAX_2_BYTE_CHAR \
+ ? ((p)[0] = (0xC0 | ((c) >> 6)), \
+ (p)[1] = (0x80 | ((c) & 0x3F)), \
+ 2) \
+ : (unsigned) (c) <= MAX_3_BYTE_CHAR \
+ ? ((p)[0] = (0xE0 | ((c) >> 12)), \
+ (p)[1] = (0x80 | (((c) >> 6) & 0x3F)), \
+ (p)[2] = (0x80 | ((c) & 0x3F)), \
+ 3) \
+ : char_string (c, p))
+
+/* Store multibyte form of byte B in P. The caller should allocate at
+ least MAX_MULTIBYTE_LENGTH bytes area at P in advance. Returns the
+ length of the multibyte form. */
+
+#define BYTE8_STRING(b, p) \
+ ((p)[0] = (0xC0 | (((b) >> 6) & 0x01)), \
+ (p)[1] = (0x80 | ((b) & 0x3F)), \
+ 2)
+
+
+/* Store multibyte form of the character C in P. The caller should
+ allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
+ And, advance P to the end of the multibyte form. */
+
+#define CHAR_STRING_ADVANCE(c, p) \
+ do { \
+ if ((c) <= MAX_1_BYTE_CHAR) \
+ *(p)++ = (c); \
+ else if ((c) <= MAX_2_BYTE_CHAR) \
+ *(p)++ = (0xC0 | ((c) >> 6)), \
+ *(p)++ = (0x80 | ((c) & 0x3F)); \
+ else if ((c) <= MAX_3_BYTE_CHAR) \
+ *(p)++ = (0xE0 | ((c) >> 12)), \
+ *(p)++ = (0x80 | (((c) >> 6) & 0x3F)), \
+ *(p)++ = (0x80 | ((c) & 0x3F)); \
+ else \
+ (p) += char_string ((c), (p)); \
+ } while (0)
+
+
+/* Nonzero iff BYTE starts a non-ASCII character in a multibyte
+ form. */
+#define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0)
+
+/* Nonzero iff BYTE is a trailing code of a non-ASCII character in a
+ multibyte form. */
+#define TRAILING_CODE_P(byte) (((byte) & 0xC0) == 0x80)
+
+/* Nonzero iff BYTE starts a character in a multibyte form.
+ This is equivalent to:
+ (ASCII_BYTE_P (byte) || LEADING_CODE_P (byte)) */
+#define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80)
+
+/* Just kept for backward compatibility. This macro will be removed
+ in the future. */
+#define BASE_LEADING_CODE_P LEADING_CODE_P
+
+/* How many bytes a character that starts with BYTE occupies in a
+ multibyte form. */
+#define BYTES_BY_CHAR_HEAD(byte) \
+ (!((byte) & 0x80) ? 1 \
+ : !((byte) & 0x20) ? 2 \
+ : !((byte) & 0x10) ? 3 \
+ : !((byte) & 0x08) ? 4 \
+ : 5)
+
+
+/* Return the length of the multi-byte form at string STR of length
+ LEN while assuming that STR points a valid multi-byte form. As
+ this macro isn't necessary anymore, all callers will be changed to
+ use BYTES_BY_CHAR_HEAD directly in the future. */
+
+#define MULTIBYTE_FORM_LENGTH(str, len) \
+ BYTES_BY_CHAR_HEAD (*(str))
+
+/* Parse multibyte string STR of length LENGTH and set BYTES to the
+ byte length of a character at STR while assuming that STR points a
+ valid multibyte form. As this macro isn't necessary anymore, all
+ callers will be changed to use BYTES_BY_CHAR_HEAD directly in the
+ future. */
+
+#define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
+ (bytes) = BYTES_BY_CHAR_HEAD (*(str))
+
+/* The byte length of multibyte form at unibyte string P ending at
+ PEND. If STR doesn't point a valid multibyte form, return 0. */
+
+#define MULTIBYTE_LENGTH(p, pend) \
+ (p >= pend ? 0 \
+ : !((p)[0] & 0x80) ? 1 \
+ : ((p + 1 >= pend) || (((p)[1] & 0xC0) != 0x80)) ? 0 \
+ : ((p)[0] & 0xE0) == 0xC0 ? 2 \
+ : ((p + 2 >= pend) || (((p)[2] & 0xC0) != 0x80)) ? 0 \
+ : ((p)[0] & 0xF0) == 0xE0 ? 3 \
+ : ((p + 3 >= pend) || (((p)[3] & 0xC0) != 0x80)) ? 0 \
+ : ((p)[0] & 0xF8) == 0xF0 ? 4 \
+ : ((p + 4 >= pend) || (((p)[4] & 0xC0) != 0x80)) ? 0 \
+ : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \
+ : 0)
+
+
+/* Like MULTIBYTE_LENGTH but don't check the ending address. */
+
+#define MULTIBYTE_LENGTH_NO_CHECK(p) \
+ (!((p)[0] & 0x80) ? 1 \
+ : ((p)[1] & 0xC0) != 0x80 ? 0 \
+ : ((p)[0] & 0xE0) == 0xC0 ? 2 \
+ : ((p)[2] & 0xC0) != 0x80 ? 0 \
+ : ((p)[0] & 0xF0) == 0xE0 ? 3 \
+ : ((p)[3] & 0xC0) != 0x80 ? 0 \
+ : ((p)[0] & 0xF8) == 0xF0 ? 4 \
+ : ((p)[4] & 0xC0) != 0x80 ? 0 \
+ : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \
+ : 0)
+
+/* If P is before LIMIT, advance P to the next character boundary. It
+ assumes that P is already at a character boundary of the sane
+ mulitbyte form whose end address is LIMIT. */
+
+#define NEXT_CHAR_BOUNDARY(p, limit) \
+ do { \
+ if ((p) < (limit)) \
+ (p) += BYTES_BY_CHAR_HEAD (*(p)); \
+ } while (0)
+
+
+/* If P is after LIMIT, advance P to the previous character boundary.
+ It assumes that P is already at a character boundary of the sane
+ mulitbyte form whose beginning address is LIMIT. */
+
+#define PREV_CHAR_BOUNDARY(p, limit) \
+ do { \
+ if ((p) > (limit)) \
+ { \
+ const unsigned char *p0 = (p); \
+ do { \
+ p0--; \
+ } while (p0 >= limit && ! CHAR_HEAD_P (*p0)); \
+ (p) = (BYTES_BY_CHAR_HEAD (*p0) == (p) - p0) ? p0 : (p) - 1; \
+ } \
+ } while (0)
+
+/* Return the character code of character whose multibyte form is at
+ P. The argument LEN is ignored. It will be removed in the
+ future. */
+
+#define STRING_CHAR(p, len) \
+ (!((p)[0] & 0x80) \
+ ? (p)[0] \
+ : ! ((p)[0] & 0x20) \
+ ? (((((p)[0] & 0x1F) << 6) \
+ | ((p)[1] & 0x3F)) \
+ + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)) \
+ : ! ((p)[0] & 0x10) \
+ ? ((((p)[0] & 0x0F) << 12) \
+ | (((p)[1] & 0x3F) << 6) \
+ | ((p)[2] & 0x3F)) \
+ : string_char ((p), NULL, NULL))
+
+
+/* Like STRING_CHAR but set ACTUAL_LEN to the length of multibyte
+ form. The argument LEN is ignored. It will be removed in the
+ future. */
+
+#define STRING_CHAR_AND_LENGTH(p, len, actual_len) \
+ (!((p)[0] & 0x80) \
+ ? ((actual_len) = 1, (p)[0]) \
+ : ! ((p)[0] & 0x20) \
+ ? ((actual_len) = 2, \
+ (((((p)[0] & 0x1F) << 6) \
+ | ((p)[1] & 0x3F)) \
+ + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0))) \
+ : ! ((p)[0] & 0x10) \
+ ? ((actual_len) = 3, \
+ ((((p)[0] & 0x0F) << 12) \
+ | (((p)[1] & 0x3F) << 6) \
+ | ((p)[2] & 0x3F))) \
+ : string_char ((p), NULL, &actual_len))
+
+
+/* Like STRING_CHAR but advance P to the end of multibyte form. */
+
+#define STRING_CHAR_ADVANCE(p) \
+ (!((p)[0] & 0x80) \
+ ? *(p)++ \
+ : ! ((p)[0] & 0x20) \
+ ? ((p) += 2, \
+ ((((p)[-2] & 0x1F) << 6) \
+ | ((p)[-1] & 0x3F) \
+ | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \
+ : ! ((p)[0] & 0x10) \
+ ? ((p) += 3, \
+ ((((p)[-3] & 0x0F) << 12) \
+ | (((p)[-2] & 0x3F) << 6) \
+ | ((p)[-1] & 0x3F))) \
+ : string_char ((p), &(p), NULL))
+
+
+/* Fetch the "next" character from Lisp string STRING at byte position
+ BYTEIDX, character position CHARIDX. Store it into OUTPUT.
+
+ All the args must be side-effect-free.
+ BYTEIDX and CHARIDX must be lvalues;
+ we increment them past the character fetched. */
+
+#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
+ if (1) \
+ { \
+ CHARIDX++; \
+ if (STRING_MULTIBYTE (STRING)) \
+ { \
+ unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
+ int len; \
+ \
+ OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \
+ BYTEIDX += len; \
+ } \
+ else \
+ OUTPUT = XSTRING (STRING)->data[BYTEIDX++]; \
+ } \
+ else
+
+/* Like FETCH_STRING_CHAR_ADVANCE but return a multibyte character eve
+ if STRING is unibyte. */
+
+#define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
+ if (1) \
+ { \
+ CHARIDX++; \
+ if (STRING_MULTIBYTE (STRING)) \
+ { \
+ unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
+ int len; \
+ \
+ OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \
+ BYTEIDX += len; \
+ } \
+ else \
+ { \
+ OUTPUT = XSTRING (STRING)->data[BYTEIDX++]; \
+ MAKE_CHAR_MULTIBYTE (OUTPUT); \
+ } \
+ } \
+ else
+
+
+/* Like FETCH_STRING_CHAR_ADVANCE but assumes STRING is multibyte. */
+
+#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
+ if (1) \
+ { \
+ unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
+ int len; \
+ \
+ OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \
+ BYTEIDX += len; \
+ CHARIDX++; \
+ } \
+ else
+
+
+/* Like FETCH_STRING_CHAR_ADVANCE but fetch character from the current
+ buffer. */
+
+#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \
+ if (1) \
+ { \
+ CHARIDX++; \
+ if (!NILP (current_buffer->enable_multibyte_characters)) \
+ { \
+ unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \
+ int len; \
+ \
+ OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len); \
+ BYTEIDX += len; \
+ } \
+ else \
+ { \
+ OUTPUT = *(BYTE_POS_ADDR (BYTEIDX)); \
+ BYTEIDX++; \
+ } \
+ } \
+ else
+
+
+/* Like FETCH_CHAR_ADVANCE but assumes the current buffer is multibyte. */
+
+#define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX) \
+ if (1) \
+ { \
+ unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \
+ int len; \
+ \
+ OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len); \
+ BYTEIDX += len; \
+ CHARIDX++; \
+ } \
+ else
+
+
+/* Increase the buffer byte position POS_BYTE of the current buffer to
+ the next character boundary. No range checking of POS. */
+
+#define INC_POS(pos_byte) \
+ do { \
+ unsigned char *p = BYTE_POS_ADDR (pos_byte); \
+ pos_byte += BYTES_BY_CHAR_HEAD (*p); \
+ } while (0)
+
+
+/* Decrease the buffer byte position POS_BYTE of the current buffer to
+ the previous character boundary. No range checking of POS. */
+
+#define DEC_POS(pos_byte) \
+ do { \
+ unsigned char *p; \
+ \
+ pos_byte--; \
+ if (pos_byte < GPT_BYTE) \
+ p = BEG_ADDR + pos_byte - 1; \
+ else \
+ p = BEG_ADDR + GAP_SIZE + pos_byte - 1; \
+ while (!CHAR_HEAD_P (*p)) \
+ { \
+ p--; \
+ pos_byte--; \
+ } \
+ } while (0)
+
+/* Increment both CHARPOS and BYTEPOS, each in the appropriate way. */
+
+#define INC_BOTH(charpos, bytepos) \
+ do \
+ { \
+ (charpos)++; \
+ if (NILP (current_buffer->enable_multibyte_characters)) \
+ (bytepos)++; \
+ else \
+ INC_POS ((bytepos)); \
+ } \
+ while (0)
+
+
+/* Decrement both CHARPOS and BYTEPOS, each in the appropriate way. */
+
+#define DEC_BOTH(charpos, bytepos) \
+ do \
+ { \
+ (charpos)--; \
+ if (NILP (current_buffer->enable_multibyte_characters)) \
+ (bytepos)--; \
+ else \
+ DEC_POS ((bytepos)); \
+ } \
+ while (0)
+
+
+/* Increase the buffer byte position POS_BYTE of the current buffer to
+ the next character boundary. This macro relies on the fact that
+ *GPT_ADDR and *Z_ADDR are always accessible and the values are
+ '\0'. No range checking of POS_BYTE. */
+
+#define BUF_INC_POS(buf, pos_byte) \
+ do { \
+ unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \
+ pos_byte += BYTES_BY_CHAR_HEAD (*p); \
+ } while (0)
+
+
+/* Decrease the buffer byte position POS_BYTE of the current buffer to
+ the previous character boundary. No range checking of POS_BYTE. */
+
+#define BUF_DEC_POS(buf, pos_byte) \
+ do { \
+ unsigned char *p; \
+ pos_byte--; \
+ if (pos_byte < BUF_GPT_BYTE (buf)) \
+ p = BUF_BEG_ADDR (buf) + pos_byte - 1; \
+ else \
+ p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - 1; \
+ while (!CHAR_HEAD_P (*p)) \
+ { \
+ p--; \
+ pos_byte--; \
+ } \
+ } while (0)
+
+
+/* If C is a character to be unified with a Unicode character, return
+ the unified Unicode character. */
+
+#define MAYBE_UNIFY_CHAR(c) \
+ if (c > MAX_UNICODE_CHAR \
+ && CHAR_TABLE_P (Vchar_unify_table)) \
+ { \
+ Lisp_Object val; \
+ int unified; \
+ \
+ val = CHAR_TABLE_REF (Vchar_unify_table, c); \
+ if (! NILP (val)) \
+ { \
+ if (SYMBOLP (val)) \
+ { \
+ Funify_charset (val, Qnil, Qnil); \
+ val = CHAR_TABLE_REF (Vchar_unify_table, c); \
+ } \
+ if ((unified = XINT (val)) >= 0) \
+ c = unified; \
+ } \
+ } \
+ else
+
+
+/* Return the width of ASCII character C. The width is measured by
+ how many columns occupied on the screen when displayed in the
+ current buffer. */
+
+#define ASCII_CHAR_WIDTH(c) \
+ (c < 0x20 \
+ ? (c == '\t' \
+ ? XFASTINT (current_buffer->tab_width) \
+ : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
+ : (c < 0x7f \
+ ? 1 \
+ : ((NILP (current_buffer->ctl_arrow) ? 4 : 2))))
+
+/* Return the width of character C. The width is measured by how many
+ columns occupied on the screen when displayed in the current
+ buffer. */
+
+#define CHAR_WIDTH(c) \
+ (ASCII_CHAR_P (c) \
+ ? ASCII_CHAR_WIDTH (c) \
+ : XINT (CHAR_TABLE_REF (Vchar_width_table, c)))
+
+extern int char_resolve_modifier_mask P_ ((int));
+extern int char_string P_ ((int, unsigned char *));
+extern int string_char P_ ((const unsigned char *,
+ const unsigned char **, int *));
+
+extern int translate_char P_ ((Lisp_Object, int c));
+extern int char_printable_p P_ ((int c));
+extern void parse_str_as_multibyte P_ ((const unsigned char *, int, int *,
+ int *));
+extern int parse_str_to_multibyte P_ ((unsigned char *, int));
+extern int str_as_multibyte P_ ((unsigned char *, int, int, int *));
+extern int str_to_multibyte P_ ((unsigned char *, int, int));
+extern int str_as_unibyte P_ ((unsigned char *, int));
+extern int strwidth P_ ((unsigned char *, int));
+extern int c_string_width P_ ((const unsigned char *, int, int, int *, int *));
+extern int lisp_string_width P_ ((Lisp_Object, int, int *, int *));
+
+extern Lisp_Object Vprintable_chars;
+
+extern Lisp_Object Qcharacterp, Qauto_fill_chars;
+extern Lisp_Object Vtranslation_table_vector;
+extern Lisp_Object Vchar_width_table;
+extern Lisp_Object Vchar_direction_table;
+extern Lisp_Object Vchar_unify_table;
+
+extern Lisp_Object string_escape_byte8 P_ ((Lisp_Object));
+
+/* Return a translation table of id number ID. */
+#define GET_TRANSLATION_TABLE(id) \
+ (XCDR(XVECTOR(Vtranslation_table_vector)->contents[(id)]))
+
+/* A char-table for characters which may invoke auto-filling. */
+extern Lisp_Object Vauto_fill_chars;
+
+extern Lisp_Object Vchar_script_table;
+extern Lisp_Object Vscript_representative_chars;
+
+/* Copy LEN bytes from FROM to TO. This macro should be used only
+ when a caller knows that LEN is short and the obvious copy loop is
+ faster than calling bcopy which has some overhead. Copying a
+ multibyte sequence of a character is the typical case. */
+
+#define BCOPY_SHORT(from, to, len) \
+ do { \
+ int i = len; \
+ unsigned char *from_p = from, *to_p = to; \
+ while (i--) *to_p++ = *from_p++; \
+ } while (0)
+
+#define DEFSYM(sym, name) \
+ do { (sym) = intern ((name)); staticpro (&(sym)); } while (0)
+
+#endif /* EMACS_CHARACTER_H */
+
+/* arch-tag: 4ef86004-2eff-4073-8cea-cfcbcf7188ac
+ (do not change this comment) */
diff --git a/src/charset.c b/src/charset.c
index 211de24ef89..2228a24f698 100644
--- a/src/charset.c
+++ b/src/charset.c
@@ -1,9 +1,12 @@
-/* Basic multilingual character support.
+/* Basic character set support.
Copyright (C) 2001, 2002, 2003, 2004, 2005,
- 2006 Free Software Foundation, Inc.
+ 2006 Free Software Foundation, Inc.
Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
+ Copyright (C) 2003, 2004
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -22,720 +25,1244 @@ along with GNU Emacs; see the file COPYING. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */
-/* At first, see the document in `charset.h' to understand the code in
- this file. */
-
-#ifdef emacs
#include <config.h>
-#endif
#include <stdio.h>
-
-#ifdef emacs
-
+#include <unistd.h>
+#include <ctype.h>
#include <sys/types.h>
#include "lisp.h"
-#include "buffer.h"
+#include "character.h"
#include "charset.h"
-#include "composite.h"
#include "coding.h"
#include "disptab.h"
+#include "buffer.h"
-#else /* not emacs */
+/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
-#include "mulelib.h"
+ A coded character set ("charset" hereafter) is a meaningful
+ collection (i.e. language, culture, functionality, etc.) of
+ characters. Emacs handles multiple charsets at once. In Emacs Lisp
+ code, a charset is represented by a symbol. In C code, a charset is
+ represented by its ID number or by a pointer to a struct charset.
-#endif /* emacs */
+ The actual information about each charset is stored in two places.
+ Lispy information is stored in the hash table Vcharset_hash_table as
+ a vector (charset attributes). The other information is stored in
+ charset_table as a struct charset.
+
+*/
-Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
-Lisp_Object Qunknown;
-
-/* Declaration of special leading-codes. */
-EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */
-EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */
-EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */
-EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */
-
-/* Declaration of special charsets. The values are set by
- Fsetup_special_charsets. */
-int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
-int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
-int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
-int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
-int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
-int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
-int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
-int charset_mule_unicode_0100_24ff;
-int charset_mule_unicode_2500_33ff;
-int charset_mule_unicode_e000_ffff;
-
-Lisp_Object Qcharset_table;
-
-/* A char-table containing information of each character set. */
-Lisp_Object Vcharset_table;
-
-/* A vector of charset symbol indexed by charset-id. This is used
- only for returning charset symbol from C functions. */
-Lisp_Object Vcharset_symbol_table;
-
-/* A list of charset symbols ever defined. */
+/* List of all charsets. This variable is used only from Emacs
+ Lisp. */
Lisp_Object Vcharset_list;
-/* Vector of translation table ever defined.
- ID of a translation table is used to index this vector. */
-Lisp_Object Vtranslation_table_vector;
+/* Hash table that contains attributes of each charset. Keys are
+ charset symbols, and values are vectors of charset attributes. */
+Lisp_Object Vcharset_hash_table;
-/* A char-table for characters which may invoke auto-filling. */
-Lisp_Object Vauto_fill_chars;
+/* Table of struct charset. */
+struct charset *charset_table;
-Lisp_Object Qauto_fill_chars;
+static int charset_table_size;
+static int charset_table_used;
-/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
-int bytes_by_char_head[256];
-int width_by_char_head[256];
+Lisp_Object Qcharsetp;
-/* Mapping table from ISO2022's charset (specified by DIMENSION,
- CHARS, and FINAL-CHAR) to Emacs' charset. */
-int iso_charset_table[2][2][128];
+/* Special charset symbols. */
+Lisp_Object Qascii;
+Lisp_Object Qeight_bit;
+Lisp_Object Qiso_8859_1;
+Lisp_Object Qunicode;
+
+/* The corresponding charsets. */
+int charset_ascii;
+int charset_eight_bit;
+int charset_iso_8859_1;
+int charset_unicode;
+
+/* The other special charsets. */
+int charset_jisx0201_roman;
+int charset_jisx0208_1978;
+int charset_jisx0208;
+
+/* Value of charset attribute `charset-iso-plane'. */
+Lisp_Object Qgl, Qgr;
-/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
-unsigned char *_fetch_multibyte_char_p;
-int _fetch_multibyte_char_len;
+/* Charset of unibyte characters. */
+int charset_unibyte;
-/* Offset to add to a non-ASCII value when inserting it. */
-EMACS_INT nonascii_insert_offset;
+/* List of charsets ordered by the priority. */
+Lisp_Object Vcharset_ordered_list;
-/* Translation table for converting non-ASCII unibyte characters
- to multibyte codes, or nil. */
-Lisp_Object Vnonascii_translation_table;
+/* Incremented everytime we change Vcharset_ordered_list. This is
+ unsigned short so that it fits in Lisp_Int and never matches
+ -1. */
+unsigned short charset_ordered_list_tick;
+
+/* List of iso-2022 charsets. */
+Lisp_Object Viso_2022_charset_list;
+
+/* List of emacs-mule charsets. */
+Lisp_Object Vemacs_mule_charset_list;
+
+struct charset *emacs_mule_charset[256];
+
+/* Mapping table from ISO2022's charset (specified by DIMENSION,
+ CHARS, and FINAL-CHAR) to Emacs' charset. */
+int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
+
+Lisp_Object Vcharset_map_path;
+
+Lisp_Object Vchar_unified_charset_table;
+
+/* Defined in chartab.c */
+extern void
+map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
+ Lisp_Object function, Lisp_Object table,
+ Lisp_Object arg, struct charset *charset,
+ unsigned from, unsigned to));
+
+#define CODE_POINT_TO_INDEX(charset, code) \
+ ((charset)->code_linear_p \
+ ? (code) - (charset)->min_code \
+ : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
+ && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
+ && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
+ && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
+ ? (((((code) >> 24) - (charset)->code_space[12]) \
+ * (charset)->code_space[11]) \
+ + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
+ * (charset)->code_space[7]) \
+ + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
+ * (charset)->code_space[3]) \
+ + (((code) & 0xFF) - (charset)->code_space[0]) \
+ - ((charset)->char_index_offset)) \
+ : -1)
+
+
+/* Convert the character index IDX to code-point CODE for CHARSET.
+ It is assumed that IDX is in a valid range. */
+
+#define INDEX_TO_CODE_POINT(charset, idx) \
+ ((charset)->code_linear_p \
+ ? (idx) + (charset)->min_code \
+ : (idx += (charset)->char_index_offset, \
+ (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
+ | (((charset)->code_space[4] \
+ + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
+ << 8) \
+ | (((charset)->code_space[8] \
+ + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
+ << 16) \
+ | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
+ << 24))))
-/* List of all possible generic characters. */
-Lisp_Object Vgeneric_character_list;
-void
-invalid_character (c)
- int c;
-{
- error ("Invalid character: %d, #o%o, #x%x", c, c, c);
-}
-/* Parse string STR of length LENGTH and fetch information of a
- character at STR. Set BYTES to the byte length the character
- occupies, CHARSET, C1, C2 to proper values of the character. */
-
-#define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \
- do { \
- (c1) = *(str); \
- (bytes) = BYTES_BY_CHAR_HEAD (c1); \
- if ((bytes) == 1) \
- (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
- else if ((bytes) == 2) \
- { \
- if ((c1) == LEADING_CODE_8_BIT_CONTROL) \
- (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20; \
- else \
- (charset) = (c1), (c1) = (str)[1] & 0x7F; \
- } \
- else if ((bytes) == 3) \
- { \
- if ((c1) < LEADING_CODE_PRIVATE_11) \
- (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F; \
- else \
- (charset) = (str)[1], (c1) = (str)[2] & 0x7F; \
- } \
- else \
- (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F; \
- } while (0)
-
-/* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
- Note that this intentionally allows invalid components, such
- as 0xA0 0xA0, because there exist many files that contain
- such invalid byte sequences, especially in EUC-GB. */
-#define CHAR_COMPONENTS_VALID_P(charset, c1, c2) \
- ((charset) == CHARSET_ASCII \
- ? ((c1) >= 0 && (c1) <= 0x7F) \
- : ((charset) == CHARSET_8_BIT_CONTROL \
- ? ((c1) >= 0x80 && (c1) <= 0x9F) \
- : ((charset) == CHARSET_8_BIT_GRAPHIC \
- ? ((c1) >= 0x80 && (c1) <= 0xFF) \
- : (CHARSET_DIMENSION (charset) == 1 \
- ? ((c1) >= 0x20 && (c1) <= 0x7F) \
- : ((c1) >= 0x20 && (c1) <= 0x7F \
- && (c2) >= 0x20 && (c2) <= 0x7F)))))
-
-/* Store multi-byte form of the character C in STR. The caller should
- allocate at least 4-byte area at STR in advance. Returns the
- length of the multi-byte form. If C is an invalid character code,
- return -1. */
+/* Set to 1 to warn that a charset map is loaded and thus a buffer
+ text and a string data may be relocated. */
+int charset_map_loaded;
-int
-char_to_string_1 (c, str)
- int c;
- unsigned char *str;
+struct charset_map_entries
+{
+ struct {
+ unsigned from, to;
+ int c;
+ } entry[0x10000];
+ struct charset_map_entries *next;
+};
+
+/* Load the mapping information for CHARSET from ENTRIES.
+
+ If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
+
+ If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
+ CHARSET->decoder, and CHARSET->encoder.
+
+ If CONTROL_FLAG is 2, setup CHARSET->deunifier and
+ Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
+ setup it too. */
+
+static void
+load_charset_map (charset, entries, n_entries, control_flag)
+ struct charset *charset;
+ struct charset_map_entries *entries;
+ int n_entries;
+ int control_flag;
{
- unsigned char *p = str;
+ Lisp_Object vec, table;
+ unsigned max_code = CHARSET_MAX_CODE (charset);
+ int ascii_compatible_p = charset->ascii_compatible_p;
+ int min_char, max_char, nonascii_min_char;
+ int i;
+ unsigned char *fast_map = charset->fast_map;
+
+ if (n_entries <= 0)
+ return;
- if (c & CHAR_MODIFIER_MASK) /* This includes the case C is negative. */
+ if (control_flag > 0)
{
- /* Multibyte character can't have a modifier bit. */
- if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
- return -1;
+ int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
+
+ table = Fmake_char_table (Qnil, Qnil);
+ if (control_flag == 1)
+ vec = Fmake_vector (make_number (n), make_number (-1));
+ else if (! CHAR_TABLE_P (Vchar_unify_table))
+ Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
- /* For Meta, Shift, and Control modifiers, we need special care. */
- if (c & CHAR_META)
+ charset_map_loaded = 1;
+ }
+
+ min_char = max_char = entries->entry[0].c;
+ nonascii_min_char = MAX_CHAR;
+ for (i = 0; i < n_entries; i++)
+ {
+ unsigned from, to;
+ int from_index, to_index;
+ int from_c, to_c;
+ int idx = i % 0x10000;
+
+ if (i > 0 && idx == 0)
+ entries = entries->next;
+ from = entries->entry[idx].from;
+ to = entries->entry[idx].to;
+ from_c = entries->entry[idx].c;
+ from_index = CODE_POINT_TO_INDEX (charset, from);
+ if (from == to)
{
- /* Move the meta bit to the right place for a string. */
- c = (c & ~CHAR_META) | 0x80;
+ to_index = from_index;
+ to_c = from_c;
}
- if (c & CHAR_SHIFT)
+ else
{
- /* Shift modifier is valid only with [A-Za-z]. */
- if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
- c &= ~CHAR_SHIFT;
- else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
- c = (c & ~CHAR_SHIFT) - ('a' - 'A');
+ to_index = CODE_POINT_TO_INDEX (charset, to);
+ to_c = from_c + (to_index - from_index);
}
- if (c & CHAR_CTL)
+ if (from_index < 0 || to_index < 0)
+ continue;
+
+ if (control_flag < 2)
{
- /* Simulate the code in lread.c. */
- /* Allow `\C- ' and `\C-?'. */
- if (c == (CHAR_CTL | ' '))
- c = 0;
- else if (c == (CHAR_CTL | '?'))
- c = 127;
- /* ASCII control chars are made from letters (both cases),
- as well as the non-letters within 0100...0137. */
- else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
- c &= (037 | (~0177 & ~CHAR_CTL));
- else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
- c &= (037 | (~0177 & ~CHAR_CTL));
+ int c;
+
+ if (to_c > max_char)
+ max_char = to_c;
+ else if (from_c < min_char)
+ min_char = from_c;
+ if (ascii_compatible_p)
+ {
+ if (! ASCII_BYTE_P (from_c))
+ {
+ if (from_c < nonascii_min_char)
+ nonascii_min_char = from_c;
+ }
+ else if (! ASCII_BYTE_P (to_c))
+ {
+ nonascii_min_char = 0x80;
+ }
+ }
+
+ for (c = from_c; c <= to_c; c++)
+ CHARSET_FAST_MAP_SET (c, fast_map);
+
+ if (control_flag == 1)
+ {
+ unsigned code = from;
+
+ if (CHARSET_COMPACT_CODES_P (charset))
+ while (1)
+ {
+ ASET (vec, from_index, make_number (from_c));
+ if (NILP (CHAR_TABLE_REF (table, from_c)))
+ CHAR_TABLE_SET (table, from_c, make_number (code));
+ if (from_index == to_index)
+ break;
+ from_index++, from_c++;
+ code = INDEX_TO_CODE_POINT (charset, from_index);
+ }
+ else
+ for (; from_index <= to_index; from_index++, from_c++)
+ {
+ ASET (vec, from_index, make_number (from_c));
+ if (NILP (CHAR_TABLE_REF (table, from_c)))
+ CHAR_TABLE_SET (table, from_c, make_number (from_index));
+ }
+ }
}
+ else
+ {
+ unsigned code = from;
- /* If C still has any modifier bits, just ignore it. */
- c &= ~CHAR_MODIFIER_MASK;
+ while (1)
+ {
+ int c1 = DECODE_CHAR (charset, code);
+
+ if (c1 >= 0)
+ {
+ CHAR_TABLE_SET (table, from_c, make_number (c1));
+ CHAR_TABLE_SET (Vchar_unify_table, c1, make_number (from_c));
+ if (CHAR_TABLE_P (Vchar_unified_charset_table))
+ CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
+ CHARSET_NAME (charset));
+ }
+ if (from_index == to_index)
+ break;
+ from_index++, from_c++;
+ code = INDEX_TO_CODE_POINT (charset, from_index);
+ }
+ }
}
- if (SINGLE_BYTE_CHAR_P (c))
+ if (control_flag < 2)
{
- if (ASCII_BYTE_P (c) || c >= 0xA0)
- *p++ = c;
- else
+ CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
+ ? nonascii_min_char : min_char);
+ CHARSET_MAX_CHAR (charset) = max_char;
+ if (control_flag == 1)
{
- *p++ = LEADING_CODE_8_BIT_CONTROL;
- *p++ = c + 0x20;
+ CHARSET_DECODER (charset) = vec;
+ CHARSET_ENCODER (charset) = table;
}
}
- else if (CHAR_VALID_P (c, 0))
- {
- int charset, c1, c2;
+ else
+ CHARSET_DEUNIFIER (charset) = table;
+}
- SPLIT_CHAR (c, charset, c1, c2);
- if (charset >= LEADING_CODE_EXT_11)
- *p++ = (charset < LEADING_CODE_EXT_12
- ? LEADING_CODE_PRIVATE_11
- : (charset < LEADING_CODE_EXT_21
- ? LEADING_CODE_PRIVATE_12
- : (charset < LEADING_CODE_EXT_22
- ? LEADING_CODE_PRIVATE_21
- : LEADING_CODE_PRIVATE_22)));
- *p++ = charset;
- if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
- return -1;
- if (c1)
+/* Read a hexadecimal number (preceded by "0x") from the file FP while
+ paying attention to comment charcter '#'. */
+
+static INLINE unsigned
+read_hex (fp, eof)
+ FILE *fp;
+ int *eof;
+{
+ int c;
+ unsigned n;
+
+ while ((c = getc (fp)) != EOF)
+ {
+ if (c == '#')
{
- *p++ = c1 | 0x80;
- if (c2 > 0)
- *p++ = c2 | 0x80;
+ while ((c = getc (fp)) != EOF && c != '\n');
}
+ else if (c == '0')
+ {
+ if ((c = getc (fp)) == EOF || c == 'x')
+ break;
+ }
+ }
+ if (c == EOF)
+ {
+ *eof = 1;
+ return 0;
}
+ *eof = 0;
+ n = 0;
+ if (c == 'x')
+ while ((c = getc (fp)) != EOF && isxdigit (c))
+ n = ((n << 4)
+ | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
else
- return -1;
-
- return (p - str);
+ while ((c = getc (fp)) != EOF && isdigit (c))
+ n = (n * 10) + c - '0';
+ if (c != EOF)
+ ungetc (c, fp);
+ return n;
}
-/* Store multi-byte form of the character C in STR. The caller should
- allocate at least 4-byte area at STR in advance. Returns the
- length of the multi-byte form. If C is an invalid character code,
- signal an error.
+/* Return a mapping vector for CHARSET loaded from MAPFILE.
+ Each line of MAPFILE has this form
+ 0xAAAA 0xCCCC
+ where 0xAAAA is a code-point and 0xCCCC is the corresponding
+ character code, or this form
+ 0xAAAA-0xBBBB 0xCCCC
+ where 0xAAAA and 0xBBBB are code-points specifying a range, and
+ 0xCCCC is the first character code of the range.
- Use macro `CHAR_STRING (C, STR)' instead of calling this function
- directly if C can be an ASCII character. */
+ The returned vector has this form:
+ [ CODE1 CHAR1 CODE2 CHAR2 .... ]
+ where CODE1 is a code-point or a cons of code-points specifying a
+ range. */
-int
-char_to_string (c, str)
- int c;
- unsigned char *str;
+extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
+
+static void
+load_charset_map_from_file (charset, mapfile, control_flag)
+ struct charset *charset;
+ Lisp_Object mapfile;
+ int control_flag;
{
- int len;
- len = char_to_string_1 (c, str);
- if (len == -1)
- invalid_character (c);
- return len;
-}
+ unsigned min_code = CHARSET_MIN_CODE (charset);
+ unsigned max_code = CHARSET_MAX_CODE (charset);
+ int fd;
+ FILE *fp;
+ int eof;
+ Lisp_Object suffixes;
+ struct charset_map_entries *head, *entries;
+ int n_entries;
+
+ suffixes = Fcons (build_string (".map"),
+ Fcons (build_string (".TXT"), Qnil));
+
+ fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
+ if (fd < 0
+ || ! (fp = fdopen (fd, "r")))
+ {
+ add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
+ return;
+ }
+ head = entries = ((struct charset_map_entries *)
+ alloca (sizeof (struct charset_map_entries)));
+ n_entries = 0;
+ eof = 0;
+ while (1)
+ {
+ unsigned from, to;
+ int c;
+ int idx;
-/* Return the non-ASCII character corresponding to multi-byte form at
- STR of length LEN. If ACTUAL_LEN is not NULL, store the byte
- length of the multibyte form in *ACTUAL_LEN.
+ from = read_hex (fp, &eof);
+ if (eof)
+ break;
+ if (getc (fp) == '-')
+ to = read_hex (fp, &eof);
+ else
+ to = from;
+ c = (int) read_hex (fp, &eof);
- Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
- this function directly if you want ot handle ASCII characters as
- well. */
+ if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
+ continue;
-int
-string_to_char (str, len, actual_len)
- const unsigned char *str;
- int len, *actual_len;
-{
- int c, bytes, charset, c1, c2;
+ if (n_entries > 0 && (n_entries % 0x10000) == 0)
+ {
+ entries->next = ((struct charset_map_entries *)
+ alloca (sizeof (struct charset_map_entries)));
+ entries = entries->next;
+ }
+ idx = n_entries % 0x10000;
+ entries->entry[idx].from = from;
+ entries->entry[idx].to = to;
+ entries->entry[idx].c = c;
+ n_entries++;
+ }
+ fclose (fp);
+ close (fd);
- SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
- c = MAKE_CHAR (charset, c1, c2);
- if (actual_len)
- *actual_len = bytes;
- return c;
+ load_charset_map (charset, head, n_entries, control_flag);
}
-/* Return the length of the multi-byte form at string STR of length LEN.
- Use the macro MULTIBYTE_FORM_LENGTH instead. */
-int
-multibyte_form_length (str, len)
- const unsigned char *str;
- int len;
+static void
+load_charset_map_from_vector (charset, vec, control_flag)
+ struct charset *charset;
+ Lisp_Object vec;
+ int control_flag;
{
- int bytes;
+ unsigned min_code = CHARSET_MIN_CODE (charset);
+ unsigned max_code = CHARSET_MAX_CODE (charset);
+ struct charset_map_entries *head, *entries;
+ int n_entries;
+ int len = ASIZE (vec);
+ int i;
- PARSE_MULTIBYTE_SEQ (str, len, bytes);
- return bytes;
-}
+ if (len % 2 == 1)
+ {
+ add_to_log ("Failure in loading charset map: %V", vec, Qnil);
+ return;
+ }
-/* Check multibyte form at string STR of length LEN and set variables
- pointed by CHARSET, C1, and C2 to charset and position codes of the
- character at STR, and return 0. If there's no multibyte character,
- return -1. This should be used only in the macro SPLIT_STRING
- which checks range of STR in advance. */
+ head = entries = ((struct charset_map_entries *)
+ alloca (sizeof (struct charset_map_entries)));
+ n_entries = 0;
+ for (i = 0; i < len; i += 2)
+ {
+ Lisp_Object val, val2;
+ unsigned from, to;
+ int c;
+ int idx;
-int
-split_string (str, len, charset, c1, c2)
- const unsigned char *str;
- unsigned char *c1, *c2;
- int len, *charset;
-{
- register int bytes, cs, code1, code2 = -1;
+ val = AREF (vec, i);
+ if (CONSP (val))
+ {
+ val2 = XCDR (val);
+ val = XCAR (val);
+ CHECK_NATNUM (val);
+ CHECK_NATNUM (val2);
+ from = XFASTINT (val);
+ to = XFASTINT (val2);
+ }
+ else
+ {
+ CHECK_NATNUM (val);
+ from = to = XFASTINT (val);
+ }
+ val = AREF (vec, i + 1);
+ CHECK_NATNUM (val);
+ c = XFASTINT (val);
- SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
- if (cs == CHARSET_ASCII)
- return -1;
- *charset = cs;
- *c1 = code1;
- *c2 = code2;
- return 0;
+ if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
+ continue;
+
+ if (n_entries > 0 && (n_entries % 0x10000) == 0)
+ {
+ entries->next = ((struct charset_map_entries *)
+ alloca (sizeof (struct charset_map_entries)));
+ entries = entries->next;
+ }
+ idx = n_entries % 0x10000;
+ entries->entry[idx].from = from;
+ entries->entry[idx].to = to;
+ entries->entry[idx].c = c;
+ n_entries++;
+ }
+
+ load_charset_map (charset, head, n_entries, control_flag);
}
-/* Return 1 iff character C has valid printable glyph.
- Use the macro CHAR_PRINTABLE_P instead. */
-int
-char_printable_p (c)
- int c;
+static void
+load_charset (charset)
+ struct charset *charset;
{
- int charset, c1, c2;
-
- if (ASCII_BYTE_P (c))
- return 1;
- else if (SINGLE_BYTE_CHAR_P (c))
- return 0;
- else if (c >= MAX_CHAR)
- return 0;
+ if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
+ {
+ Lisp_Object map;
- SPLIT_CHAR (c, charset, c1, c2);
- if (! CHARSET_DEFINED_P (charset))
- return 0;
- if (CHARSET_CHARS (charset) == 94
- ? c1 <= 32 || c1 >= 127
- : c1 < 32)
- return 0;
- if (CHARSET_DIMENSION (charset) == 2
- && (CHARSET_CHARS (charset) == 94
- ? c2 <= 32 || c2 >= 127
- : c2 < 32))
- return 0;
- return 1;
+ map = CHARSET_MAP (charset);
+ if (STRINGP (map))
+ load_charset_map_from_file (charset, map, 1);
+ else
+ load_charset_map_from_vector (charset, map, 1);
+ CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
+ }
}
-/* Translate character C by translation table TABLE. If C
- is negative, translate a character specified by CHARSET, C1, and C2
- (C1 and C2 are code points of the character). If no translation is
- found in TABLE, return C. */
-int
-translate_char (table, c, charset, c1, c2)
- Lisp_Object table;
- int c, charset, c1, c2;
+
+DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
+ doc: /* Return non-nil if and only if OBJECT is a charset.*/)
+ (object)
+ Lisp_Object object;
{
- Lisp_Object ch;
- int alt_charset, alt_c1, alt_c2, dimension;
-
- if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
- if (!CHAR_TABLE_P (table)
- || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
- return c;
-
- SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
- dimension = CHARSET_DIMENSION (alt_charset);
- if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
- /* CH is not a generic character, just return it. */
- return XFASTINT (ch);
-
- /* Since CH is a generic character, we must return a specific
- charater which has the same position codes as C from CH. */
- if (charset < 0)
- SPLIT_CHAR (c, charset, c1, c2);
- if (dimension != CHARSET_DIMENSION (charset))
- /* We can't make such a character because of dimension mismatch. */
- return c;
- return MAKE_CHAR (alt_charset, c1, c2);
+ return (CHARSETP (object) ? Qt : Qnil);
}
-/* Convert the unibyte character C to multibyte based on
- Vnonascii_translation_table or nonascii_insert_offset. If they can't
- convert C to a valid multibyte character, convert it based on
- DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
-int
-unibyte_char_to_multibyte (c)
- int c;
+void
+map_charset_chars (c_function, function, arg,
+ charset, from, to)
+ void (*c_function) P_ ((Lisp_Object, Lisp_Object));
+ Lisp_Object function, arg;
+ struct charset *charset;
+ unsigned from, to;
{
- if (c < 0400 && c >= 0200)
+ Lisp_Object range;
+ int partial;
+
+ if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
+ load_charset (charset);
+
+ partial = (from > CHARSET_MIN_CODE (charset)
+ || to < CHARSET_MAX_CODE (charset));
+
+ if (CHARSET_UNIFIED_P (charset)
+ && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
{
- int c_save = c;
+ map_char_table_for_charset (c_function, function,
+ CHARSET_DEUNIFIER (charset), arg,
+ partial ? charset : NULL, from, to);
+ }
- if (! NILP (Vnonascii_translation_table))
- {
- c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
- if (c >= 0400 && ! char_valid_p (c, 0))
- c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
- }
- else if (c >= 0240 && nonascii_insert_offset > 0)
+ if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
+ {
+ int from_idx = CODE_POINT_TO_INDEX (charset, from);
+ int to_idx = CODE_POINT_TO_INDEX (charset, to);
+ int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
+ int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
+
+ range = Fcons (make_number (from_c), make_number (to_c));
+ if (NILP (function))
+ (*c_function) (arg, range);
+ else
+ call2 (function, range, arg);
+ }
+ else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
+ {
+ if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
+ return;
+ map_char_table_for_charset (c_function, function,
+ CHARSET_ENCODER (charset), arg,
+ partial ? charset : NULL, from, to);
+ }
+ else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
+ {
+ Lisp_Object subset_info;
+ int offset;
+
+ subset_info = CHARSET_SUBSET (charset);
+ charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
+ offset = XINT (AREF (subset_info, 3));
+ from -= offset;
+ if (from < XFASTINT (AREF (subset_info, 1)))
+ from = XFASTINT (AREF (subset_info, 1));
+ to -= offset;
+ if (to > XFASTINT (AREF (subset_info, 2)))
+ to = XFASTINT (AREF (subset_info, 2));
+ map_charset_chars (c_function, function, arg, charset, from, to);
+ }
+ else /* i.e. CHARSET_METHOD_SUPERSET */
+ {
+ Lisp_Object parents;
+
+ for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
+ parents = XCDR (parents))
{
- c += nonascii_insert_offset;
- if (c < 0400 || ! char_valid_p (c, 0))
- c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
+ int offset;
+ unsigned this_from, this_to;
+
+ charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
+ offset = XINT (XCDR (XCAR (parents)));
+ this_from = from - offset;
+ this_to = to - offset;
+ if (this_from < CHARSET_MIN_CODE (charset))
+ this_from = CHARSET_MIN_CODE (charset);
+ if (this_to > CHARSET_MAX_CODE (charset))
+ this_to = CHARSET_MAX_CODE (charset);
+ map_charset_chars (c_function, function, arg, charset,
+ this_from, this_to);
}
- else if (c >= 0240)
- c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
}
- return c;
}
+DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
+ doc: /* Call FUNCTION for all characters in CHARSET.
+FUNCTION is called with an argument RANGE and the optional 3rd
+argument ARG.
-/* Convert the multibyte character C to unibyte 8-bit character based
- on Vnonascii_translation_table or nonascii_insert_offset. If
- REV_TBL is non-nil, it should be a reverse table of
- Vnonascii_translation_table, i.e. what given by:
- Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */
+RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
+characters contained in CHARSET.
-int
-multibyte_char_to_unibyte (c, rev_tbl)
- int c;
- Lisp_Object rev_tbl;
+The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
+range of code points of target characters. */)
+ (function, charset, arg, from_code, to_code)
+ Lisp_Object function, charset, arg, from_code, to_code;
{
- if (!SINGLE_BYTE_CHAR_P (c))
+ struct charset *cs;
+ unsigned from, to;
+
+ CHECK_CHARSET_GET_CHARSET (charset, cs);
+ if (NILP (from_code))
+ from = CHARSET_MIN_CODE (cs);
+ else
+ {
+ CHECK_NATNUM (from_code);
+ from = XINT (from_code);
+ if (from < CHARSET_MIN_CODE (cs))
+ from = CHARSET_MIN_CODE (cs);
+ }
+ if (NILP (to_code))
+ to = CHARSET_MAX_CODE (cs);
+ else
+ {
+ CHECK_NATNUM (to_code);
+ to = XINT (to_code);
+ if (to > CHARSET_MAX_CODE (cs))
+ to = CHARSET_MAX_CODE (cs);
+ }
+ map_charset_chars (NULL, function, arg, cs, from, to);
+ return Qnil;
+}
+
+
+/* Define a charset according to the arguments. The Nth argument is
+ the Nth attribute of the charset (the last attribute `charset-id'
+ is not included). See the docstring of `define-charset' for the
+ detail. */
+
+DEFUN ("define-charset-internal", Fdefine_charset_internal,
+ Sdefine_charset_internal, charset_arg_max, MANY, 0,
+ doc: /* For internal use only.
+usage: (define-charset-internal ...) */)
+ (nargs, args)
+ int nargs;
+ Lisp_Object *args;
+{
+ /* Charset attr vector. */
+ Lisp_Object attrs;
+ Lisp_Object val;
+ unsigned hash_code;
+ struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
+ int i, j;
+ struct charset charset;
+ int id;
+ int dimension;
+ int new_definition_p;
+ int nchars;
+
+ if (nargs != charset_arg_max)
+ return Fsignal (Qwrong_number_of_arguments,
+ Fcons (intern ("define-charset-internal"),
+ make_number (nargs)));
+
+ attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
+
+ CHECK_SYMBOL (args[charset_arg_name]);
+ ASET (attrs, charset_name, args[charset_arg_name]);
+
+ val = args[charset_arg_code_space];
+ for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
+ {
+ int min_byte, max_byte;
+
+ min_byte = XINT (Faref (val, make_number (i * 2)));
+ max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
+ if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
+ error ("Invalid :code-space value");
+ charset.code_space[i * 4] = min_byte;
+ charset.code_space[i * 4 + 1] = max_byte;
+ charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
+ nchars *= charset.code_space[i * 4 + 2];
+ charset.code_space[i * 4 + 3] = nchars;
+ if (max_byte > 0)
+ dimension = i + 1;
+ }
+
+ val = args[charset_arg_dimension];
+ if (NILP (val))
+ charset.dimension = dimension;
+ else
+ {
+ CHECK_NATNUM (val);
+ charset.dimension = XINT (val);
+ if (charset.dimension < 1 || charset.dimension > 4)
+ args_out_of_range_3 (val, make_number (1), make_number (4));
+ }
+
+ charset.code_linear_p
+ = (charset.dimension == 1
+ || (charset.code_space[2] == 256
+ && (charset.dimension == 2
+ || (charset.code_space[6] == 256
+ && (charset.dimension == 3
+ || charset.code_space[10] == 256)))));
+
+ if (! charset.code_linear_p)
+ {
+ charset.code_space_mask = (unsigned char *) xmalloc (256);
+ bzero (charset.code_space_mask, 256);
+ for (i = 0; i < 4; i++)
+ for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
+ j++)
+ charset.code_space_mask[j] |= (1 << i);
+ }
+
+ charset.iso_chars_96 = charset.code_space[2] == 96;
+
+ charset.min_code = (charset.code_space[0]
+ | (charset.code_space[4] << 8)
+ | (charset.code_space[8] << 16)
+ | (charset.code_space[12] << 24));
+ charset.max_code = (charset.code_space[1]
+ | (charset.code_space[5] << 8)
+ | (charset.code_space[9] << 16)
+ | (charset.code_space[13] << 24));
+ charset.char_index_offset = 0;
+
+ val = args[charset_arg_min_code];
+ if (! NILP (val))
{
- int c_save = c;
+ unsigned code;
- if (! CHAR_TABLE_P (rev_tbl)
- && CHAR_TABLE_P (Vnonascii_translation_table))
- rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
- make_number (0));
- if (CHAR_TABLE_P (rev_tbl))
+ if (INTEGERP (val))
+ code = XINT (val);
+ else
{
- Lisp_Object temp;
- temp = Faref (rev_tbl, make_number (c));
- if (INTEGERP (temp))
- c = XINT (temp);
- if (c >= 256)
- c = (c_save & 0177) + 0200;
+ CHECK_CONS (val);
+ CHECK_NUMBER_CAR (val);
+ CHECK_NUMBER_CDR (val);
+ code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
}
+ if (code < charset.min_code
+ || code > charset.max_code)
+ args_out_of_range_3 (make_number (charset.min_code),
+ make_number (charset.max_code), val);
+ charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
+ charset.min_code = code;
+ }
+
+ val = args[charset_arg_max_code];
+ if (! NILP (val))
+ {
+ unsigned code;
+
+ if (INTEGERP (val))
+ code = XINT (val);
else
{
- if (nonascii_insert_offset > 0)
- c -= nonascii_insert_offset;
- if (c < 128 || c >= 256)
- c = (c_save & 0177) + 0200;
+ CHECK_CONS (val);
+ CHECK_NUMBER_CAR (val);
+ CHECK_NUMBER_CDR (val);
+ code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
}
+ if (code < charset.min_code
+ || code > charset.max_code)
+ args_out_of_range_3 (make_number (charset.min_code),
+ make_number (charset.max_code), val);
+ charset.max_code = code;
}
- return c;
-}
+ charset.compact_codes_p = charset.max_code < 0x1000000;
-
-/* Update the table Vcharset_table with the given arguments (see the
- document of `define-charset' for the meaning of each argument).
- Several other table contents are also updated. The caller should
- check the validity of CHARSET-ID and the remaining arguments in
- advance. */
+ val = args[charset_arg_invalid_code];
+ if (NILP (val))
+ {
+ if (charset.min_code > 0)
+ charset.invalid_code = 0;
+ else
+ {
+ XSETINT (val, charset.max_code + 1);
+ if (XINT (val) == charset.max_code + 1)
+ charset.invalid_code = charset.max_code + 1;
+ else
+ error ("Attribute :invalid-code must be specified");
+ }
+ }
+ else
+ {
+ CHECK_NATNUM (val);
+ charset.invalid_code = XFASTINT (val);
+ }
-void
-update_charset_table (charset_id, dimension, chars, width, direction,
- iso_final_char, iso_graphic_plane,
- short_name, long_name, description)
- Lisp_Object charset_id, dimension, chars, width, direction;
- Lisp_Object iso_final_char, iso_graphic_plane;
- Lisp_Object short_name, long_name, description;
-{
- int charset = XINT (charset_id);
- int bytes;
- unsigned char leading_code_base, leading_code_ext;
-
- if (NILP (CHARSET_TABLE_ENTRY (charset)))
- CHARSET_TABLE_ENTRY (charset)
- = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
-
- if (NILP (long_name))
- long_name = short_name;
- if (NILP (description))
- description = long_name;
-
- /* Get byte length of multibyte form, base leading-code, and
- extended leading-code of the charset. See the comment under the
- title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
- bytes = XINT (dimension);
- if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
- {
- /* Official charset, it doesn't have an extended leading-code. */
- if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
- bytes += 1; /* For a base leading-code. */
- leading_code_base = charset;
- leading_code_ext = 0;
+ val = args[charset_arg_iso_final];
+ if (NILP (val))
+ charset.iso_final = -1;
+ else
+ {
+ CHECK_NUMBER (val);
+ if (XINT (val) < '0' || XINT (val) > 127)
+ error ("Invalid iso-final-char: %d", XINT (val));
+ charset.iso_final = XINT (val);
}
+
+ val = args[charset_arg_iso_revision];
+ if (NILP (val))
+ charset.iso_revision = -1;
else
{
- /* Private charset. */
- bytes += 2; /* For base and extended leading-codes. */
- leading_code_base
- = (charset < LEADING_CODE_EXT_12
- ? LEADING_CODE_PRIVATE_11
- : (charset < LEADING_CODE_EXT_21
- ? LEADING_CODE_PRIVATE_12
- : (charset < LEADING_CODE_EXT_22
- ? LEADING_CODE_PRIVATE_21
- : LEADING_CODE_PRIVATE_22)));
- leading_code_ext = charset;
- if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
- error ("Invalid dimension for the charset-ID %d", charset);
- }
-
- CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
- CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
- CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
- CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
- CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
- CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
- CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
- = make_number (leading_code_base);
- CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
- = make_number (leading_code_ext);
- CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
- CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
- = iso_graphic_plane;
- CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
- CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
- CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
- CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
+ CHECK_NUMBER (val);
+ if (XINT (val) > 63)
+ args_out_of_range (make_number (63), val);
+ charset.iso_revision = XINT (val);
+ }
- {
- /* If we have already defined a charset which has the same
- DIMENSION, CHARS and ISO-FINAL-CHAR but the different
- DIRECTION, we must update the entry REVERSE-CHARSET of both
- charsets. If there's no such charset, the value of the entry
- is set to nil. */
- int i;
-
- for (i = 0; i <= MAX_CHARSET; i++)
- if (!NILP (CHARSET_TABLE_ENTRY (i)))
+ val = args[charset_arg_emacs_mule_id];
+ if (NILP (val))
+ charset.emacs_mule_id = -1;
+ else
+ {
+ CHECK_NATNUM (val);
+ if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
+ error ("Invalid emacs-mule-id: %d", XINT (val));
+ charset.emacs_mule_id = XINT (val);
+ }
+
+ charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
+
+ charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
+
+ charset.unified_p = 0;
+
+ bzero (charset.fast_map, sizeof (charset.fast_map));
+
+ if (! NILP (args[charset_arg_code_offset]))
+ {
+ val = args[charset_arg_code_offset];
+ CHECK_NUMBER (val);
+
+ charset.method = CHARSET_METHOD_OFFSET;
+ charset.code_offset = XINT (val);
+
+ i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
+ charset.min_char = i + charset.code_offset;
+ i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
+ charset.max_char = i + charset.code_offset;
+ if (charset.max_char > MAX_CHAR)
+ error ("Unsupported max char: %d", charset.max_char);
+
+ i = (charset.min_char >> 7) << 7;
+ for (; i < 0x10000 && i <= charset.max_char; i += 128)
+ CHARSET_FAST_MAP_SET (i, charset.fast_map);
+ i = (i >> 12) << 12;
+ for (; i <= charset.max_char; i += 0x1000)
+ CHARSET_FAST_MAP_SET (i, charset.fast_map);
+ }
+ else if (! NILP (args[charset_arg_map]))
+ {
+ val = args[charset_arg_map];
+ ASET (attrs, charset_map, val);
+ if (STRINGP (val))
+ load_charset_map_from_file (&charset, val, 0);
+ else
+ load_charset_map_from_vector (&charset, val, 0);
+ charset.method = CHARSET_METHOD_MAP_DEFERRED;
+ }
+ else if (! NILP (args[charset_arg_subset]))
+ {
+ Lisp_Object parent;
+ Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
+ struct charset *parent_charset;
+
+ val = args[charset_arg_subset];
+ parent = Fcar (val);
+ CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
+ parent_min_code = Fnth (make_number (1), val);
+ CHECK_NATNUM (parent_min_code);
+ parent_max_code = Fnth (make_number (2), val);
+ CHECK_NATNUM (parent_max_code);
+ parent_code_offset = Fnth (make_number (3), val);
+ CHECK_NUMBER (parent_code_offset);
+ val = Fmake_vector (make_number (4), Qnil);
+ ASET (val, 0, make_number (parent_charset->id));
+ ASET (val, 1, parent_min_code);
+ ASET (val, 2, parent_max_code);
+ ASET (val, 3, parent_code_offset);
+ ASET (attrs, charset_subset, val);
+
+ charset.method = CHARSET_METHOD_SUBSET;
+ /* Here, we just copy the parent's fast_map. It's not accurate,
+ but at least it works for quickly detecting which character
+ DOESN'T belong to this charset. */
+ for (i = 0; i < 190; i++)
+ charset.fast_map[i] = parent_charset->fast_map[i];
+
+ /* We also copy these for parents. */
+ charset.min_char = parent_charset->min_char;
+ charset.max_char = parent_charset->max_char;
+ }
+ else if (! NILP (args[charset_arg_superset]))
+ {
+ val = args[charset_arg_superset];
+ charset.method = CHARSET_METHOD_SUPERSET;
+ val = Fcopy_sequence (val);
+ ASET (attrs, charset_superset, val);
+
+ charset.min_char = MAX_CHAR;
+ charset.max_char = 0;
+ for (; ! NILP (val); val = Fcdr (val))
{
- if (CHARSET_DIMENSION (i) == XINT (dimension)
- && CHARSET_CHARS (i) == XINT (chars)
- && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
- && CHARSET_DIRECTION (i) != XINT (direction))
+ Lisp_Object elt, car_part, cdr_part;
+ int this_id, offset;
+ struct charset *this_charset;
+
+ elt = Fcar (val);
+ if (CONSP (elt))
+ {
+ car_part = XCAR (elt);
+ cdr_part = XCDR (elt);
+ CHECK_CHARSET_GET_ID (car_part, this_id);
+ CHECK_NUMBER (cdr_part);
+ offset = XINT (cdr_part);
+ }
+ else
{
- CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
- = make_number (i);
- CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
- break;
+ CHECK_CHARSET_GET_ID (elt, this_id);
+ offset = 0;
}
+ XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
+
+ this_charset = CHARSET_FROM_ID (this_id);
+ if (charset.min_char > this_charset->min_char)
+ charset.min_char = this_charset->min_char;
+ if (charset.max_char < this_charset->max_char)
+ charset.max_char = this_charset->max_char;
+ for (i = 0; i < 190; i++)
+ charset.fast_map[i] |= this_charset->fast_map[i];
}
- if (i > MAX_CHARSET)
- /* No such a charset. */
- CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
- = make_number (-1);
- }
+ }
+ else
+ error ("None of :code-offset, :map, :parents are specified");
- if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
- && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
+ val = args[charset_arg_unify_map];
+ if (! NILP (val) && !STRINGP (val))
+ CHECK_VECTOR (val);
+ ASET (attrs, charset_unify_map, val);
+
+ CHECK_LIST (args[charset_arg_plist]);
+ ASET (attrs, charset_plist, args[charset_arg_plist]);
+
+ charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
+ &hash_code);
+ if (charset.hash_index >= 0)
+ {
+ new_definition_p = 0;
+ id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
+ HASH_VALUE (hash_table, charset.hash_index) = attrs;
+ }
+ else
{
- bytes_by_char_head[leading_code_base] = bytes;
- width_by_char_head[leading_code_base] = XINT (width);
+ charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
+ hash_code);
+ if (charset_table_used == charset_table_size)
+ {
+ struct charset *new_table
+ = (struct charset *) xmalloc (sizeof (struct charset)
+ * (charset_table_size + 16));
+ bcopy (charset_table, new_table,
+ sizeof (struct charset) * charset_table_size);
+ charset_table_size += 16;
+ charset_table = new_table;
+ }
+ id = charset_table_used++;
+ new_definition_p = 1;
+ }
- /* Update table emacs_code_class. */
- emacs_code_class[charset] = (bytes == 2
- ? EMACS_leading_code_2
- : (bytes == 3
- ? EMACS_leading_code_3
- : EMACS_leading_code_4));
+ ASET (attrs, charset_id, make_number (id));
+ charset.id = id;
+ charset_table[id] = charset;
+
+ if (charset.iso_final >= 0)
+ {
+ ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
+ charset.iso_final) = id;
+ if (new_definition_p)
+ Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
+ Fcons (make_number (id), Qnil));
+ if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
+ charset_jisx0201_roman = id;
+ else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
+ charset_jisx0208_1978 = id;
+ else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
+ charset_jisx0208 = id;
+ }
+
+ if (charset.emacs_mule_id >= 0)
+ {
+ emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
+ if (charset.emacs_mule_id < 0xA0)
+ emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
+ else
+ emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
+ if (new_definition_p)
+ Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
+ Fcons (make_number (id), Qnil));
}
- /* Update table iso_charset_table. */
- if (XINT (iso_final_char) >= 0
- && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
- ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
+ if (new_definition_p)
+ {
+ Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
+ Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
+ Fcons (make_number (id), Qnil));
+ charset_ordered_list_tick++;
+ }
+
+ return Qnil;
}
-#ifdef emacs
-/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
- is invalid. */
-int
-get_charset_id (charset_symbol)
- Lisp_Object charset_symbol;
+/* Same as Fdefine_charset_internal but arguments are more convenient
+ to call from C (typically in syms_of_charset). This can define a
+ charset of `offset' method only. Return the ID of the new
+ charset. */
+
+static int
+define_charset_internal (name, dimension, code_space, min_code, max_code,
+ iso_final, iso_revision, emacs_mule_id,
+ ascii_compatible, supplementary,
+ code_offset)
+ Lisp_Object name;
+ int dimension;
+ unsigned char *code_space;
+ unsigned min_code, max_code;
+ int iso_final, iso_revision, emacs_mule_id;
+ int ascii_compatible, supplementary;
+ int code_offset;
{
+ Lisp_Object args[charset_arg_max];
+ Lisp_Object plist[14];
Lisp_Object val;
- int charset;
-
- /* This originally used a ?: operator, but reportedly the HP-UX
- compiler version HP92453-01 A.10.32.22 miscompiles that. */
- if (SYMBOLP (charset_symbol)
- && VECTORP (val = Fget (charset_symbol, Qcharset))
- && CHARSET_VALID_P (charset =
- XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
- return charset;
- else
- return -1;
+ int i;
+
+ args[charset_arg_name] = name;
+ args[charset_arg_dimension] = make_number (dimension);
+ val = Fmake_vector (make_number (8), make_number (0));
+ for (i = 0; i < 8; i++)
+ ASET (val, i, make_number (code_space[i]));
+ args[charset_arg_code_space] = val;
+ args[charset_arg_min_code] = make_number (min_code);
+ args[charset_arg_max_code] = make_number (max_code);
+ args[charset_arg_iso_final]
+ = (iso_final < 0 ? Qnil : make_number (iso_final));
+ args[charset_arg_iso_revision] = make_number (iso_revision);
+ args[charset_arg_emacs_mule_id]
+ = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
+ args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
+ args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
+ args[charset_arg_invalid_code] = Qnil;
+ args[charset_arg_code_offset] = make_number (code_offset);
+ args[charset_arg_map] = Qnil;
+ args[charset_arg_subset] = Qnil;
+ args[charset_arg_superset] = Qnil;
+ args[charset_arg_unify_map] = Qnil;
+
+ plist[0] = intern (":name");
+ plist[1] = args[charset_arg_name];
+ plist[2] = intern (":dimension");
+ plist[3] = args[charset_arg_dimension];
+ plist[4] = intern (":code-space");
+ plist[5] = args[charset_arg_code_space];
+ plist[6] = intern (":iso-final-char");
+ plist[7] = args[charset_arg_iso_final];
+ plist[8] = intern (":emacs-mule-id");
+ plist[9] = args[charset_arg_emacs_mule_id];
+ plist[10] = intern (":ascii-compatible-p");
+ plist[11] = args[charset_arg_ascii_compatible_p];
+ plist[12] = intern (":code-offset");
+ plist[13] = args[charset_arg_code_offset];
+
+ args[charset_arg_plist] = Flist (14, plist);
+ Fdefine_charset_internal (charset_arg_max, args);
+
+ return XINT (CHARSET_SYMBOL_ID (name));
}
-/* Return an identification number for a new private charset of
- DIMENSION and WIDTH. If there's no more room for the new charset,
- return 0. */
-Lisp_Object
-get_new_private_charset_id (dimension, width)
- int dimension, width;
+
+DEFUN ("define-charset-alias", Fdefine_charset_alias,
+ Sdefine_charset_alias, 2, 2, 0,
+ doc: /* Define ALIAS as an alias for charset CHARSET. */)
+ (alias, charset)
+ Lisp_Object alias, charset;
{
- int charset, from, to;
+ Lisp_Object attr;
- if (dimension == 1)
- {
- from = LEADING_CODE_EXT_11;
- to = LEADING_CODE_EXT_21;
- }
- else
+ CHECK_CHARSET_GET_ATTR (charset, attr);
+ Fputhash (alias, attr, Vcharset_hash_table);
+ Vcharset_list = Fcons (alias, Vcharset_list);
+ return Qnil;
+}
+
+
+DEFUN ("unibyte-charset", Funibyte_charset, Sunibyte_charset, 0, 0, 0,
+ doc: /* Return the unibyte charset (set by `set-unibyte-charset'). */)
+ ()
+{
+ return CHARSET_NAME (CHARSET_FROM_ID (charset_unibyte));
+}
+
+
+DEFUN ("set-unibyte-charset", Fset_unibyte_charset, Sset_unibyte_charset,
+ 1, 1, 0,
+ doc: /* Set the unibyte charset to CHARSET.
+This determines how unibyte/multibyte conversion is done. See also
+function `unibyte-charset'. */)
+ (charset)
+ Lisp_Object charset;
+{
+ struct charset *cs;
+ int i, c;
+
+ CHECK_CHARSET_GET_CHARSET (charset, cs);
+ if (! cs->ascii_compatible_p
+ || cs->dimension != 1)
+ error ("Inappropriate unibyte charset: %s", SDATA (SYMBOL_NAME (charset)));
+ charset_unibyte = cs->id;
+ memset (unibyte_has_multibyte_table, 1, 128);
+ for (i = 128; i < 256; i++)
{
- from = LEADING_CODE_EXT_21;
- to = LEADING_CODE_EXT_MAX + 1;
+ c = DECODE_CHAR (cs, i);
+ unibyte_to_multibyte_table[i] = (c < 0 ? BYTE8_TO_CHAR (i) : c);
+ unibyte_has_multibyte_table[i] = c >= 0;
}
- for (charset = from; charset < to; charset++)
- if (!CHARSET_DEFINED_P (charset)) break;
+ return Qnil;
+}
- return make_number (charset < to ? charset : 0);
+
+DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
+ doc: /* Return the property list of CHARSET. */)
+ (charset)
+ Lisp_Object charset;
+{
+ Lisp_Object attrs;
+
+ CHECK_CHARSET_GET_ATTR (charset, attrs);
+ return CHARSET_ATTR_PLIST (attrs);
}
-DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
- doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.
-If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
- treated as a private charset.
-INFO-VECTOR is a vector of the format:
- [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
- SHORT-NAME LONG-NAME DESCRIPTION]
-The meanings of each elements is as follows:
-DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
-CHARS (integer) is the number of characters in a dimension: 94 or 96.
-WIDTH (integer) is the number of columns a character in the charset
-occupies on the screen: one of 0, 1, and 2.
-
-DIRECTION (integer) is the rendering direction of characters in the
-charset when rendering. If 0, render from left to right, else
-render from right to left.
-
-ISO-FINAL-CHAR (character) is the final character of the
-corresponding ISO 2022 charset.
-It may be -1 if the charset is internal use only.
-
-ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
-while encoding to variants of ISO 2022 coding system, one of the
-following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
-It may be -1 if the charset is internal use only.
-
-SHORT-NAME (string) is the short name to refer to the charset.
-
-LONG-NAME (string) is the long name to refer to the charset.
-
-DESCRIPTION (string) is the description string of the charset. */)
- (charset_id, charset_symbol, info_vector)
- Lisp_Object charset_id, charset_symbol, info_vector;
+
+DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
+ doc: /* Set CHARSET's property list to PLIST. */)
+ (charset, plist)
+ Lisp_Object charset, plist;
{
- Lisp_Object *vec;
-
- if (!NILP (charset_id))
- CHECK_NUMBER (charset_id);
- CHECK_SYMBOL (charset_symbol);
- CHECK_VECTOR (info_vector);
-
- if (! NILP (charset_id))
- {
- if (! CHARSET_VALID_P (XINT (charset_id)))
- error ("Invalid CHARSET: %d", XINT (charset_id));
- else if (CHARSET_DEFINED_P (XINT (charset_id)))
- error ("Already defined charset: %d", XINT (charset_id));
- }
-
- vec = XVECTOR (info_vector)->contents;
- if (XVECTOR (info_vector)->size != 9
- || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
- || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
- || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
- || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
- || !INTEGERP (vec[4])
- || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
- || !INTEGERP (vec[5])
- || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
- || !STRINGP (vec[6])
- || !STRINGP (vec[7])
- || !STRINGP (vec[8]))
- error ("Invalid info-vector argument for defining charset %s",
- SDATA (SYMBOL_NAME (charset_symbol)));
-
- if (NILP (charset_id))
- {
- charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
- if (XINT (charset_id) == 0)
- error ("There's no room for a new private charset %s",
- SDATA (SYMBOL_NAME (charset_symbol)));
- }
-
- update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
- vec[4], vec[5], vec[6], vec[7], vec[8]);
- Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
- CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
- Vcharset_list = Fcons (charset_symbol, Vcharset_list);
- Fupdate_coding_systems_internal ();
- return Qnil;
+ Lisp_Object attrs;
+
+ CHECK_CHARSET_GET_ATTR (charset, attrs);
+ CHARSET_ATTR_PLIST (attrs) = plist;
+ return plist;
}
-DEFUN ("generic-character-list", Fgeneric_character_list,
- Sgeneric_character_list, 0, 0, 0,
- doc: /* Return a list of all possible generic characters.
-It includes a generic character for a charset not yet defined. */)
- ()
+
+DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
+ doc: /* Unify characters of CHARSET with Unicode.
+This means reading the relevant file and installing the table defined
+by CHARSET's `:unify-map' property.
+
+Optional second arg UNIFY-MAP is a file name string or a vector. It has
+the same meaning as the `:unify-map' attribute in the function
+`define-charset' (which see).
+
+Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
+ (charset, unify_map, deunify)
+ Lisp_Object charset, unify_map, deunify;
{
- return Vgeneric_character_list;
+ int id;
+ struct charset *cs;
+
+ CHECK_CHARSET_GET_ID (charset, id);
+ cs = CHARSET_FROM_ID (id);
+ if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
+ load_charset (cs);
+ if (NILP (deunify)
+ ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
+ : ! CHARSET_UNIFIED_P (cs))
+ return Qnil;
+
+ CHARSET_UNIFIED_P (cs) = 0;
+ if (NILP (deunify))
+ {
+ if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET)
+ error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
+ if (NILP (unify_map))
+ unify_map = CHARSET_UNIFY_MAP (cs);
+ if (STRINGP (unify_map))
+ load_charset_map_from_file (cs, unify_map, 2);
+ else if (VECTORP (unify_map))
+ load_charset_map_from_vector (cs, unify_map, 2);
+ else if (NILP (unify_map))
+ error ("No unify-map for charset");
+ else
+ error ("Bad unify-map arg");
+ CHARSET_UNIFIED_P (cs) = 1;
+ }
+ else if (CHAR_TABLE_P (Vchar_unify_table))
+ {
+ int min_code = CHARSET_MIN_CODE (cs);
+ int max_code = CHARSET_MAX_CODE (cs);
+ int min_char = DECODE_CHAR (cs, min_code);
+ int max_char = DECODE_CHAR (cs, max_code);
+
+ char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
+ }
+
+ return Qnil;
}
DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
Sget_unused_iso_final_char, 2, 2, 0,
- doc: /* Return an unused ISO's final char for a charset of DIMENSION and CHARS.
+ doc: /*
+Return an unused ISO final char for a charset of DIMENISION and CHARS.
DIMENSION is the number of bytes to represent a character: 1 or 2.
CHARS is the number of characters in a dimension: 94 or 96.
@@ -749,20 +1276,33 @@ return nil. */)
CHECK_NUMBER (dimension);
CHECK_NUMBER (chars);
- if (XINT (dimension) != 1 && XINT (dimension) != 2)
- error ("Invalid charset dimension %d, it should be 1 or 2",
- XINT (dimension));
+ if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
+ args_out_of_range_3 (dimension, make_number (1), make_number (3));
if (XINT (chars) != 94 && XINT (chars) != 96)
- error ("Invalid charset chars %d, it should be 94 or 96",
- XINT (chars));
+ args_out_of_range_3 (chars, make_number (94), make_number (96));
for (final_char = '0'; final_char <= '?'; final_char++)
- {
- if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
- break;
- }
+ if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
+ break;
return (final_char <= '?' ? make_number (final_char) : Qnil);
}
+static void
+check_iso_charset_parameter (dimension, chars, final_char)
+ Lisp_Object dimension, chars, final_char;
+{
+ CHECK_NATNUM (dimension);
+ CHECK_NATNUM (chars);
+ CHECK_NATNUM (final_char);
+
+ if (XINT (dimension) > 3)
+ error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
+ if (XINT (chars) != 94 && XINT (chars) != 96)
+ error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
+ if (XINT (final_char) < '0' || XINT (final_char) > '~')
+ error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
+}
+
+
DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
4, 4, 0,
doc: /* Declare an equivalent charset for ISO-2022 decoding.
@@ -773,104 +1313,96 @@ if CHARSET is designated instead. */)
(dimension, chars, final_char, charset)
Lisp_Object dimension, chars, final_char, charset;
{
- int charset_id;
-
- CHECK_NUMBER (dimension);
- CHECK_NUMBER (chars);
- CHECK_NUMBER (final_char);
- CHECK_SYMBOL (charset);
-
- if (XINT (dimension) != 1 && XINT (dimension) != 2)
- error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
- if (XINT (chars) != 94 && XINT (chars) != 96)
- error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
- if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
- error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
- if ((charset_id = get_charset_id (charset)) < 0)
- error ("Invalid charset %s", SDATA (SYMBOL_NAME (charset)));
+ int id;
+ int chars_flag;
- ISO_CHARSET_TABLE (dimension, chars, final_char) = charset_id;
+ CHECK_CHARSET_GET_ID (charset, id);
+ check_iso_charset_parameter (dimension, chars, final_char);
+ chars_flag = XINT (chars) == 96;
+ ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
return Qnil;
}
+
/* Return information about charsets in the text at PTR of NBYTES
bytes, which are NCHARS characters. The value is:
0: Each character is represented by one byte. This is always
- true for unibyte text.
- 1: No charsets other than ascii eight-bit-control,
- eight-bit-graphic, and latin-1 are found.
- 2: Otherwise.
+ true for a unibyte string. For a multibyte string, true if
+ it contains only ASCII characters.
- In addition, if CHARSETS is nonzero, for each found charset N, set
- CHARSETS[N] to 1. For that, callers should allocate CHARSETS
- (MAX_CHARSET + 1 elements) in advance. It may lookup a translation
- table TABLE if supplied. For invalid charsets, set CHARSETS[1] to
- 1 (note that there's no charset whose ID is 1). */
+ 1: No charsets other than ascii, control-1, and latin-1 are
+ found.
+
+ 2: Otherwise.
+*/
int
-find_charset_in_text (ptr, nchars, nbytes, charsets, table)
- const unsigned char *ptr;
- int nchars, nbytes, *charsets;
- Lisp_Object table;
+string_xstring_p (string)
+ Lisp_Object string;
{
- if (nchars == nbytes)
- {
- if (charsets && nbytes > 0)
- {
- const unsigned char *endp = ptr + nbytes;
- int maskbits = 0;
+ const unsigned char *p = SDATA (string);
+ const unsigned char *endp = p + SBYTES (string);
- while (ptr < endp && maskbits != 7)
- {
- maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
- ptr++;
- }
+ if (SCHARS (string) == SBYTES (string))
+ return 0;
- if (maskbits & 1)
- charsets[CHARSET_ASCII] = 1;
- if (maskbits & 2)
- charsets[CHARSET_8_BIT_CONTROL] = 1;
- if (maskbits & 4)
- charsets[CHARSET_8_BIT_GRAPHIC] = 1;
- }
- return 0;
- }
- else
+ while (p < endp)
{
- int return_val = 1;
- int bytes, charset, c1, c2;
+ int c = STRING_CHAR_ADVANCE (p);
- if (! CHAR_TABLE_P (table))
- table = Qnil;
+ if (c >= 0x100)
+ return 2;
+ }
+ return 1;
+}
- while (nchars-- > 0)
- {
- SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
- ptr += bytes;
- if (!CHARSET_DEFINED_P (charset))
- charset = 1;
- else if (! NILP (table))
- {
- int c = translate_char (table, -1, charset, c1, c2);
- if (c >= 0)
- charset = CHAR_CHARSET (c);
- }
+/* Find charsets in the string at PTR of NCHARS and NBYTES.
- if (return_val == 1
- && charset != CHARSET_ASCII
- && charset != CHARSET_8_BIT_CONTROL
- && charset != CHARSET_8_BIT_GRAPHIC
- && charset != charset_latin_iso8859_1)
- return_val = 2;
+ CHARSETS is a vector. If Nth element is non-nil, it means the
+ charset whose id is N is already found.
- if (charsets)
- charsets[charset] = 1;
- else if (return_val == 2)
- break;
+ It may lookup a translation table TABLE if supplied. */
+
+static void
+find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
+ const unsigned char *ptr;
+ EMACS_INT nchars, nbytes;
+ Lisp_Object charsets, table;
+ int multibyte;
+{
+ const unsigned char *pend = ptr + nbytes;
+
+ if (nchars == nbytes)
+ {
+ if (multibyte)
+ ASET (charsets, charset_ascii, Qt);
+ else
+ while (ptr < pend)
+ {
+ int c = *ptr++;
+
+ if (!NILP (table))
+ c = translate_char (table, c);
+ if (ASCII_BYTE_P (c))
+ ASET (charsets, charset_ascii, Qt);
+ else
+ ASET (charsets, charset_eight_bit, Qt);
+ }
+ }
+ else
+ {
+ while (ptr < pend)
+ {
+ int c = STRING_CHAR_ADVANCE (ptr);
+ struct charset *charset;
+
+ if (!NILP (table))
+ c = translate_char (table, c);
+ charset = CHAR_CHARSET (c);
+ ASET (charsets, CHARSET_ID (charset), Qt);
}
- return return_val;
}
}
@@ -880,17 +1412,16 @@ DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
BEG and END are buffer positions.
Optional arg TABLE if non-nil is a translation table to look up.
-If the region contains invalid multibyte characters,
-`unknown' is included in the returned list.
-
If the current buffer is unibyte, the returned list may contain
only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
(beg, end, table)
Lisp_Object beg, end, table;
{
- int charsets[MAX_CHARSET + 1];
- int from, from_byte, to, stop, stop_byte, i;
+ Lisp_Object charsets;
+ EMACS_INT from, from_byte, to, stop, stop_byte;
+ int i;
Lisp_Object val;
+ int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
validate_region (&beg, &end);
from = XFASTINT (beg);
@@ -906,11 +1437,12 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
from_byte = CHAR_TO_BYTE (from);
- bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
+ charsets = Fmake_vector (make_number (charset_table_used), Qnil);
while (1)
{
- find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
- stop_byte - from_byte, charsets, table);
+ find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
+ stop_byte - from_byte, charsets, table,
+ multibyte);
if (stop < to)
{
from = stop, from_byte = stop_byte;
@@ -921,13 +1453,9 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
}
val = Qnil;
- if (charsets[1])
- val = Fcons (Qunknown, val);
- for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
- if (charsets[i])
- val = Fcons (CHARSET_SYMBOL (i), val);
- if (charsets[0])
- val = Fcons (Qascii, val);
+ for (i = charset_table_used - 1; i >= 0; i--)
+ if (!NILP (AREF (charsets, i)))
+ val = Fcons (CHARSET_NAME (charset_table + i), val);
return val;
}
@@ -936,850 +1464,607 @@ DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
doc: /* Return a list of charsets in STR.
Optional arg TABLE if non-nil is a translation table to look up.
-If the string contains invalid multibyte characters,
-`unknown' is included in the returned list.
-
If STR is unibyte, the returned list may contain
-only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
+only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
(str, table)
Lisp_Object str, table;
{
- int charsets[MAX_CHARSET + 1];
+ Lisp_Object charsets;
int i;
Lisp_Object val;
CHECK_STRING (str);
- bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
- find_charset_in_text (SDATA (str), SCHARS (str),
- SBYTES (str), charsets, table);
-
+ charsets = Fmake_vector (make_number (charset_table_used), Qnil);
+ find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
+ charsets, table,
+ STRING_MULTIBYTE (str));
val = Qnil;
- if (charsets[1])
- val = Fcons (Qunknown, val);
- for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
- if (charsets[i])
- val = Fcons (CHARSET_SYMBOL (i), val);
- if (charsets[0])
- val = Fcons (Qascii, val);
+ for (i = charset_table_used - 1; i >= 0; i--)
+ if (!NILP (AREF (charsets, i)))
+ val = Fcons (CHARSET_NAME (charset_table + i), val);
return val;
}
-DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
- doc: /* Return a character made from arguments.
-Internal use only. */)
- (charset, code1, code2)
- Lisp_Object charset, code1, code2;
+
+/* Return a character correponding to the code-point CODE of
+ CHARSET. */
+
+int
+decode_char (charset, code)
+ struct charset *charset;
+ unsigned code;
{
- int charset_id, c1, c2;
+ int c, char_index;
+ enum charset_method method = CHARSET_METHOD (charset);
- CHECK_NUMBER (charset);
- charset_id = XINT (charset);
- if (!CHARSET_DEFINED_P (charset_id))
- error ("Invalid charset ID: %d", XINT (charset));
+ if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
+ return -1;
- if (NILP (code1))
- c1 = 0;
- else
+ if (method == CHARSET_METHOD_MAP_DEFERRED)
{
- CHECK_NUMBER (code1);
- c1 = XINT (code1);
- }
- if (NILP (code2))
- c2 = 0;
- else
- {
- CHECK_NUMBER (code2);
- c2 = XINT (code2);
+ load_charset (charset);
+ method = CHARSET_METHOD (charset);
}
- if (charset_id == CHARSET_ASCII)
+ if (method == CHARSET_METHOD_SUBSET)
{
- if (c1 < 0 || c1 > 0x7F)
- goto invalid_code_posints;
- return make_number (c1);
+ Lisp_Object subset_info;
+
+ subset_info = CHARSET_SUBSET (charset);
+ charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
+ code -= XINT (AREF (subset_info, 3));
+ if (code < XFASTINT (AREF (subset_info, 1))
+ || code > XFASTINT (AREF (subset_info, 2)))
+ c = -1;
+ else
+ c = DECODE_CHAR (charset, code);
}
- else if (charset_id == CHARSET_8_BIT_CONTROL)
+ else if (method == CHARSET_METHOD_SUPERSET)
{
- if (NILP (code1))
- c1 = 0x80;
- else if (c1 < 0x80 || c1 > 0x9F)
- goto invalid_code_posints;
- return make_number (c1);
+ Lisp_Object parents;
+
+ parents = CHARSET_SUPERSET (charset);
+ c = -1;
+ for (; CONSP (parents); parents = XCDR (parents))
+ {
+ int id = XINT (XCAR (XCAR (parents)));
+ int code_offset = XINT (XCDR (XCAR (parents)));
+ unsigned this_code = code - code_offset;
+
+ charset = CHARSET_FROM_ID (id);
+ if ((c = DECODE_CHAR (charset, this_code)) >= 0)
+ break;
+ }
}
- else if (charset_id == CHARSET_8_BIT_GRAPHIC)
+ else
{
- if (NILP (code1))
- c1 = 0xA0;
- else if (c1 < 0xA0 || c1 > 0xFF)
- goto invalid_code_posints;
- return make_number (c1);
- }
- else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
- goto invalid_code_posints;
- c1 &= 0x7F;
- c2 &= 0x7F;
- if (c1 == 0
- ? c2 != 0
- : (c2 == 0
- ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
- : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
- goto invalid_code_posints;
- return make_number (MAKE_CHAR (charset_id, c1, c2));
-
- invalid_code_posints:
- error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
-}
+ char_index = CODE_POINT_TO_INDEX (charset, code);
+ if (char_index < 0)
+ return -1;
-DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
- doc: /* Return list of charset and one or two position-codes of CH.
-If CH is invalid as a character code,
-return a list of symbol `unknown' and CH. */)
- (ch)
- Lisp_Object ch;
-{
- int c, charset, c1, c2;
+ if (method == CHARSET_METHOD_MAP)
+ {
+ Lisp_Object decoder;
- CHECK_NUMBER (ch);
- c = XFASTINT (ch);
- if (!CHAR_VALID_P (c, 1))
- return Fcons (Qunknown, Fcons (ch, Qnil));
- SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
- return (c2 >= 0
- ? Fcons (CHARSET_SYMBOL (charset),
- Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
- : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
-}
+ decoder = CHARSET_DECODER (charset);
+ if (! VECTORP (decoder))
+ return -1;
+ c = XINT (AREF (decoder, char_index));
+ }
+ else
+ {
+ c = char_index + CHARSET_CODE_OFFSET (charset);
+ }
+ }
-DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
- doc: /* Return charset of CH. */)
- (ch)
- Lisp_Object ch;
-{
- CHECK_NUMBER (ch);
+ if (CHARSET_UNIFIED_P (charset)
+ && c >= 0)
+ {
+ MAYBE_UNIFY_CHAR (c);
+ }
- return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
+ return c;
}
-DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
- doc: /* Return charset of a character in the current buffer at position POS.
-If POS is nil, it defauls to the current point.
-If POS is out of range, the value is nil. */)
- (pos)
- Lisp_Object pos;
-{
- Lisp_Object ch;
- int charset;
+/* Variable used temporarily by the macro ENCODE_CHAR. */
+Lisp_Object charset_work;
- ch = Fchar_after (pos);
- if (! INTEGERP (ch))
- return ch;
- charset = CHAR_CHARSET (XINT (ch));
- return CHARSET_SYMBOL (charset);
-}
-
-DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
- doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
+/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
+ CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
+ use CHARSET's strict_max_char instead of max_char. */
-ISO 2022's designation sequence (escape sequence) distinguishes charsets
-by their DIMENSION, CHARS, and FINAL-CHAR,
-where as Emacs distinguishes them by charset symbol.
-See the documentation of the function `charset-info' for the meanings of
-DIMENSION, CHARS, and FINAL-CHAR. */)
- (dimension, chars, final_char)
- Lisp_Object dimension, chars, final_char;
-{
- int charset;
-
- CHECK_NUMBER (dimension);
- CHECK_NUMBER (chars);
- CHECK_NUMBER (final_char);
-
- if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
- return Qnil;
- return CHARSET_SYMBOL (charset);
-}
-
-/* If GENERICP is nonzero, return nonzero iff C is a valid normal or
- generic character. If GENERICP is zero, return nonzero iff C is a
- valid normal character. Do not call this function directly,
- instead use macro CHAR_VALID_P. */
-int
-char_valid_p (c, genericp)
- int c, genericp;
+unsigned
+encode_char (charset, c)
+ struct charset *charset;
+ int c;
{
- int charset, c1, c2;
+ unsigned code;
+ enum charset_method method = CHARSET_METHOD (charset);
- if (c < 0 || c >= MAX_CHAR)
- return 0;
- if (SINGLE_BYTE_CHAR_P (c))
- return 1;
- SPLIT_CHAR (c, charset, c1, c2);
- if (genericp)
+ if (CHARSET_UNIFIED_P (charset))
{
- if (c1)
- {
- if (c2 <= 0) c2 = 0x20;
- }
- else
+ Lisp_Object deunifier, deunified;
+
+ deunifier = CHARSET_DEUNIFIER (charset);
+ if (! CHAR_TABLE_P (deunifier))
{
- if (c2 <= 0) c1 = c2 = 0x20;
+ Funify_charset (CHARSET_NAME (charset), Qnil, Qnil);
+ deunifier = CHARSET_DEUNIFIER (charset);
}
+ deunified = CHAR_TABLE_REF (deunifier, c);
+ if (! NILP (deunified))
+ c = XINT (deunified);
}
- return (CHARSET_DEFINED_P (charset)
- && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
-}
-DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
- doc: /* Return t if OBJECT is a valid normal character.
-If optional arg GENERICP is non-nil, also return t if OBJECT is
-a valid generic character. */)
- (object, genericp)
- Lisp_Object object, genericp;
-{
- if (! NATNUMP (object))
- return Qnil;
- return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
-}
+ if (method == CHARSET_METHOD_SUBSET)
+ {
+ Lisp_Object subset_info;
+ struct charset *this_charset;
+
+ subset_info = CHARSET_SUBSET (charset);
+ this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
+ code = ENCODE_CHAR (this_charset, c);
+ if (code == CHARSET_INVALID_CODE (this_charset)
+ || code < XFASTINT (AREF (subset_info, 1))
+ || code > XFASTINT (AREF (subset_info, 2)))
+ return CHARSET_INVALID_CODE (charset);
+ code += XINT (AREF (subset_info, 3));
+ return code;
+ }
-DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
- Sunibyte_char_to_multibyte, 1, 1, 0,
- doc: /* Convert the unibyte character CH to multibyte character.
-The conversion is done based on `nonascii-translation-table' (which see)
- or `nonascii-insert-offset' (which see). */)
- (ch)
- Lisp_Object ch;
-{
- int c;
+ if (method == CHARSET_METHOD_SUPERSET)
+ {
+ Lisp_Object parents;
- CHECK_NUMBER (ch);
- c = XINT (ch);
- if (c < 0 || c >= 0400)
- error ("Invalid unibyte character: %d", c);
- c = unibyte_char_to_multibyte (c);
- if (c < 0)
- error ("Can't convert to multibyte character: %d", XINT (ch));
- return make_number (c);
-}
+ parents = CHARSET_SUPERSET (charset);
+ for (; CONSP (parents); parents = XCDR (parents))
+ {
+ int id = XINT (XCAR (XCAR (parents)));
+ int code_offset = XINT (XCDR (XCAR (parents)));
+ struct charset *this_charset = CHARSET_FROM_ID (id);
-DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
- Smultibyte_char_to_unibyte, 1, 1, 0,
- doc: /* Convert the multibyte character CH to unibyte character.
-The conversion is done based on `nonascii-translation-table' (which see)
- or `nonascii-insert-offset' (which see). */)
- (ch)
- Lisp_Object ch;
-{
- int c;
+ code = ENCODE_CHAR (this_charset, c);
+ if (code != CHARSET_INVALID_CODE (this_charset))
+ return code + code_offset;
+ }
+ return CHARSET_INVALID_CODE (charset);
+ }
- CHECK_NUMBER (ch);
- c = XINT (ch);
- if (! CHAR_VALID_P (c, 0))
- error ("Invalid multibyte character: %d", c);
- c = multibyte_char_to_unibyte (c, Qnil);
- if (c < 0)
- error ("Can't convert to unibyte character: %d", XINT (ch));
- return make_number (c);
-}
+ if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
+ || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
+ return CHARSET_INVALID_CODE (charset);
-DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
- doc: /* Return 1 regardless of the argument CH. */)
- (ch)
- Lisp_Object ch;
-{
- CHECK_NUMBER (ch);
- return make_number (1);
-}
+ if (method == CHARSET_METHOD_MAP_DEFERRED)
+ {
+ load_charset (charset);
+ method = CHARSET_METHOD (charset);
+ }
-/* Return how many bytes C will occupy in a multibyte buffer.
- Don't call this function directly, instead use macro CHAR_BYTES. */
-int
-char_bytes (c)
- int c;
-{
- int charset;
+ if (method == CHARSET_METHOD_MAP)
+ {
+ Lisp_Object encoder;
+ Lisp_Object val;
- if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
- return 1;
- if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
- return 1;
+ encoder = CHARSET_ENCODER (charset);
+ if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
+ return CHARSET_INVALID_CODE (charset);
+ val = CHAR_TABLE_REF (encoder, c);
+ if (NILP (val))
+ return CHARSET_INVALID_CODE (charset);
+ code = XINT (val);
+ if (! CHARSET_COMPACT_CODES_P (charset))
+ code = INDEX_TO_CODE_POINT (charset, code);
+ }
+ else /* method == CHARSET_METHOD_OFFSET */
+ {
+ code = c - CHARSET_CODE_OFFSET (charset);
+ code = INDEX_TO_CODE_POINT (charset, code);
+ }
- charset = CHAR_CHARSET (c);
- return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
+ return code;
}
-/* Return the width of character of which multi-byte form starts with
- C. The width is measured by how many columns occupied on the
- screen when displayed in the current buffer. */
-
-#define ONE_BYTE_CHAR_WIDTH(c) \
- (c < 0x20 \
- ? (c == '\t' \
- ? XFASTINT (current_buffer->tab_width) \
- : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
- : (c < 0x7f \
- ? 1 \
- : (c == 0x7F \
- ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
- : ((! NILP (current_buffer->enable_multibyte_characters) \
- && BASE_LEADING_CODE_P (c)) \
- ? WIDTH_BY_CHAR_HEAD (c) \
- : 4))))
-
-DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
- doc: /* Return width of CH when displayed in the current buffer.
-The width is measured by how many columns it occupies on the screen.
-Tab is taken to occupy `tab-width' columns. */)
- (ch)
- Lisp_Object ch;
-{
- Lisp_Object val, disp;
- int c;
- struct Lisp_Char_Table *dp = buffer_display_table ();
- CHECK_NUMBER (ch);
+DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
+ doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
+Return nil if CODE-POINT is not valid in CHARSET.
- c = XINT (ch);
+CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
- /* Get the way the display table would display it. */
- disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
+Optional argument RESTRICTION specifies a way to map the pair of CCS
+and CODE-POINT to a chracter. Currently not supported and just ignored. */)
+ (charset, code_point, restriction)
+ Lisp_Object charset, code_point, restriction;
+{
+ int c, id;
+ unsigned code;
+ struct charset *charsetp;
- if (VECTORP (disp))
- XSETINT (val, XVECTOR (disp)->size);
- else if (SINGLE_BYTE_CHAR_P (c))
- XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
+ CHECK_CHARSET_GET_ID (charset, id);
+ if (CONSP (code_point))
+ {
+ CHECK_NATNUM_CAR (code_point);
+ CHECK_NATNUM_CDR (code_point);
+ code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
+ }
else
{
- int charset = CHAR_CHARSET (c);
-
- XSETFASTINT (val, CHARSET_WIDTH (charset));
+ CHECK_NATNUM (code_point);
+ code = XINT (code_point);
}
- return val;
+ charsetp = CHARSET_FROM_ID (id);
+ c = DECODE_CHAR (charsetp, code);
+ return (c >= 0 ? make_number (c) : Qnil);
}
-/* Return width of string STR of length LEN when displayed in the
- current buffer. The width is measured by how many columns it
- occupies on the screen. */
-int
-strwidth (str, len)
- unsigned char *str;
- int len;
+DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
+ doc: /* Encode the character CH into a code-point of CHARSET.
+Return nil if CHARSET doesn't include CH.
+
+Optional argument RESTRICTION specifies a way to map CHAR to a
+code-point in CCS. Currently not supported and just ignored. */)
+ (ch, charset, restriction)
+ Lisp_Object ch, charset, restriction;
{
- return c_string_width (str, len, -1, NULL, NULL);
+ int id;
+ unsigned code;
+ struct charset *charsetp;
+
+ CHECK_CHARSET_GET_ID (charset, id);
+ CHECK_NATNUM (ch);
+ charsetp = CHARSET_FROM_ID (id);
+ code = ENCODE_CHAR (charsetp, XINT (ch));
+ if (code == CHARSET_INVALID_CODE (charsetp))
+ return Qnil;
+ if (code > 0x7FFFFFF)
+ return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
+ return make_number (code);
}
-/* Return width of string STR of length LEN when displayed in the
- current buffer. The width is measured by how many columns it
- occupies on the screen. If PRECISION > 0, return the width of
- longest substring that doesn't exceed PRECISION, and set number of
- characters and bytes of the substring in *NCHARS and *NBYTES
- respectively. */
-int
-c_string_width (str, len, precision, nchars, nbytes)
- const unsigned char *str;
- int len, precision, *nchars, *nbytes;
+DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
+ doc:
+ /* Return a character of CHARSET whose position codes are CODEn.
+
+CODE1 through CODE4 are optional, but if you don't supply sufficient
+position codes, it is assumed that the minimum code in each dimension
+is specified. */)
+ (charset, code1, code2, code3, code4)
+ Lisp_Object charset, code1, code2, code3, code4;
{
- int i = 0, i_byte = 0;
- int width = 0;
- int chars;
- struct Lisp_Char_Table *dp = buffer_display_table ();
+ int id, dimension;
+ struct charset *charsetp;
+ unsigned code;
+ int c;
- while (i_byte < len)
+ CHECK_CHARSET_GET_ID (charset, id);
+ charsetp = CHARSET_FROM_ID (id);
+
+ dimension = CHARSET_DIMENSION (charsetp);
+ if (NILP (code1))
+ code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
+ ? 0 : CHARSET_MIN_CODE (charsetp));
+ else
{
- int bytes, thiswidth;
- Lisp_Object val;
+ CHECK_NATNUM (code1);
+ if (XFASTINT (code1) >= 0x100)
+ args_out_of_range (make_number (0xFF), code1);
+ code = XFASTINT (code1);
- if (dp)
+ if (dimension > 1)
{
- int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
-
- chars = 1;
- val = DISP_CHAR_VECTOR (dp, c);
- if (VECTORP (val))
- thiswidth = XVECTOR (val)->size;
+ code <<= 8;
+ if (NILP (code2))
+ code |= charsetp->code_space[(dimension - 2) * 4];
else
- thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
- }
- else
- {
- chars = 1;
- PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
- thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
- }
+ {
+ CHECK_NATNUM (code2);
+ if (XFASTINT (code2) >= 0x100)
+ args_out_of_range (make_number (0xFF), code2);
+ code |= XFASTINT (code2);
+ }
- if (precision > 0
- && (width + thiswidth > precision))
- {
- *nchars = i;
- *nbytes = i_byte;
- return width;
+ if (dimension > 2)
+ {
+ code <<= 8;
+ if (NILP (code3))
+ code |= charsetp->code_space[(dimension - 3) * 4];
+ else
+ {
+ CHECK_NATNUM (code3);
+ if (XFASTINT (code3) >= 0x100)
+ args_out_of_range (make_number (0xFF), code3);
+ code |= XFASTINT (code3);
+ }
+
+ if (dimension > 3)
+ {
+ code <<= 8;
+ if (NILP (code4))
+ code |= charsetp->code_space[0];
+ else
+ {
+ CHECK_NATNUM (code4);
+ if (XFASTINT (code4) >= 0x100)
+ args_out_of_range (make_number (0xFF), code4);
+ code |= XFASTINT (code4);
+ }
+ }
+ }
}
- i++;
- i_byte += bytes;
- width += thiswidth;
- }
-
- if (precision > 0)
- {
- *nchars = i;
- *nbytes = i_byte;
}
- return width;
+ if (CHARSET_ISO_FINAL (charsetp) >= 0)
+ code &= 0x7F7F7F7F;
+ c = DECODE_CHAR (charsetp, code);
+ if (c < 0)
+ error ("Invalid code(s)");
+ return make_number (c);
}
-/* Return width of Lisp string STRING when displayed in the current
- buffer. The width is measured by how many columns it occupies on
- the screen while paying attention to compositions. If PRECISION >
- 0, return the width of longest substring that doesn't exceed
- PRECISION, and set number of characters and bytes of the substring
- in *NCHARS and *NBYTES respectively. */
-int
-lisp_string_width (string, precision, nchars, nbytes)
- Lisp_Object string;
- int precision, *nchars, *nbytes;
-{
- int len = SCHARS (string);
- int len_byte = SBYTES (string);
- /* This set multibyte to 0 even if STRING is multibyte when it
- contains only ascii and eight-bit-graphic, but that's
- intentional. */
- int multibyte = len < len_byte;
- const unsigned char *str = SDATA (string);
- int i = 0, i_byte = 0;
- int width = 0;
- struct Lisp_Char_Table *dp = buffer_display_table ();
-
- while (i < len)
- {
- int chars, bytes, thiswidth;
- Lisp_Object val;
- int cmp_id;
- int ignore, end;
+/* Return the first charset in CHARSET_LIST that contains C.
+ CHARSET_LIST is a list of charset IDs. If it is nil, use
+ Vcharset_ordered_list. */
- if (find_composition (i, -1, &ignore, &end, &val, string)
- && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
- >= 0))
- {
- thiswidth = composition_table[cmp_id]->width;
- chars = end - i;
- bytes = string_char_to_byte (string, end) - i_byte;
- }
- else if (dp)
- {
- int c;
+struct charset *
+char_charset (c, charset_list, code_return)
+ int c;
+ Lisp_Object charset_list;
+ unsigned *code_return;
+{
+ if (NILP (charset_list))
+ charset_list = Vcharset_ordered_list;
- if (multibyte)
- c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
- else
- c = str[i_byte], bytes = 1;
- chars = 1;
- val = DISP_CHAR_VECTOR (dp, c);
- if (VECTORP (val))
- thiswidth = XVECTOR (val)->size;
- else
- thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
- }
- else
- {
- chars = 1;
- if (multibyte)
- PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
- else
- bytes = 1;
- thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
- }
+ while (CONSP (charset_list))
+ {
+ struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
+ unsigned code = ENCODE_CHAR (charset, c);
- if (precision > 0
- && (width + thiswidth > precision))
+ if (code != CHARSET_INVALID_CODE (charset))
{
- *nchars = i;
- *nbytes = i_byte;
- return width;
+ if (code_return)
+ *code_return = code;
+ return charset;
}
- i += chars;
- i_byte += bytes;
- width += thiswidth;
- }
-
- if (precision > 0)
- {
- *nchars = i;
- *nbytes = i_byte;
+ charset_list = XCDR (charset_list);
}
-
- return width;
+ return NULL;
}
-DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
- doc: /* Return width of STRING when displayed in the current buffer.
-Width is measured by how many columns it occupies on the screen.
-When calculating width of a multibyte character in STRING,
-only the base leading-code is considered; the validity of
-the following bytes is not checked. Tabs in STRING are always
-taken to occupy `tab-width' columns. */)
- (string)
- Lisp_Object string;
-{
- Lisp_Object val;
-
- CHECK_STRING (string);
- XSETFASTINT (val, lisp_string_width (string, -1, NULL, NULL));
- return val;
-}
-DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
- doc: /* Return the direction of CH.
-The returned value is 0 for left-to-right and 1 for right-to-left. */)
+DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
+ doc:
+ /*Return list of charset and one to four position-codes of CHAR.
+The charset is decided by the current priority order of charsets.
+A position-code is a byte value of each dimension of the code-point of
+CHAR in the charset. */)
(ch)
Lisp_Object ch;
{
- int charset;
+ struct charset *charset;
+ int c, dimension;
+ unsigned code;
+ Lisp_Object val;
- CHECK_NUMBER (ch);
- charset = CHAR_CHARSET (XFASTINT (ch));
- if (!CHARSET_DEFINED_P (charset))
- invalid_character (XINT (ch));
- return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
+ CHECK_CHARACTER (ch);
+ c = XFASTINT (ch);
+ charset = CHAR_CHARSET (c);
+ if (! charset)
+ abort ();
+ code = ENCODE_CHAR (charset, c);
+ if (code == CHARSET_INVALID_CODE (charset))
+ abort ();
+ dimension = CHARSET_DIMENSION (charset);
+ for (val = Qnil; dimension > 0; dimension--)
+ {
+ val = Fcons (make_number (code & 0xFF), val);
+ code >>= 8;
+ }
+ return Fcons (CHARSET_NAME (charset), val);
}
-/* Return the number of characters in the NBYTES bytes at PTR.
- This works by looking at the contents and checking for multibyte sequences.
- However, if the current buffer has enable-multibyte-characters = nil,
- we treat each byte as a character. */
-int
-chars_in_text (ptr, nbytes)
- const unsigned char *ptr;
- int nbytes;
+DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
+ doc: /* Return the charset of highest priority that contains CH. */)
+ (ch)
+ Lisp_Object ch;
{
- /* current_buffer is null at early stages of Emacs initialization. */
- if (current_buffer == 0
- || NILP (current_buffer->enable_multibyte_characters))
- return nbytes;
+ struct charset *charset;
- return multibyte_chars_in_text (ptr, nbytes);
+ CHECK_CHARACTER (ch);
+ charset = CHAR_CHARSET (XINT (ch));
+ return (CHARSET_NAME (charset));
}
-/* Return the number of characters in the NBYTES bytes at PTR.
- This works by looking at the contents and checking for multibyte sequences.
- It ignores enable-multibyte-characters. */
-int
-multibyte_chars_in_text (ptr, nbytes)
- const unsigned char *ptr;
- int nbytes;
+DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
+ doc: /*
+Return charset of a character in the current buffer at position POS.
+If POS is nil, it defauls to the current point.
+If POS is out of range, the value is nil. */)
+ (pos)
+ Lisp_Object pos;
{
- const unsigned char *endp;
- int chars, bytes;
+ Lisp_Object ch;
+ struct charset *charset;
- endp = ptr + nbytes;
- chars = 0;
+ ch = Fchar_after (pos);
+ if (! INTEGERP (ch))
+ return ch;
+ charset = CHAR_CHARSET (XINT (ch));
+ return (CHARSET_NAME (charset));
+}
- while (ptr < endp)
- {
- PARSE_MULTIBYTE_SEQ (ptr, endp - ptr, bytes);
- ptr += bytes;
- chars++;
- }
- return chars;
-}
+DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
+ doc: /*
+Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
-/* Parse unibyte text at STR of LEN bytes as multibyte text, and
- count the numbers of characters and bytes in it. On counting
- bytes, pay attention to the fact that 8-bit characters in the range
- 0x80..0x9F are represented by 2 bytes in multibyte text. */
-void
-parse_str_as_multibyte (str, len, nchars, nbytes)
- const unsigned char *str;
- int len, *nchars, *nbytes;
+ISO 2022's designation sequence (escape sequence) distinguishes charsets
+by their DIMENSION, CHARS, and FINAL-CHAR,
+where as Emacs distinguishes them by charset symbol.
+See the documentation of the function `charset-info' for the meanings of
+DIMENSION, CHARS, and FINAL-CHAR. */)
+ (dimension, chars, final_char)
+ Lisp_Object dimension, chars, final_char;
{
- const unsigned char *endp = str + len;
- int n, chars = 0, bytes = 0;
-
- while (str < endp)
- {
- if (UNIBYTE_STR_AS_MULTIBYTE_P (str, endp - str, n))
- str += n, bytes += n;
- else
- str++, bytes += 2;
- chars++;
- }
- *nchars = chars;
- *nbytes = bytes;
- return;
+ int id;
+ int chars_flag;
+
+ check_iso_charset_parameter (dimension, chars, final_char);
+ chars_flag = XFASTINT (chars) == 96;
+ id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
+ XFASTINT (final_char));
+ return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
}
-/* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
- It actually converts only 8-bit characters in the range 0x80..0x9F
- that don't contruct multibyte characters to multibyte forms. If
- NCHARS is nonzero, set *NCHARS to the number of characters in the
- text. It is assured that we can use LEN bytes at STR as a work
- area and that is enough. Return the number of bytes of the
- resulting text. */
-int
-str_as_multibyte (str, len, nbytes, nchars)
- unsigned char *str;
- int len, nbytes, *nchars;
+DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
+ 0, 0, 0,
+ doc: /*
+Clear encoder and decoder of charsets that are loaded from mapfiles. */)
+ ()
{
- unsigned char *p = str, *endp = str + nbytes;
- unsigned char *to;
- int chars = 0;
- int n;
-
- while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
- p += n, chars++;
- if (nchars)
- *nchars = chars;
- if (p == endp)
- return nbytes;
-
- to = p;
- nbytes = endp - p;
- endp = str + len;
- safe_bcopy (p, endp - nbytes, nbytes);
- p = endp - nbytes;
- while (p < endp)
+ int i;
+ struct charset *charset;
+ Lisp_Object attrs;
+
+ for (i = 0; i < charset_table_used; i++)
{
- if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
- {
- while (n--)
- *to++ = *p++;
- }
- else
+ charset = CHARSET_FROM_ID (i);
+ attrs = CHARSET_ATTRIBUTES (charset);
+
+ if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
{
- *to++ = LEADING_CODE_8_BIT_CONTROL;
- *to++ = *p++ + 0x20;
+ CHARSET_ATTR_DECODER (attrs) = Qnil;
+ CHARSET_ATTR_ENCODER (attrs) = Qnil;
+ CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
}
- chars++;
- }
- if (nchars)
- *nchars = chars;
- return (to - str);
-}
-/* Parse unibyte string at STR of LEN bytes, and return the number of
- bytes it may ocupy when converted to multibyte string by
- `str_to_multibyte'. */
-
-int
-parse_str_to_multibyte (str, len)
- unsigned char *str;
- int len;
-{
- unsigned char *endp = str + len;
- int bytes;
-
- for (bytes = 0; str < endp; str++)
- bytes += (*str < 0x80 || *str >= 0xA0) ? 1 : 2;
- return bytes;
-}
-
-/* Convert unibyte text at STR of NBYTES bytes to multibyte text
- that contains the same single-byte characters. It actually
- converts all 8-bit characters to multibyte forms. It is assured
- that we can use LEN bytes at STR as a work area and that is
- enough. */
+ if (CHARSET_UNIFIED_P (charset))
+ CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
+ }
-int
-str_to_multibyte (str, len, bytes)
- unsigned char *str;
- int len, bytes;
-{
- unsigned char *p = str, *endp = str + bytes;
- unsigned char *to;
-
- while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
- if (p == endp)
- return bytes;
- to = p;
- bytes = endp - p;
- endp = str + len;
- safe_bcopy (p, endp - bytes, bytes);
- p = endp - bytes;
- while (p < endp)
+ if (CHAR_TABLE_P (Vchar_unified_charset_table))
{
- if (*p < 0x80 || *p >= 0xA0)
- *to++ = *p++;
- else
- *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
+ Foptimize_char_table (Vchar_unified_charset_table);
+ Vchar_unify_table = Vchar_unified_charset_table;
+ Vchar_unified_charset_table = Qnil;
}
- return (to - str);
-}
-/* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
- actually converts only 8-bit characters in the range 0x80..0x9F to
- unibyte forms. */
+ return Qnil;
+}
-int
-str_as_unibyte (str, bytes)
- unsigned char *str;
- int bytes;
+DEFUN ("charset-priority-list", Fcharset_priority_list,
+ Scharset_priority_list, 0, 1, 0,
+ doc: /* Return the list of charsets ordered by priority.
+HIGHESTP non-nil means just return the highest priority one. */)
+ (highestp)
+ Lisp_Object highestp;
{
- unsigned char *p = str, *endp = str + bytes;
- unsigned char *to = str;
+ Lisp_Object val = Qnil, list = Vcharset_ordered_list;
- while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
- to = p;
- while (p < endp)
+ if (!NILP (highestp))
+ return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
+
+ while (!NILP (list))
{
- if (*p == LEADING_CODE_8_BIT_CONTROL)
- *to++ = *(p + 1) - 0x20, p += 2;
- else
- *to++ = *p++;
+ val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
+ list = XCDR (list);
}
- return (to - str);
+ return Fnreverse (val);
}
-
-DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
- doc: /* Concatenate all the argument characters and make the result a string.
-usage: (string &rest CHARACTERS) */)
- (n, args)
- int n;
+DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
+ 1, MANY, 0,
+ doc: /* Assign higher priority to the charsets given as arguments.
+usage: (set-charset-priority &rest charsets) */)
+ (nargs, args)
+ int nargs;
Lisp_Object *args;
{
- int i, bufsize;
- unsigned char *buf, *p;
- int c;
- int multibyte = 0;
- Lisp_Object ret;
- USE_SAFE_ALLOCA;
-
- bufsize = MAX_MULTIBYTE_LENGTH * n;
- SAFE_ALLOCA (buf, unsigned char *, bufsize);
- p = buf;
+ Lisp_Object new_head, old_list, arglist[2];
+ Lisp_Object list_2022, list_emacs_mule;
+ int i, id;
- for (i = 0; i < n; i++)
+ old_list = Fcopy_sequence (Vcharset_ordered_list);
+ new_head = Qnil;
+ for (i = 0; i < nargs; i++)
{
- CHECK_NUMBER (args[i]);
- if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
- multibyte = 1;
+ CHECK_CHARSET_GET_ID (args[i], id);
+ if (! NILP (Fmemq (make_number (id), old_list)))
+ {
+ old_list = Fdelq (make_number (id), old_list);
+ new_head = Fcons (make_number (id), new_head);
+ }
}
+ arglist[0] = Fnreverse (new_head);
+ arglist[1] = old_list;
+ Vcharset_ordered_list = Fnconc (2, arglist);
+ charset_ordered_list_tick++;
- for (i = 0; i < n; i++)
+ for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
+ CONSP (old_list); old_list = XCDR (old_list))
{
- c = XINT (args[i]);
- if (multibyte)
- p += CHAR_STRING (c, p);
- else
- *p++ = c;
+ if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
+ list_2022 = Fcons (XCAR (old_list), list_2022);
+ if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
+ list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
}
+ Viso_2022_charset_list = Fnreverse (list_2022);
+ Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
- ret = make_string_from_bytes (buf, n, p - buf);
- SAFE_FREE ();
-
- return ret;
+ return Qnil;
}
-#endif /* emacs */
-
-int
-charset_id_internal (charset_name)
- char *charset_name;
+DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
+ 0, 1, 0,
+ doc: /* Internal use only.
+Return charset identification number of CHARSET. */)
+ (charset)
+ Lisp_Object charset;
{
- Lisp_Object val;
-
- val= Fget (intern (charset_name), Qcharset);
- if (!VECTORP (val))
- error ("Charset %s is not defined", charset_name);
+ int id;
- return (XINT (XVECTOR (val)->contents[0]));
+ CHECK_CHARSET_GET_ID (charset, id);
+ return make_number (id);
}
-DEFUN ("setup-special-charsets", Fsetup_special_charsets,
- Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only. */)
- ()
+
+void
+init_charset ()
{
- charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
- charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
- charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
- charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
- charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
- charset_big5_1 = charset_id_internal ("chinese-big5-1");
- charset_big5_2 = charset_id_internal ("chinese-big5-2");
- charset_mule_unicode_0100_24ff
- = charset_id_internal ("mule-unicode-0100-24ff");
- charset_mule_unicode_2500_33ff
- = charset_id_internal ("mule-unicode-2500-33ff");
- charset_mule_unicode_e000_ffff
- = charset_id_internal ("mule-unicode-e000-ffff");
- return Qnil;
+ Vcharset_map_path
+ = Fcons (Fexpand_file_name (build_string ("charsets"), Vdata_directory),
+ Qnil);
}
+
void
init_charset_once ()
{
int i, j, k;
- staticpro (&Vcharset_table);
- staticpro (&Vcharset_symbol_table);
- staticpro (&Vgeneric_character_list);
-
- /* This has to be done here, before we call Fmake_char_table. */
- Qcharset_table = intern ("charset-table");
- staticpro (&Qcharset_table);
-
- /* Intern this now in case it isn't already done.
- Setting this variable twice is harmless.
- But don't staticpro it here--that is done in alloc.c. */
- Qchar_table_extra_slots = intern ("char-table-extra-slots");
-
- /* Now we are ready to set up this property, so we can
- create the charset table. */
- Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
- Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
-
- Qunknown = intern ("unknown");
- staticpro (&Qunknown);
- Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
- Qunknown);
-
- /* Setup tables. */
- for (i = 0; i < 2; i++)
- for (j = 0; j < 2; j++)
- for (k = 0; k < 128; k++)
- iso_charset_table [i][j][k] = -1;
+ for (i = 0; i < ISO_MAX_DIMENSION; i++)
+ for (j = 0; j < ISO_MAX_CHARS; j++)
+ for (k = 0; k < ISO_MAX_FINAL; k++)
+ iso_charset_table[i][j][k] = -1;
for (i = 0; i < 256; i++)
- bytes_by_char_head[i] = 1;
- bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
- bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
- bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
- bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
+ emacs_mule_charset[i] = NULL;
+
+ charset_jisx0201_roman = -1;
+ charset_jisx0208_1978 = -1;
+ charset_jisx0208 = -1;
for (i = 0; i < 128; i++)
- width_by_char_head[i] = 1;
+ unibyte_to_multibyte_table[i] = i;
for (; i < 256; i++)
- width_by_char_head[i] = 4;
- width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
- width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
- width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
- width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
-
- {
- Lisp_Object val;
-
- val = Qnil;
- for (i = 0x81; i < 0x90; i++)
- val = Fcons (make_number ((i - 0x70) << 7), val);
- for (; i < 0x9A; i++)
- val = Fcons (make_number ((i - 0x8F) << 14), val);
- for (i = 0xA0; i < 0xF0; i++)
- val = Fcons (make_number ((i - 0x70) << 7), val);
- for (; i < 0xFF; i++)
- val = Fcons (make_number ((i - 0xE0) << 14), val);
- Vgeneric_character_list = Fnreverse (val);
- }
-
- nonascii_insert_offset = 0;
- Vnonascii_translation_table = Qnil;
+ unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
}
#ifdef emacs
@@ -1787,140 +2072,87 @@ init_charset_once ()
void
syms_of_charset ()
{
- Qcharset = intern ("charset");
- staticpro (&Qcharset);
-
- Qascii = intern ("ascii");
- staticpro (&Qascii);
-
- Qeight_bit_control = intern ("eight-bit-control");
- staticpro (&Qeight_bit_control);
-
- Qeight_bit_graphic = intern ("eight-bit-graphic");
- staticpro (&Qeight_bit_graphic);
-
- /* Define special charsets ascii, eight-bit-control, and
- eight-bit-graphic. */
- update_charset_table (make_number (CHARSET_ASCII),
- make_number (1), make_number (94),
- make_number (1),
- make_number (0),
- make_number ('B'),
- make_number (0),
- build_string ("ASCII"),
- Qnil, /* same as above */
- build_string ("ASCII (ISO646 IRV)"));
- CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
- Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
-
- update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
- make_number (1), make_number (96),
- make_number (4),
- make_number (0),
- make_number (-1),
- make_number (-1),
- build_string ("8-bit control code (0x80..0x9F)"),
- Qnil, /* same as above */
- Qnil); /* same as above */
- CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
- Fput (Qeight_bit_control, Qcharset,
- CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
-
- update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
- make_number (1), make_number (96),
- make_number (4),
- make_number (0),
- make_number (-1),
- make_number (-1),
- build_string ("8-bit graphic char (0xA0..0xFF)"),
- Qnil, /* same as above */
- Qnil); /* same as above */
- CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
- Fput (Qeight_bit_graphic, Qcharset,
- CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
-
- Qauto_fill_chars = intern ("auto-fill-chars");
- staticpro (&Qauto_fill_chars);
- Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
-
- defsubr (&Sdefine_charset);
- defsubr (&Sgeneric_character_list);
+ DEFSYM (Qcharsetp, "charsetp");
+
+ DEFSYM (Qascii, "ascii");
+ DEFSYM (Qunicode, "unicode");
+ DEFSYM (Qeight_bit, "eight-bit");
+ DEFSYM (Qiso_8859_1, "iso-8859-1");
+
+ DEFSYM (Qgl, "gl");
+ DEFSYM (Qgr, "gr");
+
+ staticpro (&Vcharset_ordered_list);
+ Vcharset_ordered_list = Qnil;
+
+ staticpro (&Viso_2022_charset_list);
+ Viso_2022_charset_list = Qnil;
+
+ staticpro (&Vemacs_mule_charset_list);
+ Vemacs_mule_charset_list = Qnil;
+
+ staticpro (&Vcharset_hash_table);
+ {
+ Lisp_Object args[2];
+ args[0] = QCtest;
+ args[1] = Qeq;
+ Vcharset_hash_table = Fmake_hash_table (2, args);
+ }
+
+ charset_table_size = 128;
+ charset_table = ((struct charset *)
+ xmalloc (sizeof (struct charset) * charset_table_size));
+ charset_table_used = 0;
+
+ staticpro (&Vchar_unified_charset_table);
+ Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
+
+ defsubr (&Scharsetp);
+ defsubr (&Smap_charset_chars);
+ defsubr (&Sdefine_charset_internal);
+ defsubr (&Sdefine_charset_alias);
+ defsubr (&Sunibyte_charset);
+ defsubr (&Sset_unibyte_charset);
+ defsubr (&Scharset_plist);
+ defsubr (&Sset_charset_plist);
+ defsubr (&Sunify_charset);
defsubr (&Sget_unused_iso_final_char);
defsubr (&Sdeclare_equiv_charset);
defsubr (&Sfind_charset_region);
defsubr (&Sfind_charset_string);
- defsubr (&Smake_char_internal);
+ defsubr (&Sdecode_char);
+ defsubr (&Sencode_char);
defsubr (&Ssplit_char);
+ defsubr (&Smake_char);
defsubr (&Schar_charset);
defsubr (&Scharset_after);
defsubr (&Siso_charset);
- defsubr (&Schar_valid_p);
- defsubr (&Sunibyte_char_to_multibyte);
- defsubr (&Smultibyte_char_to_unibyte);
- defsubr (&Schar_bytes);
- defsubr (&Schar_width);
- defsubr (&Sstring_width);
- defsubr (&Schar_direction);
- defsubr (&Sstring);
- defsubr (&Ssetup_special_charsets);
+ defsubr (&Sclear_charset_maps);
+ defsubr (&Scharset_priority_list);
+ defsubr (&Sset_charset_priority);
+ defsubr (&Scharset_id_internal);
+
+ DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
+ doc: /* *Lisp of directories to search for charset map files. */);
+ Vcharset_map_path = Qnil;
DEFVAR_LISP ("charset-list", &Vcharset_list,
- doc: /* List of charsets ever defined. */);
- Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
- Fcons (Qeight_bit_graphic, Qnil)));
-
- DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
- doc: /* Vector of cons cell of a symbol and translation table ever defined.
-An ID of a translation table is an index of this vector. */);
- Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
-
- DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
- doc: /* Leading-code of private TYPE9N charset of column-width 1. */);
- leading_code_private_11 = LEADING_CODE_PRIVATE_11;
-
- DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
- doc: /* Leading-code of private TYPE9N charset of column-width 2. */);
- leading_code_private_12 = LEADING_CODE_PRIVATE_12;
-
- DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
- doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1. */);
- leading_code_private_21 = LEADING_CODE_PRIVATE_21;
-
- DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
- doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2. */);
- leading_code_private_22 = LEADING_CODE_PRIVATE_22;
-
- DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
- doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.
-This is used for converting unibyte text to multibyte,
-and for inserting character codes specified by number.
-
-This serves to convert a Latin-1 or similar 8-bit character code
-to the corresponding Emacs multibyte character code.
-Typically the value should be (- (make-char CHARSET 0) 128),
-for your choice of character set.
-If `nonascii-translation-table' is non-nil, it overrides this variable. */);
- nonascii_insert_offset = 0;
-
- DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
- doc: /* Translation table to convert non-ASCII unibyte codes to multibyte.
-This is used for converting unibyte text to multibyte,
-and for inserting character codes specified by number.
-
-Conversion is performed only when multibyte characters are enabled,
-and it serves to convert a Latin-1 or similar 8-bit character code
-to the corresponding Emacs character code.
-
-If this is nil, `nonascii-insert-offset' is used instead.
-See also the docstring of `make-translation-table'. */);
- Vnonascii_translation_table = Qnil;
-
- DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
- doc: /* A char-table for characters which invoke auto-filling.
-Such characters have value t in this table. */);
- Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
- CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
- CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
+ doc: /* List of all charsets ever defined. */);
+ Vcharset_list = Qnil;
+
+ charset_ascii
+ = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
+ 0, 127, 'B', -1, 0, 1, 0, 0);
+ charset_iso_8859_1
+ = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
+ 0, 255, -1, -1, -1, 1, 0, 0);
+ charset_unicode
+ = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
+ 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
+ charset_eight_bit
+ = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
+ 128, 255, -1, 0, -1, 0, 0,
+ MAX_5_BYTE_CHAR + 1);
}
#endif /* emacs */
diff --git a/src/charset.h b/src/charset.h
index b7ab4cb8b53..25ccaf9d324 100644
--- a/src/charset.h
+++ b/src/charset.h
@@ -1,9 +1,12 @@
-/* Header for multibyte character handler.
+/* Header for charset handler.
Copyright (C) 2001, 2002, 2003, 2004, 2005,
- 2006 Free Software Foundation, Inc.
+ 2006 Free Software Foundation, Inc.
Copyright (C) 1995, 1997, 1998, 2003
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -25,854 +28,523 @@ Boston, MA 02110-1301, USA. */
#ifndef EMACS_CHARSET_H
#define EMACS_CHARSET_H
-/* #define BYTE_COMBINING_DEBUG */
-
-/*** GENERAL NOTE on CHARACTER SET (CHARSET) ***
-
- A character set ("charset" hereafter) is a meaningful collection
- (i.e. language, culture, functionality, etc) of characters. Emacs
- handles multiple charsets at once. Each charset corresponds to one
- of the ISO charsets. Emacs identifies a charset by a unique
- identification number, whereas ISO identifies a charset by a triplet
- of DIMENSION, CHARS and FINAL-CHAR. So, hereafter, just saying
- "charset" means an identification number (integer value).
-
- The value range of charsets is 0x00, 0x81..0xFE. There are four
- kinds of charset depending on DIMENSION (1 or 2) and CHARS (94 or
- 96). For instance, a charset of DIMENSION2_CHARS94 contains 94x94
- characters.
-
- Within Emacs Lisp, a charset is treated as a symbol which has a
- property `charset'. The property value is a vector containing
- various information about the charset. For readability of C code,
- we use the following convention for C variable names:
- charset_symbol: Emacs Lisp symbol of a charset
- charset_id: Emacs Lisp integer of an identification number of a charset
- charset: C integer of an identification number of a charset
-
- Each charset (except for ascii) is assigned a base leading-code
- (range 0x80..0x9E). In addition, a charset of greater than 0xA0
- (whose base leading-code is 0x9A..0x9D) is assigned an extended
- leading-code (range 0xA0..0xFE). In this case, each base
- leading-code specifies the allowable range of extended leading-code
- as shown in the table below. A leading-code is used to represent a
- character in Emacs' buffer and string.
-
- We call a charset which has extended leading-code a "private
- charset" because those are mainly for a charset which is not yet
- registered by ISO. On the contrary, we call a charset which does
- not have extended leading-code an "official charset".
-
- ---------------------------------------------------------------------------
- charset dimension base leading-code extended leading-code
- ---------------------------------------------------------------------------
- 0x00 official dim1 -- none -- -- none --
- (ASCII)
- 0x01..0x7F --never used--
- 0x80 official dim1 -- none -- -- none --
- (eight-bit-graphic)
- 0x81..0x8F official dim1 same as charset -- none --
- 0x90..0x99 official dim2 same as charset -- none --
- 0x9A..0x9D --never used--
- 0x9E official dim1 same as charset -- none --
- (eight-bit-control)
- 0x9F --never used--
- 0xA0..0xDF private dim1 0x9A same as charset
- of 1-column width
- 0xE0..0xEF private dim1 0x9B same as charset
- of 2-column width
- 0xF0..0xF4 private dim2 0x9C same as charset
- of 1-column width
- 0xF5..0xFE private dim2 0x9D same as charset
- of 2-column width
- 0xFF --never used--
- ---------------------------------------------------------------------------
-
-*/
-
-/* Definition of special leading-codes. */
-/* Leading-code followed by extended leading-code. */
-#define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
-#define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
-#define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
-#define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
-
-#define LEADING_CODE_8_BIT_CONTROL 0x9E /* for `eight-bit-control' */
-
-/* Extended leading-code. */
-/* Start of each extended leading-codes. */
-#define LEADING_CODE_EXT_11 0xA0 /* follows LEADING_CODE_PRIVATE_11 */
-#define LEADING_CODE_EXT_12 0xE0 /* follows LEADING_CODE_PRIVATE_12 */
-#define LEADING_CODE_EXT_21 0xF0 /* follows LEADING_CODE_PRIVATE_21 */
-#define LEADING_CODE_EXT_22 0xF5 /* follows LEADING_CODE_PRIVATE_22 */
-/* Maximum value of extended leading-codes. */
-#define LEADING_CODE_EXT_MAX 0xFE
-
-/* Definition of minimum/maximum charset of each DIMENSION. */
-#define MIN_CHARSET_OFFICIAL_DIMENSION1 0x80
-#define MAX_CHARSET_OFFICIAL_DIMENSION1 0x8F
-#define MIN_CHARSET_OFFICIAL_DIMENSION2 0x90
-#define MAX_CHARSET_OFFICIAL_DIMENSION2 0x99
-#define MIN_CHARSET_PRIVATE_DIMENSION1 LEADING_CODE_EXT_11
-#define MIN_CHARSET_PRIVATE_DIMENSION2 LEADING_CODE_EXT_21
-
-/* Maximum value of overall charset identification number. */
-#define MAX_CHARSET 0xFE
-
-/* Definition of special charsets. */
-#define CHARSET_ASCII 0 /* 0x00..0x7F */
-#define CHARSET_8_BIT_CONTROL 0x9E /* 0x80..0x9F */
-#define CHARSET_8_BIT_GRAPHIC 0x80 /* 0xA0..0xFF */
-
-extern int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
-extern int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
-extern int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
-extern int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
-extern int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
-extern int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
-extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
-extern int charset_mule_unicode_0100_24ff;
-extern int charset_mule_unicode_2500_33ff;
-extern int charset_mule_unicode_e000_ffff;
-
-/* Check if CH is an ASCII character or a base leading-code.
- Nowadays, any byte can be the first byte of a character in a
- multibyte buffer/string. So this macro name is not appropriate. */
-#define CHAR_HEAD_P(ch) ((unsigned char) (ch) < 0xA0)
-
-/*** GENERAL NOTE on CHARACTER REPRESENTATION ***
-
- Firstly, the term "character" or "char" is used for a multilingual
- character (of course, including ASCII characters), not for a byte in
- computer memory. We use the term "code" or "byte" for the latter
- case.
-
- A character is identified by charset and one or two POSITION-CODEs.
- POSITION-CODE is the position of the character in the charset. A
- character of DIMENSION1 charset has one POSITION-CODE: POSITION-CODE-1.
- A character of DIMENSION2 charset has two POSITION-CODE:
- POSITION-CODE-1 and POSITION-CODE-2. The code range of
- POSITION-CODE is 0x20..0x7F.
-
- Emacs has two kinds of representation of a character: multi-byte
- form (for buffers and strings) and single-word form (for character
- objects in Emacs Lisp). The latter is called "character code"
- hereafter. Both representations encode the information of charset
- and POSITION-CODE but in a different way (for instance, the MSB of
- POSITION-CODE is set in multi-byte form).
-
- For details of the multi-byte form, see the section "2. Emacs
- internal format handlers" of `coding.c'.
-
- Emacs uses 19 bits for a character code. The bits are divided into
- 3 fields: FIELD1(5bits):FIELD2(7bits):FIELD3(7bits).
-
- A character code of DIMENSION1 character uses FIELD2 to hold charset
- and FIELD3 to hold POSITION-CODE-1. A character code of DIMENSION2
- character uses FIELD1 to hold charset, FIELD2 and FIELD3 to hold
- POSITION-CODE-1 and POSITION-CODE-2 respectively.
-
- More precisely...
-
- FIELD2 of DIMENSION1 character (except for ascii, eight-bit-control,
- and eight-bit-graphic) is "charset - 0x70". This is to make all
- character codes except for ASCII and 8-bit codes greater than 256.
- So, the range of FIELD2 of DIMENSION1 character is 0, 1, or
- 0x11..0x7F.
-
- FIELD1 of DIMENSION2 character is "charset - 0x8F" for official
- charset and "charset - 0xE0" for private charset. So, the range of
- FIELD1 of DIMENSION2 character is 0x01..0x1E.
-
- -----------------------------------------------------------------------------
- charset FIELD1 (5-bit) FIELD2 (7-bit) FIELD3 (7-bit)
- -----------------------------------------------------------------------------
- ascii 0 0 0x00..0x7F
- eight-bit-control 0 1 0x00..0x1F
- eight-bit-graphic 0 1 0x20..0x7F
- DIMENSION1 0 charset - 0x70 POSITION-CODE-1
- DIMENSION2(o) charset - 0x8F POSITION-CODE-1 POSITION-CODE-2
- DIMENSION2(p) charset - 0xE0 POSITION-CODE-1 POSITION-CODE-2
- -----------------------------------------------------------------------------
- "(o)": official, "(p)": private
- -----------------------------------------------------------------------------
-*/
-
-/* Masks of each field of character code. */
-#define CHAR_FIELD1_MASK (0x1F << 14)
-#define CHAR_FIELD2_MASK (0x7F << 7)
-#define CHAR_FIELD3_MASK 0x7F
-
-/* Macros to access each field of character C. */
-#define CHAR_FIELD1(c) (((c) & CHAR_FIELD1_MASK) >> 14)
-#define CHAR_FIELD2(c) (((c) & CHAR_FIELD2_MASK) >> 7)
-#define CHAR_FIELD3(c) ((c) & CHAR_FIELD3_MASK)
-
-/* Minimum character code of character of each DIMENSION. */
-#define MIN_CHAR_OFFICIAL_DIMENSION1 \
- ((0x81 - 0x70) << 7)
-#define MIN_CHAR_PRIVATE_DIMENSION1 \
- ((MIN_CHARSET_PRIVATE_DIMENSION1 - 0x70) << 7)
-#define MIN_CHAR_OFFICIAL_DIMENSION2 \
- ((MIN_CHARSET_OFFICIAL_DIMENSION2 - 0x8F) << 14)
-#define MIN_CHAR_PRIVATE_DIMENSION2 \
- ((MIN_CHARSET_PRIVATE_DIMENSION2 - 0xE0) << 14)
-/* Maximum character code currently used plus 1. */
-#define MAX_CHAR (0x1F << 14)
-
-/* 1 if C is a single byte character, else 0. */
-#define SINGLE_BYTE_CHAR_P(c) (((unsigned)(c) & 0xFF) == (c))
-
-/* 1 if BYTE is an ASCII character in itself, in multibyte mode. */
-#define ASCII_BYTE_P(byte) ((byte) < 0x80)
-
-/* A char-table containing information on each character set.
-
- Unlike ordinary char-tables, this doesn't contain any nested tables.
- Only the top level elements are used. Each element is a vector of
- the following information:
- CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
- LEADING-CODE-BASE, LEADING-CODE-EXT,
- ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
- REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
- PLIST.
-
- CHARSET-ID (integer) is the identification number of the charset.
-
- BYTES (integer) is the length of the multi-byte form of a character
- in the charset: one of 1, 2, 3, and 4.
-
- DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
-
- CHARS (integer) is the number of characters in a dimension: 94 or 96.
-
- WIDTH (integer) is the number of columns a character in the charset
- occupies on the screen: one of 0, 1, and 2..
-
- DIRECTION (integer) is the rendering direction of characters in the
- charset when rendering. If 0, render from left to right, else
- render from right to left.
-
- LEADING-CODE-BASE (integer) is the base leading-code for the
- charset.
-
- LEADING-CODE-EXT (integer) is the extended leading-code for the
- charset. All charsets of less than 0xA0 have the value 0.
-
- ISO-FINAL-CHAR (character) is the final character of the
- corresponding ISO 2022 charset. It is -1 for such a character
- that is used only internally (e.g. `eight-bit-control').
-
- ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
- while encoding to variants of ISO 2022 coding system, one of the
- following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR). It
- is -1 for such a character that is used only internally
- (e.g. `eight-bit-control').
-
- REVERSE-CHARSET (integer) is the charset which differs only in
- LEFT-TO-RIGHT value from the charset. If there's no such a
- charset, the value is -1.
-
- SHORT-NAME (string) is the short name to refer to the charset.
-
- LONG-NAME (string) is the long name to refer to the charset.
-
- DESCRIPTION (string) is the description string of the charset.
-
- PLIST (property list) may contain any type of information a user
- wants to put and get by functions `put-charset-property' and
- `get-charset-property' respectively. */
-extern Lisp_Object Vcharset_table;
-
-/* Macros to access various information of CHARSET in Vcharset_table.
- We provide these macros for efficiency. No range check of CHARSET. */
-
-/* Return entry of CHARSET (C integer) in Vcharset_table. */
-#define CHARSET_TABLE_ENTRY(charset) \
- XCHAR_TABLE (Vcharset_table)->contents[((charset) == CHARSET_ASCII \
- ? 0 : (charset) + 128)]
-
-/* Return information INFO-IDX of CHARSET. */
-#define CHARSET_TABLE_INFO(charset, info_idx) \
- XVECTOR (CHARSET_TABLE_ENTRY (charset))->contents[info_idx]
-
-#define CHARSET_ID_IDX (0)
-#define CHARSET_BYTES_IDX (1)
-#define CHARSET_DIMENSION_IDX (2)
-#define CHARSET_CHARS_IDX (3)
-#define CHARSET_WIDTH_IDX (4)
-#define CHARSET_DIRECTION_IDX (5)
-#define CHARSET_LEADING_CODE_BASE_IDX (6)
-#define CHARSET_LEADING_CODE_EXT_IDX (7)
-#define CHARSET_ISO_FINAL_CHAR_IDX (8)
-#define CHARSET_ISO_GRAPHIC_PLANE_IDX (9)
-#define CHARSET_REVERSE_CHARSET_IDX (10)
-#define CHARSET_SHORT_NAME_IDX (11)
-#define CHARSET_LONG_NAME_IDX (12)
-#define CHARSET_DESCRIPTION_IDX (13)
-#define CHARSET_PLIST_IDX (14)
-/* Size of a vector of each entry of Vcharset_table. */
-#define CHARSET_MAX_IDX (15)
-
-/* And several more macros to be used frequently. */
-#define CHARSET_BYTES(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX))
-#define CHARSET_DIMENSION(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX))
-#define CHARSET_CHARS(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX))
-#define CHARSET_WIDTH(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX))
-#define CHARSET_DIRECTION(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX))
-#define CHARSET_LEADING_CODE_BASE(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX))
-#define CHARSET_LEADING_CODE_EXT(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX))
-#define CHARSET_ISO_FINAL_CHAR(charset) \
- XINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX))
-#define CHARSET_ISO_GRAPHIC_PLANE(charset) \
- XINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX))
-#define CHARSET_REVERSE_CHARSET(charset) \
- XINT (CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX))
-
-/* Macros to specify direction of a charset. */
-#define CHARSET_DIRECTION_LEFT_TO_RIGHT 0
-#define CHARSET_DIRECTION_RIGHT_TO_LEFT 1
-
-/* A vector of charset symbol indexed by charset-id. This is used
- only for returning charset symbol from C functions. */
-extern Lisp_Object Vcharset_symbol_table;
-
-/* Return symbol of CHARSET. */
-#define CHARSET_SYMBOL(charset) \
- XVECTOR (Vcharset_symbol_table)->contents[charset]
-
-/* 1 if CHARSET is in valid value range, else 0. */
-#define CHARSET_VALID_P(charset) \
- ((charset) == 0 \
- || ((charset) > 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \
- || ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 \
- && (charset) <= MAX_CHARSET) \
- || ((charset) == CHARSET_8_BIT_CONTROL) \
- || ((charset) == CHARSET_8_BIT_GRAPHIC))
-
-/* 1 if CHARSET is already defined, else 0. */
-#define CHARSET_DEFINED_P(charset) \
- (((charset) >= 0) && ((charset) <= MAX_CHARSET) \
- && !NILP (CHARSET_TABLE_ENTRY (charset)))
-
-/* Since the information CHARSET-BYTES and CHARSET-WIDTH of
- Vcharset_table can be retrieved only by the first byte of
- multi-byte form (an ASCII code or a base leading-code), we provide
- here tables to be used by macros BYTES_BY_CHAR_HEAD and
- WIDTH_BY_CHAR_HEAD for faster information retrieval. */
-extern int bytes_by_char_head[256];
-extern int width_by_char_head[256];
-
-#define BYTES_BY_CHAR_HEAD(char_head) \
- (ASCII_BYTE_P (char_head) ? 1 : bytes_by_char_head[char_head])
-#define WIDTH_BY_CHAR_HEAD(char_head) \
- (ASCII_BYTE_P (char_head) ? 1 : width_by_char_head[char_head])
-
-/* Charset of the character C. */
-#define CHAR_CHARSET(c) \
- (SINGLE_BYTE_CHAR_P (c) \
- ? (ASCII_BYTE_P (c) \
- ? CHARSET_ASCII \
- : (c) < 0xA0 ? CHARSET_8_BIT_CONTROL : CHARSET_8_BIT_GRAPHIC) \
- : ((c) < MIN_CHAR_OFFICIAL_DIMENSION2 \
- ? CHAR_FIELD2 (c) + 0x70 \
- : ((c) < MIN_CHAR_PRIVATE_DIMENSION2 \
- ? CHAR_FIELD1 (c) + 0x8F \
- : CHAR_FIELD1 (c) + 0xE0)))
-
-/* Check if two characters C1 and C2 belong to the same charset. */
-#define SAME_CHARSET_P(c1, c2) \
- (c1 < MIN_CHAR_OFFICIAL_DIMENSION2 \
- ? (c1 & CHAR_FIELD2_MASK) == (c2 & CHAR_FIELD2_MASK) \
- : (c1 & CHAR_FIELD1_MASK) == (c2 & CHAR_FIELD1_MASK))
-
-/* Return a character of which charset is CHARSET and position-codes
- are C1 and C2. DIMENSION1 character ignores C2. */
-#define MAKE_CHAR(charset, c1, c2) \
- ((charset) == CHARSET_ASCII \
- ? (c1) & 0x7F \
- : (((charset) == CHARSET_8_BIT_CONTROL \
- || (charset) == CHARSET_8_BIT_GRAPHIC) \
- ? ((c1) & 0x7F) | 0x80 \
- : ((CHARSET_DEFINED_P (charset) \
- ? CHARSET_DIMENSION (charset) == 1 \
- : (charset) < MIN_CHARSET_PRIVATE_DIMENSION2) \
- ? (((charset) - 0x70) << 7) | ((c1) <= 0 ? 0 : ((c1) & 0x7F)) \
- : ((((charset) \
- - ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)) \
- << 14) \
- | ((c2) <= 0 ? 0 : ((c2) & 0x7F)) \
- | ((c1) <= 0 ? 0 : (((c1) & 0x7F) << 7))))))
-
-
-/* If GENERICP is nonzero, return nonzero iff C is a valid normal or
- generic character. If GENERICP is zero, return nonzero iff C is a
- valid normal character. */
-#define CHAR_VALID_P(c, genericp) \
- ((c) >= 0 \
- && (SINGLE_BYTE_CHAR_P (c) || char_valid_p (c, genericp)))
-
-/* This default value is used when nonascii-translation-table or
- nonascii-insert-offset fail to convert unibyte character to a valid
- multibyte character. This makes a Latin-1 character. */
-
-#define DEFAULT_NONASCII_INSERT_OFFSET 0x800
-
-/* Parse multibyte string STR of length LENGTH and set BYTES to the
- byte length of a character at STR. */
-
-#ifdef BYTE_COMBINING_DEBUG
-
-#define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
- do { \
- int i = 1; \
- while (i < (length) && ! CHAR_HEAD_P ((str)[i])) i++; \
- (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]); \
- if ((bytes) > i) \
- abort (); \
- } while (0)
+/* Index to arguments of Fdefine_charset_internal. */
+
+enum define_charset_arg_index
+ {
+ charset_arg_name,
+ charset_arg_dimension,
+ charset_arg_code_space,
+ charset_arg_min_code,
+ charset_arg_max_code,
+ charset_arg_iso_final,
+ charset_arg_iso_revision,
+ charset_arg_emacs_mule_id,
+ charset_arg_ascii_compatible_p,
+ charset_arg_supplementary_p,
+ charset_arg_invalid_code,
+ charset_arg_code_offset,
+ charset_arg_map,
+ charset_arg_subset,
+ charset_arg_superset,
+ charset_arg_unify_map,
+ charset_arg_plist,
+ charset_arg_max
+ };
+
+
+/* Indices to charset attributes vector. */
+
+enum charset_attr_index
+ {
+ /* ID number of the charset. */
+ charset_id,
+
+ /* Name of the charset (symbol). */
+ charset_name,
+
+ /* Property list of the charset. */
+ charset_plist,
+
+ /* If the method of the charset is `MAP_DEFERRED', the value is a
+ mapping vector or a file name that contains mapping vector.
+ Otherwise, nil. */
+ charset_map,
+
+ /* If the method of the charset is `MAP', the value is a vector
+ that maps code points of the charset to characters. The vector
+ is indexed by a character index. A character index is
+ calculated from a code point and the code-space table of the
+ charset. */
+ charset_decoder,
+
+ /* If the method of the charset is `MAP', the value is a
+ char-table that maps characters of the charset to code
+ points. */
+ charset_encoder,
+
+ /* If the method of the charset is `SUBSET', the value is a vector
+ that has this form:
+
+ [ CHARSET-ID MIN-CODE MAX-CODE OFFSET ]
+
+ CHARSET-ID is an ID number of a parent charset. MIN-CODE and
+ MAX-CODE specify the range of characters inherited from the
+ parent. OFFSET is an integer value to add to a code point of
+ the parent charset to get the corresponding code point of this
+ charset. */
+ charset_subset,
+
+ /* If the method of the charset is `SUPERSET', the value is a list
+ whose elements have this form:
+
+ (CHARSET-ID . OFFSET)
+
+ CHARSET-IDs are ID numbers of parent charsets. OFFSET is an
+ integer value to add to a code point of the parent charset to
+ get the corresponding code point of this charset. */
+ charset_superset,
+
+ /* The value is a mapping vector or a file name that contains the
+ mapping. This defines how characters in the charset should be
+ unified with Unicode. The value of the member
+ `charset_deunifier' is created from this information. */
+ charset_unify_map,
+
+ /* If characters in the charset must be unified Unicode, the value
+ is a char table that maps a unified Unicode character code to
+ the non-unified character code in the charset. */
+ charset_deunifier,
+
+ /* The length of the charset attribute vector. */
+ charset_attr_max
+ };
+
+/* Methods for converting code points and characters of charsets. */
+
+enum charset_method
+ {
+ /* For a charset of this method, a character code is calculated
+ from a character index (which is calculated from a code point)
+ simply by adding an offset value. */
+ CHARSET_METHOD_OFFSET,
+
+ /* For a charset of this method, a decoder vector and an encoder
+ char-table is used for code point <-> character code
+ conversion. */
+ CHARSET_METHOD_MAP,
+
+ /* Same as above but decoder and encoder are loaded from a file on
+ demand. Once loaded, the method is changed to
+ CHARSET_METHOD_MAP. */
+ CHARSET_METHOD_MAP_DEFERRED,
+
+ /* A charset of this method is a subset of another charset. */
+ CHARSET_METHOD_SUBSET,
+
+ /* A charset of this method is a superset of other charsets. */
+ CHARSET_METHOD_SUPERSET
+ };
+
+struct charset
+{
+ /* Index to charset_table. */
+ int id;
-#else /* not BYTE_COMBINING_DEBUG */
-
-#define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
- ((void)(length), (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]))
-
-#endif /* not BYTE_COMBINING_DEBUG */
-
-#define VALID_LEADING_CODE_P(code) \
- (! NILP (CHARSET_TABLE_ENTRY (code)))
-
-/* Return 1 iff the byte sequence at unibyte string STR (LENGTH bytes)
- is valid as a multibyte form. If valid, by a side effect, BYTES is
- set to the byte length of the multibyte form. */
-
-#define UNIBYTE_STR_AS_MULTIBYTE_P(str, length, bytes) \
- (((str)[0] < 0x80 || (str)[0] >= 0xA0) \
- ? ((bytes) = 1) \
- : (((bytes) = BYTES_BY_CHAR_HEAD ((str)[0])), \
- ((bytes) <= (length) \
- && !CHAR_HEAD_P ((str)[1]) \
- && ((bytes) == 2 \
- ? (str)[0] != LEADING_CODE_8_BIT_CONTROL \
- : (!CHAR_HEAD_P ((str)[2]) \
- && ((bytes) == 3 \
- ? (((str)[0] != LEADING_CODE_PRIVATE_11 \
- && (str)[0] != LEADING_CODE_PRIVATE_12) \
- || VALID_LEADING_CODE_P (str[1])) \
- : (!CHAR_HEAD_P ((str)[3]) \
- && VALID_LEADING_CODE_P (str[1]))))))))
-
-
-/* Return 1 iff the byte sequence at multibyte string STR is valid as
- a unibyte form. By a side effect, BYTES is set to the byte length
- of one character at STR. */
-
-#define MULTIBYTE_STR_AS_UNIBYTE_P(str, bytes) \
- ((bytes) = BYTES_BY_CHAR_HEAD ((str)[0]), \
- (str)[0] != LEADING_CODE_8_BIT_CONTROL)
-
-/* The charset of character C is stored in CHARSET, and the
- position-codes of C are stored in C1 and C2.
- We store -1 in C2 if the dimension of the charset is 1. */
-
-#define SPLIT_CHAR(c, charset, c1, c2) \
- (SINGLE_BYTE_CHAR_P (c) \
- ? ((charset \
- = (ASCII_BYTE_P (c) \
- ? CHARSET_ASCII \
- : ((c) < 0xA0 ? CHARSET_8_BIT_CONTROL : CHARSET_8_BIT_GRAPHIC))), \
- c1 = (c), c2 = -1) \
- : ((c) & CHAR_FIELD1_MASK \
- ? (charset = (CHAR_FIELD1 (c) \
- + ((c) < MIN_CHAR_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)), \
- c1 = CHAR_FIELD2 (c), \
- c2 = CHAR_FIELD3 (c)) \
- : (charset = CHAR_FIELD2 (c) + 0x70, \
- c1 = CHAR_FIELD3 (c), \
- c2 = -1)))
-
-/* Return 1 iff character C has valid printable glyph. */
-#define CHAR_PRINTABLE_P(c) (ASCII_BYTE_P (c) || char_printable_p (c))
-
-/* The charset of the character at STR is stored in CHARSET, and the
- position-codes are stored in C1 and C2.
- We store -1 in C2 if the character is just 2 bytes. */
-
-#define SPLIT_STRING(str, len, charset, c1, c2) \
- ((BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) < 2 \
- || BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) > len \
- || split_string (str, len, &charset, &c1, &c2) < 0) \
- ? c1 = *(str), charset = CHARSET_ASCII \
- : charset)
+ /* Index to Vcharset_hash_table. */
+ int hash_index;
+
+ /* Dimension of the charset: 1, 2, 3, or 4. */
+ int dimension;
+
+ /* Byte code range of each dimension. <code_space>[4N] is a mininum
+ byte code of the (N+1)th dimension, <code_space>[4N+1] is a
+ maximum byte code of the (N+1)th dimension, <code_space>[4N+2] is
+ (<code_space>[4N+1] - <code_space>[4N] + 1), <code_space>[4N+3]
+ is a number of characters containd in the first to (N+1)th
+ dismesions. We get `char-index' of a `code-point' from this
+ information. */
+ int code_space[16];
-/* Mapping table from ISO2022's charset (specified by DIMENSION,
- CHARS, and FINAL_CHAR) to Emacs' charset. Should be accessed by
- macro ISO_CHARSET_TABLE (DIMENSION, CHARS, FINAL_CHAR). */
-extern int iso_charset_table[2][2][128];
-
-#define ISO_CHARSET_TABLE(dimension, chars, final_char) \
- iso_charset_table[XINT (dimension) - 1][XINT (chars) > 94][XINT (final_char)]
-
-#define BASE_LEADING_CODE_P(c) (BYTES_BY_CHAR_HEAD ((unsigned char) (c)) > 1)
-
-/* Return how many bytes C will occupy in a multibyte buffer. */
-#define CHAR_BYTES(c) \
- (SINGLE_BYTE_CHAR_P (c) \
- ? ((ASCII_BYTE_P (c) || (c) >= 0xA0) ? 1 : 2) \
- : char_bytes (c))
-
-/* The following two macros CHAR_STRING and STRING_CHAR are the main
- entry points to convert between Emacs's two types of character
- representations: multi-byte form and single-word form (character
- code). */
-
-/* Store multi-byte form of the character C in STR. The caller should
- allocate at least MAX_MULTIBYTE_LENGTH bytes area at STR in
- advance. Returns the length of the multi-byte form. If C is an
- invalid character code, signal an error. */
-
-#define CHAR_STRING(c, str) \
- (SINGLE_BYTE_CHAR_P (c) \
- ? ((ASCII_BYTE_P (c) || c >= 0xA0) \
- ? (*(str) = (unsigned char)(c), 1) \
- : (*(str) = LEADING_CODE_8_BIT_CONTROL, *((str)+ 1) = c + 0x20, 2)) \
- : char_to_string (c, (unsigned char *) str))
-
-/* Like CHAR_STRING but don't signal an error if C is invalid.
- Value is -1 in this case. */
-
-#define CHAR_STRING_NO_SIGNAL(c, str) \
- (SINGLE_BYTE_CHAR_P (c) \
- ? ((ASCII_BYTE_P (c) || c >= 0xA0) \
- ? (*(str) = (unsigned char)(c), 1) \
- : (*(str) = LEADING_CODE_8_BIT_CONTROL, *((str)+ 1) = c + 0x20, 2)) \
- : char_to_string_1 (c, (unsigned char *) str))
-
-/* Return a character code of the character of which multi-byte form
- is at STR and the length is LEN. If STR doesn't contain valid
- multi-byte form, only the first byte in STR is returned. */
-
-#define STRING_CHAR(str, len) \
- (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
- ? (unsigned char) *(str) \
- : string_to_char (str, len, 0))
-
-/* This is like STRING_CHAR but the third arg ACTUAL_LEN is set to the
- length of the multi-byte form. Just to know the length, use
- MULTIBYTE_FORM_LENGTH. */
-
-#define STRING_CHAR_AND_LENGTH(str, len, actual_len) \
- (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
- ? ((actual_len) = 1), (unsigned char) *(str) \
- : string_to_char (str, len, &(actual_len)))
-
-/* Fetch the "next" character from Lisp string STRING at byte position
- BYTEIDX, character position CHARIDX. Store it into OUTPUT.
-
- All the args must be side-effect-free.
- BYTEIDX and CHARIDX must be lvalues;
- we increment them past the character fetched. */
-
-#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
-if (1) \
- { \
- CHARIDX++; \
- if (STRING_MULTIBYTE (STRING)) \
- { \
- const unsigned char *ptr = SDATA (STRING) + BYTEIDX; \
- int space_left = SBYTES (STRING) - BYTEIDX; \
- int actual_len; \
- \
- OUTPUT = STRING_CHAR_AND_LENGTH (ptr, space_left, actual_len); \
- BYTEIDX += actual_len; \
- } \
- else \
- OUTPUT = SREF (STRING, BYTEIDX++); \
- } \
-else
-
-/* Like FETCH_STRING_CHAR_ADVANCE but assume STRING is multibyte. */
-
-#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
-if (1) \
- { \
- const unsigned char *fetch_string_char_ptr = SDATA (STRING) + BYTEIDX; \
- int fetch_string_char_space_left = SBYTES (STRING) - BYTEIDX; \
- int actual_len; \
- \
- OUTPUT \
- = STRING_CHAR_AND_LENGTH (fetch_string_char_ptr, \
- fetch_string_char_space_left, actual_len); \
- \
- BYTEIDX += actual_len; \
- CHARIDX++; \
- } \
-else
-
-/* Like FETCH_STRING_CHAR_ADVANCE but fetch character from the current
- buffer. */
-
-#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \
-if (1) \
- { \
- CHARIDX++; \
- if (!NILP (current_buffer->enable_multibyte_characters)) \
- { \
- unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \
- int space_left = ((CHARIDX < GPT ? GPT_BYTE : Z_BYTE) - BYTEIDX); \
- int actual_len; \
- \
- OUTPUT= STRING_CHAR_AND_LENGTH (ptr, space_left, actual_len); \
- BYTEIDX += actual_len; \
- } \
- else \
- { \
- OUTPUT = *(BYTE_POS_ADDR (BYTEIDX)); \
- BYTEIDX++; \
- } \
- } \
-else
-
-/* Return the length of the multi-byte form at string STR of length LEN. */
-
-#define MULTIBYTE_FORM_LENGTH(str, len) \
- (BYTES_BY_CHAR_HEAD (*(unsigned char *)(str)) == 1 \
- ? 1 \
- : multibyte_form_length (str, len))
-
-/* If P is before LIMIT, advance P to the next character boundary. It
- assumes that P is already at a character boundary of the sane
- mulitbyte form whose end address is LIMIT. */
-
-#define NEXT_CHAR_BOUNDARY(p, limit) \
- do { \
- if ((p) < (limit)) \
- (p) += BYTES_BY_CHAR_HEAD (*(p)); \
- } while (0)
+ /* If B is a byte of Nth dimension of a code-point, the (N-1)th bit
+ of code_space_mask[B] is set. This array is used to quickly
+ check if a code-point is in a valid range. */
+ unsigned char *code_space_mask;
+ /* 1 if there's no gap in code-points. */
+ int code_linear_p;
-/* If P is after LIMIT, advance P to the previous character boundary.
- It assumes that P is already at a character boundary of the sane
- mulitbyte form whose beginning address is LIMIT. */
+ /* If the charset is treated as 94-chars in ISO-2022, the value is 0.
+ If the charset is treated as 96-chars in ISO-2022, the value is 1. */
+ int iso_chars_96;
-#define PREV_CHAR_BOUNDARY(p, limit) \
- do { \
- if ((p) > (limit)) \
- { \
- const unsigned char *p0 = (p); \
- do { \
- p0--; \
- } while (p0 >= limit && ! CHAR_HEAD_P (*p0)); \
- (p) = (BYTES_BY_CHAR_HEAD (*p0) == (p) - p0) ? p0 : (p) - 1; \
- } \
- } while (0)
+ /* ISO final byte of the charset: 48..127. It may be -1 if the
+ charset doesn't conform to ISO-2022. */
+ int iso_final;
+ /* ISO revision number of the charset. */
+ int iso_revision;
-#ifdef emacs
+ /* If the charset is identical to what supported by Emacs 21 and the
+ priors, the identification number of the charset used in those
+ version. Otherwise, -1. */
+ int emacs_mule_id;
+
+ /* Nonzero iff the charset is compatible with ASCII. */
+ int ascii_compatible_p;
-/* Increase the buffer byte position POS_BYTE of the current buffer to
- the next character boundary. This macro relies on the fact that
- *GPT_ADDR and *Z_ADDR are always accessible and the values are
- '\0'. No range checking of POS. */
+ /* Nonzero iff the charset is supplementary. */
+ int supplementary_p;
+
+ /* Nonzero iff all the code points are representable by Lisp_Int. */
+ int compact_codes_p;
+
+ /* The method for encoding/decoding characters of the charset. */
+ enum charset_method method;
+
+ /* Mininum and Maximum code points of the charset. */
+ unsigned min_code, max_code;
+
+ /* Offset value used by macros CODE_POINT_TO_INDEX and
+ INDEX_TO_CODE_POINT. . */
+ unsigned char_index_offset;
+
+ /* Mininum and Maximum character codes of the charset. If the
+ charset is compatible with ASCII, min_char is a minimum non-ASCII
+ character of the charset. If the method of charset is
+ CHARSET_METHOD_OFFSET, even if the charset is unified, min_char
+ and max_char doesn't change. */
+ int min_char, max_char;
+
+ /* The code returned by ENCODE_CHAR if a character is not encodable
+ by the charset. */
+ unsigned invalid_code;
+
+ /* If the method of the charset is CHARSET_METHOD_MAP, this is a
+ table of bits used to quickly and roughly guess if a character
+ belongs to the charset.
+
+ The first 64 elements are 512 bits for characters less than
+ 0x10000. Each bit corresponds to 128-character block. The last
+ 126 elements are 1008 bits for the greater characters
+ (0x10000..0x3FFFFF). Each bit corresponds to 4096-character
+ block.
+
+ If a bit is 1, at least one character in the corresponding block is
+ in this charset. */
+ unsigned char fast_map[190];
+
+ /* Offset value to calculate a character code from code-point, and
+ visa versa. */
+ int code_offset;
+
+ int unified_p;
+};
+
+/* Hash table of charset symbols vs. the correponding attribute
+ vectors. */
+extern Lisp_Object Vcharset_hash_table;
+
+/* Table of struct charset. */
+extern struct charset *charset_table;
+
+#define CHARSET_FROM_ID(id) (charset_table + (id))
+
+extern Lisp_Object Vcharset_ordered_list;
+
+/* Incremented everytime we change the priority of charsets. */
+extern unsigned short charset_ordered_list_tick;
+
+extern Lisp_Object Vcharset_list;
+extern Lisp_Object Viso_2022_charset_list;
+extern Lisp_Object Vemacs_mule_charset_list;
+
+extern struct charset *emacs_mule_charset[256];
+
+
+/* Macros to access information about charset. */
+
+/* Return the attribute vector of charset whose symbol is SYMBOL. */
+#define CHARSET_SYMBOL_ATTRIBUTES(symbol) \
+ Fgethash ((symbol), Vcharset_hash_table, Qnil)
+
+#define CHARSET_ATTR_ID(attrs) AREF ((attrs), charset_id)
+#define CHARSET_ATTR_NAME(attrs) AREF ((attrs), charset_name)
+#define CHARSET_ATTR_PLIST(attrs) AREF ((attrs), charset_plist)
+#define CHARSET_ATTR_MAP(attrs) AREF ((attrs), charset_map)
+#define CHARSET_ATTR_DECODER(attrs) AREF ((attrs), charset_decoder)
+#define CHARSET_ATTR_ENCODER(attrs) AREF ((attrs), charset_encoder)
+#define CHARSET_ATTR_SUBSET(attrs) AREF ((attrs), charset_subset)
+#define CHARSET_ATTR_SUPERSET(attrs) AREF ((attrs), charset_superset)
+#define CHARSET_ATTR_UNIFY_MAP(attrs) AREF ((attrs), charset_unify_map)
+#define CHARSET_ATTR_DEUNIFIER(attrs) AREF ((attrs), charset_deunifier)
+
+#define CHARSET_SYMBOL_ID(symbol) \
+ CHARSET_ATTR_ID (CHARSET_SYMBOL_ATTRIBUTES (symbol))
+
+/* Return an index to Vcharset_hash_table of the charset whose symbol
+ is SYMBOL. */
+#define CHARSET_SYMBOL_HASH_INDEX(symbol) \
+ hash_lookup (XHASH_TABLE (Vcharset_hash_table), symbol, NULL)
+
+/* Return the attribute vector of CHARSET. */
+#define CHARSET_ATTRIBUTES(charset) \
+ (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), (charset)->hash_index))
+
+#define CHARSET_ID(charset) ((charset)->id)
+#define CHARSET_HASH_INDEX(charset) ((charset)->hash_index)
+#define CHARSET_DIMENSION(charset) ((charset)->dimension)
+#define CHARSET_CODE_SPACE(charset) ((charset)->code_space)
+#define CHARSET_CODE_LINEAR_P(charset) ((charset)->code_linear_p)
+#define CHARSET_ISO_CHARS_96(charset) ((charset)->iso_chars_96)
+#define CHARSET_ISO_FINAL(charset) ((charset)->iso_final)
+#define CHARSET_ISO_PLANE(charset) ((charset)->iso_plane)
+#define CHARSET_ISO_REVISION(charset) ((charset)->iso_revision)
+#define CHARSET_EMACS_MULE_ID(charset) ((charset)->emacs_mule_id)
+#define CHARSET_ASCII_COMPATIBLE_P(charset) ((charset)->ascii_compatible_p)
+#define CHARSET_COMPACT_CODES_P(charset) ((charset)->compact_codes_p)
+#define CHARSET_METHOD(charset) ((charset)->method)
+#define CHARSET_MIN_CODE(charset) ((charset)->min_code)
+#define CHARSET_MAX_CODE(charset) ((charset)->max_code)
+#define CHARSET_INVALID_CODE(charset) ((charset)->invalid_code)
+#define CHARSET_MIN_CHAR(charset) ((charset)->min_char)
+#define CHARSET_MAX_CHAR(charset) ((charset)->max_char)
+#define CHARSET_CODE_OFFSET(charset) ((charset)->code_offset)
+#define CHARSET_UNIFIED_P(charset) ((charset)->unified_p)
+
+#define CHARSET_NAME(charset) \
+ (CHARSET_ATTR_NAME (CHARSET_ATTRIBUTES (charset)))
+#define CHARSET_MAP(charset) \
+ (CHARSET_ATTR_MAP (CHARSET_ATTRIBUTES (charset)))
+#define CHARSET_DECODER(charset) \
+ (CHARSET_ATTR_DECODER (CHARSET_ATTRIBUTES (charset)))
+#define CHARSET_ENCODER(charset) \
+ (CHARSET_ATTR_ENCODER (CHARSET_ATTRIBUTES (charset)))
+#define CHARSET_SUBSET(charset) \
+ (CHARSET_ATTR_SUBSET (CHARSET_ATTRIBUTES (charset)))
+#define CHARSET_SUPERSET(charset) \
+ (CHARSET_ATTR_SUPERSET (CHARSET_ATTRIBUTES (charset)))
+#define CHARSET_UNIFY_MAP(charset) \
+ (CHARSET_ATTR_UNIFY_MAP (CHARSET_ATTRIBUTES (charset)))
+#define CHARSET_DEUNIFIER(charset) \
+ (CHARSET_ATTR_DEUNIFIER (CHARSET_ATTRIBUTES (charset)))
+
+
+/* Nonzero iff OBJ is a valid charset symbol. */
+#define CHARSETP(obj) (CHARSET_SYMBOL_HASH_INDEX (obj) >= 0)
+
+/* Check if X is a valid charset symbol. If not, signal an error. */
+#define CHECK_CHARSET(x) \
+ do { \
+ if (! SYMBOLP (x) || CHARSET_SYMBOL_HASH_INDEX (x) < 0) \
+ x = wrong_type_argument (Qcharsetp, (x)); \
+ } while (0)
-#ifdef BYTE_COMBINING_DEBUG
-#define INC_POS(pos_byte) \
- do { \
- unsigned char *p = BYTE_POS_ADDR (pos_byte); \
- if (BASE_LEADING_CODE_P (*p)) \
- { \
- int len, bytes; \
- len = Z_BYTE - pos_byte; \
- PARSE_MULTIBYTE_SEQ (p, len, bytes); \
- pos_byte += bytes; \
- } \
- else \
- pos_byte++; \
+/* Check if X is a valid charset symbol. If valid, set ID to the id
+ number of the charset. Otherwise, signal an error. */
+#define CHECK_CHARSET_GET_ID(x, id) \
+ do { \
+ int idx; \
+ \
+ if (! SYMBOLP (x) || (idx = CHARSET_SYMBOL_HASH_INDEX (x)) < 0) \
+ x = wrong_type_argument (Qcharsetp, (x)); \
+ id = XINT (AREF (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), idx), \
+ charset_id)); \
} while (0)
-#else /* not BYTE_COMBINING_DEBUG */
-#define INC_POS(pos_byte) \
- do { \
- unsigned char *p = BYTE_POS_ADDR (pos_byte); \
- pos_byte += BYTES_BY_CHAR_HEAD (*p); \
+/* Check if X is a valid charset symbol. If valid, set ATTR to the
+ attr vector of the charset. Otherwise, signal an error. */
+#define CHECK_CHARSET_GET_ATTR(x, attr) \
+ do { \
+ if (!SYMBOLP (x) || NILP (attr = CHARSET_SYMBOL_ATTRIBUTES (x))) \
+ x = wrong_type_argument (Qcharsetp, (x)); \
} while (0)
-#endif /* not BYTE_COMBINING_DEBUG */
-/* Decrease the buffer byte position POS_BYTE of the current buffer to
- the previous character boundary. No range checking of POS. */
-#define DEC_POS(pos_byte) \
- do { \
- unsigned char *p, *p_min; \
- \
- pos_byte--; \
- if (pos_byte < GPT_BYTE) \
- p = BEG_ADDR + pos_byte - BEG_BYTE, p_min = BEG_ADDR; \
- else \
- p = BEG_ADDR + GAP_SIZE + pos_byte - BEG_BYTE, p_min = GAP_END_ADDR;\
- if (p > p_min && !CHAR_HEAD_P (*p)) \
- { \
- unsigned char *pend = p--; \
- int len, bytes; \
- if (p_min < p - MAX_MULTIBYTE_LENGTH) \
- p_min = p - MAX_MULTIBYTE_LENGTH; \
- while (p > p_min && !CHAR_HEAD_P (*p)) p--; \
- len = pend + 1 - p; \
- PARSE_MULTIBYTE_SEQ (p, len, bytes); \
- if (bytes == len) \
- pos_byte -= len - 1; \
- } \
+#define CHECK_CHARSET_GET_CHARSET(x, charset) \
+ do { \
+ int id; \
+ CHECK_CHARSET_GET_ID (x, id); \
+ charset = CHARSET_FROM_ID (id); \
} while (0)
-/* Increment both CHARPOS and BYTEPOS, each in the appropriate way. */
-#define INC_BOTH(charpos, bytepos) \
-do \
- { \
- (charpos)++; \
- if (NILP (current_buffer->enable_multibyte_characters)) \
- (bytepos)++; \
- else \
- INC_POS ((bytepos)); \
- } \
-while (0)
-
-/* Decrement both CHARPOS and BYTEPOS, each in the appropriate way. */
-
-#define DEC_BOTH(charpos, bytepos) \
-do \
- { \
- (charpos)--; \
- if (NILP (current_buffer->enable_multibyte_characters)) \
- (bytepos)--; \
- else \
- DEC_POS ((bytepos)); \
- } \
-while (0)
+/* Lookup Vcharset_order_list and return the first charset that
+ contains the character C. */
+#define CHAR_CHARSET(c) \
+ ((c) < 0x80 ? CHARSET_FROM_ID (charset_ascii) \
+ : char_charset ((c), Qnil, NULL))
+
+#if 0
+/* Char-table of charset-sets. Each element is a bool vector indexed
+ by a charset ID. */
+extern Lisp_Object Vchar_charset_set;
+
+/* Charset-bag of character C. */
+#define CHAR_CHARSET_SET(c) \
+ CHAR_TABLE_REF (Vchar_charset_set, c)
-/* Increase the buffer byte position POS_BYTE of the current buffer to
- the next character boundary. This macro relies on the fact that
- *GPT_ADDR and *Z_ADDR are always accessible and the values are
- '\0'. No range checking of POS_BYTE. */
+/* Check if two characters C1 and C2 belong to the same charset. */
+#define SAME_CHARSET_P(c1, c2) \
+ intersection_p (CHAR_CHARSET_SET (c1), CHAR_CHARSET_SET (c2))
+
+#endif
+
+
+/* Return a character correponding to the code-point CODE of CHARSET.
+ Try some optimization before calling decode_char. */
+
+#define DECODE_CHAR(charset, code) \
+ ((ASCII_BYTE_P (code) && (charset)->ascii_compatible_p) \
+ ? (code) \
+ : ((code) < (charset)->min_code || (code) > (charset)->max_code) \
+ ? -1 \
+ : (charset)->unified_p \
+ ? decode_char ((charset), (code)) \
+ : (charset)->method == CHARSET_METHOD_OFFSET \
+ ? ((charset)->code_linear_p \
+ ? (code) - (charset)->min_code + (charset)->code_offset \
+ : decode_char ((charset), (code))) \
+ : (charset)->method == CHARSET_METHOD_MAP \
+ ? ((charset)->code_linear_p \
+ ? XINT (AREF (CHARSET_DECODER (charset), \
+ (code) - (charset)->min_code)) \
+ : decode_char ((charset), (code))) \
+ : decode_char ((charset), (code)))
+
+
+/* If CHARSET is a simple offset base charset, return it's offset,
+ otherwise return -1. */
+#define CHARSET_OFFSET(charset) \
+ (((charset)->method == CHARSET_METHOD_OFFSET \
+ && (charset)->code_linear_p \
+ && ! (charset)->unified_p) \
+ ? (charset)->code_offset - (charset)->min_code \
+ : -1)
+
+extern Lisp_Object charset_work;
+
+/* Return a code point of CHAR in CHARSET.
+ Try some optimization before calling encode_char. */
+
+#define ENCODE_CHAR(charset, c) \
+ ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p) \
+ ? (c) \
+ : ((charset)->unified_p \
+ || (charset)->method == CHARSET_METHOD_SUBSET \
+ || (charset)->method == CHARSET_METHOD_SUPERSET) \
+ ? encode_char ((charset), (c)) \
+ : ((c) < (charset)->min_char || (c) > (charset)->max_char) \
+ ? (charset)->invalid_code \
+ : (charset)->method == CHARSET_METHOD_OFFSET \
+ ? ((charset)->code_linear_p \
+ ? (c) - (charset)->code_offset + (charset)->min_code \
+ : encode_char ((charset), (c))) \
+ : (charset)->method == CHARSET_METHOD_MAP \
+ ? ((charset)->compact_codes_p \
+ ? (charset_work = CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c)), \
+ (NILP (charset_work) \
+ ? (charset)->invalid_code \
+ : XFASTINT (charset_work))) \
+ : encode_char ((charset), (c))) \
+ : encode_char ((charset), (c)))
+
+
+/* Set to 1 when a charset map is loaded to warn that a buffer text
+ and a string data may be relocated. */
+extern int charset_map_loaded;
+
+
+/* Set CHARSET to the charset highest priority of C, CODE to the
+ code-point of C in CHARSET. */
+#define SPLIT_CHAR(c, charset, code) \
+ ((charset) = char_charset ((c), Qnil, &(code)))
+
+
+#define ISO_MAX_DIMENSION 3
+#define ISO_MAX_CHARS 2
+#define ISO_MAX_FINAL 0x80 /* only 0x30..0xFF are used */
+
+/* Mapping table from ISO2022's charset (specified by DIMENSION,
+ CHARS, and FINAL_CHAR) to Emacs' charset ID. Should be accessed by
+ macro ISO_CHARSET_TABLE (DIMENSION, CHARS, FINAL_CHAR). */
+extern int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
+
+/* A charset of type iso2022 who has DIMENSION, CHARS, and FINAL
+ (final character). */
+#define ISO_CHARSET_TABLE(dimension, chars_96, final) \
+ iso_charset_table[(dimension) - 1][(chars_96)][(final)]
-#ifdef BYTE_COMBINING_DEBUG
+/* Nonzero iff the charset who has FAST_MAP may contain C. */
+#define CHARSET_FAST_MAP_REF(c, fast_map) \
+ ((c) < 0x10000 \
+ ? fast_map[(c) >> 10] & (1 << (((c) >> 7) & 7)) \
+ : fast_map[((c) >> 15) + 62] & (1 << (((c) >> 12) & 7)))
-#define BUF_INC_POS(buf, pos_byte) \
+#define CHARSET_FAST_MAP_SET(c, fast_map) \
do { \
- unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \
- if (BASE_LEADING_CODE_P (*p)) \
- { \
- int len, bytes; \
- len = BUF_Z_BYTE (buf) - pos_byte; \
- PARSE_MULTIBYTE_SEQ (p, len, bytes); \
- pos_byte += bytes; \
- } \
+ if ((c) < 0x10000) \
+ (fast_map)[(c) >> 10] |= 1 << (((c) >> 7) & 7); \
else \
- pos_byte++; \
+ (fast_map)[((c) >> 15) + 62] |= 1 << (((c) >> 12) & 7); \
} while (0)
-#else /* not BYTE_COMBINING_DEBUG */
-#define BUF_INC_POS(buf, pos_byte) \
- do { \
- unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \
- pos_byte += BYTES_BY_CHAR_HEAD (*p); \
- } while (0)
-#endif /* not BYTE_COMBINING_DEBUG */
+/* 1 iff CHARSET may contain the character C. */
+#define CHAR_CHARSET_P(c, charset) \
+ ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p) \
+ || ((CHARSET_UNIFIED_P (charset) \
+ || (charset)->method == CHARSET_METHOD_SUBSET \
+ || (charset)->method == CHARSET_METHOD_SUPERSET) \
+ ? encode_char ((charset), (c)) != (charset)->invalid_code \
+ : (CHARSET_FAST_MAP_REF ((c), (charset)->fast_map) \
+ && ((charset)->method == CHARSET_METHOD_OFFSET \
+ ? (c) >= (charset)->min_char && (c) <= (charset)->max_char \
+ : ((charset)->method == CHARSET_METHOD_MAP \
+ && (charset)->compact_codes_p) \
+ ? ! NILP (CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c))) \
+ : encode_char ((charset), (c)) != (charset)->invalid_code))))
-/* Decrease the buffer byte position POS_BYTE of the current buffer to
- the previous character boundary. No range checking of POS_BYTE. */
-#define BUF_DEC_POS(buf, pos_byte) \
- do { \
- unsigned char *p, *p_min; \
- pos_byte--; \
- if (pos_byte < BUF_GPT_BYTE (buf)) \
- { \
- p = BUF_BEG_ADDR (buf) + pos_byte - BEG_BYTE; \
- p_min = BUF_BEG_ADDR (buf); \
- } \
- else \
- { \
- p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - BEG_BYTE;\
- p_min = BUF_GAP_END_ADDR (buf); \
- } \
- if (p > p_min && !CHAR_HEAD_P (*p)) \
- { \
- unsigned char *pend = p--; \
- int len, bytes; \
- if (p_min < p - MAX_MULTIBYTE_LENGTH) \
- p_min = p - MAX_MULTIBYTE_LENGTH; \
- while (p > p_min && !CHAR_HEAD_P (*p)) p--; \
- len = pend + 1 - p; \
- PARSE_MULTIBYTE_SEQ (p, len, bytes); \
- if (bytes == len) \
- pos_byte -= len - 1; \
- } \
- } while (0)
+
+/* Special macros for emacs-mule encoding. */
-#endif /* emacs */
-
-/* This is the maximum byte length of multi-byte sequence. */
-#define MAX_MULTIBYTE_LENGTH 4
-
-extern void invalid_character P_ ((int)) NO_RETURN;
-
-extern int translate_char P_ ((Lisp_Object, int, int, int, int));
-extern int split_string P_ ((const unsigned char *, int, int *,
- unsigned char *, unsigned char *));
-extern int char_to_string P_ ((int, unsigned char *));
-extern int char_to_string_1 P_ ((int, unsigned char *));
-extern int string_to_char P_ ((const unsigned char *, int, int *));
-extern int char_printable_p P_ ((int c));
-extern int multibyte_form_length P_ ((const unsigned char *, int));
-extern void parse_str_as_multibyte P_ ((const unsigned char *, int, int *,
- int *));
-extern int str_as_multibyte P_ ((unsigned char *, int, int, int *));
-extern int parse_str_to_multibyte P_ ((unsigned char *, int));
-extern int str_to_multibyte P_ ((unsigned char *, int, int));
-extern int str_as_unibyte P_ ((unsigned char *, int));
-extern int get_charset_id P_ ((Lisp_Object));
-extern int find_charset_in_text P_ ((const unsigned char *, int, int, int *,
- Lisp_Object));
-extern int strwidth P_ ((unsigned char *, int));
-extern int c_string_width P_ ((const unsigned char *, int, int, int *, int *));
-extern int lisp_string_width P_ ((Lisp_Object, int, int *, int *));
-extern int char_bytes P_ ((int));
-extern int char_valid_p P_ ((int, int));
-
-EXFUN (Funibyte_char_to_multibyte, 1);
-
-extern Lisp_Object Vtranslation_table_vector;
-
-/* Return a translation table of id number ID. */
-#define GET_TRANSLATION_TABLE(id) \
- (XCDR(XVECTOR(Vtranslation_table_vector)->contents[(id)]))
-
-/* A char-table for characters which may invoke auto-filling. */
-extern Lisp_Object Vauto_fill_chars;
-
-/* Copy LEN bytes from FROM to TO. This macro should be used only
- when a caller knows that LEN is short and the obvious copy loop is
- faster than calling bcopy which has some overhead. Copying a
- multibyte sequence of a multibyte character is the typical case. */
-
-#define BCOPY_SHORT(from, to, len) \
- do { \
- int i = len; \
- const unsigned char *from_p = from; \
- unsigned char *to_p = to; \
- while (i--) *to_p++ = *from_p++; \
- } while (0)
+/* Leading-code followed by extended leading-code. DIMENSION/COLUMN */
+#define EMACS_MULE_LEADING_CODE_PRIVATE_11 0x9A /* 1/1 */
+#define EMACS_MULE_LEADING_CODE_PRIVATE_12 0x9B /* 1/2 */
+#define EMACS_MULE_LEADING_CODE_PRIVATE_21 0x9C /* 2/2 */
+#define EMACS_MULE_LEADING_CODE_PRIVATE_22 0x9D /* 2/2 */
+
+extern struct charset *emacs_mule_charset[256];
+
+
+
+extern Lisp_Object Qcharsetp;
+
+extern Lisp_Object Qascii, Qunicode;
+extern int charset_ascii, charset_eight_bit;
+extern int charset_iso_8859_1;
+extern int charset_unicode;
+extern int charset_jisx0201_roman;
+extern int charset_jisx0208_1978;
+extern int charset_jisx0208;
+
+extern int charset_unibyte;
+
+extern struct charset *char_charset P_ ((int, Lisp_Object, unsigned *));
+extern Lisp_Object charset_attributes P_ ((int));
+
+extern int decode_char P_ ((struct charset *, unsigned));
+extern unsigned encode_char P_ ((struct charset *, int));
+extern int string_xstring_p P_ ((Lisp_Object));
+
+extern void map_charset_chars P_ ((void (*) (Lisp_Object, Lisp_Object),
+ Lisp_Object, Lisp_Object,
+ struct charset *, unsigned, unsigned));
+
+EXFUN (Funify_charset, 3);
#endif /* EMACS_CHARSET_H */
diff --git a/src/chartab.c b/src/chartab.c
new file mode 100644
index 00000000000..a75ed1e7cda
--- /dev/null
+++ b/src/chartab.c
@@ -0,0 +1,975 @@
+/* chartab.c -- char-table support
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs; see the file COPYING. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#include <config.h>
+#include "lisp.h"
+#include "character.h"
+#include "charset.h"
+#include "ccl.h"
+
+/* 64/16/32/128 */
+
+/* Number of elements in Nth level char-table. */
+const int chartab_size[4] =
+ { (1 << CHARTAB_SIZE_BITS_0),
+ (1 << CHARTAB_SIZE_BITS_1),
+ (1 << CHARTAB_SIZE_BITS_2),
+ (1 << CHARTAB_SIZE_BITS_3) };
+
+/* Number of characters each element of Nth level char-table
+ covers. */
+const int chartab_chars[4] =
+ { (1 << (CHARTAB_SIZE_BITS_1 + CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3)),
+ (1 << (CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3)),
+ (1 << CHARTAB_SIZE_BITS_3),
+ 1 };
+
+/* Number of characters (in bits) each element of Nth level char-table
+ covers. */
+const int chartab_bits[4] =
+ { (CHARTAB_SIZE_BITS_1 + CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3),
+ (CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3),
+ CHARTAB_SIZE_BITS_3,
+ 0 };
+
+#define CHARTAB_IDX(c, depth, min_char) \
+ (((c) - (min_char)) >> chartab_bits[(depth)])
+
+
+DEFUN ("make-char-table", Fmake_char_table, Smake_char_table, 1, 2, 0,
+ doc: /* Return a newly created char-table, with purpose PURPOSE.
+Each element is initialized to INIT, which defaults to nil.
+
+PURPOSE should be a symbol. If it has a `char-table-extra-slots'
+property, the property's value should be an integer between 0 and 10
+that specifies how many extra slots the char-table has. Otherwise,
+the char-table has no extra slot. */)
+ (purpose, init)
+ register Lisp_Object purpose, init;
+{
+ Lisp_Object vector;
+ Lisp_Object n;
+ int n_extras;
+ int size;
+
+ CHECK_SYMBOL (purpose);
+ n = Fget (purpose, Qchar_table_extra_slots);
+ if (NILP (n))
+ n_extras = 0;
+ else
+ {
+ CHECK_NATNUM (n);
+ n_extras = XINT (n);
+ if (n_extras > 10)
+ args_out_of_range (n, Qnil);
+ }
+
+ size = VECSIZE (struct Lisp_Char_Table) - 1 + n_extras;
+ vector = Fmake_vector (make_number (size), init);
+ XCHAR_TABLE (vector)->parent = Qnil;
+ XCHAR_TABLE (vector)->purpose = purpose;
+ XSETCHAR_TABLE (vector, XCHAR_TABLE (vector));
+ return vector;
+}
+
+static Lisp_Object
+make_sub_char_table (depth, min_char, defalt)
+ int depth, min_char;
+ Lisp_Object defalt;
+{
+ Lisp_Object table;
+ int size = VECSIZE (struct Lisp_Sub_Char_Table) - 1 + chartab_size[depth];
+
+ table = Fmake_vector (make_number (size), defalt);
+ XSUB_CHAR_TABLE (table)->depth = make_number (depth);
+ XSUB_CHAR_TABLE (table)->min_char = make_number (min_char);
+ XSETSUB_CHAR_TABLE (table, XSUB_CHAR_TABLE (table));
+
+ return table;
+}
+
+static Lisp_Object
+char_table_ascii (table)
+ Lisp_Object table;
+{
+ Lisp_Object sub;
+
+ sub = XCHAR_TABLE (table)->contents[0];
+ if (! SUB_CHAR_TABLE_P (sub))
+ return sub;
+ sub = XSUB_CHAR_TABLE (sub)->contents[0];
+ if (! SUB_CHAR_TABLE_P (sub))
+ return sub;
+ return XSUB_CHAR_TABLE (sub)->contents[0];
+}
+
+Lisp_Object
+copy_sub_char_table (table)
+ Lisp_Object table;
+{
+ Lisp_Object copy;
+ int depth = XINT (XSUB_CHAR_TABLE (table)->depth);
+ int min_char = XINT (XSUB_CHAR_TABLE (table)->min_char);
+ Lisp_Object val;
+ int i;
+
+ copy = make_sub_char_table (depth, min_char, Qnil);
+ /* Recursively copy any sub char-tables. */
+ for (i = 0; i < chartab_size[depth]; i++)
+ {
+ val = XSUB_CHAR_TABLE (table)->contents[i];
+ if (SUB_CHAR_TABLE_P (val))
+ XSUB_CHAR_TABLE (copy)->contents[i] = copy_sub_char_table (val);
+ else
+ XSUB_CHAR_TABLE (copy)->contents[i] = val;
+ }
+
+ return copy;
+}
+
+
+Lisp_Object
+copy_char_table (table)
+ Lisp_Object table;
+{
+ Lisp_Object copy;
+ int size = XCHAR_TABLE (table)->size & PSEUDOVECTOR_SIZE_MASK;
+ int i;
+
+ copy = Fmake_vector (make_number (size), Qnil);
+ XCHAR_TABLE (copy)->defalt = XCHAR_TABLE (table)->defalt;
+ XCHAR_TABLE (copy)->parent = XCHAR_TABLE (table)->parent;
+ XCHAR_TABLE (copy)->purpose = XCHAR_TABLE (table)->purpose;
+ XCHAR_TABLE (copy)->ascii = XCHAR_TABLE (table)->ascii;
+ for (i = 0; i < chartab_size[0]; i++)
+ XCHAR_TABLE (copy)->contents[i]
+ = (SUB_CHAR_TABLE_P (XCHAR_TABLE (table)->contents[i])
+ ? copy_sub_char_table (XCHAR_TABLE (table)->contents[i])
+ : XCHAR_TABLE (table)->contents[i]);
+ if (SUB_CHAR_TABLE_P (XCHAR_TABLE (copy)->ascii))
+ XCHAR_TABLE (copy)->ascii = char_table_ascii (copy);
+ size -= VECSIZE (struct Lisp_Char_Table) - 1;
+ for (i = 0; i < size; i++)
+ XCHAR_TABLE (copy)->extras[i] = XCHAR_TABLE (table)->extras[i];
+
+ XSETCHAR_TABLE (copy, XCHAR_TABLE (copy));
+ return copy;
+}
+
+Lisp_Object
+sub_char_table_ref (table, c)
+ Lisp_Object table;
+ int c;
+{
+ struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
+ int depth = XINT (tbl->depth);
+ int min_char = XINT (tbl->min_char);
+ Lisp_Object val;
+
+ val = tbl->contents[CHARTAB_IDX (c, depth, min_char)];
+ if (SUB_CHAR_TABLE_P (val))
+ val = sub_char_table_ref (val, c);
+ return val;
+}
+
+Lisp_Object
+char_table_ref (table, c)
+ Lisp_Object table;
+ int c;
+{
+ struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
+ Lisp_Object val;
+
+ if (ASCII_CHAR_P (c))
+ {
+ val = tbl->ascii;
+ if (SUB_CHAR_TABLE_P (val))
+ val = XSUB_CHAR_TABLE (val)->contents[c];
+ }
+ else
+ {
+ val = tbl->contents[CHARTAB_IDX (c, 0, 0)];
+ if (SUB_CHAR_TABLE_P (val))
+ val = sub_char_table_ref (val, c);
+ }
+ if (NILP (val))
+ {
+ val = tbl->defalt;
+ if (NILP (val) && CHAR_TABLE_P (tbl->parent))
+ val = char_table_ref (tbl->parent, c);
+ }
+ return val;
+}
+
+static Lisp_Object
+sub_char_table_ref_and_range (table, c, from, to, defalt)
+ Lisp_Object table;
+ int c;
+ int *from, *to;
+ Lisp_Object defalt;
+{
+ struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
+ int depth = XINT (tbl->depth);
+ int min_char = XINT (tbl->min_char);
+ int max_char = min_char + chartab_chars[depth - 1] - 1;
+ int index = CHARTAB_IDX (c, depth, min_char);
+ Lisp_Object val;
+
+ val = tbl->contents[index];
+ *from = min_char + index * chartab_chars[depth];
+ *to = *from + chartab_chars[depth] - 1;
+ if (SUB_CHAR_TABLE_P (val))
+ val = sub_char_table_ref_and_range (val, c, from, to, defalt);
+ else if (NILP (val))
+ val = defalt;
+
+ while (*from > min_char
+ && *from == min_char + index * chartab_chars[depth])
+ {
+ Lisp_Object this_val;
+ int this_from = *from - chartab_chars[depth];
+ int this_to = *from - 1;
+
+ index--;
+ this_val = tbl->contents[index];
+ if (SUB_CHAR_TABLE_P (this_val))
+ this_val = sub_char_table_ref_and_range (this_val, this_to,
+ &this_from, &this_to,
+ defalt);
+ else if (NILP (this_val))
+ this_val = defalt;
+
+ if (! EQ (this_val, val))
+ break;
+ *from = this_from;
+ }
+ index = CHARTAB_IDX (c, depth, min_char);
+ while (*to < max_char
+ && *to == min_char + (index + 1) * chartab_chars[depth] - 1)
+ {
+ Lisp_Object this_val;
+ int this_from = *to + 1;
+ int this_to = this_from + chartab_chars[depth] - 1;
+
+ index++;
+ this_val = tbl->contents[index];
+ if (SUB_CHAR_TABLE_P (this_val))
+ this_val = sub_char_table_ref_and_range (this_val, this_from,
+ &this_from, &this_to,
+ defalt);
+ else if (NILP (this_val))
+ this_val = defalt;
+ if (! EQ (this_val, val))
+ break;
+ *to = this_to;
+ }
+
+ return val;
+}
+
+
+/* Return the value for C in char-table TABLE. Set *FROM and *TO to
+ the range of characters (containing C) that have the same value as
+ C. It is not assured that the value of (*FROM - 1) and (*TO + 1)
+ is different from that of C. */
+
+Lisp_Object
+char_table_ref_and_range (table, c, from, to)
+ Lisp_Object table;
+ int c;
+ int *from, *to;
+{
+ struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
+ int index = CHARTAB_IDX (c, 0, 0);
+ Lisp_Object val;
+
+ val = tbl->contents[index];
+ *from = index * chartab_chars[0];
+ *to = *from + chartab_chars[0] - 1;
+ if (SUB_CHAR_TABLE_P (val))
+ val = sub_char_table_ref_and_range (val, c, from, to, tbl->defalt);
+ else if (NILP (val))
+ val = tbl->defalt;
+
+ while (*from > 0 && *from == index * chartab_chars[0])
+ {
+ Lisp_Object this_val;
+ int this_from = *from - chartab_chars[0];
+ int this_to = *from - 1;
+
+ index--;
+ this_val = tbl->contents[index];
+ if (SUB_CHAR_TABLE_P (this_val))
+ this_val = sub_char_table_ref_and_range (this_val, this_to,
+ &this_from, &this_to,
+ tbl->defalt);
+ else if (NILP (this_val))
+ this_val = tbl->defalt;
+
+ if (! EQ (this_val, val))
+ break;
+ *from = this_from;
+ }
+ while (*to < MAX_CHAR && *to == (index + 1) * chartab_chars[0] - 1)
+ {
+ Lisp_Object this_val;
+ int this_from = *to + 1;
+ int this_to = this_from + chartab_chars[0] - 1;
+
+ index++;
+ this_val = tbl->contents[index];
+ if (SUB_CHAR_TABLE_P (this_val))
+ this_val = sub_char_table_ref_and_range (this_val, this_from,
+ &this_from, &this_to,
+ tbl->defalt);
+ else if (NILP (this_val))
+ this_val = tbl->defalt;
+ if (! EQ (this_val, val))
+ break;
+ *to = this_to;
+ }
+
+ return val;
+}
+
+
+#define ASET_RANGE(ARRAY, FROM, TO, LIMIT, VAL) \
+ do { \
+ int limit = (TO) < (LIMIT) ? (TO) : (LIMIT); \
+ for (; (FROM) < limit; (FROM)++) (ARRAY)->contents[(FROM)] = (VAL); \
+ } while (0)
+
+#define GET_SUB_CHAR_TABLE(TABLE, SUBTABLE, IDX, DEPTH, MIN_CHAR) \
+ do { \
+ (SUBTABLE) = (TABLE)->contents[(IDX)]; \
+ if (!SUB_CHAR_TABLE_P (SUBTABLE)) \
+ (SUBTABLE) = make_sub_char_table ((DEPTH), (MIN_CHAR), (SUBTABLE)); \
+ } while (0)
+
+
+static void
+sub_char_table_set (table, c, val)
+ Lisp_Object table;
+ int c;
+ Lisp_Object val;
+{
+ struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
+ int depth = XINT ((tbl)->depth);
+ int min_char = XINT ((tbl)->min_char);
+ int i = CHARTAB_IDX (c, depth, min_char);
+ Lisp_Object sub;
+
+ if (depth == 3)
+ tbl->contents[i] = val;
+ else
+ {
+ sub = tbl->contents[i];
+ if (! SUB_CHAR_TABLE_P (sub))
+ {
+ sub = make_sub_char_table (depth + 1,
+ min_char + i * chartab_chars[depth], sub);
+ tbl->contents[i] = sub;
+ }
+ sub_char_table_set (sub, c, val);
+ }
+}
+
+Lisp_Object
+char_table_set (table, c, val)
+ Lisp_Object table;
+ int c;
+ Lisp_Object val;
+{
+ struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
+
+ if (ASCII_CHAR_P (c)
+ && SUB_CHAR_TABLE_P (tbl->ascii))
+ {
+ XSUB_CHAR_TABLE (tbl->ascii)->contents[c] = val;
+ }
+ else
+ {
+ int i = CHARTAB_IDX (c, 0, 0);
+ Lisp_Object sub;
+
+ sub = tbl->contents[i];
+ if (! SUB_CHAR_TABLE_P (sub))
+ {
+ sub = make_sub_char_table (1, i * chartab_chars[0], sub);
+ tbl->contents[i] = sub;
+ }
+ sub_char_table_set (sub, c, val);
+ if (ASCII_CHAR_P (c))
+ tbl->ascii = char_table_ascii (table);
+ }
+ return val;
+}
+
+static void
+sub_char_table_set_range (table, depth, min_char, from, to, val)
+ Lisp_Object *table;
+ int depth;
+ int min_char;
+ int from, to;
+ Lisp_Object val;
+{
+ int max_char = min_char + chartab_chars[depth] - 1;
+
+ if (depth == 3 || (from <= min_char && to >= max_char))
+ *table = val;
+ else
+ {
+ int i, j;
+
+ depth++;
+ if (! SUB_CHAR_TABLE_P (*table))
+ *table = make_sub_char_table (depth, min_char, *table);
+ if (from < min_char)
+ from = min_char;
+ if (to > max_char)
+ to = max_char;
+ i = CHARTAB_IDX (from, depth, min_char);
+ j = CHARTAB_IDX (to, depth, min_char);
+ min_char += chartab_chars[depth] * i;
+ for (; i <= j; i++, min_char += chartab_chars[depth])
+ sub_char_table_set_range (XSUB_CHAR_TABLE (*table)->contents + i,
+ depth, min_char, from, to, val);
+ }
+}
+
+
+Lisp_Object
+char_table_set_range (table, from, to, val)
+ Lisp_Object table;
+ int from, to;
+ Lisp_Object val;
+{
+ struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
+ Lisp_Object *contents = tbl->contents;
+ int i, min_char;
+
+ if (from == to)
+ char_table_set (table, from, val);
+ else
+ {
+ for (i = CHARTAB_IDX (from, 0, 0), min_char = i * chartab_chars[0];
+ min_char <= to;
+ i++, min_char += chartab_chars[0])
+ sub_char_table_set_range (contents + i, 0, min_char, from, to, val);
+ if (ASCII_CHAR_P (from))
+ tbl->ascii = char_table_ascii (table);
+ }
+ return val;
+}
+
+
+DEFUN ("char-table-subtype", Fchar_table_subtype, Schar_table_subtype,
+ 1, 1, 0,
+ doc: /*
+Return the subtype of char-table CHAR-TABLE. The value is a symbol. */)
+ (char_table)
+ Lisp_Object char_table;
+{
+ CHECK_CHAR_TABLE (char_table);
+
+ return XCHAR_TABLE (char_table)->purpose;
+}
+
+DEFUN ("char-table-parent", Fchar_table_parent, Schar_table_parent,
+ 1, 1, 0,
+ doc: /* Return the parent char-table of CHAR-TABLE.
+The value is either nil or another char-table.
+If CHAR-TABLE holds nil for a given character,
+then the actual applicable value is inherited from the parent char-table
+\(or from its parents, if necessary). */)
+ (char_table)
+ Lisp_Object char_table;
+{
+ CHECK_CHAR_TABLE (char_table);
+
+ return XCHAR_TABLE (char_table)->parent;
+}
+
+DEFUN ("set-char-table-parent", Fset_char_table_parent, Sset_char_table_parent,
+ 2, 2, 0,
+ doc: /* Set the parent char-table of CHAR-TABLE to PARENT.
+Return PARENT. PARENT must be either nil or another char-table. */)
+ (char_table, parent)
+ Lisp_Object char_table, parent;
+{
+ Lisp_Object temp;
+
+ CHECK_CHAR_TABLE (char_table);
+
+ if (!NILP (parent))
+ {
+ CHECK_CHAR_TABLE (parent);
+
+ for (temp = parent; !NILP (temp); temp = XCHAR_TABLE (temp)->parent)
+ if (EQ (temp, char_table))
+ error ("Attempt to make a chartable be its own parent");
+ }
+
+ XCHAR_TABLE (char_table)->parent = parent;
+
+ return parent;
+}
+
+DEFUN ("char-table-extra-slot", Fchar_table_extra_slot, Schar_table_extra_slot,
+ 2, 2, 0,
+ doc: /* Return the value of CHAR-TABLE's extra-slot number N. */)
+ (char_table, n)
+ Lisp_Object char_table, n;
+{
+ CHECK_CHAR_TABLE (char_table);
+ CHECK_NUMBER (n);
+ if (XINT (n) < 0
+ || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table)))
+ args_out_of_range (char_table, n);
+
+ return XCHAR_TABLE (char_table)->extras[XINT (n)];
+}
+
+DEFUN ("set-char-table-extra-slot", Fset_char_table_extra_slot,
+ Sset_char_table_extra_slot,
+ 3, 3, 0,
+ doc: /* Set CHAR-TABLE's extra-slot number N to VALUE. */)
+ (char_table, n, value)
+ Lisp_Object char_table, n, value;
+{
+ CHECK_CHAR_TABLE (char_table);
+ CHECK_NUMBER (n);
+ if (XINT (n) < 0
+ || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table)))
+ args_out_of_range (char_table, n);
+
+ return XCHAR_TABLE (char_table)->extras[XINT (n)] = value;
+}
+
+DEFUN ("char-table-range", Fchar_table_range, Schar_table_range,
+ 2, 2, 0,
+ doc: /* Return the value in CHAR-TABLE for a range of characters RANGE.
+RANGE should be nil (for the default value),
+a cons of character codes (for characters in the range), or a character code. */)
+ (char_table, range)
+ Lisp_Object char_table, range;
+{
+ Lisp_Object val;
+ CHECK_CHAR_TABLE (char_table);
+
+ if (EQ (range, Qnil))
+ val = XCHAR_TABLE (char_table)->defalt;
+ else if (INTEGERP (range))
+ val = CHAR_TABLE_REF (char_table, XINT (range));
+ else if (CONSP (range))
+ {
+ int from, to;
+
+ CHECK_CHARACTER_CAR (range);
+ CHECK_CHARACTER_CDR (range);
+ val = char_table_ref_and_range (char_table, XINT (XCAR (range)),
+ &from, &to);
+ /* Not yet implemented. */
+ }
+ else
+ error ("Invalid RANGE argument to `char-table-range'");
+ return val;
+}
+
+DEFUN ("set-char-table-range", Fset_char_table_range, Sset_char_table_range,
+ 3, 3, 0,
+ doc: /* Set the value in CHAR-TABLE for a range of characters RANGE to VALUE.
+RANGE should be t (for all characters), nil (for the default value),
+a cons of character codes (for characters in the range),
+or a character code. Return VALUE. */)
+ (char_table, range, value)
+ Lisp_Object char_table, range, value;
+{
+ CHECK_CHAR_TABLE (char_table);
+ if (EQ (range, Qt))
+ {
+ int i;
+
+ XCHAR_TABLE (char_table)->ascii = Qnil;
+ for (i = 0; i < chartab_size[0]; i++)
+ XCHAR_TABLE (char_table)->contents[i] = Qnil;
+ XCHAR_TABLE (char_table)->defalt = value;
+ }
+ else if (EQ (range, Qnil))
+ XCHAR_TABLE (char_table)->defalt = value;
+ else if (INTEGERP (range))
+ char_table_set (char_table, XINT (range), value);
+ else if (CONSP (range))
+ {
+ CHECK_CHARACTER_CAR (range);
+ CHECK_CHARACTER_CDR (range);
+ char_table_set_range (char_table,
+ XINT (XCAR (range)), XINT (XCDR (range)), value);
+ }
+ else
+ error ("Invalid RANGE argument to `set-char-table-range'");
+
+ return value;
+}
+
+DEFUN ("set-char-table-default", Fset_char_table_default,
+ Sset_char_table_default, 3, 3, 0,
+ doc: /*
+This function is obsolete and has no effect. */)
+ (char_table, ch, value)
+ Lisp_Object char_table, ch, value;
+{
+ return Qnil;
+}
+
+/* Look up the element in TABLE at index CH, and return it as an
+ integer. If the element is not a character, return CH itself. */
+
+int
+char_table_translate (table, ch)
+ Lisp_Object table;
+ int ch;
+{
+ Lisp_Object value;
+ value = Faref (table, make_number (ch));
+ if (! CHARACTERP (value))
+ return ch;
+ return XINT (value);
+}
+
+static Lisp_Object
+optimize_sub_char_table (table)
+ Lisp_Object table;
+{
+ struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
+ int depth = XINT (tbl->depth);
+ Lisp_Object elt, this;
+ int i;
+
+ elt = XSUB_CHAR_TABLE (table)->contents[0];
+ if (SUB_CHAR_TABLE_P (elt))
+ elt = XSUB_CHAR_TABLE (table)->contents[0] = optimize_sub_char_table (elt);
+ if (SUB_CHAR_TABLE_P (elt))
+ return table;
+ for (i = 1; i < chartab_size[depth]; i++)
+ {
+ this = XSUB_CHAR_TABLE (table)->contents[i];
+ if (SUB_CHAR_TABLE_P (this))
+ this = XSUB_CHAR_TABLE (table)->contents[i]
+ = optimize_sub_char_table (this);
+ if (SUB_CHAR_TABLE_P (this)
+ || NILP (Fequal (this, elt)))
+ break;
+ }
+
+ return (i < chartab_size[depth] ? table : elt);
+}
+
+DEFUN ("optimize-char-table", Foptimize_char_table, Soptimize_char_table,
+ 1, 1, 0,
+ doc: /* Optimize CHAR-TABLE. */)
+ (char_table)
+ Lisp_Object char_table;
+{
+ Lisp_Object elt;
+ int i;
+
+ CHECK_CHAR_TABLE (char_table);
+
+ for (i = 0; i < chartab_size[0]; i++)
+ {
+ elt = XCHAR_TABLE (char_table)->contents[i];
+ if (SUB_CHAR_TABLE_P (elt))
+ XCHAR_TABLE (char_table)->contents[i] = optimize_sub_char_table (elt);
+ }
+ return Qnil;
+}
+
+
+static Lisp_Object
+map_sub_char_table (c_function, function, table, arg, val, range,
+ default_val, parent)
+ void (*c_function) P_ ((Lisp_Object, Lisp_Object, Lisp_Object));
+ Lisp_Object function, table, arg, val, range, default_val, parent;
+{
+ struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
+ int depth = XINT (tbl->depth);
+ int i, c;
+
+ for (i = 0, c = XINT (tbl->min_char); i < chartab_size[depth];
+ i++, c += chartab_chars[depth])
+ {
+ Lisp_Object this;
+
+ this = tbl->contents[i];
+ if (SUB_CHAR_TABLE_P (this))
+ val = map_sub_char_table (c_function, function, this, arg, val, range,
+ default_val, parent);
+ else
+ {
+ if (NILP (this))
+ this = default_val;
+ if (NILP (this) && ! NILP (parent))
+ this = CHAR_TABLE_REF (parent, c);
+ if (NILP (Fequal (val, this)))
+ {
+ if (! NILP (val))
+ {
+ XSETCDR (range, make_number (c - 1));
+ if (depth == 3
+ && EQ (XCAR (range), XCDR (range)))
+ {
+ if (c_function)
+ (*c_function) (arg, XCAR (range), val);
+ else
+ call2 (function, XCAR (range), val);
+ }
+ else
+ {
+ if (c_function)
+ (*c_function) (arg, range, val);
+ else
+ call2 (function, range, val);
+ }
+ }
+ val = this;
+ XSETCAR (range, make_number (c));
+ }
+ }
+ }
+ return val;
+}
+
+
+/* Map C_FUNCTION or FUNCTION over TABLE, calling it for each
+ character or group of characters that share a value.
+
+ ARG is passed to C_FUNCTION when that is called. */
+
+void
+map_char_table (c_function, function, table, arg)
+ void (*c_function) P_ ((Lisp_Object, Lisp_Object, Lisp_Object));
+ Lisp_Object function, table, arg;
+{
+ Lisp_Object range, val;
+ int c, i;
+ struct gcpro gcpro1, gcpro2, gcpro3;
+
+ range = Fcons (make_number (0), Qnil);
+ GCPRO3 (table, arg, range);
+ val = XCHAR_TABLE (table)->ascii;
+ if (SUB_CHAR_TABLE_P (val))
+ val = XSUB_CHAR_TABLE (val)->contents[0];
+
+ for (i = 0, c = 0; i < chartab_size[0]; i++, c += chartab_chars[0])
+ {
+ Lisp_Object this;
+
+ this = XCHAR_TABLE (table)->contents[i];
+ if (SUB_CHAR_TABLE_P (this))
+ val = map_sub_char_table (c_function, function, this, arg, val, range,
+ XCHAR_TABLE (table)->defalt,
+ XCHAR_TABLE (table)->parent);
+ else
+ {
+ if (NILP (this))
+ this = XCHAR_TABLE (table)->defalt;
+ if (NILP (this) && ! NILP (XCHAR_TABLE (table)->parent))
+ this = CHAR_TABLE_REF (XCHAR_TABLE (table)->parent, c);
+ if (NILP (Fequal (val, this)))
+ {
+ if (! NILP (val))
+ {
+ XSETCDR (range, make_number (c - 1));
+ if (c_function)
+ (*c_function) (arg, range, val);
+ else
+ call2 (function, range, val);
+ }
+ val = this;
+ XSETCAR (range, make_number (c));
+ }
+ }
+ }
+
+ if (! NILP (val))
+ {
+ XSETCDR (range, make_number (c - 1));
+ if (c_function)
+ (*c_function) (arg, range, val);
+ else
+ call2 (function, range, val);
+ }
+
+ UNGCPRO;
+}
+
+DEFUN ("map-char-table", Fmap_char_table, Smap_char_table,
+ 2, 2, 0,
+ doc: /*
+Call FUNCTION for each character in CHAR-TABLE that has non-nil value.
+FUNCTION is called with two arguments--a key and a value.
+The key is a character code or a cons of character codes specifying a
+range of characters that have the same value. */)
+ (function, char_table)
+ Lisp_Object function, char_table;
+{
+ CHECK_CHAR_TABLE (char_table);
+
+ map_char_table (NULL, function, char_table, char_table);
+ return Qnil;
+}
+
+
+static void
+map_sub_char_table_for_charset (c_function, function, table, arg, range,
+ charset, from, to)
+ void (*c_function) P_ ((Lisp_Object, Lisp_Object));
+ Lisp_Object function, table, arg, range;
+ struct charset *charset;
+ unsigned from, to;
+{
+ struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
+ int depth = XINT (tbl->depth);
+ int c, i;
+
+ if (depth < 3)
+ for (i = 0, c = XINT (tbl->min_char); i < chartab_size[depth];
+ i++, c += chartab_chars[depth])
+ {
+ Lisp_Object this;
+
+ this = tbl->contents[i];
+ if (SUB_CHAR_TABLE_P (this))
+ map_sub_char_table_for_charset (c_function, function, this, arg,
+ range, charset, from, to);
+ else
+ {
+ if (! NILP (XCAR (range)))
+ {
+ XSETCDR (range, make_number (c - 1));
+ if (c_function)
+ (*c_function) (arg, range);
+ else
+ call2 (function, range, arg);
+ }
+ XSETCAR (range, Qnil);
+ }
+ }
+ else
+ for (i = 0, c = XINT (tbl->min_char); i < chartab_size[depth]; i++, c ++)
+ {
+ Lisp_Object this;
+ unsigned code;
+
+ this = tbl->contents[i];
+ if (NILP (this)
+ || (charset
+ && (code = ENCODE_CHAR (charset, c),
+ (code < from || code > to))))
+ {
+ if (! NILP (XCAR (range)))
+ {
+ XSETCDR (range, make_number (c - 1));
+ if (c_function)
+ (*c_function) (arg, range);
+ else
+ call2 (function, range, arg);
+ XSETCAR (range, Qnil);
+ }
+ }
+ else
+ {
+ if (NILP (XCAR (range)))
+ XSETCAR (range, make_number (c));
+ }
+ }
+}
+
+
+void
+map_char_table_for_charset (c_function, function, table, arg,
+ charset, from, to)
+ void (*c_function) P_ ((Lisp_Object, Lisp_Object));
+ Lisp_Object function, table, arg;
+ struct charset *charset;
+ unsigned from, to;
+{
+ Lisp_Object range;
+ int c, i;
+ struct gcpro gcpro1;
+
+ range = Fcons (Qnil, Qnil);
+ GCPRO1 (range);
+
+ for (i = 0, c = 0; i < chartab_size[0]; i++, c += chartab_chars[0])
+ {
+ Lisp_Object this;
+
+ this = XCHAR_TABLE (table)->contents[i];
+ if (SUB_CHAR_TABLE_P (this))
+ map_sub_char_table_for_charset (c_function, function, this, arg,
+ range, charset, from, to);
+ else
+ {
+ if (! NILP (XCAR (range)))
+ {
+ XSETCDR (range, make_number (c - 1));
+ if (c_function)
+ (*c_function) (arg, range);
+ else
+ call2 (function, range, arg);
+ }
+ XSETCAR (range, Qnil);
+ }
+ }
+ if (! NILP (XCAR (range)))
+ {
+ XSETCDR (range, make_number (c - 1));
+ if (c_function)
+ (*c_function) (arg, range);
+ else
+ call2 (function, range, arg);
+ }
+
+ UNGCPRO;
+}
+
+
+void
+syms_of_chartab ()
+{
+ defsubr (&Smake_char_table);
+ defsubr (&Schar_table_parent);
+ defsubr (&Schar_table_subtype);
+ defsubr (&Sset_char_table_parent);
+ defsubr (&Schar_table_extra_slot);
+ defsubr (&Sset_char_table_extra_slot);
+ defsubr (&Schar_table_range);
+ defsubr (&Sset_char_table_range);
+ defsubr (&Sset_char_table_default);
+ defsubr (&Soptimize_char_table);
+ defsubr (&Smap_char_table);
+}
+
+/* arch-tag: 18b5b560-7ab5-4108-b09e-d5dd65dc6fda
+ (do not change this comment) */
diff --git a/src/cmds.c b/src/cmds.c
index b84b9d1d85e..096b63dd453 100644
--- a/src/cmds.c
+++ b/src/cmds.c
@@ -24,7 +24,7 @@ Boston, MA 02110-1301, USA. */
#include "lisp.h"
#include "commands.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "syntax.h"
#include "window.h"
#include "keyboard.h"
@@ -327,11 +327,11 @@ Whichever character you type to run this command is inserted. */)
CHECK_NUMBER (n);
/* Barf if the key that invoked this was not a character. */
- if (!INTEGERP (last_command_char))
+ if (!CHARACTERP (last_command_char))
bitch_at_user ();
{
int character = translate_char (Vtranslation_table_for_input,
- XINT (last_command_char), 0, 0, 0);
+ XINT (last_command_char));
if (XINT (n) >= 2 && NILP (current_buffer->overwrite_mode))
{
int modified_char = character;
@@ -395,7 +395,6 @@ internal_self_insert (c, noautofill)
/* At first, get multi-byte form of C in STR. */
if (!NILP (current_buffer->enable_multibyte_characters))
{
- c = unibyte_char_to_multibyte (c);
len = CHAR_STRING (c, str);
if (len == 1)
/* If C has modifier bits, this makes C an appropriate
@@ -472,10 +471,19 @@ internal_self_insert (c, noautofill)
}
hairy = 2;
}
+
+ if (NILP (current_buffer->enable_multibyte_characters))
+ MAKE_CHAR_MULTIBYTE (c);
+ synt = SYNTAX (c);
+
if (!NILP (current_buffer->abbrev_mode)
- && SYNTAX (c) != Sword
+ && synt != Sword
&& NILP (current_buffer->read_only)
- && PT > BEGV && SYNTAX (XFASTINT (Fprevious_char ())) == Sword)
+ && PT > BEGV
+ && (!NILP (current_buffer->enable_multibyte_characters)
+ ? SYNTAX (XFASTINT (Fprevious_char ())) == Sword
+ : (SYNTAX (unibyte_char_to_multibyte (XFASTINT (Fprevious_char ())))
+ == Sword)))
{
int modiff = MODIFF;
Lisp_Object sym;
@@ -544,7 +552,6 @@ internal_self_insert (c, noautofill)
Vself_insert_face = Qnil;
}
- synt = SYNTAX (c);
if ((synt == Sclose || synt == Smath)
&& !NILP (Vblink_paren_function) && INTERACTIVE
&& !noautofill)
diff --git a/src/coding.c b/src/coding.c
index a3fd8f91284..cb6df79fca0 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -1,9 +1,12 @@
-/* Coding system handler (conversion, detection, and etc).
+/* Coding system handler (conversion, detection, etc).
Copyright (C) 2001, 2002, 2003, 2004, 2005,
2006 Free Software Foundation, Inc.
Copyright (C) 1995, 1997, 1998, 2002, 2003, 2004, 2005
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -26,382 +29,325 @@ Boston, MA 02110-1301, USA. */
0. General comments
1. Preamble
- 2. Emacs' internal format (emacs-mule) handlers
- 3. ISO2022 handlers
- 4. Shift-JIS and BIG5 handlers
- 5. CCL handlers
- 6. End-of-line handlers
- 7. C library functions
- 8. Emacs Lisp library functions
- 9. Post-amble
+ 2. Emacs' internal format (emacs-utf-8) handlers
+ 3. UTF-8 handlers
+ 4. UTF-16 handlers
+ 5. Charset-base coding systems handlers
+ 6. emacs-mule (old Emacs' internal format) handlers
+ 7. ISO2022 handlers
+ 8. Shift-JIS and BIG5 handlers
+ 9. CCL handlers
+ 10. C library functions
+ 11. Emacs Lisp library functions
+ 12. Postamble
*/
-/*** 0. General comments ***/
+/*** 0. General comments ***
-/*** GENERAL NOTE on CODING SYSTEMS ***
+CODING SYSTEM
- A coding system is an encoding mechanism for one or more character
- sets. Here's a list of coding systems which Emacs can handle. When
- we say "decode", it means converting some other coding system to
- Emacs' internal format (emacs-mule), and when we say "encode",
- it means converting the coding system emacs-mule to some other
+ A coding system is an object for an encoding mechanism that contains
+ information about how to convert byte sequences to character
+ sequences and vice versa. When we say "decode", it means converting
+ a byte sequence of a specific coding system into a character
+ sequence that is represented by Emacs' internal coding system
+ `emacs-utf-8', and when we say "encode", it means converting a
+ character sequence of emacs-utf-8 to a byte sequence of a specific
coding system.
- 0. Emacs' internal format (emacs-mule)
+ In Emacs Lisp, a coding system is represented by a Lisp symbol. In
+ C level, a coding system is represented by a vector of attributes
+ stored in the hash table Vcharset_hash_table. The conversion from
+ coding system symbol to attributes vector is done by looking up
+ Vcharset_hash_table by the symbol.
+
+ Coding systems are classified into the following types depending on
+ the encoding mechanism. Here's a brief description of the types.
+
+ o UTF-8
+
+ o UTF-16
+
+ o Charset-base coding system
+
+ A coding system defined by one or more (coded) character sets.
+ Decoding and encoding are done by a code converter defined for each
+ character set.
+
+ o Old Emacs internal format (emacs-mule)
- Emacs itself holds a multi-lingual character in buffers and strings
- in a special format. Details are described in section 2.
+ The coding system adopted by old versions of Emacs (20 and 21).
- 1. ISO2022
+ o ISO2022-base coding system
The most famous coding system for multiple character sets. X's
- Compound Text, various EUCs (Extended Unix Code), and coding
- systems used in Internet communication such as ISO-2022-JP are
- all variants of ISO2022. Details are described in section 3.
+ Compound Text, various EUCs (Extended Unix Code), and coding systems
+ used in the Internet communication such as ISO-2022-JP are all
+ variants of ISO2022.
- 2. SJIS (or Shift-JIS or MS-Kanji-Code)
+ o SJIS (or Shift-JIS or MS-Kanji-Code)
A coding system to encode character sets: ASCII, JISX0201, and
JISX0208. Widely used for PC's in Japan. Details are described in
- section 4.
+ section 8.
- 3. BIG5
+ o BIG5
- A coding system to encode the character sets ASCII and Big5. Widely
+ A coding system to encode character sets: ASCII and Big5. Widely
used for Chinese (mainly in Taiwan and Hong Kong). Details are
- described in section 4. In this file, when we write "BIG5"
- (all uppercase), we mean the coding system, and when we write
- "Big5" (capitalized), we mean the character set.
+ described in section 8. In this file, when we write "big5" (all
+ lowercase), we mean the coding system, and when we write "Big5"
+ (capitalized), we mean the character set.
- 4. Raw text
+ o CCL
- A coding system for text containing random 8-bit code. Emacs does
- no code conversion on such text except for end-of-line format.
+ If a user wants to decode/encode text encoded in a coding system
+ not listed above, he can supply a decoder and an encoder for it in
+ CCL (Code Conversion Language) programs. Emacs executes the CCL
+ program while decoding/encoding.
- 5. Other
+ o Raw-text
- If a user wants to read/write text encoded in a coding system not
- listed above, he can supply a decoder and an encoder for it as CCL
- (Code Conversion Language) programs. Emacs executes the CCL program
- while reading/writing.
+ A coding system for text containing raw eight-bit data. Emacs
+ treats each byte of source text as a character (except for
+ end-of-line conversion).
- Emacs represents a coding system by a Lisp symbol that has a property
- `coding-system'. But, before actually using the coding system, the
- information about it is set in a structure of type `struct
- coding_system' for rapid processing. See section 6 for more details.
+ o No-conversion
+
+ Like raw text, but don't do end-of-line conversion.
-*/
-/*** GENERAL NOTES on END-OF-LINE FORMAT ***
+END-OF-LINE FORMAT
- How end-of-line of text is encoded depends on the operating system.
- For instance, Unix's format is just one byte of `line-feed' code,
+ How text end-of-line is encoded depends on operating system. For
+ instance, Unix's format is just one byte of LF (line-feed) code,
whereas DOS's format is two-byte sequence of `carriage-return' and
`line-feed' codes. MacOS's format is usually one byte of
`carriage-return'.
Since text character encoding and end-of-line encoding are
- independent, any coding system described above can have any
- end-of-line format. So Emacs has information about end-of-line
- format in each coding-system. See section 6 for more details.
+ independent, any coding system described above can take any format
+ of end-of-line (except for no-conversion).
+
+STRUCT CODING_SYSTEM
+
+ Before using a coding system for code conversion (i.e. decoding and
+ encoding), we setup a structure of type `struct coding_system'.
+ This structure keeps various information about a specific code
+ conversion (e.g. the location of source and destination data).
*/
+/* COMMON MACROS */
+
+
/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
- These functions check if a text between SRC and SRC_END is encoded
- in the coding system category XXX. Each returns an integer value in
- which appropriate flag bits for the category XXX are set. The flag
- bits are defined in macros CODING_CATEGORY_MASK_XXX. Below is the
- template for these functions. If MULTIBYTEP is nonzero, 8-bit codes
- of the range 0x80..0x9F are in multibyte form. */
+ These functions check if a byte sequence specified as a source in
+ CODING conforms to the format of XXX, and update the members of
+ DETECT_INFO.
+
+ Return 1 if the byte sequence conforms to XXX, otherwise return 0.
+
+ Below is the template of these functions. */
+
#if 0
-int
-detect_coding_emacs_mule (src, src_end, multibytep)
- unsigned char *src, *src_end;
- int multibytep;
+static int
+detect_coding_XXX (coding, detect_info)
+ struct coding_system *coding;
+ struct coding_detection_info *detect_info;
{
- ...
+ const unsigned char *src = coding->source;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int multibytep = coding->src_multibyte;
+ int consumed_chars = 0;
+ int found = 0;
+ ...;
+
+ while (1)
+ {
+ /* Get one byte from the source. If the souce is exausted, jump
+ to no_more_source:. */
+ ONE_MORE_BYTE (c);
+
+ if (! __C_conforms_to_XXX___ (c))
+ break;
+ if (! __C_strongly_suggests_XXX__ (c))
+ found = CATEGORY_MASK_XXX;
+ }
+ /* The byte sequence is invalid for XXX. */
+ detect_info->rejected |= CATEGORY_MASK_XXX;
+ return 0;
+
+ no_more_source:
+ /* The source exausted successfully. */
+ detect_info->found |= found;
+ return 1;
}
#endif
/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
- These functions decode SRC_BYTES length of unibyte text at SOURCE
- encoded in CODING to Emacs' internal format. The resulting
- multibyte text goes to a place pointed to by DESTINATION, the length
- of which should not exceed DST_BYTES.
+ These functions decode a byte sequence specified as a source by
+ CODING. The resulting multibyte text goes to a place pointed to by
+ CODING->charbuf, the length of which should not exceed
+ CODING->charbuf_size;
- These functions set the information about original and decoded texts
- in the members `produced', `produced_char', `consumed', and
- `consumed_char' of the structure *CODING. They also set the member
- `result' to one of CODING_FINISH_XXX indicating how the decoding
- finished.
+ These functions set the information of original and decoded texts in
+ CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
+ They also set CODING->result to one of CODING_RESULT_XXX indicating
+ how the decoding is finished.
- DST_BYTES zero means that the source area and destination area are
- overlapped, which means that we can produce a decoded text until it
- reaches the head of the not-yet-decoded source text.
+ Below is the template of these functions. */
- Below is a template for these functions. */
#if 0
static void
-decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
+decode_coding_XXXX (coding)
struct coding_system *coding;
- const unsigned char *source;
- unsigned char *destination;
- int src_bytes, dst_bytes;
{
- ...
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ /* SRC_BASE remembers the start position in source in each loop.
+ The loop will be exited when there's not enough source code, or
+ when there's no room in CHARBUF for a decoded character. */
+ const unsigned char *src_base;
+ /* A buffer to produce decoded characters. */
+ int *charbuf = coding->charbuf + coding->charbuf_used;
+ int *charbuf_end = coding->charbuf + coding->charbuf_size;
+ int multibytep = coding->src_multibyte;
+
+ while (1)
+ {
+ src_base = src;
+ if (charbuf < charbuf_end)
+ /* No more room to produce a decoded character. */
+ break;
+ ONE_MORE_BYTE (c);
+ /* Decode it. */
+ }
+
+ no_more_source:
+ if (src_base < src_end
+ && coding->mode & CODING_MODE_LAST_BLOCK)
+ /* If the source ends by partial bytes to construct a character,
+ treat them as eight-bit raw data. */
+ while (src_base < src_end && charbuf < charbuf_end)
+ *charbuf++ = *src_base++;
+ /* Remember how many bytes and characters we consumed. If the
+ source is multibyte, the bytes and chars are not identical. */
+ coding->consumed = coding->consumed_char = src_base - coding->source;
+ /* Remember how many characters we produced. */
+ coding->charbuf_used = charbuf - coding->charbuf;
}
#endif
/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
- These functions encode SRC_BYTES length text at SOURCE from Emacs'
- internal multibyte format to CODING. The resulting unibyte text
+ These functions encode SRC_BYTES length text at SOURCE of Emacs'
+ internal multibyte format by CODING. The resulting byte sequence
goes to a place pointed to by DESTINATION, the length of which
should not exceed DST_BYTES.
- These functions set the information about original and encoded texts
- in the members `produced', `produced_char', `consumed', and
- `consumed_char' of the structure *CODING. They also set the member
- `result' to one of CODING_FINISH_XXX indicating how the encoding
- finished.
+ These functions set the information of original and encoded texts in
+ the members produced, produced_char, consumed, and consumed_char of
+ the structure *CODING. They also set the member result to one of
+ CODING_RESULT_XXX indicating how the encoding finished.
- DST_BYTES zero means that the source area and destination area are
- overlapped, which means that we can produce encoded text until it
- reaches at the head of the not-yet-encoded source text.
+ DST_BYTES zero means that source area and destination area are
+ overlapped, which means that we can produce a encoded text until it
+ reaches at the head of not-yet-encoded source text.
- Below is a template for these functions. */
+ Below is a template of these functions. */
#if 0
static void
-encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
+encode_coding_XXX (coding)
struct coding_system *coding;
- unsigned char *source, *destination;
- int src_bytes, dst_bytes;
{
- ...
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
+ int produced_chars = 0;
+
+ for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
+ {
+ int c = *charbuf;
+ /* Encode C into DST, and increment DST. */
+ }
+ label_no_more_destination:
+ /* How many chars and bytes we produced. */
+ coding->produced_char += produced_chars;
+ coding->produced = dst - coding->destination;
}
#endif
-/*** COMMONLY USED MACROS ***/
-
-/* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
- get one, two, and three bytes from the source text respectively.
- If there are not enough bytes in the source, they jump to
- `label_end_of_loop'. The caller should set variables `coding',
- `src' and `src_end' to appropriate pointer in advance. These
- macros are called from decoding routines `decode_coding_XXX', thus
- it is assumed that the source text is unibyte. */
-
-#define ONE_MORE_BYTE(c1) \
- do { \
- if (src >= src_end) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
- goto label_end_of_loop; \
- } \
- c1 = *src++; \
- } while (0)
-
-#define TWO_MORE_BYTES(c1, c2) \
- do { \
- if (src + 1 >= src_end) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
- goto label_end_of_loop; \
- } \
- c1 = *src++; \
- c2 = *src++; \
- } while (0)
-
-
-/* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
- form if MULTIBYTEP is nonzero. */
-
-#define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep) \
- do { \
- if (src >= src_end) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
- goto label_end_of_loop; \
- } \
- c1 = *src++; \
- if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL) \
- c1 = *src++ - 0x20; \
- } while (0)
-
-/* Set C to the next character at the source text pointed by `src'.
- If there are not enough characters in the source, jump to
- `label_end_of_loop'. The caller should set variables `coding'
- `src', `src_end', and `translation_table' to appropriate pointers
- in advance. This macro is used in encoding routines
- `encode_coding_XXX', thus it assumes that the source text is in
- multibyte form except for 8-bit characters. 8-bit characters are
- in multibyte form if coding->src_multibyte is nonzero, else they
- are represented by a single byte. */
-
-#define ONE_MORE_CHAR(c) \
- do { \
- int len = src_end - src; \
- int bytes; \
- if (len <= 0) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
- goto label_end_of_loop; \
- } \
- if (coding->src_multibyte \
- || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes)) \
- c = STRING_CHAR_AND_LENGTH (src, len, bytes); \
- else \
- c = *src, bytes = 1; \
- if (!NILP (translation_table)) \
- c = translate_char (translation_table, c, -1, 0, 0); \
- src += bytes; \
- } while (0)
-
-
-/* Produce a multibyte form of character C to `dst'. Jump to
- `label_end_of_loop' if there's not enough space at `dst'.
-
- If we are now in the middle of a composition sequence, the decoded
- character may be ALTCHAR (for the current composition). In that
- case, the character goes to coding->cmp_data->data instead of
- `dst'.
-
- This macro is used in decoding routines. */
-
-#define EMIT_CHAR(c) \
- do { \
- if (! COMPOSING_P (coding) \
- || coding->composing == COMPOSITION_RELATIVE \
- || coding->composing == COMPOSITION_WITH_RULE) \
- { \
- int bytes = CHAR_BYTES (c); \
- if ((dst + bytes) > (dst_bytes ? dst_end : src)) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_DST; \
- goto label_end_of_loop; \
- } \
- dst += CHAR_STRING (c, dst); \
- coding->produced_char++; \
- } \
- \
- if (COMPOSING_P (coding) \
- && coding->composing != COMPOSITION_RELATIVE) \
- { \
- CODING_ADD_COMPOSITION_COMPONENT (coding, c); \
- coding->composition_rule_follows \
- = coding->composing != COMPOSITION_WITH_ALTCHARS; \
- } \
- } while (0)
-
-
-#define EMIT_ONE_BYTE(c) \
- do { \
- if (dst >= (dst_bytes ? dst_end : src)) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_DST; \
- goto label_end_of_loop; \
- } \
- *dst++ = c; \
- } while (0)
-
-#define EMIT_TWO_BYTES(c1, c2) \
- do { \
- if (dst + 2 > (dst_bytes ? dst_end : src)) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_DST; \
- goto label_end_of_loop; \
- } \
- *dst++ = c1, *dst++ = c2; \
- } while (0)
-
-#define EMIT_BYTES(from, to) \
- do { \
- if (dst + (to - from) > (dst_bytes ? dst_end : src)) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_DST; \
- goto label_end_of_loop; \
- } \
- while (from < to) \
- *dst++ = *from++; \
- } while (0)
-
/*** 1. Preamble ***/
-#ifdef emacs
#include <config.h>
-#endif
-
#include <stdio.h>
-#ifdef emacs
-
#include "lisp.h"
#include "buffer.h"
+#include "character.h"
#include "charset.h"
-#include "composite.h"
#include "ccl.h"
+#include "composite.h"
#include "coding.h"
#include "window.h"
-#include "intervals.h"
-
-#else /* not emacs */
-
-#include "mulelib.h"
-#endif /* not emacs */
+Lisp_Object Vcoding_system_hash_table;
-Lisp_Object Qcoding_system, Qeol_type;
+Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
+Lisp_Object Qunix, Qdos;
+extern Lisp_Object Qmac; /* frame.c */
Lisp_Object Qbuffer_file_coding_system;
Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
+Lisp_Object Qdefault_char;
Lisp_Object Qno_conversion, Qundecided;
+Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
+Lisp_Object Qbig, Qlittle;
Lisp_Object Qcoding_system_history;
-Lisp_Object Qsafe_chars;
Lisp_Object Qvalid_codes;
-Lisp_Object Qascii_incompatible;
+Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
+Lisp_Object QCdecode_translation_table, QCencode_translation_table;
+Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
+Lisp_Object QCascii_compatible_p;
extern Lisp_Object Qinsert_file_contents, Qwrite_region;
Lisp_Object Qcall_process, Qcall_process_region;
Lisp_Object Qstart_process, Qopen_network_stream;
Lisp_Object Qtarget_idx;
+Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
+Lisp_Object Qinterrupted, Qinsufficient_memory;
+
/* If a symbol has this property, evaluate the value to define the
symbol as a coding system. */
-Lisp_Object Qcoding_system_define_form;
-
-Lisp_Object Vselect_safe_coding_system_function;
+static Lisp_Object Qcoding_system_define_form;
int coding_system_require_warning;
+Lisp_Object Vselect_safe_coding_system_function;
+
/* Mnemonic string for each format of end-of-line. */
Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
/* Mnemonic string to indicate format of end-of-line is not yet
decided. */
Lisp_Object eol_mnemonic_undecided;
-/* Format of end-of-line decided by system. This is CODING_EOL_LF on
- Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.
+/* Format of end-of-line decided by system. This is Qunix on
+ Unix and Mac, Qdos on DOS/Windows.
This has an effect only for external encoding (i.e. for output to
file and process), not for in-buffer or Lisp string encoding. */
-int system_eol_type;
+static Lisp_Object system_eol_type;
#ifdef emacs
-/* Information about which coding system is safe for which chars.
- The value has the form (GENERIC-LIST . NON-GENERIC-ALIST).
-
- GENERIC-LIST is a list of generic coding systems which can encode
- any characters.
-
- NON-GENERIC-ALIST is an alist of non generic coding systems vs the
- corresponding char table that contains safe chars. */
-Lisp_Object Vcoding_system_safe_chars;
-
Lisp_Object Vcoding_system_list, Vcoding_system_alist;
Lisp_Object Qcoding_system_p, Qcoding_system_error;
@@ -409,8 +355,7 @@ Lisp_Object Qcoding_system_p, Qcoding_system_error;
/* Coding system emacs-mule and raw-text are for converting only
end-of-line format. */
Lisp_Object Qemacs_mule, Qraw_text;
-
-Lisp_Object Qutf_8;
+Lisp_Object Qutf_8_emacs;
/* Coding-systems are handed between Emacs Lisp programs and C internal
routines by the following three variables. */
@@ -420,7 +365,8 @@ Lisp_Object Vcoding_system_for_read;
Lisp_Object Vcoding_system_for_write;
/* Coding-system actually used in the latest I/O. */
Lisp_Object Vlast_coding_system_used;
-
+/* Set to non-nil when an error is detected while code conversion. */
+Lisp_Object Vlast_code_conversion_error;
/* A vector of length 256 which contains information about special
Latin codes (especially for dealing with Microsoft codes). */
Lisp_Object Vlatin_extra_code_table;
@@ -444,9 +390,6 @@ struct coding_system safe_terminal_coding;
/* Coding system of what is sent from terminal keyboard. */
struct coding_system keyboard_coding;
-/* Default coding system to be used to write a file. */
-struct coding_system default_buffer_file_coding;
-
Lisp_Object Vfile_coding_system_alist;
Lisp_Object Vprocess_coding_system_alist;
Lisp_Object Vnetwork_coding_system_alist;
@@ -455,42 +398,6 @@ Lisp_Object Vlocale_coding_system;
#endif /* emacs */
-Lisp_Object Qcoding_category, Qcoding_category_index;
-
-/* List of symbols `coding-category-xxx' ordered by priority. */
-Lisp_Object Vcoding_category_list;
-
-/* Table of coding categories (Lisp symbols). */
-Lisp_Object Vcoding_category_table;
-
-/* Table of names of symbol for each coding-category. */
-char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
- "coding-category-emacs-mule",
- "coding-category-sjis",
- "coding-category-iso-7",
- "coding-category-iso-7-tight",
- "coding-category-iso-8-1",
- "coding-category-iso-8-2",
- "coding-category-iso-7-else",
- "coding-category-iso-8-else",
- "coding-category-ccl",
- "coding-category-big5",
- "coding-category-utf-8",
- "coding-category-utf-16-be",
- "coding-category-utf-16-le",
- "coding-category-raw-text",
- "coding-category-binary"
-};
-
-/* Table of pointers to coding systems corresponding to each coding
- categories. */
-struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
-
-/* Table of coding category masks. Nth element is a mask for a coding
- category of which priority is Nth. */
-static
-int coding_priorities[CODING_CATEGORY_IDX_MAX];
-
/* Flag to tell if we look up translation table on character code
conversion. */
Lisp_Object Venable_character_translation;
@@ -505,7 +412,7 @@ Lisp_Object Qtranslation_table_for_decode;
Lisp_Object Qtranslation_table_for_encode;
/* Alist of charsets vs revision number. */
-Lisp_Object Vcharset_revision_alist;
+static Lisp_Object Vcharset_revision_table;
/* Default coding systems used for process I/O. */
Lisp_Object Vdefault_process_coding_system;
@@ -513,34 +420,1198 @@ Lisp_Object Vdefault_process_coding_system;
/* Char table for translating Quail and self-inserting input. */
Lisp_Object Vtranslation_table_for_input;
-/* Global flag to tell that we can't call post-read-conversion and
- pre-write-conversion functions. Usually the value is zero, but it
- is set to 1 temporarily while such functions are running. This is
- to avoid infinite recursive call. */
-static int inhibit_pre_post_conversion;
+/* Two special coding systems. */
+Lisp_Object Vsjis_coding_system;
+Lisp_Object Vbig5_coding_system;
+
+/* ISO2022 section */
+
+#define CODING_ISO_INITIAL(coding, reg) \
+ (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
+ coding_attr_iso_initial), \
+ reg)))
+
+
+#define CODING_ISO_REQUEST(coding, charset_id) \
+ ((charset_id <= (coding)->max_charset_id \
+ ? (coding)->safe_charsets[charset_id] \
+ : -1))
+
+
+#define CODING_ISO_FLAGS(coding) \
+ ((coding)->spec.iso_2022.flags)
+#define CODING_ISO_DESIGNATION(coding, reg) \
+ ((coding)->spec.iso_2022.current_designation[reg])
+#define CODING_ISO_INVOCATION(coding, plane) \
+ ((coding)->spec.iso_2022.current_invocation[plane])
+#define CODING_ISO_SINGLE_SHIFTING(coding) \
+ ((coding)->spec.iso_2022.single_shifting)
+#define CODING_ISO_BOL(coding) \
+ ((coding)->spec.iso_2022.bol)
+#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
+ CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
+
+/* Control characters of ISO2022. */
+ /* code */ /* function */
+#define ISO_CODE_LF 0x0A /* line-feed */
+#define ISO_CODE_CR 0x0D /* carriage-return */
+#define ISO_CODE_SO 0x0E /* shift-out */
+#define ISO_CODE_SI 0x0F /* shift-in */
+#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
+#define ISO_CODE_ESC 0x1B /* escape */
+#define ISO_CODE_SS2 0x8E /* single-shift-2 */
+#define ISO_CODE_SS3 0x8F /* single-shift-3 */
+#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
+
+/* All code (1-byte) of ISO2022 is classified into one of the
+ followings. */
+enum iso_code_class_type
+ {
+ ISO_control_0, /* Control codes in the range
+ 0x00..0x1F and 0x7F, except for the
+ following 5 codes. */
+ ISO_shift_out, /* ISO_CODE_SO (0x0E) */
+ ISO_shift_in, /* ISO_CODE_SI (0x0F) */
+ ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
+ ISO_escape, /* ISO_CODE_SO (0x1B) */
+ ISO_control_1, /* Control codes in the range
+ 0x80..0x9F, except for the
+ following 3 codes. */
+ ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
+ ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
+ ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
+ ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
+ ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
+ ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
+ ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
+ };
-Lisp_Object Qchar_coding_system;
+/** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
+ `iso-flags' attribute of an iso2022 coding system. */
-/* Return `safe-chars' property of CODING_SYSTEM (symbol). Don't check
- its validity. */
+/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
+ instead of the correct short-form sequence (e.g. ESC $ A). */
+#define CODING_ISO_FLAG_LONG_FORM 0x0001
-Lisp_Object
-coding_safe_chars (coding_system)
- Lisp_Object coding_system;
+/* If set, reset graphic planes and registers at end-of-line to the
+ initial state. */
+#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
+
+/* If set, reset graphic planes and registers before any control
+ characters to the initial state. */
+#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
+
+/* If set, encode by 7-bit environment. */
+#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
+
+/* If set, use locking-shift function. */
+#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
+
+/* If set, use single-shift function. Overwrite
+ CODING_ISO_FLAG_LOCKING_SHIFT. */
+#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
+
+/* If set, use designation escape sequence. */
+#define CODING_ISO_FLAG_DESIGNATION 0x0040
+
+/* If set, produce revision number sequence. */
+#define CODING_ISO_FLAG_REVISION 0x0080
+
+/* If set, produce ISO6429's direction specifying sequence. */
+#define CODING_ISO_FLAG_DIRECTION 0x0100
+
+/* If set, assume designation states are reset at beginning of line on
+ output. */
+#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
+
+/* If set, designation sequence should be placed at beginning of line
+ on output. */
+#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
+
+/* If set, do not encode unsafe charactes on output. */
+#define CODING_ISO_FLAG_SAFE 0x0800
+
+/* If set, extra latin codes (128..159) are accepted as a valid code
+ on input. */
+#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
+
+#define CODING_ISO_FLAG_COMPOSITION 0x2000
+
+#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
+
+#define CODING_ISO_FLAG_USE_ROMAN 0x8000
+
+#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
+
+#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
+
+/* A character to be produced on output if encoding of the original
+ character is prohibited by CODING_ISO_FLAG_SAFE. */
+#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
+
+
+/* UTF-16 section */
+#define CODING_UTF_16_BOM(coding) \
+ ((coding)->spec.utf_16.bom)
+
+#define CODING_UTF_16_ENDIAN(coding) \
+ ((coding)->spec.utf_16.endian)
+
+#define CODING_UTF_16_SURROGATE(coding) \
+ ((coding)->spec.utf_16.surrogate)
+
+
+/* CCL section */
+#define CODING_CCL_DECODER(coding) \
+ AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
+#define CODING_CCL_ENCODER(coding) \
+ AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
+#define CODING_CCL_VALIDS(coding) \
+ (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
+
+/* Index for each coding category in `coding_categories' */
+
+enum coding_category
+ {
+ coding_category_iso_7,
+ coding_category_iso_7_tight,
+ coding_category_iso_8_1,
+ coding_category_iso_8_2,
+ coding_category_iso_7_else,
+ coding_category_iso_8_else,
+ coding_category_utf_8,
+ coding_category_utf_16_auto,
+ coding_category_utf_16_be,
+ coding_category_utf_16_le,
+ coding_category_utf_16_be_nosig,
+ coding_category_utf_16_le_nosig,
+ coding_category_charset,
+ coding_category_sjis,
+ coding_category_big5,
+ coding_category_ccl,
+ coding_category_emacs_mule,
+ /* All above are targets of code detection. */
+ coding_category_raw_text,
+ coding_category_undecided,
+ coding_category_max
+ };
+
+/* Definitions of flag bits used in detect_coding_XXXX. */
+#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
+#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
+#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
+#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
+#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
+#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
+#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
+#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
+#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
+#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
+#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
+#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
+#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
+#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
+#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
+#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
+#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
+#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
+
+/* This value is returned if detect_coding_mask () find nothing other
+ than ASCII characters. */
+#define CATEGORY_MASK_ANY \
+ (CATEGORY_MASK_ISO_7 \
+ | CATEGORY_MASK_ISO_7_TIGHT \
+ | CATEGORY_MASK_ISO_8_1 \
+ | CATEGORY_MASK_ISO_8_2 \
+ | CATEGORY_MASK_ISO_7_ELSE \
+ | CATEGORY_MASK_ISO_8_ELSE \
+ | CATEGORY_MASK_UTF_8 \
+ | CATEGORY_MASK_UTF_16_BE \
+ | CATEGORY_MASK_UTF_16_LE \
+ | CATEGORY_MASK_UTF_16_BE_NOSIG \
+ | CATEGORY_MASK_UTF_16_LE_NOSIG \
+ | CATEGORY_MASK_CHARSET \
+ | CATEGORY_MASK_SJIS \
+ | CATEGORY_MASK_BIG5 \
+ | CATEGORY_MASK_CCL \
+ | CATEGORY_MASK_EMACS_MULE)
+
+
+#define CATEGORY_MASK_ISO_7BIT \
+ (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
+
+#define CATEGORY_MASK_ISO_8BIT \
+ (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
+
+#define CATEGORY_MASK_ISO_ELSE \
+ (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
+
+#define CATEGORY_MASK_ISO_ESCAPE \
+ (CATEGORY_MASK_ISO_7 \
+ | CATEGORY_MASK_ISO_7_TIGHT \
+ | CATEGORY_MASK_ISO_7_ELSE \
+ | CATEGORY_MASK_ISO_8_ELSE)
+
+#define CATEGORY_MASK_ISO \
+ ( CATEGORY_MASK_ISO_7BIT \
+ | CATEGORY_MASK_ISO_8BIT \
+ | CATEGORY_MASK_ISO_ELSE)
+
+#define CATEGORY_MASK_UTF_16 \
+ (CATEGORY_MASK_UTF_16_BE \
+ | CATEGORY_MASK_UTF_16_LE \
+ | CATEGORY_MASK_UTF_16_BE_NOSIG \
+ | CATEGORY_MASK_UTF_16_LE_NOSIG)
+
+
+/* List of symbols `coding-category-xxx' ordered by priority. This
+ variable is exposed to Emacs Lisp. */
+static Lisp_Object Vcoding_category_list;
+
+/* Table of coding categories (Lisp symbols). This variable is for
+ internal use oly. */
+static Lisp_Object Vcoding_category_table;
+
+/* Table of coding-categories ordered by priority. */
+static enum coding_category coding_priorities[coding_category_max];
+
+/* Nth element is a coding context for the coding system bound to the
+ Nth coding category. */
+static struct coding_system coding_categories[coding_category_max];
+
+/*** Commonly used macros and functions ***/
+
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+#ifndef max
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
+#define CODING_GET_INFO(coding, attrs, charset_list) \
+ do { \
+ (attrs) = CODING_ID_ATTRS ((coding)->id); \
+ (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
+ } while (0)
+
+
+/* Safely get one byte from the source text pointed by SRC which ends
+ at SRC_END, and set C to that byte. If there are not enough bytes
+ in the source, it jumps to `no_more_source'. If multibytep is
+ nonzero, and a multibyte character is found at SRC, set C to the
+ negative value of the character code. The caller should declare
+ and set these variables appropriately in advance:
+ src, src_end, multibytep */
+
+#define ONE_MORE_BYTE(c) \
+ do { \
+ if (src == src_end) \
+ { \
+ if (src_base < src) \
+ record_conversion_result \
+ (coding, CODING_RESULT_INSUFFICIENT_SRC); \
+ goto no_more_source; \
+ } \
+ c = *src++; \
+ if (multibytep && (c & 0x80)) \
+ { \
+ if ((c & 0xFE) == 0xC0) \
+ c = ((c & 1) << 6) | *src++; \
+ else \
+ { \
+ src--; \
+ c = - string_char (src, &src, NULL); \
+ record_conversion_result \
+ (coding, CODING_RESULT_INVALID_SRC); \
+ } \
+ } \
+ consumed_chars++; \
+ } while (0)
+
+
+#define ONE_MORE_BYTE_NO_CHECK(c) \
+ do { \
+ c = *src++; \
+ if (multibytep && (c & 0x80)) \
+ { \
+ if ((c & 0xFE) == 0xC0) \
+ c = ((c & 1) << 6) | *src++; \
+ else \
+ { \
+ src--; \
+ c = - string_char (src, &src, NULL); \
+ record_conversion_result \
+ (coding, CODING_RESULT_INVALID_SRC); \
+ } \
+ } \
+ consumed_chars++; \
+ } while (0)
+
+
+/* Store a byte C in the place pointed by DST and increment DST to the
+ next free point, and increment PRODUCED_CHARS. The caller should
+ assure that C is 0..127, and declare and set the variable `dst'
+ appropriately in advance.
+*/
+
+
+#define EMIT_ONE_ASCII_BYTE(c) \
+ do { \
+ produced_chars++; \
+ *dst++ = (c); \
+ } while (0)
+
+
+/* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */
+
+#define EMIT_TWO_ASCII_BYTES(c1, c2) \
+ do { \
+ produced_chars += 2; \
+ *dst++ = (c1), *dst++ = (c2); \
+ } while (0)
+
+
+/* Store a byte C in the place pointed by DST and increment DST to the
+ next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
+ nonzero, store in an appropriate multibyte from. The caller should
+ declare and set the variables `dst' and `multibytep' appropriately
+ in advance. */
+
+#define EMIT_ONE_BYTE(c) \
+ do { \
+ produced_chars++; \
+ if (multibytep) \
+ { \
+ int ch = (c); \
+ if (ch >= 0x80) \
+ ch = BYTE8_TO_CHAR (ch); \
+ CHAR_STRING_ADVANCE (ch, dst); \
+ } \
+ else \
+ *dst++ = (c); \
+ } while (0)
+
+
+/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
+
+#define EMIT_TWO_BYTES(c1, c2) \
+ do { \
+ produced_chars += 2; \
+ if (multibytep) \
+ { \
+ int ch; \
+ \
+ ch = (c1); \
+ if (ch >= 0x80) \
+ ch = BYTE8_TO_CHAR (ch); \
+ CHAR_STRING_ADVANCE (ch, dst); \
+ ch = (c2); \
+ if (ch >= 0x80) \
+ ch = BYTE8_TO_CHAR (ch); \
+ CHAR_STRING_ADVANCE (ch, dst); \
+ } \
+ else \
+ { \
+ *dst++ = (c1); \
+ *dst++ = (c2); \
+ } \
+ } while (0)
+
+
+#define EMIT_THREE_BYTES(c1, c2, c3) \
+ do { \
+ EMIT_ONE_BYTE (c1); \
+ EMIT_TWO_BYTES (c2, c3); \
+ } while (0)
+
+
+#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
+ do { \
+ EMIT_TWO_BYTES (c1, c2); \
+ EMIT_TWO_BYTES (c3, c4); \
+ } while (0)
+
+
+/* Prototypes for static functions. */
+static void record_conversion_result P_ ((struct coding_system *coding,
+ enum coding_result_code result));
+static int detect_coding_utf_8 P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_utf_8 P_ ((struct coding_system *));
+static int encode_coding_utf_8 P_ ((struct coding_system *));
+
+static int detect_coding_utf_16 P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_utf_16 P_ ((struct coding_system *));
+static int encode_coding_utf_16 P_ ((struct coding_system *));
+
+static int detect_coding_iso_2022 P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_iso_2022 P_ ((struct coding_system *));
+static int encode_coding_iso_2022 P_ ((struct coding_system *));
+
+static int detect_coding_emacs_mule P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_emacs_mule P_ ((struct coding_system *));
+static int encode_coding_emacs_mule P_ ((struct coding_system *));
+
+static int detect_coding_sjis P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_sjis P_ ((struct coding_system *));
+static int encode_coding_sjis P_ ((struct coding_system *));
+
+static int detect_coding_big5 P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_big5 P_ ((struct coding_system *));
+static int encode_coding_big5 P_ ((struct coding_system *));
+
+static int detect_coding_ccl P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_ccl P_ ((struct coding_system *));
+static int encode_coding_ccl P_ ((struct coding_system *));
+
+static void decode_coding_raw_text P_ ((struct coding_system *));
+static int encode_coding_raw_text P_ ((struct coding_system *));
+
+static void coding_set_source P_ ((struct coding_system *));
+static void coding_set_destination P_ ((struct coding_system *));
+static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
+static void coding_alloc_by_making_gap P_ ((struct coding_system *,
+ EMACS_INT));
+static unsigned char *alloc_destination P_ ((struct coding_system *,
+ EMACS_INT, unsigned char *));
+static void setup_iso_safe_charsets P_ ((Lisp_Object));
+static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
+ int *, int *,
+ unsigned char *));
+static int detect_eol P_ ((const unsigned char *,
+ EMACS_INT, enum coding_category));
+static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
+static void decode_eol P_ ((struct coding_system *));
+static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
+static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
+ int, int *, int *));
+static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
+static INLINE void produce_composition P_ ((struct coding_system *, int *,
+ EMACS_INT));
+static INLINE void produce_charset P_ ((struct coding_system *, int *,
+ EMACS_INT));
+static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
+static int decode_coding P_ ((struct coding_system *));
+static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
+ struct coding_system *,
+ int *, EMACS_INT *));
+static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
+ struct coding_system *,
+ int *, EMACS_INT *));
+static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
+static int encode_coding P_ ((struct coding_system *));
+static Lisp_Object make_conversion_work_buffer P_ ((int));
+static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
+static INLINE int char_encodable_p P_ ((int, Lisp_Object));
+static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
+
+static void
+record_conversion_result (struct coding_system *coding,
+ enum coding_result_code result)
+{
+ coding->result = result;
+ switch (result)
+ {
+ case CODING_RESULT_INSUFFICIENT_SRC:
+ Vlast_code_conversion_error = Qinsufficient_source;
+ break;
+ case CODING_RESULT_INCONSISTENT_EOL:
+ Vlast_code_conversion_error = Qinconsistent_eol;
+ break;
+ case CODING_RESULT_INVALID_SRC:
+ Vlast_code_conversion_error = Qinvalid_source;
+ break;
+ case CODING_RESULT_INTERRUPT:
+ Vlast_code_conversion_error = Qinterrupted;
+ break;
+ case CODING_RESULT_INSUFFICIENT_MEM:
+ Vlast_code_conversion_error = Qinsufficient_memory;
+ break;
+ default:
+ Vlast_code_conversion_error = intern ("Unknown error");
+ }
+}
+
+#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
+ do { \
+ charset_map_loaded = 0; \
+ c = DECODE_CHAR (charset, code); \
+ if (charset_map_loaded) \
+ { \
+ const unsigned char *orig = coding->source; \
+ EMACS_INT offset; \
+ \
+ coding_set_source (coding); \
+ offset = coding->source - orig; \
+ src += offset; \
+ src_base += offset; \
+ src_end += offset; \
+ } \
+ } while (0)
+
+
+#define ASSURE_DESTINATION(bytes) \
+ do { \
+ if (dst + (bytes) >= dst_end) \
+ { \
+ int more_bytes = charbuf_end - charbuf + (bytes); \
+ \
+ dst = alloc_destination (coding, more_bytes, dst); \
+ dst_end = coding->destination + coding->dst_bytes; \
+ } \
+ } while (0)
+
+
+
+static void
+coding_set_source (coding)
+ struct coding_system *coding;
+{
+ if (BUFFERP (coding->src_object))
+ {
+ struct buffer *buf = XBUFFER (coding->src_object);
+
+ if (coding->src_pos < 0)
+ coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
+ else
+ coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
+ }
+ else if (STRINGP (coding->src_object))
+ {
+ coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
+ }
+ else
+ /* Otherwise, the source is C string and is never relocated
+ automatically. Thus we don't have to update anything. */
+ ;
+}
+
+static void
+coding_set_destination (coding)
+ struct coding_system *coding;
+{
+ if (BUFFERP (coding->dst_object))
+ {
+ if (coding->src_pos < 0)
+ {
+ coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
+ coding->dst_bytes = (GAP_END_ADDR
+ - (coding->src_bytes - coding->consumed)
+ - coding->destination);
+ }
+ else
+ {
+ /* We are sure that coding->dst_pos_byte is before the gap
+ of the buffer. */
+ coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
+ + coding->dst_pos_byte - 1);
+ coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
+ - coding->destination);
+ }
+ }
+ else
+ /* Otherwise, the destination is C string and is never relocated
+ automatically. Thus we don't have to update anything. */
+ ;
+}
+
+
+static void
+coding_alloc_by_realloc (coding, bytes)
+ struct coding_system *coding;
+ EMACS_INT bytes;
+{
+ coding->destination = (unsigned char *) xrealloc (coding->destination,
+ coding->dst_bytes + bytes);
+ coding->dst_bytes += bytes;
+}
+
+static void
+coding_alloc_by_making_gap (coding, bytes)
+ struct coding_system *coding;
+ EMACS_INT bytes;
+{
+ if (BUFFERP (coding->dst_object)
+ && EQ (coding->src_object, coding->dst_object))
+ {
+ EMACS_INT add = coding->src_bytes - coding->consumed;
+
+ GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
+ make_gap (bytes);
+ GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
+ }
+ else
+ {
+ Lisp_Object this_buffer;
+
+ this_buffer = Fcurrent_buffer ();
+ set_buffer_internal (XBUFFER (coding->dst_object));
+ make_gap (bytes);
+ set_buffer_internal (XBUFFER (this_buffer));
+ }
+}
+
+
+static unsigned char *
+alloc_destination (coding, nbytes, dst)
+ struct coding_system *coding;
+ EMACS_INT nbytes;
+ unsigned char *dst;
+{
+ EMACS_INT offset = dst - coding->destination;
+
+ if (BUFFERP (coding->dst_object))
+ coding_alloc_by_making_gap (coding, nbytes);
+ else
+ coding_alloc_by_realloc (coding, nbytes);
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
+ coding_set_destination (coding);
+ dst = coding->destination + offset;
+ return dst;
+}
+
+/** Macros for annotations. */
+
+/* Maximum length of annotation data (sum of annotations for
+ composition and charset). */
+#define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
+
+/* An annotation data is stored in the array coding->charbuf in this
+ format:
+ [ -LENGTH ANNOTATION_MASK NCHARS ... ]
+ LENGTH is the number of elements in the annotation.
+ ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
+ NCHARS is the number of characters in the text annotated.
+
+ The format of the following elements depend on ANNOTATION_MASK.
+
+ In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
+ follows:
+ ... METHOD [ COMPOSITION-COMPONENTS ... ]
+ METHOD is one of enum composition_method.
+ Optionnal COMPOSITION-COMPONENTS are characters and composition
+ rules.
+
+ In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
+ follows. */
+
+#define ADD_ANNOTATION_DATA(buf, len, mask, nchars) \
+ do { \
+ *(buf)++ = -(len); \
+ *(buf)++ = (mask); \
+ *(buf)++ = (nchars); \
+ coding->annotated = 1; \
+ } while (0);
+
+#define ADD_COMPOSITION_DATA(buf, nchars, method) \
+ do { \
+ ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
+ *buf++ = method; \
+ } while (0)
+
+
+#define ADD_CHARSET_DATA(buf, nchars, id) \
+ do { \
+ ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
+ *buf++ = id; \
+ } while (0)
+
+
+/*** 2. Emacs' internal format (emacs-utf-8) ***/
+
+
+
+
+/*** 3. UTF-8 ***/
+
+/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
+ Check if a text is encoded in UTF-8. If it is, return 1, else
+ return 0. */
+
+#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
+#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
+#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
+#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
+#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
+#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
+
+static int
+detect_coding_utf_8 (coding, detect_info)
+ struct coding_system *coding;
+ struct coding_detection_info *detect_info;
+{
+ const unsigned char *src = coding->source, *src_base;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int multibytep = coding->src_multibyte;
+ int consumed_chars = 0;
+ int found = 0;
+
+ detect_info->checked |= CATEGORY_MASK_UTF_8;
+ /* A coding system of this category is always ASCII compatible. */
+ src += coding->head_ascii;
+
+ while (1)
+ {
+ int c, c1, c2, c3, c4;
+
+ src_base = src;
+ ONE_MORE_BYTE (c);
+ if (c < 0 || UTF_8_1_OCTET_P (c))
+ continue;
+ ONE_MORE_BYTE (c1);
+ if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
+ break;
+ if (UTF_8_2_OCTET_LEADING_P (c))
+ {
+ found = CATEGORY_MASK_UTF_8;
+ continue;
+ }
+ ONE_MORE_BYTE (c2);
+ if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
+ break;
+ if (UTF_8_3_OCTET_LEADING_P (c))
+ {
+ found = CATEGORY_MASK_UTF_8;
+ continue;
+ }
+ ONE_MORE_BYTE (c3);
+ if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
+ break;
+ if (UTF_8_4_OCTET_LEADING_P (c))
+ {
+ found = CATEGORY_MASK_UTF_8;
+ continue;
+ }
+ ONE_MORE_BYTE (c4);
+ if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
+ break;
+ if (UTF_8_5_OCTET_LEADING_P (c))
+ {
+ found = CATEGORY_MASK_UTF_8;
+ continue;
+ }
+ break;
+ }
+ detect_info->rejected |= CATEGORY_MASK_UTF_8;
+ return 0;
+
+ no_more_source:
+ if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
+ {
+ detect_info->rejected |= CATEGORY_MASK_UTF_8;
+ return 0;
+ }
+ detect_info->found |= found;
+ return 1;
+}
+
+
+static void
+decode_coding_utf_8 (coding)
+ struct coding_system *coding;
+{
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ const unsigned char *src_base;
+ int *charbuf = coding->charbuf + coding->charbuf_used;
+ int *charbuf_end = coding->charbuf + coding->charbuf_size;
+ int consumed_chars = 0, consumed_chars_base;
+ int multibytep = coding->src_multibyte;
+ Lisp_Object attr, charset_list;
+
+ CODING_GET_INFO (coding, attr, charset_list);
+
+ while (1)
+ {
+ int c, c1, c2, c3, c4, c5;
+
+ src_base = src;
+ consumed_chars_base = consumed_chars;
+
+ if (charbuf >= charbuf_end)
+ break;
+
+ ONE_MORE_BYTE (c1);
+ if (c1 < 0)
+ {
+ c = - c1;
+ }
+ else if (UTF_8_1_OCTET_P(c1))
+ {
+ c = c1;
+ }
+ else
+ {
+ ONE_MORE_BYTE (c2);
+ if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
+ goto invalid_code;
+ if (UTF_8_2_OCTET_LEADING_P (c1))
+ {
+ c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
+ /* Reject overlong sequences here and below. Encoders
+ producing them are incorrect, they can be misleading,
+ and they mess up read/write invariance. */
+ if (c < 128)
+ goto invalid_code;
+ }
+ else
+ {
+ ONE_MORE_BYTE (c3);
+ if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
+ goto invalid_code;
+ if (UTF_8_3_OCTET_LEADING_P (c1))
+ {
+ c = (((c1 & 0xF) << 12)
+ | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
+ if (c < 0x800
+ || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
+ goto invalid_code;
+ }
+ else
+ {
+ ONE_MORE_BYTE (c4);
+ if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
+ goto invalid_code;
+ if (UTF_8_4_OCTET_LEADING_P (c1))
+ {
+ c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
+ | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
+ if (c < 0x10000)
+ goto invalid_code;
+ }
+ else
+ {
+ ONE_MORE_BYTE (c5);
+ if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
+ goto invalid_code;
+ if (UTF_8_5_OCTET_LEADING_P (c1))
+ {
+ c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
+ | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
+ | (c5 & 0x3F));
+ if ((c > MAX_CHAR) || (c < 0x200000))
+ goto invalid_code;
+ }
+ else
+ goto invalid_code;
+ }
+ }
+ }
+ }
+
+ *charbuf++ = c;
+ continue;
+
+ invalid_code:
+ src = src_base;
+ consumed_chars = consumed_chars_base;
+ ONE_MORE_BYTE (c);
+ *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ coding->errors++;
+ }
+
+ no_more_source:
+ coding->consumed_char += consumed_chars_base;
+ coding->consumed = src_base - coding->source;
+ coding->charbuf_used = charbuf - coding->charbuf;
+}
+
+
+static int
+encode_coding_utf_8 (coding)
+ struct coding_system *coding;
{
- Lisp_Object coding_spec, plist, safe_chars;
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int produced_chars = 0;
+ int c;
- coding_spec = Fget (coding_system, Qcoding_system);
- plist = XVECTOR (coding_spec)->contents[3];
- safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
- return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
+ if (multibytep)
+ {
+ int safe_room = MAX_MULTIBYTE_LENGTH * 2;
+
+ while (charbuf < charbuf_end)
+ {
+ unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
+
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
+ if (CHAR_BYTE8_P (c))
+ {
+ c = CHAR_TO_BYTE8 (c);
+ EMIT_ONE_BYTE (c);
+ }
+ else
+ {
+ CHAR_STRING_ADVANCE (c, pend);
+ for (p = str; p < pend; p++)
+ EMIT_ONE_BYTE (*p);
+ }
+ }
+ }
+ else
+ {
+ int safe_room = MAX_MULTIBYTE_LENGTH;
+
+ while (charbuf < charbuf_end)
+ {
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
+ if (CHAR_BYTE8_P (c))
+ *dst++ = CHAR_TO_BYTE8 (c);
+ else
+ dst += CHAR_STRING (c, dst);
+ produced_chars++;
+ }
+ }
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
+ coding->produced_char += produced_chars;
+ coding->produced = dst - coding->destination;
+ return 0;
}
-#define CODING_SAFE_CHAR_P(safe_chars, c) \
- (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
+
+/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
+ Check if a text is encoded in one of UTF-16 based coding systems.
+ If it is, return 1, else return 0. */
+
+#define UTF_16_HIGH_SURROGATE_P(val) \
+ (((val) & 0xFC00) == 0xD800)
+
+#define UTF_16_LOW_SURROGATE_P(val) \
+ (((val) & 0xFC00) == 0xDC00)
+
+#define UTF_16_INVALID_P(val) \
+ (((val) == 0xFFFE) \
+ || ((val) == 0xFFFF) \
+ || UTF_16_LOW_SURROGATE_P (val))
+
+
+static int
+detect_coding_utf_16 (coding, detect_info)
+ struct coding_system *coding;
+ struct coding_detection_info *detect_info;
+{
+ const unsigned char *src = coding->source, *src_base = src;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int multibytep = coding->src_multibyte;
+ int consumed_chars = 0;
+ int c1, c2;
+
+ detect_info->checked |= CATEGORY_MASK_UTF_16;
+ if (coding->mode & CODING_MODE_LAST_BLOCK
+ && (coding->src_chars & 1))
+ {
+ detect_info->rejected |= CATEGORY_MASK_UTF_16;
+ return 0;
+ }
+
+ ONE_MORE_BYTE (c1);
+ ONE_MORE_BYTE (c2);
+ if ((c1 == 0xFF) && (c2 == 0xFE))
+ {
+ detect_info->found |= (CATEGORY_MASK_UTF_16_LE
+ | CATEGORY_MASK_UTF_16_AUTO);
+ detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
+ | CATEGORY_MASK_UTF_16_BE_NOSIG
+ | CATEGORY_MASK_UTF_16_LE_NOSIG);
+ }
+ else if ((c1 == 0xFE) && (c2 == 0xFF))
+ {
+ detect_info->found |= (CATEGORY_MASK_UTF_16_BE
+ | CATEGORY_MASK_UTF_16_AUTO);
+ detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
+ | CATEGORY_MASK_UTF_16_BE_NOSIG
+ | CATEGORY_MASK_UTF_16_LE_NOSIG);
+ }
+ else if (c1 >= 0 && c2 >= 0)
+ {
+ detect_info->rejected
+ |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
+ }
+ no_more_source:
+ return 1;
+}
+
+static void
+decode_coding_utf_16 (coding)
+ struct coding_system *coding;
+{
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ const unsigned char *src_base;
+ int *charbuf = coding->charbuf + coding->charbuf_used;
+ int *charbuf_end = coding->charbuf + coding->charbuf_size;
+ int consumed_chars = 0, consumed_chars_base;
+ int multibytep = coding->src_multibyte;
+ enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
+ enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
+ int surrogate = CODING_UTF_16_SURROGATE (coding);
+ Lisp_Object attr, charset_list;
+
+ CODING_GET_INFO (coding, attr, charset_list);
+
+ if (bom == utf_16_with_bom)
+ {
+ int c, c1, c2;
+
+ src_base = src;
+ ONE_MORE_BYTE (c1);
+ ONE_MORE_BYTE (c2);
+ c = (c1 << 8) | c2;
+
+ if (endian == utf_16_big_endian
+ ? c != 0xFEFF : c != 0xFFFE)
+ {
+ /* The first two bytes are not BOM. Treat them as bytes
+ for a normal character. */
+ src = src_base;
+ coding->errors++;
+ }
+ CODING_UTF_16_BOM (coding) = utf_16_without_bom;
+ }
+ else if (bom == utf_16_detect_bom)
+ {
+ /* We have already tried to detect BOM and failed in
+ detect_coding. */
+ CODING_UTF_16_BOM (coding) = utf_16_without_bom;
+ }
+
+ while (1)
+ {
+ int c, c1, c2;
+
+ src_base = src;
+ consumed_chars_base = consumed_chars;
+
+ if (charbuf + 2 >= charbuf_end)
+ break;
+
+ ONE_MORE_BYTE (c1);
+ if (c1 < 0)
+ {
+ *charbuf++ = -c1;
+ continue;
+ }
+ ONE_MORE_BYTE (c2);
+ if (c2 < 0)
+ {
+ *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+ *charbuf++ = -c2;
+ continue;
+ }
+ c = (endian == utf_16_big_endian
+ ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
+ if (surrogate)
+ {
+ if (! UTF_16_LOW_SURROGATE_P (c))
+ {
+ if (endian == utf_16_big_endian)
+ c1 = surrogate >> 8, c2 = surrogate & 0xFF;
+ else
+ c1 = surrogate & 0xFF, c2 = surrogate >> 8;
+ *charbuf++ = c1;
+ *charbuf++ = c2;
+ coding->errors++;
+ if (UTF_16_HIGH_SURROGATE_P (c))
+ CODING_UTF_16_SURROGATE (coding) = surrogate = c;
+ else
+ *charbuf++ = c;
+ }
+ else
+ {
+ c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
+ CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
+ *charbuf++ = 0x10000 + c;
+ }
+ }
+ else
+ {
+ if (UTF_16_HIGH_SURROGATE_P (c))
+ CODING_UTF_16_SURROGATE (coding) = surrogate = c;
+ else
+ *charbuf++ = c;
+ }
+ }
+
+ no_more_source:
+ coding->consumed_char += consumed_chars_base;
+ coding->consumed = src_base - coding->source;
+ coding->charbuf_used = charbuf - coding->charbuf;
+}
+
+static int
+encode_coding_utf_16 (coding)
+ struct coding_system *coding;
+{
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int safe_room = 8;
+ enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
+ int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
+ int produced_chars = 0;
+ Lisp_Object attrs, charset_list;
+ int c;
+
+ CODING_GET_INFO (coding, attrs, charset_list);
+
+ if (bom != utf_16_without_bom)
+ {
+ ASSURE_DESTINATION (safe_room);
+ if (big_endian)
+ EMIT_TWO_BYTES (0xFE, 0xFF);
+ else
+ EMIT_TWO_BYTES (0xFF, 0xFE);
+ CODING_UTF_16_BOM (coding) = utf_16_without_bom;
+ }
+
+ while (charbuf < charbuf_end)
+ {
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
+ if (c >= MAX_UNICODE_CHAR)
+ c = coding->default_char;
+
+ if (c < 0x10000)
+ {
+ if (big_endian)
+ EMIT_TWO_BYTES (c >> 8, c & 0xFF);
+ else
+ EMIT_TWO_BYTES (c & 0xFF, c >> 8);
+ }
+ else
+ {
+ int c1, c2;
+
+ c -= 0x10000;
+ c1 = (c >> 10) + 0xD800;
+ c2 = (c & 0x3FF) + 0xDC00;
+ if (big_endian)
+ EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
+ else
+ EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
+ }
+ }
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
+ coding->produced = dst - coding->destination;
+ coding->produced_char += produced_chars;
+ return 0;
+}
-/*** 2. Emacs internal format (emacs-mule) handlers ***/
+/*** 6. Old Emacs' internal format (emacs-mule) ***/
/* Emacs' internal format for representation of multiple character
sets is a kind of multi-byte encoding, i.e. characters are
@@ -582,7 +1653,7 @@ coding_safe_chars (coding_system)
In that case, a sequence of one-byte codes has a slightly different
form.
- Firstly, all characters in eight-bit-control are represented by
+ At first, all characters in eight-bit-control are represented by
one-byte sequences which are their 8-bit code.
Next, character composition data are represented by the byte
@@ -591,12 +1662,12 @@ coding_safe_chars (coding_system)
METHOD is 0xF0 plus one of composition method (enum
composition_method),
- BYTES is 0xA0 plus the byte length of these composition data,
+ BYTES is 0xA0 plus a byte length of this composition data,
- CHARS is 0xA0 plus the number of characters composed by these
+ CHARS is 0x20 plus a number of characters composed by this
data,
- COMPONENTs are characters of multibyte form or composition
+ COMPONENTs are characters of multibye form or composition
rules encoded by two-byte of ASCII codes.
In addition, for backward compatibility, the following formats are
@@ -613,598 +1684,601 @@ coding_safe_chars (coding_system)
represents a composition rule.
*/
-enum emacs_code_class_type emacs_code_class[256];
-
-/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
- Check if a text is encoded in Emacs' internal format. If it is,
- return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */
+char emacs_mule_bytes[256];
-static int
-detect_coding_emacs_mule (src, src_end, multibytep)
- unsigned char *src, *src_end;
- int multibytep;
+int
+emacs_mule_char (coding, src, nbytes, nchars, id)
+ struct coding_system *coding;
+ const unsigned char *src;
+ int *nbytes, *nchars, *id;
{
- unsigned char c;
- int composing = 0;
- /* Dummy for ONE_MORE_BYTE. */
- struct coding_system dummy_coding;
- struct coding_system *coding = &dummy_coding;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ const unsigned char *src_base = src;
+ int multibytep = coding->src_multibyte;
+ struct charset *charset;
+ unsigned code;
+ int c;
+ int consumed_chars = 0;
- while (1)
+ ONE_MORE_BYTE (c);
+ if (c < 0)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
-
- if (composing)
+ c = -c;
+ charset = emacs_mule_charset[0];
+ }
+ else
+ {
+ if (c >= 0xA0)
{
- if (c < 0xA0)
- composing = 0;
- else if (c == 0xA0)
+ /* Old style component character of a compostion. */
+ if (c == 0xA0)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
- c &= 0x7F;
+ ONE_MORE_BYTE (c);
+ c -= 0x80;
}
else
c -= 0x20;
}
- if (c < 0x20)
- {
- if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
- return 0;
- }
- else if (c >= 0x80 && c < 0xA0)
+ switch (emacs_mule_bytes[c])
{
- if (c == 0x80)
- /* Old leading code for a composite character. */
- composing = 1;
+ case 2:
+ if (! (charset = emacs_mule_charset[c]))
+ goto invalid_code;
+ ONE_MORE_BYTE (c);
+ if (c < 0xA0)
+ goto invalid_code;
+ code = c & 0x7F;
+ break;
+
+ case 3:
+ if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
+ || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
+ {
+ ONE_MORE_BYTE (c);
+ if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
+ goto invalid_code;
+ ONE_MORE_BYTE (c);
+ if (c < 0xA0)
+ goto invalid_code;
+ code = c & 0x7F;
+ }
else
{
- unsigned char *src_base = src - 1;
- int bytes;
-
- if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
- bytes))
- return 0;
- src = src_base + bytes;
+ if (! (charset = emacs_mule_charset[c]))
+ goto invalid_code;
+ ONE_MORE_BYTE (c);
+ if (c < 0xA0)
+ goto invalid_code;
+ code = (c & 0x7F) << 8;
+ ONE_MORE_BYTE (c);
+ if (c < 0xA0)
+ goto invalid_code;
+ code |= c & 0x7F;
}
+ break;
+
+ case 4:
+ ONE_MORE_BYTE (c);
+ if (c < 0 || ! (charset = emacs_mule_charset[c]))
+ goto invalid_code;
+ ONE_MORE_BYTE (c);
+ if (c < 0xA0)
+ goto invalid_code;
+ code = (c & 0x7F) << 8;
+ ONE_MORE_BYTE (c);
+ if (c < 0xA0)
+ goto invalid_code;
+ code |= c & 0x7F;
+ break;
+
+ case 1:
+ code = c;
+ charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
+ ? charset_ascii : charset_eight_bit);
+ break;
+
+ default:
+ abort ();
}
+ c = DECODE_CHAR (charset, code);
+ if (c < 0)
+ goto invalid_code;
}
- label_end_of_loop:
- return CODING_CATEGORY_MASK_EMACS_MULE;
+ *nbytes = src - src_base;
+ *nchars = consumed_chars;
+ if (id)
+ *id = charset->id;
+ return c;
+
+ no_more_source:
+ return -2;
+
+ invalid_code:
+ return -1;
}
-/* Record the starting position START and METHOD of one composition. */
+/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
+ Check if a text is encoded in `emacs-mule'. If it is, return 1,
+ else return 0. */
-#define CODING_ADD_COMPOSITION_START(coding, start, method) \
- do { \
- struct composition_data *cmp_data = coding->cmp_data; \
- int *data = cmp_data->data + cmp_data->used; \
- coding->cmp_data_start = cmp_data->used; \
- data[0] = -1; \
- data[1] = cmp_data->char_offset + start; \
- data[3] = (int) method; \
- cmp_data->used += 4; \
- } while (0)
+static int
+detect_coding_emacs_mule (coding, detect_info)
+ struct coding_system *coding;
+ struct coding_detection_info *detect_info;
+{
+ const unsigned char *src = coding->source, *src_base;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int multibytep = coding->src_multibyte;
+ int consumed_chars = 0;
+ int c;
+ int found = 0;
-/* Record the ending position END of the current composition. */
+ detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
+ /* A coding system of this category is always ASCII compatible. */
+ src += coding->head_ascii;
-#define CODING_ADD_COMPOSITION_END(coding, end) \
- do { \
- struct composition_data *cmp_data = coding->cmp_data; \
- int *data = cmp_data->data + coding->cmp_data_start; \
- data[0] = cmp_data->used - coding->cmp_data_start; \
- data[2] = cmp_data->char_offset + end; \
- } while (0)
+ while (1)
+ {
+ src_base = src;
+ ONE_MORE_BYTE (c);
+ if (c < 0)
+ continue;
+ if (c == 0x80)
+ {
+ /* Perhaps the start of composite character. We simple skip
+ it because analyzing it is too heavy for detecting. But,
+ at least, we check that the composite character
+ constitues of more than 4 bytes. */
+ const unsigned char *src_base;
-/* Record one COMPONENT (alternate character or composition rule). */
+ repeat:
+ src_base = src;
+ do
+ {
+ ONE_MORE_BYTE (c);
+ }
+ while (c >= 0xA0);
-#define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \
- do { \
- coding->cmp_data->data[coding->cmp_data->used++] = component; \
- if (coding->cmp_data->used - coding->cmp_data_start \
- == COMPOSITION_DATA_MAX_BUNCH_LENGTH) \
- { \
- CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \
- coding->composing = COMPOSITION_NO; \
- } \
- } while (0)
+ if (src - src_base <= 4)
+ break;
+ found = CATEGORY_MASK_EMACS_MULE;
+ if (c == 0x80)
+ goto repeat;
+ }
+
+ if (c < 0x80)
+ {
+ if (c < 0x20
+ && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
+ break;
+ }
+ else
+ {
+ int more_bytes = emacs_mule_bytes[*src_base] - 1;
+ while (more_bytes > 0)
+ {
+ ONE_MORE_BYTE (c);
+ if (c < 0xA0)
+ {
+ src--; /* Unread the last byte. */
+ break;
+ }
+ more_bytes--;
+ }
+ if (more_bytes != 0)
+ break;
+ found = CATEGORY_MASK_EMACS_MULE;
+ }
+ }
+ detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
+ return 0;
-/* Get one byte from a data pointed by SRC and increment SRC. If SRC
- is not less than SRC_END, return -1 without incrementing Src. */
+ no_more_source:
+ if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
+ {
+ detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
+ return 0;
+ }
+ detect_info->found |= found;
+ return 1;
+}
-#define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++)
+/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
/* Decode a character represented as a component of composition
- sequence of Emacs 20 style at SRC. Set C to that character, store
- its multibyte form sequence at P, and set P to the end of that
- sequence. If no valid character is found, set C to -1. */
-
-#define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p) \
- do { \
- int bytes; \
+ sequence of Emacs 20/21 style at SRC. Set C to that character and
+ update SRC to the head of next character (or an encoded composition
+ rule). If SRC doesn't points a composition component, set C to -1.
+ If SRC points an invalid byte sequence, global exit by a return
+ value 0. */
+
+#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
+ if (1) \
+ { \
+ int c; \
+ int nbytes, nchars; \
\
- c = SAFE_ONE_MORE_BYTE (); \
- if (c < 0) \
- break; \
- if (CHAR_HEAD_P (c)) \
- c = -1; \
- else if (c == 0xA0) \
- { \
- c = SAFE_ONE_MORE_BYTE (); \
- if (c < 0xA0) \
- c = -1; \
- else \
- { \
- c -= 0x80; \
- *p++ = c; \
- } \
- } \
- else if (BASE_LEADING_CODE_P (c - 0x20)) \
- { \
- unsigned char *p0 = p; \
- \
- c -= 0x20; \
- *p++ = c; \
- bytes = BYTES_BY_CHAR_HEAD (c); \
- while (--bytes) \
- { \
- c = SAFE_ONE_MORE_BYTE (); \
- if (c < 0) \
- break; \
- *p++ = c; \
- } \
- if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes) \
- || (coding->flags /* We are recovering a file. */ \
- && p0[0] == LEADING_CODE_8_BIT_CONTROL \
- && ! CHAR_HEAD_P (p0[1]))) \
- c = STRING_CHAR (p0, bytes); \
- else \
- c = -1; \
- } \
- else \
- c = -1; \
- } while (0)
+ if (src == src_end) \
+ break; \
+ c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
+ if (c < 0) \
+ { \
+ if (c == -2) \
+ break; \
+ goto invalid_code; \
+ } \
+ *buf++ = c; \
+ src += nbytes; \
+ consumed_chars += nchars; \
+ } \
+ else
/* Decode a composition rule represented as a component of composition
- sequence of Emacs 20 style at SRC. Set C to the rule. If not
- valid rule is found, set C to -1. */
+ sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
+ and increment BUF. If SRC points an invalid byte sequence, set C
+ to -1. */
-#define DECODE_EMACS_MULE_COMPOSITION_RULE(c) \
+#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
do { \
- c = SAFE_ONE_MORE_BYTE (); \
+ int c, gref, nref; \
+ \
+ if (src >= src_end) \
+ goto invalid_code; \
+ ONE_MORE_BYTE_NO_CHECK (c); \
c -= 0xA0; \
if (c < 0 || c >= 81) \
- c = -1; \
- else \
- { \
- gref = c / 9, nref = c % 9; \
- c = COMPOSITION_ENCODE_RULE (gref, nref); \
- } \
+ goto invalid_code; \
+ \
+ gref = c / 9, nref = c % 9; \
+ *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
} while (0)
-/* Decode composition sequence encoded by `emacs-mule' at the source
- pointed by SRC. SRC_END is the end of source. Store information
- of the composition in CODING->cmp_data.
-
- For backward compatibility, decode also a composition sequence of
- Emacs 20 style. In that case, the composition sequence contains
- characters that should be extracted into a buffer or string. Store
- those characters at *DESTINATION in multibyte form.
-
- If we encounter an invalid byte sequence, return 0.
- If we encounter an insufficient source or destination, or
- insufficient space in CODING->cmp_data, return 1.
- Otherwise, return consumed bytes in the source.
+/* Decode a composition rule represented as a component of composition
+ sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
+ and increment BUF. If SRC points an invalid byte sequence, set C
+ to -1. */
-*/
-static INLINE int
-decode_composition_emacs_mule (coding, src, src_end,
- destination, dst_end, dst_bytes)
- struct coding_system *coding;
- const unsigned char *src, *src_end;
- unsigned char **destination, *dst_end;
- int dst_bytes;
-{
- unsigned char *dst = *destination;
- int method, data_len, nchars;
- const unsigned char *src_base = src++;
- /* Store components of composition. */
- int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
- int ncomponent;
- /* Store multibyte form of characters to be composed. This is for
- Emacs 20 style composition sequence. */
- unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
- unsigned char *bufp = buf;
- int c, i, gref, nref;
+#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
+ do { \
+ int gref, nref; \
+ \
+ if (src + 1>= src_end) \
+ goto invalid_code; \
+ ONE_MORE_BYTE_NO_CHECK (gref); \
+ gref -= 0x20; \
+ ONE_MORE_BYTE_NO_CHECK (nref); \
+ nref -= 0x20; \
+ if (gref < 0 || gref >= 81 \
+ || nref < 0 || nref >= 81) \
+ goto invalid_code; \
+ *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
+ } while (0)
- if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
- >= COMPOSITION_DATA_SIZE)
- {
- coding->result = CODING_FINISH_INSUFFICIENT_CMP;
- return -1;
- }
- ONE_MORE_BYTE (c);
- if (c - 0xF0 >= COMPOSITION_RELATIVE
- && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
- {
- int with_rule;
+#define DECODE_EMACS_MULE_21_COMPOSITION(c) \
+ do { \
+ /* Emacs 21 style format. The first three bytes at SRC are \
+ (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
+ the byte length of this composition information, CHARS is the \
+ number of characters composed by this composition. */ \
+ enum composition_method method = c - 0xF2; \
+ int *charbuf_base = charbuf; \
+ int consumed_chars_limit; \
+ int nbytes, nchars; \
+ \
+ ONE_MORE_BYTE (c); \
+ if (c < 0) \
+ goto invalid_code; \
+ nbytes = c - 0xA0; \
+ if (nbytes < 3) \
+ goto invalid_code; \
+ ONE_MORE_BYTE (c); \
+ if (c < 0) \
+ goto invalid_code; \
+ nchars = c - 0xA0; \
+ ADD_COMPOSITION_DATA (charbuf, nchars, method); \
+ consumed_chars_limit = consumed_chars_base + nbytes; \
+ if (method != COMPOSITION_RELATIVE) \
+ { \
+ int i = 0; \
+ while (consumed_chars < consumed_chars_limit) \
+ { \
+ if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
+ DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
+ else \
+ DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
+ i++; \
+ } \
+ if (consumed_chars < consumed_chars_limit) \
+ goto invalid_code; \
+ charbuf_base[0] -= i; \
+ } \
+ } while (0)
- method = c - 0xF0;
- with_rule = (method == COMPOSITION_WITH_RULE
- || method == COMPOSITION_WITH_RULE_ALTCHARS);
- ONE_MORE_BYTE (c);
- data_len = c - 0xA0;
- if (data_len < 4
- || src_base + data_len > src_end)
- return 0;
- ONE_MORE_BYTE (c);
- nchars = c - 0xA0;
- if (c < 1)
- return 0;
- for (ncomponent = 0; src < src_base + data_len; ncomponent++)
- {
- /* If it is longer than this, it can't be valid. */
- if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
- return 0;
- if (ncomponent % 2 && with_rule)
- {
- ONE_MORE_BYTE (gref);
- gref -= 32;
- ONE_MORE_BYTE (nref);
- nref -= 32;
- c = COMPOSITION_ENCODE_RULE (gref, nref);
- }
- else
- {
- int bytes;
- if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
- || (coding->flags /* We are recovering a file. */
- && src[0] == LEADING_CODE_8_BIT_CONTROL
- && ! CHAR_HEAD_P (src[1])))
- c = STRING_CHAR (src, bytes);
- else
- c = *src, bytes = 1;
- src += bytes;
- }
- component[ncomponent] = c;
- }
- }
- else if (c >= 0x80)
- {
- /* This may be an old Emacs 20 style format. See the comment at
- the section 2 of this file. */
- while (src < src_end && !CHAR_HEAD_P (*src)) src++;
- if (src == src_end
- && !(coding->mode & CODING_MODE_LAST_BLOCK))
- goto label_end_of_loop;
+#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
+ do { \
+ /* Emacs 20 style format for relative composition. */ \
+ /* Store multibyte form of characters to be composed. */ \
+ enum composition_method method = COMPOSITION_RELATIVE; \
+ int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
+ int *buf = components; \
+ int i, j; \
+ \
+ src = src_base; \
+ ONE_MORE_BYTE (c); /* skip 0x80 */ \
+ for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++) \
+ DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
+ if (i < 2) \
+ goto invalid_code; \
+ ADD_COMPOSITION_DATA (charbuf, i, method); \
+ for (j = 0; j < i; j++) \
+ *charbuf++ = components[j]; \
+ } while (0)
- src_end = src;
- src = src_base + 1;
- if (c < 0xC0)
- {
- method = COMPOSITION_RELATIVE;
- for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
- {
- DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
- if (c < 0)
- break;
- component[ncomponent++] = c;
- }
- if (ncomponent < 2)
- return 0;
- nchars = ncomponent;
- }
- else if (c == 0xFF)
- {
- method = COMPOSITION_WITH_RULE;
- src++;
- DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
- if (c < 0)
- return 0;
- component[0] = c;
- for (ncomponent = 1;
- ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
- {
- DECODE_EMACS_MULE_COMPOSITION_RULE (c);
- if (c < 0)
- break;
- component[ncomponent++] = c;
- DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
- if (c < 0)
- break;
- component[ncomponent++] = c;
- }
- if (ncomponent < 3)
- return 0;
- nchars = (ncomponent + 1) / 2;
- }
- else
- return 0;
- }
- else
- return 0;
- if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
- {
- CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
- for (i = 0; i < ncomponent; i++)
- CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
- CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
- if (buf < bufp)
- {
- unsigned char *p = buf;
- EMIT_BYTES (p, bufp);
- *destination += bufp - buf;
- coding->produced_char += nchars;
- }
- return (src - src_base);
- }
- label_end_of_loop:
- return -1;
-}
+#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
+ do { \
+ /* Emacs 20 style format for rule-base composition. */ \
+ /* Store multibyte form of characters to be composed. */ \
+ enum composition_method method = COMPOSITION_WITH_RULE; \
+ int *charbuf_base = charbuf; \
+ int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
+ int *buf = components; \
+ int i, j; \
+ \
+ DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
+ for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++) \
+ { \
+ if (*src < 0xA0) \
+ break; \
+ DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
+ DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
+ } \
+ if (i <= 1 || (buf - components) % 2 == 0) \
+ goto invalid_code; \
+ if (charbuf + i + (i / 2) + 1 >= charbuf_end) \
+ goto no_more_source; \
+ ADD_COMPOSITION_DATA (charbuf, i, method); \
+ i = i * 2 - 1; \
+ for (j = 0; j < i; j++) \
+ *charbuf++ = components[j]; \
+ charbuf_base[0] -= i; \
+ for (j = 0; j < i; j += 2) \
+ *charbuf++ = components[j]; \
+ } while (0)
-/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
static void
-decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
+decode_coding_emacs_mule (coding)
struct coding_system *coding;
- const unsigned char *source;
- unsigned char *destination;
- int src_bytes, dst_bytes;
{
- const unsigned char *src = source;
- const unsigned char *src_end = source + src_bytes;
- unsigned char *dst = destination;
- unsigned char *dst_end = destination + dst_bytes;
- /* SRC_BASE remembers the start position in source in each loop.
- The loop will be exited when there's not enough source code, or
- when there's not enough destination area to produce a
- character. */
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
+ int *charbuf = coding->charbuf + coding->charbuf_used;
+ int *charbuf_end
+ = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+ int consumed_chars = 0, consumed_chars_base;
+ int multibytep = coding->src_multibyte;
+ Lisp_Object attrs, charset_list;
+ int char_offset = coding->produced_char;
+ int last_offset = char_offset;
+ int last_id = charset_ascii;
+
+ CODING_GET_INFO (coding, attrs, charset_list);
- coding->produced_char = 0;
- while ((src_base = src) < src_end)
+ while (1)
{
- unsigned char tmp[MAX_MULTIBYTE_LENGTH];
- const unsigned char *p;
- int bytes;
+ int c;
- if (*src == '\r')
- {
- int c = *src++;
+ src_base = src;
+ consumed_chars_base = consumed_chars;
- if (coding->eol_type == CODING_EOL_CR)
- c = '\n';
- else if (coding->eol_type == CODING_EOL_CRLF)
- {
- ONE_MORE_BYTE (c);
- if (c != '\n')
- {
- src--;
- c = '\r';
- }
- }
- *dst++ = c;
- coding->produced_char++;
- continue;
- }
- else if (*src == '\n')
+ if (charbuf >= charbuf_end)
+ break;
+
+ ONE_MORE_BYTE (c);
+ if (c < 0)
{
- if ((coding->eol_type == CODING_EOL_CR
- || coding->eol_type == CODING_EOL_CRLF)
- && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- {
- coding->result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop;
- }
- *dst++ = *src++;
- coding->produced_char++;
- continue;
+ *charbuf++ = -c;
+ char_offset++;
}
- else if (*src == 0x80 && coding->cmp_data)
+ else if (c < 0x80)
{
- /* Start of composition data. */
- int consumed = decode_composition_emacs_mule (coding, src, src_end,
- &dst, dst_end,
- dst_bytes);
- if (consumed < 0)
- goto label_end_of_loop;
- else if (consumed > 0)
- {
- src += consumed;
- continue;
- }
- bytes = CHAR_STRING (*src, tmp);
- p = tmp;
- src++;
+ *charbuf++ = c;
+ char_offset++;
}
- else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
- || (coding->flags /* We are recovering a file. */
- && src[0] == LEADING_CODE_8_BIT_CONTROL
- && ! CHAR_HEAD_P (src[1])))
+ else if (c == 0x80)
{
- p = src;
- src += bytes;
+ ONE_MORE_BYTE (c);
+ if (c < 0)
+ goto invalid_code;
+ if (c - 0xF2 >= COMPOSITION_RELATIVE
+ && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
+ DECODE_EMACS_MULE_21_COMPOSITION (c);
+ else if (c < 0xC0)
+ DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
+ else if (c == 0xFF)
+ DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
+ else
+ goto invalid_code;
}
- else
+ else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
{
- int i, c;
+ int nbytes, nchars;
+ int id;
- bytes = BYTES_BY_CHAR_HEAD (*src);
- src++;
- for (i = 1; i < bytes; i++)
+ src = src_base;
+ consumed_chars = consumed_chars_base;
+ c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
+ if (c < 0)
{
- ONE_MORE_BYTE (c);
- if (CHAR_HEAD_P (c))
+ if (c == -2)
break;
+ goto invalid_code;
}
- if (i < bytes)
- {
- bytes = CHAR_STRING (*src_base, tmp);
- p = tmp;
- src = src_base + 1;
- }
- else
+ if (last_id != id)
{
- p = src_base;
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
+ last_id = id;
+ last_offset = char_offset;
}
+ *charbuf++ = c;
+ src += nbytes;
+ consumed_chars += nchars;
+ char_offset++;
}
- if (dst + bytes >= (dst_bytes ? dst_end : src))
- {
- coding->result = CODING_FINISH_INSUFFICIENT_DST;
- break;
- }
- while (bytes--) *dst++ = *p++;
- coding->produced_char++;
- }
- label_end_of_loop:
- coding->consumed = coding->consumed_char = src_base - source;
- coding->produced = dst - destination;
-}
+ else
+ goto invalid_code;
+ continue;
+ invalid_code:
+ src = src_base;
+ consumed_chars = consumed_chars_base;
+ ONE_MORE_BYTE (c);
+ *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ char_offset++;
+ coding->errors++;
+ }
-/* Encode composition data stored at DATA into a special byte sequence
- starting by 0x80. Update CODING->cmp_data_start and maybe
- CODING->cmp_data for the next call. */
+ no_more_source:
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
+ coding->consumed_char += consumed_chars_base;
+ coding->consumed = src_base - coding->source;
+ coding->charbuf_used = charbuf - coding->charbuf;
+}
-#define ENCODE_COMPOSITION_EMACS_MULE(coding, data) \
- do { \
- unsigned char buf[1024], *p0 = buf, *p; \
- int len = data[0]; \
- int i; \
- \
- buf[0] = 0x80; \
- buf[1] = 0xF0 + data[3]; /* METHOD */ \
- buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */ \
- p = buf + 4; \
- if (data[3] == COMPOSITION_WITH_RULE \
- || data[3] == COMPOSITION_WITH_RULE_ALTCHARS) \
- { \
- p += CHAR_STRING (data[4], p); \
- for (i = 5; i < len; i += 2) \
- { \
- int gref, nref; \
- COMPOSITION_DECODE_RULE (data[i], gref, nref); \
- *p++ = 0x20 + gref; \
- *p++ = 0x20 + nref; \
- p += CHAR_STRING (data[i + 1], p); \
- } \
- } \
- else \
- { \
- for (i = 4; i < len; i++) \
- p += CHAR_STRING (data[i], p); \
- } \
- buf[2] = 0xA0 + (p - buf); /* COMPONENTS-BYTES */ \
- \
- if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src)) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_DST; \
- goto label_end_of_loop; \
- } \
- while (p0 < p) \
- *dst++ = *p0++; \
- coding->cmp_data_start += data[0]; \
- if (coding->cmp_data_start == coding->cmp_data->used \
- && coding->cmp_data->next) \
- { \
- coding->cmp_data = coding->cmp_data->next; \
- coding->cmp_data_start = 0; \
- } \
- } while (0)
+#define EMACS_MULE_LEADING_CODES(id, codes) \
+ do { \
+ if (id < 0xA0) \
+ codes[0] = id, codes[1] = 0; \
+ else if (id < 0xE0) \
+ codes[0] = 0x9A, codes[1] = id; \
+ else if (id < 0xF0) \
+ codes[0] = 0x9B, codes[1] = id; \
+ else if (id < 0xF5) \
+ codes[0] = 0x9C, codes[1] = id; \
+ else \
+ codes[0] = 0x9D, codes[1] = id; \
+ } while (0);
-static void encode_eol P_ ((struct coding_system *, const unsigned char *,
- unsigned char *, int, int));
-static void
-encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
+static int
+encode_coding_emacs_mule (coding)
struct coding_system *coding;
- const unsigned char *source;
- unsigned char *destination;
- int src_bytes, dst_bytes;
{
- const unsigned char *src = source;
- const unsigned char *src_end = source + src_bytes;
- unsigned char *dst = destination;
- unsigned char *dst_end = destination + dst_bytes;
- const unsigned char *src_base;
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int safe_room = 8;
+ int produced_chars = 0;
+ Lisp_Object attrs, charset_list;
int c;
- int char_offset;
- int *data;
-
- Lisp_Object translation_table;
-
- translation_table = Qnil;
+ int preferred_charset_id = -1;
- /* Optimization for the case that there's no composition. */
- if (!coding->cmp_data || coding->cmp_data->used == 0)
+ CODING_GET_INFO (coding, attrs, charset_list);
+ if (! EQ (charset_list, Vemacs_mule_charset_list))
{
- encode_eol (coding, source, destination, src_bytes, dst_bytes);
- return;
+ CODING_ATTR_CHARSET_LIST (attrs)
+ = charset_list = Vemacs_mule_charset_list;
}
- char_offset = coding->cmp_data->char_offset;
- data = coding->cmp_data->data + coding->cmp_data_start;
- while (1)
+ while (charbuf < charbuf_end)
{
- src_base = src;
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
- /* If SRC starts a composition, encode the information about the
- composition in advance. */
- if (coding->cmp_data_start < coding->cmp_data->used
- && char_offset + coding->consumed_char == data[1])
+ if (c < 0)
{
- ENCODE_COMPOSITION_EMACS_MULE (coding, data);
- char_offset = coding->cmp_data->char_offset;
- data = coding->cmp_data->data + coding->cmp_data_start;
+ /* Handle an annotation. */
+ switch (*charbuf)
+ {
+ case CODING_ANNOTATE_COMPOSITION_MASK:
+ /* Not yet implemented. */
+ break;
+ case CODING_ANNOTATE_CHARSET_MASK:
+ preferred_charset_id = charbuf[3];
+ if (preferred_charset_id >= 0
+ && NILP (Fmemq (make_number (preferred_charset_id),
+ charset_list)))
+ preferred_charset_id = -1;
+ break;
+ default:
+ abort ();
+ }
+ charbuf += -c - 1;
+ continue;
}
- ONE_MORE_CHAR (c);
- if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF
- || coding->eol_type == CODING_EOL_CR))
+ if (ASCII_CHAR_P (c))
+ EMIT_ONE_ASCII_BYTE (c);
+ else if (CHAR_BYTE8_P (c))
{
- if (coding->eol_type == CODING_EOL_CRLF)
- EMIT_TWO_BYTES ('\r', c);
- else
- EMIT_ONE_BYTE ('\r');
+ c = CHAR_TO_BYTE8 (c);
+ EMIT_ONE_BYTE (c);
}
- else if (SINGLE_BYTE_CHAR_P (c))
+ else
{
- if (coding->flags && ! ASCII_BYTE_P (c))
- {
- /* As we are auto saving, retain the multibyte form for
- 8-bit chars. */
- unsigned char buf[MAX_MULTIBYTE_LENGTH];
- int bytes = CHAR_STRING (c, buf);
+ struct charset *charset;
+ unsigned code;
+ int dimension;
+ int emacs_mule_id;
+ unsigned char leading_codes[2];
- if (bytes == 1)
- EMIT_ONE_BYTE (buf[0]);
- else
- EMIT_TWO_BYTES (buf[0], buf[1]);
+ if (preferred_charset_id >= 0)
+ {
+ charset = CHARSET_FROM_ID (preferred_charset_id);
+ if (! CHAR_CHARSET_P (c, charset))
+ charset = char_charset (c, charset_list, NULL);
}
else
- EMIT_ONE_BYTE (c);
+ charset = char_charset (c, charset_list, &code);
+ if (! charset)
+ {
+ c = coding->default_char;
+ if (ASCII_CHAR_P (c))
+ {
+ EMIT_ONE_ASCII_BYTE (c);
+ continue;
+ }
+ charset = char_charset (c, charset_list, &code);
+ }
+ dimension = CHARSET_DIMENSION (charset);
+ emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
+ EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
+ EMIT_ONE_BYTE (leading_codes[0]);
+ if (leading_codes[1])
+ EMIT_ONE_BYTE (leading_codes[1]);
+ if (dimension == 1)
+ EMIT_ONE_BYTE (code | 0x80);
+ else
+ {
+ code |= 0x8080;
+ EMIT_ONE_BYTE (code >> 8);
+ EMIT_ONE_BYTE (code & 0xFF);
+ }
}
- else
- EMIT_BYTES (src_base, src);
- coding->consumed_char++;
}
- label_end_of_loop:
- coding->consumed = src_base - source;
- coding->produced = coding->produced_char = dst - destination;
- return;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
+ coding->produced_char += produced_chars;
+ coding->produced = dst - coding->destination;
+ return 0;
}
-/*** 3. ISO2022 handlers ***/
+/*** 7. ISO2022 handlers ***/
/* The following note describes the coding system ISO2022 briefly.
Since the intention of this note is to help understand the
@@ -1334,7 +2408,7 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
7-bit environment, non-locking-shift, and non-single-shift.
Note (**): If <F> is '@', 'A', or 'B', the intermediate character
- '(' can be omitted. We refer to this as "short-form" hereafter.
+ '(' must be omitted. We refer to this as "short-form" hereafter.
Now you may notice that there are a lot of ways of encoding the
same multilingual text in ISO2022. Actually, there exist many
@@ -1364,10 +2438,10 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
Since these are not standard escape sequences of any ISO standard,
the use of them with these meanings is restricted to Emacs only.
- (*) This form is used only in Emacs 20.5 and older versions,
- but the newer versions can safely decode it.
+ (*) This form is used only in Emacs 20.7 and older versions,
+ but newer versions can safely decode it.
(**) This form is used only in Emacs 21.1 and newer versions,
- and the older versions can't decode it.
+ and older versions can't decode it.
Here's a list of example usages of these composition escape
sequences (categorized by `enum composition_method').
@@ -1383,421 +2457,439 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
enum iso_code_class_type iso_code_class[256];
-#define CHARSET_OK(idx, charset, c) \
- (coding_system_table[idx] \
- && (charset == CHARSET_ASCII \
- || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \
- CODING_SAFE_CHAR_P (safe_chars, c))) \
- && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
- charset) \
- != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
+#define SAFE_CHARSET_P(coding, id) \
+ ((id) <= (coding)->max_charset_id \
+ && (coding)->safe_charsets[id] >= 0)
-#define SHIFT_OUT_OK(idx) \
- (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
-#define COMPOSITION_OK(idx) \
- (coding_system_table[idx]->composing != COMPOSITION_DISABLED)
+#define SHIFT_OUT_OK(category) \
+ (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
+
+static void
+setup_iso_safe_charsets (attrs)
+ Lisp_Object attrs;
+{
+ Lisp_Object charset_list, safe_charsets;
+ Lisp_Object request;
+ Lisp_Object reg_usage;
+ Lisp_Object tail;
+ int reg94, reg96;
+ int flags = XINT (AREF (attrs, coding_attr_iso_flags));
+ int max_charset_id;
+
+ charset_list = CODING_ATTR_CHARSET_LIST (attrs);
+ if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
+ && ! EQ (charset_list, Viso_2022_charset_list))
+ {
+ CODING_ATTR_CHARSET_LIST (attrs)
+ = charset_list = Viso_2022_charset_list;
+ ASET (attrs, coding_attr_safe_charsets, Qnil);
+ }
+
+ if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
+ return;
+
+ max_charset_id = 0;
+ for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
+ {
+ int id = XINT (XCAR (tail));
+ if (max_charset_id < id)
+ max_charset_id = id;
+ }
+
+ safe_charsets = Fmake_string (make_number (max_charset_id + 1),
+ make_number (255));
+ request = AREF (attrs, coding_attr_iso_request);
+ reg_usage = AREF (attrs, coding_attr_iso_usage);
+ reg94 = XINT (XCAR (reg_usage));
+ reg96 = XINT (XCDR (reg_usage));
+
+ for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
+ {
+ Lisp_Object id;
+ Lisp_Object reg;
+ struct charset *charset;
+
+ id = XCAR (tail);
+ charset = CHARSET_FROM_ID (XINT (id));
+ reg = Fcdr (Fassq (id, request));
+ if (! NILP (reg))
+ SSET (safe_charsets, XINT (id), XINT (reg));
+ else if (charset->iso_chars_96)
+ {
+ if (reg96 < 4)
+ SSET (safe_charsets, XINT (id), reg96);
+ }
+ else
+ {
+ if (reg94 < 4)
+ SSET (safe_charsets, XINT (id), reg94);
+ }
+ }
+ ASET (attrs, coding_attr_safe_charsets, safe_charsets);
+}
+
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
- Check if a text is encoded in ISO2022. If it is, return an
- integer in which appropriate flag bits any of:
- CODING_CATEGORY_MASK_ISO_7
- CODING_CATEGORY_MASK_ISO_7_TIGHT
- CODING_CATEGORY_MASK_ISO_8_1
- CODING_CATEGORY_MASK_ISO_8_2
- CODING_CATEGORY_MASK_ISO_7_ELSE
- CODING_CATEGORY_MASK_ISO_8_ELSE
- are set. If a code which should never appear in ISO2022 is found,
- returns 0. */
+ Check if a text is encoded in one of ISO-2022 based codig systems.
+ If it is, return 1, else return 0. */
static int
-detect_coding_iso2022 (src, src_end, multibytep)
- unsigned char *src, *src_end;
- int multibytep;
+detect_coding_iso_2022 (coding, detect_info)
+ struct coding_system *coding;
+ struct coding_detection_info *detect_info;
{
- int mask = CODING_CATEGORY_MASK_ISO;
- int mask_found = 0;
- int reg[4], shift_out = 0, single_shifting = 0;
- int c, c1, charset;
- /* Dummy for ONE_MORE_BYTE. */
- struct coding_system dummy_coding;
- struct coding_system *coding = &dummy_coding;
- Lisp_Object safe_chars;
-
- reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
- while (mask && src < src_end)
- {
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
- retry:
+ const unsigned char *src = coding->source, *src_base = src;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int multibytep = coding->src_multibyte;
+ int single_shifting = 0;
+ int id;
+ int c, c1;
+ int consumed_chars = 0;
+ int i;
+ int rejected = 0;
+ int found = 0;
+
+ detect_info->checked |= CATEGORY_MASK_ISO;
+
+ for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
+ {
+ struct coding_system *this = &(coding_categories[i]);
+ Lisp_Object attrs, val;
+
+ attrs = CODING_ID_ATTRS (this->id);
+ if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
+ && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
+ setup_iso_safe_charsets (attrs);
+ val = CODING_ATTR_SAFE_CHARSETS (attrs);
+ this->max_charset_id = SCHARS (val) - 1;
+ this->safe_charsets = (char *) SDATA (val);
+ }
+
+ /* A coding system of this category is always ASCII compatible. */
+ src += coding->head_ascii;
+
+ while (rejected != CATEGORY_MASK_ISO)
+ {
+ src_base = src;
+ ONE_MORE_BYTE (c);
switch (c)
{
case ISO_CODE_ESC:
if (inhibit_iso_escape_detection)
break;
single_shifting = 0;
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE (c);
if (c >= '(' && c <= '/')
{
/* Designation sequence for a charset of dimension 1. */
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
+ ONE_MORE_BYTE (c1);
if (c1 < ' ' || c1 >= 0x80
- || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
+ || (id = iso_charset_table[0][c >= ','][c1]) < 0)
/* Invalid designation sequence. Just ignore. */
break;
- reg[(c - '(') % 4] = charset;
}
else if (c == '$')
{
/* Designation sequence for a charset of dimension 2. */
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE (c);
if (c >= '@' && c <= 'B')
/* Designation for JISX0208.1978, GB2312, or JISX0208. */
- reg[0] = charset = iso_charset_table[1][0][c];
+ id = iso_charset_table[1][0][c];
else if (c >= '(' && c <= '/')
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
+ ONE_MORE_BYTE (c1);
if (c1 < ' ' || c1 >= 0x80
- || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
+ || (id = iso_charset_table[1][c >= ','][c1]) < 0)
/* Invalid designation sequence. Just ignore. */
break;
- reg[(c - '(') % 4] = charset;
}
else
- /* Invalid designation sequence. Just ignore. */
+ /* Invalid designation sequence. Just ignore it. */
break;
}
else if (c == 'N' || c == 'O')
{
/* ESC <Fe> for SS2 or SS3. */
- mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
+ single_shifting = 1;
+ rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
break;
}
else if (c >= '0' && c <= '4')
{
/* ESC <Fp> for start/end composition. */
- if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7))
- mask_found |= CODING_CATEGORY_MASK_ISO_7;
- else
- mask &= ~CODING_CATEGORY_MASK_ISO_7;
- if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT))
- mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
- else
- mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
- if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1))
- mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
- else
- mask &= ~CODING_CATEGORY_MASK_ISO_8_1;
- if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2))
- mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
- else
- mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
- if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE))
- mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
- else
- mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
- if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))
- mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
- else
- mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
+ found |= CATEGORY_MASK_ISO;
break;
}
else
- /* Invalid escape sequence. Just ignore. */
- break;
+ {
+ /* Invalid escape sequence. Just ignore it. */
+ break;
+ }
/* We found a valid designation sequence for CHARSET. */
- mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
- c = MAKE_CHAR (charset, 0, 0);
- if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
- mask_found |= CODING_CATEGORY_MASK_ISO_7;
+ rejected |= CATEGORY_MASK_ISO_8BIT;
+ if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
+ id))
+ found |= CATEGORY_MASK_ISO_7;
else
- mask &= ~CODING_CATEGORY_MASK_ISO_7;
- if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
- mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
+ rejected |= CATEGORY_MASK_ISO_7;
+ if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
+ id))
+ found |= CATEGORY_MASK_ISO_7_TIGHT;
else
- mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
- if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
- mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
+ rejected |= CATEGORY_MASK_ISO_7_TIGHT;
+ if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
+ id))
+ found |= CATEGORY_MASK_ISO_7_ELSE;
else
- mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
- if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
- mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
+ rejected |= CATEGORY_MASK_ISO_7_ELSE;
+ if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
+ id))
+ found |= CATEGORY_MASK_ISO_8_ELSE;
else
- mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
+ rejected |= CATEGORY_MASK_ISO_8_ELSE;
break;
case ISO_CODE_SO:
- if (inhibit_iso_escape_detection)
- break;
- single_shifting = 0;
- if (shift_out == 0
- && (reg[1] >= 0
- || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
- || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
- {
- /* Locking shift out. */
- mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
- mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
- }
- break;
-
case ISO_CODE_SI:
+ /* Locking shift out/in. */
if (inhibit_iso_escape_detection)
break;
single_shifting = 0;
- if (shift_out == 1)
- {
- /* Locking shift in. */
- mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
- mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
- }
+ rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
+ found |= CATEGORY_MASK_ISO_ELSE;
break;
case ISO_CODE_CSI:
+ /* Control sequence introducer. */
single_shifting = 0;
+ rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
+ found |= CATEGORY_MASK_ISO_8_ELSE;
+ goto check_extra_latin;
+
case ISO_CODE_SS2:
case ISO_CODE_SS3:
- {
- int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
-
- if (inhibit_iso_escape_detection)
- break;
- if (c != ISO_CODE_CSI)
- {
- if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
- & CODING_FLAG_ISO_SINGLE_SHIFT)
- newmask |= CODING_CATEGORY_MASK_ISO_8_1;
- if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
- & CODING_FLAG_ISO_SINGLE_SHIFT)
- newmask |= CODING_CATEGORY_MASK_ISO_8_2;
- single_shifting = 1;
- }
- if (VECTORP (Vlatin_extra_code_table)
- && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
- {
- if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
- & CODING_FLAG_ISO_LATIN_EXTRA)
- newmask |= CODING_CATEGORY_MASK_ISO_8_1;
- if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
- & CODING_FLAG_ISO_LATIN_EXTRA)
- newmask |= CODING_CATEGORY_MASK_ISO_8_2;
- }
- mask &= newmask;
- mask_found |= newmask;
- }
- break;
+ /* Single shift. */
+ if (inhibit_iso_escape_detection)
+ break;
+ single_shifting = 0;
+ rejected |= CATEGORY_MASK_ISO_7BIT;
+ if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
+ & CODING_ISO_FLAG_SINGLE_SHIFT)
+ found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
+ if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
+ & CODING_ISO_FLAG_SINGLE_SHIFT)
+ found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
+ if (single_shifting)
+ break;
+ goto check_extra_latin;
default:
+ if (c < 0)
+ continue;
if (c < 0x80)
{
single_shifting = 0;
break;
}
- else if (c < 0xA0)
- {
- single_shifting = 0;
- if (VECTORP (Vlatin_extra_code_table)
- && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
- {
- int newmask = 0;
-
- if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
- & CODING_FLAG_ISO_LATIN_EXTRA)
- newmask |= CODING_CATEGORY_MASK_ISO_8_1;
- if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
- & CODING_FLAG_ISO_LATIN_EXTRA)
- newmask |= CODING_CATEGORY_MASK_ISO_8_2;
- mask &= newmask;
- mask_found |= newmask;
- }
- else
- return 0;
- }
- else
+ if (c >= 0xA0)
{
- mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
- | CODING_CATEGORY_MASK_ISO_7_ELSE);
- mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
+ rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
+ found |= CATEGORY_MASK_ISO_8_1;
/* Check the length of succeeding codes of the range
- 0xA0..0FF. If the byte length is odd, we exclude
- CODING_CATEGORY_MASK_ISO_8_2. We can check this only
- when we are not single shifting. */
- if (!single_shifting
- && mask & CODING_CATEGORY_MASK_ISO_8_2)
+ 0xA0..0FF. If the byte length is even, we include
+ CATEGORY_MASK_ISO_8_2 in `found'. We can check this
+ only when we are not single shifting. */
+ if (! single_shifting
+ && ! (rejected & CATEGORY_MASK_ISO_8_2))
{
int i = 1;
-
- c = -1;
while (src < src_end)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE (c);
if (c < 0xA0)
break;
i++;
}
if (i & 1 && src < src_end)
- mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
+ rejected |= CATEGORY_MASK_ISO_8_2;
else
- mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
- if (c >= 0)
- /* This means that we have read one extra byte. */
- goto retry;
+ found |= CATEGORY_MASK_ISO_8_2;
}
+ break;
}
- break;
+ check_extra_latin:
+ single_shifting = 0;
+ if (! VECTORP (Vlatin_extra_code_table)
+ || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
+ {
+ rejected = CATEGORY_MASK_ISO;
+ break;
+ }
+ if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
+ & CODING_ISO_FLAG_LATIN_EXTRA)
+ found |= CATEGORY_MASK_ISO_8_1;
+ else
+ rejected |= CATEGORY_MASK_ISO_8_1;
+ rejected |= CATEGORY_MASK_ISO_8_2;
}
}
- label_end_of_loop:
- return (mask & mask_found);
-}
+ detect_info->rejected |= CATEGORY_MASK_ISO;
+ return 0;
-/* Decode a character of which charset is CHARSET, the 1st position
- code is C1, the 2nd position code is C2, and return the decoded
- character code. If the variable `translation_table' is non-nil,
- returned the translated code. */
+ no_more_source:
+ detect_info->rejected |= rejected;
+ detect_info->found |= (found & ~rejected);
+ return 1;
+}
-#define DECODE_ISO_CHARACTER(charset, c1, c2) \
- (NILP (translation_table) \
- ? MAKE_CHAR (charset, c1, c2) \
- : translate_char (translation_table, -1, charset, c1, c2))
-/* Set designation state into CODING. */
-#define DECODE_DESIGNATION(reg, dimension, chars, final_char) \
- do { \
- int charset, c; \
- \
- if (final_char < '0' || final_char >= 128) \
- goto label_invalid_code; \
- charset = ISO_CHARSET_TABLE (make_number (dimension), \
- make_number (chars), \
- make_number (final_char)); \
- c = MAKE_CHAR (charset, 0, 0); \
- if (charset >= 0 \
- && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
- || CODING_SAFE_CHAR_P (safe_chars, c))) \
- { \
- if (coding->spec.iso2022.last_invalid_designation_register == 0 \
- && reg == 0 \
- && charset == CHARSET_ASCII) \
- { \
- /* We should insert this designation sequence as is so \
- that it is surely written back to a file. */ \
- coding->spec.iso2022.last_invalid_designation_register = -1; \
- goto label_invalid_code; \
- } \
- coding->spec.iso2022.last_invalid_designation_register = -1; \
- if ((coding->mode & CODING_MODE_DIRECTION) \
- && CHARSET_REVERSE_CHARSET (charset) >= 0) \
- charset = CHARSET_REVERSE_CHARSET (charset); \
- CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \
- } \
- else \
- { \
- coding->spec.iso2022.last_invalid_designation_register = reg; \
- goto label_invalid_code; \
- } \
+/* Set designation state into CODING. Set CHARS_96 to -1 if the
+ escape sequence should be kept. */
+#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
+ do { \
+ int id, prev; \
+ \
+ if (final < '0' || final >= 128 \
+ || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
+ || !SAFE_CHARSET_P (coding, id)) \
+ { \
+ CODING_ISO_DESIGNATION (coding, reg) = -2; \
+ chars_96 = -1; \
+ break; \
+ } \
+ prev = CODING_ISO_DESIGNATION (coding, reg); \
+ if (id == charset_jisx0201_roman) \
+ { \
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
+ id = charset_ascii; \
+ } \
+ else if (id == charset_jisx0208_1978) \
+ { \
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
+ id = charset_jisx0208; \
+ } \
+ CODING_ISO_DESIGNATION (coding, reg) = id; \
+ /* If there was an invalid designation to REG previously, and this \
+ designation is ASCII to REG, we should keep this designation \
+ sequence. */ \
+ if (prev == -2 && id == charset_ascii) \
+ chars_96 = -1; \
} while (0)
-/* Allocate a memory block for storing information about compositions.
- The block is chained to the already allocated blocks. */
-void
-coding_allocate_composition_data (coding, char_offset)
- struct coding_system *coding;
- int char_offset;
-{
- struct composition_data *cmp_data
- = (struct composition_data *) xmalloc (sizeof *cmp_data);
-
- cmp_data->char_offset = char_offset;
- cmp_data->used = 0;
- cmp_data->prev = coding->cmp_data;
- cmp_data->next = NULL;
- if (coding->cmp_data)
- coding->cmp_data->next = cmp_data;
- coding->cmp_data = cmp_data;
- coding->cmp_data_start = 0;
- coding->composing = COMPOSITION_NO;
-}
+#define MAYBE_FINISH_COMPOSITION() \
+ do { \
+ int i; \
+ if (composition_state == COMPOSING_NO) \
+ break; \
+ /* It is assured that we have enough room for producing \
+ characters stored in the table `components'. */ \
+ if (charbuf + component_idx > charbuf_end) \
+ goto no_more_source; \
+ composition_state = COMPOSING_NO; \
+ if (method == COMPOSITION_RELATIVE \
+ || method == COMPOSITION_WITH_ALTCHARS) \
+ { \
+ for (i = 0; i < component_idx; i++) \
+ *charbuf++ = components[i]; \
+ char_offset += component_idx; \
+ } \
+ else \
+ { \
+ for (i = 0; i < component_idx; i += 2) \
+ *charbuf++ = components[i]; \
+ char_offset += (component_idx / 2) + 1; \
+ } \
+ } while (0)
+
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
- ESC 3 : altchar composition : ESC 3 ALT ... ESC 0 CHAR ... ESC 1
- ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1
+ ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
+ ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
*/
-#define DECODE_COMPOSITION_START(c1) \
- do { \
- if (coding->composing == COMPOSITION_DISABLED) \
- { \
- *dst++ = ISO_CODE_ESC; \
- *dst++ = c1 & 0x7f; \
- coding->produced_char += 2; \
- } \
- else if (!COMPOSING_P (coding)) \
- { \
- /* This is surely the start of a composition. We must be sure \
- that coding->cmp_data has enough space to store the \
- information about the composition. If not, terminate the \
- current decoding loop, allocate one more memory block for \
- coding->cmp_data in the caller, then start the decoding \
- loop again. We can't allocate memory here directly because \
- it may cause buffer/string relocation. */ \
- if (!coding->cmp_data \
- || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
- >= COMPOSITION_DATA_SIZE)) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_CMP; \
- goto label_end_of_loop; \
- } \
- coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE \
- : c1 == '2' ? COMPOSITION_WITH_RULE \
- : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
- : COMPOSITION_WITH_RULE_ALTCHARS); \
- CODING_ADD_COMPOSITION_START (coding, coding->produced_char, \
- coding->composing); \
- coding->composition_rule_follows = 0; \
- } \
- else \
- { \
- /* We are already handling a composition. If the method is \
- the following two, the codes following the current escape \
- sequence are actual characters stored in a buffer. */ \
- if (coding->composing == COMPOSITION_WITH_ALTCHARS \
- || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) \
- { \
- coding->composing = COMPOSITION_RELATIVE; \
- coding->composition_rule_follows = 0; \
- } \
- } \
+#define DECODE_COMPOSITION_START(c1) \
+ do { \
+ if (c1 == '0' \
+ && composition_state == COMPOSING_COMPONENT_RULE) \
+ { \
+ component_len = component_idx; \
+ composition_state = COMPOSING_CHAR; \
+ } \
+ else \
+ { \
+ const unsigned char *p; \
+ \
+ MAYBE_FINISH_COMPOSITION (); \
+ if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
+ goto no_more_source; \
+ for (p = src; p < src_end - 1; p++) \
+ if (*p == ISO_CODE_ESC && p[1] == '1') \
+ break; \
+ if (p == src_end - 1) \
+ { \
+ /* The current composition doesn't end in the current \
+ source. */ \
+ record_conversion_result \
+ (coding, CODING_RESULT_INSUFFICIENT_SRC); \
+ goto no_more_source; \
+ } \
+ \
+ /* This is surely the start of a composition. */ \
+ method = (c1 == '0' ? COMPOSITION_RELATIVE \
+ : c1 == '2' ? COMPOSITION_WITH_RULE \
+ : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
+ : COMPOSITION_WITH_RULE_ALTCHARS); \
+ composition_state = (c1 <= '2' ? COMPOSING_CHAR \
+ : COMPOSING_COMPONENT_CHAR); \
+ component_idx = component_len = 0; \
+ } \
} while (0)
-/* Handle composition end sequence ESC 1. */
-#define DECODE_COMPOSITION_END(c1) \
+/* Handle compositoin end sequence ESC 1. */
+
+#define DECODE_COMPOSITION_END() \
do { \
- if (! COMPOSING_P (coding)) \
+ int nchars = (component_len > 0 ? component_idx - component_len \
+ : method == COMPOSITION_RELATIVE ? component_idx \
+ : (component_idx + 1) / 2); \
+ int i; \
+ int *saved_charbuf = charbuf; \
+ \
+ ADD_COMPOSITION_DATA (charbuf, nchars, method); \
+ if (method != COMPOSITION_RELATIVE) \
{ \
- *dst++ = ISO_CODE_ESC; \
- *dst++ = c1; \
- coding->produced_char += 2; \
+ if (component_len == 0) \
+ for (i = 0; i < component_idx; i++) \
+ *charbuf++ = components[i]; \
+ else \
+ for (i = 0; i < component_len; i++) \
+ *charbuf++ = components[i]; \
+ *saved_charbuf = saved_charbuf - charbuf; \
} \
+ if (method == COMPOSITION_WITH_RULE) \
+ for (i = 0; i < component_idx; i += 2, char_offset++) \
+ *charbuf++ = components[i]; \
else \
- { \
- CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \
- coding->composing = COMPOSITION_NO; \
- } \
+ for (i = component_len; i < component_idx; i++, char_offset++) \
+ *charbuf++ = components[i]; \
+ coding->annotated = 1; \
+ composition_state = COMPOSING_NO; \
} while (0)
+
/* Decode a composition rule from the byte C1 (and maybe one more byte
from SRC) and store one encoded composition rule in
coding->cmp_data. */
#define DECODE_COMPOSITION_RULE(c1) \
do { \
- int rule = 0; \
(c1) -= 32; \
if (c1 < 81) /* old format (before ver.21) */ \
{ \
@@ -1805,168 +2897,161 @@ coding_allocate_composition_data (coding, char_offset)
int nref = (c1) % 9; \
if (gref == 4) gref = 10; \
if (nref == 4) nref = 10; \
- rule = COMPOSITION_ENCODE_RULE (gref, nref); \
+ c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
} \
else if (c1 < 93) /* new format (after ver.21) */ \
{ \
ONE_MORE_BYTE (c2); \
- rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
+ c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
} \
- CODING_ADD_COMPOSITION_COMPONENT (coding, rule); \
- coding->composition_rule_follows = 0; \
+ else \
+ c1 = 0; \
} while (0)
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
static void
-decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
+decode_coding_iso_2022 (coding)
struct coding_system *coding;
- const unsigned char *source;
- unsigned char *destination;
- int src_bytes, dst_bytes;
{
- const unsigned char *src = source;
- const unsigned char *src_end = source + src_bytes;
- unsigned char *dst = destination;
- unsigned char *dst_end = destination + dst_bytes;
- /* Charsets invoked to graphic plane 0 and 1 respectively. */
- int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
- int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
- /* SRC_BASE remembers the start position in source in each loop.
- The loop will be exited when there's not enough source code
- (within macro ONE_MORE_BYTE), or when there's not enough
- destination area to produce a character (within macro
- EMIT_CHAR). */
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
- int c, charset;
- Lisp_Object translation_table;
- Lisp_Object safe_chars;
-
- safe_chars = coding_safe_chars (coding->symbol);
-
- if (NILP (Venable_character_translation))
- translation_table = Qnil;
- else
- {
- translation_table = coding->translation_table_for_decode;
- if (NILP (translation_table))
- translation_table = Vstandard_translation_table_for_decode;
- }
-
- coding->result = CODING_FINISH_NORMAL;
+ int *charbuf = coding->charbuf + coding->charbuf_used;
+ int *charbuf_end
+ = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
+ int consumed_chars = 0, consumed_chars_base;
+ int multibytep = coding->src_multibyte;
+ /* Charsets invoked to graphic plane 0 and 1 respectively. */
+ int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
+ int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
+ int charset_id_2, charset_id_3;
+ struct charset *charset;
+ int c;
+ /* For handling composition sequence. */
+#define COMPOSING_NO 0
+#define COMPOSING_CHAR 1
+#define COMPOSING_RULE 2
+#define COMPOSING_COMPONENT_CHAR 3
+#define COMPOSING_COMPONENT_RULE 4
+
+ int composition_state = COMPOSING_NO;
+ enum composition_method method;
+ int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
+ int component_idx;
+ int component_len;
+ Lisp_Object attrs, charset_list;
+ int char_offset = coding->produced_char;
+ int last_offset = char_offset;
+ int last_id = charset_ascii;
+
+ CODING_GET_INFO (coding, attrs, charset_list);
+ setup_iso_safe_charsets (attrs);
while (1)
{
- int c1, c2 = 0;
+ int c1, c2;
src_base = src;
+ consumed_chars_base = consumed_chars;
+
+ if (charbuf >= charbuf_end)
+ break;
+
ONE_MORE_BYTE (c1);
+ if (c1 < 0)
+ goto invalid_code;
- /* We produce no character or one character. */
+ /* We produce at most one character. */
switch (iso_code_class [c1])
{
case ISO_0x20_or_0x7F:
- if (COMPOSING_P (coding) && coding->composition_rule_follows)
- {
- DECODE_COMPOSITION_RULE (c1);
- continue;
- }
- if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
+ if (composition_state != COMPOSING_NO)
{
- /* This is SPACE or DEL. */
- charset = CHARSET_ASCII;
- break;
+ if (composition_state == COMPOSING_RULE
+ || composition_state == COMPOSING_COMPONENT_RULE)
+ {
+ DECODE_COMPOSITION_RULE (c1);
+ components[component_idx++] = c1;
+ composition_state--;
+ continue;
+ }
}
- /* This is a graphic character, we fall down ... */
+ if (charset_id_0 < 0
+ || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
+ /* This is SPACE or DEL. */
+ charset = CHARSET_FROM_ID (charset_ascii);
+ else
+ charset = CHARSET_FROM_ID (charset_id_0);
+ break;
case ISO_graphic_plane_0:
- if (COMPOSING_P (coding) && coding->composition_rule_follows)
+ if (composition_state != COMPOSING_NO)
{
- DECODE_COMPOSITION_RULE (c1);
- continue;
+ if (composition_state == COMPOSING_RULE
+ || composition_state == COMPOSING_COMPONENT_RULE)
+ {
+ DECODE_COMPOSITION_RULE (c1);
+ components[component_idx++] = c1;
+ composition_state--;
+ continue;
+ }
}
- charset = charset0;
+ if (charset_id_0 < 0)
+ charset = CHARSET_FROM_ID (charset_ascii);
+ else
+ charset = CHARSET_FROM_ID (charset_id_0);
break;
case ISO_0xA0_or_0xFF:
- if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
- || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
- goto label_invalid_code;
+ if (charset_id_1 < 0
+ || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
+ || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
+ goto invalid_code;
/* This is a graphic character, we fall down ... */
case ISO_graphic_plane_1:
- if (charset1 < 0)
- goto label_invalid_code;
- charset = charset1;
+ if (charset_id_1 < 0)
+ goto invalid_code;
+ charset = CHARSET_FROM_ID (charset_id_1);
break;
case ISO_control_0:
- if (COMPOSING_P (coding))
- DECODE_COMPOSITION_END ('1');
-
- /* All ISO2022 control characters in this class have the
- same representation in Emacs internal format. */
- if (c1 == '\n'
- && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- && (coding->eol_type == CODING_EOL_CR
- || coding->eol_type == CODING_EOL_CRLF))
- {
- coding->result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop;
- }
- charset = CHARSET_ASCII;
+ MAYBE_FINISH_COMPOSITION ();
+ charset = CHARSET_FROM_ID (charset_ascii);
break;
case ISO_control_1:
- if (COMPOSING_P (coding))
- DECODE_COMPOSITION_END ('1');
- goto label_invalid_code;
-
- case ISO_carriage_return:
- if (COMPOSING_P (coding))
- DECODE_COMPOSITION_END ('1');
-
- if (coding->eol_type == CODING_EOL_CR)
- c1 = '\n';
- else if (coding->eol_type == CODING_EOL_CRLF)
- {
- ONE_MORE_BYTE (c1);
- if (c1 != ISO_CODE_LF)
- {
- src--;
- c1 = '\r';
- }
- }
- charset = CHARSET_ASCII;
- break;
+ MAYBE_FINISH_COMPOSITION ();
+ goto invalid_code;
case ISO_shift_out:
- if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
- || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
- goto label_invalid_code;
- CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
- charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
+ || CODING_ISO_DESIGNATION (coding, 1) < 0)
+ goto invalid_code;
+ CODING_ISO_INVOCATION (coding, 0) = 1;
+ charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
continue;
case ISO_shift_in:
- if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
- goto label_invalid_code;
- CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
- charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
+ goto invalid_code;
+ CODING_ISO_INVOCATION (coding, 0) = 0;
+ charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
continue;
case ISO_single_shift_2_7:
case ISO_single_shift_2:
- if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
- goto label_invalid_code;
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
+ goto invalid_code;
/* SS2 is handled as an escape sequence of ESC 'N' */
c1 = 'N';
goto label_escape_sequence;
case ISO_single_shift_3:
- if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
- goto label_invalid_code;
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
+ goto invalid_code;
/* SS2 is handled as an escape sequence of ESC 'O' */
c1 = 'O';
goto label_escape_sequence;
@@ -1979,7 +3064,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
case ISO_escape:
ONE_MORE_BYTE (c1);
label_escape_sequence:
- /* Escape sequences handled by Emacs are invocation,
+ /* Escape sequences handled here are invocation,
designation, direction specification, and character
composition specification. */
switch (c1)
@@ -1987,89 +3072,110 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
case '&': /* revision of following character set */
ONE_MORE_BYTE (c1);
if (!(c1 >= '@' && c1 <= '~'))
- goto label_invalid_code;
+ goto invalid_code;
ONE_MORE_BYTE (c1);
if (c1 != ISO_CODE_ESC)
- goto label_invalid_code;
+ goto invalid_code;
ONE_MORE_BYTE (c1);
goto label_escape_sequence;
case '$': /* designation of 2-byte character set */
- if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
- goto label_invalid_code;
- ONE_MORE_BYTE (c1);
- if (c1 >= '@' && c1 <= 'B')
- { /* designation of JISX0208.1978, GB2312.1980,
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
+ goto invalid_code;
+ {
+ int reg, chars96;
+
+ ONE_MORE_BYTE (c1);
+ if (c1 >= '@' && c1 <= 'B')
+ { /* designation of JISX0208.1978, GB2312.1980,
or JISX0208.1980 */
- DECODE_DESIGNATION (0, 2, 94, c1);
- }
- else if (c1 >= 0x28 && c1 <= 0x2B)
- { /* designation of DIMENSION2_CHARS94 character set */
- ONE_MORE_BYTE (c2);
- DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
- }
- else if (c1 >= 0x2C && c1 <= 0x2F)
- { /* designation of DIMENSION2_CHARS96 character set */
- ONE_MORE_BYTE (c2);
- DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
- }
- else
- goto label_invalid_code;
- /* We must update these variables now. */
- charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
- charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
+ reg = 0, chars96 = 0;
+ }
+ else if (c1 >= 0x28 && c1 <= 0x2B)
+ { /* designation of DIMENSION2_CHARS94 character set */
+ reg = c1 - 0x28, chars96 = 0;
+ ONE_MORE_BYTE (c1);
+ }
+ else if (c1 >= 0x2C && c1 <= 0x2F)
+ { /* designation of DIMENSION2_CHARS96 character set */
+ reg = c1 - 0x2C, chars96 = 1;
+ ONE_MORE_BYTE (c1);
+ }
+ else
+ goto invalid_code;
+ DECODE_DESIGNATION (reg, 2, chars96, c1);
+ /* We must update these variables now. */
+ if (reg == 0)
+ charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
+ else if (reg == 1)
+ charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
+ if (chars96 < 0)
+ goto invalid_code;
+ }
continue;
case 'n': /* invocation of locking-shift-2 */
- if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
- || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
- goto label_invalid_code;
- CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
- charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
+ || CODING_ISO_DESIGNATION (coding, 2) < 0)
+ goto invalid_code;
+ CODING_ISO_INVOCATION (coding, 0) = 2;
+ charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
continue;
case 'o': /* invocation of locking-shift-3 */
- if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
- || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
- goto label_invalid_code;
- CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
- charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
+ || CODING_ISO_DESIGNATION (coding, 3) < 0)
+ goto invalid_code;
+ CODING_ISO_INVOCATION (coding, 0) = 3;
+ charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
continue;
case 'N': /* invocation of single-shift-2 */
- if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
- || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
- goto label_invalid_code;
- charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
+ || CODING_ISO_DESIGNATION (coding, 2) < 0)
+ goto invalid_code;
+ charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
+ if (charset_id_2 < 0)
+ charset = CHARSET_FROM_ID (charset_ascii);
+ else
+ charset = CHARSET_FROM_ID (charset_id_2);
ONE_MORE_BYTE (c1);
if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
- goto label_invalid_code;
+ goto invalid_code;
break;
case 'O': /* invocation of single-shift-3 */
- if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
- || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
- goto label_invalid_code;
- charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
+ || CODING_ISO_DESIGNATION (coding, 3) < 0)
+ goto invalid_code;
+ charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
+ if (charset_id_3 < 0)
+ charset = CHARSET_FROM_ID (charset_ascii);
+ else
+ charset = CHARSET_FROM_ID (charset_id_3);
ONE_MORE_BYTE (c1);
if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
- goto label_invalid_code;
+ goto invalid_code;
break;
case '0': case '2': case '3': case '4': /* start composition */
+ if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
+ goto invalid_code;
DECODE_COMPOSITION_START (c1);
continue;
case '1': /* end composition */
- DECODE_COMPOSITION_END (c1);
+ if (composition_state == COMPOSING_NO)
+ goto invalid_code;
+ DECODE_COMPOSITION_END ();
continue;
case '[': /* specification of direction */
- if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
- goto label_invalid_code;
+ if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
+ goto invalid_code;
/* For the moment, nested direction is not supported.
So, `coding->mode & CODING_MODE_DIRECTION' zero means
- left-to-right, and nonzero means right-to-left. */
+ left-to-right, and nozero means right-to-left. */
ONE_MORE_BYTE (c1);
switch (c1)
{
@@ -2082,7 +3188,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
if (c1 == ']')
coding->mode &= ~CODING_MODE_DIRECTION;
else
- goto label_invalid_code;
+ goto invalid_code;
break;
case '2': /* start of right-to-left direction */
@@ -2090,17 +3196,15 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
if (c1 == ']')
coding->mode |= CODING_MODE_DIRECTION;
else
- goto label_invalid_code;
+ goto invalid_code;
break;
default:
- goto label_invalid_code;
+ goto invalid_code;
}
continue;
case '%':
- if (COMPOSING_P (coding))
- DECODE_COMPOSITION_END ('1');
ONE_MORE_BYTE (c1);
if (c1 == '/')
{
@@ -2109,46 +3213,40 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
We keep these bytes as is for the moment.
They may be decoded by post-read-conversion. */
int dim, M, L;
- int size, required;
- int produced_chars;
+ int size;
ONE_MORE_BYTE (dim);
ONE_MORE_BYTE (M);
ONE_MORE_BYTE (L);
size = ((M - 128) * 128) + (L - 128);
- required = 8 + size * 2;
- if (dst + required > (dst_bytes ? dst_end : src))
- goto label_end_of_loop;
- *dst++ = ISO_CODE_ESC;
- *dst++ = '%';
- *dst++ = '/';
- *dst++ = dim;
- produced_chars = 4;
- dst += CHAR_STRING (M, dst), produced_chars++;
- dst += CHAR_STRING (L, dst), produced_chars++;
+ if (charbuf + 8 + size > charbuf_end)
+ goto break_loop;
+ *charbuf++ = ISO_CODE_ESC;
+ *charbuf++ = '%';
+ *charbuf++ = '/';
+ *charbuf++ = dim;
+ *charbuf++ = BYTE8_TO_CHAR (M);
+ *charbuf++ = BYTE8_TO_CHAR (L);
while (size-- > 0)
{
ONE_MORE_BYTE (c1);
- dst += CHAR_STRING (c1, dst), produced_chars++;
+ *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
}
- coding->produced_char += produced_chars;
}
else if (c1 == 'G')
{
- unsigned char *d = dst;
- int produced_chars;
-
/* XFree86 extension for embedding UTF-8 in CTEXT:
ESC % G --UTF-8-BYTES-- ESC % @
We keep these bytes as is for the moment.
They may be decoded by post-read-conversion. */
- if (d + 6 > (dst_bytes ? dst_end : src))
- goto label_end_of_loop;
- *d++ = ISO_CODE_ESC;
- *d++ = '%';
- *d++ = 'G';
- produced_chars = 3;
- while (d + 1 < (dst_bytes ? dst_end : src))
+ int *p = charbuf;
+
+ if (p + 6 > charbuf_end)
+ goto break_loop;
+ *p++ = ISO_CODE_ESC;
+ *p++ = '%';
+ *p++ = 'G';
+ while (p < charbuf_end)
{
ONE_MORE_BYTE (c1);
if (c1 == ISO_CODE_ESC
@@ -2159,71 +3257,128 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
src += 2;
break;
}
- d += CHAR_STRING (c1, d), produced_chars++;
+ *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
}
- if (d + 3 > (dst_bytes ? dst_end : src))
- goto label_end_of_loop;
- *d++ = ISO_CODE_ESC;
- *d++ = '%';
- *d++ = '@';
- dst = d;
- coding->produced_char += produced_chars + 3;
+ if (p + 3 > charbuf_end)
+ goto break_loop;
+ *p++ = ISO_CODE_ESC;
+ *p++ = '%';
+ *p++ = '@';
+ charbuf = p;
}
else
- goto label_invalid_code;
+ goto invalid_code;
continue;
+ break;
default:
- if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
- goto label_invalid_code;
- if (c1 >= 0x28 && c1 <= 0x2B)
- { /* designation of DIMENSION1_CHARS94 character set */
- ONE_MORE_BYTE (c2);
- DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
- }
- else if (c1 >= 0x2C && c1 <= 0x2F)
- { /* designation of DIMENSION1_CHARS96 character set */
- ONE_MORE_BYTE (c2);
- DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
- }
- else
- goto label_invalid_code;
- /* We must update these variables now. */
- charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
- charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
+ if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
+ goto invalid_code;
+ {
+ int reg, chars96;
+
+ if (c1 >= 0x28 && c1 <= 0x2B)
+ { /* designation of DIMENSION1_CHARS94 character set */
+ reg = c1 - 0x28, chars96 = 0;
+ ONE_MORE_BYTE (c1);
+ }
+ else if (c1 >= 0x2C && c1 <= 0x2F)
+ { /* designation of DIMENSION1_CHARS96 character set */
+ reg = c1 - 0x2C, chars96 = 1;
+ ONE_MORE_BYTE (c1);
+ }
+ else
+ goto invalid_code;
+ DECODE_DESIGNATION (reg, 1, chars96, c1);
+ /* We must update these variables now. */
+ if (reg == 0)
+ charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
+ else if (reg == 1)
+ charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
+ if (chars96 < 0)
+ goto invalid_code;
+ }
continue;
}
}
+ if (charset->id != charset_ascii
+ && last_id != charset->id)
+ {
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
+ last_id = charset->id;
+ last_offset = char_offset;
+ }
+
/* Now we know CHARSET and 1st position code C1 of a character.
- Produce a multibyte sequence for that character while getting
- 2nd position code C2 if necessary. */
- if (CHARSET_DIMENSION (charset) == 2)
+ Produce a decoded character while getting 2nd position code
+ C2 if necessary. */
+ c1 &= 0x7F;
+ if (CHARSET_DIMENSION (charset) > 1)
{
ONE_MORE_BYTE (c2);
- if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
+ if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
/* C2 is not in a valid range. */
- goto label_invalid_code;
+ goto invalid_code;
+ c1 = (c1 << 8) | (c2 & 0x7F);
+ if (CHARSET_DIMENSION (charset) > 2)
+ {
+ ONE_MORE_BYTE (c2);
+ if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
+ /* C2 is not in a valid range. */
+ goto invalid_code;
+ c1 = (c1 << 8) | (c2 & 0x7F);
+ }
+ }
+
+ CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
+ if (c < 0)
+ {
+ MAYBE_FINISH_COMPOSITION ();
+ for (; src_base < src; src_base++, char_offset++)
+ {
+ if (ASCII_BYTE_P (*src_base))
+ *charbuf++ = *src_base;
+ else
+ *charbuf++ = BYTE8_TO_CHAR (*src_base);
+ }
+ }
+ else if (composition_state == COMPOSING_NO)
+ {
+ *charbuf++ = c;
+ char_offset++;
+ }
+ else
+ {
+ components[component_idx++] = c;
+ if (method == COMPOSITION_WITH_RULE
+ || (method == COMPOSITION_WITH_RULE_ALTCHARS
+ && composition_state == COMPOSING_COMPONENT_CHAR))
+ composition_state++;
}
- c = DECODE_ISO_CHARACTER (charset, c1, c2);
- EMIT_CHAR (c);
continue;
- label_invalid_code:
- coding->errors++;
- if (COMPOSING_P (coding))
- DECODE_COMPOSITION_END ('1');
+ invalid_code:
+ MAYBE_FINISH_COMPOSITION ();
src = src_base;
- c = *src++;
- if (! NILP (translation_table))
- c = translate_char (translation_table, c, 0, 0, 0);
- EMIT_CHAR (c);
+ consumed_chars = consumed_chars_base;
+ ONE_MORE_BYTE (c);
+ *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ char_offset++;
+ coding->errors++;
+ continue;
+
+ break_loop:
+ break;
}
- label_end_of_loop:
- coding->consumed = coding->consumed_char = src_base - source;
- coding->produced = dst - destination;
- return;
+ no_more_source:
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
+ coding->consumed_char += consumed_chars_base;
+ coding->consumed = src_base - coding->source;
+ coding->charbuf_used = charbuf - coding->charbuf;
}
@@ -2231,9 +3386,9 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
/*
It is not enough to say just "ISO2022" on encoding, we have to
- specify more details. In Emacs, each ISO2022 coding system
+ specify more details. In Emacs, each coding system of ISO2022
variant has the following specifications:
- 1. Initial designation to G0 through G3.
+ 1. Initial designation to G0 thru G3.
2. Allows short-form designation?
3. ASCII should be designated to G0 before control characters?
4. ASCII should be designated to G0 at end of line?
@@ -2243,8 +3398,8 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
And the following two are only for Japanese:
8. Use ASCII in place of JIS0201-1976-Roman?
9. Use JISX0208-1983 in place of JISX0208-1978?
- These specifications are encoded in `coding->flags' as flag bits
- defined by macros CODING_FLAG_ISO_XXX. See `coding.h' for more
+ These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
+ defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
details.
*/
@@ -2255,115 +3410,136 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
#define ENCODE_DESIGNATION(charset, reg, coding) \
do { \
- unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset); \
+ unsigned char final_char = CHARSET_ISO_FINAL (charset); \
char *intermediate_char_94 = "()*+"; \
char *intermediate_char_96 = ",-./"; \
- int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset); \
- \
- if (revision < 255) \
+ int revision = -1; \
+ int c; \
+ \
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
+ revision = CHARSET_ISO_REVISION (charset); \
+ \
+ if (revision >= 0) \
{ \
- *dst++ = ISO_CODE_ESC; \
- *dst++ = '&'; \
- *dst++ = '@' + revision; \
+ EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
+ EMIT_ONE_BYTE ('@' + revision); \
} \
- *dst++ = ISO_CODE_ESC; \
+ EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
if (CHARSET_DIMENSION (charset) == 1) \
{ \
- if (CHARSET_CHARS (charset) == 94) \
- *dst++ = (unsigned char) (intermediate_char_94[reg]); \
+ if (! CHARSET_ISO_CHARS_96 (charset)) \
+ c = intermediate_char_94[reg]; \
else \
- *dst++ = (unsigned char) (intermediate_char_96[reg]); \
+ c = intermediate_char_96[reg]; \
+ EMIT_ONE_ASCII_BYTE (c); \
} \
else \
{ \
- *dst++ = '$'; \
- if (CHARSET_CHARS (charset) == 94) \
+ EMIT_ONE_ASCII_BYTE ('$'); \
+ if (! CHARSET_ISO_CHARS_96 (charset)) \
{ \
- if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM) \
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
|| reg != 0 \
|| final_char < '@' || final_char > 'B') \
- *dst++ = (unsigned char) (intermediate_char_94[reg]); \
+ EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
} \
else \
- *dst++ = (unsigned char) (intermediate_char_96[reg]); \
+ EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
} \
- *dst++ = final_char; \
- CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \
+ EMIT_ONE_ASCII_BYTE (final_char); \
+ \
+ CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
} while (0)
+
/* The following two macros produce codes (control character or escape
sequence) for ISO2022 single-shift functions (single-shift-2 and
single-shift-3). */
-#define ENCODE_SINGLE_SHIFT_2 \
- do { \
- if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
- *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \
- else \
- *dst++ = ISO_CODE_SS2; \
- CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
+#define ENCODE_SINGLE_SHIFT_2 \
+ do { \
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
+ EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
+ else \
+ EMIT_ONE_BYTE (ISO_CODE_SS2); \
+ CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
} while (0)
-#define ENCODE_SINGLE_SHIFT_3 \
- do { \
- if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
- *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \
- else \
- *dst++ = ISO_CODE_SS3; \
- CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
+
+#define ENCODE_SINGLE_SHIFT_3 \
+ do { \
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
+ EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
+ else \
+ EMIT_ONE_BYTE (ISO_CODE_SS3); \
+ CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
} while (0)
+
/* The following four macros produce codes (control character or
escape sequence) for ISO2022 locking-shift functions (shift-in,
shift-out, locking-shift-2, and locking-shift-3). */
-#define ENCODE_SHIFT_IN \
- do { \
- *dst++ = ISO_CODE_SI; \
- CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
+#define ENCODE_SHIFT_IN \
+ do { \
+ EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
+ CODING_ISO_INVOCATION (coding, 0) = 0; \
} while (0)
-#define ENCODE_SHIFT_OUT \
- do { \
- *dst++ = ISO_CODE_SO; \
- CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
+
+#define ENCODE_SHIFT_OUT \
+ do { \
+ EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
+ CODING_ISO_INVOCATION (coding, 0) = 1; \
} while (0)
-#define ENCODE_LOCKING_SHIFT_2 \
- do { \
- *dst++ = ISO_CODE_ESC, *dst++ = 'n'; \
- CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
+
+#define ENCODE_LOCKING_SHIFT_2 \
+ do { \
+ EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
+ CODING_ISO_INVOCATION (coding, 0) = 2; \
} while (0)
-#define ENCODE_LOCKING_SHIFT_3 \
- do { \
- *dst++ = ISO_CODE_ESC, *dst++ = 'o'; \
- CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
+
+#define ENCODE_LOCKING_SHIFT_3 \
+ do { \
+ EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
+ CODING_ISO_INVOCATION (coding, 0) = 3; \
} while (0)
+
/* Produce codes for a DIMENSION1 character whose character set is
CHARSET and whose position-code is C1. Designation and invocation
sequences are also produced in advance if necessary. */
#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
do { \
- if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
+ int id = CHARSET_ID (charset); \
+ \
+ if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
+ && id == charset_ascii) \
+ { \
+ id = charset_jisx0201_roman; \
+ charset = CHARSET_FROM_ID (id); \
+ } \
+ \
+ if (CODING_ISO_SINGLE_SHIFTING (coding)) \
{ \
- if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
- *dst++ = c1 & 0x7F; \
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
+ EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
else \
- *dst++ = c1 | 0x80; \
- CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; \
+ EMIT_ONE_BYTE (c1 | 0x80); \
+ CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
break; \
} \
- else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)) \
+ else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
{ \
- *dst++ = c1 & 0x7F; \
+ EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
break; \
} \
- else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
+ else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
{ \
- *dst++ = c1 | 0x80; \
+ EMIT_ONE_BYTE (c1 | 0x80); \
break; \
} \
else \
@@ -2371,32 +3547,43 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
must invoke it, or, at first, designate it to some graphic \
register. Then repeat the loop to actually produce the \
character. */ \
- dst = encode_invocation_designation (charset, coding, dst); \
+ dst = encode_invocation_designation (charset, coding, dst, \
+ &produced_chars); \
} while (1)
+
/* Produce codes for a DIMENSION2 character whose character set is
CHARSET and whose position-codes are C1 and C2. Designation and
invocation codes are also produced in advance if necessary. */
#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
do { \
- if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
+ int id = CHARSET_ID (charset); \
+ \
+ if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
+ && id == charset_jisx0208) \
+ { \
+ id = charset_jisx0208_1978; \
+ charset = CHARSET_FROM_ID (id); \
+ } \
+ \
+ if (CODING_ISO_SINGLE_SHIFTING (coding)) \
{ \
- if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
- *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F; \
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
+ EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
else \
- *dst++ = c1 | 0x80, *dst++ = c2 | 0x80; \
- CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; \
+ EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
+ CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
break; \
} \
- else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)) \
+ else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
{ \
- *dst++ = c1 & 0x7F, *dst++= c2 & 0x7F; \
+ EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
break; \
} \
- else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
+ else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
{ \
- *dst++ = c1 | 0x80, *dst++= c2 | 0x80; \
+ EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
break; \
} \
else \
@@ -2404,73 +3591,49 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
must invoke it, or, at first, designate it to some graphic \
register. Then repeat the loop to actually produce the \
character. */ \
- dst = encode_invocation_designation (charset, coding, dst); \
+ dst = encode_invocation_designation (charset, coding, dst, \
+ &produced_chars); \
} while (1)
-#define ENCODE_ISO_CHARACTER(c) \
- do { \
- int charset, c1, c2; \
- \
- SPLIT_CHAR (c, charset, c1, c2); \
- if (CHARSET_DEFINED_P (charset)) \
- { \
- if (CHARSET_DIMENSION (charset) == 1) \
- { \
- if (charset == CHARSET_ASCII \
- && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \
- charset = charset_latin_jisx0201; \
- ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1); \
- } \
- else \
- { \
- if (charset == charset_jisx0208 \
- && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \
- charset = charset_jisx0208_1978; \
- ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2); \
- } \
- } \
- else \
- { \
- *dst++ = c1; \
- if (c2 >= 0) \
- *dst++ = c2; \
- } \
- } while (0)
-
-
-/* Instead of encoding character C, produce one or two `?'s. */
-#define ENCODE_UNSAFE_CHARACTER(c) \
- do { \
- ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER); \
- if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) \
- ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER); \
+#define ENCODE_ISO_CHARACTER(charset, c) \
+ do { \
+ int code = ENCODE_CHAR ((charset),(c)); \
+ \
+ if (CHARSET_DIMENSION (charset) == 1) \
+ ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
+ else \
+ ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
} while (0)
/* Produce designation and invocation codes at a place pointed by DST
- to use CHARSET. The element `spec.iso2022' of *CODING is updated.
+ to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
Return new DST. */
unsigned char *
-encode_invocation_designation (charset, coding, dst)
- int charset;
+encode_invocation_designation (charset, coding, dst, p_nchars)
+ struct charset *charset;
struct coding_system *coding;
unsigned char *dst;
+ int *p_nchars;
{
+ int multibytep = coding->dst_multibyte;
+ int produced_chars = *p_nchars;
int reg; /* graphic register number */
+ int id = CHARSET_ID (charset);
/* At first, check designations. */
for (reg = 0; reg < 4; reg++)
- if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
+ if (id == CODING_ISO_DESIGNATION (coding, reg))
break;
if (reg >= 4)
{
/* CHARSET is not yet designated to any graphic registers. */
/* At first check the requested designation. */
- reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
- if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
+ reg = CODING_ISO_REQUEST (coding, id);
+ if (reg < 0)
/* Since CHARSET requests no special designation, designate it
to graphic register 0. */
reg = 0;
@@ -2478,8 +3641,8 @@ encode_invocation_designation (charset, coding, dst)
ENCODE_DESIGNATION (charset, reg, coding);
}
- if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
- && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
+ if (CODING_ISO_INVOCATION (coding, 0) != reg
+ && CODING_ISO_INVOCATION (coding, 1) != reg)
{
/* Since the graphic register REG is not invoked to any graphic
planes, invoke it to graphic plane 0. */
@@ -2494,14 +3657,14 @@ encode_invocation_designation (charset, coding, dst)
break;
case 2: /* graphic register 2 */
- if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
ENCODE_SINGLE_SHIFT_2;
else
ENCODE_LOCKING_SHIFT_2;
break;
case 3: /* graphic register 3 */
- if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
ENCODE_SINGLE_SHIFT_3;
else
ENCODE_LOCKING_SHIFT_3;
@@ -2509,98 +3672,55 @@ encode_invocation_designation (charset, coding, dst)
}
}
+ *p_nchars = produced_chars;
return dst;
}
-/* Produce 2-byte codes for encoded composition rule RULE. */
-
-#define ENCODE_COMPOSITION_RULE(rule) \
- do { \
- int gref, nref; \
- COMPOSITION_DECODE_RULE (rule, gref, nref); \
- *dst++ = 32 + 81 + gref; \
- *dst++ = 32 + nref; \
- } while (0)
-
-/* Produce codes for indicating the start of a composition sequence
- (ESC 0, ESC 3, or ESC 4). DATA points to an array of integers
- which specify information about the composition. See the comment
- in coding.h for the format of DATA. */
-
-#define ENCODE_COMPOSITION_START(coding, data) \
+/* The following three macros produce codes for indicating direction
+ of text. */
+#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
do { \
- coding->composing = data[3]; \
- *dst++ = ISO_CODE_ESC; \
- if (coding->composing == COMPOSITION_RELATIVE) \
- *dst++ = '0'; \
+ if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS) \
+ EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
else \
- { \
- *dst++ = (coding->composing == COMPOSITION_WITH_ALTCHARS \
- ? '3' : '4'); \
- coding->cmp_data_index = coding->cmp_data_start + 4; \
- coding->composition_rule_follows = 0; \
- } \
+ EMIT_ONE_BYTE (ISO_CODE_CSI); \
} while (0)
-/* Produce codes for indicating the end of the current composition. */
-#define ENCODE_COMPOSITION_END(coding, data) \
- do { \
- *dst++ = ISO_CODE_ESC; \
- *dst++ = '1'; \
- coding->cmp_data_start += data[0]; \
- coding->composing = COMPOSITION_NO; \
- if (coding->cmp_data_start == coding->cmp_data->used \
- && coding->cmp_data->next) \
- { \
- coding->cmp_data = coding->cmp_data->next; \
- coding->cmp_data_start = 0; \
- } \
+#define ENCODE_DIRECTION_R2L() \
+ do { \
+ ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
+ EMIT_TWO_ASCII_BYTES ('2', ']'); \
} while (0)
-/* Produce composition start sequence ESC 0. Here, this sequence
- doesn't mean the start of a new composition but means that we have
- just produced components (alternate chars and composition rules) of
- the composition and the actual text follows in SRC. */
-#define ENCODE_COMPOSITION_FAKE_START(coding) \
+#define ENCODE_DIRECTION_L2R() \
do { \
- *dst++ = ISO_CODE_ESC; \
- *dst++ = '0'; \
- coding->composing = COMPOSITION_RELATIVE; \
- } while (0)
-
-/* The following three macros produce codes for indicating direction
- of text. */
-#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
- do { \
- if (coding->flags == CODING_FLAG_ISO_SEVEN_BITS) \
- *dst++ = ISO_CODE_ESC, *dst++ = '['; \
- else \
- *dst++ = ISO_CODE_CSI; \
+ ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
+ EMIT_TWO_ASCII_BYTES ('0', ']'); \
} while (0)
-#define ENCODE_DIRECTION_R2L \
- ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst), *dst++ = '2', *dst++ = ']'
-
-#define ENCODE_DIRECTION_L2R \
- ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst), *dst++ = '0', *dst++ = ']'
/* Produce codes for designation and invocation to reset the graphic
planes and registers to initial state. */
-#define ENCODE_RESET_PLANE_AND_REGISTER \
- do { \
- int reg; \
- if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0) \
- ENCODE_SHIFT_IN; \
- for (reg = 0; reg < 4; reg++) \
- if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0 \
- && (CODING_SPEC_ISO_DESIGNATION (coding, reg) \
- != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg))) \
- ENCODE_DESIGNATION \
- (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
+#define ENCODE_RESET_PLANE_AND_REGISTER() \
+ do { \
+ int reg; \
+ struct charset *charset; \
+ \
+ if (CODING_ISO_INVOCATION (coding, 0) != 0) \
+ ENCODE_SHIFT_IN; \
+ for (reg = 0; reg < 4; reg++) \
+ if (CODING_ISO_INITIAL (coding, reg) >= 0 \
+ && (CODING_ISO_DESIGNATION (coding, reg) \
+ != CODING_ISO_INITIAL (coding, reg))) \
+ { \
+ charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
+ ENCODE_DESIGNATION (charset, reg, coding); \
+ } \
} while (0)
+
/* Produce designation sequences of charsets in the line started from
SRC to a place pointed by DST, and return updated DST.
@@ -2608,41 +3728,51 @@ encode_invocation_designation (charset, coding, dst)
find all the necessary designations. */
static unsigned char *
-encode_designation_at_bol (coding, translation_table, src, src_end, dst)
+encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
struct coding_system *coding;
- Lisp_Object translation_table;
- const unsigned char *src, *src_end;
+ int *charbuf, *charbuf_end;
unsigned char *dst;
{
- int charset, c, found = 0, reg;
+ struct charset *charset;
/* Table of charsets to be designated to each graphic register. */
int r[4];
+ int c, found = 0, reg;
+ int produced_chars = 0;
+ int multibytep = coding->dst_multibyte;
+ Lisp_Object attrs;
+ Lisp_Object charset_list;
+
+ attrs = CODING_ID_ATTRS (coding->id);
+ charset_list = CODING_ATTR_CHARSET_LIST (attrs);
+ if (EQ (charset_list, Qiso_2022))
+ charset_list = Viso_2022_charset_list;
for (reg = 0; reg < 4; reg++)
r[reg] = -1;
while (found < 4)
{
- ONE_MORE_CHAR (c);
+ int id;
+
+ c = *charbuf++;
if (c == '\n')
break;
-
- charset = CHAR_CHARSET (c);
- reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
- if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
+ charset = char_charset (c, charset_list, NULL);
+ id = CHARSET_ID (charset);
+ reg = CODING_ISO_REQUEST (coding, id);
+ if (reg >= 0 && r[reg] < 0)
{
found++;
- r[reg] = charset;
+ r[reg] = id;
}
}
- label_end_of_loop:
if (found)
{
for (reg = 0; reg < 4; reg++)
if (r[reg] >= 0
- && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
- ENCODE_DESIGNATION (r[reg], reg, coding);
+ && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
+ ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
}
return dst;
@@ -2650,188 +3780,160 @@ encode_designation_at_bol (coding, translation_table, src, src_end, dst)
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
-static void
-encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
+static int
+encode_coding_iso_2022 (coding)
struct coding_system *coding;
- const unsigned char *source;
- unsigned char *destination;
- int src_bytes, dst_bytes;
{
- const unsigned char *src = source;
- const unsigned char *src_end = source + src_bytes;
- unsigned char *dst = destination;
- unsigned char *dst_end = destination + dst_bytes;
- /* Since the maximum bytes produced by each loop is 20, we subtract 19
- from DST_END to assure overflow checking is necessary only at the
- head of loop. */
- unsigned char *adjusted_dst_end = dst_end - 19;
- /* SRC_BASE remembers the start position in source in each loop.
- The loop will be exited when there's not enough source text to
- analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
- there's not enough destination area to produce encoded codes
- (within macro EMIT_BYTES). */
- const unsigned char *src_base;
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int safe_room = 16;
+ int bol_designation
+ = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
+ && CODING_ISO_BOL (coding));
+ int produced_chars = 0;
+ Lisp_Object attrs, eol_type, charset_list;
+ int ascii_compatible;
int c;
- Lisp_Object translation_table;
- Lisp_Object safe_chars;
+ int preferred_charset_id = -1;
- if (coding->flags & CODING_FLAG_ISO_SAFE)
- coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
+ CODING_GET_INFO (coding, attrs, charset_list);
+ eol_type = CODING_ID_EOL_TYPE (coding->id);
+ if (VECTORP (eol_type))
+ eol_type = Qunix;
- safe_chars = coding_safe_chars (coding->symbol);
+ setup_iso_safe_charsets (attrs);
+ /* Charset list may have been changed. */
+ charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
+ coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
- if (NILP (Venable_character_translation))
- translation_table = Qnil;
- else
- {
- translation_table = coding->translation_table_for_encode;
- if (NILP (translation_table))
- translation_table = Vstandard_translation_table_for_encode;
- }
+ ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
- coding->consumed_char = 0;
- coding->errors = 0;
- while (1)
+ while (charbuf < charbuf_end)
{
- src_base = src;
+ ASSURE_DESTINATION (safe_room);
- if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
+ if (bol_designation)
{
- coding->result = CODING_FINISH_INSUFFICIENT_DST;
- break;
- }
+ unsigned char *dst_prev = dst;
- if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
- && CODING_SPEC_ISO_BOL (coding))
- {
/* We have to produce designation sequences if any now. */
- dst = encode_designation_at_bol (coding, translation_table,
- src, src_end, dst);
- CODING_SPEC_ISO_BOL (coding) = 0;
+ dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
+ bol_designation = 0;
+ /* We are sure that designation sequences are all ASCII bytes. */
+ produced_chars += dst - dst_prev;
}
- /* Check composition start and end. */
- if (coding->composing != COMPOSITION_DISABLED
- && coding->cmp_data_start < coding->cmp_data->used)
- {
- struct composition_data *cmp_data = coding->cmp_data;
- int *data = cmp_data->data + coding->cmp_data_start;
- int this_pos = cmp_data->char_offset + coding->consumed_char;
+ c = *charbuf++;
- if (coding->composing == COMPOSITION_RELATIVE)
- {
- if (this_pos == data[2])
- {
- ENCODE_COMPOSITION_END (coding, data);
- cmp_data = coding->cmp_data;
- data = cmp_data->data + coding->cmp_data_start;
- }
- }
- else if (COMPOSING_P (coding))
- {
- /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR */
- if (coding->cmp_data_index == coding->cmp_data_start + data[0])
- /* We have consumed components of the composition.
- What follows in SRC is the composition's base
- text. */
- ENCODE_COMPOSITION_FAKE_START (coding);
- else
- {
- int c = cmp_data->data[coding->cmp_data_index++];
- if (coding->composition_rule_follows)
- {
- ENCODE_COMPOSITION_RULE (c);
- coding->composition_rule_follows = 0;
- }
- else
- {
- if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
- && ! CODING_SAFE_CHAR_P (safe_chars, c))
- ENCODE_UNSAFE_CHARACTER (c);
- else
- ENCODE_ISO_CHARACTER (c);
- if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
- coding->composition_rule_follows = 1;
- }
- continue;
- }
- }
- if (!COMPOSING_P (coding))
+ if (c < 0)
+ {
+ /* Handle an annotation. */
+ switch (*charbuf)
{
- if (this_pos == data[1])
- {
- ENCODE_COMPOSITION_START (coding, data);
- continue;
- }
+ case CODING_ANNOTATE_COMPOSITION_MASK:
+ /* Not yet implemented. */
+ break;
+ case CODING_ANNOTATE_CHARSET_MASK:
+ preferred_charset_id = charbuf[2];
+ if (preferred_charset_id >= 0
+ && NILP (Fmemq (make_number (preferred_charset_id),
+ charset_list)))
+ preferred_charset_id = -1;
+ break;
+ default:
+ abort ();
}
+ charbuf += -c - 1;
+ continue;
}
- ONE_MORE_CHAR (c);
-
/* Now encode the character C. */
if (c < 0x20 || c == 0x7F)
{
- if (c == '\r')
+ if (c == '\n'
+ || (c == '\r' && EQ (eol_type, Qmac)))
{
- if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
+ ENCODE_RESET_PLANE_AND_REGISTER ();
+ if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
{
- if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
- ENCODE_RESET_PLANE_AND_REGISTER;
- *dst++ = c;
- continue;
+ int i;
+
+ for (i = 0; i < 4; i++)
+ CODING_ISO_DESIGNATION (coding, i)
+ = CODING_ISO_INITIAL (coding, i);
}
- /* fall down to treat '\r' as '\n' ... */
- c = '\n';
- }
- if (c == '\n')
- {
- if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
- ENCODE_RESET_PLANE_AND_REGISTER;
- if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
- bcopy (coding->spec.iso2022.initial_designation,
- coding->spec.iso2022.current_designation,
- sizeof coding->spec.iso2022.initial_designation);
- if (coding->eol_type == CODING_EOL_LF
- || coding->eol_type == CODING_EOL_UNDECIDED)
- *dst++ = ISO_CODE_LF;
- else if (coding->eol_type == CODING_EOL_CRLF)
- *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
- else
- *dst++ = ISO_CODE_CR;
- CODING_SPEC_ISO_BOL (coding) = 1;
+ bol_designation
+ = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
}
+ else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
+ ENCODE_RESET_PLANE_AND_REGISTER ();
+ EMIT_ONE_ASCII_BYTE (c);
+ }
+ else if (ASCII_CHAR_P (c))
+ {
+ if (ascii_compatible)
+ EMIT_ONE_ASCII_BYTE (c);
else
{
- if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
- ENCODE_RESET_PLANE_AND_REGISTER;
- *dst++ = c;
+ struct charset *charset = CHARSET_FROM_ID (charset_ascii);
+ ENCODE_ISO_CHARACTER (charset, c);
}
}
- else if (ASCII_BYTE_P (c))
- ENCODE_ISO_CHARACTER (c);
- else if (SINGLE_BYTE_CHAR_P (c))
+ else if (CHAR_BYTE8_P (c))
{
- *dst++ = c;
- coding->errors++;
+ c = CHAR_TO_BYTE8 (c);
+ EMIT_ONE_BYTE (c);
}
- else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
- && ! CODING_SAFE_CHAR_P (safe_chars, c))
- ENCODE_UNSAFE_CHARACTER (c);
else
- ENCODE_ISO_CHARACTER (c);
+ {
+ struct charset *charset;
- coding->consumed_char++;
+ if (preferred_charset_id >= 0)
+ {
+ charset = CHARSET_FROM_ID (preferred_charset_id);
+ if (! CHAR_CHARSET_P (c, charset))
+ charset = char_charset (c, charset_list, NULL);
+ }
+ else
+ charset = char_charset (c, charset_list, NULL);
+ if (!charset)
+ {
+ if (coding->mode & CODING_MODE_SAFE_ENCODING)
+ {
+ c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
+ charset = CHARSET_FROM_ID (charset_ascii);
+ }
+ else
+ {
+ c = coding->default_char;
+ charset = char_charset (c, charset_list, NULL);
+ }
+ }
+ ENCODE_ISO_CHARACTER (charset, c);
+ }
}
- label_end_of_loop:
- coding->consumed = src_base - source;
- coding->produced = coding->produced_char = dst - destination;
+ if (coding->mode & CODING_MODE_LAST_BLOCK
+ && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
+ {
+ ASSURE_DESTINATION (safe_room);
+ ENCODE_RESET_PLANE_AND_REGISTER ();
+ }
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
+ CODING_ISO_BOL (coding) = bol_designation;
+ coding->produced_char += produced_chars;
+ coding->produced = dst - coding->destination;
+ return 0;
}
-/*** 4. SJIS and BIG5 handlers ***/
+/*** 8,9. SJIS and BIG5 handlers ***/
-/* Although SJIS and BIG5 are not ISO coding systems, they are used
+/* Although SJIS and BIG5 are not ISO's coding system, they are used
quite widely. So, for the moment, Emacs supports them in the bare
C code. But, in the future, they may be supported only by CCL. */
@@ -2840,12 +3942,12 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
as is. A character of charset katakana-jisx0201 is encoded by
"position-code + 0x80". A character of charset japanese-jisx0208
is encoded in 2-byte but two position-codes are divided and shifted
- so that it fits in the range below.
+ so that it fit in the range below.
--- CODE RANGE of SJIS ---
(character set) (range)
ASCII 0x00 .. 0x7F
- KATAKANA-JISX0201 0xA1 .. 0xDF
+ KATAKANA-JISX0201 0xA0 .. 0xDF
JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
(2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
-------------------------------
@@ -2854,7 +3956,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
/* BIG5 is a coding system encoding two character sets: ASCII and
Big5. An ASCII character is encoded as is. Big5 is a two-byte
- character set and is encoded in two bytes.
+ character set and is encoded in two-byte.
--- CODE RANGE of BIG5 ---
(character set) (range)
@@ -2863,316 +3965,293 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
(2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
--------------------------
- Since the number of characters in Big5 is larger than maximum
- characters in Emacs' charset (96x96), it can't be handled as one
- charset. So, in Emacs, Big5 is divided into two: `charset-big5-1'
- and `charset-big5-2'. Both are DIMENSION2 and CHARS94. The former
- contains frequently used characters and the latter contains less
- frequently used characters. */
-
-/* Macros to decode or encode a character of Big5 in BIG5. B1 and B2
- are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
- C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
- format. CHARSET is `charset_big5_1' or `charset_big5_2'. */
-
-/* Number of Big5 characters which have the same code in 1st byte. */
-#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
-
-#define DECODE_BIG5(b1, b2, charset, c1, c2) \
- do { \
- unsigned int temp \
- = (b1 - 0xA1) * BIG5_SAME_ROW + b2 - (b2 < 0x7F ? 0x40 : 0x62); \
- if (b1 < 0xC9) \
- charset = charset_big5_1; \
- else \
- { \
- charset = charset_big5_2; \
- temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW; \
- } \
- c1 = temp / (0xFF - 0xA1) + 0x21; \
- c2 = temp % (0xFF - 0xA1) + 0x21; \
- } while (0)
-
-#define ENCODE_BIG5(charset, c1, c2, b1, b2) \
- do { \
- unsigned int temp = (c1 - 0x21) * (0xFF - 0xA1) + (c2 - 0x21); \
- if (charset == charset_big5_2) \
- temp += BIG5_SAME_ROW * (0xC9 - 0xA1); \
- b1 = temp / BIG5_SAME_ROW + 0xA1; \
- b2 = temp % BIG5_SAME_ROW; \
- b2 += b2 < 0x3F ? 0x40 : 0x62; \
- } while (0)
+ */
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
Check if a text is encoded in SJIS. If it is, return
- CODING_CATEGORY_MASK_SJIS, else return 0. */
+ CATEGORY_MASK_SJIS, else return 0. */
static int
-detect_coding_sjis (src, src_end, multibytep)
- unsigned char *src, *src_end;
- int multibytep;
+detect_coding_sjis (coding, detect_info)
+ struct coding_system *coding;
+ struct coding_detection_info *detect_info;
{
+ const unsigned char *src = coding->source, *src_base;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int multibytep = coding->src_multibyte;
+ int consumed_chars = 0;
+ int found = 0;
int c;
- /* Dummy for ONE_MORE_BYTE. */
- struct coding_system dummy_coding;
- struct coding_system *coding = &dummy_coding;
+
+ detect_info->checked |= CATEGORY_MASK_SJIS;
+ /* A coding system of this category is always ASCII compatible. */
+ src += coding->head_ascii;
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ src_base = src;
+ ONE_MORE_BYTE (c);
if (c < 0x80)
continue;
- if (c == 0x80 || c == 0xA0 || c > 0xEF)
- return 0;
- if (c <= 0x9F || c >= 0xE0)
+ if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+ ONE_MORE_BYTE (c);
if (c < 0x40 || c == 0x7F || c > 0xFC)
- return 0;
+ break;
+ found = CATEGORY_MASK_SJIS;
}
+ else if (c >= 0xA0 && c < 0xE0)
+ found = CATEGORY_MASK_SJIS;
+ else
+ break;
+ }
+ detect_info->rejected |= CATEGORY_MASK_SJIS;
+ return 0;
+
+ no_more_source:
+ if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
+ {
+ detect_info->rejected |= CATEGORY_MASK_SJIS;
+ return 0;
}
- label_end_of_loop:
- return CODING_CATEGORY_MASK_SJIS;
+ detect_info->found |= found;
+ return 1;
}
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
Check if a text is encoded in BIG5. If it is, return
- CODING_CATEGORY_MASK_BIG5, else return 0. */
+ CATEGORY_MASK_BIG5, else return 0. */
static int
-detect_coding_big5 (src, src_end, multibytep)
- unsigned char *src, *src_end;
- int multibytep;
+detect_coding_big5 (coding, detect_info)
+ struct coding_system *coding;
+ struct coding_detection_info *detect_info;
{
+ const unsigned char *src = coding->source, *src_base;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int multibytep = coding->src_multibyte;
+ int consumed_chars = 0;
+ int found = 0;
int c;
- /* Dummy for ONE_MORE_BYTE. */
- struct coding_system dummy_coding;
- struct coding_system *coding = &dummy_coding;
- while (1)
- {
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
- if (c < 0x80)
- continue;
- if (c < 0xA1 || c > 0xFE)
- return 0;
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
- if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
- return 0;
- }
- label_end_of_loop:
- return CODING_CATEGORY_MASK_BIG5;
-}
-
-/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
- Check if a text is encoded in UTF-8. If it is, return
- CODING_CATEGORY_MASK_UTF_8, else return 0. */
-
-#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
-#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
-#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
-#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
-#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
-#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
-#define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)
-
-static int
-detect_coding_utf_8 (src, src_end, multibytep)
- unsigned char *src, *src_end;
- int multibytep;
-{
- unsigned char c;
- int seq_maybe_bytes;
- /* Dummy for ONE_MORE_BYTE. */
- struct coding_system dummy_coding;
- struct coding_system *coding = &dummy_coding;
+ detect_info->checked |= CATEGORY_MASK_BIG5;
+ /* A coding system of this category is always ASCII compatible. */
+ src += coding->head_ascii;
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
- if (UTF_8_1_OCTET_P (c))
+ src_base = src;
+ ONE_MORE_BYTE (c);
+ if (c < 0x80)
continue;
- else if (UTF_8_2_OCTET_LEADING_P (c))
- seq_maybe_bytes = 1;
- else if (UTF_8_3_OCTET_LEADING_P (c))
- seq_maybe_bytes = 2;
- else if (UTF_8_4_OCTET_LEADING_P (c))
- seq_maybe_bytes = 3;
- else if (UTF_8_5_OCTET_LEADING_P (c))
- seq_maybe_bytes = 4;
- else if (UTF_8_6_OCTET_LEADING_P (c))
- seq_maybe_bytes = 5;
- else
- return 0;
-
- do
+ if (c >= 0xA1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
- if (!UTF_8_EXTRA_OCTET_P (c))
+ ONE_MORE_BYTE (c);
+ if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
return 0;
- seq_maybe_bytes--;
+ found = CATEGORY_MASK_BIG5;
}
- while (seq_maybe_bytes > 0);
+ else
+ break;
}
+ detect_info->rejected |= CATEGORY_MASK_BIG5;
+ return 0;
- label_end_of_loop:
- return CODING_CATEGORY_MASK_UTF_8;
+ no_more_source:
+ if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
+ {
+ detect_info->rejected |= CATEGORY_MASK_BIG5;
+ return 0;
+ }
+ detect_info->found |= found;
+ return 1;
}
-/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
- Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
- Little Endian (otherwise). If it is, return
- CODING_CATEGORY_MASK_UTF_16_BE or CODING_CATEGORY_MASK_UTF_16_LE,
- else return 0. */
+/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
+ If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
-#define UTF_16_INVALID_P(val) \
- (((val) == 0xFFFE) \
- || ((val) == 0xFFFF))
+static void
+decode_coding_sjis (coding)
+ struct coding_system *coding;
+{
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ const unsigned char *src_base;
+ int *charbuf = coding->charbuf + coding->charbuf_used;
+ int *charbuf_end
+ = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+ int consumed_chars = 0, consumed_chars_base;
+ int multibytep = coding->src_multibyte;
+ struct charset *charset_roman, *charset_kanji, *charset_kana;
+ struct charset *charset_kanji2;
+ Lisp_Object attrs, charset_list, val;
+ int char_offset = coding->produced_char;
+ int last_offset = char_offset;
+ int last_id = charset_ascii;
+
+ CODING_GET_INFO (coding, attrs, charset_list);
+
+ val = charset_list;
+ charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
-#define UTF_16_HIGH_SURROGATE_P(val) \
- (((val) & 0xD800) == 0xD800)
+ while (1)
+ {
+ int c, c1;
+ struct charset *charset;
-#define UTF_16_LOW_SURROGATE_P(val) \
- (((val) & 0xDC00) == 0xDC00)
+ src_base = src;
+ consumed_chars_base = consumed_chars;
-static int
-detect_coding_utf_16 (src, src_end, multibytep)
- unsigned char *src, *src_end;
- int multibytep;
-{
- unsigned char c1, c2;
- /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE. */
- struct coding_system dummy_coding;
- struct coding_system *coding = &dummy_coding;
+ if (charbuf >= charbuf_end)
+ break;
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
+ ONE_MORE_BYTE (c);
+ if (c < 0)
+ goto invalid_code;
+ if (c < 0x80)
+ charset = charset_roman;
+ else if (c == 0x80 || c == 0xA0)
+ goto invalid_code;
+ else if (c >= 0xA1 && c <= 0xDF)
+ {
+ /* SJIS -> JISX0201-Kana */
+ c &= 0x7F;
+ charset = charset_kana;
+ }
+ else if (c <= 0xEF)
+ {
+ /* SJIS -> JISX0208 */
+ ONE_MORE_BYTE (c1);
+ if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
+ goto invalid_code;
+ c = (c << 8) | c1;
+ SJIS_TO_JIS (c);
+ charset = charset_kanji;
+ }
+ else if (c <= 0xFC && charset_kanji2)
+ {
+ /* SJIS -> JISX0213-2 */
+ ONE_MORE_BYTE (c1);
+ if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
+ goto invalid_code;
+ c = (c << 8) | c1;
+ SJIS_TO_JIS2 (c);
+ charset = charset_kanji2;
+ }
+ else
+ goto invalid_code;
+ if (charset->id != charset_ascii
+ && last_id != charset->id)
+ {
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
+ last_id = charset->id;
+ last_offset = char_offset;
+ }
+ CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
+ *charbuf++ = c;
+ char_offset++;
+ continue;
- if ((c1 == 0xFF) && (c2 == 0xFE))
- return CODING_CATEGORY_MASK_UTF_16_LE;
- else if ((c1 == 0xFE) && (c2 == 0xFF))
- return CODING_CATEGORY_MASK_UTF_16_BE;
+ invalid_code:
+ src = src_base;
+ consumed_chars = consumed_chars_base;
+ ONE_MORE_BYTE (c);
+ *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
+ char_offset++;
+ coding->errors++;
+ }
- label_end_of_loop:
- return 0;
+ no_more_source:
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
+ coding->consumed_char += consumed_chars_base;
+ coding->consumed = src_base - coding->source;
+ coding->charbuf_used = charbuf - coding->charbuf;
}
-/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
- If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
-
static void
-decode_coding_sjis_big5 (coding, source, destination,
- src_bytes, dst_bytes, sjis_p)
+decode_coding_big5 (coding)
struct coding_system *coding;
- const unsigned char *source;
- unsigned char *destination;
- int src_bytes, dst_bytes;
- int sjis_p;
{
- const unsigned char *src = source;
- const unsigned char *src_end = source + src_bytes;
- unsigned char *dst = destination;
- unsigned char *dst_end = destination + dst_bytes;
- /* SRC_BASE remembers the start position in source in each loop.
- The loop will be exited when there's not enough source code
- (within macro ONE_MORE_BYTE), or when there's not enough
- destination area to produce a character (within macro
- EMIT_CHAR). */
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
- Lisp_Object translation_table;
-
- if (NILP (Venable_character_translation))
- translation_table = Qnil;
- else
- {
- translation_table = coding->translation_table_for_decode;
- if (NILP (translation_table))
- translation_table = Vstandard_translation_table_for_decode;
- }
+ int *charbuf = coding->charbuf + coding->charbuf_used;
+ int *charbuf_end
+ = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+ int consumed_chars = 0, consumed_chars_base;
+ int multibytep = coding->src_multibyte;
+ struct charset *charset_roman, *charset_big5;
+ Lisp_Object attrs, charset_list, val;
+ int char_offset = coding->produced_char;
+ int last_offset = char_offset;
+ int last_id = charset_ascii;
+
+ CODING_GET_INFO (coding, attrs, charset_list);
+ val = charset_list;
+ charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
- coding->produced_char = 0;
while (1)
{
- int c, charset, c1, c2 = 0;
+ int c, c1;
+ struct charset *charset;
src_base = src;
- ONE_MORE_BYTE (c1);
+ consumed_chars_base = consumed_chars;
+
+ if (charbuf >= charbuf_end)
+ break;
- if (c1 < 0x80)
+ ONE_MORE_BYTE (c);
+
+ if (c < 0)
+ goto invalid_code;
+ if (c < 0x80)
+ charset = charset_roman;
+ else
{
- charset = CHARSET_ASCII;
- if (c1 < 0x20)
- {
- if (c1 == '\r')
- {
- if (coding->eol_type == CODING_EOL_CRLF)
- {
- ONE_MORE_BYTE (c2);
- if (c2 == '\n')
- c1 = c2;
- else
- /* To process C2 again, SRC is subtracted by 1. */
- src--;
- }
- else if (coding->eol_type == CODING_EOL_CR)
- c1 = '\n';
- }
- else if (c1 == '\n'
- && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- && (coding->eol_type == CODING_EOL_CR
- || coding->eol_type == CODING_EOL_CRLF))
- {
- coding->result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop;
- }
- }
+ /* BIG5 -> Big5 */
+ if (c < 0xA1 || c > 0xFE)
+ goto invalid_code;
+ ONE_MORE_BYTE (c1);
+ if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
+ goto invalid_code;
+ c = c << 8 | c1;
+ charset = charset_big5;
}
- else
- {
- if (sjis_p)
- {
- if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF)
- goto label_invalid_code;
- if (c1 <= 0x9F || c1 >= 0xE0)
- {
- /* SJIS -> JISX0208 */
- ONE_MORE_BYTE (c2);
- if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
- goto label_invalid_code;
- DECODE_SJIS (c1, c2, c1, c2);
- charset = charset_jisx0208;
- }
- else
- /* SJIS -> JISX0201-Kana */
- charset = charset_katakana_jisx0201;
- }
- else
- {
- /* BIG5 -> Big5 */
- if (c1 < 0xA0 || c1 > 0xFE)
- goto label_invalid_code;
- ONE_MORE_BYTE (c2);
- if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
- goto label_invalid_code;
- DECODE_BIG5 (c1, c2, charset, c1, c2);
- }
+ if (charset->id != charset_ascii
+ && last_id != charset->id)
+ {
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
+ last_id = charset->id;
+ last_offset = char_offset;
}
-
- c = DECODE_ISO_CHARACTER (charset, c1, c2);
- EMIT_CHAR (c);
+ CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
+ *charbuf++ = c;
+ char_offset++;
continue;
- label_invalid_code:
- coding->errors++;
+ invalid_code:
src = src_base;
- c = *src++;
- EMIT_CHAR (c);
+ consumed_chars = consumed_chars_base;
+ ONE_MORE_BYTE (c);
+ *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
+ char_offset++;
+ coding->errors++;
}
- label_end_of_loop:
- coding->consumed = coding->consumed_char = src_base - source;
- coding->produced = dst - destination;
- return;
+ no_more_source:
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
+ coding->consumed_char += consumed_chars_base;
+ coding->consumed = src_base - coding->source;
+ coding->charbuf_used = charbuf - coding->charbuf;
}
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
@@ -3183,827 +4262,960 @@ decode_coding_sjis_big5 (coding, source, destination,
charsets are produced without any encoding. If SJIS_P is 1, encode
SJIS text, else encode BIG5 text. */
-static void
-encode_coding_sjis_big5 (coding, source, destination,
- src_bytes, dst_bytes, sjis_p)
+static int
+encode_coding_sjis (coding)
struct coding_system *coding;
- unsigned char *source, *destination;
- int src_bytes, dst_bytes;
- int sjis_p;
{
- unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
- unsigned char *dst = destination;
- unsigned char *dst_end = destination + dst_bytes;
- /* SRC_BASE remembers the start position in source in each loop.
- The loop will be exited when there's not enough source text to
- analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
- there's not enough destination area to produce encoded codes
- (within macro EMIT_BYTES). */
- unsigned char *src_base;
- Lisp_Object translation_table;
-
- if (NILP (Venable_character_translation))
- translation_table = Qnil;
- else
- {
- translation_table = coding->translation_table_for_encode;
- if (NILP (translation_table))
- translation_table = Vstandard_translation_table_for_encode;
- }
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int safe_room = 4;
+ int produced_chars = 0;
+ Lisp_Object attrs, charset_list, val;
+ int ascii_compatible;
+ struct charset *charset_roman, *charset_kanji, *charset_kana;
+ struct charset *charset_kanji2;
+ int c;
- while (1)
- {
- int c, charset, c1, c2;
+ CODING_GET_INFO (coding, attrs, charset_list);
+ val = charset_list;
+ charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
- src_base = src;
- ONE_MORE_CHAR (c);
+ ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
+ while (charbuf < charbuf_end)
+ {
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
/* Now encode the character C. */
- if (SINGLE_BYTE_CHAR_P (c))
+ if (ASCII_CHAR_P (c) && ascii_compatible)
+ EMIT_ONE_ASCII_BYTE (c);
+ else if (CHAR_BYTE8_P (c))
+ {
+ c = CHAR_TO_BYTE8 (c);
+ EMIT_ONE_BYTE (c);
+ }
+ else
{
- switch (c)
+ unsigned code;
+ struct charset *charset = char_charset (c, charset_list, &code);
+
+ if (!charset)
{
- case '\r':
- if (!(coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
+ if (coding->mode & CODING_MODE_SAFE_ENCODING)
{
- EMIT_ONE_BYTE (c);
- break;
+ code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
+ charset = CHARSET_FROM_ID (charset_ascii);
}
- c = '\n';
- case '\n':
- if (coding->eol_type == CODING_EOL_CRLF)
+ else
{
- EMIT_TWO_BYTES ('\r', c);
- break;
+ c = coding->default_char;
+ charset = char_charset (c, charset_list, &code);
}
- else if (coding->eol_type == CODING_EOL_CR)
- c = '\r';
- default:
- EMIT_ONE_BYTE (c);
}
- }
- else
- {
- SPLIT_CHAR (c, charset, c1, c2);
- if (sjis_p)
+ if (code == CHARSET_INVALID_CODE (charset))
+ abort ();
+ if (charset == charset_kanji)
+ {
+ int c1, c2;
+ JIS_TO_SJIS (code);
+ c1 = code >> 8, c2 = code & 0xFF;
+ EMIT_TWO_BYTES (c1, c2);
+ }
+ else if (charset == charset_kana)
+ EMIT_ONE_BYTE (code | 0x80);
+ else if (charset_kanji2 && charset == charset_kanji2)
{
- if (charset == charset_jisx0208
- || charset == charset_jisx0208_1978)
+ int c1, c2;
+
+ c1 = code >> 8;
+ if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
+ || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
{
- ENCODE_SJIS (c1, c2, c1, c2);
+ JIS_TO_SJIS2 (code);
+ c1 = code >> 8, c2 = code & 0xFF;
EMIT_TWO_BYTES (c1, c2);
}
- else if (charset == charset_katakana_jisx0201)
- EMIT_ONE_BYTE (c1 | 0x80);
- else if (charset == charset_latin_jisx0201)
- EMIT_ONE_BYTE (c1);
- else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
- {
- EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
- if (CHARSET_WIDTH (charset) > 1)
- EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
- }
else
- /* There's no way other than producing the internal
- codes as is. */
- EMIT_BYTES (src_base, src);
+ EMIT_ONE_ASCII_BYTE (code & 0x7F);
}
else
+ EMIT_ONE_ASCII_BYTE (code & 0x7F);
+ }
+ }
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
+ coding->produced_char += produced_chars;
+ coding->produced = dst - coding->destination;
+ return 0;
+}
+
+static int
+encode_coding_big5 (coding)
+ struct coding_system *coding;
+{
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int safe_room = 4;
+ int produced_chars = 0;
+ Lisp_Object attrs, charset_list, val;
+ int ascii_compatible;
+ struct charset *charset_roman, *charset_big5;
+ int c;
+
+ CODING_GET_INFO (coding, attrs, charset_list);
+ val = charset_list;
+ charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
+ ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
+
+ while (charbuf < charbuf_end)
+ {
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
+ /* Now encode the character C. */
+ if (ASCII_CHAR_P (c) && ascii_compatible)
+ EMIT_ONE_ASCII_BYTE (c);
+ else if (CHAR_BYTE8_P (c))
+ {
+ c = CHAR_TO_BYTE8 (c);
+ EMIT_ONE_BYTE (c);
+ }
+ else
+ {
+ unsigned code;
+ struct charset *charset = char_charset (c, charset_list, &code);
+
+ if (! charset)
{
- if (charset == charset_big5_1 || charset == charset_big5_2)
+ if (coding->mode & CODING_MODE_SAFE_ENCODING)
{
- ENCODE_BIG5 (charset, c1, c2, c1, c2);
- EMIT_TWO_BYTES (c1, c2);
+ code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
+ charset = CHARSET_FROM_ID (charset_ascii);
}
- else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
+ else
{
- EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
- if (CHARSET_WIDTH (charset) > 1)
- EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
+ c = coding->default_char;
+ charset = char_charset (c, charset_list, &code);
}
- else
- /* There's no way other than producing the internal
- codes as is. */
- EMIT_BYTES (src_base, src);
}
+ if (code == CHARSET_INVALID_CODE (charset))
+ abort ();
+ if (charset == charset_big5)
+ {
+ int c1, c2;
+
+ c1 = code >> 8, c2 = code & 0xFF;
+ EMIT_TWO_BYTES (c1, c2);
+ }
+ else
+ EMIT_ONE_ASCII_BYTE (code & 0x7F);
}
- coding->consumed_char++;
}
-
- label_end_of_loop:
- coding->consumed = src_base - source;
- coding->produced = coding->produced_char = dst - destination;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
+ coding->produced_char += produced_chars;
+ coding->produced = dst - coding->destination;
+ return 0;
}
-/*** 5. CCL handlers ***/
+/*** 10. CCL handlers ***/
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
Check if a text is encoded in a coding system of which
encoder/decoder are written in CCL program. If it is, return
- CODING_CATEGORY_MASK_CCL, else return 0. */
+ CATEGORY_MASK_CCL, else return 0. */
static int
-detect_coding_ccl (src, src_end, multibytep)
- unsigned char *src, *src_end;
- int multibytep;
+detect_coding_ccl (coding, detect_info)
+ struct coding_system *coding;
+ struct coding_detection_info *detect_info;
{
- unsigned char *valid;
- int c;
- /* Dummy for ONE_MORE_BYTE. */
- struct coding_system dummy_coding;
- struct coding_system *coding = &dummy_coding;
-
- /* No coding system is assigned to coding-category-ccl. */
- if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
- return 0;
+ const unsigned char *src = coding->source, *src_base;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int multibytep = coding->src_multibyte;
+ int consumed_chars = 0;
+ int found = 0;
+ unsigned char *valids;
+ int head_ascii = coding->head_ascii;
+ Lisp_Object attrs;
+
+ detect_info->checked |= CATEGORY_MASK_CCL;
+
+ coding = &coding_categories[coding_category_ccl];
+ valids = CODING_CCL_VALIDS (coding);
+ attrs = CODING_ID_ATTRS (coding->id);
+ if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
+ src += head_ascii;
- valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
while (1)
{
- ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
- if (! valid[c])
- return 0;
- }
- label_end_of_loop:
- return CODING_CATEGORY_MASK_CCL;
-}
+ int c;
-
-/*** 6. End-of-line handlers ***/
+ src_base = src;
+ ONE_MORE_BYTE (c);
+ if (c < 0 || ! valids[c])
+ break;
+ if ((valids[c] > 1))
+ found = CATEGORY_MASK_CCL;
+ }
+ detect_info->rejected |= CATEGORY_MASK_CCL;
+ return 0;
-/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
+ no_more_source:
+ detect_info->found |= found;
+ return 1;
+}
static void
-decode_eol (coding, source, destination, src_bytes, dst_bytes)
+decode_coding_ccl (coding)
struct coding_system *coding;
- const unsigned char *source;
- unsigned char *destination;
- int src_bytes, dst_bytes;
{
- const unsigned char *src = source;
- unsigned char *dst = destination;
- const unsigned char *src_end = src + src_bytes;
- unsigned char *dst_end = dst + dst_bytes;
- Lisp_Object translation_table;
- /* SRC_BASE remembers the start position in source in each loop.
- The loop will be exited when there's not enough source code
- (within macro ONE_MORE_BYTE), or when there's not enough
- destination area to produce a character (within macro
- EMIT_CHAR). */
- const unsigned char *src_base;
- int c;
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int *charbuf = coding->charbuf + coding->charbuf_used;
+ int *charbuf_end = coding->charbuf + coding->charbuf_size;
+ int consumed_chars = 0;
+ int multibytep = coding->src_multibyte;
+ struct ccl_program ccl;
+ int source_charbuf[1024];
+ int source_byteidx[1024];
+ Lisp_Object attrs, charset_list;
+
+ CODING_GET_INFO (coding, attrs, charset_list);
+ setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
+
+ while (src < src_end)
+ {
+ const unsigned char *p = src;
+ int *source, *source_end;
+ int i = 0;
+
+ if (multibytep)
+ while (i < 1024 && p < src_end)
+ {
+ source_byteidx[i] = p - src;
+ source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
+ }
+ else
+ while (i < 1024 && p < src_end)
+ source_charbuf[i++] = *p++;
- translation_table = Qnil;
- switch (coding->eol_type)
- {
- case CODING_EOL_CRLF:
- while (1)
+ if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
+ ccl.last_block = 1;
+
+ source = source_charbuf;
+ source_end = source + i;
+ while (source < source_end)
{
- src_base = src;
- ONE_MORE_BYTE (c);
- if (c == '\r')
- {
- ONE_MORE_BYTE (c);
- if (c != '\n')
- {
- src--;
- c = '\r';
- }
- }
- else if (c == '\n'
- && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
- {
- coding->result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop;
- }
- EMIT_CHAR (c);
+ ccl_driver (&ccl, source, charbuf,
+ source_end - source, charbuf_end - charbuf,
+ charset_list);
+ source += ccl.consumed;
+ charbuf += ccl.produced;
+ if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
+ break;
}
+ if (source < source_end)
+ src += source_byteidx[source - source_charbuf];
+ else
+ src = p;
+ consumed_chars += source - source_charbuf;
+
+ if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
+ && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
+ break;
+ }
+
+ switch (ccl.status)
+ {
+ case CCL_STAT_SUSPEND_BY_SRC:
+ record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
+ break;
+ case CCL_STAT_SUSPEND_BY_DST:
break;
+ case CCL_STAT_QUIT:
+ case CCL_STAT_INVALID_CMD:
+ record_conversion_result (coding, CODING_RESULT_INTERRUPT);
+ break;
+ default:
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
+ break;
+ }
+ coding->consumed_char += consumed_chars;
+ coding->consumed = src - coding->source;
+ coding->charbuf_used = charbuf - coding->charbuf;
+}
- case CODING_EOL_CR:
- while (1)
+static int
+encode_coding_ccl (coding)
+ struct coding_system *coding;
+{
+ struct ccl_program ccl;
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int destination_charbuf[1024];
+ int i, produced_chars = 0;
+ Lisp_Object attrs, charset_list;
+
+ CODING_GET_INFO (coding, attrs, charset_list);
+ setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
+
+ ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
+ ccl.dst_multibyte = coding->dst_multibyte;
+
+ while (charbuf < charbuf_end)
+ {
+ ccl_driver (&ccl, charbuf, destination_charbuf,
+ charbuf_end - charbuf, 1024, charset_list);
+ if (multibytep)
{
- src_base = src;
- ONE_MORE_BYTE (c);
- if (c == '\n')
- {
- if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- {
- coding->result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop;
- }
- }
- else if (c == '\r')
- c = '\n';
- EMIT_CHAR (c);
+ ASSURE_DESTINATION (ccl.produced * 2);
+ for (i = 0; i < ccl.produced; i++)
+ EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
}
- break;
-
- default: /* no need for EOL handling */
- while (1)
+ else
{
- src_base = src;
- ONE_MORE_BYTE (c);
- EMIT_CHAR (c);
+ ASSURE_DESTINATION (ccl.produced);
+ for (i = 0; i < ccl.produced; i++)
+ *dst++ = destination_charbuf[i] & 0xFF;
+ produced_chars += ccl.produced;
}
+ charbuf += ccl.consumed;
+ if (ccl.status == CCL_STAT_QUIT
+ || ccl.status == CCL_STAT_INVALID_CMD)
+ break;
}
- label_end_of_loop:
- coding->consumed = coding->consumed_char = src_base - source;
- coding->produced = dst - destination;
- return;
+ switch (ccl.status)
+ {
+ case CCL_STAT_SUSPEND_BY_SRC:
+ record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
+ break;
+ case CCL_STAT_SUSPEND_BY_DST:
+ record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
+ break;
+ case CCL_STAT_QUIT:
+ case CCL_STAT_INVALID_CMD:
+ record_conversion_result (coding, CODING_RESULT_INTERRUPT);
+ break;
+ default:
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
+ break;
+ }
+
+ coding->produced_char += produced_chars;
+ coding->produced = dst - coding->destination;
+ return 0;
}
-/* See "GENERAL NOTES about `encode_coding_XXX ()' functions". Encode
- format of end-of-line according to `coding->eol_type'. It also
- convert multibyte form 8-bit characters to unibyte if
- CODING->src_multibyte is nonzero. If `coding->mode &
- CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
- also means end-of-line. */
+
+
+/*** 10, 11. no-conversion handlers ***/
+
+/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
static void
-encode_eol (coding, source, destination, src_bytes, dst_bytes)
+decode_coding_raw_text (coding)
struct coding_system *coding;
- const unsigned char *source;
- unsigned char *destination;
- int src_bytes, dst_bytes;
{
- const unsigned char *src = source;
- unsigned char *dst = destination;
- const unsigned char *src_end = src + src_bytes;
- unsigned char *dst_end = dst + dst_bytes;
- Lisp_Object translation_table;
- /* SRC_BASE remembers the start position in source in each loop.
- The loop will be exited when there's not enough source text to
- analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
- there's not enough destination area to produce encoded codes
- (within macro EMIT_BYTES). */
- const unsigned char *src_base;
- unsigned char *tmp;
+ coding->chars_at_source = 1;
+ coding->consumed_char = 0;
+ coding->consumed = 0;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
+}
+
+static int
+encode_coding_raw_text (coding)
+ struct coding_system *coding;
+{
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = coding->charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int produced_chars = 0;
int c;
- int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
- translation_table = Qnil;
- if (coding->src_multibyte
- && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
+ if (multibytep)
{
- src_end--;
- src_bytes--;
- coding->result = CODING_FINISH_INSUFFICIENT_SRC;
- }
+ int safe_room = MAX_MULTIBYTE_LENGTH * 2;
+
+ if (coding->src_multibyte)
+ while (charbuf < charbuf_end)
+ {
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
+ if (ASCII_CHAR_P (c))
+ EMIT_ONE_ASCII_BYTE (c);
+ else if (CHAR_BYTE8_P (c))
+ {
+ c = CHAR_TO_BYTE8 (c);
+ EMIT_ONE_BYTE (c);
+ }
+ else
+ {
+ unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
- if (coding->eol_type == CODING_EOL_CRLF)
+ CHAR_STRING_ADVANCE (c, p1);
+ while (p0 < p1)
+ {
+ EMIT_ONE_BYTE (*p0);
+ p0++;
+ }
+ }
+ }
+ else
+ while (charbuf < charbuf_end)
+ {
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
+ EMIT_ONE_BYTE (c);
+ }
+ }
+ else
{
- while (src < src_end)
+ if (coding->src_multibyte)
{
- src_base = src;
- c = *src++;
- if (c >= 0x20)
- EMIT_ONE_BYTE (c);
- else if (c == '\n' || (c == '\r' && selective_display))
- EMIT_TWO_BYTES ('\r', '\n');
- else
- EMIT_ONE_BYTE (c);
+ int safe_room = MAX_MULTIBYTE_LENGTH;
+
+ while (charbuf < charbuf_end)
+ {
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
+ if (ASCII_CHAR_P (c))
+ *dst++ = c;
+ else if (CHAR_BYTE8_P (c))
+ *dst++ = CHAR_TO_BYTE8 (c);
+ else
+ CHAR_STRING_ADVANCE (c, dst);
+ produced_chars++;
+ }
+ }
+ else
+ {
+ ASSURE_DESTINATION (charbuf_end - charbuf);
+ while (charbuf < charbuf_end && dst < dst_end)
+ *dst++ = *charbuf++;
+ produced_chars = dst - (coding->destination + coding->dst_bytes);
}
+ }
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
+ coding->produced_char += produced_chars;
+ coding->produced = dst - coding->destination;
+ return 0;
+}
+
+/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
+ Check if a text is encoded in a charset-based coding system. If it
+ is, return 1, else return 0. */
+
+static int
+detect_coding_charset (coding, detect_info)
+ struct coding_system *coding;
+ struct coding_detection_info *detect_info;
+{
+ const unsigned char *src = coding->source, *src_base;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ int multibytep = coding->src_multibyte;
+ int consumed_chars = 0;
+ Lisp_Object attrs, valids;
+ int found = 0;
+
+ detect_info->checked |= CATEGORY_MASK_CHARSET;
+
+ coding = &coding_categories[coding_category_charset];
+ attrs = CODING_ID_ATTRS (coding->id);
+ valids = AREF (attrs, coding_attr_charset_valids);
+
+ if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
+ src += coding->head_ascii;
+
+ while (1)
+ {
+ int c;
+
src_base = src;
- label_end_of_loop:
- ;
+ ONE_MORE_BYTE (c);
+ if (c < 0)
+ continue;
+ if (NILP (AREF (valids, c)))
+ break;
+ if (c >= 0x80)
+ found = CATEGORY_MASK_CHARSET;
}
- else
+ detect_info->rejected |= CATEGORY_MASK_CHARSET;
+ return 0;
+
+ no_more_source:
+ detect_info->found |= found;
+ return 1;
+}
+
+static void
+decode_coding_charset (coding)
+ struct coding_system *coding;
+{
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ const unsigned char *src_base;
+ int *charbuf = coding->charbuf + coding->charbuf_used;
+ int *charbuf_end
+ = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+ int consumed_chars = 0, consumed_chars_base;
+ int multibytep = coding->src_multibyte;
+ Lisp_Object attrs, charset_list, valids;
+ int char_offset = coding->produced_char;
+ int last_offset = char_offset;
+ int last_id = charset_ascii;
+
+ CODING_GET_INFO (coding, attrs, charset_list);
+ valids = AREF (attrs, coding_attr_charset_valids);
+
+ while (1)
{
- if (!dst_bytes || src_bytes <= dst_bytes)
+ int c;
+ Lisp_Object val;
+ struct charset *charset;
+ int dim;
+ int len = 1;
+ unsigned code;
+
+ src_base = src;
+ consumed_chars_base = consumed_chars;
+
+ if (charbuf >= charbuf_end)
+ break;
+
+ ONE_MORE_BYTE (c);
+ if (c < 0)
+ goto invalid_code;
+ code = c;
+
+ val = AREF (valids, c);
+ if (NILP (val))
+ goto invalid_code;
+ if (INTEGERP (val))
{
- safe_bcopy (src, dst, src_bytes);
- src_base = src_end;
- dst += src_bytes;
+ charset = CHARSET_FROM_ID (XFASTINT (val));
+ dim = CHARSET_DIMENSION (charset);
+ while (len < dim)
+ {
+ ONE_MORE_BYTE (c);
+ code = (code << 8) | c;
+ len++;
+ }
+ CODING_DECODE_CHAR (coding, src, src_base, src_end,
+ charset, code, c);
}
else
{
- if (coding->src_multibyte
- && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
- dst_bytes--;
- safe_bcopy (src, dst, dst_bytes);
- src_base = src + dst_bytes;
- dst = destination + dst_bytes;
- coding->result = CODING_FINISH_INSUFFICIENT_DST;
+ /* VAL is a list of charset IDs. It is assured that the
+ list is sorted by charset dimensions (smaller one
+ comes first). */
+ while (CONSP (val))
+ {
+ charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
+ dim = CHARSET_DIMENSION (charset);
+ while (len < dim)
+ {
+ ONE_MORE_BYTE (c);
+ code = (code << 8) | c;
+ len++;
+ }
+ CODING_DECODE_CHAR (coding, src, src_base,
+ src_end, charset, code, c);
+ if (c >= 0)
+ break;
+ val = XCDR (val);
+ }
+ }
+ if (c < 0)
+ goto invalid_code;
+ if (charset->id != charset_ascii
+ && last_id != charset->id)
+ {
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
+ last_id = charset->id;
+ last_offset = char_offset;
}
- if (coding->eol_type == CODING_EOL_CR)
+
+ *charbuf++ = c;
+ char_offset++;
+ continue;
+
+ invalid_code:
+ src = src_base;
+ consumed_chars = consumed_chars_base;
+ ONE_MORE_BYTE (c);
+ *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ char_offset++;
+ coding->errors++;
+ }
+
+ no_more_source:
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
+ coding->consumed_char += consumed_chars_base;
+ coding->consumed = src_base - coding->source;
+ coding->charbuf_used = charbuf - coding->charbuf;
+}
+
+static int
+encode_coding_charset (coding)
+ struct coding_system *coding;
+{
+ int multibytep = coding->dst_multibyte;
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int safe_room = MAX_MULTIBYTE_LENGTH;
+ int produced_chars = 0;
+ Lisp_Object attrs, charset_list;
+ int ascii_compatible;
+ int c;
+
+ CODING_GET_INFO (coding, attrs, charset_list);
+ ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
+
+ while (charbuf < charbuf_end)
+ {
+ struct charset *charset;
+ unsigned code;
+
+ ASSURE_DESTINATION (safe_room);
+ c = *charbuf++;
+ if (ascii_compatible && ASCII_CHAR_P (c))
+ EMIT_ONE_ASCII_BYTE (c);
+ else if (CHAR_BYTE8_P (c))
{
- for (tmp = destination; tmp < dst; tmp++)
- if (*tmp == '\n') *tmp = '\r';
+ c = CHAR_TO_BYTE8 (c);
+ EMIT_ONE_BYTE (c);
}
- else if (selective_display)
+ else
{
- for (tmp = destination; tmp < dst; tmp++)
- if (*tmp == '\r') *tmp = '\n';
+ charset = char_charset (c, charset_list, &code);
+ if (charset)
+ {
+ if (CHARSET_DIMENSION (charset) == 1)
+ EMIT_ONE_BYTE (code);
+ else if (CHARSET_DIMENSION (charset) == 2)
+ EMIT_TWO_BYTES (code >> 8, code & 0xFF);
+ else if (CHARSET_DIMENSION (charset) == 3)
+ EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
+ else
+ EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
+ (code >> 8) & 0xFF, code & 0xFF);
+ }
+ else
+ {
+ if (coding->mode & CODING_MODE_SAFE_ENCODING)
+ c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
+ else
+ c = coding->default_char;
+ EMIT_ONE_BYTE (c);
+ }
}
}
- if (coding->src_multibyte)
- dst = destination + str_as_unibyte (destination, dst - destination);
- coding->consumed = src_base - source;
- coding->produced = dst - destination;
- coding->produced_char = coding->produced;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
+ coding->produced_char += produced_chars;
+ coding->produced = dst - coding->destination;
+ return 0;
}
/*** 7. C library functions ***/
-/* In Emacs Lisp, a coding system is represented by a Lisp symbol which
- has a property `coding-system'. The value of this property is a
- vector of length 5 (called the coding-vector). Among elements of
- this vector, the first (element[0]) and the fifth (element[4])
- carry important information for decoding/encoding. Before
- decoding/encoding, this information should be set in fields of a
- structure of type `coding_system'.
-
- The value of the property `coding-system' can be a symbol of another
- subsidiary coding-system. In that case, Emacs gets coding-vector
- from that symbol.
-
- `element[0]' contains information to be set in `coding->type'. The
- value and its meaning is as follows:
-
- 0 -- coding_type_emacs_mule
- 1 -- coding_type_sjis
- 2 -- coding_type_iso2022
- 3 -- coding_type_big5
- 4 -- coding_type_ccl encoder/decoder written in CCL
- nil -- coding_type_no_conversion
- t -- coding_type_undecided (automatic conversion on decoding,
- no-conversion on encoding)
-
- `element[4]' contains information to be set in `coding->flags' and
- `coding->spec'. The meaning varies by `coding->type'.
-
- If `coding->type' is `coding_type_iso2022', element[4] is a vector
- of length 32 (of which the first 13 sub-elements are used now).
- Meanings of these sub-elements are:
-
- sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
- If the value is an integer of valid charset, the charset is
- assumed to be designated to graphic register N initially.
-
- If the value is minus, it is a minus value of charset which
- reserves graphic register N, which means that the charset is
- not designated initially but should be designated to graphic
- register N just before encoding a character in that charset.
-
- If the value is nil, graphic register N is never used on
- encoding.
-
- sub-element[N] where N is 4 through 11: to be set in `coding->flags'
- Each value takes t or nil. See the section ISO2022 of
- `coding.h' for more information.
-
- If `coding->type' is `coding_type_big5', element[4] is t to denote
- BIG5-ETen or nil to denote BIG5-HKU.
-
- If `coding->type' takes the other value, element[4] is ignored.
-
- Emacs Lisp's coding systems also carry information about format of
- end-of-line in a value of property `eol-type'. If the value is
- integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
- means CODING_EOL_CR. If it is not integer, it should be a vector
- of subsidiary coding systems of which property `eol-type' has one
- of the above values.
-
-*/
-
-/* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
- and set it in CODING. If CODING_SYSTEM_SYMBOL is invalid, CODING
- is setup so that no conversion is necessary and return -1, else
- return 0. */
+/* Setup coding context CODING from information about CODING_SYSTEM.
+ If CODING_SYSTEM is nil, `no-conversion' is assumed. If
+ CODING_SYSTEM is invalid, signal an error. */
-int
+void
setup_coding_system (coding_system, coding)
Lisp_Object coding_system;
struct coding_system *coding;
{
- Lisp_Object coding_spec, coding_type, eol_type, plist;
+ Lisp_Object attrs;
+ Lisp_Object eol_type;
+ Lisp_Object coding_type;
Lisp_Object val;
- /* At first, zero clear all members. */
- bzero (coding, sizeof (struct coding_system));
+ if (NILP (coding_system))
+ coding_system = Qundecided;
- /* Initialize some fields required for all kinds of coding systems. */
- coding->symbol = coding_system;
- coding->heading_ascii = -1;
- coding->post_read_conversion = coding->pre_write_conversion = Qnil;
- coding->composing = COMPOSITION_DISABLED;
- coding->cmp_data = NULL;
+ CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
- if (NILP (coding_system))
- goto label_invalid_coding_system;
+ attrs = CODING_ID_ATTRS (coding->id);
+ eol_type = CODING_ID_EOL_TYPE (coding->id);
- coding_spec = Fget (coding_system, Qcoding_system);
+ coding->mode = 0;
+ coding->head_ascii = -1;
+ coding->common_flags
+ = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
+ if (! NILP (CODING_ATTR_POST_READ (attrs)))
+ coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
+ if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
+ coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
+ if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
+ coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
- if (!VECTORP (coding_spec)
- || XVECTOR (coding_spec)->size != 5
- || !CONSP (XVECTOR (coding_spec)->contents[3]))
- goto label_invalid_coding_system;
+ val = CODING_ATTR_SAFE_CHARSETS (attrs);
+ coding->max_charset_id = SCHARS (val) - 1;
+ coding->safe_charsets = (char *) SDATA (val);
+ coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
- eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
- if (VECTORP (eol_type))
+ coding_type = CODING_ATTR_TYPE (attrs);
+ if (EQ (coding_type, Qundecided))
{
- coding->eol_type = CODING_EOL_UNDECIDED;
- coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
- if (system_eol_type != CODING_EOL_LF)
- coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
+ coding->detector = NULL;
+ coding->decoder = decode_coding_raw_text;
+ coding->encoder = encode_coding_raw_text;
+ coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
}
- else if (XFASTINT (eol_type) == 1)
+ else if (EQ (coding_type, Qiso_2022))
{
- coding->eol_type = CODING_EOL_CRLF;
+ int i;
+ int flags = XINT (AREF (attrs, coding_attr_iso_flags));
+
+ /* Invoke graphic register 0 to plane 0. */
+ CODING_ISO_INVOCATION (coding, 0) = 0;
+ /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
+ CODING_ISO_INVOCATION (coding, 1)
+ = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
+ /* Setup the initial status of designation. */
+ for (i = 0; i < 4; i++)
+ CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
+ /* Not single shifting initially. */
+ CODING_ISO_SINGLE_SHIFTING (coding) = 0;
+ /* Beginning of buffer should also be regarded as bol. */
+ CODING_ISO_BOL (coding) = 1;
+ coding->detector = detect_coding_iso_2022;
+ coding->decoder = decode_coding_iso_2022;
+ coding->encoder = encode_coding_iso_2022;
+ if (flags & CODING_ISO_FLAG_SAFE)
+ coding->mode |= CODING_MODE_SAFE_ENCODING;
coding->common_flags
- = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
+ |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
+ | CODING_REQUIRE_FLUSHING_MASK);
+ if (flags & CODING_ISO_FLAG_COMPOSITION)
+ coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
+ if (flags & CODING_ISO_FLAG_DESIGNATION)
+ coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
+ if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
+ {
+ setup_iso_safe_charsets (attrs);
+ val = CODING_ATTR_SAFE_CHARSETS (attrs);
+ coding->max_charset_id = SCHARS (val) - 1;
+ coding->safe_charsets = (char *) SDATA (val);
+ }
+ CODING_ISO_FLAGS (coding) = flags;
}
- else if (XFASTINT (eol_type) == 2)
+ else if (EQ (coding_type, Qcharset))
{
- coding->eol_type = CODING_EOL_CR;
+ coding->detector = detect_coding_charset;
+ coding->decoder = decode_coding_charset;
+ coding->encoder = encode_coding_charset;
coding->common_flags
- = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
+ |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
}
- else
+ else if (EQ (coding_type, Qutf_8))
{
- coding->common_flags = 0;
- coding->eol_type = CODING_EOL_LF;
+ coding->detector = detect_coding_utf_8;
+ coding->decoder = decode_coding_utf_8;
+ coding->encoder = encode_coding_utf_8;
+ coding->common_flags
+ |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
+ }
+ else if (EQ (coding_type, Qutf_16))
+ {
+ val = AREF (attrs, coding_attr_utf_16_bom);
+ CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
+ : EQ (val, Qt) ? utf_16_with_bom
+ : utf_16_without_bom);
+ val = AREF (attrs, coding_attr_utf_16_endian);
+ CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
+ : utf_16_little_endian);
+ CODING_UTF_16_SURROGATE (coding) = 0;
+ coding->detector = detect_coding_utf_16;
+ coding->decoder = decode_coding_utf_16;
+ coding->encoder = encode_coding_utf_16;
+ coding->common_flags
+ |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
+ if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
+ coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
}
-
- coding_type = XVECTOR (coding_spec)->contents[0];
- /* Try short cut. */
- if (SYMBOLP (coding_type))
+ else if (EQ (coding_type, Qccl))
{
- if (EQ (coding_type, Qt))
+ coding->detector = detect_coding_ccl;
+ coding->decoder = decode_coding_ccl;
+ coding->encoder = encode_coding_ccl;
+ coding->common_flags
+ |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
+ | CODING_REQUIRE_FLUSHING_MASK);
+ }
+ else if (EQ (coding_type, Qemacs_mule))
+ {
+ coding->detector = detect_coding_emacs_mule;
+ coding->decoder = decode_coding_emacs_mule;
+ coding->encoder = encode_coding_emacs_mule;
+ coding->common_flags
+ |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
+ if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
+ && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
{
- coding->type = coding_type_undecided;
- coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
+ Lisp_Object tail, safe_charsets;
+ int max_charset_id = 0;
+
+ for (tail = Vemacs_mule_charset_list; CONSP (tail);
+ tail = XCDR (tail))
+ if (max_charset_id < XFASTINT (XCAR (tail)))
+ max_charset_id = XFASTINT (XCAR (tail));
+ safe_charsets = Fmake_string (make_number (max_charset_id + 1),
+ make_number (255));
+ for (tail = Vemacs_mule_charset_list; CONSP (tail);
+ tail = XCDR (tail))
+ SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
+ coding->max_charset_id = max_charset_id;
+ coding->safe_charsets = (char *) SDATA (safe_charsets);
}
- else
- coding->type = coding_type_no_conversion;
- /* Initialize this member. Any thing other than
- CODING_CATEGORY_IDX_UTF_16_BE and
- CODING_CATEGORY_IDX_UTF_16_LE are ok because they have
- special treatment in detect_eol. */
- coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
-
- return 0;
- }
-
- /* Get values of coding system properties:
- `post-read-conversion', `pre-write-conversion',
- `translation-table-for-decode', `translation-table-for-encode'. */
- plist = XVECTOR (coding_spec)->contents[3];
- /* Pre & post conversion functions should be disabled if
- inhibit_eol_conversion is nonzero. This is the case that a code
- conversion function is called while those functions are running. */
- if (! inhibit_pre_post_conversion)
- {
- coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
- coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
- }
- val = Fplist_get (plist, Qtranslation_table_for_decode);
- if (SYMBOLP (val))
- val = Fget (val, Qtranslation_table_for_decode);
- coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
- val = Fplist_get (plist, Qtranslation_table_for_encode);
- if (SYMBOLP (val))
- val = Fget (val, Qtranslation_table_for_encode);
- coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
- val = Fplist_get (plist, Qcoding_category);
- if (!NILP (val))
- {
- val = Fget (val, Qcoding_category_index);
- if (INTEGERP (val))
- coding->category_idx = XINT (val);
- else
- goto label_invalid_coding_system;
}
- else
- goto label_invalid_coding_system;
-
- /* If the coding system has non-nil `composition' property, enable
- composition handling. */
- val = Fplist_get (plist, Qcomposition);
- if (!NILP (val))
- coding->composing = COMPOSITION_NO;
-
- /* If the coding system is ascii-incompatible, record it in
- common_flags. */
- val = Fplist_get (plist, Qascii_incompatible);
- if (! NILP (val))
- coding->common_flags |= CODING_ASCII_INCOMPATIBLE_MASK;
-
- switch (XFASTINT (coding_type))
+ else if (EQ (coding_type, Qshift_jis))
{
- case 0:
- coding->type = coding_type_emacs_mule;
- coding->common_flags
- |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
- if (!NILP (coding->post_read_conversion))
- coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
- if (!NILP (coding->pre_write_conversion))
- coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
- break;
-
- case 1:
- coding->type = coding_type_sjis;
+ coding->detector = detect_coding_sjis;
+ coding->decoder = decode_coding_sjis;
+ coding->encoder = encode_coding_sjis;
coding->common_flags
- |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
- break;
-
- case 2:
- coding->type = coding_type_iso2022;
- coding->common_flags
- |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
- {
- Lisp_Object val, temp;
- Lisp_Object *flags;
- int i, charset, reg_bits = 0;
-
- val = XVECTOR (coding_spec)->contents[4];
-
- if (!VECTORP (val) || XVECTOR (val)->size != 32)
- goto label_invalid_coding_system;
-
- flags = XVECTOR (val)->contents;
- coding->flags
- = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
- | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
- | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
- | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
- | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
- | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
- | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
- | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
- | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
- | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
- | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
- | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
- | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
- );
-
- /* Invoke graphic register 0 to plane 0. */
- CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
- /* Invoke graphic register 1 to plane 1 if we can use full 8-bit. */
- CODING_SPEC_ISO_INVOCATION (coding, 1)
- = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
- /* Not single shifting at first. */
- CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
- /* Beginning of buffer should also be regarded as bol. */
- CODING_SPEC_ISO_BOL (coding) = 1;
-
- for (charset = 0; charset <= MAX_CHARSET; charset++)
- CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
- val = Vcharset_revision_alist;
- while (CONSP (val))
- {
- charset = get_charset_id (Fcar_safe (XCAR (val)));
- if (charset >= 0
- && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
- && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
- CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
- val = XCDR (val);
- }
-
- /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
- FLAGS[REG] can be one of below:
- integer CHARSET: CHARSET occupies register I,
- t: designate nothing to REG initially, but can be used
- by any charsets,
- list of integer, nil, or t: designate the first
- element (if integer) to REG initially, the remaining
- elements (if integer) is designated to REG on request,
- if an element is t, REG can be used by any charsets,
- nil: REG is never used. */
- for (charset = 0; charset <= MAX_CHARSET; charset++)
- CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
- = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
- for (i = 0; i < 4; i++)
- {
- if ((INTEGERP (flags[i])
- && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
- || (charset = get_charset_id (flags[i])) >= 0)
- {
- CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
- CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
- }
- else if (EQ (flags[i], Qt))
- {
- CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
- reg_bits |= 1 << i;
- coding->flags |= CODING_FLAG_ISO_DESIGNATION;
- }
- else if (CONSP (flags[i]))
- {
- Lisp_Object tail;
- tail = flags[i];
-
- coding->flags |= CODING_FLAG_ISO_DESIGNATION;
- if ((INTEGERP (XCAR (tail))
- && (charset = XINT (XCAR (tail)),
- CHARSET_VALID_P (charset)))
- || (charset = get_charset_id (XCAR (tail))) >= 0)
- {
- CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
- CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
- }
- else
- CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
- tail = XCDR (tail);
- while (CONSP (tail))
- {
- if ((INTEGERP (XCAR (tail))
- && (charset = XINT (XCAR (tail)),
- CHARSET_VALID_P (charset)))
- || (charset = get_charset_id (XCAR (tail))) >= 0)
- CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
- = i;
- else if (EQ (XCAR (tail), Qt))
- reg_bits |= 1 << i;
- tail = XCDR (tail);
- }
- }
- else
- CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
-
- CODING_SPEC_ISO_DESIGNATION (coding, i)
- = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
- }
-
- if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
- {
- /* REG 1 can be used only by locking shift in 7-bit env. */
- if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
- reg_bits &= ~2;
- if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
- /* Without any shifting, only REG 0 and 1 can be used. */
- reg_bits &= 3;
- }
-
- if (reg_bits)
- for (charset = 0; charset <= MAX_CHARSET; charset++)
- {
- if (CHARSET_DEFINED_P (charset)
- && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
- == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
- {
- /* There exist some default graphic registers to be
- used by CHARSET. */
-
- /* We had better avoid designating a charset of
- CHARS96 to REG 0 as far as possible. */
- if (CHARSET_CHARS (charset) == 96)
- CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
- = (reg_bits & 2
- ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
- else
- CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
- = (reg_bits & 1
- ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
- }
- }
- }
- coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
- coding->spec.iso2022.last_invalid_designation_register = -1;
- break;
-
- case 3:
- coding->type = coding_type_big5;
- coding->common_flags
- |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
- coding->flags
- = (NILP (XVECTOR (coding_spec)->contents[4])
- ? CODING_FLAG_BIG5_HKU
- : CODING_FLAG_BIG5_ETEN);
- break;
-
- case 4:
- coding->type = coding_type_ccl;
+ |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
+ }
+ else if (EQ (coding_type, Qbig5))
+ {
+ coding->detector = detect_coding_big5;
+ coding->decoder = decode_coding_big5;
+ coding->encoder = encode_coding_big5;
coding->common_flags
- |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
- {
- val = XVECTOR (coding_spec)->contents[4];
- if (! CONSP (val)
- || setup_ccl_program (&(coding->spec.ccl.decoder),
- XCAR (val)) < 0
- || setup_ccl_program (&(coding->spec.ccl.encoder),
- XCDR (val)) < 0)
- goto label_invalid_coding_system;
-
- bzero (coding->spec.ccl.valid_codes, 256);
- val = Fplist_get (plist, Qvalid_codes);
- if (CONSP (val))
- {
- Lisp_Object this;
-
- for (; CONSP (val); val = XCDR (val))
- {
- this = XCAR (val);
- if (INTEGERP (this)
- && XINT (this) >= 0 && XINT (this) < 256)
- coding->spec.ccl.valid_codes[XINT (this)] = 1;
- else if (CONSP (this)
- && INTEGERP (XCAR (this))
- && INTEGERP (XCDR (this)))
- {
- int start = XINT (XCAR (this));
- int end = XINT (XCDR (this));
-
- if (start >= 0 && start <= end && end < 256)
- while (start <= end)
- coding->spec.ccl.valid_codes[start++] = 1;
- }
- }
- }
- }
- coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
- coding->spec.ccl.cr_carryover = 0;
- coding->spec.ccl.eight_bit_carryover[0] = 0;
- break;
-
- case 5:
- coding->type = coding_type_raw_text;
- break;
+ |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
+ }
+ else /* EQ (coding_type, Qraw_text) */
+ {
+ coding->detector = NULL;
+ coding->decoder = decode_coding_raw_text;
+ coding->encoder = encode_coding_raw_text;
+ if (! EQ (eol_type, Qunix))
+ {
+ coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
+ if (! VECTORP (eol_type))
+ coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
+ }
- default:
- goto label_invalid_coding_system;
}
- return 0;
- label_invalid_coding_system:
- coding->type = coding_type_no_conversion;
- coding->category_idx = CODING_CATEGORY_IDX_BINARY;
- coding->common_flags = 0;
- coding->eol_type = CODING_EOL_UNDECIDED;
- coding->pre_write_conversion = coding->post_read_conversion = Qnil;
- return NILP (coding_system) ? 0 : -1;
+ return;
}
-/* Free memory blocks allocated for storing composition information. */
+/* Return a list of charsets supported by CODING. */
-void
-coding_free_composition_data (coding)
+Lisp_Object
+coding_charset_list (coding)
struct coding_system *coding;
{
- struct composition_data *cmp_data = coding->cmp_data, *next;
+ Lisp_Object attrs, charset_list;
- if (!cmp_data)
- return;
- /* Memory blocks are chained. At first, rewind to the first, then,
- free blocks one by one. */
- while (cmp_data->prev)
- cmp_data = cmp_data->prev;
- while (cmp_data)
+ CODING_GET_INFO (coding, attrs, charset_list);
+ if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
+ {
+ int flags = XINT (AREF (attrs, coding_attr_iso_flags));
+
+ if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
+ charset_list = Viso_2022_charset_list;
+ }
+ else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
{
- next = cmp_data->next;
- xfree (cmp_data);
- cmp_data = next;
+ charset_list = Vemacs_mule_charset_list;
}
- coding->cmp_data = NULL;
+ return charset_list;
}
-/* Set `char_offset' member of all memory blocks pointed by
- coding->cmp_data to POS. */
-void
-coding_adjust_composition_offset (coding, pos)
- struct coding_system *coding;
- int pos;
+/* Return raw-text or one of its subsidiaries that has the same
+ eol_type as CODING-SYSTEM. */
+
+Lisp_Object
+raw_text_coding_system (coding_system)
+ Lisp_Object coding_system;
{
- struct composition_data *cmp_data;
+ Lisp_Object spec, attrs;
+ Lisp_Object eol_type, raw_text_eol_type;
- for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
- cmp_data->char_offset = pos;
+ if (NILP (coding_system))
+ return Qraw_text;
+ spec = CODING_SYSTEM_SPEC (coding_system);
+ attrs = AREF (spec, 0);
+
+ if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
+ return coding_system;
+
+ eol_type = AREF (spec, 2);
+ if (VECTORP (eol_type))
+ return Qraw_text;
+ spec = CODING_SYSTEM_SPEC (Qraw_text);
+ raw_text_eol_type = AREF (spec, 2);
+ return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
+ : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
+ : AREF (raw_text_eol_type, 2));
}
-/* Setup raw-text or one of its subsidiaries in the structure
- coding_system CODING according to the already setup value eol_type
- in CODING. CODING should be setup for some coding system in
- advance. */
-void
-setup_raw_text_coding_system (coding)
- struct coding_system *coding;
+/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
+ does, return one of the subsidiary that has the same eol-spec as
+ PARENT. Otherwise, return CODING_SYSTEM. If PARENT is nil,
+ inherit end-of-line format from the system's setting
+ (system_eol_type). */
+
+Lisp_Object
+coding_inherit_eol_type (coding_system, parent)
+ Lisp_Object coding_system, parent;
{
- if (coding->type != coding_type_raw_text)
+ Lisp_Object spec, eol_type;
+
+ if (NILP (coding_system))
+ coding_system = Qraw_text;
+ spec = CODING_SYSTEM_SPEC (coding_system);
+ eol_type = AREF (spec, 2);
+ if (VECTORP (eol_type))
{
- coding->symbol = Qraw_text;
- coding->type = coding_type_raw_text;
- if (coding->eol_type != CODING_EOL_UNDECIDED)
+ Lisp_Object parent_eol_type;
+
+ if (! NILP (parent))
{
- Lisp_Object subsidiaries;
- subsidiaries = Fget (Qraw_text, Qeol_type);
+ Lisp_Object parent_spec;
- if (VECTORP (subsidiaries)
- && XVECTOR (subsidiaries)->size == 3)
- coding->symbol
- = XVECTOR (subsidiaries)->contents[coding->eol_type];
+ parent_spec
+ = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
+ parent_eol_type = AREF (parent_spec, 2);
}
- setup_coding_system (coding->symbol, coding);
- }
- return;
+ else
+ parent_eol_type = system_eol_type;
+ if (EQ (parent_eol_type, Qunix))
+ coding_system = AREF (eol_type, 0);
+ else if (EQ (parent_eol_type, Qdos))
+ coding_system = AREF (eol_type, 1);
+ else if (EQ (parent_eol_type, Qmac))
+ coding_system = AREF (eol_type, 2);
+ }
+ return coding_system;
}
/* Emacs has a mechanism to automatically detect a coding system if it
@@ -4056,14 +5268,14 @@ setup_raw_text_coding_system (coding)
o coding-category-iso-7-else
The category for a coding system which has the same code range
- as ISO2022 of 7-bit environment but uses locking shift or
+ as ISO2022 of 7-bit environemnt but uses locking shift or
single shift functions. Assigned the coding-system (Lisp
symbol) `iso-2022-7bit-lock' by default.
o coding-category-iso-8-else
The category for a coding system which has the same code range
- as ISO2022 of 8-bit environment but uses locking shift or
+ as ISO2022 of 8-bit environemnt but uses locking shift or
single shift functions. Assigned the coding-system (Lisp
symbol) `iso-2022-8bit-ss2' by default.
@@ -4106,2420 +5318,1786 @@ setup_raw_text_coding_system (coding)
`no-conversion' by default.
Each of them is a Lisp symbol and the value is an actual
- `coding-system' (this is also a Lisp symbol) assigned by a user.
+ `coding-system's (this is also a Lisp symbol) assigned by a user.
What Emacs does actually is to detect a category of coding system.
Then, it uses a `coding-system' assigned to it. If Emacs can't
- decide a single possible category, it selects a category of the
+ decide only one possible category, it selects a category of the
highest priority. Priorities of categories are also specified by a
user in a Lisp variable `coding-category-list'.
*/
-static
-int ascii_skip_code[256];
+#define EOL_SEEN_NONE 0
+#define EOL_SEEN_LF 1
+#define EOL_SEEN_CR 2
+#define EOL_SEEN_CRLF 4
-/* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
- If it detects possible coding systems, return an integer in which
- appropriate flag bits are set. Flag bits are defined by macros
- CODING_CATEGORY_MASK_XXX in `coding.h'. If PRIORITIES is non-NULL,
- it should point the table `coding_priorities'. In that case, only
- the flag bit for a coding system of the highest priority is set in
- the returned value. If MULTIBYTEP is nonzero, 8-bit codes of the
- range 0x80..0x9F are in multibyte form.
+/* Detect how end-of-line of a text of length SRC_BYTES pointed by
+ SOURCE is encoded. If CATEGORY is one of
+ coding_category_utf_16_XXXX, assume that CR and LF are encoded by
+ two-byte, else they are encoded by one-byte.
+
+ Return one of EOL_SEEN_XXX. */
- How many ASCII characters are at the head is returned as *SKIP. */
+#define MAX_EOL_CHECK_COUNT 3
static int
-detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
- unsigned char *source;
- int src_bytes, *priorities, *skip;
- int multibytep;
+detect_eol (source, src_bytes, category)
+ const unsigned char *source;
+ EMACS_INT src_bytes;
+ enum coding_category category;
{
- register unsigned char c;
- unsigned char *src = source, *src_end = source + src_bytes;
- unsigned int mask, utf16_examined_p, iso2022_examined_p;
- int i;
+ const unsigned char *src = source, *src_end = src + src_bytes;
+ unsigned char c;
+ int total = 0;
+ int eol_seen = EOL_SEEN_NONE;
- /* At first, skip all ASCII characters and control characters except
- for three ISO2022 specific control characters. */
- ascii_skip_code[ISO_CODE_SO] = 0;
- ascii_skip_code[ISO_CODE_SI] = 0;
- ascii_skip_code[ISO_CODE_ESC] = 0;
-
- label_loop_detect_coding:
- while (src < src_end && ascii_skip_code[*src]) src++;
- *skip = src - source;
-
- if (src >= src_end)
- /* We found nothing other than ASCII. There's nothing to do. */
- return 0;
-
- c = *src;
- /* The text seems to be encoded in some multilingual coding system.
- Now, try to find in which coding system the text is encoded. */
- if (c < 0x80)
- {
- /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
- /* C is an ISO2022 specific control code of C0. */
- mask = detect_coding_iso2022 (src, src_end, multibytep);
- if (mask == 0)
- {
- /* No valid ISO2022 code follows C. Try again. */
- src++;
- if (c == ISO_CODE_ESC)
- ascii_skip_code[ISO_CODE_ESC] = 1;
- else
- ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
- goto label_loop_detect_coding;
- }
- if (priorities)
+ if ((1 << category) & CATEGORY_MASK_UTF_16)
+ {
+ int msb, lsb;
+
+ msb = category == (coding_category_utf_16_le
+ | coding_category_utf_16_le_nosig);
+ lsb = 1 - msb;
+
+ while (src + 1 < src_end)
{
- for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
+ c = src[lsb];
+ if (src[msb] == 0 && (c == '\n' || c == '\r'))
{
- if (mask & priorities[i])
- return priorities[i];
+ int this_eol;
+
+ if (c == '\n')
+ this_eol = EOL_SEEN_LF;
+ else if (src + 3 >= src_end
+ || src[msb + 2] != 0
+ || src[lsb + 2] != '\n')
+ this_eol = EOL_SEEN_CR;
+ else
+ this_eol = EOL_SEEN_CRLF;
+
+ if (eol_seen == EOL_SEEN_NONE)
+ /* This is the first end-of-line. */
+ eol_seen = this_eol;
+ else if (eol_seen != this_eol)
+ {
+ /* The found type is different from what found before. */
+ eol_seen = EOL_SEEN_LF;
+ break;
+ }
+ if (++total == MAX_EOL_CHECK_COUNT)
+ break;
}
- return CODING_CATEGORY_MASK_RAW_TEXT;
+ src += 2;
}
}
else
{
- int try;
-
- if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
- c = src[1] - 0x20;
-
- if (c < 0xA0)
- {
- /* C is the first byte of SJIS character code,
- or a leading-code of Emacs' internal format (emacs-mule),
- or the first byte of UTF-16. */
- try = (CODING_CATEGORY_MASK_SJIS
- | CODING_CATEGORY_MASK_EMACS_MULE
- | CODING_CATEGORY_MASK_UTF_16_BE
- | CODING_CATEGORY_MASK_UTF_16_LE);
-
- /* Or, if C is a special latin extra code,
- or is an ISO2022 specific control code of C1 (SS2 or SS3),
- or is an ISO2022 control-sequence-introducer (CSI),
- we should also consider the possibility of ISO2022 codings. */
- if ((VECTORP (Vlatin_extra_code_table)
- && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
- || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
- || (c == ISO_CODE_CSI
- && (src < src_end
- && (*src == ']'
- || ((*src == '0' || *src == '1' || *src == '2')
- && src + 1 < src_end
- && src[1] == ']')))))
- try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
- | CODING_CATEGORY_MASK_ISO_8BIT);
- }
- else
- /* C is a character of ISO2022 in graphic plane right,
- or a SJIS's 1-byte character code (i.e. JISX0201),
- or the first byte of BIG5's 2-byte code,
- or the first byte of UTF-8/16. */
- try = (CODING_CATEGORY_MASK_ISO_8_ELSE
- | CODING_CATEGORY_MASK_ISO_8BIT
- | CODING_CATEGORY_MASK_SJIS
- | CODING_CATEGORY_MASK_BIG5
- | CODING_CATEGORY_MASK_UTF_8
- | CODING_CATEGORY_MASK_UTF_16_BE
- | CODING_CATEGORY_MASK_UTF_16_LE);
-
- /* Or, we may have to consider the possibility of CCL. */
- if (coding_system_table[CODING_CATEGORY_IDX_CCL]
- && (coding_system_table[CODING_CATEGORY_IDX_CCL]
- ->spec.ccl.valid_codes)[c])
- try |= CODING_CATEGORY_MASK_CCL;
-
- mask = 0;
- utf16_examined_p = iso2022_examined_p = 0;
- if (priorities)
- {
- for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
+ while (src < src_end)
+ {
+ c = *src++;
+ if (c == '\n' || c == '\r')
{
- if (!iso2022_examined_p
- && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
- {
- mask |= detect_coding_iso2022 (src, src_end, multibytep);
- iso2022_examined_p = 1;
- }
- else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
- mask |= detect_coding_sjis (src, src_end, multibytep);
- else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
- mask |= detect_coding_utf_8 (src, src_end, multibytep);
- else if (!utf16_examined_p
- && (priorities[i] & try &
- CODING_CATEGORY_MASK_UTF_16_BE_LE))
+ int this_eol;
+
+ if (c == '\n')
+ this_eol = EOL_SEEN_LF;
+ else if (src >= src_end || *src != '\n')
+ this_eol = EOL_SEEN_CR;
+ else
+ this_eol = EOL_SEEN_CRLF, src++;
+
+ if (eol_seen == EOL_SEEN_NONE)
+ /* This is the first end-of-line. */
+ eol_seen = this_eol;
+ else if (eol_seen != this_eol)
{
- mask |= detect_coding_utf_16 (src, src_end, multibytep);
- utf16_examined_p = 1;
+ /* The found type is different from what found before. */
+ eol_seen = EOL_SEEN_LF;
+ break;
}
- else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
- mask |= detect_coding_big5 (src, src_end, multibytep);
- else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
- mask |= detect_coding_emacs_mule (src, src_end, multibytep);
- else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
- mask |= detect_coding_ccl (src, src_end, multibytep);
- else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
- mask |= CODING_CATEGORY_MASK_RAW_TEXT;
- else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
- mask |= CODING_CATEGORY_MASK_BINARY;
- if (mask & priorities[i])
- return priorities[i];
+ if (++total == MAX_EOL_CHECK_COUNT)
+ break;
}
- return CODING_CATEGORY_MASK_RAW_TEXT;
- }
- if (try & CODING_CATEGORY_MASK_ISO)
- mask |= detect_coding_iso2022 (src, src_end, multibytep);
- if (try & CODING_CATEGORY_MASK_SJIS)
- mask |= detect_coding_sjis (src, src_end, multibytep);
- if (try & CODING_CATEGORY_MASK_BIG5)
- mask |= detect_coding_big5 (src, src_end, multibytep);
- if (try & CODING_CATEGORY_MASK_UTF_8)
- mask |= detect_coding_utf_8 (src, src_end, multibytep);
- if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
- mask |= detect_coding_utf_16 (src, src_end, multibytep);
- if (try & CODING_CATEGORY_MASK_EMACS_MULE)
- mask |= detect_coding_emacs_mule (src, src_end, multibytep);
- if (try & CODING_CATEGORY_MASK_CCL)
- mask |= detect_coding_ccl (src, src_end, multibytep);
- }
- return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
+ }
+ }
+ return eol_seen;
}
-/* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
- The information of the detected coding system is set in CODING. */
-void
-detect_coding (coding, src, src_bytes)
+static Lisp_Object
+adjust_coding_eol_type (coding, eol_seen)
struct coding_system *coding;
- const unsigned char *src;
- int src_bytes;
+ int eol_seen;
{
- unsigned int idx;
- int skip, mask;
- Lisp_Object val;
-
- val = Vcoding_category_list;
- mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip,
- coding->src_multibyte);
- coding->heading_ascii = skip;
-
- if (!mask) return;
-
- /* We found a single coding system of the highest priority in MASK. */
- idx = 0;
- while (mask && ! (mask & 1)) mask >>= 1, idx++;
- if (! mask)
- idx = CODING_CATEGORY_IDX_RAW_TEXT;
+ Lisp_Object eol_type;
- val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[idx]);
-
- if (coding->eol_type != CODING_EOL_UNDECIDED)
+ eol_type = CODING_ID_EOL_TYPE (coding->id);
+ if (eol_seen & EOL_SEEN_LF)
{
- Lisp_Object tmp;
-
- tmp = Fget (val, Qeol_type);
- if (VECTORP (tmp))
- val = XVECTOR (tmp)->contents[coding->eol_type];
+ coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
+ eol_type = Qunix;
}
-
- /* Setup this new coding system while preserving some slots. */
- {
- int src_multibyte = coding->src_multibyte;
- int dst_multibyte = coding->dst_multibyte;
-
- setup_coding_system (val, coding);
- coding->src_multibyte = src_multibyte;
- coding->dst_multibyte = dst_multibyte;
- coding->heading_ascii = skip;
- }
+ else if (eol_seen & EOL_SEEN_CRLF)
+ {
+ coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
+ eol_type = Qdos;
+ }
+ else if (eol_seen & EOL_SEEN_CR)
+ {
+ coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
+ eol_type = Qmac;
+ }
+ return eol_type;
}
-/* Detect how end-of-line of a text of length SRC_BYTES pointed by
- SOURCE is encoded. Return one of CODING_EOL_LF, CODING_EOL_CRLF,
- CODING_EOL_CR, and CODING_EOL_UNDECIDED.
-
- How many non-eol characters are at the head is returned as *SKIP. */
-
-#define MAX_EOL_CHECK_COUNT 3
+/* Detect how a text specified in CODING is encoded. If a coding
+ system is detected, update fields of CODING by the detected coding
+ system. */
-static int
-detect_eol_type (source, src_bytes, skip)
- unsigned char *source;
- int src_bytes, *skip;
+void
+detect_coding (coding)
+ struct coding_system *coding;
{
- unsigned char *src = source, *src_end = src + src_bytes;
- unsigned char c;
- int total = 0; /* How many end-of-lines are found so far. */
- int eol_type = CODING_EOL_UNDECIDED;
- int this_eol_type;
+ const unsigned char *src, *src_end;
+
+ coding->consumed = coding->consumed_char = 0;
+ coding->produced = coding->produced_char = 0;
+ coding_set_source (coding);
- *skip = 0;
+ src_end = coding->source + coding->src_bytes;
- while (src < src_end && total < MAX_EOL_CHECK_COUNT)
+ /* If we have not yet decided the text encoding type, detect it
+ now. */
+ if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
{
- c = *src++;
- if (c == '\n' || c == '\r')
- {
- if (*skip == 0)
- *skip = src - 1 - source;
- total++;
- if (c == '\n')
- this_eol_type = CODING_EOL_LF;
- else if (src >= src_end || *src != '\n')
- this_eol_type = CODING_EOL_CR;
- else
- this_eol_type = CODING_EOL_CRLF, src++;
+ int c, i;
+ struct coding_detection_info detect_info;
- if (eol_type == CODING_EOL_UNDECIDED)
- /* This is the first end-of-line. */
- eol_type = this_eol_type;
- else if (eol_type != this_eol_type)
+ detect_info.checked = detect_info.found = detect_info.rejected = 0;
+ for (i = 0, src = coding->source; src < src_end; i++, src++)
+ {
+ c = *src;
+ if (c & 0x80)
+ break;
+ if (c < 0x20
+ && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+ && ! inhibit_iso_escape_detection
+ && ! detect_info.checked)
{
- /* The found type is different from what found before. */
- eol_type = CODING_EOL_INCONSISTENT;
- break;
+ coding->head_ascii = src - (coding->source + coding->consumed);
+ if (detect_coding_iso_2022 (coding, &detect_info))
+ {
+ /* We have scanned the whole data. */
+ if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
+ /* We didn't find an 8-bit code. */
+ src = src_end;
+ break;
+ }
}
}
+ coding->head_ascii = src - (coding->source + coding->consumed);
+
+ if (coding->head_ascii < coding->src_bytes
+ || detect_info.found)
+ {
+ enum coding_category category;
+ struct coding_system *this;
+
+ if (coding->head_ascii == coding->src_bytes)
+ /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
+ for (i = 0; i < coding_category_raw_text; i++)
+ {
+ category = coding_priorities[i];
+ this = coding_categories + category;
+ if (detect_info.found & (1 << category))
+ break;
+ }
+ else
+ for (i = 0; i < coding_category_raw_text; i++)
+ {
+ category = coding_priorities[i];
+ this = coding_categories + category;
+ if (this->id < 0)
+ {
+ /* No coding system of this category is defined. */
+ detect_info.rejected |= (1 << category);
+ }
+ else if (category >= coding_category_raw_text)
+ continue;
+ else if (detect_info.checked & (1 << category))
+ {
+ if (detect_info.found & (1 << category))
+ break;
+ }
+ else if ((*(this->detector)) (coding, &detect_info)
+ && detect_info.found & (1 << category))
+ {
+ if (category == coding_category_utf_16_auto)
+ {
+ if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+ category = coding_category_utf_16_le;
+ else
+ category = coding_category_utf_16_be;
+ }
+ break;
+ }
+ }
+
+ if (i < coding_category_raw_text)
+ setup_coding_system (CODING_ID_NAME (this->id), coding);
+ else if (detect_info.rejected == CATEGORY_MASK_ANY)
+ setup_coding_system (Qraw_text, coding);
+ else if (detect_info.rejected)
+ for (i = 0; i < coding_category_raw_text; i++)
+ if (! (detect_info.rejected & (1 << coding_priorities[i])))
+ {
+ this = coding_categories + coding_priorities[i];
+ setup_coding_system (CODING_ID_NAME (this->id), coding);
+ break;
+ }
+ }
}
+ else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
+ == coding_category_utf_16_auto)
+ {
+ Lisp_Object coding_systems;
+ struct coding_detection_info detect_info;
- if (*skip == 0)
- *skip = src_end - source;
- return eol_type;
+ coding_systems
+ = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
+ detect_info.found = detect_info.rejected = 0;
+ if (CONSP (coding_systems)
+ && detect_coding_utf_16 (coding, &detect_info))
+ {
+ if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+ setup_coding_system (XCAR (coding_systems), coding);
+ else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
+ setup_coding_system (XCDR (coding_systems), coding);
+ }
+ }
}
-/* Like detect_eol_type, but detect EOL type in 2-octet
- big-endian/little-endian format for coding systems utf-16-be and
- utf-16-le. */
-static int
-detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
- unsigned char *source;
- int src_bytes, *skip, big_endian_p;
+static void
+decode_eol (coding)
+ struct coding_system *coding;
{
- unsigned char *src = source, *src_end = src + src_bytes;
- unsigned int c1, c2;
- int total = 0; /* How many end-of-lines are found so far. */
- int eol_type = CODING_EOL_UNDECIDED;
- int this_eol_type;
- int msb, lsb;
-
- if (big_endian_p)
- msb = 0, lsb = 1;
- else
- msb = 1, lsb = 0;
+ Lisp_Object eol_type;
+ unsigned char *p, *pbeg, *pend;
+
+ eol_type = CODING_ID_EOL_TYPE (coding->id);
+ if (EQ (eol_type, Qunix))
+ return;
- *skip = 0;
+ if (NILP (coding->dst_object))
+ pbeg = coding->destination;
+ else
+ pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
+ pend = pbeg + coding->produced;
- while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
+ if (VECTORP (eol_type))
{
- c1 = (src[msb] << 8) | (src[lsb]);
- src += 2;
+ int eol_seen = EOL_SEEN_NONE;
- if (c1 == '\n' || c1 == '\r')
+ for (p = pbeg; p < pend; p++)
{
- if (*skip == 0)
- *skip = src - 2 - source;
- total++;
- if (c1 == '\n')
- {
- this_eol_type = CODING_EOL_LF;
- }
- else
+ if (*p == '\n')
+ eol_seen |= EOL_SEEN_LF;
+ else if (*p == '\r')
{
- if ((src + 1) >= src_end)
+ if (p + 1 < pend && *(p + 1) == '\n')
{
- this_eol_type = CODING_EOL_CR;
+ eol_seen |= EOL_SEEN_CRLF;
+ p++;
}
else
- {
- c2 = (src[msb] << 8) | (src[lsb]);
- if (c2 == '\n')
- this_eol_type = CODING_EOL_CRLF, src += 2;
- else
- this_eol_type = CODING_EOL_CR;
- }
- }
-
- if (eol_type == CODING_EOL_UNDECIDED)
- /* This is the first end-of-line. */
- eol_type = this_eol_type;
- else if (eol_type != this_eol_type)
- {
- /* The found type is different from what found before. */
- eol_type = CODING_EOL_INCONSISTENT;
- break;
+ eol_seen |= EOL_SEEN_CR;
}
}
+ if (eol_seen != EOL_SEEN_NONE
+ && eol_seen != EOL_SEEN_LF
+ && eol_seen != EOL_SEEN_CRLF
+ && eol_seen != EOL_SEEN_CR)
+ eol_seen = EOL_SEEN_LF;
+ if (eol_seen != EOL_SEEN_NONE)
+ eol_type = adjust_coding_eol_type (coding, eol_seen);
}
- if (*skip == 0)
- *skip = src_end - source;
- return eol_type;
-}
-
-/* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
- is encoded. If it detects an appropriate format of end-of-line, it
- sets the information in *CODING. */
-
-void
-detect_eol (coding, src, src_bytes)
- struct coding_system *coding;
- const unsigned char *src;
- int src_bytes;
-{
- Lisp_Object val;
- int skip;
- int eol_type;
-
- switch (coding->category_idx)
+ if (EQ (eol_type, Qmac))
{
- case CODING_CATEGORY_IDX_UTF_16_BE:
- eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
- break;
- case CODING_CATEGORY_IDX_UTF_16_LE:
- eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
- break;
- default:
- eol_type = detect_eol_type (src, src_bytes, &skip);
- break;
+ for (p = pbeg; p < pend; p++)
+ if (*p == '\r')
+ *p = '\n';
}
-
- if (coding->heading_ascii > skip)
- coding->heading_ascii = skip;
- else
- skip = coding->heading_ascii;
-
- if (eol_type == CODING_EOL_UNDECIDED)
- return;
- if (eol_type == CODING_EOL_INCONSISTENT)
+ else if (EQ (eol_type, Qdos))
{
-#if 0
- /* This code is suppressed until we find a better way to
- distinguish raw text file and binary file. */
+ int n = 0;
- /* If we have already detected that the coding is raw-text, the
- coding should actually be no-conversion. */
- if (coding->type == coding_type_raw_text)
+ if (NILP (coding->dst_object))
{
- setup_coding_system (Qno_conversion, coding);
- return;
+ /* Start deleting '\r' from the tail to minimize the memory
+ movement. */
+ for (p = pend - 2; p >= pbeg; p--)
+ if (*p == '\r')
+ {
+ safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
+ n++;
+ }
}
- /* Else, let's decode only text code anyway. */
-#endif /* 0 */
- eol_type = CODING_EOL_LF;
- }
-
- val = Fget (coding->symbol, Qeol_type);
- if (VECTORP (val) && XVECTOR (val)->size == 3)
- {
- int src_multibyte = coding->src_multibyte;
- int dst_multibyte = coding->dst_multibyte;
- struct composition_data *cmp_data = coding->cmp_data;
+ else
+ {
+ int pos_byte = coding->dst_pos_byte;
+ int pos = coding->dst_pos;
+ int pos_end = pos + coding->produced_char - 1;
- setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
- coding->src_multibyte = src_multibyte;
- coding->dst_multibyte = dst_multibyte;
- coding->heading_ascii = skip;
- coding->cmp_data = cmp_data;
+ while (pos < pos_end)
+ {
+ p = BYTE_POS_ADDR (pos_byte);
+ if (*p == '\r' && p[1] == '\n')
+ {
+ del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
+ n++;
+ pos_end--;
+ }
+ pos++;
+ pos_byte += BYTES_BY_CHAR_HEAD (*p);
+ }
+ }
+ coding->produced -= n;
+ coding->produced_char -= n;
}
}
-#define CONVERSION_BUFFER_EXTRA_ROOM 256
-
-#define DECODING_BUFFER_MAG(coding) \
- (coding->type == coding_type_iso2022 \
- ? 3 \
- : (coding->type == coding_type_ccl \
- ? coding->spec.ccl.decoder.buf_magnification \
- : 2))
-
-/* Return maximum size (bytes) of a buffer enough for decoding
- SRC_BYTES of text encoded in CODING. */
-
-int
-decoding_buffer_size (coding, src_bytes)
- struct coding_system *coding;
- int src_bytes;
-{
- return (src_bytes * DECODING_BUFFER_MAG (coding)
- + CONVERSION_BUFFER_EXTRA_ROOM);
-}
-/* Return maximum size (bytes) of a buffer enough for encoding
- SRC_BYTES of text to CODING. */
+/* Return a translation table (or list of them) from coding system
+ attribute vector ATTRS for encoding (ENCODEP is nonzero) or
+ decoding (ENCODEP is zero). */
-int
-encoding_buffer_size (coding, src_bytes)
- struct coding_system *coding;
- int src_bytes;
+static Lisp_Object
+get_translation_table (attrs, encodep, max_lookup)
+ Lisp_Object attrs;
+ int encodep, *max_lookup;
{
- int magnification;
+ Lisp_Object standard, translation_table;
+ Lisp_Object val;
- if (coding->type == coding_type_ccl)
- {
- magnification = coding->spec.ccl.encoder.buf_magnification;
- if (coding->eol_type == CODING_EOL_CRLF)
- magnification *= 2;
- }
- else if (CODING_REQUIRE_ENCODING (coding))
- magnification = 3;
+ if (encodep)
+ translation_table = CODING_ATTR_ENCODE_TBL (attrs),
+ standard = Vstandard_translation_table_for_encode;
+ else
+ translation_table = CODING_ATTR_DECODE_TBL (attrs),
+ standard = Vstandard_translation_table_for_decode;
+ if (NILP (translation_table))
+ translation_table = standard;
else
- magnification = 1;
-
- return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
-}
-
-/* Working buffer for code conversion. */
-struct conversion_buffer
-{
- int size; /* size of data. */
- int on_stack; /* 1 if allocated by alloca. */
- unsigned char *data;
-};
-
-/* Allocate LEN bytes of memory for BUF (struct conversion_buffer). */
-#define allocate_conversion_buffer(buf, len) \
- do { \
- if (len < MAX_ALLOCA) \
- { \
- buf.data = (unsigned char *) alloca (len); \
- buf.on_stack = 1; \
- } \
- else \
- { \
- buf.data = (unsigned char *) xmalloc (len); \
- buf.on_stack = 0; \
- } \
- buf.size = len; \
- } while (0)
-
-/* Double the allocated memory for *BUF. */
-static void
-extend_conversion_buffer (buf)
- struct conversion_buffer *buf;
-{
- if (buf->on_stack)
{
- unsigned char *save = buf->data;
- buf->data = (unsigned char *) xmalloc (buf->size * 2);
- bcopy (save, buf->data, buf->size);
- buf->on_stack = 0;
+ if (SYMBOLP (translation_table))
+ translation_table = Fget (translation_table, Qtranslation_table);
+ else if (CONSP (translation_table))
+ {
+ translation_table = Fcopy_sequence (translation_table);
+ for (val = translation_table; CONSP (val); val = XCDR (val))
+ if (SYMBOLP (XCAR (val)))
+ XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
+ }
+ if (CHAR_TABLE_P (standard))
+ {
+ if (CONSP (translation_table))
+ translation_table = nconc2 (translation_table,
+ Fcons (standard, Qnil));
+ else
+ translation_table = Fcons (translation_table,
+ Fcons (standard, Qnil));
+ }
}
- else
+
+ if (max_lookup)
{
- buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
+ *max_lookup = 1;
+ if (CHAR_TABLE_P (translation_table)
+ && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
+ {
+ val = XCHAR_TABLE (translation_table)->extras[1];
+ if (NATNUMP (val) && *max_lookup < XFASTINT (val))
+ *max_lookup = XFASTINT (val);
+ }
+ else if (CONSP (translation_table))
+ {
+ Lisp_Object tail, val;
+
+ for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
+ if (CHAR_TABLE_P (XCAR (tail))
+ && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
+ {
+ val = XCHAR_TABLE (XCAR (tail))->extras[1];
+ if (NATNUMP (val) && *max_lookup < XFASTINT (val))
+ *max_lookup = XFASTINT (val);
+ }
+ }
}
- buf->size *= 2;
+ return translation_table;
}
-/* Free the allocated memory for BUF if it is not on stack. */
-static void
-free_conversion_buffer (buf)
- struct conversion_buffer *buf;
-{
- if (!buf->on_stack)
- xfree (buf->data);
-}
+#define LOOKUP_TRANSLATION_TABLE(table, c, trans) \
+ do { \
+ trans = Qnil; \
+ if (CHAR_TABLE_P (table)) \
+ { \
+ trans = CHAR_TABLE_REF (table, c); \
+ if (CHARACTERP (trans)) \
+ c = XFASTINT (trans), trans = Qnil; \
+ } \
+ else if (CONSP (table)) \
+ { \
+ Lisp_Object tail; \
+ \
+ for (tail = table; CONSP (tail); tail = XCDR (tail)) \
+ if (CHAR_TABLE_P (XCAR (tail))) \
+ { \
+ trans = CHAR_TABLE_REF (XCAR (tail), c); \
+ if (CHARACTERP (trans)) \
+ c = XFASTINT (trans), trans = Qnil; \
+ else if (! NILP (trans)) \
+ break; \
+ } \
+ } \
+ } while (0)
-int
-ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
- struct coding_system *coding;
- unsigned char *source, *destination;
- int src_bytes, dst_bytes, encodep;
-{
- struct ccl_program *ccl
- = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
- unsigned char *dst = destination;
- ccl->suppress_error = coding->suppress_error;
- ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
- if (encodep)
- {
- /* On encoding, EOL format is converted within ccl_driver. For
- that, setup proper information in the structure CCL. */
- ccl->eol_type = coding->eol_type;
- if (ccl->eol_type ==CODING_EOL_UNDECIDED)
- ccl->eol_type = CODING_EOL_LF;
- ccl->cr_consumed = coding->spec.ccl.cr_carryover;
- ccl->eight_bit_control = coding->dst_multibyte;
- }
- else
- ccl->eight_bit_control = 1;
- ccl->multibyte = coding->src_multibyte;
- if (coding->spec.ccl.eight_bit_carryover[0] != 0)
+static Lisp_Object
+get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
+ Lisp_Object val;
+ int *buf, *buf_end;
+ int last_block;
+ int *from_nchars, *to_nchars;
+{
+ /* VAL is TO or (([FROM-CHAR ...] . TO) ...) where TO is TO-CHAR or
+ [TO-CHAR ...]. */
+ if (CONSP (val))
{
- /* Move carryover bytes to DESTINATION. */
- unsigned char *p = coding->spec.ccl.eight_bit_carryover;
- while (*p)
- *dst++ = *p++;
- coding->spec.ccl.eight_bit_carryover[0] = 0;
- if (dst_bytes)
- dst_bytes -= dst - destination;
- }
+ Lisp_Object from, tail;
+ int i, len;
- coding->produced = (ccl_driver (ccl, source, dst, src_bytes, dst_bytes,
- &(coding->consumed))
- + dst - destination);
-
- if (encodep)
- {
- coding->produced_char = coding->produced;
- coding->spec.ccl.cr_carryover = ccl->cr_consumed;
- }
- else if (!ccl->eight_bit_control)
- {
- /* The produced bytes forms a valid multibyte sequence. */
- coding->produced_char
- = multibyte_chars_in_text (destination, coding->produced);
- coding->spec.ccl.eight_bit_carryover[0] = 0;
- }
- else
- {
- /* On decoding, the destination should always multibyte. But,
- CCL program might have been generated an invalid multibyte
- sequence. Here we make such a sequence valid as
- multibyte. */
- int bytes
- = dst_bytes ? dst_bytes : source + coding->consumed - destination;
-
- if ((coding->consumed < src_bytes
- || !ccl->last_block)
- && coding->produced >= 1
- && destination[coding->produced - 1] >= 0x80)
- {
- /* We should not convert the tailing 8-bit codes to
- multibyte form even if they doesn't form a valid
- multibyte sequence. They may form a valid sequence in
- the next call. */
- int carryover = 0;
-
- if (destination[coding->produced - 1] < 0xA0)
- carryover = 1;
- else if (coding->produced >= 2)
+ for (tail = val; CONSP (tail); tail = XCDR (tail))
+ {
+ val = XCAR (tail);
+ from = XCAR (val);
+ len = ASIZE (from);
+ for (i = 0; i < len; i++)
{
- if (destination[coding->produced - 2] >= 0x80)
+ if (buf + i == buf_end)
{
- if (destination[coding->produced - 2] < 0xA0)
- carryover = 2;
- else if (coding->produced >= 3
- && destination[coding->produced - 3] >= 0x80
- && destination[coding->produced - 3] < 0xA0)
- carryover = 3;
+ if (! last_block)
+ return Qt;
+ break;
}
+ if (XINT (AREF (from, i)) != buf[i])
+ break;
}
- if (carryover > 0)
+ if (i == len)
{
- BCOPY_SHORT (destination + coding->produced - carryover,
- coding->spec.ccl.eight_bit_carryover,
- carryover);
- coding->spec.ccl.eight_bit_carryover[carryover] = 0;
- coding->produced -= carryover;
+ val = XCDR (val);
+ *from_nchars = len;
+ break;
}
}
- coding->produced = str_as_multibyte (destination, bytes,
- coding->produced,
- &(coding->produced_char));
- }
-
- switch (ccl->status)
- {
- case CCL_STAT_SUSPEND_BY_SRC:
- coding->result = CODING_FINISH_INSUFFICIENT_SRC;
- break;
- case CCL_STAT_SUSPEND_BY_DST:
- coding->result = CODING_FINISH_INSUFFICIENT_DST;
- break;
- case CCL_STAT_QUIT:
- case CCL_STAT_INVALID_CMD:
- coding->result = CODING_FINISH_INTERRUPT;
- break;
- default:
- coding->result = CODING_FINISH_NORMAL;
- break;
+ if (! CONSP (tail))
+ return Qnil;
}
- return coding->result;
+ if (VECTORP (val))
+ *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
+ else
+ *buf = XINT (val);
+ return val;
}
-/* Decode EOL format of the text at PTR of BYTES length destructively
- according to CODING->eol_type. This is called after the CCL
- program produced a decoded text at PTR. If we do CRLF->LF
- conversion, update CODING->produced and CODING->produced_char. */
-static void
-decode_eol_post_ccl (coding, ptr, bytes)
+static int
+produce_chars (coding, translation_table, last_block)
struct coding_system *coding;
- unsigned char *ptr;
- int bytes;
+ Lisp_Object translation_table;
+ int last_block;
{
- Lisp_Object val, saved_coding_symbol;
- unsigned char *pend = ptr + bytes;
- int dummy;
-
- /* Remember the current coding system symbol. We set it back when
- an inconsistent EOL is found so that `last-coding-system-used' is
- set to the coding system that doesn't specify EOL conversion. */
- saved_coding_symbol = coding->symbol;
+ unsigned char *dst = coding->destination + coding->produced;
+ unsigned char *dst_end = coding->destination + coding->dst_bytes;
+ int produced;
+ int produced_chars = 0;
+ int carryover = 0;
- coding->spec.ccl.cr_carryover = 0;
- if (coding->eol_type == CODING_EOL_UNDECIDED)
+ if (! coding->chars_at_source)
{
- /* Here, to avoid the call of setup_coding_system, we directly
- call detect_eol_type. */
- coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
- if (coding->eol_type == CODING_EOL_INCONSISTENT)
- coding->eol_type = CODING_EOL_LF;
- if (coding->eol_type != CODING_EOL_UNDECIDED)
- {
- val = Fget (coding->symbol, Qeol_type);
- if (VECTORP (val) && XVECTOR (val)->size == 3)
- coding->symbol = XVECTOR (val)->contents[coding->eol_type];
- }
- coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
- }
+ /* Characters are in coding->charbuf. */
+ int *buf = coding->charbuf;
+ int *buf_end = buf + coding->charbuf_used;
- if (coding->eol_type == CODING_EOL_LF
- || coding->eol_type == CODING_EOL_UNDECIDED)
- {
- /* We have nothing to do. */
- ptr = pend;
- }
- else if (coding->eol_type == CODING_EOL_CRLF)
- {
- unsigned char *pstart = ptr, *p = ptr;
+ if (BUFFERP (coding->src_object)
+ && EQ (coding->src_object, coding->dst_object))
+ dst_end = ((unsigned char *) coding->source) + coding->consumed;
- if (! (coding->mode & CODING_MODE_LAST_BLOCK)
- && *(pend - 1) == '\r')
- {
- /* If the last character is CR, we can't handle it here
- because LF will be in the not-yet-decoded source text.
- Record that the CR is not yet processed. */
- coding->spec.ccl.cr_carryover = 1;
- coding->produced--;
- coding->produced_char--;
- pend--;
- }
- while (ptr < pend)
+ while (buf < buf_end)
{
- if (*ptr == '\r')
+ int c = *buf, i;
+
+ if (c >= 0)
{
- if (ptr + 1 < pend && *(ptr + 1) == '\n')
+ int from_nchars = 1, to_nchars = 1;
+ Lisp_Object trans = Qnil;
+
+ LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
+ if (! NILP (trans))
{
- *p++ = '\n';
- ptr += 2;
+ trans = get_translation (trans, buf, buf_end, last_block,
+ &from_nchars, &to_nchars);
+ if (EQ (trans, Qt))
+ break;
+ c = *buf;
}
- else
+
+ if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
{
- if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- goto undo_eol_conversion;
- *p++ = *ptr++;
+ dst = alloc_destination (coding,
+ buf_end - buf
+ + MAX_MULTIBYTE_LENGTH * to_nchars,
+ dst);
+ dst_end = coding->destination + coding->dst_bytes;
}
- }
- else if (*ptr == '\n'
- && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- goto undo_eol_conversion;
- else
- *p++ = *ptr++;
- continue;
-
- undo_eol_conversion:
- /* We have faced with inconsistent EOL format at PTR.
- Convert all LFs before PTR back to CRLFs. */
- for (p--, ptr--; p >= pstart; p--)
- {
- if (*p == '\n')
- *ptr-- = '\n', *ptr-- = '\r';
- else
- *ptr-- = *p;
- }
- /* If carryover is recorded, cancel it because we don't
- convert CRLF anymore. */
- if (coding->spec.ccl.cr_carryover)
- {
- coding->spec.ccl.cr_carryover = 0;
- coding->produced++;
- coding->produced_char++;
- pend++;
- }
- p = ptr = pend;
- coding->eol_type = CODING_EOL_LF;
- coding->symbol = saved_coding_symbol;
- }
- if (p < pend)
- {
- /* As each two-byte sequence CRLF was converted to LF, (PEND
- - P) is the number of deleted characters. */
- coding->produced -= pend - p;
- coding->produced_char -= pend - p;
- }
- }
- else /* i.e. coding->eol_type == CODING_EOL_CR */
- {
- unsigned char *p = ptr;
- for (; ptr < pend; ptr++)
- {
- if (*ptr == '\r')
- *ptr = '\n';
- else if (*ptr == '\n'
- && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- {
- for (; p < ptr; p++)
+ for (i = 0; i < to_nchars; i++)
{
- if (*p == '\n')
- *p = '\r';
+ if (i > 0)
+ c = XINT (AREF (trans, i));
+ if (coding->dst_multibyte
+ || ! CHAR_BYTE8_P (c))
+ CHAR_STRING_ADVANCE (c, dst);
+ else
+ *dst++ = CHAR_TO_BYTE8 (c);
}
- ptr = pend;
- coding->eol_type = CODING_EOL_LF;
- coding->symbol = saved_coding_symbol;
+ produced_chars += to_nchars;
+ *buf++ = to_nchars;
+ while (--from_nchars > 0)
+ *buf++ = 0;
}
+ else
+ /* This is an annotation datum. (-C) is the length. */
+ buf += -c;
}
+ carryover = buf_end - buf;
}
-}
-
-/* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before
- decoding, it may detect coding system and format of end-of-line if
- those are not yet decided. The source should be unibyte, the
- result is multibyte if CODING->dst_multibyte is nonzero, else
- unibyte. */
-
-int
-decode_coding (coding, source, destination, src_bytes, dst_bytes)
- struct coding_system *coding;
- const unsigned char *source;
- unsigned char *destination;
- int src_bytes, dst_bytes;
-{
- int extra = 0;
-
- if (coding->type == coding_type_undecided)
- detect_coding (coding, source, src_bytes);
-
- if (coding->eol_type == CODING_EOL_UNDECIDED
- && coding->type != coding_type_ccl)
+ else
{
- detect_eol (coding, source, src_bytes);
- /* We had better recover the original eol format if we
- encounter an inconsistent eol format while decoding. */
- coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
- }
+ const unsigned char *src = coding->source;
+ const unsigned char *src_end = src + coding->src_bytes;
+ Lisp_Object eol_type;
- coding->produced = coding->produced_char = 0;
- coding->consumed = coding->consumed_char = 0;
- coding->errors = 0;
- coding->result = CODING_FINISH_NORMAL;
+ eol_type = CODING_ID_EOL_TYPE (coding->id);
- switch (coding->type)
- {
- case coding_type_sjis:
- decode_coding_sjis_big5 (coding, source, destination,
- src_bytes, dst_bytes, 1);
- break;
-
- case coding_type_iso2022:
- decode_coding_iso2022 (coding, source, destination,
- src_bytes, dst_bytes);
- break;
+ if (coding->src_multibyte != coding->dst_multibyte)
+ {
+ if (coding->src_multibyte)
+ {
+ int multibytep = 1;
+ int consumed_chars;
- case coding_type_big5:
- decode_coding_sjis_big5 (coding, source, destination,
- src_bytes, dst_bytes, 0);
- break;
+ while (1)
+ {
+ const unsigned char *src_base = src;
+ int c;
- case coding_type_emacs_mule:
- decode_coding_emacs_mule (coding, source, destination,
- src_bytes, dst_bytes);
- break;
+ ONE_MORE_BYTE (c);
+ if (c == '\r')
+ {
+ if (EQ (eol_type, Qdos))
+ {
+ if (src == src_end)
+ {
+ record_conversion_result
+ (coding, CODING_RESULT_INSUFFICIENT_SRC);
+ goto no_more_source;
+ }
+ if (*src == '\n')
+ c = *src++;
+ }
+ else if (EQ (eol_type, Qmac))
+ c = '\n';
+ }
+ if (dst == dst_end)
+ {
+ coding->consumed = src - coding->source;
+
+ if (EQ (coding->src_object, coding->dst_object))
+ dst_end = (unsigned char *) src;
+ if (dst == dst_end)
+ {
+ dst = alloc_destination (coding, src_end - src + 1,
+ dst);
+ dst_end = coding->destination + coding->dst_bytes;
+ coding_set_source (coding);
+ src = coding->source + coding->consumed;
+ src_end = coding->source + coding->src_bytes;
+ }
+ }
+ *dst++ = c;
+ produced_chars++;
+ }
+ no_more_source:
+ ;
+ }
+ else
+ while (src < src_end)
+ {
+ int multibytep = 1;
+ int c = *src++;
- case coding_type_ccl:
- if (coding->spec.ccl.cr_carryover)
- {
- /* Put the CR which was not processed by the previous call
- of decode_eol_post_ccl in DESTINATION. It will be
- decoded together with the following LF by the call to
- decode_eol_post_ccl below. */
- *destination = '\r';
- coding->produced++;
- coding->produced_char++;
- dst_bytes--;
- extra = coding->spec.ccl.cr_carryover;
+ if (c == '\r')
+ {
+ if (EQ (eol_type, Qdos))
+ {
+ if (src < src_end
+ && *src == '\n')
+ c = *src++;
+ }
+ else if (EQ (eol_type, Qmac))
+ c = '\n';
+ }
+ if (dst >= dst_end - 1)
+ {
+ coding->consumed = src - coding->source;
+
+ if (EQ (coding->src_object, coding->dst_object))
+ dst_end = (unsigned char *) src;
+ if (dst >= dst_end - 1)
+ {
+ dst = alloc_destination (coding, src_end - src + 2,
+ dst);
+ dst_end = coding->destination + coding->dst_bytes;
+ coding_set_source (coding);
+ src = coding->source + coding->consumed;
+ src_end = coding->source + coding->src_bytes;
+ }
+ }
+ EMIT_ONE_BYTE (c);
+ }
}
- ccl_coding_driver (coding, source, destination + extra,
- src_bytes, dst_bytes, 0);
- if (coding->eol_type != CODING_EOL_LF)
+ else
{
- coding->produced += extra;
- coding->produced_char += extra;
- decode_eol_post_ccl (coding, destination, coding->produced);
- }
- break;
-
- default:
- decode_eol (coding, source, destination, src_bytes, dst_bytes);
- }
+ if (!EQ (coding->src_object, coding->dst_object))
+ {
+ int require = coding->src_bytes - coding->dst_bytes;
- if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
- && coding->mode & CODING_MODE_LAST_BLOCK
- && coding->consumed == src_bytes)
- coding->result = CODING_FINISH_NORMAL;
+ if (require > 0)
+ {
+ EMACS_INT offset = src - coding->source;
- if (coding->mode & CODING_MODE_LAST_BLOCK
- && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
- {
- const unsigned char *src = source + coding->consumed;
- unsigned char *dst = destination + coding->produced;
+ dst = alloc_destination (coding, require, dst);
+ coding_set_source (coding);
+ src = coding->source + offset;
+ src_end = coding->source + coding->src_bytes;
+ }
+ }
+ produced_chars = coding->src_chars;
+ while (src < src_end)
+ {
+ int c = *src++;
- src_bytes -= coding->consumed;
- coding->errors++;
- if (COMPOSING_P (coding))
- DECODE_COMPOSITION_END ('1');
- while (src_bytes--)
- {
- int c = *src++;
- dst += CHAR_STRING (c, dst);
- coding->produced_char++;
+ if (c == '\r')
+ {
+ if (EQ (eol_type, Qdos))
+ {
+ if (src < src_end
+ && *src == '\n')
+ c = *src++;
+ produced_chars--;
+ }
+ else if (EQ (eol_type, Qmac))
+ c = '\n';
+ }
+ *dst++ = c;
+ }
}
- coding->consumed = coding->consumed_char = src - source;
- coding->produced = dst - destination;
- coding->result = CODING_FINISH_NORMAL;
- }
-
- if (!coding->dst_multibyte)
- {
- coding->produced = str_as_unibyte (destination, coding->produced);
- coding->produced_char = coding->produced;
+ coding->consumed = coding->src_bytes;
+ coding->consumed_char = coding->src_chars;
}
- return coding->result;
+ produced = dst - (coding->destination + coding->produced);
+ if (BUFFERP (coding->dst_object))
+ insert_from_gap (produced_chars, produced);
+ coding->produced += produced;
+ coding->produced_char += produced_chars;
+ return carryover;
}
-/* See "GENERAL NOTES about `encode_coding_XXX ()' functions". The
- multibyteness of the source is CODING->src_multibyte, the
- multibyteness of the result is always unibyte. */
+/* Compose text in CODING->object according to the annotation data at
+ CHARBUF. CHARBUF is an array:
+ [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
+ */
-int
-encode_coding (coding, source, destination, src_bytes, dst_bytes)
+static INLINE void
+produce_composition (coding, charbuf, pos)
struct coding_system *coding;
- const unsigned char *source;
- unsigned char *destination;
- int src_bytes, dst_bytes;
+ int *charbuf;
+ EMACS_INT pos;
{
- coding->produced = coding->produced_char = 0;
- coding->consumed = coding->consumed_char = 0;
- coding->errors = 0;
- coding->result = CODING_FINISH_NORMAL;
- if (coding->eol_type == CODING_EOL_UNDECIDED)
- coding->eol_type = CODING_EOL_LF;
-
- switch (coding->type)
- {
- case coding_type_sjis:
- encode_coding_sjis_big5 (coding, source, destination,
- src_bytes, dst_bytes, 1);
- break;
-
- case coding_type_iso2022:
- encode_coding_iso2022 (coding, source, destination,
- src_bytes, dst_bytes);
- break;
-
- case coding_type_big5:
- encode_coding_sjis_big5 (coding, source, destination,
- src_bytes, dst_bytes, 0);
- break;
-
- case coding_type_emacs_mule:
- encode_coding_emacs_mule (coding, source, destination,
- src_bytes, dst_bytes);
- break;
-
- case coding_type_ccl:
- ccl_coding_driver (coding, source, destination,
- src_bytes, dst_bytes, 1);
- break;
+ int len;
+ EMACS_INT to;
+ enum composition_method method;
+ Lisp_Object components;
- default:
- encode_eol (coding, source, destination, src_bytes, dst_bytes);
- }
+ len = -charbuf[0];
+ to = pos + charbuf[2];
+ if (to <= pos)
+ return;
+ method = (enum composition_method) (charbuf[3]);
- if (coding->mode & CODING_MODE_LAST_BLOCK
- && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
+ if (method == COMPOSITION_RELATIVE)
+ components = Qnil;
+ else if (method >= COMPOSITION_WITH_RULE
+ && method <= COMPOSITION_WITH_RULE_ALTCHARS)
{
- const unsigned char *src = source + coding->consumed;
- unsigned char *dst = destination + coding->produced;
+ Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
+ int i;
- if (coding->type == coding_type_iso2022)
- ENCODE_RESET_PLANE_AND_REGISTER;
- if (COMPOSING_P (coding))
- *dst++ = ISO_CODE_ESC, *dst++ = '1';
- if (coding->consumed < src_bytes)
+ len -= 4;
+ charbuf += 4;
+ for (i = 0; i < len; i++)
{
- int len = src_bytes - coding->consumed;
-
- BCOPY_SHORT (src, dst, len);
- if (coding->src_multibyte)
- len = str_as_unibyte (dst, len);
- dst += len;
- coding->consumed = src_bytes;
+ args[i] = make_number (charbuf[i]);
+ if (charbuf[i] < 0)
+ return;
}
- coding->produced = coding->produced_char = dst - destination;
- coding->result = CODING_FINISH_NORMAL;
+ components = (method == COMPOSITION_WITH_ALTCHARS
+ ? Fstring (len, args) : Fvector (len, args));
}
-
- if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
- && coding->consumed == src_bytes)
- coding->result = CODING_FINISH_NORMAL;
-
- return coding->result;
+ else
+ return;
+ compose_text (pos, to, components, Qnil, coding->dst_object);
}
-/* Scan text in the region between *BEG and *END (byte positions),
- skip characters which we don't have to decode by coding system
- CODING at the head and tail, then set *BEG and *END to the region
- of the text we actually have to convert. The caller should move
- the gap out of the region in advance if the region is from a
- buffer.
- If STR is not NULL, *BEG and *END are indices into STR. */
+/* Put `charset' property on text in CODING->object according to
+ the annotation data at CHARBUF. CHARBUF is an array:
+ [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
+ */
-static void
-shrink_decoding_region (beg, end, coding, str)
- int *beg, *end;
+static INLINE void
+produce_charset (coding, charbuf, pos)
struct coding_system *coding;
- unsigned char *str;
+ int *charbuf;
+ EMACS_INT pos;
{
- unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
- int eol_conversion;
- Lisp_Object translation_table;
+ EMACS_INT from = pos - charbuf[2];
+ struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
- if (coding->type == coding_type_ccl
- || coding->type == coding_type_undecided
- || coding->eol_type != CODING_EOL_LF
- || !NILP (coding->post_read_conversion)
- || coding->composing != COMPOSITION_DISABLED)
- {
- /* We can't skip any data. */
- return;
- }
- if (coding->type == coding_type_no_conversion
- || coding->type == coding_type_raw_text
- || coding->type == coding_type_emacs_mule)
- {
- /* We need no conversion, but don't have to skip any data here.
- Decoding routine handles them effectively anyway. */
- return;
- }
+ Fput_text_property (make_number (from), make_number (pos),
+ Qcharset, CHARSET_NAME (charset),
+ coding->dst_object);
+}
- translation_table = coding->translation_table_for_decode;
- if (NILP (translation_table) && !NILP (Venable_character_translation))
- translation_table = Vstandard_translation_table_for_decode;
- if (CHAR_TABLE_P (translation_table))
- {
- int i;
- for (i = 0; i < 128; i++)
- if (!NILP (CHAR_TABLE_REF (translation_table, i)))
- break;
- if (i < 128)
- /* Some ASCII character should be translated. We give up
- shrinking. */
- return;
- }
- if (coding->heading_ascii >= 0)
- /* Detection routine has already found how much we can skip at the
- head. */
- *beg += coding->heading_ascii;
+#define CHARBUF_SIZE 0x4000
- if (str)
- {
- begp_orig = begp = str + *beg;
- endp_orig = endp = str + *end;
- }
- else
- {
- begp_orig = begp = BYTE_POS_ADDR (*beg);
- endp_orig = endp = begp + *end - *beg;
- }
+#define ALLOC_CONVERSION_WORK_AREA(coding) \
+ do { \
+ int size = CHARBUF_SIZE;; \
+ \
+ coding->charbuf = NULL; \
+ while (size > 1024) \
+ { \
+ coding->charbuf = (int *) alloca (sizeof (int) * size); \
+ if (coding->charbuf) \
+ break; \
+ size >>= 1; \
+ } \
+ if (! coding->charbuf) \
+ { \
+ record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
+ return coding->result; \
+ } \
+ coding->charbuf_size = size; \
+ } while (0)
- eol_conversion = (coding->eol_type == CODING_EOL_CR
- || coding->eol_type == CODING_EOL_CRLF);
- switch (coding->type)
+static void
+produce_annotation (coding, pos)
+ struct coding_system *coding;
+ EMACS_INT pos;
+{
+ int *charbuf = coding->charbuf;
+ int *charbuf_end = charbuf + coding->charbuf_used;
+
+ if (NILP (coding->dst_object))
+ return;
+
+ while (charbuf < charbuf_end)
{
- case coding_type_sjis:
- case coding_type_big5:
- /* We can skip all ASCII characters at the head. */
- if (coding->heading_ascii < 0)
+ if (*charbuf >= 0)
+ pos += *charbuf++;
+ else
{
- if (eol_conversion)
- while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
- else
- while (begp < endp && *begp < 0x80) begp++;
+ int len = -*charbuf;
+ switch (charbuf[1])
+ {
+ case CODING_ANNOTATE_COMPOSITION_MASK:
+ produce_composition (coding, charbuf, pos);
+ break;
+ case CODING_ANNOTATE_CHARSET_MASK:
+ produce_charset (coding, charbuf, pos);
+ break;
+ default:
+ abort ();
+ }
+ charbuf += len;
}
- /* We can skip all ASCII characters at the tail except for the
- second byte of SJIS or BIG5 code. */
- if (eol_conversion)
- while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
- else
- while (begp < endp && endp[-1] < 0x80) endp--;
- /* Do not consider LF as ascii if preceded by CR, since that
- confuses eol decoding. */
- if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
- endp++;
- if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
- endp++;
- break;
+ }
+}
- case coding_type_iso2022:
- if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
- /* We can't skip any data. */
- break;
- if (coding->heading_ascii < 0)
- {
- /* We can skip all ASCII characters at the head except for a
- few control codes. */
- while (begp < endp && (c = *begp) < 0x80
- && c != ISO_CODE_CR && c != ISO_CODE_SO
- && c != ISO_CODE_SI && c != ISO_CODE_ESC
- && (!eol_conversion || c != ISO_CODE_LF))
- begp++;
- }
- switch (coding->category_idx)
- {
- case CODING_CATEGORY_IDX_ISO_8_1:
- case CODING_CATEGORY_IDX_ISO_8_2:
- /* We can skip all ASCII characters at the tail. */
- if (eol_conversion)
- while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
- else
- while (begp < endp && endp[-1] < 0x80) endp--;
- /* Do not consider LF as ascii if preceded by CR, since that
- confuses eol decoding. */
- if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
- endp++;
- break;
+/* Decode the data at CODING->src_object into CODING->dst_object.
+ CODING->src_object is a buffer, a string, or nil.
+ CODING->dst_object is a buffer.
- case CODING_CATEGORY_IDX_ISO_7:
- case CODING_CATEGORY_IDX_ISO_7_TIGHT:
- {
- /* We can skip all characters at the tail except for 8-bit
- codes and ESC and the following 2-byte at the tail. */
- unsigned char *eight_bit = NULL;
+ If CODING->src_object is a buffer, it must be the current buffer.
+ In this case, if CODING->src_pos is positive, it is a position of
+ the source text in the buffer, otherwise, the source text is in the
+ gap area of the buffer, and CODING->src_pos specifies the offset of
+ the text from GPT (which must be the same as PT). If this is the
+ same buffer as CODING->dst_object, CODING->src_pos must be
+ negative.
- if (eol_conversion)
- while (begp < endp
- && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
- {
- if (!eight_bit && c & 0x80) eight_bit = endp;
- endp--;
- }
- else
- while (begp < endp
- && (c = endp[-1]) != ISO_CODE_ESC)
- {
- if (!eight_bit && c & 0x80) eight_bit = endp;
- endp--;
- }
- /* Do not consider LF as ascii if preceded by CR, since that
- confuses eol decoding. */
- if (begp < endp && endp < endp_orig
- && endp[-1] == '\r' && endp[0] == '\n')
- endp++;
- if (begp < endp && endp[-1] == ISO_CODE_ESC)
- {
- if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
- /* This is an ASCII designation sequence. We can
- surely skip the tail. But, if we have
- encountered an 8-bit code, skip only the codes
- after that. */
- endp = eight_bit ? eight_bit : endp + 2;
- else
- /* Hmmm, we can't skip the tail. */
- endp = endp_orig;
- }
- else if (eight_bit)
- endp = eight_bit;
- }
- }
- break;
+ If CODING->src_object is a string, CODING->src_pos is an index to
+ that string.
- default:
- abort ();
- }
- *beg += begp - begp_orig;
- *end += endp - endp_orig;
- return;
-}
+ If CODING->src_object is nil, CODING->source must already point to
+ the non-relocatable memory area. In this case, CODING->src_pos is
+ an offset from CODING->source.
-/* Like shrink_decoding_region but for encoding. */
+ The decoded data is inserted at the current point of the buffer
+ CODING->dst_object.
+*/
-static void
-shrink_encoding_region (beg, end, coding, str)
- int *beg, *end;
+static int
+decode_coding (coding)
struct coding_system *coding;
- unsigned char *str;
{
- unsigned char *begp_orig, *begp, *endp_orig, *endp;
- int eol_conversion;
+ Lisp_Object attrs;
+ Lisp_Object undo_list;
Lisp_Object translation_table;
+ int carryover;
+ int i;
- if (coding->type == coding_type_ccl
- || coding->eol_type == CODING_EOL_CRLF
- || coding->eol_type == CODING_EOL_CR
- || (coding->cmp_data && coding->cmp_data->used > 0))
- {
- /* We can't skip any data. */
- return;
- }
- if (coding->type == coding_type_no_conversion
- || coding->type == coding_type_raw_text
- || coding->type == coding_type_emacs_mule
- || coding->type == coding_type_undecided)
+ if (BUFFERP (coding->src_object)
+ && coding->src_pos > 0
+ && coding->src_pos < GPT
+ && coding->src_pos + coding->src_chars > GPT)
+ move_gap_both (coding->src_pos, coding->src_pos_byte);
+
+ undo_list = Qt;
+ if (BUFFERP (coding->dst_object))
{
- /* We need no conversion, but don't have to skip any data here.
- Encoding routine handles them effectively anyway. */
- return;
+ if (current_buffer != XBUFFER (coding->dst_object))
+ set_buffer_internal (XBUFFER (coding->dst_object));
+ if (GPT != PT)
+ move_gap_both (PT, PT_BYTE);
+ undo_list = current_buffer->undo_list;
+ current_buffer->undo_list = Qt;
}
- translation_table = coding->translation_table_for_encode;
- if (NILP (translation_table) && !NILP (Venable_character_translation))
- translation_table = Vstandard_translation_table_for_encode;
- if (CHAR_TABLE_P (translation_table))
+ coding->consumed = coding->consumed_char = 0;
+ coding->produced = coding->produced_char = 0;
+ coding->chars_at_source = 0;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
+ coding->errors = 0;
+
+ ALLOC_CONVERSION_WORK_AREA (coding);
+
+ attrs = CODING_ID_ATTRS (coding->id);
+ translation_table = get_translation_table (attrs, 0, NULL);
+
+ carryover = 0;
+ do
{
- int i;
- for (i = 0; i < 128; i++)
- if (!NILP (CHAR_TABLE_REF (translation_table, i)))
- break;
- if (i < 128)
- /* Some ASCII character should be translated. We give up
- shrinking. */
- return;
+ EMACS_INT pos = coding->dst_pos + coding->produced_char;
+
+ coding_set_source (coding);
+ coding->annotated = 0;
+ coding->charbuf_used = carryover;
+ (*(coding->decoder)) (coding);
+ coding_set_destination (coding);
+ carryover = produce_chars (coding, translation_table, 0);
+ if (coding->annotated)
+ produce_annotation (coding, pos);
+ for (i = 0; i < carryover; i++)
+ coding->charbuf[i]
+ = coding->charbuf[coding->charbuf_used - carryover + i];
}
+ while (coding->consumed < coding->src_bytes
+ && (coding->result == CODING_RESULT_SUCCESS
+ || coding->result == CODING_RESULT_INVALID_SRC));
- if (str)
+ if (carryover > 0)
{
- begp_orig = begp = str + *beg;
- endp_orig = endp = str + *end;
+ coding_set_destination (coding);
+ coding->charbuf_used = carryover;
+ produce_chars (coding, translation_table, 1);
}
- else
+
+ coding->carryover_bytes = 0;
+ if (coding->consumed < coding->src_bytes)
{
- begp_orig = begp = BYTE_POS_ADDR (*beg);
- endp_orig = endp = begp + *end - *beg;
- }
+ int nbytes = coding->src_bytes - coding->consumed;
+ const unsigned char *src;
- eol_conversion = (coding->eol_type == CODING_EOL_CR
- || coding->eol_type == CODING_EOL_CRLF);
+ coding_set_source (coding);
+ coding_set_destination (coding);
+ src = coding->source + coding->consumed;
- /* Here, we don't have to check coding->pre_write_conversion because
- the caller is expected to have handled it already. */
- switch (coding->type)
- {
- case coding_type_iso2022:
- if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
- /* We can't skip any data. */
- break;
- if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
+ if (coding->mode & CODING_MODE_LAST_BLOCK)
{
- unsigned char *bol = begp;
- while (begp < endp && *begp < 0x80)
+ /* Flush out unprocessed data as binary chars. We are sure
+ that the number of data is less than the size of
+ coding->charbuf. */
+ coding->charbuf_used = 0;
+ while (nbytes-- > 0)
{
- begp++;
- if (begp[-1] == '\n')
- bol = begp;
+ int c = *src++;
+
+ if (c & 0x80)
+ c = BYTE8_TO_CHAR (c);
+ coding->charbuf[coding->charbuf_used++] = c;
}
- begp = bol;
- goto label_skip_tail;
+ produce_chars (coding, Qnil, 1);
}
- /* fall down ... */
-
- case coding_type_sjis:
- case coding_type_big5:
- /* We can skip all ASCII characters at the head and tail. */
- if (eol_conversion)
- while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
else
- while (begp < endp && *begp < 0x80) begp++;
- label_skip_tail:
- if (eol_conversion)
- while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
- else
- while (begp < endp && *(endp - 1) < 0x80) endp--;
- break;
-
- default:
- abort ();
+ {
+ /* Record unprocessed bytes in coding->carryover. We are
+ sure that the number of data is less than the size of
+ coding->carryover. */
+ unsigned char *p = coding->carryover;
+
+ coding->carryover_bytes = nbytes;
+ while (nbytes-- > 0)
+ *p++ = *src++;
+ }
+ coding->consumed = coding->src_bytes;
}
- *beg += begp - begp_orig;
- *end += endp - endp_orig;
- return;
+ if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
+ decode_eol (coding);
+ if (BUFFERP (coding->dst_object))
+ {
+ current_buffer->undo_list = undo_list;
+ record_insert (coding->dst_pos, coding->produced_char);
+ }
+ return coding->result;
}
-/* As shrinking conversion region requires some overhead, we don't try
- shrinking if the length of conversion region is less than this
- value. */
-static int shrink_conversion_region_threshhold = 1024;
-#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep) \
- do { \
- if (*(end) - *(beg) > shrink_conversion_region_threshhold) \
- { \
- if (encodep) shrink_encoding_region (beg, end, coding, str); \
- else shrink_decoding_region (beg, end, coding, str); \
- } \
- } while (0)
+/* Extract an annotation datum from a composition starting at POS and
+ ending before LIMIT of CODING->src_object (buffer or string), store
+ the data in BUF, set *STOP to a starting position of the next
+ composition (if any) or to LIMIT, and return the address of the
+ next element of BUF.
-/* ARG is (CODING BUFFER ...) where CODING is what to be set in
- Vlast_coding_system_used and the remaining elements are buffers to
- kill. */
-static Lisp_Object
-code_convert_region_unwind (arg)
- Lisp_Object arg;
-{
- struct gcpro gcpro1;
- GCPRO1 (arg);
-
- inhibit_pre_post_conversion = 0;
- Vlast_coding_system_used = XCAR (arg);
- for (arg = XCDR (arg); ! NILP (arg); arg = XCDR (arg))
- Fkill_buffer (XCAR (arg));
-
- UNGCPRO;
- return Qnil;
-}
+ If such an annotation is not found, set *STOP to a starting
+ position of a composition after POS (if any) or to LIMIT, and
+ return BUF. */
-/* Store information about all compositions in the range FROM and TO
- of OBJ in memory blocks pointed by CODING->cmp_data. OBJ is a
- buffer or a string, defaults to the current buffer. */
-
-void
-coding_save_composition (coding, from, to, obj)
+static INLINE int *
+handle_composition_annotation (pos, limit, coding, buf, stop)
+ EMACS_INT pos, limit;
struct coding_system *coding;
- int from, to;
- Lisp_Object obj;
+ int *buf;
+ EMACS_INT *stop;
{
+ EMACS_INT start, end;
Lisp_Object prop;
- int start, end;
- if (coding->composing == COMPOSITION_DISABLED)
- return;
- if (!coding->cmp_data)
- coding_allocate_composition_data (coding, from);
- if (!find_composition (from, to, &start, &end, &prop, obj)
- || end > to)
- return;
- if (start < from
- && (!find_composition (end, to, &start, &end, &prop, obj)
- || end > to))
- return;
- coding->composing = COMPOSITION_NO;
- do
+ if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
+ || end > limit)
+ *stop = limit;
+ else if (start > pos)
+ *stop = start;
+ else
{
- if (COMPOSITION_VALID_P (start, end, prop))
+ if (start == pos)
{
+ /* We found a composition. Store the corresponding
+ annotation data in BUF. */
+ int *head = buf;
enum composition_method method = COMPOSITION_METHOD (prop);
- if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
- >= COMPOSITION_DATA_SIZE)
- coding_allocate_composition_data (coding, from);
- /* For relative composition, we remember start and end
- positions, for the other compositions, we also remember
- components. */
- CODING_ADD_COMPOSITION_START (coding, start - from, method);
+ int nchars = COMPOSITION_LENGTH (prop);
+
+ ADD_COMPOSITION_DATA (buf, nchars, method);
if (method != COMPOSITION_RELATIVE)
{
- /* We must store a*/
- Lisp_Object val, ch;
+ Lisp_Object components;
+ int len, i, i_byte;
- val = COMPOSITION_COMPONENTS (prop);
- if (CONSP (val))
- while (CONSP (val))
- {
- ch = XCAR (val), val = XCDR (val);
- CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
- }
- else if (VECTORP (val) || STRINGP (val))
+ components = COMPOSITION_COMPONENTS (prop);
+ if (VECTORP (components))
{
- int len = (VECTORP (val)
- ? XVECTOR (val)->size : SCHARS (val));
- int i;
+ len = XVECTOR (components)->size;
for (i = 0; i < len; i++)
+ *buf++ = XINT (AREF (components, i));
+ }
+ else if (STRINGP (components))
+ {
+ len = SCHARS (components);
+ i = i_byte = 0;
+ while (i < len)
{
- ch = (STRINGP (val)
- ? Faref (val, make_number (i))
- : XVECTOR (val)->contents[i]);
- CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
+ FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
+ buf++;
}
}
- else /* INTEGERP (val) */
- CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
+ else if (INTEGERP (components))
+ {
+ len = 1;
+ *buf++ = XINT (components);
+ }
+ else if (CONSP (components))
+ {
+ for (len = 0; CONSP (components);
+ len++, components = XCDR (components))
+ *buf++ = XINT (XCAR (components));
+ }
+ else
+ abort ();
+ *head -= len;
}
- CODING_ADD_COMPOSITION_END (coding, end - from);
}
- start = end;
- }
- while (start < to
- && find_composition (start, to, &start, &end, &prop, obj)
- && end <= to);
- /* Make coding->cmp_data point to the first memory block. */
- while (coding->cmp_data->prev)
- coding->cmp_data = coding->cmp_data->prev;
- coding->cmp_data_start = 0;
+ if (find_composition (end, limit, &start, &end, &prop,
+ coding->src_object)
+ && end <= limit)
+ *stop = start;
+ else
+ *stop = limit;
+ }
+ return buf;
}
-/* Reflect the saved information about compositions to OBJ.
- CODING->cmp_data points to a memory block for the information. OBJ
- is a buffer or a string, defaults to the current buffer. */
-
-void
-coding_restore_composition (coding, obj)
- struct coding_system *coding;
- Lisp_Object obj;
-{
- struct composition_data *cmp_data = coding->cmp_data;
-
- if (!cmp_data)
- return;
-
- while (cmp_data->prev)
- cmp_data = cmp_data->prev;
- while (cmp_data)
- {
- int i;
+/* Extract an annotation datum from a text property `charset' at POS of
+ CODING->src_object (buffer of string), store the data in BUF, set
+ *STOP to the position where the value of `charset' property changes
+ (limiting by LIMIT), and return the address of the next element of
+ BUF.
- for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
- i += cmp_data->data[i])
- {
- int *data = cmp_data->data + i;
- enum composition_method method = (enum composition_method) data[3];
- Lisp_Object components;
+ If the property value is nil, set *STOP to the position where the
+ property value is non-nil (limiting by LIMIT), and return BUF. */
- if (data[0] < 0 || i + data[0] > cmp_data->used)
- /* Invalid composition data. */
- break;
+static INLINE int *
+handle_charset_annotation (pos, limit, coding, buf, stop)
+ EMACS_INT pos, limit;
+ struct coding_system *coding;
+ int *buf;
+ EMACS_INT *stop;
+{
+ Lisp_Object val, next;
+ int id;
- if (method == COMPOSITION_RELATIVE)
- components = Qnil;
- else
- {
- int len = data[0] - 4, j;
- Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
-
- if (method == COMPOSITION_WITH_RULE_ALTCHARS
- && len % 2 == 0)
- len --;
- if (len < 1)
- /* Invalid composition data. */
- break;
- for (j = 0; j < len; j++)
- args[j] = make_number (data[4 + j]);
- components = (method == COMPOSITION_WITH_ALTCHARS
- ? Fstring (len, args)
- : Fvector (len, args));
- }
- compose_text (data[1], data[2], components, Qnil, obj);
- }
- cmp_data = cmp_data->next;
- }
+ val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
+ if (! NILP (val) && CHARSETP (val))
+ id = XINT (CHARSET_SYMBOL_ID (val));
+ else
+ id = -1;
+ ADD_CHARSET_DATA (buf, 0, id);
+ next = Fnext_single_property_change (make_number (pos), Qcharset,
+ coding->src_object,
+ make_number (limit));
+ *stop = XINT (next);
+ return buf;
}
-/* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
- text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
- coding system CODING, and return the status code of code conversion
- (currently, this value has no meaning).
-
- How many characters (and bytes) are converted to how many
- characters (and bytes) are recorded in members of the structure
- CODING.
- If REPLACE is nonzero, we do various things as if the original text
- is deleted and a new text is inserted. See the comments in
- replace_range (insdel.c) to know what we are doing.
-
- If REPLACE is zero, it is assumed that the source text is unibyte.
- Otherwise, it is assumed that the source text is multibyte. */
-
-int
-code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
- int from, from_byte, to, to_byte, encodep, replace;
+static void
+consume_chars (coding, translation_table, max_lookup)
struct coding_system *coding;
+ Lisp_Object translation_table;
+ int max_lookup;
{
- int len = to - from, len_byte = to_byte - from_byte;
- int nchars_del = 0, nbytes_del = 0;
- int require, inserted, inserted_byte;
- int head_skip, tail_skip, total_skip = 0;
- Lisp_Object saved_coding_symbol;
- int first = 1;
- unsigned char *src, *dst;
- Lisp_Object deletion;
- int orig_point = PT, orig_len = len;
- int prev_Z;
- int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
+ int *buf = coding->charbuf;
+ int *buf_end = coding->charbuf + coding->charbuf_size;
+ const unsigned char *src = coding->source + coding->consumed;
+ const unsigned char *src_end = coding->source + coding->src_bytes;
+ EMACS_INT pos = coding->src_pos + coding->consumed_char;
+ EMACS_INT end_pos = coding->src_pos + coding->src_chars;
+ int multibytep = coding->src_multibyte;
+ Lisp_Object eol_type;
+ int c;
+ EMACS_INT stop, stop_composition, stop_charset;
+ int *lookup_buf = NULL;
+
+ if (! NILP (translation_table))
+ lookup_buf = alloca (sizeof (int) * max_lookup);
+
+ eol_type = CODING_ID_EOL_TYPE (coding->id);
+ if (VECTORP (eol_type))
+ eol_type = Qunix;
- deletion = Qnil;
- saved_coding_symbol = coding->symbol;
+ /* Note: composition handling is not yet implemented. */
+ coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
- if (from < PT && PT < to)
+ if (NILP (coding->src_object))
+ stop = stop_composition = stop_charset = end_pos;
+ else
{
- TEMP_SET_PT_BOTH (from, from_byte);
- orig_point = from;
+ if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
+ stop = stop_composition = pos;
+ else
+ stop = stop_composition = end_pos;
+ if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
+ stop = stop_charset = pos;
+ else
+ stop_charset = end_pos;
}
- if (replace)
+ /* Compensate for CRLF and conversion. */
+ buf_end -= 1 + MAX_ANNOTATION_LENGTH;
+ while (buf < buf_end)
{
- int saved_from = from;
- int saved_inhibit_modification_hooks;
+ Lisp_Object trans;
- prepare_to_modify_buffer (from, to, &from);
- if (saved_from != from)
+ if (pos == stop)
{
- to = from + len;
- from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
- len_byte = to_byte - from_byte;
+ if (pos == end_pos)
+ break;
+ if (pos == stop_composition)
+ buf = handle_composition_annotation (pos, end_pos, coding,
+ buf, &stop_composition);
+ if (pos == stop_charset)
+ buf = handle_charset_annotation (pos, end_pos, coding,
+ buf, &stop_charset);
+ stop = (stop_composition < stop_charset
+ ? stop_composition : stop_charset);
}
- /* The code conversion routine can not preserve text properties
- for now. So, we must remove all text properties in the
- region. Here, we must suppress all modification hooks. */
- saved_inhibit_modification_hooks = inhibit_modification_hooks;
- inhibit_modification_hooks = 1;
- Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
- inhibit_modification_hooks = saved_inhibit_modification_hooks;
- }
-
- if (! encodep && CODING_REQUIRE_DETECTION (coding))
- {
- /* We must detect encoding of text and eol format. */
+ if (! multibytep)
+ {
+ EMACS_INT bytes;
- if (from < GPT && to > GPT)
- move_gap_both (from, from_byte);
- if (coding->type == coding_type_undecided)
+ if (coding->encoder == encode_coding_raw_text)
+ c = *src++, pos++;
+ else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
+ c = STRING_CHAR_ADVANCE (src), pos += bytes;
+ else
+ c = BYTE8_TO_CHAR (*src), src++, pos++;
+ }
+ else
+ c = STRING_CHAR_ADVANCE (src), pos++;
+ if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
+ c = '\n';
+ if (! EQ (eol_type, Qunix))
{
- detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
- if (coding->type == coding_type_undecided)
+ if (c == '\n')
{
- /* It seems that the text contains only ASCII, but we
- should not leave it undecided because the deeper
- decoding routine (decode_coding) tries to detect the
- encodings again in vain. */
- coding->type = coding_type_emacs_mule;
- coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
- /* As emacs-mule decoder will handle composition, we
- need this setting to allocate coding->cmp_data
- later. */
- coding->composing = COMPOSITION_NO;
+ if (EQ (eol_type, Qdos))
+ *buf++ = '\r';
+ else
+ c = '\r';
}
}
- if (coding->eol_type == CODING_EOL_UNDECIDED
- && coding->type != coding_type_ccl)
+
+ trans = Qnil;
+ LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
+ if (NILP (trans))
+ *buf++ = c;
+ else
{
- detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
- if (coding->eol_type == CODING_EOL_UNDECIDED)
- coding->eol_type = CODING_EOL_LF;
- /* We had better recover the original eol format if we
- encounter an inconsistent eol format while decoding. */
- coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
+ int from_nchars = 1, to_nchars = 1;
+ int *lookup_buf_end;
+ const unsigned char *p = src;
+ int i;
+
+ lookup_buf[0] = c;
+ for (i = 1; i < max_lookup && p < src_end; i++)
+ lookup_buf[i] = STRING_CHAR_ADVANCE (p);
+ lookup_buf_end = lookup_buf + i;
+ trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
+ &from_nchars, &to_nchars);
+ if (EQ (trans, Qt)
+ || buf + to_nchars > buf_end)
+ break;
+ *buf++ = *lookup_buf;
+ for (i = 1; i < to_nchars; i++)
+ *buf++ = XINT (AREF (trans, i));
+ for (i = 1; i < from_nchars; i++, pos++)
+ src += MULTIBYTE_LENGTH_NO_CHECK (src);
}
}
- /* Now we convert the text. */
-
- /* For encoding, we must process pre-write-conversion in advance. */
- if (! inhibit_pre_post_conversion
- && encodep
- && SYMBOLP (coding->pre_write_conversion)
- && ! NILP (Ffboundp (coding->pre_write_conversion)))
- {
- /* The function in pre-write-conversion may put a new text in a
- new buffer. */
- struct buffer *prev = current_buffer;
- Lisp_Object new;
-
- record_unwind_protect (code_convert_region_unwind,
- Fcons (Vlast_coding_system_used, Qnil));
- /* We should not call any more pre-write/post-read-conversion
- functions while this pre-write-conversion is running. */
- inhibit_pre_post_conversion = 1;
- call2 (coding->pre_write_conversion,
- make_number (from), make_number (to));
- inhibit_pre_post_conversion = 0;
- /* Discard the unwind protect. */
- specpdl_ptr--;
+ coding->consumed = src - coding->source;
+ coding->consumed_char = pos - coding->src_pos;
+ coding->charbuf_used = buf - coding->charbuf;
+ coding->chars_at_source = 0;
+}
- if (current_buffer != prev)
- {
- len = ZV - BEGV;
- new = Fcurrent_buffer ();
- set_buffer_internal_1 (prev);
- del_range_2 (from, from_byte, to, to_byte, 0);
- TEMP_SET_PT_BOTH (from, from_byte);
- insert_from_buffer (XBUFFER (new), 1, len, 0);
- Fkill_buffer (new);
- if (orig_point >= to)
- orig_point += len - orig_len;
- else if (orig_point > from)
- orig_point = from;
- orig_len = len;
- to = from + len;
- from_byte = CHAR_TO_BYTE (from);
- to_byte = CHAR_TO_BYTE (to);
- len_byte = to_byte - from_byte;
- TEMP_SET_PT_BOTH (from, from_byte);
- }
- }
- if (replace)
- {
- if (! EQ (current_buffer->undo_list, Qt))
- deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
- else
- {
- nchars_del = to - from;
- nbytes_del = to_byte - from_byte;
- }
- }
+/* Encode the text at CODING->src_object into CODING->dst_object.
+ CODING->src_object is a buffer or a string.
+ CODING->dst_object is a buffer or nil.
- if (coding->composing != COMPOSITION_DISABLED)
- {
- if (encodep)
- coding_save_composition (coding, from, to, Fcurrent_buffer ());
- else
- coding_allocate_composition_data (coding, from);
- }
+ If CODING->src_object is a buffer, it must be the current buffer.
+ In this case, if CODING->src_pos is positive, it is a position of
+ the source text in the buffer, otherwise. the source text is in the
+ gap area of the buffer, and coding->src_pos specifies the offset of
+ the text from GPT (which must be the same as PT). If this is the
+ same buffer as CODING->dst_object, CODING->src_pos must be
+ negative and CODING should not have `pre-write-conversion'.
- /* Try to skip the heading and tailing ASCIIs. We can't skip them
- if we must run CCL program or there are compositions to
- encode. */
- if (coding->type != coding_type_ccl
- && (! coding->cmp_data || coding->cmp_data->used == 0))
- {
- int from_byte_orig = from_byte, to_byte_orig = to_byte;
+ If CODING->src_object is a string, CODING should not have
+ `pre-write-conversion'.
- if (from < GPT && GPT < to)
- move_gap_both (from, from_byte);
- SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
- if (from_byte == to_byte
- && (encodep || NILP (coding->post_read_conversion))
- && ! CODING_REQUIRE_FLUSHING (coding))
- {
- coding->produced = len_byte;
- coding->produced_char = len;
- if (!replace)
- /* We must record and adjust for this new text now. */
- adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
- coding_free_composition_data (coding);
- return 0;
- }
-
- head_skip = from_byte - from_byte_orig;
- tail_skip = to_byte_orig - to_byte;
- total_skip = head_skip + tail_skip;
- from += head_skip;
- to -= tail_skip;
- len -= total_skip; len_byte -= total_skip;
- }
-
- /* For conversion, we must put the gap before the text in addition to
- making the gap larger for efficient decoding. The required gap
- size starts from 2000 which is the magic number used in make_gap.
- But, after one batch of conversion, it will be incremented if we
- find that it is not enough . */
- require = 2000;
-
- if (GAP_SIZE < require)
- make_gap (require - GAP_SIZE);
- move_gap_both (from, from_byte);
-
- inserted = inserted_byte = 0;
-
- GAP_SIZE += len_byte;
- ZV -= len;
- Z -= len;
- ZV_BYTE -= len_byte;
- Z_BYTE -= len_byte;
-
- if (GPT - BEG < BEG_UNCHANGED)
- BEG_UNCHANGED = GPT - BEG;
- if (Z - GPT < END_UNCHANGED)
- END_UNCHANGED = Z - GPT;
-
- if (!encodep && coding->src_multibyte)
- {
- /* Decoding routines expects that the source text is unibyte.
- We must convert 8-bit characters of multibyte form to
- unibyte. */
- int len_byte_orig = len_byte;
- len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
- if (len_byte < len_byte_orig)
- safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
- len_byte);
- coding->src_multibyte = 0;
- }
-
- for (;;)
- {
- int result;
-
- /* The buffer memory is now:
- +--------+converted-text+---------+-------original-text-------+---+
- |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
- |<---------------------- GAP ----------------------->| */
- src = GAP_END_ADDR - len_byte;
- dst = GPT_ADDR + inserted_byte;
-
- if (encodep)
- result = encode_coding (coding, src, dst, len_byte, 0);
- else
- {
- if (coding->composing != COMPOSITION_DISABLED)
- coding->cmp_data->char_offset = from + inserted;
- result = decode_coding (coding, src, dst, len_byte, 0);
- }
+ If CODING->dst_object is a buffer, the encoded data is inserted at
+ the current point of that buffer.
- /* The buffer memory is now:
- +--------+-------converted-text----+--+------original-text----+---+
- |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
- |<---------------------- GAP ----------------------->| */
+ If CODING->dst_object is nil, the encoded data is placed at the
+ memory area specified by CODING->destination. */
- inserted += coding->produced_char;
- inserted_byte += coding->produced;
- len_byte -= coding->consumed;
+static int
+encode_coding (coding)
+ struct coding_system *coding;
+{
+ Lisp_Object attrs;
+ Lisp_Object translation_table;
+ int max_lookup;
- if (result == CODING_FINISH_INSUFFICIENT_CMP)
- {
- coding_allocate_composition_data (coding, from + inserted);
- continue;
- }
+ attrs = CODING_ID_ATTRS (coding->id);
+ if (coding->encoder == encode_coding_raw_text)
+ translation_table = Qnil, max_lookup = 0;
+ else
+ translation_table = get_translation_table (attrs, 1, &max_lookup);
- src += coding->consumed;
- dst += coding->produced;
+ if (BUFFERP (coding->dst_object))
+ {
+ set_buffer_internal (XBUFFER (coding->dst_object));
+ coding->dst_multibyte
+ = ! NILP (current_buffer->enable_multibyte_characters);
+ }
- if (result == CODING_FINISH_NORMAL)
- {
- src += len_byte;
- break;
- }
- if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
- {
- unsigned char *pend = dst, *p = pend - inserted_byte;
- Lisp_Object eol_type;
+ coding->consumed = coding->consumed_char = 0;
+ coding->produced = coding->produced_char = 0;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
+ coding->errors = 0;
- /* Encode LFs back to the original eol format (CR or CRLF). */
- if (coding->eol_type == CODING_EOL_CR)
- {
- while (p < pend) if (*p++ == '\n') p[-1] = '\r';
- }
- else
- {
- int count = 0;
+ ALLOC_CONVERSION_WORK_AREA (coding);
- while (p < pend) if (*p++ == '\n') count++;
- if (src - dst < count)
- {
- /* We don't have sufficient room for encoding LFs
- back to CRLF. We must record converted and
- not-yet-converted text back to the buffer
- content, enlarge the gap, then record them out of
- the buffer contents again. */
- int add = len_byte + inserted_byte;
-
- GAP_SIZE -= add;
- ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
- GPT += inserted_byte; GPT_BYTE += inserted_byte;
- make_gap (count - GAP_SIZE);
- GAP_SIZE += add;
- ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
- GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
- /* Don't forget to update SRC, DST, and PEND. */
- src = GAP_END_ADDR - len_byte;
- dst = GPT_ADDR + inserted_byte;
- pend = dst;
- }
- inserted += count;
- inserted_byte += count;
- coding->produced += count;
- p = dst = pend + count;
- while (count)
- {
- *--p = *--pend;
- if (*p == '\n') count--, *--p = '\r';
- }
- }
+ do {
+ coding_set_source (coding);
+ consume_chars (coding, translation_table, max_lookup);
+ coding_set_destination (coding);
+ (*(coding->encoder)) (coding);
+ } while (coding->consumed_char < coding->src_chars);
- /* Suppress eol-format conversion in the further conversion. */
- coding->eol_type = CODING_EOL_LF;
+ if (BUFFERP (coding->dst_object))
+ insert_from_gap (coding->produced_char, coding->produced);
- /* Set the coding system symbol to that for Unix-like EOL. */
- eol_type = Fget (saved_coding_symbol, Qeol_type);
- if (VECTORP (eol_type)
- && XVECTOR (eol_type)->size == 3
- && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
- coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
- else
- coding->symbol = saved_coding_symbol;
+ return (coding->result);
+}
- continue;
- }
- if (len_byte <= 0)
- {
- if (coding->type != coding_type_ccl
- || coding->mode & CODING_MODE_LAST_BLOCK)
- break;
- coding->mode |= CODING_MODE_LAST_BLOCK;
- continue;
- }
- if (result == CODING_FINISH_INSUFFICIENT_SRC)
- {
- /* The source text ends in invalid codes. Let's just
- make them valid buffer contents, and finish conversion. */
- if (multibyte_p)
- {
- unsigned char *start = dst;
- inserted += len_byte;
- while (len_byte--)
- {
- int c = *src++;
- dst += CHAR_STRING (c, dst);
- }
+/* Name (or base name) of work buffer for code conversion. */
+static Lisp_Object Vcode_conversion_workbuf_name;
- inserted_byte += dst - start;
- }
- else
- {
- inserted += len_byte;
- inserted_byte += len_byte;
- while (len_byte--)
- *dst++ = *src++;
- }
- break;
- }
- if (result == CODING_FINISH_INTERRUPT)
- {
- /* The conversion procedure was interrupted by a user. */
- break;
- }
- /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST */
- if (coding->consumed < 1)
- {
- /* It's quite strange to require more memory without
- consuming any bytes. Perhaps CCL program bug. */
- break;
- }
- if (first)
- {
- /* We have just done the first batch of conversion which was
- stopped because of insufficient gap. Let's reconsider the
- required gap size (i.e. SRT - DST) now.
+/* A working buffer used by the top level conversion. Once it is
+ created, it is never destroyed. It has the name
+ Vcode_conversion_workbuf_name. The other working buffers are
+ destroyed after the use is finished, and their names are modified
+ versions of Vcode_conversion_workbuf_name. */
+static Lisp_Object Vcode_conversion_reused_workbuf;
- We have converted ORIG bytes (== coding->consumed) into
- NEW bytes (coding->produced). To convert the remaining
- LEN bytes, we may need REQUIRE bytes of gap, where:
- REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
- REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
- Here, we are sure that NEW >= ORIG. */
+/* 1 iff Vcode_conversion_reused_workbuf is already in use. */
+static int reused_workbuf_in_use;
- if (coding->produced <= coding->consumed)
- {
- /* This happens because of CCL-based coding system with
- eol-type CRLF. */
- require = 0;
- }
- else
- {
- float ratio = coding->produced - coding->consumed;
- ratio /= coding->consumed;
- require = len_byte * ratio;
- }
- first = 0;
- }
- if ((src - dst) < (require + 2000))
- {
- /* See the comment above the previous call of make_gap. */
- int add = len_byte + inserted_byte;
- GAP_SIZE -= add;
- ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
- GPT += inserted_byte; GPT_BYTE += inserted_byte;
- make_gap (require + 2000);
- GAP_SIZE += add;
- ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
- GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
- }
- }
- if (src - dst > 0) *dst = 0; /* Put an anchor. */
+/* Return a working buffer of code convesion. MULTIBYTE specifies the
+ multibyteness of returning buffer. */
- if (encodep && coding->dst_multibyte)
- {
- /* The output is unibyte. We must convert 8-bit characters to
- multibyte form. */
- if (inserted_byte * 2 > GAP_SIZE)
- {
- GAP_SIZE -= inserted_byte;
- ZV += inserted_byte; Z += inserted_byte;
- ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
- GPT += inserted_byte; GPT_BYTE += inserted_byte;
- make_gap (inserted_byte - GAP_SIZE);
- GAP_SIZE += inserted_byte;
- ZV -= inserted_byte; Z -= inserted_byte;
- ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
- GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
- }
- inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
- }
+static Lisp_Object
+make_conversion_work_buffer (multibyte)
+ int multibyte;
+{
+ Lisp_Object name, workbuf;
+ struct buffer *current;
- /* If we shrank the conversion area, adjust it now. */
- if (total_skip > 0)
+ if (reused_workbuf_in_use++)
{
- if (tail_skip > 0)
- safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
- inserted += total_skip; inserted_byte += total_skip;
- GAP_SIZE += total_skip;
- GPT -= head_skip; GPT_BYTE -= head_skip;
- ZV -= total_skip; ZV_BYTE -= total_skip;
- Z -= total_skip; Z_BYTE -= total_skip;
- from -= head_skip; from_byte -= head_skip;
- to += tail_skip; to_byte += tail_skip;
+ name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
+ workbuf = Fget_buffer_create (name);
}
-
- prev_Z = Z;
- if (! EQ (current_buffer->undo_list, Qt))
- adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
else
- adjust_after_replace_noundo (from, from_byte, nchars_del, nbytes_del,
- inserted, inserted_byte);
- inserted = Z - prev_Z;
-
- if (!encodep && coding->cmp_data && coding->cmp_data->used)
- coding_restore_composition (coding, Fcurrent_buffer ());
- coding_free_composition_data (coding);
-
- if (! inhibit_pre_post_conversion
- && ! encodep && ! NILP (coding->post_read_conversion))
{
- Lisp_Object val;
- Lisp_Object saved_coding_system;
+ name = Vcode_conversion_workbuf_name;
+ workbuf = Fget_buffer_create (name);
+ if (NILP (Vcode_conversion_reused_workbuf))
+ Vcode_conversion_reused_workbuf = workbuf;
+ }
+ current = current_buffer;
+ set_buffer_internal (XBUFFER (workbuf));
+ Ferase_buffer ();
+ current_buffer->undo_list = Qt;
+ current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
+ set_buffer_internal (current);
+ return workbuf;
+}
- if (from != PT)
- TEMP_SET_PT_BOTH (from, from_byte);
- prev_Z = Z;
- record_unwind_protect (code_convert_region_unwind,
- Fcons (Vlast_coding_system_used, Qnil));
- saved_coding_system = Vlast_coding_system_used;
- Vlast_coding_system_used = coding->symbol;
- /* We should not call any more pre-write/post-read-conversion
- functions while this post-read-conversion is running. */
- inhibit_pre_post_conversion = 1;
- val = call1 (coding->post_read_conversion, make_number (inserted));
- inhibit_pre_post_conversion = 0;
- coding->symbol = Vlast_coding_system_used;
- Vlast_coding_system_used = saved_coding_system;
- /* Discard the unwind protect. */
- specpdl_ptr--;
- CHECK_NUMBER (val);
- inserted += Z - prev_Z;
- }
-
- if (orig_point >= from)
- {
- if (orig_point >= from + orig_len)
- orig_point += inserted - orig_len;
- else
- orig_point = from;
- TEMP_SET_PT (orig_point);
- }
- if (replace)
+static Lisp_Object
+code_conversion_restore (arg)
+ Lisp_Object arg;
+{
+ Lisp_Object current, workbuf;
+ struct gcpro gcpro1;
+
+ GCPRO1 (arg);
+ current = XCAR (arg);
+ workbuf = XCDR (arg);
+ if (! NILP (workbuf))
{
- signal_after_change (from, to - from, inserted);
- update_compositions (from, from + inserted, CHECK_BORDER);
+ if (EQ (workbuf, Vcode_conversion_reused_workbuf))
+ reused_workbuf_in_use = 0;
+ else if (! NILP (Fbuffer_live_p (workbuf)))
+ Fkill_buffer (workbuf);
}
+ set_buffer_internal (XBUFFER (current));
+ UNGCPRO;
+ return Qnil;
+}
- {
- coding->consumed = to_byte - from_byte;
- coding->consumed_char = to - from;
- coding->produced = inserted_byte;
- coding->produced_char = inserted;
- }
+Lisp_Object
+code_conversion_save (with_work_buf, multibyte)
+ int with_work_buf, multibyte;
+{
+ Lisp_Object workbuf = Qnil;
- return 0;
+ if (with_work_buf)
+ workbuf = make_conversion_work_buffer (multibyte);
+ record_unwind_protect (code_conversion_restore,
+ Fcons (Fcurrent_buffer (), workbuf));
+ return workbuf;
}
-/* Name (or base name) of work buffer for code conversion. */
-static Lisp_Object Vcode_conversion_workbuf_name;
+int
+decode_coding_gap (coding, chars, bytes)
+ struct coding_system *coding;
+ EMACS_INT chars, bytes;
+{
+ int count = specpdl_ptr - specpdl;
+ Lisp_Object attrs;
+
+ code_conversion_save (0, 0);
+
+ coding->src_object = Fcurrent_buffer ();
+ coding->src_chars = chars;
+ coding->src_bytes = bytes;
+ coding->src_pos = -chars;
+ coding->src_pos_byte = -bytes;
+ coding->src_multibyte = chars < bytes;
+ coding->dst_object = coding->src_object;
+ coding->dst_pos = PT;
+ coding->dst_pos_byte = PT_BYTE;
+ coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
-/* Set the current buffer to the working buffer prepared for
- code-conversion. MULTIBYTE specifies the multibyteness of the
- buffer. Return the buffer we set if it must be killed after use.
- Otherwise return Qnil. */
+ if (CODING_REQUIRE_DETECTION (coding))
+ detect_coding (coding);
-static Lisp_Object
-set_conversion_work_buffer (multibyte)
- int multibyte;
-{
- Lisp_Object buffer, buffer_to_kill;
- struct buffer *buf;
+ coding->mode |= CODING_MODE_LAST_BLOCK;
+ decode_coding (coding);
- buffer = Fget_buffer_create (Vcode_conversion_workbuf_name);
- buf = XBUFFER (buffer);
- if (buf == current_buffer)
+ attrs = CODING_ID_ATTRS (coding->id);
+ if (! NILP (CODING_ATTR_POST_READ (attrs)))
{
- /* As we are already in the work buffer, we must generate a new
- buffer for the work. */
- Lisp_Object name;
+ EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
+ Lisp_Object val;
- name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
- buffer = buffer_to_kill = Fget_buffer_create (name);
- buf = XBUFFER (buffer);
+ TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
+ val = call1 (CODING_ATTR_POST_READ (attrs),
+ make_number (coding->produced_char));
+ CHECK_NATNUM (val);
+ coding->produced_char += Z - prev_Z;
+ coding->produced += Z_BYTE - prev_Z_BYTE;
}
- else
- buffer_to_kill = Qnil;
-
- delete_all_overlays (buf);
- buf->directory = current_buffer->directory;
- buf->read_only = Qnil;
- buf->filename = Qnil;
- buf->undo_list = Qt;
- eassert (buf->overlays_before == NULL);
- eassert (buf->overlays_after == NULL);
- set_buffer_internal (buf);
- if (BEG != BEGV || Z != ZV)
- Fwiden ();
- del_range_2 (BEG, BEG_BYTE, Z, Z_BYTE, 0);
- buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
- return buffer_to_kill;
+
+ unbind_to (count, Qnil);
+ return coding->result;
}
-Lisp_Object
-run_pre_post_conversion_on_str (str, coding, encodep)
- Lisp_Object str;
+int
+encode_coding_gap (coding, chars, bytes)
struct coding_system *coding;
- int encodep;
+ EMACS_INT chars, bytes;
{
- int count = SPECPDL_INDEX ();
- struct gcpro gcpro1, gcpro2;
- int multibyte = STRING_MULTIBYTE (str);
- Lisp_Object old_deactivate_mark;
- Lisp_Object buffer_to_kill;
- Lisp_Object unwind_arg;
-
- record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
- /* It is not crucial to specbind this. */
- old_deactivate_mark = Vdeactivate_mark;
- GCPRO2 (str, old_deactivate_mark);
-
- /* We must insert the contents of STR as is without
- unibyte<->multibyte conversion. For that, we adjust the
- multibyteness of the working buffer to that of STR. */
- buffer_to_kill = set_conversion_work_buffer (multibyte);
- if (NILP (buffer_to_kill))
- unwind_arg = Fcons (Vlast_coding_system_used, Qnil);
- else
- unwind_arg = list2 (Vlast_coding_system_used, buffer_to_kill);
- record_unwind_protect (code_convert_region_unwind, unwind_arg);
+ int count = specpdl_ptr - specpdl;
- insert_from_string (str, 0, 0,
- SCHARS (str), SBYTES (str), 0);
- UNGCPRO;
- inhibit_pre_post_conversion = 1;
- if (encodep)
- {
- struct buffer *prev = current_buffer;
+ code_conversion_save (0, 0);
- call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
- if (prev != current_buffer)
- /* We must kill the current buffer too. */
- Fsetcdr (unwind_arg, Fcons (Fcurrent_buffer (), XCDR (unwind_arg)));
- }
- else
- {
- Vlast_coding_system_used = coding->symbol;
- TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
- call1 (coding->post_read_conversion, make_number (Z - BEG));
- coding->symbol = Vlast_coding_system_used;
- }
- inhibit_pre_post_conversion = 0;
- Vdeactivate_mark = old_deactivate_mark;
- str = make_buffer_string (BEG, Z, 1);
- return unbind_to (count, str);
+ coding->src_object = Fcurrent_buffer ();
+ coding->src_chars = chars;
+ coding->src_bytes = bytes;
+ coding->src_pos = -chars;
+ coding->src_pos_byte = -bytes;
+ coding->src_multibyte = chars < bytes;
+ coding->dst_object = coding->src_object;
+ coding->dst_pos = PT;
+ coding->dst_pos_byte = PT_BYTE;
+
+ encode_coding (coding);
+
+ unbind_to (count, Qnil);
+ return coding->result;
}
-/* Run pre-write-conversion function of CODING on NCHARS/NBYTES
- text in *STR. *SIZE is the allocated bytes for STR. As it
- is intended that this function is called from encode_terminal_code,
- the pre-write-conversion function is run by safe_call and thus
- "Error during redisplay: ..." is logged when an error occurs.
+/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
+ SRC_OBJECT into DST_OBJECT by coding context CODING.
+
+ SRC_OBJECT is a buffer, a string, or Qnil.
+
+ If it is a buffer, the text is at point of the buffer. FROM and TO
+ are positions in the buffer.
+
+ If it is a string, the text is at the beginning of the string.
+ FROM and TO are indices to the string.
- Store the resulting text in *STR and set CODING->produced_char and
- CODING->produced to the number of characters and bytes
- respectively. If the size of *STR is too small, enlarge it by
- xrealloc and update *STR and *SIZE. */
+ If it is nil, the text is at coding->source. FROM and TO are
+ indices to coding->source.
+
+ DST_OBJECT is a buffer, Qt, or Qnil.
+
+ If it is a buffer, the decoded text is inserted at point of the
+ buffer. If the buffer is the same as SRC_OBJECT, the source text
+ is deleted.
+
+ If it is Qt, a string is made from the decoded text, and
+ set in CODING->dst_object.
+
+ If it is Qnil, the decoded text is stored at CODING->destination.
+ The caller must allocate CODING->dst_bytes bytes at
+ CODING->destination by xmalloc. If the decoded text is longer than
+ CODING->dst_bytes, CODING->destination is relocated by xrealloc.
+ */
void
-run_pre_write_conversin_on_c_str (str, size, nchars, nbytes, coding)
- unsigned char **str;
- int *size, nchars, nbytes;
+decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
+ dst_object)
struct coding_system *coding;
+ Lisp_Object src_object;
+ EMACS_INT from, from_byte, to, to_byte;
+ Lisp_Object dst_object;
{
- struct gcpro gcpro1, gcpro2;
- struct buffer *cur = current_buffer;
- struct buffer *prev;
- Lisp_Object old_deactivate_mark, old_last_coding_system_used;
- Lisp_Object args[3];
- Lisp_Object buffer_to_kill;
-
- /* It is not crucial to specbind this. */
- old_deactivate_mark = Vdeactivate_mark;
- old_last_coding_system_used = Vlast_coding_system_used;
- GCPRO2 (old_deactivate_mark, old_last_coding_system_used);
-
- /* We must insert the contents of STR as is without
- unibyte<->multibyte conversion. For that, we adjust the
- multibyteness of the working buffer to that of STR. */
- buffer_to_kill = set_conversion_work_buffer (coding->src_multibyte);
- insert_1_both (*str, nchars, nbytes, 0, 0, 0);
- UNGCPRO;
- inhibit_pre_post_conversion = 1;
- prev = current_buffer;
- args[0] = coding->pre_write_conversion;
- args[1] = make_number (BEG);
- args[2] = make_number (Z);
- safe_call (3, args);
- inhibit_pre_post_conversion = 0;
- Vdeactivate_mark = old_deactivate_mark;
- Vlast_coding_system_used = old_last_coding_system_used;
- coding->produced_char = Z - BEG;
- coding->produced = Z_BYTE - BEG_BYTE;
- if (coding->produced > *size)
- {
- *size = coding->produced;
- *str = xrealloc (*str, *size);
- }
- if (BEG < GPT && GPT < Z)
- move_gap (BEG);
- bcopy (BEG_ADDR, *str, coding->produced);
- coding->src_multibyte
- = ! NILP (current_buffer->enable_multibyte_characters);
- if (prev != current_buffer)
- Fkill_buffer (Fcurrent_buffer ());
- set_buffer_internal (cur);
- if (! NILP (buffer_to_kill))
- Fkill_buffer (buffer_to_kill);
-}
+ int count = specpdl_ptr - specpdl;
+ unsigned char *destination;
+ EMACS_INT dst_bytes;
+ EMACS_INT chars = to - from;
+ EMACS_INT bytes = to_byte - from_byte;
+ Lisp_Object attrs;
+ Lisp_Object buffer;
+ int saved_pt = -1, saved_pt_byte;
+ buffer = Fcurrent_buffer ();
-Lisp_Object
-decode_coding_string (str, coding, nocopy)
- Lisp_Object str;
- struct coding_system *coding;
- int nocopy;
-{
- int len;
- struct conversion_buffer buf;
- int from, to_byte;
- Lisp_Object saved_coding_symbol;
- int result;
- int require_decoding;
- int shrinked_bytes = 0;
- Lisp_Object newstr;
- int consumed, consumed_char, produced, produced_char;
-
- from = 0;
- to_byte = SBYTES (str);
-
- saved_coding_symbol = coding->symbol;
- coding->src_multibyte = STRING_MULTIBYTE (str);
- coding->dst_multibyte = 1;
- if (CODING_REQUIRE_DETECTION (coding))
+ if (NILP (dst_object))
+ {
+ destination = coding->destination;
+ dst_bytes = coding->dst_bytes;
+ }
+
+ coding->src_object = src_object;
+ coding->src_chars = chars;
+ coding->src_bytes = bytes;
+ coding->src_multibyte = chars < bytes;
+
+ if (STRINGP (src_object))
{
- /* See the comments in code_convert_region. */
- if (coding->type == coding_type_undecided)
+ coding->src_pos = from;
+ coding->src_pos_byte = from_byte;
+ }
+ else if (BUFFERP (src_object))
+ {
+ set_buffer_internal (XBUFFER (src_object));
+ if (from != GPT)
+ move_gap_both (from, from_byte);
+ if (EQ (src_object, dst_object))
{
- detect_coding (coding, SDATA (str), to_byte);
- if (coding->type == coding_type_undecided)
- {
- coding->type = coding_type_emacs_mule;
- coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
- /* As emacs-mule decoder will handle composition, we
- need this setting to allocate coding->cmp_data
- later. */
- coding->composing = COMPOSITION_NO;
- }
+ saved_pt = PT, saved_pt_byte = PT_BYTE;
+ TEMP_SET_PT_BOTH (from, from_byte);
+ del_range_both (from, from_byte, to, to_byte, 1);
+ coding->src_pos = -chars;
+ coding->src_pos_byte = -bytes;
}
- if (coding->eol_type == CODING_EOL_UNDECIDED
- && coding->type != coding_type_ccl)
+ else
{
- saved_coding_symbol = coding->symbol;
- detect_eol (coding, SDATA (str), to_byte);
- if (coding->eol_type == CODING_EOL_UNDECIDED)
- coding->eol_type = CODING_EOL_LF;
- /* We had better recover the original eol format if we
- encounter an inconsistent eol format while decoding. */
- coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
+ coding->src_pos = from;
+ coding->src_pos_byte = from_byte;
}
}
- if (coding->type == coding_type_no_conversion
- || coding->type == coding_type_raw_text)
- coding->dst_multibyte = 0;
-
- require_decoding = CODING_REQUIRE_DECODING (coding);
+ if (CODING_REQUIRE_DETECTION (coding))
+ detect_coding (coding);
+ attrs = CODING_ID_ATTRS (coding->id);
- if (STRING_MULTIBYTE (str))
+ if (EQ (dst_object, Qt)
+ || (! NILP (CODING_ATTR_POST_READ (attrs))
+ && NILP (dst_object)))
{
- /* Decoding routines expect the source text to be unibyte. */
- str = Fstring_as_unibyte (str);
- to_byte = SBYTES (str);
- nocopy = 1;
- coding->src_multibyte = 0;
+ coding->dst_object = code_conversion_save (1, 1);
+ coding->dst_pos = BEG;
+ coding->dst_pos_byte = BEG_BYTE;
+ coding->dst_multibyte = 1;
}
-
- /* Try to skip the heading and tailing ASCIIs. */
- if (require_decoding && coding->type != coding_type_ccl)
+ else if (BUFFERP (dst_object))
{
- SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
- 0);
- if (from == to_byte)
- require_decoding = 0;
- shrinked_bytes = from + (SBYTES (str) - to_byte);
+ code_conversion_save (0, 0);
+ coding->dst_object = dst_object;
+ coding->dst_pos = BUF_PT (XBUFFER (dst_object));
+ coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
+ coding->dst_multibyte
+ = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
}
-
- if (!require_decoding
- && !(SYMBOLP (coding->post_read_conversion)
- && !NILP (Ffboundp (coding->post_read_conversion))))
+ else
{
- coding->consumed = SBYTES (str);
- coding->consumed_char = SCHARS (str);
- if (coding->dst_multibyte)
- {
- str = Fstring_as_multibyte (str);
- nocopy = 1;
- }
- coding->produced = SBYTES (str);
- coding->produced_char = SCHARS (str);
- return (nocopy ? str : Fcopy_sequence (str));
+ code_conversion_save (0, 0);
+ coding->dst_object = Qnil;
+ coding->dst_multibyte = 1;
}
- if (coding->composing != COMPOSITION_DISABLED)
- coding_allocate_composition_data (coding, from);
- len = decoding_buffer_size (coding, to_byte - from);
- allocate_conversion_buffer (buf, len);
+ decode_coding (coding);
- consumed = consumed_char = produced = produced_char = 0;
- while (1)
+ if (BUFFERP (coding->dst_object))
+ set_buffer_internal (XBUFFER (coding->dst_object));
+
+ if (! NILP (CODING_ATTR_POST_READ (attrs)))
{
- result = decode_coding (coding, SDATA (str) + from + consumed,
- buf.data + produced, to_byte - from - consumed,
- buf.size - produced);
- consumed += coding->consumed;
- consumed_char += coding->consumed_char;
- produced += coding->produced;
- produced_char += coding->produced_char;
- if (result == CODING_FINISH_NORMAL
- || result == CODING_FINISH_INTERRUPT
- || (result == CODING_FINISH_INSUFFICIENT_SRC
- && coding->consumed == 0))
- break;
- if (result == CODING_FINISH_INSUFFICIENT_CMP)
- coding_allocate_composition_data (coding, from + produced_char);
- else if (result == CODING_FINISH_INSUFFICIENT_DST)
- extend_conversion_buffer (&buf);
- else if (result == CODING_FINISH_INCONSISTENT_EOL)
- {
- Lisp_Object eol_type;
+ struct gcpro gcpro1, gcpro2;
+ EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
+ Lisp_Object val;
- /* Recover the original EOL format. */
- if (coding->eol_type == CODING_EOL_CR)
- {
- unsigned char *p;
- for (p = buf.data; p < buf.data + produced; p++)
- if (*p == '\n') *p = '\r';
- }
- else if (coding->eol_type == CODING_EOL_CRLF)
+ TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
+ GCPRO2 (coding->src_object, coding->dst_object);
+ val = safe_call1 (CODING_ATTR_POST_READ (attrs),
+ make_number (coding->produced_char));
+ UNGCPRO;
+ CHECK_NATNUM (val);
+ coding->produced_char += Z - prev_Z;
+ coding->produced += Z_BYTE - prev_Z_BYTE;
+ }
+
+ if (EQ (dst_object, Qt))
+ {
+ coding->dst_object = Fbuffer_string ();
+ }
+ else if (NILP (dst_object) && BUFFERP (coding->dst_object))
+ {
+ set_buffer_internal (XBUFFER (coding->dst_object));
+ if (dst_bytes < coding->produced)
+ {
+ destination
+ = (unsigned char *) xrealloc (destination, coding->produced);
+ if (! destination)
{
- int num_eol = 0;
- unsigned char *p0, *p1;
- for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
- if (*p0 == '\n') num_eol++;
- if (produced + num_eol >= buf.size)
- extend_conversion_buffer (&buf);
- for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
- {
- *--p1 = *--p0;
- if (*p0 == '\n') *--p1 = '\r';
- }
- produced += num_eol;
- produced_char += num_eol;
+ record_conversion_result (coding,
+ CODING_RESULT_INSUFFICIENT_DST);
+ unbind_to (count, Qnil);
+ return;
}
- /* Suppress eol-format conversion in the further conversion. */
- coding->eol_type = CODING_EOL_LF;
-
- /* Set the coding system symbol to that for Unix-like EOL. */
- eol_type = Fget (saved_coding_symbol, Qeol_type);
- if (VECTORP (eol_type)
- && XVECTOR (eol_type)->size == 3
- && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
- coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
- else
- coding->symbol = saved_coding_symbol;
-
-
+ if (BEGV < GPT && GPT < BEGV + coding->produced_char)
+ move_gap_both (BEGV, BEGV_BYTE);
+ bcopy (BEGV_ADDR, destination, coding->produced);
+ coding->destination = destination;
}
}
- coding->consumed = consumed;
- coding->consumed_char = consumed_char;
- coding->produced = produced;
- coding->produced_char = produced_char;
+ if (saved_pt >= 0)
+ {
+ /* This is the case of:
+ (BUFFERP (src_object) && EQ (src_object, dst_object))
+ As we have moved PT while replacing the original buffer
+ contents, we must recover it now. */
+ set_buffer_internal (XBUFFER (src_object));
+ if (saved_pt < from)
+ TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
+ else if (saved_pt < from + chars)
+ TEMP_SET_PT_BOTH (from, from_byte);
+ else if (! NILP (current_buffer->enable_multibyte_characters))
+ TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
+ saved_pt_byte + (coding->produced - bytes));
+ else
+ TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
+ saved_pt_byte + (coding->produced - bytes));
+ }
- if (coding->dst_multibyte)
- newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
- produced + shrinked_bytes);
- else
- newstr = make_uninit_string (produced + shrinked_bytes);
- if (from > 0)
- STRING_COPYIN (newstr, 0, SDATA (str), from);
- STRING_COPYIN (newstr, from, buf.data, produced);
- if (shrinked_bytes > from)
- STRING_COPYIN (newstr, from + produced,
- SDATA (str) + to_byte,
- shrinked_bytes - from);
- free_conversion_buffer (&buf);
-
- coding->consumed += shrinked_bytes;
- coding->consumed_char += shrinked_bytes;
- coding->produced += shrinked_bytes;
- coding->produced_char += shrinked_bytes;
-
- if (coding->cmp_data && coding->cmp_data->used)
- coding_restore_composition (coding, newstr);
- coding_free_composition_data (coding);
-
- if (SYMBOLP (coding->post_read_conversion)
- && !NILP (Ffboundp (coding->post_read_conversion)))
- newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
-
- return newstr;
+ unbind_to (count, coding->dst_object);
}
-Lisp_Object
-encode_coding_string (str, coding, nocopy)
- Lisp_Object str;
+
+void
+encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
+ dst_object)
struct coding_system *coding;
- int nocopy;
+ Lisp_Object src_object;
+ EMACS_INT from, from_byte, to, to_byte;
+ Lisp_Object dst_object;
{
- int len;
- struct conversion_buffer buf;
- int from, to, to_byte;
- int result;
- int shrinked_bytes = 0;
- Lisp_Object newstr;
- int consumed, consumed_char, produced, produced_char;
-
- if (SYMBOLP (coding->pre_write_conversion)
- && !NILP (Ffboundp (coding->pre_write_conversion)))
- {
- str = run_pre_post_conversion_on_str (str, coding, 1);
- /* As STR is just newly generated, we don't have to copy it
- anymore. */
- nocopy = 1;
- }
+ int count = specpdl_ptr - specpdl;
+ EMACS_INT chars = to - from;
+ EMACS_INT bytes = to_byte - from_byte;
+ Lisp_Object attrs;
+ Lisp_Object buffer;
+ int saved_pt = -1, saved_pt_byte;
+ int kill_src_buffer = 0;
+
+ buffer = Fcurrent_buffer ();
+
+ coding->src_object = src_object;
+ coding->src_chars = chars;
+ coding->src_bytes = bytes;
+ coding->src_multibyte = chars < bytes;
+
+ attrs = CODING_ID_ATTRS (coding->id);
+
+ if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
+ {
+ coding->src_object = code_conversion_save (1, coding->src_multibyte);
+ set_buffer_internal (XBUFFER (coding->src_object));
+ if (STRINGP (src_object))
+ insert_from_string (src_object, from, from_byte, chars, bytes, 0);
+ else if (BUFFERP (src_object))
+ insert_from_buffer (XBUFFER (src_object), from, chars, 0);
+ else
+ insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
- from = 0;
- to = SCHARS (str);
- to_byte = SBYTES (str);
+ if (EQ (src_object, dst_object))
+ {
+ set_buffer_internal (XBUFFER (src_object));
+ saved_pt = PT, saved_pt_byte = PT_BYTE;
+ del_range_both (from, from_byte, to, to_byte, 1);
+ set_buffer_internal (XBUFFER (coding->src_object));
+ }
- /* Encoding routines determine the multibyteness of the source text
- by coding->src_multibyte. */
- coding->src_multibyte = SCHARS (str) < SBYTES (str);
- coding->dst_multibyte = 0;
- if (! CODING_REQUIRE_ENCODING (coding))
- goto no_need_of_encoding;
+ {
+ Lisp_Object args[3];
- if (coding->composing != COMPOSITION_DISABLED)
- coding_save_composition (coding, from, to, str);
+ args[0] = CODING_ATTR_PRE_WRITE (attrs);
+ args[1] = make_number (BEG);
+ args[2] = make_number (Z);
+ safe_call (3, args);
+ }
+ if (XBUFFER (coding->src_object) != current_buffer)
+ kill_src_buffer = 1;
+ coding->src_object = Fcurrent_buffer ();
+ if (BEG != GPT)
+ move_gap_both (BEG, BEG_BYTE);
+ coding->src_chars = Z - BEG;
+ coding->src_bytes = Z_BYTE - BEG_BYTE;
+ coding->src_pos = BEG;
+ coding->src_pos_byte = BEG_BYTE;
+ coding->src_multibyte = Z < Z_BYTE;
+ }
+ else if (STRINGP (src_object))
+ {
+ code_conversion_save (0, 0);
+ coding->src_pos = from;
+ coding->src_pos_byte = from_byte;
+ }
+ else if (BUFFERP (src_object))
+ {
+ code_conversion_save (0, 0);
+ set_buffer_internal (XBUFFER (src_object));
+ if (EQ (src_object, dst_object))
+ {
+ saved_pt = PT, saved_pt_byte = PT_BYTE;
+ coding->src_object = del_range_1 (from, to, 1, 1);
+ coding->src_pos = 0;
+ coding->src_pos_byte = 0;
+ }
+ else
+ {
+ if (from < GPT && to >= GPT)
+ move_gap_both (from, from_byte);
+ coding->src_pos = from;
+ coding->src_pos_byte = from_byte;
+ }
+ }
+ else
+ code_conversion_save (0, 0);
- /* Try to skip the heading and tailing ASCIIs. We can't skip them
- if we must run CCL program or there are compositions to
- encode. */
- if (coding->type != coding_type_ccl
- && (! coding->cmp_data || coding->cmp_data->used == 0))
+ if (BUFFERP (dst_object))
{
- SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
- 1);
- if (from == to_byte)
+ coding->dst_object = dst_object;
+ if (EQ (src_object, dst_object))
+ {
+ coding->dst_pos = from;
+ coding->dst_pos_byte = from_byte;
+ }
+ else
{
- coding_free_composition_data (coding);
- goto no_need_of_encoding;
+ coding->dst_pos = BUF_PT (XBUFFER (dst_object));
+ coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
}
- shrinked_bytes = from + (SBYTES (str) - to_byte);
+ coding->dst_multibyte
+ = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
+ }
+ else if (EQ (dst_object, Qt))
+ {
+ coding->dst_object = Qnil;
+ coding->dst_bytes = coding->src_chars;
+ if (coding->dst_bytes == 0)
+ coding->dst_bytes = 1;
+ coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
+ coding->dst_multibyte = 0;
+ }
+ else
+ {
+ coding->dst_object = Qnil;
+ coding->dst_multibyte = 0;
}
- len = encoding_buffer_size (coding, to_byte - from);
- allocate_conversion_buffer (buf, len);
+ encode_coding (coding);
- consumed = consumed_char = produced = produced_char = 0;
- while (1)
+ if (EQ (dst_object, Qt))
{
- result = encode_coding (coding, SDATA (str) + from + consumed,
- buf.data + produced, to_byte - from - consumed,
- buf.size - produced);
- consumed += coding->consumed;
- consumed_char += coding->consumed_char;
- produced += coding->produced;
- produced_char += coding->produced_char;
- if (result == CODING_FINISH_NORMAL
- || result == CODING_FINISH_INTERRUPT
- || (result == CODING_FINISH_INSUFFICIENT_SRC
- && coding->consumed == 0))
- break;
- /* Now result should be CODING_FINISH_INSUFFICIENT_DST. */
- extend_conversion_buffer (&buf);
- }
-
- coding->consumed = consumed;
- coding->consumed_char = consumed_char;
- coding->produced = produced;
- coding->produced_char = produced_char;
-
- newstr = make_uninit_string (produced + shrinked_bytes);
- if (from > 0)
- STRING_COPYIN (newstr, 0, SDATA (str), from);
- STRING_COPYIN (newstr, from, buf.data, produced);
- if (shrinked_bytes > from)
- STRING_COPYIN (newstr, from + produced,
- SDATA (str) + to_byte,
- shrinked_bytes - from);
-
- free_conversion_buffer (&buf);
- coding_free_composition_data (coding);
-
- return newstr;
-
- no_need_of_encoding:
- coding->consumed = SBYTES (str);
- coding->consumed_char = SCHARS (str);
- if (STRING_MULTIBYTE (str))
- {
- if (nocopy)
- /* We are sure that STR doesn't contain a multibyte
- character. */
- STRING_SET_UNIBYTE (str);
+ if (BUFFERP (coding->dst_object))
+ coding->dst_object = Fbuffer_string ();
else
{
- str = Fstring_as_unibyte (str);
- nocopy = 1;
+ coding->dst_object
+ = make_unibyte_string ((char *) coding->destination,
+ coding->produced);
+ xfree (coding->destination);
}
}
- coding->produced = SBYTES (str);
- coding->produced_char = SCHARS (str);
- return (nocopy ? str : Fcopy_sequence (str));
+
+ if (saved_pt >= 0)
+ {
+ /* This is the case of:
+ (BUFFERP (src_object) && EQ (src_object, dst_object))
+ As we have moved PT while replacing the original buffer
+ contents, we must recover it now. */
+ set_buffer_internal (XBUFFER (src_object));
+ if (saved_pt < from)
+ TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
+ else if (saved_pt < from + chars)
+ TEMP_SET_PT_BOTH (from, from_byte);
+ else if (! NILP (current_buffer->enable_multibyte_characters))
+ TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
+ saved_pt_byte + (coding->produced - bytes));
+ else
+ TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
+ saved_pt_byte + (coding->produced - bytes));
+ }
+
+ if (kill_src_buffer)
+ Fkill_buffer (coding->src_object);
+ unbind_to (count, Qnil);
+}
+
+
+Lisp_Object
+preferred_coding_system ()
+{
+ int id = coding_categories[coding_priorities[0]].id;
+
+ return CODING_ID_NAME (id);
}
@@ -6528,21 +7106,18 @@ encode_coding_string (str, coding, nocopy)
DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
doc: /* Return t if OBJECT is nil or a coding-system.
-See the documentation of `make-coding-system' for information
+See the documentation of `define-coding-system' for information
about coding-system objects. */)
(obj)
Lisp_Object obj;
{
- if (NILP (obj))
+ if (NILP (obj)
+ || CODING_SYSTEM_ID (obj) >= 0)
return Qt;
- if (!SYMBOLP (obj))
+ if (! SYMBOLP (obj)
+ || NILP (Fget (obj, Qcoding_system_define_form)))
return Qnil;
- if (! NILP (Fget (obj, Qcoding_system_define_form)))
- return Qt;
- /* Get coding-spec vector for OBJ. */
- obj = Fget (obj, Qcoding_system);
- return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
- ? Qt : Qnil);
+ return Qt;
}
DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
@@ -6569,7 +7144,7 @@ If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */
{
Lisp_Object val;
if (SYMBOLP (default_coding_system))
- default_coding_system = SYMBOL_NAME (default_coding_system);
+ XSETSTRING (default_coding_system, XPNTR (SYMBOL_NAME (default_coding_system)));
val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
Qt, Qnil, Qcoding_system_history,
default_coding_system, Qnil);
@@ -6580,9 +7155,9 @@ DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
1, 1, 0,
doc: /* Check validity of CODING-SYSTEM.
If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
-It is valid if it is nil or a symbol with a non-nil `coding-system' property.
-The value of this property should be a vector of length 5. */)
- (coding_system)
+It is valid if it is nil or a symbol defined as a coding system by the
+function `define-coding-system'. */)
+ (coding_system)
Lisp_Object coding_system;
{
Lisp_Object define_form;
@@ -6597,77 +7172,287 @@ The value of this property should be a vector of length 5. */)
return coding_system;
xsignal1 (Qcoding_system_error, coding_system);
}
+
+/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
+ HIGHEST is nonzero, return the coding system of the highest
+ priority among the detected coding systems. Otherwize return a
+ list of detected coding systems sorted by their priorities. If
+ MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
+ multibyte form but contains only ASCII and eight-bit chars.
+ Otherwise, the bytes are raw bytes.
+
+ CODING-SYSTEM controls the detection as below:
+
+ If it is nil, detect both text-format and eol-format. If the
+ text-format part of CODING-SYSTEM is already specified
+ (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
+ part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
+ detect only text-format. */
+
Lisp_Object
-detect_coding_system (src, src_bytes, highest, multibytep)
+detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
+ coding_system)
const unsigned char *src;
- int src_bytes, highest;
+ int src_chars, src_bytes, highest;
int multibytep;
+ Lisp_Object coding_system;
{
- int coding_mask, eol_type;
- Lisp_Object val, tmp;
- int dummy;
+ const unsigned char *src_end = src + src_bytes;
+ Lisp_Object attrs, eol_type;
+ Lisp_Object val;
+ struct coding_system coding;
+ int id;
+ struct coding_detection_info detect_info;
+ enum coding_category base_category;
+
+ if (NILP (coding_system))
+ coding_system = Qundecided;
+ setup_coding_system (coding_system, &coding);
+ attrs = CODING_ID_ATTRS (coding.id);
+ eol_type = CODING_ID_EOL_TYPE (coding.id);
+ coding_system = CODING_ATTR_BASE_NAME (attrs);
+
+ coding.source = src;
+ coding.src_chars = src_chars;
+ coding.src_bytes = src_bytes;
+ coding.src_multibyte = multibytep;
+ coding.consumed = 0;
+ coding.mode |= CODING_MODE_LAST_BLOCK;
- coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep);
- eol_type = detect_eol_type (src, src_bytes, &dummy);
- if (eol_type == CODING_EOL_INCONSISTENT)
- eol_type = CODING_EOL_UNDECIDED;
+ detect_info.checked = detect_info.found = detect_info.rejected = 0;
- if (!coding_mask)
+ /* At first, detect text-format if necessary. */
+ base_category = XINT (CODING_ATTR_CATEGORY (attrs));
+ if (base_category == coding_category_undecided)
{
- val = Qundecided;
- if (eol_type != CODING_EOL_UNDECIDED)
+ enum coding_category category;
+ struct coding_system *this;
+ int c, i;
+
+ /* Skip all ASCII bytes except for a few ISO2022 controls. */
+ for (i = 0; src < src_end; i++, src++)
{
- Lisp_Object val2;
- val2 = Fget (Qundecided, Qeol_type);
- if (VECTORP (val2))
- val = XVECTOR (val2)->contents[eol_type];
+ c = *src;
+ if (c & 0x80)
+ break;
+ if (c < 0x20
+ && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+ && inhibit_iso_escape_detection)
+ {
+ coding.head_ascii = src - coding.source;
+ if (detect_coding_iso_2022 (&coding, &detect_info))
+ {
+ /* We have scanned the whole data. */
+ if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
+ /* We didn't find an 8-bit code. */
+ src = src_end;
+ break;
+ }
+ }
}
- return (highest ? val : Fcons (val, Qnil));
- }
+ coding.head_ascii = src - coding.source;
- /* At first, gather possible coding systems in VAL. */
- val = Qnil;
- for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
- {
- Lisp_Object category_val, category_index;
+ if (src < src_end
+ || detect_info.found)
+ {
+ if (src == src_end)
+ /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
+ for (i = 0; i < coding_category_raw_text; i++)
+ {
+ category = coding_priorities[i];
+ if (detect_info.found & (1 << category))
+ break;
+ }
+ else
+ for (i = 0; i < coding_category_raw_text; i++)
+ {
+ category = coding_priorities[i];
+ this = coding_categories + category;
+
+ if (this->id < 0)
+ {
+ /* No coding system of this category is defined. */
+ detect_info.rejected |= (1 << category);
+ }
+ else if (category >= coding_category_raw_text)
+ continue;
+ else if (detect_info.checked & (1 << category))
+ {
+ if (highest
+ && (detect_info.found & (1 << category)))
+ break;
+ }
+ else
+ {
+ if ((*(this->detector)) (&coding, &detect_info)
+ && highest
+ && (detect_info.found & (1 << category)))
+ {
+ if (category == coding_category_utf_16_auto)
+ {
+ if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+ category = coding_category_utf_16_le;
+ else
+ category = coding_category_utf_16_be;
+ }
+ break;
+ }
+ }
+ }
+ }
- category_index = Fget (XCAR (tmp), Qcoding_category_index);
- category_val = Fsymbol_value (XCAR (tmp));
- if (!NILP (category_val)
- && NATNUMP (category_index)
- && (coding_mask & (1 << XFASTINT (category_index))))
+ if (detect_info.rejected == CATEGORY_MASK_ANY)
{
- val = Fcons (category_val, val);
- if (highest)
- break;
+ detect_info.found = CATEGORY_MASK_RAW_TEXT;
+ id = coding_categories[coding_category_raw_text].id;
+ val = Fcons (make_number (id), Qnil);
}
- }
- if (!highest)
- val = Fnreverse (val);
+ else if (! detect_info.rejected && ! detect_info.found)
+ {
+ detect_info.found = CATEGORY_MASK_ANY;
+ id = coding_categories[coding_category_undecided].id;
+ val = Fcons (make_number (id), Qnil);
+ }
+ else if (highest)
+ {
+ if (detect_info.found)
+ {
+ detect_info.found = 1 << category;
+ val = Fcons (make_number (this->id), Qnil);
+ }
+ else
+ for (i = 0; i < coding_category_raw_text; i++)
+ if (! (detect_info.rejected & (1 << coding_priorities[i])))
+ {
+ detect_info.found = 1 << coding_priorities[i];
+ id = coding_categories[coding_priorities[i]].id;
+ val = Fcons (make_number (id), Qnil);
+ break;
+ }
+ }
+ else
+ {
+ int mask = detect_info.rejected | detect_info.found;
+ int found = 0;
+ val = Qnil;
- /* Then, replace the elements with subsidiary coding systems. */
- for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
+ for (i = coding_category_raw_text - 1; i >= 0; i--)
+ {
+ category = coding_priorities[i];
+ if (! (mask & (1 << category)))
+ {
+ found |= 1 << category;
+ id = coding_categories[category].id;
+ val = Fcons (make_number (id), val);
+ }
+ }
+ for (i = coding_category_raw_text - 1; i >= 0; i--)
+ {
+ category = coding_priorities[i];
+ if (detect_info.found & (1 << category))
+ {
+ id = coding_categories[category].id;
+ val = Fcons (make_number (id), val);
+ }
+ }
+ detect_info.found |= found;
+ }
+ }
+ else if (base_category == coding_category_utf_16_auto)
{
- if (eol_type != CODING_EOL_UNDECIDED
- && eol_type != CODING_EOL_INCONSISTENT)
+ if (detect_coding_utf_16 (&coding, &detect_info))
{
- Lisp_Object eol;
- eol = Fget (XCAR (tmp), Qeol_type);
- if (VECTORP (eol))
- XSETCAR (tmp, XVECTOR (eol)->contents[eol_type]);
+ struct coding_system *this;
+
+ if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+ this = coding_categories + coding_category_utf_16_le;
+ else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
+ this = coding_categories + coding_category_utf_16_be;
+ else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
+ this = coding_categories + coding_category_utf_16_be_nosig;
+ else
+ this = coding_categories + coding_category_utf_16_le_nosig;
+ val = Fcons (make_number (this->id), Qnil);
}
}
+ else
+ {
+ detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
+ val = Fcons (make_number (coding.id), Qnil);
+ }
+
+ /* Then, detect eol-format if necessary. */
+ {
+ int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
+ Lisp_Object tail;
+
+ if (VECTORP (eol_type))
+ {
+ if (detect_info.found & ~CATEGORY_MASK_UTF_16)
+ normal_eol = detect_eol (coding.source, src_bytes,
+ coding_category_raw_text);
+ if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
+ | CATEGORY_MASK_UTF_16_BE_NOSIG))
+ utf_16_be_eol = detect_eol (coding.source, src_bytes,
+ coding_category_utf_16_be);
+ if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
+ | CATEGORY_MASK_UTF_16_LE_NOSIG))
+ utf_16_le_eol = detect_eol (coding.source, src_bytes,
+ coding_category_utf_16_le);
+ }
+ else
+ {
+ if (EQ (eol_type, Qunix))
+ normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
+ else if (EQ (eol_type, Qdos))
+ normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
+ else
+ normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
+ }
+
+ for (tail = val; CONSP (tail); tail = XCDR (tail))
+ {
+ enum coding_category category;
+ int this_eol;
+
+ id = XINT (XCAR (tail));
+ attrs = CODING_ID_ATTRS (id);
+ category = XINT (CODING_ATTR_CATEGORY (attrs));
+ eol_type = CODING_ID_EOL_TYPE (id);
+ if (VECTORP (eol_type))
+ {
+ if (category == coding_category_utf_16_be
+ || category == coding_category_utf_16_be_nosig)
+ this_eol = utf_16_be_eol;
+ else if (category == coding_category_utf_16_le
+ || category == coding_category_utf_16_le_nosig)
+ this_eol = utf_16_le_eol;
+ else
+ this_eol = normal_eol;
+
+ if (this_eol == EOL_SEEN_LF)
+ XSETCAR (tail, AREF (eol_type, 0));
+ else if (this_eol == EOL_SEEN_CRLF)
+ XSETCAR (tail, AREF (eol_type, 1));
+ else if (this_eol == EOL_SEEN_CR)
+ XSETCAR (tail, AREF (eol_type, 2));
+ else
+ XSETCAR (tail, CODING_ID_NAME (id));
+ }
+ else
+ XSETCAR (tail, CODING_ID_NAME (id));
+ }
+ }
+
return (highest ? XCAR (val) : val);
}
+
DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
2, 3, 0,
- doc: /* Detect how the byte sequence in the region is encoded.
-Return a list of possible coding systems used on decoding a byte
-sequence containing the bytes in the region between START and END when
-the coding system `undecided' is specified. The list is ordered by
-priority decided in the current language environment.
+ doc: /* Detect coding system of the text in the region between START and END.
+Return a list of possible coding systems ordered by priority.
If only ASCII characters are found, it returns a list of single element
`undecided' or its subsidiary coding system according to a detected
@@ -6680,7 +7465,6 @@ highest priority. */)
{
int from, to;
int from_byte, to_byte;
- int include_anchor_byte = 0;
CHECK_NUMBER_COERCE_MARKER (start);
CHECK_NUMBER_COERCE_MARKER (end);
@@ -6692,29 +7476,19 @@ highest priority. */)
if (from < GPT && to >= GPT)
move_gap_both (to, to_byte);
- /* If we an anchor byte `\0' follows the region, we include it in
- the detecting source. Then code detectors can handle the tailing
- byte sequence more accurately.
- Fix me: This is not a perfect solution. It is better that we
- add one more argument, say LAST_BLOCK, to all detect_coding_XXX.
- */
- if (to == Z || (to == GPT && GAP_SIZE > 0))
- include_anchor_byte = 1;
return detect_coding_system (BYTE_POS_ADDR (from_byte),
- to_byte - from_byte + include_anchor_byte,
+ to - from, to_byte - from_byte,
!NILP (highest),
!NILP (current_buffer
- ->enable_multibyte_characters));
+ ->enable_multibyte_characters),
+ Qnil);
}
DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
1, 2, 0,
- doc: /* Detect how the byte sequence in STRING is encoded.
-Return a list of possible coding systems used on decoding a byte
-sequence containing the bytes in STRING when the coding system
-`undecided' is specified. The list is ordered by priority decided in
-the current language environment.
+ doc: /* Detect coding system of the text in STRING.
+Return a list of possible coding systems ordered by priority.
If only ASCII characters are found, it returns a list of single element
`undecided' or its subsidiary coding system according to a detected
@@ -6728,288 +7502,157 @@ highest priority. */)
CHECK_STRING (string);
return detect_coding_system (SDATA (string),
- /* "+ 1" is to include the anchor byte
- `\0'. With this, code detectors can
- handle the tailing bytes more
- accurately. */
- SBYTES (string) + 1,
- !NILP (highest),
- STRING_MULTIBYTE (string));
+ SCHARS (string), SBYTES (string),
+ !NILP (highest), STRING_MULTIBYTE (string),
+ Qnil);
}
-/* Subroutine for Ffind_coding_systems_region_internal.
-
- Return a list of coding systems that safely encode the multibyte
- text between P and PEND. SAFE_CODINGS, if non-nil, is an alist of
- possible coding systems. If it is nil, it means that we have not
- yet found any coding systems.
-
- WORK_TABLE a char-table of which element is set to t once the
- element is looked up.
- If a non-ASCII single byte char is found, set
- *single_byte_char_found to 1. */
-
-static Lisp_Object
-find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
- unsigned char *p, *pend;
- Lisp_Object safe_codings, work_table;
- int *single_byte_char_found;
+static INLINE int
+char_encodable_p (c, attrs)
+ int c;
+ Lisp_Object attrs;
{
- int c, len;
- Lisp_Object val, ch;
- Lisp_Object prev, tail;
+ Lisp_Object tail;
+ struct charset *charset;
+ Lisp_Object translation_table;
- if (NILP (safe_codings))
- goto done_safe_codings;
- while (p < pend)
+ translation_table = CODING_ATTR_TRANS_TBL (attrs);
+ if (! NILP (translation_table))
+ c = translate_char (translation_table, c);
+ for (tail = CODING_ATTR_CHARSET_LIST (attrs);
+ CONSP (tail); tail = XCDR (tail))
{
- c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
- p += len;
- if (ASCII_BYTE_P (c))
- /* We can ignore ASCII characters here. */
- continue;
- if (SINGLE_BYTE_CHAR_P (c))
- *single_byte_char_found = 1;
- /* Check the safe coding systems for C. */
- ch = make_number (c);
- val = Faref (work_table, ch);
- if (EQ (val, Qt))
- /* This element was already checked. Ignore it. */
- continue;
- /* Remember that we checked this element. */
- Faset (work_table, ch, Qt);
-
- for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
- {
- Lisp_Object elt, translation_table, hash_table, accept_latin_extra;
- int encodable;
-
- elt = XCAR (tail);
- if (CONSP (XCDR (elt)))
- {
- /* This entry has this format now:
- ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
- ACCEPT-LATIN-EXTRA ) */
- val = XCDR (elt);
- encodable = ! NILP (Faref (XCAR (val), ch));
- if (! encodable)
- {
- val = XCDR (val);
- translation_table = XCAR (val);
- hash_table = XCAR (XCDR (val));
- accept_latin_extra = XCAR (XCDR (XCDR (val)));
- }
- }
- else
- {
- /* This entry has this format now: ( CODING . SAFE-CHARS) */
- encodable = ! NILP (Faref (XCDR (elt), ch));
- if (! encodable)
- {
- /* Transform the format to:
- ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
- ACCEPT-LATIN-EXTRA ) */
- val = Fget (XCAR (elt), Qcoding_system);
- translation_table
- = Fplist_get (AREF (val, 3),
- Qtranslation_table_for_encode);
- if (SYMBOLP (translation_table))
- translation_table = Fget (translation_table,
- Qtranslation_table);
- hash_table
- = (CHAR_TABLE_P (translation_table)
- ? XCHAR_TABLE (translation_table)->extras[1]
- : Qnil);
- accept_latin_extra
- = ((EQ (AREF (val, 0), make_number (2))
- && VECTORP (AREF (val, 4)))
- ? AREF (AREF (val, 4), 16)
- : Qnil);
- XSETCAR (tail, list5 (XCAR (elt), XCDR (elt),
- translation_table, hash_table,
- accept_latin_extra));
- }
- }
-
- if (! encodable
- && ((CHAR_TABLE_P (translation_table)
- && ! NILP (Faref (translation_table, ch)))
- || (HASH_TABLE_P (hash_table)
- && ! NILP (Fgethash (ch, hash_table, Qnil)))
- || (SINGLE_BYTE_CHAR_P (c)
- && ! NILP (accept_latin_extra)
- && VECTORP (Vlatin_extra_code_table)
- && ! NILP (AREF (Vlatin_extra_code_table, c)))))
- encodable = 1;
- if (encodable)
- prev = tail;
- else
- {
- /* Exclude this coding system from SAFE_CODINGS. */
- if (EQ (tail, safe_codings))
- {
- safe_codings = XCDR (safe_codings);
- if (NILP (safe_codings))
- goto done_safe_codings;
- }
- else
- XSETCDR (prev, XCDR (tail));
- }
- }
+ charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
+ if (CHAR_CHARSET_P (c, charset))
+ break;
}
-
- done_safe_codings:
- /* If the above loop was terminated before P reaches PEND, it means
- SAFE_CODINGS was set to nil. If we have not yet found an
- non-ASCII single-byte char, check it now. */
- if (! *single_byte_char_found)
- while (p < pend)
- {
- c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
- p += len;
- if (! ASCII_BYTE_P (c)
- && SINGLE_BYTE_CHAR_P (c))
- {
- *single_byte_char_found = 1;
- break;
- }
- }
- return safe_codings;
+ return (! NILP (tail));
}
+
+/* Return a list of coding systems that safely encode the text between
+ START and END. If EXCLUDE is non-nil, it is a list of coding
+ systems not to check. The returned list doesn't contain any such
+ coding systems. In any case, if the text contains only ASCII or is
+ unibyte, return t. */
+
DEFUN ("find-coding-systems-region-internal",
Ffind_coding_systems_region_internal,
- Sfind_coding_systems_region_internal, 2, 2, 0,
+ Sfind_coding_systems_region_internal, 2, 3, 0,
doc: /* Internal use only. */)
- (start, end)
- Lisp_Object start, end;
+ (start, end, exclude)
+ Lisp_Object start, end, exclude;
{
- Lisp_Object work_table, safe_codings;
- int non_ascii_p = 0;
- int single_byte_char_found = 0;
- const unsigned char *p1, *p1end, *p2, *p2end, *p;
+ Lisp_Object coding_attrs_list, safe_codings;
+ EMACS_INT start_byte, end_byte;
+ const unsigned char *p, *pbeg, *pend;
+ int c;
+ Lisp_Object tail, elt;
if (STRINGP (start))
{
- if (!STRING_MULTIBYTE (start))
+ if (!STRING_MULTIBYTE (start)
+ || SCHARS (start) == SBYTES (start))
return Qt;
- p1 = SDATA (start), p1end = p1 + SBYTES (start);
- p2 = p2end = p1end;
- if (SCHARS (start) != SBYTES (start))
- non_ascii_p = 1;
+ start_byte = 0;
+ end_byte = SBYTES (start);
}
else
{
- int from, to, stop;
-
CHECK_NUMBER_COERCE_MARKER (start);
CHECK_NUMBER_COERCE_MARKER (end);
if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
args_out_of_range (start, end);
if (NILP (current_buffer->enable_multibyte_characters))
return Qt;
- from = CHAR_TO_BYTE (XINT (start));
- to = CHAR_TO_BYTE (XINT (end));
- stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
- p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
- if (stop == to)
- p2 = p2end = p1end;
- else
- p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
- if (XINT (end) - XINT (start) != to - from)
- non_ascii_p = 1;
- }
+ start_byte = CHAR_TO_BYTE (XINT (start));
+ end_byte = CHAR_TO_BYTE (XINT (end));
+ if (XINT (end) - XINT (start) == end_byte - start_byte)
+ return Qt;
- if (!non_ascii_p)
- {
- /* We are sure that the text contains no multibyte character.
- Check if it contains eight-bit-graphic. */
- p = p1;
- for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
- if (p == p1end)
+ if (XINT (start) < GPT && XINT (end) > GPT)
{
- for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
- if (p == p2end)
- return Qt;
+ if ((GPT - XINT (start)) < (XINT (end) - GPT))
+ move_gap_both (XINT (start), start_byte);
+ else
+ move_gap_both (XINT (end), end_byte);
}
}
- /* The text contains non-ASCII characters. */
+ coding_attrs_list = Qnil;
+ for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
+ if (NILP (exclude)
+ || NILP (Fmemq (XCAR (tail), exclude)))
+ {
+ Lisp_Object attrs;
- work_table = Fmake_char_table (Qchar_coding_system, Qnil);
- safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
+ attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
+ if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
+ && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
+ {
+ ASET (attrs, coding_attr_trans_tbl,
+ get_translation_table (attrs, 1, NULL));
+ coding_attrs_list = Fcons (attrs, coding_attrs_list);
+ }
+ }
- safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
- &single_byte_char_found);
- if (p2 < p2end)
- safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
- &single_byte_char_found);
- if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
- safe_codings = Qt;
+ if (STRINGP (start))
+ p = pbeg = SDATA (start);
else
- {
- /* Turn safe_codings to a list of coding systems... */
- Lisp_Object val;
-
- if (single_byte_char_found)
- /* ... and append these for eight-bit chars. */
- val = Fcons (Qraw_text,
- Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil)));
- else
- /* ... and append generic coding systems. */
- val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars));
-
- for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings))
- val = Fcons (XCAR (XCAR (safe_codings)), val);
- safe_codings = val;
- }
-
- return safe_codings;
-}
-
-
-/* Search from position POS for such characters that are unencodable
- accoding to SAFE_CHARS, and return a list of their positions. P
- points where in the memory the character at POS exists. Limit the
- search at PEND or when Nth unencodable characters are found.
+ p = pbeg = BYTE_POS_ADDR (start_byte);
+ pend = p + (end_byte - start_byte);
- If SAFE_CHARS is a char table, an element for an unencodable
- character is nil.
+ while (p < pend && ASCII_BYTE_P (*p)) p++;
+ while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
- If SAFE_CHARS is nil, all non-ASCII characters are unencodable.
-
- Otherwise, SAFE_CHARS is t, and only eight-bit-contrl and
- eight-bit-graphic characters are unencodable. */
-
-static Lisp_Object
-unencodable_char_position (safe_chars, pos, p, pend, n)
- Lisp_Object safe_chars;
- int pos;
- unsigned char *p, *pend;
- int n;
-{
- Lisp_Object pos_list;
-
- pos_list = Qnil;
while (p < pend)
{
- int len;
- int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len);
-
- if (c >= 128
- && (CHAR_TABLE_P (safe_chars)
- ? NILP (CHAR_TABLE_REF (safe_chars, c))
- : (NILP (safe_chars) || c < 256)))
+ if (ASCII_BYTE_P (*p))
+ p++;
+ else
{
- pos_list = Fcons (make_number (pos), pos_list);
- if (--n <= 0)
- break;
+ c = STRING_CHAR_ADVANCE (p);
+
+ charset_map_loaded = 0;
+ for (tail = coding_attrs_list; CONSP (tail);)
+ {
+ elt = XCAR (tail);
+ if (NILP (elt))
+ tail = XCDR (tail);
+ else if (char_encodable_p (c, elt))
+ tail = XCDR (tail);
+ else if (CONSP (XCDR (tail)))
+ {
+ XSETCAR (tail, XCAR (XCDR (tail)));
+ XSETCDR (tail, XCDR (XCDR (tail)));
+ }
+ else
+ {
+ XSETCAR (tail, Qnil);
+ tail = XCDR (tail);
+ }
+ }
+ if (charset_map_loaded)
+ {
+ EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
+
+ if (STRINGP (start))
+ pbeg = SDATA (start);
+ else
+ pbeg = BYTE_POS_ADDR (start_byte);
+ p = pbeg + p_offset;
+ pend = pbeg + pend_offset;
+ }
}
- pos++;
- p += len;
}
- return Fnreverse (pos_list);
+
+ safe_codings = list2 (Qraw_text, Qno_conversion);
+ for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
+ if (! NILP (XCAR (tail)))
+ safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
+
+ return safe_codings;
}
@@ -7031,24 +7674,36 @@ to the string. */)
Lisp_Object start, end, coding_system, count, string;
{
int n;
- Lisp_Object safe_chars;
struct coding_system coding;
+ Lisp_Object attrs, charset_list, translation_table;
Lisp_Object positions;
int from, to;
- unsigned char *p, *pend;
+ const unsigned char *p, *stop, *pend;
+ int ascii_compatible;
+
+ setup_coding_system (Fcheck_coding_system (coding_system), &coding);
+ attrs = CODING_ID_ATTRS (coding.id);
+ if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
+ return Qnil;
+ ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
+ charset_list = CODING_ATTR_CHARSET_LIST (attrs);
+ translation_table = get_translation_table (attrs, 1, NULL);
if (NILP (string))
{
validate_region (&start, &end);
from = XINT (start);
to = XINT (end);
- if (NILP (current_buffer->enable_multibyte_characters))
+ if (NILP (current_buffer->enable_multibyte_characters)
+ || (ascii_compatible
+ && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
return Qnil;
p = CHAR_POS_ADDR (from);
- if (to == GPT)
- pend = GPT_ADDR;
+ pend = CHAR_POS_ADDR (to);
+ if (from < GPT && to >= GPT)
+ stop = GPT_ADDR;
else
- pend = CHAR_POS_ADDR (to);
+ stop = pend;
}
else
{
@@ -7063,11 +7718,11 @@ to the string. */)
if (! STRING_MULTIBYTE (string))
return Qnil;
p = SDATA (string) + string_char_to_byte (string, from);
- pend = SDATA (string) + string_char_to_byte (string, to);
+ stop = pend = SDATA (string) + string_char_to_byte (string, to);
+ if (ascii_compatible && (to - from) == (pend - p))
+ return Qnil;
}
- setup_coding_system (Fcheck_coding_system (coding_system), &coding);
-
if (NILP (count))
n = 1;
else
@@ -7076,151 +7731,294 @@ to the string. */)
n = XINT (count);
}
- if (coding.type == coding_type_no_conversion
- || coding.type == coding_type_raw_text)
- return Qnil;
+ positions = Qnil;
+ while (1)
+ {
+ int c;
- if (coding.type == coding_type_undecided)
- safe_chars = Qnil;
- else
- safe_chars = coding_safe_chars (coding_system);
+ if (ascii_compatible)
+ while (p < stop && ASCII_BYTE_P (*p))
+ p++, from++;
+ if (p >= stop)
+ {
+ if (p >= pend)
+ break;
+ stop = pend;
+ p = GAP_END_ADDR;
+ }
+
+ c = STRING_CHAR_ADVANCE (p);
+ if (! (ASCII_CHAR_P (c) && ascii_compatible)
+ && ! char_charset (translate_char (translation_table, c),
+ charset_list, NULL))
+ {
+ positions = Fcons (make_number (from), positions);
+ n--;
+ if (n == 0)
+ break;
+ }
+
+ from++;
+ }
+
+ return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
+}
+
+
+DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
+ Scheck_coding_systems_region, 3, 3, 0,
+ doc: /* Check if the region is encodable by coding systems.
+
+START and END are buffer positions specifying the region.
+CODING-SYSTEM-LIST is a list of coding systems to check.
+
+The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
+CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
+whole region, POS0, POS1, ... are buffer positions where non-encodable
+characters are found.
+
+If all coding systems in CODING-SYSTEM-LIST can encode the region, the
+value is nil.
+
+START may be a string. In that case, check if the string is
+encodable, and the value contains indices to the string instead of
+buffer positions. END is ignored. */)
+ (start, end, coding_system_list)
+ Lisp_Object start, end, coding_system_list;
+{
+ Lisp_Object list;
+ EMACS_INT start_byte, end_byte;
+ int pos;
+ const unsigned char *p, *pbeg, *pend;
+ int c;
+ Lisp_Object tail, elt, attrs;
- if (STRINGP (string)
- || from >= GPT || to <= GPT)
- positions = unencodable_char_position (safe_chars, from, p, pend, n);
+ if (STRINGP (start))
+ {
+ if (!STRING_MULTIBYTE (start)
+ && SCHARS (start) != SBYTES (start))
+ return Qnil;
+ start_byte = 0;
+ end_byte = SBYTES (start);
+ pos = 0;
+ }
else
{
- Lisp_Object args[2];
+ CHECK_NUMBER_COERCE_MARKER (start);
+ CHECK_NUMBER_COERCE_MARKER (end);
+ if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
+ args_out_of_range (start, end);
+ if (NILP (current_buffer->enable_multibyte_characters))
+ return Qnil;
+ start_byte = CHAR_TO_BYTE (XINT (start));
+ end_byte = CHAR_TO_BYTE (XINT (end));
+ if (XINT (end) - XINT (start) == end_byte - start_byte)
+ return Qt;
+
+ if (XINT (start) < GPT && XINT (end) > GPT)
+ {
+ if ((GPT - XINT (start)) < (XINT (end) - GPT))
+ move_gap_both (XINT (start), start_byte);
+ else
+ move_gap_both (XINT (end), end_byte);
+ }
+ pos = XINT (start);
+ }
+
+ list = Qnil;
+ for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
+ {
+ elt = XCAR (tail);
+ attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
+ ASET (attrs, coding_attr_trans_tbl,
+ get_translation_table (attrs, 1, NULL));
+ list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
+ }
+
+ if (STRINGP (start))
+ p = pbeg = SDATA (start);
+ else
+ p = pbeg = BYTE_POS_ADDR (start_byte);
+ pend = p + (end_byte - start_byte);
- args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n);
- n -= XINT (Flength (args[0]));
- if (n <= 0)
- positions = args[0];
+ while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
+ while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
+
+ while (p < pend)
+ {
+ if (ASCII_BYTE_P (*p))
+ p++;
else
{
- args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR,
- pend, n);
- positions = Fappend (2, args);
+ c = STRING_CHAR_ADVANCE (p);
+
+ charset_map_loaded = 0;
+ for (tail = list; CONSP (tail); tail = XCDR (tail))
+ {
+ elt = XCDR (XCAR (tail));
+ if (! char_encodable_p (c, XCAR (elt)))
+ XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
+ }
+ if (charset_map_loaded)
+ {
+ EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
+
+ if (STRINGP (start))
+ pbeg = SDATA (start);
+ else
+ pbeg = BYTE_POS_ADDR (start_byte);
+ p = pbeg + p_offset;
+ pend = pbeg + pend_offset;
+ }
}
+ pos++;
+ }
+
+ tail = list;
+ list = Qnil;
+ for (; CONSP (tail); tail = XCDR (tail))
+ {
+ elt = XCAR (tail);
+ if (CONSP (XCDR (XCDR (elt))))
+ list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
+ list);
}
- return (NILP (count) ? Fcar (positions) : positions);
+ return list;
}
Lisp_Object
-code_convert_region1 (start, end, coding_system, encodep)
- Lisp_Object start, end, coding_system;
- int encodep;
+code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
+ Lisp_Object start, end, coding_system, dst_object;
+ int encodep, norecord;
{
struct coding_system coding;
- int from, to;
+ EMACS_INT from, from_byte, to, to_byte;
+ Lisp_Object src_object;
CHECK_NUMBER_COERCE_MARKER (start);
CHECK_NUMBER_COERCE_MARKER (end);
- CHECK_SYMBOL (coding_system);
+ if (NILP (coding_system))
+ coding_system = Qno_conversion;
+ else
+ CHECK_CODING_SYSTEM (coding_system);
+ src_object = Fcurrent_buffer ();
+ if (NILP (dst_object))
+ dst_object = src_object;
+ else if (! EQ (dst_object, Qt))
+ CHECK_BUFFER (dst_object);
validate_region (&start, &end);
from = XFASTINT (start);
+ from_byte = CHAR_TO_BYTE (from);
to = XFASTINT (end);
+ to_byte = CHAR_TO_BYTE (to);
- if (NILP (coding_system))
- return make_number (to - from);
-
- if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
- error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
-
+ setup_coding_system (coding_system, &coding);
coding.mode |= CODING_MODE_LAST_BLOCK;
- coding.src_multibyte = coding.dst_multibyte
- = !NILP (current_buffer->enable_multibyte_characters);
- code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
- &coding, encodep, 1);
- Vlast_coding_system_used = coding.symbol;
- return make_number (coding.produced_char);
+
+ if (encodep)
+ encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
+ dst_object);
+ else
+ decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
+ dst_object);
+ if (! norecord)
+ Vlast_coding_system_used = CODING_ID_NAME (coding.id);
+
+ return (BUFFERP (dst_object)
+ ? make_number (coding.produced_char)
+ : coding.dst_object);
}
+
DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
- 3, 3, "r\nzCoding system: ",
+ 3, 4, "r\nzCoding system: ",
doc: /* Decode the current region from the specified coding system.
-When called from a program, takes three arguments:
-START, END, and CODING-SYSTEM. START and END are buffer positions.
+When called from a program, takes four arguments:
+ START, END, CODING-SYSTEM, and DESTINATION.
+START and END are buffer positions.
+
+Optional 4th arguments DESTINATION specifies where the decoded text goes.
+If nil, the region between START and END is replace by the decoded text.
+If buffer, the decoded text is inserted in the buffer.
+If t, the decoded text is returned.
+
This function sets `last-coding-system-used' to the precise coding system
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
not fully specified.)
It returns the length of the decoded text. */)
- (start, end, coding_system)
- Lisp_Object start, end, coding_system;
+ (start, end, coding_system, destination)
+ Lisp_Object start, end, coding_system, destination;
{
- return code_convert_region1 (start, end, coding_system, 0);
+ return code_convert_region (start, end, coding_system, destination, 0, 0);
}
DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
- 3, 3, "r\nzCoding system: ",
- doc: /* Encode the current region into the specified coding system.
+ 3, 4, "r\nzCoding system: ",
+ doc: /* Encode the current region by specified coding system.
When called from a program, takes three arguments:
START, END, and CODING-SYSTEM. START and END are buffer positions.
+
+Optional 4th arguments DESTINATION specifies where the encoded text goes.
+If nil, the region between START and END is replace by the encoded text.
+If buffer, the encoded text is inserted in the buffer.
+If t, the encoded text is returned.
+
This function sets `last-coding-system-used' to the precise coding system
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
not fully specified.)
It returns the length of the encoded text. */)
- (start, end, coding_system)
- Lisp_Object start, end, coding_system;
+ (start, end, coding_system, destination)
+ Lisp_Object start, end, coding_system, destination;
{
- return code_convert_region1 (start, end, coding_system, 1);
+ return code_convert_region (start, end, coding_system, destination, 1, 0);
}
Lisp_Object
-code_convert_string1 (string, coding_system, nocopy, encodep)
- Lisp_Object string, coding_system, nocopy;
- int encodep;
+code_convert_string (string, coding_system, dst_object,
+ encodep, nocopy, norecord)
+ Lisp_Object string, coding_system, dst_object;
+ int encodep, nocopy, norecord;
{
struct coding_system coding;
+ EMACS_INT chars, bytes;
CHECK_STRING (string);
- CHECK_SYMBOL (coding_system);
-
if (NILP (coding_system))
- return (NILP (nocopy) ? Fcopy_sequence (string) : string);
+ {
+ if (! norecord)
+ Vlast_coding_system_used = Qno_conversion;
+ if (NILP (dst_object))
+ return (nocopy ? Fcopy_sequence (string) : string);
+ }
- if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
- error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
+ if (NILP (coding_system))
+ coding_system = Qno_conversion;
+ else
+ CHECK_CODING_SYSTEM (coding_system);
+ if (NILP (dst_object))
+ dst_object = Qt;
+ else if (! EQ (dst_object, Qt))
+ CHECK_BUFFER (dst_object);
+ setup_coding_system (coding_system, &coding);
coding.mode |= CODING_MODE_LAST_BLOCK;
- string = (encodep
- ? encode_coding_string (string, &coding, !NILP (nocopy))
- : decode_coding_string (string, &coding, !NILP (nocopy)));
- Vlast_coding_system_used = coding.symbol;
-
- return string;
-}
+ chars = SCHARS (string);
+ bytes = SBYTES (string);
+ if (encodep)
+ encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
+ else
+ decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
+ if (! norecord)
+ Vlast_coding_system_used = CODING_ID_NAME (coding.id);
-DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
- 2, 3, 0,
- doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
-Optional arg NOCOPY non-nil means it is OK to return STRING itself
-if the decoding operation is trivial.
-This function sets `last-coding-system-used' to the precise coding system
-used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
-not fully specified.) */)
- (string, coding_system, nocopy)
- Lisp_Object string, coding_system, nocopy;
-{
- return code_convert_string1 (string, coding_system, nocopy, 0);
+ return (BUFFERP (dst_object)
+ ? make_number (coding.produced_char)
+ : coding.dst_object);
}
-DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
- 2, 3, 0,
- doc: /* Encode STRING to CODING-SYSTEM, and return the result.
-Optional arg NOCOPY non-nil means it is OK to return STRING itself
-if the encoding operation is trivial.
-This function sets `last-coding-system-used' to the precise coding system
-used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
-not fully specified.) */)
- (string, coding_system, nocopy)
- Lisp_Object string, coding_system, nocopy;
-{
- return code_convert_string1 (string, coding_system, nocopy, 1);
-}
/* Encode or decode STRING according to CODING_SYSTEM.
Do not set Vlast_coding_system_used.
@@ -7233,23 +8031,52 @@ code_convert_string_norecord (string, coding_system, encodep)
Lisp_Object string, coding_system;
int encodep;
{
- struct coding_system coding;
+ return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
+}
- CHECK_STRING (string);
- CHECK_SYMBOL (coding_system);
- if (NILP (coding_system))
- return string;
+DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
+ 2, 4, 0,
+ doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
+
+Optional third arg NOCOPY non-nil means it is OK to return STRING itself
+if the decoding operation is trivial.
- if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
- error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
+Optional fourth arg BUFFER non-nil meant that the decoded text is
+inserted in BUFFER instead of returned as a string. In this case,
+the return value is BUFFER.
- coding.composing = COMPOSITION_DISABLED;
- coding.mode |= CODING_MODE_LAST_BLOCK;
- return (encodep
- ? encode_coding_string (string, &coding, 1)
- : decode_coding_string (string, &coding, 1));
+This function sets `last-coding-system-used' to the precise coding system
+used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
+not fully specified. */)
+ (string, coding_system, nocopy, buffer)
+ Lisp_Object string, coding_system, nocopy, buffer;
+{
+ return code_convert_string (string, coding_system, buffer,
+ 0, ! NILP (nocopy), 0);
+}
+
+DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
+ 2, 4, 0,
+ doc: /* Encode STRING to CODING-SYSTEM, and return the result.
+
+Optional third arg NOCOPY non-nil means it is OK to return STRING
+itself if the encoding operation is trivial.
+
+Optional fourth arg BUFFER non-nil meant that the encoded text is
+inserted in BUFFER instead of returned as a string. In this case,
+the return value is BUFFER.
+
+This function sets `last-coding-system-used' to the precise coding system
+used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
+not fully specified.) */)
+ (string, coding_system, nocopy, buffer)
+ Lisp_Object string, coding_system, nocopy, buffer;
+{
+ return code_convert_string (string, coding_system, buffer,
+ 1, ! NILP (nocopy), 1);
}
+
DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
@@ -7257,60 +8084,75 @@ Return the corresponding character. */)
(code)
Lisp_Object code;
{
- unsigned char c1, c2, s1, s2;
- Lisp_Object val;
+ Lisp_Object spec, attrs, val;
+ struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
+ int c;
+
+ CHECK_NATNUM (code);
+ c = XFASTINT (code);
+ CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
+ attrs = AREF (spec, 0);
+
+ if (ASCII_BYTE_P (c)
+ && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
+ return code;
- CHECK_NUMBER (code);
- s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
- if (s1 == 0)
+ val = CODING_ATTR_CHARSET_LIST (attrs);
+ charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
+
+ if (c <= 0x7F)
+ charset = charset_roman;
+ else if (c >= 0xA0 && c < 0xDF)
{
- if (s2 < 0x80)
- XSETFASTINT (val, s2);
- else if (s2 >= 0xA0 || s2 <= 0xDF)
- XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
- else
- error ("Invalid Shift JIS code: %x", XFASTINT (code));
+ charset = charset_kana;
+ c -= 0x80;
}
else
{
- if ((s1 < 0x80 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF)
- || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
- error ("Invalid Shift JIS code: %x", XFASTINT (code));
- DECODE_SJIS (s1, s2, c1, c2);
- XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
+ int s1 = c >> 8, s2 = c & 0xFF;
+
+ if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
+ || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
+ error ("Invalid code: %d", code);
+ SJIS_TO_JIS (c);
+ charset = charset_kanji;
}
- return val;
+ c = DECODE_CHAR (charset, c);
+ if (c < 0)
+ error ("Invalid code: %d", code);
+ return make_number (c);
}
+
DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
doc: /* Encode a Japanese character CHAR to shift_jis encoding.
Return the corresponding code in SJIS. */)
(ch)
- Lisp_Object ch;
+ Lisp_Object ch;
{
- int charset, c1, c2, s1, s2;
- Lisp_Object val;
+ Lisp_Object spec, attrs, charset_list;
+ int c;
+ struct charset *charset;
+ unsigned code;
- CHECK_NUMBER (ch);
- SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
- if (charset == CHARSET_ASCII)
- {
- val = ch;
- }
- else if (charset == charset_jisx0208
- && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
- {
- ENCODE_SJIS (c1, c2, s1, s2);
- XSETFASTINT (val, (s1 << 8) | s2);
- }
- else if (charset == charset_katakana_jisx0201
- && c1 > 0x20 && c2 < 0xE0)
- {
- XSETFASTINT (val, c1 | 0x80);
- }
- else
- error ("Can't encode to shift_jis: %d", XFASTINT (ch));
- return val;
+ CHECK_CHARACTER (ch);
+ c = XFASTINT (ch);
+ CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
+ attrs = AREF (spec, 0);
+
+ if (ASCII_CHAR_P (c)
+ && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
+ return ch;
+
+ charset_list = CODING_ATTR_CHARSET_LIST (attrs);
+ charset = char_charset (c, charset_list, &code);
+ if (code == CHARSET_INVALID_CODE (charset))
+ error ("Can't encode by shift_jis encoding: %d", c);
+ JIS_TO_SJIS (code);
+
+ return make_number (code);
}
DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
@@ -7319,27 +8161,37 @@ Return the corresponding character. */)
(code)
Lisp_Object code;
{
- int charset;
- unsigned char b1, b2, c1, c2;
- Lisp_Object val;
+ Lisp_Object spec, attrs, val;
+ struct charset *charset_roman, *charset_big5, *charset;
+ int c;
- CHECK_NUMBER (code);
- b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
- if (b1 == 0)
- {
- if (b2 >= 0x80)
- error ("Invalid BIG5 code: %x", XFASTINT (code));
- val = code;
- }
+ CHECK_NATNUM (code);
+ c = XFASTINT (code);
+ CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
+ attrs = AREF (spec, 0);
+
+ if (ASCII_BYTE_P (c)
+ && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
+ return code;
+
+ val = CODING_ATTR_CHARSET_LIST (attrs);
+ charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
+
+ if (c <= 0x7F)
+ charset = charset_roman;
else
{
- if ((b1 < 0xA1 || b1 > 0xFE)
- || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
- error ("Invalid BIG5 code: %x", XFASTINT (code));
- DECODE_BIG5 (b1, b2, charset, c1, c2);
- XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
+ int b1 = c >> 8, b2 = c & 0x7F;
+ if (b1 < 0xA1 || b1 > 0xFE
+ || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
+ error ("Invalid code: %d", code);
+ charset = charset_big5;
}
- return val;
+ c = DECODE_CHAR (charset, (unsigned )c);
+ if (c < 0)
+ error ("Invalid code: %d", code);
+ return make_number (c);
}
DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
@@ -7348,48 +8200,50 @@ Return the corresponding character code in Big5. */)
(ch)
Lisp_Object ch;
{
- int charset, c1, c2, b1, b2;
- Lisp_Object val;
-
- CHECK_NUMBER (ch);
- SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
- if (charset == CHARSET_ASCII)
- {
- val = ch;
- }
- else if ((charset == charset_big5_1
- && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
- || (charset == charset_big5_2
- && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
- {
- ENCODE_BIG5 (charset, c1, c2, b1, b2);
- XSETFASTINT (val, (b1 << 8) | b2);
- }
- else
- error ("Can't encode to Big5: %d", XFASTINT (ch));
- return val;
+ Lisp_Object spec, attrs, charset_list;
+ struct charset *charset;
+ int c;
+ unsigned code;
+
+ CHECK_CHARACTER (ch);
+ c = XFASTINT (ch);
+ CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
+ attrs = AREF (spec, 0);
+ if (ASCII_CHAR_P (c)
+ && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
+ return ch;
+
+ charset_list = CODING_ATTR_CHARSET_LIST (attrs);
+ charset = char_charset (c, charset_list, &code);
+ if (code == CHARSET_INVALID_CODE (charset))
+ error ("Can't encode by Big5 encoding: %d", c);
+
+ return make_number (code);
}
+
-DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
+DEFUN ("set-terminal-coding-system-internal",
+ Fset_terminal_coding_system_internal,
Sset_terminal_coding_system_internal, 1, 1, 0,
doc: /* Internal use only. */)
(coding_system)
Lisp_Object coding_system;
{
CHECK_SYMBOL (coding_system);
- setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
+ setup_coding_system (Fcheck_coding_system (coding_system),
+ &terminal_coding);
+
/* We had better not send unsafe characters to terminal. */
- terminal_coding.mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
- /* Character composition should be disabled. */
- terminal_coding.composing = COMPOSITION_DISABLED;
- /* Error notification should be suppressed. */
- terminal_coding.suppress_error = 1;
+ terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
+ /* Characer composition should be disabled. */
+ terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
terminal_coding.src_multibyte = 1;
terminal_coding.dst_multibyte = 0;
return Qnil;
}
-DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal,
+DEFUN ("set-safe-terminal-coding-system-internal",
+ Fset_safe_terminal_coding_system_internal,
Sset_safe_terminal_coding_system_internal, 1, 1, 0,
doc: /* Internal use only. */)
(coding_system)
@@ -7398,42 +8252,46 @@ DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_sys
CHECK_SYMBOL (coding_system);
setup_coding_system (Fcheck_coding_system (coding_system),
&safe_terminal_coding);
- /* Character composition should be disabled. */
- safe_terminal_coding.composing = COMPOSITION_DISABLED;
- /* Error notification should be suppressed. */
- safe_terminal_coding.suppress_error = 1;
+ /* Characer composition should be disabled. */
+ safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
safe_terminal_coding.src_multibyte = 1;
safe_terminal_coding.dst_multibyte = 0;
return Qnil;
}
-DEFUN ("terminal-coding-system", Fterminal_coding_system,
- Sterminal_coding_system, 0, 0, 0,
+DEFUN ("terminal-coding-system",
+ Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
doc: /* Return coding system specified for terminal output. */)
()
{
- return terminal_coding.symbol;
+ Lisp_Object coding_system;
+
+ coding_system = CODING_ID_NAME (terminal_coding.id);
+ /* For backward compatibility, return nil if it is `undecided'. */
+ return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
}
-DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
+DEFUN ("set-keyboard-coding-system-internal",
+ Fset_keyboard_coding_system_internal,
Sset_keyboard_coding_system_internal, 1, 1, 0,
doc: /* Internal use only. */)
(coding_system)
Lisp_Object coding_system;
{
CHECK_SYMBOL (coding_system);
- setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
- /* Character composition should be disabled. */
- keyboard_coding.composing = COMPOSITION_DISABLED;
+ setup_coding_system (Fcheck_coding_system (coding_system),
+ &keyboard_coding);
+ /* Characer composition should be disabled. */
+ keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
return Qnil;
}
-DEFUN ("keyboard-coding-system", Fkeyboard_coding_system,
- Skeyboard_coding_system, 0, 0, 0,
+DEFUN ("keyboard-coding-system",
+ Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
doc: /* Return coding system specified for decoding keyboard input. */)
()
{
- return keyboard_coding.symbol;
+ return CODING_ID_NAME (keyboard_coding.id);
}
@@ -7488,23 +8346,16 @@ usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
operation = args[0];
if (!SYMBOLP (operation)
|| !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
- error ("Invalid first argument");
+ error ("Invalid first arguement");
if (nargs < 1 + XINT (target_idx))
error ("Too few arguments for operation: %s",
SDATA (SYMBOL_NAME (operation)));
- /* For write-region, if the 6th argument (i.e. VISIT, the 5th
- argument to write-region) is string, it must be treated as a
- target file name. */
- if (EQ (operation, Qwrite_region)
- && nargs > 5
- && STRINGP (args[5]))
- target_idx = make_number (4);
target = args[XINT (target_idx) + 1];
if (!(STRINGP (target)
|| (EQ (operation, Qinsert_file_contents) && CONSP (target)
&& STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
|| (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
- error ("Invalid argument %d", XINT (target_idx) + 1);
+ error ("Invalid %dth argument", XINT (target_idx) + 1);
if (CONSP (target))
target = XCAR (target);
@@ -7520,8 +8371,8 @@ usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
for (; CONSP (chain); chain = XCDR (chain))
{
Lisp_Object elt;
- elt = XCAR (chain);
+ elt = XCAR (chain);
if (CONSP (elt)
&& ((STRINGP (target)
&& STRINGP (XCAR (elt))
@@ -7551,103 +8402,801 @@ usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
return Qnil;
}
-DEFUN ("update-coding-systems-internal", Fupdate_coding_systems_internal,
- Supdate_coding_systems_internal, 0, 0, 0,
- doc: /* Update internal database for ISO2022 and CCL based coding systems.
-When values of any coding categories are changed, you must
-call this function. */)
- ()
+DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
+ Sset_coding_system_priority, 0, MANY, 0,
+ doc: /* Assign higher priority to the coding systems given as arguments.
+If multiple coding systems belongs to the same category,
+all but the first one are ignored.
+
+usage: (set-coding-system-priority ...) */)
+ (nargs, args)
+ int nargs;
+ Lisp_Object *args;
{
- int i;
+ int i, j;
+ int changed[coding_category_max];
+ enum coding_category priorities[coding_category_max];
+
+ bzero (changed, sizeof changed);
- for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
+ for (i = j = 0; i < nargs; i++)
{
- Lisp_Object val;
+ enum coding_category category;
+ Lisp_Object spec, attrs;
- val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[i]);
- if (!NILP (val))
- {
- if (! coding_system_table[i])
- coding_system_table[i] = ((struct coding_system *)
- xmalloc (sizeof (struct coding_system)));
- setup_coding_system (val, coding_system_table[i]);
- }
- else if (coding_system_table[i])
- {
- xfree (coding_system_table[i]);
- coding_system_table[i] = NULL;
- }
+ CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
+ attrs = AREF (spec, 0);
+ category = XINT (CODING_ATTR_CATEGORY (attrs));
+ if (changed[category])
+ /* Ignore this coding system because a coding system of the
+ same category already had a higher priority. */
+ continue;
+ changed[category] = 1;
+ priorities[j++] = category;
+ if (coding_categories[category].id >= 0
+ && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
+ setup_coding_system (args[i], &coding_categories[category]);
+ Fset (AREF (Vcoding_category_table, category), args[i]);
+ }
+
+ /* Now we have decided top J priorities. Reflect the order of the
+ original priorities to the remaining priorities. */
+
+ for (i = j, j = 0; i < coding_category_max; i++, j++)
+ {
+ while (j < coding_category_max
+ && changed[coding_priorities[j]])
+ j++;
+ if (j == coding_category_max)
+ abort ();
+ priorities[i] = coding_priorities[j];
}
+ bcopy (priorities, coding_priorities, sizeof priorities);
+
+ /* Update `coding-category-list'. */
+ Vcoding_category_list = Qnil;
+ for (i = coding_category_max - 1; i >= 0; i--)
+ Vcoding_category_list
+ = Fcons (AREF (Vcoding_category_table, priorities[i]),
+ Vcoding_category_list);
+
return Qnil;
}
-DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
- Sset_coding_priority_internal, 0, 0, 0,
- doc: /* Update internal database for the current value of `coding-category-list'.
-This function is internal use only. */)
- ()
+DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
+ Scoding_system_priority_list, 0, 1, 0,
+ doc: /* Return a list of coding systems ordered by their priorities.
+HIGHESTP non-nil means just return the highest priority one. */)
+ (highestp)
+ Lisp_Object highestp;
{
- int i = 0, idx;
+ int i;
Lisp_Object val;
- val = Vcoding_category_list;
-
- while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
+ for (i = 0, val = Qnil; i < coding_category_max; i++)
{
- if (! SYMBOLP (XCAR (val)))
- break;
- idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
- if (idx >= CODING_CATEGORY_IDX_MAX)
- break;
- coding_priorities[i++] = (1 << idx);
- val = XCDR (val);
+ enum coding_category category = coding_priorities[i];
+ int id = coding_categories[category].id;
+ Lisp_Object attrs;
+
+ if (id < 0)
+ continue;
+ attrs = CODING_ID_ATTRS (id);
+ if (! NILP (highestp))
+ return CODING_ATTR_BASE_NAME (attrs);
+ val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
}
- /* If coding-category-list is valid and contains all coding
- categories, `i' should be CODING_CATEGORY_IDX_MAX now. If not,
- the following code saves Emacs from crashing. */
- while (i < CODING_CATEGORY_IDX_MAX)
- coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
+ return Fnreverse (val);
+}
- return Qnil;
+static char *suffixes[] = { "-unix", "-dos", "-mac" };
+
+static Lisp_Object
+make_subsidiaries (base)
+ Lisp_Object base;
+{
+ Lisp_Object subsidiaries;
+ int base_name_len = SBYTES (SYMBOL_NAME (base));
+ char *buf = (char *) alloca (base_name_len + 6);
+ int i;
+
+ bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
+ subsidiaries = Fmake_vector (make_number (3), Qnil);
+ for (i = 0; i < 3; i++)
+ {
+ bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
+ ASET (subsidiaries, i, intern (buf));
+ }
+ return subsidiaries;
}
+
DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
- Sdefine_coding_system_internal, 1, 1, 0,
- doc: /* Register CODING-SYSTEM as a base coding system.
-This function is internal use only. */)
- (coding_system)
- Lisp_Object coding_system;
+ Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
+ doc: /* For internal use only.
+usage: (define-coding-system-internal ...) */)
+ (nargs, args)
+ int nargs;
+ Lisp_Object *args;
{
- Lisp_Object safe_chars, slot;
+ Lisp_Object name;
+ Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
+ Lisp_Object attrs; /* Vector of attributes. */
+ Lisp_Object eol_type;
+ Lisp_Object aliases;
+ Lisp_Object coding_type, charset_list, safe_charsets;
+ enum coding_category category;
+ Lisp_Object tail, val;
+ int max_charset_id = 0;
+ int i;
+
+ if (nargs < coding_arg_max)
+ goto short_args;
+
+ attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
+
+ name = args[coding_arg_name];
+ CHECK_SYMBOL (name);
+ CODING_ATTR_BASE_NAME (attrs) = name;
+
+ val = args[coding_arg_mnemonic];
+ if (! STRINGP (val))
+ CHECK_CHARACTER (val);
+ CODING_ATTR_MNEMONIC (attrs) = val;
+
+ coding_type = args[coding_arg_coding_type];
+ CHECK_SYMBOL (coding_type);
+ CODING_ATTR_TYPE (attrs) = coding_type;
+
+ charset_list = args[coding_arg_charset_list];
+ if (SYMBOLP (charset_list))
+ {
+ if (EQ (charset_list, Qiso_2022))
+ {
+ if (! EQ (coding_type, Qiso_2022))
+ error ("Invalid charset-list");
+ charset_list = Viso_2022_charset_list;
+ }
+ else if (EQ (charset_list, Qemacs_mule))
+ {
+ if (! EQ (coding_type, Qemacs_mule))
+ error ("Invalid charset-list");
+ charset_list = Vemacs_mule_charset_list;
+ }
+ for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
+ if (max_charset_id < XFASTINT (XCAR (tail)))
+ max_charset_id = XFASTINT (XCAR (tail));
+ }
+ else
+ {
+ charset_list = Fcopy_sequence (charset_list);
+ for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
+ {
+ struct charset *charset;
+
+ val = Fcar (tail);
+ CHECK_CHARSET_GET_CHARSET (val, charset);
+ if (EQ (coding_type, Qiso_2022)
+ ? CHARSET_ISO_FINAL (charset) < 0
+ : EQ (coding_type, Qemacs_mule)
+ ? CHARSET_EMACS_MULE_ID (charset) < 0
+ : 0)
+ error ("Can't handle charset `%s'",
+ SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
+
+ XSETCAR (tail, make_number (charset->id));
+ if (max_charset_id < charset->id)
+ max_charset_id = charset->id;
+ }
+ }
+ CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
+
+ safe_charsets = Fmake_string (make_number (max_charset_id + 1),
+ make_number (255));
+ for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
+ SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
+ CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
+
+ CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
+
+ val = args[coding_arg_decode_translation_table];
+ if (! CHAR_TABLE_P (val) && ! CONSP (val))
+ CHECK_SYMBOL (val);
+ CODING_ATTR_DECODE_TBL (attrs) = val;
+
+ val = args[coding_arg_encode_translation_table];
+ if (! CHAR_TABLE_P (val) && ! CONSP (val))
+ CHECK_SYMBOL (val);
+ CODING_ATTR_ENCODE_TBL (attrs) = val;
+
+ val = args[coding_arg_post_read_conversion];
+ CHECK_SYMBOL (val);
+ CODING_ATTR_POST_READ (attrs) = val;
+
+ val = args[coding_arg_pre_write_conversion];
+ CHECK_SYMBOL (val);
+ CODING_ATTR_PRE_WRITE (attrs) = val;
+
+ val = args[coding_arg_default_char];
+ if (NILP (val))
+ CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
+ else
+ {
+ CHECK_CHARACTER (val);
+ CODING_ATTR_DEFAULT_CHAR (attrs) = val;
+ }
+
+ val = args[coding_arg_for_unibyte];
+ CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
+
+ val = args[coding_arg_plist];
+ CHECK_LIST (val);
+ CODING_ATTR_PLIST (attrs) = val;
+
+ if (EQ (coding_type, Qcharset))
+ {
+ /* Generate a lisp vector of 256 elements. Each element is nil,
+ integer, or a list of charset IDs.
+
+ If Nth element is nil, the byte code N is invalid in this
+ coding system.
+
+ If Nth element is a number NUM, N is the first byte of a
+ charset whose ID is NUM.
+
+ If Nth element is a list of charset IDs, N is the first byte
+ of one of them. The list is sorted by dimensions of the
+ charsets. A charset of smaller dimension comes firtst. */
+ val = Fmake_vector (make_number (256), Qnil);
+
+ for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
+ {
+ struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
+ int dim = CHARSET_DIMENSION (charset);
+ int idx = (dim - 1) * 4;
+
+ if (CHARSET_ASCII_COMPATIBLE_P (charset))
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
+
+ for (i = charset->code_space[idx];
+ i <= charset->code_space[idx + 1]; i++)
+ {
+ Lisp_Object tmp, tmp2;
+ int dim2;
+
+ tmp = AREF (val, i);
+ if (NILP (tmp))
+ tmp = XCAR (tail);
+ else if (NUMBERP (tmp))
+ {
+ dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
+ if (dim < dim2)
+ tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
+ else
+ tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
+ }
+ else
+ {
+ for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
+ {
+ dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
+ if (dim < dim2)
+ break;
+ }
+ if (NILP (tmp2))
+ tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
+ else
+ {
+ XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
+ XSETCAR (tmp2, XCAR (tail));
+ }
+ }
+ ASET (val, i, tmp);
+ }
+ }
+ ASET (attrs, coding_attr_charset_valids, val);
+ category = coding_category_charset;
+ }
+ else if (EQ (coding_type, Qccl))
+ {
+ Lisp_Object valids;
+
+ if (nargs < coding_arg_ccl_max)
+ goto short_args;
+
+ val = args[coding_arg_ccl_decoder];
+ CHECK_CCL_PROGRAM (val);
+ if (VECTORP (val))
+ val = Fcopy_sequence (val);
+ ASET (attrs, coding_attr_ccl_decoder, val);
+
+ val = args[coding_arg_ccl_encoder];
+ CHECK_CCL_PROGRAM (val);
+ if (VECTORP (val))
+ val = Fcopy_sequence (val);
+ ASET (attrs, coding_attr_ccl_encoder, val);
+
+ val = args[coding_arg_ccl_valids];
+ valids = Fmake_string (make_number (256), make_number (0));
+ for (tail = val; !NILP (tail); tail = Fcdr (tail))
+ {
+ int from, to;
+
+ val = Fcar (tail);
+ if (INTEGERP (val))
+ {
+ from = to = XINT (val);
+ if (from < 0 || from > 255)
+ args_out_of_range_3 (val, make_number (0), make_number (255));
+ }
+ else
+ {
+ CHECK_CONS (val);
+ CHECK_NATNUM_CAR (val);
+ CHECK_NATNUM_CDR (val);
+ from = XINT (XCAR (val));
+ if (from > 255)
+ args_out_of_range_3 (XCAR (val),
+ make_number (0), make_number (255));
+ to = XINT (XCDR (val));
+ if (to < from || to > 255)
+ args_out_of_range_3 (XCDR (val),
+ XCAR (val), make_number (255));
+ }
+ for (i = from; i <= to; i++)
+ SSET (valids, i, 1);
+ }
+ ASET (attrs, coding_attr_ccl_valids, valids);
+
+ category = coding_category_ccl;
+ }
+ else if (EQ (coding_type, Qutf_16))
+ {
+ Lisp_Object bom, endian;
+
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
+
+ if (nargs < coding_arg_utf16_max)
+ goto short_args;
+
+ bom = args[coding_arg_utf16_bom];
+ if (! NILP (bom) && ! EQ (bom, Qt))
+ {
+ CHECK_CONS (bom);
+ val = XCAR (bom);
+ CHECK_CODING_SYSTEM (val);
+ val = XCDR (bom);
+ CHECK_CODING_SYSTEM (val);
+ }
+ ASET (attrs, coding_attr_utf_16_bom, bom);
+
+ endian = args[coding_arg_utf16_endian];
+ CHECK_SYMBOL (endian);
+ if (NILP (endian))
+ endian = Qbig;
+ else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
+ error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
+ ASET (attrs, coding_attr_utf_16_endian, endian);
+
+ category = (CONSP (bom)
+ ? coding_category_utf_16_auto
+ : NILP (bom)
+ ? (EQ (endian, Qbig)
+ ? coding_category_utf_16_be_nosig
+ : coding_category_utf_16_le_nosig)
+ : (EQ (endian, Qbig)
+ ? coding_category_utf_16_be
+ : coding_category_utf_16_le));
+ }
+ else if (EQ (coding_type, Qiso_2022))
+ {
+ Lisp_Object initial, reg_usage, request, flags;
+ int i;
+
+ if (nargs < coding_arg_iso2022_max)
+ goto short_args;
+
+ initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
+ CHECK_VECTOR (initial);
+ for (i = 0; i < 4; i++)
+ {
+ val = Faref (initial, make_number (i));
+ if (! NILP (val))
+ {
+ struct charset *charset;
+
+ CHECK_CHARSET_GET_CHARSET (val, charset);
+ ASET (initial, i, make_number (CHARSET_ID (charset)));
+ if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
+ }
+ else
+ ASET (initial, i, make_number (-1));
+ }
+
+ reg_usage = args[coding_arg_iso2022_reg_usage];
+ CHECK_CONS (reg_usage);
+ CHECK_NUMBER_CAR (reg_usage);
+ CHECK_NUMBER_CDR (reg_usage);
+
+ request = Fcopy_sequence (args[coding_arg_iso2022_request]);
+ for (tail = request; ! NILP (tail); tail = Fcdr (tail))
+ {
+ int id;
+ Lisp_Object tmp;
+
+ val = Fcar (tail);
+ CHECK_CONS (val);
+ tmp = XCAR (val);
+ CHECK_CHARSET_GET_ID (tmp, id);
+ CHECK_NATNUM_CDR (val);
+ if (XINT (XCDR (val)) >= 4)
+ error ("Invalid graphic register number: %d", XINT (XCDR (val)));
+ XSETCAR (val, make_number (id));
+ }
+
+ flags = args[coding_arg_iso2022_flags];
+ CHECK_NATNUM (flags);
+ i = XINT (flags);
+ if (EQ (args[coding_arg_charset_list], Qiso_2022))
+ flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
+
+ ASET (attrs, coding_attr_iso_initial, initial);
+ ASET (attrs, coding_attr_iso_usage, reg_usage);
+ ASET (attrs, coding_attr_iso_request, request);
+ ASET (attrs, coding_attr_iso_flags, flags);
+ setup_iso_safe_charsets (attrs);
+
+ if (i & CODING_ISO_FLAG_SEVEN_BITS)
+ category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
+ | CODING_ISO_FLAG_SINGLE_SHIFT))
+ ? coding_category_iso_7_else
+ : EQ (args[coding_arg_charset_list], Qiso_2022)
+ ? coding_category_iso_7
+ : coding_category_iso_7_tight);
+ else
+ {
+ int id = XINT (AREF (initial, 1));
+
+ category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
+ || EQ (args[coding_arg_charset_list], Qiso_2022)
+ || id < 0)
+ ? coding_category_iso_8_else
+ : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
+ ? coding_category_iso_8_1
+ : coding_category_iso_8_2);
+ }
+ if (category != coding_category_iso_8_1
+ && category != coding_category_iso_8_2)
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
+ }
+ else if (EQ (coding_type, Qemacs_mule))
+ {
+ if (EQ (args[coding_arg_charset_list], Qemacs_mule))
+ ASET (attrs, coding_attr_emacs_mule_full, Qt);
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
+ category = coding_category_emacs_mule;
+ }
+ else if (EQ (coding_type, Qshift_jis))
+ {
+
+ struct charset *charset;
+
+ if (XINT (Flength (charset_list)) != 3
+ && XINT (Flength (charset_list)) != 4)
+ error ("There should be three or four charsets");
+
+ charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
+ if (CHARSET_DIMENSION (charset) != 1)
+ error ("Dimension of charset %s is not one",
+ SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
+ if (CHARSET_ASCII_COMPATIBLE_P (charset))
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
+
+ charset_list = XCDR (charset_list);
+ charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
+ if (CHARSET_DIMENSION (charset) != 1)
+ error ("Dimension of charset %s is not one",
+ SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
- if (NILP (Fcheck_coding_system (coding_system)))
- xsignal1 (Qcoding_system_error, coding_system);
+ charset_list = XCDR (charset_list);
+ charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
+ if (CHARSET_DIMENSION (charset) != 2)
+ error ("Dimension of charset %s is not two",
+ SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
- safe_chars = coding_safe_chars (coding_system);
- if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
- error ("No valid safe-chars property for %s",
- SDATA (SYMBOL_NAME (coding_system)));
+ charset_list = XCDR (charset_list);
+ if (! NILP (charset_list))
+ {
+ charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
+ if (CHARSET_DIMENSION (charset) != 2)
+ error ("Dimension of charset %s is not two",
+ SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
+ }
+
+ category = coding_category_sjis;
+ Vsjis_coding_system = name;
+ }
+ else if (EQ (coding_type, Qbig5))
+ {
+ struct charset *charset;
+
+ if (XINT (Flength (charset_list)) != 2)
+ error ("There should be just two charsets");
- if (EQ (safe_chars, Qt))
+ charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
+ if (CHARSET_DIMENSION (charset) != 1)
+ error ("Dimension of charset %s is not one",
+ SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
+ if (CHARSET_ASCII_COMPATIBLE_P (charset))
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
+
+ charset_list = XCDR (charset_list);
+ charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
+ if (CHARSET_DIMENSION (charset) != 2)
+ error ("Dimension of charset %s is not two",
+ SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
+
+ category = coding_category_big5;
+ Vbig5_coding_system = name;
+ }
+ else if (EQ (coding_type, Qraw_text))
{
- if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
- XSETCAR (Vcoding_system_safe_chars,
- Fcons (coding_system, XCAR (Vcoding_system_safe_chars)));
+ category = coding_category_raw_text;
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
}
+ else if (EQ (coding_type, Qutf_8))
+ {
+ category = coding_category_utf_8;
+ CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
+ }
+ else if (EQ (coding_type, Qundecided))
+ category = coding_category_undecided;
else
+ error ("Invalid coding system type: %s",
+ SDATA (SYMBOL_NAME (coding_type)));
+
+ CODING_ATTR_CATEGORY (attrs) = make_number (category);
+ CODING_ATTR_PLIST (attrs)
+ = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
+ CODING_ATTR_PLIST (attrs)));
+ CODING_ATTR_PLIST (attrs)
+ = Fcons (QCascii_compatible_p,
+ Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
+ CODING_ATTR_PLIST (attrs)));
+
+ eol_type = args[coding_arg_eol_type];
+ if (! NILP (eol_type)
+ && ! EQ (eol_type, Qunix)
+ && ! EQ (eol_type, Qdos)
+ && ! EQ (eol_type, Qmac))
+ error ("Invalid eol-type");
+
+ aliases = Fcons (name, Qnil);
+
+ if (NILP (eol_type))
+ {
+ eol_type = make_subsidiaries (name);
+ for (i = 0; i < 3; i++)
+ {
+ Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
+
+ this_name = AREF (eol_type, i);
+ this_aliases = Fcons (this_name, Qnil);
+ this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
+ this_spec = Fmake_vector (make_number (3), attrs);
+ ASET (this_spec, 1, this_aliases);
+ ASET (this_spec, 2, this_eol_type);
+ Fputhash (this_name, this_spec, Vcoding_system_hash_table);
+ Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
+ val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
+ if (NILP (val))
+ Vcoding_system_alist
+ = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
+ Vcoding_system_alist);
+ }
+ }
+
+ spec_vec = Fmake_vector (make_number (3), attrs);
+ ASET (spec_vec, 1, aliases);
+ ASET (spec_vec, 2, eol_type);
+
+ Fputhash (name, spec_vec, Vcoding_system_hash_table);
+ Vcoding_system_list = Fcons (name, Vcoding_system_list);
+ val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
+ if (NILP (val))
+ Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
+ Vcoding_system_alist);
+
+ {
+ int id = coding_categories[category].id;
+
+ if (id < 0 || EQ (name, CODING_ID_NAME (id)))
+ setup_coding_system (name, &coding_categories[category]);
+ }
+
+ return Qnil;
+
+ short_args:
+ return Fsignal (Qwrong_number_of_arguments,
+ Fcons (intern ("define-coding-system-internal"),
+ make_number (nargs)));
+}
+
+
+DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
+ 3, 3, 0,
+ doc: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
+ (coding_system, prop, val)
+ Lisp_Object coding_system, prop, val;
+{
+ Lisp_Object spec, attrs;
+
+ CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
+ attrs = AREF (spec, 0);
+ if (EQ (prop, QCmnemonic))
+ {
+ if (! STRINGP (val))
+ CHECK_CHARACTER (val);
+ CODING_ATTR_MNEMONIC (attrs) = val;
+ }
+ else if (EQ (prop, QCdefalut_char))
{
- slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars));
- if (NILP (slot))
- XSETCDR (Vcoding_system_safe_chars,
- nconc2 (XCDR (Vcoding_system_safe_chars),
- Fcons (Fcons (coding_system, safe_chars), Qnil)));
+ if (NILP (val))
+ val = make_number (' ');
else
- XSETCDR (slot, safe_chars);
+ CHECK_CHARACTER (val);
+ CODING_ATTR_DEFAULT_CHAR (attrs) = val;
+ }
+ else if (EQ (prop, QCdecode_translation_table))
+ {
+ if (! CHAR_TABLE_P (val) && ! CONSP (val))
+ CHECK_SYMBOL (val);
+ CODING_ATTR_DECODE_TBL (attrs) = val;
+ }
+ else if (EQ (prop, QCencode_translation_table))
+ {
+ if (! CHAR_TABLE_P (val) && ! CONSP (val))
+ CHECK_SYMBOL (val);
+ CODING_ATTR_ENCODE_TBL (attrs) = val;
+ }
+ else if (EQ (prop, QCpost_read_conversion))
+ {
+ CHECK_SYMBOL (val);
+ CODING_ATTR_POST_READ (attrs) = val;
+ }
+ else if (EQ (prop, QCpre_write_conversion))
+ {
+ CHECK_SYMBOL (val);
+ CODING_ATTR_PRE_WRITE (attrs) = val;
+ }
+ else if (EQ (prop, QCascii_compatible_p))
+ {
+ CODING_ATTR_ASCII_COMPAT (attrs) = val;
+ }
+
+ CODING_ATTR_PLIST (attrs)
+ = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
+ return val;
+}
+
+
+DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
+ Sdefine_coding_system_alias, 2, 2, 0,
+ doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
+ (alias, coding_system)
+ Lisp_Object alias, coding_system;
+{
+ Lisp_Object spec, aliases, eol_type, val;
+
+ CHECK_SYMBOL (alias);
+ CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
+ aliases = AREF (spec, 1);
+ /* ALISES should be a list of length more than zero, and the first
+ element is a base coding system. Append ALIAS at the tail of the
+ list. */
+ while (!NILP (XCDR (aliases)))
+ aliases = XCDR (aliases);
+ XSETCDR (aliases, Fcons (alias, Qnil));
+
+ eol_type = AREF (spec, 2);
+ if (VECTORP (eol_type))
+ {
+ Lisp_Object subsidiaries;
+ int i;
+
+ subsidiaries = make_subsidiaries (alias);
+ for (i = 0; i < 3; i++)
+ Fdefine_coding_system_alias (AREF (subsidiaries, i),
+ AREF (eol_type, i));
}
+
+ Fputhash (alias, spec, Vcoding_system_hash_table);
+ Vcoding_system_list = Fcons (alias, Vcoding_system_list);
+ val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
+ if (NILP (val))
+ Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
+ Vcoding_system_alist);
+
return Qnil;
}
+DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
+ 1, 1, 0,
+ doc: /* Return the base of CODING-SYSTEM.
+Any alias or subsidiary coding system is not a base coding system. */)
+ (coding_system)
+ Lisp_Object coding_system;
+{
+ Lisp_Object spec, attrs;
+
+ if (NILP (coding_system))
+ return (Qno_conversion);
+ CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
+ attrs = AREF (spec, 0);
+ return CODING_ATTR_BASE_NAME (attrs);
+}
+
+DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
+ 1, 1, 0,
+ doc: "Return the property list of CODING-SYSTEM.")
+ (coding_system)
+ Lisp_Object coding_system;
+{
+ Lisp_Object spec, attrs;
+
+ if (NILP (coding_system))
+ coding_system = Qno_conversion;
+ CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
+ attrs = AREF (spec, 0);
+ return CODING_ATTR_PLIST (attrs);
+}
+
+
+DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
+ 1, 1, 0,
+ doc: /* Return the list of aliases of CODING-SYSTEM. */)
+ (coding_system)
+ Lisp_Object coding_system;
+{
+ Lisp_Object spec;
+
+ if (NILP (coding_system))
+ coding_system = Qno_conversion;
+ CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
+ return AREF (spec, 1);
+}
+
+DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
+ Scoding_system_eol_type, 1, 1, 0,
+ doc: /* Return eol-type of CODING-SYSTEM.
+An eol-type is integer 0, 1, 2, or a vector of coding systems.
+
+Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
+and CR respectively.
+
+A vector value indicates that a format of end-of-line should be
+detected automatically. Nth element of the vector is the subsidiary
+coding system whose eol-type is N. */)
+ (coding_system)
+ Lisp_Object coding_system;
+{
+ Lisp_Object spec, eol_type;
+ int n;
+
+ if (NILP (coding_system))
+ coding_system = Qno_conversion;
+ if (! CODING_SYSTEM_P (coding_system))
+ return Qnil;
+ spec = CODING_SYSTEM_SPEC (coding_system);
+ eol_type = AREF (spec, 2);
+ if (VECTORP (eol_type))
+ return Fcopy_sequence (eol_type);
+ n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
+ return make_number (n);
+}
+
#endif /* emacs */
@@ -7658,20 +9207,11 @@ init_coding_once ()
{
int i;
- /* Emacs' internal format specific initialize routine. */
- for (i = 0; i <= 0x20; i++)
- emacs_code_class[i] = EMACS_control_code;
- emacs_code_class[0x0A] = EMACS_linefeed_code;
- emacs_code_class[0x0D] = EMACS_carriage_return_code;
- for (i = 0x21 ; i < 0x7F; i++)
- emacs_code_class[i] = EMACS_ascii_code;
- emacs_code_class[0x7F] = EMACS_control_code;
- for (i = 0x80; i < 0xFF; i++)
- emacs_code_class[i] = EMACS_invalid_code;
- emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
- emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
- emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
- emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
+ for (i = 0; i < coding_category_max; i++)
+ {
+ coding_categories[i].id = -1;
+ coding_priorities[i] = i;
+ }
/* ISO2022 specific initialize routine. */
for (i = 0; i < 0x20; i++)
@@ -7684,7 +9224,6 @@ init_coding_once ()
iso_code_class[i] = ISO_graphic_plane_1;
iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
- iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
iso_code_class[ISO_CODE_SO] = ISO_shift_out;
iso_code_class[ISO_CODE_SI] = ISO_shift_in;
iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
@@ -7693,24 +9232,14 @@ init_coding_once ()
iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
- setup_coding_system (Qnil, &keyboard_coding);
- setup_coding_system (Qnil, &terminal_coding);
- setup_coding_system (Qnil, &safe_terminal_coding);
- setup_coding_system (Qnil, &default_buffer_file_coding);
-
- bzero (coding_system_table, sizeof coding_system_table);
-
- bzero (ascii_skip_code, sizeof ascii_skip_code);
- for (i = 0; i < 128; i++)
- ascii_skip_code[i] = 1;
-
-#if defined (MSDOS) || defined (WINDOWSNT)
- system_eol_type = CODING_EOL_CRLF;
-#else
- system_eol_type = CODING_EOL_LF;
-#endif
-
- inhibit_pre_post_conversion = 0;
+ for (i = 0; i < 256; i++)
+ {
+ emacs_mule_bytes[i] = 1;
+ }
+ emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
+ emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
+ emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
+ emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
}
#ifdef emacs
@@ -7718,14 +9247,31 @@ init_coding_once ()
void
syms_of_coding ()
{
+ staticpro (&Vcoding_system_hash_table);
+ {
+ Lisp_Object args[2];
+ args[0] = QCtest;
+ args[1] = Qeq;
+ Vcoding_system_hash_table = Fmake_hash_table (2, args);
+ }
+
+ staticpro (&Vsjis_coding_system);
+ Vsjis_coding_system = Qnil;
+
+ staticpro (&Vbig5_coding_system);
+ Vbig5_coding_system = Qnil;
+
+ staticpro (&Vcode_conversion_reused_workbuf);
+ Vcode_conversion_reused_workbuf = Qnil;
+
staticpro (&Vcode_conversion_workbuf_name);
Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
- Qtarget_idx = intern ("target-idx");
- staticpro (&Qtarget_idx);
+ reused_workbuf_in_use = 0;
- Qcoding_system_history = intern ("coding-system-history");
- staticpro (&Qcoding_system_history);
+ DEFSYM (Qcharset, "charset");
+ DEFSYM (Qtarget_idx, "target-idx");
+ DEFSYM (Qcoding_system_history, "coding-system-history");
Fset (Qcoding_system_history, Qnil);
/* Target FILENAME is the first argument. */
@@ -7733,123 +9279,131 @@ syms_of_coding ()
/* Target FILENAME is the third argument. */
Fput (Qwrite_region, Qtarget_idx, make_number (2));
- Qcall_process = intern ("call-process");
- staticpro (&Qcall_process);
+ DEFSYM (Qcall_process, "call-process");
/* Target PROGRAM is the first argument. */
Fput (Qcall_process, Qtarget_idx, make_number (0));
- Qcall_process_region = intern ("call-process-region");
- staticpro (&Qcall_process_region);
+ DEFSYM (Qcall_process_region, "call-process-region");
/* Target PROGRAM is the third argument. */
Fput (Qcall_process_region, Qtarget_idx, make_number (2));
- Qstart_process = intern ("start-process");
- staticpro (&Qstart_process);
+ DEFSYM (Qstart_process, "start-process");
/* Target PROGRAM is the third argument. */
Fput (Qstart_process, Qtarget_idx, make_number (2));
- Qopen_network_stream = intern ("open-network-stream");
- staticpro (&Qopen_network_stream);
+ DEFSYM (Qopen_network_stream, "open-network-stream");
/* Target SERVICE is the fourth argument. */
Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
- Qcoding_system = intern ("coding-system");
- staticpro (&Qcoding_system);
-
- Qeol_type = intern ("eol-type");
- staticpro (&Qeol_type);
+ DEFSYM (Qcoding_system, "coding-system");
+ DEFSYM (Qcoding_aliases, "coding-aliases");
- Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
- staticpro (&Qbuffer_file_coding_system);
+ DEFSYM (Qeol_type, "eol-type");
+ DEFSYM (Qunix, "unix");
+ DEFSYM (Qdos, "dos");
- Qpost_read_conversion = intern ("post-read-conversion");
- staticpro (&Qpost_read_conversion);
+ DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
+ DEFSYM (Qpost_read_conversion, "post-read-conversion");
+ DEFSYM (Qpre_write_conversion, "pre-write-conversion");
+ DEFSYM (Qdefault_char, "default-char");
+ DEFSYM (Qundecided, "undecided");
+ DEFSYM (Qno_conversion, "no-conversion");
+ DEFSYM (Qraw_text, "raw-text");
- Qpre_write_conversion = intern ("pre-write-conversion");
- staticpro (&Qpre_write_conversion);
+ DEFSYM (Qiso_2022, "iso-2022");
- Qno_conversion = intern ("no-conversion");
- staticpro (&Qno_conversion);
+ DEFSYM (Qutf_8, "utf-8");
+ DEFSYM (Qutf_8_emacs, "utf-8-emacs");
- Qundecided = intern ("undecided");
- staticpro (&Qundecided);
+ DEFSYM (Qutf_16, "utf-16");
+ DEFSYM (Qbig, "big");
+ DEFSYM (Qlittle, "little");
- Qcoding_system_p = intern ("coding-system-p");
- staticpro (&Qcoding_system_p);
+ DEFSYM (Qshift_jis, "shift-jis");
+ DEFSYM (Qbig5, "big5");
- Qcoding_system_error = intern ("coding-system-error");
- staticpro (&Qcoding_system_error);
+ DEFSYM (Qcoding_system_p, "coding-system-p");
+ DEFSYM (Qcoding_system_error, "coding-system-error");
Fput (Qcoding_system_error, Qerror_conditions,
Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
Fput (Qcoding_system_error, Qerror_message,
build_string ("Invalid coding system"));
- Qcoding_category = intern ("coding-category");
- staticpro (&Qcoding_category);
- Qcoding_category_index = intern ("coding-category-index");
- staticpro (&Qcoding_category_index);
-
- Vcoding_category_table
- = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
- staticpro (&Vcoding_category_table);
- {
- int i;
- for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
- {
- XVECTOR (Vcoding_category_table)->contents[i]
- = intern (coding_category_name[i]);
- Fput (XVECTOR (Vcoding_category_table)->contents[i],
- Qcoding_category_index, make_number (i));
- }
- }
-
- Vcoding_system_safe_chars = Fcons (Qnil, Qnil);
- staticpro (&Vcoding_system_safe_chars);
-
- Qtranslation_table = intern ("translation-table");
- staticpro (&Qtranslation_table);
- Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
-
- Qtranslation_table_id = intern ("translation-table-id");
- staticpro (&Qtranslation_table_id);
-
- Qtranslation_table_for_decode = intern ("translation-table-for-decode");
- staticpro (&Qtranslation_table_for_decode);
-
- Qtranslation_table_for_encode = intern ("translation-table-for-encode");
- staticpro (&Qtranslation_table_for_encode);
-
- Qsafe_chars = intern ("safe-chars");
- staticpro (&Qsafe_chars);
-
- Qchar_coding_system = intern ("char-coding-system");
- staticpro (&Qchar_coding_system);
-
/* Intern this now in case it isn't already done.
Setting this variable twice is harmless.
But don't staticpro it here--that is done in alloc.c. */
Qchar_table_extra_slots = intern ("char-table-extra-slots");
- Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
- Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
-
- Qvalid_codes = intern ("valid-codes");
- staticpro (&Qvalid_codes);
- Qascii_incompatible = intern ("ascii-incompatible");
- staticpro (&Qascii_incompatible);
+ DEFSYM (Qtranslation_table, "translation-table");
+ Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
+ DEFSYM (Qtranslation_table_id, "translation-table-id");
+ DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
+ DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
- Qemacs_mule = intern ("emacs-mule");
- staticpro (&Qemacs_mule);
+ DEFSYM (Qvalid_codes, "valid-codes");
- Qraw_text = intern ("raw-text");
- staticpro (&Qraw_text);
+ DEFSYM (Qemacs_mule, "emacs-mule");
- Qutf_8 = intern ("utf-8");
- staticpro (&Qutf_8);
+ DEFSYM (QCcategory, ":category");
+ DEFSYM (QCmnemonic, ":mnemonic");
+ DEFSYM (QCdefalut_char, ":default-char");
+ DEFSYM (QCdecode_translation_table, ":decode-translation-table");
+ DEFSYM (QCencode_translation_table, ":encode-translation-table");
+ DEFSYM (QCpost_read_conversion, ":post-read-conversion");
+ DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
+ DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
- Qcoding_system_define_form = intern ("coding-system-define-form");
- staticpro (&Qcoding_system_define_form);
+ Vcoding_category_table
+ = Fmake_vector (make_number (coding_category_max), Qnil);
+ staticpro (&Vcoding_category_table);
+ /* Followings are target of code detection. */
+ ASET (Vcoding_category_table, coding_category_iso_7,
+ intern ("coding-category-iso-7"));
+ ASET (Vcoding_category_table, coding_category_iso_7_tight,
+ intern ("coding-category-iso-7-tight"));
+ ASET (Vcoding_category_table, coding_category_iso_8_1,
+ intern ("coding-category-iso-8-1"));
+ ASET (Vcoding_category_table, coding_category_iso_8_2,
+ intern ("coding-category-iso-8-2"));
+ ASET (Vcoding_category_table, coding_category_iso_7_else,
+ intern ("coding-category-iso-7-else"));
+ ASET (Vcoding_category_table, coding_category_iso_8_else,
+ intern ("coding-category-iso-8-else"));
+ ASET (Vcoding_category_table, coding_category_utf_8,
+ intern ("coding-category-utf-8"));
+ ASET (Vcoding_category_table, coding_category_utf_16_be,
+ intern ("coding-category-utf-16-be"));
+ ASET (Vcoding_category_table, coding_category_utf_16_auto,
+ intern ("coding-category-utf-16-auto"));
+ ASET (Vcoding_category_table, coding_category_utf_16_le,
+ intern ("coding-category-utf-16-le"));
+ ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
+ intern ("coding-category-utf-16-be-nosig"));
+ ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
+ intern ("coding-category-utf-16-le-nosig"));
+ ASET (Vcoding_category_table, coding_category_charset,
+ intern ("coding-category-charset"));
+ ASET (Vcoding_category_table, coding_category_sjis,
+ intern ("coding-category-sjis"));
+ ASET (Vcoding_category_table, coding_category_big5,
+ intern ("coding-category-big5"));
+ ASET (Vcoding_category_table, coding_category_ccl,
+ intern ("coding-category-ccl"));
+ ASET (Vcoding_category_table, coding_category_emacs_mule,
+ intern ("coding-category-emacs-mule"));
+ /* Followings are NOT target of code detection. */
+ ASET (Vcoding_category_table, coding_category_raw_text,
+ intern ("coding-category-raw-text"));
+ ASET (Vcoding_category_table, coding_category_undecided,
+ intern ("coding-category-undecided"));
+
+ DEFSYM (Qinsufficient_source, "insufficient-source");
+ DEFSYM (Qinconsistent_eol, "inconsistent-eol");
+ DEFSYM (Qinvalid_source, "invalid-source");
+ DEFSYM (Qinterrupted, "interrupted");
+ DEFSYM (Qinsufficient_memory, "insufficient-memory");
+ DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
defsubr (&Scoding_system_p);
defsubr (&Sread_coding_system);
@@ -7859,6 +9413,7 @@ syms_of_coding ()
defsubr (&Sdetect_coding_string);
defsubr (&Sfind_coding_systems_region_internal);
defsubr (&Sunencodable_char_position);
+ defsubr (&Scheck_coding_systems_region);
defsubr (&Sdecode_coding_region);
defsubr (&Sencode_coding_region);
defsubr (&Sdecode_coding_string);
@@ -7873,15 +9428,21 @@ syms_of_coding ()
defsubr (&Sset_keyboard_coding_system_internal);
defsubr (&Skeyboard_coding_system);
defsubr (&Sfind_operation_coding_system);
- defsubr (&Supdate_coding_systems_internal);
- defsubr (&Sset_coding_priority_internal);
+ defsubr (&Sset_coding_system_priority);
defsubr (&Sdefine_coding_system_internal);
+ defsubr (&Sdefine_coding_system_alias);
+ defsubr (&Scoding_system_put);
+ defsubr (&Scoding_system_base);
+ defsubr (&Scoding_system_plist);
+ defsubr (&Scoding_system_aliases);
+ defsubr (&Scoding_system_eol_type);
+ defsubr (&Scoding_system_priority_list);
DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
doc: /* List of coding systems.
Do not alter the value of this variable manually. This variable should be
-updated by the functions `make-coding-system' and
+updated by the functions `define-coding-system' and
`define-coding-system-alias'. */);
Vcoding_system_list = Qnil;
@@ -7908,7 +9469,7 @@ Don't modify this variable directly, but use `set-coding-priority'. */);
int i;
Vcoding_category_list = Qnil;
- for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
+ for (i = coding_category_max - 1; i >= 0; i--)
Vcoding_category_list
= Fcons (XVECTOR (Vcoding_category_table)->contents[i],
Vcoding_category_list);
@@ -7938,25 +9499,44 @@ the value of `buffer-file-coding-system' is used. */);
Vcoding_system_for_write = Qnil;
DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
- doc: /* Coding system used in the latest file or process I/O.
-Also set by `encode-coding-region', `decode-coding-region',
-`encode-coding-string' and `decode-coding-string'. */);
+ doc: /*
+Coding system used in the latest file or process I/O. */);
Vlast_coding_system_used = Qnil;
+ DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
+ doc: /*
+Error status of the last code conversion.
+
+When an error was detected in the last code conversion, this variable
+is set to one of the following symbols.
+ `insufficient-source'
+ `inconsistent-eol'
+ `invalid-source'
+ `interrupted'
+ `insufficient-memory'
+When no error was detected, the value doesn't change. So, to check
+the error status of a code conversion by this variable, you must
+explicitly set this variable to nil before performing code
+conversion. */);
+ Vlast_code_conversion_error = Qnil;
+
DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
- doc: /* *Non-nil means always inhibit code conversion of end-of-line format.
+ doc: /*
+*Non-nil means always inhibit code conversion of end-of-line format.
See info node `Coding Systems' and info node `Text and Binary' concerning
such conversion. */);
inhibit_eol_conversion = 0;
DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
- doc: /* Non-nil means process buffer inherits coding system of process output.
+ doc: /*
+Non-nil means process buffer inherits coding system of process output.
Bind it to t if the process output is to be treated as if it were a file
read from some filesystem. */);
inherit_process_coding_system = 0;
DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
- doc: /* Alist to decide a coding system to use for a file I/O operation.
+ doc: /*
+Alist to decide a coding system to use for a file I/O operation.
The format is ((PATTERN . VAL) ...),
where PATTERN is a regular expression matching a file name,
VAL is a coding system, a cons of coding systems, or a function symbol.
@@ -7966,14 +9546,15 @@ If VAL is a cons of coding systems, the car part is used for decoding,
and the cdr part is used for encoding.
If VAL is a function symbol, the function must return a coding system
or a cons of coding systems which are used as above. The function gets
-the arguments with which `find-operation-coding-system' was called.
+the arguments with which `find-operation-coding-systems' was called.
See also the function `find-operation-coding-system'
and the variable `auto-coding-alist'. */);
Vfile_coding_system_alist = Qnil;
DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
- doc: /* Alist to decide a coding system to use for a process I/O operation.
+ doc: /*
+Alist to decide a coding system to use for a process I/O operation.
The format is ((PATTERN . VAL) ...),
where PATTERN is a regular expression matching a program name,
VAL is a coding system, a cons of coding systems, or a function symbol.
@@ -7988,7 +9569,8 @@ See also the function `find-operation-coding-system'. */);
Vprocess_coding_system_alist = Qnil;
DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
- doc: /* Alist to decide a coding system to use for a network I/O operation.
+ doc: /*
+Alist to decide a coding system to use for a network I/O operation.
The format is ((PATTERN . VAL) ...),
where PATTERN is a regular expression matching a network service name
or is a port number to connect to,
@@ -8010,23 +9592,28 @@ Also used for decoding keyboard input on X Window system. */);
/* The eol mnemonics are reset in startup.el system-dependently. */
DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
- doc: /* *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
+ doc: /*
+*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
eol_mnemonic_unix = build_string (":");
DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
- doc: /* *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
+ doc: /*
+*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
eol_mnemonic_dos = build_string ("\\");
DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
- doc: /* *String displayed in mode line for MAC-like (CR) end-of-line format. */);
+ doc: /*
+*String displayed in mode line for MAC-like (CR) end-of-line format. */);
eol_mnemonic_mac = build_string ("/");
DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
- doc: /* *String displayed in mode line when end-of-line format is not yet determined. */);
+ doc: /*
+*String displayed in mode line when end-of-line format is not yet determined. */);
eol_mnemonic_undecided = build_string (":");
DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
- doc: /* *Non-nil enables character translation while encoding and decoding. */);
+ doc: /*
+*Non-nil enables character translation while encoding and decoding. */);
Venable_character_translation = Qt;
DEFVAR_LISP ("standard-translation-table-for-decode",
@@ -8039,11 +9626,12 @@ Also used for decoding keyboard input on X Window system. */);
doc: /* Table for translating characters while encoding. */);
Vstandard_translation_table_for_encode = Qnil;
- DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
+ DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
doc: /* Alist of charsets vs revision numbers.
While encoding, if a charset (car part of an element) is found,
-designate it with the escape sequence identifying revision (cdr part of the element). */);
- Vcharset_revision_alist = Qnil;
+designate it with the escape sequence identifying revision (cdr part
+of the element). */);
+ Vcharset_revision_table = Qnil;
DEFVAR_LISP ("default-process-coding-system",
&Vdefault_process_coding_system,
@@ -8053,7 +9641,8 @@ the cdr part is used for encoding a text to be sent to a process. */);
Vdefault_process_coding_system = Qnil;
DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
- doc: /* Table of extra Latin codes in the range 128..159 (inclusive).
+ doc: /*
+Table of extra Latin codes in the range 128..159 (inclusive).
This is a vector of length 256.
If Nth element is non-nil, the existence of code N in a file
\(or output of subprocess) doesn't prevent it to be detected as
@@ -8065,7 +9654,8 @@ Only 128th through 159th elements has a meaning. */);
DEFVAR_LISP ("select-safe-coding-system-function",
&Vselect_safe_coding_system_function,
- doc: /* Function to call to select safe coding system for encoding a text.
+ doc: /*
+Function to call to select safe coding system for encoding a text.
If set, this function is called to force a user to select a proper
coding system which can encode the text in the case that a default
@@ -8085,7 +9675,8 @@ called even if `coding-system-for-write' is non-nil. The command
DEFVAR_BOOL ("inhibit-iso-escape-detection",
&inhibit_iso_escape_detection,
- doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
+ doc: /*
+If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
By default, on reading a file, Emacs tries to detect how the text is
encoded. This code detection is sensitive to escape sequences. If
@@ -8115,6 +9706,68 @@ escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argumen
This is applied to the result of input methods, not their input. See also
`keyboard-translate-table'. */);
Vtranslation_table_for_input = Qnil;
+
+ {
+ Lisp_Object args[coding_arg_max];
+ Lisp_Object plist[16];
+ int i;
+
+ for (i = 0; i < coding_arg_max; i++)
+ args[i] = Qnil;
+
+ plist[0] = intern (":name");
+ plist[1] = args[coding_arg_name] = Qno_conversion;
+ plist[2] = intern (":mnemonic");
+ plist[3] = args[coding_arg_mnemonic] = make_number ('=');
+ plist[4] = intern (":coding-type");
+ plist[5] = args[coding_arg_coding_type] = Qraw_text;
+ plist[6] = intern (":ascii-compatible-p");
+ plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
+ plist[8] = intern (":default-char");
+ plist[9] = args[coding_arg_default_char] = make_number (0);
+ plist[10] = intern (":for-unibyte");
+ plist[11] = args[coding_arg_for_unibyte] = Qt;
+ plist[12] = intern (":docstring");
+ plist[13] = build_string ("Do no conversion.\n\
+\n\
+When you visit a file with this coding, the file is read into a\n\
+unibyte buffer as is, thus each byte of a file is treated as a\n\
+character.");
+ plist[14] = intern (":eol-type");
+ plist[15] = args[coding_arg_eol_type] = Qunix;
+ args[coding_arg_plist] = Flist (16, plist);
+ Fdefine_coding_system_internal (coding_arg_max, args);
+
+ plist[1] = args[coding_arg_name] = Qundecided;
+ plist[3] = args[coding_arg_mnemonic] = make_number ('-');
+ plist[5] = args[coding_arg_coding_type] = Qundecided;
+ /* This is already set.
+ plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
+ plist[8] = intern (":charset-list");
+ plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
+ plist[11] = args[coding_arg_for_unibyte] = Qnil;
+ plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
+ plist[15] = args[coding_arg_eol_type] = Qnil;
+ args[coding_arg_plist] = Flist (16, plist);
+ Fdefine_coding_system_internal (coding_arg_max, args);
+ }
+
+ setup_coding_system (Qno_conversion, &keyboard_coding);
+ setup_coding_system (Qundecided, &terminal_coding);
+ setup_coding_system (Qno_conversion, &safe_terminal_coding);
+
+ {
+ int i;
+
+ for (i = 0; i < coding_category_max; i++)
+ Fset (AREF (Vcoding_category_table, i), Qno_conversion);
+ }
+#if defined (MSDOS) || defined (WINDOWSNT)
+ system_eol_type = Qdos;
+#else
+ system_eol_type = Qunix;
+#endif
+ staticpro (&system_eol_type);
}
char *
diff --git a/src/coding.h b/src/coding.h
index 5727b57a850..93375d2c2eb 100644
--- a/src/coding.h
+++ b/src/coding.h
@@ -4,6 +4,9 @@
Copyright (C) 1995, 1997, 1998, 2000
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
+ Copyright (C) 2003
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -25,306 +28,253 @@ Boston, MA 02110-1301, USA. */
#ifndef EMACS_CODING_H
#define EMACS_CODING_H
-#include "ccl.h"
+/* Index to arguments of Fdefine_coding_system_internal. */
-/*** EMACS' INTERNAL FORMAT (emacs-mule) section ***/
+enum define_coding_system_arg_index
+ {
+ coding_arg_name,
+ coding_arg_mnemonic,
+ coding_arg_coding_type,
+ coding_arg_charset_list,
+ coding_arg_ascii_compatible_p,
+ coding_arg_decode_translation_table,
+ coding_arg_encode_translation_table,
+ coding_arg_post_read_conversion,
+ coding_arg_pre_write_conversion,
+ coding_arg_default_char,
+ coding_arg_for_unibyte,
+ coding_arg_plist,
+ coding_arg_eol_type,
+ coding_arg_max
+ };
-/* All code (1-byte) of Emacs' internal format is classified into one
- of the followings. See also `charset.h'. */
-enum emacs_code_class_type
+enum define_coding_iso2022_arg_index
{
- EMACS_control_code, /* Control codes in the range
- 0x00..0x1F and 0x7F except for the
- following two codes. */
- EMACS_linefeed_code, /* 0x0A (linefeed) to denote
- end-of-line. */
- EMACS_carriage_return_code, /* 0x0D (carriage-return) to be used
- in selective display mode. */
- EMACS_ascii_code, /* ASCII characters. */
- EMACS_leading_code_2, /* Base leading code of official
- TYPE9N character. */
- EMACS_leading_code_3, /* Base leading code of private TYPE9N
- or official TYPE9Nx9N character. */
- EMACS_leading_code_4, /* Base leading code of private
- TYPE9Nx9N character. */
- EMACS_invalid_code /* Invalid code, i.e. a base leading
- code not yet assigned to any
- charset, or a code of the range
- 0xA0..0xFF. */
+ coding_arg_iso2022_initial = coding_arg_max,
+ coding_arg_iso2022_reg_usage,
+ coding_arg_iso2022_request,
+ coding_arg_iso2022_flags,
+ coding_arg_iso2022_max
};
-extern enum emacs_code_class_type emacs_code_class[256];
-
-/*** ISO2022 section ***/
-
-/* Macros to define code of control characters for ISO2022's functions. */
- /* code */ /* function */
-#define ISO_CODE_LF 0x0A /* line-feed */
-#define ISO_CODE_CR 0x0D /* carriage-return */
-#define ISO_CODE_SO 0x0E /* shift-out */
-#define ISO_CODE_SI 0x0F /* shift-in */
-#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
-#define ISO_CODE_ESC 0x1B /* escape */
-#define ISO_CODE_SS2 0x8E /* single-shift-2 */
-#define ISO_CODE_SS3 0x8F /* single-shift-3 */
-#define ISO_CODE_CSI 0x9B /* control-sequence-introduce */
-
-/* All code (1-byte) of ISO2022 is classified into one of the
- followings. */
-enum iso_code_class_type
+enum define_coding_utf16_arg_index
{
- ISO_control_0, /* Control codes in the range
- 0x00..0x1F and 0x7F, except for the
- following 5 codes. */
- ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
- ISO_shift_out, /* ISO_CODE_SO (0x0E) */
- ISO_shift_in, /* ISO_CODE_SI (0x0F) */
- ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
- ISO_escape, /* ISO_CODE_SO (0x1B) */
- ISO_control_1, /* Control codes in the range
- 0x80..0x9F, except for the
- following 3 codes. */
- ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
- ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
- ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
- ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
- ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
- ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
- ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
+ coding_arg_utf16_bom = coding_arg_max,
+ coding_arg_utf16_endian,
+ coding_arg_utf16_max
};
-/** The macros CODING_FLAG_ISO_XXX defines a flag bit of the `flags'
- element in the structure `coding_system'. This information is used
- while encoding a text to ISO2022. **/
+enum define_coding_ccl_arg_index
+ {
+ coding_arg_ccl_decoder = coding_arg_max,
+ coding_arg_ccl_encoder,
+ coding_arg_ccl_valids,
+ coding_arg_ccl_max
+ };
-/* If set, produce short-form designation sequence (e.g. ESC $ A)
- instead of long-form sequence (e.g. ESC $ ( A). */
-#define CODING_FLAG_ISO_SHORT_FORM 0x0001
+/* Hash table for all coding systems. Keys are coding system symbols
+ and values are spec vectors of the corresponding coding system. A
+ spec vector has the form [ ATTRS ALIASES EOL-TYPE ]. ATTRS is a
+ vector of attribute of the coding system. ALIASES is a list of
+ aliases (symbols) of the coding system. EOL-TYPE is `unix', `dos',
+ `mac' or a vector of coding systems (symbols). */
-/* If set, reset graphic planes and registers at end-of-line to the
- initial state. */
-#define CODING_FLAG_ISO_RESET_AT_EOL 0x0002
+extern Lisp_Object Vcoding_system_hash_table;
-/* If set, reset graphic planes and registers before any control
- characters to the initial state. */
-#define CODING_FLAG_ISO_RESET_AT_CNTL 0x0004
-/* If set, encode by 7-bit environment. */
-#define CODING_FLAG_ISO_SEVEN_BITS 0x0008
+/* Enumeration of coding system type. */
-/* If set, use locking-shift function. */
-#define CODING_FLAG_ISO_LOCKING_SHIFT 0x0010
+enum coding_system_type
+ {
+ coding_type_charset,
+ coding_type_utf_8,
+ coding_type_utf_16,
+ coding_type_iso_2022,
+ coding_type_emacs_mule,
+ coding_type_sjis,
+ coding_type_ccl,
+ coding_type_raw_text,
+ coding_type_undecided,
+ coding_type_max
+ };
-/* If set, use single-shift function. Overwrite
- CODING_FLAG_ISO_LOCKING_SHIFT. */
-#define CODING_FLAG_ISO_SINGLE_SHIFT 0x0020
-/* If set, designate JISX0201-Roman instead of ASCII. */
-#define CODING_FLAG_ISO_USE_ROMAN 0x0040
+/* Enumeration of end-of-line format type. */
-/* If set, designate JISX0208-1978 instead of JISX0208-1983. */
-#define CODING_FLAG_ISO_USE_OLDJIS 0x0080
+enum end_of_line_type
+ {
+ eol_lf, /* Line-feed only, same as Emacs' internal
+ format. */
+ eol_crlf, /* Sequence of carriage-return and
+ line-feed. */
+ eol_cr, /* Carriage-return only. */
+ eol_any, /* Accept any of above. Produce line-feed
+ only. */
+ eol_undecided, /* This value is used to denote that the
+ eol-type is not yet undecided. */
+ eol_type_max
+ };
-/* If set, do not produce ISO6429's direction specifying sequence. */
-#define CODING_FLAG_ISO_NO_DIRECTION 0x0100
+/* Enumeration of index to an attribute vector of a coding system. */
-/* If set, assume designation states are reset at beginning of line on
- output. */
-#define CODING_FLAG_ISO_INIT_AT_BOL 0x0200
+enum coding_attr_index
+ {
+ coding_attr_base_name,
+ coding_attr_docstring,
+ coding_attr_mnemonic,
+ coding_attr_type,
+ coding_attr_charset_list,
+ coding_attr_ascii_compat,
+ coding_attr_decode_tbl,
+ coding_attr_encode_tbl,
+ coding_attr_trans_tbl,
+ coding_attr_post_read,
+ coding_attr_pre_write,
+ coding_attr_default_char,
+ coding_attr_for_unibyte,
+ coding_attr_plist,
+
+ coding_attr_category,
+ coding_attr_safe_charsets,
+
+ /* The followings are extra attributes for each type. */
+ coding_attr_charset_valids,
+
+ coding_attr_ccl_decoder,
+ coding_attr_ccl_encoder,
+ coding_attr_ccl_valids,
+
+ coding_attr_iso_initial,
+ coding_attr_iso_usage,
+ coding_attr_iso_request,
+ coding_attr_iso_flags,
+
+ coding_attr_utf_16_bom,
+ coding_attr_utf_16_endian,
+
+ coding_attr_emacs_mule_full,
+
+ coding_attr_last_index
+ };
-/* If set, designation sequence should be placed at beginning of line
- on output. */
-#define CODING_FLAG_ISO_DESIGNATE_AT_BOL 0x0400
-/* If set, do not encode unsafe characters on output. */
-#define CODING_FLAG_ISO_SAFE 0x0800
+/* Macros to access an element of an attribute vector. */
-/* If set, extra latin codes (128..159) are accepted as a valid code
- on input. */
-#define CODING_FLAG_ISO_LATIN_EXTRA 0x1000
+#define CODING_ATTR_BASE_NAME(attrs) AREF (attrs, coding_attr_base_name)
+#define CODING_ATTR_TYPE(attrs) AREF (attrs, coding_attr_type)
+#define CODING_ATTR_CHARSET_LIST(attrs) AREF (attrs, coding_attr_charset_list)
+#define CODING_ATTR_MNEMONIC(attrs) AREF (attrs, coding_attr_mnemonic)
+#define CODING_ATTR_DOCSTRING(attrs) AREF (attrs, coding_attr_docstring)
+#define CODING_ATTR_ASCII_COMPAT(attrs) AREF (attrs, coding_attr_ascii_compat)
+#define CODING_ATTR_DECODE_TBL(attrs) AREF (attrs, coding_attr_decode_tbl)
+#define CODING_ATTR_ENCODE_TBL(attrs) AREF (attrs, coding_attr_encode_tbl)
+#define CODING_ATTR_TRANS_TBL(attrs) AREF (attrs, coding_attr_trans_tbl)
+#define CODING_ATTR_POST_READ(attrs) AREF (attrs, coding_attr_post_read)
+#define CODING_ATTR_PRE_WRITE(attrs) AREF (attrs, coding_attr_pre_write)
+#define CODING_ATTR_DEFAULT_CHAR(attrs) AREF (attrs, coding_attr_default_char)
+#define CODING_ATTR_FOR_UNIBYTE(attrs) AREF (attrs, coding_attr_for_unibyte)
+#define CODING_ATTR_FLUSHING(attrs) AREF (attrs, coding_attr_flushing)
+#define CODING_ATTR_PLIST(attrs) AREF (attrs, coding_attr_plist)
+#define CODING_ATTR_CATEGORY(attrs) AREF (attrs, coding_attr_category)
+#define CODING_ATTR_SAFE_CHARSETS(attrs)AREF (attrs, coding_attr_safe_charsets)
-/* If set, use designation escape sequence. */
-#define CODING_FLAG_ISO_DESIGNATION 0x10000
-/* A character to be produced on output if encoding of the original
- character is inhibitted by CODING_MODE_INHIBIT_UNENCODABLE_CHAR.
- It must be an ASCII character. */
-#define CODING_REPLACEMENT_CHARACTER '?'
+/* Return the name of a coding system specified by ID. */
+#define CODING_ID_NAME(id) \
+ (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), id))
-/* Structure of the field `spec.iso2022' in the structure `coding_system'. */
-struct iso2022_spec
-{
- /* The current graphic register invoked to each graphic plane. */
- int current_invocation[2];
+/* Return the attribute vector of a coding system specified by ID. */
- /* The current charset designated to each graphic register. */
- int current_designation[4];
+#define CODING_ID_ATTRS(id) \
+ (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 0))
- /* A charset initially designated to each graphic register. */
- int initial_designation[4];
+/* Return the list of aliases of a coding system specified by ID. */
- /* If not -1, it is a graphic register specified in an invalid
- designation sequence. */
- int last_invalid_designation_register;
+#define CODING_ID_ALIASES(id) \
+ (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 1))
- /* A graphic register to which each charset should be designated. */
- unsigned char requested_designation[MAX_CHARSET + 1];
+/* Return the eol-type of a coding system specified by ID. */
- /* A revision number to be specified for each charset on encoding.
- The value 255 means no revision number for the corresponding
- charset. */
- unsigned char charset_revision_number[MAX_CHARSET + 1];
+#define CODING_ID_EOL_TYPE(id) \
+ (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 2))
- /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
- by single-shift while encoding. */
- int single_shifting;
- /* Set to 1 temporarily only when processing at beginning of line. */
- int bol;
-};
+/* Return the spec vector of CODING_SYSTEM_SYMBOL. */
+
+#define CODING_SYSTEM_SPEC(coding_system_symbol) \
+ (Fgethash (coding_system_symbol, Vcoding_system_hash_table, Qnil))
+
+
+/* Return the ID of CODING_SYSTEM_SYMBOL. */
+
+#define CODING_SYSTEM_ID(coding_system_symbol) \
+ hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \
+ coding_system_symbol, NULL)
+
+/* Return 1 iff CODING_SYSTEM_SYMBOL is a coding system. */
+
+#define CODING_SYSTEM_P(coding_system_symbol) \
+ (CODING_SYSTEM_ID (coding_system_symbol) >= 0 \
+ || (! NILP (coding_system_symbol) \
+ && ! NILP (Fcoding_system_p (coding_system_symbol))))
+
+/* Check if X is a coding system or not. */
+
+#define CHECK_CODING_SYSTEM(x) \
+ do { \
+ if (CODING_SYSTEM_ID (x) < 0 \
+ && NILP (Fcheck_coding_system (x))) \
+ wrong_type_argument (Qcoding_system_p, (x)); \
+ } while (0)
+
+
+/* Check if X is a coding system or not. If it is, set SEPC to the
+ spec vector of the coding system. */
+
+#define CHECK_CODING_SYSTEM_GET_SPEC(x, spec) \
+ do { \
+ spec = CODING_SYSTEM_SPEC (x); \
+ if (NILP (spec)) \
+ { \
+ Fcheck_coding_system (x); \
+ spec = CODING_SYSTEM_SPEC (x); \
+ } \
+ if (NILP (spec)) \
+ x = wrong_type_argument (Qcoding_system_p, (x)); \
+ } while (0)
+
+
+/* Check if X is a coding system or not. If it is, set ID to the
+ ID of the coding system. */
+
+#define CHECK_CODING_SYSTEM_GET_ID(x, id) \
+ do \
+ { \
+ id = CODING_SYSTEM_ID (x); \
+ if (id < 0) \
+ { \
+ Fcheck_coding_system (x); \
+ id = CODING_SYSTEM_ID (x); \
+ } \
+ if (id < 0) \
+ x = wrong_type_argument (Qcoding_system_p, (x)); \
+ } while (0)
-/* Macros to access each field in the structure `spec.iso2022'. */
-#define CODING_SPEC_ISO_INVOCATION(coding, plane) \
- (coding)->spec.iso2022.current_invocation[plane]
-#define CODING_SPEC_ISO_DESIGNATION(coding, reg) \
- (coding)->spec.iso2022.current_designation[reg]
-#define CODING_SPEC_ISO_INITIAL_DESIGNATION(coding, reg) \
- (coding)->spec.iso2022.initial_designation[reg]
-#define CODING_SPEC_ISO_REQUESTED_DESIGNATION(coding, charset) \
- (coding)->spec.iso2022.requested_designation[charset]
-#define CODING_SPEC_ISO_REVISION_NUMBER(coding, charset) \
- (coding)->spec.iso2022.charset_revision_number[charset]
-#define CODING_SPEC_ISO_SINGLE_SHIFTING(coding) \
- (coding)->spec.iso2022.single_shifting
-#define CODING_SPEC_ISO_BOL(coding) \
- (coding)->spec.iso2022.bol
-
-/* A value which may appear in
- coding->spec.iso2022.requested_designation indicating that the
- corresponding charset does not request any graphic register to be
- designated. */
-#define CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION 4
-
-/* Return a charset which is currently designated to the graphic plane
- PLANE in the coding-system CODING. */
-#define CODING_SPEC_ISO_PLANE_CHARSET(coding, plane) \
- ((CODING_SPEC_ISO_INVOCATION (coding, plane) < 0) \
- ? -1 \
- : CODING_SPEC_ISO_DESIGNATION (coding, \
- CODING_SPEC_ISO_INVOCATION (coding, plane)))
-
-/*** BIG5 section ***/
-
-/* Macros to denote each type of BIG5 coding system. */
-#define CODING_FLAG_BIG5_HKU 0x00 /* BIG5-HKU is one of variants of
- BIG5 developed by Hong Kong
- University. */
-#define CODING_FLAG_BIG5_ETEN 0x01 /* BIG5_ETen is one of variants
- of BIG5 developed by the
- company ETen in Taiwan. */
/*** GENERAL section ***/
-/* Types of coding system. */
-enum coding_type
+/* Enumeration of result code of code conversion. */
+enum coding_result_code
{
- coding_type_no_conversion, /* A coding system which requires no
- conversion for reading and writing
- including end-of-line format. */
- coding_type_emacs_mule, /* A coding system used in Emacs'
- buffer and string. Requires no
- conversion for reading and writing
- except for end-of-line format. */
- coding_type_undecided, /* A coding system which requires
- automatic detection of a real
- coding system. */
- coding_type_sjis, /* SJIS coding system for Japanese. */
- coding_type_iso2022, /* Any coding system of ISO2022
- variants. */
- coding_type_big5, /* BIG5 coding system for Chinese. */
- coding_type_ccl, /* The coding system of which decoder
- and encoder are written in CCL. */
- coding_type_raw_text /* A coding system for a text
- containing random 8-bit code which
- does not require code conversion
- except for end-of-line format. */
+ CODING_RESULT_SUCCESS,
+ CODING_RESULT_INSUFFICIENT_SRC,
+ CODING_RESULT_INSUFFICIENT_DST,
+ CODING_RESULT_INCONSISTENT_EOL,
+ CODING_RESULT_INVALID_SRC,
+ CODING_RESULT_INTERRUPT,
+ CODING_RESULT_INSUFFICIENT_MEM
};
-/* Formats of end-of-line. */
-#define CODING_EOL_LF 0 /* Line-feed only, same as Emacs'
- internal format. */
-#define CODING_EOL_CRLF 1 /* Sequence of carriage-return and
- line-feed. */
-#define CODING_EOL_CR 2 /* Carriage-return only. */
-#define CODING_EOL_UNDECIDED 3 /* This value is used to denote the
- eol-type is not yet decided. */
-#define CODING_EOL_INCONSISTENT 4 /* This value is used to denote the
- eol-type is not consistent
- through the file. */
-
-/* 1 iff composing. */
-#define COMPOSING_P(coding) ((int) coding->composing > (int) COMPOSITION_NO)
-
-#define COMPOSITION_DATA_SIZE 4080
-#define COMPOSITION_DATA_MAX_BUNCH_LENGTH (4 + MAX_COMPOSITION_COMPONENTS*2)
-
-/* Data structure to hold information about compositions of text that
- is being decoded or encode. ISO 2022 base code conversion routines
- handle special ESC sequences for composition specification. But,
- they can't get/put such information directly from/to a buffer in
- the deepest place. So, they store or retrieve the information
- through this structure.
-
- The encoder stores the information in this structure when it meets
- ESC sequences for composition while encoding codes, then, after all
- text codes are encoded, puts `composition' properties on the text
- by referring to the structure.
-
- The decoder at first stores the information of a text to be
- decoded, then, while decoding codes, generates ESC sequences for
- composition at proper places by referring to the structure. */
-
-struct composition_data
-{
- /* The character position of the first character to be encoded or
- decoded. START and END (see below) are relative to this
- position. */
- int char_offset;
-
- /* The composition data. These elements are repeated for each
- composition:
- LENGTH START END METHOD [ COMPONENT ... ]
- where,
- LENGTH is the number of elements for this composition.
-
- START and END are starting and ending character positions of
- the composition relative to `char_offset'.
-
- METHOD is one of `enum composing_status' specifying the way of
- composition.
-
- COMPONENT is a character or an encoded composition rule. */
- int data[COMPOSITION_DATA_SIZE];
-
- /* The number of elements in `data' currently used. */
- int used;
-
- /* Pointers to the previous and next structures. When `data' is
- filled up, another structure is allocated and linked in `next'.
- The new structure has backward link to this structure in `prev'.
- The number of chained structures depends on how many compositions
- the text being encoded or decoded contains. */
- struct composition_data *prev, *next;
-};
-
-/* Macros used for the member `result' of the struct
- coding_system. */
-#define CODING_FINISH_NORMAL 0
-#define CODING_FINISH_INSUFFICIENT_SRC 1
-#define CODING_FINISH_INSUFFICIENT_DST 2
-#define CODING_FINISH_INCONSISTENT_EOL 3
-#define CODING_FINISH_INSUFFICIENT_CMP 4
-#define CODING_FINISH_INTERRUPT 5
/* Macros used for the member `mode' of the struct coding_system. */
@@ -333,7 +283,7 @@ struct composition_data
#define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01
/* If set, the decoding/encoding routines treat the current data as
- the last block of the whole text to be converted, and do
+ the last block of the whole text to be converted, and do the
appropriate finishing job. */
#define CODING_MODE_LAST_BLOCK 0x02
@@ -341,65 +291,106 @@ struct composition_data
enables selective display. */
#define CODING_MODE_SELECTIVE_DISPLAY 0x04
-/* If set, replace unencodabae characters by `?' on encoding. */
-#define CODING_MODE_INHIBIT_UNENCODABLE_CHAR 0x08
-
/* This flag is used by the decoding/encoding routines on the fly. If
set, it means that right-to-left text is being processed. */
-#define CODING_MODE_DIRECTION 0x10
+#define CODING_MODE_DIRECTION 0x08
-struct coding_system
+#define CODING_MODE_FIXED_DESTINATION 0x10
+
+/* If set, it means that the encoding routines produces some safe
+ ASCII characters (usually '?') for unsupported characters. */
+#define CODING_MODE_SAFE_ENCODING 0x20
+
+/* Structure of the field `spec.iso_2022' in the structure
+ `coding_system'. */
+struct iso_2022_spec
{
- /* Type of the coding system. */
- enum coding_type type;
+ /* Bit-wise-or of CODING_ISO_FLAG_XXX. */
+ unsigned flags;
- /* Type of end-of-line format (LF, CRLF, or CR) of the coding system. */
- int eol_type;
+ /* The current graphic register invoked to each graphic plane. */
+ int current_invocation[2];
- /* Flag bits of the coding system. The meaning of each bit is common
- to all types of coding systems. */
- unsigned int common_flags;
+ /* The current charset designated to each graphic register. The
+ value -1 means that not charset is designated, -2 means that
+ there was an invalid designation previously. */
+ int current_designation[4];
- /* Flag bits of the coding system. The meaning of each bit depends
- on the type of the coding system. */
- unsigned int flags;
+ /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
+ by single-shift while encoding. */
+ int single_shifting;
- /* Mode bits of the coding system. See the comments of the macros
- CODING_MODE_XXX. */
- unsigned int mode;
+ /* Set to 1 temporarily only when processing at beginning of line. */
+ int bol;
+};
+
+struct ccl_spec;
+
+enum utf_16_bom_type
+ {
+ utf_16_detect_bom,
+ utf_16_without_bom,
+ utf_16_with_bom
+ };
+
+enum utf_16_endian_type
+ {
+ utf_16_big_endian,
+ utf_16_little_endian
+ };
+
+struct utf_16_spec
+{
+ enum utf_16_bom_type bom;
+ enum utf_16_endian_type endian;
+ int surrogate;
+};
- /* The current status of composition handling. */
- int composing;
+struct coding_detection_info
+{
+ /* Values of these members are bitwise-OR of CATEGORY_MASK_XXXs. */
+ /* Which categories are already checked. */
+ int checked;
+ /* Which categories are strongly found. */
+ int found;
+ /* Which categories are rejected. */
+ int rejected;
+};
- /* 1 iff the next character is a composition rule. */
- int composition_rule_follows;
- /* Information of compositions are stored here on decoding and set
- in advance on encoding. */
- struct composition_data *cmp_data;
+struct coding_system
+{
+ /* ID number of the coding system. This is an index to
+ Vcoding_system_hash_table. This value is set by
+ setup_coding_system. At the early stage of building time, this
+ value is -1 in the array coding_categories to indicate that no
+ coding-system of that category is yet defined. */
+ int id;
- /* Index to cmp_data->data for the first element for the current
- composition. */
- int cmp_data_start;
+ /* Flag bits of the coding system. The meaning of each bit is common
+ to all types of coding systems. */
+ int common_flags;
- /* Index to cmp_data->data for the current element for the current
- composition. */
- int cmp_data_index;
+ /* Mode bits of the coding system. See the comments of the macros
+ CODING_MODE_XXX. */
+ unsigned int mode;
/* Detailed information specific to each type of coding system. */
- union spec
+ union
{
- struct iso2022_spec iso2022;
- struct ccl_spec ccl; /* Defined in ccl.h. */
+ struct iso_2022_spec iso_2022;
+ struct ccl_spec *ccl; /* Defined in ccl.h. */
+ struct utf_16_spec utf_16;
+ int emacs_mule_full_support;
} spec;
- /* Index number of coding category of the coding system. */
- int category_idx;
+ int max_charset_id;
+ char *safe_charsets;
- /* The following two members specify how characters 128..159 are
- represented in source and destination text respectively. 1 means
- they are represented by 2-byte sequence, 0 means they are
- represented by 1-byte as is (see the comment in charset.h). */
+ /* The following two members specify how binary 8-bit code 128..255
+ are represented in source and destination text respectively. 1
+ means they are represented by 2-byte sequence, 0 means they are
+ represented by 1-byte as is (see the comment in character.h). */
unsigned src_multibyte : 1;
unsigned dst_multibyte : 1;
@@ -407,173 +398,200 @@ struct coding_system
-1 in setup_coding_system, and updated by detect_coding. So,
when this is equal to the byte length of the text being
converted, we can skip the actual conversion process. */
- int heading_ascii;
+ int head_ascii;
/* The following members are set by encoding/decoding routine. */
- int produced, produced_char, consumed, consumed_char;
+ EMACS_INT produced, produced_char, consumed, consumed_char;
/* Number of error source data found in a decoding routine. */
int errors;
- /* Finish status of code conversion. It should be one of macros
- CODING_FINISH_XXXX. */
- int result;
+ /* Store the positions of error source data. */
+ EMACS_INT *error_positions;
- /* If nonzero, suppress error notification. */
- int suppress_error;
+ /* Finish status of code conversion. */
+ enum coding_result_code result;
- /* The following members are all Lisp symbols. We don't have to
- protect them from GC because the current garbage collection
- doesn't relocate Lisp symbols. But, when it is changed, we must
- find a way to protect them. */
+ EMACS_INT src_pos, src_pos_byte, src_chars, src_bytes;
+ Lisp_Object src_object;
+ const unsigned char *source;
- /* Backward pointer to the Lisp symbol of the coding system. */
- Lisp_Object symbol;
+ EMACS_INT dst_pos, dst_pos_byte, dst_bytes;
+ Lisp_Object dst_object;
+ unsigned char *destination;
- /* Lisp function (symbol) to be called after decoding to do
- additional conversion, or nil. */
- Lisp_Object post_read_conversion;
+ /* Set to 1 iff the source of conversion is not in the member
+ `charbuf', but at `src_object'. */
+ int chars_at_source;
- /* Lisp function (symbol) to be called before encoding to do
- additional conversion, or nil. */
- Lisp_Object pre_write_conversion;
+ /* If an element is non-negative, it is a character code.
- /* Character translation tables to look up, or nil. */
- Lisp_Object translation_table_for_decode;
- Lisp_Object translation_table_for_encode;
-};
+ If it is in the range -128..-1, it is a 8-bit character code
+ minus 256.
+
+ If it is less than -128, it specifies the start of an annotation
+ chunk. The length of the chunk is -128 minus the value of the
+ element. The following elements are OFFSET, ANNOTATION-TYPE, and
+ a sequence of actual data for the annotation. OFFSET is a
+ character position offset from dst_pos or src_pos,
+ ANNOTATION-TYPE specfies the meaning of the annotation and how to
+ handle the following data.. */
+ int *charbuf;
+ int charbuf_size, charbuf_used;
+
+ /* Set to 1 if charbuf contains an annotation. */
+ int annotated;
+
+ unsigned char carryover[64];
+ int carryover_bytes;
+
+ int default_char;
-/* Mask bits for (struct coding_system *)->common_flags. */
-#define CODING_REQUIRE_FLUSHING_MASK 0x01
-#define CODING_REQUIRE_DECODING_MASK 0x02
-#define CODING_REQUIRE_ENCODING_MASK 0x04
-#define CODING_REQUIRE_DETECTION_MASK 0x08
-#define CODING_ASCII_INCOMPATIBLE_MASK 0x10
+ int (*detector) P_ ((struct coding_system *,
+ struct coding_detection_info *));
+ void (*decoder) P_ ((struct coding_system *));
+ int (*encoder) P_ ((struct coding_system *));
+};
-/* Return 1 if the coding system CODING requires specific code to be
+/* Meanings of bits in the member `common_flags' of the structure
+ coding_system. The lowest 8 bits are reserved for various kind of
+ annotations (currently two of them are used). */
+#define CODING_ANNOTATION_MASK 0x00FF
+#define CODING_ANNOTATE_COMPOSITION_MASK 0x0001
+#define CODING_ANNOTATE_DIRECTION_MASK 0x0002
+#define CODING_ANNOTATE_CHARSET_MASK 0x0003
+#define CODING_FOR_UNIBYTE_MASK 0x0100
+#define CODING_REQUIRE_FLUSHING_MASK 0x0200
+#define CODING_REQUIRE_DECODING_MASK 0x0400
+#define CODING_REQUIRE_ENCODING_MASK 0x0800
+#define CODING_REQUIRE_DETECTION_MASK 0x1000
+#define CODING_RESET_AT_BOL_MASK 0x2000
+
+/* Return 1 if the coding context CODING requires annotaion
+ handling. */
+#define CODING_REQUIRE_ANNOTATION(coding) \
+ ((coding)->common_flags & CODING_ANNOTATION_MASK)
+
+/* Return 1 if the coding context CODING prefers decoding into unibyte. */
+#define CODING_FOR_UNIBYTE(coding) \
+ ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK)
+
+/* Return 1 if the coding context CODING requires specific code to be
attached at the tail of converted text. */
#define CODING_REQUIRE_FLUSHING(coding) \
((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)
-/* Return 1 if the coding system CODING requires code conversion on
+/* Return 1 if the coding context CODING requires code conversion on
decoding. */
#define CODING_REQUIRE_DECODING(coding) \
((coding)->dst_multibyte \
|| (coding)->common_flags & CODING_REQUIRE_DECODING_MASK)
-/* Return 1 if the coding system CODING requires code conversion on
+
+/* Return 1 if the coding context CODING requires code conversion on
encoding.
The non-multibyte part of the condition is to support encoding of
unibyte strings/buffers generated by string-as-unibyte or
(set-buffer-multibyte nil) from multibyte strings/buffers. */
-#define CODING_REQUIRE_ENCODING(coding) \
- ((coding)->src_multibyte \
- || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK)
+#define CODING_REQUIRE_ENCODING(coding) \
+ ((coding)->src_multibyte \
+ || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK \
+ || (coding)->mode & CODING_MODE_SELECTIVE_DISPLAY)
-/* Return 1 if the coding system CODING requires some kind of code
+
+/* Return 1 if the coding context CODING requires some kind of code
detection. */
#define CODING_REQUIRE_DETECTION(coding) \
((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)
-/* Return 1 if the coding system CODING requires code conversion on
+/* Return 1 if the coding context CODING requires code conversion on
decoding or some kind of code detection. */
#define CODING_MAY_REQUIRE_DECODING(coding) \
(CODING_REQUIRE_DECODING (coding) \
|| CODING_REQUIRE_DETECTION (coding))
-/* Index for each coding category in `coding_category_table' */
-#define CODING_CATEGORY_IDX_EMACS_MULE 0
-#define CODING_CATEGORY_IDX_SJIS 1
-#define CODING_CATEGORY_IDX_ISO_7 2
-#define CODING_CATEGORY_IDX_ISO_7_TIGHT 3
-#define CODING_CATEGORY_IDX_ISO_8_1 4
-#define CODING_CATEGORY_IDX_ISO_8_2 5
-#define CODING_CATEGORY_IDX_ISO_7_ELSE 6
-#define CODING_CATEGORY_IDX_ISO_8_ELSE 7
-#define CODING_CATEGORY_IDX_CCL 8
-#define CODING_CATEGORY_IDX_BIG5 9
-#define CODING_CATEGORY_IDX_UTF_8 10
-#define CODING_CATEGORY_IDX_UTF_16_BE 11
-#define CODING_CATEGORY_IDX_UTF_16_LE 12
-#define CODING_CATEGORY_IDX_RAW_TEXT 13
-#define CODING_CATEGORY_IDX_BINARY 14
-#define CODING_CATEGORY_IDX_MAX 15
-
-/* Definitions of flag bits returned by the function
- detect_coding_mask (). */
-#define CODING_CATEGORY_MASK_EMACS_MULE (1 << CODING_CATEGORY_IDX_EMACS_MULE)
-#define CODING_CATEGORY_MASK_SJIS (1 << CODING_CATEGORY_IDX_SJIS)
-#define CODING_CATEGORY_MASK_ISO_7 (1 << CODING_CATEGORY_IDX_ISO_7)
-#define CODING_CATEGORY_MASK_ISO_7_TIGHT (1 << CODING_CATEGORY_IDX_ISO_7_TIGHT)
-#define CODING_CATEGORY_MASK_ISO_8_1 (1 << CODING_CATEGORY_IDX_ISO_8_1)
-#define CODING_CATEGORY_MASK_ISO_8_2 (1 << CODING_CATEGORY_IDX_ISO_8_2)
-#define CODING_CATEGORY_MASK_ISO_7_ELSE (1 << CODING_CATEGORY_IDX_ISO_7_ELSE)
-#define CODING_CATEGORY_MASK_ISO_8_ELSE (1 << CODING_CATEGORY_IDX_ISO_8_ELSE)
-#define CODING_CATEGORY_MASK_CCL (1 << CODING_CATEGORY_IDX_CCL)
-#define CODING_CATEGORY_MASK_BIG5 (1 << CODING_CATEGORY_IDX_BIG5)
-#define CODING_CATEGORY_MASK_UTF_8 (1 << CODING_CATEGORY_IDX_UTF_8)
-#define CODING_CATEGORY_MASK_UTF_16_BE (1 << CODING_CATEGORY_IDX_UTF_16_BE)
-#define CODING_CATEGORY_MASK_UTF_16_LE (1 << CODING_CATEGORY_IDX_UTF_16_LE)
-#define CODING_CATEGORY_MASK_RAW_TEXT (1 << CODING_CATEGORY_IDX_RAW_TEXT)
-#define CODING_CATEGORY_MASK_BINARY (1 << CODING_CATEGORY_IDX_BINARY)
-
-/* This value is returned if detect_coding_mask () find nothing other
- than ASCII characters. */
-#define CODING_CATEGORY_MASK_ANY \
- ( CODING_CATEGORY_MASK_EMACS_MULE \
- | CODING_CATEGORY_MASK_SJIS \
- | CODING_CATEGORY_MASK_ISO_7 \
- | CODING_CATEGORY_MASK_ISO_7_TIGHT \
- | CODING_CATEGORY_MASK_ISO_8_1 \
- | CODING_CATEGORY_MASK_ISO_8_2 \
- | CODING_CATEGORY_MASK_ISO_7_ELSE \
- | CODING_CATEGORY_MASK_ISO_8_ELSE \
- | CODING_CATEGORY_MASK_CCL \
- | CODING_CATEGORY_MASK_BIG5 \
- | CODING_CATEGORY_MASK_UTF_8 \
- | CODING_CATEGORY_MASK_UTF_16_BE \
- | CODING_CATEGORY_MASK_UTF_16_LE)
-
-#define CODING_CATEGORY_MASK_ISO_7BIT \
- (CODING_CATEGORY_MASK_ISO_7 | CODING_CATEGORY_MASK_ISO_7_TIGHT)
-
-#define CODING_CATEGORY_MASK_ISO_8BIT \
- (CODING_CATEGORY_MASK_ISO_8_1 | CODING_CATEGORY_MASK_ISO_8_2)
-
-#define CODING_CATEGORY_MASK_ISO_SHIFT \
- (CODING_CATEGORY_MASK_ISO_7_ELSE | CODING_CATEGORY_MASK_ISO_8_ELSE)
-
-#define CODING_CATEGORY_MASK_ISO \
- ( CODING_CATEGORY_MASK_ISO_7BIT \
- | CODING_CATEGORY_MASK_ISO_SHIFT \
- | CODING_CATEGORY_MASK_ISO_8BIT)
-
-#define CODING_CATEGORY_MASK_UTF_16_BE_LE \
- (CODING_CATEGORY_MASK_UTF_16_BE | CODING_CATEGORY_MASK_UTF_16_LE)
-
/* Macros to decode or encode a character of JISX0208 in SJIS. S1 and
S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding
system. C1 and C2 are the 1st and 2nd position codes of Emacs'
internal format. */
-#define DECODE_SJIS(s1, s2, c1, c2) \
- do { \
- if (s2 >= 0x9F) \
- c1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0), \
- c2 = s2 - 0x7E; \
- else \
- c1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1), \
- c2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F); \
+#define SJIS_TO_JIS(code) \
+ do { \
+ int s1, s2, j1, j2; \
+ \
+ s1 = (code) >> 8, s2 = (code) & 0xFF; \
+ \
+ if (s2 >= 0x9F) \
+ (j1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0), \
+ j2 = s2 - 0x7E); \
+ else \
+ (j1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1), \
+ j2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F)); \
+ (code) = (j1 << 8) | j2; \
+ } while (0)
+
+#define SJIS_TO_JIS2(code) \
+ do { \
+ int s1, s2, j1, j2; \
+ \
+ s1 = (code) >> 8, s2 = (code) & 0xFF; \
+ \
+ if (s2 >= 0x9F) \
+ { \
+ j1 = (s1 == 0xF0 ? 0x28 \
+ : s1 == 0xF1 ? 0x24 \
+ : s1 == 0xF2 ? 0x2C \
+ : s1 == 0xF3 ? 0x2E \
+ : 0x6E + (s1 - 0xF4) * 2); \
+ j2 = s2 - 0x7E; \
+ } \
+ else \
+ { \
+ j1 = (s1 <= 0xF2 ? 0x21 + (s1 - 0xF0) * 2 \
+ : s1 <= 0xF4 ? 0x2D + (s1 - 0xF3) * 2 \
+ : 0x6F + (s1 - 0xF5) * 2); \
+ j2 = s2 - ((s2 >= 0x7F ? 0x20 : 0x1F)); \
+ } \
+ (code) = (j1 << 8) | j2; \
+ } while (0)
+
+
+#define JIS_TO_SJIS(code) \
+ do { \
+ int s1, s2, j1, j2; \
+ \
+ j1 = (code) >> 8, j2 = (code) & 0xFF; \
+ if (j1 & 1) \
+ (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x71 : 0xB1), \
+ s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F)); \
+ else \
+ (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x70 : 0xB0), \
+ s2 = j2 + 0x7E); \
+ (code) = (s1 << 8) | s2; \
} while (0)
-#define ENCODE_SJIS(c1, c2, s1, s2) \
+#define JIS_TO_SJIS2(code) \
do { \
- if (c1 & 1) \
- s1 = c1 / 2 + ((c1 < 0x5F) ? 0x71 : 0xB1), \
- s2 = c2 + ((c2 >= 0x60) ? 0x20 : 0x1F); \
+ int s1, s2, j1, j2; \
+ \
+ j1 = (code) >> 8, j2 = (code) & 0xFF; \
+ if (j1 & 1) \
+ { \
+ s1 = (j1 <= 0x25 ? 0xF0 + (j1 - 0x21) / 2 \
+ : j1 <= 0x27 ? 0xF3 + (j1 - 0x2D) / 2 \
+ : 0xF5 + (j1 - 0x6F) / 2); \
+ s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F); \
+ } \
else \
- s1 = c1 / 2 + ((c1 < 0x5F) ? 0x70 : 0xB0), \
- s2 = c2 + 0x7E; \
+ { \
+ s1 = (j1 == 0x28 ? 0xF0 \
+ : j1 == 0x24 ? 0xF1 \
+ : j1 == 0x2C ? 0xF2 \
+ : j1 == 0x2E ? 0xF3 \
+ : 0xF4 + (j1 - 0x6E) / 2); \
+ s2 = j2 + 0x7E; \
+ } \
+ (code) = (s1 << 8) | s2; \
} while (0)
/* Encode the file name NAME using the specified coding system
@@ -587,6 +605,7 @@ struct coding_system
? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \
: name))
+
/* Decode the file name NAME using the specified coding system
for file names, if any. */
#define DECODE_FILE(name) \
@@ -598,6 +617,7 @@ struct coding_system
? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \
: name))
+
/* Encode the string STR using the specified coding system
for system functions, if any. */
#define ENCODE_SYSTEM(str) \
@@ -614,54 +634,83 @@ struct coding_system
? code_convert_string_norecord (str, Vlocale_coding_system, 0) \
: str)
+/* Used by the gtk menu code. Note that this encodes utf-8, not
+ utf-8-emacs, so it's not a no-op. */
#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1)
/* Extern declarations. */
-extern int decode_coding P_ ((struct coding_system *, const unsigned char *,
- unsigned char *, int, int));
-extern int encode_coding P_ ((struct coding_system *, const unsigned char *,
- unsigned char *, int, int));
-extern void coding_save_composition P_ ((struct coding_system *, int, int,
- Lisp_Object));
-extern void coding_free_composition_data P_ ((struct coding_system *));
-extern void coding_adjust_composition_offset P_ ((struct coding_system *,
- int));
-extern void coding_allocate_composition_data P_ ((struct coding_system *,
- int));
-extern void coding_restore_composition P_ ((struct coding_system *,
- Lisp_Object));
-extern int code_convert_region P_ ((int, int, int, int, struct coding_system *,
- int, int));
-extern Lisp_Object run_pre_post_conversion_on_str P_ ((Lisp_Object,
- struct coding_system *,
- int));
-extern void run_pre_write_conversin_on_c_str P_ ((unsigned char **, int *,
- int, int,
- struct coding_system *));
-
+extern Lisp_Object code_conversion_save P_ ((int, int));
extern int decoding_buffer_size P_ ((struct coding_system *, int));
extern int encoding_buffer_size P_ ((struct coding_system *, int));
-extern void detect_coding P_ ((struct coding_system *, const unsigned char *,
- int));
-extern void detect_eol P_ ((struct coding_system *, const unsigned char *,
- int));
-extern int setup_coding_system P_ ((Lisp_Object, struct coding_system *));
-extern Lisp_Object code_convert_string P_ ((Lisp_Object,
- struct coding_system *, int, int));
-extern Lisp_Object code_convert_string1 P_ ((Lisp_Object, Lisp_Object,
- Lisp_Object, int));
+extern void setup_coding_system P_ ((Lisp_Object, struct coding_system *));
+extern Lisp_Object coding_charset_list P_ ((struct coding_system *));
+extern void detect_coding P_ ((struct coding_system *));
+extern Lisp_Object code_convert_region P_ ((Lisp_Object, Lisp_Object,
+ Lisp_Object, Lisp_Object,
+ int, int));
+extern Lisp_Object code_convert_string P_ ((Lisp_Object, Lisp_Object,
+ Lisp_Object, int, int, int));
extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object,
int));
-extern void setup_raw_text_coding_system P_ ((struct coding_system *));
-extern Lisp_Object encode_coding_string P_ ((Lisp_Object,
- struct coding_system *, int));
-extern Lisp_Object decode_coding_string P_ ((Lisp_Object,
- struct coding_system *, int));
+extern Lisp_Object raw_text_coding_system P_ ((Lisp_Object));
+extern Lisp_Object coding_inherit_eol_type P_ ((Lisp_Object, Lisp_Object));
+
+extern int decode_coding_gap P_ ((struct coding_system *,
+ EMACS_INT, EMACS_INT));
+extern int encode_coding_gap P_ ((struct coding_system *,
+ EMACS_INT, EMACS_INT));
+extern void decode_coding_object P_ ((struct coding_system *,
+ Lisp_Object, EMACS_INT, EMACS_INT,
+ EMACS_INT, EMACS_INT, Lisp_Object));
+extern void encode_coding_object P_ ((struct coding_system *,
+ Lisp_Object, EMACS_INT, EMACS_INT,
+ EMACS_INT, EMACS_INT, Lisp_Object));
+
+/* Macros for backward compatibility. */
+
+#define decode_coding_region(coding, from, to) \
+ decode_coding_object (coding, Fcurrent_buffer (), \
+ from, CHAR_TO_BYTE (from), \
+ to, CHAR_TO_BYTE (to), Fcurrent_buffer ())
+
+
+#define encode_coding_region(coding, from, to) \
+ encode_coding_object (coding, Fcurrent_buffer (), \
+ from, CHAR_TO_BYTE (from), \
+ to, CHAR_TO_BYTE (to), Fcurrent_buffer ())
+
+
+#define decode_coding_string(coding, string, nocopy) \
+ decode_coding_object (coding, string, 0, 0, XSTRING (string)->size, \
+ STRING_BYTES (XSTRING (string)), Qt)
+
+#define encode_coding_string(coding, string, nocopy) \
+ (encode_coding_object (coding, string, 0, 0, XSTRING (string)->size, \
+ STRING_BYTES (XSTRING (string)), Qt), \
+ (coding)->dst_object)
+
+
+#define decode_coding_c_string(coding, src, bytes, dst_object) \
+ do { \
+ (coding)->source = (src); \
+ (coding)->src_chars = (coding)->src_bytes = (bytes); \
+ decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes), \
+ (dst_object)); \
+ } while (0)
+
+
+extern Lisp_Object preferred_coding_system P_ (());
+
+
+extern Lisp_Object Qutf_8, Qutf_8_emacs;
+
extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index;
-extern Lisp_Object Qraw_text, Qemacs_mule;
+extern Lisp_Object Qcoding_system_p;
+extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided;
+extern Lisp_Object Qiso_2022;
extern Lisp_Object Qbuffer_file_coding_system;
-extern Lisp_Object Vcoding_category_list;
-extern Lisp_Object Qutf_8;
+
+extern Lisp_Object Qunix, Qdos, Qmac;
extern Lisp_Object Qtranslation_table;
extern Lisp_Object Qtranslation_table_id;
@@ -671,9 +720,6 @@ extern Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
/* Mnemonic string to indicate type of end-of-line is not yet decided. */
extern Lisp_Object eol_mnemonic_undecided;
-/* Format of end-of-line decided by system. */
-extern int system_eol_type;
-
#ifdef emacs
extern Lisp_Object Qfile_coding_system;
extern Lisp_Object Qcall_process, Qcall_process_region;
@@ -709,13 +755,10 @@ extern struct coding_system safe_terminal_coding;
function `set-keyboard-coding-system'. */
extern struct coding_system keyboard_coding;
-/* Default coding system to be used to write a file. */
-extern struct coding_system default_buffer_file_coding;
-
/* Default coding systems used for process I/O. */
extern Lisp_Object Vdefault_process_coding_system;
-/* Function to call to force a user to force select a proper coding
+/* Function to call to force a user to force select a propert coding
system. */
extern Lisp_Object Vselect_safe_coding_system_function;
@@ -735,6 +778,9 @@ extern Lisp_Object Vdefault_file_name_coding_system;
/* Error signaled when there's a problem with detecting coding system */
extern Lisp_Object Qcoding_system_error;
+extern char emacs_mule_bytes[256];
+extern int emacs_mule_string_char P_ ((unsigned char *));
+
#endif /* EMACS_CODING_H */
/* arch-tag: 2bc3b4fa-6870-4f64-8135-b962b2d290e4
diff --git a/src/composite.c b/src/composite.c
index d3be3554c55..b688c4bbde1 100644
--- a/src/composite.c
+++ b/src/composite.c
@@ -4,6 +4,9 @@
Copyright (C) 1999
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
+ Copyright (C) 2003, 2006
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -25,7 +28,7 @@ Boston, MA 02110-1301, USA. */
#include <config.h>
#include "lisp.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "intervals.h"
/* Emacs uses special text property `composition' to support character
@@ -147,19 +150,17 @@ Lisp_Object composition_hash_table;
/* Function to call to adjust composition. */
Lisp_Object Vcompose_chars_after_function;
-/* Char-table of patterns and functions to make a composition. */
-Lisp_Object Vcomposition_function_table;
-Lisp_Object Qcomposition_function_table;
+Lisp_Object Qauto_composed;
+Lisp_Object Vauto_composition_function;
+Lisp_Object Qauto_composition_function;
+
+EXFUN (Fremove_list_of_text_properties, 4);
/* Temporary variable used in macros COMPOSITION_XXX. */
Lisp_Object composition_temp;
-
-/* Return how many columns C will occupy on the screen. It always
- returns 1 for control characters and 8-bit characters because those
- are just ignored in a composition. */
-#define CHAR_WIDTH(c) \
- (SINGLE_BYTE_CHAR_P (c) ? 1 : CHARSET_WIDTH (CHAR_CHARSET (c)))
+extern int enable_font_backend;
+
/* Return COMPOSITION-ID of a composition at buffer position
CHARPOS/BYTEPOS and length NCHARS. The `composition' property of
the sequence is PROP. STRING, if non-nil, is a string that
@@ -274,6 +275,22 @@ get_composition_id (charpos, bytepos, nchars, prop, string)
/* Check if the contents of COMPONENTS are valid if COMPONENTS is a
vector or a list. It should be a sequence of:
char1 rule1 char2 rule2 char3 ... ruleN charN+1 */
+
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend
+ && VECTORP (components)
+ && ASIZE (components) >= 2
+ && VECTORP (AREF (components, 0)))
+ {
+ /* COMPONENTS is a glyph-string. */
+ int len = ASIZE (key);
+
+ for (i = 1; i < len; i++)
+ if (! VECTORP (AREF (key, i)))
+ goto invalid_composition;
+ }
+ else
+#endif /* USE_FONT_BACKEND */
if (VECTORP (components) || CONSP (components))
{
int len = XVECTOR (key)->size;
@@ -307,6 +324,12 @@ get_composition_id (charpos, bytepos, nchars, prop, string)
: ((INTEGERP (components) || STRINGP (components))
? COMPOSITION_WITH_ALTCHARS
: COMPOSITION_WITH_RULE_ALTCHARS));
+#ifdef USE_FONT_BACKEND
+ if (cmp->method == COMPOSITION_WITH_RULE_ALTCHARS
+ && VECTORP (components)
+ && ! INTEGERP (AREF (components, 0)))
+ cmp->method = COMPOSITION_WITH_GLYPH_STRING;
+#endif /* USE_FONT_BACKEND */
cmp->hash_index = hash_index;
glyph_len = (cmp->method == COMPOSITION_WITH_RULE_ALTCHARS
? (XVECTOR (key)->size + 1) / 2
@@ -315,6 +338,14 @@ get_composition_id (charpos, bytepos, nchars, prop, string)
cmp->offsets = (short *) xmalloc (sizeof (short) * glyph_len * 2);
cmp->font = NULL;
+#ifdef USE_FONT_BACKEND
+ if (cmp->method == COMPOSITION_WITH_GLYPH_STRING)
+ {
+ cmp->width = 1; /* Should be fixed later. */
+ cmp->glyph_len--;
+ }
+ else
+#endif /* USE_FONT_BACKEND */
/* Calculate the width of overall glyphs of the composition. */
if (cmp->method != COMPOSITION_WITH_RULE_ALTCHARS)
{
@@ -339,7 +370,7 @@ get_composition_id (charpos, bytepos, nchars, prop, string)
for (i = 1; i < glyph_len; i += 2)
{
- int rule, gref, nref;
+ int rule, gref, nref, xoff, yoff;
int this_width;
float this_left;
@@ -361,7 +392,7 @@ get_composition_id (charpos, bytepos, nchars, prop, string)
| |
6---7---8 -- descent
*/
- COMPOSITION_DECODE_RULE (rule, gref, nref);
+ COMPOSITION_DECODE_RULE (rule, gref, nref, xoff, yoff);
this_left = (leftmost
+ (gref % 3) * (rightmost - leftmost) / 2.0
- (nref % 3) * this_width / 2.0);
@@ -407,7 +438,8 @@ get_composition_id (charpos, bytepos, nchars, prop, string)
int
find_composition (pos, limit, start, end, prop, object)
- int pos, limit, *start, *end;
+ int pos, limit;
+ EMACS_INT *start, *end;
Lisp_Object *prop, object;
{
Lisp_Object val;
@@ -451,7 +483,7 @@ run_composition_function (from, to, prop)
Lisp_Object prop;
{
Lisp_Object func;
- int start, end;
+ EMACS_INT start, end;
func = COMPOSITION_MODIFICATION_FUNC (prop);
/* If an invalid composition precedes or follows, try to make them
@@ -466,24 +498,29 @@ run_composition_function (from, to, prop)
to = end;
if (!NILP (Ffboundp (func)))
call2 (func, make_number (from), make_number (to));
- else if (!NILP (Ffboundp (Vcompose_chars_after_function)))
- call3 (Vcompose_chars_after_function,
- make_number (from), make_number (to), Qnil);
}
/* Make invalid compositions adjacent to or inside FROM and TO valid.
CHECK_MASK is bitwise `or' of mask bits defined by macros
CHECK_XXX (see the comment in composite.h).
+ It also resets the text-property `auto-composed' to a proper region
+ so that automatic character composition works correctly later while
+ displaying the region.
+
This function is called when a buffer text is changed. If the
change is deletion, FROM == TO. Otherwise, FROM < TO. */
void
update_compositions (from, to, check_mask)
- int from, to, check_mask;
+ EMACS_INT from, to;
+ int check_mask;
{
Lisp_Object prop;
- int start, end;
+ EMACS_INT start, end;
+ /* The beginning and end of the region to set the property
+ `auto-composed' to nil. */
+ EMACS_INT min_pos = from, max_pos = to;
if (inhibit_modification_hooks)
return;
@@ -502,6 +539,9 @@ update_compositions (from, to, check_mask)
if (from > BEGV
&& find_composition (from - 1, -1, &start, &end, &prop, Qnil))
{
+ min_pos = start;
+ if (end > to)
+ max_pos = end;
if (from < end)
Fput_text_property (make_number (from), make_number (end),
Qcomposition,
@@ -511,7 +551,11 @@ update_compositions (from, to, check_mask)
}
else if (from < ZV
&& find_composition (from, -1, &start, &from, &prop, Qnil))
- run_composition_function (start, from, prop);
+ {
+ if (from > to)
+ max_pos = from;
+ run_composition_function (start, from, prop);
+ }
}
if (check_mask & CHECK_INSIDE)
@@ -536,14 +580,32 @@ update_compositions (from, to, check_mask)
To avoid it, in such a case, we change the property of
the former to the copy of it. */
if (to < end)
- Fput_text_property (make_number (start), make_number (to),
- Qcomposition,
- Fcons (XCAR (prop), XCDR (prop)), Qnil);
+ {
+ Fput_text_property (make_number (start), make_number (to),
+ Qcomposition,
+ Fcons (XCAR (prop), XCDR (prop)), Qnil);
+ max_pos = end;
+ }
run_composition_function (start, end, prop);
}
else if (to < ZV
&& find_composition (to, -1, &start, &end, &prop, Qnil))
- run_composition_function (start, end, prop);
+ {
+ run_composition_function (start, end, prop);
+ max_pos = end;
+ }
+ }
+ if (min_pos < max_pos)
+ {
+ int count = SPECPDL_INDEX ();
+
+ specbind (Qinhibit_read_only, Qt);
+ specbind (Qinhibit_modification_hooks, Qt);
+ specbind (Qinhibit_point_motion_hooks, Qt);
+ Fremove_list_of_text_properties (make_number (min_pos),
+ make_number (max_pos),
+ Fcons (Qauto_composed, Qnil), Qnil);
+ unbind_to (count, Qnil);
}
}
@@ -590,7 +652,6 @@ compose_text (start, end, components, modification_func, string)
Fput_text_property (make_number (start), make_number (end),
Qcomposition, prop, string);
}
-
/* Emacs Lisp APIs. */
@@ -648,7 +709,7 @@ See `find-composition' for more detail. */)
Lisp_Object pos, limit, string, detail_p;
{
Lisp_Object prop, tail;
- int start, end;
+ EMACS_INT start, end;
int id;
CHECK_NUMBER_COERCE_MARKER (pos);
@@ -727,12 +788,12 @@ syms_of_composite ()
args[0] = QCtest;
args[1] = Qequal;
+ args[2] = QCweakness;
/* We used to make the hash table weak so that unreferenced
compostions can be garbage-collected. But, usually once
created compositions are repeatedly used in an Emacs session,
and thus it's not worth to save memory in such a way. So, we
make the table not weak. */
- args[2] = QCweakness;
args[3] = Qnil;
args[4] = QCsize;
args[5] = make_number (311);
@@ -758,29 +819,24 @@ valid.
The default value is the function `compose-chars-after'. */);
Vcompose_chars_after_function = intern ("compose-chars-after");
- Qcomposition_function_table = intern ("composition-function-table");
- staticpro (&Qcomposition_function_table);
-
- /* Intern this now in case it isn't already done.
- Setting this variable twice is harmless.
- But don't staticpro it here--that is done in alloc.c. */
- Qchar_table_extra_slots = intern ("char-table-extra-slots");
+ Qauto_composed = intern ("auto-composed");
+ staticpro (&Qauto_composed);
- Fput (Qcomposition_function_table, Qchar_table_extra_slots, make_number (0));
+ Qauto_composition_function = intern ("auto-composition-function");
+ staticpro (&Qauto_composition_function);
- DEFVAR_LISP ("composition-function-table", &Vcomposition_function_table,
- doc: /* Char table of patterns and functions to make a composition.
+ DEFVAR_LISP ("auto-composition-function", &Vauto_composition_function,
+ doc: /* Function to call to compose characters automatically.
+The function is called from the display routine with two arguments,
+POS and STRING.
-Each element is nil or an alist of PATTERNs vs FUNCs, where PATTERNs
-are regular expressions and FUNCs are functions. FUNC is responsible
-for composing text matching the corresponding PATTERN. FUNC is called
-with three arguments FROM, TO, and PATTERN. See the function
-`compose-chars-after' for more detail.
+If STRING is nil, the function must compose characters following POS
+in the current buffer.
-This table is looked up by the first character of a composition when
-the composition gets invalid after a change in a buffer. */);
- Vcomposition_function_table
- = Fmake_char_table (Qcomposition_function_table, Qnil);
+Otherwise, STRING is a string, and POS is an index to the string. In
+this case, the function must compose characters following POS in
+the string. */);
+ Vauto_composition_function = Qnil;
defsubr (&Scompose_region_internal);
defsubr (&Scompose_string_internal);
diff --git a/src/composite.h b/src/composite.h
index 5c19f8aa56e..08463afd899 100644
--- a/src/composite.h
+++ b/src/composite.h
@@ -4,6 +4,9 @@
Copyright (C) 1997
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
+ Copyright (C) 2003, 2006
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -25,22 +28,24 @@ Boston, MA 02110-1301, USA. */
#ifndef EMACS_COMPOSITE_H
#define EMACS_COMPOSITE_H
-/* Methods to display a sequence of components a composition. */
+/* Methods to display a sequence of components of a composition. */
enum composition_method {
- /* The first two are actually not methods, but used in code
- conversion to specify the current composing status. */
- COMPOSITION_DISABLED, /* Never handle composition data */
- COMPOSITION_NO, /* Not processing composition data */
/* Compose relatively without alternate characters. */
COMPOSITION_RELATIVE,
- /* Compose by specified composition rule. This is not used in Emacs
- 21 but we need it to decode files saved in the older versions of
- Emacs. */
+ /* Compose by specified composition rules. This is not used in
+ Emacs 21 but we need it to decode files saved in the older
+ versions of Emacs. */
COMPOSITION_WITH_RULE,
/* Compose relatively with alternate characters. */
COMPOSITION_WITH_ALTCHARS,
- /* Compose by specified composition rule with alternate characters. */
- COMPOSITION_WITH_RULE_ALTCHARS
+ /* Compose by specified composition rules with alternate characters. */
+ COMPOSITION_WITH_RULE_ALTCHARS,
+#ifdef USE_FONT_BACKEND
+ /* Compose by specified lispy glyph-string. */
+ COMPOSITION_WITH_GLYPH_STRING,
+#endif /* USE_FONT_BACKEND */
+ /* This is not a method. */
+ COMPOSITION_NO
};
/* Maximum number of compoments a single composition can have. */
@@ -128,13 +133,19 @@ extern Lisp_Object composition_temp;
->contents[(n) * 2 - 1])
/* Decode encoded composition rule RULE_CODE into GREF (global
- reference point code) and NREF (new reference point code). Don't
- check RULE_CODE, always set GREF and NREF to valid values. */
-#define COMPOSITION_DECODE_RULE(rule_code, gref, nref) \
- do { \
- gref = (rule_code) / 12; \
- if (gref > 12) gref = 11; \
- nref = (rule_code) % 12; \
+ reference point code), NREF (new reference point code), XOFF
+ (horizontal offset) YOFF (vertical offset). Don't check RULE_CODE,
+ always set GREF and NREF to valid values. By side effect,
+ RULE_CODE is modified. */
+
+#define COMPOSITION_DECODE_RULE(rule_code, gref, nref, xoff, yoff) \
+ do { \
+ xoff = (rule_code) >> 16; \
+ yoff = ((rule_code) >> 8) & 0xFF; \
+ rule_code &= 0xFF; \
+ gref = (rule_code) / 12; \
+ if (gref > 12) gref = 11; \
+ nref = (rule_code) % 12; \
} while (0)
/* Return encoded composition rule for the pair of global reference
@@ -161,6 +172,8 @@ struct composition {
/* Width, ascent, and descent pixels of the composition. */
short pixel_width, ascent, descent;
+ short lbearing, rbearing;
+
/* How many columns the overall glyphs occupy on the screen. This
gives an approximate value for column calculation in
Fcurrent_column, and etc. */
@@ -200,11 +213,14 @@ extern int n_compositions;
extern Lisp_Object Qcomposition;
extern Lisp_Object composition_hash_table;
+extern Lisp_Object Qauto_composed;
+extern Lisp_Object Vauto_composition_function;
+extern Lisp_Object Qauto_composition_function;
extern int get_composition_id P_ ((int, int, int, Lisp_Object, Lisp_Object));
-extern int find_composition P_ ((int, int, int *, int *, Lisp_Object *,
+extern int find_composition P_ ((int, int, EMACS_INT *, EMACS_INT *, Lisp_Object *,
Lisp_Object));
-extern void update_compositions P_ ((int, int, int));
+extern void update_compositions P_ ((EMACS_INT, EMACS_INT, int));
extern void make_composition_value_copy P_ ((Lisp_Object));
extern void compose_region P_ ((int, int, Lisp_Object, Lisp_Object,
Lisp_Object));
diff --git a/src/config.in b/src/config.in
index 09f7be65b3b..106aeea920c 100644
--- a/src/config.in
+++ b/src/config.in
@@ -155,6 +155,9 @@ Boston, MA 02110-1301, USA. */
/* Define to 1 if you have the `fpathconf' function. */
#undef HAVE_FPATHCONF
+/* Define to 1 if you have freetype and fontconfig libraries. */
+#undef HAVE_FREETYPE
+
/* Define to 1 if you have the `frexp' function. */
#undef HAVE_FREXP
@@ -338,6 +341,9 @@ Boston, MA 02110-1301, USA. */
/* Define to 1 if you have the `ncurses' library (-lncurses). */
#undef HAVE_LIBNCURSES
+/* Define to 1 if you have libotf library. */
+#undef HAVE_LIBOTF
+
/* Define to 1 if you have the <libpng/png.h> header file. */
#undef HAVE_LIBPNG_PNG_H
@@ -703,6 +709,9 @@ Boston, MA 02110-1301, USA. */
/* Define to 1 if you're using XFree386. */
#undef HAVE_XFREE386
+/* Define to 1 if you have the Xft library. */
+#undef HAVE_XFT
+
/* Define to 1 if XIM is available */
#undef HAVE_XIM
@@ -824,6 +833,9 @@ Boston, MA 02110-1301, USA. */
/* Define to the unexec source file name. */
#undef UNEXEC_SRC
+/* Define to 1 if we should use font-backend. */
+#undef USE_FONT_BACKEND
+
/* Define to 1 if we should use toolkit scroll bars. */
#undef USE_TOOLKIT_SCROLL_BARS
diff --git a/src/data.c b/src/data.c
index 761159ec066..4a846207073 100644
--- a/src/data.c
+++ b/src/data.c
@@ -25,7 +25,7 @@ Boston, MA 02110-1301, USA. */
#include <stdio.h>
#include "lisp.h"
#include "puresize.h"
-#include "charset.h"
+#include "character.h"
#include "buffer.h"
#include "keyboard.h"
#include "frame.h"
@@ -439,7 +439,7 @@ DEFUN ("char-or-string-p", Fchar_or_string_p, Schar_or_string_p, 1, 1, 0,
(object)
register Lisp_Object object;
{
- if (INTEGERP (object) || STRINGP (object))
+ if (CHARACTERP (object) || STRINGP (object))
return Qt;
return Qnil;
}
@@ -1957,96 +1957,8 @@ or a byte-code object. IDX starts at 0. */)
}
else if (CHAR_TABLE_P (array))
{
- Lisp_Object val;
-
- val = Qnil;
-
- if (idxval < 0)
- args_out_of_range (array, idx);
- if (idxval < CHAR_TABLE_ORDINARY_SLOTS)
- {
- if (! SINGLE_BYTE_CHAR_P (idxval))
- args_out_of_range (array, idx);
- /* For ASCII and 8-bit European characters, the element is
- stored in the top table. */
- val = XCHAR_TABLE (array)->contents[idxval];
- if (NILP (val))
- {
- int default_slot
- = (idxval < 0x80 ? CHAR_TABLE_DEFAULT_SLOT_ASCII
- : idxval < 0xA0 ? CHAR_TABLE_DEFAULT_SLOT_8_BIT_CONTROL
- : CHAR_TABLE_DEFAULT_SLOT_8_BIT_GRAPHIC);
- val = XCHAR_TABLE (array)->contents[default_slot];
- }
- if (NILP (val))
- val = XCHAR_TABLE (array)->defalt;
- while (NILP (val)) /* Follow parents until we find some value. */
- {
- array = XCHAR_TABLE (array)->parent;
- if (NILP (array))
- return Qnil;
- val = XCHAR_TABLE (array)->contents[idxval];
- if (NILP (val))
- val = XCHAR_TABLE (array)->defalt;
- }
- return val;
- }
- else
- {
- int code[4], i;
- Lisp_Object sub_table;
- Lisp_Object current_default;
-
- SPLIT_CHAR (idxval, code[0], code[1], code[2]);
- if (code[1] < 32) code[1] = -1;
- else if (code[2] < 32) code[2] = -1;
-
- /* Here, the possible range of CODE[0] (== charset ID) is
- 128..MAX_CHARSET. Since the top level char table contains
- data for multibyte characters after 256th element, we must
- increment CODE[0] by 128 to get a correct index. */
- code[0] += 128;
- code[3] = -1; /* anchor */
-
- try_parent_char_table:
- current_default = XCHAR_TABLE (array)->defalt;
- sub_table = array;
- for (i = 0; code[i] >= 0; i++)
- {
- val = XCHAR_TABLE (sub_table)->contents[code[i]];
- if (SUB_CHAR_TABLE_P (val))
- {
- sub_table = val;
- if (! NILP (XCHAR_TABLE (sub_table)->defalt))
- current_default = XCHAR_TABLE (sub_table)->defalt;
- }
- else
- {
- if (NILP (val))
- val = current_default;
- if (NILP (val))
- {
- array = XCHAR_TABLE (array)->parent;
- if (!NILP (array))
- goto try_parent_char_table;
- }
- return val;
- }
- }
- /* Reaching here means IDXVAL is a generic character in
- which each character or a group has independent value.
- Essentially it's nonsense to get a value for such a
- generic character, but for backward compatibility, we try
- the default value and parent. */
- val = current_default;
- if (NILP (val))
- {
- array = XCHAR_TABLE (array)->parent;
- if (!NILP (array))
- goto try_parent_char_table;
- }
- return val;
- }
+ CHECK_CHARACTER (idx);
+ return CHAR_TABLE_REF (array, idxval);
}
else
{
@@ -2102,45 +2014,8 @@ bool-vector. IDX starts at 0. */)
}
else if (CHAR_TABLE_P (array))
{
- if (idxval < 0)
- args_out_of_range (array, idx);
- if (idxval < CHAR_TABLE_ORDINARY_SLOTS)
- {
- if (! SINGLE_BYTE_CHAR_P (idxval))
- args_out_of_range (array, idx);
- XCHAR_TABLE (array)->contents[idxval] = newelt;
- }
- else
- {
- int code[4], i;
- Lisp_Object val;
-
- SPLIT_CHAR (idxval, code[0], code[1], code[2]);
- if (code[1] < 32) code[1] = -1;
- else if (code[2] < 32) code[2] = -1;
-
- /* See the comment of the corresponding part in Faref. */
- code[0] += 128;
- code[3] = -1; /* anchor */
- for (i = 0; code[i + 1] >= 0; i++)
- {
- val = XCHAR_TABLE (array)->contents[code[i]];
- if (SUB_CHAR_TABLE_P (val))
- array = val;
- else
- {
- Lisp_Object temp;
-
- /* VAL is a leaf. Create a sub char table with the
- initial value VAL and look into it. */
-
- temp = make_sub_char_table (val);
- XCHAR_TABLE (array)->contents[code[i]] = temp;
- array = temp;
- }
- }
- XCHAR_TABLE (array)->contents[code[i]] = newelt;
- }
+ CHECK_CHARACTER (idx);
+ CHAR_TABLE_SET (array, idxval, newelt);
}
else if (STRING_MULTIBYTE (array))
{
@@ -2149,7 +2024,7 @@ bool-vector. IDX starts at 0. */)
if (idxval < 0 || idxval >= SCHARS (array))
args_out_of_range (array, idx);
- CHECK_NUMBER (newelt);
+ CHECK_CHARACTER (newelt);
nbytes = SBYTES (array);
@@ -2184,38 +2059,9 @@ bool-vector. IDX starts at 0. */)
args_out_of_range (array, idx);
CHECK_NUMBER (newelt);
- if (XINT (newelt) < 0 || SINGLE_BYTE_CHAR_P (XINT (newelt)))
- SSET (array, idxval, XINT (newelt));
- else
- {
- /* We must relocate the string data while converting it to
- multibyte. */
- int idxval_byte, prev_bytes, new_bytes;
- unsigned char workbuf[MAX_MULTIBYTE_LENGTH], *p0 = workbuf, *p1;
- unsigned char *origstr = SDATA (array), *str;
- int nchars, nbytes;
- USE_SAFE_ALLOCA;
-
- nchars = SCHARS (array);
- nbytes = idxval_byte = count_size_as_multibyte (origstr, idxval);
- nbytes += count_size_as_multibyte (origstr + idxval,
- nchars - idxval);
- SAFE_ALLOCA (str, unsigned char *, nbytes);
- copy_text (SDATA (array), str, nchars, 0, 1);
- PARSE_MULTIBYTE_SEQ (str + idxval_byte, nbytes - idxval_byte,
- prev_bytes);
- new_bytes = CHAR_STRING (XINT (newelt), p0);
- allocate_string_data (XSTRING (array), nchars,
- nbytes + new_bytes - prev_bytes);
- bcopy (str, SDATA (array), idxval_byte);
- p1 = SDATA (array) + idxval_byte;
- while (new_bytes--)
- *p1++ = *p0++;
- bcopy (str + idxval_byte + prev_bytes, p1,
- nbytes - (idxval_byte + prev_bytes));
- SAFE_FREE ();
- clear_string_char_byte_cache ();
- }
+ if (XINT (newelt) >= 0 && ! SINGLE_BYTE_CHAR_P (XINT (newelt)))
+ args_out_of_range (array, newelt);
+ SSET (array, idxval, XINT (newelt));
}
return newelt;
diff --git a/src/dired.c b/src/dired.c
index 17a80a3ce4b..1aea81c2a21 100644
--- a/src/dired.c
+++ b/src/dired.c
@@ -96,6 +96,7 @@ extern struct direct *readdir ();
#include "systime.h"
#include "buffer.h"
#include "commands.h"
+#include "character.h"
#include "charset.h"
#include "coding.h"
#include "regex.h"
diff --git a/src/dispextern.h b/src/dispextern.h
index 6aff0dc1803..3cd9eb6052a 100644
--- a/src/dispextern.h
+++ b/src/dispextern.h
@@ -1220,6 +1220,11 @@ struct glyph_string
*clip_tail, not including their overhangs. */
struct glyph_string *clip_head, *clip_tail;
+#ifdef USE_FONT_BACKEND
+ /* The current clipping area. */
+ int clip_x, clip_y, clip_width, clip_height;
+#endif /* USE_FONT_BACKEND */
+
struct glyph_string *next, *prev;
};
@@ -1391,6 +1396,7 @@ enum lface_attribute_index
LFACE_FONT_INDEX,
LFACE_INHERIT_INDEX,
LFACE_AVGWIDTH_INDEX,
+ LFACE_FONTSET_INDEX,
LFACE_VECTOR_SIZE
};
@@ -1475,10 +1481,12 @@ struct face
reallocated. */
int font_info_id;
- /* Fontset ID if this face uses a fontset, or -1. This is only >= 0
- if the face was realized for a composition sequence.
- Otherwise, a specific font is loaded from the set of fonts
- specified by the fontset given by the family attribute of the face. */
+#ifdef USE_FONT_BACKEND
+ struct font_info *font_info;
+#endif /* USE_FONT_BACKEND */
+
+ /* Fontset ID if for this face's fontset. Non-ASCII faces derived
+ from the same ASCII face have the same fontset. */
int fontset;
/* Pixmap width and height. */
@@ -1510,13 +1518,6 @@ struct face
/* The hash value of this face. */
unsigned hash;
- /* The charset for which this face was realized if it was realized
- for use in multibyte text. If fontset >= 0, this is the charset
- of the first character of the composition sequence. A value of
- charset < 0 means the face was realized for use in unibyte text
- where the idea of Emacs charsets isn't applicable. */
- int charset;
-
/* Non-zero if text in this face should be underlined, overlined,
strike-through or have a box drawn around it. */
unsigned underline_p : 1;
@@ -1562,9 +1563,15 @@ struct face
/* Next and previous face in hash collision list of face cache. */
struct face *next, *prev;
- /* If this face is for ASCII characters, this points this face
- itself. Otherwise, this points a face for ASCII characters. */
+ /* If this face is an ASCII face, this points to this face itself.
+ Otherwise, this points to an ASCII face that has the same
+ attributes except the font. */
struct face *ascii_face;
+
+#ifdef USE_FONT_BACKEND
+ /* Extra member that a font-driver uses privately. */
+ void *extra;
+#endif /* USE_FONT_BACKEND */
};
@@ -1652,7 +1659,7 @@ struct face_cache
/* Non-zero if FACE is suitable for displaying character CHAR. */
#define FACE_SUITABLE_FOR_CHAR_P(FACE, CHAR) \
- (SINGLE_BYTE_CHAR_P (CHAR) \
+ (ASCII_CHAR_P (CHAR) \
? (FACE) == (FACE)->ascii_face \
: face_suitable_for_char_p ((FACE), (CHAR)))
@@ -1660,15 +1667,15 @@ struct face_cache
with id ID but is suitable for displaying character CHAR.
This macro is only meaningful for multibyte character CHAR. */
-#define FACE_FOR_CHAR(F, FACE, CHAR) \
- (SINGLE_BYTE_CHAR_P (CHAR) \
- ? (FACE)->ascii_face->id \
- : face_for_char ((F), (FACE), (CHAR)))
+#define FACE_FOR_CHAR(F, FACE, CHAR, POS, OBJECT) \
+ (ASCII_CHAR_P (CHAR) \
+ ? (FACE)->ascii_face->id \
+ : face_for_char ((F), (FACE), (CHAR), (POS), (OBJECT)))
#else /* not HAVE_WINDOW_SYSTEM */
#define FACE_SUITABLE_FOR_CHAR_P(FACE, CHAR) 1
-#define FACE_FOR_CHAR(F, FACE, CHAR) ((FACE)->id)
+#define FACE_FOR_CHAR(F, FACE, CHAR, POS, OBJECT) ((FACE)->id)
#endif /* not HAVE_WINDOW_SYSTEM */
@@ -1786,6 +1793,7 @@ enum display_element_type
enum prop_idx
{
+ AUTO_COMPOSED_PROP_IDX,
FONTIFIED_PROP_IDX,
FACE_PROP_IDX,
INVISIBLE_PROP_IDX,
@@ -2327,7 +2335,9 @@ struct redisplay_interface
the two-byte form of C. Encoding is returned in *CHAR2B. If
TWO_BYTE_P is non-null, return non-zero there if font is two-byte. */
int (*encode_char) P_ ((int c, XChar2b *char2b,
- struct font_info *font_into, int *two_byte_p));
+ struct font_info *font_into,
+ struct charset *charset,
+ int *two_byte_p));
/* Compute left and right overhang of glyph string S.
A NULL pointer if platform does not support this. */
@@ -2832,15 +2842,17 @@ void clear_face_cache P_ ((int));
unsigned long load_color P_ ((struct frame *, struct face *, Lisp_Object,
enum lface_attribute_index));
void unload_color P_ ((struct frame *, unsigned long));
-int face_font_available_p P_ ((struct frame *, Lisp_Object));
+char *choose_face_font P_ ((struct frame *, Lisp_Object *, Lisp_Object,
+ int *));
int ascii_face_of_lisp_face P_ ((struct frame *, int));
void prepare_face_for_display P_ ((struct frame *, struct face *));
int xstricmp P_ ((const unsigned char *, const unsigned char *));
-int lookup_face P_ ((struct frame *, Lisp_Object *, int, struct face *));
-int lookup_named_face P_ ((struct frame *, Lisp_Object, int, int));
+int lookup_face P_ ((struct frame *, Lisp_Object *));
+int lookup_non_ascii_face P_ ((struct frame *, int, struct face *));
+int lookup_named_face P_ ((struct frame *, Lisp_Object, int));
int smaller_face P_ ((struct frame *, int, int));
int face_with_height P_ ((struct frame *, int, int));
-int lookup_derived_face P_ ((struct frame *, Lisp_Object, int, int, int));
+int lookup_derived_face P_ ((struct frame *, Lisp_Object, int, int));
void init_frame_faces P_ ((struct frame *));
void free_frame_faces P_ ((struct frame *));
void recompute_basic_faces P_ ((struct frame *));
@@ -2851,10 +2863,12 @@ int face_at_string_position P_ ((struct window *, Lisp_Object, int, int, int,
int merge_faces P_ ((struct frame *, Lisp_Object, int, int));
int compute_char_face P_ ((struct frame *, int, Lisp_Object));
void free_all_realized_faces P_ ((Lisp_Object));
+void free_realized_face P_ ((struct frame *, struct face *));
extern Lisp_Object Qforeground_color, Qbackground_color;
extern Lisp_Object Qframe_set_background_mode;
extern char unspecified_fg[], unspecified_bg[];
-void free_realized_multibyte_face P_ ((struct frame *, int));
+extern Lisp_Object split_font_name_into_vector P_ ((Lisp_Object));
+extern Lisp_Object build_font_name_from_vector P_ ((Lisp_Object));
/* Defined in xfns.c */
diff --git a/src/dispnew.c b/src/dispnew.c
index 3cbd878bb13..f621aef273a 100644
--- a/src/dispnew.c
+++ b/src/dispnew.c
@@ -37,7 +37,7 @@ Boston, MA 02110-1301, USA. */
#include "dispextern.h"
#include "cm.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "keyboard.h"
#include "frame.h"
#include "window.h"
@@ -2283,7 +2283,6 @@ static void
adjust_frame_glyphs_for_frame_redisplay (f)
struct frame *f;
{
- struct dim ch_dim;
struct dim matrix_dim;
int pool_changed_p;
int window_change_flags;
@@ -2292,10 +2291,6 @@ adjust_frame_glyphs_for_frame_redisplay (f)
if (!FRAME_LIVE_P (f))
return;
- /* Determine the smallest character in any font for F. On
- console windows, all characters have dimension (1, 1). */
- ch_dim.width = ch_dim.height = 1;
-
top_window_y = FRAME_TOP_MARGIN (f);
/* Allocate glyph pool structures if not already done. */
@@ -2384,19 +2379,10 @@ static void
adjust_frame_glyphs_for_window_redisplay (f)
struct frame *f;
{
- struct dim ch_dim;
struct window *w;
xassert (FRAME_WINDOW_P (f) && FRAME_LIVE_P (f));
- /* Get minimum sizes. */
-#ifdef HAVE_WINDOW_SYSTEM
- ch_dim.width = FRAME_SMALLEST_CHAR_WIDTH (f);
- ch_dim.height = FRAME_SMALLEST_FONT_HEIGHT (f);
-#else
- ch_dim.width = ch_dim.height = 1;
-#endif
-
/* Allocate/reallocate window matrices. */
allocate_matrices_for_window_redisplay (XWINDOW (FRAME_ROOT_WINDOW (f)));
diff --git a/src/disptab.h b/src/disptab.h
index 80936fb1830..0d7a03c43ba 100644
--- a/src/disptab.h
+++ b/src/disptab.h
@@ -36,8 +36,14 @@ Boston, MA 02110-1301, USA. */
extern Lisp_Object disp_char_vector P_ ((struct Lisp_Char_Table *, int));
-#define DISP_CHAR_VECTOR(dp, c) \
- (SINGLE_BYTE_CHAR_P(c) ? (dp)->contents[c] : disp_char_vector ((dp), (c)))
+#define DISP_CHAR_VECTOR(dp, c) \
+ (ASCII_CHAR_P(c) \
+ ? (NILP ((dp)->ascii) \
+ ? (dp)->defalt \
+ : (SUB_CHAR_TABLE_P ((dp)->ascii) \
+ ? XSUB_CHAR_TABLE ((dp)->ascii)->contents[c] \
+ : (dp)->ascii)) \
+ : disp_char_vector ((dp), (c)))
/* Defined in window.c. */
extern struct Lisp_Char_Table *window_display_table P_ ((struct window *));
diff --git a/src/doc.c b/src/doc.c
index 93f372606a4..ecb0197b3ca 100644
--- a/src/doc.c
+++ b/src/doc.c
@@ -41,7 +41,7 @@ Boston, MA 02110-1301, USA. */
#include "lisp.h"
#include "buffer.h"
#include "keyboard.h"
-#include "charset.h"
+#include "character.h"
#include "keymap.h"
#ifdef HAVE_INDEX
diff --git a/src/doprnt.c b/src/doprnt.c
index 63c0261e6ae..6e71e261788 100644
--- a/src/doprnt.c
+++ b/src/doprnt.c
@@ -47,7 +47,7 @@ Boston, MA 02110-1301, USA. */
/* Since we use the macro CHAR_HEAD_P, we have to include this, but
don't have to include others because CHAR_HEAD_P does not contains
another macro. */
-#include "charset.h"
+#include "character.h"
static int doprnt1 ();
diff --git a/src/dosfns.c b/src/dosfns.c
index 22aaa62f0d1..2d5169dcf27 100644
--- a/src/dosfns.c
+++ b/src/dosfns.c
@@ -38,7 +38,7 @@ Boston, MA 02110-1301, USA. */
#include "dosfns.h"
#include "msdos.h"
#include "dispextern.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include <dpmi.h>
#include <go32.h>
diff --git a/src/editfns.c b/src/editfns.c
index aea044db068..46d661452b2 100644
--- a/src/editfns.c
+++ b/src/editfns.c
@@ -52,7 +52,7 @@ Boston, MA 02110-1301, USA. */
#include "intervals.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "frame.h"
#include "window.h"
@@ -207,9 +207,7 @@ usage: (char-to-string CHAR) */)
CHECK_NUMBER (character);
- len = (SINGLE_BYTE_CHAR_P (XFASTINT (character))
- ? (*str = (unsigned char)(XFASTINT (character)), 1)
- : char_to_string (XFASTINT (character), str));
+ len = CHAR_STRING (XFASTINT (character), str);
return make_string_from_bytes (str, 1, len);
}
@@ -2149,7 +2147,7 @@ general_insert_function (insert_func, insert_from_string_func,
len = CHAR_STRING (XFASTINT (val), str);
else
{
- str[0] = (SINGLE_BYTE_CHAR_P (XINT (val))
+ str[0] = (ASCII_CHAR_P (XINT (val))
? XINT (val)
: multibyte_char_to_unibyte (XINT (val), Qnil));
len = 1;
@@ -2317,6 +2315,29 @@ from adjoining text, if those properties are sticky. */)
return Qnil;
}
+DEFUN ("insert-byte", Finsert_byte, Sinsert_byte, 2, 3, 0,
+ doc: /* Insert COUNT (second arg) copies of BYTE (first arg).
+Both arguments are required.
+BYTE is a number of the range 0..255.
+
+If BYTE is 128..255 and the current buffer is multibyte, the
+corresponding eight-bit character is inserted.
+
+Point, and before-insertion markers, are relocated as in the function `insert'.
+The optional third arg INHERIT, if non-nil, says to inherit text properties
+from adjoining text, if those properties are sticky. */)
+ (byte, count, inherit)
+ Lisp_Object byte, count, inherit;
+{
+ CHECK_NUMBER (byte);
+ if (XINT (byte) < 0 || XINT (byte) > 255)
+ args_out_of_range_3 (byte, make_number (0), make_number (255));
+ if (XINT (byte) >= 128
+ && ! NILP (current_buffer->enable_multibyte_characters))
+ XSETFASTINT (byte, BYTE8_TO_CHAR (XINT (byte)));
+ return Finsert_char (byte, count, inherit);
+}
+
/* Making strings from buffer contents. */
@@ -2864,12 +2885,73 @@ Both characters must have the same length of multi-byte form. */)
return Qnil;
}
+
+static Lisp_Object check_translation P_ ((int, int, int, Lisp_Object));
+
+/* Helper function for Ftranslate_region_internal.
+
+ Check if a character sequence at POS (POS_BYTE) matches an element
+ of VAL. VAL is a list (([FROM-CHAR ...] . TO) ...). If a matching
+ element is found, return it. Otherwise return Qnil. */
+
+static Lisp_Object
+check_translation (pos, pos_byte, end, val)
+ int pos, pos_byte, end;
+ Lisp_Object val;
+{
+ int buf_size = 16, buf_used = 0;
+ int *buf = alloca (sizeof (int) * buf_size);
+
+ for (; CONSP (val); val = XCDR (val))
+ {
+ Lisp_Object elt;
+ int len, i;
+
+ elt = XCAR (val);
+ if (! CONSP (elt))
+ continue;
+ elt = XCAR (elt);
+ if (! VECTORP (elt))
+ continue;
+ len = ASIZE (elt);
+ if (len <= end - pos)
+ {
+ for (i = 0; i < len; i++)
+ {
+ if (buf_used <= i)
+ {
+ unsigned char *p = BYTE_POS_ADDR (pos_byte);
+ int len;
+
+ if (buf_used == buf_size)
+ {
+ int *newbuf;
+
+ buf_size += 16;
+ newbuf = alloca (sizeof (int) * buf_size);
+ memcpy (newbuf, buf, sizeof (int) * buf_used);
+ buf = newbuf;
+ }
+ buf[buf_used++] = STRING_CHAR_AND_LENGTH (p, 0, len);
+ pos_byte += len;
+ }
+ if (XINT (AREF (elt, i)) != buf[i])
+ break;
+ }
+ if (i == len)
+ return XCAR (val);
+ }
+ }
+ return Qnil;
+}
+
+
DEFUN ("translate-region-internal", Ftranslate_region_internal,
Stranslate_region_internal, 3, 3, 0,
doc: /* Internal use only.
From START to END, translate characters according to TABLE.
-TABLE is a string; the Nth character in it is the mapping
-for the character with code N.
+TABLE is a string or a char-table; the Nth character in it is the
+mapping for the character with code N.
It returns the number of characters changed. */)
(start, end, table)
Lisp_Object start;
@@ -2883,10 +2965,13 @@ It returns the number of characters changed. */)
int pos, pos_byte, end_pos;
int multibyte = !NILP (current_buffer->enable_multibyte_characters);
int string_multibyte;
+ Lisp_Object val;
validate_region (&start, &end);
if (CHAR_TABLE_P (table))
{
+ if (! EQ (XCHAR_TABLE (table)->purpose, Qtranslation_table))
+ error ("Not a translation table");
size = MAX_CHAR;
tt = NULL;
}
@@ -2897,14 +2982,14 @@ It returns the number of characters changed. */)
if (! multibyte && (SCHARS (table) < SBYTES (table)))
table = string_make_unibyte (table);
string_multibyte = SCHARS (table) < SBYTES (table);
- size = SCHARS (table);
+ size = SBYTES (table);
tt = SDATA (table);
}
pos = XINT (start);
pos_byte = CHAR_TO_BYTE (pos);
end_pos = XINT (end);
- modify_region (current_buffer, pos, XINT (end));
+ modify_region (current_buffer, pos, end_pos);
cnt = 0;
for (; pos < end_pos; )
@@ -2913,6 +2998,7 @@ It returns the number of characters changed. */)
unsigned char *str, buf[MAX_MULTIBYTE_LENGTH];
int len, str_len;
int oc;
+ Lisp_Object val;
if (multibyte)
oc = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len);
@@ -2927,7 +3013,7 @@ It returns the number of characters changed. */)
if (string_multibyte)
{
str = tt + string_char_to_byte (table, oc);
- nc = STRING_CHAR_AND_LENGTH (str, MAX_MULTIBYTE_LENGTH,
+ nc = STRING_CHAR_AND_LENGTH (str, MAX_MULTIBYTE_LENGTH,
str_len);
}
else
@@ -2935,7 +3021,7 @@ It returns the number of characters changed. */)
nc = tt[oc];
if (! ASCII_BYTE_P (nc) && multibyte)
{
- str_len = CHAR_STRING (nc, buf);
+ str_len = BYTE8_STRING (nc, buf);
str = buf;
}
else
@@ -2947,28 +3033,34 @@ It returns the number of characters changed. */)
}
else
{
- Lisp_Object val;
int c;
nc = oc;
val = CHAR_TABLE_REF (table, oc);
- if (INTEGERP (val)
+ if (CHARACTERP (val)
&& (c = XINT (val), CHAR_VALID_P (c, 0)))
{
nc = c;
str_len = CHAR_STRING (nc, buf);
str = buf;
}
+ else if (VECTORP (val) || (CONSP (val)))
+ {
+ /* VAL is [TO_CHAR ...] or (([FROM-CHAR ...] . TO) ...)
+ where TO is TO-CHAR or [TO-CHAR ...]. */
+ nc = -1;
+ }
}
- if (nc != oc)
+ if (nc != oc && nc >= 0)
{
+ /* Simple one char to one char translation. */
if (len != str_len)
{
Lisp_Object string;
/* This is less efficient, because it moves the gap,
- but it should multibyte characters correctly. */
+ but it should handle multibyte characters correctly. */
string = make_multibyte_string (str, 1, str_len);
replace_range (pos, pos + 1, string, 1, 0, 1);
len = str_len;
@@ -2983,6 +3075,46 @@ It returns the number of characters changed. */)
}
++cnt;
}
+ else if (nc < 0)
+ {
+ Lisp_Object string;
+
+ if (CONSP (val))
+ {
+ val = check_translation (pos, pos_byte, end_pos, val);
+ if (NILP (val))
+ {
+ pos_byte += len;
+ pos++;
+ continue;
+ }
+ /* VAL is ([FROM-CHAR ...] . TO). */
+ len = ASIZE (XCAR (val));
+ val = XCDR (val);
+ }
+ else
+ len = 1;
+
+ if (VECTORP (val))
+ {
+ int i;
+
+ string = Fmake_string (make_number (ASIZE (val)),
+ AREF (val, 0));
+ for (i = 1; i < ASIZE (val); i++)
+ Faset (string, make_number (i), AREF (val, i));
+ }
+ else
+ {
+ string = Fmake_string (make_number (1), val);
+ }
+ replace_range (pos, pos + len, string, 1, 0, 1);
+ pos_byte += SBYTES (string);
+ pos += SCHARS (string);
+ cnt += SCHARS (string);
+ end_pos += SCHARS (string) - len;
+ continue;
+ }
}
pos_byte += len;
pos++;
@@ -3574,8 +3706,8 @@ usage: (format STRING &rest OBJECTS) */)
thissize = 30;
if (*format == 'c')
{
- if (! SINGLE_BYTE_CHAR_P (XINT (args[n]))
- /* Note: No one can remember why we have to treat
+ if (! ASCII_CHAR_P (XINT (args[n]))
+ /* Note: No one can remeber why we have to treat
the character 0 as a multibyte character here.
But, until it causes a real problem, let's
don't change it. */
@@ -3961,8 +4093,20 @@ Case is ignored if `case-fold-search' is non-nil in the current buffer. */)
/* Do these in separate statements,
then compare the variables.
because of the way DOWNCASE uses temp variables. */
- i1 = DOWNCASE (XFASTINT (c1));
- i2 = DOWNCASE (XFASTINT (c2));
+ i1 = XFASTINT (c1);
+ if (NILP (current_buffer->enable_multibyte_characters)
+ && ! ASCII_CHAR_P (i1))
+ {
+ MAKE_CHAR_MULTIBYTE (i1);
+ }
+ i2 = XFASTINT (c2);
+ if (NILP (current_buffer->enable_multibyte_characters)
+ && ! ASCII_CHAR_P (i2))
+ {
+ MAKE_CHAR_MULTIBYTE (i2);
+ }
+ i1 = DOWNCASE (i1);
+ i2 = DOWNCASE (i2);
return (i1 == i2 ? Qt : Qnil);
}
@@ -4442,6 +4586,7 @@ functions if all the text being accessed has this property. */);
defsubr (&Sinsert_and_inherit);
defsubr (&Sinsert_and_inherit_before_markers);
defsubr (&Sinsert_char);
+ defsubr (&Sinsert_byte);
defsubr (&Suser_login_name);
defsubr (&Suser_real_login_name);
diff --git a/src/emacs.c b/src/emacs.c
index 1e817112f48..2bacb1abbed 100644
--- a/src/emacs.c
+++ b/src/emacs.c
@@ -821,6 +821,9 @@ bug_reporting_address ()
return count >= 3 ? REPORT_EMACS_BUG_PRETEST_ADDRESS : REPORT_EMACS_BUG_ADDRESS;
}
+#ifdef USE_FONT_BACKEND
+extern int enable_font_backend;
+#endif /* USE_FONT_BACKEND */
/* ARGSUSED */
int
@@ -1302,6 +1305,7 @@ main (argc, argv
init_alloc_once ();
init_obarray ();
init_eval_once ();
+ init_character_once ();
init_charset_once ();
init_coding_once ();
init_syntax_once (); /* Create standard syntax table. */
@@ -1420,12 +1424,15 @@ main (argc, argv
Lisp_Object buffer;
buffer = Fcdr (XCAR (tail));
- /* Verify that all buffers are empty now, as they
- ought to be. */
- if (BUF_Z (XBUFFER (buffer)) > BUF_BEG (XBUFFER (buffer)))
- abort ();
- /* It is safe to do this crudely in an empty buffer. */
- XBUFFER (buffer)->enable_multibyte_characters = Qnil;
+ /* Make a multibyte buffer unibyte. */
+ if (BUF_Z_BYTE (XBUFFER (buffer)) > BUF_Z (XBUFFER (buffer)))
+ {
+ struct buffer *current = current_buffer;
+
+ set_buffer_temp (XBUFFER (buffer));
+ Fset_buffer_multibyte (Qnil);
+ set_buffer_temp (current);
+ }
}
}
}
@@ -1433,6 +1440,12 @@ main (argc, argv
no_loadup
= argmatch (argv, argc, "-nl", "--no-loadup", 6, NULL, &skip_args);
+#ifdef USE_FONT_BACKEND
+ enable_font_backend = 0;
+ if (argmatch (argv, argc, "-enable-font-backend", "--enable-font-backend",
+ 4, NULL, &skip_args))
+ enable_font_backend = 1;
+#endif /* USE_FONT_BACKEND */
#ifdef HAVE_X_WINDOWS
/* Stupid kludge to catch command-line display spec. We can't
@@ -1554,6 +1567,7 @@ main (argc, argv
syms_of_data ();
#endif
syms_of_alloc ();
+ syms_of_chartab ();
syms_of_lread ();
syms_of_print ();
syms_of_eval ();
@@ -1572,6 +1586,7 @@ main (argc, argv
/* Called before init_window_once for Mac OS Classic. */
syms_of_ccl ();
#endif
+ syms_of_character ();
syms_of_charset ();
syms_of_cmds ();
#ifndef NO_DIR_LIBRARY
@@ -1618,6 +1633,9 @@ main (argc, argv
syms_of_window ();
syms_of_xdisp ();
#ifdef HAVE_WINDOW_SYSTEM
+#ifdef USE_FONT_BACKEND
+ syms_of_font ();
+#endif /* USE_FONT_BACKEND */
syms_of_fringe ();
syms_of_image ();
#endif /* HAVE_WINDOW_SYSTEM */
@@ -1683,6 +1701,8 @@ main (argc, argv
#endif /* HAVE_NTGUI */
}
+ init_charset ();
+
if (!noninteractive)
{
#ifdef VMS
@@ -1829,6 +1849,7 @@ struct standard_args standard_args[] =
{ "-unibyte", "--unibyte", 81, 0 },
{ "-no-multibyte", "--no-multibyte", 80, 0 },
{ "-nl", "--no-loadup", 70, 0 },
+ { "-enable-font-backend", "--enable-font-backend", 65, 0 },
/* -d must come last before the options handled in startup.el. */
{ "-d", "--display", 60, 1 },
{ "-display", 0, 60, 1 },
diff --git a/src/fileio.c b/src/fileio.c
index d26b2808726..a8408927f5c 100644
--- a/src/fileio.c
+++ b/src/fileio.c
@@ -74,7 +74,7 @@ extern int errno;
#include "lisp.h"
#include "intervals.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "window.h"
#include "blockinput.h"
@@ -271,9 +271,12 @@ report_file_error (string, data)
{
Lisp_Object errstring;
int errorno = errno;
+ char *str;
synchronize_system_messages_locale ();
- errstring = code_convert_string_norecord (build_string (strerror (errorno)),
+ str = strerror (errorno);
+ errstring = code_convert_string_norecord (make_unibyte_string (str,
+ strlen (str)),
Vlocale_coding_system, 0);
while (1)
@@ -311,6 +314,7 @@ restore_point_unwind (location)
Fset_marker (location, Qnil, Qnil);
return Qnil;
}
+
Lisp_Object Qexpand_file_name;
Lisp_Object Qsubstitute_in_file_name;
@@ -2291,7 +2295,8 @@ duplicates what `expand-file-name' does. */)
convert what we substitute into multibyte. */
while (*o)
{
- int c = unibyte_char_to_multibyte (*o++);
+ int c = *o++;
+ c = unibyte_char_to_multibyte (c);
x += CHAR_STRING (c, x);
}
}
@@ -3765,7 +3770,7 @@ actually used. */)
unsigned char buffer[1 << 14];
int replace_handled = 0;
int set_coding_system = 0;
- int coding_system_decided = 0;
+ Lisp_Object coding_system;
int read_quit = 0;
Lisp_Object old_Vdeactivate_mark = Vdeactivate_mark;
int we_locked_file = 0;
@@ -3785,6 +3790,10 @@ actually used. */)
CHECK_STRING (filename);
filename = Fexpand_file_name (filename, Qnil);
+ /* The value Qnil means that the coding system is not yet
+ decided. */
+ coding_system = Qnil;
+
/* If the file name has special constructs in it,
call the corresponding file handler. */
handler = Ffind_file_name_handler (filename, Qinsert_file_contents);
@@ -3908,27 +3917,18 @@ actually used. */)
if (EQ (Vcoding_system_for_read, Qauto_save_coding))
{
- /* We use emacs-mule for auto saving... */
- setup_coding_system (Qemacs_mule, &coding);
- /* ... but with the special flag to indicate to read in a
- multibyte sequence for eight-bit-control char as is. */
- coding.flags = 1;
- coding.src_multibyte = 0;
- coding.dst_multibyte
- = !NILP (current_buffer->enable_multibyte_characters);
- coding.eol_type = CODING_EOL_LF;
- coding_system_decided = 1;
+ coding_system = Qutf_8_emacs;
+ setup_coding_system (coding_system, &coding);
+ /* Ensure we set Vlast_coding_system_used. */
+ set_coding_system = 1;
}
else if (BEG < Z)
{
/* Decide the coding system to use for reading the file now
because we can't use an optimized method for handling
`coding:' tag if the current buffer is not empty. */
- Lisp_Object val;
- val = Qnil;
-
if (!NILP (Vcoding_system_for_read))
- val = Vcoding_system_for_read;
+ coding_system = Vcoding_system_for_read;
else
{
/* Don't try looking inside a file for a coding system
@@ -3984,8 +3984,8 @@ actually used. */)
insert_1_both (read_buf, nread, nread, 0, 0, 0);
TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
- val = call2 (Vset_auto_coding_function,
- filename, make_number (nread));
+ coding_system = call2 (Vset_auto_coding_function,
+ filename, make_number (nread));
set_buffer_internal (prev);
/* Discard the unwind protect for recovering the
@@ -3999,34 +3999,33 @@ actually used. */)
}
}
- if (NILP (val))
+ if (NILP (coding_system))
{
/* If we have not yet decided a coding system, check
file-coding-system-alist. */
- Lisp_Object args[6], coding_systems;
+ Lisp_Object args[6];
args[0] = Qinsert_file_contents, args[1] = orig_filename;
args[2] = visit, args[3] = beg, args[4] = end, args[5] = replace;
- coding_systems = Ffind_operation_coding_system (6, args);
- if (CONSP (coding_systems))
- val = XCAR (coding_systems);
+ coding_system = Ffind_operation_coding_system (6, args);
+ if (CONSP (coding_system))
+ coding_system = XCAR (coding_system);
}
}
- setup_coding_system (Fcheck_coding_system (val), &coding);
- /* Ensure we set Vlast_coding_system_used. */
- set_coding_system = 1;
+ if (NILP (coding_system))
+ coding_system = Qundecided;
+ else
+ CHECK_CODING_SYSTEM (coding_system);
- if (NILP (current_buffer->enable_multibyte_characters)
- && ! NILP (val))
+ if (NILP (current_buffer->enable_multibyte_characters))
/* We must suppress all character code conversion except for
end-of-line conversion. */
- setup_raw_text_coding_system (&coding);
+ coding_system = raw_text_coding_system (coding_system);
- coding.src_multibyte = 0;
- coding.dst_multibyte
- = !NILP (current_buffer->enable_multibyte_characters);
- coding_system_decided = 1;
+ setup_coding_system (coding_system, &coding);
+ /* Ensure we set Vlast_coding_system_used. */
+ set_coding_system = 1;
}
/* If requested, replace the accessible part of the buffer
@@ -4045,7 +4044,8 @@ actually used. */)
and let the following if-statement handle the replace job. */
if (!NILP (replace)
&& BEGV < ZV
- && !(coding.common_flags & CODING_REQUIRE_DECODING_MASK))
+ && (NILP (coding_system)
+ || ! CODING_REQUIRE_DECODING (&coding)))
{
/* same_at_start and same_at_end count bytes,
because file access counts bytes
@@ -4080,21 +4080,15 @@ actually used. */)
else if (nread == 0)
break;
- if (coding.type == coding_type_undecided)
- detect_coding (&coding, buffer, nread);
- if (coding.common_flags & CODING_REQUIRE_DECODING_MASK)
- /* We found that the file should be decoded somehow.
- Let's give up here. */
+ if (CODING_REQUIRE_DETECTION (&coding))
{
- giveup_match_end = 1;
- break;
+ coding_system = detect_coding_system (buffer, nread, nread, 1, 0,
+ coding_system);
+ setup_coding_system (coding_system, &coding);
}
- if (coding.eol_type == CODING_EOL_UNDECIDED)
- detect_eol (&coding, buffer, nread);
- if (coding.eol_type != CODING_EOL_UNDECIDED
- && coding.eol_type != CODING_EOL_LF)
- /* We found that the format of eol should be decoded.
+ if (CODING_REQUIRE_DECODING (&coding))
+ /* We found that the file should be decoded somehow.
Let's give up here. */
{
giveup_match_end = 1;
@@ -4239,124 +4233,108 @@ actually used. */)
{
int same_at_start = BEGV_BYTE;
int same_at_end = ZV_BYTE;
+ int same_at_start_charpos;
+ int inserted_chars;
int overlap;
int bufpos;
- /* Make sure that the gap is large enough. */
- int bufsize = 2 * st.st_size;
- unsigned char *conversion_buffer = (unsigned char *) xmalloc (bufsize);
+ unsigned char *decoded;
int temp;
+ int this_count = SPECPDL_INDEX ();
+ int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
+ Lisp_Object conversion_buffer;
+
+ conversion_buffer = code_conversion_save (1, multibyte);
/* First read the whole file, performing code conversion into
CONVERSION_BUFFER. */
if (lseek (fd, XINT (beg), 0) < 0)
- {
- xfree (conversion_buffer);
- report_file_error ("Setting file position",
- Fcons (orig_filename, Qnil));
- }
+ report_file_error ("Setting file position",
+ Fcons (orig_filename, Qnil));
total = st.st_size; /* Total bytes in the file. */
how_much = 0; /* Bytes read from file so far. */
inserted = 0; /* Bytes put into CONVERSION_BUFFER so far. */
unprocessed = 0; /* Bytes not processed in previous loop. */
+ GCPRO1 (conversion_buffer);
while (how_much < total)
{
+ /* We read one bunch by one (READ_BUF_SIZE bytes) to allow
+ quitting while reading a huge while. */
/* try is reserved in some compilers (Microsoft C) */
int trytry = min (total - how_much, READ_BUF_SIZE - unprocessed);
- unsigned char *destination = read_buf + unprocessed;
int this;
/* Allow quitting out of the actual I/O. */
immediate_quit = 1;
QUIT;
- this = emacs_read (fd, destination, trytry);
+ this = emacs_read (fd, read_buf + unprocessed, trytry);
immediate_quit = 0;
- if (this < 0 || this + unprocessed == 0)
+ if (this <= 0)
{
- how_much = this;
+ if (this < 0)
+ how_much = this;
break;
}
how_much += this;
- if (CODING_MAY_REQUIRE_DECODING (&coding))
- {
- int require, result;
-
- this += unprocessed;
-
- /* If we are using more space than estimated,
- make CONVERSION_BUFFER bigger. */
- require = decoding_buffer_size (&coding, this);
- if (inserted + require + 2 * (total - how_much) > bufsize)
- {
- bufsize = inserted + require + 2 * (total - how_much);
- conversion_buffer = (unsigned char *) xrealloc (conversion_buffer, bufsize);
- }
-
- /* Convert this batch with results in CONVERSION_BUFFER. */
- if (how_much >= total) /* This is the last block. */
- coding.mode |= CODING_MODE_LAST_BLOCK;
- if (coding.composing != COMPOSITION_DISABLED)
- coding_allocate_composition_data (&coding, BEGV);
- result = decode_coding (&coding, read_buf,
- conversion_buffer + inserted,
- this, bufsize - inserted);
-
- /* Save for next iteration whatever we didn't convert. */
- unprocessed = this - coding.consumed;
- bcopy (read_buf + coding.consumed, read_buf, unprocessed);
- if (!NILP (current_buffer->enable_multibyte_characters))
- this = coding.produced;
- else
- this = str_as_unibyte (conversion_buffer + inserted,
- coding.produced);
- }
-
- inserted += this;
+ BUF_SET_PT (XBUFFER (conversion_buffer),
+ BUF_Z (XBUFFER (conversion_buffer)));
+ decode_coding_c_string (&coding, read_buf, unprocessed + this,
+ conversion_buffer);
+ unprocessed = coding.carryover_bytes;
+ if (coding.carryover_bytes > 0)
+ bcopy (coding.carryover, read_buf, unprocessed);
}
+ UNGCPRO;
+ emacs_close (fd);
- /* At this point, INSERTED is how many characters (i.e. bytes)
- are present in CONVERSION_BUFFER.
- HOW_MUCH should equal TOTAL,
- or should be <= 0 if we couldn't read the file. */
+ /* At this point, HOW_MUCH should equal TOTAL, or should be <= 0
+ if we couldn't read the file. */
if (how_much < 0)
+ error ("IO error reading %s: %s",
+ SDATA (orig_filename), emacs_strerror (errno));
+
+ if (unprocessed > 0)
{
- xfree (conversion_buffer);
- coding_free_composition_data (&coding);
- error ("IO error reading %s: %s",
- SDATA (orig_filename), emacs_strerror (errno));
+ coding.mode |= CODING_MODE_LAST_BLOCK;
+ decode_coding_c_string (&coding, read_buf, unprocessed,
+ conversion_buffer);
+ coding.mode &= ~CODING_MODE_LAST_BLOCK;
}
- /* Compare the beginning of the converted file
- with the buffer text. */
+ decoded = BUF_BEG_ADDR (XBUFFER (conversion_buffer));
+ inserted = (BUF_Z_BYTE (XBUFFER (conversion_buffer))
+ - BUF_BEG_BYTE (XBUFFER (conversion_buffer)));
+
+ /* Compare the beginning of the converted string with the buffer
+ text. */
bufpos = 0;
while (bufpos < inserted && same_at_start < same_at_end
- && FETCH_BYTE (same_at_start) == conversion_buffer[bufpos])
+ && FETCH_BYTE (same_at_start) == decoded[bufpos])
same_at_start++, bufpos++;
- /* If the file matches the buffer completely,
+ /* If the file matches the head of buffer completely,
there's no need to replace anything. */
if (bufpos == inserted)
{
- xfree (conversion_buffer);
- coding_free_composition_data (&coding);
- emacs_close (fd);
specpdl_ptr--;
/* Truncate the buffer to the size of the file. */
del_range_byte (same_at_start, same_at_end, 0);
inserted = 0;
+
+ unbind_to (this_count, Qnil);
goto handled;
}
- /* Extend the start of non-matching text area to multibyte
- character boundary. */
+ /* Extend the start of non-matching text area to the previous
+ multibyte character boundary. */
if (! NILP (current_buffer->enable_multibyte_characters))
while (same_at_start > BEGV_BYTE
&& ! CHAR_HEAD_P (FETCH_BYTE (same_at_start)))
@@ -4369,11 +4347,11 @@ actually used. */)
/* Compare with same_at_start to avoid counting some buffer text
as matching both at the file's beginning and at the end. */
while (bufpos > 0 && same_at_end > same_at_start
- && FETCH_BYTE (same_at_end - 1) == conversion_buffer[bufpos - 1])
+ && FETCH_BYTE (same_at_end - 1) == decoded[bufpos - 1])
same_at_end--, bufpos--;
- /* Extend the end of non-matching text area to multibyte
- character boundary. */
+ /* Extend the end of non-matching text area to the next
+ multibyte character boundary. */
if (! NILP (current_buffer->enable_multibyte_characters))
while (same_at_end < ZV_BYTE
&& ! CHAR_HEAD_P (FETCH_BYTE (same_at_end)))
@@ -4391,7 +4369,7 @@ actually used. */)
/* Replace the chars that we need to replace,
and update INSERTED to equal the number of bytes
- we are taking from the file. */
+ we are taking from the decoded string. */
inserted -= (ZV_BYTE - same_at_end) + (same_at_start - BEGV_BYTE);
if (same_at_end != same_at_start)
@@ -4406,20 +4384,21 @@ actually used. */)
}
/* Insert from the file at the proper position. */
SET_PT_BOTH (temp, same_at_start);
- insert_1 (conversion_buffer + same_at_start - BEGV_BYTE, inserted,
- 0, 0, 0);
- if (coding.cmp_data && coding.cmp_data->used)
- coding_restore_composition (&coding, Fcurrent_buffer ());
- coding_free_composition_data (&coding);
-
+ same_at_start_charpos
+ = buf_bytepos_to_charpos (XBUFFER (conversion_buffer),
+ same_at_start);
+ inserted_chars
+ = (buf_bytepos_to_charpos (XBUFFER (conversion_buffer),
+ same_at_start + inserted)
+ - same_at_start_charpos);
+ insert_from_buffer (XBUFFER (conversion_buffer),
+ same_at_start_charpos, inserted_chars, 0);
/* Set `inserted' to the number of inserted characters. */
inserted = PT - temp;
/* Set point before the inserted characters. */
SET_PT_BOTH (temp, same_at_start);
- xfree (conversion_buffer);
- emacs_close (fd);
- specpdl_ptr--;
+ unbind_to (this_count, Qnil);
goto handled;
}
@@ -4472,7 +4451,7 @@ actually used. */)
inserted = 0;
/* Here, we don't do code conversion in the loop. It is done by
- code_convert_region after all data are read into the buffer. */
+ decode_coding_gap after all data are read into the buffer. */
{
int gap_size = GAP_SIZE;
@@ -4577,104 +4556,98 @@ actually used. */)
notfound:
- if (! coding_system_decided)
+ if (NILP (coding_system))
{
/* The coding system is not yet decided. Decide it by an
optimized method for handling `coding:' tag.
Note that we can get here only if the buffer was empty
before the insertion. */
- Lisp_Object val;
- val = Qnil;
if (!NILP (Vcoding_system_for_read))
- val = Vcoding_system_for_read;
+ coding_system = Vcoding_system_for_read;
else
{
/* Since we are sure that the current buffer was empty
before the insertion, we can toggle
enable-multibyte-characters directly here without taking
- care of marker adjustment and byte combining problem. By
- this way, we can run Lisp program safely before decoding
- the inserted text. */
+ care of marker adjustment. By this way, we can run Lisp
+ program safely before decoding the inserted text. */
Lisp_Object unwind_data;
int count = SPECPDL_INDEX ();
unwind_data = Fcons (current_buffer->enable_multibyte_characters,
Fcons (current_buffer->undo_list,
Fcurrent_buffer ()));
- current_buffer->enable_multibyte_characters = Qnil;
+ current_buffer->enable_multibyte_characters = Qnil;
current_buffer->undo_list = Qt;
record_unwind_protect (decide_coding_unwind, unwind_data);
if (inserted > 0 && ! NILP (Vset_auto_coding_function))
{
- val = call2 (Vset_auto_coding_function,
- filename, make_number (inserted));
+ coding_system = call2 (Vset_auto_coding_function,
+ filename, make_number (inserted));
}
- if (NILP (val))
+ if (NILP (coding_system))
{
/* If the coding system is not yet decided, check
file-coding-system-alist. */
- Lisp_Object args[6], coding_systems;
+ Lisp_Object args[6];
args[0] = Qinsert_file_contents, args[1] = orig_filename;
args[2] = visit, args[3] = beg, args[4] = end, args[5] = Qnil;
- coding_systems = Ffind_operation_coding_system (6, args);
- if (CONSP (coding_systems))
- val = XCAR (coding_systems);
+ coding_system = Ffind_operation_coding_system (6, args);
+ if (CONSP (coding_system))
+ coding_system = XCAR (coding_system);
}
unbind_to (count, Qnil);
inserted = Z_BYTE - BEG_BYTE;
}
- /* The following kludgy code is to avoid some compiler bug.
- We can't simply do
- setup_coding_system (val, &coding);
- on some system. */
- {
- struct coding_system temp_coding;
- setup_coding_system (Fcheck_coding_system (val), &temp_coding);
- bcopy (&temp_coding, &coding, sizeof coding);
- }
- /* Ensure we set Vlast_coding_system_used. */
- set_coding_system = 1;
+ if (NILP (coding_system))
+ coding_system = Qundecided;
+ else
+ CHECK_CODING_SYSTEM (coding_system);
- if (NILP (current_buffer->enable_multibyte_characters)
- && ! NILP (val))
+ if (NILP (current_buffer->enable_multibyte_characters))
/* We must suppress all character code conversion except for
end-of-line conversion. */
- setup_raw_text_coding_system (&coding);
- coding.src_multibyte = 0;
- coding.dst_multibyte
- = !NILP (current_buffer->enable_multibyte_characters);
+ coding_system = raw_text_coding_system (coding_system);
+ setup_coding_system (coding_system, &coding);
+ /* Ensure we set Vlast_coding_system_used. */
+ set_coding_system = 1;
}
- if (!NILP (visit)
- /* Can't do this if part of the buffer might be preserved. */
- && NILP (replace)
- && (coding.type == coding_type_no_conversion
- || coding.type == coding_type_raw_text))
+ if (!NILP (visit))
{
- /* Visiting a file with these coding system makes the buffer
- unibyte. */
- current_buffer->enable_multibyte_characters = Qnil;
- coding.dst_multibyte = 0;
+ /* When we visit a file by raw-text, we change the buffer to
+ unibyte. */
+ if (CODING_FOR_UNIBYTE (&coding)
+ /* Can't do this if part of the buffer might be preserved. */
+ && NILP (replace))
+ /* Visiting a file with these coding system makes the buffer
+ unibyte. */
+ current_buffer->enable_multibyte_characters = Qnil;
}
- if (inserted > 0 || coding.type == coding_type_ccl)
+ coding.dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
+ if (CODING_MAY_REQUIRE_DECODING (&coding)
+ && (inserted > 0 || CODING_REQUIRE_FLUSHING (&coding)))
{
- if (CODING_MAY_REQUIRE_DECODING (&coding))
- {
- code_convert_region (PT, PT_BYTE, PT + inserted, PT_BYTE + inserted,
- &coding, 0, 0);
- inserted = coding.produced_char;
- }
- else
- adjust_after_insert (PT, PT_BYTE, PT + inserted, PT_BYTE + inserted,
- inserted);
+ move_gap_both (PT, PT_BYTE);
+ GAP_SIZE += inserted;
+ ZV_BYTE -= inserted;
+ Z_BYTE -= inserted;
+ ZV -= inserted;
+ Z -= inserted;
+ decode_coding_gap (&coding, inserted, inserted);
+ inserted = coding.produced_char;
+ coding_system = CODING_ID_NAME (coding.id);
}
+ else if (inserted > 0)
+ adjust_after_insert (PT, PT_BYTE, PT + inserted, PT_BYTE + inserted,
+ inserted);
/* Now INSERTED is measured in characters. */
@@ -4682,8 +4655,8 @@ actually used. */)
/* Use the conversion type to determine buffer-file-type
(find-buffer-file-type is now used to help determine the
conversion). */
- if ((coding.eol_type == CODING_EOL_UNDECIDED
- || coding.eol_type == CODING_EOL_LF)
+ if ((VECTORP (CODING_ID_EOL_TYPE (coding.id))
+ || EQ (CODING_ID_EOL_TYPE (coding.id), Qunix))
&& ! CODING_REQUIRE_DECODING (&coding))
current_buffer->buffer_file_type = Qt;
else
@@ -4723,7 +4696,7 @@ actually used. */)
}
if (set_coding_system)
- Vlast_coding_system_used = coding.symbol;
+ Vlast_coding_system_used = coding_system;
if (! NILP (Ffboundp (Qafter_insert_file_set_coding)))
{
@@ -4802,8 +4775,6 @@ actually used. */)
}
static Lisp_Object build_annotations P_ ((Lisp_Object, Lisp_Object));
-static Lisp_Object build_annotations_2 P_ ((Lisp_Object, Lisp_Object,
- Lisp_Object, Lisp_Object));
/* If build_annotations switched buffers, switch back to BUF.
Kill the temporary buffer that was selected in the meantime.
@@ -4828,7 +4799,7 @@ build_annotations_unwind (buf)
/* Decide the coding-system to encode the data with. */
-void
+static Lisp_Object
choose_write_coding_system (start, end, filename,
append, visit, lockname, coding)
Lisp_Object start, end, filename, append, visit, lockname;
@@ -4839,14 +4810,7 @@ choose_write_coding_system (start, end, filename,
if (auto_saving
&& NILP (Fstring_equal (current_buffer->filename,
current_buffer->auto_save_file_name)))
- {
- /* We use emacs-mule for auto saving... */
- setup_coding_system (Qemacs_mule, coding);
- /* ... but with the special flag to indicate not to strip off
- leading code of eight-bit-control chars. */
- coding->flags = 1;
- goto done_setup_coding;
- }
+ val = Qutf_8_emacs;
else if (!NILP (Vcoding_system_for_write))
{
val = Vcoding_system_for_write;
@@ -4893,8 +4857,7 @@ choose_write_coding_system (start, end, filename,
val = XCDR (coding_systems);
}
- if (NILP (val)
- && !NILP (current_buffer->buffer_file_coding_system))
+ if (NILP (val))
{
/* If we still have not decided a coding system, use the
default value of buffer-file-coding-system. */
@@ -4902,45 +4865,42 @@ choose_write_coding_system (start, end, filename,
using_default_coding = 1;
}
+ if (! NILP (val) && ! force_raw_text)
+ {
+ Lisp_Object spec, attrs;
+
+ CHECK_CODING_SYSTEM_GET_SPEC (val, spec);
+ attrs = AREF (spec, 0);
+ if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
+ force_raw_text = 1;
+ }
+
if (!force_raw_text
&& !NILP (Ffboundp (Vselect_safe_coding_system_function)))
/* Confirm that VAL can surely encode the current region. */
val = call5 (Vselect_safe_coding_system_function,
start, end, val, Qnil, filename);
- setup_coding_system (Fcheck_coding_system (val), coding);
- if (coding->eol_type == CODING_EOL_UNDECIDED
- && !using_default_coding)
- {
- if (! EQ (default_buffer_file_coding.symbol,
- buffer_defaults.buffer_file_coding_system))
- setup_coding_system (buffer_defaults.buffer_file_coding_system,
- &default_buffer_file_coding);
- if (default_buffer_file_coding.eol_type != CODING_EOL_UNDECIDED)
- {
- Lisp_Object subsidiaries;
-
- coding->eol_type = default_buffer_file_coding.eol_type;
- subsidiaries = Fget (coding->symbol, Qeol_type);
- if (VECTORP (subsidiaries)
- && XVECTOR (subsidiaries)->size == 3)
- coding->symbol
- = XVECTOR (subsidiaries)->contents[coding->eol_type];
- }
- }
+ /* If the decided coding-system doesn't specify end-of-line
+ format, we use that of
+ `default-buffer-file-coding-system'. */
+ if (! using_default_coding
+ && ! NILP (buffer_defaults.buffer_file_coding_system))
+ val = (coding_inherit_eol_type
+ (val, buffer_defaults.buffer_file_coding_system));
+ /* If we decide not to encode text, use `raw-text' or one of its
+ subsidiaries. */
if (force_raw_text)
- setup_raw_text_coding_system (coding);
- goto done_setup_coding;
+ val = raw_text_coding_system (val);
}
- setup_coding_system (Fcheck_coding_system (val), coding);
+ val = coding_inherit_eol_type (val, Qnil);
+ setup_coding_system (val, coding);
- done_setup_coding:
- if (coding->eol_type == CODING_EOL_UNDECIDED)
- coding->eol_type = system_eol_type;
if (!STRINGP (start) && !NILP (current_buffer->selective_display))
coding->mode |= CODING_MODE_SELECTIVE_DISPLAY;
+ return val;
}
DEFUN ("write-region", Fwrite_region, Swrite_region, 3, 7,
@@ -4985,7 +4945,6 @@ This does code conversion according to the value of
int save_errno = 0;
const unsigned char *fn;
struct stat st;
- int tem;
int count = SPECPDL_INDEX ();
int count1;
#ifdef VMS
@@ -5085,21 +5044,9 @@ This does code conversion according to the value of
We used to make this choice before calling build_annotations, but that
leads to problems when a write-annotate-function takes care of
unsavable chars (as was the case with X-Symbol). */
- choose_write_coding_system (start, end, filename,
- append, visit, lockname, &coding);
- Vlast_coding_system_used = coding.symbol;
-
- given_buffer = current_buffer;
- if (! STRINGP (start))
- {
- annotations = build_annotations_2 (start, end,
- coding.pre_write_conversion, annotations);
- if (current_buffer != given_buffer)
- {
- XSETFASTINT (start, BEGV);
- XSETFASTINT (end, ZV);
- }
- }
+ Vlast_coding_system_used
+ = choose_write_coding_system (start, end, filename,
+ append, visit, lockname, &coding);
#ifdef CLASH_DETECTION
if (!auto_saving)
@@ -5237,6 +5184,9 @@ This does code conversion according to the value of
if (GPT > BEG && GPT_ADDR[-1] != '\n')
move_gap (find_next_newline (GPT, 1));
#else
+#if 0
+ /* The new encoding routine doesn't require the following. */
+
/* Whether VMS or not, we must move the gap to the next of newline
when we must put designation sequences at beginning of line. */
if (INTEGERP (start)
@@ -5250,6 +5200,7 @@ This does code conversion according to the value of
SET_PT_BOTH (opoint, opoint_byte);
}
#endif
+#endif
failure = 0;
immediate_quit = 1;
@@ -5262,23 +5213,10 @@ This does code conversion according to the value of
}
else if (XINT (start) != XINT (end))
{
- tem = CHAR_TO_BYTE (XINT (start));
-
- if (XINT (start) < GPT)
- {
- failure = 0 > a_write (desc, Qnil, XINT (start),
- min (GPT, XINT (end)) - XINT (start),
- &annotations, &coding);
- save_errno = errno;
- }
-
- if (XINT (end) > GPT && !failure)
- {
- tem = max (XINT (start), GPT);
- failure = 0 > a_write (desc, Qnil, tem , XINT (end) - tem,
- &annotations, &coding);
- save_errno = errno;
- }
+ failure = 0 > a_write (desc, Qnil,
+ XINT (start), XINT (end) - XINT (start),
+ &annotations, &coding);
+ save_errno = errno;
}
else
{
@@ -5294,7 +5232,7 @@ This does code conversion according to the value of
{
/* We have to flush out a data. */
coding.mode |= CODING_MODE_LAST_BLOCK;
- failure = 0 > e_write (desc, Qnil, 0, 0, &coding);
+ failure = 0 > e_write (desc, Qnil, 1, 1, &coding);
save_errno = errno;
}
@@ -5491,30 +5429,6 @@ build_annotations (start, end)
return annotations;
}
-static Lisp_Object
-build_annotations_2 (start, end, pre_write_conversion, annotations)
- Lisp_Object start, end, pre_write_conversion, annotations;
-{
- struct gcpro gcpro1;
- Lisp_Object res;
-
- GCPRO1 (annotations);
- /* At last, do the same for the function PRE_WRITE_CONVERSION
- implied by the current coding-system. */
- if (!NILP (pre_write_conversion))
- {
- struct buffer *given_buffer = current_buffer;
- Vwrite_region_annotations_so_far = annotations;
- res = call2 (pre_write_conversion, start, end);
- Flength (res);
- annotations = (current_buffer != given_buffer
- ? res
- : merge (annotations, res, Qcar_less_than_car));
- }
-
- UNGCPRO;
- return annotations;
-}
/* Write to descriptor DESC the NCHARS chars starting at POS of STRING.
If STRING is nil, POS is the character position in the current buffer.
@@ -5570,9 +5484,6 @@ a_write (desc, string, pos, nchars, annot, coding)
return 0;
}
-#ifndef WRITE_BUF_SIZE
-#define WRITE_BUF_SIZE (16 * 1024)
-#endif
/* Write text in the range START and END into descriptor DESC,
encoding them with coding system CODING. If STRING is nil, START
@@ -5586,78 +5497,77 @@ e_write (desc, string, start, end, coding)
int start, end;
struct coding_system *coding;
{
- register char *addr;
- register int nbytes;
- char buf[WRITE_BUF_SIZE];
- int return_val = 0;
-
- if (start >= end)
- coding->composing = COMPOSITION_DISABLED;
- if (coding->composing != COMPOSITION_DISABLED)
- coding_save_composition (coding, start, end, string);
-
if (STRINGP (string))
{
- addr = SDATA (string);
- nbytes = SBYTES (string);
- coding->src_multibyte = STRING_MULTIBYTE (string);
- }
- else if (start < end)
- {
- /* It is assured that the gap is not in the range START and END-1. */
- addr = CHAR_POS_ADDR (start);
- nbytes = CHAR_TO_BYTE (end) - CHAR_TO_BYTE (start);
- coding->src_multibyte
- = !NILP (current_buffer->enable_multibyte_characters);
- }
- else
- {
- addr = "";
- nbytes = 0;
- coding->src_multibyte = 1;
+ start = 0;
+ end = SCHARS (string);
}
/* We used to have a code for handling selective display here. But,
now it is handled within encode_coding. */
- while (1)
- {
- int result;
- result = encode_coding (coding, addr, buf, nbytes, WRITE_BUF_SIZE);
- if (coding->produced > 0)
+ while (start < end)
+ {
+ if (STRINGP (string))
{
- coding->produced -= emacs_write (desc, buf, coding->produced);
- if (coding->produced)
+ coding->src_multibyte = SCHARS (string) < SBYTES (string);
+ if (CODING_REQUIRE_ENCODING (coding))
{
- return_val = -1;
- break;
+ encode_coding_object (coding, string,
+ start, string_char_to_byte (string, start),
+ end, string_char_to_byte (string, end), Qt);
+ }
+ else
+ {
+ coding->dst_object = string;
+ coding->consumed_char = SCHARS (string);
+ coding->produced = SBYTES (string);
}
}
- nbytes -= coding->consumed;
- addr += coding->consumed;
- if (result == CODING_FINISH_INSUFFICIENT_SRC
- && nbytes > 0)
+ else
{
- /* The source text ends by an incomplete multibyte form.
- There's no way other than write it out as is. */
- nbytes -= emacs_write (desc, addr, nbytes);
- if (nbytes)
+ int start_byte = CHAR_TO_BYTE (start);
+ int end_byte = CHAR_TO_BYTE (end);
+
+ coding->src_multibyte = (end - start) < (end_byte - start_byte);
+ if (CODING_REQUIRE_ENCODING (coding))
{
- return_val = -1;
- break;
+ encode_coding_object (coding, Fcurrent_buffer (),
+ start, start_byte, end, end_byte, Qt);
+ }
+ else
+ {
+ coding->dst_object = Qnil;
+ coding->dst_pos_byte = start_byte;
+ if (start >= GPT || end <= GPT)
+ {
+ coding->consumed_char = end - start;
+ coding->produced = end_byte - start_byte;
+ }
+ else
+ {
+ coding->consumed_char = GPT - start;
+ coding->produced = GPT_BYTE - start_byte;
+ }
}
}
- if (nbytes <= 0)
- break;
+
+ if (coding->produced > 0)
+ {
+ coding->produced -=
+ emacs_write (desc,
+ STRINGP (coding->dst_object)
+ ? SDATA (coding->dst_object)
+ : BYTE_POS_ADDR (coding->dst_pos_byte),
+ coding->produced);
+
+ if (coding->produced)
+ return -1;
+ }
start += coding->consumed_char;
- if (coding->cmp_data)
- coding_adjust_composition_offset (coding, start);
}
- if (coding->cmp_data)
- coding_free_composition_data (coding);
-
- return return_val;
+ return 0;
}
DEFUN ("verify-visited-file-modtime", Fverify_visited_file_modtime,
diff --git a/src/filelock.c b/src/filelock.c
index 6e8e6da9fbd..6a3972dad42 100644
--- a/src/filelock.c
+++ b/src/filelock.c
@@ -53,7 +53,7 @@ extern int errno;
#include "lisp.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "systime.h"
diff --git a/src/fns.c b/src/fns.c
index c222a6c45e0..4c1e5b79ae2 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -39,7 +39,7 @@ Boston, MA 02110-1301, USA. */
#include "lisp.h"
#include "commands.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "buffer.h"
#include "keyboard.h"
@@ -151,8 +151,6 @@ To get the number of bytes, use `string-bytes'. */)
XSETFASTINT (val, SCHARS (sequence));
else if (VECTORP (sequence))
XSETFASTINT (val, XVECTOR (sequence)->size);
- else if (SUB_CHAR_TABLE_P (sequence))
- XSETFASTINT (val, SUB_CHAR_TABLE_ORDINARY_SLOTS);
else if (CHAR_TABLE_P (sequence))
XSETFASTINT (val, MAX_CHAR);
else if (BOOL_VECTOR_P (sequence))
@@ -217,7 +215,7 @@ which is at least the number of distinct elements. */)
DEFUN ("string-bytes", Fstring_bytes, Sstring_bytes, 1, 1, 0,
doc: /* Return the number of bytes in STRING.
-If STRING is a multibyte string, this is greater than the length of STRING. */)
+If STRING is multibyte, this may be greater than the length of STRING. */)
(string)
Lisp_Object string;
{
@@ -457,28 +455,6 @@ usage: (vconcat &rest SEQUENCES) */)
return concat (nargs, args, Lisp_Vectorlike, 0);
}
-/* Return a copy of a sub char table ARG. The elements except for a
- nested sub char table are not copied. */
-static Lisp_Object
-copy_sub_char_table (arg)
- Lisp_Object arg;
-{
- Lisp_Object copy = make_sub_char_table (Qnil);
- int i;
-
- XCHAR_TABLE (copy)->defalt = XCHAR_TABLE (arg)->defalt;
- /* Copy all the contents. */
- bcopy (XCHAR_TABLE (arg)->contents, XCHAR_TABLE (copy)->contents,
- SUB_CHAR_TABLE_ORDINARY_SLOTS * sizeof (Lisp_Object));
- /* Recursively copy any sub char-tables in the ordinary slots. */
- for (i = 32; i < SUB_CHAR_TABLE_ORDINARY_SLOTS; i++)
- if (SUB_CHAR_TABLE_P (XCHAR_TABLE (arg)->contents[i]))
- XCHAR_TABLE (copy)->contents[i]
- = copy_sub_char_table (XCHAR_TABLE (copy)->contents[i]);
-
- return copy;
-}
-
DEFUN ("copy-sequence", Fcopy_sequence, Scopy_sequence, 1, 1, 0,
doc: /* Return a copy of a list, vector, string or char-table.
@@ -491,24 +467,7 @@ with the original. */)
if (CHAR_TABLE_P (arg))
{
- int i;
- Lisp_Object copy;
-
- copy = Fmake_char_table (XCHAR_TABLE (arg)->purpose, Qnil);
- /* Copy all the slots, including the extra ones. */
- bcopy (XVECTOR (arg)->contents, XVECTOR (copy)->contents,
- ((XCHAR_TABLE (arg)->size & PSEUDOVECTOR_SIZE_MASK)
- * sizeof (Lisp_Object)));
-
- /* Recursively copy any sub char tables in the ordinary slots
- for multibyte characters. */
- for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS;
- i < CHAR_TABLE_ORDINARY_SLOTS; i++)
- if (SUB_CHAR_TABLE_P (XCHAR_TABLE (arg)->contents[i]))
- XCHAR_TABLE (copy)->contents[i]
- = copy_sub_char_table (XCHAR_TABLE (copy)->contents[i]);
-
- return copy;
+ return copy_char_table (arg);
}
if (BOOL_VECTOR_P (arg))
@@ -611,10 +570,10 @@ concat (nargs, args, target_type, last_special)
for (i = 0; i < len; i++)
{
ch = XVECTOR (this)->contents[i];
- CHECK_NUMBER (ch);
+ CHECK_CHARACTER (ch);
this_len_byte = CHAR_BYTES (XINT (ch));
result_len_byte += this_len_byte;
- if (!SINGLE_BYTE_CHAR_P (XINT (ch)))
+ if (! ASCII_CHAR_P (XINT (ch)) && ! CHAR_BYTE8_P (XINT (ch)))
some_multibyte = 1;
}
else if (BOOL_VECTOR_P (this) && XBOOL_VECTOR (this)->size > 0)
@@ -623,10 +582,10 @@ concat (nargs, args, target_type, last_special)
for (; CONSP (this); this = XCDR (this))
{
ch = XCAR (this);
- CHECK_NUMBER (ch);
+ CHECK_CHARACTER (ch);
this_len_byte = CHAR_BYTES (XINT (ch));
result_len_byte += this_len_byte;
- if (!SINGLE_BYTE_CHAR_P (XINT (ch)))
+ if (! ASCII_CHAR_P (XINT (ch)) && ! CHAR_BYTE8_P (XINT (ch)))
some_multibyte = 1;
}
else if (STRINGP (this))
@@ -742,9 +701,7 @@ concat (nargs, args, target_type, last_special)
{
XSETFASTINT (elt, SREF (this, thisindex)); thisindex++;
if (some_multibyte
- && (XINT (elt) >= 0240
- || (XINT (elt) >= 0200
- && ! NILP (Vnonascii_translation_table)))
+ && XINT (elt) >= 0200
&& XINT (elt) < 0400)
{
c = unibyte_char_to_multibyte (XINT (elt));
@@ -777,28 +734,12 @@ concat (nargs, args, target_type, last_special)
else
{
CHECK_NUMBER (elt);
- if (SINGLE_BYTE_CHAR_P (XINT (elt)))
- {
- if (some_multibyte)
- toindex_byte
- += CHAR_STRING (XINT (elt),
- SDATA (val) + toindex_byte);
- else
- SSET (val, toindex_byte++, XINT (elt));
- toindex++;
- }
+ if (some_multibyte)
+ toindex_byte += CHAR_STRING (XINT (elt),
+ SDATA (val) + toindex_byte);
else
- /* If we have any multibyte characters,
- we already decided to make a multibyte string. */
- {
- int c = XINT (elt);
- /* P exists as a variable
- to avoid a bug on the Masscomp C compiler. */
- unsigned char *p = SDATA (val) + toindex_byte;
-
- toindex_byte += CHAR_STRING (c, p);
- toindex++;
- }
+ SSET (val, toindex_byte++, XINT (elt));
+ toindex++;
}
}
}
@@ -848,7 +789,7 @@ string_char_to_byte (string, char_index)
Lisp_Object string;
int char_index;
{
- int i, i_byte;
+ int i_byte;
int best_below, best_below_byte;
int best_above, best_above_byte;
@@ -874,40 +815,30 @@ string_char_to_byte (string, char_index)
if (char_index - best_below < best_above - char_index)
{
+ unsigned char *p = SDATA (string) + best_below_byte;
+
while (best_below < char_index)
{
- int c;
- FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, string,
- best_below, best_below_byte);
+ p += BYTES_BY_CHAR_HEAD (*p);
+ best_below++;
}
- i = best_below;
- i_byte = best_below_byte;
+ i_byte = p - SDATA (string);
}
else
{
+ unsigned char *p = SDATA (string) + best_above_byte;
+
while (best_above > char_index)
{
- unsigned char *pend = SDATA (string) + best_above_byte;
- unsigned char *pbeg = pend - best_above_byte;
- unsigned char *p = pend - 1;
- int bytes;
-
- while (p > pbeg && !CHAR_HEAD_P (*p)) p--;
- PARSE_MULTIBYTE_SEQ (p, pend - p, bytes);
- if (bytes == pend - p)
- best_above_byte -= bytes;
- else if (bytes > pend - p)
- best_above_byte -= (pend - p);
- else
- best_above_byte--;
+ p--;
+ while (!CHAR_HEAD_P (*p)) p--;
best_above--;
}
- i = best_above;
- i_byte = best_above_byte;
+ i_byte = p - SDATA (string);
}
string_char_byte_cache_bytepos = i_byte;
- string_char_byte_cache_charpos = i;
+ string_char_byte_cache_charpos = char_index;
string_char_byte_cache_string = string;
return i_byte;
@@ -946,36 +877,30 @@ string_byte_to_char (string, byte_index)
if (byte_index - best_below_byte < best_above_byte - byte_index)
{
- while (best_below_byte < byte_index)
+ unsigned char *p = SDATA (string) + best_below_byte;
+ unsigned char *pend = SDATA (string) + byte_index;
+
+ while (p < pend)
{
- int c;
- FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, string,
- best_below, best_below_byte);
+ p += BYTES_BY_CHAR_HEAD (*p);
+ best_below++;
}
i = best_below;
- i_byte = best_below_byte;
+ i_byte = p - SDATA (string);
}
else
{
- while (best_above_byte > byte_index)
+ unsigned char *p = SDATA (string) + best_above_byte;
+ unsigned char *pbeg = SDATA (string) + byte_index;
+
+ while (p > pbeg)
{
- unsigned char *pend = SDATA (string) + best_above_byte;
- unsigned char *pbeg = pend - best_above_byte;
- unsigned char *p = pend - 1;
- int bytes;
-
- while (p > pbeg && !CHAR_HEAD_P (*p)) p--;
- PARSE_MULTIBYTE_SEQ (p, pend - p, bytes);
- if (bytes == pend - p)
- best_above_byte -= bytes;
- else if (bytes > pend - p)
- best_above_byte -= (pend - p);
- else
- best_above_byte--;
+ p--;
+ while (!CHAR_HEAD_P (*p)) p--;
best_above--;
}
i = best_above;
- i_byte = best_above_byte;
+ i_byte = p - SDATA (string);
}
string_char_byte_cache_bytepos = i_byte;
@@ -985,9 +910,7 @@ string_byte_to_char (string, byte_index)
return i;
}
-/* Convert STRING to a multibyte string.
- Single-byte characters 0240 through 0377 are converted
- by adding nonascii_insert_offset to each. */
+/* Convert STRING to a multibyte string. */
Lisp_Object
string_make_multibyte (string)
@@ -1019,10 +942,9 @@ string_make_multibyte (string)
}
-/* Convert STRING to a multibyte string without changing each
- character codes. Thus, characters 0200 trough 0237 are converted
- to eight-bit-control characters, and characters 0240 through 0377
- are converted eight-bit-graphic characters. */
+/* Convert STRING (if unibyte) to a multibyte string without changing
+ the number of characters. Characters 0200 trough 0237 are
+ converted to eight-bit characters. */
Lisp_Object
string_to_multibyte (string)
@@ -1037,8 +959,8 @@ string_to_multibyte (string)
return string;
nbytes = parse_str_to_multibyte (SDATA (string), SBYTES (string));
- /* If all the chars are ASCII or eight-bit-graphic, they won't need
- any more bytes once converted. */
+ /* If all the chars are ASCII, they won't need any more bytes once
+ converted. */
if (nbytes == SBYTES (string))
return make_multibyte_string (SDATA (string), nbytes, nbytes);
@@ -1119,8 +1041,7 @@ DEFUN ("string-as-unibyte", Fstring_as_unibyte, Sstring_as_unibyte,
If STRING is unibyte, the result is STRING itself.
Otherwise it is a newly created string, with no text properties.
If STRING is multibyte and contains a character of charset
-`eight-bit-control' or `eight-bit-graphic', it is converted to the
-corresponding single byte. */)
+`eight-bit', it is converted to the corresponding single byte. */)
(string)
Lisp_Object string;
{
@@ -1144,20 +1065,16 @@ DEFUN ("string-as-multibyte", Fstring_as_multibyte, Sstring_as_multibyte,
doc: /* Return a multibyte string with the same individual bytes as STRING.
If STRING is multibyte, the result is STRING itself.
Otherwise it is a newly created string, with no text properties.
+
If STRING is unibyte and contains an individual 8-bit byte (i.e. not
-part of a multibyte form), it is converted to the corresponding
-multibyte character of charset `eight-bit-control' or `eight-bit-graphic'.
+part of a correct utf-8 sequence), it is converted to the corresponding
+multibyte character of charset `eight-bit'.
+See also `string-to-multibyte'.
+
Beware, this often doesn't really do what you think it does.
-It is similar to (decode-coding-string STRING 'emacs-mule-unix).
+It is similar to (decode-coding-string STRING 'utf-8-emacs).
If you're not sure, whether to use `string-as-multibyte' or
-`string-to-multibyte', use `string-to-multibyte'. Beware:
- (aref (string-as-multibyte "\\201") 0) -> 129 (aka ?\\201)
- (aref (string-as-multibyte "\\300") 0) -> 192 (aka ?\\300)
- (aref (string-as-multibyte "\\300\\201") 0) -> 192 (aka ?\\300)
- (aref (string-as-multibyte "\\300\\201") 1) -> 129 (aka ?\\201)
-but
- (aref (string-as-multibyte "\\201\\300") 0) -> 2240
- (aref (string-as-multibyte "\\201\\300") 1) -> <error> */)
+`string-to-multibyte', use `string-to-multibyte'. */)
(string)
Lisp_Object string;
{
@@ -1188,11 +1105,13 @@ DEFUN ("string-to-multibyte", Fstring_to_multibyte, Sstring_to_multibyte,
doc: /* Return a multibyte string with the same individual chars as STRING.
If STRING is multibyte, the result is STRING itself.
Otherwise it is a newly created string, with no text properties.
-Characters 0200 through 0237 are converted to eight-bit-control
-characters of the same character code. Characters 0240 through 0377
-are converted to eight-bit-graphic characters of the same character
-codes.
-This is similar to (decode-coding-string STRING 'binary) */)
+
+If STRING is unibyte and contains an 8-bit byte, it is converted to
+the corresponding multibyte character of charset `eight-bit'.
+
+This differs from `string-as-multibyte' by converting each byte of a correct
+utf-8 sequence to an eight-bit character, not just bytes that don't form a
+correct sequence. */)
(string)
Lisp_Object string;
{
@@ -1571,6 +1490,22 @@ The value is actually the first element of LIST whose car equals KEY. */)
return CAR (list);
}
+/* Like Fassoc but never report an error and do not allow quits.
+ Use only on lists known never to be circular. */
+
+Lisp_Object
+assoc_no_quit (key, list)
+ Lisp_Object key, list;
+{
+ while (CONSP (list)
+ && (!CONSP (XCAR (list))
+ || (!EQ (XCAR (XCAR (list)), key)
+ && NILP (Fequal (XCAR (XCAR (list)), key)))))
+ list = XCDR (list);
+
+ return CONSP (list) ? XCAR (list) : Qnil;
+}
+
DEFUN ("rassq", Frassq, Srassq, 2, 2, 0,
doc: /* Return non-nil if KEY is `eq' to the cdr of an element of LIST.
The value is actually the first element of LIST whose cdr is KEY. */)
@@ -2242,7 +2177,8 @@ internal_equal (o1, o2, depth, props)
functions are sensible to compare, so eliminate the others now. */
if (size & PSEUDOVECTOR_FLAG)
{
- if (!(size & (PVEC_COMPILED | PVEC_CHAR_TABLE)))
+ if (!(size & (PVEC_COMPILED
+ | PVEC_CHAR_TABLE | PVEC_SUB_CHAR_TABLE)))
return 0;
size &= PSEUDOVECTOR_SIZE_MASK;
}
@@ -2297,11 +2233,11 @@ ARRAY is a vector, string, char-table, or bool-vector. */)
}
else if (CHAR_TABLE_P (array))
{
- register Lisp_Object *p = XCHAR_TABLE (array)->contents;
- size = CHAR_TABLE_ORDINARY_SLOTS;
- for (index = 0; index < size; index++)
- p[index] = item;
- XCHAR_TABLE (array)->defalt = Qnil;
+ int i;
+
+ for (i = 0; i < (1 << CHARTAB_SIZE_BITS_0); i++)
+ XCHAR_TABLE (array)->contents[i] = item;
+ XCHAR_TABLE (array)->defalt = item;
}
else if (STRINGP (array))
{
@@ -2371,579 +2307,6 @@ This makes STRING unibyte and may change its length. */)
return Qnil;
}
-DEFUN ("char-table-subtype", Fchar_table_subtype, Schar_table_subtype,
- 1, 1, 0,
- doc: /* Return the subtype of char-table CHAR-TABLE. The value is a symbol. */)
- (char_table)
- Lisp_Object char_table;
-{
- CHECK_CHAR_TABLE (char_table);
-
- return XCHAR_TABLE (char_table)->purpose;
-}
-
-DEFUN ("char-table-parent", Fchar_table_parent, Schar_table_parent,
- 1, 1, 0,
- doc: /* Return the parent char-table of CHAR-TABLE.
-The value is either nil or another char-table.
-If CHAR-TABLE holds nil for a given character,
-then the actual applicable value is inherited from the parent char-table
-\(or from its parents, if necessary). */)
- (char_table)
- Lisp_Object char_table;
-{
- CHECK_CHAR_TABLE (char_table);
-
- return XCHAR_TABLE (char_table)->parent;
-}
-
-DEFUN ("set-char-table-parent", Fset_char_table_parent, Sset_char_table_parent,
- 2, 2, 0,
- doc: /* Set the parent char-table of CHAR-TABLE to PARENT.
-Return PARENT. PARENT must be either nil or another char-table. */)
- (char_table, parent)
- Lisp_Object char_table, parent;
-{
- Lisp_Object temp;
-
- CHECK_CHAR_TABLE (char_table);
-
- if (!NILP (parent))
- {
- CHECK_CHAR_TABLE (parent);
-
- for (temp = parent; !NILP (temp); temp = XCHAR_TABLE (temp)->parent)
- if (EQ (temp, char_table))
- error ("Attempt to make a chartable be its own parent");
- }
-
- XCHAR_TABLE (char_table)->parent = parent;
-
- return parent;
-}
-
-DEFUN ("char-table-extra-slot", Fchar_table_extra_slot, Schar_table_extra_slot,
- 2, 2, 0,
- doc: /* Return the value of CHAR-TABLE's extra-slot number N. */)
- (char_table, n)
- Lisp_Object char_table, n;
-{
- CHECK_CHAR_TABLE (char_table);
- CHECK_NUMBER (n);
- if (XINT (n) < 0
- || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table)))
- args_out_of_range (char_table, n);
-
- return XCHAR_TABLE (char_table)->extras[XINT (n)];
-}
-
-DEFUN ("set-char-table-extra-slot", Fset_char_table_extra_slot,
- Sset_char_table_extra_slot,
- 3, 3, 0,
- doc: /* Set CHAR-TABLE's extra-slot number N to VALUE. */)
- (char_table, n, value)
- Lisp_Object char_table, n, value;
-{
- CHECK_CHAR_TABLE (char_table);
- CHECK_NUMBER (n);
- if (XINT (n) < 0
- || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table)))
- args_out_of_range (char_table, n);
-
- return XCHAR_TABLE (char_table)->extras[XINT (n)] = value;
-}
-
-static Lisp_Object
-char_table_range (table, from, to, defalt)
- Lisp_Object table;
- int from, to;
- Lisp_Object defalt;
-{
- Lisp_Object val;
-
- if (! NILP (XCHAR_TABLE (table)->defalt))
- defalt = XCHAR_TABLE (table)->defalt;
- val = XCHAR_TABLE (table)->contents[from];
- if (SUB_CHAR_TABLE_P (val))
- val = char_table_range (val, 32, 127, defalt);
- else if (NILP (val))
- val = defalt;
- for (from++; from <= to; from++)
- {
- Lisp_Object this_val;
-
- this_val = XCHAR_TABLE (table)->contents[from];
- if (SUB_CHAR_TABLE_P (this_val))
- this_val = char_table_range (this_val, 32, 127, defalt);
- else if (NILP (this_val))
- this_val = defalt;
- if (! EQ (val, this_val))
- error ("Characters in the range have inconsistent values");
- }
- return val;
-}
-
-
-DEFUN ("char-table-range", Fchar_table_range, Schar_table_range,
- 2, 2, 0,
- doc: /* Return the value in CHAR-TABLE for a range of characters RANGE.
-RANGE should be nil (for the default value),
-a vector which identifies a character set or a row of a character set,
-a character set name, or a character code.
-If the characters in the specified range have different values,
-an error is signaled.
-
-Note that this function doesn't check the parent of CHAR-TABLE. */)
- (char_table, range)
- Lisp_Object char_table, range;
-{
- int charset_id, c1 = 0, c2 = 0;
- int size;
- Lisp_Object ch, val, current_default;
-
- CHECK_CHAR_TABLE (char_table);
-
- if (EQ (range, Qnil))
- return XCHAR_TABLE (char_table)->defalt;
- if (INTEGERP (range))
- {
- int c = XINT (range);
- if (! CHAR_VALID_P (c, 0))
- error ("Invalid character code: %d", c);
- ch = range;
- SPLIT_CHAR (c, charset_id, c1, c2);
- }
- else if (SYMBOLP (range))
- {
- Lisp_Object charset_info;
-
- charset_info = Fget (range, Qcharset);
- CHECK_VECTOR (charset_info);
- charset_id = XINT (XVECTOR (charset_info)->contents[0]);
- ch = Fmake_char_internal (make_number (charset_id),
- make_number (0), make_number (0));
- }
- else if (VECTORP (range))
- {
- size = ASIZE (range);
- if (size == 0)
- args_out_of_range (range, make_number (0));
- CHECK_NUMBER (AREF (range, 0));
- charset_id = XINT (AREF (range, 0));
- if (size > 1)
- {
- CHECK_NUMBER (AREF (range, 1));
- c1 = XINT (AREF (range, 1));
- if (size > 2)
- {
- CHECK_NUMBER (AREF (range, 2));
- c2 = XINT (AREF (range, 2));
- }
- }
-
- /* This checks if charset_id, c0, and c1 are all valid or not. */
- ch = Fmake_char_internal (make_number (charset_id),
- make_number (c1), make_number (c2));
- }
- else
- error ("Invalid RANGE argument to `char-table-range'");
-
- if (c1 > 0 && (CHARSET_DIMENSION (charset_id) == 1 || c2 > 0))
- {
- /* Fully specified character. */
- Lisp_Object parent = XCHAR_TABLE (char_table)->parent;
-
- XCHAR_TABLE (char_table)->parent = Qnil;
- val = Faref (char_table, ch);
- XCHAR_TABLE (char_table)->parent = parent;
- return val;
- }
-
- current_default = XCHAR_TABLE (char_table)->defalt;
- if (charset_id == CHARSET_ASCII
- || charset_id == CHARSET_8_BIT_CONTROL
- || charset_id == CHARSET_8_BIT_GRAPHIC)
- {
- int from, to, defalt;
-
- if (charset_id == CHARSET_ASCII)
- from = 0, to = 127, defalt = CHAR_TABLE_DEFAULT_SLOT_ASCII;
- else if (charset_id == CHARSET_8_BIT_CONTROL)
- from = 128, to = 159, defalt = CHAR_TABLE_DEFAULT_SLOT_8_BIT_CONTROL;
- else
- from = 160, to = 255, defalt = CHAR_TABLE_DEFAULT_SLOT_8_BIT_GRAPHIC;
- if (! NILP (XCHAR_TABLE (char_table)->contents[defalt]))
- current_default = XCHAR_TABLE (char_table)->contents[defalt];
- return char_table_range (char_table, from, to, current_default);
- }
-
- val = XCHAR_TABLE (char_table)->contents[128 + charset_id];
- if (! SUB_CHAR_TABLE_P (val))
- return (NILP (val) ? current_default : val);
- if (! NILP (XCHAR_TABLE (val)->defalt))
- current_default = XCHAR_TABLE (val)->defalt;
- if (c1 == 0)
- return char_table_range (val, 32, 127, current_default);
- val = XCHAR_TABLE (val)->contents[c1];
- if (! SUB_CHAR_TABLE_P (val))
- return (NILP (val) ? current_default : val);
- if (! NILP (XCHAR_TABLE (val)->defalt))
- current_default = XCHAR_TABLE (val)->defalt;
- return char_table_range (val, 32, 127, current_default);
-}
-
-DEFUN ("set-char-table-range", Fset_char_table_range, Sset_char_table_range,
- 3, 3, 0,
- doc: /* Set the value in CHAR-TABLE for a range of characters RANGE to VALUE.
-RANGE should be t (for all characters), nil (for the default value),
-a character set, a vector which identifies a character set, a row of a
-character set, or a character code. Return VALUE. */)
- (char_table, range, value)
- Lisp_Object char_table, range, value;
-{
- int i;
-
- CHECK_CHAR_TABLE (char_table);
-
- if (EQ (range, Qt))
- for (i = 0; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
- {
- /* Don't set these special slots used for default values of
- ascii, eight-bit-control, and eight-bit-graphic. */
- if (i != CHAR_TABLE_DEFAULT_SLOT_ASCII
- && i != CHAR_TABLE_DEFAULT_SLOT_8_BIT_CONTROL
- && i != CHAR_TABLE_DEFAULT_SLOT_8_BIT_GRAPHIC)
- XCHAR_TABLE (char_table)->contents[i] = value;
- }
- else if (EQ (range, Qnil))
- XCHAR_TABLE (char_table)->defalt = value;
- else if (SYMBOLP (range))
- {
- Lisp_Object charset_info;
- int charset_id;
-
- charset_info = Fget (range, Qcharset);
- if (! VECTORP (charset_info)
- || ! NATNUMP (AREF (charset_info, 0))
- || (charset_id = XINT (AREF (charset_info, 0)),
- ! CHARSET_DEFINED_P (charset_id)))
- error ("Invalid charset: %s", SDATA (SYMBOL_NAME (range)));
-
- if (charset_id == CHARSET_ASCII)
- for (i = 0; i < 128; i++)
- XCHAR_TABLE (char_table)->contents[i] = value;
- else if (charset_id == CHARSET_8_BIT_CONTROL)
- for (i = 128; i < 160; i++)
- XCHAR_TABLE (char_table)->contents[i] = value;
- else if (charset_id == CHARSET_8_BIT_GRAPHIC)
- for (i = 160; i < 256; i++)
- XCHAR_TABLE (char_table)->contents[i] = value;
- else
- XCHAR_TABLE (char_table)->contents[charset_id + 128] = value;
- }
- else if (INTEGERP (range))
- Faset (char_table, range, value);
- else if (VECTORP (range))
- {
- int size = XVECTOR (range)->size;
- Lisp_Object *val = XVECTOR (range)->contents;
- Lisp_Object ch = Fmake_char_internal (size <= 0 ? Qnil : val[0],
- size <= 1 ? Qnil : val[1],
- size <= 2 ? Qnil : val[2]);
- Faset (char_table, ch, value);
- }
- else
- error ("Invalid RANGE argument to `set-char-table-range'");
-
- return value;
-}
-
-DEFUN ("set-char-table-default", Fset_char_table_default,
- Sset_char_table_default, 3, 3, 0,
- doc: /* Set the default value in CHAR-TABLE for generic character CH to VALUE.
-The generic character specifies the group of characters.
-If CH is a normal character, set the default value for a group of
-characters to which CH belongs.
-See also the documentation of `make-char'. */)
- (char_table, ch, value)
- Lisp_Object char_table, ch, value;
-{
- int c, charset, code1, code2;
- Lisp_Object temp;
-
- CHECK_CHAR_TABLE (char_table);
- CHECK_NUMBER (ch);
-
- c = XINT (ch);
- SPLIT_CHAR (c, charset, code1, code2);
-
- /* Since we may want to set the default value for a character set
- not yet defined, we check only if the character set is in the
- valid range or not, instead of it is already defined or not. */
- if (! CHARSET_VALID_P (charset))
- invalid_character (c);
-
- if (SINGLE_BYTE_CHAR_P (c))
- {
- /* We use special slots for the default values of single byte
- characters. */
- int default_slot
- = (c < 0x80 ? CHAR_TABLE_DEFAULT_SLOT_ASCII
- : c < 0xA0 ? CHAR_TABLE_DEFAULT_SLOT_8_BIT_CONTROL
- : CHAR_TABLE_DEFAULT_SLOT_8_BIT_GRAPHIC);
-
- return (XCHAR_TABLE (char_table)->contents[default_slot] = value);
- }
-
- /* Even if C is not a generic char, we had better behave as if a
- generic char is specified. */
- if (!CHARSET_DEFINED_P (charset) || CHARSET_DIMENSION (charset) == 1)
- code1 = 0;
- temp = XCHAR_TABLE (char_table)->contents[charset + 128];
- if (! SUB_CHAR_TABLE_P (temp))
- {
- temp = make_sub_char_table (temp);
- XCHAR_TABLE (char_table)->contents[charset + 128] = temp;
- }
- if (!code1)
- {
- XCHAR_TABLE (temp)->defalt = value;
- return value;
- }
- char_table = temp;
- temp = XCHAR_TABLE (char_table)->contents[code1];
- if (SUB_CHAR_TABLE_P (temp))
- XCHAR_TABLE (temp)->defalt = value;
- else
- XCHAR_TABLE (char_table)->contents[code1] = value;
- return value;
-}
-
-/* Look up the element in TABLE at index CH,
- and return it as an integer.
- If the element is nil, return CH itself.
- (Actually we do that for any non-integer.) */
-
-int
-char_table_translate (table, ch)
- Lisp_Object table;
- int ch;
-{
- Lisp_Object value;
- value = Faref (table, make_number (ch));
- if (! INTEGERP (value))
- return ch;
- return XINT (value);
-}
-
-static void
-optimize_sub_char_table (table, chars)
- Lisp_Object *table;
- int chars;
-{
- Lisp_Object elt;
- int from, to;
-
- if (chars == 94)
- from = 33, to = 127;
- else
- from = 32, to = 128;
-
- if (!SUB_CHAR_TABLE_P (*table))
- return;
- elt = XCHAR_TABLE (*table)->contents[from++];
- for (; from < to; from++)
- if (NILP (Fequal (elt, XCHAR_TABLE (*table)->contents[from])))
- return;
- *table = elt;
-}
-
-DEFUN ("optimize-char-table", Foptimize_char_table, Soptimize_char_table,
- 1, 1, 0, doc: /* Optimize char table TABLE. */)
- (table)
- Lisp_Object table;
-{
- Lisp_Object elt;
- int dim;
- int i, j;
-
- CHECK_CHAR_TABLE (table);
-
- for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
- {
- elt = XCHAR_TABLE (table)->contents[i];
- if (!SUB_CHAR_TABLE_P (elt))
- continue;
- dim = CHARSET_DIMENSION (i - 128);
- if (dim == 2)
- for (j = 32; j < SUB_CHAR_TABLE_ORDINARY_SLOTS; j++)
- optimize_sub_char_table (XCHAR_TABLE (elt)->contents + j, dim);
- optimize_sub_char_table (XCHAR_TABLE (table)->contents + i, dim);
- }
- return Qnil;
-}
-
-
-/* Map C_FUNCTION or FUNCTION over SUBTABLE, calling it for each
- character or group of characters that share a value.
- DEPTH is the current depth in the originally specified
- chartable, and INDICES contains the vector indices
- for the levels our callers have descended.
-
- ARG is passed to C_FUNCTION when that is called. */
-
-void
-map_char_table (c_function, function, table, subtable, arg, depth, indices)
- void (*c_function) P_ ((Lisp_Object, Lisp_Object, Lisp_Object));
- Lisp_Object function, table, subtable, arg, *indices;
- int depth;
-{
- int i, to;
- struct gcpro gcpro1, gcpro2, gcpro3, gcpro4;
-
- GCPRO4 (arg, table, subtable, function);
-
- if (depth == 0)
- {
- /* At first, handle ASCII and 8-bit European characters. */
- for (i = 0; i < CHAR_TABLE_SINGLE_BYTE_SLOTS; i++)
- {
- Lisp_Object elt= XCHAR_TABLE (subtable)->contents[i];
- if (NILP (elt))
- elt = XCHAR_TABLE (subtable)->defalt;
- if (NILP (elt))
- elt = Faref (subtable, make_number (i));
- if (c_function)
- (*c_function) (arg, make_number (i), elt);
- else
- call2 (function, make_number (i), elt);
- }
-#if 0 /* If the char table has entries for higher characters,
- we should report them. */
- if (NILP (current_buffer->enable_multibyte_characters))
- {
- UNGCPRO;
- return;
- }
-#endif
- to = CHAR_TABLE_ORDINARY_SLOTS;
- }
- else
- {
- int charset = XFASTINT (indices[0]) - 128;
-
- i = 32;
- to = SUB_CHAR_TABLE_ORDINARY_SLOTS;
- if (CHARSET_CHARS (charset) == 94)
- i++, to--;
- }
-
- for (; i < to; i++)
- {
- Lisp_Object elt;
- int charset;
-
- elt = XCHAR_TABLE (subtable)->contents[i];
- XSETFASTINT (indices[depth], i);
- charset = XFASTINT (indices[0]) - 128;
- if (depth == 0
- && (!CHARSET_DEFINED_P (charset)
- || charset == CHARSET_8_BIT_CONTROL
- || charset == CHARSET_8_BIT_GRAPHIC))
- continue;
-
- if (SUB_CHAR_TABLE_P (elt))
- {
- if (depth >= 3)
- error ("Too deep char table");
- map_char_table (c_function, function, table, elt, arg, depth + 1, indices);
- }
- else
- {
- int c1, c2, c;
-
- c1 = depth >= 1 ? XFASTINT (indices[1]) : 0;
- c2 = depth >= 2 ? XFASTINT (indices[2]) : 0;
- c = MAKE_CHAR (charset, c1, c2);
-
- if (NILP (elt))
- elt = XCHAR_TABLE (subtable)->defalt;
- if (NILP (elt))
- elt = Faref (table, make_number (c));
-
- if (c_function)
- (*c_function) (arg, make_number (c), elt);
- else
- call2 (function, make_number (c), elt);
- }
- }
- UNGCPRO;
-}
-
-static void void_call2 P_ ((Lisp_Object a, Lisp_Object b, Lisp_Object c));
-static void
-void_call2 (a, b, c)
- Lisp_Object a, b, c;
-{
- call2 (a, b, c);
-}
-
-DEFUN ("map-char-table", Fmap_char_table, Smap_char_table,
- 2, 2, 0,
- doc: /* Call FUNCTION for each (normal and generic) characters in CHAR-TABLE.
-FUNCTION is called with two arguments--a key and a value.
-The key is always a possible IDX argument to `aref'. */)
- (function, char_table)
- Lisp_Object function, char_table;
-{
- /* The depth of char table is at most 3. */
- Lisp_Object indices[3];
-
- CHECK_CHAR_TABLE (char_table);
-
- /* When Lisp_Object is represented as a union, `call2' cannot directly
- be passed to map_char_table because it returns a Lisp_Object rather
- than returning nothing.
- Casting leads to crashes on some architectures. -stef */
- map_char_table (void_call2, Qnil, char_table, char_table, function, 0, indices);
- return Qnil;
-}
-
-/* Return a value for character C in char-table TABLE. Store the
- actual index for that value in *IDX. Ignore the default value of
- TABLE. */
-
-Lisp_Object
-char_table_ref_and_index (table, c, idx)
- Lisp_Object table;
- int c, *idx;
-{
- int charset, c1, c2;
- Lisp_Object elt;
-
- if (SINGLE_BYTE_CHAR_P (c))
- {
- *idx = c;
- return XCHAR_TABLE (table)->contents[c];
- }
- SPLIT_CHAR (c, charset, c1, c2);
- elt = XCHAR_TABLE (table)->contents[charset + 128];
- *idx = MAKE_CHAR (charset, 0, 0);
- if (!SUB_CHAR_TABLE_P (elt))
- return elt;
- if (c1 < 32 || NILP (XCHAR_TABLE (elt)->contents[c1]))
- return XCHAR_TABLE (elt)->defalt;
- elt = XCHAR_TABLE (elt)->contents[c1];
- *idx = MAKE_CHAR (charset, c1, 0);
- if (!SUB_CHAR_TABLE_P (elt))
- return elt;
- if (c2 < 32 || NILP (XCHAR_TABLE (elt)->contents[c2]))
- return XCHAR_TABLE (elt)->defalt;
- *idx = c;
- return XCHAR_TABLE (elt)->contents[c2];
-}
-
-
/* ARGSUSED */
Lisp_Object
nconc2 (s1, s2)
@@ -3106,6 +2469,8 @@ SEQUENCE may be a list, a vector, a bool-vector, or a string. */)
USE_SAFE_ALLOCA;
len = Flength (sequence);
+ if (CHAR_TABLE_P (sequence))
+ wrong_type_argument (Qlistp, sequence);
leni = XINT (len);
nargs = leni + leni - 1;
if (nargs < 0) return build_string ("");
@@ -3142,6 +2507,8 @@ SEQUENCE may be a list, a vector, a bool-vector, or a string. */)
USE_SAFE_ALLOCA;
len = Flength (sequence);
+ if (CHAR_TABLE_P (sequence))
+ wrong_type_argument (Qlistp, sequence);
leni = XFASTINT (len);
SAFE_ALLOCA_LISP (args, leni);
@@ -3164,6 +2531,8 @@ SEQUENCE may be a list, a vector, a bool-vector, or a string. */)
register int leni;
leni = XFASTINT (Flength (sequence));
+ if (CHAR_TABLE_P (sequence))
+ wrong_type_argument (Qlistp, sequence);
mapcar1 (leni, 0, function, sequence);
return sequence;
@@ -3957,7 +3326,9 @@ base64_encode_1 (from, to, length, line_break, multibyte)
if (multibyte)
{
c = STRING_CHAR_AND_LENGTH (from + i, length - i, bytes);
- if (c >= 256)
+ if (CHAR_BYTE8_P (c))
+ c = CHAR_TO_BYTE8 (c);
+ else if (c >= 256)
return -1;
i += bytes;
}
@@ -3995,7 +3366,9 @@ base64_encode_1 (from, to, length, line_break, multibyte)
if (multibyte)
{
c = STRING_CHAR_AND_LENGTH (from + i, length - i, bytes);
- if (c >= 256)
+ if (CHAR_BYTE8_P (c))
+ c = CHAR_TO_BYTE8 (c);
+ else if (c >= 256)
return -1;
i += bytes;
}
@@ -4017,7 +3390,9 @@ base64_encode_1 (from, to, length, line_break, multibyte)
if (multibyte)
{
c = STRING_CHAR_AND_LENGTH (from + i, length - i, bytes);
- if (c >= 256)
+ if (CHAR_BYTE8_P (c))
+ c = CHAR_TO_BYTE8 (c);
+ else if (c >= 256)
return -1;
i += bytes;
}
@@ -4167,8 +3542,8 @@ base64_decode_1 (from, to, length, multibyte, nchars_return)
value |= base64_char_to_value[c] << 12;
c = (unsigned char) (value >> 16);
- if (multibyte)
- e += CHAR_STRING (c, e);
+ if (multibyte && c >= 128)
+ e += BYTE8_STRING (c, e);
else
*e++ = c;
nchars++;
@@ -4191,8 +3566,8 @@ base64_decode_1 (from, to, length, multibyte, nchars_return)
value |= base64_char_to_value[c] << 6;
c = (unsigned char) (0xff & value >> 8);
- if (multibyte)
- e += CHAR_STRING (c, e);
+ if (multibyte && c >= 128)
+ e += BYTE8_STRING (c, e);
else
*e++ = c;
nchars++;
@@ -4209,8 +3584,8 @@ base64_decode_1 (from, to, length, multibyte, nchars_return)
value |= base64_char_to_value[c];
c = (unsigned char) (0xff & value);
- if (multibyte)
- e += CHAR_STRING (c, e);
+ if (multibyte && c >= 128)
+ e += BYTE8_STRING (c, e);
else
*e++ = c;
nchars++;
@@ -5477,7 +4852,6 @@ including negative integers. */)
************************************************************************/
#include "md5.h"
-#include "coding.h"
DEFUN ("md5", Fmd5, Smd5, 1, 5, 0,
doc: /* Return MD5 message digest of OBJECT, a buffer or string.
@@ -5528,7 +4902,7 @@ guesswork fails. Normally, an error is signaled in such case. */)
if (STRING_MULTIBYTE (object))
/* use default, we can't guess correct value */
- coding_system = SYMBOL_VALUE (XCAR (Vcoding_category_list));
+ coding_system = preferred_coding_system ();
else
coding_system = Qraw_text;
}
@@ -5544,7 +4918,7 @@ guesswork fails. Normally, an error is signaled in such case. */)
}
if (STRING_MULTIBYTE (object))
- object = code_convert_string1 (object, coding_system, Qnil, 1);
+ object = code_convert_string (object, coding_system, Qnil, 1, 0, 1);
size = SCHARS (object);
size_byte = SBYTES (object);
@@ -5686,7 +5060,7 @@ guesswork fails. Normally, an error is signaled in such case. */)
specpdl_ptr--;
if (STRING_MULTIBYTE (object))
- object = code_convert_string1 (object, coding_system, Qnil, 1);
+ object = code_convert_string (object, coding_system, Qnil, 1, 0, 0);
}
md5_buffer (SDATA (object) + start_byte,
@@ -5848,16 +5222,6 @@ used if both `use-dialog-box' and this variable are non-nil. */);
defsubr (&Sequal_including_properties);
defsubr (&Sfillarray);
defsubr (&Sclear_string);
- defsubr (&Schar_table_subtype);
- defsubr (&Schar_table_parent);
- defsubr (&Sset_char_table_parent);
- defsubr (&Schar_table_extra_slot);
- defsubr (&Sset_char_table_extra_slot);
- defsubr (&Schar_table_range);
- defsubr (&Sset_char_table_range);
- defsubr (&Sset_char_table_default);
- defsubr (&Soptimize_char_table);
- defsubr (&Smap_char_table);
defsubr (&Snconc);
defsubr (&Smapcar);
defsubr (&Smapc);
diff --git a/src/font.c b/src/font.c
new file mode 100644
index 00000000000..80e23b4ec67
--- /dev/null
+++ b/src/font.c
@@ -0,0 +1,3357 @@
+/* font.c -- "Font" primitives.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ Copyright (C) 2006
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs; see the file COPYING. If not, write to
+the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "lisp.h"
+#include "buffer.h"
+#include "frame.h"
+#include "dispextern.h"
+#include "charset.h"
+#include "character.h"
+#include "composite.h"
+#include "fontset.h"
+#include "font.h"
+
+#ifndef FONT_DEBUG
+#define FONT_DEBUG
+#endif
+
+#ifdef FONT_DEBUG
+#undef xassert
+#define xassert(X) do {if (!(X)) abort ();} while (0)
+#else
+#define xassert(X) (void) 0
+#endif
+
+int enable_font_backend;
+
+Lisp_Object Qfontp;
+
+/* Important character set symbols. */
+Lisp_Object Qiso8859_1, Qiso10646_1, Qunicode_bmp;
+
+/* Like CHECK_FONT_SPEC but also validate properties of the font-spec,
+ and set X to the validated result. */
+
+#define CHECK_VALIDATE_FONT_SPEC(x) \
+ do { \
+ if (! FONT_SPEC_P (x)) x = wrong_type_argument (Qfont, x); \
+ x = font_prop_validate (x); \
+ } while (0)
+
+/* Number of pt per inch (from the TeXbook). */
+#define PT_PER_INCH 72.27
+
+/* Return a pixel size (integer) corresponding to POINT size (double)
+ on resolution DPI. */
+#define POINT_TO_PIXEL(POINT, DPI) ((POINT) * (DPI) / PT_PER_INCH + 0.5)
+
+/* Return a point size (double) corresponding to POINT size (integer)
+ on resolution DPI. */
+#define PIXEL_TO_POINT(PIXEL, DPI) ((PIXEL) * PT_PER_INCH * 10 / (DPI) + 0.5)
+
+/* Special string of zero length. It is used to specify a NULL name
+ in a font properties (e.g. adstyle). We don't use the symbol of
+ NULL name because it's confusing (Lisp printer prints nothing for
+ it). */
+Lisp_Object null_string;
+
+/* Special vector of zero length. This is repeatedly used by (struct
+ font_driver *)->list when a specified font is not found. */
+Lisp_Object null_vector;
+
+/* Vector of 3 elements. Each element is an alist for one of font
+ style properties (weight, slant, width). The alist contains a
+ mapping between symbolic property values (e.g. `medium' for weight)
+ and numeric property values (e.g. 100). So, it looks like this:
+ [((thin . 0) ... (heavy . 210))
+ ((ro . 0) ... (ot . 210))
+ ((ultracondensed . 50) ... (wide . 200))] */
+static Lisp_Object font_style_table;
+
+/* Alist of font family vs the corresponding aliases.
+ Each element has this form:
+ (FAMILY ALIAS1 ALIAS2 ...) */
+
+static Lisp_Object font_family_alist;
+
+/* Symbols representing keys of normal font properties. */
+extern Lisp_Object QCtype, QCfamily, QCweight, QCslant, QCwidth, QCsize, QCname;
+Lisp_Object QCfoundry, QCadstyle, QCregistry, QCextra;
+/* Symbols representing keys of font extra info. */
+Lisp_Object QCspacing, QCdpi, QCscalable, QCotf, QClanguage, QCscript;
+/* Symbols representing values of font spacing property. */
+Lisp_Object Qc, Qm, Qp, Qd;
+
+/* List of all font drivers. All font-backends (XXXfont.c) call
+ add_font_driver in syms_of_XXXfont to register the font-driver
+ here. */
+static struct font_driver_list *font_driver_list;
+
+static int font_pixel_size P_ ((FRAME_PTR f, Lisp_Object));
+static Lisp_Object prop_name_to_numeric P_ ((enum font_property_index,
+ Lisp_Object));
+static Lisp_Object prop_numeric_to_name P_ ((enum font_property_index, int));
+static Lisp_Object font_open_entity P_ ((FRAME_PTR, Lisp_Object, int));
+static void build_font_family_alist P_ ((void));
+
+/* Number of registered font drivers. */
+static int num_font_drivers;
+
+/* Return a pixel size of font-spec SPEC on frame F. */
+
+static int
+font_pixel_size (f, spec)
+ FRAME_PTR f;
+ Lisp_Object spec;
+{
+ Lisp_Object size = AREF (spec, FONT_SIZE_INDEX);
+ double point_size;
+ int pixel_size, dpi;
+ Lisp_Object extra, val;
+
+ if (INTEGERP (size))
+ return XINT (size);
+ if (NILP (size))
+ return 0;
+ point_size = XFLOAT_DATA (size);
+ extra = AREF (spec, FONT_EXTRA_INDEX);
+ val = assq_no_quit (extra, QCdpi);
+ if (CONSP (val))
+ {
+ if (INTEGERP (XCDR (val)))
+ dpi = XINT (XCDR (val));
+ else
+ dpi = XFLOAT_DATA (XCDR (val)) + 0.5;
+ }
+ else
+ dpi = f->resy;
+ pixel_size = POINT_TO_PIXEL (point_size, dpi);
+ return pixel_size;
+}
+
+/* Return a numeric value corresponding to PROP's NAME (symbol). If
+ NAME is not registered in font_style_table, return Qnil. PROP must
+ be one of FONT_{WEIGHT|SLANT|SWIDTH}_INDEX. */
+
+static Lisp_Object
+prop_name_to_numeric (prop, name)
+ enum font_property_index prop;
+ Lisp_Object name;
+{
+ int table_index = prop - FONT_WEIGHT_INDEX;
+ Lisp_Object val;
+
+ val = assq_no_quit (name, AREF (font_style_table, table_index));
+ return (NILP (val) ? Qnil : XCDR (val));
+}
+
+
+/* Return a name (symbol) corresponding to PROP's NUMERIC value. If
+ no name is registered for NUMERIC in font_style_table, return a
+ symbol of integer name (e.g. `123'). PROP must be one of
+ FONT_{WEIGHT|SLANT|SWIDTH}_INDEX. */
+
+static Lisp_Object
+prop_numeric_to_name (prop, numeric)
+ enum font_property_index prop;
+ int numeric;
+{
+ int table_index = prop - FONT_WEIGHT_INDEX;
+ Lisp_Object table = AREF (font_style_table, table_index);
+ char buf[10];
+
+ while (! NILP (table))
+ {
+ if (XINT (XCDR (XCAR (table))) >= numeric)
+ {
+ if (XINT (XCDR (XCAR (table))) == numeric)
+ return XCAR (XCAR (table));
+ else
+ break;
+ }
+ table = XCDR (table);
+ }
+ sprintf (buf, "%d", numeric);
+ return intern (buf);
+}
+
+
+/* Return a symbol whose name is STR (length LEN). If STR contains
+ uppercase letters, downcase them in advance. */
+
+Lisp_Object
+intern_downcase (str, len)
+ char *str;
+ int len;
+{
+ char *buf;
+ int i;
+
+ for (i = 0; i < len; i++)
+ if (isupper (str[i]))
+ break;
+ if (i == len)
+ return Fintern (make_unibyte_string (str, len), Qnil);
+ buf = alloca (len);
+ if (! buf)
+ return Fintern (null_string, Qnil);
+ bcopy (str, buf, len);
+ for (; i < len; i++)
+ if (isascii (buf[i]))
+ buf[i] = tolower (buf[i]);
+ return Fintern (make_unibyte_string (buf, len), Qnil);
+}
+
+extern Lisp_Object Vface_alternative_font_family_alist;
+
+static void
+build_font_family_alist ()
+{
+ Lisp_Object alist = Vface_alternative_font_family_alist;
+
+ for (; CONSP (alist); alist = XCDR (alist))
+ {
+ Lisp_Object tail, elt;
+
+ for (tail = XCAR (alist), elt = Qnil ; CONSP (tail); tail = XCDR (tail))
+ elt = nconc2 (elt, Fcons (Fintern (XCAR (tail), Qnil), Qnil));
+ font_family_alist = Fcons (elt, font_family_alist);
+ }
+}
+
+
+/* Font property validater. */
+
+static Lisp_Object font_prop_validate_symbol P_ ((enum font_property_index,
+ Lisp_Object, Lisp_Object));
+static Lisp_Object font_prop_validate_style P_ ((enum font_property_index,
+ Lisp_Object, Lisp_Object));
+static Lisp_Object font_prop_validate_non_neg P_ ((enum font_property_index,
+ Lisp_Object, Lisp_Object));
+static Lisp_Object font_prop_validate_spacing P_ ((enum font_property_index,
+ Lisp_Object, Lisp_Object));
+static int get_font_prop_index P_ ((Lisp_Object, int));
+static Lisp_Object font_prop_validate P_ ((Lisp_Object));
+static Lisp_Object font_put_extra P_ ((Lisp_Object, Lisp_Object, Lisp_Object));
+
+static Lisp_Object
+font_prop_validate_symbol (prop_index, prop, val)
+ enum font_property_index prop_index;
+ Lisp_Object prop, val;
+{
+ if (EQ (prop, QCotf))
+ return (SYMBOLP (val) ? val : Qerror);
+ if (STRINGP (val))
+ val = (SCHARS (val) == 0 ? null_string
+ : intern_downcase ((char *) SDATA (val), SBYTES (val)));
+ else if (SYMBOLP (val))
+ {
+ if (SCHARS (SYMBOL_NAME (val)) == 0)
+ val = null_string;
+ }
+ else
+ val = Qerror;
+ return val;
+}
+
+static Lisp_Object
+font_prop_validate_style (prop_index, prop, val)
+ enum font_property_index prop_index;
+ Lisp_Object prop, val;
+{
+ if (! INTEGERP (val))
+ {
+ if (STRINGP (val))
+ val = intern_downcase ((char *) SDATA (val), SBYTES (val));
+ if (! SYMBOLP (val))
+ val = Qerror;
+ else
+ {
+ val = prop_name_to_numeric (prop_index, val);
+ if (NILP (val))
+ val = Qerror;
+ }
+ }
+ return val;
+}
+
+static Lisp_Object
+font_prop_validate_non_neg (prop_index, prop, val)
+ enum font_property_index prop_index;
+ Lisp_Object prop, val;
+{
+ return (NATNUMP (val) || (FLOATP (val) && XFLOAT_DATA (val) >= 0)
+ ? val : Qerror);
+}
+
+static Lisp_Object
+font_prop_validate_spacing (prop_index, prop, val)
+ enum font_property_index prop_index;
+ Lisp_Object prop, val;
+{
+ if (NILP (val) || (NATNUMP (val) && XINT (val) <= FONT_SPACING_CHARCELL))
+ return val;
+ if (EQ (val, Qc))
+ return make_number (FONT_SPACING_CHARCELL);
+ if (EQ (val, Qm))
+ return make_number (FONT_SPACING_MONO);
+ if (EQ (val, Qp))
+ return make_number (FONT_SPACING_PROPORTIONAL);
+ return Qerror;
+}
+
+/* Structure of known font property keys and validater of the
+ values. */
+struct
+{
+ /* Pointer to the key symbol. */
+ Lisp_Object *key;
+ /* Function to validate the value VAL, or NULL if any value is ok. */
+ Lisp_Object (*validater) P_ ((enum font_property_index prop_index,
+ Lisp_Object prop, Lisp_Object val));
+} font_property_table[] =
+ { { &QCtype, font_prop_validate_symbol },
+ { &QCfoundry, font_prop_validate_symbol },
+ { &QCfamily, font_prop_validate_symbol },
+ { &QCadstyle, font_prop_validate_symbol },
+ { &QCregistry, font_prop_validate_symbol },
+ { &QCweight, font_prop_validate_style },
+ { &QCslant, font_prop_validate_style },
+ { &QCwidth, font_prop_validate_style },
+ { &QCsize, font_prop_validate_non_neg },
+ { &QClanguage, font_prop_validate_symbol },
+ { &QCscript, font_prop_validate_symbol },
+ { &QCdpi, font_prop_validate_non_neg },
+ { &QCspacing, font_prop_validate_spacing },
+ { &QCscalable, NULL },
+ { &QCotf, font_prop_validate_symbol }
+ };
+
+#define FONT_PROPERTY_TABLE_SIZE \
+ ((sizeof font_property_table) / (sizeof *font_property_table))
+
+static int
+get_font_prop_index (key, from)
+ Lisp_Object key;
+ int from;
+{
+ for (; from < FONT_PROPERTY_TABLE_SIZE; from++)
+ if (EQ (key, *font_property_table[from].key))
+ return from;
+ return -1;
+}
+
+static Lisp_Object
+font_prop_validate (spec)
+ Lisp_Object spec;
+{
+ int i;
+ Lisp_Object prop, val, extra;
+
+ for (i = FONT_TYPE_INDEX; i < FONT_EXTRA_INDEX; i++)
+ {
+ if (! NILP (AREF (spec, i)))
+ {
+ prop = *font_property_table[i].key;
+ val = (font_property_table[i].validater) (i, prop, AREF (spec, i));
+ if (EQ (val, Qerror))
+ Fsignal (Qfont, list2 (build_string ("invalid font property"),
+ Fcons (prop, AREF (spec, i))));
+ ASET (spec, i, val);
+ }
+ }
+ for (extra = AREF (spec, FONT_EXTRA_INDEX);
+ CONSP (extra); extra = XCDR (extra))
+ {
+ Lisp_Object elt = XCAR (extra);
+
+ prop = XCAR (elt);
+ i = get_font_prop_index (prop, FONT_EXTRA_INDEX);
+ if (i >= 0
+ && font_property_table[i].validater)
+ {
+ val = (font_property_table[i].validater) (i, prop, XCDR (elt));
+ if (EQ (val, Qerror))
+ Fsignal (Qfont, list2 (build_string ("invalid font property"),
+ elt));
+ XSETCDR (elt, val);
+ }
+ }
+ return spec;
+}
+
+static Lisp_Object
+font_put_extra (font, prop, val)
+ Lisp_Object font, prop, val;
+{
+ Lisp_Object extra = AREF (font, FONT_EXTRA_INDEX);
+ Lisp_Object slot = (NILP (extra) ? Qnil : assq_no_quit (prop, extra));
+
+ if (NILP (slot))
+ {
+ extra = Fcons (Fcons (prop, val), extra);
+ ASET (font, FONT_EXTRA_INDEX, extra);
+ return val;
+ }
+ XSETCDR (slot, val);
+ return val;
+}
+
+
+/* Font name parser and unparser */
+
+static Lisp_Object intern_font_field P_ ((char *, int));
+static int parse_matrix P_ ((char *));
+static int font_expand_wildcards P_ ((Lisp_Object *, int));
+static int font_parse_name P_ ((char *, Lisp_Object));
+
+/* An enumerator for each field of an XLFD font name. */
+enum xlfd_field_index
+{
+ XLFD_FOUNDRY_INDEX,
+ XLFD_FAMILY_INDEX,
+ XLFD_WEIGHT_INDEX,
+ XLFD_SLANT_INDEX,
+ XLFD_SWIDTH_INDEX,
+ XLFD_ADSTYLE_INDEX,
+ XLFD_PIXEL_INDEX,
+ XLFD_POINT_INDEX,
+ XLFD_RESX_INDEX,
+ XLFD_RESY_INDEX,
+ XLFD_SPACING_INDEX,
+ XLFD_AVGWIDTH_INDEX,
+ XLFD_REGISTRY_INDEX,
+ XLFD_ENCODING_INDEX,
+ XLFD_LAST_INDEX
+};
+
+/* An enumerator for mask bit corresponding to each XLFD field. */
+enum xlfd_field_mask
+{
+ XLFD_FOUNDRY_MASK = 0x0001,
+ XLFD_FAMILY_MASK = 0x0002,
+ XLFD_WEIGHT_MASK = 0x0004,
+ XLFD_SLANT_MASK = 0x0008,
+ XLFD_SWIDTH_MASK = 0x0010,
+ XLFD_ADSTYLE_MASK = 0x0020,
+ XLFD_PIXEL_MASK = 0x0040,
+ XLFD_POINT_MASK = 0x0080,
+ XLFD_RESX_MASK = 0x0100,
+ XLFD_RESY_MASK = 0x0200,
+ XLFD_SPACING_MASK = 0x0400,
+ XLFD_AVGWIDTH_MASK = 0x0800,
+ XLFD_REGISTRY_MASK = 0x1000,
+ XLFD_ENCODING_MASK = 0x2000
+};
+
+
+/* Return a Lispy value of a XLFD font field at STR and LEN bytes.
+ If LEN is zero, it returns `null_string'.
+ If STR is "*", it returns nil.
+ If all characters in STR are digits, it returns an integer.
+ Otherwise, it returns a symbol interned from downcased STR. */
+
+static Lisp_Object
+intern_font_field (str, len)
+ char *str;
+ int len;
+{
+ int i;
+
+ if (len == 0)
+ return null_string;
+ if (*str == '*' && len == 1)
+ return Qnil;
+ if (isdigit (*str))
+ {
+ for (i = 1; i < len; i++)
+ if (! isdigit (str[i]))
+ break;
+ if (i == len)
+ return make_number (atoi (str));
+ }
+ return intern_downcase (str, len);
+}
+
+/* Parse P pointing the pixel/point size field of the form
+ `[A B C D]' which specifies a transformation matrix:
+
+ A B 0
+ C D 0
+ 0 0 1
+
+ by which all glyphs of the font are transformed. The spec says
+ that scalar value N for the pixel/point size is equivalent to:
+ A = N * resx/resy, B = C = 0, D = N.
+
+ Return the scalar value N if the form is valid. Otherwise return
+ -1. */
+
+static int
+parse_matrix (p)
+ char *p;
+{
+ double matrix[4];
+ char *end;
+ int i;
+
+ for (i = 0, p++; i < 4 && *p && *p != ']'; i++)
+ {
+ if (*p == '~')
+ matrix[i] = - strtod (p + 1, &end);
+ else
+ matrix[i] = strtod (p, &end);
+ p = end;
+ }
+ return (i == 4 ? (int) matrix[3] : -1);
+}
+
+/* Expand a wildcard field in FIELD (the first N fields are filled) to
+ multiple fields to fill in all 14 XLFD fields while restring a
+ field position by its contents. */
+
+static int
+font_expand_wildcards (field, n)
+ Lisp_Object field[XLFD_LAST_INDEX];
+ int n;
+{
+ /* Copy of FIELD. */
+ Lisp_Object tmp[XLFD_LAST_INDEX];
+ /* Array of information about where this element can go. Nth
+ element is for Nth element of FIELD. */
+ struct {
+ /* Minimum possible field. */
+ int from;
+ /* Maxinum possible field. */
+ int to;
+ /* Bit mask of possible field. Nth bit corresponds to Nth field. */
+ int mask;
+ } range[XLFD_LAST_INDEX];
+ int i, j;
+ int range_from, range_to;
+ unsigned range_mask;
+
+#define XLFD_SYMBOL_MASK (XLFD_FOUNDRY_MASK | XLFD_FAMILY_MASK \
+ | XLFD_ADSTYLE_MASK | XLFD_REGISTRY_MASK)
+#define XLFD_NULL_MASK (XLFD_FOUNDRY_MASK | XLFD_ADSTYLE_MASK)
+#define XLFD_LARGENUM_MASK (XLFD_POINT_MASK | XLFD_RESX_MASK | XLFD_RESY_MASK \
+ | XLFD_AVGWIDTH_MASK)
+#define XLFD_REGENC_MASK (XLFD_REGISTRY_MASK | XLFD_ENCODING_MASK)
+
+ /* Initialize RANGE_MASK for FIELD[0] which can be 0th to (14 - N)th
+ field. The value is shifted to left one bit by one in the
+ following loop. */
+ for (i = 0, range_mask = 0; i <= 14 - n; i++)
+ range_mask = (range_mask << 1) | 1;
+
+ /* The triplet RANGE_FROM, RANGE_TO, and RANGE_MASK is a
+ position-based retriction for FIELD[I]. */
+ for (i = 0, range_from = 0, range_to = 14 - n; i < n;
+ i++, range_from++, range_to++, range_mask <<= 1)
+ {
+ Lisp_Object val = field[i];
+
+ tmp[i] = val;
+ if (NILP (val))
+ {
+ /* Wildcard. */
+ range[i].from = range_from;
+ range[i].to = range_to;
+ range[i].mask = range_mask;
+ }
+ else
+ {
+ /* The triplet FROM, TO, and MASK is a value-based
+ retriction for FIELD[I]. */
+ int from, to;
+ unsigned mask;
+
+ if (INTEGERP (val))
+ {
+ int numeric = XINT (val);
+
+ if (i + 1 == n)
+ from = to = XLFD_ENCODING_INDEX,
+ mask = XLFD_ENCODING_MASK;
+ else if (numeric == 0)
+ from = XLFD_PIXEL_INDEX, to = XLFD_AVGWIDTH_INDEX,
+ mask = XLFD_PIXEL_MASK | XLFD_LARGENUM_MASK;
+ else if (numeric <= 48)
+ from = to = XLFD_PIXEL_INDEX,
+ mask = XLFD_PIXEL_MASK;
+ else
+ from = XLFD_POINT_INDEX, to = XLFD_AVGWIDTH_INDEX,
+ mask = XLFD_LARGENUM_MASK;
+ }
+ else if (EQ (val, null_string))
+ from = XLFD_FOUNDRY_INDEX, to = XLFD_ADSTYLE_INDEX,
+ mask = XLFD_NULL_MASK;
+ else if (i == 0)
+ from = to = XLFD_FOUNDRY_INDEX, mask = XLFD_FOUNDRY_MASK;
+ else if (i + 1 == n)
+ {
+ Lisp_Object name = SYMBOL_NAME (val);
+
+ if (SDATA (name)[SBYTES (name) - 1] == '*')
+ from = XLFD_REGISTRY_INDEX, to = XLFD_ENCODING_INDEX,
+ mask = XLFD_REGENC_MASK;
+ else
+ from = to = XLFD_ENCODING_INDEX,
+ mask = XLFD_ENCODING_MASK;
+ }
+ else if (range_from <= XLFD_WEIGHT_INDEX
+ && range_to >= XLFD_WEIGHT_INDEX
+ && !NILP (prop_name_to_numeric (FONT_WEIGHT_INDEX, val)))
+ from = to = XLFD_WEIGHT_INDEX, mask = XLFD_WEIGHT_MASK;
+ else if (range_from <= XLFD_SLANT_INDEX
+ && range_to >= XLFD_SLANT_INDEX
+ && !NILP (prop_name_to_numeric (FONT_SLANT_INDEX, val)))
+ from = to = XLFD_SLANT_INDEX, mask = XLFD_SLANT_MASK;
+ else if (range_from <= XLFD_SWIDTH_INDEX
+ && range_to >= XLFD_SWIDTH_INDEX
+ && !NILP (prop_name_to_numeric (FONT_WIDTH_INDEX, val)))
+ from = to = XLFD_SWIDTH_INDEX, mask = XLFD_SWIDTH_MASK;
+ else
+ {
+ if (EQ (val, Qc) || EQ (val, Qm) || EQ (val, Qp) || EQ (val, Qd))
+ from = to = XLFD_SPACING_INDEX, mask = XLFD_SPACING_MASK;
+ else
+ from = XLFD_FOUNDRY_INDEX, to = XLFD_ENCODING_INDEX,
+ mask = XLFD_SYMBOL_MASK;
+ }
+
+ /* Merge position-based and value-based restrictions. */
+ mask &= range_mask;
+ while (from < range_from)
+ mask &= ~(1 << from++);
+ while (from < 14 && ! (mask & (1 << from)))
+ from++;
+ while (to > range_to)
+ mask &= ~(1 << to--);
+ while (to >= 0 && ! (mask & (1 << to)))
+ to--;
+ if (from > to)
+ return -1;
+ range[i].from = from;
+ range[i].to = to;
+ range[i].mask = mask;
+
+ if (from > range_from || to < range_to)
+ {
+ /* The range is narrowed by value-based restrictions.
+ Reflect it to the other fields. */
+
+ /* Following fields should be after FROM. */
+ range_from = from;
+ /* Preceding fields should be before TO. */
+ for (j = i - 1, from--, to--; j >= 0; j--, from--, to--)
+ {
+ /* Check FROM for non-wildcard field. */
+ if (! NILP (tmp[j]) && range[j].from < from)
+ {
+ while (range[j].from < from)
+ range[j].mask &= ~(1 << range[j].from++);
+ while (from < 14 && ! (range[j].mask & (1 << from)))
+ from++;
+ range[j].from = from;
+ }
+ else
+ from = range[j].from;
+ if (range[j].to > to)
+ {
+ while (range[j].to > to)
+ range[j].mask &= ~(1 << range[j].to--);
+ while (to >= 0 && ! (range[j].mask & (1 << to)))
+ to--;
+ range[j].to = to;
+ }
+ else
+ to = range[j].to;
+ if (from > to)
+ return -1;
+ }
+ }
+ }
+ }
+
+ /* Decide all fileds from restrictions in RANGE. */
+ for (i = j = 0; i < n ; i++)
+ {
+ if (j < range[i].from)
+ {
+ if (i == 0 || ! NILP (tmp[i - 1]))
+ /* None of TMP[X] corresponds to Jth field. */
+ return -1;
+ for (; j < range[i].from; j++)
+ field[j] = Qnil;
+ }
+ field[j++] = tmp[i];
+ }
+ if (! NILP (tmp[n - 1]) && j < XLFD_REGISTRY_INDEX)
+ return -1;
+ for (; j < XLFD_LAST_INDEX; j++)
+ field[j] = Qnil;
+ if (INTEGERP (field[XLFD_ENCODING_INDEX]))
+ field[XLFD_ENCODING_INDEX]
+ = Fintern (Fnumber_to_string (field[XLFD_ENCODING_INDEX]), Qnil);
+ return 0;
+}
+
+/* Parse NAME (null terminated) as XLFD and store information in FONT
+ (font-spec or font-entity). Size property of FONT is set as
+ follows:
+ specified XLFD fields FONT property
+ --------------------- -------------
+ PIXEL_SIZE PIXEL_SIZE (Lisp integer)
+ POINT_SIZE and RESY calculated pixel size (Lisp integer)
+ POINT_SIZE POINT_SIZE/10 (Lisp float)
+
+ If NAME is successfully parsed, return 0. Otherwise return -1.
+
+ FONT is usually a font-spec, but when this function is called from
+ X font backend driver, it is a font-entity. In that case, NAME is
+ a fully specified XLFD, and we set FONT_EXTRA_INDEX of FONT to a
+ symbol RESX-RESY-SPACING-AVGWIDTH.
+*/
+
+int
+font_parse_xlfd (name, font)
+ char *name;
+ Lisp_Object font;
+{
+ int len = strlen (name);
+ int i, j;
+ Lisp_Object dpi, spacing;
+ int avgwidth;
+ char *f[XLFD_LAST_INDEX];
+ Lisp_Object val;
+ char *p;
+
+ if (len > 255)
+ /* Maximum XLFD name length is 255. */
+ return -1;
+ /* Accept "*-.." as a fully specified XLFD. */
+ if (name[0] == '*' && name[1] == '-')
+ i = 1, f[XLFD_FOUNDRY_INDEX] = name;
+ else
+ i = 0;
+ for (p = name + i; *p; p++)
+ if (*p == '-' && i < XLFD_LAST_INDEX)
+ f[i++] = p + 1;
+ f[i] = p;
+
+ dpi = spacing = Qnil;
+ avgwidth = -1;
+
+ if (i == XLFD_LAST_INDEX)
+ {
+ int pixel_size;
+
+ /* Fully specified XLFD. */
+ for (i = 0, j = FONT_FOUNDRY_INDEX; i < XLFD_WEIGHT_INDEX; i++, j++)
+ {
+ val = intern_font_field (f[i], f[i + 1] - 1 - f[i]);
+ if (! NILP (val))
+ ASET (font, j, val);
+ }
+ for (j = FONT_WEIGHT_INDEX; i < XLFD_ADSTYLE_INDEX; i++, j++)
+ {
+ val = intern_font_field (f[i], f[i + 1] - 1 - f[i]);
+ if (! NILP (val))
+ {
+ Lisp_Object numeric = prop_name_to_numeric (j, val);
+
+ if (INTEGERP (numeric))
+ val = numeric;
+ ASET (font, j, val);
+ }
+ }
+ val = intern_font_field (f[i], f[i + 1] - 1 - f[i]);
+ if (! NILP (val))
+ ASET (font, FONT_ADSTYLE_INDEX, val);
+ i = XLFD_REGISTRY_INDEX;
+ val = intern_font_field (f[i], f[i + 2] - f[i]);
+ if (! NILP (val))
+ ASET (font, FONT_REGISTRY_INDEX, val);
+
+ p = f[XLFD_PIXEL_INDEX];
+ if (*p == '[' && (pixel_size = parse_matrix (p)) >= 0)
+ ASET (font, FONT_SIZE_INDEX, make_number (pixel_size));
+ else
+ {
+ i = XLFD_PIXEL_INDEX;
+ val = intern_font_field (f[i], f[i + 1] - 1 - f[i]);
+ if (! NILP (val))
+ ASET (font, FONT_SIZE_INDEX, val);
+ else
+ {
+ double point_size = -1;
+
+ xassert (FONT_SPEC_P (font));
+ p = f[XLFD_POINT_INDEX];
+ if (*p == '[')
+ point_size = parse_matrix (p);
+ else if (isdigit (*p))
+ point_size = atoi (p), point_size /= 10;
+ if (point_size >= 0)
+ ASET (font, FONT_SIZE_INDEX, make_float (point_size));
+ else
+ {
+ i = XLFD_PIXEL_INDEX;
+ val = intern_font_field (f[i], f[i + 1] - 1 - f[i]);
+ if (! NILP (val))
+ ASET (font, FONT_SIZE_INDEX, val);
+ }
+ }
+ }
+
+ /* Parse RESX, RESY, SPACING, and AVGWIDTH. */
+ if (FONT_ENTITY_P (font))
+ {
+ i = XLFD_RESX_INDEX;
+ ASET (font, FONT_EXTRA_INDEX,
+ intern_font_field (f[i], f[XLFD_REGISTRY_INDEX] - 1 - f[i]));
+ return 0;
+ }
+
+ /* Here we just setup DPI, SPACING, and AVGWIDTH. They are set
+ in FONT_EXTRA_INDEX later. */
+ i = XLFD_RESX_INDEX;
+ dpi = intern_font_field (f[i], f[i + 1] - 1 - f[i]);
+ i = XLFD_SPACING_INDEX;
+ spacing = intern_font_field (f[i], f[i + 1] - 1 - f[i]);
+ p = f[XLFD_AVGWIDTH_INDEX];
+ if (*p == '~')
+ p++;
+ if (isdigit (*p))
+ avgwidth = atoi (p);
+ }
+ else
+ {
+ int wild_card_found = 0;
+ Lisp_Object prop[XLFD_LAST_INDEX];
+
+ for (j = 0; j < i; j++)
+ {
+ if (*f[j] == '*')
+ {
+ if (f[j][1] && f[j][1] != '-')
+ return -1;
+ prop[j] = Qnil;
+ wild_card_found = 1;
+ }
+ else if (isdigit (*f[j]))
+ {
+ for (p = f[j] + 1; isdigit (*p); p++);
+ if (*p && *p != '-')
+ prop[j] = intern_downcase (f[j], p - f[j]);
+ else
+ prop[j] = make_number (atoi (f[j]));
+ }
+ else if (j + 1 < i)
+ prop[j] = intern_font_field (f[j], f[j + 1] - 1 - f[j]);
+ else
+ prop[j] = intern_font_field (f[j], f[i] - f[j]);
+ }
+ if (! wild_card_found)
+ return -1;
+ if (font_expand_wildcards (prop, i) < 0)
+ return -1;
+
+ for (i = 0, j = FONT_FOUNDRY_INDEX; i < XLFD_WEIGHT_INDEX; i++, j++)
+ if (! NILP (prop[i]))
+ ASET (font, j, prop[i]);
+ for (j = FONT_WEIGHT_INDEX; i < XLFD_ADSTYLE_INDEX; i++, j++)
+ if (! NILP (prop[i]))
+ ASET (font, j, prop[i]);
+ if (! NILP (prop[XLFD_ADSTYLE_INDEX]))
+ ASET (font, FONT_ADSTYLE_INDEX, prop[XLFD_ADSTYLE_INDEX]);
+ val = prop[XLFD_REGISTRY_INDEX];
+ if (NILP (val))
+ {
+ val = prop[XLFD_ENCODING_INDEX];
+ if (! NILP (val))
+ val = Fintern (concat2 (build_string ("*-"), SYMBOL_NAME (val)),
+ Qnil);
+ }
+ else if (NILP (prop[XLFD_ENCODING_INDEX]))
+ val = Fintern (concat2 (SYMBOL_NAME (val), build_string ("-*")),
+ Qnil);
+ else
+ val = Fintern (concat3 (SYMBOL_NAME (val), build_string ("-"),
+ SYMBOL_NAME (prop[XLFD_ENCODING_INDEX])),
+ Qnil);
+ if (! NILP (val))
+ ASET (font, FONT_REGISTRY_INDEX, val);
+
+ if (INTEGERP (prop[XLFD_PIXEL_INDEX]))
+ ASET (font, FONT_SIZE_INDEX, prop[XLFD_PIXEL_INDEX]);
+ else if (INTEGERP (prop[XLFD_POINT_INDEX]))
+ {
+ double point_size = XINT (prop[XLFD_POINT_INDEX]);
+
+ ASET (font, FONT_SIZE_INDEX, make_float (point_size / 10));
+ }
+
+ dpi = prop[XLFD_RESX_INDEX];
+ spacing = prop[XLFD_SPACING_INDEX];
+ if (INTEGERP (prop[XLFD_AVGWIDTH_INDEX]))
+ avgwidth = XINT (prop[XLFD_AVGWIDTH_INDEX]);
+ }
+
+ if (! NILP (dpi))
+ font_put_extra (font, QCdpi, dpi);
+ if (! NILP (spacing))
+ font_put_extra (font, QCspacing, spacing);
+ if (avgwidth >= 0)
+ font_put_extra (font, QCscalable, avgwidth == 0 ? Qt : Qnil);
+
+ return 0;
+}
+
+/* Store XLFD name of FONT (font-spec or font-entity) in NAME (NBYTES
+ length), and return the name length. If FONT_SIZE_INDEX of FONT is
+ 0, use PIXEL_SIZE instead. */
+
+int
+font_unparse_xlfd (font, pixel_size, name, nbytes)
+ Lisp_Object font;
+ int pixel_size;
+ char *name;
+ int nbytes;
+{
+ char *f[XLFD_REGISTRY_INDEX + 1];
+ Lisp_Object val;
+ int i, j, len = 0;
+
+ xassert (FONTP (font));
+
+ for (i = FONT_FOUNDRY_INDEX, j = XLFD_FOUNDRY_INDEX; i <= FONT_REGISTRY_INDEX;
+ i++, j++)
+ {
+ if (i == FONT_ADSTYLE_INDEX)
+ j = XLFD_ADSTYLE_INDEX;
+ else if (i == FONT_REGISTRY_INDEX)
+ j = XLFD_REGISTRY_INDEX;
+ val = AREF (font, i);
+ if (NILP (val))
+ {
+ if (j == XLFD_REGISTRY_INDEX)
+ f[j] = "*-*", len += 4;
+ else
+ f[j] = "*", len += 2;
+ }
+ else
+ {
+ if (SYMBOLP (val))
+ val = SYMBOL_NAME (val);
+ if (j == XLFD_REGISTRY_INDEX
+ && ! strchr ((char *) SDATA (val), '-'))
+ {
+ /* Change "jisx0208*" and "jisx0208" to "jisx0208*-*". */
+ if (SDATA (val)[SBYTES (val) - 1] == '*')
+ {
+ f[j] = alloca (SBYTES (val) + 3);
+ sprintf (f[j], "%s-*", SDATA (val));
+ len += SBYTES (val) + 3;
+ }
+ else
+ {
+ f[j] = alloca (SBYTES (val) + 4);
+ sprintf (f[j], "%s*-*", SDATA (val));
+ len += SBYTES (val) + 4;
+ }
+ }
+ else
+ f[j] = (char *) SDATA (val), len += SBYTES (val) + 1;
+ }
+ }
+
+ for (i = FONT_WEIGHT_INDEX, j = XLFD_WEIGHT_INDEX; i <= FONT_WIDTH_INDEX;
+ i++, j++)
+ {
+ val = AREF (font, i);
+ if (NILP (val))
+ f[j] = "*", len += 2;
+ else
+ {
+ if (INTEGERP (val))
+ val = prop_numeric_to_name (i, XINT (val));
+ if (SYMBOLP (val))
+ val = SYMBOL_NAME (val);
+ xassert (STRINGP (val));
+ f[j] = (char *) SDATA (val), len += SBYTES (val) + 1;
+ }
+ }
+
+ val = AREF (font, FONT_SIZE_INDEX);
+ xassert (NUMBERP (val) || NILP (val));
+ if (INTEGERP (val))
+ {
+ f[XLFD_PIXEL_INDEX] = alloca (22);
+ i = XINT (val);
+ if (i > 0)
+ len += sprintf (f[XLFD_PIXEL_INDEX], "%d-*", i) + 1;
+ else /* i == 0 */
+ len += sprintf (f[XLFD_PIXEL_INDEX], "%d-*", pixel_size) + 1;
+ }
+ else if (FLOATP (val))
+ {
+ f[XLFD_PIXEL_INDEX] = alloca (12);
+ i = XFLOAT_DATA (val) * 10;
+ len += sprintf (f[XLFD_PIXEL_INDEX], "*-%d", i) + 1;
+ }
+ else
+ f[XLFD_PIXEL_INDEX] = "*-*", len += 4;
+
+ val = AREF (font, FONT_EXTRA_INDEX);
+
+ if (FONT_ENTITY_P (font)
+ && EQ (AREF (font, FONT_TYPE_INDEX), Qx))
+ {
+ /* Setup names for RESX-RESY-SPACING-AVWIDTH. */
+ if (SYMBOLP (val) && ! NILP (val))
+ {
+ val = SYMBOL_NAME (val);
+ f[XLFD_RESX_INDEX] = (char *) SDATA (val), len += SBYTES (val) + 1;
+ }
+ else
+ f[XLFD_RESX_INDEX] = "*-*-*-*", len += 6;
+ }
+ else
+ {
+ Lisp_Object dpi = assq_no_quit (QCdpi, val);
+ Lisp_Object spacing = assq_no_quit (QCspacing, val);
+ Lisp_Object scalable = assq_no_quit (QCscalable, val);
+
+ if (CONSP (dpi) || CONSP (spacing) || CONSP (scalable))
+ {
+ char *str = alloca (24);
+ int this_len;
+
+ if (CONSP (dpi) && INTEGERP (XCDR (dpi)))
+ this_len = sprintf (str, "%d-%d",
+ XINT (XCDR (dpi)), XINT (XCDR (dpi)));
+ else
+ this_len = sprintf (str, "*-*");
+ if (CONSP (spacing) && ! NILP (XCDR (spacing)))
+ {
+ val = XCDR (spacing);
+ if (INTEGERP (val))
+ {
+ if (XINT (val) < FONT_SPACING_MONO)
+ val = Qp;
+ else if (XINT (val) < FONT_SPACING_CHARCELL)
+ val = Qm;
+ else
+ val = Qc;
+ }
+ xassert (SYMBOLP (val));
+ this_len += sprintf (str + this_len, "-%c",
+ SDATA (SYMBOL_NAME (val))[0]);
+ }
+ else
+ this_len += sprintf (str + this_len, "-*");
+ if (CONSP (scalable) && ! NILP (XCDR (spacing)))
+ this_len += sprintf (str + this_len, "-0");
+ else
+ this_len += sprintf (str + this_len, "-*");
+ f[XLFD_RESX_INDEX] = str;
+ len += this_len;
+ }
+ else
+ f[XLFD_RESX_INDEX] = "*-*-*-*", len += 8;
+ }
+
+ len++; /* for terminating '\0'. */
+ if (len >= nbytes)
+ return -1;
+ return sprintf (name, "-%s-%s-%s-%s-%s-%s-%s-%s-%s",
+ f[XLFD_FOUNDRY_INDEX], f[XLFD_FAMILY_INDEX],
+ f[XLFD_WEIGHT_INDEX], f[XLFD_SLANT_INDEX],
+ f[XLFD_SWIDTH_INDEX],
+ f[XLFD_ADSTYLE_INDEX], f[XLFD_PIXEL_INDEX],
+ f[XLFD_RESX_INDEX], f[XLFD_REGISTRY_INDEX]);
+}
+
+/* Parse NAME (null terminated) as Fonconfig's name format and store
+ information in FONT (font-spec or font-entity). If NAME is
+ successfully parsed, return 0. Otherwise return -1. */
+
+int
+font_parse_fcname (name, font)
+ char *name;
+ Lisp_Object font;
+{
+ char *p0, *p1;
+ int len = strlen (name);
+ char *copy;
+
+ if (len == 0)
+ return -1;
+ /* It is assured that (name[0] && name[0] != '-'). */
+ if (name[0] == ':')
+ p0 = name;
+ else
+ {
+ Lisp_Object family;
+ double point_size;
+
+ for (p0 = name + 1; *p0 && (*p0 != '-' && *p0 != ':'); p0++)
+ if (*p0 == '\\' && p0[1])
+ p0++;
+ family = intern_font_field (name, p0 - name);
+ if (*p0 == '-')
+ {
+ if (! isdigit (p0[1]))
+ return -1;
+ point_size = strtod (p0 + 1, &p1);
+ if (*p1 && *p1 != ':')
+ return -1;
+ ASET (font, FONT_SIZE_INDEX, make_float (point_size));
+ p0 = p1;
+ }
+ ASET (font, FONT_FAMILY_INDEX, family);
+ }
+
+ len -= p0 - name;
+ copy = alloca (len + 1);
+ if (! copy)
+ return -1;
+ name = copy;
+
+ /* Now parse ":KEY=VAL" patterns. Store known keys and values in
+ extra, copy unknown ones to COPY. */
+ while (*p0)
+ {
+ Lisp_Object key, val;
+ int prop;
+
+ for (p1 = p0 + 1; *p1 && *p1 != '=' && *p1 != ':'; p1++);
+ if (*p1 != '=')
+ {
+ /* Must be an enumerated value. */
+ val = intern_font_field (p0 + 1, p1 - p0 - 1);
+ if (memcmp (p0 + 1, "light", 5) == 0
+ || memcmp (p0 + 1, "medium", 6) == 0
+ || memcmp (p0 + 1, "demibold", 8) == 0
+ || memcmp (p0 + 1, "bold", 4) == 0
+ || memcmp (p0 + 1, "black", 5) == 0)
+ {
+ ASET (font, FONT_WEIGHT_INDEX, val);
+ }
+ else if (memcmp (p0 + 1, "roman", 5) == 0
+ || memcmp (p0 + 1, "italic", 6) == 0
+ || memcmp (p0 + 1, "oblique", 7) == 0)
+ {
+ ASET (font, FONT_SLANT_INDEX, val);
+ }
+ else if (memcmp (p0 + 1, "charcell", 8) == 0
+ || memcmp (p0 + 1, "mono", 4) == 0
+ || memcmp (p0 + 1, "proportional", 12) == 0)
+ {
+ font_put_extra (font, QCspacing,
+ (p0[1] == 'c' ? Qc : p0[1] == 'm' ? Qm : Qp));
+ }
+ else
+ {
+ /* unknown key */
+ bcopy (p0, copy, p1 - p0);
+ copy += p1 - p0;
+ }
+ }
+ else
+ {
+ char *pbeg = p0;
+
+ if (memcmp (p0 + 1, "pixelsize=", 10) == 0)
+ prop = FONT_SIZE_INDEX;
+ else
+ {
+ key = intern_font_field (p0, p1 - p0);
+ prop = get_font_prop_index (key, 0);
+ }
+ p0 = p1 + 1;
+ for (p1 = p0; *p1 && *p1 != ':'; p1++);
+ val = intern_font_field (p0, p1 - p0);
+ if (! NILP (val))
+ {
+ if (prop >= 0 && prop < FONT_EXTRA_INDEX)
+ {
+ ASET (font, prop, val);
+ }
+ else if (prop > 0)
+ font_put_extra (font, key, val);
+ else
+ {
+ /* Unknown attribute, keep it in name. */
+ bcopy (pbeg, copy, p1 - pbeg);
+ copy += p1 - pbeg;
+ }
+ }
+ }
+ p0 = p1;
+ }
+
+ if (name < copy)
+ font_put_extra (font, QCname, make_unibyte_string (name, copy - name));
+
+ return 0;
+}
+
+/* Store fontconfig's font name of FONT (font-spec or font-entity) in
+ NAME (NBYTES length), and return the name length. If
+ FONT_SIZE_INDEX of FONT is 0, use PIXEL_SIZE instead. */
+
+int
+font_unparse_fcname (font, pixel_size, name, nbytes)
+ Lisp_Object font;
+ int pixel_size;
+ char *name;
+ int nbytes;
+{
+ Lisp_Object val;
+ int point_size;
+ int dpi, spacing, scalable;
+ int i, len = 1;
+ char *p;
+ Lisp_Object styles[3];
+ char *style_names[3] = { "weight", "slant", "swidth" };
+
+ val = AREF (font, FONT_FAMILY_INDEX);
+ if (SYMBOLP (val) && ! NILP (val))
+ len += SBYTES (SYMBOL_NAME (val));
+
+ val = AREF (font, FONT_SIZE_INDEX);
+ if (INTEGERP (val))
+ {
+ if (XINT (val) != 0)
+ pixel_size = XINT (val);
+ point_size = -1;
+ len += 21; /* for ":pixelsize=NUM" */
+ }
+ else if (FLOATP (val))
+ {
+ pixel_size = -1;
+ point_size = (int) XFLOAT_DATA (val);
+ len += 11; /* for "-NUM" */
+ }
+
+ val = AREF (font, FONT_FOUNDRY_INDEX);
+ if (! NILP (val))
+ /* ":foundry=NAME" */
+ len += 9 + SBYTES (SYMBOL_NAME (val));
+
+ for (i = FONT_WEIGHT_INDEX; i <= FONT_WIDTH_INDEX; i++)
+ {
+ val = AREF (font, i);
+ if (INTEGERP (val))
+ {
+ val = prop_numeric_to_name (i, XINT (val));
+ len += (strlen (style_names[i - FONT_WEIGHT_INDEX])
+ + 2 + SBYTES (SYMBOL_NAME (val))); /* :xxx=NAME */
+ }
+ styles[i - FONT_WEIGHT_INDEX] = val;
+ }
+
+ val = AREF (font, FONT_EXTRA_INDEX);
+ if (FONT_ENTITY_P (font)
+ && EQ (AREF (font, FONT_TYPE_INDEX), Qx))
+ {
+ char *p;
+
+ /* VAL is a symbol of name `RESX-RESY-SPACING-AVWIDTH'. */
+ p = (char *) SDATA (SYMBOL_NAME (val));
+ dpi = atoi (p);
+ for (p++; *p != '-'; p++); /* skip RESX */
+ for (p++; *p != '-'; p++); /* skip RESY */
+ spacing = (*p == 'c' ? FONT_SPACING_CHARCELL
+ : *p == 'm' ? FONT_SPACING_MONO
+ : FONT_SPACING_PROPORTIONAL);
+ for (p++; *p != '-'; p++); /* skip SPACING */
+ scalable = (atoi (p) == 0);
+ /* The longest pattern is ":dpi=NUM:scalable=False:spacing=100" */
+ len += 42;
+ }
+ else
+ {
+ Lisp_Object elt;
+
+ dpi = spacing = scalable = -1;
+ elt = assq_no_quit (QCdpi, val);
+ if (CONSP (elt))
+ dpi = XINT (XCDR (elt)), len += 15; /* for ":dpi=NUM" */
+ elt = assq_no_quit (QCspacing, val);
+ if (CONSP (elt))
+ spacing = XINT (XCDR (elt)), len += 12; /* for ":spacing=100" */
+ elt = assq_no_quit (QCscalable, val);
+ if (CONSP (elt))
+ scalable = ! NILP (XCDR (elt)), len += 15; /* for ":scalable=False" */
+ }
+
+ if (len > nbytes)
+ return -1;
+ p = name;
+ if (! NILP (AREF (font, FONT_FAMILY_INDEX)))
+ p += sprintf(p, "%s",
+ SDATA (SYMBOL_NAME (AREF (font, FONT_FAMILY_INDEX))));
+ if (point_size > 0)
+ {
+ if (p == name)
+ p += sprintf (p, "%d", point_size);
+ else
+ p += sprintf (p, "-%d", point_size);
+ }
+ else if (pixel_size > 0)
+ p += sprintf (p, ":pixelsize=%d", pixel_size);
+ if (SYMBOLP (AREF (font, FONT_FOUNDRY_INDEX))
+ && ! NILP (AREF (font, FONT_FOUNDRY_INDEX)))
+ p += sprintf (p, ":foundry=%s",
+ SDATA (SYMBOL_NAME (AREF (font, FONT_FOUNDRY_INDEX))));
+ for (i = 0; i < 3; i++)
+ if (! NILP (styles [i]))
+ p += sprintf (p, ":%s=%s", style_names[i],
+ SDATA (SYMBOL_NAME (styles [i])));
+ if (dpi >= 0)
+ p += sprintf (p, ":dpi=%d", dpi);
+ if (spacing >= 0)
+ p += sprintf (p, ":spacing=%d", spacing);
+ if (scalable > 0)
+ p += sprintf (p, ":scalable=True");
+ else if (scalable == 0)
+ p += sprintf (p, ":scalable=False");
+ return (p - name);
+}
+
+/* Parse NAME (null terminated) and store information in FONT
+ (font-spec or font-entity). If NAME is successfully parsed, return
+ 0. Otherwise return -1.
+
+ If NAME is XLFD and FONT is a font-entity, store
+ RESX-RESY-SPACING-AVWIDTH information as a symbol in
+ FONT_EXTRA_INDEX. */
+
+static int
+font_parse_name (name, font)
+ char *name;
+ Lisp_Object font;
+{
+ if (name[0] == '-' || index (name, '*'))
+ {
+ if (font_parse_xlfd (name, font) == 0)
+ return 0;
+ font_put_extra (font, QCname, make_unibyte_string (name, strlen (name)));
+ return -1;
+ }
+ font_put_extra (font, QCname, make_unibyte_string (name, strlen (name)));
+ return font_parse_fcname (name, font);
+}
+
+void
+font_merge_old_spec (name, family, registry, spec)
+ Lisp_Object name, family, registry, spec;
+{
+ if (STRINGP (name))
+ {
+ if (font_parse_xlfd ((char *) SDATA (name), spec) < 0)
+ {
+ Lisp_Object extra = Fcons (Fcons (QCname, name), Qnil);
+
+ ASET (spec, FONT_EXTRA_INDEX, extra);
+ }
+ }
+ else
+ {
+ if (! NILP (family))
+ {
+ int len;
+ char *p0, *p1;
+
+ xassert (STRINGP (family));
+ len = SBYTES (family);
+ p0 = (char *) SDATA (family);
+ p1 = index (p0, '-');
+ if (p1)
+ {
+ if ((*p0 != '*' || p1 - p0 > 1)
+ && NILP (AREF (spec, FONT_FOUNDRY_INDEX)))
+ ASET (spec, FONT_FOUNDRY_INDEX,
+ intern_downcase (p0, p1 - p0));
+ if (NILP (AREF (spec, FONT_FAMILY_INDEX)))
+ ASET (spec, FONT_FAMILY_INDEX,
+ intern_downcase (p1 + 1, len - (p1 + 1 - p0)));
+ }
+ else if (NILP (AREF (spec, FONT_FAMILY_INDEX)))
+ ASET (spec, FONT_FAMILY_INDEX, intern_downcase (p0, len));
+ }
+ if (! NILP (registry)
+ && NILP (AREF (spec, FONT_REGISTRY_INDEX)))
+ ASET (spec, FONT_REGISTRY_INDEX,
+ intern_downcase ((char *) SDATA (registry), SBYTES (registry)));
+ }
+}
+
+
+/* OTF handler */
+
+#ifdef HAVE_LIBOTF
+#include <otf.h>
+
+struct otf_list
+{
+ Lisp_Object entity;
+ OTF *otf;
+ struct otf_list *next;
+};
+
+static struct otf_list *otf_list;
+
+static Lisp_Object
+otf_tag_symbol (tag)
+ OTF_Tag tag;
+{
+ char name[5];
+
+ OTF_tag_name (tag, name);
+ return Fintern (make_unibyte_string (name, 4), Qnil);
+}
+
+static OTF *
+otf_open (entity, file)
+ Lisp_Object entity;
+ char *file;
+{
+ struct otf_list *list = otf_list;
+
+ while (list && ! EQ (list->entity, entity))
+ list = list->next;
+ if (! list)
+ {
+ list = malloc (sizeof (struct otf_list));
+ list->entity = entity;
+ list->otf = file ? OTF_open (file) : NULL;
+ list->next = otf_list;
+ otf_list = list;
+ }
+ return list->otf;
+}
+
+
+/* Return a list describing which scripts/languages FONT supports by
+ which GSUB/GPOS features of OpenType tables. See the comment of
+ (sturct font_driver).otf_capability. */
+
+Lisp_Object
+font_otf_capability (font)
+ struct font *font;
+{
+ OTF *otf;
+ Lisp_Object capability = Fcons (Qnil, Qnil);
+ int i;
+
+ otf = otf_open (font->entity, font->file_name);
+ if (! otf)
+ return Qnil;
+ for (i = 0; i < 2; i++)
+ {
+ OTF_GSUB_GPOS *gsub_gpos;
+ Lisp_Object script_list = Qnil;
+ int j;
+
+ if (OTF_get_features (otf, i == 0) < 0)
+ continue;
+ gsub_gpos = i == 0 ? otf->gsub : otf->gpos;
+ for (j = gsub_gpos->ScriptList.ScriptCount - 1; j >= 0; j--)
+ {
+ OTF_Script *script = gsub_gpos->ScriptList.Script + j;
+ Lisp_Object langsys_list = Qnil;
+ Lisp_Object script_tag = otf_tag_symbol (script->ScriptTag);
+ int k;
+
+ for (k = script->LangSysCount; k >= 0; k--)
+ {
+ OTF_LangSys *langsys;
+ Lisp_Object feature_list = Qnil;
+ Lisp_Object langsys_tag;
+ int l;
+
+ if (j == script->LangSysCount)
+ {
+ langsys = &script->DefaultLangSys;
+ langsys_tag = Qnil;
+ }
+ else
+ {
+ langsys = script->LangSys + k;
+ langsys_tag
+ = otf_tag_symbol (script->LangSysRecord[k].LangSysTag);
+ }
+ for (l = langsys->FeatureCount -1; l >= 0; l--)
+ {
+ OTF_Feature *feature
+ = gsub_gpos->FeatureList.Feature + langsys->FeatureIndex[l];
+ Lisp_Object feature_tag
+ = otf_tag_symbol (feature->FeatureTag);
+
+ feature_list = Fcons (feature_tag, feature_list);
+ }
+ langsys_list = Fcons (Fcons (langsys_tag, feature_list),
+ langsys_list);
+ }
+ script_list = Fcons (Fcons (script_tag, langsys_list),
+ script_list);
+ }
+
+ if (i == 0)
+ XSETCAR (capability, script_list);
+ else
+ XSETCDR (capability, script_list);
+ }
+
+ return capability;
+}
+
+static int
+parse_gsub_gpos_spec (spec, script, langsys, features)
+ Lisp_Object spec;
+ char **script, **langsys, **features;
+{
+ Lisp_Object val;
+ int len;
+ char *p;
+ int asterisk;
+
+ val = XCAR (spec);
+ *script = (char *) SDATA (SYMBOL_NAME (val));
+ spec = XCDR (spec);
+ val = XCAR (spec);
+ *langsys = NILP (val) ? NULL : (char *) SDATA (SYMBOL_NAME (val));
+ spec = XCDR (spec);
+ len = XINT (Flength (spec));
+ *features = p = malloc (6 * len);
+ if (! p)
+ return -1;
+
+ for (asterisk = 0; CONSP (spec); spec = XCDR (spec))
+ {
+ val = XCAR (spec);
+ if (SREF (SYMBOL_NAME (val), 0) == '*')
+ {
+ asterisk = 1;
+ p += sprintf (p, ",*");
+ }
+ else if (! asterisk)
+ p += sprintf (p, ",%s", SDATA (SYMBOL_NAME (val)));
+ else
+ p += sprintf (p, ",~%s", SDATA (SYMBOL_NAME (val)));
+ }
+ return 0;
+}
+
+#define DEVICE_DELTA(table, size) \
+ (((size) >= (table).StartSize && (size) <= (table).EndSize) \
+ ? (table).DeltaValue[(size) - (table).StartSize] \
+ : 0)
+
+void
+adjust_anchor (struct font *font, OTF_Anchor *anchor,
+ unsigned code, int size, int *x, int *y)
+{
+ if (anchor->AnchorFormat == 2)
+ {
+ int x0, y0;
+
+ if (font->driver->anchor_point (font, code, anchor->f.f1.AnchorPoint,
+ &x0, &y0) >= 0)
+ *x = x0, *y = y0;
+ }
+ else if (anchor->AnchorFormat == 3)
+ {
+ if (anchor->f.f2.XDeviceTable.offset)
+ *x += DEVICE_DELTA (anchor->f.f2.XDeviceTable, size);
+ if (anchor->f.f2.YDeviceTable.offset)
+ *y += DEVICE_DELTA (anchor->f.f2.YDeviceTable, size);
+ }
+}
+
+
+/* Drive FONT's OTF GSUB features according to GSUB_SPEC. See the
+ comment of (sturct font_driver).otf_gsub. */
+
+int
+font_otf_gsub (font, gsub_spec, gstring_in, from, to, gstring_out, idx)
+ struct font *font;
+ Lisp_Object gsub_spec;
+ Lisp_Object gstring_in;
+ int from, to;
+ Lisp_Object gstring_out;
+ int idx;
+{
+ int len;
+ int i;
+ OTF *otf;
+ OTF_GlyphString otf_gstring;
+ OTF_Glyph *g;
+ char *script, *langsys, *features;
+
+ otf = otf_open (font->entity, font->file_name);
+ if (! otf)
+ return 0;
+ if (OTF_get_table (otf, "head") < 0)
+ return 0;
+ if (OTF_check_table (otf, "GSUB") < 0)
+ return 0;
+ if (parse_gsub_gpos_spec (gsub_spec, &script, &langsys, &features) < 0)
+ return 0;
+ len = to - from;
+ otf_gstring.size = otf_gstring.used = len;
+ otf_gstring.glyphs = (OTF_Glyph *) malloc (sizeof (OTF_Glyph) * len);
+ memset (otf_gstring.glyphs, 0, sizeof (OTF_Glyph) * len);
+ for (i = 0; i < len; i++)
+ {
+ Lisp_Object g = LGSTRING_GLYPH (gstring_in, from + i);
+
+ otf_gstring.glyphs[i].c = XINT (LGLYPH_CHAR (g));
+ otf_gstring.glyphs[i].glyph_id = XINT (LGLYPH_CODE (g));
+ }
+
+ OTF_drive_gdef (otf, &otf_gstring);
+ if (OTF_drive_gsub (otf, &otf_gstring, script, langsys, features) < 0)
+ {
+ free (otf_gstring.glyphs);
+ return 0;
+ }
+ if (ASIZE (gstring_out) < idx + otf_gstring.used)
+ {
+ free (otf_gstring.glyphs);
+ return -1;
+ }
+
+ for (i = 0, g = otf_gstring.glyphs; i < otf_gstring.used;)
+ {
+ int i0 = g->f.index.from, i1 = g->f.index.to;
+ Lisp_Object glyph = LGSTRING_GLYPH (gstring_in, from + i0);
+ Lisp_Object min_idx = AREF (glyph, 0);
+ Lisp_Object max_idx = AREF (glyph, 1);
+
+ if (i0 < i1)
+ {
+ int min_idx_i = XINT (min_idx), max_idx_i = XINT (max_idx);
+
+ for (i0++; i0 <= i1; i0++)
+ {
+ glyph = LGSTRING_GLYPH (gstring_in, from + i0);
+ if (min_idx_i > XINT (AREF (glyph, 0)))
+ min_idx_i = XINT (AREF (glyph, 0));
+ if (max_idx_i < XINT (AREF (glyph, 1)))
+ max_idx_i = XINT (AREF (glyph, 1));
+ }
+ min_idx = make_number (min_idx_i);
+ max_idx = make_number (max_idx_i);
+ i0 = g->f.index.from;
+ }
+ for (; i < otf_gstring.used && g->f.index.from == i0; i++, g++)
+ {
+ glyph = LGSTRING_GLYPH (gstring_out, idx + i);
+ ASET (glyph, 0, min_idx);
+ ASET (glyph, 1, max_idx);
+ LGLYPH_SET_CHAR (glyph, make_number (g->c));
+ LGLYPH_SET_CODE (glyph, make_number (g->glyph_id));
+ }
+ }
+
+ free (otf_gstring.glyphs);
+ return i;
+}
+
+/* Drive FONT's OTF GPOS features according to GPOS_SPEC. See the
+ comment of (sturct font_driver).otf_gpos. */
+
+int
+font_otf_gpos (font, gpos_spec, gstring, from, to)
+ struct font *font;
+ Lisp_Object gpos_spec;
+ Lisp_Object gstring;
+ int from, to;
+{
+ int len;
+ int i;
+ OTF *otf;
+ OTF_GlyphString otf_gstring;
+ OTF_Glyph *g;
+ char *script, *langsys, *features;
+ Lisp_Object glyph;
+ int u, size;
+ Lisp_Object base, mark;
+
+ otf = otf_open (font->entity, font->file_name);
+ if (! otf)
+ return 0;
+ if (OTF_get_table (otf, "head") < 0)
+ return 0;
+ if (OTF_check_table (otf, "GPOS") < 0)
+ return 0;
+ if (parse_gsub_gpos_spec (gpos_spec, &script, &langsys, &features) < 0)
+ return 0;
+ len = to - from;
+ otf_gstring.size = otf_gstring.used = len;
+ otf_gstring.glyphs = (OTF_Glyph *) malloc (sizeof (OTF_Glyph) * len);
+ memset (otf_gstring.glyphs, 0, sizeof (OTF_Glyph) * len);
+ for (i = 0; i < len; i++)
+ {
+ glyph = LGSTRING_GLYPH (gstring, from + i);
+ otf_gstring.glyphs[i].glyph_id = XINT (LGLYPH_CODE (glyph));
+ }
+
+ OTF_drive_gdef (otf, &otf_gstring);
+
+ if (OTF_drive_gpos (otf, &otf_gstring, script, langsys, features) < 0)
+ {
+ free (otf_gstring.glyphs);
+ return 0;
+ }
+
+ u = otf->head->unitsPerEm;
+ size = font->pixel_size;
+ base = mark = Qnil;
+ for (i = 0, g = otf_gstring.glyphs; i < otf_gstring.used; i++, g++)
+ {
+ Lisp_Object prev;
+ int xoff = 0, yoff = 0, width_adjust = 0;
+
+ if (! g->glyph_id)
+ continue;
+
+ glyph = LGSTRING_GLYPH (gstring, from + i);
+ switch (g->positioning_type)
+ {
+ case 0:
+ break;
+ case 1: case 2:
+ {
+ int format = g->f.f1.format;
+
+ if (format & OTF_XPlacement)
+ xoff = g->f.f1.value->XPlacement * size / u;
+ if (format & OTF_XPlaDevice)
+ xoff += DEVICE_DELTA (g->f.f1.value->XPlaDevice, size);
+ if (format & OTF_YPlacement)
+ yoff = - (g->f.f1.value->YPlacement * size / u);
+ if (format & OTF_YPlaDevice)
+ yoff -= DEVICE_DELTA (g->f.f1.value->YPlaDevice, size);
+ if (format & OTF_XAdvance)
+ width_adjust += g->f.f1.value->XAdvance * size / u;
+ if (format & OTF_XAdvDevice)
+ width_adjust += DEVICE_DELTA (g->f.f1.value->XAdvDevice, size);
+ }
+ break;
+ case 3:
+ /* Not yet supported. */
+ break;
+ case 4: case 5:
+ if (NILP (base))
+ break;
+ prev = base;
+ goto label_adjust_anchor;
+ default: /* i.e. case 6 */
+ if (NILP (mark))
+ break;
+ prev = mark;
+
+ label_adjust_anchor:
+ {
+ int base_x, base_y, mark_x, mark_y, width;
+ unsigned code;
+
+ base_x = g->f.f4.base_anchor->XCoordinate * size / u;
+ base_y = g->f.f4.base_anchor->YCoordinate * size / u;
+ mark_x = g->f.f4.mark_anchor->XCoordinate * size / u;
+ mark_y = g->f.f4.mark_anchor->YCoordinate * size / u;
+
+ code = XINT (LGLYPH_CODE (prev));
+ if (g->f.f4.base_anchor->AnchorFormat != 1)
+ adjust_anchor (font, g->f.f4.base_anchor,
+ code, size, &base_x, &base_y);
+ if (g->f.f4.mark_anchor->AnchorFormat != 1)
+ adjust_anchor (font, g->f.f4.mark_anchor,
+ code, size, &mark_x, &mark_y);
+
+ if (NILP (LGLYPH_WIDTH (prev)))
+ {
+ width = font->driver->text_extents (font, &code, 1, NULL);
+ LGLYPH_SET_WIDTH (prev, make_number (width));
+ }
+ else
+ width = XINT (LGLYPH_WIDTH (prev));
+ xoff = XINT (LGLYPH_XOFF (prev)) + (base_x - width) - mark_x;
+ yoff = XINT (LGLYPH_YOFF (prev)) + mark_y - base_y;
+ }
+ }
+
+ if (xoff || yoff || width_adjust)
+ {
+ Lisp_Object adjustment = Fmake_vector (make_number (3), Qnil);
+
+ ASET (adjustment, 0, make_number (xoff));
+ ASET (adjustment, 1, make_number (yoff));
+ ASET (adjustment, 2, make_number (width_adjust));
+ LGLYPH_SET_ADJUSTMENT (glyph, adjustment);
+ }
+
+ if (g->GlyphClass == OTF_GlyphClass0)
+ base = mark = glyph;
+ else if (g->GlyphClass == OTF_GlyphClassMark)
+ mark = glyph;
+ else
+ base = glyph;
+ }
+
+ free (otf_gstring.glyphs);
+ return 0;
+}
+
+#endif /* HAVE_LIBOTF */
+
+
+/* glyph-string handler */
+
+/* GSTRING is a vector of this form:
+ [ [FONT-OBJECT LBEARING RBEARING WIDTH ASCENT DESCENT] GLYPH ... ]
+ and GLYPH is a vector of this form:
+ [ FROM-IDX TO-IDX C CODE [ [X-OFF Y-OFF WIDTH WADJUST] | nil] ]
+ where
+ FROM-IDX and TO-IDX are used internally and should not be touched.
+ C is a character of the glyph.
+ CODE is a glyph-code of C in FONT-OBJECT.
+ X-OFF and Y-OFF are offests to the base position for the glyph.
+ WIDTH is a normal width of the glyph.
+ WADJUST is an adjustment to the normal width of the glyph. */
+
+struct font *
+font_prepare_composition (cmp)
+ struct composition *cmp;
+{
+ Lisp_Object gstring
+ = AREF (XHASH_TABLE (composition_hash_table)->key_and_value,
+ cmp->hash_index * 2);
+ struct font *font = XSAVE_VALUE (LGSTRING_FONT (gstring))->pointer;
+ int len = LGSTRING_LENGTH (gstring);
+ int i;
+
+ cmp->font = font;
+ cmp->lbearing = cmp->rbearing = cmp->pixel_width = 0;
+ cmp->ascent = font->ascent;
+ cmp->descent = font->descent;
+
+ for (i = 0; i < len; i++)
+ {
+ Lisp_Object g = LGSTRING_GLYPH (gstring, i);
+ unsigned code = XINT (LGLYPH_CODE (g));
+ struct font_metrics metrics;
+
+ font->driver->text_extents (font, &code, 1, &metrics);
+ LGLYPH_SET_WIDTH (g, make_number (metrics.width));
+ metrics.lbearing += LGLYPH_XOFF (g);
+ metrics.rbearing += LGLYPH_XOFF (g);
+ metrics.ascent += LGLYPH_YOFF (g);
+ metrics.descent += LGLYPH_YOFF (g);
+
+ if (cmp->lbearing > cmp->pixel_width + metrics.lbearing)
+ cmp->lbearing = cmp->pixel_width + metrics.lbearing;
+ if (cmp->rbearing < cmp->pixel_width + metrics.rbearing)
+ cmp->rbearing = cmp->pixel_width + metrics.rbearing;
+ if (cmp->ascent < metrics.ascent)
+ cmp->ascent = metrics.ascent;
+ if (cmp->descent < metrics.descent)
+ cmp->descent = metrics.descent;
+ cmp->pixel_width += metrics.width + LGLYPH_WADJUST (g);
+ }
+ LGSTRING_SET_LBEARING (gstring, make_number (cmp->lbearing));
+ LGSTRING_SET_RBEARING (gstring, make_number (cmp->rbearing));
+ LGSTRING_SET_WIDTH (gstring, make_number (cmp->pixel_width));
+ LGSTRING_SET_ASCENT (gstring, make_number (cmp->ascent));
+ LGSTRING_SET_DESCENT (gstring, make_number (cmp->descent));
+
+ return font;
+}
+
+int
+font_gstring_produce (old, from, to, new, idx, code, n)
+ Lisp_Object old;
+ int from, to;
+ Lisp_Object new;
+ int idx;
+ unsigned *code;
+ int n;
+{
+ Lisp_Object min_idx, max_idx;
+ int i;
+
+ if (idx + n > ASIZE (new))
+ return -1;
+ if (from == to)
+ {
+ if (from == 0)
+ {
+ min_idx = make_number (0);
+ max_idx = make_number (1);
+ }
+ else
+ {
+ min_idx = AREF (AREF (old, from - 1), 0);
+ max_idx = AREF (AREF (old, from - 1), 1);
+ }
+ }
+ else if (from + 1 == to)
+ {
+ min_idx = AREF (AREF (old, from), 0);
+ max_idx = AREF (AREF (old, from), 1);
+ }
+ else
+ {
+ int min_idx_i = XINT (AREF (AREF (old, from), 0));
+ int max_idx_i = XINT (AREF (AREF (old, from), 1));
+
+ for (i = from + 1; i < to; i++)
+ {
+ if (min_idx_i > XINT (AREF (AREF (old, i), 0)))
+ min_idx_i = XINT (AREF (AREF (old, i), 0));
+ if (max_idx_i < XINT (AREF (AREF (old, i), 1)))
+ max_idx_i = XINT (AREF (AREF (old, i), 1));
+ }
+ min_idx = make_number (min_idx_i);
+ max_idx = make_number (max_idx_i);
+ }
+
+ for (i = 0; i < n; i++)
+ {
+ ASET (AREF (new, idx + i), 0, min_idx);
+ ASET (AREF (new, idx + i), 1, max_idx);
+ ASET (AREF (new, idx + i), 2, make_number (code[i]));
+ }
+
+ return 0;
+}
+
+/* Font sorting */
+
+static unsigned font_score P_ ((Lisp_Object, Lisp_Object *));
+static int font_compare P_ ((const void *, const void *));
+static Lisp_Object font_sort_entites P_ ((Lisp_Object, Lisp_Object,
+ Lisp_Object, Lisp_Object));
+
+/* We sort fonts by scoring each of them against a specified
+ font-spec. The score value is 32 bit (`unsigned'), and the smaller
+ the value is, the closer the font is to the font-spec.
+
+ Each 1-bit in the highest 4 bits of the score is used for atomic
+ properties FOUNDRY, FAMILY, ADSTYLE, and REGISTRY.
+
+ Each 7-bit in the lowest 28 bits are used for numeric properties
+ WEIGHT, SLANT, WIDTH, and SIZE. */
+
+/* How many bits to shift to store the difference value of each font
+ property in a score. */
+static int sort_shift_bits[FONT_SIZE_INDEX + 1];
+
+/* Score font-entity ENTITY against properties of font-spec SPEC_PROP.
+ The return value indicates how different ENTITY is compared with
+ SPEC_PROP. */
+
+static unsigned
+font_score (entity, spec_prop)
+ Lisp_Object entity, *spec_prop;
+{
+ unsigned score = 0;
+ int i;
+ /* Score four atomic fields. Maximum difference is 1. */
+ for (i = FONT_FOUNDRY_INDEX; i <= FONT_REGISTRY_INDEX; i++)
+ if (! NILP (spec_prop[i])
+ && ! EQ (spec_prop[i], AREF (entity, i)))
+ score |= 1 << sort_shift_bits[i];
+
+ /* Score four numeric fields. Maximum difference is 127. */
+ for (i = FONT_WEIGHT_INDEX; i <= FONT_SIZE_INDEX; i++)
+ {
+ Lisp_Object entity_val = AREF (entity, i);
+
+ if (! NILP (spec_prop[i]) && ! EQ (spec_prop[i], entity_val))
+ {
+ if (! INTEGERP (entity_val))
+ score |= 127 << sort_shift_bits[i];
+ else
+ {
+ int diff = XINT (entity_val) - XINT (spec_prop[i]);
+
+ if (diff < 0)
+ diff = - diff;
+ if (i == FONT_SIZE_INDEX)
+ {
+ if (XINT (entity_val) > 0
+ && diff > FONT_PIXEL_SIZE_QUANTUM)
+ score |= min (diff, 127) << sort_shift_bits[i];
+ }
+ else
+ score |= min (diff, 127) << sort_shift_bits[i];
+ }
+ }
+ }
+
+ return score;
+}
+
+
+/* The comparison function for qsort. */
+
+static int
+font_compare (d1, d2)
+ const void *d1, *d2;
+{
+ return (*(unsigned *) d1 < *(unsigned *) d2
+ ? -1 : *(unsigned *) d1 > *(unsigned *) d2);
+}
+
+
+/* The structure for elements being sorted by qsort. */
+struct font_sort_data
+{
+ unsigned score;
+ Lisp_Object entity;
+};
+
+
+/* Sort font-entities in vector VEC by closeness to font-spec PREFER.
+ If PREFER specifies a point-size, calculate the corresponding
+ pixel-size from QCdpi property of PREFER or from the Y-resolution
+ of FRAME before sorting. If SPEC is not nil, it is a font-spec to
+ get the font-entities in VEC. */
+
+static Lisp_Object
+font_sort_entites (vec, prefer, frame, spec)
+ Lisp_Object vec, prefer, frame, spec;
+{
+ Lisp_Object prefer_prop[FONT_SPEC_MAX];
+ int len, i;
+ struct font_sort_data *data;
+ USE_SAFE_ALLOCA;
+
+ len = ASIZE (vec);
+ if (len <= 1)
+ return vec;
+
+ for (i = FONT_FOUNDRY_INDEX; i <= FONT_SIZE_INDEX; i++)
+ prefer_prop[i] = AREF (prefer, i);
+
+ if (! NILP (spec))
+ {
+ /* As it is assured that all fonts in VEC match with SPEC, we
+ should ignore properties specified in SPEC. So, set the
+ corresponding properties in PREFER_PROP to nil. */
+ for (i = FONT_WEIGHT_INDEX; i <= FONT_SIZE_INDEX; i++)
+ if (! NILP (AREF (spec, i)))
+ prefer_prop[i++] = Qnil;
+ }
+
+ if (FLOATP (prefer_prop[FONT_SIZE_INDEX]))
+ prefer_prop[FONT_SIZE_INDEX]
+ = make_number (font_pixel_size (XFRAME (frame), prefer));
+
+ /* Scoring and sorting. */
+ SAFE_ALLOCA (data, struct font_sort_data *, (sizeof *data) * len);
+ for (i = 0; i < len; i++)
+ {
+ data[i].entity = AREF (vec, i);
+ data[i].score = font_score (data[i].entity, prefer_prop);
+ }
+ qsort (data, len, sizeof *data, font_compare);
+ for (i = 0; i < len; i++)
+ ASET (vec, i, data[i].entity);
+ SAFE_FREE ();
+
+ return vec;
+}
+
+
+/* API of Font Service Layer. */
+
+void
+font_update_sort_order (order)
+ int *order;
+{
+ int i, shift_bits = 21;
+
+ for (i = 0; i < 4; i++, shift_bits -= 7)
+ {
+ int xlfd_idx = order[i];
+
+ if (xlfd_idx == XLFD_WEIGHT_INDEX)
+ sort_shift_bits[FONT_WEIGHT_INDEX] = shift_bits;
+ else if (xlfd_idx == XLFD_SLANT_INDEX)
+ sort_shift_bits[FONT_SLANT_INDEX] = shift_bits;
+ else if (xlfd_idx == XLFD_SWIDTH_INDEX)
+ sort_shift_bits[FONT_WIDTH_INDEX] = shift_bits;
+ else
+ sort_shift_bits[FONT_SIZE_INDEX] = shift_bits;
+ }
+}
+
+Lisp_Object
+font_symbolic_weight (font)
+ Lisp_Object font;
+{
+ Lisp_Object weight = AREF (font, FONT_WEIGHT_INDEX);
+
+ if (INTEGERP (weight))
+ weight = prop_numeric_to_name (FONT_WEIGHT_INDEX, XINT (weight));
+ return weight;
+}
+
+Lisp_Object
+font_symbolic_slant (font)
+ Lisp_Object font;
+{
+ Lisp_Object slant = AREF (font, FONT_SLANT_INDEX);
+
+ if (INTEGERP (slant))
+ slant = prop_numeric_to_name (FONT_SLANT_INDEX, XINT (slant));
+ return slant;
+}
+
+Lisp_Object
+font_symbolic_width (font)
+ Lisp_Object font;
+{
+ Lisp_Object width = AREF (font, FONT_WIDTH_INDEX);
+
+ if (INTEGERP (width))
+ width = prop_numeric_to_name (FONT_WIDTH_INDEX, XINT (width));
+ return width;
+}
+
+int
+font_match_p (spec, entity)
+ Lisp_Object spec, entity;
+{
+ int i;
+
+ for (i = FONT_FOUNDRY_INDEX; i < FONT_SIZE_INDEX; i++)
+ if (! NILP (AREF (spec, i))
+ && ! EQ (AREF (spec, i), AREF (entity, i)))
+ return 0;
+ if (INTEGERP (AREF (spec, FONT_SIZE_INDEX))
+ && XINT (AREF (entity, FONT_SIZE_INDEX)) > 0
+ && (XINT (AREF (spec, FONT_SIZE_INDEX))
+ != XINT (AREF (entity, FONT_SIZE_INDEX))))
+ return 0;
+ return 1;
+}
+
+Lisp_Object
+font_find_object (font)
+ struct font *font;
+{
+ Lisp_Object tail, elt;
+
+ for (tail = AREF (font->entity, FONT_OBJLIST_INDEX); CONSP (tail);
+ tail = XCDR (tail))
+ {
+ elt = XCAR (tail);
+ if (font == XSAVE_VALUE (elt)->pointer
+ && XSAVE_VALUE (elt)->integer > 0)
+ return elt;
+ }
+ abort ();
+ return Qnil;
+}
+
+static Lisp_Object scratch_font_spec, scratch_font_prefer;
+
+/* Return a vector of font-entities matching with SPEC on frame F. */
+
+static Lisp_Object
+font_list_entities (frame, spec)
+ Lisp_Object frame, spec;
+{
+ FRAME_PTR f = XFRAME (frame);
+ struct font_driver_list *driver_list = f->font_driver_list;
+ Lisp_Object ftype, family, size, alternate_familes;
+ Lisp_Object *vec = alloca (sizeof (Lisp_Object) * num_font_drivers);
+ int i;
+
+ if (! vec)
+ return null_vector;
+
+ family = AREF (spec, FONT_FAMILY_INDEX);
+ if (NILP (family))
+ alternate_familes = Qnil;
+ else
+ {
+ if (NILP (font_family_alist)
+ && !NILP (Vface_alternative_font_family_alist))
+ build_font_family_alist ();
+ alternate_familes = assq_no_quit (family, font_family_alist);
+ if (! NILP (alternate_familes))
+ alternate_familes = XCDR (alternate_familes);
+ }
+ size = AREF (spec, FONT_SIZE_INDEX);
+ if (FLOATP (size))
+ ASET (spec, FONT_SIZE_INDEX, make_number (font_pixel_size (f, spec)));
+
+ xassert (ASIZE (spec) == FONT_SPEC_MAX);
+ ftype = AREF (spec, FONT_TYPE_INDEX);
+
+ for (i = 0; driver_list; driver_list = driver_list->next)
+ if (NILP (ftype) || EQ (driver_list->driver->type, ftype))
+ {
+ Lisp_Object cache = driver_list->driver->get_cache (frame);
+ Lisp_Object tail = alternate_familes;
+ Lisp_Object val;
+
+ xassert (CONSP (cache));
+ ASET (spec, FONT_TYPE_INDEX, driver_list->driver->type);
+ ASET (spec, FONT_FAMILY_INDEX, family);
+
+ while (1)
+ {
+ val = assoc_no_quit (spec, XCDR (cache));
+ if (CONSP (val))
+ val = XCDR (val);
+ else
+ {
+ val = driver_list->driver->list (frame, spec);
+ if (VECTORP (val))
+ XSETCDR (cache, Fcons (Fcons (Fcopy_sequence (spec), val),
+ XCDR (cache)));
+ }
+ if (VECTORP (val) && ASIZE (val) > 0)
+ {
+ vec[i++] = val;
+ break;
+ }
+ if (NILP (tail))
+ break;
+ ASET (spec, FONT_FAMILY_INDEX, XCAR (tail));
+ tail = XCDR (tail);
+ }
+ }
+ ASET (spec, FONT_TYPE_INDEX, ftype);
+ ASET (spec, FONT_FAMILY_INDEX, family);
+ ASET (spec, FONT_SIZE_INDEX, size);
+ return (i > 0 ? Fvconcat (i, vec) : null_vector);
+}
+
+static int num_fonts;
+
+static Lisp_Object
+font_open_entity (f, entity, pixel_size)
+ FRAME_PTR f;
+ Lisp_Object entity;
+ int pixel_size;
+{
+ struct font_driver_list *driver_list;
+ Lisp_Object objlist, size, val;
+ struct font *font;
+
+ size = AREF (entity, FONT_SIZE_INDEX);
+ xassert (NATNUMP (size));
+ if (XINT (size) != 0)
+ pixel_size = XINT (size);
+
+ for (objlist = AREF (entity, FONT_OBJLIST_INDEX); CONSP (objlist);
+ objlist = XCDR (objlist))
+ {
+ font = XSAVE_VALUE (XCAR (objlist))->pointer;
+ if (font->pixel_size == pixel_size)
+ {
+ XSAVE_VALUE (XCAR (objlist))->integer++;
+ return XCAR (objlist);
+ }
+ }
+
+ xassert (FONT_ENTITY_P (entity));
+ val = AREF (entity, FONT_TYPE_INDEX);
+ for (driver_list = f->font_driver_list;
+ driver_list && ! EQ (driver_list->driver->type, val);
+ driver_list = driver_list->next);
+ if (! driver_list)
+ return Qnil;
+
+ font = driver_list->driver->open (f, entity, pixel_size);
+ if (! font)
+ return Qnil;
+ val = make_save_value (font, 1);
+ ASET (entity, FONT_OBJLIST_INDEX,
+ Fcons (val, AREF (entity, FONT_OBJLIST_INDEX)));
+ num_fonts++;
+ return val;
+}
+
+void
+font_close_object (f, font_object)
+ FRAME_PTR f;
+ Lisp_Object font_object;
+{
+ struct font *font;
+ Lisp_Object objlist = AREF (font->entity, FONT_OBJLIST_INDEX);
+ Lisp_Object tail, prev = Qnil;
+
+ for (prev = Qnil, tail = objlist; CONSP (tail);
+ prev = tail, tail = XCDR (tail))
+ if (EQ (font_object, XCAR (tail)))
+ {
+ struct Lisp_Save_Value *p = XSAVE_VALUE (font_object);
+
+ xassert (p->integer > 0);
+ p->integer--;
+ if (p->integer == 0)
+ {
+ if (font->driver->close)
+ font->driver->close (f, p->pointer);
+ p->pointer = NULL;
+ if (NILP (prev))
+ ASET (font->entity, FONT_OBJLIST_INDEX, XCDR (objlist));
+ else
+ XSETCDR (prev, XCDR (objlist));
+ }
+ break;
+ }
+}
+
+int
+font_has_char (f, font, c)
+ FRAME_PTR f;
+ Lisp_Object font;
+ int c;
+{
+ struct font *fontp;
+
+ if (FONT_ENTITY_P (font))
+ {
+ Lisp_Object type = AREF (font, FONT_TYPE_INDEX);
+ struct font_driver_list *driver_list;
+
+ for (driver_list = f->font_driver_list;
+ driver_list && ! EQ (driver_list->driver->type, type);
+ driver_list = driver_list->next);
+ if (! driver_list)
+ return 0;
+ if (! driver_list->driver->has_char)
+ return -1;
+ return driver_list->driver->has_char (font, c);
+ }
+
+ xassert (FONT_OBJECT_P (font));
+ fontp = XSAVE_VALUE (font)->pointer;
+
+ if (fontp->driver->has_char)
+ {
+ int result = fontp->driver->has_char (fontp->entity, c);
+
+ if (result >= 0)
+ return result;
+ }
+ return (fontp->driver->encode_char (fontp, c) != FONT_INVALID_CODE);
+}
+
+unsigned
+font_encode_char (font_object, c)
+ Lisp_Object font_object;
+ int c;
+{
+ struct font *font = XSAVE_VALUE (font_object)->pointer;
+
+ return font->driver->encode_char (font, c);
+}
+
+Lisp_Object
+font_get_name (font_object)
+ Lisp_Object font_object;
+{
+ struct font *font = XSAVE_VALUE (font_object)->pointer;
+ char *name = (font->font.full_name ? font->font.full_name
+ : font->font.name ? font->font.name
+ : NULL);
+
+ return (name ? make_unibyte_string (name, strlen (name)) : null_string);
+}
+
+Lisp_Object
+font_get_spec (font_object)
+ Lisp_Object font_object;
+{
+ struct font *font = XSAVE_VALUE (font_object)->pointer;
+ Lisp_Object spec = Ffont_spec (0, NULL);
+ int i;
+
+ for (i = 0; i < FONT_SIZE_INDEX; i++)
+ ASET (spec, i, AREF (font->entity, i));
+ ASET (spec, FONT_SIZE_INDEX, make_number (font->pixel_size));
+ return spec;
+}
+
+Lisp_Object
+font_get_frame (font)
+ Lisp_Object font;
+{
+ if (FONT_OBJECT_P (font))
+ font = ((struct font *) XSAVE_VALUE (font)->pointer)->entity;
+ xassert (FONT_ENTITY_P (font));
+ return AREF (font, FONT_FRAME_INDEX);
+}
+
+/* Find a font entity best matching with LFACE. If SPEC is non-nil,
+ the font must exactly match with it. */
+
+Lisp_Object
+font_find_for_lface (f, lface, spec)
+ FRAME_PTR f;
+ Lisp_Object *lface;
+ Lisp_Object spec;
+{
+ Lisp_Object frame, entities;
+ int i;
+
+ XSETFRAME (frame, f);
+
+ if (NILP (spec))
+ {
+ for (i = 0; i < FONT_SPEC_MAX; i++)
+ ASET (scratch_font_spec, i, Qnil);
+ ASET (scratch_font_spec, FONT_REGISTRY_INDEX, Qiso8859_1);
+
+ if (! NILP (lface[LFACE_FAMILY_INDEX]))
+ font_merge_old_spec (Qnil, lface[LFACE_FAMILY_INDEX], Qnil,
+ scratch_font_spec);
+ entities = font_list_entities (frame, scratch_font_spec);
+ while (ASIZE (entities) == 0)
+ {
+ /* Try without FOUNDRY or FAMILY. */
+ if (! NILP (AREF (scratch_font_spec, FONT_FOUNDRY_INDEX)))
+ {
+ ASET (scratch_font_spec, FONT_FOUNDRY_INDEX, Qnil);
+ entities = font_list_entities (frame, scratch_font_spec);
+ }
+ else if (! NILP (AREF (scratch_font_spec, FONT_FAMILY_INDEX)))
+ {
+ ASET (scratch_font_spec, FONT_FAMILY_INDEX, Qnil);
+ entities = font_list_entities (frame, scratch_font_spec);
+ }
+ else
+ break;
+ }
+ }
+ else
+ {
+ for (i = 0; i < FONT_SPEC_MAX; i++)
+ ASET (scratch_font_spec, i, AREF (spec, i));
+ if (NILP (AREF (spec, FONT_REGISTRY_INDEX)))
+ ASET (scratch_font_spec, FONT_REGISTRY_INDEX, Qiso8859_1);
+ entities = font_list_entities (frame, scratch_font_spec);
+ }
+
+ if (ASIZE (entities) == 0)
+ return Qnil;
+ if (ASIZE (entities) > 1)
+ {
+ /* Sort fonts by properties specified in LFACE. */
+ Lisp_Object prefer = scratch_font_prefer;
+ double pt;
+
+ if (! NILP (lface[LFACE_FAMILY_INDEX]))
+ font_merge_old_spec (Qnil, lface[LFACE_FAMILY_INDEX], Qnil, prefer);
+ ASET (prefer, FONT_WEIGHT_INDEX,
+ font_prop_validate_style (FONT_WEIGHT_INDEX, QCweight,
+ lface[LFACE_WEIGHT_INDEX]));
+ ASET (prefer, FONT_SLANT_INDEX,
+ font_prop_validate_style (FONT_SLANT_INDEX, QCslant,
+ lface[LFACE_SLANT_INDEX]));
+ ASET (prefer, FONT_WIDTH_INDEX,
+ font_prop_validate_style (FONT_WIDTH_INDEX, QCwidth,
+ lface[LFACE_SWIDTH_INDEX]));
+ pt = XINT (lface[LFACE_HEIGHT_INDEX]);
+ ASET (prefer, FONT_SIZE_INDEX, make_float (pt / 10));
+
+ font_sort_entites (entities, prefer, frame, spec);
+ }
+
+ return AREF (entities, 0);
+}
+
+Lisp_Object
+font_open_for_lface (f, lface, entity)
+ FRAME_PTR f;
+ Lisp_Object *lface;
+ Lisp_Object entity;
+{
+ double pt = XINT (lface[LFACE_HEIGHT_INDEX]);
+ int size;
+
+ pt /= 10;
+ size = POINT_TO_PIXEL (pt, f->resy);
+ return font_open_entity (f, entity, size);
+}
+
+void
+font_load_for_face (f, face)
+ FRAME_PTR f;
+ struct face *face;
+{
+ Lisp_Object font_object = face->lface[LFACE_FONT_INDEX];
+
+ if (NILP (font_object))
+ {
+ Lisp_Object entity = font_find_for_lface (f, face->lface, Qnil);
+
+ if (! NILP (entity))
+ font_object = font_open_for_lface (f, face->lface, entity);
+ }
+
+ if (! NILP (font_object))
+ {
+ struct font *font = XSAVE_VALUE (font_object)->pointer;
+
+ face->font = font->font.font;
+ face->font_info = (struct font_info *) font;
+ face->font_info_id = 0;
+ face->font_name = font->font.full_name;
+ }
+ else
+ {
+ face->font = NULL;
+ face->font_info = NULL;
+ face->font_info_id = -1;
+ face->font_name = NULL;
+ add_to_log ("Unable to load font for a face%s", null_string, Qnil);
+ }
+}
+
+void
+font_prepare_for_face (f, face)
+ FRAME_PTR f;
+ struct face *face;
+{
+ struct font *font = (struct font *) face->font_info;
+
+ if (font->driver->prepare_face)
+ font->driver->prepare_face (f, face);
+}
+
+void
+font_done_for_face (f, face)
+ FRAME_PTR f;
+ struct face *face;
+{
+ struct font *font = (struct font *) face->font_info;
+
+ if (font->driver->done_face)
+ font->driver->done_face (f, face);
+ face->extra = NULL;
+}
+
+Lisp_Object
+font_open_by_name (f, name)
+ FRAME_PTR f;
+ char *name;
+{
+ Lisp_Object args[2];
+ Lisp_Object spec, prefer, size, entities;
+ Lisp_Object frame;
+ int i;
+ int pixel_size;
+
+ XSETFRAME (frame, f);
+
+ args[0] = QCname;
+ args[1] = make_unibyte_string (name, strlen (name));
+ spec = Ffont_spec (2, args);
+ prefer = scratch_font_prefer;
+ for (i = FONT_WEIGHT_INDEX; i < FONT_SIZE_INDEX; i++)
+ if (NILP (AREF (spec, i)))
+ ASET (prefer, i, make_number (100));
+ size = AREF (spec, FONT_SIZE_INDEX);
+ if (NILP (size))
+ pixel_size = 0;
+ else if (INTEGERP (size))
+ pixel_size = XINT (size);
+ else /* FLOATP (size) */
+ {
+ double pt = XFLOAT_DATA (size);
+
+ pixel_size = POINT_TO_PIXEL (pt, f->resy);
+ size = make_number (pixel_size);
+ ASET (spec, FONT_SIZE_INDEX, size);
+ }
+ if (pixel_size == 0)
+ {
+ pixel_size = POINT_TO_PIXEL (12.0, f->resy);
+ size = make_number (pixel_size);
+ }
+ ASET (prefer, FONT_SIZE_INDEX, size);
+ if (NILP (AREF (spec, FONT_REGISTRY_INDEX)))
+ ASET (spec, FONT_REGISTRY_INDEX, Qiso8859_1);
+
+ entities = Flist_fonts (spec, frame, make_number (1), prefer);
+ return (NILP (entities)
+ ? Qnil
+ : font_open_entity (f, XCAR (entities), pixel_size));
+}
+
+
+/* Register font-driver DRIVER. This function is used in two ways.
+
+ The first is with frame F non-NULL. In this case, DRIVER is
+ registered to be used for drawing characters on F. All frame
+ creaters (e.g. Fx_create_frame) must call this function at least
+ once with an available font-driver.
+
+ The second is with frame F NULL. In this case, DRIVER is globally
+ registered in the variable `font_driver_list'. All font-driver
+ implementations must call this function in its syms_of_XXXX
+ (e.g. syms_of_xfont). */
+
+void
+register_font_driver (driver, f)
+ struct font_driver *driver;
+ FRAME_PTR f;
+{
+ struct font_driver_list *root = f ? f->font_driver_list : font_driver_list;
+ struct font_driver_list *prev, *list;
+
+ if (f && ! driver->draw)
+ error ("Unsable font driver for a frame: %s",
+ SDATA (SYMBOL_NAME (driver->type)));
+
+ for (prev = NULL, list = root; list; prev = list, list = list->next)
+ if (list->driver->type == driver->type)
+ error ("Duplicated font driver: %s", SDATA (SYMBOL_NAME (driver->type)));
+
+ list = malloc (sizeof (struct font_driver_list));
+ list->driver = driver;
+ list->next = NULL;
+ if (prev)
+ prev->next = list;
+ else if (f)
+ f->font_driver_list = list;
+ else
+ font_driver_list = list;
+ num_font_drivers++;
+}
+
+/* Free font-driver list on frame F. It doesn't free font-drivers
+ themselves. */
+
+void
+free_font_driver_list (f)
+ FRAME_PTR f;
+{
+ while (f->font_driver_list)
+ {
+ struct font_driver_list *next = f->font_driver_list->next;
+
+ free (f->font_driver_list);
+ f->font_driver_list = next;
+ }
+}
+
+
+/* Lisp API */
+
+DEFUN ("fontp", Ffontp, Sfontp, 1, 1, 0,
+ doc: /* Return t if object is a font-spec or font-entity. */)
+ (object)
+ Lisp_Object object;
+{
+ return (FONTP (object) ? Qt : Qnil);
+}
+
+DEFUN ("font-spec", Ffont_spec, Sfont_spec, 0, MANY, 0,
+ doc: /* Return a newly created font-spec with specified arguments as properties.
+usage: (font-spec &rest properties) */)
+ (nargs, args)
+ int nargs;
+ Lisp_Object *args;
+{
+ Lisp_Object spec = Fmake_vector (make_number (FONT_SPEC_MAX), Qnil);
+ int i;
+
+ for (i = 0; i < nargs; i += 2)
+ {
+ enum font_property_index prop;
+ Lisp_Object key = args[i], val = args[i + 1];
+
+ prop = get_font_prop_index (key, 0);
+ if (prop < FONT_EXTRA_INDEX)
+ ASET (spec, prop, val);
+ else
+ {
+ if (EQ (key, QCname))
+ {
+ CHECK_STRING (val);
+ font_parse_name ((char *) SDATA (val), spec);
+ }
+ else
+ font_put_extra (spec, key, val);
+ }
+ }
+ CHECK_VALIDATE_FONT_SPEC (spec);
+ return spec;
+}
+
+
+DEFUN ("font-get", Ffont_get, Sfont_get, 2, 2, 0,
+ doc: /* Return the value of FONT's PROP property.
+FONT may be a font-spec or font-entity.
+If FONT is font-entity and PROP is :extra, always nil is returned. */)
+ (font, prop)
+ Lisp_Object font, prop;
+{
+ enum font_property_index idx;
+
+ CHECK_FONT (font);
+ idx = get_font_prop_index (prop, 0);
+ if (idx < FONT_EXTRA_INDEX)
+ return AREF (font, idx);
+ if (FONT_ENTITY_P (font))
+ return Qnil;
+ return Fcdr (Fassoc (AREF (font, FONT_EXTRA_INDEX), prop));
+}
+
+
+DEFUN ("font-put", Ffont_put, Sfont_put, 3, 3, 0,
+ doc: /* Set one property of FONT-SPEC: give property PROP value VALUE. */)
+ (font_spec, prop, val)
+ Lisp_Object font_spec, prop, val;
+{
+ enum font_property_index idx;
+ Lisp_Object extra, slot;
+
+ CHECK_FONT_SPEC (font_spec);
+ idx = get_font_prop_index (prop, 0);
+ if (idx < FONT_EXTRA_INDEX)
+ return ASET (font_spec, idx, val);
+ extra = AREF (font_spec, FONT_EXTRA_INDEX);
+ slot = Fassoc (extra, prop);
+ if (NILP (slot))
+ extra = Fcons (Fcons (prop, val), extra);
+ else
+ Fsetcdr (slot, val);
+ return val;
+}
+
+DEFUN ("list-fonts", Flist_fonts, Slist_fonts, 1, 4, 0,
+ doc: /* List available fonts matching FONT-SPEC on the current frame.
+Optional 2nd argument FRAME specifies the target frame.
+Optional 3rd argument NUM, if non-nil, limits the number of returned fonts.
+Optional 4th argument PREFER, if non-nil, is a font-spec
+to which closeness fonts are sorted. */)
+ (font_spec, frame, num, prefer)
+ Lisp_Object font_spec, frame, num, prefer;
+{
+ Lisp_Object vec, list, tail;
+ int n = 0, i, len;
+
+ if (NILP (frame))
+ frame = selected_frame;
+ CHECK_LIVE_FRAME (frame);
+ CHECK_VALIDATE_FONT_SPEC (font_spec);
+ if (! NILP (num))
+ {
+ CHECK_NUMBER (num);
+ n = XINT (num);
+ if (n <= 0)
+ return Qnil;
+ }
+ if (! NILP (prefer))
+ CHECK_FONT (prefer);
+
+ vec = font_list_entities (frame, font_spec);
+ len = ASIZE (vec);
+ if (len == 0)
+ return Qnil;
+ if (len == 1)
+ return Fcons (AREF (vec, 0), Qnil);
+
+ if (! NILP (prefer))
+ vec = font_sort_entites (vec, prefer, frame, font_spec);
+
+ list = tail = Fcons (AREF (vec, 0), Qnil);
+ if (n == 0 || n > len)
+ n = len;
+ for (i = 1; i < n; i++)
+ {
+ Lisp_Object val = Fcons (AREF (vec, i), Qnil);
+
+ XSETCDR (tail, val);
+ tail = val;
+ }
+ return list;
+}
+
+DEFUN ("list-families", Flist_families, Slist_families, 0, 1, 0,
+ doc: /* List available font families on the current frame.
+Optional 2nd argument FRAME specifies the target frame. */)
+ (frame)
+ Lisp_Object frame;
+{
+ FRAME_PTR f;
+ struct font_driver_list *driver_list;
+ Lisp_Object list;
+
+ if (NILP (frame))
+ frame = selected_frame;
+ CHECK_LIVE_FRAME (frame);
+ f = XFRAME (frame);
+ list = Qnil;
+ for (driver_list = f->font_driver_list; driver_list;
+ driver_list = driver_list->next)
+ if (driver_list->driver->list_family)
+ {
+ Lisp_Object val = driver_list->driver->list_family (frame);
+
+ if (NILP (list))
+ list = val;
+ else
+ {
+ Lisp_Object tail = list;
+
+ for (; CONSP (val); val = XCDR (val))
+ if (NILP (Fmemq (XCAR (val), tail)))
+ list = Fcons (XCAR (val), list);
+ }
+ }
+ return list;
+}
+
+DEFUN ("find-font", Ffind_font, Sfind_font, 1, 2, 0,
+ doc: /* Return a font-entity matching with FONT-SPEC on the current frame.
+Optional 2nd argument FRAME, if non-nil, specifies the target frame. */)
+ (font_spec, frame)
+ Lisp_Object font_spec, frame;
+{
+ Lisp_Object val = Flist_fonts (font_spec, frame, make_number (1), Qnil);
+
+ if (CONSP (val))
+ val = XCAR (val);
+ return val;
+}
+
+DEFUN ("font-xlfd-name", Ffont_xlfd_name, Sfont_xlfd_name, 1, 1, 0,
+ doc: /* Return XLFD name of FONT.
+FONT is a font-spec, font-entity, or font-object.
+If the name is too long for XLFD (maximum 255 chars), return nil. */)
+ (font)
+ Lisp_Object font;
+{
+ char name[256];
+ int pixel_size = 0;
+
+ if (FONT_SPEC_P (font))
+ CHECK_VALIDATE_FONT_SPEC (font);
+ else if (FONT_ENTITY_P (font))
+ CHECK_FONT (font);
+ else
+ {
+ struct font *fontp;
+
+ CHECK_FONT_GET_OBJECT (font, fontp);
+ font = fontp->entity;
+ pixel_size = fontp->pixel_size;
+ }
+
+ if (font_unparse_xlfd (font, pixel_size, name, 256) < 0)
+ return Qnil;
+ return build_string (name);
+}
+
+DEFUN ("clear-font-cache", Fclear_font_cache, Sclear_font_cache, 0, 0, 0,
+ doc: /* Clear font cache. */)
+ ()
+{
+ Lisp_Object list, frame;
+
+ FOR_EACH_FRAME (list, frame)
+ {
+ FRAME_PTR f = XFRAME (frame);
+ struct font_driver_list *driver_list = f->font_driver_list;
+
+ for (; driver_list; driver_list = driver_list->next)
+ {
+ Lisp_Object cache = driver_list->driver->get_cache (frame);
+ Lisp_Object tail, elt;
+
+ for (tail = XCDR (cache); CONSP (tail); tail = XCDR (tail))
+ {
+ elt = XCAR (tail);
+ if (CONSP (elt) && FONT_SPEC_P (XCAR (elt)))
+ {
+ Lisp_Object vec = XCDR (elt);
+ int i;
+
+ for (i = 0; i < ASIZE (vec); i++)
+ {
+ Lisp_Object entity = AREF (vec, i);
+ Lisp_Object objlist = AREF (entity, FONT_OBJLIST_INDEX);
+
+ for (; CONSP (objlist); objlist = XCDR (objlist))
+ {
+ Lisp_Object val = XCAR (objlist);
+ struct Lisp_Save_Value *p = XSAVE_VALUE (val);
+ struct font *font = p->pointer;
+
+ xassert (font
+ && driver_list->driver == font->driver);
+ driver_list->driver->close (f, font);
+ p->pointer = NULL;
+ p->integer = 0;
+ }
+ if (driver_list->driver->free_entity)
+ driver_list->driver->free_entity (entity);
+ }
+ }
+ }
+ XSETCDR (cache, Qnil);
+ }
+ }
+
+ return Qnil;
+}
+
+DEFUN ("internal-set-font-style-table", Finternal_set_font_style_table,
+ Sinternal_set_font_style_table, 2, 2, 0,
+ doc: /* Set font style table for PROP to TABLE.
+PROP must be `:weight', `:slant', or `:width'.
+TABLE must be an alist of symbols vs the corresponding numeric values
+sorted by numeric values. */)
+ (prop, table)
+ Lisp_Object prop, table;
+{
+ int table_index;
+ int numeric;
+ Lisp_Object tail, val;
+
+ CHECK_SYMBOL (prop);
+ table_index = (EQ (prop, QCweight) ? 0
+ : EQ (prop, QCslant) ? 1
+ : EQ (prop, QCwidth) ? 2
+ : 3);
+ if (table_index >= ASIZE (font_style_table))
+ error ("Invalid font style property: %s", SDATA (SYMBOL_NAME (prop)));
+ table = Fcopy_sequence (table);
+ numeric = -1;
+ for (tail = table; ! NILP (tail); tail = Fcdr (tail))
+ {
+ prop = Fcar (Fcar (tail));
+ val = Fcdr (Fcar (tail));
+ CHECK_SYMBOL (prop);
+ CHECK_NATNUM (val);
+ if (numeric > XINT (val))
+ error ("Numeric values not sorted for %s", SDATA (SYMBOL_NAME (prop)));
+ numeric = XINT (val);
+ XSETCAR (tail, Fcons (prop, val));
+ }
+ ASET (font_style_table, table_index, table);
+ return Qnil;
+}
+
+DEFUN ("font-make-gstring", Ffont_make_gstring, Sfont_make_gstring, 2, 2, 0,
+ doc: /* Return a newly created glyph-string for FONT-OBJECT with NUM glyphs.
+FONT-OBJECT may be nil if it is not yet known. */)
+ (font_object, num)
+ Lisp_Object font_object, num;
+{
+ Lisp_Object gstring, g;
+ int len;
+ int i;
+
+ if (! NILP (font_object))
+ CHECK_FONT_OBJECT (font_object);
+ CHECK_NATNUM (num);
+
+ len = XINT (num) + 1;
+ gstring = Fmake_vector (make_number (len), Qnil);
+ g = Fmake_vector (make_number (6), Qnil);
+ ASET (g, 0, font_object);
+ ASET (gstring, 0, g);
+ for (i = 1; i < len; i++)
+ ASET (gstring, i, Fmake_vector (make_number (8), make_number (0)));
+ return gstring;
+}
+
+DEFUN ("font-fill-gstring", Ffont_fill_gstring, Sfont_fill_gstring, 4, 5, 0,
+ doc: /* Fillin glyph-string GSTRING by characters for FONT-OBJECT.
+START and END specifies the region to extract characters.
+If optional 3rd argument OBJECT is non-nil, it is a buffer or a string from
+where to extract characters.
+FONT-OBJECT may be nil if GSTRING already already contains one. */)
+ (gstring, font_object, start, end, object)
+ Lisp_Object gstring, font_object, start, end, object;
+{
+ int len, i, c;
+ unsigned code;
+ struct font *font;
+
+ CHECK_VECTOR (gstring);
+ if (NILP (font_object))
+ font_object = Faref (Faref (gstring, make_number (0)), make_number (0));
+ CHECK_FONT_GET_OBJECT (font_object, font);
+
+ if (STRINGP (object))
+ {
+ const unsigned char *p;
+
+ CHECK_NATNUM (start);
+ CHECK_NATNUM (end);
+ if (XINT (start) > XINT (end)
+ || XINT (end) > ASIZE (object)
+ || XINT (end) - XINT (start) >= XINT (Flength (gstring)))
+ args_out_of_range (start, end);
+
+ len = XINT (end) - XINT (start);
+ p = SDATA (object) + string_char_to_byte (object, XINT (start));
+ for (i = 0; i < len; i++)
+ {
+ Lisp_Object g = LGSTRING_GLYPH (gstring, i);
+
+ c = STRING_CHAR_ADVANCE (p);
+ code = font->driver->encode_char (font, c);
+ if (code > MOST_POSITIVE_FIXNUM)
+ error ("Glyph code 0x%X is too large", code);
+ ASET (g, 0, make_number (i));
+ ASET (g, 1, make_number (i + 1));
+ LGLYPH_SET_CHAR (g, make_number (c));
+ LGLYPH_SET_CODE (g, make_number (code));
+ }
+ }
+ else
+ {
+ int pos, pos_byte;
+
+ if (! NILP (object))
+ Fset_buffer (object);
+ validate_region (&start, &end);
+ if (XINT (end) - XINT (start) > len)
+ args_out_of_range (start, end);
+ len = XINT (end) - XINT (start);
+ pos = XINT (start);
+ pos_byte = CHAR_TO_BYTE (pos);
+ for (i = 0; i < len; i++)
+ {
+ Lisp_Object g = LGSTRING_GLYPH (gstring, i);
+
+ FETCH_CHAR_ADVANCE (c, pos, pos_byte);
+ code = font->driver->encode_char (font, c);
+ if (code > MOST_POSITIVE_FIXNUM)
+ error ("Glyph code 0x%X is too large", code);
+ ASET (g, 0, make_number (i));
+ ASET (g, 1, make_number (i + 1));
+ LGLYPH_SET_CHAR (g, make_number (c));
+ LGLYPH_SET_CODE (g, make_number (code));
+ }
+ }
+ return Qnil;
+}
+
+
+#ifdef FONT_DEBUG
+
+DEFUN ("open-font", Fopen_font, Sopen_font, 1, 3, 0,
+ doc: /* Open FONT-ENTITY. */)
+ (font_entity, size, frame)
+ Lisp_Object font_entity;
+ Lisp_Object size;
+ Lisp_Object frame;
+{
+ int isize;
+
+ CHECK_FONT_ENTITY (font_entity);
+ if (NILP (size))
+ size = AREF (font_entity, FONT_SIZE_INDEX);
+ CHECK_NUMBER (size);
+ if (NILP (frame))
+ frame = selected_frame;
+ CHECK_LIVE_FRAME (frame);
+
+ isize = XINT (size);
+ if (isize < 0)
+ isize = POINT_TO_PIXEL (- isize, XFRAME (frame)->resy);
+
+ return font_open_entity (XFRAME (frame), font_entity, isize);
+}
+
+DEFUN ("close-font", Fclose_font, Sclose_font, 1, 2, 0,
+ doc: /* Close FONT-OBJECT. */)
+ (font_object, frame)
+ Lisp_Object font_object, frame;
+{
+ CHECK_FONT_OBJECT (font_object);
+ if (NILP (frame))
+ frame = selected_frame;
+ CHECK_LIVE_FRAME (frame);
+ font_close_object (XFRAME (frame), font_object);
+ return Qnil;
+}
+
+DEFUN ("query-font", Fquery_font, Squery_font, 1, 1, 0,
+ doc: /* Return information about FONT-OBJECT. */)
+ (font_object)
+ Lisp_Object font_object;
+{
+ struct font *font;
+ Lisp_Object val;
+
+ CHECK_FONT_GET_OBJECT (font_object, font);
+
+ val = Fmake_vector (make_number (9), Qnil);
+ ASET (val, 0, Ffont_xlfd_name (font_object));
+ if (font->file_name)
+ ASET (val, 1, make_unibyte_string (font->file_name,
+ strlen (font->file_name)));
+ ASET (val, 2, make_number (font->pixel_size));
+ ASET (val, 3, make_number (font->font.size));
+ ASET (val, 4, make_number (font->ascent));
+ ASET (val, 5, make_number (font->descent));
+ ASET (val, 6, make_number (font->font.space_width));
+ ASET (val, 7, make_number (font->font.average_width));
+ if (font->driver->otf_capability)
+ ASET (val, 8, font->driver->otf_capability (font));
+ return val;
+}
+
+DEFUN ("get-font-glyphs", Fget_font_glyphs, Sget_font_glyphs, 2, 2, 0,
+ doc: /* Return a vector of glyphs of FONT-OBJECT for drawing STRING.
+Each element is a vector [GLYPH-CODE LBEARING RBEARING WIDTH ASCENT DESCENT]. */)
+ (font_object, string)
+ Lisp_Object font_object, string;
+{
+ struct font *font;
+ int i, len;
+ Lisp_Object vec;
+
+ CHECK_FONT_GET_OBJECT (font_object, font);
+ CHECK_STRING (string);
+ len = SCHARS (string);
+ vec = Fmake_vector (make_number (len), Qnil);
+ for (i = 0; i < len; i++)
+ {
+ Lisp_Object ch = Faref (string, make_number (i));
+ Lisp_Object val;
+ int c = XINT (ch);
+ unsigned code;
+ struct font_metrics metrics;
+
+ code = font->driver->encode_char (font, c);
+ if (code == FONT_INVALID_CODE)
+ continue;
+ val = Fmake_vector (make_number (6), Qnil);
+ if (code <= MOST_POSITIVE_FIXNUM)
+ ASET (val, 0, make_number (code));
+ else
+ ASET (val, 0, Fcons (make_number (code >> 16),
+ make_number (code & 0xFFFF)));
+ font->driver->text_extents (font, &code, 1, &metrics);
+ ASET (val, 1, make_number (metrics.lbearing));
+ ASET (val, 2, make_number (metrics.rbearing));
+ ASET (val, 3, make_number (metrics.width));
+ ASET (val, 4, make_number (metrics.ascent));
+ ASET (val, 5, make_number (metrics.descent));
+ ASET (vec, i, val);
+ }
+ return vec;
+}
+
+DEFUN ("font-match-p", Ffont_match_p, Sfont_match_p, 2, 2, 0,
+ doc: /* Return t iff font-spec SPEC matches with FONT.
+FONT is a font-spec, font-entity, or font-object. */)
+ (spec, font)
+ Lisp_Object spec, font;
+{
+ CHECK_FONT_SPEC (spec);
+ if (FONT_OBJECT_P (font))
+ font = ((struct font *) XSAVE_VALUE (font)->pointer)->entity;
+ else if (! FONT_ENTITY_P (font))
+ CHECK_FONT_SPEC (font);
+
+ return (font_match_p (spec, font) ? Qt : Qnil);
+}
+
+#if 0
+DEFUN ("draw-string", Fdraw_string, Sdraw_string, 2, 2, 0,
+ doc: /* Draw STRING by FONT-OBJECT on the top left corner of the current frame.
+The value is a number of glyphs drawn.
+Type C-l to recover what previously shown. */)
+ (font_object, string)
+ Lisp_Object font_object, string;
+{
+ Lisp_Object frame = selected_frame;
+ FRAME_PTR f = XFRAME (frame);
+ struct font *font;
+ struct face *face;
+ int i, len, width;
+ unsigned *code;
+
+ CHECK_FONT_GET_OBJECT (font_object, font);
+ CHECK_STRING (string);
+ len = SCHARS (string);
+ code = alloca (sizeof (unsigned) * len);
+ for (i = 0; i < len; i++)
+ {
+ Lisp_Object ch = Faref (string, make_number (i));
+ Lisp_Object val;
+ int c = XINT (ch);
+
+ code[i] = font->driver->encode_char (font, c);
+ if (code[i] == FONT_INVALID_CODE)
+ break;
+ }
+ face = FACE_FROM_ID (f, DEFAULT_FACE_ID);
+ face->fontp = font;
+ if (font->driver->prepare_face)
+ font->driver->prepare_face (f, face);
+ width = font->driver->text_extents (font, code, i, NULL);
+ len = font->driver->draw_text (f, face, 0, font->ascent, code, i, width);
+ if (font->driver->done_face)
+ font->driver->done_face (f, face);
+ face->fontp = NULL;
+ return make_number (len);
+}
+#endif
+
+#endif /* FONT_DEBUG */
+
+
+extern void syms_of_ftfont P_ (());
+extern void syms_of_xfont P_ (());
+extern void syms_of_xftfont P_ (());
+extern void syms_of_ftxfont P_ (());
+extern void syms_of_bdffont P_ (());
+extern void syms_of_w32font P_ (());
+extern void syms_of_atmfont P_ (());
+
+void
+syms_of_font ()
+{
+ sort_shift_bits[FONT_SLANT_INDEX] = 0;
+ sort_shift_bits[FONT_WEIGHT_INDEX] = 7;
+ sort_shift_bits[FONT_SIZE_INDEX] = 14;
+ sort_shift_bits[FONT_WIDTH_INDEX] = 21;
+ sort_shift_bits[FONT_ADSTYLE_INDEX] = 28;
+ sort_shift_bits[FONT_FOUNDRY_INDEX] = 29;
+ sort_shift_bits[FONT_FAMILY_INDEX] = 30;
+ sort_shift_bits[FONT_REGISTRY_INDEX] = 31;
+ /* Note that sort_shift_bits[FONT_TYPE_INDEX] is never used. */
+
+ staticpro (&font_style_table);
+ font_style_table = Fmake_vector (make_number (3), Qnil);
+
+ staticpro (&font_family_alist);
+ font_family_alist = Qnil;
+
+ DEFSYM (Qfontp, "fontp");
+
+ DEFSYM (Qiso8859_1, "iso8859-1");
+ DEFSYM (Qiso10646_1, "iso10646-1");
+ DEFSYM (Qunicode_bmp, "unicode-bmp");
+
+ DEFSYM (QCotf, ":otf");
+ DEFSYM (QClanguage, ":language");
+ DEFSYM (QCscript, ":script");
+
+ DEFSYM (QCfoundry, ":foundry");
+ DEFSYM (QCadstyle, ":adstyle");
+ DEFSYM (QCregistry, ":registry");
+ DEFSYM (QCspacing, ":spacing");
+ DEFSYM (QCdpi, ":dpi");
+ DEFSYM (QCscalable, ":scalable");
+ DEFSYM (QCextra, ":extra");
+
+ DEFSYM (Qc, "c");
+ DEFSYM (Qm, "m");
+ DEFSYM (Qp, "p");
+ DEFSYM (Qd, "d");
+
+ staticpro (&null_string);
+ null_string = build_string ("");
+ staticpro (&null_vector);
+ null_vector = Fmake_vector (make_number (0), Qnil);
+
+ staticpro (&scratch_font_spec);
+ scratch_font_spec = Ffont_spec (0, NULL);
+ staticpro (&scratch_font_prefer);
+ scratch_font_prefer = Ffont_spec (0, NULL);
+
+ defsubr (&Sfontp);
+ defsubr (&Sfont_spec);
+ defsubr (&Sfont_get);
+ defsubr (&Sfont_put);
+ defsubr (&Slist_fonts);
+ defsubr (&Slist_families);
+ defsubr (&Sfind_font);
+ defsubr (&Sfont_xlfd_name);
+ defsubr (&Sclear_font_cache);
+ defsubr (&Sinternal_set_font_style_table);
+ defsubr (&Sfont_make_gstring);
+ defsubr (&Sfont_fill_gstring);
+
+#ifdef FONT_DEBUG
+ defsubr (&Sopen_font);
+ defsubr (&Sclose_font);
+ defsubr (&Squery_font);
+ defsubr (&Sget_font_glyphs);
+ defsubr (&Sfont_match_p);
+#if 0
+ defsubr (&Sdraw_string);
+#endif
+#endif /* FONT_DEBUG */
+
+#ifdef HAVE_FREETYPE
+ syms_of_ftfont ();
+#ifdef HAVE_X_WINDOWS
+ syms_of_xfont ();
+ syms_of_ftxfont ();
+#ifdef HAVE_XFT
+ syms_of_xftfont ();
+#endif /* HAVE_XFT */
+#endif /* HAVE_X_WINDOWS */
+#else /* not HAVE_FREETYPE */
+#ifdef HAVE_X_WINDOWS
+ syms_of_xfont ();
+#endif /* HAVE_X_WINDOWS */
+#endif /* not HAVE_FREETYPE */
+#ifdef HAVE_BDFFONT
+ syms_of_bdffont ();
+#endif /* HAVE_BDFFONT */
+#ifdef WINDOWSNT
+ syms_of_w32font ();
+#endif /* WINDOWSNT */
+#ifdef MAC_OS
+ syms_of_atmfont ();
+#endif /* MAC_OS */
+}
+
+/* arch-tag: 74c9475d-5976-4c93-a327-942ae3072846
+ (do not change this comment) */
diff --git a/src/font.h b/src/font.h
new file mode 100644
index 00000000000..85f36d20872
--- /dev/null
+++ b/src/font.h
@@ -0,0 +1,509 @@
+/* font.h -- Interface definition for font handling.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ Copyright (C) 2006
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs; see the file COPYING. If not, write to
+the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+#ifndef EMACS_FONT_H
+#define EMACS_FONT_H
+
+#include "ccl.h"
+
+/* We have three types of Lisp objects related to font.
+
+ FONT-SPEC
+
+ Vector (length FONT_SPEC_MAX) of font properties. Some
+ properties can be left unspecified (i.e. nil). Emacs asks
+ font-drivers to find a font by FONT-SPEC. A fontset entry
+ specifies requisite properties whereas a face specifies just
+ preferable properties. This object is fully modifiable by
+ Lisp.
+
+ FONT-ENTITY
+
+ Vector (length FONT_ENTITY_MAX) of fully specified font
+ properties that a font-driver returns upon a request of
+ FONT-SPEC.
+
+ Note: Only the method `list' of a font-driver can create this
+ object, and should never be modified by Lisp. In that sense,
+ it may be cleaner to implement it as a Lisp object of a new
+ type (e.g. struct Lisp_Font).
+
+ FONT-OBJECT
+
+ Lisp object of type Lisp_Misc_Save_Value encapsulating a
+ pointer to "struct font". This corresponds to an opened font.
+
+ Note: The note for FONT-ENTITY also applies to this.
+*/
+
+
+struct font_driver;
+struct font;
+
+/* An enumerator for each font property. This is used as an index to
+ the vector of FONT-SPEC and FONT-ENTITY.
+
+ Note: The order is important and should not be changed. */
+
+enum font_property_index
+ {
+ /* FONT-TYPE is a symbol indicating a font backend; currently `x',
+ `xft', `ftx', `freetype' are available. For windows, we need
+ `bdf' and `windows'. For Mac OS X, we need `atm'. */
+ FONT_TYPE_INDEX,
+
+ /* FONT-FOUNDRY is a foundry name (symbol). */
+ FONT_FOUNDRY_INDEX,
+
+ /* FONT-FAMILY is a family name (symbol). */
+ FONT_FAMILY_INDEX,
+
+ /* FONT-ADSTYLE is an additional style name (symbol). */
+ FONT_ADSTYLE_INDEX,
+
+ /* FONT-REGISTRY is a combination of a charset-registry and
+ charset0encoding name (symbol). */
+ FONT_REGISTRY_INDEX,
+
+ /* FONT-WEIGHT is a numeric value of weight (e.g. medium, bold) of
+ the font. The value is what defined by FC_WEIGHT_* in
+ fontconfig. */
+ FONT_WEIGHT_INDEX,
+
+ /* FONT-SLANT is a numeric value of slant (e.g. r, i, o) of the
+ font. The value is what defined by FC_SLANT_* in
+ fontconfig plus 100. */
+ FONT_SLANT_INDEX,
+
+ /* FONT-WIDTH is a numeric value of setwidth (e.g. normal,
+ condensed) of the font. The value is what defined by
+ FC_WIDTH_* in fontconfig. */
+ FONT_WIDTH_INDEX,
+
+ /* FONT-SIZE is a size of the font. If integer, it is a pixel
+ size. For a font-spec, the value can be float specifying a
+ point size. For a font-entity, the value can be zero meaning
+ that the font is scalable. */
+ FONT_SIZE_INDEX,
+
+ /* In a font-spec, the value is an alist of extra information of a
+ font such as name, OpenType features, and language coverage.
+ In a font-entity, the value is an extra infomation for
+ identifying a font (font-driver dependent). */
+ FONT_EXTRA_INDEX, /* alist alist */
+
+ /* This value is the length of font-spec vector. */
+ FONT_SPEC_MAX,
+
+ /* The followings are used only for a font-entity. */
+
+ /* Frame on which the font is found. The value is nil if the font
+ can be opend on any frame. */
+ FONT_FRAME_INDEX = FONT_SPEC_MAX,
+
+ /* List of font-objects opened from the font-entity. */
+ FONT_OBJLIST_INDEX,
+
+ /* This value is the length of font-entity vector. */
+ FONT_ENTITY_MAX
+ };
+
+extern Lisp_Object QCspacing, QCdpi, QCscalable, QCotf, QClanguage, QCscript;
+
+/* Important character set symbols. */
+extern Lisp_Object Qiso8859_1, Qiso10646_1, Qunicode_bmp;
+
+extern Lisp_Object null_string;
+extern Lisp_Object null_vector;
+
+/* Structure for an opened font. We can safely cast this structure to
+ "struft font_info". */
+
+struct font
+{
+ struct font_info font;
+
+ /* From which font-entity the font is opened. */
+ Lisp_Object entity;
+
+ /* By which pixel size the font is opened. */
+ int pixel_size;
+
+ /* Font-driver for the font. */
+ struct font_driver *driver;
+
+ /* File name of the font, or NULL if the font is not associated with
+ a file. */
+ char *file_name;
+
+ /* Charset to encode a character code into a glyph code of the font.
+ -1 means that the font doesn't require this information to encode
+ a character. */
+ int encoding_charset;
+
+ /* Charset to check if a character code is supported by the font.
+ -1 means that the contents of the font must be looked up to
+ determine it. */
+ int repertory_charset;
+
+ /* Minimum glyph width (in pixels). */
+ int min_width;
+
+ /* Ascent and descent of the font (in pixels). */
+ int ascent, descent;
+
+ /* There will be more to this structure, but they are private to a
+ font-driver. */
+};
+
+enum font_spacing
+ {
+ FONT_SPACING_PROPORTIONAL = 0,
+ FONT_SPACING_DUAL = 90,
+ FONT_SPACING_MONO = 100,
+ FONT_SPACING_CHARCELL = 110
+ };
+
+struct font_metrics
+{
+ short lbearing, rbearing, width, ascent, descent;
+};
+
+struct font_bitmap
+{
+ int rows;
+ int width;
+ int pitch;
+ unsigned char *buffer;
+ int left;
+ int top;
+ int advance;
+ void *extra;
+};
+
+/* Predicates to check various font-related objects. */
+
+#define FONTP(x) \
+ (VECTORP (x) && (ASIZE (x) == FONT_SPEC_MAX || ASIZE (x) == FONT_ENTITY_MAX))
+#define FONT_SPEC_P(x) \
+ (VECTORP (x) && ASIZE (x) == FONT_SPEC_MAX)
+#define FONT_ENTITY_P(x) \
+ (VECTORP (x) && ASIZE (x) == FONT_ENTITY_MAX)
+#define FONT_OBJECT_P(x) \
+ (XTYPE (x) == Lisp_Misc && XMISCTYPE (x) == Lisp_Misc_Save_Value)
+
+
+/* Check macros for various font-related objects. */
+
+#define CHECK_FONT(x) \
+ do { if (! FONTP (x)) x = wrong_type_argument (Qfont, x); } while (0)
+#define CHECK_FONT_SPEC(x) \
+ do { if (! FONT_SPEC_P (x)) x = wrong_type_argument (Qfont, x); } while (0)
+#define CHECK_FONT_ENTITY(x) \
+ do { if (! FONT_ENTITY_P (x)) x = wrong_type_argument (Qfont, x); } while (0)
+#define CHECK_FONT_OBJECT(x) \
+ do { if (! FONT_OBJECT_P (x)) x = wrong_type_argument (Qfont, x); } while (0)
+
+#define CHECK_FONT_GET_OBJECT(x, font) \
+ do { \
+ if (! FONT_OBJECT_P (x)) x = wrong_type_argument (Qfont, x); \
+ if (! XSAVE_VALUE (x)->pointer) error ("Font already closed"); \
+ font = XSAVE_VALUE (x)->pointer; \
+ } while (0)
+
+/* Ignore the difference of font pixel sizes less than or equal to
+ this value. */
+#define FONT_PIXEL_SIZE_QUANTUM 1
+
+struct face;
+struct composition;
+
+/* Macros for lispy glyph-string. */
+#define LGSTRING_FONT(lgs) AREF (AREF ((lgs), 0), 0)
+#define LGSTRING_LBEARING(lgs) AREF (AREF ((lgs), 0), 1)
+#define LGSTRING_RBEARING(lgs) AREF (AREF ((lgs), 0), 2)
+#define LGSTRING_WIDTH(lgs) AREF (AREF ((lgs), 0), 3)
+#define LGSTRING_ASCENT(lgs) AREF (AREF ((lgs), 0), 4)
+#define LGSTRING_DESCENT(lgs) AREF (AREF ((lgs), 0), 5)
+#define LGSTRING_SET_FONT(lgs, val) ASET (AREF ((lgs), 0), 0, (val))
+#define LGSTRING_SET_LBEARING(lgs, val) ASET (AREF ((lgs), 0), 1, (val))
+#define LGSTRING_SET_RBEARING(lgs, val) ASET (AREF ((lgs), 0), 2, (val))
+#define LGSTRING_SET_WIDTH(lgs, val) ASET (AREF ((lgs), 0), 3, (val))
+#define LGSTRING_SET_ASCENT(lgs, val) ASET (AREF ((lgs), 0), 4, (val))
+#define LGSTRING_SET_DESCENT(lgs, val) ASET (AREF ((lgs), 0), 5, (val))
+
+#define LGSTRING_LENGTH(lgs) (ASIZE ((lgs)) - 1)
+#define LGSTRING_GLYPH(lgs, idx) AREF ((lgs), (idx) + 1)
+
+#define LGLYPH_CHAR(g) AREF ((g), 2)
+#define LGLYPH_CODE(g) AREF ((g), 3)
+#define LGLYPH_WIDTH(g) AREF ((g), 4)
+#define LGLYPH_ADJUSTMENT(g) AREF ((g), 5)
+#define LGLYPH_SET_CHAR(g, val) ASET ((g), 2, (val))
+#define LGLYPH_SET_CODE(g, val) ASET ((g), 3, (val))
+#define LGLYPH_SET_WIDTH(g, val) ASET ((g), 4, (val))
+#define LGLYPH_SET_ADJUSTMENT(g, val) ASET ((g), 5, (val))
+
+#define LGLYPH_XOFF(g) (NILP (LGLYPH_ADJUSTMENT (g)) ? 0 \
+ : XINT (AREF (LGLYPH_ADJUSTMENT (g), 0)))
+#define LGLYPH_YOFF(g) (NILP (LGLYPH_ADJUSTMENT (g)) ? 0 \
+ : XINT (AREF (LGLYPH_ADJUSTMENT (g), 1)))
+#define LGLYPH_WADJUST(g) (NILP (LGLYPH_ADJUSTMENT (g)) ? 0 \
+ : XINT (AREF (LGLYPH_ADJUSTMENT (g), 2)))
+
+#define FONT_INVALID_CODE 0xFFFFFFFF
+
+/* Font driver. Members specified as "optional" can be NULL. */
+
+struct font_driver
+{
+ /* Symbol indicating the type of the font-driver. */
+ Lisp_Object type;
+
+ /* Return a cache of font-entities on FRAME. The cache must be a
+ cons whose cdr part is the actual cache area. */
+ Lisp_Object (*get_cache) P_ ((Lisp_Object frame));
+
+ /* List fonts matching with FONT_SPEC on FRAME. The value is a
+ vector of font-entities. This is the sole API that allocates
+ font-entities. */
+ Lisp_Object (*list) P_ ((Lisp_Object frame, Lisp_Object font_spec));
+
+ /* Optional.
+ List available families. The value is a list of family names
+ (symbols). */
+ Lisp_Object (*list_family) P_ ((Lisp_Object frame));
+
+ /* Optional (if FONT_EXTRA_INDEX is not Lisp_Save_Value).
+ Free FONT_EXTRA_INDEX field of FONT_ENTITY. */
+ void (*free_entity) P_ ((Lisp_Object font_entity));
+
+ /* Open a font specified by FONT_ENTITY on frame F. If the font is
+ scalable, open it with PIXEL_SIZE. */
+ struct font *(*open) P_ ((FRAME_PTR f, Lisp_Object font_entity,
+ int pixel_size));
+
+ /* Close FONT on frame F. */
+ void (*close) P_ ((FRAME_PTR f, struct font *font));
+
+ /* Optional (if FACE->extra is not used).
+ Prepare FACE for displaying characters by FONT on frame F by
+ storing some data in FACE->extra. If successful, return 0.
+ Otherwise, return -1. */
+ int (*prepare_face) P_ ((FRAME_PTR f, struct face *face));
+
+ /* Optional.
+ Done FACE for displaying characters by FACE->font on frame F. */
+ void (*done_face) P_ ((FRAME_PTR f, struct face *face));
+
+ /* Optional.
+ If FONT_ENTITY has a glyph for character C (Unicode code point),
+ return 1. If not, return 0. If a font must be opened to check
+ it, return -1. */
+ int (*has_char) P_ ((Lisp_Object entity, int c));
+
+ /* Return a glyph code of FONT for characer C (Unicode code point).
+ If FONT doesn't have such a glyph, return FONT_INVALID_CODE. */
+ unsigned (*encode_char) P_ ((struct font *font, int c));
+
+ /* Perform the size computation of glyphs of FONT and fillin members
+ of METRICS. The glyphs are specified by their glyph codes in
+ CODE (length NGLYPHS). */
+ int (*text_extents) P_ ((struct font *font,
+ unsigned *code, int nglyphs,
+ struct font_metrics *metrics));
+
+ /* Optional.
+ Draw glyphs between FROM and TO of S->char2b at (X Y) pixel
+ position of frame F with S->FACE and S->GC. If WITH_BACKGROUND
+ is nonzero, fill the background in advance. It is assured that
+ WITH_BACKGROUND is zero when (FROM > 0 || TO < S->nchars). */
+ int (*draw) P_ ((struct glyph_string *s, int from, int to,
+ int x, int y, int with_background));
+
+ /* Optional.
+ Store bitmap data for glyph-code CODE of FONT in BITMAP. It is
+ intended that this method is callled from the other font-driver
+ for actual drawing. */
+ int (*get_bitmap) P_ ((struct font *font, unsigned code,
+ struct font_bitmap *bitmap,
+ int bits_per_pixel));
+
+ /* Optional.
+ Free bitmap data in BITMAP. */
+ void (*free_bitmap) P_ ((struct font *font, struct font_bitmap *bitmap));
+
+ /* Optional.
+ Return an outline data for glyph-code CODE of FONT. The format
+ of the outline data depends on the font-driver. */
+ void *(*get_outline) P_ ((struct font *font, unsigned code));
+
+ /* Optional.
+ Free OUTLINE (that is obtained by the above method). */
+ void (*free_outline) P_ ((struct font *font, void *outline));
+
+ /* Optional.
+ Get coordinates of the INDEXth anchor point of the glyph whose
+ code is CODE. Store the coordinates in *X and *Y. Return 0 if
+ the operations was successfull. Otherwise return -1. */
+ int (*anchor_point) P_ ((struct font *font, unsigned code, int index,
+ int *x, int *y));
+
+ /* Optional.
+ Return a list describing which scripts/languages FONT
+ supports by which GSUB/GPOS features of OpenType tables. */
+ Lisp_Object (*otf_capability) P_ ((struct font *font));
+
+ /* Optional.
+ Drive FONT's OTF GSUB features according to GSUB_SPEC.
+
+ GSUB_SPEC is in this format (all elements are symbols):
+ (SCRIPT LANGSYS GSUB-FEATURE ...)
+ If one of GSUB-FEATURE is nil, apply all gsub features except for
+ already applied and listed later. For instance, if the font has
+ GSUB features nukt, haln, rphf, blwf, and half,
+ (deva nil nukt haln nil rphf)
+ applies nukt and haln in this order, then applies blwf and half
+ in the order apearing in the font. The features are of the
+ default langsys of `deva' script.
+
+ This method applies the specified features to the codes in the
+ elements of GSTRING-IN (between FROMth and TOth). The output
+ codes are stored in GSTRING-OUT at the IDXth element and the
+ following elements.
+
+ Return the number of output codes. If none of the features are
+ applicable to the input data, return 0. If GSTRING-OUT is too
+ short, return -1. */
+ int (*otf_gsub) P_ ((struct font *font, Lisp_Object gsub_spec,
+ Lisp_Object gstring_in, int from, int to,
+ Lisp_Object gstring_out, int idx));
+
+ /* Optional.
+ Drive FONT's OTF GPOS features according to GPOS_SPEC.
+
+ GPOS_SPEC is in this format (all elements are symbols):
+ (SCRIPT LANGSYS GPOS-FEATURE ...)
+ The meaning is the same as GSUB_SPEC above.
+
+ This method applies the specified features to the codes in the
+ elements of GSTRING (between FROMth and TOth). The resulting
+ positioning information (x-offset and y-offset) is stored in the
+ slots of the elements.
+
+ Return 1 if at least one glyph has nonzero x-offset or y-offset.
+ Otherwise return 0. */
+ int (*otf_gpos) P_ ((struct font *font, Lisp_Object gpos_spec,
+ Lisp_Object gstring, int from, int to));
+};
+
+
+struct font_driver_list
+{
+ struct font_driver *driver;
+ struct font_driver_list *next;
+};
+
+extern int enable_font_backend;
+
+EXFUN (Ffont_spec, MANY);
+EXFUN (Flist_fonts, 4);
+
+extern Lisp_Object font_symbolic_weight P_ ((Lisp_Object font));
+extern Lisp_Object font_symbolic_slant P_ ((Lisp_Object font));
+extern Lisp_Object font_symbolic_width P_ ((Lisp_Object font));
+
+extern int font_match_p P_ ((Lisp_Object spec, Lisp_Object entity));
+
+extern Lisp_Object font_find_object P_ ((struct font *font));
+extern Lisp_Object font_get_name P_ ((Lisp_Object font_object));
+extern Lisp_Object font_get_spec P_ ((Lisp_Object font_object));
+extern Lisp_Object font_get_frame P_ ((Lisp_Object font_object));
+extern int font_has_char P_ ((FRAME_PTR, Lisp_Object, int));
+extern unsigned font_encode_char P_ ((Lisp_Object, int));
+
+extern int font_set_lface_from_name P_ ((FRAME_PTR f,
+ Lisp_Object lface,
+ Lisp_Object fontname,
+ int force_p, int may_fail_p));
+extern Lisp_Object font_find_for_lface P_ ((FRAME_PTR f, Lisp_Object *lface,
+ Lisp_Object spec));
+extern Lisp_Object font_open_for_lface P_ ((FRAME_PTR f, Lisp_Object *lface,
+ Lisp_Object entity));
+extern void font_load_for_face P_ ((FRAME_PTR f, struct face *face));
+extern void font_prepare_for_face P_ ((FRAME_PTR f, struct face *face));
+extern Lisp_Object font_open_by_name P_ ((FRAME_PTR f, char *name));
+extern void font_close_object (FRAME_PTR f, Lisp_Object font_object);
+
+extern Lisp_Object intern_downcase P_ ((char *str, int len));
+extern void font_update_sort_order P_ ((int *order));
+
+extern void font_merge_old_spec P_ ((Lisp_Object name, Lisp_Object family,
+ Lisp_Object registry, Lisp_Object spec));
+
+
+extern int font_parse_xlfd P_ ((char *name, Lisp_Object font));
+extern int font_unparse_xlfd P_ ((Lisp_Object font, int pixel_size,
+ char *name, int bytes));
+extern int font_parse_fcname P_ ((char *name, Lisp_Object font));
+extern int font_unparse_fcname P_ ((Lisp_Object font, int pixel_size,
+ char *name, int bytes));
+extern void register_font_driver P_ ((struct font_driver *driver, FRAME_PTR f));
+extern void free_font_driver_list P_ ((FRAME_PTR f));
+
+extern struct font *font_prepare_composition P_ ((struct composition *cmp));
+
+
+#ifdef HAVE_LIBOTF
+/* This can be used as `otf_capability' method of a font-driver. */
+extern Lisp_Object font_otf_capability P_ ((struct font *font));
+/* This can be used as `otf_gsub' method of a font-driver. */
+extern int font_otf_gsub P_ ((struct font *font, Lisp_Object gsub_spec,
+ Lisp_Object gstring_in, int from, int to,
+ Lisp_Object gstring_out, int idx));
+/* This can be used as `otf_gpos' method of a font-driver. */
+extern int font_otf_gpos P_ ((struct font *font, Lisp_Object gpos_spec,
+ Lisp_Object gstring, int from, int to));
+#endif /* HAVE_LIBOTF */
+
+#ifdef HAVE_FREETYPE
+extern struct font_driver ftfont_driver;
+#endif /* HAVE_FREETYPE */
+#ifdef HAVE_X_WINDOWS
+extern struct font_driver xfont_driver;
+extern struct font_driver ftxfont_driver;
+#ifdef HAVE_XFT
+extern struct font_driver xftfont_driver;
+#endif /* HAVE_XFT */
+#endif /* HAVE_X_WINDOWS */
+#ifdef WINDOWSNT
+extern struct font_driver w32font_driver;
+#endif /* WINDOWSNT */
+#ifdef MAC_OS
+extern struct font_driver atmfont_driver;
+#endif /* MAC_OS */
+
+#endif /* not EMACS_FONT_H */
+
+/* arch-tag: 3b7260c3-5bec-4d6b-a0db-95c1b431b1a2
+ (do not change this comment) */
diff --git a/src/fontset.c b/src/fontset.c
index c8679a7b7e2..dd4b7620452 100644
--- a/src/fontset.c
+++ b/src/fontset.c
@@ -3,7 +3,10 @@
Copyright (C) 1995, 1997, 1998, 2000, 2003, 2004, 2005
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
-
+ Copyright (C) 2003, 2006
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
+
This file is part of GNU Emacs.
GNU Emacs is free software; you can redistribute it and/or modify
@@ -30,12 +33,15 @@ Boston, MA 02110-1301, USA. */
#endif
#include "lisp.h"
+#include "blockinput.h"
#include "buffer.h"
+#include "character.h"
#include "charset.h"
#include "ccl.h"
#include "keyboard.h"
#include "frame.h"
#include "dispextern.h"
+#include "intervals.h"
#include "fontset.h"
#include "window.h"
#ifdef HAVE_X_WINDOWS
@@ -48,71 +54,135 @@ Boston, MA 02110-1301, USA. */
#include "macterm.h"
#endif
-#ifdef FONTSET_DEBUG
+#ifdef USE_FONT_BACKEND
+#include "font.h"
+#endif /* USE_FONT_BACKEND */
+
#undef xassert
+#ifdef FONTSET_DEBUG
#define xassert(X) do {if (!(X)) abort ();} while (0)
#undef INLINE
#define INLINE
-#endif
+#else /* not FONTSET_DEBUG */
+#define xassert(X) (void) 0
+#endif /* not FONTSET_DEBUG */
+EXFUN (Fclear_face_cache, 1);
/* FONTSET
A fontset is a collection of font related information to give
- similar appearance (style, size, etc) of characters. There are two
- kinds of fontsets; base and realized. A base fontset is created by
- new-fontset from Emacs Lisp explicitly. A realized fontset is
- created implicitly when a face is realized for ASCII characters. A
- face is also realized for multibyte characters based on an ASCII
- face. All of the multibyte faces based on the same ASCII face
- share the same realized fontset.
+ similar appearance (style, etc) of characters. A fontset has two
+ roles. One is to use for the frame parameter `font' as if it is an
+ ASCII font. In that case, Emacs uses the font specified for
+ `ascii' script for the frame's default font.
+
+ Another role, the more important one, is to provide information
+ about which font to use for each non-ASCII character.
+
+ There are two kinds of fontsets; base and realized. A base fontset
+ is created by `new-fontset' from Emacs Lisp explicitly. A realized
+ fontset is created implicitly when a face is realized for ASCII
+ characters. A face is also realized for non-ASCII characters based
+ on an ASCII face. All of non-ASCII faces based on the same ASCII
+ face share the same realized fontset.
+
+ A fontset object is implemented by a char-table whose default value
+ and parent are always nil.
+
+ An element of a base fontset is a vector of FONT-DEFs which itself
+ is a vector [ FONT-SPEC ENCODING REPERTORY ].
+
+ FONT-SPEC is:
+ [ FAMILY WEIGHT SLANT SWIDTH ADSTYLE REGISTRY ]
+ or
+ FONT-NAME
+ where FAMILY, WEIGHT, SLANT, SWIDTH, ADSTYLE, REGISTRY, and
+ FONT-NAME are strings.
+
+ Note: Currently WEIGHT through ADSTYLE are ignored.
+
+ ENCODING is a charset ID that can convert characters to glyph codes
+ of the corresponding font.
+
+ REPERTORY is a charset ID, a char-table, or nil. If REPERTORY is a
+ charset ID, the repertory of the charset exactly matches with that
+ of the font. If REPERTORY is a char-table, all characters who have
+ a non-nil value in the table are supported. If REPERTORY is nil,
+ we consult with the font itself to get the repertory.
+
+ ENCODING and REPERTORY are extracted from the variable
+ Vfont_encoding_alist by using a font name generated from FONT-SPEC
+ (if it is a vector) or FONT-NAME as a matching target.
+
+
+ An element of a realized fontset is nil or t, or has this form:
+
+ [CHARSET-ORDERED-LIST-TICK PREFERRED-CHARSET-ID
+ PREFERRED-RFONT-DEF RFONT-DEF0 RFONT-DEF1 ...].
+
+ RFONT-DEFn (i.e. Realized FONT-DEF) has this form:
+
+ [ FACE-ID FONT-INDEX FONT-DEF OPENED-FONT-NAME ]
- A fontset object is implemented by a char-table.
+ RFONT-DEFn is automatically reordered by the current charset
+ priority list.
- An element of a base fontset is:
- (INDEX . FONTNAME) or
- (INDEX . (FOUNDRY . REGISTRY ))
- FONTNAME is a font name pattern for the corresponding character.
- FOUNDRY and REGISTRY are respectively foundry and registry fields of
- a font name for the corresponding character. INDEX specifies for
- which character (or generic character) the element is defined. It
- may be different from an index to access this element. For
- instance, if a fontset defines some font for all characters of
- charset `japanese-jisx0208', INDEX is the generic character of this
- charset. REGISTRY is the
+ The value nil means that we have not yet generated the above vector
+ from the base of the fontset.
- An element of a realized fontset is FACE-ID which is a face to use
- for displaying the corresponding character.
+ The value t means that no font is available for the corresponding
+ range of characters.
- All single byte characters (ASCII and 8bit-unibyte) share the same
- element in a fontset. The element is stored in the first element
- of the fontset.
- To access or set each element, use macros FONTSET_REF and
- FONTSET_SET respectively for efficiency.
+ A fontset has 9 extra slots.
- A fontset has 3 extra slots.
+ The 1st slot: the ID number of the fontset
- The 1st slot is an ID number of the fontset.
+ The 2nd slot:
+ base: the name of the fontset
+ realized: nil
- The 2nd slot is a name of the fontset. This is nil for a realized
- face.
+ The 3rd slot:
+ base: nil
+ realized: the base fontset
- The 3rd slot is a frame that the fontset belongs to. This is nil
- for a default face.
+ The 4th slot:
+ base: nil
+ realized: the frame that the fontset belongs to
- A parent of a base fontset is nil. A parent of a realized fontset
- is a base fontset.
+ The 5th slot:
+ base: the font name for ASCII characters
+ realized: nil
- All fontsets are recorded in Vfontset_table.
+ The 6th slot:
+ base: nil
+ realized: the ID number of a face to use for characters that
+ has no font in a realized fontset.
+
+ The 7th slot:
+ base: nil
+ realized: Alist of font index vs the corresponding repertory
+ char-table.
+
+ The 8th slot:
+ base: nil
+ realized: If the base is not the default fontset, a fontset
+ realized from the default fontset, else nil.
+
+ The 9th slot:
+ base: Same as element value (but for fallback fonts).
+ realized: Likewise.
+
+ All fontsets are recorded in the vector Vfontset_table.
DEFAULT FONTSET
- There's a special fontset named `default fontset' which defines a
- default fontname pattern. When a base fontset doesn't specify a
- font for a specific character, the corresponding value in the
- default fontset is used. The format is the same as a base fontset.
+ There's a special base fontset named `default fontset' which
+ defines the default font specifications. When a base fontset
+ doesn't specify a font for a specific character, the corresponding
+ value in the default fontset is used.
The parent of a realized fontset created for such a face that has
no fontset is the default fontset.
@@ -120,16 +190,18 @@ Boston, MA 02110-1301, USA. */
These structures are hidden from the other codes than this file.
The other codes handle fontsets only by their ID numbers. They
- usually use variable name `fontset' for IDs. But, in this file, we
- always use variable name `id' for IDs, and name `fontset' for the
- actual fontset objects.
+ usually use the variable name `fontset' for IDs. But, in this
+ file, we always use varialbe name `id' for IDs, and name `fontset'
+ for an actual fontset object, i.e., char-table.
*/
/********** VARIABLES and FUNCTION PROTOTYPES **********/
extern Lisp_Object Qfont;
-Lisp_Object Qfontset;
+static Lisp_Object Qfontset;
+static Lisp_Object Qfontset_info;
+static Lisp_Object Qprepend, Qappend;
/* Vector containing all fontsets. */
static Lisp_Object Vfontset_table;
@@ -139,19 +211,16 @@ static Lisp_Object Vfontset_table;
static int next_fontset_id;
/* The default fontset. This gives default FAMILY and REGISTRY of
- font for each characters. */
+ font for each character. */
static Lisp_Object Vdefault_fontset;
-/* Alist of font specifications. It override the font specification
- in the default fontset. */
-static Lisp_Object Voverriding_fontspec_alist;
-
Lisp_Object Vfont_encoding_alist;
Lisp_Object Vuse_default_ascent;
Lisp_Object Vignore_relative_composition;
Lisp_Object Valternate_fontname_alist;
Lisp_Object Vfontset_alias_alist;
Lisp_Object Vvertical_centering_font_regexp;
+Lisp_Object Votf_script_alist;
/* The following six are declarations of callback functions depending
on window system. See the comments in src/fontset.h for more
@@ -184,19 +253,38 @@ void (*set_frame_fontset_func) P_ ((FRAME_PTR f, Lisp_Object arg,
This function set the member `encoder' of the structure. */
void (*find_ccl_program_func) P_ ((struct font_info *));
+Lisp_Object (*get_font_repertory_func) P_ ((struct frame *,
+ struct font_info *));
+
/* Check if any window system is used now. */
void (*check_window_system_func) P_ ((void));
/* Prototype declarations for static functions. */
-static Lisp_Object fontset_ref P_ ((Lisp_Object, int));
-static Lisp_Object lookup_overriding_fontspec P_ ((Lisp_Object, int));
-static void fontset_set P_ ((Lisp_Object, int, Lisp_Object));
+static Lisp_Object fontset_add P_ ((Lisp_Object, Lisp_Object, Lisp_Object,
+ Lisp_Object));
+static Lisp_Object fontset_font P_ ((Lisp_Object, int, struct face *, int));
static Lisp_Object make_fontset P_ ((Lisp_Object, Lisp_Object, Lisp_Object));
-static int fontset_id_valid_p P_ ((int));
static Lisp_Object fontset_pattern_regexp P_ ((Lisp_Object));
-static Lisp_Object font_family_registry P_ ((Lisp_Object, int));
-static Lisp_Object regularize_fontname P_ ((Lisp_Object));
+static void accumulate_script_ranges P_ ((Lisp_Object, Lisp_Object,
+ Lisp_Object));
+Lisp_Object find_font_encoding P_ ((Lisp_Object));
+
+static void set_fontset_font P_ ((Lisp_Object, Lisp_Object));
+
+#ifdef FONTSET_DEBUG
+
+/* Return 1 if ID is a valid fontset id, else return 0. */
+
+static int
+fontset_id_valid_p (id)
+ int id;
+{
+ return (id >= 0 && id < ASIZE (Vfontset_table) - 1);
+}
+
+#endif
+
/********** MACROS AND FUNCTIONS TO HANDLE FONTSET **********/
@@ -206,170 +294,539 @@ static Lisp_Object regularize_fontname P_ ((Lisp_Object));
/* Macros to access special values of FONTSET. */
#define FONTSET_ID(fontset) XCHAR_TABLE (fontset)->extras[0]
+
+/* Macros to access special values of (base) FONTSET. */
#define FONTSET_NAME(fontset) XCHAR_TABLE (fontset)->extras[1]
-#define FONTSET_FRAME(fontset) XCHAR_TABLE (fontset)->extras[2]
-#define FONTSET_ASCII(fontset) XCHAR_TABLE (fontset)->contents[0]
-#define FONTSET_BASE(fontset) XCHAR_TABLE (fontset)->parent
+#define FONTSET_ASCII(fontset) XCHAR_TABLE (fontset)->extras[4]
+
+/* Macros to access special values of (realized) FONTSET. */
+#define FONTSET_BASE(fontset) XCHAR_TABLE (fontset)->extras[2]
+#define FONTSET_FRAME(fontset) XCHAR_TABLE (fontset)->extras[3]
+#define FONTSET_NOFONT_FACE(fontset) XCHAR_TABLE (fontset)->extras[5]
+#define FONTSET_REPERTORY(fontset) XCHAR_TABLE (fontset)->extras[6]
+#define FONTSET_DEFAULT(fontset) XCHAR_TABLE (fontset)->extras[7]
-#define BASE_FONTSET_P(fontset) NILP (FONTSET_BASE(fontset))
+/* For both base and realized fontset. */
+#define FONTSET_FALLBACK(fontset) XCHAR_TABLE (fontset)->extras[8]
+#define BASE_FONTSET_P(fontset) (NILP (FONTSET_BASE (fontset)))
-/* Return the element of FONTSET (char-table) at index C (character). */
-#define FONTSET_REF(fontset, c) fontset_ref (fontset, c)
+/* Return the element of FONTSET for the character C. If FONTSET is a
+ base fontset other then the default fontset and FONTSET doesn't
+ contain information for C, return the information in the default
+ fontset. */
+
+#define FONTSET_REF(fontset, c) \
+ (EQ (fontset, Vdefault_fontset) \
+ ? CHAR_TABLE_REF (fontset, c) \
+ : fontset_ref ((fontset), (c)))
static Lisp_Object
fontset_ref (fontset, c)
Lisp_Object fontset;
int c;
{
- int charset, c1, c2;
- Lisp_Object elt, defalt;
-
- if (SINGLE_BYTE_CHAR_P (c))
- return FONTSET_ASCII (fontset);
-
- SPLIT_CHAR (c, charset, c1, c2);
- elt = XCHAR_TABLE (fontset)->contents[charset + 128];
- if (!SUB_CHAR_TABLE_P (elt))
- return elt;
- defalt = XCHAR_TABLE (elt)->defalt;
- if (c1 < 32
- || (elt = XCHAR_TABLE (elt)->contents[c1],
- NILP (elt)))
- return defalt;
- if (!SUB_CHAR_TABLE_P (elt))
- return elt;
- defalt = XCHAR_TABLE (elt)->defalt;
- if (c2 < 32
- || (elt = XCHAR_TABLE (elt)->contents[c2],
- NILP (elt)))
- return defalt;
+ Lisp_Object elt;
+
+ elt = CHAR_TABLE_REF (fontset, c);
+ if (NILP (elt) && ! EQ (fontset, Vdefault_fontset)
+ /* Don't check Vdefault_fontset for a realized fontset. */
+ && NILP (FONTSET_BASE (fontset)))
+ elt = CHAR_TABLE_REF (Vdefault_fontset, c);
return elt;
}
+/* Return the element of FONTSET for the character C, set FROM and TO
+ to the range of characters around C that have the same value as C.
+ If FONTSET is a base fontset other then the default fontset and
+ FONTSET doesn't contain information for C, return the information
+ in the default fontset. */
+
+#define FONTSET_REF_AND_RANGE(fontset, c, form, to) \
+ (EQ (fontset, Vdefault_fontset) \
+ ? char_table_ref_and_range (fontset, c, &from, &to) \
+ : fontset_ref_and_range (fontset, c, &from, &to))
+
static Lisp_Object
-lookup_overriding_fontspec (frame, c)
- Lisp_Object frame;
+fontset_ref_and_range (fontset, c, from, to)
+ Lisp_Object fontset;
int c;
+ int *from, *to;
{
- Lisp_Object tail;
+ Lisp_Object elt;
- for (tail = Voverriding_fontspec_alist; CONSP (tail); tail = XCDR (tail))
+ elt = char_table_ref_and_range (fontset, c, from, to);
+ if (NILP (elt) && ! EQ (fontset, Vdefault_fontset)
+ /* Don't check Vdefault_fontset for a realized fontset. */
+ && NILP (FONTSET_BASE (fontset)))
{
- Lisp_Object val, target, elt;
-
- val = XCAR (tail);
- target = XCAR (val);
- val = XCDR (val);
- /* Now VAL is (NO-FRAME-LIST OK-FRAME-LIST CHAR FONTNAME). */
- if (NILP (Fmemq (frame, XCAR (val)))
- && (CHAR_TABLE_P (target)
- ? ! NILP (CHAR_TABLE_REF (target, c))
- : XINT (target) == CHAR_CHARSET (c)))
- {
- val = XCDR (val);
- elt = XCDR (val);
- if (NILP (Fmemq (frame, XCAR (val))))
- {
- if (! face_font_available_p (XFRAME (frame), XCDR (elt)))
- {
- val = XCDR (XCAR (tail));
- XSETCAR (val, Fcons (frame, XCAR (val)));
- continue;
- }
- XSETCAR (val, Fcons (frame, XCAR (val)));
- }
- if (NILP (XCAR (elt)))
- XSETCAR (elt, make_number (c));
- return elt;
- }
+ int from1, to1;
+
+ elt = char_table_ref_and_range (Vdefault_fontset, c, &from1, &to1);
+ if (*from < from1)
+ *from = from1;
+ if (*to > to1)
+ *to = to1;
}
- return Qnil;
+ return elt;
}
-#define FONTSET_REF_VIA_BASE(fontset, c) fontset_ref_via_base (fontset, &c)
+
+/* Set elements of FONTSET for characters in RANGE to the value ELT.
+ RANGE is a cons (FROM . TO), where FROM and TO are character codes
+ specifying a range. */
+
+#define FONTSET_SET(fontset, range, elt) \
+ Fset_char_table_range ((fontset), (range), (elt))
+
+
+/* Modify the elements of FONTSET for characters in RANGE by replacing
+ with ELT or adding ELT. RANGE is a cons (FROM . TO), where FROM
+ and TO are character codes specifying a range. If ADD is nil,
+ replace with ELT, if ADD is `prepend', prepend ELT, otherwise,
+ append ELT. */
+
+#define FONTSET_ADD(fontset, range, elt, add) \
+ (NILP (add) \
+ ? (NILP (range) \
+ ? (FONTSET_FALLBACK (fontset) = Fmake_vector (make_number (1), (elt))) \
+ : Fset_char_table_range ((fontset), (range), \
+ Fmake_vector (make_number (1), (elt)))) \
+ : fontset_add ((fontset), (range), (elt), (add)))
static Lisp_Object
-fontset_ref_via_base (fontset, c)
- Lisp_Object fontset;
- int *c;
+fontset_add (fontset, range, elt, add)
+ Lisp_Object fontset, range, elt, add;
{
- int charset, c1, c2;
- Lisp_Object elt;
+ Lisp_Object args[2];
+ int idx = (EQ (add, Qappend) ? 0 : 1);
- if (SINGLE_BYTE_CHAR_P (*c))
- return FONTSET_ASCII (fontset);
-
- elt = Qnil;
- if (! EQ (FONTSET_BASE (fontset), Vdefault_fontset))
- elt = FONTSET_REF (FONTSET_BASE (fontset), *c);
- if (NILP (elt))
- elt = lookup_overriding_fontspec (FONTSET_FRAME (fontset), *c);
- if (NILP (elt))
- elt = FONTSET_REF (Vdefault_fontset, *c);
- if (NILP (elt))
- return Qnil;
+ args[1 - idx] = Fmake_vector (make_number (1), elt);
- *c = XINT (XCAR (elt));
- SPLIT_CHAR (*c, charset, c1, c2);
- elt = XCHAR_TABLE (fontset)->contents[charset + 128];
- if (c1 < 32)
- return (SUB_CHAR_TABLE_P (elt) ? XCHAR_TABLE (elt)->defalt : elt);
- if (!SUB_CHAR_TABLE_P (elt))
- return Qnil;
- elt = XCHAR_TABLE (elt)->contents[c1];
- if (c2 < 32)
- return (SUB_CHAR_TABLE_P (elt) ? XCHAR_TABLE (elt)->defalt : elt);
- if (!SUB_CHAR_TABLE_P (elt))
- return Qnil;
- elt = XCHAR_TABLE (elt)->contents[c2];
- return elt;
+ if (CONSP (range))
+ {
+ int from = XINT (XCAR (range));
+ int to = XINT (XCDR (range));
+ int from1, to1;
+
+ do {
+ args[idx] = char_table_ref_and_range (fontset, from, &from1, &to1);
+ if (to < to1)
+ to1 = to;
+ char_table_set_range (fontset, from, to1,
+ NILP (args[idx]) ? args[1 - idx]
+ : Fvconcat (2, args));
+ from = to1 + 1;
+ } while (from < to);
+ }
+ else
+ {
+ args[idx] = FONTSET_FALLBACK (fontset);
+ FONTSET_FALLBACK (fontset)
+ = NILP (args[idx]) ? args[1 - idx] : Fvconcat (2, args);
+ }
+ return Qnil;
}
-/* Store into the element of FONTSET at index C the value NEWELT. */
-#define FONTSET_SET(fontset, c, newelt) fontset_set(fontset, c, newelt)
+/* Update FONTSET_ELEMENT which has this form:
+ [CHARSET-ORDERED-LIST-TICK PREFERRED-CHARSET-ID PREFERRED-RFONT-DEF
+ RFONT-DEF0 RFONT-DEF1 ...].
+ Reorder RFONT-DEFs according to the current order of charset
+ (Vcharset_ordered_list), and update CHARSET-ORDERED-LIST-TICK to
+ the latest value. */
static void
-fontset_set (fontset, c, newelt)
+reorder_font_vector (fontset_element)
+ Lisp_Object fontset_element;
+{
+ Lisp_Object list, *new_vec;
+ Lisp_Object font_def;
+ int size;
+ int *charset_id_table;
+ int i, idx;
+
+ ASET (fontset_element, 0, make_number (charset_ordered_list_tick));
+ size = ASIZE (fontset_element) - 3;
+ if (size <= 1)
+ /* No need to reorder VEC. */
+ return;
+ charset_id_table = (int *) alloca (sizeof (int) * size);
+ new_vec = (Lisp_Object *) alloca (sizeof (Lisp_Object) * size);
+
+ /* At first, extract ENCODING (a chaset ID) from each FONT-DEF.
+ FONT-DEF has this form:
+ [FACE-ID FONT-INDEX [ FONT-SPEC ENCODING REPERTORY ]] */
+ for (i = 0; i < size; i++)
+ {
+ font_def = AREF (fontset_element, i + 3);
+ if (! NILP (AREF (font_def, 2)))
+ charset_id_table[i] = XINT (AREF (AREF (font_def, 2), 1));
+ else
+ charset_id_table[i] = -1;
+ }
+
+ /* Then, store FONT-DEFs in NEW_VEC in the correct order. */
+ for (idx = 0, list = Vcharset_ordered_list;
+ idx < size && CONSP (list); list = XCDR (list))
+ {
+ for (i = 0; i < size; i++)
+ if (charset_id_table[i] == XINT (XCAR (list)))
+ new_vec[idx++] = AREF (fontset_element, i + 3);
+ }
+ for (i = 0; i < size; i++)
+ if (charset_id_table[i] < 0)
+ new_vec[idx++] = AREF (fontset_element, i + 3);
+
+ /* At last, update FONT-DEFs. */
+ for (i = 0; i < size; i++)
+ ASET (fontset_element, i + 3, new_vec[i]);
+}
+
+
+/* Load a font matching the font related attributes in FACE->lface and
+ font pattern in FONT_DEF of FONTSET, and return an index of the
+ font. FONT_DEF has this form:
+ [ FONT-SPEC ENCODING REPERTORY ]
+ If REPERTORY is nil, generate a char-table representing the font
+ repertory by looking into the font itself. */
+
+static int
+load_font_get_repertory (f, face, font_def, fontset)
+ FRAME_PTR f;
+ struct face *face;
+ Lisp_Object font_def;
+ Lisp_Object fontset;
+{
+ char *font_name;
+ struct font_info *font_info;
+ int charset;
+
+ font_name = choose_face_font (f, face->lface, AREF (font_def, 0), NULL);
+ charset = XINT (AREF (font_def, 1));
+ if (! (font_info = fs_load_font (f, font_name, charset)))
+ return -1;
+
+ if (NILP (AREF (font_def, 2))
+ && NILP (Fassq (make_number (font_info->font_idx),
+ FONTSET_REPERTORY (fontset))))
+ {
+ /* We must look into the font to get the correct repertory as a
+ char-table. */
+ Lisp_Object repertory;
+
+ repertory = (*get_font_repertory_func) (f, font_info);
+ FONTSET_REPERTORY (fontset)
+ = Fcons (Fcons (make_number (font_info->font_idx), repertory),
+ FONTSET_REPERTORY (fontset));
+ }
+
+ return font_info->font_idx;
+}
+
+
+/* Return RFONT-DEF (vector) in the realized fontset FONTSET for the
+ character C. If the corresponding font is not yet opened, open it
+ (if FACE is not NULL) or return Qnil (if FACE is NULL).
+ If no proper font is found for C, return Qnil. */
+
+static Lisp_Object
+fontset_font (fontset, c, face, id)
Lisp_Object fontset;
int c;
- Lisp_Object newelt;
+ struct face *face;
+ int id;
{
- int charset, code[3];
- Lisp_Object *elt;
- int i;
+ Lisp_Object base_fontset, elt, vec;
+ int i, from, to;
+ int font_idx;
+ FRAME_PTR f = XFRAME (FONTSET_FRAME (fontset));
+
+ base_fontset = FONTSET_BASE (fontset);
+ vec = CHAR_TABLE_REF (fontset, c);
+ if (EQ (vec, Qt))
+ goto try_fallback;
- if (SINGLE_BYTE_CHAR_P (c))
+ if (NILP (vec))
{
- FONTSET_ASCII (fontset) = newelt;
- return;
+ /* We have not yet decided a face for C. */
+ Lisp_Object range;
+
+ if (! face)
+ return Qnil;
+ elt = FONTSET_REF_AND_RANGE (base_fontset, c, from, to);
+ range = Fcons (make_number (from), make_number (to));
+ if (NILP (elt))
+ {
+ /* Record that we have no font for characters of this
+ range. */
+ vec = Qt;
+ FONTSET_SET (fontset, range, vec);
+ goto try_fallback;
+ }
+ /* Build a vector [ -1 -1 nil NEW-ELT0 NEW-ELT1 NEW-ELT2 ... ],
+ where the first -1 is to force reordering of NEW-ELTn,
+ NEW-ETLn is [nil nil AREF (elt, n) nil]. */
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend
+ && EQ (base_fontset, Vdefault_fontset))
+ vec = Fmake_vector (make_number (ASIZE (elt) + 4), make_number (-1));
+ else
+#endif /* not USE_FONT_BACKEND */
+ vec = Fmake_vector (make_number (ASIZE (elt) + 3), make_number (-1));
+ ASET (vec, 2, Qnil);
+ for (i = 0; i < ASIZE (elt); i++)
+ {
+ Lisp_Object tmp;
+
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ tmp = Fmake_vector (make_number (5), Qnil);
+ else
+#endif /* USE_FONT_BACKEND */
+ tmp = Fmake_vector (make_number (4), Qnil);
+ ASET (tmp, 2, AREF (elt, i));
+ ASET (vec, 3 + i, tmp);
+ }
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend
+ && EQ (base_fontset, Vdefault_fontset))
+ {
+ Lisp_Object script, font_spec, tmp;
+
+ script = CHAR_TABLE_REF (Vchar_script_table, c);
+ if (NILP (script))
+ script = intern ("latin");
+ font_spec = Ffont_spec (0, NULL);
+ ASET (font_spec, FONT_REGISTRY_INDEX, Qiso10646_1);
+ ASET (font_spec, FONT_EXTRA_INDEX,
+ Fcons (Fcons (QCscript, script), Qnil));
+ tmp = Fmake_vector (make_number (5), Qnil);
+ ASET (tmp, 3, font_spec);
+ ASET (vec, 3 + i, tmp);
+ }
+#endif /* USE_FONT_BACKEND */
+
+ /* Then store it in the fontset. */
+ FONTSET_SET (fontset, range, vec);
}
- SPLIT_CHAR (c, charset, code[0], code[1]);
- code[2] = 0; /* anchor */
- elt = &XCHAR_TABLE (fontset)->contents[charset + 128];
- for (i = 0; code[i] > 0; i++)
+ retry:
+ if (XINT (AREF (vec, 0)) != charset_ordered_list_tick)
+ /* The priority of charsets is changed after we selected a face
+ for C last time. */
+ reorder_font_vector (vec);
+
+ if (id < 0)
+ i = 3;
+ else if (id == XFASTINT (AREF (vec, 1)))
+ i = 2;
+ else
{
- if (!SUB_CHAR_TABLE_P (*elt))
+ ASET (vec, 1, make_number (id));
+ for (i = 3; i < ASIZE (vec); i++)
+ if (id == XFASTINT (AREF (AREF (AREF (vec, i), 2), 1)))
+ break;
+ if (i < ASIZE (vec))
{
- Lisp_Object val = *elt;
- *elt = make_sub_char_table (Qnil);
- XCHAR_TABLE (*elt)->defalt = val;
+ ASET (vec, 2, AREF (vec, i));
+ i = 2;
+ }
+ else
+ {
+ ASET (vec, 2, Qnil);
+ i = 3;
}
- elt = &XCHAR_TABLE (*elt)->contents[code[i]];
}
- if (SUB_CHAR_TABLE_P (*elt))
- XCHAR_TABLE (*elt)->defalt = newelt;
- else
- *elt = newelt;
+
+ /* Find the first available font in the vector of RFONT-DEF. */
+ for (; i < ASIZE (vec); i++)
+ {
+ Lisp_Object font_def;
+
+ elt = AREF (vec, i);
+ if (NILP (elt))
+ continue;
+ /* ELT == [ FACE-ID FONT-INDEX FONT-DEF OPENED-FONT-NAME ] */
+ if (INTEGERP (AREF (elt, 1)) && XINT (AREF (elt, 1)) < 0)
+ /* We couldn't open this font last time. */
+ continue;
+
+ if (!face && NILP (AREF (elt, 1)))
+ /* We have not yet opened the font. */
+ return Qnil;
+
+ font_def = AREF (elt, 2);
+ /* FONT_DEF == [ FONT-SPEC ENCODING REPERTORY ] */
+
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ {
+ /* ELT == [ FACE-ID FONT-INDEX FONT-DEF FONT-ENTITY FONT-OBJECT ] */
+ Lisp_Object font_entity = AREF (elt, 3);
+ Lisp_Object font_object = AREF (elt, 4);
+ int has_char;
+
+ if (NILP (font_entity) && ! NILP (AREF (font_def, 0)))
+ {
+ Lisp_Object tmp = AREF (font_def, 0);
+ Lisp_Object spec = Ffont_spec (0, NULL);
+
+ if (STRINGP (tmp))
+ font_merge_old_spec (tmp, Qnil, Qnil, spec);
+ else
+ {
+ Lisp_Object family = AREF (tmp, 0);
+ Lisp_Object registry = AREF (tmp, 5);;
+
+ font_merge_old_spec (Qnil, family, registry, spec);
+ }
+ font_entity = font_find_for_lface (f, face->lface, spec);
+ ASET (elt, 3, font_entity);
+ }
+ else if (FONT_SPEC_P (font_entity))
+ {
+ font_entity = font_find_for_lface (f, face->lface, font_entity);
+ ASET (elt, 3, font_entity);
+ }
+ if (NILP (font_entity))
+ {
+ ASET (elt, 1, make_number (-1));
+ continue;
+ }
+ has_char = font_has_char (f, font_entity, c);
+ if (has_char == 0)
+ continue;
+ if (NILP (font_object))
+ font_object = font_open_for_lface (f, face->lface, font_entity);
+ if (NILP (font_object))
+ {
+ ASET (elt, 1, make_number (-1));
+ continue;
+ }
+ ASET (elt, 1, make_number (0));
+ ASET (elt, 4, font_object);
+ if (has_char < 0
+ && font_encode_char (font_object, c) == FONT_INVALID_CODE)
+ continue;
+ }
+ else
+#endif /* USE_FONT_BACKEND */
+
+ if (INTEGERP (AREF (font_def, 2)))
+ {
+ /* The repertory is specified by charset ID. */
+ struct charset *charset
+ = CHARSET_FROM_ID (XINT (AREF (font_def, 2)));
+
+ if (! CHAR_CHARSET_P (c, charset))
+ /* This font can't display C. */
+ continue;
+ }
+ else if (CHAR_TABLE_P (AREF (font_def, 2)))
+ {
+ /* The repertory is specified by a char table. */
+ if (NILP (CHAR_TABLE_REF (AREF (font_def, 2), c)))
+ /* This font can't display C. */
+ continue;
+ }
+ else
+ {
+ Lisp_Object slot;
+
+ if (! INTEGERP (AREF (elt, 1)))
+ {
+ /* We have not yet opened a font matching this spec.
+ Open the best matching font now and register the
+ repertory. */
+ struct font_info *font_info;
+
+ font_idx = load_font_get_repertory (f, face, font_def, fontset);
+ ASET (elt, 1, make_number (font_idx));
+ if (font_idx < 0)
+ /* This means that we couldn't find a font matching
+ FONT_DEF. */
+ continue;
+ font_info = (*get_font_info_func) (f, font_idx);
+ ASET (elt, 3, build_string (font_info->full_name));
+ }
+
+ slot = Fassq (AREF (elt, 1), FONTSET_REPERTORY (fontset));
+ xassert (CONSP (slot));
+ if (NILP (CHAR_TABLE_REF (XCDR (slot), c)))
+ /* This font can't display C. */
+ continue;
+ }
+
+ /* Now we have decided to use this font spec to display C. */
+ if (! INTEGERP (AREF (elt, 1)))
+ {
+ /* But not yet opened the best matching font. */
+ struct font_info *font_info;
+
+ font_idx = load_font_get_repertory (f, face, font_def, fontset);
+ ASET (elt, 1, make_number (font_idx));
+ if (font_idx < 0)
+ /* Can't open it. Try the other one. */
+ continue;
+ font_info = (*get_font_info_func) (f, font_idx);
+ ASET (elt, 3, build_string (font_info->full_name));
+ }
+
+ /* Now we have the opened font. */
+ return elt;
+ }
+
+ try_fallback:
+ if (! EQ (vec, FONTSET_FALLBACK (fontset)))
+ {
+ vec = FONTSET_FALLBACK (fontset);
+ if (VECTORP (vec))
+ goto retry;
+ if (EQ (vec, Qt))
+ goto try_default;
+ elt = FONTSET_FALLBACK (base_fontset);
+ if (! NILP (elt))
+ {
+ vec = Fmake_vector (make_number (ASIZE (elt) + 3), make_number (-1));
+ ASET (vec, 2, Qnil);
+ for (i = 0; i < ASIZE (elt); i++)
+ {
+ Lisp_Object tmp;
+
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ tmp = Fmake_vector (make_number (5), Qnil);
+ else
+#endif /* USE_FONT_BACKEND */
+ tmp = Fmake_vector (make_number (4), Qnil);
+ ASET (tmp, 2, AREF (elt, i));
+ ASET (vec, 3 + i, tmp);
+ }
+ FONTSET_FALLBACK (fontset) = vec;
+ goto retry;
+ }
+ /* Record that this fontset has no fallback fonts. */
+ FONTSET_FALLBACK (fontset) = Qt;
+ }
+
+ /* Try the default fontset. */
+ try_default:
+ if (! EQ (base_fontset, Vdefault_fontset))
+ {
+ if (NILP (FONTSET_DEFAULT (fontset)))
+ FONTSET_DEFAULT (fontset)
+ = make_fontset (FONTSET_FRAME (fontset), Qnil, Vdefault_fontset);
+ return fontset_font (FONTSET_DEFAULT (fontset), c, face, id);
+ }
+ return Qnil;
}
/* Return a newly created fontset with NAME. If BASE is nil, make a
- base fontset. Otherwise make a realized fontset whose parent is
+ base fontset. Otherwise make a realized fontset whose base is
BASE. */
static Lisp_Object
@@ -389,10 +846,11 @@ make_fontset (frame, name, base)
if (id + 1 == size)
{
+ /* We must grow Vfontset_table. */
Lisp_Object tem;
int i;
- tem = Fmake_vector (make_number (size + 8), Qnil);
+ tem = Fmake_vector (make_number (size + 32), Qnil);
for (i = 0; i < size; i++)
AREF (tem, i) = AREF (Vfontset_table, i);
Vfontset_table = tem;
@@ -401,98 +859,100 @@ make_fontset (frame, name, base)
fontset = Fmake_char_table (Qfontset, Qnil);
FONTSET_ID (fontset) = make_number (id);
- FONTSET_NAME (fontset) = name;
- FONTSET_FRAME (fontset) = frame;
- FONTSET_BASE (fontset) = base;
+ if (NILP (base))
+ {
+ FONTSET_NAME (fontset) = name;
+ }
+ else
+ {
+ FONTSET_NAME (fontset) = Qnil;
+ FONTSET_FRAME (fontset) = frame;
+ FONTSET_BASE (fontset) = base;
+ }
- AREF (Vfontset_table, id) = fontset;
+ ASET (Vfontset_table, id, fontset);
next_fontset_id = id + 1;
return fontset;
}
-/* Return 1 if ID is a valid fontset id, else return 0. */
-
-static INLINE int
-fontset_id_valid_p (id)
- int id;
-{
- return (id >= 0 && id < ASIZE (Vfontset_table) - 1);
-}
-
-
-/* Extract `family' and `registry' string from FONTNAME and a cons of
- them. Actually, `family' may also contain `foundry', `registry'
- may also contain `encoding' of FONTNAME. But, if FONTNAME doesn't
- conform to XLFD nor explicitely specifies the other fields
- (i.e. not using wildcard `*'), return FONTNAME. If FORCE is
- nonzero, specifications of the other fields are ignored, and return
- a cons as far as FONTNAME conform to XLFD. */
-
-static Lisp_Object
-font_family_registry (fontname, force)
+/* Set the ASCII font of the default fontset to FONTNAME if that is
+ not yet set. */
+void
+set_default_ascii_font (fontname)
Lisp_Object fontname;
- int force;
{
- Lisp_Object family, registry;
- const char *p = SDATA (fontname);
- const char *sep[15];
- int i = 0;
-
- while (*p && i < 15)
- if (*p++ == '-')
- {
- if (!force && i >= 2 && i <= 11 && *p != '*' && p[1] != '-')
- return fontname;
- sep[i++] = p;
- }
- if (i != 14)
- return fontname;
+ if (! STRINGP (FONTSET_ASCII (Vdefault_fontset)))
+ {
+ int id = fs_query_fontset (fontname, 2);
- family = make_unibyte_string (sep[0], sep[2] - 1 - sep[0]);
- registry = make_unibyte_string (sep[12], p - sep[12]);
- return Fcons (family, registry);
+ if (id >= 0)
+ fontname = FONTSET_ASCII (FONTSET_FROM_ID (id));
+ FONTSET_ASCII (Vdefault_fontset)= fontname;
+ }
}
-/********** INTERFACES TO xfaces.c and dispextern.h **********/
+/********** INTERFACES TO xfaces.c, xfns.c, and dispextern.h **********/
-/* Return name of the fontset with ID. */
+/* Return the name of the fontset who has ID. */
Lisp_Object
fontset_name (id)
int id;
{
Lisp_Object fontset;
+
fontset = FONTSET_FROM_ID (id);
return FONTSET_NAME (fontset);
}
-/* Return ASCII font name of the fontset with ID. */
+/* Return the ASCII font name of the fontset who has ID. */
Lisp_Object
fontset_ascii (id)
int id;
{
Lisp_Object fontset, elt;
+
fontset= FONTSET_FROM_ID (id);
elt = FONTSET_ASCII (fontset);
- return XCDR (elt);
+#ifdef USE_FONT_BACKEND
+ if (CONSP (elt))
+ elt = XCAR (elt);
+#endif /* USE_FONT_BACKEND */
+ /* It is assured that ELT is always a string (i.e. fontname
+ pattern). */
+ return elt;
}
-/* Free fontset of FACE. Called from free_realized_face. */
+/* Free fontset of FACE defined on frame F. Called from
+ free_realized_face. */
void
free_face_fontset (f, face)
FRAME_PTR f;
struct face *face;
{
- if (fontset_id_valid_p (face->fontset))
+ Lisp_Object fontset;
+
+ fontset = AREF (Vfontset_table, face->fontset);
+ xassert (!NILP (fontset) && ! BASE_FONTSET_P (fontset));
+ xassert (f == XFRAME (FONTSET_FRAME (fontset)));
+ ASET (Vfontset_table, face->fontset, Qnil);
+ if (face->fontset < next_fontset_id)
+ next_fontset_id = face->fontset;
+ if (! NILP (FONTSET_DEFAULT (fontset)))
{
- AREF (Vfontset_table, face->fontset) = Qnil;
- if (face->fontset < next_fontset_id)
+ int id = XINT (FONTSET_ID (FONTSET_DEFAULT (fontset)));
+
+ fontset = AREF (Vfontset_table, id);
+ xassert (!NILP (fontset) && ! BASE_FONTSET_P (fontset));
+ xassert (f == XFRAME (FONTSET_FRAME (fontset)));
+ ASET (Vfontset_table, id, Qnil);
+ if (id < next_fontset_id)
next_fontset_id = face->fontset;
}
}
@@ -500,57 +960,85 @@ free_face_fontset (f, face)
/* Return 1 iff FACE is suitable for displaying character C.
Otherwise return 0. Called from the macro FACE_SUITABLE_FOR_CHAR_P
- when C is not a single byte character.. */
+ when C is not an ASCII character. */
int
face_suitable_for_char_p (face, c)
struct face *face;
int c;
{
- Lisp_Object fontset, elt;
+ Lisp_Object fontset, rfont_def;
- if (SINGLE_BYTE_CHAR_P (c))
- return (face == face->ascii_face);
-
- xassert (fontset_id_valid_p (face->fontset));
fontset = FONTSET_FROM_ID (face->fontset);
- xassert (!BASE_FONTSET_P (fontset));
-
- elt = FONTSET_REF_VIA_BASE (fontset, c);
- return (!NILP (elt) && face->id == XFASTINT (elt));
+ rfont_def = fontset_font (fontset, c, NULL, -1);
+ return (VECTORP (rfont_def)
+ && INTEGERP (AREF (rfont_def, 0))
+ && face->id == XINT (AREF (rfont_def, 0)));
}
/* Return ID of face suitable for displaying character C on frame F.
- The selection of face is done based on the fontset of FACE. FACE
- should already have been realized for ASCII characters. Called
- from the macro FACE_FOR_CHAR when C is not a single byte character. */
+ FACE must be reazlied for ASCII characters in advance. Called from
+ the macro FACE_FOR_CHAR. */
int
-face_for_char (f, face, c)
+face_for_char (f, face, c, pos, object)
FRAME_PTR f;
struct face *face;
- int c;
+ int c, pos;
+ Lisp_Object object;
{
- Lisp_Object fontset, elt;
+ Lisp_Object fontset, charset, rfont_def;
int face_id;
+ int id;
+
+ if (ASCII_CHAR_P (c))
+ return face->ascii_face->id;
xassert (fontset_id_valid_p (face->fontset));
fontset = FONTSET_FROM_ID (face->fontset);
xassert (!BASE_FONTSET_P (fontset));
+ if (pos < 0)
+ id = -1;
+ else
+ {
+ charset = Fget_char_property (make_number (pos), Qcharset, object);
+ if (NILP (charset))
+ id = -1;
+ else if (CHARSETP (charset))
+ id = XINT (CHARSET_SYMBOL_ID (charset));
+ }
+ rfont_def = fontset_font (fontset, c, face, id);
+ if (VECTORP (rfont_def))
+ {
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend
+ && NILP (AREF (rfont_def, 0)))
+ {
+ struct font *font = XSAVE_VALUE (AREF (rfont_def, 4))->pointer;
- elt = FONTSET_REF_VIA_BASE (fontset, c);
- if (!NILP (elt))
- return XINT (elt);
+ face_id = face_for_font (f, font, face);
+ ASET (rfont_def, 0, make_number (face_id));
+ }
+ else
+#endif /* USE_FONT_BACKEND */
+ if (NILP (AREF (rfont_def, 0)))
+ {
+ /* We have not yet made a realized face that uses this font. */
+ int font_idx = XINT (AREF (rfont_def, 1));
- /* No face is recorded for C in the fontset of FACE. Make a new
- realized face for C that has the same fontset. */
- face_id = lookup_face (f, face->lface, c, face);
+ face_id = lookup_non_ascii_face (f, font_idx, face);
+ ASET (rfont_def, 0, make_number (face_id));
+ }
+ return XINT (AREF (rfont_def, 0));
+ }
- /* Record the face ID in FONTSET at the same index as the
- information in the base fontset. */
- FONTSET_SET (fontset, c, make_number (face_id));
- return face_id;
+ if (NILP (FONTSET_NOFONT_FACE (fontset)))
+ {
+ face_id = lookup_non_ascii_face (f, -1, face);
+ FONTSET_NOFONT_FACE (fontset) = make_number (face_id);
+ }
+ return XINT (FONTSET_NOFONT_FACE (fontset));
}
@@ -560,9 +1048,10 @@ face_for_char (f, face, c)
Called from realize_x_face. */
int
-make_fontset_for_ascii_face (f, base_fontset_id)
+make_fontset_for_ascii_face (f, base_fontset_id, face)
FRAME_PTR f;
int base_fontset_id;
+ struct face *face;
{
Lisp_Object base_fontset, fontset, frame;
@@ -573,69 +1062,44 @@ make_fontset_for_ascii_face (f, base_fontset_id)
if (!BASE_FONTSET_P (base_fontset))
base_fontset = FONTSET_BASE (base_fontset);
xassert (BASE_FONTSET_P (base_fontset));
+ if (! BASE_FONTSET_P (base_fontset))
+ abort ();
}
else
base_fontset = Vdefault_fontset;
fontset = make_fontset (frame, Qnil, base_fontset);
- return XINT (FONTSET_ID (fontset));
-}
-
-
-/* Return the font name pattern for C that is recorded in the fontset
- with ID. If a font name pattern is specified (instead of a cons of
- family and registry), check if a font can be opened by that pattern
- to get the fullname. If a font is opened, return that name.
- Otherwise, return nil. If ID is -1, or the fontset doesn't contain
- information about C, get the registry and encoding of C from the
- default fontset. Called from choose_face_font. */
+ {
+ Lisp_Object elt, rfont_def;
-Lisp_Object
-fontset_font_pattern (f, id, c)
- FRAME_PTR f;
- int id, c;
-{
- Lisp_Object fontset, elt;
- struct font_info *fontp;
+ elt = FONTSET_REF (base_fontset, 0);
+ xassert (VECTORP (elt) && ASIZE (elt) > 0);
+#ifdef USE_FONT_BACKEND
+ rfont_def = Fmake_vector (make_number (5), Qnil);
+ if (enable_font_backend && face->font_info)
+ {
+ struct font *font = (struct font *) face->font_info;
- elt = Qnil;
- if (fontset_id_valid_p (id))
- {
- fontset = FONTSET_FROM_ID (id);
- xassert (!BASE_FONTSET_P (fontset));
- fontset = FONTSET_BASE (fontset);
- if (! EQ (fontset, Vdefault_fontset))
- elt = FONTSET_REF (fontset, c);
- }
- if (NILP (elt))
+ ASET (rfont_def, 3, font->entity);
+ ASET (rfont_def, 4, font_find_object (font));
+ }
+ else
+#endif /* USE_FONT_BACKEND */
{
- Lisp_Object frame;
-
- XSETFRAME (frame, f);
- elt = lookup_overriding_fontspec (frame, c);
+ rfont_def = Fmake_vector (make_number (4), Qnil);
+ ASET (rfont_def, 3, build_string (face->font_name));
}
- if (NILP (elt))
- elt = FONTSET_REF (Vdefault_fontset, c);
-
- if (!CONSP (elt))
- return Qnil;
- if (CONSP (XCDR (elt)))
- return XCDR (elt);
-
- /* The fontset specifies only a font name pattern (not cons of
- family and registry). If a font can be opened by that pattern,
- return the name of opened font. Otherwise return nil. The
- exception is a font for single byte characters. In that case, we
- return a cons of FAMILY and REGISTRY extracted from the opened
- font name. */
- elt = XCDR (elt);
- xassert (STRINGP (elt));
- fontp = FS_LOAD_FONT (f, c, SDATA (elt), -1);
- if (!fontp)
- return Qnil;
-
- return font_family_registry (build_string (fontp->full_name),
- SINGLE_BYTE_CHAR_P (c));
+ ASET (rfont_def, 0, make_number (face->id));
+ ASET (rfont_def, 1, make_number (face->font_info_id));
+ ASET (rfont_def, 2, AREF (elt, 0));
+ elt = Fmake_vector (make_number (4), Qnil);
+ ASET (elt, 0, make_number (charset_ordered_list_tick));
+ ASET (elt, 1, make_number (charset_ascii));
+ ASET (elt, 2, rfont_def);
+ ASET (elt, 3, rfont_def);
+ char_table_set_range (fontset, 0, 127, elt);
+ }
+ return XINT (FONTSET_ID (fontset));
}
@@ -643,125 +1107,57 @@ fontset_font_pattern (f, id, c)
#pragma optimize("", off)
#endif
-/* Load a font named FONTNAME to display character C on frame F.
- Return a pointer to the struct font_info of the loaded font. If
- loading fails, return NULL. If FACE is non-zero and a fontset is
- assigned to it, record FACE->id in the fontset for C. If FONTNAME
- is NULL, the name is taken from the fontset of FACE or what
- specified by ID. */
+/* Load a font named FONTNAME on frame F. Return a pointer to the
+ struct font_info of the loaded font. If loading fails, return
+ NULL. CHARSET is an ID of charset to encode characters for this
+ font. If it is -1, find one from Vfont_encoding_alist. */
struct font_info *
-fs_load_font (f, c, fontname, id, face)
+fs_load_font (f, fontname, charset)
FRAME_PTR f;
- int c;
char *fontname;
- int id;
- struct face *face;
+ int charset;
{
- Lisp_Object fontset;
- Lisp_Object list, elt, fullname;
- int size = 0;
struct font_info *fontp;
- int charset = CHAR_CHARSET (c);
-
- if (face)
- id = face->fontset;
- if (id < 0)
- fontset = Qnil;
- else
- fontset = FONTSET_FROM_ID (id);
-
- if (!NILP (fontset)
- && !BASE_FONTSET_P (fontset))
- {
- elt = FONTSET_REF_VIA_BASE (fontset, c);
- if (!NILP (elt))
- {
- /* A suitable face for C is already recorded, which means
- that a proper font is already loaded. */
- int face_id = XINT (elt);
-
- xassert (face_id == face->id);
- face = FACE_FROM_ID (f, face_id);
- return (*get_font_info_func) (f, face->font_info_id);
- }
-
- if (!fontname && charset == CHARSET_ASCII)
- {
- elt = FONTSET_ASCII (fontset);
- fontname = SDATA (XCDR (elt));
- }
- }
+ Lisp_Object fullname;
if (!fontname)
/* No way to get fontname. */
- return 0;
-
- fontp = (*load_font_func) (f, fontname, size);
- if (!fontp)
- return 0;
+ return NULL;
- /* Fill in members (charset, vertical_centering, encoding, etc) of
- font_info structure that are not set by (*load_font_func). */
- fontp->charset = charset;
+ fontp = (*load_font_func) (f, fontname, 0);
+ if (! fontp || fontp->charset >= 0)
+ return fontp;
+ fontname = fontp->full_name;
fullname = build_string (fontp->full_name);
- fontp->vertical_centering
- = (STRINGP (Vvertical_centering_font_regexp)
- && (fast_string_match_ignore_case
- (Vvertical_centering_font_regexp, fullname) >= 0));
- if (fontp->encoding[1] != FONT_ENCODING_NOT_DECIDED)
+ if (charset < 0)
{
- /* The font itself tells which code points to be used. Use this
- encoding for all other charsets. */
- int i;
-
- fontp->encoding[0] = fontp->encoding[1];
- for (i = MIN_CHARSET_OFFICIAL_DIMENSION1; i <= MAX_CHARSET; i++)
- fontp->encoding[i] = fontp->encoding[1];
+ Lisp_Object charset_symbol;
+
+ charset_symbol = find_font_encoding (fullname);
+ if (CONSP (charset_symbol))
+ charset_symbol = XCAR (charset_symbol);
+ if (NILP (charset_symbol))
+ charset_symbol = Qascii;
+ charset = XINT (CHARSET_SYMBOL_ID (charset_symbol));
}
- else
- {
- /* The font itself doesn't have information about encoding. */
- int i;
+ fontp->charset = charset;
+ fontp->vertical_centering = 0;
+ fontp->font_encoder = NULL;
- /* By default, encoding of ASCII chars is 0 (i.e. 0x00..0x7F),
- others is 1 (i.e. 0x80..0xFF). */
- fontp->encoding[0] = 0;
- for (i = MIN_CHARSET_OFFICIAL_DIMENSION1; i <= MAX_CHARSET; i++)
- fontp->encoding[i] = 1;
- /* Then override them by a specification in Vfont_encoding_alist. */
- for (list = Vfont_encoding_alist; CONSP (list); list = XCDR (list))
- {
- elt = XCAR (list);
- if (CONSP (elt)
- && STRINGP (XCAR (elt)) && CONSP (XCDR (elt))
- && (fast_string_match_ignore_case (XCAR (elt), fullname) >= 0))
- {
- Lisp_Object tmp;
+ if (charset != charset_ascii)
+ {
+ fontp->vertical_centering
+ = (STRINGP (Vvertical_centering_font_regexp)
+ && (fast_string_match_ignore_case
+ (Vvertical_centering_font_regexp, fullname) >= 0));
- for (tmp = XCDR (elt); CONSP (tmp); tmp = XCDR (tmp))
- if (CONSP (XCAR (tmp))
- && ((i = get_charset_id (XCAR (XCAR (tmp))))
- >= 0)
- && INTEGERP (XCDR (XCAR (tmp)))
- && XFASTINT (XCDR (XCAR (tmp))) < 4)
- fontp->encoding[i]
- = XFASTINT (XCDR (XCAR (tmp)));
- }
- }
+ if (find_ccl_program_func)
+ (*find_ccl_program_func) (fontp);
}
- if (! fontp->font_encoder && find_ccl_program_func)
- (*find_ccl_program_func) (fontp);
-
- /* If we loaded a font for a face that has fontset, record the face
- ID in the fontset for C. */
- if (face
- && !NILP (fontset)
- && !BASE_FONTSET_P (fontset))
- FONTSET_SET (fontset, c, make_number (face->id));
return fontp;
}
@@ -769,24 +1165,34 @@ fs_load_font (f, c, fontname, id, face)
#pragma optimize("", on)
#endif
-/* Set the ASCII font of the default fontset to FONTNAME if that is
- not yet set. */
-void
-set_default_ascii_font (fontname)
+
+/* Return ENCODING or a cons of ENCODING and REPERTORY of the font
+ FONTNAME. ENCODING is a charset symbol that specifies the encoding
+ of the font. REPERTORY is a charset symbol or nil. */
+
+
+Lisp_Object
+find_font_encoding (fontname)
Lisp_Object fontname;
{
- if (! CONSP (FONTSET_ASCII (Vdefault_fontset)))
- {
- int id = fs_query_fontset (fontname, 2);
+ Lisp_Object tail, elt;
- if (id >= 0)
- fontname = XCDR (FONTSET_ASCII (FONTSET_FROM_ID (id)));
- FONTSET_ASCII (Vdefault_fontset)
- = Fcons (make_number (0), fontname);
+ for (tail = Vfont_encoding_alist; CONSP (tail); tail = XCDR (tail))
+ {
+ elt = XCAR (tail);
+ if (CONSP (elt)
+ && STRINGP (XCAR (elt))
+ && fast_string_match_ignore_case (XCAR (elt), fontname) >= 0
+ && (SYMBOLP (XCDR (elt))
+ ? CHARSETP (XCDR (elt))
+ : CONSP (XCDR (elt)) && CHARSETP (XCAR (XCDR (elt)))))
+ return (XCDR (elt));
}
+ /* We don't know the encoding of this font. Let's assume `ascii'. */
+ return Qascii;
}
-
+
/* Cache data used by fontset_pattern_regexp. The car part is a
pattern string containing at least one wild card, the cdr part is
the corresponding regular expression. */
@@ -875,6 +1281,8 @@ fs_query_fontset (name, name_pattern)
if (name_pattern != 1)
{
tem = Frassoc (name, Vfontset_alias_alist);
+ if (NILP (tem))
+ tem = Fassoc (name, Vfontset_alias_alist);
if (CONSP (tem) && STRINGP (XCAR (tem)))
name = XCAR (tem);
else if (name_pattern == 0)
@@ -934,9 +1342,7 @@ If REGEXPP is non-nil, PATTERN is a regular expression. */)
return FONTSET_NAME (fontset);
}
-/* Return a list of base fontset names matching PATTERN on frame F.
- If SIZE is not 0, it is the size (maximum bound width) of fontsets
- to be listed. */
+/* Return a list of base fontset names matching PATTERN on frame F. */
Lisp_Object
list_fontsets (f, pattern, size)
@@ -963,111 +1369,62 @@ list_fontsets (f, pattern, size)
continue;
name = FONTSET_NAME (fontset);
- if (!NILP (regexp)
+ if (STRINGP (regexp)
? (fast_string_match (regexp, name) < 0)
: strcmp (SDATA (pattern), SDATA (name)))
continue;
- if (size)
- {
- struct font_info *fontp;
- fontp = FS_LOAD_FONT (f, 0, NULL, id);
- if (!fontp || size != fontp->size)
- continue;
- }
val = Fcons (Fcopy_sequence (FONTSET_NAME (fontset)), val);
}
return val;
}
-DEFUN ("new-fontset", Fnew_fontset, Snew_fontset, 2, 2, 0,
- doc: /* Create a new fontset NAME that contains font information in FONTLIST.
-FONTLIST is an alist of charsets vs corresponding font name patterns. */)
- (name, fontlist)
- Lisp_Object name, fontlist;
-{
- Lisp_Object fontset, elements, ascii_font;
- Lisp_Object tem, tail, elt;
- int id;
- (*check_window_system_func) ();
+/* Free all realized fontsets whose base fontset is BASE. */
- CHECK_STRING (name);
- CHECK_LIST (fontlist);
-
- name = Fdowncase (name);
- id = fs_query_fontset (name, 2);
- if (id >= 0)
- {
- fontset = FONTSET_FROM_ID (id);
- tem = FONTSET_NAME (fontset);
- error ("Fontset `%s' matches the existing fontset `%s'",
- SDATA (name), SDATA (tem));
- }
+static void
+free_realized_fontsets (base)
+ Lisp_Object base;
+{
+#if 0
+ int id;
- /* Check the validity of FONTLIST while creating a template for
- fontset elements. */
- elements = ascii_font = Qnil;
- for (tail = fontlist; CONSP (tail); tail = XCDR (tail))
+ /* For the moment, this doesn't work because free_realized_face
+ doesn't remove FACE from a cache. Until we find a solution, we
+ suppress this code, and simply use Fclear_face_cache even though
+ that is not efficient. */
+ BLOCK_INPUT;
+ for (id = 0; id < ASIZE (Vfontset_table); id++)
{
- int c, charset;
-
- tem = XCAR (tail);
- if (!CONSP (tem)
- || (charset = get_charset_id (XCAR (tem))) < 0
- || (!STRINGP (XCDR (tem)) && !CONSP (XCDR (tem))))
- error ("Elements of fontlist must be a cons of charset and font name pattern");
+ Lisp_Object this = AREF (Vfontset_table, id);
- tem = XCDR (tem);
- if (STRINGP (tem))
- tem = Fdowncase (tem);
- else
- tem = Fcons (Fdowncase (Fcar (tem)), Fdowncase (Fcdr (tem)));
- if (charset == CHARSET_ASCII)
- ascii_font = tem;
- else
+ if (EQ (FONTSET_BASE (this), base))
{
- c = MAKE_CHAR (charset, 0, 0);
- elements = Fcons (Fcons (make_number (c), tem), elements);
- }
- }
+ Lisp_Object tail;
- if (NILP (ascii_font))
- error ("No ASCII font in the fontlist");
+ for (tail = FONTSET_FACE_ALIST (this); CONSP (tail);
+ tail = XCDR (tail))
+ {
+ FRAME_PTR f = XFRAME (FONTSET_FRAME (this));
+ int face_id = XINT (XCDR (XCAR (tail)));
+ struct face *face = FACE_FROM_ID (f, face_id);
- fontset = make_fontset (Qnil, name, Qnil);
- FONTSET_ASCII (fontset) = Fcons (make_number (0), ascii_font);
- for (; CONSP (elements); elements = XCDR (elements))
- {
- elt = XCAR (elements);
- tem = XCDR (elt);
- if (STRINGP (tem))
- tem = font_family_registry (tem, 0);
- tem = Fcons (XCAR (elt), tem);
- FONTSET_SET (fontset, XINT (XCAR (elt)), tem);
+ /* Face THIS itself is also freed by the following call. */
+ free_realized_face (f, face);
+ }
+ }
}
-
- return Qnil;
-}
-
-
-/* Clear all elements of FONTSET for multibyte characters. */
-
-static void
-clear_fontset_elements (fontset)
- Lisp_Object fontset;
-{
- int i;
-
- for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
- XCHAR_TABLE (fontset)->contents[i] = Qnil;
+ UNBLOCK_INPUT;
+#else /* not 0 */
+ Fclear_face_cache (Qt);
+#endif /* not 0 */
}
/* Check validity of NAME as a fontset name and return the
corresponding fontset. If not valid, signal an error.
- If NAME is nil, return Vdefault_fontset. */
+ If NAME is t, return Vdefault_fontset. */
static Lisp_Object
check_fontset_name (name)
@@ -1075,7 +1432,7 @@ check_fontset_name (name)
{
int id;
- if (EQ (name, Qnil))
+ if (EQ (name, Qt))
return Vdefault_fontset;
CHECK_STRING (name);
@@ -1089,125 +1446,476 @@ check_fontset_name (name)
return FONTSET_FROM_ID (id);
}
-/* Downcase FONTNAME or car and cdr of FONTNAME. If FONTNAME is a
- string, maybe change FONTNAME to (FAMILY . REGISTRY). */
+static void
+accumulate_script_ranges (arg, range, val)
+ Lisp_Object arg, range, val;
+{
+ if (EQ (XCAR (arg), val))
+ {
+ if (CONSP (range))
+ XSETCDR (arg, Fcons (Fcons (XCAR (range), XCDR (range)), XCDR (arg)));
+ else
+ XSETCDR (arg, Fcons (Fcons (range, range), XCDR (arg)));
+ }
+}
+
-static Lisp_Object
-regularize_fontname (Lisp_Object fontname)
+/* Return an ASCII font name generated from fontset name NAME and
+ ASCII font specification ASCII_SPEC. NAME is a string conforming
+ to XLFD. ASCII_SPEC is a vector:
+ [FAMILY WEIGHT SLANT SWIDTH ADSTYLE REGISTRY]. */
+
+static INLINE Lisp_Object
+generate_ascii_font_name (name, ascii_spec)
+ Lisp_Object name, ascii_spec;
{
- Lisp_Object family, registry;
+ Lisp_Object vec;
+ int i;
+
+ vec = split_font_name_into_vector (name);
+ for (i = FONT_SPEC_FAMILY_INDEX; i <= FONT_SPEC_ADSTYLE_INDEX; i++)
+ if (! NILP (AREF (ascii_spec, i)))
+ ASET (vec, 1 + i, AREF (ascii_spec, i));
+ if (! NILP (AREF (ascii_spec, FONT_SPEC_REGISTRY_INDEX)))
+ ASET (vec, 12, AREF (ascii_spec, FONT_SPEC_REGISTRY_INDEX));
+ return build_font_name_from_vector (vec);
+}
+
+/* Variables referred in set_fontset_font. They are set before
+ map_charset_chars is called in Fset_fontset_font. */
+static Lisp_Object font_def_arg, add_arg;
+static int from_arg, to_arg;
- if (STRINGP (fontname))
- return font_family_registry (Fdowncase (fontname), 0);
+/* Callback function for map_charset_chars in Fset_fontset_font. In
+ FONTSET, set font_def_arg in a fashion specified by add_arg for
+ characters in RANGE while ignoring the range between from_arg and
+ to_arg. */
- CHECK_CONS (fontname);
- family = XCAR (fontname);
- registry = XCDR (fontname);
- if (!NILP (family))
+static void
+set_fontset_font (fontset, range)
+ Lisp_Object fontset, range;
+{
+ if (from_arg < to_arg)
{
- CHECK_STRING (family);
- family = Fdowncase (family);
+ int from = XINT (XCAR (range)), to = XINT (XCDR (range));
+
+ if (from < from_arg)
+ {
+ if (to > to_arg)
+ {
+ Lisp_Object range2;
+
+ range2 = Fcons (make_number (to_arg), XCDR (range));
+ FONTSET_ADD (fontset, range, font_def_arg, add_arg);
+ to = to_arg;
+ }
+ if (to > from_arg)
+ range = Fcons (XCAR (range), make_number (from_arg));
+ }
+ else if (to <= to_arg)
+ return;
+ else
+ {
+ if (from < to_arg)
+ range = Fcons (make_number (to_arg), XCDR (range));
+ }
}
- if (!NILP (registry))
+ FONTSET_ADD (fontset, range, font_def_arg, add_arg);
+}
+
+
+DEFUN ("set-fontset-font", Fset_fontset_font, Sset_fontset_font, 3, 5, 0,
+ doc: /*
+Modify fontset NAME to use FONT-SPEC for TARGET characters.
+
+TARGET may be a cons; (FROM . TO), where FROM and TO are characters.
+In that case, use FONT-SPEC for all characters in the range FROM and
+TO (inclusive).
+
+TARGET may be a script name symbol. In that case, use FONT-SPEC for
+all characters that belong to the script.
+
+TARGET may be a charset. In that case, use FONT-SPEC for all
+characters in the charset.
+
+TARGET may be nil. In that case, use FONT-SPEC for any characters for
+that no FONT-SPEC is specified.
+
+FONT-SPEC may one of these:
+ * A cons (FAMILY . REGISTRY), where FAMILY is a font family name and
+ REGISTRY is a font registry name. FAMILY may contains foundry
+ name, and REGISTRY may contains encoding name.
+ * A font name string.
+
+Optional 4th argument FRAME, if non-nil, is a frame. This argument is
+kept for backward compatibility and has no meaning.
+
+Optional 5th argument ADD, if non-nil, specifies how to add FONT-SPEC
+to the font specifications for TARGET previously set. If it is
+`prepend', FONT-SPEC is prepended. If it is `append', FONT-SPEC is
+appended. By default, FONT-SPEC overrides the previous settings. */)
+ (name, target, font_spec, frame, add)
+ Lisp_Object name, target, font_spec, frame, add;
+{
+ Lisp_Object fontset;
+ Lisp_Object font_def, registry, family;
+ Lisp_Object encoding, repertory;
+ Lisp_Object range_list;
+ struct charset *charset = NULL;
+
+ fontset = check_fontset_name (name);
+
+ /* The arg FRAME is kept for backward compatibility. We only check
+ the validity. */
+ if (!NILP (frame))
+ CHECK_LIVE_FRAME (frame);
+
+ if (VECTORP (font_spec))
+ {
+ /* FONT_SPEC should have this form:
+ [ FAMILY WEIGHT SLANT WIDTH ADSTYLE REGISTRY ]
+ This is a feature not yet documented because WEIGHT thru
+ ADSTYLE are ignored for the moment. */
+ int j;
+
+ if (ASIZE (font_spec) != FONT_SPEC_MAX_INDEX)
+ args_out_of_range (make_number (FONT_SPEC_MAX_INDEX),
+ make_number (ASIZE (font_spec)));
+
+ font_spec = Fcopy_sequence (font_spec);
+ for (j = 0; j < FONT_SPEC_MAX_INDEX - 1; j++)
+ if (! NILP (AREF (font_spec, j)))
+ {
+ CHECK_STRING (AREF (font_spec, j));
+ ASET (font_spec, j, Fdowncase (AREF (font_spec, j)));
+ }
+ family = AREF (font_spec, FONT_SPEC_FAMILY_INDEX);
+ /* REGISTRY should not be omitted. */
+ CHECK_STRING (AREF (font_spec, FONT_SPEC_REGISTRY_INDEX));
+ registry = AREF (font_spec, FONT_SPEC_REGISTRY_INDEX);
+ }
+ else if (CONSP (font_spec))
{
+ family = XCAR (font_spec);
+ registry = XCDR (font_spec);
+
+ if (! NILP (family))
+ {
+ CHECK_STRING (family);
+ family = Fdowncase (family);
+ }
CHECK_STRING (registry);
registry = Fdowncase (registry);
+ font_spec = Fmake_vector (make_number (FONT_SPEC_MAX_INDEX), Qnil);
+ ASET (font_spec, FONT_SPEC_FAMILY_INDEX, family);
+ ASET (font_spec, FONT_SPEC_REGISTRY_INDEX, registry);
+ }
+ else
+ {
+ CHECK_STRING (font_spec);
+ font_spec = Fdowncase (font_spec);
+ }
+
+ if (STRINGP (font_spec))
+ encoding = find_font_encoding (font_spec);
+ else
+ encoding = find_font_encoding (concat2 (family, registry));
+ if (NILP (encoding))
+ encoding = Qascii;
+
+ if (SYMBOLP (encoding))
+ {
+ CHECK_CHARSET (encoding);
+ encoding = repertory = CHARSET_SYMBOL_ID (encoding);
+ }
+ else
+ {
+ repertory = XCDR (encoding);
+ encoding = XCAR (encoding);
+ CHECK_CHARSET (encoding);
+ encoding = CHARSET_SYMBOL_ID (encoding);
+ if (! NILP (repertory) && SYMBOLP (repertory))
+ {
+ CHECK_CHARSET (repertory);
+ repertory = CHARSET_SYMBOL_ID (repertory);
+ }
}
- return Fcons (family, registry);
+ font_def = Fmake_vector (make_number (3), font_spec);
+ ASET (font_def, 1, encoding);
+ ASET (font_def, 2, repertory);
+
+ if (CHARACTERP (target))
+ range_list = Fcons (Fcons (target, target), Qnil);
+ else if (CONSP (target))
+ {
+ Lisp_Object from, to;
+
+ from = Fcar (target);
+ to = Fcdr (target);
+ CHECK_CHARACTER (from);
+ CHECK_CHARACTER (to);
+ range_list = Fcons (target, Qnil);
+ }
+ else if (SYMBOLP (target) && !NILP (target))
+ {
+ Lisp_Object script_list;
+ Lisp_Object val;
+
+ range_list = Qnil;
+ script_list = XCHAR_TABLE (Vchar_script_table)->extras[0];
+ if (! NILP (Fmemq (target, script_list)))
+ {
+ val = Fcons (target, Qnil);
+ map_char_table (accumulate_script_ranges, Qnil, Vchar_script_table,
+ val);
+ range_list = XCDR (val);
+ }
+ if (CHARSETP (target))
+ {
+ if (EQ (target, Qascii))
+ {
+ if (VECTORP (font_spec))
+ font_spec = generate_ascii_font_name (FONTSET_NAME (fontset),
+ font_spec);
+ FONTSET_ASCII (fontset) = font_spec;
+ range_list = Fcons (Fcons (make_number (0), make_number (127)),
+ Qnil);
+ }
+ else
+ {
+ CHECK_CHARSET_GET_CHARSET (target, charset);
+ }
+ }
+ else if (NILP (range_list))
+ error ("Invalid script or charset name: %s",
+ SDATA (SYMBOL_NAME (target)));
+ }
+ else if (NILP (target))
+ range_list = Fcons (Qnil, Qnil);
+ else
+ error ("Invalid target for setting a font");
+
+
+ if (charset)
+ {
+ font_def_arg = font_def;
+ add_arg = add;
+ if (NILP (range_list))
+ from_arg = to_arg = 0;
+ else
+ from_arg = XINT (XCAR (XCAR (range_list))),
+ to_arg = XINT (XCDR (XCAR (range_list)));
+
+ map_charset_chars (set_fontset_font, Qnil, fontset, charset,
+ CHARSET_MIN_CODE (charset),
+ CHARSET_MAX_CODE (charset));
+ }
+ for (; CONSP (range_list); range_list = XCDR (range_list))
+ FONTSET_ADD (fontset, XCAR (range_list), font_def, add);
+
+ /* Free all realized fontsets whose base is FONTSET. This way, the
+ specified character(s) are surely redisplayed by a correct
+ font. */
+ free_realized_fontsets (fontset);
+
+ return Qnil;
}
-DEFUN ("set-fontset-font", Fset_fontset_font, Sset_fontset_font, 3, 4, 0,
- doc: /* Modify fontset NAME to use FONTNAME for CHARACTER.
-If NAME is nil, modify the default fontset.
-CHARACTER may be a cons; (FROM . TO), where FROM and TO are
-non-generic characters. In that case, use FONTNAME
-for all characters in the range FROM and TO (inclusive).
-CHARACTER may be a charset. In that case, use FONTNAME
-for all character in the charsets.
+DEFUN ("new-fontset", Fnew_fontset, Snew_fontset, 2, 2, 0,
+ doc: /* Create a new fontset NAME from font information in FONTLIST.
+
+FONTLIST is an alist of scripts vs the corresponding font specification list.
+Each element of FONTLIST has the form (SCRIPT FONT-SPEC ...), where a
+character of SCRIPT is displayed by a font that matches one of
+FONT-SPEC.
+
+SCRIPT is a symbol that appears in the first extra slot of the
+char-table `char-script-table'.
-FONTNAME may be a cons; (FAMILY . REGISTRY), where FAMILY is a family
-name of a font, REGISTRY is a registry name of a font. */)
- (name, character, fontname, frame)
- Lisp_Object name, character, fontname, frame;
+FONT-SPEC is a vector, a cons, or a string. See the documentation of
+`set-fontset-font' for the meaning. */)
+ (name, fontlist)
+ Lisp_Object name, fontlist;
{
- Lisp_Object fontset, elt;
- Lisp_Object realized;
- int from, to;
+ Lisp_Object fontset;
+ Lisp_Object val;
int id;
- fontset = check_fontset_name (name);
+ CHECK_STRING (name);
+ CHECK_LIST (fontlist);
+
+ id = fs_query_fontset (name, 0);
+ if (id < 0)
+ {
+ name = Fdowncase (name);
+ val = split_font_name_into_vector (name);
+ if (NILP (val) || NILP (AREF (val, 12)) || NILP (AREF (val, 13)))
+ error ("Fontset name must be in XLFD format");
+ if (strcmp (SDATA (AREF (val, 12)), "fontset"))
+ error ("Registry field of fontset name must be \"fontset\"");
+ Vfontset_alias_alist
+ = Fcons (Fcons (name,
+ concat2 (concat2 (AREF (val, 12), build_string ("-")),
+ AREF (val, 13))),
+ Vfontset_alias_alist);
+ ASET (val, 12, build_string ("iso8859-1"));
+ fontset = make_fontset (Qnil, name, Qnil);
+ FONTSET_ASCII (fontset) = build_font_name_from_vector (val);
+ }
+ else
+ {
+ fontset = FONTSET_FROM_ID (id);;
+ free_realized_fontsets (fontset);
+ Fset_char_table_range (fontset, Qt, Qnil);
+ }
- if (CONSP (character))
+ for (; ! NILP (fontlist); fontlist = Fcdr (fontlist))
{
- /* CH should be (FROM . TO) where FROM and TO are non-generic
- characters. */
- CHECK_NUMBER_CAR (character);
- CHECK_NUMBER_CDR (character);
- from = XINT (XCAR (character));
- to = XINT (XCDR (character));
- if (!char_valid_p (from, 0) || !char_valid_p (to, 0))
- error ("Character range should be by non-generic characters");
- if (!NILP (name)
- && (SINGLE_BYTE_CHAR_P (from) || SINGLE_BYTE_CHAR_P (to)))
- error ("Can't change font for a single byte character");
+ Lisp_Object elt, script;
+
+ elt = Fcar (fontlist);
+ script = Fcar (elt);
+ elt = Fcdr (elt);
+ if (CONSP (elt) && (NILP (XCDR (elt)) || CONSP (XCDR (elt))))
+ for (; CONSP (elt); elt = XCDR (elt))
+ Fset_fontset_font (name, script, XCAR (elt), Qnil, Qappend);
+ else
+ Fset_fontset_font (name, script, elt, Qnil, Qappend);
}
- else if (SYMBOLP (character))
+ return name;
+}
+
+
+/* Alist of automatically created fontsets. Each element is a cons
+ (FONTNAME . FONTSET-ID). */
+static Lisp_Object auto_fontset_alist;
+
+int
+new_fontset_from_font_name (Lisp_Object fontname)
+{
+ Lisp_Object val;
+ Lisp_Object name;
+ Lisp_Object vec;
+ int id;
+
+ fontname = Fdowncase (fontname);
+ val = Fassoc (fontname, auto_fontset_alist);
+ if (CONSP (val))
+ return XINT (XCDR (val));
+
+ vec = split_font_name_into_vector (fontname);
+ if ( NILP (vec))
+ vec = Fmake_vector (make_number (14), build_string (""));
+ ASET (vec, 12, build_string ("fontset"));
+ if (NILP (auto_fontset_alist))
{
- elt = Fget (character, Qcharset);
- if (!VECTORP (elt) || ASIZE (elt) < 1 || !NATNUMP (AREF (elt, 0)))
- error ("Invalid charset: %s", SDATA (SYMBOL_NAME (character)));
- from = MAKE_CHAR (XINT (AREF (elt, 0)), 0, 0);
- to = from;
+ ASET (vec, 13, build_string ("startup"));
+ name = build_font_name_from_vector (vec);
}
else
{
- CHECK_NUMBER (character);
- from = XINT (character);
- to = from;
+ char temp[20];
+ int len = XINT (Flength (auto_fontset_alist));
+
+ sprintf (temp, "auto%d", len);
+ ASET (vec, 13, build_string (temp));
+ name = build_font_name_from_vector (vec);
}
- if (!char_valid_p (from, 1))
- invalid_character (from);
- if (SINGLE_BYTE_CHAR_P (from))
- error ("Can't change font for a single byte character");
- if (from < to)
+ name = Fnew_fontset (name, list2 (list2 (Qascii, fontname),
+ list2 (Fcons (make_number (0),
+ make_number (MAX_CHAR)),
+ fontname)));
+ id = fs_query_fontset (name, 0);
+ auto_fontset_alist
+ = Fcons (Fcons (fontname, make_number (id)), auto_fontset_alist);
+ return id;
+}
+
+#ifdef USE_FONT_BACKEND
+int
+new_fontset_from_font (font_object)
+ Lisp_Object font_object;
+{
+ Lisp_Object font_name = font_get_name (font_object);
+ Lisp_Object font_spec = font_get_spec (font_object);
+ Lisp_Object short_name, name, fontset;
+
+ if (NILP (auto_fontset_alist))
+ short_name = build_string ("fontset-startup");
+ else
{
- if (!char_valid_p (to, 1))
- invalid_character (to);
- if (SINGLE_BYTE_CHAR_P (to))
- error ("Can't change font for a single byte character");
+ char temp[32];
+ int len = XINT (Flength (auto_fontset_alist));
+
+ sprintf (temp, "fontset-auto%d", len);
+ short_name = build_string (temp);
}
+ ASET (font_spec, FONT_REGISTRY_INDEX, short_name);
+ name = Ffont_xlfd_name (font_spec);
+ if (NILP (name))
+ {
+ int i;
- /* The arg FRAME is kept for backward compatibility. We only check
- the validity. */
- if (!NILP (frame))
- CHECK_LIVE_FRAME (frame);
+ for (i = 0; i < FONT_SIZE_INDEX; i++)
+ if ((i != FONT_FAMILY_INDEX) && (i != FONT_REGISTRY_INDEX))
+ ASET (font_spec, i, Qnil);
+ name = Ffont_xlfd_name (font_spec);
+ if (NILP (name))
+ abort ();
+ }
+ fontset = make_fontset (Qnil, name, Qnil);
+ FONTSET_ASCII (fontset) = font_name;
+ return XINT (FONTSET_ID (fontset));
+}
- elt = Fcons (make_number (from), regularize_fontname (fontname));
- for (; from <= to; from++)
- FONTSET_SET (fontset, from, elt);
- Foptimize_char_table (fontset);
+struct font *
+fontset_ascii_font (f, id)
+ FRAME_PTR f;
+ int id;
+{
+ Lisp_Object fontset = FONTSET_FROM_ID (id);
+ Lisp_Object ascii_slot = FONTSET_ASCII (fontset);
+ Lisp_Object val, font_object;
- /* If there's a realized fontset REALIZED whose parent is FONTSET,
- clear all the elements of REALIZED and free all multibyte faces
- whose fontset is REALIZED. This way, the specified character(s)
- are surely redisplayed by a correct font. */
- for (id = 0; id < ASIZE (Vfontset_table); id++)
+ if (CONSP (ascii_slot))
{
- realized = AREF (Vfontset_table, id);
- if (!NILP (realized)
- && !BASE_FONTSET_P (realized)
- && EQ (FONTSET_BASE (realized), fontset))
+ Lisp_Object ascii_font_name = XCAR (ascii_slot);
+
+ font_object = Qnil;
+ for (val = XCDR (ascii_slot); ! NILP (val); val = XCDR (val))
{
- FRAME_PTR f = XFRAME (FONTSET_FRAME (realized));
- clear_fontset_elements (realized);
- free_realized_multibyte_face (f, id);
+ Lisp_Object frame = font_get_frame (XCAR (val));
+
+ if (NILP (frame) || XFRAME (frame) == f)
+ {
+ font_object = XCAR (val);
+ if (XSAVE_VALUE (font_object)->integer == 0)
+ {
+ font_object = font_open_by_name (f, SDATA (ascii_font_name));
+ XSETCAR (val, font_object);
+ }
+ break;
+ }
+ }
+ if (NILP (font_object))
+ {
+ font_object = font_open_by_name (f, SDATA (ascii_font_name));
+ XSETCDR (ascii_slot, Fcons (font_object, XCDR (ascii_slot)));
}
}
-
- return Qnil;
+ else
+ {
+ font_object = font_open_by_name (f, SDATA (ascii_slot));
+ FONTSET_ASCII (fontset) = Fcons (ascii_slot, Fcons (font_object, Qnil));
+ }
+ if (NILP (font_object))
+ return NULL;
+ return XSAVE_VALUE (font_object)->pointer;
}
+#endif /* USE_FONT_BACKEND */
+
DEFUN ("font-info", Ffont_info, Sfont_info, 1, 2, 0,
doc: /* Return information about a font named NAME on frame FRAME.
If FRAME is omitted or nil, use the selected frame.
@@ -1228,6 +1936,7 @@ If the named font is not yet loaded, return nil. */)
FRAME_PTR f;
struct font_info *fontp;
Lisp_Object info;
+ Lisp_Object font_object;
(*check_window_system_func) ();
@@ -1241,6 +1950,17 @@ If the named font is not yet loaded, return nil. */)
if (!query_font_func)
error ("Font query function is not supported");
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ {
+ font_object = font_open_by_name (f, SDATA (name));
+ if (NILP (font_object))
+ fontp = NULL;
+ else
+ fontp = (struct font_info *) XSAVE_VALUE (font_object)->pointer;
+ }
+ else
+#endif /* USE_FONT_BACKEND */
fontp = (*query_font_func) (f, SDATA (name));
if (!fontp)
return Qnil;
@@ -1255,6 +1975,10 @@ If the named font is not yet loaded, return nil. */)
XVECTOR (info)->contents[5] = make_number (fontp->relative_compose);
XVECTOR (info)->contents[6] = make_number (fontp->default_ascent);
+#ifdef USE_FONT_BACKEND
+ if (! NILP (font_object))
+ font_close_object (f, font_object);
+#endif /* USE_FONT_BACKEND */
return info;
}
@@ -1293,16 +2017,19 @@ DEFUN ("internal-char-font", Finternal_char_font, Sinternal_char_font, 1, 2, 0,
{
int pos, pos_byte, dummy;
int face_id;
- int c, code;
+ int c;
struct frame *f;
struct face *face;
+ Lisp_Object charset, rfont_def;
+ int id;
if (NILP (position))
{
- CHECK_NATNUM (ch);
+ CHECK_CHARACTER (ch);
c = XINT (ch);
f = XFRAME (selected_frame);
face_id = DEFAULT_FACE_ID;
+ pos = -1;
}
else
{
@@ -1330,228 +2057,252 @@ DEFUN ("internal-char-font", Finternal_char_font, Sinternal_char_font, 1, 2, 0,
}
if (! CHAR_VALID_P (c, 0))
return Qnil;
- face_id = FACE_FOR_CHAR (f, FACE_FROM_ID (f, face_id), c);
+ face_id = FACE_FOR_CHAR (f, FACE_FROM_ID (f, face_id), c, pos, Qnil);
face = FACE_FROM_ID (f, face_id);
- if (! face->font || ! face->font_name)
- return Qnil;
-
- {
- struct font_info *fontp = (*get_font_info_func) (f, face->font_info_id);
- XChar2b char2b;
- int c1, c2, charset;
-
- SPLIT_CHAR (c, charset, c1, c2);
- if (c2 > 0)
- STORE_XCHAR2B (&char2b, c1, c2);
- else
- STORE_XCHAR2B (&char2b, 0, c1);
- rif->encode_char (c, &char2b, fontp, NULL);
- code = (XCHAR2B_BYTE1 (&char2b) << 8) | XCHAR2B_BYTE2 (&char2b);
- }
- return Fcons (build_string (face->font_name), make_number (code));
+ charset = Fget_char_property (position, Qcharset, Qnil);
+ if (CHARSETP (charset))
+ id = XINT (CHARSET_SYMBOL_ID (charset));
+ else
+ id = -1;
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ {
+ rfont_def = fontset_font (FONTSET_FROM_ID (face->fontset), c, face, id);
+ if (VECTORP (rfont_def) && ! NILP (AREF (rfont_def, 4)))
+ {
+ Lisp_Object font_object = AREF (rfont_def, 4);
+ struct font *font = XSAVE_VALUE (font_object)->pointer;
+ unsigned code = font->driver->encode_char (font, c);
+ Lisp_Object fontname = font_get_name (font_object);
+
+ if (code == FONT_INVALID_CODE)
+ return Fcons (fontname, Qnil);
+ if (code <= MOST_POSITIVE_FIXNUM)
+ return Fcons (fontname, make_number (code));
+ return Fcons (fontname, Fcons (make_number (code >> 16),
+ make_number (code & 0xFFFF)));
+ }
+ return Qnil;
+ }
+#endif /* USE_FONT_BACKEND */
+ rfont_def = fontset_font (FONTSET_FROM_ID (face->fontset), c, face, id);
+ if (VECTORP (rfont_def) && STRINGP (AREF (rfont_def, 3)))
+ {
+ Lisp_Object font_def;
+ struct font_info *fontp;
+ struct charset *charset;
+ XChar2b char2b;
+ int code;
+
+ font_def = AREF (rfont_def, 2);
+ charset = CHARSET_FROM_ID (XINT (AREF (font_def, 1)));
+ code = ENCODE_CHAR (charset, c);
+ if (code == CHARSET_INVALID_CODE (charset))
+ return (Fcons (AREF (rfont_def, 3), Qnil));
+ STORE_XCHAR2B (&char2b, ((code >> 8) & 0xFF), (code & 0xFF));
+ fontp = (*get_font_info_func) (f, XINT (AREF (rfont_def, 1)));
+ rif->encode_char (c, &char2b, fontp, charset, NULL);
+ code = (XCHAR2B_BYTE1 (&char2b) << 8) | XCHAR2B_BYTE2 (&char2b);
+ return (Fcons (AREF (rfont_def, 3), make_number (code)));
+ }
+ return Qnil;
}
-/* Called from Ffontset_info via map_char_table on each leaf of
- fontset. ARG is a copy of the default fontset. The current leaf
- is indexed by CHARACTER and has value ELT. This function override
- the copy by ELT if ELT is not nil. */
+DEFUN ("fontset-info", Ffontset_info, Sfontset_info, 1, 2, 0,
+ doc: /* Return information about a fontset FONTSET on frame FRAME.
+The value is a char-table of which elements has this form.
-static void
-override_font_info (fontset, character, elt)
- Lisp_Object fontset, character, elt;
-{
- if (! NILP (elt))
- Faset (fontset, character, elt);
-}
+ ((FONT-PATTERN OPENED-FONT ...) ...)
-/* Called from Ffontset_info via map_char_table on each leaf of
- fontset. ARG is a list (LAST FONT-INFO ...), where LAST is `(last
- ARG)' and FONT-INFOs have this form:
- (CHAR FONT-SPEC) or ((FROM . TO) FONT-SPEC)
- The current leaf is indexed by CHARACTER and has value ELT. This
- function add the information of the current leaf to ARG by
- appending a new element or modifying the last element. */
+FONT-PATTERN is a vector:
-static void
-accumulate_font_info (arg, character, elt)
- Lisp_Object arg, character, elt;
-{
- Lisp_Object last, last_char, last_elt;
+ [ FAMILY WEIGHT SLANT SWIDTH ADSTYLE REGISTRY ]
- if (!CONSP (elt) && !SINGLE_BYTE_CHAR_P (XINT (character)))
- elt = FONTSET_REF (Vdefault_fontset, XINT (character));
- if (!CONSP (elt))
- return;
- last = XCAR (arg);
- last_char = XCAR (XCAR (last));
- last_elt = XCAR (XCDR (XCAR (last)));
- elt = XCDR (elt);
- if (!NILP (Fequal (elt, last_elt)))
- {
- int this_charset = CHAR_CHARSET (XINT (character));
+or a string of font name pattern.
- if (CONSP (last_char)) /* LAST_CHAR == (FROM . TO) */
- {
- if (this_charset == CHAR_CHARSET (XINT (XCAR (last_char))))
- {
- XSETCDR (last_char, character);
- return;
- }
- }
- else if (XINT (last_char) == XINT (character))
- return;
- else if (this_charset == CHAR_CHARSET (XINT (last_char)))
- {
- XSETCAR (XCAR (last), Fcons (last_char, character));
- return;
- }
- }
- XSETCDR (last, Fcons (Fcons (character, Fcons (elt, Qnil)), Qnil));
- XSETCAR (arg, XCDR (last));
-}
+OPENED-FONT is a name of a font actually opened.
-
-DEFUN ("fontset-info", Ffontset_info, Sfontset_info, 1, 2, 0,
- doc: /* Return information about a fontset named NAME on frame FRAME.
-If NAME is nil, return information about the default fontset.
-The value is a vector:
- [ SIZE HEIGHT ((CHARSET-OR-RANGE FONT-SPEC OPENED ...) ...) ],
-where,
- SIZE is the maximum bound width of ASCII font in the fontset,
- HEIGHT is the maximum bound height of ASCII font in the fontset,
- CHARSET-OR-RANGE is a charset, a character (may be a generic character)
- or a cons of two characters specifying the range of characters.
- FONT-SPEC is a fontname pattern string or a cons (FAMILY . REGISTRY),
- where FAMILY is a `FAMILY' field of a XLFD font name,
- REGISTRY is a `CHARSET_REGISTRY' field of a XLFD font name.
- FAMILY may contain a `FOUNDRY' field at the head.
- REGISTRY may contain a `CHARSET_ENCODING' field at the tail.
- OPENEDs are names of fonts actually opened.
-If the ASCII font is not yet opened, SIZE and HEIGHT are 0.
-If FRAME is omitted, it defaults to the currently selected frame. */)
- (name, frame)
- Lisp_Object name, frame;
+The char-table has one extra slot. The value is a char-table
+containing the information about the derived fonts from the default
+fontset. The format is the same as abobe. */)
+ (fontset, frame)
+ Lisp_Object fontset, frame;
{
- Lisp_Object fontset;
FRAME_PTR f;
- Lisp_Object indices[3];
- Lisp_Object val, tail, elt;
- Lisp_Object *realized;
- struct font_info *fontp = NULL;
- int n_realized = 0;
- int i;
+ Lisp_Object *realized[2], fontsets[2], tables[2];
+ Lisp_Object val, elt;
+ int c, i, j, k;
(*check_window_system_func) ();
- fontset = check_fontset_name (name);
+ fontset = check_fontset_name (fontset);
if (NILP (frame))
frame = selected_frame;
CHECK_LIVE_FRAME (frame);
f = XFRAME (frame);
- /* Recode realized fontsets whose base is FONTSET in the table
- `realized'. */
- realized = (Lisp_Object *) alloca (sizeof (Lisp_Object)
- * ASIZE (Vfontset_table));
- for (i = 0; i < ASIZE (Vfontset_table); i++)
+ /* Recode fontsets realized on FRAME from the base fontset FONTSET
+ in the table `realized'. */
+ realized[0] = (Lisp_Object *) alloca (sizeof (Lisp_Object)
+ * ASIZE (Vfontset_table));
+ for (i = j = 0; i < ASIZE (Vfontset_table); i++)
{
elt = FONTSET_FROM_ID (i);
if (!NILP (elt)
- && EQ (FONTSET_BASE (elt), fontset))
- realized[n_realized++] = elt;
+ && EQ (FONTSET_BASE (elt), fontset)
+ && EQ (FONTSET_FRAME (elt), frame))
+ realized[0][j++] = elt;
}
+ realized[0][j] = Qnil;
- if (! EQ (fontset, Vdefault_fontset))
+ realized[1] = (Lisp_Object *) alloca (sizeof (Lisp_Object)
+ * ASIZE (Vfontset_table));
+ for (i = j = 0; ! NILP (realized[0][i]); i++)
{
- /* Merge FONTSET onto the default fontset. */
- val = Fcopy_sequence (Vdefault_fontset);
- map_char_table (override_font_info, Qnil, fontset, fontset, val, 0, indices);
- fontset = val;
+ elt = FONTSET_DEFAULT (realized[0][i]);
+ if (! NILP (elt))
+ realized[1][j++] = elt;
}
+ realized[1][j] = Qnil;
+
+ tables[0] = Fmake_char_table (Qfontset_info, Qnil);
+ tables[1] = Fmake_char_table (Qnil, Qnil);
+ XCHAR_TABLE (tables[0])->extras[0] = tables[1];
+ fontsets[0] = fontset;
+ fontsets[1] = Vdefault_fontset;
- /* Accumulate information of the fontset in VAL. The format is
- (LAST FONT-INFO FONT-INFO ...), where FONT-INFO is (CHAR-OR-RANGE
- FONT-SPEC). See the comment for accumulate_font_info for the
- detail. */
- val = Fcons (Fcons (make_number (0),
- Fcons (XCDR (FONTSET_ASCII (fontset)), Qnil)),
- Qnil);
- val = Fcons (val, val);
- map_char_table (accumulate_font_info, Qnil, fontset, fontset, val, 0, indices);
- val = XCDR (val);
-
- /* For each FONT-INFO, if CHAR_OR_RANGE (car part) is a generic
- character for a charset, replace it with the charset symbol. If
- fonts are opened for FONT-SPEC, append the names of the fonts to
- FONT-SPEC. */
- for (tail = val; CONSP (tail); tail = XCDR (tail))
+ /* Accumulate information of the fontset in TABLE. The format of
+ each element is ((FONT-SPEC OPENED-FONT ...) ...). */
+ for (k = 0; k <= 1; k++)
{
- int c;
- elt = XCAR (tail);
- if (INTEGERP (XCAR (elt)))
+ for (c = 0; c <= MAX_CHAR; )
{
- int charset, c1, c2;
- c = XINT (XCAR (elt));
- SPLIT_CHAR (c, charset, c1, c2);
- if (c1 == 0)
- XSETCAR (elt, CHARSET_SYMBOL (charset));
- }
- else
- c = XINT (XCAR (XCAR (elt)));
- for (i = 0; i < n_realized; i++)
- {
- Lisp_Object face_id, font;
- struct face *face;
+ int from, to;
- face_id = FONTSET_REF_VIA_BASE (realized[i], c);
- if (INTEGERP (face_id))
+ if (c <= MAX_5_BYTE_CHAR)
+ {
+ val = char_table_ref_and_range (fontsets[k], c, &from, &to);
+ if (to > MAX_5_BYTE_CHAR)
+ to = MAX_5_BYTE_CHAR;
+ }
+ else
{
- face = FACE_FROM_ID (f, XINT (face_id));
- if (face && face->font && face->font_name)
+ val = FONTSET_FALLBACK (fontsets[k]);
+ to = MAX_CHAR;
+ }
+ if (VECTORP (val))
+ {
+ Lisp_Object alist;
+
+ /* At first, set ALIST to ((FONT-SPEC) ...). */
+ for (alist = Qnil, i = 0; i < ASIZE (val); i++)
+ alist = Fcons (Fcons (AREF (AREF (val, i), 0), Qnil), alist);
+ alist = Fnreverse (alist);
+
+ /* Then store opend font names to cdr of each elements. */
+ for (i = 0; ! NILP (realized[k][i]); i++)
{
- font = build_string (face->font_name);
- if (NILP (Fmember (font, XCDR (XCDR (elt)))))
- XSETCDR (XCDR (elt), Fcons (font, XCDR (XCDR (elt))));
+ if (c <= MAX_5_BYTE_CHAR)
+ val = FONTSET_REF (realized[k][i], c);
+ else
+ val = FONTSET_FALLBACK (realized[k][i]);
+ if (! VECTORP (val))
+ continue;
+ /* VAL is [int int ?
+ [FACE-ID FONT-INDEX FONT-DEF FONT-NAME] ...].
+ If a font of an element is already opened,
+ FONT-NAME is the name of a opened font. */
+ for (j = 3; j < ASIZE (val); j++)
+ if (STRINGP (AREF (AREF (val, j), 3)))
+ {
+ Lisp_Object font_idx;
+
+ font_idx = AREF (AREF (val, j), 1);
+ elt = Fassq (AREF (AREF (AREF (val, j), 2), 0), alist);
+ if (CONSP (elt)
+ && NILP (Fmemq (font_idx, XCDR(elt))))
+ nconc2 (elt, Fcons (font_idx, Qnil));
+ }
}
+ for (val = alist; CONSP (val); val = XCDR (val))
+ for (elt = XCDR (XCAR (val)); CONSP (elt); elt = XCDR (elt))
+ {
+ struct font_info *font_info
+ = (*get_font_info_func) (f, XINT (XCAR (elt)));
+ XSETCAR (elt, build_string (font_info->full_name));
+ }
+
+ /* Store ALIST in TBL for characters C..TO. */
+ if (c <= MAX_5_BYTE_CHAR)
+ char_table_set_range (tables[k], c, to, alist);
+ else
+ XCHAR_TABLE (tables[k])->defalt = alist;
}
+ c = to + 1;
}
}
- elt = Fcdr (Fcdr (Fassq (CHARSET_SYMBOL (CHARSET_ASCII), val)));
- if (CONSP (elt))
- {
- elt = XCAR (elt);
- fontp = (*query_font_func) (f, SDATA (elt));
- }
- val = Fmake_vector (make_number (3), val);
- AREF (val, 0) = fontp ? make_number (fontp->size) : make_number (0);
- AREF (val, 1) = fontp ? make_number (fontp->height) : make_number (0);
- return val;
+ return tables[0];
}
-DEFUN ("fontset-font", Ffontset_font, Sfontset_font, 2, 2, 0,
+
+DEFUN ("fontset-font", Ffontset_font, Sfontset_font, 2, 3, 0,
doc: /* Return a font name pattern for character CH in fontset NAME.
-If NAME is nil, find a font name pattern in the default fontset. */)
- (name, ch)
- Lisp_Object name, ch;
+If NAME is t, find a pattern in the default fontset.
+
+The value has the form (FAMILY . REGISTRY), where FAMILY is a font
+family name and REGISTRY is a font registry name. This is actually
+the first font name pattern for CH in the fontset or in the default
+fontset.
+
+If the 2nd optional arg ALL is non-nil, return a list of all font name
+patterns. */)
+ (name, ch, all)
+ Lisp_Object name, ch, all;
{
int c;
- Lisp_Object fontset, elt;
+ Lisp_Object fontset, elt, list, repertory, val;
+ int i, j;
fontset = check_fontset_name (name);
- CHECK_NUMBER (ch);
+ CHECK_CHARACTER (ch);
c = XINT (ch);
- if (!char_valid_p (c, 1))
- invalid_character (c);
-
- elt = FONTSET_REF (fontset, c);
- if (CONSP (elt))
- elt = XCDR (elt);
+ list = Qnil;
+ while (1)
+ {
+ for (i = 0, elt = FONTSET_REF (fontset, c); i < 2;
+ i++, elt = FONTSET_FALLBACK (fontset))
+ if (VECTORP (elt))
+ for (j = 0; j < ASIZE (elt); j++)
+ {
+ val = AREF (elt, j);
+ repertory = AREF (val, 1);
+ if (INTEGERP (repertory))
+ {
+ struct charset *charset = CHARSET_FROM_ID (XINT (repertory));
- return elt;
+ if (! CHAR_CHARSET_P (c, charset))
+ continue;
+ }
+ else if (CHAR_TABLE_P (repertory))
+ {
+ if (NILP (CHAR_TABLE_REF (repertory, c)))
+ continue;
+ }
+ val = AREF (val, 0);
+ val = Fcons (AREF (val, 0), AREF (val, 5));
+ if (NILP (all))
+ return val;
+ list = Fcons (val, list);
+ }
+ if (EQ (fontset, Vdefault_fontset))
+ break;
+ fontset = Vdefault_fontset;
+ }
+ return (Fnreverse (list));
}
DEFUN ("fontset-list", Ffontset_list, Sfontset_list, 0, 0, 0,
@@ -1573,62 +2324,56 @@ DEFUN ("fontset-list", Ffontset_list, Sfontset_list, 0, 0, 0,
return list;
}
-DEFUN ("set-overriding-fontspec-internal", Fset_overriding_fontspec_internal,
- Sset_overriding_fontspec_internal, 1, 1, 0,
- doc: /* Internal use only.
-
-FONTLIST is an alist of TARGET vs FONTNAME, where TARGET is a charset
-or a char-table, FONTNAME have the same meanings as in
-`set-fontset-font'.
-
-It overrides the font specifications for each TARGET in the default
-fontset by the corresponding FONTNAME.
-If TARGET is a charset, targets are all characters in the charset. If
-TARGET is a char-table, targets are characters whose value is non-nil
-in the table.
+#ifdef FONTSET_DEBUG
-It is intended that this function is called only from
-`set-language-environment'. */)
- (fontlist)
- Lisp_Object fontlist;
+Lisp_Object
+dump_fontset (fontset)
+ Lisp_Object fontset;
{
- Lisp_Object tail;
+ Lisp_Object vec;
+
+ vec = Fmake_vector (make_number (3), Qnil);
+ ASET (vec, 0, FONTSET_ID (fontset));
- fontlist = Fcopy_sequence (fontlist);
- /* Now FONTLIST is ((TARGET . FONTNAME) ...). Reform it to ((TARGET
- nil nil nil FONTSPEC) ...), where TARGET is a charset-id or a
- char-table. */
- for (tail = fontlist; CONSP (tail); tail = XCDR (tail))
+ if (BASE_FONTSET_P (fontset))
{
- Lisp_Object elt, target;
+ ASET (vec, 1, FONTSET_NAME (fontset));
+ }
+ else
+ {
+ Lisp_Object frame;
- elt = XCAR (tail);
- target = Fcar (elt);
- elt = Fcons (Qnil, regularize_fontname (Fcdr (elt)));
- if (! CHAR_TABLE_P (target))
+ frame = FONTSET_FRAME (fontset);
+ if (FRAMEP (frame))
{
- int charset, c;
-
- CHECK_SYMBOL (target);
- charset = get_charset_id (target);
- if (charset < 0)
- error ("Invalid charset %s", SDATA (SYMBOL_NAME (target)));
- target = make_number (charset);
- c = MAKE_CHAR (charset, 0, 0);
- XSETCAR (elt, make_number (c));
+ FRAME_PTR f = XFRAME (frame);
+
+ if (FRAME_LIVE_P (f))
+ ASET (vec, 1, f->name);
+ else
+ ASET (vec, 1, Qt);
}
- elt = Fcons (target, Fcons (Qnil, Fcons (Qnil, elt)));
- XSETCAR (tail, elt);
+ if (!NILP (FONTSET_DEFAULT (fontset)))
+ ASET (vec, 2, FONTSET_ID (FONTSET_DEFAULT (fontset)));
}
- if (! NILP (Fequal (fontlist, Voverriding_fontspec_alist)))
- return Qnil;
- Voverriding_fontspec_alist = fontlist;
- clear_face_cache (0);
- ++windows_or_buffers_changed;
- return Qnil;
+ return vec;
}
+DEFUN ("fontset-list-all", Ffontset_list_all, Sfontset_list_all, 0, 0, 0,
+ doc: /* Return a brief summary of all fontsets for debug use. */)
+ ()
+{
+ Lisp_Object val;
+ int i;
+
+ for (i = 0, val = Qnil; i < ASIZE (Vfontset_table); i++)
+ if (! NILP (AREF (Vfontset_table, i)))
+ val = Fcons (dump_fontset (AREF (Vfontset_table, i)), val);
+ return (Fnreverse (val));
+}
+#endif /* FONTSET_DEBUG */
+
void
syms_of_fontset ()
{
@@ -1636,9 +2381,13 @@ syms_of_fontset ()
/* Window system initializer should have set proper functions. */
abort ();
- Qfontset = intern ("fontset");
- staticpro (&Qfontset);
- Fput (Qfontset, Qchar_table_extra_slots, make_number (3));
+ DEFSYM (Qfontset, "fontset");
+ Fput (Qfontset, Qchar_table_extra_slots, make_number (9));
+ DEFSYM (Qfontset_info, "fontset-info");
+ Fput (Qfontset_info, Qchar_table_extra_slots, make_number (1));
+
+ DEFSYM (Qprepend, "prepend");
+ DEFSYM (Qappend, "append");
Vcached_fontset_data = Qnil;
staticpro (&Vcached_fontset_data);
@@ -1654,32 +2403,31 @@ syms_of_fontset ()
AREF (Vfontset_table, 0) = Vdefault_fontset;
next_fontset_id = 1;
- Voverriding_fontspec_alist = Qnil;
- staticpro (&Voverriding_fontspec_alist);
+ auto_fontset_alist = Qnil;
+ staticpro (&auto_fontset_alist);
DEFVAR_LISP ("font-encoding-alist", &Vfont_encoding_alist,
- doc: /* Alist of fontname patterns vs corresponding encoding info.
-Each element looks like (REGEXP . ENCODING-INFO),
- where ENCODING-INFO is an alist of CHARSET vs ENCODING.
-ENCODING is one of the following integer values:
- 0: code points 0x20..0x7F or 0x2020..0x7F7F are used,
- 1: code points 0xA0..0xFF or 0xA0A0..0xFFFF are used,
- 2: code points 0x20A0..0x7FFF are used,
- 3: code points 0xA020..0xFF7F are used. */);
+ doc: /*
+Alist of fontname patterns vs the corresponding encoding and repertory info.
+Each element looks like (REGEXP . (ENCODING . REPERTORY)),
+where ENCODING is a charset or a char-table,
+and REPERTORY is a charset, a char-table, or nil.
+
+ENCODING is for converting a character to a glyph code of the font.
+If ENCODING is a charset, encoding a character by the charset gives
+the corresponding glyph code. If ENCODING is a char-table, looking up
+the table by a character gives the corresponding glyph code.
+
+REPERTORY specifies a repertory of characters supported by the font.
+If REPERTORY is a charset, all characters beloging to the charset are
+supported. If REPERTORY is a char-table, all characters who have a
+non-nil value in the table are supported. It REPERTORY is nil, Emacs
+gets the repertory information by an opened font and ENCODING. */);
Vfont_encoding_alist = Qnil;
- Vfont_encoding_alist
- = Fcons (Fcons (build_string ("JISX0201"),
- Fcons (Fcons (intern ("latin-jisx0201"), make_number (0)),
- Qnil)),
- Vfont_encoding_alist);
- Vfont_encoding_alist
- = Fcons (Fcons (build_string ("ISO8859-1"),
- Fcons (Fcons (intern ("ascii"), make_number (0)),
- Qnil)),
- Vfont_encoding_alist);
DEFVAR_LISP ("use-default-ascent", &Vuse_default_ascent,
- doc: /* Char table of characters whose ascent values should be ignored.
+ doc: /*
+Char table of characters whose ascent values should be ignored.
If an entry for a character is non-nil, the ascent value of the glyph
is assumed to be what specified by _MULE_DEFAULT_ASCENT property of a font.
@@ -1688,7 +2436,8 @@ such a character is displayed on screen. */);
Vuse_default_ascent = Qnil;
DEFVAR_LISP ("ignore-relative-composition", &Vignore_relative_composition,
- doc: /* Char table of characters which is not composed relatively.
+ doc: /*
+Char table of characters which is not composed relatively.
If an entry for a character is non-nil, a composition sequence
which contains that character is displayed so that
the glyph of that character is put without considering
@@ -1714,6 +2463,10 @@ When a character is displayed with such fonts, the character is displayed
at the vertical center of lines. */);
Vvertical_centering_font_regexp = Qnil;
+ DEFVAR_LISP ("otf-script-alist", &Votf_script_alist,
+ doc: /* Alist of OpenType script tags vs the corresponding script names. */);
+ Votf_script_alist = Qnil;
+
defsubr (&Squery_fontset);
defsubr (&Snew_fontset);
defsubr (&Sset_fontset_font);
@@ -1722,7 +2475,9 @@ at the vertical center of lines. */);
defsubr (&Sfontset_info);
defsubr (&Sfontset_font);
defsubr (&Sfontset_list);
- defsubr (&Sset_overriding_fontspec_internal);
+#ifdef FONTSET_DEBUG
+ defsubr (&Sfontset_list_all);
+#endif
}
/* arch-tag: ea861585-2f5f-4e5b-9849-d04a9c3a3537
diff --git a/src/fontset.h b/src/fontset.h
index a4360280911..cf09dde73f2 100644
--- a/src/fontset.h
+++ b/src/fontset.h
@@ -4,6 +4,9 @@
Copyright (C) 1995, 1997, 2000
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
+ Copyright (C) 2003, 2006
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
@@ -43,7 +46,8 @@ struct font_info
/* Full name of the font given by a window system. */
char *full_name;
- /* Charset of characters displayed by the font. */
+ /* Charset to encode a character code into a glyph code of the
+ font. */
int charset;
#ifdef WINDOWSNT
@@ -70,25 +74,15 @@ struct font_info
of lines. */
int vertical_centering;
- /* Encodings of the font indexed by CHARSET. The value is one of
+ /* Encoding type of the font. The value is one of
0, 1, 2, or 3:
0: code points 0x20..0x7F or 0x2020..0x7F7F are used
1: code points 0xA0..0xFF or 0xA0A0..0xFFFF are used
2: code points 0x20A0..0x7FFF are used
3: code points 0xA020..0xFF7F are used
- For instance, ASCII and Latin-1 characters may use the same font
- but different code points (ASCII uses 0x20..0x7F and Latin-1 uses
- 0xA0..0xFF).
-
- If the value can't be decided from information of the font, we
- consult `font-encoding-alist' to get of the corresponding charset
- whose default value is defined in lisp/fontset.el. Since there's
- no charset whose id is 1, we use encoding[1] to store the
- encoding information decided by the font itself.
-
If the member `font_encoder' is not NULL, this member is ignored.
*/
- unsigned char encoding[MAX_CHARSET + 1];
+ unsigned char encoding_type;
/* The baseline position of a font is normally `ascent' value of the
font. However, there exists many fonts which don't set `ascent'
@@ -148,6 +142,17 @@ struct font_info
to be used. */
#define FONT_ENCODING_NOT_DECIDED 255
+enum FONT_SPEC_INDEX
+ {
+ FONT_SPEC_FAMILY_INDEX,
+ FONT_SPEC_WEIGHT_INDEX,
+ FONT_SPEC_SLANT_INDEX,
+ FONT_SPEC_SWIDTH_INDEX,
+ FONT_SPEC_ADSTYLE_INDEX,
+ FONT_SPEC_REGISTRY_INDEX,
+ FONT_SPEC_MAX_INDEX
+ };
+
/* Forward declaration for prototypes. */
struct frame;
@@ -187,43 +192,41 @@ extern void (*set_frame_fontset_func) P_ ((struct frame *f, Lisp_Object arg,
This function set the memer `encoder' of the structure. */
extern void (*find_ccl_program_func) P_ ((struct font_info *));
+extern Lisp_Object (*get_font_repertory_func) P_ ((struct frame *,
+ struct font_info *));
+
/* Check if any window system is used now. */
extern void (*check_window_system_func) P_ ((void));
struct face;
extern void free_face_fontset P_ ((FRAME_PTR, struct face *));
-extern Lisp_Object fontset_font_pattern P_ ((FRAME_PTR, int, int));
+extern Lisp_Object fontset_font_pattern P_ ((FRAME_PTR, struct face *, int));
extern int face_suitable_for_char_p P_ ((struct face *, int));
-extern int face_for_char P_ ((FRAME_PTR, struct face *, int));
-extern int make_fontset_for_ascii_face P_ ((FRAME_PTR, int));
+extern int face_for_char P_ ((FRAME_PTR, struct face *, int,
+ int, Lisp_Object));
+extern int make_fontset_for_ascii_face P_ ((FRAME_PTR, int, struct face *));
+extern int new_fontset_from_font_name P_ ((Lisp_Object));
extern void set_default_ascii_font P_ ((Lisp_Object));
-extern struct font_info *fs_load_font P_ ((struct frame *, int, char *, int,
- struct face *));
+extern struct font_info *fs_load_font P_ ((struct frame *, char *, int));
extern int fs_query_fontset P_ ((Lisp_Object, int));
EXFUN (Fquery_fontset, 2);
extern Lisp_Object list_fontsets P_ ((struct frame *, Lisp_Object, int));
-extern Lisp_Object Qfontset;
extern Lisp_Object Vuse_default_ascent;
extern Lisp_Object Vignore_relative_composition;
extern Lisp_Object Valternate_fontname_alist;
extern Lisp_Object Vfontset_alias_alist;
extern Lisp_Object Vvertical_centering_font_regexp;
+extern Lisp_Object Votf_script_alist;
-/* Load a font named FONTNAME for displaying character C. All fonts
- for frame F is stored in a table pointed by FONT_TABLE. Return a
- pointer to the struct font_info of the loaded font. If loading
- fails, return 0; If FONTNAME is NULL, the name is taken from the
- information of FONTSET. If FONTSET is given, try to load a font
- whose size matches that of FONTSET, and, the font index is stored
- in the table for FONTSET. */
+/* Load a font named FONTNAME on frame F. All fonts for frame F is
+ stored in a table pointed by FONT_TABLE. Return a pointer to the
+ struct font_info of the loaded font. If loading fails, return
+ NULL. */
-#define FS_LOAD_FONT(f, c, fontname, fontset) \
- fs_load_font (f, c, fontname, fontset, NULL)
+#define FS_LOAD_FONT(f, fontname) fs_load_font (f, fontname, -1)
-#define FS_LOAD_FACE_FONT(f, c, fontname, face) \
- fs_load_font (f, c, fontname, -1, face)
/* Return an immutable id for font_info FONT_INFO on frame F. The
reason for this macro is hat one cannot hold pointers to font_info
@@ -241,10 +244,26 @@ extern Lisp_Object Vvertical_centering_font_regexp;
? (FRAME_X_DISPLAY_INFO ((F))->font_table + (ID)) \
: 0)
+#ifdef USE_FONT_BACKEND
+#define FONT_INFO_FROM_FACE(F, FACE) \
+ (enable_font_backend ? (FACE)->font_info \
+ : FONT_INFO_FROM_ID ((F), (FACE)->font_info_id))
+#else /* not USE_FONT_BACKEND */
+#define FONT_INFO_FROM_FACE(F, FACE) \
+ FONT_INFO_FROM_ID ((F), (FACE)->font_info_id)
+#endif /* not USE_FONT_BACKEND */
+
extern Lisp_Object fontset_name P_ ((int));
extern Lisp_Object fontset_ascii P_ ((int));
extern int fontset_height P_ ((int));
+#ifdef USE_FONT_BACKEND
+struct font;
+extern int face_for_font P_ ((struct frame *, struct font *, struct face *));
+extern int new_fontset_from_font P_ ((Lisp_Object));
+extern struct font *fontset_ascii_font P_ ((FRAME_PTR, int));
+#endif /* USE_FONT_BACKEND */
+
#endif /* EMACS_FONTSET_H */
/* arch-tag: c27cef7b-3cab-488a-8398-7a4daa96bb77
diff --git a/src/frame.c b/src/frame.c
index 1fdeb129f89..1f8c173b9db 100644
--- a/src/frame.c
+++ b/src/frame.c
@@ -1,5 +1,5 @@
/* Generic frame functions.
- Copyright (C) 1993, 1994, 1995, 1997, 1999, 2000, 2001, 2002, 2003,
+ Copyright (C) 1993, 1994, 1995, 1997, 1999, 2000, 2001, 2002, 2003, 2006,
2004, 2005, 2006 Free Software Foundation, Inc.
This file is part of GNU Emacs.
@@ -23,7 +23,7 @@ Boston, MA 02110-1301, USA. */
#include <stdio.h>
#include "lisp.h"
-#include "charset.h"
+#include "character.h"
#ifdef HAVE_X_WINDOWS
#include "xterm.h"
#endif
@@ -53,6 +53,10 @@ Boston, MA 02110-1301, USA. */
#ifdef HAVE_WINDOW_SYSTEM
+#ifdef USE_FONT_BACKEND
+#include "font.h"
+#endif /* USE_FONT_BACKEND */
+
/* The name we're using in resource queries. Most often "emacs". */
Lisp_Object Vx_resource_name;
@@ -299,6 +303,9 @@ make_frame (mini_p)
#endif
f->size_hint_flags = 0;
f->win_gravity = 0;
+#ifdef USE_FONT_BACKEND
+ f->font_driver_list = NULL;
+#endif /* USE_FONT_BACKEND */
root_window = make_window ();
if (mini_p)
@@ -1935,7 +1942,7 @@ store_in_alist (alistptr, prop, val)
static int
frame_name_fnn_p (str, len)
char *str;
- int len;
+ EMACS_INT len;
{
if (len > 1 && str[0] == 'F')
{
@@ -3066,20 +3073,60 @@ x_set_font (f, arg, oldval)
Lisp_Object frame;
int old_fontset = FRAME_FONTSET(f);
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ {
+ int fontset = -1;
+ Lisp_Object font_object;
+
+ /* ARG is a fontset name, a font name, or a font object.
+ In the last case, this function never fail. */
+ if (STRINGP (arg))
+ {
+ fontset = fs_query_fontset (arg, Qnil);
+ if (fontset < 0)
+ font_object = font_open_by_name (f, SDATA (arg));
+ else if (fontset > 0)
+ {
+ Lisp_Object ascii_font = fontset_ascii (fontset);
+
+ font_object = font_open_by_name (f, SDATA (ascii_font));
+ }
+ }
+ else
+ font_object = arg;
+
+ if (fontset < 0 && ! NILP (font_object))
+ fontset = new_fontset_from_font (font_object);
+
+ if (fontset == 0)
+ /* Refuse the default fontset. */
+ result = Qt;
+ else if (NILP (font_object))
+ result = Qnil;
+ else
+ result = x_new_fontset2 (f, fontset, font_object);
+ }
+ else
+ {
+#endif /* USE_FONT_BACKEND */
CHECK_STRING (arg);
fontset_name = Fquery_fontset (arg, Qnil);
BLOCK_INPUT;
result = (STRINGP (fontset_name)
- ? x_new_fontset (f, SDATA (fontset_name))
- : x_new_font (f, SDATA (arg)));
+ ? x_new_fontset (f, fontset_name)
+ : x_new_fontset (f, arg));
UNBLOCK_INPUT;
+#ifdef USE_FONT_BACKEND
+ }
+#endif
if (EQ (result, Qnil))
error ("Font `%s' is not defined", SDATA (arg));
else if (EQ (result, Qt))
- error ("The characters of the given font have varying widths");
+ error ("The default fontset can't be used for a frame font");
else if (STRINGP (result))
{
set_default_ascii_font (result);
@@ -3090,7 +3137,9 @@ x_set_font (f, arg, oldval)
if (old_fontset == FRAME_FONTSET (f))
return;
}
- else if (!NILP (Fequal (result, oldval)))
+ store_frame_param (f, Qfont, result);
+
+ if (!NILP (Fequal (result, oldval)))
return;
/* Recalculate toolbar height. */
@@ -3098,7 +3147,6 @@ x_set_font (f, arg, oldval)
/* Ensure we redraw it. */
clear_current_matrices (f);
- store_frame_param (f, Qfont, result);
recompute_basic_faces (f);
}
else
diff --git a/src/frame.h b/src/frame.h
index 50e7c0660b4..bbf0c45d79b 100644
--- a/src/frame.h
+++ b/src/frame.h
@@ -93,6 +93,10 @@ extern struct x_output tty_display;
#endif /* ! MSDOS && ! WINDOWSNT && ! MAC_OS */
+#ifdef USE_FONT_BACKEND
+struct font_driver_list;
+#endif /* USE_FONT_BACKEND */
+
struct frame
{
EMACS_INT size;
@@ -256,6 +260,9 @@ struct frame
/* Size of the frame window in pixels. */
int pixel_height, pixel_width;
+ /* Dots per inch of the screen the frame is on. */
+ double resx, resy;
+
/* These many pixels are the difference between the outer window (i.e. the
left and top of the window manager decoration) and FRAME_X_WINDOW. */
int x_pixels_diff, y_pixels_diff;
@@ -301,6 +308,11 @@ struct frame
}
output_data;
+#ifdef USE_FONT_BACKEND
+ /* List of font-drivers available on the frame. */
+ struct font_driver_list *font_driver_list;
+#endif /* USE_FONT_BACKEND */
+
/* Total width of fringes reserved for drawing truncation bitmaps,
continuation bitmaps and alike. The width is in canonical char
units of the frame. This must currently be the case because window
@@ -1043,8 +1055,10 @@ extern void x_set_offset P_ ((struct frame *, int, int, int));
extern void x_wm_set_icon_position P_ ((struct frame *, int, int));
extern Lisp_Object x_new_font P_ ((struct frame *, char *));
-extern Lisp_Object x_new_fontset P_ ((struct frame *, char *));
-
+extern Lisp_Object x_new_fontset P_ ((struct frame *, Lisp_Object));
+#ifdef USE_FONT_BACKEND
+extern Lisp_Object x_new_fontset2 P_ ((struct frame *, int, Lisp_Object));
+#endif /* USE_FONT_BACKEND */
/* These are in frame.c */
diff --git a/src/fringe.c b/src/fringe.c
index 033832f7bb6..a42c2d70439 100644
--- a/src/fringe.c
+++ b/src/fringe.c
@@ -585,7 +585,7 @@ draw_fringe_bitmap_1 (w, row, left_p, overlay, which)
Lisp_Object face;
if ((face = fringe_faces[which], NILP (face))
- || (face_id = lookup_derived_face (f, face, 'A', FRINGE_FACE_ID, 0),
+ || (face_id = lookup_derived_face (f, face, FRINGE_FACE_ID, 0),
face_id < 0))
face_id = FRINGE_FACE_ID;
}
@@ -1554,7 +1554,7 @@ If FACE is nil, reset face to default fringe face. */)
if (!NILP (face))
{
face_id = lookup_derived_face (SELECTED_FRAME (), face,
- 'A', FRINGE_FACE_ID, 1);
+ FRINGE_FACE_ID, 1);
if (face_id < 0)
error ("No such face");
}
diff --git a/src/ftfont.c b/src/ftfont.c
new file mode 100644
index 00000000000..37e4f2563b4
--- /dev/null
+++ b/src/ftfont.c
@@ -0,0 +1,912 @@
+/* ftfont.c -- FreeType font driver.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ Copyright (C) 2006
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs; see the file COPYING. If not, write to
+the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+#include <config.h>
+#include <stdio.h>
+
+#include <ft2build.h>
+#include FT_FREETYPE_H
+#include FT_SIZES_H
+#include <fontconfig/fontconfig.h>
+#include <fontconfig/fcfreetype.h>
+
+#include "lisp.h"
+#include "dispextern.h"
+#include "frame.h"
+#include "blockinput.h"
+#include "character.h"
+#include "charset.h"
+#include "coding.h"
+#include "fontset.h"
+#include "font.h"
+
+/* Symbolic type of this font-driver. */
+Lisp_Object Qfreetype;
+
+/* Fontconfig's generic families and their aliases. */
+static Lisp_Object Qmonospace, Qsans_serif, Qserif, Qmono, Qsans, Qsans__serif;
+
+/* Flag to tell if FcInit is areadly called or not. */
+static int fc_initialized;
+
+/* Handle to a FreeType library instance. */
+static FT_Library ft_library;
+
+/* Cache for FreeType fonts. */
+static Lisp_Object freetype_font_cache;
+
+/* Fontconfig's charset used for finding fonts of registry
+ "iso8859-1". */
+static FcCharSet *cs_iso8859_1;
+
+/* The actual structure for FreeType font that can be casted to struct
+ font. */
+
+struct ftfont_info
+{
+ struct font font;
+ FT_Size ft_size;
+};
+
+static int ftfont_build_basic_charsets P_ ((void));
+static Lisp_Object ftfont_pattern_entity P_ ((FcPattern *,
+ Lisp_Object, Lisp_Object));
+static Lisp_Object ftfont_list_generic_family P_ ((Lisp_Object, Lisp_Object,
+ Lisp_Object));
+
+#define SYMBOL_FcChar8(SYM) (FcChar8 *) SDATA (SYMBOL_NAME (SYM))
+
+static int
+ftfont_build_basic_charsets ()
+{
+ FcChar32 c;
+
+ cs_iso8859_1 = FcCharSetCreate ();
+ if (! cs_iso8859_1)
+ return -1;
+ for (c = ' '; c < 127; c++)
+ if (! FcCharSetAddChar (cs_iso8859_1, c))
+ return -1;
+#if 0
+ /* This part is currently disabled. Should be fixed later. */
+ for (c = 192; c < 256; c++)
+ if (! FcCharSetAddChar (cs_iso8859_1, c))
+ return -1;
+#endif
+ return 0;
+}
+
+static Lisp_Object
+ftfont_pattern_entity (p, frame, registry)
+ FcPattern *p;
+ Lisp_Object frame, registry;
+{
+ Lisp_Object entity;
+ FcChar8 *file;
+ FcCharSet *charset;
+ char *str;
+ int numeric;
+ double dbl;
+
+ if (FcPatternGetString (p, FC_FILE, 0, &file) != FcResultMatch)
+ return Qnil;
+ if (FcPatternGetCharSet (p, FC_CHARSET, 0, &charset) != FcResultMatch)
+ charset = NULL;
+
+ entity = Fmake_vector (make_number (FONT_ENTITY_MAX), null_string);
+
+ ASET (entity, FONT_TYPE_INDEX, Qfreetype);
+ ASET (entity, FONT_REGISTRY_INDEX, registry);
+ ASET (entity, FONT_FRAME_INDEX, frame);
+ ASET (entity, FONT_OBJLIST_INDEX, Qnil);
+
+ if (FcPatternGetString (p, FC_FOUNDRY, 0, (FcChar8 **) &str) == FcResultMatch)
+ ASET (entity, FONT_FOUNDRY_INDEX, intern_downcase (str, strlen (str)));
+ if (FcPatternGetString (p, FC_FAMILY, 0, (FcChar8 **) &str) == FcResultMatch)
+ ASET (entity, FONT_FAMILY_INDEX, intern_downcase (str, strlen (str)));
+ if (FcPatternGetInteger (p, FC_WEIGHT, 0, &numeric) == FcResultMatch)
+ {
+ if (numeric == FC_WEIGHT_REGULAR)
+ numeric = 100;
+ ASET (entity, FONT_WEIGHT_INDEX, make_number (numeric));
+ }
+ if (FcPatternGetInteger (p, FC_SLANT, 0, &numeric) == FcResultMatch)
+ ASET (entity, FONT_SLANT_INDEX, make_number (numeric + 100));
+ if (FcPatternGetInteger (p, FC_WIDTH, 0, &numeric) == FcResultMatch)
+ ASET (entity, FONT_WIDTH_INDEX, make_number (numeric));
+ if (FcPatternGetDouble (p, FC_PIXEL_SIZE, 0, &dbl) == FcResultMatch)
+ ASET (entity, FONT_SIZE_INDEX, make_number (dbl));
+ else
+ ASET (entity, FONT_SIZE_INDEX, make_number (0));
+
+ if (FcPatternGetInteger (p, FC_SPACING, 0, &numeric) != FcResultMatch)
+ numeric = FC_MONO;
+ file = FcStrCopy (file);
+ if (! file)
+ return Qnil;
+
+ p = FcPatternCreate ();
+ if (! p)
+ return Qnil;
+
+ if (FcPatternAddString (p, FC_FILE, file) == FcFalse
+ || (charset && FcPatternAddCharSet (p, FC_CHARSET, charset) == FcFalse)
+ || FcPatternAddInteger (p, FC_SPACING, numeric) == FcFalse)
+ {
+ FcPatternDestroy (p);
+ return Qnil;
+ }
+ ASET (entity, FONT_EXTRA_INDEX, make_save_value (p, 0));
+ return entity;
+}
+
+static Lisp_Object ftfont_generic_family_list;
+
+static Lisp_Object
+ftfont_list_generic_family (spec, frame, registry)
+ Lisp_Object spec, frame, registry;
+{
+ Lisp_Object family = AREF (spec, FONT_FAMILY_INDEX);
+ Lisp_Object slot, list, val;
+
+ if (EQ (family, Qmono))
+ family = Qmonospace;
+ else if (EQ (family, Qsans) || EQ (family, Qsans__serif))
+ family = Qsans_serif;
+ slot = assq_no_quit (family, ftfont_generic_family_list);
+ if (! CONSP (slot))
+ return null_vector;
+ list = XCDR (slot);
+ if (EQ (list, Qt))
+ {
+ /* Not yet listed. */
+ FcObjectSet *objset = NULL;
+ FcPattern *pattern = NULL, *pat = NULL;
+ FcFontSet *fontset = NULL;
+ FcChar8 *fam;
+ int i, j;
+
+ objset = FcObjectSetBuild (FC_FOUNDRY, FC_FAMILY, FC_WEIGHT, FC_SLANT,
+ FC_WIDTH, FC_PIXEL_SIZE, FC_SPACING,
+ FC_CHARSET, FC_FILE, NULL);
+ if (! objset)
+ goto err;
+ pattern = FcPatternBuild (NULL, FC_FAMILY, FcTypeString,
+ SYMBOL_FcChar8 (family), (char *) 0);
+ if (! pattern)
+ goto err;
+ pat = FcPatternCreate ();
+ if (! pat)
+ goto err;
+ FcConfigSubstitute (NULL, pattern, FcMatchPattern);
+ for (i = 0, val = Qnil;
+ FcPatternGetString (pattern, FC_FAMILY, i, &fam) == FcResultMatch;
+ i++)
+ {
+ if (strcmp ((char *) fam, (char *) SYMBOL_FcChar8 (family)) == 0)
+ continue;
+ if (! FcPatternAddString (pat, FC_FAMILY, fam))
+ goto err;
+ fontset = FcFontList (NULL, pat, objset);
+ if (! fontset)
+ goto err;
+ /* Here we build the list in reverse order so that the last
+ loop in this function build a list in the correct
+ order. */
+ for (j = 0; j < fontset->nfont; j++)
+ {
+ Lisp_Object entity;
+
+ entity = ftfont_pattern_entity (fontset->fonts[j],
+ frame, registry);
+ if (! NILP (entity))
+ val = Fcons (entity, val);
+ }
+ FcFontSetDestroy (fontset);
+ fontset = NULL;
+ FcPatternDel (pat, FC_FAMILY);
+ }
+ list = val;
+ XSETCDR (slot, list);
+ err:
+ if (pat) FcPatternDestroy (pat);
+ if (pattern) FcPatternDestroy (pattern);
+ if (fontset) FcFontSetDestroy (fontset);
+ if (objset) FcObjectSetDestroy (objset);
+ if (EQ (list, Qt))
+ return Qnil;
+ }
+ ASET (spec, FONT_FAMILY_INDEX, Qnil);
+ for (val = Qnil; CONSP (list); list = XCDR (list))
+ if (font_match_p (spec, XCAR (list)))
+ val = Fcons (XCAR (list), val);
+ ASET (spec, FONT_FAMILY_INDEX, family);
+ return Fvconcat (1, &val);
+}
+
+
+static Lisp_Object ftfont_get_cache P_ ((Lisp_Object));
+static Lisp_Object ftfont_list P_ ((Lisp_Object, Lisp_Object));
+static Lisp_Object ftfont_list_family P_ ((Lisp_Object));
+static void ftfont_free_entity P_ ((Lisp_Object));
+static struct font *ftfont_open P_ ((FRAME_PTR, Lisp_Object, int));
+static void ftfont_close P_ ((FRAME_PTR, struct font *));
+static int ftfont_has_char P_ ((Lisp_Object, int));
+static unsigned ftfont_encode_char P_ ((struct font *, int));
+static int ftfont_text_extents P_ ((struct font *, unsigned *, int,
+ struct font_metrics *));
+static int ftfont_get_bitmap P_ ((struct font *, unsigned,
+ struct font_bitmap *, int));
+static int ftfont_anchor_point P_ ((struct font *, unsigned, int,
+ int *, int *));
+
+struct font_driver ftfont_driver =
+ {
+ (Lisp_Object) NULL, /* Qfreetype */
+ ftfont_get_cache,
+ ftfont_list,
+ ftfont_list_family,
+ ftfont_free_entity,
+ ftfont_open,
+ ftfont_close,
+ /* We can't draw a text without device dependent functions. */
+ NULL,
+ NULL,
+ ftfont_has_char,
+ ftfont_encode_char,
+ ftfont_text_extents,
+ /* We can't draw a text without device dependent functions. */
+ NULL,
+ ftfont_get_bitmap,
+ NULL,
+ NULL,
+ NULL,
+ ftfont_anchor_point,
+#ifdef HAVE_LIBOTF
+ font_otf_capability,
+ font_otf_gsub,
+ font_otf_gpos
+#else
+ NULL,
+ NULL,
+ NULL
+#endif /* HAVE_LIBOTF */
+ };
+
+extern Lisp_Object QCname;
+
+static Lisp_Object
+ftfont_get_cache (frame)
+ Lisp_Object frame;
+{
+ return freetype_font_cache;
+}
+
+static Lisp_Object
+ftfont_list (frame, spec)
+ Lisp_Object frame, spec;
+{
+ Lisp_Object val, tmp, extra, font_name;
+ int i;
+ FcPattern *pattern = NULL;
+ FcCharSet *charset = NULL;
+ FcLangSet *langset = NULL;
+ FcFontSet *fontset = NULL;
+ FcObjectSet *objset = NULL;
+ Lisp_Object registry = Qunicode_bmp;
+ int weight = 0;
+ double dpi = -1;
+ int spacing = -1;
+ int scalable = -1;
+ char otf_script[15]; /* For "otlayout\:XXXX" */
+
+ val = null_vector;
+
+ if (! fc_initialized)
+ {
+ FcInit ();
+ fc_initialized = 1;
+ }
+
+ if (! NILP (AREF (spec, FONT_ADSTYLE_INDEX))
+ && ! EQ (AREF (spec, FONT_ADSTYLE_INDEX), null_string))
+ return val;
+ if (! NILP (AREF (spec, FONT_SLANT_INDEX))
+ && XINT (AREF (spec, FONT_SLANT_INDEX)) < 100)
+ /* Fontconfig doesn't support reverse-italic/obligue. */
+ return val;
+
+ if (! NILP (AREF (spec, FONT_REGISTRY_INDEX)))
+ {
+ registry = AREF (spec, FONT_REGISTRY_INDEX);
+ if (EQ (registry, Qiso8859_1))
+ {
+ if (! cs_iso8859_1
+ && ftfont_build_basic_charsets () < 0)
+ return Qnil;
+ charset = cs_iso8859_1;
+ }
+ else if (! EQ (registry, Qiso10646_1) && ! EQ (registry, Qunicode_bmp))
+ return val;
+ }
+
+ extra = AREF (spec, FONT_EXTRA_INDEX);
+ font_name = Qnil;
+ otf_script[0] = '\0';
+ if (CONSP (extra))
+ {
+ Lisp_Object script = Qnil;
+
+ tmp = assq_no_quit (QCname, extra);
+ if (CONSP (tmp) && STRINGP (XCDR (tmp))
+ && SDATA (XCDR (tmp))[0] == ':')
+ font_name = XCDR (tmp);
+ tmp = assq_no_quit (QCotf, extra);
+ if (CONSP (tmp) && SYMBOLP (XCDR (tmp)))
+ {
+ tmp = XCDR (tmp);
+ script = assq_no_quit (tmp, Votf_script_alist);
+ if (CONSP (script) && SYMBOLP (XCDR (script)))
+ script = XCDR (script);
+ tmp = SYMBOL_NAME (tmp);
+ sprintf (otf_script, "otlayout:%s", (char *) SDATA (tmp));
+ }
+ tmp = assq_no_quit (QClanguage, extra);
+ if (CONSP (tmp))
+ {
+ langset = FcLangSetCreate ();
+ if (! langset)
+ goto err;
+ tmp = XCDR (tmp);
+ if (SYMBOLP (tmp))
+ {
+ if (! FcLangSetAdd (langset, SYMBOL_FcChar8 (tmp)))
+ goto err;
+ }
+ else
+ while (CONSP (tmp))
+ {
+ if (SYMBOLP (XCAR (tmp))
+ && ! FcLangSetAdd (langset, SYMBOL_FcChar8 (XCAR (tmp))))
+ goto err;
+ tmp = XCDR (tmp);
+ }
+ }
+ tmp = assq_no_quit (QCscript, extra);
+ if (CONSP (tmp))
+ script = XCDR (tmp);
+ if (! NILP (script) && ! charset)
+ {
+ Lisp_Object chars
+ = assq_no_quit (script, Vscript_representative_chars);
+
+ if (CONSP (chars))
+ {
+ charset = FcCharSetCreate ();
+ if (! charset)
+ goto err;
+ for (chars = XCDR (chars); CONSP (chars); chars = XCDR (chars))
+ if (CHARACTERP (XCAR (chars))
+ && ! FcCharSetAddChar (charset, XUINT (XCAR (chars))))
+ goto err;
+ }
+ }
+ tmp = assq_no_quit (QCdpi, extra);
+ if (CONSP (tmp))
+ dpi = XINT (XCDR (tmp));
+ tmp = assq_no_quit (QCspacing, extra);
+ if (CONSP (tmp))
+ spacing = XINT (XCDR (tmp));
+ tmp = assq_no_quit (QCscalable, extra);
+ if (CONSP (tmp))
+ scalable = ! NILP (XCDR (tmp));
+ }
+
+ if (STRINGP (font_name))
+ pattern = FcNameParse (SDATA (font_name));
+ else
+ pattern = FcPatternCreate ();
+ if (! pattern)
+ goto err;
+
+ tmp = AREF (spec, FONT_FOUNDRY_INDEX);
+ if (SYMBOLP (tmp) && ! NILP (tmp)
+ && ! FcPatternAddString (pattern, FC_FOUNDRY, SYMBOL_FcChar8 (tmp)))
+ goto err;
+ tmp = AREF (spec, FONT_FAMILY_INDEX);
+ if (SYMBOLP (tmp) && ! NILP (tmp)
+ && ! FcPatternAddString (pattern, FC_FAMILY, SYMBOL_FcChar8 (tmp)))
+ goto err;
+ /* Emacs conventionally doesn't distinguish normal, regular, and
+ medium weight, but fontconfig does. So, we can't restrict font
+ listing by weight. We check it after getting a list. */
+ tmp = AREF (spec, FONT_WEIGHT_INDEX);
+ if (INTEGERP (tmp))
+ weight = XINT (tmp);
+ tmp = AREF (spec, FONT_SLANT_INDEX);
+ if (INTEGERP (tmp)
+ && ! FcPatternAddInteger (pattern, FC_SLANT, XINT (tmp) - 100))
+ goto err;
+ tmp = AREF (spec, FONT_WIDTH_INDEX);
+ if (INTEGERP (tmp)
+ && ! FcPatternAddInteger (pattern, FC_WIDTH, XINT (tmp)))
+ goto err;
+
+ if (charset
+ && ! FcPatternAddCharSet (pattern, FC_CHARSET, charset))
+ goto err;
+ if (langset
+ && ! FcPatternAddLangSet (pattern, FC_LANG, langset))
+ goto err;
+ if (dpi >= 0
+ && ! FcPatternAddDouble (pattern, FC_DPI, dpi))
+ goto err;
+ if (spacing >= 0
+ && ! FcPatternAddInteger (pattern, FC_SPACING, spacing))
+ goto err;
+ if (scalable >= 0
+ && ! FcPatternAddBool (pattern, FC_SCALABLE, scalable ? FcTrue : FcFalse))
+ goto err;
+
+ objset = FcObjectSetBuild (FC_FOUNDRY, FC_FAMILY, FC_WEIGHT, FC_SLANT,
+ FC_WIDTH, FC_PIXEL_SIZE, FC_SPACING,
+ FC_CHARSET, FC_FILE, NULL);
+ if (! objset)
+ goto err;
+ if (otf_script[0])
+ {
+#ifndef FC_CAPABILITY
+ goto finish;
+#else /* not FC_CAPABILITY */
+ if (! FcObjectSetAdd (objset, FC_CAPABILITY))
+ goto err;
+#endif /* not FC_CAPABILITY */
+ }
+
+ fontset = FcFontList (NULL, pattern, objset);
+ if (! fontset)
+ goto err;
+
+ if (fontset->nfont > 0)
+ {
+ double pixel_size;
+
+ if (NILP (AREF (spec, FONT_SIZE_INDEX)))
+ pixel_size = 0;
+ else
+ pixel_size = XINT (AREF (spec, FONT_SIZE_INDEX));
+
+ for (i = 0, val = Qnil; i < fontset->nfont; i++)
+ {
+ Lisp_Object entity;
+
+ if (pixel_size > 0)
+ {
+ double this;
+
+ if (FcPatternGetDouble (fontset->fonts[i], FC_PIXEL_SIZE, 0,
+ &this) == FcResultMatch
+ && ((this < pixel_size - FONT_PIXEL_SIZE_QUANTUM)
+ || (this > pixel_size + FONT_PIXEL_SIZE_QUANTUM)))
+ continue;
+ }
+ if (weight > 0)
+ {
+ int this;
+
+ if (FcPatternGetInteger (fontset->fonts[i], FC_WEIGHT, 0,
+ &this) != FcResultMatch
+ || (this != weight
+ && (weight != 100
+ || this < FC_WEIGHT_REGULAR
+ || this > FC_WEIGHT_MEDIUM)))
+ continue;
+ }
+#ifdef FC_CAPABILITY
+ if (otf_script[0])
+ {
+ FcChar8 *this;
+
+ if (FcPatternGetString (fontset->fonts[i], FC_CAPABILITY, 0,
+ &this) != FcResultMatch
+ || ! strstr ((char *) this, otf_script))
+ continue;
+ }
+#endif /* FC_CAPABILITY */
+ entity = ftfont_pattern_entity (fontset->fonts[i], frame, registry);
+ if (! NILP (entity))
+ val = Fcons (entity, val);
+ }
+ val = Fvconcat (1, &val);
+ }
+ else if (! NILP (AREF (spec, FONT_FAMILY_INDEX)))
+ val = ftfont_list_generic_family (spec, frame, registry);
+ goto finish;
+
+ err:
+ /* We come here because of unexpected error in fontconfig API call
+ (usually insufficient memory). */
+ val = Qnil;
+
+ finish:
+ if (charset && charset != cs_iso8859_1) FcCharSetDestroy (charset);
+ if (objset) FcObjectSetDestroy (objset);
+ if (fontset) FcFontSetDestroy (fontset);
+ if (langset) FcLangSetDestroy (langset);
+ if (pattern) FcPatternDestroy (pattern);
+
+ return val;
+}
+
+static Lisp_Object
+ftfont_list_family (frame)
+ Lisp_Object frame;
+{
+ Lisp_Object list;
+ FcPattern *pattern = NULL;
+ FcFontSet *fontset = NULL;
+ FcObjectSet *objset = NULL;
+ int i;
+
+ if (! fc_initialized)
+ {
+ FcInit ();
+ fc_initialized = 1;
+ }
+
+ pattern = FcPatternCreate ();
+ if (! pattern)
+ goto finish;
+ objset = FcObjectSetBuild (FC_FAMILY, NULL);
+ if (! objset)
+ goto finish;
+ fontset = FcFontList (NULL, pattern, objset);
+ if (! fontset)
+ goto finish;
+
+ list = Qnil;
+ for (i = 0; i < fontset->nfont; i++)
+ {
+ FcPattern *pat = fontset->fonts[i];
+ FcChar8 *str;
+
+ if (FcPatternGetString (pat, FC_FAMILY, 0, &str) == FcResultMatch)
+ list = Fcons (intern_downcase ((char *) str, strlen ((char *) str)),
+ list);
+ }
+
+ finish:
+ if (objset) FcObjectSetDestroy (objset);
+ if (fontset) FcFontSetDestroy (fontset);
+ if (pattern) FcPatternDestroy (pattern);
+
+ return list;
+}
+
+
+static void
+ftfont_free_entity (entity)
+ Lisp_Object entity;
+{
+ Lisp_Object val = AREF (entity, FONT_EXTRA_INDEX);
+ FcPattern *pattern = XSAVE_VALUE (val)->pointer;
+
+ FcPatternDestroy (pattern);
+}
+
+static struct font *
+ftfont_open (f, entity, pixel_size)
+ FRAME_PTR f;
+ Lisp_Object entity;
+ int pixel_size;
+{
+ struct ftfont_info *ftfont_info;
+ struct font *font;
+ FT_Face ft_face;
+ FT_Size ft_size;
+ FT_UInt size;
+ Lisp_Object val;
+ FcPattern *pattern;
+ FcChar8 *file;
+ int spacing;
+
+ val = AREF (entity, FONT_EXTRA_INDEX);
+ if (XTYPE (val) != Lisp_Misc
+ || XMISCTYPE (val) != Lisp_Misc_Save_Value)
+ return NULL;
+ pattern = XSAVE_VALUE (val)->pointer;
+ if (XSAVE_VALUE (val)->integer == 0)
+ {
+ /* We have not yet created FT_Face for this font. */
+ if (! ft_library
+ && FT_Init_FreeType (&ft_library) != 0)
+ return NULL;
+ if (FcPatternGetString (pattern, FC_FILE, 0, &file) != FcResultMatch)
+ return NULL;
+ if (FT_New_Face (ft_library, (char *) file, 0, &ft_face) != 0)
+ return NULL;
+ FcPatternAddFTFace (pattern, FC_FT_FACE, ft_face);
+ ft_size = ft_face->size;
+ }
+ else
+ {
+ if (FcPatternGetFTFace (pattern, FC_FT_FACE, 0, &ft_face)
+ != FcResultMatch)
+ return NULL;
+ if (FT_New_Size (ft_face, &ft_size) != 0)
+ return NULL;
+ if (FT_Activate_Size (ft_size) != 0)
+ {
+ FT_Done_Size (ft_size);
+ return NULL;
+ }
+ }
+
+ size = XINT (AREF (entity, FONT_SIZE_INDEX));
+ if (size == 0)
+ size = pixel_size;
+ if (FT_Set_Pixel_Sizes (ft_face, size, size) != 0)
+ {
+ if (XSAVE_VALUE (val)->integer == 0)
+ FT_Done_Face (ft_face);
+ return NULL;
+ }
+
+ ftfont_info = malloc (sizeof (struct ftfont_info));
+ if (! ftfont_info)
+ return NULL;
+ ftfont_info->ft_size = ft_size;
+
+ font = (struct font *) ftfont_info;
+ font->entity = entity;
+ font->pixel_size = size;
+ font->driver = &ftfont_driver;
+ font->font.name = font->font.full_name = NULL;
+ font->file_name = (char *) file;
+ font->font.size = ft_face->size->metrics.max_advance >> 6;
+ font->font.charset = font->encoding_charset = font->repertory_charset = -1;
+ font->ascent = ft_face->size->metrics.ascender >> 6;
+ font->descent = - ft_face->size->metrics.descender >> 6;
+ font->font.height = ft_face->size->metrics.height >> 6;
+ if (FcPatternGetInteger (pattern, FC_SPACING, 0, &spacing) != FcResultMatch
+ || spacing != FC_PROPORTIONAL)
+ font->font.average_width = font->font.space_width = font->font.size;
+ else
+ {
+ int i;
+
+ for (i = 32; i < 127; i++)
+ {
+ if (FT_Load_Char (ft_face, i, FT_LOAD_DEFAULT) != 0)
+ break;
+ if (i == 32)
+ font->font.space_width = ft_face->glyph->metrics.horiAdvance >> 6;
+ font->font.average_width += ft_face->glyph->metrics.horiAdvance >> 6;
+ }
+ if (i == 127)
+ {
+ /* The font contains all ASCII printable characters. */
+ font->font.average_width /= 95;
+ }
+ else
+ {
+ if (i == 32)
+ font->font.space_width = font->font.size;
+ font->font.average_width = font->font.size;
+ }
+ }
+
+ font->font.baseline_offset = 0;
+ font->font.relative_compose = 0;
+ font->font.default_ascent = 0;
+ font->font.vertical_centering = 0;
+
+ (XSAVE_VALUE (val)->integer)++;
+
+ return font;
+}
+
+static void
+ftfont_close (f, font)
+ FRAME_PTR f;
+ struct font *font;
+{
+ struct ftfont_info *ftfont_info = (struct ftfont_info *) font;
+ Lisp_Object entity = font->entity;
+ Lisp_Object val = AREF (entity, FONT_EXTRA_INDEX);
+
+ (XSAVE_VALUE (val)->integer)--;
+ if (XSAVE_VALUE (val)->integer == 0)
+ FT_Done_Face (ftfont_info->ft_size->face);
+ else
+ FT_Done_Size (ftfont_info->ft_size);
+
+ free (font);
+}
+
+static int
+ftfont_has_char (entity, c)
+ Lisp_Object entity;
+ int c;
+{
+ Lisp_Object val;
+ FcPattern *pattern;
+ FcCharSet *charset;
+
+ val = AREF (entity, FONT_EXTRA_INDEX);
+ pattern = XSAVE_VALUE (val)->pointer;
+ if (FcPatternGetCharSet (pattern, FC_CHARSET, 0, &charset) != FcResultMatch)
+ return -1;
+ return (FcCharSetHasChar (charset, (FcChar32) c) == FcTrue);
+}
+
+static unsigned
+ftfont_encode_char (font, c)
+ struct font *font;
+ int c;
+{
+ struct ftfont_info *ftfont_info = (struct ftfont_info *) font;
+ FT_Face ft_face = ftfont_info->ft_size->face;
+ FT_ULong charcode = c;
+ FT_UInt code = FT_Get_Char_Index (ft_face, charcode);
+
+ return (code > 0 ? code : 0xFFFFFFFF);
+}
+
+static int
+ftfont_text_extents (font, code, nglyphs, metrics)
+ struct font *font;
+ unsigned *code;
+ int nglyphs;
+ struct font_metrics *metrics;
+{
+ struct ftfont_info *ftfont_info = (struct ftfont_info *) font;
+ FT_Face ft_face = ftfont_info->ft_size->face;
+ int width = 0;
+ int i;
+
+ if (ftfont_info->ft_size != ft_face->size)
+ FT_Activate_Size (ftfont_info->ft_size);
+ if (metrics)
+ bzero (metrics, sizeof (struct font_metrics));
+ for (i = 0; i < nglyphs; i++)
+ {
+ if (FT_Load_Glyph (ft_face, code[i], FT_LOAD_DEFAULT) == 0)
+ {
+ FT_Glyph_Metrics *m = &ft_face->glyph->metrics;
+
+ if (metrics)
+ {
+ if (metrics->lbearing > width + (m->horiBearingX >> 6))
+ metrics->lbearing = width + (m->horiBearingX >> 6);
+ if (metrics->rbearing
+ < width + ((m->horiBearingX + m->width) >> 6))
+ metrics->rbearing
+ = width + ((m->horiBearingX + m->width) >> 6);
+ if (metrics->ascent < (m->horiBearingY >> 6))
+ metrics->ascent = m->horiBearingY >> 6;
+ if (metrics->descent > ((m->horiBearingY + m->height) >> 6))
+ metrics->descent = (m->horiBearingY + m->height) >> 6;
+ }
+ width += m->horiAdvance >> 6;
+ }
+ else
+ {
+ width += font->font.space_width;
+ }
+ }
+ if (metrics)
+ metrics->width = width;
+
+ return width;
+}
+
+static int
+ftfont_get_bitmap (font, code, bitmap, bits_per_pixel)
+ struct font *font;
+ unsigned code;
+ struct font_bitmap *bitmap;
+ int bits_per_pixel;
+{
+ struct ftfont_info *ftfont_info = (struct ftfont_info *) font;
+ FT_Face ft_face = ftfont_info->ft_size->face;
+ FT_Int32 load_flags = FT_LOAD_RENDER;
+
+ if (ftfont_info->ft_size != ft_face->size)
+ FT_Activate_Size (ftfont_info->ft_size);
+ if (bits_per_pixel == 1)
+ {
+#ifdef FT_LOAD_TARGET_MONO
+ load_flags |= FT_LOAD_TARGET_MONO;
+#else
+ load_flags |= FT_LOAD_MONOCHROME;
+#endif
+ }
+ else if (bits_per_pixel != 8)
+ /* We don't support such a rendering. */
+ return -1;
+
+ if (FT_Load_Glyph (ft_face, code, load_flags) != 0)
+ return -1;
+ bitmap->rows = ft_face->glyph->bitmap.rows;
+ bitmap->width = ft_face->glyph->bitmap.width;
+ bitmap->pitch = ft_face->glyph->bitmap.pitch;
+ bitmap->buffer = ft_face->glyph->bitmap.buffer;
+ bitmap->left = ft_face->glyph->bitmap_left;
+ bitmap->top = ft_face->glyph->bitmap_top;
+ bitmap->advance = ft_face->glyph->metrics.horiAdvance >> 6;
+ bitmap->extra = NULL;
+
+ return 0;
+}
+
+static int
+ftfont_anchor_point (font, code, index, x, y)
+ struct font *font;
+ unsigned code;
+ int index;
+ int *x, *y;
+{
+ struct ftfont_info *ftfont_info = (struct ftfont_info *) font;
+ FT_Face ft_face = ftfont_info->ft_size->face;
+
+ if (ftfont_info->ft_size != ft_face->size)
+ FT_Activate_Size (ftfont_info->ft_size);
+ if (FT_Load_Glyph (ft_face, code, FT_LOAD_DEFAULT) != 0)
+ return -1;
+ if (ft_face->glyph->format != FT_GLYPH_FORMAT_OUTLINE)
+ return -1;
+ if (index >= ft_face->glyph->outline.n_points)
+ return -1;
+ *x = ft_face->glyph->outline.points[index].x;
+ *y = ft_face->glyph->outline.points[index].y;
+ return 0;
+}
+
+
+void
+syms_of_ftfont ()
+{
+ DEFSYM (Qfreetype, "freetype");
+ DEFSYM (Qmonospace, "monospace");
+ DEFSYM (Qsans_serif, "sans-serif");
+ DEFSYM (Qserif, "serif");
+ DEFSYM (Qmono, "mono");
+ DEFSYM (Qsans, "sans");
+ DEFSYM (Qsans__serif, "sans serif");
+
+ staticpro (&freetype_font_cache);
+ freetype_font_cache = Fcons (Qt, Qnil);
+
+ staticpro (&ftfont_generic_family_list);
+ ftfont_generic_family_list
+ = Fcons (Fcons (Qmonospace, Qt),
+ Fcons (Fcons (Qsans_serif, Qt),
+ Fcons (Fcons (Qsans, Qt), Qnil)));
+
+ ftfont_driver.type = Qfreetype;
+ register_font_driver (&ftfont_driver, NULL);
+}
+
+/* arch-tag: 7cfa432c-33a6-4988-83d2-a82ed8604aca
+ (do not change this comment) */
diff --git a/src/ftxfont.c b/src/ftxfont.c
new file mode 100644
index 00000000000..4e91bd50895
--- /dev/null
+++ b/src/ftxfont.c
@@ -0,0 +1,349 @@
+/* ftxfont.c -- FreeType font driver on X (without using XFT).
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ Copyright (C) 2006
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs; see the file COPYING. If not, write to
+the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+#include <config.h>
+#include <stdio.h>
+#include <X11/Xlib.h>
+
+#include "lisp.h"
+#include "dispextern.h"
+#include "xterm.h"
+#include "frame.h"
+#include "blockinput.h"
+#include "character.h"
+#include "charset.h"
+#include "fontset.h"
+#include "font.h"
+
+/* FTX font driver. */
+
+static Lisp_Object Qftx;
+
+/* Prototypes for helper function. */
+static int ftxfont_draw_bitmap P_ ((FRAME_PTR, GC *, struct font *, unsigned,
+ int, int, XPoint *, int, int *n));
+static void ftxfont_draw_backgrond P_ ((FRAME_PTR, struct font *, GC,
+ int, int, int));
+
+static int
+ftxfont_draw_bitmap (f, gc, font, code, x, y, p, size, n)
+ FRAME_PTR f;
+ GC *gc;
+ struct font *font;
+ unsigned code;
+ int x, y;
+ XPoint *p;
+ int size, *n;
+{
+ struct font_bitmap bitmap;
+ unsigned char *b;
+ int i, j;
+
+ if (ftfont_driver.get_bitmap (font, code, &bitmap, 1) < 0)
+ return 0;
+ for (i = 0, b = bitmap.buffer; i < bitmap.rows;
+ i++, b += bitmap.pitch)
+ {
+ if (size > 0x100)
+ {
+ for (j = 0; j < bitmap.width; j++)
+ if (b[j / 8] & (1 << (7 - (j % 8))))
+ {
+ p[n[0]].x = x + bitmap.left + j;
+ p[n[0]].y = y - bitmap.top + i;
+ if (++n[0] == 0x400)
+ {
+ XDrawPoints (FRAME_X_DISPLAY (f), FRAME_X_WINDOW (f),
+ gc[0], p, size, CoordModeOrigin);
+ n[0] = 0;
+ }
+ }
+ }
+ else
+ {
+ for (j = 0; j < bitmap.width; j++)
+ {
+ int idx = (b[j] >> 5) - 1;
+
+ if (idx >= 0)
+ {
+ XPoint *pp = p + size * idx;
+
+ pp[n[idx]].x = x + bitmap.left + j;
+ pp[n[idx]].y = y - bitmap.top + i;
+ if (++(n[idx]) == 0x100)
+ {
+ XDrawPoints (FRAME_X_DISPLAY (f), FRAME_X_WINDOW (f),
+ gc[idx], pp, size, CoordModeOrigin);
+ n[idx] = 0;
+ }
+ }
+ }
+ }
+ }
+
+ if (ftfont_driver.free_bitmap)
+ ftfont_driver.free_bitmap (font, &bitmap);
+
+ return bitmap.advance;
+}
+
+static void
+ftxfont_draw_backgrond (f, font, gc, x, y, width)
+ FRAME_PTR f;
+ struct font *font;
+ GC gc;
+ int x, y, width;
+{
+ XGCValues xgcv;
+
+ XGetGCValues (FRAME_X_DISPLAY (f), gc,
+ GCForeground | GCBackground, &xgcv);
+ XSetForeground (FRAME_X_DISPLAY (f), gc, xgcv.background);
+ XFillRectangle (FRAME_X_DISPLAY (f), FRAME_X_WINDOW (f), gc,
+ x, y - font->ascent, width, font->font.height);
+ XSetForeground (FRAME_X_DISPLAY (f), gc, xgcv.foreground);
+}
+
+/* Prototypes for font-driver methods. */
+static Lisp_Object ftxfont_list P_ ((Lisp_Object, Lisp_Object));
+static struct font *ftxfont_open P_ ((FRAME_PTR, Lisp_Object, int));
+static void ftxfont_close P_ ((FRAME_PTR, struct font *));
+static int ftxfont_prepare_face (FRAME_PTR, struct face *);
+static void ftxfont_done_face (FRAME_PTR, struct face *);
+
+static int ftxfont_draw P_ ((struct glyph_string *, int, int, int, int, int));
+
+struct font_driver ftxfont_driver;
+
+static Lisp_Object
+ftxfont_list (frame, spec)
+ Lisp_Object frame;
+ Lisp_Object spec;
+{
+ Lisp_Object val = ftfont_driver.list (frame, spec);
+
+ if (! NILP (val))
+ {
+ int i;
+
+ for (i = 0; i < ASIZE (val); i++)
+ ASET (AREF (val, i), FONT_TYPE_INDEX, Qftx);
+ }
+ return val;
+}
+
+static struct font *
+ftxfont_open (f, entity, pixel_size)
+ FRAME_PTR f;
+ Lisp_Object entity;
+ int pixel_size;
+{
+ Display_Info *dpyinfo = FRAME_X_DISPLAY_INFO (f);
+ struct font *font;
+ XFontStruct *xfont = malloc (sizeof (XFontStruct));
+
+ if (! xfont)
+ return NULL;
+ font = ftfont_driver.open (f, entity, pixel_size);
+ if (! font)
+ {
+ free (xfont);
+ return NULL;
+ }
+
+ xfont->fid = FRAME_FONT (f)->fid;
+ xfont->ascent = font->ascent;
+ xfont->descent = font->descent;
+ xfont->max_bounds.width = font->font.size;
+ xfont->min_bounds.width = font->min_width;
+ font->font.font = xfont;
+ font->driver = &ftxfont_driver;
+
+ dpyinfo->n_fonts++;
+
+ /* Set global flag fonts_changed_p to non-zero if the font loaded
+ has a character with a smaller width than any other character
+ before, or if the font loaded has a smaller height than any other
+ font loaded before. If this happens, it will make a glyph matrix
+ reallocation necessary. */
+ if (dpyinfo->n_fonts == 1)
+ {
+ dpyinfo->smallest_font_height = font->font.height;
+ dpyinfo->smallest_char_width = font->min_width;
+ fonts_changed_p = 1;
+ }
+ else
+ {
+ if (dpyinfo->smallest_font_height > font->font.height)
+ dpyinfo->smallest_font_height = font->font.height, fonts_changed_p |= 1;
+ if (dpyinfo->smallest_char_width > font->min_width)
+ dpyinfo->smallest_char_width = font->min_width, fonts_changed_p |= 1;
+ }
+
+ return font;
+}
+
+static void
+ftxfont_close (f, font)
+ FRAME_PTR f;
+ struct font *font;
+{
+ ftfont_driver.close (f, font);
+ FRAME_X_DISPLAY_INFO (f)->n_fonts--;
+}
+
+static int
+ftxfont_prepare_face (f, face)
+ FRAME_PTR f;
+ struct face *face;
+{
+ GC gc[6];
+ XColor colors[3];
+ XGCValues xgcv;
+ unsigned long mask = GCForeground | GCBackground | GCGraphicsExposures;
+ int i;
+
+ face->extra = NULL;
+
+ /* Here, we create 6 more GCs to simulate anti-aliasing. */
+ BLOCK_INPUT;
+ XGetGCValues (FRAME_X_DISPLAY (f), face->gc, mask, &xgcv);
+ colors[0].pixel = face->foreground;
+ colors[1].pixel = face->background;
+ XQueryColors (FRAME_X_DISPLAY (f), FRAME_X_COLORMAP (f), colors, 2);
+ for (i = 1; i < 7; i++)
+ {
+ colors[2].red = (colors[0].red * i + colors[1].red * (7 - i)) / 7;
+ colors[2].green = (colors[0].green * i + colors[1].green * (7 - i)) / 7;
+ colors[2].blue = (colors[0].blue * i + colors[1].blue * (7 - i)) / 7;
+ if (! x_alloc_nearest_color (f, FRAME_X_COLORMAP (f), &colors[2]))
+ break;
+ xgcv.foreground = colors[2].pixel;
+ gc[i - 1] = XCreateGC (FRAME_X_DISPLAY (f), FRAME_X_WINDOW (f),
+ mask, &xgcv);
+ }
+ UNBLOCK_INPUT;
+
+ if (i < 7)
+ return -1;
+ face->extra = malloc (sizeof (GC) * 7);
+ if (! face->extra)
+ return -1;
+ for (i = 0; i < 6; i++)
+ ((GC *) face->extra)[i] = gc[i];
+ ((GC *) face->extra)[i] = face->gc;
+ return 0;
+}
+
+static void
+ftxfont_done_face (f, face)
+ FRAME_PTR f;
+ struct face *face;
+{
+ if (face->extra)
+ {
+ int i;
+
+ BLOCK_INPUT;
+ for (i = 0; i < 7; i++)
+ XFreeGC (FRAME_X_DISPLAY (f), ((GC *) face->extra)[i]);
+ UNBLOCK_INPUT;
+ free (face->extra);
+ face->extra = NULL;
+ }
+}
+
+static int
+ftxfont_draw (s, from, to, x, y, with_background)
+ struct glyph_string *s;
+ int from, to, x, y, with_background;
+{
+ FRAME_PTR f = s->f;
+ struct face *face = s->face;
+ struct font *font = (struct font *) face->font;
+ XPoint p[0x700];
+ int n[7];
+ unsigned *code;
+ int len = to - from;
+ int i;
+
+ n[0] = n[1] = n[2] = n[3] = n[4] = n[5] = n[6] = 0;
+
+ BLOCK_INPUT;
+
+ if (with_background)
+ ftxfont_draw_backgrond (f, font, s->gc, x, y, s->width);
+ code = alloca (sizeof (unsigned) * len);
+ for (i = 0; i < len; i++)
+ code[i] = ((XCHAR2B_BYTE1 (s->char2b + from + i) << 8)
+ | XCHAR2B_BYTE2 (s->char2b + from + i));
+
+ if (! face->extra)
+ {
+ for (i = 0; i < len; i++)
+ x += ftxfont_draw_bitmap (f, &face->gc, font, code[i], x, y,
+ p, 0x700, n);
+ if (n[0] > 0)
+ XDrawPoints (FRAME_X_DISPLAY (f), FRAME_X_WINDOW (f),
+ face->gc, p, n[0], CoordModeOrigin);
+ }
+ else
+ {
+ GC *gc = face->extra;
+
+ for (i = 0; i < len; i++)
+ x += ftxfont_draw_bitmap (f, &face->gc, font, code[i], x, y,
+ p, 0x100, n);
+ for (i = 0; i < 7; i++)
+ if (n[i] > 0)
+ XDrawPoints (FRAME_X_DISPLAY (f), FRAME_X_WINDOW (f),
+ gc[i], p + 0x100 * i, n[i], CoordModeOrigin);
+ }
+
+ UNBLOCK_INPUT;
+
+ return len;
+}
+
+
+
+void
+syms_of_ftxfont ()
+{
+ DEFSYM (Qftx, "ftx");
+
+ ftxfont_driver = ftfont_driver;
+ ftxfont_driver.type = Qftx;
+ ftxfont_driver.list = ftxfont_list;
+ ftxfont_driver.open = ftxfont_open;
+ ftxfont_driver.close = ftxfont_close;
+ ftxfont_driver.prepare_face = ftxfont_prepare_face;
+ ftxfont_driver.done_face = ftxfont_done_face;
+ ftxfont_driver.draw = ftxfont_draw;
+
+ register_font_driver (&ftxfont_driver, NULL);
+}
+
+/* arch-tag: 59bd3469-5330-413f-b29d-1aa36492abe8
+ (do not change this comment) */
diff --git a/src/indent.c b/src/indent.c
index cc928f2171f..ae28fa3791d 100644
--- a/src/indent.c
+++ b/src/indent.c
@@ -22,7 +22,7 @@ Boston, MA 02110-1301, USA. */
#include <config.h>
#include "lisp.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "category.h"
#include "indent.h"
#include "keyboard.h"
@@ -288,7 +288,7 @@ check_composition (pos, pos_byte, point, len, len_byte, width)
int *len, *len_byte, *width;
{
Lisp_Object prop;
- int start, end;
+ EMACS_INT start, end;
int id;
if (! find_composition (pos, -1, &start, &end, &prop, Qnil)
@@ -324,7 +324,7 @@ check_composition (pos, pos_byte, point, len, len_byte, width)
if (dp != 0 && VECTORP (DISP_CHAR_VECTOR (dp, c))) \
width = XVECTOR (DISP_CHAR_VECTOR (dp, c))->size; \
else \
- width = WIDTH_BY_CHAR_HEAD (*p); \
+ width = CHAR_WIDTH (c); \
if (width > 1) \
wide_column = width; \
} \
diff --git a/src/insdel.c b/src/insdel.c
index b97539c1cc2..df0831c9652 100644
--- a/src/insdel.c
+++ b/src/insdel.c
@@ -24,7 +24,7 @@ Boston, MA 02110-1301, USA. */
#include "lisp.h"
#include "intervals.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "window.h"
#include "blockinput.h"
#include "region-cache.h"
@@ -659,22 +659,11 @@ copy_text (from_addr, to_addr, nbytes,
int bytes_left = nbytes;
Lisp_Object tbl = Qnil;
- /* We set the variable tbl to the reverse table of
- Vnonascii_translation_table in advance. */
- if (CHAR_TABLE_P (Vnonascii_translation_table))
- {
- tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
- make_number (0));
- if (!CHAR_TABLE_P (tbl))
- tbl = Qnil;
- }
-
- /* Convert multibyte to single byte. */
while (bytes_left > 0)
{
int thislen, c;
c = STRING_CHAR_AND_LENGTH (from_addr, bytes_left, thislen);
- if (!SINGLE_BYTE_CHAR_P (c))
+ if (!ASCII_CHAR_P (c))
c = multibyte_char_to_unibyte (c, tbl);
*to_addr++ = c;
from_addr += thislen;
@@ -1178,6 +1167,50 @@ insert_from_string_1 (string, pos, pos_byte, nchars, nbytes,
current_buffer, inherit);
adjust_point (nchars, outgoing_nbytes);
+
+ CHECK_MARKERS ();
+}
+
+/* Insert a sequence of NCHARS chars which occupy NBYTES bytes
+ starting at GPT_ADDR. */
+
+void
+insert_from_gap (nchars, nbytes)
+ register int nchars, nbytes;
+{
+ if (NILP (current_buffer->enable_multibyte_characters))
+ nchars = nbytes;
+
+ record_insert (GPT, nchars);
+ MODIFF++;
+
+ GAP_SIZE -= nbytes;
+ GPT += nchars;
+ ZV += nchars;
+ Z += nchars;
+ GPT_BYTE += nbytes;
+ ZV_BYTE += nbytes;
+ Z_BYTE += nbytes;
+ if (GAP_SIZE > 0) *(GPT_ADDR) = 0; /* Put an anchor. */
+
+ if (GPT_BYTE < GPT)
+ abort ();
+
+ adjust_overlays_for_insert (GPT - nchars, nchars);
+ adjust_markers_for_insert (GPT - nchars, GPT_BYTE - nbytes,
+ GPT, GPT_BYTE, 0);
+
+ if (BUF_INTERVALS (current_buffer) != 0)
+ {
+ offset_intervals (current_buffer, GPT - nchars, nchars);
+ graft_intervals_into_buffer (NULL_INTERVAL, GPT - nchars, nchars,
+ current_buffer, 0);
+ }
+
+ if (GPT - nchars < PT)
+ adjust_point (nchars, nbytes);
+
+ CHECK_MARKERS ();
}
/* Insert text from BUF, NCHARS characters starting at CHARPOS, into the
diff --git a/src/intervals.c b/src/intervals.c
index 20c4c191a93..fecd1d181dc 100644
--- a/src/intervals.c
+++ b/src/intervals.c
@@ -2306,7 +2306,7 @@ int
get_property_and_range (pos, prop, val, start, end, object)
int pos;
Lisp_Object prop, *val;
- int *start, *end;
+ EMACS_INT *start, *end;
Lisp_Object object;
{
INTERVAL i, prev, next;
diff --git a/src/intervals.h b/src/intervals.h
index 89b26446190..5aec706afea 100644
--- a/src/intervals.h
+++ b/src/intervals.h
@@ -300,7 +300,7 @@ extern Lisp_Object textget P_ ((Lisp_Object, Lisp_Object));
extern Lisp_Object lookup_char_property P_ ((Lisp_Object, Lisp_Object, int));
extern void move_if_not_intangible P_ ((int));
extern int get_property_and_range P_ ((int, Lisp_Object, Lisp_Object *,
- int *, int *, Lisp_Object));
+ EMACS_INT *, EMACS_INT *, Lisp_Object));
extern Lisp_Object get_local_map P_ ((int, struct buffer *, Lisp_Object));
extern INTERVAL update_interval P_ ((INTERVAL, int));
extern void set_intervals_multibyte P_ ((int));
diff --git a/src/keyboard.c b/src/keyboard.c
index f6e8eadcf8c..e58c78c84ac 100644
--- a/src/keyboard.c
+++ b/src/keyboard.c
@@ -33,7 +33,7 @@ Boston, MA 02110-1301, USA. */
#include "window.h"
#include "commands.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "disptab.h"
#include "dispextern.h"
#include "syntax.h"
@@ -1668,7 +1668,7 @@ command_loop_1 ()
: (lose >= 0x20 && lose < 0x7f))
/* To extract the case of continuation on
wide-column characters. */
- && (WIDTH_BY_CHAR_HEAD (FETCH_BYTE (PT_BYTE)) == 1)
+ && ASCII_BYTE_P (lose)
&& (XFASTINT (XWINDOW (selected_window)->last_modified)
>= MODIFF)
&& (XFASTINT (XWINDOW (selected_window)->last_overlay_modified)
@@ -1725,7 +1725,7 @@ command_loop_1 ()
{
unsigned int c
= translate_char (Vtranslation_table_for_input,
- XFASTINT (last_command_char), 0, 0, 0);
+ XFASTINT (last_command_char));
int value;
if (NILP (Vexecuting_kbd_macro)
&& !EQ (minibuf_window, selected_window))
@@ -1901,7 +1901,7 @@ adjust_point_for_property (last_pt, modified)
int last_pt;
int modified;
{
- int beg, end;
+ EMACS_INT beg, end;
Lisp_Object val, overlay, tmp;
int check_composition = 1, check_display = 1, check_invisible = 1;
int orig_pt = PT;
@@ -2970,7 +2970,7 @@ read_char (commandflag, nmaps, maps, prev_event, used_mouse_menu)
|| (VECTORP (Vkeyboard_translate_table)
&& XVECTOR (Vkeyboard_translate_table)->size > (unsigned) XFASTINT (c))
|| (CHAR_TABLE_P (Vkeyboard_translate_table)
- && CHAR_VALID_P (XINT (c), 0)))
+ && CHARACTERP (c)))
{
Lisp_Object d;
d = Faref (Vkeyboard_translate_table, c);
@@ -9404,9 +9404,8 @@ read_key_sequence (keybuf, bufsize, prompt, dont_downcase_last,
if (first_binding >= nmaps
&& fkey.start >= t && keytran.start >= t
&& INTEGERP (key)
- && ((((XINT (key) & 0x3ffff)
- < XCHAR_TABLE (current_buffer->downcase_table)->size)
- && UPPERCASEP (XINT (key) & 0x3ffff))
+ && ((CHARACTERP (make_number (XINT (key) & ~CHAR_MODIFIER_MASK))
+ && UPPERCASEP (XINT (key) & ~CHAR_MODIFIER_MASK))
|| (XINT (key) & shift_modifier)))
{
Lisp_Object new_key;
@@ -9417,8 +9416,8 @@ read_key_sequence (keybuf, bufsize, prompt, dont_downcase_last,
if (XINT (key) & shift_modifier)
XSETINT (new_key, XINT (key) & ~shift_modifier);
else
- XSETINT (new_key, (DOWNCASE (XINT (key) & 0x3ffff)
- | (XINT (key) & ~0x3ffff)));
+ XSETINT (new_key, (DOWNCASE (XINT (key) & ~CHAR_MODIFIER_MASK)
+ | (XINT (key) & ~CHAR_MODIFIER_MASK)));
/* We have to do this unconditionally, regardless of whether
the lower-case char is defined in the keymaps, because they
diff --git a/src/keymap.c b/src/keymap.c
index 71fd5f03390..af9d817a1eb 100644
--- a/src/keymap.c
+++ b/src/keymap.c
@@ -26,6 +26,7 @@ Boston, MA 02110-1301, USA. */
#include "lisp.h"
#include "commands.h"
#include "buffer.h"
+#include "character.h"
#include "charset.h"
#include "keyboard.h"
#include "termhooks.h"
@@ -425,11 +426,7 @@ Return PARENT. PARENT should be nil or another keymap. */)
if (CHAR_TABLE_P (XCAR (list)))
{
- Lisp_Object indices[3];
-
- map_char_table (fix_submap_inheritance, Qnil,
- XCAR (list), XCAR (list),
- keymap, 0, indices);
+ map_char_table (fix_submap_inheritance, Qnil, XCAR (list), keymap);
}
}
@@ -569,9 +566,7 @@ access_keymap (map, idx, t_ok, noinherit, autoload)
GCPRO4 (map, tail, idx, t_binding);
- /* If `t_ok' is 2, both `t' and generic-char bindings are accepted.
- If it is 1, only generic-char bindings are accepted.
- Otherwise, neither are. */
+ /* If `t_ok' is 2, both `t' is accepted. */
t_ok = t_ok ? 2 : 0;
for (tail = XCDR (map);
@@ -595,24 +590,6 @@ access_keymap (map, idx, t_ok, noinherit, autoload)
if (EQ (key, idx))
val = XCDR (binding);
- else if (t_ok
- && INTEGERP (idx)
- && (XINT (idx) & CHAR_MODIFIER_MASK) == 0
- && INTEGERP (key)
- && (XINT (key) & CHAR_MODIFIER_MASK) == 0
- && !SINGLE_BYTE_CHAR_P (XINT (idx))
- && !SINGLE_BYTE_CHAR_P (XINT (key))
- && CHAR_VALID_P (XINT (key), 1)
- && !CHAR_VALID_P (XINT (key), 0)
- && (CHAR_CHARSET (XINT (key))
- == CHAR_CHARSET (XINT (idx))))
- {
- /* KEY is the generic character of the charset of IDX.
- Use KEY's binding if there isn't a binding for IDX
- itself. */
- t_binding = XCDR (binding);
- t_ok = 0;
- }
else if (t_ok > 1 && EQ (key, Qt))
{
t_binding = XCDR (binding);
@@ -724,12 +701,10 @@ map_keymap (map, fun, args, data, autoload)
}
else if (CHAR_TABLE_P (binding))
{
- Lisp_Object indices[3];
- map_char_table (map_keymap_char_table_item, Qnil, binding, binding,
+ map_char_table (map_keymap_char_table_item, Qnil, binding,
Fcons (make_save_value (fun, 0),
Fcons (make_save_value (data, 0),
- args)),
- 0, indices);
+ args)));
}
}
UNGCPRO;
@@ -881,10 +856,15 @@ store_in_keymap (keymap, idx, def)
if (!CONSP (keymap) || !EQ (XCAR (keymap), Qkeymap))
error ("attempt to define a key in a non-keymap");
- /* If idx is a list (some sort of mouse click, perhaps?),
- the index we want to use is the car of the list, which
- ought to be a symbol. */
- idx = EVENT_HEAD (idx);
+ /* If idx is a cons, and the car part is a character, idx must be of
+ the form (FROM-CHAR . TO-CHAR). */
+ if (CONSP (idx) && CHARACTERP (XCAR (idx)))
+ CHECK_CHARACTER_CDR (idx);
+ else
+ /* If idx is a list (some sort of mouse click, perhaps?),
+ the index we want to use is the car of the list, which
+ ought to be a symbol. */
+ idx = EVENT_HEAD (idx);
/* If idx is a symbol, it might have modifiers, which need to
be put in the canonical order. */
@@ -921,6 +901,19 @@ store_in_keymap (keymap, idx, def)
ASET (elt, XFASTINT (idx), def);
return def;
}
+ else if (CONSP (idx) && CHARACTERP (XCAR (idx)))
+ {
+ int from = XFASTINT (XCAR (idx));
+ int to = XFASTINT (XCDR (idx));
+
+ if (to >= ASIZE (elt))
+ to = ASIZE (elt) - 1;
+ for (; from <= to; from++)
+ ASET (elt, from, def);
+ if (to == XFASTINT (XCDR (idx)))
+ /* We have defined all keys in IDX. */
+ return def;
+ }
insertion_point = tail;
}
else if (CHAR_TABLE_P (elt))
@@ -937,6 +930,11 @@ store_in_keymap (keymap, idx, def)
NILP (def) ? Qt : def);
return def;
}
+ else if (CONSP (idx) && CHARACTERP (XCAR (idx)))
+ {
+ Fset_char_table_range (elt, idx, NILP (def) ? Qt : def);
+ return def;
+ }
insertion_point = tail;
}
else if (CONSP (elt))
@@ -947,6 +945,19 @@ store_in_keymap (keymap, idx, def)
XSETCDR (elt, def);
return def;
}
+ else if (CONSP (idx) && CHARACTERP (XCAR (idx)))
+ {
+ int from = XFASTINT (XCAR (idx));
+ int to = XFASTINT (XCDR (idx));
+
+ if (from <= XFASTINT (XCAR (elt))
+ && to >= XFASTINT (XCAR (elt)))
+ {
+ XSETCDR (elt, def);
+ if (from == to)
+ return def;
+ }
+ }
}
else if (EQ (elt, Qkeymap))
/* If we find a 'keymap' symbol in the spine of KEYMAP,
@@ -961,9 +972,22 @@ store_in_keymap (keymap, idx, def)
keymap_end:
/* We have scanned the entire keymap, and not found a binding for
IDX. Let's add one. */
- CHECK_IMPURE (insertion_point);
- XSETCDR (insertion_point,
- Fcons (Fcons (idx, def), XCDR (insertion_point)));
+ {
+ Lisp_Object elt;
+
+ if (CONSP (idx) && CHARACTERP (XCAR (idx)))
+ {
+ /* IDX specifies a range of characters, and not all of them
+ were handled yet, which means this keymap doesn't have a
+ char-table. So, we insert a char-table now. */
+ elt = Fmake_char_table (Qkeymap, Qnil);
+ Fset_char_table_range (elt, idx, NILP (def) ? Qt : def);
+ }
+ else
+ elt = Fcons (idx, def);
+ CHECK_IMPURE (insertion_point);
+ XSETCDR (insertion_point, Fcons (elt, XCDR (insertion_point)));
+ }
}
return def;
@@ -1049,7 +1073,7 @@ static void
copy_keymap_1 (chartable, idx, elt)
Lisp_Object chartable, idx, elt;
{
- Faset (chartable, idx, copy_keymap_item (elt));
+ Fset_char_table_range (chartable, idx, copy_keymap_item (elt));
}
DEFUN ("copy-keymap", Fcopy_keymap, Scopy_keymap, 1, 1, 0,
@@ -1072,9 +1096,8 @@ is not copied. */)
Lisp_Object elt = XCAR (keymap);
if (CHAR_TABLE_P (elt))
{
- Lisp_Object indices[3];
elt = Fcopy_sequence (elt);
- map_char_table (copy_keymap_1, Qnil, elt, elt, elt, 0, indices);
+ map_char_table (copy_keymap_1, Qnil, elt, elt);
}
else if (VECTORP (elt))
{
@@ -1154,8 +1177,15 @@ binding KEY to DEF is added at the front of KEYMAP. */)
{
c = Faref (key, make_number (idx));
- if (CONSP (c) && lucid_event_type_list_p (c))
- c = Fevent_convert_list (c);
+ if (CONSP (c))
+ {
+ /* C may be a Lucid style event type list or a cons (FROM .
+ TO) specifying a range of characters. */
+ if (lucid_event_type_list_p (c))
+ c = Fevent_convert_list (c);
+ else if (CHARACTERP (XCAR (c)))
+ CHECK_CHARACTER_CDR (c);
+ }
if (SYMBOLP (c))
silly_event_symbol_error (c);
@@ -1176,7 +1206,10 @@ binding KEY to DEF is added at the front of KEYMAP. */)
idx++;
}
- if (!INTEGERP (c) && !SYMBOLP (c) && !CONSP (c))
+ if (!INTEGERP (c) && !SYMBOLP (c)
+ && (!CONSP (c)
+ /* If C is a range, it must be a leaf. */
+ || (INTEGERP (XCAR (c)) && idx != length)))
error ("Key sequence contains invalid event");
if (idx == length)
@@ -1946,12 +1979,9 @@ then the value includes only maps for prefixes that start with PREFIX. */)
if (CHAR_TABLE_P (elt))
{
- Lisp_Object indices[3];
-
- map_char_table (accessible_keymaps_char_table, Qnil, elt,
+ map_char_table (accessible_keymaps_char_table, Qnil,
elt, Fcons (Fcons (maps, make_number (is_metized)),
- Fcons (tail, thisseq)),
- 0, indices);
+ Fcons (tail, thisseq)));
}
else if (VECTORP (elt))
{
@@ -2088,15 +2118,13 @@ push_key_description (c, p, force_multibyte)
int force_multibyte;
{
unsigned c2;
- int valid_p;
/* Clear all the meaningless bits above the meta bit. */
c &= meta_modifier | ~ - meta_modifier;
c2 = c & ~(alt_modifier | ctrl_modifier | hyper_modifier
| meta_modifier | shift_modifier | super_modifier);
- valid_p = SINGLE_BYTE_CHAR_P (c2) || char_valid_p (c2, 0);
- if (! valid_p)
+ if (! CHARACTERP (make_number (c2)))
{
/* KEY_DESCRIPTION_SIZE is large enough for this. */
p += sprintf (p, "[%d]", c);
@@ -2190,25 +2218,12 @@ push_key_description (c, p, force_multibyte)
}
else
{
- if (force_multibyte)
- {
- if (SINGLE_BYTE_CHAR_P (c))
- c = unibyte_char_to_multibyte (c);
- p += CHAR_STRING (c, p);
- }
- else if (NILP (current_buffer->enable_multibyte_characters))
- {
- int bit_offset;
- *p++ = '\\';
- /* The biggest character code uses 19 bits. */
- for (bit_offset = 18; bit_offset >= 0; bit_offset -= 3)
- {
- if (c >= (1 << bit_offset))
- *p++ = ((c & (7 << bit_offset)) >> bit_offset) + '0';
- }
- }
+ /* Now we are sure that C is a valid character code. */
+ if (NILP (current_buffer->enable_multibyte_characters)
+ && ! force_multibyte)
+ *p++ = multibyte_char_to_unibyte (c, Qnil);
else
- p += CHAR_STRING (c, p);
+ p += CHAR_STRING (c, (unsigned char *) p);
}
return p;
@@ -2232,43 +2247,10 @@ around function keys and event symbols. */)
if (INTEGERP (key)) /* Normal character */
{
- unsigned int charset, c1, c2;
- int without_bits = XINT (key) & ~((-1) << CHARACTERBITS);
-
- if (SINGLE_BYTE_CHAR_P (without_bits))
- charset = 0;
- else
- SPLIT_CHAR (without_bits, charset, c1, c2);
+ char tem[KEY_DESCRIPTION_SIZE];
- if (charset
- && CHARSET_DEFINED_P (charset)
- && ((c1 >= 0 && c1 < 32)
- || (c2 >= 0 && c2 < 32)))
- {
- /* Handle a generic character. */
- Lisp_Object name;
- name = CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX);
- CHECK_STRING (name);
- return concat2 (build_string ("Character set "), name);
- }
- else
- {
- char tem[KEY_DESCRIPTION_SIZE], *end;
- int nbytes, nchars;
- Lisp_Object string;
-
- end = push_key_description (XUINT (key), tem, 1);
- nbytes = end - tem;
- nchars = multibyte_chars_in_text (tem, nbytes);
- if (nchars == nbytes)
- {
- *end = '\0';
- string = build_string (tem);
- }
- else
- string = make_multibyte_string (tem, nchars, nbytes);
- return string;
- }
+ *push_key_description (XUINT (key), tem, 1) = 0;
+ return build_string (tem);
}
else if (SYMBOLP (key)) /* Function key or event-symbol */
{
@@ -2334,7 +2316,7 @@ See Info node `(elisp)Describing Characters' for examples. */)
CHECK_NUMBER (character);
c = XINT (character);
- if (!SINGLE_BYTE_CHAR_P (c))
+ if (!ASCII_CHAR_P (c))
{
int len = CHAR_STRING (c, str);
@@ -2505,7 +2487,6 @@ where_is_internal (definition, keymaps, firstonly, noindirect, no_remap)
}
else if (CHAR_TABLE_P (elt))
{
- Lisp_Object indices[3];
Lisp_Object args;
args = Fcons (Fcons (Fcons (definition, noindirect),
@@ -2513,8 +2494,7 @@ where_is_internal (definition, keymaps, firstonly, noindirect, no_remap)
Fcons (Fcons (this, last),
Fcons (make_number (nomenus),
make_number (last_is_meta))));
- map_char_table (where_is_internal_2, Qnil, elt, elt, args,
- 0, indices);
+ map_char_table (where_is_internal_2, Qnil, elt, args);
sequences = XCDR (XCAR (args));
}
else if (CONSP (elt))
@@ -2729,12 +2709,15 @@ remapped command in the returned list. */)
/* This is the function that Fwhere_is_internal calls using map_char_table.
ARGS has the form
- (((DEFINITION . NOINDIRECT) . (KEYMAP . RESULT))
+ (((DEFINITION . NOINDIRECT) . RESULT)
.
((THIS . LAST) . (NOMENUS . LAST_IS_META)))
Since map_char_table doesn't really use the return value from this function,
we the result append to RESULT, the slot in ARGS.
+ KEY may be a cons (FROM . TO) where both FROM and TO are integers
+ (i.e. character events).
+
This function can GC because it calls where_is_internal_1 which can
GC. */
@@ -2748,7 +2731,6 @@ where_is_internal_2 (args, key, binding)
struct gcpro gcpro1, gcpro2, gcpro3;
GCPRO3 (args, key, binding);
- result = XCDR (XCAR (args));
definition = XCAR (XCAR (XCAR (args)));
noindirect = XCDR (XCAR (XCAR (args)));
this = XCAR (XCAR (XCDR (args)));
@@ -2756,11 +2738,39 @@ where_is_internal_2 (args, key, binding)
nomenus = XFASTINT (XCAR (XCDR (XCDR (args))));
last_is_meta = XFASTINT (XCDR (XCDR (XCDR (args))));
- sequence = where_is_internal_1 (binding, key, definition, noindirect,
- this, last, nomenus, last_is_meta);
+ result = Qnil;
+ if (CONSP (key) && INTEGERP (XCAR (key)) && INTEGERP (XCDR (key)))
+ {
+ /* Try all ASCII characters. Try also non-ASCII characters but
+ only the first and last one because trying all of them is
+ extremely memory and time consuming.
- if (!NILP (sequence))
- XSETCDR (XCAR (args), Fcons (sequence, result));
+ Fixme: Perhaps it should be allowed to store a cons directly
+ in RESULT. -- handa@m17n.org */
+ int from = XINT (XCAR (key)), to = XINT (XCDR (key));
+ Lisp_Object k;
+
+ for (; from <= to; to--)
+ {
+ k = make_number (to);
+ sequence = where_is_internal_1 (binding, k, definition, noindirect,
+ this, last, nomenus, last_is_meta);
+ if (!NILP (sequence))
+ result = Fcons (sequence, result);
+ if (to > 129)
+ to = 129;
+ }
+ }
+ else
+ {
+ sequence = where_is_internal_1 (binding, key, definition, noindirect,
+ this, last, nomenus, last_is_meta);
+ if (!NILP (sequence))
+ result = Fcons (sequence, Qnil);
+ }
+
+ if (! NILP (result))
+ nconc2 (XCAR (args), result);
UNGCPRO;
}
@@ -3442,9 +3452,10 @@ DESCRIBER is the output function used; nil means use `princ'. */)
If the definition in effect in the whole map does not match
the one in this vector, we ignore this one.
- When describing a sub-char-table, INDICES is a list of
- indices at higher levels in this char-table,
- and CHAR_TABLE_DEPTH says how many levels down we have gone.
+ ARGS is simply passed as the second argument to ELT_DESCRIBER.
+
+ INDICES and CHAR_TABLE_DEPTH are ignored. They will be removed in
+ the near future.
KEYMAP_P is 1 if vector is known to be a keymap, so map ESC to M-.
@@ -3469,24 +3480,18 @@ describe_vector (vector, prefix, args, elt_describer,
Lisp_Object definition;
Lisp_Object tem2;
Lisp_Object elt_prefix = Qnil;
- register int i;
+ int i;
Lisp_Object suppress;
Lisp_Object kludge;
int first = 1;
struct gcpro gcpro1, gcpro2, gcpro3, gcpro4;
/* Range of elements to be handled. */
int from, to;
- /* A flag to tell if a leaf in this level of char-table is not a
- generic character (i.e. a complete multibyte character). */
- int complete_char;
- int character;
+ Lisp_Object character;
int starting_i;
suppress = Qnil;
- if (indices == 0)
- indices = (int *) alloca (3 * sizeof (int));
-
definition = Qnil;
if (!keymap_p)
@@ -3510,61 +3515,24 @@ describe_vector (vector, prefix, args, elt_describer,
if (partial)
suppress = intern ("suppress-keymap");
- if (CHAR_TABLE_P (vector))
- {
- if (char_table_depth == 0)
- {
- /* VECTOR is a top level char-table. */
- complete_char = 1;
- from = 0;
- to = CHAR_TABLE_ORDINARY_SLOTS;
- }
- else
- {
- /* VECTOR is a sub char-table. */
- if (char_table_depth >= 3)
- /* A char-table is never that deep. */
- error ("Too deep char table");
-
- complete_char
- = (CHARSET_VALID_P (indices[0])
- && ((CHARSET_DIMENSION (indices[0]) == 1
- && char_table_depth == 1)
- || char_table_depth == 2));
-
- /* Meaningful elements are from 32th to 127th. */
- from = 32;
- to = SUB_CHAR_TABLE_ORDINARY_SLOTS;
- }
- }
- else
- {
- /* This does the right thing for ordinary vectors. */
-
- complete_char = 1;
- from = 0;
- to = XVECTOR (vector)->size;
- }
+ from = 0;
+ to = CHAR_TABLE_P (vector) ? MAX_CHAR + 1 : XVECTOR (vector)->size;
for (i = from; i < to; i++)
{
int this_shadowed = 0;
- QUIT;
+ int range_beg, range_end;
+ Lisp_Object val;
- if (CHAR_TABLE_P (vector))
- {
- if (char_table_depth == 0 && i >= CHAR_TABLE_SINGLE_BYTE_SLOTS)
- complete_char = 0;
+ QUIT;
- if (i >= CHAR_TABLE_SINGLE_BYTE_SLOTS
- && !CHARSET_DEFINED_P (i - 128))
- continue;
+ starting_i = i;
- definition
- = get_keyelt (XCHAR_TABLE (vector)->contents[i], 0);
- }
+ if (CHAR_TABLE_P (vector))
+ val = char_table_ref_and_range (vector, i, &range_beg, &i);
else
- definition = get_keyelt (AREF (vector, i), 0);
+ val = AREF (vector, i);
+ definition = get_keyelt (val, 0);
if (NILP (definition)) continue;
@@ -3578,31 +3546,11 @@ describe_vector (vector, prefix, args, elt_describer,
if (!NILP (tem)) continue;
}
- /* Set CHARACTER to the character this entry describes, if any.
- Also update *INDICES. */
- if (CHAR_TABLE_P (vector))
- {
- indices[char_table_depth] = i;
-
- if (char_table_depth == 0)
- {
- character = i;
- indices[0] = i - 128;
- }
- else if (complete_char)
- {
- character = MAKE_CHAR (indices[0], indices[1], indices[2]);
- }
- else
- character = 0;
- }
- else
- character = i;
-
- ASET (kludge, 0, make_number (character));
+ character = make_number (starting_i);
+ ASET (kludge, 0, character);
/* If this binding is shadowed by some other map, ignore it. */
- if (!NILP (shadow) && complete_char)
+ if (!NILP (shadow))
{
Lisp_Object tem;
@@ -3619,7 +3567,7 @@ describe_vector (vector, prefix, args, elt_describer,
/* Ignore this definition if it is shadowed by an earlier
one in the same keymap. */
- if (!NILP (entire_map) && complete_char)
+ if (!NILP (entire_map))
{
Lisp_Object tem;
@@ -3631,89 +3579,28 @@ describe_vector (vector, prefix, args, elt_describer,
if (first)
{
- if (char_table_depth == 0)
- insert ("\n", 1);
+ insert ("\n", 1);
first = 0;
}
- /* For a sub char-table, show the depth by indentation.
- CHAR_TABLE_DEPTH can be greater than 0 only for a char-table. */
- if (char_table_depth > 0)
- insert (" ", char_table_depth * 2); /* depth is 1 or 2. */
-
/* Output the prefix that applies to every entry in this map. */
if (!NILP (elt_prefix))
insert1 (elt_prefix);
- /* Insert or describe the character this slot is for,
- or a description of what it is for. */
- if (SUB_CHAR_TABLE_P (vector))
- {
- if (complete_char)
- insert_char (character);
- else
- {
- /* We need an octal representation for this block of
- characters. */
- char work[16];
- sprintf (work, "(row %d)", i);
- insert (work, strlen (work));
- }
- }
- else if (CHAR_TABLE_P (vector))
- {
- if (complete_char)
- insert1 (Fkey_description (kludge, prefix));
- else
- {
- /* Print the information for this character set. */
- insert_string ("<");
- tem2 = CHARSET_TABLE_INFO (i - 128, CHARSET_SHORT_NAME_IDX);
- if (STRINGP (tem2))
- insert_from_string (tem2, 0, 0, SCHARS (tem2),
- SBYTES (tem2), 0);
- else
- insert ("?", 1);
- insert (">", 1);
- }
- }
- else
- {
- insert1 (Fkey_description (kludge, prefix));
- }
-
- /* If we find a sub char-table within a char-table,
- scan it recursively; it defines the details for
- a character set or a portion of a character set. */
- if (CHAR_TABLE_P (vector) && SUB_CHAR_TABLE_P (definition))
- {
- insert ("\n", 1);
- describe_vector (definition, prefix, args, elt_describer,
- partial, shadow, entire_map,
- indices, char_table_depth + 1, keymap_p,
- mention_shadow);
- continue;
- }
-
- starting_i = i;
+ insert1 (Fkey_description (kludge, prefix));
/* Find all consecutive characters or rows that have the same
definition. But, for elements of a top level char table, if
they are for charsets, we had better describe one by one even
if they have the same definition. */
if (CHAR_TABLE_P (vector))
- {
- int limit = to;
-
- if (char_table_depth == 0)
- limit = CHAR_TABLE_SINGLE_BYTE_SLOTS;
-
- while (i + 1 < limit
- && (tem2 = get_keyelt (XCHAR_TABLE (vector)->contents[i + 1], 0),
- !NILP (tem2))
- && !NILP (Fequal (tem2, definition)))
- i++;
- }
+ while (i + 1 < to
+ && (val = char_table_ref_and_range (vector, i + 1,
+ &range_beg, &range_end),
+ tem2 = get_keyelt (val, 0),
+ !NILP (tem2))
+ && !NILP (Fequal (tem2, definition)))
+ i = range_end;
else
while (i + 1 < to
&& (tem2 = get_keyelt (AREF (vector, i + 1), 0),
@@ -3721,7 +3608,6 @@ describe_vector (vector, prefix, args, elt_describer,
&& !NILP (Fequal (tem2, definition)))
i++;
-
/* If we have a range of more than one character,
print where the range reaches to. */
@@ -3734,31 +3620,7 @@ describe_vector (vector, prefix, args, elt_describer,
if (!NILP (elt_prefix))
insert1 (elt_prefix);
- if (CHAR_TABLE_P (vector))
- {
- if (char_table_depth == 0)
- {
- insert1 (Fkey_description (kludge, prefix));
- }
- else if (complete_char)
- {
- indices[char_table_depth] = i;
- character = MAKE_CHAR (indices[0], indices[1], indices[2]);
- insert_char (character);
- }
- else
- {
- /* We need an octal representation for this block of
- characters. */
- char work[16];
- sprintf (work, "(row %d)", i);
- insert (work, strlen (work));
- }
- }
- else
- {
- insert1 (Fkey_description (kludge, prefix));
- }
+ insert1 (Fkey_description (kludge, prefix));
}
/* Print a description of the definition of this character.
@@ -3774,11 +3636,11 @@ describe_vector (vector, prefix, args, elt_describer,
}
}
- /* For (sub) char-table, print `defalt' slot at last. */
- if (CHAR_TABLE_P (vector) && !NILP (XCHAR_TABLE (vector)->defalt))
+ if (CHAR_TABLE_P (vector) && ! NILP (XCHAR_TABLE (vector)->defalt))
{
- insert (" ", char_table_depth * 2);
- insert_string ("<<default>>");
+ if (!NILP (elt_prefix))
+ insert1 (elt_prefix);
+ insert ("default", 7);
(*elt_describer) (XCHAR_TABLE (vector)->defalt, args);
}
diff --git a/src/lisp.h b/src/lisp.h
index d1ce953cd68..8224117241c 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -288,7 +288,8 @@ enum pvec_type
PVEC_BOOL_VECTOR = 0x10000,
PVEC_BUFFER = 0x20000,
PVEC_HASH_TABLE = 0x40000,
- PVEC_TYPE_MASK = 0x7fe00
+ PVEC_SUB_CHAR_TABLE = 0x80000,
+ PVEC_TYPE_MASK = 0x0ffe00
#if 0 /* This is used to make the value of PSEUDOVECTOR_FLAG available to
GDB. It doesn't work on OS Alpha. Moved to a variable in
@@ -541,6 +542,7 @@ extern size_t pure_size;
#define XSUBR(a) (eassert (GC_SUBRP(a)),(struct Lisp_Subr *) XPNTR(a))
#define XBUFFER(a) (eassert (GC_BUFFERP(a)),(struct buffer *) XPNTR(a))
#define XCHAR_TABLE(a) ((struct Lisp_Char_Table *) XPNTR(a))
+#define XSUB_CHAR_TABLE(a) ((struct Lisp_Sub_Char_Table *) XPNTR(a))
#define XBOOL_VECTOR(a) ((struct Lisp_Bool_Vector *) XPNTR(a))
/* Construct a Lisp_Object from a value or address. */
@@ -570,6 +572,7 @@ extern size_t pure_size;
#define XSETBUFFER(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_BUFFER))
#define XSETCHAR_TABLE(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_CHAR_TABLE))
#define XSETBOOL_VECTOR(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_BOOL_VECTOR))
+#define XSETSUB_CHAR_TABLE(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_SUB_CHAR_TABLE))
/* Convenience macros for dealing with Lisp arrays. */
@@ -747,49 +750,20 @@ struct Lisp_Vector
((OFFSETOF(type, nonlispfield) - OFFSETOF(struct Lisp_Vector, contents[0])) \
/ sizeof (Lisp_Object))
-/* A char table is a kind of vectorlike, with contents are like a
+/* A char-table is a kind of vectorlike, with contents are like a
vector but with a few other slots. For some purposes, it makes
- sense to handle a chartable with type struct Lisp_Vector. An
+ sense to handle a char-table with type struct Lisp_Vector. An
element of a char table can be any Lisp objects, but if it is a sub
char-table, we treat it a table that contains information of a
- group of characters of the same charsets or a specific character of
- a charset. A sub char-table has the same structure as a char table
- except for that the former omits several slots at the tail. A sub
- char table appears only in an element of a char table, and there's
- no way to access it directly from Emacs Lisp program. */
-
-/* This is the number of slots that apply to characters or character
- sets. The first 128 are for ASCII, the next 128 are for 8-bit
- European characters, and the last 128 are for multibyte characters.
- The first 256 are indexed by the code itself, but the last 128 are
- indexed by (charset-id + 128). */
-#define CHAR_TABLE_ORDINARY_SLOTS 384
-
-/* These are the slot of the default values for single byte
- characters. As 0x9A is never be a charset-id, it is safe to use
- that slot for ASCII. 0x9E and 0x80 are charset-ids of
- eight-bit-control and eight-bit-graphic respectively. */
-#define CHAR_TABLE_DEFAULT_SLOT_ASCII (0x9A + 128)
-#define CHAR_TABLE_DEFAULT_SLOT_8_BIT_CONTROL (0x9E + 128)
-#define CHAR_TABLE_DEFAULT_SLOT_8_BIT_GRAPHIC (0x80 + 128)
-
-/* This is the number of slots that apply to characters of ASCII and
- 8-bit Europeans only. */
-#define CHAR_TABLE_SINGLE_BYTE_SLOTS 256
+ specific range of characters. A sub char-table has the same
+ structure as a vector. A sub char table appears only in an element
+ of a char-table, and there's no way to access it directly from
+ Emacs Lisp program. */
/* This is the number of slots that every char table must have. This
counts the ordinary slots and the top, defalt, parent, and purpose
slots. */
-#define CHAR_TABLE_STANDARD_SLOTS (CHAR_TABLE_ORDINARY_SLOTS + 4)
-
-/* This is the number of slots that apply to position-code-1 and
- position-code-2 of a multibyte character at the 2nd and 3rd level
- sub char tables respectively. */
-#define SUB_CHAR_TABLE_ORDINARY_SLOTS 128
-
-/* This is the number of slots that every sub char table must have.
- This counts the ordinary slots and the top and defalt slot. */
-#define SUB_CHAR_TABLE_STANDARD_SLOTS (SUB_CHAR_TABLE_ORDINARY_SLOTS + 2)
+#define CHAR_TABLE_STANDARD_SLOTS (VECSIZE (struct Lisp_Char_Table) - 1)
/* Return the number of "extra" slots in the char table CT. */
@@ -797,70 +771,92 @@ struct Lisp_Vector
(((CT)->size & PSEUDOVECTOR_SIZE_MASK) - CHAR_TABLE_STANDARD_SLOTS)
/* Almost equivalent to Faref (CT, IDX) with optimization for ASCII
- and 8-bit Europeans characters. For these characters, do not check
- validity of CT. Do not follow parent. */
-#define CHAR_TABLE_REF(CT, IDX) \
- ((IDX) >= 0 && (IDX) < CHAR_TABLE_SINGLE_BYTE_SLOTS \
- ? (!NILP (XCHAR_TABLE (CT)->contents[IDX]) \
- ? XCHAR_TABLE (CT)->contents[IDX] \
- : XCHAR_TABLE (CT)->defalt) \
- : Faref (CT, make_number (IDX)))
+ characters. Do not check validity of CT. */
+#define CHAR_TABLE_REF(CT, IDX) \
+ ((ASCII_CHAR_P (IDX) \
+ && SUB_CHAR_TABLE_P (XCHAR_TABLE (CT)->ascii) \
+ && !NILP (XSUB_CHAR_TABLE (XCHAR_TABLE (CT)->ascii)->contents[IDX])) \
+ ? XSUB_CHAR_TABLE (XCHAR_TABLE (CT)->ascii)->contents[IDX] \
+ : char_table_ref ((CT), (IDX)))
-/* Almost equivalent to Faref (CT, IDX) with optimization for ASCII
- and 8-bit Europeans characters. However, if the result is nil,
- return IDX.
+/* Almost equivalent to Faref (CT, IDX). However, if the result is
+ not a character, return IDX.
For these characters, do not check validity of CT
and do not follow parent. */
-#define CHAR_TABLE_TRANSLATE(CT, IDX) \
- ((IDX) < CHAR_TABLE_SINGLE_BYTE_SLOTS \
- ? (!NILP (XCHAR_TABLE (CT)->contents[IDX]) \
- ? XINT (XCHAR_TABLE (CT)->contents[IDX]) \
- : IDX) \
- : char_table_translate (CT, IDX))
+#define CHAR_TABLE_TRANSLATE(CT, IDX) \
+ char_table_translate (CT, IDX)
/* Equivalent to Faset (CT, IDX, VAL) with optimization for ASCII and
- 8-bit Europeans characters. Do not check validity of CT. */
-#define CHAR_TABLE_SET(CT, IDX, VAL) \
- do { \
- if (XFASTINT (IDX) < CHAR_TABLE_SINGLE_BYTE_SLOTS) \
- XCHAR_TABLE (CT)->contents[XFASTINT (IDX)] = VAL; \
- else \
- Faset (CT, IDX, VAL); \
- } while (0)
+ 8-bit European characters. Do not check validity of CT. */
+#define CHAR_TABLE_SET(CT, IDX, VAL) \
+ (((IDX) >= 0 && ASCII_CHAR_P (IDX) \
+ && SUB_CHAR_TABLE_P (XCHAR_TABLE (CT)->ascii)) \
+ ? XSUB_CHAR_TABLE (XCHAR_TABLE (CT)->ascii)->contents[IDX] = VAL \
+ : char_table_set (CT, IDX, VAL))
+
+#define CHARTAB_SIZE_BITS_0 6
+#define CHARTAB_SIZE_BITS_1 4
+#define CHARTAB_SIZE_BITS_2 5
+#define CHARTAB_SIZE_BITS_3 7
+
+extern const int chartab_size[4];
+
+struct Lisp_Sub_Char_Table;
struct Lisp_Char_Table
{
/* This is the vector's size field, which also holds the
- pseudovector type information. It holds the size, too.
- The size counts the top, defalt, purpose, and parent slots.
- The last three are not counted if this is a sub char table. */
+ pseudovector type information. It holds the size, too. The size
+ counts the defalt, parent, purpose, ascii, contents, and extras
+ slots. */
EMACS_INT size;
struct Lisp_Vector *next;
- /* This holds a flag to tell if this is a top level char table (t)
- or a sub char table (nil). */
- Lisp_Object top;
+
/* This holds a default value,
which is used whenever the value for a specific character is nil. */
Lisp_Object defalt;
- /* This holds an actual value of each element. A sub char table
- has only SUB_CHAR_TABLE_ORDINARY_SLOTS number of elements. */
- Lisp_Object contents[CHAR_TABLE_ORDINARY_SLOTS];
-
- /* A sub char table doesn't has the following slots. */
- /* This points to another char table, which we inherit from
- when the value for a specific character is nil.
- The `defalt' slot takes precedence over this. */
+ /* This points to another char table, which we inherit from when the
+ value for a specific character is nil. The `defalt' slot takes
+ precedence over this. */
Lisp_Object parent;
- /* This should be a symbol which says what kind of use
- this char-table is meant for.
- Typically now the values can be `syntax-table' and `display-table'. */
+
+ /* This is a symbol which says what kind of use this char-table is
+ meant for. */
Lisp_Object purpose;
- /* These hold additional data. */
+
+ /* The bottom sub char-table for characters of the range 0..127. It
+ is nil if none of ASCII character has a specific value. */
+ Lisp_Object ascii;
+
+ Lisp_Object contents[(1 << CHARTAB_SIZE_BITS_0)];
+
+ /* These hold additional data. It is a vector. */
Lisp_Object extras[1];
};
+struct Lisp_Sub_Char_Table
+ {
+ /* This is the vector's size field, which also holds the
+ pseudovector type information. It holds the size, too. */
+ EMACS_INT size;
+ struct Lisp_Vector *next;
+
+ /* Depth of this sub char-table. It should be 1, 2, or 3. A sub
+ char-table of depth 1 contains 16 elments, and each element
+ covers 4096 (128*32) characters. A sub char-table of depth 2
+ contains 32 elements, and each element covers 128 characters. A
+ sub char-table of depth 3 contains 128 elements, and each element
+ is for one character. */
+ Lisp_Object depth;
+
+ /* Minimum character covered by the sub char-table. */
+ Lisp_Object min_char;
+
+ Lisp_Object contents[1];
+ };
+
/* A boolvector is a kind of vectorlike, with contents are like a string. */
struct Lisp_Bool_Vector
{
@@ -1363,9 +1359,9 @@ typedef unsigned char UCHAR;
(CHAR_ALT | CHAR_SUPER | CHAR_HYPER | CHAR_SHIFT | CHAR_CTL | CHAR_META)
-/* Actually, the current Emacs uses 19 bits for the character value
+/* Actually, the current Emacs uses 22 bits for the character value
itself. */
-#define CHARACTERBITS 19
+#define CHARACTERBITS 22
/* The maximum byte size consumed by push_key_description.
All callers should assure that at least this size of memory is
@@ -1421,9 +1417,9 @@ typedef unsigned char UCHAR;
#define GLYPH int
/* Mask bits for face. */
-#define GLYPH_MASK_FACE 0x7FF80000
+#define GLYPH_MASK_FACE 0x7FC00000
/* Mask bits for character code. */
-#define GLYPH_MASK_CHAR 0x0007FFFF /* The lowest 19 bits */
+#define GLYPH_MASK_CHAR 0x003FFFFF /* The lowest 22 bits */
/* The FAST macros assume that we already know we're in an X window. */
@@ -1521,14 +1517,14 @@ typedef unsigned char UCHAR;
#define BUFFERP(x) PSEUDOVECTORP (x, PVEC_BUFFER)
#define GC_BUFFERP(x) GC_PSEUDOVECTORP (x, PVEC_BUFFER)
#define CHAR_TABLE_P(x) PSEUDOVECTORP (x, PVEC_CHAR_TABLE)
+#define SUB_CHAR_TABLE_P(x) PSEUDOVECTORP (x, PVEC_SUB_CHAR_TABLE)
+#define GC_SUB_CHAR_TABLE_P(x) GC_PSEUDOVECTORP (x, PVEC_SUB_CHAR_TABLE)
#define GC_CHAR_TABLE_P(x) GC_PSEUDOVECTORP (x, PVEC_CHAR_TABLE)
#define BOOL_VECTOR_P(x) PSEUDOVECTORP (x, PVEC_BOOL_VECTOR)
#define GC_BOOL_VECTOR_P(x) GC_PSEUDOVECTORP (x, PVEC_BOOL_VECTOR)
#define FRAMEP(x) PSEUDOVECTORP (x, PVEC_FRAME)
#define GC_FRAMEP(x) GC_PSEUDOVECTORP (x, PVEC_FRAME)
-#define SUB_CHAR_TABLE_P(x) (CHAR_TABLE_P (x) && NILP (XCHAR_TABLE (x)->top))
-
/* Test for image (image . spec) */
#define IMAGEP(x) (CONSP (x) && EQ (XCAR (x), Qimage))
@@ -1644,6 +1640,20 @@ typedef unsigned char UCHAR;
XSETCDR ((x), tmp); \
} while (0)
+#define CHECK_NATNUM_CAR(x) \
+ do { \
+ Lisp_Object tmp = XCAR (x); \
+ CHECK_NATNUM (tmp); \
+ XSETCAR ((x), tmp); \
+ } while (0)
+
+#define CHECK_NATNUM_CDR(x) \
+ do { \
+ Lisp_Object tmp = XCDR (x); \
+ CHECK_NATNUM (tmp); \
+ XSETCDR ((x), tmp); \
+ } while (0)
+
/* Cast pointers to this type to compare them. Some machines want int. */
#ifndef PNTR_COMPARISON_TYPE
#define PNTR_COMPARISON_TYPE EMACS_UINT
@@ -2285,34 +2295,43 @@ extern void keys_of_cmds P_ ((void));
/* Defined in coding.c */
EXFUN (Fcoding_system_p, 1);
+EXFUN (Fcoding_system_base, 1);
+EXFUN (Fcoding_system_eol_type, 1);
+EXFUN (Fcheck_coding_system, 1);
EXFUN (Fcheck_coding_system, 1);
EXFUN (Fread_coding_system, 2);
EXFUN (Fread_non_nil_coding_system, 1);
EXFUN (Ffind_operation_coding_system, MANY);
EXFUN (Fupdate_coding_systems_internal, 0);
-EXFUN (Fencode_coding_string, 3);
-EXFUN (Fdecode_coding_string, 3);
-extern Lisp_Object detect_coding_system P_ ((const unsigned char *, int, int,
- int));
+EXFUN (Fencode_coding_string, 4);
+EXFUN (Fdecode_coding_string, 4);
+extern Lisp_Object detect_coding_system P_ ((const unsigned char *, int,
+ int, int, int, Lisp_Object));
extern void init_coding P_ ((void));
extern void init_coding_once P_ ((void));
extern void syms_of_coding P_ ((void));
-extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object,
- int));
+
+/* Defined in character.c */
+extern void init_character_once P_ ((void));
+extern void syms_of_character P_ ((void));
+EXFUN (Funibyte_char_to_multibyte, 1);
/* Defined in charset.c */
-extern EMACS_INT nonascii_insert_offset;
-extern Lisp_Object Vnonascii_translation_table;
EXFUN (Fchar_bytes, 1);
EXFUN (Fchar_width, 1);
EXFUN (Fstring, MANY);
extern int chars_in_text P_ ((const unsigned char *, int));
extern int multibyte_chars_in_text P_ ((const unsigned char *, int));
-extern int unibyte_char_to_multibyte P_ ((int));
extern int multibyte_char_to_unibyte P_ ((int, Lisp_Object));
extern Lisp_Object Qcharset;
+extern void init_charset P_ ((void));
extern void init_charset_once P_ ((void));
extern void syms_of_charset P_ ((void));
+/* Structure forward declarations. */
+struct charset;
+
+/* Defined in composite.c */
+extern void syms_of_composite P_ ((void));
/* Defined in syntax.c */
EXFUN (Fforward_word, 1);
@@ -2330,9 +2349,8 @@ extern int next_almost_prime P_ ((int));
extern Lisp_Object larger_vector P_ ((Lisp_Object, int, Lisp_Object));
extern void sweep_weak_hash_tables P_ ((void));
extern Lisp_Object Qstring_lessp;
-EXFUN (Foptimize_char_table, 1);
extern Lisp_Object Vfeatures;
-extern Lisp_Object QCtest, QCweakness, Qequal;
+extern Lisp_Object QCtest, QCweakness, Qequal, Qeq;
unsigned sxhash P_ ((Lisp_Object, int));
Lisp_Object make_hash_table P_ ((Lisp_Object, Lisp_Object, Lisp_Object,
Lisp_Object, Lisp_Object, Lisp_Object,
@@ -2347,6 +2365,7 @@ void remove_hash_entry P_ ((struct Lisp_Hash_Table *, int));
extern void init_fns P_ ((void));
EXFUN (Fsxhash, 1);
EXFUN (Fmake_hash_table, MANY);
+EXFUN (Fmakehash, 1);
EXFUN (Fcopy_hash_table, 1);
EXFUN (Fhash_table_count, 1);
EXFUN (Fhash_table_rehash_size, 1);
@@ -2405,6 +2424,7 @@ extern Lisp_Object concat2 P_ ((Lisp_Object, Lisp_Object));
extern Lisp_Object concat3 P_ ((Lisp_Object, Lisp_Object, Lisp_Object));
extern Lisp_Object nconc2 P_ ((Lisp_Object, Lisp_Object));
extern Lisp_Object assq_no_quit P_ ((Lisp_Object, Lisp_Object));
+extern Lisp_Object assoc_no_quit P_ ((Lisp_Object, Lisp_Object));
extern void clear_string_char_byte_cache P_ ((void));
extern int string_char_to_byte P_ ((Lisp_Object, int));
extern int string_byte_to_char P_ ((Lisp_Object, int));
@@ -2415,18 +2435,10 @@ EXFUN (Fcopy_alist, 1);
EXFUN (Fplist_get, 2);
EXFUN (Fplist_put, 3);
EXFUN (Fplist_member, 2);
-EXFUN (Fset_char_table_parent, 2);
-EXFUN (Fchar_table_extra_slot, 2);
-EXFUN (Fset_char_table_extra_slot, 3);
EXFUN (Frassoc, 2);
EXFUN (Fstring_equal, 2);
EXFUN (Fcompare_strings, 7);
EXFUN (Fstring_lessp, 2);
-extern int char_table_translate P_ ((Lisp_Object, int));
-extern void map_char_table P_ ((void (*) (Lisp_Object, Lisp_Object, Lisp_Object),
- Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, int,
- Lisp_Object *));
-extern Lisp_Object char_table_ref_and_index P_ ((Lisp_Object, int, int *));
extern void syms_of_fns P_ ((void));
/* Defined in floatfns.c */
@@ -2459,6 +2471,7 @@ extern void insert P_ ((const unsigned char *, int));
extern void insert_and_inherit P_ ((const unsigned char *, int));
extern void insert_1 P_ ((const unsigned char *, int, int, int, int));
extern void insert_1_both P_ ((const unsigned char *, int, int, int, int, int));
+extern void insert_from_gap P_ ((int, int));
extern void insert_from_string P_ ((Lisp_Object, int, int, int, int, int));
extern void insert_from_buffer P_ ((struct buffer *, int, int, int));
extern void insert_char P_ ((int));
@@ -2585,8 +2598,6 @@ extern Lisp_Object make_pure_vector P_ ((EMACS_INT));
EXFUN (Fgarbage_collect, 0);
EXFUN (Fmake_byte_code, MANY);
EXFUN (Fmake_bool_vector, 2);
-EXFUN (Fmake_char_table, 2);
-extern Lisp_Object make_sub_char_table P_ ((Lisp_Object));
extern Lisp_Object Qchar_table_extra_slots;
extern struct Lisp_Vector *allocate_vector P_ ((EMACS_INT));
extern struct Lisp_Vector *allocate_other_vector P_ ((EMACS_INT));
@@ -2609,6 +2620,31 @@ extern void syms_of_alloc P_ ((void));
extern struct buffer * allocate_buffer P_ ((void));
extern int valid_lisp_object_p P_ ((Lisp_Object));
+/* Defined in chartab.c */
+EXFUN (Fmake_char_table, 2);
+EXFUN (Fchar_table_parent, 1);
+EXFUN (Fset_char_table_parent, 2);
+EXFUN (Fchar_table_extra_slot, 2);
+EXFUN (Fset_char_table_extra_slot, 3);
+EXFUN (Fchar_table_range, 2);
+EXFUN (Fset_char_table_range, 3);
+EXFUN (Fset_char_table_default, 3);
+EXFUN (Foptimize_char_table, 1);
+EXFUN (Fmap_char_table, 2);
+extern Lisp_Object copy_char_table P_ ((Lisp_Object));
+extern Lisp_Object sub_char_table_ref P_ ((Lisp_Object, int));
+extern Lisp_Object char_table_ref P_ ((Lisp_Object, int));
+extern Lisp_Object char_table_ref_and_range P_ ((Lisp_Object, int,
+ int *, int *));
+extern Lisp_Object char_table_set P_ ((Lisp_Object, int, Lisp_Object));
+extern Lisp_Object char_table_set_range P_ ((Lisp_Object, int, int,
+ Lisp_Object));
+extern int char_table_translate P_ ((Lisp_Object, int));
+extern void map_char_table P_ ((void (*) (Lisp_Object, Lisp_Object,
+ Lisp_Object),
+ Lisp_Object, Lisp_Object, Lisp_Object));
+extern void syms_of_chartab P_ ((void));
+
/* Defined in print.c */
extern Lisp_Object Vprin1_to_string_buffer;
extern void debug_print P_ ((Lisp_Object));
@@ -2821,6 +2857,7 @@ extern int overlay_touches_p P_ ((int));
extern Lisp_Object Vbuffer_alist, Vinhibit_read_only;
EXFUN (Fget_buffer, 1);
EXFUN (Fget_buffer_create, 1);
+EXFUN (Fgenerate_new_buffer_name, 2);
EXFUN (Fset_buffer, 1);
EXFUN (set_buffer_if_live, 1);
EXFUN (Fbarf_if_buffer_read_only, 0);
@@ -3155,6 +3192,7 @@ extern Lisp_Object Qinsert_in_front_hooks, Qinsert_behind_hooks;
EXFUN (Fnext_single_property_change, 4);
EXFUN (Fnext_single_char_property_change, 4);
EXFUN (Fprevious_single_property_change, 4);
+EXFUN (Fget_text_property, 3);
EXFUN (Fput_text_property, 5);
EXFUN (Fprevious_char_property_change, 2);
EXFUN (Fnext_char_property_change, 2);
@@ -3212,6 +3250,7 @@ extern void init_sound P_ ((void));
/* Defined in category.c */
extern void init_category_once P_ ((void));
+extern Lisp_Object char_category_set P_ ((int));
extern void syms_of_category P_ ((void));
/* Defined in ccl.c */
@@ -3228,7 +3267,8 @@ extern void fatal () NO_RETURN;
#ifdef HAVE_WINDOW_SYSTEM
/* Defined in fontset.c */
extern void syms_of_fontset P_ ((void));
-EXFUN (Fset_fontset_font, 4);
+EXFUN (Fset_fontset_font, 5);
+EXFUN (Fnew_fontset, 2);
/* Defined in xfns.c, w32fns.c, or macfns.c */
EXFUN (Fxw_display_color_p, 1);
diff --git a/src/lread.c b/src/lread.c
index 88a70f691c3..08ba5123fcb 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -30,7 +30,9 @@ Boston, MA 02110-1301, USA. */
#include "lisp.h"
#include "intervals.h"
#include "buffer.h"
+#include "character.h"
#include "charset.h"
+#include "coding.h"
#include <epaths.h>
#include "commands.h"
#include "keyboard.h"
@@ -89,6 +91,12 @@ Lisp_Object Qinhibit_file_name_operation;
Lisp_Object Qeval_buffer_list, Veval_buffer_list;
Lisp_Object Qfile_truename, Qdo_after_load_evaluation; /* ACM 2006/5/16 */
+/* Used instead of Qget_file_char while loading *.elc files compiled
+ by Emacs 21 or older. */
+static Lisp_Object Qget_emacs_mule_file_char;
+
+static Lisp_Object Qload_force_doc_strings;
+
extern Lisp_Object Qevent_symbol_element_mask;
extern Lisp_Object Qfile_exists_p;
@@ -132,6 +140,11 @@ static int load_force_doc_strings;
/* Nonzero means read should convert strings to unibyte. */
static int load_convert_to_unibyte;
+/* Nonzero means READCHAR should read bytes one by one (not character)
+ when READCHARFUN is Qget_file_char or Qget_emacs_mule_file_char.
+ This is set to 1 by read1 temporarily while handling #@NUMBER. */
+static int load_each_byte;
+
/* Function to use for loading an Emacs lisp source file (not
compiled) instead of readevalloop. */
Lisp_Object Vload_source_file_function;
@@ -160,9 +173,6 @@ static int read_from_string_index;
static int read_from_string_index_byte;
static int read_from_string_limit;
-/* Number of bytes left to read in the buffer character
- that `readchar' has already advanced over. */
-static int readchar_backlog;
/* Number of characters read in the current call to Fread or
Fread_from_string. */
static int readchar_count;
@@ -206,7 +216,9 @@ int load_dangerous_libraries;
static Lisp_Object Vbytecomp_version_regexp;
-static void to_multibyte P_ ((char **, char **, int *));
+static int read_emacs_mule_char P_ ((int, int (*) (int, Lisp_Object),
+ Lisp_Object));
+
static void readevalloop P_ ((Lisp_Object, FILE*, Lisp_Object,
Lisp_Object (*) (), int,
Lisp_Object, Lisp_Object,
@@ -218,29 +230,41 @@ static void invalid_syntax P_ ((const char *, int)) NO_RETURN;
static void end_of_file_error P_ (()) NO_RETURN;
+/* Functions that read one byte from the current source READCHARFUN
+ or unreads one byte. If the integer argument C is -1, it returns
+ one read byte, or -1 when there's no more byte in the source. If C
+ is 0 or positive, it unreads C, and the return value is not
+ interesting. */
+
+static int readbyte_for_lambda P_ ((int, Lisp_Object));
+static int readbyte_from_file P_ ((int, Lisp_Object));
+static int readbyte_from_string P_ ((int, Lisp_Object));
+
/* Handle unreading and rereading of characters.
Write READCHAR to read a character,
UNREAD(c) to unread c to be read again.
- The READCHAR and UNREAD macros are meant for reading/unreading a
- byte code; they do not handle multibyte characters. The caller
- should manage them if necessary.
-
- [ Actually that seems to be a lie; READCHAR will definitely read
- multibyte characters from buffer sources, at least. Is the
- comment just out of date?
- -- Colin Walters <walters@gnu.org>, 22 May 2002 16:36:50 -0400 ]
- */
+ These macros correctly read/unread multibyte characters. */
#define READCHAR readchar (readcharfun)
#define UNREAD(c) unreadchar (readcharfun, c)
+/* When READCHARFUN is Qget_file_char, Qget_emacs_mule_file_char,
+ Qlambda, or a cons, we use this to keep an unread character because
+ a file stream can't handle multibyte-char unreading. The value -1
+ means that there's no unread character. */
+static int unread_char;
+
static int
readchar (readcharfun)
Lisp_Object readcharfun;
{
Lisp_Object tem;
register int c;
+ int (*readbyte) P_ ((int, Lisp_Object));
+ unsigned char buf[MAX_MULTIBYTE_LENGTH];
+ int i, len;
+ int emacs_mule_encoding = 0;
readchar_count++;
@@ -249,21 +273,10 @@ readchar (readcharfun)
register struct buffer *inbuffer = XBUFFER (readcharfun);
int pt_byte = BUF_PT_BYTE (inbuffer);
- int orig_pt_byte = pt_byte;
-
- if (readchar_backlog > 0)
- /* We get the address of the byte just passed,
- which is the last byte of the character.
- The other bytes in this character are consecutive with it,
- because the gap can't be in the middle of a character. */
- return *(BUF_BYTE_ADDRESS (inbuffer, BUF_PT_BYTE (inbuffer) - 1)
- - --readchar_backlog);
if (pt_byte >= BUF_ZV_BYTE (inbuffer))
return -1;
- readchar_backlog = -1;
-
if (! NILP (inbuffer->enable_multibyte_characters))
{
/* Fetch the character code from the buffer. */
@@ -274,6 +287,8 @@ readchar (readcharfun)
else
{
c = BUF_FETCH_BYTE (inbuffer, pt_byte);
+ if (! ASCII_BYTE_P (c))
+ c = BYTE8_TO_CHAR (c);
pt_byte++;
}
SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte);
@@ -285,21 +300,10 @@ readchar (readcharfun)
register struct buffer *inbuffer = XMARKER (readcharfun)->buffer;
int bytepos = marker_byte_position (readcharfun);
- int orig_bytepos = bytepos;
-
- if (readchar_backlog > 0)
- /* We get the address of the byte just passed,
- which is the last byte of the character.
- The other bytes in this character are consecutive with it,
- because the gap can't be in the middle of a character. */
- return *(BUF_BYTE_ADDRESS (inbuffer, XMARKER (readcharfun)->bytepos - 1)
- - --readchar_backlog);
if (bytepos >= BUF_ZV_BYTE (inbuffer))
return -1;
- readchar_backlog = -1;
-
if (! NILP (inbuffer->enable_multibyte_characters))
{
/* Fetch the character code from the buffer. */
@@ -310,6 +314,8 @@ readchar (readcharfun)
else
{
c = BUF_FETCH_BYTE (inbuffer, bytepos);
+ if (! ASCII_BYTE_P (c))
+ c = BYTE8_TO_CHAR (c);
bytepos++;
}
@@ -320,21 +326,15 @@ readchar (readcharfun)
}
if (EQ (readcharfun, Qlambda))
- return read_bytecode_char (0);
+ {
+ readbyte = readbyte_for_lambda;
+ goto read_multibyte;
+ }
if (EQ (readcharfun, Qget_file_char))
{
- c = getc (instream);
-#ifdef EINTR
- /* Interrupted reads have been observed while reading over the network */
- while (c == EOF && ferror (instream) && errno == EINTR)
- {
- QUIT;
- clearerr (instream);
- c = getc (instream);
- }
-#endif
- return c;
+ readbyte = readbyte_from_file;
+ goto read_multibyte;
}
if (STRINGP (readcharfun))
@@ -349,11 +349,59 @@ readchar (readcharfun)
return c;
}
+ if (CONSP (readcharfun))
+ {
+ /* This is the case that read_vector is reading from a unibyte
+ string that contains a byte sequence previously skipped
+ because of #@NUMBER. The car part of readcharfun is that
+ string, and the cdr part is a value of readcharfun given to
+ read_vector. */
+ readbyte = readbyte_from_string;
+ if (EQ (XCDR (readcharfun), Qget_emacs_mule_file_char))
+ emacs_mule_encoding = 1;
+ goto read_multibyte;
+ }
+
+ if (EQ (readcharfun, Qget_emacs_mule_file_char))
+ {
+ readbyte = readbyte_from_file;
+ emacs_mule_encoding = 1;
+ goto read_multibyte;
+ }
+
tem = call0 (readcharfun);
if (NILP (tem))
return -1;
return XINT (tem);
+
+ read_multibyte:
+ if (unread_char >= 0)
+ {
+ c = unread_char;
+ unread_char = -1;
+ return c;
+ }
+ c = (*readbyte) (-1, readcharfun);
+ if (c < 0 || ASCII_BYTE_P (c) || load_each_byte)
+ return c;
+ if (emacs_mule_encoding)
+ return read_emacs_mule_char (c, readbyte, readcharfun);
+ i = 0;
+ buf[i++] = c;
+ len = BYTES_BY_CHAR_HEAD (c);
+ while (i < len)
+ {
+ c = (*readbyte) (-1, readcharfun);
+ if (c < 0 || ! TRAILING_CODE_P (c))
+ {
+ while (--i > 1)
+ (*readbyte) (buf[i], readcharfun);
+ return BYTE8_TO_CHAR (buf[0]);
+ }
+ buf[i++] = c;
+ }
+ return STRING_CHAR (buf, i);
}
/* Unread the character C in the way appropriate for the stream READCHARFUN.
@@ -374,36 +422,26 @@ unreadchar (readcharfun, c)
struct buffer *b = XBUFFER (readcharfun);
int bytepos = BUF_PT_BYTE (b);
- if (readchar_backlog >= 0)
- readchar_backlog++;
+ BUF_PT (b)--;
+ if (! NILP (b->enable_multibyte_characters))
+ BUF_DEC_POS (b, bytepos);
else
- {
- BUF_PT (b)--;
- if (! NILP (b->enable_multibyte_characters))
- BUF_DEC_POS (b, bytepos);
- else
- bytepos--;
+ bytepos--;
- BUF_PT_BYTE (b) = bytepos;
- }
+ BUF_PT_BYTE (b) = bytepos;
}
else if (MARKERP (readcharfun))
{
struct buffer *b = XMARKER (readcharfun)->buffer;
int bytepos = XMARKER (readcharfun)->bytepos;
- if (readchar_backlog >= 0)
- readchar_backlog++;
+ XMARKER (readcharfun)->charpos--;
+ if (! NILP (b->enable_multibyte_characters))
+ BUF_DEC_POS (b, bytepos);
else
- {
- XMARKER (readcharfun)->charpos--;
- if (! NILP (b->enable_multibyte_characters))
- BUF_DEC_POS (b, bytepos);
- else
- bytepos--;
+ bytepos--;
- XMARKER (readcharfun)->bytepos = bytepos;
- }
+ XMARKER (readcharfun)->bytepos = bytepos;
}
else if (STRINGP (readcharfun))
{
@@ -411,14 +449,152 @@ unreadchar (readcharfun, c)
read_from_string_index_byte
= string_char_to_byte (readcharfun, read_from_string_index);
}
+ else if (CONSP (readcharfun))
+ {
+ unread_char = c;
+ }
else if (EQ (readcharfun, Qlambda))
- read_bytecode_char (1);
- else if (EQ (readcharfun, Qget_file_char))
- ungetc (c, instream);
+ {
+ unread_char = c;
+ }
+ else if (EQ (readcharfun, Qget_file_char)
+ || EQ (readcharfun, Qget_emacs_mule_file_char))
+ {
+ if (load_each_byte)
+ ungetc (c, instream);
+ else
+ unread_char = c;
+ }
else
call1 (readcharfun, make_number (c));
}
+static int
+readbyte_for_lambda (c, readcharfun)
+ int c;
+ Lisp_Object readcharfun;
+{
+ return read_bytecode_char (c >= 0);
+}
+
+
+static int
+readbyte_from_file (c, readcharfun)
+ int c;
+ Lisp_Object readcharfun;
+{
+ if (c >= 0)
+ {
+ ungetc (c, instream);
+ return 0;
+ }
+
+ c = getc (instream);
+#ifdef EINTR
+ /* Interrupted reads have been observed while reading over the network */
+ while (c == EOF && ferror (instream) && errno == EINTR)
+ {
+ QUIT;
+ clearerr (instream);
+ c = getc (instream);
+ }
+#endif
+ return (c == EOF ? -1 : c);
+}
+
+static int
+readbyte_from_string (c, readcharfun)
+ int c;
+ Lisp_Object readcharfun;
+{
+ Lisp_Object string = XCAR (readcharfun);
+
+ if (c >= 0)
+ {
+ read_from_string_index--;
+ read_from_string_index_byte
+ = string_char_to_byte (string, read_from_string_index);
+ }
+
+ if (read_from_string_index >= read_from_string_limit)
+ c = -1;
+ else
+ FETCH_STRING_CHAR_ADVANCE (c, string,
+ read_from_string_index,
+ read_from_string_index_byte);
+ return c;
+}
+
+
+/* Read one non-ASCII character from INSTREAM. The character is
+ encoded in `emacs-mule' and the first byte is already read in
+ C. */
+
+extern char emacs_mule_bytes[256];
+
+static int
+read_emacs_mule_char (c, readbyte, readcharfun)
+ int c;
+ int (*readbyte) P_ ((int, Lisp_Object));
+ Lisp_Object readcharfun;
+{
+ /* Emacs-mule coding uses at most 4-byte for one character. */
+ unsigned char buf[4];
+ int len = emacs_mule_bytes[c];
+ struct charset *charset;
+ int i;
+ unsigned code;
+
+ if (len == 1)
+ /* C is not a valid leading-code of `emacs-mule'. */
+ return BYTE8_TO_CHAR (c);
+
+ i = 0;
+ buf[i++] = c;
+ while (i < len)
+ {
+ c = (*readbyte) (-1, readcharfun);
+ if (c < 0xA0)
+ {
+ while (--i > 1)
+ (*readbyte) (buf[i], readcharfun);
+ return BYTE8_TO_CHAR (buf[0]);
+ }
+ buf[i++] = c;
+ }
+
+ if (len == 2)
+ {
+ charset = emacs_mule_charset[buf[0]];
+ code = buf[1] & 0x7F;
+ }
+ else if (len == 3)
+ {
+ if (buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_11
+ || buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_12)
+ {
+ charset = emacs_mule_charset[buf[1]];
+ code = buf[2] & 0x7F;
+ }
+ else
+ {
+ charset = emacs_mule_charset[buf[0]];
+ code = ((buf[1] << 8) | buf[2]) & 0x7F7F;
+ }
+ }
+ else
+ {
+ charset = emacs_mule_charset[buf[1]];
+ code = ((buf[2] << 8) | buf[3]) & 0x7F7F;
+ }
+ c = DECODE_CHAR (charset, code);
+ if (c < 0)
+ Fsignal (Qinvalid_read_syntax,
+ Fcons (build_string ("invalid multibyte form"), Qnil));
+ return c;
+}
+
+
static Lisp_Object read_internal_start P_ ((Lisp_Object, Lisp_Object,
Lisp_Object));
static Lisp_Object read0 P_ ((Lisp_Object));
@@ -426,7 +602,6 @@ static Lisp_Object read1 P_ ((Lisp_Object, int *, int));
static Lisp_Object read_list P_ ((int, Lisp_Object));
static Lisp_Object read_vector P_ ((Lisp_Object, int));
-static int read_multibyte P_ ((int, Lisp_Object));
static Lisp_Object substitute_object_recurse P_ ((Lisp_Object, Lisp_Object,
Lisp_Object));
@@ -601,11 +776,11 @@ DEFUN ("get-file-char", Fget_file_char, Sget_file_char, 0, 0, 0,
-/* Value is non-zero if the file asswociated with file descriptor FD
- is a compiled Lisp file that's safe to load. Only files compiled
- with Emacs are safe to load. Files compiled with XEmacs can lead
- to a crash in Fbyte_code because of an incompatible change in the
- byte compiler. */
+/* Value is a version number of byte compiled code if the file
+ asswociated with file descriptor FD is a compiled Lisp file that's
+ safe to load. Only files compiled with Emacs are safe to load.
+ Files compiled with XEmacs can lead to a crash in Fbyte_code
+ because of an incompatible change in the byte compiler. */
static int
safe_to_load_p (fd)
@@ -614,6 +789,7 @@ safe_to_load_p (fd)
char buf[512];
int nbytes, i;
int safe_p = 1;
+ int version = 1;
/* Read the first few bytes from the file, and look for a line
specifying the byte compiler version used. */
@@ -623,15 +799,18 @@ safe_to_load_p (fd)
buf[nbytes] = '\0';
/* Skip to the next newline, skipping over the initial `ELC'
- with NUL bytes following it. */
+ with NUL bytes following it, but note the version. */
for (i = 0; i < nbytes && buf[i] != '\n'; ++i)
- ;
+ if (i == 4)
+ version = buf[i];
- if (i < nbytes
- && fast_c_string_match_ignore_case (Vbytecomp_version_regexp,
+ if (i == nbytes
+ || fast_c_string_match_ignore_case (Vbytecomp_version_regexp,
buf + i) < 0)
safe_p = 0;
}
+ if (safe_p)
+ safe_p = version;
lseek (fd, 0, SEEK_SET);
return safe_p;
@@ -732,6 +911,8 @@ Return t if the file exists and loads successfully. */)
int safe_p = 1;
char *fmode = "r";
Lisp_Object tmp[2];
+ int version;
+
#ifdef DOS_NT
fmode = "rt";
#endif /* DOS_NT */
@@ -855,8 +1036,10 @@ Return t if the file exists and loads successfully. */)
tmp))
: found) ;
+ version = -1;
if (!bcmp (SDATA (found) + SBYTES (found) - 4,
- ".elc", 4))
+ ".elc", 4)
+ || (version = safe_to_load_p (fd)) > 0)
/* Load .elc files directly, but not when they are
remote and have no handler! */
{
@@ -867,7 +1050,8 @@ Return t if the file exists and loads successfully. */)
GCPRO3 (file, found, hist_file_name);
- if (!safe_to_load_p (fd))
+ if (version < 0
+ && ! (version = safe_to_load_p (fd)))
{
safe_p = 0;
if (!load_dangerous_libraries)
@@ -965,8 +1149,17 @@ Return t if the file exists and loads successfully. */)
load_descriptor_list
= Fcons (make_number (fileno (stream)), load_descriptor_list);
load_in_progress++;
- readevalloop (Qget_file_char, stream, hist_file_name,
- Feval, 0, Qnil, Qnil, Qnil, Qnil);
+ if (! version || version >= 22)
+ readevalloop (Qget_file_char, stream, hist_file_name,
+ Feval, 0, Qnil, Qnil, Qnil, Qnil);
+ else
+ {
+ /* We can't handle a file which was compiled with
+ byte-compile-dynamic by older version of Emacs. */
+ specbind (Qload_force_doc_strings, Qt);
+ readevalloop (Qget_emacs_mule_file_char, stream, hist_file_name, Feval,
+ 0, Qnil, Qnil, Qnil, Qnil);
+ }
unbind_to (count, Qnil);
/* Run any eval-after-load forms for this file */
@@ -1393,8 +1586,6 @@ readevalloop (readcharfun, stream, sourcename, evalfun,
record_unwind_protect (readevalloop_1, load_convert_to_unibyte ? Qt : Qnil);
load_convert_to_unibyte = !NILP (unibyte);
- readchar_backlog = -1;
-
GCPRO4 (sourcename, readfun, start, end);
/* Try to ensure sourcename is a truename, except whilst preloading. */
@@ -1650,7 +1841,6 @@ read_internal_start (stream, start, end)
{
Lisp_Object retval;
- readchar_backlog = -1;
readchar_count = 0;
new_backquote_flag = 0;
read_objects = Qnil;
@@ -1658,17 +1848,25 @@ read_internal_start (stream, start, end)
|| EQ (Vread_with_symbol_positions, stream))
Vread_symbol_positions_list = Qnil;
- if (STRINGP (stream))
+ if (STRINGP (stream)
+ || ((CONSP (stream) && STRINGP (XCAR (stream)))))
{
int startval, endval;
+ Lisp_Object string;
+
+ if (STRINGP (stream))
+ string = stream;
+ else
+ string = XCAR (stream);
+
if (NILP (end))
- endval = SCHARS (stream);
+ endval = SCHARS (string);
else
{
CHECK_NUMBER (end);
endval = XINT (end);
- if (endval < 0 || endval > SCHARS (stream))
- args_out_of_range (stream, end);
+ if (endval < 0 || endval > SCHARS (string))
+ args_out_of_range (string, end);
}
if (NILP (start))
@@ -1678,10 +1876,10 @@ read_internal_start (stream, start, end)
CHECK_NUMBER (start);
startval = XINT (start);
if (startval < 0 || startval > endval)
- args_out_of_range (stream, start);
+ args_out_of_range (string, start);
}
read_from_string_index = startval;
- read_from_string_index_byte = string_char_to_byte (stream, startval);
+ read_from_string_index_byte = string_char_to_byte (string, startval);
read_from_string_limit = endval;
}
@@ -1728,59 +1926,19 @@ read0 (readcharfun)
static int read_buffer_size;
static char *read_buffer;
-/* Read multibyte form and return it as a character. C is a first
- byte of multibyte form, and rest of them are read from
- READCHARFUN. */
-
-static int
-read_multibyte (c, readcharfun)
- register int c;
- Lisp_Object readcharfun;
-{
- /* We need the actual character code of this multibyte
- characters. */
- unsigned char str[MAX_MULTIBYTE_LENGTH];
- int len = 0;
- int bytes;
-
- if (c < 0)
- return c;
-
- str[len++] = c;
- while ((c = READCHAR) >= 0xA0
- && len < MAX_MULTIBYTE_LENGTH)
- {
- str[len++] = c;
- readchar_count--;
- }
- UNREAD (c);
- if (UNIBYTE_STR_AS_MULTIBYTE_P (str, len, bytes))
- return STRING_CHAR (str, len);
- /* The byte sequence is not valid as multibyte. Unread all bytes
- but the first one, and return the first byte. */
- while (--len > 0)
- UNREAD (str[len]);
- return str[0];
-}
-
/* Read a \-escape sequence, assuming we already read the `\'.
- If the escape sequence forces unibyte, store 1 into *BYTEREP.
- If the escape sequence forces multibyte, store 2 into *BYTEREP.
- Otherwise store 0 into *BYTEREP. */
+ If the escape sequence forces unibyte, return eight-bit char. */
static int
-read_escape (readcharfun, stringp, byterep)
+read_escape (readcharfun, stringp)
Lisp_Object readcharfun;
int stringp;
- int *byterep;
{
register int c = READCHAR;
/* \u allows up to four hex digits, \U up to eight. Default to the
behaviour for \u, and change this value in the case that \U is seen. */
int unicode_hex_count = 4;
- *byterep = 0;
-
switch (c)
{
case -1:
@@ -1817,7 +1975,7 @@ read_escape (readcharfun, stringp, byterep)
error ("Invalid escape character syntax");
c = READCHAR;
if (c == '\\')
- c = read_escape (readcharfun, 0, byterep);
+ c = read_escape (readcharfun, 0);
return c | meta_modifier;
case 'S':
@@ -1826,7 +1984,7 @@ read_escape (readcharfun, stringp, byterep)
error ("Invalid escape character syntax");
c = READCHAR;
if (c == '\\')
- c = read_escape (readcharfun, 0, byterep);
+ c = read_escape (readcharfun, 0);
return c | shift_modifier;
case 'H':
@@ -1835,7 +1993,7 @@ read_escape (readcharfun, stringp, byterep)
error ("Invalid escape character syntax");
c = READCHAR;
if (c == '\\')
- c = read_escape (readcharfun, 0, byterep);
+ c = read_escape (readcharfun, 0);
return c | hyper_modifier;
case 'A':
@@ -1844,7 +2002,7 @@ read_escape (readcharfun, stringp, byterep)
error ("Invalid escape character syntax");
c = READCHAR;
if (c == '\\')
- c = read_escape (readcharfun, 0, byterep);
+ c = read_escape (readcharfun, 0);
return c | alt_modifier;
case 's':
@@ -1856,7 +2014,7 @@ read_escape (readcharfun, stringp, byterep)
}
c = READCHAR;
if (c == '\\')
- c = read_escape (readcharfun, 0, byterep);
+ c = read_escape (readcharfun, 0);
return c | super_modifier;
case 'C':
@@ -1866,7 +2024,7 @@ read_escape (readcharfun, stringp, byterep)
case '^':
c = READCHAR;
if (c == '\\')
- c = read_escape (readcharfun, 0, byterep);
+ c = read_escape (readcharfun, 0);
if ((c & ~CHAR_MODIFIER_MASK) == '?')
return 0177 | (c & CHAR_MODIFIER_MASK);
else if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
@@ -1906,7 +2064,8 @@ read_escape (readcharfun, stringp, byterep)
}
}
- *byterep = 1;
+ if (i >= 0x80 && i < 0x100)
+ i = BYTE8_TO_CHAR (i);
return i;
}
@@ -1914,6 +2073,7 @@ read_escape (readcharfun, stringp, byterep)
/* A hex escape, as in ANSI C. */
{
int i = 0;
+ int count = 0;
while (1)
{
c = READCHAR;
@@ -1936,9 +2096,11 @@ read_escape (readcharfun, stringp, byterep)
UNREAD (c);
break;
}
+ count++;
}
- *byterep = 2;
+ if (count < 3 && i >= 0x80)
+ return BYTE8_TO_CHAR (i);
return i;
}
@@ -1952,8 +2114,6 @@ read_escape (readcharfun, stringp, byterep)
{
int i = 0;
int count = 0;
- Lisp_Object lisp_char;
- struct gcpro gcpro1;
while (++count <= unicode_hex_count)
{
@@ -1970,22 +2130,10 @@ read_escape (readcharfun, stringp, byterep)
}
}
- GCPRO1 (readcharfun);
- lisp_char = call2 (intern ("decode-char"), intern ("ucs"),
- make_number (i));
- UNGCPRO;
-
- if (NILP (lisp_char))
- {
- error ("Unsupported Unicode code point: U+%x", (unsigned)i);
- }
-
- return XFASTINT (lisp_char);
+ return i;
}
default:
- if (BASE_LEADING_CODE_P (c))
- c = read_multibyte (c, readcharfun);
return c;
}
}
@@ -2056,43 +2204,6 @@ read_integer (readcharfun, radix)
}
-/* Convert unibyte text in read_buffer to multibyte.
-
- Initially, *P is a pointer after the end of the unibyte text, and
- the pointer *END points after the end of read_buffer.
-
- If read_buffer doesn't have enough room to hold the result
- of the conversion, reallocate it and adjust *P and *END.
-
- At the end, make *P point after the result of the conversion, and
- return in *NCHARS the number of characters in the converted
- text. */
-
-static void
-to_multibyte (p, end, nchars)
- char **p, **end;
- int *nchars;
-{
- int nbytes;
-
- parse_str_as_multibyte (read_buffer, *p - read_buffer, &nbytes, nchars);
- if (read_buffer_size < 2 * nbytes)
- {
- int offset = *p - read_buffer;
- read_buffer_size = 2 * max (read_buffer_size, nbytes);
- read_buffer = (char *) xrealloc (read_buffer, read_buffer_size);
- *p = read_buffer + offset;
- *end = read_buffer + read_buffer_size;
- }
-
- if (nbytes != *nchars)
- nbytes = str_as_multibyte (read_buffer, read_buffer_size,
- *p - read_buffer, nchars);
-
- *p = read_buffer + nbytes;
-}
-
-
/* If the next token is ')' or ']' or '.', we store that character
in *PCH and the return value is not interesting. Else, we store
zero in *PCH and we read and return one lisp object.
@@ -2109,6 +2220,7 @@ read1 (readcharfun, pch, first_in_list)
int uninterned_symbol = 0;
*pch = 0;
+ load_each_byte = 0;
retry:
@@ -2140,11 +2252,9 @@ read1 (readcharfun, pch, first_in_list)
{
Lisp_Object tmp;
tmp = read_vector (readcharfun, 0);
- if (XVECTOR (tmp)->size < CHAR_TABLE_STANDARD_SLOTS
- || XVECTOR (tmp)->size > CHAR_TABLE_STANDARD_SLOTS + 10)
+ if (XVECTOR (tmp)->size < VECSIZE (struct Lisp_Char_Table))
error ("Invalid size char-table");
XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp));
- XCHAR_TABLE (tmp)->top = Qt;
return tmp;
}
else if (c == '^')
@@ -2153,11 +2263,18 @@ read1 (readcharfun, pch, first_in_list)
if (c == '[')
{
Lisp_Object tmp;
+ int depth, size;
+
tmp = read_vector (readcharfun, 0);
- if (XVECTOR (tmp)->size != SUB_CHAR_TABLE_STANDARD_SLOTS)
+ if (!INTEGERP (AREF (tmp, 0)))
+ error ("Invalid depth in char-table");
+ depth = XINT (AREF (tmp, 0));
+ if (depth < 1 || depth > 3)
+ error ("Invalid depth in char-table");
+ size = XVECTOR (tmp)->size - 2;
+ if (chartab_size [depth] != size)
error ("Invalid size char-table");
- XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp));
- XCHAR_TABLE (tmp)->top = Qnil;
+ XSETSUB_CHAR_TABLE (tmp, XSUB_CHAR_TABLE (tmp));
return tmp;
}
invalid_syntax ("#^^", 3);
@@ -2178,12 +2295,14 @@ read1 (readcharfun, pch, first_in_list)
UNREAD (c);
tmp = read1 (readcharfun, pch, first_in_list);
- if (size_in_chars != SCHARS (tmp)
- /* We used to print 1 char too many
- when the number of bits was a multiple of 8.
- Accept such input in case it came from an old version. */
- && ! (XFASTINT (length)
- == (SCHARS (tmp) - 1) * BOOL_VECTOR_BITS_PER_CHAR))
+ if (STRING_MULTIBYTE (tmp)
+ || (size_in_chars != SCHARS (tmp)
+ /* We used to print 1 char too many
+ when the number of bits was a multiple of 8.
+ Accept such input in case it came from an old
+ version. */
+ && ! (XFASTINT (length)
+ == (SCHARS (tmp) - 1) * BOOL_VECTOR_BITS_PER_CHAR)))
invalid_syntax ("#&...", 5);
val = Fmake_bool_vector (length, Qnil);
@@ -2245,6 +2364,7 @@ read1 (readcharfun, pch, first_in_list)
{
int i, nskip = 0;
+ load_each_byte = 1;
/* Read a decimal integer. */
while ((c = READCHAR) >= 0
&& c >= '0' && c <= '9')
@@ -2255,7 +2375,9 @@ read1 (readcharfun, pch, first_in_list)
if (c >= 0)
UNREAD (c);
- if (load_force_doc_strings && EQ (readcharfun, Qget_file_char))
+ if (load_force_doc_strings
+ && (EQ (readcharfun, Qget_file_char)
+ || EQ (readcharfun, Qget_emacs_mule_file_char)))
{
/* If we are supposed to force doc strings into core right now,
record the last string that we skipped,
@@ -2307,6 +2429,7 @@ read1 (readcharfun, pch, first_in_list)
c = READCHAR;
}
+ load_each_byte = 0;
goto retry;
}
if (c == '!')
@@ -2436,7 +2559,7 @@ read1 (readcharfun, pch, first_in_list)
case '?':
{
- int discard;
+ int modifiers;
int next_char;
int ok;
@@ -2452,9 +2575,12 @@ read1 (readcharfun, pch, first_in_list)
return make_number (c);
if (c == '\\')
- c = read_escape (readcharfun, 0, &discard);
- else if (BASE_LEADING_CODE_P (c))
- c = read_multibyte (c, readcharfun);
+ c = read_escape (readcharfun, 0);
+ modifiers = c & CHAR_MODIFIER_MASK;
+ c &= ~CHAR_MODIFIER_MASK;
+ if (CHAR_BYTE8_P (c))
+ c = CHAR_TO_BYTE8 (c);
+ c |= modifiers;
next_char = READCHAR;
if (next_char == '.')
@@ -2489,14 +2615,12 @@ read1 (readcharfun, pch, first_in_list)
char *p = read_buffer;
char *end = read_buffer + read_buffer_size;
register int c;
- /* 1 if we saw an escape sequence specifying
- a multibyte character, or a multibyte character. */
+ /* Nonzero if we saw an escape sequence specifying
+ a multibyte character. */
int force_multibyte = 0;
- /* 1 if we saw an escape sequence specifying
+ /* Nonzero if we saw an escape sequence specifying
a single-byte character. */
int force_singlebyte = 0;
- /* 1 if read_buffer contains multibyte text now. */
- int is_multibyte = 0;
int cancel = 0;
int nchars = 0;
@@ -2514,9 +2638,9 @@ read1 (readcharfun, pch, first_in_list)
if (c == '\\')
{
- int byterep;
+ int modifiers;
- c = read_escape (readcharfun, 1, &byterep);
+ c = read_escape (readcharfun, 1);
/* C is -1 if \ newline has just been seen */
if (c == -1)
@@ -2526,50 +2650,55 @@ read1 (readcharfun, pch, first_in_list)
continue;
}
- if (byterep == 1)
+ modifiers = c & CHAR_MODIFIER_MASK;
+ c = c & ~CHAR_MODIFIER_MASK;
+
+ if (CHAR_BYTE8_P (c))
force_singlebyte = 1;
- else if (byterep == 2)
+ else if (! ASCII_CHAR_P (c))
force_multibyte = 1;
- }
-
- /* A character that must be multibyte forces multibyte. */
- if (! SINGLE_BYTE_CHAR_P (c & ~CHAR_MODIFIER_MASK))
- force_multibyte = 1;
+ else /* i.e. ASCII_CHAR_P (c) */
+ {
+ /* Allow `\C- ' and `\C-?'. */
+ if (modifiers == CHAR_CTL)
+ {
+ if (c == ' ')
+ c = 0, modifiers = 0;
+ else if (c == '?')
+ c = 127, modifiers = 0;
+ }
+ if (modifiers & CHAR_SHIFT)
+ {
+ /* Shift modifier is valid only with [A-Za-z]. */
+ if (c >= 'A' && c <= 'Z')
+ modifiers &= ~CHAR_SHIFT;
+ else if (c >= 'a' && c <= 'z')
+ c -= ('a' - 'A'), modifiers &= ~CHAR_SHIFT;
+ }
+
+ if (modifiers & CHAR_META)
+ {
+ /* Move the meta bit to the right place for a
+ string. */
+ modifiers &= ~CHAR_META;
+ c = BYTE8_TO_CHAR (c | 0x80);
+ force_singlebyte = 1;
+ }
+ }
- /* If we just discovered the need to be multibyte,
- convert the text accumulated thus far. */
- if (force_multibyte && ! is_multibyte)
- {
- is_multibyte = 1;
- to_multibyte (&p, &end, &nchars);
+ /* Any modifiers remaining are invalid. */
+ if (modifiers)
+ error ("Invalid modifier in string");
+ p += CHAR_STRING (c, (unsigned char *) p);
}
-
- /* Allow `\C- ' and `\C-?'. */
- if (c == (CHAR_CTL | ' '))
- c = 0;
- else if (c == (CHAR_CTL | '?'))
- c = 127;
-
- if (c & CHAR_SHIFT)
+ else
{
- /* Shift modifier is valid only with [A-Za-z]. */
- if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
- c &= ~CHAR_SHIFT;
- else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
- c = (c & ~CHAR_SHIFT) - ('a' - 'A');
+ p += CHAR_STRING (c, (unsigned char *) p);
+ if (CHAR_BYTE8_P (c))
+ force_singlebyte = 1;
+ else if (! ASCII_CHAR_P (c))
+ force_multibyte = 1;
}
-
- if (c & CHAR_META)
- /* Move the meta bit to the right place for a string. */
- c = (c & ~CHAR_META) | 0x80;
- if (c & CHAR_MODIFIER_MASK)
- error ("Invalid modifier in string");
-
- if (is_multibyte)
- p += CHAR_STRING (c, p);
- else
- *p++ = c;
-
nchars++;
}
@@ -2582,37 +2711,16 @@ read1 (readcharfun, pch, first_in_list)
if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel)
return make_number (0);
- if (is_multibyte || force_singlebyte)
+ if (force_multibyte)
+ /* READ_BUFFER already contains valid multibyte forms. */
;
- else if (load_convert_to_unibyte)
- {
- Lisp_Object string;
- to_multibyte (&p, &end, &nchars);
- if (p - read_buffer != nchars)
- {
- string = make_multibyte_string (read_buffer, nchars,
- p - read_buffer);
- return Fstring_make_unibyte (string);
- }
- /* We can make a unibyte string directly. */
- is_multibyte = 0;
- }
- else if (EQ (readcharfun, Qget_file_char)
- || EQ (readcharfun, Qlambda))
+ else if (force_singlebyte)
{
- /* Nowadays, reading directly from a file is used only for
- compiled Emacs Lisp files, and those always use the
- Emacs internal encoding. Meanwhile, Qlambda is used
- for reading dynamic byte code (compiled with
- byte-compile-dynamic = t). So make the string multibyte
- if the string contains any multibyte sequences.
- (to_multibyte is a no-op if not.) */
- to_multibyte (&p, &end, &nchars);
- is_multibyte = (p - read_buffer) != nchars;
+ nchars = str_as_unibyte (read_buffer, p - read_buffer);
+ p = read_buffer + nchars;
}
else
- /* In all other cases, if we read these bytes as
- separate characters, treat them as separate characters now. */
+ /* Otherwise, READ_BUFFER contains only ASCII. */
;
/* We want readchar_count to be the number of characters, not
@@ -2622,9 +2730,11 @@ read1 (readcharfun, pch, first_in_list)
/* readchar_count -= (p - read_buffer) - nchars; */
if (read_pure)
return make_pure_string (read_buffer, nchars, p - read_buffer,
- is_multibyte);
+ (force_multibyte
+ || (p - read_buffer != nchars)));
return make_specified_string (read_buffer, nchars, p - read_buffer,
- is_multibyte);
+ (force_multibyte
+ || (p - read_buffer != nchars)));
}
case '.':
@@ -2679,11 +2789,7 @@ read1 (readcharfun, pch, first_in_list)
quoted = 1;
}
- if (! SINGLE_BYTE_CHAR_P (c))
- p += CHAR_STRING (c, p);
- else
- *p++ = c;
-
+ p += CHAR_STRING (c, p);
c = READCHAR;
}
@@ -2717,6 +2823,8 @@ read1 (readcharfun, pch, first_in_list)
{
if (p1[-1] == '.')
p1[-1] = '\0';
+ /* Fixme: if we have strtol, use that, and check
+ for overflow. */
if (sizeof (int) == sizeof (EMACS_INT))
XSETINT (val, atoi (read_buffer));
else if (sizeof (long) == sizeof (EMACS_INT))
@@ -3037,7 +3145,7 @@ read_vector (readcharfun, bytecodeflag)
STRING_SET_CHARS (bytestr, SBYTES (bytestr));
STRING_SET_UNIBYTE (bytestr);
- item = Fread (bytestr);
+ item = Fread (Fcons (bytestr, readcharfun));
if (!CONSP (item))
error ("Invalid byte code");
@@ -3050,6 +3158,15 @@ read_vector (readcharfun, bytecodeflag)
/* Now handle the bytecode slot. */
ptr[COMPILED_BYTECODE] = read_pure ? Fpurecopy (bytestr) : bytestr;
}
+ else if (i == COMPILED_DOC_STRING
+ && STRINGP (item)
+ && ! STRING_MULTIBYTE (item))
+ {
+ if (EQ (readcharfun, Qget_emacs_mule_file_char))
+ item = Fdecode_coding_string (item, Qemacs_mule, Qnil, Qnil);
+ else
+ item = Fstring_as_multibyte (item);
+ }
}
ptr[i] = read_pure ? Fpurecopy (item) : item;
otem = XCONS (tem);
@@ -3146,7 +3263,15 @@ read_list (flag, readcharfun)
if (doc_reference == 2)
{
/* Get a doc string from the file we are loading.
- If it's in saved_doc_string, get it from there. */
+ If it's in saved_doc_string, get it from there.
+
+ Here, we don't know if the string is a
+ bytecode string or a doc string. As a
+ bytecode string must be unibyte, we always
+ return a unibyte string. If it is actually a
+ doc string, caller must make it
+ multibyte. */
+
int pos = XINT (XCDR (val));
/* Position is negative for user variables. */
if (pos < 0) pos = -pos;
@@ -3178,8 +3303,8 @@ read_list (flag, readcharfun)
saved_doc_string[to++] = c;
}
- return make_string (saved_doc_string + start,
- to - start);
+ return make_unibyte_string (saved_doc_string + start,
+ to - start);
}
/* Look in prev_saved_doc_string the same way. */
else if (pos >= prev_saved_doc_string_position
@@ -3210,11 +3335,12 @@ read_list (flag, readcharfun)
prev_saved_doc_string[to++] = c;
}
- return make_string (prev_saved_doc_string + start,
- to - start);
+ return make_unibyte_string (prev_saved_doc_string
+ + start,
+ to - start);
}
else
- return get_doc_string (val, 0, 0);
+ return get_doc_string (val, 1, 0);
}
return val;
@@ -4158,6 +4284,12 @@ to load. See also `load-dangerous-libraries'. */);
Qget_file_char = intern ("get-file-char");
staticpro (&Qget_file_char);
+ Qget_emacs_mule_file_char = intern ("get-emacs-mule-file-char");
+ staticpro (&Qget_emacs_mule_file_char);
+
+ Qload_force_doc_strings = intern ("load-force-doc-strings");
+ staticpro (&Qload_force_doc_strings);
+
Qbackquote = intern ("`");
staticpro (&Qbackquote);
Qcomma = intern (",");
diff --git a/src/macfns.c b/src/macfns.c
index 494d6ec1da3..10e7dd97fa1 100644
--- a/src/macfns.c
+++ b/src/macfns.c
@@ -2623,7 +2623,7 @@ This function is an internal primitive--use `make-frame' instead. */)
{
tem = Fquery_fontset (font, Qnil);
if (STRINGP (tem))
- font = x_new_fontset (f, SDATA (tem));
+ font = x_new_fontset (f, tem);
else
font = x_new_font (f, SDATA (font));
}
@@ -2637,7 +2637,7 @@ This function is an internal primitive--use `make-frame' instead. */)
font = x_new_font (f, "-ETL-fixed-medium-r-*--*-160-*-*-*-*-iso8859-1");
/* If those didn't work, look for something which will at least work. */
if (! STRINGP (font))
- font = x_new_fontset (f, "fontset-standard");
+ font = x_new_fontset (f, build_string ("fontset-standard"));
if (! STRINGP (font))
font = x_new_font (f, "-*-monaco-*-12-*-mac-roman");
if (! STRINGP (font))
@@ -3821,7 +3821,7 @@ x_create_tip_frame (dpyinfo, parms, text)
{
tem = Fquery_fontset (font, Qnil);
if (STRINGP (tem))
- font = x_new_fontset (f, SDATA (tem));
+ font = x_new_fontset (f, tem);
else
font = x_new_font (f, SDATA (font));
}
@@ -3835,7 +3835,7 @@ x_create_tip_frame (dpyinfo, parms, text)
font = x_new_font (f, "-ETL-fixed-medium-r-*--*-160-*-*-*-*-iso8859-1");
/* If those didn't work, look for something which will at least work. */
if (! STRINGP (font))
- font = x_new_fontset (f, "fontset-standard");
+ font = x_new_fontset (f, build_string ("fontset-standard"));
if (! STRINGP (font))
font = x_new_font (f, "-*-monaco-*-12-*-mac-roman");
if (! STRINGP (font))
diff --git a/src/macgui.h b/src/macgui.h
index 1ea53af59be..01f5317aad6 100644
--- a/src/macgui.h
+++ b/src/macgui.h
@@ -81,11 +81,13 @@ typedef unsigned long Time;
/* Whether to use ATSUI (Apple Type Services for Unicode Imaging) for
text drawing. */
+#if 0 /* Don't enable by default on the emacs-unicode-2 branch. */
#ifndef USE_ATSUI
#ifdef MAC_OSX
#define USE_ATSUI 1
#endif
#endif
+#endif
/* Whether to use low-level Quartz 2D (aka Core Graphics) text drawing
in preference to ATSUI for ASCII and Latin-1 characters. */
diff --git a/src/macterm.c b/src/macterm.c
index 69612302774..e98fc7729c0 100644
--- a/src/macterm.c
+++ b/src/macterm.c
@@ -84,6 +84,8 @@ Boston, MA 02110-1301, USA. */
#include "intervals.h"
#include "atimer.h"
#include "keymap.h"
+#include "character.h"
+#include "ccl.h"
@@ -2171,7 +2173,8 @@ XTreset_terminal_modes ()
/* Function prototypes of this page. */
static XCharStruct *x_per_char_metric P_ ((XFontStruct *, XChar2b *));
-static int mac_encode_char P_ ((int, XChar2b *, struct font_info *, int *));
+static int mac_encode_char P_ ((int, XChar2b *, struct font_info *,
+ struct charset *, int *));
static void
@@ -2315,13 +2318,13 @@ mac_per_char_metric (font, char2b, font_type)
the two-byte form of C. Encoding is returned in *CHAR2B. */
static int
-mac_encode_char (c, char2b, font_info, two_byte_p)
+mac_encode_char (c, char2b, font_info, charset, two_byte_p)
int c;
XChar2b *char2b;
struct font_info *font_info;
+ struct charset *charset;
int *two_byte_p;
{
- int charset = CHAR_CHARSET (c);
XFontStruct *font = font_info->font;
/* FONT_INFO may define a scheme by which to encode byte1 and byte2.
@@ -2335,31 +2338,31 @@ mac_encode_char (c, char2b, font_info, two_byte_p)
check_ccl_update (ccl);
if (CHARSET_DIMENSION (charset) == 1)
{
- ccl->reg[0] = charset;
- ccl->reg[1] = char2b->byte2;
+ ccl->reg[0] = CHARSET_ID (charset);
+ ccl->reg[1] = XCHAR2B_BYTE2 (char2b);
ccl->reg[2] = -1;
}
else
{
- ccl->reg[0] = charset;
- ccl->reg[1] = char2b->byte1;
- ccl->reg[2] = char2b->byte2;
+ ccl->reg[0] = CHARSET_ID (charset);
+ ccl->reg[1] = XCHAR2B_BYTE1 (char2b);
+ ccl->reg[2] = XCHAR2B_BYTE2 (char2b);
}
- ccl_driver (ccl, NULL, NULL, 0, 0, NULL);
+ ccl_driver (ccl, NULL, NULL, 0, 0, Qnil);
/* We assume that MSBs are appropriately set/reset by CCL
program. */
if (font->max_byte1 == 0) /* 1-byte font */
- char2b->byte1 = 0, char2b->byte2 = ccl->reg[1];
+ STORE_XCHAR2B (char2b, 0, ccl->reg[1]);
else
- char2b->byte1 = ccl->reg[1], char2b->byte2 = ccl->reg[2];
+ STORE_XCHAR2B (char2b, ccl->reg[1], ccl->reg[2]);
}
- else if (font_info->encoding[charset])
+ else if (font_info->encoding_type)
{
/* Fixed encoding scheme. See fontset.h for the meaning of the
encoding numbers. */
- int enc = font_info->encoding[charset];
+ unsigned char enc = font_info->encoding_type;
if ((enc == 1 || enc == 2)
&& CHARSET_DIMENSION (charset) == 2)
@@ -2369,13 +2372,12 @@ mac_encode_char (c, char2b, font_info, two_byte_p)
char2b->byte2 |= 0x80;
if (enc == 4)
- {
- int sjis1, sjis2;
+ {
+ int code = (char2b->byte1 << 8) | char2b->byte2;
- ENCODE_SJIS (char2b->byte1, char2b->byte2, sjis1, sjis2);
- char2b->byte1 = sjis1;
- char2b->byte2 = sjis2;
- }
+ JIS_TO_SJIS (code);
+ STORE_XCHAR2B (char2b, (code >> 8), (code & 0xFF));
+ }
}
if (two_byte_p)
@@ -2494,9 +2496,9 @@ x_set_mouse_face_gc (s)
face = FACE_FROM_ID (s->f, MOUSE_FACE_ID);
if (s->first_glyph->type == CHAR_GLYPH)
- face_id = FACE_FOR_CHAR (s->f, face, s->first_glyph->u.ch);
+ face_id = FACE_FOR_CHAR (s->f, face, s->first_glyph->u.ch, -1, Qnil);
else
- face_id = FACE_FOR_CHAR (s->f, face, 0);
+ face_id = FACE_FOR_CHAR (s->f, face, 0, -1, Qnil);
s->face = FACE_FROM_ID (s->f, face_id);
PREPARE_FACE_FOR_DISPLAY (s->f, s->face);
@@ -5687,11 +5689,16 @@ x_new_font (f, fontname)
register char *fontname;
{
struct font_info *fontp
- = FS_LOAD_FONT (f, 0, fontname, -1);
+ = FS_LOAD_FONT (f, fontname);
if (!fontp)
return Qnil;
+ if (FRAME_FONT (f) == (XFontStruct *) (fontp->font))
+ /* This font is already set in frame F. There's nothing more to
+ do. */
+ return build_string (fontp->full_name);
+
FRAME_FONT (f) = (XFontStruct *) (fontp->font);
FRAME_BASELINE_OFFSET (f) = fontp->baseline_offset;
FRAME_FONTSET (f) = -1;
@@ -5734,38 +5741,50 @@ x_new_font (f, fontname)
return build_string (fontp->full_name);
}
+
+/* Give frame F the fontset named FONTSETNAME as its default fontset,
+ and return the full name of that fontset. FONTSETNAME may be a
+ wildcard pattern; in that case, we choose some fontset that fits
+ the pattern. FONTSETNAME may be a font name for ASCII characters;
+ in that case, we create a fontset from that font name.
-/* Give frame F the fontset named FONTSETNAME as its default font, and
- return the full name of that fontset. FONTSETNAME may be a wildcard
- pattern; in that case, we choose some fontset that fits the pattern.
- The return value shows which fontset we chose. */
+ The return value shows which fontset we chose.
+ If FONTSETNAME specifies the default fontset, return Qt.
+ If an ASCII font in the specified fontset can't be loaded, return
+ Qnil. */
Lisp_Object
x_new_fontset (f, fontsetname)
struct frame *f;
- char *fontsetname;
+ Lisp_Object fontsetname;
{
- int fontset = fs_query_fontset (build_string (fontsetname), 0);
+ int fontset = fs_query_fontset (fontsetname, 0);
Lisp_Object result;
- if (fontset < 0)
- return Qnil;
-
- if (FRAME_FONTSET (f) == fontset)
+ if (fontset > 0 && FRAME_FONTSET(f) == fontset)
/* This fontset is already set in frame F. There's nothing more
to do. */
return fontset_name (fontset);
+ else if (fontset == 0)
+ /* The default fontset can't be the default font. */
+ return Qt;
- result = x_new_font (f, (SDATA (fontset_ascii (fontset))));
+ if (fontset > 0)
+ result = x_new_font (f, (SDATA (fontset_ascii (fontset))));
+ else
+ result = x_new_font (f, SDATA (fontsetname));
if (!STRINGP (result))
/* Can't load ASCII font. */
return Qnil;
+ if (fontset < 0)
+ fontset = new_fontset_from_font_name (result);
+
/* Since x_new_font doesn't update any fontset information, do it now. */
FRAME_FONTSET (f) = fontset;
- return build_string (fontsetname);
+ return fontset_name (fontset);
}
@@ -6978,12 +6997,12 @@ decode_mac_font_name (name, size, coding_system)
coding.src_multibyte = 0;
coding.dst_multibyte = 1;
coding.mode |= CODING_MODE_LAST_BLOCK;
- coding.composing = COMPOSITION_DISABLED;
- buf = (char *) alloca (size);
+ coding.dst_bytes = size;
+ coding.destination = (unsigned char *) alloca (coding.dst_bytes);
- decode_coding (&coding, name, buf, strlen (name), size - 1);
- bcopy (buf, name, coding.produced);
- name[coding.produced] = '\0';
+ decode_coding_c_string (&coding, name, strlen (name), Qnil);
+ bcopy (coding.destination, name, min (coding.produced, size));
+ name[min (coding.produced, size)] = '\0';
}
}
@@ -8220,6 +8239,7 @@ x_load_font (f, fontname, size)
bzero (fontp, sizeof (*fontp));
fontp->font = font;
fontp->font_idx = i;
+ fontp->charset = -1; /* fs_load_font sets it. */
fontp->name = (char *) xmalloc (strlen (fontname) + 1);
bcopy (fontname, fontp->name, strlen (fontname) + 1);
@@ -8265,19 +8285,20 @@ x_load_font (f, fontname, size)
fontp->height = max_height;
}
+ /* MAC_TODO: The script encoding is irrelevant in unicode? */
/* The slot `encoding' specifies how to map a character
code-points (0x20..0x7F or 0x2020..0x7F7F) of each charset to
the font code-points (0:0x20..0x7F, 1:0xA0..0xFF), or
(0:0x2020..0x7F7F, 1:0xA0A0..0xFFFF, 3:0x20A0..0x7FFF,
2:0xA020..0xFF7F). For the moment, we don't know which charset
- uses this font. So, we set information in fontp->encoding[1]
+ uses this font. So, we set information in fontp->encoding_type
which is never used by any charset. If mapping can't be
decided, set FONT_ENCODING_NOT_DECIDED. */
if (font->mac_scriptcode == smJapanese)
- fontp->encoding[1] = 4;
+ fontp->encoding_type = 4;
else
{
- fontp->encoding[1]
+ fontp->encoding_type
= (font->max_byte1 == 0
/* 1-byte font */
? (font->min_char_or_byte2 < 0x80
@@ -10136,34 +10157,10 @@ mac_set_unicode_keystroke_event (code, buf)
int charset_id, c1, c2;
if (code < 0x80)
- {
- buf->kind = ASCII_KEYSTROKE_EVENT;
- buf->code = code;
- }
- else if (code < 0x100)
- {
- if (code < 0xA0)
- charset_id = CHARSET_8_BIT_CONTROL;
- else
- charset_id = charset_latin_iso8859_1;
- buf->kind = MULTIBYTE_CHAR_KEYSTROKE_EVENT;
- buf->code = MAKE_CHAR (charset_id, code, 0);
- }
+ buf->kind = ASCII_KEYSTROKE_EVENT;
else
- {
- if (code < 0x2500)
- charset_id = charset_mule_unicode_0100_24ff,
- code -= 0x100;
- else if (code < 0x33FF)
- charset_id = charset_mule_unicode_2500_33ff,
- code -= 0x2500;
- else if (code >= 0xE000)
- charset_id = charset_mule_unicode_e000_ffff,
- code -= 0xE000;
- c1 = (code / 96) + 32, c2 = (code % 96) + 32;
- buf->kind = MULTIBYTE_CHAR_KEYSTROKE_EVENT;
- buf->code = MAKE_CHAR (charset_id, c1, c2);
- }
+ buf->kind = MULTIBYTE_CHAR_KEYSTROKE_EVENT;
+ buf->code = code;
}
#endif
diff --git a/src/makefile.w32-in b/src/makefile.w32-in
index a94938a0b80..ec407c883f3 100644
--- a/src/makefile.w32-in
+++ b/src/makefile.w32-in
@@ -118,6 +118,8 @@ OBJ1 = $(BLD)/abbrev.$(O) \
$(BLD)/region-cache.$(O) \
$(BLD)/strftime.$(O) \
$(BLD)/charset.$(O) \
+ $(BLD)/character.$(O) \
+ $(BLD)/chartab.$(O) \
$(BLD)/coding.$(O) \
$(BLD)/category.$(O) \
$(BLD)/ccl.$(O) \
@@ -125,6 +127,7 @@ OBJ1 = $(BLD)/abbrev.$(O) \
$(BLD)/fringe.$(O) \
$(BLD)/image.$(O)
+
WIN32OBJ = $(BLD)/w32term.$(O) \
$(BLD)/w32xfns.$(O) \
$(BLD)/w32fns.$(O) \
@@ -171,7 +174,7 @@ temacs: $(BLD) $(TEMACS)
$(TEMACS): $(TLIB0) $(TLIB1) $(TLIBW32) $(TLASTLIB) $(TOBJ) $(TRES) \
../nt/$(BLD)/addsection.exe
$(LINK) $(LINK_OUT)$(TEMACS_TMP) $(FULL_LINK_FLAGS) $(TOBJ) $(TRES) $(LIBS)
- "../nt/$(BLD)/addsection" "$(TEMACS_TMP)" "$(TEMACS)" EMHEAP 16
+ "../nt/$(BLD)/addsection" "$(TEMACS_TMP)" "$(TEMACS)" EMHEAP 20
echo $(OBJ0) > $(BLD)/buildobj.lst
echo $(OBJ1) >> $(BLD)/buildobj.lst
echo $(WIN32OBJ) >> $(BLD)/buildobj.lst
@@ -271,6 +274,7 @@ $(BLD)/abbrev.$(O) : \
$(EMACS_ROOT)/src/m/intel386.h \
$(EMACS_ROOT)/src/config.h \
$(SRC)/buffer.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/commands.h \
$(SRC)/dispextern.h \
@@ -286,6 +290,7 @@ $(BLD)/alloc.$(O) : \
$(EMACS_ROOT)/src/config.h \
$(SRC)/blockinput.h \
$(SRC)/buffer.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/composite.h \
$(SRC)/dispextern.h \
@@ -368,6 +373,7 @@ $(BLD)/callproc.$(O) : \
$(EMACS_ROOT)/nt/inc/sys/file.h \
$(SRC)/buffer.h \
$(SRC)/ccl.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/coding.h \
$(SRC)/commands.h \
@@ -384,6 +390,7 @@ $(BLD)/casefiddle.$(O) : \
$(EMACS_ROOT)/src/m/intel386.h \
$(EMACS_ROOT)/src/config.h \
$(SRC)/buffer.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/commands.h \
$(SRC)/composite.h \
@@ -405,6 +412,7 @@ $(BLD)/category.$(O) : \
$(EMACS_ROOT)/src/config.h \
$(SRC)/buffer.h \
$(SRC)/category.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/keymap.h
@@ -414,9 +422,22 @@ $(BLD)/ccl.$(O) : \
$(EMACS_ROOT)/src/m/intel386.h \
$(EMACS_ROOT)/src/config.h \
$(SRC)/ccl.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/coding.h
+$(BLD)/character.$(O) : \
+ $(SRC)/character.c \
+ $(EMACS_ROOT)/src/s/ms-w32.h \
+ $(EMACS_ROOT)/src/m/intel386.h \
+ $(EMACS_ROOT)/src/config.h \
+ $(SRC)/buffer.h \
+ $(SRC)/character.h \
+ $(SRC)/charset.h \
+ $(SRC)/coding.h \
+ $(SRC)/composite.h \
+ $(SRC)/disptab.h
+
$(BLD)/charset.$(O) : \
$(SRC)/charset.c \
$(EMACS_ROOT)/src/s/ms-w32.h \
@@ -424,11 +445,20 @@ $(BLD)/charset.$(O) : \
$(EMACS_ROOT)/src/config.h \
$(SRC)/buffer.h \
$(SRC)/ccl.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/coding.h \
$(SRC)/composite.h \
$(SRC)/disptab.h
+$(BLD)/chartab.$(O) : \
+ $(SRC)/chartab.c \
+ $(EMACS_ROOT)/src/s/ms-w32.h \
+ $(EMACS_ROOT)/src/m/intel386.h \
+ $(EMACS_ROOT)/src/config.h \
+ $(SRC)/charset.h \
+ $(SRC)/character.h
+
$(BLD)/cm.$(O) : \
$(SRC)/cm.c \
$(EMACS_ROOT)/src/s/ms-w32.h \
@@ -443,6 +473,7 @@ $(BLD)/cmds.$(O) : \
$(EMACS_ROOT)/src/m/intel386.h \
$(EMACS_ROOT)/src/config.h \
$(SRC)/buffer.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/commands.h \
$(SRC)/dispextern.h \
@@ -460,6 +491,7 @@ $(BLD)/coding.$(O) : \
$(EMACS_ROOT)/src/config.h \
$(SRC)/buffer.h \
$(SRC)/ccl.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/coding.h \
$(SRC)/composite.h \
@@ -475,6 +507,7 @@ $(BLD)/composite.$(O) : \
$(EMACS_ROOT)/src/m/intel386.h \
$(EMACS_ROOT)/src/config.h \
$(SRC)/buffer.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/composite.h \
$(SRC)/dispextern.h \
@@ -488,6 +521,7 @@ $(BLD)/data.$(O) : \
$(EMACS_ROOT)/src/m/intel386.h \
$(EMACS_ROOT)/src/config.h \
$(SRC)/buffer.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/frame.h \
$(SRC)/keyboard.h \
@@ -501,6 +535,7 @@ $(BLD)/dired.$(O) : \
$(EMACS_ROOT)/src/config.h \
$(SRC)/buffer.h \
$(SRC)/ccl.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/coding.h \
$(SRC)/commands.h \
@@ -518,6 +553,7 @@ $(BLD)/dispnew.$(O) : \
$(SRC)/atimer.h \
$(SRC)/blockinput.h \
$(SRC)/buffer.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/cm.h \
$(SRC)/commands.h \
@@ -546,6 +582,7 @@ $(BLD)/doc.$(O) : \
$(EMACS_ROOT)/src/config.h \
$(EMACS_ROOT)/nt/inc/sys/file.h \
$(SRC)/buffer.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/keyboard.h \
$(SRC)/keymap.h
@@ -555,6 +592,7 @@ $(BLD)/doprnt.$(O) : \
$(EMACS_ROOT)/src/s/ms-w32.h \
$(EMACS_ROOT)/src/m/intel386.h \
$(EMACS_ROOT)/src/config.h \
+ $(SRC)/character.h \
$(SRC)/charset.c
$(BLD)/editfns.$(O) : \
@@ -565,6 +603,7 @@ $(BLD)/editfns.$(O) : \
$(EMACS_ROOT)/nt/inc/pwd.h \
$(SRC)/buffer.h \
$(SRC)/ccl.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/coding.h \
$(SRC)/composite.h \
@@ -626,6 +665,7 @@ $(BLD)/fileio.$(O) : \
$(EMACS_ROOT)/nt/inc/sys/file.h \
$(SRC)/buffer.h \
$(SRC)/ccl.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/coding.h \
$(SRC)/commands.h \
@@ -647,6 +687,7 @@ $(BLD)/filelock.$(O) : \
$(EMACS_ROOT)/src/epaths.h \
$(SRC)/buffer.h \
$(SRC)/ccl.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/coding.h \
$(SRC)/systime.h
@@ -682,6 +723,7 @@ $(BLD)/fns.$(O) : \
$(SRC)/blockinput.h \
$(SRC)/buffer.h \
$(SRC)/ccl.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/coding.h \
$(SRC)/commands.h \
@@ -704,6 +746,7 @@ $(BLD)/fontset.$(O) : \
$(EMACS_ROOT)/src/config.h \
$(SRC)/buffer.h \
$(SRC)/ccl.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/dispextern.h \
$(SRC)/fontset.h \
@@ -721,6 +764,7 @@ $(BLD)/frame.$(O) : \
$(SRC)/atimer.h \
$(SRC)/blockinput.h \
$(SRC)/buffer.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/commands.h \
$(SRC)/dispextern.h \
@@ -782,6 +826,7 @@ $(BLD)/indent.$(O) : \
$(EMACS_ROOT)/src/config.h \
$(SRC)/buffer.h \
$(SRC)/category.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/composite.h \
$(SRC)/dispextern.h \
@@ -805,6 +850,7 @@ $(BLD)/insdel.$(O) : \
$(SRC)/atimer.h \
$(SRC)/blockinput.h \
$(SRC)/buffer.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/composite.h \
$(SRC)/dispextern.h \
@@ -840,6 +886,7 @@ $(BLD)/keyboard.$(O) : \
$(SRC)/atimer.h \
$(SRC)/blockinput.h \
$(SRC)/buffer.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/commands.h \
$(SRC)/composite.h \
@@ -871,6 +918,7 @@ $(BLD)/keymap.$(O) : \
$(SRC)/atimer.h \
$(SRC)/blockinput.h \
$(SRC)/buffer.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/commands.h \
$(SRC)/composite.h \
@@ -899,6 +947,7 @@ $(BLD)/lread.$(O) : \
$(EMACS_ROOT)/src/epaths.h \
$(SRC)/buffer.h \
$(SRC)/ccl.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/coding.h \
$(SRC)/commands.h \
@@ -930,6 +979,7 @@ $(BLD)/marker.$(O) : \
$(EMACS_ROOT)/src/m/intel386.h \
$(EMACS_ROOT)/src/config.h \
$(SRC)/buffer.h \
+ $(SRC)/character.h \
$(SRC)/charset.h
$(BLD)/md5.$(O) : \
@@ -942,6 +992,7 @@ $(BLD)/minibuf.$(O) : \
$(EMACS_ROOT)/src/m/intel386.h \
$(EMACS_ROOT)/src/config.h \
$(SRC)/buffer.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/commands.h \
$(SRC)/composite.h \
@@ -1009,6 +1060,7 @@ $(BLD)/w32console.$(O) : \
$(SRC)/s/ms-w32.h \
$(SRC)/m/intel386.h \
$(SRC)/ccl.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/coding.h \
$(SRC)/config.h \
@@ -1026,6 +1078,7 @@ $(BLD)/print.$(O) : \
$(EMACS_ROOT)/src/m/intel386.h \
$(EMACS_ROOT)/src/config.h \
$(SRC)/buffer.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/composite.h \
$(SRC)/dispextern.h \
@@ -1049,6 +1102,7 @@ $(BLD)/process.$(O) : \
$(SRC)/blockinput.h \
$(SRC)/buffer.h \
$(SRC)/ccl.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/coding.h \
$(SRC)/commands.h \
@@ -1088,6 +1142,7 @@ $(BLD)/regex.$(O) : \
$(SRC)/m/intel386.h \
$(SRC)/buffer.h \
$(SRC)/category.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/config.h \
$(SRC)/regex.h \
@@ -1123,6 +1178,7 @@ $(BLD)/search.$(O) : \
$(SRC)/blockinput.h \
$(SRC)/buffer.h \
$(SRC)/category.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/commands.h \
$(SRC)/composite.h \
@@ -1155,6 +1211,7 @@ $(BLD)/syntax.$(O) : \
$(EMACS_ROOT)/src/config.h \
$(SRC)/buffer.h \
$(SRC)/category.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/commands.h \
$(SRC)/composite.h \
@@ -1199,6 +1256,7 @@ $(BLD)/term.$(O) : \
$(EMACS_ROOT)/src/m/intel386.h \
$(EMACS_ROOT)/src/config.h \
$(SRC)/ccl.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/cm.h \
$(SRC)/coding.h \
@@ -1295,6 +1353,7 @@ $(BLD)/xdisp.$(O) : \
$(SRC)/blockinput.h \
$(SRC)/buffer.h \
$(SRC)/ccl.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/coding.h \
$(SRC)/commands.h \
@@ -1326,6 +1385,7 @@ $(BLD)/xfaces.$(O): \
$(SRC)/atimer.h \
$(SRC)/blockinput.h \
$(SRC)/buffer.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/composite.h \
$(SRC)/dispextern.h \
@@ -1348,6 +1408,7 @@ $(BLD)/w32fns.$(O): \
$(SRC)/blockinput.h \
$(SRC)/buffer.h \
$(SRC)/ccl.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/coding.h \
$(SRC)/composite.h \
@@ -1373,6 +1434,7 @@ $(BLD)/w32menu.$(O): \
$(SRC)/atimer.h \
$(SRC)/blockinput.h \
$(SRC)/buffer.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/coding.h \
$(SRC)/dispextern.h \
@@ -1395,6 +1457,7 @@ $(BLD)/w32term.$(O): \
$(SRC)/blockinput.h \
$(SRC)/buffer.h \
$(SRC)/ccl.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/coding.h \
$(SRC)/composite.h \
@@ -1426,6 +1489,7 @@ $(BLD)/w32select.$(O): \
$(SRC)/blockinput.h \
$(SRC)/buffer.h \
$(SRC)/ccl.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/coding.h \
$(SRC)/composite.h \
@@ -1457,6 +1521,7 @@ $(BLD)/w32xfns.$(O): \
$(EMACS_ROOT)/src/config.h \
$(SRC)/atimer.h \
$(SRC)/blockinput.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/fontset.h \
$(SRC)/frame.h \
@@ -1473,6 +1538,7 @@ $(BLD)/w32bdf.$(O): \
$(EMACS_ROOT)/src/config.h \
$(SRC)/atimer.h \
$(SRC)/blockinput.h \
+ $(SRC)/character.h \
$(SRC)/charset.h \
$(SRC)/dispextern.h \
$(SRC)/fontset.h \
diff --git a/src/marker.c b/src/marker.c
index 20b660ddadd..82e62e0aa99 100644
--- a/src/marker.c
+++ b/src/marker.c
@@ -23,7 +23,7 @@ Boston, MA 02110-1301, USA. */
#include <config.h>
#include "lisp.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
/* Record one cached position found recently by
buf_charpos_to_bytepos or buf_bytepos_to_charpos. */
diff --git a/src/minibuf.c b/src/minibuf.c
index 9c56ea8618e..848dbd0fed0 100644
--- a/src/minibuf.c
+++ b/src/minibuf.c
@@ -27,7 +27,7 @@ Boston, MA 02110-1301, USA. */
#include "lisp.h"
#include "commands.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "dispextern.h"
#include "keyboard.h"
#include "frame.h"
@@ -2334,23 +2334,14 @@ Return nil if there is no valid completion, else t. */)
/* Now find first word-break in the stuff found by completion.
i gets index in string of where to stop completing. */
- {
- int len, c;
- int bytes = SBYTES (completion);
- register const unsigned char *completion_string = SDATA (completion);
- for (; i_byte < SBYTES (completion); i_byte += len, i++)
- {
- c = STRING_CHAR_AND_LENGTH (completion_string + i_byte,
- bytes - i_byte,
- len);
- if (SYNTAX (c) != Sword)
- {
- i_byte += len;
- i++;
- break;
- }
- }
- }
+ while (i_byte < SBYTES (completion))
+ {
+ int c;
+
+ FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, completion, i, i_byte);
+ if (SYNTAX (c) != Sword)
+ break;
+ }
/* If got no characters, print help for user. */
@@ -2630,7 +2621,7 @@ DEFUN ("self-insert-and-exit", Fself_insert_and_exit, Sself_insert_and_exit, 0,
doc: /* Terminate minibuffer input. */)
()
{
- if (INTEGERP (last_command_char))
+ if (CHARACTERP (last_command_char))
internal_self_insert (XINT (last_command_char), 0);
else
bitch_at_user ();
diff --git a/src/msdos.c b/src/msdos.c
index 2068af544ac..581b2ea38e8 100644
--- a/src/msdos.c
+++ b/src/msdos.c
@@ -56,7 +56,7 @@ Boston, MA 02110-1301, USA. */
#include "dispextern.h"
#include "dosfns.h"
#include "termopts.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "disptab.h"
#include "frame.h"
@@ -3799,15 +3799,15 @@ XMenuActivate (Display *foo, XMenu *menu, int *pane, int *selidx,
screensize = screen_size * 2;
faces[0]
= lookup_derived_face (sf, intern ("msdos-menu-passive-face"),
- 0, DEFAULT_FACE_ID, 1);
+ DEFAULT_FACE_ID, 1);
faces[1]
= lookup_derived_face (sf, intern ("msdos-menu-active-face"),
- 0, DEFAULT_FACE_ID, 1);
+ DEFAULT_FACE_ID, 1);
selectface = intern ("msdos-menu-select-face");
faces[2] = lookup_derived_face (sf, selectface,
- 0, faces[0], 1);
+ faces[0], 1);
faces[3] = lookup_derived_face (sf, selectface,
- 0, faces[1], 1);
+ faces[1], 1);
/* Make sure the menu title is always displayed with
`msdos-menu-active-face', no matter where the mouse pointer is. */
diff --git a/src/print.c b/src/print.c
index c36b9476f82..6fdd41ada04 100644
--- a/src/print.c
+++ b/src/print.c
@@ -25,6 +25,7 @@ Boston, MA 02110-1301, USA. */
#include <stdio.h>
#include "lisp.h"
#include "buffer.h"
+#include "character.h"
#include "charset.h"
#include "keyboard.h"
#include "frame.h"
@@ -472,11 +473,15 @@ print_string (string, printcharfun)
{
int chars;
+ if (print_escape_nonascii)
+ string = string_escape_byte8 (string);
+
if (STRING_MULTIBYTE (string))
chars = SCHARS (string);
- else if (EQ (printcharfun, Qt)
- ? ! NILP (buffer_defaults.enable_multibyte_characters)
- : ! NILP (current_buffer->enable_multibyte_characters))
+ else if (! print_escape_nonascii
+ && (EQ (printcharfun, Qt)
+ ? ! NILP (buffer_defaults.enable_multibyte_characters)
+ : ! NILP (current_buffer->enable_multibyte_characters)))
{
/* If unibyte string STRING contains 8-bit codes, we must
convert STRING to a multibyte string containing the same
@@ -522,11 +527,6 @@ print_string (string, printcharfun)
int len;
int ch = STRING_CHAR_AND_LENGTH (SDATA (string) + i,
size_byte - i, len);
- if (!CHAR_VALID_P (ch, 0))
- {
- ch = SREF (string, i);
- len = 1;
- }
PRINTCHAR (ch);
i += len;
}
@@ -1431,6 +1431,93 @@ print_preprocess_string (interval, arg)
print_preprocess (interval->plist);
}
+/* A flag to control printing of `charset' text property.
+ The default value is Qdefault. */
+Lisp_Object Vprint_charset_text_property;
+extern Lisp_Object Qdefault;
+
+static void print_check_string_charset_prop ();
+
+#define PRINT_STRING_NON_CHARSET_FOUND 1
+#define PRINT_STRING_UNSAFE_CHARSET_FOUND 2
+
+/* Bitwize or of the abobe macros. */
+static int print_check_string_result;
+
+static void
+print_check_string_charset_prop (interval, string)
+ INTERVAL interval;
+ Lisp_Object string;
+{
+ Lisp_Object val;
+
+ if (NILP (interval->plist)
+ || (print_check_string_result == (PRINT_STRING_NON_CHARSET_FOUND
+ | PRINT_STRING_UNSAFE_CHARSET_FOUND)))
+ return;
+ for (val = interval->plist; CONSP (val) && ! EQ (XCAR (val), Qcharset);
+ val = XCDR (XCDR (val)));
+ if (! CONSP (val))
+ {
+ print_check_string_result |= PRINT_STRING_NON_CHARSET_FOUND;
+ return;
+ }
+ if (! (print_check_string_result & PRINT_STRING_NON_CHARSET_FOUND))
+ {
+ if (! EQ (val, interval->plist)
+ || CONSP (XCDR (XCDR (val))))
+ print_check_string_result |= PRINT_STRING_NON_CHARSET_FOUND;
+ }
+ if (NILP (Vprint_charset_text_property)
+ || ! (print_check_string_result & PRINT_STRING_UNSAFE_CHARSET_FOUND))
+ {
+ int i, c;
+ int charpos = interval->position;
+ int bytepos = string_char_to_byte (string, charpos);
+ Lisp_Object charset;
+
+ charset = XCAR (XCDR (val));
+ for (i = 0; i < LENGTH (interval); i++)
+ {
+ FETCH_STRING_CHAR_ADVANCE (c, string, charpos, bytepos);
+ if (! ASCII_CHAR_P (c)
+ && ! EQ (CHARSET_NAME (CHAR_CHARSET (c)), charset))
+ {
+ print_check_string_result |= PRINT_STRING_UNSAFE_CHARSET_FOUND;
+ break;
+ }
+ }
+ }
+}
+
+/* The value is (charset . nil). */
+static Lisp_Object print_prune_charset_plist;
+
+static Lisp_Object
+print_prune_string_charset (string)
+ Lisp_Object string;
+{
+ print_check_string_result = 0;
+ traverse_intervals (STRING_INTERVALS (string), 0,
+ print_check_string_charset_prop, string);
+ if (! (print_check_string_result & PRINT_STRING_UNSAFE_CHARSET_FOUND))
+ {
+ string = Fcopy_sequence (string);
+ if (print_check_string_result & PRINT_STRING_NON_CHARSET_FOUND)
+ {
+ if (NILP (print_prune_charset_plist))
+ print_prune_charset_plist = Fcons (Qcharset, Qnil);
+ Fremove_text_properties (make_number (0),
+ make_number (SCHARS (string)),
+ print_prune_charset_plist, string);
+ }
+ else
+ Fset_text_properties (make_number (0), make_number (SCHARS (string)),
+ Qnil, string);
+ }
+ return string;
+}
+
static void
print_object (obj, printcharfun, escapeflag)
Lisp_Object obj;
@@ -1443,7 +1530,7 @@ print_object (obj, printcharfun, escapeflag)
/* Detect circularities and truncate them. */
if (STRINGP (obj) || CONSP (obj) || VECTORP (obj)
- || COMPILEDP (obj) || CHAR_TABLE_P (obj)
+ || COMPILEDP (obj) || CHAR_TABLE_P (obj) || SUB_CHAR_TABLE_P (obj)
|| (! NILP (Vprint_gensym)
&& SYMBOLP (obj)
&& !SYMBOL_INTERNED_P (obj)))
@@ -1539,6 +1626,9 @@ print_object (obj, printcharfun, escapeflag)
GCPRO1 (obj);
+ if (! EQ (Vprint_charset_text_property, Qt))
+ obj = print_prune_string_charset (obj);
+
if (!NULL_INTERVAL_P (STRING_INTERVALS (obj)))
{
PRINTCHAR ('#');
@@ -1560,10 +1650,7 @@ print_object (obj, printcharfun, escapeflag)
{
c = STRING_CHAR_AND_LENGTH (str + i_byte,
size_byte - i_byte, len);
- if (CHAR_VALID_P (c, 0))
- i_byte += len;
- else
- c = str[i_byte++];
+ i_byte += len;
}
else
c = str[i_byte++];
@@ -1581,8 +1668,8 @@ print_object (obj, printcharfun, escapeflag)
PRINTCHAR ('f');
}
else if (multibyte
- && ! ASCII_BYTE_P (c)
- && (SINGLE_BYTE_CHAR_P (c) || print_escape_multibyte))
+ && (CHAR_BYTE8_P (c)
+ || (! ASCII_CHAR_P (c) && print_escape_multibyte)))
{
/* When multibyte is disabled,
print multibyte string chars using hex escapes.
@@ -1590,9 +1677,15 @@ print_object (obj, printcharfun, escapeflag)
when found in a multibyte string, always use a hex escape
so it reads back as multibyte. */
unsigned char outbuf[50];
- sprintf (outbuf, "\\x%x", c);
+
+ if (CHAR_BYTE8_P (c))
+ sprintf (outbuf, "\\%03o", CHAR_TO_BYTE8 (c));
+ else
+ {
+ sprintf (outbuf, "\\x%04x", c);
+ need_nonhex = 1;
+ }
strout (outbuf, -1, -1, printcharfun, 0);
- need_nonhex = 1;
}
else if (! multibyte
&& SINGLE_BYTE_CHAR_P (c) && ! ASCII_BYTE_P (c)
@@ -1873,7 +1966,12 @@ print_object (obj, printcharfun, escapeflag)
{
QUIT;
c = XBOOL_VECTOR (obj)->data[i];
- if (c == '\n' && print_escape_newlines)
+ if (! ASCII_BYTE_P (c))
+ {
+ sprintf (buf, "\\%03o", c);
+ strout (buf, -1, -1, printcharfun, 0);
+ }
+ else if (c == '\n' && print_escape_newlines)
{
PRINTCHAR ('\\');
PRINTCHAR ('n');
@@ -1975,7 +2073,7 @@ print_object (obj, printcharfun, escapeflag)
PRINTCHAR ('#');
size &= PSEUDOVECTOR_SIZE_MASK;
}
- if (CHAR_TABLE_P (obj))
+ if (CHAR_TABLE_P (obj) || SUB_CHAR_TABLE_P (obj))
{
/* We print a char-table as if it were a vector,
lumping the parent and default slots in with the
@@ -2165,6 +2263,8 @@ print_interval (interval, printcharfun)
INTERVAL interval;
Lisp_Object printcharfun;
{
+ if (NILP (interval->plist))
+ return;
PRINTCHAR (' ');
print_object (make_number (interval->position), printcharfun, 1);
PRINTCHAR (' ');
@@ -2287,6 +2387,19 @@ the printing done so far has not found any shared structure or objects
that need to be recorded in the table. */);
Vprint_number_table = Qnil;
+ DEFVAR_LISP ("print-charset-text-property", &Vprint_charset_text_property,
+ doc: /* A flag to control printing of `charset' text property on printing a string.
+The value must be nil, t, or `default'.
+
+If the value is nil, don't print the text property `charset'.
+
+If the value is t, always print the text property `charset'.
+
+If the value is `default', print the text property `charset' only when
+the value is different from what is guessed in the current charset
+priorities. */);
+ Vprint_charset_text_property = Qdefault;
+
/* prin1_to_string_buffer initialized in init_buffer_once in buffer.c */
staticpro (&Vprin1_to_string_buffer);
@@ -2314,6 +2427,9 @@ that need to be recorded in the table. */);
Qprint_escape_nonascii = intern ("print-escape-nonascii");
staticpro (&Qprint_escape_nonascii);
+ print_prune_charset_plist = Qnil;
+ staticpro (&print_prune_charset_plist);
+
defsubr (&Swith_output_to_temp_buffer);
}
diff --git a/src/process.c b/src/process.c
index 857d7494c69..45bf6446644 100644
--- a/src/process.c
+++ b/src/process.c
@@ -135,7 +135,7 @@ Boston, MA 02110-1301, USA. */
#include "window.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "process.h"
#include "termhooks.h"
@@ -670,6 +670,7 @@ setup_process_coding_systems (process)
struct Lisp_Process *p = XPROCESS (process);
int inch = XINT (p->infd);
int outch = XINT (p->outfd);
+ Lisp_Object coding_system;
if (inch < 0 || outch < 0)
return;
@@ -677,26 +678,24 @@ setup_process_coding_systems (process)
if (!proc_decode_coding_system[inch])
proc_decode_coding_system[inch]
= (struct coding_system *) xmalloc (sizeof (struct coding_system));
- setup_coding_system (p->decode_coding_system,
- proc_decode_coding_system[inch]);
+ coding_system = p->decode_coding_system;
if (! NILP (p->filter))
{
if (NILP (p->filter_multibyte))
- setup_raw_text_coding_system (proc_decode_coding_system[inch]);
+ coding_system = raw_text_coding_system (coding_system);
}
else if (BUFFERP (p->buffer))
{
if (NILP (XBUFFER (p->buffer)->enable_multibyte_characters))
- setup_raw_text_coding_system (proc_decode_coding_system[inch]);
+ coding_system = raw_text_coding_system (coding_system);
}
+ setup_coding_system (coding_system, proc_decode_coding_system[inch]);
if (!proc_encode_coding_system[outch])
proc_encode_coding_system[outch]
= (struct coding_system *) xmalloc (sizeof (struct coding_system));
setup_coding_system (p->encode_coding_system,
proc_encode_coding_system[outch]);
- if (proc_encode_coding_system[outch]->eol_type == CODING_EOL_UNDECIDED)
- proc_encode_coding_system[outch]->eol_type = system_eol_type;
}
DEFUN ("processp", Fprocessp, Sprocessp, 1, 1, 0,
@@ -5091,13 +5090,13 @@ read_process_output (proc, channel)
save the match data in a special nonrecursive fashion. */
running_asynch_code = 1;
- text = decode_coding_string (make_unibyte_string (chars, nbytes),
- coding, 0);
- Vlast_coding_system_used = coding->symbol;
+ decode_coding_c_string (coding, chars, nbytes, Qt);
+ text = coding->dst_object;
+ Vlast_coding_system_used = CODING_ID_NAME (coding->id);
/* A new coding system might be found. */
- if (!EQ (p->decode_coding_system, coding->symbol))
+ if (!EQ (p->decode_coding_system, Vlast_coding_system_used))
{
- p->decode_coding_system = coding->symbol;
+ p->decode_coding_system = Vlast_coding_system_used;
/* Don't call setup_coding_system for
proc_decode_coding_system[channel] here. It is done in
@@ -5113,22 +5112,21 @@ read_process_output (proc, channel)
if (NILP (p->encode_coding_system)
&& proc_encode_coding_system[XINT (p->outfd)])
{
- p->encode_coding_system = coding->symbol;
- setup_coding_system (coding->symbol,
+ p->encode_coding_system
+ = coding_inherit_eol_type (Vlast_coding_system_used, Qnil);
+ setup_coding_system (p->encode_coding_system,
proc_encode_coding_system[XINT (p->outfd)]);
- if (proc_encode_coding_system[XINT (p->outfd)]->eol_type
- == CODING_EOL_UNDECIDED)
- proc_encode_coding_system[XINT (p->outfd)]->eol_type
- = system_eol_type;
}
}
- carryover = nbytes - coding->consumed;
- if (SCHARS (p->decoding_buf) < carryover)
- p->decoding_buf = make_uninit_string (carryover);
- bcopy (chars + coding->consumed, SDATA (p->decoding_buf),
- carryover);
- XSETINT (p->decoding_carryover, carryover);
+ if (coding->carryover_bytes > 0)
+ {
+ if (SCHARS (p->decoding_buf) < coding->carryover_bytes)
+ p->decoding_buf = make_uninit_string (coding->carryover_bytes);
+ bcopy (coding->carryover, SDATA (p->decoding_buf),
+ coding->carryover_bytes);
+ XSETINT (p->decoding_carryover, coding->carryover_bytes);
+ }
/* Adjust the multibyteness of TEXT to that of the filter. */
if (NILP (p->filter_multibyte) != ! STRING_MULTIBYTE (text))
text = (STRING_MULTIBYTE (text)
@@ -5213,32 +5211,31 @@ read_process_output (proc, channel)
if (! (BEGV <= PT && PT <= ZV))
Fwiden ();
- text = decode_coding_string (make_unibyte_string (chars, nbytes),
- coding, 0);
- Vlast_coding_system_used = coding->symbol;
+ decode_coding_c_string (coding, chars, nbytes, Qt);
+ text = coding->dst_object;
+ Vlast_coding_system_used = CODING_ID_NAME (coding->id);
/* A new coding system might be found. See the comment in the
similar code in the previous `if' block. */
- if (!EQ (p->decode_coding_system, coding->symbol))
+ if (!EQ (p->decode_coding_system, Vlast_coding_system_used))
{
- p->decode_coding_system = coding->symbol;
+ p->decode_coding_system = Vlast_coding_system_used;
if (NILP (p->encode_coding_system)
&& proc_encode_coding_system[XINT (p->outfd)])
{
- p->encode_coding_system = coding->symbol;
- setup_coding_system (coding->symbol,
+ p->encode_coding_system
+ = coding_inherit_eol_type (Vlast_coding_system_used, Qnil);
+ setup_coding_system (p->encode_coding_system,
proc_encode_coding_system[XINT (p->outfd)]);
- if (proc_encode_coding_system[XINT (p->outfd)]->eol_type
- == CODING_EOL_UNDECIDED)
- proc_encode_coding_system[XINT (p->outfd)]->eol_type
- = system_eol_type;
}
}
- carryover = nbytes - coding->consumed;
- if (SCHARS (p->decoding_buf) < carryover)
- p->decoding_buf = make_uninit_string (carryover);
- bcopy (chars + coding->consumed, SDATA (p->decoding_buf),
- carryover);
- XSETINT (p->decoding_carryover, carryover);
+ if (coding->carryover_bytes > 0)
+ {
+ if (SCHARS (p->decoding_buf) < coding->carryover_bytes)
+ p->decoding_buf = make_uninit_string (coding->carryover_bytes);
+ bcopy (coding->carryover, SDATA (p->decoding_buf),
+ coding->carryover_bytes);
+ XSETINT (p->decoding_carryover, coding->carryover_bytes);
+ }
/* Adjust the multibyteness of TEXT to that of the buffer. */
if (NILP (current_buffer->enable_multibyte_characters)
!= ! STRING_MULTIBYTE (text))
@@ -5360,24 +5357,19 @@ send_process (proc, buf, len, object)
error ("Output file descriptor of %s is closed", SDATA (p->name));
coding = proc_encode_coding_system[XINT (p->outfd)];
- Vlast_coding_system_used = coding->symbol;
+ Vlast_coding_system_used = CODING_ID_NAME (coding->id);
if ((STRINGP (object) && STRING_MULTIBYTE (object))
|| (BUFFERP (object)
&& !NILP (XBUFFER (object)->enable_multibyte_characters))
|| EQ (object, Qt))
{
- if (!EQ (coding->symbol, p->encode_coding_system))
+ if (!EQ (Vlast_coding_system_used, p->encode_coding_system))
/* The coding system for encoding was changed to raw-text
because we sent a unibyte text previously. Now we are
sending a multibyte text, thus we must encode it by the
original coding system specified for the current process. */
setup_coding_system (p->encode_coding_system, coding);
- if (coding->eol_type == CODING_EOL_UNDECIDED)
- coding->eol_type = system_eol_type;
- /* src_multibyte should be set to 1 _after_ a call to
- setup_coding_system, since it resets src_multibyte to
- zero. */
coding->src_multibyte = 1;
}
else
@@ -5385,60 +5377,56 @@ send_process (proc, buf, len, object)
/* For sending a unibyte text, character code conversion should
not take place but EOL conversion should. So, setup raw-text
or one of the subsidiary if we have not yet done it. */
- if (coding->type != coding_type_raw_text)
+ if (CODING_REQUIRE_ENCODING (coding))
{
if (CODING_REQUIRE_FLUSHING (coding))
{
/* But, before changing the coding, we must flush out data. */
coding->mode |= CODING_MODE_LAST_BLOCK;
send_process (proc, "", 0, Qt);
+ coding->mode &= CODING_MODE_LAST_BLOCK;
}
+ setup_coding_system (raw_text_coding_system
+ (Vlast_coding_system_used),
+ coding);
coding->src_multibyte = 0;
- setup_raw_text_coding_system (coding);
}
}
coding->dst_multibyte = 0;
if (CODING_REQUIRE_ENCODING (coding))
{
- int require = encoding_buffer_size (coding, len);
- int from_byte = -1, from = -1, to = -1;
-
+ coding->dst_object = Qt;
if (BUFFERP (object))
{
- from_byte = BUF_PTR_BYTE_POS (XBUFFER (object), buf);
- from = buf_bytepos_to_charpos (XBUFFER (object), from_byte);
- to = buf_bytepos_to_charpos (XBUFFER (object), from_byte + len);
+ int from_byte, from, to;
+ int save_pt, save_pt_byte;
+ struct buffer *cur = current_buffer;
+
+ set_buffer_internal (XBUFFER (object));
+ save_pt = PT, save_pt_byte = PT_BYTE;
+
+ from_byte = PTR_BYTE_POS (buf);
+ from = BYTE_TO_CHAR (from_byte);
+ to = BYTE_TO_CHAR (from_byte + len);
+ TEMP_SET_PT_BOTH (from, from_byte);
+ encode_coding_object (coding, object, from, from_byte,
+ to, from_byte + len, Qt);
+ TEMP_SET_PT_BOTH (save_pt, save_pt_byte);
+ set_buffer_internal (cur);
}
else if (STRINGP (object))
{
- from_byte = buf - SDATA (object);
- from = string_byte_to_char (object, from_byte);
- to = string_byte_to_char (object, from_byte + len);
+ encode_coding_string (coding, object, 1);
}
-
- if (coding->composing != COMPOSITION_DISABLED)
+ else
{
- if (from_byte >= 0)
- coding_save_composition (coding, from, to, object);
- else
- coding->composing = COMPOSITION_DISABLED;
+ coding->dst_object = make_unibyte_string (buf, len);
+ coding->produced = len;
}
- if (SBYTES (p->encoding_buf) < require)
- p->encoding_buf = make_uninit_string (require);
-
- if (from_byte >= 0)
- buf = (BUFFERP (object)
- ? BUF_BYTE_ADDRESS (XBUFFER (object), from_byte)
- : SDATA (object) + from_byte);
-
- object = p->encoding_buf;
- encode_coding (coding, (char *) buf, SDATA (object),
- len, SBYTES (object));
- coding_free_composition_data (coding);
len = coding->produced;
- buf = SDATA (object);
+ buf = SDATA (coding->dst_object);
}
#ifdef VMS
@@ -6764,7 +6752,7 @@ encode subprocess input. */)
error ("Output file descriptor of %s closed", SDATA (p->name));
Fcheck_coding_system (decoding);
Fcheck_coding_system (encoding);
-
+ encoding = coding_inherit_eol_type (encoding, Qnil);
p->decode_coding_system = decoding;
p->encode_coding_system = encoding;
setup_process_coding_systems (process);
@@ -7150,7 +7138,7 @@ The variable takes effect when `start-process' is called. */);
#include "lisp.h"
#include "systime.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "termopts.h"
#include "sysselect.h"
diff --git a/src/regex.c b/src/regex.c
index 846c87041b1..48baacd81e2 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -123,7 +123,7 @@
# define SYNTAX_ENTRY_VIA_PROPERTY
# include "syntax.h"
-# include "charset.h"
+# include "character.h"
# include "category.h"
# ifdef malloc
@@ -144,28 +144,44 @@
# define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
+# define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
# define RE_STRING_CHAR(p, s) \
(multibyte ? (STRING_CHAR (p, s)) : (*(p)))
# define RE_STRING_CHAR_AND_LENGTH(p, s, len) \
(multibyte ? (STRING_CHAR_AND_LENGTH (p, s, len)) : ((len) = 1, *(p)))
-/* Set C a (possibly multibyte) character before P. P points into a
- string which is the virtual concatenation of STR1 (which ends at
- END1) or STR2 (which ends at END2). */
-# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
- do { \
- if (multibyte) \
- { \
- re_char *dtemp = (p) == (str2) ? (end1) : (p); \
- re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
- re_char *d0 = dtemp; \
- PREV_CHAR_BOUNDARY (d0, dlimit); \
- c = STRING_CHAR (d0, dtemp - d0); \
- } \
- else \
- (c = ((p) == (str2) ? (end1) : (p))[-1]); \
+/* Set C a (possibly converted to multibyte) character before P. P
+ points into a string which is the virtual concatenation of STR1
+ (which ends at END1) or STR2 (which ends at END2). */
+# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
+ do { \
+ if (multibyte) \
+ { \
+ re_char *dtemp = (p) == (str2) ? (end1) : (p); \
+ re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
+ while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp)); \
+ c = STRING_CHAR (dtemp, (p) - dtemp); \
+ } \
+ else \
+ { \
+ (c = ((p) == (str2) ? (end1) : (p))[-1]); \
+ MAKE_CHAR_MULTIBYTE (c); \
+ } \
} while (0)
+/* Set C a (possibly converted to multibyte) character at P, and set
+ LEN to the byte length of that character. */
+# define GET_CHAR_AFTER(c, p, len) \
+ do { \
+ if (multibyte) \
+ c = STRING_CHAR_AND_LENGTH (p, 0, len); \
+ else \
+ { \
+ c = *p; \
+ len = 1; \
+ MAKE_CHAR_MULTIBYTE (c); \
+ } \
+ } while (0)
#else /* not emacs */
@@ -277,6 +293,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
# define CHARSET_LEADING_CODE_BASE(c) 0
# define MAX_MULTIBYTE_LENGTH 1
# define RE_MULTIBYTE_P(x) 0
+# define RE_TARGET_MULTIBYTE_P(x) 0
# define WORD_BOUNDARY_P(c1, c2) (0)
# define CHAR_HEAD_P(p) (1)
# define SINGLE_BYTE_CHAR_P(c) (1)
@@ -290,7 +307,15 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
# define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH
# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
(c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
+# define GET_CHAR_AFTER(c, p, len) \
+ (c = *p, len = 1)
# define MAKE_CHAR(charset, c1, c2) (c1)
+# define BYTE8_TO_CHAR(c) (c)
+# define CHAR_BYTE8_P(c) (0)
+# define MAKE_CHAR_MULTIBYTE(c) (c)
+# define MAKE_CHAR_UNIBYTE(c) (c)
+# define CHAR_LEADING_CODE(c) (c)
+
#endif /* not emacs */
#ifndef RE_TRANSLATE
@@ -496,7 +521,7 @@ init_syntax_once ()
# ifdef __GNUC__
# define alloca __builtin_alloca
# else /* not __GNUC__ */
-# if HAVE_ALLOCA_H
+# ifdef HAVE_ALLOCA_H
# include <alloca.h>
# endif /* HAVE_ALLOCA_H */
# endif /* not __GNUC__ */
@@ -1947,10 +1972,10 @@ struct range_table_work_area
#define EXTEND_RANGE_TABLE(work_area, n) \
do { \
- if (((work_area)->used + (n)) * sizeof (int) > (work_area)->allocated) \
+ if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated) \
{ \
- extend_range_table_work_area (work_area); \
- if ((work_area)->table == 0) \
+ extend_range_table_work_area (&work_area); \
+ if ((work_area).table == 0) \
return (REG_ESPACE); \
} \
} while (0)
@@ -1967,15 +1992,12 @@ struct range_table_work_area
#define BIT_UPPER 0x10
#define BIT_MULTIBYTE 0x20
-/* Set a range START..END to WORK_AREA.
- The range is passed through TRANSLATE, so START and END
- should be untranslated. */
-#define SET_RANGE_TABLE_WORK_AREA(work_area, start, end) \
+/* Set a range (RANGE_START, RANGE_END) to WORK_AREA. */
+#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end) \
do { \
- int tem; \
- tem = set_image_of_range (&work_area, start, end, translate); \
- if (tem > 0) \
- FREE_STACK_RETURN (tem); \
+ EXTEND_RANGE_TABLE ((work_area), 2); \
+ (work_area).table[(work_area).used++] = (range_start); \
+ (work_area).table[(work_area).used++] = (range_end); \
} while (0)
/* Free allocated memory for WORK_AREA. */
@@ -1995,6 +2017,38 @@ struct range_table_work_area
#define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
+#ifdef emacs
+
+/* Store characters in the rage range C0 to C1 in WORK_AREA while
+ translating them and paying attention to the continuity of
+ translated characters.
+
+ Implementation note: It is better to implement this fairly big
+ macro by a function, but it's not that easy because macros called
+ in this macro assume various local variables already declared. */
+
+#define SETUP_MULTIBYTE_RANGE(work_area, c0, c1) \
+ do { \
+ re_wchar_t c, t, t_last; \
+ int n; \
+ \
+ c = (c0); \
+ t_last = multibyte ? TRANSLATE (c) : TRANSLATE (MAKE_CHAR_MULTIBYTE (c)); \
+ for (c++, n = 1; c <= (c1); c++, n++) \
+ { \
+ t = multibyte ? TRANSLATE (c) : TRANSLATE (MAKE_CHAR_MULTIBYTE (c)); \
+ if (t_last + n == t) \
+ continue; \
+ SET_RANGE_TABLE_WORK_AREA ((work_area), t_last, t_last + n - 1); \
+ t_last = t; \
+ n = 0; \
+ } \
+ if (n > 0) \
+ SET_RANGE_TABLE_WORK_AREA ((work_area), t_last, t_last + n - 1); \
+ } while (0)
+
+#endif /* emacs */
+
/* Get the next unsigned number in the uncompiled pattern. */
#define GET_UNSIGNED_NUMBER(num) \
do { \
@@ -2118,6 +2172,7 @@ extend_range_table_work_area (work_area)
= (int *) malloc (work_area->allocated);
}
+#if 0
#ifdef emacs
/* Carefully find the ranges of codes that are equivalent
@@ -2350,6 +2405,7 @@ set_image_of_range (work_area, start, end, translate)
return -1;
}
+#endif /* 0 */
#ifndef MATCH_MAY_ALLOCATE
@@ -2493,6 +2549,9 @@ regex_compile (pattern, size, syntax, bufp)
/* If the object matched can contain multibyte characters. */
const boolean multibyte = RE_MULTIBYTE_P (bufp);
+ /* If a target of matching can contain multibyte characters. */
+ const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
+
/* Nonzero if we have pushed down into a subpattern. */
int in_subpattern = 0;
@@ -2870,10 +2929,6 @@ regex_compile (pattern, size, syntax, bufp)
break;
}
- /* What should we do for the character which is
- greater than 0x7F, but not BASE_LEADING_CODE_P?
- XXX */
-
/* See if we're at the beginning of a possible character
class. */
@@ -2912,6 +2967,7 @@ regex_compile (pattern, size, syntax, bufp)
{
re_wchar_t ch;
re_wctype_t cc;
+ int limit;
cc = re_wctype (str);
@@ -2931,15 +2987,31 @@ regex_compile (pattern, size, syntax, bufp)
don't need to handle them for multibyte.
They are distinguished by a negative wctype. */
- if (multibyte)
- SET_RANGE_TABLE_WORK_AREA_BIT (range_table_work,
- re_wctype_to_bit (cc));
+ for (ch = 0; ch < 128; ++ch)
+ if (re_iswctype (btowc (ch), cc))
+ {
+ c = TRANSLATE (ch);
+ SET_LIST_BIT (c);
+ }
- for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
+ if (target_multibyte)
+ {
+ SET_RANGE_TABLE_WORK_AREA_BIT
+ (range_table_work, re_wctype_to_bit (cc));
+ }
+ else
{
- int translated = TRANSLATE (ch);
- if (re_iswctype (btowc (ch), cc))
- SET_LIST_BIT (translated);
+ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch)
+ {
+ c = ch;
+ MAKE_CHAR_MULTIBYTE (c);
+ if (re_iswctype (btowc (c), cc))
+ {
+ c = TRANSLATE (c);
+ MAKE_CHAR_UNIBYTE (c);
+ SET_LIST_BIT (c);
+ }
+ }
}
/* Repeat the loop. */
@@ -2966,57 +3038,51 @@ regex_compile (pattern, size, syntax, bufp)
/* Fetch the character which ends the range. */
PATFETCH (c1);
-
- if (SINGLE_BYTE_CHAR_P (c))
+ if (c > c1)
{
- if (! SINGLE_BYTE_CHAR_P (c1))
- {
- /* Handle a range starting with a
- character of less than 256, and ending
- with a character of not less than 256.
- Split that into two ranges, the low one
- ending at 0377, and the high one
- starting at the smallest character in
- the charset of C1 and ending at C1. */
- int charset = CHAR_CHARSET (c1);
- re_wchar_t c2 = MAKE_CHAR (charset, 0, 0);
-
- SET_RANGE_TABLE_WORK_AREA (range_table_work,
- c2, c1);
- c1 = 0377;
- }
+ if (syntax & RE_NO_EMPTY_RANGES)
+ FREE_STACK_RETURN (REG_ERANGEX);
+ /* Else, repeat the loop. */
}
- else if (!SAME_CHARSET_P (c, c1))
- FREE_STACK_RETURN (REG_ERANGEX);
}
else
/* Range from C to C. */
c1 = c;
- /* Set the range ... */
- if (SINGLE_BYTE_CHAR_P (c))
- /* ... into bitmap. */
+#ifndef emacs
+ c = TRANSLATE (c);
+ c1 = TRANSLATE (c1);
+ /* Set the range into bitmap */
+ for (; c <= c1; c++)
+ SET_LIST_BIT (TRANSLATE (c));
+#else /* not emacs */
+ if (target_multibyte)
{
- re_wchar_t this_char;
- re_wchar_t range_start = c, range_end = c1;
-
- /* If the start is after the end, the range is empty. */
- if (range_start > range_end)
+ if (c1 >= 128)
{
- if (syntax & RE_NO_EMPTY_RANGES)
- FREE_STACK_RETURN (REG_ERANGE);
- /* Else, repeat the loop. */
+ re_wchar_t c0 = MAX (c, 128);
+
+ SETUP_MULTIBYTE_RANGE (range_table_work, c0, c1);
+ c1 = 127;
}
- else
+ for (; c <= c1; c++)
+ SET_LIST_BIT (TRANSLATE (c));
+ }
+ else
+ {
+ re_wchar_t c0;
+
+ for (; c <= c1; c++)
{
- for (this_char = range_start; this_char <= range_end;
- this_char++)
- SET_LIST_BIT (TRANSLATE (this_char));
+ c0 = c;
+ if (! multibyte)
+ MAKE_CHAR_MULTIBYTE (c0);
+ c0 = TRANSLATE (c0);
+ MAKE_CHAR_UNIBYTE (c0);
+ SET_LIST_BIT (c0);
}
}
- else
- /* ... into range table. */
- SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
+#endif /* not emacs */
}
/* Discard any (non)matching list bytes that are all 0 at the
@@ -3601,12 +3667,20 @@ regex_compile (pattern, size, syntax, bufp)
{
int len;
+ if (! multibyte)
+ MAKE_CHAR_MULTIBYTE (c);
c = TRANSLATE (c);
- if (multibyte)
- len = CHAR_STRING (c, b);
+ if (target_multibyte)
+ {
+ len = CHAR_STRING (c, b);
+ b += len;
+ }
else
- *b = c, len = 1;
- b += len;
+ {
+ MAKE_CHAR_UNIBYTE (c);
+ *b++ = c;
+ len = 1;
+ }
(*pending_exact) += len;
}
@@ -3630,6 +3704,11 @@ regex_compile (pattern, size, syntax, bufp)
/* We have succeeded; set the length of the buffer. */
bufp->used = b - bufp->buffer;
+#ifdef emacs
+ /* Now the buffer is adjusted for the multibyteness of a target. */
+ bufp->multibyte = bufp->target_multibyte;
+#endif
+
#ifdef DEBUG
if (debug > 0)
{
@@ -3875,14 +3954,11 @@ analyse_first (p, pend, fastmap, multibyte)
case exactn:
if (fastmap)
- {
- int c = RE_STRING_CHAR (p + 1, pend - p);
-
- if (SINGLE_BYTE_CHAR_P (c))
- fastmap[c] = 1;
- else
- fastmap[p[1]] = 1;
- }
+ /* If multibyte is nonzero, the first byte of each
+ character is an ASCII or a leading code. Otherwise,
+ each byte is a character. Thus, this works in both
+ cases. */
+ fastmap[p[1]] = 1;
break;
@@ -3894,14 +3970,18 @@ analyse_first (p, pend, fastmap, multibyte)
case charset_not:
- /* Chars beyond end of bitmap are possible matches.
- All the single-byte codes can occur in multibyte buffers.
- So any that are not listed in the charset
- are possible matches, even in multibyte buffers. */
if (!fastmap) break;
- for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
- j < (1 << BYTEWIDTH); j++)
- fastmap[j] = 1;
+ {
+ /* Chars beyond end of bitmap are possible matches. */
+ /* In a multibyte case, the bitmap is used only for ASCII
+ characters. */
+ int limit = multibyte ? 128 : (1 << BYTEWIDTH);
+
+ for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
+ j < limit; j++)
+ fastmap[j] = 1;
+ }
+
/* Fallthrough */
case charset:
if (!fastmap) break;
@@ -3912,19 +3992,17 @@ analyse_first (p, pend, fastmap, multibyte)
fastmap[j] = 1;
if ((not && multibyte)
- /* Any character set can possibly contain a character
+ /* Any leading code can possibly start a character
which doesn't match the specified set of characters. */
|| (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
&& CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
/* If we can match a character class, we can match
- any character set. */
+ any multibyte characters. */
{
- set_fastmap_for_multibyte_characters:
if (match_any_multibyte_characters == false)
{
- for (j = 0x80; j < 0xA0; j++) /* XXX */
- if (BASE_LEADING_CODE_P (j))
- fastmap[j] = 1;
+ for (j = 0x80; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
match_any_multibyte_characters = true;
}
}
@@ -3932,9 +4010,10 @@ analyse_first (p, pend, fastmap, multibyte)
else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
&& match_any_multibyte_characters == false)
{
- /* Set fastmap[I] 1 where I is a base leading code of each
- multibyte character in the range table. */
+ /* Set fastmap[I] to 1 where I is a leading code of each
+ multibyte characer in the range table. */
int c, count;
+ unsigned char lc1, lc2;
/* Make P points the range table. `+ 2' is to skip flag
bits for a character class. */
@@ -3944,10 +4023,14 @@ analyse_first (p, pend, fastmap, multibyte)
EXTRACT_NUMBER_AND_INCR (count, p);
for (; count > 0; count--, p += 2 * 3) /* XXX */
{
- /* Extract the start of each range. */
+ /* Extract the start and end of each range. */
EXTRACT_CHARACTER (c, p);
- j = CHAR_CHARSET (c);
- fastmap[CHARSET_LEADING_CODE_BASE (j)] = 1;
+ lc1 = CHAR_LEADING_CODE (c);
+ p += 3;
+ EXTRACT_CHARACTER (c, p);
+ lc2 = CHAR_LEADING_CODE (c);
+ for (j = lc1; j <= lc2; j++)
+ fastmap[j] = 1;
}
}
break;
@@ -3972,14 +4055,21 @@ analyse_first (p, pend, fastmap, multibyte)
if (!fastmap) break;
not = (re_opcode_t)p[-1] == notcategoryspec;
k = *p++;
- for (j = 0; j < (1 << BYTEWIDTH); j++)
+ for (j = (multibyte ? 127 : (1 << BYTEWIDTH)); j >= 0; j--)
if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
fastmap[j] = 1;
if (multibyte)
- /* Any character set can possibly contain a character
- whose category is K (or not). */
- goto set_fastmap_for_multibyte_characters;
+ {
+ /* Any character set can possibly contain a character
+ whose category is K (or not). */
+ if (match_any_multibyte_characters == false)
+ {
+ for (j = 0x80; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
+ match_any_multibyte_characters = true;
+ }
+ }
break;
/* All cases after this match the empty string. These end with
@@ -4229,8 +4319,8 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
int total_size = size1 + size2;
int endpos = startpos + range;
boolean anchored_start;
-
- /* Nonzero if we have to concern multibyte character. */
+ /* Nonzero if BUFP is setup for multibyte characters. We are sure
+ that it is the same as RE_TARGET_MULTIBYTE_P (bufp). */
const boolean multibyte = RE_MULTIBYTE_P (bufp);
/* Check for out-of-range STARTPOS. */
@@ -4327,37 +4417,47 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
buf_charlen);
-
buf_ch = RE_TRANSLATE (translate, buf_ch);
- if (buf_ch >= 0400
- || fastmap[buf_ch])
+ if (fastmap[CHAR_LEADING_CODE (buf_ch)])
break;
range -= buf_charlen;
d += buf_charlen;
}
else
- {
- /* Convert *d to integer to shut up GCC's
- whining about comparison that is always
- true. */
- int di = *d;
-
- while (range > lim
- && !fastmap[RE_TRANSLATE (translate, di)])
- {
- di = *(++d);
- range--;
- }
- }
+ while (range > lim)
+ {
+ buf_ch = *d;
+ MAKE_CHAR_MULTIBYTE (buf_ch);
+ buf_ch = RE_TRANSLATE (translate, buf_ch);
+ MAKE_CHAR_UNIBYTE (buf_ch);
+ if (fastmap[buf_ch])
+ break;
+ d++;
+ range--;
+ }
}
else
- while (range > lim && !fastmap[*d])
- {
- d++;
- range--;
- }
+ {
+ if (multibyte)
+ while (range > lim)
+ {
+ int buf_charlen;
+ buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
+ buf_charlen);
+ if (fastmap[CHAR_LEADING_CODE (buf_ch)])
+ break;
+ range -= buf_charlen;
+ d += buf_charlen;
+ }
+ else
+ while (range > lim && !fastmap[*d])
+ {
+ d++;
+ range--;
+ }
+ }
startpos += irange - range;
}
else /* Searching backwards. */
@@ -4365,12 +4465,18 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
int room = (startpos >= size1
? size2 + size1 - startpos
: size1 - startpos);
- buf_ch = RE_STRING_CHAR (d, room);
- buf_ch = TRANSLATE (buf_ch);
-
- if (! (buf_ch >= 0400
- || fastmap[buf_ch]))
- goto advance;
+ if (multibyte)
+ {
+ buf_ch = STRING_CHAR (d, room);
+ buf_ch = TRANSLATE (buf_ch);
+ if (! fastmap[CHAR_LEADING_CODE (buf_ch)])
+ goto advance;
+ }
+ else
+ {
+ if (! fastmap[TRANSLATE (*d)])
+ goto advance;
+ }
}
}
@@ -4667,7 +4773,7 @@ mutually_exclusive_p (bufp, p1, p2)
/* Test if C is listed in charset (or charset_not)
at `p1'. */
- if (SINGLE_BYTE_CHAR_P (c))
+ if (! multibyte || IS_REAL_ASCII (c))
{
if (c < CHARSET_BITMAP_SIZE (p1) * BYTEWIDTH
&& p1[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
@@ -4710,9 +4816,10 @@ mutually_exclusive_p (bufp, p1, p2)
size of bitmap table of P1 is extracted by
using macro `CHARSET_BITMAP_SIZE'.
- Since we know that all the character listed in
- P2 is ASCII, it is enough to test only bitmap
- table of P1. */
+ In a multibyte case, we know that all the character
+ listed in P2 is ASCII. In a unibyte case, P1 has only a
+ bitmap table. So, in both cases, it is enough to test
+ only the bitmap table of P1. */
if ((re_opcode_t) *p1 == charset)
{
@@ -4876,6 +4983,24 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
}
WEAK_ALIAS (__re_match_2, re_match_2)
+#ifdef emacs
+#define TRANSLATE_VIA_MULTIBYTE(c) \
+ do { \
+ if (multibyte) \
+ (c) = TRANSLATE (c); \
+ else \
+ { \
+ MAKE_CHAR_MULTIBYTE (c); \
+ (c) = TRANSLATE (c); \
+ MAKE_CHAR_UNIBYTE (c); \
+ } \
+ } while (0)
+
+#else
+#define TRANSLATE_VIA_MULTIBYTE(c) ((c) = TRANSLATE (c))
+#endif
+
+
/* This is a separate function so that we can force an alloca cleanup
afterwards. */
static int
@@ -4915,7 +5040,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
/* We use this to map every character in the string. */
RE_TRANSLATE_TYPE translate = bufp->translate;
- /* Nonzero if we have to concern multibyte character. */
+ /* Nonzero if BUFP is setup for multibyte characters. We are sure
+ that it is the same as RE_TARGET_MULTIBYTE_P (bufp). */
const boolean multibyte = RE_MULTIBYTE_P (bufp);
/* Failure point stack. Each place that can handle a failure further
@@ -5269,63 +5395,71 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
/* Remember the start point to rollback upon failure. */
dfail = d;
+#ifndef emacs
/* This is written out as an if-else so we don't waste time
testing `translate' inside the loop. */
if (RE_TRANSLATE_P (translate))
- {
- if (multibyte)
- do
+ do
+ {
+ PREFETCH ();
+ if (RE_TRANSLATE (translate, *d) != *p++)
{
- int pat_charlen, buf_charlen;
- unsigned int pat_ch, buf_ch;
-
- PREFETCH ();
- pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
- buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
-
- if (RE_TRANSLATE (translate, buf_ch)
- != pat_ch)
- {
- d = dfail;
- goto fail;
- }
-
- p += pat_charlen;
- d += buf_charlen;
- mcnt -= pat_charlen;
+ d = dfail;
+ goto fail;
}
- while (mcnt > 0);
- else
- do
+ d++;
+ }
+ while (--mcnt);
+ else
+ do
+ {
+ PREFETCH ();
+ if (*d++ != *p++)
{
- /* Avoid compiler whining about comparison being
- always true. */
- int di;
+ d = dfail;
+ goto fail;
+ }
+ }
+ while (--mcnt);
+#else /* emacs */
+ /* The cost of testing `translate' is comparatively small. */
+ if (multibyte)
+ do
+ {
+ int pat_charlen, buf_charlen;
+ unsigned int pat_ch, buf_ch;
- PREFETCH ();
- di = *d;
- if (RE_TRANSLATE (translate, di) != *p++)
- {
- d = dfail;
- goto fail;
- }
- d++;
+ PREFETCH ();
+ pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
+ buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
+
+ if (TRANSLATE (buf_ch) != pat_ch)
+ {
+ d = dfail;
+ goto fail;
}
- while (--mcnt);
- }
+
+ p += pat_charlen;
+ d += buf_charlen;
+ mcnt -= pat_charlen;
+ }
+ while (mcnt > 0);
else
- {
- do
- {
- PREFETCH ();
- if (*d++ != *p++)
- {
- d = dfail;
- goto fail;
- }
- }
- while (--mcnt);
- }
+ do
+ {
+ unsigned int buf_ch;
+
+ PREFETCH ();
+ buf_ch = *d++;
+ TRANSLATE_VIA_MULTIBYTE (buf_ch);
+ if (buf_ch != *p++)
+ {
+ d = dfail;
+ goto fail;
+ }
+ }
+ while (--mcnt);
+#endif
break;
@@ -5383,9 +5517,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
PREFETCH ();
c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
- c = TRANSLATE (c); /* The character to match. */
+ TRANSLATE_VIA_MULTIBYTE (c); /* The character to match. */
- if (SINGLE_BYTE_CHAR_P (c))
+ if (! multibyte || IS_REAL_ASCII (c))
{ /* Lookup bitmap. */
/* Cast to `unsigned' instead of `unsigned char' in
case the bit list is a full 32 bytes long. */
@@ -5548,7 +5682,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
}
else
{
- unsigned char c;
+ unsigned c;
GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2);
if (c == '\n')
break;
@@ -5815,6 +5949,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
is the character at D, and S2 is the syntax of C2. */
re_wchar_t c1, c2;
int s1, s2;
+ int dummy;
#ifdef emacs
int offset = PTR_TO_OFFSET (d - 1);
int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
@@ -5826,7 +5961,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
#endif
PREFETCH_NOLIMIT ();
- c2 = RE_STRING_CHAR (d, dend - d);
+ GET_CHAR_AFTER (c2, d, dummy);
s2 = SYNTAX (c2);
if (/* Case 2: Only one of S1 and S2 is Sword. */
@@ -5855,13 +5990,14 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
is the character at D, and S2 is the syntax of C2. */
re_wchar_t c1, c2;
int s1, s2;
+ int dummy;
#ifdef emacs
int offset = PTR_TO_OFFSET (d);
int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE (charpos);
#endif
PREFETCH ();
- c2 = RE_STRING_CHAR (d, dend - d);
+ GET_CHAR_AFTER (c2, d, dummy);
s2 = SYNTAX (c2);
/* Case 2: S2 is not Sword. */
@@ -5899,6 +6035,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
is the character at D, and S2 is the syntax of C2. */
re_wchar_t c1, c2;
int s1, s2;
+ int dummy;
#ifdef emacs
int offset = PTR_TO_OFFSET (d) - 1;
int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
@@ -5915,9 +6052,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
if (!AT_STRINGS_END (d))
{
PREFETCH_NOLIMIT ();
- c2 = RE_STRING_CHAR (d, dend - d);
+ GET_CHAR_AFTER (c2, d, dummy);
#ifdef emacs
- UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
+ UPDATE_SYNTAX_TABLE_FORWARD (charpos);
#endif
s2 = SYNTAX (c2);
@@ -6032,8 +6169,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
int len;
re_wchar_t c;
- c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
-
+ GET_CHAR_AFTER (c, d, len);
if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
goto fail;
d += len;
@@ -6069,8 +6205,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
int len;
re_wchar_t c;
- c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
-
+ GET_CHAR_AFTER (c, d, len);
if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
goto fail;
d += len;
@@ -6162,8 +6297,8 @@ bcmp_translate (s1, s2, len, translate, multibyte)
int p1_charlen, p2_charlen;
re_wchar_t p1_ch, p2_ch;
- p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
- p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
+ GET_CHAR_AFTER (p1_ch, p1, p1_charlen);
+ GET_CHAR_AFTER (p2_ch, p2, p2_charlen);
if (RE_TRANSLATE (translate, p1_ch)
!= RE_TRANSLATE (translate, p2_ch))
diff --git a/src/regex.h b/src/regex.h
index c850c640b36..efae7749f31 100644
--- a/src/regex.h
+++ b/src/regex.h
@@ -393,9 +393,15 @@ struct re_pattern_buffer
unsigned not_eol : 1;
#ifdef emacs
- /* If true, multi-byte form in the `buffer' should be recognized as a
- multibyte character. */
+ /* If true, multi-byte form in the regexp pattern should be
+ recognized as a multibyte character. When the pattern is
+ compiled, this is set to the same value as target_multibyte
+ below. */
unsigned multibyte : 1;
+
+ /* If true, multi-byte form in the target of match should be
+ recognized as a multibyte character. */
+ unsigned target_multibyte : 1;
#endif
/* [[[end pattern_buffer]]] */
diff --git a/src/search.c b/src/search.c
index 5d532a9d8dd..d826e2e1b05 100644
--- a/src/search.c
+++ b/src/search.c
@@ -25,7 +25,7 @@ Boston, MA 02110-1301, USA. */
#include "syntax.h"
#include "category.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "region-cache.h"
#include "commands.h"
#include "blockinput.h"
@@ -111,9 +111,8 @@ matcher_overflow ()
subexpression bounds.
POSIX is nonzero if we want full backtracking (POSIX style)
for this pattern. 0 means backtrack only enough to get a valid match.
- MULTIBYTE is nonzero if we want to handle multibyte characters in
- PATTERN. 0 means all multibyte characters are recognized just as
- sequences of binary data.
+ MULTIBYTE is nonzero iff a target of match is a multibyte buffer or
+ string.
The behavior also depends on Vsearch_spaces_regexp. */
@@ -126,56 +125,23 @@ compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
int posix;
int multibyte;
{
- unsigned char *raw_pattern;
- int raw_pattern_size;
char *val;
reg_syntax_t old;
- /* MULTIBYTE says whether the text to be searched is multibyte.
- We must convert PATTERN to match that, or we will not really
- find things right. */
-
- if (multibyte == STRING_MULTIBYTE (pattern))
- {
- raw_pattern = (unsigned char *) SDATA (pattern);
- raw_pattern_size = SBYTES (pattern);
- }
- else if (multibyte)
- {
- raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
- SCHARS (pattern));
- raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
- copy_text (SDATA (pattern), raw_pattern,
- SCHARS (pattern), 0, 1);
- }
- else
- {
- /* Converting multibyte to single-byte.
-
- ??? Perhaps this conversion should be done in a special way
- by subtracting nonascii-insert-offset from each non-ASCII char,
- so that only the multibyte chars which really correspond to
- the chosen single-byte character set can possibly match. */
- raw_pattern_size = SCHARS (pattern);
- raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
- copy_text (SDATA (pattern), raw_pattern,
- SBYTES (pattern), 1, 0);
- }
-
cp->regexp = Qnil;
cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
cp->posix = posix;
- cp->buf.multibyte = multibyte;
+ cp->buf.multibyte = STRING_MULTIBYTE (pattern);
+ cp->buf.target_multibyte = multibyte;
cp->whitespace_regexp = Vsearch_spaces_regexp;
BLOCK_INPUT;
old = re_set_syntax (RE_SYNTAX_EMACS
| (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
-
re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
: SDATA (Vsearch_spaces_regexp));
- val = (char *) re_compile_pattern ((char *)raw_pattern,
- raw_pattern_size, &cp->buf);
+ val = (char *) re_compile_pattern ((char *) SDATA (pattern),
+ SBYTES (pattern), &cp->buf);
re_set_whitespace_regexp (NULL);
@@ -239,7 +205,7 @@ compile_pattern (pattern, regp, translate, posix, multibyte)
&& !NILP (Fstring_equal (cp->regexp, pattern))
&& EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
&& cp->posix == posix
- && cp->buf.multibyte == multibyte
+ && cp->buf.target_multibyte == multibyte
&& !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
break;
@@ -1172,7 +1138,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
unsigned char *base_pat;
/* Set to positive if we find a non-ASCII char that need
translation. Otherwise set to zero later. */
- int charset_base = -1;
+ int char_base = -1;
int boyer_moore_ok = 1;
/* MULTIBYTE says whether the text to be searched is multibyte.
@@ -1213,7 +1179,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
/* Copy and optionally translate the pattern. */
len = raw_pattern_size;
len_byte = raw_pattern_size_byte;
- patbuf = (unsigned char *) alloca (len_byte);
+ patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
pat = patbuf;
base_pat = raw_pattern;
if (multibyte)
@@ -1263,46 +1229,47 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
if (c != inverse && boyer_moore_ok)
{
/* Check if all equivalents belong to the same
- charset & row. Note that the check of C
- itself is done by the last iteration. Note
- also that we don't have to check ASCII
- characters because boyer-moore search can
- always handle their translation. */
- while (1)
+ group of characters. Note that the check of C
+ itself is done by the last iteration. */
+ int this_char_base = -1;
+
+ while (boyer_moore_ok)
{
if (ASCII_BYTE_P (inverse))
{
- if (charset_base > 0)
+ if (this_char_base > 0)
+ boyer_moore_ok = 0;
+ else
{
- boyer_moore_ok = 0;
- break;
+ this_char_base = 0;
+ if (char_base < 0)
+ char_base = this_char_base;
}
- charset_base = 0;
}
- else if (SINGLE_BYTE_CHAR_P (inverse))
+ else if (CHAR_BYTE8_P (inverse))
+ /* Boyer-moore search can't handle a
+ translation of an eight-bit
+ character. */
+ boyer_moore_ok = 0;
+ else if (this_char_base < 0)
{
- /* Boyer-moore search can't handle a
- translation of an eight-bit
- character. */
- boyer_moore_ok = 0;
- break;
- }
- else if (charset_base < 0)
- charset_base = inverse & ~CHAR_FIELD3_MASK;
- else if ((inverse & ~CHAR_FIELD3_MASK)
- != charset_base)
- {
- boyer_moore_ok = 0;
- break;
+ this_char_base = inverse & ~0x3F;
+ if (char_base < 0)
+ char_base = this_char_base;
+ else if (char_base > 0
+ && this_char_base != char_base)
+ boyer_moore_ok = 0;
}
+ else if ((inverse & ~0x3F) != this_char_base)
+ boyer_moore_ok = 0;
if (c == inverse)
break;
TRANSLATE (inverse, inverse_trt, inverse);
}
}
}
- if (charset_base < 0)
- charset_base = 0;
+ if (char_base < 0)
+ char_base = 0;
/* Store this character into the translated pattern. */
bcopy (str, pat, charlen);
@@ -1314,7 +1281,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
else
{
/* Unibyte buffer. */
- charset_base = 0;
+ char_base = 0;
while (--len >= 0)
{
int c, translated;
@@ -1341,7 +1308,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
if (boyer_moore_ok)
return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
pos, pos_byte, lim, lim_byte,
- charset_base);
+ char_base);
else
return simple_search (n, pat, len, len_byte, trt,
pos, pos_byte, lim, lim_byte);
@@ -1371,6 +1338,9 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
{
int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
int forward = n > 0;
+ /* Number of buffer bytes matched. Note that this may be different
+ from len_byte in a multibyte buffer. */
+ int match_byte;
if (lim > pos && multibyte)
while (n > 0)
@@ -1410,8 +1380,9 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
if (this_len == 0)
{
+ match_byte = this_pos_byte - pos_byte;
pos += len;
- pos_byte += len_byte;
+ pos_byte += match_byte;
break;
}
@@ -1448,6 +1419,7 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
if (this_len == 0)
{
+ match_byte = len;
pos += len;
break;
}
@@ -1465,13 +1437,15 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
{
/* Try matching at position POS. */
int this_pos = pos - len;
- int this_pos_byte = pos_byte - len_byte;
+ int this_pos_byte;
int this_len = len;
int this_len_byte = len_byte;
unsigned char *p = pat;
if (pos - len < lim)
goto stop;
+ this_pos_byte = CHAR_TO_BYTE (this_pos);
+ match_byte = pos_byte - this_pos_byte;
while (this_len > 0)
{
@@ -1497,7 +1471,7 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
if (this_len == 0)
{
pos -= len;
- pos_byte -= len_byte;
+ pos_byte -= match_byte;
break;
}
@@ -1533,6 +1507,7 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
if (this_len == 0)
{
+ match_byte = len;
pos -= len;
break;
}
@@ -1547,9 +1522,9 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
if (n == 0)
{
if (forward)
- set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
+ set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
else
- set_search_regs (multibyte ? pos_byte : pos, len_byte);
+ set_search_regs (multibyte ? pos_byte : pos, match_byte);
return pos;
}
@@ -1570,13 +1545,13 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
have nontrivial translation are the same aside from the last byte.
This makes it possible to translate just the last byte of a
character, and do so after just a simple test of the context.
- CHARSET_BASE is nonzero iff there is such a non-ASCII character.
+ CHAR_BASE is nonzero iff there is such a non-ASCII character.
If that criterion is not satisfied, do not call this function. */
static int
boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
- pos, pos_byte, lim, lim_byte, charset_base)
+ pos, pos_byte, lim, lim_byte, char_base)
int n;
unsigned char *base_pat;
int len, len_byte;
@@ -1584,7 +1559,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
Lisp_Object inverse_trt;
int pos, pos_byte;
int lim, lim_byte;
- int charset_base;
+ int char_base;
{
int direction = ((n > 0) ? 1 : -1);
register int dirlen;
@@ -1598,12 +1573,13 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
unsigned char simple_translate[0400];
/* These are set to the preceding bytes of a byte to be translated
- if charset_base is nonzero. As the maximum byte length of a
- multibyte character is 4, we have to check at most three previous
+ if char_base is nonzero. As the maximum byte length of a
+ multibyte character is 5, we have to check at most four previous
bytes. */
int translate_prev_byte1 = 0;
int translate_prev_byte2 = 0;
int translate_prev_byte3 = 0;
+ int translate_prev_byte4 = 0;
#ifdef C_ALLOCA
int BM_tab_space[0400];
@@ -1669,20 +1645,23 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
for (i = 0; i < 0400; i++)
simple_translate[i] = i;
- if (charset_base)
+ if (char_base)
{
- /* Setup translate_prev_byte1/2/3 from CHARSET_BASE. Only a
+ /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
byte following them are the target of translation. */
- int sample_char = charset_base | 0x20;
unsigned char str[MAX_MULTIBYTE_LENGTH];
- int len = CHAR_STRING (sample_char, str);
+ int len = CHAR_STRING (char_base, str);
translate_prev_byte1 = str[len - 2];
if (len > 2)
{
translate_prev_byte2 = str[len - 3];
if (len > 3)
- translate_prev_byte3 = str[len - 4];
+ {
+ translate_prev_byte3 = str[len - 4];
+ if (len > 4)
+ translate_prev_byte4 = str[len - 5];
+ }
}
}
@@ -1698,12 +1677,12 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
/* If the byte currently looking at is the last of a
character to check case-equivalents, set CH to that
character. An ASCII character and a non-ASCII character
- matching with CHARSET_BASE are to be checked. */
+ matching with CHAR_BASE are to be checked. */
int ch = -1;
if (ASCII_BYTE_P (*ptr) || ! multibyte)
ch = *ptr;
- else if (charset_base
+ else if (char_base
&& ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
{
unsigned char *charstart = ptr - 1;
@@ -1711,12 +1690,12 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
while (! (CHAR_HEAD_P (*charstart)))
charstart--;
ch = STRING_CHAR (charstart, ptr - charstart + 1);
- if (charset_base != (ch & ~CHAR_FIELD3_MASK))
+ if (char_base != (ch & ~0x3F))
ch = -1;
}
if (ch >= 0400)
- j = ((unsigned char) ch) | 0200;
+ j = (ch & 0x3F) | 0200;
else
j = *ptr;
@@ -1735,9 +1714,9 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
{
TRANSLATE (ch, inverse_trt, ch);
if (ch >= 0400)
- j = ((unsigned char) ch) | 0200;
+ j = (ch & 0x3F) | 0200;
else
- j = (unsigned char) ch;
+ j = ch;
/* For all the characters that map into CH,
set up simple_translate to map the last byte
@@ -2036,7 +2015,7 @@ wordify (string)
{
int c;
- FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
+ FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
if (SYNTAX (c) != Sword)
{
@@ -2071,7 +2050,7 @@ wordify (string)
int c;
int i_byte_orig = i_byte;
- FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
+ FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
if (SYNTAX (c) == Sword)
{
@@ -2355,11 +2334,11 @@ since only regular expressions have distinguished subexpressions. */)
{
if (NILP (string))
{
- c = FETCH_CHAR (pos_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
INC_BOTH (pos, pos_byte);
}
else
- FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
+ FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
if (LOWERCASEP (c))
{
@@ -2531,10 +2510,7 @@ since only regular expressions have distinguished subexpressions. */)
Lisp_Object rev_tbl;
int really_changed = 0;
- rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
- ? Fchar_table_extra_slot (Vnonascii_translation_table,
- make_number (0))
- : Qnil);
+ rev_tbl = Qnil;
substed_alloc_size = length * 2 + 100;
substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
@@ -2577,7 +2553,7 @@ since only regular expressions have distinguished subexpressions. */)
{
FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
pos, pos_byte);
- if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
+ if (!buf_multibyte && !ASCII_CHAR_P (c))
c = multibyte_char_to_unibyte (c, rev_tbl);
}
else
diff --git a/src/syntax.c b/src/syntax.c
index eee9151f878..b0b4bdc0032 100644
--- a/src/syntax.c
+++ b/src/syntax.c
@@ -25,7 +25,7 @@ Boston, MA 02110-1301, USA. */
#include "lisp.h"
#include "commands.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "keymap.h"
#include "regex.h"
@@ -100,7 +100,8 @@ static int find_defun_start P_ ((int, int));
static int back_comment P_ ((EMACS_INT, EMACS_INT, EMACS_INT, int, int,
EMACS_INT *, EMACS_INT *));
static int char_quoted P_ ((int, int));
-static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object, int));
+static Lisp_Object skip_chars P_ ((int, Lisp_Object, Lisp_Object, int));
+static Lisp_Object skip_syntaxes P_ ((int, Lisp_Object, Lisp_Object));
static Lisp_Object scan_lists P_ ((EMACS_INT, EMACS_INT, EMACS_INT, int));
static void scan_sexps_forward P_ ((struct lisp_parse_state *,
int, int, int, int,
@@ -305,7 +306,7 @@ char_quoted (charpos, bytepos)
int c;
UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
- c = FETCH_CHAR (bytepos);
+ c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
code = SYNTAX (c);
if (! (code == Scharquote || code == Sescape))
break;
@@ -397,11 +398,11 @@ find_defun_start (pos, pos_byte)
/* Open-paren at start of line means we may have found our
defun-start. */
- c = FETCH_CHAR (PT_BYTE);
+ c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
if (SYNTAX (c) == Sopen)
{
SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
- c = FETCH_CHAR (PT_BYTE);
+ c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
if (SYNTAX (c) == Sopen)
break;
/* Now fallback to the default value. */
@@ -522,7 +523,7 @@ back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_p
UPDATE_SYNTAX_TABLE_BACKWARD (from);
prev_syntax = syntax;
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
syntax = SYNTAX_WITH_FLAGS (c);
code = SYNTAX (c);
@@ -551,7 +552,7 @@ back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_p
int next = from, next_byte = from_byte, next_c, next_syntax;
DEC_BOTH (next, next_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (next);
- next_c = FETCH_CHAR (next_byte);
+ next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
next_syntax = SYNTAX_WITH_FLAGS (next_c);
if (((com2start || comnested)
&& SYNTAX_FLAGS_COMEND_SECOND (syntax)
@@ -854,29 +855,6 @@ char syntax_code_spec[16] =
static Lisp_Object Vsyntax_code_object;
-/* Look up the value for CHARACTER in syntax table TABLE's parent
- and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
- for CHARACTER. It's actually used only when not compiled with GCC. */
-
-Lisp_Object
-syntax_parent_lookup (table, character)
- Lisp_Object table;
- int character;
-{
- Lisp_Object value;
-
- while (1)
- {
- table = XCHAR_TABLE (table)->parent;
- if (NILP (table))
- return Qnil;
-
- value = XCHAR_TABLE (table)->contents[character];
- if (!NILP (value))
- return value;
- }
-}
-
DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
doc: /* Return the syntax code of CHARACTER, described by a character.
For example, if CHARACTER is a word constituent,
@@ -995,6 +973,8 @@ DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
doc: /* Set syntax for character CHAR according to string NEWENTRY.
The syntax is changed only for table SYNTAX-TABLE, which defaults to
the current buffer's syntax table.
+CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
+in the range MIN and MAX are changed.
The first character of NEWENTRY should be one of the following:
Space or - whitespace syntax. w word constituent.
_ symbol constituent. . punctuation.
@@ -1031,14 +1011,24 @@ usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
(c, newentry, syntax_table)
Lisp_Object c, newentry, syntax_table;
{
- CHECK_NUMBER (c);
+ if (CONSP (c))
+ {
+ CHECK_CHARACTER_CAR (c);
+ CHECK_CHARACTER_CDR (c);
+ }
+ else
+ CHECK_CHARACTER (c);
if (NILP (syntax_table))
syntax_table = current_buffer->syntax_table;
else
check_syntax_table (syntax_table);
- SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Fstring_to_syntax (newentry));
+ newentry = Fstring_to_syntax (newentry);
+ if (CONSP (c))
+ SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
+ else
+ SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
return Qnil;
}
@@ -1192,6 +1182,10 @@ DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
int parse_sexp_ignore_comments;
+/* Char-table of functions that find the next or previous word
+ boundary. */
+Lisp_Object Vfind_word_boundary_function_table;
+
/* Return the position across COUNT words from FROM.
If that many words cannot be found before the end of the buffer, return 0.
COUNT negative means scan backward and stop at word beginning. */
@@ -1205,6 +1199,7 @@ scan_words (from, count)
register int from_byte = CHAR_TO_BYTE (from);
register enum syntaxcode code;
int ch0, ch1;
+ Lisp_Object func, script, pos;
immediate_quit = 1;
QUIT;
@@ -1221,7 +1216,7 @@ scan_words (from, count)
return 0;
}
UPDATE_SYNTAX_TABLE_FORWARD (from);
- ch0 = FETCH_CHAR (from_byte);
+ ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
code = SYNTAX (ch0);
INC_BOTH (from, from_byte);
if (words_include_escapes
@@ -1232,18 +1227,33 @@ scan_words (from, count)
}
/* Now CH0 is a character which begins a word and FROM is the
position of the next character. */
- while (1)
+ func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
+ if (! NILP (Ffboundp (func)))
{
- if (from == end) break;
- UPDATE_SYNTAX_TABLE_FORWARD (from);
- ch1 = FETCH_CHAR (from_byte);
- code = SYNTAX (ch1);
- if (!(words_include_escapes
- && (code == Sescape || code == Scharquote)))
- if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
- break;
- INC_BOTH (from, from_byte);
- ch0 = ch1;
+ pos = call2 (func, make_number (from - 1), make_number (end));
+ if (INTEGERP (pos) && XINT (pos) > from)
+ {
+ from = XINT (pos);
+ from_byte = CHAR_TO_BYTE (from);
+ }
+ }
+ else
+ {
+ script = CHAR_TABLE_REF (Vchar_script_table, ch0);
+ while (1)
+ {
+ if (from == end) break;
+ UPDATE_SYNTAX_TABLE_FORWARD (from);
+ ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
+ code = SYNTAX (ch1);
+ if ((code != Sword
+ && (! words_include_escapes
+ || (code != Sescape && code != Scharquote)))
+ || ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch1), script))
+ break;
+ INC_BOTH (from, from_byte);
+ ch0 = ch1;
+ }
}
count--;
}
@@ -1258,7 +1268,7 @@ scan_words (from, count)
}
DEC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (from);
- ch1 = FETCH_CHAR (from_byte);
+ ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
code = SYNTAX (ch1);
if (words_include_escapes
&& (code == Sescape || code == Scharquote))
@@ -1268,22 +1278,37 @@ scan_words (from, count)
}
/* Now CH1 is a character which ends a word and FROM is the
position of it. */
- while (1)
+ func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
+ if (! NILP (Ffboundp (func)))
+ {
+ pos = call2 (func, make_number (from), make_number (beg));
+ if (INTEGERP (pos) && XINT (pos) < from)
+ {
+ from = XINT (pos);
+ from_byte = CHAR_TO_BYTE (from);
+ }
+ }
+ else
{
- int temp_byte;
+ script = CHAR_TABLE_REF (Vchar_script_table, ch1);
+ while (1)
+ {
+ int temp_byte;
- if (from == beg)
- break;
- temp_byte = dec_bytepos (from_byte);
- UPDATE_SYNTAX_TABLE_BACKWARD (from);
- ch0 = FETCH_CHAR (temp_byte);
- code = SYNTAX (ch0);
- if (!(words_include_escapes
- && (code == Sescape || code == Scharquote)))
- if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
- break;
- DEC_BOTH (from, from_byte);
- ch1 = ch0;
+ if (from == beg)
+ break;
+ temp_byte = dec_bytepos (from_byte);
+ UPDATE_SYNTAX_TABLE_BACKWARD (from);
+ ch0 = FETCH_CHAR_AS_MULTIBYTE (temp_byte);
+ code = SYNTAX (ch0);
+ if ((code != Sword
+ && (! words_include_escapes
+ || (code != Sescape && code != Scharquote)))
+ || ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch0), script))
+ break;
+ DEC_BOTH (from, from_byte);
+ ch1 = ch0;
+ }
}
count++;
}
@@ -1338,7 +1363,7 @@ Returns the distance traveled, either zero or positive. */)
(string, lim)
Lisp_Object string, lim;
{
- return skip_chars (1, 0, string, lim, 1);
+ return skip_chars (1, string, lim, 1);
}
DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
@@ -1348,7 +1373,7 @@ Returns the distance traveled, either zero or negative. */)
(string, lim)
Lisp_Object string, lim;
{
- return skip_chars (0, 0, string, lim, 1);
+ return skip_chars (0, string, lim, 1);
}
DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
@@ -1360,7 +1385,7 @@ This function returns the distance traveled, either zero or positive. */)
(syntax, lim)
Lisp_Object syntax, lim;
{
- return skip_chars (1, 1, syntax, lim, 0);
+ return skip_syntaxes (1, syntax, lim);
}
DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
@@ -1372,25 +1397,27 @@ This function returns the distance traveled, either zero or negative. */)
(syntax, lim)
Lisp_Object syntax, lim;
{
- return skip_chars (0, 1, syntax, lim, 0);
+ return skip_syntaxes (0, syntax, lim);
}
static Lisp_Object
-skip_chars (forwardp, syntaxp, string, lim, handle_iso_classes)
- int forwardp, syntaxp;
+skip_chars (forwardp, string, lim, handle_iso_classes)
+ int forwardp;
Lisp_Object string, lim;
int handle_iso_classes;
{
register unsigned int c;
unsigned char fastmap[0400];
- /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
- of which codes don't fit in FASTMAP. In that case, set the
- ranges of characters in CHAR_RANGES. */
+ /* Store the ranges of non-ASCII characters. */
int *char_ranges;
int n_char_ranges = 0;
int negate = 0;
register int i, i_byte;
- int multibyte = !NILP (current_buffer->enable_multibyte_characters);
+ /* Set to 1 if the current buffer is multibyte and the region
+ contains non-ASCII chars. */
+ int multibyte;
+ /* Set to 1 if STRING is multibyte and it contains non-ASCII
+ chars. */
int string_multibyte;
int size_byte;
const unsigned char *str;
@@ -1398,32 +1425,8 @@ skip_chars (forwardp, syntaxp, string, lim, handle_iso_classes)
Lisp_Object iso_classes;
CHECK_STRING (string);
- char_ranges = (int *) alloca (SCHARS (string) * (sizeof (int)) * 2);
- string_multibyte = STRING_MULTIBYTE (string);
- str = SDATA (string);
- size_byte = SBYTES (string);
iso_classes = Qnil;
- /* Adjust the multibyteness of the string to that of the buffer. */
- if (multibyte != string_multibyte)
- {
- int nbytes;
-
- if (multibyte)
- nbytes = count_size_as_multibyte (SDATA (string),
- SCHARS (string));
- else
- nbytes = SCHARS (string);
- if (nbytes != size_byte)
- {
- unsigned char *tmp = (unsigned char *) alloca (nbytes);
- copy_text (SDATA (string), tmp, size_byte,
- string_multibyte, multibyte);
- size_byte = nbytes;
- str = tmp;
- }
- }
-
if (NILP (lim))
XSETINT (lim, forwardp ? ZV : BEGV);
else
@@ -1435,10 +1438,16 @@ skip_chars (forwardp, syntaxp, string, lim, handle_iso_classes)
if (XINT (lim) < BEGV)
XSETFASTINT (lim, BEGV);
+ multibyte = (!NILP (current_buffer->enable_multibyte_characters)
+ && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
+ string_multibyte = SBYTES (string) > SCHARS (string);
+
bzero (fastmap, sizeof fastmap);
- i_byte = 0;
+ str = SDATA (string);
+ size_byte = SBYTES (string);
+ i_byte = 0;
if (i_byte < size_byte
&& SREF (string, 0) == '^')
{
@@ -1446,21 +1455,23 @@ skip_chars (forwardp, syntaxp, string, lim, handle_iso_classes)
}
/* Find the characters specified and set their elements of fastmap.
- If syntaxp, each character counts as itself.
- Otherwise, handle backslashes and ranges specially. */
+ Handle backslashes and ranges specially.
- while (i_byte < size_byte)
+ If STRING contains non-ASCII characters, setup char_ranges for
+ them and use fastmap only for their leading codes. */
+
+ if (! string_multibyte)
{
- c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len);
- i_byte += len;
+ int string_has_eight_bit = 0;
- if (syntaxp)
- fastmap[syntax_spec_code[c & 0377]] = 1;
- else
+ /* At first setup fastmap. */
+ while (i_byte < size_byte)
{
+ c = str[i_byte++];
+
if (handle_iso_classes && c == '['
&& i_byte < size_byte
- && STRING_CHAR (str + i_byte, size_byte - i_byte) == ':')
+ && str[i_byte] == ':')
{
const unsigned char *class_beg = str + i_byte + 1;
const unsigned char *class_end = class_beg;
@@ -1499,6 +1510,129 @@ skip_chars (forwardp, syntaxp, string, lim, handle_iso_classes)
if (i_byte == size_byte)
break;
+ c = str[i_byte++];
+ }
+ /* Treat `-' as range character only if another character
+ follows. */
+ if (i_byte + 1 < size_byte
+ && str[i_byte] == '-')
+ {
+ unsigned int c2;
+
+ /* Skip over the dash. */
+ i_byte++;
+
+ /* Get the end of the range. */
+ c2 = str[i_byte++];
+ if (c2 == '\\'
+ && i_byte < size_byte)
+ c2 = str[i_byte++];
+
+ if (c <= c2)
+ {
+ while (c <= c2)
+ fastmap[c++] = 1;
+ if (! ASCII_CHAR_P (c2))
+ string_has_eight_bit = 1;
+ }
+ }
+ else
+ {
+ fastmap[c] = 1;
+ if (! ASCII_CHAR_P (c))
+ string_has_eight_bit = 1;
+ }
+ }
+
+ /* If the current range is multibyte and STRING contains
+ eight-bit chars, arrange fastmap and setup char_ranges for
+ the corresponding multibyte chars. */
+ if (multibyte && string_has_eight_bit)
+ {
+ unsigned char fastmap2[0400];
+ int range_start_byte, range_start_char;
+
+ bcopy (fastmap2 + 0200, fastmap + 0200, 0200);
+ bzero (fastmap + 0200, 0200);
+ /* We are sure that this loop stops. */
+ for (i = 0200; ! fastmap2[i]; i++);
+ c = unibyte_char_to_multibyte (i);
+ fastmap[CHAR_LEADING_CODE (c)] = 1;
+ range_start_byte = i;
+ range_start_char = c;
+ char_ranges = (int *) alloca (sizeof (int) * 128 * 2);
+ for (i = 129; i < 0400; i++)
+ {
+ c = unibyte_char_to_multibyte (i);
+ fastmap[CHAR_LEADING_CODE (c)] = 1;
+ if (i - range_start_byte != c - range_start_char)
+ {
+ char_ranges[n_char_ranges++] = range_start_char;
+ char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
+ + range_start_char);
+ range_start_byte = i;
+ range_start_char = c;
+ }
+ }
+ char_ranges[n_char_ranges++] = range_start_char;
+ char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
+ + range_start_char);
+ }
+ }
+ else /* STRING is multibyte */
+ {
+ char_ranges = (int *) alloca (sizeof (int) * SCHARS (string) * 2);
+
+ while (i_byte < size_byte)
+ {
+ unsigned char leading_code;
+
+ leading_code = str[i_byte];
+ c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
+ i_byte += len;
+
+ if (handle_iso_classes && c == '['
+ && i_byte < size_byte
+ && STRING_CHAR (str + i_byte, size_byte - i_byte) == ':')
+ {
+ const unsigned char *class_beg = str + i_byte + 1;
+ const unsigned char *class_end = class_beg;
+ const unsigned char *class_limit = str + size_byte - 2;
+ /* Leave room for the null. */
+ unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
+ re_wctype_t cc;
+
+ if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
+ class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
+
+ while (class_end < class_limit
+ && *class_end >= 'a' && *class_end <= 'z')
+ class_end++;
+
+ if (class_end == class_beg
+ || *class_end != ':' || class_end[1] != ']')
+ goto not_a_class_name_multibyte;
+
+ bcopy (class_beg, class_name, class_end - class_beg);
+ class_name[class_end - class_beg] = 0;
+
+ cc = re_wctype (class_name);
+ if (cc == 0)
+ error ("Invalid ISO C character class");
+
+ iso_classes = Fcons (make_number (cc), iso_classes);
+
+ i_byte = class_end + 2 - str;
+ continue;
+ }
+
+ not_a_class_name_multibyte:
+ if (c == '\\')
+ {
+ if (i_byte == size_byte)
+ break;
+
+ leading_code = str[i_byte];
c = STRING_CHAR_AND_LENGTH (str + i_byte,
size_byte - i_byte, len);
i_byte += len;
@@ -1509,61 +1643,90 @@ skip_chars (forwardp, syntaxp, string, lim, handle_iso_classes)
&& str[i_byte] == '-')
{
unsigned int c2;
+ unsigned char leading_code2;
/* Skip over the dash. */
i_byte++;
/* Get the end of the range. */
+ leading_code2 = str[i_byte];
c2 = STRING_CHAR_AND_LENGTH (str + i_byte,
size_byte - i_byte, len);
i_byte += len;
- if (SINGLE_BYTE_CHAR_P (c))
+ if (c2 == '\\'
+ && i_byte < size_byte)
{
- if (! SINGLE_BYTE_CHAR_P (c2))
+ leading_code2 = str[i_byte];
+ c2 =STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
+ i_byte += len;
+ }
+
+ if (c > c2)
+ continue;
+ if (ASCII_CHAR_P (c))
+ {
+ while (c <= c2 && c < 0x80)
+ fastmap[c++] = 1;
+ leading_code = CHAR_LEADING_CODE (c);
+ }
+ if (! ASCII_CHAR_P (c))
+ {
+ while (leading_code <= leading_code2)
+ fastmap[leading_code++] = 1;
+ if (c <= c2)
{
- /* Handle a range starting with a character of
- less than 256, and ending with a character of
- not less than 256. Split that into two
- ranges, the low one ending at 0377, and the
- high one starting at the smallest character
- in the charset of C2 and ending at C2. */
- int charset = CHAR_CHARSET (c2);
- int c1 = MAKE_CHAR (charset, 0, 0);
-
- char_ranges[n_char_ranges++] = c1;
+ char_ranges[n_char_ranges++] = c;
char_ranges[n_char_ranges++] = c2;
- c2 = 0377;
- }
- while (c <= c2)
- {
- fastmap[c] = 1;
- c++;
}
}
- else if (c <= c2) /* Both C and C2 are multibyte char. */
- {
- char_ranges[n_char_ranges++] = c;
- char_ranges[n_char_ranges++] = c2;
- }
}
else
{
- if (SINGLE_BYTE_CHAR_P (c))
+ if (ASCII_CHAR_P (c))
fastmap[c] = 1;
else
{
+ fastmap[leading_code] = 1;
char_ranges[n_char_ranges++] = c;
char_ranges[n_char_ranges++] = c;
}
}
}
+
+ /* If the current range is unibyte and STRING contains non-ASCII
+ chars, arrange fastmap for the corresponding unibyte
+ chars. */
+
+ if (! multibyte && n_char_ranges > 0)
+ {
+ bzero (fastmap + 0200, 0200);
+ for (i = 0; i < n_char_ranges; i += 2)
+ {
+ int c1 = char_ranges[i];
+ int c2 = char_ranges[i + 1];
+
+ for (; c1 <= c2; c1++)
+ fastmap[CHAR_TO_BYTE8 (c1)] = 1;
+ }
+ }
}
/* If ^ was the first character, complement the fastmap. */
if (negate)
- for (i = 0; i < sizeof fastmap; i++)
- fastmap[i] ^= 1;
+ {
+ if (! multibyte)
+ for (i = 0; i < sizeof fastmap; i++)
+ fastmap[i] ^= 1;
+ else
+ {
+ for (i = 0; i < 0200; i++)
+ fastmap[i] ^= 1;
+ /* All non-ASCII chars possibly match. */
+ for (; i < sizeof fastmap; i++)
+ fastmap[i] = 1;
+ }
+ }
{
int start_point = PT;
@@ -1583,254 +1746,312 @@ skip_chars (forwardp, syntaxp, string, lim, handle_iso_classes)
}
immediate_quit = 1;
- if (syntaxp)
+ if (forwardp)
{
- SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
- if (forwardp)
- {
- if (multibyte)
- while (1)
- {
- int nbytes;
+ if (multibyte)
+ while (1)
+ {
+ int nbytes;
- if (p >= stop)
- {
- if (p >= endp)
- break;
- p = GAP_END_ADDR;
- stop = endp;
- }
- c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
- if (! fastmap[(int) SYNTAX (c)])
+ if (p >= stop)
+ {
+ if (p >= endp)
break;
- p += nbytes, pos++, pos_byte += nbytes;
- UPDATE_SYNTAX_TABLE_FORWARD (pos);
+ p = GAP_END_ADDR;
+ stop = endp;
}
- else
- while (1)
+ c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
+ if (! NILP (iso_classes) && in_classes (c, iso_classes))
{
- if (p >= stop)
- {
- if (p >= endp)
- break;
- p = GAP_END_ADDR;
- stop = endp;
- }
- if (! fastmap[(int) SYNTAX (*p)])
+ if (negate)
break;
- p++, pos++;
- UPDATE_SYNTAX_TABLE_FORWARD (pos);
+ else
+ goto fwd_ok;
}
- }
- else
- {
- if (multibyte)
- while (1)
- {
- unsigned char *prev_p;
- int nbytes;
- if (p <= stop)
- {
- if (p <= endp)
- break;
- p = GPT_ADDR;
- stop = endp;
- }
- prev_p = p;
- while (--p >= stop && ! CHAR_HEAD_P (*p));
- PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
- if (prev_p - p > nbytes)
- p = prev_p - 1, c = *p, nbytes = 1;
- else
- c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
- pos--, pos_byte -= nbytes;
- UPDATE_SYNTAX_TABLE_BACKWARD (pos);
- if (! fastmap[(int) SYNTAX (c)])
- {
- pos++;
- pos_byte += nbytes;
+ if (! fastmap[*p])
+ break;
+ if (! ASCII_CHAR_P (c))
+ {
+ /* As we are looking at a multibyte character, we
+ must look up the character in the table
+ CHAR_RANGES. If there's no data in the table,
+ that character is not what we want to skip. */
+
+ /* The following code do the right thing even if
+ n_char_ranges is zero (i.e. no data in
+ CHAR_RANGES). */
+ for (i = 0; i < n_char_ranges; i += 2)
+ if (c >= char_ranges[i] && c <= char_ranges[i + 1])
break;
- }
+ if (!(negate ^ (i < n_char_ranges)))
+ break;
}
- else
- while (1)
+ fwd_ok:
+ p += nbytes, pos++, pos_byte += nbytes;
+ }
+ else
+ while (1)
+ {
+ if (p >= stop)
{
- if (p <= stop)
- {
- if (p <= endp)
- break;
- p = GPT_ADDR;
- stop = endp;
- }
- if (! fastmap[(int) SYNTAX (p[-1])])
+ if (p >= endp)
break;
- p--, pos--;
- UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
+ p = GAP_END_ADDR;
+ stop = endp;
}
- }
+
+ if (!NILP (iso_classes) && in_classes (*p, iso_classes))
+ {
+ if (negate)
+ break;
+ else
+ goto fwd_unibyte_ok;
+ }
+
+ if (!fastmap[*p])
+ break;
+ fwd_unibyte_ok:
+ p++, pos++, pos_byte++;
+ }
}
else
{
- if (forwardp)
- {
- if (multibyte)
- while (1)
- {
- int nbytes;
-
- if (p >= stop)
- {
- if (p >= endp)
- break;
- p = GAP_END_ADDR;
- stop = endp;
- }
- c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
+ if (multibyte)
+ while (1)
+ {
+ unsigned char *prev_p;
- if (! NILP (iso_classes) && in_classes (c, iso_classes))
- {
- if (negate)
- break;
- else
- goto fwd_ok;
- }
+ if (p <= stop)
+ {
+ if (p <= endp)
+ break;
+ p = GPT_ADDR;
+ stop = endp;
+ }
+ prev_p = p;
+ while (--p >= stop && ! CHAR_HEAD_P (*p));
+ c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
- if (SINGLE_BYTE_CHAR_P (c))
- {
- if (!fastmap[c])
- break;
- }
+ if (! NILP (iso_classes) && in_classes (c, iso_classes))
+ {
+ if (negate)
+ break;
else
- {
- /* If we are looking at a multibyte character,
- we must look up the character in the table
- CHAR_RANGES. If there's no data in the
- table, that character is not what we want to
- skip. */
-
- /* The following code do the right thing even if
- n_char_ranges is zero (i.e. no data in
- CHAR_RANGES). */
- for (i = 0; i < n_char_ranges; i += 2)
- if (c >= char_ranges[i] && c <= char_ranges[i + 1])
- break;
- if (!(negate ^ (i < n_char_ranges)))
- break;
- }
- fwd_ok:
- p += nbytes, pos++, pos_byte += nbytes;
+ goto back_ok;
}
- else
- while (1)
- {
- if (p >= stop)
- {
- if (p >= endp)
- break;
- p = GAP_END_ADDR;
- stop = endp;
- }
-
- if (!NILP (iso_classes) && in_classes (*p, iso_classes))
- {
- if (negate)
- break;
- else
- goto fwd_unibyte_ok;
- }
- if (!fastmap[*p])
+ if (! fastmap[*p])
+ break;
+ if (! ASCII_CHAR_P (c))
+ {
+ /* See the comment in the previous similar code. */
+ for (i = 0; i < n_char_ranges; i += 2)
+ if (c >= char_ranges[i] && c <= char_ranges[i + 1])
+ break;
+ if (!(negate ^ (i < n_char_ranges)))
break;
- fwd_unibyte_ok:
- p++, pos++;
}
- }
+ back_ok:
+ pos--, pos_byte -= prev_p - p;
+ }
else
- {
- if (multibyte)
- while (1)
+ while (1)
+ {
+ if (p <= stop)
{
- unsigned char *prev_p;
- int nbytes;
+ if (p <= endp)
+ break;
+ p = GPT_ADDR;
+ stop = endp;
+ }
- if (p <= stop)
- {
- if (p <= endp)
- break;
- p = GPT_ADDR;
- stop = endp;
- }
- prev_p = p;
- while (--p >= stop && ! CHAR_HEAD_P (*p));
- PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
- if (prev_p - p > nbytes)
- p = prev_p - 1, c = *p, nbytes = 1;
+ if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
+ {
+ if (negate)
+ break;
else
- c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
+ goto back_unibyte_ok;
+ }
- if (! NILP (iso_classes) && in_classes (c, iso_classes))
- {
- if (negate)
- break;
- else
- goto back_ok;
- }
+ if (!fastmap[p[-1]])
+ break;
+ back_unibyte_ok:
+ p--, pos--, pos_byte--;
+ }
+ }
- if (SINGLE_BYTE_CHAR_P (c))
- {
- if (!fastmap[c])
- break;
- }
- else
- {
- /* See the comment in the previous similar code. */
- for (i = 0; i < n_char_ranges; i += 2)
- if (c >= char_ranges[i] && c <= char_ranges[i + 1])
- break;
- if (!(negate ^ (i < n_char_ranges)))
- break;
- }
- back_ok:
- pos--, pos_byte -= nbytes;
- }
- else
- while (1)
- {
- if (p <= stop)
- {
- if (p <= endp)
- break;
- p = GPT_ADDR;
- stop = endp;
- }
+ SET_PT_BOTH (pos, pos_byte);
+ immediate_quit = 0;
- if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
- {
- if (negate)
- break;
- else
- goto back_unibyte_ok;
- }
+ return make_number (PT - start_point);
+ }
+}
- if (!fastmap[p[-1]])
- break;
- back_unibyte_ok:
- p--, pos--;
- }
- }
+
+static Lisp_Object
+skip_syntaxes (forwardp, string, lim)
+ int forwardp;
+ Lisp_Object string, lim;
+{
+ register unsigned int c;
+ unsigned char fastmap[0400];
+ int negate = 0;
+ register int i, i_byte;
+ int multibyte;
+ int size_byte;
+ unsigned char *str;
+
+ CHECK_STRING (string);
+
+ if (NILP (lim))
+ XSETINT (lim, forwardp ? ZV : BEGV);
+ else
+ CHECK_NUMBER_COERCE_MARKER (lim);
+
+ /* In any case, don't allow scan outside bounds of buffer. */
+ if (XINT (lim) > ZV)
+ XSETFASTINT (lim, ZV);
+ if (XINT (lim) < BEGV)
+ XSETFASTINT (lim, BEGV);
+
+ if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
+ return make_number (0);
+
+ multibyte = (!NILP (current_buffer->enable_multibyte_characters)
+ && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
+
+ bzero (fastmap, sizeof fastmap);
+
+ if (SBYTES (string) > SCHARS (string))
+ /* As this is very rare case (syntax spec is ASCII only), don't
+ consider efficiency. */
+ string = string_make_unibyte (string);
+
+ str = SDATA (string);
+ size_byte = SBYTES (string);
+
+ i_byte = 0;
+ if (i_byte < size_byte
+ && SREF (string, 0) == '^')
+ {
+ negate = 1; i_byte++;
+ }
+
+ /* Find the syntaxes specified and set their elements of fastmap. */
+
+ while (i_byte < size_byte)
+ {
+ c = str[i_byte++];
+ fastmap[syntax_spec_code[c]] = 1;
+ }
+
+ /* If ^ was the first character, complement the fastmap. */
+ if (negate)
+ for (i = 0; i < sizeof fastmap; i++)
+ fastmap[i] ^= 1;
+
+ {
+ int start_point = PT;
+ int pos = PT;
+ int pos_byte = PT_BYTE;
+ unsigned char *p = PT_ADDR, *endp, *stop;
+
+ if (forwardp)
+ {
+ endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
+ stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
+ }
+ else
+ {
+ endp = CHAR_POS_ADDR (XINT (lim));
+ stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
}
-#if 0 /* Not needed now that a position in mid-character
- cannot be specified in Lisp. */
- if (multibyte
- /* INC_POS or DEC_POS might have moved POS over LIM. */
- && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
- pos = XINT (lim);
-#endif
+ immediate_quit = 1;
+ SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
+ if (forwardp)
+ {
+ if (multibyte)
+ {
+ while (1)
+ {
+ int nbytes;
+
+ if (p >= stop)
+ {
+ if (p >= endp)
+ break;
+ p = GAP_END_ADDR;
+ stop = endp;
+ }
+ c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
+ if (! fastmap[(int) SYNTAX (c)])
+ break;
+ p += nbytes, pos++, pos_byte += nbytes;
+ UPDATE_SYNTAX_TABLE_FORWARD (pos);
+ }
+ }
+ else
+ {
+ while (1)
+ {
+ if (p >= stop)
+ {
+ if (p >= endp)
+ break;
+ p = GAP_END_ADDR;
+ stop = endp;
+ }
+ if (! fastmap[(int) SYNTAX (*p)])
+ break;
+ p++, pos++, pos_byte++;
+ UPDATE_SYNTAX_TABLE_FORWARD (pos);
+ }
+ }
+ }
+ else
+ {
+ if (multibyte)
+ {
+ while (1)
+ {
+ unsigned char *prev_p;
- if (! multibyte)
- pos_byte = pos;
+ if (p <= stop)
+ {
+ if (p <= endp)
+ break;
+ p = GPT_ADDR;
+ stop = endp;
+ }
+ prev_p = p;
+ while (--p >= stop && ! CHAR_HEAD_P (*p));
+ c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
+ if (! fastmap[(int) SYNTAX (c)])
+ break;
+ pos--, pos_byte -= prev_p - p;
+ UPDATE_SYNTAX_TABLE_BACKWARD (pos);
+ }
+ }
+ else
+ {
+ while (1)
+ {
+ if (p <= stop)
+ {
+ if (p <= endp)
+ break;
+ p = GPT_ADDR;
+ stop = endp;
+ }
+ if (! fastmap[(int) SYNTAX (p[-1])])
+ break;
+ p--, pos--, pos_byte--;
+ UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
+ }
+ }
+ }
SET_PT_BOTH (pos, pos_byte);
immediate_quit = 0;
@@ -1915,7 +2136,7 @@ forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
*bytepos_ptr = from_byte;
return 0;
}
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
syntax = SYNTAX_WITH_FLAGS (c);
code = syntax & 0xff;
if (code == Sendcomment
@@ -1945,7 +2166,7 @@ forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
forw_incomment:
if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
&& SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
- && (c1 = FETCH_CHAR (from_byte),
+ && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
SYNTAX_COMEND_SECOND (c1))
&& ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
@@ -1964,7 +2185,7 @@ forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
if (nesting > 0
&& from < stop
&& SYNTAX_FLAGS_COMSTART_FIRST (syntax)
- && (c1 = FETCH_CHAR (from_byte),
+ && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
SYNTAX_COMMENT_STYLE (c1) == style
&& SYNTAX_COMSTART_SECOND (c1))
&& (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
@@ -2028,7 +2249,7 @@ between them, return t; otherwise return nil. */)
immediate_quit = 0;
return Qnil;
}
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
code = SYNTAX (c);
comstart_first = SYNTAX_COMSTART_FIRST (c);
comnested = SYNTAX_COMMENT_NESTED (c);
@@ -2036,7 +2257,7 @@ between them, return t; otherwise return nil. */)
INC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_FORWARD (from);
if (from < stop && comstart_first
- && (c1 = FETCH_CHAR (from_byte),
+ && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
SYNTAX_COMSTART_SECOND (c1)))
{
/* We have encountered a comment start sequence and we
@@ -2094,7 +2315,7 @@ between them, return t; otherwise return nil. */)
DEC_BOTH (from, from_byte);
/* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
quoted = char_quoted (from, from_byte);
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
code = SYNTAX (c);
comstyle = 0;
comnested = SYNTAX_COMMENT_NESTED (c);
@@ -2111,7 +2332,7 @@ between them, return t; otherwise return nil. */)
code = Sendcomment;
/* Calling char_quoted, above, set up global syntax position
at the new value of FROM. */
- c1 = FETCH_CHAR (from_byte);
+ c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
comstyle = SYNTAX_COMMENT_STYLE (c1);
comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
}
@@ -2125,7 +2346,7 @@ between them, return t; otherwise return nil. */)
{
DEC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (from);
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
if (SYNTAX (c) == Scomment_fence
&& !char_quoted (from, from_byte))
{
@@ -2191,11 +2412,11 @@ between them, return t; otherwise return nil. */)
return Qt;
}
-/* Return syntax code of character C if C is a single byte character
+/* Return syntax code of character C if C is an ASCII character
or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
-#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
- ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \
+#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
+ ((ASCII_CHAR_P (c) || !multibyte_symbol_p) \
? SYNTAX (c) : Ssymbol)
static Lisp_Object
@@ -2239,7 +2460,7 @@ scan_lists (from, count, depth, sexpflag)
{
int comstart_first, prefix;
UPDATE_SYNTAX_TABLE_FORWARD (from);
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
comstart_first = SYNTAX_COMSTART_FIRST (c);
comnested = SYNTAX_COMMENT_NESTED (c);
@@ -2250,7 +2471,8 @@ scan_lists (from, count, depth, sexpflag)
INC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_FORWARD (from);
if (from < stop && comstart_first
- && (c = FETCH_CHAR (from_byte), SYNTAX_COMSTART_SECOND (c))
+ && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
+ SYNTAX_COMSTART_SECOND (c))
&& parse_sexp_ignore_comments)
{
/* we have encountered a comment start sequence and we
@@ -2259,7 +2481,7 @@ scan_lists (from, count, depth, sexpflag)
only a comment end of the same style actually ends
the comment section */
code = Scomment;
- c1 = FETCH_CHAR (from_byte);
+ c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
comstyle = SYNTAX_COMMENT_STYLE (c1);
comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
INC_BOTH (from, from_byte);
@@ -2285,7 +2507,7 @@ scan_lists (from, count, depth, sexpflag)
UPDATE_SYNTAX_TABLE_FORWARD (from);
/* Some compilers can't handle this inside the switch. */
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
switch (temp)
{
@@ -2328,7 +2550,7 @@ scan_lists (from, count, depth, sexpflag)
case Smath:
if (!sexpflag)
break;
- if (from != stop && c == FETCH_CHAR (from_byte))
+ if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
{
INC_BOTH (from, from_byte);
}
@@ -2355,12 +2577,12 @@ scan_lists (from, count, depth, sexpflag)
case Sstring:
case Sstring_fence:
temp_pos = dec_bytepos (from_byte);
- stringterm = FETCH_CHAR (temp_pos);
+ stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
while (1)
{
if (from >= stop) goto lose;
UPDATE_SYNTAX_TABLE_FORWARD (from);
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
if (code == Sstring
? (c == stringterm
&& SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
@@ -2403,7 +2625,7 @@ scan_lists (from, count, depth, sexpflag)
{
DEC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (from);
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
if (depth == min_depth)
last_good = from;
@@ -2421,7 +2643,7 @@ scan_lists (from, count, depth, sexpflag)
DEC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (from);
code = Sendcomment;
- c1 = FETCH_CHAR (from_byte);
+ c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
comstyle = SYNTAX_COMMENT_STYLE (c1);
comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
}
@@ -2454,7 +2676,7 @@ scan_lists (from, count, depth, sexpflag)
else
temp_pos--;
UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
- c1 = FETCH_CHAR (temp_pos);
+ c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
/* Don't allow comment-end to be quoted. */
if (temp_code == Sendcomment)
@@ -2466,7 +2688,7 @@ scan_lists (from, count, depth, sexpflag)
temp_pos = dec_bytepos (temp_pos);
UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
}
- c1 = FETCH_CHAR (temp_pos);
+ c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
if (! (quoted || temp_code == Sword
|| temp_code == Ssymbol
@@ -2481,7 +2703,7 @@ scan_lists (from, count, depth, sexpflag)
break;
temp_pos = dec_bytepos (from_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
- if (from != stop && c == FETCH_CHAR (temp_pos))
+ if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
DEC_BOTH (from, from_byte);
if (mathexit)
{
@@ -2526,7 +2748,7 @@ scan_lists (from, count, depth, sexpflag)
DEC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (from);
if (!char_quoted (from, from_byte)
- && (c = FETCH_CHAR (from_byte),
+ && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
break;
}
@@ -2534,14 +2756,15 @@ scan_lists (from, count, depth, sexpflag)
break;
case Sstring:
- stringterm = FETCH_CHAR (from_byte);
+ stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
while (1)
{
if (from == stop) goto lose;
DEC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (from);
if (!char_quoted (from, from_byte)
- && stringterm == (c = FETCH_CHAR (from_byte))
+ && (stringterm
+ == (c = FETCH_CHAR_AS_MULTIBYTE (from_byte)))
&& SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
break;
}
@@ -2644,7 +2867,7 @@ This includes chars with "quote" or "prefix" syntax (' or p). */)
while (!char_quoted (pos, pos_byte)
/* Previous statement updates syntax table. */
- && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
+ && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
|| SYNTAX_PREFIX (c)))
{
opoint = pos;
@@ -2672,7 +2895,8 @@ scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
stopbefore, oldstate, commentstop)
struct lisp_parse_state *stateptr;
register int from;
- int end, targetdepth, stopbefore, from_byte;
+ int from_byte;
+ int end, targetdepth, stopbefore;
Lisp_Object oldstate;
int commentstop;
{
@@ -2709,7 +2933,7 @@ scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
#define INC_FROM \
do { prev_from = from; \
prev_from_byte = from_byte; \
- temp = FETCH_CHAR (prev_from_byte); \
+ temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
INC_BOTH (from, from_byte); \
if (from < end) \
@@ -2865,7 +3089,7 @@ do { prev_from = from; \
while (from < end)
{
/* Some compilers can't handle this inside the switch. */
- temp = FETCH_CHAR (from_byte);
+ temp = FETCH_CHAR_AS_MULTIBYTE (from_byte);
temp = SYNTAX (temp);
switch (temp)
{
@@ -2939,7 +3163,7 @@ do { prev_from = from; \
if (stopbefore) goto stop; /* this arg means stop at sexp start */
curlevel->last = prev_from;
state.instring = (code == Sstring
- ? (FETCH_CHAR (prev_from_byte))
+ ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
: ST_STRING_STYLE);
if (boundary_stop) goto done;
startinstring:
@@ -2951,7 +3175,7 @@ do { prev_from = from; \
int c;
if (from >= end) goto done;
- c = FETCH_CHAR (from_byte);
+ c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
/* Some compilers can't handle this inside the switch. */
temp = SYNTAX (c);
@@ -3177,8 +3401,7 @@ init_syntax_once ()
/* All multibyte characters have syntax `word' by default. */
temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
- for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
- XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
+ char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
}
void
@@ -3226,6 +3449,25 @@ See the info node `(elisp)Syntax Properties' for a description of the
doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */);
open_paren_in_column_0_is_defun_start = 1;
+
+ DEFVAR_LISP ("find-word-boundary-function-table",
+ &Vfind_word_boundary_function_table,
+ doc: /*
+Char table of functions to search for the word boundary.
+Each function is called with two arguments; POS and LIMIT.
+POS and LIMIT are character positions in the current buffer.
+
+If POS is less than LIMIT, POS is at the first character of a word,
+and the return value of a function is a position after the last
+character of that word.
+
+If POS is not less than LIMIT, POS is at the last character of a word,
+and the return value of a function is a position at the first
+character of that word.
+
+In both cases, LIMIT bounds the search. */);
+ Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
+
defsubr (&Ssyntax_table_p);
defsubr (&Ssyntax_table);
defsubr (&Sstandard_syntax_table);
diff --git a/src/syntax.h b/src/syntax.h
index 92d55967b33..c7e67ebf355 100644
--- a/src/syntax.h
+++ b/src/syntax.h
@@ -58,37 +58,14 @@ enum syntaxcode
/* Set the syntax entry VAL for char C in table TABLE. */
-#define SET_RAW_SYNTAX_ENTRY(table, c, val) \
- ((((c) & 0xFF) == (c)) \
- ? (XCHAR_TABLE (table)->contents[(unsigned char) (c)] = (val)) \
- : Faset ((table), make_number (c), (val)))
+#define SET_RAW_SYNTAX_ENTRY(table, c, val) \
+ CHAR_TABLE_SET ((table), c, (val))
-/* Fetch the syntax entry for char C in syntax table TABLE.
- This macro is called only when C is less than CHAR_TABLE_ORDINARY_SLOTS.
- Do inheritance. */
+/* Set the syntax entry VAL for char-range RANGE in table TABLE.
+ RANGE is a cons (FROM . TO) specifying the range of characters. */
-#ifdef __GNUC__
-#define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \
- ({ Lisp_Object _syntax_tbl = (table); \
- Lisp_Object _syntax_temp = XCHAR_TABLE (_syntax_tbl)->contents[(c)]; \
- while (NILP (_syntax_temp)) \
- { \
- _syntax_tbl = XCHAR_TABLE (_syntax_tbl)->parent; \
- if (NILP (_syntax_tbl)) \
- break; \
- _syntax_temp = XCHAR_TABLE (_syntax_tbl)->contents[(c)]; \
- } \
- _syntax_temp; })
-#else
-extern Lisp_Object syntax_temp;
-extern Lisp_Object syntax_parent_lookup P_ ((Lisp_Object, int));
-
-#define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \
- (syntax_temp = XCHAR_TABLE (table)->contents[(c)], \
- (NILP (syntax_temp) \
- ? syntax_parent_lookup (table, (c)) \
- : syntax_temp))
-#endif
+#define SET_RAW_SYNTAX_ENTRY_RANGE(table, range, val) \
+ Fset_char_table_range ((table), (range), (val))
/* SYNTAX_ENTRY fetches the information from the entry for character C
in syntax table TABLE, or from globally kept data (gl_state).
@@ -106,12 +83,7 @@ extern Lisp_Object syntax_parent_lookup P_ ((Lisp_Object, int));
# define CURRENT_SYNTAX_TABLE current_buffer->syntax_table
#endif
-#define SYNTAX_ENTRY_INT(c) \
- ((((c) & 0xFF) == (c)) \
- ? SYNTAX_ENTRY_FOLLOW_PARENT (CURRENT_SYNTAX_TABLE, \
- (unsigned char) (c)) \
- : Faref (CURRENT_SYNTAX_TABLE, \
- make_number (c)))
+#define SYNTAX_ENTRY_INT(c) CHAR_TABLE_REF (CURRENT_SYNTAX_TABLE, (c))
/* Extract the information from the entry for character C
in the current syntax table. */
@@ -138,6 +110,7 @@ extern Lisp_Object syntax_parent_lookup P_ ((Lisp_Object, int));
? XCDR (_syntax_temp) \
: Qnil); })
#else
+extern Lisp_Object syntax_temp;
#define SYNTAX(c) \
(syntax_temp = SYNTAX_ENTRY ((c)), \
(CONSP (syntax_temp) \
diff --git a/src/term.c b/src/term.c
index 603305c8f17..48942a46671 100644
--- a/src/term.c
+++ b/src/term.c
@@ -29,6 +29,8 @@ Boston, MA 02110-1301, USA. */
#include "termchar.h"
#include "termopts.h"
#include "lisp.h"
+#include "buffer.h"
+#include "character.h"
#include "charset.h"
#include "coding.h"
#include "keyboard.h"
@@ -806,10 +808,12 @@ clear_end_of_line (first_unused_hpos)
}
}
-/* Buffer to store the source and result of code conversion for terminal. */
-static unsigned char *encode_terminal_buf;
-/* Allocated size of the above buffer. */
-static int encode_terminal_bufsize;
+/* Buffers to store the source and result of code conversion for terminal. */
+static unsigned char *encode_terminal_src;
+static unsigned char *encode_terminal_dst;
+/* Allocated sizes of the above buffers. */
+static int encode_terminal_src_size;
+static int encode_terminal_dst_size;
/* Encode SRC_LEN glyphs starting at SRC to terminal output codes.
Set CODING->produced to the byte-length of the resulting byte
@@ -827,37 +831,40 @@ encode_terminal_code (src, src_len, coding)
int nchars, nbytes, required;
register int tlen = GLYPH_TABLE_LENGTH;
register Lisp_Object *tbase = GLYPH_TABLE_BASE;
+ Lisp_Object charset_list;
/* Allocate sufficient size of buffer to store all characters in
multibyte-form. But, it may be enlarged on demand if
Vglyph_table contains a string. */
required = MAX_MULTIBYTE_LENGTH * src_len;
- if (encode_terminal_bufsize < required)
+ if (encode_terminal_src_size < required)
{
- if (encode_terminal_bufsize == 0)
- encode_terminal_buf = xmalloc (required);
+ if (encode_terminal_src_size == 0)
+ encode_terminal_src = xmalloc (required);
else
- encode_terminal_buf = xrealloc (encode_terminal_buf, required);
- encode_terminal_bufsize = required;
+ encode_terminal_src = xrealloc (encode_terminal_src, required);
+ encode_terminal_src_size = required;
}
- buf = encode_terminal_buf;
+ charset_list = coding_charset_list (coding);
+
+ buf = encode_terminal_src;
nchars = 0;
while (src < src_end)
{
/* We must skip glyphs to be padded for a wide character. */
if (! CHAR_GLYPH_PADDING_P (*src))
{
+ int c;
+ Lisp_Object string;
+
+ string = Qnil;
g = GLYPH_FROM_CHAR_GLYPH (src[0]);
if (g < 0 || g >= tlen)
{
/* This glyph doesn't has an entry in Vglyph_table. */
- if (CHAR_VALID_P (src->u.ch, 0))
- buf += CHAR_STRING (src->u.ch, buf);
- else
- *buf++ = SPACEGLYPH;
- nchars++;
+ c = src->u.ch;
}
else
{
@@ -866,69 +873,87 @@ encode_terminal_code (src, src_len, coding)
GLYPH_FOLLOW_ALIASES (tbase, tlen, g);
if (GLYPH_SIMPLE_P (tbase, tlen, g))
- {
- int c = FAST_GLYPH_CHAR (g);
+ /* We set the multi-byte form of a character in G
+ (that should be an ASCII character) at WORKBUF. */
+ c = FAST_GLYPH_CHAR (g);
+ else
+ /* We have a string in Vglyph_table. */
+ string = tbase[g];
+ }
- if (CHAR_VALID_P (c, 0))
- buf += CHAR_STRING (c, buf);
- else
- *buf++ = SPACEGLYPH;
+ if (NILP (string))
+ {
+ if (char_charset (c, charset_list, NULL))
+ {
+ /* Store the multibyte form of C at BUF. */
+ buf += CHAR_STRING (c, buf);
nchars++;
}
else
{
- /* We have a string in Vglyph_table. */
- Lisp_Object string;
-
- string = tbase[g];
- if (! STRING_MULTIBYTE (string))
- string = string_to_multibyte (string);
- nbytes = buf - encode_terminal_buf;
- if (encode_terminal_bufsize < nbytes + SBYTES (string))
+ /* C is not encodable. */
+ *buf++ = '?';
+ nchars++;
+ while (src + 1 < src_end && CHAR_GLYPH_PADDING_P (src[1]))
{
- encode_terminal_bufsize = nbytes + SBYTES (string);
- encode_terminal_buf = xrealloc (encode_terminal_buf,
- encode_terminal_bufsize);
- buf = encode_terminal_buf + nbytes;
+ *buf++ = '?';
+ nchars++;
+ src++;
}
- bcopy (SDATA (string), buf, SBYTES (string));
- buf += SBYTES (string);
- nchars += SCHARS (string);
}
}
+ else
+ {
+ unsigned char *p = SDATA (string), *pend = p + SBYTES (string);
+
+ if (! STRING_MULTIBYTE (string))
+ string = string_to_multibyte (string);
+ nbytes = buf - encode_terminal_src;
+ if (encode_terminal_src_size < nbytes + SBYTES (string))
+ {
+ encode_terminal_src_size = nbytes + SBYTES (string);
+ encode_terminal_src = xrealloc (encode_terminal_src,
+ encode_terminal_src_size);
+ buf = encode_terminal_src + nbytes;
+ }
+ bcopy (SDATA (string), buf, SBYTES (string));
+ buf += SBYTES (string);
+ nchars += SCHARS (string);
+ }
}
src++;
}
- nbytes = buf - encode_terminal_buf;
- coding->src_multibyte = 1;
- coding->dst_multibyte = 0;
- if (SYMBOLP (coding->pre_write_conversion)
- && ! NILP (Ffboundp (coding->pre_write_conversion)))
+ if (nchars == 0)
{
- run_pre_write_conversin_on_c_str (&encode_terminal_buf,
- &encode_terminal_bufsize,
- nchars, nbytes, coding);
- nchars = coding->produced_char;
- nbytes = coding->produced;
+ coding->produced = 0;
+ return NULL;
}
- required = nbytes + encoding_buffer_size (coding, nbytes);
- if (encode_terminal_bufsize < required)
+
+ nbytes = buf - encode_terminal_src;
+ coding->source = encode_terminal_src;
+ if (encode_terminal_dst_size == 0)
{
- encode_terminal_bufsize = required;
- encode_terminal_buf = xrealloc (encode_terminal_buf, required);
+ encode_terminal_dst_size = encode_terminal_src_size;
+ encode_terminal_dst = xmalloc (encode_terminal_dst_size);
}
+ coding->destination = encode_terminal_dst;
+ coding->dst_bytes = encode_terminal_dst_size;
+ encode_coding_object (coding, Qnil, 0, 0, nchars, nbytes, Qnil);
+ /* coding->destination may have been reallocated. */
+ encode_terminal_dst = coding->destination;
+ encode_terminal_dst_size = coding->dst_bytes;
- encode_coding (coding, encode_terminal_buf, encode_terminal_buf + nbytes,
- nbytes, encode_terminal_bufsize - nbytes);
- return encode_terminal_buf + nbytes;
+ return (encode_terminal_dst);
}
+
void
write_glyphs (string, len)
register struct glyph *string;
register int len;
{
+ int produced, consumed;
struct frame *sf = XFRAME (selected_frame);
struct frame *f = updating_frame ? updating_frame : sf;
unsigned char *conversion_buffer;
@@ -1756,28 +1781,24 @@ produce_glyphs (it)
it->pixel_width = nspaces;
it->nglyphs = nspaces;
}
- else if (SINGLE_BYTE_CHAR_P (it->c))
+ else if (CHAR_BYTE8_P (it->c))
{
if (unibyte_display_via_language_environment
- && (it->c >= 0240
- || !NILP (Vnonascii_translation_table)))
+ && (it->c >= 0240))
{
- int charset;
-
it->char_to_display = unibyte_char_to_multibyte (it->c);
- charset = CHAR_CHARSET (it->char_to_display);
- it->pixel_width = CHARSET_WIDTH (charset);
+ it->pixel_width = CHAR_WIDTH (it->char_to_display);
it->nglyphs = it->pixel_width;
if (it->glyph_row)
append_glyph (it);
}
else
{
- /* Coming here means that it->c is from display table, thus we
- must send the code as is to the terminal. Although there's
- no way to know how many columns it occupies on a screen, it
- is a good assumption that a single byte code has 1-column
- width. */
+ /* Coming here means that it->c is from display table, thus
+ we must send the raw 8-bit byte as is to the terminal.
+ Although there's no way to know how many columns it
+ occupies on a screen, it is a good assumption that a
+ single byte code has 1-column width. */
it->pixel_width = it->nglyphs = 1;
if (it->glyph_row)
append_glyph (it);
@@ -1785,13 +1806,7 @@ produce_glyphs (it)
}
else
{
- /* A multi-byte character. The display width is fixed for all
- characters of the set. Some of the glyphs may have to be
- ignored because they are already displayed in a continued
- line. */
- int charset = CHAR_CHARSET (it->c);
-
- it->pixel_width = CHARSET_WIDTH (charset);
+ it->pixel_width = CHAR_WIDTH (it->c);
it->nglyphs = it->pixel_width;
if (it->glyph_row)
@@ -2313,7 +2328,8 @@ term_init (terminal_type)
int status;
struct frame *sf = XFRAME (selected_frame);
- encode_terminal_bufsize = 0;
+ encode_terminal_src_size = 0;
+ encode_terminal_dst_size = 0;
#ifdef WINDOWSNT
initialize_w32_display ();
diff --git a/src/w16select.c b/src/w16select.c
index cd3098bae52..50c107a0789 100644
--- a/src/w16select.c
+++ b/src/w16select.c
@@ -39,7 +39,7 @@ Boston, MA 02110-1301, USA. */
#include "frame.h" /* Need this to get the X window of selected_frame */
#include "blockinput.h"
#include "buffer.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "composite.h"
diff --git a/src/w32bdf.c b/src/w32bdf.c
index 89d88574188..fbdb7f4c373 100644
--- a/src/w32bdf.c
+++ b/src/w32bdf.c
@@ -29,7 +29,7 @@ Boston, MA 02110-1301, USA. */
#endif
#include "lisp.h"
-#include "charset.h"
+#include "character.h"
#include "keyboard.h"
#include "frame.h"
#include "dispextern.h"
@@ -202,7 +202,7 @@ set_bdf_font_info(bdffont *fontp)
else if (search_file_line("CHARSET_ENCODING", start, len,
(char **)&p, (char **)&q) == 1)
{
- fontp->encoding = get_quoted_string(p, q);
+ fontp->encoding = get_quoted_string(p, q);
}
else if (search_file_line("SLANT", start, len,
(char **)&p, (char **)&q) == 1)
@@ -790,7 +790,7 @@ struct font_info *w32_load_bdf_font (struct frame *f, char *fontname,
uses this font. So, we set informatoin in fontp->encoding[1]
which is never used by any charset. If mapping can't be
decided, set FONT_ENCODING_NOT_DECIDED. */
- fontp->encoding[1] = FONT_ENCODING_NOT_DECIDED;
+ fontp->encoding_type = FONT_ENCODING_NOT_DECIDED;
fontp->baseline_offset = bdf_font->yoffset;
fontp->relative_compose = bdf_font->relative_compose;
fontp->default_ascent = bdf_font->default_ascent;
diff --git a/src/w32console.c b/src/w32console.c
index 0fc652bf55f..b660d74f852 100644
--- a/src/w32console.c
+++ b/src/w32console.c
@@ -32,7 +32,7 @@ Boston, MA 02110-1301, USA.
#include <string.h>
#include "lisp.h"
-#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "disptab.h"
#include "termhooks.h"
diff --git a/src/w32fns.c b/src/w32fns.c
index 66cac34b2d9..b548c2c1efe 100644
--- a/src/w32fns.c
+++ b/src/w32fns.c
@@ -29,22 +29,23 @@ Boston, MA 02110-1301, USA. */
#include <errno.h>
#include "lisp.h"
-#include "charset.h"
-#include "dispextern.h"
#include "w32term.h"
-#include "keyboard.h"
#include "frame.h"
#include "window.h"
#include "buffer.h"
-#include "fontset.h"
#include "intervals.h"
+#include "dispextern.h"
+#include "keyboard.h"
#include "blockinput.h"
#include "epaths.h"
-#include "w32heap.h"
-#include "termhooks.h"
+#include "character.h"
+#include "charset.h"
#include "coding.h"
#include "ccl.h"
+#include "fontset.h"
#include "systime.h"
+#include "termhooks.h"
+#include "w32heap.h"
#include "bitmaps/gray.xbm"
@@ -4247,7 +4248,7 @@ This function is an internal primitive--use `make-frame' instead. */)
{
tem = Fquery_fontset (font, Qnil);
if (STRINGP (tem))
- font = x_new_fontset (f, SDATA (tem));
+ font = x_new_fontset (f, tem);
else
font = x_new_font (f, SDATA (font));
}
@@ -4645,6 +4646,8 @@ w32_load_system_font (f,fontname,size)
fontp->average_width = font->tm.tmAveCharWidth;
}
+
+ fontp->charset = -1;
charset = xlfd_charset_of_font (fontname);
/* Cache the W32 codepage for a font. This makes w32_encode_char
@@ -4671,7 +4674,7 @@ w32_load_system_font (f,fontname,size)
(0:0x20..0x7F, 1:0xA0..0xFF,
(0:0x2020..0x7F7F, 1:0xA0A0..0xFFFF, 3:0x20A0..0x7FFF,
2:0xA020..0xFF7F). For the moment, we don't know which charset
- uses this font. So, we set information in fontp->encoding[1]
+ uses this font. So, we set information in fontp->encoding_type
which is never used by any charset. If mapping can't be
decided, set FONT_ENCODING_NOT_DECIDED. */
@@ -4679,9 +4682,9 @@ w32_load_system_font (f,fontname,size)
type FONT_ENCODING_NOT_DECIDED. */
encoding = strrchr (fontp->name, '-');
if (encoding && strnicmp (encoding+1, "sjis", 4) == 0)
- fontp->encoding[1] = 4;
+ fontp->encoding_type = 4;
else
- fontp->encoding[1] = FONT_ENCODING_NOT_DECIDED;
+ fontp->encoding_type = FONT_ENCODING_NOT_DECIDED;
/* The following three values are set to 0 under W32, which is
what they get set to if XGetFontProperty fails under X. */
@@ -4835,12 +4838,16 @@ x_to_w32_charset (lpcs)
if (strncmp (lpcs, "*-#", 3) == 0)
return atoi (lpcs + 3);
+ /* All Windows fonts qualify as unicode. */
+ if (!strncmp (lpcs, "iso10646", 8))
+ return DEFAULT_CHARSET;
+
/* Handle wildcards by ignoring them; eg. treat "big5*-*" as "big5". */
charset = alloca (len + 1);
strcpy (charset, lpcs);
lpcs = strchr (charset, '*');
if (lpcs)
- *lpcs = 0;
+ *lpcs = '\0';
/* Look through w32-charset-info-alist for the character set.
Format of each entry is
@@ -4908,11 +4915,26 @@ x_to_w32_charset (lpcs)
static char *
-w32_to_x_charset (fncharset)
+w32_to_x_charset (fncharset, matching)
int fncharset;
+ char *matching;
{
static char buf[32];
Lisp_Object charset_type;
+ int match_len = 0;
+
+ if (matching)
+ {
+ /* If fully specified, accept it as it is. Otherwise use a
+ substring match. */
+ char *wildcard = strchr (matching, '*');
+ if (wildcard)
+ *wildcard = '\0';
+ else if (strchr (matching, '-'))
+ return matching;
+
+ match_len = strlen (matching);
+ }
switch (fncharset)
{
@@ -4997,6 +5019,7 @@ w32_to_x_charset (fncharset)
{
Lisp_Object rest;
char * best_match = NULL;
+ int matching_found = 0;
/* Look through w32-charset-info-alist for the character set.
Prefer ISO codepages, and prefer lower numbers in the ISO
@@ -5032,12 +5055,34 @@ w32_to_x_charset (fncharset)
/* If we don't have a match already, then this is the
best. */
if (!best_match)
- best_match = x_charset;
- /* If this is an ISO codepage, and the best so far isn't,
- then this is better. */
- else if (strnicmp (best_match, "iso", 3) != 0
- && strnicmp (x_charset, "iso", 3) == 0)
- best_match = x_charset;
+ {
+ best_match = x_charset;
+ if (matching && !strnicmp (x_charset, matching, match_len))
+ matching_found = 1;
+ }
+ /* If we already found a match for MATCHING, then
+ only consider other matches. */
+ else if (matching_found
+ && strnicmp (x_charset, matching, match_len))
+ continue;
+ /* If this matches what we want, and the best so far doesn't,
+ then this is better. */
+ else if (!matching_found && matching
+ && !strnicmp (x_charset, matching, match_len))
+ {
+ best_match = x_charset;
+ matching_found = 1;
+ }
+ /* If this is fully specified, and the best so far isn't,
+ then this is better. */
+ else if ((!strchr (best_match, '-') && strchr (x_charset, '-'))
+ /* If this is an ISO codepage, and the best so far isn't,
+ then this is better, but only if it fully specifies the
+ encoding. */
+ || (strnicmp (best_match, "iso", 3) != 0
+ && strnicmp (x_charset, "iso", 3) == 0
+ && strchr (x_charset, '-')))
+ best_match = x_charset;
/* If both are ISO8859 codepages, choose the one with the
lowest number in the encoding field. */
else if (strnicmp (best_match, "iso8859-", 8) == 0
@@ -5058,7 +5103,18 @@ w32_to_x_charset (fncharset)
return buf;
}
- strncpy(buf, best_match, 31);
+ strncpy (buf, best_match, 31);
+ /* If the charset is not fully specified, put -0 on the end. */
+ if (!strchr (best_match, '-'))
+ {
+ int pos = strlen (best_match);
+ /* Charset specifiers shouldn't be very long. If it is a made
+ up one, truncating it should not do any harm since it isn't
+ recognized anyway. */
+ if (pos > 29)
+ pos = 29;
+ strcpy (buf + pos, "-0");
+ }
buf[31] = '\0';
return buf;
}
@@ -5158,7 +5214,8 @@ w32_to_all_x_charsets (fncharset)
{
Lisp_Object rest;
/* Look through w32-charset-info-alist for the character set.
- Only return charsets for codepages which are installed.
+ Only return fully specified charsets for codepages which are
+ installed.
Format of each entry in Vw32_charset_info_alist is
(CHARSET_NAME . (WINDOWS_CHARSET . CODEPAGE)).
@@ -5181,6 +5238,9 @@ w32_to_all_x_charsets (fncharset)
w32_charset = XCAR (XCDR (this_entry));
codepage = XCDR (XCDR (this_entry));
+ if (!strchr (SDATA (x_charset), '-'))
+ continue;
+
/* Look for Same charset and a valid codepage (or non-int
which means ignore). */
if (EQ (w32_charset, charset_type)
@@ -5211,9 +5271,6 @@ w32_codepage_for_font (char *fontname)
Lisp_Object codepage, entry;
char *charset_str, *charset, *end;
- if (NILP (Vw32_charset_info_alist))
- return CP_DEFAULT;
-
/* Extract charset part of font string. */
charset = xlfd_charset_of_font (fontname);
@@ -5239,6 +5296,12 @@ w32_codepage_for_font (char *fontname)
*end = '\0';
}
+ if (!strcmp (charset, "iso10646"))
+ return CP_UNICODE;
+
+ if (NILP (Vw32_charset_info_alist))
+ return CP_DEFAULT;
+
entry = Fassoc (build_string(charset), Vw32_charset_info_alist);
if (NILP (entry))
return CP_UNKNOWN;
@@ -5271,7 +5334,6 @@ w32_to_x_font (lplogfont, lpxstr, len, specific_charset)
char *fontname_dash;
int display_resy = (int) one_w32_display_info.resy;
int display_resx = (int) one_w32_display_info.resx;
- int bufsz;
struct coding_system coding;
if (!lpxstr) abort ();
@@ -5293,12 +5355,14 @@ w32_to_x_font (lplogfont, lpxstr, len, specific_charset)
coding.mode |= CODING_MODE_LAST_BLOCK;
/* We explicitely disable composition handling because selection
data should not contain any composition sequence. */
- coding.composing = COMPOSITION_DISABLED;
- bufsz = decoding_buffer_size (&coding, LF_FACESIZE);
+ coding.common_flags &= ~CODING_ANNOTATION_MASK;
+
+ coding.dst_bytes = LF_FACESIZE * 2;
+ coding.destination = (unsigned char *) xmalloc (coding.dst_bytes + 1);
+ decode_coding_c_string (&coding, lplogfont->lfFaceName,
+ strlen(lplogfont->lfFaceName), Qnil);
+ fontname = coding.destination;
- fontname = alloca(sizeof(*fontname) * bufsz);
- decode_coding (&coding, lplogfont->lfFaceName, fontname,
- strlen(lplogfont->lfFaceName), bufsz - 1);
*(fontname + coding.produced) = '\0';
/* Replace dashes with underscores so the dashes are not
@@ -5342,8 +5406,7 @@ w32_to_x_font (lplogfont, lpxstr, len, specific_charset)
((lplogfont->lfPitchAndFamily & 0x3) == VARIABLE_PITCH)
? 'p' : 'c', /* spacing */
width_pixels, /* avg width */
- specific_charset ? specific_charset
- : w32_to_x_charset (lplogfont->lfCharSet)
+ w32_to_x_charset (lplogfont->lfCharSet, specific_charset)
/* charset registry and encoding */
);
@@ -5421,26 +5484,24 @@ x_to_w32_font (lpxstr, lplogfont)
if (fields > 0 && name[0] != '*')
{
- int bufsize;
- unsigned char *buf;
-
+ Lisp_Object string = build_string (name);
setup_coding_system
(Fcheck_coding_system (Vlocale_coding_system), &coding);
- coding.src_multibyte = 1;
- coding.dst_multibyte = 0;
- /* Need to set COMPOSITION_DISABLED, otherwise Emacs crashes in
- encode_coding_iso2022 trying to dereference a null pointer. */
- coding.composing = COMPOSITION_DISABLED;
- if (coding.type == coding_type_iso2022)
- coding.flags |= CODING_FLAG_ISO_SAFE;
- bufsize = encoding_buffer_size (&coding, strlen (name));
- buf = (unsigned char *) alloca (bufsize);
- coding.mode |= CODING_MODE_LAST_BLOCK;
- encode_coding (&coding, name, buf, strlen (name), bufsize);
+ coding.mode |= (CODING_MODE_SAFE_ENCODING | CODING_MODE_LAST_BLOCK);
+ /* Disable composition/charset annotation. */
+ coding.common_flags &= ~CODING_ANNOTATION_MASK;
+ coding.dst_bytes = SCHARS (string) * 2;
+
+ coding.destination = (unsigned char *) xmalloc (coding.dst_bytes);
+ encode_coding_object (&coding, string, 0, 0,
+ SCHARS (string), SBYTES (string), Qnil);
if (coding.produced >= LF_FACESIZE)
coding.produced = LF_FACESIZE - 1;
- buf[coding.produced] = 0;
- strcpy (lplogfont->lfFaceName, buf);
+
+ coding.destination[coding.produced] = '\0';
+
+ strcpy (lplogfont->lfFaceName, coding.destination);
+ xfree (coding.destination);
}
else
{
@@ -5818,14 +5879,17 @@ enum_font_cb2 (lplf, lptm, FontType, lpef)
if (charset
&& strncmp (charset, "*-*", 3) != 0
&& lpef->logfont.lfCharSet == DEFAULT_CHARSET
- && strcmp (charset, w32_to_x_charset (DEFAULT_CHARSET)) != 0)
+ && strcmp (charset, w32_to_x_charset (DEFAULT_CHARSET, NULL)) != 0)
return 1;
}
if (charset)
charset_list = Fcons (build_string (charset), Qnil);
else
- charset_list = w32_to_all_x_charsets (lplf->elfLogFont.lfCharSet);
+ /* Always prefer unicode. */
+ charset_list
+ = Fcons (build_string ("iso10646-1"),
+ w32_to_all_x_charsets (lplf->elfLogFont.lfCharSet));
/* Loop through the charsets. */
for ( ; CONSP (charset_list); charset_list = Fcdr (charset_list))
@@ -5833,14 +5897,15 @@ enum_font_cb2 (lplf, lptm, FontType, lpef)
Lisp_Object this_charset = Fcar (charset_list);
charset = SDATA (this_charset);
+ enum_font_maybe_add_to_list (lpef, &(lplf->elfLogFont),
+ charset, width);
+
/* List bold and italic variations if w32-enable-synthesized-fonts
is non-nil and this is a plain font. */
if (w32_enable_synthesized_fonts
&& lplf->elfLogFont.lfWeight == FW_NORMAL
&& lplf->elfLogFont.lfItalic == FALSE)
{
- enum_font_maybe_add_to_list (lpef, &(lplf->elfLogFont),
- charset, width);
/* bold. */
lplf->elfLogFont.lfWeight = FW_BOLD;
enum_font_maybe_add_to_list (lpef, &(lplf->elfLogFont),
@@ -5854,9 +5919,6 @@ enum_font_cb2 (lplf, lptm, FontType, lpef)
enum_font_maybe_add_to_list (lpef, &(lplf->elfLogFont),
charset, width);
}
- else
- enum_font_maybe_add_to_list (lpef, &(lplf->elfLogFont),
- charset, width);
}
}
@@ -7269,7 +7331,7 @@ x_create_tip_frame (dpyinfo, parms, text)
{
tem = Fquery_fontset (font, Qnil);
if (STRINGP (tem))
- font = x_new_fontset (f, SDATA (tem));
+ font = x_new_fontset (f, tem);
else
font = x_new_font (f, SDATA (font));
}
@@ -8926,6 +8988,7 @@ versions of Windows) characters. */);
find_ccl_program_func = w32_find_ccl_program;
query_font_func = w32_query_font;
set_frame_fontset_func = x_set_font;
+ get_font_repertory_func = x_get_font_repertory;
check_window_system_func = check_w32;
diff --git a/src/w32select.c b/src/w32select.c
index 99825c23911..7b394943434 100644
--- a/src/w32select.c
+++ b/src/w32select.c
@@ -100,6 +100,9 @@ static void setup_config (void);
static BOOL WINAPI enum_locale_callback (/*const*/ char* loc_string);
static UINT cp_from_locale (LCID lcid, UINT format);
static Lisp_Object coding_from_cp (UINT codepage);
+static Lisp_Object validate_coding_system (Lisp_Object coding_system);
+static void setup_windows_coding_system (Lisp_Object coding_system,
+ struct coding_system * coding);
/* A remnant from X11: Symbol for the CLIPBORD selection type. Other
@@ -213,63 +216,36 @@ convert_to_handle_as_ascii (void)
static HGLOBAL
convert_to_handle_as_coded (Lisp_Object coding_system)
{
- HGLOBAL htext = NULL, htext2;
- int nbytes;
- unsigned char *src;
+ HGLOBAL htext;
unsigned char *dst = NULL;
- int bufsize;
struct coding_system coding;
- Lisp_Object string = Qnil;
ONTRACE (fprintf (stderr, "convert_to_handle_as_coded: %s\n",
SDATA (SYMBOL_NAME (coding_system))));
- setup_coding_system (Fcheck_coding_system (coding_system), &coding);
- coding.src_multibyte = 1;
- coding.dst_multibyte = 0;
- /* Need to set COMPOSITION_DISABLED, otherwise Emacs crashes in
- encode_coding_iso2022 trying to dereference a null pointer. */
- coding.composing = COMPOSITION_DISABLED;
- if (coding.type == coding_type_iso2022)
- coding.flags |= CODING_FLAG_ISO_SAFE;
- coding.mode |= CODING_MODE_LAST_BLOCK;
- /* Force DOS line-ends. */
- coding.eol_type = CODING_EOL_CRLF;
-
- if (SYMBOLP (coding.pre_write_conversion)
- && !NILP (Ffboundp (coding.pre_write_conversion)))
- string = run_pre_post_conversion_on_str (current_text, &coding, 1);
- else
- string = current_text;
-
- nbytes = SBYTES (string);
- src = SDATA (string);
+ setup_windows_coding_system (coding_system, &coding);
+ coding.dst_bytes = SBYTES(current_text) * 2;
+ coding.destination = (unsigned char *) xmalloc (coding.dst_bytes);
+ encode_coding_object (&coding, current_text, 0, 0,
+ SCHARS (current_text), SBYTES (current_text), Qnil);
- bufsize = encoding_buffer_size (&coding, nbytes) +2;
- htext = GlobalAlloc (GMEM_MOVEABLE | GMEM_DDESHARE, bufsize);
+ htext = GlobalAlloc (GMEM_MOVEABLE | GMEM_DDESHARE, coding.produced +2);
if (htext != NULL)
dst = (unsigned char *) GlobalLock (htext);
if (dst != NULL)
{
- encode_coding (&coding, src, dst, nbytes, bufsize-2);
+ memcpy (dst, coding.destination, coding.produced);
/* Add the string terminator. Add two NULs in case we are
producing Unicode here. */
dst[coding.produced] = dst[coding.produced+1] = '\0';
- }
-
- if (dst != NULL)
- GlobalUnlock (htext);
- if (htext != NULL)
- {
- /* Shrink data block to actual size. */
- htext2 = GlobalReAlloc (htext, coding.produced+2,
- GMEM_MOVEABLE | GMEM_DDESHARE);
- if (htext2 != NULL) htext = htext2;
+ GlobalUnlock (htext);
}
+ xfree (coding.destination);
+
return htext;
}
@@ -518,17 +494,26 @@ setup_config (void)
const char *cp;
char *end;
int slen;
- Lisp_Object new_coding_system;
+ Lisp_Object coding_system;
+ Lisp_Object dos_coding_system;
CHECK_SYMBOL (Vselection_coding_system);
- /* Check if we have it cached */
- new_coding_system = NILP (Vnext_selection_coding_system) ?
+ coding_system = NILP (Vnext_selection_coding_system) ?
Vselection_coding_system : Vnext_selection_coding_system;
+
+ dos_coding_system = validate_coding_system (coding_system);
+ if (NILP (dos_coding_system))
+ Fsignal (Qerror,
+ list2 (build_string ("Coding system is invalid or doesn't have "
+ "an eol variant for dos line ends"),
+ coding_system));
+
+ /* Check if we have it cached */
if (!NILP (cfg_coding_system)
- && EQ (cfg_coding_system, new_coding_system))
+ && EQ (cfg_coding_system, dos_coding_system))
return;
- cfg_coding_system = new_coding_system;
+ cfg_coding_system = dos_coding_system;
/* Set some sensible fallbacks */
cfg_codepage = ANSICP;
@@ -637,12 +622,61 @@ coding_from_cp (UINT codepage)
char buffer[30];
sprintf (buffer, "cp%d-dos", (int) codepage);
return intern (buffer);
- /* We don't need to check that this coding system exists right here,
- because that is done when the coding system is actually
- instantiated, i.e. it is passed through Fcheck_coding_system()
- there. */
+ /* We don't need to check that this coding system actually exists
+ right here, because that is done later for all coding systems
+ used, regardless of where they originate. */
}
+static Lisp_Object
+validate_coding_system (Lisp_Object coding_system)
+{
+ Lisp_Object eol_type;
+
+ /* Make sure the input is valid. */
+ if (NILP (Fcoding_system_p (coding_system)))
+ return Qnil;
+
+ /* Make sure we use a DOS coding system as mandated by the system
+ specs. */
+ eol_type = Fcoding_system_eol_type (coding_system);
+
+ /* Already a DOS coding system? */
+ if (EQ (eol_type, make_number (1)))
+ return coding_system;
+
+ /* Get EOL_TYPE vector of the base of CODING_SYSTEM. */
+ if (!VECTORP (eol_type))
+ {
+ eol_type = Fcoding_system_eol_type (Fcoding_system_base (coding_system));
+ if (!VECTORP (eol_type))
+ return Qnil;
+ }
+
+ return AREF (eol_type, 1);
+}
+
+static void
+setup_windows_coding_system (Lisp_Object coding_system,
+ struct coding_system * coding)
+{
+ memset (coding, 0, sizeof (*coding));
+ setup_coding_system (coding_system, coding);
+
+ /* Unset CODING_ANNOTATE_COMPOSITION_MASK. Previous code had
+ comments about crashes in encode_coding_iso2022 trying to
+ dereference a null pointer when composition was on. Selection
+ data should not contain any composition sequence on Windows.
+
+ CODING_ANNOTATION_MASK also includes
+ CODING_ANNOTATE_DIRECTION_MASK and CODING_ANNOTATE_CHARSET_MASK,
+ which both apply to ISO6429 only. We don't know if these really
+ need to be unset on Windows, but it probably doesn't hurt
+ either. */
+ coding->mode &= ~CODING_ANNOTATION_MASK;
+ coding->mode |= CODING_MODE_LAST_BLOCK | CODING_MODE_SAFE_ENCODING;
+}
+
+
DEFUN ("w32-set-clipboard-data", Fw32_set_clipboard_data,
Sw32_set_clipboard_data, 1, 2, 0,
@@ -847,10 +881,9 @@ DEFUN ("w32-get-clipboard-data", Fw32_get_clipboard_data,
if (require_decoding)
{
- int bufsize;
- unsigned char *buf;
struct coding_system coding;
Lisp_Object coding_system = Qnil;
+ Lisp_Object dos_coding_system;
/* `next-selection-coding-system' should override everything,
even when the locale passed by the system disagrees. The
@@ -912,27 +945,16 @@ DEFUN ("w32-get-clipboard-data", Fw32_get_clipboard_data,
coding_system = Vselection_coding_system;
Vnext_selection_coding_system = Qnil;
- setup_coding_system (Fcheck_coding_system (coding_system), &coding);
- coding.src_multibyte = 0;
- coding.dst_multibyte = 1;
- coding.mode |= CODING_MODE_LAST_BLOCK;
- /* We explicitely disable composition handling because
- selection data should not contain any composition
- sequence. */
- coding.composing = COMPOSITION_DISABLED;
- /* Force DOS line-ends. */
- coding.eol_type = CODING_EOL_CRLF;
-
- bufsize = decoding_buffer_size (&coding, nbytes);
- buf = (unsigned char *) xmalloc (bufsize);
- decode_coding (&coding, src, buf, nbytes, bufsize);
- Vlast_coding_system_used = coding.symbol;
- ret = make_string_from_bytes ((char *) buf,
- coding.produced_char, coding.produced);
- xfree (buf);
- if (SYMBOLP (coding.post_read_conversion)
- && !NILP (Ffboundp (coding.post_read_conversion)))
- ret = run_pre_post_conversion_on_str (ret, &coding, 0);
+ dos_coding_system = validate_coding_system (coding_system);
+ if (!NILP (dos_coding_system))
+ {
+ setup_windows_coding_system (dos_coding_system, &coding);
+ coding.source = src;
+ decode_coding_object (&coding, Qnil, 0, 0, nbytes, nbytes, Qt);
+ ret = coding.dst_object;
+
+ Vlast_coding_system_used = CODING_ID_NAME (coding.id);
+ }
}
else
{
@@ -1017,10 +1039,11 @@ and t is the same as `SECONDARY'. */)
{
Lisp_Object val = Qnil;
+ setup_config ();
+
if (OpenClipboard (NULL))
{
UINT format = 0;
- setup_config ();
while ((format = EnumClipboardFormats (format)))
/* Check CF_TEXT in addition to cfg_clipboard_type,
because we can fall back on that if CF_UNICODETEXT is
diff --git a/src/w32term.c b/src/w32term.c
index 294059aa77b..ddc15e77259 100644
--- a/src/w32term.c
+++ b/src/w32term.c
@@ -24,25 +24,21 @@ Boston, MA 02110-1301, USA. */
#include <stdio.h>
#include <stdlib.h>
#include "lisp.h"
-#include "charset.h"
#include "blockinput.h"
-
-#include "w32heap.h"
#include "w32term.h"
-#include "w32bdf.h"
-#include <shellapi.h>
#include "systty.h"
#include "systime.h"
-#include "atimer.h"
-#include "keymap.h"
#include <ctype.h>
#include <errno.h>
#include <setjmp.h>
#include <sys/stat.h>
-#include "keyboard.h"
+#include "charset.h"
+#include "character.h"
+#include "coding.h"
+#include "ccl.h"
#include "frame.h"
#include "dispextern.h"
#include "fontset.h"
@@ -53,9 +49,15 @@ Boston, MA 02110-1301, USA. */
#include "disptab.h"
#include "buffer.h"
#include "window.h"
+#include "keyboard.h"
#include "intervals.h"
-#include "composite.h"
-#include "coding.h"
+#include "process.h"
+#include "atimer.h"
+#include "keymap.h"
+
+#include "w32heap.h"
+#include "w32bdf.h"
+#include <shellapi.h>
#define abs(x) ((x) < 0 ? -(x) : (x))
@@ -116,6 +118,31 @@ struct w32_display_info *x_display_list;
FONT-LIST-CACHE records previous values returned by x-list-fonts. */
Lisp_Object w32_display_name_list;
+
+#ifndef GLYPHSET
+/* Pre Windows 2000, this was not available, but define it here so
+ that Emacs compiled on such a platform will run on newer versions. */
+
+typedef struct tagWCRANGE
+{
+ WCHAR wcLow;
+ USHORT cGlyphs;
+} WCRANGE;
+
+typedef struct tagGLYPHSET
+{
+ DWORD cbThis;
+ DWORD flAccel;
+ DWORD cGlyphsSupported;
+ DWORD cRanges;
+ WCRANGE ranges[1];
+} GLYPHSET;
+
+#endif
+
+/* Dynamic linking to GetFontUnicodeRanges (not available on 95, 98, ME). */
+DWORD (PASCAL *pfnGetFontUnicodeRanges) (HDC device, GLYPHSET *ranges);
+
/* Frame being updated by update_frame. This is declared in term.c.
This is set by update_begin and looked at by all the
w32 functions. It is zero while not inside an update.
@@ -817,7 +844,8 @@ w32_reset_terminal_modes (void)
/* Function prototypes of this page. */
XCharStruct *w32_per_char_metric P_ ((XFontStruct *, wchar_t *, int));
-static int w32_encode_char P_ ((int, wchar_t *, struct font_info *, int *));
+static int w32_encode_char P_ ((int, wchar_t *, struct font_info *,
+ struct charset *, int *));
/* Get metrics of character CHAR2B in FONT. Value is always non-null.
@@ -1081,13 +1109,13 @@ w32_use_unicode_for_codepage (codepage)
the two-byte form of C. Encoding is returned in *CHAR2B. */
static int /* enum w32_char_font_type */
-w32_encode_char (c, char2b, font_info, two_byte_p)
+w32_encode_char (c, char2b, font_info, charset, two_byte_p)
int c;
wchar_t *char2b;
struct font_info *font_info;
+ struct charset *charset;
int * two_byte_p;
{
- int charset = CHAR_CHARSET (c);
int codepage;
int unicode_p = 0;
int internal_two_byte_p = 0;
@@ -1095,29 +1123,39 @@ w32_encode_char (c, char2b, font_info, two_byte_p)
XFontStruct *font = font_info->font;
internal_two_byte_p = w32_font_is_double_byte (font);
+ codepage = font_info->codepage;
+
+ /* If font can output unicode, use the original unicode character. */
+ if ( font && !font->bdf && w32_use_unicode_for_codepage (codepage)
+ && c >= 0x100)
+ {
+ *char2b = c;
+ unicode_p = 1;
+ internal_two_byte_p = 1;
+ }
/* FONT_INFO may define a scheme by which to encode byte1 and byte2.
This may be either a program in a special encoder language or a
fixed encoding. */
- if (font_info->font_encoder)
+ else if (font_info->font_encoder)
{
/* It's a program. */
struct ccl_program *ccl = font_info->font_encoder;
if (CHARSET_DIMENSION (charset) == 1)
{
- ccl->reg[0] = charset;
+ ccl->reg[0] = CHARSET_ID (charset);
ccl->reg[1] = XCHAR2B_BYTE2 (char2b);
ccl->reg[2] = -1;
}
else
{
- ccl->reg[0] = charset;
+ ccl->reg[0] = CHARSET_ID (charset);
ccl->reg[1] = XCHAR2B_BYTE1 (char2b);
ccl->reg[2] = XCHAR2B_BYTE2 (char2b);
}
- ccl_driver (ccl, NULL, NULL, 0, 0, NULL);
+ ccl_driver (ccl, NULL, NULL, 0, 0, Qnil);
/* We assume that MSBs are appropriately set/reset by CCL
program. */
@@ -1126,50 +1164,26 @@ w32_encode_char (c, char2b, font_info, two_byte_p)
else
STORE_XCHAR2B (char2b, ccl->reg[1], ccl->reg[2]);
}
- else if (font_info->encoding[charset])
+ else if (font_info->encoding_type)
{
/* Fixed encoding scheme. See fontset.h for the meaning of the
encoding numbers. */
- int enc = font_info->encoding[charset];
+ unsigned char enc = font_info->encoding_type;
if ((enc == 1 || enc == 2)
&& CHARSET_DIMENSION (charset) == 2)
STORE_XCHAR2B (char2b, XCHAR2B_BYTE1 (char2b) | 0x80, XCHAR2B_BYTE2 (char2b));
- if (enc == 1 || enc == 3
- || (enc == 4 && CHARSET_DIMENSION (charset) == 1))
+ if (enc == 1 || enc == 3 || (enc == 4 && CHARSET_DIMENSION (charset) == 1))
STORE_XCHAR2B (char2b, XCHAR2B_BYTE1 (char2b), XCHAR2B_BYTE2 (char2b) | 0x80);
else if (enc == 4)
{
- int sjis1, sjis2;
+ int code = (int) (*char2b);
- ENCODE_SJIS (XCHAR2B_BYTE1 (char2b), XCHAR2B_BYTE2 (char2b),
- sjis1, sjis2);
- STORE_XCHAR2B (char2b, sjis1, sjis2);
+ JIS_TO_SJIS (code);
+ STORE_XCHAR2B (char2b, (code >> 8), (code & 0xFF));
}
}
- codepage = font_info->codepage;
-
- /* If charset is not ASCII or Latin-1, may need to move it into
- Unicode space. */
- if ( font && !font->bdf && w32_use_unicode_for_codepage (codepage)
- && charset != CHARSET_ASCII && charset != charset_latin_iso8859_1
- && charset != CHARSET_8_BIT_CONTROL && charset != CHARSET_8_BIT_GRAPHIC)
- {
- char temp[3];
- temp[0] = XCHAR2B_BYTE1 (char2b);
- temp[1] = XCHAR2B_BYTE2 (char2b);
- temp[2] = '\0';
- if (codepage != CP_UNICODE)
- {
- if (temp[0])
- MultiByteToWideChar (codepage, 0, temp, 2, char2b, 1);
- else
- MultiByteToWideChar (codepage, 0, temp+1, 1, char2b, 1);
- }
- unicode_p = 1;
- internal_two_byte_p = 1;
- }
if (two_byte_p)
*two_byte_p = internal_two_byte_p;
@@ -1187,6 +1201,143 @@ w32_encode_char (c, char2b, font_info, two_byte_p)
}
+/* Return a char-table whose elements are t if the font FONT_INFO
+ contains a glyph for the corresponding character, and nil if not.
+
+ Fixme: For the moment, this function works only for fonts whose
+ glyph encoding is the same as Unicode (e.g. ISO10646-1 fonts). */
+
+Lisp_Object
+x_get_font_repertory (f, font_info)
+ FRAME_PTR f;
+ struct font_info *font_info;
+{
+ XFontStruct *font = (XFontStruct *) font_info->font;
+ Lisp_Object table;
+ int min_byte1, max_byte1, min_byte2, max_byte2;
+
+ table = Fmake_char_table (Qnil, Qnil);
+
+ if (!font->bdf && pfnGetFontUnicodeRanges)
+ {
+ GLYPHSET *glyphset;
+ DWORD glyphset_size;
+ HDC display = get_frame_dc (f);
+ HFONT prev_font;
+ int i;
+
+ prev_font = SelectObject (display, font->hfont);
+
+ /* First call GetFontUnicodeRanges to find out how big a structure
+ we need. */
+ glyphset_size = pfnGetFontUnicodeRanges (display, NULL);
+ if (glyphset_size)
+ {
+ glyphset = (GLYPHSET *) alloca (glyphset_size);
+ glyphset->cbThis = glyphset_size;
+
+ /* Now call it again to get the ranges. */
+ glyphset_size = pfnGetFontUnicodeRanges (display, glyphset);
+
+ if (glyphset_size)
+ {
+ /* Store the ranges in TABLE. */
+ for (i = 0; i < glyphset->cRanges; i++)
+ {
+ int from = glyphset->ranges[i].wcLow;
+ int to = from + glyphset->ranges[i].cGlyphs - 1;
+ char_table_set_range (table, from, to, Qt);
+ }
+ }
+ }
+
+ SelectObject (display, prev_font);
+ release_frame_dc (f, display);
+
+ /* If we got the information we wanted above, then return it. */
+ if (glyphset_size)
+ return table;
+ }
+
+#if 0 /* TODO: Convert to work on Windows so BDF and older platforms work. */
+ /* When GetFontUnicodeRanges is not available or does not work,
+ work it out manually. */
+ min_byte1 = font->min_byte1;
+ max_byte1 = font->max_byte1;
+ min_byte2 = font->min_char_or_byte2;
+ max_byte2 = font->max_char_or_byte2;
+ if (min_byte1 == 0 && max_byte1 == 0)
+ {
+ if (! font->per_char || font->all_chars_exist == True)
+ char_table_set_range (table, min_byte2, max_byte2, Qt);
+ else
+ {
+ XCharStruct *pcm = font->per_char;
+ int from = -1;
+ int i;
+
+ for (i = min_byte2; i <= max_byte2; i++, pcm++)
+ {
+ if (pcm->width == 0 && pcm->rbearing == pcm->lbearing)
+ {
+ if (from >= 0)
+ {
+ char_table_set_range (table, from, i - 1, Qt);
+ from = -1;
+ }
+ }
+ else if (from < 0)
+ from = i;
+ }
+ if (from >= 0)
+ char_table_set_range (table, from, i - 1, Qt);
+ }
+ }
+ else
+ {
+ if (! font->per_char || font->all_chars_exist == True)
+ {
+ int i;
+
+ for (i = min_byte1; i <= max_byte1; i++)
+ char_table_set_range (table,
+ (i << 8) | min_byte2, (i << 8) | max_byte2,
+ Qt);
+ }
+ else
+ {
+ XCharStruct *pcm = font->per_char;
+ int i;
+
+ for (i = min_byte1; i <= max_byte1; i++)
+ {
+ int from = -1;
+ int j;
+
+ for (j = min_byte2; j <= max_byte2; j++, pcm++)
+ {
+ if (pcm->width == 0 && pcm->rbearing == pcm->lbearing)
+ {
+ if (from >= 0)
+ {
+ char_table_set_range (table, (i << 8) | from,
+ (i << 8) | (j - 1), Qt);
+ from = -1;
+ }
+ }
+ else if (from < 0)
+ from = j;
+ }
+ if (from >= 0)
+ char_table_set_range (table, (i << 8) | from,
+ (i << 8) | (j - 1), Qt);
+ }
+ }
+ }
+#endif
+ return table;
+}
+
/***********************************************************************
Glyph display
@@ -1316,9 +1467,9 @@ x_set_mouse_face_gc (s)
face = FACE_FROM_ID (s->f, MOUSE_FACE_ID);
if (s->first_glyph->type == CHAR_GLYPH)
- face_id = FACE_FOR_CHAR (s->f, face, s->first_glyph->u.ch);
+ face_id = FACE_FOR_CHAR (s->f, face, s->first_glyph->u.ch, -1, Qnil);
else
- face_id = FACE_FOR_CHAR (s->f, face, 0);
+ face_id = FACE_FOR_CHAR (s->f, face, 0, -1, Qnil);
s->face = FACE_FROM_ID (s->f, face_id);
PREPARE_FACE_FOR_DISPLAY (s->f, s->face);
@@ -5216,11 +5367,16 @@ x_new_font (f, fontname)
register char *fontname;
{
struct font_info *fontp
- = FS_LOAD_FONT (f, 0, fontname, -1);
+ = FS_LOAD_FONT (f, fontname);
if (!fontp)
return Qnil;
+ if (FRAME_FONT (f) == (XFontStruct *) (fontp->font))
+ /* This font is already set in frame F. There's nothing more to
+ do. */
+ return build_string (fontp->full_name);
+
FRAME_FONT (f) = (XFontStruct *) (fontp->font);
FRAME_BASELINE_OFFSET (f) = fontp->baseline_offset;
FRAME_FONTSET (f) = -1;
@@ -5254,37 +5410,49 @@ x_new_font (f, fontname)
return build_string (fontp->full_name);
}
-/* Give frame F the fontset named FONTSETNAME as its default font, and
- return the full name of that fontset. FONTSETNAME may be a wildcard
- pattern; in that case, we choose some fontset that fits the pattern.
- The return value shows which fontset we chose. */
+/* Give frame F the fontset named FONTSETNAME as its default fontset,
+ and return the full name of that fontset. FONTSETNAME may be a
+ wildcard pattern; in that case, we choose some fontset that fits
+ the pattern. FONTSETNAME may be a font name for ASCII characters;
+ in that case, we create a fontset from that font name.
+
+ The return value shows which fontset we chose.
+ If FONTSETNAME specifies the default fontset, return Qt.
+ If an ASCII font in the specified fontset can't be loaded, return
+ Qnil. */
Lisp_Object
x_new_fontset (f, fontsetname)
struct frame *f;
- char *fontsetname;
+ Lisp_Object fontsetname;
{
- int fontset = fs_query_fontset (build_string (fontsetname), 0);
+ int fontset = fs_query_fontset (fontsetname, 0);
Lisp_Object result;
- if (fontset < 0)
- return Qnil;
-
- if (FRAME_FONTSET (f) == fontset)
+ if (fontset > 0 && FRAME_FONTSET(f) == fontset)
/* This fontset is already set in frame F. There's nothing more
to do. */
return fontset_name (fontset);
+ else if (fontset == 0)
+ /* The default fontset can't be the default font. */
+ return Qt;
- result = x_new_font (f, (SDATA (fontset_ascii (fontset))));
+ if (fontset > 0)
+ result = x_new_font (f, (SDATA (fontset_ascii (fontset))));
+ else
+ result = x_new_font (f, SDATA (fontsetname));
if (!STRINGP (result))
/* Can't load ASCII font. */
return Qnil;
+ if (fontset < 0)
+ fontset = new_fontset_from_font_name (result);
+
/* Since x_new_font doesn't update any fontset information, do it now. */
FRAME_FONTSET(f) = fontset;
- return build_string (fontsetname);
+ return fontset_name (fontset);
}
@@ -6390,13 +6558,22 @@ w32_initialize ()
AttachThreadInput (dwMainThreadId, dwWindowsThreadId, TRUE);
#endif
- /* Load system settings. */
+ /* Dynamically link to optional system components. */
{
UINT smoothing_type;
BOOL smoothing_enabled;
- /* If using proportional scroll bars, ensure handle is at least 5 pixels;
- otherwise use the fixed height. */
+ HANDLE gdi_lib = LoadLibrary ("gdi32.dll");
+
+#define LOAD_PROC(lib, fn) pfn##fn = (void *) GetProcAddress (lib, #fn)
+
+ LOAD_PROC (gdi_lib, GetFontUnicodeRanges);
+
+#undef LOAD_PROC
+
+ FreeLibrary (gdi_lib);
+
+ /* Ensure scrollbar handle is at least 5 pixels. */
vertical_scroll_bar_min_handle = 5;
/* For either kind of scroll bar, take account of the arrows; these
diff --git a/src/w32term.h b/src/w32term.h
index 5dcd381b576..c9d7fc375ac 100644
--- a/src/w32term.h
+++ b/src/w32term.h
@@ -260,6 +260,8 @@ extern Lisp_Object w32_list_fonts P_ ((struct frame *, Lisp_Object, int, int));
extern struct font_info *w32_get_font_info (), *w32_query_font ();
extern void w32_cache_char_metrics (XFontStruct *font);
extern void w32_find_ccl_program();
+extern Lisp_Object x_get_font_repertory P_ ((struct frame *,
+ struct font_info *));
#define PIX_TYPE COLORREF
diff --git a/src/window.c b/src/window.c
index 5a7655999f5..2d15cdc7e4e 100644
--- a/src/window.c
+++ b/src/window.c
@@ -3609,7 +3609,7 @@ displayed. */)
window = Fsplit_window (window, Qnil, Qnil);
else
{
- Lisp_Object upper, lower, other;
+ Lisp_Object upper, other;
window = Fget_lru_window (frames, Qt);
/* If the LRU window is selected, and big enough,
@@ -3646,11 +3646,11 @@ displayed. */)
window = Fframe_selected_window (call0 (Vpop_up_frame_function));
/* If window appears above or below another,
even out their heights. */
- other = upper = lower = Qnil;
+ other = upper = Qnil;
if (!NILP (XWINDOW (window)->prev))
- other = upper = XWINDOW (window)->prev, lower = window;
+ other = upper = XWINDOW (window)->prev;
if (!NILP (XWINDOW (window)->next))
- other = lower = XWINDOW (window)->next, upper = window;
+ other = XWINDOW (window)->next, upper = window;
if (!NILP (other)
&& !NILP (Veven_window_heights)
/* Check that OTHER and WINDOW are vertically arrayed. */
diff --git a/src/xdisp.c b/src/xdisp.c
index 22be60f7eb8..fbd61f7e2be 100644
--- a/src/xdisp.c
+++ b/src/xdisp.c
@@ -177,6 +177,7 @@ Boston, MA 02110-1301, USA. */
#include "termchar.h"
#include "dispextern.h"
#include "buffer.h"
+#include "character.h"
#include "charset.h"
#include "indent.h"
#include "commands.h"
@@ -201,6 +202,12 @@ Boston, MA 02110-1301, USA. */
#include "macterm.h"
#endif
+#ifdef HAVE_WINDOW_SYSTEM
+#ifdef USE_FONT_BACKEND
+#include "font.h"
+#endif /* USE_FONT_BACKEND */
+#endif /* HAVE_WINDOW_SYSTEM */
+
#ifndef FRAME_X_OUTPUT
#define FRAME_X_OUTPUT(f) ((f)->output_data.x)
#endif
@@ -742,11 +749,13 @@ static enum prop_handled handle_display_prop P_ ((struct it *));
static enum prop_handled handle_composition_prop P_ ((struct it *));
static enum prop_handled handle_overlay_change P_ ((struct it *));
static enum prop_handled handle_fontified_prop P_ ((struct it *));
+static enum prop_handled handle_auto_composed_prop P_ ((struct it *));
/* Properties handled by iterators. */
static struct props it_props[] =
{
+ {&Qauto_composed, AUTO_COMPOSED_PROP_IDX, handle_auto_composed_prop},
{&Qfontified, FONTIFIED_PROP_IDX, handle_fontified_prop},
/* Handle `face' before `display' because some sub-properties of
`display' need to know the face. */
@@ -3475,7 +3484,7 @@ face_before_or_after_it_pos (it, before_p)
struct face *face = FACE_FROM_ID (it->f, face_id);
c = string_char_and_length (p, rest, &len);
- face_id = FACE_FOR_CHAR (it->f, face, c);
+ face_id = FACE_FOR_CHAR (it->f, face, c, CHARPOS (pos), it->string);
}
}
else
@@ -3514,7 +3523,7 @@ face_before_or_after_it_pos (it, before_p)
{
int c = FETCH_MULTIBYTE_CHAR (BYTEPOS (pos));
struct face *face = FACE_FROM_ID (it->f, face_id);
- face_id = FACE_FOR_CHAR (it->f, face, c);
+ face_id = FACE_FOR_CHAR (it->f, face, c, CHARPOS (pos), Qnil);
}
}
@@ -4089,7 +4098,7 @@ handle_single_display_spec (it, spec, object, position,
{
Lisp_Object face_name = XCAR (XCDR (XCDR (spec)));
int face_id2 = lookup_derived_face (it->f, face_name,
- 'A', FRINGE_FACE_ID, 0);
+ FRINGE_FACE_ID, 0);
if (face_id2 >= 0)
face_id = face_id2;
}
@@ -4440,6 +4449,90 @@ string_buffer_position (w, string, around_charpos)
`composition' property
***********************************************************************/
+static enum prop_handled
+handle_auto_composed_prop (it)
+ struct it *it;
+{
+ enum prop_handled handled = HANDLED_NORMALLY;
+
+ if (FUNCTIONP (Vauto_composition_function))
+ {
+ Lisp_Object val;
+ EMACS_INT pos, this_pos;
+
+ if (STRINGP (it->string))
+ pos = IT_STRING_CHARPOS (*it);
+ else
+ pos = IT_CHARPOS (*it);
+ this_pos = pos;
+
+ val =Fget_char_property (make_number (pos), Qauto_composed, it->string);
+ if (! NILP (val))
+ {
+ Lisp_Object limit = Qnil, next;
+
+ /* As Fnext_single_char_property_change is very slow, we
+ limit the search to the current line. */
+ if (STRINGP (it->string))
+ limit = make_number (SCHARS (it->string));
+ else
+ limit = make_number (find_next_newline_no_quit (pos, 1));
+
+ next = (Fnext_single_property_change
+ (make_number (pos), Qauto_composed, it->string, limit));
+ if (XINT (next) < XINT (limit))
+ {
+ /* The current point is auto-composed, but there exist
+ characters not yet composed beyond the auto-composed
+ region. There's a possiblity that the last
+ characters in the region may be newly composed. */
+ int charpos = XINT (next) - 1, bytepos, c;
+
+ if (STRINGP (it->string))
+ {
+ bytepos = string_char_to_byte (it->string, charpos);
+ c = SDATA (it->string)[bytepos];
+ }
+ else
+ {
+ bytepos = CHAR_TO_BYTE (charpos);
+ c = FETCH_BYTE (bytepos);
+ }
+ if (c != '\n')
+ /* If the last character is not newline, it may be
+ composed with the following characters. */
+ val = Qnil, pos = charpos + 1;
+ }
+ }
+ if (NILP (val))
+ {
+ int count = SPECPDL_INDEX ();
+ Lisp_Object args[3];
+
+ args[0] = Vauto_composition_function;
+ specbind (Qauto_composition_function, Qnil);
+ args[1] = make_number (pos);
+ args[2] = it->string;
+ safe_call (3, args);
+ unbind_to (count, Qnil);
+
+ if (this_pos == pos)
+ {
+ val = Fget_char_property (args[1], Qauto_composed, it->string);
+ /* Return HANDLED_RECOMPUTE_PROPS only if function composed
+ something. This avoids an endless loop if they failed to
+ fontify the text for which reason ever. */
+ if (! NILP (val))
+ handled = HANDLED_RECOMPUTE_PROPS;
+ }
+ else
+ handled = HANDLED_RECOMPUTE_PROPS;
+ }
+ }
+
+ return handled;
+}
+
/* Set up iterator IT from `composition' property at its current
position. Called from handle_stop. */
@@ -4448,7 +4541,7 @@ handle_composition_prop (it)
struct it *it;
{
Lisp_Object prop, string;
- int pos, pos_byte, end;
+ EMACS_INT pos, pos_byte, start, end;
enum prop_handled handled = HANDLED_NORMALLY;
if (STRINGP (it->string))
@@ -4467,11 +4560,20 @@ handle_composition_prop (it)
/* If there's a valid composition and point is not inside of the
composition (in the case that the composition is from the current
buffer), draw a glyph composed from the composition components. */
- if (find_composition (pos, -1, &pos, &end, &prop, string)
- && COMPOSITION_VALID_P (pos, end, prop)
- && (STRINGP (it->string) || (PT <= pos || PT >= end)))
+ if (find_composition (pos, -1, &start, &end, &prop, string)
+ && COMPOSITION_VALID_P (start, end, prop)
+ && (STRINGP (it->string) || (PT <= start || PT >= end)))
{
- int id = get_composition_id (pos, pos_byte, end - pos, prop, string);
+ int id;
+
+ if (start != pos)
+ {
+ if (STRINGP (it->string))
+ pos_byte = string_char_to_byte (it->string, start);
+ else
+ pos_byte = CHAR_TO_BYTE (start);
+ }
+ id = get_composition_id (start, pos_byte, end - start, prop, string);
if (id >= 0)
{
@@ -4500,6 +4602,11 @@ handle_composition_prop (it)
it->method = GET_FROM_COMPOSITION;
it->cmp_id = id;
it->cmp_len = COMPOSITION_LENGTH (prop);
+#ifdef USE_FONT_BACKEND
+ if (composition_table[id]->method == COMPOSITION_WITH_GLYPH_STRING)
+ it->c = ' ';
+ else
+#endif /* USE_FONT_BACKEND */
/* For a terminal, draw only the first character of the
components. */
it->c = COMPOSITION_GLYPH (composition_table[id], 0);
@@ -5519,31 +5626,26 @@ get_next_display_element (it)
the translation. This could easily be changed but I
don't believe that it is worth doing.
- If it->multibyte_p is nonzero, eight-bit characters and
- non-printable multibyte characters are also translated to
- octal form.
+ If it->multibyte_p is nonzero, non-printable non-ASCII
+ characters are also translated to octal form.
If it->multibyte_p is zero, eight-bit characters that
don't have corresponding multibyte char code are also
translated to octal form. */
else if ((it->c < ' '
- && (it->area != TEXT_AREA
- /* In mode line, treat \n like other crl chars. */
- || (it->c != '\t'
- && it->glyph_row && it->glyph_row->mode_line_p)
- || (it->c != '\n' && it->c != '\t')))
- || (it->multibyte_p
- ? ((it->c >= 127
- && it->len == 1)
- || !CHAR_PRINTABLE_P (it->c)
+ ? (it->area != TEXT_AREA
+ /* In mode line, treat \n, \t like other crl chars. */
+ || (it->c != '\t'
+ && it->glyph_row && it->glyph_row->mode_line_p)
+ || (it->c != '\n' && it->c != '\t'))
+ : (it->multibyte_p
+ ? (!CHAR_PRINTABLE_P (it->c)
|| (!NILP (Vnobreak_char_display)
- && (it->c == 0x8a0 || it->c == 0x8ad
- || it->c == 0x920 || it->c == 0x92d
- || it->c == 0xe20 || it->c == 0xe2d
- || it->c == 0xf20 || it->c == 0xf2d)))
+ && (it->c == 0xA0 /* NO-BREAK SPACE */
+ || it->c == 0xAD /* SOFT HYPHEN */)))
: (it->c >= 127
- && (!unibyte_display_via_language_environment
- || it->c == unibyte_char_to_multibyte (it->c)))))
+ && (! unibyte_display_via_language_environment
+ || (UNIBYTE_CHAR_HAS_MULTIBYTE_P (it->c)))))))
{
/* IT->c is a control character which must be displayed
either as '\003' or as `^C' where the '\\' and '^'
@@ -5600,8 +5702,7 @@ get_next_display_element (it)
highlighting. */
if (EQ (Vnobreak_char_display, Qt)
- && (it->c == 0x8a0 || it->c == 0x920
- || it->c == 0xe20 || it->c == 0xf20))
+ && it->c == 0xA0)
{
/* Merge the no-break-space face into the current face. */
face_id = merge_faces (it->f, Qnobreak_space, 0,
@@ -5652,8 +5753,7 @@ get_next_display_element (it)
highlighting. */
if (EQ (Vnobreak_char_display, Qt)
- && (it->c == 0x8ad || it->c == 0x92d
- || it->c == 0xe2d || it->c == 0xf2d))
+ && it->c == 0xAD)
{
g = it->c = '-';
XSETINT (it->ctl_chars[0], g);
@@ -5664,13 +5764,10 @@ get_next_display_element (it)
/* Handle non-break space and soft hyphen
with the escape glyph. */
- if (it->c == 0x8a0 || it->c == 0x8ad
- || it->c == 0x920 || it->c == 0x92d
- || it->c == 0xe20 || it->c == 0xe2d
- || it->c == 0xf20 || it->c == 0xf2d)
+ if (it->c == 0xA0 || it->c == 0xAD)
{
XSETINT (it->ctl_chars[0], escape_glyph);
- g = it->c = ((it->c & 0xf) == 0 ? ' ' : '-');
+ g = it->c = (it->c == 0xA0 ? ' ' : '-');
XSETINT (it->ctl_chars[1], g);
ctl_len = 2;
goto display_control;
@@ -5682,23 +5779,27 @@ get_next_display_element (it)
int i;
/* Set IT->ctl_chars[0] to the glyph for `\\'. */
- if (SINGLE_BYTE_CHAR_P (it->c))
- str[0] = it->c, len = 1;
+ if (CHAR_BYTE8_P (it->c))
+ {
+ str[0] = CHAR_TO_BYTE8 (it->c);
+ len = 1;
+ }
+ else if (it->c < 256)
+ {
+ str[0] = it->c;
+ len = 1;
+ }
else
{
- len = CHAR_STRING_NO_SIGNAL (it->c, str);
- if (len < 0)
- {
- /* It's an invalid character, which shouldn't
- happen actually, but due to bugs it may
- happen. Let's print the char as is, there's
- not much meaningful we can do with it. */
- str[0] = it->c;
- str[1] = it->c >> 8;
- str[2] = it->c >> 16;
- str[3] = it->c >> 24;
- len = 4;
- }
+ /* It's an invalid character, which shouldn't
+ happen actually, but due to bugs it may
+ happen. Let's print the char as is, there's
+ not much meaningful we can do with it. */
+ str[0] = it->c;
+ str[1] = it->c >> 8;
+ str[2] = it->c >> 16;
+ str[3] = it->c >> 24;
+ len = 4;
}
for (i = 0; i < len; i++)
@@ -5737,7 +5838,11 @@ get_next_display_element (it)
&& FRAME_WINDOW_P (it->f))
{
struct face *face = FACE_FROM_ID (it->f, it->face_id);
- it->face_id = FACE_FOR_CHAR (it->f, face, it->c);
+ int pos = (it->s ? -1
+ : STRINGP (it->string) ? IT_STRING_CHARPOS (*it)
+ : IT_CHARPOS (*it));
+
+ it->face_id = FACE_FOR_CHAR (it->f, face, it->c, pos, it->string);
}
}
@@ -6740,6 +6845,16 @@ move_it_to (it, to_charpos, to_x, to_y, to_vpos, op)
the line. */
if (skip == MOVE_X_REACHED)
{
+ /* Wait! We can conclude that TO_Y is in the line if
+ the already scanned glyphs make the line tall enough
+ because further scanning doesn't make it shorter. */
+ line_height = it->max_ascent + it->max_descent;
+ if (to_y >= it->current_y
+ && to_y < it->current_y + line_height)
+ {
+ reached = 6;
+ break;
+ }
it_backup = *it;
TRACE_MOVE ((stderr, "move_it: from %d\n", IT_CHARPOS (*it)));
skip2 = move_it_in_display_line_to (it, to_charpos, -1,
@@ -7267,7 +7382,7 @@ message_dolog (m, nbytes, nlflag, multibyte)
for (i = 0; i < nbytes; i += char_bytes)
{
c = string_char_and_length (m + i, nbytes - i, &char_bytes);
- work[0] = (SINGLE_BYTE_CHAR_P (c)
+ work[0] = (ASCII_CHAR_P (c)
? c
: multibyte_char_to_unibyte (c, Qnil));
insert_1_both (work, 1, 1, 1, 0, 0);
@@ -7283,7 +7398,8 @@ message_dolog (m, nbytes, nlflag, multibyte)
for the *Message* buffer. */
for (i = 0; i < nbytes; i++)
{
- c = unibyte_char_to_multibyte (msg[i]);
+ c = msg[i];
+ c = unibyte_char_to_multibyte (c);
char_bytes = CHAR_STRING (c, str);
insert_1_both (str, 1, char_bytes, 1, 0, 0);
}
@@ -8554,7 +8670,7 @@ set_message_1 (a1, a2, nbytes, multibyte_p)
for (i = 0; i < nbytes; i += n)
{
c = string_char_and_length (s + i, nbytes - i, &n);
- work[0] = (SINGLE_BYTE_CHAR_P (c)
+ work[0] = (ASCII_CHAR_P (c)
? c
: multibyte_char_to_unibyte (c, Qnil));
insert_1_both (work, 1, 1, 1, 0, 0);
@@ -8571,7 +8687,8 @@ set_message_1 (a1, a2, nbytes, multibyte_p)
/* Convert a single-byte string to multibyte. */
for (i = 0; i < nbytes; i++)
{
- c = unibyte_char_to_multibyte (msg[i]);
+ c = msg[i];
+ c = unibyte_char_to_multibyte (c);
n = CHAR_STRING (c, str);
insert_1_both (str, 1, n, 1, 0, 0);
}
@@ -10631,7 +10748,7 @@ check_point_in_composition (prev_buf, prev_pt, buf, pt)
struct buffer *prev_buf, *buf;
int prev_pt, pt;
{
- int start, end;
+ EMACS_INT start, end;
Lisp_Object prop;
Lisp_Object buffer;
@@ -11625,35 +11742,24 @@ disp_char_vector (dp, c)
struct Lisp_Char_Table *dp;
int c;
{
- int code[4], i;
Lisp_Object val;
- if (SINGLE_BYTE_CHAR_P (c))
- return (dp->contents[c]);
-
- SPLIT_CHAR (c, code[0], code[1], code[2]);
- if (code[1] < 32)
- code[1] = -1;
- else if (code[2] < 32)
- code[2] = -1;
-
- /* Here, the possible range of code[0] (== charset ID) is
- 128..max_charset. Since the top level char table contains data
- for multibyte characters after 256th element, we must increment
- code[0] by 128 to get a correct index. */
- code[0] += 128;
- code[3] = -1; /* anchor */
-
- for (i = 0; code[i] >= 0; i++, dp = XCHAR_TABLE (val))
+ if (ASCII_CHAR_P (c))
{
- val = dp->contents[code[i]];
- if (!SUB_CHAR_TABLE_P (val))
- return (NILP (val) ? dp->defalt : val);
+ val = dp->ascii;
+ if (SUB_CHAR_TABLE_P (val))
+ val = XSUB_CHAR_TABLE (val)->contents[c];
}
+ else
+ {
+ Lisp_Object table;
- /* Here, val is a sub char table. We return the default value of
- it. */
- return (dp->defalt);
+ XSETCHAR_TABLE (table, dp);
+ val = char_table_ref (table, c);
+ }
+ if (NILP (val))
+ val = dp->defalt;
+ return val;
}
@@ -15566,7 +15672,7 @@ append_space_for_newline (it, default_face_p)
else if (it->face_before_selective_p)
it->face_id = it->saved_face_id;
face = FACE_FROM_ID (it->f, it->face_id);
- it->face_id = FACE_FOR_CHAR (it->f, face, 0);
+ it->face_id = FACE_FOR_CHAR (it->f, face, 0, -1, Qnil);
PRODUCE_GLYPHS (it);
@@ -15626,9 +15732,9 @@ extend_face_to_end_of_line (it)
ASCII face. This will be automatically undone the next time
get_next_display_element returns a multibyte character. Note
that the character will always be single byte in unibyte text. */
- if (!SINGLE_BYTE_CHAR_P (it->c))
+ if (!ASCII_CHAR_P (it->c))
{
- it->face_id = FACE_FOR_CHAR (f, face, 0);
+ it->face_id = FACE_FOR_CHAR (f, face, 0, -1, Qnil);
}
if (FRAME_WINDOW_P (f))
@@ -15734,7 +15840,7 @@ highlight_trailing_whitespace (f, row)
&& glyph->u.ch == ' '))
&& trailing_whitespace_p (glyph->charpos))
{
- int face_id = lookup_named_face (f, Qtrailing_whitespace, 0, 0);
+ int face_id = lookup_named_face (f, Qtrailing_whitespace, 0);
if (face_id < 0)
return;
@@ -17234,7 +17340,7 @@ are the selected window and the window's buffer). */)
{
if (EQ (face, Qt))
face = (EQ (window, selected_window) ? Qmode_line : Qmode_line_inactive);
- face_id = lookup_named_face (XFRAME (WINDOW_FRAME (w)), face, 0, 0);
+ face_id = lookup_named_face (XFRAME (WINDOW_FRAME (w)), face, 0);
}
if (face_id < 0)
@@ -17457,7 +17563,7 @@ decode_mode_spec_coding (coding_system, buf, eol_flag)
/* The EOL conversion we are using. */
Lisp_Object eoltype;
- val = Fget (coding_system, Qcoding_system);
+ val = CODING_SYSTEM_SPEC (coding_system);
eoltype = Qnil;
if (!VECTORP (val)) /* Not yet decided. */
@@ -17470,12 +17576,14 @@ decode_mode_spec_coding (coding_system, buf, eol_flag)
}
else
{
+ Lisp_Object attrs;
Lisp_Object eolvalue;
- eolvalue = Fget (coding_system, Qeol_type);
+ attrs = AREF (val, 0);
+ eolvalue = AREF (val, 2);
if (multibyte)
- *buf++ = XFASTINT (AREF (val, 1));
+ *buf++ = XFASTINT (CODING_ATTR_MNEMONIC (attrs));
if (eol_flag)
{
@@ -17485,10 +17593,10 @@ decode_mode_spec_coding (coding_system, buf, eol_flag)
eoltype = eol_mnemonic_undecided;
else if (VECTORP (eolvalue)) /* Not yet decided. */
eoltype = eol_mnemonic_undecided;
- else /* INTEGERP (eolvalue) -- 0:LF, 1:CRLF, 2:CR */
- eoltype = (XFASTINT (eolvalue) == 0
+ else /* eolvalue is Qunix, Qdos, or Qmac. */
+ eoltype = (EQ (eolvalue, Qunix)
? eol_mnemonic_unix
- : (XFASTINT (eolvalue) == 1
+ : (EQ (eolvalue, Qdos) == 1
? eol_mnemonic_dos : eol_mnemonic_mac));
}
}
@@ -17501,8 +17609,7 @@ decode_mode_spec_coding (coding_system, buf, eol_flag)
eol_str = SDATA (eoltype);
eol_str_len = SBYTES (eoltype);
}
- else if (INTEGERP (eoltype)
- && CHAR_VALID_P (XINT (eoltype), 0))
+ else if (CHARACTERP (eoltype))
{
unsigned char *tmp = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH);
eol_str_len = CHAR_STRING (XINT (eoltype), tmp);
@@ -17883,8 +17990,10 @@ decode_mode_spec (w, c, field_width, precision, multibyte)
{
/* No need to mention EOL here--the terminal never needs
to do EOL conversion. */
- p = decode_mode_spec_coding (keyboard_coding.symbol, p, 0);
- p = decode_mode_spec_coding (terminal_coding.symbol, p, 0);
+ p = decode_mode_spec_coding (CODING_ID_NAME (keyboard_coding.id),
+ p, 0);
+ p = decode_mode_spec_coding (CODING_ID_NAME (terminal_coding.id),
+ p, 0);
}
p = decode_mode_spec_coding (b->buffer_file_coding_system,
p, eol_flag);
@@ -18156,7 +18265,7 @@ display_string (string, lisp_string, face_string, face_string_pos,
}
break;
}
- else if (x + glyph->pixel_width > it->first_visible_x)
+ else if (x + glyph->pixel_width >= it->first_visible_x)
{
/* Glyph is at least partially visible. */
++it->hpos;
@@ -18703,6 +18812,23 @@ get_glyph_face_and_encoding (f, glyph, char2b, two_byte_p)
if (two_byte_p)
*two_byte_p = 0;
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ {
+ struct font *font = (struct font *) face->font_info;
+
+ if (font)
+ {
+ unsigned code = font->driver->encode_char (font, glyph->u.ch);
+
+ if (code != FONT_INVALID_CODE)
+ STORE_XCHAR2B (char2b, (code >> 8), (code & 0xFF));
+ else
+ STORE_XCHAR2B (char2b, 0, code);
+ }
+ }
+ else
+#endif /* USE_FONT_BACKEND */
if (!glyph->multibyte_p)
{
/* Unibyte case. We don't have to encode, but we have to make
@@ -18716,24 +18842,25 @@ get_glyph_face_and_encoding (f, glyph, char2b, two_byte_p)
}
else
{
- int c1, c2, charset;
+ struct font_info *font_info
+ = FONT_INFO_FROM_ID (f, face->font_info_id);
+ if (font_info)
+ {
+ struct charset *charset = CHARSET_FROM_ID (font_info->charset);
+ unsigned code = ENCODE_CHAR (charset, glyph->u.ch);
- /* Split characters into bytes. If c2 is -1 afterwards, C is
- really a one-byte character so that byte1 is zero. */
- SPLIT_CHAR (glyph->u.ch, charset, c1, c2);
- if (c2 > 0)
- STORE_XCHAR2B (char2b, c1, c2);
- else
- STORE_XCHAR2B (char2b, 0, c1);
+ if (CHARSET_DIMENSION (charset) == 1)
+ STORE_XCHAR2B (char2b, 0, code);
+ else
+ STORE_XCHAR2B (char2b, (code >> 8), (code & 0xFF));
- /* Maybe encode the character in *CHAR2B. */
- if (charset != CHARSET_ASCII)
- {
- struct font_info *font_info
- = FONT_INFO_FROM_ID (f, face->font_info_id);
- if (font_info)
- glyph->font_type
- = rif->encode_char (glyph->u.ch, char2b, font_info, two_byte_p);
+ /* Maybe encode the character in *CHAR2B. */
+ if (CHARSET_ID (charset) != charset_ascii)
+ {
+ glyph->font_type
+ = rif->encode_char (glyph->u.ch, char2b, font_info, charset,
+ two_byte_p);
+ }
}
}
@@ -18767,20 +18894,53 @@ fill_composite_glyph_string (s, faces, overlaps)
s->for_overlaps = overlaps;
s->face = faces[s->gidx];
- s->font = s->face->font;
- s->font_info = FONT_INFO_FROM_ID (s->f, s->face->font_info_id);
+ if (s->face == NULL)
+ {
+ s->font = NULL;
+ s->font_info = NULL;
+ }
+ else
+ {
+ s->font = s->face->font;
+ s->font_info = FONT_INFO_FROM_FACE (s->f, s->face);
+ }
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend && s->cmp->method == COMPOSITION_WITH_GLYPH_STRING)
+ {
+ Lisp_Object gstring
+ = AREF (XHASH_TABLE (composition_hash_table)->key_and_value,
+ s->cmp->hash_index * 2);
+
+ for (i = 0; i < s->cmp->glyph_len; i++)
+ {
+ Lisp_Object g = LGSTRING_GLYPH (gstring, i);
+ unsigned code = XUINT (LGLYPH_CODE (g));
+
+ STORE_XCHAR2B (s->char2b + i, code >> 8, code & 0xFF);
+ }
+ s->nchars = s->cmp->glyph_len;
+ s->width = s->cmp->pixel_width;
+ }
+ else
+ {
+#endif /* USE_FONT_BACKEND */
/* For all glyphs of this composition, starting at the offset
S->gidx, until we reach the end of the definition or encounter a
glyph that requires the different face, add it to S. */
++s->nchars;
- for (i = s->gidx + 1; i < s->cmp->glyph_len && faces[i] == s->face; ++i)
+ for (i = s->gidx + 1;
+ i < s->cmp->glyph_len && (faces[i] == s->face || ! faces[i] || ! s->face);
+ ++i)
++s->nchars;
/* All glyph strings for the same composition has the same width,
i.e. the width set for the first component of the composition. */
s->width = s->first_glyph->pixel_width;
+#ifdef USE_FONT_BACKEND
+ }
+#endif /* USE_FONT_BACKEND */
/* If the specified font could not be loaded, use the frame's
default font, but record the fact that we couldn't load it in
@@ -18795,8 +18955,6 @@ fill_composite_glyph_string (s, faces, overlaps)
/* Adjust base line for subscript/superscript text. */
s->ybase += s->first_glyph->voffset;
- xassert (s->face && s->face->gc);
-
/* This glyph string must always be drawn with 16-bit functions. */
s->two_byte_p = 1;
@@ -18854,7 +19012,7 @@ fill_glyph_string (s, face_id, start, end, overlaps)
}
s->font = s->face->font;
- s->font_info = FONT_INFO_FROM_ID (s->f, s->face->font_info_id);
+ s->font_info = FONT_INFO_FROM_FACE (s->f, s->face);
/* If the specified font could not be loaded, use the frame's font,
but record the fact that we couldn't load it in
@@ -18918,7 +19076,7 @@ fill_stretch_glyph_string (s, row, area, start, end)
face_id = glyph->face_id;
s->face = FACE_FROM_ID (s->f, face_id);
s->font = s->face->font;
- s->font_info = FONT_INFO_FROM_ID (s->f, s->face->font_info_id);
+ s->font_info = FONT_INFO_FROM_FACE (s->f, s->face);
s->width = glyph->pixel_width;
s->nchars = 1;
voffset = glyph->voffset;
@@ -18940,6 +19098,35 @@ fill_stretch_glyph_string (s, row, area, start, end)
return glyph - s->row->glyphs[s->area];
}
+static XCharStruct *
+get_per_char_metric (font, font_info, char2b, font_type)
+ XFontStruct *font;
+ struct font_info *font_info;
+ XChar2b *char2b;
+ int font_type;
+{
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ {
+ static XCharStruct pcm_value;
+ unsigned code = (XCHAR2B_BYTE1 (char2b) << 8) | XCHAR2B_BYTE2 (char2b);
+ struct font *fontp;
+ struct font_metrics metrics;
+
+ if (! font_info || code == FONT_INVALID_CODE)
+ return NULL;
+ fontp = (struct font *) font_info;
+ fontp->driver->text_extents (fontp, &code, 1, &metrics);
+ pcm_value.lbearing = metrics.lbearing;
+ pcm_value.rbearing = metrics.rbearing;
+ pcm_value.ascent = metrics.ascent;
+ pcm_value.descent = metrics.descent;
+ pcm_value.width = metrics.width;
+ return &pcm_value;
+ }
+#endif /* USE_FONT_BACKEND */
+ return rif->per_char_metric (font, char2b, font_type);
+}
/* EXPORT for RIF:
Set *LEFT and *RIGHT to the left and right overhang of GLYPH on
@@ -18964,9 +19151,9 @@ x_get_glyph_overhangs (glyph, f, left, right)
face = get_glyph_face_and_encoding (f, glyph, &char2b, NULL);
font = face->font;
- font_info = FONT_INFO_FROM_ID (f, face->font_info_id);
+ font_info = FONT_INFO_FROM_FACE (f, face);
if (font /* ++KFS: Should this be font_info ? */
- && (pcm = rif->per_char_metric (font, &char2b, glyph->font_type)))
+ && (pcm = get_per_char_metric (font, font_info, &char2b, glyph->font_type)))
{
if (pcm->rbearing > pcm->width)
*right = pcm->rbearing - pcm->width;
@@ -18974,6 +19161,13 @@ x_get_glyph_overhangs (glyph, f, left, right)
*left = -pcm->lbearing;
}
}
+ else if (glyph->type == COMPOSITE_GLYPH)
+ {
+ struct composition *cmp = composition_table[glyph->u.cmp_id];
+
+ *right = cmp->rbearing - cmp->pixel_width;
+ *left = - cmp->lbearing;
+ }
}
@@ -19103,12 +19297,29 @@ get_char_face_and_encoding (f, c, face_id, char2b, multibyte_p, display_p)
{
struct face *face = FACE_FROM_ID (f, face_id);
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ {
+ struct font *font = (struct font *) face->font_info;
+
+ if (font)
+ {
+ unsigned code = font->driver->encode_char (font, c);
+
+ if (code != FONT_INVALID_CODE)
+ STORE_XCHAR2B (char2b, (code >> 8), (code & 0xFF));
+ else
+ STORE_XCHAR2B (char2b, 0, 0);
+ }
+ }
+ else
+#endif /* USE_FONT_BACKEND */
if (!multibyte_p)
{
/* Unibyte case. We don't have to encode, but we have to make
sure to use a face suitable for unibyte. */
STORE_XCHAR2B (char2b, 0, c);
- face_id = FACE_FOR_CHAR (f, face, c);
+ face_id = FACE_FOR_CHAR (f, face, c, -1, Qnil);
face = FACE_FROM_ID (f, face_id);
}
else if (c < 128)
@@ -19116,26 +19327,19 @@ get_char_face_and_encoding (f, c, face_id, char2b, multibyte_p, display_p)
/* Case of ASCII in a face known to fit ASCII. */
STORE_XCHAR2B (char2b, 0, c);
}
- else
+ else if (face->font != NULL)
{
- int c1, c2, charset;
+ struct font_info *font_info
+ = FONT_INFO_FROM_ID (f, face->font_info_id);
+ struct charset *charset = CHARSET_FROM_ID (font_info->charset);
+ unsigned code = ENCODE_CHAR (charset, c);
- /* Split characters into bytes. If c2 is -1 afterwards, C is
- really a one-byte character so that byte1 is zero. */
- SPLIT_CHAR (c, charset, c1, c2);
- if (c2 > 0)
- STORE_XCHAR2B (char2b, c1, c2);
+ if (CHARSET_DIMENSION (charset) == 1)
+ STORE_XCHAR2B (char2b, 0, code);
else
- STORE_XCHAR2B (char2b, 0, c1);
-
- /* Maybe encode the character in *CHAR2B. */
- if (face->font != NULL)
- {
- struct font_info *font_info
- = FONT_INFO_FROM_ID (f, face->font_info_id);
- if (font_info)
- rif->encode_char (c, char2b, font_info, 0);
- }
+ STORE_XCHAR2B (char2b, (code >> 8), (code & 0xFF));
+ /* Maybe encode the character in *CHAR2B. */
+ rif->encode_char (c, char2b, font_info, charset, NULL);
}
/* Make sure X resources of the face are allocated. */
@@ -19290,10 +19494,9 @@ compute_overhangs_and_x (s, x, backward_p)
#define BUILD_CHAR_GLYPH_STRINGS(START, END, HEAD, TAIL, HL, X, LAST_X) \
do \
{ \
- int c, face_id; \
+ int face_id; \
XChar2b *char2b; \
\
- c = (row)->glyphs[area][START].u.ch; \
face_id = (row)->glyphs[area][START].face_id; \
\
s = (struct glyph_string *) alloca (sizeof *s); \
@@ -19327,6 +19530,14 @@ compute_overhangs_and_x (s, x, backward_p)
struct glyph_string *first_s = NULL; \
int n; \
\
+ if (cmp->method > COMPOSITION_WITH_RULE_ALTCHARS) \
+ { \
+ /* This happens only when USE_FONT_BACKEND is defined. */ \
+ char2b = (XChar2b *) alloca ((sizeof *char2b) * glyph_len); \
+ faces = &base_face; \
+ } \
+ else \
+ { \
base_face = base_face->ascii_face; \
char2b = (XChar2b *) alloca ((sizeof *char2b) * glyph_len); \
faces = (struct face **) alloca ((sizeof *faces) * glyph_len); \
@@ -19334,11 +19545,18 @@ compute_overhangs_and_x (s, x, backward_p)
for (n = 0; n < glyph_len; n++) \
{ \
int c = COMPOSITION_GLYPH (cmp, n); \
- int this_face_id = FACE_FOR_CHAR (f, base_face, c); \
- faces[n] = FACE_FROM_ID (f, this_face_id); \
- get_char_face_and_encoding (f, c, this_face_id, \
+ \
+ if (c == '\t') \
+ faces[n] = NULL; \
+ else \
+ { \
+ int this_face_id = FACE_FOR_CHAR (f, base_face, c, -1, Qnil); \
+ faces[n] = FACE_FROM_ID (f, this_face_id); \
+ get_char_face_and_encoding (f, c, this_face_id, \
char2b + n, 1, 1); \
+ } \
} \
+ } \
\
/* Make glyph_strings for each glyph sequence that is drawable by \
the same face, and append them to HEAD/TAIL. */ \
@@ -19405,8 +19623,11 @@ compute_overhangs_and_x (s, x, backward_p)
abort (); \
} \
\
- set_glyph_string_background_width (s, START, LAST_X); \
- (X) += s->width; \
+ if (s) \
+ { \
+ set_glyph_string_background_width (s, START, LAST_X); \
+ (X) += s->width; \
+ } \
} \
} \
while (0)
@@ -19440,7 +19661,7 @@ draw_glyphs (w, x, row, area, start, end, hl, overlaps)
int x;
struct glyph_row *row;
enum glyph_row_area area;
- int start, end;
+ EMACS_INT start, end;
enum draw_glyphs_face hl;
int overlaps;
{
@@ -19560,6 +19781,7 @@ draw_glyphs (w, x, row, area, start, end, hl, overlaps)
if (i >= 0)
{
clip_tail = tail;
+ i++; /* We must include the Ith glyph. */
BUILD_GLYPH_STRINGS (end, i, h, t,
DRAW_NORMAL_TEXT, x, last_x);
for (s = h; s; s = s->next)
@@ -20140,7 +20362,7 @@ calc_line_height_property (it, val, font, boff, override)
struct face *face;
struct font_info *font_info;
- face_id = lookup_named_face (it->f, face_name, ' ', 0);
+ face_id = lookup_named_face (it->f, face_name, 0);
if (face_id < 0)
return make_number (-1);
@@ -20149,7 +20371,7 @@ calc_line_height_property (it, val, font, boff, override)
if (font == NULL)
return make_number (-1);
- font_info = FONT_INFO_FROM_ID (it->f, face->font_info_id);
+ font_info = FONT_INFO_FROM_FACE (it->f, face);
boff = font_info->baseline_offset;
if (font_info->vertical_centering)
boff = VCENTER_BASELINE_OFFSET (font, it->f) - boff;
@@ -20213,23 +20435,17 @@ x_produce_glyphs (it)
/* Maybe translate single-byte characters to multibyte, or the
other way. */
it->char_to_display = it->c;
- if (!ASCII_BYTE_P (it->c))
+ if (!ASCII_BYTE_P (it->c)
+ && ! it->multibyte_p)
{
- if (unibyte_display_via_language_environment
- && SINGLE_BYTE_CHAR_P (it->c)
- && (it->c >= 0240
- || !NILP (Vnonascii_translation_table)))
+ if (SINGLE_BYTE_CHAR_P (it->c)
+ && unibyte_display_via_language_environment)
+ it->char_to_display = unibyte_char_to_multibyte (it->c);
+ if (! SINGLE_BYTE_CHAR_P (it->c))
{
- it->char_to_display = unibyte_char_to_multibyte (it->c);
it->multibyte_p = 1;
- it->face_id = FACE_FOR_CHAR (it->f, face, it->char_to_display);
- face = FACE_FROM_ID (it->f, it->face_id);
- }
- else if (!SINGLE_BYTE_CHAR_P (it->c)
- && !it->multibyte_p)
- {
- it->multibyte_p = 1;
- it->face_id = FACE_FOR_CHAR (it->f, face, it->char_to_display);
+ it->face_id = FACE_FOR_CHAR (it->f, face, it->char_to_display,
+ -1, Qnil);
face = FACE_FROM_ID (it->f, it->face_id);
}
}
@@ -20249,7 +20465,7 @@ x_produce_glyphs (it)
}
else
{
- font_info = FONT_INFO_FROM_ID (it->f, face->font_info_id);
+ font_info = FONT_INFO_FROM_FACE (it->f, face);
boff = font_info->baseline_offset;
if (font_info->vertical_centering)
boff = VCENTER_BASELINE_OFFSET (font, it->f) - boff;
@@ -20263,7 +20479,7 @@ x_produce_glyphs (it)
it->nglyphs = 1;
- pcm = rif->per_char_metric (font, &char2b,
+ pcm = get_per_char_metric (font, font_info, &char2b,
FONT_TYPE_FOR_UNIBYTE (font, it->char_to_display));
if (it->override_ascent >= 0)
@@ -20490,20 +20706,18 @@ x_produce_glyphs (it)
/* If we found a font, this font should give us the right
metrics. If we didn't find a font, use the frame's
- default font and calculate the width of the character
- from the charset width; this is what old redisplay code
- did. */
+ default font and calculate the width of the character by
+ multiplying the width of font by the width of the
+ character. */
- pcm = rif->per_char_metric (font, &char2b,
+ pcm = get_per_char_metric (font, font_info, &char2b,
FONT_TYPE_FOR_MULTIBYTE (font, it->c));
if (font_not_found_p || !pcm)
{
- int charset = CHAR_CHARSET (it->char_to_display);
-
it->glyph_not_available_p = 1;
it->pixel_width = (FRAME_COLUMN_WIDTH (it->f)
- * CHARSET_WIDTH (charset));
+ * CHAR_WIDTH (it->char_to_display));
it->phys_ascent = FONT_BASE (font) + boff;
it->phys_descent = FONT_DESCENT (font) - boff;
}
@@ -20562,20 +20776,20 @@ x_produce_glyphs (it)
struct font_info *font_info;
int boff; /* baseline offset */
struct composition *cmp = composition_table[it->cmp_id];
+ int pos;
/* Maybe translate single-byte characters to multibyte. */
it->char_to_display = it->c;
if (unibyte_display_via_language_environment
- && SINGLE_BYTE_CHAR_P (it->c)
- && (it->c >= 0240
- || (it->c >= 0200
- && !NILP (Vnonascii_translation_table))))
+ && it->c >= 0200)
{
it->char_to_display = unibyte_char_to_multibyte (it->c);
}
/* Get face and font to use. Encode IT->char_to_display. */
- it->face_id = FACE_FOR_CHAR (it->f, face, it->char_to_display);
+ pos = STRINGP (it->string) ? IT_STRING_CHARPOS (*it) : IT_CHARPOS (*it);
+ it->face_id = FACE_FOR_CHAR (it->f, face, it->char_to_display,
+ pos, it->string);
face = FACE_FROM_ID (it->f, it->face_id);
get_char_face_and_encoding (it->f, it->char_to_display, it->face_id,
&char2b, it->multibyte_p, 0);
@@ -20591,7 +20805,7 @@ x_produce_glyphs (it)
}
else
{
- font_info = FONT_INFO_FROM_ID (it->f, face->font_info_id);
+ font_info = FONT_INFO_FROM_FACE (it->f, face);
boff = font_info->baseline_offset;
if (font_info->vertical_centering)
boff = VCENTER_BASELINE_OFFSET (font, it->f) - boff;
@@ -20608,9 +20822,21 @@ x_produce_glyphs (it)
now. Theoretically, we have to check all fonts for the
glyphs, but that requires much time and memory space. So,
here we check only the font of the first glyph. This leads
- to incorrect display very rarely, and C-l (recenter) can
- correct the display anyway. */
- if (cmp->font != (void *) font)
+ to incorrect display, but it's very rare, and C-l (recenter)
+ can correct the display anyway. */
+ if (cmp->glyph_len == 0)
+ {
+ cmp->lbearing = cmp->rbearing = 0;
+ cmp->pixel_width = cmp->ascent = cmp->descent = 0;
+ }
+#ifdef USE_FONT_BACKEND
+ else if (cmp->method == COMPOSITION_WITH_GLYPH_STRING)
+ {
+ if (! cmp->font)
+ font_prepare_composition (cmp);
+ }
+#endif /* USE_FONT_BACKEND */
+ else if (cmp->font != (void *) font)
{
/* Ascent and descent of the font of the first character of
this composition (adjusted by baseline offset). Ascent
@@ -20618,26 +20844,37 @@ x_produce_glyphs (it)
them respectively. */
int font_ascent = FONT_BASE (font) + boff;
int font_descent = FONT_DESCENT (font) - boff;
+ int font_height = FONT_HEIGHT (font);
/* Bounding box of the overall glyphs. */
int leftmost, rightmost, lowest, highest;
+ int lbearing, rbearing;
int i, width, ascent, descent;
+ int fully_padded = 0;
cmp->font = (void *) font;
/* Initialize the bounding box. */
if (font_info
- && (pcm = rif->per_char_metric (font, &char2b,
+ && (pcm = get_per_char_metric (font, font_info, &char2b,
FONT_TYPE_FOR_MULTIBYTE (font, it->c))))
{
width = pcm->width;
ascent = pcm->ascent;
descent = pcm->descent;
+ lbearing = pcm->lbearing;
+ if (lbearing > 0)
+ lbearing = 0;
+ rbearing = pcm->rbearing;
+ if (rbearing < width)
+ rbearing = width;
}
else
{
width = FONT_WIDTH (font);
ascent = FONT_BASE (font);
descent = FONT_DESCENT (font);
+ lbearing = 0;
+ rbearing = width;
}
rightmost = width;
@@ -20657,14 +20894,25 @@ x_produce_glyphs (it)
the left. */
cmp->offsets[0] = 0;
cmp->offsets[1] = boff;
+ cmp->lbearing = lbearing;
+ cmp->rbearing = rbearing;
/* Set cmp->offsets for the remaining glyphs. */
for (i = 1; i < cmp->glyph_len; i++)
{
int left, right, btm, top;
int ch = COMPOSITION_GLYPH (cmp, i);
- int face_id = FACE_FOR_CHAR (it->f, face, ch);
+ int face_id;
+ if (ch == '\t')
+ {
+ fully_padded = 1;
+ cmp->offsets[i * 2] = 0;
+ cmp->offsets[i * 2 + 1] = boff;
+ continue;
+ }
+
+ face_id = FACE_FOR_CHAR (it->f, face, ch, pos, it->string);
face = FACE_FROM_ID (it->f, face_id);
get_char_face_and_encoding (it->f, ch, face->id,
&char2b, it->multibyte_p, 0);
@@ -20678,25 +20926,33 @@ x_produce_glyphs (it)
else
{
font_info
- = FONT_INFO_FROM_ID (it->f, face->font_info_id);
+ = FONT_INFO_FROM_FACE (it->f, face);
boff = font_info->baseline_offset;
if (font_info->vertical_centering)
boff = VCENTER_BASELINE_OFFSET (font, it->f) - boff;
}
if (font_info
- && (pcm = rif->per_char_metric (font, &char2b,
+ && (pcm = get_per_char_metric (font, font_info, &char2b,
FONT_TYPE_FOR_MULTIBYTE (font, ch))))
{
width = pcm->width;
ascent = pcm->ascent;
descent = pcm->descent;
+ lbearing = pcm->lbearing;
+ if (lbearing > 0)
+ lbearing = 0;
+ rbearing = pcm->rbearing;
+ if (rbearing < width)
+ rbearing = width;
}
else
{
width = FONT_WIDTH (font);
ascent = 1;
descent = 0;
+ lbearing = 0;
+ rbearing = width;
}
if (cmp->method != COMPOSITION_WITH_RULE_ALTCHARS)
@@ -20737,15 +20993,21 @@ x_produce_glyphs (it)
6---7---8 -- descent
*/
int rule = COMPOSITION_RULE (cmp, i);
- int gref, nref, grefx, grefy, nrefx, nrefy;
+ int gref, nref, grefx, grefy, nrefx, nrefy, xoff, yoff;
- COMPOSITION_DECODE_RULE (rule, gref, nref);
+ COMPOSITION_DECODE_RULE (rule, gref, nref, xoff, yoff);
grefx = gref % 3, nrefx = nref % 3;
grefy = gref / 3, nrefy = nref / 3;
+ if (xoff)
+ xoff = font_height * (xoff - 128) / 256;
+ if (yoff)
+ yoff = font_height * (yoff - 128) / 256;
left = (leftmost
+ grefx * (rightmost - leftmost) / 2
- - nrefx * width / 2);
+ - nrefx * width / 2
+ + xoff);
+
btm = ((grefy == 0 ? highest
: grefy == 1 ? 0
: grefy == 2 ? lowest
@@ -20753,23 +21015,32 @@ x_produce_glyphs (it)
- (nrefy == 0 ? ascent + descent
: nrefy == 1 ? descent - boff
: nrefy == 2 ? 0
- : (ascent + descent) / 2));
+ : (ascent + descent) / 2)
+ + yoff);
}
cmp->offsets[i * 2] = left;
cmp->offsets[i * 2 + 1] = btm + descent;
/* Update the bounding box of the overall glyphs. */
- right = left + width;
+ if (width > 0)
+ {
+ right = left + width;
+ if (left < leftmost)
+ leftmost = left;
+ if (right > rightmost)
+ rightmost = right;
+ }
top = btm + descent + ascent;
- if (left < leftmost)
- leftmost = left;
- if (right > rightmost)
- rightmost = right;
if (top > highest)
highest = top;
if (btm < lowest)
lowest = btm;
+
+ if (cmp->lbearing > left + lbearing)
+ cmp->lbearing = left + lbearing;
+ if (cmp->rbearing < left + rbearing)
+ cmp->rbearing = left + rbearing;
}
/* If there are glyphs whose x-offsets are negative,
@@ -20780,6 +21051,17 @@ x_produce_glyphs (it)
for (i = 0; i < cmp->glyph_len; i++)
cmp->offsets[i * 2] -= leftmost;
rightmost -= leftmost;
+ cmp->lbearing -= leftmost;
+ cmp->rbearing -= leftmost;
+ }
+
+ if (fully_padded)
+ {
+ for (i = 0; i < cmp->glyph_len; i++)
+ cmp->offsets[i * 2] -= cmp->lbearing;
+ rightmost = cmp->rbearing - cmp->lbearing;
+ cmp->lbearing = 0;
+ cmp->rbearing = rightmost;
}
cmp->pixel_width = rightmost;
@@ -20791,6 +21073,11 @@ x_produce_glyphs (it)
cmp->descent = font_descent;
}
+ if (it->glyph_row
+ && (cmp->lbearing < 0
+ || cmp->rbearing > cmp->pixel_width))
+ it->glyph_row->contains_overlapping_glyphs_p = 1;
+
it->pixel_width = cmp->pixel_width;
it->ascent = it->phys_ascent = cmp->ascent;
it->descent = it->phys_descent = cmp->descent;
@@ -20901,7 +21188,8 @@ x_insert_glyphs (start, len)
int line_height, shift_by_width, shifted_region_width;
struct glyph_row *row;
struct glyph *glyph;
- int frame_x, frame_y, hpos;
+ int frame_x, frame_y;
+ EMACS_INT hpos;
xassert (updated_window && updated_row);
BLOCK_INPUT;
@@ -21826,7 +22114,7 @@ cursor_in_mouse_face_p (w)
static int
fast_find_position (w, charpos, hpos, vpos, x, y, stop)
struct window *w;
- int charpos;
+ EMACS_INT charpos;
int *hpos, *vpos, *x, *y;
Lisp_Object stop;
{
@@ -21914,7 +22202,7 @@ fast_find_position (w, charpos, hpos, vpos, x, y, stop)
static int
fast_find_position (w, pos, hpos, vpos, x, y, stop)
struct window *w;
- int pos;
+ EMACS_INT pos;
int *hpos, *vpos, *x, *y;
Lisp_Object stop;
{
@@ -22028,7 +22316,7 @@ fast_find_position (w, pos, hpos, vpos, x, y, stop)
static int
fast_find_string_pos (w, pos, object, hpos, vpos, x, y, right_p)
struct window *w;
- int pos;
+ EMACS_INT pos;
Lisp_Object object;
int *hpos, *vpos, *x, *y;
int right_p;
diff --git a/src/xfaces.c b/src/xfaces.c
index f67ea61b37a..6a05611939e 100644
--- a/src/xfaces.c
+++ b/src/xfaces.c
@@ -56,7 +56,7 @@ Boston, MA 02110-1301, USA. */
13. Whether or not a box should be drawn around characters, the box
type, and, for simple boxes, in what color.
- 14. Font or fontset pattern, or nil. This is a special attribute.
+ 14. Font pattern, or nil. This is a special attribute.
When this attribute is specified, the face uses a font opened by
that pattern as is. In addition, all the other font-related
attributes (1st thru 5th) are generated from the opened font name.
@@ -72,6 +72,8 @@ Boston, MA 02110-1301, USA. */
and is used to ensure that a font specified on the command line,
for example, can be matched exactly.
+ 17. A fontset name.
+
Faces are frame-local by nature because Emacs allows to define the
same named face (face names are symbols) differently for different
frames. Each frame has an alist of face definitions for all named
@@ -123,7 +125,7 @@ Boston, MA 02110-1301, USA. */
is realized, it inherits (thus shares) a fontset of an ASCII face
that has the same attributes other than font-related ones.
- Thus, all realized face have a realized fontset.
+ Thus, all realized faces have a realized fontset.
Unibyte text.
@@ -197,6 +199,7 @@ Boston, MA 02110-1301, USA. */
#include <sys/stat.h>
#include "lisp.h"
+#include "character.h"
#include "charset.h"
#include "keyboard.h"
#include "frame.h"
@@ -243,6 +246,12 @@ Boston, MA 02110-1301, USA. */
#include "window.h"
#include "intervals.h"
+#ifdef HAVE_WINDOW_SYSTEM
+#ifdef USE_FONT_BACKEND
+#include "font.h"
+#endif /* USE_FONT_BACKEND */
+#endif /* HAVE_WINDOW_SYSTEM */
+
#ifdef HAVE_X_WINDOWS
/* Compensate for a bug in Xos.h on some systems, on which it requires
@@ -304,6 +313,7 @@ Lisp_Object QCinverse_video, QCforeground, QCbackground, QCstipple;
Lisp_Object QCwidth, QCfont, QCbold, QCitalic;
Lisp_Object QCreverse_video;
Lisp_Object QCoverline, QCstrike_through, QCbox, QCinherit;
+Lisp_Object QCfontset;
/* Symbols used for attribute values. */
@@ -487,7 +497,7 @@ static int get_lface_attributes P_ ((struct frame *, Lisp_Object, Lisp_Object *,
static int load_pixmap P_ ((struct frame *, Lisp_Object, unsigned *, unsigned *));
static unsigned char *xstrlwr P_ ((unsigned char *));
static struct frame *frame_or_selected_frame P_ ((Lisp_Object, int));
-static void load_face_font P_ ((struct frame *, struct face *, int));
+static void load_face_font P_ ((struct frame *, struct face *));
static void load_face_colors P_ ((struct frame *, struct face *, Lisp_Object *));
static void free_face_colors P_ ((struct frame *, struct face *));
static int face_color_gray_p P_ ((struct frame *, char *));
@@ -500,18 +510,17 @@ static int font_list_1 P_ ((struct frame *, Lisp_Object, Lisp_Object,
Lisp_Object, struct font_name **));
static int font_list P_ ((struct frame *, Lisp_Object, Lisp_Object,
Lisp_Object, struct font_name **));
-static int try_font_list P_ ((struct frame *, Lisp_Object *,
- Lisp_Object, Lisp_Object, struct font_name **,
- int));
+static int try_font_list P_ ((struct frame *, Lisp_Object,
+ Lisp_Object, Lisp_Object, struct font_name **));
static int try_alternative_families P_ ((struct frame *f, Lisp_Object,
Lisp_Object, struct font_name **));
static int cmp_font_names P_ ((const void *, const void *));
-static struct face *realize_face P_ ((struct face_cache *, Lisp_Object *, int,
- struct face *, int));
-static struct face *realize_x_face P_ ((struct face_cache *,
- Lisp_Object *, int, struct face *));
-static struct face *realize_tty_face P_ ((struct face_cache *,
- Lisp_Object *, int));
+static struct face *realize_face P_ ((struct face_cache *, Lisp_Object *,
+ int));
+static struct face *realize_non_ascii_face P_ ((struct frame *, int,
+ struct face *));
+static struct face *realize_x_face P_ ((struct face_cache *, Lisp_Object *));
+static struct face *realize_tty_face P_ ((struct face_cache *, Lisp_Object *));
static int realize_basic_faces P_ ((struct frame *));
static int realize_default_face P_ ((struct frame *));
static void realize_named_face P_ ((struct frame *, Lisp_Object, int));
@@ -521,23 +530,22 @@ static unsigned hash_string_case_insensitive P_ ((Lisp_Object));
static unsigned lface_hash P_ ((Lisp_Object *));
static int lface_same_font_attributes_p P_ ((Lisp_Object *, Lisp_Object *));
static struct face_cache *make_face_cache P_ ((struct frame *));
-static void free_realized_face P_ ((struct frame *, struct face *));
static void clear_face_gcs P_ ((struct face_cache *));
static void free_face_cache P_ ((struct face_cache *));
static int face_numeric_weight P_ ((Lisp_Object));
static int face_numeric_slant P_ ((Lisp_Object));
static int face_numeric_swidth P_ ((Lisp_Object));
static int face_fontset P_ ((Lisp_Object *));
-static char *choose_face_font P_ ((struct frame *, Lisp_Object *, int, int, int*));
static void merge_face_vectors P_ ((struct frame *, Lisp_Object *, Lisp_Object*,
struct named_merge_point *));
static int merge_face_ref P_ ((struct frame *, Lisp_Object, Lisp_Object *,
int, struct named_merge_point *));
static int set_lface_from_font_name P_ ((struct frame *, Lisp_Object,
Lisp_Object, int, int));
+static void set_lface_from_font_and_fontset P_ ((struct frame *, Lisp_Object,
+ Lisp_Object, int, int));
static Lisp_Object lface_from_face_name P_ ((struct frame *, Lisp_Object, int));
static struct face *make_realized_face P_ ((Lisp_Object *));
-static void free_realized_faces P_ ((struct face_cache *));
static char *best_matching_font P_ ((struct frame *, Lisp_Object *,
struct font_name *, int, int, int *));
static void cache_face P_ ((struct face_cache *, struct face *, unsigned));
@@ -976,6 +984,9 @@ clear_face_cache (clear_fonts_p)
{
struct x_display_info *dpyinfo;
+#ifdef USE_FONT_BACKEND
+ if (! enable_font_backend)
+#endif /* USE_FONT_BACKEND */
/* Fonts are common for frames on one display, i.e. on
one X screen. */
for (dpyinfo = x_display_list; dpyinfo; dpyinfo = dpyinfo->next)
@@ -1222,30 +1233,32 @@ load_pixmap (f, name, w_ptr, h_ptr)
#ifdef HAVE_WINDOW_SYSTEM
-/* Load font of face FACE which is used on frame F to display
- character C. The name of the font to load is determined by lface
- and fontset of FACE. */
+/* Load font of face FACE which is used on frame F to display ASCII
+ characters. The name of the font to load is determined by lface. */
static void
-load_face_font (f, face, c)
+load_face_font (f, face)
struct frame *f;
struct face *face;
- int c;
{
struct font_info *font_info = NULL;
char *font_name;
int needs_overstrike;
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ abort ();
+#endif /* USE_FONT_BACKEND */
face->font_info_id = -1;
face->font = NULL;
+ face->font_name = NULL;
- font_name = choose_face_font (f, face->lface, face->fontset, c,
- &needs_overstrike);
+ font_name = choose_face_font (f, face->lface, Qnil, &needs_overstrike);
if (!font_name)
return;
BLOCK_INPUT;
- font_info = FS_LOAD_FACE_FONT (f, c, font_name, face);
+ font_info = FS_LOAD_FONT (f, font_name);
UNBLOCK_INPUT;
if (font_info)
@@ -1384,7 +1397,7 @@ tty_defined_color (f, color_name, color_def, alloc)
color_def->green = 0;
if (*color_name)
- status = tty_lookup_color (f, build_string (color_name), color_def, 0);
+ status = tty_lookup_color (f, build_string (color_name), color_def, NULL);
if (color_def->pixel == FACE_TTY_DEFAULT_COLOR && *color_name)
{
@@ -2132,7 +2145,7 @@ face_value (table, dim, symbol)
static INLINE int
face_numeric_value (table, dim, symbol)
struct table_entry *table;
- int dim;
+ size_t dim;
Lisp_Object symbol;
{
struct table_entry *p = face_value (table, dim, symbol);
@@ -2173,9 +2186,117 @@ face_numeric_swidth (width)
return face_numeric_value (swidth_table, DIM (swidth_table), width);
}
-
#ifdef HAVE_WINDOW_SYSTEM
+#ifdef USE_FONT_BACKEND
+static INLINE Lisp_Object
+face_symbolic_value (table, dim, font_prop)
+ struct table_entry *table;
+ int dim;
+ Lisp_Object font_prop;
+{
+ struct table_entry *p;
+ char *s = SDATA (SYMBOL_NAME (font_prop));
+ int low, mid, high, cmp;
+
+ low = 0;
+ high = dim - 1;
+
+ while (low <= high)
+ {
+ mid = (low + high) / 2;
+ cmp = strcmp (table[mid].name, s);
+
+ if (cmp < 0)
+ low = mid + 1;
+ else if (cmp > 0)
+ high = mid - 1;
+ else
+ return *table[mid].symbol;
+ }
+
+ return Qnil;
+}
+
+static INLINE Lisp_Object
+face_symbolic_weight (weight)
+ Lisp_Object weight;
+{
+ return face_symbolic_value (weight_table, DIM (weight_table), weight);
+}
+
+static INLINE Lisp_Object
+face_symbolic_slant (slant)
+ Lisp_Object slant;
+{
+ return face_symbolic_value (slant_table, DIM (slant_table), slant);
+}
+
+static INLINE Lisp_Object
+face_symbolic_swidth (width)
+ Lisp_Object width;
+{
+ return face_symbolic_value (swidth_table, DIM (swidth_table), width);
+}
+#endif /* USE_FONT_BACKEND */
+
+Lisp_Object
+split_font_name_into_vector (fontname)
+ Lisp_Object fontname;
+{
+ struct font_name font;
+ Lisp_Object vec;
+ int i;
+
+ font.name = LSTRDUPA (fontname);
+ if (! split_font_name (NULL, &font, 0))
+ return Qnil;
+ vec = Fmake_vector (make_number (XLFD_LAST), Qnil);
+ for (i = 0; i < XLFD_LAST; i++)
+ if (font.fields[i][0] != '*')
+ ASET (vec, i, build_string (font.fields[i]));
+ return vec;
+}
+
+Lisp_Object
+build_font_name_from_vector (vec)
+ Lisp_Object vec;
+{
+ struct font_name font;
+ Lisp_Object fontname;
+ char *p;
+ int i;
+
+ for (i = 0; i < XLFD_LAST; i++)
+ {
+ font.fields[i] = (NILP (AREF (vec, i))
+ ? "*" : (char *) SDATA (AREF (vec, i)));
+ if ((i == XLFD_FAMILY || i == XLFD_REGISTRY)
+ && (p = strchr (font.fields[i], '-')))
+ {
+ char *p1 = STRDUPA (font.fields[i]);
+
+ p1[p - font.fields[i]] = '\0';
+ if (i == XLFD_FAMILY)
+ {
+ font.fields[XLFD_FOUNDRY] = p1;
+ font.fields[XLFD_FAMILY] = p + 1;
+ }
+ else
+ {
+ font.fields[XLFD_REGISTRY] = p1;
+ font.fields[XLFD_ENCODING] = p + 1;
+ break;
+ }
+ }
+ }
+
+ p = build_font_name (&font);
+ fontname = build_string (p);
+ xfree (p);
+ return fontname;
+}
+
/* Return non-zero if FONT is the name of a fixed-pitch font. */
static INLINE int
@@ -2198,7 +2319,9 @@ xlfd_fixed_p (font)
72dpi versions, only.)
Value is the real point size of FONT on frame F, or 0 if it cannot
- be determined. */
+ be determined.
+
+ By side effect, set FONT->numeric[XLFD_PIXEL_SIZE]. */
static INLINE int
xlfd_point_size (f, font)
@@ -2237,6 +2360,7 @@ xlfd_point_size (f, font)
else
pixel = atoi (pixel_field);
+ font->numeric[XLFD_PIXEL_SIZE] = pixel;
if (pixel == 0)
real_pt = 0;
else
@@ -2723,12 +2847,12 @@ cmp_font_names (a, b)
}
-/* Get a sorted list of fonts of family FAMILY on frame F. If PATTERN
- is non-nil list fonts matching that pattern. Otherwise, if
- REGISTRY is non-nil return only fonts with that registry, otherwise
- return fonts of any registry. Set *FONTS to a vector of font_name
- structures allocated from the heap containing the fonts found.
- Value is the number of fonts found. */
+/* Get a sorted list of fonts matching PATTERN on frame F. If PATTERN
+ is nil, list fonts matching FAMILY and REGISTRY. FAMILY is a
+ family name string or nil. REGISTRY is a registry name string.
+ Set *FONTS to a vector of font_name structures allocated from the
+ heap containing the fonts found. Value is the number of fonts
+ found. */
static int
font_list_1 (f, pattern, family, registry, fonts)
@@ -2789,10 +2913,11 @@ concat_font_list (fonts1, nfonts1, fonts2, nfonts2)
/* Get a sorted list of fonts of family FAMILY on frame F.
- If PATTERN is non-nil list fonts matching that pattern.
+ If PATTERN is non-nil, list fonts matching that pattern.
- If REGISTRY is non-nil, return fonts with that registry and the
- alternative registries from Vface_alternative_font_registry_alist.
+ If REGISTRY is non-nil, it is a list of registry (and encoding)
+ names. Return fonts with those registries and the alternative
+ registries from Vface_alternative_font_registry_alist.
If REGISTRY is nil return fonts of any registry.
@@ -2806,35 +2931,37 @@ font_list (f, pattern, family, registry, fonts)
Lisp_Object pattern, family, registry;
struct font_name **fonts;
{
- int nfonts = font_list_1 (f, pattern, family, registry, fonts);
+ int nfonts;
+ int reg_prio;
+ int i;
- if (!NILP (registry)
- && CONSP (Vface_alternative_font_registry_alist))
+ if (NILP (registry))
+ return font_list_1 (f, pattern, family, registry, fonts);
+
+ for (reg_prio = 0, nfonts = 0; CONSP (registry); registry = XCDR (registry))
{
- Lisp_Object alter;
+ Lisp_Object elt, alter;
+ int nfonts2;
+ struct font_name *fonts2;
- alter = Fassoc (registry, Vface_alternative_font_registry_alist);
- if (CONSP (alter))
+ elt = XCAR (registry);
+ alter = Fassoc (elt, Vface_alternative_font_registry_alist);
+ if (NILP (alter))
+ alter = Fcons (elt, Qnil);
+ for (; CONSP (alter); alter = XCDR (alter), reg_prio++)
{
- int reg_prio, i;
-
- for (alter = XCDR (alter), reg_prio = 1;
- CONSP (alter);
- alter = XCDR (alter), reg_prio++)
- if (STRINGP (XCAR (alter)))
- {
- int nfonts2;
- struct font_name *fonts2;
-
- nfonts2 = font_list_1 (f, pattern, family, XCAR (alter),
- &fonts2);
+ nfonts2 = font_list_1 (f, pattern, family, XCAR (alter), &fonts2);
+ if (nfonts2 > 0)
+ {
+ if (reg_prio > 0)
for (i = 0; i < nfonts2; i++)
fonts2[i].registry_priority = reg_prio;
- *fonts = (nfonts > 0
- ? concat_font_list (*fonts, nfonts, fonts2, nfonts2)
- : fonts2);
- nfonts += nfonts2;
- }
+ if (nfonts > 0)
+ *fonts = concat_font_list (*fonts, nfonts, fonts2, nfonts2);
+ else
+ *fonts = fonts2;
+ nfonts += nfonts2;
+ }
}
}
@@ -3016,7 +3143,7 @@ the WIDTH times as wide as FACE on FRAME. */)
{
/* This is of limited utility since it works with character
widths. Keep it for compatibility. --gerd. */
- int face_id = lookup_named_face (f, face, 0, 0);
+ int face_id = lookup_named_face (f, face, 0);
struct face *face = (face_id < 0
? NULL
: FACE_FROM_ID (f, face_id));
@@ -3075,6 +3202,7 @@ the WIDTH times as wide as FACE on FRAME. */)
#define LFACE_FONT(LFACE) AREF ((LFACE), LFACE_FONT_INDEX)
#define LFACE_INHERIT(LFACE) AREF ((LFACE), LFACE_INHERIT_INDEX)
#define LFACE_AVGWIDTH(LFACE) AREF ((LFACE), LFACE_AVGWIDTH_INDEX)
+#define LFACE_FONTSET(LFACE) AREF ((LFACE), LFACE_FONTSET_INDEX)
/* Non-zero if LFACE is a Lisp face. A Lisp face is a vector of size
LFACE_VECTOR_SIZE which has the symbol `face' in slot 0. */
@@ -3153,7 +3281,12 @@ check_lface_attrs (attrs)
xassert (UNSPECIFIEDP (attrs[LFACE_FONT_INDEX])
|| IGNORE_DEFFACE_P (attrs[LFACE_FONT_INDEX])
|| NILP (attrs[LFACE_FONT_INDEX])
+#ifdef USE_FONT_BACKEND
+ || FONT_OBJECT_P (attrs[LFACE_FONT_INDEX])
+#endif /* USE_FONT_BACKEND */
|| STRINGP (attrs[LFACE_FONT_INDEX]));
+ xassert (UNSPECIFIEDP (attrs[LFACE_FONTSET_INDEX])
+ || STRINGP (attrs[LFACE_FONTSET_INDEX]));
#endif
}
@@ -3345,7 +3478,7 @@ lface_fully_specified_p (attrs)
for (i = 1; i < LFACE_VECTOR_SIZE; ++i)
if (i != LFACE_FONT_INDEX && i != LFACE_INHERIT_INDEX
- && i != LFACE_AVGWIDTH_INDEX)
+ && i != LFACE_AVGWIDTH_INDEX && i != LFACE_FONTSET_INDEX)
if ((UNSPECIFIEDP (attrs[i]) || IGNORE_DEFFACE_P (attrs[i]))
#ifdef MAC_OS
/* MAC_TODO: No stipple support on Mac OS yet, this index is
@@ -3389,8 +3522,15 @@ set_lface_from_font_name (f, lface, fontname, force_p, may_fail_p)
/* If FONTNAME is actually a fontset name, get ASCII font name of it. */
fontset = fs_query_fontset (fontname, 0);
- if (fontset >= 0)
+
+ if (fontset > 0)
font_name = SDATA (fontset_ascii (fontset));
+ else if (fontset == 0)
+ {
+ if (may_fail_p)
+ return 0;
+ abort ();
+ }
/* Check if FONT_NAME is surely available on the system. Usually
FONT_NAME is already cached for the frame F and FS_LOAD_FONT
@@ -3398,7 +3538,7 @@ set_lface_from_font_name (f, lface, fontname, force_p, may_fail_p)
caching it now is not futail because we anyway load the font
later. */
BLOCK_INPUT;
- font_info = FS_LOAD_FONT (f, 0, font_name, -1);
+ font_info = FS_LOAD_FONT (f, font_name);
UNBLOCK_INPUT;
if (!font_info)
@@ -3460,11 +3600,103 @@ set_lface_from_font_name (f, lface, fontname, force_p, may_fail_p)
LFACE_SLANT (lface)
= have_xlfd_p ? xlfd_symbolic_slant (&font) : Qnormal;
- LFACE_FONT (lface) = fontname;
-
+ if (fontset > 0)
+ {
+ LFACE_FONT (lface) = build_string (font_info->full_name);
+ LFACE_FONTSET (lface) = fontset_name (fontset);
+ }
+ else
+ {
+ LFACE_FONT (lface) = fontname;
+ fontset
+ = new_fontset_from_font_name (build_string (font_info->full_name));
+ LFACE_FONTSET (lface) = fontset_name (fontset);
+ }
return 1;
}
+#ifdef USE_FONT_BACKEND
+/* Set font-related attributes of Lisp face LFACE from FONT-OBJECT and
+ FONTSET. If FORCE_P is zero, set only unspecified attributes of
+ LFACE. The exceptions are `font' and `fontset' attributes. They
+ are set regardless of FORCE_P. */
+
+static void
+set_lface_from_font_and_fontset (f, lface, font_object, fontset, force_p)
+ struct frame *f;
+ Lisp_Object lface, font_object;
+ int fontset;
+ int force_p;
+{
+ struct font *font = XSAVE_VALUE (font_object)->pointer;
+ Lisp_Object entity = font->entity;
+ Lisp_Object val;
+
+ /* Set attributes only if unspecified, otherwise face defaults for
+ new frames would never take effect. If the font doesn't have a
+ specific property, set a normal value for that. */
+
+ if (force_p || UNSPECIFIEDP (LFACE_FAMILY (lface)))
+ {
+ Lisp_Object foundry = AREF (entity, FONT_FOUNDRY_INDEX);
+ Lisp_Object family = AREF (entity, FONT_FAMILY_INDEX);
+
+ if (! NILP (foundry))
+ {
+ if (! NILP (family))
+ val = concat3 (SYMBOL_NAME (foundry), build_string ("-"),
+ SYMBOL_NAME (family));
+ else
+ val = concat2 (SYMBOL_NAME (foundry), build_string ("-*"));
+ }
+ else
+ {
+ if (! NILP (family))
+ val = SYMBOL_NAME (family);
+ else
+ val = build_string ("*");
+ }
+ LFACE_FAMILY (lface) = val;
+ }
+
+ if (force_p || UNSPECIFIEDP (LFACE_HEIGHT (lface)))
+ {
+ int pt = pixel_point_size (f, font->pixel_size * 10);
+
+ xassert (pt > 0);
+ LFACE_HEIGHT (lface) = make_number (pt);
+ }
+
+ if (force_p || UNSPECIFIEDP (LFACE_AVGWIDTH (lface)))
+ LFACE_AVGWIDTH (lface) = make_number (font->font.average_width);
+
+ if (force_p || UNSPECIFIEDP (LFACE_WEIGHT (lface)))
+ {
+ Lisp_Object weight = font_symbolic_weight (entity);
+
+ val = NILP (weight) ? Qnormal : face_symbolic_weight (weight);
+ LFACE_WEIGHT (lface) = ! NILP (val) ? val : weight;
+ }
+ if (force_p || UNSPECIFIEDP (LFACE_SLANT (lface)))
+ {
+ Lisp_Object slant = font_symbolic_slant (entity);
+
+ val = NILP (slant) ? Qnormal : face_symbolic_slant (slant);
+ LFACE_SLANT (lface) = ! NILP (val) ? val : slant;
+ }
+ if (force_p || UNSPECIFIEDP (LFACE_SWIDTH (lface)))
+ {
+ Lisp_Object width = font_symbolic_width (entity);
+
+ val = NILP (width) ? Qnormal : face_symbolic_swidth (width);
+ LFACE_SWIDTH (lface) = ! NILP (val) ? val : width;
+ }
+
+ LFACE_FONT (lface) = font_object;
+ LFACE_FONTSET (lface) = fontset_name (fontset);
+}
+#endif /* USE_FONT_BACKEND */
+
#endif /* HAVE_WINDOW_SYSTEM */
@@ -4287,7 +4519,7 @@ FRAME 0 means change the face on all frames, and change the default
LFACE_SWIDTH (lface) = value;
font_related_attr_p = 1;
}
- else if (EQ (attr, QCfont))
+ else if (EQ (attr, QCfont) || EQ (attr, QCfontset))
{
#ifdef HAVE_WINDOW_SYSTEM
if (EQ (frame, Qt) || FRAME_WINDOW_P (XFRAME (frame)))
@@ -4302,6 +4534,48 @@ FRAME 0 means change the face on all frames, and change the default
else
f = check_x_frame (frame);
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend
+ && !UNSPECIFIEDP (value) && !IGNORE_DEFFACE_P (value))
+ {
+ int fontset;
+
+ if (EQ (attr, QCfontset))
+ {
+ Lisp_Object fontset_name = Fquery_fontset (value, Qnil);
+
+ if (NILP (fontset_name))
+ signal_error ("Invalid fontset name", value);
+ LFACE_FONTSET (lface) = value;
+ }
+ else
+ {
+ Lisp_Object font_object;
+
+ if (FONT_OBJECT_P (value))
+ {
+ font_object = value;
+ fontset = FRAME_FONTSET (f);
+ }
+ else
+ {
+ CHECK_STRING (value);
+
+ fontset = fs_query_fontset (value, 0);
+ if (fontset >= 0)
+ value = fontset_ascii (fontset);
+ else
+ fontset = FRAME_FONTSET (f);
+ font_object = font_open_by_name (f, SDATA (value));
+ if (NILP (font_object))
+ signal_error ("Invalid font", value);
+ }
+ set_lface_from_font_and_fontset (f, lface, font_object,
+ fontset, 1);
+ }
+ }
+ else
+#endif /* USE_FONT_BACKEND */
if (!UNSPECIFIEDP (value) && !IGNORE_DEFFACE_P (value))
{
CHECK_STRING (value);
@@ -4311,9 +4585,16 @@ FRAME 0 means change the face on all frames, and change the default
tmp = Fquery_fontset (value, Qnil);
if (!NILP (tmp))
value = tmp;
+ else if (EQ (attr, QCfontset))
+ signal_error ("Invalid fontset name", value);
- if (!set_lface_from_font_name (f, lface, value, 1, 1))
- signal_error ("Invalid font or fontset name", value);
+ if (EQ (attr, QCfont))
+ {
+ if (!set_lface_from_font_name (f, lface, value, 1, 1))
+ signal_error ("Invalid font or fontset name", value);
+ }
+ else
+ LFACE_FONTSET (lface) = value;
}
font_attr_p = 1;
@@ -4365,6 +4646,7 @@ FRAME 0 means change the face on all frames, and change the default
if (!EQ (frame, Qt)
&& NILP (Fget (face, Qface_no_inherit))
&& (EQ (attr, QCfont)
+ || EQ (attr, QCfontset)
|| NILP (Fequal (old_value, value))))
{
++face_change_count;
@@ -4472,7 +4754,7 @@ FRAME 0 means change the face on all frames, and change the default
#ifdef HAVE_WINDOW_SYSTEM
/* Set the `font' frame parameter of FRAME determined from `default'
- face attributes LFACE. If a face or fontset name is explicitely
+ face attributes LFACE. If a font name is explicitely
specfied in LFACE, use it as is. Otherwise, determine a font name
from the other font-related atrributes of LFACE. In that case, if
there's no matching font, signals an error. */
@@ -4490,12 +4772,29 @@ set_font_frame_param (frame, lface)
if (STRINGP (LFACE_FONT (lface)))
font_name = LFACE_FONT (lface);
+#ifdef USE_FONT_BACKEND
+ else if (enable_font_backend)
+ {
+ /* We set FONT_NAME to a font-object. */
+ if (FONT_OBJECT_P (LFACE_FONT (lface)))
+ font_name = LFACE_FONT (lface);
+ else
+ {
+ font_name = font_find_for_lface (f, &AREF (lface, 0), Qnil);
+ if (NILP (font_name))
+ error ("No font matches the specified attribute");
+ font_name = font_open_for_lface (f, &AREF (lface, 0), font_name);
+ if (NILP (font_name))
+ error ("No font matches the specified attribute");
+ }
+ }
+#endif
else
{
/* Choose a font name that reflects LFACE's attributes and has
the registry and encoding pattern specified in the default
fontset (3rd arg: -1) for ASCII characters (4th arg: 0). */
- font = choose_face_font (f, XVECTOR (lface)->contents, -1, 0, 0);
+ font = choose_face_font (f, XVECTOR (lface)->contents, Qnil, NULL);
if (!font)
error ("No font matches the specified attribute");
font_name = build_string (font);
@@ -4878,6 +5177,8 @@ frames). If FRAME is omitted or nil, use the selected frame. */)
value = LFACE_INHERIT (lface);
else if (EQ (keyword, QCfont))
value = LFACE_FONT (lface);
+ else if (EQ (keyword, QCfontset))
+ value = LFACE_FONTSET (lface);
else
signal_error ("Invalid face attribute name", keyword);
@@ -4982,15 +5283,18 @@ Default face attributes override any local face attributes. */)
return fonts with the same size as the font of a face. This is
done in fontset.el. */
-DEFUN ("face-font", Fface_font, Sface_font, 1, 2, 0,
+DEFUN ("face-font", Fface_font, Sface_font, 1, 3, 0,
doc: /* Return the font name of face FACE, or nil if it is unspecified.
+The font name is, by default, for ASCII characters.
If the optional argument FRAME is given, report on face FACE in that frame.
If FRAME is t, report on the defaults for face FACE (for new frames).
The font default for a face is either nil, or a list
of the form (bold), (italic) or (bold italic).
-If FRAME is omitted or nil, use the selected frame. */)
- (face, frame)
- Lisp_Object face, frame;
+If FRAME is omitted or nil, use the selected frame. And, in this case,
+if the optional third argument CHARACTER is given,
+return the font name used for CHARACTER. */)
+ (face, frame, character)
+ Lisp_Object face, frame, character;
{
if (EQ (frame, Qt))
{
@@ -5010,9 +5314,23 @@ If FRAME is omitted or nil, use the selected frame. */)
else
{
struct frame *f = frame_or_selected_frame (frame, 1);
- int face_id = lookup_named_face (f, face, 0, 1);
+ int face_id = lookup_named_face (f, face, 1);
struct face *face = FACE_FROM_ID (f, face_id);
- return face ? build_string (face->font_name) : Qnil;
+
+ if (! face)
+ return Qnil;
+#ifdef HAVE_WINDOW_SYSTEM
+ if (FRAME_WINDOW_P (f) && !NILP (character))
+ {
+ CHECK_CHARACTER (character);
+ face_id = FACE_FOR_CHAR (f, face, XINT (character), -1, Qnil);
+ face = FACE_FROM_ID (f, face_id);
+ return (face->font && face->font_name
+ ? build_string (face->font_name)
+ : Qnil);
+ }
+#endif
+ return build_string (face->font_name);
}
}
@@ -5175,8 +5493,8 @@ lface_hash (v)
/* Return non-zero if LFACE1 and LFACE2 specify the same font (without
considering charsets/registries). They do if they specify the same
- family, point size, weight, width, slant, and fontset. Both LFACE1
- and LFACE2 must be fully-specified. */
+ family, point size, weight, width, slant, font, and fontset. Both
+ LFACE1 and LFACE2 must be fully-specified. */
static INLINE int
lface_same_font_attributes_p (lface1, lface2)
@@ -5194,8 +5512,14 @@ lface_same_font_attributes_p (lface1, lface2)
&& (EQ (lface1[LFACE_FONT_INDEX], lface2[LFACE_FONT_INDEX])
|| (STRINGP (lface1[LFACE_FONT_INDEX])
&& STRINGP (lface2[LFACE_FONT_INDEX])
- && xstricmp (SDATA (lface1[LFACE_FONT_INDEX]),
- SDATA (lface2[LFACE_FONT_INDEX])))));
+ && ! xstricmp (SDATA (lface1[LFACE_FONT_INDEX]),
+ SDATA (lface2[LFACE_FONT_INDEX]))))
+ && (EQ (lface1[LFACE_FONTSET_INDEX], lface2[LFACE_FONTSET_INDEX])
+ || (STRINGP (lface1[LFACE_FONTSET_INDEX])
+ && STRINGP (lface2[LFACE_FONTSET_INDEX])
+ && ! xstricmp (SDATA (lface1[LFACE_FONTSET_INDEX]),
+ SDATA (lface2[LFACE_FONTSET_INDEX]))))
+ );
}
@@ -5222,7 +5546,7 @@ make_realized_face (attr)
/* Free realized face FACE, including its X resources. FACE may
be null. */
-static void
+void
free_realized_face (f, face)
struct frame *f;
struct face *face;
@@ -5237,6 +5561,10 @@ free_realized_face (f, face)
free_face_fontset (f, face);
if (face->gc)
{
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend && face->font_info)
+ font_done_for_face (f, face);
+#endif /* USE_FONT_BACKEND */
x_free_gc (f, face->gc);
face->gc = 0;
}
@@ -5298,6 +5626,10 @@ prepare_face_for_display (f, face)
}
#endif
face->gc = x_create_gc (f, mask, &xgcv);
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend && face->font)
+ font_prepare_for_face (f, face);
+#endif /* USE_FONT_BACKEND */
UNBLOCK_INPUT;
}
#endif /* HAVE_WINDOW_SYSTEM */
@@ -5404,6 +5736,10 @@ clear_face_gcs (c)
struct face *face = c->faces_by_id[i];
if (face && face->gc)
{
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend && face->font_info)
+ font_done_for_face (c->f, face);
+#endif /* USE_FONT_BACKEND */
x_free_gc (c->f, face->gc);
face->gc = 0;
}
@@ -5457,11 +5793,10 @@ free_realized_faces (c)
}
-/* Free all faces realized for multibyte characters on frame F that
- has FONTSET. */
+/* Free all realized faces that are using FONTSET on frame F. */
void
-free_realized_multibyte_face (f, fontset)
+free_realized_faces_for_fontset (f, fontset)
struct frame *f;
int fontset;
{
@@ -5478,7 +5813,6 @@ free_realized_multibyte_face (f, fontset)
{
face = cache->faces_by_id[i];
if (face
- && face != face->ascii_face
&& face->fontset == fontset)
{
uncache_face (cache, face);
@@ -5536,10 +5870,11 @@ free_face_cache (c)
/* Cache realized face FACE in face cache C. HASH is the hash value
- of FACE. If FACE->fontset >= 0, add the new face to the end of the
- collision list of the face hash table of C. This is done because
- otherwise lookup_face would find FACE for every character, even if
- faces with the same attributes but for specific characters exist. */
+ of FACE. If FACE is for ASCII characters (i.e. FACE->ascii_face ==
+ FACE), insert the new face to the beginning of the collision list
+ of the face hash table of C. Otherwise, add the new face to the
+ end of the collision list. This way, lookup_face can quickly find
+ that a requested face is not cached. */
static void
cache_face (c, face, hash)
@@ -5551,7 +5886,7 @@ cache_face (c, face, hash)
face->hash = hash;
- if (face->fontset >= 0)
+ if (face->ascii_face != face)
{
struct face *last = c->buckets[i];
if (last)
@@ -5643,17 +5978,14 @@ uncache_face (c, face)
/* Look up a realized face with face attributes ATTR in the face cache
- of frame F. The face will be used to display character C. Value
- is the ID of the face found. If no suitable face is found, realize
- a new one. In that case, if C is a multibyte character, BASE_FACE
- is a face that has the same attributes. */
+ of frame F. The face will be used to display ASCII characters.
+ Value is the ID of the face found. If no suitable face is found,
+ realize a new one. */
INLINE int
-lookup_face (f, attr, c, base_face)
+lookup_face (f, attr)
struct frame *f;
Lisp_Object *attr;
- int c;
- struct face *base_face;
{
struct face_cache *cache = FRAME_FACE_CACHE (f);
unsigned hash;
@@ -5668,44 +6000,120 @@ lookup_face (f, attr, c, base_face)
i = hash % FACE_CACHE_BUCKETS_SIZE;
for (face = cache->buckets[i]; face; face = face->next)
- if (face->hash == hash
- && (!FRAME_WINDOW_P (f)
- || FACE_SUITABLE_FOR_CHAR_P (face, c))
- && lface_equal_p (face->lface, attr))
- break;
+ {
+ if (face->ascii_face != face)
+ {
+ /* There's no more ASCII face. */
+ face = NULL;
+ break;
+ }
+ if (face->hash == hash
+ && lface_equal_p (face->lface, attr))
+ break;
+ }
/* If not found, realize a new face. */
if (face == NULL)
- face = realize_face (cache, attr, c, base_face, -1);
+ face = realize_face (cache, attr, -1);
#if GLYPH_DEBUG
xassert (face == FACE_FROM_ID (f, face->id));
+#endif /* GLYPH_DEBUG */
-/* When this function is called from face_for_char (in this case, C is
- a multibyte character), a fontset of a face returned by
- realize_face is not yet set, i.e. FACE_SUITABLE_FOR_CHAR_P (FACE,
- C) is not sutisfied. The fontset is set for this face by
- face_for_char later. */
-#if 0
- if (FRAME_WINDOW_P (f))
- xassert (FACE_SUITABLE_FOR_CHAR_P (face, c));
-#endif
+ return face->id;
+}
+
+#ifdef HAVE_WINDOW_SYSTEM
+/* Look up a realized face that has the same attributes as BASE_FACE
+ except for the font in the face cache of frame F. If FONT_ID is
+ not negative, it is an ID number of an already opened font that is
+ used by the face. If FONT_ID is negative, the face has no font.
+ Value is the ID of the face found. If no suitable face is found,
+ realize a new one. */
+
+int
+lookup_non_ascii_face (f, font_id, base_face)
+ struct frame *f;
+ int font_id;
+ struct face *base_face;
+{
+ struct face_cache *cache = FRAME_FACE_CACHE (f);
+ unsigned hash;
+ int i;
+ struct face *face;
+
+ xassert (cache != NULL);
+ base_face = base_face->ascii_face;
+ hash = lface_hash (base_face->lface);
+ i = hash % FACE_CACHE_BUCKETS_SIZE;
+
+ for (face = cache->buckets[i]; face; face = face->next)
+ {
+ if (face->ascii_face == face)
+ continue;
+ if (face->ascii_face == base_face
+ && face->font_info_id == font_id)
+ break;
+ }
+
+ /* If not found, realize a new face. */
+ if (face == NULL)
+ face = realize_non_ascii_face (f, font_id, base_face);
+
+#if GLYPH_DEBUG
+ xassert (face == FACE_FROM_ID (f, face->id));
#endif /* GLYPH_DEBUG */
return face->id;
}
+#ifdef USE_FONT_BACKEND
+int
+face_for_font (f, font, base_face)
+ struct frame *f;
+ struct font *font;
+ struct face *base_face;
+{
+ struct face_cache *cache = FRAME_FACE_CACHE (f);
+ unsigned hash;
+ int i;
+ struct face *face;
+
+ xassert (cache != NULL);
+ base_face = base_face->ascii_face;
+ hash = lface_hash (base_face->lface);
+ i = hash % FACE_CACHE_BUCKETS_SIZE;
+
+ for (face = cache->buckets[i]; face; face = face->next)
+ {
+ if (face->ascii_face == face)
+ continue;
+ if (face->ascii_face == base_face
+ && face->font_info == (struct font_info *) font)
+ return face->id;
+ }
+
+ /* If not found, realize a new face. */
+ face = realize_non_ascii_face (f, -1, base_face);
+ face->font = font->font.font;
+ face->font_info = (struct font_info *) font;
+ face->font_info_id = 0;
+ face->font_name = font->font.full_name;
+ return face->id;
+}
+#endif /* USE_FONT_BACKEND */
+
+#endif /* HAVE_WINDOW_SYSTEM */
/* Return the face id of the realized face for named face SYMBOL on
- frame F suitable for displaying character C. Value is -1 if the
- face couldn't be determined, which might happen if the default face
- isn't realized and cannot be realized. */
+ frame F suitable for displaying ASCII characters. Value is -1 if
+ the face couldn't be determined, which might happen if the default
+ face isn't realized and cannot be realized. */
int
-lookup_named_face (f, symbol, c, signal_p)
+lookup_named_face (f, symbol, signal_p)
struct frame *f;
Lisp_Object symbol;
- int c;
int signal_p;
{
Lisp_Object attrs[LFACE_VECTOR_SIZE];
@@ -5725,7 +6133,7 @@ lookup_named_face (f, symbol, c, signal_p)
bcopy (default_face->lface, attrs, sizeof attrs);
merge_face_vectors (f, symbol_attrs, attrs, 0);
- return lookup_face (f, attrs, c, NULL);
+ return lookup_face (f, attrs);
}
@@ -5742,7 +6150,7 @@ ascii_face_of_lisp_face (f, lface_id)
if (lface_id >= 0 && lface_id < lface_id_to_name_size)
{
Lisp_Object face_name = lface_id_to_name[lface_id];
- face_id = lookup_named_face (f, face_name, 0, 1);
+ face_id = lookup_named_face (f, face_name, 1);
}
else
face_id = -1;
@@ -5790,7 +6198,7 @@ smaller_face (f, face_id, steps)
/* Look up a face for a slightly smaller/larger font. */
pt += delta;
attrs[LFACE_HEIGHT_INDEX] = make_number (pt);
- new_face_id = lookup_face (f, attrs, 0, NULL);
+ new_face_id = lookup_face (f, attrs);
new_face = FACE_FROM_ID (f, new_face_id);
/* If height changes, count that as one step. */
@@ -5833,7 +6241,7 @@ face_with_height (f, face_id, height)
face = FACE_FROM_ID (f, face_id);
bcopy (face->lface, attrs, sizeof attrs);
attrs[LFACE_HEIGHT_INDEX] = make_number (height);
- face_id = lookup_face (f, attrs, 0, NULL);
+ face_id = lookup_face (f, attrs);
#endif /* HAVE_WINDOW_SYSTEM */
return face_id;
@@ -5841,17 +6249,16 @@ face_with_height (f, face_id, height)
/* Return the face id of the realized face for named face SYMBOL on
- frame F suitable for displaying character C, and use attributes of
- the face FACE_ID for attributes that aren't completely specified by
- SYMBOL. This is like lookup_named_face, except that the default
- attributes come from FACE_ID, not from the default face. FACE_ID
- is assumed to be already realized. */
+ frame F suitable for displaying ASCII characters, and use
+ attributes of the face FACE_ID for attributes that aren't
+ completely specified by SYMBOL. This is like lookup_named_face,
+ except that the default attributes come from FACE_ID, not from the
+ default face. FACE_ID is assumed to be already realized. */
int
-lookup_derived_face (f, symbol, c, face_id, signal_p)
+lookup_derived_face (f, symbol, face_id, signal_p)
struct frame *f;
Lisp_Object symbol;
- int c;
int face_id;
int signal_p;
{
@@ -5865,7 +6272,7 @@ lookup_derived_face (f, symbol, c, face_id, signal_p)
get_lface_attributes (f, symbol, symbol_attrs, signal_p);
bcopy (default_face->lface, attrs, sizeof attrs);
merge_face_vectors (f, symbol_attrs, attrs, 0);
- return lookup_face (f, attrs, c, default_face);
+ return lookup_face (f, attrs);
}
DEFUN ("face-attributes-as-vector", Fface_attributes_as_vector,
@@ -5952,6 +6359,7 @@ x_supports_face_attributes_p (f, attrs, def_face)
|| !UNSPECIFIEDP (attrs[LFACE_SWIDTH_INDEX])
|| !UNSPECIFIEDP (attrs[LFACE_AVGWIDTH_INDEX]))
{
+ int face_id;
struct face *face;
Lisp_Object merged_attrs[LFACE_VECTOR_SIZE];
@@ -5959,7 +6367,8 @@ x_supports_face_attributes_p (f, attrs, def_face)
merge_face_vectors (f, attrs, merged_attrs, 0);
- face = FACE_FROM_ID (f, lookup_face (f, merged_attrs, 0, 0));
+ face_id = lookup_face (f, merged_attrs);
+ face = FACE_FROM_ID (f, face_id);
if (! face)
error ("Cannot make face");
@@ -6239,7 +6648,7 @@ face for italic. */)
Font selection
***********************************************************************/
-DEFUN ("internal-set-font-selection-order",
+ DEFUN ("internal-set-font-selection-order",
Finternal_set_font_selection_order,
Sinternal_set_font_selection_order, 1, 1, 0,
doc: /* Set font selection order for face font selection to ORDER.
@@ -6295,6 +6704,10 @@ Value is ORDER. */)
free_all_realized_faces (Qnil);
}
+#ifdef USE_FONT_BACKEND
+ font_update_sort_order (font_sort_order);
+#endif /* USE_FONT_BACKEND */
+
return Qnil;
}
@@ -6485,6 +6898,12 @@ build_scalable_font_name (f, font, specified_pt)
double resy = FRAME_X_DISPLAY_INFO (f)->resy;
double pt;
+ if (font->numeric[XLFD_PIXEL_SIZE] != 0
+ || font->numeric[XLFD_POINT_SIZE] != 0)
+ /* This is a scalable font but is requested for a specific size.
+ We should not change that size. */
+ return build_font_name (font);
+
/* If scalable font is for a specific resolution, compute
the point size we must specify from the resolution of
the display and the specified resolution of the font. */
@@ -6757,78 +7176,62 @@ try_alternative_families (f, family, registry, fonts)
/* Get a list of matching fonts on frame F.
- FAMILY, if a string, specifies a font family derived from the fontset.
- It is only used if the face does not specify any family in ATTRS or
- if we cannot find any font of the face's family.
+ PATTERN, if a string, specifies a font name pattern to match while
+ ignoring FAMILY and REGISTRY.
- REGISTRY, if a string, specifies a font registry and encoding to
- match. A value of nil means include fonts of any registry and
- encoding.
+ FAMILY, if a list, specifies a list of font families to try.
- If PREFER_FACE_FAMILY is nonzero, perfer face's family to FAMILY.
- Otherwise, prefer FAMILY.
+ REGISTRY, if a list, specifies a list of font registries and
+ encodinging to try.
Return in *FONTS a pointer to a vector of font_name structures for
the fonts matched. Value is the number of fonts found. */
static int
-try_font_list (f, attrs, family, registry, fonts, prefer_face_family)
+try_font_list (f, pattern, family, registry, fonts)
struct frame *f;
- Lisp_Object *attrs;
- Lisp_Object family, registry;
+ Lisp_Object pattern, family, registry;
struct font_name **fonts;
- int prefer_face_family;
{
int nfonts = 0;
- Lisp_Object face_family = attrs[LFACE_FAMILY_INDEX];
- Lisp_Object try_family;
-
- try_family = (prefer_face_family || NILP (family)) ? face_family : family;
-
- if (STRINGP (try_family))
- nfonts = try_alternative_families (f, try_family, registry, fonts);
-#ifdef MAC_OS
- if (nfonts == 0 && STRINGP (try_family) && STRINGP (registry))
- {
- if (xstricmp (SDATA (registry), "mac-roman") == 0)
- /* When realizing the default face and a font spec does not
- matched exactly, Emacs looks for ones with the same registry
- as the default font. On the Mac, this is mac-roman, which
- does not work if the family is -etl-fixed, e.g. The
- following widens the choices and fixes that problem. */
- nfonts = try_alternative_families (f, try_family, Qnil, fonts);
- else if (SBYTES (try_family) > 0
- && SREF (try_family, SBYTES (try_family) - 1) != '*')
- /* Some Central European/Cyrillic font family names have the
- Roman counterpart name as their prefix. */
- nfonts = try_alternative_families (f, concat2 (try_family,
- build_string ("*")),
- registry, fonts);
+ if (STRINGP (pattern))
+ {
+ nfonts = font_list (f, pattern, Qnil, Qnil, fonts);
+ if (nfonts == 0 && ! EQ (Vscalable_fonts_allowed, Qt))
+ {
+ int count = SPECPDL_INDEX ();
+ specbind (Qscalable_fonts_allowed, Qt);
+ nfonts = font_list (f, pattern, Qnil, Qnil, fonts);
+ unbind_to (count, Qnil);
+ }
}
-#endif
+ else
+ {
+ Lisp_Object tail;
- if (EQ (try_family, family))
- family = face_family;
+ if (NILP (family))
+ nfonts = font_list (f, Qnil, Qnil, registry, fonts);
+ else
+ for (tail = family; ! nfonts && CONSP (tail); tail = XCDR (tail))
+ nfonts = try_alternative_families (f, XCAR (tail), registry, fonts);
- if (nfonts == 0 && STRINGP (family))
- nfonts = try_alternative_families (f, family, registry, fonts);
+ /* Try font family of the default face or "fixed". */
+ if (nfonts == 0 && !NILP (family))
+ {
+ struct face *default_face = FACE_FROM_ID (f, DEFAULT_FACE_ID);
+ if (default_face)
+ family = default_face->lface[LFACE_FAMILY_INDEX];
+ else
+ family = build_string ("fixed");
+ nfonts = try_alternative_families (f, family, registry, fonts);
+ }
- /* Try font family of the default face or "fixed". */
- if (nfonts == 0)
- {
- struct face *default_face = FACE_FROM_ID (f, DEFAULT_FACE_ID);
- if (default_face)
- family = default_face->lface[LFACE_FAMILY_INDEX];
- else
- family = build_string ("fixed");
- nfonts = font_list (f, Qnil, family, registry, fonts);
+ /* Try any family with the given registry. */
+ if (nfonts == 0 && !NILP (family))
+ nfonts = try_alternative_families (f, Qnil, registry, fonts);
}
- /* Try any family with the given registry. */
- if (nfonts == 0)
- nfonts = try_alternative_families (f, Qnil, registry, fonts);
-
return nfonts;
}
@@ -6843,63 +7246,108 @@ face_fontset (attrs)
{
Lisp_Object name;
- name = attrs[LFACE_FONT_INDEX];
+ name = attrs[LFACE_FONTSET_INDEX];
if (!STRINGP (name))
return -1;
return fs_query_fontset (name, 0);
}
-/* Choose a name of font to use on frame F to display character C with
+/* Choose a name of font to use on frame F to display characters with
Lisp face attributes specified by ATTRS. The font name is
- determined by the font-related attributes in ATTRS and the name
- pattern for C in FONTSET. Value is the font name which is
- allocated from the heap and must be freed by the caller, or NULL if
- we can get no information about the font name of C. It is assured
- that we always get some information for a single byte
- character.
+ determined by the font-related attributes in ATTRS and FONT-SPEC
+ (if specified).
- If NEEDS_OVERSTRIKE is non-zero, a boolean is returned in it to
- indicate whether the resulting font should be drawn using overstrike
- to simulate bold-face. */
+ When we are choosing a font for ASCII characters, FONT-SPEC is
+ always nil. Otherwise FONT-SPEC is a list
+ [ FAMILY WEIGHT SLANT SWIDTH ADSTYLE REGISTRY ]
+ or a string specifying a font name pattern.
-static char *
-choose_face_font (f, attrs, fontset, c, needs_overstrike)
+ If NEEDS_OVERSTRIKE is not NULL, a boolean is returned in it to
+ indicate whether the resulting font should be drawn using
+ overstrike to simulate bold-face.
+
+ Value is the font name which is allocated from the heap and must be
+ freed by the caller. */
+
+char *
+choose_face_font (f, attrs, font_spec, needs_overstrike)
struct frame *f;
Lisp_Object *attrs;
- int fontset, c;
+ Lisp_Object font_spec;
int *needs_overstrike;
{
- Lisp_Object pattern;
+ Lisp_Object pattern, family, adstyle, registry;
char *font_name = NULL;
struct font_name *fonts;
- int nfonts, width_ratio;
+ int nfonts;
if (needs_overstrike)
*needs_overstrike = 0;
- /* Get (foundry and) family name and registry (and encoding) name of
- a font for C. */
- pattern = fontset_font_pattern (f, fontset, c);
- if (NILP (pattern))
+ /* If we are choosing an ASCII font and a font name is explicitly
+ specified in ATTRS, return it. */
+ if (NILP (font_spec) && STRINGP (attrs[LFACE_FONT_INDEX]))
+ return xstrdup (SDATA (attrs[LFACE_FONT_INDEX]));
+
+ if (NILP (attrs[LFACE_FAMILY_INDEX]))
+ family = Qnil;
+ else
+ family = Fcons (attrs[LFACE_FAMILY_INDEX], Qnil);
+
+ /* Decide FAMILY, ADSTYLE, and REGISTRY from FONT_SPEC. But,
+ ADSTYLE is not used in the font selector for the moment. */
+ if (VECTORP (font_spec))
{
- xassert (!SINGLE_BYTE_CHAR_P (c));
- return NULL;
+ pattern = Qnil;
+ if (STRINGP (AREF (font_spec, FONT_SPEC_FAMILY_INDEX)))
+ family = Fcons (AREF (font_spec, FONT_SPEC_FAMILY_INDEX), family);
+ adstyle = AREF (font_spec, FONT_SPEC_ADSTYLE_INDEX);
+ registry = Fcons (AREF (font_spec, FONT_SPEC_REGISTRY_INDEX), Qnil);
+ }
+ else if (STRINGP (font_spec))
+ {
+ pattern = font_spec;
+ family = Qnil;
+ adstyle = Qnil;
+ registry = Qnil;
+ }
+ else
+ {
+ /* We are choosing an ASCII font. By default, use the registry
+ name "iso8859-1". But, if the registry name of the ASCII
+ font specified in the fontset of ATTRS is not "iso8859-1"
+ (e.g "iso10646-1"), use also that name with higher
+ priority. */
+ int fontset = face_fontset (attrs);
+ Lisp_Object ascii;
+ int len;
+ struct font_name font;
+
+ pattern = Qnil;
+ adstyle = Qnil;
+ registry = Fcons (build_string ("iso8859-1"), Qnil);
+
+ ascii = fontset_ascii (fontset);
+ len = SBYTES (ascii);
+ if (len < 9
+ || strcmp (SDATA (ascii) + len - 9, "iso8859-1"))
+ {
+ font.name = LSTRDUPA (ascii);
+ /* Check if the name is in XLFD. */
+ if (split_font_name (f, &font, 0))
+ {
+ font.fields[XLFD_ENCODING][-1] = '-';
+ registry = Fcons (build_string (font.fields[XLFD_REGISTRY]),
+ registry);
+ }
+ }
}
-
- /* If what we got is a name pattern, return it. */
- if (STRINGP (pattern))
- return xstrdup (SDATA (pattern));
/* Get a list of fonts matching that pattern and choose the
best match for the specified face attributes from it. */
- nfonts = try_font_list (f, attrs, XCAR (pattern), XCDR (pattern), &fonts,
- (SINGLE_BYTE_CHAR_P (c)
- || CHAR_CHARSET (c) == charset_latin_iso8859_1));
- width_ratio = (SINGLE_BYTE_CHAR_P (c)
- ? 1
- : CHARSET_WIDTH (CHAR_CHARSET (c)));
- font_name = best_matching_font (f, attrs, fonts, nfonts, width_ratio,
+ nfonts = try_font_list (f, pattern, family, registry, &fonts);
+ font_name = best_matching_font (f, attrs, fonts, nfonts, NILP (font_spec),
needs_overstrike);
return font_name;
}
@@ -6987,12 +7435,27 @@ realize_default_face (f)
#ifdef HAVE_WINDOW_SYSTEM
if (FRAME_WINDOW_P (f))
{
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ {
+ frame_font = font_find_object (FRAME_FONT_OBJECT (f));
+ xassert (FONT_OBJECT_P (frame_font));
+ set_lface_from_font_and_fontset (f, lface, frame_font,
+ FRAME_FONTSET (f),
+ f->default_face_done_p);
+ }
+ else
+ {
+#endif /* USE_FONT_BACKEND */
/* Set frame_font to the value of the `font' frame parameter. */
frame_font = Fassq (Qfont, f->param_alist);
xassert (CONSP (frame_font) && STRINGP (XCDR (frame_font)));
frame_font = XCDR (frame_font);
set_lface_from_font_name (f, lface, frame_font,
f->default_face_done_p, 1);
+#ifdef USE_FONT_BACKEND
+ }
+#endif /* USE_FONT_BACKEND */
f->default_face_done_p = 1;
}
#endif /* HAVE_WINDOW_SYSTEM */
@@ -7062,7 +7525,7 @@ realize_default_face (f)
xassert (lface_fully_specified_p (XVECTOR (lface)->contents));
check_lface (lface);
bcopy (XVECTOR (lface)->contents, attrs, sizeof attrs);
- face = realize_face (c, attrs, 0, NULL, DEFAULT_FACE_ID);
+ face = realize_face (c, attrs, DEFAULT_FACE_ID);
#ifdef HAVE_WINDOW_SYSTEM
#ifdef HAVE_X_WINDOWS
@@ -7111,23 +7574,19 @@ realize_named_face (f, symbol, id)
merge_face_vectors (f, symbol_attrs, attrs, 0);
/* Realize the face. */
- new_face = realize_face (c, attrs, 0, NULL, id);
+ new_face = realize_face (c, attrs, id);
}
/* Realize the fully-specified face with attributes ATTRS in face
- cache CACHE for character C. If C is a multibyte character,
- BASE_FACE is a face that has the same attributes. Otherwise,
- BASE_FACE is ignored. If FORMER_FACE_ID is non-negative, it is an
- ID of face to remove before caching the new face. Value is a
- pointer to the newly created realized face. */
+ cache CACHE for ASCII characters. If FORMER_FACE_ID is
+ non-negative, it is an ID of face to remove before caching the new
+ face. Value is a pointer to the newly created realized face. */
static struct face *
-realize_face (cache, attrs, c, base_face, former_face_id)
+realize_face (cache, attrs, former_face_id)
struct face_cache *cache;
Lisp_Object *attrs;
- int c;
- struct face *base_face;
int former_face_id;
{
struct face *face;
@@ -7145,37 +7604,78 @@ realize_face (cache, attrs, c, base_face, former_face_id)
}
if (FRAME_WINDOW_P (cache->f))
- face = realize_x_face (cache, attrs, c, base_face);
+ face = realize_x_face (cache, attrs);
else if (FRAME_TERMCAP_P (cache->f) || FRAME_MSDOS_P (cache->f))
- face = realize_tty_face (cache, attrs, c);
+ face = realize_tty_face (cache, attrs);
else
abort ();
/* Insert the new face. */
cache_face (cache, face, lface_hash (attrs));
+ return face;
+}
+
+
#ifdef HAVE_WINDOW_SYSTEM
- if (FRAME_WINDOW_P (cache->f) && face->font == NULL)
- load_face_font (cache->f, face, c);
-#endif /* HAVE_WINDOW_SYSTEM */
+/* Realize the fully-specified face that has the same attributes as
+ BASE_FACE except for the font on frame F. If FONT_ID is not
+ negative, it is an ID number of an already opened font that should
+ be used by the face. If FONT_ID is negative, the face has no font,
+ i.e., characters are displayed by empty boxes. */
+
+static struct face *
+realize_non_ascii_face (f, font_id, base_face)
+ struct frame *f;
+ int font_id;
+ struct face *base_face;
+{
+ struct face_cache *cache = FRAME_FACE_CACHE (f);
+ struct face *face;
+ struct font_info *font_info;
+
+ face = (struct face *) xmalloc (sizeof *face);
+ *face = *base_face;
+ face->gc = 0;
+#ifdef USE_FONT_BACKEND
+ face->extra = NULL;
+#endif
+
+ /* Don't try to free the colors copied bitwise from BASE_FACE. */
+ face->colors_copied_bitwise_p = 1;
+
+ face->font_info_id = font_id;
+ if (font_id >= 0)
+ {
+ font_info = FONT_INFO_FROM_ID (f, font_id);
+ face->font = font_info->font;
+ face->font_name = font_info->full_name;
+ }
+ else
+ {
+ face->font = NULL;
+ face->font_name = NULL;
+ }
+
+ face->gc = 0;
+
+ cache_face (cache, face, face->hash);
+
return face;
}
+#endif /* HAVE_WINDOW_SYSTEM */
/* Realize the fully-specified face with attributes ATTRS in face
- cache CACHE for character C. Do it for X frame CACHE->f. If C is
- a multibyte character, BASE_FACE is a face that has the same
- attributes. Otherwise, BASE_FACE is ignored. If the new face
- doesn't share font with the default face, a fontname is allocated
- from the heap and set in `font_name' of the new face, but it is not
- yet loaded here. Value is a pointer to the newly created realized
- face. */
+ cache CACHE for ASCII characters. Do it for X frame CACHE->f. If
+ the new face doesn't share font with the default face, a fontname
+ is allocated from the heap and set in `font_name' of the new face,
+ but it is not yet loaded here. Value is a pointer to the newly
+ created realized face. */
static struct face *
-realize_x_face (cache, attrs, c, base_face)
+realize_x_face (cache, attrs)
struct face_cache *cache;
Lisp_Object *attrs;
- int c;
- struct face *base_face;
{
struct face *face = NULL;
#ifdef HAVE_WINDOW_SYSTEM
@@ -7184,50 +7684,27 @@ realize_x_face (cache, attrs, c, base_face)
Lisp_Object stipple, overline, strike_through, box;
xassert (FRAME_WINDOW_P (cache->f));
- xassert (SINGLE_BYTE_CHAR_P (c)
- || base_face);
/* Allocate a new realized face. */
face = make_realized_face (attrs);
+ face->ascii_face = face;
f = cache->f;
- /* If C is a multibyte character, we share all face attirbutes with
- BASE_FACE including the realized fontset. But, we must load a
- different font. */
- if (!SINGLE_BYTE_CHAR_P (c))
- {
- bcopy (base_face, face, sizeof *face);
- face->gc = 0;
-
- /* Don't try to free the colors copied bitwise from BASE_FACE. */
- face->colors_copied_bitwise_p = 1;
-
- /* to force realize_face to load font */
- face->font = NULL;
- return face;
- }
-
- /* Now we are realizing a face for ASCII (and unibyte) characters. */
-
/* Determine the font to use. Most of the time, the font will be
the same as the font of the default face, so try that first. */
default_face = FACE_FROM_ID (f, DEFAULT_FACE_ID);
if (default_face
- && FACE_SUITABLE_FOR_CHAR_P (default_face, c)
&& lface_same_font_attributes_p (default_face->lface, attrs))
{
face->font = default_face->font;
- face->fontset = default_face->fontset;
face->font_info_id = default_face->font_info_id;
+#ifdef USE_FONT_BACKEND
+ face->font_info = default_face->font_info;
+#endif /* USE_FONT_BACKEND */
face->font_name = default_face->font_name;
- face->ascii_face = face;
-
- /* But, as we can't share the fontset, make a new realized
- fontset that has the same base fontset as of the default
- face. */
face->fontset
- = make_fontset_for_ascii_face (f, default_face->fontset);
+ = make_fontset_for_ascii_face (f, default_face->fontset, face);
}
else
{
@@ -7239,10 +7716,24 @@ realize_x_face (cache, attrs, c, base_face)
are constructed from ATTRS. */
int fontset = face_fontset (attrs);
- if ((fontset == -1) && default_face)
+ /* If we are realizing the default face, ATTRS should specify a
+ fontset. In other words, if FONTSET is -1, we are not
+ realizing the default face, thus the default face should have
+ already been realized. */
+ if (fontset == -1)
fontset = default_face->fontset;
- face->fontset = make_fontset_for_ascii_face (f, fontset);
- face->font = NULL; /* to force realize_face to load font */
+ if (fontset == -1)
+ abort ();
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ font_load_for_face (f, face);
+ else
+#endif /* USE_FONT_BACKEND */
+ load_face_font (f, face);
+ if (face->font)
+ face->fontset = make_fontset_for_ascii_face (f, fontset, face);
+ else
+ face->fontset = -1;
}
/* Load colors, and set remaining attributes. */
@@ -7373,9 +7864,8 @@ realize_x_face (cache, attrs, c, base_face)
stipple = attrs[LFACE_STIPPLE_INDEX];
if (!NILP (stipple))
face->stipple = load_pixmap (f, stipple, &face->pixmap_w, &face->pixmap_h);
-
- xassert (FACE_SUITABLE_FOR_CHAR_P (face, c));
#endif /* HAVE_WINDOW_SYSTEM */
+
return face;
}
@@ -7467,14 +7957,13 @@ map_tty_color (f, face, idx, defaulted)
/* Realize the fully-specified face with attributes ATTRS in face
- cache CACHE for character C. Do it for TTY frame CACHE->f. Value is a
- pointer to the newly created realized face. */
+ cache CACHE for ASCII characters. Do it for TTY frame CACHE->f.
+ Value is a pointer to the newly created realized face. */
static struct face *
-realize_tty_face (cache, attrs, c)
+realize_tty_face (cache, attrs)
struct face_cache *cache;
Lisp_Object *attrs;
- int c;
{
struct face *face;
int weight, slant;
@@ -7567,7 +8056,7 @@ compute_char_face (f, ch, prop)
if (NILP (prop))
{
struct face *face = FACE_FROM_ID (f, DEFAULT_FACE_ID);
- face_id = FACE_FOR_CHAR (f, face, ch);
+ face_id = FACE_FOR_CHAR (f, face, ch, -1, Qnil);
}
else
{
@@ -7575,7 +8064,7 @@ compute_char_face (f, ch, prop)
struct face *default_face = FACE_FROM_ID (f, DEFAULT_FACE_ID);
bcopy (default_face->lface, attrs, sizeof attrs);
merge_face_ref (f, prop, attrs, 1, 0);
- face_id = lookup_face (f, attrs, ch, NULL);
+ face_id = lookup_face (f, attrs);
}
return face_id;
@@ -7692,7 +8181,7 @@ face_at_buffer_position (w, pos, region_beg, region_end,
/* Look up a realized face with the given face attributes,
or realize a new one for ASCII characters. */
- return lookup_face (f, attrs, 0, NULL);
+ return lookup_face (f, attrs);
}
@@ -7788,7 +8277,7 @@ face_at_string_position (w, string, pos, bufpos, region_beg,
/* Look up a realized face with the given face attributes,
or realize a new one for ASCII characters. */
- return lookup_face (f, attrs, 0, NULL);
+ return lookup_face (f, attrs);
}
@@ -7825,7 +8314,7 @@ merge_faces (f, face_name, face_id, base_face_id)
if (face_id < 0 || face_id >= lface_id_to_name_size)
return base_face_id;
face_name = lface_id_to_name[face_id];
- face_id = lookup_derived_face (f, face_name, 0, base_face_id, 1);
+ face_id = lookup_derived_face (f, face_name, base_face_id, 1);
if (face_id >= 0)
return face_id;
return base_face_id;
@@ -7852,7 +8341,7 @@ merge_faces (f, face_name, face_id, base_face_id)
/* Look up a realized face with the given face attributes,
or realize a new one for ASCII characters. */
- return lookup_face (f, attrs, 0, NULL);
+ return lookup_face (f, attrs);
}
@@ -7890,7 +8379,6 @@ dump_realized_face (face)
face->underline_p,
SDATA (Fsymbol_name (face->lface[LFACE_UNDERLINE_INDEX])));
fprintf (stderr, "hash: %d\n", face->hash);
- fprintf (stderr, "charset: %d\n", face->charset);
}
@@ -7983,6 +8471,8 @@ syms_of_xfaces ()
staticpro (&QCwidth);
QCfont = intern (":font");
staticpro (&QCfont);
+ QCfontset = intern (":fontset");
+ staticpro (&QCfontset);
QCbold = intern (":bold");
staticpro (&QCbold);
QCitalic = intern (":italic");
diff --git a/src/xfns.c b/src/xfns.c
index 47916fccb71..85296bc6c35 100644
--- a/src/xfns.c
+++ b/src/xfns.c
@@ -41,6 +41,7 @@ Boston, MA 02110-1301, USA. */
#include "keyboard.h"
#include "blockinput.h"
#include <epaths.h>
+#include "character.h"
#include "charset.h"
#include "coding.h"
#include "fontset.h"
@@ -48,6 +49,10 @@ Boston, MA 02110-1301, USA. */
#include "termhooks.h"
#include "atimer.h"
+#ifdef USE_FONT_BACKEND
+#include "font.h"
+#endif /* USE_FONT_BACKEND */
+
#ifdef HAVE_X_WINDOWS
#include <ctype.h>
@@ -536,6 +541,8 @@ x_top_window_to_frame (dpyinfo, wdesc)
+static void x_default_font_parameter P_ ((struct frame *, Lisp_Object));
+
static Lisp_Object unwind_create_frame P_ ((Lisp_Object));
static Lisp_Object unwind_create_tip_frame P_ ((Lisp_Object));
@@ -1535,51 +1542,30 @@ x_encode_text (string, coding_system, selectionp, text_bytes, stringp, freep)
int selectionp;
int *freep;
{
- unsigned char *str = SDATA (string);
- int chars = SCHARS (string);
- int bytes = SBYTES (string);
- int charset_info;
- int bufsize;
- unsigned char *buf;
+ int result = string_xstring_p (string);
struct coding_system coding;
- extern Lisp_Object Qcompound_text_with_extensions;
- charset_info = find_charset_in_text (str, chars, bytes, NULL, Qnil);
- if (charset_info == 0)
+ if (result == 0)
{
/* No multibyte character in OBJ. We need not encode it. */
- *text_bytes = bytes;
+ *text_bytes = SBYTES (string);
*stringp = 1;
*freep = 0;
- return str;
+ return SDATA (string);
}
setup_coding_system (coding_system, &coding);
- if (selectionp
- && SYMBOLP (coding.pre_write_conversion)
- && !NILP (Ffboundp (coding.pre_write_conversion)))
- {
- string = run_pre_post_conversion_on_str (string, &coding, 1);
- str = SDATA (string);
- chars = SCHARS (string);
- bytes = SBYTES (string);
- }
- coding.src_multibyte = 1;
- coding.dst_multibyte = 0;
- coding.mode |= CODING_MODE_LAST_BLOCK;
- if (coding.type == coding_type_iso2022)
- coding.flags |= CODING_FLAG_ISO_SAFE;
+ coding.mode |= (CODING_MODE_SAFE_ENCODING | CODING_MODE_LAST_BLOCK);
/* We suppress producing escape sequences for composition. */
- coding.composing = COMPOSITION_DISABLED;
- bufsize = encoding_buffer_size (&coding, bytes);
- buf = (unsigned char *) xmalloc (bufsize);
- encode_coding (&coding, str, buf, bytes, bufsize);
+ coding.common_flags &= ~CODING_ANNOTATION_MASK;
+ coding.dst_bytes = SCHARS (string) * 2;
+ coding.destination = (unsigned char *) xmalloc (coding.dst_bytes);
+ encode_coding_object (&coding, string, 0, 0,
+ SCHARS (string), SBYTES (string), Qnil);
*text_bytes = coding.produced;
- *stringp = (charset_info == 1
- || (!EQ (coding_system, Qcompound_text)
- && !EQ (coding_system, Qcompound_text_with_extensions)));
+ *stringp = (result == 1 || !EQ (coding_system, Qcompound_text));
*freep = 1;
- return buf;
+ return coding.destination;
}
@@ -2976,6 +2962,44 @@ unwind_create_frame (frame)
return Qnil;
}
+#ifdef USE_FONT_BACKEND
+static void
+x_default_font_parameter (f, parms)
+ struct frame *f;
+ Lisp_Object parms;
+{
+ struct x_display_info *dpyinfo = FRAME_X_DISPLAY_INFO (f);
+ Lisp_Object font = x_get_arg (dpyinfo, parms, Qfont, "font", "Font",
+ RES_TYPE_STRING);
+
+ if (! STRINGP (font))
+ {
+ char *names[]
+ = { "-adobe-courier-medium-r-*-*-*-120-*-*-*-*-iso8859-1",
+ "-misc-fixed-medium-r-normal-*-*-140-*-*-c-*-iso8859-1",
+ "-*-*-medium-r-normal-*-*-140-*-*-c-*-iso8859-1",
+ /* This was formerly the first thing tried, but it finds
+ too many fonts and takes too long. */
+ "-*-*-medium-r-*-*-*-*-*-*-c-*-iso8859-1",
+ /* If those didn't work, look for something which will
+ at least work. */
+ "-*-fixed-*-*-*-*-*-140-*-*-c-*-iso8859-1",
+ "fixed",
+ NULL };
+ int i;
+
+ for (i = 0; names[i]; i++)
+ {
+ font = font_open_by_name (f, names[i]);
+ if (! NILP (font))
+ break;
+ }
+ if (NILP (font))
+ error ("No suitable font was found");
+ }
+ x_default_parameter (f, parms, Qfont, font, "font", "Font", RES_TYPE_STRING);
+}
+#endif /* USE_FONT_BACKEND */
DEFUN ("x-create-frame", Fx_create_frame, Sx_create_frame,
1, 1, 0,
@@ -3153,43 +3177,72 @@ This function is an internal primitive--use `make-frame' instead. */)
specbind (Qx_resource_name, name);
}
+ f->resx = dpyinfo->resx;
+ f->resy = dpyinfo->resy;
+
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ {
+ /* Perhaps, we must allow frame parameter, say `font-backend',
+ to specify which font backends to use. */
+#ifdef HAVE_FREETYPE
+#ifdef HAVE_XFT
+ register_font_driver (&xftfont_driver, f);
+#else /* not HAVE_XFT */
+ register_font_driver (&ftxfont_driver, f);
+#endif /* not HAVE_XFT */
+#endif /* HAVE_FREETYPE */
+ register_font_driver (&xfont_driver, f);
+ }
+#endif /* USE_FONT_BACKEND */
+
/* Extract the window parameters from the supplied values
that are needed to determine window geometry. */
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ x_default_font_parameter (f, parms);
+else
+#endif /* USE_FONT_BACKEND */
{
Lisp_Object font;
font = x_get_arg (dpyinfo, parms, Qfont, "font", "Font", RES_TYPE_STRING);
- BLOCK_INPUT;
- /* First, try whatever font the caller has specified. */
- if (STRINGP (font))
+ /* If the caller has specified no font, try out fonts which we
+ hope have bold and italic variations. */
+ if (!STRINGP (font))
{
- tem = Fquery_fontset (font, Qnil);
- if (STRINGP (tem))
- font = x_new_fontset (f, SDATA (tem));
- else
- font = x_new_font (f, SDATA (font));
+ char *names[]
+ = { "-adobe-courier-medium-r-*-*-*-120-*-*-*-*-iso8859-1",
+ "-misc-fixed-medium-r-normal-*-*-140-*-*-c-*-iso8859-1",
+ "-*-*-medium-r-normal-*-*-140-*-*-c-*-iso8859-1",
+ /* This was formerly the first thing tried, but it finds
+ too many fonts and takes too long. */
+ "-*-*-medium-r-*-*-*-*-*-*-c-*-iso8859-1",
+ /* If those didn't work, look for something which will
+ at least work. */
+ "-*-fixed-*-*-*-*-*-140-*-*-c-*-iso8859-1",
+ NULL };
+ int i;
+
+ BLOCK_INPUT;
+ for (i = 0; names[i]; i++)
+ {
+ Lisp_Object list;
+
+ list = x_list_fonts (f, build_string (names[i]), 0, 1);
+ if (CONSP (list))
+ {
+ font = XCAR (list);
+ break;
+ }
+ }
+ UNBLOCK_INPUT;
+ if (! STRINGP (font))
+ font = build_string ("fixed");
}
-
- /* Try out a font which we hope has bold and italic variations. */
- if (!STRINGP (font))
- font = x_new_font (f, "-adobe-courier-medium-r-*-*-*-120-*-*-*-*-iso8859-1");
- if (!STRINGP (font))
- font = x_new_font (f, "-misc-fixed-medium-r-normal-*-*-140-*-*-c-*-iso8859-1");
- if (! STRINGP (font))
- font = x_new_font (f, "-*-*-medium-r-normal-*-*-140-*-*-c-*-iso8859-1");
- if (! STRINGP (font))
- /* This was formerly the first thing tried, but it finds too many fonts
- and takes too long. */
- font = x_new_font (f, "-*-*-medium-r-*-*-*-*-*-*-c-*-iso8859-1");
- /* If those didn't work, look for something which will at least work. */
- if (! STRINGP (font))
- font = x_new_font (f, "-*-fixed-*-*-*-*-*-140-*-*-c-*-iso8859-1");
- UNBLOCK_INPUT;
- if (! STRINGP (font))
- font = build_string ("fixed");
-
- x_set_frame_parameters (f, Fcons (Fcons (Qfont, font), Qnil));
+ x_default_parameter (f, parms, Qfont, font,
+ "font", "Font", RES_TYPE_STRING);
}
#ifdef USE_LUCID
@@ -4734,8 +4787,32 @@ x_create_tip_frame (dpyinfo, parms, text)
specbind (Qx_resource_name, name);
}
+ f->resx = dpyinfo->resx;
+ f->resy = dpyinfo->resy;
+
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ {
+ /* Perhaps, we must allow frame parameter, say `font-backend',
+ to specify which font backends to use. */
+#ifdef HAVE_FREETYPE
+#ifdef HAVE_XFT
+ register_font_driver (&xftfont_driver, f);
+#else /* not HAVE_XFT */
+ register_font_driver (&ftxfont_driver, f);
+#endif /* not HAVE_XFT */
+#endif /* HAVE_FREETYPE */
+ register_font_driver (&xfont_driver, f);
+ }
+#endif /* USE_FONT_BACKEND */
+
/* Extract the window parameters from the supplied values that are
needed to determine window geometry. */
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ x_default_font_parameter (f, parms);
+else
+#endif /* USE_FONT_BACKEND */
{
Lisp_Object font;
@@ -4747,7 +4824,7 @@ x_create_tip_frame (dpyinfo, parms, text)
{
tem = Fquery_fontset (font, Qnil);
if (STRINGP (tem))
- font = x_new_fontset (f, SDATA (tem));
+ font = x_new_fontset (f, tem);
else
font = x_new_font (f, SDATA (font));
}
@@ -4770,8 +4847,7 @@ x_create_tip_frame (dpyinfo, parms, text)
if (! STRINGP (font))
font = build_string ("fixed");
- x_default_parameter (f, parms, Qfont, font,
- "font", "Font", RES_TYPE_STRING);
+ x_set_frame_parameters (f, Fcons (Fcons (Qfont, font), Qnil));
}
x_default_parameter (f, parms, Qborder_width, make_number (2),
@@ -5890,6 +5966,7 @@ the tool bar buttons. */);
find_ccl_program_func = x_find_ccl_program;
query_font_func = x_query_font;
set_frame_fontset_func = x_set_font;
+ get_font_repertory_func = x_get_font_repertory;
check_window_system_func = check_x;
hourglass_atimer = NULL;
diff --git a/src/xfont.c b/src/xfont.c
new file mode 100644
index 00000000000..ac60b96bb5f
--- /dev/null
+++ b/src/xfont.c
@@ -0,0 +1,851 @@
+/* xfont.c -- X core font driver.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ Copyright (C) 2006
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs; see the file COPYING. If not, write to
+the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+#include <config.h>
+#include <stdio.h>
+#include <X11/Xlib.h>
+
+#include "lisp.h"
+#include "dispextern.h"
+#include "xterm.h"
+#include "frame.h"
+#include "blockinput.h"
+#include "character.h"
+#include "charset.h"
+#include "fontset.h"
+#include "font.h"
+
+
+/* X core font driver. */
+
+Lisp_Object Qx;
+
+/* Alist of font registry symbol and the corresponding charsets
+ information. The information is retrieved from
+ Vfont_encoding_alist on demand.
+
+ Eash element has the form:
+ (REGISTRY . (ENCODING-CHARSET-ID . REPERTORY-CHARSET-ID))
+ or
+ (REGISTRY . nil)
+
+ In the former form, ENCODING-CHARSET-ID is an ID of a charset that
+ encodes a character code to a glyph code of a font, and
+ REPERTORY-CHARSET-ID is an ID of a charset that tells if a
+ character is supported by a font.
+
+ The latter form means that the information for REGISTRY couldn't be
+ retrieved. */
+static Lisp_Object x_font_charset_alist;
+
+/* Prototypes of support functions. */
+extern void x_clear_errors P_ ((Display *));
+
+static char *xfont_query_font P_ ((Display *, char *, Lisp_Object));
+static XCharStruct *xfont_get_pcm P_ ((XFontStruct *, XChar2b *));
+static int xfont_registry_charsets P_ ((Lisp_Object, struct charset **,
+ struct charset **));
+
+static char *
+xfont_query_font (display, name, spec)
+ Display *display;
+ char *name;
+ Lisp_Object spec;
+{
+ XFontStruct *font;
+
+ BLOCK_INPUT;
+ x_catch_errors (display);
+ font = XLoadQueryFont (display, name);
+ name = NULL;
+ if (x_had_errors_p (display))
+ {
+ /* This error is perhaps due to insufficient memory on X
+ server. Let's just ignore it. */
+ x_clear_errors (display);
+ }
+ else if (font)
+ {
+ unsigned long value;
+
+ if (XGetFontProperty (font, XA_FONT, &value))
+ {
+ char *n = (char *) XGetAtomName (display, (Atom) value);
+
+ if (font_parse_xlfd (n, spec) >= 0)
+ name = n;
+ else
+ XFree (n);
+ }
+ XFreeFont (display, font);
+ }
+ x_uncatch_errors ();
+ UNBLOCK_INPUT;
+
+ return name;
+}
+
+
+/* Get metrics of character CHAR2B in XFONT. Value is null if CHAR2B
+ is not contained in the font. */
+
+static XCharStruct *
+xfont_get_pcm (xfont, char2b)
+ XFontStruct *xfont;
+ XChar2b *char2b;
+{
+ /* The result metric information. */
+ XCharStruct *pcm = NULL;
+
+ xassert (xfont && char2b);
+
+ if (xfont->per_char != NULL)
+ {
+ if (xfont->min_byte1 == 0 && xfont->max_byte1 == 0)
+ {
+ /* min_char_or_byte2 specifies the linear character index
+ corresponding to the first element of the per_char array,
+ max_char_or_byte2 is the index of the last character. A
+ character with non-zero CHAR2B->byte1 is not in the font.
+ A character with byte2 less than min_char_or_byte2 or
+ greater max_char_or_byte2 is not in the font. */
+ if (char2b->byte1 == 0
+ && char2b->byte2 >= xfont->min_char_or_byte2
+ && char2b->byte2 <= xfont->max_char_or_byte2)
+ pcm = xfont->per_char + char2b->byte2 - xfont->min_char_or_byte2;
+ }
+ else
+ {
+ /* If either min_byte1 or max_byte1 are nonzero, both
+ min_char_or_byte2 and max_char_or_byte2 are less than
+ 256, and the 2-byte character index values corresponding
+ to the per_char array element N (counting from 0) are:
+
+ byte1 = N/D + min_byte1
+ byte2 = N\D + min_char_or_byte2
+
+ where:
+
+ D = max_char_or_byte2 - min_char_or_byte2 + 1
+ / = integer division
+ \ = integer modulus */
+ if (char2b->byte1 >= xfont->min_byte1
+ && char2b->byte1 <= xfont->max_byte1
+ && char2b->byte2 >= xfont->min_char_or_byte2
+ && char2b->byte2 <= xfont->max_char_or_byte2)
+ pcm = (xfont->per_char
+ + ((xfont->max_char_or_byte2 - xfont->min_char_or_byte2 + 1)
+ * (char2b->byte1 - xfont->min_byte1))
+ + (char2b->byte2 - xfont->min_char_or_byte2));
+ }
+ }
+ else
+ {
+ /* If the per_char pointer is null, all glyphs between the first
+ and last character indexes inclusive have the same
+ information, as given by both min_bounds and max_bounds. */
+ if (char2b->byte2 >= xfont->min_char_or_byte2
+ && char2b->byte2 <= xfont->max_char_or_byte2)
+ pcm = &xfont->max_bounds;
+ }
+
+ return ((pcm == NULL
+ || (pcm->width == 0 && (pcm->rbearing - pcm->lbearing) == 0))
+ ? NULL : pcm);
+}
+
+extern Lisp_Object find_font_encoding P_ ((Lisp_Object));
+
+/* Return encoding charset and repertory charset for REGISTRY in
+ ENCODING and REPERTORY correspondingly. If correct information for
+ REGISTRY is available, return 0. Otherwise return -1. */
+
+static int
+xfont_registry_charsets (registry, encoding, repertory)
+ Lisp_Object registry;
+ struct charset **encoding, **repertory;
+{
+ Lisp_Object val;
+ int encoding_id, repertory_id;
+
+ val = assq_no_quit (registry, x_font_charset_alist);
+ if (! NILP (val))
+ {
+ val = XCDR (val);
+ if (NILP (val))
+ return -1;
+ encoding_id = XINT (XCAR (val));
+ repertory_id = XINT (XCDR (val));
+ }
+ else
+ {
+ val = find_font_encoding (SYMBOL_NAME (registry));
+ if (SYMBOLP (val) && CHARSETP (val))
+ {
+ encoding_id = repertory_id = XINT (CHARSET_SYMBOL_ID (val));
+ }
+ else if (CONSP (val))
+ {
+ if (! CHARSETP (XCAR (val)))
+ goto invalid_entry;
+ encoding_id = XINT (CHARSET_SYMBOL_ID (XCAR (val)));
+ if (NILP (XCDR (val)))
+ repertory_id = -1;
+ else
+ {
+ if (! CHARSETP (XCDR (val)))
+ goto invalid_entry;
+ repertory_id = XINT (CHARSET_SYMBOL_ID (XCDR (val)));
+ }
+ }
+ else
+ goto invalid_entry;
+ val = Fcons (make_number (encoding_id), make_number (repertory_id));
+ x_font_charset_alist
+ = nconc2 (x_font_charset_alist, Fcons (Fcons (registry, val), Qnil));
+ }
+
+ if (encoding)
+ *encoding = CHARSET_FROM_ID (encoding_id);
+ if (repertory)
+ *repertory = repertory_id >= 0 ? CHARSET_FROM_ID (repertory_id) : NULL;
+ return 0;
+
+ invalid_entry:
+ x_font_charset_alist
+ = nconc2 (x_font_charset_alist, Fcons (Fcons (registry, Qnil), Qnil));
+ return -1;
+}
+
+static Lisp_Object xfont_get_cache P_ ((Lisp_Object));
+static Lisp_Object xfont_list P_ ((Lisp_Object, Lisp_Object));
+static Lisp_Object xfont_list_family P_ ((Lisp_Object));
+static struct font *xfont_open P_ ((FRAME_PTR, Lisp_Object, int));
+static void xfont_close P_ ((FRAME_PTR, struct font *));
+static int xfont_prepare_face P_ ((FRAME_PTR, struct face *));
+#if 0
+static void xfont_done_face P_ ((FRAME_PTR, struct face *));
+#endif
+static int xfont_has_char P_ ((Lisp_Object, int));
+static unsigned xfont_encode_char P_ ((struct font *, int));
+static int xfont_text_extents P_ ((struct font *, unsigned *, int,
+ struct font_metrics *));
+static int xfont_draw P_ ((struct glyph_string *, int, int, int, int, int));
+
+struct font_driver xfont_driver =
+ {
+ (Lisp_Object) NULL, /* Qx */
+ xfont_get_cache,
+ xfont_list,
+ xfont_list_family,
+ NULL,
+ xfont_open,
+ xfont_close,
+ xfont_prepare_face,
+ NULL /*xfont_done_face*/,
+ xfont_has_char,
+ xfont_encode_char,
+ xfont_text_extents,
+ xfont_draw,
+ };
+
+extern Lisp_Object QCname;
+
+static Lisp_Object
+xfont_get_cache (frame)
+ Lisp_Object frame;
+{
+ Display_Info *dpyinfo = FRAME_X_DISPLAY_INFO (XFRAME (frame));
+
+ return (dpyinfo->name_list_element);
+}
+
+extern Lisp_Object Vface_alternative_font_registry_alist;
+
+static Lisp_Object
+xfont_list_pattern (frame, display, pattern)
+ Lisp_Object frame;
+ Display *display;
+ char *pattern;
+{
+ Lisp_Object list = Qnil;
+ int i, limit, num_fonts;
+ char **names;
+
+ BLOCK_INPUT;
+ x_catch_errors (display);
+
+ for (limit = 512; ; limit *= 2)
+ {
+ names = XListFonts (display, pattern, limit, &num_fonts);
+ if (x_had_errors_p (display))
+ {
+ /* This error is perhaps due to insufficient memory on X
+ server. Let's just ignore it. */
+ x_clear_errors (display);
+ num_fonts = 0;
+ break;
+ }
+ if (num_fonts < limit)
+ break;
+ XFreeFontNames (names);
+ }
+
+ for (i = 0; i < num_fonts; i++)
+ {
+ Lisp_Object entity = Fmake_vector (make_number (FONT_ENTITY_MAX), Qnil);
+ int result;
+
+ ASET (entity, FONT_TYPE_INDEX, Qx);
+ ASET (entity, FONT_FRAME_INDEX, frame);
+
+ result = font_parse_xlfd (names[i], entity);
+ if (result < 0)
+ {
+ /* This may be an alias name. Try to get the full XLFD name
+ from XA_FONT property of the font. */
+ XFontStruct *font = XLoadQueryFont (display, names[i]);
+ unsigned long value;
+
+ if (! font)
+ continue;
+ if (XGetFontProperty (font, XA_FONT, &value))
+ {
+ char *name = (char *) XGetAtomName (display, (Atom) value);
+ int len = strlen (name);
+
+ /* If DXPC (a Differential X Protocol Compressor)
+ Ver.3.7 is running, XGetAtomName will return null
+ string. We must avoid such a name. */
+ if (len > 0)
+ result = font_parse_xlfd (name, entity);
+ XFree (name);
+ }
+ XFreeFont (display, font);
+ }
+
+ if (result == 0)
+ {
+ Lisp_Object val = AREF (entity, FONT_EXTRA_INDEX);
+ char *p = (char *) SDATA (SYMBOL_NAME (val));
+
+ /* P == "RESX-RESY-SPACING-AVGWIDTH. We rejust this font if
+ it's an autoscaled one (i.e. RESX > 0 && AVGWIDTH == 0). */
+ if (atoi (p) > 0)
+ {
+ p += SBYTES (SYMBOL_NAME (val));
+ while (p[-1] != '-') p--;
+ if (atoi (p) == 0)
+ continue;
+ }
+ list = Fcons (entity, list);
+ }
+ }
+
+ x_uncatch_errors ();
+ UNBLOCK_INPUT;
+
+ return list;
+}
+
+static Lisp_Object
+xfont_list (frame, spec)
+ Lisp_Object frame, spec;
+{
+ FRAME_PTR f = XFRAME (frame);
+ Display *display = FRAME_X_DISPLAY_INFO (f)->display;
+ Lisp_Object list, val, extra, font_name;
+ int len;
+ char name[256];
+
+ extra = AREF (spec, FONT_EXTRA_INDEX);
+ font_name = Qnil;
+ if (CONSP (extra))
+ {
+ val = assq_no_quit (QCotf, extra);
+ if (! NILP (val))
+ return null_vector;
+ val = assq_no_quit (QCscript, extra);
+ if (! NILP (val))
+ return null_vector;
+ val = assq_no_quit (QClanguage, extra);
+ if (! NILP (val))
+ return null_vector;
+ val = assq_no_quit (QCname, extra);
+ if (CONSP (val))
+ font_name = XCDR (val);
+ }
+
+ if (STRINGP (font_name))
+ list = xfont_list_pattern (frame, display, (char *) SDATA (font_name));
+ else if ((len = font_unparse_xlfd (spec, 0, name, 256)) < 0)
+ return null_vector;
+ else
+ {
+ list = xfont_list_pattern (frame, display, name);
+ if (NILP (list))
+ {
+ Lisp_Object registry = AREF (spec, FONT_REGISTRY_INDEX);
+ Lisp_Object alter;
+
+ if (! NILP (registry)
+ && (alter = Fassoc (SYMBOL_NAME (registry),
+ Vface_alternative_font_registry_alist))
+ && CONSP (alter))
+ {
+ /* Pointer to REGISTRY-ENCODING field. */
+ char *r = name + len - SBYTES (SYMBOL_NAME (registry));
+
+ for (alter = XCDR (alter); CONSP (alter); alter = XCDR (alter))
+ if (STRINGP (XCAR (alter))
+ && ((r - name) + SBYTES (XCAR (alter))) < 255)
+ {
+ strcpy (r, (char *) SDATA (XCAR (alter)));
+ list = xfont_list_pattern (frame, display, name);
+ if (! NILP (list))
+ break;
+ }
+ }
+ }
+ }
+
+ return (NILP (list) ? null_vector : Fvconcat (1, &list));
+}
+
+static int
+memq_no_quit (elt, list)
+ Lisp_Object elt, list;
+{
+ while (CONSP (list) && ! EQ (XCAR (list), elt))
+ list = XCDR (list);
+ return (CONSP (list));
+}
+
+static Lisp_Object
+xfont_list_family (frame)
+ Lisp_Object frame;
+{
+ FRAME_PTR f = XFRAME (frame);
+ Display_Info *dpyinfo = FRAME_X_DISPLAY_INFO (f);
+ char **names;
+ int num_fonts, i;
+ Lisp_Object list;
+ char *last_family;
+ int last_len;
+
+ BLOCK_INPUT;
+ x_catch_errors (dpyinfo->display);
+ names = XListFonts (dpyinfo->display, "-*-*-*-*-*-*-*-*-*-*-*-*-*-*",
+ 0x8000, &num_fonts);
+ if (x_had_errors_p (dpyinfo->display))
+ {
+ /* This error is perhaps due to insufficient memory on X server.
+ Let's just ignore it. */
+ x_clear_errors (dpyinfo->display);
+ num_fonts = 0;
+ }
+
+ list = Qnil;
+ for (i = 0, last_len = 0; i < num_fonts; i++)
+ {
+ char *p0 = names[i], *p1;
+ Lisp_Object family;
+
+ p0++; /* skip the leading '-' */
+ while (*p0 && *p0 != '-') p0++; /* skip foundry */
+ if (! *p0)
+ continue;
+ p1 = ++p0;
+ while (*p1 && *p1 != '-') p1++; /* find the end of family */
+ if (! *p1 || p1 == p0)
+ continue;
+ if (last_len == p1 - p0
+ && bcmp (last_family, p0, last_len) == 0)
+ continue;
+ last_len = p1 - p0;
+ last_family = p0;
+ family = intern_downcase (p0, last_len);
+ if (! memq_no_quit (family, list))
+ list = Fcons (family, list);
+ }
+
+ XFreeFontNames (names);
+ x_uncatch_errors ();
+ UNBLOCK_INPUT;
+
+ return list;
+}
+
+static struct font *
+xfont_open (f, entity, pixel_size)
+ FRAME_PTR f;
+ Lisp_Object entity;
+ int pixel_size;
+{
+ Display_Info *dpyinfo = FRAME_X_DISPLAY_INFO (f);
+ Display *display = dpyinfo->display;
+ char name[256];
+ int len;
+ unsigned long value;
+ Lisp_Object registry;
+ struct charset *encoding, *repertory;
+ struct font *font;
+ XFontStruct *xfont;
+
+ /* At first, check if we know how to encode characters for this
+ font. */
+ registry = AREF (entity, FONT_REGISTRY_INDEX);
+ if (xfont_registry_charsets (registry, &encoding, &repertory) < 0)
+ return NULL;
+
+ if (XINT (AREF (entity, FONT_SIZE_INDEX)) != 0)
+ pixel_size = XINT (AREF (entity, FONT_SIZE_INDEX));
+ len = font_unparse_xlfd (entity, pixel_size, name, 256);
+ if (len <= 0)
+ return NULL;
+
+ BLOCK_INPUT;
+ x_catch_errors (display);
+ xfont = XLoadQueryFont (display, name);
+ if (x_had_errors_p (display))
+ {
+ /* This error is perhaps due to insufficient memory on X server.
+ Let's just ignore it. */
+ x_clear_errors (display);
+ xfont = NULL;
+ }
+ x_uncatch_errors ();
+ UNBLOCK_INPUT;
+
+ if (! xfont)
+ return NULL;
+ font = malloc (sizeof (struct font));
+ font->font.font = xfont;
+ font->entity = entity;
+ font->pixel_size = pixel_size;
+ font->driver = &xfont_driver;
+ font->font.name = malloc (len + 1);
+ if (! font->font.name)
+ {
+ XFreeFont (display, xfont);
+ free (font);
+ return NULL;
+ }
+ bcopy (name, font->font.name, len + 1);
+ font->font.charset = encoding->id;
+ font->encoding_charset = encoding->id;
+ font->repertory_charset = repertory ? repertory->id : -1;
+ font->ascent = xfont->ascent;
+ font->descent = xfont->descent;
+
+ if (xfont->min_bounds.width == xfont->max_bounds.width)
+ {
+ /* Fixed width font. */
+ font->font.average_width = font->font.space_width
+ = xfont->min_bounds.width;
+ }
+ else
+ {
+ XChar2b char2b;
+ XCharStruct *pcm;
+
+ char2b.byte1 = 0x00, char2b.byte2 = 0x20;
+ pcm = xfont_get_pcm (xfont, &char2b);
+ if (pcm)
+ font->font.space_width = pcm->width;
+ else
+ font->font.space_width = xfont->max_bounds.width;
+
+ font->font.average_width
+ = (XGetFontProperty (xfont, dpyinfo->Xatom_AVERAGE_WIDTH, &value)
+ ? (long) value / 10 : 0);
+ if (font->font.average_width < 0)
+ font->font.average_width = - font->font.average_width;
+ if (font->font.average_width == 0)
+ {
+ if (pcm)
+ {
+ int width = pcm->width;
+ for (char2b.byte2 = 33; char2b.byte2 <= 126; char2b.byte2++)
+ if ((pcm = xfont_get_pcm (xfont, &char2b)) != NULL)
+ width += pcm->width;
+ font->font.average_width = width / 95;
+ }
+ else
+ font->font.average_width = xfont->max_bounds.width;
+ }
+ }
+ font->min_width = xfont->min_bounds.width;
+ if (font->min_width <= 0)
+ font->min_width = font->font.space_width;
+
+ BLOCK_INPUT;
+ /* Try to get the full name of FONT. Put it in FULL_NAME. */
+ if (XGetFontProperty (xfont, XA_FONT, &value))
+ {
+ char *full_name = NULL, *p0, *p;
+ int dashes = 0;
+
+ p0 = p = (char *) XGetAtomName (FRAME_X_DISPLAY (f), (Atom) value);;
+ /* Count the number of dashes in the "full name".
+ If it is too few, this isn't really the font's full name,
+ so don't use it.
+ In X11R4, the fonts did not come with their canonical names
+ stored in them. */
+ while (*p)
+ {
+ if (*p == '-')
+ dashes++;
+ p++;
+ }
+
+ if (dashes >= 13)
+ {
+ full_name = (char *) malloc (p - p0 + 1);
+ if (full_name)
+ bcopy (p0, full_name, p - p0 + 1);
+ }
+ XFree (p0);
+
+ if (full_name)
+ font->font.full_name = full_name;
+ else
+ font->font.full_name = font->font.name;
+ }
+ font->file_name = NULL;
+
+ font->font.size = xfont->max_bounds.width;
+ font->font.height = xfont->ascent + xfont->descent;
+ font->font.baseline_offset
+ = (XGetFontProperty (xfont, dpyinfo->Xatom_MULE_BASELINE_OFFSET, &value)
+ ? (long) value : 0);
+ font->font.relative_compose
+ = (XGetFontProperty (xfont, dpyinfo->Xatom_MULE_RELATIVE_COMPOSE, &value)
+ ? (long) value : 0);
+ font->font.default_ascent
+ = (XGetFontProperty (xfont, dpyinfo->Xatom_MULE_DEFAULT_ASCENT, &value)
+ ? (long) value : 0);
+ font->font.vertical_centering
+ = (STRINGP (Vvertical_centering_font_regexp)
+ && (fast_c_string_match_ignore_case
+ (Vvertical_centering_font_regexp, font->font.full_name) >= 0));
+
+ UNBLOCK_INPUT;
+
+ dpyinfo->n_fonts++;
+
+ /* Set global flag fonts_changed_p to non-zero if the font loaded
+ has a character with a smaller width than any other character
+ before, or if the font loaded has a smaller height than any other
+ font loaded before. If this happens, it will make a glyph matrix
+ reallocation necessary. */
+ if (dpyinfo->n_fonts == 1)
+ {
+ dpyinfo->smallest_font_height = font->font.height;
+ dpyinfo->smallest_char_width = font->min_width;
+ fonts_changed_p = 1;
+ }
+ else
+ {
+ if (dpyinfo->smallest_font_height > font->font.height)
+ dpyinfo->smallest_font_height = font->font.height, fonts_changed_p |= 1;
+ if (dpyinfo->smallest_char_width > font->min_width)
+ dpyinfo->smallest_char_width = font->min_width, fonts_changed_p |= 1;
+ }
+
+ return font;
+}
+
+static void
+xfont_close (f, font)
+ FRAME_PTR f;
+ struct font *font;
+{
+ BLOCK_INPUT;
+ XFreeFont (FRAME_X_DISPLAY (f), font->font.font);
+ UNBLOCK_INPUT;
+
+ if (font->font.name != font->font.full_name)
+ free (font->font.full_name);
+ free (font->font.name);
+ free (font);
+ FRAME_X_DISPLAY_INFO (f)->n_fonts--;
+}
+
+static int
+xfont_prepare_face (f, face)
+ FRAME_PTR f;
+ struct face *face;
+{
+ BLOCK_INPUT;
+ XSetFont (FRAME_X_DISPLAY (f), face->gc, face->font->fid);
+ UNBLOCK_INPUT;
+
+ return 0;
+}
+
+#if 0
+static void
+xfont_done_face (f, face)
+ FRAME_PTR f;
+ struct face *face;
+{
+ if (face->extra)
+ {
+ BLOCK_INPUT;
+ XFreeGC (FRAME_X_DISPLAY (f), (GC) face->extra);
+ UNBLOCK_INPUT;
+ face->extra = NULL;
+ }
+}
+#endif /* 0 */
+
+static int
+xfont_has_char (entity, c)
+ Lisp_Object entity;
+ int c;
+{
+ Lisp_Object registry = AREF (entity, FONT_REGISTRY_INDEX);
+ struct charset *repertory;
+
+ if (xfont_registry_charsets (registry, NULL, &repertory) < 0)
+ return -1;
+ if (! repertory)
+ return -1;
+ return (ENCODE_CHAR (repertory, c) != CHARSET_INVALID_CODE (repertory));
+}
+
+static unsigned
+xfont_encode_char (font, c)
+ struct font *font;
+ int c;
+{
+ struct charset *charset;
+ unsigned code;
+ XChar2b char2b;
+
+ charset = CHARSET_FROM_ID (font->encoding_charset);
+ code = ENCODE_CHAR (charset, c);
+ if (code == CHARSET_INVALID_CODE (charset))
+ return 0xFFFFFFFF;
+ if (font->repertory_charset >= 0)
+ {
+ charset = CHARSET_FROM_ID (font->repertory_charset);
+ return (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset)
+ ? code : 0xFFFFFFFF);
+ }
+ char2b.byte1 = code >> 8;
+ char2b.byte2 = code & 0xFF;
+ return (xfont_get_pcm (font->font.font, &char2b) ? code : 0xFFFFFFFF);
+}
+
+static int
+xfont_text_extents (font, code, nglyphs, metrics)
+ struct font *font;
+ unsigned *code;
+ int nglyphs;
+ struct font_metrics *metrics;
+{
+ int width = 0;
+ int i, x;
+
+ if (metrics)
+ bzero (metrics, sizeof (struct font_metrics));
+ for (i = 0, x = 0; i < nglyphs; i++)
+ {
+ XChar2b char2b;
+ static XCharStruct *pcm;
+
+ if (code[i] >= 0x10000)
+ continue;
+ char2b.byte1 = code[i] >> 8, char2b.byte2 = code[i] & 0xFF;
+ pcm = xfont_get_pcm (font->font.font, &char2b);
+ if (! pcm)
+ continue;
+ if (metrics->lbearing > width + pcm->lbearing)
+ metrics->lbearing = width + pcm->lbearing;
+ if (metrics->rbearing < width + pcm->rbearing)
+ metrics->rbearing = width + pcm->rbearing;
+ if (metrics->ascent < pcm->ascent)
+ metrics->ascent = pcm->ascent;
+ if (metrics->descent < pcm->descent)
+ metrics->descent = pcm->descent;
+ width += pcm->width;
+ }
+ if (metrics)
+ metrics->width = width;
+ return width;
+}
+
+static int
+xfont_draw (s, from, to, x, y, with_background)
+ struct glyph_string *s;
+ int from, to, x, y, with_background;
+{
+ XFontStruct *xfont = s->face->font;
+ int len = to - from;
+
+ if (xfont->min_byte1 == 0 && xfont->max_byte1 == 0)
+ {
+ char *str;
+ int i;
+ USE_SAFE_ALLOCA;
+
+ SAFE_ALLOCA (str, char *, len);
+ for (i = 0; i < len ; i++)
+ str[i] = XCHAR2B_BYTE2 (s->char2b + from + i);
+ if (with_background > 0)
+ XDrawImageString (FRAME_X_DISPLAY (s->f), FRAME_X_WINDOW (s->f),
+ s->gc, x, y, str, len);
+ else
+ XDrawString (FRAME_X_DISPLAY (s->f), FRAME_X_WINDOW (s->f),
+ s->gc, x, y, str, len);
+ SAFE_FREE ();
+ return s->nchars;
+ }
+
+ if (with_background > 0)
+ XDrawImageString16 (FRAME_X_DISPLAY (s->f), FRAME_X_WINDOW (s->f),
+ s->gc, x, y, s->char2b + from, len);
+ else
+ XDrawString16 (FRAME_X_DISPLAY (s->f), FRAME_X_WINDOW (s->f),
+ s->gc, x, y, s->char2b + from, len);
+
+ return len;
+}
+
+
+
+void
+syms_of_xfont ()
+{
+ staticpro (&x_font_charset_alist);
+ x_font_charset_alist = Qnil;
+
+ DEFSYM (Qx, "x");
+ xfont_driver.type = Qx;
+ register_font_driver (&xfont_driver, NULL);
+}
+
+/* arch-tag: 23c5f366-a5ee-44b7-a3b7-90d6da7fd749
+ (do not change this comment) */
diff --git a/src/xftfont.c b/src/xftfont.c
new file mode 100644
index 00000000000..bd310cb68b8
--- /dev/null
+++ b/src/xftfont.c
@@ -0,0 +1,545 @@
+/* xftfont.c -- XFT font driver.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ Copyright (C) 2006
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs; see the file COPYING. If not, write to
+the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+#include <config.h>
+#include <stdio.h>
+#include <X11/Xlib.h>
+#include <X11/Xft/Xft.h>
+
+#include "lisp.h"
+#include "dispextern.h"
+#include "xterm.h"
+#include "frame.h"
+#include "blockinput.h"
+#include "character.h"
+#include "charset.h"
+#include "fontset.h"
+#include "font.h"
+
+/* Xft font driver. */
+
+static Lisp_Object Qxft;
+
+/* The actual structure for Xft font that can be casted to struct
+ font. */
+
+struct xftfont_info
+{
+ struct font font;
+ Display *display;
+ int screen;
+ XftFont *xftfont;
+ FT_Face ft_face;
+};
+
+/* Structure pointed by (struct face *)->extra */
+struct xftface_info
+{
+ XftColor xft_fg;
+ XftColor xft_bg;
+ XftDraw *xft_draw;
+};
+
+static void xftfont_get_colors P_ ((FRAME_PTR, struct face *, GC gc,
+ struct xftface_info *,
+ XftColor *fg, XftColor *bg));
+static Font xftfont_default_fid P_ ((FRAME_PTR));
+
+
+/* Setup colors pointed by FG and BG for GC. If XFTFACE_INFO is not
+ NULL, reuse the colors in it if possible. BG may be NULL. */
+static void
+xftfont_get_colors (f, face, gc, xftface_info, fg, bg)
+ FRAME_PTR f;
+ struct face *face;
+ GC gc;
+ struct xftface_info *xftface_info;
+ XftColor *fg, *bg;
+{
+ if (xftface_info && face->gc == gc)
+ {
+ *fg = xftface_info->xft_fg;
+ if (bg)
+ *bg = xftface_info->xft_bg;
+ }
+ else
+ {
+ XGCValues xgcv;
+ int fg_done = 0, bg_done = 0;
+
+ BLOCK_INPUT;
+ XGetGCValues (FRAME_X_DISPLAY (f), gc,
+ GCForeground | GCBackground, &xgcv);
+ if (xftface_info)
+ {
+ if (xgcv.foreground == face->foreground)
+ *fg = xftface_info->xft_fg, fg_done = 1;
+ else if (xgcv.foreground == face->background)
+ *fg = xftface_info->xft_bg, fg_done = 1;
+ if (! bg)
+ bg_done = 1;
+ else if (xgcv.background == face->background)
+ *bg = xftface_info->xft_bg, bg_done = 1;
+ else if (xgcv.background == face->foreground)
+ *bg = xftface_info->xft_fg, bg_done = 1;
+ }
+
+ if (fg_done + bg_done < 2)
+ {
+ XColor colors[2];
+
+ colors[0].pixel = fg->pixel = xgcv.foreground;
+ if (bg)
+ colors[1].pixel = bg->pixel = xgcv.background;
+ XQueryColors (FRAME_X_DISPLAY (f), FRAME_X_COLORMAP (f), colors,
+ bg ? 2 : 1);
+ fg->color.alpha = 0xFFFF;
+ fg->color.red = colors[0].red;
+ fg->color.green = colors[0].green;
+ fg->color.blue = colors[0].blue;
+ if (bg)
+ {
+ bg->color.alpha = 0xFFFF;
+ bg->color.red = colors[1].red;
+ bg->color.green = colors[1].green;
+ bg->color.blue = colors[1].blue;
+ }
+ }
+ UNBLOCK_INPUT;
+ }
+}
+
+/* Return the default Font ID on frame F. */
+
+static Font
+xftfont_default_fid (f)
+ FRAME_PTR f;
+{
+ static int fid_known;
+ static Font fid;
+
+ if (! fid_known)
+ {
+ fid = XLoadFont (FRAME_X_DISPLAY (f), "fixed");
+ if (! fid)
+ {
+ fid = XLoadFont (FRAME_X_DISPLAY (f), "*");
+ if (! fid)
+ abort ();
+ }
+ fid_known = 1;
+ }
+ return fid;
+}
+
+
+static Lisp_Object xftfont_list P_ ((Lisp_Object, Lisp_Object));
+static struct font *xftfont_open P_ ((FRAME_PTR, Lisp_Object, int));
+static void xftfont_close P_ ((FRAME_PTR, struct font *));
+static int xftfont_prepare_face P_ ((FRAME_PTR, struct face *));
+static void xftfont_done_face P_ ((FRAME_PTR, struct face *));
+static unsigned xftfont_encode_char P_ ((struct font *, int));
+static int xftfont_text_extents P_ ((struct font *, unsigned *, int,
+ struct font_metrics *));
+static int xftfont_draw P_ ((struct glyph_string *, int, int, int, int, int));
+
+static int xftfont_anchor_point P_ ((struct font *, unsigned, int,
+ int *, int *));
+
+struct font_driver xftfont_driver;
+
+static Lisp_Object
+xftfont_list (frame, spec)
+ Lisp_Object frame;
+ Lisp_Object spec;
+{
+ Lisp_Object val = ftfont_driver.list (frame, spec);
+
+ if (! NILP (val))
+ {
+ int i;
+
+ for (i = 0; i < ASIZE (val); i++)
+ ASET (AREF (val, i), FONT_TYPE_INDEX, Qxft);
+ }
+ return val;
+}
+
+static FcChar8 ascii_printable[95];
+
+static struct font *
+xftfont_open (f, entity, pixel_size)
+ FRAME_PTR f;
+ Lisp_Object entity;
+ int pixel_size;
+{
+ Display_Info *dpyinfo = FRAME_X_DISPLAY_INFO (f);
+ Display *display = FRAME_X_DISPLAY (f);
+ Lisp_Object val;
+ FcPattern *pattern, *pat = NULL;
+ FcChar8 *file;
+ struct xftfont_info *xftfont_info = NULL;
+ XFontStruct *xfont = NULL;
+ struct font *font;
+ double size = 0;
+ XftFont *xftfont = NULL;
+ int spacing;
+ char *name;
+ int len;
+
+ val = AREF (entity, FONT_EXTRA_INDEX);
+ if (XTYPE (val) != Lisp_Misc
+ || XMISCTYPE (val) != Lisp_Misc_Save_Value)
+ return NULL;
+ pattern = XSAVE_VALUE (val)->pointer;
+ if (FcPatternGetString (pattern, FC_FILE, 0, &file) != FcResultMatch)
+ return NULL;
+
+ size = XINT (AREF (entity, FONT_SIZE_INDEX));
+ if (size == 0)
+ size = pixel_size;
+
+ pat = FcPatternCreate ();
+ FcPatternAddString (pat, FC_FILE, file);
+ FcPatternAddDouble (pat, FC_PIXEL_SIZE, pixel_size);
+ FcPatternAddBool (pat, FC_ANTIALIAS, FcTrue);
+
+ BLOCK_INPUT;
+ xftfont = XftFontOpenPattern (display, pat);
+ /* We should not destroy PAT here because it is kept in XFTFONT and
+ destroyed automatically when XFTFONT is closed. */
+ if (! xftfont)
+ goto err;
+
+ xftfont_info = malloc (sizeof (struct xftfont_info));
+ if (! xftfont_info)
+ goto err;
+ xfont = malloc (sizeof (XFontStruct));
+ if (! xfont)
+ goto err;
+ xftfont_info->display = display;
+ xftfont_info->screen = FRAME_X_SCREEN_NUMBER (f);
+ xftfont_info->xftfont = xftfont;
+ xftfont_info->ft_face = XftLockFace (xftfont);
+
+ font = (struct font *) xftfont_info;
+ font->entity = entity;
+ font->pixel_size = size;
+ font->driver = &xftfont_driver;
+ len = 96;
+ name = malloc (len);
+ while (name && font_unparse_fcname (entity, pixel_size, name, len) < 0)
+ {
+ char *new = realloc (name, len += 32);
+
+ if (! new)
+ free (name);
+ name = new;
+ }
+ if (! name)
+ goto err;
+ font->font.full_name = font->font.name = name;
+ font->file_name = (char *) file;
+ font->font.size = xftfont->max_advance_width;
+ font->font.charset = font->encoding_charset = font->repertory_charset = -1;
+ font->ascent = xftfont->ascent;
+ font->descent = xftfont->descent;
+ font->font.height = xftfont->ascent + xftfont->descent;
+
+ if (FcPatternGetInteger (xftfont->pattern, FC_SPACING, 0, &spacing)
+ != FcResultMatch)
+ spacing = FC_PROPORTIONAL;
+ if (spacing != FC_PROPORTIONAL)
+ font->font.average_width = font->font.space_width
+ = xftfont->max_advance_width;
+ else
+ {
+ XGlyphInfo extents;
+
+ if (! ascii_printable[0])
+ {
+ int i;
+ for (i = 0; i < 95; i++)
+ ascii_printable[i] = ' ' + i;
+ }
+ XftTextExtents8 (display, xftfont, ascii_printable, 1, &extents);
+ font->font.space_width = extents.xOff;
+ if (font->font.space_width <= 0)
+ /* dirty workaround */
+ font->font.space_width = pixel_size;
+ XftTextExtents8 (display, xftfont, ascii_printable + 1, 94, &extents);
+ font->font.average_width = (font->font.space_width + extents.xOff) / 95;
+ }
+ UNBLOCK_INPUT;
+
+ /* Unfortunately Xft doesn't provide a way to get minimum char
+ width. So, we use space_width instead. */
+ font->min_width = font->font.space_width;
+
+ font->font.baseline_offset = 0;
+ font->font.relative_compose = 0;
+ font->font.default_ascent = 0;
+ font->font.vertical_centering = 0;
+
+ /* Setup pseudo XFontStruct */
+ xfont->fid = xftfont_default_fid (f);
+ xfont->ascent = xftfont->ascent;
+ xfont->descent = xftfont->descent;
+ xfont->max_bounds.descent = xftfont->descent;
+ xfont->max_bounds.width = xftfont->max_advance_width;
+ xfont->min_bounds.width = font->font.space_width;
+ font->font.font = xfont;
+
+ dpyinfo->n_fonts++;
+
+ /* Set global flag fonts_changed_p to non-zero if the font loaded
+ has a character with a smaller width than any other character
+ before, or if the font loaded has a smaller height than any other
+ font loaded before. If this happens, it will make a glyph matrix
+ reallocation necessary. */
+ if (dpyinfo->n_fonts == 1)
+ {
+ dpyinfo->smallest_font_height = font->font.height;
+ dpyinfo->smallest_char_width = font->min_width;
+ fonts_changed_p = 1;
+ }
+ else
+ {
+ if (dpyinfo->smallest_font_height > font->font.height)
+ dpyinfo->smallest_font_height = font->font.height,
+ fonts_changed_p |= 1;
+ if (dpyinfo->smallest_char_width > font->min_width)
+ dpyinfo->smallest_char_width = font->min_width,
+ fonts_changed_p |= 1;
+ }
+
+ return font;
+
+ err:
+ if (xftfont) XftFontClose (display, xftfont);
+ UNBLOCK_INPUT;
+ if (xftfont_info) free (xftfont_info);
+ if (xfont) free (xfont);
+ return NULL;
+}
+
+static void
+xftfont_close (f, font)
+ FRAME_PTR f;
+ struct font *font;
+{
+ struct xftfont_info *xftfont_info = (struct xftfont_info *) font;
+
+ XftUnlockFace (xftfont_info->xftfont);
+ XftFontClose (xftfont_info->display, xftfont_info->xftfont);
+ if (font->font.name)
+ free (font->font.name);
+ free (font);
+ FRAME_X_DISPLAY_INFO (f)->n_fonts--;
+}
+
+static int
+xftfont_prepare_face (f, face)
+ FRAME_PTR f;
+ struct face *face;
+{
+ struct xftface_info *xftface_info;
+
+#if 0
+ /* This doesn't work if face->ascii_face doesn't use an Xft font. */
+ if (face != face->ascii_face)
+ {
+ face->extra = face->ascii_face->extra;
+ return 0;
+ }
+#endif
+
+ xftface_info = malloc (sizeof (struct xftface_info));
+ if (! xftface_info)
+ return -1;
+
+ BLOCK_INPUT;
+ xftface_info->xft_draw = XftDrawCreate (FRAME_X_DISPLAY (f),
+ FRAME_X_WINDOW (f),
+ FRAME_X_VISUAL (f),
+ FRAME_X_COLORMAP (f));
+ xftfont_get_colors (f, face, face->gc, NULL,
+ &xftface_info->xft_fg, &xftface_info->xft_bg);
+ UNBLOCK_INPUT;
+
+ face->extra = xftface_info;
+ return 0;
+}
+
+static void
+xftfont_done_face (f, face)
+ FRAME_PTR f;
+ struct face *face;
+{
+ struct xftface_info *xftface_info;
+
+#if 0
+ /* This doesn't work if face->ascii_face doesn't use an Xft font. */
+ if (face != face->ascii_face
+ || ! face->extra)
+ return;
+#endif
+
+ xftface_info = (struct xftface_info *) face->extra;
+ if (xftface_info)
+ {
+ BLOCK_INPUT;
+ XftDrawDestroy (xftface_info->xft_draw);
+ UNBLOCK_INPUT;
+ free (xftface_info);
+ }
+ face->extra = NULL;
+}
+
+static unsigned
+xftfont_encode_char (font, c)
+ struct font *font;
+ int c;
+{
+ struct xftfont_info *xftfont_info = (struct xftfont_info *) font;
+ unsigned code = XftCharIndex (xftfont_info->display, xftfont_info->xftfont,
+ (FcChar32) c);
+
+ return (code ? code : 0xFFFFFFFF);
+}
+
+static int
+xftfont_text_extents (font, code, nglyphs, metrics)
+ struct font *font;
+ unsigned *code;
+ int nglyphs;
+ struct font_metrics *metrics;
+{
+ struct xftfont_info *xftfont_info = (struct xftfont_info *) font;
+ XGlyphInfo extents;
+
+ BLOCK_INPUT;
+ XftGlyphExtents (xftfont_info->display, xftfont_info->xftfont, code, nglyphs,
+ &extents);
+ UNBLOCK_INPUT;
+ if (metrics)
+ {
+ metrics->lbearing = - extents.x;
+ metrics->rbearing = - extents.x + extents.width;
+ metrics->width = extents.xOff;
+ metrics->ascent = extents.y;
+ metrics->descent = extents.y - extents.height;
+ }
+ return extents.xOff;
+}
+
+static int
+xftfont_draw (s, from, to, x, y, with_background)
+ struct glyph_string *s;
+ int from, to, x, y, with_background;
+{
+ FRAME_PTR f = s->f;
+ struct face *face = s->face;
+ struct xftfont_info *xftfont_info = (struct xftfont_info *) face->font_info;
+ struct xftface_info *xftface_info = (struct xftface_info *) face->extra;
+ FT_UInt *code;
+ XftColor fg, bg;
+ XRectangle r;
+ int len = to - from;
+ int i;
+
+ xftfont_get_colors (f, face, s->gc, xftface_info,
+ &fg, with_background ? &bg : NULL);
+ BLOCK_INPUT;
+ if (s->clip_width)
+ {
+ r.x = s->clip_x, r.width = s->clip_width;
+ r.y = s->clip_y, r.height = s->clip_height;
+ XftDrawSetClipRectangles (xftface_info->xft_draw, 0, 0, &r, 1);
+ }
+ if (with_background)
+ {
+ struct font *font = (struct font *) face->font_info;
+
+ XftDrawRect (xftface_info->xft_draw, &bg,
+ x, y - face->font->ascent, s->width, font->font.height);
+ }
+ code = alloca (sizeof (FT_UInt) * len);
+ for (i = 0; i < len; i++)
+ code[i] = ((XCHAR2B_BYTE1 (s->char2b + from + i) << 8)
+ | XCHAR2B_BYTE2 (s->char2b + from + i));
+
+ XftDrawGlyphs (xftface_info->xft_draw, &fg, xftfont_info->xftfont,
+ x, y, code, len);
+ if (s->clip_width)
+ XftDrawSetClip (xftface_info->xft_draw, NULL);
+ UNBLOCK_INPUT;
+
+ return len;
+}
+
+static int
+xftfont_anchor_point (font, code, index, x, y)
+ struct font *font;
+ unsigned code;
+ int index;
+ int *x, *y;
+{
+ struct xftfont_info *xftfont_info = (struct xftfont_info *) font;
+ FT_Face ft_face = xftfont_info->ft_face;
+
+ if (FT_Load_Glyph (ft_face, code, FT_LOAD_DEFAULT) != 0)
+ return -1;
+ if (ft_face->glyph->format != FT_GLYPH_FORMAT_OUTLINE)
+ return -1;
+ if (index >= ft_face->glyph->outline.n_points)
+ return -1;
+ *x = ft_face->glyph->outline.points[index].x;
+ *y = ft_face->glyph->outline.points[index].y;
+ return 0;
+}
+
+
+void
+syms_of_xftfont ()
+{
+ DEFSYM (Qxft, "xft");
+
+ xftfont_driver = ftfont_driver;
+ xftfont_driver.type = Qxft;
+ xftfont_driver.get_cache = xfont_driver.get_cache;
+ xftfont_driver.list = xftfont_list;
+ xftfont_driver.open = xftfont_open;
+ xftfont_driver.close = xftfont_close;
+ xftfont_driver.prepare_face = xftfont_prepare_face;
+ xftfont_driver.done_face = xftfont_done_face;
+ xftfont_driver.encode_char = xftfont_encode_char;
+ xftfont_driver.text_extents = xftfont_text_extents;
+ xftfont_driver.draw = xftfont_draw;
+ xftfont_driver.anchor_point = xftfont_anchor_point;
+
+ register_font_driver (&xftfont_driver, NULL);
+}
+
+/* arch-tag: 64ec61bf-7c8e-4fe6-b953-c6a85d5e1605
+ (do not change this comment) */
diff --git a/src/xmenu.c b/src/xmenu.c
index 5a71d82a424..8e994ac5efb 100644
--- a/src/xmenu.c
+++ b/src/xmenu.c
@@ -1617,7 +1617,7 @@ menu_highlight_callback (widget, id, call_data)
static void
find_and_call_menu_selection (f, menu_bar_items_used, vector, client_data)
FRAME_PTR f;
- int menu_bar_items_used;
+ EMACS_INT menu_bar_items_used;
Lisp_Object vector;
void *client_data;
{
diff --git a/src/xterm.c b/src/xterm.c
index 5e4eeb3bb64..ca5af4afbfc 100644
--- a/src/xterm.c
+++ b/src/xterm.c
@@ -68,6 +68,7 @@ Boston, MA 02110-1301, USA. */
/* #include <sys/param.h> */
#include "charset.h"
+#include "character.h"
#include "coding.h"
#include "ccl.h"
#include "frame.h"
@@ -101,6 +102,10 @@ Boston, MA 02110-1301, USA. */
#include "gtkutil.h"
#endif
+#ifdef USE_FONT_BACKEND
+#include "font.h"
+#endif /* USE_FONT_BACKEND */
+
#ifdef USE_LUCID
extern int xlwmenu_window_p P_ ((Widget w, Window window));
extern void xlwmenu_redisplay P_ ((Widget));
@@ -804,7 +809,8 @@ XTreset_terminal_modes ()
/* Function prototypes of this page. */
-static int x_encode_char P_ ((int, XChar2b *, struct font_info *, int *));
+static int x_encode_char P_ ((int, XChar2b *, struct font_info *,
+ struct charset *, int *));
/* Get metrics of character CHAR2B in FONT. Value is null if CHAR2B
@@ -883,13 +889,13 @@ x_per_char_metric (font, char2b, font_type)
the two-byte form of C. Encoding is returned in *CHAR2B. */
static int
-x_encode_char (c, char2b, font_info, two_byte_p)
+x_encode_char (c, char2b, font_info, charset, two_byte_p)
int c;
XChar2b *char2b;
struct font_info *font_info;
+ struct charset *charset;
int *two_byte_p;
{
- int charset = CHAR_CHARSET (c);
XFontStruct *font = font_info->font;
/* FONT_INFO may define a scheme by which to encode byte1 and byte2.
@@ -903,31 +909,31 @@ x_encode_char (c, char2b, font_info, two_byte_p)
check_ccl_update (ccl);
if (CHARSET_DIMENSION (charset) == 1)
{
- ccl->reg[0] = charset;
+ ccl->reg[0] = CHARSET_ID (charset);
ccl->reg[1] = char2b->byte2;
ccl->reg[2] = -1;
}
else
{
- ccl->reg[0] = charset;
+ ccl->reg[0] = CHARSET_ID (charset);
ccl->reg[1] = char2b->byte1;
ccl->reg[2] = char2b->byte2;
}
- ccl_driver (ccl, NULL, NULL, 0, 0, NULL);
+ ccl_driver (ccl, NULL, NULL, 0, 0, Qnil);
/* We assume that MSBs are appropriately set/reset by CCL
program. */
if (font->max_byte1 == 0) /* 1-byte font */
- char2b->byte1 = 0, char2b->byte2 = ccl->reg[1];
+ STORE_XCHAR2B (char2b, 0, ccl->reg[1]);
else
- char2b->byte1 = ccl->reg[1], char2b->byte2 = ccl->reg[2];
+ STORE_XCHAR2B (char2b, ccl->reg[1], ccl->reg[2]);
}
- else if (font_info->encoding[charset])
+ else if (font_info->encoding_type)
{
/* Fixed encoding scheme. See fontset.h for the meaning of the
encoding numbers. */
- int enc = font_info->encoding[charset];
+ unsigned char enc = font_info->encoding_type;
if ((enc == 1 || enc == 2)
&& CHARSET_DIMENSION (charset) == 2)
@@ -1055,15 +1061,20 @@ x_set_mouse_face_gc (s)
face = FACE_FROM_ID (s->f, MOUSE_FACE_ID);
if (s->first_glyph->type == CHAR_GLYPH)
- face_id = FACE_FOR_CHAR (s->f, face, s->first_glyph->u.ch);
+ face_id = FACE_FOR_CHAR (s->f, face, s->first_glyph->u.ch, -1, Qnil);
else
- face_id = FACE_FOR_CHAR (s->f, face, 0);
+ face_id = FACE_FOR_CHAR (s->f, face, 0, -1, Qnil);
s->face = FACE_FROM_ID (s->f, face_id);
PREPARE_FACE_FOR_DISPLAY (s->f, s->face);
/* If font in this face is same as S->font, use it. */
if (s->font == s->face->font)
s->gc = s->face->gc;
+#ifdef USE_FONT_BACKEND
+ else if (enable_font_backend)
+ /* No need of setting a font for s->gc. */
+ s->gc = s->face->gc;
+#endif /* USE_FONT_BACKEND */
else
{
/* Otherwise construct scratch_cursor_gc with values from FACE
@@ -1161,12 +1172,50 @@ x_set_glyph_string_clipping (s)
XRectangle r;
get_glyph_string_clip_rect (s, &r);
XSetClipRectangles (s->display, s->gc, 0, 0, &r, 1, Unsorted);
+#ifdef USE_FONT_BACKEND
+ s->clip_x = r.x, s->clip_y = r.y;
+ s->clip_width = r.width, s->clip_height = r.height;
+#endif /* USE_FONT_BACKEND */
+}
+
+
+/* Set SRC's clipping for output of glyph string DST. This is called
+ when we are drawing DST's left_overhang or right_overhang only in
+ the area of SRC. */
+
+static void
+x_set_glyph_string_clipping_exactly (src, dst)
+ struct glyph_string *src, *dst;
+{
+ XRectangle r;
+
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ {
+ r.x = dst->clip_x = src->x;
+ r.width = dst->clip_width = src->clip_width;
+ r.y = dst->clip_y = src->clip_y;
+ r.height = dst->clip_height = src->clip_height;
+ }
+ else
+ {
+#endif /* USE_FONT_BACKEND */
+ struct glyph_string *clip_head = src->clip_head;
+ struct glyph_string *clip_tail = src->clip_tail;
+
+ /* This foces clipping just this glyph string. */
+ src->clip_head = src->clip_tail = src;
+ get_glyph_string_clip_rect (src, &r);
+ src->clip_head = clip_head, src->clip_tail = clip_tail;
+#ifdef USE_FONT_BACKEND
+ }
+#endif /* USE_FONT_BACKEND */
+ XSetClipRectangles (dst->display, dst->gc, 0, 0, &r, 1, Unsorted);
}
/* RIF:
- Compute left and right overhang of glyph string S. If S is a glyph
- string for a composition, assume overhangs don't exist. */
+ Compute left and right overhang of glyph string S. */
static void
x_compute_glyph_string_overhangs (s)
@@ -1177,11 +1226,34 @@ x_compute_glyph_string_overhangs (s)
{
XCharStruct cs;
int direction, font_ascent, font_descent;
+
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ {
+ unsigned *code = alloca (sizeof (unsigned) * s->nchars);
+ struct font *font = (struct font *) s->font_info;
+ struct font_metrics metrics;
+ int i;
+
+ for (i = 0; i < s->nchars; i++)
+ code[i] = (s->char2b[i].byte1 << 8) | s->char2b[i].byte2;
+ font->driver->text_extents (font, code, s->nchars, &metrics);
+ cs.rbearing = metrics.rbearing;
+ cs.lbearing = metrics.lbearing;
+ cs.width = metrics.width;
+ }
+ else
+#endif /* USE_FONT_BACKEND */
XTextExtents16 (s->font, s->char2b, s->nchars, &direction,
&font_ascent, &font_descent, &cs);
s->right_overhang = cs.rbearing > cs.width ? cs.rbearing - cs.width : 0;
s->left_overhang = cs.lbearing < 0 ? -cs.lbearing : 0;
}
+ else if (s->cmp)
+ {
+ s->right_overhang = s->cmp->rbearing - s->cmp->pixel_width;
+ s->left_overhang = - s->cmp->lbearing;
+ }
}
@@ -1271,6 +1343,30 @@ x_draw_glyph_string_foreground (s)
x += g->pixel_width;
}
}
+#ifdef USE_FONT_BACKEND
+ else if (enable_font_backend)
+ {
+ unsigned *code = alloca (sizeof (unsigned) * s->nchars);
+ int boff = s->font_info->baseline_offset;
+ struct font *font = (struct font *) s->font_info;
+ int y;
+
+ for (i = 0; i < s->nchars; i++)
+ code[i] = (s->char2b[i].byte1 << 8) | s->char2b[i].byte2;
+
+ if (s->font_info->vertical_centering)
+ boff = VCENTER_BASELINE_OFFSET (s->font, s->f) - boff;
+
+ y = s->ybase - boff;
+ if (s->for_overlaps
+ || (s->background_filled_p && s->hl != DRAW_CURSOR))
+ font->driver->draw (s, 0, s->nchars, x, y, 0);
+ else
+ font->driver->draw (s, 0, s->nchars, x, y, 1);
+ if (s->face->overstrike)
+ font->driver->draw (s, 0, s->nchars, x + 1, y, 0);
+ }
+#endif /* USE_FONT_BACKEND */
else
{
char *char1b = (char *) s->char2b;
@@ -1334,7 +1430,7 @@ x_draw_composite_glyph_string_foreground (s)
/* If first glyph of S has a left box line, start drawing the text
of S to the right of that box line. */
- if (s->face->box != FACE_NO_BOX
+ if (s->face && s->face->box != FACE_NO_BOX
&& s->first_glyph->left_box_line_p)
x = s->x + abs (s->face->box_line_width);
else
@@ -1353,20 +1449,77 @@ x_draw_composite_glyph_string_foreground (s)
XDrawRectangle (s->display, s->window, s->gc, x, s->y,
s->width - 1, s->height - 1);
}
+#ifdef USE_FONT_BACKEND
+ else if (enable_font_backend)
+ {
+ struct font *font = (struct font *) s->font_info;
+ int y = s->ybase;
+ int width = 0;
+
+ if (s->cmp->method == COMPOSITION_WITH_GLYPH_STRING)
+ {
+ Lisp_Object gstring = AREF (XHASH_TABLE (composition_hash_table)
+ ->key_and_value,
+ s->cmp->hash_index * 2);
+ int from;
+
+ for (i = from = 0; i < s->nchars; i++)
+ {
+ Lisp_Object g = LGSTRING_GLYPH (gstring, i);
+ Lisp_Object adjustment = LGLYPH_ADJUSTMENT (g);
+ int xoff, yoff, wadjust;
+
+ if (NILP (adjustment))
+ {
+ width += XINT (LGLYPH_WIDTH (g));
+ continue;
+ }
+ if (from < i)
+ {
+ font->driver->draw (s, from, i, x, y, 0);
+ x += width;
+ }
+ xoff = XINT (AREF (adjustment, 0));
+ yoff = XINT (AREF (adjustment, 1));
+ wadjust = XINT (AREF (adjustment, 2));
+
+ font->driver->draw (s, i, i + 1, x + xoff, y + yoff, 0);
+ x += XINT (LGLYPH_WIDTH (g)) + wadjust;
+ from = i + 1;
+ width = 0;
+ }
+ if (from < i)
+ font->driver->draw (s, from, i, x, y, 0);
+ }
+ else
+ {
+ for (i = 0; i < s->nchars; i++, ++s->gidx)
+ {
+ int xx = x + s->cmp->offsets[s->gidx * 2];
+ int yy = y - s->cmp->offsets[s->gidx * 2 + 1];
+
+ font->driver->draw (s, i, i + 1, xx, yy, 0);
+ if (s->face->overstrike)
+ font->driver->draw (s, i, i + 1, xx + 1, yy, 0);
+ }
+ }
+ }
+#endif /* USE_FONT_BACKEND */
else
{
for (i = 0; i < s->nchars; i++, ++s->gidx)
- {
- XDrawString16 (s->display, s->window, s->gc,
- x + s->cmp->offsets[s->gidx * 2],
- s->ybase - s->cmp->offsets[s->gidx * 2 + 1],
- s->char2b + i, 1);
- if (s->face->overstrike)
+ if (s->face)
+ {
XDrawString16 (s->display, s->window, s->gc,
- x + s->cmp->offsets[s->gidx * 2] + 1,
+ x + s->cmp->offsets[s->gidx * 2],
s->ybase - s->cmp->offsets[s->gidx * 2 + 1],
s->char2b + i, 1);
- }
+ if (s->face->overstrike)
+ XDrawString16 (s->display, s->window, s->gc,
+ x + s->cmp->offsets[s->gidx * 2] + 1,
+ s->ybase - s->cmp->offsets[s->gidx * 2 + 1],
+ s->char2b + i, 1);
+ }
}
}
@@ -2610,15 +2763,25 @@ x_draw_glyph_string (s)
{
int relief_drawn_p = 0;
- /* If S draws into the background of its successor, draw the
- background of the successor first so that S can draw into it.
+ /* If S draws into the background of its successors, draw the
+ background of the successors first so that S can draw into it.
This makes S->next use XDrawString instead of XDrawImageString. */
if (s->next && s->right_overhang && !s->for_overlaps)
{
- xassert (s->next->img == NULL);
- x_set_glyph_string_gc (s->next);
- x_set_glyph_string_clipping (s->next);
- x_draw_glyph_string_background (s->next, 1);
+ int width;
+ struct glyph_string *next;
+
+ for (width = 0, next = s->next; next;
+ width += next->width, next = next->next)
+ if (next->first_glyph->type != IMAGE_GLYPH)
+ {
+ x_set_glyph_string_gc (next);
+ x_set_glyph_string_clipping (next);
+ x_draw_glyph_string_background (next, 1);
+#ifdef USE_FONT_BACKEND
+ next->clip_width = 0;
+#endif /* USE_FONT_BACKEND */
+ }
}
/* Set up S->gc, set clipping and draw S. */
@@ -2638,6 +2801,12 @@ x_draw_glyph_string (s)
x_set_glyph_string_clipping (s);
relief_drawn_p = 1;
}
+ else if ((s->prev && s->prev->hl != s->hl && s->left_overhang)
+ || (s->next && s->next->hl != s->hl && s->right_overhang))
+ /* We must clip just this glyph. left_overhang part has already
+ drawn when s->prev was drawn, and right_overhang part will be
+ drawn later when s->next is drawn. */
+ x_set_glyph_string_clipping_exactly (s, s);
else
x_set_glyph_string_clipping (s);
@@ -2680,6 +2849,12 @@ x_draw_glyph_string (s)
int y;
/* Get the underline thickness. Default is 1 pixel. */
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ /* In the future, we must use information of font. */
+ h = 1;
+ else
+#endif /* USE_FONT_BACKEND */
if (!XGetFontProperty (s->font, XA_UNDERLINE_THICKNESS, &h))
h = 1;
@@ -2691,6 +2866,17 @@ x_draw_glyph_string (s)
ROUND ((maximum descent) / 2), with
ROUND(x) = floor (x + 0.5) */
+#ifdef USE_FONT_BACKEND
+ if (enable_font_backend)
+ {
+ if (s->face->font)
+ /* In the future, we must use information of font. */
+ y = s->ybase + (s->face->font->max_bounds.descent + 1) / 2;
+ else
+ y = s->y + s->height - h;
+ }
+ else
+#endif
if (x_use_underline_position_properties
&& XGetFontProperty (s->font, XA_UNDERLINE_POSITION, &tem))
y = s->ybase + (long) tem;
@@ -2755,10 +2941,67 @@ x_draw_glyph_string (s)
/* Draw relief if not yet drawn. */
if (!relief_drawn_p && s->face->box != FACE_NO_BOX)
x_draw_glyph_string_box (s);
+
+ if (s->prev)
+ {
+ struct glyph_string *prev;
+
+ for (prev = s->prev; prev; prev = prev->prev)
+ if (prev->hl != s->hl
+ && prev->x + prev->width + prev->right_overhang > s->x)
+ {
+ /* As prev was drawn while clipped to its own area, we
+ must draw the right_overhang part using s->hl now. */
+ enum draw_glyphs_face save = prev->hl;
+
+ prev->hl = s->hl;
+ x_set_glyph_string_gc (prev);
+ x_set_glyph_string_clipping_exactly (s, prev);
+ if (prev->first_glyph->type == CHAR_GLYPH)
+ x_draw_glyph_string_foreground (prev);
+ else
+ x_draw_composite_glyph_string_foreground (prev);
+ XSetClipMask (prev->display, prev->gc, None);
+ prev->hl = save;
+#ifdef USE_FONT_BACKEND
+ prev->clip_width = 0;
+#endif /* USE_FONT_BACKEND */
+ }
+ }
+
+ if (s->next)
+ {
+ struct glyph_string *next;
+
+ for (next = s->next; next; next = next->next)
+ if (next->hl != s->hl
+ && next->x - next->left_overhang < s->x + s->width)
+ {
+ /* As next will be drawn while clipped to its own area,
+ we must draw the left_overhang part using s->hl now. */
+ enum draw_glyphs_face save = next->hl;
+
+ next->hl = s->hl;
+ x_set_glyph_string_gc (next);
+ x_set_glyph_string_clipping_exactly (s, next);
+ if (next->first_glyph->type == CHAR_GLYPH)
+ x_draw_glyph_string_foreground (next);
+ else
+ x_draw_composite_glyph_string_foreground (next);
+ XSetClipMask (next->display, next->gc, None);
+ next->hl = save;
+#ifdef USE_FONT_BACKEND
+ next->clip_width = 0;
+#endif /* USE_FONT_BACKEND */
+ }
+ }
}
/* Reset clipping. */
XSetClipMask (s->display, s->gc, None);
+#ifdef USE_FONT_BACKEND
+ s->clip_width = 0;
+#endif /* USE_FONT_BACKEND */
}
/* Shift display to make room for inserted glyphs. */
@@ -6256,41 +6499,14 @@ handle_one_xevent (dpyinfo, eventp, finish, hold_quit)
goto done_keysym;
}
- /* Keysyms directly mapped to supported Unicode characters. */
- if ((keysym >= 0x01000000 && keysym <= 0x010033ff)
- || (keysym >= 0x0100e000 && keysym <= 0x0100ffff))
+ /* Keysyms directly mapped to Unicode characters. */
+ if (keysym >= 0x01000000 && keysym <= 0x0110FFFF)
{
- int code = keysym & 0xFFFF, charset_id, c1, c2;
-
- if (code < 0x80)
- {
- inev.ie.kind = ASCII_KEYSTROKE_EVENT;
- inev.ie.code = code;
- }
- else if (code < 0x100)
- {
- if (code < 0xA0)
- charset_id = CHARSET_8_BIT_CONTROL;
- else
- charset_id = charset_latin_iso8859_1;
- inev.ie.kind = MULTIBYTE_CHAR_KEYSTROKE_EVENT;
- inev.ie.code = MAKE_CHAR (charset_id, code, 0);
- }
+ if (keysym < 0x01000080)
+ inev.ie.kind = ASCII_KEYSTROKE_EVENT;
else
- {
- if (code < 0x2500)
- charset_id = charset_mule_unicode_0100_24ff,
- code -= 0x100;
- else if (code < 0xE000)
- charset_id = charset_mule_unicode_2500_33ff,
- code -= 0x2500;
- else
- charset_id = charset_mule_unicode_e000_ffff,
- code -= 0xE000;
- c1 = (code / 96) + 32, c2 = (code % 96) + 32;
- inev.ie.kind = MULTIBYTE_CHAR_KEYSTROKE_EVENT;
- inev.ie.code = MAKE_CHAR (charset_id, c1, c2);
- }
+ inev.ie.kind = MULTIBYTE_CHAR_KEYSTROKE_EVENT;
+ inev.ie.code = keysym & 0xFFFFFF;
goto done_keysym;
}
@@ -6403,38 +6619,39 @@ handle_one_xevent (dpyinfo, eventp, finish, hold_quit)
register int c;
int nchars, len;
- /* The input should be decoded with `coding_system'
- which depends on which X*LookupString function
- we used just above and the locale. */
- setup_coding_system (coding_system, &coding);
- coding.src_multibyte = 0;
- coding.dst_multibyte = 1;
- /* The input is converted to events, thus we can't
- handle composition. Anyway, there's no XIM that
- gives us composition information. */
- coding.composing = COMPOSITION_DISABLED;
-
- for (i = 0; i < nbytes; i++)
+ for (i = 0, nchars = 0; i < nbytes; i++)
{
+ if (ASCII_BYTE_P (copy_bufptr[i]))
+ nchars++;
STORE_KEYSYM_FOR_DEBUG (copy_bufptr[i]);
}
- {
- /* Decode the input data. */
- int require;
- unsigned char *p;
-
- require = decoding_buffer_size (&coding, nbytes);
- p = (unsigned char *) alloca (require);
- coding.mode |= CODING_MODE_LAST_BLOCK;
- /* We explicitly disable composition handling because
- key data should not contain any composition sequence. */
- coding.composing = COMPOSITION_DISABLED;
- decode_coding (&coding, copy_bufptr, p, nbytes, require);
- nbytes = coding.produced;
- nchars = coding.produced_char;
- copy_bufptr = p;
- }
+ if (nchars < nbytes)
+ {
+ /* Decode the input data. */
+ int require;
+ unsigned char *p;
+
+ /* The input should be decoded with `coding_system'
+ which depends on which X*LookupString function
+ we used just above and the locale. */
+ setup_coding_system (coding_system, &coding);
+ coding.src_multibyte = 0;
+ coding.dst_multibyte = 1;
+ /* The input is converted to events, thus we can't
+ handle composition. Anyway, there's no XIM that
+ gives us composition information. */
+ coding.common_flags &= ~CODING_ANNOTATION_MASK;
+
+ require = MAX_MULTIBYTE_LENGTH * nbytes;
+ coding.destination = alloca (require);
+ coding.dst_bytes = require;
+ coding.mode |= CODING_MODE_LAST_BLOCK;
+ decode_coding_c_string (&coding, copy_bufptr, nbytes, Qnil);
+ nbytes = coding.produced;
+ nchars = coding.produced_char;
+ copy_bufptr = coding.destination;
+ }
/* Convert the input data to a sequence of
character events. */
@@ -7854,11 +8071,16 @@ x_new_font (f, fontname)
register char *fontname;
{
struct font_info *fontp
- = FS_LOAD_FONT (f, 0, fontname, -1);
+ = FS_LOAD_FONT (f, fontname);
if (!fontp)
return Qnil;
+ if (FRAME_FONT (f) == (XFontStruct *) (fontp->font))
+ /* This font is already set in frame F. There's nothing more to
+ do. */
+ return build_string (fontp->full_name);
+
FRAME_FONT (f) = (XFontStruct *) (fontp->font);
FRAME_BASELINE_OFFSET (f) = fontp->baseline_offset;
FRAME_FONTSET (f) = -1;
@@ -7902,33 +8124,45 @@ x_new_font (f, fontname)
return build_string (fontp->full_name);
}
-/* Give frame F the fontset named FONTSETNAME as its default font, and
- return the full name of that fontset. FONTSETNAME may be a wildcard
- pattern; in that case, we choose some fontset that fits the pattern.
- The return value shows which fontset we chose. */
+/* Give frame F the fontset named FONTSETNAME as its default fontset,
+ and return the full name of that fontset. FONTSETNAME may be a
+ wildcard pattern; in that case, we choose some fontset that fits
+ the pattern. FONTSETNAME may be a font name for ASCII characters;
+ in that case, we create a fontset from that font name.
+
+ The return value shows which fontset we chose.
+ If FONTSETNAME specifies the default fontset, return Qt.
+ If an ASCII font in the specified fontset can't be loaded, return
+ Qnil. */
Lisp_Object
x_new_fontset (f, fontsetname)
struct frame *f;
- char *fontsetname;
+ Lisp_Object fontsetname;
{
- int fontset = fs_query_fontset (build_string (fontsetname), 0);
+ int fontset = fs_query_fontset (fontsetname, 0);
Lisp_Object result;
- if (fontset < 0)
- return Qnil;
-
- if (FRAME_FONTSET (f) == fontset)
+ if (fontset > 0 && f->output_data.x->fontset == fontset)
/* This fontset is already set in frame F. There's nothing more
to do. */
return fontset_name (fontset);
+ else if (fontset == 0)
+ /* The default fontset can't be the default font. */
+ return Qt;
- result = x_new_font (f, (SDATA (fontset_ascii (fontset))));
+ if (fontset > 0)
+ result = x_new_font (f, (SDATA (fontset_ascii (fontset))));
+ else
+ result = x_new_font (f, SDATA (fontsetname));
if (!STRINGP (result))
/* Can't load ASCII font. */
return Qnil;
+ if (fontset < 0)
+ fontset = new_fontset_from_font_name (result);
+
/* Since x_new_font doesn't update any fontset information, do it now. */
FRAME_FONTSET (f) = fontset;
@@ -7938,8 +8172,70 @@ x_new_fontset (f, fontsetname)
xic_set_xfontset (f, SDATA (fontset_ascii (fontset)));
#endif
- return build_string (fontsetname);
+ return fontset_name (fontset);
+}
+
+#ifdef USE_FONT_BACKEND
+Lisp_Object
+x_new_fontset2 (f, fontset, font_object)
+ struct frame *f;
+ int fontset;
+ Lisp_Object font_object;
+{
+ struct font *font = XSAVE_VALUE (font_object)->pointer;
+
+ if (FRAME_FONT_OBJECT (f) == font)
+ /* This font is already set in frame F. There's nothing more to
+ do. */
+ return fontset_name (fontset);
+
+ BLOCK_INPUT;
+
+ FRAME_FONT_OBJECT (f) = font;
+ FRAME_FONT (f) = font->font.font;
+ FRAME_BASELINE_OFFSET (f) = font->font.baseline_offset;
+ FRAME_FONTSET (f) = fontset;
+
+ FRAME_COLUMN_WIDTH (f) = font->font.average_width;
+ FRAME_SPACE_WIDTH (f) = font->font.space_width;
+ FRAME_LINE_HEIGHT (f) = font->font.height;
+
+ compute_fringe_widths (f, 1);
+
+ /* Compute the scroll bar width in character columns. */
+ if (FRAME_CONFIG_SCROLL_BAR_WIDTH (f) > 0)
+ {
+ int wid = FRAME_COLUMN_WIDTH (f);
+ FRAME_CONFIG_SCROLL_BAR_COLS (f)
+ = (FRAME_CONFIG_SCROLL_BAR_WIDTH (f) + wid - 1) / wid;
+ }
+ else
+ {
+ int wid = FRAME_COLUMN_WIDTH (f);
+ FRAME_CONFIG_SCROLL_BAR_COLS (f) = (14 + wid - 1) / wid;
+ }
+
+ /* Now make the frame display the given font. */
+ if (FRAME_X_WINDOW (f) != 0)
+ {
+ /* Don't change the size of a tip frame; there's no point in
+ doing it because it's done in Fx_show_tip, and it leads to
+ problems because the tip frame has no widget. */
+ if (NILP (tip_frame) || XFRAME (tip_frame) != f)
+ x_set_window_size (f, 0, FRAME_COLS (f), FRAME_LINES (f));
+ }
+
+#ifdef HAVE_X_I18N
+ if (FRAME_XIC (f)
+ && (FRAME_XIC_STYLE (f) & (XIMPreeditPosition | XIMStatusArea)))
+ xic_set_xfontset (f, SDATA (fontset_ascii (fontset)));
+#endif
+
+ UNBLOCK_INPUT;
+
+ return fontset_name (fontset);
}
+#endif /* USE_FONT_BACKEND */
/***********************************************************************
@@ -9017,6 +9313,15 @@ x_free_frame_resources (f)
commands to the X server. */
if (dpyinfo->display)
{
+#ifdef USE_FONT_BACKEND
+ /* We must free faces before destroying windows because some
+ font-driver (e.g. xft) access a window while finishing a
+ face. */
+ if (enable_font_backend
+ && FRAME_FACE_CACHE (f))
+ free_frame_faces (f);
+#endif /* USE_FONT_BACKEND */
+
if (f->output_data.x->icon_desc)
XDestroyWindow (FRAME_X_DISPLAY (f), f->output_data.x->icon_desc);
@@ -9411,7 +9716,7 @@ x_get_font_info (f, font_idx)
If SIZE is > 0, it is the size (maximum bounds width) of fonts
to be listed.
- SIZE < 0 means include scalable fonts.
+ SIZE < 0 means include auto scaled fonts.
Frame F null means we have not yet created any frame on X, and
consult the first display in x_display_list. MAXNAMES sets a limit
@@ -9884,6 +10189,7 @@ x_load_font (f, fontname, size)
bzero (fontp, sizeof (*fontp));
fontp->font = font;
fontp->font_idx = i;
+ fontp->charset = -1; /* fs_load_font sets it. */
fontp->name = (char *) xmalloc (strlen (fontname) + 1);
bcopy (fontname, fontp->name, strlen (fontname) + 1);
@@ -9995,10 +10301,10 @@ x_load_font (f, fontname, size)
the font code-points (0:0x20..0x7F, 1:0xA0..0xFF), or
(0:0x2020..0x7F7F, 1:0xA0A0..0xFFFF, 3:0x20A0..0x7FFF,
2:0xA020..0xFF7F). For the moment, we don't know which charset
- uses this font. So, we set information in fontp->encoding[1]
+ uses this font. So, we set information in fontp->encoding_type
which is never used by any charset. If mapping can't be
decided, set FONT_ENCODING_NOT_DECIDED. */
- fontp->encoding[1]
+ fontp->encoding_type
= (font->max_byte1 == 0
/* 1-byte font */
? (font->min_char_or_byte2 < 0x80
@@ -10098,6 +10404,160 @@ x_find_ccl_program (fontp)
}
+/* Return a char-table whose elements are t if the font FONT_INFO
+ contains a glyph for the corresponding character, and nil if
+ not. */
+
+Lisp_Object
+x_get_font_repertory (f, font_info)
+ FRAME_PTR f;
+ struct font_info *font_info;
+{
+ XFontStruct *font = (XFontStruct *) font_info->font;
+ Lisp_Object table;
+ int min_byte1, max_byte1, min_byte2, max_byte2;
+ int c;
+ struct charset *charset = CHARSET_FROM_ID (font_info->charset);
+ int offset = CHARSET_OFFSET (charset);
+
+ table = Fmake_char_table (Qnil, Qnil);
+
+ min_byte1 = font->min_byte1;
+ max_byte1 = font->max_byte1;
+ min_byte2 = font->min_char_or_byte2;
+ max_byte2 = font->max_char_or_byte2;
+ if (min_byte1 == 0 && max_byte1 == 0)
+ {
+ if (! font->per_char || font->all_chars_exist == True)
+ {
+ if (offset >= 0)
+ char_table_set_range (table, offset + min_byte2,
+ offset + max_byte2, Qt);
+ else
+ for (; min_byte2 <= max_byte2; min_byte2++)
+ {
+ c = DECODE_CHAR (charset, min_byte2);
+ CHAR_TABLE_SET (table, c, Qt);
+ }
+ }
+ else
+ {
+ XCharStruct *pcm = font->per_char;
+ int from = -1;
+ int i;
+
+ for (i = min_byte2; i <= max_byte2; i++, pcm++)
+ {
+ if (pcm->width == 0 && pcm->rbearing == pcm->lbearing)
+ {
+ if (from >= 0)
+ {
+ if (offset >= 0)
+ char_table_set_range (table, offset + from,
+ offset + i - 1, Qt);
+ else
+ for (; from < i; from++)
+ {
+ c = DECODE_CHAR (charset, from);
+ CHAR_TABLE_SET (table, c, Qt);
+ }
+ from = -1;
+ }
+ }
+ else if (from < 0)
+ from = i;
+ }
+ if (from >= 0)
+ {
+ if (offset >= 0)
+ char_table_set_range (table, offset + from, offset + i - 1,
+ Qt);
+ else
+ for (; from < i; from++)
+ {
+ c = DECODE_CHAR (charset, from);
+ CHAR_TABLE_SET (table, c, Qt);
+ }
+ }
+ }
+ }
+ else
+ {
+ if (! font->per_char || font->all_chars_exist == True)
+ {
+ int i, j;
+
+ if (offset >= 0)
+ for (i = min_byte1; i <= max_byte1; i++)
+ char_table_set_range
+ (table, offset + ((i << 8) | min_byte2),
+ offset + ((i << 8) | max_byte2), Qt);
+ else
+ for (i = min_byte1; i <= max_byte1; i++)
+ for (j = min_byte2; j <= max_byte2; j++)
+ {
+ unsigned code = (i << 8) | j;
+ c = DECODE_CHAR (charset, code);
+ CHAR_TABLE_SET (table, c, Qt);
+ }
+ }
+ else
+ {
+ XCharStruct *pcm = font->per_char;
+ int i;
+
+ for (i = min_byte1; i <= max_byte1; i++)
+ {
+ int from = -1;
+ int j;
+
+ for (j = min_byte2; j <= max_byte2; j++, pcm++)
+ {
+ if (pcm->width == 0 && pcm->rbearing == pcm->lbearing)
+ {
+ if (from >= 0)
+ {
+ if (offset >= 0)
+ char_table_set_range
+ (table, offset + ((i << 8) | from),
+ offset + ((i << 8) | (j - 1)), Qt);
+ else
+ {
+ for (; from < j; from++)
+ {
+ unsigned code = (i << 8) | from;
+ c = ENCODE_CHAR (charset, code);
+ CHAR_TABLE_SET (table, c, Qt);
+ }
+ }
+ from = -1;
+ }
+ }
+ else if (from < 0)
+ from = j;
+ }
+ if (from >= 0)
+ {
+ if (offset >= 0)
+ char_table_set_range
+ (table, offset + ((i << 8) | from),
+ offset + ((i << 8) | (j - 1)), Qt);
+ else
+ {
+ for (; from < j; from++)
+ {
+ unsigned code = (i << 8) | from;
+ c = DECODE_CHAR (charset, code);
+ CHAR_TABLE_SET (table, c, Qt);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return table;
+}
/***********************************************************************
Initialization
@@ -10956,8 +11416,6 @@ syms_of_xterm ()
staticpro (&Qvendor_specific_keysyms);
Qvendor_specific_keysyms = intern ("vendor-specific-keysyms");
- staticpro (&Qutf_8);
- Qutf_8 = intern ("utf-8");
staticpro (&Qlatin_1);
Qlatin_1 = intern ("latin-1");
diff --git a/src/xterm.h b/src/xterm.h
index 10a9aaa2961..8bc9782b02b 100644
--- a/src/xterm.h
+++ b/src/xterm.h
@@ -431,6 +431,9 @@ extern struct font_info *x_get_font_info P_ ((struct frame *f, int));
extern struct font_info *x_load_font P_ ((struct frame *, char *, int));
extern struct font_info *x_query_font P_ ((struct frame *, char *));
extern void x_find_ccl_program P_ ((struct font_info *));
+extern Lisp_Object x_get_font_repertory P_ ((struct frame *,
+ struct font_info *));
+
/* Each X frame object points to its own struct x_output object
in the output_data.x field. The x_output structure contains
@@ -512,6 +515,10 @@ struct x_output
/* Default ASCII font of this frame. */
XFontStruct *font;
+#ifdef USE_FONT_BACKEND
+ struct font *fontp;
+#endif /* USE_FONT_BACKEND */
+
/* The baseline offset of the default ASCII font. */
int baseline_offset;
@@ -697,6 +704,10 @@ enum
#define FRAME_TOOLBAR_HEIGHT(f) ((f)->output_data.x->toolbar_height)
#define FRAME_BASELINE_OFFSET(f) ((f)->output_data.x->baseline_offset)
+#ifdef USE_FONT_BACKEND
+#define FRAME_FONT_OBJECT(f) ((f)->output_data.x->fontp)
+#endif /* USE_FONT_BACKEND */
+
/* This gives the x_display_info structure for the display F is on. */
#define FRAME_X_DISPLAY_INFO(f) ((f)->output_data.x->display_info)