summaryrefslogtreecommitdiff
path: root/lisp/international/mule.el
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/international/mule.el')
-rw-r--r--lisp/international/mule.el529
1 files changed, 529 insertions, 0 deletions
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
new file mode 100644
index 00000000000..82c2679455a
--- /dev/null
+++ b/lisp/international/mule.el
@@ -0,0 +1,529 @@
+;;; mule.el --- basic commands for mulitilingual environment
+
+;; Copyright (C) 1995 Free Software Foundation, Inc.
+;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
+
+;; Keywords: mule, multilingual, character set, coding system
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs; see the file COPYING. If not, write to
+;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+;;; Code:
+
+(defconst mule-version "3.0 (MOMIJINOGA)" "\
+Version number and name of this version of MULE (multilingual environment).")
+
+(defconst mule-version-date "1998.1.1" "\
+Distribution date of this version of MULE (multilingual environment).")
+
+(defun load-with-code-conversion (fullname file &optional noerror nomessage)
+ "Execute a file of Lisp code named FILE whose absolute path is FULLNAME.
+The FILE is decoded before evaluation if necessary.
+If optional second arg NOERROR is non-nil,
+ report no error if FILE doesn't exist.
+Print messages at start and end of loading unless
+ optional third arg NOMESSAGE is non-nil.
+Return t if file exists."
+ (if (null (file-readable-p fullname))
+ (and (null noerror)
+ (signal 'file-error (list "Cannot open load file" file)))
+ ;; Read file with code conversion, and then eval.
+ (let* ((buffer
+ ;; To avoid any autoloading, set default-major-mode to
+ ;; fundamental-mode.
+ (let ((default-major-mode 'fundamental-mode))
+ ;; We can't use `generate-new-buffer' because files.el
+ ;; is not yet loaded.
+ (get-buffer-create (generate-new-buffer-name " *load*"))))
+ (load-in-progress t))
+ (or nomessage (message "Loading %s..." file))
+ (unwind-protect
+ (progn
+ (save-excursion
+ (set-buffer buffer)
+ (insert-file-contents fullname)
+ ;; We must set `buffer-file-name' for `eval-buffer' and
+ ;; `load-history'.
+ (setq buffer-file-name file)
+ ;; Make `kill-buffer' quiet.
+ (set-buffer-modified-p nil))
+ ;; Eval in the original buffer.
+ (eval-buffer buffer))
+ (kill-buffer buffer))
+ (let ((hook (assoc file after-load-alist)))
+ (if hook
+ (mapcar (function eval) (cdr hook))))
+ (or nomessage noninteractive
+ (message "Loading %s...done" file))
+ t)))
+
+;; API (Application Program Interface) for charsets.
+
+;; Return t if OBJ is a quoted symbol.
+(defsubst quoted-symbol-p (obj)
+ (and (listp obj) (eq (car obj) 'quote)))
+
+(defsubst charsetp (object)
+ "T is OBJECT is a charset."
+ (and (symbolp object) (vectorp (get object 'charset))))
+
+(defsubst charset-info (charset)
+ "Return a vector of information of CHARSET.
+The elements of the vector are:
+ CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
+ LEADING-CODE-BASE, LEADING-CODE-EXT,
+ ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
+ REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
+ PLIST,
+where
+CHARSET-ID (integer) is the identification number of the charset.
+DIMENSION (integer) is the number of bytes to represent a character of
+the charset: 1 or 2.
+CHARS (integer) is the number of characters in a dimension: 94 or 96.
+BYTE (integer) is the length of multi-byte form of a character in
+ the charset: one of 1, 2, 3, and 4.
+WIDTH (integer) is the number of columns a character in the charset
+ occupies on the screen: one of 0, 1, and 2.
+DIRECTION (integer) is the rendering direction of characters in the
+ charset when rendering. If 0, render from right to left, else
+ render from left to right.
+LEADING-CODE-BASE (integer) is the base leading-code for the
+ charset.
+LEADING-CODE-EXT (integer) is the extended leading-code for the
+ charset. All charsets of less than 0xA0 has the value 0.
+ISO-FINAL-CHAR (character) is the final character of the
+ corresponding ISO 2022 charset.
+ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
+ while encoding to variants of ISO 2022 coding system, one of the
+ following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
+REVERSE-CHARSET (integer) is the charset which differs only in
+ LEFT-TO-RIGHT value from the charset. If there's no such a
+ charset, the value is -1.
+SHORT-NAME (string) is the short name to refer to the charset.
+LONG-NAME (string) is the long name to refer to the charset
+DESCRIPTION (string) is the description string of the charset.
+PLIST (property list) may contain any type of information a user
+ want to put and get by functions `put-charset-property' and
+ `get-charset-property' respectively."
+ (get charset 'charset))
+
+(defmacro charset-id (charset)
+ "Return charset identification number of CHARSET."
+ (if (and (listp charset) (eq (car charset) 'quote))
+ (aref (charset-info (nth 1 charset)) 0)
+ `(aref (charset-info ,charset) 0)))
+
+(defmacro charset-bytes (charset)
+ (if (quoted-symbol-p charset)
+ (aref (charset-info (nth 1 charset)) 1)
+ `(aref (charset-info ,charset) 1)))
+
+(defmacro charset-dimension (charset)
+ (if (quoted-symbol-p charset)
+ (aref (charset-info (nth 1 charset)) 2)
+ `(aref (charset-info ,charset) 2)))
+
+(defmacro charset-chars (charset)
+ (if (quoted-symbol-p charset)
+ (aref (charset-info (nth 1 charset)) 3)
+ `(aref (charset-info ,charset) 3)))
+
+(defmacro charset-width (charset)
+ (if (quoted-symbol-p charset)
+ (aref (charset-info (nth 1 charset)) 4)
+ `(aref (charset-info ,charset) 4)))
+
+(defmacro charset-direction (charset)
+ (if (quoted-symbol-p charset)
+ (aref (charset-info (nth 1 charset)) 5)
+ `(aref (charset-info ,charset) 5)))
+
+(defmacro charset-iso-final-char (charset)
+ (if (quoted-symbol-p charset)
+ (aref (charset-info (nth 1 charset)) 8)
+ `(aref (charset-info ,charset) 8)))
+
+(defmacro charset-iso-graphic-plane (charset)
+ (if (quoted-symbol-p charset)
+ (aref (charset-info (nth 1 charset)) 9)
+ `(aref (charset-info ,charset) 9)))
+
+(defmacro charset-reverse-charset (charset)
+ (if (quoted-symbol-p charset)
+ (aref (charset-info (nth 1 charset)) 10)
+ `(aref (charset-info ,charset) 10)))
+
+(defmacro charset-short-name (charset)
+ (if (quoted-symbol-p charset)
+ (aref (charset-info (nth 1 charset)) 11)
+ `(aref (charset-info ,charset) 11)))
+
+(defmacro charset-long-name (charset)
+ (if (quoted-symbol-p charset)
+ (aref (charset-info (nth 1 charset)) 12)
+ `(aref (charset-info ,charset) 12)))
+
+(defmacro charset-description (charset)
+ (if (quoted-symbol-p charset)
+ (aref (charset-info (nth 1 charset)) 13)
+ `(aref (charset-info ,charset) 13)))
+
+(defmacro charset-plist (charset)
+ (if (quoted-symbol-p charset)
+ (aref (charset-info (nth 1 charset)) 14)
+ `(aref (charset-info ,charset) 14)))
+
+(defun set-charset-plist (charset plist)
+ (aset (charset-info charset) 14 plist))
+
+(defmacro make-char (charset &optional c1 c2)
+ (if (quoted-symbol-p charset)
+ `(make-char-internal ,(charset-id (nth 1 charset)) ,c1 ,c2)
+ `(make-char-internal (charset-id ,charset) ,c1 ,c2)))
+
+;; Coding-system staffs
+
+;; Coding-system object is a symbol that has the property
+;; `coding-system' and `eol-type'.
+;;
+;; The value of the property `coding-system' is a coding-vector of the
+;; format: [TYPE MNEMONIC DOCSTRING NOT-USED-NOW FLAGS].
+;; See comments in src/coding.c for more detail. The property value
+;; may be another coding-system, in which case, the coding-vector
+;; should be taken from that coding-system.
+;;
+;; The value of the property `eol-type' is integer 0..2 or a vector of
+;; length 3. The integer value 0, 1, and 2 indicate the format of
+;; end-of-line LF, CRLF, and CR respectively. The vector value
+;; indicates that the format of end-of-line should be detected
+;; automatically. Nth element of the vector is the subsidiary
+;; coding-system whose `eol-type' property is integer value.
+;;
+;; Coding-system may also have properties `post-read-conversion' and
+;; `pre-write-conversion and the values are functions.
+;;
+;; The function in `post-read-conversion' is called after some text is
+;; inserted and decoded along the coding-system and before any
+;; functions in `after-insert-functions' are called. The arguments to
+;; this function is the same as those of a function in
+;; `after-insert-functions', i.e. LENGTH of a text while putting point
+;; at the head of the text to be decoded
+;;
+;; The function in `pre-write-conversion' is called after all
+;; functions in `write-region-annotate-functions' and
+;; `buffer-file-format' are called, and before the text is encoded by
+;; the coding-system. The arguments to this function is the same as
+;; those of a function in `write-region-annotate-functions', i.e. FROM
+;; and TO specifying region of a text.
+
+(defsubst coding-vector-type (vec) (aref vec 0))
+(defsubst coding-vector-mnemonic (vec) (aref vec 1))
+(defsubst coding-vector-docstring (vec) (aref vec 2))
+(defsubst coding-vector-flags (vec) (aref vec 4))
+
+;; Return type of CODING-SYSTEM.
+(defun coding-system-type (coding-system)
+ (let ((vec (coding-system-vector coding-system)))
+ (if vec (coding-vector-type vec))))
+
+;; Return mnemonic character of CODING-SYSTEM.
+(defun coding-system-mnemonic (coding-system)
+ (let ((vec (coding-system-vector coding-system)))
+ (if vec (coding-vector-mnemonic vec)
+ ?-)))
+
+;; Return docstring of CODING-SYSTEM.
+(defun coding-system-docstring (coding-system)
+ (let ((vec (coding-system-vector coding-system)))
+ (if vec (coding-vector-docstring vec))))
+
+;; Return flags of CODING-SYSTEM.
+(defun coding-system-flags (coding-system)
+ (let ((vec (coding-system-vector coding-system)))
+ (if vec (coding-vector-flags vec))))
+
+;; Return eol-type of CODING-SYSTEM.
+(defun coding-system-eoltype (coding-system)
+ (and coding-system
+ (or (get coding-system 'eol-type)
+ (coding-system-eoltype (get coding-system 'coding-system)))))
+
+;; Return mnemonic character of eol-type of CODING-SYSTEM.
+(defun coding-system-eoltype-mnemonic (coding-system)
+ (let ((eol-type (coding-system-eoltype coding-system)))
+ (cond ((vectorp eol-type) eol-mnemonic-undecided)
+ ((eq eol-type 0) eol-mnemonic-unix)
+ ((eq eol-type 1) eol-mnemonic-unix)
+ ((eq eol-type 2) eol-mnemonic-unix)
+ (t ?-))))
+
+;; Return function for post-read-conversion of CODING-SYSTEM.
+(defun coding-system-post-read-conversion (coding-system)
+ (and coding-system
+ (symbolp coding-system)
+ (or (get coding-system 'post-read-conversion)
+ (coding-system-post-read-conversion
+ (get coding-system 'coding-system)))))
+
+;; Return function for pre-write-conversion of CODING-SYSTEM.
+(defun coding-system-pre-write-conversion (coding-system)
+ (and coding-system
+ (symbolp coding-system)
+ (or (get coding-system 'pre-write-conversion)
+ (coding-system-pre-write-conversion
+ (get coding-system 'coding-system)))))
+
+(defun make-coding-system (coding-system type mnemonic docstring
+ &optional flags)
+ "Define a new CODING-SYSTEM (symbol).
+Remaining arguments are TYPE, MNEMONIC, DOCSTRING, and FLAGS (optional).
+TYPE is an integer value indicating the type of coding-system as follows:
+ 0: Emacs internal format,
+ 1: Shift-JIS (or MS-Kanji) used mainly on Japanese PC,
+ 2: ISO-2022 including many variants,
+ 3: Big5 used mainly on Chinese PC,
+ 4: private, CCL programs provide encoding/decoding algorithm.
+MNEMONIC is a character to be displayed on mode line for the coding-system.
+DOCSTRING is a documentation string for the coding-system.
+FLAGS specifies more precise information of each TYPE.
+ If TYPE is 2 (ISO-2022), FLAGS should be a list of:
+ CHARSET0, CHARSET1, CHARSET2, CHARSET3, SHORT-FORM,
+ ASCII-EOL, ASCII-CNTL, SEVEN, LOCKING-SHIFT, SINGLE-SHIFT,
+ USE-ROMAN, USE-OLDJIS, NO-ISO6429.
+ CHARSETn are character sets initially designated to Gn graphic registers.
+ If CHARSETn is nil, Gn is never used.
+ If CHARSETn is t, Gn can be used but nothing designated initially.
+ If CHARSETn is a list of character sets, those character sets are
+ designated to Gn on output, but nothing designated to Gn initially.
+ SHORT-FORM non-nil means use short designation sequence on output.
+ ASCII-EOL non-nil means designate ASCII to g0 at end of line on output.
+ ASCII-CNTL non-nil means designate ASCII to g0 before control codes and
+ SPACE on output.
+ SEVEN non-nil means use 7-bit code only on output.
+ LOCKING-SHIFT non-nil means use locking-shift.
+ SINGLE-SHIFT non-nil means use single-shift.
+ USE-ROMAN non-nil means designate JIS0201-1976-Roman instead of ASCII.
+ USE-OLDJIS non-nil means designate JIS0208-1976 instead of JIS0208-1983.
+ NO-ISO6429 non-nil means not use ISO6429's direction specification.
+ If TYPE is 4 (private), FLAGS should be a cons of CCL programs,
+ for encoding and decoding. See the documentation of CCL for more detail."
+
+ ;; At first, set a value of `coding-system' property.
+ (let ((coding-vector (make-vector 5 nil)))
+ (aset coding-vector 0 type)
+ (aset coding-vector 1
+ ;; MNEMONIC must be a printable character.
+ (if (and (> mnemonic ? ) (< mnemonic 127)) mnemonic ? ))
+ (aset coding-vector 2 (if (stringp docstring) docstring ""))
+ (aset coding-vector 3 nil) ; obsolete element
+ (cond ((eq type 2) ; ISO2022
+ (let ((i 0)
+ (vec (make-vector 32 nil)))
+ (while (< i 4)
+ (let ((charset (car flags)))
+ (if (and charset (not (eq charset t)))
+ (if (symbolp charset)
+ (setq charset (charset-id charset))
+ (let (elt l)
+ (while charset
+ (setq elt (car charset))
+ (if (and elt (not (eq elt t)))
+ (setq elt (charset-id elt)))
+ (setq l (cons elt l))
+ (setq charset (cdr charset)))
+ (setq charset (nreverse l)))))
+ (aset vec i charset))
+ (setq flags (cdr flags) i (1+ i)))
+ (while (and (< i 32) flags)
+ (aset vec i (car flags))
+ (setq flags (cdr flags) i (1+ i)))
+ (aset coding-vector 4 vec)))
+ ((eq type 4) ; private
+ (if (and (consp flags)
+ (vectorp (car flags))
+ (vectorp (cdr flags)))
+ (aset coding-vector 4 flags)
+ (error "Invalid FLAGS argument for TYPE 4 (CCL)")))
+ (t (aset coding-vector 4 flags)))
+ (put coding-system 'coding-system coding-vector))
+
+ ;; Next, set a value of `eol-type' property. The value is a vector
+ ;; of subsidiary coding-systems, each corresponds to a coding-system
+ ;; for the detected end-of-line format.
+ (let ((codings (vector (intern (format "%s-unix" coding-system))
+ (intern (format "%s-dos" coding-system))
+ (intern (format "%s-mac" coding-system))))
+ (i 0))
+ (while (< i 3)
+ (put (aref codings i) 'coding-system coding-system)
+ (put (aref codings i) 'eol-type i)
+ (setq i (1+ i)))
+ (put coding-system 'eol-type codings))
+ )
+
+(defun define-coding-system-alias (symbol new-symbol)
+ "Define NEW-SYMBOL as the same coding system as SYMBOL."
+ (check-coding-system symbol)
+ (put new-symbol 'coding-system (get symbol 'coding-system))
+ (let ((eol-type (get symbol 'eol-type)))
+ (if (vectorp eol-type)
+ (let* ((name (symbol-name new-symbol))
+ (new (vector (intern (concat name "-unix"))
+ (intern (concat name "-dos"))
+ (intern (concat name "-mac"))))
+ (i 0))
+ (while (< i 3)
+ (define-coding-system-alias (aref eol-type i) (aref new i))
+ (setq i (1+ i)))
+ (setq eol-type new)))
+ (put new-symbol 'eol-type eol-type)))
+
+(defvar buffer-file-coding-system nil
+ "Coding-system of the file which the current-buffer is visiting.")
+(make-variable-buffer-local 'buffer-file-coding-system)
+;; This value should not be reset by changing major mode.
+(put 'buffer-file-coding-system 'permanent-local t)
+
+(defun set-buffer-file-coding-system (coding-system &optional force)
+ "Set buffer-file-coding-system of the current buffer to CODING-SYSTEM.
+If eol-type of the current buffer-file-coding-system is an integer value N, and
+ eol-type of CODING-SYSTEM is a vector, the Nth element of the vector is set
+ instead of CODING-SYSTEM itself.
+Optional prefix argument FORCE non-nil means CODING-SYSTEM is set
+ regardless of eol-type of the current buffer-file-coding-system."
+ (interactive "zBuffer-file-coding-system: \nP")
+ (check-coding-system coding-system)
+ (if (null force)
+ (let ((x (coding-system-eoltype buffer-file-coding-system))
+ (y (coding-system-eoltype coding-system)))
+ (if (and (numberp x) (>= x 0) (<= x 2) (vectorp y))
+ (setq coding-system (aref y x)))))
+ (setq buffer-file-coding-system coding-system)
+ (set-buffer-modified-p t)
+ (force-mode-line-update))
+
+(defun set-current-process-coding-system (input output)
+ (interactive
+ "zCoding-system for process input: \nzCoding-system for process output: ")
+ (let ((proc (get-buffer-process (current-buffer))))
+ (if (null proc)
+ (error "no process")
+ (check-coding-system input)
+ (check-coding-system output)
+ (set-process-coding-system proc input output)))
+ (force-mode-line-update))
+
+(defvar default-process-coding-system (cons nil nil)
+ "Cons of default values used to read from and write to process.")
+
+(defun set-coding-priority (arg)
+ "Set priority of coding-category according to LIST.
+LIST is a list of coding-categories ordered by priority."
+ (let (l)
+ ;; Put coding-categories listed in ARG to L while checking the
+ ;; validity. We assume that `coding-category-list' contains whole
+ ;; coding-categories.
+ (while arg
+ (if (null (memq (car arg) coding-category-list))
+ (error "Invalid element in argument: %s" (car arg)))
+ (setq l (cons (car arg) l))
+ (setq arg (cdr arg)))
+ ;; Put coding-category not listed in ARG to L.
+ (while coding-category-list
+ (if (null (memq (car coding-category-list) l))
+ (setq l (cons (car coding-category-list) l)))
+ (setq coding-category-list (cdr coding-category-list)))
+ ;; Update `coding-category-list' and return it.
+ (setq coding-category-list (nreverse l))))
+
+;;; FILE I/O
+
+;; Set buffer-file-coding-system of the current buffer after some text
+;; is inserted.
+(defun after-insert-file-set-buffer-file-coding-system (inserted)
+ (if last-coding-system-used
+ (let ((coding-system
+ (find-new-buffer-file-coding-system last-coding-system-used))
+ (modified-p (buffer-modified-p)))
+ (if coding-system
+ (set-buffer-file-coding-system coding-system))
+ (set-buffer-modified-p modified-p)))
+ nil)
+
+(setq after-insert-file-functions
+ (cons 'after-insert-file-set-buffer-file-coding-system
+ after-insert-file-functions))
+
+;; The coding-vector and eol-type of coding-system returned is decided
+;; independently in the following order.
+;; 1. That of buffer-file-coding-system locally bound.
+;; 2. That of CODING.
+
+(defun find-new-buffer-file-coding-system (coding)
+ "Return a coding system for a buffer when a file of CODING is inserted.
+The returned value is set to `buffer-file-coding-system' of the
+current buffer. Return nil if there's no need of setting new
+buffer-file-coding-system."
+ (let (local-coding local-eol
+ found-eol
+ new-coding new-eol)
+ (if (null coding)
+ ;; Nothing found about coding.
+ nil
+
+ ;; Get information of the current local value of
+ ;; `buffer-file-coding-system' in LOCAL-EOL and LOCAL-CODING.
+ (if (local-variable-p 'buffer-file-coding-system)
+ ;; Something already set locally.
+ (progn
+ (setq local-eol (coding-system-eoltype buffer-file-coding-system))
+ (if (null (numberp local-eol))
+ ;; But eol-type is not yet set.
+ (setq local-eol nil))
+ (if (null (eq (coding-system-type buffer-file-coding-system) t))
+ ;; This is not automatic-conversion.
+ (progn
+ (setq local-coding buffer-file-coding-system)
+ (while (symbolp (get local-coding 'coding-system))
+ (setq local-coding (get local-coding 'coding-system))))
+ )))
+
+ (if (and local-eol local-coding)
+ ;; The current buffer has already set full coding-system, we
+ ;; had better not change it.
+ nil
+
+ (setq found-eol (coding-system-eoltype coding))
+ (if (null (numberp found-eol))
+ ;; But eol-type is not found.
+ (setq found-eol nil))
+ (if (eq (coding-system-type coding) t)
+ ;; This is automatic-conversion, which means nothing found
+ ;; except for eol-type.
+ (setq coding nil))
+
+ ;; The local setting takes precedence over the found one.
+ (setq new-coding (or local-coding coding))
+ (setq new-eol (or local-eol found-eol))
+ (if (and (numberp new-eol)
+ (vectorp (coding-system-eoltype new-coding)))
+ (setq new-coding
+ (aref (coding-system-eoltype new-coding) new-eol)))
+ new-coding))))
+
+(provide 'mule)
+
+;;; mule.el ends here