summaryrefslogtreecommitdiff
path: root/lisp/international
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/international')
-rw-r--r--lisp/international/ccl.el4
-rw-r--r--lisp/international/characters.el69
-rw-r--r--lisp/international/codepage.el1
-rw-r--r--lisp/international/encoded-kb.el1
-rw-r--r--lisp/international/fontset.el1
-rw-r--r--lisp/international/isearch-x.el1
-rw-r--r--lisp/international/iso-acc.el1
-rw-r--r--lisp/international/iso-ascii.el1
-rw-r--r--lisp/international/iso-cvt.el1
-rw-r--r--lisp/international/iso-insert.el1
-rw-r--r--lisp/international/iso-swed.el1
-rw-r--r--lisp/international/iso-transl.el1
-rw-r--r--lisp/international/ja-dic-cnv.el1
-rw-r--r--lisp/international/ja-dic-utl.el1
-rw-r--r--lisp/international/kinsoku.el28
-rw-r--r--lisp/international/kkc.el1
-rw-r--r--lisp/international/latin1-disp.el221
-rw-r--r--lisp/international/mule-cmds.el129
-rw-r--r--lisp/international/mule-conf.el10
-rw-r--r--lisp/international/mule-util.el72
-rw-r--r--lisp/international/mule.el258
-rw-r--r--lisp/international/ogonek.el1
-rw-r--r--lisp/international/quail.el13
-rw-r--r--lisp/international/swedish.el1
-rw-r--r--lisp/international/titdic-cnv.el8
-rw-r--r--lisp/international/utf-7.el2
26 files changed, 541 insertions, 288 deletions
diff --git a/lisp/international/ccl.el b/lisp/international/ccl.el
index 777bf13c1a5..883b53d83e3 100644
--- a/lisp/international/ccl.el
+++ b/lisp/international/ccl.el
@@ -774,7 +774,8 @@
(ccl-check-register right rrr)
(ccl-embed-code 'write-expr-register 0
(logior (ash op 3)
- (get right 'ccl-register-number))))))
+ (get right 'ccl-register-number))
+ left))))
(t
(error "CCL: Invalid argument: %s" cmd))))
@@ -1544,4 +1545,5 @@ See the documentation of `define-ccl-program' for the detail of CCL program."
(provide 'ccl)
+;;; arch-tag: 836bcd27-63a1-4a56-b232-1145ecf823fb
;;; ccl.el ends here
diff --git a/lisp/international/characters.el b/lisp/international/characters.el
index 71260420adf..c437bd1b0b0 100644
--- a/lisp/international/characters.el
+++ b/lisp/international/characters.el
@@ -682,6 +682,15 @@
(set-case-syntax c "." tbl)
(setq c (1+ c)))
;; Fixme: These aren't all right:
+ (setq c #x2010)
+ (while (<= c #x2016)
+ (set-case-syntax c "_" tbl)
+ (setq c (1+ c)))
+ ;; Punctuation syntax for quotation marks (like `)
+ (while (<= c #x201f)
+ (set-case-syntax c "." tbl)
+ (setq c (1+ c)))
+ ;; Fixme: These aren't all right:
(while (<= c #x2027)
(set-case-syntax c "_" tbl)
(setq c (1+ c)))
@@ -738,6 +747,65 @@
;; Fixme: syntax for symbols &c
)
+
+(let ((pairs
+ '("$,1sEsF(B" ; U+2045 U+2046
+ "$,1s}s~(B" ; U+207D U+207E
+ "$,1t-t.(B" ; U+208D U+208E
+ "$,1{){*(B" ; U+2329 U+232A
+ "$,1|T|U(B" ; U+23B4 U+23B5
+ "$,2&H&I(B" ; U+2768 U+2769
+ "$,2&J&K(B" ; U+276A U+276B
+ "$,2&L&M(B" ; U+276C U+276D
+ "$,2&P&Q(B" ; U+2770 U+2771
+ "$,2&R&S(B" ; U+2772 U+2773
+ "$,2&T&U(B" ; U+2774 U+2775
+ "$,2'f'g(B" ; U+27E6 U+27E7
+ "$,2'h'i(B" ; U+27E8 U+27E9
+ "$,2'j'k(B" ; U+27EA U+27EB
+ "$,2,#,$(B" ; U+2983 U+2984
+ "$,2,%,&(B" ; U+2985 U+2986
+ "$,2,',((B" ; U+2987 U+2988
+ "$,2,),*(B" ; U+2989 U+298A
+ "$,2,+,,(B" ; U+298B U+298C
+ "$,2,-,.(B" ; U+298D U+298E
+ "$,2,/,0(B" ; U+298F U+2990
+ "$,2,1,2(B" ; U+2991 U+2992
+ "$,2,3,4(B" ; U+2993 U+2994
+ "$,2,5,6(B" ; U+2995 U+2996
+ "$,2,7,8(B" ; U+2997 U+2998
+ "$,2-<-=(B" ; U+29FC U+29FD
+ "$,2=H=I(B" ; U+3008 U+3009
+ "$,2=J=K(B" ; U+300A U+300B
+ "$,2=L=M(B" ; U+300C U+300D
+ "$,2=N=O(B" ; U+300E U+300F
+ "$,2=P=Q(B" ; U+3010 U+3011
+ "$,2=T=U(B" ; U+3014 U+3015
+ "$,2=V=W(B" ; U+3016 U+3017
+ "$,2=X=Y(B" ; U+3018 U+3019
+ "$,2=Z=[(B" ; U+301A U+301B
+ "$,3m~m(B" ; U+FD3E U+FD3F
+ "$,3pUpV(B" ; U+FE35 U+FE36
+ "$,3pWpX(B" ; U+FE37 U+FE38
+ "$,3pYpZ(B" ; U+FE39 U+FE3A
+ "$,3p[p\(B" ; U+FE3B U+FE3C
+ "$,3p]p^(B" ; U+FE3D U+FE3E
+ "$,3p_p`(B" ; U+FE3F U+FE40
+ "$,3papb(B" ; U+FE41 U+FE42
+ "$,3pcpd(B" ; U+FE43 U+FE44
+ "$,3pypz(B" ; U+FE59 U+FE5A
+ "$,3p{p|(B" ; U+FE5B U+FE5C
+ "$,3p}p~(B" ; U+FE5D U+FE5E
+ "$,3rhri(B" ; U+FF08 U+FF09
+ "$,3s;s=(B" ; U+FF3B U+FF3D
+ "$,3s[s](B" ; U+FF5B U+FF5D
+ "$,3s_s`(B" ; U+FF5F U+FF60
+ "$,3sbsc(B" ; U+FF62 U+FF63
+ )))
+ (dolist (elt pairs)
+ (modify-syntax-entry (aref elt 0) (string ?\( (aref elt 1)))
+ (modify-syntax-entry (aref elt 1) (string ?\) (aref elt 0)))))
+
;; For each character set, put the information of the most proper
;; coding system to encode it by `preferred-coding-system' property.
@@ -1008,4 +1076,5 @@
;;; coding: utf-8-emacs
;;; End:
+;;; arch-tag: 85889c35-9f4d-4912-9bf5-82de31b0d42d
;;; characters.el ends here
diff --git a/lisp/international/codepage.el b/lisp/international/codepage.el
index e11b26e8609..e4880bd56ac 100644
--- a/lisp/international/codepage.el
+++ b/lisp/international/codepage.el
@@ -141,4 +141,5 @@
(provide 'codepage)
+;;; arch-tag: 80328de8-b94e-4386-be26-5876105731f0
;;; codepage.el ends here
diff --git a/lisp/international/encoded-kb.el b/lisp/international/encoded-kb.el
index a6ceb3f2d07..b62effd42f1 100644
--- a/lisp/international/encoded-kb.el
+++ b/lisp/international/encoded-kb.el
@@ -425,4 +425,5 @@ as a multilingual text encoded in a coding system set by
(provide 'encoded-kb)
+;;; arch-tag: 76f0f9b3-65e7-45c3-b692-59509a87ad44
;;; encoded-kb.el ends here
diff --git a/lisp/international/fontset.el b/lisp/international/fontset.el
index 81fcaa9813c..a751579c056 100644
--- a/lisp/international/fontset.el
+++ b/lisp/international/fontset.el
@@ -745,4 +745,5 @@ See the documentation of `create-fontset-from-fontset-spec' for the format.")
;;
(provide 'fontset)
+;;; arch-tag: bb53e629-0234-403c-950e-551e61554849
;;; fontset.el ends here
diff --git a/lisp/international/isearch-x.el b/lisp/international/isearch-x.el
index 64df952de68..60736277b97 100644
--- a/lisp/international/isearch-x.el
+++ b/lisp/international/isearch-x.el
@@ -130,4 +130,5 @@
(isearch-update)))
(isearch-process-search-char last-char)))
+;;; arch-tag: 1a90a6cf-2cb2-477a-814a-9ff895852822
;;; isearch-x.el ends here
diff --git a/lisp/international/iso-acc.el b/lisp/international/iso-acc.el
index 840a564800d..971d1d128e6 100644
--- a/lisp/international/iso-acc.el
+++ b/lisp/international/iso-acc.el
@@ -475,4 +475,5 @@ Noninteractively, this operates on text from START to END."
(add-hook 'minibuffer-setup-hook 'iso-acc-minibuf-setup)
+;;; arch-tag: 149ff409-7c3e-4574-9b5d-ac038939c0a6
;;; iso-acc.el ends here
diff --git a/lisp/international/iso-ascii.el b/lisp/international/iso-ascii.el
index 86f3d2b4348..3bffb1795f0 100644
--- a/lisp/international/iso-ascii.el
+++ b/lisp/international/iso-ascii.el
@@ -175,4 +175,5 @@
(provide 'iso-ascii)
+;;; arch-tag: 687edf0d-f792-471e-b50e-be805938359a
;;; iso-ascii.el ends here
diff --git a/lisp/international/iso-cvt.el b/lisp/international/iso-cvt.el
index 6c8a31a8fac..b0dffc40f50 100644
--- a/lisp/international/iso-cvt.el
+++ b/lisp/international/iso-cvt.el
@@ -892,4 +892,5 @@ Optional arg BUFFER is ignored (for use in `format-alist')."
(provide 'iso-cvt)
+;;; arch-tag: 64ae843f-ed0e-43e1-ba50-ffd581b90840
;;; iso-cvt.el ends here
diff --git a/lisp/international/iso-insert.el b/lisp/international/iso-insert.el
index 9dcfb123b81..e181d16588a 100644
--- a/lisp/international/iso-insert.el
+++ b/lisp/international/iso-insert.el
@@ -628,4 +628,5 @@
(provide 'iso-insert)
+;;; arch-tag: eb5f97bd-a034-4851-92ff-ab1f1bf92814
;;; iso-insert.el ends here
diff --git a/lisp/international/iso-swed.el b/lisp/international/iso-swed.el
index 156e3452693..60cc7fd632a 100644
--- a/lisp/international/iso-swed.el
+++ b/lisp/international/iso-swed.el
@@ -148,4 +148,5 @@
(provide 'iso-swed)
+;;; arch-tag: 6b3dc269-660c-44b6-a25f-680b921eaf2c
;;; iso-swed.el ends here
diff --git a/lisp/international/iso-transl.el b/lisp/international/iso-transl.el
index ea5f9ff53ae..a071b14b3d2 100644
--- a/lisp/international/iso-transl.el
+++ b/lisp/international/iso-transl.el
@@ -292,4 +292,5 @@ sequence VECTOR. (VECTOR is normally one character long.)")
(provide 'iso-transl)
+;;; arch-tag: 034cfedf-7ebd-461d-bcd0-5c79e6dc0b61
;;; iso-transl.el ends here
diff --git a/lisp/international/ja-dic-cnv.el b/lisp/international/ja-dic-cnv.el
index 72d98d9386d..682a2a8f2b5 100644
--- a/lisp/international/ja-dic-cnv.el
+++ b/lisp/international/ja-dic-cnv.el
@@ -569,4 +569,5 @@ To get complete usage, invoke:
;; coding: iso-2022-7bit
;; End:
+;;; arch-tag: dec06fb0-8118-45b1-80d7-dc360b6fd3b2
;;; ja-dic-cnv.el ends here
diff --git a/lisp/international/ja-dic-utl.el b/lisp/international/ja-dic-utl.el
index 30b7de18380..07d9e1ff760 100644
--- a/lisp/international/ja-dic-utl.el
+++ b/lisp/international/ja-dic-utl.el
@@ -222,4 +222,5 @@ LEIM is available from the same ftp directory as Emacs."))
;; coding: iso-2022-7bit
;; End:
+;;; arch-tag: df2218fa-469c-40f6-bace-7f89a053f9c0
;;; ja-dic-utl.el ends here
diff --git a/lisp/international/kinsoku.el b/lisp/international/kinsoku.el
index b551161a6f3..28d6409d46b 100644
--- a/lisp/international/kinsoku.el
+++ b/lisp/international/kinsoku.el
@@ -121,11 +121,17 @@ The value 0 means there's no limitation.")
;; Try to resolve `kinsoku' restriction by making the current line longer.
(defun kinsoku-longer ()
- (let ((pos-and-column (save-excursion
- (forward-char 1)
- (while (aref (char-category-set (following-char)) ?>)
- (forward-char 1))
- (cons (point) (current-column)))))
+ (let ((pos-and-column
+ (save-excursion
+ (forward-char 1)
+ (while (and (not (eobp))
+ (or (aref (char-category-set (following-char)) ?>)
+ ;; protect non-kinsoku words
+ (not (or (eq (preceding-char) ? )
+ (aref (char-category-set (preceding-char))
+ ?|)))))
+ (forward-char 1))
+ (cons (point) (current-column)))))
(if (or (<= kinsoku-limit 0)
(< (cdr pos-and-column) (+ (current-fill-column) kinsoku-limit)))
(goto-char (car pos-and-column)))))
@@ -135,9 +141,14 @@ The value 0 means there's no limitation.")
(defun kinsoku-shorter (linebeg)
(let ((pos (save-excursion
(forward-char -1)
- (while (and (< linebeg (point))
- (or (aref (char-category-set (preceding-char)) ?<)
- (aref (char-category-set (following-char)) ?>)))
+ (while (and
+ (< linebeg (point))
+ (or (aref (char-category-set (preceding-char)) ?<)
+ (aref (char-category-set (following-char)) ?>)
+ ;; protect non-kinsoku words
+ (not (or (eq (preceding-char) ? )
+ (aref (char-category-set (preceding-char))
+ ?|)))))
(forward-char -1))
(point))))
(if (< linebeg pos)
@@ -170,4 +181,5 @@ the context of text formatting."
(aref (char-category-set (preceding-char)) ?<))
(kinsoku-shorter linebeg))))
+;;; arch-tag: e6b036bc-9e5b-4e9f-a22c-4ed04e37777e
;;; kinsoku.el ends here
diff --git a/lisp/international/kkc.el b/lisp/international/kkc.el
index a25cab5f4d1..972bbbfdddf 100644
--- a/lisp/international/kkc.el
+++ b/lisp/international/kkc.el
@@ -656,4 +656,5 @@ and change the current conversion to the last one in the group."
;;
(provide 'kkc)
+;;; arch-tag: 3cbfd56e-74e6-4f60-bb46-ba7c2d366fbf
;;; kkc.el ends here
diff --git a/lisp/international/latin1-disp.el b/lisp/international/latin1-disp.el
index 1ab79c4f1ac..a0be6db3d2f 100644
--- a/lisp/international/latin1-disp.el
+++ b/lisp/international/latin1-disp.el
@@ -84,7 +84,7 @@ This option also treats some characters in the `mule-unicode-...'
charsets if you don't have a Unicode font with which to display them.
Setting this variable directly does not take effect;
-use either M-x customize of the function `latin1-display'."
+use either \\[customize] or the function `latin1-display'."
:group 'latin1-display
:type 'boolean
:require 'latin1-disp
@@ -106,7 +106,7 @@ a Unicode font with which to display them."
(if sets
(progn
(mapc #'latin1-display-setup sets)
- (unless (latin1-char-displayable-p
+ (unless (char-displayable-p
(make-char 'mule-unicode-0100-24ff 32 33))
;; It doesn't look as though we have a Unicode font.
(map-char-table
@@ -133,6 +133,7 @@ a Unicode font with which to display them."
(?\$,1rt(B "--") ;; EM DASH
(?\$,1ub(B "TM") ;; TRADE MARK SIGN
(?\$,1s:(B ">") ;; SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ (?$,1s"(B ",A7(B")
)))
(setq latin1-display t))
(mapc #'latin1-display-reset latin1-display-sets)
@@ -220,47 +221,11 @@ character set: `latin-2', `hebrew' etc."
(setq language 'cyrillic-iso))
(let* ((info (get-language-info language 'charset))
(char (and info (make-char (car (remq 'ascii info)) ?\ ))))
- (and char (latin1-char-displayable-p char))))
+ (and char (char-displayable-p char))))
-;; This should be moved into mule-utils or somewhere after 21.1.
-(defun latin1-char-displayable-p (char)
- "Return non-nil if we should be able to display CHAR.
-On a multi-font display, the test is only whether there is an
-appropriate font from the selected frame's fontset to display CHAR's
-charset in general. Since fonts may be specified on a per-character
-basis, this may not be accurate."
- (cond ((< char 256)
- ;; Single byte characters are always displayable.
- t)
- ((display-multi-font-p)
- ;; On a window system, a character is displayable if we have
- ;; a font for that character in the default face of the
- ;; currently selected frame.
- (let ((fontset (frame-parameter (selected-frame) 'font))
- font-pattern)
- (if (query-fontset fontset)
- (setq font-pattern (fontset-font fontset char)))
- (or font-pattern
- (setq font-pattern (fontset-font "fontset-default" char)))
- (if font-pattern
- (progn
- ;; Now FONT-PATTERN is a string or a cons of family
- ;; field pattern and registry field pattern.
- (or (stringp font-pattern)
- (setq font-pattern (concat "-"
- (or (car font-pattern) "*")
- "-*-"
- (cdr font-pattern))))
- (x-list-fonts font-pattern 'default (selected-frame) 1)))))
- (t
- (let ((coding (terminal-coding-system)))
- (if coding
- (let ((safe-chars (coding-system-get coding 'safe-chars))
- (safe-charsets (coding-system-get coding 'safe-charsets)))
- (or (and safe-chars
- (aref safe-chars char))
- (and safe-charsets
- (memq (char-charset char) safe-charsets)))))))))
+;; Backwards compatibility.
+(defalias 'latin1-char-displayable-p 'char-displayable-p)
+(make-obsolete 'latin1-char-displayable-p 'char-displayable-p "21.5")
(defun latin1-display-setup (set &optional force)
"Set up Latin-1 display for characters in the given SET.
@@ -269,12 +234,11 @@ whether a font for SET is available and don't set the display if it
is. If FORCE is non-nil, set up the display regardless."
(cond
((eq set 'latin-2)
- (when (or force
- (not (latin1-display-check-font set)))
- (latin1-display-identities set)
- (mapc
- (lambda (l)
- (apply 'latin1-display-char l))
+ (latin1-display-identities set)
+ (mapc
+ (lambda (l)
+ (or (char-displayable-p (car l))
+ (apply 'latin1-display-char l)))
'((?,BF(B "'C" "C'")
(?,BP(B "'D" "/D")
(?,B&(B "'S" "S'")
@@ -335,15 +299,14 @@ is. If FORCE is non-nil, set up the display regardless."
(?,Bk(B "\"e")
(?,B=(B "''" "'")
(?,B7(B "'<") ; Lynx's rendering of caron
- ))))
+ )))
((eq set 'latin-3)
- (when (or force
- (not (latin1-display-check-font set)))
- (latin1-display-identities set)
- (mapc
- (lambda (l)
- (apply 'latin1-display-char l))
+ (latin1-display-identities set)
+ (mapc
+ (lambda (l)
+ (or (char-displayable-p (car l))
+ (apply 'latin1-display-char l)))
'((?,C!(B "/H")
(?,C"(B "~`" "'(")
(?,C&(B "^H" "H^")
@@ -371,15 +334,14 @@ is. If FORCE is non-nil, set up the display regardless."
(?,Cx(B "^g" "g^")
(?,C}(B "~u" "u(")
(?,C~(B "^s" "s^")
- (?,C(B "/." "^.")))))
+ (?,C(B "/." "^."))))
((eq set 'latin-4)
- (when (or force
- (not (latin1-display-check-font set)))
- (latin1-display-identities set)
- (mapc
- (lambda (l)
- (apply 'latin1-display-char l))
+ (latin1-display-identities set)
+ (mapc
+ (lambda (l)
+ (or (char-displayable-p (car l))
+ (apply 'latin1-display-char l)))
'((?,D!(B "A," "A;")
(?,D"(B "k/" "kk")
(?,D#(B "R," ",R")
@@ -428,15 +390,14 @@ is. If FORCE is non-nil, set up the display regardless."
(?,Dy(B "u," "u;")
(?,D}(B "u~" "~u")
(?,D~(B "u-")
- (?,D(B "^.")))))
+ (?,D(B "^."))))
((eq set 'latin-5)
- (when (or force
- (not (latin1-display-check-font set)))
- (latin1-display-identities set)
- (mapc
- (lambda (l)
- (apply 'latin1-display-char l))
+ (latin1-display-identities set)
+ (mapc
+ (lambda (l)
+ (or (char-displayable-p (car l))
+ (apply 'latin1-display-char l)))
'((?,Mp(B "~g" "g(")
(?,MP(B "~G" "G(")
(?,M](B ".I" "I^.")
@@ -445,15 +406,14 @@ is. If FORCE is non-nil, set up the display regardless."
(?,Mj(B "^e" "e<") ; from latin-post
(?,Ml(B ".e" "e^.")
(?,Mo(B "\"i" "i-") ; from latin-post
- (?,M}(B ".i" "i.")))))
+ (?,M}(B ".i" "i."))))
((eq set 'latin-8)
- (when (or force
- (not (latin1-display-check-font set)))
- (latin1-display-identities set)
- (mapc
- (lambda (l)
- (apply 'latin1-display-char l))
+ (latin1-display-identities set)
+ (mapc
+ (lambda (l)
+ (or (char-displayable-p (car l))
+ (apply 'latin1-display-char l)))
'((?,_!(B ".B" "B`")
(?,_"(B ".b" "b`")
(?,_%(B ".c" "c`")
@@ -484,15 +444,14 @@ is. If FORCE is non-nil, set up the display regardless."
(?,_W(B ".T" "T`")
(?,_~(B "^y" "y^")
(?,_^(B "^Y" "Y^")
- (?,_/(B "\"Y")))))
+ (?,_/(B "\"Y"))))
((eq set 'latin-9)
- (when (or force
- (not (latin1-display-check-font set)))
- (latin1-display-identities set)
- (mapc
- (lambda (l)
- (apply 'latin1-display-char l))
+ (latin1-display-identities set)
+ (mapc
+ (lambda (l)
+ (or (char-displayable-p (car l))
+ (apply 'latin1-display-char l)))
'((?,b((B "~s" "s<")
(?,b&(B "~S" "S<")
(?,b$(B "Euro" "E=")
@@ -500,14 +459,13 @@ is. If FORCE is non-nil, set up the display regardless."
(?,b4(B "~Z" "Z<")
(?,b>(B "\"Y")
(?,b=(B "oe")
- (?,b<(B "OE")))))
+ (?,b<(B "OE"))))
((eq set 'greek)
- (when (or force
- (not (latin1-display-check-font set)))
- (mapc
- (lambda (l)
- (apply 'latin1-display-char l))
+ (mapc
+ (lambda (l)
+ (or (char-displayable-p (car l))
+ (apply 'latin1-display-char l)))
'((?,F!(B "9'")
(?,F"(B "'9")
(?,F/(B "-M")
@@ -566,9 +524,10 @@ is. If FORCE is non-nil, set up the display regardless."
(?,F|(B "'o")
(?,F}(B "'u")
(?,F~(B "'w")))
- (mapc
- (lambda (l)
- (aset standard-display-table (car l) (string-to-vector (cadr l))))
+ (mapc
+ (lambda (l)
+ (or (char-displayable-p (car l))
+ (aset standard-display-table (car l) (string-to-vector (cadr l)))))
'((?,FA(B "A")
(?,FB(B "B")
(?,FE(B "E")
@@ -583,23 +542,22 @@ is. If FORCE is non-nil, set up the display regardless."
(?,FT(B "T")
(?,FU(B "Y")
(?,FW(B "X")
- (?,Fo(B "o")))))
+ (?,Fo(B "o"))))
((eq set 'hebrew)
- (when (or force
- (not (latin1-display-check-font set)))
- ;; Don't start with identities, since we don't have definitions
- ;; for a lot of Hebrew in internal.el. (Intlfonts is also
- ;; missing some glyphs.)
- (let ((i 34))
- (while (<= i 62)
- (aset standard-display-table
- (make-char 'hebrew-iso8859-8 i)
- (vector (make-char 'latin-iso8859-1 i)))
- (setq i (1+ i))))
- (mapc
- (lambda (l)
- (aset standard-display-table (car l) (string-to-vector (cadr l))))
+ ;; Don't start with identities, since we don't have definitions
+ ;; for a lot of Hebrew in internal.el. (Intlfonts is also
+ ;; missing some glyphs.)
+ (let ((i 34))
+ (while (<= i 62)
+ (aset standard-display-table
+ (make-char 'hebrew-iso8859-8 i)
+ (vector (make-char 'latin-iso8859-1 i)))
+ (setq i (1+ i))))
+ (mapc
+ (lambda (l)
+ (or (char-displayable-p (car l))
+ (aset standard-display-table (car l) (string-to-vector (cadr l)))))
'((?,H_(B "=2")
(?,H`(B "A+")
(?,Ha(B "B+")
@@ -627,19 +585,21 @@ is. If FORCE is non-nil, set up the display regardless."
(?,Hw(B "Q+")
(?,Hx(B "R+")
(?,Hy(B "Sh")
- (?,Hz(B "T+")))))
+ (?,Hz(B "T+"))))
;; Arabic probably isn't so useful in the absence of Arabic
;; language support...
((eq set 'arabic)
(setq set 'arabic)
- (when (or force
- (not (latin1-display-check-font set)))
- (aset standard-display-table ?,G (B ",A (B")
- (aset standard-display-table ?,G$(B ",A$(B")
- (aset standard-display-table ?,G-(B ",A-(B")
- (mapc (lambda (l)
- (apply 'latin1-display-char l))
+ (or (char-displayable-p ?,G (B)
+ (aset standard-display-table ?,G (B ",A (B"))
+ (or (char-displayable-p ?,G$(B)
+ (aset standard-display-table ?,G$(B ",A$(B"))
+ (or (char-displayable-p ?,G-(B)
+ (aset standard-display-table ?,G-(B ",A-(B"))
+ (mapc (lambda (l)
+ (or (char-displayable-p (car l))
+ (apply 'latin1-display-char l)))
'((?,G,(B ",+")
(?,G;(B ";+")
(?,G?(B "?+")
@@ -687,15 +647,14 @@ is. If FORCE is non-nil, set up the display regardless."
(?,Go(B "'+")
(?,Gp(B "1+")
(?,Gq(B "3+")
- (?,Gr(B "0+")))))
+ (?,Gr(B "0+"))))
((eq set 'cyrillic)
(setq set 'cyrillic-iso)
- (when (or force
- (not (latin1-display-check-font set)))
- (mapc
- (lambda (l)
- (apply 'latin1-display-char l))
+ (mapc
+ (lambda (l)
+ (or (char-displayable-p (car l))
+ (apply 'latin1-display-char l)))
'((?,L"(B "Dj")
(?,L#(B "Gj")
(?,L$(B "IE")
@@ -762,9 +721,10 @@ is. If FORCE is non-nil, set up the display regardless."
(?,L|(B "kj")
(?,L~(B "v%")
(?,L(B "dzh")))
- (mapc
- (lambda (l)
- (aset standard-display-table (car l) (string-to-vector (cadr l))))
+ (mapc
+ (lambda (l)
+ (or (char-displayable-p (car l))
+ (aset standard-display-table (car l) (string-to-vector (cadr l)))))
'((?,L!(B ",AK(B")
(?,L%(B "S")
(?,L&(B "I")
@@ -793,7 +753,7 @@ is. If FORCE is non-nil, set up the display regardless."
(?,Lu(B "s")
(?,Lv(B "i")
(?,Lw(B ",Ao(B")
- (?,Lx(B "j")))))
+ (?,Lx(B "j"))))
(t (error "Unsupported character set: %S" set)))
@@ -802,11 +762,11 @@ is. If FORCE is non-nil, set up the display regardless."
;;;###autoload
(defcustom latin1-display-ucs-per-lynx nil
"Set up Latin-1/ASCII display for Unicode characters.
-This uses the transliterations of the Lynx browser. The display is't
+This uses the transliterations of the Lynx browser. The display isn't
changed if the display can render Unicode characters.
Setting this variable directly does not take effect;
-use either M-x customize of the function `latin1-display'."
+use either \\[customize] or the function `latin1-display'."
:group 'latin1-display
:type 'boolean
:require 'latin1-disp
@@ -825,7 +785,7 @@ turn it off and display Unicode characters literally. The display
is't changed if the display can render Unicode characters."
(interactive "p")
(if (> arg 0)
- (unless (latin1-char-displayable-p
+ (unless (char-displayable-p
(make-char 'mule-unicode-0100-24ff 32 33))
;; It doesn't look as though we have a Unicode font.
(let ((latin1-display-format "%s"))
@@ -3244,8 +3204,8 @@ is't changed if the display can render Unicode characters."
(?\$,3sc(B "\"")
(?\$,3sd(B ",")
;; Not from Lynx
- (?$,3r_(B . "")
- (?$,3u=(B . "?")))))
+ (?$,3r_(B "")
+ (?$,3u=(B "?")))))
(aset standard-display-table
(make-char 'mule-unicode-0100-24ff) nil)
(aset standard-display-table
@@ -3257,4 +3217,5 @@ is't changed if the display can render Unicode characters."
(provide 'latin1-disp)
+;;; arch-tag: 68b2872e-d667-4f48-8e2f-ec2ba2d29406
;;; latin1-disp.el ends here
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el
index 3178ec3b2b0..cd60e266b45 100644
--- a/lisp/international/mule-cmds.el
+++ b/lisp/international/mule-cmds.el
@@ -1,4 +1,4 @@
-;;; mule-cmds.el --- commands for mulitilingual environment
+;;; mule-cmds.el --- commands for mulitilingual environment -*-coding: iso-2022-7bit -*-
;; Copyright (C) 1995, 2003 Electrotechnical Laboratory, JAPAN.
;; Licensed to the Free Software Foundation.
;; Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
@@ -293,7 +293,7 @@ wrong, use this command again to toggle back to the right mode."
(not (eq cmd 'universal-argument-other-key)))
(let ((current-prefix-arg prefix-arg)
;; Have to bind `last-command-char' here so that
- ;; `digit-argument', for isntance, can compute the
+ ;; `digit-argument', for instance, can compute the
;; prefix arg.
(last-command-char (aref keyseq 0)))
(call-interactively cmd)))
@@ -326,6 +326,11 @@ This also sets the following values:
o default value for the command `set-keyboard-coding-system'."
(check-coding-system coding-system)
(setq-default buffer-file-coding-system coding-system)
+ (if (fboundp 'ucs-set-table-for-input)
+ (dolist (buffer (buffer-list))
+ (or (local-variable-p 'buffer-file-coding-system buffer)
+ (ucs-set-table-for-input buffer))))
+
(if default-enable-multibyte-characters
(setq default-file-name-coding-system coding-system))
;; If coding-system is nil, honor that on MS-DOS as well, so
@@ -870,7 +875,7 @@ one of the following safe coding systems, or edit the buffer:\n")
(insert "\n")
(fill-region-as-paragraph pos (point)))
(insert "Or specify any other coding system
-on your risk of losing the problematic characters.\n")))
+at the risk of losing the problematic characters.\n")))
;; Read a coding system.
(setq default-coding-system (or (car safe) (car codings)))
@@ -902,13 +907,33 @@ on your risk of losing the problematic characters.\n")))
(goto-char (point-min))
(set-auto-coding (or file buffer-file-name "")
(buffer-size))))))
- (if (and auto-cs coding-system
+ ;; Merge coding-system and auto-cs as far as possible.
+ (if (not coding-system)
+ (setq coding-system auto-cs)
+ (if (not auto-cs)
+ (setq auto-cs coding-system)
+ (let ((eol-type-1 (coding-system-eol-type coding-system))
+ (eol-type-2 (coding-system-eol-type auto-cs)))
+ (if (eq (coding-system-base coding-system) 'undecided)
+ (setq coding-system (coding-system-change-text-conversion
+ coding-system auto-cs))
+ (if (eq (coding-system-base auto-cs) 'undecided)
+ (setq auto-cs (coding-system-change-text-conversion
+ auto-cs coding-system))))
+ (if (vectorp eol-type-1)
+ (or (vectorp eol-type-2)
+ (setq coding-system (coding-system-change-eol-conversion
+ coding-system eol-type-2)))
+ (if (vectorp eol-type-2)
+ (setq auto-cs (coding-system-change-eol-conversion
+ auto-cs eol-type-1)))))))
+
+ (if (and auto-cs
;; Don't barf if writing a compressed file, say.
;; This check perhaps isn't ideal, but is probably
;; the best thing to do.
(not (auto-coding-alist-lookup (or file buffer-file-name "")))
- (not (coding-system-equal (coding-system-base coding-system)
- (coding-system-base auto-cs))))
+ (not (coding-system-equal coding-system auto-cs)))
(unless (yes-or-no-p
(format "Selected encoding %s disagrees with \
%s specified by file contents. Really save (else edit coding cookies \
@@ -981,6 +1006,12 @@ Meaningful values for KEY include
environment.
features value is a list of features requested in this
language environment.
+ ctext-non-standard-encodings
+ value is a list of non-standard encoding
+ names used in extended segments of CTEXT.
+ See the variable
+ `ctext-non-standard-encodings' for more
+ detail.
The following keys take effect only when multibyte characters are
globally disabled, i.e. the value of `default-enable-multibyte-characters'
@@ -1720,6 +1751,15 @@ specifies the character set for the major languages of Western Europe."
(while required-features
(require (car required-features))
(setq required-features (cdr required-features))))
+
+ ;; Don't invoke fontset-related functions if fontsets aren't
+ ;; supported in this build of Emacs.
+ (when (fboundp 'fontset-list)
+ (let ((overriding-fontspec (get-language-info language-name
+ 'overriding-fontspec)))
+ (if overriding-fontspec
+ (set-overriding-fontspec-internal overriding-fontspec))))
+
(let ((func (get-language-info language-name 'setup-function)))
(if (functionp func)
(funcall func)))
@@ -1801,7 +1841,7 @@ Setting this variable directly does not take effect. See
(aset standard-display-table 160 [32])
;; With luck, non-Latin-1 fonts are more recent and so don't
;; have this bug.
- (aset standard-display-table 2208 [32]) ; Latin-1 NBSP
+ (aset standard-display-table (make-char 'latin-iso8859-1 160) [32])
;; Most Windows programs send out apostrophes as \222. Most X fonts
;; don't contain a character at that position. Map it to the ASCII
;; apostrophe. [This is actually RIGHT SINGLE QUOTATION MARK,
@@ -1809,7 +1849,23 @@ Setting this variable directly does not take effect. See
;; fonts probably have the appropriate glyph at this position,
;; so they could use standard-display-8bit. It's better to use a
;; proper windows-1252 coding system. --fx]
- (aset standard-display-table 146 [39]))))
+ (aset standard-display-table 146 [39])
+ ;; XFree86 4 has changed most of the fonts from their designed
+ ;; versions such that `' no longer appears as balanced quotes.
+ ;; Assume it has iso10646 fonts installed, so we can display
+ ;; balanced quotes.
+ (when (and (eq window-system 'x)
+ (string= "The XFree86 Project, Inc" (x-server-vendor))
+ (> (aref (number-to-string (nth 2 (x-server-version))) 0)
+ ?3))
+ ;; We suppress these setting for the moment because the
+ ;; above assumption is wrong.
+ ;; (aset standard-display-table ?' [?,F"(B])
+ ;; (aset standard-display-table ?` [?,F!(B])
+ ;; The fonts don't have the relevant bug.
+ (aset standard-display-table 160 nil)
+ (aset standard-display-table (make-char 'latin-iso8859-1 160)
+ nil)))))
(defun set-language-environment-coding-systems (language-name
&optional eol-type)
@@ -1834,7 +1890,7 @@ of `buffer-file-coding-system' set by this function."
(put 'describe-specified-language-support 'apropos-inhibit t)
-;; Print a language specific information such as input methods,
+;; Print language-specific information such as input methods,
;; charsets, and coding systems. This function is intended to be
;; called from the menu:
;; [menu-bar mule describe-language-environment LANGUAGE]
@@ -1958,6 +2014,13 @@ of `buffer-file-coding-system' set by this function."
;; http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html.
;; CODESET and MODIFIER are implementation-dependent.
+ ;; jasonr comments: MS Windows uses three letter codes for
+ ;; languages instead of the two letter ISO codes that POSIX
+ ;; uses. In most cases the first two letters are the same, so
+ ;; most of the regexps in locale-language-names work. Japanese
+ ;; and Chinese are exceptions, which are listed in the
+ ;; non-standard section at the bottom of locale-language-names.
+
; aa Afar
; ab Abkhazian
("af" . "Latin-1") ; Afrikaans
@@ -2151,14 +2214,13 @@ If the language name is nil, there is no corresponding language environment.")
(".*8859[-_]?9\\>" . "Latin-5")
(".*8859[-_]?14\\>" . "Latin-8")
(".*8859[-_]?15\\>" . "Latin-9")
- (".*utf\\(-?8\\)\\>" . "UTF-8")
- ;; @euro actually indicates the monetary component, but it
- ;; probably implies a Latin-9 codeset component.
- ;; utf-8@euro exists, so put this last.
+ (".*utf\\(?:-?8\\)?\\>" . "UTF-8")
+ ;; utf-8@euro exists, so put this last. (@euro really specifies
+ ;; the currency, rather than the charset.)
(".*@euro\\>" . "Latin-9")))
"List of pairs of locale regexps and charset language names.
The first element whose locale regexp matches the start of a downcased locale
-specifies the language name whose charsets corresponds to that locale.
+specifies the language name whose charset corresponds to that locale.
This language name is used if its charsets disagree with the charsets of
the language name that would otherwise be used for this locale.")
@@ -2193,13 +2255,39 @@ names. E.g. `ISO_8859-1' and `iso88591' both match `iso-8859-1'."
(setq charset2 (replace-regexp-in-string "[-_]" "" charset2))
(eq t (compare-strings charset1 nil nil charset2 nil nil t)))
+(defvar locale-charset-alist nil
+ "Coding system alist keyed on locale-style charset name.
+Used by `locale-charset-to-coding-system'.")
+
+(defun locale-charset-to-coding-system (charset)
+ "Find coding system corresponding to CHARSET.
+CHARSET is any sort of non-Emacs charset name, such as might be used
+in a locale codeset, or elsewhere. It is matched to a coding system
+first by case-insensitive lookup in `locale-charset-alist'. Then
+matches are looked for in the coding system list, treating case and
+the characters `-' and `_' as insignificant. The coding system base
+is returned. Thus, for instance, if charset \"ISO8859-2\",
+`iso-latin-2' is returned."
+ (or (car (assoc-ignore-case charset locale-charset-alist))
+ (let ((cs coding-system-alist)
+ c)
+ (while (and (not c) cs)
+ (if (locale-charset-match-p charset (caar cs))
+ (setq c (intern (caar cs)))
+ (pop cs)))
+ (if c (coding-system-base c)))))
+
+;; Fixme: This ought to deal with the territory part of the locale
+;; too, for setting things such as calendar holidays, ps-print paper
+;; size, spelling dictionary.
+
(defun set-locale-environment (&optional locale-name)
"Set up multi-lingual environment for using LOCALE-NAME.
This sets the language environment, the coding system priority,
the default input method and sometimes other things.
LOCALE-NAME should be a string which is the name of a locale supported
-by the system; often it is of the form xx_XX.CODE, where xx is a
+by the system. Often it is of the form xx_XX.CODE, where xx is a
language, XX is a country, and CODE specifies a character set and
coding system. For example, the locale name \"ja_JP.EUC\" might name
a locale for Japanese in Japan using the `japanese-iso-8bit'
@@ -2223,7 +2311,7 @@ See also `locale-charset-language-names', `locale-language-names',
(setq locale-translation-file-name
(let ((files
'("/usr/lib/X11/locale/locale.alias" ; e.g. X11R6.4
- "/usr/X11R6/lib/X11/locale/locale.alias" ; e.g. RedHat 4.2
+ "/usr/X11R6/lib/X11/locale/locale.alias" ; XFree86, e.g. RedHat 4.2
"/usr/openwin/lib/locale/locale.alias" ; e.g. Solaris 2.6
;;
;; The following name appears after the X-related names above,
@@ -2271,7 +2359,11 @@ See also `locale-charset-language-names', `locale-language-names',
(charset-language-name
(locale-name-match locale locale-charset-language-names))
(coding-system
- (locale-name-match locale locale-preferred-coding-systems)))
+ (or (locale-name-match locale locale-preferred-coding-systems)
+ (when locale
+ (if (string-match "\\.\\([^@]+\\)" locale)
+ (locale-charset-to-coding-system
+ (match-string 1 locale)))))))
;; Give preference to charset-language-name over language-name.
(if (and charset-language-name
@@ -2393,7 +2485,7 @@ It can be retrieved with `(get-char-code-property CHAR PROPNAME)'."
(if (and coding-system (eq (coding-system-type coding-system) 'iso-2022))
;; Try to get a pretty description for ISO 2022 escape sequences.
(function (lambda (x) (or (cdr (assq x iso-2022-control-alist))
- (format "%02X" x))))
+ (format "0x%02X" x))))
(function (lambda (x) (format "0x%02X" x))))
str " "))
@@ -2439,4 +2531,5 @@ If CODING-SYSTEM can't safely encode CHAR, return nil."
(defvar nonascii-translation-table nil "This variable is obsolete.")
+;;; arch-tag: b382c432-4b36-460e-bf4c-05efd0bb18dc
;;; mule-cmds.el ends here
diff --git a/lisp/international/mule-conf.el b/lisp/international/mule-conf.el
index 12e3231bf46..2e516c08aac 100644
--- a/lisp/international/mule-conf.el
+++ b/lisp/international/mule-conf.el
@@ -1387,10 +1387,17 @@ Like `compound-text', but does not produce escape sequences for compositions."
(define-coding-system 'compound-text-with-extensions
"Compound text encoding with ICCCM Extended Segment extensions.
+See the variable `ctext-non-standard-encodings-alist' for the
+detail about how extended segments are handled.
+
This coding system should be used only for X selections. It is inappropriate
for decoding and encoding files, process I/O, etc."
- :coding-type 'raw-text
+ :coding-type 'iso-2022
:mnemonic ?x
+ :charset-list 'iso-2022
+ :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
+ :flags '(ascii-at-eol ascii-at-cntl
+ designation locking-shift single-shift)
:post-read-conversion 'ctext-post-read-conversion
:pre-write-conversion 'ctext-pre-write-conversion)
@@ -1491,4 +1498,5 @@ for decoding and encoding files, process I/O, etc."
;; no-byte-compile: t
;; End:
+;;; arch-tag: 7d5fed55-b6df-42f6-8d3d-0011190551f5
;;; mule-conf.el ends here
diff --git a/lisp/international/mule-util.el b/lisp/international/mule-util.el
index 0ba9bf5360d..c3ea76c8716 100644
--- a/lisp/international/mule-util.el
+++ b/lisp/international/mule-util.el
@@ -2,12 +2,11 @@
;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
;; Licensed to the Free Software Foundation.
-;; Copyright (C) 2002 Free Software Foundation, Inc.
+;; Copyright (C) 2000, 2002 Free Software Foundation, Inc.
;; Copyright (C) 2003
;; National Institute of Advanced Industrial Science and Technology (AIST)
;; Registration Number H13PRO009
-
;; Keywords: mule, multilingual
;; This file is part of GNU Emacs.
@@ -186,18 +185,18 @@ defaults to \"...\"."
;; (("foobarbaz" 6 nil nil "...") . "foo...")
;; (("foobarbaz" 7 2 nil "...") . "ob...")
;; (("foobarbaz" 9 3 nil "...") . "barbaz")
-;; (("$B$3(Bh$B$s(Be$B$K(Bl$B$A(Bl$B$O(Bo" 15 1 ? t) . " h$B$s(Be$B$K(Bl$B$A(Bl$B$O(Bo")
-;; (("$B$3(Bh$B$s(Be$B$K(Bl$B$A(Bl$B$O(Bo" 14 1 ? t) . " h$B$s(Be$B$K(Bl$B$A(B...")
-;; (("x" 3 nil nil "$(0GnM$(B") . "x")
-;; (("$AVP(B" 2 nil nil "$(0GnM$(B") . "$AVP(B")
-;; (("$AVP(B" 1 nil ?x "$(0GnM$(B") . "x") ;; XEmacs error
-;; (("$AVPND(B" 3 nil ? "$(0GnM$(B") . "$AVP(B ") ;; XEmacs error
-;; (("foobarbaz" 4 nil nil "$(0GnM$(B") . "$(0GnM$(B")
-;; (("foobarbaz" 5 nil nil "$(0GnM$(B") . "f$(0GnM$(B")
-;; (("foobarbaz" 6 nil nil "$(0GnM$(B") . "fo$(0GnM$(B")
-;; (("foobarbaz" 8 3 nil "$(0GnM$(B") . "b$(0GnM$(B")
-;; (("$B$3(Bh$B$s(Be$B$K(Bl$B$A(Bl$B$O(Bo" 14 4 ?x "$BF|K\8l(B") . "xe$B$KF|K\8l(B")
-;; (("$B$3(Bh$B$s(Be$B$K(Bl$B$A(Bl$B$O(Bo" 13 4 ?x "$BF|K\8l(B") . "xex$BF|K\8l(B")
+;; (("$A$3(Bh$A$s(Be$A$K(Bl$A$A(Bl$A$O(Bo" 15 1 ? t) . " h$A$s(Be$A$K(Bl$A$A(Bl$A$O(Bo")
+;; (("$A$3(Bh$A$s(Be$A$K(Bl$A$A(Bl$A$O(Bo" 14 1 ? t) . " h$A$s(Be$A$K(Bl$A$A(B...")
+;; (("x" 3 nil nil "$(Gemk#(B") . "x")
+;; (("$AVP(B" 2 nil nil "$(Gemk#(B") . "$AVP(B")
+;; (("$AVP(B" 1 nil ?x "$(Gemk#(B") . "x") ;; XEmacs error
+;; (("$AVPND(B" 3 nil ? "$(Gemk#(B") . "$AVP(B ") ;; XEmacs error
+;; (("foobarbaz" 4 nil nil "$(Gemk#(B") . "$(Gemk#(B")
+;; (("foobarbaz" 5 nil nil "$(Gemk#(B") . "f$(Gemk#(B")
+;; (("foobarbaz" 6 nil nil "$(Gemk#(B") . "fo$(Gemk#(B")
+;; (("foobarbaz" 8 3 nil "$(Gemk#(B") . "b$(Gemk#(B")
+;; (("$A$3(Bh$A$s(Be$A$K(Bl$A$A(Bl$A$O(Bo" 14 4 ?x "$AHU1>$(Gk#(B") . "xe$A$KHU1>$(Gk#(B")
+;; (("$A$3(Bh$A$s(Be$A$K(Bl$A$A(Bl$A$O(Bo" 13 4 ?x "$AHU1>$(Gk#(B") . "xex$AHU1>$(Gk#(B")
;; ))
;; (let (ret)
;; (condition-case e
@@ -366,6 +365,50 @@ language environment LANG-ENV."
(with-coding-priority coding-priority
(detect-coding-region from to)))))
+;;;###autoload
+(defun char-displayable-p (char)
+ "Return non-nil if we should be able to display CHAR.
+On a multi-font display, the test is only whether there is an
+appropriate font from the selected frame's fontset to display CHAR's
+charset in general. Since fonts may be specified on a per-character
+basis, this may not be accurate."
+ (cond ((< char 256)
+ ;; Single byte characters are always displayable.
+ t)
+ ((display-multi-font-p)
+ ;; On a window system, a character is displayable if we have
+ ;; a font for that character in the default face of the
+ ;; currently selected frame.
+ (let ((fontset (frame-parameter (selected-frame) 'font))
+ font-pattern)
+ (if (query-fontset fontset)
+ (setq font-pattern (fontset-font fontset char)))
+ (or font-pattern
+ (setq font-pattern (fontset-font "fontset-default" char)))
+ (if font-pattern
+ (progn
+ ;; Now FONT-PATTERN is a string or a cons of family
+ ;; field pattern and registry field pattern.
+ (or (stringp font-pattern)
+ (let ((family (or (car font-pattern) "*"))
+ (registry (or (cdr font-pattern) "*")))
+ (or (string-match "-" family)
+ (setq family (concat "*-" family)))
+ (or (string-match "-" registry)
+ (setq registry (concat registry "-*")))
+ (setq font-pattern
+ (format "-%s-*-*-*-*-*-*-*-*-*-*-%s"
+ family registry))))
+ (x-list-fonts font-pattern 'default (selected-frame) 1)))))
+ (t
+ (let ((coding (terminal-coding-system)))
+ (if coding
+ (let ((safe-chars (coding-system-get coding 'safe-chars))
+ (safe-charsets (coding-system-get coding 'safe-charsets)))
+ (or (and safe-chars
+ (aref safe-chars char))
+ (and safe-charsets
+ (memq (char-charset char) safe-charsets)))))))))
(provide 'mule-util)
@@ -373,4 +416,5 @@ language environment LANG-ENV."
;; coding: iso-2022-7bit
;; End:
+;;; arch-tag: 5bdb52b6-a3a5-4529-b7a0-37d01b0e570b
;;; mule-util.el ends here
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index d2cc9c0f195..8b2702bc30a 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -1117,7 +1117,7 @@ If CODING-SYSTEM leaves the text conversion unspecified, or if it
leaves the end-of-line conversion unspecified, FORCE controls what to
do. If FORCE is nil, get the unspecified aspect (or aspects) from the
buffer's previous `buffer-file-coding-system' value (if it is
-specified there). Otherwise, levae it unspecified.
+specified there). Otherwise, leave it unspecified.
This marks the buffer modified so that the succeeding \\[save-buffer]
surely saves the buffer with CODING-SYSTEM. From a program, if you
@@ -1229,7 +1229,7 @@ See Info node `Specify Coding' and Info node `Single-Byte Character Support'.
On non-windowing terminals, this is set from the locale by default.
Setting this variable directly does not take effect;
-use either M-x customize or \\[set-keyboard-coding-system]."
+use either \\[customize] or \\[set-keyboard-coding-system]."
:type '(coding-system :tag "Coding system")
:link '(info-link "(emacs)Specify Coding")
:link '(info-link "(emacs)Single-Byte Character Support")
@@ -1302,12 +1302,42 @@ Now we have more convenient function `set-coding-system-priority'."
;;; X selections
(defvar ctext-non-standard-encodings-alist
- '(("ISO8859-15" . latin-iso8859-15)
- ("ISO8859-14" . latin-iso8859-14)
- ("KOI8-R" . koi8-r)
- ("BIG5-0" . big5))
- "Alist of non-standard encoding names vs Emacs coding systems.
-This alist is used to decode an extened segment of a compound text.")
+ '(("big5-0" big5 2 (chinese-big5-1 chinese-big5-2))
+ ("ISO8859-14" iso-8859-14 1 latin-iso8859-14)
+ ("ISO8859-15" iso-8859-15 1 latin-iso8859-15))
+ "Alist of non-standard encoding names vs the corresponding usages in CTEXT.
+
+It controls how extended segments of a compound text are handled
+by the coding system `compound-text-with-extensions'.
+
+Each element has the form (ENCODING-NAME CODING-SYSTEM N-OCTET CHARSET).
+
+ENCODING-NAME is an encoding name of an \"extended segments\".
+
+CODING-SYSTEM is the coding-system to encode (or decode) the
+characters into (or from) the extended segment.
+
+N-OCTET is the number of octets (bytes) that encodes a character
+in the segment. It can be 0 (meaning the number of octets per
+character is variable), 1, 2, 3, or 4.
+
+CHARSET is a charater set containing characters that are encoded
+in the segment. It can be a list of character sets. It can also
+be a char-table, in which case characters that have non-nil value
+in the char-table are the target.
+
+On decoding CTEXT, all encoding names listed here are recognized.
+
+On encoding CTEXT, encoding names in the variable
+`ctext-non-standard-encodings' (which see) and in the information
+listed for the current language environment under the key
+`ctext-non-standard-encodings' are used.")
+
+(defvar ctext-non-standard-encodings
+ '("big5-0")
+ "List of non-standard encoding names used in extended segments of CTEXT.
+Each element must be one of the names listed in the variable
+`ctext-non-standard-encodings-alist' (which see).")
(defvar ctext-non-standard-encodings-regexp
(string-to-multibyte
@@ -1319,13 +1349,9 @@ This alist is used to decode an extened segment of a compound text.")
"\\(\e%G[^\e]*\e%@\\)")))
;; Functions to support "Non-Standard Character Set Encodings" defined
-;; by the COMPOUND-TEXT spec.
-;; We support that by decoding the whole data by `ctext' which just
-;; pertains byte sequences belonging to ``extended segment'', then
-;; decoding those byte sequences one by one in Lisp.
-;; This function also supports "The UTF-8 encoding" described in the
-;; section 7 of the documentation fo COMPOUND-TEXT distributed with
-;; XFree86.
+;; by the COMPOUND-TEXT spec. They also support "The UTF-8 encoding"
+;; described in the section 7 of the documentation of COMPOUND-TEXT
+;; distributed with XFree86.
(defun ctext-post-read-conversion (len)
"Decode LEN characters encoded as Compound Text with Extended Segments."
@@ -1348,14 +1374,14 @@ This alist is used to decode an extened segment of a compound text.")
(let* ((M (char-after (+ pos 4)))
(L (char-after (+ pos 5)))
(encoding (match-string 2))
- (coding (or (cdr (assoc-ignore-case
- encoding
- ctext-non-standard-encodings-alist))
- (coding-system-p
- (intern (downcase encoding))))))
- (if enable-multibyte-characters
- (setq M (multibyte-char-to-unibyte M)
- L (multibyte-char-to-unibyte L)))
+ (encoding-info (assoc-ignore-case
+ encoding
+ ctext-non-standard-encodings-alist))
+ (coding (if encoding-info
+ (nth 1 encoding-info)
+ (setq encoding (intern (downcase encoding)))
+ (and (coding-system-p encoding)
+ encoding))))
(setq bytes (- (+ (* (- M 128) 128) (- L 128))
(- (point) (+ pos 6))))
(when coding
@@ -1363,40 +1389,39 @@ This alist is used to decode an extened segment of a compound text.")
(forward-char bytes)
(decode-coding-region (- (point) bytes) (point) coding)))
;; ESC % G --UTF-8-BYTES-- ESC % @
- (setq bytes (- (point) pos))
- (decode-coding-region (- (point) bytes) (point) 'utf-8))))
+ (delete-char -3)
+ (delete-region pos (+ pos 3))
+ (decode-coding-region pos (point) 'utf-8))))
(goto-char (point-min))
(- (point-max) (point)))))
-;; If you add charsets here, be sure to modify the regexp used by
-;; ctext-pre-write-conversion to look up non-standard charsets.
-(defvar ctext-non-standard-designations-alist
- '(("$(0" . (big5 "big5-0" 2))
- ("$(1" . (big5 "big5-0" 2))
- ;; The following are actually standard; generating extended
- ;; segments for them is wrong and screws e.g. Latin-9 users.
- ;; 8859-{10,13,16} aren't Emacs charsets anyhow. -- fx
-;; ("-V" . (t "iso8859-10" 1))
-;; ("-Y" . (t "iso8859-13" 1))
-;; ("-_" . (t "iso8859-14" 1))
-;; ("-b" . (t "iso8859-15" 1))
-;; ("-f" . (t "iso8859-16" 1))
- )
- "Alist of ctext control sequences that introduce character sets which
-are not in the list of approved encodings, and the corresponding
-coding system, identifier string, and number of octets per encoded
-character.
-
-Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)). CTLSEQ
-is the control sequence (sans the leading ESC) that introduces the character
-set in the text encoded by compound-text. ENCODING is a coding system
-symbol; if it is t, it means that the ctext coding system already encodes
-the text correctly, and only the leading control sequence needs to be altered.
-If ENCODING is a coding system, we need to re-encode the text with that
-coding system. CHARSET is the name of the charset we need to put into
-the leading control sequence. NOCTETS is the number of octets (bytes) that
-encode each character in this charset. NOCTETS can be 0 (meaning the number
-of octets per character is variable), 1, 2, 3, or 4.")
+;; Return a char table of extended segment usage for each character.
+;; Each value of the char table is nil, one of the elements of
+;; `ctext-non-standard-encodings-alist', or the symbol `utf-8'.
+
+(defun ctext-non-standard-encodings-table ()
+ (let ((table (make-char-table 'translation-table)))
+ (aset table (make-char 'mule-unicode-0100-24ff) 'utf-8)
+ (aset table (make-char 'mule-unicode-2500-33ff) 'utf-8)
+ (aset table (make-char 'mule-unicode-e000-ffff) 'utf-8)
+ (dolist (encoding (reverse
+ (append
+ (get-language-info current-language-environment
+ 'ctext-non-standard-encodings)
+ ctext-non-standard-encodings)))
+ (let* ((slot (assoc encoding ctext-non-standard-encodings-alist))
+ (charset (nth 3 slot)))
+ (if charset
+ (cond ((charsetp charset)
+ (aset table (make-char charset) slot))
+ ((listp charset)
+ (dolist (elt charset)
+ (aset table (make-char elt) slot)))
+ ((char-table-p charset)
+ (map-char-table #'(lambda (k v)
+ (if (and v (> k 128)) (aset table k slot)))
+ charset))))))
+ table))
(defun ctext-pre-write-conversion (from to)
"Encode characters between FROM and TO as Compound Text w/Extended Segments.
@@ -1412,47 +1437,56 @@ text, and convert it in the temporary buffer. Otherwise, convert in-place."
(insert from))
;; Now we can encode the whole buffer.
- (let ((case-fold-search nil)
+ (let ((encoding-table (ctext-non-standard-encodings-table))
last-coding-system-used
- pos posend desig encode-info encoding chset noctets textlen)
- (goto-char (point-min))
- ;; At first encode the whole buffer.
- (encode-coding-region (point-min) (point-max) 'ctext-no-compositions)
- ;; Then replace ISO-2022 charset designations with extended
- ;; segments, for those charsets that are not part of the
- ;; official X registry. The regexp below finds the leading
- ;; sequences for big5.
- (while (re-search-forward "\e\\(\$([01]\\)" nil 'move)
- (setq pos (match-beginning 0)
- posend (point)
- desig (match-string 1)
- encode-info (cdr (assoc desig
- ctext-non-standard-designations-alist))
- encoding (car encode-info)
- chset (cadr encode-info)
- noctets (car (cddr encode-info)))
- (skip-chars-forward "^\e")
- (cond
- ((eq encoding t) ; only the leading sequence needs to be changed
- (setq textlen (+ (- (point) posend) (length chset) 1))
- ;; Generate the control sequence for an extended segment.
- (replace-match (string-to-multibyte (format "\e%%/%d%c%c%s"
- noctets
- (+ (/ textlen 128) 128)
- (+ (% textlen 128) 128)
- chset))
- t t))
- ((coding-system-p encoding) ; need to recode the entire segment...
- (decode-coding-region pos (point) 'ctext-no-compositions)
- (encode-coding-region pos (point) encoding)
- (setq textlen (+ (- (point) pos) (length chset) 1))
- (save-excursion
- (goto-char pos)
- (insert (string-to-multibyte (format "\e%%/%d%c%c%s"
- noctets
- (+ (/ textlen 128) 128)
- (+ (% textlen 128) 128)
- chset)))))))
+ last-pos last-encoding-info
+ encoding-info end-pos)
+ (goto-char (setq last-pos (point-min)))
+ (setq end-pos (point-marker))
+ (while (re-search-forward "[^\000-\177]+" nil t)
+ ;; Found a sequence of non-ASCII characters.
+ (setq last-pos (match-beginning 0)
+ last-encoding-info (aref encoding-table (char-after last-pos)))
+ (set-marker end-pos (match-end 0))
+ (goto-char (1+ last-pos))
+ (catch 'tag
+ (while t
+ (setq encoding-info
+ (if (< (point) end-pos)
+ (aref encoding-table (following-char))))
+ (unless (eq last-encoding-info encoding-info)
+ (cond ((consp last-encoding-info)
+ ;; Encode the previous range using an extended
+ ;; segment.
+ (let ((encoding-name (car last-encoding-info))
+ (coding-system (nth 1 last-encoding-info))
+ (noctets (nth 2 last-encoding-info))
+ len)
+ (encode-coding-region last-pos (point) coding-system)
+ (setq len (+ (length encoding-name) 1
+ (- (point) last-pos)))
+ (save-excursion
+ (goto-char last-pos)
+ (insert (string-to-multibyte
+ (format "\e%%/%d%c%c%s"
+ noctets
+ (+ (/ len 128) 128)
+ (+ (% len 128) 128)
+ encoding-name))))))
+ ((eq last-encoding-info 'utf-8)
+ ;; Encode the previous range using UTF-8 encoding
+ ;; extention.
+ (encode-coding-region last-pos (point) 'mule-utf-8)
+ (save-excursion
+ (goto-char last-pos)
+ (insert "\e%G"))
+ (insert "\e%@")))
+ (setq last-pos (point)
+ last-encoding-info encoding-info))
+ (if (< (point) end-pos)
+ (forward-char 1)
+ (throw 'tag nil)))))
+ (set-marker end-pos nil)
(goto-char (point-min))))
;; Must return nil, as build_annotations_2 expects that.
nil)
@@ -1596,31 +1630,36 @@ function by default."
(setq coding-system nil)))))
;; If no coding: tag in the head, check the tail.
+ ;; Here we must pay attention to the case that the end-of-line
+ ;; is just "\r" and we can't use "^" nor "$" in regexp.
(when (and tail-found (not coding-system))
(goto-char tail-start)
- (search-forward "\n\^L" nil t)
+ (re-search-forward "[\r\n]\^L" nil t)
(if (re-search-forward
- "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t)
- ;; The prefix is what comes before "local variables:" in its
- ;; line. The suffix is what comes after "local variables:"
+ "[\r\n]\\([^[\r\n]*\\)[ \t]*Local Variables:[ \t]*\\([^\r\n]*\\)[\r\n]"
+ tail-end t)
+ ;; The prefix is what comes before "local variables:" in its
+ ;; line. The suffix is what comes after "local variables:"
;; in its line.
(let* ((prefix (regexp-quote (match-string 1)))
(suffix (regexp-quote (match-string 2)))
(re-coding
(concat
- "^" prefix
+ "[\r\n]" prefix
;; N.B. without the \n below, the regexp can
;; eat newlines.
- "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*"
- suffix "$"))
+ "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
+ suffix "[\r\n]"))
(re-unibyte
(concat
- "^" prefix
- "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*"
- suffix "$"))
+ "[\r\n]" prefix
+ "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
+ suffix "[\r\n]"))
(re-end
- (concat "^" prefix "[ \t]*End *:[ \t]*" suffix "$"))
- (pos (point)))
+ (concat "[\r\n]" prefix "[ \t]*End *:[ \t]*" suffix
+ "[\r\n]?"))
+ (pos (1- (point))))
+ (forward-char -1) ; skip back \r or \n.
(re-search-forward re-end tail-end 'move)
(setq tail-end (point))
(goto-char pos)
@@ -1788,9 +1827,9 @@ of the function `insert-file-contents'."
(setq coding (funcall set-auto-coding-function
filename (- (point-max) (point-min)))))
(or coding
- (setq coding (find-operation-coding-system
- 'insert-file-contents
- filename visit beg end replace)))
+ (setq coding (car (find-operation-coding-system
+ 'insert-file-contents
+ filename visit beg end replace))))
(if (coding-system-p coding)
(or enable-multibyte-characters
(setq coding
@@ -2062,4 +2101,5 @@ This function is intended to be added to `auto-coding-functions'."
;;;
(provide 'mule)
+;;; arch-tag: 9aebaa6e-0e8a-40a9-b857-cb5d04a39e7c
;;; mule.el ends here
diff --git a/lisp/international/ogonek.el b/lisp/international/ogonek.el
index 05dc2bd520b..ebe2eaec900 100644
--- a/lisp/international/ogonek.el
+++ b/lisp/international/ogonek.el
@@ -500,4 +500,5 @@ followed by a non-Polish character, that is one not listed in the
(provide 'ogonek)
+;;; arch-tag: 672d7744-28ac-412b-965e-06a27e50d1d7
;;; ogonek.el ends here
diff --git a/lisp/international/quail.el b/lisp/international/quail.el
index 60c8884be38..1415648be3b 100644
--- a/lisp/international/quail.el
+++ b/lisp/international/quail.el
@@ -2097,7 +2097,7 @@ are shown (at most to the depth specified `quail-completion-max-depth')."
(scroll-other-window)))
(setq quail-current-key key)
(erase-buffer)
- (insert "Possible completion and corresponding translations are:\n")
+ (insert "Possible completion and corresponding characters are:\n")
(quail-completion-1 key map 1)
(goto-char (point-min))
(display-buffer (current-buffer))
@@ -2277,8 +2277,8 @@ should be made by `quail-build-decode-map' (which see)."
(or (> (length x) (length y))
(and (= (length x) (length y))
(not (string< x y))))))))
- (let ((frame-width (frame-width (window-frame (get-buffer-window
- (current-buffer) 'visible))))
+ (let ((window-width (window-width (get-buffer-window
+ (current-buffer) 'visible)))
(single-key-width 3)
(single-trans-width 4)
(multiple-key-width 3)
@@ -2307,7 +2307,7 @@ should be made by `quail-build-decode-map' (which see)."
(setq multiple-key-width width)))
(when single-list
(setq col-width (+ single-key-width 1 single-trans-width 1)
- cols (/ frame-width col-width)
+ cols (/ window-width col-width)
rows (/ (length single-list) cols))
(if (> (% (length single-list) cols) 0)
(setq rows (1+ rows)))
@@ -2353,7 +2353,7 @@ should be made by `quail-build-decode-map' (which see)."
(lambda (x)
(let ((width (if (integerp x) (char-width x)
(string-width x))))
- (when (> (+ (current-column) 1 width) frame-width)
+ (when (> (+ (current-column) 1 width) window-width)
(insert "\n")
(indent-to multiple-key-width))
(insert " " x))))
@@ -2390,7 +2390,7 @@ package to describe."
(with-current-buffer standard-output
(setq quail-current-package package-def))))
;; Then, insert text in the help buffer while paying attention to
- ;; the width of the frame in which the buffer displayed.
+ ;; the width of the window in which the buffer displayed.
(with-current-buffer (help-buffer)
(setq buffer-read-only nil)
(insert "Input method: " (quail-name)
@@ -2808,4 +2808,5 @@ call it with one argument STRING."
;;
(provide 'quail)
+;;; arch-tag: 46d7db54-5467-42c4-a2a9-53ca90a1e886
;;; quail.el ends here
diff --git a/lisp/international/swedish.el b/lisp/international/swedish.el
index 0171433b0a3..b13a5e685b9 100644
--- a/lisp/international/swedish.el
+++ b/lisp/international/swedish.el
@@ -155,4 +155,5 @@ Leaves point just after the word that looks Swedish."
(provide 'swedish)
+;;; arch-tag: a117019d-acac-4ac4-8eac-0dbd49a41d32
;;; swedish.el ends here
diff --git a/lisp/international/titdic-cnv.el b/lisp/international/titdic-cnv.el
index 12beba682f7..d92e28981e3 100644
--- a/lisp/international/titdic-cnv.el
+++ b/lisp/international/titdic-cnv.el
@@ -209,7 +209,7 @@ SPC, 6, 3, 4, or 7 specifing a tone (SPC:$(0?v(N(B, 6:$(0Dm(N(B, 3:$(0&9Vy
;; Return a value of the key in the current line.
(defsubst tit-read-key-value ()
- (if (looking-at "[^ \t\n]+")
+ (if (looking-at "[^ \t\r\n]+")
(car (read-from-string (concat "\"" (match-string 0) "\"")))))
;; Return an appropriate quail-package filename from FILENAME (TIT
@@ -766,6 +766,11 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"."
(insert "(quail-define-rules\n")
(save-excursion
(set-buffer dicbuf)
+ ;; Handle double CR line ends, which result when checking out of
+ ;; CVS on MS-Windows.
+ (goto-char (point-min))
+ (while (re-search-forward "\r\r$" nil t)
+ (replace-match ""))
(goto-char (point-min))
(search-forward "A440")
(beginning-of-line)
@@ -1177,4 +1182,5 @@ to store generated Quail packages."
;; coding: iso-2022-7bit
;; End:
+;;; arch-tag: 8ad478b2-a985-4da2-b47f-d8ee5d7c24a3
;;; titdic-cnv.el ends here
diff --git a/lisp/international/utf-7.el b/lisp/international/utf-7.el
index 842d81c92a1..8d98b442ac4 100644
--- a/lisp/international/utf-7.el
+++ b/lisp/international/utf-7.el
@@ -136,4 +136,6 @@ ESC and SKIP-CHARS are adjusted for the normal and IMAP versions."
;; (utf-7-encode from to t))
(provide 'utf-7)
+
+;;; arch-tag: 975ee403-90a4-4286-97d2-4ed1323f4ef9
;;; utf-7.el ends here