summary | refs | log | tree | commit | diff
path: root/lisp/international
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/international')
-rw-r--r-- lisp/international/ccl.el 22
-rw-r--r-- lisp/international/characters.el 34
-rw-r--r-- lisp/international/fontset.el 36
-rw-r--r-- lisp/international/iso-ascii.el 5
-rw-r--r-- lisp/international/ja-dic-cnv.el 65
-rw-r--r-- lisp/international/ja-dic-utl.el 43
-rw-r--r-- lisp/international/kinsoku.el 36
-rw-r--r-- lisp/international/kkc.el 4
-rw-r--r-- lisp/international/latin1-disp.el 4
-rw-r--r-- lisp/international/mule-cmds.el 227
-rw-r--r-- lisp/international/mule-conf.el 71
-rw-r--r-- lisp/international/mule-diag.el 2
-rw-r--r-- lisp/international/mule-util.el 26
-rw-r--r-- lisp/international/mule.el 17
-rw-r--r-- lisp/international/quail.el 37
-rw-r--r-- lisp/international/titdic-cnv.el 323
-rw-r--r-- lisp/international/ucs-normalize.el 4
17 files changed, 551 insertions, 405 deletions
diff --git a/lisp/international/ccl.el b/lisp/international/ccl.el
index 7f8aa7dda37..51626f51618 100644
--- a/lisp/international/ccl.el
+++ b/lisp/international/ccl.el
@@ -184,11 +184,19 @@
(defvar ccl-current-ic 0
"The current index for `ccl-program-vector'.")
+;; The CCL compiled codewords are 28bits, but the CCL implementation
+;; assumes that the codewords are sign-extended, so that data constants in
+;; the upper part of the codeword are signed. This function truncates a
+;; codeword to 28bits, and then sign extends the result to a fixnum.
+(defun ccl-fixnum (code)
+ "Convert a CCL code word to a fixnum value."
+ (- (logxor (logand code #x0fffffff) #x08000000) #x08000000))
+
(defun ccl-embed-data (data &optional ic)
"Embed integer DATA in `ccl-program-vector' at `ccl-current-ic' and
increment it. If IC is specified, embed DATA at IC."
(if ic
- (aset ccl-program-vector ic data)
+ (aset ccl-program-vector ic (ccl-fixnum data))
(let ((len (length ccl-program-vector)))
(if (>= ccl-current-ic len)
(let ((new (make-vector (* len 2) nil)))
@@ -196,7 +204,7 @@ increment it. If IC is specified, embed DATA at IC."
(setq len (1- len))
(aset new len (aref ccl-program-vector len)))
(setq ccl-program-vector new))))
- (aset ccl-program-vector ccl-current-ic data)
+ (aset ccl-program-vector ccl-current-ic (ccl-fixnum data))
(setq ccl-current-ic (1+ ccl-current-ic))))
(defun ccl-embed-symbol (symbol prop)
@@ -230,7 +238,8 @@ proper index number for SYMBOL. PROP should be
`ccl-program-vector' at IC without altering the other bit field."
(let ((relative (- ccl-current-ic (1+ ic))))
(aset ccl-program-vector ic
- (logior (aref ccl-program-vector ic) (ash relative 8)))))
+ (logior (aref ccl-program-vector ic)
+ (ccl-fixnum (ash relative 8))))))
(defun ccl-embed-code (op reg data &optional reg2)
"Embed CCL code for the operation OP and arguments REG and DATA in
@@ -986,7 +995,8 @@ is a list of CCL-BLOCKs."
(defun ccl-get-next-code ()
"Return a CCL code in `ccl-code' at `ccl-current-ic'."
(prog1
- (aref ccl-code ccl-current-ic)
+ (let ((code (aref ccl-code ccl-current-ic)))
+ (if (numberp code) (ccl-fixnum code) code))
(setq ccl-current-ic (1+ ccl-current-ic))))
(defun ccl-dump-1 ()
@@ -1142,9 +1152,9 @@ is a list of CCL-BLOCKs."
(progn
(insert (logand code #xFFFFFF))
(setq i (1+ i)))
- (insert (format "%c" (lsh code -16)))
+ (insert (format "%c" (ash code -16)))
(if (< (1+ i) len)
- (insert (format "%c" (logand (lsh code -8) 255))))
+ (insert (format "%c" (logand (ash code -8) 255))))
(if (< (+ i 2) len)
(insert (format "%c" (logand code 255))))
(setq i (+ i 3)))))
diff --git a/lisp/international/characters.el b/lisp/international/characters.el
index cdd8ba7c403..012827ba1c6 100644
--- a/lisp/international/characters.el
+++ b/lisp/international/characters.el
@@ -987,11 +987,12 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x103D . #x103E)
(#x1058 . #x1059)
(#x105E . #x1160)
- (#x1171 . #x1074)
+ (#x1071 . #x1074)
(#x1082 . #x1082)
(#x1085 . #x1086)
(#x108D . #x108D)
(#x109D . #x109D)
+ (#x1160 . #x11FF)
(#x135D . #x135F)
(#x1712 . #x1714)
(#x1732 . #x1734)
@@ -1081,6 +1082,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(#xABE5 . #xABE5)
(#xABE8 . #xABE8)
(#xABED . #xABED)
+ (#xD7B0 . #xD7FB)
(#xFB1E . #xFB1E)
(#xFE00 . #xFE0F)
(#xFE20 . #xFE2F)
@@ -1217,10 +1219,11 @@ with L, LRE, or LRO Unicode bidi character type.")
(#xFE30 . #xFE6F)
(#xFF01 . #xFF60)
(#xFFE0 . #xFFE6)
- (#x16FE0 . #x16FE1)
- (#x17000 . #x187F1)
+ (#x16FE0 . #x16FE3)
+ (#x17000 . #x187F7)
(#x18800 . #x18AF2)
- (#x1B000 . #x1B11E)
+ (#x1B000 . #x1B152)
+ (#x1B164 . #x1B167)
(#x1B170 . #x1B2FB)
(#x1F004 . #x1F004)
(#x1F0CF . #x1F0CF)
@@ -1250,17 +1253,22 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x1F680 . #x1F6C5)
(#x1F6CC . #x1F6CC)
(#x1F6D0 . #x1F6D2)
+ (#x1F6D5 . #x1F6D5)
(#x1F6EB . #x1F6EC)
- (#x1F6F4 . #x1F6F9)
- (#x1F910 . #x1F93E)
- (#x1F940 . #x1F970)
+ (#x1F6F4 . #x1F6FA)
+ (#x1F7E0 . #x1F7EB)
+ (#x1F90D . #x1F971)
(#x1F973 . #x1F976)
- (#x1F97A . #x1F97A)
- (#x1F97C . #x1F9A2)
- (#x1F9B0 . #x1F9B9)
- (#x1F9C0 . #x1F9C2)
- (#x1F9D0 . #x1F9FF)
+ (#x1F97A . #x1F9A2)
+ (#x1F9A5 . #x1F9AA)
+ (#x1F9AE . #x1F9CA)
+ (#x1F9CD . #x1F9FF)
+ (#x1FA00 . #x1FA53)
(#x1FA60 . #x1FA6D)
+ (#x1FA70 . #x1FA73)
+ (#x1FA78 . #x1FA7A)
+ (#x1FA80 . #x1FA82)
+ (#x1FA90 . #x1FA95)
(#x20000 . #x2FFFF)
(#x30000 . #x3FFFF))))
(dolist (elt l)
@@ -1334,7 +1342,7 @@ Setup char-width-table appropriate for non-CJK language environment."
;; Setting char-script-table.
-(if purify-flag
+(if dump-mode
;; While dumping, we can't use require, and international is not
;; in load-path.
(load "international/charscript")
diff --git a/lisp/international/fontset.el b/lisp/international/fontset.el
index c90d4f53bd9..0413646dfb3 100644
--- a/lisp/international/fontset.el
+++ b/lisp/international/fontset.el
@@ -222,6 +222,7 @@
(hanifi-rohingya #x10D00)
(old-sogdian #x10F00)
(sogdian #x10F30)
+ (elymaic #x10fe0)
(mahajani #x11150)
(sinhala-archaic-number #x111E1)
(khojki #x11200)
@@ -234,6 +235,7 @@
(takri #x11680)
(dogra #x11800)
(warang-citi #x118A1)
+ (nandinagari #x119a0)
(zanabazar-square #x11A00)
(soyombo #x11A50)
(pau-cin-hau #x11AC0)
@@ -257,15 +259,19 @@
(ancient-greek-musical-notation #x1D200)
(tai-xuan-jing-symbol #x1D300)
(counting-rod-numeral #x1D360)
+ (nyiakeng-puachue-hmong #x1e100)
+ (wancho #x1e2c0)
(mende-kikakui #x1E810)
(adlam #x1E900)
+ (indic-siyaq-number #x1ec71)
+ (ottoman-siyaq-number #x1ed01)
(mahjong-tile #x1F000)
(domino-tile #x1F030)))
(defvar otf-script-alist)
-;; The below was synchronized with the latest Jul 23, 2017 version of
-;; https://www.microsoft.com/typography/otspec/scripttags.htm.
+;; The below was synchronized with the latest Aug 16, 2018 version of
+;; https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags
(setq otf-script-alist
'((adlm . adlam)
(ahom . ahom)
@@ -300,6 +306,7 @@
(dsrt . deseret)
(deva . devanagari)
(dev2 . devanagari)
+ (dogr . dogra)
(dupl . duployan-shorthand)
(egyp . egyptian)
(elba . elbasan)
@@ -311,11 +318,13 @@
(grek . greek)
(gujr . gujarati)
(gjr2 . gujarati)
+ (gong . gunjala-gondi)
(guru . gurmukhi)
(gur2 . gurmukhi)
(hani . han)
(hang . hangul)
(jamo . hangul)
+ (rohg . hanifi-rohingya)
(hano . hanunoo)
(hatr . hatran)
(hebr . hebrew)
@@ -324,9 +333,9 @@
(prti . inscriptional-parthian)
(java . javanese)
(kthi . kaithi)
- (kana . kana) ; Hiragana
(knda . kannada)
(knd2 . kannada)
+ (kana . kana) ; Hiragana
(kali . kayah-li)
(khar . kharoshthi)
(khmr . khmer)
@@ -342,12 +351,15 @@
(lyci . lycian)
(lydi . lydian)
(mahj . mahajani)
+ (maka . makasar)
(marc . marchen)
(mlym . malayalam)
(mlm2 . malayalam)
(mand . mandaic)
(mani . manichaean)
+ (gonm . masaram-gondi)
(math . mathematical)
+ (medf . medefaidrin)
(mtei . meetei-mayek)
(mend . mende-kikakui)
(merc . meroitic)
@@ -363,12 +375,14 @@
(nbat . nabataean)
(newa . newa)
(nko\ . nko)
+ (nshu . nushu)
(ogam . ogham)
(olck . ol-chiki)
(ital . old_italic)
(xpeo . old_persian)
(narb . old-north-arabian)
(perm . old-permic)
+ (sogo . old-sogdian)
(sarb . old-south-arabian)
(orkh . old-turkic)
(orya . oriya)
@@ -392,7 +406,9 @@
(sidd . siddham)
(sgnw . sutton-sign-writing)
(sinh . sinhala)
+ (sogd . sogdian)
(sora . sora-sompeng)
+ (soyo . soyombo)
(sund . sundanese)
(sylo . syloti_nagri)
(syrc . syriac)
@@ -416,7 +432,8 @@
(ugar . ugaritic)
(vai\ . vai)
(wara . warang-citi)
- (yi\ \ . yi)))
+ (yi\ \ . yi)
+ (zanb . zanabazar-square)))
;; Set standard fontname specification of characters in the default
;; fontset to find an appropriate font for each script/charset. The
@@ -487,7 +504,7 @@
(data (list (vconcat (mapcar 'car cjk))))
(i 0))
(dolist (elt cjk)
- (let ((mask (lsh 1 i)))
+ (let ((mask (ash 1 i)))
(map-charset-chars
#'(lambda (range _arg)
(let ((from (car range)) (to (cdr range)))
@@ -876,7 +893,7 @@
(spec (cdr target-spec)))
(if (integerp spec)
(dotimes (i (length registries))
- (if (> (logand spec (lsh 1 i)) 0)
+ (if (> (logand spec (ash 1 i)) 0)
(set-fontset-font "fontset-default" target
(cons nil (aref registries i))
nil 'append)))
@@ -1164,6 +1181,8 @@ given from DEFAULT-SPEC."
(setcar (cdr elt) spec)))
fontlist))
+(defvar fontset-alias-alist)
+
(defun fontset-name-p (fontset)
"Return non-nil if FONTSET is valid as fontset name.
A valid fontset name should conform to XLFD (X Logical Font Description)
@@ -1240,11 +1259,12 @@ Done when `mouse-set-font' is called."
(latin-iso8859-15 . latin)
(latin-iso8859-16 . latin)
(latin-jisx0201 . latin)
+ (thai-iso8859-11 . thai)
(thai-tis620 . thai)
(cyrillic-iso8859-5 . cyrillic)
(arabic-iso8859-6 . arabic)
- (greek-iso8859-7 . latin)
- (hebrew-iso8859-8 . latin)
+ (greek-iso8859-7 . greek)
+ (hebrew-iso8859-8 . hebrew)
(katakana-jisx0201 . kana)
(chinese-gb2312 . han)
(chinese-gbk . han)
diff --git a/lisp/international/iso-ascii.el b/lisp/international/iso-ascii.el
index 4441241a658..395e6c4dcd0 100644
--- a/lisp/international/iso-ascii.el
+++ b/lisp/international/iso-ascii.el
@@ -163,10 +163,7 @@
(iso-ascii-display 255 "\"y") ; small y with diaeresis or umlaut mark
(define-minor-mode iso-ascii-mode
- "Toggle ISO-ASCII mode.
-With a prefix argument ARG, enable the mode if ARG is positive,
-and disable it otherwise. If called from Lisp, enable the mode
-if ARG is omitted or nil."
+ "Toggle ISO-ASCII mode."
:variable ((eq standard-display-table iso-ascii-display-table)
. (lambda (v)
(setq standard-display-table
diff --git a/lisp/international/ja-dic-cnv.el b/lisp/international/ja-dic-cnv.el
index 78d2cd5aced..578cd63a590 100644
--- a/lisp/international/ja-dic-cnv.el
+++ b/lisp/international/ja-dic-cnv.el
@@ -32,15 +32,15 @@
;; input method (e.g. quail-japanese) can utilize the dictionary.
;; The format of SKK dictionary is quite simple. Each line has the
-;; form "KANASTRING /CONV1/CONV2/.../" which means KANASTRING ($B2>L>J8(B
-;; $B;zNs(B) can be converted to one of CONVi. CONVi is a Kanji ($B4A;z(B)
-;; and Kana ($B2>L>(B) mixed string.
+;; form "KANASTRING /CONV1/CONV2/.../" which means KANASTRING (仮名文
+;; 字列) can be converted to one of CONVi. CONVi is a Kanji (漢字)
+;; and Kana (仮名) mixed string.
;;
-;; KANASTRING may have a trailing ASCII letter for Okurigana ($BAw$j2>L>(B)
+;; KANASTRING may have a trailing ASCII letter for Okurigana (送り仮名)
;; information. For instance, the trailing letter `k' means that one
-;; of the following Okurigana is allowed: $B$+$-$/$1$3(B. So, in that
-;; case, the string "KANASTRING$B$/(B" can be converted to one of "CONV1$B$/(B",
-;; CONV2$B$/(B, ...
+;; of the following Okurigana is allowed: かきくけこ. So, in that
+;; case, the string "KANASTRINGく" can be converted to one of "CONV1く",
+;; CONV2く, ...
;;; Code:
@@ -76,25 +76,25 @@
(defconst skkdic-postfix-list '(skkdic-postfix-list))
(defconst skkdic-postfix-data
- '(("$B$$$-(B" "$B9T(B")
- ("$B$,$+$j(B" "$B78(B")
- ("$B$,$/(B" "$B3X(B")
- ("$B$,$o(B" "$B@n(B")
- ("$B$7$c(B" "$B<R(B")
- ("$B$7$e$&(B" "$B=8(B")
- ("$B$7$g$&(B" "$B>^(B" "$B>k(B")
- ("$B$8$g$&(B" "$B>k(B")
- ("$B$;$s(B" "$B@~(B")
- ("$B$@$1(B" "$B3Y(B")
- ("$B$A$c$/(B" "$BCe(B")
- ("$B$F$s(B" "$BE9(B")
- ("$B$H$&$2(B" "$BF=(B")
- ("$B$I$*$j(B" "$BDL$j(B")
- ("$B$d$^(B" "$B;3(B")
- ("$B$P$7(B" "$B66(B")
- ("$B$O$D(B" "$BH/(B")
- ("$B$b$/(B" "$BL\(B")
- ("$B$f$-(B" "$B9T(B")))
+ '(("いき" "行")
+ ("がかり" "係")
+ ("がく" "学")
+ ("がわ" "川")
+ ("しゃ" "社")
+ ("しゅう" "集")
+ ("しょう" "賞" "城")
+ ("じょう" "城")
+ ("せん" "線")
+ ("だけ" "岳")
+ ("ちゃく" "着")
+ ("てん" "店")
+ ("とうげ" "峠")
+ ("どおり" "通り")
+ ("やま" "山")
+ ("ばし" "橋")
+ ("はつ" "発")
+ ("もく" "目")
+ ("ゆき" "行")))
(defun skkdic-convert-postfix (skkbuf buf)
(message "Processing POSTFIX entries ...")
@@ -124,7 +124,7 @@
(setq l (cdr l)))))
;; Search postfix entries.
- (while (re-search-forward "^[#<>?]\\(\\(\\cH\\|$B!<(B\\)+\\) " nil t)
+ (while (re-search-forward "^[#<>?]\\(\\(\\cH\\|ー\\)+\\) " nil t)
(let ((kana (match-string-no-properties 1))
str candidates)
(while (looking-at "/[#0-9 ]*\\([^/\n]*\\)/")
@@ -157,7 +157,7 @@
(insert ";; Setting prefix entries.\n"
"(skkdic-set-prefix\n"))
(save-excursion
- (while (re-search-forward "^\\(\\(\\cH\\|$B!<(B\\)+\\)[<>?] " nil t)
+ (while (re-search-forward "^\\(\\(\\cH\\|ー\\)+\\)[<>?] " nil t)
(let ((kana (match-string-no-properties 1))
str candidates)
(while (looking-at "/\\([^/\n]+\\)/")
@@ -275,7 +275,7 @@
(let ((progress (make-progress-reporter "Collecting OKURI-NASI entries"
(point) (point-max)
nil 10)))
- (while (re-search-forward "^\\(\\(\\cH\\|$B!<(B\\)+\\) \\(/\\cj.*\\)/$"
+ (while (re-search-forward "^\\(\\(\\cH\\|ー\\)+\\) \\(/\\cj.*\\)/$"
nil t)
(let ((kana (match-string-no-properties 1))
(candidates (skkdic-get-candidate-list (match-beginning 3)
@@ -452,7 +452,7 @@ To get complete usage, invoke:
(aset vec i
(if (< ch 128) ; CH is an ASCII letter for OKURIGANA,
(- ch) ; represented by a negative code.
- (if (= ch ?$B!<(B) ; `$B!<(B' is represented by 0.
+ (if (= ch ?ー) ; `ー' is represented by 0.
0
(- (logand (encode-char ch 'japanese-jisx0208) #xFF) 32))))
(setq i (1+ i)))
@@ -541,9 +541,4 @@ To get complete usage, invoke:
map)))
(provide 'ja-dic-cnv)
-
-;; Local Variables:
-;; coding: iso-2022-7bit
-;; End:
-
;;; ja-dic-cnv.el ends here
diff --git a/lisp/international/ja-dic-utl.el b/lisp/international/ja-dic-utl.el
index 86ba3749df8..498fb23f707 100644
--- a/lisp/international/ja-dic-utl.el
+++ b/lisp/international/ja-dic-utl.el
@@ -53,23 +53,23 @@
"Nested alist for OKURI-NASI entries of SKK dictionary.")
(defconst skkdic-okurigana-table
- '((?$B$!(B . ?a) (?$B$"(B . ?a) (?$B$#(B . ?i) (?$B$$(B . ?i) (?$B$%(B . ?u)
- (?$B$&(B . ?u) (?$B$'(B . ?e) (?$B$((B . ?e) (?$B$)(B . ?o) (?$B$*(B . ?o)
- (?$B$+(B . ?k) (?$B$,(B . ?g) (?$B$-(B . ?k) (?$B$.(B . ?g) (?$B$/(B . ?k)
- (?$B$0(B . ?g) (?$B$1(B . ?k) (?$B$2(B . ?g) (?$B$3(B . ?k) (?$B$4(B . ?g)
- (?$B$5(B . ?s) (?$B$6(B . ?z) (?$B$7(B . ?s) (?$B$8(B . ?j) (?$B$9(B . ?s)
- (?$B$:(B . ?z) (?$B$;(B . ?s) (?$B$<(B . ?z) (?$B$=(B . ?s) (?$B$>(B . ?z)
- (?$B$?(B . ?t) (?$B$@(B . ?d) (?$B$A(B . ?t) (?$B$B(B . ?d) (?$B$C(B . ?t)
- (?$B$D(B . ?t) (?$B$E(B . ?d) (?$B$F(B . ?t) (?$B$G(B . ?d) (?$B$H(B . ?t) (?$B$I(B . ?d)
- (?$B$J(B . ?n) (?$B$K(B . ?n) (?$B$L(B . ?n) (?$B$M(B . ?n) (?$B$N(B . ?n)
- (?$B$O(B . ?h) (?$B$P(B . ?b) (?$B$Q(B . ?p) (?$B$R(B . ?h) (?$B$S(B . ?b)
- (?$B$T(B . ?p) (?$B$U(B . ?h) (?$B$V(B . ?b) (?$B$W(B . ?p) (?$B$X(B . ?h)
- (?$B$Y(B . ?b) (?$B$Z(B . ?p) (?$B$[(B . ?h) (?$B$\(B . ?b) (?$B$](B . ?p)
- (?$B$^(B . ?m) (?$B$_(B . ?m) (?$B$`(B . ?m) (?$B$a(B . ?m) (?$B$b(B . ?m)
- (?$B$c(B . ?y) (?$B$d(B . ?y) (?$B$e(B . ?y) (?$B$f(B . ?y) (?$B$g(B . ?y) (?$B$h(B . ?y)
- (?$B$i(B . ?r) (?$B$j(B . ?r) (?$B$k(B . ?r) (?$B$l(B . ?r) (?$B$m(B . ?r)
- (?$B$o(B . ?w) (?$B$p(B . ?w) (?$B$q(B . ?w) (?$B$r(B . ?w)
- (?$B$s(B . ?n)
+ '((?ぁ . ?a) (?あ . ?a) (?ぃ . ?i) (?い . ?i) (?ぅ . ?u)
+ (?う . ?u) (?ぇ . ?e) (?え . ?e) (?ぉ . ?o) (?お . ?o)
+ (?か . ?k) (?が . ?g) (?き . ?k) (?ぎ . ?g) (?く . ?k)
+ (?ぐ . ?g) (?け . ?k) (?げ . ?g) (?こ . ?k) (?ご . ?g)
+ (?さ . ?s) (?ざ . ?z) (?し . ?s) (?じ . ?j) (?す . ?s)
+ (?ず . ?z) (?せ . ?s) (?ぜ . ?z) (?そ . ?s) (?ぞ . ?z)
+ (?た . ?t) (?だ . ?d) (?ち . ?t) (?ぢ . ?d) (?っ . ?t)
+ (?つ . ?t) (?づ . ?d) (?て . ?t) (?で . ?d) (?と . ?t) (?ど . ?d)
+ (?な . ?n) (?に . ?n) (?ぬ . ?n) (?ね . ?n) (?の . ?n)
+ (?は . ?h) (?ば . ?b) (?ぱ . ?p) (?ひ . ?h) (?び . ?b)
+ (?ぴ . ?p) (?ふ . ?h) (?ぶ . ?b) (?ぷ . ?p) (?へ . ?h)
+ (?べ . ?b) (?ぺ . ?p) (?ほ . ?h) (?ぼ . ?b) (?ぽ . ?p)
+ (?ま . ?m) (?み . ?m) (?む . ?m) (?め . ?m) (?も . ?m)
+ (?ゃ . ?y) (?や . ?y) (?ゅ . ?y) (?ゆ . ?y) (?ょ . ?y) (?よ . ?y)
+ (?ら . ?r) (?り . ?r) (?る . ?r) (?れ . ?r) (?ろ . ?r)
+ (?わ . ?w) (?ゐ . ?w) (?ゑ . ?w) (?を . ?w)
+ (?ん . ?n)
)
"Alist of Okuriganas vs trailing ASCII letters in OKURI-ARI entry.")
@@ -125,14 +125,14 @@ LEIM is available from the same ftp directory as Emacs.")))
;; At first, generate vector VEC from SEQ for looking up SKK
;; alists. Nth element in VEC corresponds to Nth element in SEQ.
;; The values are decided as follows.
- ;; If SEQ[N] is `$B!<(B', VEC[N] is 0,
+ ;; If SEQ[N] is `ー', VEC[N] is 0,
;; else if SEQ[N] is a Hiragana character, VEC[N] is:
;; ((The 2nd position code of SEQ[N]) - 32),
;; else VEC[N] is 128.
(while (< i len)
(let ((ch (aref seq i))
code)
- (cond ((= ch ?$B!<(B)
+ (cond ((= ch ?ー)
(aset vec i 0))
((and (>= ch (car skkdic-jisx0208-hiragana-block))
(<= ch (cdr skkdic-jisx0208-hiragana-block)))
@@ -218,9 +218,4 @@ LEIM is available from the same ftp directory as Emacs.")))
;;
(provide 'ja-dic-utl)
-
-;; Local Variables:
-;; coding: iso-2022-7bit
-;; End:
-
;;; ja-dic-utl.el ends here
diff --git a/lisp/international/kinsoku.el b/lisp/international/kinsoku.el
index 376d23b1fa6..690a80e6595 100644
--- a/lisp/international/kinsoku.el
+++ b/lisp/international/kinsoku.el
@@ -1,4 +1,4 @@
-;;; kinsoku.el --- `Kinsoku' processing funcs -*- coding: iso-2022-7bit; -*-
+;;; kinsoku.el --- `Kinsoku' processing funcs
;; Copyright (C) 1997, 2001-2019 Free Software Foundation, Inc.
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
@@ -62,19 +62,19 @@ The value 0 means there's no limitation.")
idx (1+ idx)))
str2)
;; Katakana JISX0201
- "(I!#'()*+,-./0^_(B"
+ "。」ァィゥェォャュョッー゙゚"
;; Japanese JISX0208
- "$B!"!#!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2!3!4!5!6!7!8!9!:!;!<!=!>(B\
-$B!?!@!A!B!C!D!E!G!I!K!M!O!Q!S!U!W!Y![!k!l!m!n(B\
-$B$!$#$%$'$)$C$c$e$g$n%!%#%%%'%)%C%c%e%g%n%u%v(B"
+ "、。,.・:;?!゛゜´`¨^ ̄_ヽヾゝゞ〃仝々〆〇ー—‐\
+/\〜‖|…‥’”)〕]}〉》」』】°′″℃\
+ぁぃぅぇぉっゃゅょゎァィゥェォッャュョヮヵヶ"
;; Chinese GB2312
- "$A!"!##.#,!$!%!&!'!(!)!*!+!,!-!/!1#)!3!5!7!9!;!=(B\
-$A!?#;#:#?#!!@!A!B!C!c!d!e!f#/#\#"#_#~#|(e(B"
+ "、。.,・ˉˇ¨〃々―~‖…’”)〕〉》」』〗\
+】;:?!±×÷∶°′″℃/\"_ ̄|ㄥ"
;; Chinese BIG5
- "$(0!"!#!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2(B\
-$(0!3!4!5!6!7!8!9!:!;!<!=!?!A!C!E!G!I!K(B\
-$(0!M!O!Q!S!U!W!Y![!]!_!a!c!e!g!i!k!q(B\
-$(0"#"$"%"&"'"(")"*"+","2"3"4"j"k"l"x%7(B"))
+ ",、。.‧;:?!︰…‥﹐﹑﹒·﹔\
+﹕﹖﹗|–︱—︳╴︴﹏)︶}︸〕︺】\
+︼》︾〉﹀」﹂』﹄﹚﹜﹞’”〞′〃\
+¯ ̄_ˍ﹉﹊﹍﹎﹋﹌×÷±℃℉﹩°ㄥ"))
(len (length kinsoku-bol))
(idx 0)
ch)
@@ -102,16 +102,16 @@ The value 0 means there's no limitation.")
idx (1+ idx)))
str2)
;; JISX0201 Katakana
- "(I"(B"
+ "「"
;; Japanese JISX0208
- "$B!F!H!J!L!N!P!R!T!V!X!Z!k!l!m!n!w!x(B"
+ "‘“(〔[{〈《「『【°′″℃@§"
;; Chinese GB2312
- "$A!.!0#"#(!2!4!6!8!:!<!>!c!d!e#@!f!l(B\
-$A(E(F(G(H(I(J(K(L(M(N(O(P(Q(R(S(T(U(V(W(X(Y(h(B\
-\$(0!>!@!B!D!F!H!J!L!N!P!R!T!V!X!Z!\!^!`!b(B"
+ "‘“"(〔〈《「『〖【°′″@℃§\
+ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄨ\
+\(︵{︷〔︹【︻《︽〈︿「﹁『﹃﹙﹛﹝"
;; Chinese BIG5
- "$(0!d!f!h!j!k!q!p"i"j"k"n"x$u$v$w$x$y$z${(B\
-$(0$|$}$~%!%"%#%$%%%&%'%(%)%*%+%:(B"))
+ "‘“〝‵′〃§@℃℉﹫°ㄅㄆㄇㄈㄉㄊㄋ\
+ㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄨ"))
(len (length kinsoku-eol))
(idx 0)
ch)
diff --git a/lisp/international/kkc.el b/lisp/international/kkc.el
index df56ce26161..6691ee9eb9b 100644
--- a/lisp/international/kkc.el
+++ b/lisp/international/kkc.el
@@ -1,4 +1,4 @@
-;;; kkc.el --- Kana Kanji converter -*- coding: iso-2022-7bit; -*-
+;;; kkc.el --- Kana Kanji converter
;; Copyright (C) 1997-1998, 2001-2019 Free Software Foundation, Inc.
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
@@ -36,7 +36,7 @@
(require 'ja-dic-utl)
-(defvar kkc-input-method-title "$B4A(B"
+(defvar kkc-input-method-title "漢"
"String denoting KKC input method.
This string is shown at mode line when users are in KKC mode.")
diff --git a/lisp/international/latin1-disp.el b/lisp/international/latin1-disp.el
index 6aa633fb42a..1b7bc49a6be 100644
--- a/lisp/international/latin1-disp.el
+++ b/lisp/international/latin1-disp.el
@@ -201,10 +201,6 @@ character set: `latin-2', `hebrew' etc."
(char (and info (decode-char (car (remq 'ascii info)) ?\ ))))
(and char (char-displayable-p char))))
-;; Backwards compatibility.
-(define-obsolete-function-alias 'latin1-char-displayable-p
- 'char-displayable-p "22.1")
-
(defun latin1-display-setup (set &optional force)
"Set up Latin-1 display for characters in the given SET.
SET must be a member of `latin1-display-sets'. Normally, check
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el
index 0be0f0fee2d..dfa9e4e6c8c 100644
--- a/lisp/international/mule-cmds.el
+++ b/lisp/international/mule-cmds.el
@@ -136,8 +136,7 @@
(expand-file-name "HELLO" data-directory))
:help "Demonstrate various character sets"))
(bindings--define-key map [set-various-coding-system]
- `(menu-item "Set Coding Systems" ,set-coding-system-map
- :enable (default-value 'enable-multibyte-characters)))
+ `(menu-item "Set Coding Systems" ,set-coding-system-map))
(bindings--define-key map [separator-input-method] menu-bar-separator)
(bindings--define-key map [describe-input-method]
@@ -282,9 +281,7 @@ wrong, use this command again to toggle back to the right mode."
(defun view-hello-file ()
"Display the HELLO file, which lists many languages and characters."
(interactive)
- ;; We have to decode the file in any environment.
- (let ((coding-system-for-read 'iso-2022-7bit))
- (view-file (expand-file-name "HELLO" data-directory))))
+ (view-file (expand-file-name "HELLO" data-directory)))
(defun universal-coding-system-argument (coding-system)
"Execute an I/O command using the specified coding system."
@@ -303,8 +300,7 @@ wrong, use this command again to toggle back to the right mode."
(cmd (key-binding keyseq))
prefix)
;; read-key-sequence ignores quit, so make an explicit check.
- ;; Like many places, this assumes quit == C-g, but it need not be.
- (if (equal last-input-event ?\C-g)
+ (if (equal last-input-event (nth 3 (current-input-mode)))
(keyboard-quit))
(when (memq cmd '(universal-argument digit-argument))
(call-interactively cmd)
@@ -317,16 +313,16 @@ wrong, use this command again to toggle back to the right mode."
(let ((current-prefix-arg prefix-arg)
;; Have to bind `last-command-event' here so that
;; `digit-argument', for instance, can compute the
- ;; prefix arg.
+ ;; `prefix-arg'.
(last-command-event (aref keyseq 0)))
(call-interactively cmd)))
;; This is the final call to `universal-argument-other-key', which
- ;; set's the final `prefix-arg.
+ ;; sets the final `prefix-arg'.
(let ((current-prefix-arg prefix-arg))
(call-interactively cmd))
- ;; Read the command to execute with the given prefix arg.
+ ;; Read the command to execute with the given `prefix-arg'.
(setq prefix prefix-arg
keyseq (read-key-sequence nil t)
cmd (key-binding keyseq)))
@@ -355,8 +351,7 @@ This also sets the following values:
(if (eq system-type 'darwin)
;; The file-name coding system on Darwin systems is always utf-8.
(setq default-file-name-coding-system 'utf-8-unix)
- (if (and (default-value 'enable-multibyte-characters)
- (or (not coding-system)
+ (if (and (or (not coding-system)
(coding-system-get coding-system 'ascii-compatible-p)))
(setq default-file-name-coding-system
(coding-system-change-eol-conversion coding-system 'unix))))
@@ -456,8 +451,8 @@ non-nil, it is used to sort CODINGS instead."
;; E: 1 if not XXX-with-esc
;; II: if iso-2022 based, 0..3, else 1.
(logior
- (lsh (if (eq base most-preferred) 1 0) 7)
- (lsh
+ (ash (if (eq base most-preferred) 1 0) 7)
+ (ash
(let ((mime (coding-system-get base :mime-charset)))
;; Prefer coding systems corresponding to a
;; MIME charset.
@@ -473,9 +468,9 @@ non-nil, it is used to sort CODINGS instead."
(t 3))
0))
5)
- (lsh (if (memq base lang-preferred) 1 0) 4)
- (lsh (if (memq base from-priority) 1 0) 3)
- (lsh (if (string-match-p "-with-esc\\'"
+ (ash (if (memq base lang-preferred) 1 0) 4)
+ (ash (if (memq base from-priority) 1 0) 3)
+ (ash (if (string-match-p "-with-esc\\'"
(symbol-name base))
0 1) 2)
(if (eq (coding-system-type base) 'iso-2022)
@@ -992,6 +987,11 @@ It is highly recommended to fix it before writing to a file."
;; If all the defaults failed, ask a user.
(when (not coding-system)
+ ;; If UTF-8 is in CODINGS, but is not its first member, make
+ ;; it the first one, so it is offered as the default.
+ (and (memq 'utf-8 codings) (not (eq 'utf-8 (car codings)))
+ (setq codings (append '(utf-8) (delq 'utf-8 codings))))
+
(setq coding-system (select-safe-coding-system-interactively
from to codings unsafe rejected (car codings))))
@@ -1158,10 +1158,7 @@ see `language-info-alist'."
((eq key 'nonascii-translation)
(set-language-environment-nonascii-translation lang-env))
((eq key 'charset)
- (set-language-environment-charset lang-env))
- ((and (not (default-value 'enable-multibyte-characters))
- (or (eq key 'unibyte-syntax) (eq key 'unibyte-display)))
- (set-language-environment-unibyte lang-env)))))
+ (set-language-environment-charset lang-env)))))
(defun set-language-info-internal (lang-env key info)
"Internal use only.
@@ -1333,7 +1330,7 @@ This is the input method activated automatically by the command
`toggle-input-method' (\\[toggle-input-method])."
:link '(custom-manual "(emacs)Input Methods")
:group 'mule
- :type `(choice (const nil)
+ :type '(choice (const nil)
mule-input-method-string)
:set-after '(current-language-environment))
@@ -1471,12 +1468,7 @@ If INPUT-METHOD is nil, deactivate any current input method."
(defun deactivate-input-method ()
"Turn off the current input method."
(when current-input-method
- (if input-method-history
- (unless (string= current-input-method (car input-method-history))
- (setq input-method-history
- (cons current-input-method
- (delete current-input-method input-method-history))))
- (setq input-method-history (list current-input-method)))
+ (add-to-history 'input-method-history current-input-method)
(unwind-protect
(progn
(setq input-method-function nil
@@ -1800,6 +1792,9 @@ The default status is as follows:
(setq default-sendmail-coding-system 'iso-latin-1)
;; On Darwin systems, this should be utf-8-unix, but when this file is loaded
;; that is not yet defined, so we set it in set-locale-environment instead.
+ ;; [Actually, it seems to work fine to use utf-8-unix here, and not just
+ ;; on Darwin. The previous comment seems to be outdated?
+ ;; See patch at https://debbugs.gnu.org/15803 ]
(setq default-file-name-coding-system 'iso-latin-1-unix)
;; Preserve eol-type from existing default-process-coding-systems.
;; On non-unix-like systems in particular, these may have been set
@@ -1897,9 +1892,6 @@ the new language environment, it runs `set-language-environment-hook'."
(set-language-environment-input-method language-name)
(set-language-environment-nonascii-translation language-name)
(set-language-environment-charset language-name)
- ;; Unibyte setups if necessary.
- (unless (default-value 'enable-multibyte-characters)
- (set-language-environment-unibyte language-name))
(let ((func (get-language-info language-name 'setup-function)))
(if (functionp func)
@@ -1951,7 +1943,7 @@ See `set-language-info-alist' for use in programs."
(set-language-info-alist (car elt) (cdr elt)))
;; re-set the environment in case its parameters changed
(set-language-environment current-language-environment)))
- :type `(alist
+ :type '(alist
:key-type (string :tag "Language environment"
:completions
(lambda (string pred action)
@@ -1978,28 +1970,22 @@ See `set-language-info-alist' for use in programs."
(defun standard-display-european-internal ()
;; Actually set up direct output of non-ASCII characters.
(standard-display-8bit (if (eq window-system 'pc) 128 160) 255)
- ;; Unibyte Emacs on MS-DOS wants to display all 8-bit characters with
- ;; the native font, and codes 160 and 146 stand for something very
- ;; different there.
- (or (and (eq window-system 'pc) (not (default-value
- 'enable-multibyte-characters)))
- (progn
- ;; Most X fonts used to do the wrong thing for latin-1 code 160.
- (unless (and (eq window-system 'x)
- ;; XFree86 4 has fixed the fonts.
- (string= "The XFree86 Project, Inc" (x-server-vendor))
- (> (aref (number-to-string (nth 2 (x-server-version))) 0)
- ?3))
- ;; Make non-line-break space display as a plain space.
- (aset standard-display-table (unibyte-char-to-multibyte 160) [32]))
- ;; Most Windows programs send out apostrophes as \222. Most X fonts
- ;; don't contain a character at that position. Map it to the ASCII
- ;; apostrophe. [This is actually RIGHT SINGLE QUOTATION MARK,
- ;; U+2019, normally from the windows-1252 character set. XFree 4
- ;; fonts probably have the appropriate glyph at this position,
- ;; so they could use standard-display-8bit. It's better to use a
- ;; proper windows-1252 coding system. --fx]
- (aset standard-display-table (unibyte-char-to-multibyte 146) [39]))))
+ ;; Most X fonts used to do the wrong thing for latin-1 code 160.
+ (unless (and (eq window-system 'x)
+ ;; XFree86 4 has fixed the fonts.
+ (string= "The XFree86 Project, Inc" (x-server-vendor))
+ (> (aref (number-to-string (nth 2 (x-server-version))) 0)
+ ?3))
+ ;; Make non-line-break space display as a plain space.
+ (aset standard-display-table (unibyte-char-to-multibyte 160) [32]))
+ ;; Most Windows programs send out apostrophes as \222. Most X fonts
+ ;; don't contain a character at that position. Map it to the ASCII
+ ;; apostrophe. [This is actually RIGHT SINGLE QUOTATION MARK,
+ ;; U+2019, normally from the windows-1252 character set. XFree 4
+ ;; fonts probably have the appropriate glyph at this position,
+ ;; so they could use standard-display-8bit. It's better to use a
+ ;; proper windows-1252 coding system. --fx]
+ (aset standard-display-table (unibyte-char-to-multibyte 146) [39]))
(defun set-language-environment-coding-systems (language-name)
"Do various coding system setups for language environment LANGUAGE-NAME."
@@ -2035,10 +2021,8 @@ See `set-language-info-alist' for use in programs."
(let ((input-method (get-language-info language-name 'input-method)))
(when input-method
(setq default-input-method input-method)
- (if input-method-history
- (setq input-method-history
- (cons input-method
- (delete input-method input-method-history)))))))
+ (when input-method-history
+ (add-to-history 'input-method-history input-method)))))
(defun set-language-environment-nonascii-translation (language-name)
"Do unibyte/multibyte translation setup for language environment LANGUAGE-NAME."
@@ -2197,22 +2181,27 @@ See `set-language-info-alist' for use in programs."
(defconst locale-language-names
(purecopy
'(
- ;; Locale names of the form LANGUAGE[_TERRITORY][.CODESET][@MODIFIER]
- ;; as specified in the Single Unix Spec, Version 2.
- ;; LANGUAGE is a language code taken from ISO 639:1988 (E/F)
- ;; with additions from ISO 639/RA Newsletter No.1/1989;
- ;; see Internet RFC 2165 (1997-06) and
- ;; http://www.evertype.com/standards/iso639/iso639-en.html
- ;; TERRITORY is a country code taken from ISO 3166
- ;; http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html.
- ;; CODESET and MODIFIER are implementation-dependent.
+ ;; Locale names of the form LANGUAGE[_TERRITORY][.CODESET][@MODIFIER]
+ ;; as specified in the Single Unix Spec, Version 2.
+ ;; LANGUAGE is a language code taken from ISO 639:1988 (E/F)
+ ;; with additions from ISO 639/RA Newsletter No.1/1989;
+ ;; see Internet RFC 2165 (1997-06) and
+ ;; http://www.evertype.com/standards/iso639/iso639-en.html
+ ;; TERRITORY is a country code taken from ISO 3166
+ ;; http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html.
+ ;; CODESET and MODIFIER are implementation-dependent.
+
+ ;; Language names for which there are no locales (yet) are
+ ;; commented out.
;; jasonr comments: MS Windows uses three letter codes for
;; languages instead of the two letter ISO codes that POSIX
- ;; uses. In most cases the first two letters are the same, so
- ;; most of the regexps in locale-language-names work. Japanese
- ;; and Chinese are exceptions, which are listed in the
- ;; non-standard section at the bottom of locale-language-names.
+ ;; uses. In most cases the first two letters are the same, so
+ ;; most of the regexps in locale-language-names work. Japanese,
+ ;; Chinese, and some others are exceptions, which are listed in the
+ ;; non-standard section at the bottom of locale-language-names, or
+ ;; in the main section, if otherwise we would pick up the wrong
+ ;; entry (because the first matching entry is used).
("aa_DJ" . "Latin-1") ; Afar
("aa" . "UTF-8")
@@ -2220,11 +2209,12 @@ See `set-language-info-alist' for use in programs."
("af" . "Latin-1") ; Afrikaans
("am" "Ethiopic" utf-8) ; Amharic
("an" . "Latin-9") ; Aragonese
+ ("arn" . "UTF-8") ; MS-Windows Mapudungun, Mapuche
("ar" . "Arabic")
- ; as Assamese
+ ("as" . "UTF-8") ; Assamese
; ay Aymara
("az" . "UTF-8") ; Azerbaijani
- ; ba Bashkir
+ ("ba" . "UTF-8") ; Bashkir, Cyrillic script
("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s]
("bg" "Bulgarian" cp1251) ; Bulgarian
; bh Bihari
@@ -2235,12 +2225,12 @@ See `set-language-info-alist' for use in programs."
("bs" . "Latin-2") ; Bosnian
("byn" . "UTF-8") ; Bilin; Blin
("ca" "Catalan" iso-8859-1) ; Catalan
- ; co Corsican
+ ("co" . "UTF-8") ; Corsican
("cs" "Czech" iso-8859-2)
("cy" "Welsh" iso-8859-14)
("da" . "Latin-1") ; Danish
("de" "German" iso-8859-1)
- ; dv Divehi
+ ("dv" . "UTF-8") ; Divehi
; dz Bhutani
("ee" . "Latin-4") ; Ewe
("el" "Greek" iso-8859-7)
@@ -2254,6 +2244,8 @@ See `set-language-info-alist' for use in programs."
("et" . "Latin-9") ; Estonian
("eu" . "Latin-1") ; Basque
("fa" "Persian" utf-8) ; Persian
+ ("fil" . "UTF-8") ; Filipino
+ ("fpo" . "UTF-8") ; MS-Windows Filipino
("fi" . "Latin-9") ; Finnish
("fj" . "Latin-1") ; Fiji
("fo" . "Latin-1") ; Faroese
@@ -2262,6 +2254,7 @@ See `set-language-info-alist' for use in programs."
("ga" . "Latin-1") ; Irish Gaelic (new orthography)
("gd" . "Latin-9") ; Scots Gaelic
("gez" "Ethiopic" utf-8) ; Geez
+ ("gla" . "Latin-9") ; MS-Windows Scots Gaelic
("gl" . "Latin-1") ; Gallegan; Galician
; gn Guarani
("gu" "Gujarati" utf-8) ; Gujarati
@@ -2272,27 +2265,33 @@ See `set-language-info-alist' for use in programs."
("hni_IN" . "UTF-8") ; Chhattisgarhi
("hr" "Croatian" iso-8859-2) ; Croatian
("hu" . "Latin-2") ; Hungarian
- ; hy Armenian
+ ("hy" . "UTF-8") ; Armenian
; ia Interlingua
("id" . "Latin-1") ; Indonesian
; ie Interlingue
- ; ik Inupiak
+ ("ig" . "UTF-8") ; Igbo (Nigeria)
+ ("ibo" . "UTF-8") ; MS-Windows Igbo
+ ; ik Inupiak, Inupiaq
("is" . "Latin-1") ; Icelandic
("it" "Italian" iso-8859-1) ; Italian
; iu Inuktitut
("iw" "Hebrew" iso-8859-8)
("ja" "Japanese" euc-jp)
; jw Javanese
+ ("kal" . "Latin-1") ; MS-Windows Greenlandic
("ka" "Georgian" georgian-ps) ; Georgian
- ; kk Kazakh
+ ("kk" . "UTF-8") ; Kazakh
("kl" . "Latin-1") ; Greenlandic
("km" "Khmer" utf-8) ; Cambodian, Khmer
+ ("knk" "Devanagari" utf-8) ; MS-Windows Konkani
+ ("kok" "Devanagari" utf-8) ; Konkani
("kn" "Kannada" utf-8)
("ko" "Korean" euc-kr)
("ks" . "UTF-8") ; Kashmiri
; ku Kurdish
("kw" . "Latin-1") ; Cornish
("ky" . "UTF-8") ; Kirghiz
+ ("lao" "Lao" utf-8) ; MS-Windows Lao
("la" . "Latin-1") ; Latin
("lb" . "Latin-1") ; Luxemburgish
("lg" . "Latin-6") ; Ganda, a.k.a. Luganda
@@ -2303,18 +2302,22 @@ See `set-language-info-alist' for use in programs."
; mg Malagasy
("mi" . "Latin-7") ; Maori
("mk" "Cyrillic-ISO" iso-8859-5) ; Macedonian
+ ("mlt" . "Latin-3") ; MS-Windows Maltese
("ml" "Malayalam" utf-8)
("mn" . "UTF-8") ; Mongolian
- ; mo Moldavian
+ ; mo Moldavian (retired)
+ ("mri" . "Latin-7") ; MS-Windows Maori
("mr" "Devanagari" utf-8) ; Marathi
("ms" . "Latin-1") ; Malay
("mt" . "Latin-3") ; Maltese
+ ("mym" "Malayalam" utf-8) ; MS-Windows Malayalam
("my" "Burmese" utf-8) ; Burmese
; na Nauru
("nb" . "Latin-1") ; Norwegian
("ne" "Devanagari" utf-8) ; Nepali
("nl" "Dutch" iso-8859-1)
("nn" . "Latin-1") ; Norwegian Nynorsk
+ ("non" . "Latin-1") ; MS-Windows Norwegian Nynorsk
("no" . "Latin-1") ; Norwegian
("nr_ZA" . "UTF-8") ; South Ndebele
("nso_ZA" . "UTF-8") ; Pedi
@@ -2324,7 +2327,8 @@ See `set-language-info-alist' for use in programs."
("or" "Oriya" utf-8)
("pa" "Punjabi" utf-8) ; Punjabi
("pl" "Polish" iso-8859-2) ; Polish
- ; ps Pashto, Pushto
+ ("ps" . "UTF-8") ; Pashto, Pushto
+ ("pas" . "UTF-8") ; MS-Windows Pashto
("pt_BR" "Brazilian Portuguese" iso-8859-1) ; Brazilian Portuguese
("pt" . "Latin-1") ; Portuguese
; qu Quechua
@@ -2334,7 +2338,7 @@ See `set-language-info-alist' for use in programs."
("ru_RU.koi8r" "Cyrillic-KOI8" koi8-r)
("ru_RU" "Russian" iso-8859-5)
("ru_UA" "Russian" koi8-u)
- ; rw Kinyarwanda
+ ("rw" . "UTF-8") ; Kinyarwanda
("sa" . "Devanagari") ; Sanskrit
; sd Sindhi
("se" . "UTF-8") ; Northern Sami
@@ -2355,6 +2359,7 @@ See `set-language-info-alist' for use in programs."
; su Sundanese
("sv" "Swedish" iso-8859-1) ; Swedish
("sw" . "Latin-1") ; Swahili
+ ("taj" "Tajik" koi8-t) ; MS-Windows Tajik w/Cyrillic script
("ta" "Tamil" utf-8)
("te" "Telugu" utf-8) ; Telugu
("tg" "Tajik" koi8-t)
@@ -2364,15 +2369,17 @@ See `set-language-info-alist' for use in programs."
("th" "Thai" iso-8859-11)
("ti" "Ethiopic" utf-8) ; Tigrinya
("tig_ER" . "UTF-8") ; Tigre
- ; tk Turkmen
+ ("tk" . "Latin-5") ; Turkmen
+ ("tuk" . "Latin-5") ; MS-Windows Turkmen
("tl" . "Latin-1") ; Tagalog
("tn" . "Latin-9") ; Setswana, Tswana
; to Tonga
("tr" "Turkish" iso-8859-9)
+ ("tsn" . "Latin-9") ; MS-Windows Tswana
("ts" . "Latin-1") ; Tsonga
("tt" . "UTF-8") ; Tatar
; tw Twi
- ; ug Uighur
+ ("ug" . "UTF-8") ; Uighur
("uk" "Ukrainian" koi8-u)
("ur" . "UTF-8") ; Urdu
("uz_UZ@cyrillic" . "UTF-8"); Uzbek
@@ -2381,10 +2388,10 @@ See `set-language-info-alist' for use in programs."
("vi" "Vietnamese" utf-8)
; vo Volapuk
("wa" . "Latin-1") ; Walloon
- ; wo Wolof
+ ("wo" . "UTF-8") ; Wolof
("xh" . "Latin-1") ; Xhosa
("yi" . "Windows-1255") ; Yiddish
- ; yo Yoruba
+ ("yo" . "UTF-8") ; Yoruba
; za Zhuang
("zh_HK" . "Chinese-Big5")
; zh_HK/BIG5-HKSCS \
@@ -2394,6 +2401,9 @@ See `set-language-info-alist' for use in programs."
("zh_CN.GB18030" "Chinese-GB18030")
("zh_CN.UTF-8" . "Chinese-GBK")
("zh_CN" . "Chinese-GB")
+ ("zhh" . "Chinese-Big5") ; MS-Windows Chinese (Hong Kong S.A.R.)
+ ("zhi" . "Chinese-GBK") ; MS-Windows Chinese (Singapore)
+ ("zhm" . "Chinese-Big5") ; MS-Windows Chinese (Macao S.A.R.)
("zh" . "Chinese-GB")
("zu" . "Latin-1") ; Zulu
@@ -2411,12 +2421,23 @@ See `set-language-info-alist' for use in programs."
("sp" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet), e.g. X11R6.4
("su" . "Latin-1") ; Finnish, e.g. Solaris 2.6
("jp" . "Japanese") ; e.g. MS Windows
- ("chs" . "Chinese-GBK") ; MS Windows Chinese Simplified
- ("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional
+ ("chs" . "Chinese-GBK") ; MS Windows Chinese Simplified (PRC)
+ ("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional (Taiwan)
("gbz" . "UTF-8") ; MS Windows Dari Persian
("div" . "UTF-8") ; MS Windows Divehi (Maldives)
("wee" . "Latin-2") ; MS Windows Lower Sorbian
("wen" . "Latin-2") ; MS Windows Upper Sorbian
+ ("ind" . "Latin-1") ; MS-Windows Indonesian
+ ("sme" . "UTF-8") ; MS-Windows Northern Sami (Norway)
+ ("smf" . "UTF-8") ; MS-Windows Northern Sami (Sweden)
+ ("smg" . "UTF-8") ; MS-Windows Northern Sami (Finland)
+ ("kdi" "Kannada" utf-8) ; MS-Windows Kannada
+ ("mar" "Devanagari" utf-8) ; MS-Windows Marathi
+ ("khm" "Khmer" utf-8) ; MS-Windows Khmer
+ ("iri" . "Latin-1") ; MS-Windows Irish Gaelic
+ ; mwk MS-Windows Mohawk (Canada)
+ ("uig" . "UTF-8") ; MS-Windows Uighur
+ ("kin" . "UTF-8") ; MS-Windows Kinyarwanda
))
"Alist of locale regexps vs the corresponding languages and coding systems.
Each element has this form:
@@ -2675,12 +2696,8 @@ See also `locale-charset-language-names', `locale-language-names',
(unless frame
(set-language-environment language-name))
- ;; If the default enable-multibyte-characters is nil,
- ;; we are using single-byte characters,
- ;; so the display table and terminal coding system are irrelevant.
- (when (default-value 'enable-multibyte-characters)
- (set-display-table-and-terminal-coding-system
- language-name coding-system frame))
+ (set-display-table-and-terminal-coding-system
+ language-name coding-system frame)
;; Set the `keyboard-coding-system' if appropriate (tty
;; only). At least X and MS Windows can generate
@@ -2722,10 +2739,20 @@ See also `locale-charset-language-names', `locale-language-names',
(output-coding
(if noninteractive
(intern (format "cp%d" (w32-get-console-output-codepage)))
- code-page-coding)))
- (when (coding-system-p code-page-coding)
+ code-page-coding))
+ (multibyte-code-page-coding
+ (or (and (boundp 'w32-multibyte-code-page)
+ (not (zerop w32-multibyte-code-page))
+ (intern (format "cp%d" w32-multibyte-code-page)))
+ code-page-coding))
+ (locale-coding
+ (if noninteractive
+ code-page-coding
+ multibyte-code-page-coding)))
+ (when (and (coding-system-p code-page-coding)
+ (coding-system-p locale-coding))
(or output-coding (setq output-coding code-page-coding))
- (unless frame (setq locale-coding-system code-page-coding))
+ (unless frame (setq locale-coding-system locale-coding))
(set-keyboard-coding-system code-page-coding frame)
(set-terminal-coding-system output-coding frame)
(setq default-file-name-coding-system ansi-code-page-coding))))
@@ -2747,7 +2774,6 @@ See also `locale-charset-language-names', `locale-language-names',
(let ((paper (locale-info 'paper))
locale)
(if paper
- ;; This will always be null at the time of writing.
(cond
((equal paper '(216 279))
(setq ps-paper-type 'letter))
@@ -2950,12 +2976,13 @@ on encoding."
(#x14400 . #x14646)
;; (#x14647 . #x167FF) unused
(#x16800 . #x16F9F)
- (#x16FE0 . #x16FE0)
+ (#x16FE0 . #x16FE3)
;; (#x17000 . #x187FF) Tangut Ideographs
;; (#x18800 . #x18AFF) Tangut Components
;; (#x18B00 . #x1AFFF) unused
- (#x1B000 . #x1B12F)
- ;; (#x1B130 . #x1B16F) unused
+ (#x1B000 . #x1B11F)
+ ;; (#x1B120 . #x1B14F) unused
+ (#x1B150 . #x1B16F)
(#x1B170 . #x1B2FF)
;; (#x1B300 . #x1BBFF) unused
(#x1BC00 . #x1BCAF)
diff --git a/lisp/international/mule-conf.el b/lisp/international/mule-conf.el
index 6db795739de..c84dc819d1c 100644
--- a/lisp/international/mule-conf.el
+++ b/lisp/international/mule-conf.el
@@ -222,20 +222,19 @@
;; Can this be shared with 8859-11?
;; N.b. not all of these are defined in Unicode.
(define-charset 'thai-tis620
- "TIS620.2533"
+ "MULE charset for TIS620.2533"
:short-name "TIS620.2533"
:iso-final-char ?T
:emacs-mule-id 133
:code-space [32 127]
:code-offset #x0E00)
-;; Fixme: doc for this, c.f. above
(define-charset 'tis620-2533
- "TIS620.2533"
+ "TIS620.2533, a.k.a. TIS-620. Like `thai-iso8859-11', but without NBSP."
:short-name "TIS620.2533"
:ascii-compatible-p t
:code-space [0 255]
- :superset '(ascii eight-bit-control (thai-tis620 . 128)))
+ :superset '(ascii (thai-tis620 . 128)))
(define-charset 'jisx0201
"JISX0201"
@@ -1067,6 +1066,15 @@
:mime-charset 'ebcdic-uk
:map "EBCDICUK")
+(define-charset 'ibm038
+ "International version of EBCDIC"
+ :short-name "IBM038"
+ :code-space [0 255]
+ :mime-charset 'ibm038
+ :map "IBM038")
+(define-charset-alias 'ebcdic-int 'ibm038)
+(define-charset-alias 'cp038 'ibm038)
+
(define-charset 'ibm1047
;; Says groff:
"IBM1047, `EBCDIC Latin 1/Open Systems' used by OS/390 Unix."
@@ -1576,6 +1584,61 @@ for decoding and encoding files, process I/O, etc."
(aset latin-extra-code-table ?\225 t)
(aset latin-extra-code-table ?\226 t)
+(defcustom password-word-equivalents
+ '("password" "passcode" "passphrase" "pass phrase"
+ ; These are sorted according to the GNU en_US locale.
+ "암호" ; ko
+ "パスワード" ; ja
+ "ପ୍ରବେଶ ସଙ୍କେତ" ; or
+ "ពាក្យសម្ងាត់" ; km
+ "adgangskode" ; da
+ "contraseña" ; es
+ "contrasenya" ; ca
+ "geslo" ; sl
+ "hasło" ; pl
+ "heslo" ; cs, sk
+ "iphasiwedi" ; zu
+ "jelszó" ; hu
+ "lösenord" ; sv
+ "lozinka" ; hr, sr
+ "mật khẩu" ; vi
+ "mot de passe" ; fr
+ "parola" ; tr
+ "pasahitza" ; eu
+ "passord" ; nb
+ "passwort" ; de
+ "pasvorto" ; eo
+ "salasana" ; fi
+ "senha" ; pt
+ "slaptažodis" ; lt
+ "wachtwoord" ; nl
+ "كلمة السر" ; ar
+ "ססמה" ; he
+ "лозинка" ; sr
+ "пароль" ; kk, ru, uk
+ "गुप्तशब्द" ; mr
+ "शब्दकूट" ; hi
+ "પાસવર્ડ" ; gu
+ "సంకేతపదము" ; te
+ "ਪਾਸਵਰਡ" ; pa
+ "ಗುಪ್ತಪದ" ; kn
+ "கடவுச்சொல்" ; ta
+ "അടയാളവാക്ക്" ; ml
+ "গুপ্তশব্দ" ; as
+ "পাসওয়ার্ড" ; bn_IN
+ "රහස්පදය" ; si
+ "密码" ; zh_CN
+ "密碼" ; zh_TW
+ )
+ "List of words equivalent to \"password\".
+This is used by Shell mode and other parts of Emacs to recognize
+password prompts, including prompts in languages other than
+English. Different case choices should not be assumed to be
+included; callers should bind `case-fold-search' to t."
+ :type '(repeat string)
+ :version "24.4"
+ :group 'processes)
+
;; The old code-pages library is obsoleted by coding systems based on
;; the charsets defined in this file but might be required by user
;; code.
diff --git a/lisp/international/mule-diag.el b/lisp/international/mule-diag.el
index d6ac8944d78..472529ffc05 100644
--- a/lisp/international/mule-diag.el
+++ b/lisp/international/mule-diag.el
@@ -1104,8 +1104,6 @@ system which uses fontsets)."
(insert "Version of this emacs:\n " (emacs-version) "\n\n")
(insert "Configuration options:\n " system-configuration-options "\n\n")
(insert "Multibyte characters awareness:\n"
- (format " default: %S\n" (default-value
- 'enable-multibyte-characters))
(format " current-buffer: %S\n\n" enable-multibyte-characters))
(insert "Current language environment: " current-language-environment
"\n\n")
diff --git a/lisp/international/mule-util.el b/lisp/international/mule-util.el
index 2526f1ee324..8ad212796a5 100644
--- a/lisp/international/mule-util.el
+++ b/lisp/international/mule-util.el
@@ -342,7 +342,7 @@ per-character basis, this may not be accurate."
(let ((eol-offset 0)
;; Make sure we terminate, even if BYTE falls right in the middle
;; of a CRLF or some other weird corner case.
- (omin 0) (omax most-positive-fixnum)
+ (omin 0) omax
pos lines)
(while
(progn
@@ -355,9 +355,9 @@ per-character basis, this may not be accurate."
(setq pos (point-max))))
;; Adjust POS for DOS EOL format.
(setq lines (1- (line-number-at-pos pos)))
- (and (not (= lines eol-offset)) (> omax omin)))
+ (and (not (= lines eol-offset)) (or (not omax) (> omax omin))))
(if (> lines eol-offset)
- (setq omax (min (1- omax) lines)
+ (setq omax (if omax (min (1- omax) lines) lines)
eol-offset omax)
(setq omin (max (1+ omin) lines)
eol-offset omin)))
@@ -393,17 +393,17 @@ QUALITY can be:
japanese-cp932 korean-cp949)))
(setq type 'single-byte))
(pcase type
- (`utf-8
+ ('utf-8
(when (coding-system-get coding-system :bom)
(setq byte (max 0 (- byte 3))))
(if (= eol 1)
(filepos-to-bufferpos--dos (+ pm byte) #'byte-to-position)
(byte-to-position (+ pm byte))))
- (`single-byte
+ ('single-byte
(if (= eol 1)
(filepos-to-bufferpos--dos (+ pm byte) #'identity)
(+ pm byte)))
- ((and `utf-16
+ ((and 'utf-16
;; FIXME: For utf-16, we could use the same approach as used for
;; dos EOLs (counting the number of non-BMP chars instead of the
;; number of lines).
@@ -419,8 +419,8 @@ QUALITY can be:
(+ pm byte)))
(_
(pcase quality
- (`approximate (byte-to-position (+ pm byte)))
- (`exact
+ ('approximate (byte-to-position (+ pm byte)))
+ ('exact
;; Rather than assume that the file exists and still holds the right
;; data, we reconstruct it based on the buffer's content.
(let ((buf (current-buffer)))
@@ -470,7 +470,7 @@ QUALITY can be:
japanese-cp932 korean-cp949)))
(setq type 'single-byte))
(pcase type
- (`utf-8
+ ('utf-8
(setq byte (position-bytes position))
(when (null byte)
(if (<= position 0)
@@ -482,9 +482,9 @@ QUALITY can be:
(if (coding-system-get coding-system :bom) 3 0)
;; Account for CR in CRLF pairs.
lineno))
- (`single-byte
+ ('single-byte
(+ position -1 lineno))
- ((and `utf-16
+ ((and 'utf-16
;; FIXME: For utf-16, we could use the same approach as used for
;; dos EOLs (counting the number of non-BMP chars instead of the
;; number of lines).
@@ -498,8 +498,8 @@ QUALITY can be:
lineno))
(_
(pcase quality
- (`approximate (+ (position-bytes position) -1 lineno))
- (`exact
+ ('approximate (+ (position-bytes position) -1 lineno))
+ ('exact
;; Rather than assume that the file exists and still holds the right
;; data, we reconstruct its relevant portion.
(let ((buf (current-buffer)))
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index cc0658dc3f4..ba30fee4961 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -343,7 +343,7 @@ Return t if file exists."
;; Have the original buffer current while we eval.
(eval-buffer buffer nil
;; This is compatible with what `load' does.
- (if purify-flag file fullname)
+ (if dump-mode file fullname)
nil t))
(let (kill-buffer-hook kill-buffer-query-functions)
(kill-buffer buffer)))
@@ -819,10 +819,10 @@ VALUE is a CCL program name defined by `define-ccl-program'. The
CCL program reads a character sequence and writes a byte sequence
as an encoding result.
-`:inhibit-null-byte-detection'
+`:inhibit-nul-byte-detection'
VALUE non-nil means Emacs ignore null bytes on code detection.
-See the variable `inhibit-null-byte-detection'. This attribute
+See the variable `inhibit-nul-byte-detection'. This attribute
is meaningful only when `:coding-type' is `undecided'.
`:inhibit-iso-escape-detection'
@@ -867,7 +867,7 @@ non-ASCII files. This attribute is meaningful only when
:ccl-encoder
:valids))
((eq coding-type 'undecided)
- '(:inhibit-null-byte-detection
+ '(:inhibit-nul-byte-detection
:inhibit-iso-escape-detection
:prefer-utf-8))))))
@@ -911,7 +911,7 @@ non-ASCII files. This attribute is meaningful only when
(i 0))
(dolist (elt coding-system-iso-2022-flags)
(if (memq elt flags)
- (setq bits (logior bits (lsh 1 i))))
+ (setq bits (logior bits (ash 1 i))))
(setq i (1+ i)))
(setcdr (assq :flags spec-attrs) bits))))
@@ -920,8 +920,8 @@ non-ASCII files. This attribute is meaningful only when
(cons :name (cons name (cons :docstring (cons (purecopy docstring)
props)))))
(setcdr (assq :plist common-attrs) props)
- (apply 'define-coding-system-internal
- name (mapcar 'cdr (append common-attrs spec-attrs)))))
+ (apply #'define-coding-system-internal
+ name (mapcar #'cdr (append common-attrs spec-attrs)))))
(defun coding-system-doc-string (coding-system)
"Return the documentation string for CODING-SYSTEM."
@@ -1514,6 +1514,7 @@ DECODING is the coding system to be used to decode input from the process,
ENCODING is the coding system to be used to encode output to the process.
For a list of possible coding systems, use \\[list-coding-systems]."
+ (declare (interactive-only set-process-coding-system))
(interactive
"zCoding-system for output from the process: \nzCoding-system for input to the process: ")
(let ((proc (get-buffer-process (current-buffer))))
@@ -2554,7 +2555,7 @@ This function is intended to be added to `auto-coding-functions'."
;; (allowing for whitespace at bob). Note: 'DOCTYPE NETSCAPE' is
;; useful for Mozilla bookmark files.
(when (and (re-search-forward "\\`[[:space:]\n]*\\(<!doctype[[:space:]\n]+\\(html\\|netscape\\)\\|<html\\)" size t)
- (re-search-forward "<meta\\s-+\\(http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']text/\\sw+;\\s-*\\)?charset=[\"']?\\(.+?\\)[\"'\\s-/>]" size t))
+ (re-search-forward "<meta\\s-+\\(http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']text/\\sw+;\\s-*\\)?charset=[\"']?\\(.+?\\)[\"'[:space:]/>]" size t))
(let* ((match (match-string 2))
(sym (intern (downcase match))))
(if (coding-system-p sym)
diff --git a/lisp/international/quail.el b/lisp/international/quail.el
index f1fb5f7c605..3266b93b446 100644
--- a/lisp/international/quail.el
+++ b/lisp/international/quail.el
@@ -568,7 +568,7 @@ While this input method is active, the variable
(quail-delete-overlays)
(setq describe-current-input-method-function nil)
(quail-hide-guidance)
- (remove-hook 'post-command-hook 'quail-show-guidance t)
+ (remove-hook 'post-command-hook #'quail-show-guidance t)
(run-hooks 'quail-deactivate-hook))
(kill-local-variable 'input-method-function))
;; Let's activate Quail input method.
@@ -579,19 +579,18 @@ While this input method is active, the variable
(setq name (car (car quail-package-alist)))
(error "No Quail package loaded"))
(quail-select-package name)))
- (setq deactivate-current-input-method-function 'quail-deactivate)
- (setq describe-current-input-method-function 'quail-help)
+ (setq deactivate-current-input-method-function #'quail-deactivate)
+ (setq describe-current-input-method-function #'quail-help)
(quail-delete-overlays)
(setq quail-guidance-str "")
(quail-show-guidance)
;; If we are in minibuffer, turn off the current input method
;; before exiting.
(when (eq (selected-window) (minibuffer-window))
- (add-hook 'minibuffer-exit-hook 'quail-exit-from-minibuffer)
- (add-hook 'post-command-hook 'quail-show-guidance nil t))
+ (add-hook 'minibuffer-exit-hook #'quail-exit-from-minibuffer)
+ (add-hook 'post-command-hook #'quail-show-guidance nil t))
(run-hooks 'quail-activate-hook)
- (make-local-variable 'input-method-function)
- (setq input-method-function 'quail-input-method)))
+ (setq-local input-method-function #'quail-input-method)))
(define-obsolete-variable-alias
'quail-inactivate-hook
@@ -1367,9 +1366,7 @@ If STR has `advice' text property, append the following special event:
(let ((start (overlay-start overlay))
(end (overlay-end overlay)))
(if (< start end)
- (prog1
- (string-to-list (buffer-substring start end))
- (delete-region start end)))))
+ (string-to-list (delete-and-extract-region start end)))))
(defsubst quail-delete-region ()
"Delete the text in the current translation region of Quail."
@@ -1394,12 +1391,13 @@ Return the input string."
(generated-events nil) ;FIXME: What is this?
(input-method-function nil)
(modified-p (buffer-modified-p))
- last-command-event last-command this-command)
+ last-command-event last-command this-command inhibit-record)
(setq quail-current-key ""
quail-current-str ""
quail-translating t)
(if key
- (setq unread-command-events (cons key unread-command-events)))
+ (setq unread-command-events (cons key unread-command-events)
+ inhibit-record t))
(while quail-translating
(set-buffer-modified-p modified-p)
(quail-show-guidance)
@@ -1408,8 +1406,13 @@ Return the input string."
(or input-method-previous-message "")
quail-current-str
quail-guidance-str)))
+ ;; We inhibit record_char only for the first key,
+ ;; because it was already recorded before read_char
+ ;; called quail-input-method.
+ (inhibit--record-char inhibit-record)
(keyseq (read-key-sequence prompt nil nil t))
(cmd (lookup-key (quail-translation-keymap) keyseq)))
+ (setq inhibit-record nil)
(if (if key
(and (commandp cmd) (not (eq cmd 'quail-other-command)))
(eq cmd 'quail-self-insert-command))
@@ -1453,14 +1456,15 @@ Return the input string."
(generated-events nil) ;FIXME: What is this?
(input-method-function nil)
(modified-p (buffer-modified-p))
- last-command-event last-command this-command)
+ last-command-event last-command this-command inhibit-record)
(setq quail-current-key ""
quail-current-str ""
quail-translating t
quail-converting t
quail-conversion-str "")
(if key
- (setq unread-command-events (cons key unread-command-events)))
+ (setq unread-command-events (cons key unread-command-events)
+ inhibit-record t))
(while quail-converting
(set-buffer-modified-p modified-p)
(or quail-translating
@@ -1476,8 +1480,13 @@ Return the input string."
quail-conversion-str
quail-current-str
quail-guidance-str)))
+ ;; We inhibit record_char only for the first key,
+ ;; because it was already recorded before read_char
+ ;; called quail-input-method.
+ (inhibit--record-char inhibit-record)
(keyseq (read-key-sequence prompt nil nil t))
(cmd (lookup-key (quail-conversion-keymap) keyseq)))
+ (setq inhibit-record nil)
(if (if key (commandp cmd) (eq cmd 'quail-self-insert-command))
(progn
(setq last-command-event (aref keyseq (1- (length keyseq)))
diff --git a/lisp/international/titdic-cnv.el b/lisp/international/titdic-cnv.el
index ac5a0e8861d..e6065fb0f76 100644
--- a/lisp/international/titdic-cnv.el
+++ b/lisp/international/titdic-cnv.el
@@ -1,4 +1,4 @@
-;;; titdic-cnv.el --- convert cxterm dictionary (TIT format) to Quail package -*- coding:iso-2022-7bit; -*-
+;;; titdic-cnv.el --- convert cxterm dictionary (TIT format) to Quail package -*- coding: utf-8-emacs; lexical-binding:t -*-
;; Copyright (C) 1997-1998, 2000-2019 Free Software Foundation, Inc.
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
@@ -83,9 +83,9 @@
;; how to select a translation from a list of candidates.
(defvar quail-cxterm-package-ext-info
- '(("chinese-4corner" "$(0(?-F(B")
- ("chinese-array30" "$(0#R#O(B")
- ("chinese-ccdospy" "$AKuF4(B"
+ '(("chinese-4corner" "四角")
+ ("chinese-array30" "30")
+ ("chinese-ccdospy" "缩拼"
"Pinyin base input method for Chinese charset GB2312 (`chinese-gb2312').
Pinyin is the standard Roman transliteration method for Chinese.
@@ -94,10 +94,10 @@ method `chinese-py'.
This input method works almost the same way as `chinese-py'. The
difference is that you type a single key for these Pinyin spelling.
- Pinyin: zh en eng ang ch an ao ai ong sh ing yu($A(9(B)
+ Pinyin: zh en eng ang ch an ao ai ong sh ing yu(ü)
keyseq: a f g h i j k l s u y v
For example:
- Chinese: $A0!(B $A9{(B $AVP(B $AND(B $A9b(B $ASq(B $AH+(B
+ Chinese: 啊 果 中 文 光 玉 全
Pinyin: a guo zhong wen guang yu quan
Keyseq: a1 guo4 as1 wf4 guh1 yu..6 qvj6
@@ -106,14 +106,14 @@ For example:
For double-width GB2312 characters corresponding to ASCII, use the
input method `chinese-qj'.")
- ("chinese-ecdict" "$(05CKH(B"
+ ("chinese-ecdict" "英漢"
"In this input method, you enter a Chinese (Big5) character or word
by typing the corresponding English word. For example, if you type
-\"computer\", \"$(0IZH+(B\" is input.
+\"computer\", \"電腦\" is input.
\\<quail-translation-docstring>")
- ("chinese-etzy" "$(06/0D(B"
+ ("chinese-etzy" "倚注"
"Zhuyin base input method for Chinese Big5 characters (`chinese-big5-1',
`chinese-big5-2').
@@ -122,20 +122,20 @@ compose one Chinese character.
In this input method, you enter a Chinese character by first typing
keys corresponding to Zhuyin symbols (see the above table) followed by
-SPC, 1, 2, 3, or 4 specifying a tone (SPC:$(0?v(N(B, 1:$(0M=Vy(B, 2:$(0Dm(N(B, 3: $(0&9Vy(B,
-4:$(0(+Vy(B).
+SPC, 1, 2, 3, or 4 specifying a tone (SPC:陰平, 1:輕聲, 2:陽平, 3: 上聲,
+4:去聲).
\\<quail-translation-docstring>")
- ("chinese-punct-b5" "$(0O:(BB"
+ ("chinese-punct-b5" "標B"
"Input method for Chinese punctuation and symbols of Big5
\(`chinese-big5-1' and `chinese-big5-2').")
- ("chinese-punct" "$A1j(BG"
+ ("chinese-punct" "标G"
"Input method for Chinese punctuation and symbols of GB2312
\(`chinese-gb2312').")
- ("chinese-py-b5" "$(03<(BB"
+ ("chinese-py-b5" "拼B"
"Pinyin base input method for Chinese Big5 characters
\(`chinese-big5-1', `chinese-big5-2').
@@ -153,28 +153,28 @@ method `chinese-qj-b5'.
The input method `chinese-py' and `chinese-tonepy' are also Pinyin
based, but for the character set GB2312 (`chinese-gb2312').")
- ("chinese-qj-b5" "$(0)A(BB")
+ ("chinese-qj-b5" "全B")
- ("chinese-qj" "$AH+(BG")
+ ("chinese-qj" "全G")
- ("chinese-sw" "$AJWN2(B"
+ ("chinese-sw" "首尾"
"Radical base input method for Chinese charset GB2312 (`chinese-gb2312').
In this input method, you enter a Chinese character by typing two
-keys. The first key corresponds to the first ($AJW(B) radical, the second
-key corresponds to the last ($AN2(B) radical. The correspondence of keys
+keys. The first key corresponds to the first (首) radical, the second
+key corresponds to the last (尾) radical. The correspondence of keys
and radicals is as below:
first radical:
a b c d e f g h i j k l m n o p q r s t u v w x y z
- $APD(B $AZ"(B $AJ,(B $AX<(B $A;p(B $A?Z(B $A^P(B $Ac_(B $AZ%(B $A\3(B $AXi(B $AD>(B $Alj(B $Ab;(B $ATB(B $Afy(B $AJ/(B $AMu(B $A0K(B $AX/(B $AHU(B $AeA(B $Aak(B $AVq(B $AR;(B $AHK(B
+ 心 冖 尸 丶 火 口 扌 氵 讠 艹 亻 木 礻 饣 月 纟 石 王 八 丿 日 辶 犭 竹 一 人
last radical:
a b c d e f g h i j k l m n o p q r s t u v w x y z
- $ASV(B $AI=(B $AMA(B $A56(B $AZb(B $A?Z(B $ARB(B $Aqb(B $A4s(B $A6!(B $A[L(B $Ala(B $AJ.(B $A4u(B $AXg(B $ACE(B $A=q(B $AX-(B $AE.(B $ARR(B $A`m(B $AP!(B $A3'(B $A3f(B $A_.(B $A27(B
+ 又 山 土 刀 阝 口 衣 疋 大 丁 厶 灬 十 歹 冂 门 今 丨 女 乙 囗 小 厂 虫 弋 卜
\\<quail-translation-docstring>")
- ("chinese-tonepy" "$A5wF4(B"
+ ("chinese-tonepy" "调拼"
"Pinyin base input method for Chinese charset GB2312 (`chinese-gb2312').
Pinyin is the standard roman transliteration method for Chinese.
@@ -183,18 +183,18 @@ method `chinese-py'.
This input method works almost the same way as `chinese-py'. The
difference is that you must type 1..5 after each Pinyin spelling to
-specify a tone (1:$ARuF=(B, 2:$AQtF=(B, 3:$AIOIy(B, 4$AOBIy(B, 5:$AGaIy(B).
+specify a tone (1:阴平, 2:阳平, 3:上声, 4下声, 5:轻声).
\\<quail-translation-docstring>
-For instance, to input $ADc(B, you type \"n i 3 3\", the first \"n i\" is
+For instance, to input 你, you type \"n i 3 3\", the first \"n i\" is
a Pinyin, the next \"3\" specifies tone, and the last \"3\" selects
the third character from the candidate list.
For double-width GB2312 characters corresponding to ASCII, use the
input method `chinese-qj'.")
- ("chinese-zozy" "$(0I\0D(B"
+ ("chinese-zozy" "零注"
"Zhuyin base input method for Chinese Big5 characters (`chinese-big5-1',
`chinese-big5-2').
@@ -203,8 +203,8 @@ compose a Chinese character.
In this input method, you enter a Chinese character by first typing
keys corresponding to Zhuyin symbols (see the above table) followed by
-SPC, 6, 3, 4, or 7 specifying a tone (SPC:$(0?v(N(B, 6:$(0Dm(N(B, 3:$(0&9Vy(B, 4:$(0(+Vy(B,
-7:$(0M=Vy(B).
+SPC, 6, 3, 4, or 7 specifying a tone (SPC:陰平, 6:陽平, 3:上聲, 4:去聲,
+7:輕聲).
\\<quail-translation-docstring>")))
@@ -348,7 +348,7 @@ SPC, 6, 3, 4, or 7 specifying a tone (SPC:$(0?v(N(B, 6:$(0Dm(N(B, 3:$(0&9Vy
(princ (nth 2 (assoc tit-encode tit-encode-list)))
(princ "\" \"")
(princ (or title
- (if (string-match "[:$A!K$(0!(!J(B]+\\([^:$A!K$(0!(!K(B]+\\)" tit-prompt)
+ (if (string-match "[:∷:【]+\\([^:∷:】]+\\)" tit-prompt)
(substring tit-prompt (match-beginning 1) (match-end 1))
tit-prompt)))
(princ "\"\n"))
@@ -417,9 +417,7 @@ SPC, 6, 3, 4, or 7 specifying a tone (SPC:$(0?v(N(B, 6:$(0Dm(N(B, 3:$(0&9Vy
;; function call.
(defun tit-process-body ()
(message "Formatting translation rules...")
- (let* ((template (list nil nil))
- (second (cdr template))
- (prev-key "")
+ (let* ((prev-key "")
ch key translations pos)
(princ "(quail-define-rules\n")
(while (null (eobp))
@@ -500,8 +498,7 @@ the generated Quail package is saved."
(goto-char (point-min))
(decode-coding-region (point-min) (point-max) coding-system)
;; Explicitly set eol format to `unix'.
- (setq coding-system-for-write
- (coding-system-change-eol-conversion coding-system 'unix))
+ (setq coding-system-for-write 'utf-8-unix)
(remove-text-properties (point-min) (point-max) '(charset nil)))
(set-buffer-multibyte t)
@@ -522,7 +519,6 @@ the generated Quail package is saved."
(princ ";; Local Variables:\n")
(princ ";; version-control: never\n")
(princ ";; no-update-autoloads: t\n")
- (princ (format ";; coding: %s\n" coding-system-for-write))
(princ ";; End:\n"))))))
;;;###autoload
@@ -581,7 +577,7 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"."
;; )
(defvar quail-misc-package-ext-info
- '(("chinese-b5-tsangchi" "$(06A(BB"
+ '(("chinese-b5-tsangchi" "倉B"
"cangjie-table.b5" big5 "tsang-b5.el"
tsang-b5-converter
"\
@@ -591,7 +587,7 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"."
;; # unmodified versions is granted without royalty provided
;; # this notice is preserved.")
- ("chinese-b5-quick" "$(0X|(BB"
+ ("chinese-b5-quick" "簡B"
"cangjie-table.b5" big5 "quick-b5.el"
quick-b5-converter
"\
@@ -601,7 +597,7 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"."
;; # unmodified versions is granted without royalty provided
;; # this notice is preserved.")
- ("chinese-cns-tsangchi" "$(GT?(BC"
+ ("chinese-cns-tsangchi" "倉C"
"cangjie-table.cns" iso-2022-cn-ext "tsang-cns.el"
tsang-cns-converter
"\
@@ -611,7 +607,7 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"."
;; # unmodified versions is granted without royalty provided
;; # this notice is preserved.")
- ("chinese-cns-quick" "$(Gv|(BC"
+ ("chinese-cns-quick" "簡C"
"cangjie-table.cns" iso-2022-cn-ext "quick-cns.el"
quick-cns-converter
"\
@@ -621,7 +617,7 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"."
;; # unmodified versions is granted without royalty provided
;; # this notice is preserved.")
- ("chinese-py" "$AF4(BG"
+ ("chinese-py" "拼G"
"pinyin.map" cn-gb-2312 "PY.el"
py-converter
"\
@@ -649,7 +645,7 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"."
;; You should have received a copy of the GNU General Public License along with
;; CCE. If not, see <https://www.gnu.org/licenses/>.")
- ("chinese-ziranma" "$AWTH;(B"
+ ("chinese-ziranma" "自然"
"ziranma.cin" cn-gb-2312 "ZIRANMA.el"
ziranma-converter
"\
@@ -677,7 +673,7 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"."
;; You should have received a copy of the GNU General Public License along with
;; CCE. If not, see <https://www.gnu.org/licenses/>.")
- ("chinese-ctlau" "$AAuTA(B"
+ ("chinese-ctlau" "刘粤"
"CTLau.html" cn-gb-2312 "CTLau.el"
ctlau-gb-converter
"\
@@ -702,7 +698,7 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"."
;; # You should have received a copy of the GNU General Public License
;; # along with this program. If not, see <https://www.gnu.org/licenses/>.")
- ("chinese-ctlaub" "$(0N,Gn(B"
+ ("chinese-ctlaub" "劉粵"
"CTLau-b5.html" big5 "CTLau-b5.el"
ctlau-b5-converter
"\
@@ -732,38 +728,38 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"."
;; dictionary in the buffer DICBUF. The input method name of the
;; Quail package is NAME, and the title string is TITLE.
-;; TSANG-P is non-nil, generate $(06AQo(B input method. Otherwise
-;; generate $(0X|/y(B (simple version of $(06AQo(B). If BIG5-P is non-nil, the
+;; TSANG-P is non-nil, generate 倉頡 input method. Otherwise
+;; generate 簡易 (simple version of 倉頡). If BIG5-P is non-nil, the
;; input method is for inputting Big5 characters. Otherwise the input
;; method is for inputting CNS characters.
-(defun tsang-quick-converter (dicbuf name title tsang-p big5-p)
- (let ((fulltitle (if tsang-p (if big5-p "$(06AQo(B" "$(GT?on(B")
- (if big5-p "$(0X|/y(B" "$(Gv|Mx(B")))
+(defun tsang-quick-converter (dicbuf tsang-p big5-p)
+ (let ((fulltitle (if tsang-p (if big5-p "倉頡" "倉頡")
+ (if big5-p "簡易" "簡易")))
dic)
(goto-char (point-max))
(if big5-p
- (insert (format "\"$(0&d'GTT&,!J(B%s$(0!K(BBIG5
+ (insert (format "\"中文輸入【%s】BIG5
- $(0KHM$(B%s$(0TT&,WoOu(B
+ 漢語%s輸入鍵盤
- [Q $(0'D(B] [W $(0(q(B] [E $(0'V(B] [R $(0&H(B] [T $(0'>(B] [Y $(0&4(B] [U $(0&U(B] [I $(0'B(B] [O $(0&*(B] [P $(0'A(B]
+ [Q 手] [W 田] [E 水] [R 口] [T 廿] [Y 卜] [U 山] [I 戈] [O 人] [P 心]
- [A $(0'K(B] [S $(0&T(B] [D $(0'N(B] [F $(0'W(B] [G $(0&I(B] [H $(0*M(B] [J $(0&3(B] [L $(0&d(B]
+ [A 日] [S 尸] [D 木] [F 火] [G 土] [H 竹] [J 十] [L 中]
- [Z ] [X $(0[E(B] [C $(01[(B] [V $(0&M(B] [B $(0'M(B] [N $(0&_(B] [M $(0&"(B]
+ [Z ] [X 難] [C 金] [V 女] [B 月] [N 弓] [M 一]
\\\\<quail-translation-docstring>\"\n"
fulltitle fulltitle))
- (insert (format "\"$(GDcEFrSD+!J(B%s$(G!K(BCNS
+ (insert (format "\"中文輸入【%s】CNS
- $(GiGk#(B%s$(GrSD+uomu(B
+ 漢語%s輸入鍵盤
- [Q $(GEC(B] [W $(GFp(B] [E $(GEU(B] [R $(GDG(B] [T $(GE=(B] [Y $(GD3(B] [U $(GDT(B] [I $(GEA(B] [O $(GD)(B] [P $(GE@(B]
+ [Q 手] [W 田] [E 水] [R 口] [T 廿] [Y 卜] [U 山] [I 戈] [O 人] [P 心]
- [A $(GEJ(B] [S $(GDS(B] [D $(GEM(B] [F $(GEV(B] [G $(GDH(B] [H $(GHL(B] [J $(GD2(B] [L $(GDc(B]
+ [A 日] [S 尸] [D 木] [F 火] [G 土] [H 竹] [J 十] [L 中]
- [Z ] [X $(GyE(B] [C $(GOZ(B] [V $(GDL(B] [B $(GEL(B] [N $(GD^(B] [M $(GD!(B]
+ [Z ] [X 難] [C 金] [V 女] [B 月] [N 弓] [M 一]
\\\\<quail-translation-docstring>\"\n"
fulltitle fulltitle)))
@@ -782,7 +778,7 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"."
(while (not (eobp))
(forward-char 5)
(let ((trans (char-to-string (following-char)))
- key slot)
+ key)
(re-search-forward "\\([A-Z]+\\)\r*$" nil t)
(setq key (downcase
(if (or tsang-p
@@ -799,63 +795,63 @@ To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"."
(setq dic (sort dic (function (lambda (x y) (string< (car x ) (car y))))))
(dolist (elt dic)
(insert (format "(%S\t%S)\n" (car elt) (cdr elt))))
- (let ((punctuation '((";" "$(0!'!2!"!#!.!/(B" "$(G!'!2!"!#!.!/(B")
- (":" "$(0!(!+!3!%!$!&!0!1(B" "$(G!(!+!3!%!$!&!0!1(B")
- ("'" "$(0!e!d(B" "$(G!e!d(B")
- ("\"" "$(0!g!f!h!i!q(B" "$(G!g!f!h!i!q(B")
- ("\\" "$(0"`"b#M(B" "$(G"`"b#M(B")
- ("|" "$(0!6!8!:"^(B" "$(G!6!8!:"^(B")
- ("/" "$(0"_"a#L(B" "$(G"_"a#L(B")
- ("?" "$(0!)!4(B" "$(G!)!4(B")
- ("<" "$(0!R"6"A!T"H(B" "$(G!R"6"A!T"H(B")
- (">" "$(0!S"7"B!U(B" "$(G!S"7"B!U(B")
- ("[" "$(0!F!J!b!H!L!V!Z!X!\(B" "$(G!F!J!b!H!L!V!Z!X!\(B")
- ("]" "$(0!G!K!c!I!M!W![!Y!](B" "$(G!G!K!c!I!M!W![!Y!](B")
- ("{" "$(0!B!`!D(B " "$(G!B!`!D(B ")
- ("}" "$(0!C!a!E(B" "$(G!C!a!E(B")
- ("`" "$(0!j!k(B" "$(G!j!k(B")
- ("~" "$(0"D"+",!<!=(B" "$(G"D"+",!<!=(B")
- ("!" "$(0!*!5(B" "$(G!*!5(B")
- ("@" "$(0"i"n(B" "$(G"i"n(B")
- ("#" "$(0!l"-(B" "$(G!l"-(B")
- ("$" "$(0"c"l(B" "$(G"c"l(B")
- ("%" "$(0"h"m(B" "$(G"h"m(B")
- ("&" "$(0!m".(B" "$(G!m".(B")
- ("*" "$(0!n"/!o!w!x(B" "$(G!n"/!o!w!x(B")
- ("(" "$(0!>!^!@(B" "$(G!>!^!@(B")
- (")" "$(0!?!_!A(B" "$(G!?!_!A(B")
- ("-" "$(0!7!9"#"$"1"@(B" "$(G!7!9"#"$"1"@(B")
- ("_" "$(0"%"&(B" "$(G"%"&(B")
- ("=" "$(0"8"C(B" "$(G"8"C(B")
- ("+" "$(0"0"?(B" "$(G"0"?(B"))))
+ (let ((punctuation '((";" ";﹔,、﹐﹑" ";﹔,、﹐﹑")
+ (":" ":︰﹕.。‧﹒·" ":︰﹕.。・﹒·")
+ ("'" "’‘" "’‘")
+ ("\"" "”“〝〞〃" "”“〝〞〃")
+ ("\\" "\﹨╲" "\﹨╲")
+ ("|" "|︱︳∣" "︱︲|")
+ ("/" "/∕╱" "/∕╱")
+ ("?" "?﹖" "?﹖")
+ ("<" "〈<﹤︿∠" "〈<﹤︿∠")
+ (">" "〉>﹥﹀" "〉>﹦﹀")
+ ("[" "〔【﹝︹︻「『﹁﹃" "〔【﹝︹︻「『﹁﹃")
+ ("]" "〕】﹞︺︼」』﹂﹄" "〕】﹞︺︼」』﹂﹄")
+ ("{" "{﹛︷ " "{﹛︷ ")
+ ("}" "}﹜︸" "}﹜︸")
+ ("`" "‵′" "′‵")
+ ("~" "~﹋﹌︴﹏" "∼﹋﹌")
+ ("!" "!﹗" "!﹗")
+ ("@" "@﹫" "@﹫")
+ ("#" "#﹟" "#﹟")
+ ("$" "$﹩" "$﹩")
+ ("%" "%﹪" "%﹪")
+ ("&" "&﹠" "&﹠")
+ ("*" "*﹡※☆★" "*﹡※☆★")
+ ("(" "(﹙︵" "(﹙︵")
+ (")" ")﹚︶" ")﹚︶")
+ ("-" "–—¯ ̄-﹣" "—–‾-﹣")
+ ("_" "_ˍ" "_")
+ ("=" "=﹦" "=﹥")
+ ("+" "+﹢" "+﹢"))))
(dolist (elt punctuation)
(insert (format "(%S %S)\n" (concat "z" (car elt))
(if big5-p (nth 1 elt) (nth 2 elt))))))
(insert ")\n")))
-(defun tsang-b5-converter (dicbuf name title)
- (tsang-quick-converter dicbuf name title t t))
+(defun tsang-b5-converter (dicbuf)
+ (tsang-quick-converter dicbuf t t))
-(defun quick-b5-converter (dicbuf name title)
- (tsang-quick-converter dicbuf name title nil t))
+(defun quick-b5-converter (dicbuf)
+ (tsang-quick-converter dicbuf nil t))
-(defun tsang-cns-converter (dicbuf name title)
- (tsang-quick-converter dicbuf name title t nil))
+(defun tsang-cns-converter (dicbuf)
+ (tsang-quick-converter dicbuf t nil))
-(defun quick-cns-converter (dicbuf name title)
- (tsang-quick-converter dicbuf name title nil nil))
+(defun quick-cns-converter (dicbuf)
+ (tsang-quick-converter dicbuf nil nil))
;; Generate a code of a Quail package in the current buffer from
;; Pinyin dictionary in the buffer DICBUF. The input method name of
;; the Quail package is NAME, and the title string is TITLE.
-(defun py-converter (dicbuf name title)
+(defun py-converter (dicbuf)
(goto-char (point-max))
- (insert (format "%S\n" "$A::WVJdHk!KF4Rt!K(B
+ (insert (format "%S\n" "汉字输入∷拼音∷
- $AF4Rt7=08(B
+ 拼音方案
- $AP!P4S"NDWVD84z1m!8F4Rt!97{:E#,(B \"u(yu) $ATrSC(B u: $A1mJ>!C(B
+ 小写英文字母代表「拼音」符号, \"u(yu) 则用 u: 表示∶
Pinyin base input method for Chinese charset GB2312 (`chinese-gb2312').
@@ -869,14 +865,14 @@ character. The sequence is made by the combination of the initials
iang ing iong u ua uo uai ui uan un uan ueng yu yue yuan yun
(Note: In the correct Pinyin writing, the sequence \"yu\" in the last
- four finals should be written by the character u-umlaut `$A(9(B'.)
+ four finals should be written by the character u-umlaut `ü'.)
With this input method, you enter a Chinese character by first
entering its pinyin spelling.
\\<quail-translation-docstring>
-For instance, to input $ADc(B, you type \"n i C-n 3\". The first \"n i\"
+For instance, to input 你, you type \"n i C-n 3\". The first \"n i\"
is a Pinyin, \"C-n\" selects the next group of candidates (each group
contains at most 10 characters), \"3\" select the third character in
that group.
@@ -924,14 +920,14 @@ method `chinese-tonepy' with which you must specify tones by digits
;; Ziranma dictionary in the buffer DICBUF. The input method name of
;; the Quail package is NAME, and the title string is TITLE.
-(defun ziranma-converter (dicbuf name title)
+(defun ziranma-converter (dicbuf)
(let (dic)
(with-current-buffer dicbuf
(goto-char (point-min))
(search-forward "\n%keyname end")
(forward-line 1)
(let ((table (make-hash-table :test 'equal))
- elt pos key trans val)
+ pos key trans val)
(while (not (eobp))
(setq pos (point))
(skip-chars-forward "^ \t")
@@ -959,22 +955,22 @@ method `chinese-tonepy' with which you must specify tones by digits
table)))
(setq dic (sort dic (function (lambda (x y) (string< (car x) (car y))))))
(goto-char (point-max))
- (insert (format "%S\n" "$A::WVJdHk!K!>WTH;!?!K(B
-
- $A<|EL6TUU1m(B:
- $A)3)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)7(B
- $A)'#Q(B $A)'#W(B $A)'#E(B $A)'#R(B $A)'#T(B $A)'#Y(B $A)'#U(Bsh$A)'#I(Bch$A)'#O(B $A)'#P(B $A)'(B
- $A)'(B iu$A)'(B ua$A)'(B e$A)'(B uan$A)'(B ue$A)'(B uai$A)'(B u$A)'(B i$A)'(B o$A)'(B un$A)'(B
- $A)'(B $A)'(B ia$A)'(B $A)'(B van$A)'(B ve$A)'(B ing$A)'(B $A)'(B $A)'(B uo$A)'(B vn$A)'(B
- $A);)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)?(B
- $A)'#A(B $A)'#S(B $A)'#D(B $A)'#F(B $A)'#G(B $A)'#H(B $A)'#J(B $A)'#K(B $A)'#L(B $A)'(B
- $A)'(B a$A)'(Biong$A)'(Buang$A)'(B en$A)'(B eng$A)'(B ang$A)'(B an$A)'(B ao$A)'(B ai$A)'(B
- $A)'(B $A)'(B ong$A)'(Biang$A)'(B $A)'(B ng$A)'(B $A)'(B $A)'(B $A)'(B $A)'(B
- $A);)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)%)7(B
- $A)'#Z(B $A)'#X(B $A)'#C(B $A)'#V(Bzh$A)'#B(B $A)'#N(B $A)'#M(B $A)'#,(B $A)'#.(B $A)'(B $A#/(B $A)'(B
- $A)'(B ei$A)'(B ie$A)'(B iao$A)'(B ui$A)'(B ou$A)'(B in$A)'(B ian$A)'G0R3)':sR3)'7{:E)'(B
- $A)'(B $A)'(B $A)'(B $A)'(B v$A)'(B $A)'(B $A)'(B $A)'(B $A)'(B $A)'(B $A)'(B
- $A);)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)?(B
+ (insert (format "%S\n" "汉字输入∷【自然】∷
+
+ 键盘对照表:
+ ┏━━┳━━┳━━┳━━┳━━┳━━┳━━┳━━┳━━┳━━┓
+ ┃Q ┃W ┃E ┃R ┃T ┃Y ┃Ush┃Ich┃O ┃P ┃
+ ┃ iu┃ ua┃ e┃ uan┃ ue┃ uai┃ u┃ i┃ o┃ un┃
+ ┃ ┃ ia┃ ┃ van┃ ve┃ ing┃ ┃ ┃ uo┃ vn┃
+ ┗┳━┻┳━┻┳━┻┳━┻┳━┻┳━┻┳━┻┳━┻┳━┻┳━┛
+ ┃A ┃S ┃D ┃F ┃G ┃H ┃J ┃K ┃L ┃
+ ┃ a┃iong┃uang┃ en┃ eng┃ ang┃ an┃ ao┃ ai┃
+ ┃ ┃ ong┃iang┃ ┃ ng┃ ┃ ┃ ┃ ┃
+ ┗┳━┻┳━┻┳━┻┳━┻┳━┻┳━┻┳━┻┳━┻┳━┻┳━━┓
+ ┃Z ┃X ┃C ┃Vzh┃B ┃N ┃M ┃, ┃. ┃ / ┃
+ ┃ ei┃ ie┃ iao┃ ui┃ ou┃ in┃ ian┃前页┃后页┃符号┃
+ ┃ ┃ ┃ ┃ v┃ ┃ ┃ ┃ ┃ ┃ ┃
+ ┗━━┻━━┻━━┻━━┻━━┻━━┻━━┻━━┻━━┻━━┛
Pinyin base input method for Chinese GB2312 characters (`chinese-gb2312').
@@ -986,34 +982,34 @@ method `chinese-py'.
Unlike the standard spelling of Pinyin, in this input method all
initials and finals are assigned to single keys (see the above table).
For instance, the initial \"ch\" is assigned to the key `i', the final
-\"iu\" is assigned to the key `q', and tones 1, 2, 3, 4, and $AGaIy(B are
+\"iu\" is assigned to the key `q', and tones 1, 2, 3, 4, and 轻声 are
assigned to the keys `q', `w', `e', `r', `t' respectively.
\\<quail-translation-docstring>
To input one-letter words, you type 4 keys, the first two for the
Pinyin of the letter, next one for tone, and the last one is always a
-quote ('). For instance, \"vsq'\" input $AVP(B. Exceptions are these
+quote ('). For instance, \"vsq'\" input 中. Exceptions are these
letters. You can input them just by typing a single key.
- Character: $A04(B $A2;(B $A4N(B $A5D(B $A6~(B $A7"(B $A8v(B $A:M(B $A3v(B $A<0(B $A?I(B $AAK(B $AC;(B
+ Character: 按 不 次 的 二 发 个 和 出 及 可 了 没
Key: a b c d e f g h i j k l m
- Character: $ADc(B $AE7(B $AF,(B $AF_(B $AHK(B $AH}(B $AK{(B $AJG(B $AWE(B $ANR(B $AP!(B $AR;(B $ATZ(B
+ Character: 你 欧 片 七 人 三 他 是 着 我 小 一 在
Key: n o p q r s t u v w x y z
To input two-letter words, you have two ways. One way is to type 4
keys, two for the first Pinyin, two for the second Pinyin. For
-instance, \"vsgo\" inputs $AVP9z(B. Another way is to type 3 keys: 2
+instance, \"vsgo\" inputs 中国. Another way is to type 3 keys: 2
initials of two letters, and quote ('). For instance, \"vg'\" also
-inputs $AVP9z(B.
+inputs 中国.
To input three-letter words, you type 4 keys: initials of three
-letters, and the last is quote ('). For instance, \"bjy'2\" inputs $A11(B
-$A>)Q<(B (the last `2' is to select one of the candidates).
+letters, and the last is quote ('). For instance, \"bjy'2\" inputs 北
+京鸭 (the last `2' is to select one of the candidates).
To input words of more than three letters, you type 4 keys, initials
of the first three letters and the last letter. For instance,
-\"bjdt\" inputs $A11>)5gJSL((B.
+\"bjdt\" inputs 北京电视台.
To input symbols and punctuation, type `/' followed by one of `a' to
`z', then select one of the candidates."))
@@ -1033,7 +1029,7 @@ To input symbols and punctuation, type `/' followed by one of `a' to
;; method name of the Quail package is NAME, and the title string is
;; TITLE. DESCRIPTION is the string shown by describe-input-method.
-(defun ctlau-converter (dicbuf name title description)
+(defun ctlau-converter (dicbuf description)
(goto-char (point-max))
(insert (format "%S\n" description))
(insert " '((\"\C-?\" . quail-delete-last-char)
@@ -1043,7 +1039,7 @@ To input symbols and punctuation, type `/' followed by one of `a' to
(\"<\" . quail-prev-translation))
nil nil nil nil)\n\n")
(insert "(quail-define-rules\n")
- (let (dicbuf-start dicbuf-end key-start key (pos (point)))
+ (let (dicbuf-start dicbuf-end key-start (pos (point)))
;; Find the dictionary, which starts below a horizontal rule and
;; ends at the second to last line in the HTML file.
(with-current-buffer dicbuf
@@ -1060,7 +1056,7 @@ To input symbols and punctuation, type `/' followed by one of `a' to
;; which the file is converted have no Big5 equivalent. Go
;; through and delete them.
(goto-char pos)
- (while (search-forward "$(0!{(B" nil t)
+ (while (search-forward "□" nil t)
(delete-char -1))
;; Uppercase keys in dictionary need to be downcased. Backslashes
;; at the beginning of keys need to be turned into double
@@ -1082,33 +1078,33 @@ To input symbols and punctuation, type `/' followed by one of `a' to
(forward-line 1)))
(insert ")\n"))
-(defun ctlau-gb-converter (dicbuf name title)
- (ctlau-converter dicbuf name title
-"$A::WVJdHk!KAuN}OiJ=TARt!K(B
+(defun ctlau-gb-converter (dicbuf)
+ (ctlau-converter dicbuf
+"汉字输入∷刘锡祥式粤音∷
- $AAuN}OiJ=TASoW"Rt7=08(B
+ 刘锡祥式粤语注音方案
Sidney Lau's Cantonese transcription scheme as described in his book
\"Elementary Cantonese\", The Government Printer, Hong Kong, 1972.
- This file was prepared by Fung Fung Lee ($A@n7c7e(B).
+ This file was prepared by Fung Fung Lee (李枫峰).
Originally converted from CTCPS3.tit
Last modified: June 2, 1993.
Some infrequent GB characters are accessed by typing \\, followed by
- the Cantonese romanization of the respective radical ($A2?JW(B)."))
+ the Cantonese romanization of the respective radical (部首)."))
-(defun ctlau-b5-converter (dicbuf name title)
- (ctlau-converter dicbuf name title
-"$(0KH)tTT&,!(N,Tg>A*#Gn5x!((B
+(defun ctlau-b5-converter (dicbuf)
+ (ctlau-converter dicbuf
+"漢字輸入:劉錫祥式粵音:
- $(0N,Tg>A*#GnM$0D5x'J7{(B
+ 劉錫祥式粵語注音方案
Sidney Lau's Cantonese transcription scheme as described in his book
\"Elementary Cantonese\", The Government Printer, Hong Kong, 1972.
- This file was prepared by Fung Fung Lee ($(0,XFS76(B).
+ This file was prepared by Fung Fung Lee (李楓峰).
Originally converted from CTCPS3.tit
Last modified: June 2, 1993.
Some infrequent characters are accessed by typing \\, followed by
- the Cantonese romanization of the respective radical ($(0?f5}(B)."))
+ the Cantonese romanization of the respective radical (部首)."))
(declare-function dos-8+3-filename "dos-fns.el" (filename))
@@ -1122,8 +1118,7 @@ the generated Quail package is saved."
(let ((tail quail-misc-package-ext-info)
coding-system-for-write
slot
- name title dicfile coding quailfile converter copyright
- dicbuf)
+ name title dicfile coding quailfile converter copyright)
(while tail
(setq slot (car tail)
dicfile (nth 2 slot)
@@ -1148,8 +1143,7 @@ the generated Quail package is saved."
copyright (nth 6 slot))
(message "Converting %s to %s..." dicfile quailfile)
;; Explicitly set eol format to `unix'.
- (setq coding-system-for-write
- (coding-system-change-eol-conversion coding 'unix))
+ (setq coding-system-for-write 'utf-8-unix)
(with-temp-file (expand-file-name quailfile dirname)
(insert (format-message ";; Quail package `%s'\n" name))
(insert (format-message
@@ -1174,11 +1168,10 @@ the generated Quail package is saved."
(insert-file-contents filename)
(let ((dicbuf (current-buffer)))
(with-current-buffer dstbuf
- (funcall converter dicbuf name title)))))
+ (funcall converter dicbuf)))))
(insert ";; Local Variables:\n"
";; version-control: never\n"
";; no-update-autoloads: t\n"
- (format ";; coding: %s\n" coding)
";; End:\n\n"
";;; " quailfile " ends here\n"))
(message "Converting %s to %s...done" dicfile quailfile))
@@ -1210,6 +1203,38 @@ to store generated Quail packages."
(miscdic-convert filename dir))))
(kill-emacs 0))
+(defun pinyin-convert ()
+ "Convert text file pinyin.map into an elisp library.
+The library is named pinyin.el, and contains the constant
+`pinyin-character-map'."
+ (let ((src-file (car command-line-args-left))
+ (dst-file (cadr command-line-args-left))
+ (coding-system-for-write 'utf-8-unix))
+ (with-temp-file dst-file
+ (insert ";; This file is automatically generated from pinyin.map,\
+ by the\n;; function pinyin-convert.\n\n")
+ (insert "(defconst pinyin-character-map\n'(")
+ (let ((pos (point)))
+ (insert-file-contents src-file)
+ (goto-char pos)
+ (re-search-forward "^[a-z]")
+ (beginning-of-line)
+ (delete-region pos (point))
+ (while (not (eobp))
+ (insert "(\"")
+ (skip-chars-forward "a-z")
+ (insert "\" . \"")
+ (delete-char 1)
+ (end-of-line)
+ (while (= (preceding-char) ?\r)
+ (delete-char -1))
+ (insert "\")")
+ (forward-line 1)))
+ (insert ")\n\"An alist holding correspondences between pinyin syllables\
+ and\nChinese characters.\")\n\n")
+ (insert "(provide 'pinyin)\n"))
+ (kill-emacs 0)))
+
;; Prevent "Local Variables" above confusing Emacs.
diff --git a/lisp/international/ucs-normalize.el b/lisp/international/ucs-normalize.el
index 9d55470d948..6f1e770c09c 100644
--- a/lisp/international/ucs-normalize.el
+++ b/lisp/international/ucs-normalize.el
@@ -109,7 +109,9 @@
(defconst ucs-normalize-version "1.2")
-(eval-when-compile (require 'cl-lib))
+(eval-when-compile
+ (require 'cl-lib)
+ (require 'regexp-opt))
(declare-function nfd "ucs-normalize" (char))