summaryrefslogtreecommitdiff
path: root/lisp
diff options
context:
space:
mode:
authorKenichi Handa <handa@etlken>2010-08-04 17:06:52 +0900
committerKenichi Handa <handa@etlken>2010-08-04 17:06:52 +0900
commit6b4d96c2f04e5a08c4f9fff144743ff16c151dae (patch)
tree29def28b96201bf62115bbbadad44b5cf0ffbfe2 /lisp
parent4ce5a4ccd4cca6dcdc6d8bafa8cfaea1986e7e6e (diff)
downloademacs-6b4d96c2f04e5a08c4f9fff144743ff16c151dae.tar.gz
Modify the coding system compound-text-with-extensions to conform to the spec of Compound Text.
Diffstat (limited to 'lisp')
-rw-r--r--lisp/ChangeLog17
-rw-r--r--lisp/international/mule-conf.el14
-rw-r--r--lisp/international/mule.el147
-rw-r--r--lisp/language/cyrillic.el7
4 files changed, 104 insertions, 81 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index 670f07c2683..775ddcdc2e7 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,20 @@
+2010-08-04 Kenichi Handa <handa@m17n.org>
+
+ * language/cyrillic.el: Don't add "microsoft-cp1251" to
+ ctext-non-standard-encodings-alist here.
+
+ * international/mule.el (ctext-non-standard-encodings-alist): Add
+ "koi8-r" and "microsoft-cp1251".
+ (ctext-standard-encodings): New variable.
+ (ctext-non-standard-encodings-table): List only elements for
+ non-standard encodings.
+ (ctext-pre-write-conversion): Adjusted for the above change.
+ Check ctext-standard-encodings.
+
+ * international/mule-conf.el (compound-text): Doc fix.
+ (ctext-no-compositions): Doc fix.
+ (compound-text-with-extensions): Doc fix.
+
2010-07-23 Juanma Barranquero <lekktu@gmail.com>
* help-fns.el (find-lisp-object-file-name): Doc fix (bug#6494).
diff --git a/lisp/international/mule-conf.el b/lisp/international/mule-conf.el
index f53b69eed8b..9ee8d22463a 100644
--- a/lisp/international/mule-conf.el
+++ b/lisp/international/mule-conf.el
@@ -1410,9 +1410,10 @@ is treated as a character."
:flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
(define-coding-system 'compound-text
- "Compound text based generic encoding for decoding unknown messages.
-
-This coding system does not support extended segments of CTEXT."
+ "Compound text based generic encoding.
+This coding system is an extension of X's \"Compound Text Encoding\".
+It encodes many characters using the normal ISO-2022 designation sequences,
+but it doesn't support extended segments of CTEXT."
:coding-type 'iso-2022
:mnemonic ?x
:charset-list 'iso-2022
@@ -1432,7 +1433,7 @@ This coding system does not support extended segments of CTEXT."
;; not have a mime-charset property, to prevent it from showing up
;; close to the beginning of coding systems ordered by priority.
(define-coding-system 'ctext-no-compositions
- "Compound text based generic encoding for decoding unknown messages.
+ "Compound text based generic encoding.
Like `compound-text', but does not produce escape sequences for compositions."
:coding-type 'iso-2022
@@ -1445,8 +1446,9 @@ Like `compound-text', but does not produce escape sequences for compositions."
(define-coding-system 'compound-text-with-extensions
"Compound text encoding with ICCCM Extended Segment extensions.
-See the variable `ctext-non-standard-encodings-alist' for the
-detail about how extended segments are handled.
+See the variables `ctext-standard-encodings' and
+`ctext-non-standard-encodings-alist' for the detail about how
+extended segments are handled.
This coding system should be used only for X selections. It is inappropriate
for decoding and encoding files, process I/O, etc."
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index 7e7e55728c8..e030acbef02 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -1408,7 +1408,9 @@ This function is provided for backward compatibility."
'(("big5-0" big5 2 big5)
("ISO8859-14" iso-8859-14 1 latin-iso8859-14)
("ISO8859-15" iso-8859-15 1 latin-iso8859-15)
- ("gbk-0" gbk 2 chinese-gbk)))
+ ("gbk-0" gbk 2 chinese-gbk)
+ ("koi8-r" koi8-r 1 koi8-r)
+ ("microsoft-cp1251" windows-1251 1 windows-1251)))
"Alist of non-standard encoding names vs the corresponding usages in CTEXT.
It controls how extended segments of a compound text are handled
@@ -1497,6 +1499,20 @@ Each element must be one of the names listed in the variable
(goto-char (point-min))
(- (point-max) (point)))))
+(defvar ctext-standard-encodings
+ '(ascii latin-jisx0201 katakana-jisx0201
+ latin-iso8859-1 latin-iso8859-2 latin-iso8859-3 latin-iso8859-4
+ greek-iso8859-7 arabic-iso8859-6 hebrew-iso8859-8 cyrillic-iso8859-5
+ latin-iso8859-9
+ chinese-gb2312 japanese-jisx0208 korean-ksc5601)
+ "List of approved standard encodings (i.e. charsets) of X's Compound Text.
+Coding-system `compound-text-with-extensions' encodes a character
+belonging to any of those charsets using the normal ISO2022
+designation sequence unless the current language environment or
+the variable `ctext-non-standard-encodings' decide to use an extended
+segment of CTEXT for that character. See also the documentation
+of `ctext-non-standard-encodings-alist'.")
+
;; Return an alist of CHARSET vs CTEXT-USAGE-INFO generated from
;; `ctext-non-standard-encodings' and a list specified by the key
;; `ctext-non-standard-encodings' for the currrent language
@@ -1508,77 +1524,74 @@ Each element must be one of the names listed in the variable
;; is encoded using UTF-8 encoding extention.
(defun ctext-non-standard-encodings-table ()
- (let (table)
- ;; Setup charsets specified by the key
- ;; `ctext-non-standard-encodings' for the current language
- ;; environment and in `ctext-non-standard-encodings'.
- (dolist (encoding (append
- (get-language-info current-language-environment
- 'ctext-non-standard-encodings)
- ctext-non-standard-encodings))
- (let* ((slot (assoc encoding ctext-non-standard-encodings-alist))
+ (let* ((table (append ctext-non-standard-encodings
+ (copy-sequence
+ (get-language-info current-language-environment
+ 'ctext-non-standard-encodings))))
+ (tail table)
+ elt)
+ (while tail
+ (setq elt (car tail))
+ (let* ((slot (assoc elt ctext-non-standard-encodings-alist))
(charset (nth 3 slot)))
(if (charsetp charset)
- (push (cons charset slot) table)
- (dolist (cs charset)
- (push (cons cs slot) table)))))
-
- ;; Next prepend charsets for ISO2022 designation sequence.
- (dolist (charset charset-list)
- (let ((final (plist-get (charset-plist charset) :iso-final-char)))
- (if (and (integerp final)
- (>= final #x40) (<= final #x7e)
- ;; Exclude ascii and chinese-cns11643-X.
- (not (eq charset 'ascii))
- (not (string-match "cns11643" (symbol-name charset))))
- (push (cons charset nil) table))))
-
- ;; Returned reversed list so that the charsets specified by the
- ;; key `ctext-non-standard-encodings' for the current language
- ;; have the highest priority.
- (nreverse table)))
+ (setcar tail (cons charset slot))
+ (setcar tail (cons (car charset) slot))
+ (dolist (cs (cdr charset))
+ (setcdr tail
+ (cons (cons (car cs) slot) (cdr tail)))
+ (setq tail (cdr tail))))
+ (setq tail (cdr tail))))
+ table))
(defun ctext-pre-write-conversion (from to)
"Encode characters between FROM and TO as Compound Text w/Extended Segments.
-If FROM is a string, or if the current buffer is not the one set up for us
-by `encode-coding-string', generate a new temp buffer, insert the text,
-and convert it in the temporary buffer. Otherwise, convert in-place."
+If FROM is a string, generate a new temp buffer, insert the text,
+and convert it in the temporary buffer. Otherwise, convert
+in-place."
(save-match-data
;; Setup a working buffer if necessary.
(when (stringp from)
(set-buffer (generate-new-buffer " *temp"))
(set-buffer-multibyte (multibyte-string-p from))
- (insert from))
-
- ;; Now we can encode the whole buffer.
- (let ((encoding-table (ctext-non-standard-encodings-table))
- last-coding-system-used
- last-pos last-encoding-info
- encoding-info end-pos ch)
- (goto-char (setq last-pos (point-min)))
- (setq end-pos (point-marker))
- (while (re-search-forward "[^\000-\177]+" nil t)
- ;; Found a sequence of non-ASCII characters.
- (setq last-pos (match-beginning 0)
- ch (char-after last-pos)
- last-encoding-info (catch 'tag
- (dolist (elt encoding-table)
- (if (encode-char ch (car elt))
- (throw 'tag (cdr elt))))
- 'utf-8))
- (set-marker end-pos (match-end 0))
- (goto-char (1+ last-pos))
- (catch 'tag
- (while t
- (setq encoding-info
- (if (< (point) end-pos)
- (catch 'tag
- (setq ch (following-char))
- (dolist (elt encoding-table)
- (if (encode-char ch (car elt))
- (throw 'tag (cdr elt))))
- 'utf-8)))
+ (insert from)
+ (setq from 1 to (point-max)))
+ (save-restriction
+ (narrow-to-region from to)
+ (let ((encoding-table (ctext-non-standard-encodings-table))
+ (charset-list ctext-standard-encodings)
+ last-coding-system-used
+ last-pos last-encoding-info
+ encoding-info end-pos ch charset)
+ (dolist (elt encoding-table)
+ (push (car elt) charset-list))
+ (goto-char (setq last-pos from))
+ (setq end-pos (point-marker))
+ (while (re-search-forward "[^\000-\177]+" nil t)
+ ;; Found a sequence of non-ASCII characters.
+ (setq last-pos (match-beginning 0)
+ ch (char-after last-pos)
+ charset (char-charset ch charset-list)
+ last-encoding-info
+ (if charset
+ (or (cdr (assq charset encoding-table))
+ charset)
+ 'utf-8))
+ (set-marker end-pos (match-end 0))
+ (goto-char (1+ last-pos))
+ (while (marker-position end-pos)
+ (if (< (point) end-pos)
+ (progn
+ (setq charset (char-charset (following-char) charset-list)
+ encoding-info
+ (if charset
+ (or (cdr (assq charset encoding-table))
+ charset)
+ 'utf-8))
+ (forward-char 1))
+ (setq encoding-info nil)
+ (set-marker end-pos nil))
(unless (eq last-encoding-info encoding-info)
(cond ((consp last-encoding-info)
;; Encode the previous range using an extended
@@ -1609,14 +1622,12 @@ and convert it in the temporary buffer. Otherwise, convert in-place."
(save-excursion
(goto-char last-pos)
(insert "\e%G"))
- (insert "\e%@")))
+ (insert "\e%@"))
+ (t
+ (put-text-property last-pos (point) 'charset charset)))
(setq last-pos (point)
- last-encoding-info encoding-info))
- (if (< (point) end-pos)
- (forward-char 1)
- (throw 'tag nil)))))
- (set-marker end-pos nil)
- (goto-char (point-min))))
+ last-encoding-info encoding-info))))
+ (goto-char (point-min)))))
;; Must return nil, as build_annotations_2 expects that.
nil)
diff --git a/lisp/language/cyrillic.el b/lisp/language/cyrillic.el
index 7d2f082579f..b293ad1ff0b 100644
--- a/lisp/language/cyrillic.el
+++ b/lisp/language/cyrillic.el
@@ -239,13 +239,6 @@ Support for Russian using koi8-r and the russian-computer input method.")
(documentation . "Support for Tajik using KOI8-T."))
'("Cyrillic"))
-(let ((elt `("microsoft-cp1251" windows-1251 1
- ,(get 'encode-windows-1251 'translation-table)))
- (slot (assoc "microsoft-cp1251" ctext-non-standard-encodings-alist)))
- (if slot
- (setcdr slot (cdr elt))
- (push elt ctext-non-standard-encodings-alist)))
-
(set-language-info-alist
"Bulgarian" `((coding-system windows-1251)
(coding-priority windows-1251)