summaryrefslogtreecommitdiff
path: root/lisp
diff options
context:
space:
mode:
authorReiner Steib <Reiner.Steib@gmx.de>2006-04-17 19:37:15 +0000
committerReiner Steib <Reiner.Steib@gmx.de>2006-04-17 19:37:15 +0000
commitbd29ba2089a66563ec538a3399d038007de6136f (patch)
treebd35af7325bd4181398ec3aedc2391a3c59e1014 /lisp
parent18c06a99aa65121a4c09138403a7b494b7d41d37 (diff)
downloademacs-bd29ba2089a66563ec538a3399d038007de6136f.tar.gz
* mm-util.el (mm-charset-synonym-alist): Improve doc string.
(mm-charset-override-alist): New variable. (mm-charset-to-coding-system): Use it. (mm-codepage-setup): New helper function. (mm-charset-eval-alist): New variable. (mm-charset-to-coding-system): Use mm-charset-eval-alist. Warn about unknown charsets. Add allow-override. Use `mm-charset-override-alist' only when decoding. (mm-detect-mime-charset-region): Use :mime-charset. * mm-bodies.el (mm-decode-body, mm-decode-string): Call `mm-charset-to-coding-system' with allow-override argument.
Diffstat (limited to 'lisp')
-rw-r--r--lisp/gnus/ChangeLog15
-rw-r--r--lisp/gnus/mm-bodies.el13
-rw-r--r--lisp/gnus/mm-util.el121
3 files changed, 142 insertions, 7 deletions
diff --git a/lisp/gnus/ChangeLog b/lisp/gnus/ChangeLog
index 50a7262e1a3..09dbe9e0027 100644
--- a/lisp/gnus/ChangeLog
+++ b/lisp/gnus/ChangeLog
@@ -1,5 +1,20 @@
2006-04-17 Reiner Steib <Reiner.Steib@gmx.de>
+ [ Merge from Gnus trunk. ]
+
+ * mm-util.el (mm-charset-synonym-alist): Improve doc string.
+ (mm-charset-override-alist): New variable.
+ (mm-charset-to-coding-system): Use it.
+ (mm-codepage-setup): New helper function.
+ (mm-charset-eval-alist): New variable.
+ (mm-charset-to-coding-system): Use mm-charset-eval-alist. Warn
+ about unknown charsets. Add allow-override. Use
+ `mm-charset-override-alist' only when decoding.
+ (mm-detect-mime-charset-region): Use :mime-charset.
+
+ * mm-bodies.el (mm-decode-body, mm-decode-string): Call
+ `mm-charset-to-coding-system' with allow-override argument.
+
* message.el (message-tool-bar-zap-list, message-tool-bar)
(message-tool-bar-gnome, message-tool-bar-retro): New variables.
(message-tool-bar-local-item-from-menu): Remove.
diff --git a/lisp/gnus/mm-bodies.el b/lisp/gnus/mm-bodies.el
index c58eb6bd41d..a10b8b28399 100644
--- a/lisp/gnus/mm-bodies.el
+++ b/lisp/gnus/mm-bodies.el
@@ -56,6 +56,8 @@
;; known to break servers.
;; Note: UTF-16 variants are invalid for text parts [RFC 2781],
;; so this can't happen :-/.
+ ;; PPS: Yes, it can happen if the user specifies UTF-16 in the MML
+ ;; markup. - jh.
(utf-16 . base64)
(utf-16be . base64)
(utf-16le . base64))
@@ -251,7 +253,10 @@ decoding. If it is nil, default to `mail-parse-charset'."
(mm-decode-content-transfer-encoding encoding type))
(when (and (featurep 'mule) ;; Fixme: Wrong test for unibyte session.
(not (eq charset 'gnus-decoded)))
- (let ((coding-system (mm-charset-to-coding-system charset)))
+ (let ((coding-system (mm-charset-to-coding-system
+ ;; Allow overwrite using
+ ;; `mm-charset-override-alist'.
+ charset nil t)))
(if (and (not coding-system)
(listp mail-parse-ignored-charsets)
(memq 'gnus-unknown mail-parse-ignored-charsets))
@@ -282,7 +287,11 @@ decoding. If it is nil, default to `mail-parse-charset'."
(setq charset mail-parse-charset))
(or
(when (featurep 'mule)
- (let ((coding-system (mm-charset-to-coding-system charset)))
+ (let ((coding-system (mm-charset-to-coding-system
+ charset
+ ;; Allow overwrite using
+ ;; `mm-charset-override-alist'.
+ nil t)))
(if (and (not coding-system)
(listp mail-parse-ignored-charsets)
(memq 'gnus-unknown mail-parse-ignored-charsets))
diff --git a/lisp/gnus/mm-util.el b/lisp/gnus/mm-util.el
index a8c1f3a87a1..e16750cfcf6 100644
--- a/lisp/gnus/mm-util.el
+++ b/lisp/gnus/mm-util.el
@@ -177,6 +177,29 @@ system object in XEmacs."
;; no-MULE XEmacs:
(car (memq cs (mm-get-coding-system-list))))))
+(defun mm-codepage-setup (number &optional alias)
+ "Create a coding system cpNUMBER.
+The coding system is created using `codepage-setup'. If ALIAS is
+non-nil, an alias is created and added to
+`mm-charset-synonym-alist'. If ALIAS is a string, it's used as
+the alias. Else windows-NUMBER is used."
+ (interactive
+ (let ((completion-ignore-case t)
+ (candidates (cp-supported-codepages)))
+ (list (completing-read "Setup DOS Codepage: (default 437) " candidates
+ nil t nil nil "437"))))
+ (when alias
+ (setq alias (if (stringp alias)
+ (intern alias)
+ (intern (format "windows-%s" number)))))
+ (let* ((cp (intern (format "cp%s" number))))
+ (unless (mm-coding-system-p cp)
+ (codepage-setup number))
+ (when (and alias
+ ;; Don't add alias if setup of cp failed.
+ (mm-coding-system-p cp))
+ (add-to-list 'mm-charset-synonym-alist (cons alias cp)))))
+
(defvar mm-charset-synonym-alist
`(
;; Not in XEmacs, but it's not a proper MIME charset anyhow.
@@ -200,8 +223,61 @@ system object in XEmacs."
,@(if (and (not (mm-coding-system-p 'windows-1250))
(mm-coding-system-p 'cp1250))
'((windows-1250 . cp1250)))
+ ;; A Microsoft misunderstanding.
+ ,@(if (and (not (mm-coding-system-p 'unicode))
+ (mm-coding-system-p 'utf-16-le))
+ '((unicode . utf-16-le)))
+ ;; A Microsoft misunderstanding.
+ ,@(unless (mm-coding-system-p 'ks_c_5601-1987)
+ (if (mm-coding-system-p 'cp949)
+ '((ks_c_5601-1987 . cp949))
+ '((ks_c_5601-1987 . euc-kr))))
)
- "A mapping from invalid charset names to the real charset names.")
+ "A mapping from unknown or invalid charset names to the real charset names.")
+
+(defcustom mm-charset-override-alist
+ `((iso-8859-1 . windows-1252))
+ "A mapping from undesired charset names to their replacement.
+
+You may add pairs like (iso-8859-1 . windows-1252) here,
+i.e. treat iso-8859-1 as windows-1252. windows-1252 is a
+superset of iso-8859-1."
+ :type '(list (set :inline t
+ (const (iso-8859-1 . windows-1252))
+ (const (undecided . windows-1252)))
+ (repeat :inline t
+ :tag "Other options"
+ (cons (symbol :tag "From charset")
+ (symbol :tag "To charset"))))
+ :version "23.0" ;; No Gnus
+ :group 'mime)
+
+(defcustom mm-charset-eval-alist
+ (if (featurep 'xemacs)
+ nil ;; I don't know what would be useful for XEmacs.
+ '(;; Emacs 21 offers 1250 1251 1253 1257. Emacs 22 provides autoloads for
+ ;; 1250-1258 (i.e. `mm-codepage-setup' does nothing).
+ (windows-1250 . (mm-codepage-setup 1250 t))
+ (windows-1251 . (mm-codepage-setup 1251 t))
+ (windows-1253 . (mm-codepage-setup 1253 t))
+ (windows-1257 . (mm-codepage-setup 1257 t))))
+ "An alist of (CHARSET . FORM) pairs.
+If an article is encoded in an unknown CHARSET, FORM is
+evaluated. This allows to load additional libraries providing
+charsets on demand. If supported by your Emacs version, you
+could use `autoload-coding-system' here."
+ :version "23.0" ;; No Gnus
+ :type '(list (set :inline t
+ (const (windows-1250 . (mm-codepage-setup 1250 t)))
+ (const (windows-1251 . (mm-codepage-setup 1251 t)))
+ (const (windows-1253 . (mm-codepage-setup 1253 t)))
+ (const (windows-1257 . (mm-codepage-setup 1257 t)))
+ (const (cp850 . (mm-codepage-setup 850 nil))))
+ (repeat :inline t
+ :tag "Other options"
+ (cons (symbol :tag "charset")
+ (symbol :tag "form"))))
+ :group 'mime)
(defvar mm-binary-coding-system
(cond
@@ -426,11 +502,17 @@ mail with multiple parts is preferred to sending a Unicode one.")
(pop alist))
out)))
-(defun mm-charset-to-coding-system (charset &optional lbt)
+(defun mm-charset-to-coding-system (charset &optional lbt
+ allow-override)
"Return coding-system corresponding to CHARSET.
CHARSET is a symbol naming a MIME charset.
If optional argument LBT (`unix', `dos' or `mac') is specified, it is
-used as the line break code type of the coding system."
+used as the line break code type of the coding system.
+
+If ALLOW-OVERRIDE is given, use `mm-charset-override-alist' to
+map undesired charset names to their replacement. This should
+only be used for decoding, not for encoding."
+ ;; OVERRIDE is used (only) in `mm-decode-body' and `mm-decode-string'.
(when (stringp charset)
(setq charset (intern (downcase charset))))
(when lbt
@@ -442,6 +524,11 @@ used as the line break code type of the coding system."
((or (null (mm-get-coding-system-list))
(not (fboundp 'coding-system-get)))
charset)
+ ;; Check override list quite early. Should only used for decoding, not for
+ ;; encoding!
+ ((and allow-override
+ (let ((cs (cdr (assq charset mm-charset-override-alist))))
+ (and cs (mm-coding-system-p cs) cs))))
;; ascii
((eq charset 'us-ascii)
'ascii)
@@ -454,9 +541,27 @@ used as the line break code type of the coding system."
;;; (eq charset (coding-system-get charset 'mime-charset))
)
charset)
+ ;; Eval expressions from `mm-charset-eval-alist'
+ ((let* ((el (assq charset mm-charset-eval-alist))
+ (cs (car el))
+ (form (cdr el)))
+ (and cs
+ form
+ (prog2
+ ;; Avoid errors...
+ (condition-case nil (eval form) (error nil))
+ ;; (message "Failed to eval `%s'" form))
+ (mm-coding-system-p cs)
+ (message "Added charset `%s' via `mm-charset-eval-alist'" cs))
+ cs)))
;; Translate invalid charsets.
((let ((cs (cdr (assq charset mm-charset-synonym-alist))))
- (and cs (mm-coding-system-p cs) cs)))
+ (and cs
+ (mm-coding-system-p cs)
+ ;; (message
+ ;; "Using synonym `%s' from `mm-charset-synonym-alist' for `%s'"
+ ;; cs charset)
+ cs)))
;; Last resort: search the coding system list for entries which
;; have the right mime-charset in case the canonical name isn't
;; defined (though it should be).
@@ -468,6 +573,11 @@ used as the line break code type of the coding system."
(eq charset (or (coding-system-get c :mime-charset)
(coding-system-get c 'mime-charset))))
(setq cs c)))
+ (unless cs
+ ;; Warn the user about unknown charset:
+ (if (fboundp 'gnus-message)
+ (gnus-message 7 "Unknown charset: %s" charset)
+ (message "Unknown charset: %s" charset)))
cs))))
(defsubst mm-replace-chars-in-string (string from to)
@@ -1070,7 +1180,8 @@ If SUFFIX is non-nil, add that at the end of the file name."
(defun mm-detect-mime-charset-region (start end)
"Detect MIME charset of the text in the region between START and END."
(let ((cs (mm-detect-coding-region start end)))
- (coding-system-get cs 'mime-charset)))
+ (or (coding-system-get cs :mime-charset)
+ (coding-system-get cs 'mime-charset))))
(defun mm-detect-mime-charset-region (start end)
"Detect MIME charset of the text in the region between START and END."
(let ((cs (mm-detect-coding-region start end)))