summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Katsumi Yamaoka <yamaoka@jpl.org> 2017-02-17 09:52:09 +0000
committer Katsumi Yamaoka <yamaoka@jpl.org> 2017-02-17 09:52:09 +0000
commit 79f017d5c3019f8bc2a5014beda28bb3b829a8e3 (patch)
tree 68602a635d4213a17e846db41b3f96075e2c9421
parent 78f869687e86d4a9f91003dbbbbacde2e2741487 (diff)
download emacs-79f017d5c3019f8bc2a5014beda28bb3b829a8e3.tar.gz
mm-shr: Prefer charset specified in html meta tag
* lisp/gnus/mm-decode.el (mm-shr): Prefer the charset specified in the HTML meta tag over mail-parse-charset when the MIME header has no charset specification.
-rw-r--r-- lisp/gnus/mm-decode.el 68
1 files changed, 36 insertions, 32 deletions
diff --git a/lisp/gnus/mm-decode.el b/lisp/gnus/mm-decode.el
index 989d4b8ea17..6b539399596 100644
--- a/lisp/gnus/mm-decode.el
+++ b/lisp/gnus/mm-decode.el
@@ -1793,40 +1793,44 @@ If RECURSIVE, search recursively."
(buffer-string))))))
(shr-inhibit-images mm-html-inhibit-images)
(shr-blocked-images mm-html-blocked-images)
- charset coding char)
- (unless handle
- (setq handle (mm-dissect-buffer t)))
- (and (setq charset
- (or (mail-content-type-get (mm-handle-type handle) 'charset)
- mail-parse-charset))
- (setq coding (mm-charset-to-coding-system charset nil t))
- (eq coding 'ascii)
- (setq coding nil))
+ charset coding char document)
+ (mm-with-part (or handle (setq handle (mm-dissect-buffer t)))
+ (setq case-fold-search t)
+ (setq charset
+ (or (mail-content-type-get (mm-handle-type handle) 'charset)
+ (progn
+ (goto-char (point-min))
+ (and (re-search-forward "\
+<meta\\s-+http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']?\
+text/\\(\\sw+\\)\\(?:;\\s-*charset=\\([^\t\n\r \"'>]+\\)\\)?[^>]*>" nil t)
+ (setq coding
+ (mm-charset-to-coding-system (match-string 2)
+ nil t))
+ (string-match "\\`html\\'" (match-string 1))))
+ mail-parse-charset))
+ (when (or coding
+ (setq coding (mm-charset-to-coding-system charset nil t)))
+ (insert (prog1
+ (decode-coding-string (buffer-string) coding)
+ (erase-buffer)
+ (set-buffer-multibyte t))))
+ (goto-char (point-min))
+ (while (re-search-forward
+ "&#\\(?:x\\([89][0-9a-f]\\)\\|\\(1[2-5][0-9]\\)\\);" nil t)
+ (when (setq char
+ (cdr (assq (if (match-beginning 1)
+ (string-to-number (match-string 1) 16)
+ (string-to-number (match-string 2)))
+ mm-extra-numeric-entities)))
+ (replace-match (char-to-string char))))
+ ;; Remove "soft hyphens".
+ (goto-char (point-min))
+ (while (search-forward "­" nil t)
+ (replace-match "" t t))
+ (setq document (libxml-parse-html-region (point-min) (point-max))))
(save-restriction
(narrow-to-region (point) (point))
- (shr-insert-document
- (mm-with-part handle
- (insert (prog1
- (if coding
- (decode-coding-string (buffer-string) coding)
- (buffer-string))
- (erase-buffer)
- (mm-enable-multibyte)))
- (goto-char (point-min))
- (setq case-fold-search t)
- (while (re-search-forward
- "&#\\(?:x\\([89][0-9a-f]\\)\\|\\(1[2-5][0-9]\\)\\);" nil t)
- (when (setq char
- (cdr (assq (if (match-beginning 1)
- (string-to-number (match-string 1) 16)
- (string-to-number (match-string 2)))
- mm-extra-numeric-entities)))
- (replace-match (char-to-string char))))
- ;; Remove "soft hyphens".
- (goto-char (point-min))
- (while (search-forward "­" nil t)
- (replace-match "" t t))
- (libxml-parse-html-region (point-min) (point-max))))
+ (shr-insert-document document)
(unless (bobp)
(insert "\n"))
(mm-convert-shr-links)