summaryrefslogtreecommitdiff
path: root/lisp/gnus/rfc2047.el
diff options
context:
space:
mode:
authorMiles Bader <miles@gnu.org>2007-12-06 09:51:45 +0000
committerMiles Bader <miles@gnu.org>2007-12-06 09:51:45 +0000
commit0bd508417142ff377f34aec8dcec9438d9175c2c (patch)
tree4d60fe09e5cebf7d79766b11e9cda8cc1c9dbb9b /lisp/gnus/rfc2047.el
parent98fe991da804a42f53f6a5e84cd5eab18a82e181 (diff)
parent9fb1ba8090da3528de56158a79bd3527d31c7f2f (diff)
downloademacs-0bd508417142ff377f34aec8dcec9438d9175c2c.tar.gz
Merge from emacs--devo--0
Revision: emacs@sv.gnu.org/emacs--unicode--0--patch-294
Diffstat (limited to 'lisp/gnus/rfc2047.el')
-rw-r--r--lisp/gnus/rfc2047.el71
1 files changed, 55 insertions, 16 deletions
diff --git a/lisp/gnus/rfc2047.el b/lisp/gnus/rfc2047.el
index b789061853f..aa9999a7722 100644
--- a/lisp/gnus/rfc2047.el
+++ b/lisp/gnus/rfc2047.el
@@ -30,8 +30,8 @@
;;; Code:
(eval-when-compile
- (require 'cl)
- (defvar message-posting-charset))
+ (require 'cl))
+(defvar message-posting-charset)
(require 'qp)
(require 'mm-util)
@@ -101,6 +101,40 @@ quoted-printable and base64 respectively.")
(defvar rfc2047-encode-encoded-words t
"Whether encoded words should be encoded again.")
+(defvar rfc2047-allow-irregular-q-encoded-words t
+ "*Whether to decode irregular Q-encoded words.")
+
+(eval-and-compile ;; Necessary to hard code them in `rfc2047-decode-region'.
+ (defconst rfc2047-encoded-word-regexp
+ "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\\?\
+\\(B\\?[+/0-9A-Za-z]*=*\
+\\|Q\\?[ ->@-~]*\
+\\)\\?="
+ "Regexp that matches encoded word."
+ ;; The patterns for the B encoding and the Q encoding, i.e. the ones
+ ;; beginning with "B" and "Q" respectively, are restricted into only
+ ;; the characters that those encodings may generally use.
+ )
+ (defconst rfc2047-encoded-word-regexp-loose
+ "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\\?\
+\\(B\\?[+/0-9A-Za-z]*=*\
+\\|Q\\?\\(?:\\?+[ -<>@-~]\\)?\\(?:[ ->@-~]+\\?+[ -<>@-~]\\)*[ ->@-~]*\\?*\
+\\)\\?="
+ "Regexp that matches encoded word allowing loose Q encoding."
+ ;; The pattern for the Q encoding, i.e. the one beginning with "Q",
+ ;; is similar to:
+ ;; "Q\\?\\(\\?+[^\n=?]\\)?\\([^\n?]+\\?+[^\n=?]\\)*[^\n?]*\\?*"
+ ;; <--------1-------><----------2,3----------><--4--><-5->
+ ;; They mean:
+ ;; 1. After "Q?", allow "?"s that follow a character other than "=".
+ ;; 2. Allow "=" after "Q?"; it isn't regarded as the terminator.
+ ;; 3. In the middle of an encoded word, allow "?"s that follow a
+ ;; character other than "=".
+ ;; 4. Allow any characters other than "?" in the middle of an
+ ;; encoded word.
+ ;; 5. At the end, allow "?"s.
+ ))
+
;;;
;;; Functions for encoding RFC2047 messages
;;;
@@ -287,7 +321,6 @@ Should be called narrowed to the head of the message."
;; Fixme: This, and the require below may not be the Right Thing, but
;; should be safe just before release. -- fx 2001-02-08
-(eval-when-compile (defvar message-posting-charset))
(defun rfc2047-encodable-p ()
"Return non-nil if any characters in current buffer need encoding in headers.
@@ -298,7 +331,7 @@ The buffer may be narrowed."
(goto-char (point-min))
(or (and rfc2047-encode-encoded-words
(prog1
- (search-forward "=?" nil t)
+ (re-search-forward rfc2047-encoded-word-regexp nil t)
(goto-char (point-min))))
(and charsets
(not (equal charsets (list (car message-posting-charset))))))))
@@ -533,10 +566,19 @@ By default, the string is treated as containing addresses (see
(rfc2047-encode-region (point-min) (point-max))
(buffer-string)))
+;; From RFC 2047:
+;; 2. Syntax of encoded-words
+;; [...]
+;; While there is no limit to the length of a multiple-line header
+;; field, each line of a header field that contains one or more
+;; 'encoded-word's is limited to 76 characters.
+;;
+;; In `rfc2047-encode-parameter' it is bound to nil, so don't defconst it.
(defvar rfc2047-encode-max-chars 76
"Maximum characters of each header line that contain encoded-words.
-If it is nil, encoded-words will not be folded. Too small value may
-cause an error. Don't change this for no particular reason.")
+According to RFC 2047, it is 76. If it is nil, encoded-words
+will not be folded. Too small value may cause an error. You
+should not change this value.")
(defun rfc2047-encode-1 (column string cs encoder start crest tail
&optional eword)
@@ -827,11 +869,6 @@ it, put the following line in your ~/.gnus.el file:
;;; Functions for decoding RFC2047 messages
;;;
-(eval-and-compile
- (defconst rfc2047-encoded-word-regexp
- "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\
-\\?\\(B\\|Q\\)\\?\\([!->@-~ ]*\\)\\?="))
-
(defvar rfc2047-quote-decoded-words-containing-tspecials nil
"If non-nil, quote decoded words containing special characters.")
@@ -950,10 +987,12 @@ If ADDRESS-MIME is non-nil, strip backslashes which precede characters
other than `\"' and `\\' in quoted strings."
(interactive "r")
(let ((case-fold-search t)
- (eword-regexp (eval-when-compile
- ;; Ignore whitespace between encoded-words.
- (concat "[\n\t ]*\\(" rfc2047-encoded-word-regexp
- "\\)")))
+ (eword-regexp
+ (if rfc2047-allow-irregular-q-encoded-words
+ (eval-when-compile
+ (concat "[\n\t ]*\\(" rfc2047-encoded-word-regexp-loose "\\)"))
+ (eval-when-compile
+ (concat "[\n\t ]*\\(" rfc2047-encoded-word-regexp "\\)"))))
b e match words)
(save-excursion
(save-restriction
@@ -969,7 +1008,7 @@ other than `\"' and `\\' in quoted strings."
(while match
(push (list (match-string 2) ;; charset
(char-after (match-beginning 3)) ;; encoding
- (match-string 4) ;; encoded-text
+ (substring (match-string 3) 2) ;; encoded-text
(match-string 1)) ;; encoded-word
words)
;; Look for the subsequent encoded-words.