summaryrefslogtreecommitdiff
path: root/lisp/gnus/qp.el
blob: 6c48f0fc9a45ace1c4d2965c873062fc191b555b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
;;; qp.el --- Quoted-Printable functions

;; Copyright (C) 1998-2015 Free Software Foundation, Inc.

;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
;; Keywords: mail, extensions

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.

;;; Commentary:

;; Functions for encoding and decoding quoted-printable text as
;; defined in RFC 2045.

;;; Code:

(require 'mm-util)
(defvar mm-use-ultra-safe-encoding)

;;;###autoload
(defun quoted-printable-decode-region (from to &optional coding-system)
  "Decode quoted-printable in the region between FROM and TO, per RFC 2045.
If CODING-SYSTEM is non-nil, decode bytes into characters with that
coding-system.

Interactively, you can supply the CODING-SYSTEM argument
with \\[universal-coding-system-argument].

The CODING-SYSTEM argument is a historical hangover and is deprecated.
QP encodes raw bytes and should be decoded into raw bytes.  Decoding
them into characters should be done separately."
  (interactive
   ;; Let the user determine the coding system with "C-x RET c".
   (list (region-beginning) (region-end) coding-system-for-read))
  (unless (mm-coding-system-p coding-system) ; e.g. `ascii' from Gnus
    (setq coding-system nil))
  (save-excursion
    (save-restriction
      ;; RFC 2045:  ``An "=" followed by two hexadecimal digits, one
      ;; or both of which are lowercase letters in "abcdef", is
      ;; formally illegal. A robust implementation might choose to
      ;; recognize them as the corresponding uppercase letters.''
      (let ((case-fold-search t))
	(narrow-to-region from to)
	;; Do this in case we're called from Gnus, say, in a buffer
	;; which already contains non-ASCII characters which would
	;; then get doubly-decoded below.
	(if coding-system
	    (mm-encode-coding-region (point-min) (point-max) coding-system))
	(goto-char (point-min))
	(while (and (skip-chars-forward "^=")
		    (not (eobp)))
	  (cond ((eq (char-after (1+ (point))) ?\n)
		 (delete-char 2))
		((looking-at "\\(=[0-9A-F][0-9A-F]\\)+")
		 ;; Decode this sequence at once; i.e. by a single
		 ;; deletion and insertion.
		 (let* ((n (/ (- (match-end 0) (point)) 3))
			(str (make-string n 0)))
		   (dotimes (i n)
                     (let ((n1 (char-after (1+ (point))))
                           (n2 (char-after (+ 2 (point)))))
                       (aset str i
                             (+ (* 16 (- n1 (if (<= n1 ?9) ?0
                                              (if (<= n1 ?F) (- ?A 10)
                                                (- ?a 10)))))
                                (- n2 (if (<= n2 ?9) ?0
                                        (if (<= n2 ?F) (- ?A 10)
                                          (- ?a 10)))))))
		     (forward-char 3))
		   (delete-region (match-beginning 0) (match-end 0))
		   (insert str)))
		(t
		 (message "Malformed quoted-printable text")
		 (forward-char)))))
      (if coding-system
	  (mm-decode-coding-region (point-min) (point-max) coding-system)))))

(defun quoted-printable-decode-string (string &optional coding-system)
  "Decode the quoted-printable encoded STRING and return the result.
If CODING-SYSTEM is non-nil, decode the string with coding-system.
Use of CODING-SYSTEM is deprecated; this function should deal with
raw bytes, and coding conversion should be done separately."
  (mm-with-unibyte-buffer
    (insert string)
    (quoted-printable-decode-region (point-min) (point-max) coding-system)
    (buffer-string)))

(defun quoted-printable-encode-region (from to &optional fold class)
  "Quoted-printable encode the region between FROM and TO per RFC 2045.

If FOLD, fold long lines at 76 characters (as required by the RFC).
If CLASS is non-nil, translate the characters not matched by that
regexp class, which is in the form expected by `skip-chars-forward'.
You should probably avoid non-ASCII characters in this arg.

If `mm-use-ultra-safe-encoding' is set, fold lines unconditionally and
encode lines starting with \"From\"."
  (interactive "r")
  (unless class
    ;; Avoid using 8bit characters. = is \075.
    ;; Equivalent to "^\000-\007\013\015-\037\200-\377="
    (setq class "\010-\012\014\040-\074\076-\177"))
  (save-excursion
    (goto-char from)
    (if (re-search-forward (mm-string-to-multibyte "[^\x0-\x7f\x80-\xff]")
			   to t)
	(error "Multibyte character in QP encoding region"))
    (save-restriction
      (narrow-to-region from to)
      ;; Encode all the non-ascii and control characters.
      (goto-char (point-min))
      (while (and (skip-chars-forward class)
		  (not (eobp)))
	(insert
	 (prog1
	     ;; To unibyte in case of Emacs 23 (unicode) eight-bit.
	     (format "=%02X" (mm-multibyte-char-to-unibyte (char-after)))
	   (delete-char 1))))
      ;; Encode white space at the end of lines.
      (goto-char (point-min))
      (while (re-search-forward "[ \t]+$" nil t)
	(goto-char (match-beginning 0))
	(while (not (eolp))
	  (insert
	   (prog1
	       (format "=%02X" (char-after))
	     (delete-char 1)))))
      (let ((mm-use-ultra-safe-encoding
	     (and (boundp 'mm-use-ultra-safe-encoding)
		  mm-use-ultra-safe-encoding)))
	(when (or fold mm-use-ultra-safe-encoding)
	  (let ((tab-width 1))		; HTAB is one character.
	    (goto-char (point-min))
	    (while (not (eobp))
	      ;; In ultra-safe mode, encode "From " at the beginning
	      ;; of a line.
	      (when mm-use-ultra-safe-encoding
		(if (looking-at "From ")
		    (replace-match "From=20" nil t)
		  (if (looking-at "-")
		      (replace-match "=2D" nil t))))
	      (end-of-line)
	      ;; Fold long lines.
	      (while (> (current-column) 76) ; tab-width must be 1.
		(beginning-of-line)
		(forward-char 75)	; 75 chars plus an "="
		(search-backward "=" (- (point) 2) t)
		(insert "=\n")
		(end-of-line))
	      (forward-line))))))))

(defun quoted-printable-encode-string (string)
  "Encode the STRING as quoted-printable and return the result."
  (with-temp-buffer
    (if (mm-multibyte-string-p string)
	(mm-enable-multibyte)
      (mm-disable-multibyte))
    (insert string)
    (quoted-printable-encode-region (point-min) (point-max))
    (buffer-string)))

(provide 'qp)

;;; qp.el ends here