summaryrefslogtreecommitdiff
path: root/lisp/international/rfc1843.el
blob: 7f09eb41d17c31ceb812e3240f0fa89931e34703 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
;;; rfc1843.el --- HZ (rfc1843) decoding  -*- lexical-binding:t -*-

;; Copyright (C) 1998-2020 Free Software Foundation, Inc.

;; Author: Shenghuo Zhu <zsh@cs.rochester.edu>
;; Keywords: news HZ HZ+ mail i18n

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.

;;; Commentary:

;; Test:
;; (rfc1843-decode-string  "~{<:Ky2;S{#,NpJ)l6HK!#~}")

;;; Code:

(eval-when-compile (require 'cl-lib))

(defvar rfc1843-word-regexp
  "~\\({\\([\041-\167][\041-\176]\\| \\)+\\)\\(~}\\|$\\)")

(defvar rfc1843-word-regexp-strictly
  "~\\({\\([\041-\167][\041-\176]\\)+\\)\\(~}\\|$\\)")

(defvar rfc1843-hzp-word-regexp
  "~\\({\\([\041-\167][\041-\176]\\| \\)+\\|\
[<>]\\([\041-\175][\041-\176]\\| \\)+\\)\\(~}\\|$\\)")

(defvar rfc1843-hzp-word-regexp-strictly
  "~\\({\\([\041-\167][\041-\176]\\)+\\|\
[<>]\\([\041-\175][\041-\176]\\)+\\)\\(~}\\|$\\)")

(defcustom rfc1843-decode-loosely nil
  "Loosely check HZ encoding if non-nil.
When it is set non-nil, only buffers or strings with strictly
HZ-encoded are decoded."
  :type 'boolean
  :group 'mime)

(defcustom rfc1843-decode-hzp t
  "HZ+ decoding support if non-nil.
HZ+ specification (also known as HZP) is to provide a standardized
7-bit representation of mixed Big5, GB, and ASCII text for convenient
e-mail transmission, news posting, etc."
  :type 'boolean
  :group 'mime)

(defcustom rfc1843-newsgroups-regexp "chinese\\|hz"
  "Regexp of newsgroups in which might be HZ encoded."
  :type 'string
  :group 'mime)

(defun rfc1843-decode-region (from to)
  "Decode HZ in the region between FROM and TO."
  (interactive "r")
  (let (str firstc)
    (save-excursion
      (goto-char from)
      (if (or rfc1843-decode-loosely
	      (re-search-forward (if rfc1843-decode-hzp
				     rfc1843-hzp-word-regexp-strictly
				   rfc1843-word-regexp-strictly) to t))
	  (save-restriction
	    (narrow-to-region from to)
	    (goto-char (point-min))
	    (while (re-search-forward (if rfc1843-decode-hzp
					  rfc1843-hzp-word-regexp
					rfc1843-word-regexp) (point-max) t)
	      (setq str (buffer-substring-no-properties
			 (match-beginning 1)
			 (match-end 1)))
	      (setq firstc (aref str 0))
	      (insert (decode-coding-string
		       (rfc1843-decode
			(prog1
			    (substring str 1)
			  (delete-region (match-beginning 0) (match-end 0)))
			firstc)
		       (if (eq firstc ?{) 'cn-gb-2312 'cn-big5))))
	    (goto-char (point-min))
	    (while (search-forward "~" (point-max) t)
	      (cond ((eq (char-after) ?\n)
		     (delete-char -1)
		     (delete-char 1))
		    ((eq (char-after) ?~)
		     (delete-char 1)))))))))

(defun rfc1843-decode-string (string)
  "Decode HZ STRING and return the results."
  (let ((m enable-multibyte-characters))
    (with-temp-buffer
      (when m
	(set-buffer-multibyte 'to))
      (insert string)
      (inline
	(rfc1843-decode-region (point-min) (point-max)))
      (buffer-string))))

(defun rfc1843-decode (word &optional firstc)
  "Decode HZ WORD and return it."
  (let ((i -1) (s (substring word 0)) v)
    (if (or (not firstc) (eq firstc ?{))
	(while (< (cl-incf i) (length s))
	  (if (eq (setq v (aref s i)) ? ) nil
	    (aset s i (+ 128 v))))
      (while (< (cl-incf i) (length s))
	(if (eq (setq v (aref s i)) ? ) nil
	  (setq v (+ (* 94 v) (aref s (1+ i)) -3135))
	  (aset s i (+ (/ v 157) (if (eq firstc ?<) 201 161)))
	  (setq v (% v 157))
	  (aset s (cl-incf i) (+ v (if (< v 63) 64 98))))))
    s))

(provide 'rfc1843)

;;; rfc1843.el ends here