diff options
author | Dave Love <fx@gnu.org> | 2003-05-08 17:58:17 +0000 |
---|---|---|
committer | Dave Love <fx@gnu.org> | 2003-05-08 17:58:17 +0000 |
commit | 4b971341ff5ad247dccd762f13cd0a0360bc0510 (patch) | |
tree | a30f349e1e50c5e5ebc15763c7cfc9edb9942880 | |
parent | e20f36df99957bc9f29b6b9a56bcf887e6c92885 (diff) | |
download | emacs-4b971341ff5ad247dccd762f13cd0a0360bc0510.tar.gz |
*** empty log message ***
-rw-r--r-- | lisp/ChangeLog | 6 | ||||
-rw-r--r-- | lisp/international/utf-7.el | 143 |
2 files changed, 149 insertions, 0 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog index d16f2b1f6b6..68ce460fb27 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,9 @@ +2003-05-08 Dave Love <fx@gnu.org> + + * international/utf-7.el: New file. + + * international/mule-conf.el (utf-7): New. + 2003-05-06 Kenichi Handa <handa@m17n.org> * international/mule-conf.el (utf-16-be) diff --git a/lisp/international/utf-7.el b/lisp/international/utf-7.el new file mode 100644 index 00000000000..f45c7cd905a --- /dev/null +++ b/lisp/international/utf-7.el @@ -0,0 +1,143 @@ +;;; utf-7.el --- utf-7 coding system + +;; Copyright (C) 2003 Free Software Foundation, Inc. + +;; Author: Dave Love <fx@gnu.org> +;; Keywords: i18n, mail + +;; This file is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; This file is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to +;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary: + +;; Defines a coding system for UTF-7, defined in RFC 2152. Non-ASCII +;; segments are encoded as base64-encoded big endian UTF-16. Also +;; defines a variation required for IMAP (RFC 2060). + +;; The encoding and decoding was originally taken from Jon K Hellan's +;; implementation in Gnus, but has been substantially re-done. + +;; This probably needs more attention. In particular, it's not +;; completely consistent with iconv's behaviour. It's arguable +;; whether the IMAP version should be a coding system since it's +;; apparently only used for IMAP mailbox names. + +;;; Code: + +;; See mule-conf. +;; (define-coding-system 'utf-7 +;; "UTF-7 encoding of Unicode (RFC 2152)" +;; :coding-type 'utf-8 +;; :mnemonic ?U +;; :mime-charset 'utf-7 +;; :charset-list '(unicode) +;; :pre-write-conversion 'utf-7-pre-write-conversion +;; :post-read-conversion 'utf-7-post-read-conversion) + +;; (define-coding-system 'utf-7-imap +;; "UTF-7 encoding of Unicode, IMAP version (RFC 2060)" +;; :coding-type 'utf-8 +;; :mnemonic ?U +;; :mime-charset 'utf-7 +;; :charset-list '(unicode) +;; :pre-write-conversion 'utf-7-imap-pre-write-conversion +;; :post-read-conversion 'utf-7-imap-post-read-conversion)) + +;;;###autoload +(defun utf-7-decode (len imap) + "Decode LEN bytes of UTF-7 at point. +IMAP non-nil means use the IMAP version." + (save-excursion + (save-restriction + (narrow-to-region (point) (+ (point) len)) + (let ((not-esc (if imap "^&" "^+")) + (skip-chars (if imap "A-Za-z0-9+," "A-Za-z0-9+/"))) + (while (not (eobp)) + (skip-chars-forward not-esc) + (unless (eobp) + (forward-char) + (let ((p (point)) + (run-length (skip-chars-forward skip-chars))) + (if (eq ?- (char-after)) + (delete-char 1)) + (unless (= run-length 0) ; encoded lone esc-char + (let ((pl (mod (- run-length) 4))) + (insert-char ?= pl) + (if imap + (subst-char-in-region p (point) ?, ?/)) + (base64-decode-region p (point))) + (decode-coding-region p (point) 'utf-16-be) + (save-excursion + (goto-char p) + (delete-backward-char 1))))))) + (- (point-max) (point-min))))) + +(defun utf-7-post-read-conversion (len) + (utf-7-decode len nil)) + +(defun utf-7-imap-post-read-conversion (len) + (utf-7-decode len t)) + +;;;###autoload +(defun utf-7-encode (from to imap) + "Encode bytes between FROM and TO to UTF-7. +ESC and SKIP-CHARS are adjusted for the normal and IMAP versions." + (let* ((old-buf (current-buffer)) + (esc (if imap ?& ?+)) + ;; These are characters which can be encoded asis. + (skip-chars (if imap + "\t\n\r\x20-\x25\x27-\x7e" ; rfc2060 + ;; This includes the rfc2152 optional set. + ;; Perhaps it shouldn't (like iconv). + "\t\n\r -*,-[]-}")) + (not-skip-chars (format "^%s%c" skip-chars esc))) + (set-buffer (generate-new-buffer " *temp*")) + (if (stringp from) + (insert from) + (insert-buffer-substring old-buf from to)) + (goto-char (point-min)) + (while (not (eobp)) + (skip-chars-forward skip-chars) + (if (eq ?+ (char-after)) + (progn (forward-char) + (insert ?-)) + (unless (eobp) + (insert esc) + (let ((p (point))) + (skip-chars-forward not-skip-chars) + (save-restriction + ;; encode-coding-region doesn't preserve point + (narrow-to-region p (point)) + (encode-coding-region p (point-max) 'utf-16-be) + (base64-encode-region p (point-max)) + (if imap + (subst-char-in-region p (point-max) ?/ ?,)) + (goto-char p) + ;; As I read the RFC, this isn't correct, but it's + ;; consistent with iconv, at least regarding `='. + (skip-chars-forward "^= \t\n") + (delete-region (point) (point-max)))) + (unless (eobp) + (insert ?-))))) + nil)) + +(defun utf-7-pre-write-conversion (from to) + (utf-7-encode from to nil)) + +(defun utf-7-imap-pre-write-conversion (from to) + (utf-7-encode from to t)) + +(provide 'utf-7) +;;; utf-7.el ends here |