summary | refs | log | tree | commit | diff
path: root/lisp/international/mule.el
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/international/mule.el')
-rw-r--r-- lisp/international/mule.el 46
1 files changed, 33 insertions, 13 deletions
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index cc0658dc3f4..21f3118a98e 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -343,7 +343,7 @@ Return t if file exists."
;; Have the original buffer current while we eval.
(eval-buffer buffer nil
;; This is compatible with what `load' does.
- (if purify-flag file fullname)
+ (if dump-mode file fullname)
nil t))
(let (kill-buffer-hook kill-buffer-query-functions)
(kill-buffer buffer)))
@@ -819,10 +819,10 @@ VALUE is a CCL program name defined by `define-ccl-program'. The
CCL program reads a character sequence and writes a byte sequence
as an encoding result.
-`:inhibit-null-byte-detection'
+`:inhibit-nul-byte-detection'
VALUE non-nil means Emacs ignore null bytes on code detection.
-See the variable `inhibit-null-byte-detection'. This attribute
+See the variable `inhibit-nul-byte-detection'. This attribute
is meaningful only when `:coding-type' is `undecided'.
`:inhibit-iso-escape-detection'
@@ -867,7 +867,7 @@ non-ASCII files. This attribute is meaningful only when
:ccl-encoder
:valids))
((eq coding-type 'undecided)
- '(:inhibit-null-byte-detection
+ '(:inhibit-nul-byte-detection
:inhibit-iso-escape-detection
:prefer-utf-8))))))
@@ -911,7 +911,7 @@ non-ASCII files. This attribute is meaningful only when
(i 0))
(dolist (elt coding-system-iso-2022-flags)
(if (memq elt flags)
- (setq bits (logior bits (lsh 1 i))))
+ (setq bits (logior bits (ash 1 i))))
(setq i (1+ i)))
(setcdr (assq :flags spec-attrs) bits))))
@@ -920,8 +920,8 @@ non-ASCII files. This attribute is meaningful only when
(cons :name (cons name (cons :docstring (cons (purecopy docstring)
props)))))
(setcdr (assq :plist common-attrs) props)
- (apply 'define-coding-system-internal
- name (mapcar 'cdr (append common-attrs spec-attrs)))))
+ (apply #'define-coding-system-internal
+ name (mapcar #'cdr (append common-attrs spec-attrs)))))
(defun coding-system-doc-string (coding-system)
"Return the documentation string for CODING-SYSTEM."
@@ -1345,8 +1345,11 @@ just set the variable `buffer-file-coding-system' directly."
(setq coding-system
(merge-coding-systems coding-system buffer-file-coding-system)))
(when (and (called-interactively-p 'interactive)
- (not (memq 'emacs (coding-system-get coding-system
- :charset-list))))
+ ;; FIXME: For some reason
+ ;; (coding-system-get 'iso-2022-7bit :charset-list)
+ ;; returns `iso-2022' rather than returning a list!
+ (let ((css (coding-system-get coding-system :charset-list)))
+ (not (and (listp css) (memq 'emacs css)))))
;; Check whether save would succeed, and jump to the offending char(s)
;; if not.
(let ((css (find-coding-systems-region (point-min) (point-max))))
@@ -1514,6 +1517,7 @@ DECODING is the coding system to be used to decode input from the process,
ENCODING is the coding system to be used to encode output to the process.
For a list of possible coding systems, use \\[list-coding-systems]."
+ (declare (interactive-only set-process-coding-system))
(interactive
"zCoding-system for output from the process: \nzCoding-system for input to the process: ")
(let ((proc (get-buffer-process (current-buffer))))
@@ -2494,7 +2498,18 @@ This function is intended to be added to `auto-coding-functions'."
(when end
(if (re-search-forward "encoding=[\"']\\(.+?\\)[\"']" end t)
(let* ((match (match-string 1))
- (sym (intern (downcase match))))
+ (sym-name (downcase match))
+ (sym-name
+ ;; https://www.w3.org/TR/xml/#charencoding says:
+ ;; "Entities encoded in UTF-16 MUST [...] begin
+ ;; with the Byte Order Mark." The trick below is
+ ;; based on the fact that utf-16be/le don't
+ ;; specify BOM, while utf-16-be/le do.
+ (cond
+ ((equal sym-name "utf-16le") "utf-16-le")
+ ((equal sym-name "utf-16be") "utf-16-be")
+ (t sym-name)))
+ (sym (intern sym-name)))
(if (coding-system-p sym)
;; If the encoding tag is UTF-8 and the buffer's
;; encoding is one of the variants of UTF-8, use the
@@ -2554,7 +2569,7 @@ This function is intended to be added to `auto-coding-functions'."
;; (allowing for whitespace at bob). Note: 'DOCTYPE NETSCAPE' is
;; useful for Mozilla bookmark files.
(when (and (re-search-forward "\\`[[:space:]\n]*\\(<!doctype[[:space:]\n]+\\(html\\|netscape\\)\\|<html\\)" size t)
- (re-search-forward "<meta\\s-+\\(http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']text/\\sw+;\\s-*\\)?charset=[\"']?\\(.+?\\)[\"'\\s-/>]" size t))
+ (re-search-forward "<meta\\s-+\\(http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']text/\\sw+;\\s-*\\)?charset=[\"']?\\(.+?\\)[\"'[:space:]/>]" size t))
(let* ((match (match-string 2))
(sym (intern (downcase match))))
(if (coding-system-p sym)
@@ -2583,9 +2598,14 @@ added by processing software."
(let ((detected
(with-coding-priority '(utf-8)
(coding-system-base
- (detect-coding-region (point-min) (point-max) t)))))
- ;; Pure ASCII always comes back as undecided.
+ (detect-coding-region (point-min) (point-max) t))))
+ (bom (list (char-after 1) (char-after 2))))
(cond
+ ((equal bom '(#xFE #xFF))
+ 'utf-16be-with-signature)
+ ((equal bom '(#xFF #xFE))
+ 'utf-16le-with-signature)
+ ;; Pure ASCII always comes back as undecided.
((memq detected '(utf-8 undecided))
'utf-8)
((eq detected 'utf-16le-with-signature) 'utf-16le-with-signature)