diff options
author | Chong Yidong <cyd@gnu.org> | 2012-07-01 15:17:05 +0800 |
---|---|---|
committer | Chong Yidong <cyd@gnu.org> | 2012-07-01 15:17:05 +0800 |
commit | fbf2e7ad3bd676083dae339aba16bf812dfc51a3 (patch) | |
tree | 1ee6f4f014de8f97f8a711f58d3323aebbf8ce41 | |
parent | b95b72547b5a2c5e4e294e9e703d3a85928f58f4 (diff) | |
download | emacs-fbf2e7ad3bd676083dae339aba16bf812dfc51a3.tar.gz |
Improve xml parameter entity parsing, and add a new ERT test.
* test/automated/xml-parse-tests.el: New file.
* lisp/xml.el (xml--parse-buffer): New function. Move most of
xml-parse-region here.
(xml-parse-region): Copy region into a temporary buffer, since
parameter entity substitution requires changing buffer contents.
Use xml--parse-buffer.
(xml-parse-file): Use xml--parse-buffer.
(xml-parse-dtd): Make parameter entity substitution work right.
-rw-r--r-- | lisp/ChangeLog | 10 | ||||
-rw-r--r-- | lisp/xml.el | 180 | ||||
-rw-r--r-- | test/ChangeLog | 4 | ||||
-rw-r--r-- | test/automated/xml-parse-tests.el | 57 |
4 files changed, 178 insertions, 73 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 0cae8a88e77..3156dc412e3 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,13 @@ +2012-07-01 Chong Yidong <cyd@gnu.org> + + * xml.el (xml--parse-buffer): New function. Move most of + xml-parse-region here. + (xml-parse-region): Copy region into a temporary buffer, since + parameter entity substitution requires changing buffer contents. + Use xml--parse-buffer. + (xml-parse-file): Use xml--parse-buffer. + (xml-parse-dtd): Make parameter entity substitution work right. + 2012-06-30 Glenn Morris <rgm@gnu.org> * comint.el (follow-comint-scroll-to-bottom): Fix declaration. diff --git a/lisp/xml.el b/lisp/xml.el index a9e1b2c2830..841e19a174a 100644 --- a/lisp/xml.el +++ b/lisp/xml.el @@ -165,23 +165,12 @@ See also `xml-get-attribute-or-nil'." ;;;###autoload (defun xml-parse-file (file &optional parse-dtd parse-ns) "Parse the well-formed XML file FILE. -If FILE is already visited, use its buffer and don't kill it. -Returns the top node with all its children. +Return the top node with all its children. If PARSE-DTD is non-nil, the DTD is parsed rather than skipped. If PARSE-NS is non-nil, then QNAMES are expanded." - (if (get-file-buffer file) - (with-current-buffer (get-file-buffer file) - (save-excursion - (xml-parse-region (point-min) - (point-max) - (current-buffer) - parse-dtd parse-ns))) - (with-temp-buffer - (insert-file-contents file) - (xml-parse-region (point-min) - (point-max) - (current-buffer) - parse-dtd parse-ns)))) + (with-temp-buffer + (insert-file-contents file) + (xml--parse-buffer parse-dtd parse-ns))) (eval-and-compile (let* ((start-chars (concat "[:alpha:]:_")) @@ -320,42 +309,44 @@ and returned as the first element of the list. If PARSE-NS is non-nil, then QNAMES are expanded." ;; Use fixed syntax table to ensure regexp char classes and syntax ;; specs DTRT. + (unless buffer + (setq buffer (current-buffer))) + (with-temp-buffer + (insert-buffer-substring buffer beg end) + (xml--parse-buffer parse-dtd parse-ns))) + +(defun xml--parse-buffer (parse-dtd parse-ns) (with-syntax-table (standard-syntax-table) (let ((case-fold-search nil) ; XML is case-sensitive. ;; Prevent entity definitions from changing the defaults (xml-entity-alist xml-entity-alist) (xml-parameter-entity-alist xml-parameter-entity-alist) xml result dtd) - (save-excursion - (if buffer - (set-buffer buffer)) - (save-restriction - (narrow-to-region beg end) - (goto-char (point-min)) - (while (not (eobp)) - (if (search-forward "<" nil t) - (progn - (forward-char -1) - (setq result (xml-parse-tag parse-dtd parse-ns)) - (cond - ((null result) - ;; Not looking at an xml start tag. - (unless (eobp) - (forward-char 1))) - ((and xml (not xml-sub-parser)) - ;; Translation of rule [1] of XML specifications - (error "XML: (Not Well-Formed) Only one root tag allowed")) - ((and (listp (car result)) - parse-dtd) - (setq dtd (car result)) - (if (cdr result) ; possible leading comment - (add-to-list 'xml (cdr result)))) - (t - (add-to-list 'xml result)))) - (goto-char (point-max)))) - (if parse-dtd - (cons dtd (nreverse xml)) - (nreverse xml))))))) + (goto-char (point-min)) + (while (not (eobp)) + (if (search-forward "<" nil t) + (progn + (forward-char -1) + (setq result (xml-parse-tag parse-dtd parse-ns)) + (cond + ((null result) + ;; Not looking at an xml start tag. + (unless (eobp) + (forward-char 1))) + ((and xml (not xml-sub-parser)) + ;; Translation of rule [1] of XML specifications + (error "XML: (Not Well-Formed) Only one root tag allowed")) + ((and (listp (car result)) + parse-dtd) + (setq dtd (car result)) + (if (cdr result) ; possible leading comment + (add-to-list 'xml (cdr result)))) + (t + (add-to-list 'xml result)))) + (goto-char (point-max)))) + (if parse-dtd + (cons dtd (nreverse xml)) + (nreverse xml))))) (defun xml-maybe-do-ns (name default xml-ns) "Perform any namespace expansion. @@ -600,7 +591,10 @@ This follows the rule [28] in the XML specifications." ;; Get the name of the document (looking-at xml-name-regexp) (let ((dtd (list (match-string-no-properties 0) 'dtd)) - (xml-parameter-entity-alist xml-parameter-entity-alist)) + (xml-parameter-entity-alist xml-parameter-entity-alist) + (parameter-entity-re (eval-when-compile + (concat "%\\(" xml-name-re "\\);"))) + next-parameter-entity) (goto-char (match-end 0)) (skip-syntax-forward " ") @@ -638,13 +632,28 @@ This follows the rule [28] in the XML specifications." (error "XML: Bad DTD")) (forward-char) + ;; [2.8]: "markup declarations may be made up in whole or in + ;; part of the replacement text of parameter entities." + + ;; Since parameter entities are valid only within the DTD, we + ;; first search for the position of the next possible parameter + ;; entity. Then, search for the next DTD element; if it ends + ;; before the next parameter entity, expand the parameter entity + ;; and try again. + (setq next-parameter-entity + (save-excursion + (if (re-search-forward parameter-entity-re nil t) + (match-beginning 0)))) + ;; Parse the rest of the DTD ;; Fixme: Deal with NOTATION, PIs. (while (not (looking-at "\\s-*\\]")) (skip-syntax-forward " ") (cond ;; Element declaration [45]: - ((looking-at "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>") + ((and (looking-at "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>") + (or (null next-parameter-entity) + (<= (match-end 0) next-parameter-entity))) (let ((element (match-string-no-properties 1)) (type (match-string-no-properties 2)) (end-pos (match-end 0))) @@ -672,19 +681,31 @@ This follows the rule [28] in the XML specifications." (goto-char end-pos))) ;; Attribute-list declaration [52] (currently unsupported): - ((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re - "\\)[ \t\n\r]*\\(" xml-att-def-re - "\\)*[ \t\n\r]*>")) + ((and (looking-at (eval-when-compile + (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re + "\\)[ \t\n\r]*\\(" xml-att-def-re + "\\)*[ \t\n\r]*>"))) + (or (null next-parameter-entity) + (<= (match-end 0) next-parameter-entity))) (goto-char (match-end 0))) - ;; Comments (skip to end): + ;; Comments (skip to end, ignoring parameter entity): ((looking-at "<!--") - (search-forward "-->")) + (search-forward "-->") + (and next-parameter-entity + (> (point) next-parameter-entity) + (setq next-parameter-entity + (save-excursion + (if (re-search-forward parameter-entity-re nil t) + (match-beginning 0)))))) ;; Internal entity declarations: - ((looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" - xml-name-re "\\)[ \t\n\r]*\\(" - xml-entity-value-re "\\)[ \t\n\r]*>")) + ((and (looking-at (eval-when-compile + (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" + xml-name-re "\\)[ \t\n\r]*\\(" + xml-entity-value-re "\\)[ \t\n\r]*>"))) + (or (null next-parameter-entity) + (<= (match-end 0) next-parameter-entity))) (let* ((name (prog1 (match-string-no-properties 2) (goto-char (match-end 0)))) (alist (if (match-string 1) @@ -700,26 +721,39 @@ This follows the rule [28] in the XML specifications." (set alist (cons (cons name value) (symbol-value alist)))))) ;; External entity declarations (currently unsupported): - ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" - xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+" - "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>")) - (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" - xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+" - "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\"" - "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" - "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" - "[ \t\n\r]*>"))) + ((and (or (looking-at (eval-when-compile + (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" + xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+" + "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))) + (looking-at (eval-when-compile + (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" + xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+" + "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\"" + "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" + "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" + "[ \t\n\r]*>")))) + (or (null next-parameter-entity) + (<= (match-end 0) next-parameter-entity))) (goto-char (match-end 0))) - ;; Parameter entity: - ((looking-at (concat "%\\(" xml-name-re "\\);")) - (goto-char (match-end 0)) - (let* ((entity (match-string 1)) - (end (point-marker)) - (elt (assoc entity xml-parameter-entity-alist))) - (when elt - (replace-match (cdr elt) t t) - (goto-char end)))) + ;; If a parameter entity is in the way, expand it. + (next-parameter-entity + (save-excursion + (goto-char next-parameter-entity) + (unless (looking-at parameter-entity-re) + (error "XML: Internal error")) + (let* ((entity (match-string 1)) + (beg (point-marker)) + (elt (assoc entity xml-parameter-entity-alist))) + (if elt + (progn + (replace-match (cdr elt) t t) + ;; The replacement can itself be a parameter entity. + (goto-char next-parameter-entity)) + (goto-char (match-end 0)))) + (setq next-parameter-entity + (if (re-search-forward parameter-entity-re nil t) + (match-beginning 0))))) ;; Anything else: (xml-validating-parser diff --git a/test/ChangeLog b/test/ChangeLog index 45fc70e0440..d9d9bc5a9fa 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,7 @@ +2012-07-01 Chong Yidong <cyd@gnu.org> + + * automated/xml-parse-tests.el: New file. + 2012-06-27 Stefan Monnier <monnier@iro.umontreal.ca> * automated/ert-x-tests.el (ert-test-run-tests-interactively-2): diff --git a/test/automated/xml-parse-tests.el b/test/automated/xml-parse-tests.el new file mode 100644 index 00000000000..8e8ef291bdc --- /dev/null +++ b/test/automated/xml-parse-tests.el @@ -0,0 +1,57 @@ +;;; xml-parse-tests.el --- Test suite for XML parsing. + +;; Copyright (C) 2012 Free Software Foundation, Inc. + +;; Author: Chong Yidong <cyd@stupidchicken.com> +;; Keywords: internal +;; Human-Keywords: internal + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: + +;; Type M-x test-xml-parse RET to generate the test buffer. + +;;; Code: + +(require 'xml) + +(defvar xml-parse-tests--data + '(;; General entity substitution + ("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY ent \"AbC\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" . + ((foo ((a . "b")) (bar nil "AbC;")))) + ;; Parameter entity substitution + ("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY % pent \"AbC\"><!ENTITY ent \"%pent;\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" . + ((foo ((a . "b")) (bar nil "AbC;")))) + ;; Tricky parameter entity substitution (like XML spec Appendix D) + ("<?xml version='1.0'?><!DOCTYPE foo [ <!ENTITY % xx '%zz;'><!ENTITY % zz '<!ENTITY ent \"b\" >' > %xx; ]><foo>A&ent;C</foo>" . + ((foo nil "AbC")))) + "Alist of XML strings and their expected parse trees.") + +(ert-deftest xml-parse-tests () + "Test XML parsing." + (with-temp-buffer + (dolist (test xml-parse-tests--data) + (erase-buffer) + (insert (car test)) + (should (equal (cdr test) + (xml-parse-region (point-min) (point-max))))))) + +;; Local Variables: +;; no-byte-compile: t +;; End: + +;;; xml-parse-tests.el ends here. |