From 9f6a4bbcc96bef451c75a8a78e442dec87a0ddf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Engdeg=C3=A5rd?= Date: Thu, 13 Feb 2020 20:06:48 +0100 Subject: Remove the optional KEEP-ORDER argument to regexp-opt This argument was added for the 'or' clause in rx, but it turned out to be a bad idea (bug#37659), and there seems to be little other use for it. * lisp/emacs-lisp/regexp-opt.el (regexp-opt): Remove KEEP-ORDER. * doc/lispref/searching.texi (Regexp Functions): * etc/NEWS: Remove it from the documentation. * test/lisp/emacs-lisp/regexp-opt-tests.el (regexp-opt-test--match-all) (regexp-opt-test--check-perm, regexp-opt-test--explain-perm) (regexp-opt-keep-order, regexp-opt-longest-match): Simplify test. --- doc/lispref/searching.texi | 9 +++---- etc/NEWS | 7 ----- lisp/emacs-lisp/regexp-opt.el | 43 +++++++------------------------ test/lisp/emacs-lisp/regexp-opt-tests.el | 44 +++++--------------------------- 4 files changed, 19 insertions(+), 84 deletions(-) diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi index 5f4509a8b43..1f6db0643e8 100644 --- a/doc/lispref/searching.texi +++ b/doc/lispref/searching.texi @@ -1745,7 +1745,7 @@ any special characters. @end defun @cindex optimize regexp -@defun regexp-opt strings &optional paren keep-order +@defun regexp-opt strings &optional paren This function returns an efficient regular expression that will match any of the strings in the list @var{strings}. This is useful when you need to make matching or searching as fast as possible---for example, @@ -1783,11 +1783,8 @@ if it is necessary to ensure that a postfix operator appended to it will apply to the whole expression. @end table -The optional argument @var{keep-order}, if non-@code{nil}, forces the -match to be performed in the order given, as if the strings were made -into a regexp by joining them with the @samp{\|} operator. If nil or -omitted, the returned regexp will always match the longest string -possible. +The returned regexp is ordered in such a way that it will always match +the longest string possible. Up to reordering, the resulting regexp of @code{regexp-opt} is equivalent to but usually more efficient than that of a simplified diff --git a/etc/NEWS b/etc/NEWS index 312869fe57e..380ac71260d 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -3526,13 +3526,6 @@ the process. That way 'make-process' can start remote processes. This is currently supported on GNUish hosts and on modern versions of MS-Windows. -+++ -** The function 'regexp-opt' accepts an additional optional argument. -By default, the regexp returned by 'regexp-opt' may match the strings -in any order. If the new third argument is non-nil, the match is -guaranteed to be performed in the order given, as if the strings were -made into a regexp by joining them with '\|'. - +++ ** The function 'regexp-opt', when given an empty list of strings, now returns a regexp that never matches anything, which is an identity for diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el index 2cce4e63539..35a5fda184f 100644 --- a/lisp/emacs-lisp/regexp-opt.el +++ b/lisp/emacs-lisp/regexp-opt.el @@ -84,7 +84,7 @@ ;;; Code: ;;;###autoload -(defun regexp-opt (strings &optional paren keep-order) +(defun regexp-opt (strings &optional paren) "Return a regexp to match a string in the list STRINGS. Each member of STRINGS is treated as a fixed string, not as a regexp. Optional PAREN specifies how the returned regexp is surrounded by @@ -114,11 +114,8 @@ nil necessary to ensure that a postfix operator appended to it will apply to the whole expression. -The optional argument KEEP-ORDER, if non-nil, forces the match to -be performed in the order given, as if the strings were made into -a regexp by joining them with the `\\|' operator. If nil or -omitted, the returned regexp is will always match the longest -string possible. +The returned regexp is ordered in such a way that it will always +match the longest string possible. Up to reordering, the resulting regexp is equivalent to but usually more efficient than that of a simplified version: @@ -140,34 +137,12 @@ usually more efficient than that of a simplified version: (completion-ignore-case nil) (completion-regexp-list nil) (open (cond ((stringp paren) paren) (paren "\\("))) - (re - (cond - ;; No strings: return an unmatchable regexp. - ((null strings) - (concat (or open "\\(?:") regexp-unmatchable "\\)")) - - ;; The algorithm will generate a pattern that matches - ;; longer strings in the list before shorter. If the - ;; list order matters, then no string must come after a - ;; proper prefix of that string. To check this, verify - ;; that a straight or-pattern matches each string - ;; entirely. - ((and keep-order - (let* ((case-fold-search nil) - (alts (mapconcat #'regexp-quote strings "\\|"))) - (and (let ((s strings)) - (while (and s - (string-match alts (car s)) - (= (match-end 0) (length (car s)))) - (setq s (cdr s))) - ;; If we exited early, we found evidence that - ;; regexp-opt-group cannot be used. - s) - (concat (or open "\\(?:") alts "\\)"))))) - (t - (regexp-opt-group - (delete-dups (sort (copy-sequence strings) 'string-lessp)) - (or open t) (not open)))))) + (re (if strings + (regexp-opt-group + (delete-dups (sort (copy-sequence strings) 'string-lessp)) + (or open t) (not open)) + ;; No strings: return an unmatchable regexp. + (concat (or open "\\(?:") regexp-unmatchable "\\)")))) (cond ((eq paren 'words) (concat "\\<" re "\\>")) ((eq paren 'symbols) diff --git a/test/lisp/emacs-lisp/regexp-opt-tests.el b/test/lisp/emacs-lisp/regexp-opt-tests.el index 9b4567c72cc..0179ac4f1f4 100644 --- a/test/lisp/emacs-lisp/regexp-opt-tests.el +++ b/test/lisp/emacs-lisp/regexp-opt-tests.el @@ -47,43 +47,13 @@ (mapcar (lambda (i) (regexp-opt-test--permutation i list)) (number-sequence 0 (1- (regexp-opt-test--factorial (length list)))))) -(defun regexp-opt-test--match-all (words re) - (mapcar (lambda (w) (and (string-match re w) - (match-string 0 w))) - words)) - -(defun regexp-opt-test--check-perm (perm) - (let* ((ref-re (mapconcat #'regexp-quote perm "\\|")) - (opt-re (regexp-opt perm nil t)) - (ref (regexp-opt-test--match-all perm ref-re)) - (opt (regexp-opt-test--match-all perm opt-re))) - (equal opt ref))) - -(defun regexp-opt-test--explain-perm (perm) - (let* ((ref-re (mapconcat #'regexp-quote perm "\\|")) - (opt-re (regexp-opt perm nil t)) - (ref (regexp-opt-test--match-all perm ref-re)) - (opt (regexp-opt-test--match-all perm opt-re))) - (concat "\n" - (format "Naïve regexp: %s\n" ref-re) - (format "Optimized regexp: %s\n" opt-re) - (format "Got: %s\n" opt) - (format "Expected: %s\n" ref)))) - -(put 'regexp-opt-test--check-perm 'ert-explainer 'regexp-opt-test--explain-perm) - -(ert-deftest regexp-opt-keep-order () - "Check that KEEP-ORDER works." - (dolist (perm (regexp-opt-test--permutations '("abc" "bca" "cab"))) - (should (regexp-opt-test--check-perm perm))) - (dolist (perm (regexp-opt-test--permutations '("abc" "ab" "bca" "bc"))) - (should (regexp-opt-test--check-perm perm))) - (dolist (perm (regexp-opt-test--permutations '("abxy" "cdxy"))) - (should (regexp-opt-test--check-perm perm))) - (dolist (perm (regexp-opt-test--permutations '("afgx" "bfgx" "afgy" "bfgy"))) - (should (regexp-opt-test--check-perm perm))) - (dolist (perm (regexp-opt-test--permutations '("a" "ab" "ac" "abc"))) - (should (regexp-opt-test--check-perm perm)))) +(ert-deftest regexp-opt-longest-match () + "Check that the regexp always matches as much as possible." + (let ((s "abcd")) + (dolist (perm (regexp-opt-test--permutations '("a" "ab" "ac" "abc"))) + (should (equal (and (string-match (regexp-opt perm) s) + (match-string 0 s)) + "abc"))))) (ert-deftest regexp-opt-charset () (should (equal (regexp-opt-charset '(?a ?b ?a)) "[ab]")) -- cgit v1.2.1