summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mattias Engdegård <mattiase@acm.org> 2022-10-23 15:40:37 +0200
committer Mattias Engdegård <mattiase@acm.org> 2022-10-24 11:50:13 +0200
commit 36f5a1a7e74442272796413575f85ba9bd18cb53 (patch)
tree 8a01195f9d287dfa53f48792afd5e0ad5da81d34
parent b5ada7aa6fcb14019df207259609d602150b03a4 (diff)
download emacs-36f5a1a7e74442272796413575f85ba9bd18cb53.tar.gz
Fix regexp matching with atomic strings and optimised backtracking
This bug occurs when an atomic pattern is matched at the end of a string and the on-failure-keep-string-jump optimisation is in effect, as in:

  (string-match "\\`\\(?:ab\\)*\\'" "a")

which succeeded but clearly should not (bug#58726). Reported by Michael Heerdegen.

* src/regex-emacs.c (PREFETCH): Add reset parameter.
(re_match_2_internal): Use it for proper atomic pattern treatment.
* test/src/regex-emacs-tests.el (regexp-atomic-failure): New test.
-rw-r--r-- src/regex-emacs.c 14
-rw-r--r-- test/src/regex-emacs-tests.el 5
2 files changed, 14 insertions, 5 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index 9b2c14c413d..626560911f6 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -3446,14 +3446,18 @@ static bool bcmp_translate (re_char *, re_char *, ptrdiff_t,
/* Call before fetching a character with *d. This switches over to
string2 if necessary.
+ `reset' is executed before backtracking if there are no more characters.
Check re_match_2_internal for a discussion of why end_match_2 might
not be within string2 (but be equal to end_match_1 instead). */
-#define PREFETCH() \
+#define PREFETCH(reset) \
while (d == dend) \
{ \
/* End of string2 => fail. */ \
if (dend == end_match_2) \
- goto fail; \
+ { \
+ reset; \
+ goto fail; \
+ } \
/* End of string1 => advance to string2. */ \
d = string2; \
dend = end_match_2; \
@@ -4252,7 +4256,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
int pat_charlen, buf_charlen;
int pat_ch, buf_ch;
- PREFETCH ();
+ PREFETCH (d = dfail);
if (multibyte)
pat_ch = string_char_and_length (p, &pat_charlen);
else
@@ -4280,7 +4284,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
int pat_charlen;
int pat_ch, buf_ch;
- PREFETCH ();
+ PREFETCH (d = dfail);
if (multibyte)
{
pat_ch = string_char_and_length (p, &pat_charlen);
@@ -4486,7 +4490,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
if (d2 == dend2) break;
/* If necessary, advance to next segment in data. */
- PREFETCH ();
+ PREFETCH (d = dfail);
/* How many characters left in this segment to match. */
dcnt = dend - d;
diff --git a/test/src/regex-emacs-tests.el b/test/src/regex-emacs-tests.el
index ff0d6be3f5d..b323f592dca 100644
--- a/test/src/regex-emacs-tests.el
+++ b/test/src/regex-emacs-tests.el
@@ -867,4 +867,9 @@ This evaluates the TESTS test cases from glibc."
(should (equal (string-match "[[:lower:]]" "ẞ") 0))
(should (equal (string-match "[[:upper:]]" "ẞ") 0))))
+(ert-deftest regexp-atomic-failure ()
+ "Bug#58726."
+ (should (equal (string-match "\\`\\(?:ab\\)*\\'" "a") nil))
+ (should (equal (string-match "\\`a\\{2\\}*\\'" "a") nil)))
+
;;; regex-emacs-tests.el ends here