diff options
author | Dave Love <fx@gnu.org> | 1999-12-15 15:15:29 +0000 |
---|---|---|
committer | Dave Love <fx@gnu.org> | 1999-12-15 15:15:29 +0000 |
commit | 1c8c6d3948af8c720e2e256a57cd5623e372e1c5 (patch) | |
tree | d09a4402395ab88e9923efd06f1ba0b9a4645c57 | |
parent | f12b4deaba6379fe3c1ff89c72fb40c0c5924836 (diff) | |
download | emacs-1c8c6d3948af8c720e2e256a57cd5623e372e1c5.tar.gz |
1999-12-15 Kenichi Handa <handa@etl.go.jp>
* regex.c (regex_compile): Adjusted for the change of CHAR_STRING.
1999-12-04 Stefan Monnier <monnier@cs.yale.edu>
* regex.c (regex_compile): Recognize *?, +? and ?? as non-greedy
operators and handle them properly.
* regex.h (RE_ALL_GREEDY): New option.
(RE_UNMATCHED_RIGHT_PAREN_ORD): Moved to the end where alphabetic
sorting would put it.
(RE_SYNTAX_AWK, RE_SYNTAX_GREP, RE_SYNTAX_EGREP)
(_RE_SYNTAX_POSIX_COMMON): Use the new option to keep old behavior.
-rw-r--r-- | src/regex.c | 51 |
1 file changed, 46 insertions, 5 deletions
diff --git a/src/regex.c b/src/regex.c index 3cf8a139a46..6660f07cacc 100644 --- a/src/regex.c +++ b/src/regex.c @@ -2168,6 +2168,7 @@ regex_compile (pattern, size, syntax, bufp) /* 1 means zero (many) matches is allowed. */ char zero_times_ok = 0, many_times_ok = 0; + char greedy = 1; /* If there is a sequence of repetition chars, collapse it down to just one (the right one). We can't combine @@ -2176,8 +2177,14 @@ regex_compile (pattern, size, syntax, bufp) for (;;) { - zero_times_ok |= c != '+'; - many_times_ok |= c != '?'; + if (!(syntax & RE_ALL_GREEDY) + && c == '?' && (zero_times_ok || many_times_ok)) + greedy = 0; + else + { + zero_times_ok |= c != '+'; + many_times_ok |= c != '?'; + } if (p == pend) break; @@ -2218,6 +2225,8 @@ regex_compile (pattern, size, syntax, bufp) /* Now we know whether or not zero matches is allowed and also whether or not two or more matches is allowed. */ + if (greedy) + { if (many_times_ok) { /* More than one repetition is allowed, so put in at the end a backward relative jump from `b' to before the next @@ -2276,7 +2285,39 @@ regex_compile (pattern, size, syntax, bufp) INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); b += 3; } - } + + } + else /* not greedy */ + { /* I wish the greedy and non-greedy cases could be merged. */ + + if (many_times_ok) + { + /* The greedy multiple match looks like a repeat..until: + we only need a conditional jump at the end of the loop */ + GET_BUFFER_SPACE (3); + STORE_JUMP (on_failure_jump, b, laststart); + b += 3; + if (zero_times_ok) + { + /* The repeat...until naturally matches one or more. + To also match zero times, we need to first jump to + the end of the loop (its conditional jump). */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (jump, laststart, b); + b += 3; + } + } + else + { + /* non-greedy a?? 
*/ + GET_BUFFER_SPACE (6); + INSERT_JUMP (jump, laststart, b + 3); + b += 3; + INSERT_JUMP (on_failure_jump, laststart, laststart + 6); + b += 3; + } + } + } break; @@ -3110,8 +3151,8 @@ regex_compile (pattern, size, syntax, bufp) #ifdef emacs if (! SINGLE_BYTE_CHAR_P (c)) { - unsigned char work[4], *str; - int i = CHAR_STRING (c, work, str); + unsigned char str[MAX_MULTIBYTE_LENGTH]; + int i = CHAR_STRING (c, str); int j; for (j = 0; j < i; j++) { |