summaryrefslogtreecommitdiff
path: root/src/regexp_nfa.c
diff options
context:
space:
mode:
authorBram Moolenaar <Bram@vim.org>2013-06-08 23:26:27 +0200
committerBram Moolenaar <Bram@vim.org>2013-06-08 23:26:27 +0200
commit1e02e6620be6f6ded23879dec344badf71a34ca8 (patch)
treeadd764cfb2ca2e9d832ee46c4c3eed23510b06cf /src/regexp_nfa.c
parente7766eeaa5d92cf727649923cfd3e42b1696df03 (diff)
downloadvim-git-1e02e6620be6f6ded23879dec344badf71a34ca8.tar.gz
updated for version 7.3.1151v7.3.1151
Problem: New regexp engine: Slow when a look-behind match is followed by a zero-width match. Solution: Postpone the look-behind match more often.
Diffstat (limited to 'src/regexp_nfa.c')
-rw-r--r--src/regexp_nfa.c92
1 files changed, 89 insertions, 3 deletions
diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c
index 18bcad098..5c6e986f4 100644
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -2794,6 +2794,7 @@ nfa_max_width(startstate, depth)
/* unrecognized */
return -1;
}
+
/*
* Convert a postfix form into its equivalent NFA.
* Return the NFA start state on success, NULL otherwise.
@@ -3433,6 +3434,7 @@ static void copy_sub __ARGS((regsub_T *to, regsub_T *from));
static void copy_sub_off __ARGS((regsub_T *to, regsub_T *from));
static int sub_equal __ARGS((regsub_T *sub1, regsub_T *sub2));
static int has_state_with_pos __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs));
+static int match_follows __ARGS((nfa_state_T *startstate, int depth));
static int state_in_list __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs));
static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, int off));
static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, nfa_pim_T *pim, int *ip));
@@ -3626,6 +3628,88 @@ has_state_with_pos(l, state, subs)
}
/*
+ * Return TRUE if "state" leads to a NFA_MATCH without advancing the input.
+ */
+ static int
+match_follows(startstate, depth)
+ nfa_state_T *startstate;
+ int depth;
+{
+ nfa_state_T *state = startstate;
+
+ /* avoid too much recursion */
+ if (depth > 10)
+ return FALSE;
+
+ for (;;)
+ {
+ switch (state->c)
+ {
+ case NFA_MATCH:
+ return TRUE;
+
+ case NFA_SPLIT:
+ return match_follows(state->out, depth + 1)
+ || match_follows(state->out1, depth + 1);
+
+ case NFA_START_INVISIBLE:
+ case NFA_START_INVISIBLE_BEFORE:
+ case NFA_START_INVISIBLE_NEG:
+ case NFA_START_INVISIBLE_BEFORE_NEG:
+ case NFA_COMPOSING:
+ /* skip ahead to next state */
+ state = state->out1->out;
+ break;
+
+ case NFA_ANY:
+ case NFA_IDENT:
+ case NFA_SIDENT:
+ case NFA_KWORD:
+ case NFA_SKWORD:
+ case NFA_FNAME:
+ case NFA_SFNAME:
+ case NFA_PRINT:
+ case NFA_SPRINT:
+ case NFA_WHITE:
+ case NFA_NWHITE:
+ case NFA_DIGIT:
+ case NFA_NDIGIT:
+ case NFA_HEX:
+ case NFA_NHEX:
+ case NFA_OCTAL:
+ case NFA_NOCTAL:
+ case NFA_WORD:
+ case NFA_NWORD:
+ case NFA_HEAD:
+ case NFA_NHEAD:
+ case NFA_ALPHA:
+ case NFA_NALPHA:
+ case NFA_LOWER:
+ case NFA_NLOWER:
+ case NFA_UPPER:
+ case NFA_NUPPER:
+ case NFA_START_COLL:
+ case NFA_START_NEG_COLL:
+ case NFA_NEWL:
+ /* state will advance input */
+ return FALSE;
+
+ default:
+ if (state->c > 0)
+ /* state will advance input */
+ return FALSE;
+
+ /* Others: zero-width or possibly zero-width, might still find
+ * a match at the same position, keep looking. */
+ break;
+ }
+ state = state->out;
+ }
+ return FALSE;
+}
+
+
+/*
* Return TRUE if "state" is already in list "l".
*/
static int
@@ -5714,9 +5798,11 @@ nfa_regmatch(prog, start, submatch, m)
if (add_state != NULL)
{
- if (t->pim != NULL)
+ /* Handle the postponed invisible match before advancing to
+ * the next character and for a zero-width match if the match
+ * might end without advancing. */
+ if (t->pim != NULL && (!add_here || match_follows(add_state, 0)))
{
- /* postponed invisible match */
if (t->pim->result == NFA_PIM_TODO)
{
#ifdef ENABLE_LOG
@@ -5773,7 +5859,7 @@ nfa_regmatch(prog, start, submatch, m)
}
if (add_here)
- addstate_here(thislist, add_state, &t->subs, NULL, &listidx);
+ addstate_here(thislist, add_state, &t->subs, t->pim, &listidx);
else
{
addstate(nextlist, add_state, &t->subs, add_off);