diff options
author | Bram Moolenaar <Bram@vim.org> | 2013-05-25 20:19:50 +0200 |
---|---|---|
committer | Bram Moolenaar <Bram@vim.org> | 2013-05-25 20:19:50 +0200 |
commit | 4b41706477bccb6f0b3f5154f09af30e9fc4f452 (patch) | |
tree | e77da44ff02c08064f5b0d073c6a77ab3cc0adc1 | |
parent | f47ca63dbcc547fa7d16ff4a0e8729ba9c3c508a (diff) | |
download | vim-git-4b41706477bccb6f0b3f5154f09af30e9fc4f452.tar.gz |
updated for version 7.3.1017 (tag: v7.3.1017)
Problem: Zero width match changes length of match.
Solution: For a zero width match put new states in the current position in
the state list.
-rw-r--r-- | src/regexp.h | 2 |
-rw-r--r-- | src/regexp_nfa.c | 121 |
-rw-r--r-- | src/testdir/test64.in | 7 |
-rw-r--r-- | src/testdir/test64.ok | 4 |
-rw-r--r-- | src/version.c | 2 |
5 files changed, 99 insertions, 37 deletions
diff --git a/src/regexp.h b/src/regexp.h index 1fe42729e..88f9cfbf6 100644 --- a/src/regexp.h +++ b/src/regexp.h @@ -29,8 +29,6 @@ typedef struct regengine regengine_T; -typedef struct thread thread_T; - /* * Structure returned by vim_regcomp() to pass on to vim_regexec(). * This is the general structure. For the actual matcher, two specific diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c index 0f212554e..10b6af5cd 100644 --- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -2471,24 +2471,27 @@ theend: * NFA execution code. ****************************************************************/ -/* thread_T contains runtime information of a NFA state */ -struct thread +/* nfa_thread_T contains runtime information of a NFA state */ +typedef struct { nfa_state_T *state; - regsub_T sub; /* submatch info */ -}; + regsub_T sub; /* Submatch info. TODO: expensive! */ +} nfa_thread_T; + typedef struct { - thread_T *t; - int n; -} List; + nfa_thread_T *t; + int n; +} nfa_list_T; -static void addstate __ARGS((List *l, nfa_state_T *state, regsub_T *m, int off, int lid, int *match)); +static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int off, int lid, int *match)); + +static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int lid, int *match, int *ip)); static void addstate(l, state, m, off, lid, match) - List *l; /* runtime state list */ + nfa_list_T *l; /* runtime state list */ nfa_state_T *state; /* state to update */ regsub_T *m; /* pointers to subexpressions */ int off; /* byte offset, when -1 go to next line */ @@ -2497,7 +2500,7 @@ addstate(l, state, m, off, lid, match) { regsub_T save; int subidx = 0; - thread_T *lastthread; + nfa_thread_T *lastthread; if (l == NULL || state == NULL) return; @@ -2533,7 +2536,7 @@ addstate(l, state, m, off, lid, match) state->lastlist = lid; lastthread = &l->t[l->n++]; lastthread->state = state; - lastthread->sub = *m; + lastthread->sub = *m; /* TODO: expensive! 
*/ } } @@ -2698,6 +2701,54 @@ addstate(l, state, m, off, lid, match) } /* + * Like addstate(), but the new state(s) are put at position "*ip". + * Used for zero-width matches, next state to use is the added one. + * This makes sure the order of states to be tried does not change, which + * matters for alternatives. + */ + static void +addstate_here(l, state, m, lid, matchp, ip) + nfa_list_T *l; /* runtime state list */ + nfa_state_T *state; /* state to update */ + regsub_T *m; /* pointers to subexpressions */ + int lid; + int *matchp; /* found match? */ + int *ip; +{ + int tlen = l->n; + int count; + int i = *ip; + + /* first add the state(s) at the end, so that we know how many there are */ + addstate(l, state, m, 0, lid, matchp); + + /* when "*ip" was at the end of the list, nothing to do */ + if (i + 1 == tlen) + return; + + /* re-order to put the new state at the current position */ + count = l->n - tlen; + if (count > 1) + { + /* make space for new states, then move them from the + * end to the current position */ + mch_memmove(&(l->t[i + count]), + &(l->t[i + 1]), + sizeof(nfa_thread_T) * (l->n - i - 1)); + mch_memmove(&(l->t[i]), + &(l->t[l->n - 1]), + sizeof(nfa_thread_T) * count); + } + else + { + /* overwrite the current state */ + l->t[i] = l->t[l->n - 1]; + } + --l->n; + *ip = i - 1; +} + +/* * Check character class "class" against current character c. 
*/ static int @@ -2872,17 +2923,17 @@ nfa_regmatch(start, submatch, m) int match = FALSE; int flag = 0; int old_reglnum = -1; - int go_to_nextline; - thread_T *t; + int go_to_nextline = FALSE; + nfa_thread_T *t; char_u *old_reginput = NULL; char_u *old_regline = NULL; - List list[3]; - List *listtbl[2][2]; - List *ll; + nfa_list_T list[3]; + nfa_list_T *listtbl[2][2]; + nfa_list_T *ll; int listid = 1; - List *thislist; - List *nextlist; - List *neglist; + nfa_list_T *thislist; + nfa_list_T *nextlist; + nfa_list_T *neglist; int *listids = NULL; int j = 0; #ifdef NFA_REGEXP_DEBUG_LOG @@ -2896,10 +2947,10 @@ nfa_regmatch(start, submatch, m) #endif /* Allocate memory for the lists of nodes */ - size = (nstate + 1) * sizeof(thread_T); - list[0].t = (thread_T *)lalloc(size, TRUE); - list[1].t = (thread_T *)lalloc(size, TRUE); - list[2].t = (thread_T *)lalloc(size, TRUE); + size = (nstate + 1) * sizeof(nfa_thread_T); + list[0].t = (nfa_thread_T *)lalloc(size, TRUE); + list[1].t = (nfa_thread_T *)lalloc(size, TRUE); + list[2].t = (nfa_thread_T *)lalloc(size, TRUE); if (list[0].t == NULL || list[1].t == NULL || list[2].t == NULL) goto theend; vim_memset(list[0].t, 0, size); @@ -3056,8 +3107,8 @@ nfa_regmatch(start, submatch, m) * nfa_regmatch(). Submatches are stored in *m, and used in * the parent call. 
*/ if (start->c == NFA_MOPEN + 0) - addstate(thislist, t->state->out, &t->sub, 0, listid, - &match); + addstate_here(thislist, t->state->out, &t->sub, listid, + &match, &i); else { *m = t->sub; @@ -3130,8 +3181,8 @@ nfa_regmatch(start, submatch, m) t->sub.end[j] = m->end[j]; } /* t->state->out1 is the corresponding END_INVISIBLE node */ - addstate(thislist, t->state->out1->out, &t->sub, 0, listid, - &match); + addstate_here(thislist, t->state->out1->out, &t->sub, + listid, &match, &i); } else { @@ -3142,14 +3193,14 @@ nfa_regmatch(start, submatch, m) case NFA_BOL: if (reginput == regline) - addstate(thislist, t->state->out, &t->sub, 0, listid, - &match); + addstate_here(thislist, t->state->out, &t->sub, listid, + &match, &i); break; case NFA_EOL: if (c == NUL) - addstate(thislist, t->state->out, &t->sub, 0, listid, - &match); + addstate_here(thislist, t->state->out, &t->sub, listid, + &match, &i); break; case NFA_BOW: @@ -3176,8 +3227,8 @@ nfa_regmatch(start, submatch, m) && vim_iswordc_buf(reginput[-1], reg_buf))) bow = FALSE; if (bow) - addstate(thislist, t->state->out, &t->sub, 0, listid, - &match); + addstate_here(thislist, t->state->out, &t->sub, listid, + &match, &i); break; } @@ -3204,8 +3255,8 @@ nfa_regmatch(start, submatch, m) || (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf))) eow = FALSE; if (eow) - addstate(thislist, t->state->out, &t->sub, 0, listid, - &match); + addstate_here(thislist, t->state->out, &t->sub, listid, + &match, &i); break; } diff --git a/src/testdir/test64.in b/src/testdir/test64.in index 9799647da..6faf2dbe2 100644 --- a/src/testdir/test64.in +++ b/src/testdir/test64.in @@ -270,6 +270,7 @@ STARTTEST :call add(tl, ['aa \zsax', ' ax']) " must match before \zs :call add(tl, ['abc \zsmatch\ze abc', 'abc abc abc match abc abc', 'match']) :call add(tl, ['\v(a \zsif .*){2}', 'a if then a if last', 'if last', 'a if last']) +:call add(tl, ['\>\zs.', 'aword. 
', '.']) :"""" Tests for \@ features :call add(tl, ['abc\@=', 'abc', 'ab']) @@ -299,6 +300,12 @@ STARTTEST :call add(tl, ['\%u0020', 'yes no', ' ']) :call add(tl, ['\%U00000020', 'yes no', ' ']) +:"""" Alternatives, must use first longest match +:call add(tl, ['goo\|go', 'google', 'goo']) +:call add(tl, ['\<goo\|\<go', 'google', 'goo']) +:call add(tl, ['\<goo\|go', 'google', 'goo']) + + :"""" Run the tests :" diff --git a/src/testdir/test64.ok b/src/testdir/test64.ok index d31f1efff..3fb8268e4 100644 --- a/src/testdir/test64.ok +++ b/src/testdir/test64.ok @@ -209,6 +209,7 @@ OK - abc\zsdd OK - aa \zsax OK - abc \zsmatch\ze abc OK - \v(a \zsif .*){2} +OK - \>\zs. OK - abc\@= OK - abc\@=cd OK - abc\@= @@ -231,4 +232,7 @@ OK - \%o40 OK - \%x20 OK - \%u0020 OK - \%U00000020 +OK - goo\|go +OK - \<goo\|\<go +OK - \<goo\|go 192.168.0.1 diff --git a/src/version.c b/src/version.c index d03fed52a..6d745d380 100644 --- a/src/version.c +++ b/src/version.c @@ -729,6 +729,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 1017, +/**/ 1016, /**/ 1015, |