diff options
author | Bram Moolenaar <Bram@vim.org> | 2016-10-02 16:51:57 +0200 |
---|---|---|
committer | Bram Moolenaar <Bram@vim.org> | 2016-10-02 16:51:57 +0200 |
commit | 6100d02aab7c8294b581cb299250eea164b50e9d (patch) | |
tree | 753bb02aa6aa2b9121a640b343f5c0a220bfd40e /src/regexp_nfa.c | |
parent | 2ec618c9feac4573b154510236ad8121c77d0eca (diff) | |
download | vim-git-6100d02aab7c8294b581cb299250eea164b50e9d.tar.gz |
patch 8.0.0020 (tag: v8.0.0020)
Problem: The regexp engines are not reentrant.
Solution: Add regexec_T and save/restore the state when needed.
Diffstat (limited to 'src/regexp_nfa.c')
-rw-r--r-- | src/regexp_nfa.c | 189 |
1 file changed, 95 insertions, 94 deletions
diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c index 1f331aed4..fcca81828 100644 --- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -5432,7 +5432,7 @@ skip_to_start(int c, colnr_T *colp) char_u *s; /* Used often, do some work to avoid call overhead. */ - if (!ireg_ic + if (!rex.reg_ic #ifdef FEAT_MBYTE && !has_mbyte #endif @@ -5467,7 +5467,7 @@ find_match_text(colnr_T startcol, int regstart, char_u *match_text) { c1 = PTR2CHAR(match_text + len1); c2 = PTR2CHAR(regline + col + len2); - if (c1 != c2 && (!ireg_ic || MB_TOLOWER(c1) != MB_TOLOWER(c2))) + if (c1 != c2 && (!rex.reg_ic || MB_TOLOWER(c1) != MB_TOLOWER(c2))) { match = FALSE; break; @@ -5485,15 +5485,15 @@ find_match_text(colnr_T startcol, int regstart, char_u *match_text) cleanup_subexpr(); if (REG_MULTI) { - reg_startpos[0].lnum = reglnum; - reg_startpos[0].col = col; - reg_endpos[0].lnum = reglnum; - reg_endpos[0].col = col + len2; + rex.reg_startpos[0].lnum = reglnum; + rex.reg_startpos[0].col = col; + rex.reg_endpos[0].lnum = reglnum; + rex.reg_endpos[0].col = col + len2; } else { - reg_startp[0] = regline + col; - reg_endp[0] = regline + col + len2; + rex.reg_startp[0] = regline + col; + rex.reg_endp[0] = regline + col + len2; } return 1L; } @@ -5728,8 +5728,8 @@ nfa_regmatch( { #ifdef FEAT_MBYTE /* If the match ends before a composing characters and - * ireg_icombine is not set, that is not really a match. */ - if (enc_utf8 && !ireg_icombine && utf_iscomposing(curc)) + * rex.reg_icombine is not set, that is not really a match. */ + if (enc_utf8 && !rex.reg_icombine && utf_iscomposing(curc)) break; #endif nfa_match = TRUE; @@ -6048,16 +6048,16 @@ nfa_regmatch( int this_class; /* Get class of current and previous char (if it exists). 
*/ - this_class = mb_get_class_buf(reginput, reg_buf); + this_class = mb_get_class_buf(reginput, rex.reg_buf); if (this_class <= 1) result = FALSE; else if (reg_prev_class() == this_class) result = FALSE; } #endif - else if (!vim_iswordc_buf(curc, reg_buf) + else if (!vim_iswordc_buf(curc, rex.reg_buf) || (reginput > regline - && vim_iswordc_buf(reginput[-1], reg_buf))) + && vim_iswordc_buf(reginput[-1], rex.reg_buf))) result = FALSE; if (result) { @@ -6076,16 +6076,16 @@ nfa_regmatch( int this_class, prev_class; /* Get class of current and previous char (if it exists). */ - this_class = mb_get_class_buf(reginput, reg_buf); + this_class = mb_get_class_buf(reginput, rex.reg_buf); prev_class = reg_prev_class(); if (this_class == prev_class || prev_class == 0 || prev_class == 1) result = FALSE; } #endif - else if (!vim_iswordc_buf(reginput[-1], reg_buf) + else if (!vim_iswordc_buf(reginput[-1], rex.reg_buf) || (reginput[0] != NUL - && vim_iswordc_buf(curc, reg_buf))) + && vim_iswordc_buf(curc, rex.reg_buf))) result = FALSE; if (result) { @@ -6096,7 +6096,7 @@ nfa_regmatch( case NFA_BOF: if (reglnum == 0 && reginput == regline - && (!REG_MULTI || reg_firstlnum == 1)) + && (!REG_MULTI || rex.reg_firstlnum == 1)) { add_here = TRUE; add_state = t->state->out; @@ -6104,7 +6104,7 @@ nfa_regmatch( break; case NFA_EOF: - if (reglnum == reg_maxline && curc == NUL) + if (reglnum == rex.reg_maxline && curc == NUL) { add_here = TRUE; add_state = t->state->out; @@ -6131,7 +6131,7 @@ nfa_regmatch( * (no preceding character). */ len += mb_char2len(mc); } - if (ireg_icombine && len == 0) + if (rex.reg_icombine && len == 0) { /* If \Z was present, then ignore composing characters. * When ignoring the base character this always matches. 
*/ @@ -6190,8 +6190,8 @@ nfa_regmatch( #endif case NFA_NEWL: - if (curc == NUL && !reg_line_lbr && REG_MULTI - && reglnum <= reg_maxline) + if (curc == NUL && !rex.reg_line_lbr && REG_MULTI + && reglnum <= rex.reg_maxline) { go_to_nextline = TRUE; /* Pass -1 for the offset, which means taking the position @@ -6199,7 +6199,7 @@ nfa_regmatch( add_state = t->state->out; add_off = -1; } - else if (curc == '\n' && reg_line_lbr) + else if (curc == '\n' && rex.reg_line_lbr) { /* match \n as if it is an ordinary character */ add_state = t->state->out; @@ -6244,7 +6244,7 @@ nfa_regmatch( result = result_if_matched; break; } - if (ireg_ic) + if (rex.reg_ic) { int curc_low = MB_TOLOWER(curc); int done = FALSE; @@ -6262,7 +6262,7 @@ nfa_regmatch( } else if (state->c < 0 ? check_char_class(state->c, curc) : (curc == state->c - || (ireg_ic && MB_TOLOWER(curc) + || (rex.reg_ic && MB_TOLOWER(curc) == MB_TOLOWER(state->c)))) { result = result_if_matched; @@ -6320,13 +6320,13 @@ nfa_regmatch( break; case NFA_KWORD: /* \k */ - result = vim_iswordp_buf(reginput, reg_buf); + result = vim_iswordp_buf(reginput, rex.reg_buf); ADD_STATE_IF_MATCH(t->state); break; case NFA_SKWORD: /* \K */ result = !VIM_ISDIGIT(curc) - && vim_iswordp_buf(reginput, reg_buf); + && vim_iswordp_buf(reginput, rex.reg_buf); ADD_STATE_IF_MATCH(t->state); break; @@ -6441,24 +6441,24 @@ nfa_regmatch( break; case NFA_LOWER_IC: /* [a-z] */ - result = ri_lower(curc) || (ireg_ic && ri_upper(curc)); + result = ri_lower(curc) || (rex.reg_ic && ri_upper(curc)); ADD_STATE_IF_MATCH(t->state); break; case NFA_NLOWER_IC: /* [^a-z] */ result = curc != NUL - && !(ri_lower(curc) || (ireg_ic && ri_upper(curc))); + && !(ri_lower(curc) || (rex.reg_ic && ri_upper(curc))); ADD_STATE_IF_MATCH(t->state); break; case NFA_UPPER_IC: /* [A-Z] */ - result = ri_upper(curc) || (ireg_ic && ri_lower(curc)); + result = ri_upper(curc) || (rex.reg_ic && ri_lower(curc)); ADD_STATE_IF_MATCH(t->state); break; case NFA_NUPPER_IC: /* ^[A-Z] */ result = 
curc != NUL - && !(ri_upper(curc) || (ireg_ic && ri_lower(curc))); + && !(ri_upper(curc) || (rex.reg_ic && ri_lower(curc))); ADD_STATE_IF_MATCH(t->state); break; @@ -6549,7 +6549,7 @@ nfa_regmatch( case NFA_LNUM_LT: result = (REG_MULTI && nfa_re_num_cmp(t->state->val, t->state->c - NFA_LNUM, - (long_u)(reglnum + reg_firstlnum))); + (long_u)(reglnum + rex.reg_firstlnum))); if (result) { add_here = TRUE; @@ -6575,7 +6575,7 @@ nfa_regmatch( { int op = t->state->c - NFA_VCOL; colnr_T col = (colnr_T)(reginput - regline); - win_T *wp = reg_win == NULL ? curwin : reg_win; + win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win; /* Bail out quickly when there can't be a match, avoid the * overhead of win_linetabsize() on long lines. */ @@ -6611,18 +6611,18 @@ nfa_regmatch( case NFA_MARK_GT: case NFA_MARK_LT: { - pos_T *pos = getmark_buf(reg_buf, t->state->val, FALSE); + pos_T *pos = getmark_buf(rex.reg_buf, t->state->val, FALSE); /* Compare the mark position to the match position. */ result = (pos != NULL /* mark doesn't exist */ && pos->lnum > 0 /* mark isn't set in reg_buf */ - && (pos->lnum == reglnum + reg_firstlnum + && (pos->lnum == reglnum + rex.reg_firstlnum ? (pos->col == (colnr_T)(reginput - regline) ? t->state->c == NFA_MARK : (pos->col < (colnr_T)(reginput - regline) ? t->state->c == NFA_MARK_GT : t->state->c == NFA_MARK_LT)) - : (pos->lnum < reglnum + reg_firstlnum + : (pos->lnum < reglnum + rex.reg_firstlnum ? 
t->state->c == NFA_MARK_GT : t->state->c == NFA_MARK_LT))); if (result) @@ -6634,10 +6634,11 @@ nfa_regmatch( } case NFA_CURSOR: - result = (reg_win != NULL - && (reglnum + reg_firstlnum == reg_win->w_cursor.lnum) + result = (rex.reg_win != NULL + && (reglnum + rex.reg_firstlnum + == rex.reg_win->w_cursor.lnum) && ((colnr_T)(reginput - regline) - == reg_win->w_cursor.col)); + == rex.reg_win->w_cursor.col)); if (result) { add_here = TRUE; @@ -6691,12 +6692,12 @@ nfa_regmatch( #endif result = (c == curc); - if (!result && ireg_ic) + if (!result && rex.reg_ic) result = MB_TOLOWER(c) == MB_TOLOWER(curc); #ifdef FEAT_MBYTE - /* If ireg_icombine is not set only skip over the character + /* If rex.reg_icombine is not set only skip over the character * itself. When it is set skip over composing characters. */ - if (result && enc_utf8 && !ireg_icombine) + if (result && enc_utf8 && !rex.reg_icombine) clen = utf_ptr2len(reginput); #endif ADD_STATE_IF_MATCH(t->state); @@ -6815,8 +6816,8 @@ nfa_regmatch( && ((toplevel && reglnum == 0 && clen != 0 - && (ireg_maxcol == 0 - || (colnr_T)(reginput - regline) < ireg_maxcol)) + && (rex.reg_maxcol == 0 + || (colnr_T)(reginput - regline) < rex.reg_maxcol)) || (nfa_endp != NULL && (REG_MULTI ? (reglnum < nfa_endp->se_u.pos.lnum @@ -6856,8 +6857,8 @@ nfa_regmatch( /* Checking if the required start character matches is * cheaper than adding a state that won't match. 
*/ c = PTR2CHAR(reginput + clen); - if (c != prog->regstart && (!ireg_ic || MB_TOLOWER(c) - != MB_TOLOWER(prog->regstart))) + if (c != prog->regstart && (!rex.reg_ic + || MB_TOLOWER(c) != MB_TOLOWER(prog->regstart))) { #ifdef ENABLE_LOG fprintf(log_fd, " Skipping start state, regstart does not match\n"); @@ -6997,40 +6998,40 @@ nfa_regtry( { for (i = 0; i < subs.norm.in_use; i++) { - reg_startpos[i].lnum = subs.norm.list.multi[i].start_lnum; - reg_startpos[i].col = subs.norm.list.multi[i].start_col; + rex.reg_startpos[i].lnum = subs.norm.list.multi[i].start_lnum; + rex.reg_startpos[i].col = subs.norm.list.multi[i].start_col; - reg_endpos[i].lnum = subs.norm.list.multi[i].end_lnum; - reg_endpos[i].col = subs.norm.list.multi[i].end_col; + rex.reg_endpos[i].lnum = subs.norm.list.multi[i].end_lnum; + rex.reg_endpos[i].col = subs.norm.list.multi[i].end_col; } - if (reg_startpos[0].lnum < 0) + if (rex.reg_startpos[0].lnum < 0) { - reg_startpos[0].lnum = 0; - reg_startpos[0].col = col; + rex.reg_startpos[0].lnum = 0; + rex.reg_startpos[0].col = col; } - if (reg_endpos[0].lnum < 0) + if (rex.reg_endpos[0].lnum < 0) { /* pattern has a \ze but it didn't match, use current end */ - reg_endpos[0].lnum = reglnum; - reg_endpos[0].col = (int)(reginput - regline); + rex.reg_endpos[0].lnum = reglnum; + rex.reg_endpos[0].col = (int)(reginput - regline); } else /* Use line number of "\ze". 
*/ - reglnum = reg_endpos[0].lnum; + reglnum = rex.reg_endpos[0].lnum; } else { for (i = 0; i < subs.norm.in_use; i++) { - reg_startp[i] = subs.norm.list.line[i].start; - reg_endp[i] = subs.norm.list.line[i].end; + rex.reg_startp[i] = subs.norm.list.line[i].start; + rex.reg_endp[i] = subs.norm.list.line[i].end; } - if (reg_startp[0] == NULL) - reg_startp[0] = regline + col; - if (reg_endp[0] == NULL) - reg_endp[0] = reginput; + if (rex.reg_startp[0] == NULL) + rex.reg_startp[0] = regline + col; + if (rex.reg_endp[0] == NULL) + rex.reg_endp[0] = reginput; } #ifdef FEAT_SYN_HL @@ -7093,16 +7094,16 @@ nfa_regexec_both( if (REG_MULTI) { - prog = (nfa_regprog_T *)reg_mmatch->regprog; + prog = (nfa_regprog_T *)rex.reg_mmatch->regprog; line = reg_getline((linenr_T)0); /* relative to the cursor */ - reg_startpos = reg_mmatch->startpos; - reg_endpos = reg_mmatch->endpos; + rex.reg_startpos = rex.reg_mmatch->startpos; + rex.reg_endpos = rex.reg_mmatch->endpos; } else { - prog = (nfa_regprog_T *)reg_match->regprog; - reg_startp = reg_match->startp; - reg_endp = reg_match->endp; + prog = (nfa_regprog_T *)rex.reg_match->regprog; + rex.reg_startp = rex.reg_match->startp; + rex.reg_endp = rex.reg_match->endp; } /* Be paranoid... */ @@ -7112,16 +7113,16 @@ nfa_regexec_both( goto theend; } - /* If pattern contains "\c" or "\C": overrule value of ireg_ic */ + /* If pattern contains "\c" or "\C": overrule value of rex.reg_ic */ if (prog->regflags & RF_ICASE) - ireg_ic = TRUE; + rex.reg_ic = TRUE; else if (prog->regflags & RF_NOICASE) - ireg_ic = FALSE; + rex.reg_ic = FALSE; #ifdef FEAT_MBYTE - /* If pattern contains "\Z" overrule value of ireg_icombine */ + /* If pattern contains "\Z" overrule value of rex.reg_icombine */ if (prog->regflags & RF_ICOMBINE) - ireg_icombine = TRUE; + rex.reg_icombine = TRUE; #endif regline = line; @@ -7160,14 +7161,14 @@ nfa_regexec_both( * Nothing else to try. Doesn't handle combining chars well. 
*/ if (prog->match_text != NULL #ifdef FEAT_MBYTE - && !ireg_icombine + && !rex.reg_icombine #endif ) return find_match_text(col, prog->regstart, prog->match_text); } /* If the start column is past the maximum column: no need to try. */ - if (ireg_maxcol > 0 && col >= ireg_maxcol) + if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol) goto theend; nstate = prog->nstate; @@ -7326,17 +7327,17 @@ nfa_regexec_nl( colnr_T col, /* column to start looking for match */ int line_lbr) { - reg_match = rmp; - reg_mmatch = NULL; - reg_maxline = 0; - reg_line_lbr = line_lbr; - reg_buf = curbuf; - reg_win = NULL; - ireg_ic = rmp->rm_ic; + rex.reg_match = rmp; + rex.reg_mmatch = NULL; + rex.reg_maxline = 0; + rex.reg_line_lbr = line_lbr; + rex.reg_buf = curbuf; + rex.reg_win = NULL; + rex.reg_ic = rmp->rm_ic; #ifdef FEAT_MBYTE - ireg_icombine = FALSE; + rex.reg_icombine = FALSE; #endif - ireg_maxcol = 0; + rex.reg_maxcol = 0; return nfa_regexec_both(line, col, NULL); } @@ -7375,18 +7376,18 @@ nfa_regexec_multi( colnr_T col, /* column to start looking for match */ proftime_T *tm) /* timeout limit or NULL */ { - reg_match = NULL; - reg_mmatch = rmp; - reg_buf = buf; - reg_win = win; - reg_firstlnum = lnum; - reg_maxline = reg_buf->b_ml.ml_line_count - lnum; - reg_line_lbr = FALSE; - ireg_ic = rmp->rmm_ic; + rex.reg_match = NULL; + rex.reg_mmatch = rmp; + rex.reg_buf = buf; + rex.reg_win = win; + rex.reg_firstlnum = lnum; + rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum; + rex.reg_line_lbr = FALSE; + rex.reg_ic = rmp->rmm_ic; #ifdef FEAT_MBYTE - ireg_icombine = FALSE; + rex.reg_icombine = FALSE; #endif - ireg_maxcol = rmp->rmm_maxcol; + rex.reg_maxcol = rmp->rmm_maxcol; return nfa_regexec_both(NULL, col, tm); } |