diff options
author | Paolo Bonzini <bonzini@gnu.org> | 2004-12-01 13:11:12 +0000 |
---|---|---|
committer | Paolo Bonzini <bonzini@gnu.org> | 2008-01-09 16:11:36 +0100 |
commit | 52e232ab96bd80a26559c865ca1e274a0167c13d (patch) | |
tree | 9270184d3e2cad2938406773f747fec6974c344f /lib | |
parent | 8a00600bfc766f70f82f477e38e0de72a046fb15 (diff) | |
download | sed-52e232ab96bd80a26559c865ca1e274a0167c13d.tar.gz |
do not make single-byte searches go through slow MBCS paths
2004-01-12 Paolo Bonzini <bonzini@gnu.org>
* posix/regcomp.c (peek_token, parse_expression, parse_bracket_exp, build_charclass_op)
[RE_ENABLE_I18N]: Initialize the tokens' accept_mb field.
* posix/regex_internal.c (create_ci_newstate, create_cd_newstate)
[RE_ENABLE_I18N]: Initialize the DFA state's accept_mb field
from the nodes'.
* posix/regex_internal.h (re_token_t) [RE_ENABLE_I18N]: Add the
accept_mb bitfield.
(ACCEPT_MB_NODE): Remove.
* posix/regexec.c (transit_state_mb, check_arrival_add_next_nodes,
proceed_next_node, build_sifted_states): Look at the nodes' accept_mb
field.
git-archimport-id: bonzini@gnu.org--2004b/sed--stable--4.1--patch-26
Diffstat (limited to 'lib')
-rw-r--r-- | lib/regcomp.c | 6 | ||||
-rw-r--r-- | lib/regex_internal.c | 23 | ||||
-rw-r--r-- | lib/regex_internal.h | 3 | ||||
-rw-r--r-- | lib/regexec.c | 8 |
4 files changed, 20 insertions, 20 deletions
diff --git a/lib/regcomp.c b/lib/regcomp.c index 5de5bf7..fb8f210 100644 --- a/lib/regcomp.c +++ b/lib/regcomp.c @@ -1788,6 +1788,7 @@ peek_token (token, input, syntax) token->word_char = 0; #ifdef RE_ENABLE_I18N + token->accept_mb = 0; token->mb_partial = 0; if (input->mb_cur_max > 1 && !re_string_first_byte (input, re_string_cur_idx (input))) @@ -2383,6 +2384,9 @@ parse_expression (regexp, preg, token, syntax, nest, err) fetch_token (token, regexp, syntax); return tree; case OP_PERIOD: +#ifdef RE_ENABLE_I18N + token->accept_mb = dfa->mb_cur_max > 1; +#endif tree = re_dfa_add_tree_node (dfa, NULL, NULL, token); if (BE (tree == NULL, 0)) { @@ -3314,6 +3318,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) return work_tree; } br_token.type = COMPLEX_BRACKET; + br_token.accept_mb = 1; br_token.opr.mbcset = mbcset; mbc_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token); if (BE (mbc_tree == NULL, 0)) @@ -3704,6 +3709,7 @@ build_charclass_op (dfa, trans, class_name, extra, non_match, err) bin_tree_t *mbc_tree; /* Build a tree for complex bracket. */ br_token.type = COMPLEX_BRACKET; + br_token.accept_mb = 1; br_token.opr.mbcset = mbcset; dfa->has_mb_node = 1; mbc_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token); diff --git a/lib/regex_internal.c b/lib/regex_internal.c index 00382fe..6fd92f7 100644 --- a/lib/regex_internal.c +++ b/lib/regex_internal.c @@ -1560,16 +1560,13 @@ create_ci_newstate (dfa, nodes, hash) re_token_type_t type = node->type; if (type == CHARACTER && !node->constraint) continue; +#ifdef RE_ENABLE_I18N + newstate->accept_mb |= node->accept_mb; +#endif /* RE_ENABLE_I18N */ /* If the state has the halt node, the state is a halt state. 
*/ - else if (type == END_OF_RE) + if (type == END_OF_RE) newstate->halt = 1; -#ifdef RE_ENABLE_I18N - else if (type == COMPLEX_BRACKET - || type == OP_UTF8_PERIOD - || (type == OP_PERIOD && dfa->mb_cur_max > 1)) - newstate->accept_mb = 1; -#endif /* RE_ENABLE_I18N */ else if (type == OP_BACK_REF) newstate->has_backref = 1; else if (type == ANCHOR || node->constraint) @@ -1613,15 +1610,13 @@ create_cd_newstate (dfa, nodes, context, hash) if (type == CHARACTER && !constraint) continue; - /* If the state has the halt node, the state is a halt state. */ - else if (type == END_OF_RE) - newstate->halt = 1; #ifdef RE_ENABLE_I18N - else if (type == COMPLEX_BRACKET - || type == OP_UTF8_PERIOD - || (type == OP_PERIOD && dfa->mb_cur_max > 1)) - newstate->accept_mb = 1; + newstate->accept_mb |= node->accept_mb; #endif /* RE_ENABLE_I18N */ + + /* If the state has the halt node, the state is a halt state. */ + if (type == END_OF_RE) + newstate->halt = 1; else if (type == OP_BACK_REF) newstate->has_backref = 1; else if (type == ANCHOR) diff --git a/lib/regex_internal.h b/lib/regex_internal.h index 658246a..5a1f839 100644 --- a/lib/regex_internal.h +++ b/lib/regex_internal.h @@ -284,6 +284,7 @@ typedef struct unsigned int duplicated : 1; unsigned int opt_subexp : 1; #ifdef RE_ENABLE_I18N + unsigned int accept_mb : 1; /* These 2 bits can be moved into the union if needed (e.g. if running out of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). 
*/ unsigned int mb_partial : 1; @@ -292,8 +293,6 @@ typedef struct } re_token_t; #define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT) -#define ACCEPT_MB_NODE(type) \ - ((type) >= OP_PERIOD && (type) <= OP_UTF8_PERIOD) struct re_string_t { diff --git a/lib/regexec.c b/lib/regexec.c index dd5591c..200dd55 100644 --- a/lib/regexec.c +++ b/lib/regexec.c @@ -1254,7 +1254,7 @@ proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs) re_token_type_t type = dfa->nodes[node].type; #ifdef RE_ENABLE_I18N - if (ACCEPT_MB_NODE (type)) + if (dfa->nodes[node].accept_mb) naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); else #endif /* RE_ENABLE_I18N */ @@ -1620,7 +1620,7 @@ build_sifted_states (mctx, sctx, str_idx, cur_dest) continue; #ifdef RE_ENABLE_I18N /* If the node may accept `multi byte'. */ - if (ACCEPT_MB_NODE (type)) + if (dfa->nodes[prev_node].accept_mb) naccepted = sift_states_iter_mb (mctx, sctx, prev_node, str_idx, sctx->last_str_idx); #endif /* RE_ENABLE_I18N */ @@ -2476,7 +2476,7 @@ transit_state_mb (mctx, pstate) } /* How many bytes the node can accept? */ - if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type)) + if (dfa->nodes[cur_node_idx].accept_mb) naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input, re_string_cur_idx (&mctx->input)); if (naccepted == 0) @@ -3015,7 +3015,7 @@ check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes) continue; #ifdef RE_ENABLE_I18N /* If the node may accept `multi byte'. */ - if (ACCEPT_MB_NODE (type)) + if (dfa->nodes[cur_node].accept_mb) { naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, str_idx); |