summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author Paolo Bonzini <bonzini@gnu.org> 2004-12-01 13:11:12 +0000
committer Paolo Bonzini <bonzini@gnu.org> 2008-01-09 16:11:36 +0100
commit 52e232ab96bd80a26559c865ca1e274a0167c13d (patch)
tree 9270184d3e2cad2938406773f747fec6974c344f /lib
parent 8a00600bfc766f70f82f477e38e0de72a046fb15 (diff)
download sed-52e232ab96bd80a26559c865ca1e274a0167c13d.tar.gz
do not make single-byte searches go through slow MBCS paths
2004-01-12  Paolo Bonzini  <bonzini@gnu.org>

	* posix/regcomp.c (peek_token, parse_bracket_exp,
	build_charclass_op) [RE_ENABLE_I18N]: Initialize the tokens'
	accept_mb field.
	* posix/regex_internal.c (create_ci_newstate, create_cd_newstate)
	[RE_ENABLE_I18N]: Initialize the DFA state's accept_mb field
	from the nodes'.
	* posix/regex_internal.h (re_token_t) [RE_ENABLE_I18N]: Add the
	accept_mb bitfield.
	(ACCEPT_MB_NODE): Remove.
	* posix/regexec.c (transit_state_mb, check_arrival_add_next_nodes,
	proceed_next_node, build_sifted_states): Look at the nodes'
	accept_mb field.

git-archimport-id: bonzini@gnu.org--2004b/sed--stable--4.1--patch-26
Diffstat (limited to 'lib')
-rw-r--r-- lib/regcomp.c 6
-rw-r--r-- lib/regex_internal.c 23
-rw-r--r-- lib/regex_internal.h 3
-rw-r--r-- lib/regexec.c 8
4 files changed, 20 insertions, 20 deletions
diff --git a/lib/regcomp.c b/lib/regcomp.c
index 5de5bf7..fb8f210 100644
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -1788,6 +1788,7 @@ peek_token (token, input, syntax)
token->word_char = 0;
#ifdef RE_ENABLE_I18N
+ token->accept_mb = 0;
token->mb_partial = 0;
if (input->mb_cur_max > 1 &&
!re_string_first_byte (input, re_string_cur_idx (input)))
@@ -2383,6 +2384,9 @@ parse_expression (regexp, preg, token, syntax, nest, err)
fetch_token (token, regexp, syntax);
return tree;
case OP_PERIOD:
+#ifdef RE_ENABLE_I18N
+ token->accept_mb = dfa->mb_cur_max > 1;
+#endif
tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
if (BE (tree == NULL, 0))
{
@@ -3314,6 +3318,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
return work_tree;
}
br_token.type = COMPLEX_BRACKET;
+ br_token.accept_mb = 1;
br_token.opr.mbcset = mbcset;
mbc_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
if (BE (mbc_tree == NULL, 0))
@@ -3704,6 +3709,7 @@ build_charclass_op (dfa, trans, class_name, extra, non_match, err)
bin_tree_t *mbc_tree;
/* Build a tree for complex bracket. */
br_token.type = COMPLEX_BRACKET;
+ br_token.accept_mb = 1;
br_token.opr.mbcset = mbcset;
dfa->has_mb_node = 1;
mbc_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
diff --git a/lib/regex_internal.c b/lib/regex_internal.c
index 00382fe..6fd92f7 100644
--- a/lib/regex_internal.c
+++ b/lib/regex_internal.c
@@ -1560,16 +1560,13 @@ create_ci_newstate (dfa, nodes, hash)
re_token_type_t type = node->type;
if (type == CHARACTER && !node->constraint)
continue;
+#ifdef RE_ENABLE_I18N
+ newstate->accept_mb |= node->accept_mb;
+#endif /* RE_ENABLE_I18N */
/* If the state has the halt node, the state is a halt state. */
- else if (type == END_OF_RE)
+ if (type == END_OF_RE)
newstate->halt = 1;
-#ifdef RE_ENABLE_I18N
- else if (type == COMPLEX_BRACKET
- || type == OP_UTF8_PERIOD
- || (type == OP_PERIOD && dfa->mb_cur_max > 1))
- newstate->accept_mb = 1;
-#endif /* RE_ENABLE_I18N */
else if (type == OP_BACK_REF)
newstate->has_backref = 1;
else if (type == ANCHOR || node->constraint)
@@ -1613,15 +1610,13 @@ create_cd_newstate (dfa, nodes, context, hash)
if (type == CHARACTER && !constraint)
continue;
- /* If the state has the halt node, the state is a halt state. */
- else if (type == END_OF_RE)
- newstate->halt = 1;
#ifdef RE_ENABLE_I18N
- else if (type == COMPLEX_BRACKET
- || type == OP_UTF8_PERIOD
- || (type == OP_PERIOD && dfa->mb_cur_max > 1))
- newstate->accept_mb = 1;
+ newstate->accept_mb |= node->accept_mb;
#endif /* RE_ENABLE_I18N */
+
+ /* If the state has the halt node, the state is a halt state. */
+ if (type == END_OF_RE)
+ newstate->halt = 1;
else if (type == OP_BACK_REF)
newstate->has_backref = 1;
else if (type == ANCHOR)
diff --git a/lib/regex_internal.h b/lib/regex_internal.h
index 658246a..5a1f839 100644
--- a/lib/regex_internal.h
+++ b/lib/regex_internal.h
@@ -284,6 +284,7 @@ typedef struct
unsigned int duplicated : 1;
unsigned int opt_subexp : 1;
#ifdef RE_ENABLE_I18N
+ unsigned int accept_mb : 1;
/* These 2 bits can be moved into the union if needed (e.g. if running out
of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */
unsigned int mb_partial : 1;
@@ -292,8 +293,6 @@ typedef struct
} re_token_t;
#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
-#define ACCEPT_MB_NODE(type) \
- ((type) >= OP_PERIOD && (type) <= OP_UTF8_PERIOD)
struct re_string_t
{
diff --git a/lib/regexec.c b/lib/regexec.c
index dd5591c..200dd55 100644
--- a/lib/regexec.c
+++ b/lib/regexec.c
@@ -1254,7 +1254,7 @@ proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
re_token_type_t type = dfa->nodes[node].type;
#ifdef RE_ENABLE_I18N
- if (ACCEPT_MB_NODE (type))
+ if (dfa->nodes[node].accept_mb)
naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
else
#endif /* RE_ENABLE_I18N */
@@ -1620,7 +1620,7 @@ build_sifted_states (mctx, sctx, str_idx, cur_dest)
continue;
#ifdef RE_ENABLE_I18N
/* If the node may accept `multi byte'. */
- if (ACCEPT_MB_NODE (type))
+ if (dfa->nodes[prev_node].accept_mb)
naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
str_idx, sctx->last_str_idx);
#endif /* RE_ENABLE_I18N */
@@ -2476,7 +2476,7 @@ transit_state_mb (mctx, pstate)
}
/* How many bytes the node can accept? */
- if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
+ if (dfa->nodes[cur_node_idx].accept_mb)
naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
re_string_cur_idx (&mctx->input));
if (naccepted == 0)
@@ -3015,7 +3015,7 @@ check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes)
continue;
#ifdef RE_ENABLE_I18N
/* If the node may accept `multi byte'. */
- if (ACCEPT_MB_NODE (type))
+ if (dfa->nodes[cur_node].accept_mb)
{
naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
str_idx);