summary refs log tree commit diff
path: root/regexec.c
diff options
context:
space:
mode:
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 169
1 files changed, 103 insertions, 66 deletions
diff --git a/regexec.c b/regexec.c
index 01fbe055..aee502d2 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1,5 +1,5 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003, 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
+ Copyright (C) 2002-2005, 2007, 2009, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
@@ -390,8 +390,7 @@ re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
else
str = string1;
- rval = re_search_stub (bufp, str, len, start, range, stop, regs,
- ret_len);
+ rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len);
if (free_str)
re_free ((char *) str);
return rval;
@@ -506,9 +505,14 @@ re_copy_regs (regs, pmatch, nregs, regs_allocated)
if (regs_allocated == REGS_UNALLOCATED)
{ /* No. So allocate them with malloc. */
regs->start = re_malloc (regoff_t, need_regs);
- regs->end = re_malloc (regoff_t, need_regs);
- if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0))
+ if (BE (regs->start == NULL, 0))
return REGS_UNALLOCATED;
+ regs->end = re_malloc (regoff_t, need_regs);
+ if (BE (regs->end == NULL, 0))
+ {
+ re_free (regs->start);
+ return REGS_UNALLOCATED;
+ }
regs->num_regs = need_regs;
}
else if (regs_allocated == REGS_REALLOCATE)
@@ -518,9 +522,15 @@ re_copy_regs (regs, pmatch, nregs, regs_allocated)
if (BE (need_regs > regs->num_regs, 0))
{
regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
- regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);
- if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
+ regoff_t *new_end;
+ if (BE (new_start == NULL, 0))
return REGS_UNALLOCATED;
+ new_end = re_realloc (regs->end, regoff_t, need_regs);
+ if (BE (new_end == NULL, 0))
+ {
+ re_free (new_start);
+ return REGS_UNALLOCATED;
+ }
regs->start = new_start;
regs->end = new_end;
regs->num_regs = need_regs;
@@ -662,7 +672,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
|| !preg->newline_anchor))
{
if (start != 0 && start + range != 0)
- return REG_NOMATCH;
+ return REG_NOMATCH;
start = range = 0;
}
@@ -687,6 +697,13 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
multi character collating element. */
if (nmatch > 1 || dfa->has_mb_node)
{
+ /* Avoid overflow. */
+ if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+
mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
if (BE (mctx.state_log == NULL, 0))
{
@@ -794,10 +811,10 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
break;
match_first += incr;
if (match_first < left_lim || match_first > right_lim)
- {
- err = REG_NOMATCH;
- goto free_return;
- }
+ {
+ err = REG_NOMATCH;
+ goto free_return;
+ }
}
break;
}
@@ -911,14 +928,14 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
}
if (dfa->subexp_map)
- for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
- if (dfa->subexp_map[reg_idx] != reg_idx)
- {
- pmatch[reg_idx + 1].rm_so
- = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
- pmatch[reg_idx + 1].rm_eo
- = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
- }
+ for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
+ if (dfa->subexp_map[reg_idx] != reg_idx)
+ {
+ pmatch[reg_idx + 1].rm_so
+ = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
+ pmatch[reg_idx + 1].rm_eo
+ = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
+ }
}
free_return:
@@ -944,6 +961,11 @@ prune_impossible_nodes (mctx)
#endif
match_last = mctx->match_last;
halt_node = mctx->last_node;
+
+ /* Avoid overflow. */
+ if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
+ return REG_ESPACE;
+
sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
if (BE (sifted_states == NULL, 0))
{
@@ -1102,7 +1124,7 @@ check_matching (re_match_context_t *mctx, int fl_longest_match,
{
err = transit_state_bkref (mctx, &cur_state->nodes);
if (BE (err != REG_NOERROR, 0))
- return err;
+ return err;
}
}
}
@@ -1129,16 +1151,16 @@ check_matching (re_match_context_t *mctx, int fl_longest_match,
int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
if (BE (next_char_idx >= mctx->input.bufs_len, 0)
- || (BE (next_char_idx >= mctx->input.valid_len, 0)
- && mctx->input.valid_len < mctx->input.len))
- {
- err = extend_buffers (mctx);
- if (BE (err != REG_NOERROR, 0))
+ || (BE (next_char_idx >= mctx->input.valid_len, 0)
+ && mctx->input.valid_len < mctx->input.len))
+ {
+ err = extend_buffers (mctx);
+ if (BE (err != REG_NOERROR, 0))
{
assert (err == REG_ESPACE);
return -2;
}
- }
+ }
cur_state = transit_state (&err, mctx, cur_state);
if (mctx->state_log != NULL)
@@ -1257,20 +1279,20 @@ proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
int candidate = edests->elems[i];
if (!re_node_set_contains (cur_nodes, candidate))
continue;
- if (dest_node == -1)
+ if (dest_node == -1)
dest_node = candidate;
- else
+ else
{
/* In order to avoid infinite loop like "(a*)*", return the second
- epsilon-transition if the first was already considered. */
+ epsilon-transition if the first was already considered. */
if (re_node_set_contains (eps_via_nodes, dest_node))
- return candidate;
+ return candidate;
/* Otherwise, push the second epsilon-transition on the fail stack. */
else if (fs != NULL
&& push_fail_stack (fs, *pidx, candidate, nregs, regs,
- eps_via_nodes))
+ eps_via_nodes))
return -2;
/* We know we are going to exit. */
@@ -1620,7 +1642,7 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
if (mctx->state_log[str_idx])
{
err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
- if (BE (err != REG_NOERROR, 0))
+ if (BE (err != REG_NOERROR, 0))
goto free_return;
}
@@ -1816,10 +1838,14 @@ add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
{
err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
if (BE (err != REG_NOERROR, 0))
- return REG_ESPACE;
+ return REG_ESPACE;
for (i = 0; i < dest_nodes->nelem; i++)
- re_node_set_merge (&state->inveclosure,
- dfa->inveclosures + dest_nodes->elems[i]);
+ {
+ err = re_node_set_merge (&state->inveclosure,
+ dfa->inveclosures + dest_nodes->elems[i]);
+ if (BE (err != REG_NOERROR, 0))
+ return REG_ESPACE;
+ }
}
return re_node_set_add_intersect (dest_nodes, candidates,
&state->inveclosure);
@@ -1931,7 +1957,7 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
{
struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
do
- {
+ {
int dst, cpos;
if (ent->node != node)
@@ -1952,9 +1978,9 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
if (dst == from_node)
{
if (boundaries & 1)
- return -1;
+ return -1;
else /* if (boundaries & 2) */
- return 0;
+ return 0;
}
cpos =
@@ -1968,7 +1994,7 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
if (subexp_idx < BITSET_WORD_BITS)
ent->eps_reachable_subexps_map
&= ~((bitset_word_t) 1 << subexp_idx);
- }
+ }
while (ent++->more);
}
break;
@@ -2193,7 +2219,7 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
re_node_set_remove (&local_sctx.limits, enabled_idx);
/* mctx->bkref_ents may have changed, reload the pointer. */
- entry = mctx->bkref_ents + enabled_idx;
+ entry = mctx->bkref_ents + enabled_idx;
}
while (enabled_idx++, entry++->more);
}
@@ -2274,7 +2300,7 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx,
trtable = state->word_trtable;
if (BE (trtable != NULL, 1))
- {
+ {
unsigned int context;
context
= re_string_context_at (&mctx->input,
@@ -2320,21 +2346,21 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
unsigned int context;
re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
/* If (state_log[cur_idx] != 0), it implies that cur_idx is
- the destination of a multibyte char/collating element/
- back reference. Then the next state is the union set of
- these destinations and the results of the transition table. */
+ the destination of a multibyte char/collating element/
+ back reference. Then the next state is the union set of
+ these destinations and the results of the transition table. */
pstate = mctx->state_log[cur_idx];
log_nodes = pstate->entrance_nodes;
if (next_state != NULL)
- {
- table_nodes = next_state->entrance_nodes;
- *err = re_node_set_init_union (&next_nodes, table_nodes,
+ {
+ table_nodes = next_state->entrance_nodes;
+ *err = re_node_set_init_union (&next_nodes, table_nodes,
log_nodes);
- if (BE (*err != REG_NOERROR, 0))
+ if (BE (*err != REG_NOERROR, 0))
return NULL;
- }
+ }
else
- next_nodes = *log_nodes;
+ next_nodes = *log_nodes;
/* Note: We already add the nodes of the initial state,
then we don't need to add them here. */
@@ -2342,12 +2368,12 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
re_string_cur_idx (&mctx->input) - 1,
mctx->eflags);
next_state = mctx->state_log[cur_idx]
- = re_acquire_state_context (err, dfa, &next_nodes, context);
+ = re_acquire_state_context (err, dfa, &next_nodes, context);
/* We don't need to check errors here, since the return value of
- this function is next_state and ERR is already set. */
+ this function is next_state and ERR is already set. */
if (table_nodes != NULL)
- re_node_set_free (&next_nodes);
+ re_node_set_free (&next_nodes);
}
if (BE (dfa->nbackref, 0) && next_state != NULL)
@@ -2388,9 +2414,9 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
do
{
- if (++cur_str_idx > max)
- return NULL;
- re_string_skip_bytes (&mctx->input, 1);
+ if (++cur_str_idx > max)
+ return NULL;
+ re_string_skip_bytes (&mctx->input, 1);
}
while (mctx->state_log[cur_str_idx] == NULL);
@@ -2497,7 +2523,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
re_dfastate_t *dest_state;
if (!dfa->nodes[cur_node_idx].accept_mb)
- continue;
+ continue;
if (dfa->nodes[cur_node_idx].constraint)
{
@@ -2678,7 +2704,7 @@ get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
const struct re_backref_cache_entry *entry
= mctx->bkref_ents + cache_idx;
do
- if (entry->node == bkref_node)
+ if (entry->node == bkref_node)
return REG_NOERROR; /* We already checked it. */
while (entry++->more);
}
@@ -3033,6 +3059,7 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
const re_dfa_t *const dfa = mctx->dfa;
int result;
int cur_idx;
+ reg_errcode_t err = REG_NOERROR;
re_node_set union_set;
re_node_set_init_empty (&union_set);
for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
@@ -3047,7 +3074,6 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
/* If the node may accept `multi byte'. */
if (dfa->nodes[cur_node].accept_mb)
{
- reg_errcode_t err = REG_NOERROR;
naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
str_idx);
if (naccepted > 1)
@@ -3354,6 +3380,13 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
if (BE (err != REG_NOERROR, 0))
goto out_free;
+ /* Avoid arithmetic overflow in size calculation. */
+ if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
+ / (3 * sizeof (re_dfastate_t *)))
+ < ndests),
+ 0))
+ goto out_free;
+
#ifdef HAVE_ALLOCA
if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
+ ndests * 3 * sizeof (re_dfastate_t *)))
@@ -3564,13 +3597,13 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
}
#ifdef RE_ENABLE_I18N
else if (type == OP_UTF8_PERIOD)
- {
+ {
memset (accepts, '\xff', sizeof (bitset_t) / 2);
if (!(dfa->syntax & RE_DOT_NEWLINE))
bitset_clear (accepts, '\n');
if (dfa->syntax & RE_DOT_NOT_NULL)
bitset_clear (accepts, '\0');
- }
+ }
#endif
else
continue;
@@ -3776,7 +3809,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
if (node->type == OP_PERIOD)
{
if (char_len <= 1)
- return 0;
+ return 0;
/* FIXME: I don't think this if is needed, as both '\n'
and '\0' are char_len == 1. */
/* '.' accepts any one character except the following two cases. */
@@ -4027,18 +4060,18 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
{
case CHARACTER:
if (node->opr.c != ch)
- return 0;
+ return 0;
break;
case SIMPLE_BRACKET:
if (!bitset_contain (node->opr.sbcset, ch))
- return 0;
+ return 0;
break;
#ifdef RE_ENABLE_I18N
case OP_UTF8_PERIOD:
if (ch >= 0x80)
- return 0;
+ return 0;
/* FALLTHROUGH */
#endif
case OP_PERIOD:
@@ -4073,6 +4106,10 @@ extend_buffers (re_match_context_t *mctx)
reg_errcode_t ret;
re_string_t *pstr = &mctx->input;
+ /* Avoid overflow. */
+ if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
+ return REG_ESPACE;
+
/* Double the lengthes of the buffers. */
ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
if (BE (ret != REG_NOERROR, 0))