summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorPaolo Bonzini <bonzini@gnu.org>2004-12-31 10:11:49 +0000
committerPaolo Bonzini <bonzini@gnu.org>2008-01-09 16:11:41 +0100
commit32cf31f2ba6fbeb61740e3ad40005e5052317ad5 (patch)
tree5250f6d1c1126579eec301f488a8e99dc7a3a880 /lib
parent5135bf48145a3def165d6e558ca715b5235eb3a9 (diff)
downloadsed-32cf31f2ba6fbeb61740e3ad40005e5052317ad5.tar.gz
compute inveclosure only if actually needed.
2004-12-31 Paolo Bonzini <bonzini@gnu.org> * regcomp.c (calc_inveclosure): Return reg_errcode_t. Initialize the node sets in dfa->inveclosures. (analyze): Initialize inveclosures only if it is needed. Check errors from calc_inveclosure. * regex_internal.c (re_dfa_add_node): Do not initialize the inveclosure node set. * regexec.c (re_search_internal): If nmatch includes unused subexpressions, reset them to { rm_so: -1, rm_eo: -1 } here. Limit to preg->re_nsubs the nmatch that is passed to set_regs. (set_regs): Remove the real_nmatch variable, the nmatch parameter now is <= preg->re_nsubs. git-archimport-id: bonzini@gnu.org--2004b/sed--stable--4.1--patch-34
Diffstat (limited to 'lib')
-rw-r--r--lib/regcomp.c33
-rw-r--r--lib/regex_internal.c9
-rw-r--r--lib/regexec.c22
3 files changed, 41 insertions, 23 deletions
diff --git a/lib/regcomp.c b/lib/regcomp.c
index f00b8bf..7f4f63b 100644
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -58,7 +58,7 @@ static int search_duplicated_node (re_dfa_t *dfa, int org_node,
static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
int node, int root);
-static void calc_inveclosure (re_dfa_t *dfa);
+static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
static int fetch_number (re_string_t *input, re_token_t *token,
reg_syntax_t syntax);
static void fetch_token (re_token_t *result, re_string_t *input,
@@ -1132,9 +1132,8 @@ analyze (preg)
dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
- dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc);
if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
- || dfa->eclosures == NULL || dfa->inveclosures == NULL, 0))
+ || dfa->eclosures == NULL, 0))
return REG_ESPACE;
dfa->subexp_map = re_malloc (int, preg->re_nsub);
@@ -1167,7 +1166,18 @@ analyze (preg)
ret = calc_eclosure (dfa);
if (BE (ret != REG_NOERROR, 0))
return ret;
- calc_inveclosure (dfa);
+
+ /* We only need this during the prune_impossible_nodes pass in regexec.c;
+ do not compute it otherwise since it can become quadratic. */
+ if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
+ || dfa->nbackref)
+ {
+ dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
+ if (BE (dfa->inveclosures == NULL, 0))
+ return REG_ESPACE;
+ ret = calc_inveclosure (dfa);
+ }
+
return ret;
}
@@ -1600,19 +1610,26 @@ duplicate_node (new_idx, dfa, org_idx, constraint)
return REG_NOERROR;
}
-static void
+static reg_errcode_t
calc_inveclosure (dfa)
re_dfa_t *dfa;
{
- int src, idx, dest;
+ int src, idx, ret;
+ for (idx = 0; idx < dfa->nodes_len; ++idx)
+ re_node_set_init_empty (dfa->inveclosures + idx);
+
for (src = 0; src < dfa->nodes_len; ++src)
{
+ int *elems = dfa->eclosures[src].elems;
for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
{
- dest = dfa->eclosures[src].elems[idx];
- re_node_set_insert_last (dfa->inveclosures + dest, src);
+ ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
+ if (BE (ret == -1, 0))
+ return REG_ESPACE;
}
}
+
+ return REG_NOERROR;
}
/* Calculate "eclosure" for all the node in DFA. */
diff --git a/lib/regex_internal.c b/lib/regex_internal.c
index dbd3f24..c3295a8 100644
--- a/lib/regex_internal.c
+++ b/lib/regex_internal.c
@@ -1339,7 +1339,7 @@ re_dfa_add_node (dfa, token)
{
int new_nodes_alloc = dfa->nodes_alloc * 2;
int *new_nexts, *new_indices;
- re_node_set *new_edests, *new_eclosures, *new_inveclosures;
+ re_node_set *new_edests, *new_eclosures;
re_token_t *new_array = re_realloc (dfa->nodes, re_token_t,
new_nodes_alloc);
@@ -1350,17 +1350,13 @@ re_dfa_add_node (dfa, token)
new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
- new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
- new_nodes_alloc);
if (BE (new_nexts == NULL || new_indices == NULL
- || new_edests == NULL || new_eclosures == NULL
- || new_inveclosures == NULL, 0))
+ || new_edests == NULL || new_eclosures == NULL, 0))
return -1;
dfa->nexts = new_nexts;
dfa->org_indices = new_indices;
dfa->edests = new_edests;
dfa->eclosures = new_eclosures;
- dfa->inveclosures = new_inveclosures;
dfa->nodes_alloc = new_nodes_alloc;
}
dfa->nodes[dfa->nodes_len] = token;
@@ -1372,7 +1368,6 @@ re_dfa_add_node (dfa, token)
dfa->nexts[dfa->nodes_len] = -1;
re_node_set_init_empty (dfa->edests + dfa->nodes_len);
re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
- re_node_set_init_empty (dfa->inveclosures + dfa->nodes_len);
return dfa->nodes_len++;
}
diff --git a/lib/regexec.c b/lib/regexec.c
index dde600b..cd5a76b 100644
--- a/lib/regexec.c
+++ b/lib/regexec.c
@@ -605,6 +605,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
int left_lim, right_lim, incr;
int fl_longest_match, match_first, match_kind, match_last = -1;
+ int extra_nmatch;
int sb, ch;
#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
re_match_context_t mctx = { .dfa = dfa };
@@ -620,6 +621,9 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
mctx.dfa = dfa;
#endif
+ extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
+ nmatch -= extra_nmatch;
+
/* Check if the DFA haven't been compiled. */
if (BE (preg->used == 0 || dfa->init_state == NULL
|| dfa->init_state_word == NULL || dfa->init_state_nl == NULL
@@ -882,11 +886,14 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
pmatch[reg_idx].rm_so += match_first;
pmatch[reg_idx].rm_eo += match_first;
}
+ for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
+ {
+ pmatch[nmatch + reg_idx].rm_so = -1;
+ pmatch[nmatch + reg_idx].rm_eo = -1;
+ }
if (dfa->subexp_map)
- for (reg_idx = 0;
- reg_idx + 1 < nmatch && reg_idx < preg->re_nsub;
- reg_idx++)
+ for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
if (dfa->subexp_map[reg_idx] != reg_idx)
{
pmatch[reg_idx + 1].rm_so
@@ -1371,7 +1378,7 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
int fl_backtrack;
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- int idx, cur_node, real_nmatch;
+ int idx, cur_node;
re_node_set eps_via_nodes;
struct re_fail_stack_t *fs;
struct re_fail_stack_t fs_body = { 0, 2, NULL };
@@ -1392,15 +1399,14 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
fs = NULL;
cur_node = dfa->init_node;
- real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
re_node_set_init_empty (&eps_via_nodes);
- prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * real_nmatch);
- memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * real_nmatch);
+ prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * nmatch);
+ memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
{
- update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, real_nmatch);
+ update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
{