summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog10
-rw-r--r--posix/PCRE.tests19
-rw-r--r--posix/regcomp.c79
-rw-r--r--posix/regex_internal.c25
-rw-r--r--posix/regex_internal.h2
5 files changed, 91 insertions, 44 deletions
diff --git a/ChangeLog b/ChangeLog
index 8b1a34542b..0267ce381b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2004-11-23 Paolo Bonzini <bonzini@gnu.org>
+
+ * posix/regcomp.c (analyze_tree): Always call calc_epsdest.
+ (calc_inveclosure): Use re_node_set_insert_last.
+ (parse_dup_op): Lower X{1,5} to (X(X(X(XX?)?)?)?)?
+ rather than X?X?X?X?X?.
+ * posix/regex_internal.h (re_node_set_insert_last): New declaration.
+ * posix/regex_internal.c (re_node_set_insert_last): New function.
+ * posix/PCRE.tests: Add testcases.
+
2004-11-25 Ulrich Drepper <drepper@redhat.com>
* dlfcn/dlfcn.h: Remove nonnull attribute from dlopen.
diff --git a/posix/PCRE.tests b/posix/PCRE.tests
index 7ea5b9e70c..0fb9cadafc 100644
--- a/posix/PCRE.tests
+++ b/posix/PCRE.tests
@@ -2365,3 +2365,22 @@ No match
0: bc123bc
1: bc
2: bc
+
+/^a{2,5}$/
+ aa
+ 0: aa
+ aaa
+ 0: aaa
+ aaaa
+ 0: aaaa
+ aaaaa
+ 0: aaaaa
+ *** Failers
+No match
+ a
+No match
+ b
+No match
+ aaaaab
+No match
+ aaaaaa
diff --git a/posix/regcomp.c b/posix/regcomp.c
index dafad9bd0c..675f816f60 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -1269,8 +1269,8 @@ analyze_tree (dfa, node)
calc_first (dfa, node);
if (node->next == -1)
calc_next (dfa, node);
- if (node->eclosure.nelem == 0)
- calc_epsdest (dfa, node);
+ calc_epsdest (dfa, node);
+
/* Calculate "first" etc. for the left child. */
if (node->left != NULL)
{
@@ -1626,7 +1626,7 @@ calc_inveclosure (dfa)
for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
{
dest = dfa->eclosures[src].elems[idx];
- re_node_set_insert (dfa->inveclosures + dest, src);
+ re_node_set_insert_last (dfa->inveclosures + dest, src);
}
}
}
@@ -2538,7 +2538,7 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
reg_errcode_t *err;
{
re_token_t dup_token;
- bin_tree_t *tree = NULL;
+ bin_tree_t *tree = NULL, *old_tree = NULL;
int i, start, end, start_idx = re_string_cur_idx (regexp);
re_token_t start_token = *token;
@@ -2598,12 +2598,14 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
}
+ fetch_token (token, regexp, syntax);
+
/* Treat "<re>{0}*" etc. as "<re>{0}". */
- if (BE (elem == NULL, 0))
- start = end = 0;
+ if (BE (elem == NULL || (start == 0 && end == 0), 0))
+ return NULL;
/* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
- else if (BE (start > 0, 0))
+ if (BE (start > 0, 0))
{
tree = elem;
for (i = 2; i <= start; ++i)
@@ -2613,52 +2615,41 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
if (BE (elem == NULL || tree == NULL, 0))
goto parse_dup_op_espace;
}
- }
- if (BE (end != start, 1))
- {
- dup_token.type = (end == -1 ? OP_DUP_ASTERISK : OP_DUP_QUESTION);
- if (BE (start > 0, 0))
- {
- elem = duplicate_tree (elem, dfa);
- if (BE (elem == NULL, 0))
- goto parse_dup_op_espace;
+ if (start == end)
+ return tree;
- /* This subexpression will be marked as optional, so that
- empty matches do not touch the registers. */
- mark_opt_subexp (elem, dfa);
+ /* Duplicate ELEM before it is marked optional. */
+ elem = duplicate_tree (elem, dfa);
+ old_tree = tree;
+ }
+ else
+ old_tree = NULL;
- /* Prepare the tree with the modifier. */
- elem = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
- tree = create_tree (dfa, tree, elem, CONCAT, 0);
- }
- else
- {
- /* We do not need to duplicate the tree because we have not
- created it yet. */
- mark_opt_subexp (elem, dfa);
- tree = elem = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
- }
+ mark_opt_subexp (elem, dfa);
+ dup_token.type = (end == -1 ? OP_DUP_ASTERISK : OP_DUP_QUESTION);
+ tree = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
+ if (BE (tree == NULL, 0))
+ goto parse_dup_op_espace;
+ /* This loop is actually executed only when end != -1,
+ to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
+ already created the start+1-th copy. */
+ for (i = start + 2; i <= end; ++i)
+ {
+ elem = duplicate_tree (elem, dfa);
+ tree = create_tree (dfa, tree, elem, CONCAT, 0);
if (BE (elem == NULL || tree == NULL, 0))
goto parse_dup_op_espace;
- /* This loop is actually executed only when end != -1,
- to rewrite <re>{0,n} as <re>?<re>?<re>?... We have
- already created the start+1-th copy. */
- for (i = start + 2; i <= end; ++i)
- {
- elem = duplicate_tree (elem, dfa);
- tree = create_tree (dfa, tree, elem, CONCAT, 0);
- if (BE (elem == NULL || tree == NULL, 0))
- {
- *err = REG_ESPACE;
- return NULL;
- }
- }
+ tree = re_dfa_add_tree_node (dfa, tree, NULL, &dup_token);
+ if (BE (tree == NULL, 0))
+ goto parse_dup_op_espace;
}
- fetch_token (token, regexp, syntax);
+ if (old_tree)
+ tree = create_tree (dfa, old_tree, tree, CONCAT, 0);
+
return tree;
parse_dup_op_espace:
diff --git a/posix/regex_internal.c b/posix/regex_internal.c
index bb1d73d9a0..cb439e5d7c 100644
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -1250,6 +1250,31 @@ re_node_set_insert (set, elem)
return 1;
}
+/* Insert the new element ELEM to the re_node_set* SET.
+ SET should not already have any element greater than or equal to ELEM.
+ Return -1 if an error is occured, return 1 otherwise. */
+
+static int
+re_node_set_insert_last (set, elem)
+ re_node_set *set;
+ int elem;
+{
+ /* Realloc if we need. */
+ if (set->alloc == set->nelem)
+ {
+ int *new_array;
+ set->alloc = (set->alloc + 1) * 2;
+ new_array = re_realloc (set->elems, int, set->alloc);
+ if (BE (new_array == NULL, 0))
+ return -1;
+ set->elems = new_array;
+ }
+
+ /* Insert the new element. */
+ set->elems[set->nelem++] = elem;
+ return 1;
+}
+
/* Compare two node sets SET1 and SET2.
return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index a778032d77..703d409eb8 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -668,6 +668,8 @@ static reg_errcode_t re_node_set_init_union (re_node_set *dest,
static reg_errcode_t re_node_set_merge (re_node_set *dest,
const re_node_set *src) internal_function;
static int re_node_set_insert (re_node_set *set, int elem) internal_function;
+static int re_node_set_insert_last (re_node_set *set,
+ int elem) internal_function;
static int re_node_set_compare (const re_node_set *set1,
const re_node_set *set2) internal_function;
static int re_node_set_contains (const re_node_set *set, int elem) internal_function;