diff options
author | Ulrich Drepper <drepper@redhat.com> | 2003-11-24 19:30:51 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2003-11-24 19:30:51 +0000 |
commit | 65e6becf5b1b9ca1e911986d030b8b31b5dd4cfa (patch) | |
tree | 119234eb952b9bd87c68ceb03f68826d4bbad4de /posix/regexec.c | |
parent | 951d64082330765a22da6beac6e067ec054605e7 (diff) | |
download | glibc-65e6becf5b1b9ca1e911986d030b8b31b5dd4cfa.tar.gz |
Update.
2003-11-24 Jakub Jelinek <jakub@redhat.com>
* posix/regex_internal.h (re_token_t): Add word_char bit. Add
comment.
(re_dfa_t): Add sb_char field.
(bitset_mask): New function.
* posix/regcomp.c (free_dfa_content): Free sb_char.
(init_dfa): Don't initialize word_char unnecessarily.
Initialize sb_char.
(duplicate_node): Don't duplicate !word_char CHARACTERs with
NEXT_WORD_CONSTRAINT constraint or word_char CHARACTERs with
NEXT_NOTWORD_CONSTRAINT. Return -1 in *new_idx instead.
(duplicate_node_closure): Handle clone_dest == -1 from
duplicate_node.
(peek_token): Initialize word_char bit.
(parse_expression, parse_dup_op): Add comments.
(parse_bracket_exp): Don't set bitmask bits for multi-byte char
starting bytes here at the beginning. Mask off the bits right
before creating SIMPLE_BRACKET.
(build_charclass_op): Likewise.
* posix/regexec.c (group_nodes_into_DFAstates) <case OP_PERIOD>: Only
set accept bits for single-byte characters.
(group_nodes_into_DFAstates): Don't rely on characters 0 .. 127
being single byte encoded and the rest multi-byte.
* posix/bug-regex19.c (tests): Add new tests.
(do_mb_tests): Initialize t to *test.
(main): Fail even on do_mb_tests errors.
Diffstat (limited to 'posix/regexec.c')
-rw-r--r-- | posix/regexec.c | 37 |
1 file changed, 26 insertions, 11 deletions
diff --git a/posix/regexec.c b/posix/regexec.c
index 0b524856ca..58ac9c82c4 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -3341,7 +3341,12 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
         }
       else if (type == OP_PERIOD)
         {
-          bitset_set_all (accepts);
+#ifdef RE_ENABLE_I18N
+          if (dfa->mb_cur_max > 1)
+            bitset_merge (accepts, dfa->sb_char);
+          else
+#endif
+            bitset_set_all (accepts);
           if (!(preg->syntax & RE_DOT_NEWLINE))
             bitset_clear (accepts, '\n');
           if (preg->syntax & RE_DOT_NOT_NULL)
@@ -3362,8 +3367,6 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
          match it the context. */
       if (constraint)
         {
-          int word_char_max;
-
           if (constraint & NEXT_NEWLINE_CONSTRAINT)
             {
               int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
@@ -3379,16 +3382,28 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
                 continue;
             }
 
-          /* This assumes ASCII compatible locale. We cannot say
-             anything about the non-ascii chars. */
-          word_char_max
-            = dfa->mb_cur_max > 1 ? BITSET_UINTS / 2 : BITSET_UINTS;
           if (constraint & NEXT_WORD_CONSTRAINT)
-            for (j = 0; j < word_char_max; ++j)
-              accepts[j] &= dfa->word_char[j];
+            {
+#ifdef RE_ENABLE_I18N
+              if (dfa->mb_cur_max > 1)
+                for (j = 0; j < BITSET_UINTS; ++j)
+                  accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]);
+              else
+#endif
+                for (j = 0; j < BITSET_UINTS; ++j)
+                  accepts[j] &= dfa->word_char[j];
+            }
           if (constraint & NEXT_NOTWORD_CONSTRAINT)
-            for (j = 0; j < word_char_max; ++j)
-              accepts[j] &= ~dfa->word_char[j];
+            {
+#ifdef RE_ENABLE_I18N
+              if (dfa->mb_cur_max > 1)
+                for (j = 0; j < BITSET_UINTS; ++j)
+                  accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]);
+              else
+#endif
+                for (j = 0; j < BITSET_UINTS; ++j)
+                  accepts[j] &= ~dfa->word_char[j];
+            }
         }
 
       /* Then divide `accepts' into DFA states, or create a new