diff options
author | Ulrich Drepper <drepper@redhat.com> | 2003-11-24 19:30:51 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2003-11-24 19:30:51 +0000 |
commit | 65e6becf5b1b9ca1e911986d030b8b31b5dd4cfa (patch) | |
tree | 119234eb952b9bd87c68ceb03f68826d4bbad4de /posix/regex_internal.h | |
parent | 951d64082330765a22da6beac6e067ec054605e7 (diff) | |
download | glibc-65e6becf5b1b9ca1e911986d030b8b31b5dd4cfa.tar.gz |
Update.
2003-11-24 Jakub Jelinek <jakub@redhat.com>
* posix/regex_internal.h (re_token_t): Add word_char bit. Add
comment.
(re_dfa_t): Add sb_char field.
(bitset_mask): New function.
* posix/regcomp.c (free_dfa_content): Free sb_char.
(init_dfa): Don't initialize word_char unnecessarily.
Initialize sb_char.
(duplicate_node): Don't duplicate !word_char CHARACTERs with
NEXT_WORD_CONSTRAINT constraint or word_char CHARACTERs with
NEXT_NOTWORD_CONSTRAINT. Return -1 in *new_idx instead.
(duplicate_node_closure): Handle clone_dest == -1 from
duplicate_node.
(peek_token): Initialize word_char bit.
(parse_expression, parse_dup_op): Add comments.
(parse_bracket_exp): Don't set bitmask bits for multi-byte char
starting bytes here at the beginning. Mask off the bits right
before creating SIMPLE_BRACKET.
(build_charclass_op): Likewise.
* posix/regexec.c (group_nodes_into_DFAstates) <case OP_PERIOD>: Only
set accept bits for single-byte characters.
(group_nodes_into_DFAstates): Don't rely on characters 0 .. 127
being single byte encoded and the rest multi-byte.
* posix/bug-regex19.c (tests): Add new tests.
(do_mb_tests): Initialize t to *test.
(main): Fail even on do_mb_tests errors.
Diffstat (limited to 'posix/regex_internal.h')
-rw-r--r-- | posix/regex_internal.h | 15 |
1 files changed, 15 insertions, 0 deletions
diff --git a/posix/regex_internal.h b/posix/regex_internal.h index 5111f6d793..f8e99ee06a 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -133,6 +133,7 @@ typedef unsigned int *re_bitset_ptr_t; static inline void bitset_not (bitset set); static inline void bitset_merge (bitset dest, const bitset src); static inline void bitset_not_merge (bitset dest, const bitset src); +static inline void bitset_mask (bitset dest, const bitset src); #define PREV_WORD_CONSTRAINT 0x0001 #define PREV_NOTWORD_CONSTRAINT 0x0002 @@ -281,8 +282,11 @@ typedef struct unsigned int constraint : 10; /* context constraint */ unsigned int duplicated : 1; #ifdef RE_ENABLE_I18N + /* These 2 bits can be moved into the union if needed (e.g. if running out + of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */ unsigned int mb_partial : 1; #endif + unsigned int word_char : 1; } re_token_t; #define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT) @@ -601,6 +605,7 @@ struct re_dfa_t re_dfastate_t *init_state_begbuf; bin_tree_t *str_tree; bin_tree_storage_t *str_tree_storage; + re_bitset_ptr_t sb_char; int str_tree_storage_idx; /* number of subexpressions `re_nsub' is in regex_t. */ @@ -711,6 +716,16 @@ bitset_not_merge (dest, src) dest[i] |= ~src[i]; } +static inline void +bitset_mask (dest, src) + bitset dest; + const bitset src; +{ + int bitset_i; + for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i) + dest[bitset_i] &= src[bitset_i]; +} + #if defined RE_ENABLE_I18N && !defined RE_NO_INTERNAL_PROTOTYPES /* Inline functions for re_string. */ static inline int |