diff options
author | Ulrich Drepper <drepper@redhat.com> | 2003-11-26 03:24:15 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2003-11-26 03:24:15 +0000 |
commit | 97fd3a3003b9eb980395417ffb104e02bf315fe8 (patch) | |
tree | a182a950fc4f7d23c60dac89ee85bad2aae696c9 /posix/regex_internal.c | |
parent | 65e6becf5b1b9ca1e911986d030b8b31b5dd4cfa (diff) | |
download | glibc-97fd3a3003b9eb980395417ffb104e02bf315fe8.tar.gz |
Update.
2003-11-25 Ulrich Drepper <drepper@redhat.com>
* posix/runptests.c (main): Make errors fatal.
* posix/PTESTS: One test in GA135 and GA136 checks functionality
which does not seem to be guaranteed.
2003-11-25 Jakub Jelinek <jakub@redhat.com>
* posix/regexec.c (re_search_internal): If prune_impossible_nodes
returned REG_NOMATCH, set match_last to -1. Don't initialize
pmatch[0] needlessly. Fix comment.
(prune_impossible_nodes): Don't segfault on NULL state_log entry.
(set_regs): Fix comment.
* posix/regcomp.c (parse_bracket_exp): Only set has_plural_match
if adding both SIMPLE_BRACKET and COMPLEX_BRACKET.
(build_charclass_op): Set has_plural_match if adding both
SIMPLE_BRACKET and COMPLEX_BRACKET.
* posix/bug-regex11.c (tests): Fix register values for one commented
out test. Add new tests.
* posix/regex_internal.c (re_string_allocate): Make sure init_len
is at least dfa->mb_cur_max.
(re_string_reconstruct): If is_utf8, don't fall back into
re_string_skip_chars just because idx points into the middle of
a valid UTF-8 character. Instead, set the wcs bytes which correspond
to the partial character bytes to WEOF.
* posix/regexec.c (re_search_internal): Allocate input.bufs_len + 1
instead of dfa->nodes_len + 1 state_log entries initially.
* posix/bug-regex20.c (main): Uncomment backwards case insensitive
tests.
Diffstat (limited to 'posix/regex_internal.c')
-rw-r--r-- | posix/regex_internal.c | 25 |
1 files changed, 15 insertions, 10 deletions
diff --git a/posix/regex_internal.c b/posix/regex_internal.c index 8b68bd62cb..f78ec79e65 100644 --- a/posix/regex_internal.c +++ b/posix/regex_internal.c @@ -55,7 +55,12 @@ re_string_allocate (pstr, str, len, init_len, trans, icase, dfa) const re_dfa_t *dfa; { reg_errcode_t ret; - int init_buf_len = (len + 1 < init_len) ? len + 1: init_len; + int init_buf_len; + + /* Ensure at least one character fits into the buffers. */ + if (init_len < dfa->mb_cur_max) + init_len = dfa->mb_cur_max; + init_buf_len = (len + 1 < init_len) ? len + 1: init_len; re_string_construct_common (str, len, pstr, trans, icase, dfa); pstr->stop = pstr->len; @@ -516,33 +521,33 @@ re_string_reconstruct (pstr, idx, eflags, newline) /* Special case UTF-8. Multi-byte chars start with any byte other than 0x80 - 0xbf. */ raw = pstr->raw_mbs + pstr->raw_mbs_idx; - end = raw + (pstr->valid_len > offset - pstr->mb_cur_max - ? pstr->valid_len : offset - pstr->mb_cur_max); + end = raw + (offset - pstr->mb_cur_max); for (p = raw + offset - 1; p >= end; --p) if ((*p & 0xc0) != 0x80) { mbstate_t cur_state; wchar_t wc2; + int mlen; /* XXX Don't use mbrtowc, we know which conversion to use (UTF-8 -> UCS4). */ memset (&cur_state, 0, sizeof (cur_state)); - if (mbrtowc (&wc2, p, raw + offset - p, &cur_state) - == raw + offset - p) + mlen = mbrtowc (&wc2, p, raw + pstr->len - p, + &cur_state) - (raw + offset - p); + if (mlen >= 0) { memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); + pstr->valid_len = mlen; wc = wc2; } break; } } if (wc == WEOF) - { - pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx; - for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) - pstr->wcs[wcs_idx] = WEOF; - } + pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx; + for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) + pstr->wcs[wcs_idx] = WEOF; if (pstr->trans && wc <= 0xff) wc = pstr->trans[wc]; pstr->tip_context = (IS_WIDE_WORD_CHAR (wc) ? CONTEXT_WORD |