diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/dfasearch.c | 75 | ||||
-rw-r--r-- | src/grep.c | 282 | ||||
-rw-r--r-- | src/grep.h | 3 | ||||
-rw-r--r-- | src/kwsearch.c | 30 | ||||
-rw-r--r-- | src/kwset.c | 88 | ||||
-rw-r--r-- | src/kwset.h | 17 | ||||
-rw-r--r-- | src/pcresearch.c | 6 | ||||
-rw-r--r-- | src/search.h | 47 | ||||
-rw-r--r-- | src/searchutils.c | 22 |
9 files changed, 293 insertions, 277 deletions
diff --git a/src/dfasearch.c b/src/dfasearch.c index d6afa8d3..16758655 100644 --- a/src/dfasearch.c +++ b/src/dfasearch.c @@ -36,13 +36,13 @@ struct dfa_comp /* Regex compiled regexps. */ struct re_pattern_buffer *patterns; - size_t pcount; + idx_t pcount; struct re_registers regs; /* Number of compiled fixed strings known to exactly match the regexp. If kwsexec returns < kwset_exact_matches, then we don't need to call the regexp matcher at all. */ - ptrdiff_t kwset_exact_matches; + idx_t kwset_exact_matches; bool begline; }; @@ -80,9 +80,9 @@ kwsmusts (struct dfa_comp *dc) The kwset matcher will return the index of the matching string that it chooses. */ ++dc->kwset_exact_matches; - ptrdiff_t old_len = strlen (dm->must); - ptrdiff_t new_len = old_len + dm->begline + dm->endline; - char *must = xmalloc (new_len); + idx_t old_len = strlen (dm->must); + idx_t new_len = old_len + dm->begline + dm->endline; + char *must = ximalloc (new_len); char *mp = must; *mp = eolbyte; mp += dm->begline; @@ -108,7 +108,7 @@ kwsmusts (struct dfa_comp *dc) BS_SAFE is true of encodings where a backslash cannot appear as the last byte of a multibyte character. */ static bool _GL_ATTRIBUTE_PURE -possible_backrefs_in_pattern (char const *keys, ptrdiff_t len, bool bs_safe) +possible_backrefs_in_pattern (char const *keys, idx_t len, bool bs_safe) { /* Normally a backslash, but in an unsafe encoding this is a non-char value so that the comparison below always fails, because if there @@ -144,8 +144,8 @@ possible_backrefs_in_pattern (char const *keys, ptrdiff_t len, bool bs_safe) } static bool -regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len, - ptrdiff_t pcount, ptrdiff_t lineno, reg_syntax_t syntax_bits, +regex_compile (struct dfa_comp *dc, char const *p, idx_t len, + idx_t pcount, idx_t lineno, reg_syntax_t syntax_bits, bool syntax_only) { struct re_pattern_buffer pat0; @@ -154,7 +154,9 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len, pat->allocated = 0; /* Do not use a fastmap with -i, to work around glibc Bug#20381. */ - pat->fastmap = (syntax_only | match_icase) ? NULL : xmalloc (UCHAR_MAX + 1); + verify (UCHAR_MAX < IDX_MAX); + idx_t uchar_max = UCHAR_MAX; + pat->fastmap = (syntax_only | match_icase) ? NULL : ximalloc (uchar_max + 1); pat->translate = NULL; @@ -168,14 +170,17 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len, return true; /* Emit a filename:lineno: prefix for patterns taken from files. */ - size_t pat_lineno; + idx_t pat_lineno; char const *pat_filename = lineno < 0 ? "" : pattern_file_name (lineno, &pat_lineno); if (*pat_filename == '\0') error (0, 0, "%s", err); else - error (0, 0, "%s:%zu: %s", pat_filename, pat_lineno, err); + { + ptrdiff_t n = pat_lineno; + error (0, 0, "%s:%td: %s", pat_filename, n, err); + } return false; } @@ -185,7 +190,7 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len, Return a description of the compiled pattern. */ void * -GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits, +GEAcompile (char *pattern, idx_t size, reg_syntax_t syntax_bits, bool exact) { char *motif; @@ -210,29 +215,30 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits, dc->patterns = xmalloc (sizeof *dc->patterns); dc->patterns++; dc->pcount = 0; - size_t palloc = 1; + idx_t palloc = 1; char const *prev = pattern; /* Buffer containing back-reference-free patterns. */ char *buf = NULL; - ptrdiff_t buflen = 0; - size_t bufalloc = 0; + idx_t buflen = 0; + idx_t bufalloc = 0; - ptrdiff_t lineno = 0; + idx_t lineno = 0; do { char const *sep = rawmemchr (p, '\n'); - ptrdiff_t len = sep - p; + idx_t len = sep - p; bool backref = possible_backrefs_in_pattern (p, len, bs_safe); if (backref && prev < p) { - ptrdiff_t prevlen = p - prev; - while (bufalloc < buflen + prevlen) - buf = x2realloc (buf, &bufalloc); + idx_t prevlen = p - prev; + ptrdiff_t bufshortage = buflen - bufalloc + prevlen; + if (0 < bufshortage) + buf = xpalloc (buf, &bufalloc, bufshortage, -1, 1); memcpy (buf + buflen, prev, prevlen); buflen += prevlen; } @@ -240,10 +246,11 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits, /* Ensure room for at least two more patterns. The extra one is for the regex_compile that may be executed after this loop exits, and its (unused) slot is patterns[-1] until then. */ - while (palloc <= dc->pcount + 1) + ptrdiff_t shortage = dc->pcount - palloc + 2; + if (0 < shortage) { - dc->patterns = x2nrealloc (dc->patterns - 1, &palloc, - sizeof *dc->patterns); + dc->patterns = xpalloc (dc->patterns - 1, &palloc, shortage, -1, + sizeof *dc->patterns); dc->patterns++; } @@ -271,8 +278,8 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits, { if (pattern < prev) { - ptrdiff_t prevlen = patlim - prev; - buf = xrealloc (buf, buflen + prevlen); + idx_t prevlen = patlim - prev; + buf = xirealloc (buf, buflen + prevlen); memcpy (buf + buflen, prev, prevlen); buflen += prevlen; } @@ -298,11 +305,12 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits, static char const word_beg_bk[] = "\\(^\\|[^[:alnum:]_]\\)\\("; static char const word_end_bk[] = "\\)\\([^[:alnum:]_]\\|$\\)"; int bk = !(syntax_bits & RE_NO_BK_PARENS); - char *n = xmalloc (sizeof word_beg_bk - 1 + size + sizeof word_end_bk); + idx_t bracket_bytes = sizeof word_beg_bk - 1 + sizeof word_end_bk; + char *n = ximalloc (size + bracket_bytes); strcpy (n, match_lines ? (bk ? line_beg_bk : line_beg_no_bk) : (bk ? word_beg_bk : word_beg_no_bk)); - size_t total = strlen (n); + idx_t total = strlen (n); memcpy (n + total, pattern, size); total += size; strcpy (n + total, match_lines ? (bk ? line_end_bk : line_end_no_bk) @@ -338,16 +346,16 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits, return dc; } -size_t -EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size, +ptrdiff_t +EGexecute (void *vdc, char const *buf, idx_t size, idx_t *match_size, char const *start_ptr) { char const *buflim, *beg, *end, *ptr, *match, *best_match, *mb_start; char eol = eolbyte; regoff_t start; - size_t len, best_len; + idx_t len, best_len; struct kwsmatch kwsm; - size_t i; + idx_t i; struct dfa_comp *dc = vdc; struct dfa *superset = dfasuperset (dc->dfa); bool dfafast = dfaisfast (dc->dfa); @@ -362,7 +370,7 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size, if (!start_ptr) { char const *next_beg, *dfa_beg = beg; - ptrdiff_t count = 0; + idx_t count = 0; bool exact_kwset_match = false; bool backref = false; @@ -584,7 +592,6 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size, success: len = end - beg; success_in_len:; - size_t off = beg - buf; *match_size = len; - return off; + return beg - buf; } @@ -88,13 +88,13 @@ struct patloc { /* Line number of the pattern in PATTERN_ARRAY. Line numbers start at 0, and each pattern is terminated by '\n'. */ - ptrdiff_t lineno; + idx_t lineno; /* Input location of the pattern. The FILENAME "-" represents standard input, and "" represents the command line. FILELINE is origin-1 for files and is irrelevant for the command line. */ char const *filename; - ptrdiff_t fileline; + idx_t fileline; }; /* The array of pattern locations. The concatenation of all patterns @@ -108,13 +108,13 @@ struct patloc removed patterns not at a file start or end requires another PATLOC entry for the first non-removed pattern. */ static struct patloc *patloc; -static size_t patlocs_allocated, patlocs_used; +static idx_t patlocs_allocated, patlocs_used; /* Pointer to the array of patterns, each terminated by newline. */ static char *pattern_array; /* The number of unique patterns seen so far. */ -static size_t n_patterns; +static idx_t n_patterns; /* Hash table of patterns seen so far. */ static Hash_table *pattern_table; @@ -160,16 +160,16 @@ compare_patterns (void const *a, void const *b) sequence of patterns with no duplicates; SIZE is the total number of bytes in KEYS. If some patterns past the first DUPFREE_SIZE bytes are not duplicates, update PATLOCS accordingly. */ -static ptrdiff_t -update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size, +static idx_t +update_patterns (char *keys, idx_t dupfree_size, idx_t size, char const *filename) { char *dst = keys + dupfree_size; - ptrdiff_t fileline = 1; + idx_t fileline = 1; int prev_inserted = 0; char const *srclim = keys + size; - ptrdiff_t patsize; + idx_t patsize; for (char const *src = keys + dupfree_size; src < srclim; src += patsize) { char const *patend = rawmemchr (src, '\n'); @@ -190,8 +190,8 @@ update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size, if (!prev_inserted) { if (patlocs_used == patlocs_allocated) - patloc = x2nrealloc (patloc, &patlocs_allocated, - sizeof *patloc); + patloc = xpalloc (patloc, &patlocs_allocated, 1, -1, + sizeof *patloc); patloc[patlocs_used++] = (struct patloc) { .lineno = n_patterns, .filename = filename, @@ -213,9 +213,9 @@ update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size, Set *NEW_LINENO to the origin-1 line number of PATTERN in the file, or to an unspecified value if PATTERN came from the command line. */ char const * _GL_ATTRIBUTE_PURE -pattern_file_name (size_t lineno, size_t *new_lineno) +pattern_file_name (idx_t lineno, idx_t *new_lineno) { - ptrdiff_t i; + idx_t i; for (i = 1; i < patlocs_used; i++) if (lineno < patloc[i].lineno) break; @@ -227,7 +227,7 @@ pattern_file_name (size_t lineno, size_t *new_lineno) /* Record the starting address and length of the sole poisoned region, so that we can unpoison it later, just before each following read. */ static void const *poison_buf; -static size_t poison_len; +static idx_t poison_len; static void clear_asan_poison (void) @@ -237,7 +237,7 @@ clear_asan_poison (void) } static void -asan_poison (void const *addr, size_t size) +asan_poison (void const *addr, idx_t size) { poison_buf = addr; poison_len = size; @@ -246,7 +246,7 @@ asan_poison (void const *addr, size_t size) } #else static void clear_asan_poison (void) { } -static void asan_poison (void const volatile *addr, size_t size) { } +static void asan_poison (void const volatile *addr, idx_t size) { } #endif /* The group separator used when context is requested. */ @@ -467,7 +467,7 @@ printf_errno (char const *format, ...) } static void -fwrite_errno (void const *ptr, size_t size, size_t nmemb) +fwrite_errno (void const *ptr, idx_t size, idx_t nmemb) { if (fwrite (ptr, size, nmemb, stdout) != nmemb) stdout_errno = errno; @@ -644,9 +644,9 @@ static bool seek_failed; static bool seek_data_failed; /* Functions we'll use to search. */ -typedef void *(*compile_fp_t) (char *, size_t, reg_syntax_t, bool); -typedef size_t (*execute_fp_t) (void *, char const *, size_t, size_t *, - char const *); +typedef void *(*compile_fp_t) (char *, idx_t, reg_syntax_t, bool); +typedef ptrdiff_t (*execute_fp_t) (void *, char const *, idx_t, idx_t *, + char const *); static execute_fp_t execute; static void *compiled_pattern; @@ -694,6 +694,7 @@ clean_up_stdout (void) /* An unsigned type suitable for fast matching. */ typedef uintmax_t uword; static uword const uword_max = UINTMAX_MAX; +enum { uword_size = sizeof (uword) }; /* For when a signed size is wanted. */ struct localeinfo localeinfo; @@ -742,7 +743,7 @@ skip_easy_bytes (char const *buf) the buffer end, but that's benign. */ char const *p; uword const *s; - for (p = buf; (uintptr_t) p % sizeof (uword) != 0; p++) + for (p = buf; (uintptr_t) p % uword_size != 0; p++) if (to_uchar (*p) & unibyte_mask) return p; for (s = CAST_ALIGNED (uword const *, p); ! (*s & unibyte_mask); s++) @@ -753,22 +754,22 @@ skip_easy_bytes (char const *buf) } /* Return true if BUF, of size SIZE, has an encoding error. - BUF must be followed by at least sizeof (uword) bytes, + BUF must be followed by at least uword_size bytes, the first of which may be modified. */ static bool -buf_has_encoding_errors (char *buf, size_t size) +buf_has_encoding_errors (char *buf, idx_t size) { if (! unibyte_mask) return false; mbstate_t mbs = { 0 }; - size_t clen; + ptrdiff_t clen; buf[size] = -1; for (char const *p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen) { - clen = mbrlen (p, buf + size - p, &mbs); - if (MB_LEN_MAX < clen) + clen = imbrlen (p, buf + size - p, &mbs); + if (clen < 0) return true; } @@ -780,7 +781,7 @@ buf_has_encoding_errors (char *buf, size_t size) BUF must be followed by at least one byte, which may be arbitrarily written to or read from. */ static bool -buf_has_nulls (char *buf, size_t size) +buf_has_nulls (char *buf, idx_t size) { buf[size] = 0; return strlen (buf) != size; @@ -790,7 +791,7 @@ buf_has_nulls (char *buf, size_t size) SIZE bytes have already been read from the file with descriptor FD and status ST. */ static bool -file_must_have_nulls (size_t size, int fd, struct stat const *st) +file_must_have_nulls (idx_t size, int fd, struct stat const *st) { /* If the file has holes, it must contain a null byte somewhere. */ if (SEEK_HOLE != SEEK_SET && !seek_failed @@ -869,18 +870,18 @@ skipped_file (char const *name, bool command_line, bool is_dir) page size, unless a read yields a partial page. */ static char *buffer; /* Base of buffer. */ -static size_t bufalloc; /* Allocated buffer size, counting slop. */ +static idx_t bufalloc; /* Allocated buffer size, counting slop. */ static int bufdesc; /* File descriptor. */ static char *bufbeg; /* Beginning of user-visible stuff. */ static char *buflim; /* Limit of user-visible stuff. */ -static size_t pagesize; /* alignment of memory pages */ +static idx_t pagesize; /* alignment of memory pages */ static off_t bufoffset; /* Read offset. */ static off_t after_last_match; /* Pointer after last matching line that would have been output if we were outputting characters. */ static bool skip_nuls; /* Skip '\0' in data. */ static bool skip_empty_lines; /* Skip empty lines in data. */ -static uintmax_t totalnl; /* Total newline count before lastnl. */ +static intmax_t totalnl; /* Total newline count before lastnl. */ /* Initial buffer size, not counting slop. */ enum { INITIAL_BUFSIZE = 96 * 1024 }; @@ -894,18 +895,18 @@ enum { INITIAL_BUFSIZE = 96 * 1024 }; /* Add two numbers that count input bytes or lines, and report an error if the addition overflows. */ -static uintmax_t -add_count (uintmax_t a, uintmax_t b) +static intmax_t +add_count (intmax_t a, idx_t b) { - uintmax_t sum = a + b; - if (sum < a) + intmax_t sum; + if (!INT_ADD_OK (a, b, &sum)) die (EXIT_TROUBLE, 0, _("input is too large to count")); return sum; } /* Return true if BUF (of size SIZE) is all zeros. */ static bool -all_zeros (char const *buf, size_t size) +all_zeros (char const *buf, idx_t size) { for (char const *p = buf; p < buf + size; p++) if (*p) @@ -944,55 +945,55 @@ reset (int fd, struct stat const *st) to the beginning of the buffer contents, and 'buflim' points just after the end. Return false if there's an error. */ static bool -fillbuf (size_t save, struct stat const *st) +fillbuf (idx_t save, struct stat const *st) { - size_t fillsize; - bool cc = true; char *readbuf; - size_t readsize; - if (pagesize <= buffer + bufalloc - sizeof (uword) - buflim) + /* After BUFLIM, we need room for at least a page of data plus a + trailing uword. */ + idx_t min_after_buflim = pagesize + uword_size; + + if (min_after_buflim <= buffer + bufalloc - buflim) readbuf = buflim; else { - size_t minsize = save + pagesize; - size_t newsize; - size_t newalloc; char *newbuf; - /* Grow newsize until it is at least as great as minsize. */ - for (newsize = bufalloc - pagesize - sizeof (uword); - newsize < minsize; - newsize *= 2) - if ((SIZE_MAX - pagesize - sizeof (uword)) / 2 < newsize) - xalloc_die (); - - /* Try not to allocate more memory than the file size indicates, - as that might cause unnecessary memory exhaustion if the file - is large. However, do not use the original file size as a - heuristic if we've already read past the file end, as most - likely the file is growing. */ - if (usable_st_size (st)) - { - off_t to_be_read = st->st_size - bufoffset; - off_t maxsize_off = save + to_be_read; - if (0 <= to_be_read && to_be_read <= maxsize_off - && maxsize_off == (size_t) maxsize_off - && minsize <= (size_t) maxsize_off - && (size_t) maxsize_off < newsize) - newsize = maxsize_off; - } + /* For data to be searched we need room for the saved bytes, + plus at least a page of data to read. */ + idx_t minsize = save + pagesize; /* Add enough room so that the buffer is aligned and has room for byte sentinels fore and aft, and so that a uword can be read aft. */ - newalloc = newsize + pagesize + sizeof (uword); + ptrdiff_t incr_min = minsize - bufalloc + min_after_buflim; + + if (incr_min <= 0) + newbuf = buffer; + else + { + /* Try not to allocate more memory than the file size indicates, + as that might cause unnecessary memory exhaustion if the file + is large. However, do not use the original file size as a + heuristic if we've already read past the file end, as most + likely the file is growing. */ + ptrdiff_t alloc_max = -1; + if (usable_st_size (st)) + { + off_t to_be_read = st->st_size - bufoffset; + ptrdiff_t a; + if (0 <= to_be_read + && INT_ADD_OK (to_be_read, save + min_after_buflim, &a)) + alloc_max = a; + } + + newbuf = xpalloc (NULL, &bufalloc, incr_min, alloc_max, 1); + } - newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer; readbuf = ALIGN_TO (newbuf + 1 + save, pagesize); - size_t moved = save + 1; /* Move the preceding byte sentinel too. */ + idx_t moved = save + 1; /* Move the preceding byte sentinel too. */ memmove (readbuf - moved, buflim - moved, moved); - if (newbuf != buffer) + if (0 < incr_min) { free (buffer); buffer = newbuf; @@ -1003,9 +1004,12 @@ fillbuf (size_t save, struct stat const *st) clear_asan_poison (); - readsize = buffer + bufalloc - sizeof (uword) - readbuf; + idx_t readsize = buffer + bufalloc - uword_size - readbuf; readsize -= readsize % pagesize; + idx_t fillsize; + bool cc = true; + while (true) { fillsize = safe_read (bufdesc, readbuf, readsize); @@ -1043,12 +1047,11 @@ fillbuf (size_t save, struct stat const *st) /* Initialize the following word, because skip_easy_bytes and some matchers read (but do not use) those bytes. This avoids false positive reports of these bytes being used uninitialized. */ - memset (buflim, 0, sizeof (uword)); + memset (buflim, 0, uword_size); /* Mark the part of the buffer not filled by the read or set by the above memset call as ASAN-poisoned. */ - asan_poison (buflim + sizeof (uword), - bufalloc - (buflim - buffer) - sizeof (uword)); + asan_poison (buflim + uword_size, bufalloc - (buflim - buffer) - uword_size); return cc; } @@ -1089,7 +1092,7 @@ static char *label = NULL; /* Fake filename for stdin */ /* Internal variables to keep track of byte count, context, etc. */ -static uintmax_t totalcc; /* Total character count before bufbeg. */ +static intmax_t totalcc; /* Total character count before bufbeg. */ static char const *lastnl; /* Pointer after last newline counted. */ static char *lastout; /* Pointer after last character output; NULL if no character has been output @@ -1105,7 +1108,7 @@ static bool binary; /* Use binary rather than text I/O. */ static void nlscan (char const *lim) { - size_t newlines = 0; + idx_t newlines = 0; for (char const *beg = lastnl; beg < lim; beg++) { beg = memchr (beg, eolbyte, lim - beg); @@ -1137,16 +1140,16 @@ print_sep (char sep) /* Print a line number or a byte offset. */ static void -print_offset (uintmax_t pos, const char *color) +print_offset (intmax_t pos, const char *color) { pr_sgr_start_if (color); - printf_errno ("%*"PRIuMAX, offset_width, pos); + printf_errno ("%*"PRIdMAX, offset_width, pos); pr_sgr_end_if (color); } /* Print a whole line head (filename, line, byte). The output data starts at BEG and contains LEN bytes; it is followed by at least - sizeof (uword) bytes, the first of which may be temporarily modified. + uword_size bytes, the first of which may be temporarily modified. The output data comes from what is perhaps a larger input line that goes until LIM, where LIM[-1] is an end-of-line byte. Use SEP as the separator on output. @@ -1154,7 +1157,7 @@ print_offset (uintmax_t pos, const char *color) Return true unless the line was suppressed due to an encoding error. */ static bool -print_line_head (char *beg, size_t len, char const *lim, char sep) +print_line_head (char *beg, idx_t len, char const *lim, char sep) { if (binary_files != TEXT_BINARY_FILES) { @@ -1191,7 +1194,7 @@ print_line_head (char *beg, size_t len, char const *lim, char sep) if (out_byte) { - uintmax_t pos = add_count (totalcc, beg - bufbeg); + intmax_t pos = add_count (totalcc, beg - bufbeg); print_offset (pos, byte_num_color); print_sep (sep); } @@ -1206,16 +1209,16 @@ static char * print_line_middle (char *beg, char *lim, const char *line_color, const char *match_color) { - size_t match_size; - size_t match_offset; + idx_t match_size; + ptrdiff_t match_offset; char *cur; char *mid = NULL; char *b; for (cur = beg; (cur < lim - && ((match_offset = execute (compiled_pattern, beg, lim - beg, - &match_size, cur)) != (size_t) -1)); + && 0 <= (match_offset = execute (compiled_pattern, beg, lim - beg, + &match_size, cur))); cur = b + match_size) { b = beg + match_offset; @@ -1273,8 +1276,8 @@ print_line_middle (char *beg, char *lim, static char * print_line_tail (char *beg, const char *lim, const char *line_color) { - size_t eol_size; - size_t tail_size; + idx_t eol_size; + idx_t tail_size; eol_size = (lim > beg && lim[-1] == eolbyte); eol_size += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r'); @@ -1462,10 +1465,10 @@ grepbuf (char *beg, char const *lim) for (char *p = beg; p < lim; p = endp) { - size_t match_size; - size_t match_offset = execute (compiled_pattern, p, lim - p, - &match_size, NULL); - if (match_offset == (size_t) -1) + idx_t match_size; + ptrdiff_t match_offset = execute (compiled_pattern, p, lim - p, + &match_size, NULL); + if (match_offset < 0) { if (!out_invert) break; @@ -1500,7 +1503,7 @@ static intmax_t grep (int fd, struct stat const *st, bool *ineof) { intmax_t nlines, i; - size_t residue, save; + idx_t residue, save; char oldc; char *beg; char *lim; @@ -1540,8 +1543,8 @@ grep (int fd, struct stat const *st, bool *ineof) if (align_tabs) { /* Width is log of maximum number. Line numbers are origin-1. */ - uintmax_t num = usable_st_size (st) ? st->st_size : UINTMAX_MAX; - num += out_line && num < UINTMAX_MAX; + intmax_t num = usable_st_size (st) ? st->st_size : INTMAX_MAX; + num += out_line && num < INTMAX_MAX; do offset_width++; while ((num /= 10) != 0); @@ -2231,15 +2234,15 @@ parse_grep_colors (void) /* Return true if PAT (of length PATLEN) contains an encoding error. */ static bool -contains_encoding_error (char const *pat, size_t patlen) +contains_encoding_error (char const *pat, idx_t patlen) { mbstate_t mbs = { 0 }; - size_t charlen; + ptrdiff_t charlen; - for (size_t i = 0; i < patlen; i += charlen) + for (idx_t i = 0; i < patlen; i += charlen) { charlen = mb_clen (pat + i, patlen - i, &mbs); - if (MB_LEN_MAX < charlen) + if (charlen < 0) return true; } return false; @@ -2279,8 +2282,8 @@ setup_ok_fold (void) Fcompile cannot handle it. MBS is the multibyte conversion state. PATLEN must be nonzero. */ -static int -fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs) +static ptrdiff_t +fgrep_icase_charlen (char const *pat, idx_t patlen, mbstate_t *mbs) { unsigned char pat0 = pat[0]; @@ -2302,7 +2305,7 @@ fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs) wchar_t folded[CASE_FOLDED_BUFSIZE]; if (case_folded_counterparts (wc, folded)) return -1; - for (int i = wn; 0 < --i; ) + for (idx_t i = wn; 0 < --i; ) { unsigned char c = pat[i]; if (toupper (c) != c) @@ -2317,11 +2320,11 @@ fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs) and so can be processed by Fcompile. */ static bool -fgrep_icase_available (char const *pat, size_t patlen) +fgrep_icase_available (char const *pat, idx_t patlen) { mbstate_t mbs = {0,}; - for (size_t i = 0; i < patlen; ) + for (idx_t i = 0; i < patlen; ) { int n = fgrep_icase_charlen (pat + i, patlen - i, &mbs); if (n < 0) @@ -2335,28 +2338,27 @@ fgrep_icase_available (char const *pat, size_t patlen) /* Change the pattern *KEYS_P, of size *LEN_P, from fgrep to grep style. */ void -fgrep_to_grep_pattern (char **keys_p, size_t *len_p) +fgrep_to_grep_pattern (char **keys_p, idx_t *len_p) { - size_t len = *len_p; + idx_t len = *len_p; char *keys = *keys_p; mbstate_t mb_state = { 0 }; char *new_keys = xnmalloc (len + 1, 2); char *p = new_keys; - size_t n; - for (; len; keys += n, len -= n) + for (ptrdiff_t n; len; keys += n, len -= n) { n = mb_clen (keys, len, &mb_state); switch (n) { - case (size_t) -2: + case -2: n = len; FALLTHROUGH; default: p = mempcpy (p, keys, n); break; - case (size_t) -1: + case -1: memset (&mb_state, 0, sizeof mb_state); n = 1; FALLTHROUGH; @@ -2385,11 +2387,11 @@ fgrep_to_grep_pattern (char **keys_p, size_t *len_p) to the -F pattern "a". */ static int -try_fgrep_pattern (int matcher, char *keys, size_t *len_p) +try_fgrep_pattern (int matcher, char *keys, idx_t *len_p) { int result = matcher; - size_t len = *len_p; - char *new_keys = xmalloc (len + 1); + idx_t len = *len_p; + char *new_keys = ximalloc (len + 1); char *p = new_keys; char const *q = keys; mbstate_t mb_state = { 0 }; @@ -2434,26 +2436,14 @@ try_fgrep_pattern (int matcher, char *keys, size_t *len_p) break; } - { - size_t n; - if (match_icase) - { - int ni = fgrep_icase_charlen (q, len, &mb_state); - if (ni < 0) - goto fail; - n = ni; - } - else - { - n = mb_clen (q, len, &mb_state); - if (MB_LEN_MAX < n) - goto fail; - } - - p = mempcpy (p, q, n); - q += n; - len -= n; - } + ptrdiff_t clen = (match_icase + ? fgrep_icase_charlen (q, len, &mb_state) + : mb_clen (q, len, &mb_state)); + if (clen < 0) + goto fail; + p = mempcpy (p, q, clen); + q += clen; + len -= clen; } if (*len_p != p - new_keys) @@ -2473,7 +2463,7 @@ int main (int argc, char **argv) { char *keys = NULL; - size_t keycc = 0, keyalloc = 0; + idx_t keycc = 0, keyalloc = 0; int matcher = -1; int opt; int prev_optind, last_recursive; @@ -2612,12 +2602,10 @@ main (int argc, char **argv) case 'e': { - ptrdiff_t cc = strlen (optarg); - if (keyalloc < keycc + cc + 1) - { - keyalloc = keycc + cc + 1; - pattern_array = keys = x2realloc (keys, &keyalloc); - } + idx_t cc = strlen (optarg); + ptrdiff_t shortage = keycc - keyalloc + cc + 1; + if (0 < shortage) + pattern_array = keys = xpalloc (keys, &keyalloc, shortage, -1, 1); char *keyend = mempcpy (keys + keycc, optarg, cc); *keyend = '\n'; keycc = update_patterns (keys, keycc, keycc + cc + 1, ""); @@ -2638,11 +2626,13 @@ main (int argc, char **argv) if (!fp) die (EXIT_TROUBLE, errno, "%s", optarg); } - ptrdiff_t newkeycc = keycc, cc; + idx_t newkeycc = keycc, cc; for (;; newkeycc += cc) { - if (keyalloc <= newkeycc + 1) - pattern_array = keys = x2realloc (keys, &keyalloc); + ptrdiff_t shortage = newkeycc - keyalloc + 2; + if (0 < shortage) + pattern_array = keys = xpalloc (keys, &keyalloc, + shortage, -1, 1); cc = fread (keys + newkeycc, 1, keyalloc - (newkeycc + 1), fp); if (cc == 0) break; @@ -2861,7 +2851,7 @@ main (int argc, char **argv) { /* Make a copy so that it can be reallocated or freed later. */ pattern_array = keys = xstrdup (argv[optind++]); - ptrdiff_t patlen = strlen (keys); + idx_t patlen = strlen (keys); keys[patlen] = '\n'; keycc = update_patterns (keys, 0, patlen + 1, ""); } @@ -2968,7 +2958,7 @@ main (int argc, char **argv) only_matching | color_option); /* We need one byte prior and one after. */ char eolbytes[3] = { 0, eolbyte, 0 }; - size_t match_size; + idx_t match_size; skip_empty_lines = ((execute (compiled_pattern, eolbytes + 1, 1, &match_size, NULL) == 0) == out_invert); @@ -2987,11 +2977,11 @@ main (int argc, char **argv) #else long psize = getpagesize (); #endif - if (! (0 < psize && psize <= (SIZE_MAX - sizeof (uword)) / 2)) + if (! (0 < psize && psize <= (IDX_MAX - uword_size) / 2)) abort (); pagesize = psize; - bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + sizeof (uword); - buffer = xmalloc (bufalloc); + bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + uword_size; + buffer = ximalloc (bufalloc); if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES) devices = READ_DEVICES; @@ -21,6 +21,7 @@ #define GREP_GREP_H 1 #include <stdbool.h> +#include <idx.h> /* The following flags are exported from grep for the matchers to look at. */ @@ -29,6 +30,6 @@ extern bool match_words; /* -w */ extern bool match_lines; /* -x */ extern char eolbyte; /* -z */ -extern char const *pattern_file_name (size_t, size_t *); +extern char const *pattern_file_name (idx_t, idx_t *); #endif diff --git a/src/kwsearch.c b/src/kwsearch.c index ea18ce18..171db9ac 100644 --- a/src/kwsearch.c +++ b/src/kwsearch.c @@ -32,11 +32,11 @@ struct kwsearch 'kwswords (kwset)' when some extra one-character words have been appended, one for each troublesome character that will require a DFA search. */ - ptrdiff_t words; + idx_t words; /* The user's pattern and its size in bytes. */ char *pattern; - size_t size; + idx_t size; /* The user's pattern compiled as a regular expression, or null if it has not been compiled. */ @@ -47,11 +47,11 @@ struct kwsearch followed by '\n'. Return a description of the compiled pattern. */ void * -Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact) +Fcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact) { kwset_t kwset; char *buf = NULL; - size_t bufalloc = 0; + idx_t bufalloc = 0; kwset = kwsinit (true); @@ -59,7 +59,7 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact) do { char const *sep = rawmemchr (p, '\n'); - ptrdiff_t len = sep - p; + idx_t len = sep - p; if (match_lines) { @@ -70,8 +70,8 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact) if (bufalloc < len + 2) { free (buf); - bufalloc = len + 2; - buf = x2realloc (NULL, &bufalloc); + bufalloc = len; + buf = xpalloc (NULL, &bufalloc, 2, -1, 1); buf[0] = eolbyte; } memcpy (buf + 1, p, len); @@ -88,7 +88,7 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact) free (buf); - ptrdiff_t words = kwswords (kwset); + idx_t words = kwswords (kwset); kwsprep (kwset); struct kwsearch *kwsearch = xmalloc (sizeof *kwsearch); @@ -102,14 +102,14 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact) /* Use the compiled pattern VCP to search the buffer BUF of size SIZE. If found, return the offset of the first match and store its - size into *MATCH_SIZE. If not found, return SIZE_MAX. + size into *MATCH_SIZE. If not found, return -1. If START_PTR is nonnull, start searching there. */ -size_t -Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size, +ptrdiff_t +Fexecute (void *vcp, char const *buf, idx_t size, idx_t *match_size, char const *start_ptr) { char const *beg, *end, *mb_start; - ptrdiff_t len; + idx_t len; char eol = eolbyte; struct kwsearch *kwsearch = vcp; kwset_t kwset = kwsearch->kwset; @@ -126,7 +126,7 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size, break; len = kwsmatch.size - 2 * match_lines; - size_t mbclen = 0; + idx_t mbclen = 0; if (mb_check && mb_goback (&mb_start, &mbclen, beg + offset, buf + size) != 0) { @@ -198,8 +198,8 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size, else end = buf + size; - if (EGexecute (kwsearch->re, beg, end - beg, match_size, NULL) - != (size_t) -1) + if (0 <= EGexecute (kwsearch->re, beg, end - beg, + match_size, NULL)) goto success_match_words; beg = end - 1; break; diff --git a/src/kwset.c b/src/kwset.c index e5ac1a98..329b802f 100644 --- a/src/kwset.c +++ b/src/kwset.c @@ -59,31 +59,31 @@ struct tree struct trie { /* If an accepting node, this is either 2*W + 1 where W is the word - index, or is SIZE_MAX if Aho-Corasick is in use and FAIL + index, or is -1 if Aho-Corasick is in use and FAIL specifies where to look for more info. If not an accepting node, this is zero. */ - size_t accepting; + ptrdiff_t accepting; struct tree *links; /* Tree of edges leaving this node. */ struct trie *parent; /* Parent of this node. */ struct trie *next; /* List of all trie nodes in level order. */ struct trie *fail; /* Aho-Corasick failure function. */ - ptrdiff_t depth; /* Depth of this node from the root. */ - ptrdiff_t shift; /* Shift function for search failures. */ - ptrdiff_t maxshift; /* Max shift of self and descendants. */ + idx_t depth; /* Depth of this node from the root. */ + idx_t shift; /* Shift function for search failures. */ + idx_t maxshift; /* Max shift of self and descendants. */ }; /* Structure returned opaquely to the caller, containing everything. */ struct kwset { struct obstack obstack; /* Obstack for node allocation. */ - ptrdiff_t words; /* Number of words in the trie. */ + idx_t words; /* Number of words in the trie. */ struct trie *trie; /* The trie itself. */ - ptrdiff_t mind; /* Minimum depth of an accepting node. */ + idx_t mind; /* Minimum depth of an accepting node. */ unsigned char delta[NCHAR]; /* Delta table for rapid search. */ struct trie *next[NCHAR]; /* Table of children of the root. */ char *target; /* Target string if there's only one. */ - ptrdiff_t *shift; /* Used in Boyer-Moore search for one + idx_t *shift; /* Used in Boyer-Moore search for one string. */ char const *trans; /* Character translation table. */ @@ -108,8 +108,7 @@ struct kwset char gc2; /* kwsexec implementation. */ - ptrdiff_t (*kwsexec) (kwset_t, char const *, ptrdiff_t, - struct kwsmatch *, bool); + ptrdiff_t (*kwsexec) (kwset_t, char const *, idx_t, struct kwsmatch *, bool); }; /* Use TRANS to transliterate C. A null TRANS does no transliteration. */ @@ -119,9 +118,9 @@ tr (char const *trans, char c) return trans ? trans[U(c)] : c; } -static ptrdiff_t acexec (kwset_t, char const *, ptrdiff_t, +static ptrdiff_t acexec (kwset_t, char const *, idx_t, struct kwsmatch *, bool); -static ptrdiff_t bmexec (kwset_t, char const *, ptrdiff_t, +static ptrdiff_t bmexec (kwset_t, char const *, idx_t, struct kwsmatch *, bool); /* Return a newly allocated keyword set. A nonnull TRANS specifies a @@ -142,7 +141,7 @@ kwsalloc (char const *trans) kwset->trie->fail = NULL; kwset->trie->depth = 0; kwset->trie->shift = 0; - kwset->mind = PTRDIFF_MAX; + kwset->mind = IDX_MAX; kwset->target = NULL; kwset->trans = trans; kwset->kwsexec = acexec; @@ -156,7 +155,7 @@ enum { DEPTH_SIZE = CHAR_BIT + CHAR_BIT / 2 }; /* Add the given string to the contents of the keyword set. */ void -kwsincr (kwset_t kwset, char const *text, ptrdiff_t len) +kwsincr (kwset_t kwset, char const *text, idx_t len) { assume (0 <= len); struct trie *trie = kwset->trie; @@ -181,7 +180,7 @@ kwsincr (kwset_t kwset, char const *text, ptrdiff_t len) enum { L, R } dirs[DEPTH_SIZE]; links[0] = (struct tree *) &trie->links; dirs[0] = L; - ptrdiff_t depth = 1; + idx_t depth = 1; while (cur && label != cur->label) { @@ -292,10 +291,7 @@ kwsincr (kwset_t kwset, char const *text, ptrdiff_t len) /* Mark the node finally reached as accepting, encoding the index number of this word in the keyword set so far. */ if (!trie->accepting) - { - size_t words = kwset->words; - trie->accepting = 2 * words + 1; - } + trie->accepting = 2 * kwset->words + 1; ++kwset->words; /* Keep track of the longest and shortest string of the keyword set. */ @@ -303,7 +299,7 @@ kwsincr (kwset_t kwset, char const *text, ptrdiff_t len) kwset->mind = trie->depth; } -ptrdiff_t +idx_t kwswords (kwset_t kwset) { return kwset->words; @@ -350,7 +346,7 @@ treefails (struct tree const *tree, struct trie const *fail, { tree->trie->fail = cur->trie; if (!reverse && cur->trie->accepting && !tree->trie->accepting) - tree->trie->accepting = SIZE_MAX; + tree->trie->accepting = -1; return; } fail = fail->fail; @@ -362,7 +358,7 @@ treefails (struct tree const *tree, struct trie const *fail, /* Set delta entries for the links of the given tree such that the preexisting delta value is larger than the current depth. */ static void -treedelta (struct tree const *tree, ptrdiff_t depth, unsigned char delta[]) +treedelta (struct tree const *tree, idx_t depth, unsigned char delta[]) { if (!tree) return; @@ -407,7 +403,6 @@ void kwsprep (kwset_t kwset) { char const *trans = kwset->trans; - ptrdiff_t i; unsigned char deltabuf[NCHAR]; unsigned char *delta = trans ? deltabuf : kwset->delta; struct trie *curr, *last; @@ -425,7 +420,8 @@ kwsprep (kwset_t kwset) /* Looking for just one string. Extract it from the trie. */ kwset->target = obstack_alloc (&kwset->obstack, kwset->mind); - for (i = 0, curr = kwset->trie; i < kwset->mind; ++i) + curr = kwset->trie; + for (idx_t i = 0; i < kwset->mind; i++) { kwset->target[i] = curr->links->label; curr = curr->next; @@ -504,7 +500,7 @@ kwsprep (kwset_t kwset) treenext (kwset->trie->links, next); int gc1 = -2; int gc1help = -1; - for (i = 0; i < NCHAR; i++) + for (int i = 0; i < NCHAR; i++) { int ti = i; if (trans) @@ -534,9 +530,10 @@ kwsprep (kwset_t kwset) { /* Looking for just one string. Extract it from the trie. */ kwset->target = obstack_alloc (&kwset->obstack, kwset->mind); - for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i) + curr = kwset->trie; + for (idx_t i = kwset->mind; 0 < i; i--) { - kwset->target[i] = curr->links->label; + kwset->target[i - 1] = curr->links->label; curr = curr->next; } @@ -547,7 +544,8 @@ kwsprep (kwset_t kwset) kwset->shift = obstack_alloc (&kwset->obstack, sizeof *kwset->shift * (kwset->mind - 1)); - for (i = 0, curr = kwset->trie->next; i < kwset->mind - 1; ++i) + curr = kwset->trie->next; + for (idx_t i = 0; i < kwset->mind - 1; i++) { kwset->shift[i] = curr->shift; curr = curr->next; @@ -560,7 +558,7 @@ kwsprep (kwset_t kwset) /* Fix things up for any translation table. */ if (trans) - for (i = 0; i < NCHAR; ++i) + for (int i = 0; i < NCHAR; ++i) kwset->delta[i] = delta[U(trans[i])]; } @@ -574,16 +572,16 @@ kwsprep (kwset_t kwset) when failing. KWSET->shift says how much to shift. */ static inline bool bm_delta2_search (char const **tpp, char const *ep, char const *sp, - ptrdiff_t len, + idx_t len, char const *trans, char gc1, char gc2, unsigned char const *d1, kwset_t kwset) { char const *tp = *tpp; - ptrdiff_t d = len, skip = 0; + idx_t d = len, skip = 0; while (true) { - ptrdiff_t i = 2; + idx_t i = 2; if (tr (trans, tp[-2]) == gc2) { while (++i <= d) @@ -622,7 +620,7 @@ bm_delta2_search (char const **tpp, char const *ep, char const *sp, that matches the terminal byte specified by KWSET, or NULL if there is no match. KWSET->gc1 should be nonnegative. */ static char const * -memchr_kwset (char const *s, ptrdiff_t n, kwset_t kwset) +memchr_kwset (char const *s, idx_t n, kwset_t kwset) { char const *slim = s + n; if (kwset->gc1help < 0) @@ -634,7 +632,7 @@ memchr_kwset (char const *s, ptrdiff_t n, kwset_t kwset) else { int small_heuristic = 2; - size_t small_bytes = small_heuristic * sizeof (unsigned long int); + idx_t small_bytes = small_heuristic * sizeof (unsigned long int); while (s < slim) { if (kwset->next[U(*s)]) @@ -649,13 +647,13 @@ memchr_kwset (char const *s, ptrdiff_t n, kwset_t kwset) /* Fast Boyer-Moore search (inlinable version). */ static inline ptrdiff_t _GL_ATTRIBUTE_PURE -bmexec_trans (kwset_t kwset, char const *text, ptrdiff_t size) +bmexec_trans (kwset_t kwset, char const *text, idx_t size) { assume (0 <= size); unsigned char const *d1; char const *ep, *sp, *tp; int d; - ptrdiff_t len = kwset->mind; + idx_t len = kwset->mind; char const *trans = kwset->trans; if (len == 0) @@ -675,8 +673,8 @@ bmexec_trans (kwset_t kwset, char const *text, ptrdiff_t size) char gc2 = kwset->gc2; /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */ - ptrdiff_t len12; - if (!INT_MULTIPLY_WRAPV (len, 12, &len12) && len12 < size) + idx_t len12; + if (INT_MULTIPLY_OK (len, 12, &len12) && len12 < size) /* 11 is not a bug, the initial offset happens only once. */ for (ep = text + size - 11 * len; tp <= ep; ) { @@ -735,7 +733,7 @@ bmexec_trans (kwset_t kwset, char const *text, ptrdiff_t size) /* Fast Boyer-Moore search. */ static ptrdiff_t -bmexec (kwset_t kwset, char const *text, ptrdiff_t size, +bmexec (kwset_t kwset, char const *text, idx_t size, struct kwsmatch *kwsmatch, bool longest) { /* Help the compiler inline in two ways, depending on whether @@ -753,7 +751,7 @@ bmexec (kwset_t kwset, char const *text, ptrdiff_t size, /* Hairy multiple string search with the Aho-Corasick algorithm. (inlinable version) */ static inline ptrdiff_t -acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len, +acexec_trans (kwset_t kwset, char const *text, idx_t len, struct kwsmatch *kwsmatch, bool longest) { struct trie const *trie, *accept; @@ -831,7 +829,7 @@ acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len, match: accept = trie; - while (accept->accepting == SIZE_MAX) + while (accept->accepting < 0) accept = accept->fail; left = tp - accept->depth; @@ -858,7 +856,7 @@ acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len, if (trie->accepting) { accept1 = trie; - while (accept1->accepting == SIZE_MAX) + while (accept1->accepting < 0) accept1 = accept1->fail; left1 = tp - accept1->depth; if (left1 <= left) @@ -870,7 +868,7 @@ acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len, } } - kwsmatch->index = accept->accepting / 2; + kwsmatch->index = accept->accepting >> 1; kwsmatch->offset = left - text; kwsmatch->size = accept->depth; @@ -879,7 +877,7 @@ acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len, /* Hairy multiple string search with Aho-Corasick algorithm. */ static ptrdiff_t -acexec (kwset_t kwset, char const *text, ptrdiff_t size, +acexec (kwset_t kwset, char const *text, idx_t size, struct kwsmatch *kwsmatch, bool longest) { assume (0 <= size); @@ -898,7 +896,7 @@ acexec (kwset_t kwset, char const *text, ptrdiff_t size, value), and length. If LONGEST, find the longest match; otherwise any match will do. */ ptrdiff_t -kwsexec (kwset_t kwset, char const *text, ptrdiff_t size, +kwsexec (kwset_t kwset, char const *text, idx_t size, struct kwsmatch *kwsmatch, bool longest) { return kwset->kwsexec (kwset, text, size, kwsmatch, longest); diff --git a/src/kwset.h b/src/kwset.h index 24e13e26..cb94cf4b 100644 --- a/src/kwset.h +++ b/src/kwset.h @@ -22,23 +22,26 @@ #include <stddef.h> #include <stdbool.h> +#include <idx.h> + struct kwsmatch { - ptrdiff_t index; /* Index number of matching keyword. */ - ptrdiff_t offset; /* Offset of match. */ - ptrdiff_t size; /* Length of match. */ + idx_t index; /* Index number of matching keyword. */ + idx_t offset; /* Offset of match. */ + idx_t size; /* Length of match. */ }; -#include "arg-nonnull.h" +#include <arg-nonnull.h> +#include <idx.h> struct kwset; typedef struct kwset *kwset_t; extern kwset_t kwsalloc (char const *); -extern void kwsincr (kwset_t, char const *, ptrdiff_t); -extern ptrdiff_t kwswords (kwset_t) _GL_ATTRIBUTE_PURE; +extern void kwsincr (kwset_t, char const *, idx_t); +extern idx_t kwswords (kwset_t) _GL_ATTRIBUTE_PURE; extern void kwsprep (kwset_t); -extern ptrdiff_t kwsexec (kwset_t, char const *, ptrdiff_t, +extern ptrdiff_t kwsexec (kwset_t, char const *, idx_t, struct kwsmatch *, bool) _GL_ARG_NONNULL ((4)); extern void kwsfree (kwset_t); diff --git a/src/pcresearch.c b/src/pcresearch.c index 37f7e409..3bdaee90 100644 --- a/src/pcresearch.c +++ b/src/pcresearch.c @@ -113,7 +113,7 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes, followed by '\n'. Return a description of the compiled pattern. */ void * -Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact) +Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact) { int e; char const *ep; @@ -202,8 +202,8 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact) return pc; } -size_t -Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, +ptrdiff_t +Pexecute (void *vcp, char const *buf, idx_t size, idx_t *match_size, char const *start_ptr) { int sub[NSUB]; diff --git a/src/search.h b/src/search.h index 6a5814a9..acc282c4 100644 --- a/src/search.h +++ b/src/search.h @@ -48,38 +48,55 @@ typedef signed char mb_len_map_t; /* searchutils.c */ extern void wordinit (void); extern kwset_t kwsinit (bool); -extern size_t wordchars_size (char const *, char const *) _GL_ATTRIBUTE_PURE; -extern size_t wordchar_next (char const *, char const *) _GL_ATTRIBUTE_PURE; -extern size_t wordchar_prev (char const *, char const *, char const *) +extern idx_t wordchars_size (char const *, char const *) _GL_ATTRIBUTE_PURE; +extern idx_t wordchar_next (char const *, char const *) _GL_ATTRIBUTE_PURE; +extern idx_t wordchar_prev (char const *, char const *, char const *) _GL_ATTRIBUTE_PURE; -extern ptrdiff_t mb_goback (char const **, size_t *, char const *, - char const *); +extern ptrdiff_t mb_goback (char const **, idx_t *, char const *, char const *); /* dfasearch.c */ -extern void *GEAcompile (char *, size_t, reg_syntax_t, bool); -extern size_t EGexecute (void *, char const *, size_t, size_t *, char const *); +extern void *GEAcompile (char *, idx_t, reg_syntax_t, bool); +extern ptrdiff_t EGexecute (void *, char const *, idx_t, idx_t *, char const *); /* kwsearch.c */ -extern void *Fcompile (char *, size_t, reg_syntax_t, bool); -extern size_t Fexecute (void *, char const *, size_t, size_t *, char const *); +extern void *Fcompile (char *, idx_t, reg_syntax_t, bool); +extern ptrdiff_t Fexecute (void *, char const *, idx_t, idx_t *, char const *); /* pcresearch.c */ -extern void *Pcompile (char *, size_t, reg_syntax_t, bool); -extern size_t Pexecute (void *, char const *, size_t, size_t *, char const *); +extern void *Pcompile (char *, idx_t, reg_syntax_t, bool); +extern ptrdiff_t Pexecute (void *, char const *, idx_t, idx_t *, char const *); /* grep.c */ extern struct localeinfo localeinfo; -extern void fgrep_to_grep_pattern (char **, size_t *); +extern void fgrep_to_grep_pattern (char **, idx_t *); + +/* Return the number of bytes in the character at the start of S, which + is of size N. N must be positive. MBS is the conversion state. + This acts like mbrlen, except it returns -1 and -2 instead of + (size_t) -1 and (size_t) -2. */ +SEARCH_INLINE ptrdiff_t +imbrlen (char const *s, idx_t n, mbstate_t *mbs) +{ + size_t len = mbrlen (s, n, mbs); + + /* Convert result to ptrdiff_t portably, even on oddball platforms. + When optimizing, this typically uses no machine instructions. */ + if (len <= MB_LEN_MAX) + return len; + ptrdiff_t neglen = -len; + return -neglen; +} /* Return the number of bytes in the character at the start of S, which is of size N. N must be positive. MBS is the conversion state. This acts like mbrlen, except it returns 1 when mbrlen would return 0, + it returns -1 and -2 instead of (size_t) -1 and (size_t) -2, and it is typically faster because of the cache. */ -SEARCH_INLINE size_t -mb_clen (char const *s, size_t n, mbstate_t *mbs) +SEARCH_INLINE ptrdiff_t +mb_clen (char const *s, idx_t n, mbstate_t *mbs) { signed char len = localeinfo.sbclen[to_uchar (*s)]; - return len == -2 ? mbrlen (s, n, mbs) : len; + return len == -2 ? imbrlen (s, n, mbs) : len; } extern char const *input_filename (void); diff --git a/src/searchutils.c b/src/searchutils.c index 0080dd75..ebc4a115 100644 --- a/src/searchutils.c +++ b/src/searchutils.c @@ -47,7 +47,7 @@ kwsinit (bool mb_trans) if (match_icase && (MB_CUR_MAX == 1 || mb_trans)) { - trans = xmalloc (NCHAR); + trans = ximalloc (NCHAR); /* If I is a single-byte character that becomes a different single-byte character when uppercased, set trans[I] to that character. Otherwise, set trans[I] to I. */ @@ -88,7 +88,7 @@ kwsinit (bool mb_trans) Treat encoding errors as if they were single-byte characters. */ ptrdiff_t -mb_goback (char const **mb_start, size_t *mbclen, char const *cur, +mb_goback (char const **mb_start, idx_t *mbclen, char const *cur, char const *end) { const char *p = *mb_start; @@ -114,8 +114,8 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur, if (long_enough) { mbstate_t mbs = { 0 }; - size_t clen = mbrlen (cur - i, end - (cur - i), &mbs); - if (clen <= MB_LEN_MAX) + ptrdiff_t clen = imbrlen (cur - i, end - (cur - i), &mbs); + if (0 <= clen) { /* This multibyte character contains *CUR. */ p0 = cur - i; @@ -130,13 +130,13 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur, /* In non-UTF-8 encodings, to find character boundaries one must in general scan forward from the start of the buffer. */ mbstate_t mbs = { 0 }; - size_t clen; + ptrdiff_t clen; do { clen = mb_clen (p, end - p, &mbs); - if (MB_LEN_MAX < clen) + if (clen < 0) { /* An invalid sequence, or a truncated multibyte character. Treat it as a single byte character. */ @@ -159,10 +159,10 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur, /* Examine the start of BUF (which goes to END) for word constituents. If COUNTALL, examine as many as possible; otherwise, examine at most one. Return the total number of bytes in the examined characters. */ -static size_t +static idx_t wordchars_count (char const *buf, char const *end, bool countall) { - size_t n = 0; + idx_t n = 0; mbstate_t mbs = { 0 }; while (n < end - buf) { @@ -188,7 +188,7 @@ wordchars_count (char const *buf, char const *end, bool countall) /* Examine the start of BUF for the longest prefix containing just word constituents. Return the total number of bytes in the prefix. The buffer ends at END. */ -size_t +idx_t wordchars_size (char const *buf, char const *end) { return wordchars_count (buf, end, true); @@ -196,7 +196,7 @@ wordchars_size (char const *buf, char const *end) /* If BUF starts with a word constituent, return the number of bytes used to represent it; otherwise, return zero. The buffer ends at END. */ -size_t +idx_t wordchar_next (char const *buf, char const *end) { return wordchars_count (buf, end, false); @@ -205,7 +205,7 @@ wordchar_next (char const *buf, char const *end) /* In the buffer BUF, return nonzero if the character whose encoding contains the byte before CUR is a word constituent. The buffer ends at END. */ -size_t +idx_t wordchar_prev (char const *buf, char const *cur, char const *end) { if (buf == cur) |