summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Paul Eggert <eggert@cs.ucla.edu> 2021-08-24 17:19:22 -0700
committer Paul Eggert <eggert@cs.ucla.edu> 2021-08-25 12:11:27 -0700
commit e3694e90b4789ccafaf022a29d9ce08ff11375c2 (patch)
tree b09bc151ec222cfe5e6757a0cd85fe05ec3125a5
parent b7d83f46d81a304e188c82877430765c29a75610 (diff)
download grep-e3694e90b4789ccafaf022a29d9ce08ff11375c2.tar.gz
grep: prefer signed to unsigned integers
This improves runtime checking for integer overflow when compiling
with gcc -fsanitize=undefined and the like.  It also avoids the need
for some integer casts, which can be error-prone.
* bootstrap.conf (gnulib_modules): Add idx.
* src/dfasearch.c (struct dfa_comp, kwsmusts):
(possible_backrefs_in_pattern, regex_compile, GEAcompile)
(EGexecute):
* src/grep.c (struct patloc, patlocs_allocated, patlocs_used)
(n_patterns, update_patterns, pattern_file_name, poison_len)
(asan_poison, fwrite_errno, compile_fp_t, execute_fp_t)
(buf_has_encoding_errors, buf_has_nulls, file_must_have_nulls)
(bufalloc, pagesize, all_zeros, fillbuf, nlscan)
(print_line_head, print_line_middle, print_line_tail, grepbuf)
(grep, contains_encoding_error, fgrep_icase_available)
(fgrep_icase_charlen, fgrep_to_grep_pattern, try_fgrep_pattern)
(main):
* src/kwsearch.c (struct kwsearch, Fcompile, Fexecute):
* src/kwset.c (struct trie, struct kwset, kwsalloc, kwsincr)
(kwswords, treefails, memchr_kwset, acexec_trans, kwsexec)
(treedelta, kwsprep, bm_delta2_search, bmexec_trans, bmexec)
(acexec):
* src/kwset.h (struct kwsmatch):
* src/pcresearch.c (Pcompile, Pexecute):
* src/search.h (mb_clen):
* src/searchutils.c (kwsinit, mb_goback, wordchars_count)
(wordchars_size, wordchar_next, wordchar_prev):
Prefer idx_t to size_t or ptrdiff_t for nonnegative sizes,
and prefer ptrdiff_t to size_t for sizes plus error values.
* src/grep.c (uword_size): New constant, used for signed size
calculations.
(totalnl, add_count, totalcc, print_offset, print_line_head, grep):
Prefer intmax_t to uintmax_t for wide integer calculations.
(fgrep_icase_charlen): Prefer ptrdiff_t to int for size offsets.
* src/grep.h: Include idx.h.
* src/search.h (imbrlen): New function, like mbrlen except
with idx_t and ptrdiff_t.
-rw-r--r-- bootstrap.conf 1
-rw-r--r-- src/dfasearch.c 75
-rw-r--r-- src/grep.c 282
-rw-r--r-- src/grep.h 3
-rw-r--r-- src/kwsearch.c 30
-rw-r--r-- src/kwset.c 88
-rw-r--r-- src/kwset.h 17
-rw-r--r-- src/pcresearch.c 6
-rw-r--r-- src/search.h 47
-rw-r--r-- src/searchutils.c 22
10 files changed, 294 insertions, 277 deletions
diff --git a/bootstrap.conf b/bootstrap.conf
index 8e46000a..7e4f24c3 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -50,6 +50,7 @@ gitlog-to-changelog
gnu-web-doc-update
gnupload
hash
+idx
ignore-value
intprops
inttypes
diff --git a/src/dfasearch.c b/src/dfasearch.c
index d6afa8d3..16758655 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -36,13 +36,13 @@ struct dfa_comp
/* Regex compiled regexps. */
struct re_pattern_buffer *patterns;
- size_t pcount;
+ idx_t pcount;
struct re_registers regs;
/* Number of compiled fixed strings known to exactly match the regexp.
If kwsexec returns < kwset_exact_matches, then we don't need to
call the regexp matcher at all. */
- ptrdiff_t kwset_exact_matches;
+ idx_t kwset_exact_matches;
bool begline;
};
@@ -80,9 +80,9 @@ kwsmusts (struct dfa_comp *dc)
The kwset matcher will return the index of the matching
string that it chooses. */
++dc->kwset_exact_matches;
- ptrdiff_t old_len = strlen (dm->must);
- ptrdiff_t new_len = old_len + dm->begline + dm->endline;
- char *must = xmalloc (new_len);
+ idx_t old_len = strlen (dm->must);
+ idx_t new_len = old_len + dm->begline + dm->endline;
+ char *must = ximalloc (new_len);
char *mp = must;
*mp = eolbyte;
mp += dm->begline;
@@ -108,7 +108,7 @@ kwsmusts (struct dfa_comp *dc)
BS_SAFE is true of encodings where a backslash cannot appear as the
last byte of a multibyte character. */
static bool _GL_ATTRIBUTE_PURE
-possible_backrefs_in_pattern (char const *keys, ptrdiff_t len, bool bs_safe)
+possible_backrefs_in_pattern (char const *keys, idx_t len, bool bs_safe)
{
/* Normally a backslash, but in an unsafe encoding this is a non-char
value so that the comparison below always fails, because if there
@@ -144,8 +144,8 @@ possible_backrefs_in_pattern (char const *keys, ptrdiff_t len, bool bs_safe)
}
static bool
-regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
- ptrdiff_t pcount, ptrdiff_t lineno, reg_syntax_t syntax_bits,
+regex_compile (struct dfa_comp *dc, char const *p, idx_t len,
+ idx_t pcount, idx_t lineno, reg_syntax_t syntax_bits,
bool syntax_only)
{
struct re_pattern_buffer pat0;
@@ -154,7 +154,9 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
pat->allocated = 0;
/* Do not use a fastmap with -i, to work around glibc Bug#20381. */
- pat->fastmap = (syntax_only | match_icase) ? NULL : xmalloc (UCHAR_MAX + 1);
+ verify (UCHAR_MAX < IDX_MAX);
+ idx_t uchar_max = UCHAR_MAX;
+ pat->fastmap = (syntax_only | match_icase) ? NULL : ximalloc (uchar_max + 1);
pat->translate = NULL;
@@ -168,14 +170,17 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
return true;
/* Emit a filename:lineno: prefix for patterns taken from files. */
- size_t pat_lineno;
+ idx_t pat_lineno;
char const *pat_filename
= lineno < 0 ? "" : pattern_file_name (lineno, &pat_lineno);
if (*pat_filename == '\0')
error (0, 0, "%s", err);
else
- error (0, 0, "%s:%zu: %s", pat_filename, pat_lineno, err);
+ {
+ ptrdiff_t n = pat_lineno;
+ error (0, 0, "%s:%td: %s", pat_filename, n, err);
+ }
return false;
}
@@ -185,7 +190,7 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
Return a description of the compiled pattern. */
void *
-GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
+GEAcompile (char *pattern, idx_t size, reg_syntax_t syntax_bits,
bool exact)
{
char *motif;
@@ -210,29 +215,30 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
dc->patterns = xmalloc (sizeof *dc->patterns);
dc->patterns++;
dc->pcount = 0;
- size_t palloc = 1;
+ idx_t palloc = 1;
char const *prev = pattern;
/* Buffer containing back-reference-free patterns. */
char *buf = NULL;
- ptrdiff_t buflen = 0;
- size_t bufalloc = 0;
+ idx_t buflen = 0;
+ idx_t bufalloc = 0;
- ptrdiff_t lineno = 0;
+ idx_t lineno = 0;
do
{
char const *sep = rawmemchr (p, '\n');
- ptrdiff_t len = sep - p;
+ idx_t len = sep - p;
bool backref = possible_backrefs_in_pattern (p, len, bs_safe);
if (backref && prev < p)
{
- ptrdiff_t prevlen = p - prev;
- while (bufalloc < buflen + prevlen)
- buf = x2realloc (buf, &bufalloc);
+ idx_t prevlen = p - prev;
+ ptrdiff_t bufshortage = buflen - bufalloc + prevlen;
+ if (0 < bufshortage)
+ buf = xpalloc (buf, &bufalloc, bufshortage, -1, 1);
memcpy (buf + buflen, prev, prevlen);
buflen += prevlen;
}
@@ -240,10 +246,11 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
/* Ensure room for at least two more patterns. The extra one is
for the regex_compile that may be executed after this loop
exits, and its (unused) slot is patterns[-1] until then. */
- while (palloc <= dc->pcount + 1)
+ ptrdiff_t shortage = dc->pcount - palloc + 2;
+ if (0 < shortage)
{
- dc->patterns = x2nrealloc (dc->patterns - 1, &palloc,
- sizeof *dc->patterns);
+ dc->patterns = xpalloc (dc->patterns - 1, &palloc, shortage, -1,
+ sizeof *dc->patterns);
dc->patterns++;
}
@@ -271,8 +278,8 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
{
if (pattern < prev)
{
- ptrdiff_t prevlen = patlim - prev;
- buf = xrealloc (buf, buflen + prevlen);
+ idx_t prevlen = patlim - prev;
+ buf = xirealloc (buf, buflen + prevlen);
memcpy (buf + buflen, prev, prevlen);
buflen += prevlen;
}
@@ -298,11 +305,12 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
static char const word_beg_bk[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
static char const word_end_bk[] = "\\)\\([^[:alnum:]_]\\|$\\)";
int bk = !(syntax_bits & RE_NO_BK_PARENS);
- char *n = xmalloc (sizeof word_beg_bk - 1 + size + sizeof word_end_bk);
+ idx_t bracket_bytes = sizeof word_beg_bk - 1 + sizeof word_end_bk;
+ char *n = ximalloc (size + bracket_bytes);
strcpy (n, match_lines ? (bk ? line_beg_bk : line_beg_no_bk)
: (bk ? word_beg_bk : word_beg_no_bk));
- size_t total = strlen (n);
+ idx_t total = strlen (n);
memcpy (n + total, pattern, size);
total += size;
strcpy (n + total, match_lines ? (bk ? line_end_bk : line_end_no_bk)
@@ -338,16 +346,16 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
return dc;
}
-size_t
-EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
+ptrdiff_t
+EGexecute (void *vdc, char const *buf, idx_t size, idx_t *match_size,
char const *start_ptr)
{
char const *buflim, *beg, *end, *ptr, *match, *best_match, *mb_start;
char eol = eolbyte;
regoff_t start;
- size_t len, best_len;
+ idx_t len, best_len;
struct kwsmatch kwsm;
- size_t i;
+ idx_t i;
struct dfa_comp *dc = vdc;
struct dfa *superset = dfasuperset (dc->dfa);
bool dfafast = dfaisfast (dc->dfa);
@@ -362,7 +370,7 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
if (!start_ptr)
{
char const *next_beg, *dfa_beg = beg;
- ptrdiff_t count = 0;
+ idx_t count = 0;
bool exact_kwset_match = false;
bool backref = false;
@@ -584,7 +592,6 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
success:
len = end - beg;
success_in_len:;
- size_t off = beg - buf;
*match_size = len;
- return off;
+ return beg - buf;
}
diff --git a/src/grep.c b/src/grep.c
index 35693752..a55194cf 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -88,13 +88,13 @@ struct patloc
{
/* Line number of the pattern in PATTERN_ARRAY. Line numbers
start at 0, and each pattern is terminated by '\n'. */
- ptrdiff_t lineno;
+ idx_t lineno;
/* Input location of the pattern. The FILENAME "-" represents
standard input, and "" represents the command line. FILELINE is
origin-1 for files and is irrelevant for the command line. */
char const *filename;
- ptrdiff_t fileline;
+ idx_t fileline;
};
/* The array of pattern locations. The concatenation of all patterns
@@ -108,13 +108,13 @@ struct patloc
removed patterns not at a file start or end requires another
PATLOC entry for the first non-removed pattern. */
static struct patloc *patloc;
-static size_t patlocs_allocated, patlocs_used;
+static idx_t patlocs_allocated, patlocs_used;
/* Pointer to the array of patterns, each terminated by newline. */
static char *pattern_array;
/* The number of unique patterns seen so far. */
-static size_t n_patterns;
+static idx_t n_patterns;
/* Hash table of patterns seen so far. */
static Hash_table *pattern_table;
@@ -160,16 +160,16 @@ compare_patterns (void const *a, void const *b)
sequence of patterns with no duplicates; SIZE is the total number
of bytes in KEYS. If some patterns past the first DUPFREE_SIZE
bytes are not duplicates, update PATLOCS accordingly. */
-static ptrdiff_t
-update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size,
+static idx_t
+update_patterns (char *keys, idx_t dupfree_size, idx_t size,
char const *filename)
{
char *dst = keys + dupfree_size;
- ptrdiff_t fileline = 1;
+ idx_t fileline = 1;
int prev_inserted = 0;
char const *srclim = keys + size;
- ptrdiff_t patsize;
+ idx_t patsize;
for (char const *src = keys + dupfree_size; src < srclim; src += patsize)
{
char const *patend = rawmemchr (src, '\n');
@@ -190,8 +190,8 @@ update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size,
if (!prev_inserted)
{
if (patlocs_used == patlocs_allocated)
- patloc = x2nrealloc (patloc, &patlocs_allocated,
- sizeof *patloc);
+ patloc = xpalloc (patloc, &patlocs_allocated, 1, -1,
+ sizeof *patloc);
patloc[patlocs_used++]
= (struct patloc) { .lineno = n_patterns,
.filename = filename,
@@ -213,9 +213,9 @@ update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size,
Set *NEW_LINENO to the origin-1 line number of PATTERN in the file,
or to an unspecified value if PATTERN came from the command line. */
char const * _GL_ATTRIBUTE_PURE
-pattern_file_name (size_t lineno, size_t *new_lineno)
+pattern_file_name (idx_t lineno, idx_t *new_lineno)
{
- ptrdiff_t i;
+ idx_t i;
for (i = 1; i < patlocs_used; i++)
if (lineno < patloc[i].lineno)
break;
@@ -227,7 +227,7 @@ pattern_file_name (size_t lineno, size_t *new_lineno)
/* Record the starting address and length of the sole poisoned region,
so that we can unpoison it later, just before each following read. */
static void const *poison_buf;
-static size_t poison_len;
+static idx_t poison_len;
static void
clear_asan_poison (void)
@@ -237,7 +237,7 @@ clear_asan_poison (void)
}
static void
-asan_poison (void const *addr, size_t size)
+asan_poison (void const *addr, idx_t size)
{
poison_buf = addr;
poison_len = size;
@@ -246,7 +246,7 @@ asan_poison (void const *addr, size_t size)
}
#else
static void clear_asan_poison (void) { }
-static void asan_poison (void const volatile *addr, size_t size) { }
+static void asan_poison (void const volatile *addr, idx_t size) { }
#endif
/* The group separator used when context is requested. */
@@ -467,7 +467,7 @@ printf_errno (char const *format, ...)
}
static void
-fwrite_errno (void const *ptr, size_t size, size_t nmemb)
+fwrite_errno (void const *ptr, idx_t size, idx_t nmemb)
{
if (fwrite (ptr, size, nmemb, stdout) != nmemb)
stdout_errno = errno;
@@ -644,9 +644,9 @@ static bool seek_failed;
static bool seek_data_failed;
/* Functions we'll use to search. */
-typedef void *(*compile_fp_t) (char *, size_t, reg_syntax_t, bool);
-typedef size_t (*execute_fp_t) (void *, char const *, size_t, size_t *,
- char const *);
+typedef void *(*compile_fp_t) (char *, idx_t, reg_syntax_t, bool);
+typedef ptrdiff_t (*execute_fp_t) (void *, char const *, idx_t, idx_t *,
+ char const *);
static execute_fp_t execute;
static void *compiled_pattern;
@@ -694,6 +694,7 @@ clean_up_stdout (void)
/* An unsigned type suitable for fast matching. */
typedef uintmax_t uword;
static uword const uword_max = UINTMAX_MAX;
+enum { uword_size = sizeof (uword) }; /* For when a signed size is wanted. */
struct localeinfo localeinfo;
@@ -742,7 +743,7 @@ skip_easy_bytes (char const *buf)
the buffer end, but that's benign. */
char const *p;
uword const *s;
- for (p = buf; (uintptr_t) p % sizeof (uword) != 0; p++)
+ for (p = buf; (uintptr_t) p % uword_size != 0; p++)
if (to_uchar (*p) & unibyte_mask)
return p;
for (s = CAST_ALIGNED (uword const *, p); ! (*s & unibyte_mask); s++)
@@ -753,22 +754,22 @@ skip_easy_bytes (char const *buf)
}
/* Return true if BUF, of size SIZE, has an encoding error.
- BUF must be followed by at least sizeof (uword) bytes,
+ BUF must be followed by at least uword_size bytes,
the first of which may be modified. */
static bool
-buf_has_encoding_errors (char *buf, size_t size)
+buf_has_encoding_errors (char *buf, idx_t size)
{
if (! unibyte_mask)
return false;
mbstate_t mbs = { 0 };
- size_t clen;
+ ptrdiff_t clen;
buf[size] = -1;
for (char const *p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen)
{
- clen = mbrlen (p, buf + size - p, &mbs);
- if (MB_LEN_MAX < clen)
+ clen = imbrlen (p, buf + size - p, &mbs);
+ if (clen < 0)
return true;
}
@@ -780,7 +781,7 @@ buf_has_encoding_errors (char *buf, size_t size)
BUF must be followed by at least one byte,
which may be arbitrarily written to or read from. */
static bool
-buf_has_nulls (char *buf, size_t size)
+buf_has_nulls (char *buf, idx_t size)
{
buf[size] = 0;
return strlen (buf) != size;
@@ -790,7 +791,7 @@ buf_has_nulls (char *buf, size_t size)
SIZE bytes have already been read from the file
with descriptor FD and status ST. */
static bool
-file_must_have_nulls (size_t size, int fd, struct stat const *st)
+file_must_have_nulls (idx_t size, int fd, struct stat const *st)
{
/* If the file has holes, it must contain a null byte somewhere. */
if (SEEK_HOLE != SEEK_SET && !seek_failed
@@ -869,18 +870,18 @@ skipped_file (char const *name, bool command_line, bool is_dir)
page size, unless a read yields a partial page. */
static char *buffer; /* Base of buffer. */
-static size_t bufalloc; /* Allocated buffer size, counting slop. */
+static idx_t bufalloc; /* Allocated buffer size, counting slop. */
static int bufdesc; /* File descriptor. */
static char *bufbeg; /* Beginning of user-visible stuff. */
static char *buflim; /* Limit of user-visible stuff. */
-static size_t pagesize; /* alignment of memory pages */
+static idx_t pagesize; /* alignment of memory pages */
static off_t bufoffset; /* Read offset. */
static off_t after_last_match; /* Pointer after last matching line that
would have been output if we were
outputting characters. */
static bool skip_nuls; /* Skip '\0' in data. */
static bool skip_empty_lines; /* Skip empty lines in data. */
-static uintmax_t totalnl; /* Total newline count before lastnl. */
+static intmax_t totalnl; /* Total newline count before lastnl. */
/* Initial buffer size, not counting slop. */
enum { INITIAL_BUFSIZE = 96 * 1024 };
@@ -894,18 +895,18 @@ enum { INITIAL_BUFSIZE = 96 * 1024 };
/* Add two numbers that count input bytes or lines, and report an
error if the addition overflows. */
-static uintmax_t
-add_count (uintmax_t a, uintmax_t b)
+static intmax_t
+add_count (intmax_t a, idx_t b)
{
- uintmax_t sum = a + b;
- if (sum < a)
+ intmax_t sum;
+ if (!INT_ADD_OK (a, b, &sum))
die (EXIT_TROUBLE, 0, _("input is too large to count"));
return sum;
}
/* Return true if BUF (of size SIZE) is all zeros. */
static bool
-all_zeros (char const *buf, size_t size)
+all_zeros (char const *buf, idx_t size)
{
for (char const *p = buf; p < buf + size; p++)
if (*p)
@@ -944,55 +945,55 @@ reset (int fd, struct stat const *st)
to the beginning of the buffer contents, and 'buflim'
points just after the end. Return false if there's an error. */
static bool
-fillbuf (size_t save, struct stat const *st)
+fillbuf (idx_t save, struct stat const *st)
{
- size_t fillsize;
- bool cc = true;
char *readbuf;
- size_t readsize;
- if (pagesize <= buffer + bufalloc - sizeof (uword) - buflim)
+ /* After BUFLIM, we need room for at least a page of data plus a
+ trailing uword. */
+ idx_t min_after_buflim = pagesize + uword_size;
+
+ if (min_after_buflim <= buffer + bufalloc - buflim)
readbuf = buflim;
else
{
- size_t minsize = save + pagesize;
- size_t newsize;
- size_t newalloc;
char *newbuf;
- /* Grow newsize until it is at least as great as minsize. */
- for (newsize = bufalloc - pagesize - sizeof (uword);
- newsize < minsize;
- newsize *= 2)
- if ((SIZE_MAX - pagesize - sizeof (uword)) / 2 < newsize)
- xalloc_die ();
-
- /* Try not to allocate more memory than the file size indicates,
- as that might cause unnecessary memory exhaustion if the file
- is large. However, do not use the original file size as a
- heuristic if we've already read past the file end, as most
- likely the file is growing. */
- if (usable_st_size (st))
- {
- off_t to_be_read = st->st_size - bufoffset;
- off_t maxsize_off = save + to_be_read;
- if (0 <= to_be_read && to_be_read <= maxsize_off
- && maxsize_off == (size_t) maxsize_off
- && minsize <= (size_t) maxsize_off
- && (size_t) maxsize_off < newsize)
- newsize = maxsize_off;
- }
+ /* For data to be searched we need room for the saved bytes,
+ plus at least a page of data to read. */
+ idx_t minsize = save + pagesize;
/* Add enough room so that the buffer is aligned and has room
for byte sentinels fore and aft, and so that a uword can
be read aft. */
- newalloc = newsize + pagesize + sizeof (uword);
+ ptrdiff_t incr_min = minsize - bufalloc + min_after_buflim;
+
+ if (incr_min <= 0)
+ newbuf = buffer;
+ else
+ {
+ /* Try not to allocate more memory than the file size indicates,
+ as that might cause unnecessary memory exhaustion if the file
+ is large. However, do not use the original file size as a
+ heuristic if we've already read past the file end, as most
+ likely the file is growing. */
+ ptrdiff_t alloc_max = -1;
+ if (usable_st_size (st))
+ {
+ off_t to_be_read = st->st_size - bufoffset;
+ ptrdiff_t a;
+ if (0 <= to_be_read
+ && INT_ADD_OK (to_be_read, save + min_after_buflim, &a))
+ alloc_max = a;
+ }
+
+ newbuf = xpalloc (NULL, &bufalloc, incr_min, alloc_max, 1);
+ }
- newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer;
readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
- size_t moved = save + 1; /* Move the preceding byte sentinel too. */
+ idx_t moved = save + 1; /* Move the preceding byte sentinel too. */
memmove (readbuf - moved, buflim - moved, moved);
- if (newbuf != buffer)
+ if (0 < incr_min)
{
free (buffer);
buffer = newbuf;
@@ -1003,9 +1004,12 @@ fillbuf (size_t save, struct stat const *st)
clear_asan_poison ();
- readsize = buffer + bufalloc - sizeof (uword) - readbuf;
+ idx_t readsize = buffer + bufalloc - uword_size - readbuf;
readsize -= readsize % pagesize;
+ idx_t fillsize;
+ bool cc = true;
+
while (true)
{
fillsize = safe_read (bufdesc, readbuf, readsize);
@@ -1043,12 +1047,11 @@ fillbuf (size_t save, struct stat const *st)
/* Initialize the following word, because skip_easy_bytes and some
matchers read (but do not use) those bytes. This avoids false
positive reports of these bytes being used uninitialized. */
- memset (buflim, 0, sizeof (uword));
+ memset (buflim, 0, uword_size);
/* Mark the part of the buffer not filled by the read or set by
the above memset call as ASAN-poisoned. */
- asan_poison (buflim + sizeof (uword),
- bufalloc - (buflim - buffer) - sizeof (uword));
+ asan_poison (buflim + uword_size, bufalloc - (buflim - buffer) - uword_size);
return cc;
}
@@ -1089,7 +1092,7 @@ static char *label = NULL; /* Fake filename for stdin */
/* Internal variables to keep track of byte count, context, etc. */
-static uintmax_t totalcc; /* Total character count before bufbeg. */
+static intmax_t totalcc; /* Total character count before bufbeg. */
static char const *lastnl; /* Pointer after last newline counted. */
static char *lastout; /* Pointer after last character output;
NULL if no character has been output
@@ -1105,7 +1108,7 @@ static bool binary; /* Use binary rather than text I/O. */
static void
nlscan (char const *lim)
{
- size_t newlines = 0;
+ idx_t newlines = 0;
for (char const *beg = lastnl; beg < lim; beg++)
{
beg = memchr (beg, eolbyte, lim - beg);
@@ -1137,16 +1140,16 @@ print_sep (char sep)
/* Print a line number or a byte offset. */
static void
-print_offset (uintmax_t pos, const char *color)
+print_offset (intmax_t pos, const char *color)
{
pr_sgr_start_if (color);
- printf_errno ("%*"PRIuMAX, offset_width, pos);
+ printf_errno ("%*"PRIdMAX, offset_width, pos);
pr_sgr_end_if (color);
}
/* Print a whole line head (filename, line, byte). The output data
starts at BEG and contains LEN bytes; it is followed by at least
- sizeof (uword) bytes, the first of which may be temporarily modified.
+ uword_size bytes, the first of which may be temporarily modified.
The output data comes from what is perhaps a larger input line that
goes until LIM, where LIM[-1] is an end-of-line byte. Use SEP as
the separator on output.
@@ -1154,7 +1157,7 @@ print_offset (uintmax_t pos, const char *color)
Return true unless the line was suppressed due to an encoding error. */
static bool
-print_line_head (char *beg, size_t len, char const *lim, char sep)
+print_line_head (char *beg, idx_t len, char const *lim, char sep)
{
if (binary_files != TEXT_BINARY_FILES)
{
@@ -1191,7 +1194,7 @@ print_line_head (char *beg, size_t len, char const *lim, char sep)
if (out_byte)
{
- uintmax_t pos = add_count (totalcc, beg - bufbeg);
+ intmax_t pos = add_count (totalcc, beg - bufbeg);
print_offset (pos, byte_num_color);
print_sep (sep);
}
@@ -1206,16 +1209,16 @@ static char *
print_line_middle (char *beg, char *lim,
const char *line_color, const char *match_color)
{
- size_t match_size;
- size_t match_offset;
+ idx_t match_size;
+ ptrdiff_t match_offset;
char *cur;
char *mid = NULL;
char *b;
for (cur = beg;
(cur < lim
- && ((match_offset = execute (compiled_pattern, beg, lim - beg,
- &match_size, cur)) != (size_t) -1));
+ && 0 <= (match_offset = execute (compiled_pattern, beg, lim - beg,
+ &match_size, cur)));
cur = b + match_size)
{
b = beg + match_offset;
@@ -1273,8 +1276,8 @@ print_line_middle (char *beg, char *lim,
static char *
print_line_tail (char *beg, const char *lim, const char *line_color)
{
- size_t eol_size;
- size_t tail_size;
+ idx_t eol_size;
+ idx_t tail_size;
eol_size = (lim > beg && lim[-1] == eolbyte);
eol_size += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r');
@@ -1462,10 +1465,10 @@ grepbuf (char *beg, char const *lim)
for (char *p = beg; p < lim; p = endp)
{
- size_t match_size;
- size_t match_offset = execute (compiled_pattern, p, lim - p,
- &match_size, NULL);
- if (match_offset == (size_t) -1)
+ idx_t match_size;
+ ptrdiff_t match_offset = execute (compiled_pattern, p, lim - p,
+ &match_size, NULL);
+ if (match_offset < 0)
{
if (!out_invert)
break;
@@ -1500,7 +1503,7 @@ static intmax_t
grep (int fd, struct stat const *st, bool *ineof)
{
intmax_t nlines, i;
- size_t residue, save;
+ idx_t residue, save;
char oldc;
char *beg;
char *lim;
@@ -1540,8 +1543,8 @@ grep (int fd, struct stat const *st, bool *ineof)
if (align_tabs)
{
/* Width is log of maximum number. Line numbers are origin-1. */
- uintmax_t num = usable_st_size (st) ? st->st_size : UINTMAX_MAX;
- num += out_line && num < UINTMAX_MAX;
+ intmax_t num = usable_st_size (st) ? st->st_size : INTMAX_MAX;
+ num += out_line && num < INTMAX_MAX;
do
offset_width++;
while ((num /= 10) != 0);
@@ -2231,15 +2234,15 @@ parse_grep_colors (void)
/* Return true if PAT (of length PATLEN) contains an encoding error. */
static bool
-contains_encoding_error (char const *pat, size_t patlen)
+contains_encoding_error (char const *pat, idx_t patlen)
{
mbstate_t mbs = { 0 };
- size_t charlen;
+ ptrdiff_t charlen;
- for (size_t i = 0; i < patlen; i += charlen)
+ for (idx_t i = 0; i < patlen; i += charlen)
{
charlen = mb_clen (pat + i, patlen - i, &mbs);
- if (MB_LEN_MAX < charlen)
+ if (charlen < 0)
return true;
}
return false;
@@ -2279,8 +2282,8 @@ setup_ok_fold (void)
Fcompile cannot handle it. MBS is the multibyte conversion state.
PATLEN must be nonzero. */
-static int
-fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs)
+static ptrdiff_t
+fgrep_icase_charlen (char const *pat, idx_t patlen, mbstate_t *mbs)
{
unsigned char pat0 = pat[0];
@@ -2302,7 +2305,7 @@ fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs)
wchar_t folded[CASE_FOLDED_BUFSIZE];
if (case_folded_counterparts (wc, folded))
return -1;
- for (int i = wn; 0 < --i; )
+ for (idx_t i = wn; 0 < --i; )
{
unsigned char c = pat[i];
if (toupper (c) != c)
@@ -2317,11 +2320,11 @@ fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs)
and so can be processed by Fcompile. */
static bool
-fgrep_icase_available (char const *pat, size_t patlen)
+fgrep_icase_available (char const *pat, idx_t patlen)
{
mbstate_t mbs = {0,};
- for (size_t i = 0; i < patlen; )
+ for (idx_t i = 0; i < patlen; )
{
int n = fgrep_icase_charlen (pat + i, patlen - i, &mbs);
if (n < 0)
@@ -2335,28 +2338,27 @@ fgrep_icase_available (char const *pat, size_t patlen)
/* Change the pattern *KEYS_P, of size *LEN_P, from fgrep to grep style. */
void
-fgrep_to_grep_pattern (char **keys_p, size_t *len_p)
+fgrep_to_grep_pattern (char **keys_p, idx_t *len_p)
{
- size_t len = *len_p;
+ idx_t len = *len_p;
char *keys = *keys_p;
mbstate_t mb_state = { 0 };
char *new_keys = xnmalloc (len + 1, 2);
char *p = new_keys;
- size_t n;
- for (; len; keys += n, len -= n)
+ for (ptrdiff_t n; len; keys += n, len -= n)
{
n = mb_clen (keys, len, &mb_state);
switch (n)
{
- case (size_t) -2:
+ case -2:
n = len;
FALLTHROUGH;
default:
p = mempcpy (p, keys, n);
break;
- case (size_t) -1:
+ case -1:
memset (&mb_state, 0, sizeof mb_state);
n = 1;
FALLTHROUGH;
@@ -2385,11 +2387,11 @@ fgrep_to_grep_pattern (char **keys_p, size_t *len_p)
to the -F pattern "a". */
static int
-try_fgrep_pattern (int matcher, char *keys, size_t *len_p)
+try_fgrep_pattern (int matcher, char *keys, idx_t *len_p)
{
int result = matcher;
- size_t len = *len_p;
- char *new_keys = xmalloc (len + 1);
+ idx_t len = *len_p;
+ char *new_keys = ximalloc (len + 1);
char *p = new_keys;
char const *q = keys;
mbstate_t mb_state = { 0 };
@@ -2434,26 +2436,14 @@ try_fgrep_pattern (int matcher, char *keys, size_t *len_p)
break;
}
- {
- size_t n;
- if (match_icase)
- {
- int ni = fgrep_icase_charlen (q, len, &mb_state);
- if (ni < 0)
- goto fail;
- n = ni;
- }
- else
- {
- n = mb_clen (q, len, &mb_state);
- if (MB_LEN_MAX < n)
- goto fail;
- }
-
- p = mempcpy (p, q, n);
- q += n;
- len -= n;
- }
+ ptrdiff_t clen = (match_icase
+ ? fgrep_icase_charlen (q, len, &mb_state)
+ : mb_clen (q, len, &mb_state));
+ if (clen < 0)
+ goto fail;
+ p = mempcpy (p, q, clen);
+ q += clen;
+ len -= clen;
}
if (*len_p != p - new_keys)
@@ -2473,7 +2463,7 @@ int
main (int argc, char **argv)
{
char *keys = NULL;
- size_t keycc = 0, keyalloc = 0;
+ idx_t keycc = 0, keyalloc = 0;
int matcher = -1;
int opt;
int prev_optind, last_recursive;
@@ -2612,12 +2602,10 @@ main (int argc, char **argv)
case 'e':
{
- ptrdiff_t cc = strlen (optarg);
- if (keyalloc < keycc + cc + 1)
- {
- keyalloc = keycc + cc + 1;
- pattern_array = keys = x2realloc (keys, &keyalloc);
- }
+ idx_t cc = strlen (optarg);
+ ptrdiff_t shortage = keycc - keyalloc + cc + 1;
+ if (0 < shortage)
+ pattern_array = keys = xpalloc (keys, &keyalloc, shortage, -1, 1);
char *keyend = mempcpy (keys + keycc, optarg, cc);
*keyend = '\n';
keycc = update_patterns (keys, keycc, keycc + cc + 1, "");
@@ -2638,11 +2626,13 @@ main (int argc, char **argv)
if (!fp)
die (EXIT_TROUBLE, errno, "%s", optarg);
}
- ptrdiff_t newkeycc = keycc, cc;
+ idx_t newkeycc = keycc, cc;
for (;; newkeycc += cc)
{
- if (keyalloc <= newkeycc + 1)
- pattern_array = keys = x2realloc (keys, &keyalloc);
+ ptrdiff_t shortage = newkeycc - keyalloc + 2;
+ if (0 < shortage)
+ pattern_array = keys = xpalloc (keys, &keyalloc,
+ shortage, -1, 1);
cc = fread (keys + newkeycc, 1, keyalloc - (newkeycc + 1), fp);
if (cc == 0)
break;
@@ -2861,7 +2851,7 @@ main (int argc, char **argv)
{
/* Make a copy so that it can be reallocated or freed later. */
pattern_array = keys = xstrdup (argv[optind++]);
- ptrdiff_t patlen = strlen (keys);
+ idx_t patlen = strlen (keys);
keys[patlen] = '\n';
keycc = update_patterns (keys, 0, patlen + 1, "");
}
@@ -2968,7 +2958,7 @@ main (int argc, char **argv)
only_matching | color_option);
/* We need one byte prior and one after. */
char eolbytes[3] = { 0, eolbyte, 0 };
- size_t match_size;
+ idx_t match_size;
skip_empty_lines = ((execute (compiled_pattern, eolbytes + 1, 1,
&match_size, NULL) == 0)
== out_invert);
@@ -2987,11 +2977,11 @@ main (int argc, char **argv)
#else
long psize = getpagesize ();
#endif
- if (! (0 < psize && psize <= (SIZE_MAX - sizeof (uword)) / 2))
+ if (! (0 < psize && psize <= (IDX_MAX - uword_size) / 2))
abort ();
pagesize = psize;
- bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + sizeof (uword);
- buffer = xmalloc (bufalloc);
+ bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + uword_size;
+ buffer = ximalloc (bufalloc);
if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES)
devices = READ_DEVICES;
diff --git a/src/grep.h b/src/grep.h
index a3cd73ee..04c15dd5 100644
--- a/src/grep.h
+++ b/src/grep.h
@@ -21,6 +21,7 @@
#define GREP_GREP_H 1
#include <stdbool.h>
+#include <idx.h>
/* The following flags are exported from grep for the matchers
to look at. */
@@ -29,6 +30,6 @@ extern bool match_words; /* -w */
extern bool match_lines; /* -x */
extern char eolbyte; /* -z */
-extern char const *pattern_file_name (size_t, size_t *);
+extern char const *pattern_file_name (idx_t, idx_t *);
#endif
diff --git a/src/kwsearch.c b/src/kwsearch.c
index ea18ce18..171db9ac 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -32,11 +32,11 @@ struct kwsearch
'kwswords (kwset)' when some extra one-character words have been
appended, one for each troublesome character that will require a
DFA search. */
- ptrdiff_t words;
+ idx_t words;
/* The user's pattern and its size in bytes. */
char *pattern;
- size_t size;
+ idx_t size;
/* The user's pattern compiled as a regular expression,
or null if it has not been compiled. */
@@ -47,11 +47,11 @@ struct kwsearch
followed by '\n'. Return a description of the compiled pattern. */
void *
-Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
+Fcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
{
kwset_t kwset;
char *buf = NULL;
- size_t bufalloc = 0;
+ idx_t bufalloc = 0;
kwset = kwsinit (true);
@@ -59,7 +59,7 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
do
{
char const *sep = rawmemchr (p, '\n');
- ptrdiff_t len = sep - p;
+ idx_t len = sep - p;
if (match_lines)
{
@@ -70,8 +70,8 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
if (bufalloc < len + 2)
{
free (buf);
- bufalloc = len + 2;
- buf = x2realloc (NULL, &bufalloc);
+ bufalloc = len;
+ buf = xpalloc (NULL, &bufalloc, 2, -1, 1);
buf[0] = eolbyte;
}
memcpy (buf + 1, p, len);
@@ -88,7 +88,7 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
free (buf);
- ptrdiff_t words = kwswords (kwset);
+ idx_t words = kwswords (kwset);
kwsprep (kwset);
struct kwsearch *kwsearch = xmalloc (sizeof *kwsearch);
@@ -102,14 +102,14 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
/* Use the compiled pattern VCP to search the buffer BUF of size SIZE.
If found, return the offset of the first match and store its
- size into *MATCH_SIZE. If not found, return SIZE_MAX.
+ size into *MATCH_SIZE. If not found, return -1.
If START_PTR is nonnull, start searching there. */
-size_t
-Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
+ptrdiff_t
+Fexecute (void *vcp, char const *buf, idx_t size, idx_t *match_size,
char const *start_ptr)
{
char const *beg, *end, *mb_start;
- ptrdiff_t len;
+ idx_t len;
char eol = eolbyte;
struct kwsearch *kwsearch = vcp;
kwset_t kwset = kwsearch->kwset;
@@ -126,7 +126,7 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
break;
len = kwsmatch.size - 2 * match_lines;
- size_t mbclen = 0;
+ idx_t mbclen = 0;
if (mb_check
&& mb_goback (&mb_start, &mbclen, beg + offset, buf + size) != 0)
{
@@ -198,8 +198,8 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
else
end = buf + size;
- if (EGexecute (kwsearch->re, beg, end - beg, match_size, NULL)
- != (size_t) -1)
+ if (0 <= EGexecute (kwsearch->re, beg, end - beg,
+ match_size, NULL))
goto success_match_words;
beg = end - 1;
break;
diff --git a/src/kwset.c b/src/kwset.c
index e5ac1a98..329b802f 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -59,31 +59,31 @@ struct tree
struct trie
{
/* If an accepting node, this is either 2*W + 1 where W is the word
- index, or is SIZE_MAX if Aho-Corasick is in use and FAIL
+ index, or is -1 if Aho-Corasick is in use and FAIL
specifies where to look for more info. If not an accepting node,
this is zero. */
- size_t accepting;
+ ptrdiff_t accepting;
struct tree *links; /* Tree of edges leaving this node. */
struct trie *parent; /* Parent of this node. */
struct trie *next; /* List of all trie nodes in level order. */
struct trie *fail; /* Aho-Corasick failure function. */
- ptrdiff_t depth; /* Depth of this node from the root. */
- ptrdiff_t shift; /* Shift function for search failures. */
- ptrdiff_t maxshift; /* Max shift of self and descendants. */
+ idx_t depth; /* Depth of this node from the root. */
+ idx_t shift; /* Shift function for search failures. */
+ idx_t maxshift; /* Max shift of self and descendants. */
};
/* Structure returned opaquely to the caller, containing everything. */
struct kwset
{
struct obstack obstack; /* Obstack for node allocation. */
- ptrdiff_t words; /* Number of words in the trie. */
+ idx_t words; /* Number of words in the trie. */
struct trie *trie; /* The trie itself. */
- ptrdiff_t mind; /* Minimum depth of an accepting node. */
+ idx_t mind; /* Minimum depth of an accepting node. */
unsigned char delta[NCHAR]; /* Delta table for rapid search. */
struct trie *next[NCHAR]; /* Table of children of the root. */
char *target; /* Target string if there's only one. */
- ptrdiff_t *shift; /* Used in Boyer-Moore search for one
+ idx_t *shift; /* Used in Boyer-Moore search for one
string. */
char const *trans; /* Character translation table. */
@@ -108,8 +108,7 @@ struct kwset
char gc2;
/* kwsexec implementation. */
- ptrdiff_t (*kwsexec) (kwset_t, char const *, ptrdiff_t,
- struct kwsmatch *, bool);
+ ptrdiff_t (*kwsexec) (kwset_t, char const *, idx_t, struct kwsmatch *, bool);
};
/* Use TRANS to transliterate C. A null TRANS does no transliteration. */
@@ -119,9 +118,9 @@ tr (char const *trans, char c)
return trans ? trans[U(c)] : c;
}
-static ptrdiff_t acexec (kwset_t, char const *, ptrdiff_t,
+static ptrdiff_t acexec (kwset_t, char const *, idx_t,
struct kwsmatch *, bool);
-static ptrdiff_t bmexec (kwset_t, char const *, ptrdiff_t,
+static ptrdiff_t bmexec (kwset_t, char const *, idx_t,
struct kwsmatch *, bool);
/* Return a newly allocated keyword set. A nonnull TRANS specifies a
@@ -142,7 +141,7 @@ kwsalloc (char const *trans)
kwset->trie->fail = NULL;
kwset->trie->depth = 0;
kwset->trie->shift = 0;
- kwset->mind = PTRDIFF_MAX;
+ kwset->mind = IDX_MAX;
kwset->target = NULL;
kwset->trans = trans;
kwset->kwsexec = acexec;
@@ -156,7 +155,7 @@ enum { DEPTH_SIZE = CHAR_BIT + CHAR_BIT / 2 };
/* Add the given string to the contents of the keyword set. */
void
-kwsincr (kwset_t kwset, char const *text, ptrdiff_t len)
+kwsincr (kwset_t kwset, char const *text, idx_t len)
{
assume (0 <= len);
struct trie *trie = kwset->trie;
@@ -181,7 +180,7 @@ kwsincr (kwset_t kwset, char const *text, ptrdiff_t len)
enum { L, R } dirs[DEPTH_SIZE];
links[0] = (struct tree *) &trie->links;
dirs[0] = L;
- ptrdiff_t depth = 1;
+ idx_t depth = 1;
while (cur && label != cur->label)
{
@@ -292,10 +291,7 @@ kwsincr (kwset_t kwset, char const *text, ptrdiff_t len)
/* Mark the node finally reached as accepting, encoding the
index number of this word in the keyword set so far. */
if (!trie->accepting)
- {
- size_t words = kwset->words;
- trie->accepting = 2 * words + 1;
- }
+ trie->accepting = 2 * kwset->words + 1;
++kwset->words;
/* Keep track of the longest and shortest string of the keyword set. */
@@ -303,7 +299,7 @@ kwsincr (kwset_t kwset, char const *text, ptrdiff_t len)
kwset->mind = trie->depth;
}
-ptrdiff_t
+idx_t
kwswords (kwset_t kwset)
{
return kwset->words;
@@ -350,7 +346,7 @@ treefails (struct tree const *tree, struct trie const *fail,
{
tree->trie->fail = cur->trie;
if (!reverse && cur->trie->accepting && !tree->trie->accepting)
- tree->trie->accepting = SIZE_MAX;
+ tree->trie->accepting = -1;
return;
}
fail = fail->fail;
@@ -362,7 +358,7 @@ treefails (struct tree const *tree, struct trie const *fail,
/* Set delta entries for the links of the given tree such that
the preexisting delta value is larger than the current depth. */
static void
-treedelta (struct tree const *tree, ptrdiff_t depth, unsigned char delta[])
+treedelta (struct tree const *tree, idx_t depth, unsigned char delta[])
{
if (!tree)
return;
@@ -407,7 +403,6 @@ void
kwsprep (kwset_t kwset)
{
char const *trans = kwset->trans;
- ptrdiff_t i;
unsigned char deltabuf[NCHAR];
unsigned char *delta = trans ? deltabuf : kwset->delta;
struct trie *curr, *last;
@@ -425,7 +420,8 @@ kwsprep (kwset_t kwset)
/* Looking for just one string. Extract it from the trie. */
kwset->target = obstack_alloc (&kwset->obstack, kwset->mind);
- for (i = 0, curr = kwset->trie; i < kwset->mind; ++i)
+ curr = kwset->trie;
+ for (idx_t i = 0; i < kwset->mind; i++)
{
kwset->target[i] = curr->links->label;
curr = curr->next;
@@ -504,7 +500,7 @@ kwsprep (kwset_t kwset)
treenext (kwset->trie->links, next);
int gc1 = -2;
int gc1help = -1;
- for (i = 0; i < NCHAR; i++)
+ for (int i = 0; i < NCHAR; i++)
{
int ti = i;
if (trans)
@@ -534,9 +530,10 @@ kwsprep (kwset_t kwset)
{
/* Looking for just one string. Extract it from the trie. */
kwset->target = obstack_alloc (&kwset->obstack, kwset->mind);
- for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i)
+ curr = kwset->trie;
+ for (idx_t i = kwset->mind; 0 < i; i--)
{
- kwset->target[i] = curr->links->label;
+ kwset->target[i - 1] = curr->links->label;
curr = curr->next;
}
@@ -547,7 +544,8 @@ kwsprep (kwset_t kwset)
kwset->shift
= obstack_alloc (&kwset->obstack,
sizeof *kwset->shift * (kwset->mind - 1));
- for (i = 0, curr = kwset->trie->next; i < kwset->mind - 1; ++i)
+ curr = kwset->trie->next;
+ for (idx_t i = 0; i < kwset->mind - 1; i++)
{
kwset->shift[i] = curr->shift;
curr = curr->next;
@@ -560,7 +558,7 @@ kwsprep (kwset_t kwset)
/* Fix things up for any translation table. */
if (trans)
- for (i = 0; i < NCHAR; ++i)
+ for (int i = 0; i < NCHAR; ++i)
kwset->delta[i] = delta[U(trans[i])];
}
@@ -574,16 +572,16 @@ kwsprep (kwset_t kwset)
when failing. KWSET->shift says how much to shift. */
static inline bool
bm_delta2_search (char const **tpp, char const *ep, char const *sp,
- ptrdiff_t len,
+ idx_t len,
char const *trans, char gc1, char gc2,
unsigned char const *d1, kwset_t kwset)
{
char const *tp = *tpp;
- ptrdiff_t d = len, skip = 0;
+ idx_t d = len, skip = 0;
while (true)
{
- ptrdiff_t i = 2;
+ idx_t i = 2;
if (tr (trans, tp[-2]) == gc2)
{
while (++i <= d)
@@ -622,7 +620,7 @@ bm_delta2_search (char const **tpp, char const *ep, char const *sp,
that matches the terminal byte specified by KWSET, or NULL if there
is no match. KWSET->gc1 should be nonnegative. */
static char const *
-memchr_kwset (char const *s, ptrdiff_t n, kwset_t kwset)
+memchr_kwset (char const *s, idx_t n, kwset_t kwset)
{
char const *slim = s + n;
if (kwset->gc1help < 0)
@@ -634,7 +632,7 @@ memchr_kwset (char const *s, ptrdiff_t n, kwset_t kwset)
else
{
int small_heuristic = 2;
- size_t small_bytes = small_heuristic * sizeof (unsigned long int);
+ idx_t small_bytes = small_heuristic * sizeof (unsigned long int);
while (s < slim)
{
if (kwset->next[U(*s)])
@@ -649,13 +647,13 @@ memchr_kwset (char const *s, ptrdiff_t n, kwset_t kwset)
/* Fast Boyer-Moore search (inlinable version). */
static inline ptrdiff_t _GL_ATTRIBUTE_PURE
-bmexec_trans (kwset_t kwset, char const *text, ptrdiff_t size)
+bmexec_trans (kwset_t kwset, char const *text, idx_t size)
{
assume (0 <= size);
unsigned char const *d1;
char const *ep, *sp, *tp;
int d;
- ptrdiff_t len = kwset->mind;
+ idx_t len = kwset->mind;
char const *trans = kwset->trans;
if (len == 0)
@@ -675,8 +673,8 @@ bmexec_trans (kwset_t kwset, char const *text, ptrdiff_t size)
char gc2 = kwset->gc2;
/* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
- ptrdiff_t len12;
- if (!INT_MULTIPLY_WRAPV (len, 12, &len12) && len12 < size)
+ idx_t len12;
+ if (INT_MULTIPLY_OK (len, 12, &len12) && len12 < size)
/* 11 is not a bug, the initial offset happens only once. */
for (ep = text + size - 11 * len; tp <= ep; )
{
@@ -735,7 +733,7 @@ bmexec_trans (kwset_t kwset, char const *text, ptrdiff_t size)
/* Fast Boyer-Moore search. */
static ptrdiff_t
-bmexec (kwset_t kwset, char const *text, ptrdiff_t size,
+bmexec (kwset_t kwset, char const *text, idx_t size,
struct kwsmatch *kwsmatch, bool longest)
{
/* Help the compiler inline in two ways, depending on whether
@@ -753,7 +751,7 @@ bmexec (kwset_t kwset, char const *text, ptrdiff_t size,
/* Hairy multiple string search with the Aho-Corasick algorithm.
(inlinable version) */
static inline ptrdiff_t
-acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len,
+acexec_trans (kwset_t kwset, char const *text, idx_t len,
struct kwsmatch *kwsmatch, bool longest)
{
struct trie const *trie, *accept;
@@ -831,7 +829,7 @@ acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len,
match:
accept = trie;
- while (accept->accepting == SIZE_MAX)
+ while (accept->accepting < 0)
accept = accept->fail;
left = tp - accept->depth;
@@ -858,7 +856,7 @@ acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len,
if (trie->accepting)
{
accept1 = trie;
- while (accept1->accepting == SIZE_MAX)
+ while (accept1->accepting < 0)
accept1 = accept1->fail;
left1 = tp - accept1->depth;
if (left1 <= left)
@@ -870,7 +868,7 @@ acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len,
}
}
- kwsmatch->index = accept->accepting / 2;
+ kwsmatch->index = accept->accepting >> 1;
kwsmatch->offset = left - text;
kwsmatch->size = accept->depth;
@@ -879,7 +877,7 @@ acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len,
/* Hairy multiple string search with Aho-Corasick algorithm. */
static ptrdiff_t
-acexec (kwset_t kwset, char const *text, ptrdiff_t size,
+acexec (kwset_t kwset, char const *text, idx_t size,
struct kwsmatch *kwsmatch, bool longest)
{
assume (0 <= size);
@@ -898,7 +896,7 @@ acexec (kwset_t kwset, char const *text, ptrdiff_t size,
value), and length. If LONGEST, find the longest match; otherwise
any match will do. */
ptrdiff_t
-kwsexec (kwset_t kwset, char const *text, ptrdiff_t size,
+kwsexec (kwset_t kwset, char const *text, idx_t size,
struct kwsmatch *kwsmatch, bool longest)
{
return kwset->kwsexec (kwset, text, size, kwsmatch, longest);
diff --git a/src/kwset.h b/src/kwset.h
index 24e13e26..cb94cf4b 100644
--- a/src/kwset.h
+++ b/src/kwset.h
@@ -22,23 +22,25 @@
#include <stddef.h>
#include <stdbool.h>
+#include <idx.h>
+
struct kwsmatch
{
- ptrdiff_t index; /* Index number of matching keyword. */
- ptrdiff_t offset; /* Offset of match. */
- ptrdiff_t size; /* Length of match. */
+ idx_t index; /* Index number of matching keyword. */
+ idx_t offset; /* Offset of match. */
+ idx_t size; /* Length of match. */
};
-#include "arg-nonnull.h"
+#include <arg-nonnull.h>
struct kwset;
typedef struct kwset *kwset_t;
extern kwset_t kwsalloc (char const *);
-extern void kwsincr (kwset_t, char const *, ptrdiff_t);
-extern ptrdiff_t kwswords (kwset_t) _GL_ATTRIBUTE_PURE;
+extern void kwsincr (kwset_t, char const *, idx_t);
+extern idx_t kwswords (kwset_t) _GL_ATTRIBUTE_PURE;
extern void kwsprep (kwset_t);
-extern ptrdiff_t kwsexec (kwset_t, char const *, ptrdiff_t,
+extern ptrdiff_t kwsexec (kwset_t, char const *, idx_t,
struct kwsmatch *, bool)
_GL_ARG_NONNULL ((4));
extern void kwsfree (kwset_t);
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 37f7e409..3bdaee90 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -113,7 +113,7 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
followed by '\n'. Return a description of the compiled pattern. */
void *
-Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
+Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
{
int e;
char const *ep;
@@ -202,8 +202,8 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
return pc;
}
-size_t
-Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
+ptrdiff_t
+Pexecute (void *vcp, char const *buf, idx_t size, idx_t *match_size,
char const *start_ptr)
{
int sub[NSUB];
diff --git a/src/search.h b/src/search.h
index 6a5814a9..acc282c4 100644
--- a/src/search.h
+++ b/src/search.h
@@ -48,38 +48,55 @@ typedef signed char mb_len_map_t;
/* searchutils.c */
extern void wordinit (void);
extern kwset_t kwsinit (bool);
-extern size_t wordchars_size (char const *, char const *) _GL_ATTRIBUTE_PURE;
-extern size_t wordchar_next (char const *, char const *) _GL_ATTRIBUTE_PURE;
-extern size_t wordchar_prev (char const *, char const *, char const *)
+extern idx_t wordchars_size (char const *, char const *) _GL_ATTRIBUTE_PURE;
+extern idx_t wordchar_next (char const *, char const *) _GL_ATTRIBUTE_PURE;
+extern idx_t wordchar_prev (char const *, char const *, char const *)
_GL_ATTRIBUTE_PURE;
-extern ptrdiff_t mb_goback (char const **, size_t *, char const *,
- char const *);
+extern ptrdiff_t mb_goback (char const **, idx_t *, char const *, char const *);
/* dfasearch.c */
-extern void *GEAcompile (char *, size_t, reg_syntax_t, bool);
-extern size_t EGexecute (void *, char const *, size_t, size_t *, char const *);
+extern void *GEAcompile (char *, idx_t, reg_syntax_t, bool);
+extern ptrdiff_t EGexecute (void *, char const *, idx_t, idx_t *, char const *);
/* kwsearch.c */
-extern void *Fcompile (char *, size_t, reg_syntax_t, bool);
-extern size_t Fexecute (void *, char const *, size_t, size_t *, char const *);
+extern void *Fcompile (char *, idx_t, reg_syntax_t, bool);
+extern ptrdiff_t Fexecute (void *, char const *, idx_t, idx_t *, char const *);
/* pcresearch.c */
-extern void *Pcompile (char *, size_t, reg_syntax_t, bool);
-extern size_t Pexecute (void *, char const *, size_t, size_t *, char const *);
+extern void *Pcompile (char *, idx_t, reg_syntax_t, bool);
+extern ptrdiff_t Pexecute (void *, char const *, idx_t, idx_t *, char const *);
/* grep.c */
extern struct localeinfo localeinfo;
-extern void fgrep_to_grep_pattern (char **, size_t *);
+extern void fgrep_to_grep_pattern (char **, idx_t *);
+
+/* Return the number of bytes in the character at the start of S, which
+ is of size N. N must be positive. MBS is the conversion state.
+ This acts like mbrlen, except it returns -1 and -2 instead of
+ (size_t) -1 and (size_t) -2. */
+SEARCH_INLINE ptrdiff_t
+imbrlen (char const *s, idx_t n, mbstate_t *mbs)
+{
+ size_t len = mbrlen (s, n, mbs);
+
+ /* Convert result to ptrdiff_t portably, even on oddball platforms.
+ When optimizing, this typically uses no machine instructions. */
+ if (len <= MB_LEN_MAX) /* Ordinary byte count: the value fits as is. */
+ return len;
+ ptrdiff_t neglen = -len; /* Unsigned negation of the mbrlen error code: (size_t) -1 -> 1, (size_t) -2 -> 2. */
+ return -neglen; /* Signed negation then yields -1 or -2. */
+}
/* Return the number of bytes in the character at the start of S, which
is of size N. N must be positive. MBS is the conversion state.
This acts like mbrlen, except it returns 1 when mbrlen would return 0,
+ it returns -1 and -2 instead of (size_t) -1 and (size_t) -2,
and it is typically faster because of the cache. */
-SEARCH_INLINE size_t
-mb_clen (char const *s, size_t n, mbstate_t *mbs)
+SEARCH_INLINE ptrdiff_t
+mb_clen (char const *s, idx_t n, mbstate_t *mbs)
{
signed char len = localeinfo.sbclen[to_uchar (*s)];
- return len == -2 ? mbrlen (s, n, mbs) : len;
+ return len == -2 ? imbrlen (s, n, mbs) : len;
}
extern char const *input_filename (void);
diff --git a/src/searchutils.c b/src/searchutils.c
index 0080dd75..ebc4a115 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -47,7 +47,7 @@ kwsinit (bool mb_trans)
if (match_icase && (MB_CUR_MAX == 1 || mb_trans))
{
- trans = xmalloc (NCHAR);
+ trans = ximalloc (NCHAR);
/* If I is a single-byte character that becomes a different
single-byte character when uppercased, set trans[I]
to that character. Otherwise, set trans[I] to I. */
@@ -88,7 +88,7 @@ kwsinit (bool mb_trans)
Treat encoding errors as if they were single-byte characters. */
ptrdiff_t
-mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
+mb_goback (char const **mb_start, idx_t *mbclen, char const *cur,
char const *end)
{
const char *p = *mb_start;
@@ -114,8 +114,8 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
if (long_enough)
{
mbstate_t mbs = { 0 };
- size_t clen = mbrlen (cur - i, end - (cur - i), &mbs);
- if (clen <= MB_LEN_MAX)
+ ptrdiff_t clen = imbrlen (cur - i, end - (cur - i), &mbs);
+ if (0 <= clen)
{
/* This multibyte character contains *CUR. */
p0 = cur - i;
@@ -130,13 +130,13 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
/* In non-UTF-8 encodings, to find character boundaries one must
in general scan forward from the start of the buffer. */
mbstate_t mbs = { 0 };
- size_t clen;
+ ptrdiff_t clen;
do
{
clen = mb_clen (p, end - p, &mbs);
- if (MB_LEN_MAX < clen)
+ if (clen < 0)
{
/* An invalid sequence, or a truncated multibyte character.
Treat it as a single byte character. */
@@ -159,10 +159,10 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
/* Examine the start of BUF (which goes to END) for word constituents.
If COUNTALL, examine as many as possible; otherwise, examine at most one.
Return the total number of bytes in the examined characters. */
-static size_t
+static idx_t
wordchars_count (char const *buf, char const *end, bool countall)
{
- size_t n = 0;
+ idx_t n = 0;
mbstate_t mbs = { 0 };
while (n < end - buf)
{
@@ -188,7 +188,7 @@ wordchars_count (char const *buf, char const *end, bool countall)
/* Examine the start of BUF for the longest prefix containing just
word constituents. Return the total number of bytes in the prefix.
The buffer ends at END. */
-size_t
+idx_t
wordchars_size (char const *buf, char const *end)
{
return wordchars_count (buf, end, true);
@@ -196,7 +196,7 @@ wordchars_size (char const *buf, char const *end)
/* If BUF starts with a word constituent, return the number of bytes
used to represent it; otherwise, return zero. The buffer ends at END. */
-size_t
+idx_t
wordchar_next (char const *buf, char const *end)
{
return wordchars_count (buf, end, false);
@@ -205,7 +205,7 @@ wordchar_next (char const *buf, char const *end)
/* In the buffer BUF, return nonzero if the character whose encoding
contains the byte before CUR is a word constituent. The buffer
ends at END. */
-size_t
+idx_t
wordchar_prev (char const *buf, char const *cur, char const *end)
{
if (buf == cur)