summary | refs | log | tree | commit | diff
path: root/src/searchutils.c
diff options
context:
space:
mode:
author Paul Eggert <eggert@cs.ucla.edu> 2021-08-24 17:19:22 -0700
committer Paul Eggert <eggert@cs.ucla.edu> 2021-08-25 12:11:27 -0700
commit e3694e90b4789ccafaf022a29d9ce08ff11375c2 (patch)
tree b09bc151ec222cfe5e6757a0cd85fe05ec3125a5 /src/searchutils.c
parent b7d83f46d81a304e188c82877430765c29a75610 (diff)
download grep-e3694e90b4789ccafaf022a29d9ce08ff11375c2.tar.gz
grep: prefer signed to unsigned integers
This improves runtime checking for integer overflow when compiling
with gcc -fsanitize=undefined and the like. It also avoids the need
for some integer casts, which can be error-prone.
* bootstrap.conf (gnulib_modules): Add idx.
* src/dfasearch.c (struct dfa_comp, kwsmusts):
(possible_backrefs_in_pattern, regex_compile, GEAcompile)
(EGexecute):
* src/grep.c (struct patloc, patlocs_allocated, patlocs_used)
(n_patterns, update_patterns, pattern_file_name, poison_len)
(asan_poison, fwrite_errno, compile_fp_t, execute_fp_t)
(buf_has_encoding_errors, buf_has_nulls, file_must_have_nulls)
(bufalloc, pagesize, all_zeros, fillbuf, nlscan)
(print_line_head, print_line_middle, print_line_tail, grepbuf)
(grep, contains_encoding_error, fgrep_icase_available)
(fgrep_icase_charlen, fgrep_to_grep_pattern, try_fgrep_pattern)
(main):
* src/kwsearch.c (struct kwsearch, Fcompile, Fexecute):
* src/kwset.c (struct trie, struct kwset, kwsalloc, kwsincr)
(kwswords, treefails, memchr_kwset, acexec_trans, kwsexec)
(treedelta, kwsprep, bm_delta2_search, bmexec_trans, bmexec)
(acexec):
* src/kwset.h (struct kwsmatch):
* src/pcresearch.c (Pcompile, Pexecute):
* src/search.h (mb_clen):
* src/searchutils.c (kwsinit, mb_goback, wordchars_count)
(wordchars_size, wordchar_next, wordchar_prev):
Prefer idx_t to size_t or ptrdiff_t for nonnegative sizes,
and prefer ptrdiff_t to size_t for sizes plus error values.
* src/grep.c (uword_size): New constant, used for signed size
calculations.
(totalnl, add_count, totalcc, print_offset, print_line_head, grep):
Prefer intmax_t to uintmax_t for wide integer calculations.
(fgrep_icase_charlen): Prefer ptrdiff_t to int for size offsets.
* src/grep.h: Include idx.h.
* src/search.h (imbrlen): New function, like mbrlen except
with idx_t and ptrdiff_t.
Diffstat (limited to 'src/searchutils.c')
-rw-r--r--  src/searchutils.c  22
1 files changed, 11 insertions, 11 deletions
diff --git a/src/searchutils.c b/src/searchutils.c
index 0080dd75..ebc4a115 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -47,7 +47,7 @@ kwsinit (bool mb_trans)
if (match_icase && (MB_CUR_MAX == 1 || mb_trans))
{
- trans = xmalloc (NCHAR);
+ trans = ximalloc (NCHAR);
/* If I is a single-byte character that becomes a different
single-byte character when uppercased, set trans[I]
to that character. Otherwise, set trans[I] to I. */
@@ -88,7 +88,7 @@ kwsinit (bool mb_trans)
Treat encoding errors as if they were single-byte characters. */
ptrdiff_t
-mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
+mb_goback (char const **mb_start, idx_t *mbclen, char const *cur,
char const *end)
{
const char *p = *mb_start;
@@ -114,8 +114,8 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
if (long_enough)
{
mbstate_t mbs = { 0 };
- size_t clen = mbrlen (cur - i, end - (cur - i), &mbs);
- if (clen <= MB_LEN_MAX)
+ ptrdiff_t clen = imbrlen (cur - i, end - (cur - i), &mbs);
+ if (0 <= clen)
{
/* This multibyte character contains *CUR. */
p0 = cur - i;
@@ -130,13 +130,13 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
/* In non-UTF-8 encodings, to find character boundaries one must
in general scan forward from the start of the buffer. */
mbstate_t mbs = { 0 };
- size_t clen;
+ ptrdiff_t clen;
do
{
clen = mb_clen (p, end - p, &mbs);
- if (MB_LEN_MAX < clen)
+ if (clen < 0)
{
/* An invalid sequence, or a truncated multibyte character.
Treat it as a single byte character. */
@@ -159,10 +159,10 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
/* Examine the start of BUF (which goes to END) for word constituents.
If COUNTALL, examine as many as possible; otherwise, examine at most one.
Return the total number of bytes in the examined characters. */
-static size_t
+static idx_t
wordchars_count (char const *buf, char const *end, bool countall)
{
- size_t n = 0;
+ idx_t n = 0;
mbstate_t mbs = { 0 };
while (n < end - buf)
{
@@ -188,7 +188,7 @@ wordchars_count (char const *buf, char const *end, bool countall)
/* Examine the start of BUF for the longest prefix containing just
word constituents. Return the total number of bytes in the prefix.
The buffer ends at END. */
-size_t
+idx_t
wordchars_size (char const *buf, char const *end)
{
return wordchars_count (buf, end, true);
@@ -196,7 +196,7 @@ wordchars_size (char const *buf, char const *end)
/* If BUF starts with a word constituent, return the number of bytes
used to represent it; otherwise, return zero. The buffer ends at END. */
-size_t
+idx_t
wordchar_next (char const *buf, char const *end)
{
return wordchars_count (buf, end, false);
@@ -205,7 +205,7 @@ wordchar_next (char const *buf, char const *end)
/* In the buffer BUF, return nonzero if the character whose encoding
contains the byte before CUR is a word constituent. The buffer
ends at END. */
-size_t
+idx_t
wordchar_prev (char const *buf, char const *cur, char const *end)
{
if (buf == cur)