summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Paul Eggert <eggert@penguin.cs.ucla.edu> 2014-04-03 18:14:15 -0700
committer Paul Eggert <eggert@cs.ucla.edu> 2014-04-05 16:32:44 -0700
commit 8d0fcb8a231acfdb45fbab7c9df14cda90756c55 (patch)
tree be61d493a74abd1a1077a7601812aad97ad37a9c /src
parent d80c9a5da2c5e04844ac39f8cdd45e6425b2dde6 (diff)
download grep-8d0fcb8a231acfdb45fbab7c9df14cda90756c55.tar.gz
grep: simplify dfa.c by having it not include mbsupport.h directly
* src/mbsupport.h: Remove.
* src/Makefile.am (noinst_HEADERS): Remove mbsupport.h.
* src/dfa.c, src/grep.c, src/search.h: Don't include mbsupport.h.
* src/dfa.c: Include wchar.h and wctype.h unconditionally, as this
simplifies the use of dfa.c in grep, and it does no harm in gawk.
(setlocale, static_assert): Remove gawk-specific hacks, as gawk now
does these itself.
(struct dfa, dfambcache, mbs_to_wchar)
(is_valid_unibyte_character, setbit_wc, using_utf8, FETCH_WC)
(addtok_wc, add_utf8_anychar, atom, state_index, epsclosure)
(dfaanalyze, dfastate, prepare_wc_buf, dfaoptimize, dfafree, dfamust):
* src/dfasearch.c (EGexecute):
* src/grep.c (main):
* src/searchutils.c (mbtoupper): Assume MBS_SUPPORT.
Diffstat (limited to 'src')
-rw-r--r-- src/Makefile.am 2
-rw-r--r-- src/dfa.c 94
-rw-r--r-- src/dfasearch.c 3
-rw-r--r-- src/grep.c 3
-rw-r--r-- src/mbsupport.h 29
-rw-r--r-- src/search.h 3
-rw-r--r-- src/searchutils.c 2
7 files changed, 16 insertions, 120 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 3487848a..f8c94152 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -27,7 +27,7 @@ grep_SOURCES = grep.c searchutils.c \
dfa.c dfasearch.c \
kwset.c kwsearch.c \
pcresearch.c
-noinst_HEADERS = grep.h dfa.h kwset.h search.h system.h mbsupport.h
+noinst_HEADERS = grep.h dfa.h kwset.h search.h system.h
# Sometimes, the expansion of $(LIBINTL) includes -lc which may
# include modules defining variables like 'optind', so libgreputils.a
diff --git a/src/dfa.c b/src/dfa.c
index b6fbd58f..0d7eab55 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -34,16 +34,6 @@
#include <locale.h>
#include <stdbool.h>
-/* Gawk doesn't use Gnulib, so don't assume that setlocale and
- static_assert are present. */
-#ifndef LC_ALL
-# define setlocale(category, locale) NULL
-#endif
-#ifndef static_assert
-# define static_assert(cond, diagnostic) \
- extern int (*foo (void)) [!!sizeof (struct { int foo: (cond) ? 8 : -1; })]
-#endif
-
#define STREQ(a, b) (strcmp (a, b) == 0)
/* ISASCIIDIGIT differs from isdigit, as follows:
@@ -60,12 +50,8 @@
#include "gettext.h"
#define _(str) gettext (str)
-#include "mbsupport.h" /* Define MBS_SUPPORT to 1 or 0, as appropriate. */
-#if MBS_SUPPORT
-/* We can handle multibyte strings. */
-# include <wchar.h>
-# include <wctype.h>
-#endif
+#include <wchar.h>
+#include <wctype.h>
#if HAVE_LANGINFO_CODESET
# include <langinfo.h>
@@ -376,13 +362,11 @@ struct dfa
size_t nmultibyte_prop;
int *multibyte_prop;
-#if MBS_SUPPORT
/* A table indexed by byte values that contains the corresponding wide
character (if any) for that byte. WEOF means the byte is the
leading byte of a multibyte character. Invalid and null bytes are
mapped to themselves. */
wint_t mbrtowc_cache[NOTCHAR];
-#endif
/* Array of the bracket expression in the DFA. */
struct mb_char_classes *mbcsets;
@@ -488,7 +472,6 @@ static void regexp (void);
static void
dfambcache (struct dfa *d)
{
-#if MBS_SUPPORT
int i;
for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
{
@@ -505,10 +488,8 @@ dfambcache (struct dfa *d)
}
d->mbrtowc_cache[uc] = wi;
}
-#endif
}
-#if MBS_SUPPORT
/* Given the dfa D, store into *PWC the result of converting the
leading bytes of the multibyte buffer S of length N bytes, updating
the conversion state in *MBS. On conversion error, convert just a
@@ -542,7 +523,6 @@ mbs_to_wchar (struct dfa *d, wchar_t *pwc, char const *s, size_t n,
*pwc = wc;
return 1;
}
-#endif
#ifdef DEBUG
@@ -712,7 +692,7 @@ static charclass newline;
#ifdef __GLIBC__
# define is_valid_unibyte_character(c) 1
#else
-# define is_valid_unibyte_character(c) (! (MBS_SUPPORT && btowc (c) == WEOF))
+# define is_valid_unibyte_character(c) (btowc (c) != WEOF)
#endif
/* Return non-zero if C is a "word-constituent" byte; zero otherwise. */
@@ -773,17 +753,12 @@ dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
static bool
setbit_wc (wint_t wc, charclass c)
{
-#if MBS_SUPPORT
int b = wctob (wc);
if (b == EOF)
return false;
setbit (b, c);
return true;
-#else
- abort ();
- /*NOTREACHED*/ return false;
-#endif
}
/* Set a bit for B and its case variants in the charclass C.
@@ -808,7 +783,7 @@ using_utf8 (void)
static int utf8 = -1;
if (utf8 == -1)
{
-#if defined HAVE_LANGINFO_CODESET && MBS_SUPPORT
+#if defined HAVE_LANGINFO_CODESET
utf8 = (STREQ (nl_langinfo (CODESET), "UTF-8"));
#else
utf8 = 0;
@@ -897,7 +872,6 @@ static unsigned char const *buf_begin; /* reference to begin in dfaexec. */
static unsigned char const *buf_end; /* reference to end in dfaexec. */
-#if MBS_SUPPORT
/* Note that characters become unsigned here. */
# define FETCH_WC(c, wc, eoferr) \
do { \
@@ -920,23 +894,6 @@ static unsigned char const *buf_end; /* reference to end in dfaexec. */
} \
} while (0)
-#else
-/* Note that characters become unsigned here. */
-# define FETCH_WC(c, unused, eoferr) \
- do { \
- if (! lexleft) \
- { \
- if ((eoferr) != 0) \
- dfaerror (eoferr); \
- else \
- return lasttok = END; \
- } \
- (c) = to_uchar (*lexptr++); \
- --lexleft; \
- } while (0)
-
-#endif /* MBS_SUPPORT */
-
#ifndef MIN
# define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif
@@ -1720,7 +1677,6 @@ addtok (token t)
}
}
-#if MBS_SUPPORT
/* We treat a multibyte character as a single atom, so that DFA
can treat a multibyte character as a single expression.
@@ -1752,17 +1708,10 @@ addtok_wc (wint_t wc)
addtok (CAT);
}
}
-#else
-static void
-addtok_wc (wint_t wc)
-{
-}
-#endif
static void
add_utf8_anychar (void)
{
-#if MBS_SUPPORT
static const charclass utf8_classes[5] = {
{0, 0, 0, 0, ~0, ~0, 0, 0}, /* 80-bf: non-leading bytes */
{~0, ~0, ~0, ~0, 0, 0, 0, 0}, /* 00-7f: 1-byte sequence */
@@ -1807,7 +1756,6 @@ add_utf8_anychar (void)
addtok (CAT);
addtok (OR);
}
-#endif
}
/* The grammar understood by the parser is as follows.
@@ -1848,7 +1796,7 @@ add_utf8_anychar (void)
static void
atom (void)
{
- if (MBS_SUPPORT && tok == WCHAR)
+ if (tok == WCHAR)
{
addtok_wc (wctok);
@@ -1865,7 +1813,7 @@ atom (void)
tok = lex ();
}
- else if (MBS_SUPPORT && tok == ANYCHAR && using_utf8 ())
+ else if (tok == ANYCHAR && using_utf8 ())
{
/* For UTF-8 expand the period to a series of CSETs that define a valid
UTF-8 character. This avoids using the slow multibyte path. I'm
@@ -1879,9 +1827,7 @@ atom (void)
}
else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
|| tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
-#if MBS_SUPPORT
|| tok == ANYCHAR || tok == MBCSET
-#endif /* MBS_SUPPORT */
|| tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
{
addtok (tok);
@@ -2164,11 +2110,9 @@ state_index (struct dfa *d, position_set const *s, int context)
d->states[i].backref = 0;
d->states[i].constraint = 0;
d->states[i].first_end = 0;
- if (MBS_SUPPORT)
- {
- d->states[i].mbps.nelem = 0;
- d->states[i].mbps.elems = NULL;
- }
+ d->states[i].mbps.nelem = 0;
+ d->states[i].mbps.elems = NULL;
+
for (j = 0; j < s->nelem; ++j)
if (d->tokens[s->elems[j].index] < 0)
{
@@ -2206,10 +2150,8 @@ epsclosure (position_set * s, struct dfa const *d)
for (i = 0; i < s->nelem; ++i)
if (d->tokens[s->elems[i].index] >= NOTCHAR
&& d->tokens[s->elems[i].index] != BACKREF
-#if MBS_SUPPORT
&& d->tokens[s->elems[i].index] != ANYCHAR
&& d->tokens[s->elems[i].index] != MBCSET
-#endif
&& d->tokens[s->elems[i].index] < CSET)
{
old = s->elems[i];
@@ -2526,9 +2468,7 @@ dfaanalyze (struct dfa *d, int searchflag)
it with its epsilon closure. */
for (i = 0; i < d->tindex; ++i)
if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF
-#if MBS_SUPPORT
|| d->tokens[i] == ANYCHAR || d->tokens[i] == MBCSET
-#endif
|| d->tokens[i] >= CSET)
{
#ifdef DEBUG
@@ -2638,9 +2578,8 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
setbit (d->tokens[pos.index], matches);
else if (d->tokens[pos.index] >= CSET)
copyset (d->charclasses[d->tokens[pos.index] - CSET], matches);
- else if (MBS_SUPPORT
- && (d->tokens[pos.index] == ANYCHAR
- || d->tokens[pos.index] == MBCSET))
+ else if (d->tokens[pos.index] == ANYCHAR
+ || d->tokens[pos.index] == MBCSET)
/* MB_CUR_MAX > 1 */
{
/* ANYCHAR and MBCSET must match with a single character, so we
@@ -2814,7 +2753,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
/* If we are building a searching matcher, throw in the positions
of state 0 as well. */
if (d->searchflag
- && (!MBS_SUPPORT || (d->mb_cur_max == 1 || !next_isnt_1st_byte)))
+ && (d->mb_cur_max == 1 || !next_isnt_1st_byte))
for (j = 0; j < d->states[0].elems.nelem; ++j)
insert (d->states[0].elems.elems[j], &follows);
@@ -3366,7 +3305,6 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp)
static void
prepare_wc_buf (struct dfa *d, const char *begin, const char *end)
{
-#if MBS_SUPPORT
unsigned char eol = eolbyte;
size_t i;
size_t ilim = end - begin + 1;
@@ -3390,7 +3328,6 @@ prepare_wc_buf (struct dfa *d, const char *begin, const char *end)
buf_end = (unsigned char *) (begin + i);
mblen_buf[i] = 0;
inputwcs[i] = 0; /* sentinel */
-#endif /* MBS_SUPPORT */
}
/* Search through a buffer looking for a match to the given struct dfa.
@@ -3613,7 +3550,7 @@ dfaoptimize (struct dfa *d)
{
size_t i;
- if (!MBS_SUPPORT || !using_utf8 ())
+ if (!using_utf8 ())
return;
for (i = 0; i < d->tindex; ++i)
@@ -3663,8 +3600,7 @@ dfafree (struct dfa *d)
for (i = 0; i < d->sindex; ++i)
{
free (d->states[i].elems.elems);
- if (MBS_SUPPORT)
- free (d->states[i].mbps.elems);
+ free (d->states[i].mbps.elems);
}
free (d->states);
for (i = 0; i < d->tindex; ++i)
@@ -4139,7 +4075,7 @@ dfamust (struct dfa *d)
/* not on *my* shift */
goto done;
}
- else if (t >= CSET || !MBS_SUPPORT || t == ANYCHAR || t == MBCSET)
+ else if (t >= CSET || t == ANYCHAR || t == MBCSET)
{
/* easy enough */
resetmust (mp);
diff --git a/src/dfasearch.c b/src/dfasearch.c
index d098a9b1..5665b825 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -239,9 +239,6 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
char const *dfa_start = beg;
if (kwsm.index < kwset_exact_matches)
{
- if (!MBS_SUPPORT)
- goto success;
-
if (mb_start < beg)
mb_start = beg;
if (MB_CUR_MAX == 1
diff --git a/src/grep.c b/src/grep.c
index a1bccdb6..70337302 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -21,7 +21,6 @@
#include <config.h>
#include <sys/types.h>
#include <sys/stat.h>
-#include "mbsupport.h"
#include <wchar.h>
#include <wctype.h>
#include <fcntl.h>
@@ -2461,10 +2460,8 @@ main (int argc, char **argv)
}
}
-#if MBS_SUPPORT
if (MB_CUR_MAX > 1)
build_mbclen_cache ();
-#endif
compile (keys, keycc);
free (keys);
diff --git a/src/mbsupport.h b/src/mbsupport.h
deleted file mode 100644
index 49c79263..00000000
--- a/src/mbsupport.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* mbsupport.h --- Localize determination of whether we have multibyte stuff.
-
- Copyright (C) 2004-2005, 2007, 2009-2014 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-#include <stdlib.h>
-
-#ifndef MBS_SUPPORT
-# define MBS_SUPPORT 1
-#endif
-
-#if ! MBS_SUPPORT
-# undef MB_CUR_MAX
-# define MB_CUR_MAX 1
-#endif
diff --git a/src/search.h b/src/search.h
index 69e3afdc..871b7d54 100644
--- a/src/search.h
+++ b/src/search.h
@@ -23,9 +23,6 @@
#include <sys/types.h>
#include <stdint.h>
-
-#include "mbsupport.h"
-
#include <wchar.h>
#include <wctype.h>
#include <regex.h>
diff --git a/src/searchutils.c b/src/searchutils.c
index babb31f0..6749945f 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -48,7 +48,6 @@ kwsinit (kwset_t *kwset)
xalloc_die ();
}
-#if MBS_SUPPORT
/* Convert BEG, an *N-byte string, to uppercase, and write the
NUL-terminated result into malloc'd storage. Upon success, set *N
to the length (in bytes) of the resulting string (not including the
@@ -276,4 +275,3 @@ is_mb_middle (const char **good, const char *buf, const char *end,
/* P == BUF here. */
return 0 < match_len && match_len < mbrlen (p, end - p, &cur_state);
}
-#endif /* MBS_SUPPORT */