diff options
author | Paolo Bonzini <bonzini@gnu.org> | 2010-03-08 12:20:37 +0100 |
---|---|---|
committer | Paolo Bonzini <bonzini@gnu.org> | 2010-03-17 11:58:50 +0100 |
commit | 50747b20b631a4a375f9e63427ca553c7cdaf100 (patch) | |
tree | 7c42f78ff1d475c27a47af9deac3417d5bfb1b62 /src | |
parent | 4adeed466aebc9f470fda44744188ae9c8f112d8 (diff) | |
download | grep-50747b20b631a4a375f9e63427ca553c7cdaf100.tar.gz |
dfa: add missing function
* src/dfa.c (using_utf8): New.
(addtok_wc, free_mbdata, dfaoptimize) [!MBS_SUPPORT]: Do not define.
(dfacomp) [!MBS_SUPPORT]: Do not call dfaoptimize.
Diffstat (limited to 'src')
-rw-r--r-- | src/dfa.c | 31 |
1 file changed, 30 insertions, 1 deletion
@@ -79,6 +79,7 @@ /* We can handle multibyte strings. */ # include <wchar.h> # include <wctype.h> +# include <langinfo.h> #endif #include "regex.h" @@ -251,6 +252,25 @@ setbit_case_fold (unsigned b, charclass c) } } + +/* UTF-8 encoding allows some optimizations that we can't otherwise + assume in a multibyte encoding. */ +static inline int +using_utf8 (void) +{ + static int utf8 = -1; + if (utf8 == -1) + { +#if defined HAVE_LANGINFO_CODESET && defined MBS_SUPPORT + utf8 = (strcmp (nl_langinfo (CODESET), "UTF-8") == 0); +#else + utf8 = 0; +#endif + } + + return utf8; +} + /* Lexical analyzer. All the dross that deals with the obnoxious GNU Regex syntax bits is located here. The poor, suffering reader is referred to the GNU Regex documentation for the @@ -292,6 +312,7 @@ static unsigned char const *buf_begin; /* reference to begin in dfaexec(). */ static unsigned char const *buf_end; /* reference to end in dfaexec(). */ #endif /* MBS_SUPPORT */ + #ifdef MBS_SUPPORT /* Note that characters become unsigned here. */ # define FETCH(c, eoferr) \ @@ -1126,6 +1147,7 @@ addtok (token t) addtok_mb (t, 3); } +#ifdef MBS_SUPPORT /* We treat a multibyte character as a single atom, so that DFA can treat a multibyte character as a single expression. @@ -1147,6 +1169,7 @@ addtok_wc (wint_t wc) addtok(CAT); } } +#endif /* The grammar understood by the parser is as follows. @@ -2897,6 +2920,7 @@ dfaexec (struct dfa *d, char const *begin, char *end, } } +#ifdef MBS_SUPPORT static void free_mbdata (struct dfa *d) { @@ -2927,6 +2951,7 @@ free_mbdata (struct dfa *d) d->mbcsets = NULL; d->nmbcsets = 0; } +#endif /* Initialize the components of a dfa that the other routines don't initialize for themselves. 
*/ @@ -2966,11 +2991,12 @@ dfainit (struct dfa *d) #endif } +#ifdef MBS_SUPPORT static void dfaoptimize (struct dfa *d) { unsigned i; - if (!using_utf8) + if (!using_utf8()) return; for (i = 0; i < d->tindex; ++i) @@ -2989,6 +3015,7 @@ dfaoptimize (struct dfa *d) free_mbdata (d); d->mb_cur_max = 1; } +#endif /* Parse and analyze a single string of the given length. */ void @@ -2997,7 +3024,9 @@ dfacomp (char const *s, size_t len, struct dfa *d, int searchflag) dfainit(d); dfaparse(s, len, d); dfamust(d); +#ifdef MBS_SUPPORT dfaoptimize(d); +#endif dfaanalyze(d, searchflag); } |