dfa: add missing function

* src/dfa.c (using_utf8): New.
(addtok_wc, free_mbdata, dfaoptimize) [!MBS_SUPPORT]: Do not define.
(dfacomp) [!MBS_SUPPORT]: Do not call dfaoptimize.
author: Paolo Bonzini <bonzini@gnu.org> 2010-03-08 12:20:37 +0100
committer: Paolo Bonzini <bonzini@gnu.org> 2010-03-17 11:58:50 +0100
commit: 50747b20b631a4a375f9e63427ca553c7cdaf100 (patch)
tree: 7c42f78ff1d475c27a47af9deac3417d5bfb1b62 /src
parent: 4adeed466aebc9f470fda44744188ae9c8f112d8 (diff)
download: grep-50747b20b631a4a375f9e63427ca553c7cdaf100.tar.gz
1 files changed, 30 insertions, 1 deletions
diff --git a/src/dfa.c b/src/dfa.c
index baf4df2a..ee2ff232 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -79,6 +79,7 @@
 /* We can handle multibyte strings. */
 # include <wchar.h>
 # include <wctype.h>
+# include <langinfo.h>
 #endif
 
 #include "regex.h"
@@ -251,6 +252,25 @@ setbit_case_fold (unsigned b, charclass c)
     }
 }
 
+
+/* Return nonzero if the locale codeset is "UTF-8" (result is cached).  UTF-8
+   allows optimizations we can't otherwise assume in a multibyte encoding.  */
+static inline int
+using_utf8 (void)
+{
+  static int utf8 = -1;  /* -1: not yet determined; afterwards 0 or 1 */
+  if (utf8 == -1)  /* first call only; later locale changes are not seen */
+    {
+#if defined HAVE_LANGINFO_CODESET && defined MBS_SUPPORT
+      utf8 = (strcmp (nl_langinfo (CODESET), "UTF-8") == 0);
+#else
+      utf8 = 0;  /* either macro undefined: always report non-UTF-8 */
+#endif
+    }
+
+  return utf8;
+}
+
 /* Lexical analyzer.  All the dross that deals with the obnoxious
    GNU Regex syntax bits is located here.  The poor, suffering
    reader is referred to the GNU Regex documentation for the
@@ -292,6 +312,7 @@ static unsigned char const *buf_begin;	/* reference to begin in dfaexec().  */
 static unsigned char const *buf_end;	/* reference to end in dfaexec().  */
 #endif /* MBS_SUPPORT  */
 
+
 #ifdef MBS_SUPPORT
 /* Note that characters become unsigned here. */
 # define FETCH(c, eoferr)			\
@@ -1126,6 +1147,7 @@ addtok (token t)
     addtok_mb (t, 3);
 }
 
+#ifdef MBS_SUPPORT
 /* We treat a multibyte character as a single atom, so that DFA
    can treat a multibyte character as a single expression.
 
@@ -1147,6 +1169,7 @@ addtok_wc (wint_t wc)
       addtok(CAT);
     }
 }
+#endif
 
 /* The grammar understood by the parser is as follows.
 
@@ -2897,6 +2920,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
     }
 }
 
+#ifdef MBS_SUPPORT
 static void
 free_mbdata (struct dfa *d)
 {
@@ -2927,6 +2951,7 @@ free_mbdata (struct dfa *d)
   d->mbcsets = NULL;
   d->nmbcsets = 0;
 }
+#endif
 
 /* Initialize the components of a dfa that the other routines don't
    initialize for themselves. */
@@ -2966,11 +2991,12 @@ dfainit (struct dfa *d)
 #endif
 }
 
+#ifdef MBS_SUPPORT
 static void
 dfaoptimize (struct dfa *d)
 {
   unsigned i;
-  if (!using_utf8)
+  if (!using_utf8())
     return;
 
   for (i = 0; i < d->tindex; ++i)
@@ -2989,6 +3015,7 @@ dfaoptimize (struct dfa *d)
   free_mbdata (d);
   d->mb_cur_max = 1;
 }
+#endif
 
 /* Parse and analyze a single string of the given length. */
 void
@@ -2997,7 +3024,9 @@ dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
   dfainit(d);
   dfaparse(s, len, d);
   dfamust(d);
+#ifdef MBS_SUPPORT
   dfaoptimize(d);
+#endif
   dfaanalyze(d, searchflag);
 }
author	Paolo Bonzini <bonzini@gnu.org>	2010-03-08 12:20:37 +0100
committer	Paolo Bonzini <bonzini@gnu.org>	2010-03-17 11:58:50 +0100
commit	50747b20b631a4a375f9e63427ca553c7cdaf100 (patch)
tree	7c42f78ff1d475c27a47af9deac3417d5bfb1b62 /src
parent	4adeed466aebc9f470fda44744188ae9c8f112d8 (diff)
download	grep-50747b20b631a4a375f9e63427ca553c7cdaf100.tar.gz