summary refs log tree commit diff
diff options
context:
space:
mode:
author Paolo Bonzini <bonzini@gnu.org> 2010-03-08 12:20:37 +0100
committer Paolo Bonzini <bonzini@gnu.org> 2010-03-17 11:58:50 +0100
commit 50747b20b631a4a375f9e63427ca553c7cdaf100 (patch)
tree 7c42f78ff1d475c27a47af9deac3417d5bfb1b62
parent 4adeed466aebc9f470fda44744188ae9c8f112d8 (diff)
download grep-50747b20b631a4a375f9e63427ca553c7cdaf100.tar.gz
dfa: add missing function
* src/dfa.c (using_utf8): New. (addtok_wc, free_mbdata, dfaoptimize) [!MBS_SUPPORT]: Do not define. (dfacomp) [!MBS_SUPPORT]: Do not call dfaoptimize.
-rw-r--r-- src/dfa.c 31
1 file changed, 30 insertions, 1 deletion
diff --git a/src/dfa.c b/src/dfa.c
index baf4df2a..ee2ff232 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -79,6 +79,7 @@
/* We can handle multibyte strings. */
# include <wchar.h>
# include <wctype.h>
+# include <langinfo.h>
#endif
#include "regex.h"
@@ -251,6 +252,25 @@ setbit_case_fold (unsigned b, charclass c)
}
}
+
+/* UTF-8 encoding allows some optimizations that we can't otherwise
+ assume in a multibyte encoding.  Return nonzero only when the codeset is UTF-8. */
+static inline int
+using_utf8 (void)
+{
+ static int utf8 = -1; /* -1 means not computed yet; the answer is cached here. */
+ if (utf8 == -1)
+ {
+#if defined HAVE_LANGINFO_CODESET && defined MBS_SUPPORT
+ utf8 = (strcmp (nl_langinfo (CODESET), "UTF-8") == 0);
+#else
+ utf8 = 0; /* Unless both macros are defined, always report non-UTF-8. */
+#endif
+ }
+
+ return utf8;
+}
+
/* Lexical analyzer. All the dross that deals with the obnoxious
GNU Regex syntax bits is located here. The poor, suffering
reader is referred to the GNU Regex documentation for the
@@ -292,6 +312,7 @@ static unsigned char const *buf_begin; /* reference to begin in dfaexec(). */
static unsigned char const *buf_end; /* reference to end in dfaexec(). */
#endif /* MBS_SUPPORT */
+
#ifdef MBS_SUPPORT
/* Note that characters become unsigned here. */
# define FETCH(c, eoferr) \
@@ -1126,6 +1147,7 @@ addtok (token t)
addtok_mb (t, 3);
}
+#ifdef MBS_SUPPORT
/* We treat a multibyte character as a single atom, so that DFA
can treat a multibyte character as a single expression.
@@ -1147,6 +1169,7 @@ addtok_wc (wint_t wc)
addtok(CAT);
}
}
+#endif
/* The grammar understood by the parser is as follows.
@@ -2897,6 +2920,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
}
}
+#ifdef MBS_SUPPORT
static void
free_mbdata (struct dfa *d)
{
@@ -2927,6 +2951,7 @@ free_mbdata (struct dfa *d)
d->mbcsets = NULL;
d->nmbcsets = 0;
}
+#endif
/* Initialize the components of a dfa that the other routines don't
initialize for themselves. */
@@ -2966,11 +2991,12 @@ dfainit (struct dfa *d)
#endif
}
+#ifdef MBS_SUPPORT
static void
dfaoptimize (struct dfa *d)
{
unsigned i;
- if (!using_utf8)
+ if (!using_utf8())
return;
for (i = 0; i < d->tindex; ++i)
@@ -2989,6 +3015,7 @@ dfaoptimize (struct dfa *d)
free_mbdata (d);
d->mb_cur_max = 1;
}
+#endif
/* Parse and analyze a single string of the given length. */
void
@@ -2997,7 +3024,9 @@ dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
dfainit(d);
dfaparse(s, len, d);
dfamust(d);
+#ifdef MBS_SUPPORT
dfaoptimize(d);
+#endif
dfaanalyze(d, searchflag);
}