summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Zev Weiss <zev@bewilderbeest.net> 2016-08-19 10:37:26 -0700
committer Jim Meyering <meyering@fb.com> 2016-08-20 17:19:31 -0700
commit e0a4b399ad3e4da1a03a1c2c51eb8134d1fce1de (patch)
tree 6009cc364c07cec95eef3a3d7f7d2717b09895f3
parent 8cf06f05c154e669359cf940ed13d73f6a19562b (diff)
download grep-e0a4b399ad3e4da1a03a1c2c51eb8134d1fce1de.tar.gz
dfa: thread-safety: eliminate static local variables
* src/dfa.c: Replace utf8 and unibyte_c static local variables with
static globals initialized by a new function dfa_init() which must be
called before any other dfa*() functions.
(dfa_using_utf8): Rename using_utf8() to dfa_using_utf8() for
consistency with other exported functions.
* src/dfa.h (dfa_using_utf8): Rename using_utf8() to dfa_using_utf8();
also add _GL_ATTRIBUTE_PURE.
(dfa_init): New function.
* src/grep.c (main), tests/dfa-match-aux.c (main): Call dfa_init().
* src/dfasearch.c (EGexecute): Replace using_utf8 with dfa_using_utf8.
* src/kwsearch.c (Fexecute): Likewise.
* src/pcresearch.c (Pcompile): Likewise.
http://bugs.gnu.org/24259
-rw-r--r-- src/dfa.c 62
-rw-r--r-- src/dfa.h 5
-rw-r--r-- src/dfasearch.c 2
-rw-r--r-- src/grep.c 2
-rw-r--r-- src/kwsearch.c 2
-rw-r--r-- src/pcresearch.c 2
-rw-r--r-- tests/dfa-match-aux.c 2
7 files changed, 44 insertions, 33 deletions
diff --git a/src/dfa.c b/src/dfa.c
index 76c12176..86dab0dc 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -757,6 +757,16 @@ char_context (struct dfa *dfa, unsigned char c)
return CTX_NONE;
}
+/* UTF-8 encoding allows some optimizations that we can't otherwise
+ assume in a multibyte encoding. */
+static bool using_utf8;
+
+bool
+dfa_using_utf8 (void)
+{
+ return using_utf8;
+}
+
/* Entry point to set syntax options. */
void
dfasyntax (struct dfa *dfa, reg_syntax_t bits, bool fold, unsigned char eol)
@@ -789,7 +799,7 @@ dfasyntax (struct dfa *dfa, reg_syntax_t bits, bool fold, unsigned char eol)
/* POSIX requires that the five bytes in "\n\r./" (including the
terminating NUL) cannot occur inside a multibyte character. */
- dfa->syntax.never_trail[uc] = (using_utf8 () ? (uc & 0xc0) != 0x80
+ dfa->syntax.never_trail[uc] = (using_utf8 ? (uc & 0xc0) != 0x80
: strchr ("\n\r./", uc) != NULL);
}
}
@@ -822,21 +832,21 @@ setbit_case_fold_c (int b, charclass c)
setbit (i, c);
}
+static void check_utf8 (void)
+{
+ wchar_t wc;
+ mbstate_t mbs = { 0 };
+ using_utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
+}
+static bool unibyte_c;
-/* UTF-8 encoding allows some optimizations that we can't otherwise
- assume in a multibyte encoding. */
-bool
-using_utf8 (void)
+static void check_unibyte_c (void)
{
- static int utf8 = -1;
- if (utf8 < 0)
- {
- wchar_t wc;
- mbstate_t mbs = { 0 };
- utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
- }
- return utf8;
+ char const *locale = setlocale (LC_ALL, NULL);
+ unibyte_c = (!locale
+ || STREQ (locale, "C")
+ || STREQ (locale, "POSIX"));
}
/* The current locale is known to be a unibyte locale
@@ -863,20 +873,7 @@ using_simple_locale (struct dfa const *dfa)
&& '}' == 125 && '~' == 126)
};
- if (! native_c_charset || dfa->multibyte)
- return false;
- else
- {
- static int unibyte_c = -1;
- if (unibyte_c < 0)
- {
- char const *locale = setlocale (LC_ALL, NULL);
- unibyte_c = (!locale
- || STREQ (locale, "C")
- || STREQ (locale, "POSIX"));
- }
- return unibyte_c;
- }
+ return (!native_c_charset || dfa->multibyte) ? false : unibyte_c;
}
/* Fetch the next lexical input character. Set C (of type int) to the
@@ -1843,7 +1840,7 @@ atom (struct dfa *dfa)
dfa->parsestate.tok = lex (dfa);
}
- else if (dfa->parsestate.tok == ANYCHAR && using_utf8 ())
+ else if (dfa->parsestate.tok == ANYCHAR && using_utf8)
{
/* For UTF-8 expand the period to a series of CSETs that define a valid
UTF-8 character. This avoids using the slow multibyte path. I'm
@@ -3524,7 +3521,7 @@ dfaoptimize (struct dfa *d)
size_t i;
bool have_backref = false;
- if (!using_utf8 ())
+ if (!using_utf8)
return;
for (i = 0; i < d->tindex; ++i)
@@ -4202,4 +4199,11 @@ dfaalloc (void)
return d;
}
+void
+dfa_init (void)
+{
+ check_utf8 ();
+ check_unibyte_c ();
+}
+
/* vim:set shiftwidth=2: */
diff --git a/src/dfa.h b/src/dfa.h
index 014ae96f..585390a7 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -100,4 +100,7 @@ extern void dfawarn (const char *);
The user must supply a dfaerror. */
extern _Noreturn void dfaerror (const char *);
-extern bool using_utf8 (void);
+extern bool dfa_using_utf8 (void) _GL_ATTRIBUTE_PURE;
+
+/* This must be called before calling any of the above dfa*() functions. */
+extern void dfa_init (void);
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 3dbf76b3..10c4f51b 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -277,7 +277,7 @@ EGexecute (char *buf, size_t size, size_t *match_size,
if (exact_kwset_match)
{
- if (MB_CUR_MAX == 1 || using_utf8 ())
+ if (MB_CUR_MAX == 1 || dfa_using_utf8 ())
goto success;
if (mb_start < beg)
mb_start = beg;
diff --git a/src/grep.c b/src/grep.c
index f4d0444f..63487d6b 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -2350,6 +2350,8 @@ main (int argc, char **argv)
textdomain (PACKAGE);
#endif
+ dfa_init ();
+
atexit (clean_up_stdout);
last_recursive = 0;
diff --git a/src/kwsearch.c b/src/kwsearch.c
index 09af4a25..57fd4d77 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -93,7 +93,7 @@ Fexecute (char *buf, size_t size, size_t *match_size,
mb_check = longest = false;
else
{
- mb_check = MB_CUR_MAX > 1 && !using_utf8 ();
+ mb_check = MB_CUR_MAX > 1 && !dfa_using_utf8 ();
longest = mb_check | !!start_ptr | match_words;
}
diff --git a/src/pcresearch.c b/src/pcresearch.c
index f6e72b0e..3f76603d 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -114,7 +114,7 @@ Pcompile (char const *pattern, size_t size)
if (1 < MB_CUR_MAX)
{
- if (! using_utf8 ())
+ if (! dfa_using_utf8 ())
error (EXIT_TROUBLE, 0,
_("-P supports only unibyte and UTF-8 locales"));
multibyte_locale = true;
diff --git a/tests/dfa-match-aux.c b/tests/dfa-match-aux.c
index 25b05357..e6517352 100644
--- a/tests/dfa-match-aux.c
+++ b/tests/dfa-match-aux.c
@@ -54,6 +54,8 @@ main (int argc, char **argv)
setlocale (LC_ALL, "");
+ dfa_init ();
+
dfa = dfaalloc ();
dfasyntax (dfa, RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES, 0, '\n');
dfacomp (argv[1], strlen (argv[1]), dfa, 0);