summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIvan Maidanski <ivmai@mail.ru>2022-06-01 21:12:40 +0300
committerIvan Maidanski <ivmai@mail.ru>2022-06-16 08:50:28 +0300
commitd95745914dd36a5c12d0e105aea27dadebdf98ff (patch)
tree53d0ad684f21bc8431a635800e68bc1a258e217c
parent85a2dc684d4ed57c5bd4da395349af1a42ee45a1 (diff)
downloadbdwgc-d95745914dd36a5c12d0e105aea27dadebdf98ff.tar.gz
Prevent (fix) parallel custom mark procs run in single-threaded clients
(a cherry-pick of commit b8ee3ca62 from 'release-8_2') If the collector is built with parallel marker support then marking is performed in parallel on multi-core targets. Thus, if the client provides custom mark procedures, then they could be executed in parallel. In case of a single-threaded client (developed for the older libgc version with the parallel mark support off), its custom mark procedures might not be prepared to be launched in parallel. Now, the parallel mark threads are not launched, even if available, until the client starts a (user) thread (e.g. calls pthread_create or GC_allow_register_threads) or tells the collector explicitly to start the mark threads (by calling GC_start_mark_threads). * doc/README.macros (GC_ALWAYS_MULTITHREADED): Update documentation. * doc/scale.md (Options for enhanced scalability): Likewise. * include/gc.h [GC_THREADS] (GC_parallel, GC_allow_register_threads): Update comment. * include/gc.h (GC_start_mark_threads): Likewise. * include/gc_mark.h (GC_mark_proc): Likewise. * include/gc_mark.h (GC_PROC_BYTES, GC_ms_entry): Move upper to be before the comment belonging to GC_mark_proc. * misc.c [THREADS && PARALLEL_MARK] (GC_init): Do not call GC_start_mark_threads_inner(). * misc.c [PARALLEL_MARK] (GC_start_mark_threads): Call GC_start_mark_threads_inner() even if THREAD_SANITIZER or no CAN_HANDLE_FORK. * misc.c [THREADS] (GC_get_parallel): Remove comment. * pthread_support.c [PARALLEL_MARK && !CAN_HANDLE_FORK] (available_markers_m1): Define as a variable. * win32_threads.c [PARALLEL_MARK && !CAN_HANDLE_FORK] (available_markers_m1): Likewise. * pthread_support.c [PARALLEL_MARK && !CAN_HANDLE_FORK] (GC_wait_for_gc_completion): Declare. * pthread_support.c [PARALLEL_MARK && !CAN_HANDLE_FORK] (GC_start_mark_threads_inner): If GC_parallel then return; call GC_wait_for_gc_completion(); set GC_markers_m1 value from available_markers_m1. * win32_threads.c [PARALLEL_MARK && (!GC_PTHREADS_PARAMARK || !CAN_HANDLE_FORK)] (GC_start_mark_threads_inner): Likewise. * pthread_support.c [CAN_HANDLE_FORK && PARALLEL_MARK && THREAD_SANITIZER] (fork_child_proc): Set available_markers_m1 to 0. * pthread_support.c [CAN_HANDLE_FORK]: Move GC_remove_all_threads_but_me() call to be after setting available_markers_m1. * pthread_support.c (GC_allow_register_threads): Call GC_start_mark_threads(). * tests/middle.c (main): Likewise. * win32_threads.c (GC_allow_register_threads): Likewise. * pthread_support.c [PARALLEL_MARK] (pthread_create): Call GC_start_mark_threads() unless GC_parallel or available_markers_m1<=0. * win32_threads.c (START_MARK_THREADS): Define macro (to call GC_start_mark_threads() if PARALLEL_MARK). * win32_threads.c (GC_CreateThread): Call START_MARK_THREADS() (right before set_need_to_lock). * win32_threads.c [!CYGWIN32 && !MSWINCE && !MSWIN_XBOX1 && !NO_CRT] (GC_beginthreadex): Likewise. * win32_threads.c [GC_PTHREADS] (GC_pthread_create): Likewise.
-rw-r--r--doc/README.macros1
-rw-r--r--doc/scale.md6
-rw-r--r--include/gc.h38
-rw-r--r--include/gc_mark.h20
-rw-r--r--misc.c9
-rw-r--r--pthread_support.c43
-rw-r--r--tests/middle.c4
-rw-r--r--win32_threads.c36
8 files changed, 86 insertions, 71 deletions
diff --git a/doc/README.macros b/doc/README.macros
index f79f6e25..4fe29b46 100644
--- a/doc/README.macros
+++ b/doc/README.macros
@@ -419,7 +419,6 @@ GC_BUILTIN_ATOMIC Use GCC atomic intrinsics instead of libatomic_ops
primitives.
GC_ALWAYS_MULTITHREADED Force multi-threaded mode at GC initialization.
- (Turns GC_allow_register_threads into a no-op routine.)
GC_ENABLE_SUSPEND_THREAD (Linux only) Turn on thread suspend/resume API
support.
diff --git a/doc/scale.md b/doc/scale.md
index 2a3c6948..50760c50 100644
--- a/doc/scale.md
+++ b/doc/scale.md
@@ -38,8 +38,10 @@ to be used together.
itself, though not the allocation process. Currently the marking
is performed by the thread that triggered the collection, together with
_N_ - 1 dedicated threads, where _N_ is the number of processors detected
- by the collector. The dedicated threads are created once at initialization
- time (and optionally recreated in child processes after forking).
+ by the collector. The dedicated threads are created when the
+ client calls `GC_start_mark_threads()` or when the client starts the first
+ non-main thread after the GC initialization (or after fork operation in
+ a child process).
A second effect of this flag is to switch to a more concurrent
implementation of `GC_malloc_many`, so that free lists can be built, and
memory can be cleared, by more than one thread concurrently.
diff --git a/include/gc.h b/include/gc.h
index 081ae33a..429f6637 100644
--- a/include/gc.h
+++ b/include/gc.h
@@ -87,23 +87,20 @@ GC_API GC_word GC_CALL GC_get_gc_no(void);
/* avoid data races on multiprocessors. */
#ifdef GC_THREADS
+ /* GC is parallelized for performance on multiprocessors. Set to */
+ /* a non-zero value when client calls GC_start_mark_threads() */
+ /* directly or starts the first non-main thread, provided the */
+ /* collector is built with PARALLEL_MARK defined, and either */
+ /* GC_MARKERS (or GC_NPROCS) environment variable is set to a value */
+ /* bigger than 1, or multiple cores (processors) are available. */
+ /* If GC_parallel is on (non-zero), incremental collection is only */
+ /* partially functional, and may not be desirable. Starting from */
+ /* GC v7.3, after setting, GC_parallel value is equal to the number */
+ /* of marker threads minus one (i.e. the number of existing parallel */
+ /* marker threads excluding the initiating one). */
GC_API GC_ATTR_DEPRECATED int GC_parallel;
- /* GC is parallelized for performance on */
- /* multiprocessors. Set to a non-zero value */
- /* only implicitly if collector is built with */
- /* PARALLEL_MARK defined, and if either */
- /* GC_MARKERS (or GC_NPROCS) environment */
- /* variable is set to > 1, or multiple cores */
- /* (processors) are available. */
- /* If GC_parallel is on (non-zero), incremental */
- /* collection is only partially functional, */
- /* and may not be desirable. The getter does */
- /* not use or need synchronization (i.e. */
- /* acquiring the GC lock). Starting from */
- /* GC v7.3, GC_parallel value is equal to the */
- /* number of marker threads minus one (i.e. */
- /* number of existing parallel marker threads */
- /* excluding the initiating one). */
+
+ /* Return value of GC_parallel. Does not acquire the GC lock. */
GC_API int GC_CALL GC_get_parallel(void);
#endif
@@ -1459,9 +1456,11 @@ GC_API void * GC_CALL GC_call_with_stack_base(GC_stack_base_func /* fn */,
/* systems. Return -1 otherwise. */
GC_API int GC_CALL GC_get_thr_restart_signal(void);
- /* Restart marker threads after POSIX fork in child. Meaningless in */
- /* other situations. Should not be called if fork followed by exec. */
- /* Acquires the GC lock to avoid a data race. */
+ /* Start the parallel marker threads, if available. Useful, e.g., */
+ /* after POSIX fork in a child process (provided not followed by */
+ /* exec) or in single-threaded clients (provided it is OK for the */
+ /* client to perform marking in parallel). Acquires the GC lock to */
+ /* avoid a data race. */
GC_API void GC_CALL GC_start_mark_threads(void);
/* Explicitly enable GC_register_my_thread() invocation. */
@@ -1471,6 +1470,7 @@ GC_API void * GC_CALL GC_call_with_stack_base(GC_stack_base_func /* fn */,
/* must be called from the main (or any previously registered) thread */
/* between the collector initialization and the first explicit */
/* registering of a thread (it should be called as late as possible). */
+ /* Includes a GC_start_mark_threads() call. */
GC_API void GC_CALL GC_allow_register_threads(void);
/* Register the current thread, with the indicated stack base, as */
diff --git a/include/gc_mark.h b/include/gc_mark.h
index b5201cb7..5aed0beb 100644
--- a/include/gc_mark.h
+++ b/include/gc_mark.h
@@ -33,6 +33,14 @@
extern "C" {
#endif
+#define GC_PROC_BYTES 100
+
+#ifdef GC_BUILD
+ struct GC_ms_entry;
+#else
+ struct GC_ms_entry { void *opaque; };
+#endif
+
/* A client supplied mark procedure. Returns new mark stack pointer. */
/* Primary effect should be to push new entries on the mark stack. */
/* Mark stack pointer values are passed and returned explicitly. */
@@ -54,14 +62,10 @@
/* residing on a free list. Such objects are cleared, except for a */
/* free list link field in the first word. Thus mark procedures may */
/* not count on the presence of a type descriptor, and must handle this */
-/* case correctly somehow. */
-#define GC_PROC_BYTES 100
-
-#ifdef GC_BUILD
- struct GC_ms_entry;
-#else
- struct GC_ms_entry { void *opaque; };
-#endif
+/* case correctly somehow. Also, a mark procedure should be prepared */
+/* to be executed concurrently from the marker threads (the later ones */
+/* are created only if the client has called GC_start_mark_threads() */
+/* or started a user thread previously). */
typedef struct GC_ms_entry * (*GC_mark_proc)(GC_word * /* addr */,
struct GC_ms_entry * /* mark_stack_ptr */,
struct GC_ms_entry * /* mark_stack_limit */,
diff --git a/misc.c b/misc.c
index 64a055c6..c3ce49a9 100644
--- a/misc.c
+++ b/misc.c
@@ -1315,10 +1315,6 @@ GC_API void GC_CALL GC_init(void)
GC_is_initialized = TRUE;
# if defined(GC_PTHREADS) || defined(GC_WIN32_THREADS)
GC_thr_init();
-# ifdef PARALLEL_MARK
- /* Actually start helper threads. */
- GC_start_mark_threads_inner();
-# endif
# endif
COND_DUMP;
/* Get black list set up and/or incremental GC started */
@@ -1416,9 +1412,7 @@ GC_API void GC_CALL GC_enable_incremental(void)
#if defined(THREADS)
GC_API void GC_CALL GC_start_mark_threads(void)
{
-# if defined(PARALLEL_MARK) && defined(CAN_HANDLE_FORK) \
- && !defined(THREAD_SANITIZER)
- /* TSan does not support threads creation in the child process. */
+# ifdef PARALLEL_MARK
IF_CANCEL(int cancel_state;)
DCL_LOCK_STATE;
@@ -2305,7 +2299,6 @@ GC_API GC_word GC_CALL GC_get_gc_no(void)
#ifdef THREADS
GC_API int GC_CALL GC_get_parallel(void)
{
- /* GC_parallel is initialized at start-up. */
return GC_parallel;
}
diff --git a/pthread_support.c b/pthread_support.c
index 43e1a6fb..e17e4c54 100644
--- a/pthread_support.c
+++ b/pthread_support.c
@@ -379,16 +379,17 @@ STATIC void * GC_mark_thread(void * id)
STATIC pthread_t GC_mark_threads[MAX_MARKERS];
+static int available_markers_m1 = 0;
+
#ifdef CAN_HANDLE_FORK
- static int available_markers_m1 = 0;
static pthread_cond_t mark_cv;
/* initialized by GC_start_mark_threads_inner */
- STATIC void GC_wait_for_gc_completion(GC_bool wait_for_all);
#else
-# define available_markers_m1 GC_markers_m1
static pthread_cond_t mark_cv = PTHREAD_COND_INITIALIZER;
#endif
+STATIC void GC_wait_for_gc_completion(GC_bool wait_for_all);
+
GC_INNER void GC_start_mark_threads_inner(void)
{
int i;
@@ -399,12 +400,11 @@ GC_INNER void GC_start_mark_threads_inner(void)
GC_ASSERT(I_HOLD_LOCK());
ASSERT_CANCEL_DISABLED();
- if (available_markers_m1 <= 0) return;
+ if (available_markers_m1 <= 0 || GC_parallel) return;
/* Skip if parallel markers disabled or already started. */
-# ifdef CAN_HANDLE_FORK
- if (GC_parallel) return;
- GC_wait_for_gc_completion(TRUE);
+ GC_wait_for_gc_completion(TRUE);
+# ifdef CAN_HANDLE_FORK
/* Initialize mark_cv (for the first time), or cleanup its value */
/* after forking in the child process. All the marker threads in */
/* the parent process were blocked on this variable at fork, so */
@@ -463,10 +463,9 @@ GC_INNER void GC_start_mark_threads_inner(void)
}
# endif /* !NO_MARKER_SPECIAL_SIGMASK */
-# ifdef CAN_HANDLE_FORK
- /* To have proper GC_parallel value in GC_help_marker. */
- GC_markers_m1 = available_markers_m1;
-# endif
+ /* To have proper GC_parallel value in GC_help_marker. */
+ GC_markers_m1 = available_markers_m1;
+
for (i = 0; i < available_markers_m1; ++i) {
if (0 != REAL_FUNC(pthread_create)(GC_mark_threads + i, &attr,
GC_mark_thread, (void *)(word)i)) {
@@ -1117,17 +1116,20 @@ static void fork_parent_proc(void)
static void fork_child_proc(void)
{
GC_release_dirty_lock();
-# if defined(PARALLEL_MARK)
- if (GC_parallel)
+# ifdef PARALLEL_MARK
+ if (GC_parallel) {
GC_release_mark_lock();
+ /* Turn off parallel marking in the child, since we are probably */
+ /* just going to exec, and we would have to restart mark threads. */
+ GC_parallel = FALSE;
+ }
+# ifdef THREAD_SANITIZER
+ /* TSan does not support threads creation in the child process. */
+ available_markers_m1 = 0;
+# endif
# endif
/* Clean up the thread table, so that just our thread is left. */
GC_remove_all_threads_but_me();
-# ifdef PARALLEL_MARK
- /* Turn off parallel marking in the child, since we are probably */
- /* just going to exec, and we would have to restart mark threads. */
- GC_parallel = FALSE;
-# endif
# ifndef GC_DISABLE_INCREMENTAL
GC_dirty_update_child();
# endif
@@ -1832,6 +1834,7 @@ GC_API void GC_CALL GC_allow_register_threads(void)
UNLOCK();
# endif
INIT_REAL_SYMS(); /* to initialize symbols while single-threaded */
+ GC_start_mark_threads();
set_need_to_lock();
}
@@ -2024,6 +2027,10 @@ GC_INNER_PTHRSTART GC_thread GC_start_rtn_prepare_thread(
GC_log_printf("About to start new thread from thread %p\n",
(void *)pthread_self());
# endif
+# ifdef PARALLEL_MARK
+ if (EXPECT(!GC_parallel && available_markers_m1 > 0, FALSE))
+ GC_start_mark_threads();
+# endif
set_need_to_lock();
result = REAL_FUNC(pthread_create)(new_thread, attr, GC_start_routine,
&si);
diff --git a/tests/middle.c b/tests/middle.c
index 9a7cc3d0..c9f24c4e 100644
--- a/tests/middle.c
+++ b/tests/middle.c
@@ -18,6 +18,10 @@ int main (void)
(void)GC_malloc_atomic(4096);
(void)GC_malloc(4096);
}
+
+ /* Test delayed start of marker threads, if they are enabled. */
+ GC_start_mark_threads();
+
for (i = 0; i < 20000; ++i) {
(void)GC_malloc_atomic(2048);
(void)GC_malloc(2048);
diff --git a/win32_threads.c b/win32_threads.c
index 27a5b824..122eb2d2 100644
--- a/win32_threads.c
+++ b/win32_threads.c
@@ -784,6 +784,7 @@ GC_API void GC_CALL GC_allow_register_threads(void)
/* GC_init() does not call GC_init_parallel() in this case. */
parallel_initialized = TRUE;
# endif
+ GC_start_mark_threads();
set_need_to_lock();
}
@@ -1980,11 +1981,7 @@ GC_INNER void GC_get_next_stack(char *start, char *limit,
/* GC_mark_threads[] is unused here unlike that in pthread_support.c */
-# ifdef CAN_HANDLE_FORK
- static int available_markers_m1 = 0;
-# else
-# define available_markers_m1 GC_markers_m1
-# endif
+ static int available_markers_m1 = 0;
# ifdef GC_PTHREADS_PARAMARK
# include <pthread.h>
@@ -2015,12 +2012,11 @@ GC_INNER void GC_get_next_stack(char *start, char *limit,
GC_ASSERT(I_HOLD_LOCK());
ASSERT_CANCEL_DISABLED();
- if (available_markers_m1 <= 0) return;
+ if (available_markers_m1 <= 0 || GC_parallel) return;
/* Skip if parallel markers disabled or already started. */
-# ifdef CAN_HANDLE_FORK
- if (GC_parallel) return;
- GC_wait_for_gc_completion(TRUE);
+ GC_wait_for_gc_completion(TRUE);
+# ifdef CAN_HANDLE_FORK
/* Reset mark_cv state after forking (as in pthread_support.c). */
{
pthread_cond_t mark_cv_local = PTHREAD_COND_INITIALIZER;
@@ -2047,10 +2043,9 @@ GC_INNER void GC_get_next_stack(char *start, char *limit,
}
# endif /* !NO_MARKER_SPECIAL_SIGMASK */
-# ifdef CAN_HANDLE_FORK
- /* To have proper GC_parallel value in GC_help_marker. */
- GC_markers_m1 = available_markers_m1;
-# endif
+ /* To have proper GC_parallel value in GC_help_marker. */
+ GC_markers_m1 = available_markers_m1;
+
for (i = 0; i < available_markers_m1; ++i) {
marker_last_stack_min[i] = ADDR_LIMIT;
if (0 != pthread_create(&new_thread, &attr,
@@ -2190,11 +2185,13 @@ GC_INNER void GC_get_next_stack(char *start, char *limit,
GC_ASSERT(I_HOLD_LOCK());
ASSERT_CANCEL_DISABLED();
- if (available_markers_m1 <= 0) return;
+ if (available_markers_m1 <= 0 || GC_parallel) return;
+ GC_wait_for_gc_completion(TRUE);
GC_ASSERT(GC_fl_builder_count == 0);
/* Initialize GC_marker_cv[] fully before starting the */
/* first helper thread. */
+ GC_markers_m1 = available_markers_m1;
for (i = 0; i < GC_markers_m1; ++i) {
if ((GC_marker_cv[i] = CreateEvent(NULL /* attrs */,
TRUE /* isManualReset */,
@@ -2382,7 +2379,13 @@ GC_INNER void GC_get_next_stack(char *start, char *limit,
# endif /* ! GC_PTHREADS_PARAMARK */
-#endif /* PARALLEL_MARK */
+# define START_MARK_THREADS() \
+ if (EXPECT(GC_parallel || available_markers_m1 <= 0, TRUE)) {} \
+ else GC_start_mark_threads()
+#else
+
+# define START_MARK_THREADS() (void)0
+#endif /* !PARALLEL_MARK */
/* We have no DllMain to take care of new threads. Thus we */
/* must properly intercept thread creation. */
@@ -2472,6 +2475,7 @@ GC_INNER void GC_get_next_stack(char *start, char *limit,
GC_dirty(args);
REACHABLE_AFTER_DIRTY(lpParameter);
+ START_MARK_THREADS();
set_need_to_lock();
thread_h = CreateThread(lpThreadAttributes, dwStackSize, GC_win32_start,
args, dwCreationFlags, lpThreadId);
@@ -2526,6 +2530,7 @@ GC_INNER void GC_get_next_stack(char *start, char *limit,
GC_dirty(args);
REACHABLE_AFTER_DIRTY(arglist);
+ START_MARK_THREADS();
set_need_to_lock();
thread_h = _beginthreadex(security, stack_size,
(unsigned (__stdcall *)(void *))GC_win32_start,
@@ -2843,6 +2848,7 @@ GC_INNER void GC_thr_init(void)
(void *)GC_PTHREAD_PTRVAL(pthread_self()),
(long)GetCurrentThreadId());
# endif
+ START_MARK_THREADS();
set_need_to_lock();
result = pthread_create(new_thread, attr, GC_pthread_start, si);