summaryrefslogtreecommitdiff
path: root/include/private/gc_locks.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/private/gc_locks.h')
-rw-r--r--include/private/gc_locks.h362
1 files changed, 199 insertions, 163 deletions
diff --git a/include/private/gc_locks.h b/include/private/gc_locks.h
index e5f692a0..eed9f105 100644
--- a/include/private/gc_locks.h
+++ b/include/private/gc_locks.h
@@ -43,6 +43,7 @@
*
*/
# ifdef THREADS
+ void GC_noop1 GC_PROTO((word)); /* called by the non-gcc GC_clear fallback macros in this header */
# ifdef PCR_OBSOLETE /* Faster, but broken with multiple lwp's */
# include "th/PCR_Th.h"
# include "th/PCR_ThCrSec.h"
@@ -82,126 +83,49 @@
# define LOCK() mutex_lock(&GC_allocate_ml);
# define UNLOCK() mutex_unlock(&GC_allocate_ml);
# endif
-# if defined(LINUX_THREADS)
-# define NO_THREAD (pthread_t)(-1)
-# if defined(I386)|| defined(POWERPC) || defined(ALPHA) || defined(IA64) \
- || defined(M68K) || defined(SPARC)
-# include <pthread.h>
-# if defined(PARALLEL_MARK)
- /* We need compare-and-swap to update mark bits, where it's */
- /* performance critical. If USE_MARK_BYTES is defined, it is */
- /* no longer needed for this purpose. However we use it in */
- /* either case to implement atomic fetch-and-add, though that's */
- /* less performance critical, and could perhaps be done with */
- /* a lock. */
-# if defined(GENERIC_COMPARE_AND_SWAP)
- /* Probably not useful, except for debugging. */
- extern pthread_mutex_t GC_compare_and_swap_lock;
- static GC_bool GC_compare_and_exchange(volatile GC_word *addr,
- GC_word old, GC_word new_val)
- {
- GC_bool result;
- pthread_mutex_lock(&GC_compare_and_swap_lock);
- if (*addr == old) {
- *addr = new_val;
- result = TRUE;
- } else {
- result = FALSE;
- }
- pthread_mutex_unlock(&GC_compare_and_swap_lock);
- return result;
- }
-# endif /* GENERIC_COMPARE_AND_SWAP */
-# if defined(I386)
-# if !defined(GENERIC_COMPARE_AND_SWAP)
- /* Returns TRUE if the comparison succeeded. */
- inline static GC_bool GC_compare_and_exchange(volatile GC_word *addr,
- GC_word old,
- GC_word new_val)
- {
- char result;
- __asm__ __volatile__("lock; cmpxchgl %2, %0; setz %1"
- : "=m"(*(addr)), "=r"(result)
- : "r" (new_val), "0"(*(addr)), "a"(old));
- return (GC_bool) result;
- }
-# endif /* !GENERIC_COMPARE_AND_SWAP */
- inline static void GC_memory_barrier()
- {
- /* We believe the processor ensures at least processor */
- /* consistent ordering. Thus a compiler barrier */
- /* should suffice. */
- __asm__ __volatile__("" : : : "memory");
- }
-# endif
-# if defined(IA64)
-# if !defined(GENERIC_COMPARE_AND_SWAP)
- inline static GC_bool GC_compare_and_exchange(volatile GC_word *addr,
- GC_word old, GC_word new_val)
- {
- unsigned long oldval;
- __asm__ __volatile__("mov ar.ccv=%4 ;; cmpxchg8.rel %0=%1,%2,ar.ccv"
- : "=r"(oldval), "=m"(*addr)
- : "r"(new_val), "1"(*addr), "r"(old));
- return (oldval == old);
- }
-# endif /* !GENERIC_COMPARE_AND_SWAP */
- inline static void GC_memory_barrier()
- {
- __asm__ __volatile__("mf" : : : "memory");
- }
-# endif /* IA64 */
- /* Returns the original value of *addr. */
- inline static GC_word GC_atomic_add(volatile GC_word *addr, GC_word how_much)
- {
- GC_word old;
- do {
- old = *addr;
- } while (!GC_compare_and_exchange(addr, old, old+how_much));
- return old;
- }
-# endif /* PARALLEL_MARK */
-# ifndef THREAD_LOCAL_ALLOC
- /* In the THREAD_LOCAL_ALLOC case, the allocation lock tends to */
- /* be held for long periods, if it is held at all. Thus spinning */
- /* and sleeping for fixed periods are likely to result in */
- /* significant wasted time. We thus rely mostly on queued locks. */
-# define USE_SPIN_LOCK
-# if defined(I386)
+/* Try to define GC_TEST_AND_SET and a matching GC_CLEAR for spin lock */
+/* acquisition and release. We need this for correct operation of the */
+/* incremental GC. */
+# ifdef __GNUC__
+# if defined(I386) /* x86: exchange 1 into *addr and return the previous contents */
inline static int GC_test_and_set(volatile unsigned int *addr) {
int oldval;
/* Note: the "xchg" instruction does not need a "lock" prefix */
__asm__ __volatile__("xchgl %0, %1"
: "=r"(oldval), "=m"(*(addr))
- : "0"(1), "m"(*(addr)));
+ : "0"(1), "m"(*(addr)) : "memory"); /* "memory" clobber: gcc must not move memory accesses across the exchange */
return oldval;
}
-# endif
-# if defined(IA64)
+# define GC_TEST_AND_SET_DEFINED /* tested later to decide between spin locks and USE_PTHREAD_LOCKS */
+# endif
+# if defined(IA64) /* IA-64: xchg4 swaps 1 into *addr; GC_clear stores r0 with st4.rel */
inline static int GC_test_and_set(volatile unsigned int *addr) {
long oldval, n = 1;
__asm__ __volatile__("xchg4 %0=%1,%2"
: "=r"(oldval), "=m"(*addr)
- : "r"(n), "1"(*addr));
+ : "r"(n), "1"(*addr) : "memory"); /* "memory" clobber: also a compiler-level barrier */
return oldval;
}
+# define GC_TEST_AND_SET_DEFINED /* tested later to decide between spin locks and USE_PTHREAD_LOCKS */
+ /* Should this handle post-increment addressing?? */
inline static void GC_clear(volatile unsigned int *addr) {
- __asm__ __volatile__("st4.rel %0=r0" : "=m" (*addr));
+ __asm__ __volatile__("st4.rel %0=r0" : "=m" (*addr) : : "memory"); /* NOTE(review): r0 presumably always reads as zero, so this stores 0 - confirm */
}
# define GC_CLEAR_DEFINED
-# endif
-# ifdef SPARC
+# endif
+# ifdef SPARC /* SPARC: test-and-set via ldstub; returns what the instruction loaded */
inline static int GC_test_and_set(volatile unsigned int *addr) {
int oldval;
__asm__ __volatile__("ldstub %1,%0"
: "=r"(oldval), "=m"(*addr)
- : "m"(*addr));
+ : "m"(*addr) : "memory"); /* NOTE(review): ldstub presumably touches a single byte of *addr - confirm */
return oldval;
}
-# endif
-# ifdef M68K
+# define GC_TEST_AND_SET_DEFINED /* tested later to decide between spin locks and USE_PTHREAD_LOCKS */
+# endif
+# ifdef M68K
/* Contributed by Tony Mantler. I'm not sure how well it was */
/* tested. */
inline static int GC_test_and_set(volatile unsigned int *addr) {
@@ -213,11 +137,12 @@
__asm__ __volatile__(
"tas %1@; sne %0; negb %0"
: "=d" (oldval)
- : "a" (addr));
+ : "a" (addr) : "memory");
return oldval;
}
-# endif
-# if defined(POWERPC)
+# define GC_TEST_AND_SET_DEFINED
+# endif
+# if defined(POWERPC)
inline static int GC_test_and_set(volatile unsigned int *addr) {
int oldval;
int temp = 1; // locked value
@@ -234,13 +159,14 @@
: "memory");
return (int)oldval;
}
+# define GC_TEST_AND_SET_DEFINED
inline static void GC_clear(volatile unsigned int *addr) {
- __asm__ __volatile__("eieio");
+ __asm__ __volatile__("eieio" ::: "memory");
*(addr) = 0;
}
# define GC_CLEAR_DEFINED
-# endif
-# ifdef ALPHA
+# endif
+# if defined(ALPHA)
inline static int GC_test_and_set(volatile unsigned int * addr)
{
unsigned long oldvalue;
@@ -259,14 +185,16 @@
"3: br 1b\n"
".previous"
:"=&r" (temp), "=m" (*addr), "=&r" (oldvalue)
- :"Ir" (1), "m" (*addr));
+ :"Ir" (1), "m" (*addr)
+ :"memory");
return oldvalue;
}
+# define GC_TEST_AND_SET_DEFINED
/* Should probably also define GC_clear, since it needs */
/* a memory barrier ?? */
-# endif /* ALPHA */
-# ifdef ARM32
+# endif /* ALPHA */
+# ifdef ARM32
inline static int GC_test_and_set(volatile unsigned int *addr) {
int oldval;
/* SWP on ARM is very similar to XCHG on x86. Doesn't lock the
@@ -275,18 +203,154 @@
/* See linuxthreads/sysdeps/arm/pt-machine.h in glibc-2.1 */
__asm__ __volatile__("swp %0, %1, [%2]"
: "=r"(oldval)
- : "r"(1), "r"(addr));
+ : "r"(1), "r"(addr)
+ : "memory");
return oldval;
}
-# endif /* ARM32 */
-# ifndef GC_CLEAR_DEFINED
- inline static void GC_clear(volatile unsigned int *addr) {
- /* Try to discourage gcc from moving anything past this. */
- __asm__ __volatile__(" ");
- *(addr) = 0;
+# define GC_TEST_AND_SET_DEFINED
+# endif /* ARM32 */
+# endif /* __GNUC__ */
+# if (defined(ALPHA) && !defined(__GNUC__)) /* Alpha without gcc: the inline-asm version above is unavailable */
+# define GC_test_and_set(addr) __cxx_test_and_set_atomic(addr, 1) /* NOTE(review): presumably a vendor compiler builtin - confirm */
+# define GC_TEST_AND_SET_DEFINED
+# endif
+# if defined(MSWIN32)
+ /* Win32: store 1 into *addr with InterlockedExchange and yield its */
+ /* result. The argument is parenthesized so that an expression */
+ /* argument is cast as a whole rather than only its first operand. */
+# define GC_test_and_set(addr) InterlockedExchange((LPLONG)(addr),1)
+# define GC_TEST_AND_SET_DEFINED
+# endif
+# ifdef MIPS
+ /* MIPS: unlike the other ports in this header, GC_test_and_set */
+ /* takes the value to store as a second argument. When __mips < 3, */
+ /* the ABI is neither N32 nor 64, or _COMPILER_VERSION is missing or */
+ /* below 700, it maps to test_and_set(); otherwise it maps to */
+ /* __test_and_set() and GC_clear maps to __lock_release(). */
+ /* NOTE(review): these are presumably the SGI library routine and */
+ /* compiler intrinsics - confirm. */
+# if __mips < 3 || !(defined (_ABIN32) || defined(_ABI64)) \
+ || !defined(_COMPILER_VERSION) || _COMPILER_VERSION < 700
+# define GC_test_and_set(addr, v) test_and_set(addr,v)
+# else
+# define GC_test_and_set(addr, v) __test_and_set(addr,v)
+ /* No trailing semicolon: the caller supplies it, so that */
+ /* "if (c) GC_clear(p); else ..." still parses as intended. */
+# define GC_clear(addr) __lock_release(addr)
+# define GC_CLEAR_DEFINED
+# endif
+# define GC_TEST_AND_SET_DEFINED
+# endif /* MIPS */
+# if 0 /* defined(HP_PA) */
+ /* The official recommendation seems to be to not use ldcw from */
+ /* user mode. Since multithreaded incremental collection doesn't */
+ /* work anyway on HP_PA, this shouldn't be a major loss. */
+
+ /* "set" means 0 and "clear" means 1 here. */
+ /* Both macros are written without a trailing semicolon, and */
+ /* GC_test_and_set is a single parenthesized expression, so they */
+ /* can be used like the other ports' versions if this section is */
+ /* ever re-enabled. */
+# define GC_test_and_set(addr) (!GC_test_and_clear(addr))
+# define GC_TEST_AND_SET_DEFINED
+# define GC_clear(addr) do { GC_noop1((word)(addr)); \
+ *(volatile unsigned int *)(addr) = 1; } while (0)
+ /* The above needs a memory barrier! */
+# define GC_CLEAR_DEFINED
+# endif
+# if defined(GC_TEST_AND_SET_DEFINED) && !defined(GC_CLEAR_DEFINED)
+ /* Generic GC_clear(addr): store 0 to *addr to release a lock */
+ /* taken with GC_test_and_set, for ports that did not define */
+ /* their own GC_clear above. */
+# ifdef __GNUC__
+ inline static void GC_clear(volatile unsigned int *addr) {
+ /* Try to discourage gcc from moving anything past this. */
+ __asm__ __volatile__(" " : : : "memory");
+ *(addr) = 0;
+ }
+# else
+ /* The function call in the following should prevent the */
+ /* compiler from moving assignments to below the UNLOCK. */
+ /* The two statements are wrapped in do/while(0) so that they */
+ /* stay together when the macro is the body of an unbraced */
+ /* if/else; the caller supplies the terminating semicolon. */
+# define GC_clear(addr) do { GC_noop1((word)(addr)); \
+ *((volatile unsigned int *)(addr)) = 0; } while (0)
+# endif
+# define GC_CLEAR_DEFINED
+# endif /* !GC_CLEAR_DEFINED */
+
+# if !defined(GC_TEST_AND_SET_DEFINED) /* none of the ports above supplied a test-and-set primitive */
+# define USE_PTHREAD_LOCKS /* selects the USE_PTHREAD_LOCKS section (pthread_mutex_t GC_allocate_ml) further down */
+# endif
+
+# if defined(LINUX_THREADS) || defined(OSF1_THREADS) \
+ || defined(HPUX_THREADS)
+# define NO_THREAD (pthread_t)(-1)
+# include <pthread.h>
+# if defined(PARALLEL_MARK)
+ /* We need compare-and-swap to update mark bits, where it's */
+ /* performance critical. If USE_MARK_BYTES is defined, it is */
+ /* no longer needed for this purpose. However we use it in */
+ /* either case to implement atomic fetch-and-add, though that's */
+ /* less performance critical, and could perhaps be done with */
+ /* a lock. */
+# if defined(GENERIC_COMPARE_AND_SWAP)
+ /* Probably not useful, except for debugging. */
+ /* We do use GENERIC_COMPARE_AND_SWAP on PA_RISC, but we */
+ /* minimize its use. */
+ extern pthread_mutex_t GC_compare_and_swap_lock; /* defined outside this header */
+
+ /* Note that if GC_word updates are not atomic, a concurrent */
+ /* reader should acquire GC_compare_and_swap_lock. On */
+ /* currently supported platforms, such updates are atomic. */
+ extern GC_bool GC_compare_and_exchange(volatile GC_word *addr,
+ GC_word old, GC_word new_val); /* NOTE(review): presumably TRUE iff *addr equalled old and was replaced by new_val, like the inline versions - confirm at the definition */
+# endif /* GENERIC_COMPARE_AND_SWAP */
+# if defined(I386)
+# if !defined(GENERIC_COMPARE_AND_SWAP)
+ /* Returns TRUE if the comparison succeeded. */
+ inline static GC_bool GC_compare_and_exchange(volatile GC_word *addr,
+ GC_word old,
+ GC_word new_val)
+ {
+ char result; /* filled in by setz from the flags left by cmpxchgl */
+ __asm__ __volatile__("lock; cmpxchgl %2, %0; setz %1"
+ : "=m"(*(addr)), "=r"(result)
+ : "r" (new_val), "0"(*(addr)), "a"(old) : "memory"); /* "a"(old): expected value goes in the accumulator; "memory" is a compiler barrier */
+ return (GC_bool) result;
+ }
+# endif /* !GENERIC_COMPARE_AND_SWAP */
+ inline static void GC_memory_write_barrier() /* empty asm: emits no instruction, only constrains the compiler */
+ {
+ /* We believe the processor ensures at least processor */
+ /* consistent ordering. Thus a compiler barrier */
+ /* should suffice. */
+ __asm__ __volatile__("" : : : "memory");
+ }
+# endif /* I386 */
+# if defined(IA64)
+# if !defined(GENERIC_COMPARE_AND_SWAP)
+ inline static GC_bool GC_compare_and_exchange(volatile GC_word *addr,
+ GC_word old, GC_word new_val)
+ {
+ unsigned long oldval; /* receives output operand %0 of cmpxchg8 */
+ __asm__ __volatile__("mov ar.ccv=%4 ;; cmpxchg8.rel %0=%1,%2,ar.ccv" /* %4 (old) is moved into ar.ccv first */
+ : "=r"(oldval), "=m"(*addr)
+ : "r"(new_val), "1"(*addr), "r"(old) : "memory");
+ return (oldval == old); /* nonzero when the value reported by cmpxchg8 matches old */
}
-# endif /* !GC_CLEAR_DEFINED */
+# endif /* !GENERIC_COMPARE_AND_SWAP */
+# if 0 /* disabled: no GC_memory_write_barrier is defined for IA64 here */
+ /* Shouldn't be needed; we use volatile stores instead. */
+ inline static void GC_memory_write_barrier()
+ {
+ __asm__ __volatile__("mf" : : : "memory");
+ }
+# endif /* 0 */
+# endif /* IA64 */
+# if !defined(GENERIC_COMPARE_AND_SWAP)
+ /* Atomic fetch-and-add built on GC_compare_and_exchange: adds */
+ /* how_much to *addr and hands back the value *addr held */
+ /* immediately before the update that succeeded. */
+ inline static GC_word GC_atomic_add(volatile GC_word *addr,
+ GC_word how_much)
+ {
+ for (;;) {
+ /* Snapshot the current value; if the exchange does not */
+ /* succeed, take a fresh snapshot and try again. */
+ GC_word seen = *addr;
+ if (GC_compare_and_exchange(addr, seen, seen+how_much))
+ return seen;
+ }
+ }
+# else /* GENERIC_COMPARE_AND_SWAP */
+ /* Out-of-line version. Reading *addr without holding a lock is */
+ /* expected to be fine as long as GC_word updates are atomic. */
+ extern GC_word GC_atomic_add(volatile GC_word *addr, GC_word how_much);
+# endif /* GENERIC_COMPARE_AND_SWAP */
+# endif /* PARALLEL_MARK */
+
+# if !defined(THREAD_LOCAL_ALLOC) && !defined(USE_PTHREAD_LOCKS)
+ /* In the THREAD_LOCAL_ALLOC case, the allocation lock tends to */
+ /* be held for long periods, if it is held at all. Thus spinning */
+ /* and sleeping for fixed periods are likely to result in */
+ /* significant wasted time. We thus rely mostly on queued locks. */
+# define USE_SPIN_LOCK
extern volatile unsigned int GC_allocate_lock;
extern void GC_lock(void);
/* Allocation lock holder. Only set if acquired by client through */
@@ -304,13 +368,23 @@
# define UNLOCK() \
GC_clear(&GC_allocate_lock)
# endif /* !GC_ASSERTIONS */
-# else /* THREAD_LOCAL_ALLOC */
-# define USE_PTHREAD_LOCKS
+# if 0
+ /* Another alternative for OSF1 might be: */
+# include <sys/mman.h>
+ extern msemaphore GC_allocate_semaphore;
+# define LOCK() { if (msem_lock(&GC_allocate_semaphore, MSEM_IF_NOWAIT) \
+ != 0) GC_lock(); else GC_allocate_lock = 1; }
+ /* The following is INCORRECT, since the memory model is too weak. */
+ /* Is this true? Presumably msem_unlock has the right semantics? */
+ /* - HB */
+# define UNLOCK() { GC_allocate_lock = 0; \
+ msem_unlock(&GC_allocate_semaphore, 0); }
+# endif /* 0 */
+# else /* THREAD_LOCAL_ALLOC || USE_PTHREAD_LOCKS */
+# ifndef USE_PTHREAD_LOCKS
+# define USE_PTHREAD_LOCKS
+# endif
# endif /* THREAD_LOCAL_ALLOC */
-# else /* LINUX_THREADS on hardware for which we don't know how */
- /* to do test and set. */
-# define USE_PTHREAD_LOCKS
-# endif /* ! known hardware */
# ifdef USE_PTHREAD_LOCKS
# include <pthread.h>
extern pthread_mutex_t GC_allocate_ml;
@@ -338,33 +412,13 @@
# ifdef GC_ASSERTIONS
extern pthread_t GC_mark_lock_holder;
# endif
-# endif /* LINUX_THREADS */
-# if defined(HPUX_THREADS)
-# include <pthread.h>
- extern pthread_mutex_t GC_allocate_ml;
-# define NO_THREAD (pthread_t)(-1)
-# define LOCK() pthread_mutex_lock(&GC_allocate_ml)
-# define UNLOCK() pthread_mutex_unlock(&GC_allocate_ml)
-# endif
+# endif /* LINUX_THREADS || OSF1_THREADS || HPUX_THREADS */
# if defined(IRIX_THREADS)
- /* This may also eventually be appropriate for HPUX_THREADS */
# include <pthread.h>
-# ifndef HPUX_THREADS
- /* This probably should never be included, but I can't test */
- /* on Irix anymore. */
-# include <mutex.h>
-# endif
+ /* This probably should never be included, but I can't test */
+ /* on Irix anymore. */
+# include <mutex.h>
-# ifndef HPUX_THREADS
-# if __mips < 3 || !(defined (_ABIN32) || defined(_ABI64)) \
- || !defined(_COMPILER_VERSION) || _COMPILER_VERSION < 700
-# define GC_test_and_set(addr, v) test_and_set(addr,v)
-# else
-# define GC_test_and_set(addr, v) __test_and_set(addr,v)
-# endif
-# else
- /* I couldn't find a way to do this inline on HP/UX */
-# endif
extern unsigned long GC_allocate_lock;
/* This is not a mutex because mutexes that obey the (optional) */
/* POSIX scheduling rules are subject to convoys in high contention */
@@ -377,26 +431,8 @@
# define NO_THREAD (pthread_t)(-1)
# define UNSET_LOCK_HOLDER() GC_lock_holder = NO_THREAD
# define I_HOLD_LOCK() (pthread_equal(GC_lock_holder, pthread_self()))
-# ifdef HPUX_THREADS
-# define LOCK() { if (!GC_test_and_clear(&GC_allocate_lock)) GC_lock(); }
- /* The following is INCORRECT, since the memory model is too weak. */
-# define UNLOCK() { GC_noop1(&GC_allocate_lock); \
- *(volatile unsigned long *)(&GC_allocate_lock) = 1; }
-# else
-# define LOCK() { if (GC_test_and_set(&GC_allocate_lock, 1)) GC_lock(); }
-# if __mips >= 3 && (defined (_ABIN32) || defined(_ABI64)) \
- && defined(_COMPILER_VERSION) && _COMPILER_VERSION >= 700
-# define UNLOCK() __lock_release(&GC_allocate_lock)
-# else
- /* The function call in the following should prevent the */
- /* compiler from moving assignments to below the UNLOCK. */
- /* This is probably not necessary for ucode or gcc 2.8. */
- /* It may be necessary for Ragnarok and future gcc */
- /* versions. */
-# define UNLOCK() { GC_noop1(&GC_allocate_lock); \
- *(volatile unsigned long *)(&GC_allocate_lock) = 0; }
-# endif
-# endif
+ /* LOCK(): one test-and-set attempt on GC_allocate_lock; if it */
+ /* returns nonzero, GC_lock() is called. */
+# define LOCK() { if (GC_test_and_set(&GC_allocate_lock, 1)) GC_lock(); }
+ /* UNLOCK(): release through GC_clear. No trailing semicolon in */
+ /* the macro; callers write UNLOCK(); themselves. */
+ /* NOTE(review): GC_allocate_lock is declared unsigned long in */
+ /* this section, while the generic GC_clear fallback stores */
+ /* through an unsigned int pointer - confirm the two agree on */
+ /* 64-bit MIPS ABIs. */
+# define UNLOCK() GC_clear(&GC_allocate_lock)
extern VOLATILE GC_bool GC_collecting;
# define ENTER_GC() \
{ \