Diffstat (limited to 'include/private/gc_locks.h')
 include/private/gc_locks.h | 362 ++++++++++++++++++++++++++++---------------
 1 file changed, 199 insertions(+), 163 deletions(-)
diff --git a/include/private/gc_locks.h b/include/private/gc_locks.h
index e5f692a0..eed9f105 100644
--- a/include/private/gc_locks.h
+++ b/include/private/gc_locks.h
@@ -43,6 +43,7 @@
  *
  */
 # ifdef THREADS
+    void GC_noop1 GC_PROTO((word));
 # ifdef PCR_OBSOLETE   /* Faster, but broken with multiple lwp's */
 #   include "th/PCR_Th.h"
 #   include "th/PCR_ThCrSec.h"
@@ -82,126 +83,49 @@
 #    define LOCK() mutex_lock(&GC_allocate_ml);
 #    define UNLOCK() mutex_unlock(&GC_allocate_ml);
 #  endif
-#  if defined(LINUX_THREADS)
-#   define NO_THREAD (pthread_t)(-1)
-#   if defined(I386)|| defined(POWERPC) || defined(ALPHA) || defined(IA64) \
-       || defined(M68K) || defined(SPARC)
-#    include <pthread.h>
-#    if defined(PARALLEL_MARK)
-      /* We need compare-and-swap to update mark bits, where it's      */
-      /* performance critical.  If USE_MARK_BYTES is defined, it is    */
-      /* no longer needed for this purpose.  However we use it in      */
-      /* either case to implement atomic fetch-and-add, though that's  */
-      /* less performance critical, and could perhaps be done with     */
-      /* a lock.                                                       */
-#     if defined(GENERIC_COMPARE_AND_SWAP)
-       /* Probably not useful, except for debugging.  */
-       extern pthread_mutex_t GC_compare_and_swap_lock;
-       static GC_bool GC_compare_and_exchange(volatile GC_word *addr,
-                                              GC_word old, GC_word new_val)
-       {
-         GC_bool result;
-         pthread_mutex_lock(&GC_compare_and_swap_lock);
-         if (*addr == old) {
-           *addr = new_val;
-           result = TRUE;
-         } else {
-           result = FALSE;
-         }
-         pthread_mutex_unlock(&GC_compare_and_swap_lock);
-         return result;
-       }
-#     endif /* GENERIC_COMPARE_AND_SWAP */
-#     if defined(I386)
-#      if !defined(GENERIC_COMPARE_AND_SWAP)
-         /* Returns TRUE if the comparison succeeded. */
-         inline static GC_bool GC_compare_and_exchange(volatile GC_word *addr,
-                                                       GC_word old,
-                                                       GC_word new_val)
-         {
-           char result;
-           __asm__ __volatile__("lock; cmpxchgl %2, %0; setz %1"
-                : "=m"(*(addr)), "=r"(result)
-                : "r" (new_val), "0"(*(addr)), "a"(old));
-           return (GC_bool) result;
-         }
-#      endif /* !GENERIC_COMPARE_AND_SWAP */
-       inline static void GC_memory_barrier()
-       {
-         /* We believe the processor ensures at least processor */
-         /* consistent ordering.  Thus a compiler barrier       */
-         /* should suffice.                                     */
-         __asm__ __volatile__("" : : : "memory");
-       }
-#     endif
-#     if defined(IA64)
-#      if !defined(GENERIC_COMPARE_AND_SWAP)
-         inline static GC_bool GC_compare_and_exchange(volatile GC_word *addr,
-                                                       GC_word old, GC_word new_val)
-         {
-           unsigned long oldval;
-           __asm__ __volatile__("mov ar.ccv=%4 ;; cmpxchg8.rel %0=%1,%2,ar.ccv"
-                : "=r"(oldval), "=m"(*addr)
-                : "r"(new_val), "1"(*addr), "r"(old));
-           return (oldval == old);
-         }
-#      endif /* !GENERIC_COMPARE_AND_SWAP */
-       inline static void GC_memory_barrier()
-       {
-         __asm__ __volatile__("mf" : : : "memory");
-       }
-#     endif /* IA64 */
-      /* Returns the original value of *addr. */
-      inline static GC_word GC_atomic_add(volatile GC_word *addr, GC_word how_much)
-      {
-        GC_word old;
-        do {
-          old = *addr;
-        } while (!GC_compare_and_exchange(addr, old, old+how_much));
-        return old;
-      }
-#    endif /* PARALLEL_MARK */
-#    ifndef THREAD_LOCAL_ALLOC
-     /* In the THREAD_LOCAL_ALLOC case, the allocation lock tends to   */
-     /* be held for long periods, if it is held at all.  Thus spinning */
-     /* and sleeping for fixed periods are likely to result in         */
-     /* significant wasted time.  We thus rely mostly on queued locks. */
-#     define USE_SPIN_LOCK
-#     if defined(I386)
+/* Try to define GC_TEST_AND_SET and a matching GC_CLEAR for spin lock */
+/* acquisition and release.  We need this for correct operation of the */
+/* incremental GC.                                                     */
+#  ifdef __GNUC__
+#    if defined(I386)
       inline static int GC_test_and_set(volatile unsigned int *addr) {
          int oldval;
          /* Note: the "xchg" instruction does not need a "lock" prefix */
          __asm__ __volatile__("xchgl %0, %1"
                 : "=r"(oldval), "=m"(*(addr))
-                : "0"(1), "m"(*(addr)));
+                : "0"(1), "m"(*(addr)) : "memory");
          return oldval;
       }
-#     endif
-#     if defined(IA64)
+#      define GC_TEST_AND_SET_DEFINED
+#    endif
+#    if defined(IA64)
       inline static int GC_test_and_set(volatile unsigned int *addr) {
          long oldval, n = 1;
          __asm__ __volatile__("xchg4 %0=%1,%2"
                 : "=r"(oldval), "=m"(*addr)
-                : "r"(n), "1"(*addr));
+                : "r"(n), "1"(*addr) : "memory");
          return oldval;
       }
+#      define GC_TEST_AND_SET_DEFINED
+      /* Should this handle post-increment addressing?? */
       inline static void GC_clear(volatile unsigned int *addr) {
-        __asm__ __volatile__("st4.rel %0=r0" : "=m" (*addr));
+        __asm__ __volatile__("st4.rel %0=r0" : "=m" (*addr) : : "memory");
       }
 #      define GC_CLEAR_DEFINED
-#     endif
-#     ifdef SPARC
+#    endif
+#    ifdef SPARC
       inline static int GC_test_and_set(volatile unsigned int *addr) {
          int oldval;
          __asm__ __volatile__("ldstub %1,%0"
          : "=r"(oldval), "=m"(*addr)
-         : "m"(*addr));
+         : "m"(*addr) : "memory");
          return oldval;
       }
-#     endif
-#     ifdef M68K
+#      define GC_TEST_AND_SET_DEFINED
+#    endif
+#    ifdef M68K
       /* Contributed by Tony Mantler.  I'm not sure how well it was    */
       /* tested.                                                       */
       inline static int GC_test_and_set(volatile unsigned int *addr) {
@@ -213,11 +137,12 @@
          __asm__ __volatile__(
                  "tas %1@; sne %0; negb %0"
                  : "=d" (oldval)
-                 : "a" (addr));
+                 : "a" (addr) : "memory");
          return oldval;
       }
-#     endif
-#     if defined(POWERPC)
+#      define GC_TEST_AND_SET_DEFINED
+#    endif
+#    if defined(POWERPC)
       inline static int GC_test_and_set(volatile unsigned int *addr) {
         int oldval;
         int temp = 1; // locked value
@@ -234,13 +159,14 @@
                        : "memory");
         return (int)oldval;
       }
+#      define GC_TEST_AND_SET_DEFINED
       inline static void GC_clear(volatile unsigned int *addr) {
-        __asm__ __volatile__("eieio");
+        __asm__ __volatile__("eieio" : : : "memory");
         *(addr) = 0;
       }
 #      define GC_CLEAR_DEFINED
-#     endif
-#     ifdef ALPHA
+#    endif
+#    if defined(ALPHA)
       inline static int GC_test_and_set(volatile unsigned int * addr)
       {
         unsigned long oldvalue;
@@ -259,14 +185,16 @@
                 "3:     br 1b\n"
                 ".previous"
                 :"=&r" (temp), "=m" (*addr), "=&r" (oldvalue)
-               :"Ir" (1), "m" (*addr));
+               :"Ir" (1), "m" (*addr)
+               :"memory");

         return oldvalue;
       }
+#      define GC_TEST_AND_SET_DEFINED
       /* Should probably also define GC_clear, since it needs  */
       /* a memory barrier ??                                   */
-#     endif /* ALPHA */
-#     ifdef ARM32
+#    endif /* ALPHA */
+#    ifdef ARM32
       inline static int GC_test_and_set(volatile unsigned int *addr) {
         int oldval;
        /* SWP on ARM is very similar to XCHG on x86.  Doesn't lock the
@@ -275,18 +203,154 @@
        /* See linuxthreads/sysdeps/arm/pt-machine.h in glibc-2.1      */
         __asm__ __volatile__("swp %0, %1, [%2]"
                              : "=r"(oldval)
-                             : "r"(1), "r"(addr));
+                             : "r"(1), "r"(addr)
+                             : "memory");
         return oldval;
       }

-#     endif /* ARM32 */
-#     ifndef GC_CLEAR_DEFINED
-      inline static void GC_clear(volatile unsigned int *addr) {
-         /* Try to discourage gcc from moving anything past this. */
-         __asm__ __volatile__(" ");
-         *(addr) = 0;
+#      define GC_TEST_AND_SET_DEFINED
+#    endif /* ARM32 */
+#  endif /* __GNUC__ */
+#  if (defined(ALPHA) && !defined(__GNUC__))
+#    define GC_test_and_set(addr) __cxx_test_and_set_atomic(addr, 1)
+#    define GC_TEST_AND_SET_DEFINED
+#  endif
+#  if defined(MSWIN32)
+#    define GC_test_and_set(addr) InterlockedExchange((LPLONG)addr,1)
+#    define GC_TEST_AND_SET_DEFINED
+#  endif
+#  ifdef MIPS
+#    if __mips < 3 || !(defined (_ABIN32) || defined(_ABI64)) \
+        || !defined(_COMPILER_VERSION) || _COMPILER_VERSION < 700
+#      define GC_test_and_set(addr, v) test_and_set(addr,v)
+#    else
+#      define GC_test_and_set(addr, v) __test_and_set(addr,v)
+#      define GC_clear(addr) __lock_release(addr);
+#      define GC_CLEAR_DEFINED
+#    endif
+#    define GC_TEST_AND_SET_DEFINED
+#  endif /* MIPS */
+#  if 0 /* defined(HP_PA) */
+     /* The official recommendation seems to be to not use ldcw from   */
+     /* user mode.  Since multithreaded incremental collection doesn't */
+     /* work anyway on HP_PA, this shouldn't be a major loss.          */
+
+     /* "set" means 0 and "clear" means 1 here.         */
+#    define GC_test_and_set(addr) !GC_test_and_clear(addr);
+#    define GC_TEST_AND_SET_DEFINED
+#    define GC_clear(addr) GC_noop1((word)(addr)); *(volatile unsigned int *)addr = 1;
+       /* The above needs a memory barrier! */
+#    define GC_CLEAR_DEFINED
+#  endif
+#  if defined(GC_TEST_AND_SET_DEFINED) && !defined(GC_CLEAR_DEFINED)
+#    ifdef __GNUC__
+       inline static void GC_clear(volatile unsigned int *addr) {
+         /* Try to discourage gcc from moving anything past this. */
+         __asm__ __volatile__(" " : : : "memory");
+         *(addr) = 0;
+       }
+#    else
+       /* The function call in the following should prevent the  */
+       /* compiler from moving assignments to below the UNLOCK.  */
+#      define GC_clear(addr) GC_noop1((word)(addr)); \
+                             *((volatile unsigned int *)(addr)) = 0;
+#    endif
+#    define GC_CLEAR_DEFINED
+#  endif /* !GC_CLEAR_DEFINED */
+
+#  if !defined(GC_TEST_AND_SET_DEFINED)
+#    define USE_PTHREAD_LOCKS
+#  endif
+
+#  if defined(LINUX_THREADS) || defined(OSF1_THREADS) \
+      || defined(HPUX_THREADS)
+#    define NO_THREAD (pthread_t)(-1)
+#    include <pthread.h>
+#    if defined(PARALLEL_MARK)
+      /* We need compare-and-swap to update mark bits, where it's      */
+      /* performance critical.  If USE_MARK_BYTES is defined, it is    */
+      /* no longer needed for this purpose.  However we use it in      */
+      /* either case to implement atomic fetch-and-add, though that's  */
+      /* less performance critical, and could perhaps be done with     */
+      /* a lock.                                                       */
+#     if defined(GENERIC_COMPARE_AND_SWAP)
+       /* Probably not useful, except for debugging.             */
+       /* We do use GENERIC_COMPARE_AND_SWAP on PA_RISC, but we  */
+       /* minimize its use.                                      */
+       extern pthread_mutex_t GC_compare_and_swap_lock;
+
+       /* Note that if GC_word updates are not atomic, a concurrent */
+       /* reader should acquire GC_compare_and_swap_lock.  On       */
+       /* currently supported platforms, such updates are atomic.   */
+       extern GC_bool GC_compare_and_exchange(volatile GC_word *addr,
+                                              GC_word old, GC_word new_val);
+#     endif /* GENERIC_COMPARE_AND_SWAP */
+#     if defined(I386)
+#      if !defined(GENERIC_COMPARE_AND_SWAP)
+         /* Returns TRUE if the comparison succeeded. */
+         inline static GC_bool GC_compare_and_exchange(volatile GC_word *addr,
+                                                       GC_word old,
+                                                       GC_word new_val)
+         {
+           char result;
+           __asm__ __volatile__("lock; cmpxchgl %2, %0; setz %1"
+                : "=m"(*(addr)), "=r"(result)
+                : "r" (new_val), "0"(*(addr)), "a"(old) : "memory");
+           return (GC_bool) result;
+         }
+#      endif /* !GENERIC_COMPARE_AND_SWAP */
+       inline static void GC_memory_write_barrier()
+       {
+         /* We believe the processor ensures at least processor */
+         /* consistent ordering.  Thus a compiler barrier       */
+         /* should suffice.                                     */
+         __asm__ __volatile__("" : : : "memory");
+       }
+#     endif /* I386 */
+#     if defined(IA64)
+#      if !defined(GENERIC_COMPARE_AND_SWAP)
+         inline static GC_bool GC_compare_and_exchange(volatile GC_word *addr,
+                                                       GC_word old, GC_word new_val)
+         {
+           unsigned long oldval;
+           __asm__ __volatile__("mov ar.ccv=%4 ;; cmpxchg8.rel %0=%1,%2,ar.ccv"
+                : "=r"(oldval), "=m"(*addr)
+                : "r"(new_val), "1"(*addr), "r"(old) : "memory");
+           return (oldval == old);
       }
-#     endif /* !GC_CLEAR_DEFINED */
+#      endif /* !GENERIC_COMPARE_AND_SWAP */
+#      if 0
+        /* Shouldn't be needed; we use volatile stores instead. */
+        inline static void GC_memory_write_barrier()
+        {
+          __asm__ __volatile__("mf" : : : "memory");
+        }
+#      endif /* 0 */
+#     endif /* IA64 */
+#     if !defined(GENERIC_COMPARE_AND_SWAP)
+       /* Returns the original value of *addr. */
+       inline static GC_word GC_atomic_add(volatile GC_word *addr,
+                                           GC_word how_much)
+       {
+         GC_word old;
+         do {
+           old = *addr;
+         } while (!GC_compare_and_exchange(addr, old, old+how_much));
+         return old;
+       }
+#     else /* GENERIC_COMPARE_AND_SWAP */
+       /* So long as a GC_word can be atomically updated, it should */
+       /* be OK to read *addr without a lock.                       */
+       extern GC_word GC_atomic_add(volatile GC_word *addr, GC_word how_much);
+#     endif /* GENERIC_COMPARE_AND_SWAP */
+#    endif /* PARALLEL_MARK */
+
+#    if !defined(THREAD_LOCAL_ALLOC) && !defined(USE_PTHREAD_LOCKS)
+      /* In the THREAD_LOCAL_ALLOC case, the allocation lock tends to   */
+      /* be held for long periods, if it is held at all.  Thus spinning */
+      /* and sleeping for fixed periods are likely to result in         */
+      /* significant wasted time.  We thus rely mostly on queued locks. */
+#     define USE_SPIN_LOCK
       extern volatile unsigned int GC_allocate_lock;
       extern void GC_lock(void);
        /* Allocation lock holder.  Only set if acquired by client through */
@@ -304,13 +368,23 @@
 #       define UNLOCK() \
                GC_clear(&GC_allocate_lock)
 #     endif /* !GC_ASSERTIONS */
-#    else /* THREAD_LOCAL_ALLOC */
-#     define USE_PTHREAD_LOCKS
+#     if 0
+       /* Another alternative for OSF1 might be:  */
+#      include <sys/mman.h>
+       extern msemaphore GC_allocate_semaphore;
+#      define LOCK() { if (msem_lock(&GC_allocate_semaphore, MSEM_IF_NOWAIT) \
+                           != 0) GC_lock(); else GC_allocate_lock = 1; }
+       /* The following is INCORRECT, since the memory model is too weak. */
+       /* Is this true?  Presumably msem_unlock has the right semantics?  */
+       /*              - HB                                               */
+#      define UNLOCK() { GC_allocate_lock = 0; \
+                         msem_unlock(&GC_allocate_semaphore, 0); }
+#     endif /* 0 */
+#    else /* THREAD_LOCAL_ALLOC || USE_PTHREAD_LOCKS */
+#     ifndef USE_PTHREAD_LOCKS
+#       define USE_PTHREAD_LOCKS
+#     endif
 #    endif /* THREAD_LOCAL_ALLOC */
-#   else  /* LINUX_THREADS on hardware for which we don't know how */
-         /* to do test and set.                                    */
-#    define USE_PTHREAD_LOCKS
-#   endif /* ! known hardware */
 #   ifdef USE_PTHREAD_LOCKS
 #     include <pthread.h>
       extern pthread_mutex_t GC_allocate_ml;
@@ -338,33 +412,13 @@
 #     ifdef GC_ASSERTIONS
         extern pthread_t GC_mark_lock_holder;
 #     endif
-#  endif /* LINUX_THREADS */
-#  if defined(HPUX_THREADS)
-#   include <pthread.h>
-    extern pthread_mutex_t GC_allocate_ml;
-#   define NO_THREAD (pthread_t)(-1)
-#   define LOCK() pthread_mutex_lock(&GC_allocate_ml)
-#   define UNLOCK() pthread_mutex_unlock(&GC_allocate_ml)
-#  endif
+#  endif /* LINUX_THREADS || OSF1_THREADS || HPUX_THREADS */
 #  if defined(IRIX_THREADS)
-      /* This may also eventually be appropriate for HPUX_THREADS */
 #   include <pthread.h>
-#   ifndef HPUX_THREADS
-      /* This probably should never be included, but I can't test */
-      /* on Irix anymore.                                         */
-#     include <mutex.h>
-#   endif
+    /* This probably should never be included, but I can't test */
+    /* on Irix anymore.                                         */
+#   include <mutex.h>

-#   ifndef HPUX_THREADS
-#    if __mips < 3 || !(defined (_ABIN32) || defined(_ABI64)) \
-        || !defined(_COMPILER_VERSION) || _COMPILER_VERSION < 700
-#      define GC_test_and_set(addr, v) test_and_set(addr,v)
-#    else
-#      define GC_test_and_set(addr, v) __test_and_set(addr,v)
-#    endif
-#   else
-      /* I couldn't find a way to do this inline on HP/UX */
-#   endif
    extern unsigned long GC_allocate_lock;
        /* This is not a mutex because mutexes that obey the (optional)      */
        /* POSIX scheduling rules are subject to convoys in high contention  */
@@ -377,26 +431,8 @@
 #   define NO_THREAD (pthread_t)(-1)
 #   define UNSET_LOCK_HOLDER() GC_lock_holder = NO_THREAD
 #   define I_HOLD_LOCK() (pthread_equal(GC_lock_holder, pthread_self()))
-#   ifdef HPUX_THREADS
-#    define LOCK() { if (!GC_test_and_clear(&GC_allocate_lock)) GC_lock(); }
-     /* The following is INCORRECT, since the memory model is too weak. */
-#    define UNLOCK() { GC_noop1(&GC_allocate_lock); \
-                       *(volatile unsigned long *)(&GC_allocate_lock) = 1; }
-#   else
-#    define LOCK() { if (GC_test_and_set(&GC_allocate_lock, 1)) GC_lock(); }
-#    if __mips >= 3 && (defined (_ABIN32) || defined(_ABI64)) \
-        && defined(_COMPILER_VERSION) && _COMPILER_VERSION >= 700
-#      define UNLOCK() __lock_release(&GC_allocate_lock)
-#    else
-       /* The function call in the following should prevent the */
-       /* compiler from moving assignments to below the UNLOCK. */
-       /* This is probably not necessary for ucode or gcc 2.8.  */
-       /* It may be necessary for Ragnarok and future gcc       */
-       /* versions.                                             */
-#      define UNLOCK() { GC_noop1(&GC_allocate_lock); \
-                         *(volatile unsigned long *)(&GC_allocate_lock) = 0; }
-#    endif
-#   endif
+#   define LOCK() { if (GC_test_and_set(&GC_allocate_lock, 1)) GC_lock(); }
+#   define UNLOCK() GC_clear(&GC_allocate_lock);
    extern VOLATILE GC_bool GC_collecting;
 #   define ENTER_GC() \
                { \
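
The recurring change in this patch is the addition of a "memory" clobber to each GC_test_and_set variant, so that GCC treats the atomic exchange as a compiler barrier and does not cache shared globals across lock acquisition. The following is a minimal standalone sketch of the resulting spin-lock protocol, assuming x86 and GCC: the names GC_test_and_set, GC_clear, and GC_allocate_lock mirror the header, but the real LOCK() falls back to the queued GC_lock() on contention where this demo simply spins, and the pthread test harness is our own addition, not part of the header.

/* Sketch of the header's spin-lock protocol; x86 + GCC assumed. */
#include <pthread.h>
#include <stdio.h>

static volatile unsigned int GC_allocate_lock = 0;

/* Atomically exchange 1 into *addr; returns the old value.        */
/* The "memory" clobber added by this patch makes the asm a        */
/* compiler barrier as well as an atomic operation, so GCC cannot  */
/* move or cache accesses to shared data across the acquire.       */
static inline int GC_test_and_set(volatile unsigned int *addr)
{
    int oldval;
    __asm__ __volatile__("xchgl %0, %1"
                         : "=r"(oldval), "=m"(*addr)
                         : "0"(1), "m"(*addr)
                         : "memory");
    return oldval;
}

/* Release: a compiler barrier followed by an ordinary store.  On  */
/* x86 a store is not reordered with earlier stores by hardware,   */
/* so no fence instruction is needed here.                         */
static inline void GC_clear(volatile unsigned int *addr)
{
    __asm__ __volatile__("" : : : "memory");
    *addr = 0;
}

#define LOCK()   { while (GC_test_and_set(&GC_allocate_lock)) {} }
#define UNLOCK() GC_clear(&GC_allocate_lock)

static long counter = 0;

static void *worker(void *arg)
{
    for (int i = 0; i < 100000; ++i) {
        LOCK();
        ++counter;              /* protected critical section */
        UNLOCK();
    }
    return arg;
}

int main(void)
{
    pthread_t t1, t2;
    pthread_create(&t1, NULL, worker, NULL);
    pthread_create(&t2, NULL, worker, NULL);
    pthread_join(t1, NULL);
    pthread_join(t2, NULL);
    printf("counter = %ld (expect 200000)\n", counter);
    return 0;
}

Compiled with cc -std=c99 -pthread, the final count comes out exact; dropping the "memory" clobbers reintroduces exactly the kind of compiler reordering this patch guards against.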
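In the PARALLEL_MARK section, atomic fetch-and-add is built on top of compare-and-swap: GC_atomic_add rereads *addr and retries until GC_compare_and_exchange installs the incremented value, so a concurrent add by another thread is never lost. Below is a sketch of that retry loop, with GCC's __atomic_compare_exchange_n builtin standing in for the architecture-specific cmpxchg assembly (an assumption for portability; the header itself uses inline asm or the GENERIC_COMPARE_AND_SWAP fallback).

/* Sketch of GC_atomic_add's CAS retry loop; builtin CAS assumed. */
#include <stdio.h>

typedef unsigned long GC_word;
typedef int GC_bool;

/* Returns TRUE iff *addr still held old and was set to new_val. */
static GC_bool GC_compare_and_exchange(volatile GC_word *addr,
                                       GC_word old, GC_word new_val)
{
    return __atomic_compare_exchange_n((GC_word *)addr, &old, new_val,
                                       0 /* strong */,
                                       __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
}

/* Returns the original value of *addr.  On CAS failure the loop  */
/* simply rereads and retries with the fresh value.               */
static GC_word GC_atomic_add(volatile GC_word *addr, GC_word how_much)
{
    GC_word old;
    do {
        old = *addr;
    } while (!GC_compare_and_exchange(addr, old, old + how_much));
    return old;
}

int main(void)
{
    GC_word x = 40;
    GC_word prev = GC_atomic_add(&x, 2);
    printf("prev = %lu, now = %lu\n", prev, (unsigned long)x);  /* 40, 42 */
    return 0;
}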
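The patch also turns the GENERIC_COMPARE_AND_SWAP variant of GC_compare_and_exchange from a static inline into an extern declaration; the deleted lines above show the intended mutex-protected body. For reference, here is a self-contained version of that removed implementation (with TRUE/FALSE spelled as 1/0, since gc_priv.h is not included here):

#include <pthread.h>

typedef unsigned long GC_word;
typedef int GC_bool;

pthread_mutex_t GC_compare_and_swap_lock = PTHREAD_MUTEX_INITIALIZER;

/* Slow but portable: serialize every CAS through one mutex.  As  */
/* the header's comment says, probably only useful for debugging, */
/* though it is also the path minimally used on PA_RISC.          */
GC_bool GC_compare_and_exchange(volatile GC_word *addr,
                                GC_word old, GC_word new_val)
{
    GC_bool result;

    pthread_mutex_lock(&GC_compare_and_swap_lock);
    if (*addr == old) {
        *addr = new_val;
        result = 1;
    } else {
        result = 0;
    }
    pthread_mutex_unlock(&GC_compare_and_swap_lock);
    return result;
}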