Diffstat (limited to 'include')
-rw-r--r--   include/atomic/gcc_builtins.h   12
-rw-r--r--   include/atomic/generic-msvc.h   26
-rw-r--r--   include/lf.h                     1
-rw-r--r--   include/my_atomic.h             10
-rw-r--r--   include/my_cpu.h                57
5 files changed, 64 insertions, 42 deletions
diff --git a/include/atomic/gcc_builtins.h b/include/atomic/gcc_builtins.h
index 99e6f628b50..e2c3b10c267 100644
--- a/include/atomic/gcc_builtins.h
+++ b/include/atomic/gcc_builtins.h
@@ -40,18 +40,18 @@
 #define my_atomic_add64_explicit(P, A, O) __atomic_fetch_add((P), (A), (O))

 #define my_atomic_cas32_weak_explicit(P, E, D, S, F) \
-  __atomic_compare_exchange_n((P), (E), (D), true, (S), (F))
+  __atomic_compare_exchange_n((P), (E), (D), 1, (S), (F))
 #define my_atomic_cas64_weak_explicit(P, E, D, S, F) \
-  __atomic_compare_exchange_n((P), (E), (D), true, (S), (F))
+  __atomic_compare_exchange_n((P), (E), (D), 1, (S), (F))
 #define my_atomic_casptr_weak_explicit(P, E, D, S, F) \
-  __atomic_compare_exchange_n((P), (E), (D), true, (S), (F))
+  __atomic_compare_exchange_n((P), (E), (D), 1, (S), (F))

 #define my_atomic_cas32_strong_explicit(P, E, D, S, F) \
-  __atomic_compare_exchange_n((P), (E), (D), false, (S), (F))
+  __atomic_compare_exchange_n((P), (E), (D), 0, (S), (F))
 #define my_atomic_cas64_strong_explicit(P, E, D, S, F) \
-  __atomic_compare_exchange_n((P), (E), (D), false, (S), (F))
+  __atomic_compare_exchange_n((P), (E), (D), 0, (S), (F))
 #define my_atomic_casptr_strong_explicit(P, E, D, S, F) \
-  __atomic_compare_exchange_n((P), (E), (D), false, (S), (F))
+  __atomic_compare_exchange_n((P), (E), (D), 0, (S), (F))

 #define my_atomic_store32(P, D) __atomic_store_n((P), (D), __ATOMIC_SEQ_CST)
 #define my_atomic_store64(P, D) __atomic_store_n((P), (D), __ATOMIC_SEQ_CST)
diff --git a/include/atomic/generic-msvc.h b/include/atomic/generic-msvc.h
index 56fa4f66fcd..d5eaa4738c7 100644
--- a/include/atomic/generic-msvc.h
+++ b/include/atomic/generic-msvc.h
@@ -137,30 +137,4 @@ static inline void my_atomic_storeptr(void * volatile *a, void *v)
 {
   *a= v;
 }
-
-/*
-  my_yield_processor (equivalent of x86 PAUSE instruction) should be used
-  to improve performance on hyperthreaded CPUs. Intel recommends to use it in
-  spin loops also on non-HT machines to reduce power consumption (see e.g
-  http://softwarecommunity.intel.com/articles/eng/2004.htm)
-
-  Running benchmarks for spinlocks implemented with InterlockedCompareExchange
-  and YieldProcessor shows that much better performance is achieved by calling
-  YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
-  loop count in the range 200-300 brought best results.
- */
-#define YIELD_LOOPS 200
-
-static inline int my_yield_processor()
-{
-  int i;
-  for (i=0; i<YIELD_LOOPS; i++)
-  {
-    YieldProcessor();
-  }
-  return 1;
-}
-
-#define LF_BACKOFF my_yield_processor()
-
 #endif /* ATOMIC_MSC_INCLUDED */
diff --git a/include/lf.h b/include/lf.h
index 1825de62b43..a9d7e9ee688 100644
--- a/include/lf.h
+++ b/include/lf.h
@@ -17,6 +17,7 @@
 #define INCLUDE_LF_INCLUDED

 #include <my_atomic.h>
+#include <my_cpu.h>

 C_MODE_START

diff --git a/include/my_atomic.h b/include/my_atomic.h
index 32c9d6b4736..896dc2b5c33 100644
--- a/include/my_atomic.h
+++ b/include/my_atomic.h
@@ -116,16 +116,6 @@
 #include "atomic/gcc_sync.h"
 #endif

-
-/*
-  the macro below defines (as an expression) the code that
-  will be run in spin-loops. Intel manuals recummend to have PAUSE there.
-  It is expected to be defined in include/atomic/ *.h files
-*/
-#ifndef LF_BACKOFF
-#define LF_BACKOFF (1)
-#endif
-
 #if SIZEOF_LONG == 4
 #define my_atomic_addlong(A,B) my_atomic_add32((int32*) (A), (B))
 #define my_atomic_loadlong(A) my_atomic_load32((int32*) (A))
diff --git a/include/my_cpu.h b/include/my_cpu.h
index e255de85960..f2e26fca70c 100644
--- a/include/my_cpu.h
+++ b/include/my_cpu.h
@@ -1,3 +1,5 @@
+#ifndef MY_CPU_INCLUDED
+#define MY_CPU_INCLUDED
 /* Copyright (c) 2013, MariaDB foundation Ab and SkySQL

    This program is free software; you can redistribute it and/or modify
@@ -43,3 +45,58 @@
 #define HMT_medium_high()
 #define HMT_high()
 #endif
+
+
+static inline void MY_RELAX_CPU(void)
+{
+#ifdef HAVE_PAUSE_INSTRUCTION
+  /*
+    According to the gcc info page, asm volatile means that the
+    instruction has important side-effects and must not be removed.
+    Also asm volatile may trigger a memory barrier (spilling all registers
+    to memory).
+  */
+#ifdef __SUNPRO_CC
+  asm ("pause" );
+#else
+  __asm__ __volatile__ ("pause");
+#endif
+
+#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
+  __asm__ __volatile__ ("rep; nop");
+#elif defined _WIN32
+  /*
+    In the Win32 API, the x86 PAUSE instruction is executed by calling
+    the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
+    independent way by using YieldProcessor.
+  */
+  YieldProcessor();
+#elif defined(_ARCH_PWR8)
+  __ppc_get_timebase();
+#else
+  int32 var, oldval = 0;
+  my_atomic_cas32_strong_explicit(&var, &oldval, 1, MY_MEMORY_ORDER_RELAXED,
+                                  MY_MEMORY_ORDER_RELAXED);
+#endif
+}
+
+
+/*
+  LF_BACKOFF should be used to improve performance on hyperthreaded CPUs. Intel
+  recommends to use it in spin loops also on non-HT machines to reduce power
+  consumption (see e.g http://softwarecommunity.intel.com/articles/eng/2004.htm)
+
+  Running benchmarks for spinlocks implemented with InterlockedCompareExchange
+  and YieldProcessor shows that much better performance is achieved by calling
+  YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
+  loop count in the range 200-300 brought best results.
+*/
+
+static inline int LF_BACKOFF(void)
+{
+  int i;
+  for (i= 0; i < 200; i++)
+    MY_RELAX_CPU();
+  return 1;
+}
+#endif
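
Two notes on the patch above, with illustrative C sketches that are not part of the patch itself.

The gcc_builtins.h hunk replaces the true/false weak-flag argument of __atomic_compare_exchange_n with the integer literals 1/0, so the macros no longer depend on true/false being defined (e.g. via <stdbool.h>) in every C translation unit that includes the header. A minimal caller sketch, assuming the my_atomic_load32_explicit macro from the same header family; counter and counter_bump are invented for illustration:

    #include <my_atomic.h>

    static int32 counter;

    /* Illustrative only: lock-free increment via the weak CAS macro.
       The weak variant may fail spuriously; on failure the macro
       refreshes `old` with the current value, so the loop just retries. */
    static void counter_bump(void)
    {
      int32 old= my_atomic_load32_explicit(&counter, MY_MEMORY_ORDER_RELAXED);
      while (!my_atomic_cas32_weak_explicit(&counter, &old, old + 1,
                                            MY_MEMORY_ORDER_RELAXED,
                                            MY_MEMORY_ORDER_RELAXED))
        ;
    }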
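
The my_cpu.h hunk turns LF_BACKOFF from a per-backend macro into a single portable inline function, which is why the MSVC-only my_yield_processor() and the (1) fallback in my_atomic.h both disappear. The intended call pattern is unchanged: invoke it inside a spin loop between failed attempts. A hedged sketch of such a loop; lock_word, spin_lock and spin_unlock are invented for illustration, assuming the my_atomic_store32_explicit macro and MY_MEMORY_ORDER_ACQUIRE/RELEASE constants from my_atomic.h:

    #include <my_atomic.h>
    #include <my_cpu.h>

    static int32 lock_word;      /* 0 = free, 1 = taken (illustrative) */

    static void spin_lock(void)
    {
      for (;;)
      {
        int32 expected= 0;
        if (my_atomic_cas32_strong_explicit(&lock_word, &expected, 1,
                                            MY_MEMORY_ORDER_ACQUIRE,
                                            MY_MEMORY_ORDER_RELAXED))
          return;                /* got the lock */
        (void) LF_BACKOFF();     /* ~200 PAUSE-equivalents before retrying */
      }
    }

    static void spin_unlock(void)
    {
      my_atomic_store32_explicit(&lock_word, 0, MY_MEMORY_ORDER_RELEASE);
    }

Note that LF_BACKOFF() keeps the return value 1 of the old macro, so it can still appear inside boolean expressions, as the removed #define LF_BACKOFF my_yield_processor() could.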