author     hboehm <hboehm>                           2008-11-11 00:27:48 +0000
committer  Ivan Maidanski <ivmai@mail.ru>            2011-07-25 16:03:24 +0400
commit     c69538936623d2a41f71c88b50e5ce6c8ba2d5e1 (patch)
tree       eb526bc9a257ecdd9f70eabd7343400b5bb86111
parent     3d0e5eeecba0b46e969e039d01f67346ce12da43 (diff)
download   libatomic_ops-c69538936623d2a41f71c88b50e5ce6c8ba2d5e1.tar.gz
2008-11-10 Hans Boehm <Hans.Boehm@hp.com> (Really Joerg Wagner)

	* src/atomic_ops/sysdeps/armcc/arm_v6.h: Compute
	AO_compare_and_swap value differently, add
	AO_compare_double_and_swap_double, some indentation fixes.
	* src/atomic_ops/sysdeps/gcc/arm.h: Make gcc asm code more
	robust and minimize clobbers, add
	AO_compare_double_and_swap_double.
-rw-r--r--   ChangeLog                               |   7
-rw-r--r--   src/atomic_ops/sysdeps/armcc/arm_v6.h   |  59
-rw-r--r--   src/atomic_ops/sysdeps/gcc/arm.h        | 125
3 files changed, 132 insertions(+), 59 deletions(-)
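The new AO_compare_double_and_swap_double compares the 64-bit value at
*addr against the pair (old_val1, old_val2) and, on a match, atomically
stores (new_val1, new_val2) using LDREXD/STREXD. A minimal usage sketch,
assuming an ARMv6+ build of libatomic_ops; versioned_ptr and try_update
are hypothetical names used only for illustration, not part of this commit:

    #include "atomic_ops.h"

    /* Classic double-width-CAS pattern: pair a pointer with a version
     * counter so that a recycled pointer value cannot be mistaken for
     * the original (ABA avoidance). */
    typedef struct { volatile AO_double_t pair; } versioned_ptr;

    int try_update(versioned_ptr *vp, AO_t old_ptr, AO_t old_ver,
                   AO_t new_ptr)
    {
      /* Succeeds only if both halves still match the expected values. */
      return AO_compare_double_and_swap_double(&vp->pair,
                                               old_ptr, old_ver,
                                               new_ptr, old_ver + 1);
    }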
diff --git a/ChangeLog b/ChangeLog
index 6914709..3f6aa7c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2008-11-10 Hans Boehm <Hans.Boehm@hp.com> (Really Joerg Wagner)
+ * src/atomic_ops/sysdeps/armcc/arm_v6.h: Compute
+ AO_compare_and_swap value differently, add
+ AO_compare_double_and_swap_double, some indentation fixes.
+ * src/atomic_ops/sysdeps/gcc/arm.h: Make gcc asm code more
+ robust and minimize clobbers, Add AO_compare_double_and_swap_double.
+
2008-11-06 Hans Boehm <Hans.Boehm@hp.com>
* INSTALL: Add some platform-specific documentation.
* src/Makefile.msft: Fix copyright notice.
diff --git a/src/atomic_ops/sysdeps/armcc/arm_v6.h b/src/atomic_ops/sysdeps/armcc/arm_v6.h
index 326506c..469b994 100644
--- a/src/atomic_ops/sysdeps/armcc/arm_v6.h
+++ b/src/atomic_ops/sysdeps/armcc/arm_v6.h
@@ -24,15 +24,17 @@
#if __TARGET_ARCH_ARM < 6
Dont use with ARM instruction sets lower than v6
-#endif
+#else
+
+#include "../standard_ao_double_t.h"
/* NEC LE-IT: ARMv6 is the first architecture providing support for simple LL/SC
* A data memory barrier must be raised via CP15 command (see documentation).
- *
+ *
* ARMv7 is compatible to ARMv6 but has a simpler command for issuing a
* memory barrier (DMB). Raising it via CP15 should still work as told me by the
* support engineers. If it turns out to be much quicker than we should implement
- * custom code for ARMv7 using the asm { dmb } command.
+ * custom code for ARMv7 using the asm { dmb } command.
*
* If only a single processor is used, we can define AO_UNIPROCESSOR
* and do not need to access CP15 for ensuring a DMB at all.
@@ -41,12 +43,12 @@ Dont use with ARM instruction sets lower than v6
AO_INLINE void
AO_nop_full()
{
-# ifndef AO_UNIPROCESSOR
- unsigned int dest=0;
- /* issue an data memory barrier (keeps ordering of memory transactions */
- /* before and after this operation) */
+#ifndef AO_UNIPROCESSOR
+ unsigned int dest=0;
+ /* issue an data memory barrier (keeps ordering of memory transactions */
+ /* before and after this operation) */
__asm { mcr p15,0,dest,c7,c10,5 } ;
-# endif
+#endif
}
#define AO_HAVE_nop_full
@@ -54,8 +56,8 @@ AO_nop_full()
AO_INLINE AO_t
AO_load(const volatile AO_t *addr)
{
- /* Cast away the volatile in case it adds fence semantics. */
- return (*(const AO_t *)addr);
+ /* Cast away the volatile in case it adds fence semantics */
+ return (*(const AO_t *)addr);
}
#define AO_HAVE_load
@@ -184,16 +186,49 @@ AO_compare_and_swap(volatile AO_t *addr,
retry:
__asm__ {
- ldrex tmp, [addr]
mov result, #2
+ ldrex tmp, [addr]
teq tmp, old_val
strexeq result, new_val, [addr]
teq result, #1
beq retry
}
- return (result^2)>>1;
+ return !(result&2);
}
#define AO_HAVE_compare_and_swap
+/* helper functions for the Realview compiler: LDREXD is not usable
+ * with inline assembler, so use the "embedded" assembler as
+ * suggested by ARM Dev. support (June 2008). */
+__asm inline double_ptr_storage load_ex(volatile AO_double_t *addr) {
+ LDREXD r0,r1,[r0]
+}
+
+__asm inline int store_ex(AO_t val1, AO_t val2, volatile AO_double_t *addr) {
+ STREXD r3,r0,r1,[r2]
+ MOV r0,r3
+}
+
+AO_INLINE int
+AO_compare_double_and_swap_double(volatile AO_double_t *addr,
+ AO_t old_val1, AO_t old_val2,
+ AO_t new_val1, AO_t new_val2)
+{
+ double_ptr_storage old_val = ((double_ptr_storage)old_val2 << 32) | old_val1;
+
+ double_ptr_storage tmp;
+ int result;
+
+ while(1) {
+ tmp = load_ex(addr);
+ if(tmp != old_val) return false;
+ result = store_ex(new_val1, new_val2, addr);
+ if(!result) return true;
+ }
+}
+
+#define AO_HAVE_compare_double_and_swap_double
+
+
+#endif // __TARGET_ARCH_ARM
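
Note on the changed AO_compare_and_swap return value: after the loop,
result can only be 0 (STREXEQ stored successfully) or 2 (the initial
"mov result, #2" survived a failed comparison), so the old expression
(result^2)>>1 and the new !(result&2) agree on both possible values;
the new form reads directly as "did the failure flag stay set". A
standalone check of the arithmetic (illustrative only, not part of the
commit):

    #include <assert.h>

    int main(void)
    {
      int r = 0;  /* store succeeded: both forms yield 1 */
      assert(((r ^ 2) >> 1) == !(r & 2) && !(r & 2) == 1);
      r = 2;      /* comparison failed: both forms yield 0 */
      assert(((r ^ 2) >> 1) == !(r & 2) && !(r & 2) == 0);
      return 0;
    }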
diff --git a/src/atomic_ops/sysdeps/gcc/arm.h b/src/atomic_ops/sysdeps/gcc/arm.h
index 5b57048..b9d5d9e 100644
--- a/src/atomic_ops/sysdeps/gcc/arm.h
+++ b/src/atomic_ops/sysdeps/gcc/arm.h
@@ -34,6 +34,9 @@
/* NEC LE-IT: gcc has no way to easily check the arm architecture
* but defines only one of __ARM_ARCH_x__ to be true */
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_7__)
+
+#include "../standard_ao_double_t.h"
+
AO_INLINE void
AO_nop_full()
{
@@ -75,7 +78,7 @@ AO_load(const volatile AO_t *addr)
STR(x)
STREX(x) Yes
-----------------------------------
-
+
* ARMv7 behaves similar, see documentation CortexA8 TRM, point 8.5
*
* HB: I think this is only a problem if interrupt handlers do not clear
@@ -84,16 +87,16 @@ AO_load(const volatile AO_t *addr)
*/
AO_INLINE void AO_store(volatile AO_t *addr, AO_t value)
{
- unsigned long tmp;
-
+ AO_t flag;
+
__asm__ __volatile__("@AO_store\n"
-"1: ldrex %0, [%1]\n"
-" strex %0, %2, [%1]\n"
+"1: ldrex %0, [%2]\n"
+" strex %0, %3, [%2]\n"
" teq %0, #0\n"
" bne 1b"
- : "=&r"(tmp)
+ : "=&r"(flag), "+m"(*addr)
: "r" (addr), "r"(value)
- : "cc","memory");
+ : "cc");
}
#define AO_HAVE_store
@@ -113,16 +116,16 @@ AO_INLINE AO_TS_t
AO_test_and_set(volatile AO_TS_t *addr) {
AO_TS_t oldval;
- unsigned long tmp;
+ unsigned long flag;
__asm__ __volatile__("@AO_test_and_set\n"
-"1: ldrex %0, [%2]\n"
-" strex %1, %3, [%2]\n"
-" teq %1, #0\n"
-" bne 1b\n"
- : "=&r"(oldval),"=&r"(tmp)
+"1: ldrex %0, [%3]\n"
+" strex %1, %4, [%3]\n"
+" teq %1, #0\n"
+" bne 1b\n"
+ : "=&r"(oldval),"=&r"(flag), "+m"(*addr)
: "r"(addr), "r"(1)
- : "memory","cc");
+ : "cc");
return oldval;
}
@@ -133,18 +136,18 @@ AO_test_and_set(volatile AO_TS_t *addr) {
AO_INLINE AO_t
AO_fetch_and_add(volatile AO_t *p, AO_t incr)
{
- unsigned long tmp,tmp2;
+ unsigned long flag,tmp;
AO_t result;
__asm__ __volatile__("@AO_fetch_and_add\n"
-"1: ldrex %0, [%4]\n" /* get original */
-" add %2, %3, %0\n" /* sum up */
-" strex %1, %2, [%4]\n" /* store them */
-" teq %1, #0\n"
-" bne 1b\n"
- : "=&r"(result),"=&r"(tmp),"=&r"(tmp2)
- : "r"(incr), "r"(p)
- : "cc","memory");
+"1: ldrex %0, [%5]\n" /* get original */
+" add %2, %0, %4\n" /* sum up in incr */
+" strex %1, %2, [%5]\n" /* store them */
+" teq %1, #0\n"
+" bne 1b\n"
+ : "=&r"(result),"=&r"(flag),"=&r"(tmp),"+m"(*p) /* 0..3 */
+ : "r"(incr), "r"(p) /* 4..5 */
+ : "cc");
return result;
}
@@ -155,18 +158,18 @@ AO_fetch_and_add(volatile AO_t *p, AO_t incr)
AO_INLINE AO_t
AO_fetch_and_add1(volatile AO_t *p)
{
- unsigned long tmp,tmp2;
+ unsigned long flag,tmp;
AO_t result;
__asm__ __volatile__("@AO_fetch_and_add1\n"
-"1: ldrex %0, [%3]\n" /* get original */
+"1: ldrex %0, [%4]\n" /* get original */
" add %1, %0, #1\n" /* increment */
-" strex %2, %1, [%3]\n" /* store them */
-" teq %2, #0\n"
-" bne 1b\n"
- : "=&r"(result), "=&r"(tmp), "=&r"(tmp2)
+" strex %2, %1, [%4]\n" /* store them */
+" teq %2, #0\n"
+" bne 1b\n"
+ : "=&r"(result), "=&r"(tmp), "=&r"(flag), "+m"(*p)
: "r"(p)
- : "cc","memory");
+ : "cc");
return result;
}
@@ -177,18 +180,18 @@ AO_fetch_and_add1(volatile AO_t *p)
AO_INLINE AO_t
AO_fetch_and_sub1(volatile AO_t *p)
{
- unsigned long tmp,tmp2;
+ unsigned long flag,tmp;
AO_t result;
- __asm__ __volatile__("@ AO_fetch_and_sub1\n"
-"1: ldrex %0, [%3]\n" /* get original */
-" sub %1, %0, #1\n" /* increment */
-" strex %2, %1, [%3]\n" /* store them */
-" teq %2, #0\n"
-" bne 1b\n"
- : "=&r"(result), "=&r"(tmp), "=&r"(tmp2)
+ __asm__ __volatile__("@AO_fetch_and_sub1\n"
+"1: ldrex %0, [%4]\n" /* get original */
+" sub %1, %0, #1\n" /* decrement */
+" strex %2, %1, [%4]\n" /* store them */
+" teq %2, #0\n"
+" bne 1b\n"
+ : "=&r"(result), "=&r"(tmp), "=&r"(flag), "+m"(*p)
: "r"(p)
- : "cc","memory");
+ : "cc");
return result;
}
@@ -204,21 +207,49 @@ AO_compare_and_swap(volatile AO_t *addr,
AO_t result,tmp;
__asm__ __volatile__("@ AO_compare_and_swap\n"
-"1: ldrex %1, [%2]\n" /* get original */
-" mov %0, #2\n" /* store a flag */
-" teq %1, %3\n" /* see if match */
-" strexeq %0, %4, [%2]\n" /* store new one if matched */
+"1: mov %0, #2\n" /* store a flag */
+" ldrex %1, [%3]\n" /* get original */
+" teq %1, %4\n" /* see if match */
+" strexeq %0, %5, [%3]\n" /* store new one if matched */
" teq %0, #1\n"
" beq 1b\n" /* if update failed, repeat */
-" eor %0, %0, #2\n" /* if succeded, return 2, else 0 */
- : "=&r"(result), "=&r"(tmp)
+ : "=&r"(result), "=&r"(tmp), "+m"(*addr)
: "r"(addr), "r"(old_val), "r"(new_val)
- : "cc","memory");
+ : "cc");
- return (result>>1);
+ return !(result&2); /* if succeded, return 1, else 0 */
}
#define AO_HAVE_compare_and_swap
+AO_INLINE int
+AO_compare_double_and_swap_double(volatile AO_double_t *addr,
+ AO_t old_val1, AO_t old_val2,
+ AO_t new_val1, AO_t new_val2)
+{
+ double_ptr_storage old_val = ((double_ptr_storage)old_val2 << 32) | old_val1;
+ double_ptr_storage new_val = ((double_ptr_storage)new_val2 << 32) | new_val1;
+
+ double_ptr_storage tmp;
+ int result;
+
+ while(1) {
+ __asm__ __volatile__("@ AO_compare_and_swap_double\n"
+ " ldrexd %0, [%1]\n" /* get original to r1&r2*/
+ : "=&r"(tmp)
+ : "r"(addr)
+ : );
+ if(tmp != old_val) return false;
+ __asm__ __volatile__(
+ " strexd %0, %2, [%3]\n" /* store new one if matched */
+ : "=&r"(result),"+m"(*addr)
+ : "r"(new_val), "r"(addr)
+ : );
+ if(!result) return true;
+ }
+}
+
+#define AO_HAVE_compare_double_and_swap_double
+
#else
/* pre ARMv6 architecures ... */
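
Note on "minimize clobbers": throughout the gcc file the blanket
"memory" clobber is replaced by naming the updated location itself as a
"+m" output operand, which tells the compiler exactly which object the
asm reads and writes instead of forcing it to discard all cached memory
values. A minimal sketch of the pattern, restating the AO_store shape
from the diff above with explanatory comments (sketch_store is an
illustrative name; assumes GCC inline asm targeting ARMv6+):

    AO_INLINE void
    sketch_store(volatile AO_t *addr, AO_t value)
    {
      AO_t flag;
      __asm__ __volatile__("@sketch_store\n"
    "1: ldrex %0, [%2]\n"         /* take exclusive reservation on *addr */
    "   strex %0, %3, [%2]\n"     /* try the store; %0 becomes 0 on success */
    "   teq   %0, #0\n"
    "   bne   1b"                 /* reservation lost: retry */
      : "=&r"(flag), "+m"(*addr)  /* "+m" replaces the "memory" clobber */
      : "r"(addr), "r"(value)
      : "cc");                    /* only the condition flags remain */
    }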