author    ylavic <ylavic@13f79535-47bb-0310-9956-ffa450edef68>    2021-12-16 15:56:27 +0000
committer ylavic <ylavic@13f79535-47bb-0310-9956-ffa450edef68>    2021-12-16 15:56:27 +0000
commit    0e7c37ad662624261d40a0bba3097fe25f995a00 (patch)
tree      c6f780e7c5cb13c377d30e76661688d66775fa62
parent    5237c96ad21d6b484455050b6cd30c7cd2885132 (diff)
download  libapr-0e7c37ad662624261d40a0bba3097fe25f995a00.tar.gz
Merge r1894621, r1894719, r1894622 from trunk:
apr_atomic: Use __atomic builtins when available.

Unlike Intel's atomic builtins (__sync_*), the more recent __atomic builtins
provide atomic load and store for weakly ordered architectures like ARM32 or
powerpc[64], so use them when available (gcc 4.6.3+).

Follow up to r1894621: restore apr_atomic_init::apr__atomic_generic64_init().

Even if apr__atomic_generic64_init() is currently a noop when !APR_HAS_THREADS,
it may change later without apr_atomic_init() noticing (thanks Rüdiger).

apr_atomic: Fix load/store for weak memory ordering architectures.

Volatile access prevents compiler reordering of load/store, but it's not enough
for weakly ordered archs like ARM32 and PowerPC[64].

While __atomic builtins provide load and store, __sync builtins don't, so let's
use an atomic add of zero for the former and an atomic exchange for the latter.

The assembly code for PowerPC was not correct either: fix apr_atomic_read32()
and apr_atomic_set32(), and add the necessary memory barriers for the others.

PR 50586.

Submitted by: ylavic

git-svn-id: https://svn.apache.org/repos/asf/apr/apr/branches/1.7.x@1896067 13f79535-47bb-0310-9956-ffa450edef68
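Illustration only, not part of this commit: a minimal, self-contained C sketch of the
load/store emulation described above. It assumes a GCC/Clang toolchain and uses the
predefined __ATOMIC_SEQ_CST macro as a stand-in for the configure-time
HAVE__ATOMIC_BUILTINS check; when only the older __sync builtins are available, a load
is emulated with an atomic add of zero and a store with a full barrier plus exchange,
as the patch does for WEAK_MEMORY_ORDERING targets.

    /* Sketch of the load/store fallback used for weakly ordered archs. */
    #include <stdio.h>

    static unsigned int counter;

    static unsigned int load32(volatile unsigned int *mem)
    {
    #if defined(__ATOMIC_SEQ_CST)
        return __atomic_load_n(mem, __ATOMIC_SEQ_CST);   /* native atomic load */
    #else
        return __sync_fetch_and_add(mem, 0);             /* add of zero acts as a load */
    #endif
    }

    static void store32(volatile unsigned int *mem, unsigned int val)
    {
    #if defined(__ATOMIC_SEQ_CST)
        __atomic_store_n(mem, val, __ATOMIC_SEQ_CST);    /* native atomic store */
    #else
        __sync_synchronize();                            /* full barrier before the exchange */
        (void)__sync_lock_test_and_set(mem, val);        /* exchange acts as a store */
    #endif
    }

    int main(void)
    {
        store32(&counter, 42);
        printf("%u\n", load32(&counter));
        return 0;
    }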
-rw-r--r--  atomic/unix/builtins.c    69
-rw-r--r--  atomic/unix/builtins64.c  51
-rw-r--r--  atomic/unix/ppc.c         63
-rw-r--r--  configure.in              45
4 files changed, 192 insertions(+), 36 deletions(-)
diff --git a/atomic/unix/builtins.c b/atomic/unix/builtins.c
index 745acf155..22b828c3c 100644
--- a/atomic/unix/builtins.c
+++ b/atomic/unix/builtins.c
@@ -18,6 +18,12 @@
#ifdef USE_ATOMICS_BUILTINS
+#if defined(__arm__) || defined(__powerpc__) || defined(__powerpc64__)
+#define WEAK_MEMORY_ORDERING 1
+#else
+#define WEAK_MEMORY_ORDERING 0
+#endif
+
APR_DECLARE(apr_status_t) apr_atomic_init(apr_pool_t *p)
{
return APR_SUCCESS;
@@ -25,57 +31,104 @@ APR_DECLARE(apr_status_t) apr_atomic_init(apr_pool_t *p)
APR_DECLARE(apr_uint32_t) apr_atomic_read32(volatile apr_uint32_t *mem)
{
+#if HAVE__ATOMIC_BUILTINS
+ return __atomic_load_n(mem, __ATOMIC_SEQ_CST);
+#elif WEAK_MEMORY_ORDERING
+ /* No __sync_load() available => apr_atomic_add32(mem, 0) */
+ return __sync_fetch_and_add(mem, 0);
+#else
return *mem;
+#endif
}
APR_DECLARE(void) apr_atomic_set32(volatile apr_uint32_t *mem, apr_uint32_t val)
{
+#if HAVE__ATOMIC_BUILTINS
+ __atomic_store_n(mem, val, __ATOMIC_SEQ_CST);
+#elif WEAK_MEMORY_ORDERING
+ /* No __sync_store() available => apr_atomic_xchg32(mem, val) */
+ __sync_synchronize();
+ __sync_lock_test_and_set(mem, val);
+#else
*mem = val;
+#endif
}
APR_DECLARE(apr_uint32_t) apr_atomic_add32(volatile apr_uint32_t *mem, apr_uint32_t val)
{
+#if HAVE__ATOMIC_BUILTINS
+ return __atomic_fetch_add(mem, val, __ATOMIC_SEQ_CST);
+#else
return __sync_fetch_and_add(mem, val);
+#endif
}
APR_DECLARE(void) apr_atomic_sub32(volatile apr_uint32_t *mem, apr_uint32_t val)
{
+#if HAVE__ATOMIC_BUILTINS
+ __atomic_fetch_sub(mem, val, __ATOMIC_SEQ_CST);
+#else
__sync_fetch_and_sub(mem, val);
+#endif
}
APR_DECLARE(apr_uint32_t) apr_atomic_inc32(volatile apr_uint32_t *mem)
{
+#if HAVE__ATOMIC_BUILTINS
+ return __atomic_fetch_add(mem, 1, __ATOMIC_SEQ_CST);
+#else
return __sync_fetch_and_add(mem, 1);
+#endif
}
APR_DECLARE(int) apr_atomic_dec32(volatile apr_uint32_t *mem)
{
+#if HAVE__ATOMIC_BUILTINS
+ return __atomic_sub_fetch(mem, 1, __ATOMIC_SEQ_CST);
+#else
return __sync_sub_and_fetch(mem, 1);
+#endif
}
-APR_DECLARE(apr_uint32_t) apr_atomic_cas32(volatile apr_uint32_t *mem, apr_uint32_t with,
+APR_DECLARE(apr_uint32_t) apr_atomic_cas32(volatile apr_uint32_t *mem, apr_uint32_t val,
apr_uint32_t cmp)
{
- return __sync_val_compare_and_swap(mem, cmp, with);
+#if HAVE__ATOMIC_BUILTINS
+ __atomic_compare_exchange_n(mem, &cmp, val, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+ return cmp;
+#else
+ return __sync_val_compare_and_swap(mem, cmp, val);
+#endif
}
APR_DECLARE(apr_uint32_t) apr_atomic_xchg32(volatile apr_uint32_t *mem, apr_uint32_t val)
{
+#if HAVE__ATOMIC_BUILTINS
+ return __atomic_exchange_n(mem, val, __ATOMIC_SEQ_CST);
+#else
__sync_synchronize();
-
return __sync_lock_test_and_set(mem, val);
+#endif
}
-APR_DECLARE(void*) apr_atomic_casptr(volatile void **mem, void *with, const void *cmp)
+APR_DECLARE(void*) apr_atomic_casptr(volatile void **mem, void *ptr, const void *cmp)
{
- return (void*) __sync_val_compare_and_swap(mem, cmp, with);
+#if HAVE__ATOMIC_BUILTINS
+ __atomic_compare_exchange_n(mem, (void **)&cmp, ptr, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+ return (void *)cmp;
+#else
+ return (void *)__sync_val_compare_and_swap(mem, (void *)cmp, ptr);
+#endif
}
-APR_DECLARE(void*) apr_atomic_xchgptr(volatile void **mem, void *with)
+APR_DECLARE(void*) apr_atomic_xchgptr(volatile void **mem, void *ptr)
{
+#if HAVE__ATOMIC_BUILTINS
+ return (void *)__atomic_exchange_n(mem, ptr, __ATOMIC_SEQ_CST);
+#else
__sync_synchronize();
-
- return (void*) __sync_lock_test_and_set(mem, with);
+ return (void *)__sync_lock_test_and_set(mem, ptr);
+#endif
}
#endif /* USE_ATOMICS_BUILTINS */
diff --git a/atomic/unix/builtins64.c b/atomic/unix/builtins64.c
index 4a4b685c7..0f6edffbb 100644
--- a/atomic/unix/builtins64.c
+++ b/atomic/unix/builtins64.c
@@ -18,47 +18,92 @@
#ifdef USE_ATOMICS_BUILTINS
+#if defined(__arm__) || defined(__powerpc__) || defined(__powerpc64__)
+#define WEAK_MEMORY_ORDERING 1
+#else
+#define WEAK_MEMORY_ORDERING 0
+#endif
+
APR_DECLARE(apr_uint64_t) apr_atomic_read64(volatile apr_uint64_t *mem)
{
+#if HAVE__ATOMIC_BUILTINS
+ return __atomic_load_n(mem, __ATOMIC_SEQ_CST);
+#elif WEAK_MEMORY_ORDERING
+ /* No __sync_load() available => apr_atomic_add64(mem, 0) */
+ return __sync_fetch_and_add(mem, 0);
+#else
return *mem;
+#endif
}
APR_DECLARE(void) apr_atomic_set64(volatile apr_uint64_t *mem, apr_uint64_t val)
{
+#if HAVE__ATOMIC_BUILTINS
+ __atomic_store_n(mem, val, __ATOMIC_SEQ_CST);
+#elif WEAK_MEMORY_ORDERING
+ /* No __sync_store() available => apr_atomic_xchg64(mem, val) */
+ __sync_synchronize();
+ __sync_lock_test_and_set(mem, val);
+#else
*mem = val;
+#endif
}
APR_DECLARE(apr_uint64_t) apr_atomic_add64(volatile apr_uint64_t *mem, apr_uint64_t val)
{
+#if HAVE__ATOMIC_BUILTINS
+ return __atomic_fetch_add(mem, val, __ATOMIC_SEQ_CST);
+#else
return __sync_fetch_and_add(mem, val);
+#endif
}
APR_DECLARE(void) apr_atomic_sub64(volatile apr_uint64_t *mem, apr_uint64_t val)
{
+#if HAVE__ATOMIC_BUILTINS
+ __atomic_fetch_sub(mem, val, __ATOMIC_SEQ_CST);
+#else
__sync_fetch_and_sub(mem, val);
+#endif
}
APR_DECLARE(apr_uint64_t) apr_atomic_inc64(volatile apr_uint64_t *mem)
{
+#if HAVE__ATOMIC_BUILTINS
+ return __atomic_fetch_add(mem, 1, __ATOMIC_SEQ_CST);
+#else
return __sync_fetch_and_add(mem, 1);
+#endif
}
APR_DECLARE(int) apr_atomic_dec64(volatile apr_uint64_t *mem)
{
+#if HAVE__ATOMIC_BUILTINS
+ return __atomic_sub_fetch(mem, 1, __ATOMIC_SEQ_CST);
+#else
return __sync_sub_and_fetch(mem, 1);
+#endif
}
-APR_DECLARE(apr_uint64_t) apr_atomic_cas64(volatile apr_uint64_t *mem, apr_uint64_t with,
+APR_DECLARE(apr_uint64_t) apr_atomic_cas64(volatile apr_uint64_t *mem, apr_uint64_t val,
apr_uint64_t cmp)
{
- return __sync_val_compare_and_swap(mem, cmp, with);
+#if HAVE__ATOMIC_BUILTINS
+ __atomic_compare_exchange_n(mem, &cmp, val, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+ return cmp;
+#else
+ return __sync_val_compare_and_swap(mem, cmp, val);
+#endif
}
APR_DECLARE(apr_uint64_t) apr_atomic_xchg64(volatile apr_uint64_t *mem, apr_uint64_t val)
{
+#if HAVE__ATOMIC_BUILTINS
+ return __atomic_exchange_n(mem, val, __ATOMIC_SEQ_CST);
+#else
__sync_synchronize();
-
return __sync_lock_test_and_set(mem, val);
+#endif
}
#endif /* USE_ATOMICS_BUILTINS */
diff --git a/atomic/unix/ppc.c b/atomic/unix/ppc.c
index 1823fe81c..46554af54 100644
--- a/atomic/unix/ppc.c
+++ b/atomic/unix/ppc.c
@@ -35,24 +35,39 @@ APR_DECLARE(apr_status_t) apr_atomic_init(apr_pool_t *p)
APR_DECLARE(apr_uint32_t) apr_atomic_read32(volatile apr_uint32_t *mem)
{
- return *mem;
+ apr_uint32_t val;
+ asm volatile (" sync\n" /* full barrier */
+ " lwz %0,%1\n" /* load */
+ " cmpw 7,%0,%0\n" /* compare (always equal) */
+ " bne- 7,$+4\n" /* goto next in any case */
+ " isync" /* acquire barrier (bc+isync) */
+ : "=r"(val)
+ : "m"(*mem)
+ : "cc", "memory");
+ return val;
}
APR_DECLARE(void) apr_atomic_set32(volatile apr_uint32_t *mem, apr_uint32_t val)
{
- *mem = val;
+ asm volatile (" sync\n" /* full barrier */
+ " stw %1,%0" /* store */
+ : "=m"(*mem)
+ : "r"(val)
+ : "memory");
}
APR_DECLARE(apr_uint32_t) apr_atomic_add32(volatile apr_uint32_t *mem, apr_uint32_t val)
{
apr_uint32_t prev, temp;
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" lwarx %0,0,%3\n" /* load and reserve */
" add %1,%0,%4\n" /* add val and prev */
PPC405_ERR77_SYNC /* ppc405 Erratum 77 */
- " stwcx. %1,0,%3\n" /* store new value */
+ " stwcx. %1,0,%3\n" /* store if still reserved */
" bne- 1b\n" /* loop if lost */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&r" (prev), "=&r" (temp), "=m" (*mem)
: "b" (mem), "r" (val)
: "cc", "memory");
@@ -64,12 +79,14 @@ APR_DECLARE(void) apr_atomic_sub32(volatile apr_uint32_t *mem, apr_uint32_t val)
{
apr_uint32_t temp;
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" lwarx %0,0,%2\n" /* load and reserve */
" subf %0,%3,%0\n" /* subtract val */
PPC405_ERR77_SYNC /* ppc405 Erratum 77 */
" stwcx. %0,0,%2\n" /* store new value */
" bne- 1b\n" /* loop if lost */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&r" (temp), "=m" (*mem)
: "b" (mem), "r" (val)
: "cc", "memory");
@@ -79,13 +96,15 @@ APR_DECLARE(apr_uint32_t) apr_atomic_inc32(volatile apr_uint32_t *mem)
{
apr_uint32_t prev;
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" lwarx %0,0,%2\n" /* load and reserve */
" addi %0,%0,1\n" /* add immediate */
PPC405_ERR77_SYNC /* ppc405 Erratum 77 */
" stwcx. %0,0,%2\n" /* store new value */
" bne- 1b\n" /* loop if lost */
" subi %0,%0,1\n" /* return old value */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&b" (prev), "=m" (*mem)
: "b" (mem), "m" (*mem)
: "cc", "memory");
@@ -97,12 +116,14 @@ APR_DECLARE(int) apr_atomic_dec32(volatile apr_uint32_t *mem)
{
apr_uint32_t prev;
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" lwarx %0,0,%2\n" /* load and reserve */
" subi %0,%0,1\n" /* subtract immediate */
PPC405_ERR77_SYNC /* ppc405 Erratum 77 */
" stwcx. %0,0,%2\n" /* store new value */
" bne- 1b\n" /* loop if lost */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&b" (prev), "=m" (*mem)
: "b" (mem), "m" (*mem)
: "cc", "memory");
@@ -115,7 +136,8 @@ APR_DECLARE(apr_uint32_t) apr_atomic_cas32(volatile apr_uint32_t *mem, apr_uint3
{
apr_uint32_t prev;
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" lwarx %0,0,%1\n" /* load and reserve */
" cmpw %0,%3\n" /* compare operands */
" bne- exit_%=\n" /* skip if not equal */
@@ -123,6 +145,7 @@ APR_DECLARE(apr_uint32_t) apr_atomic_cas32(volatile apr_uint32_t *mem, apr_uint3
" stwcx. %2,0,%1\n" /* store new value */
" bne- 1b\n" /* loop if lost */
"exit_%=:\n" /* not equal */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&r" (prev)
: "b" (mem), "r" (with), "r" (cmp)
: "cc", "memory");
@@ -134,11 +157,13 @@ APR_DECLARE(apr_uint32_t) apr_atomic_xchg32(volatile apr_uint32_t *mem, apr_uint
{
apr_uint32_t prev;
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" lwarx %0,0,%1\n" /* load and reserve */
PPC405_ERR77_SYNC /* ppc405 Erratum 77 */
" stwcx. %2,0,%1\n" /* store new value */
- " bne- 1b" /* loop if lost */
+ " bne- 1b\n" /* loop if lost */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&r" (prev)
: "b" (mem), "r" (val)
: "cc", "memory");
@@ -150,7 +175,8 @@ APR_DECLARE(void*) apr_atomic_casptr(volatile void **mem, void *with, const void
{
void *prev;
#if APR_SIZEOF_VOIDP == 4
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" lwarx %0,0,%1\n" /* load and reserve */
" cmpw %0,%3\n" /* compare operands */
" bne- 2f\n" /* skip if not equal */
@@ -158,11 +184,13 @@ APR_DECLARE(void*) apr_atomic_casptr(volatile void **mem, void *with, const void
" stwcx. %2,0,%1\n" /* store new value */
" bne- 1b\n" /* loop if lost */
"2:\n" /* not equal */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&r" (prev)
: "b" (mem), "r" (with), "r" (cmp)
: "cc", "memory");
#elif APR_SIZEOF_VOIDP == 8
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" ldarx %0,0,%1\n" /* load and reserve */
" cmpd %0,%3\n" /* compare operands */
" bne- 2f\n" /* skip if not equal */
@@ -170,6 +198,7 @@ APR_DECLARE(void*) apr_atomic_casptr(volatile void **mem, void *with, const void
" stdcx. %2,0,%1\n" /* store new value */
" bne- 1b\n" /* loop if lost */
"2:\n" /* not equal */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&r" (prev)
: "b" (mem), "r" (with), "r" (cmp)
: "cc", "memory");
@@ -183,22 +212,24 @@ APR_DECLARE(void*) apr_atomic_xchgptr(volatile void **mem, void *with)
{
void *prev;
#if APR_SIZEOF_VOIDP == 4
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" lwarx %0,0,%1\n" /* load and reserve */
PPC405_ERR77_SYNC /* ppc405 Erratum 77 */
" stwcx. %2,0,%1\n" /* store new value */
" bne- 1b\n" /* loop if lost */
- " isync\n" /* memory barrier */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&r" (prev)
: "b" (mem), "r" (with)
: "cc", "memory");
#elif APR_SIZEOF_VOIDP == 8
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" ldarx %0,0,%1\n" /* load and reserve */
PPC405_ERR77_SYNC /* ppc405 Erratum 77 */
" stdcx. %2,0,%1\n" /* store new value */
" bne- 1b\n" /* loop if lost */
- " isync\n" /* memory barrier */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&r" (prev)
: "b" (mem), "r" (with)
: "cc", "memory");
diff --git a/configure.in b/configure.in
index 4981aaf19..28c8d44db 100644
--- a/configure.in
+++ b/configure.in
@@ -467,7 +467,7 @@ esac
AC_CACHE_CHECK([whether the compiler provides atomic builtins], [ap_cv_atomic_builtins],
[AC_TRY_RUN([
-int main()
+int main(int argc, const char *const *argv)
{
unsigned long val = 1010, tmp, *mem = &val;
@@ -475,7 +475,6 @@ int main()
return 1;
tmp = val;
-
if (__sync_fetch_and_sub(mem, 1010) != tmp || val != 1010)
return 1;
@@ -483,28 +482,56 @@ int main()
return 1;
tmp = 3030;
-
if (__sync_val_compare_and_swap(mem, 0, tmp) != 0 || val != tmp)
return 1;
+ __sync_synchronize();
if (__sync_lock_test_and_set(&val, 4040) != 3030)
return 1;
- mem = &tmp;
+ if (__sync_val_compare_and_swap(&mem, &val, &tmp) != &val || mem != &tmp)
+ return 1;
+
+ return 0;
+}], [ap_cv_atomic_builtins=yes], [ap_cv_atomic_builtins=no], [ap_cv_atomic_builtins=no])])
+
+AC_CACHE_CHECK([whether the compiler provides __atomic builtins], [ap_cv__atomic_builtins],
+[AC_TRY_RUN([
+int main(int argc, const char *const *argv)
+{
+ unsigned long val = 1010, tmp, *mem = &val, *ptmp;
- if (__sync_val_compare_and_swap(&mem, &tmp, &val) != &tmp)
+ if (__atomic_fetch_add(&val, 1010, __ATOMIC_SEQ_CST) != 1010 || val != 2020)
return 1;
- __sync_synchronize();
+ tmp = val;
+ if (__atomic_fetch_sub(mem, 1010, __ATOMIC_SEQ_CST) != tmp || val != 1010)
+ return 1;
+
+ if (__atomic_sub_fetch(&val, 1010, __ATOMIC_SEQ_CST) != 0 || val != 0)
+ return 1;
- if (mem != &val)
+ tmp = val;
+ if (!__atomic_compare_exchange_n(mem, &tmp, 3030, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+ || tmp != 0)
+ return 1;
+
+ if (__atomic_exchange_n(&val, 4040, __ATOMIC_SEQ_CST) != 3030)
+ return 1;
+
+ ptmp = &val;
+ if (!__atomic_compare_exchange_n(&mem, &ptmp, &tmp, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+ || ptmp != &val || mem != &tmp)
return 1;
return 0;
-}], [ap_cv_atomic_builtins=yes], [ap_cv_atomic_builtins=no], [ap_cv_atomic_builtins=no])])
+}], [ap_cv__atomic_builtins=yes], [ap_cv__atomic_builtins=no], [ap_cv__atomic_builtins=no])])
-if test "$ap_cv_atomic_builtins" = "yes"; then
+if test "$ap_cv_atomic_builtins" = "yes" -o "$ap_cv__atomic_builtins" = "yes"; then
AC_DEFINE(HAVE_ATOMIC_BUILTINS, 1, [Define if compiler provides atomic builtins])
+ if test "$ap_cv__atomic_builtins" = "yes"; then
+ AC_DEFINE(HAVE__ATOMIC_BUILTINS, 1, [Define if compiler provides __atomic builtins])
+ fi
fi
case $host in