author    ylavic <ylavic@13f79535-47bb-0310-9956-ffa450edef68>  2023-02-15 13:18:12 +0000
committer ylavic <ylavic@13f79535-47bb-0310-9956-ffa450edef68>  2023-02-15 13:18:12 +0000
commit    f34424009a2607156f07ff58b46fec3ccb8e7ecc (patch)
tree      d7b41fb2b9e0f80eb0e9d12f6ffb70b1963ccb69
parent    52d96cbba0b2a1a2ca67df39468c6afa3ea02370 (diff)
download  libapr-f34424009a2607156f07ff58b46fec3ccb8e7ecc.tar.gz
apr_atomic: Generic apr_atomic_read64() needs a mutex on 32bit systems (tearing).
A 64bit load on a 32bit CPU/system uses two instructions (tearing), so ensure atomicity with regard to other atomic functions by using the (same) lock. test_atomics_threaded64() fails because of this on 32bit systems. PR 66457.
* atomic/unix/mutex64.c (apr_atomic_read64): Use locking when APR_SIZEOF_VOIDP < 8.

atomic64: Generic apr_atomic_read64() to always use a lock.
Don't play games with sizeof(void*) to determine whether a raw load instruction is atomic or not. Systems that fall back to the generic implementation are not eligible for the compiler builtins or CPU native atomic instructions already, and we don't want to reimplement that here (e.g. alignment, ordering guarantees, ...).
* atomic/unix/mutex64.c (apr_atomic_read64): No #ifdefery, always take the lock.
Follow up to r1907541.

atomic: No raw 64bit load/store on 32bit systems or anything but x86_64 or s390x.
Raw 64bit load and store need two instructions on 32bit systems (tearing), so they are not atomic, and only x86(_64) and s390(x) have strong memory ordering guarantees. Always use builtin functions for the cases where raw load/store don't work as expected.
* atomic/unix/builtins.c, atomic/unix/builtins64.c: Use an accept-list rather than a reject-list to define WEAK_MEMORY_ORDERING. Test APR_SIZEOF_VOIDP < 8 to force usage of the __sync builtins for _read{32,64} and _set{32,64} on 32bit systems when the __atomic_{load,store} builtins are not available.

configure: Test apr_uint64_t alignment for 64bit atomic builtins usability.
On some systems the __atomic builtins may be available only through libatomic, or fall back to libatomic when the atomic operations are not issued on a suitably aligned address (64bit atomics on 8-byte aligned addresses only, for instance). Modify the tests for HAVE_ATOMIC_BUILTINS64 and HAVE__ATOMIC_BUILTINS64 such that the address for the atomic operations is not aligned (unless 64bit ints always have the suitable alignment, i.e. mainly on 64bit systems). Also, use the __atomic_always_lock_free() builtin to fail the test when the compiler already knows about the alignment issue (falling back to libatomic, which we don't require/want). With this, 64bit builtins should be selected only for platforms that can natively handle atomics on any apr_uint64_t (since APR has no dedicated 8-byte aligned 64bit type for now), while the generic/mutex implementation is used for the others.

testatomic: Initialize in the test the globals used by it.
Just in case the test is later reordered (e.g. test_atomics_threaded64 and test_atomics_threaded_setread64 use the same atomic_ops64 variable).

Submitted by: ylavic
Merges r1907541, r1907678, r1907637, r1907642, r1907677 from trunk.

git-svn-id: https://svn.apache.org/repos/asf/apr/apr/branches/1.8.x@1907679 13f79535-47bb-0310-9956-ffa450edef68
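To make the tearing concrete, here is a hypothetical standalone demo (not part of this patch; the file name and the pthread setup are mine): on a 32bit build, a plain 64bit store is split into two 32bit stores, so a concurrent plain load can return a value whose halves come from different writes. The data race is deliberate since that is exactly the bug being shown; build it as a 32bit binary (e.g. gcc -m32 -O2 -pthread) to have a chance of reproducing it.

/* demo_tearing.c -- deliberately racy sketch of a torn 64-bit read. */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static volatile uint64_t shared;    /* plain, non-atomic on purpose */
static volatile int stop;

static void *writer(void *arg)
{
    (void)arg;
    while (!stop) {
        shared = 0;                 /* halves 0x00000000 / 0x00000000 */
        shared = UINT64_MAX;        /* halves 0xffffffff / 0xffffffff */
    }
    return NULL;
}

int main(void)
{
    pthread_t t;
    long i;

    pthread_create(&t, NULL, writer, NULL);
    for (i = 0; i < 100000000L; i++) {
        uint64_t v = shared;        /* may tear on a 32bit build */
        if (v != 0 && v != UINT64_MAX) {
            printf("torn value observed: 0x%016llx\n", (unsigned long long)v);
            break;
        }
    }
    stop = 1;
    pthread_join(t, NULL);
    return 0;
}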
-rw-r--r--  atomic/unix/builtins.c     7
-rw-r--r--  atomic/unix/builtins64.c  11
-rw-r--r--  atomic/unix/mutex64.c      9
-rw-r--r--  configure.in              68
-rw-r--r--  test/testatomic.c          3
5 files changed, 64 insertions, 34 deletions
diff --git a/atomic/unix/builtins.c b/atomic/unix/builtins.c
index b92e435cf..5ae63824f 100644
--- a/atomic/unix/builtins.c
+++ b/atomic/unix/builtins.c
@@ -18,10 +18,11 @@
#ifdef USE_ATOMICS_BUILTINS
-#if defined(__arm__) || defined(__powerpc__) || defined(__powerpc64__)
-#define WEAK_MEMORY_ORDERING 1
-#else
+#if defined(__i386__) || defined(__x86_64__) \
+ || defined(__s390__) || defined(__s390x__)
#define WEAK_MEMORY_ORDERING 0
+#else
+#define WEAK_MEMORY_ORDERING 1
#endif
APR_DECLARE(apr_status_t) apr_atomic_init(apr_pool_t *p)
diff --git a/atomic/unix/builtins64.c b/atomic/unix/builtins64.c
index 7d8422548..f0cdb309b 100644
--- a/atomic/unix/builtins64.c
+++ b/atomic/unix/builtins64.c
@@ -18,17 +18,18 @@
#ifdef USE_ATOMICS_BUILTINS64
-#if defined(__arm__) || defined(__powerpc__) || defined(__powerpc64__)
-#define WEAK_MEMORY_ORDERING 1
-#else
+#if defined(__i386__) || defined(__x86_64__) \
+ || defined(__s390__) || defined(__s390x__)
#define WEAK_MEMORY_ORDERING 0
+#else
+#define WEAK_MEMORY_ORDERING 1
#endif
APR_DECLARE(apr_uint64_t) apr_atomic_read64(volatile apr_uint64_t *mem)
{
#if HAVE__ATOMIC_BUILTINS
return __atomic_load_n(mem, __ATOMIC_SEQ_CST);
-#elif WEAK_MEMORY_ORDERING
+#elif WEAK_MEMORY_ORDERING || APR_SIZEOF_VOIDP < 8
/* No __sync_load() available => apr_atomic_add64(mem, 0) */
return __sync_fetch_and_add(mem, 0);
#else
@@ -40,7 +41,7 @@ APR_DECLARE(void) apr_atomic_set64(volatile apr_uint64_t *mem, apr_uint64_t val)
{
#if HAVE__ATOMIC_BUILTINS
__atomic_store_n(mem, val, __ATOMIC_SEQ_CST);
-#elif WEAK_MEMORY_ORDERING
+#elif WEAK_MEMORY_ORDERING || APR_SIZEOF_VOIDP < 8
/* No __sync_store() available => apr_atomic_xchg64(mem, val) */
__sync_synchronize();
__sync_lock_test_and_set(mem, val);
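As a side note, here is a minimal standalone sketch of the fallback this hunk selects (my own example, not APR code, assuming GCC's legacy __sync builtins): with no __sync load/store primitives, an add of zero serves as the atomic 64bit load, and a full barrier plus an atomic exchange serves as the store. On some 32bit targets the 8-byte __sync builtins additionally require an -march level that provides a 64bit compare-and-swap.

/* sync64_sketch.c -- emulating 64-bit atomic load/store with __sync. */
#include <stdio.h>

typedef unsigned long long u64;

static u64 sync_load64(volatile u64 *mem)
{
    /* There is no __sync_load(): atomically add 0 and return the old value. */
    return __sync_fetch_and_add(mem, 0);
}

static void sync_store64(volatile u64 *mem, u64 val)
{
    /* There is no __sync_store(): full barrier, then atomic exchange. */
    __sync_synchronize();
    __sync_lock_test_and_set(mem, val);
}

int main(void)
{
    volatile u64 v = 0;
    sync_store64(&v, 0x0123456789abcdefULL);
    printf("0x%016llx\n", sync_load64(&v));
    return 0;
}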
diff --git a/atomic/unix/mutex64.c b/atomic/unix/mutex64.c
index 9fc44af61..a6a7ceb0b 100644
--- a/atomic/unix/mutex64.c
+++ b/atomic/unix/mutex64.c
@@ -96,7 +96,14 @@ apr_status_t apr__atomic_generic64_init(apr_pool_t *p)
APR_DECLARE(apr_uint64_t) apr_atomic_read64(volatile apr_uint64_t *mem)
{
- return *mem;
+ apr_uint64_t cur_value;
+ DECLARE_MUTEX_LOCKED(mutex, mem);
+
+ cur_value = *mem;
+
+ MUTEX_UNLOCK(mutex);
+
+ return cur_value;
}
APR_DECLARE(void) apr_atomic_set64(volatile apr_uint64_t *mem, apr_uint64_t val)
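The DECLARE_MUTEX_LOCKED()/MUTEX_UNLOCK() macros used above come from the generic mutex implementation in this same file. As a rough standalone illustration of the strategy (a sketch using a single pthread mutex, not APR's actual macros), the point is simply that the 64bit read takes the same lock as the write side, so a 32bit reader can never observe a half-written value:

/* mutex_read64_sketch.c -- lock-protected 64-bit read/set, pthread version. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock64 = PTHREAD_MUTEX_INITIALIZER;

static unsigned long long generic_read64(volatile unsigned long long *mem)
{
    unsigned long long cur;
    pthread_mutex_lock(&lock64);    /* same lock as the write side */
    cur = *mem;
    pthread_mutex_unlock(&lock64);
    return cur;
}

static void generic_set64(volatile unsigned long long *mem,
                          unsigned long long val)
{
    pthread_mutex_lock(&lock64);
    *mem = val;
    pthread_mutex_unlock(&lock64);
}

int main(void)
{
    volatile unsigned long long v = 0;
    generic_set64(&v, 4040);
    printf("%llu\n", generic_read64(&v));
    return 0;
}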
diff --git a/configure.in b/configure.in
index 49d1338af..b16cf649a 100644
--- a/configure.in
+++ b/configure.in
@@ -552,31 +552,35 @@ AC_CACHE_CHECK([whether the compiler provides 64bit atomic builtins], [ap_cv_ato
[AC_TRY_RUN([
#if HAVE_STDINT_H
#include <stdint.h>
+typedef uint64_t u64_t;
+#else
+typedef unsigned long long u64_t;
#endif
int main(int argc, const char *const *argv)
{
-#if HAVE_STDINT_H
- uint64_t val = 1010, tmp, *mem = &val;
-#else
- unsigned long long val = 1010, tmp, *mem = &val;
-#endif
-
- if (__sync_fetch_and_add(&val, 1010) != 1010 || val != 2020)
+ struct {
+ char pad0;
+ u64_t val;
+ } s;
+ u64_t *mem = &s.val, tmp;
+
+ s.val = 1010;
+ if (__sync_fetch_and_add(&s.val, 1010) != 1010 || s.val != 2020)
return 1;
- tmp = val;
- if (__sync_fetch_and_sub(mem, 1010) != tmp || val != 1010)
+ tmp = s.val;
+ if (__sync_fetch_and_sub(mem, 1010) != tmp || s.val != 1010)
return 1;
- if (__sync_sub_and_fetch(&val, 1010) != 0 || val != 0)
+ if (__sync_sub_and_fetch(&s.val, 1010) != 0 || s.val != 0)
return 1;
tmp = 3030;
- if (__sync_val_compare_and_swap(mem, 0, tmp) != 0 || val != tmp)
+ if (__sync_val_compare_and_swap(mem, 0, tmp) != 0 || s.val != tmp)
return 1;
__sync_synchronize();
- if (__sync_lock_test_and_set(&val, 4040) != 3030)
+ if (__sync_lock_test_and_set(&s.val, 4040) != 3030)
return 1;
return 0;
@@ -586,31 +590,45 @@ AC_CACHE_CHECK([whether the compiler provides 64bit __atomic builtins], [ap_cv__
[AC_TRY_RUN([
#if HAVE_STDINT_H
#include <stdint.h>
+typedef uint64_t u64_t;
+#else
+typedef unsigned long long u64_t;
#endif
+static int test_always_lock_free(volatile u64_t *val)
+{
+ return __atomic_always_lock_free(sizeof(*val), val);
+}
int main(int argc, const char *const *argv)
{
-#if HAVE_STDINT_H
- uint64_t val = 1010, tmp, *mem = &val;
-#else
- unsigned long long val = 1010, tmp, *mem = &val;
-#endif
+ struct {
+ char pad0;
+ u64_t val;
+ char pad1;
+ u64_t tmp;
+ } s;
+ u64_t *mem = &s.val;
+
+ /* check if alignment matters (no fallback to libatomic) */
+ if (!test_always_lock_free(&s.val))
+ return 1;
- if (__atomic_fetch_add(&val, 1010, __ATOMIC_SEQ_CST) != 1010 || val != 2020)
+ s.val = 1010;
+ if (__atomic_fetch_add(&s.val, 1010, __ATOMIC_SEQ_CST) != 1010 || s.val != 2020)
return 1;
- tmp = val;
- if (__atomic_fetch_sub(mem, 1010, __ATOMIC_SEQ_CST) != tmp || val != 1010)
+ s.tmp = s.val;
+ if (__atomic_fetch_sub(mem, 1010, __ATOMIC_SEQ_CST) != s.tmp || s.val != 1010)
return 1;
- if (__atomic_sub_fetch(&val, 1010, __ATOMIC_SEQ_CST) != 0 || val != 0)
+ if (__atomic_sub_fetch(&s.val, 1010, __ATOMIC_SEQ_CST) != 0 || s.val != 0)
return 1;
- tmp = val;
- if (!__atomic_compare_exchange_n(mem, &tmp, 3030, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
- || tmp != 0)
+ s.tmp = s.val;
+ if (!__atomic_compare_exchange_n(mem, &s.tmp, 3030, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+ || s.tmp != 0)
return 1;
- if (__atomic_exchange_n(&val, 4040, __ATOMIC_SEQ_CST) != 3030)
+ if (__atomic_exchange_n(&s.val, 4040, __ATOMIC_SEQ_CST) != 3030)
return 1;
return 0;
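The intent of the modified probe can be tried outside autoconf with a small standalone program (my own sketch under the same assumptions as the test above: GCC/Clang __atomic builtins, and a struct member used to obtain a 64bit object that is only 4-byte aligned on typical 32bit ABIs):

/* atomic64_align_probe.c -- does this target do native 64-bit atomics
 * at the alignment apr_uint64_t may actually have? */
#include <stdint.h>
#include <stdio.h>

struct probe {
    char pad0;
    uint64_t val;   /* only 4-byte aligned on e.g. i386 System V */
};

int main(void)
{
    struct probe s;

    /* Reject targets where this would silently go through libatomic. */
    if (!__atomic_always_lock_free(sizeof(s.val), &s.val)) {
        puts("64bit __atomic builtins not natively usable at this alignment");
        return 1;
    }

    s.val = 1010;
    if (__atomic_fetch_add(&s.val, 1010, __ATOMIC_SEQ_CST) != 1010
        || s.val != 2020)
        return 1;

    puts("native 64bit atomics usable");
    return 0;
}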
diff --git a/test/testatomic.c b/test/testatomic.c
index 6488f45f5..d9c01e668 100644
--- a/test/testatomic.c
+++ b/test/testatomic.c
@@ -682,6 +682,9 @@ static void test_atomics_threaded64(abts_case *tc, void *data)
pthread_setconcurrency(8);
#endif
+ mutex_locks64 = 0;
+ apr_atomic_set64(&atomic_ops64, 0);
+
rv = apr_thread_mutex_create(&thread_lock64, APR_THREAD_MUTEX_DEFAULT, p);
APR_ASSERT_SUCCESS(tc, "Could not create lock", rv);