From 03b007771beb4dba3b16d6097a53bfa328a78efb Mon Sep 17 00:00:00 2001 From: Stefan Liebler Date: Fri, 20 Jan 2017 09:53:04 +0100 Subject: S390: Adjust lock elision code after review. This patch adjusts s390 specific lock elision code after review of the following patches: -S390: Use own tbegin macro instead of __builtin_tbegin. (8bfc4a2ab4bebdf86c151665aae8a266e2f18fb4) -S390: Use new __libc_tbegin_retry macro in elision-lock.c. (53c5c3d5ac238901c13f28a73ba05b0678094e80) -S390: Optimize lock-elision by decrementing adapt_count at unlock. (dd037fb3df286b7c2d0b0c6f8d02a2dd8a8e8a08) The futex value is not tested before starting a transaction, __glibc_likely is used instead of __builtin_expect and comments are adjusted. ChangeLog: * sysdeps/unix/sysv/linux/s390/htm.h: Adjust comments. * sysdeps/unix/sysv/linux/s390/elision-unlock.c: Likewise. * sysdeps/unix/sysv/linux/s390/elision-lock.c: Adjust comments. (__lll_lock_elision): Do not test futex before starting a transaction. Use __glibc_likely instead of __builtin_expect. * sysdeps/unix/sysv/linux/s390/elision-trylock.c: Adjust comments. (__lll_trylock_elision): Do not test futex before starting a transaction. Use __glibc_likely instead of __builtin_expect. --- ChangeLog | 11 +++++++++ sysdeps/unix/sysv/linux/s390/elision-lock.c | 30 +++++++++++++------------ sysdeps/unix/sysv/linux/s390/elision-trylock.c | 31 +++++++++++++------------- sysdeps/unix/sysv/linux/s390/elision-unlock.c | 24 +++++++++++--------- sysdeps/unix/sysv/linux/s390/htm.h | 12 +++++++--- 5 files changed, 65 insertions(+), 43 deletions(-) diff --git a/ChangeLog b/ChangeLog index d7c2d2e55b..47fccc7642 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2017-01-20 Stefan Liebler + + * sysdeps/unix/sysv/linux/s390/htm.h: Adjust comments. + * sysdeps/unix/sysv/linux/s390/elision-unlock.c: Likewise. + * sysdeps/unix/sysv/linux/s390/elision-lock.c: Adjust comments. + (__lll_lock_elision): Do not test futex before starting a + transaction. Use __glibc_likely instead of __builtin_expect. + * sysdeps/unix/sysv/linux/s390/elision-trylock.c: Adjust comments. + (__lll_trylock_elision): Do not test futex before starting a + transaction. Use __glibc_likely instead of __builtin_expect. + 2017-01-20 Siddhesh Poyarekar * po/Makefile (update-translations): New target. diff --git a/sysdeps/unix/sysv/linux/s390/elision-lock.c b/sysdeps/unix/sysv/linux/s390/elision-lock.c index a7c0fccd26..00815375c7 100644 --- a/sysdeps/unix/sysv/linux/s390/elision-lock.c +++ b/sysdeps/unix/sysv/linux/s390/elision-lock.c @@ -50,27 +50,28 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private) critical section uses lock elision) and outside of transactions. Thus, we need to use atomic accesses to avoid data races. However, the value of adapt_count is just a hint, so relaxed MO accesses are - sufficient. - Do not begin a transaction if another cpu has locked the - futex with normal locking. If adapt_count is zero, it remains and the - next pthread_mutex_lock call will try to start a transaction again. */ - if (atomic_load_relaxed (futex) == 0 - && atomic_load_relaxed (adapt_count) <= 0 && aconf.try_tbegin > 0) + sufficient. */ + if (atomic_load_relaxed (adapt_count) <= 0 && aconf.try_tbegin > 0) { + /* Start a transaction and retry it automatically if it aborts with + _HTM_TBEGIN_TRANSIENT. This macro calls tbegin at most retry_cnt + + 1 times. The second argument is considered as retry_cnt. */ int status = __libc_tbegin_retry ((void *) 0, aconf.try_tbegin - 1); - if (__builtin_expect (status == _HTM_TBEGIN_STARTED, - _HTM_TBEGIN_STARTED)) + if (__glibc_likely (status == _HTM_TBEGIN_STARTED)) { /* Check the futex to make sure nobody has touched it in the mean time. This forces the futex into the cache and makes - sure the transaction aborts if some other cpu uses the - lock (writes the futex). */ - if (__builtin_expect (atomic_load_relaxed (futex) == 0, 1)) + sure the transaction aborts if another thread acquires the lock + concurrently. */ + if (__glibc_likely (atomic_load_relaxed (futex) == 0)) /* Lock was free. Return to user code in a transaction. */ return 0; - /* Lock was busy. Fall back to normal locking. */ - if (__builtin_expect (__libc_tx_nesting_depth () <= 1, 1)) + /* Lock was busy. Fall back to normal locking. + This can be the case if e.g. adapt_count was decremented to zero + by a former release and another thread has been waken up and + acquired it. */ + if (__glibc_likely (__libc_tx_nesting_depth () <= 1)) { /* In a non-nested transaction there is no need to abort, which is expensive. Simply end the started transaction. */ @@ -118,6 +119,7 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private) } } - /* Use normal locking as fallback path if transaction does not succeed. */ + /* Use normal locking as fallback path if the transaction does not + succeed. */ return LLL_LOCK ((*futex), private); } diff --git a/sysdeps/unix/sysv/linux/s390/elision-trylock.c b/sysdeps/unix/sysv/linux/s390/elision-trylock.c index 3c1d00953b..aa09073324 100644 --- a/sysdeps/unix/sysv/linux/s390/elision-trylock.c +++ b/sysdeps/unix/sysv/linux/s390/elision-trylock.c @@ -51,31 +51,29 @@ __lll_trylock_elision (int *futex, short *adapt_count) critical section uses lock elision) and outside of transactions. Thus, we need to use atomic accesses to avoid data races. However, the value of adapt_count is just a hint, so relaxed MO accesses are - sufficient. - Do not begin a transaction if another cpu has locked the - futex with normal locking. If adapt_count is zero, it remains and the - next pthread_mutex_lock call will try to start a transaction again. */ - if (atomic_load_relaxed (futex) == 0 - && atomic_load_relaxed (adapt_count) <= 0 && aconf.try_tbegin > 0) + sufficient. */ + if (atomic_load_relaxed (adapt_count) <= 0 && aconf.try_tbegin > 0) { int status = __libc_tbegin ((void *) 0); - if (__builtin_expect (status == _HTM_TBEGIN_STARTED, - _HTM_TBEGIN_STARTED)) + if (__glibc_likely (status == _HTM_TBEGIN_STARTED)) { /* Check the futex to make sure nobody has touched it in the mean time. This forces the futex into the cache and makes - sure the transaction aborts if some other cpu uses the - lock (writes the futex). */ - if (__builtin_expect (atomic_load_relaxed (futex) == 0, 1)) + sure the transaction aborts if another thread acquires the lock + concurrently. */ + if (__glibc_likely (atomic_load_relaxed (futex) == 0)) /* Lock was free. Return to user code in a transaction. */ return 0; - /* Lock was busy. Fall back to normal locking. Since we are in - a non-nested transaction there is no need to abort, which is - expensive. Simply end the started transaction. */ + /* Lock was busy. Fall back to normal locking. + This can be the case if e.g. adapt_count was decremented to zero + by a former release and another thread has been waken up and + acquired it. + Since we are in a non-nested transaction there is no need to abort, + which is expensive. Simply end the started transaction. */ __libc_tend (); /* Note: Changing the adapt_count here might abort a transaction on a - different cpu, but that could happen anyway when the futex is + different CPU, but that could happen anyway when the futex is acquired, so there's no need to check the nesting depth here. See above for why relaxed MO is sufficient. */ if (aconf.skip_lock_busy > 0) @@ -93,6 +91,7 @@ __lll_trylock_elision (int *futex, short *adapt_count) /* Could do some retries here. */ } - /* Use normal locking as fallback path if transaction does not succeed. */ + /* Use normal locking as fallback path if the transaction does not + succeed. */ return lll_trylock (*futex); } diff --git a/sysdeps/unix/sysv/linux/s390/elision-unlock.c b/sysdeps/unix/sysv/linux/s390/elision-unlock.c index d9205faf52..c062d71b77 100644 --- a/sysdeps/unix/sysv/linux/s390/elision-unlock.c +++ b/sysdeps/unix/sysv/linux/s390/elision-unlock.c @@ -26,8 +26,12 @@ __lll_unlock_elision(int *futex, short *adapt_count, int private) /* If the lock is free, we elided the lock earlier. This does not necessarily mean that we are in a transaction, because the user code may have closed the transaction, but that is impossible to detect reliably. - Relaxed MO access to futex is sufficient as we only need a hint, if we - started a transaction or acquired the futex in e.g. elision-lock.c. */ + Relaxed MO access to futex is sufficient because a correct program + will only release a lock it has acquired; therefore, it must either + changed the futex word's value to something !=0 or it must have used + elision; these are actions by the same thread, so these actions are + sequenced-before the relaxed load (and thus also happens-before the + relaxed load). Therefore, relaxed MO is sufficient. */ if (atomic_load_relaxed (futex) == 0) { __libc_tend (); @@ -36,17 +40,17 @@ __lll_unlock_elision(int *futex, short *adapt_count, int private) { /* Update the adapt_count while unlocking before completing the critical section. adapt_count is accessed concurrently outside of a - transaction or an aquired lock e.g. in elision-lock.c so we need to use - atomic accesses. However, the value of adapt_count is just a hint, so - relaxed MO accesses are sufficient. + transaction or a critical section (e.g. in elision-lock.c). So we need + to use atomic accesses. However, the value of adapt_count is just a + hint, so relaxed MO accesses are sufficient. If adapt_count would be decremented while locking, multiple - CPUs trying to lock the locked mutex will decrement adapt_count to + CPUs, trying to lock the acquired mutex, will decrement adapt_count to zero and another CPU will try to start a transaction, which will be immediately aborted as the mutex is locked. - If adapt_count would be decremented while unlocking after completing - the critical section, possible waiters will be waked up before - decrementing the adapt_count. Those waked up waiters could have - destroyed and freed this mutex! */ + The update of adapt_count is done before releasing the lock as POSIX' + mutex destruction requirements disallow accesses to the mutex after it + has been released and thus could have been acquired or destroyed by + another thread. */ short adapt_count_val = atomic_load_relaxed (adapt_count); if (adapt_count_val > 0) atomic_store_relaxed (adapt_count, adapt_count_val - 1); diff --git a/sysdeps/unix/sysv/linux/s390/htm.h b/sysdeps/unix/sysv/linux/s390/htm.h index 32d5a88127..70d7f66000 100644 --- a/sysdeps/unix/sysv/linux/s390/htm.h +++ b/sysdeps/unix/sysv/linux/s390/htm.h @@ -119,11 +119,16 @@ ar modification and fp operations. Some \ program-interruptions (e.g. a null \ pointer access) are filtered and the \ - trancsaction will abort. In this case \ + transaction will abort. In this case \ the normal lock path will execute it \ again and result in a core dump wich does \ now show at tbegin but the real executed \ - instruction. */ \ + instruction. \ + However it is not guaranteed that this \ + retry operate on the same data and thus \ + may not end in an program-interruption. \ + Note: This could also be used to probe \ + memory for being accessible! */ \ "2: tbegin 0, 0xFF0E\n\t" \ /* Branch away in abort case (this is the \ prefered sequence. See PoP in chapter 5 \ @@ -152,7 +157,8 @@ __ret; \ }) -/* These builtins are correct. Use them. */ +/* These builtins are usable in context of glibc lock elision code without any + changes. Use them. */ #define __libc_tend() \ ({ __asm__ __volatile__ (".machine push\n\t" \ ".machinemode \"zarch_nohighgprs\"\n\t" \ -- cgit v1.2.1