summaryrefslogtreecommitdiff
path: root/REORG.TODO/nptl/pthread_cond_wait.c
diff options
context:
space:
mode:
Diffstat (limited to 'REORG.TODO/nptl/pthread_cond_wait.c')
-rw-r--r--REORG.TODO/nptl/pthread_cond_wait.c673
1 files changed, 673 insertions, 0 deletions
diff --git a/REORG.TODO/nptl/pthread_cond_wait.c b/REORG.TODO/nptl/pthread_cond_wait.c
new file mode 100644
index 0000000000..7812b94a3a
--- /dev/null
+++ b/REORG.TODO/nptl/pthread_cond_wait.c
@@ -0,0 +1,673 @@
+/* Copyright (C) 2003-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2003.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <endian.h>
+#include <errno.h>
+#include <sysdep.h>
+#include <futex-internal.h>
+#include <pthread.h>
+#include <pthreadP.h>
+#include <sys/time.h>
+#include <atomic.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include <shlib-compat.h>
+#include <stap-probe.h>
+#include <time.h>
+
+#include "pthread_cond_common.c"
+
+
+struct _condvar_cleanup_buffer /* Argument for __condvar_cleanup_waiting. */
+{
+ uint64_t wseq; /* Raw __wseq value obtained: position << 1 | group index. */
+ pthread_cond_t *cond; /* Condvar the waiter is blocked on. */
+ pthread_mutex_t *mutex; /* Mutex the cleanup handler re-acquires. */
+ int private; /* Result of __condvar_get_private, passed to futex calls. */
+};
+
+
+/* Drop our waiter reference on COND; wake pthread_cond_destroy if needed.  */
+static void
+__condvar_confirm_wakeup (pthread_cond_t *cond, int private)
+{
+  /* A prior value of one reference (8) plus the wake-request flag (4) means
+     destruction is pending and we were the last waiter: wake the threads in
+     pthread_cond_destroy.  Release MO synchronizes with them; flag stays set.  */
+  unsigned int prior = atomic_fetch_add_release (&cond->__data.__wrefs, -8);
+  if ((prior >> 2) == 3)
+    futex_wake (&cond->__data.__wrefs, INT_MAX, private);
+}
+
+
+/* Cancel waiting after having registered as a waiter previously. SEQ is our
+ position and G is our group index.
+ The goal of cancellation is to make our group smaller if that is still
+ possible. If we are in a closed group, this is not possible anymore; in
+ this case, we need to send a replacement signal for the one we effectively
+ consumed because the signal should have gotten consumed by another waiter
+ instead; we must not both cancel waiting and consume a signal.
+
+ Must not be called while still holding a reference on the group.
+
+ Returns nothing; if we consumed a signal, we send a replacement signal.
+
+ On some kind of timeouts, we may be able to pretend that a signal we
+ effectively consumed happened before the timeout (i.e., similarly to first
+ spinning on signals before actually checking whether the timeout has
+ passed already). Doing this would allow us to skip sending a replacement
+ signal, but this case might happen rarely because the end of the timeout
+ must race with someone else sending a signal. Therefore, we don't bother
+ trying to optimize this. */
+static void
+__condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g,
+ int private)
+{
+ bool consumed_signal = false;
+
+ /* No deadlock with group switching is possible here because we do
+ not hold a reference on the group. */
+ __condvar_acquire_lock (cond, private);
+
+ uint64_t g1_start = __condvar_load_g1_start_relaxed (cond) >> 1;
+ if (g1_start > seq)
+ {
+ /* Our group is closed, so someone provided enough signals for it.
+ Thus, we effectively consumed a signal. */
+ consumed_signal = true;
+ }
+ else
+ {
+ if (g1_start + __condvar_get_orig_size (cond) <= seq)
+ {
+ /* We are in the current G2 and thus cannot have consumed a signal.
+ Reduce its effective size or handle overflow. Remember that in
+ G2, unsigned int size is zero or a negative value. */
+ if (cond->__data.__g_size[g] + __PTHREAD_COND_MAX_GROUP_SIZE > 0)
+ {
+ cond->__data.__g_size[g]--;
+ }
+ else
+ {
+ /* Cancellations would overflow the maximum group size. Just
+ wake up everyone spuriously to create a clean state. This
+ also means we do not consume a signal someone else sent. */
+ __condvar_release_lock (cond, private);
+ __pthread_cond_broadcast (cond);
+ return;
+ }
+ }
+ else
+ {
+ /* We are in current G1. If the group's size is zero, someone put
+ a signal in the group that nobody else but us can consume. */
+ if (cond->__data.__g_size[g] == 0)
+ consumed_signal = true;
+ else
+ {
+ /* Otherwise, we decrease the size of the group. This is
+ equivalent to atomically putting in a signal just for us and
+ consuming it right away. We do not consume a signal sent
+ by someone else. We also cannot have consumed a futex
+ wake-up because if we were cancelled or timed out in a futex
+ call, the futex will wake another waiter. */
+ cond->__data.__g_size[g]--;
+ }
+ }
+ }
+
+ __condvar_release_lock (cond, private);
+
+ if (consumed_signal)
+ {
+ /* We effectively consumed a signal even though we didn't want to.
+ Therefore, we need to send a replacement signal.
+ If we would want to optimize this, we could do what
+ pthread_cond_signal does right in the critical section above. */
+ __pthread_cond_signal (cond);
+ }
+}
+
+/* Drop one reference on group G and wake signalers waiting for the last.  */
+static void
+__condvar_dec_grefs (pthread_cond_t *cond, unsigned int g, int private)
+{
+  /* Release MO to synchronize-with the acquire load in
+     __condvar_quiesce_and_switch_g1.  Unless the prior value was exactly one
+     reference (2) plus the wake-request flag (1), there is nothing to do.  */
+  if (atomic_fetch_add_release (cond->__data.__g_refs + g, -2) != 3)
+    return;
+
+  /* Clear the wake-up request flag, then wake every futex waiter.  Relaxed MO
+     suffices, and applying this to an aliased group is harmless because all
+     futex waiters are woken right after the flag is cleared.  */
+  atomic_fetch_and_relaxed (cond->__data.__g_refs + g, ~(unsigned int) 1);
+  futex_wake (cond->__data.__g_refs + g, INT_MAX, private);
+}
+
+/* Cancellation handler for a waiter blocked on the futex.  ARG is the
+   struct _condvar_cleanup_buffer set up by the waiter.  Undo the waiter
+   registration, confirm the wake-up, and re-acquire the mutex.  */
+static void
+__condvar_cleanup_waiting (void *arg)
+{
+  struct _condvar_cleanup_buffer *cb = arg;
+  pthread_cond_t *cond = cb->cond;
+  const int private = cb->private;
+  const unsigned int g = cb->wseq & 1;
+  const uint64_t seq = cb->wseq >> 1;
+
+  /* Drop the group reference before cancelling: __condvar_cancel_waiting
+     must not be called while still holding one.  */
+  __condvar_dec_grefs (cond, g, private);
+  __condvar_cancel_waiting (cond, seq, g, private);
+
+  /* FIXME With the current cancellation implementation a thread can be
+     cancelled after returning from a syscall, so we may have consumed a
+     futex wake-up that another waiter in our group needed.  Until
+     cancellation is fixed, conservatively add a futex wake-up.  */
+  futex_wake (cond->__data.__g_signals + g, 1, private);
+  __condvar_confirm_wakeup (cond, private);
+
+  /* XXX If locking the mutex fails, should we just stop execution?  This
+     might be better than silently ignoring the error.  */
+  __pthread_mutex_cond_lock (cb->mutex);
+}
+
+/* This condvar implementation guarantees that all calls to signal and
+ broadcast and all of the three virtually atomic parts of each call to wait
+ (i.e., (1) releasing the mutex and blocking, (2) unblocking, and (3) re-
+ acquiring the mutex) happen in some total order that is consistent with the
+ happens-before relations in the calling program. However, this order does
+ not necessarily result in additional happens-before relations being
+ established (which aligns well with spurious wake-ups being allowed).
+
+ All waiters acquire a certain position in a 64b waiter sequence (__wseq).
+ This sequence determines which waiters are allowed to consume signals.
+ A broadcast is equal to sending as many signals as are unblocked waiters.
+ When a signal arrives, it samples the current value of __wseq with a
+ relaxed-MO load (i.e., the position the next waiter would get). (This is
+ sufficient because it is consistent with happens-before; the caller can
+ enforce stronger ordering constraints by calling signal while holding the
+ mutex.) Only waiters with a position less than the __wseq value observed
+ by the signal are eligible to consume this signal.
+
+ This would be straight-forward to implement if waiters would just spin but
+ we need to let them block using futexes. Futexes give no guarantee of
+ waking in FIFO order, so we cannot reliably wake eligible waiters if we
+ just use a single futex. Also, futex words are 32b in size, but we need
+ to distinguish more than 1<<32 states because we need to represent the
+ order of wake-up (and thus which waiters are eligible to consume signals);
+ blocking in a futex is not atomic with a waiter determining its position in
+ the waiter sequence, so we need the futex word to reliably notify waiters
+ that they should not attempt to block anymore because they have been
+ already signaled in the meantime. While an ABA issue on a 32b value will
+ be rare, ignoring it when we are aware of it is not the right thing to do
+ either.
+
+ Therefore, we use a 64b counter to represent the waiter sequence (on
+ architectures which only support 32b atomics, we use a few bits less).
+ To deal with the blocking using futexes, we maintain two groups of waiters:
+ * Group G1 consists of waiters that are all eligible to consume signals;
+ incoming signals will always signal waiters in this group until all
+ waiters in G1 have been signaled.
+ * Group G2 consists of waiters that arrive when a G1 is present and still
+ contains waiters that have not been signaled. When all waiters in G1
+ are signaled and a new signal arrives, the new signal will convert G2
+ into the new G1 and create a new G2 for future waiters.
+
+ We cannot allocate new memory because of process-shared condvars, so we
+ have just two slots of groups that change their role between G1 and G2.
+ Each has a separate futex word, a number of signals available for
+ consumption, a size (number of waiters in the group that have not been
+ signaled), and a reference count.
+
+ The group reference count is used to maintain the number of waiters that
+ are using the group's futex. Before a group can change its role, the
+ reference count must show that no waiters are using the futex anymore; this
+ prevents ABA issues on the futex word.
+
+ To represent which intervals in the waiter sequence the groups cover (and
+ thus also which group slot contains G1 or G2), we use a 64b counter to
+ designate the start position of G1 (inclusive), and a single bit in the
+ waiter sequence counter to represent which group slot currently contains
+ G2. This allows us to switch group roles atomically wrt. waiters obtaining
+ a position in the waiter sequence. The G1 start position allows waiters to
+ figure out whether they are in a group that has already been completely
+ signaled (i.e., if the current G1 starts at a later position than the
+ waiter's position). Waiters cannot determine whether they are currently
+ in G2 or G1 -- but they do not have to because all they are interested in
+ is whether there are available signals, and they always start in G2 (whose
+ group slot they know because of the bit in the waiter sequence). Signalers
+ will simply fill the right group until it is completely signaled and can
+ be closed (they do not switch group roles until they really have to, to
+ decrease the likelihood of having to wait for waiters still holding a
+ reference on the now-closed G1).
+
+ Signalers maintain the initial size of G1 to be able to determine where
+ G2 starts (G2 is always open-ended until it becomes G1). They track the
+ remaining size of a group; when waiters cancel waiting (due to PThreads
+ cancellation or timeouts), they will decrease this remaining size as well.
+
+ To implement condvar destruction requirements (i.e., that
+ pthread_cond_destroy can be called as soon as all waiters have been
+ signaled), waiters increment a reference count before starting to wait and
+ decrement it after they stopped waiting but right before they acquire the
+ mutex associated with the condvar.
+
+ pthread_cond_t thus consists of the following (bits that are used for
+ flags and are not part of the primary value of each field but necessary
+ to make some things atomic or because there was no space for them
+ elsewhere in the data structure):
+
+ __wseq: Waiter sequence counter
+ * LSB is index of current G2.
+ * Waiters fetch-add while having acquired the mutex associated with the
+ condvar. Signalers load it and fetch-xor it concurrently.
+ __g1_start: Starting position of G1 (inclusive)
+ * LSB is index of current G2.
+ * Modified by signalers while having acquired the condvar-internal lock
+ and observed concurrently by waiters.
+ __g1_orig_size: Initial size of G1
+ * The two least-significant bits represent the condvar-internal lock.
+ * Only accessed while having acquired the condvar-internal lock.
+ __wrefs: Waiter reference counter.
+ * Bit 2 is true if waiters should run futex_wake when they remove the
+ last reference. pthread_cond_destroy uses this as futex word.
+ * Bit 1 is the clock ID (0 == CLOCK_REALTIME, 1 == CLOCK_MONOTONIC).
+ * Bit 0 is true iff this is a process-shared condvar.
+ * Simple reference count used by both waiters and pthread_cond_destroy.
+ (If the format of __wrefs is changed, update nptl_lock_constants.pysym
+ and the pretty printers.)
+ For each of the two groups, we have:
+ __g_refs: Futex waiter reference count.
+ * LSB is true if waiters should run futex_wake when they remove the
+ last reference.
+ * Reference count used by waiters concurrently with signalers that have
+ acquired the condvar-internal lock.
+ __g_signals: The number of signals that can still be consumed.
+ * Used as a futex word by waiters. Used concurrently by waiters and
+ signalers.
+ * LSB is true iff this group has been completely signaled (i.e., it is
+ closed).
+ __g_size: Waiters remaining in this group (i.e., which have not been
+ signaled yet).
+ * Accessed by signalers and waiters that cancel waiting (both do so only
+ when having acquired the condvar-internal lock).
+ * The size of G2 is always zero because it cannot be determined until
+ the group becomes G1.
+ * Although this is of unsigned type, we rely on using unsigned overflow
+ rules to make this hold effectively negative values too (in
+ particular, when waiters in G2 cancel waiting).
+
+ A PTHREAD_COND_INITIALIZER condvar has all fields set to zero, which yields
+ a condvar that has G2 starting at position 0 and a G1 that is closed.
+
+ Because waiters do not claim ownership of a group right when obtaining a
+ position in __wseq but only reference count the group when using futexes
+ to block, it can happen that a group gets closed before a waiter can
+ increment the reference count. Therefore, waiters have to check whether
+ their group is already closed using __g1_start. They also have to perform
+ this check when spinning when trying to grab a signal from __g_signals.
+ Note that for these checks, using relaxed MO to load __g1_start is
+ sufficient because if a waiter can see a sufficiently large value, it could
+ have also consumed a signal in the waiter's group.
+
+ Waiters try to grab a signal from __g_signals without holding a reference
+ count, which can lead to stealing a signal from a more recent group after
+ their own group was already closed. They cannot always detect whether they
+ in fact did because they do not know when they stole, but they can
+ conservatively add a signal back to the group they stole from; if they
+ did so unnecessarily, all that happens is a spurious wake-up. To make this
+ even less likely, __g1_start contains the index of the current g2 too,
+ which allows waiters to check if there is aliasing on the group slots; if
+ there wasn't, they didn't steal from the current G1, which means that the
+ G1 they stole from must have been already closed and they do not need to
+ fix anything.
+
+ It is essential that the last field in pthread_cond_t is __g_signals[1]:
+ The previous condvar used a pointer-sized field in pthread_cond_t, so a
+ PTHREAD_COND_INITIALIZER from that condvar implementation might only
+ initialize 4 bytes to zero instead of the 8 bytes we need (i.e., 44 bytes
+ in total instead of the 48 we need). __g_signals[1] is not accessed before
+ the first group switch (G2 starts at index 0), which will set its value to
+ zero after a harmless fetch-or whose return value is ignored. This
+ effectively completes initialization.
+
+
+ Limitations:
+ * This condvar isn't designed to allow for more than
+ __PTHREAD_COND_MAX_GROUP_SIZE * (1 << 31) calls to __pthread_cond_wait.
+ * More than __PTHREAD_COND_MAX_GROUP_SIZE concurrent waiters are not
+ supported.
+ * Beyond what is allowed as errors by POSIX or documented, we can also
+ return the following errors:
+ * EPERM if MUTEX is a recursive mutex and the caller doesn't own it.
+ * EOWNERDEAD or ENOTRECOVERABLE when using robust mutexes. Unlike
+ for other errors, this can happen when we re-acquire the mutex; this
+ isn't allowed by POSIX (which requires all errors to virtually happen
+ before we release the mutex or change the condvar state), but there's
+ nothing we can do really.
+ * When using PTHREAD_MUTEX_PP_* mutexes, we can also return all errors
+ returned by __pthread_tpp_change_priority. We will already have
+ released the mutex in such cases, so the caller cannot expect to own
+ MUTEX.
+
+ Other notes:
+ * Instead of the normal mutex unlock / lock functions, we use
+ __pthread_mutex_unlock_usercnt(m, 0) / __pthread_mutex_cond_lock(m)
+ because those will not change the mutex-internal users count, so that it
+ can be detected when a condvar is still associated with a particular
+ mutex because there is a waiter blocked on this condvar using this mutex.
+*/
+static __always_inline int
+__pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
+ const struct timespec *abstime)
+{
+ const int maxspin = 0;
+ int err;
+ int result = 0;
+
+ LIBC_PROBE (cond_wait, 2, cond, mutex);
+
+ /* Acquire a position (SEQ) in the waiter sequence (WSEQ). We use an
+ atomic operation because signals and broadcasts may update the group
+ switch without acquiring the mutex. We do not need release MO here
+ because we do not need to establish any happens-before relation with
+ signalers (see __pthread_cond_signal); modification order alone
+ establishes a total order of waiters/signals. We do need acquire MO
+ to synchronize with group reinitialization in
+ __condvar_quiesce_and_switch_g1. */
+ uint64_t wseq = __condvar_fetch_add_wseq_acquire (cond, 2);
+ /* Find our group's index. We always go into what was G2 when we acquired
+ our position. */
+ unsigned int g = wseq & 1;
+ uint64_t seq = wseq >> 1;
+
+ /* Increase the waiter reference count. Relaxed MO is sufficient because
+ we only need to synchronize when decrementing the reference count. */
+ unsigned int flags = atomic_fetch_add_relaxed (&cond->__data.__wrefs, 8);
+ int private = __condvar_get_private (flags);
+
+ /* Now that we are registered as a waiter, we can release the mutex.
+ Waiting on the condvar must be atomic with releasing the mutex, so if
+ the mutex is used to establish a happens-before relation with any
+ signaler, the waiter must be visible to the latter; thus, we release the
+ mutex after registering as waiter.
+ If releasing the mutex fails, we just cancel our registration as a
+ waiter and confirm that we have woken up. */
+ err = __pthread_mutex_unlock_usercnt (mutex, 0);
+ if (__glibc_unlikely (err != 0))
+ {
+ __condvar_cancel_waiting (cond, seq, g, private);
+ __condvar_confirm_wakeup (cond, private);
+ return err;
+ }
+
+ /* Now wait until a signal is available in our group or it is closed.
+ Acquire MO so that if we observe a value of zero written after group
+ switching in __condvar_quiesce_and_switch_g1, we synchronize with that
+ store and will see the prior update of __g1_start done while switching
+ groups too. */
+ unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g);
+
+ do
+ {
+ while (1)
+ {
+ /* Spin-wait first.
+ Note that spinning first without checking whether a timeout
+ passed might lead to what looks like a spurious wake-up even
+ though we should return ETIMEDOUT (e.g., if the caller provides
+ an absolute timeout that is clearly in the past). However,
+ (1) spurious wake-ups are allowed, (2) it seems unlikely that a
+ user will (ab)use pthread_cond_wait as a check for whether a
+ point in time is in the past, and (3) spinning first without
+ having to compare against the current time seems to be the right
+ choice from a performance perspective for most use cases. */
+ unsigned int spin = maxspin;
+ while (signals == 0 && spin > 0)
+ {
+ /* Check that we are not spinning on a group that's already
+ closed. */
+ if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))
+ goto done;
+
+ /* TODO Back off. */
+
+ /* Reload signals. See above for MO. */
+ signals = atomic_load_acquire (cond->__data.__g_signals + g);
+ spin--;
+ }
+
+ /* If our group will be closed as indicated by the flag on signals,
+ don't bother grabbing a signal. */
+ if (signals & 1)
+ goto done;
+
+ /* If there is an available signal, don't block. */
+ if (signals != 0)
+ break;
+
+ /* No signals available after spinning, so prepare to block.
+ We first acquire a group reference and use acquire MO for that so
+ that we synchronize with the dummy read-modify-write in
+ __condvar_quiesce_and_switch_g1 if we read from that. In turn,
+ in this case this will make us see the closed flag on __g_signals
+ that designates a concurrent attempt to reuse the group's slot.
+ We use acquire MO for the __g_signals check to make the
+ __g1_start check work (see spinning above).
+ Note that the group reference acquisition will not mask the
+ release MO when decrementing the reference count because we use
+ an atomic read-modify-write operation and thus extend the release
+ sequence. */
+ atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2);
+ if (((atomic_load_acquire (cond->__data.__g_signals + g) & 1) != 0)
+ || (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)))
+ {
+ /* Our group is closed. Wake up any signalers that might be
+ waiting. */
+ __condvar_dec_grefs (cond, g, private);
+ goto done;
+ }
+
+ // Now block.
+ struct _pthread_cleanup_buffer buffer;
+ struct _condvar_cleanup_buffer cbuffer;
+ cbuffer.wseq = wseq;
+ cbuffer.cond = cond;
+ cbuffer.mutex = mutex;
+ cbuffer.private = private;
+ __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer);
+
+ if (abstime == NULL)
+ {
+ /* Block without a timeout. */
+ err = futex_wait_cancelable (
+ cond->__data.__g_signals + g, 0, private);
+ }
+ else
+ {
+ /* Block, but with a timeout.
+ Work around the fact that the kernel rejects negative timeout
+ values despite them being valid. */
+ if (__glibc_unlikely (abstime->tv_sec < 0))
+ err = ETIMEDOUT;
+
+ else if ((flags & __PTHREAD_COND_CLOCK_MONOTONIC_MASK) != 0)
+ {
+ /* CLOCK_MONOTONIC is requested. */
+ struct timespec rt;
+ if (__clock_gettime (CLOCK_MONOTONIC, &rt) != 0)
+ __libc_fatal ("clock_gettime does not support "
+ "CLOCK_MONOTONIC");
+ /* Convert the absolute timeout value to a relative
+ timeout. */
+ rt.tv_sec = abstime->tv_sec - rt.tv_sec;
+ rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec;
+ if (rt.tv_nsec < 0)
+ {
+ rt.tv_nsec += 1000000000;
+ --rt.tv_sec;
+ }
+ /* Did we already time out? */
+ if (__glibc_unlikely (rt.tv_sec < 0))
+ err = ETIMEDOUT;
+ else
+ err = futex_reltimed_wait_cancelable
+ (cond->__data.__g_signals + g, 0, &rt, private);
+ }
+ else
+ {
+ /* Use CLOCK_REALTIME. */
+ err = futex_abstimed_wait_cancelable
+ (cond->__data.__g_signals + g, 0, abstime, private);
+ }
+ }
+
+ __pthread_cleanup_pop (&buffer, 0);
+
+ if (__glibc_unlikely (err == ETIMEDOUT))
+ {
+ __condvar_dec_grefs (cond, g, private);
+ /* If we timed out, we effectively cancel waiting. Note that
+ we have decremented __g_refs before cancellation, so that a
+ deadlock between waiting for quiescence of our group in
+ __condvar_quiesce_and_switch_g1 and us trying to acquire
+ the lock during cancellation is not possible. */
+ __condvar_cancel_waiting (cond, seq, g, private);
+ result = ETIMEDOUT;
+ goto done;
+ }
+ else
+ __condvar_dec_grefs (cond, g, private);
+
+ /* Reload signals. See above for MO. */
+ signals = atomic_load_acquire (cond->__data.__g_signals + g);
+ }
+
+ }
+ /* Try to grab a signal. Use acquire MO so that we see an up-to-date value
+ of __g1_start below (see spinning above for a similar case). In
+ particular, if we steal from a more recent group, we will also see a
+ more recent __g1_start below. */
+ while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g,
+ &signals, signals - 2));
+
+ /* We consumed a signal but we could have consumed from a more recent group
+ that aliased with ours due to being in the same group slot. If this
+ might be the case our group must be closed as visible through
+ __g1_start. */
+ uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
+ if (seq < (g1_start >> 1))
+ {
+ /* We potentially stole a signal from a more recent group but we do not
+ know which group we really consumed from.
+ We do not care about groups older than current G1 because they are
+ closed; we could have stolen from these, but then we just add a
+ spurious wake-up for the current groups.
+ We will never steal a signal from current G2 that was really intended
+ for G2 because G2 never receives signals (until it becomes G1). We
+ could have stolen a signal from G2 that was conservatively added by a
+ previous waiter that also thought it stole a signal -- but given that
+ that signal was added unnecessarily, it's not a problem if we steal
+ it.
+ Thus, the remaining case is that we could have stolen from the current
+ G1, where "current" means the __g1_start value we observed. However,
+ if the current G1 does not have the same slot index as we do, we did
+ not steal from it and do not need to undo that. This is the reason
+ for putting a bit with G2's index into __g1_start as well. */
+ if (((g1_start & 1) ^ 1) == g)
+ {
+ /* We have to conservatively undo our potential mistake of stealing
+ a signal. We can stop trying to do that when the current G1
+ changes because other spinning waiters will notice this too and
+ __condvar_quiesce_and_switch_g1 has checked that there are no
+ futex waiters anymore before switching G1.
+ Relaxed MO is fine for the __g1_start load because we need to
+ merely be able to observe this fact and not have to observe
+ something else as well.
+ ??? Would it help to spin for a little while to see whether the
+ current G1 gets closed? This might be worthwhile if the group is
+ small or close to being closed. */
+ unsigned int s = atomic_load_relaxed (cond->__data.__g_signals + g);
+ while (__condvar_load_g1_start_relaxed (cond) == g1_start)
+ {
+ /* Try to add a signal. We don't need to acquire the lock
+ because at worst we can cause a spurious wake-up. If the
+ group is in the process of being closed (LSB is true), this
+ has an effect similar to us adding a signal. */
+ if (((s & 1) != 0)
+ || atomic_compare_exchange_weak_relaxed
+ (cond->__data.__g_signals + g, &s, s + 2))
+ {
+ /* If we added a signal, we also need to add a wake-up on
+ the futex. We also need to do that if we skipped adding
+ a signal because the group is being closed because
+ while __condvar_quiesce_and_switch_g1 could have closed
+ the group, it might still be waiting for futex waiters to
+ leave (and one of those waiters might be the one we stole
+ the signal from, which causes it to block using the
+ futex). */
+ futex_wake (cond->__data.__g_signals + g, 1, private);
+ break;
+ }
+ /* TODO Back off. */
+ }
+ }
+ }
+
+ done:
+
+ /* Confirm that we have been woken. We do that before acquiring the mutex
+ to allow for execution of pthread_cond_destroy while having acquired the
+ mutex. */
+ __condvar_confirm_wakeup (cond, private);
+
+ /* Woken up; now re-acquire the mutex. If this doesn't fail, return RESULT,
+ which is set to ETIMEDOUT if a timeout occurred, or zero otherwise. */
+ err = __pthread_mutex_cond_lock (mutex);
+ /* XXX Abort on errors that are disallowed by POSIX? */
+ return (err != 0) ? err : result;
+}
+
+
+/* Untimed wait: calls __pthread_cond_wait_common with a NULL timeout. */
+int
+__pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex)
+{
+ return __pthread_cond_wait_common (cond, mutex, NULL);
+}
+
+/* Timed wait on COND until ABSTIME; see __pthread_cond_wait_common.  */
+int
+__pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex,
+                          const struct timespec *abstime)
+{
+  /* Validate the nanoseconds field first.  Dereferencing ABSTIME here should
+     also tell the compiler that it can assume ABSTIME is not NULL.  */
+  const bool nsec_ok = (abstime->tv_nsec >= 0
+                        && abstime->tv_nsec < 1000000000);
+  return nsec_ok ? __pthread_cond_wait_common (cond, mutex, abstime) : EINVAL;
+}
+
+versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
+ GLIBC_2_3_2); /* NOTE(review): presumably exports pthread_cond_wait at this symbol version -- confirm in shlib-compat.h. */
+versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait,
+ GLIBC_2_3_2); /* NOTE(review): presumably the same for pthread_cond_timedwait -- confirm. */