Blame nptl/pthread_cond_wait.c

Packit 6c4009
/* Copyright (C) 2003-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
   Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2003.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <endian.h>
Packit 6c4009
#include <errno.h>
Packit 6c4009
#include <sysdep.h>
Packit 6c4009
#include <futex-internal.h>
Packit 6c4009
#include <pthread.h>
Packit 6c4009
#include <pthreadP.h>
Packit 6c4009
#include <sys/time.h>
Packit 6c4009
#include <atomic.h>
Packit 6c4009
#include <stdint.h>
Packit 6c4009
#include <stdbool.h>
Packit 6c4009
Packit 6c4009
#include <shlib-compat.h>
Packit 6c4009
#include <stap-probe.h>
Packit 6c4009
#include <time.h>
Packit 6c4009
Packit 6c4009
#include "pthread_cond_common.c"
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* State handed to __condvar_cleanup_waiting so that a waiter that gets
   cancelled while blocked can undo its registration.  */
struct _condvar_cleanup_buffer
{
  /* Raw waiter-sequence value obtained by the waiter: bit 0 is the index of
     the group slot the waiter went into, the remaining bits are its
     position in the waiter sequence.  */
  uint64_t wseq;
  /* Condvar the waiter is blocked on.  */
  pthread_cond_t *cond;
  /* Mutex that is re-acquired at the end of clean-up.  */
  pthread_mutex_t *mutex;
  /* Private/shared flag that is passed through to the futex operations.  */
  int private;
};
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* Decrease the waiter reference count of COND, i.e., drop the reference the
   calling waiter added to __wrefs when it started waiting.  PRIVATE is the
   private/shared flag used for the futex operation.  */
static void
__condvar_confirm_wakeup (pthread_cond_t *cond, int private)
{
  /* One waiter reference is worth 8 because the three least-significant
     bits of __wrefs are used for flags.  Release MO to synchronize with
     threads waiting in pthread_cond_destroy.  */
  unsigned int prev = atomic_fetch_add_release (&cond->__data.__wrefs, -8);

  /* After shifting out the two lowest flag bits, a prior value of 3 means
     that the wake-request flag (bit 2) was set and that we were the last
     waiter (reference count of 1 << 3).  In that case destruction is
     pending, so wake any threads waiting in pthread_cond_destroy.  Don't
     bother clearing the wake-up request flag.  */
  if ((prev >> 2) == 3)
    futex_wake (&cond->__data.__wrefs, INT_MAX, private);
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* Cancel waiting after having registered as a waiter previously.  SEQ is our
Packit 6c4009
   position and G is our group index.
Packit 6c4009
   The goal of cancellation is to make our group smaller if that is still
Packit 6c4009
   possible.  If we are in a closed group, this is not possible anymore; in
Packit 6c4009
   this case, we need to send a replacement signal for the one we effectively
Packit 6c4009
   consumed because the signal should have gotten consumed by another waiter
Packit 6c4009
   instead; we must not both cancel waiting and consume a signal.
Packit 6c4009
Packit 6c4009
   Must not be called while still holding a reference on the group.
Packit 6c4009
Packit 6c4009
   Returns true iff we consumed a signal.
Packit 6c4009
Packit 6c4009
   On some kind of timeouts, we may be able to pretend that a signal we
Packit 6c4009
   effectively consumed happened before the timeout (i.e., similarly to first
Packit 6c4009
   spinning on signals before actually checking whether the timeout has
Packit 6c4009
   passed already).  Doing this would allow us to skip sending a replacement
Packit 6c4009
   signal, but this case might happen rarely because the end of the timeout
Packit 6c4009
   must race with someone else sending a signal.  Therefore, we don't bother
Packit 6c4009
   trying to optimize this.  */
Packit 6c4009
static void
Packit 6c4009
__condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g,
Packit 6c4009
			  int private)
Packit 6c4009
{
Packit 6c4009
  bool consumed_signal = false;
Packit 6c4009
Packit 6c4009
  /* No deadlock with group switching is possible here because we have do
Packit 6c4009
     not hold a reference on the group.  */
Packit 6c4009
  __condvar_acquire_lock (cond, private);
Packit 6c4009
Packit 6c4009
  uint64_t g1_start = __condvar_load_g1_start_relaxed (cond) >> 1;
Packit 6c4009
  if (g1_start > seq)
Packit 6c4009
    {
Packit 6c4009
      /* Our group is closed, so someone provided enough signals for it.
Packit 6c4009
	 Thus, we effectively consumed a signal.  */
Packit 6c4009
      consumed_signal = true;
Packit 6c4009
    }
Packit 6c4009
  else
Packit 6c4009
    {
Packit 6c4009
      if (g1_start + __condvar_get_orig_size (cond) <= seq)
Packit 6c4009
	{
Packit 6c4009
	  /* We are in the current G2 and thus cannot have consumed a signal.
Packit 6c4009
	     Reduce its effective size or handle overflow.  Remember that in
Packit 6c4009
	     G2, unsigned int size is zero or a negative value.  */
Packit 6c4009
	  if (cond->__data.__g_size[g] + __PTHREAD_COND_MAX_GROUP_SIZE > 0)
Packit 6c4009
	    {
Packit 6c4009
	      cond->__data.__g_size[g]--;
Packit 6c4009
	    }
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      /* Cancellations would overflow the maximum group size.  Just
Packit 6c4009
		 wake up everyone spuriously to create a clean state.  This
Packit 6c4009
		 also means we do not consume a signal someone else sent.  */
Packit 6c4009
	      __condvar_release_lock (cond, private);
Packit 6c4009
	      __pthread_cond_broadcast (cond);
Packit 6c4009
	      return;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  /* We are in current G1.  If the group's size is zero, someone put
Packit 6c4009
	     a signal in the group that nobody else but us can consume.  */
Packit 6c4009
	  if (cond->__data.__g_size[g] == 0)
Packit 6c4009
	    consumed_signal = true;
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      /* Otherwise, we decrease the size of the group.  This is
Packit 6c4009
		 equivalent to atomically putting in a signal just for us and
Packit 6c4009
		 consuming it right away.  We do not consume a signal sent
Packit 6c4009
		 by someone else.  We also cannot have consumed a futex
Packit 6c4009
		 wake-up because if we were cancelled or timed out in a futex
Packit 6c4009
		 call, the futex will wake another waiter.  */
Packit 6c4009
	      cond->__data.__g_size[g]--;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  __condvar_release_lock (cond, private);
Packit 6c4009
Packit 6c4009
  if (consumed_signal)
Packit 6c4009
    {
Packit 6c4009
      /* We effectively consumed a signal even though we didn't want to.
Packit 6c4009
	 Therefore, we need to send a replacement signal.
Packit 6c4009
	 If we would want to optimize this, we could do what
Packit 6c4009
	 pthread_cond_signal does right in the critical section above.  */
Packit 6c4009
      __pthread_cond_signal (cond);
Packit 6c4009
    }
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* Drop one reference on the futex of group slot G of COND and wake up any
   signalers that might be waiting for the group to become unused.  PRIVATE
   is the private/shared flag used for the futex operation.  */
static void
__condvar_dec_grefs (pthread_cond_t *cond, unsigned int g, int private)
{
  /* One reference is worth 2 because the LSB of __g_refs is the wake-up
     request flag.  A prior value of 3 thus means that we held the last
     reference while a wake-up was requested; in all other cases there is
     nothing more to do.  Release MO to synchronize-with the acquire load in
     __condvar_quiesce_and_switch_g1.  */
  if (atomic_fetch_add_release (cond->__data.__g_refs + g, -2) != 3)
    return;

  /* Clear the wake-up request flag before waking up.  We do not need more
     than relaxed MO and it doesn't matter if we apply this for an aliased
     group because we wake all futex waiters right after clearing the
     flag.  */
  atomic_fetch_and_relaxed (cond->__data.__g_refs + g, ~(unsigned int) 1);
  futex_wake (cond->__data.__g_refs + g, INT_MAX, private);
}
Packit 6c4009
Packit 6c4009
/* Clean-up for cancellation of waiters waiting for normal signals.  We cancel
Packit 6c4009
   our registration as a waiter, confirm we have woken up, and re-acquire the
Packit 6c4009
   mutex.  */
Packit 6c4009
static void
Packit 6c4009
__condvar_cleanup_waiting (void *arg)
Packit 6c4009
{
Packit 6c4009
  struct _condvar_cleanup_buffer *cbuffer =
Packit 6c4009
    (struct _condvar_cleanup_buffer *) arg;
Packit 6c4009
  pthread_cond_t *cond = cbuffer->cond;
Packit 6c4009
  unsigned g = cbuffer->wseq & 1;
Packit 6c4009
Packit 6c4009
  __condvar_dec_grefs (cond, g, cbuffer->private);
Packit 6c4009
Packit 6c4009
  __condvar_cancel_waiting (cond, cbuffer->wseq >> 1, g, cbuffer->private);
Packit 6c4009
  /* FIXME With the current cancellation implementation, it is possible that
Packit 6c4009
     a thread is cancelled after it has returned from a syscall.  This could
Packit 6c4009
     result in a cancelled waiter consuming a futex wake-up that is then
Packit 6c4009
     causing another waiter in the same group to not wake up.  To work around
Packit 6c4009
     this issue until we have fixed cancellation, just add a futex wake-up
Packit 6c4009
     conservatively.  */
Packit 6c4009
  futex_wake (cond->__data.__g_signals + g, 1, cbuffer->private);
Packit 6c4009
Packit 6c4009
  __condvar_confirm_wakeup (cond, cbuffer->private);
Packit 6c4009
Packit 6c4009
  /* XXX If locking the mutex fails, should we just stop execution?  This
Packit 6c4009
     might be better than silently ignoring the error.  */
Packit 6c4009
  __pthread_mutex_cond_lock (cbuffer->mutex);
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
/* This condvar implementation guarantees that all calls to signal and
Packit 6c4009
   broadcast and all of the three virtually atomic parts of each call to wait
Packit 6c4009
   (i.e., (1) releasing the mutex and blocking, (2) unblocking, and (3) re-
Packit 6c4009
   acquiring the mutex) happen in some total order that is consistent with the
Packit 6c4009
   happens-before relations in the calling program.  However, this order does
Packit 6c4009
   not necessarily result in additional happens-before relations being
Packit 6c4009
   established (which aligns well with spurious wake-ups being allowed).
Packit 6c4009
Packit 6c4009
   All waiters acquire a certain position in a 64b waiter sequence (__wseq).
Packit 6c4009
   This sequence determines which waiters are allowed to consume signals.
Packit 6c4009
   A broadcast is equal to sending as many signals as are unblocked waiters.
Packit 6c4009
   When a signal arrives, it samples the current value of __wseq with a
Packit 6c4009
   relaxed-MO load (i.e., the position the next waiter would get).  (This is
Packit 6c4009
   sufficient because it is consistent with happens-before; the caller can
Packit 6c4009
   enforce stronger ordering constraints by calling signal while holding the
Packit 6c4009
   mutex.)  Only waiters with a position less than the __wseq value observed
Packit 6c4009
   by the signal are eligible to consume this signal.
Packit 6c4009
Packit 6c4009
   This would be straight-forward to implement if waiters would just spin but
Packit 6c4009
   we need to let them block using futexes.  Futexes give no guarantee of
Packit 6c4009
   waking in FIFO order, so we cannot reliably wake eligible waiters if we
Packit 6c4009
   just use a single futex.  Also, futex words are 32b in size, but we need
Packit 6c4009
   to distinguish more than 1<<32 states because we need to represent the
Packit 6c4009
   order of wake-up (and thus which waiters are eligible to consume signals);
Packit 6c4009
   blocking in a futex is not atomic with a waiter determining its position in
Packit 6c4009
   the waiter sequence, so we need the futex word to reliably notify waiters
Packit 6c4009
   that they should not attempt to block anymore because they have been
Packit 6c4009
   already signaled in the meantime.  While an ABA issue on a 32b value will
Packit 6c4009
   be rare, ignoring it when we are aware of it is not the right thing to do
Packit 6c4009
   either.
Packit 6c4009
Packit 6c4009
   Therefore, we use a 64b counter to represent the waiter sequence (on
Packit 6c4009
   architectures which only support 32b atomics, we use a few bits less).
Packit 6c4009
   To deal with the blocking using futexes, we maintain two groups of waiters:
Packit 6c4009
   * Group G1 consists of waiters that are all eligible to consume signals;
Packit 6c4009
     incoming signals will always signal waiters in this group until all
Packit 6c4009
     waiters in G1 have been signaled.
Packit 6c4009
   * Group G2 consists of waiters that arrive when a G1 is present and still
Packit 6c4009
     contains waiters that have not been signaled.  When all waiters in G1
Packit 6c4009
     are signaled and a new signal arrives, the new signal will convert G2
Packit 6c4009
     into the new G1 and create a new G2 for future waiters.
Packit 6c4009
Packit 6c4009
   We cannot allocate new memory because of process-shared condvars, so we
Packit 6c4009
   have just two slots of groups that change their role between G1 and G2.
Packit 6c4009
   Each has a separate futex word, a number of signals available for
Packit 6c4009
   consumption, a size (number of waiters in the group that have not been
Packit 6c4009
   signaled), and a reference count.
Packit 6c4009
Packit 6c4009
   The group reference count is used to maintain the number of waiters that
Packit 6c4009
   are using the group's futex.  Before a group can change its role, the
Packit 6c4009
   reference count must show that no waiters are using the futex anymore; this
Packit 6c4009
   prevents ABA issues on the futex word.
Packit 6c4009
Packit 6c4009
   To represent which intervals in the waiter sequence the groups cover (and
Packit 6c4009
   thus also which group slot contains G1 or G2), we use a 64b counter to
Packit 6c4009
   designate the start position of G1 (inclusive), and a single bit in the
Packit 6c4009
   waiter sequence counter to represent which group slot currently contains
Packit 6c4009
   G2.  This allows us to switch group roles atomically wrt. waiters obtaining
Packit 6c4009
   a position in the waiter sequence.  The G1 start position allows waiters to
Packit 6c4009
   figure out whether they are in a group that has already been completely
Packit 6c4009
   signaled (i.e., if the current G1 starts at a later position than the
Packit 6c4009
   waiter's position).  Waiters cannot determine whether they are currently
Packit 6c4009
   in G2 or G1 -- but they do not have to because all they are interested in
Packit 6c4009
   is whether there are available signals, and they always start in G2 (whose
Packit 6c4009
   group slot they know because of the bit in the waiter sequence).  Signalers
Packit 6c4009
   will simply fill the right group until it is completely signaled and can
Packit 6c4009
   be closed (they do not switch group roles until they really have to to
Packit 6c4009
   decrease the likelihood of having to wait for waiters still holding a
Packit 6c4009
   reference on the now-closed G1).
Packit 6c4009
Packit 6c4009
   Signalers maintain the initial size of G1 to be able to determine where
Packit 6c4009
   G2 starts (G2 is always open-ended until it becomes G1).  They track the
Packit 6c4009
   remaining size of a group; when waiters cancel waiting (due to PThreads
Packit 6c4009
   cancellation or timeouts), they will decrease this remaining size as well.
Packit 6c4009
Packit 6c4009
   To implement condvar destruction requirements (i.e., that
Packit 6c4009
   pthread_cond_destroy can be called as soon as all waiters have been
Packit 6c4009
   signaled), waiters increment a reference count before starting to wait and
Packit 6c4009
   decrement it after they stopped waiting but right before they acquire the
Packit 6c4009
   mutex associated with the condvar.
Packit 6c4009
Packit 6c4009
   pthread_cond_t thus consists of the following (bits that are used for
Packit 6c4009
   flags and are not part of the primary value of each field but necessary
Packit 6c4009
   to make some things atomic or because there was no space for them
Packit 6c4009
   elsewhere in the data structure):
Packit 6c4009
Packit 6c4009
   __wseq: Waiter sequence counter
Packit 6c4009
     * LSB is index of current G2.
Packit 6c4009
     * Waiters fetch-add while having acquired the mutex associated with the
Packit 6c4009
       condvar.  Signalers load it and fetch-xor it concurrently.
Packit 6c4009
   __g1_start: Starting position of G1 (inclusive)
Packit 6c4009
     * LSB is index of current G2.
Packit 6c4009
     * Modified by signalers while having acquired the condvar-internal lock
Packit 6c4009
       and observed concurrently by waiters.
Packit 6c4009
   __g1_orig_size: Initial size of G1
Packit 6c4009
     * The two least-significant bits represent the condvar-internal lock.
Packit 6c4009
     * Only accessed while having acquired the condvar-internal lock.
Packit 6c4009
   __wrefs: Waiter reference counter.
Packit 6c4009
     * Bit 2 is true if waiters should run futex_wake when they remove the
Packit 6c4009
       last reference.  pthread_cond_destroy uses this as futex word.
Packit 6c4009
     * Bit 1 is the clock ID (0 == CLOCK_REALTIME, 1 == CLOCK_MONOTONIC).
Packit 6c4009
     * Bit 0 is true iff this is a process-shared condvar.
Packit 6c4009
     * Simple reference count used by both waiters and pthread_cond_destroy.
Packit 6c4009
     (If the format of __wrefs is changed, update nptl_lock_constants.pysym
Packit 6c4009
      and the pretty printers.)
Packit 6c4009
   For each of the two groups, we have:
Packit 6c4009
   __g_refs: Futex waiter reference count.
Packit 6c4009
     * LSB is true if waiters should run futex_wake when they remove the
Packit 6c4009
       last reference.
Packit 6c4009
     * Reference count used by waiters concurrently with signalers that have
Packit 6c4009
       acquired the condvar-internal lock.
Packit 6c4009
   __g_signals: The number of signals that can still be consumed.
Packit 6c4009
     * Used as a futex word by waiters.  Used concurrently by waiters and
Packit 6c4009
       signalers.
Packit 6c4009
     * LSB is true iff this group has been completely signaled (i.e., it is
Packit 6c4009
       closed).
Packit 6c4009
   __g_size: Waiters remaining in this group (i.e., which have not been
Packit 6c4009
     signaled yet).
Packit 6c4009
     * Accessed by signalers and waiters that cancel waiting (both do so only
Packit 6c4009
       when having acquired the condvar-internal lock).
Packit 6c4009
     * The size of G2 is always zero because it cannot be determined until
Packit 6c4009
       the group becomes G1.
Packit 6c4009
     * Although this is of unsigned type, we rely on using unsigned overflow
Packit 6c4009
       rules to make this hold effectively negative values too (in
Packit 6c4009
       particular, when waiters in G2 cancel waiting).
Packit 6c4009
Packit 6c4009
   A PTHREAD_COND_INITIALIZER condvar has all fields set to zero, which yields
Packit 6c4009
   a condvar that has G2 starting at position 0 and a G1 that is closed.
Packit 6c4009
Packit 6c4009
   Because waiters do not claim ownership of a group right when obtaining a
Packit 6c4009
   position in __wseq but only reference count the group when using futexes
Packit 6c4009
   to block, it can happen that a group gets closed before a waiter can
Packit 6c4009
   increment the reference count.  Therefore, waiters have to check whether
Packit 6c4009
   their group is already closed using __g1_start.  They also have to perform
Packit 6c4009
   this check when spinning when trying to grab a signal from __g_signals.
Packit 6c4009
   Note that for these checks, using relaxed MO to load __g1_start is
Packit 6c4009
   sufficient because if a waiter can see a sufficiently large value, it could
Packit 6c4009
   have also consumed a signal in the waiter's group.
Packit 6c4009
Packit 6c4009
   Waiters try to grab a signal from __g_signals without holding a reference
Packit 6c4009
   count, which can lead to stealing a signal from a more recent group after
Packit 6c4009
   their own group was already closed.  They cannot always detect whether they
Packit 6c4009
   in fact did because they do not know when they stole, but they can
Packit 6c4009
   conservatively add a signal back to the group they stole from; if they
Packit 6c4009
   did so unnecessarily, all that happens is a spurious wake-up.  To make this
Packit 6c4009
   even less likely, __g1_start contains the index of the current g2 too,
Packit 6c4009
   which allows waiters to check if there is aliasing on the group slots; if
Packit 6c4009
   there wasn't, they didn't steal from the current G1, which means that the
Packit 6c4009
   G1 they stole from must have been already closed and they do not need to
Packit 6c4009
   fix anything.
Packit 6c4009
Packit 6c4009
   It is essential that the last field in pthread_cond_t is __g_signals[1]:
Packit 6c4009
   The previous condvar used a pointer-sized field in pthread_cond_t, so a
Packit 6c4009
   PTHREAD_COND_INITIALIZER from that condvar implementation might only
Packit 6c4009
   initialize 4 bytes to zero instead of the 8 bytes we need (i.e., 44 bytes
Packit 6c4009
   in total instead of the 48 we need).  __g_signals[1] is not accessed before
Packit 6c4009
   the first group switch (G2 starts at index 0), which will set its value to
Packit 6c4009
   zero after a harmless fetch-or whose return value is ignored.  This
Packit 6c4009
   effectively completes initialization.
Packit 6c4009
Packit 6c4009
Packit 6c4009
   Limitations:
Packit 6c4009
   * This condvar isn't designed to allow for more than
Packit 6c4009
     __PTHREAD_COND_MAX_GROUP_SIZE * (1 << 31) calls to __pthread_cond_wait.
Packit 6c4009
   * More than __PTHREAD_COND_MAX_GROUP_SIZE concurrent waiters are not
Packit 6c4009
     supported.
Packit 6c4009
   * Beyond what is allowed as errors by POSIX or documented, we can also
Packit 6c4009
     return the following errors:
Packit 6c4009
     * EPERM if MUTEX is a recursive mutex and the caller doesn't own it.
Packit 6c4009
     * EOWNERDEAD or ENOTRECOVERABLE when using robust mutexes.  Unlike
Packit 6c4009
       for other errors, this can happen when we re-acquire the mutex; this
Packit 6c4009
       isn't allowed by POSIX (which requires all errors to virtually happen
Packit 6c4009
       before we release the mutex or change the condvar state), but there's
Packit 6c4009
       nothing we can do really.
Packit 6c4009
     * When using PTHREAD_MUTEX_PP_* mutexes, we can also return all errors
Packit 6c4009
       returned by __pthread_tpp_change_priority.  We will already have
Packit 6c4009
       released the mutex in such cases, so the caller cannot expect to own
Packit 6c4009
       MUTEX.
Packit 6c4009
Packit 6c4009
   Other notes:
Packit 6c4009
   * Instead of the normal mutex unlock / lock functions, we use
Packit 6c4009
     __pthread_mutex_unlock_usercnt(m, 0) / __pthread_mutex_cond_lock(m)
Packit 6c4009
     because those will not change the mutex-internal users count, so that it
Packit 6c4009
     can be detected when a condvar is still associated with a particular
Packit 6c4009
     mutex because there is a waiter blocked on this condvar using this mutex.
Packit 6c4009
*/
Packit 6c4009
static __always_inline int
Packit 6c4009
__pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
Packit 6c4009
    const struct timespec *abstime)
Packit 6c4009
{
Packit 6c4009
  const int maxspin = 0;
Packit 6c4009
  int err;
Packit 6c4009
  int result = 0;
Packit 6c4009
Packit 6c4009
  LIBC_PROBE (cond_wait, 2, cond, mutex);
Packit 6c4009
Packit 6c4009
  /* Acquire a position (SEQ) in the waiter sequence (WSEQ).  We use an
Packit 6c4009
     atomic operation because signals and broadcasts may update the group
Packit 6c4009
     switch without acquiring the mutex.  We do not need release MO here
Packit 6c4009
     because we do not need to establish any happens-before relation with
Packit 6c4009
     signalers (see __pthread_cond_signal); modification order alone
Packit 6c4009
     establishes a total order of waiters/signals.  We do need acquire MO
Packit 6c4009
     to synchronize with group reinitialization in
Packit 6c4009
     __condvar_quiesce_and_switch_g1.  */
Packit 6c4009
  uint64_t wseq = __condvar_fetch_add_wseq_acquire (cond, 2);
Packit 6c4009
  /* Find our group's index.  We always go into what was G2 when we acquired
Packit 6c4009
     our position.  */
Packit 6c4009
  unsigned int g = wseq & 1;
Packit 6c4009
  uint64_t seq = wseq >> 1;
Packit 6c4009
Packit 6c4009
  /* Increase the waiter reference count.  Relaxed MO is sufficient because
Packit 6c4009
     we only need to synchronize when decrementing the reference count.  */
Packit 6c4009
  unsigned int flags = atomic_fetch_add_relaxed (&cond->__data.__wrefs, 8);
Packit 6c4009
  int private = __condvar_get_private (flags);
Packit 6c4009
Packit 6c4009
  /* Now that we are registered as a waiter, we can release the mutex.
Packit 6c4009
     Waiting on the condvar must be atomic with releasing the mutex, so if
Packit 6c4009
     the mutex is used to establish a happens-before relation with any
Packit 6c4009
     signaler, the waiter must be visible to the latter; thus, we release the
Packit 6c4009
     mutex after registering as waiter.
Packit 6c4009
     If releasing the mutex fails, we just cancel our registration as a
Packit 6c4009
     waiter and confirm that we have woken up.  */
Packit 6c4009
  err = __pthread_mutex_unlock_usercnt (mutex, 0);
Packit 6c4009
  if (__glibc_unlikely (err != 0))
Packit 6c4009
    {
Packit 6c4009
      __condvar_cancel_waiting (cond, seq, g, private);
Packit 6c4009
      __condvar_confirm_wakeup (cond, private);
Packit 6c4009
      return err;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Now wait until a signal is available in our group or it is closed.
Packit 6c4009
     Acquire MO so that if we observe a value of zero written after group
Packit 6c4009
     switching in __condvar_quiesce_and_switch_g1, we synchronize with that
Packit 6c4009
     store and will see the prior update of __g1_start done while switching
Packit 6c4009
     groups too.  */
Packit 6c4009
  unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g);
Packit 6c4009
Packit 6c4009
  do
Packit 6c4009
    {
Packit 6c4009
      while (1)
Packit 6c4009
	{
Packit 6c4009
	  /* Spin-wait first.
Packit 6c4009
	     Note that spinning first without checking whether a timeout
Packit 6c4009
	     passed might lead to what looks like a spurious wake-up even
Packit 6c4009
	     though we should return ETIMEDOUT (e.g., if the caller provides
Packit 6c4009
	     an absolute timeout that is clearly in the past).  However,
Packit 6c4009
	     (1) spurious wake-ups are allowed, (2) it seems unlikely that a
Packit 6c4009
	     user will (ab)use pthread_cond_wait as a check for whether a
Packit 6c4009
	     point in time is in the past, and (3) spinning first without
Packit 6c4009
	     having to compare against the current time seems to be the right
Packit 6c4009
	     choice from a performance perspective for most use cases.  */
Packit 6c4009
	  unsigned int spin = maxspin;
Packit 6c4009
	  while (signals == 0 && spin > 0)
Packit 6c4009
	    {
Packit 6c4009
	      /* Check that we are not spinning on a group that's already
Packit 6c4009
		 closed.  */
Packit 6c4009
	      if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))
Packit 6c4009
		goto done;
Packit 6c4009
Packit 6c4009
	      /* TODO Back off.  */
Packit 6c4009
Packit 6c4009
	      /* Reload signals.  See above for MO.  */
Packit 6c4009
	      signals = atomic_load_acquire (cond->__data.__g_signals + g);
Packit 6c4009
	      spin--;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  /* If our group will be closed as indicated by the flag on signals,
Packit 6c4009
	     don't bother grabbing a signal.  */
Packit 6c4009
	  if (signals & 1)
Packit 6c4009
	    goto done;
Packit 6c4009
Packit 6c4009
	  /* If there is an available signal, don't block.  */
Packit 6c4009
	  if (signals != 0)
Packit 6c4009
	    break;
Packit 6c4009
Packit 6c4009
	  /* No signals available after spinning, so prepare to block.
Packit 6c4009
	     We first acquire a group reference and use acquire MO for that so
Packit 6c4009
	     that we synchronize with the dummy read-modify-write in
Packit 6c4009
	     __condvar_quiesce_and_switch_g1 if we read from that.  In turn,
Packit 6c4009
	     in this case this will make us see the closed flag on __g_signals
Packit 6c4009
	     that designates a concurrent attempt to reuse the group's slot.
Packit 6c4009
	     We use acquire MO for the __g_signals check to make the
Packit 6c4009
	     __g1_start check work (see spinning above).
Packit 6c4009
	     Note that the group reference acquisition will not mask the
Packit 6c4009
	     release MO when decrementing the reference count because we use
Packit 6c4009
	     an atomic read-modify-write operation and thus extend the release
Packit 6c4009
	     sequence.  */
Packit 6c4009
	  atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2);
Packit 6c4009
	  if (((atomic_load_acquire (cond->__data.__g_signals + g) & 1) != 0)
Packit 6c4009
	      || (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)))
Packit 6c4009
	    {
Packit 6c4009
	      /* Our group is closed.  Wake up any signalers that might be
Packit 6c4009
		 waiting.  */
Packit 6c4009
	      __condvar_dec_grefs (cond, g, private);
Packit 6c4009
	      goto done;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  // Now block.
Packit 6c4009
	  struct _pthread_cleanup_buffer buffer;
Packit 6c4009
	  struct _condvar_cleanup_buffer cbuffer;
Packit 6c4009
	  cbuffer.wseq = wseq;
Packit 6c4009
	  cbuffer.cond = cond;
Packit 6c4009
	  cbuffer.mutex = mutex;
Packit 6c4009
	  cbuffer.private = private;
Packit 6c4009
	  __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer);
Packit 6c4009
Packit 6c4009
	  if (abstime == NULL)
Packit 6c4009
	    {
Packit 6c4009
	      /* Block without a timeout.  */
Packit 6c4009
	      err = futex_wait_cancelable (
Packit 6c4009
		  cond->__data.__g_signals + g, 0, private);
Packit 6c4009
	    }
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
	      /* Block, but with a timeout.
Packit 6c4009
		 Work around the fact that the kernel rejects negative timeout
Packit 6c4009
		 values despite them being valid.  */
Packit 6c4009
	      if (__glibc_unlikely (abstime->tv_sec < 0))
Packit 6c4009
	        err = ETIMEDOUT;
Packit 6c4009
Packit 6c4009
	      else if ((flags & __PTHREAD_COND_CLOCK_MONOTONIC_MASK) != 0)
Packit 6c4009
		{
Packit 6c4009
		  /* CLOCK_MONOTONIC is requested.  */
Packit 6c4009
		  struct timespec rt;
Packit 6c4009
		  if (__clock_gettime (CLOCK_MONOTONIC, &rt) != 0)
Packit 6c4009
		    __libc_fatal ("clock_gettime does not support "
Packit Service 88f876
				  "CLOCK_MONOTONIC\n");
Packit 6c4009
		  /* Convert the absolute timeout value to a relative
Packit 6c4009
		     timeout.  */
Packit 6c4009
		  rt.tv_sec = abstime->tv_sec - rt.tv_sec;
Packit 6c4009
		  rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec;
Packit 6c4009
		  if (rt.tv_nsec < 0)
Packit 6c4009
		    {
Packit 6c4009
		      rt.tv_nsec += 1000000000;
Packit 6c4009
		      --rt.tv_sec;
Packit 6c4009
		    }
Packit 6c4009
		  /* Did we already time out?  */
Packit 6c4009
		  if (__glibc_unlikely (rt.tv_sec < 0))
Packit 6c4009
		    err = ETIMEDOUT;
Packit 6c4009
		  else
Packit 6c4009
		    err = futex_reltimed_wait_cancelable
Packit 6c4009
			(cond->__data.__g_signals + g, 0, &rt, private);
Packit 6c4009
		}
Packit 6c4009
	      else
Packit 6c4009
		{
Packit 6c4009
		  /* Use CLOCK_REALTIME.  */
Packit 6c4009
		  err = futex_abstimed_wait_cancelable
Packit 6c4009
		      (cond->__data.__g_signals + g, 0, abstime, private);
Packit 6c4009
		}
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  __pthread_cleanup_pop (&buffer, 0);
Packit 6c4009
Packit 6c4009
	  if (__glibc_unlikely (err == ETIMEDOUT))
Packit 6c4009
	    {
Packit 6c4009
	      __condvar_dec_grefs (cond, g, private);
Packit 6c4009
	      /* If we timed out, we effectively cancel waiting.  Note that
Packit 6c4009
		 we have decremented __g_refs before cancellation, so that a
Packit 6c4009
		 deadlock between waiting for quiescence of our group in
Packit 6c4009
		 __condvar_quiesce_and_switch_g1 and us trying to acquire
Packit 6c4009
		 the lock during cancellation is not possible.  */
Packit 6c4009
	      __condvar_cancel_waiting (cond, seq, g, private);
Packit 6c4009
	      result = ETIMEDOUT;
Packit 6c4009
	      goto done;
Packit 6c4009
	    }
Packit 6c4009
	  else
Packit 6c4009
	    __condvar_dec_grefs (cond, g, private);
Packit 6c4009
Packit 6c4009
	  /* Reload signals.  See above for MO.  */
Packit 6c4009
	  signals = atomic_load_acquire (cond->__data.__g_signals + g);
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
    }
Packit 6c4009
  /* Try to grab a signal.  Use acquire MO so that we see an up-to-date value
Packit 6c4009
     of __g1_start below (see spinning above for a similar case).  In
Packit 6c4009
     particular, if we steal from a more recent group, we will also see a
Packit 6c4009
     more recent __g1_start below.  */
Packit 6c4009
  while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g,
Packit 6c4009
						&signals, signals - 2));
Packit 6c4009
Packit 6c4009
  /* We consumed a signal but we could have consumed from a more recent group
Packit 6c4009
     that aliased with ours due to being in the same group slot.  If this
Packit 6c4009
     might be the case our group must be closed as visible through
Packit 6c4009
     __g1_start.  */
Packit 6c4009
  uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
Packit 6c4009
  if (seq < (g1_start >> 1))
Packit 6c4009
    {
Packit 6c4009
      /* We potentially stole a signal from a more recent group but we do not
Packit 6c4009
	 know which group we really consumed from.
Packit 6c4009
	 We do not care about groups older than current G1 because they are
Packit 6c4009
	 closed; we could have stolen from these, but then we just add a
Packit 6c4009
	 spurious wake-up for the current groups.
Packit 6c4009
	 We will never steal a signal from current G2 that was really intended
Packit 6c4009
	 for G2 because G2 never receives signals (until it becomes G1).  We
Packit 6c4009
	 could have stolen a signal from G2 that was conservatively added by a
Packit 6c4009
	 previous waiter that also thought it stole a signal -- but given that
Packit 6c4009
	 that signal was added unnecessarily, it's not a problem if we steal
Packit 6c4009
	 it.
Packit 6c4009
	 Thus, the remaining case is that we could have stolen from the current
Packit 6c4009
	 G1, where "current" means the __g1_start value we observed.  However,
Packit 6c4009
	 if the current G1 does not have the same slot index as we do, we did
Packit 6c4009
	 not steal from it and do not need to undo that.  This is the reason
Packit 6c4009
	 for putting a bit with G2's index into __g1_start as well.  */
Packit 6c4009
      if (((g1_start & 1) ^ 1) == g)
Packit 6c4009
	{
Packit 6c4009
	  /* We have to conservatively undo our potential mistake of stealing
Packit 6c4009
	     a signal.  We can stop trying to do that when the current G1
Packit 6c4009
	     changes because other spinning waiters will notice this too and
Packit 6c4009
	     __condvar_quiesce_and_switch_g1 has checked that there are no
Packit 6c4009
	     futex waiters anymore before switching G1.
Packit 6c4009
	     Relaxed MO is fine for the __g1_start load because we need to
Packit 6c4009
	     merely be able to observe this fact and not have to observe
Packit 6c4009
	     something else as well.
Packit 6c4009
	     ??? Would it help to spin for a little while to see whether the
Packit 6c4009
	     current G1 gets closed?  This might be worthwhile if the group is
Packit 6c4009
	     small or close to being closed.  */
Packit 6c4009
	  unsigned int s = atomic_load_relaxed (cond->__data.__g_signals + g);
Packit 6c4009
	  while (__condvar_load_g1_start_relaxed (cond) == g1_start)
Packit 6c4009
	    {
Packit 6c4009
	      /* Try to add a signal.  We don't need to acquire the lock
Packit 6c4009
		 because at worst we can cause a spurious wake-up.  If the
Packit 6c4009
		 group is in the process of being closed (LSB is true), this
Packit 6c4009
		 has an effect similar to us adding a signal.  */
Packit 6c4009
	      if (((s & 1) != 0)
Packit 6c4009
		  || atomic_compare_exchange_weak_relaxed
Packit 6c4009
		       (cond->__data.__g_signals + g, &s, s + 2))
Packit 6c4009
		{
Packit 6c4009
		  /* If we added a signal, we also need to add a wake-up on
Packit 6c4009
		     the futex.  We also need to do that if we skipped adding
Packit 6c4009
		     a signal because the group is being closed because
Packit 6c4009
		     while __condvar_quiesce_and_switch_g1 could have closed
Packit 6c4009
		     the group, it might still be waiting for futex waiters to
Packit 6c4009
		     leave (and one of those waiters might be the one we stole
Packit 6c4009
		     the signal from, which causes it to block using the
Packit 6c4009
		     futex).  */
Packit 6c4009
		  futex_wake (cond->__data.__g_signals + g, 1, private);
Packit 6c4009
		  break;
Packit 6c4009
		}
Packit 6c4009
	      /* TODO Back off.  */
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
 done:
Packit 6c4009
Packit 6c4009
  /* Confirm that we have been woken.  We do that before acquiring the mutex
Packit 6c4009
     to allow for execution of pthread_cond_destroy while having acquired the
Packit 6c4009
     mutex.  */
Packit 6c4009
  __condvar_confirm_wakeup (cond, private);
Packit 6c4009
Packit 6c4009
  /* Woken up; now re-acquire the mutex.  If this doesn't fail, return RESULT,
Packit 6c4009
     which is set to ETIMEDOUT if a timeout occurred, or zero otherwise.  */
Packit 6c4009
  err = __pthread_mutex_cond_lock (mutex);
Packit 6c4009
  /* XXX Abort on errors that are disallowed by POSIX?  */
Packit 6c4009
  return (err != 0) ? err : result;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* Untimed wait on COND with MUTEX: forwards to __pthread_cond_wait_common
   without supplying an absolute timeout and returns whatever that call
   returns.  See __pthread_cond_wait_common for the full contract.  */
int
__pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex)
{
  /* No deadline is passed for the plain (non-timed) wait.  */
  const struct timespec *const no_deadline = NULL;

  return __pthread_cond_wait_common (cond, mutex, no_deadline);
}
Packit 6c4009
Packit 6c4009
/* Timed wait on COND with MUTEX.  Validates the nanosecond field of ABSTIME
   and, if it is acceptable, forwards to __pthread_cond_wait_common (see
   there for the full contract).  Returns EINVAL without waiting when
   tv_nsec is negative or not below 1000000000.  */
int
__pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex,
			  const struct timespec *abstime)
{
  /* ABSTIME is dereferenced unconditionally here; besides validating the
     argument, this lets the compiler assume that ABSTIME is not NULL in
     the call below.  */
  const bool nsec_in_range
    = abstime->tv_nsec >= 0 && abstime->tv_nsec < 1000000000;

  if (nsec_in_range)
    return __pthread_cond_wait_common (cond, mutex, abstime);

  return EINVAL;
}
Packit 6c4009
Packit 6c4009
/* Publish the two entry points under their public names.  Each
   versioned_symbol invocation names the library (libpthread), the internal
   function, the public symbol name, and the version tag GLIBC_2_3_2.
   NOTE(review): versioned_symbol is presumably provided by <shlib-compat.h>
   and makes these the default versions of the public symbols -- confirm
   against that header.  */
versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
Packit 6c4009
		  GLIBC_2_3_2);
Packit 6c4009
versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait,
Packit 6c4009
		  GLIBC_2_3_2);