Blame nptl/pthread_barrier_wait.c

Packit 6c4009
/* Copyright (C) 2003-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
   Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2003.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <errno.h>
Packit 6c4009
#include <sysdep.h>
Packit 6c4009
#include <futex-internal.h>
Packit 6c4009
#include <pthreadP.h>
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* Wait on the barrier.
Packit 6c4009
Packit 6c4009
   In each round, we wait for a fixed number of threads to enter the barrier
Packit 6c4009
   (COUNT).  Once that has happened, exactly these threads are allowed to
Packit 6c4009
   leave the barrier.  Note that POSIX does not require that only COUNT
Packit 6c4009
   threads can attempt to block using the barrier concurrently.
Packit 6c4009
Packit 6c4009
   We count the number of threads that have entered (IN).  Each thread
Packit 6c4009
   increments IN when entering, thus getting a position in the sequence of
Packit 6c4009
   threads that are or have been waiting (starting with 1, so the position
Packit 6c4009
   is the number of threads that have entered so far including the current
Packit 6c4009
   thread).
Packit 6c4009
   CURRENT_ROUND designates the most recent thread whose round has been
Packit 6c4009
   detected as complete.  When a thread detects that enough threads have
Packit 6c4009
   entered to make a round complete, it finishes this round by effectively
Packit 6c4009
   adding COUNT to CURRENT_ROUND atomically.  Threads that believe that their
Packit 6c4009
   round is not complete yet wait until CURRENT_ROUND is not smaller than
Packit 6c4009
   their position anymore.
Packit 6c4009
Packit 6c4009
   A barrier can be destroyed as soon as no threads are blocked on the
Packit 6c4009
   barrier.  This is already the case if just one thread from the last round
Packit 6c4009
   has stopped waiting and returned to the caller; the assumption is that
Packit 6c4009
   all threads from the round are unblocked atomically, even though they may
Packit 6c4009
   return at different times from the respective calls to
Packit 6c4009
   pthread_barrier_wait).  Thus, a valid call to pthread_barrier_destroy can
Packit 6c4009
   be concurrent with other threads still figuring out that their round has
Packit 6c4009
   been completed.  Therefore, threads need to confirm that they have left
Packit 6c4009
   the barrier by incrementing OUT, and pthread_barrier_destroy needs to wait
Packit 6c4009
   until OUT equals IN.
Packit 6c4009
Packit 6c4009
   To avoid an ABA issue for futex_wait on CURRENT_ROUND and for archs with
Packit 6c4009
   32b-only atomics, we additionally reset the barrier when IN reaches
Packit 6c4009
   a threshold to avoid overflow.  We assume that the total number of threads
Packit 6c4009
   is less than UINT_MAX/2, and set the threshold accordingly so that we can
Packit 6c4009
   use a simple atomic_fetch_add on IN instead of a CAS when entering.  The
Packit 6c4009
   threshold is always set to the end of a round, so all threads that have
Packit 6c4009
   entered are either pre-reset threads or post-reset threads (i.e., have a
Packit 6c4009
   position larger than the threshold).
Packit 6c4009
   Pre-reset threads just run the algorithm explained above.  Post-reset
Packit 6c4009
   threads wait until IN is reset to a pre-threshold value.
Packit 6c4009
   When the last pre-reset thread leaves the barrier (i.e., OUT equals the
Packit 6c4009
   threshold), it resets the barrier to its initial state.  Other (post-reset)
Packit 6c4009
   threads wait for the reset to have finished by waiting until IN is less
Packit 6c4009
   than the threshold and then restart by trying to enter the barrier again.
Packit 6c4009
Packit 6c4009
   We reuse the reset mechanism in pthread_barrier_destroy to get notified
Packit 6c4009
   when all threads have left the barrier: We trigger an artificial reset and
Packit 6c4009
   wait for the last pre-reset thread to finish reset, thus notifying the
Packit 6c4009
   thread that is about to destroy the barrier.
Packit 6c4009
Packit 6c4009
   Blocking using futexes is straightforward: pre-reset threads wait for
Packit 6c4009
   completion of their round using CURRENT_ROUND as futex word, and post-reset
Packit 6c4009
   threads and pthread_barrier_destroy use IN as futex word.
Packit 6c4009
Packit 6c4009
   Further notes:
Packit 6c4009
   * It is not simple to let some of the post-reset threads help with the
Packit 6c4009
     reset because of the ABA issues that arise; therefore, we simply make
Packit 6c4009
     the last thread to leave responsible for the reset.
Packit 6c4009
   * POSIX leaves it unspecified whether a signal handler running in a thread
Packit 6c4009
     that has been unblocked (because its round is complete) can stall all
Packit 6c4009
     other threads and prevent them from returning from the barrier.  In this
Packit 6c4009
     implementation, other threads will return.  However,
Packit 6c4009
     pthread_barrier_destroy will of course wait for the signal handler thread
Packit 6c4009
     to confirm that it left the barrier.
Packit 6c4009
Packit 6c4009
   TODO We should add spinning with back-off.  Once we do that, we could also
Packit 6c4009
   try to avoid the futex_wake syscall when a round is detected as finished.
Packit 6c4009
   If we do not spin, it is quite likely that at least some other threads will
Packit 6c4009
   have called futex_wait already.  */
Packit 6c4009
int
Packit 6c4009
__pthread_barrier_wait (pthread_barrier_t *barrier)
Packit 6c4009
{
Packit 6c4009
  struct pthread_barrier *bar = (struct pthread_barrier *) barrier;
Packit 6c4009
Packit 6c4009
  /* How many threads entered so far, including ourself.  */
Packit 6c4009
  unsigned int i;
Packit 6c4009
Packit 6c4009
 reset_restart:
Packit 6c4009
  /* Try to enter the barrier.  We need acquire MO to (1) ensure that if we
Packit 6c4009
     observe that our round can be completed (see below for our attempt to do
Packit 6c4009
     so), all pre-barrier-entry effects of all threads in our round happen
Packit 6c4009
     before us completing the round, and (2) to make our use of the barrier
Packit 6c4009
     happen after a potential reset.  We need release MO to make sure that our
Packit 6c4009
     pre-barrier-entry effects happen before threads in this round leaving the
Packit 6c4009
     barrier.  */
Packit 6c4009
  i = atomic_fetch_add_acq_rel (&bar->in, 1) + 1;
Packit 6c4009
  /* These loads are after the fetch_add so that we're less likely to first
Packit 6c4009
     pull in the cache line as shared.  */
Packit 6c4009
  unsigned int count = bar->count;
Packit 6c4009
  /* This is the number of threads that can enter before we need to reset.
Packit 6c4009
     Always at the end of a round.  */
Packit 6c4009
  unsigned int max_in_before_reset = BARRIER_IN_THRESHOLD
Packit 6c4009
				   - BARRIER_IN_THRESHOLD % count;
Packit 6c4009
Packit 6c4009
  if (i > max_in_before_reset)
Packit 6c4009
    {
Packit 6c4009
      /* We're in a reset round.  Just wait for a reset to finish; do not
Packit 6c4009
	 help finishing previous rounds because this could happen
Packit 6c4009
	 concurrently with a reset.  */
Packit 6c4009
      while (i > max_in_before_reset)
Packit 6c4009
	{
Packit 6c4009
	  futex_wait_simple (&bar->in, i, bar->shared);
Packit 6c4009
	  /* Relaxed MO is fine here because we just need an indication for
Packit 6c4009
	     when we should retry to enter (which will use acquire MO, see
Packit 6c4009
	     above).  */
Packit 6c4009
	  i = atomic_load_relaxed (&bar->in);
Packit 6c4009
	}
Packit 6c4009
      goto reset_restart;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Look at the current round.  At this point, we are just interested in
Packit 6c4009
     whether we can complete rounds, based on the information we obtained
Packit 6c4009
     through our acquire-MO load of IN.  Nonetheless, if we notice that
Packit 6c4009
     our round has been completed using this load, we use the acquire-MO
Packit 6c4009
     fence below to make sure that all pre-barrier-entry effects of all
Packit 6c4009
     threads in our round happen before us leaving the barrier.  Therefore,
Packit 6c4009
     relaxed MO is sufficient.  */
Packit 6c4009
  unsigned cr = atomic_load_relaxed (&bar->current_round);
Packit 6c4009
Packit 6c4009
  /* Try to finish previous rounds and/or the current round.  We simply
Packit 6c4009
     consider just our position here and do not try to do the work of threads
Packit 6c4009
     that entered more recently.  */
Packit 6c4009
  while (cr + count <= i)
Packit 6c4009
    {
Packit 6c4009
      /* Calculate the new current round based on how many threads entered.
Packit 6c4009
	 NEWCR must be larger than CR because CR+COUNT ends a round.  */
Packit 6c4009
      unsigned int newcr = i - i % count;
Packit 6c4009
      /* Try to complete previous and/or the current round.  We need release
Packit 6c4009
	 MO to propagate the happens-before that we observed through reading
Packit 6c4009
	 with acquire MO from IN to other threads.  If the CAS fails, it
Packit 6c4009
	 is like the relaxed-MO load of CURRENT_ROUND above.  */
Packit 6c4009
      if (atomic_compare_exchange_weak_release (&bar->current_round, &cr,
Packit 6c4009
						newcr))
Packit 6c4009
	{
Packit 6c4009
	  /* Update CR with the modification we just did.  */
Packit 6c4009
	  cr = newcr;
Packit 6c4009
	  /* Wake threads belonging to the rounds we just finished.  We may
Packit 6c4009
	     wake more threads than necessary if more than COUNT threads try
Packit 6c4009
	     to block concurrently on the barrier, but this is not a typical
Packit 6c4009
	     use of barriers.
Packit 6c4009
	     Note that we can still access SHARED because we haven't yet
Packit 6c4009
	     confirmed to have left the barrier.  */
Packit 6c4009
	  futex_wake (&bar->current_round, INT_MAX, bar->shared);
Packit 6c4009
	  /* We did as much as we could based on our position.  If we advanced
Packit 6c4009
	     the current round to a round sufficient for us, do not wait for
Packit 6c4009
	     that to happen and skip the acquire fence (we already
Packit 6c4009
	     synchronize-with all other threads in our round through the
Packit 6c4009
	     initial acquire MO fetch_add of IN.  */
Packit 6c4009
	  if (i <= cr)
Packit 6c4009
	    goto ready_to_leave;
Packit 6c4009
	  else
Packit 6c4009
	    break;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Wait until the current round is more recent than the round we are in.  */
Packit 6c4009
  while (i > cr)
Packit 6c4009
    {
Packit 6c4009
      /* Wait for the current round to finish.  */
Packit 6c4009
      futex_wait_simple (&bar->current_round, cr, bar->shared);
Packit 6c4009
      /* See the fence below.  */
Packit 6c4009
      cr = atomic_load_relaxed (&bar->current_round);
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Our round finished.  Use the acquire MO fence to synchronize-with the
Packit 6c4009
     thread that finished the round, either through the initial load of
Packit 6c4009
     CURRENT_ROUND above or a failed CAS in the loop above.  */
Packit 6c4009
  atomic_thread_fence_acquire ();
Packit 6c4009
Packit 6c4009
  /* Now signal that we left.  */
Packit 6c4009
  unsigned int o;
Packit 6c4009
 ready_to_leave:
Packit 6c4009
  /* We need release MO here so that our use of the barrier happens before
Packit 6c4009
     reset or memory reuse after pthread_barrier_destroy.  */
Packit 6c4009
  o = atomic_fetch_add_release (&bar->out, 1) + 1;
Packit 6c4009
  if (o == max_in_before_reset)
Packit 6c4009
    {
Packit 6c4009
      /* Perform a reset if we are the last pre-reset thread leaving.   All
Packit 6c4009
	 other threads accessing the barrier are post-reset threads and are
Packit 6c4009
	 incrementing or spinning on IN.  Thus, resetting IN as the last step
Packit 6c4009
	 of reset ensures that the reset is not concurrent with actual use of
Packit 6c4009
	 the barrier.  We need the acquire MO fence so that the reset happens
Packit 6c4009
	 after use of the barrier by all earlier pre-reset threads.  */
Packit 6c4009
      atomic_thread_fence_acquire ();
Packit 6c4009
      atomic_store_relaxed (&bar->current_round, 0);
Packit 6c4009
      atomic_store_relaxed (&bar->out, 0);
Packit 6c4009
      /* When destroying the barrier, we wait for a reset to happen.  Thus,
Packit 6c4009
	 we must load SHARED now so that this happens before the barrier is
Packit 6c4009
	 destroyed.  */
Packit 6c4009
      int shared = bar->shared;
Packit 6c4009
      atomic_store_release (&bar->in, 0);
Packit 6c4009
      futex_wake (&bar->in, INT_MAX, shared);
Packit 6c4009
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Return a special value for exactly one thread per round.  */
Packit 6c4009
  return i % count == 0 ?  PTHREAD_BARRIER_SERIAL_THREAD : 0;
Packit 6c4009
}
Packit 6c4009
weak_alias (__pthread_barrier_wait, pthread_barrier_wait)