|
Packit |
6c4009 |
/* Copyright (C) 2003-2018 Free Software Foundation, Inc.
|
|
Packit |
6c4009 |
This file is part of the GNU C Library.
|
|
Packit |
6c4009 |
Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2003.
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
The GNU C Library is free software; you can redistribute it and/or
|
|
Packit |
6c4009 |
modify it under the terms of the GNU Lesser General Public
|
|
Packit |
6c4009 |
License as published by the Free Software Foundation; either
|
|
Packit |
6c4009 |
version 2.1 of the License, or (at your option) any later version.
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
The GNU C Library is distributed in the hope that it will be useful,
|
|
Packit |
6c4009 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
Packit |
6c4009 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Packit |
6c4009 |
Lesser General Public License for more details.
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
You should have received a copy of the GNU Lesser General Public
|
|
Packit |
6c4009 |
License along with the GNU C Library; if not, see
|
|
Packit |
6c4009 |
<http://www.gnu.org/licenses/>. */
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
#include <errno.h>
|
|
Packit |
6c4009 |
#include <sysdep.h>
|
|
Packit |
6c4009 |
#include <futex-internal.h>
|
|
Packit |
6c4009 |
#include <pthreadP.h>
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
/* Wait on the barrier.
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
In each round, we wait for a fixed number of threads to enter the barrier
|
|
Packit |
6c4009 |
(COUNT). Once that has happened, exactly these threads are allowed to
|
|
Packit |
6c4009 |
leave the barrier. Note that POSIX does not require that only COUNT
|
|
Packit |
6c4009 |
threads can attempt to block using the barrier concurrently.
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
We count the number of threads that have entered (IN). Each thread
|
|
Packit |
6c4009 |
increments IN when entering, thus getting a position in the sequence of
|
|
Packit |
6c4009 |
threads that are or have been waiting (starting with 1, so the position
|
|
Packit |
6c4009 |
is the number of threads that have entered so far including the current
|
|
Packit |
6c4009 |
thread).
|
|
Packit |
6c4009 |
CURRENT_ROUND designates the most recent thread whose round has been
|
|
Packit |
6c4009 |
detected as complete. When a thread detects that enough threads have
|
|
Packit |
6c4009 |
entered to make a round complete, it finishes this round by effectively
|
|
Packit |
6c4009 |
adding COUNT to CURRENT_ROUND atomically. Threads that believe that their
|
|
Packit |
6c4009 |
round is not complete yet wait until CURRENT_ROUND is not smaller than
|
|
Packit |
6c4009 |
their position anymore.
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
A barrier can be destroyed as soon as no threads are blocked on the
|
|
Packit |
6c4009 |
barrier. This is already the case if just one thread from the last round
|
|
Packit |
6c4009 |
has stopped waiting and returned to the caller; the assumption is that
|
|
Packit |
6c4009 |
all threads from the round are unblocked atomically, even though they may
|
|
Packit |
6c4009 |
return at different times from the respective calls to
|
|
Packit |
6c4009 |
pthread_barrier_wait). Thus, a valid call to pthread_barrier_destroy can
|
|
Packit |
6c4009 |
be concurrent with other threads still figuring out that their round has
|
|
Packit |
6c4009 |
been completed. Therefore, threads need to confirm that they have left
|
|
Packit |
6c4009 |
the barrier by incrementing OUT, and pthread_barrier_destroy needs to wait
|
|
Packit |
6c4009 |
until OUT equals IN.
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
To avoid an ABA issue for futex_wait on CURRENT_ROUND and for archs with
|
|
Packit |
6c4009 |
32b-only atomics, we additionally reset the barrier when IN reaches
|
|
Packit |
6c4009 |
a threshold to avoid overflow. We assume that the total number of threads
|
|
Packit |
6c4009 |
is less than UINT_MAX/2, and set the threshold accordingly so that we can
|
|
Packit |
6c4009 |
use a simple atomic_fetch_add on IN instead of a CAS when entering. The
|
|
Packit |
6c4009 |
threshold is always set to the end of a round, so all threads that have
|
|
Packit |
6c4009 |
entered are either pre-reset threads or post-reset threads (i.e., have a
|
|
Packit |
6c4009 |
position larger than the threshold).
|
|
Packit |
6c4009 |
Pre-reset threads just run the algorithm explained above. Post-reset
|
|
Packit |
6c4009 |
threads wait until IN is reset to a pre-threshold value.
|
|
Packit |
6c4009 |
When the last pre-reset thread leaves the barrier (i.e., OUT equals the
|
|
Packit |
6c4009 |
threshold), it resets the barrier to its initial state. Other (post-reset)
|
|
Packit |
6c4009 |
threads wait for the reset to have finished by waiting until IN is less
|
|
Packit |
6c4009 |
than the threshold and then restart by trying to enter the barrier again.
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
We reuse the reset mechanism in pthread_barrier_destroy to get notified
|
|
Packit |
6c4009 |
when all threads have left the barrier: We trigger an artificial reset and
|
|
Packit |
6c4009 |
wait for the last pre-reset thread to finish reset, thus notifying the
|
|
Packit |
6c4009 |
thread that is about to destroy the barrier.
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
Blocking using futexes is straightforward: pre-reset threads wait for
|
|
Packit |
6c4009 |
completion of their round using CURRENT_ROUND as futex word, and post-reset
|
|
Packit |
6c4009 |
threads and pthread_barrier_destroy use IN as futex word.
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
Further notes:
|
|
Packit |
6c4009 |
* It is not simple to let some of the post-reset threads help with the
|
|
Packit |
6c4009 |
reset because of the ABA issues that arise; therefore, we simply make
|
|
Packit |
6c4009 |
the last thread to leave responsible for the reset.
|
|
Packit |
6c4009 |
* POSIX leaves it unspecified whether a signal handler running in a thread
|
|
Packit |
6c4009 |
that has been unblocked (because its round is complete) can stall all
|
|
Packit |
6c4009 |
other threads and prevent them from returning from the barrier. In this
|
|
Packit |
6c4009 |
implementation, other threads will return. However,
|
|
Packit |
6c4009 |
pthread_barrier_destroy will of course wait for the signal handler thread
|
|
Packit |
6c4009 |
to confirm that it left the barrier.
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
TODO We should add spinning with back-off. Once we do that, we could also
|
|
Packit |
6c4009 |
try to avoid the futex_wake syscall when a round is detected as finished.
|
|
Packit |
6c4009 |
If we do not spin, it is quite likely that at least some other threads will
|
|
Packit |
6c4009 |
have called futex_wait already. */
|
|
Packit |
6c4009 |
int
|
|
Packit |
6c4009 |
__pthread_barrier_wait (pthread_barrier_t *barrier)
|
|
Packit |
6c4009 |
{
|
|
Packit |
6c4009 |
struct pthread_barrier *bar = (struct pthread_barrier *) barrier;
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
/* How many threads entered so far, including ourself. */
|
|
Packit |
6c4009 |
unsigned int i;
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
reset_restart:
|
|
Packit |
6c4009 |
/* Try to enter the barrier. We need acquire MO to (1) ensure that if we
|
|
Packit |
6c4009 |
observe that our round can be completed (see below for our attempt to do
|
|
Packit |
6c4009 |
so), all pre-barrier-entry effects of all threads in our round happen
|
|
Packit |
6c4009 |
before us completing the round, and (2) to make our use of the barrier
|
|
Packit |
6c4009 |
happen after a potential reset. We need release MO to make sure that our
|
|
Packit |
6c4009 |
pre-barrier-entry effects happen before threads in this round leaving the
|
|
Packit |
6c4009 |
barrier. */
|
|
Packit |
6c4009 |
i = atomic_fetch_add_acq_rel (&bar->in, 1) + 1;
|
|
Packit |
6c4009 |
/* These loads are after the fetch_add so that we're less likely to first
|
|
Packit |
6c4009 |
pull in the cache line as shared. */
|
|
Packit |
6c4009 |
unsigned int count = bar->count;
|
|
Packit |
6c4009 |
/* This is the number of threads that can enter before we need to reset.
|
|
Packit |
6c4009 |
Always at the end of a round. */
|
|
Packit |
6c4009 |
unsigned int max_in_before_reset = BARRIER_IN_THRESHOLD
|
|
Packit |
6c4009 |
- BARRIER_IN_THRESHOLD % count;
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
if (i > max_in_before_reset)
|
|
Packit |
6c4009 |
{
|
|
Packit |
6c4009 |
/* We're in a reset round. Just wait for a reset to finish; do not
|
|
Packit |
6c4009 |
help finishing previous rounds because this could happen
|
|
Packit |
6c4009 |
concurrently with a reset. */
|
|
Packit |
6c4009 |
while (i > max_in_before_reset)
|
|
Packit |
6c4009 |
{
|
|
Packit |
6c4009 |
futex_wait_simple (&bar->in, i, bar->shared);
|
|
Packit |
6c4009 |
/* Relaxed MO is fine here because we just need an indication for
|
|
Packit |
6c4009 |
when we should retry to enter (which will use acquire MO, see
|
|
Packit |
6c4009 |
above). */
|
|
Packit |
6c4009 |
i = atomic_load_relaxed (&bar->in);
|
|
Packit |
6c4009 |
}
|
|
Packit |
6c4009 |
goto reset_restart;
|
|
Packit |
6c4009 |
}
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
/* Look at the current round. At this point, we are just interested in
|
|
Packit |
6c4009 |
whether we can complete rounds, based on the information we obtained
|
|
Packit |
6c4009 |
through our acquire-MO load of IN. Nonetheless, if we notice that
|
|
Packit |
6c4009 |
our round has been completed using this load, we use the acquire-MO
|
|
Packit |
6c4009 |
fence below to make sure that all pre-barrier-entry effects of all
|
|
Packit |
6c4009 |
threads in our round happen before us leaving the barrier. Therefore,
|
|
Packit |
6c4009 |
relaxed MO is sufficient. */
|
|
Packit |
6c4009 |
unsigned cr = atomic_load_relaxed (&bar->current_round);
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
/* Try to finish previous rounds and/or the current round. We simply
|
|
Packit |
6c4009 |
consider just our position here and do not try to do the work of threads
|
|
Packit |
6c4009 |
that entered more recently. */
|
|
Packit |
6c4009 |
while (cr + count <= i)
|
|
Packit |
6c4009 |
{
|
|
Packit |
6c4009 |
/* Calculate the new current round based on how many threads entered.
|
|
Packit |
6c4009 |
NEWCR must be larger than CR because CR+COUNT ends a round. */
|
|
Packit |
6c4009 |
unsigned int newcr = i - i % count;
|
|
Packit |
6c4009 |
/* Try to complete previous and/or the current round. We need release
|
|
Packit |
6c4009 |
MO to propagate the happens-before that we observed through reading
|
|
Packit |
6c4009 |
with acquire MO from IN to other threads. If the CAS fails, it
|
|
Packit |
6c4009 |
is like the relaxed-MO load of CURRENT_ROUND above. */
|
|
Packit |
6c4009 |
if (atomic_compare_exchange_weak_release (&bar->current_round, &cr,
|
|
Packit |
6c4009 |
newcr))
|
|
Packit |
6c4009 |
{
|
|
Packit |
6c4009 |
/* Update CR with the modification we just did. */
|
|
Packit |
6c4009 |
cr = newcr;
|
|
Packit |
6c4009 |
/* Wake threads belonging to the rounds we just finished. We may
|
|
Packit |
6c4009 |
wake more threads than necessary if more than COUNT threads try
|
|
Packit |
6c4009 |
to block concurrently on the barrier, but this is not a typical
|
|
Packit |
6c4009 |
use of barriers.
|
|
Packit |
6c4009 |
Note that we can still access SHARED because we haven't yet
|
|
Packit |
6c4009 |
confirmed to have left the barrier. */
|
|
Packit |
6c4009 |
futex_wake (&bar->current_round, INT_MAX, bar->shared);
|
|
Packit |
6c4009 |
/* We did as much as we could based on our position. If we advanced
|
|
Packit |
6c4009 |
the current round to a round sufficient for us, do not wait for
|
|
Packit |
6c4009 |
that to happen and skip the acquire fence (we already
|
|
Packit |
6c4009 |
synchronize-with all other threads in our round through the
|
|
Packit |
6c4009 |
initial acquire MO fetch_add of IN. */
|
|
Packit |
6c4009 |
if (i <= cr)
|
|
Packit |
6c4009 |
goto ready_to_leave;
|
|
Packit |
6c4009 |
else
|
|
Packit |
6c4009 |
break;
|
|
Packit |
6c4009 |
}
|
|
Packit |
6c4009 |
}
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
/* Wait until the current round is more recent than the round we are in. */
|
|
Packit |
6c4009 |
while (i > cr)
|
|
Packit |
6c4009 |
{
|
|
Packit |
6c4009 |
/* Wait for the current round to finish. */
|
|
Packit |
6c4009 |
futex_wait_simple (&bar->current_round, cr, bar->shared);
|
|
Packit |
6c4009 |
/* See the fence below. */
|
|
Packit |
6c4009 |
cr = atomic_load_relaxed (&bar->current_round);
|
|
Packit |
6c4009 |
}
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
/* Our round finished. Use the acquire MO fence to synchronize-with the
|
|
Packit |
6c4009 |
thread that finished the round, either through the initial load of
|
|
Packit |
6c4009 |
CURRENT_ROUND above or a failed CAS in the loop above. */
|
|
Packit |
6c4009 |
atomic_thread_fence_acquire ();
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
/* Now signal that we left. */
|
|
Packit |
6c4009 |
unsigned int o;
|
|
Packit |
6c4009 |
ready_to_leave:
|
|
Packit |
6c4009 |
/* We need release MO here so that our use of the barrier happens before
|
|
Packit |
6c4009 |
reset or memory reuse after pthread_barrier_destroy. */
|
|
Packit |
6c4009 |
o = atomic_fetch_add_release (&bar->out, 1) + 1;
|
|
Packit |
6c4009 |
if (o == max_in_before_reset)
|
|
Packit |
6c4009 |
{
|
|
Packit |
6c4009 |
/* Perform a reset if we are the last pre-reset thread leaving. All
|
|
Packit |
6c4009 |
other threads accessing the barrier are post-reset threads and are
|
|
Packit |
6c4009 |
incrementing or spinning on IN. Thus, resetting IN as the last step
|
|
Packit |
6c4009 |
of reset ensures that the reset is not concurrent with actual use of
|
|
Packit |
6c4009 |
the barrier. We need the acquire MO fence so that the reset happens
|
|
Packit |
6c4009 |
after use of the barrier by all earlier pre-reset threads. */
|
|
Packit |
6c4009 |
atomic_thread_fence_acquire ();
|
|
Packit |
6c4009 |
atomic_store_relaxed (&bar->current_round, 0);
|
|
Packit |
6c4009 |
atomic_store_relaxed (&bar->out, 0);
|
|
Packit |
6c4009 |
/* When destroying the barrier, we wait for a reset to happen. Thus,
|
|
Packit |
6c4009 |
we must load SHARED now so that this happens before the barrier is
|
|
Packit |
6c4009 |
destroyed. */
|
|
Packit |
6c4009 |
int shared = bar->shared;
|
|
Packit |
6c4009 |
atomic_store_release (&bar->in, 0);
|
|
Packit |
6c4009 |
futex_wake (&bar->in, INT_MAX, shared);
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
}
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
/* Return a special value for exactly one thread per round. */
|
|
Packit |
6c4009 |
return i % count == 0 ? PTHREAD_BARRIER_SERIAL_THREAD : 0;
|
|
Packit |
6c4009 |
}
|
|
Packit |
6c4009 |
weak_alias (__pthread_barrier_wait, pthread_barrier_wait)
|