Blame nptl/tst-rwlock-tryrdlock-stall.c

Packit Bot bdf314
/* Bug 23844: Test for pthread_rwlock_tryrdlock stalls.
Packit Bot bdf314
   Copyright (C) 2019 Free Software Foundation, Inc.
Packit Bot bdf314
   This file is part of the GNU C Library.
Packit Bot bdf314
Packit Bot bdf314
   The GNU C Library is free software; you can redistribute it and/or
Packit Bot bdf314
   modify it under the terms of the GNU Lesser General Public
Packit Bot bdf314
   License as published by the Free Software Foundation; either
Packit Bot bdf314
   version 2.1 of the License, or (at your option) any later version.
Packit Bot bdf314
Packit Bot bdf314
   The GNU C Library is distributed in the hope that it will be useful,
Packit Bot bdf314
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Bot bdf314
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Bot bdf314
   Lesser General Public License for more details.
Packit Bot bdf314
Packit Bot bdf314
   You should have received a copy of the GNU Lesser General Public
Packit Bot bdf314
   License along with the GNU C Library; if not, see
Packit Bot bdf314
   <http://www.gnu.org/licenses/>.  */
Packit Bot bdf314
Packit Bot bdf314
/* For a full analysis see comment:
Packit Bot bdf314
   https://sourceware.org/bugzilla/show_bug.cgi?id=23844#c14
Packit Bot bdf314
Packit Bot bdf314
   Provided here for reference:
Packit Bot bdf314
Packit Bot bdf314
   --- Analysis of pthread_rwlock_tryrdlock() stall ---
Packit Bot bdf314
   A read lock begins to execute.
Packit Bot bdf314
Packit Bot bdf314
   In __pthread_rwlock_rdlock_full:
Packit Bot bdf314
Packit Bot bdf314
   We can attempt a read lock, but find that the lock is
Packit Bot bdf314
   in a write phase (PTHREAD_RWLOCK_WRPHASE, or WP-bit
Packit Bot bdf314
   is set), and the lock is held by a primary writer
Packit Bot bdf314
   (PTHREAD_RWLOCK_WRLOCKED is set). In this case we must
Packit Bot bdf314
   wait for explicit hand over from the writer to us or
Packit Bot bdf314
   one of the other waiters. The read lock threads are
Packit Bot bdf314
   about to execute:
Packit Bot bdf314
Packit Bot bdf314
   341   r = (atomic_fetch_add_acquire (&rwlock->__data.__readers,
Packit Bot bdf314
   342                                  (1 << PTHREAD_RWLOCK_READER_SHIFT))
Packit Bot bdf314
   343        + (1 << PTHREAD_RWLOCK_READER_SHIFT));
Packit Bot bdf314
Packit Bot bdf314
   An unlock begins to execute.
Packit Bot bdf314
Packit Bot bdf314
   Then in __pthread_rwlock_wrunlock:
Packit Bot bdf314
Packit Bot bdf314
   547   unsigned int r = atomic_load_relaxed (&rwlock->__data.__readers);
Packit Bot bdf314
   ...
Packit Bot bdf314
   549   while (!atomic_compare_exchange_weak_release
Packit Bot bdf314
   550          (&rwlock->__data.__readers, &r,
Packit Bot bdf314
   551           ((r ^ PTHREAD_RWLOCK_WRLOCKED)
Packit Bot bdf314
   552            ^ ((r >> PTHREAD_RWLOCK_READER_SHIFT) == 0 ? 0
Packit Bot bdf314
   553               : PTHREAD_RWLOCK_WRPHASE))))
Packit Bot bdf314
   554     {
Packit Bot bdf314
   ...
Packit Bot bdf314
   556     }
Packit Bot bdf314
Packit Bot bdf314
   We clear PTHREAD_RWLOCK_WRLOCKED, and if there are
Packit Bot bdf314
   no readers we leave the lock in PTHREAD_RWLOCK_WRPHASE.
Packit Bot bdf314
Packit Bot bdf314
   Back in the read lock.
Packit Bot bdf314
Packit Bot bdf314
   The read lock adjusts __readers as above.
Packit Bot bdf314
Packit Bot bdf314
   383   while ((r & PTHREAD_RWLOCK_WRPHASE) != 0
Packit Bot bdf314
   384          && (r & PTHREAD_RWLOCK_WRLOCKED) == 0)
Packit Bot bdf314
   385     {
Packit Bot bdf314
   ...
Packit Bot bdf314
   390       if (atomic_compare_exchange_weak_acquire (&rwlock->__data.__readers, &r,
Packit Bot bdf314
   391                                                 r ^ PTHREAD_RWLOCK_WRPHASE))
Packit Bot bdf314
   392         {
Packit Bot bdf314
Packit Bot bdf314
   And then attempts to start the read phase.
Packit Bot bdf314
Packit Bot bdf314
   Assume there happens to be a tryrdlock at this point, noting
Packit Bot bdf314
   that PTHREAD_RWLOCK_WRLOCKED is clear, and PTHREAD_RWLOCK_WRPHASE
Packit Bot bdf314
   is 1. So the try lock attempts to start the read phase.
Packit Bot bdf314
Packit Bot bdf314
   In __pthread_rwlock_tryrdlock:
Packit Bot bdf314
Packit Bot bdf314
    44       if ((r & PTHREAD_RWLOCK_WRPHASE) == 0)
Packit Bot bdf314
    45         {
Packit Bot bdf314
   ...
Packit Bot bdf314
    49           if (((r & PTHREAD_RWLOCK_WRLOCKED) != 0)
Packit Bot bdf314
    50               && (rwlock->__data.__flags
Packit Bot bdf314
    51                   == PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP))
Packit Bot bdf314
    52             return EBUSY;
Packit Bot bdf314
    53           rnew = r + (1 << PTHREAD_RWLOCK_READER_SHIFT);
Packit Bot bdf314
    54         }
Packit Bot bdf314
   ...
Packit Bot bdf314
    89   while (!atomic_compare_exchange_weak_acquire (&rwlock->__data.__readers,
Packit Bot bdf314
    90       &r, rnew));
Packit Bot bdf314
Packit Bot bdf314
   And succeeds.
Packit Bot bdf314
Packit Bot bdf314
   Back in the write unlock:
Packit Bot bdf314
Packit Bot bdf314
   557   if ((r >> PTHREAD_RWLOCK_READER_SHIFT) != 0)
Packit Bot bdf314
   558     {
Packit Bot bdf314
   ...
Packit Bot bdf314
   563       if ((atomic_exchange_relaxed (&rwlock->__data.__wrphase_futex, 0)
Packit Bot bdf314
   564            & PTHREAD_RWLOCK_FUTEX_USED) != 0)
Packit Bot bdf314
   565         futex_wake (&rwlock->__data.__wrphase_futex, INT_MAX, private);
Packit Bot bdf314
   566     }
Packit Bot bdf314
Packit Bot bdf314
   We note that PTHREAD_RWLOCK_FUTEX_USED is not set (zero)
Packit Bot bdf314
   and don't wake anyone. This is OK because we handed
Packit Bot bdf314
   over to the trylock. It will be the trylock's responsibility
Packit Bot bdf314
   to wake any waiters.
Packit Bot bdf314
Packit Bot bdf314
   Back in the read lock:
Packit Bot bdf314
Packit Bot bdf314
   The read lock fails to install PTHREAD_RWLOCK_WRPHASE as 0 because
Packit Bot bdf314
   the __readers value was adjusted by the trylock, and so it falls through
Packit Bot bdf314
   to waiting on the lock for explicit handover from either a new writer
Packit Bot bdf314
   or a new reader.
Packit Bot bdf314
Packit Bot bdf314
   448           int err = futex_abstimed_wait (&rwlock->__data.__wrphase_futex,
Packit Bot bdf314
   449                                          1 | PTHREAD_RWLOCK_FUTEX_USED,
Packit Bot bdf314
   450                                          abstime, private);
Packit Bot bdf314
Packit Bot bdf314
   We use PTHREAD_RWLOCK_FUTEX_USED to indicate the futex
Packit Bot bdf314
   is in use.
Packit Bot bdf314
Packit Bot bdf314
   At this point we have readers waiting on the read lock
Packit Bot bdf314
   to unlock. The wrlock is done. The trylock is finishing
Packit Bot bdf314
   the installation of the read phase.
Packit Bot bdf314
Packit Bot bdf314
    92   if ((r & PTHREAD_RWLOCK_WRPHASE) != 0)
Packit Bot bdf314
    93     {
Packit Bot bdf314
   ...
Packit Bot bdf314
   105       atomic_store_relaxed (&rwlock->__data.__wrphase_futex, 0);
Packit Bot bdf314
   106     }
Packit Bot bdf314
Packit Bot bdf314
   The trylock does note that we were the one that
Packit Bot bdf314
   installed the read phase, but the comments are not
Packit Bot bdf314
   correct, the execution ordering above shows that
Packit Bot bdf314
   readers might indeed be waiting, and they are.
Packit Bot bdf314
Packit Bot bdf314
   The atomic_store_relaxed throws away PTHREAD_RWLOCK_FUTEX_USED,
Packit Bot bdf314
   and the waiting reader is never woken because as noted
Packit Bot bdf314
   above it is conditional on the futex being used.
Packit Bot bdf314
Packit Bot bdf314
   The solution is for the trylock thread to inspect
Packit Bot bdf314
   PTHREAD_RWLOCK_FUTEX_USED and wake the waiting readers.
Packit Bot bdf314
Packit Bot bdf314
   --- Analysis of pthread_rwlock_trywrlock() stall ---
Packit Bot bdf314
Packit Bot bdf314
   A write lock begins to execute, takes the write lock,
Packit Bot bdf314
   and then releases the lock...
Packit Bot bdf314
Packit Bot bdf314
   In pthread_rwlock_wrunlock():
Packit Bot bdf314
Packit Bot bdf314
   547   unsigned int r = atomic_load_relaxed (&rwlock->__data.__readers);
Packit Bot bdf314
   ...
Packit Bot bdf314
   549   while (!atomic_compare_exchange_weak_release
Packit Bot bdf314
   550          (&rwlock->__data.__readers, &r,
Packit Bot bdf314
   551           ((r ^ PTHREAD_RWLOCK_WRLOCKED)
Packit Bot bdf314
   552            ^ ((r >> PTHREAD_RWLOCK_READER_SHIFT) == 0 ? 0
Packit Bot bdf314
   553               : PTHREAD_RWLOCK_WRPHASE))))
Packit Bot bdf314
   554     {
Packit Bot bdf314
   ...
Packit Bot bdf314
   556     }
Packit Bot bdf314
Packit Bot bdf314
   ... leaving it in the write phase with zero readers
Packit Bot bdf314
   (the case where we leave the write phase in place
Packit Bot bdf314
   during a write unlock).
Packit Bot bdf314
Packit Bot bdf314
   A write trylock begins to execute.
Packit Bot bdf314
Packit Bot bdf314
   In __pthread_rwlock_trywrlock:
Packit Bot bdf314
Packit Bot bdf314
    40   while (((r & PTHREAD_RWLOCK_WRLOCKED) == 0)
Packit Bot bdf314
    41       && (((r >> PTHREAD_RWLOCK_READER_SHIFT) == 0)
Packit Bot bdf314
    42           || (prefer_writer && ((r & PTHREAD_RWLOCK_WRPHASE) != 0))))
Packit Bot bdf314
    43     {
Packit Bot bdf314
Packit Bot bdf314
   The lock is not locked.
Packit Bot bdf314
Packit Bot bdf314
   There are no readers.
Packit Bot bdf314
Packit Bot bdf314
    45       if (atomic_compare_exchange_weak_acquire (
Packit Bot bdf314
    46           &rwlock->__data.__readers, &r,
Packit Bot bdf314
    47           r | PTHREAD_RWLOCK_WRPHASE | PTHREAD_RWLOCK_WRLOCKED))
Packit Bot bdf314
Packit Bot bdf314
   We atomically install the write phase and we take the
Packit Bot bdf314
   exclusive write lock.
Packit Bot bdf314
Packit Bot bdf314
    48         {
Packit Bot bdf314
    49           atomic_store_relaxed (&rwlock->__data.__writers_futex, 1);
Packit Bot bdf314
Packit Bot bdf314
   We get this far.
Packit Bot bdf314
Packit Bot bdf314
   A reader lock begins to execute.
Packit Bot bdf314
Packit Bot bdf314
   In pthread_rwlock_rdlock:
Packit Bot bdf314
Packit Bot bdf314
   437   for (;;)
Packit Bot bdf314
   438     {
Packit Bot bdf314
   439       while (((wpf = atomic_load_relaxed (&rwlock->__data.__wrphase_futex))
Packit Bot bdf314
   440               | PTHREAD_RWLOCK_FUTEX_USED) == (1 | PTHREAD_RWLOCK_FUTEX_USED))
Packit Bot bdf314
   441         {
Packit Bot bdf314
   442           int private = __pthread_rwlock_get_private (rwlock);
Packit Bot bdf314
   443           if (((wpf & PTHREAD_RWLOCK_FUTEX_USED) == 0)
Packit Bot bdf314
   444               && (!atomic_compare_exchange_weak_relaxed
Packit Bot bdf314
   445                   (&rwlock->__data.__wrphase_futex,
Packit Bot bdf314
   446                    &wpf, wpf | PTHREAD_RWLOCK_FUTEX_USED)))
Packit Bot bdf314
   447             continue;
Packit Bot bdf314
   448           int err = futex_abstimed_wait (&rwlock->__data.__wrphase_futex,
Packit Bot bdf314
   449                                          1 | PTHREAD_RWLOCK_FUTEX_USED,
Packit Bot bdf314
   450                                          abstime, private);
Packit Bot bdf314
Packit Bot bdf314
   We are in a write phase, so the while() on line 439 is true.
Packit Bot bdf314
Packit Bot bdf314
   The value of wpf does not have PTHREAD_RWLOCK_FUTEX_USED set
Packit Bot bdf314
   since this is the first reader to lock.
Packit Bot bdf314
Packit Bot bdf314
   The atomic operation sets wpf with PTHREAD_RWLOCK_FUTEX_USED
Packit Bot bdf314
   on the expectation that this reader will be woken during
Packit Bot bdf314
   the handoff.
Packit Bot bdf314
Packit Bot bdf314
   Back in pthread_rwlock_trywrlock:
Packit Bot bdf314
Packit Bot bdf314
    50           atomic_store_relaxed (&rwlock->__data.__wrphase_futex, 1);
Packit Bot bdf314
    51           atomic_store_relaxed (&rwlock->__data.__cur_writer,
Packit Bot bdf314
    52               THREAD_GETMEM (THREAD_SELF, tid));
Packit Bot bdf314
    53           return 0;
Packit Bot bdf314
    54         }
Packit Bot bdf314
   ...
Packit Bot bdf314
    57     }
Packit Bot bdf314
Packit Bot bdf314
   We write 1 to __wrphase_futex discarding PTHREAD_RWLOCK_FUTEX_USED,
Packit Bot bdf314
   and so in the unlock we will not awaken the waiting reader.
Packit Bot bdf314
Packit Bot bdf314
   The solution to this is to realize that if we did not start the write
Packit Bot bdf314
   phase we need not write 1 or any other value to __wrphase_futex.
Packit Bot bdf314
   This ensures that any readers (which saw __wrphase_futex != 0) can
Packit Bot bdf314
   set PTHREAD_RWLOCK_FUTEX_USED and this can be used at unlock to
Packit Bot bdf314
   wake them.
Packit Bot bdf314
Packit Bot bdf314
   If we installed the write phase then all other readers are looping
Packit Bot bdf314
   here:
Packit Bot bdf314
Packit Bot bdf314
   In __pthread_rwlock_rdlock_full:
Packit Bot bdf314
Packit Bot bdf314
   437   for (;;)
Packit Bot bdf314
   438     {
Packit Bot bdf314
   439       while (((wpf = atomic_load_relaxed (&rwlock->__data.__wrphase_futex))
Packit Bot bdf314
   440               | PTHREAD_RWLOCK_FUTEX_USED) == (1 | PTHREAD_RWLOCK_FUTEX_USED))
Packit Bot bdf314
   441         {
Packit Bot bdf314
   ...
Packit Bot bdf314
   508     }
Packit Bot bdf314
Packit Bot bdf314
   waiting for the write phase to be installed or removed before they
Packit Bot bdf314
   can begin waiting on __wrphase_futex (part of the algorithm), or
Packit Bot bdf314
   taking a concurrent read lock, and thus we can safely write 1 to
Packit Bot bdf314
   __wrphase_futex.
Packit Bot bdf314
Packit Bot bdf314
   If we did not install the write phase then the readers may already
Packit Bot bdf314
   be waiting on the futex, the original writer wrote 1 to __wrphase_futex
Packit Bot bdf314
   as part of starting the write phase, and we cannot also write 1
Packit Bot bdf314
   without losing the PTHREAD_RWLOCK_FUTEX_USED bit.
Packit Bot bdf314
Packit Bot bdf314
   ---
Packit Bot bdf314
Packit Bot bdf314
   Summary for the pthread_rwlock_tryrdlock() stall:
Packit Bot bdf314
Packit Bot bdf314
   The stall is caused by pthread_rwlock_tryrdlock failing to check
Packit Bot bdf314
   that PTHREAD_RWLOCK_FUTEX_USED is set in the __wrphase_futex futex
Packit Bot bdf314
   and then waking the futex.
Packit Bot bdf314
Packit Bot bdf314
   The fix for bug 23844 ensures that waiters on __wrphase_futex are
Packit Bot bdf314
   correctly woken.  Before the fix the test stalls as readers can
Packit Bot bdf314
   wait forever on __wrphase_futex.  */
Packit Bot bdf314
Packit Bot bdf314
#include <stdio.h>
Packit Bot bdf314
#include <stdlib.h>
Packit Bot bdf314
#include <unistd.h>
Packit Bot bdf314
#include <pthread.h>
Packit Bot bdf314
#include <support/xthread.h>
Packit Bot bdf314
#include <errno.h>
Packit Bot bdf314
Packit Bot bdf314
/* We need only one lock to reproduce the issue. We will need multiple
Packit Bot bdf314
   threads to get the exact case where we have a read, try, and unlock
Packit Bot bdf314
   all interleaving to produce the case where the readers are waiting
Packit Bot bdf314
   and the try fails to wake them.  */
Packit Bot bdf314
pthread_rwlock_t onelock;
Packit Bot bdf314
Packit Bot bdf314
/* The number of threads is arbitrary but empirically chosen to have
Packit Bot bdf314
   enough threads that we see the condition where waiting readers are
Packit Bot bdf314
   not woken by a successful tryrdlock.  */
Packit Bot bdf314
#define NTHREADS 32
Packit Bot bdf314
Packit Bot bdf314
/* Shutdown flag: do_test stores 1 here once the run is over, and every
   run_loop thread re-reads it at the top of each iteration.  */
_Atomic int do_exit;
Packit Bot bdf314
Packit Bot bdf314
void *
Packit Bot bdf314
run_loop (void *arg)
Packit Bot bdf314
{
Packit Bot bdf314
  int i = 0, ret;
Packit Bot bdf314
  while (!do_exit)
Packit Bot bdf314
    {
Packit Bot bdf314
      /* Arbitrarily choose if we are the writer or reader.  Choose a
Packit Bot bdf314
	 high enough ratio of readers to writers to make it likely
Packit Bot bdf314
	 that readers block (and eventually are susceptable to
Packit Bot bdf314
	 stalling).
Packit Bot bdf314
Packit Bot bdf314
         If we are a writer, take the write lock, and then unlock.
Packit Bot bdf314
	 If we are a reader, try the lock, then lock, then unlock.  */
Packit Bot bdf314
      if ((i % 8) != 0)
Packit Bot bdf314
	xpthread_rwlock_wrlock (&onelock);
Packit Bot bdf314
      else
Packit Bot bdf314
	{
Packit Bot bdf314
	  if ((ret = pthread_rwlock_tryrdlock (&onelock)) != 0)
Packit Bot bdf314
	    {
Packit Bot bdf314
	      if (ret == EBUSY)
Packit Bot bdf314
		xpthread_rwlock_rdlock (&onelock);
Packit Bot bdf314
	      else
Packit Bot bdf314
		exit (EXIT_FAILURE);
Packit Bot bdf314
	    }
Packit Bot bdf314
	}
Packit Bot bdf314
      /* Thread does some work and then unlocks.  */
Packit Bot bdf314
      xpthread_rwlock_unlock (&onelock);
Packit Bot bdf314
      i++;
Packit Bot bdf314
    }
Packit Bot bdf314
  return NULL;
Packit Bot bdf314
}
Packit Bot bdf314
Packit Bot bdf314
int
Packit Bot bdf314
do_test (void)
Packit Bot bdf314
{
Packit Bot bdf314
  int i;
Packit Bot bdf314
  pthread_t tids[NTHREADS];
Packit Bot bdf314
  xpthread_rwlock_init (&onelock, NULL);
Packit Bot bdf314
  for (i = 0; i < NTHREADS; i++)
Packit Bot bdf314
    tids[i] = xpthread_create (NULL, run_loop, NULL);
Packit Bot bdf314
  /* Run for some amount of time.  Empirically speaking exercising
Packit Bot bdf314
     the stall via pthread_rwlock_tryrdlock is much harder, and on
Packit Bot bdf314
     a 3.5GHz 4 core x86_64 VM system it takes somewhere around
Packit Bot bdf314
     20-200s to stall, approaching 100% stall past 200s.  We can't
Packit Bot bdf314
     wait that long for a regression test so we just test for 20s,
Packit Bot bdf314
     and expect the stall to happen with a 5-10% chance (enough for
Packit Bot bdf314
     developers to see).  */
Packit Bot bdf314
  sleep (20);
Packit Bot bdf314
  /* Then exit.  */
Packit Bot bdf314
  printf ("INFO: Exiting...\n");
Packit Bot bdf314
  do_exit = 1;
Packit Bot bdf314
  /* If any readers stalled then we will timeout waiting for them.  */
Packit Bot bdf314
  for (i = 0; i < NTHREADS; i++)
Packit Bot bdf314
    xpthread_join (tids[i]);
Packit Bot bdf314
  printf ("INFO: Done.\n");
Packit Bot bdf314
  xpthread_rwlock_destroy (&onelock);
Packit Bot bdf314
  printf ("PASS: No pthread_rwlock_tryrdlock stalls detected.\n");
Packit Bot bdf314
  return 0;
Packit Bot bdf314
}
Packit Bot bdf314
Packit Bot bdf314
/* NOTE(review): presumably the test-driver time limit in seconds,
   consumed by the support/test-driver.c include that follows; it needs
   to stay above the 20 second sleep in do_test -- confirm.  */
#define TIMEOUT 30
Packit Bot bdf314
#include <support/test-driver.c>