Blame src/atomic_ops_stack.c

Packit fe9d6e
/*
Packit fe9d6e
 * Copyright (c) 2005 Hewlett-Packard Development Company, L.P.
Packit fe9d6e
 *
Packit fe9d6e
 * This file may be redistributed and/or modified under the
Packit fe9d6e
 * terms of the GNU General Public License as published by the Free Software
Packit fe9d6e
 * Foundation; either version 2, or (at your option) any later version.
Packit fe9d6e
 *
Packit fe9d6e
 * It is distributed in the hope that it will be useful, but WITHOUT ANY
Packit fe9d6e
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
Packit fe9d6e
 * FOR A PARTICULAR PURPOSE.  See the GNU General Public License in the
Packit fe9d6e
 * file COPYING for more details.
Packit fe9d6e
 */
Packit fe9d6e
Packit fe9d6e
#if defined(HAVE_CONFIG_H)
Packit fe9d6e
# include "config.h"
Packit fe9d6e
#endif
Packit fe9d6e
Packit fe9d6e
#include <string.h>
Packit fe9d6e
#include <stdlib.h>
Packit fe9d6e
#include <assert.h>
Packit fe9d6e
Packit fe9d6e
#define AO_REQUIRE_CAS
Packit fe9d6e
#include "atomic_ops_stack.h"
Packit fe9d6e
Packit fe9d6e
/* This function call must be a part of a do-while loop with a CAS      */
Packit fe9d6e
/* designating the condition of the loop (see the use cases below).     */
Packit fe9d6e
#ifdef AO_THREAD_SANITIZER
Packit fe9d6e
  AO_ATTR_NO_SANITIZE_THREAD
Packit fe9d6e
  static void store_before_cas(AO_t *addr, AO_t value)
Packit fe9d6e
  {
Packit fe9d6e
    *addr = value;
Packit fe9d6e
  }
Packit fe9d6e
#else
Packit fe9d6e
# define store_before_cas(addr, value) (void)(*(addr) = (value))
Packit fe9d6e
#endif
Packit fe9d6e
Packit fe9d6e
#ifdef AO_USE_ALMOST_LOCK_FREE
Packit fe9d6e
Packit fe9d6e
  void AO_pause(int); /* defined in atomic_ops.c */
Packit fe9d6e
Packit fe9d6e
/* LIFO linked lists based on compare-and-swap.  We need to avoid       */
Packit fe9d6e
/* the case of a node deletion and reinsertion while I'm deleting       */
Packit fe9d6e
/* it, since that may cause my CAS to succeed eventhough the next       */
Packit fe9d6e
/* pointer is now wrong.  Our solution is not fully lock-free, but it   */
Packit fe9d6e
/* is good enough for signal handlers, provided we have a suitably low  */
Packit fe9d6e
/* bound on the number of recursive signal handler reentries.           */
Packit fe9d6e
/* A list consists of a first pointer and a blacklist                   */
Packit fe9d6e
/* of pointer values that are currently being removed.  No list element */
Packit fe9d6e
/* on the blacklist may be inserted.  If we would otherwise do so, we   */
Packit fe9d6e
/* are allowed to insert a variant that differs only in the least       */
Packit fe9d6e
/* significant, ignored, bits.  If the list is full, we wait.           */
Packit fe9d6e
Packit fe9d6e
/* Crucial observation: A particular padded pointer x (i.e. pointer     */
Packit fe9d6e
/* plus arbitrary low order bits) can never be newly inserted into      */
Packit fe9d6e
/* a list while it's in the corresponding auxiliary data structure.     */
Packit fe9d6e
Packit fe9d6e
/* The second argument is a pointer to the link field of the element    */
Packit fe9d6e
/* to be inserted.                                                      */
Packit fe9d6e
/* Both list headers and link fields contain "perturbed" pointers, i.e. */
Packit fe9d6e
/* pointers with extra bits "or"ed into the low order bits.             */
Packit fe9d6e
void AO_stack_push_explicit_aux_release(volatile AO_t *list, AO_t *x,
Packit fe9d6e
                                        AO_stack_aux *a)
Packit fe9d6e
{
Packit fe9d6e
  AO_t x_bits = (AO_t)x;
Packit fe9d6e
  AO_t next;
Packit fe9d6e
Packit fe9d6e
  /* No deletions of x can start here, since x is not currently in the  */
Packit fe9d6e
  /* list.                                                              */
Packit fe9d6e
 retry:
Packit fe9d6e
# if AO_BL_SIZE == 2
Packit fe9d6e
  {
Packit fe9d6e
    /* Start all loads as close to concurrently as possible. */
Packit fe9d6e
    AO_t entry1 = AO_load(&a->AO_stack_bl[0]);
Packit fe9d6e
    AO_t entry2 = AO_load(&a->AO_stack_bl[1]);
Packit fe9d6e
    if (entry1 == x_bits || entry2 == x_bits)
Packit fe9d6e
      {
Packit fe9d6e
        /* Entry is currently being removed.  Change it a little.       */
Packit fe9d6e
          ++x_bits;
Packit fe9d6e
          if ((x_bits & AO_BIT_MASK) == 0)
Packit fe9d6e
            /* Version count overflowed;         */
Packit fe9d6e
            /* EXTREMELY unlikely, but possible. */
Packit fe9d6e
            x_bits = (AO_t)x;
Packit fe9d6e
        goto retry;
Packit fe9d6e
      }
Packit fe9d6e
  }
Packit fe9d6e
# else
Packit fe9d6e
  {
Packit fe9d6e
    int i;
Packit fe9d6e
    for (i = 0; i < AO_BL_SIZE; ++i)
Packit fe9d6e
      {
Packit fe9d6e
        if (AO_load(&a->AO_stack_bl[i]) == x_bits)
Packit fe9d6e
          {
Packit fe9d6e
            /* Entry is currently being removed.  Change it a little.   */
Packit fe9d6e
              ++x_bits;
Packit fe9d6e
              if ((x_bits & AO_BIT_MASK) == 0)
Packit fe9d6e
                /* Version count overflowed;         */
Packit fe9d6e
                /* EXTREMELY unlikely, but possible. */
Packit fe9d6e
                x_bits = (AO_t)x;
Packit fe9d6e
            goto retry;
Packit fe9d6e
          }
Packit fe9d6e
      }
Packit fe9d6e
  }
Packit fe9d6e
# endif
Packit fe9d6e
  /* x_bits is not currently being deleted */
Packit fe9d6e
  do
Packit fe9d6e
    {
Packit fe9d6e
      next = AO_load(list);
Packit fe9d6e
      store_before_cas(x, next);
Packit fe9d6e
    }
Packit fe9d6e
  while (AO_EXPECT_FALSE(!AO_compare_and_swap_release(list, next, x_bits)));
Packit fe9d6e
}
Packit fe9d6e
Packit fe9d6e
/*
Packit fe9d6e
 * I concluded experimentally that checking a value first before
Packit fe9d6e
 * performing a compare-and-swap is usually beneficial on X86, but
Packit fe9d6e
 * slows things down appreciably with contention on Itanium.
Packit fe9d6e
 * Since the Itanium behavior makes more sense to me (more cache line
Packit fe9d6e
 * movement unless we're mostly reading, but back-off should guard
Packit fe9d6e
 * against that), we take Itanium as the default.  Measurements on
Packit fe9d6e
 * other multiprocessor architectures would be useful.  (On a uniprocessor,
Packit fe9d6e
 * the initial check is almost certainly a very small loss.) - HB
Packit fe9d6e
 */
Packit fe9d6e
#ifdef __i386__
Packit fe9d6e
# define PRECHECK(a) (a) == 0 &&
Packit fe9d6e
#else
Packit fe9d6e
# define PRECHECK(a)
Packit fe9d6e
#endif
Packit fe9d6e
Packit fe9d6e
/* This function is used before CAS in the below AO_stack_pop() and the */
Packit fe9d6e
/* data race (reported by TSan) is OK because it results in a retry.    */
Packit fe9d6e
#ifdef AO_THREAD_SANITIZER
Packit fe9d6e
  AO_ATTR_NO_SANITIZE_THREAD
Packit fe9d6e
  static AO_t AO_load_next(volatile AO_t *first_ptr)
Packit fe9d6e
  {
Packit fe9d6e
    /* Assuming an architecture on which loads of word type are atomic. */
Packit fe9d6e
    /* AO_load cannot be used here because it cannot be instructed to   */
Packit fe9d6e
    /* suppress the warning about the race.                             */
Packit fe9d6e
    return *first_ptr;
Packit fe9d6e
  }
Packit fe9d6e
#else
Packit fe9d6e
# define AO_load_next AO_load
Packit fe9d6e
#endif
Packit fe9d6e
Packit fe9d6e
AO_t *
Packit fe9d6e
AO_stack_pop_explicit_aux_acquire(volatile AO_t *list, AO_stack_aux * a)
Packit fe9d6e
{
Packit fe9d6e
  unsigned i;
Packit fe9d6e
  int j = 0;
Packit fe9d6e
  AO_t first;
Packit fe9d6e
  AO_t * first_ptr;
Packit fe9d6e
  AO_t next;
Packit fe9d6e
Packit fe9d6e
 retry:
Packit fe9d6e
  first = AO_load(list);
Packit fe9d6e
  if (0 == first) return 0;
Packit fe9d6e
  /* Insert first into aux black list.                                  */
Packit fe9d6e
  /* This may spin if more than AO_BL_SIZE removals using auxiliary     */
Packit fe9d6e
  /* structure a are currently in progress.                             */
Packit fe9d6e
  for (i = 0; ; )
Packit fe9d6e
    {
Packit fe9d6e
      if (PRECHECK(a -> AO_stack_bl[i])
Packit fe9d6e
          AO_compare_and_swap_acquire(a->AO_stack_bl+i, 0, first))
Packit fe9d6e
        break;
Packit fe9d6e
      ++i;
Packit fe9d6e
      if ( i >= AO_BL_SIZE )
Packit fe9d6e
        {
Packit fe9d6e
          i = 0;
Packit fe9d6e
          AO_pause(++j);
Packit fe9d6e
        }
Packit fe9d6e
    }
Packit fe9d6e
  assert(i < AO_BL_SIZE);
Packit fe9d6e
# ifndef AO_THREAD_SANITIZER
Packit fe9d6e
    assert(a -> AO_stack_bl[i] == first);
Packit fe9d6e
                                /* No actual race with the above CAS.   */
Packit fe9d6e
# endif
Packit fe9d6e
  /* First is on the auxiliary black list.  It may be removed by        */
Packit fe9d6e
  /* another thread before we get to it, but a new insertion of x       */
Packit fe9d6e
  /* cannot be started here.                                            */
Packit fe9d6e
  /* Only we can remove it from the black list.                         */
Packit fe9d6e
  /* We need to make sure that first is still the first entry on the    */
Packit fe9d6e
  /* list.  Otherwise it's possible that a reinsertion of it was        */
Packit fe9d6e
  /* already started before we added the black list entry.              */
Packit fe9d6e
# if defined(__alpha__) && (__GNUC__ == 4)
Packit fe9d6e
    if (first != AO_load_acquire(list))
Packit fe9d6e
                        /* Workaround __builtin_expect bug found in     */
Packit fe9d6e
                        /* gcc-4.6.3/alpha causing test_stack failure.  */
Packit fe9d6e
# else
Packit fe9d6e
    if (AO_EXPECT_FALSE(first != AO_load_acquire(list)))
Packit fe9d6e
                        /* Workaround test failure on AIX, at least, by */
Packit fe9d6e
                        /* using acquire ordering semantics for this    */
Packit fe9d6e
                        /* load.  Probably, it is not the right fix.    */
Packit fe9d6e
# endif
Packit fe9d6e
  {
Packit fe9d6e
    AO_store_release(a->AO_stack_bl+i, 0);
Packit fe9d6e
    goto retry;
Packit fe9d6e
  }
Packit fe9d6e
  first_ptr = AO_REAL_NEXT_PTR(first);
Packit fe9d6e
  next = AO_load_next(first_ptr);
Packit fe9d6e
# if defined(__alpha__) && (__GNUC__ == 4)
Packit fe9d6e
    if (!AO_compare_and_swap_release(list, first, next))
Packit fe9d6e
# else
Packit fe9d6e
    if (AO_EXPECT_FALSE(!AO_compare_and_swap_release(list, first, next)))
Packit fe9d6e
# endif
Packit fe9d6e
  {
Packit fe9d6e
    AO_store_release(a->AO_stack_bl+i, 0);
Packit fe9d6e
    goto retry;
Packit fe9d6e
  }
Packit fe9d6e
# ifndef AO_THREAD_SANITIZER
Packit fe9d6e
    assert(*list != first); /* No actual race with the above CAS.       */
Packit fe9d6e
# endif
Packit fe9d6e
  /* Since we never insert an entry on the black list, this cannot have */
Packit fe9d6e
  /* succeeded unless first remained on the list while we were running. */
Packit fe9d6e
  /* Thus its next link cannot have changed out from under us, and we   */
Packit fe9d6e
  /* removed exactly one entry and preserved the rest of the list.      */
Packit fe9d6e
  /* Note that it is quite possible that an additional entry was        */
Packit fe9d6e
  /* inserted and removed while we were running; this is OK since the   */
Packit fe9d6e
  /* part of the list following first must have remained unchanged, and */
Packit fe9d6e
  /* first must again have been at the head of the list when the        */
Packit fe9d6e
  /* compare_and_swap succeeded.                                        */
Packit fe9d6e
  AO_store_release(a->AO_stack_bl+i, 0);
Packit fe9d6e
  return first_ptr;
Packit fe9d6e
}
Packit fe9d6e
Packit fe9d6e
#else /* ! USE_ALMOST_LOCK_FREE */
Packit fe9d6e
Packit fe9d6e
/* The functionality is the same as of AO_load_next but the atomicity   */
Packit fe9d6e
/* is not needed.  The usage is similar to that of store_before_cas.    */
Packit fe9d6e
#ifdef AO_THREAD_SANITIZER
Packit fe9d6e
  /* TODO: If compiled by Clang (as of clang-4.0) with -O3 flag,        */
Packit fe9d6e
  /* no_sanitize attribute is ignored unless the argument is volatile.  */
Packit fe9d6e
# if defined(__clang__)
Packit fe9d6e
#   define LOAD_BEFORE_CAS_VOLATILE volatile
Packit fe9d6e
# else
Packit fe9d6e
#   define LOAD_BEFORE_CAS_VOLATILE /* empty */
Packit fe9d6e
# endif
Packit fe9d6e
  AO_ATTR_NO_SANITIZE_THREAD
Packit fe9d6e
  static AO_t load_before_cas(const LOAD_BEFORE_CAS_VOLATILE AO_t *addr)
Packit fe9d6e
  {
Packit fe9d6e
    return *addr;
Packit fe9d6e
  }
Packit fe9d6e
#else
Packit fe9d6e
# define load_before_cas(addr) (*(addr))
Packit fe9d6e
#endif
Packit fe9d6e
Packit fe9d6e
/* Better names for fields in AO_stack_t */
Packit fe9d6e
#define ptr AO_val2
Packit fe9d6e
#define version AO_val1
Packit fe9d6e
Packit fe9d6e
#if defined(AO_HAVE_compare_double_and_swap_double) \
Packit fe9d6e
    && !(defined(AO_STACK_PREFER_CAS_DOUBLE) \
Packit fe9d6e
         && defined(AO_HAVE_compare_and_swap_double))
Packit fe9d6e
Packit fe9d6e
#ifdef LINT2
Packit fe9d6e
  volatile /* non-static */ AO_t AO_noop_sink;
Packit fe9d6e
#endif
Packit fe9d6e
Packit fe9d6e
void AO_stack_push_release(AO_stack_t *list, AO_t *element)
Packit fe9d6e
{
Packit fe9d6e
    AO_t next;
Packit fe9d6e
Packit fe9d6e
    do {
Packit fe9d6e
      next = AO_load(&(list -> ptr));
Packit fe9d6e
      store_before_cas(element, next);
Packit fe9d6e
    } while (AO_EXPECT_FALSE(!AO_compare_and_swap_release(&(list -> ptr),
Packit fe9d6e
                                                      next, (AO_t)element)));
Packit fe9d6e
    /* This uses a narrow CAS here, an old optimization suggested       */
Packit fe9d6e
    /* by Treiber.  Pop is still safe, since we run into the ABA        */
Packit fe9d6e
    /* problem only if there were both intervening "pop"s and "push"es. */
Packit fe9d6e
    /* In that case we still see a change in the version number.        */
Packit fe9d6e
#   ifdef LINT2
Packit fe9d6e
      /* Instruct static analyzer that element is not lost. */
Packit fe9d6e
      AO_noop_sink = (AO_t)element;
Packit fe9d6e
#   endif
Packit fe9d6e
}
Packit fe9d6e
Packit fe9d6e
AO_t *AO_stack_pop_acquire(AO_stack_t *list)
Packit fe9d6e
{
Packit fe9d6e
#   if defined(__clang__) && !AO_CLANG_PREREQ(3, 5)
Packit fe9d6e
      AO_t *volatile cptr;
Packit fe9d6e
                        /* Use volatile to workaround a bug in          */
Packit fe9d6e
                        /* clang-1.1/x86 causing test_stack failure.    */
Packit fe9d6e
#   else
Packit fe9d6e
      AO_t *cptr;
Packit fe9d6e
#   endif
Packit fe9d6e
    AO_t next;
Packit fe9d6e
    AO_t cversion;
Packit fe9d6e
Packit fe9d6e
    do {
Packit fe9d6e
      /* Version must be loaded first.  */
Packit fe9d6e
      cversion = AO_load_acquire(&(list -> version));
Packit fe9d6e
      cptr = (AO_t *)AO_load(&(list -> ptr));
Packit fe9d6e
      if (NULL == cptr)
Packit fe9d6e
        return NULL;
Packit fe9d6e
      next = load_before_cas((AO_t *)cptr);
Packit fe9d6e
    } while (AO_EXPECT_FALSE(!AO_compare_double_and_swap_double_release(list,
Packit fe9d6e
                                        cversion, (AO_t)cptr,
Packit fe9d6e
                                        cversion+1, (AO_t)next)));
Packit fe9d6e
    return cptr;
Packit fe9d6e
}
Packit fe9d6e
Packit fe9d6e
Packit fe9d6e
#elif defined(AO_HAVE_compare_and_swap_double)
Packit fe9d6e
Packit fe9d6e
/* Needed for future IA64 processors.  No current clients? */
Packit fe9d6e
/* TODO: Not tested thoroughly. */
Packit fe9d6e
Packit fe9d6e
/* We have a wide CAS, but only does an AO_t-wide comparison.   */
Packit fe9d6e
/* We can't use the Treiber optimization, since we only check   */
Packit fe9d6e
/* for an unchanged version number, not an unchanged pointer.   */
Packit fe9d6e
void AO_stack_push_release(AO_stack_t *list, AO_t *element)
Packit fe9d6e
{
Packit fe9d6e
    AO_t version;
Packit fe9d6e
Packit fe9d6e
    do {
Packit fe9d6e
      AO_t next_ptr;
Packit fe9d6e
Packit fe9d6e
      /* Again version must be loaded first, for different reason.      */
Packit fe9d6e
      version = AO_load_acquire(&(list -> version));
Packit fe9d6e
      next_ptr = AO_load(&(list -> ptr));
Packit fe9d6e
      store_before_cas(element, next_ptr);
Packit fe9d6e
    } while (!AO_compare_and_swap_double_release(
Packit fe9d6e
                           list, version,
Packit fe9d6e
                           version+1, (AO_t) element));
Packit fe9d6e
}
Packit fe9d6e
Packit fe9d6e
AO_t *AO_stack_pop_acquire(AO_stack_t *list)
Packit fe9d6e
{
Packit fe9d6e
    AO_t *cptr;
Packit fe9d6e
    AO_t next;
Packit fe9d6e
    AO_t cversion;
Packit fe9d6e
Packit fe9d6e
    do {
Packit fe9d6e
      cversion = AO_load_acquire(&(list -> version));
Packit fe9d6e
      cptr = (AO_t *)AO_load(&(list -> ptr));
Packit fe9d6e
      if (NULL == cptr)
Packit fe9d6e
        return NULL;
Packit fe9d6e
      next = load_before_cas(cptr);
Packit fe9d6e
    } while (!AO_compare_double_and_swap_double_release(list,
Packit fe9d6e
                                                cversion, (AO_t)cptr,
Packit fe9d6e
                                                cversion+1, next));
Packit fe9d6e
    return cptr;
Packit fe9d6e
}
Packit fe9d6e
Packit fe9d6e
#endif /* AO_HAVE_compare_and_swap_double */
Packit fe9d6e
Packit fe9d6e
#endif /* ! USE_ALMOST_LOCK_FREE */