Blame sysdeps/alpha/memchr.c

Packit 6c4009
/* Copyright (C) 2010-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library.  If not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <string.h>
Packit 6c4009
Packit 6c4009
typedef unsigned long word;
Packit 6c4009
Packit 6c4009
static inline word
Packit 6c4009
ldq_u(const void *s)
Packit 6c4009
{
Packit 6c4009
  return *(const word *)((word)s & -8);
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
#define unlikely(X)	__builtin_expect ((X), 0)
Packit 6c4009
#define prefetch(X)	__builtin_prefetch ((void *)(X), 0)
Packit 6c4009
Packit 6c4009
#define cmpbeq0(X)	__builtin_alpha_cmpbge(0, (X))
Packit 6c4009
#define find(X, Y)	cmpbeq0 ((X) ^ (Y))
Packit 6c4009
Packit 6c4009
/* Search no more than N bytes of S for C.  */
Packit 6c4009
Packit 6c4009
void *
Packit 6c4009
__memchr (const void *s, int xc, size_t n)
Packit 6c4009
{
Packit 6c4009
  const word *s_align;
Packit 6c4009
  word t, current, found, mask, offset;
Packit 6c4009
Packit 6c4009
  if (unlikely (n == 0))
Packit 6c4009
    return 0;
Packit 6c4009
Packit 6c4009
  current = ldq_u (s);
Packit 6c4009
Packit 6c4009
  /* Replicate low byte of XC into all bytes of C.  */
Packit 6c4009
  t = xc & 0xff;			/* 0000000c */
Packit 6c4009
  t = (t << 8) | t;			/* 000000cc */
Packit 6c4009
  t = (t << 16) | t;			/* 0000cccc */
Packit 6c4009
  const word c = (t << 32) | t;		/* cccccccc */
Packit 6c4009
Packit 6c4009
  /* Align the source, and decrement the count by the number
Packit 6c4009
     of bytes searched in the first word.  */
Packit 6c4009
  s_align = (const word *)((word)s & -8);
Packit 6c4009
  {
Packit 6c4009
    size_t inc = n + ((word)s & 7);
Packit 6c4009
    n = inc | -(inc < n);
Packit 6c4009
  }
Packit 6c4009
Packit 6c4009
  /* Deal with misalignment in the first word for the comparison.  */
Packit 6c4009
  mask = (1ul << ((word)s & 7)) - 1;
Packit 6c4009
Packit 6c4009
  /* If the entire string fits within one word, we may need masking
Packit 6c4009
     at both the front and the back of the string.  */
Packit 6c4009
  if (unlikely (n <= 8))
Packit 6c4009
    {
Packit 6c4009
      mask |= -1ul << n;
Packit 6c4009
      goto last_quad;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  found = find (current, c) & ~mask;
Packit 6c4009
  if (unlikely (found))
Packit 6c4009
    goto found_it;
Packit 6c4009
Packit 6c4009
  s_align++;
Packit 6c4009
  n -= 8;
Packit 6c4009
Packit 6c4009
  /* If the block is sufficiently large, align to cacheline and prefetch.  */
Packit 6c4009
  if (unlikely (n >= 256))
Packit 6c4009
    {
Packit 6c4009
      /* Prefetch 3 cache lines beyond the one we're working on.  */
Packit 6c4009
      prefetch (s_align + 8);
Packit 6c4009
      prefetch (s_align + 16);
Packit 6c4009
      prefetch (s_align + 24);
Packit 6c4009
Packit 6c4009
      while ((word)s_align & 63)
Packit 6c4009
	{
Packit 6c4009
	  current = *s_align;
Packit 6c4009
	  found = find (current, c);
Packit 6c4009
	  if (found)
Packit 6c4009
	    goto found_it;
Packit 6c4009
	  s_align++;
Packit 6c4009
	  n -= 8;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
	/* Within each cacheline, advance the load for the next word
Packit 6c4009
	   before the test for the previous word is complete.  This
Packit 6c4009
	   allows us to hide the 3 cycle L1 cache load latency.  We
Packit 6c4009
	   only perform this advance load within a cacheline to prevent
Packit 6c4009
	   reading across page boundary.  */
Packit 6c4009
#define CACHELINE_LOOP				\
Packit 6c4009
	do {					\
Packit 6c4009
	  word i, next = s_align[0];		\
Packit 6c4009
	  for (i = 0; i < 7; ++i)		\
Packit 6c4009
	    {					\
Packit 6c4009
	      current = next;			\
Packit 6c4009
	      next = s_align[1];		\
Packit 6c4009
	      found = find (current, c);	\
Packit 6c4009
	      if (unlikely (found))		\
Packit 6c4009
		goto found_it;			\
Packit 6c4009
	      s_align++;			\
Packit 6c4009
	    }					\
Packit 6c4009
	  current = next;			\
Packit 6c4009
	  found = find (current, c);		\
Packit 6c4009
	  if (unlikely (found))			\
Packit 6c4009
	    goto found_it;			\
Packit 6c4009
	  s_align++;				\
Packit 6c4009
	  n -= 64;				\
Packit 6c4009
	} while (0)
Packit 6c4009
Packit 6c4009
      /* While there's still lots more data to potentially be read,
Packit 6c4009
	 continue issuing prefetches for the 4th cacheline out.  */
Packit 6c4009
      while (n >= 256)
Packit 6c4009
	{
Packit 6c4009
	  prefetch (s_align + 24);
Packit 6c4009
	  CACHELINE_LOOP;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      /* Up to 3 cache lines remaining.  Continue issuing advanced
Packit 6c4009
	 loads, but stop prefetching.  */
Packit 6c4009
      while (n >= 64)
Packit 6c4009
	CACHELINE_LOOP;
Packit 6c4009
Packit 6c4009
      /* We may have exhausted the buffer.  */
Packit 6c4009
      if (n == 0)
Packit 6c4009
	return NULL;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Quadword aligned loop.  */
Packit 6c4009
  current = *s_align;
Packit 6c4009
  while (n > 8)
Packit 6c4009
    {
Packit 6c4009
      found = find (current, c);
Packit 6c4009
      if (unlikely (found))
Packit 6c4009
	goto found_it;
Packit 6c4009
      current = *++s_align;
Packit 6c4009
      n -= 8;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* The last word may need masking at the tail of the compare.  */
Packit 6c4009
  mask = -1ul << n;
Packit 6c4009
 last_quad:
Packit 6c4009
  found = find (current, c) & ~mask;
Packit 6c4009
  if (found == 0)
Packit 6c4009
    return NULL;
Packit 6c4009
Packit 6c4009
 found_it:
Packit 6c4009
#ifdef __alpha_cix__
Packit 6c4009
  offset = __builtin_alpha_cttz (found);
Packit 6c4009
#else
Packit 6c4009
  /* Extract LSB.  */
Packit 6c4009
  found &= -found;
Packit 6c4009
Packit 6c4009
  /* Binary search for the LSB.  */
Packit 6c4009
  offset  = (found & 0x0f ? 0 : 4);
Packit 6c4009
  offset += (found & 0x33 ? 0 : 2);
Packit 6c4009
  offset += (found & 0x55 ? 0 : 1);
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
  return (void *)((word)s_align + offset);
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
#ifdef weak_alias
Packit 6c4009
weak_alias (__memchr, memchr)
Packit 6c4009
#endif
Packit 6c4009
libc_hidden_builtin_def (memchr)