Blame sysdeps/x86/cacheinfo.c

Packit 6c4009
/* x86_64 cache info.
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#if IS_IN (libc)
Packit 6c4009
Packit 6c4009
#include <assert.h>
Packit 6c4009
#include <stdbool.h>
Packit 6c4009
#include <stdlib.h>
Packit 6c4009
#include <unistd.h>
Packit 6c4009
#include <cpuid.h>
Packit 6c4009
#include <init-arch.h>
Packit 6c4009
Packit 6c4009
/* Table of the cache descriptor bytes that intel_check_word looks up
   with bsearch.  Each entry describes one cache:

     idx       the descriptor byte itself (the search key);
     assoc     value returned for an *_ASSOC query;
     linesize  value returned for a *_LINESIZE query;
     rel_name  which cache the descriptor belongs to, stored as the
               offset of that cache's _SC_*_SIZE constant from
               _SC_LEVEL1_ICACHE_SIZE (see the M macro);
     size      value returned for a *_SIZE query.

   The entries MUST stay sorted by ascending idx, because the table is
   searched with bsearch using intel_02_known_compare.  */
static const struct intel_02_cache_info
{
  unsigned char idx;
  unsigned char assoc;
  unsigned char linesize;
  unsigned char rel_name;
  unsigned int size;
} intel_02_known [] =
  {
/* Express an _SC_ cache constant relative to _SC_LEVEL1_ICACHE_SIZE so
   that it fits into an unsigned char.  The macro stays defined because
   the code further down in this file uses it as well.  */
#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
    { 0x06,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    8192 },
    { 0x08,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   16384 },
    { 0x09,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
    { 0x0a,  2, 32, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
    { 0x0c,  4, 32, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x0d,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x0e,  6, 64, M(_SC_LEVEL1_DCACHE_SIZE),   24576 },
    { 0x21,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x22,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
    { 0x23,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0x25,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0x29,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0x2c,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
    { 0x30,  8, 64, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
    { 0x39,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x3a,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   196608 },
    { 0x3b,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x3c,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x3d,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   393216 },
    { 0x3e,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x3f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x41,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x42,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x43,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x44,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x45,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x46,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0x47,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE),  3145728 },
    { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE),  4194304 },
    { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  6291456 },
    { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
    { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 16777216 },
    { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE),  6291456 },
    { 0x60,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x66,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
    { 0x67,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x68,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
    { 0x78,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x79,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x7a,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x7b,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x7c,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x7d,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x7f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x80,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x82,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x83,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x84,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x85,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x86,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x87,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0xd0,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
    { 0xd1,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0xd2,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xd6,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0xd7,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xd8,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
    { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 18874368 },
    { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 25165824 },
  };

/* Number of entries in intel_02_known.  */
#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
Packit 6c4009
Packit 6c4009
static int
Packit 6c4009
intel_02_known_compare (const void *p1, const void *p2)
Packit 6c4009
{
Packit 6c4009
  const struct intel_02_cache_info *i1;
Packit 6c4009
  const struct intel_02_cache_info *i2;
Packit 6c4009
Packit 6c4009
  i1 = (const struct intel_02_cache_info *) p1;
Packit 6c4009
  i2 = (const struct intel_02_cache_info *) p2;
Packit 6c4009
Packit 6c4009
  if (i1->idx == i2->idx)
Packit 6c4009
    return 0;
Packit 6c4009
Packit 6c4009
  return i1->idx < i2->idx ? -1 : 1;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
static long int
Packit 6c4009
__attribute__ ((noinline))
Packit 6c4009
intel_check_word (int name, unsigned int value, bool *has_level_2,
Packit 6c4009
		  bool *no_level_2_or_3,
Packit 6c4009
		  const struct cpu_features *cpu_features)
Packit 6c4009
{
Packit 6c4009
  if ((value & 0x80000000) != 0)
Packit 6c4009
    /* The register value is reserved.  */
Packit 6c4009
    return 0;
Packit 6c4009
Packit 6c4009
  /* Fold the name.  The _SC_ constants are always in the order SIZE,
Packit 6c4009
     ASSOC, LINESIZE.  */
Packit 6c4009
  int folded_rel_name = (M(name) / 3) * 3;
Packit 6c4009
Packit 6c4009
  while (value != 0)
Packit 6c4009
    {
Packit 6c4009
      unsigned int byte = value & 0xff;
Packit 6c4009
Packit 6c4009
      if (byte == 0x40)
Packit 6c4009
	{
Packit 6c4009
	  *no_level_2_or_3 = true;
Packit 6c4009
Packit 6c4009
	  if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
Packit 6c4009
	    /* No need to look further.  */
Packit 6c4009
	    break;
Packit 6c4009
	}
Packit 6c4009
      else if (byte == 0xff)
Packit 6c4009
	{
Packit 6c4009
	  /* CPUID leaf 0x4 contains all the information.  We need to
Packit 6c4009
	     iterate over it.  */
Packit 6c4009
	  unsigned int eax;
Packit 6c4009
	  unsigned int ebx;
Packit 6c4009
	  unsigned int ecx;
Packit 6c4009
	  unsigned int edx;
Packit 6c4009
Packit 6c4009
	  unsigned int round = 0;
Packit 6c4009
	  while (1)
Packit 6c4009
	    {
Packit 6c4009
	      __cpuid_count (4, round, eax, ebx, ecx, edx);
Packit 6c4009
Packit 6c4009
	      enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
Packit 6c4009
	      if (type == null)
Packit 6c4009
		/* That was the end.  */
Packit 6c4009
		break;
Packit 6c4009
Packit 6c4009
	      unsigned int level = (eax >> 5) & 0x7;
Packit 6c4009
Packit 6c4009
	      if ((level == 1 && type == data
Packit 6c4009
		   && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
Packit 6c4009
		  || (level == 1 && type == inst
Packit 6c4009
		      && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
Packit 6c4009
		  || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
Packit 6c4009
		  || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
Packit 6c4009
		  || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
Packit 6c4009
		{
Packit 6c4009
		  unsigned int offset = M(name) - folded_rel_name;
Packit 6c4009
Packit 6c4009
		  if (offset == 0)
Packit 6c4009
		    /* Cache size.  */
Packit 6c4009
		    return (((ebx >> 22) + 1)
Packit 6c4009
			    * (((ebx >> 12) & 0x3ff) + 1)
Packit 6c4009
			    * ((ebx & 0xfff) + 1)
Packit 6c4009
			    * (ecx + 1));
Packit 6c4009
		  if (offset == 1)
Packit 6c4009
		    return (ebx >> 22) + 1;
Packit 6c4009
Packit 6c4009
		  assert (offset == 2);
Packit 6c4009
		  return (ebx & 0xfff) + 1;
Packit 6c4009
		}
Packit 6c4009
Packit 6c4009
	      ++round;
Packit 6c4009
	    }
Packit 6c4009
	  /* There is no other cache information anywhere else.  */
Packit 6c4009
	  break;
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
Packit 6c4009
	    {
Packit 6c4009
	      /* Intel reused this value.  For family 15, model 6 it
Packit 6c4009
		 specifies the 3rd level cache.  Otherwise the 2nd
Packit 6c4009
		 level cache.  */
Packit 6c4009
	      unsigned int family = cpu_features->family;
Packit 6c4009
	      unsigned int model = cpu_features->model;
Packit 6c4009
Packit 6c4009
	      if (family == 15 && model == 6)
Packit 6c4009
		{
Packit 6c4009
		  /* The level 3 cache is encoded for this model like
Packit 6c4009
		     the level 2 cache is for other models.  Pretend
Packit 6c4009
		     the caller asked for the level 2 cache.  */
Packit 6c4009
		  name = (_SC_LEVEL2_CACHE_SIZE
Packit 6c4009
			  + (name - _SC_LEVEL3_CACHE_SIZE));
Packit 6c4009
		  folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
Packit 6c4009
		}
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  struct intel_02_cache_info *found;
Packit 6c4009
	  struct intel_02_cache_info search;
Packit 6c4009
Packit 6c4009
	  search.idx = byte;
Packit 6c4009
	  found = bsearch (&search, intel_02_known, nintel_02_known,
Packit 6c4009
			   sizeof (intel_02_known[0]), intel_02_known_compare);
Packit 6c4009
	  if (found != NULL)
Packit 6c4009
	    {
Packit 6c4009
	      if (found->rel_name == folded_rel_name)
Packit 6c4009
		{
Packit 6c4009
		  unsigned int offset = M(name) - folded_rel_name;
Packit 6c4009
Packit 6c4009
		  if (offset == 0)
Packit 6c4009
		    /* Cache size.  */
Packit 6c4009
		    return found->size;
Packit 6c4009
		  if (offset == 1)
Packit 6c4009
		    return found->assoc;
Packit 6c4009
Packit 6c4009
		  assert (offset == 2);
Packit 6c4009
		  return found->linesize;
Packit 6c4009
		}
Packit 6c4009
Packit 6c4009
	      if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
Packit 6c4009
		*has_level_2 = true;
Packit 6c4009
	    }
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      /* Next byte for the next round.  */
Packit 6c4009
      value >>= 8;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* Nothing found.  */
Packit 6c4009
  return 0;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
static long int __attribute__ ((noinline))
Packit 6c4009
handle_intel (int name, const struct cpu_features *cpu_features)
Packit 6c4009
{
Packit 6c4009
  unsigned int maxidx = cpu_features->max_cpuid;
Packit 6c4009
Packit 6c4009
  /* Return -1 for older CPUs.  */
Packit 6c4009
  if (maxidx < 2)
Packit 6c4009
    return -1;
Packit 6c4009
Packit 6c4009
  /* OK, we can use the CPUID instruction to get all info about the
Packit 6c4009
     caches.  */
Packit 6c4009
  unsigned int cnt = 0;
Packit 6c4009
  unsigned int max = 1;
Packit 6c4009
  long int result = 0;
Packit 6c4009
  bool no_level_2_or_3 = false;
Packit 6c4009
  bool has_level_2 = false;
Packit 6c4009
Packit 6c4009
  while (cnt++ < max)
Packit 6c4009
    {
Packit 6c4009
      unsigned int eax;
Packit 6c4009
      unsigned int ebx;
Packit 6c4009
      unsigned int ecx;
Packit 6c4009
      unsigned int edx;
Packit 6c4009
      __cpuid (2, eax, ebx, ecx, edx);
Packit 6c4009
Packit 6c4009
      /* The low byte of EAX in the first round contain the number of
Packit 6c4009
	 rounds we have to make.  At least one, the one we are already
Packit 6c4009
	 doing.  */
Packit 6c4009
      if (cnt == 1)
Packit 6c4009
	{
Packit 6c4009
	  max = eax & 0xff;
Packit 6c4009
	  eax &= 0xffffff00;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      /* Process the individual registers' value.  */
Packit 6c4009
      result = intel_check_word (name, eax, &has_level_2,
Packit 6c4009
				 &no_level_2_or_3, cpu_features);
Packit 6c4009
      if (result != 0)
Packit 6c4009
	return result;
Packit 6c4009
Packit 6c4009
      result = intel_check_word (name, ebx, &has_level_2,
Packit 6c4009
				 &no_level_2_or_3, cpu_features);
Packit 6c4009
      if (result != 0)
Packit 6c4009
	return result;
Packit 6c4009
Packit 6c4009
      result = intel_check_word (name, ecx, &has_level_2,
Packit 6c4009
				 &no_level_2_or_3, cpu_features);
Packit 6c4009
      if (result != 0)
Packit 6c4009
	return result;
Packit 6c4009
Packit 6c4009
      result = intel_check_word (name, edx, &has_level_2,
Packit 6c4009
				 &no_level_2_or_3, cpu_features);
Packit 6c4009
      if (result != 0)
Packit 6c4009
	return result;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
Packit 6c4009
      && no_level_2_or_3)
Packit 6c4009
    return -1;
Packit 6c4009
Packit 6c4009
  return 0;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* Translate the 4-bit associativity CODE found in bits 12-15 of the
   ECX (level 2) and EDX (level 3) results of CPUID leaf 0x80000006 into
   a number of ways.  CACHE_BYTES and LINE_BYTES are the size and line
   size decoded from the same register; they are only used for code 15,
   where the result is the number of lines in the cache.  Returns 0 for
   codes without a mapping and when LINE_BYTES is 0.  */
static long int
amd_decode_assoc (unsigned int code, unsigned int cache_bytes,
                  unsigned int line_bytes)
{
  switch (code)
    {
    case 0:
    case 1:
    case 2:
    case 4:
      /* These codes are the number of ways themselves.  */
      return code;
    case 6:
      return 8;
    case 8:
      return 16;
    case 10:
      return 32;
    case 11:
      return 48;
    case 12:
      return 64;
    case 13:
      return 96;
    case 14:
      return 128;
    case 15:
      /* Guard the division: a zero line size would otherwise raise a
         division-by-zero trap.  */
      return line_bytes == 0 ? 0 : cache_bytes / line_bytes;
    default:
      return 0;
    }
}

/* Return the cache property NAME (one of the _SC_LEVEL*_CACHE_*
   constants) using the extended CPUID leaves 0x80000005 (level 1) and
   0x80000006 (level 2 and 3).

   Returns 0 for level 4 requests, when the required leaf is not
   supported, or when the register reports the cache as absent;
   otherwise returns the decoded value.  */
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;

  /* EAX receives the highest supported extended leaf.  */
  __cpuid (0x80000000, eax, ebx, ecx, edx);

  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  /* Level 1 data lives in leaf 0x80000005, everything else in
     0x80000006.  */
  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
  if (eax < fn)
    return 0;

  __cpuid (fn, eax, ebx, ecx, edx);

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      /* Level 1 instruction cache: the layout in EDX matches the data
         cache layout in ECX, so map the request onto the data cache
         cases below.  */
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
      /* Bits 24-31 scaled by 1024.  */
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      ecx >>= 16;
      if ((ecx & 0xff) == 0xff)
        /* Fully associative.
           NOTE(review): unlike the level 2/3 path this yields the cache
           size in bytes rather than size divided by line size; kept
           as is, confirm whether that is intended.  */
        return (ecx << 2) & 0x3fc00;
      return ecx & 0xff;

    case _SC_LEVEL1_DCACHE_LINESIZE:
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
      /* An associativity code of 0 makes the size read as 0.  */
      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      return amd_decode_assoc ((ecx >> 12) & 0xf,
                               (ecx >> 6) & 0x3fffc00,
                               ecx & 0xff);

    case _SC_LEVEL2_CACHE_LINESIZE:
      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      return amd_decode_assoc ((edx >> 12) & 0xf,
                               (edx & 0x3ffc0000) << 1,
                               edx & 0xff);

    case _SC_LEVEL3_CACHE_LINESIZE:
      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;

    default:
      assert (! "cannot happen");
    }
  return -1;
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* Get the value of the system variable NAME.  */
Packit 6c4009
long int
Packit 6c4009
attribute_hidden
Packit 6c4009
__cache_sysconf (int name)
Packit 6c4009
{
Packit 6c4009
  const struct cpu_features *cpu_features = __get_cpu_features ();
Packit 6c4009
Packit 6c4009
  if (cpu_features->kind == arch_kind_intel)
Packit 6c4009
    return handle_intel (name, cpu_features);
Packit 6c4009
Packit 6c4009
  if (cpu_features->kind == arch_kind_amd)
Packit 6c4009
    return handle_amd (name);
Packit 6c4009
Packit 6c4009
  // XXX Fill in more vendors.
Packit 6c4009
Packit 6c4009
  /* CPU not known, we have no information.  */
Packit 6c4009
  return 0;
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* Data cache size for use in memory and string routines, typically
Packit 6c4009
   L1 size, rounded to multiple of 256 bytes.  */
Packit 6c4009
long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
Packit 6c4009
long int __x86_data_cache_size attribute_hidden = 32 * 1024;
Packit 6c4009
/* Similar to __x86_data_cache_size_half, but not rounded.  */
Packit 6c4009
long int __x86_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
Packit 6c4009
/* Similar to __x86_data_cache_size, but not rounded.  */
Packit 6c4009
long int __x86_raw_data_cache_size attribute_hidden = 32 * 1024;
Packit 6c4009
/* Shared cache size for use in memory and string routines, typically
Packit 6c4009
   L2 or L3 size, rounded to multiple of 256 bytes.  */
Packit 6c4009
long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
Packit 6c4009
long int __x86_shared_cache_size attribute_hidden = 1024 * 1024;
Packit 6c4009
/* Similar to __x86_shared_cache_size_half, but not rounded.  */
Packit 6c4009
long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
Packit 6c4009
/* Similar to __x86_shared_cache_size, but not rounded.  */
Packit 6c4009
long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;
Packit 6c4009
Packit 6c4009
/* Threshold to use non temporal store.  */
Packit 6c4009
long int __x86_shared_non_temporal_threshold attribute_hidden;
Packit 6c4009
Packit 6c4009
#ifndef DISABLE_PREFETCHW
Packit 6c4009
/* PREFETCHW support flag for use in memory and string routines.  */
Packit 6c4009
int __x86_prefetchw attribute_hidden;
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
Packit 6c4009
static void
Packit 6c4009
__attribute__((constructor))
Packit 6c4009
init_cacheinfo (void)
Packit 6c4009
{
Packit 6c4009
  /* Find out what brand of processor.  */
Packit 6c4009
  unsigned int eax;
Packit 6c4009
  unsigned int ebx;
Packit 6c4009
  unsigned int ecx;
Packit 6c4009
  unsigned int edx;
Packit 6c4009
  int max_cpuid_ex;
Packit 6c4009
  long int data = -1;
Packit 6c4009
  long int shared = -1;
Packit 6c4009
  unsigned int level;
Packit 6c4009
  unsigned int threads = 0;
Packit 6c4009
  const struct cpu_features *cpu_features = __get_cpu_features ();
Packit 6c4009
  int max_cpuid = cpu_features->max_cpuid;
Packit 6c4009
Packit 6c4009
  if (cpu_features->kind == arch_kind_intel)
Packit 6c4009
    {
Packit 6c4009
      data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
Packit 6c4009
Packit 6c4009
      long int core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
Packit 6c4009
      bool inclusive_cache = true;
Packit 6c4009
Packit 6c4009
      /* Try L3 first.  */
Packit 6c4009
      level  = 3;
Packit 6c4009
      shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features);
Packit 6c4009
Packit 6c4009
      /* Number of logical processors sharing L2 cache.  */
Packit 6c4009
      int threads_l2;
Packit 6c4009
Packit 6c4009
      /* Number of logical processors sharing L3 cache.  */
Packit 6c4009
      int threads_l3;
Packit 6c4009
Packit 6c4009
      if (shared <= 0)
Packit 6c4009
	{
Packit 6c4009
	  /* Try L2 otherwise.  */
Packit 6c4009
	  level  = 2;
Packit 6c4009
	  shared = core;
Packit 6c4009
	  threads_l2 = 0;
Packit 6c4009
	  threads_l3 = -1;
Packit 6c4009
	}
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  threads_l2 = 0;
Packit 6c4009
	  threads_l3 = 0;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      /* A value of 0 for the HTT bit indicates there is only a single
Packit 6c4009
	 logical processor.  */
Packit 6c4009
      if (HAS_CPU_FEATURE (HTT))
Packit 6c4009
	{
Packit 6c4009
	  /* Figure out the number of logical threads that share the
Packit 6c4009
	     highest cache level.  */
Packit 6c4009
	  if (max_cpuid >= 4)
Packit 6c4009
	    {
Packit 6c4009
	      unsigned int family = cpu_features->family;
Packit 6c4009
	      unsigned int model = cpu_features->model;
Packit 6c4009
Packit 6c4009
	      int i = 0;
Packit 6c4009
Packit 6c4009
	      /* Query until cache level 2 and 3 are enumerated.  */
Packit 6c4009
	      int check = 0x1 | (threads_l3 == 0) << 1;
Packit 6c4009
	      do
Packit 6c4009
		{
Packit 6c4009
		  __cpuid_count (4, i++, eax, ebx, ecx, edx);
Packit 6c4009
Packit 6c4009
		  /* There seems to be a bug in at least some Pentium Ds
Packit 6c4009
		     which sometimes fail to iterate all cache parameters.
Packit 6c4009
		     Do not loop indefinitely here, stop in this case and
Packit 6c4009
		     assume there is no such information.  */
Packit 6c4009
		  if ((eax & 0x1f) == 0)
Packit 6c4009
		    goto intel_bug_no_cache_info;
Packit 6c4009
Packit 6c4009
		  switch ((eax >> 5) & 0x7)
Packit 6c4009
		    {
Packit 6c4009
		    default:
Packit 6c4009
		      break;
Packit 6c4009
		    case 2:
Packit 6c4009
		      if ((check & 0x1))
Packit 6c4009
			{
Packit 6c4009
			  /* Get maximum number of logical processors
Packit 6c4009
			     sharing L2 cache.  */
Packit 6c4009
			  threads_l2 = (eax >> 14) & 0x3ff;
Packit 6c4009
			  check &= ~0x1;
Packit 6c4009
			}
Packit 6c4009
		      break;
Packit 6c4009
		    case 3:
Packit 6c4009
		      if ((check & (0x1 << 1)))
Packit 6c4009
			{
Packit 6c4009
			  /* Get maximum number of logical processors
Packit 6c4009
			     sharing L3 cache.  */
Packit 6c4009
			  threads_l3 = (eax >> 14) & 0x3ff;
Packit 6c4009
Packit 6c4009
			  /* Check if L2 and L3 caches are inclusive.  */
Packit 6c4009
			  inclusive_cache = (edx & 0x2) != 0;
Packit 6c4009
			  check &= ~(0x1 << 1);
Packit 6c4009
			}
Packit 6c4009
		      break;
Packit 6c4009
		    }
Packit 6c4009
		}
Packit 6c4009
	      while (check);
Packit 6c4009
Packit 6c4009
	      /* If max_cpuid >= 11, THREADS_L2/THREADS_L3 are the maximum
Packit 6c4009
		 numbers of addressable IDs for logical processors sharing
Packit 6c4009
		 the cache, instead of the maximum number of threads
Packit 6c4009
		 sharing the cache.  */
Packit 6c4009
	      if (max_cpuid >= 11)
Packit 6c4009
		{
Packit 6c4009
		  /* Find the number of logical processors shipped in
Packit 6c4009
		     one core and apply count mask.  */
Packit 6c4009
		  i = 0;
Packit 6c4009
Packit 6c4009
		  /* Count SMT only if there is L3 cache.  Always count
Packit 6c4009
		     core if there is no L3 cache.  */
Packit 6c4009
		  int count = ((threads_l2 > 0 && level == 3)
Packit 6c4009
			       | ((threads_l3 > 0
Packit 6c4009
				   || (threads_l2 > 0 && level == 2)) << 1));
Packit 6c4009
Packit 6c4009
		  while (count)
Packit 6c4009
		    {
Packit 6c4009
		      __cpuid_count (11, i++, eax, ebx, ecx, edx);
Packit 6c4009
Packit 6c4009
		      int shipped = ebx & 0xff;
Packit 6c4009
		      int type = ecx & 0xff00;
Packit 6c4009
		      if (shipped == 0 || type == 0)
Packit 6c4009
			break;
Packit 6c4009
		      else if (type == 0x100)
Packit 6c4009
			{
Packit 6c4009
			  /* Count SMT.  */
Packit 6c4009
			  if ((count & 0x1))
Packit 6c4009
			    {
Packit 6c4009
			      int count_mask;
Packit 6c4009
Packit 6c4009
			      /* Compute count mask.  */
Packit 6c4009
			      asm ("bsr %1, %0"
Packit 6c4009
				   : "=r" (count_mask) : "g" (threads_l2));
Packit 6c4009
			      count_mask = ~(-1 << (count_mask + 1));
Packit 6c4009
			      threads_l2 = (shipped - 1) & count_mask;
Packit 6c4009
			      count &= ~0x1;
Packit 6c4009
			    }
Packit 6c4009
			}
Packit 6c4009
		      else if (type == 0x200)
Packit 6c4009
			{
Packit 6c4009
			  /* Count core.  */
Packit 6c4009
			  if ((count & (0x1 << 1)))
Packit 6c4009
			    {
Packit 6c4009
			      int count_mask;
Packit 6c4009
			      int threads_core
Packit 6c4009
				= (level == 2 ? threads_l2 : threads_l3);
Packit 6c4009
Packit 6c4009
			      /* Compute count mask.  */
Packit 6c4009
			      asm ("bsr %1, %0"
Packit 6c4009
				   : "=r" (count_mask) : "g" (threads_core));
Packit 6c4009
			      count_mask = ~(-1 << (count_mask + 1));
Packit 6c4009
			      threads_core = (shipped - 1) & count_mask;
Packit 6c4009
			      if (level == 2)
Packit 6c4009
				threads_l2 = threads_core;
Packit 6c4009
			      else
Packit 6c4009
				threads_l3 = threads_core;
Packit 6c4009
			      count &= ~(0x1 << 1);
Packit 6c4009
			    }
Packit 6c4009
			}
Packit 6c4009
		    }
Packit 6c4009
		}
Packit 6c4009
	      if (threads_l2 > 0)
Packit 6c4009
		threads_l2 += 1;
Packit 6c4009
	      if (threads_l3 > 0)
Packit 6c4009
		threads_l3 += 1;
Packit 6c4009
	      if (level == 2)
Packit 6c4009
		{
Packit 6c4009
		  if (threads_l2)
Packit 6c4009
		    {
Packit 6c4009
		      threads = threads_l2;
Packit 6c4009
		      if (threads > 2 && family == 6)
Packit 6c4009
			switch (model)
Packit 6c4009
			  {
Packit 6c4009
			  case 0x37:
Packit 6c4009
			  case 0x4a:
Packit 6c4009
			  case 0x4d:
Packit 6c4009
			  case 0x5a:
Packit 6c4009
			  case 0x5d:
Packit 6c4009
			    /* Silvermont has L2 cache shared by 2 cores.  */
Packit 6c4009
			    threads = 2;
Packit 6c4009
			    break;
Packit 6c4009
			  default:
Packit 6c4009
			    break;
Packit 6c4009
			  }
Packit 6c4009
		    }
Packit 6c4009
		}
Packit 6c4009
	      else if (threads_l3)
Packit 6c4009
		threads = threads_l3;
Packit 6c4009
	    }
Packit 6c4009
	  else
Packit 6c4009
	    {
Packit 6c4009
intel_bug_no_cache_info:
Packit 6c4009
	      /* Assume that all logical threads share the highest cache
Packit 6c4009
		 level.  */
Packit 6c4009
Packit 6c4009
	      threads
Packit 6c4009
		= ((cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx
Packit 6c4009
		    >> 16) & 0xff);
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  /* Cap usage of highest cache level to the number of supported
Packit 6c4009
	     threads.  */
Packit 6c4009
	  if (shared > 0 && threads > 0)
Packit 6c4009
	    shared /= threads;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
      /* Account for non-inclusive L2 and L3 caches.  */
Packit 6c4009
      if (!inclusive_cache)
Packit 6c4009
	{
Packit 6c4009
	  if (threads_l2 > 0)
Packit 6c4009
	    core /= threads_l2;
Packit 6c4009
	  shared += core;
Packit 6c4009
	}
Packit 6c4009
    }
Packit 6c4009
  else if (cpu_features->kind == arch_kind_amd)
Packit 6c4009
    {
Packit 6c4009
      data   = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
Packit 6c4009
      long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
Packit 6c4009
      shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
Packit 6c4009
Packit 6c4009
      /* Get maximum extended function. */
Packit 6c4009
      __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
Packit 6c4009
Packit 6c4009
      if (shared <= 0)
Packit 6c4009
	/* No shared L3 cache.  All we have is the L2 cache.  */
Packit 6c4009
	shared = core;
Packit 6c4009
      else
Packit 6c4009
	{
Packit 6c4009
	  /* Figure out the number of logical threads that share L3.  */
Packit 6c4009
	  if (max_cpuid_ex >= 0x80000008)
Packit 6c4009
	    {
Packit 6c4009
	      /* Get width of APIC ID.  */
Packit 6c4009
	      __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
Packit 6c4009
	      threads = 1 << ((ecx >> 12) & 0x0f);
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  if (threads == 0)
Packit 6c4009
	    {
Packit 6c4009
	      /* If APIC ID width is not available, use logical
Packit 6c4009
		 processor count.  */
Packit 6c4009
	      __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);
Packit 6c4009
Packit 6c4009
	      if ((edx & (1 << 28)) != 0)
Packit 6c4009
		threads = (ebx >> 16) & 0xff;
Packit 6c4009
	    }
Packit 6c4009
Packit 6c4009
	  /* Cap usage of highest cache level to the number of
Packit 6c4009
	     supported threads.  */
Packit 6c4009
	  if (threads > 0)
Packit 6c4009
	    shared /= threads;
Packit 6c4009
Packit 6c4009
	  /* Account for exclusive L2 and L3 caches.  */
Packit 6c4009
	  shared += core;
Packit 6c4009
	}
Packit 6c4009
Packit 6c4009
#ifndef DISABLE_PREFETCHW
Packit 6c4009
      if (max_cpuid_ex >= 0x80000001)
Packit 6c4009
	{
Packit 6c4009
	  __cpuid (0x80000001, eax, ebx, ecx, edx);
Packit 6c4009
	  /*  PREFETCHW     || 3DNow!  */
Packit 6c4009
	  if ((ecx & 0x100) || (edx & 0x80000000))
Packit 6c4009
	    __x86_prefetchw = -1;
Packit 6c4009
	}
Packit 6c4009
#endif
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (cpu_features->data_cache_size != 0)
Packit 6c4009
    data = cpu_features->data_cache_size;
Packit 6c4009
Packit 6c4009
  if (data > 0)
Packit 6c4009
    {
Packit 6c4009
      __x86_raw_data_cache_size_half = data / 2;
Packit 6c4009
      __x86_raw_data_cache_size = data;
Packit 6c4009
      /* Round data cache size to multiple of 256 bytes.  */
Packit 6c4009
      data = data & ~255L;
Packit 6c4009
      __x86_data_cache_size_half = data / 2;
Packit 6c4009
      __x86_data_cache_size = data;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  if (cpu_features->shared_cache_size != 0)
Packit 6c4009
    shared = cpu_features->shared_cache_size;
Packit 6c4009
Packit 6c4009
  if (shared > 0)
Packit 6c4009
    {
Packit 6c4009
      __x86_raw_shared_cache_size_half = shared / 2;
Packit 6c4009
      __x86_raw_shared_cache_size = shared;
Packit 6c4009
      /* Round shared cache size to multiple of 256 bytes.  */
Packit 6c4009
      shared = shared & ~255L;
Packit 6c4009
      __x86_shared_cache_size_half = shared / 2;
Packit 6c4009
      __x86_shared_cache_size = shared;
Packit 6c4009
    }
Packit 6c4009
Packit 6c4009
  /* The large memcpy micro benchmark in glibc shows that 6 times of
Packit 6c4009
     shared cache size is the approximate value above which non-temporal
Packit 6c4009
     store becomes faster on a 8-core processor.  This is the 3/4 of the
Packit 6c4009
     total shared cache size.  */
Packit 6c4009
  __x86_shared_non_temporal_threshold
Packit 6c4009
    = (cpu_features->non_temporal_threshold != 0
Packit 6c4009
       ? cpu_features->non_temporal_threshold
Packit 6c4009
       : __x86_shared_cache_size * threads * 3 / 4);
Packit 6c4009
}
Packit 6c4009
Packit 6c4009
#endif