// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// CPU detection
//
// Author: Christian Duvivier (cduvivier@google.com)

#include "src/dsp/dsp.h"
Packit 9c6abc
Packit 9c6abc
#if defined(WEBP_HAVE_NEON_RTCD)
Packit 9c6abc
#include <stdio.h>
Packit 9c6abc
#include <string.h>
Packit 9c6abc
#endif
Packit 9c6abc
Packit 9c6abc
#if defined(WEBP_ANDROID_NEON)
Packit 9c6abc
#include <cpu-features.h>
Packit 9c6abc
#endif

//------------------------------------------------------------------------------
// SSE2 detection.
//

// apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
#if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
Packit 9c6abc
// Executes cpuid for leaf 'info_type' with ecx = 0 and stores the resulting
// eax, ebx, ecx and edx values in cpu_info[0], [1], [2] and [3] respectively.
// Variant for position-independent i386 builds (see the enclosing #if): ebx is
// never named as an operand. Its value is parked in edi before cpuid and
// swapped back afterwards, so the ebx result is delivered through edi ("=D")
// and ebx holds its original value again when the asm statement ends.
// NOTE(review): presumably needed because ebx is reserved for the GOT pointer
// in i386 PIC code -- confirm against the toolchain documentation.
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "mov %%ebx, %%edi\n"
    "cpuid\n"
    "xchg %%edi, %%ebx\n"
    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type), "c"(0));
}
Packit 9c6abc
#elif defined(__x86_64__) && \
Packit 9c6abc
      (defined(__code_model_medium__) || defined(__code_model_large__)) && \
Packit 9c6abc
      defined(__PIC__)
Packit 9c6abc
// Executes cpuid for leaf 'info_type' with ecx = 0 and stores the resulting
// eax, ebx, ecx and edx values in cpu_info[0], [1], [2] and [3] respectively.
// Variant for x86-64 PIC builds using the medium or large code model (see the
// enclosing #elif): rbx is exchanged with a compiler-chosen register
// (operand 1) before cpuid and exchanged back afterwards, so rbx is restored
// and the ebx result is returned through operand 1. That operand is marked
// early-clobber ("=&r") because it is written before the asm has finished
// using its inputs.
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "xchg{q}\t{%%rbx}, %q1\n"
    "cpuid\n"
    "xchg{q}\t{%%rbx}, %q1\n"
    : "=a"(cpu_info[0]), "=&r"(cpu_info[1]), "=c"(cpu_info[2]),
      "=d"(cpu_info[3])
    : "a"(info_type), "c"(0));
}
Packit 9c6abc
#elif defined(__i386__) || defined(__x86_64__)
Packit 9c6abc
// Executes cpuid for leaf 'info_type' with ecx = 0 and stores the resulting
// eax, ebx, ecx and edx values in cpu_info[0], [1], [2] and [3] respectively.
// Generic x86 / x86-64 variant: all four registers are bound directly as
// outputs, with no special handling of ebx.
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "cpuid\n"
    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type), "c"(0));
}
Packit 9c6abc
#elif (defined(_M_X64) || defined(_M_IX86)) && \
Packit 9c6abc
      defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729  // >= VS2008 SP1
Packit 9c6abc
#include <intrin.h>
Packit 9c6abc
// MSVC (>= VS2008 SP1, per the enclosing #elif): map GetCPUInfo onto the
// __cpuidex intrinsic from <intrin.h>, always passing 0 as its third argument.
#define GetCPUInfo(info, type) __cpuidex(info, type, 0)  // set ecx=0
Packit 9c6abc
#elif defined(WEBP_MSC_SSE2)
Packit 9c6abc
// Reached only when WEBP_MSC_SSE2 is defined and none of the branches above
// matched: GetCPUInfo becomes a plain alias for __cpuid.
// NOTE(review): unlike the other variants, nothing on this path sets ecx to 0,
// and no header is included in this branch -- confirm both are intended.
#define GetCPUInfo __cpuid
Packit 9c6abc
#endif

// NaCl has no support for xgetbv or the raw opcode.
#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
Packit 9c6abc
// Runs the xgetbv instruction with ecx = 0 and returns its 64-bit result,
// assembled from edx (upper 32 bits) and eax (lower 32 bits).
// GCC-style inline-asm variant for i386 / x86-64, excluded under NaCl by the
// enclosing #if.
static WEBP_INLINE uint64_t xgetbv(void) {
  const uint32_t ecx = 0;
  uint32_t eax, edx;
  // Use the raw opcode for xgetbv for compatibility with older toolchains.
  __asm__ volatile (
    ".byte 0x0f, 0x01, 0xd0\n"
    : "=a"(eax), "=d"(edx) : "c" (ecx));
  return ((uint64_t)edx << 32) | eax;
}
Packit 9c6abc
#elif (defined(_M_X64) || defined(_M_IX86)) && \
Packit 9c6abc
      defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
Packit 9c6abc
#include <immintrin.h>
Packit 9c6abc
// MSVC (>= VS2010 SP1, per the enclosing #elif): use the _xgetbv intrinsic
// from <immintrin.h> with index 0, mirroring ecx = 0 in the inline-asm
// variants of xgetbv().
#define xgetbv() _xgetbv(0)
Packit 9c6abc
#elif defined(_MSC_VER) && defined(_M_IX86)
Packit 9c6abc
// 32-bit MSVC variant used when the _xgetbv intrinsic branch above does not
// apply: runs xgetbv with ecx = 0 through MSVC inline assembly and returns
// the result assembled from edx (upper 32 bits) and eax (lower 32 bits).
static WEBP_INLINE uint64_t xgetbv(void) {
  uint32_t eax_, edx_;
  __asm {
    xor ecx, ecx  // ecx = 0
    // Use the raw opcode for xgetbv for compatibility with older toolchains.
    __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
    mov eax_, eax
    mov edx_, edx
  }
  return ((uint64_t)edx_ << 32) | eax_;
}
Packit 9c6abc
#else
Packit 9c6abc
// Fallback: evaluates to 0, so the (xgetbv() & 0x6) == 0x6 test performed by
// x86CPUInfo for kAVX can never succeed on these toolchains.
#define xgetbv() 0U  // no AVX for older x64 or unrecognized toolchains.
Packit 9c6abc
#endif
Packit 9c6abc
Packit 9c6abc
#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
Packit 9c6abc
Packit 9c6abc
// Helper function for run-time detection of slow SSSE3 platforms.
// 'info' is the eax value returned by cpuid leaf 1 (x86CPUInfo passes
// cpu_info[0] of that leaf). Returns 1 if the family field is 0x06 and the
// model (extended model in the high nibble, base model in the low nibble) is
// listed in kSlowModels, 0 otherwise.
static int CheckSlowModel(int info) {
  // Table listing display models with longer latencies for the bsr instruction
  // (ie 2 cycles vs 10/16 cycles) and some SSSE3 instructions like pshufb.
  // Refer to Intel 64 and IA-32 Architectures Optimization Reference Manual.
  static const uint8_t kSlowModels[] = {
    0x37, 0x4a, 0x4d,  // Silvermont Microarchitecture
    0x1c, 0x26, 0x27   // Atom Microarchitecture
  };
  // Extract the bit fields from an unsigned copy so that the shifts never
  // operate on a negative value.
  const uint32_t signature = (uint32_t)info;
  // bits 19..16 -> high nibble of the model, bits 7..4 -> low nibble.
  const uint32_t model =
      ((signature & 0xf0000) >> 12) | ((signature >> 4) & 0xf);
  // bits 11..8 only; the extended family field is not taken into account.
  const uint32_t family = (signature >> 8) & 0xf;
  size_t i;

  if (family != 0x06) return 0;
  for (i = 0; i < sizeof(kSlowModels) / sizeof(kSlowModels[0]); ++i) {
    if (model == kSlowModels[i]) return 1;
  }
  return 0;
}
Packit 9c6abc
Packit 9c6abc
static int x86CPUInfo(CPUFeature feature) {
Packit 9c6abc
  int max_cpuid_value;
Packit 9c6abc
  int cpu_info[4];
Packit 9c6abc
  int is_intel = 0;
Packit 9c6abc
Packit 9c6abc
  // get the highest feature value cpuid supports
Packit 9c6abc
  GetCPUInfo(cpu_info, 0);
Packit 9c6abc
  max_cpuid_value = cpu_info[0];
Packit 9c6abc
  if (max_cpuid_value < 1) {
Packit 9c6abc
    return 0;
Packit 9c6abc
  } else {
Packit 9c6abc
    const int VENDOR_ID_INTEL_EBX = 0x756e6547;  // uneG
Packit 9c6abc
    const int VENDOR_ID_INTEL_EDX = 0x49656e69;  // Ieni
Packit 9c6abc
    const int VENDOR_ID_INTEL_ECX = 0x6c65746e;  // letn
Packit 9c6abc
    is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX &&
Packit 9c6abc
                cpu_info[2] == VENDOR_ID_INTEL_ECX &&
Packit 9c6abc
                cpu_info[3] == VENDOR_ID_INTEL_EDX);    // genuine Intel?
Packit 9c6abc
  }
Packit 9c6abc
Packit 9c6abc
  GetCPUInfo(cpu_info, 1);
Packit 9c6abc
  if (feature == kSSE2) {
Packit 9c6abc
    return !!(cpu_info[3] & (1 << 26));
Packit 9c6abc
  }
Packit 9c6abc
  if (feature == kSSE3) {
Packit 9c6abc
    return !!(cpu_info[2] & (1 << 0));
Packit 9c6abc
  }
Packit 9c6abc
  if (feature == kSlowSSSE3) {
Packit 9c6abc
    if (is_intel && (cpu_info[2] & (1 << 9))) {   // SSSE3?
Packit 9c6abc
      return CheckSlowModel(cpu_info[0]);
Packit 9c6abc
    }
Packit 9c6abc
    return 0;
Packit 9c6abc
  }
Packit 9c6abc
Packit 9c6abc
  if (feature == kSSE4_1) {
Packit 9c6abc
    return !!(cpu_info[2] & (1 << 19));
Packit 9c6abc
  }
Packit 9c6abc
  if (feature == kAVX) {
Packit 9c6abc
    // bits 27 (OSXSAVE) & 28 (256-bit AVX)
Packit 9c6abc
    if ((cpu_info[2] & 0x18000000) == 0x18000000) {
Packit 9c6abc
      // XMM state and YMM state enabled by the OS.
Packit 9c6abc
      return (xgetbv() & 0x6) == 0x6;
Packit 9c6abc
    }
Packit 9c6abc
  }
Packit 9c6abc
  if (feature == kAVX2) {
Packit 9c6abc
    if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) {
Packit 9c6abc
      GetCPUInfo(cpu_info, 7);
Packit 9c6abc
      return !!(cpu_info[1] & (1 << 5));
Packit 9c6abc
    }
Packit 9c6abc
  }
Packit 9c6abc
  return 0;
Packit 9c6abc
}
Packit 9c6abc
VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
Packit 9c6abc
#elif defined(WEBP_ANDROID_NEON)  // NB: needs to be before generic NEON test.
Packit 9c6abc
static int AndroidCPUInfo(CPUFeature feature) {
Packit 9c6abc
  const AndroidCpuFamily cpu_family = android_getCpuFamily();
Packit 9c6abc
  const uint64_t cpu_features = android_getCpuFeatures();
Packit 9c6abc
  if (feature == kNEON) {
Packit 9c6abc
    return (cpu_family == ANDROID_CPU_FAMILY_ARM &&
Packit 9c6abc
            0 != (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON));
Packit 9c6abc
  }
Packit 9c6abc
  return 0;
Packit 9c6abc
}
Packit 9c6abc
VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
Packit 9c6abc
#elif defined(WEBP_USE_NEON)
Packit 9c6abc
// define a dummy function to enable turning off NEON at runtime by setting
Packit 9c6abc
// VP8DecGetCPUInfo = NULL
Packit 9c6abc
static int armCPUInfo(CPUFeature feature) {
Packit 9c6abc
  if (feature != kNEON) return 0;
Packit 9c6abc
#if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD)
Packit 9c6abc
  {
Packit 9c6abc
    int has_neon = 0;
Packit 9c6abc
    char line[200];
Packit 9c6abc
    FILE* const cpuinfo = fopen("/proc/cpuinfo", "r");
Packit 9c6abc
    if (cpuinfo == NULL) return 0;
Packit 9c6abc
    while (fgets(line, sizeof(line), cpuinfo)) {
Packit 9c6abc
      if (!strncmp(line, "Features", 8)) {
Packit 9c6abc
        if (strstr(line, " neon ") != NULL) {
Packit 9c6abc
          has_neon = 1;
Packit 9c6abc
          break;
Packit 9c6abc
        }
Packit 9c6abc
      }
Packit 9c6abc
    }
Packit 9c6abc
    fclose(cpuinfo);
Packit 9c6abc
    return has_neon;
Packit 9c6abc
  }
Packit 9c6abc
#else
Packit 9c6abc
  return 1;
Packit 9c6abc
#endif
Packit 9c6abc
}
Packit 9c6abc
VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
Packit 9c6abc
#elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \
Packit 9c6abc
      defined(WEBP_USE_MSA)
Packit 9c6abc
static int mipsCPUInfo(CPUFeature feature) {
Packit 9c6abc
  if ((feature == kMIPS32) || (feature == kMIPSdspR2) || (feature == kMSA)) {
Packit 9c6abc
    return 1;
Packit 9c6abc
  } else {
Packit 9c6abc
    return 0;
Packit 9c6abc
  }
Packit 9c6abc
Packit 9c6abc
}
Packit 9c6abc
VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
Packit 9c6abc
#else
Packit 9c6abc
// None of the detection branches above applies to this build: the CPU-info
// hook starts out unset.
VP8CPUInfo VP8GetCPUInfo = NULL;
Packit 9c6abc
#endif