/* crc32c.c -- compute CRC-32C using the Intel crc32 instruction
 * Copyright (C) 2013 Mark Adler
 * Version 1.1  1 Aug 2013  Mark Adler
 */
Packit 4e8bc4
Packit 4e8bc4
/*
  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the author be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.

  Mark Adler
  madler@alumni.caltech.edu
 */
Packit 4e8bc4
Packit 4e8bc4
/* Use hardware CRC instruction on Intel SSE 4.2 processors.  This computes a
   CRC-32C, *not* the CRC-32 used by Ethernet and zip, gzip, etc.  A software
   version is provided as a fall-back, as well as for speed comparisons. */
Packit 4e8bc4
Packit 4e8bc4
/* Version history:
   1.0  10 Feb 2013  First version
   1.1   1 Aug 2013  Correct comments on why three crc instructions in parallel
 */
Packit 4e8bc4
Packit 4e8bc4
/* This version has been modified by dormando for inclusion in memcached */
Packit 4e8bc4
Packit 4e8bc4
#include <stdio.h>
Packit 4e8bc4
#include <stdlib.h>
Packit 4e8bc4
#include <stdint.h>
Packit 4e8bc4
#include <unistd.h>
Packit 4e8bc4
#include <pthread.h>
Packit 4e8bc4
#include "config.h"
Packit 4e8bc4
#if defined(__linux__) && defined(__aarch64__)
Packit 4e8bc4
#include <sys/auxv.h>
Packit 4e8bc4
#endif
Packit 4e8bc4
#include "crc32c.h"
Packit 4e8bc4
Packit 4e8bc4
/* CRC-32C (iSCSI) polynomial in reversed bit order. */
Packit 4e8bc4
#define POLY 0x82f63b78
Packit 4e8bc4
Packit 4e8bc4
/* Table for a quadword-at-a-time software crc. */
Packit 4e8bc4
static pthread_once_t crc32c_once_sw = PTHREAD_ONCE_INIT;
Packit 4e8bc4
static uint32_t crc32c_table[8][256];
Packit 4e8bc4
Packit 4e8bc4
/* Construct table for software CRC-32C calculation. */
Packit 4e8bc4
static void crc32c_init_sw(void)
Packit 4e8bc4
{
Packit 4e8bc4
    uint32_t n, crc, k;
Packit 4e8bc4
Packit 4e8bc4
    for (n = 0; n < 256; n++) {
Packit 4e8bc4
        crc = n;
Packit 4e8bc4
        crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
Packit 4e8bc4
        crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
Packit 4e8bc4
        crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
Packit 4e8bc4
        crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
Packit 4e8bc4
        crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
Packit 4e8bc4
        crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
Packit 4e8bc4
        crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
Packit 4e8bc4
        crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
Packit 4e8bc4
        crc32c_table[0][n] = crc;
Packit 4e8bc4
    }
Packit 4e8bc4
    for (n = 0; n < 256; n++) {
Packit 4e8bc4
        crc = crc32c_table[0][n];
Packit 4e8bc4
        for (k = 1; k < 8; k++) {
Packit 4e8bc4
            crc = crc32c_table[0][crc & 0xff] ^ (crc >> 8);
Packit 4e8bc4
            crc32c_table[k][n] = crc;
Packit 4e8bc4
        }
Packit 4e8bc4
    }
Packit 4e8bc4
}
Packit 4e8bc4
Packit 4e8bc4
/* Table-driven software version as a fall-back.  This is about 15 times slower
Packit 4e8bc4
   than using the hardware instructions.  This assumes little-endian integers,
Packit 4e8bc4
   as is the case on Intel processors that the assembler code here is for. */
Packit 4e8bc4
static uint32_t crc32c_sw(uint32_t crci, const void *buf, size_t len)
Packit 4e8bc4
{
Packit 4e8bc4
    const unsigned char *next = buf;
Packit 4e8bc4
    uint64_t crc;
Packit 4e8bc4
Packit 4e8bc4
    pthread_once(&crc32c_once_sw, crc32c_init_sw);
Packit 4e8bc4
    crc = crci ^ 0xffffffff;
Packit 4e8bc4
    while (len && ((uintptr_t)next & 7) != 0) {
Packit 4e8bc4
        crc = crc32c_table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8);
Packit 4e8bc4
        len--;
Packit 4e8bc4
    }
Packit 4e8bc4
    while (len >= 8) {
Packit 4e8bc4
        crc ^= *(uint64_t *)next;
Packit 4e8bc4
        crc = crc32c_table[7][crc & 0xff] ^
Packit 4e8bc4
              crc32c_table[6][(crc >> 8) & 0xff] ^
Packit 4e8bc4
              crc32c_table[5][(crc >> 16) & 0xff] ^
Packit 4e8bc4
              crc32c_table[4][(crc >> 24) & 0xff] ^
Packit 4e8bc4
              crc32c_table[3][(crc >> 32) & 0xff] ^
Packit 4e8bc4
              crc32c_table[2][(crc >> 40) & 0xff] ^
Packit 4e8bc4
              crc32c_table[1][(crc >> 48) & 0xff] ^
Packit 4e8bc4
              crc32c_table[0][crc >> 56];
Packit 4e8bc4
        next += 8;
Packit 4e8bc4
        len -= 8;
Packit 4e8bc4
    }
Packit 4e8bc4
    while (len) {
Packit 4e8bc4
        crc = crc32c_table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8);
Packit 4e8bc4
        len--;
Packit 4e8bc4
    }
Packit 4e8bc4
    return (uint32_t)crc ^ 0xffffffff;
Packit 4e8bc4
}
Packit 4e8bc4
Packit 4e8bc4
/* Hardware CRC support for aarch64 platform */
#if defined(__linux__) && defined(__aarch64__) && defined(ARM_CRC32)

/* One CRC32C instruction each, for a 64-, 32-, 16- and 8-bit operand.  crc is
   updated in place; value is evaluated exactly once. */
#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(+value))
#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(+value))
#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(+value))
#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(+value))

/* AT_HWCAP bit tested by crc32c_init(); defined here only if the system
   headers did not already provide it. */
#ifndef HWCAP_CRC32
#define HWCAP_CRC32             (1 << 7)
#endif /* HWCAP for crc32 */

/* Byte-wise loaders: p[0] becomes the least significant byte.  They replace
   casts such as *(uint64_t*)p, which discarded the const qualifier, assumed
   suitable alignment and broke the aliasing rules.  On a little-endian host
   they produce exactly the value the direct load produced. */
static inline uint16_t crc32c_arm_load16(const uint8_t *p)
{
        return (uint16_t)(p[0] | (p[1] << 8));
}

static inline uint32_t crc32c_arm_load32(const uint8_t *p)
{
        return (uint32_t)p[0] |
               ((uint32_t)p[1] << 8) |
               ((uint32_t)p[2] << 16) |
               ((uint32_t)p[3] << 24);
}

static inline uint64_t crc32c_arm_load64(const uint8_t *p)
{
        return (uint64_t)crc32c_arm_load32(p) |
               ((uint64_t)crc32c_arm_load32(p + 4) << 32);
}

/* Update crc over len bytes at buf using the aarch64 CRC32C instructions and
   return the new value.  Whole eight-byte groups are processed first; the
   remaining 0..7 bytes are handled by the switch below.

   NOTE(review): unlike crc32c_sw() and crc32c_hw(), this routine does not
   XOR the CRC with 0xffffffff on entry or on exit, so for the same arguments
   it does not return the same value as those two.  Left unchanged here
   because altering it would change the values callers receive -- confirm
   which behavior is intended. */
static uint32_t crc32c_hw_aarch64(uint32_t crc, const void* buf, size_t len)
{
        const uint8_t *p_buf = buf;
        uint64_t crc64bit = crc;

        for (size_t i = 0; i < len / sizeof(uint64_t); i++) {
                CRC32CX(crc64bit, crc32c_arm_load64(p_buf));
                p_buf += sizeof(uint64_t);
        }

        uint32_t crc32bit = (uint32_t) crc64bit;

        /* Tail: each entry point handles the odd piece first and then falls
           into the case for what is left (7 = 1+2+4, 6 = 2+4, 3 = 1+2,
           5 = 4+1). */
        len &= sizeof(uint64_t) - 1;
        switch (len) {
        case 7:
                CRC32CB(crc32bit, *p_buf++);
                /* fallthrough */
        case 6:
                CRC32CH(crc32bit, crc32c_arm_load16(p_buf));
                p_buf += 2;
                /* fallthrough */
        case 4:
                CRC32CW(crc32bit, crc32c_arm_load32(p_buf));
                break;
        case 3:
                CRC32CB(crc32bit, *p_buf++);
                /* fallthrough */
        case 2:
                CRC32CH(crc32bit, crc32c_arm_load16(p_buf));
                break;
        case 5:
                CRC32CW(crc32bit, crc32c_arm_load32(p_buf));
                p_buf += 4;
                /* fallthrough */
        case 1:
                CRC32CB(crc32bit, *p_buf);
                break;
        case 0:
        default:
                break;
        }

        return crc32bit;
}
#endif
Packit 4e8bc4
Packit 4e8bc4
/* Apply if the platform is intel */
Packit 4e8bc4
#if defined(__X86_64__)||defined(__x86_64__)||defined(__ia64__)
Packit 4e8bc4
Packit 4e8bc4
/* Multiply a matrix times a vector over the Galois field of two elements,
   GF(2).  Each element is a bit in an unsigned integer.

   mat  matrix, one uint32_t per entry; only read, never written.  It must
        have at least as many entries as the position of the most significant
        one bit in vec requires, since entry k is read whenever bit k of vec
        is set.
   vec  vector whose set bits select which entries of mat are combined.

   Returns the XOR of mat[k] over every k for which bit k of vec is set
   (0 when vec is 0). */
static inline uint32_t gf2_matrix_times(const uint32_t *mat, uint32_t vec)
{
    uint32_t sum = 0;

    /* Walk vec from its least significant bit, advancing through mat in
       step; stops as soon as no set bits remain. */
    for (; vec != 0; vec >>= 1, mat++) {
        if (vec & 1)
            sum ^= *mat;
    }
    return sum;
}
Packit 4e8bc4
Packit 4e8bc4
/* Square a 32-row GF(2) matrix: every row of the result is the product of
   mat with the corresponding row of mat.  square receives the 32 result
   rows; mat supplies the 32 input rows. */
static inline void gf2_matrix_square(uint32_t *square, uint32_t *mat)
{
    int row = 0;

    while (row < 32) {
        square[row] = gf2_matrix_times(mat, mat[row]);
        row++;
    }
}
Packit 4e8bc4
Packit 4e8bc4
/* Construct an operator to apply len zeros to a crc.  len must be a power of
Packit 4e8bc4
   two.  If len is not a power of two, then the result is the same as for the
Packit 4e8bc4
   largest power of two less than len.  The result for len == 0 is the same as
Packit 4e8bc4
   for len == 1.  A version of this routine could be easily written for any
Packit 4e8bc4
   len, but that is not needed for this application. */
Packit 4e8bc4
static void crc32c_zeros_op(uint32_t *even, size_t len)
Packit 4e8bc4
{
Packit 4e8bc4
    int n;
Packit 4e8bc4
    uint32_t row;
Packit 4e8bc4
    uint32_t odd[32];       /* odd-power-of-two zeros operator */
Packit 4e8bc4
Packit 4e8bc4
    /* put operator for one zero bit in odd */
Packit 4e8bc4
    odd[0] = POLY;              /* CRC-32C polynomial */
Packit 4e8bc4
    row = 1;
Packit 4e8bc4
    for (n = 1; n < 32; n++) {
Packit 4e8bc4
        odd[n] = row;
Packit 4e8bc4
        row <<= 1;
Packit 4e8bc4
    }
Packit 4e8bc4
Packit 4e8bc4
    /* put operator for two zero bits in even */
Packit 4e8bc4
    gf2_matrix_square(even, odd);
Packit 4e8bc4
Packit 4e8bc4
    /* put operator for four zero bits in odd */
Packit 4e8bc4
    gf2_matrix_square(odd, even);
Packit 4e8bc4
Packit 4e8bc4
    /* first square will put the operator for one zero byte (eight zero bits),
Packit 4e8bc4
       in even -- next square puts operator for two zero bytes in odd, and so
Packit 4e8bc4
       on, until len has been rotated down to zero */
Packit 4e8bc4
    do {
Packit 4e8bc4
        gf2_matrix_square(even, odd);
Packit 4e8bc4
        len >>= 1;
Packit 4e8bc4
        if (len == 0)
Packit 4e8bc4
            return;
Packit 4e8bc4
        gf2_matrix_square(odd, even);
Packit 4e8bc4
        len >>= 1;
Packit 4e8bc4
    } while (len);
Packit 4e8bc4
Packit 4e8bc4
    /* answer ended up in odd -- copy to even */
Packit 4e8bc4
    for (n = 0; n < 32; n++)
Packit 4e8bc4
        even[n] = odd[n];
Packit 4e8bc4
}
Packit 4e8bc4
Packit 4e8bc4
/* Fill the four 256-entry tables in zeros for the given length: table k maps
   a byte value, placed at bit offset 8*k of a crc, to the effect of the
   zeros operator for len on it.  crc32c_shift() combines the four lookups. */
static void crc32c_zeros(uint32_t zeros[][256], size_t len)
{
    uint32_t op[32];

    crc32c_zeros_op(op, len);

    for (uint32_t byte = 0; byte < 256; byte++) {
        for (int pos = 0; pos < 4; pos++)
            zeros[pos][byte] = gf2_matrix_times(op, byte << (8 * pos));
    }
}
Packit 4e8bc4
Packit 4e8bc4
/* Apply a zeros-operator table set to crc: look up each of the four bytes of
   crc in its own table (lowest byte in zeros[0], highest in zeros[3]) and
   return the XOR of the four results. */
static inline uint32_t crc32c_shift(uint32_t zeros[][256], uint32_t crc)
{
    uint32_t shifted = 0;

    for (int pos = 0; pos < 4; pos++) {
        shifted ^= zeros[pos][crc & 0xff];
        crc >>= 8;
    }
    return shifted;
}
Packit 4e8bc4
Packit 4e8bc4
/* Block sizes, in bytes, for the three-way interleaved hardware crc.  LONG
   and SHORT must each be a power of two.  The xN string forms are pasted
   into the assembler text as displacements and therefore have to be kept
   equal to one and two times the numeric value by hand. */
#define LONG 8192
#define LONGx1 "8192"
#define LONGx2 "16384"
#define SHORT 256
#define SHORTx1 "256"
#define SHORTx2 "512"

/* Shift tables used by the hardware crc: crc32c_long advances a crc over LONG
   zero bytes, crc32c_short over SHORT zero bytes.  crc32c_once_hw is the
   once-flag crc32c_hw() passes to pthread_once() to build them. */
static pthread_once_t crc32c_once_hw = PTHREAD_ONCE_INIT;
static uint32_t crc32c_long[4][256];
static uint32_t crc32c_short[4][256];

/* Build both shift tables.  No arguments, no return value; the two tables
   are independent of each other. */
static void crc32c_init_hw(void)
{
    crc32c_zeros(crc32c_short, SHORT);
    crc32c_zeros(crc32c_long, LONG);
}
Packit 4e8bc4
Packit 4e8bc4
/* Compute CRC-32C using the Intel hardware instruction. */
Packit 4e8bc4
static uint32_t crc32c_hw(uint32_t crc, const void *buf, size_t len)
Packit 4e8bc4
{
Packit 4e8bc4
    const unsigned char *next = buf;
Packit 4e8bc4
    const unsigned char *end;
Packit 4e8bc4
    uint64_t crc0, crc1, crc2;      /* need to be 64 bits for crc32q */
Packit 4e8bc4
Packit 4e8bc4
    /* populate shift tables the first time through */
Packit 4e8bc4
    pthread_once(&crc32c_once_hw, crc32c_init_hw);
Packit 4e8bc4
Packit 4e8bc4
    /* pre-process the crc */
Packit 4e8bc4
    crc0 = crc ^ 0xffffffff;
Packit 4e8bc4
Packit 4e8bc4
    /* compute the crc for up to seven leading bytes to bring the data pointer
Packit 4e8bc4
       to an eight-byte boundary */
Packit 4e8bc4
    while (len && ((uintptr_t)next & 7) != 0) {
Packit 4e8bc4
        __asm__("crc32b\t" "(%1), %0"
Packit 4e8bc4
                : "=r"(crc0)
Packit 4e8bc4
                : "r"(next), "0"(crc0));
Packit 4e8bc4
        next++;
Packit 4e8bc4
        len--;
Packit 4e8bc4
    }
Packit 4e8bc4
Packit 4e8bc4
    /* compute the crc on sets of LONG*3 bytes, executing three independent crc
Packit 4e8bc4
       instructions, each on LONG bytes -- this is optimized for the Nehalem,
Packit 4e8bc4
       Westmere, Sandy Bridge, and Ivy Bridge architectures, which have a
Packit 4e8bc4
       throughput of one crc per cycle, but a latency of three cycles */
Packit 4e8bc4
    while (len >= LONG*3) {
Packit 4e8bc4
        crc1 = 0;
Packit 4e8bc4
        crc2 = 0;
Packit 4e8bc4
        end = next + LONG;
Packit 4e8bc4
        do {
Packit 4e8bc4
            __asm__("crc32q\t" "(%3), %0\n\t"
Packit 4e8bc4
                    "crc32q\t" LONGx1 "(%3), %1\n\t"
Packit 4e8bc4
                    "crc32q\t" LONGx2 "(%3), %2"
Packit 4e8bc4
                    : "=r"(crc0), "=r"(crc1), "=r"(crc2)
Packit 4e8bc4
                    : "r"(next), "0"(crc0), "1"(crc1), "2"(crc2));
Packit 4e8bc4
            next += 8;
Packit 4e8bc4
        } while (next < end);
Packit 4e8bc4
        crc0 = crc32c_shift(crc32c_long, crc0) ^ crc1;
Packit 4e8bc4
        crc0 = crc32c_shift(crc32c_long, crc0) ^ crc2;
Packit 4e8bc4
        next += LONG*2;
Packit 4e8bc4
        len -= LONG*3;
Packit 4e8bc4
    }
Packit 4e8bc4
Packit 4e8bc4
    /* do the same thing, but now on SHORT*3 blocks for the remaining data less
Packit 4e8bc4
       than a LONG*3 block */
Packit 4e8bc4
    while (len >= SHORT*3) {
Packit 4e8bc4
        crc1 = 0;
Packit 4e8bc4
        crc2 = 0;
Packit 4e8bc4
        end = next + SHORT;
Packit 4e8bc4
        do {
Packit 4e8bc4
            __asm__("crc32q\t" "(%3), %0\n\t"
Packit 4e8bc4
                    "crc32q\t" SHORTx1 "(%3), %1\n\t"
Packit 4e8bc4
                    "crc32q\t" SHORTx2 "(%3), %2"
Packit 4e8bc4
                    : "=r"(crc0), "=r"(crc1), "=r"(crc2)
Packit 4e8bc4
                    : "r"(next), "0"(crc0), "1"(crc1), "2"(crc2));
Packit 4e8bc4
            next += 8;
Packit 4e8bc4
        } while (next < end);
Packit 4e8bc4
        crc0 = crc32c_shift(crc32c_short, crc0) ^ crc1;
Packit 4e8bc4
        crc0 = crc32c_shift(crc32c_short, crc0) ^ crc2;
Packit 4e8bc4
        next += SHORT*2;
Packit 4e8bc4
        len -= SHORT*3;
Packit 4e8bc4
    }
Packit 4e8bc4
Packit 4e8bc4
    /* compute the crc on the remaining eight-byte units less than a SHORT*3
Packit 4e8bc4
       block */
Packit 4e8bc4
    end = next + (len - (len & 7));
Packit 4e8bc4
    while (next < end) {
Packit 4e8bc4
        __asm__("crc32q\t" "(%1), %0"
Packit 4e8bc4
                : "=r"(crc0)
Packit 4e8bc4
                : "r"(next), "0"(crc0));
Packit 4e8bc4
        next += 8;
Packit 4e8bc4
    }
Packit 4e8bc4
    len &= 7;
Packit 4e8bc4
Packit 4e8bc4
    /* compute the crc for up to seven trailing bytes */
Packit 4e8bc4
    while (len) {
Packit 4e8bc4
        __asm__("crc32b\t" "(%1), %0"
Packit 4e8bc4
                : "=r"(crc0)
Packit 4e8bc4
                : "r"(next), "0"(crc0));
Packit 4e8bc4
        next++;
Packit 4e8bc4
        len--;
Packit 4e8bc4
    }
Packit 4e8bc4
Packit 4e8bc4
    /* return a post-processed crc */
Packit 4e8bc4
    return (uint32_t)crc0 ^ 0xffffffff;
Packit 4e8bc4
}
Packit 4e8bc4
Packit 4e8bc4
/* Check for SSE 4.2.  SSE 4.2 was first supported in Nehalem processors
   introduced in November, 2008.  This does not check for the existence of the
   cpuid instruction itself, which was introduced on the 486SL in 1992, so this
   will fail on earlier x86 processors.  cpuid works on all Pentium and later
   processors.

   Sets (have) to 1 if bit 20 of ECX from cpuid leaf 1 is set, otherwise 0.

   cpuid overwrites EAX as well as EBX, ECX and EDX, so EAX is passed as a
   read-write operand rather than as an input only; otherwise the compiler
   would be entitled to assume the register still holds the leaf number after
   the instruction. */
#define SSE42(have) \
    do { \
        uint32_t eax = 1, ecx; \
        __asm__("cpuid" \
                : "+a"(eax), "=c"(ecx) \
                : \
                : "%ebx", "%edx"); \
        (have) = (ecx >> 20) & 1; \
    } while (0)
Packit 4e8bc4
Packit 4e8bc4
#endif
Packit 4e8bc4
/* Compute a CRC-32C.  If the crc32 instruction is available, use the hardware
Packit 4e8bc4
   version.  Otherwise, use the software version. */
Packit 4e8bc4
void crc32c_init(void) {
Packit 4e8bc4
    #if defined(__X86_64__)||defined(__x86_64__)||defined(__ia64__)
Packit 4e8bc4
    int sse42;
Packit 4e8bc4
    SSE42(sse42);
Packit 4e8bc4
Packit 4e8bc4
    if (sse42) {
Packit 4e8bc4
        crc32c = crc32c_hw;
Packit 4e8bc4
    } else
Packit 4e8bc4
    #endif
Packit 4e8bc4
    /* Check if CRC instructions supported by aarch64 */
Packit 4e8bc4
    #if defined(__linux__) && defined(__aarch64__) && defined(ARM_CRC32)
Packit 4e8bc4
    unsigned long hwcap = getauxval(AT_HWCAP);
Packit 4e8bc4
Packit 4e8bc4
    if (hwcap & HWCAP_CRC32) {
Packit 4e8bc4
        crc32c = crc32c_hw_aarch64;
Packit 4e8bc4
    } else
Packit 4e8bc4
    #endif
Packit 4e8bc4
    {
Packit 4e8bc4
        crc32c = crc32c_sw;
Packit 4e8bc4
    }
Packit 4e8bc4
}