Blame crypto/sha/keccak1600.c

Packit c4476c
/*
 * Copyright 2016-2019 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the OpenSSL license (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */
Packit c4476c
Packit c4476c
#include <openssl/e_os2.h>
Packit c4476c
#include <string.h>
Packit c4476c
#include <assert.h>
Packit c4476c
Packit c4476c
/*
 * Forward declarations of the two entry points that operate on the
 * 5x5 array of 64-bit lanes. Their definitions are not part of this
 * excerpt.
 * NOTE(review): `r` is presumably the sponge rate in bytes and the
 * return value of SHA3_absorb the count of unprocessed input bytes --
 * confirm against the definitions.
 */
size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
                   size_t r);
void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r);
Packit c4476c
Packit c4476c
#if !defined(KECCAK1600_ASM) || !defined(SELFTEST)
Packit c4476c
Packit c4476c
/*
 * Choose some sensible defaults: when the build did not pick one of the
 * implementation variants explicitly, select KECCAK_2X.
 */
#if !defined(KECCAK_REF) && !defined(KECCAK_1X) && !defined(KECCAK_1X_ALT) && \
    !defined(KECCAK_2X) && !defined(KECCAK_INPLACE)
# define KECCAK_2X      /* default to KECCAK_2X variant */
#endif

/*
 * On 32-bit x86 targets enable the alternative Chi formulas that the
 * KECCAK_2X code guards with KECCAK_COMPLEMENTING_TRANSFORM.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
# define KECCAK_COMPLEMENTING_TRANSFORM
#endif
Packit c4476c
Packit c4476c
#if defined(__x86_64__) || defined(__aarch64__) || \
    defined(__mips64) || defined(__ia64) || \
    (defined(__VMS) && !defined(__vax))
/*
 * These are available even in ILP32 flavours, but even then they are
 * capable of performing 64-bit operations as efficiently as in *P64.
 * Since it's not given that we can use sizeof(void *), just shunt it.
 */
# define BIT_INTERLEAVE (0)
#else
/* Elsewhere the pointer size decides: non-zero when pointers are < 8 bytes. */
# define BIT_INTERLEAVE (sizeof(void *) < 8)
#endif

/*
 * Rotate a 32-bit value left by `offset`. The right-shift count is
 * masked with 31 so that offset == 0 never produces a shift by 32.
 */
#define ROL32(a, offset) (((a) << (offset)) | ((a) >> ((32 - (offset)) & 31)))

/*
 * Rotate the 64-bit lane `val` left by `offset` bit positions.
 *
 * When BIT_INTERLEAVE is zero this is a plain 64-bit rotate. Otherwise
 * the lane is handled as two 32-bit halves: an even offset rotates both
 * halves by offset/2, an odd offset swaps the halves, rotating the old
 * low half by offset/2 and the old high half by offset/2 + 1.
 * NOTE(review): this presumably matches a bit-interleaved lane layout
 * (even/odd bits split across the halves) -- confirm against the code
 * that loads and stores lanes.
 *
 * All callers visible here pass either the constant 1 or an entry of
 * rhotates[][], i.e. 0 <= offset <= 63.
 */
static uint64_t ROL64(uint64_t val, int offset)
{
    if (offset == 0) {
        /* handled separately: (val >> 64) below would be undefined */
        return val;
    } else if (!BIT_INTERLEAVE) {
        return (val << offset) | (val >> (64-offset));
    } else {
        uint32_t hi = (uint32_t)(val >> 32), lo = (uint32_t)val;

        if (offset & 1) {
            uint32_t tmp = hi;

            offset >>= 1;
            hi = ROL32(lo, offset);
            lo = ROL32(tmp, offset + 1);
        } else {
            offset >>= 1;
            lo = ROL32(lo, offset);
            hi = ROL32(hi, offset);
        }

        return ((uint64_t)hi << 32) | lo;
    }
}
Packit c4476c
Packit c4476c
/*
 * Left-rotation amounts applied to the individual lanes. The table is
 * indexed the same way as the state, i.e. lane A[y][x] is rotated by
 * rhotates[y][x].
 */
static const unsigned char rhotates[5][5] = {
    {  0,  1, 62, 28, 27 },
    { 36, 44,  6, 55, 20 },
    {  3, 10, 43, 25, 39 },
    { 41, 45, 15, 21,  8 },
    { 18,  2, 61, 56, 14 }
};
Packit c4476c
Packit c4476c
static const uint64_t iotas[] = {
Packit c4476c
    BIT_INTERLEAVE ? 0x0000000000000001ULL : 0x0000000000000001ULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x0000008900000000ULL : 0x0000000000008082ULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x8000008b00000000ULL : 0x800000000000808aULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x8000808000000000ULL : 0x8000000080008000ULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x0000008b00000001ULL : 0x000000000000808bULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x0000800000000001ULL : 0x0000000080000001ULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x8000808800000001ULL : 0x8000000080008081ULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x8000008200000001ULL : 0x8000000000008009ULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x0000000b00000000ULL : 0x000000000000008aULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x0000000a00000000ULL : 0x0000000000000088ULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x0000808200000001ULL : 0x0000000080008009ULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x0000800300000000ULL : 0x000000008000000aULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x0000808b00000001ULL : 0x000000008000808bULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x8000000b00000001ULL : 0x800000000000008bULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x8000008a00000001ULL : 0x8000000000008089ULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x8000008100000001ULL : 0x8000000000008003ULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x8000008100000000ULL : 0x8000000000008002ULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x8000000800000000ULL : 0x8000000000000080ULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x0000008300000000ULL : 0x000000000000800aULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x8000800300000000ULL : 0x800000008000000aULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x8000808800000001ULL : 0x8000000080008081ULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x8000008800000000ULL : 0x8000000000008080ULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x0000800000000001ULL : 0x0000000080000001ULL,
Packit c4476c
    BIT_INTERLEAVE ? 0x8000808200000000ULL : 0x8000000080008008ULL
Packit c4476c
};
Packit c4476c
Packit c4476c
#if defined(KECCAK_REF)
Packit c4476c
/*
 * This is a straightforward or "maximum clarity" implementation aiming
 * to resemble section 3.2 of the FIPS PUB 202 "SHA-3 Standard:
 * Permutation-Based Hash and Extendible-Output Functions" as much as
 * possible. With one caveat: because of the way C stores matrices,
 * references to A[x,y] in the specification are presented as A[y][x].
 * The implementation unrolls the inner x-loops so that modulo 5
 * operations are explicitly pre-computed.
 */
Packit c4476c
/*
 * Theta step, in place on A.
 *
 * C[x] is the XOR of the five lanes of column x. D[x] combines the
 * parity of column x-1 with the parity of column x+1 rotated left by
 * one bit (column indices taken modulo 5), and is then XORed into every
 * lane of column x.
 */
static void Theta(uint64_t A[5][5])
{
    uint64_t C[5], D[5];
    size_t y;

    C[0] = A[0][0];
    C[1] = A[0][1];
    C[2] = A[0][2];
    C[3] = A[0][3];
    C[4] = A[0][4];

    for (y = 1; y < 5; y++) {
        C[0] ^= A[y][0];
        C[1] ^= A[y][1];
        C[2] ^= A[y][2];
        C[3] ^= A[y][3];
        C[4] ^= A[y][4];
    }

    D[0] = ROL64(C[1], 1) ^ C[4];
    D[1] = ROL64(C[2], 1) ^ C[0];
    D[2] = ROL64(C[3], 1) ^ C[1];
    D[3] = ROL64(C[4], 1) ^ C[2];
    D[4] = ROL64(C[0], 1) ^ C[3];

    for (y = 0; y < 5; y++) {
        A[y][0] ^= D[0];
        A[y][1] ^= D[1];
        A[y][2] ^= D[2];
        A[y][3] ^= D[3];
        A[y][4] ^= D[4];
    }
}
Packit c4476c
Packit c4476c
static void Rho(uint64_t A[5][5])
Packit c4476c
{
Packit c4476c
    size_t y;
Packit c4476c
Packit c4476c
    for (y = 0; y < 5; y++) {
Packit c4476c
        A[y][0] = ROL64(A[y][0], rhotates[y][0]);
Packit c4476c
        A[y][1] = ROL64(A[y][1], rhotates[y][1]);
Packit c4476c
        A[y][2] = ROL64(A[y][2], rhotates[y][2]);
Packit c4476c
        A[y][3] = ROL64(A[y][3], rhotates[y][3]);
Packit c4476c
        A[y][4] = ROL64(A[y][4], rhotates[y][4]);
Packit c4476c
    }
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
 * Pi step, in place on A: lanes are only moved, never modified. The
 * state is first copied to T so that every source lane is still intact
 * when it is read; the assignments below are the formula from the
 * comment with the modulo 5 arithmetic worked out by hand.
 */
static void Pi(uint64_t A[5][5])
{
    uint64_t T[5][5];

    /*
     * T = A
     * A[y][x] = T[x][(3*y+x)%5]
     */
    memcpy(T, A, sizeof(T));

    A[0][0] = T[0][0];
    A[0][1] = T[1][1];
    A[0][2] = T[2][2];
    A[0][3] = T[3][3];
    A[0][4] = T[4][4];

    A[1][0] = T[0][3];
    A[1][1] = T[1][4];
    A[1][2] = T[2][0];
    A[1][3] = T[3][1];
    A[1][4] = T[4][2];

    A[2][0] = T[0][1];
    A[2][1] = T[1][2];
    A[2][2] = T[2][3];
    A[2][3] = T[3][4];
    A[2][4] = T[4][0];

    A[3][0] = T[0][4];
    A[3][1] = T[1][0];
    A[3][2] = T[2][1];
    A[3][3] = T[3][2];
    A[3][4] = T[4][3];

    A[4][0] = T[0][2];
    A[4][1] = T[1][3];
    A[4][2] = T[2][4];
    A[4][3] = T[3][0];
    A[4][4] = T[4][1];
}
Packit c4476c
Packit c4476c
/*
 * Chi step, in place on A, one row at a time: each lane is XORed with
 * the AND of the complement of its right neighbour and the lane two
 * positions to the right (positions wrap around within the row).
 * The new row is built in C first because every output lane depends on
 * the not-yet-updated values of two other lanes of the same row.
 */
static void Chi(uint64_t A[5][5])
{
    uint64_t C[5];
    size_t y;

    for (y = 0; y < 5; y++) {
        C[0] = A[y][0] ^ (~A[y][1] & A[y][2]);
        C[1] = A[y][1] ^ (~A[y][2] & A[y][3]);
        C[2] = A[y][2] ^ (~A[y][3] & A[y][4]);
        C[3] = A[y][3] ^ (~A[y][4] & A[y][0]);
        C[4] = A[y][4] ^ (~A[y][0] & A[y][1]);

        A[y][0] = C[0];
        A[y][1] = C[1];
        A[y][2] = C[2];
        A[y][3] = C[3];
        A[y][4] = C[4];
    }
}
Packit c4476c
Packit c4476c
static void Iota(uint64_t A[5][5], size_t i)
Packit c4476c
{
Packit c4476c
    assert(i < (sizeof(iotas) / sizeof(iotas[0])));
Packit c4476c
    A[0][0] ^= iotas[i];
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
 * Apply 24 rounds to the state A in place. Each round runs the five
 * steps in the order Theta, Rho, Pi, Chi, Iota, with the round index
 * selecting the Iota constant.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    size_t i;

    for (i = 0; i < 24; i++) {
        Theta(A);
        Rho(A);
        Pi(A);
        Chi(A);
        Iota(A, i);
    }
}
Packit c4476c
Packit c4476c
#elif defined(KECCAK_1X)
Packit c4476c
/*
 * This implementation is an optimization of the above code featuring
 * unrolling of even the y-loops, their fusion and code motion. It also
 * minimizes temporary storage. A compiler would normally do all these
 * things for you; the purpose of the manual optimization is to provide
 * an "unobscured" reference for an assembly implementation [in case
 * this approach is chosen for implementation on some platform]. In a
 * nutshell it's the equivalent of the "plane-per-plane processing"
 * approach discussed in section 2.4 of "Keccak implementation overview".
 */
Packit c4476c
static void Round(uint64_t A[5][5], size_t i)
Packit c4476c
{
Packit c4476c
    uint64_t C[5], E[2];        /* registers */
Packit c4476c
    uint64_t D[5], T[2][5];     /* memory    */
Packit c4476c
Packit c4476c
    assert(i < (sizeof(iotas) / sizeof(iotas[0])));
Packit c4476c
Packit c4476c
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
Packit c4476c
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
Packit c4476c
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
Packit c4476c
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
Packit c4476c
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
Packit c4476c
Packit c4476c
#if defined(__arm__)
Packit c4476c
    D[1] = E[0] = ROL64(C[2], 1) ^ C[0];
Packit c4476c
    D[4] = E[1] = ROL64(C[0], 1) ^ C[3];
Packit c4476c
    D[0] = C[0] = ROL64(C[1], 1) ^ C[4];
Packit c4476c
    D[2] = C[1] = ROL64(C[3], 1) ^ C[1];
Packit c4476c
    D[3] = C[2] = ROL64(C[4], 1) ^ C[2];
Packit c4476c
Packit c4476c
    T[0][0] = A[3][0] ^ C[0]; /* borrow T[0][0] */
Packit c4476c
    T[0][1] = A[0][1] ^ E[0]; /* D[1] */
Packit c4476c
    T[0][2] = A[0][2] ^ C[1]; /* D[2] */
Packit c4476c
    T[0][3] = A[0][3] ^ C[2]; /* D[3] */
Packit c4476c
    T[0][4] = A[0][4] ^ E[1]; /* D[4] */
Packit c4476c
Packit c4476c
    C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]);   /* D[3] */
Packit c4476c
    C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]);   /* D[4] */
Packit c4476c
    C[0] =       A[0][0] ^ C[0]; /* rotate by 0 */  /* D[0] */
Packit c4476c
    C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]);   /* D[2] */
Packit c4476c
    C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]);   /* D[1] */
Packit c4476c
#else
Packit c4476c
    D[0] = ROL64(C[1], 1) ^ C[4];
Packit c4476c
    D[1] = ROL64(C[2], 1) ^ C[0];
Packit c4476c
    D[2] = ROL64(C[3], 1) ^ C[1];
Packit c4476c
    D[3] = ROL64(C[4], 1) ^ C[2];
Packit c4476c
    D[4] = ROL64(C[0], 1) ^ C[3];
Packit c4476c
Packit c4476c
    T[0][0] = A[3][0] ^ D[0]; /* borrow T[0][0] */
Packit c4476c
    T[0][1] = A[0][1] ^ D[1];
Packit c4476c
    T[0][2] = A[0][2] ^ D[2];
Packit c4476c
    T[0][3] = A[0][3] ^ D[3];
Packit c4476c
    T[0][4] = A[0][4] ^ D[4];
Packit c4476c
Packit c4476c
    C[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
Packit c4476c
    C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
Packit c4476c
    C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
Packit c4476c
    C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
Packit c4476c
    C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
Packit c4476c
#endif
Packit c4476c
    A[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
Packit c4476c
    A[0][1] = C[1] ^ (~C[2] & C[3]);
Packit c4476c
    A[0][2] = C[2] ^ (~C[3] & C[4]);
Packit c4476c
    A[0][3] = C[3] ^ (~C[4] & C[0]);
Packit c4476c
    A[0][4] = C[4] ^ (~C[0] & C[1]);
Packit c4476c
Packit c4476c
    T[1][0] = A[1][0] ^ (C[3] = D[0]);
Packit c4476c
    T[1][1] = A[2][1] ^ (C[4] = D[1]); /* borrow T[1][1] */
Packit c4476c
    T[1][2] = A[1][2] ^ (E[0] = D[2]);
Packit c4476c
    T[1][3] = A[1][3] ^ (E[1] = D[3]);
Packit c4476c
    T[1][4] = A[2][4] ^ (C[2] = D[4]); /* borrow T[1][4] */
Packit c4476c
Packit c4476c
    C[0] = ROL64(T[0][3],        rhotates[0][3]);
Packit c4476c
    C[1] = ROL64(A[1][4] ^ C[2], rhotates[1][4]);   /* D[4] */
Packit c4476c
    C[2] = ROL64(A[2][0] ^ C[3], rhotates[2][0]);   /* D[0] */
Packit c4476c
    C[3] = ROL64(A[3][1] ^ C[4], rhotates[3][1]);   /* D[1] */
Packit c4476c
    C[4] = ROL64(A[4][2] ^ E[0], rhotates[4][2]);   /* D[2] */
Packit c4476c
Packit c4476c
    A[1][0] = C[0] ^ (~C[1] & C[2]);
Packit c4476c
    A[1][1] = C[1] ^ (~C[2] & C[3]);
Packit c4476c
    A[1][2] = C[2] ^ (~C[3] & C[4]);
Packit c4476c
    A[1][3] = C[3] ^ (~C[4] & C[0]);
Packit c4476c
    A[1][4] = C[4] ^ (~C[0] & C[1]);
Packit c4476c
Packit c4476c
    C[0] = ROL64(T[0][1],        rhotates[0][1]);
Packit c4476c
    C[1] = ROL64(T[1][2],        rhotates[1][2]);
Packit c4476c
    C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
Packit c4476c
    C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
Packit c4476c
    C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
Packit c4476c
Packit c4476c
    A[2][0] = C[0] ^ (~C[1] & C[2]);
Packit c4476c
    A[2][1] = C[1] ^ (~C[2] & C[3]);
Packit c4476c
    A[2][2] = C[2] ^ (~C[3] & C[4]);
Packit c4476c
    A[2][3] = C[3] ^ (~C[4] & C[0]);
Packit c4476c
    A[2][4] = C[4] ^ (~C[0] & C[1]);
Packit c4476c
Packit c4476c
    C[0] = ROL64(T[0][4],        rhotates[0][4]);
Packit c4476c
    C[1] = ROL64(T[1][0],        rhotates[1][0]);
Packit c4476c
    C[2] = ROL64(T[1][1],        rhotates[2][1]); /* originally A[2][1] */
Packit c4476c
    C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
Packit c4476c
    C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
Packit c4476c
Packit c4476c
    A[3][0] = C[0] ^ (~C[1] & C[2]);
Packit c4476c
    A[3][1] = C[1] ^ (~C[2] & C[3]);
Packit c4476c
    A[3][2] = C[2] ^ (~C[3] & C[4]);
Packit c4476c
    A[3][3] = C[3] ^ (~C[4] & C[0]);
Packit c4476c
    A[3][4] = C[4] ^ (~C[0] & C[1]);
Packit c4476c
Packit c4476c
    C[0] = ROL64(T[0][2],        rhotates[0][2]);
Packit c4476c
    C[1] = ROL64(T[1][3],        rhotates[1][3]);
Packit c4476c
    C[2] = ROL64(T[1][4],        rhotates[2][4]); /* originally A[2][4] */
Packit c4476c
    C[3] = ROL64(T[0][0],        rhotates[3][0]); /* originally A[3][0] */
Packit c4476c
    C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
Packit c4476c
Packit c4476c
    A[4][0] = C[0] ^ (~C[1] & C[2]);
Packit c4476c
    A[4][1] = C[1] ^ (~C[2] & C[3]);
Packit c4476c
    A[4][2] = C[2] ^ (~C[3] & C[4]);
Packit c4476c
    A[4][3] = C[3] ^ (~C[4] & C[0]);
Packit c4476c
    A[4][4] = C[4] ^ (~C[0] & C[1]);
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
 * Apply 24 rounds to the state A in place by calling Round() with the
 * round indices 0..23 in order.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    size_t i;

    for (i = 0; i < 24; i++) {
        Round(A, i);
    }
}
Packit c4476c
Packit c4476c
#elif defined(KECCAK_1X_ALT)
Packit c4476c
/*
 * This is a variant of the above KECCAK_1X that reduces the requirement
 * for temporary storage even further, but at the cost of more updates
 * to A[][]. It's less suitable if A[][] is memory bound, but better if
 * it's register bound.
 */
Packit c4476c
Packit c4476c
static void Round(uint64_t A[5][5], size_t i)
Packit c4476c
{
Packit c4476c
    uint64_t C[5], D[5];
Packit c4476c
Packit c4476c
    assert(i < (sizeof(iotas) / sizeof(iotas[0])));
Packit c4476c
Packit c4476c
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
Packit c4476c
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
Packit c4476c
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
Packit c4476c
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
Packit c4476c
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
Packit c4476c
Packit c4476c
    D[1] = C[0] ^  ROL64(C[2], 1);
Packit c4476c
    D[2] = C[1] ^  ROL64(C[3], 1);
Packit c4476c
    D[3] = C[2] ^= ROL64(C[4], 1);
Packit c4476c
    D[4] = C[3] ^= ROL64(C[0], 1);
Packit c4476c
    D[0] = C[4] ^= ROL64(C[1], 1);
Packit c4476c
Packit c4476c
    A[0][1] ^= D[1];
Packit c4476c
    A[1][1] ^= D[1];
Packit c4476c
    A[2][1] ^= D[1];
Packit c4476c
    A[3][1] ^= D[1];
Packit c4476c
    A[4][1] ^= D[1];
Packit c4476c
Packit c4476c
    A[0][2] ^= D[2];
Packit c4476c
    A[1][2] ^= D[2];
Packit c4476c
    A[2][2] ^= D[2];
Packit c4476c
    A[3][2] ^= D[2];
Packit c4476c
    A[4][2] ^= D[2];
Packit c4476c
Packit c4476c
    A[0][3] ^= C[2];
Packit c4476c
    A[1][3] ^= C[2];
Packit c4476c
    A[2][3] ^= C[2];
Packit c4476c
    A[3][3] ^= C[2];
Packit c4476c
    A[4][3] ^= C[2];
Packit c4476c
Packit c4476c
    A[0][4] ^= C[3];
Packit c4476c
    A[1][4] ^= C[3];
Packit c4476c
    A[2][4] ^= C[3];
Packit c4476c
    A[3][4] ^= C[3];
Packit c4476c
    A[4][4] ^= C[3];
Packit c4476c
Packit c4476c
    A[0][0] ^= C[4];
Packit c4476c
    A[1][0] ^= C[4];
Packit c4476c
    A[2][0] ^= C[4];
Packit c4476c
    A[3][0] ^= C[4];
Packit c4476c
    A[4][0] ^= C[4];
Packit c4476c
Packit c4476c
    C[1] = A[0][1];
Packit c4476c
    C[2] = A[0][2];
Packit c4476c
    C[3] = A[0][3];
Packit c4476c
    C[4] = A[0][4];
Packit c4476c
Packit c4476c
    A[0][1] = ROL64(A[1][1], rhotates[1][1]);
Packit c4476c
    A[0][2] = ROL64(A[2][2], rhotates[2][2]);
Packit c4476c
    A[0][3] = ROL64(A[3][3], rhotates[3][3]);
Packit c4476c
    A[0][4] = ROL64(A[4][4], rhotates[4][4]);
Packit c4476c
Packit c4476c
    A[1][1] = ROL64(A[1][4], rhotates[1][4]);
Packit c4476c
    A[2][2] = ROL64(A[2][3], rhotates[2][3]);
Packit c4476c
    A[3][3] = ROL64(A[3][2], rhotates[3][2]);
Packit c4476c
    A[4][4] = ROL64(A[4][1], rhotates[4][1]);
Packit c4476c
Packit c4476c
    A[1][4] = ROL64(A[4][2], rhotates[4][2]);
Packit c4476c
    A[2][3] = ROL64(A[3][4], rhotates[3][4]);
Packit c4476c
    A[3][2] = ROL64(A[2][1], rhotates[2][1]);
Packit c4476c
    A[4][1] = ROL64(A[1][3], rhotates[1][3]);
Packit c4476c
Packit c4476c
    A[4][2] = ROL64(A[2][4], rhotates[2][4]);
Packit c4476c
    A[3][4] = ROL64(A[4][3], rhotates[4][3]);
Packit c4476c
    A[2][1] = ROL64(A[1][2], rhotates[1][2]);
Packit c4476c
    A[1][3] = ROL64(A[3][1], rhotates[3][1]);
Packit c4476c
Packit c4476c
    A[2][4] = ROL64(A[4][0], rhotates[4][0]);
Packit c4476c
    A[4][3] = ROL64(A[3][0], rhotates[3][0]);
Packit c4476c
    A[1][2] = ROL64(A[2][0], rhotates[2][0]);
Packit c4476c
    A[3][1] = ROL64(A[1][0], rhotates[1][0]);
Packit c4476c
Packit c4476c
    A[1][0] = ROL64(C[3],    rhotates[0][3]);
Packit c4476c
    A[2][0] = ROL64(C[1],    rhotates[0][1]);
Packit c4476c
    A[3][0] = ROL64(C[4],    rhotates[0][4]);
Packit c4476c
    A[4][0] = ROL64(C[2],    rhotates[0][2]);
Packit c4476c
Packit c4476c
    C[0] = A[0][0];
Packit c4476c
    C[1] = A[1][0];
Packit c4476c
    D[0] = A[0][1];
Packit c4476c
    D[1] = A[1][1];
Packit c4476c
Packit c4476c
    A[0][0] ^= (~A[0][1] & A[0][2]);
Packit c4476c
    A[1][0] ^= (~A[1][1] & A[1][2]);
Packit c4476c
    A[0][1] ^= (~A[0][2] & A[0][3]);
Packit c4476c
    A[1][1] ^= (~A[1][2] & A[1][3]);
Packit c4476c
    A[0][2] ^= (~A[0][3] & A[0][4]);
Packit c4476c
    A[1][2] ^= (~A[1][3] & A[1][4]);
Packit c4476c
    A[0][3] ^= (~A[0][4] & C[0]);
Packit c4476c
    A[1][3] ^= (~A[1][4] & C[1]);
Packit c4476c
    A[0][4] ^= (~C[0]    & D[0]);
Packit c4476c
    A[1][4] ^= (~C[1]    & D[1]);
Packit c4476c
Packit c4476c
    C[2] = A[2][0];
Packit c4476c
    C[3] = A[3][0];
Packit c4476c
    D[2] = A[2][1];
Packit c4476c
    D[3] = A[3][1];
Packit c4476c
Packit c4476c
    A[2][0] ^= (~A[2][1] & A[2][2]);
Packit c4476c
    A[3][0] ^= (~A[3][1] & A[3][2]);
Packit c4476c
    A[2][1] ^= (~A[2][2] & A[2][3]);
Packit c4476c
    A[3][1] ^= (~A[3][2] & A[3][3]);
Packit c4476c
    A[2][2] ^= (~A[2][3] & A[2][4]);
Packit c4476c
    A[3][2] ^= (~A[3][3] & A[3][4]);
Packit c4476c
    A[2][3] ^= (~A[2][4] & C[2]);
Packit c4476c
    A[3][3] ^= (~A[3][4] & C[3]);
Packit c4476c
    A[2][4] ^= (~C[2]    & D[2]);
Packit c4476c
    A[3][4] ^= (~C[3]    & D[3]);
Packit c4476c
Packit c4476c
    C[4] = A[4][0];
Packit c4476c
    D[4] = A[4][1];
Packit c4476c
Packit c4476c
    A[4][0] ^= (~A[4][1] & A[4][2]);
Packit c4476c
    A[4][1] ^= (~A[4][2] & A[4][3]);
Packit c4476c
    A[4][2] ^= (~A[4][3] & A[4][4]);
Packit c4476c
    A[4][3] ^= (~A[4][4] & C[4]);
Packit c4476c
    A[4][4] ^= (~C[4]    & D[4]);
Packit c4476c
    A[0][0] ^= iotas[i];
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
 * Apply 24 rounds to the state A in place by calling Round() with the
 * round indices 0..23 in order.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    size_t i;

    for (i = 0; i < 24; i++) {
        Round(A, i);
    }
}
Packit c4476c
Packit c4476c
#elif defined(KECCAK_2X)
Packit c4476c
/*
 * This implementation is a variant of KECCAK_1X above with the
 * outer-most round loop unrolled twice. This allows taking temporary
 * storage out of the round procedure and simplifying references to it
 * by alternating it with actual data (see round loop below). Originally
 * it was meant rather as a reference for an assembly implementation,
 * but it seems to play best with compilers [as well as provide the best
 * instruction per processed byte ratio at minimal round unroll
 * factor]...
 */
Packit c4476c
static void Round(uint64_t R[5][5], uint64_t A[5][5], size_t i)
Packit c4476c
{
Packit c4476c
    uint64_t C[5], D[5];
Packit c4476c
Packit c4476c
    assert(i < (sizeof(iotas) / sizeof(iotas[0])));
Packit c4476c
Packit c4476c
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
Packit c4476c
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
Packit c4476c
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
Packit c4476c
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
Packit c4476c
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
Packit c4476c
Packit c4476c
    D[0] = ROL64(C[1], 1) ^ C[4];
Packit c4476c
    D[1] = ROL64(C[2], 1) ^ C[0];
Packit c4476c
    D[2] = ROL64(C[3], 1) ^ C[1];
Packit c4476c
    D[3] = ROL64(C[4], 1) ^ C[2];
Packit c4476c
    D[4] = ROL64(C[0], 1) ^ C[3];
Packit c4476c
Packit c4476c
    C[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
Packit c4476c
    C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
Packit c4476c
    C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
Packit c4476c
    C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
Packit c4476c
    C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
Packit c4476c
Packit c4476c
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
Packit c4476c
    R[0][0] = C[0] ^ ( C[1] | C[2]) ^ iotas[i];
Packit c4476c
    R[0][1] = C[1] ^ (~C[2] | C[3]);
Packit c4476c
    R[0][2] = C[2] ^ ( C[3] & C[4]);
Packit c4476c
    R[0][3] = C[3] ^ ( C[4] | C[0]);
Packit c4476c
    R[0][4] = C[4] ^ ( C[0] & C[1]);
Packit c4476c
#else
Packit c4476c
    R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
Packit c4476c
    R[0][1] = C[1] ^ (~C[2] & C[3]);
Packit c4476c
    R[0][2] = C[2] ^ (~C[3] & C[4]);
Packit c4476c
    R[0][3] = C[3] ^ (~C[4] & C[0]);
Packit c4476c
    R[0][4] = C[4] ^ (~C[0] & C[1]);
Packit c4476c
#endif
Packit c4476c
Packit c4476c
    C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
Packit c4476c
    C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
Packit c4476c
    C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
Packit c4476c
    C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
Packit c4476c
    C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
Packit c4476c
Packit c4476c
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
Packit c4476c
    R[1][0] = C[0] ^ (C[1] |  C[2]);
Packit c4476c
    R[1][1] = C[1] ^ (C[2] &  C[3]);
Packit c4476c
    R[1][2] = C[2] ^ (C[3] | ~C[4]);
Packit c4476c
    R[1][3] = C[3] ^ (C[4] |  C[0]);
Packit c4476c
    R[1][4] = C[4] ^ (C[0] &  C[1]);
Packit c4476c
#else
Packit c4476c
    R[1][0] = C[0] ^ (~C[1] & C[2]);
Packit c4476c
    R[1][1] = C[1] ^ (~C[2] & C[3]);
Packit c4476c
    R[1][2] = C[2] ^ (~C[3] & C[4]);
Packit c4476c
    R[1][3] = C[3] ^ (~C[4] & C[0]);
Packit c4476c
    R[1][4] = C[4] ^ (~C[0] & C[1]);
Packit c4476c
#endif
Packit c4476c
Packit c4476c
    C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
Packit c4476c
    C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
Packit c4476c
    C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
Packit c4476c
    C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
Packit c4476c
    C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
Packit c4476c
Packit c4476c
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
Packit c4476c
    R[2][0] =  C[0] ^ ( C[1] | C[2]);
Packit c4476c
    R[2][1] =  C[1] ^ ( C[2] & C[3]);
Packit c4476c
    R[2][2] =  C[2] ^ (~C[3] & C[4]);
Packit c4476c
    R[2][3] = ~C[3] ^ ( C[4] | C[0]);
Packit c4476c
    R[2][4] =  C[4] ^ ( C[0] & C[1]);
Packit c4476c
#else
Packit c4476c
    R[2][0] = C[0] ^ (~C[1] & C[2]);
Packit c4476c
    R[2][1] = C[1] ^ (~C[2] & C[3]);
Packit c4476c
    R[2][2] = C[2] ^ (~C[3] & C[4]);
Packit c4476c
    R[2][3] = C[3] ^ (~C[4] & C[0]);
Packit c4476c
    R[2][4] = C[4] ^ (~C[0] & C[1]);
Packit c4476c
#endif
Packit c4476c
Packit c4476c
    C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
Packit c4476c
    C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
Packit c4476c
    C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
Packit c4476c
    C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
Packit c4476c
    C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
Packit c4476c
Packit c4476c
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
Packit c4476c
    R[3][0] =  C[0] ^ ( C[1] & C[2]);
Packit c4476c
    R[3][1] =  C[1] ^ ( C[2] | C[3]);
Packit c4476c
    R[3][2] =  C[2] ^ (~C[3] | C[4]);
Packit c4476c
    R[3][3] = ~C[3] ^ ( C[4] & C[0]);
Packit c4476c
    R[3][4] =  C[4] ^ ( C[0] | C[1]);
Packit c4476c
#else
Packit c4476c
    R[3][0] = C[0] ^ (~C[1] & C[2]);
Packit c4476c
    R[3][1] = C[1] ^ (~C[2] & C[3]);
Packit c4476c
    R[3][2] = C[2] ^ (~C[3] & C[4]);
Packit c4476c
    R[3][3] = C[3] ^ (~C[4] & C[0]);
Packit c4476c
    R[3][4] = C[4] ^ (~C[0] & C[1]);
Packit c4476c
#endif
Packit c4476c
Packit c4476c
    C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
Packit c4476c
    C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
Packit c4476c
    C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
Packit c4476c
    C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
Packit c4476c
    C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
Packit c4476c
Packit c4476c
#ifdef KECCAK_COMPLEMENTING_TRANSFORM
Packit c4476c
    R[4][0] =  C[0] ^ (~C[1] & C[2]);
Packit c4476c
    R[4][1] = ~C[1] ^ ( C[2] | C[3]);
Packit c4476c
    R[4][2] =  C[2] ^ ( C[3] & C[4]);
Packit c4476c
    R[4][3] =  C[3] ^ ( C[4] | C[0]);
Packit c4476c
    R[4][4] =  C[4] ^ ( C[0] & C[1]);
Packit c4476c
#else
Packit c4476c
    R[4][0] = C[0] ^ (~C[1] & C[2]);
Packit c4476c
    R[4][1] = C[1] ^ (~C[2] & C[3]);
Packit c4476c
    R[4][2] = C[2] ^ (~C[3] & C[4]);
Packit c4476c
    R[4][3] = C[3] ^ (~C[4] & C[0]);
Packit c4476c
    R[4][4] = C[4] ^ (~C[0] & C[1]);
Packit c4476c
#endif
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
 * Apply 24 rounds to the state A.
 *
 * Round() of this variant writes to a different array than it reads
 * from, so the loop handles two rounds per iteration: A -> T with the
 * even round index, then T -> A with the odd one. The final result
 * therefore ends up in A again.
 *
 * With KECCAK_COMPLEMENTING_TRANSFORM defined, six lanes are
 * complemented before the first round and complemented again after
 * the last one, matching the alternative formulas in Round().
 */
static void KeccakF1600(uint64_t A[5][5])
{
    uint64_t T[5][5];
    size_t i;

#ifdef KECCAK_COMPLEMENTING_TRANSFORM
    A[0][1] = ~A[0][1];
    A[0][2] = ~A[0][2];
    A[1][3] = ~A[1][3];
    A[2][2] = ~A[2][2];
    A[3][2] = ~A[3][2];
    A[4][0] = ~A[4][0];
#endif

    for (i = 0; i < 24; i += 2) {
        Round(T, A, i);
        Round(A, T, i + 1);
    }

#ifdef KECCAK_COMPLEMENTING_TRANSFORM
    A[0][1] = ~A[0][1];
    A[0][2] = ~A[0][2];
    A[1][3] = ~A[1][3];
    A[2][2] = ~A[2][2];
    A[3][2] = ~A[3][2];
    A[4][0] = ~A[4][0];
#endif
}
Packit c4476c
Packit c4476c
#else   /* define KECCAK_INPLACE to compile this code path */
Packit c4476c
/*
 * This implementation is KECCAK_1X from above combined 4 times with
 * a twist that allows omitting temporary storage and performing
 * in-place processing. It's discussed in section 2.5 of "Keccak
 * implementation overview". It's likely to be best suited for
 * processors with a large register bank... On the other hand, a
 * processor with a large register bank can as well use KECCAK_1X_ALT;
 * it would be as fast but much more compact...
 */
Packit c4476c
static void FourRounds(uint64_t A[5][5], size_t i)
Packit c4476c
{
Packit c4476c
    uint64_t B[5], C[5], D[5];
Packit c4476c
Packit c4476c
    assert(i <= (sizeof(iotas) / sizeof(iotas[0]) - 4));
Packit c4476c
Packit c4476c
    /* Round 4*n */
Packit c4476c
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
Packit c4476c
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
Packit c4476c
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
Packit c4476c
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
Packit c4476c
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
Packit c4476c
Packit c4476c
    D[0] = ROL64(C[1], 1) ^ C[4];
Packit c4476c
    D[1] = ROL64(C[2], 1) ^ C[0];
Packit c4476c
    D[2] = ROL64(C[3], 1) ^ C[1];
Packit c4476c
    D[3] = ROL64(C[4], 1) ^ C[2];
Packit c4476c
    D[4] = ROL64(C[0], 1) ^ C[3];
Packit c4476c
Packit c4476c
    B[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
Packit c4476c
    B[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
Packit c4476c
    B[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
Packit c4476c
    B[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
Packit c4476c
    B[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
Packit c4476c
Packit c4476c
    C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i];
Packit c4476c
    C[1] = A[1][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    C[2] = A[2][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    C[3] = A[3][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    C[4] = A[4][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    B[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
Packit c4476c
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
Packit c4476c
    B[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
Packit c4476c
    B[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
Packit c4476c
    B[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
Packit c4476c
Packit c4476c
    C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
Packit c4476c
    C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    B[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
Packit c4476c
    B[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
Packit c4476c
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
Packit c4476c
    B[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
Packit c4476c
    B[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
Packit c4476c
Packit c4476c
    C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
Packit c4476c
    C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    B[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
Packit c4476c
    B[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
Packit c4476c
    B[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
Packit c4476c
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
Packit c4476c
    B[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
Packit c4476c
Packit c4476c
    C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
Packit c4476c
    C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    B[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
Packit c4476c
    B[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
Packit c4476c
    B[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
Packit c4476c
    B[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
Packit c4476c
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
Packit c4476c
Packit c4476c
    C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
Packit c4476c
    C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    /* Round 4*n+1 */
Packit c4476c
    D[0] = ROL64(C[1], 1) ^ C[4];
Packit c4476c
    D[1] = ROL64(C[2], 1) ^ C[0];
Packit c4476c
    D[2] = ROL64(C[3], 1) ^ C[1];
Packit c4476c
    D[3] = ROL64(C[4], 1) ^ C[2];
Packit c4476c
    D[4] = ROL64(C[0], 1) ^ C[3];
Packit c4476c
Packit c4476c
    B[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
Packit c4476c
    B[1] = ROL64(A[3][1] ^ D[1], rhotates[1][1]);
Packit c4476c
    B[2] = ROL64(A[1][2] ^ D[2], rhotates[2][2]);
Packit c4476c
    B[3] = ROL64(A[4][3] ^ D[3], rhotates[3][3]);
Packit c4476c
    B[4] = ROL64(A[2][4] ^ D[4], rhotates[4][4]);
Packit c4476c
Packit c4476c
    C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 1];
Packit c4476c
    C[1] = A[3][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    C[2] = A[1][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    C[3] = A[4][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    C[4] = A[2][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    B[0] = ROL64(A[3][3] ^ D[3], rhotates[0][3]);
Packit c4476c
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
Packit c4476c
    B[2] = ROL64(A[4][0] ^ D[0], rhotates[2][0]);
Packit c4476c
    B[3] = ROL64(A[2][1] ^ D[1], rhotates[3][1]);
Packit c4476c
    B[4] = ROL64(A[0][2] ^ D[2], rhotates[4][2]);
Packit c4476c
Packit c4476c
    C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
Packit c4476c
    C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    B[0] = ROL64(A[1][1] ^ D[1], rhotates[0][1]);
Packit c4476c
    B[1] = ROL64(A[4][2] ^ D[2], rhotates[1][2]);
Packit c4476c
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
Packit c4476c
    B[3] = ROL64(A[0][4] ^ D[4], rhotates[3][4]);
Packit c4476c
    B[4] = ROL64(A[3][0] ^ D[0], rhotates[4][0]);
Packit c4476c
Packit c4476c
    C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
Packit c4476c
    C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    B[0] = ROL64(A[4][4] ^ D[4], rhotates[0][4]);
Packit c4476c
    B[1] = ROL64(A[2][0] ^ D[0], rhotates[1][0]);
Packit c4476c
    B[2] = ROL64(A[0][1] ^ D[1], rhotates[2][1]);
Packit c4476c
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
Packit c4476c
    B[4] = ROL64(A[1][3] ^ D[3], rhotates[4][3]);
Packit c4476c
Packit c4476c
    C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
Packit c4476c
    C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    B[0] = ROL64(A[2][2] ^ D[2], rhotates[0][2]);
Packit c4476c
    B[1] = ROL64(A[0][3] ^ D[3], rhotates[1][3]);
Packit c4476c
    B[2] = ROL64(A[3][4] ^ D[4], rhotates[2][4]);
Packit c4476c
    B[3] = ROL64(A[1][0] ^ D[0], rhotates[3][0]);
Packit c4476c
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
Packit c4476c
Packit c4476c
    C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
Packit c4476c
    C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    /* Round 4*n+2 */
Packit c4476c
    D[0] = ROL64(C[1], 1) ^ C[4];
Packit c4476c
    D[1] = ROL64(C[2], 1) ^ C[0];
Packit c4476c
    D[2] = ROL64(C[3], 1) ^ C[1];
Packit c4476c
    D[3] = ROL64(C[4], 1) ^ C[2];
Packit c4476c
    D[4] = ROL64(C[0], 1) ^ C[3];
Packit c4476c
Packit c4476c
    B[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
Packit c4476c
    B[1] = ROL64(A[2][1] ^ D[1], rhotates[1][1]);
Packit c4476c
    B[2] = ROL64(A[4][2] ^ D[2], rhotates[2][2]);
Packit c4476c
    B[3] = ROL64(A[1][3] ^ D[3], rhotates[3][3]);
Packit c4476c
    B[4] = ROL64(A[3][4] ^ D[4], rhotates[4][4]);
Packit c4476c
Packit c4476c
    C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 2];
Packit c4476c
    C[1] = A[2][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    C[2] = A[4][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    C[3] = A[1][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    C[4] = A[3][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    B[0] = ROL64(A[4][3] ^ D[3], rhotates[0][3]);
Packit c4476c
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
Packit c4476c
    B[2] = ROL64(A[3][0] ^ D[0], rhotates[2][0]);
Packit c4476c
    B[3] = ROL64(A[0][1] ^ D[1], rhotates[3][1]);
Packit c4476c
    B[4] = ROL64(A[2][2] ^ D[2], rhotates[4][2]);
Packit c4476c
Packit c4476c
    C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
Packit c4476c
    C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    B[0] = ROL64(A[3][1] ^ D[1], rhotates[0][1]);
Packit c4476c
    B[1] = ROL64(A[0][2] ^ D[2], rhotates[1][2]);
Packit c4476c
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
Packit c4476c
    B[3] = ROL64(A[4][4] ^ D[4], rhotates[3][4]);
Packit c4476c
    B[4] = ROL64(A[1][0] ^ D[0], rhotates[4][0]);
Packit c4476c
Packit c4476c
    C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
Packit c4476c
    C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    B[0] = ROL64(A[2][4] ^ D[4], rhotates[0][4]);
Packit c4476c
    B[1] = ROL64(A[4][0] ^ D[0], rhotates[1][0]);
Packit c4476c
    B[2] = ROL64(A[1][1] ^ D[1], rhotates[2][1]);
Packit c4476c
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
Packit c4476c
    B[4] = ROL64(A[0][3] ^ D[3], rhotates[4][3]);
Packit c4476c
Packit c4476c
    C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
Packit c4476c
    C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    B[0] = ROL64(A[1][2] ^ D[2], rhotates[0][2]);
Packit c4476c
    B[1] = ROL64(A[3][3] ^ D[3], rhotates[1][3]);
Packit c4476c
    B[2] = ROL64(A[0][4] ^ D[4], rhotates[2][4]);
Packit c4476c
    B[3] = ROL64(A[2][0] ^ D[0], rhotates[3][0]);
Packit c4476c
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
Packit c4476c
Packit c4476c
    C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
Packit c4476c
    C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    /* Round 4*n+3 */
Packit c4476c
    D[0] = ROL64(C[1], 1) ^ C[4];
Packit c4476c
    D[1] = ROL64(C[2], 1) ^ C[0];
Packit c4476c
    D[2] = ROL64(C[3], 1) ^ C[1];
Packit c4476c
    D[3] = ROL64(C[4], 1) ^ C[2];
Packit c4476c
    D[4] = ROL64(C[0], 1) ^ C[3];
Packit c4476c
Packit c4476c
    B[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
Packit c4476c
    B[1] = ROL64(A[0][1] ^ D[1], rhotates[1][1]);
Packit c4476c
    B[2] = ROL64(A[0][2] ^ D[2], rhotates[2][2]);
Packit c4476c
    B[3] = ROL64(A[0][3] ^ D[3], rhotates[3][3]);
Packit c4476c
    B[4] = ROL64(A[0][4] ^ D[4], rhotates[4][4]);
Packit c4476c
Packit c4476c
    /* C[0] = */ A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 3];
Packit c4476c
    /* C[1] = */ A[0][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    /* C[2] = */ A[0][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    /* C[3] = */ A[0][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    /* C[4] = */ A[0][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    B[0] = ROL64(A[1][3] ^ D[3], rhotates[0][3]);
Packit c4476c
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
Packit c4476c
    B[2] = ROL64(A[1][0] ^ D[0], rhotates[2][0]);
Packit c4476c
    B[3] = ROL64(A[1][1] ^ D[1], rhotates[3][1]);
Packit c4476c
    B[4] = ROL64(A[1][2] ^ D[2], rhotates[4][2]);
Packit c4476c
Packit c4476c
    /* C[0] ^= */ A[1][0] = B[0] ^ (~B[1] & B[2]);
Packit c4476c
    /* C[1] ^= */ A[1][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    /* C[2] ^= */ A[1][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    /* C[3] ^= */ A[1][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    /* C[4] ^= */ A[1][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    B[0] = ROL64(A[2][1] ^ D[1], rhotates[0][1]);
Packit c4476c
    B[1] = ROL64(A[2][2] ^ D[2], rhotates[1][2]);
Packit c4476c
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
Packit c4476c
    B[3] = ROL64(A[2][4] ^ D[4], rhotates[3][4]);
Packit c4476c
    B[4] = ROL64(A[2][0] ^ D[0], rhotates[4][0]);
Packit c4476c
Packit c4476c
    /* C[0] ^= */ A[2][0] = B[0] ^ (~B[1] & B[2]);
Packit c4476c
    /* C[1] ^= */ A[2][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    /* C[2] ^= */ A[2][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    /* C[3] ^= */ A[2][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    /* C[4] ^= */ A[2][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    B[0] = ROL64(A[3][4] ^ D[4], rhotates[0][4]);
Packit c4476c
    B[1] = ROL64(A[3][0] ^ D[0], rhotates[1][0]);
Packit c4476c
    B[2] = ROL64(A[3][1] ^ D[1], rhotates[2][1]);
Packit c4476c
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
Packit c4476c
    B[4] = ROL64(A[3][3] ^ D[3], rhotates[4][3]);
Packit c4476c
Packit c4476c
    /* C[0] ^= */ A[3][0] = B[0] ^ (~B[1] & B[2]);
Packit c4476c
    /* C[1] ^= */ A[3][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    /* C[2] ^= */ A[3][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    /* C[3] ^= */ A[3][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    /* C[4] ^= */ A[3][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
Packit c4476c
    B[0] = ROL64(A[4][2] ^ D[2], rhotates[0][2]);
Packit c4476c
    B[1] = ROL64(A[4][3] ^ D[3], rhotates[1][3]);
Packit c4476c
    B[2] = ROL64(A[4][4] ^ D[4], rhotates[2][4]);
Packit c4476c
    B[3] = ROL64(A[4][0] ^ D[0], rhotates[3][0]);
Packit c4476c
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
Packit c4476c
Packit c4476c
    /* C[0] ^= */ A[4][0] = B[0] ^ (~B[1] & B[2]);
Packit c4476c
    /* C[1] ^= */ A[4][1] = B[1] ^ (~B[2] & B[3]);
Packit c4476c
    /* C[2] ^= */ A[4][2] = B[2] ^ (~B[3] & B[4]);
Packit c4476c
    /* C[3] ^= */ A[4][3] = B[3] ^ (~B[4] & B[0]);
Packit c4476c
    /* C[4] ^= */ A[4][4] = B[4] ^ (~B[0] & B[1]);
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
 * Keccak-f[1600] for the KECCAK_INPLACE code path: FourRounds() is
 * called six times on |A| with starting round indices 0, 4, ..., 20.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    size_t first_round = 0;

    while (first_round < 24) {
        FourRounds(A, first_round);
        first_round += 4;
    }
}
Packit c4476c
Packit c4476c
#endif
Packit c4476c
Packit c4476c
static uint64_t BitInterleave(uint64_t Ai)
Packit c4476c
{
Packit c4476c
    if (BIT_INTERLEAVE) {
Packit c4476c
        uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
Packit c4476c
        uint32_t t0, t1;
Packit c4476c
Packit c4476c
        t0 = lo & 0x55555555;
Packit c4476c
        t0 |= t0 >> 1;  t0 &= 0x33333333;
Packit c4476c
        t0 |= t0 >> 2;  t0 &= 0x0f0f0f0f;
Packit c4476c
        t0 |= t0 >> 4;  t0 &= 0x00ff00ff;
Packit c4476c
        t0 |= t0 >> 8;  t0 &= 0x0000ffff;
Packit c4476c
Packit c4476c
        t1 = hi & 0x55555555;
Packit c4476c
        t1 |= t1 >> 1;  t1 &= 0x33333333;
Packit c4476c
        t1 |= t1 >> 2;  t1 &= 0x0f0f0f0f;
Packit c4476c
        t1 |= t1 >> 4;  t1 &= 0x00ff00ff;
Packit c4476c
        t1 |= t1 >> 8;  t1 <<= 16;
Packit c4476c
Packit c4476c
        lo &= 0xaaaaaaaa;
Packit c4476c
        lo |= lo << 1;  lo &= 0xcccccccc;
Packit c4476c
        lo |= lo << 2;  lo &= 0xf0f0f0f0;
Packit c4476c
        lo |= lo << 4;  lo &= 0xff00ff00;
Packit c4476c
        lo |= lo << 8;  lo >>= 16;
Packit c4476c
Packit c4476c
        hi &= 0xaaaaaaaa;
Packit c4476c
        hi |= hi << 1;  hi &= 0xcccccccc;
Packit c4476c
        hi |= hi << 2;  hi &= 0xf0f0f0f0;
Packit c4476c
        hi |= hi << 4;  hi &= 0xff00ff00;
Packit c4476c
        hi |= hi << 8;  hi &= 0xffff0000;
Packit c4476c
Packit c4476c
        Ai = ((uint64_t)(hi | lo) << 32) | (t1 | t0);
Packit c4476c
    }
Packit c4476c
Packit c4476c
    return Ai;
Packit c4476c
}
Packit c4476c
Packit c4476c
static uint64_t BitDeinterleave(uint64_t Ai)
Packit c4476c
{
Packit c4476c
    if (BIT_INTERLEAVE) {
Packit c4476c
        uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
Packit c4476c
        uint32_t t0, t1;
Packit c4476c
Packit c4476c
        t0 = lo & 0x0000ffff;
Packit c4476c
        t0 |= t0 << 8;  t0 &= 0x00ff00ff;
Packit c4476c
        t0 |= t0 << 4;  t0 &= 0x0f0f0f0f;
Packit c4476c
        t0 |= t0 << 2;  t0 &= 0x33333333;
Packit c4476c
        t0 |= t0 << 1;  t0 &= 0x55555555;
Packit c4476c
Packit c4476c
        t1 = hi << 16;
Packit c4476c
        t1 |= t1 >> 8;  t1 &= 0xff00ff00;
Packit c4476c
        t1 |= t1 >> 4;  t1 &= 0xf0f0f0f0;
Packit c4476c
        t1 |= t1 >> 2;  t1 &= 0xcccccccc;
Packit c4476c
        t1 |= t1 >> 1;  t1 &= 0xaaaaaaaa;
Packit c4476c
Packit c4476c
        lo >>= 16;
Packit c4476c
        lo |= lo << 8;  lo &= 0x00ff00ff;
Packit c4476c
        lo |= lo << 4;  lo &= 0x0f0f0f0f;
Packit c4476c
        lo |= lo << 2;  lo &= 0x33333333;
Packit c4476c
        lo |= lo << 1;  lo &= 0x55555555;
Packit c4476c
Packit c4476c
        hi &= 0xffff0000;
Packit c4476c
        hi |= hi >> 8;  hi &= 0xff00ff00;
Packit c4476c
        hi |= hi >> 4;  hi &= 0xf0f0f0f0;
Packit c4476c
        hi |= hi >> 2;  hi &= 0xcccccccc;
Packit c4476c
        hi |= hi >> 1;  hi &= 0xaaaaaaaa;
Packit c4476c
Packit c4476c
        Ai = ((uint64_t)(hi | lo) << 32) | (t1 | t0);
Packit c4476c
    }
Packit c4476c
Packit c4476c
    return Ai;
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
 * SHA3_absorb may be invoked any number of times. Each call consumes the
 * largest multiple of |r| bytes found in the |len| bytes at |inp| and
 * returns the number of bytes left over, which spares the caller from
 * working out that multiple. |r| can be thought of as the block size; it
 * is commonly (1600 - 256*n)/8, e.g. 168, 136, 104 or 72, but may also be
 * (1600 - 448)/8 = 144. As a consequence, message padding and buffering
 * of partial blocks, byte- or bitwise, are the caller's responsibility.
 */
size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
                   size_t r)
{
    uint64_t *lanes = (uint64_t *)A;
    size_t lanes_per_block = r / 8;
    size_t lane, byte;

    assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);

    for (; len >= r; len -= r) {
        for (lane = 0; lane < lanes_per_block; lane++, inp += 8) {
            uint64_t word = 0;

            /* gather eight input bytes, least significant byte first */
            for (byte = 0; byte < 8; byte++)
                word |= (uint64_t)inp[byte] << (8 * byte);

            lanes[lane] ^= BitInterleave(word);
        }
        KeccakF1600(A);
    }

    return len;
}
Packit c4476c
Packit c4476c
/*
 * SHA3_squeeze is called once at the end to produce the |len|-byte hash
 * value in |out|. Lanes are emitted least significant byte first, at
 * most |r| bytes between two KeccakF1600() invocations.
 */
void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r)
{
    uint64_t *lanes = (uint64_t *)A;
    size_t lanes_per_block = r / 8;
    size_t lane, byte;

    assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);

    while (len != 0) {
        for (lane = 0; lane < lanes_per_block && len != 0; lane++) {
            uint64_t word = BitDeinterleave(lanes[lane]);
            size_t take = len < 8 ? len : 8;

            for (byte = 0; byte < take; byte++)
                out[byte] = (unsigned char)(word >> (8 * byte));

            /* a partial lane is always the very last piece of output */
            if (take < 8)
                return;

            out += 8;
            len -= 8;
        }
        /* more output wanted: permute the state before the next block */
        if (len != 0)
            KeccakF1600(A);
    }
}
Packit c4476c
#endif
Packit c4476c
Packit c4476c
#ifdef SELFTEST
Packit c4476c
/*
 * With padding already applied by the caller, the one-shot functions
 * would be expressed as follows:
 *
 * SHA3_224     SHA3_sponge(inp, len, out, 224/8, (1600-448)/8);
 * SHA3_256     SHA3_sponge(inp, len, out, 256/8, (1600-512)/8);
 * SHA3_384     SHA3_sponge(inp, len, out, 384/8, (1600-768)/8);
 * SHA3_512     SHA3_sponge(inp, len, out, 512/8, (1600-1024)/8);
 * SHAKE_128    SHA3_sponge(inp, len, out, d, (1600-256)/8);
 * SHAKE_256    SHA3_sponge(inp, len, out, d, (1600-512)/8);
 */

void SHA3_sponge(const unsigned char *inp, size_t len,
                 unsigned char *out, size_t d, size_t r)
{
    /* start from an all-zero state */
    uint64_t state[5][5] = { { 0 } };

    SHA3_absorb(state, inp, len, r);
    SHA3_squeeze(state, out, d, r);
}
Packit c4476c
Packit c4476c
# include <stdio.h>
Packit c4476c
Packit c4476c
/*
 * Self-test entry point: runs SHA3_sponge() over one pre-padded 168-byte
 * block, prints the 512 output bytes as hex (16 per line, space
 * separated) on stdout, and compares them with the expected value.
 * Writes "success" or "failure" to stderr and returns 0 or 1 accordingly.
 */
int main(void)
{
    /*
     * This is 5-bit SHAKE128 test from http://csrc.nist.gov/groups/ST/toolkit/examples.html#aHashing
     */
    unsigned char test[168] = { '\xf3', '\x3' };
    unsigned char out[512];
    size_t i;
    static const unsigned char result[512] = {
        0x2E, 0x0A, 0xBF, 0xBA, 0x83, 0xE6, 0x72, 0x0B,
        0xFB, 0xC2, 0x25, 0xFF, 0x6B, 0x7A, 0xB9, 0xFF,
        0xCE, 0x58, 0xBA, 0x02, 0x7E, 0xE3, 0xD8, 0x98,
        0x76, 0x4F, 0xEF, 0x28, 0x7D, 0xDE, 0xCC, 0xCA,
        0x3E, 0x6E, 0x59, 0x98, 0x41, 0x1E, 0x7D, 0xDB,
        0x32, 0xF6, 0x75, 0x38, 0xF5, 0x00, 0xB1, 0x8C,
        0x8C, 0x97, 0xC4, 0x52, 0xC3, 0x70, 0xEA, 0x2C,
        0xF0, 0xAF, 0xCA, 0x3E, 0x05, 0xDE, 0x7E, 0x4D,
        0xE2, 0x7F, 0xA4, 0x41, 0xA9, 0xCB, 0x34, 0xFD,
        0x17, 0xC9, 0x78, 0xB4, 0x2D, 0x5B, 0x7E, 0x7F,
        0x9A, 0xB1, 0x8F, 0xFE, 0xFF, 0xC3, 0xC5, 0xAC,
        0x2F, 0x3A, 0x45, 0x5E, 0xEB, 0xFD, 0xC7, 0x6C,
        0xEA, 0xEB, 0x0A, 0x2C, 0xCA, 0x22, 0xEE, 0xF6,
        0xE6, 0x37, 0xF4, 0xCA, 0xBE, 0x5C, 0x51, 0xDE,
        0xD2, 0xE3, 0xFA, 0xD8, 0xB9, 0x52, 0x70, 0xA3,
        0x21, 0x84, 0x56, 0x64, 0xF1, 0x07, 0xD1, 0x64,
        0x96, 0xBB, 0x7A, 0xBF, 0xBE, 0x75, 0x04, 0xB6,
        0xED, 0xE2, 0xE8, 0x9E, 0x4B, 0x99, 0x6F, 0xB5,
        0x8E, 0xFD, 0xC4, 0x18, 0x1F, 0x91, 0x63, 0x38,
        0x1C, 0xBE, 0x7B, 0xC0, 0x06, 0xA7, 0xA2, 0x05,
        0x98, 0x9C, 0x52, 0x6C, 0xD1, 0xBD, 0x68, 0x98,
        0x36, 0x93, 0xB4, 0xBD, 0xC5, 0x37, 0x28, 0xB2,
        0x41, 0xC1, 0xCF, 0xF4, 0x2B, 0xB6, 0x11, 0x50,
        0x2C, 0x35, 0x20, 0x5C, 0xAB, 0xB2, 0x88, 0x75,
        0x56, 0x55, 0xD6, 0x20, 0xC6, 0x79, 0x94, 0xF0,
        0x64, 0x51, 0x18, 0x7F, 0x6F, 0xD1, 0x7E, 0x04,
        0x66, 0x82, 0xBA, 0x12, 0x86, 0x06, 0x3F, 0xF8,
        0x8F, 0xE2, 0x50, 0x8D, 0x1F, 0xCA, 0xF9, 0x03,
        0x5A, 0x12, 0x31, 0xAD, 0x41, 0x50, 0xA9, 0xC9,
        0xB2, 0x4C, 0x9B, 0x2D, 0x66, 0xB2, 0xAD, 0x1B,
        0xDE, 0x0B, 0xD0, 0xBB, 0xCB, 0x8B, 0xE0, 0x5B,
        0x83, 0x52, 0x29, 0xEF, 0x79, 0x19, 0x73, 0x73,
        0x23, 0x42, 0x44, 0x01, 0xE1, 0xD8, 0x37, 0xB6,
        0x6E, 0xB4, 0xE6, 0x30, 0xFF, 0x1D, 0xE7, 0x0C,
        0xB3, 0x17, 0xC2, 0xBA, 0xCB, 0x08, 0x00, 0x1D,
        0x34, 0x77, 0xB7, 0xA7, 0x0A, 0x57, 0x6D, 0x20,
        0x86, 0x90, 0x33, 0x58, 0x9D, 0x85, 0xA0, 0x1D,
        0xDB, 0x2B, 0x66, 0x46, 0xC0, 0x43, 0xB5, 0x9F,
        0xC0, 0x11, 0x31, 0x1D, 0xA6, 0x66, 0xFA, 0x5A,
        0xD1, 0xD6, 0x38, 0x7F, 0xA9, 0xBC, 0x40, 0x15,
        0xA3, 0x8A, 0x51, 0xD1, 0xDA, 0x1E, 0xA6, 0x1D,
        0x64, 0x8D, 0xC8, 0xE3, 0x9A, 0x88, 0xB9, 0xD6,
        0x22, 0xBD, 0xE2, 0x07, 0xFD, 0xAB, 0xC6, 0xF2,
        0x82, 0x7A, 0x88, 0x0C, 0x33, 0x0B, 0xBF, 0x6D,
        0xF7, 0x33, 0x77, 0x4B, 0x65, 0x3E, 0x57, 0x30,
        0x5D, 0x78, 0xDC, 0xE1, 0x12, 0xF1, 0x0A, 0x2C,
        0x71, 0xF4, 0xCD, 0xAD, 0x92, 0xED, 0x11, 0x3E,
        0x1C, 0xEA, 0x63, 0xB9, 0x19, 0x25, 0xED, 0x28,
        0x19, 0x1E, 0x6D, 0xBB, 0xB5, 0xAA, 0x5A, 0x2A,
        0xFD, 0xA5, 0x1F, 0xC0, 0x5A, 0x3A, 0xF5, 0x25,
        0x8B, 0x87, 0x66, 0x52, 0x43, 0x55, 0x0F, 0x28,
        0x94, 0x8A, 0xE2, 0xB8, 0xBE, 0xB6, 0xBC, 0x9C,
        0x77, 0x0B, 0x35, 0xF0, 0x67, 0xEA, 0xA6, 0x41,
        0xEF, 0xE6, 0x5B, 0x1A, 0x44, 0x90, 0x9D, 0x1B,
        0x14, 0x9F, 0x97, 0xEE, 0xA6, 0x01, 0x39, 0x1C,
        0x60, 0x9E, 0xC8, 0x1D, 0x19, 0x30, 0xF5, 0x7C,
        0x18, 0xA4, 0xE0, 0xFA, 0xB4, 0x91, 0xD1, 0xCA,
        0xDF, 0xD5, 0x04, 0x83, 0x44, 0x9E, 0xDC, 0x0F,
        0x07, 0xFF, 0xB2, 0x4D, 0x2C, 0x6F, 0x9A, 0x9A,
        0x3B, 0xFF, 0x39, 0xAE, 0x3D, 0x57, 0xF5, 0x60,
        0x65, 0x4D, 0x7D, 0x75, 0xC9, 0x08, 0xAB, 0xE6,
        0x25, 0x64, 0x75, 0x3E, 0xAC, 0x39, 0xD7, 0x50,
        0x3D, 0xA6, 0xD3, 0x7C, 0x2E, 0x32, 0xE1, 0xAF,
        0x3B, 0x8A, 0xEC, 0x8A, 0xE3, 0x06, 0x9C, 0xD9
    };

    /* final padding bit in the last byte of the 168-byte block */
    test[167] = '\x80';
    SHA3_sponge(test, sizeof(test), out, sizeof(out), sizeof(test));

    /*
     * Rationale behind keeping output [formatted as below] is that
     * one should be able to redirect it to a file, then copy-n-paste
     * final "output val" from official example to another file, and
     * compare the two with diff(1).
     */
    for (i = 0; i < sizeof(out); i++) {
        /* newline after every 16th byte and after the very last one */
        int end_of_row = ((i + 1) % 16 == 0) || (i + 1 == sizeof(out));

        printf("%02X%c", out[i], end_of_row ? '\n' : ' ');
    }

    if (memcmp(out, result, sizeof(out)) != 0) {
        fprintf(stderr, "failure\n");
        return 1;
    }

    fprintf(stderr, "success\n");
    return 0;
}
Packit c4476c
#endif