Blame crypto/modes/gcm128.c

Packit c4476c
/*
Packit c4476c
 * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
Packit c4476c
 *
Packit c4476c
 * Licensed under the OpenSSL license (the "License").  You may not use
Packit c4476c
 * this file except in compliance with the License.  You can obtain a copy
Packit c4476c
 * in the file LICENSE in the source distribution or at
Packit c4476c
 * https://www.openssl.org/source/license.html
Packit c4476c
 */
Packit c4476c
Packit c4476c
#include <openssl/crypto.h>
Packit c4476c
#include "modes_local.h"
Packit c4476c
#include <string.h>
Packit c4476c
Packit c4476c
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/*
 * Redefine GETU32/PUTU32 as a single byte-swapped 32-bit access, because
 * alignment is ensured.  Both expansions are fully parenthesized so that
 * each behaves as one expression wherever it is used.
 */
# undef  GETU32
# define GETU32(p)       (BSWAP4(*(const u32 *)(p)))
# undef  PUTU32
# define PUTU32(p,v)     (*(u32 *)(p) = BSWAP4(v))
#endif
Packit c4476c
Packit c4476c
/*
 * PACK(s) places the 16-bit constant s in the top 16 bits of a size_t; the
 * rem_4bit/rem_8bit reduction tables in this file are built with it.
 */
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))

/*
 * REDUCE1BIT(V) shifts the 128-bit value V (64-bit members .hi and .lo)
 * right by one bit.  When the bit shifted out of V.lo was set, 0xe1 is
 * XORed into the top byte of V.hi.  The two branches compute the same
 * result; the second one builds the constant from a 32-bit word when
 * size_t is not 64 bits wide.
 *
 * V must be a modifiable lvalue; it is evaluated several times.  Every use
 * is parenthesized, so expressions such as *ptr or tab[i] are accepted.
 */
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-((V).lo&1)); \
                (V).lo  = ((V).hi<<63)|((V).lo>>1); \
                (V).hi  = ((V).hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)((V).lo&1)); \
                (V).lo  = ((V).hi<<63)|((V).lo>>1); \
                (V).hi  = ((V).hi>>1 )^((u64)T<<32); \
        } \
} while(0)
Packit c4476c
Packit c4476c
/*-
 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
 * never be set to 8. 8 is effectively reserved for testing purposes.
 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
 * "Shoup's" in the GCM specification. In other words OpenSSL does not
 * cover the whole spectrum of possible table-driven implementations. Why?
 * In the non-"Shoup's" case the memory access pattern is segmented in such
 * a manner that it's trivial to see that cache timing information can
 * reveal a fair portion of the intermediate hash value. Given that the
 * ciphertext is always available to an attacker, it's possible for them to
 * attempt to deduce the secret parameter H and, if successful, tamper with
 * messages [which is trivial in CTR mode]. In the "Shoup's" case it's
 * not as trivial, but there is no reason to believe that it's resistant
 * to cache-timing attacks. And the thing about the "8-bit" implementation
 * is that it consumes 16 (sixteen) times more memory, 4KB per individual
 * key + 1KB shared. Well, on the plus side it should be twice as fast as
 * the "4-bit" version. And for gcc-generated x86[_64] code, the "8-bit"
 * version was observed to run ~75% faster, closer to 100% for commercial
 * compilers... Yet the "4-bit" procedure is preferred, because it's
 * believed to provide a better security-performance balance and adequate
 * all-round performance. "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows a large enough free
 *   results in VM working set trimming, meaning that a subsequent
 *   malloc would immediately incur working set expansion);
 * - a larger table has a larger cache footprint, which can affect
 *   performance of other code paths (not necessarily even from the same
 *   thread in a Hyper-Threading world);
 *
 * Value of 1 is not appropriate for performance reasons.
 */
Packit c4476c
#if     TABLE_BITS==8
Packit c4476c
Packit c4476c
/*
 * Fill the 256-entry table used by the "8-bit" multiply.
 *
 * Htable receives the table.  H supplies the starting value as two 64-bit
 * words: H[0] becomes the .hi half and H[1] the .lo half.
 *
 * Entry 0 is zero and entry 128 is the starting value itself.  Entries 64,
 * 32, ..., 1 are each obtained from the previous one with REDUCE1BIT.  Any
 * other entry is the XOR of the power-of-two entries selected by the set
 * bits of its index.
 */
static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
    u128 cur;
    int pow2, off;

    cur.hi = H[0];
    cur.lo = H[1];

    Htable[0].lo = 0;
    Htable[0].hi = 0;
    Htable[128] = cur;

    /* Power-of-two entries below 128: one REDUCE1BIT step per halving. */
    pow2 = 64;
    while (pow2 > 0) {
        REDUCE1BIT(cur);
        Htable[pow2] = cur;
        pow2 >>= 1;
    }

    /* Entries pow2 + off, 0 < off < pow2, combine two existing entries. */
    pow2 = 2;
    while (pow2 < 256) {
        const u128 base = Htable[pow2];

        for (off = 1; off < pow2; ++off) {
            Htable[pow2 + off].hi = base.hi ^ Htable[off].hi;
            Htable[pow2 + off].lo = base.lo ^ Htable[off].lo;
        }
        pow2 <<= 1;
    }
}
Packit c4476c
Packit c4476c
/*
 * "8-bit" table-driven update of Xi in place.
 *
 * Xi is read as 16 bytes, last byte first.  The accumulator starts as the
 * Htable entry selected by byte 15.  For each earlier byte it is shifted
 * right by 8 bits, the byte that fell off selects a rem_8bit constant that
 * is XORed into the top of the high word, and the Htable entry selected by
 * that earlier byte is XORed in.
 *
 * The result is written back to Xi.  On a little-endian host the two words
 * go through BSWAP8, or through PUTU32 one 32-bit piece at a time when
 * BSWAP8 is not defined; otherwise they are stored unchanged.
 */
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
    static const size_t rem_8bit[256] = {
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
    };
    /* host.little is non-zero when the first byte of the long 1 is 1. */
    const union {
        long one;
        char little;
    } host = { 1 };
    const u8 *in = (const u8 *)Xi;
    u128 acc;
    size_t carry;
    int pos;

    acc.hi = Htable[in[15]].hi;
    acc.lo = Htable[in[15]].lo;

    for (pos = 14; pos >= 0; --pos) {
        /* Shift right one byte and fold the byte that fell off back in. */
        carry = (size_t)acc.lo & 0xff;
        acc.lo = (acc.hi << 56) | (acc.lo >> 8);
        acc.hi >>= 8;
        if (sizeof(size_t) == 8)
            acc.hi ^= rem_8bit[carry];
        else
            acc.hi ^= (u64)rem_8bit[carry] << 32;

        acc.hi ^= Htable[in[pos]].hi;
        acc.lo ^= Htable[in[pos]].lo;
    }

    if (!host.little) {
        Xi[0] = acc.hi;
        Xi[1] = acc.lo;
    } else {
# ifdef BSWAP8
        Xi[0] = BSWAP8(acc.hi);
        Xi[1] = BSWAP8(acc.lo);
# else
        u8 *out = (u8 *)Xi;
        u32 word;

        word = (u32)(acc.hi >> 32);
        PUTU32(out, word);
        word = (u32)(acc.hi);
        PUTU32(out + 4, word);
        word = (u32)(acc.lo >> 32);
        PUTU32(out + 8, word);
        word = (u32)(acc.lo);
        PUTU32(out + 12, word);
# endif
    }
}
Packit c4476c
Packit c4476c
/* GCM_MUL(ctx): run gcm_gmult_8bit on the Xi words and Htable of *ctx. */
# define GCM_MUL(ctx)      gcm_gmult_8bit((ctx)->Xi.u,(ctx)->Htable)
Packit c4476c
Packit c4476c
#elif   TABLE_BITS==4
Packit c4476c
Packit c4476c
/*
 * Fill the 16-entry table used by the "4-bit" multiply.
 *
 * Htable receives the table.  H supplies the starting value as two 64-bit
 * words: H[0] becomes the .hi half and H[1] the .lo half.
 *
 * Entry 0 is zero and entry 8 is the starting value.  Entries 4, 2 and 1
 * are each obtained from the previous one with REDUCE1BIT.  Every other
 * entry is the XOR of its highest power-of-two entry and the entry for the
 * remaining index bits.  With OPENSSL_SMALL_FOOTPRINT the table is built
 * with loops; otherwise the same assignments are written out one by one.
 *
 * When built with GHASH_ASM for ARM, the two 64-bit halves of every entry
 * are exchanged afterwards; on a big-endian host the 32-bit halves inside
 * each of them are exchanged as well.
 */
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
    u128 V;
# if defined(OPENSSL_SMALL_FOOTPRINT)
    int bit, span, off;
# endif

    V.hi = H[0];
    V.lo = H[1];
    Htable[0].lo = 0;
    Htable[0].hi = 0;

# if defined(OPENSSL_SMALL_FOOTPRINT)
    Htable[8] = V;
    for (bit = 4; bit > 0; bit >>= 1) {
        REDUCE1BIT(V);
        Htable[bit] = V;
    }

    for (span = 2; span < 16; span <<= 1) {
        V = Htable[span];
        for (off = 1; off < span; ++off) {
            Htable[span + off].hi = V.hi ^ Htable[off].hi;
            Htable[span + off].lo = V.lo ^ Htable[off].lo;
        }
    }
# else
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;

    /* Htable[dst] = Htable[a] ^ Htable[b], one 64-bit half at a time. */
#  define GCM_HT_XOR(dst, a, b) do { \
        Htable[dst].hi = Htable[a].hi ^ Htable[b].hi; \
        Htable[dst].lo = Htable[a].lo ^ Htable[b].lo; \
    } while (0)

    GCM_HT_XOR(3, 1, 2);

    GCM_HT_XOR(5, 4, 1);
    GCM_HT_XOR(6, 4, 2);
    GCM_HT_XOR(7, 4, 3);

    GCM_HT_XOR(9, 8, 1);
    GCM_HT_XOR(10, 8, 2);
    GCM_HT_XOR(11, 8, 3);
    GCM_HT_XOR(12, 8, 4);
    GCM_HT_XOR(13, 8, 5);
    GCM_HT_XOR(14, 8, 6);
    GCM_HT_XOR(15, 8, 7);

#  undef GCM_HT_XOR
# endif
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        /* host.little is non-zero when the first byte of the long 1 is 1. */
        const union {
            long one;
            char little;
        } host = { 1 };
        int slot;

        for (slot = 0; slot < 16; ++slot) {
            V = Htable[slot];
            if (host.little) {
                Htable[slot].hi = V.lo;
                Htable[slot].lo = V.hi;
            } else {
                Htable[slot].hi = V.lo << 32 | V.lo >> 32;
                Htable[slot].lo = V.hi << 32 | V.hi >> 32;
            }
        }
    }
# endif
}
Packit c4476c
Packit c4476c
# ifndef GHASH_ASM
Packit c4476c
static const size_t rem_4bit[16] = {
Packit c4476c
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
Packit c4476c
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
Packit c4476c
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
Packit c4476c
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
Packit c4476c
};
Packit c4476c
Packit c4476c
/*
 * "4-bit" table-driven update of Xi in place.
 *
 * The 16 bytes of Xi are consumed as 32 nibbles, starting with the low
 * nibble of byte 15, then its high nibble, then byte 14, and so on down to
 * the high nibble of byte 0.  The first nibble selects the initial Htable
 * entry.  For every later nibble the accumulator is shifted right by four
 * bits, the bits that fell off select a rem_4bit constant that is XORed
 * into the high word, and the Htable entry for the nibble is XORed in.
 *
 * The result is written back to Xi.  On a little-endian host the two words
 * go through BSWAP8, or through PUTU32 one 32-bit piece at a time when
 * BSWAP8 is not defined; otherwise they are stored unchanged.
 */
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    /* host.little is non-zero when the first byte of the long 1 is 1. */
    const union {
        long one;
        char little;
    } host = { 1 };
    const u8 *in = (const u8 *)Xi;
    u128 acc;
    size_t octet, sel, carry;
    int nib;

    octet = in[15];
    acc.hi = Htable[octet & 0xf].hi;
    acc.lo = Htable[octet & 0xf].lo;

    for (nib = 1; nib < 32; ++nib) {
        /* Odd steps take the high nibble, even steps the low nibble. */
        octet = in[15 - nib / 2];
        sel = (nib & 1) ? octet >> 4 : octet & 0xf;

        carry = (size_t)acc.lo & 0xf;
        acc.lo = (acc.hi << 60) | (acc.lo >> 4);
        acc.hi >>= 4;
        if (sizeof(size_t) == 8)
            acc.hi ^= rem_4bit[carry];
        else
            acc.hi ^= (u64)rem_4bit[carry] << 32;

        acc.hi ^= Htable[sel].hi;
        acc.lo ^= Htable[sel].lo;
    }

    if (!host.little) {
        Xi[0] = acc.hi;
        Xi[1] = acc.lo;
    } else {
#  ifdef BSWAP8
        Xi[0] = BSWAP8(acc.hi);
        Xi[1] = BSWAP8(acc.lo);
#  else
        u8 *out = (u8 *)Xi;
        u32 word;

        word = (u32)(acc.hi >> 32);
        PUTU32(out, word);
        word = (u32)(acc.hi);
        PUTU32(out + 4, word);
        word = (u32)(acc.lo >> 32);
        PUTU32(out + 8, word);
        word = (u32)(acc.lo);
        PUTU32(out + 12, word);
#  endif
    }
}
Packit c4476c
Packit c4476c
#  if !defined(OPENSSL_SMALL_FOOTPRINT)
Packit c4476c
/*
Packit c4476c
 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
Packit c4476c
 * details... Compiler-generated code doesn't seem to give any
Packit c4476c
 * performance improvement, at least not on x86[_64]. It's here
Packit c4476c
 * mostly as reference and a placeholder for possible future
Packit c4476c
 * non-trivial optimization[s]...
Packit c4476c
 */
Packit c4476c
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
Packit c4476c
                           const u8 *inp, size_t len)
Packit c4476c
{
Packit c4476c
    u128 Z;
Packit c4476c
    int cnt;
Packit c4476c
    size_t rem, nlo, nhi;
Packit c4476c
    const union {
Packit c4476c
        long one;
Packit c4476c
        char little;
Packit c4476c
    } is_endian = { 1 };
Packit c4476c
Packit c4476c
#   if 1
Packit c4476c
    do {
Packit c4476c
        cnt = 15;
Packit c4476c
        nlo = ((const u8 *)Xi)[15];
Packit c4476c
        nlo ^= inp[15];
Packit c4476c
        nhi = nlo >> 4;
Packit c4476c
        nlo &= 0xf;
Packit c4476c
Packit c4476c
        Z.hi = Htable[nlo].hi;
Packit c4476c
        Z.lo = Htable[nlo].lo;
Packit c4476c
Packit c4476c
        while (1) {
Packit c4476c
            rem = (size_t)Z.lo & 0xf;
Packit c4476c
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
Packit c4476c
            Z.hi = (Z.hi >> 4);
Packit c4476c
            if (sizeof(size_t) == 8)
Packit c4476c
                Z.hi ^= rem_4bit[rem];
Packit c4476c
            else
Packit c4476c
                Z.hi ^= (u64)rem_4bit[rem] << 32;
Packit c4476c
Packit c4476c
            Z.hi ^= Htable[nhi].hi;
Packit c4476c
            Z.lo ^= Htable[nhi].lo;
Packit c4476c
Packit c4476c
            if (--cnt < 0)
Packit c4476c
                break;
Packit c4476c
Packit c4476c
            nlo = ((const u8 *)Xi)[cnt];
Packit c4476c
            nlo ^= inp[cnt];
Packit c4476c
            nhi = nlo >> 4;
Packit c4476c
            nlo &= 0xf;
Packit c4476c
Packit c4476c
            rem = (size_t)Z.lo & 0xf;
Packit c4476c
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
Packit c4476c
            Z.hi = (Z.hi >> 4);
Packit c4476c
            if (sizeof(size_t) == 8)
Packit c4476c
                Z.hi ^= rem_4bit[rem];
Packit c4476c
            else
Packit c4476c
                Z.hi ^= (u64)rem_4bit[rem] << 32;
Packit c4476c
Packit c4476c
            Z.hi ^= Htable[nlo].hi;
Packit c4476c
            Z.lo ^= Htable[nlo].lo;
Packit c4476c
        }
Packit c4476c
#   else
Packit c4476c
    /*
Packit c4476c
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
Packit c4476c
     * [should] give ~50% improvement... One could have PACK()-ed
Packit c4476c
     * the rem_8bit even here, but the priority is to minimize
Packit c4476c
     * cache footprint...
Packit c4476c
     */
Packit c4476c
    u128 Hshr4[16];             /* Htable shifted right by 4 bits */
Packit c4476c
    u8 Hshl4[16];               /* Htable shifted left by 4 bits */
Packit c4476c
    static const unsigned short rem_8bit[256] = {
Packit c4476c
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
Packit c4476c
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
Packit c4476c
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
Packit c4476c
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
Packit c4476c
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
Packit c4476c
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
Packit c4476c
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
Packit c4476c
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
Packit c4476c
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
Packit c4476c
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
Packit c4476c
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
Packit c4476c
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
Packit c4476c
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
Packit c4476c
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
Packit c4476c
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
Packit c4476c
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
Packit c4476c
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
Packit c4476c
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
Packit c4476c
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
Packit c4476c
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
Packit c4476c
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
Packit c4476c
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
Packit c4476c
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
Packit c4476c
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
Packit c4476c
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
Packit c4476c
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
Packit c4476c
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
Packit c4476c
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
Packit c4476c
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
Packit c4476c
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
Packit c4476c
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
Packit c4476c
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
Packit c4476c
    };
Packit c4476c
    /*
Packit c4476c
     * This pre-processing phase slows down procedure by approximately
Packit c4476c
     * same time as it makes each loop spin faster. In other words
Packit c4476c
     * single block performance is approximately same as straightforward
Packit c4476c
     * "4-bit" implementation, and then it goes only faster...
Packit c4476c
     */
Packit c4476c
    for (cnt = 0; cnt < 16; ++cnt) {
Packit c4476c
        Z.hi = Htable[cnt].hi;
Packit c4476c
        Z.lo = Htable[cnt].lo;
Packit c4476c
        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
Packit c4476c
        Hshr4[cnt].hi = (Z.hi >> 4);
Packit c4476c
        Hshl4[cnt] = (u8)(Z.lo << 4);
Packit c4476c
    }
Packit c4476c
Packit c4476c
    do {
Packit c4476c
        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
Packit c4476c
            nlo = ((const u8 *)Xi)[cnt];
Packit c4476c
            nlo ^= inp[cnt];
Packit c4476c
            nhi = nlo >> 4;
Packit c4476c
            nlo &= 0xf;
Packit c4476c
Packit c4476c
            Z.hi ^= Htable[nlo].hi;
Packit c4476c
            Z.lo ^= Htable[nlo].lo;
Packit c4476c
Packit c4476c
            rem = (size_t)Z.lo & 0xff;
Packit c4476c
Packit c4476c
            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
Packit c4476c
            Z.hi = (Z.hi >> 8);
Packit c4476c
Packit c4476c
            Z.hi ^= Hshr4[nhi].hi;
Packit c4476c
            Z.lo ^= Hshr4[nhi].lo;
Packit c4476c
            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
Packit c4476c
        }
Packit c4476c
Packit c4476c
        nlo = ((const u8 *)Xi)[0];
Packit c4476c
        nlo ^= inp[0];
Packit c4476c
        nhi = nlo >> 4;
Packit c4476c
        nlo &= 0xf;
Packit c4476c
Packit c4476c
        Z.hi ^= Htable[nlo].hi;
Packit c4476c
        Z.lo ^= Htable[nlo].lo;
Packit c4476c
Packit c4476c
        rem = (size_t)Z.lo & 0xf;
Packit c4476c
Packit c4476c
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
Packit c4476c
        Z.hi = (Z.hi >> 4);
Packit c4476c
Packit c4476c
        Z.hi ^= Htable[nhi].hi;
Packit c4476c
        Z.lo ^= Htable[nhi].lo;
Packit c4476c
        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
Packit c4476c
#   endif
Packit c4476c
Packit c4476c
        if (is_endian.little) {
Packit c4476c
#   ifdef BSWAP8
Packit c4476c
            Xi[0] = BSWAP8(Z.hi);
Packit c4476c
            Xi[1] = BSWAP8(Z.lo);
Packit c4476c
#   else
Packit c4476c
            u8 *p = (u8 *)Xi;
Packit c4476c
            u32 v;
Packit c4476c
            v = (u32)(Z.hi >> 32);
Packit c4476c
            PUTU32(p, v);
Packit c4476c
            v = (u32)(Z.hi);
Packit c4476c
            PUTU32(p + 4, v);
Packit c4476c
            v = (u32)(Z.lo >> 32);
Packit c4476c
            PUTU32(p + 8, v);
Packit c4476c
            v = (u32)(Z.lo);
Packit c4476c
            PUTU32(p + 12, v);
Packit c4476c
#   endif
Packit c4476c
        } else {
Packit c4476c
            Xi[0] = Z.hi;
Packit c4476c
            Xi[1] = Z.lo;
Packit c4476c
        }
Packit c4476c
    } while (inp += 16, len -= 16);
Packit c4476c
}
Packit c4476c
#  endif
Packit c4476c
# else
Packit c4476c
/*
 * With GHASH_ASM defined the 4-bit routines are only declared here, with
 * the same signatures as the C versions above; their definitions must come
 * from outside this section (presumably the platform assembler modules --
 * confirm against the build).
 */
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                    const u8 *inp, size_t len);
Packit c4476c
# endif
Packit c4476c
Packit c4476c
/* GCM_MUL(ctx): run gcm_gmult_4bit on the Xi words and Htable of *ctx. */
# define GCM_MUL(ctx)      gcm_gmult_4bit((ctx)->Xi.u,(ctx)->Htable)
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * GHASH(ctx,in,len): run gcm_ghash_4bit over len bytes at in, using the Xi
 * words and Htable of *ctx.
 */
#  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,(in),(len))
/*
 * GHASH_CHUNK is a "stride parameter" meant to mitigate the cache-thrashing
 * effect. In other words, the idea is to hash data while it's still in the
 * L1 cache after the encryption pass...
 */
#  define GHASH_CHUNK       (3*1024)
# endif
Packit c4476c
Packit c4476c
#else                           /* TABLE_BITS */
Packit c4476c
Packit c4476c
/*
 * Bit-by-bit ("1-bit") update of Xi in place, using H directly instead of a
 * precomputed table.
 *
 * Xi is read as 16 bytes in memory order, each byte most significant bit
 * first.  For every bit, V is XORed into the accumulator Z when the bit is
 * set (through an all-ones/all-zeros mask rather than a branch), and V is
 * then advanced with REDUCE1BIT.  V starts as H[0] (.hi) and H[1] (.lo).
 *
 * The bytes are examined with unsigned arithmetic only.  Loading them into
 * a signed long and shifting it, as was done previously, left-shifts
 * negative values (undefined behaviour) and depends on how the
 * implementation right-shifts negative values.  The bit order processed is
 * the same: byte-swapped words on little-endian hosts and native words on
 * big-endian hosts both enumerate the bytes of Xi in memory order.
 *
 * The result is written back to Xi.  On a little-endian host the two words
 * go through BSWAP8, or through PUTU32 one 32-bit piece at a time when
 * BSWAP8 is not defined; otherwise they are stored unchanged.
 */
static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
{
    u128 V, Z = { 0, 0 };
    const u8 *xi = (const u8 *)Xi;
    unsigned int X;
    int i, j;
    /* is_endian.little is non-zero when the first byte of the long 1 is 1. */
    const union {
        long one;
        char little;
    } is_endian = { 1 };

    V.hi = H[0];                /* H is in host byte order, no byte swapping */
    V.lo = H[1];

    for (j = 0; j < 16; ++j) {
        X = xi[j];

        for (i = 0; i < 8; ++i, X <<= 1) {
            /* All ones when the current top bit of the byte is set. */
            u64 M = 0 - (u64)((X >> 7) & 1);

            Z.hi ^= V.hi & M;
            Z.lo ^= V.lo & M;

            REDUCE1BIT(V);
        }
    }

    if (is_endian.little) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
Packit c4476c
Packit c4476c
/* GCM_MUL(ctx): run gcm_gmult_1bit on the Xi and H words of *ctx. */
# define GCM_MUL(ctx)      gcm_gmult_1bit((ctx)->Xi.u,(ctx)->H.u)
Packit c4476c
Packit c4476c
#endif
Packit c4476c
Packit c4476c
/*
 * Prototypes and capability macros for the assembler GHASH back ends.
 * Only considered for the 4-bit table configuration when assembler support
 * (GHASH_ASM or OPENSSL_CPUID_OBJ) is compiled in.  Every architecture
 * branch that provides back ends defines GCM_FUNCREF_4BIT plus a
 * GHASH_ASM_<arch> tag.
 */
#if TABLE_BITS == 4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if !defined(I386_ONLY) && \
     (defined(__i386) || defined(__i386__) || \
      defined(__x86_64) || defined(__x86_64__) || \
      defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
/* ---- x86 and x86_64 ---- */
#  define GHASH_ASM_X86_OR_64
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_ia32cap_P[];

void gcm_init_clmul(u128 Htable[16], const u64 H[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                     size_t len);

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
/* 32-bit x86: the AVX entry points are aliases of the CLMUL ones */
#   define GHASH_ASM_X86
#   define gcm_init_avx   gcm_init_clmul
#   define gcm_gmult_avx  gcm_gmult_clmul
#   define gcm_ghash_avx  gcm_ghash_clmul

void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);
#  else
/* 64-bit x86: the AVX entry points are separate functions */
void gcm_init_avx(u128 Htable[16], const u64 H[2]);
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                   size_t len);
#  endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
/* ---- ARM (32- and 64-bit) ---- */
#  include "arm_arch.h"
#  if __ARM_MAX_ARCH__ >= 7
#   define GHASH_ASM_ARM
#   define GCM_FUNCREF_4BIT
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
#   if defined(__arm__) || defined(__arm)
/* NEON_CAPABLE is only defined for the 32-bit ARM targets */
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16], const u64 H[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
void gcm_init_v8(u128 Htable[16], const u64 H[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
#  endif
# elif defined(__sparc__) || defined(__sparc)
/* ---- SPARC ---- */
#  include "sparc_arch.h"
#  define GHASH_ASM_SPARC
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_sparcv9cap_P[];
void gcm_init_vis3(u128 Htable[16], const u64 H[2]);
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && \
       (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
/* ---- PowerPC ---- */
#  include "ppc_arch.h"
#  define GHASH_ASM_PPC
#  define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const u64 H[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
# endif
#endif
Packit c4476c
Packit c4476c
/*
 * When an architecture selected its GHASH routines at run time, GCM_MUL and
 * GHASH dispatch through the function pointers gcm_gmult_p / gcm_ghash_p,
 * which each using function declares locally.  GHASH is only redirected if
 * it was already defined.  Macro parameters are parenthesized so that any
 * expression passed as an argument expands safely.
 */
#ifdef GCM_FUNCREF_4BIT
# undef  GCM_MUL
# define GCM_MUL(ctx)           (*gcm_gmult_p)((ctx)->Xi.u,(ctx)->Htable)
# ifdef GHASH
#  undef  GHASH
#  define GHASH(ctx,in,len)     (*gcm_ghash_p)((ctx)->Xi.u,(ctx)->Htable,(in),(len))
# endif
#endif
Packit c4476c
Packit c4476c
/*
 * Prepare a GCM context for use with the given block cipher and key.
 *
 * The context is zeroed, the cipher callback and key pointer are recorded,
 * and the hash key H is obtained by encrypting the (zeroed) H field in
 * place.  H is then converted to host byte order and handed to the
 * table-initialisation routine of whichever GHASH implementation the build
 * and, where applicable, the run-time CPU capability bits select.
 */
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    /* Run-time endianness probe: .little reads 1 on little-endian hosts */
    const union {
        long one;
        char little;
    } is_endian = { 1 };

    memset(ctx, 0, sizeof(*ctx));
    ctx->key = key;
    ctx->block = block;

    /* ctx->H is all zeros after the memset above; encrypt it in place */
    (*block) (ctx->H.c, ctx->H.c, key);

    if (is_endian.little) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *hb = ctx->H.c;
        u64 upper = (u64)GETU32(hb) << 32 | GETU32(hb + 4);
        u64 lower = (u64)GETU32(hb + 8) << 32 | GETU32(hb + 12);

        ctx->H.u[0] = upper;
        ctx->H.u[1] = lower;
#endif
    }
#if     TABLE_BITS==8
    gcm_init_8bit(ctx->Htable, ctx->H.u);
#elif   TABLE_BITS==4
    /* CTX__GHASH records the bulk routine, or NULL when GHASH is unused */
# if    defined(GHASH)
#  define CTX__GHASH(f) (ctx->ghash = (f))
# else
#  define CTX__GHASH(f) (ctx->ghash = NULL)
# endif
# if    defined(GHASH_ASM_X86_OR_64)
#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) != 0x41) {
            gcm_init_clmul(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_clmul;
            CTX__GHASH(gcm_ghash_clmul);
        } else {                /* AVX+MOVBE */
            gcm_init_avx(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_avx;
            CTX__GHASH(gcm_ghash_avx);
        }
        return;
    }
#  endif
    gcm_init_4bit(ctx->Htable, ctx->H.u);
#  if   defined(GHASH_ASM_X86)  /* x86 only */
#   if  defined(OPENSSL_IA32_SSE2)
#    define GCM_X86_SIMD_BIT 25 /* check SSE bit */
#   else
#    define GCM_X86_SIMD_BIT 23 /* check MMX bit */
#   endif
    if (OPENSSL_ia32cap_P[0] & (1 << GCM_X86_SIMD_BIT)) {
        ctx->gmult = gcm_gmult_4bit_mmx;
        CTX__GHASH(gcm_ghash_4bit_mmx);
    } else {
        ctx->gmult = gcm_gmult_4bit_x86;
        CTX__GHASH(gcm_ghash_4bit_x86);
    }
#   undef GCM_X86_SIMD_BIT
#  else
    ctx->gmult = gcm_gmult_4bit;
    CTX__GHASH(gcm_ghash_4bit);
#  endif
# elif  defined(GHASH_ASM_ARM)
#  ifdef PMULL_CAPABLE
    if (PMULL_CAPABLE) {
        gcm_init_v8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_v8;
        CTX__GHASH(gcm_ghash_v8);
    } else
#  endif
#  ifdef NEON_CAPABLE
    if (NEON_CAPABLE) {
        gcm_init_neon(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_neon;
        CTX__GHASH(gcm_ghash_neon);
    } else
#  endif
    {
        /* Neither capability test succeeded: generic 4-bit tables */
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# elif  defined(GHASH_ASM_SPARC)
    if (!(OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3)) {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    } else {
        gcm_init_vis3(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_vis3;
        CTX__GHASH(gcm_ghash_vis3);
    }
# elif  defined(GHASH_ASM_PPC)
    if (!(OPENSSL_ppccap_P & PPC_CRYPTO207)) {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    } else {
        gcm_init_p8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_p8;
        CTX__GHASH(gcm_ghash_p8);
    }
# else
    /* No run-time dispatch in this build: only the table is needed */
    gcm_init_4bit(ctx->Htable, ctx->H.u);
# endif
# undef CTX__GHASH
#endif
}
Packit c4476c
Packit c4476c
/*
 * Start a new message under the given IV.
 *
 * Resets the AAD/message lengths and the partial-block counters, derives
 * the initial counter block Yi from the IV, stores the encryption of that
 * block in ctx->EK0, and leaves Yi holding the incremented counter with Xi
 * cleared.
 *
 * A 12-byte IV is used directly with a 32-bit counter of 1 appended.  Any
 * other length is hashed: Xi is borrowed as scratch, the IV is absorbed
 * 16 bytes at a time (a short tail is absorbed as-is), and the IV length in
 * bits is mixed into the last eight bytes before a final multiplication.
 */
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
                         size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

    ctx->ares = 0;
    ctx->mres = 0;
    ctx->len.u[0] = 0;          /* AAD length */
    ctx->len.u[1] = 0;          /* message length */

    if (len != 12) {
        u64 iv_bits = (u64)len << 3;
        size_t k;

        /* Borrow ctx->Xi to calculate initial Yi */
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;

        for (; len >= 16; iv += 16, len -= 16) {
            for (k = 0; k < 16; ++k)
                ctx->Xi.c[k] ^= iv[k];
            GCM_MUL(ctx);
        }
        if (len != 0) {
            for (k = 0; k < len; ++k)
                ctx->Xi.c[k] ^= iv[k];
            GCM_MUL(ctx);
        }

        /* Fold the bit length into bytes 8..15, most significant first */
        if (is_endian.little) {
#ifdef BSWAP8
            ctx->Xi.u[1] ^= BSWAP8(iv_bits);
#else
            for (k = 8; k < 16; ++k)
                ctx->Xi.c[k] ^= (u8)(iv_bits >> (8 * (15 - k)));
#endif
        } else {
            ctx->Xi.u[1] ^= iv_bits;
        }

        GCM_MUL(ctx);

        /* The last four bytes of the result are the starting counter */
        if (is_endian.little) {
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Xi.d[3]);
#else
            ctr = GETU32(ctx->Xi.c + 12);
#endif
        } else {
            ctr = ctx->Xi.d[3];
        }

        /* Copy borrowed Xi to Yi */
        ctx->Yi.u[0] = ctx->Xi.u[0];
        ctx->Yi.u[1] = ctx->Xi.u[1];
    } else {
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[12] = 0;
        ctx->Yi.c[13] = 0;
        ctx->Yi.c[14] = 0;
        ctx->Yi.c[15] = 1;
        ctr = 1;
    }

    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;

    /* Encrypt the initial counter block into EK0, then advance the counter */
    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
    ++ctr;
    if (is_endian.little) {
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    } else {
        ctx->Yi.d[3] = ctr;
    }
}
Packit c4476c
Packit c4476c
/*
 * Absorb len bytes of additional authenticated data into the hash state.
 * May be called several times; a partial block is XORed into Xi and its
 * fill level remembered in ctx->ares until more data arrives.
 *
 * Returns  0 on success,
 *         -1 if the accumulated AAD length would exceed 2^61 bytes (or the
 *            64-bit sum wrapped),
 *         -2 if message data has already been processed (the recorded
 *            message length is non-zero).
 */
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t k;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    if (ctx->len.u[1] != 0)
        return -2;

    alen += len;
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
        return -1;
    ctx->len.u[0] = alen;

    n = ctx->ares;
    if (n != 0) {
        /* Top up the partial block left over from an earlier call */
        for (; n != 0 && len != 0; --len, n = (n + 1) % 16)
            ctx->Xi.c[n] ^= *(aad++);

        if (n != 0) {
            /* Input ran out before the block was complete */
            ctx->ares = n;
            return 0;
        }
        GCM_MUL(ctx);
    }
#ifdef GHASH
    /* Hash every whole 16-byte block in a single call */
    k = len & (size_t)-16;
    if (k != 0) {
        GHASH(ctx, aad, k);
        aad += k;
        len -= k;
    }
#else
    for (; len >= 16; aad += 16, len -= 16) {
        for (k = 0; k < 16; ++k)
            ctx->Xi.c[k] ^= aad[k];
        GCM_MUL(ctx);
    }
#endif
    if (len != 0) {
        /* Stash the trailing bytes; they are multiplied in later */
        n = (unsigned int)len;
        for (k = 0; k < len; ++k)
            ctx->Xi.c[k] ^= aad[k];
    }

    ctx->ares = n;
    return 0;
}
Packit c4476c
Packit c4476c
/*
 * Encrypt len bytes from in to out in counter mode and absorb the produced
 * ciphertext into the hash state.  May be called repeatedly; a partial
 * block is carried between calls through ctx->mres (and, in builds with
 * GHASH, through the ctx->Xn staging buffer).
 *
 * The first call after AAD processing also finishes a pending partial AAD
 * block: in GHASH builds that block is moved into Xn so it is hashed
 * together with the ciphertext that follows, otherwise it is multiplied in
 * immediately.
 *
 * Returns 0 on success, or -1 if the accumulated message length would
 * exceed 2^36 - 32 bytes (or the 64-bit sum wrapped).
 */
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t w;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares != 0) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        /* Defer the partial AAD block: stage it in Xn and restart Xi */
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    /* Recover the host-order counter from the last word of Yi */
    if (is_endian.little) {
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    } else {
        ctr = ctx->Yi.d[3];
    }

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n != 0) {
                /* Use up the keystream left from the previous call */
# if defined(GHASH)
                for (; n != 0 && len != 0; --len, n = (n + 1) % 16)
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];

                if (n != 0) {
                    ctx->mres = mres;
                    return 0;
                }
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
# else
                for (; n != 0 && len != 0; --len, n = (n + 1) % 16)
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];

                if (n != 0) {
                    ctx->mres = n;
                    return 0;
                }
                GCM_MUL(ctx);
                mres = 0;
# endif
            }
# if defined(STRICT_ALIGNMENT)
            /* Word-wise access below needs aligned buffers; else go bytewise */
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres != 0) {
                /* Flush staged bytes before hashing straight from out */
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            for (; len >= GHASH_CHUNK; len -= GHASH_CHUNK) {
                size_t left;

                for (left = GHASH_CHUNK; left != 0; left -= 16) {
                    size_t *dst = (size_t *)out;
                    const size_t *src = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little) {
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    } else {
                        ctx->Yi.d[3] = ctr;
                    }
                    for (w = 0; w < 16 / sizeof(size_t); ++w)
                        dst[w] = src[w] ^ ctx->EKi.t[w];
                    out += 16;
                    in += 16;
                }
                /* Hash the chunk of ciphertext that was just written */
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
            }
#  endif
            if ((len & (size_t)-16) != 0) {
                size_t bulk = len & (size_t)-16;

                for (; len >= 16; len -= 16) {
                    size_t *dst = (size_t *)out;
                    const size_t *src = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little) {
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    } else {
                        ctx->Yi.d[3] = ctr;
                    }
                    for (w = 0; w < 16 / sizeof(size_t); ++w)
                        dst[w] = src[w] ^ ctx->EKi.t[w];
                    out += 16;
                    in += 16;
                }
                GHASH(ctx, out - bulk, bulk);
            }
# else
            for (; len >= 16; len -= 16) {
                size_t *dst = (size_t *)out;
                const size_t *src = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little) {
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                } else {
                    ctx->Yi.d[3] = ctr;
                }
                for (w = 0; w < 16 / sizeof(size_t); ++w)
                    ctx->Xi.t[w] ^= dst[w] = src[w] ^ ctx->EKi.t[w];
                GCM_MUL(ctx);
                out += 16;
                in += 16;
            }
# endif
            if (len != 0) {
                /* Trailing partial block: new keystream, keep the remainder */
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little) {
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                } else {
                    ctx->Yi.d[3] = ctr;
                }
# if defined(GHASH)
                for (; len != 0; --len, ++n)
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
# else
                for (; len != 0; --len, ++n)
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    /* Byte-at-a-time path (small footprint, or misaligned buffers) */
    for (w = 0; w < len; ++w) {
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (is_endian.little) {
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            } else {
                ctx->Yi.d[3] = ctr;
            }
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        ctx->Xn[mres++] = out[w] = in[w] ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            /* Staging buffer is full: hash it and start over */
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        ctx->Xi.c[n] ^= out[w] = in[w] ^ ctx->EKi.c[n];
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
Packit c4476c
Packit c4476c
/*
 * Decrypt len bytes from in to out in counter mode, absorbing the
 * ciphertext (the input) into the hash state.  May be called repeatedly; a
 * partial block is carried between calls through ctx->mres (and, in builds
 * with GHASH, through the ctx->Xn staging buffer).
 *
 * The first call after AAD processing also finishes a pending partial AAD
 * block: in GHASH builds that block is moved into Xn so it is hashed
 * together with the ciphertext that follows, otherwise it is multiplied in
 * immediately.
 *
 * Returns 0 on success, or -1 if the accumulated message length would
 * exceed 2^36 - 32 bytes (or the 64-bit sum wrapped).
 */
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        /* Defer the partial AAD block: stage it in Xn and restart Xi */
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    /* Recover the host-order counter from the last word of Yi */
    if (is_endian.little) {
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    } else {
        ctr = ctx->Yi.d[3];
    }

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
                /* Use up the keystream left from the previous call */
# if defined(GHASH)
                while (n && len) {
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    u8 c = *(in++);

                    *(out++) = c ^ ctx->EKi.c[n];
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            /* Word-wise access below needs aligned buffers; else go bytewise */
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                /* Flush staged bytes before hashing straight from in */
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                /* Hash the ciphertext first: out may overwrite in */
                GHASH(ctx, in, GHASH_CHUNK);
                while (j) {
                    size_t *out_t = (size_t *)out;
                    const size_t *in_t = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little) {
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    } else {
                        ctx->Yi.d[3] = ctr;
                    }
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                GHASH(ctx, in, i);
                while (len >= 16) {
                    size_t *out_t = (size_t *)out;
                    const size_t *in_t = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little) {
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    } else {
                        ctx->Yi.d[3] = ctr;
                    }
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little) {
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                } else {
                    ctx->Yi.d[3] = ctr;
                }
                /*
                 * Process the block one machine word at a time.  The word
                 * pointers must be used here: indexing the byte pointers
                 * in/out would read and write single bytes only.
                 */
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    size_t c = in_t[i];

                    out_t[i] = c ^ ctx->EKi.t[i];
                    ctx->Xi.t[i] ^= c;
                }
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                /* Trailing partial block: new keystream, keep the remainder */
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little) {
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                } else {
                    ctx->Yi.d[3] = ctr;
                }
# if defined(GHASH)
                while (len--) {
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    u8 c = in[n];

                    ctx->Xi.c[n] ^= c;
                    out[n] = c ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    /* Byte-at-a-time path (small footprint, or misaligned buffers) */
    for (i = 0; i < len; ++i) {
        u8 c;

        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (is_endian.little) {
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            } else {
                ctx->Yi.d[3] = ctr;
            }
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            /* Staging buffer is full: hash it and start over */
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        c = in[i];
        out[i] = c ^ ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
Packit c4476c
Packit c4476c
/*
 * CRYPTO_gcm128_encrypt_ctr32 - encrypt |len| bytes from |in| to |out| and
 * fold the produced ciphertext into the running GHASH state, using the
 * caller-supplied |stream| routine for runs of whole 16-byte blocks.
 *
 * With OPENSSL_SMALL_FOOTPRINT defined the call is forwarded unchanged to
 * CRYPTO_gcm128_encrypt() and |stream| is not used.
 *
 * Returns 0 on success, or -1 when the accumulated message length would
 * exceed 2^36 - 32 bytes (or the addition wrapped with an 8-byte size_t).
 * In the code below that check runs before any context field is written.
 *
 * NOTE(review): |stream| is invoked as (in, out, blocks, key, ctx->Yi.c) and
 * the counter stored in ctx->Yi is advanced here after each call, so the
 * routine is presumably expected not to update the counter block itself -
 * confirm against the ctr128_f contract.
 * NOTE(review): GHASH() and GCM_MUL() are macros defined earlier in this
 * file; the comments below assume they fold data into / multiply ctx->Xi.
 */
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
Packit c4476c
                                const unsigned char *in, unsigned char *out,
Packit c4476c
                                size_t len, ctr128_f stream)
Packit c4476c
{
Packit c4476c
#if defined(OPENSSL_SMALL_FOOTPRINT)
Packit c4476c
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
Packit c4476c
#else
    /* Run-time endianness probe: .little reads 1 on little-endian hosts. */
Packit c4476c
    const union {
Packit c4476c
        long one;
Packit c4476c
        char little;
Packit c4476c
    } is_endian = { 1 };
    /*
     * n    - byte offset inside the current keystream block ctx->EKi
     * ctr  - host-order copy of the 32-bit counter kept in ctx->Yi bytes 12..15
     * mres - number of message bytes pending (buffered in ctx->Xn when GHASH
     *        is available, otherwise the offset into ctx->Xi)
     */
Packit c4476c
    unsigned int n, ctr, mres;
Packit c4476c
    size_t i;
Packit c4476c
    u64 mlen = ctx->len.u[1];
Packit c4476c
    void *key = ctx->key;
    /*
     * Local function pointers; presumably these are what the GCM_MUL/GHASH
     * macros expand to when GCM_FUNCREF_4BIT is defined - confirm above.
     */
Packit c4476c
# ifdef GCM_FUNCREF_4BIT
Packit c4476c
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
Packit c4476c
#  ifdef GHASH
Packit c4476c
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
Packit c4476c
                         const u8 *inp, size_t len) = ctx->ghash;
Packit c4476c
#  endif
Packit c4476c
# endif
Packit c4476c
    /* Account for the new data and enforce the total message length limit. */
Packit c4476c
    mlen += len;
Packit c4476c
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
Packit c4476c
        return -1;
Packit c4476c
    ctx->len.u[1] = mlen;
Packit c4476c
Packit c4476c
    mres = ctx->mres;
Packit c4476c
Packit c4476c
    if (ctx->ares) {
Packit c4476c
        /* First call to encrypt finalizes GHASH(AAD) */
Packit c4476c
#if defined(GHASH)
Packit c4476c
        if (len == 0) {
Packit c4476c
            GCM_MUL(ctx);
Packit c4476c
            ctx->ares = 0;
Packit c4476c
            return 0;
Packit c4476c
        }
        /*
         * Defer the multiplication: move the pending Xi block to the front
         * of the Xn buffer, clear Xi, and record 16 buffered bytes so it is
         * hashed together with the ciphertext that follows.
         */
Packit c4476c
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
Packit c4476c
        ctx->Xi.u[0] = 0;
Packit c4476c
        ctx->Xi.u[1] = 0;
Packit c4476c
        mres = sizeof(ctx->Xi);
Packit c4476c
#else
Packit c4476c
        GCM_MUL(ctx);
Packit c4476c
#endif
Packit c4476c
        ctx->ares = 0;
Packit c4476c
    }
Packit c4476c
    /* Load the counter; on little-endian hosts it is byte-swapped on load. */
Packit c4476c
    if (is_endian.little)
Packit c4476c
# ifdef BSWAP4
Packit c4476c
        ctr = BSWAP4(ctx->Yi.d[3]);
Packit c4476c
# else
Packit c4476c
        ctr = GETU32(ctx->Yi.c + 12);
Packit c4476c
# endif
Packit c4476c
    else
Packit c4476c
        ctr = ctx->Yi.d[3];
Packit c4476c
    /*
     * A previous call may have stopped in the middle of a keystream block:
     * use up the remaining bytes of ctx->EKi first.
     */
Packit c4476c
    n = mres % 16;
Packit c4476c
    if (n) {
Packit c4476c
# if defined(GHASH)
Packit c4476c
        while (n && len) {
            /* Emit the ciphertext byte and buffer it for hashing. */
Packit c4476c
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
Packit c4476c
            --len;
Packit c4476c
            n = (n + 1) % 16;
Packit c4476c
        }
        /* Block completed: hash the buffer; otherwise input ran out. */
Packit c4476c
        if (n == 0) {
Packit c4476c
            GHASH(ctx, ctx->Xn, mres);
Packit c4476c
            mres = 0;
Packit c4476c
        } else {
Packit c4476c
            ctx->mres = mres;
Packit c4476c
            return 0;
Packit c4476c
        }
Packit c4476c
# else
Packit c4476c
        while (n && len) {
            /* Emit the ciphertext byte and XOR it straight into Xi. */
Packit c4476c
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
Packit c4476c
            --len;
Packit c4476c
            n = (n + 1) % 16;
Packit c4476c
        }
Packit c4476c
        if (n == 0) {
Packit c4476c
            GCM_MUL(ctx);
Packit c4476c
            mres = 0;
Packit c4476c
        } else {
Packit c4476c
            ctx->mres = n;
Packit c4476c
            return 0;
Packit c4476c
        }
Packit c4476c
# endif
Packit c4476c
    }
    /* From here on n == 0: processing is aligned to a keystream block. */
Packit c4476c
# if defined(GHASH)
        /*
         * Bytes still buffered in Xn (e.g. the deferred AAD block) must be
         * hashed before whole blocks are hashed directly from |out|.
         */
Packit c4476c
        if (len >= 16 && mres) {
Packit c4476c
            GHASH(ctx, ctx->Xn, mres);
Packit c4476c
            mres = 0;
Packit c4476c
        }
Packit c4476c
#  if defined(GHASH_CHUNK)
    /* Bulk path: encrypt GHASH_CHUNK bytes, then hash that ciphertext. */
Packit c4476c
    while (len >= GHASH_CHUNK) {
Packit c4476c
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
Packit c4476c
        ctr += GHASH_CHUNK / 16;
Packit c4476c
        if (is_endian.little)
Packit c4476c
#   ifdef BSWAP4
Packit c4476c
            ctx->Yi.d[3] = BSWAP4(ctr);
Packit c4476c
#   else
Packit c4476c
            PUTU32(ctx->Yi.c + 12, ctr);
Packit c4476c
#   endif
Packit c4476c
        else
Packit c4476c
            ctx->Yi.d[3] = ctr;
Packit c4476c
        GHASH(ctx, out, GHASH_CHUNK);
Packit c4476c
        out += GHASH_CHUNK;
Packit c4476c
        in += GHASH_CHUNK;
Packit c4476c
        len -= GHASH_CHUNK;
Packit c4476c
    }
Packit c4476c
#  endif
Packit c4476c
# endif
    /* Remaining whole blocks: i = byte count rounded down to 16, j = blocks. */
Packit c4476c
    if ((i = (len & (size_t)-16))) {
Packit c4476c
        size_t j = i / 16;
Packit c4476c
Packit c4476c
        (*stream) (in, out, j, key, ctx->Yi.c);
Packit c4476c
        ctr += (unsigned int)j;
Packit c4476c
        if (is_endian.little)
Packit c4476c
# ifdef BSWAP4
Packit c4476c
            ctx->Yi.d[3] = BSWAP4(ctr);
Packit c4476c
# else
Packit c4476c
            PUTU32(ctx->Yi.c + 12, ctr);
Packit c4476c
# endif
Packit c4476c
        else
Packit c4476c
            ctx->Yi.d[3] = ctr;
Packit c4476c
        in += i;
Packit c4476c
        len -= i;
Packit c4476c
# if defined(GHASH)
Packit c4476c
        GHASH(ctx, out, i);
Packit c4476c
        out += i;
Packit c4476c
# else
        /* No GHASH: fold each ciphertext block into Xi; i is reused as index. */
Packit c4476c
        while (j--) {
Packit c4476c
            for (i = 0; i < 16; ++i)
Packit c4476c
                ctx->Xi.c[i] ^= out[i];
Packit c4476c
            GCM_MUL(ctx);
Packit c4476c
            out += 16;
Packit c4476c
        }
Packit c4476c
# endif
Packit c4476c
    }
    /*
     * Trailing partial block: generate one keystream block into ctx->EKi
     * with the single-block cipher, advance the counter, and use only the
     * first |len| keystream bytes.  The rest stays in EKi for the next call.
     */
Packit c4476c
    if (len) {
Packit c4476c
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
Packit c4476c
        ++ctr;
Packit c4476c
        if (is_endian.little)
Packit c4476c
# ifdef BSWAP4
Packit c4476c
            ctx->Yi.d[3] = BSWAP4(ctr);
Packit c4476c
# else
Packit c4476c
            PUTU32(ctx->Yi.c + 12, ctr);
Packit c4476c
# endif
Packit c4476c
        else
Packit c4476c
            ctx->Yi.d[3] = ctr;
Packit c4476c
        while (len--) {
Packit c4476c
# if defined(GHASH)
Packit c4476c
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
Packit c4476c
# else
Packit c4476c
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
Packit c4476c
# endif
Packit c4476c
            ++n;
Packit c4476c
        }
Packit c4476c
    }
Packit c4476c
    /* Remember how many bytes are pending for the next call / finish. */
Packit c4476c
    ctx->mres = mres;
Packit c4476c
    return 0;
Packit c4476c
#endif
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
 * CRYPTO_gcm128_decrypt_ctr32 - decrypt |len| bytes from |in| to |out| and
 * fold the input ciphertext into the running GHASH state, using the
 * caller-supplied |stream| routine for runs of whole 16-byte blocks.
 *
 * With OPENSSL_SMALL_FOOTPRINT defined the call is forwarded unchanged to
 * CRYPTO_gcm128_decrypt() and |stream| is not used.
 *
 * Returns 0 on success, or -1 when the accumulated message length would
 * exceed 2^36 - 32 bytes (or the addition wrapped with an 8-byte size_t).
 * In the code below that check runs before any context field is written.
 * This function does not verify the tag; it only updates the hash state.
 *
 * NOTE(review): |stream| is invoked as (in, out, blocks, key, ctx->Yi.c) and
 * the counter stored in ctx->Yi is advanced here after each call, so the
 * routine is presumably expected not to update the counter block itself -
 * confirm against the ctr128_f contract.
 * NOTE(review): GHASH() and GCM_MUL() are macros defined earlier in this
 * file; the comments below assume they fold data into / multiply ctx->Xi.
 */
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
Packit c4476c
                                const unsigned char *in, unsigned char *out,
Packit c4476c
                                size_t len, ctr128_f stream)
Packit c4476c
{
Packit c4476c
#if defined(OPENSSL_SMALL_FOOTPRINT)
Packit c4476c
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
Packit c4476c
#else
    /* Run-time endianness probe: .little reads 1 on little-endian hosts. */
Packit c4476c
    const union {
Packit c4476c
        long one;
Packit c4476c
        char little;
Packit c4476c
    } is_endian = { 1 };
    /*
     * n    - byte offset inside the current keystream block ctx->EKi
     * ctr  - host-order copy of the 32-bit counter kept in ctx->Yi bytes 12..15
     * mres - number of message bytes pending (buffered in ctx->Xn when GHASH
     *        is available, otherwise the offset into ctx->Xi)
     */
Packit c4476c
    unsigned int n, ctr, mres;
Packit c4476c
    size_t i;
Packit c4476c
    u64 mlen = ctx->len.u[1];
Packit c4476c
    void *key = ctx->key;
    /*
     * Local function pointers; presumably these are what the GCM_MUL/GHASH
     * macros expand to when GCM_FUNCREF_4BIT is defined - confirm above.
     */
Packit c4476c
# ifdef GCM_FUNCREF_4BIT
Packit c4476c
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
Packit c4476c
#  ifdef GHASH
Packit c4476c
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
Packit c4476c
                         const u8 *inp, size_t len) = ctx->ghash;
Packit c4476c
#  endif
Packit c4476c
# endif
Packit c4476c
    /* Account for the new data and enforce the total message length limit. */
Packit c4476c
    mlen += len;
Packit c4476c
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
Packit c4476c
        return -1;
Packit c4476c
    ctx->len.u[1] = mlen;
Packit c4476c
Packit c4476c
    mres = ctx->mres;
Packit c4476c
Packit c4476c
    if (ctx->ares) {
Packit c4476c
        /* First call to decrypt finalizes GHASH(AAD) */
Packit c4476c
# if defined(GHASH)
Packit c4476c
        if (len == 0) {
Packit c4476c
            GCM_MUL(ctx);
Packit c4476c
            ctx->ares = 0;
Packit c4476c
            return 0;
Packit c4476c
        }
        /*
         * Defer the multiplication: move the pending Xi block to the front
         * of the Xn buffer, clear Xi, and record 16 buffered bytes so it is
         * hashed together with the ciphertext that follows.
         */
Packit c4476c
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
Packit c4476c
        ctx->Xi.u[0] = 0;
Packit c4476c
        ctx->Xi.u[1] = 0;
Packit c4476c
        mres = sizeof(ctx->Xi);
Packit c4476c
# else
Packit c4476c
        GCM_MUL(ctx);
Packit c4476c
# endif
Packit c4476c
        ctx->ares = 0;
Packit c4476c
    }
Packit c4476c
    /* Load the counter; on little-endian hosts it is byte-swapped on load. */
Packit c4476c
    if (is_endian.little)
Packit c4476c
# ifdef BSWAP4
Packit c4476c
        ctr = BSWAP4(ctx->Yi.d[3]);
Packit c4476c
# else
Packit c4476c
        ctr = GETU32(ctx->Yi.c + 12);
Packit c4476c
# endif
Packit c4476c
    else
Packit c4476c
        ctr = ctx->Yi.d[3];
Packit c4476c
    /*
     * A previous call may have stopped in the middle of a keystream block:
     * use up the remaining bytes of ctx->EKi first.
     */
Packit c4476c
    n = mres % 16;
Packit c4476c
    if (n) {
Packit c4476c
# if defined(GHASH)
Packit c4476c
        while (n && len) {
            /* Buffer the ciphertext byte for hashing, then emit plaintext. */
Packit c4476c
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
Packit c4476c
            --len;
Packit c4476c
            n = (n + 1) % 16;
Packit c4476c
        }
        /* Block completed: hash the buffer; otherwise input ran out. */
Packit c4476c
        if (n == 0) {
Packit c4476c
            GHASH(ctx, ctx->Xn, mres);
Packit c4476c
            mres = 0;
Packit c4476c
        } else {
Packit c4476c
            ctx->mres = mres;
Packit c4476c
            return 0;
Packit c4476c
        }
Packit c4476c
# else
Packit c4476c
        while (n && len) {
            /* Read the ciphertext byte once; |out| may be written after. */
Packit c4476c
            u8 c = *(in++);
Packit c4476c
            *(out++) = c ^ ctx->EKi.c[n];
Packit c4476c
            ctx->Xi.c[n] ^= c;
Packit c4476c
            --len;
Packit c4476c
            n = (n + 1) % 16;
Packit c4476c
        }
Packit c4476c
        if (n == 0) {
Packit c4476c
            GCM_MUL(ctx);
Packit c4476c
            mres = 0;
Packit c4476c
        } else {
Packit c4476c
            ctx->mres = n;
Packit c4476c
            return 0;
Packit c4476c
        }
Packit c4476c
# endif
Packit c4476c
    }
    /* From here on n == 0: processing is aligned to a keystream block. */
Packit c4476c
# if defined(GHASH)
    /*
     * Bytes still buffered in Xn (e.g. the deferred AAD block) must be
     * hashed before whole blocks are hashed directly from |in|.
     */
Packit c4476c
    if (len >= 16 && mres) {
Packit c4476c
        GHASH(ctx, ctx->Xn, mres);
Packit c4476c
        mres = 0;
Packit c4476c
    }
Packit c4476c
#  if defined(GHASH_CHUNK)
    /*
     * Bulk path: the ciphertext is hashed BEFORE it is decrypted.
     * NOTE(review): presumably so that in-place operation (in == out) still
     * hashes ciphertext rather than plaintext - confirm.
     */
Packit c4476c
    while (len >= GHASH_CHUNK) {
Packit c4476c
        GHASH(ctx, in, GHASH_CHUNK);
Packit c4476c
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
Packit c4476c
        ctr += GHASH_CHUNK / 16;
Packit c4476c
        if (is_endian.little)
Packit c4476c
#   ifdef BSWAP4
Packit c4476c
            ctx->Yi.d[3] = BSWAP4(ctr);
Packit c4476c
#   else
Packit c4476c
            PUTU32(ctx->Yi.c + 12, ctr);
Packit c4476c
#   endif
Packit c4476c
        else
Packit c4476c
            ctx->Yi.d[3] = ctr;
Packit c4476c
        out += GHASH_CHUNK;
Packit c4476c
        in += GHASH_CHUNK;
Packit c4476c
        len -= GHASH_CHUNK;
Packit c4476c
    }
Packit c4476c
#  endif
Packit c4476c
# endif
    /* Remaining whole blocks: i = byte count rounded down to 16, j = blocks. */
Packit c4476c
    if ((i = (len & (size_t)-16))) {
Packit c4476c
        size_t j = i / 16;
Packit c4476c
Packit c4476c
# if defined(GHASH)
Packit c4476c
        GHASH(ctx, in, i);
Packit c4476c
# else
        /*
         * No GHASH: fold each ciphertext block into Xi first.  This walks
         * |in| forward and consumes j, so both are restored afterwards for
         * the decryption pass below.
         */
Packit c4476c
        while (j--) {
Packit c4476c
            size_t k;
Packit c4476c
            for (k = 0; k < 16; ++k)
Packit c4476c
                ctx->Xi.c[k] ^= in[k];
Packit c4476c
            GCM_MUL(ctx);
Packit c4476c
            in += 16;
Packit c4476c
        }
Packit c4476c
        j = i / 16;
Packit c4476c
        in -= i;
Packit c4476c
# endif
Packit c4476c
        (*stream) (in, out, j, key, ctx->Yi.c);
Packit c4476c
        ctr += (unsigned int)j;
Packit c4476c
        if (is_endian.little)
Packit c4476c
# ifdef BSWAP4
Packit c4476c
            ctx->Yi.d[3] = BSWAP4(ctr);
Packit c4476c
# else
Packit c4476c
            PUTU32(ctx->Yi.c + 12, ctr);
Packit c4476c
# endif
Packit c4476c
        else
Packit c4476c
            ctx->Yi.d[3] = ctr;
Packit c4476c
        out += i;
Packit c4476c
        in += i;
Packit c4476c
        len -= i;
Packit c4476c
    }
    /*
     * Trailing partial block: generate one keystream block into ctx->EKi
     * with the single-block cipher, advance the counter, and use only the
     * first |len| keystream bytes.  The rest stays in EKi for the next call.
     */
Packit c4476c
    if (len) {
Packit c4476c
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
Packit c4476c
        ++ctr;
Packit c4476c
        if (is_endian.little)
Packit c4476c
# ifdef BSWAP4
Packit c4476c
            ctx->Yi.d[3] = BSWAP4(ctr);
Packit c4476c
# else
Packit c4476c
            PUTU32(ctx->Yi.c + 12, ctr);
Packit c4476c
# endif
Packit c4476c
        else
Packit c4476c
            ctx->Yi.d[3] = ctr;
Packit c4476c
        while (len--) {
Packit c4476c
# if defined(GHASH)
Packit c4476c
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
Packit c4476c
# else
Packit c4476c
            u8 c = in[n];
Packit c4476c
            ctx->Xi.c[mres++] ^= c;
Packit c4476c
            out[n] = c ^ ctx->EKi.c[n];
Packit c4476c
# endif
Packit c4476c
            ++n;
Packit c4476c
        }
Packit c4476c
    }
Packit c4476c
    /* Remember how many bytes are pending for the next call / finish. */
Packit c4476c
    ctx->mres = mres;
Packit c4476c
    return 0;
Packit c4476c
#endif
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
 * CRYPTO_gcm128_finish - complete the hash computation, leave the final
 * authentication tag in ctx->Xi and optionally compare it with |tag|.
 *
 * The pending partial data is flushed, the two 64-bit bit-lengths derived
 * from ctx->len.u[0] and ctx->len.u[1] are hashed as the last block, and
 * the result is XORed with ctx->EK0.
 *
 * Returns the value of CRYPTO_memcmp(ctx->Xi.c, tag, len) when |tag| is
 * non-NULL and |len| <= sizeof(ctx->Xi); returns -1 otherwise.  ctx->Xi is
 * updated in both cases.
 * NOTE(review): CRYPTO_memcmp is presumably a constant-time comparison that
 * yields 0 on equality - confirm.  A |len| of 0 with a non-NULL |tag|
 * compares no bytes, so callers are expected to validate |len| themselves.
 */
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
Packit c4476c
                         size_t len)
Packit c4476c
{
    /* Run-time endianness probe: .little reads 1 on little-endian hosts. */
Packit c4476c
    const union {
Packit c4476c
        long one;
Packit c4476c
        char little;
Packit c4476c
    } is_endian = { 1 };
    /* Byte counts converted to bit counts (<< 3). */
Packit c4476c
    u64 alen = ctx->len.u[0] << 3;
Packit c4476c
    u64 clen = ctx->len.u[1] << 3;
Packit c4476c
#ifdef GCM_FUNCREF_4BIT
Packit c4476c
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
Packit c4476c
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
Packit c4476c
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
Packit c4476c
                         const u8 *inp, size_t len) = ctx->ghash;
Packit c4476c
# endif
Packit c4476c
#endif
Packit c4476c
Packit c4476c
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
Packit c4476c
    u128 bitlen;
Packit c4476c
    unsigned int mres = ctx->mres;
Packit c4476c
Packit c4476c
    if (mres) {
        /* Round the buffered byte count up to a multiple of 16 ... */
Packit c4476c
        unsigned blocks = (mres + 15) & -16;
Packit c4476c
        /* ... and zero-pad the last partial block in ctx->Xn. */
Packit c4476c
        memset(ctx->Xn + mres, 0, blocks - mres);
Packit c4476c
        mres = blocks;
        /* Buffer full: hash it now to make room for the length block. */
Packit c4476c
        if (mres == sizeof(ctx->Xn)) {
Packit c4476c
            GHASH(ctx, ctx->Xn, mres);
Packit c4476c
            mres = 0;
Packit c4476c
        }
Packit c4476c
    } else if (ctx->ares) {
Packit c4476c
        GCM_MUL(ctx);
Packit c4476c
    }
Packit c4476c
#else
Packit c4476c
    if (ctx->mres || ctx->ares)
Packit c4476c
        GCM_MUL(ctx);
Packit c4476c
#endif
Packit c4476c
    /* The length block is big-endian: convert on little-endian hosts. */
Packit c4476c
    if (is_endian.little) {
Packit c4476c
#ifdef BSWAP8
Packit c4476c
        alen = BSWAP8(alen);
Packit c4476c
        clen = BSWAP8(clen);
Packit c4476c
#else
        /*
         * No 64-bit byte swap available: store the bit counts into ctx->len
         * and re-read them as big-endian 32-bit words.  Note this overwrites
         * the byte counts held in ctx->len.u[] with bit counts.
         */
Packit c4476c
        u8 *p = ctx->len.c;
Packit c4476c
Packit c4476c
        ctx->len.u[0] = alen;
Packit c4476c
        ctx->len.u[1] = clen;
Packit c4476c
Packit c4476c
        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
Packit c4476c
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
Packit c4476c
#endif
Packit c4476c
    }
Packit c4476c
Packit c4476c
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    /* Append the length block to any padded data left in Xn and hash it all. */
Packit c4476c
    bitlen.hi = alen;
Packit c4476c
    bitlen.lo = clen;
Packit c4476c
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
Packit c4476c
    mres += sizeof(bitlen);
Packit c4476c
    GHASH(ctx, ctx->Xn, mres);
Packit c4476c
#else
Packit c4476c
    ctx->Xi.u[0] ^= alen;
Packit c4476c
    ctx->Xi.u[1] ^= clen;
Packit c4476c
    GCM_MUL(ctx);
Packit c4476c
#endif
Packit c4476c
    /* Mask the hash with EK0 to obtain the tag. */
Packit c4476c
    ctx->Xi.u[0] ^= ctx->EK0.u[0];
Packit c4476c
    ctx->Xi.u[1] ^= ctx->EK0.u[1];
Packit c4476c
    /* Compare only when a tag of acceptable length was supplied. */
Packit c4476c
    if (tag && len <= sizeof(ctx->Xi))
Packit c4476c
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
Packit c4476c
    else
Packit c4476c
        return -1;
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
 * CRYPTO_gcm128_tag - finalise the GCM computation and copy the tag out.
 *
 * Writes min(len, sizeof(ctx->Xi.c)) bytes of the computed tag to |tag|;
 * a larger |len| is silently clamped.
 */
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
    size_t copy_len = sizeof(ctx->Xi.c);

    if (len <= sizeof(ctx->Xi.c))
        copy_len = len;

    /*
     * NULL/0 requests finalisation only: the tag is left in ctx->Xi and the
     * (failing) comparison result is deliberately ignored.
     */
    CRYPTO_gcm128_finish(ctx, NULL, 0);
    memcpy(tag, ctx->Xi.c, copy_len);
}
Packit c4476c
Packit c4476c
/*
 * CRYPTO_gcm128_new - allocate a GCM128_CONTEXT and initialise it with
 * |key| and the block cipher routine |block|.
 *
 * Returns the new context, or NULL if the allocation failed.
 */
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
    GCM128_CONTEXT *gcm = OPENSSL_malloc(sizeof(*gcm));

    if (gcm == NULL)
        return NULL;

    CRYPTO_gcm128_init(gcm, key, block);
    return gcm;
}
Packit c4476c
Packit c4476c
/*
 * CRYPTO_gcm128_release - dispose of a context by handing it, together
 * with its size, to OPENSSL_clear_free().
 */
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
    const size_t ctx_size = sizeof(*ctx);

    OPENSSL_clear_free(ctx, ctx_size);
}