Blame crypto/aes/aes_x86core.c

Packit c4476c
/*
Packit c4476c
 * Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
Packit c4476c
 *
Packit c4476c
 * Licensed under the OpenSSL license (the "License").  You may not use
Packit c4476c
 * this file except in compliance with the License.  You can obtain a copy
Packit c4476c
 * in the file LICENSE in the source distribution or at
Packit c4476c
 * https://www.openssl.org/source/license.html
Packit c4476c
 */
Packit c4476c
Packit c4476c
/*
Packit c4476c
 * This is an experimental x86[_64] derivative. It assumes little-endian
Packit c4476c
 * byte order and expects the CPU to sustain unaligned memory references.
Packit c4476c
 * It is used as a playground for cache-timing attack mitigations and
Packit c4476c
 * serves as reference C implementation for x86[_64] as well as some
Packit c4476c
 * other assembly modules.
Packit c4476c
 */
Packit c4476c
Packit c4476c
/**
Packit c4476c
 * rijndael-alg-fst.c
Packit c4476c
 *
Packit c4476c
 * @version 3.0 (December 2000)
Packit c4476c
 *
Packit c4476c
 * Optimised ANSI C code for the Rijndael cipher (now AES)
Packit c4476c
 *
Packit c4476c
 * @author Vincent Rijmen
Packit c4476c
 * @author Antoon Bosselaers
Packit c4476c
 * @author Paulo Barreto
Packit c4476c
 *
Packit c4476c
 * This code is hereby placed in the public domain.
Packit c4476c
 *
Packit c4476c
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
Packit c4476c
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
Packit c4476c
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
Packit c4476c
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
Packit c4476c
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
Packit c4476c
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
Packit c4476c
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
Packit c4476c
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
Packit c4476c
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
Packit c4476c
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
Packit c4476c
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Packit c4476c
 */
Packit c4476c
Packit c4476c
Packit c4476c
#include <assert.h>
Packit c4476c
Packit c4476c
#include <stdlib.h>
Packit c4476c
#include <openssl/aes.h>
Packit c4476c
#include "aes_local.h"
Packit c4476c
Packit c4476c
/*
Packit c4476c
 * These two parameters control which table, 256-byte or 2KB, is
Packit c4476c
 * referenced in outer and respectively inner rounds.
Packit c4476c
 */
Packit c4476c
#define AES_COMPACT_IN_OUTER_ROUNDS
Packit c4476c
#ifdef  AES_COMPACT_IN_OUTER_ROUNDS
Packit c4476c
/* AES_COMPACT_IN_OUTER_ROUNDS costs ~30% in performance, while
Packit c4476c
 * adding AES_COMPACT_IN_INNER_ROUNDS reduces benchmark *further*
Packit c4476c
 * by a factor of ~2. */
Packit c4476c
# undef  AES_COMPACT_IN_INNER_ROUNDS
Packit c4476c
#endif
Packit c4476c
Packit c4476c
#if 1
Packit c4476c
/*
 * Read one byte from every 32-byte stride of a 256-byte table, with the
 * intent of pulling the whole table into the data cache before it is
 * indexed.
 *
 * table: start of a readable region of at least 256 bytes. It is only
 *        read, never written.
 *
 * 32 bytes is taken as the smallest cache-line size in common use, so the
 * offsets touched are 0, 32, 64, ..., 224. The loads go through a
 * volatile-qualified pointer so the compiler has to perform them even
 * though the loaded values are discarded. Reading single bytes (instead of
 * unsigned longs) keeps the access well-defined for any table element type
 * and any alignment, and lets the const qualifier of the argument survive.
 */
static void prefetch256(const void *table)
{
    const volatile unsigned char *p = table;
    size_t off;

    for (off = 0; off < 256; off += 32)
        (void)p[off];
}
Packit c4476c
#else
Packit c4476c
# define prefetch256(t)
Packit c4476c
#endif
Packit c4476c
Packit c4476c
#undef GETU32
Packit c4476c
#define GETU32(p) (*((u32*)(p)))
Packit c4476c
Packit c4476c
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
Packit c4476c
typedef unsigned __int64 u64;
Packit c4476c
#define U64(C)  C##UI64
Packit c4476c
#elif defined(__arch64__)
Packit c4476c
typedef unsigned long u64;
Packit c4476c
#define U64(C)  C##UL
Packit c4476c
#else
Packit c4476c
typedef unsigned long long u64;
Packit c4476c
#define U64(C)  C##ULL
Packit c4476c
#endif
Packit c4476c
Packit c4476c
#undef ROTATE
Packit c4476c
#if defined(_MSC_VER)
Packit c4476c
# define ROTATE(a,n)    _lrotl(a,n)
Packit c4476c
#elif defined(__ICC)
Packit c4476c
# define ROTATE(a,n)    _rotl(a,n)
Packit c4476c
#elif defined(__GNUC__) && __GNUC__>=2
Packit c4476c
# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
Packit c4476c
#   define ROTATE(a,n)  ({ register unsigned int ret;   \
Packit c4476c
                asm (           \
Packit c4476c
                "roll %1,%0"        \
Packit c4476c
                : "=r"(ret)     \
Packit c4476c
                : "I"(n), "0"(a)    \
Packit c4476c
                : "cc");        \
Packit c4476c
               ret;             \
Packit c4476c
            })
Packit c4476c
# endif
Packit c4476c
#endif
Packit c4476c
/*-
Packit c4476c
Te [x] = S [x].[02, 01, 01, 03, 02, 01, 01, 03];
Packit c4476c
Te0[x] = S [x].[02, 01, 01, 03];
Packit c4476c
Te1[x] = S [x].[03, 02, 01, 01];
Packit c4476c
Te2[x] = S [x].[01, 03, 02, 01];
Packit c4476c
Te3[x] = S [x].[01, 01, 03, 02];
Packit c4476c
*/
Packit c4476c
#define Te0 (u32)((u64*)((u8*)Te+0))
Packit c4476c
#define Te1 (u32)((u64*)((u8*)Te+3))
Packit c4476c
#define Te2 (u32)((u64*)((u8*)Te+2))
Packit c4476c
#define Te3 (u32)((u64*)((u8*)Te+1))
Packit c4476c
/*-
Packit c4476c
Td [x] = Si[x].[0e, 09, 0d, 0b, 0e, 09, 0d, 0b];
Packit c4476c
Td0[x] = Si[x].[0e, 09, 0d, 0b];
Packit c4476c
Td1[x] = Si[x].[0b, 0e, 09, 0d];
Packit c4476c
Td2[x] = Si[x].[0d, 0b, 0e, 09];
Packit c4476c
Td3[x] = Si[x].[09, 0d, 0b, 0e];
Packit c4476c
Td4[x] = Si[x].[01];
Packit c4476c
*/
Packit c4476c
#define Td0 (u32)((u64*)((u8*)Td+0))
Packit c4476c
#define Td1 (u32)((u64*)((u8*)Td+3))
Packit c4476c
#define Td2 (u32)((u64*)((u8*)Td+2))
Packit c4476c
#define Td3 (u32)((u64*)((u8*)Td+1))
Packit c4476c
Packit c4476c
static const u64 Te[256] = {
Packit c4476c
    U64(0xa56363c6a56363c6), U64(0x847c7cf8847c7cf8),
Packit c4476c
    U64(0x997777ee997777ee), U64(0x8d7b7bf68d7b7bf6),
Packit c4476c
    U64(0x0df2f2ff0df2f2ff), U64(0xbd6b6bd6bd6b6bd6),
Packit c4476c
    U64(0xb16f6fdeb16f6fde), U64(0x54c5c59154c5c591),
Packit c4476c
    U64(0x5030306050303060), U64(0x0301010203010102),
Packit c4476c
    U64(0xa96767cea96767ce), U64(0x7d2b2b567d2b2b56),
Packit c4476c
    U64(0x19fefee719fefee7), U64(0x62d7d7b562d7d7b5),
Packit c4476c
    U64(0xe6abab4de6abab4d), U64(0x9a7676ec9a7676ec),
Packit c4476c
    U64(0x45caca8f45caca8f), U64(0x9d82821f9d82821f),
Packit c4476c
    U64(0x40c9c98940c9c989), U64(0x877d7dfa877d7dfa),
Packit c4476c
    U64(0x15fafaef15fafaef), U64(0xeb5959b2eb5959b2),
Packit c4476c
    U64(0xc947478ec947478e), U64(0x0bf0f0fb0bf0f0fb),
Packit c4476c
    U64(0xecadad41ecadad41), U64(0x67d4d4b367d4d4b3),
Packit c4476c
    U64(0xfda2a25ffda2a25f), U64(0xeaafaf45eaafaf45),
Packit c4476c
    U64(0xbf9c9c23bf9c9c23), U64(0xf7a4a453f7a4a453),
Packit c4476c
    U64(0x967272e4967272e4), U64(0x5bc0c09b5bc0c09b),
Packit c4476c
    U64(0xc2b7b775c2b7b775), U64(0x1cfdfde11cfdfde1),
Packit c4476c
    U64(0xae93933dae93933d), U64(0x6a26264c6a26264c),
Packit c4476c
    U64(0x5a36366c5a36366c), U64(0x413f3f7e413f3f7e),
Packit c4476c
    U64(0x02f7f7f502f7f7f5), U64(0x4fcccc834fcccc83),
Packit c4476c
    U64(0x5c3434685c343468), U64(0xf4a5a551f4a5a551),
Packit c4476c
    U64(0x34e5e5d134e5e5d1), U64(0x08f1f1f908f1f1f9),
Packit c4476c
    U64(0x937171e2937171e2), U64(0x73d8d8ab73d8d8ab),
Packit c4476c
    U64(0x5331316253313162), U64(0x3f15152a3f15152a),
Packit c4476c
    U64(0x0c0404080c040408), U64(0x52c7c79552c7c795),
Packit c4476c
    U64(0x6523234665232346), U64(0x5ec3c39d5ec3c39d),
Packit c4476c
    U64(0x2818183028181830), U64(0xa1969637a1969637),
Packit c4476c
    U64(0x0f05050a0f05050a), U64(0xb59a9a2fb59a9a2f),
Packit c4476c
    U64(0x0907070e0907070e), U64(0x3612122436121224),
Packit c4476c
    U64(0x9b80801b9b80801b), U64(0x3de2e2df3de2e2df),
Packit c4476c
    U64(0x26ebebcd26ebebcd), U64(0x6927274e6927274e),
Packit c4476c
    U64(0xcdb2b27fcdb2b27f), U64(0x9f7575ea9f7575ea),
Packit c4476c
    U64(0x1b0909121b090912), U64(0x9e83831d9e83831d),
Packit c4476c
    U64(0x742c2c58742c2c58), U64(0x2e1a1a342e1a1a34),
Packit c4476c
    U64(0x2d1b1b362d1b1b36), U64(0xb26e6edcb26e6edc),
Packit c4476c
    U64(0xee5a5ab4ee5a5ab4), U64(0xfba0a05bfba0a05b),
Packit c4476c
    U64(0xf65252a4f65252a4), U64(0x4d3b3b764d3b3b76),
Packit c4476c
    U64(0x61d6d6b761d6d6b7), U64(0xceb3b37dceb3b37d),
Packit c4476c
    U64(0x7b2929527b292952), U64(0x3ee3e3dd3ee3e3dd),
Packit c4476c
    U64(0x712f2f5e712f2f5e), U64(0x9784841397848413),
Packit c4476c
    U64(0xf55353a6f55353a6), U64(0x68d1d1b968d1d1b9),
Packit c4476c
    U64(0x0000000000000000), U64(0x2cededc12cededc1),
Packit c4476c
    U64(0x6020204060202040), U64(0x1ffcfce31ffcfce3),
Packit c4476c
    U64(0xc8b1b179c8b1b179), U64(0xed5b5bb6ed5b5bb6),
Packit c4476c
    U64(0xbe6a6ad4be6a6ad4), U64(0x46cbcb8d46cbcb8d),
Packit c4476c
    U64(0xd9bebe67d9bebe67), U64(0x4b3939724b393972),
Packit c4476c
    U64(0xde4a4a94de4a4a94), U64(0xd44c4c98d44c4c98),
Packit c4476c
    U64(0xe85858b0e85858b0), U64(0x4acfcf854acfcf85),
Packit c4476c
    U64(0x6bd0d0bb6bd0d0bb), U64(0x2aefefc52aefefc5),
Packit c4476c
    U64(0xe5aaaa4fe5aaaa4f), U64(0x16fbfbed16fbfbed),
Packit c4476c
    U64(0xc5434386c5434386), U64(0xd74d4d9ad74d4d9a),
Packit c4476c
    U64(0x5533336655333366), U64(0x9485851194858511),
Packit c4476c
    U64(0xcf45458acf45458a), U64(0x10f9f9e910f9f9e9),
Packit c4476c
    U64(0x0602020406020204), U64(0x817f7ffe817f7ffe),
Packit c4476c
    U64(0xf05050a0f05050a0), U64(0x443c3c78443c3c78),
Packit c4476c
    U64(0xba9f9f25ba9f9f25), U64(0xe3a8a84be3a8a84b),
Packit c4476c
    U64(0xf35151a2f35151a2), U64(0xfea3a35dfea3a35d),
Packit c4476c
    U64(0xc0404080c0404080), U64(0x8a8f8f058a8f8f05),
Packit c4476c
    U64(0xad92923fad92923f), U64(0xbc9d9d21bc9d9d21),
Packit c4476c
    U64(0x4838387048383870), U64(0x04f5f5f104f5f5f1),
Packit c4476c
    U64(0xdfbcbc63dfbcbc63), U64(0xc1b6b677c1b6b677),
Packit c4476c
    U64(0x75dadaaf75dadaaf), U64(0x6321214263212142),
Packit c4476c
    U64(0x3010102030101020), U64(0x1affffe51affffe5),
Packit c4476c
    U64(0x0ef3f3fd0ef3f3fd), U64(0x6dd2d2bf6dd2d2bf),
Packit c4476c
    U64(0x4ccdcd814ccdcd81), U64(0x140c0c18140c0c18),
Packit c4476c
    U64(0x3513132635131326), U64(0x2fececc32fececc3),
Packit c4476c
    U64(0xe15f5fbee15f5fbe), U64(0xa2979735a2979735),
Packit c4476c
    U64(0xcc444488cc444488), U64(0x3917172e3917172e),
Packit c4476c
    U64(0x57c4c49357c4c493), U64(0xf2a7a755f2a7a755),
Packit c4476c
    U64(0x827e7efc827e7efc), U64(0x473d3d7a473d3d7a),
Packit c4476c
    U64(0xac6464c8ac6464c8), U64(0xe75d5dbae75d5dba),
Packit c4476c
    U64(0x2b1919322b191932), U64(0x957373e6957373e6),
Packit c4476c
    U64(0xa06060c0a06060c0), U64(0x9881811998818119),
Packit c4476c
    U64(0xd14f4f9ed14f4f9e), U64(0x7fdcdca37fdcdca3),
Packit c4476c
    U64(0x6622224466222244), U64(0x7e2a2a547e2a2a54),
Packit c4476c
    U64(0xab90903bab90903b), U64(0x8388880b8388880b),
Packit c4476c
    U64(0xca46468cca46468c), U64(0x29eeeec729eeeec7),
Packit c4476c
    U64(0xd3b8b86bd3b8b86b), U64(0x3c1414283c141428),
Packit c4476c
    U64(0x79dedea779dedea7), U64(0xe25e5ebce25e5ebc),
Packit c4476c
    U64(0x1d0b0b161d0b0b16), U64(0x76dbdbad76dbdbad),
Packit c4476c
    U64(0x3be0e0db3be0e0db), U64(0x5632326456323264),
Packit c4476c
    U64(0x4e3a3a744e3a3a74), U64(0x1e0a0a141e0a0a14),
Packit c4476c
    U64(0xdb494992db494992), U64(0x0a06060c0a06060c),
Packit c4476c
    U64(0x6c2424486c242448), U64(0xe45c5cb8e45c5cb8),
Packit c4476c
    U64(0x5dc2c29f5dc2c29f), U64(0x6ed3d3bd6ed3d3bd),
Packit c4476c
    U64(0xefacac43efacac43), U64(0xa66262c4a66262c4),
Packit c4476c
    U64(0xa8919139a8919139), U64(0xa4959531a4959531),
Packit c4476c
    U64(0x37e4e4d337e4e4d3), U64(0x8b7979f28b7979f2),
Packit c4476c
    U64(0x32e7e7d532e7e7d5), U64(0x43c8c88b43c8c88b),
Packit c4476c
    U64(0x5937376e5937376e), U64(0xb76d6ddab76d6dda),
Packit c4476c
    U64(0x8c8d8d018c8d8d01), U64(0x64d5d5b164d5d5b1),
Packit c4476c
    U64(0xd24e4e9cd24e4e9c), U64(0xe0a9a949e0a9a949),
Packit c4476c
    U64(0xb46c6cd8b46c6cd8), U64(0xfa5656acfa5656ac),
Packit c4476c
    U64(0x07f4f4f307f4f4f3), U64(0x25eaeacf25eaeacf),
Packit c4476c
    U64(0xaf6565caaf6565ca), U64(0x8e7a7af48e7a7af4),
Packit c4476c
    U64(0xe9aeae47e9aeae47), U64(0x1808081018080810),
Packit c4476c
    U64(0xd5baba6fd5baba6f), U64(0x887878f0887878f0),
Packit c4476c
    U64(0x6f25254a6f25254a), U64(0x722e2e5c722e2e5c),
Packit c4476c
    U64(0x241c1c38241c1c38), U64(0xf1a6a657f1a6a657),
Packit c4476c
    U64(0xc7b4b473c7b4b473), U64(0x51c6c69751c6c697),
Packit c4476c
    U64(0x23e8e8cb23e8e8cb), U64(0x7cdddda17cdddda1),
Packit c4476c
    U64(0x9c7474e89c7474e8), U64(0x211f1f3e211f1f3e),
Packit c4476c
    U64(0xdd4b4b96dd4b4b96), U64(0xdcbdbd61dcbdbd61),
Packit c4476c
    U64(0x868b8b0d868b8b0d), U64(0x858a8a0f858a8a0f),
Packit c4476c
    U64(0x907070e0907070e0), U64(0x423e3e7c423e3e7c),
Packit c4476c
    U64(0xc4b5b571c4b5b571), U64(0xaa6666ccaa6666cc),
Packit c4476c
    U64(0xd8484890d8484890), U64(0x0503030605030306),
Packit c4476c
    U64(0x01f6f6f701f6f6f7), U64(0x120e0e1c120e0e1c),
Packit c4476c
    U64(0xa36161c2a36161c2), U64(0x5f35356a5f35356a),
Packit c4476c
    U64(0xf95757aef95757ae), U64(0xd0b9b969d0b9b969),
Packit c4476c
    U64(0x9186861791868617), U64(0x58c1c19958c1c199),
Packit c4476c
    U64(0x271d1d3a271d1d3a), U64(0xb99e9e27b99e9e27),
Packit c4476c
    U64(0x38e1e1d938e1e1d9), U64(0x13f8f8eb13f8f8eb),
Packit c4476c
    U64(0xb398982bb398982b), U64(0x3311112233111122),
Packit c4476c
    U64(0xbb6969d2bb6969d2), U64(0x70d9d9a970d9d9a9),
Packit c4476c
    U64(0x898e8e07898e8e07), U64(0xa7949433a7949433),
Packit c4476c
    U64(0xb69b9b2db69b9b2d), U64(0x221e1e3c221e1e3c),
Packit c4476c
    U64(0x9287871592878715), U64(0x20e9e9c920e9e9c9),
Packit c4476c
    U64(0x49cece8749cece87), U64(0xff5555aaff5555aa),
Packit c4476c
    U64(0x7828285078282850), U64(0x7adfdfa57adfdfa5),
Packit c4476c
    U64(0x8f8c8c038f8c8c03), U64(0xf8a1a159f8a1a159),
Packit c4476c
    U64(0x8089890980898909), U64(0x170d0d1a170d0d1a),
Packit c4476c
    U64(0xdabfbf65dabfbf65), U64(0x31e6e6d731e6e6d7),
Packit c4476c
    U64(0xc6424284c6424284), U64(0xb86868d0b86868d0),
Packit c4476c
    U64(0xc3414182c3414182), U64(0xb0999929b0999929),
Packit c4476c
    U64(0x772d2d5a772d2d5a), U64(0x110f0f1e110f0f1e),
Packit c4476c
    U64(0xcbb0b07bcbb0b07b), U64(0xfc5454a8fc5454a8),
Packit c4476c
    U64(0xd6bbbb6dd6bbbb6d), U64(0x3a16162c3a16162c)
Packit c4476c
};
Packit c4476c
Packit c4476c
static const u8 Te4[256] = {
Packit c4476c
    0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
Packit c4476c
    0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
Packit c4476c
    0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
Packit c4476c
    0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
Packit c4476c
    0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
Packit c4476c
    0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
Packit c4476c
    0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
Packit c4476c
    0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
Packit c4476c
    0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
Packit c4476c
    0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
Packit c4476c
    0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
Packit c4476c
    0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
Packit c4476c
    0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
Packit c4476c
    0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
Packit c4476c
    0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
Packit c4476c
    0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
Packit c4476c
    0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
Packit c4476c
    0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
Packit c4476c
    0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
Packit c4476c
    0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
Packit c4476c
    0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
Packit c4476c
    0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
Packit c4476c
    0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
Packit c4476c
    0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
Packit c4476c
    0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
Packit c4476c
    0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
Packit c4476c
    0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
Packit c4476c
    0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
Packit c4476c
    0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
Packit c4476c
    0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
Packit c4476c
    0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
Packit c4476c
    0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
Packit c4476c
};
Packit c4476c
Packit c4476c
static const u64 Td[256] = {
Packit c4476c
    U64(0x50a7f45150a7f451), U64(0x5365417e5365417e),
Packit c4476c
    U64(0xc3a4171ac3a4171a), U64(0x965e273a965e273a),
Packit c4476c
    U64(0xcb6bab3bcb6bab3b), U64(0xf1459d1ff1459d1f),
Packit c4476c
    U64(0xab58faacab58faac), U64(0x9303e34b9303e34b),
Packit c4476c
    U64(0x55fa302055fa3020), U64(0xf66d76adf66d76ad),
Packit c4476c
    U64(0x9176cc889176cc88), U64(0x254c02f5254c02f5),
Packit c4476c
    U64(0xfcd7e54ffcd7e54f), U64(0xd7cb2ac5d7cb2ac5),
Packit c4476c
    U64(0x8044352680443526), U64(0x8fa362b58fa362b5),
Packit c4476c
    U64(0x495ab1de495ab1de), U64(0x671bba25671bba25),
Packit c4476c
    U64(0x980eea45980eea45), U64(0xe1c0fe5de1c0fe5d),
Packit c4476c
    U64(0x02752fc302752fc3), U64(0x12f04c8112f04c81),
Packit c4476c
    U64(0xa397468da397468d), U64(0xc6f9d36bc6f9d36b),
Packit c4476c
    U64(0xe75f8f03e75f8f03), U64(0x959c9215959c9215),
Packit c4476c
    U64(0xeb7a6dbfeb7a6dbf), U64(0xda595295da595295),
Packit c4476c
    U64(0x2d83bed42d83bed4), U64(0xd3217458d3217458),
Packit c4476c
    U64(0x2969e0492969e049), U64(0x44c8c98e44c8c98e),
Packit c4476c
    U64(0x6a89c2756a89c275), U64(0x78798ef478798ef4),
Packit c4476c
    U64(0x6b3e58996b3e5899), U64(0xdd71b927dd71b927),
Packit c4476c
    U64(0xb64fe1beb64fe1be), U64(0x17ad88f017ad88f0),
Packit c4476c
    U64(0x66ac20c966ac20c9), U64(0xb43ace7db43ace7d),
Packit c4476c
    U64(0x184adf63184adf63), U64(0x82311ae582311ae5),
Packit c4476c
    U64(0x6033519760335197), U64(0x457f5362457f5362),
Packit c4476c
    U64(0xe07764b1e07764b1), U64(0x84ae6bbb84ae6bbb),
Packit c4476c
    U64(0x1ca081fe1ca081fe), U64(0x942b08f9942b08f9),
Packit c4476c
    U64(0x5868487058684870), U64(0x19fd458f19fd458f),
Packit c4476c
    U64(0x876cde94876cde94), U64(0xb7f87b52b7f87b52),
Packit c4476c
    U64(0x23d373ab23d373ab), U64(0xe2024b72e2024b72),
Packit c4476c
    U64(0x578f1fe3578f1fe3), U64(0x2aab55662aab5566),
Packit c4476c
    U64(0x0728ebb20728ebb2), U64(0x03c2b52f03c2b52f),
Packit c4476c
    U64(0x9a7bc5869a7bc586), U64(0xa50837d3a50837d3),
Packit c4476c
    U64(0xf2872830f2872830), U64(0xb2a5bf23b2a5bf23),
Packit c4476c
    U64(0xba6a0302ba6a0302), U64(0x5c8216ed5c8216ed),
Packit c4476c
    U64(0x2b1ccf8a2b1ccf8a), U64(0x92b479a792b479a7),
Packit c4476c
    U64(0xf0f207f3f0f207f3), U64(0xa1e2694ea1e2694e),
Packit c4476c
    U64(0xcdf4da65cdf4da65), U64(0xd5be0506d5be0506),
Packit c4476c
    U64(0x1f6234d11f6234d1), U64(0x8afea6c48afea6c4),
Packit c4476c
    U64(0x9d532e349d532e34), U64(0xa055f3a2a055f3a2),
Packit c4476c
    U64(0x32e18a0532e18a05), U64(0x75ebf6a475ebf6a4),
Packit c4476c
    U64(0x39ec830b39ec830b), U64(0xaaef6040aaef6040),
Packit c4476c
    U64(0x069f715e069f715e), U64(0x51106ebd51106ebd),
Packit c4476c
    U64(0xf98a213ef98a213e), U64(0x3d06dd963d06dd96),
Packit c4476c
    U64(0xae053eddae053edd), U64(0x46bde64d46bde64d),
Packit c4476c
    U64(0xb58d5491b58d5491), U64(0x055dc471055dc471),
Packit c4476c
    U64(0x6fd406046fd40604), U64(0xff155060ff155060),
Packit c4476c
    U64(0x24fb981924fb9819), U64(0x97e9bdd697e9bdd6),
Packit c4476c
    U64(0xcc434089cc434089), U64(0x779ed967779ed967),
Packit c4476c
    U64(0xbd42e8b0bd42e8b0), U64(0x888b8907888b8907),
Packit c4476c
    U64(0x385b19e7385b19e7), U64(0xdbeec879dbeec879),
Packit c4476c
    U64(0x470a7ca1470a7ca1), U64(0xe90f427ce90f427c),
Packit c4476c
    U64(0xc91e84f8c91e84f8), U64(0x0000000000000000),
Packit c4476c
    U64(0x8386800983868009), U64(0x48ed2b3248ed2b32),
Packit c4476c
    U64(0xac70111eac70111e), U64(0x4e725a6c4e725a6c),
Packit c4476c
    U64(0xfbff0efdfbff0efd), U64(0x5638850f5638850f),
Packit c4476c
    U64(0x1ed5ae3d1ed5ae3d), U64(0x27392d3627392d36),
Packit c4476c
    U64(0x64d90f0a64d90f0a), U64(0x21a65c6821a65c68),
Packit c4476c
    U64(0xd1545b9bd1545b9b), U64(0x3a2e36243a2e3624),
Packit c4476c
    U64(0xb1670a0cb1670a0c), U64(0x0fe757930fe75793),
Packit c4476c
    U64(0xd296eeb4d296eeb4), U64(0x9e919b1b9e919b1b),
Packit c4476c
    U64(0x4fc5c0804fc5c080), U64(0xa220dc61a220dc61),
Packit c4476c
    U64(0x694b775a694b775a), U64(0x161a121c161a121c),
Packit c4476c
    U64(0x0aba93e20aba93e2), U64(0xe52aa0c0e52aa0c0),
Packit c4476c
    U64(0x43e0223c43e0223c), U64(0x1d171b121d171b12),
Packit c4476c
    U64(0x0b0d090e0b0d090e), U64(0xadc78bf2adc78bf2),
Packit c4476c
    U64(0xb9a8b62db9a8b62d), U64(0xc8a91e14c8a91e14),
Packit c4476c
    U64(0x8519f1578519f157), U64(0x4c0775af4c0775af),
Packit c4476c
    U64(0xbbdd99eebbdd99ee), U64(0xfd607fa3fd607fa3),
Packit c4476c
    U64(0x9f2601f79f2601f7), U64(0xbcf5725cbcf5725c),
Packit c4476c
    U64(0xc53b6644c53b6644), U64(0x347efb5b347efb5b),
Packit c4476c
    U64(0x7629438b7629438b), U64(0xdcc623cbdcc623cb),
Packit c4476c
    U64(0x68fcedb668fcedb6), U64(0x63f1e4b863f1e4b8),
Packit c4476c
    U64(0xcadc31d7cadc31d7), U64(0x1085634210856342),
Packit c4476c
    U64(0x4022971340229713), U64(0x2011c6842011c684),
Packit c4476c
    U64(0x7d244a857d244a85), U64(0xf83dbbd2f83dbbd2),
Packit c4476c
    U64(0x1132f9ae1132f9ae), U64(0x6da129c76da129c7),
Packit c4476c
    U64(0x4b2f9e1d4b2f9e1d), U64(0xf330b2dcf330b2dc),
Packit c4476c
    U64(0xec52860dec52860d), U64(0xd0e3c177d0e3c177),
Packit c4476c
    U64(0x6c16b32b6c16b32b), U64(0x99b970a999b970a9),
Packit c4476c
    U64(0xfa489411fa489411), U64(0x2264e9472264e947),
Packit c4476c
    U64(0xc48cfca8c48cfca8), U64(0x1a3ff0a01a3ff0a0),
Packit c4476c
    U64(0xd82c7d56d82c7d56), U64(0xef903322ef903322),
Packit c4476c
    U64(0xc74e4987c74e4987), U64(0xc1d138d9c1d138d9),
Packit c4476c
    U64(0xfea2ca8cfea2ca8c), U64(0x360bd498360bd498),
Packit c4476c
    U64(0xcf81f5a6cf81f5a6), U64(0x28de7aa528de7aa5),
Packit c4476c
    U64(0x268eb7da268eb7da), U64(0xa4bfad3fa4bfad3f),
Packit c4476c
    U64(0xe49d3a2ce49d3a2c), U64(0x0d9278500d927850),
Packit c4476c
    U64(0x9bcc5f6a9bcc5f6a), U64(0x62467e5462467e54),
Packit c4476c
    U64(0xc2138df6c2138df6), U64(0xe8b8d890e8b8d890),
Packit c4476c
    U64(0x5ef7392e5ef7392e), U64(0xf5afc382f5afc382),
Packit c4476c
    U64(0xbe805d9fbe805d9f), U64(0x7c93d0697c93d069),
Packit c4476c
    U64(0xa92dd56fa92dd56f), U64(0xb31225cfb31225cf),
Packit c4476c
    U64(0x3b99acc83b99acc8), U64(0xa77d1810a77d1810),
Packit c4476c
    U64(0x6e639ce86e639ce8), U64(0x7bbb3bdb7bbb3bdb),
Packit c4476c
    U64(0x097826cd097826cd), U64(0xf418596ef418596e),
Packit c4476c
    U64(0x01b79aec01b79aec), U64(0xa89a4f83a89a4f83),
Packit c4476c
    U64(0x656e95e6656e95e6), U64(0x7ee6ffaa7ee6ffaa),
Packit c4476c
    U64(0x08cfbc2108cfbc21), U64(0xe6e815efe6e815ef),
Packit c4476c
    U64(0xd99be7bad99be7ba), U64(0xce366f4ace366f4a),
Packit c4476c
    U64(0xd4099fead4099fea), U64(0xd67cb029d67cb029),
Packit c4476c
    U64(0xafb2a431afb2a431), U64(0x31233f2a31233f2a),
Packit c4476c
    U64(0x3094a5c63094a5c6), U64(0xc066a235c066a235),
Packit c4476c
    U64(0x37bc4e7437bc4e74), U64(0xa6ca82fca6ca82fc),
Packit c4476c
    U64(0xb0d090e0b0d090e0), U64(0x15d8a73315d8a733),
Packit c4476c
    U64(0x4a9804f14a9804f1), U64(0xf7daec41f7daec41),
Packit c4476c
    U64(0x0e50cd7f0e50cd7f), U64(0x2ff691172ff69117),
Packit c4476c
    U64(0x8dd64d768dd64d76), U64(0x4db0ef434db0ef43),
Packit c4476c
    U64(0x544daacc544daacc), U64(0xdf0496e4df0496e4),
Packit c4476c
    U64(0xe3b5d19ee3b5d19e), U64(0x1b886a4c1b886a4c),
Packit c4476c
    U64(0xb81f2cc1b81f2cc1), U64(0x7f5165467f516546),
Packit c4476c
    U64(0x04ea5e9d04ea5e9d), U64(0x5d358c015d358c01),
Packit c4476c
    U64(0x737487fa737487fa), U64(0x2e410bfb2e410bfb),
Packit c4476c
    U64(0x5a1d67b35a1d67b3), U64(0x52d2db9252d2db92),
Packit c4476c
    U64(0x335610e9335610e9), U64(0x1347d66d1347d66d),
Packit c4476c
    U64(0x8c61d79a8c61d79a), U64(0x7a0ca1377a0ca137),
Packit c4476c
    U64(0x8e14f8598e14f859), U64(0x893c13eb893c13eb),
Packit c4476c
    U64(0xee27a9ceee27a9ce), U64(0x35c961b735c961b7),
Packit c4476c
    U64(0xede51ce1ede51ce1), U64(0x3cb1477a3cb1477a),
Packit c4476c
    U64(0x59dfd29c59dfd29c), U64(0x3f73f2553f73f255),
Packit c4476c
    U64(0x79ce141879ce1418), U64(0xbf37c773bf37c773),
Packit c4476c
    U64(0xeacdf753eacdf753), U64(0x5baafd5f5baafd5f),
Packit c4476c
    U64(0x146f3ddf146f3ddf), U64(0x86db447886db4478),
Packit c4476c
    U64(0x81f3afca81f3afca), U64(0x3ec468b93ec468b9),
Packit c4476c
    U64(0x2c3424382c342438), U64(0x5f40a3c25f40a3c2),
Packit c4476c
    U64(0x72c31d1672c31d16), U64(0x0c25e2bc0c25e2bc),
Packit c4476c
    U64(0x8b493c288b493c28), U64(0x41950dff41950dff),
Packit c4476c
    U64(0x7101a8397101a839), U64(0xdeb30c08deb30c08),
Packit c4476c
    U64(0x9ce4b4d89ce4b4d8), U64(0x90c1566490c15664),
Packit c4476c
    U64(0x6184cb7b6184cb7b), U64(0x70b632d570b632d5),
Packit c4476c
    U64(0x745c6c48745c6c48), U64(0x4257b8d04257b8d0)
Packit c4476c
};
Packit c4476c
static const u8 Td4[256] = {
Packit c4476c
    0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
Packit c4476c
    0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
Packit c4476c
    0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
Packit c4476c
    0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
Packit c4476c
    0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
Packit c4476c
    0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
Packit c4476c
    0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
Packit c4476c
    0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
Packit c4476c
    0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
Packit c4476c
    0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
Packit c4476c
    0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
Packit c4476c
    0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
Packit c4476c
    0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
Packit c4476c
    0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
Packit c4476c
    0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
Packit c4476c
    0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
Packit c4476c
    0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
Packit c4476c
    0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
Packit c4476c
    0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
Packit c4476c
    0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
Packit c4476c
    0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
Packit c4476c
    0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
Packit c4476c
    0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
Packit c4476c
    0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
Packit c4476c
    0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
Packit c4476c
    0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
Packit c4476c
    0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
Packit c4476c
    0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
Packit c4476c
    0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
Packit c4476c
    0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
Packit c4476c
    0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
Packit c4476c
    0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU
Packit c4476c
};
Packit c4476c
Packit c4476c
/*
 * Key-schedule round constants, indexed by the iteration counter of
 * AES_set_encrypt_key. Each entry is the previous one doubled in GF(2^8)
 * (0x80 doubled reduces to 0x1b). The constant sits in the low byte of the
 * word because round-key words are kept in little-endian byte order.
 *
 * For 128-bit blocks, Rijndael never uses more than 10 rcon values.
 */
static const u32 rcon[] = {
    0x00000001U,
    0x00000002U,
    0x00000004U,
    0x00000008U,
    0x00000010U,
    0x00000020U,
    0x00000040U,
    0x00000080U,
    0x0000001bU,
    0x00000036U,
};
Packit c4476c
Packit c4476c
/**
Packit c4476c
 * Expand the cipher key into the encryption key schedule.
Packit c4476c
 */
Packit c4476c
int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
Packit c4476c
                        AES_KEY *key)
Packit c4476c
{
Packit c4476c
Packit c4476c
    u32 *rk;
Packit c4476c
    int i = 0;
Packit c4476c
    u32 temp;
Packit c4476c
Packit c4476c
    if (!userKey || !key)
Packit c4476c
        return -1;
Packit c4476c
    if (bits != 128 && bits != 192 && bits != 256)
Packit c4476c
        return -2;
Packit c4476c
Packit c4476c
    rk = key->rd_key;
Packit c4476c
Packit c4476c
    if (bits==128)
Packit c4476c
        key->rounds = 10;
Packit c4476c
    else if (bits==192)
Packit c4476c
        key->rounds = 12;
Packit c4476c
    else
Packit c4476c
        key->rounds = 14;
Packit c4476c
Packit c4476c
    rk[0] = GETU32(userKey     );
Packit c4476c
    rk[1] = GETU32(userKey +  4);
Packit c4476c
    rk[2] = GETU32(userKey +  8);
Packit c4476c
    rk[3] = GETU32(userKey + 12);
Packit c4476c
    if (bits == 128) {
Packit c4476c
        while (1) {
Packit c4476c
            temp  = rk[3];
Packit c4476c
            rk[4] = rk[0] ^
Packit c4476c
                ((u32)Te4[(temp >>  8) & 0xff]      ) ^
Packit c4476c
                ((u32)Te4[(temp >> 16) & 0xff] <<  8) ^
Packit c4476c
                ((u32)Te4[(temp >> 24)       ] << 16) ^
Packit c4476c
                ((u32)Te4[(temp      ) & 0xff] << 24) ^
Packit c4476c
                rcon[i];
Packit c4476c
            rk[5] = rk[1] ^ rk[4];
Packit c4476c
            rk[6] = rk[2] ^ rk[5];
Packit c4476c
            rk[7] = rk[3] ^ rk[6];
Packit c4476c
            if (++i == 10) {
Packit c4476c
                return 0;
Packit c4476c
            }
Packit c4476c
            rk += 4;
Packit c4476c
        }
Packit c4476c
    }
Packit c4476c
    rk[4] = GETU32(userKey + 16);
Packit c4476c
    rk[5] = GETU32(userKey + 20);
Packit c4476c
    if (bits == 192) {
Packit c4476c
        while (1) {
Packit c4476c
            temp = rk[ 5];
Packit c4476c
            rk[ 6] = rk[ 0] ^
Packit c4476c
                ((u32)Te4[(temp >>  8) & 0xff]      ) ^
Packit c4476c
                ((u32)Te4[(temp >> 16) & 0xff] <<  8) ^
Packit c4476c
                ((u32)Te4[(temp >> 24)       ] << 16) ^
Packit c4476c
                ((u32)Te4[(temp      ) & 0xff] << 24) ^
Packit c4476c
                rcon[i];
Packit c4476c
            rk[ 7] = rk[ 1] ^ rk[ 6];
Packit c4476c
            rk[ 8] = rk[ 2] ^ rk[ 7];
Packit c4476c
            rk[ 9] = rk[ 3] ^ rk[ 8];
Packit c4476c
            if (++i == 8) {
Packit c4476c
                return 0;
Packit c4476c
            }
Packit c4476c
            rk[10] = rk[ 4] ^ rk[ 9];
Packit c4476c
            rk[11] = rk[ 5] ^ rk[10];
Packit c4476c
            rk += 6;
Packit c4476c
        }
Packit c4476c
    }
Packit c4476c
    rk[6] = GETU32(userKey + 24);
Packit c4476c
    rk[7] = GETU32(userKey + 28);
Packit c4476c
    if (bits == 256) {
Packit c4476c
        while (1) {
Packit c4476c
            temp = rk[ 7];
Packit c4476c
            rk[ 8] = rk[ 0] ^
Packit c4476c
                ((u32)Te4[(temp >>  8) & 0xff]      ) ^
Packit c4476c
                ((u32)Te4[(temp >> 16) & 0xff] <<  8) ^
Packit c4476c
                ((u32)Te4[(temp >> 24)       ] << 16) ^
Packit c4476c
                ((u32)Te4[(temp      ) & 0xff] << 24) ^
Packit c4476c
                rcon[i];
Packit c4476c
            rk[ 9] = rk[ 1] ^ rk[ 8];
Packit c4476c
            rk[10] = rk[ 2] ^ rk[ 9];
Packit c4476c
            rk[11] = rk[ 3] ^ rk[10];
Packit c4476c
            if (++i == 7) {
Packit c4476c
                return 0;
Packit c4476c
            }
Packit c4476c
            temp = rk[11];
Packit c4476c
            rk[12] = rk[ 4] ^
Packit c4476c
                ((u32)Te4[(temp      ) & 0xff]      ) ^
Packit c4476c
                ((u32)Te4[(temp >>  8) & 0xff] <<  8) ^
Packit c4476c
                ((u32)Te4[(temp >> 16) & 0xff] << 16) ^
Packit c4476c
                ((u32)Te4[(temp >> 24)       ] << 24);
Packit c4476c
            rk[13] = rk[ 5] ^ rk[12];
Packit c4476c
            rk[14] = rk[ 6] ^ rk[13];
Packit c4476c
            rk[15] = rk[ 7] ^ rk[14];
Packit c4476c
Packit c4476c
            rk += 8;
Packit c4476c
            }
Packit c4476c
    }
Packit c4476c
    return 0;
Packit c4476c
}
Packit c4476c
Packit c4476c
/**
Packit c4476c
 * Expand the cipher key into the decryption key schedule.
Packit c4476c
 */
Packit c4476c
int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
Packit c4476c
                        AES_KEY *key)
Packit c4476c
{
Packit c4476c
Packit c4476c
    u32 *rk;
Packit c4476c
    int i, j, status;
Packit c4476c
    u32 temp;
Packit c4476c
Packit c4476c
    /* first, start with an encryption schedule */
Packit c4476c
    status = AES_set_encrypt_key(userKey, bits, key);
Packit c4476c
    if (status < 0)
Packit c4476c
        return status;
Packit c4476c
Packit c4476c
    rk = key->rd_key;
Packit c4476c
Packit c4476c
    /* invert the order of the round keys: */
Packit c4476c
    for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
Packit c4476c
        temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
Packit c4476c
        temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
Packit c4476c
        temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
Packit c4476c
        temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
Packit c4476c
    }
Packit c4476c
    /* apply the inverse MixColumn transform to all round keys but the first and the last: */
Packit c4476c
    for (i = 1; i < (key->rounds); i++) {
Packit c4476c
        rk += 4;
Packit c4476c
#if 1
Packit c4476c
        for (j = 0; j < 4; j++) {
Packit c4476c
            u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
Packit c4476c
Packit c4476c
            tp1 = rk[j];
Packit c4476c
            m = tp1 & 0x80808080;
Packit c4476c
            tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
Packit c4476c
                ((m - (m >> 7)) & 0x1b1b1b1b);
Packit c4476c
            m = tp2 & 0x80808080;
Packit c4476c
            tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
Packit c4476c
                ((m - (m >> 7)) & 0x1b1b1b1b);
Packit c4476c
            m = tp4 & 0x80808080;
Packit c4476c
            tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
Packit c4476c
                ((m - (m >> 7)) & 0x1b1b1b1b);
Packit c4476c
            tp9 = tp8 ^ tp1;
Packit c4476c
            tpb = tp9 ^ tp2;
Packit c4476c
            tpd = tp9 ^ tp4;
Packit c4476c
            tpe = tp8 ^ tp4 ^ tp2;
Packit c4476c
#if defined(ROTATE)
Packit c4476c
            rk[j] = tpe ^ ROTATE(tpd,16) ^
Packit c4476c
                ROTATE(tp9,8) ^ ROTATE(tpb,24);
Packit c4476c
#else
Packit c4476c
            rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
Packit c4476c
                (tp9 >> 24) ^ (tp9 << 8) ^
Packit c4476c
                (tpb >> 8) ^ (tpb << 24);
Packit c4476c
#endif
Packit c4476c
        }
Packit c4476c
#else
Packit c4476c
        rk[0] =
Packit c4476c
            Td0[Te2[(rk[0]      ) & 0xff] & 0xff] ^
Packit c4476c
            Td1[Te2[(rk[0] >>  8) & 0xff] & 0xff] ^
Packit c4476c
            Td2[Te2[(rk[0] >> 16) & 0xff] & 0xff] ^
Packit c4476c
            Td3[Te2[(rk[0] >> 24)       ] & 0xff];
Packit c4476c
        rk[1] =
Packit c4476c
            Td0[Te2[(rk[1]      ) & 0xff] & 0xff] ^
Packit c4476c
            Td1[Te2[(rk[1] >>  8) & 0xff] & 0xff] ^
Packit c4476c
            Td2[Te2[(rk[1] >> 16) & 0xff] & 0xff] ^
Packit c4476c
            Td3[Te2[(rk[1] >> 24)       ] & 0xff];
Packit c4476c
        rk[2] =
Packit c4476c
            Td0[Te2[(rk[2]      ) & 0xff] & 0xff] ^
Packit c4476c
            Td1[Te2[(rk[2] >>  8) & 0xff] & 0xff] ^
Packit c4476c
            Td2[Te2[(rk[2] >> 16) & 0xff] & 0xff] ^
Packit c4476c
            Td3[Te2[(rk[2] >> 24)       ] & 0xff];
Packit c4476c
        rk[3] =
Packit c4476c
            Td0[Te2[(rk[3]      ) & 0xff] & 0xff] ^
Packit c4476c
            Td1[Te2[(rk[3] >>  8) & 0xff] & 0xff] ^
Packit c4476c
            Td2[Te2[(rk[3] >> 16) & 0xff] & 0xff] ^
Packit c4476c
            Td3[Te2[(rk[3] >> 24)       ] & 0xff];
Packit c4476c
#endif
Packit c4476c
    }
Packit c4476c
    return 0;
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
Packit c4476c
 * Encrypt a single block
Packit c4476c
 * in and out can overlap
Packit c4476c
 */
Packit c4476c
void AES_encrypt(const unsigned char *in, unsigned char *out,
Packit c4476c
                 const AES_KEY *key)
Packit c4476c
{
Packit c4476c
Packit c4476c
    const u32 *rk;
Packit c4476c
    u32 s0, s1, s2, s3, t[4];
Packit c4476c
    int r;
Packit c4476c
Packit c4476c
    assert(in && out && key);
Packit c4476c
    rk = key->rd_key;
Packit c4476c
Packit c4476c
    /*
Packit c4476c
     * map byte array block to cipher state
Packit c4476c
     * and add initial round key:
Packit c4476c
     */
Packit c4476c
    s0 = GETU32(in     ) ^ rk[0];
Packit c4476c
    s1 = GETU32(in +  4) ^ rk[1];
Packit c4476c
    s2 = GETU32(in +  8) ^ rk[2];
Packit c4476c
    s3 = GETU32(in + 12) ^ rk[3];
Packit c4476c
Packit c4476c
#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
Packit c4476c
    prefetch256(Te4);
Packit c4476c
Packit c4476c
    t[0] = (u32)Te4[(s0      ) & 0xff]       ^
Packit c4476c
           (u32)Te4[(s1 >>  8) & 0xff] <<  8 ^
Packit c4476c
           (u32)Te4[(s2 >> 16) & 0xff] << 16 ^
Packit c4476c
           (u32)Te4[(s3 >> 24)       ] << 24;
Packit c4476c
    t[1] = (u32)Te4[(s1      ) & 0xff]       ^
Packit c4476c
           (u32)Te4[(s2 >>  8) & 0xff] <<  8 ^
Packit c4476c
           (u32)Te4[(s3 >> 16) & 0xff] << 16 ^
Packit c4476c
           (u32)Te4[(s0 >> 24)       ] << 24;
Packit c4476c
    t[2] = (u32)Te4[(s2      ) & 0xff]       ^
Packit c4476c
           (u32)Te4[(s3 >>  8) & 0xff] <<  8 ^
Packit c4476c
           (u32)Te4[(s0 >> 16) & 0xff] << 16 ^
Packit c4476c
           (u32)Te4[(s1 >> 24)       ] << 24;
Packit c4476c
    t[3] = (u32)Te4[(s3      ) & 0xff]       ^
Packit c4476c
           (u32)Te4[(s0 >>  8) & 0xff] <<  8 ^
Packit c4476c
           (u32)Te4[(s1 >> 16) & 0xff] << 16 ^
Packit c4476c
           (u32)Te4[(s2 >> 24)       ] << 24;
Packit c4476c
Packit c4476c
    /* now do the linear transform using words */
Packit c4476c
    {   int i;
Packit c4476c
        u32 r0, r1, r2;
Packit c4476c
Packit c4476c
        for (i = 0; i < 4; i++) {
Packit c4476c
            r0 = t[i];
Packit c4476c
            r1 = r0 & 0x80808080;
Packit c4476c
            r2 = ((r0 & 0x7f7f7f7f) << 1) ^
Packit c4476c
                ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
Packit c4476c
#if defined(ROTATE)
Packit c4476c
            t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
Packit c4476c
                ROTATE(r0,16) ^ ROTATE(r0,8);
Packit c4476c
#else
Packit c4476c
            t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
Packit c4476c
                (r0 << 16) ^ (r0 >> 16) ^
Packit c4476c
                (r0 << 8) ^ (r0 >> 24);
Packit c4476c
#endif
Packit c4476c
            t[i] ^= rk[4+i];
Packit c4476c
        }
Packit c4476c
    }
Packit c4476c
#else
Packit c4476c
    t[0] =  Te0[(s0      ) & 0xff] ^
Packit c4476c
        Te1[(s1 >>  8) & 0xff] ^
Packit c4476c
        Te2[(s2 >> 16) & 0xff] ^
Packit c4476c
        Te3[(s3 >> 24)       ] ^
Packit c4476c
        rk[4];
Packit c4476c
    t[1] =  Te0[(s1      ) & 0xff] ^
Packit c4476c
        Te1[(s2 >>  8) & 0xff] ^
Packit c4476c
        Te2[(s3 >> 16) & 0xff] ^
Packit c4476c
        Te3[(s0 >> 24)       ] ^
Packit c4476c
        rk[5];
Packit c4476c
    t[2] =  Te0[(s2      ) & 0xff] ^
Packit c4476c
        Te1[(s3 >>  8) & 0xff] ^
Packit c4476c
        Te2[(s0 >> 16) & 0xff] ^
Packit c4476c
        Te3[(s1 >> 24)       ] ^
Packit c4476c
        rk[6];
Packit c4476c
    t[3] =  Te0[(s3      ) & 0xff] ^
Packit c4476c
        Te1[(s0 >>  8) & 0xff] ^
Packit c4476c
        Te2[(s1 >> 16) & 0xff] ^
Packit c4476c
        Te3[(s2 >> 24)       ] ^
Packit c4476c
        rk[7];
Packit c4476c
#endif
Packit c4476c
    s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
Packit c4476c
Packit c4476c
    /*
Packit c4476c
     * Nr - 2 full rounds:
Packit c4476c
     */
Packit c4476c
    for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
Packit c4476c
#if defined(AES_COMPACT_IN_INNER_ROUNDS)
Packit c4476c
        t[0] = (u32)Te4[(s0      ) & 0xff]       ^
Packit c4476c
               (u32)Te4[(s1 >>  8) & 0xff] <<  8 ^
Packit c4476c
               (u32)Te4[(s2 >> 16) & 0xff] << 16 ^
Packit c4476c
               (u32)Te4[(s3 >> 24)       ] << 24;
Packit c4476c
        t[1] = (u32)Te4[(s1      ) & 0xff]       ^
Packit c4476c
               (u32)Te4[(s2 >>  8) & 0xff] <<  8 ^
Packit c4476c
               (u32)Te4[(s3 >> 16) & 0xff] << 16 ^
Packit c4476c
               (u32)Te4[(s0 >> 24)       ] << 24;
Packit c4476c
        t[2] = (u32)Te4[(s2      ) & 0xff]       ^
Packit c4476c
               (u32)Te4[(s3 >>  8) & 0xff] <<  8 ^
Packit c4476c
               (u32)Te4[(s0 >> 16) & 0xff] << 16 ^
Packit c4476c
               (u32)Te4[(s1 >> 24)       ] << 24;
Packit c4476c
        t[3] = (u32)Te4[(s3      ) & 0xff]       ^
Packit c4476c
               (u32)Te4[(s0 >>  8) & 0xff] <<  8 ^
Packit c4476c
               (u32)Te4[(s1 >> 16) & 0xff] << 16 ^
Packit c4476c
               (u32)Te4[(s2 >> 24)       ] << 24;
Packit c4476c
Packit c4476c
        /* now do the linear transform using words */
Packit c4476c
        {
Packit c4476c
            int i;
Packit c4476c
            u32 r0, r1, r2;
Packit c4476c
Packit c4476c
            for (i = 0; i < 4; i++) {
Packit c4476c
                r0 = t[i];
Packit c4476c
                r1 = r0 & 0x80808080;
Packit c4476c
                r2 = ((r0 & 0x7f7f7f7f) << 1) ^
Packit c4476c
                    ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
Packit c4476c
#if defined(ROTATE)
Packit c4476c
                t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
Packit c4476c
                    ROTATE(r0,16) ^ ROTATE(r0,8);
Packit c4476c
#else
Packit c4476c
                t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
Packit c4476c
                    (r0 << 16) ^ (r0 >> 16) ^
Packit c4476c
                    (r0 << 8) ^ (r0 >> 24);
Packit c4476c
#endif
Packit c4476c
                t[i] ^= rk[i];
Packit c4476c
            }
Packit c4476c
        }
Packit c4476c
#else
Packit c4476c
        t[0] =  Te0[(s0      ) & 0xff] ^
Packit c4476c
            Te1[(s1 >>  8) & 0xff] ^
Packit c4476c
            Te2[(s2 >> 16) & 0xff] ^
Packit c4476c
            Te3[(s3 >> 24)       ] ^
Packit c4476c
            rk[0];
Packit c4476c
        t[1] =  Te0[(s1      ) & 0xff] ^
Packit c4476c
            Te1[(s2 >>  8) & 0xff] ^
Packit c4476c
            Te2[(s3 >> 16) & 0xff] ^
Packit c4476c
            Te3[(s0 >> 24)       ] ^
Packit c4476c
            rk[1];
Packit c4476c
        t[2] =  Te0[(s2      ) & 0xff] ^
Packit c4476c
            Te1[(s3 >>  8) & 0xff] ^
Packit c4476c
            Te2[(s0 >> 16) & 0xff] ^
Packit c4476c
            Te3[(s1 >> 24)       ] ^
Packit c4476c
            rk[2];
Packit c4476c
        t[3] =  Te0[(s3      ) & 0xff] ^
Packit c4476c
            Te1[(s0 >>  8) & 0xff] ^
Packit c4476c
            Te2[(s1 >> 16) & 0xff] ^
Packit c4476c
            Te3[(s2 >> 24)       ] ^
Packit c4476c
            rk[3];
Packit c4476c
#endif
Packit c4476c
        s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
Packit c4476c
    }
Packit c4476c
    /*
Packit c4476c
     * apply last round and
Packit c4476c
     * map cipher state to byte array block:
Packit c4476c
     */
Packit c4476c
#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
Packit c4476c
    prefetch256(Te4);
Packit c4476c
Packit c4476c
    *(u32*)(out+0) =
Packit c4476c
           (u32)Te4[(s0      ) & 0xff]       ^
Packit c4476c
           (u32)Te4[(s1 >>  8) & 0xff] <<  8 ^
Packit c4476c
           (u32)Te4[(s2 >> 16) & 0xff] << 16 ^
Packit c4476c
           (u32)Te4[(s3 >> 24)       ] << 24 ^
Packit c4476c
        rk[0];
Packit c4476c
    *(u32*)(out+4) =
Packit c4476c
           (u32)Te4[(s1      ) & 0xff]       ^
Packit c4476c
           (u32)Te4[(s2 >>  8) & 0xff] <<  8 ^
Packit c4476c
           (u32)Te4[(s3 >> 16) & 0xff] << 16 ^
Packit c4476c
           (u32)Te4[(s0 >> 24)       ] << 24 ^
Packit c4476c
        rk[1];
Packit c4476c
    *(u32*)(out+8) =
Packit c4476c
           (u32)Te4[(s2      ) & 0xff]       ^
Packit c4476c
           (u32)Te4[(s3 >>  8) & 0xff] <<  8 ^
Packit c4476c
           (u32)Te4[(s0 >> 16) & 0xff] << 16 ^
Packit c4476c
           (u32)Te4[(s1 >> 24)       ] << 24 ^
Packit c4476c
        rk[2];
Packit c4476c
    *(u32*)(out+12) =
Packit c4476c
           (u32)Te4[(s3      ) & 0xff]       ^
Packit c4476c
           (u32)Te4[(s0 >>  8) & 0xff] <<  8 ^
Packit c4476c
           (u32)Te4[(s1 >> 16) & 0xff] << 16 ^
Packit c4476c
           (u32)Te4[(s2 >> 24)       ] << 24 ^
Packit c4476c
        rk[3];
Packit c4476c
#else
Packit c4476c
    *(u32*)(out+0) =
Packit c4476c
        (Te2[(s0      ) & 0xff] & 0x000000ffU) ^
Packit c4476c
        (Te3[(s1 >>  8) & 0xff] & 0x0000ff00U) ^
Packit c4476c
        (Te0[(s2 >> 16) & 0xff] & 0x00ff0000U) ^
Packit c4476c
        (Te1[(s3 >> 24)       ] & 0xff000000U) ^
Packit c4476c
        rk[0];
Packit c4476c
    *(u32*)(out+4) =
Packit c4476c
        (Te2[(s1      ) & 0xff] & 0x000000ffU) ^
Packit c4476c
        (Te3[(s2 >>  8) & 0xff] & 0x0000ff00U) ^
Packit c4476c
        (Te0[(s3 >> 16) & 0xff] & 0x00ff0000U) ^
Packit c4476c
        (Te1[(s0 >> 24)       ] & 0xff000000U) ^
Packit c4476c
        rk[1];
Packit c4476c
    *(u32*)(out+8) =
Packit c4476c
        (Te2[(s2      ) & 0xff] & 0x000000ffU) ^
Packit c4476c
        (Te3[(s3 >>  8) & 0xff] & 0x0000ff00U) ^
Packit c4476c
        (Te0[(s0 >> 16) & 0xff] & 0x00ff0000U) ^
Packit c4476c
        (Te1[(s1 >> 24)       ] & 0xff000000U) ^
Packit c4476c
        rk[2];
Packit c4476c
    *(u32*)(out+12) =
Packit c4476c
        (Te2[(s3      ) & 0xff] & 0x000000ffU) ^
Packit c4476c
        (Te3[(s0 >>  8) & 0xff] & 0x0000ff00U) ^
Packit c4476c
        (Te0[(s1 >> 16) & 0xff] & 0x00ff0000U) ^
Packit c4476c
        (Te1[(s2 >> 24)       ] & 0xff000000U) ^
Packit c4476c
        rk[3];
Packit c4476c
#endif
Packit c4476c
}
Packit c4476c
Packit c4476c
/*
Packit c4476c
 * Decrypt a single block
Packit c4476c
 * in and out can overlap
Packit c4476c
 */
Packit c4476c
void AES_decrypt(const unsigned char *in, unsigned char *out,
Packit c4476c
                 const AES_KEY *key)
Packit c4476c
{
Packit c4476c
Packit c4476c
    const u32 *rk;
Packit c4476c
    u32 s0, s1, s2, s3, t[4];
Packit c4476c
    int r;
Packit c4476c
Packit c4476c
    assert(in && out && key);
Packit c4476c
    rk = key->rd_key;
Packit c4476c
Packit c4476c
    /*
Packit c4476c
     * map byte array block to cipher state
Packit c4476c
     * and add initial round key:
Packit c4476c
     */
Packit c4476c
    s0 = GETU32(in     ) ^ rk[0];
Packit c4476c
    s1 = GETU32(in +  4) ^ rk[1];
Packit c4476c
    s2 = GETU32(in +  8) ^ rk[2];
Packit c4476c
    s3 = GETU32(in + 12) ^ rk[3];
Packit c4476c
Packit c4476c
#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
Packit c4476c
    prefetch256(Td4);
Packit c4476c
Packit c4476c
    t[0] = (u32)Td4[(s0      ) & 0xff]       ^
Packit c4476c
           (u32)Td4[(s3 >>  8) & 0xff] <<  8 ^
Packit c4476c
           (u32)Td4[(s2 >> 16) & 0xff] << 16 ^
Packit c4476c
           (u32)Td4[(s1 >> 24)       ] << 24;
Packit c4476c
    t[1] = (u32)Td4[(s1      ) & 0xff]       ^
Packit c4476c
           (u32)Td4[(s0 >>  8) & 0xff] <<  8 ^
Packit c4476c
           (u32)Td4[(s3 >> 16) & 0xff] << 16 ^
Packit c4476c
           (u32)Td4[(s2 >> 24)       ] << 24;
Packit c4476c
    t[2] = (u32)Td4[(s2      ) & 0xff]       ^
Packit c4476c
           (u32)Td4[(s1 >>  8) & 0xff] <<  8 ^
Packit c4476c
           (u32)Td4[(s0 >> 16) & 0xff] << 16 ^
Packit c4476c
           (u32)Td4[(s3 >> 24)       ] << 24;
Packit c4476c
    t[3] = (u32)Td4[(s3      ) & 0xff]       ^
Packit c4476c
           (u32)Td4[(s2 >>  8) & 0xff] <<  8 ^
Packit c4476c
           (u32)Td4[(s1 >> 16) & 0xff] << 16 ^
Packit c4476c
           (u32)Td4[(s0 >> 24)       ] << 24;
Packit c4476c
Packit c4476c
    /* now do the linear transform using words */
Packit c4476c
    {
Packit c4476c
        int i;
Packit c4476c
        u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
Packit c4476c
Packit c4476c
        for (i = 0; i < 4; i++) {
Packit c4476c
            tp1 = t[i];
Packit c4476c
            m = tp1 & 0x80808080;
Packit c4476c
            tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
Packit c4476c
                ((m - (m >> 7)) & 0x1b1b1b1b);
Packit c4476c
            m = tp2 & 0x80808080;
Packit c4476c
            tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
Packit c4476c
                ((m - (m >> 7)) & 0x1b1b1b1b);
Packit c4476c
            m = tp4 & 0x80808080;
Packit c4476c
            tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
Packit c4476c
                ((m - (m >> 7)) & 0x1b1b1b1b);
Packit c4476c
            tp9 = tp8 ^ tp1;
Packit c4476c
            tpb = tp9 ^ tp2;
Packit c4476c
            tpd = tp9 ^ tp4;
Packit c4476c
            tpe = tp8 ^ tp4 ^ tp2;
Packit c4476c
#if defined(ROTATE)
Packit c4476c
            t[i] = tpe ^ ROTATE(tpd,16) ^
Packit c4476c
                ROTATE(tp9,8) ^ ROTATE(tpb,24);
Packit c4476c
#else
Packit c4476c
            t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
Packit c4476c
                (tp9 >> 24) ^ (tp9 << 8) ^
Packit c4476c
                (tpb >> 8) ^ (tpb << 24);
Packit c4476c
#endif
Packit c4476c
            t[i] ^= rk[4+i];
Packit c4476c
        }
Packit c4476c
    }
Packit c4476c
#else
Packit c4476c
    t[0] =  Td0[(s0      ) & 0xff] ^
Packit c4476c
        Td1[(s3 >>  8) & 0xff] ^
Packit c4476c
        Td2[(s2 >> 16) & 0xff] ^
Packit c4476c
        Td3[(s1 >> 24)       ] ^
Packit c4476c
        rk[4];
Packit c4476c
    t[1] =  Td0[(s1      ) & 0xff] ^
Packit c4476c
        Td1[(s0 >>  8) & 0xff] ^
Packit c4476c
        Td2[(s3 >> 16) & 0xff] ^
Packit c4476c
        Td3[(s2 >> 24)       ] ^
Packit c4476c
        rk[5];
Packit c4476c
    t[2] =  Td0[(s2      ) & 0xff] ^
Packit c4476c
        Td1[(s1 >>  8) & 0xff] ^
Packit c4476c
        Td2[(s0 >> 16) & 0xff] ^
Packit c4476c
        Td3[(s3 >> 24)       ] ^
Packit c4476c
        rk[6];
Packit c4476c
    t[3] =  Td0[(s3      ) & 0xff] ^
Packit c4476c
        Td1[(s2 >>  8) & 0xff] ^
Packit c4476c
        Td2[(s1 >> 16) & 0xff] ^
Packit c4476c
        Td3[(s0 >> 24)       ] ^
Packit c4476c
        rk[7];
Packit c4476c
#endif
Packit c4476c
    s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
Packit c4476c
Packit c4476c
    /*
Packit c4476c
     * Nr - 2 full rounds:
Packit c4476c
     */
Packit c4476c
    for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
Packit c4476c
#if defined(AES_COMPACT_IN_INNER_ROUNDS)
Packit c4476c
        t[0] = (u32)Td4[(s0      ) & 0xff]       ^
Packit c4476c
               (u32)Td4[(s3 >>  8) & 0xff] <<  8 ^
Packit c4476c
               (u32)Td4[(s2 >> 16) & 0xff] << 16 ^
Packit c4476c
               (u32)Td4[(s1 >> 24)       ] << 24;
Packit c4476c
        t[1] = (u32)Td4[(s1      ) & 0xff]       ^
Packit c4476c
               (u32)Td4[(s0 >>  8) & 0xff] <<  8 ^
Packit c4476c
               (u32)Td4[(s3 >> 16) & 0xff] << 16 ^
Packit c4476c
               (u32)Td4[(s2 >> 24)       ] << 24;
Packit c4476c
        t[2] = (u32)Td4[(s2      ) & 0xff]       ^
Packit c4476c
               (u32)Td4[(s1 >>  8) & 0xff] <<  8 ^
Packit c4476c
               (u32)Td4[(s0 >> 16) & 0xff] << 16 ^
Packit c4476c
               (u32)Td4[(s3 >> 24)       ] << 24;
Packit c4476c
        t[3] = (u32)Td4[(s3      ) & 0xff]       ^
Packit c4476c
               (u32)Td4[(s2 >>  8) & 0xff] <<  8 ^
Packit c4476c
               (u32)Td4[(s1 >> 16) & 0xff] << 16 ^
Packit c4476c
               (u32)Td4[(s0 >> 24)       ] << 24;
Packit c4476c
Packit c4476c
    /* now do the linear transform using words */
Packit c4476c
    {
Packit c4476c
        int i;
Packit c4476c
        u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
Packit c4476c
Packit c4476c
        for (i = 0; i < 4; i++) {
Packit c4476c
            tp1 = t[i];
Packit c4476c
            m = tp1 & 0x80808080;
Packit c4476c
            tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
Packit c4476c
                ((m - (m >> 7)) & 0x1b1b1b1b);
Packit c4476c
            m = tp2 & 0x80808080;
Packit c4476c
            tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
Packit c4476c
                ((m - (m >> 7)) & 0x1b1b1b1b);
Packit c4476c
            m = tp4 & 0x80808080;
Packit c4476c
            tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
Packit c4476c
                ((m - (m >> 7)) & 0x1b1b1b1b);
Packit c4476c
            tp9 = tp8 ^ tp1;
Packit c4476c
            tpb = tp9 ^ tp2;
Packit c4476c
            tpd = tp9 ^ tp4;
Packit c4476c
            tpe = tp8 ^ tp4 ^ tp2;
Packit c4476c
#if defined(ROTATE)
Packit c4476c
            t[i] = tpe ^ ROTATE(tpd,16) ^
Packit c4476c
                ROTATE(tp9,8) ^ ROTATE(tpb,24);
Packit c4476c
#else
Packit c4476c
            t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
Packit c4476c
                (tp9 >> 24) ^ (tp9 << 8) ^
Packit c4476c
                (tpb >> 8) ^ (tpb << 24);
Packit c4476c
#endif
Packit c4476c
            t[i] ^= rk[i];
Packit c4476c
        }
Packit c4476c
    }
Packit c4476c
#else
Packit c4476c
    t[0] =  Td0[(s0      ) & 0xff] ^
Packit c4476c
        Td1[(s3 >>  8) & 0xff] ^
Packit c4476c
        Td2[(s2 >> 16) & 0xff] ^
Packit c4476c
        Td3[(s1 >> 24)       ] ^
Packit c4476c
        rk[0];
Packit c4476c
    t[1] =  Td0[(s1      ) & 0xff] ^
Packit c4476c
        Td1[(s0 >>  8) & 0xff] ^
Packit c4476c
        Td2[(s3 >> 16) & 0xff] ^
Packit c4476c
        Td3[(s2 >> 24)       ] ^
Packit c4476c
        rk[1];
Packit c4476c
    t[2] =  Td0[(s2      ) & 0xff] ^
Packit c4476c
        Td1[(s1 >>  8) & 0xff] ^
Packit c4476c
        Td2[(s0 >> 16) & 0xff] ^
Packit c4476c
        Td3[(s3 >> 24)       ] ^
Packit c4476c
        rk[2];
Packit c4476c
    t[3] =  Td0[(s3      ) & 0xff] ^
Packit c4476c
        Td1[(s2 >>  8) & 0xff] ^
Packit c4476c
        Td2[(s1 >> 16) & 0xff] ^
Packit c4476c
        Td3[(s0 >> 24)       ] ^
Packit c4476c
        rk[3];
Packit c4476c
#endif
Packit c4476c
    s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
Packit c4476c
    }
Packit c4476c
    /*
Packit c4476c
     * apply last round and
Packit c4476c
     * map cipher state to byte array block:
Packit c4476c
     */
Packit c4476c
    prefetch256(Td4);
Packit c4476c
Packit c4476c
    *(u32*)(out+0) =
Packit c4476c
        ((u32)Td4[(s0      ) & 0xff])    ^
Packit c4476c
        ((u32)Td4[(s3 >>  8) & 0xff] <<  8) ^
Packit c4476c
        ((u32)Td4[(s2 >> 16) & 0xff] << 16) ^
Packit c4476c
        ((u32)Td4[(s1 >> 24)       ] << 24) ^
Packit c4476c
        rk[0];
Packit c4476c
    *(u32*)(out+4) =
Packit c4476c
        ((u32)Td4[(s1      ) & 0xff])     ^
Packit c4476c
        ((u32)Td4[(s0 >>  8) & 0xff] <<  8) ^
Packit c4476c
        ((u32)Td4[(s3 >> 16) & 0xff] << 16) ^
Packit c4476c
        ((u32)Td4[(s2 >> 24)       ] << 24) ^
Packit c4476c
        rk[1];
Packit c4476c
    *(u32*)(out+8) =
Packit c4476c
        ((u32)Td4[(s2      ) & 0xff])     ^
Packit c4476c
        ((u32)Td4[(s1 >>  8) & 0xff] <<  8) ^
Packit c4476c
        ((u32)Td4[(s0 >> 16) & 0xff] << 16) ^
Packit c4476c
        ((u32)Td4[(s3 >> 24)       ] << 24) ^
Packit c4476c
        rk[2];
Packit c4476c
    *(u32*)(out+12) =
Packit c4476c
        ((u32)Td4[(s3      ) & 0xff])     ^
Packit c4476c
        ((u32)Td4[(s2 >>  8) & 0xff] <<  8) ^
Packit c4476c
        ((u32)Td4[(s1 >> 16) & 0xff] << 16) ^
Packit c4476c
        ((u32)Td4[(s0 >> 24)       ] << 24) ^
Packit c4476c
        rk[3];
Packit c4476c
}