|
Packit |
0680ba |
/* serpent-armv7-neon.S - ARM/NEON assembly implementation of Serpent cipher
|
|
Packit |
0680ba |
*
|
|
Packit |
0680ba |
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
|
Packit |
0680ba |
*
|
|
Packit |
0680ba |
* This file is part of Libgcrypt.
|
|
Packit |
0680ba |
*
|
|
Packit |
0680ba |
* Libgcrypt is free software; you can redistribute it and/or modify
|
|
Packit |
0680ba |
* it under the terms of the GNU Lesser General Public License as
|
|
Packit |
0680ba |
* published by the Free Software Foundation; either version 2.1 of
|
|
Packit |
0680ba |
* the License, or (at your option) any later version.
|
|
Packit |
0680ba |
*
|
|
Packit |
0680ba |
* Libgcrypt is distributed in the hope that it will be useful,
|
|
Packit |
0680ba |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
Packit |
0680ba |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
Packit |
0680ba |
* GNU Lesser General Public License for more details.
|
|
Packit |
0680ba |
*
|
|
Packit |
0680ba |
* You should have received a copy of the GNU Lesser General Public
|
|
Packit |
0680ba |
* License along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
Packit |
0680ba |
*/
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
#include <config.h>
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
|
|
Packit |
0680ba |
defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
|
|
Packit |
0680ba |
defined(HAVE_GCC_INLINE_ASM_NEON)
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
.text
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
.syntax unified
|
|
Packit |
0680ba |
.fpu neon
|
|
Packit |
0680ba |
.arm
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* ARM core registers */
#define RROUND	r0	/* round key pointer */

/* NEON vector registers.
 *
 * RA0-RA4 and RB0-RB4 are the two five-register working groups handed to
 * the round macros; RT0-RT3 are temporaries used for round-key handling.
 * Each q register alias is followed by aliases for its two 64-bit halves
 * (<name>d0 is the low half, <name>d1 the high half; qN overlays d(2N) and
 * d(2N+1)).  RT3 has no d-register aliases.
 */

/* Working group A */
#define RA0	q0
#define RA0d0	d0
#define RA0d1	d1
#define RA1	q1
#define RA1d0	d2
#define RA1d1	d3
#define RA2	q2
#define RA2d0	d4
#define RA2d1	d5
#define RA3	q3
#define RA3d0	d6
#define RA3d1	d7
#define RA4	q4
#define RA4d0	d8
#define RA4d1	d9

/* Working group B */
#define RB0	q5
#define RB0d0	d10
#define RB0d1	d11
#define RB1	q6
#define RB1d0	d12
#define RB1d1	d13
#define RB2	q7
#define RB2d0	d14
#define RB2d1	d15
#define RB3	q8
#define RB3d0	d16
#define RB3d1	d17
#define RB4	q9
#define RB4d0	d18
#define RB4d1	d19

/* Temporaries */
#define RT0	q10
#define RT0d0	d20
#define RT0d1	d21
#define RT1	q11
#define RT1d0	d22
#define RT1d1	d23
#define RT2	q12
#define RT2d0	d24
#define RT2d1	d25
#define RT3	q13
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/**********************************************************************
|
|
Packit |
0680ba |
helper macros
|
|
Packit |
0680ba |
**********************************************************************/
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Transpose the 4x4 matrix of 32-bit words held in four q registers.
 *
 * vtrn.32 exchanges elements within each register pair; the vswp
 * instructions then exchange d-register halves between the two pairs.
 * The arguments must be register alias names that have matching <name>d0
 * and <name>d1 aliases, because the d-register names are built by token
 * pasting. */
#define transpose_4x4(_v0, _v1, _v2, _v3) \
	vtrn.32 _v0, _v1; \
	vtrn.32 _v2, _v3; \
	vswp _v0##d1, _v2##d0; \
	vswp _v1##d1, _v3##d0;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/**********************************************************************
|
|
Packit |
0680ba |
8-way serpent
|
|
Packit |
0680ba |
**********************************************************************/
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/*
|
|
Packit |
0680ba |
* These are the S-Boxes of Serpent, taken from the following research paper.
|
|
Packit |
0680ba |
*
|
|
Packit |
0680ba |
* D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference,
|
|
Packit |
0680ba |
* (New York, New York, USA), p. 317–329, National Institute of Standards and
|
|
Packit |
0680ba |
* Technology, 2000.
|
|
Packit |
0680ba |
*
|
|
Packit |
0680ba |
* Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf
|
|
Packit |
0680ba |
*
|
|
Packit |
0680ba |
*/
|
|
Packit |
0680ba |
/* Serpent S-box 0 as a straight-line sequence of NEON bitwise operations,
 * applied to two register groups (a*, b*) in lockstep, one a/b instruction
 * pair per line.  a0-a3 / b0-b3 hold the state on entry; a4 / b4 is the
 * fifth working register and is written before it is read.  The result is
 * left in a permutation of the five registers, which the caller accounts
 * for when naming the registers for the next step. */
#define SBOX0(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	veor a3, a3, a0;	veor b3, b3, b0; \
	vmov a4, a1;		vmov b4, b1; \
	vand a1, a1, a3;	vand b1, b1, b3; \
	veor a4, a4, a2;	veor b4, b4, b2; \
	veor a1, a1, a0;	veor b1, b1, b0; \
	vorr a0, a0, a3;	vorr b0, b0, b3; \
	veor a0, a0, a4;	veor b0, b0, b4; \
	veor a4, a4, a3;	veor b4, b4, b3; \
	veor a3, a3, a2;	veor b3, b3, b2; \
	vorr a2, a2, a1;	vorr b2, b2, b1; \
	veor a2, a2, a4;	veor b2, b2, b4; \
	vmvn a4, a4;		vmvn b4, b4; \
	vorr a4, a4, a1;	vorr b4, b4, b1; \
	veor a1, a1, a3;	veor b1, b1, b3; \
	veor a1, a1, a4;	veor b1, b1, b4; \
	vorr a3, a3, a0;	vorr b3, b3, b0; \
	veor a1, a1, a3;	veor b1, b1, b3; \
	veor a4, a4, a3;	veor b4, b4, b3;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Inverse of Serpent S-box 0 on two register groups (a*, b*) in lockstep,
 * one a/b instruction pair per line.  a4 / b4 is the fifth working
 * register and is written before it is read; the result is left in a
 * permutation of the five registers. */
#define SBOX0_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	vmvn a2, a2;		vmvn b2, b2; \
	vmov a4, a1;		vmov b4, b1; \
	vorr a1, a1, a0;	vorr b1, b1, b0; \
	vmvn a4, a4;		vmvn b4, b4; \
	veor a1, a1, a2;	veor b1, b1, b2; \
	vorr a2, a2, a4;	vorr b2, b2, b4; \
	veor a1, a1, a3;	veor b1, b1, b3; \
	veor a0, a0, a4;	veor b0, b0, b4; \
	veor a2, a2, a0;	veor b2, b2, b0; \
	vand a0, a0, a3;	vand b0, b0, b3; \
	veor a4, a4, a0;	veor b4, b4, b0; \
	vorr a0, a0, a1;	vorr b0, b0, b1; \
	veor a0, a0, a2;	veor b0, b0, b2; \
	veor a3, a3, a4;	veor b3, b3, b4; \
	veor a2, a2, a1;	veor b2, b2, b1; \
	veor a3, a3, a0;	veor b3, b3, b0; \
	veor a3, a3, a1;	veor b3, b3, b1; \
	vand a2, a2, a3;	vand b2, b2, b3; \
	veor a4, a4, a2;	veor b4, b4, b2;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Serpent S-box 1 on two register groups (a*, b*) in lockstep, one a/b
 * instruction pair per line.  a4 / b4 is the fifth working register and is
 * written before it is read; the result is left in a permutation of the
 * five registers. */
#define SBOX1(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	vmvn a0, a0;		vmvn b0, b0; \
	vmvn a2, a2;		vmvn b2, b2; \
	vmov a4, a0;		vmov b4, b0; \
	vand a0, a0, a1;	vand b0, b0, b1; \
	veor a2, a2, a0;	veor b2, b2, b0; \
	vorr a0, a0, a3;	vorr b0, b0, b3; \
	veor a3, a3, a2;	veor b3, b3, b2; \
	veor a1, a1, a0;	veor b1, b1, b0; \
	veor a0, a0, a4;	veor b0, b0, b4; \
	vorr a4, a4, a1;	vorr b4, b4, b1; \
	veor a1, a1, a3;	veor b1, b1, b3; \
	vorr a2, a2, a0;	vorr b2, b2, b0; \
	vand a2, a2, a4;	vand b2, b2, b4; \
	veor a0, a0, a1;	veor b0, b0, b1; \
	vand a1, a1, a2;	vand b1, b1, b2; \
	veor a1, a1, a0;	veor b1, b1, b0; \
	vand a0, a0, a2;	vand b0, b0, b2; \
	veor a0, a0, a4;	veor b0, b0, b4;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Inverse of Serpent S-box 1 on two register groups (a*, b*) in lockstep,
 * one a/b instruction pair per line.  a4 / b4 is the fifth working
 * register and is written before it is read; the result is left in a
 * permutation of the five registers. */
#define SBOX1_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	vmov a4, a1;		vmov b4, b1; \
	veor a1, a1, a3;	veor b1, b1, b3; \
	vand a3, a3, a1;	vand b3, b3, b1; \
	veor a4, a4, a2;	veor b4, b4, b2; \
	veor a3, a3, a0;	veor b3, b3, b0; \
	vorr a0, a0, a1;	vorr b0, b0, b1; \
	veor a2, a2, a3;	veor b2, b2, b3; \
	veor a0, a0, a4;	veor b0, b0, b4; \
	vorr a0, a0, a2;	vorr b0, b0, b2; \
	veor a1, a1, a3;	veor b1, b1, b3; \
	veor a0, a0, a1;	veor b0, b0, b1; \
	vorr a1, a1, a3;	vorr b1, b1, b3; \
	veor a1, a1, a0;	veor b1, b1, b0; \
	vmvn a4, a4;		vmvn b4, b4; \
	veor a4, a4, a1;	veor b4, b4, b1; \
	vorr a1, a1, a0;	vorr b1, b1, b0; \
	veor a1, a1, a0;	veor b1, b1, b0; \
	vorr a1, a1, a4;	vorr b1, b1, b4; \
	veor a3, a3, a1;	veor b3, b3, b1;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Serpent S-box 2 on two register groups (a*, b*) in lockstep, one a/b
 * instruction pair per line.  a4 / b4 is the fifth working register and is
 * written before it is read; the result is left in a permutation of the
 * five registers. */
#define SBOX2(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	vmov a4, a0;		vmov b4, b0; \
	vand a0, a0, a2;	vand b0, b0, b2; \
	veor a0, a0, a3;	veor b0, b0, b3; \
	veor a2, a2, a1;	veor b2, b2, b1; \
	veor a2, a2, a0;	veor b2, b2, b0; \
	vorr a3, a3, a4;	vorr b3, b3, b4; \
	veor a3, a3, a1;	veor b3, b3, b1; \
	veor a4, a4, a2;	veor b4, b4, b2; \
	vmov a1, a3;		vmov b1, b3; \
	vorr a3, a3, a4;	vorr b3, b3, b4; \
	veor a3, a3, a0;	veor b3, b3, b0; \
	vand a0, a0, a1;	vand b0, b0, b1; \
	veor a4, a4, a0;	veor b4, b4, b0; \
	veor a1, a1, a3;	veor b1, b1, b3; \
	veor a1, a1, a4;	veor b1, b1, b4; \
	vmvn a4, a4;		vmvn b4, b4;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Inverse of Serpent S-box 2 on two register groups (a*, b*) in lockstep,
 * one a/b instruction pair per line.  a4 / b4 is the fifth working
 * register and is written before it is read; the result is left in a
 * permutation of the five registers. */
#define SBOX2_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	veor a2, a2, a3;	veor b2, b2, b3; \
	veor a3, a3, a0;	veor b3, b3, b0; \
	vmov a4, a3;		vmov b4, b3; \
	vand a3, a3, a2;	vand b3, b3, b2; \
	veor a3, a3, a1;	veor b3, b3, b1; \
	vorr a1, a1, a2;	vorr b1, b1, b2; \
	veor a1, a1, a4;	veor b1, b1, b4; \
	vand a4, a4, a3;	vand b4, b4, b3; \
	veor a2, a2, a3;	veor b2, b2, b3; \
	vand a4, a4, a0;	vand b4, b4, b0; \
	veor a4, a4, a2;	veor b4, b4, b2; \
	vand a2, a2, a1;	vand b2, b2, b1; \
	vorr a2, a2, a0;	vorr b2, b2, b0; \
	vmvn a3, a3;		vmvn b3, b3; \
	veor a2, a2, a3;	veor b2, b2, b3; \
	veor a0, a0, a3;	veor b0, b0, b3; \
	vand a0, a0, a1;	vand b0, b0, b1; \
	veor a3, a3, a4;	veor b3, b3, b4; \
	veor a3, a3, a0;	veor b3, b3, b0;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Serpent S-box 3 on two register groups (a*, b*) in lockstep, one a/b
 * instruction pair per line.  a4 / b4 is the fifth working register and is
 * written before it is read; the result is left in a permutation of the
 * five registers. */
#define SBOX3(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	vmov a4, a0;		vmov b4, b0; \
	vorr a0, a0, a3;	vorr b0, b0, b3; \
	veor a3, a3, a1;	veor b3, b3, b1; \
	vand a1, a1, a4;	vand b1, b1, b4; \
	veor a4, a4, a2;	veor b4, b4, b2; \
	veor a2, a2, a3;	veor b2, b2, b3; \
	vand a3, a3, a0;	vand b3, b3, b0; \
	vorr a4, a4, a1;	vorr b4, b4, b1; \
	veor a3, a3, a4;	veor b3, b3, b4; \
	veor a0, a0, a1;	veor b0, b0, b1; \
	vand a4, a4, a0;	vand b4, b4, b0; \
	veor a1, a1, a3;	veor b1, b1, b3; \
	veor a4, a4, a2;	veor b4, b4, b2; \
	vorr a1, a1, a0;	vorr b1, b1, b0; \
	veor a1, a1, a2;	veor b1, b1, b2; \
	veor a0, a0, a3;	veor b0, b0, b3; \
	vmov a2, a1;		vmov b2, b1; \
	vorr a1, a1, a3;	vorr b1, b1, b3; \
	veor a1, a1, a0;	veor b1, b1, b0;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Inverse of Serpent S-box 3 on two register groups (a*, b*) in lockstep,
 * one a/b instruction pair per line.  a4 / b4 is the fifth working
 * register and is written before it is read; the result is left in a
 * permutation of the five registers. */
#define SBOX3_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	vmov a4, a2;		vmov b4, b2; \
	veor a2, a2, a1;	veor b2, b2, b1; \
	veor a0, a0, a2;	veor b0, b0, b2; \
	vand a4, a4, a2;	vand b4, b4, b2; \
	veor a4, a4, a0;	veor b4, b4, b0; \
	vand a0, a0, a1;	vand b0, b0, b1; \
	veor a1, a1, a3;	veor b1, b1, b3; \
	vorr a3, a3, a4;	vorr b3, b3, b4; \
	veor a2, a2, a3;	veor b2, b2, b3; \
	veor a0, a0, a3;	veor b0, b0, b3; \
	veor a1, a1, a4;	veor b1, b1, b4; \
	vand a3, a3, a2;	vand b3, b3, b2; \
	veor a3, a3, a1;	veor b3, b3, b1; \
	veor a1, a1, a0;	veor b1, b1, b0; \
	vorr a1, a1, a2;	vorr b1, b1, b2; \
	veor a0, a0, a3;	veor b0, b0, b3; \
	veor a1, a1, a4;	veor b1, b1, b4; \
	veor a0, a0, a1;	veor b0, b0, b1;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Serpent S-box 4 on two register groups (a*, b*) in lockstep, one a/b
 * instruction pair per line.  a4 / b4 is the fifth working register and is
 * written before it is read; the result is left in a permutation of the
 * five registers. */
#define SBOX4(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	veor a1, a1, a3;	veor b1, b1, b3; \
	vmvn a3, a3;		vmvn b3, b3; \
	veor a2, a2, a3;	veor b2, b2, b3; \
	veor a3, a3, a0;	veor b3, b3, b0; \
	vmov a4, a1;		vmov b4, b1; \
	vand a1, a1, a3;	vand b1, b1, b3; \
	veor a1, a1, a2;	veor b1, b1, b2; \
	veor a4, a4, a3;	veor b4, b4, b3; \
	veor a0, a0, a4;	veor b0, b0, b4; \
	vand a2, a2, a4;	vand b2, b2, b4; \
	veor a2, a2, a0;	veor b2, b2, b0; \
	vand a0, a0, a1;	vand b0, b0, b1; \
	veor a3, a3, a0;	veor b3, b3, b0; \
	vorr a4, a4, a1;	vorr b4, b4, b1; \
	veor a4, a4, a0;	veor b4, b4, b0; \
	vorr a0, a0, a3;	vorr b0, b0, b3; \
	veor a0, a0, a2;	veor b0, b0, b2; \
	vand a2, a2, a3;	vand b2, b2, b3; \
	vmvn a0, a0;		vmvn b0, b0; \
	veor a4, a4, a2;	veor b4, b4, b2;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Inverse of Serpent S-box 4 on two register groups (a*, b*) in lockstep,
 * one a/b instruction pair per line.  a4 / b4 is the fifth working
 * register and is written before it is read; the result is left in a
 * permutation of the five registers. */
#define SBOX4_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	vmov a4, a2;		vmov b4, b2; \
	vand a2, a2, a3;	vand b2, b2, b3; \
	veor a2, a2, a1;	veor b2, b2, b1; \
	vorr a1, a1, a3;	vorr b1, b1, b3; \
	vand a1, a1, a0;	vand b1, b1, b0; \
	veor a4, a4, a2;	veor b4, b4, b2; \
	veor a4, a4, a1;	veor b4, b4, b1; \
	vand a1, a1, a2;	vand b1, b1, b2; \
	vmvn a0, a0;		vmvn b0, b0; \
	veor a3, a3, a4;	veor b3, b3, b4; \
	veor a1, a1, a3;	veor b1, b1, b3; \
	vand a3, a3, a0;	vand b3, b3, b0; \
	veor a3, a3, a2;	veor b3, b3, b2; \
	veor a0, a0, a1;	veor b0, b0, b1; \
	vand a2, a2, a0;	vand b2, b2, b0; \
	veor a3, a3, a0;	veor b3, b3, b0; \
	veor a2, a2, a4;	veor b2, b2, b4; \
	vorr a2, a2, a3;	vorr b2, b2, b3; \
	veor a3, a3, a0;	veor b3, b3, b0; \
	veor a2, a2, a1;	veor b2, b2, b1;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Serpent S-box 5 on two register groups (a*, b*) in lockstep, one a/b
 * instruction pair per line.  a4 / b4 is the fifth working register and is
 * written before it is read; the result is left in a permutation of the
 * five registers. */
#define SBOX5(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	veor a0, a0, a1;	veor b0, b0, b1; \
	veor a1, a1, a3;	veor b1, b1, b3; \
	vmvn a3, a3;		vmvn b3, b3; \
	vmov a4, a1;		vmov b4, b1; \
	vand a1, a1, a0;	vand b1, b1, b0; \
	veor a2, a2, a3;	veor b2, b2, b3; \
	veor a1, a1, a2;	veor b1, b1, b2; \
	vorr a2, a2, a4;	vorr b2, b2, b4; \
	veor a4, a4, a3;	veor b4, b4, b3; \
	vand a3, a3, a1;	vand b3, b3, b1; \
	veor a3, a3, a0;	veor b3, b3, b0; \
	veor a4, a4, a1;	veor b4, b4, b1; \
	veor a4, a4, a2;	veor b4, b4, b2; \
	veor a2, a2, a0;	veor b2, b2, b0; \
	vand a0, a0, a3;	vand b0, b0, b3; \
	vmvn a2, a2;		vmvn b2, b2; \
	veor a0, a0, a4;	veor b0, b0, b4; \
	vorr a4, a4, a3;	vorr b4, b4, b3; \
	veor a2, a2, a4;	veor b2, b2, b4;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Inverse of Serpent S-box 5 on two register groups (a*, b*) in lockstep,
 * one a/b instruction pair per line.  a4 / b4 is the fifth working
 * register and is written before it is read; the result is left in a
 * permutation of the five registers. */
#define SBOX5_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	vmvn a1, a1;		vmvn b1, b1; \
	vmov a4, a3;		vmov b4, b3; \
	veor a2, a2, a1;	veor b2, b2, b1; \
	vorr a3, a3, a0;	vorr b3, b3, b0; \
	veor a3, a3, a2;	veor b3, b3, b2; \
	vorr a2, a2, a1;	vorr b2, b2, b1; \
	vand a2, a2, a0;	vand b2, b2, b0; \
	veor a4, a4, a3;	veor b4, b4, b3; \
	veor a2, a2, a4;	veor b2, b2, b4; \
	vorr a4, a4, a0;	vorr b4, b4, b0; \
	veor a4, a4, a1;	veor b4, b4, b1; \
	vand a1, a1, a2;	vand b1, b1, b2; \
	veor a1, a1, a3;	veor b1, b1, b3; \
	veor a4, a4, a2;	veor b4, b4, b2; \
	vand a3, a3, a4;	vand b3, b3, b4; \
	veor a4, a4, a1;	veor b4, b4, b1; \
	veor a3, a3, a4;	veor b3, b3, b4; \
	vmvn a4, a4;		vmvn b4, b4; \
	veor a3, a3, a0;	veor b3, b3, b0;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Serpent S-box 6 on two register groups (a*, b*) in lockstep, one a/b
 * instruction pair per line.  a4 / b4 is the fifth working register and is
 * written before it is read; the result is left in a permutation of the
 * five registers. */
#define SBOX6(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	vmvn a2, a2;		vmvn b2, b2; \
	vmov a4, a3;		vmov b4, b3; \
	vand a3, a3, a0;	vand b3, b3, b0; \
	veor a0, a0, a4;	veor b0, b0, b4; \
	veor a3, a3, a2;	veor b3, b3, b2; \
	vorr a2, a2, a4;	vorr b2, b2, b4; \
	veor a1, a1, a3;	veor b1, b1, b3; \
	veor a2, a2, a0;	veor b2, b2, b0; \
	vorr a0, a0, a1;	vorr b0, b0, b1; \
	veor a2, a2, a1;	veor b2, b2, b1; \
	veor a4, a4, a0;	veor b4, b4, b0; \
	vorr a0, a0, a3;	vorr b0, b0, b3; \
	veor a0, a0, a2;	veor b0, b0, b2; \
	veor a4, a4, a3;	veor b4, b4, b3; \
	veor a4, a4, a0;	veor b4, b4, b0; \
	vmvn a3, a3;		vmvn b3, b3; \
	vand a2, a2, a4;	vand b2, b2, b4; \
	veor a2, a2, a3;	veor b2, b2, b3;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Inverse of Serpent S-box 6 on two register groups (a*, b*) in lockstep,
 * one a/b instruction pair per line.  a4 / b4 is the fifth working
 * register and is written before it is read; the result is left in a
 * permutation of the five registers. */
#define SBOX6_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	veor a0, a0, a2;	veor b0, b0, b2; \
	vmov a4, a2;		vmov b4, b2; \
	vand a2, a2, a0;	vand b2, b2, b0; \
	veor a4, a4, a3;	veor b4, b4, b3; \
	vmvn a2, a2;		vmvn b2, b2; \
	veor a3, a3, a1;	veor b3, b3, b1; \
	veor a2, a2, a3;	veor b2, b2, b3; \
	vorr a4, a4, a0;	vorr b4, b4, b0; \
	veor a0, a0, a2;	veor b0, b0, b2; \
	veor a3, a3, a4;	veor b3, b3, b4; \
	veor a4, a4, a1;	veor b4, b4, b1; \
	vand a1, a1, a3;	vand b1, b1, b3; \
	veor a1, a1, a0;	veor b1, b1, b0; \
	veor a0, a0, a3;	veor b0, b0, b3; \
	vorr a0, a0, a2;	vorr b0, b0, b2; \
	veor a3, a3, a1;	veor b3, b3, b1; \
	veor a4, a4, a0;	veor b4, b4, b0;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Serpent S-box 7 on two register groups (a*, b*) in lockstep, one a/b
 * instruction pair per line.  a4 / b4 is the fifth working register and is
 * written before it is read; the result is left in a permutation of the
 * five registers. */
#define SBOX7(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	vmov a4, a1;		vmov b4, b1; \
	vorr a1, a1, a2;	vorr b1, b1, b2; \
	veor a1, a1, a3;	veor b1, b1, b3; \
	veor a4, a4, a2;	veor b4, b4, b2; \
	veor a2, a2, a1;	veor b2, b2, b1; \
	vorr a3, a3, a4;	vorr b3, b3, b4; \
	vand a3, a3, a0;	vand b3, b3, b0; \
	veor a4, a4, a2;	veor b4, b4, b2; \
	veor a3, a3, a1;	veor b3, b3, b1; \
	vorr a1, a1, a4;	vorr b1, b1, b4; \
	veor a1, a1, a0;	veor b1, b1, b0; \
	vorr a0, a0, a4;	vorr b0, b0, b4; \
	veor a0, a0, a2;	veor b0, b0, b2; \
	veor a1, a1, a4;	veor b1, b1, b4; \
	veor a2, a2, a1;	veor b2, b2, b1; \
	vand a1, a1, a0;	vand b1, b1, b0; \
	veor a1, a1, a4;	veor b1, b1, b4; \
	vmvn a2, a2;		vmvn b2, b2; \
	vorr a2, a2, a0;	vorr b2, b2, b0; \
	veor a4, a4, a2;	veor b4, b4, b2;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Inverse of Serpent S-box 7 on two register groups (a*, b*) in lockstep,
 * one a/b instruction pair per line.  a4 / b4 is the fifth working
 * register and is written before it is read; the result is left in a
 * permutation of the five registers. */
#define SBOX7_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	vmov a4, a2;		vmov b4, b2; \
	veor a2, a2, a0;	veor b2, b2, b0; \
	vand a0, a0, a3;	vand b0, b0, b3; \
	vorr a4, a4, a3;	vorr b4, b4, b3; \
	vmvn a2, a2;		vmvn b2, b2; \
	veor a3, a3, a1;	veor b3, b3, b1; \
	vorr a1, a1, a0;	vorr b1, b1, b0; \
	veor a0, a0, a2;	veor b0, b0, b2; \
	vand a2, a2, a4;	vand b2, b2, b4; \
	vand a3, a3, a4;	vand b3, b3, b4; \
	veor a1, a1, a2;	veor b1, b1, b2; \
	veor a2, a2, a0;	veor b2, b2, b0; \
	vorr a0, a0, a2;	vorr b0, b0, b2; \
	veor a4, a4, a1;	veor b4, b4, b1; \
	veor a0, a0, a3;	veor b0, b0, b3; \
	veor a3, a3, a4;	veor b3, b3, b4; \
	vorr a4, a4, a0;	vorr b4, b4, b0; \
	veor a3, a3, a2;	veor b3, b3, b2; \
	veor a4, a4, a2;	veor b4, b4, b2;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Apply S-box number WHICH to both register groups.  WHICH is pasted onto
   the SBOX prefix, so it must be a literal digit naming one of the SBOXn
   macros. */
#define SBOX(which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	SBOX##which (a0, a1, a2, a3, a4, \
		     b0, b1, b2, b3, b4)
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Apply inverse S-box number WHICH to both register groups.  WHICH is
   pasted between the SBOX prefix and the _INVERSE suffix, so it must be a
   literal digit naming one of the SBOXn_INVERSE macros. */
#define SBOX_INVERSE(which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	SBOX##which##_INVERSE (a0, a1, a2, a3, a4, \
			       b0, b1, b2, b3, b4)
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* XOR the round key held in RT0 into the block state in a0..a3 and b0..b3.
 *
 * Each of the four 32-bit key words is broadcast across a q register and
 * XORed into the matching state register of both groups.  RT0 is the source
 * of the broadcasts, so it is the last register to be overwritten.
 * Clobbers RT0-RT3.  The a4/b4 arguments are not referenced by the body. */
#define BLOCK_XOR_KEY(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	vdup.32 RT3, RT0d0[0]; \
	vdup.32 RT1, RT0d0[1]; \
	vdup.32 RT2, RT0d1[0]; \
	vdup.32 RT0, RT0d1[1]; \
	veor a0, a0, RT3; \
	veor b0, b0, RT3; \
	veor a1, a1, RT1; \
	veor b1, b1, RT1; \
	veor a2, a2, RT2; \
	veor b2, b2, RT2; \
	veor a3, a3, RT0; \
	veor b3, b3, RT0;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Load the 16-byte round key at [RROUND] into RT0 and step RROUND forward
   past it (post-increment writeback). */
#define BLOCK_LOAD_KEY_ENC() \
	vld1.8 {RT0d0, RT0d1}, [RROUND]!;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Load the 16-byte round key at [RROUND] into RT0, then step RROUND back by
   one key (16 bytes).  The expansion has no trailing `;'; the invocation
   supplies it. */
#define BLOCK_LOAD_KEY_DEC() \
	vld1.8 {RT0d0, RT0d1}, [RROUND]; \
	sub RROUND, RROUND, #16
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Apply the Serpent linear transformation to the state in a0..a3 and
 * b0..b3, using a4 / b4 as scratch.  Rotations are built from a left shift
 * into the scratch register, a right shift in place, and an XOR to merge
 * the two halves.  Below, rol(x, n) is a 32-bit left rotate applied to
 * every lane. */
#define LINEAR_TRANSFORMATION(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	/* x0 = rol(x0, 13) */ \
	vshl.u32 a4, a0, #13;		vshl.u32 b4, b0, #13; \
	vshr.u32 a0, a0, #(32-13);	vshr.u32 b0, b0, #(32-13); \
	veor a0, a0, a4;		veor b0, b0, b4; \
	/* x2 = rol(x2, 3) */ \
	vshl.u32 a4, a2, #3;		vshl.u32 b4, b2, #3; \
	vshr.u32 a2, a2, #(32-3);	vshr.u32 b2, b2, #(32-3); \
	veor a2, a2, a4;		veor b2, b2, b4; \
	/* x1 ^= x0 ^ x2 */ \
	veor a1, a1, a0;		veor b1, b1, b0; \
	veor a1, a1, a2;		veor b1, b1, b2; \
	/* x3 ^= x2 ^ (x0 << 3) */ \
	vshl.u32 a4, a0, #3;		vshl.u32 b4, b0, #3; \
	veor a3, a3, a2;		veor b3, b3, b2; \
	veor a3, a3, a4;		veor b3, b3, b4; \
	/* x1 = rol(x1, 1) */ \
	vshl.u32 a4, a1, #1;		vshl.u32 b4, b1, #1; \
	vshr.u32 a1, a1, #(32-1);	vshr.u32 b1, b1, #(32-1); \
	veor a1, a1, a4;		veor b1, b1, b4; \
	/* x3 = rol(x3, 7) */ \
	vshl.u32 a4, a3, #7;		vshl.u32 b4, b3, #7; \
	vshr.u32 a3, a3, #(32-7);	vshr.u32 b3, b3, #(32-7); \
	veor a3, a3, a4;		veor b3, b3, b4; \
	/* x0 ^= x1 ^ x3 */ \
	veor a0, a0, a1;		veor b0, b0, b1; \
	veor a0, a0, a3;		veor b0, b0, b3; \
	/* x2 ^= x3 ^ (x1 << 7) */ \
	vshl.u32 a4, a1, #7;		vshl.u32 b4, b1, #7; \
	veor a2, a2, a3;		veor b2, b2, b3; \
	veor a2, a2, a4;		veor b2, b2, b4; \
	/* x0 = rol(x0, 5) */ \
	vshl.u32 a4, a0, #5;		vshl.u32 b4, b0, #5; \
	vshr.u32 a0, a0, #(32-5);	vshr.u32 b0, b0, #(32-5); \
	veor a0, a0, a4;		veor b0, b0, b4; \
	/* x2 = rol(x2, 22) */ \
	vshl.u32 a4, a2, #22;		vshl.u32 b4, b2, #22; \
	vshr.u32 a2, a2, #(32-22);	vshr.u32 b2, b2, #(32-22); \
	veor a2, a2, a4;		veor b2, b2, b4;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Apply the inverse Serpent linear transformation to the state in a0..a3
 * and b0..b3, using a4 / b4 as scratch.  The steps of the forward
 * transformation are undone in reverse order.  Below, ror(x, n) is a
 * 32-bit right rotate applied to every lane, built from a right shift into
 * the scratch register, a left shift in place, and an XOR. */
#define LINEAR_TRANSFORMATION_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
	/* x2 = ror(x2, 22) */ \
	vshr.u32 a4, a2, #22;		vshr.u32 b4, b2, #22; \
	vshl.u32 a2, a2, #(32-22);	vshl.u32 b2, b2, #(32-22); \
	veor a2, a2, a4;		veor b2, b2, b4; \
	/* x0 = ror(x0, 5) */ \
	vshr.u32 a4, a0, #5;		vshr.u32 b4, b0, #5; \
	vshl.u32 a0, a0, #(32-5);	vshl.u32 b0, b0, #(32-5); \
	veor a0, a0, a4;		veor b0, b0, b4; \
	/* x2 ^= x3 ^ (x1 << 7) */ \
	vshl.u32 a4, a1, #7;		vshl.u32 b4, b1, #7; \
	veor a2, a2, a3;		veor b2, b2, b3; \
	veor a2, a2, a4;		veor b2, b2, b4; \
	/* x0 ^= x1 ^ x3 */ \
	veor a0, a0, a1;		veor b0, b0, b1; \
	veor a0, a0, a3;		veor b0, b0, b3; \
	/* x3 = ror(x3, 7) */ \
	vshr.u32 a4, a3, #7;		vshr.u32 b4, b3, #7; \
	vshl.u32 a3, a3, #(32-7);	vshl.u32 b3, b3, #(32-7); \
	veor a3, a3, a4;		veor b3, b3, b4; \
	/* x1 = ror(x1, 1) */ \
	vshr.u32 a4, a1, #1;		vshr.u32 b4, b1, #1; \
	vshl.u32 a1, a1, #(32-1);	vshl.u32 b1, b1, #(32-1); \
	veor a1, a1, a4;		veor b1, b1, b4; \
	/* x3 ^= x2 ^ (x0 << 3) */ \
	vshl.u32 a4, a0, #3;		vshl.u32 b4, b0, #3; \
	veor a3, a3, a2;		veor b3, b3, b2; \
	veor a3, a3, a4;		veor b3, b3, b4; \
	/* x1 ^= x0 ^ x2 */ \
	veor a1, a1, a0;		veor b1, b1, b0; \
	veor a1, a1, a2;		veor b1, b1, b2; \
	/* x2 = ror(x2, 3) */ \
	vshr.u32 a4, a2, #3;		vshr.u32 b4, b2, #3; \
	vshl.u32 a2, a2, #(32-3);	vshl.u32 b2, b2, #(32-3); \
	veor a2, a2, a4;		veor b2, b2, b4; \
	/* x0 = ror(x0, 13) */ \
	vshr.u32 a4, a0, #13;		vshr.u32 b4, b0, #13; \
	vshl.u32 a0, a0, #(32-13);	vshl.u32 b0, b0, #(32-13); \
	veor a0, a0, a4;		veor b0, b0, b4;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Apply a Serpent round to eight parallel blocks (register groups a* and
 * b*).
 *
 * Steps: XOR in the round key currently held in RT0, load the following
 * round key into RT0, apply S-box WHICH to a0..a4 / b0..b4, then apply the
 * linear transformation to na0..na4 / nb0..nb4, which name the registers
 * holding the S-box output (na4 / nb4 being the scratch register).
 *
 * The `round' argument is not referenced by the body; the position in the
 * key schedule is carried by RROUND, which BLOCK_LOAD_KEY_ENC advances. */
#define ROUND(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \
	      b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \
	BLOCK_XOR_KEY (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \
	BLOCK_LOAD_KEY_ENC (); \
	SBOX (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \
	LINEAR_TRANSFORMATION (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4);
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Apply the last Serpent round to eight parallel blocks (register groups a*
 * and b*).
 *
 * Steps: XOR in the round key currently held in RT0, load the following
 * round key into RT0, apply S-box WHICH to a0..a4 / b0..b4, then XOR that
 * newly loaded key into na0..na3 / nb0..nb3, which name the registers
 * holding the S-box output.  No linear transformation is applied.
 *
 * The `round' argument is not referenced by the body; the position in the
 * key schedule is carried by RROUND, which BLOCK_LOAD_KEY_ENC advances. */
#define ROUND_LAST(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \
		   b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \
	BLOCK_XOR_KEY (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \
	BLOCK_LOAD_KEY_ENC (); \
	SBOX (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \
	BLOCK_XOR_KEY (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4);
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Apply an inverse Serpent round to eight parallel blocks (register groups
 * a* and b*).
 *
 * Steps: apply the inverse linear transformation and then inverse S-box
 * WHICH to a0..a4 / b0..b4, XOR the round key currently held in RT0 into
 * na0..na3 / nb0..nb3 (the registers holding the S-box output), and finally
 * load the next key to be used into RT0.
 *
 * The `round' argument is not referenced by the body; the position in the
 * key schedule is carried by RROUND, which BLOCK_LOAD_KEY_DEC steps
 * backwards. */
#define ROUND_INVERSE(round, which, a0, a1, a2, a3, a4, \
		      na0, na1, na2, na3, na4, \
		      b0, b1, b2, b3, b4, \
		      nb0, nb1, nb2, nb3, nb4) \
	LINEAR_TRANSFORMATION_INVERSE (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \
	SBOX_INVERSE (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \
	BLOCK_XOR_KEY (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4); \
	BLOCK_LOAD_KEY_DEC ();
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
/* Apply the first inverse Serpent round to eight parallel blocks (register
 * groups a* and b*).
 *
 * Steps: XOR in the round key currently held in RT0, load the next key,
 * apply inverse S-box WHICH to a0..a4 / b0..b4, XOR the newly loaded key
 * into na0..na3 / nb0..nb3 (the registers holding the S-box output), and
 * load the key for the following round.  No inverse linear transformation
 * is applied.
 *
 * The `round' argument is not referenced by the body; the position in the
 * key schedule is carried by RROUND, which BLOCK_LOAD_KEY_DEC steps
 * backwards. */
#define ROUND_FIRST_INVERSE(round, which, a0, a1, a2, a3, a4, \
			    na0, na1, na2, na3, na4, \
			    b0, b1, b2, b3, b4, \
			    nb0, nb1, nb2, nb3, nb4) \
	BLOCK_XOR_KEY (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \
	BLOCK_LOAD_KEY_DEC (); \
	SBOX_INVERSE (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); \
	BLOCK_XOR_KEY (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4); \
	BLOCK_LOAD_KEY_DEC ();
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
.align 3
.type __serpent_enc_blk8,%function;
__serpent_enc_blk8:
	/* Internal helper: encrypt eight blocks that are already in NEON
	 * registers.  File-local (no .globl); reached with `bl' from the mode
	 * functions and returns with `bx lr'.
	 *
	 * input:
	 *	r0: round key pointer
	 *	RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext
	 *						blocks
	 * output:
	 *	RA4, RA1, RA2, RA0, RB4, RB1, RB2, RB0: eight parallel
	 *						ciphertext blocks
	 *
	 * Each ROUND names its five A/B registers twice: first in the order
	 * consumed, then in the order handed to the next round.  The second
	 * list of round N is therefore always the first list of round N+1.
	 */

	/* Transpose both groups of four blocks; the first round key load is
	 * placed between the two transposes.  */
	transpose_4x4(RA0, RA1, RA2, RA3);
	BLOCK_LOAD_KEY_ENC ();
	transpose_4x4(RB0, RB1, RB2, RB3);

	/* rounds 0..7 (S-boxes 0..7) */
	ROUND (0, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3,
		     RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3);
	ROUND (1, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3,
		     RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3);
	ROUND (2, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2,
		     RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2);
	ROUND (3, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0,
		     RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0);
	ROUND (4, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3,
		     RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3);
	ROUND (5, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3,
		     RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3);
	ROUND (6, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4,
		     RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4);
	ROUND (7, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3,
		     RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3);

	/* rounds 8..15 (S-boxes 0..7) */
	ROUND (8, 0, RA4, RA1, RA2, RA0, RA3, RA1, RA3, RA2, RA4, RA0,
		     RB4, RB1, RB2, RB0, RB3, RB1, RB3, RB2, RB4, RB0);
	ROUND (9, 1, RA1, RA3, RA2, RA4, RA0, RA2, RA1, RA4, RA3, RA0,
		     RB1, RB3, RB2, RB4, RB0, RB2, RB1, RB4, RB3, RB0);
	ROUND (10, 2, RA2, RA1, RA4, RA3, RA0, RA4, RA3, RA1, RA0, RA2,
		      RB2, RB1, RB4, RB3, RB0, RB4, RB3, RB1, RB0, RB2);
	ROUND (11, 3, RA4, RA3, RA1, RA0, RA2, RA3, RA1, RA0, RA2, RA4,
		      RB4, RB3, RB1, RB0, RB2, RB3, RB1, RB0, RB2, RB4);
	ROUND (12, 4, RA3, RA1, RA0, RA2, RA4, RA1, RA4, RA3, RA2, RA0,
		      RB3, RB1, RB0, RB2, RB4, RB1, RB4, RB3, RB2, RB0);
	ROUND (13, 5, RA1, RA4, RA3, RA2, RA0, RA4, RA2, RA1, RA3, RA0,
		      RB1, RB4, RB3, RB2, RB0, RB4, RB2, RB1, RB3, RB0);
	ROUND (14, 6, RA4, RA2, RA1, RA3, RA0, RA4, RA2, RA0, RA1, RA3,
		      RB4, RB2, RB1, RB3, RB0, RB4, RB2, RB0, RB1, RB3);
	ROUND (15, 7, RA4, RA2, RA0, RA1, RA3, RA3, RA1, RA2, RA4, RA0,
		      RB4, RB2, RB0, RB1, RB3, RB3, RB1, RB2, RB4, RB0);

	/* rounds 16..23 (S-boxes 0..7) */
	ROUND (16, 0, RA3, RA1, RA2, RA4, RA0, RA1, RA0, RA2, RA3, RA4,
		      RB3, RB1, RB2, RB4, RB0, RB1, RB0, RB2, RB3, RB4);
	ROUND (17, 1, RA1, RA0, RA2, RA3, RA4, RA2, RA1, RA3, RA0, RA4,
		      RB1, RB0, RB2, RB3, RB4, RB2, RB1, RB3, RB0, RB4);
	ROUND (18, 2, RA2, RA1, RA3, RA0, RA4, RA3, RA0, RA1, RA4, RA2,
		      RB2, RB1, RB3, RB0, RB4, RB3, RB0, RB1, RB4, RB2);
	ROUND (19, 3, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA4, RA2, RA3,
		      RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB4, RB2, RB3);
	ROUND (20, 4, RA0, RA1, RA4, RA2, RA3, RA1, RA3, RA0, RA2, RA4,
		      RB0, RB1, RB4, RB2, RB3, RB1, RB3, RB0, RB2, RB4);
	ROUND (21, 5, RA1, RA3, RA0, RA2, RA4, RA3, RA2, RA1, RA0, RA4,
		      RB1, RB3, RB0, RB2, RB4, RB3, RB2, RB1, RB0, RB4);
	ROUND (22, 6, RA3, RA2, RA1, RA0, RA4, RA3, RA2, RA4, RA1, RA0,
		      RB3, RB2, RB1, RB0, RB4, RB3, RB2, RB4, RB1, RB0);
	ROUND (23, 7, RA3, RA2, RA4, RA1, RA0, RA0, RA1, RA2, RA3, RA4,
		      RB3, RB2, RB4, RB1, RB0, RB0, RB1, RB2, RB3, RB4);

	/* rounds 24..31 (S-boxes 0..7); register naming repeats rounds 0..7 */
	ROUND (24, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3,
		      RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3);
	ROUND (25, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3,
		      RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3);
	ROUND (26, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2,
		      RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2);
	ROUND (27, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0,
		      RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0);
	ROUND (28, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3,
		      RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3);
	ROUND (29, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3,
		      RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3);
	ROUND (30, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4,
		      RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4);
	ROUND_LAST (31, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3,
			   RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3);

	/* Transpose back, using the register order the final round left
	 * the state in.  */
	transpose_4x4(RA4, RA1, RA2, RA0);
	transpose_4x4(RB4, RB1, RB2, RB0);

	bx lr;
.size __serpent_enc_blk8,.-__serpent_enc_blk8;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
.align 3
.type __serpent_dec_blk8,%function;
__serpent_dec_blk8:
	/* Internal helper: decrypt eight blocks that are already in NEON
	 * registers.  File-local (no .globl); reached with `bl' from the mode
	 * functions and returns with `bx lr'.
	 *
	 * input:
	 *	r0: round key pointer
	 *	RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel
	 *						ciphertext blocks
	 * output:
	 *	RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext
	 *						blocks
	 *
	 * As in the encryption helper, the second register list of each round
	 * is the first register list of the round that follows it.
	 */

	/* Move the key pointer forward by 32 * 16 bytes before the first key
	 * load; presumably BLOCK_LOAD_KEY_DEC then walks the keys backwards
	 * from the last one (confirm against its definition).  */
	add RROUND, RROUND, #(32*16);

	/* Transpose both groups of four blocks; the first round key load is
	 * placed between the two transposes.  */
	transpose_4x4(RA0, RA1, RA2, RA3);
	BLOCK_LOAD_KEY_DEC ();
	transpose_4x4(RB0, RB1, RB2, RB3);

	/* rounds 31..24 (inverse S-boxes 7..0) */
	ROUND_FIRST_INVERSE (31, 7, RA0, RA1, RA2, RA3, RA4,
				    RA3, RA0, RA1, RA4, RA2,
				    RB0, RB1, RB2, RB3, RB4,
				    RB3, RB0, RB1, RB4, RB2);
	ROUND_INVERSE (30, 6, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA2, RA4, RA3,
			      RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB2, RB4, RB3);
	ROUND_INVERSE (29, 5, RA0, RA1, RA2, RA4, RA3, RA1, RA3, RA4, RA2, RA0,
			      RB0, RB1, RB2, RB4, RB3, RB1, RB3, RB4, RB2, RB0);
	ROUND_INVERSE (28, 4, RA1, RA3, RA4, RA2, RA0, RA1, RA2, RA4, RA0, RA3,
			      RB1, RB3, RB4, RB2, RB0, RB1, RB2, RB4, RB0, RB3);
	ROUND_INVERSE (27, 3, RA1, RA2, RA4, RA0, RA3, RA4, RA2, RA0, RA1, RA3,
			      RB1, RB2, RB4, RB0, RB3, RB4, RB2, RB0, RB1, RB3);
	ROUND_INVERSE (26, 2, RA4, RA2, RA0, RA1, RA3, RA2, RA3, RA0, RA1, RA4,
			      RB4, RB2, RB0, RB1, RB3, RB2, RB3, RB0, RB1, RB4);
	ROUND_INVERSE (25, 1, RA2, RA3, RA0, RA1, RA4, RA4, RA2, RA1, RA0, RA3,
			      RB2, RB3, RB0, RB1, RB4, RB4, RB2, RB1, RB0, RB3);
	ROUND_INVERSE (24, 0, RA4, RA2, RA1, RA0, RA3, RA4, RA3, RA2, RA0, RA1,
			      RB4, RB2, RB1, RB0, RB3, RB4, RB3, RB2, RB0, RB1);

	/* rounds 23..16 (inverse S-boxes 7..0) */
	ROUND_INVERSE (23, 7, RA4, RA3, RA2, RA0, RA1, RA0, RA4, RA3, RA1, RA2,
			      RB4, RB3, RB2, RB0, RB1, RB0, RB4, RB3, RB1, RB2);
	ROUND_INVERSE (22, 6, RA0, RA4, RA3, RA1, RA2, RA4, RA3, RA2, RA1, RA0,
			      RB0, RB4, RB3, RB1, RB2, RB4, RB3, RB2, RB1, RB0);
	ROUND_INVERSE (21, 5, RA4, RA3, RA2, RA1, RA0, RA3, RA0, RA1, RA2, RA4,
			      RB4, RB3, RB2, RB1, RB0, RB3, RB0, RB1, RB2, RB4);
	ROUND_INVERSE (20, 4, RA3, RA0, RA1, RA2, RA4, RA3, RA2, RA1, RA4, RA0,
			      RB3, RB0, RB1, RB2, RB4, RB3, RB2, RB1, RB4, RB0);
	ROUND_INVERSE (19, 3, RA3, RA2, RA1, RA4, RA0, RA1, RA2, RA4, RA3, RA0,
			      RB3, RB2, RB1, RB4, RB0, RB1, RB2, RB4, RB3, RB0);
	ROUND_INVERSE (18, 2, RA1, RA2, RA4, RA3, RA0, RA2, RA0, RA4, RA3, RA1,
			      RB1, RB2, RB4, RB3, RB0, RB2, RB0, RB4, RB3, RB1);
	ROUND_INVERSE (17, 1, RA2, RA0, RA4, RA3, RA1, RA1, RA2, RA3, RA4, RA0,
			      RB2, RB0, RB4, RB3, RB1, RB1, RB2, RB3, RB4, RB0);
	ROUND_INVERSE (16, 0, RA1, RA2, RA3, RA4, RA0, RA1, RA0, RA2, RA4, RA3,
			      RB1, RB2, RB3, RB4, RB0, RB1, RB0, RB2, RB4, RB3);

	/* rounds 15..8 (inverse S-boxes 7..0) */
	ROUND_INVERSE (15, 7, RA1, RA0, RA2, RA4, RA3, RA4, RA1, RA0, RA3, RA2,
			      RB1, RB0, RB2, RB4, RB3, RB4, RB1, RB0, RB3, RB2);
	ROUND_INVERSE (14, 6, RA4, RA1, RA0, RA3, RA2, RA1, RA0, RA2, RA3, RA4,
			      RB4, RB1, RB0, RB3, RB2, RB1, RB0, RB2, RB3, RB4);
	ROUND_INVERSE (13, 5, RA1, RA0, RA2, RA3, RA4, RA0, RA4, RA3, RA2, RA1,
			      RB1, RB0, RB2, RB3, RB4, RB0, RB4, RB3, RB2, RB1);
	ROUND_INVERSE (12, 4, RA0, RA4, RA3, RA2, RA1, RA0, RA2, RA3, RA1, RA4,
			      RB0, RB4, RB3, RB2, RB1, RB0, RB2, RB3, RB1, RB4);
	ROUND_INVERSE (11, 3, RA0, RA2, RA3, RA1, RA4, RA3, RA2, RA1, RA0, RA4,
			      RB0, RB2, RB3, RB1, RB4, RB3, RB2, RB1, RB0, RB4);
	ROUND_INVERSE (10, 2, RA3, RA2, RA1, RA0, RA4, RA2, RA4, RA1, RA0, RA3,
			      RB3, RB2, RB1, RB0, RB4, RB2, RB4, RB1, RB0, RB3);
	ROUND_INVERSE (9, 1, RA2, RA4, RA1, RA0, RA3, RA3, RA2, RA0, RA1, RA4,
			     RB2, RB4, RB1, RB0, RB3, RB3, RB2, RB0, RB1, RB4);
	ROUND_INVERSE (8, 0, RA3, RA2, RA0, RA1, RA4, RA3, RA4, RA2, RA1, RA0,
			     RB3, RB2, RB0, RB1, RB4, RB3, RB4, RB2, RB1, RB0);

	/* rounds 7..0 (inverse S-boxes 7..0) */
	ROUND_INVERSE (7, 7, RA3, RA4, RA2, RA1, RA0, RA1, RA3, RA4, RA0, RA2,
			     RB3, RB4, RB2, RB1, RB0, RB1, RB3, RB4, RB0, RB2);
	ROUND_INVERSE (6, 6, RA1, RA3, RA4, RA0, RA2, RA3, RA4, RA2, RA0, RA1,
			     RB1, RB3, RB4, RB0, RB2, RB3, RB4, RB2, RB0, RB1);
	ROUND_INVERSE (5, 5, RA3, RA4, RA2, RA0, RA1, RA4, RA1, RA0, RA2, RA3,
			     RB3, RB4, RB2, RB0, RB1, RB4, RB1, RB0, RB2, RB3);
	ROUND_INVERSE (4, 4, RA4, RA1, RA0, RA2, RA3, RA4, RA2, RA0, RA3, RA1,
			     RB4, RB1, RB0, RB2, RB3, RB4, RB2, RB0, RB3, RB1);
	ROUND_INVERSE (3, 3, RA4, RA2, RA0, RA3, RA1, RA0, RA2, RA3, RA4, RA1,
			     RB4, RB2, RB0, RB3, RB1, RB0, RB2, RB3, RB4, RB1);
	ROUND_INVERSE (2, 2, RA0, RA2, RA3, RA4, RA1, RA2, RA1, RA3, RA4, RA0,
			     RB0, RB2, RB3, RB4, RB1, RB2, RB1, RB3, RB4, RB0);
	ROUND_INVERSE (1, 1, RA2, RA1, RA3, RA4, RA0, RA0, RA2, RA4, RA3, RA1,
			     RB2, RB1, RB3, RB4, RB0, RB0, RB2, RB4, RB3, RB1);
	ROUND_INVERSE (0, 0, RA0, RA2, RA4, RA3, RA1, RA0, RA1, RA2, RA3, RA4,
			     RB0, RB2, RB4, RB3, RB1, RB0, RB1, RB2, RB3, RB4);

	/* Transpose back; round 0 leaves the state in RA0..RA3 / RB0..RB3.  */
	transpose_4x4(RA0, RA1, RA2, RA3);
	transpose_4x4(RB0, RB1, RB2, RB3);

	bx lr;
.size __serpent_dec_blk8,.-__serpent_dec_blk8;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
.align 3
.globl _gcry_serpent_neon_ctr_enc
.type _gcry_serpent_neon_ctr_enc,%function;
_gcry_serpent_neon_ctr_enc:
	/* CTR mode over eight blocks: encrypt counter, counter+1, ...,
	 * counter+7 and XOR the results onto the eight source blocks.
	 *
	 * input:
	 *	r0: ctx, CTX
	 *	r1: dst (8 blocks)
	 *	r2: src (8 blocks)
	 *	r3: iv (128-bit big-endian counter; overwritten with counter+8)
	 */

	vmov.u8 RT1d0, #0xff; /* u64: -1 */
	push {r4,lr};
	vadd.u64 RT2d0, RT1d0, RT1d0; /* u64: -2 */
	vpush {RA4-RB2};

	/* load IV and byteswap; RA0 keeps the unmodified IV as block +0 */
	vld1.8 {RA0}, [r3];
	vrev64.u8 RT0, RA0; /* be => le */
	ldr r4, [r3, #8]; /* upper word of the low 64 counter bits */

	/* construct IVs: subtracting -1 / -2 adds 1 / 2 to the low 64 bits
	 * (d1 half).  The flags set by the `cmp' below must survive until
	 * the `beq'; nothing in between writes them.  */
	vsub.u64 RA2d1, RT0d1, RT2d0; /* +2 */
	vsub.u64 RA1d1, RT0d1, RT1d0; /* +1 */
	cmp r4, #-1; /* all ones => a carry into the high half is possible */

	vsub.u64 RB0d1, RA2d1, RT2d0; /* +4 */
	vsub.u64 RA3d1, RA2d1, RT1d0; /* +3 */
	ldr r4, [r3, #12]; /* lowest counter word (still big-endian) */

	vsub.u64 RB2d1, RB0d1, RT2d0; /* +6 */
	vsub.u64 RB1d1, RB0d1, RT1d0; /* +5 */

	vsub.u64 RT2d1, RB2d1, RT2d0; /* +8 */
	vsub.u64 RB3d1, RB2d1, RT1d0; /* +7 */

	/* every counter starts with the same high 64 bits (d0 half) */
	vmov RA1d0, RT0d0;
	vmov RA2d0, RT0d0;
	vmov RA3d0, RT0d0;
	vmov RB0d0, RT0d0;
	rev r4, r4; /* lowest counter word as a native integer */
	vmov RB1d0, RT0d0;
	vmov RB2d0, RT0d0;
	vmov RB3d0, RT0d0;
	vmov RT2d0, RT0d0;

	/* check need for handling 64-bit overflow and carry */
	beq .Ldo_ctr_carry;

.Lctr_carry_done:
	/* le => be */
	vrev64.u8 RA1, RA1;
	vrev64.u8 RA2, RA2;
	vrev64.u8 RA3, RA3;
	vrev64.u8 RB0, RB0;
	vrev64.u8 RT2, RT2;
	vrev64.u8 RB1, RB1;
	vrev64.u8 RB2, RB2;
	vrev64.u8 RB3, RB3;
	/* store new IV */
	vst1.8 {RT2}, [r3];

	bl __serpent_enc_blk8;

	/* XOR the keystream (returned in RA4, RA1, RA2, RA0, RB4, RB1, RB2,
	 * RB0) with the source; temporaries are zeroed as soon as they are
	 * no longer needed.  */
	vld1.8 {RT0, RT1}, [r2]!;
	vld1.8 {RT2, RT3}, [r2]!;
	veor RA4, RA4, RT0;
	veor RA1, RA1, RT1;
	vld1.8 {RT0, RT1}, [r2]!;
	veor RA2, RA2, RT2;
	veor RA0, RA0, RT3;
	vld1.8 {RT2, RT3}, [r2]!;
	veor RB4, RB4, RT0;
	veor RT0, RT0, RT0;
	veor RB1, RB1, RT1;
	veor RT1, RT1, RT1;
	veor RB2, RB2, RT2;
	veor RT2, RT2, RT2;
	veor RB0, RB0, RT3;
	veor RT3, RT3, RT3;

	/* write the eight result blocks, clearing each register that is not
	 * restored by the vpop below once it has been stored */
	vst1.8 {RA4}, [r1]!;
	vst1.8 {RA1}, [r1]!;
	veor RA1, RA1, RA1;
	vst1.8 {RA2}, [r1]!;
	veor RA2, RA2, RA2;
	vst1.8 {RA0}, [r1]!;
	veor RA0, RA0, RA0;
	vst1.8 {RB4}, [r1]!;
	veor RB4, RB4, RB4;
	vst1.8 {RB1}, [r1]!;
	vst1.8 {RB2}, [r1]!;
	vst1.8 {RB0}, [r1]!;

	vpop {RA4-RB2};

	/* clear the used registers */
	veor RA3, RA3, RA3;
	veor RB3, RB3, RB3;

	pop {r4,pc};

.Ldo_ctr_carry:
	/* Reached only when the upper word of the low 64 bits is all ones.
	 * r4 is the lowest word; counter +k wraps the low 64 bits exactly
	 * when r4 >= -k (unsigned).  The ladder jumps to the first counter
	 * that wraps and falls through the rest, adding 1 to each high half
	 * (subtracting RT1d0 == -1).  */
	cmp r4, #-8;
	blo .Lctr_carry_done; /* r4 < -8: nothing wraps */
	beq .Lcarry_RT2; /* r4 == -8: only +8 wraps */

	cmp r4, #-6;
	blo .Lcarry_RB3; /* r4 == -7 */
	beq .Lcarry_RB2; /* r4 == -6 */

	cmp r4, #-4;
	blo .Lcarry_RB1; /* r4 == -5 */
	beq .Lcarry_RB0; /* r4 == -4 */

	cmp r4, #-2;
	blo .Lcarry_RA3; /* r4 == -3 */
	beq .Lcarry_RA2; /* r4 == -2 */

	/* r4 == -1: every counter from +1 on wraps */
	vsub.u64 RA1d0, RA1d0, RT1d0;
.Lcarry_RA2:
	vsub.u64 RA2d0, RA2d0, RT1d0;
.Lcarry_RA3:
	vsub.u64 RA3d0, RA3d0, RT1d0;
.Lcarry_RB0:
	vsub.u64 RB0d0, RB0d0, RT1d0;
.Lcarry_RB1:
	vsub.u64 RB1d0, RB1d0, RT1d0;
.Lcarry_RB2:
	vsub.u64 RB2d0, RB2d0, RT1d0;
.Lcarry_RB3:
	vsub.u64 RB3d0, RB3d0, RT1d0;
.Lcarry_RT2:
	vsub.u64 RT2d0, RT2d0, RT1d0;

	b .Lctr_carry_done;
.size _gcry_serpent_neon_ctr_enc,.-_gcry_serpent_neon_ctr_enc;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
.align 3
.globl _gcry_serpent_neon_cfb_dec
.type _gcry_serpent_neon_cfb_dec,%function;
_gcry_serpent_neon_cfb_dec:
	/* CFB mode decryption of eight blocks: encrypt the IV followed by
	 * the first seven ciphertext blocks, then XOR the result onto the
	 * eight ciphertext blocks.
	 *
	 * input:
	 *	r0: ctx, CTX
	 *	r1: dst (8 blocks)
	 *	r2: src (8 blocks)
	 *	r3: iv (overwritten with the last ciphertext block)
	 */

	push {lr};
	vpush {RA4-RB2};

	/* Load input: IV into RA0, ciphertext blocks 0..6 into RA1..RB3 */
	vld1.8 {RA0}, [r3];
	vld1.8 {RA1, RA2}, [r2]!;
	vld1.8 {RA3}, [r2]!;
	vld1.8 {RB0}, [r2]!;
	vld1.8 {RB1, RB2}, [r2]!;
	vld1.8 {RB3}, [r2]!;

	/* Update IV with ciphertext block 7, then rewind r2 to the start of
	 * src.  (A former `mov r3, lr' here was dead code: lr is already on
	 * the stack and r3 is not read again, so it has been dropped.)  */
	vld1.8 {RT0}, [r2]!;
	vst1.8 {RT0}, [r3];
	sub r2, r2, #(8*16);

	bl __serpent_enc_blk8;

	/* XOR the encrypted blocks (returned in RA4, RA1, RA2, RA0, RB4,
	 * RB1, RB2, RB0) with the ciphertext; temporaries are zeroed as soon
	 * as they are no longer needed.  */
	vld1.8 {RT0, RT1}, [r2]!;
	vld1.8 {RT2, RT3}, [r2]!;
	veor RA4, RA4, RT0;
	veor RA1, RA1, RT1;
	vld1.8 {RT0, RT1}, [r2]!;
	veor RA2, RA2, RT2;
	veor RA0, RA0, RT3;
	vld1.8 {RT2, RT3}, [r2]!;
	veor RB4, RB4, RT0;
	veor RT0, RT0, RT0;
	veor RB1, RB1, RT1;
	veor RT1, RT1, RT1;
	veor RB2, RB2, RT2;
	veor RT2, RT2, RT2;
	veor RB0, RB0, RT3;
	veor RT3, RT3, RT3;

	/* write the eight plaintext blocks, clearing each register that is
	 * not restored by the vpop below once it has been stored */
	vst1.8 {RA4}, [r1]!;
	vst1.8 {RA1}, [r1]!;
	veor RA1, RA1, RA1;
	vst1.8 {RA2}, [r1]!;
	veor RA2, RA2, RA2;
	vst1.8 {RA0}, [r1]!;
	veor RA0, RA0, RA0;
	vst1.8 {RB4}, [r1]!;
	veor RB4, RB4, RB4;
	vst1.8 {RB1}, [r1]!;
	vst1.8 {RB2}, [r1]!;
	vst1.8 {RB0}, [r1]!;

	vpop {RA4-RB2};

	/* clear the used registers */
	veor RA3, RA3, RA3;
	veor RB3, RB3, RB3;

	pop {pc};
.size _gcry_serpent_neon_cfb_dec,.-_gcry_serpent_neon_cfb_dec;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
.align 3
.globl _gcry_serpent_neon_cbc_dec
.type _gcry_serpent_neon_cbc_dec,%function;
_gcry_serpent_neon_cbc_dec:
	/* CBC mode decryption of eight blocks: decrypt all eight ciphertext
	 * blocks, then XOR block 0 with the IV and block i with ciphertext
	 * block i-1.
	 *
	 * input:
	 *	r0: ctx, CTX
	 *	r1: dst (8 blocks)
	 *	r2: src (8 blocks)
	 *	r3: iv (overwritten with the last ciphertext block)
	 */

	push {lr};
	vpush {RA4-RB2};

	/* load the eight ciphertext blocks, then rewind r2 so they can be
	 * read again as chaining values after decryption */
	vld1.8 {RA0, RA1}, [r2]!;
	vld1.8 {RA2, RA3}, [r2]!;
	vld1.8 {RB0, RB1}, [r2]!;
	vld1.8 {RB2, RB3}, [r2]!;
	sub r2, r2, #(8*16);

	bl __serpent_dec_blk8;

	/* chain: RB4 holds the IV, RT0..RT3 are refilled with ciphertext
	 * blocks and zeroed as soon as they are no longer needed */
	vld1.8 {RB4}, [r3];
	vld1.8 {RT0, RT1}, [r2]!;
	vld1.8 {RT2, RT3}, [r2]!;
	veor RA0, RA0, RB4;
	veor RA1, RA1, RT0;
	veor RA2, RA2, RT1;
	vld1.8 {RT0, RT1}, [r2]!;
	veor RA3, RA3, RT2;
	veor RB0, RB0, RT3;
	vld1.8 {RT2, RT3}, [r2]!;
	veor RB1, RB1, RT0;
	veor RT0, RT0, RT0;
	veor RB2, RB2, RT1;
	veor RT1, RT1, RT1;
	veor RB3, RB3, RT2;
	veor RT2, RT2, RT2;
	vst1.8 {RT3}, [r3]; /* store new IV */
	veor RT3, RT3, RT3;

	/* write the eight plaintext blocks, clearing the registers that are
	 * not restored by the vpop below */
	vst1.8 {RA0, RA1}, [r1]!;
	veor RA0, RA0, RA0;
	veor RA1, RA1, RA1;
	vst1.8 {RA2, RA3}, [r1]!;
	veor RA2, RA2, RA2;
	vst1.8 {RB0, RB1}, [r1]!;
	veor RA3, RA3, RA3;
	vst1.8 {RB2, RB3}, [r1]!;
	veor RB3, RB3, RB3;

	vpop {RA4-RB2};

	/* clear the used registers */
	veor RB4, RB4, RB4;

	pop {pc};
.size _gcry_serpent_neon_cbc_dec,.-_gcry_serpent_neon_cbc_dec;
|
|
Packit |
0680ba |
|
|
Packit |
0680ba |
#endif
|