Blame cipher/twofish-arm.S

Packit 0680ba
/* twofish-arm.S  -  ARM assembly implementation of Twofish cipher
Packit 0680ba
 *
Packit 0680ba
 * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
Packit 0680ba
 *
Packit 0680ba
 * This file is part of Libgcrypt.
Packit 0680ba
 *
Packit 0680ba
 * Libgcrypt is free software; you can redistribute it and/or modify
Packit 0680ba
 * it under the terms of the GNU Lesser General Public License as
Packit 0680ba
 * published by the Free Software Foundation; either version 2.1 of
Packit 0680ba
 * the License, or (at your option) any later version.
Packit 0680ba
 *
Packit 0680ba
 * Libgcrypt is distributed in the hope that it will be useful,
Packit 0680ba
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 0680ba
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit 0680ba
 * GNU Lesser General Public License for more details.
Packit 0680ba
 *
Packit 0680ba
 * You should have received a copy of the GNU Lesser General Public
Packit 0680ba
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
Packit 0680ba
 */
Packit 0680ba
Packit 0680ba
#include <config.h>
Packit 0680ba
Packit 0680ba
#if defined(__ARMEL__)
Packit 0680ba
#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
Packit 0680ba
Packit 0680ba
.text
Packit 0680ba
Packit 0680ba
.syntax unified
Packit 0680ba
.arm
Packit 0680ba
Packit 0680ba
/* Byte offsets into TWOFISH_context, as used by this file: four lookup
 * tables s0..s3 of 256 32-bit words each, then the eight whitening words w,
 * then the round-key words k. */
#define s0 0
#define s1 (s0 + 256 * 4)
#define s2 (s1 + 256 * 4)
#define s3 (s2 + 256 * 4)
#define w  (s3 + 256 * 4)
#define k  (w + 8 * 4)
Packit 0680ba
Packit 0680ba
/* Register assignment.
 * On entry %r0/%r1/%r2 hold ctx/dst/src.  The entry points push dst before
 * %r1 is reused as CTXs1, and src has been consumed before %r2 is reused
 * as RX. */

/* context and table base pointers */
#define CTX	%r0
#define CTXs0	%r0	/* s0 is at offset 0, so it shares the ctx register */
#define CTXs1	%r1
#define CTXs3	%r7

/* round function accumulators */
#define RX	%r2
#define RY	%ip

/* the four 32-bit words of the block being processed */
#define RA	%r3
#define RB	%r4
#define RC	%r5
#define RD	%r6

/* scratch registers */
#define RT0	%r8
#define RT1	%r9
#define RT2	%r10
#define RT3	%r11

/* byte-index mask, loaded with (0xff << 2) by the entry points */
#define RMASK	%lr
Packit 0680ba
Packit 0680ba
/* helper macros */
Packit 0680ba
/* Load the 32-bit little-endian word at [rbase + off] one byte at a time,
 * so the address may have any alignment.
 *   rword    : receives the assembled word
 *   rbase    : base address register (not modified)
 *   off      : constant byte offset
 *   rscratch : clobbered
 * rword and rscratch must be two distinct registers, both different from
 * rbase. */
#define ldr_unaligned_le(rword, rbase, off, rscratch) \
	ldrb rword, [rbase, #((off) + 0)];		/* bits  7..0  */ \
	ldrb rscratch, [rbase, #((off) + 1)]; \
	orr rword, rword, rscratch, lsl #8;		/* bits 15..8  */ \
	ldrb rscratch, [rbase, #((off) + 2)]; \
	orr rword, rword, rscratch, lsl #16;		/* bits 23..16 */ \
	ldrb rscratch, [rbase, #((off) + 3)]; \
	orr rword, rword, rscratch, lsl #24;		/* bits 31..24 */
Packit 0680ba
Packit 0680ba
/* Store rword as a 32-bit little-endian value at [rbase + off] one byte at
 * a time, so the address may have any alignment.
 *   rword   : value to store (not modified)
 *   rbase   : base address register (not modified)
 *   off     : constant byte offset
 *   rs0/rs1 : scratch registers, clobbered
 * The shifts are interleaved with the byte stores of the previous result. */
#define str_unaligned_le(rword, rbase, off, rs0, rs1) \
	mov rs0, rword, lsr #8; \
	strb rword, [rbase, #((off) + 0)];	/* bits  7..0  */ \
	mov rs1, rword, lsr #16; \
	strb rs0, [rbase, #((off) + 1)];	/* bits 15..8  */ \
	mov rs0, rword, lsr #24; \
	strb rs1, [rbase, #((off) + 2)];	/* bits 23..16 */ \
	strb rs0, [rbase, #((off) + 3)];	/* bits 31..24 */
Packit 0680ba
Packit 0680ba
/* In-place conversion of a register between host byte order and
 * little-endian.  NOTE: the whole file is currently wrapped in
 * "#if defined(__ARMEL__)", so only the little-endian definitions are ever
 * selected; the byte-swapping variant is kept for completeness. */
#ifdef __ARMEL__
/* little-endian host: nothing to do */
#define host_to_le(reg) /*_*/
#define le_to_host(reg) /*_*/
#else
/* big-endian host: reverse the byte order */
#define host_to_le(reg) \
	rev reg, reg;
#define le_to_host(reg) \
	rev reg, reg;
#endif
Packit 0680ba
Packit 0680ba
/* Load four consecutive 32-bit little-endian words from [rsrc] into
 * w0..w3 using word loads, then convert each to host byte order.
 * rsrc is not modified. */
#define ldr_input_aligned_le(rsrc, w0, w1, w2, w3) \
	ldr w0, [rsrc, #0]; \
	ldr w1, [rsrc, #4]; \
	ldr w2, [rsrc, #8]; \
	ldr w3, [rsrc, #12]; \
	le_to_host(w0); \
	le_to_host(w1); \
	le_to_host(w2); \
	le_to_host(w3);
Packit 0680ba
Packit 0680ba
/* Store the four host-order words a..d as consecutive 32-bit little-endian
 * words at [rout] using word stores.
 * a..d are converted in place with host_to_le before being stored, so on a
 * host where host_to_le swaps bytes they no longer hold their original
 * values afterwards.  rout is not modified. */
#define str_output_aligned_le(rout, a, b, c, d) \
	host_to_le(a); \
	host_to_le(b); \
	str a, [rout, #0]; \
	host_to_le(c); \
	str b, [rout, #4]; \
	host_to_le(d); \
	str c, [rout, #8]; \
	str d, [rout, #12];
Packit 0680ba
Packit 0680ba
/* ldr_input_le(rin, ra, rb, rc, rd, rtmp0):
 *	load a 16-byte little-endian block from [rin] into ra..rd.
 * str_output_le(rout, ra, rb, rc, rd, rtmp0, rtmp1):
 *	store ra..rd as a 16-byte little-endian block at [rout].
 * The rtmp* registers are scratch and only used by the byte-wise paths. */
#ifdef __ARM_FEATURE_UNALIGNED

/* unaligned word reads/writes allowed: always use the word-sized helpers */
#define ldr_input_le(rin, ra, rb, rc, rd, rtmp0) \
	ldr_input_aligned_le(rin, ra, rb, rc, rd)

#define str_output_le(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
	str_output_aligned_le(rout, ra, rb, rc, rd)

#else

/* Test the low two address bits at run time: word accesses when the pointer
 * is 4-byte aligned, byte accesses otherwise.  Both macros set the condition
 * flags (tst) and use the local numeric labels 1 and 2. */
#define ldr_input_le(rin, ra, rb, rc, rd, rtmp0) \
	tst rin, #3; \
	bne 1f; \
	ldr_input_aligned_le(rin, ra, rb, rc, rd); \
	b 2f; \
	1:; \
	ldr_unaligned_le(ra, rin, 0, rtmp0); \
	ldr_unaligned_le(rb, rin, 4, rtmp0); \
	ldr_unaligned_le(rc, rin, 8, rtmp0); \
	ldr_unaligned_le(rd, rin, 12, rtmp0); \
	2:;

#define str_output_le(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
	tst rout, #3; \
	bne 1f; \
	str_output_aligned_le(rout, ra, rb, rc, rd); \
	b 2f; \
	1:; \
	str_unaligned_le(ra, rout, 0, rtmp0, rtmp1); \
	str_unaligned_le(rb, rout, 4, rtmp0, rtmp1); \
	str_unaligned_le(rc, rout, 8, rtmp0, rtmp1); \
	str_unaligned_le(rd, rout, 12, rtmp0, rtmp1); \
	2:;

#endif
Packit 0680ba
Packit 0680ba
/**********************************************************************
Packit 0680ba
  1-way twofish
Packit 0680ba
 **********************************************************************/
Packit 0680ba
/* One Twofish encryption round.
 *   a, b    : input words.  b is used as is.  The byte indexes taken from a
 *             before ror_a(a) are shifted by adj_a extra bits, i.e. they are
 *             the bytes of (a ror adj_a); ror_a (dummy or ror1) then rotates
 *             a itself.
 *   rc, rd  : words updated by the round.
 *   n       : round number; selects the key words at k + 8*n and k + 8*n + 4.
 * With byteN(x) = (x >> 8*N) & 0xff and a taken after ror_a:
 *   X  = s0[byte0(a)] ^ s1[byte1(a)] ^ s2[byte2(a)] ^ s3[byte3(a)]
 *   Y  = s1[byte0(b)] ^ s2[byte1(b)] ^ s3[byte2(b)] ^ s0[byte3(b)]
 *   rd = rol(rd, 1) ^ (X + 2*Y + word at k + 8*n + 4)
 *   rc = rc ^ (X + Y + word at k + 8*n)
 * rc is not rotated here; the cycle macros in this file rotate that register
 * right by one later (ror1 passed as ror_a, or the trailing ror1 of
 * last_encrypt_cycle).
 * Expects RMASK = 0xff << 2 and CTXs0/CTXs1/CTXs3 = ctx + s0/s1/s3.
 * Clobbers RX, RY, RT0-RT3.  Loads and ALU operations are interleaved on
 * purpose; keep the instruction order. */
#define encrypt_round(a, b, rc, rd, n, ror_a, adj_a) \
Packit 0680ba
	and RT0, RMASK, b, lsr#(8 - 2); /* RT0 = 4 * byte1(b) */ \
Packit 0680ba
	and RY, RMASK, b, lsr#(16 - 2); /* RY = 4 * byte2(b) */ \
Packit 0680ba
	add RT0, RT0, #(s2 - s1); /* rebase for an s2 lookup through CTXs1 */ \
Packit 0680ba
	and RT1, RMASK, b, lsr#(24 - 2); /* RT1 = 4 * byte3(b) */ \
Packit 0680ba
	ldr RY, [CTXs3, RY]; /* RY = s3[byte2(b)] */ \
Packit 0680ba
	and RT2, RMASK, b, lsl#(2); /* RT2 = 4 * byte0(b) */ \
Packit 0680ba
	ldr RT0, [CTXs1, RT0]; /* RT0 = s2[byte1(b)] */ \
Packit 0680ba
	and RT3, RMASK, a, lsr#(16 - 2 + (adj_a)); /* RT3 = 4 * byte2(a ror adj_a) */ \
Packit 0680ba
	ldr RT1, [CTXs0, RT1]; /* RT1 = s0[byte3(b)] */ \
Packit 0680ba
	and RX, RMASK, a, lsr#(8 - 2 + (adj_a)); /* RX = 4 * byte1(a ror adj_a) */ \
Packit 0680ba
	ldr RT2, [CTXs1, RT2]; /* RT2 = s1[byte0(b)] */ \
Packit 0680ba
	add RT3, RT3, #(s2 - s1); /* rebase for an s2 lookup through CTXs1 */ \
Packit 0680ba
	ldr RX, [CTXs1, RX]; /* RX = s1[byte1] of the rotated a */ \
Packit 0680ba
	ror_a(a); /* apply the rotate to a itself (nothing when ror_a is dummy) */ \
Packit 0680ba
	\
Packit 0680ba
	eor RY, RY, RT0; \
Packit 0680ba
	ldr RT3, [CTXs1, RT3]; /* RT3 = s2[byte2(a)] */ \
Packit 0680ba
	and RT0, RMASK, a, lsl#(2); /* RT0 = 4 * byte0(a) */ \
Packit 0680ba
	eor RY, RY, RT1; \
Packit 0680ba
	and RT1, RMASK, a, lsr#(24 - 2); /* RT1 = 4 * byte3(a) */ \
Packit 0680ba
	eor RY, RY, RT2; /* RY = Y */ \
Packit 0680ba
	ldr RT0, [CTXs0, RT0]; /* RT0 = s0[byte0(a)] */ \
Packit 0680ba
	eor RX, RX, RT3; \
Packit 0680ba
	ldr RT1, [CTXs3, RT1]; /* RT1 = s3[byte3(a)] */ \
Packit 0680ba
	eor RX, RX, RT0; \
Packit 0680ba
	\
Packit 0680ba
	ldr RT3, [CTXs3, #(k - s3 + 8 * (n) + 4)]; /* second key word of round n */ \
Packit 0680ba
	eor RX, RX, RT1; /* RX = X */ \
Packit 0680ba
	ldr RT2, [CTXs3, #(k - s3 + 8 * (n))]; /* first key word of round n */ \
Packit 0680ba
	\
Packit 0680ba
	add RT0, RX, RY, lsl #1; /* RT0 = X + 2*Y */ \
Packit 0680ba
	add RX, RX, RY; /* RX = X + Y */ \
Packit 0680ba
	add RT0, RT0, RT3; \
Packit 0680ba
	add RX, RX, RT2; \
Packit 0680ba
	eor rd, RT0, rd, ror #31; /* rd = rol(rd, 1) ^ RT0 */ \
Packit 0680ba
	eor rc, rc, RX; /* rc ^= RX; its rotate is deferred */
Packit 0680ba
Packit 0680ba
/* Rotate hooks passed to encrypt_round/decrypt_round as ror_a/ror_b. */

/* dummy: expands to nothing, leaving the register untouched */
#define dummy(reg) /*_*/

/* ror1: rotate the register right by one bit, in place */
#define ror1(reg) ror reg, reg, #1;
Packit 0680ba
Packit 0680ba
/* One Twofish decryption round.
 *   a, b    : input words.  a is used as is.  The byte indexes taken from b
 *             before ror_b(b) are shifted by adj_b extra bits, i.e. they are
 *             the bytes of (b ror adj_b); ror_b (dummy or ror1) then rotates
 *             b itself.
 *   rc, rd  : words updated by the round.
 *   n       : round number; selects the key words at k + 8*n and k + 8*n + 4.
 * With byteN(x) = (x >> 8*N) & 0xff and b taken after ror_b:
 *   X  = s0[byte0(a)] ^ s1[byte1(a)] ^ s2[byte2(a)] ^ s3[byte3(a)]
 *   Y  = s1[byte0(b)] ^ s2[byte1(b)] ^ s3[byte2(b)] ^ s0[byte3(b)]
 *   rd = rd ^ (X + 2*Y + word at k + 8*n + 4)
 *   rc = rol(rc, 1) ^ (X + Y + word at k + 8*n)
 * rd is not rotated here; the cycle macros in this file rotate that register
 * right by one later (ror1 passed as ror_b, or the trailing ror1 of
 * last_decrypt_cycle).
 * Expects RMASK = 0xff << 2 and CTXs0/CTXs1/CTXs3 = ctx + s0/s1/s3.
 * Clobbers RX, RY, RT0-RT3.  Loads and ALU operations are interleaved on
 * purpose; keep the instruction order. */
#define decrypt_round(a, b, rc, rd, n, ror_b, adj_b) \
Packit 0680ba
	and RT3, RMASK, b, lsl#(2 - (adj_b)); /* RT3 = 4 * byte0(b ror adj_b) */ \
Packit 0680ba
	and RT1, RMASK, b, lsr#(8 - 2 + (adj_b)); /* RT1 = 4 * byte1(b ror adj_b) */ \
Packit 0680ba
	ror_b(b); /* apply the rotate to b itself (nothing when ror_b is dummy) */ \
Packit 0680ba
	and RT2, RMASK, a, lsl#(2); /* RT2 = 4 * byte0(a) */ \
Packit 0680ba
	and RT0, RMASK, a, lsr#(8 - 2); /* RT0 = 4 * byte1(a) */ \
Packit 0680ba
	\
Packit 0680ba
	ldr RY, [CTXs1, RT3]; /* RY = s1[byte0] of the rotated b */ \
Packit 0680ba
	add RT1, RT1, #(s2 - s1); /* rebase for an s2 lookup through CTXs1 */ \
Packit 0680ba
	ldr RX, [CTXs0, RT2]; /* RX = s0[byte0(a)] */ \
Packit 0680ba
	and RT3, RMASK, b, lsr#(16 - 2); /* RT3 = 4 * byte2(b) */ \
Packit 0680ba
	ldr RT1, [CTXs1, RT1]; /* RT1 = s2[byte1] of the rotated b */ \
Packit 0680ba
	and RT2, RMASK, a, lsr#(16 - 2); /* RT2 = 4 * byte2(a) */ \
Packit 0680ba
	ldr RT0, [CTXs1, RT0]; /* RT0 = s1[byte1(a)] */ \
Packit 0680ba
	\
Packit 0680ba
	add RT2, RT2, #(s2 - s1); /* rebase for an s2 lookup through CTXs1 */ \
Packit 0680ba
	ldr RT3, [CTXs3, RT3]; /* RT3 = s3[byte2(b)] */ \
Packit 0680ba
	eor RY, RY, RT1; \
Packit 0680ba
	\
Packit 0680ba
	and RT1, RMASK, b, lsr#(24 - 2); /* RT1 = 4 * byte3(b) */ \
Packit 0680ba
	eor RX, RX, RT0; \
Packit 0680ba
	ldr RT2, [CTXs1, RT2]; /* RT2 = s2[byte2(a)] */ \
Packit 0680ba
	and RT0, RMASK, a, lsr#(24 - 2); /* RT0 = 4 * byte3(a) */ \
Packit 0680ba
	\
Packit 0680ba
	ldr RT1, [CTXs0, RT1]; /* RT1 = s0[byte3(b)] */ \
Packit 0680ba
	\
Packit 0680ba
	eor RY, RY, RT3; \
Packit 0680ba
	ldr RT0, [CTXs3, RT0]; /* RT0 = s3[byte3(a)] */ \
Packit 0680ba
	eor RX, RX, RT2; \
Packit 0680ba
	eor RY, RY, RT1; /* RY = Y */ \
Packit 0680ba
	\
Packit 0680ba
	ldr RT1, [CTXs3, #(k - s3 + 8 * (n) + 4)]; /* second key word of round n */ \
Packit 0680ba
	eor RX, RX, RT0; /* RX = X */ \
Packit 0680ba
	ldr RT2, [CTXs3, #(k - s3 + 8 * (n))]; /* first key word of round n */ \
Packit 0680ba
	\
Packit 0680ba
	add RT0, RX, RY, lsl #1; /* RT0 = X + 2*Y */ \
Packit 0680ba
	add RX, RX, RY; /* RX = X + Y */ \
Packit 0680ba
	add RT0, RT0, RT1; \
Packit 0680ba
	add RX, RX, RT2; \
Packit 0680ba
	eor rd, rd, RT0; /* rd ^= RT0; its rotate is deferred */ \
Packit 0680ba
	eor rc, RX, rc, ror #31; /* rc = rol(rc, 1) ^ RX */
Packit 0680ba
Packit 0680ba
/* Encryption cycles: cycle number cyc runs rounds 2*cyc and 2*cyc + 1,
 * swapping the roles of the (RA, RB) and (RC, RD) pairs between them. */

/* First cycle: RA is used without a rotate in the first round (dummy, 0). */
#define first_encrypt_cycle(cyc) \
	encrypt_round(RA, RB, RC, RD, 2 * (cyc), dummy, 0); \
	encrypt_round(RC, RD, RA, RB, 2 * (cyc) + 1, ror1, 1);

/* Middle cycles: both rounds rotate their `a` word right by one. */
#define encrypt_cycle(cyc) \
	encrypt_round(RA, RB, RC, RD, 2 * (cyc), ror1, 1); \
	encrypt_round(RC, RD, RA, RB, 2 * (cyc) + 1, ror1, 1);

/* Last cycle: a regular cycle followed by the rotate of RA that no further
 * round is left to perform. */
#define last_encrypt_cycle(cyc) \
	encrypt_cycle(cyc) \
	ror1(RA);
Packit 0680ba
Packit 0680ba
/* Decryption cycles: cycle number cyc runs round 2*cyc + 1 and then round
 * 2*cyc, swapping the roles of the (RC, RD) and (RA, RB) pairs between
 * them. */

/* First cycle: RD is used without a rotate in the first round (dummy, 0). */
#define first_decrypt_cycle(cyc) \
	decrypt_round(RC, RD, RA, RB, 2 * (cyc) + 1, dummy, 0); \
	decrypt_round(RA, RB, RC, RD, 2 * (cyc), ror1, 1);

/* Middle cycles: both rounds rotate their `b` word right by one. */
#define decrypt_cycle(cyc) \
	decrypt_round(RC, RD, RA, RB, 2 * (cyc) + 1, ror1, 1); \
	decrypt_round(RA, RB, RC, RD, 2 * (cyc), ror1, 1);

/* Last cycle: a regular cycle followed by the rotate of RD that no further
 * round is left to perform. */
#define last_decrypt_cycle(cyc) \
	decrypt_cycle(cyc) \
	ror1(RD);
Packit 0680ba
Packit 0680ba
.align 3
.globl _gcry_twofish_arm_encrypt_block
.type   _gcry_twofish_arm_encrypt_block,%function;

_gcry_twofish_arm_encrypt_block:
	/* Encrypt one 16-byte block.
	 * input:
	 *	%r0: ctx
	 *	%r1: dst
	 *	%r2: src
	 *
	 * dst is pushed together with the saved registers because %r1 is
	 * reused as CTXs1; it is popped back right before the output is
	 * stored.
	 */
	push {%r1, %r4-%r11, %ip, %lr};

	/* Table base pointers and the byte-index mask used by the rounds. */
	add CTXs1, CTXs0, #(s1 - s0);
	add CTXs3, CTXs0, #(s3 - s0);
	mov RMASK, #(0xff << 2);

	ldr_input_le(%r2, RA, RB, RC, RD, RT0);

	/* Input whitening: xor the block with the first four words of w. */
	add RY, CTXs0, #w;
	ldm RY, {RT0, RT1, RT2, RT3};
	eor RA, RA, RT0;
	eor RB, RB, RT1;
	eor RC, RC, RT2;
	eor RD, RD, RT3;

	/* Eight cycles of two rounds each. */
	first_encrypt_cycle(0);
	encrypt_cycle(1);
	encrypt_cycle(2);
	encrypt_cycle(3);
	encrypt_cycle(4);
	encrypt_cycle(5);
	encrypt_cycle(6);
	last_encrypt_cycle(7);

	pop {%r1}; /* dst */

	/* Output whitening: xor with the last four words of w.  The word
	 * pairs are swapped: the result is written out as RC, RD, RA, RB. */
	add RY, CTXs3, #(w + 4*4 - s3);
	ldm RY, {RT0, RT1, RT2, RT3};
	eor RC, RC, RT0;
	eor RD, RD, RT1;
	eor RA, RA, RT2;
	eor RB, RB, RT3;

	str_output_le(%r1, RC, RD, RA, RB, RT0, RT1);

	/* Restore the saved registers and return (saved %lr goes to %pc). */
	pop {%r4-%r11, %ip, %pc};
.ltorg
.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block;
Packit 0680ba
Packit 0680ba
.align 3
.globl _gcry_twofish_arm_decrypt_block
.type   _gcry_twofish_arm_decrypt_block,%function;

_gcry_twofish_arm_decrypt_block:
	/* Decrypt one 16-byte block.
	 * input:
	 *	%r0: ctx
	 *	%r1: dst
	 *	%r2: src
	 *
	 * dst is pushed together with the saved registers because %r1 is
	 * reused as CTXs1; it is popped back right before the output is
	 * stored.
	 */
	push {%r1, %r4-%r11, %ip, %lr};

	/* CTXs3 (%r7) is not touched by ldr_input_le, so it is set up once
	 * here and used both for the whitening address and by the rounds. */
	add CTXs3, CTXs0, #(s3 - s0);

	/* The ciphertext is loaded in the order the encryptor stored it:
	 * RC, RD, RA, RB. */
	ldr_input_le(%r2, RC, RD, RA, RB, RT0);

	add RY, CTXs3, #(w + 4*4 - s3);

	/* Input whitening: xor with the last four words of w. */
	ldm RY, {RT0, RT1, RT2, RT3};
	add CTXs1, CTXs0, #(s1 - s0);
	mov RMASK, #(0xff << 2);
	eor RC, RC, RT0;
	eor RD, RD, RT1;
	eor RA, RA, RT2;
	eor RB, RB, RT3;

	/* Eight cycles of two rounds each, in reverse order. */
	first_decrypt_cycle(7);
	decrypt_cycle(6);
	decrypt_cycle(5);
	decrypt_cycle(4);
	decrypt_cycle(3);
	decrypt_cycle(2);
	decrypt_cycle(1);
	last_decrypt_cycle(0);

	add RY, CTXs0, #w;
	pop {%r1}; /* dst */

	/* Output whitening: xor with the first four words of w. */
	ldm RY, {RT0, RT1, RT2, RT3};
	eor RA, RA, RT0;
	eor RB, RB, RT1;
	eor RC, RC, RT2;
	eor RD, RD, RT3;

	str_output_le(%r1, RA, RB, RC, RD, RT0, RT1);

	/* Restore the saved registers and return (saved %lr goes to %pc). */
	pop {%r4-%r11, %ip, %pc};
.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block;
Packit 0680ba
Packit 0680ba
#endif /*HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS*/
Packit 0680ba
#endif /*__ARMEL__*/