Blame crypto/aes/asm/vpaes-ppc.pl

Packit c4476c
#! /usr/bin/env perl
Packit c4476c
# Copyright 2013-2020 The OpenSSL Project Authors. All Rights Reserved.
Packit c4476c
#
Packit c4476c
# Licensed under the OpenSSL license (the "License").  You may not use
Packit c4476c
# this file except in compliance with the License.  You can obtain a copy
Packit c4476c
# in the file LICENSE in the source distribution or at
Packit c4476c
# https://www.openssl.org/source/license.html
Packit c4476c
Packit c4476c
Packit c4476c
######################################################################
Packit c4476c
## Constant-time SSSE3 AES core implementation.
Packit c4476c
## version 0.1
Packit c4476c
##
Packit c4476c
## By Mike Hamburg (Stanford University), 2009
Packit c4476c
## Public domain.
Packit c4476c
##
Packit c4476c
## For details see http://shiftleft.org/papers/vector_aes/ and
Packit c4476c
## http://crypto.stanford.edu/vpaes/.
Packit c4476c
Packit c4476c
# CBC encrypt/decrypt performance in cycles per byte processed with
Packit c4476c
# 128-bit key.
Packit c4476c
#
Packit c4476c
#		aes-ppc.pl		this
Packit c4476c
# PPC74x0/G4e	35.5/52.1/(23.8)	11.9(*)/15.4
Packit c4476c
# PPC970/G5	37.9/55.0/(28.5)	22.2/28.5
Packit c4476c
# POWER6	42.7/54.3/(28.2)	63.0/92.8(**)
Packit c4476c
# POWER7	32.3/42.9/(18.4)	18.5/23.3
Packit c4476c
#
Packit c4476c
# (*)	This is ~10% worse than reported in paper. The reason is
Packit c4476c
#	twofold. This module doesn't make any assumption about
Packit c4476c
#	key schedule (or data for that matter) alignment and handles
Packit c4476c
#	it in-line. Secondly it, being transliterated from
Packit c4476c
#	vpaes-x86_64.pl, relies on "nested inversion" better suited
Packit c4476c
#	for Intel CPUs.
Packit c4476c
# (**)	Inadequate POWER6 performance is due to astronomic AltiVec
Packit c4476c
#	latency, 9 cycles per simple logical operation.
Packit c4476c
Packit c4476c
# The first command-line argument names the target flavour.  It decides
# the machine word size and the load/store/compare mnemonics that get
# interpolated into the assembly templates further down.
$flavour = shift;

# "64" is tested before "32", so a flavour string containing both is
# treated as 64-bit.  Anything else is rejected outright.
if ($flavour =~ /64/) {
	($SIZE_T, $STU, $POP, $PUSH, $UCMP) = (8, "stdu", "ld", "std", "cmpld");
} elsif ($flavour =~ /32/) {
	($SIZE_T, $STU, $POP, $PUSH, $UCMP) = (4, "stwu", "lwz", "stw", "cmplw");
} else {
	die "nonsense $flavour";
}

# Offset of the link-register save slot relative to the caller's frame:
# two words up for the 64-bit flavour, one word up for the 32-bit one.
$LRSAVE = ($SIZE_T == 8) ? 2*$SIZE_T : $SIZE_T;
Packit c4476c
Packit c4476c
$sp="r1";
# Frame layout used by the public entry points below: 6*$SIZE_T bytes at
# the bottom (presumably the ABI linkage area -- confirm against the
# target ABI), then 13*16 bytes in which v20-v31 are saved.  Twelve
# registers need 12*16 bytes; the remaining 16 leave room for rounding
# the save area to a 16-byte boundary and for the vrsave word stored at
# $FRAME-4.
$FRAME=6*$SIZE_T+13*16;	# 13*16 is for v20-v31 offload

# Directory part of this script's path, with its trailing separator.
# When the script is invoked without any directory component the match
# fails, so fall back to an empty prefix explicitly instead of reading
# whatever $1 happens to hold.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# ppc-xlate.pl is looked up next to this script first, then in the
# shared perlasm directory.
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through the
# translator; the next command-line argument (the output file) is passed
# on to it.  The call is parenthesised and checked with low-precedence
# "or": with "||" the die was attached to the (always true) command
# string, so a failed open went unnoticed.
open(STDOUT, "| $^X $xlate $flavour " . shift)
	or die "can't call $xlate: $!";
Packit c4476c
Packit c4476c
$code.=<<___;
Packit c4476c
.machine	"any"
Packit c4476c
Packit c4476c
.text
Packit c4476c
Packit c4476c
.align	7	# totally strategic alignment
Packit c4476c
_vpaes_consts:
Packit c4476c
Lk_mc_forward:	# mc_forward
Packit c4476c
	.long	0x01020300, 0x05060704, 0x090a0b08, 0x0d0e0f0c	?inv
Packit c4476c
	.long	0x05060704, 0x090a0b08, 0x0d0e0f0c, 0x01020300	?inv
Packit c4476c
	.long	0x090a0b08, 0x0d0e0f0c, 0x01020300, 0x05060704	?inv
Packit c4476c
	.long	0x0d0e0f0c, 0x01020300, 0x05060704, 0x090a0b08	?inv
Packit c4476c
Lk_mc_backward:	# mc_backward
Packit c4476c
	.long	0x03000102, 0x07040506, 0x0b08090a, 0x0f0c0d0e	?inv
Packit c4476c
	.long	0x0f0c0d0e, 0x03000102, 0x07040506, 0x0b08090a	?inv
Packit c4476c
	.long	0x0b08090a, 0x0f0c0d0e, 0x03000102, 0x07040506	?inv
Packit c4476c
	.long	0x07040506, 0x0b08090a, 0x0f0c0d0e, 0x03000102	?inv
Packit c4476c
Lk_sr:		# sr
Packit c4476c
	.long	0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f	?inv
Packit c4476c
	.long	0x00050a0f, 0x04090e03, 0x080d0207, 0x0c01060b	?inv
Packit c4476c
	.long	0x0009020b, 0x040d060f, 0x08010a03, 0x0c050e07	?inv
Packit c4476c
	.long	0x000d0a07, 0x04010e0b, 0x0805020f, 0x0c090603	?inv
Packit c4476c
Packit c4476c
##
Packit c4476c
## "Hot" constants
Packit c4476c
##
Packit c4476c
Lk_inv:		# inv, inva
Packit c4476c
	.long	0xf001080d, 0x0f06050e, 0x020c0b0a, 0x09030704	?rev
Packit c4476c
	.long	0xf0070b0f, 0x060a0401, 0x09080502, 0x0c0e0d03	?rev
Packit c4476c
Lk_ipt:		# input transform (lo, hi)
Packit c4476c
	.long	0x00702a5a, 0x98e8b2c2, 0x08782252, 0x90e0baca	?rev
Packit c4476c
	.long	0x004d7c31, 0x7d30014c, 0x81ccfdb0, 0xfcb180cd	?rev
Packit c4476c
Lk_sbo:		# sbou, sbot
Packit c4476c
	.long	0x00c7bd6f, 0x176dd2d0, 0x78a802c5, 0x7abfaa15	?rev
Packit c4476c
	.long	0x006abb5f, 0xa574e4cf, 0xfa352b41, 0xd1901e8e	?rev
Packit c4476c
Lk_sb1:		# sb1u, sb1t
Packit c4476c
	.long	0x0023e2fa, 0x15d41836, 0xefd92e0d, 0xc1ccf73b	?rev
Packit c4476c
	.long	0x003e50cb, 0x8fe19bb1, 0x44f52a14, 0x6e7adfa5	?rev
Packit c4476c
Lk_sb2:		# sb2u, sb2t
Packit c4476c
	.long	0x0029e10a, 0x4088eb69, 0x4a2382ab, 0xc863a1c2	?rev
Packit c4476c
	.long	0x0024710b, 0xc6937ae2, 0xcd2f98bc, 0x55e9b75e	?rev
Packit c4476c
Packit c4476c
##
Packit c4476c
##  Decryption stuff
Packit c4476c
##
Packit c4476c
Lk_dipt:	# decryption input transform
Packit c4476c
	.long	0x005f540b, 0x045b500f, 0x1a454e11, 0x1e414a15	?rev
Packit c4476c
	.long	0x00650560, 0xe683e386, 0x94f191f4, 0x72177712	?rev
Packit c4476c
Lk_dsbo:	# decryption sbox final output
Packit c4476c
	.long	0x0040f97e, 0x53ea8713, 0x2d3e94d4, 0xb96daac7	?rev
Packit c4476c
	.long	0x001d4493, 0x0f56d712, 0x9c8ec5d8, 0x59814bca	?rev
Packit c4476c
Lk_dsb9:	# decryption sbox output *9*u, *9*t
Packit c4476c
	.long	0x00d6869a, 0x53031c85, 0xc94c994f, 0x501fd5ca	?rev
Packit c4476c
	.long	0x0049d7ec, 0x89173bc0, 0x65a5fbb2, 0x9e2c5e72	?rev
Packit c4476c
Lk_dsbd:	# decryption sbox output *D*u, *D*t
Packit c4476c
	.long	0x00a2b1e6, 0xdfcc577d, 0x39442a88, 0x139b6ef5	?rev
Packit c4476c
	.long	0x00cbc624, 0xf7fae23c, 0xd3efde15, 0x0d183129	?rev
Packit c4476c
Lk_dsbb:	# decryption sbox output *B*u, *B*t
Packit c4476c
	.long	0x0042b496, 0x926422d0, 0x04d4f2b0, 0xf6462660	?rev
Packit c4476c
	.long	0x006759cd, 0xa69894c1, 0x6baa5532, 0x3e0cfff3	?rev
Packit c4476c
Lk_dsbe:	# decryption sbox output *E*u, *E*t
Packit c4476c
	.long	0x00d0d426, 0x9692f246, 0xb0f6b464, 0x04604222	?rev
Packit c4476c
	.long	0x00c1aaff, 0xcda6550c, 0x323e5998, 0x6bf36794	?rev
Packit c4476c
Packit c4476c
##
Packit c4476c
##  Key schedule constants
Packit c4476c
##
Packit c4476c
Lk_dksd:	# decryption key schedule: invskew x*D
Packit c4476c
	.long	0x0047e4a3, 0x5d1ab9fe, 0xf9be1d5a, 0xa4e34007	?rev
Packit c4476c
	.long	0x008336b5, 0xf477c241, 0x1e9d28ab, 0xea69dc5f	?rev
Packit c4476c
Lk_dksb:	# decryption key schedule: invskew x*B
Packit c4476c
	.long	0x00d55085, 0x1fca4f9a, 0x994cc91c, 0x8653d603	?rev
Packit c4476c
	.long	0x004afcb6, 0xa7ed5b11, 0xc882347e, 0x6f2593d9	?rev
Packit c4476c
Lk_dkse:	# decryption key schedule: invskew x*E + 0x63
Packit c4476c
	.long	0x00d6c91f, 0xca1c03d5, 0x86504f99, 0x4c9a8553	?rev
Packit c4476c
	.long	0xe87bdc4f, 0x059631a2, 0x8714b320, 0x6af95ecd	?rev
Packit c4476c
Lk_dks9:	# decryption key schedule: invskew x*9
Packit c4476c
	.long	0x00a7d97e, 0xc86f11b6, 0xfc5b2582, 0x3493ed4a	?rev
Packit c4476c
	.long	0x00331427, 0x62517645, 0xcefddae9, 0xac9fb88b	?rev
Packit c4476c
Packit c4476c
Lk_rcon:	# rcon
Packit c4476c
	.long	0xb6ee9daf, 0xb991831f, 0x817d7c4d, 0x08982a70	?asis
Packit c4476c
Lk_s63:
Packit c4476c
	.long	0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b	?asis
Packit c4476c
Packit c4476c
Lk_opt:		# output transform
Packit c4476c
	.long	0x0060b6d6, 0x29499fff, 0x0868bede, 0x214197f7	?rev
Packit c4476c
	.long	0x00ecbc50, 0x51bded01, 0xe00c5cb0, 0xb15d0de1	?rev
Packit c4476c
Lk_deskew:	# deskew tables: inverts the sbox's "skew"
Packit c4476c
	.long	0x00e3a447, 0x40a3e407, 0x1af9be5d, 0x5ab9fe1d	?rev
Packit c4476c
	.long	0x0069ea83, 0xdcb5365f, 0x771e9df4, 0xabc24128	?rev
Packit c4476c
.align	5
Packit c4476c
Lconsts:
	# Position-independent lookup of the constant tables.
	# Returns the address of _vpaes_consts in r12.  r0 is used to keep
	# the caller's LR intact across the bcl; r0 and r12 are overwritten.
Packit c4476c
	mflr	r0
Packit c4476c
	bcl	20,31,\$+4
Packit c4476c
	mflr	r12	# r12 = address of this mflr; vvvvv is the distance from here back to _vpaes_consts
Packit c4476c
	addi	r12,r12,-0x308	# 0x300 bytes of tables + the two instructions above
Packit c4476c
	mtlr	r0
Packit c4476c
	blr
Packit c4476c
	.long	0
Packit c4476c
	.byte	0,12,0x14,0,0,0,0,0
Packit c4476c
.asciz  "Vector Permutation AES for AltiVec, Mike Hamburg (Stanford University)"
Packit c4476c
.align	6
Packit c4476c
___
Packit c4476c

Packit c4476c
# Vector registers v26-v31, named after their role in the unaligned
# input/output/key handling of the routines below.
my ($inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm) = map("v$_",(26..31));
Packit c4476c
{
Packit c4476c
# General-purpose registers r3-r5, used as input pointer, output pointer
# and key pointer by the single-block entry points.
my ($inp,$out,$key) = map("r$_",(3..5));
Packit c4476c
Packit c4476c
# v10-v15: tables loaded by both preheat routines (inversion, input
# transform, final s-box output).
my ($invlo,$invhi,$iptlo,$ipthi,$sbou,$sbot) = map("v$_",(10..15));
Packit c4476c
# v16-v19: encryption s-box tables.
my ($sb1u,$sb1t,$sb2u,$sb2t) = map("v$_",(16..19));
Packit c4476c
# v16-v23: decryption s-box tables.  These deliberately share v16-v19
# with the encryption names above; each preheat routine loads its own set.
my ($sb9u,$sb9t,$sbdu,$sbdt,$sbbu,$sbbt,$sbeu,$sbet)=map("v$_",(16..23));
Packit c4476c
Packit c4476c
$code.=<<___;
Packit c4476c
##
Packit c4476c
##  _aes_preheat
Packit c4476c
##
Packit c4476c
##  Fills register %r10 -> .aes_consts (so you can -fPIC)
Packit c4476c
##  and %xmm9-%xmm15 as specified below.
Packit c4476c
##
##  The register names above are those of the x86_64 original.
##  In this AltiVec version, on return:
##     r12     -> _vpaes_consts (set by Lconsts)
##     v7      =  0x00..00,  v8 = 0x04..04,  v9 = 0x0f..0f
##     v10,v11 =  Lk_inv     v12,v13 = Lk_ipt     v14,v15 = Lk_sbo
##     v16,v17 =  Lk_sb1     v18,v19 = Lk_sb2
##  Overwrites r0 (inside Lconsts) and r8-r11.  LR is kept in r8
##  across the call to Lconsts and restored afterwards.
##
Packit c4476c
.align	4
Packit c4476c
_vpaes_encrypt_preheat:
Packit c4476c
	mflr	r8
Packit c4476c
	bl	Lconsts
Packit c4476c
	mtlr	r8
Packit c4476c
	li	r11, 0xc0		# Lk_inv
Packit c4476c
	li	r10, 0xd0
Packit c4476c
	li	r9,  0xe0		# Lk_ipt
Packit c4476c
	li	r8,  0xf0
Packit c4476c
	vxor	v7, v7, v7		# 0x00..00
Packit c4476c
	vspltisb	v8,4		# 0x04..04
Packit c4476c
	vspltisb	v9,0x0f		# 0x0f..0f
Packit c4476c
	lvx	$invlo, r12, r11
Packit c4476c
	li	r11, 0x100		# Lk_sbo
Packit c4476c
	lvx	$invhi, r12, r10
Packit c4476c
	li	r10, 0x110
Packit c4476c
	lvx	$iptlo, r12, r9
Packit c4476c
	li	r9,  0x120		# Lk_sb1
Packit c4476c
	lvx	$ipthi, r12, r8
Packit c4476c
	li	r8,  0x130
Packit c4476c
	lvx	$sbou, r12, r11
Packit c4476c
	li	r11, 0x140		# Lk_sb2
Packit c4476c
	lvx	$sbot, r12, r10
Packit c4476c
	li	r10, 0x150
Packit c4476c
	lvx	$sb1u, r12, r9
Packit c4476c
	lvx	$sb1t, r12, r8
Packit c4476c
	lvx	$sb2u, r12, r11
Packit c4476c
	lvx	$sb2t, r12, r10
Packit c4476c
	blr
Packit c4476c
	.long	0
Packit c4476c
	.byte	0,12,0x14,0,0,0,0,0
Packit c4476c
Packit c4476c
##
Packit c4476c
##  _aes_encrypt_core
Packit c4476c
##
Packit c4476c
##  AES-encrypt %xmm0.
Packit c4476c
##
Packit c4476c
##  Inputs:
Packit c4476c
##     %xmm0 = input
Packit c4476c
##     %xmm9-%xmm15 as in _vpaes_preheat
Packit c4476c
##    (%rdx) = scheduled keys
Packit c4476c
##
Packit c4476c
##  Output in %xmm0
Packit c4476c
##  Clobbers  %xmm1-%xmm6, %r9, %r10, %r11, %rax
Packit c4476c
##
##  The description above, like the trailing comments on each
##  instruction, uses the register names of the x86_64 original.
##  In this AltiVec version:
##     input  : v0 = block, r5 = key schedule (round count is read
##              from offset 240), v31 = permute vector that aligns the
##              round keys, r12 and v7-v19 as left by
##              _vpaes_encrypt_preheat
##     output : v0
##     overwrites v1-v6, r8-r11, CTR and cr0
Packit c4476c
##
Packit c4476c
.align 5
Packit c4476c
_vpaes_encrypt_core:
Packit c4476c
	lwz	r8, 240($key)		# pull rounds
Packit c4476c
	li	r9, 16
Packit c4476c
	lvx	v5, 0, $key		# vmovdqu	(%r9),	%xmm5		# round0 key
Packit c4476c
	li	r11, 0x10		# offset of the current Lk_mc_forward row from r12
Packit c4476c
	lvx	v6, r9, $key
Packit c4476c
	addi	r9, r9, 16
Packit c4476c
	?vperm	v5, v5, v6, $keyperm	# align round key
Packit c4476c
	addi	r10, r11, 0x40		# matching Lk_mc_backward row (table starts at 0x40)
Packit c4476c
	vsrb	v1, v0, v8		# vpsrlb	\$4,	%xmm0,	%xmm0
Packit c4476c
	vperm	v0, $iptlo, $iptlo, v0	# vpshufb	%xmm1,	%xmm2,	%xmm1
Packit c4476c
	vperm	v1, $ipthi, $ipthi, v1	# vpshufb	%xmm0,	%xmm3,	%xmm2
Packit c4476c
	vxor	v0, v0, v5		# vpxor	%xmm5,	%xmm1,	%xmm0
Packit c4476c
	vxor	v0, v0, v1		# vpxor	%xmm2,	%xmm0,	%xmm0
Packit c4476c
	mtctr	r8			# CTR counts the rounds for bdnz below
Packit c4476c
	b	Lenc_entry
Packit c4476c
Packit c4476c
.align 4
Packit c4476c
Lenc_loop:
Packit c4476c
	# middle of middle round
Packit c4476c
	vperm	v4, $sb1t, v7, v2	# vpshufb	%xmm2,	%xmm13,	%xmm4	# 4 = sb1u
Packit c4476c
	lvx	v1, r12, r11		# vmovdqa	-0x40(%r11,%r10), %xmm1	# .Lk_mc_forward[]
Packit c4476c
	addi	r11, r11, 16
Packit c4476c
	vperm	v0, $sb1u, v7, v3	# vpshufb	%xmm3,	%xmm12,	%xmm0	# 0 = sb1t
Packit c4476c
	vxor	v4, v4, v5		# vpxor		%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
Packit c4476c
	andi.	r11, r11, 0x30		# and		\$0x30, %r11	# ... mod 4
Packit c4476c
	vperm	v5, $sb2t, v7, v2	# vpshufb	%xmm2,	%xmm15,	%xmm5	# 4 = sb2u
Packit c4476c
	vxor	v0, v0, v4		# vpxor		%xmm4,	%xmm0,	%xmm0	# 0 = A
Packit c4476c
	vperm	v2, $sb2u, v7, v3	# vpshufb	%xmm3,	%xmm14,	%xmm2	# 2 = sb2t
Packit c4476c
	lvx	v4, r12, r10		# vmovdqa	(%r11,%r10), %xmm4	# .Lk_mc_backward[]
Packit c4476c
	addi	r10, r11, 0x40
Packit c4476c
	vperm	v3, v0, v7, v1		# vpshufb	%xmm1,	%xmm0,	%xmm3	# 0 = B
Packit c4476c
	vxor	v2, v2, v5		# vpxor		%xmm5,	%xmm2,	%xmm2	# 2 = 2A
Packit c4476c
	vperm	v0, v0, v7, v4		# vpshufb	%xmm4,	%xmm0,	%xmm0	# 3 = D
Packit c4476c
	vxor	v3, v3, v2		# vpxor		%xmm2,	%xmm3,	%xmm3	# 0 = 2A+B
Packit c4476c
	vperm	v4, v3, v7, v1		# vpshufb	%xmm1,	%xmm3,	%xmm4	# 0 = 2B+C
Packit c4476c
	vxor	v0, v0, v3		# vpxor		%xmm3,	%xmm0,	%xmm0	# 3 = 2A+B+D
Packit c4476c
	vxor	v0, v0, v4		# vpxor		%xmm4,	%xmm0, %xmm0	# 0 = 2A+3B+C+D
Packit c4476c
Packit c4476c
Lenc_entry:
Packit c4476c
	# top of round
Packit c4476c
	vsrb	v1, v0, v8		# vpsrlb	\$4,	%xmm0,	%xmm0	# 1 = i
Packit c4476c
	vperm	v5, $invhi, $invhi, v0	# vpshufb	%xmm1,	%xmm11,	%xmm5	# 2 = a/k
Packit c4476c
	vxor	v0, v0, v1		# vpxor		%xmm0,	%xmm1,	%xmm1	# 0 = j
Packit c4476c
	vperm	v3, $invlo, $invlo, v1	# vpshufb	%xmm0, 	%xmm10,	%xmm3  	# 3 = 1/i
Packit c4476c
	vperm	v4, $invlo, $invlo, v0	# vpshufb	%xmm1, 	%xmm10,	%xmm4  	# 4 = 1/j
Packit c4476c
	vand	v0, v0, v9
Packit c4476c
	vxor	v3, v3, v5		# vpxor		%xmm5,	%xmm3,	%xmm3	# 3 = iak = 1/i + a/k
Packit c4476c
	vxor	v4, v4, v5		# vpxor		%xmm5,	%xmm4,	%xmm4  	# 4 = jak = 1/j + a/k
Packit c4476c
	vperm	v2, $invlo, v7, v3	# vpshufb	%xmm3,	%xmm10,	%xmm2  	# 2 = 1/iak
Packit c4476c
	vmr	v5, v6			# previously loaded key quadword becomes the head of the next round key
Packit c4476c
	lvx	v6, r9, $key		# vmovdqu	(%r9), %xmm5
Packit c4476c
	vperm	v3, $invlo, v7, v4	# vpshufb	%xmm4,	%xmm10,	%xmm3	# 3 = 1/jak
Packit c4476c
	addi	r9, r9, 16
Packit c4476c
	vxor	v2, v2, v0		# vpxor		%xmm1,	%xmm2,	%xmm2  	# 2 = io
Packit c4476c
	?vperm	v5, v5, v6, $keyperm	# align round key
Packit c4476c
	vxor	v3, v3, v1		# vpxor		%xmm0,	%xmm3,	%xmm3	# 3 = jo
Packit c4476c
	bdnz	Lenc_loop
Packit c4476c
Packit c4476c
	# middle of last round
Packit c4476c
	addi	r10, r11, 0x80		# row of Lk_sr (table starts at 0x80) selected by r11
Packit c4476c
					# vmovdqa	-0x60(%r10), %xmm4	# 3 : sbou	.Lk_sbo
Packit c4476c
					# vmovdqa	-0x50(%r10), %xmm0	# 0 : sbot	.Lk_sbo+16
Packit c4476c
	vperm	v4, $sbou, v7, v2	# vpshufb	%xmm2,	%xmm4,	%xmm4	# 4 = sbou
Packit c4476c
	lvx	v1, r12, r10		# vmovdqa	0x40(%r11,%r10), %xmm1	# .Lk_sr[]
Packit c4476c
	vperm	v0, $sbot, v7, v3	# vpshufb	%xmm3,	%xmm0,	%xmm0	# 0 = sb1t
Packit c4476c
	vxor	v4, v4, v5		# vpxor		%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
Packit c4476c
	vxor	v0, v0, v4		# vpxor		%xmm4,	%xmm0,	%xmm0	# 0 = A
Packit c4476c
	vperm	v0, v0, v7, v1		# vpshufb	%xmm1,	%xmm0,	%xmm0
Packit c4476c
	blr
Packit c4476c
	.long	0
Packit c4476c
	.byte	0,12,0x14,0,0,0,0,0
Packit c4476c
Packit c4476c
##
##  vpaes_encrypt
##
##  Encrypts one 16-byte block.
##     r3 = input pointer, r4 = output pointer, r5 = key schedule
##  Neither pointer needs to be 16-byte aligned: the input is assembled
##  from two aligned loads and the output is written either with one
##  aligned store or byte by byte.
##  v20-v31 are saved on entry and reloaded on exit.  LR and vrsave are
##  also written to the frame, but this routine restores them from
##  r6 and r7 rather than from those stack copies.
##
.globl	.vpaes_encrypt
Packit c4476c
.align	5
Packit c4476c
.vpaes_encrypt:
Packit c4476c
	$STU	$sp,-$FRAME($sp)
Packit c4476c
	li	r10,`15+6*$SIZE_T`	# +15: stvx drops the low four address bits, so this rounds the save area up to 16 bytes
Packit c4476c
	li	r11,`31+6*$SIZE_T`
Packit c4476c
	mflr	r6
Packit c4476c
	mfspr	r7, 256			# save vrsave
Packit c4476c
	stvx	v20,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	stvx	v21,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	stvx	v22,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	stvx	v23,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	stvx	v24,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	stvx	v25,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	stvx	v26,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	stvx	v27,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	stvx	v28,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	stvx	v29,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	stvx	v30,r10,$sp
Packit c4476c
	stvx	v31,r11,$sp
Packit c4476c
	stw	r7,`$FRAME-4`($sp)	# save vrsave
Packit c4476c
	li	r0, -1
Packit c4476c
	$PUSH	r6,`$FRAME+$LRSAVE`($sp)
Packit c4476c
	mtspr	256, r0			# preserve all AltiVec registers
Packit c4476c
Packit c4476c
	bl	_vpaes_encrypt_preheat
Packit c4476c
Packit c4476c
	?lvsl	$inpperm, 0, $inp	# prepare for unaligned access
Packit c4476c
	lvx	v0, 0, $inp
Packit c4476c
	addi	$inp, $inp, 15		# 15 is not a typo
					# (address of the last input byte, so an aligned input stays within one quadword)
Packit c4476c
	 ?lvsr	$outperm, 0, $out
Packit c4476c
	?lvsl	$keyperm, 0, $key	# prepare for unaligned access
Packit c4476c
	lvx	$inptail, 0, $inp	# redundant in aligned case
Packit c4476c
	?vperm	v0, v0, $inptail, $inpperm
Packit c4476c
Packit c4476c
	bl	_vpaes_encrypt_core
Packit c4476c
Packit c4476c
	andi.	r8, $out, 15
Packit c4476c
	li	r9, 16
Packit c4476c
	beq	Lenc_out_aligned
Packit c4476c
Packit c4476c
	vperm	v0, v0, v0, $outperm	# rotate right/left
Packit c4476c
	mtctr	r9			# 16 single-byte stores
Packit c4476c
Lenc_out_unaligned:
Packit c4476c
	stvebx	v0, 0, $out
Packit c4476c
	addi	$out, $out, 1
Packit c4476c
	bdnz	Lenc_out_unaligned
Packit c4476c
	b	Lenc_done
Packit c4476c
Packit c4476c
.align	4
Packit c4476c
Lenc_out_aligned:
Packit c4476c
	stvx	v0, 0, $out
Packit c4476c
Lenc_done:
Packit c4476c
Packit c4476c
	li	r10,`15+6*$SIZE_T`
Packit c4476c
	li	r11,`31+6*$SIZE_T`
Packit c4476c
	mtlr	r6			# LR was kept in r6 across the calls above
Packit c4476c
	mtspr	256, r7			# restore vrsave
Packit c4476c
	lvx	v20,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	lvx	v21,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	lvx	v22,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	lvx	v23,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	lvx	v24,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	lvx	v25,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	lvx	v26,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	lvx	v27,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	lvx	v28,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	lvx	v29,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	lvx	v30,r10,$sp
Packit c4476c
	lvx	v31,r11,$sp
Packit c4476c
	addi	$sp,$sp,$FRAME
Packit c4476c
	blr
Packit c4476c
	.long	0
Packit c4476c
	.byte	0,12,0x04,1,0x80,0,3,0
Packit c4476c
	.long	0
Packit c4476c
.size	.vpaes_encrypt,.-.vpaes_encrypt
Packit c4476c
Packit c4476c
##
##  _vpaes_decrypt_preheat
##
##  Decryption counterpart of _vpaes_encrypt_preheat.  On return:
##     r12     -> _vpaes_consts (set by Lconsts)
##     v7      =  0x00..00,  v8 = 0x04..04,  v9 = 0x0f..0f
##     v10,v11 =  Lk_inv     v12,v13 = Lk_dipt    v14,v15 = Lk_dsbo
##     v16,v17 =  Lk_dsb9    v18,v19 = Lk_dsbd
##     v20,v21 =  Lk_dsbb    v22,v23 = Lk_dsbe
##  Overwrites r0 (inside Lconsts) and r8-r11.  LR is kept in r8
##  across the call to Lconsts and restored afterwards.
##
.align	4
Packit c4476c
_vpaes_decrypt_preheat:
Packit c4476c
	mflr	r8
Packit c4476c
	bl	Lconsts
Packit c4476c
	mtlr	r8
Packit c4476c
	li	r11, 0xc0		# Lk_inv
Packit c4476c
	li	r10, 0xd0
Packit c4476c
	li	r9,  0x160		# Lk_dipt
Packit c4476c
	li	r8,  0x170
Packit c4476c
	vxor	v7, v7, v7		# 0x00..00
Packit c4476c
	vspltisb	v8,4		# 0x04..04
Packit c4476c
	vspltisb	v9,0x0f		# 0x0f..0f
Packit c4476c
	lvx	$invlo, r12, r11
Packit c4476c
	li	r11, 0x180		# Lk_dsbo
Packit c4476c
	lvx	$invhi, r12, r10
Packit c4476c
	li	r10, 0x190
Packit c4476c
	lvx	$iptlo, r12, r9
Packit c4476c
	li	r9,  0x1a0		# Lk_dsb9
Packit c4476c
	lvx	$ipthi, r12, r8
Packit c4476c
	li	r8,  0x1b0
Packit c4476c
	lvx	$sbou, r12, r11
Packit c4476c
	li	r11, 0x1c0		# Lk_dsbd
Packit c4476c
	lvx	$sbot, r12, r10
Packit c4476c
	li	r10, 0x1d0
Packit c4476c
	lvx	$sb9u, r12, r9
Packit c4476c
	li	r9,  0x1e0		# Lk_dsbb
Packit c4476c
	lvx	$sb9t, r12, r8
Packit c4476c
	li	r8,  0x1f0
Packit c4476c
	lvx	$sbdu, r12, r11
Packit c4476c
	li	r11, 0x200		# Lk_dsbe
Packit c4476c
	lvx	$sbdt, r12, r10
Packit c4476c
	li	r10, 0x210
Packit c4476c
	lvx	$sbbu, r12, r9
Packit c4476c
	lvx	$sbbt, r12, r8
Packit c4476c
	lvx	$sbeu, r12, r11
Packit c4476c
	lvx	$sbet, r12, r10
Packit c4476c
	blr
Packit c4476c
	.long	0
Packit c4476c
	.byte	0,12,0x14,0,0,0,0,0
Packit c4476c
Packit c4476c
##
Packit c4476c
##  Decryption core
Packit c4476c
##
Packit c4476c
##  Same API as encryption core.
##
##  In AltiVec terms:
##     input  : v0 = block, r5 = key schedule (round count is read
##              from offset 240), v31 = permute vector that aligns the
##              round keys, r12 and v7-v23 as left by
##              _vpaes_decrypt_preheat
##     output : v0
##     overwrites v1-v6, r8-r11, CTR and cr0
##  The trailing comments on each instruction quote the x86_64
##  original and use its register names.
Packit c4476c
##
Packit c4476c
.align	4
Packit c4476c
_vpaes_decrypt_core:
Packit c4476c
	lwz	r8, 240($key)		# pull rounds
Packit c4476c
	li	r9, 16
Packit c4476c
	lvx	v5, 0, $key		# vmovdqu	(%r9),	%xmm4		# round0 key
Packit c4476c
	li	r11, 0x30		# offset of the current Lk_mc_forward row from r12; stepped downwards in the loop
Packit c4476c
	lvx	v6, r9, $key
Packit c4476c
	addi	r9, r9, 16
Packit c4476c
	?vperm	v5, v5, v6, $keyperm	# align round key
Packit c4476c
	vsrb	v1, v0, v8		# vpsrlb	\$4,	%xmm0,	%xmm0
Packit c4476c
	vperm	v0, $iptlo, $iptlo, v0	# vpshufb	%xmm1,	%xmm2,	%xmm2
Packit c4476c
	vperm	v1, $ipthi, $ipthi, v1	# vpshufb	%xmm0,	%xmm1,	%xmm0
Packit c4476c
	vxor	v0, v0, v5		# vpxor	%xmm4,	%xmm2,	%xmm2
Packit c4476c
	vxor	v0, v0, v1		# vpxor	%xmm2,	%xmm0,	%xmm0
Packit c4476c
	mtctr	r8			# CTR counts the rounds for bdnz below
Packit c4476c
	b	Ldec_entry
Packit c4476c
Packit c4476c
.align 4
Packit c4476c
Ldec_loop:
Packit c4476c
#
Packit c4476c
#  Inverse mix columns
Packit c4476c
#
Packit c4476c
	lvx	v0, r12, r11		# v5 and v0 are flipped
Packit c4476c
					# vmovdqa	-0x20(%r10),%xmm4		# 4 : sb9u
Packit c4476c
					# vmovdqa	-0x10(%r10),%xmm1		# 0 : sb9t
Packit c4476c
	vperm	v4, $sb9u, v7, v2	# vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sb9u
Packit c4476c
	subi	r11, r11, 16
Packit c4476c
	vperm	v1, $sb9t, v7, v3	# vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sb9t
Packit c4476c
	andi.	r11, r11, 0x30		# wrap the row offset within 0x00-0x30
Packit c4476c
	vxor	v5, v5, v4		# vpxor		%xmm4,	%xmm0,	%xmm0
Packit c4476c
					# vmovdqa	0x00(%r10),%xmm4		# 4 : sbdu
Packit c4476c
	vxor	v5, v5, v1		# vpxor		%xmm1,	%xmm0,	%xmm0		# 0 = ch
Packit c4476c
					# vmovdqa	0x10(%r10),%xmm1		# 0 : sbdt
Packit c4476c
Packit c4476c
	vperm	v4, $sbdu, v7, v2	# vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sbdu
Packit c4476c
	vperm 	v5, v5, v7, v0		# vpshufb	%xmm5,	%xmm0,	%xmm0		# MC ch
Packit c4476c
	vperm	v1, $sbdt, v7, v3	# vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sbdt
Packit c4476c
	vxor	v5, v5, v4		# vpxor		%xmm4,	%xmm0,	%xmm0		# 4 = ch
Packit c4476c
					# vmovdqa	0x20(%r10),	%xmm4		# 4 : sbbu
Packit c4476c
	vxor	v5, v5, v1		# vpxor		%xmm1,	%xmm0,	%xmm0		# 0 = ch
Packit c4476c
					# vmovdqa	0x30(%r10),	%xmm1		# 0 : sbbt
Packit c4476c
Packit c4476c
	vperm	v4, $sbbu, v7, v2	# vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sbbu
Packit c4476c
	vperm	v5, v5, v7, v0		# vpshufb	%xmm5,	%xmm0,	%xmm0		# MC ch
Packit c4476c
	vperm	v1, $sbbt, v7, v3	# vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sbbt
Packit c4476c
	vxor	v5, v5, v4		# vpxor		%xmm4,	%xmm0,	%xmm0		# 4 = ch
Packit c4476c
					# vmovdqa	0x40(%r10),	%xmm4		# 4 : sbeu
Packit c4476c
	vxor	v5, v5, v1		# vpxor		%xmm1,	%xmm0,	%xmm0		# 0 = ch
Packit c4476c
					# vmovdqa	0x50(%r10),	%xmm1		# 0 : sbet
Packit c4476c
Packit c4476c
	vperm	v4, $sbeu, v7, v2	# vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sbeu
Packit c4476c
	vperm	v5, v5, v7, v0		# vpshufb	%xmm5,	%xmm0,	%xmm0		# MC ch
Packit c4476c
	vperm	v1, $sbet, v7, v3	# vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sbet
Packit c4476c
	vxor	v0, v5, v4		# vpxor		%xmm4,	%xmm0,	%xmm0		# 4 = ch
Packit c4476c
	vxor	v0, v0, v1		# vpxor		%xmm1,	%xmm0,	%xmm0		# 0 = ch
Packit c4476c
Packit c4476c
Ldec_entry:
Packit c4476c
	# top of round
Packit c4476c
	vsrb	v1, v0, v8		# vpsrlb	\$4,	%xmm0,	%xmm0	# 1 = i
Packit c4476c
	vperm	v2, $invhi, $invhi, v0	# vpshufb	%xmm1,	%xmm11,	%xmm2	# 2 = a/k
Packit c4476c
	vxor	v0, v0, v1		# vpxor		%xmm0,	%xmm1,	%xmm1	# 0 = j
Packit c4476c
	vperm	v3, $invlo, $invlo, v1	# vpshufb	%xmm0, 	%xmm10,	%xmm3	# 3 = 1/i
Packit c4476c
	vperm	v4, $invlo, $invlo, v0	# vpshufb	%xmm1,	%xmm10,	%xmm4	# 4 = 1/j
Packit c4476c
	vand	v0, v0, v9
Packit c4476c
	vxor	v3, v3, v2		# vpxor		%xmm2,	%xmm3,	%xmm3	# 3 = iak = 1/i + a/k
Packit c4476c
	vxor	v4, v4, v2		# vpxor		%xmm2, 	%xmm4,	%xmm4	# 4 = jak = 1/j + a/k
Packit c4476c
	vperm	v2, $invlo, v7, v3	# vpshufb	%xmm3,	%xmm10,	%xmm2	# 2 = 1/iak
Packit c4476c
	vmr	v5, v6			# previously loaded key quadword becomes the head of the next round key
Packit c4476c
	lvx	v6, r9, $key		# vmovdqu	(%r9),	%xmm0
Packit c4476c
	vperm	v3, $invlo, v7, v4	# vpshufb	%xmm4,  %xmm10,	%xmm3	# 3 = 1/jak
Packit c4476c
	addi	r9, r9, 16
Packit c4476c
	vxor	v2, v2, v0		# vpxor		%xmm1,	%xmm2,	%xmm2	# 2 = io
Packit c4476c
	?vperm	v5, v5, v6, $keyperm	# align round key
Packit c4476c
	vxor	v3, v3, v1		# vpxor		%xmm0,  %xmm3,	%xmm3	# 3 = jo
Packit c4476c
	bdnz	Ldec_loop
Packit c4476c
Packit c4476c
	# middle of last round
Packit c4476c
	addi	r10, r11, 0x80		# row of Lk_sr (table starts at 0x80) selected by r11
Packit c4476c
					# vmovdqa	0x60(%r10),	%xmm4	# 3 : sbou
Packit c4476c
	vperm	v4, $sbou, v7, v2	# vpshufb	%xmm2,	%xmm4,	%xmm4	# 4 = sbou
Packit c4476c
					# vmovdqa	0x70(%r10),	%xmm1	# 0 : sbot
Packit c4476c
	lvx	v2, r12, r10		# vmovdqa	-0x160(%r11),	%xmm2	# .Lk_sr-.Lk_dsbd=-0x160
Packit c4476c
	vperm	v1, $sbot, v7, v3	# vpshufb	%xmm3,	%xmm1,	%xmm1	# 0 = sb1t
Packit c4476c
	vxor	v4, v4, v5		# vpxor		%xmm0,	%xmm4,	%xmm4	# 4 = sb1u + k
Packit c4476c
	vxor	v0, v1, v4		# vpxor		%xmm4,	%xmm1,	%xmm0	# 0 = A
Packit c4476c
	vperm	v0, v0, v7, v2		# vpshufb	%xmm2,	%xmm0,	%xmm0
Packit c4476c
	blr
Packit c4476c
	.long	0
Packit c4476c
	.byte	0,12,0x14,0,0,0,0,0
Packit c4476c
Packit c4476c
##
##  vpaes_decrypt
##
##  Decrypts one 16-byte block.
##     r3 = input pointer, r4 = output pointer, r5 = key schedule
##  Neither pointer needs to be 16-byte aligned: the input is assembled
##  from two aligned loads and the output is written either with one
##  aligned store or byte by byte.
##  v20-v31 are saved on entry and reloaded on exit.  LR and vrsave are
##  also written to the frame, but this routine restores them from
##  r6 and r7 rather than from those stack copies.
##
.globl	.vpaes_decrypt
Packit c4476c
.align	5
Packit c4476c
.vpaes_decrypt:
Packit c4476c
	$STU	$sp,-$FRAME($sp)
Packit c4476c
	li	r10,`15+6*$SIZE_T`	# +15: stvx drops the low four address bits, so this rounds the save area up to 16 bytes
Packit c4476c
	li	r11,`31+6*$SIZE_T`
Packit c4476c
	mflr	r6
Packit c4476c
	mfspr	r7, 256			# save vrsave
Packit c4476c
	stvx	v20,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	stvx	v21,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	stvx	v22,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	stvx	v23,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	stvx	v24,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	stvx	v25,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	stvx	v26,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	stvx	v27,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	stvx	v28,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	stvx	v29,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	stvx	v30,r10,$sp
Packit c4476c
	stvx	v31,r11,$sp
Packit c4476c
	stw	r7,`$FRAME-4`($sp)	# save vrsave
Packit c4476c
	li	r0, -1
Packit c4476c
	$PUSH	r6,`$FRAME+$LRSAVE`($sp)
Packit c4476c
	mtspr	256, r0			# preserve all AltiVec registers
Packit c4476c
Packit c4476c
	bl	_vpaes_decrypt_preheat
Packit c4476c
Packit c4476c
	?lvsl	$inpperm, 0, $inp	# prepare for unaligned access
Packit c4476c
	lvx	v0, 0, $inp
Packit c4476c
	addi	$inp, $inp, 15		# 15 is not a typo
					# (address of the last input byte, so an aligned input stays within one quadword)
Packit c4476c
	 ?lvsr	$outperm, 0, $out
Packit c4476c
	?lvsl	$keyperm, 0, $key
Packit c4476c
	lvx	$inptail, 0, $inp	# redundant in aligned case
Packit c4476c
	?vperm	v0, v0, $inptail, $inpperm
Packit c4476c
Packit c4476c
	bl	_vpaes_decrypt_core
Packit c4476c
Packit c4476c
	andi.	r8, $out, 15
Packit c4476c
	li	r9, 16
Packit c4476c
	beq	Ldec_out_aligned
Packit c4476c
Packit c4476c
	vperm	v0, v0, v0, $outperm	# rotate right/left
Packit c4476c
	mtctr	r9			# 16 single-byte stores
Packit c4476c
Ldec_out_unaligned:
Packit c4476c
	stvebx	v0, 0, $out
Packit c4476c
	addi	$out, $out, 1
Packit c4476c
	bdnz	Ldec_out_unaligned
Packit c4476c
	b	Ldec_done
Packit c4476c
Packit c4476c
.align	4
Packit c4476c
Ldec_out_aligned:
Packit c4476c
	stvx	v0, 0, $out
Packit c4476c
Ldec_done:
Packit c4476c
Packit c4476c
	li	r10,`15+6*$SIZE_T`
Packit c4476c
	li	r11,`31+6*$SIZE_T`
Packit c4476c
	mtlr	r6			# LR was kept in r6 across the calls above
Packit c4476c
	mtspr	256, r7			# restore vrsave
Packit c4476c
	lvx	v20,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	lvx	v21,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	lvx	v22,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	lvx	v23,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	lvx	v24,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	lvx	v25,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	lvx	v26,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	lvx	v27,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	lvx	v28,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	lvx	v29,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	lvx	v30,r10,$sp
Packit c4476c
	lvx	v31,r11,$sp
Packit c4476c
	addi	$sp,$sp,$FRAME
Packit c4476c
	blr
Packit c4476c
	.long	0
Packit c4476c
	.byte	0,12,0x04,1,0x80,0,3,0
Packit c4476c
	.long	0
Packit c4476c
.size	.vpaes_decrypt,.-.vpaes_decrypt
Packit c4476c
Packit c4476c
.globl	.vpaes_cbc_encrypt
Packit c4476c
.align	5
Packit c4476c
.vpaes_cbc_encrypt:
Packit c4476c
	${UCMP}i r5,16
Packit c4476c
	bltlr-
Packit c4476c
Packit c4476c
	$STU	$sp,-`($FRAME+2*$SIZE_T)`($sp)
Packit c4476c
	mflr	r0
Packit c4476c
	li	r10,`15+6*$SIZE_T`
Packit c4476c
	li	r11,`31+6*$SIZE_T`
Packit c4476c
	mfspr	r12, 256
Packit c4476c
	stvx	v20,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	stvx	v21,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	stvx	v22,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	stvx	v23,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	stvx	v24,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	stvx	v25,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	stvx	v26,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	stvx	v27,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	stvx	v28,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	stvx	v29,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	stvx	v30,r10,$sp
Packit c4476c
	stvx	v31,r11,$sp
Packit c4476c
	stw	r12,`$FRAME-4`($sp)	# save vrsave
Packit c4476c
	$PUSH	r30,`$FRAME+$SIZE_T*0`($sp)
Packit c4476c
	$PUSH	r31,`$FRAME+$SIZE_T*1`($sp)
Packit c4476c
	li	r9, -16
Packit c4476c
	$PUSH	r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
Packit c4476c
Packit c4476c
	and	r30, r5, r9		# copy length&-16
Packit c4476c
	andi.	r9, $out, 15		# is $out aligned?
Packit c4476c
	mr	r5, r6			# copy pointer to key
Packit c4476c
	mr	r31, r7			# copy pointer to iv
Packit c4476c
	li	r6, -1
Packit c4476c
	mcrf	cr1, cr0		# put aside $out alignment flag
Packit c4476c
	mr	r7, r12			# copy vrsave
Packit c4476c
	mtspr	256, r6			# preserve all AltiVec registers
Packit c4476c
Packit c4476c
	lvx	v24, 0, r31		# load [potentially unaligned] iv
Packit c4476c
	li	r9, 15
Packit c4476c
	?lvsl	$inpperm, 0, r31
Packit c4476c
	lvx	v25, r9, r31
Packit c4476c
	?vperm	v24, v24, v25, $inpperm
Packit c4476c
Packit c4476c
	cmpwi	r8, 0			# test direction
Packit c4476c
	neg	r8, $inp		# prepare for unaligned access
Packit c4476c
	 vxor	v7, v7, v7
Packit c4476c
	?lvsl	$keyperm, 0, $key
Packit c4476c
	 ?lvsr	$outperm, 0, $out
Packit c4476c
	?lvsr	$inpperm, 0, r8		# -$inp
Packit c4476c
	 vnor	$outmask, v7, v7	# 0xff..ff
Packit c4476c
	lvx	$inptail, 0, $inp
Packit c4476c
	 ?vperm	$outmask, v7, $outmask, $outperm
Packit c4476c
	addi	$inp, $inp, 15		# 15 is not a typo
Packit c4476c
Packit c4476c
	beq	Lcbc_decrypt
Packit c4476c
Packit c4476c
	bl	_vpaes_encrypt_preheat
Packit c4476c
	li	r0, 16
Packit c4476c
Packit c4476c
	beq	cr1, Lcbc_enc_loop	# $out is aligned
Packit c4476c
Packit c4476c
	vmr	v0, $inptail
Packit c4476c
	lvx	$inptail, 0, $inp
Packit c4476c
	addi	$inp, $inp, 16
Packit c4476c
	?vperm	v0, v0, $inptail, $inpperm
Packit c4476c
	vxor	v0, v0, v24		# ^= iv
Packit c4476c
Packit c4476c
	bl	_vpaes_encrypt_core
Packit c4476c
Packit c4476c
	andi.	r8, $out, 15
Packit c4476c
	vmr	v24, v0			# put aside iv
Packit c4476c
	sub	r9, $out, r8
Packit c4476c
	vperm	$outhead, v0, v0, $outperm	# rotate right/left
Packit c4476c
Packit c4476c
Lcbc_enc_head:
Packit c4476c
	stvebx	$outhead, r8, r9
Packit c4476c
	cmpwi	r8, 15
Packit c4476c
	addi	r8, r8, 1
Packit c4476c
	bne	Lcbc_enc_head
Packit c4476c
Packit c4476c
	sub.	r30, r30, r0		# len -= 16
Packit c4476c
	addi	$out, $out, 16
Packit c4476c
	beq	Lcbc_unaligned_done
Packit c4476c
Packit c4476c
Lcbc_enc_loop:
Packit c4476c
	vmr	v0, $inptail
Packit c4476c
	lvx	$inptail, 0, $inp
Packit c4476c
	addi	$inp, $inp, 16
Packit c4476c
	?vperm	v0, v0, $inptail, $inpperm
Packit c4476c
	vxor	v0, v0, v24		# ^= iv
Packit c4476c
Packit c4476c
	bl	_vpaes_encrypt_core
Packit c4476c
Packit c4476c
	vmr	v24, v0			# put aside iv
Packit c4476c
	sub.	r30, r30, r0		# len -= 16
Packit c4476c
	vperm	v0, v0, v0, $outperm	# rotate right/left
Packit c4476c
	vsel	v1, $outhead, v0, $outmask
Packit c4476c
	vmr	$outhead, v0
Packit c4476c
	stvx	v1, 0, $out
Packit c4476c
	addi	$out, $out, 16
Packit c4476c
	bne	Lcbc_enc_loop
Packit c4476c
Packit c4476c
	b	Lcbc_done
Packit c4476c
Packit c4476c
.align	5
Packit c4476c
Lcbc_decrypt:
Packit c4476c
	bl	_vpaes_decrypt_preheat
Packit c4476c
	li	r0, 16
Packit c4476c
Packit c4476c
	beq	cr1, Lcbc_dec_loop	# $out is aligned
Packit c4476c
Packit c4476c
	vmr	v0, $inptail
Packit c4476c
	lvx	$inptail, 0, $inp
Packit c4476c
	addi	$inp, $inp, 16
Packit c4476c
	?vperm	v0, v0, $inptail, $inpperm
Packit c4476c
	vmr	v25, v0			# put aside input
Packit c4476c
Packit c4476c
	bl	_vpaes_decrypt_core
Packit c4476c
Packit c4476c
	andi.	r8, $out, 15
Packit c4476c
	vxor	v0, v0, v24		# ^= iv
Packit c4476c
	vmr	v24, v25
Packit c4476c
	sub	r9, $out, r8
Packit c4476c
	vperm	$outhead, v0, v0, $outperm	# rotate right/left
Packit c4476c
Packit c4476c
Lcbc_dec_head:
Packit c4476c
	stvebx	$outhead, r8, r9
Packit c4476c
	cmpwi	r8, 15
Packit c4476c
	addi	r8, r8, 1
Packit c4476c
	bne	Lcbc_dec_head
Packit c4476c
Packit c4476c
	sub.	r30, r30, r0		# len -= 16
Packit c4476c
	addi	$out, $out, 16
Packit c4476c
	beq	Lcbc_unaligned_done
Packit c4476c
Packit c4476c
Lcbc_dec_loop:
Packit c4476c
	vmr	v0, $inptail
Packit c4476c
	lvx	$inptail, 0, $inp
Packit c4476c
	addi	$inp, $inp, 16
Packit c4476c
	?vperm	v0, v0, $inptail, $inpperm
Packit c4476c
	vmr	v25, v0			# put aside input
Packit c4476c
Packit c4476c
	bl	_vpaes_decrypt_core
Packit c4476c
Packit c4476c
	vxor	v0, v0, v24		# ^= iv
Packit c4476c
	vmr	v24, v25
Packit c4476c
	sub.	r30, r30, r0		# len -= 16
Packit c4476c
	vperm	v0, v0, v0, $outperm	# rotate right/left
Packit c4476c
	vsel	v1, $outhead, v0, $outmask
Packit c4476c
	vmr	$outhead, v0
Packit c4476c
	stvx	v1, 0, $out
Packit c4476c
	addi	$out, $out, 16
Packit c4476c
	bne	Lcbc_dec_loop
Packit c4476c
Packit c4476c
Lcbc_done:
Packit c4476c
	beq	cr1, Lcbc_write_iv	# $out is aligned
Packit c4476c
Packit c4476c
Lcbc_unaligned_done:
Packit c4476c
	andi.	r8, $out, 15
Packit c4476c
	sub	$out, $out, r8
Packit c4476c
	li	r9, 0
Packit c4476c
Lcbc_tail:
Packit c4476c
	stvebx	$outhead, r9, $out
Packit c4476c
	addi	r9, r9, 1
Packit c4476c
	cmpw	r9, r8
Packit c4476c
	bne	Lcbc_tail
Packit c4476c
Packit c4476c
Lcbc_write_iv:
Packit c4476c
	neg	r8, r31			# write [potentially unaligned] iv
Packit c4476c
	li	r10, 4
Packit c4476c
	?lvsl	$outperm, 0, r8
Packit c4476c
	li	r11, 8
Packit c4476c
	li	r12, 12
Packit c4476c
	vperm	v24, v24, v24, $outperm	# rotate right/left
Packit c4476c
	stvewx	v24, 0, r31		# ivp is at least 32-bit aligned
Packit c4476c
	stvewx	v24, r10, r31
Packit c4476c
	stvewx	v24, r11, r31
Packit c4476c
	stvewx	v24, r12, r31
Packit c4476c
Packit c4476c
	mtspr	256, r7			# restore vrsave
Packit c4476c
	li	r10,`15+6*$SIZE_T`
Packit c4476c
	li	r11,`31+6*$SIZE_T`
Packit c4476c
	lvx	v20,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	lvx	v21,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	lvx	v22,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	lvx	v23,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	lvx	v24,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	lvx	v25,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	lvx	v26,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	lvx	v27,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	lvx	v28,r10,$sp
Packit c4476c
	addi	r10,r10,32
Packit c4476c
	lvx	v29,r11,$sp
Packit c4476c
	addi	r11,r11,32
Packit c4476c
	lvx	v30,r10,$sp
Packit c4476c
	lvx	v31,r11,$sp
Packit c4476c
Lcbc_abort:
Packit c4476c
	$POP	r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
Packit c4476c
	$POP	r30,`$FRAME+$SIZE_T*0`($sp)
Packit c4476c
	$POP	r31,`$FRAME+$SIZE_T*1`($sp)
Packit c4476c
	mtlr	r0
Packit c4476c
	addi	$sp,$sp,`$FRAME+$SIZE_T*2`
Packit c4476c
	blr
Packit c4476c
	.long	0
Packit c4476c
	.byte	0,12,0x04,1,0x80,2,6,0
Packit c4476c
	.long	0
Packit c4476c
.size	.vpaes_cbc_encrypt,.-.vpaes_cbc_encrypt
Packit c4476c
___
Packit c4476c
}
Packit c4476c
{
# AES key schedule.  This scope appends to $code the assembly for
# _vpaes_key_preheat, _vpaes_schedule_core and its helpers
# (_vpaes_schedule_192_smear, _vpaes_schedule_round/_low_round,
# _vpaes_schedule_transform, _vpaes_schedule_mangle), plus the two public
# entry points .vpaes_set_encrypt_key and .vpaes_set_decrypt_key.
#
# Register aliases interpolated into the template:
#   $inp  = r3   pointer to the user key (loaded with unaligned-safe lvx/vperm)
#   $bits = r4   key length in bits (compared against 192 to pick the schedule)
#   $out  = r5   key schedule pointer; the round count is stored at 240($out)
#   $dir  = cr1  direction flag: "eq" takes the encrypt path, "ne" the decrypt
#                path (set by cmplw/cmplwi in the two entry points)
#   $invlo/$invhi = v10/v11, $iptlo/$ipthi = v12/v13, $rcon = v24
# $inpperm, $outperm, $outmask, $outhead, $sp, $FRAME, $SIZE_T, $LRSAVE, $STU,
# $PUSH and $POP are not declared in this scope; presumably they are
# file-level variables set up earlier -- TODO confirm.
#
# Constant registers loaded by _vpaes_key_preheat: v8 = 0x04 in every byte
# (shift count for vsrb), v9 = zero, v14-v15 Lk_sb1, v16-v23 Lk_dks*,
# v25 = Lk_mc_forward[0], v26 = Lks63.  The table base is taken from r12
# right after "bl Lconsts" (Lconsts is defined elsewhere in the file).
#
# _vpaes_schedule_core keeps its return address in r7 and wipes v0-v7 before
# returning.  Both entry points save/restore v20-v31 and VRSAVE and return 0
# in r3.
#
# NOTE: the trailing "%xmm/%r.." comments quote the x86_64 instructions this
# code was transliterated from, so register names and constants in them do
# not always match the PPC instruction on the same line (e.g. the rounds
# value is computed with "+6" here while the quoted x86 comment says "+5").
my ($inp,$bits,$out)=map("r$_",(3..5));
my $dir="cr1";
my ($invlo,$invhi,$iptlo,$ipthi,$rcon) = map("v$_",(10..13,24));

$code.=<<___;
########################################################
##                                                    ##
##                  AES key schedule                  ##
##                                                    ##
########################################################
.align	4
_vpaes_key_preheat:
	mflr	r8
	bl	Lconsts
	mtlr	r8
	li	r11, 0xc0		# Lk_inv
	li	r10, 0xd0
	li	r9,  0xe0		# L_ipt
	li	r8,  0xf0

	vspltisb	v8,4		# 0x04..04
	vxor	v9,v9,v9		# 0x00..00
	lvx	$invlo, r12, r11	# Lk_inv
	li	r11, 0x120
	lvx	$invhi, r12, r10
	li	r10, 0x130
	lvx	$iptlo, r12, r9		# Lk_ipt
	li	r9, 0x220
	lvx	$ipthi, r12, r8
	li	r8, 0x230

	lvx	v14, r12, r11		# Lk_sb1
	li	r11, 0x240
	lvx	v15, r12, r10
	li	r10, 0x250

	lvx	v16, r12, r9		# Lk_dksd
	li	r9, 0x260
	lvx	v17, r12, r8
	li	r8, 0x270
	lvx	v18, r12, r11		# Lk_dksb
	li	r11, 0x280
	lvx	v19, r12, r10
	li	r10, 0x290
	lvx	v20, r12, r9		# Lk_dkse
	li	r9, 0x2a0
	lvx	v21, r12, r8
	li	r8, 0x2b0
	lvx	v22, r12, r11		# Lk_dks9
	lvx	v23, r12, r10

	lvx	v24, r12, r9		# Lk_rcon
	lvx	v25, 0, r12		# Lk_mc_forward[0]
	lvx	v26, r12, r8		# Lks63
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

.align	4
_vpaes_schedule_core:
	mflr	r7

	bl	_vpaes_key_preheat	# load the tables

	#lvx	v0, 0, $inp		# vmovdqu	(%rdi),	%xmm0		# load key (unaligned)
	neg	r8, $inp		# prepare for unaligned access
	lvx	v0, 0, $inp
	addi	$inp, $inp, 15		# 15 is not typo
	?lvsr	$inpperm, 0, r8		# -$inp
	lvx	v6, 0, $inp		# v6 serves as inptail
	addi	$inp, $inp, 8
	?vperm	v0, v0, v6, $inpperm

	# input transform
	vmr	v3, v0			# vmovdqa	%xmm0,	%xmm3
	bl	_vpaes_schedule_transform
	vmr	v7, v0			# vmovdqa	%xmm0,	%xmm7

	bne	$dir, Lschedule_am_decrypting

	# encrypting, output zeroth round key after transform
	li	r8, 0x30		# mov	\$0x30,%r8d
	li	r9, 4
	li	r10, 8
	li	r11, 12

	?lvsr	$outperm, 0, $out	# prepare for unaligned access
	vnor	$outmask, v9, v9	# 0xff..ff
	?vperm	$outmask, v9, $outmask, $outperm

	#stvx	v0, 0, $out		# vmovdqu	%xmm0,	(%rdx)
	vperm	$outhead, v0, v0, $outperm	# rotate right/left
	stvewx	$outhead, 0, $out	# some are superfluous
	stvewx	$outhead, r9, $out
	stvewx	$outhead, r10, $out
	addi	r10, r12, 0x80		# lea	.Lk_sr(%rip),%r10
	stvewx	$outhead, r11, $out
	b	Lschedule_go

Lschedule_am_decrypting:
	srwi	r8, $bits, 1		# shr	\$1,%r8d
	andi.	r8, r8, 32		# and	\$32,%r8d
	xori	r8, r8, 32		# xor	\$32,%r8d	# nbits==192?0:32
	addi	r10, r12, 0x80		# lea	.Lk_sr(%rip),%r10
	# decrypting, output zeroth round key after shiftrows
	lvx	v1, r8, r10		# vmovdqa	(%r8,%r10),	%xmm1
	li	r9, 4
	li	r10, 8
	li	r11, 12
	vperm	v4, v3, v3, v1		# vpshufb	%xmm1,	%xmm3,	%xmm3

	neg	r0, $out		# prepare for unaligned access
	?lvsl	$outperm, 0, r0
	vnor	$outmask, v9, v9	# 0xff..ff
	?vperm	$outmask, $outmask, v9, $outperm

	#stvx	v4, 0, $out		# vmovdqu	%xmm3,	(%rdx)
	vperm	$outhead, v4, v4, $outperm	# rotate right/left
	stvewx	$outhead, 0, $out	# some are superfluous
	stvewx	$outhead, r9, $out
	stvewx	$outhead, r10, $out
	addi	r10, r12, 0x80		# lea	.Lk_sr(%rip),%r10
	stvewx	$outhead, r11, $out
	addi	$out, $out, 15		# 15 is not typo
	xori	r8, r8, 0x30		# xor	\$0x30, %r8

Lschedule_go:
	cmplwi	$bits, 192		# cmp	\$192,	%esi
	bgt	Lschedule_256
	beq	Lschedule_192
	# 128: fall though

##
##  .schedule_128
##
##  128-bit specific part of key schedule.
##
##  This schedule is really simple, because all its parts
##  are accomplished by the subroutines.
##
Lschedule_128:
	li	r0, 10			# mov	\$10, %esi
	mtctr	r0

Loop_schedule_128:
	bl 	_vpaes_schedule_round
	bdz 	Lschedule_mangle_last	# dec	%esi
	bl	_vpaes_schedule_mangle	# write output
	b 	Loop_schedule_128

##
##  .aes_schedule_192
##
##  192-bit specific part of key schedule.
##
##  The main body of this schedule is the same as the 128-bit
##  schedule, but with more smearing.  The long, high side is
##  stored in %xmm7 as before, and the short, low side is in
##  the high bits of %xmm6.
##
##  This schedule is somewhat nastier, however, because each
##  round produces 192 bits of key material, or 1.5 round keys.
##  Therefore, on each cycle we do 2 rounds and produce 3 round
##  keys.
##
.align	4
Lschedule_192:
	li	r0, 4			# mov	\$4,	%esi
	lvx	v0, 0, $inp
	?vperm	v0, v6, v0, $inpperm
	?vsldoi	v0, v3, v0, 8		# vmovdqu	8(%rdi),%xmm0		# load key part 2 (very unaligned)
	bl	_vpaes_schedule_transform	# input transform
	?vsldoi	v6, v0, v9, 8
	?vsldoi	v6, v9, v6, 8		# clobber "low" side with zeros
	mtctr	r0

Loop_schedule_192:
	bl	_vpaes_schedule_round
	?vsldoi	v0, v6, v0, 8		# vpalignr	\$8,%xmm6,%xmm0,%xmm0
	bl	_vpaes_schedule_mangle	# save key n
	bl	_vpaes_schedule_192_smear
	bl	_vpaes_schedule_mangle	# save key n+1
	bl	_vpaes_schedule_round
	bdz 	Lschedule_mangle_last	# dec	%esi
	bl	_vpaes_schedule_mangle	# save key n+2
	bl	_vpaes_schedule_192_smear
	b	Loop_schedule_192

##
##  .aes_schedule_256
##
##  256-bit specific part of key schedule.
##
##  The structure here is very similar to the 128-bit
##  schedule, but with an additional "low side" in
##  %xmm6.  The low side's rounds are the same as the
##  high side's, except no rcon and no rotation.
##
.align	4
Lschedule_256:
	li	r0, 7			# mov	\$7, %esi
	addi	$inp, $inp, 8
	lvx	v0, 0, $inp		# vmovdqu	16(%rdi),%xmm0		# load key part 2 (unaligned)
	?vperm	v0, v6, v0, $inpperm
	bl	_vpaes_schedule_transform	# input transform
	mtctr	r0

Loop_schedule_256:
	bl	_vpaes_schedule_mangle	# output low result
	vmr	v6, v0			# vmovdqa	%xmm0,	%xmm6		# save cur_lo in xmm6

	# high round
	bl	_vpaes_schedule_round
	bdz 	Lschedule_mangle_last	# dec	%esi
	bl	_vpaes_schedule_mangle

	# low round. swap xmm7 and xmm6
	?vspltw	v0, v0, 3		# vpshufd	\$0xFF,	%xmm0,	%xmm0
	vmr	v5, v7			# vmovdqa	%xmm7,	%xmm5
	vmr	v7, v6			# vmovdqa	%xmm6,	%xmm7
	bl	_vpaes_schedule_low_round
	vmr	v7, v5			# vmovdqa	%xmm5,	%xmm7

	b	Loop_schedule_256
##
##  .aes_schedule_mangle_last
##
##  Mangler for last round of key schedule
##  Mangles %xmm0
##    when encrypting, outputs out(%xmm0) ^ 63
##    when decrypting, outputs unskew(%xmm0)
##
##  Always called right before return... jumps to cleanup and exits
##
.align	4
Lschedule_mangle_last:
	# schedule last round key from xmm0
	li	r11, 0x2e0		# lea	.Lk_deskew(%rip),%r11
	li	r9,  0x2f0
	bne	$dir, Lschedule_mangle_last_dec

	# encrypting
	lvx	v1, r8, r10		# vmovdqa	(%r8,%r10),%xmm1
	li	r11, 0x2c0		# lea		.Lk_opt(%rip),	%r11	# prepare to output transform
	li	r9,  0x2d0		# prepare to output transform
	vperm	v0, v0, v0, v1		# vpshufb	%xmm1,	%xmm0,	%xmm0	# output permute

	lvx	$iptlo, r11, r12	# reload $ipt
	lvx	$ipthi, r9, r12
	addi	$out, $out, 16		# add	\$16,	%rdx
	vxor	v0, v0, v26		# vpxor		.Lk_s63(%rip),	%xmm0,	%xmm0
	bl	_vpaes_schedule_transform	# output transform

	#stvx	v0, r0, $out		# vmovdqu	%xmm0,	(%rdx)		# save last key
	vperm	v0, v0, v0, $outperm	# rotate right/left
	li	r10, 4
	vsel	v2, $outhead, v0, $outmask
	li	r11, 8
	stvx	v2, 0, $out
	li	r12, 12
	stvewx	v0, 0, $out		# some (or all) are redundant
	stvewx	v0, r10, $out
	stvewx	v0, r11, $out
	stvewx	v0, r12, $out
	b	Lschedule_mangle_done

.align	4
Lschedule_mangle_last_dec:
	lvx	$iptlo, r11, r12	# reload $ipt
	lvx	$ipthi, r9,  r12
	addi	$out, $out, -16		# add	\$-16,	%rdx
	vxor	v0, v0, v26		# vpxor	.Lk_s63(%rip),	%xmm0,	%xmm0
	bl	_vpaes_schedule_transform	# output transform

	#stvx	v0, r0, $out		# vmovdqu	%xmm0,	(%rdx)		# save last key
	addi	r9, $out, -15		# -15 is not typo
	vperm	v0, v0, v0, $outperm	# rotate right/left
	li	r10, 4
	vsel	v2, $outhead, v0, $outmask
	li	r11, 8
	stvx	v2, 0, $out
	li	r12, 12
	stvewx	v0, 0, r9		# some (or all) are redundant
	stvewx	v0, r10, r9
	stvewx	v0, r11, r9
	stvewx	v0, r12, r9


Lschedule_mangle_done:
	mtlr	r7
	# cleanup
	vxor	v0, v0, v0		# vpxor		%xmm0,	%xmm0,	%xmm0
	vxor	v1, v1, v1		# vpxor		%xmm1,	%xmm1,	%xmm1
	vxor	v2, v2, v2		# vpxor		%xmm2,	%xmm2,	%xmm2
	vxor	v3, v3, v3		# vpxor		%xmm3,	%xmm3,	%xmm3
	vxor	v4, v4, v4		# vpxor		%xmm4,	%xmm4,	%xmm4
	vxor	v5, v5, v5		# vpxor		%xmm5,	%xmm5,	%xmm5
	vxor	v6, v6, v6		# vpxor		%xmm6,	%xmm6,	%xmm6
	vxor	v7, v7, v7		# vpxor		%xmm7,	%xmm7,	%xmm7

	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

##
##  .aes_schedule_192_smear
##
##  Smear the short, low side in the 192-bit key schedule.
##
##  Inputs:
##    %xmm7: high side, b  a  x  y
##    %xmm6:  low side, d  c  0  0
##    %xmm13: 0
##
##  Outputs:
##    %xmm6: b+c+d  b+c  0  0
##    %xmm0: b+c+d  b+c  b  a
##
.align	4
_vpaes_schedule_192_smear:
	?vspltw	v0, v7, 3
	?vsldoi	v1, v9, v6, 12		# vpshufd	\$0x80,	%xmm6,	%xmm1	# d c 0 0 -> c 0 0 0
	?vsldoi	v0, v7, v0, 8		# vpshufd	\$0xFE,	%xmm7,	%xmm0	# b a _ _ -> b b b a
	vxor	v6, v6, v1		# vpxor		%xmm1,	%xmm6,	%xmm6	# -> c+d c 0 0
	vxor	v6, v6, v0		# vpxor		%xmm0,	%xmm6,	%xmm6	# -> b+c+d b+c b a
	vmr	v0, v6
	?vsldoi	v6, v6, v9, 8
	?vsldoi	v6, v9, v6, 8		# clobber low side with zeros
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

##
##  .aes_schedule_round
##
##  Runs one main round of the key schedule on %xmm0, %xmm7
##
##  Specifically, runs subbytes on the high dword of %xmm0
##  then rotates it by one byte and xors into the low dword of
##  %xmm7.
##
##  Adds rcon from low byte of %xmm8, then rotates %xmm8 for
##  next rcon.
##
##  Smears the dwords of %xmm7 by xoring the low into the
##  second low, result into third, result into highest.
##
##  Returns results in %xmm7 = %xmm0.
##  Clobbers %xmm1-%xmm4, %r11.
##
.align	4
_vpaes_schedule_round:
	# extract rcon from xmm8
	#vxor	v4, v4, v4		# vpxor		%xmm4,	%xmm4,	%xmm4
	?vsldoi	v1, $rcon, v9, 15	# vpalignr	\$15,	%xmm8,	%xmm4,	%xmm1
	?vsldoi	$rcon, $rcon, $rcon, 15	# vpalignr	\$15,	%xmm8,	%xmm8,	%xmm8
	vxor	v7, v7, v1		# vpxor		%xmm1,	%xmm7,	%xmm7

	# rotate
	?vspltw	v0, v0, 3		# vpshufd	\$0xFF,	%xmm0,	%xmm0
	?vsldoi	v0, v0, v0, 1		# vpalignr	\$1,	%xmm0,	%xmm0,	%xmm0

	# fall through...

	# low round: same as high round, but no rotation and no rcon.
_vpaes_schedule_low_round:
	# smear xmm7
	?vsldoi	v1, v9, v7, 12		# vpslldq	\$4,	%xmm7,	%xmm1
	vxor	v7, v7, v1		# vpxor		%xmm1,	%xmm7,	%xmm7
	vspltisb	v1, 0x0f	# 0x0f..0f
	?vsldoi	v4, v9, v7, 8		# vpslldq	\$8,	%xmm7,	%xmm4

	# subbytes
	vand	v1, v1, v0		# vpand		%xmm9,	%xmm0,	%xmm1		# 0 = k
	vsrb	v0, v0, v8		# vpsrlb	\$4,	%xmm0,	%xmm0		# 1 = i
	 vxor	v7, v7, v4		# vpxor		%xmm4,	%xmm7,	%xmm7
	vperm	v2, $invhi, v9, v1	# vpshufb	%xmm1,	%xmm11,	%xmm2		# 2 = a/k
	vxor	v1, v1, v0		# vpxor		%xmm0,	%xmm1,	%xmm1		# 0 = j
	vperm	v3, $invlo, v9, v0	# vpshufb	%xmm0, 	%xmm10,	%xmm3		# 3 = 1/i
	vxor	v3, v3, v2		# vpxor		%xmm2,	%xmm3,	%xmm3		# 3 = iak = 1/i + a/k
	vperm	v4, $invlo, v9, v1	# vpshufb	%xmm1,	%xmm10,	%xmm4		# 4 = 1/j
	 vxor	v7, v7, v26		# vpxor		.Lk_s63(%rip),	%xmm7,	%xmm7
	vperm	v3, $invlo, v9, v3	# vpshufb	%xmm3,	%xmm10,	%xmm3		# 2 = 1/iak
	vxor	v4, v4, v2		# vpxor		%xmm2,	%xmm4,	%xmm4		# 4 = jak = 1/j + a/k
	vperm	v2, $invlo, v9, v4	# vpshufb	%xmm4,	%xmm10,	%xmm2		# 3 = 1/jak
	vxor	v3, v3, v1		# vpxor		%xmm1,	%xmm3,	%xmm3		# 2 = io
	vxor	v2, v2, v0		# vpxor		%xmm0,	%xmm2,	%xmm2		# 3 = jo
	vperm	v4, v15, v9, v3		# vpshufb	%xmm3,	%xmm13,	%xmm4		# 4 = sbou
	vperm	v1, v14, v9, v2		# vpshufb	%xmm2,	%xmm12,	%xmm1		# 0 = sb1t
	vxor	v1, v1, v4		# vpxor		%xmm4,	%xmm1,	%xmm1		# 0 = sbox output

	# add in smeared stuff
	vxor	v0, v1, v7		# vpxor		%xmm7,	%xmm1,	%xmm0
	vxor	v7, v1, v7		# vmovdqa	%xmm0,	%xmm7
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

##
##  .aes_schedule_transform
##
##  Linear-transform %xmm0 according to tables at (%r11)
##
##  Requires that %xmm9 = 0x0F0F... as in preheat
##  Output in %xmm0
##  Clobbers %xmm2
##
.align	4
_vpaes_schedule_transform:
	#vand	v1, v0, v9		# vpand		%xmm9,	%xmm0,	%xmm1
	vsrb	v2, v0, v8		# vpsrlb	\$4,	%xmm0,	%xmm0
					# vmovdqa	(%r11),	%xmm2 	# lo
	vperm	v0, $iptlo, $iptlo, v0	# vpshufb	%xmm1,	%xmm2,	%xmm2
					# vmovdqa	16(%r11),	%xmm1 # hi
	vperm	v2, $ipthi, $ipthi, v2	# vpshufb	%xmm0,	%xmm1,	%xmm0
	vxor	v0, v0, v2		# vpxor		%xmm2,	%xmm0,	%xmm0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

##
##  .aes_schedule_mangle
##
##  Mangle xmm0 from (basis-transformed) standard version
##  to our version.
##
##  On encrypt,
##    xor with 0x63
##    multiply by circulant 0,1,1,1
##    apply shiftrows transform
##
##  On decrypt,
##    xor with 0x63
##    multiply by "inverse mixcolumns" circulant E,B,D,9
##    deskew
##    apply shiftrows transform
##
##
##  Writes out to (%rdx), and increments or decrements it
##  Keeps track of round number mod 4 in %r8
##  Preserves xmm0
##  Clobbers xmm1-xmm5
##
.align	4
_vpaes_schedule_mangle:
	#vmr	v4, v0			# vmovdqa	%xmm0,	%xmm4	# save xmm0 for later
					# vmovdqa	.Lk_mc_forward(%rip),%xmm5
	bne	$dir, Lschedule_mangle_dec

	# encrypting
	vxor	v4, v0, v26		# vpxor	.Lk_s63(%rip),	%xmm0,	%xmm4
	addi	$out, $out, 16		# add	\$16,	%rdx
	vperm	v4, v4, v4, v25		# vpshufb	%xmm5,	%xmm4,	%xmm4
	vperm	v1, v4, v4, v25		# vpshufb	%xmm5,	%xmm4,	%xmm1
	vperm	v3, v1, v1, v25		# vpshufb	%xmm5,	%xmm1,	%xmm3
	vxor	v4, v4, v1		# vpxor		%xmm1,	%xmm4,	%xmm4
	lvx	v1, r8, r10		# vmovdqa	(%r8,%r10),	%xmm1
	vxor	v3, v3, v4		# vpxor		%xmm4,	%xmm3,	%xmm3

	vperm	v3, v3, v3, v1		# vpshufb	%xmm1,	%xmm3,	%xmm3
	addi	r8, r8, -16		# add	\$-16,	%r8
	andi.	r8, r8, 0x30		# and	\$0x30,	%r8

	#stvx	v3, 0, $out		# vmovdqu	%xmm3,	(%rdx)
	vperm	v1, v3, v3, $outperm	# rotate right/left
	vsel	v2, $outhead, v1, $outmask
	vmr	$outhead, v1
	stvx	v2, 0, $out
	blr

.align	4
Lschedule_mangle_dec:
	# inverse mix columns
					# lea	.Lk_dksd(%rip),%r11
	vsrb	v1, v0, v8		# vpsrlb	\$4,	%xmm4,	%xmm1	# 1 = hi
	#and	v4, v0, v9		# vpand		%xmm9,	%xmm4,	%xmm4	# 4 = lo

					# vmovdqa	0x00(%r11),	%xmm2
	vperm	v2, v16, v16, v0	# vpshufb	%xmm4,	%xmm2,	%xmm2
					# vmovdqa	0x10(%r11),	%xmm3
	vperm	v3, v17, v17, v1	# vpshufb	%xmm1,	%xmm3,	%xmm3
	vxor	v3, v3, v2		# vpxor		%xmm2,	%xmm3,	%xmm3
	vperm	v3, v3, v9, v25		# vpshufb	%xmm5,	%xmm3,	%xmm3

					# vmovdqa	0x20(%r11),	%xmm2
	vperm	v2, v18, v18, v0	# vpshufb	%xmm4,	%xmm2,	%xmm2
	vxor	v2, v2, v3		# vpxor		%xmm3,	%xmm2,	%xmm2
					# vmovdqa	0x30(%r11),	%xmm3
	vperm	v3, v19, v19, v1	# vpshufb	%xmm1,	%xmm3,	%xmm3
	vxor	v3, v3, v2		# vpxor		%xmm2,	%xmm3,	%xmm3
	vperm	v3, v3, v9, v25		# vpshufb	%xmm5,	%xmm3,	%xmm3

					# vmovdqa	0x40(%r11),	%xmm2
	vperm	v2, v20, v20, v0	# vpshufb	%xmm4,	%xmm2,	%xmm2
	vxor	v2, v2, v3		# vpxor		%xmm3,	%xmm2,	%xmm2
					# vmovdqa	0x50(%r11),	%xmm3
	vperm	v3, v21, v21, v1	# vpshufb	%xmm1,	%xmm3,	%xmm3
	vxor	v3, v3, v2		# vpxor		%xmm2,	%xmm3,	%xmm3

					# vmovdqa	0x60(%r11),	%xmm2
	vperm	v2, v22, v22, v0	# vpshufb	%xmm4,	%xmm2,	%xmm2
	vperm	v3, v3, v9, v25		# vpshufb	%xmm5,	%xmm3,	%xmm3
					# vmovdqa	0x70(%r11),	%xmm4
	vperm	v4, v23, v23, v1	# vpshufb	%xmm1,	%xmm4,	%xmm4
	lvx	v1, r8, r10		# vmovdqa	(%r8,%r10),	%xmm1
	vxor	v2, v2, v3		# vpxor		%xmm3,	%xmm2,	%xmm2
	vxor	v3, v4, v2		# vpxor		%xmm2,	%xmm4,	%xmm3

	addi	$out, $out, -16		# add	\$-16,	%rdx

	vperm	v3, v3, v3, v1		# vpshufb	%xmm1,	%xmm3,	%xmm3
	addi	r8, r8, -16		# add	\$-16,	%r8
	andi.	r8, r8, 0x30		# and	\$0x30,	%r8

	#stvx	v3, 0, $out		# vmovdqu	%xmm3,	(%rdx)
	vperm	v1, v3, v3, $outperm	# rotate right/left
	vsel	v2, $outhead, v1, $outmask
	vmr	$outhead, v1
	stvx	v2, 0, $out
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

.globl	.vpaes_set_encrypt_key
.align	5
.vpaes_set_encrypt_key:
	$STU	$sp,-$FRAME($sp)
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mflr	r0
	mfspr	r6, 256			# save vrsave
	stvx	v20,r10,$sp
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	stw	r6,`$FRAME-4`($sp)	# save vrsave
	li	r7, -1
	$PUSH	r0, `$FRAME+$LRSAVE`($sp)
	mtspr	256, r7			# preserve all AltiVec registers

	srwi	r9, $bits, 5		# shr	\$5,%eax
	addi	r9, r9, 6		# add	\$5,%eax
	stw	r9, 240($out)		# mov	%eax,240(%rdx)	# AES_KEY->rounds = nbits/32+5;

	cmplw	$dir, $bits, $bits	# set encrypt direction
	li	r8, 0x30		# mov	\$0x30,%r8d
	bl	_vpaes_schedule_core

	$POP	r0, `$FRAME+$LRSAVE`($sp)
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mtspr	256, r6			# restore vrsave
	mtlr	r0
	xor	r3, r3, r3
	lvx	v20,r10,$sp
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,0x04,1,0x80,0,3,0
	.long	0
.size	.vpaes_set_encrypt_key,.-.vpaes_set_encrypt_key

.globl	.vpaes_set_decrypt_key
.align	4
.vpaes_set_decrypt_key:
	$STU	$sp,-$FRAME($sp)
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mflr	r0
	mfspr	r6, 256			# save vrsave
	stvx	v20,r10,$sp
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	stw	r6,`$FRAME-4`($sp)	# save vrsave
	li	r7, -1
	$PUSH	r0, `$FRAME+$LRSAVE`($sp)
	mtspr	256, r7			# preserve all AltiVec registers

	srwi	r9, $bits, 5		# shr	\$5,%eax
	addi	r9, r9, 6		# add	\$5,%eax
	stw	r9, 240($out)		# mov	%eax,240(%rdx)	# AES_KEY->rounds = nbits/32+5;

	slwi	r9, r9, 4		# shl	\$4,%eax
	add	$out, $out, r9		# lea	(%rdx,%rax),%rdx

	cmplwi	$dir, $bits, 0		# set decrypt direction
	srwi	r8, $bits, 1		# shr	\$1,%r8d
	andi.	r8, r8, 32		# and	\$32,%r8d
	xori	r8, r8, 32		# xor	\$32,%r8d	# nbits==192?0:32
	bl	_vpaes_schedule_core

	$POP	r0,  `$FRAME+$LRSAVE`($sp)
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mtspr	256, r6			# restore vrsave
	mtlr	r0
	xor	r3, r3, r3
	lvx	v20,r10,$sp
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,0x04,1,0x80,0,3,0
	.long	0
.size	.vpaes_set_decrypt_key,.-.vpaes_set_decrypt_key
___
}
Packit c4476c
Packit c4476c
# Post-process the accumulated assembly template in $code one line at a time
# and print the result to STDOUT.  Three rewrites are applied per line:
#   1. every `...` span is replaced by the value of evaluating its contents
#      as Perl (used for frame-offset arithmetic in the template);
#   2. while still inside the constants table (i.e. until a line containing
#      "Lconsts:" has been seen), ".long" lines that end in a "?tag" are
#      re-emitted as ".byte" lists so the table does not depend on the
#      assembler's endianness;
#   3. instructions written with a leading '?' are adapted to the target
#      endianness selected by $flavour.
# $consts is true only while the constants table is being processed.
my $consts=1;
foreach  (split("\n",$code)) {
	s/\`([^\`]*)\`/eval $1/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.long\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$2;
	    my @bytes=();

	    # convert to endian-agnostic format: each 32-bit value is split
	    # into four bytes, most significant first; values with a leading
	    # 0 go through oct(), which also understands the 0x prefix
	    foreach (split(/,\s+/,$1)) {
		my $l = /^0/?oct:int;
		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
	    }

	    # little-endian conversion: "?inv" xors every byte with 0xf,
	    # "?rev" reverses the byte order of the whole line; any other
	    # tag leaves the bytes untouched
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv)  {
		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
		}
	    }

	    #emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    # only the first matching rewrite is applied to a line:
	    #   ?lvsr <-> ?lvsl are exchanged,
	    #   ?vperm  gets its two source operands swapped,
	    #   ?vsldoi gets its sources swapped and shift N turned into 16-N,
	    #   ?vspltw gets element index N turned into 3-N
	    s/\?lvsr/lvsl/o or
	    s/\?lvsl/lvsr/o or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    # the '?' marker is simply dropped
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

# buffered write errors only surface when the handle is closed
close STDOUT or die "error closing STDOUT: $!";