#! /usr/bin/env perl
# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# AES for s390x.

# April 2007.
#
# Software performance improvement over gcc-generated code is ~70% and
# in absolute terms is ~73 cycles per byte processed with 128-bit key.
# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
# *strictly* in-order execution and issued instruction [in this case
# load value from memory is critical] has to complete before execution
# flow proceeds. S-boxes are compressed to 2KB[+256B].
#
# As for hardware acceleration support. It's basically a "teaser," as
# it can and should be improved in several ways. Most notably support
# for CBC is not utilized, nor are multiple blocks ever processed.
# Then software key schedule can be postponed till hardware support
# detection... Performance improvement over assembler is reportedly
# ~2.5x, but can reach >8x [naturally on larger chunks] if proper
# support is implemented.

# May 2007.
#
# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
# for 128-bit keys, if hardware support is detected.

# January 2009.
#
# Add support for hardware AES192/256 and reschedule instructions to
# minimize/avoid Address Generation Interlock hazard and to favour
# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
# almost 50% on z9. The gain is smaller on z10, because being dual-
# issue z10 makes it impossible to eliminate the interlock condition:
# critical path is not long enough. Yet it spends ~24 cycles per byte
# processed with 128-bit key.
#
# Unlike the previous version, hardware support detection takes place only
# at the moment of key schedule setup, which is denoted in key->rounds.
# This is done, because deferred key setup can't be made MT-safe, not
# for keys longer than 128 bits.
#
# Add AES_cbc_encrypt, which gives incredible performance improvement,
# it was measured to be ~6.6x. It's less than previously mentioned 8x,
# because software implementation was optimized.

# May 2010.
#
# Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
# performance improvement over "generic" counter mode routine relying
# on single-block, also hardware-assisted, AES_encrypt. "Up to" refers
# to the fact that exact throughput value depends on current stack
# frame alignment within 4KB page. In worst case you get ~75% of the
# maximum, but *on average* it would be as much as ~98%. Meaning that
# worst case is unlikely, it's like hitting ravine on plateau.

# November 2010.
#
# Adapt for -m31 build. If kernel supports what's called "highgprs"
# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
# instructions and achieve "64-bit" performance even in 31-bit legacy
# application context. The feature is not specific to any particular
# processor, as long as it's "z-CPU". The latter implies that the code
# remains z/Architecture specific. On z990 it was measured to perform
# 2x better than code generated by gcc 4.3.

# December 2010.
#
# Add support for z196 "cipher message with counter" instruction.
# Note however that it's disengaged, because it was measured to
# perform ~12% worse than vanilla km-based code...

# February 2011.
#
# Add AES_xts_[en|de]crypt. This includes support for z196 km-xts-aes
# instructions, which deliver ~70% improvement at 8KB block size over
# vanilla km-based code, 37% - at most like 512-bytes block size.

# Command line: the first argument is the build flavour.  A flavour
# matching /3[12]/ selects 4-byte pointers and the unsuffixed load/store
# mnemonics; anything else selects 8-byte pointers and the "g"-suffixed
# mnemonics ($g is spliced into instruction names such as stm${g}/lm${g}).
$flavour = shift;

if ($flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}

# Consume the remaining arguments until one looks like a file name
# ("name.ext"); $output ends up false when no such argument exists.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
# Send the generated assembly to that file.  Three-argument open keeps the
# name from being interpreted as a mode, and a failure is reported instead
# of being silently ignored.  Without an output name STDOUT is left alone.
if ($output) {
	open(STDOUT, '>', $output) or die "can't open $output: $!";
}

$softonly=0;	# allow hardware support

# Register aliases used throughout the generated code.  Several names
# deliberately share a register: $t0/$mask are both %r0, $t2/$inp both
# %r2, and $t3/$out/$bits all %r3.
$t0="%r0";	$mask="%r0";
$t1="%r1";
$t2="%r2";	$inp="%r2";
$t3="%r3";	$out="%r3";	$bits="%r3";
$key="%r4";
$i1="%r5";
$i2="%r6";
$i3="%r7";
$s0="%r8";
$s1="%r9";
$s2="%r10";
$s3="%r11";
$tbl="%r12";
$rounds="%r13";
$ra="%r14";
$sp="%r15";

# 16 pointer-sized slots plus 4 eight-byte slots.
# NOTE(review): presumably the ABI's standard stack frame size -- confirm.
$stdframe=16*$SIZE_T+4*8;

# _data_word(@words)
#
# Appends one ".long" directive per argument to the global $code buffer,
# emitting every 32-bit value twice on the same line.  The doubling makes
# each table entry 8 bytes wide, which is what lets the round code index
# the table with (byte << 3) and pick a byte displacement of 0..3.
#
# Processing stops at the first undefined argument.  The sub is declared
# without a prototype: it takes a list, and the former empty "()" prototype
# only worked because every caller bypasses it with the & call form.
sub _data_word
{
    for my $word (@_) {
	last unless defined $word;
	$code .= sprintf(".long\t0x%08x,0x%08x\n", $word, $word);
    }
}

# Start the assembler output: include s390x_arch.h, switch to .text and
# open the 256-byte-aligned AES_Te data object.
$code=<<___;
#include "s390x_arch.h"

.text

.type	AES_Te,\@object
.align	256
AES_Te:
___
# The 256 encryption table words.  _data_word() writes each word twice, so
# the table occupies 256*8 = 2KB; the round code reads it at (byte << 3)
# plus a displacement of 0..3, which its comments label Te0, Te3, Te2 and
# Te1 respectively.  The & call form is kept because it bypasses any
# prototype declared on _data_word.
&_data_word(
	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# Finish the AES_Te object with the 256-entry Te4 byte table and the rcon[]
# word list (padded with zeros to 16 words), then emit the AES_encrypt
# entry label.  Neither Te4[] nor rcon[] is referenced by the encryption
# code in this part of the file.
# NOTE(review): presumably both are consumed by the key-setup routines
# further down -- confirm.
$code.=<<___;
# Te4[256]
.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
# rcon[]
.long	0x01000000, 0x02000000, 0x04000000, 0x08000000
.long	0x10000000, 0x20000000, 0x40000000, 0x80000000
.long	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
.align	256
.size	AES_Te,.-AES_Te

# void AES_encrypt(const unsigned char *inp, unsigned char *out,
# 		 const AES_KEY *key) {
.globl	AES_encrypt
.type	AES_encrypt,\@function
AES_encrypt:
___
# Hardware path, emitted unless $softonly is set.  The 32-bit word at
# offset 240 of the key structure (the same field the software path loads
# as its round count) is compared, unsigned, with 16: smaller values branch
# to the software path at .Lesoft.  Otherwise KM is issued with %r0 holding
# that word, %r1 the key address, %r2 the input pointer (already in place,
# hence the commented-out la), %r4 the output pointer and %r3 the 16-byte
# block length.  "brc 1,.-4" re-issues KM when it sets condition code 3.
# NOTE(review): values >= 16 are presumably KM function codes stored by key
# setup when hardware support was detected -- confirm against that code.
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lesoft

	la	%r1,0($key)
	#la	%r2,0($inp)
	la	%r4,0($out)
	lghi	%r3,16		# single block length
	.long	0xb92e0042	# km %r4,%r2
	brc	1,.-4		# can this happen?
	br	%r14
.align	64
.Lesoft:
___
# Software path of AES_encrypt followed by the internal round function.
#
# AES_encrypt (soft): saves %r3..%r14 starting at slot 3 of the caller's
# frame, loads the four input words into $s0..$s3, points $tbl at AES_Te
# and calls _s390x_AES_encrypt.  $out shares %r3 with the scratch register
# $t3, so it is reloaded from its save slot before the four result words
# are stored; %r6..%r14 are then restored.
#
# _s390x_AES_encrypt: expects the state in $s0..$s3, the key schedule in
# $key and the table base in $tbl.  The return address is parked in slot 15
# of the frame because $ra doubles as an index register inside the rounds.
# After the initial round-key XOR, .Lenc_loop runs (word at key+240) - 1
# times, advancing $key by 16 per iteration; every state byte is turned
# into an 8-byte table offset ($mask is 0xff<<3) and combined with a
# displacement of 0..3 to select the table variant.  The final round uses
# single-byte loads at displacement 2 of the same entries (labelled Te4 in
# the comments) and XORs the last round key from key+16..key+28.
#
# The instruction order is hand-scheduled (see the header notes on address
# generation interlock), so the sequence is reproduced unchanged.  The
# backquoted expressions are left in the text verbatim.
# NOTE(review): presumably a later pass of this script evaluates them -- confirm.
$code.=<<___;
	stm${g}	%r3,$ra,3*$SIZE_T($sp)

	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)

	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt

	l${g}	$out,3*$SIZE_T($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_encrypt,.-AES_encrypt

.type   _s390x_AES_encrypt,\@function
.align	16
_s390x_AES_encrypt:
	st${g}	$ra,15*$SIZE_T($sp)
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$s3,12($key)
	l	$rounds,240($key)
	llill	$mask,`0xff<<3`
	aghi	$rounds,-1
	j	.Lenc_loop
.align	16
.Lenc_loop:
	sllg	$t1,$s0,`0+3`
	srlg	$t2,$s0,`8-3`
	srlg	$t3,$s0,`16-3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	ngr	$t1,$mask
	nr	$t2,$mask
	nr	$t3,$mask

	srlg	$i1,$s1,`16-3`	# i0
	sllg	$i2,$s1,`0+3`
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	nr	$i1,$mask
	nr	$s1,$mask
	ngr	$i2,$mask
	nr	$i3,$mask

	l	$s0,0($s0,$tbl)	# Te0[s0>>24]
	l	$t1,1($t1,$tbl)	# Te3[s0>>0]
	l	$t2,2($t2,$tbl) # Te2[s0>>8]
	l	$t3,3($t3,$tbl)	# Te1[s0>>16]

	x	$s0,3($i1,$tbl)	# Te1[s1>>16]
	l	$s1,0($s1,$tbl)	# Te0[s1>>24]
	x	$t2,1($i2,$tbl)	# Te3[s1>>0]
	x	$t3,2($i3,$tbl)	# Te2[s1>>8]

	srlg	$i1,$s2,`8-3`	# i0
	srlg	$i2,$s2,`16-3`	# i1
	nr	$i1,$mask
	nr	$i2,$mask
	sllg	$i3,$s2,`0+3`
	srl	$s2,`24-3`
	nr	$s2,$mask
	ngr	$i3,$mask

	xr	$s1,$t1
	srlg	$ra,$s3,`8-3`	# i1
	sllg	$t1,$s3,`0+3`	# i0
	nr	$ra,$mask
	la	$key,16($key)
	ngr	$t1,$mask

	x	$s0,2($i1,$tbl)	# Te2[s2>>8]
	x	$s1,3($i2,$tbl)	# Te1[s2>>16]
	l	$s2,0($s2,$tbl)	# Te0[s2>>24]
	x	$t3,1($i3,$tbl)	# Te3[s2>>0]

	srlg	$i3,$s3,`16-3`	# i2
	xr	$s2,$t2
	srl	$s3,`24-3`
	nr	$i3,$mask
	nr	$s3,$mask

	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$t3,12($key)

	x	$s0,1($t1,$tbl)	# Te3[s3>>0]
	x	$s1,2($ra,$tbl)	# Te2[s3>>8]
	x	$s2,3($i3,$tbl)	# Te1[s3>>16]
	l	$s3,0($s3,$tbl)	# Te0[s3>>24]
	xr	$s3,$t3

	brct	$rounds,.Lenc_loop
	.align	16

	sllg	$t1,$s0,`0+3`
	srlg	$t2,$s0,`8-3`
	ngr	$t1,$mask
	srlg	$t3,$s0,`16-3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	nr	$t2,$mask
	nr	$t3,$mask

	srlg	$i1,$s1,`16-3`	# i0
	sllg	$i2,$s1,`0+3`
	ngr	$i2,$mask
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	nr	$i1,$mask
	nr	$s1,$mask
	nr	$i3,$mask

	llgc	$s0,2($s0,$tbl)	# Te4[s0>>24]
	llgc	$t1,2($t1,$tbl)	# Te4[s0>>0]
	sll	$s0,24
	llgc	$t2,2($t2,$tbl)	# Te4[s0>>8]
	llgc	$t3,2($t3,$tbl)	# Te4[s0>>16]
	sll	$t2,8
	sll	$t3,16

	llgc	$i1,2($i1,$tbl)	# Te4[s1>>16]
	llgc	$s1,2($s1,$tbl)	# Te4[s1>>24]
	llgc	$i2,2($i2,$tbl)	# Te4[s1>>0]
	llgc	$i3,2($i3,$tbl)	# Te4[s1>>8]
	sll	$i1,16
	sll	$s1,24
	sll	$i3,8
	or	$s0,$i1
	or	$s1,$t1
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s2,`8-3`	# i0
	srlg	$i2,$s2,`16-3`	# i1
	nr	$i1,$mask
	nr	$i2,$mask
	sllg	$i3,$s2,`0+3`
	srl	$s2,`24-3`
	ngr	$i3,$mask
	nr	$s2,$mask

	sllg	$t1,$s3,`0+3`	# i0
	srlg	$ra,$s3,`8-3`	# i1
	ngr	$t1,$mask

	llgc	$i1,2($i1,$tbl)	# Te4[s2>>8]
	llgc	$i2,2($i2,$tbl)	# Te4[s2>>16]
	sll	$i1,8
	llgc	$s2,2($s2,$tbl)	# Te4[s2>>24]
	llgc	$i3,2($i3,$tbl)	# Te4[s2>>0]
	sll	$i2,16
	nr	$ra,$mask
	sll	$s2,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$t2
	or	$t3,$i3

	srlg	$i3,$s3,`16-3`	# i2
	srl	$s3,`24-3`
	nr	$i3,$mask
	nr	$s3,$mask

	l	$t0,16($key)
	l	$t2,20($key)

	llgc	$i1,2($t1,$tbl)	# Te4[s3>>0]
	llgc	$i2,2($ra,$tbl)	# Te4[s3>>8]
	llgc	$i3,2($i3,$tbl)	# Te4[s3>>16]
	llgc	$s3,2($s3,$tbl)	# Te4[s3>>24]
	sll	$i2,8
	sll	$i3,16
	sll	$s3,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$i3
	or	$s3,$t3

	l${g}	$ra,15*$SIZE_T($sp)
	xr	$s0,$t0
	xr	$s1,$t2
	x	$s2,24($key)
	x	$s3,28($key)

	br	$ra
.size	_s390x_AES_encrypt,.-_s390x_AES_encrypt
___

# Open the 256-byte-aligned AES_Td data object.
$code.=<<___;
.type	AES_Td,\@object
.align	256
AES_Td:
___
# The 256 decryption table words.  As with AES_Te, _data_word() writes each
# word twice, so entries are 8 bytes wide and the table occupies 2KB; the
# decryption rounds read it at (byte << 3) plus a displacement of 0..3
# (labelled Td0, Td3, Td2 and Td1 in their comments).  The & call form is
# kept because it bypasses any prototype declared on _data_word.
&_data_word(
	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
# Finish the AES_Td object with the 256-entry Td4 byte table, which follows
# directly after the 2KB of doubled words -- the final decryption round
# reads it at displacement 2048 from the table base -- then emit the
# AES_decrypt entry label.
$code.=<<___;
# Td4[256]
.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.size	AES_Td,.-AES_Td

# void AES_decrypt(const unsigned char *inp, unsigned char *out,
# 		 const AES_KEY *key) {
.globl	AES_decrypt
.type	AES_decrypt,\@function
AES_decrypt:
___
# Hardware path, emitted unless $softonly is set.  It is the same sequence
# AES_encrypt uses: the 32-bit word at offset 240 of the key structure is
# compared, unsigned, with 16 and smaller values branch to the software
# path at .Ldsoft; otherwise KM is issued with %r0 holding that word, %r1
# the key address, %r2 the input pointer (already in place), %r4 the output
# pointer and %r3 the 16-byte block length.  "brc 1,.-4" re-issues KM when
# it sets condition code 3.
# NOTE(review): the direction (decrypt) is presumably encoded in the word
# key setup stored at offset 240 -- confirm against that code.
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Ldsoft

	la	%r1,0($key)
	#la	%r2,0($inp)
	la	%r4,0($out)
	lghi	%r3,16		# single block length
	.long	0xb92e0042	# km %r4,%r2
	brc	1,.-4		# can this happen?
	br	%r14
.align	64
.Ldsoft:
___
# Software path of AES_decrypt followed by the internal round function.
#
# AES_decrypt (soft): saves %r3..%r14 starting at slot 3 of the caller's
# frame, loads the four input words into $s0..$s3, points $tbl at AES_Td
# and calls _s390x_AES_decrypt.  $out shares %r3 with the scratch register
# $t3, so it is reloaded from its save slot before the four result words
# are stored; %r6..%r14 are then restored.
#
# _s390x_AES_decrypt: expects the state in $s0..$s3, the key schedule in
# $key and the table base in $tbl.  The return address is parked in slot 15
# of the frame because $ra doubles as an index register.  After the initial
# round-key XOR, .Ldec_loop runs (word at key+240) - 1 times, advancing
# $key by 16 per iteration, with $mask = 0xff<<3 turning state bytes into
# 8-byte table offsets.  The final round first touches the Td4 byte table
# at 64-byte strides (the "prefetch Td4" loads), switches $mask to 0xff and
# substitutes single bytes from tbl+2048 before XORing the last round key
# from key+16..key+28.
#
# The instruction order is hand-scheduled, so the sequence is reproduced
# unchanged.  The backquoted expressions are left in the text verbatim.
# NOTE(review): presumably a later pass of this script evaluates them -- confirm.
$code.=<<___;
	stm${g}	%r3,$ra,3*$SIZE_T($sp)

	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)

	larl	$tbl,AES_Td
	bras	$ra,_s390x_AES_decrypt

	l${g}	$out,3*$SIZE_T($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_decrypt,.-AES_decrypt

.type   _s390x_AES_decrypt,\@function
.align	16
_s390x_AES_decrypt:
	st${g}	$ra,15*$SIZE_T($sp)
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$s3,12($key)
	l	$rounds,240($key)
	llill	$mask,`0xff<<3`
	aghi	$rounds,-1
	j	.Ldec_loop
.align	16
.Ldec_loop:
	srlg	$t1,$s0,`16-3`
	srlg	$t2,$s0,`8-3`
	sllg	$t3,$s0,`0+3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	nr	$t1,$mask
	nr	$t2,$mask
	ngr	$t3,$mask

	sllg	$i1,$s1,`0+3`	# i0
	srlg	$i2,$s1,`16-3`
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	ngr	$i1,$mask
	nr	$s1,$mask
	nr	$i2,$mask
	nr	$i3,$mask

	l	$s0,0($s0,$tbl)	# Td0[s0>>24]
	l	$t1,3($t1,$tbl)	# Td1[s0>>16]
	l	$t2,2($t2,$tbl)	# Td2[s0>>8]
	l	$t3,1($t3,$tbl)	# Td3[s0>>0]

	x	$s0,1($i1,$tbl)	# Td3[s1>>0]
	l	$s1,0($s1,$tbl)	# Td0[s1>>24]
	x	$t2,3($i2,$tbl)	# Td1[s1>>16]
	x	$t3,2($i3,$tbl)	# Td2[s1>>8]

	srlg	$i1,$s2,`8-3`	# i0
	sllg	$i2,$s2,`0+3`	# i1
	srlg	$i3,$s2,`16-3`
	srl	$s2,`24-3`
	nr	$i1,$mask
	ngr	$i2,$mask
	nr	$s2,$mask
	nr	$i3,$mask

	xr	$s1,$t1
	srlg	$ra,$s3,`8-3`	# i1
	srlg	$t1,$s3,`16-3`	# i0
	nr	$ra,$mask
	la	$key,16($key)
	nr	$t1,$mask

	x	$s0,2($i1,$tbl)	# Td2[s2>>8]
	x	$s1,1($i2,$tbl)	# Td3[s2>>0]
	l	$s2,0($s2,$tbl)	# Td0[s2>>24]
	x	$t3,3($i3,$tbl)	# Td1[s2>>16]

	sllg	$i3,$s3,`0+3`	# i2
	srl	$s3,`24-3`
	ngr	$i3,$mask
	nr	$s3,$mask

	xr	$s2,$t2
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$t3,12($key)

	x	$s0,3($t1,$tbl)	# Td1[s3>>16]
	x	$s1,2($ra,$tbl)	# Td2[s3>>8]
	x	$s2,1($i3,$tbl)	# Td3[s3>>0]
	l	$s3,0($s3,$tbl)	# Td0[s3>>24]
	xr	$s3,$t3

	brct	$rounds,.Ldec_loop
	.align	16

	l	$t1,`2048+0`($tbl)	# prefetch Td4
	l	$t2,`2048+64`($tbl)
	l	$t3,`2048+128`($tbl)
	l	$i1,`2048+192`($tbl)
	llill	$mask,0xff

	srlg	$i3,$s0,24	# i0
	srlg	$t1,$s0,16
	srlg	$t2,$s0,8
	nr	$s0,$mask	# i3
	nr	$t1,$mask

	srlg	$i1,$s1,24
	nr	$t2,$mask
	srlg	$i2,$s1,16
	srlg	$ra,$s1,8
	nr	$s1,$mask	# i0
	nr	$i2,$mask
	nr	$ra,$mask

	llgc	$i3,2048($i3,$tbl)	# Td4[s0>>24]
	llgc	$t1,2048($t1,$tbl)	# Td4[s0>>16]
	llgc	$t2,2048($t2,$tbl)	# Td4[s0>>8]
	sll	$t1,16
	llgc	$t3,2048($s0,$tbl)	# Td4[s0>>0]
	sllg	$s0,$i3,24
	sll	$t2,8

	llgc	$s1,2048($s1,$tbl)	# Td4[s1>>0]
	llgc	$i1,2048($i1,$tbl)	# Td4[s1>>24]
	llgc	$i2,2048($i2,$tbl)	# Td4[s1>>16]
	sll	$i1,24
	llgc	$i3,2048($ra,$tbl)	# Td4[s1>>8]
	sll	$i2,16
	sll	$i3,8
	or	$s0,$s1
	or	$t1,$i1
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s2,8	# i0
	srlg	$i2,$s2,24
	srlg	$i3,$s2,16
	nr	$s2,$mask	# i1
	nr	$i1,$mask
	nr	$i3,$mask
	llgc	$i1,2048($i1,$tbl)	# Td4[s2>>8]
	llgc	$s1,2048($s2,$tbl)	# Td4[s2>>0]
	llgc	$i2,2048($i2,$tbl)	# Td4[s2>>24]
	llgc	$i3,2048($i3,$tbl)	# Td4[s2>>16]
	sll	$i1,8
	sll	$i2,24
	or	$s0,$i1
	sll	$i3,16
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s3,16	# i0
	srlg	$i2,$s3,8	# i1
	srlg	$i3,$s3,24
	nr	$s3,$mask	# i2
	nr	$i1,$mask
	nr	$i2,$mask

	l${g}	$ra,15*$SIZE_T($sp)
	or	$s1,$t1
	l	$t0,16($key)
	l	$t1,20($key)

	llgc	$i1,2048($i1,$tbl)	# Td4[s3>>16]
	llgc	$i2,2048($i2,$tbl)	# Td4[s3>>8]
	sll	$i1,16
	llgc	$s2,2048($s3,$tbl)	# Td4[s3>>0]
	llgc	$s3,2048($i3,$tbl)	# Td4[s3>>24]
	sll	$i2,8
	sll	$s3,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$t2
	or	$s3,$t3

	xr	$s0,$t0
	xr	$s1,$t1
	x	$s2,24($key)
	x	$s3,28($key)

	br	$ra
.size	_s390x_AES_decrypt,.-_s390x_AES_decrypt
___

$code.=<<___;
Packit c4476c
# int AES_set_encrypt_key(const unsigned char *in, int bits,
Packit c4476c
# 		 AES_KEY *key) {
#
# Fills in the AES_KEY at key from the raw key at in. Status is left in
# %r2: zero on success, -1 if either pointer argument is zero, -2 if
# bits is not one of 128, 192 or 256.
Packit c4476c
.globl	AES_set_encrypt_key
Packit c4476c
.type	AES_set_encrypt_key,\@function
Packit c4476c
.align	16
Packit c4476c
AES_set_encrypt_key:
Packit c4476c
_s390x_AES_set_encrypt_key:
Packit c4476c
	# reject zero (NULL) in/key pointers
	lghi	$t0,0
Packit c4476c
	cl${g}r	$inp,$t0
Packit c4476c
	je	.Lminus1
Packit c4476c
	cl${g}r	$key,$t0
Packit c4476c
	je	.Lminus1
Packit c4476c
Packit c4476c
	# only 128, 192 and 256 are accepted; anything else returns -2
	lghi	$t0,128
Packit c4476c
	clr	$bits,$t0
Packit c4476c
	je	.Lproceed
Packit c4476c
	lghi	$t0,192
Packit c4476c
	clr	$bits,$t0
Packit c4476c
	je	.Lproceed
Packit c4476c
	lghi	$t0,256
Packit c4476c
	clr	$bits,$t0
Packit c4476c
	je	.Lproceed
Packit c4476c
	lghi	%r2,-2
Packit c4476c
	br	%r14
Packit c4476c
Packit c4476c
.align	16
Packit c4476c
.Lproceed:
Packit c4476c
___
Packit c4476c
# Hardware-assisted variant of AES_set_encrypt_key; this text is appended
# only when $softonly is false.
$code.=<<___ if (!$softonly);
Packit c4476c
	# convert bits to km(c) code, [128,192,256]->[18,19,20]
Packit c4476c
	lhi	%r5,-128
Packit c4476c
	lhi	%r0,18
Packit c4476c
	ar	%r5,$bits
Packit c4476c
	srl	%r5,6
Packit c4476c
	ar	%r5,%r0
Packit c4476c
Packit c4476c
	# build a one-bit mask (0x8000 in the top halfword shifted right by
	# the function code) and and it with the KM and KMC capability
	# words; a zero result falls back to the software key schedule
	larl	%r1,OPENSSL_s390xcap_P
Packit c4476c
	llihh	%r0,0x8000
Packit c4476c
	srlg	%r0,%r0,0(%r5)
Packit c4476c
	ng	%r0,S390X_KM(%r1)  # check availability of both km...
Packit c4476c
	ng	%r0,S390X_KMC(%r1) # ...and kmc support for given key length
Packit c4476c
	jz	.Lekey_internal
Packit c4476c
Packit c4476c
	lmg	%r0,%r1,0($inp)	# just copy 128 bits...
Packit c4476c
	stmg	%r0,%r1,0($key)
Packit c4476c
	# ...plus bytes 16-23 when bits is at least 192, and bytes 24-31
	# when bits is above 192 (the je below reuses the cr result)
	lhi	%r0,192
Packit c4476c
	cr	$bits,%r0
Packit c4476c
	jl	1f
Packit c4476c
	lg	%r1,16($inp)
Packit c4476c
	stg	%r1,16($key)
Packit c4476c
	je	1f
Packit c4476c
	lg	%r1,24($inp)
Packit c4476c
	stg	%r1,24($key)
Packit c4476c
1:	st	$bits,236($key)	# save bits [for debugging purposes]
Packit c4476c
	# the function code is also left in a scratch register, which
	# AES_set_decrypt_key inspects after calling this routine
	lgr	$t0,%r5
Packit c4476c
	st	%r5,240($key)	# save km(c) code
Packit c4476c
	lghi	%r2,0
Packit c4476c
	br	%r14
Packit c4476c
___
Packit c4476c
# Software key schedule for AES_set_encrypt_key (always emitted).
$code.=<<___;
Packit c4476c
.align	16
Packit c4476c
.Lekey_internal:
Packit c4476c
	stm${g}	%r4,%r13,4*$SIZE_T($sp)	# all non-volatile regs and $key
Packit c4476c
Packit c4476c
	# table base is AES_Te+2048: the byte lookups below are labelled
	# Te4, and rcon words are read at offset 256 from this base
	larl	$tbl,AES_Te+2048
Packit c4476c
Packit c4476c
	# first four key words are copied verbatim for every key size
	llgf	$s0,0($inp)
Packit c4476c
	llgf	$s1,4($inp)
Packit c4476c
	llgf	$s2,8($inp)
Packit c4476c
	llgf	$s3,12($inp)
Packit c4476c
	st	$s0,0($key)
Packit c4476c
	st	$s1,4($key)
Packit c4476c
	st	$s2,8($key)
Packit c4476c
	st	$s3,12($key)
Packit c4476c
	lghi	$t0,128
Packit c4476c
	cr	$bits,$t0
Packit c4476c
	jne	.Lnot128
Packit c4476c
Packit c4476c
	# key size 128: round count 10 is stored at offset 240 and also
	# serves as the loop counter
	llill	$mask,0xff
Packit c4476c
	lghi	$t3,0			# i=0
Packit c4476c
	lghi	$rounds,10
Packit c4476c
	st	$rounds,240($key)
Packit c4476c
Packit c4476c
	# split rk[3] into four byte indices for the first pass
	llgfr	$t2,$s3			# temp=rk[3]
Packit c4476c
	srlg	$i1,$s3,8
Packit c4476c
	srlg	$i2,$s3,16
Packit c4476c
	srlg	$i3,$s3,24
Packit c4476c
	nr	$t2,$mask
Packit c4476c
	nr	$i1,$mask
Packit c4476c
	nr	$i2,$mask
Packit c4476c
Packit c4476c
.align	16
Packit c4476c
.L128_loop:
Packit c4476c
	# each pass derives the next four round-key words, stores them at
	# offsets 16-28 from the moving key pointer, and pre-computes the
	# byte indices for the following pass
	la	$t2,0($t2,$tbl)
Packit c4476c
	la	$i1,0($i1,$tbl)
Packit c4476c
	la	$i2,0($i2,$tbl)
Packit c4476c
	la	$i3,0($i3,$tbl)
Packit c4476c
	icm	$t2,2,0($t2)		# Te4[rk[3]>>0]<<8
Packit c4476c
	icm	$t2,4,0($i1)		# Te4[rk[3]>>8]<<16
Packit c4476c
	icm	$t2,8,0($i2)		# Te4[rk[3]>>16]<<24
Packit c4476c
	icm	$t2,1,0($i3)		# Te4[rk[3]>>24]
Packit c4476c
	x	$t2,256($t3,$tbl)	# rcon[i]
Packit c4476c
	xr	$s0,$t2			# rk[4]=rk[0]^...
Packit c4476c
	xr	$s1,$s0			# rk[5]=rk[1]^rk[4]
Packit c4476c
	xr	$s2,$s1			# rk[6]=rk[2]^rk[5]
Packit c4476c
	xr	$s3,$s2			# rk[7]=rk[3]^rk[6]
Packit c4476c
Packit c4476c
	llgfr	$t2,$s3			# temp=rk[3]
Packit c4476c
	srlg	$i1,$s3,8
Packit c4476c
	srlg	$i2,$s3,16
Packit c4476c
	nr	$t2,$mask
Packit c4476c
	nr	$i1,$mask
Packit c4476c
	srlg	$i3,$s3,24
Packit c4476c
	nr	$i2,$mask
Packit c4476c
Packit c4476c
	st	$s0,16($key)
Packit c4476c
	st	$s1,20($key)
Packit c4476c
	st	$s2,24($key)
Packit c4476c
	st	$s3,28($key)
Packit c4476c
	la	$key,16($key)		# key+=4
Packit c4476c
	la	$t3,4($t3)		# i++
Packit c4476c
	brct	$rounds,.L128_loop
Packit c4476c
	# done: leave the round count in a scratch register (read by
	# AES_set_decrypt_key), return zero, and restore the saved
	# registers, which also undoes the key pointer advance above
	lghi	$t0,10
Packit c4476c
	lghi	%r2,0
Packit c4476c
	lm${g}	%r4,%r13,4*$SIZE_T($sp)
Packit c4476c
	br	$ra
Packit c4476c
.align	16
Packit c4476c
.Lnot128:
Packit c4476c
	# keys longer than 128 bits: copy key words 4 and 5 as well
	llgf	$t0,16($inp)
Packit c4476c
	llgf	$t1,20($inp)
Packit c4476c
	st	$t0,16($key)
Packit c4476c
	st	$t1,20($key)
Packit c4476c
	lghi	$t0,192
Packit c4476c
	cr	$bits,$t0
Packit c4476c
	jne	.Lnot192
Packit c4476c
Packit c4476c
	# key size 192: round count 12 is stored at offset 240, while the
	# expansion loop itself is entered 8 times
	llill	$mask,0xff
Packit c4476c
	lghi	$t3,0			# i=0
Packit c4476c
	lghi	$rounds,12
Packit c4476c
	st	$rounds,240($key)
Packit c4476c
	lghi	$rounds,8
Packit c4476c
Packit c4476c
	# split rk[5] into four byte indices for the first pass
	srlg	$i1,$t1,8
Packit c4476c
	srlg	$i2,$t1,16
Packit c4476c
	srlg	$i3,$t1,24
Packit c4476c
	nr	$t1,$mask
Packit c4476c
	nr	$i1,$mask
Packit c4476c
	nr	$i2,$mask
Packit c4476c
Packit c4476c
.align	16
Packit c4476c
.L192_loop:
Packit c4476c
	# first part of a pass: four new words stored at offsets 24-36
	la	$t1,0($t1,$tbl)
Packit c4476c
	la	$i1,0($i1,$tbl)
Packit c4476c
	la	$i2,0($i2,$tbl)
Packit c4476c
	la	$i3,0($i3,$tbl)
Packit c4476c
	icm	$t1,2,0($t1)		# Te4[rk[5]>>0]<<8
Packit c4476c
	icm	$t1,4,0($i1)		# Te4[rk[5]>>8]<<16
Packit c4476c
	icm	$t1,8,0($i2)		# Te4[rk[5]>>16]<<24
Packit c4476c
	icm	$t1,1,0($i3)		# Te4[rk[5]>>24]
Packit c4476c
	x	$t1,256($t3,$tbl)	# rcon[i]
Packit c4476c
	xr	$s0,$t1			# rk[6]=rk[0]^...
Packit c4476c
	xr	$s1,$s0			# rk[7]=rk[1]^rk[6]
Packit c4476c
	xr	$s2,$s1			# rk[8]=rk[2]^rk[7]
Packit c4476c
	xr	$s3,$s2			# rk[9]=rk[3]^rk[8]
Packit c4476c
Packit c4476c
	st	$s0,24($key)
Packit c4476c
	st	$s1,28($key)
Packit c4476c
	st	$s2,32($key)
Packit c4476c
	st	$s3,36($key)
Packit c4476c
	brct	$rounds,.L192_continue
Packit c4476c
	# counter exhausted: leave the round count in a scratch register
	# (read by AES_set_decrypt_key), return zero, restore registers
	lghi	$t0,12
Packit c4476c
	lghi	%r2,0
Packit c4476c
	lm${g}	%r4,%r13,4*$SIZE_T($sp)
Packit c4476c
	br	$ra
Packit c4476c
Packit c4476c
.align	16
Packit c4476c
.L192_continue:
Packit c4476c
	# second part of a pass: two more words stored at offsets 40 and
	# 44, then the key pointer moves on by 24 bytes
	lgr	$t1,$s3
Packit c4476c
	x	$t1,16($key)		# rk[10]=rk[4]^rk[9]
Packit c4476c
	st	$t1,40($key)
Packit c4476c
	x	$t1,20($key)		# rk[11]=rk[5]^rk[10]
Packit c4476c
	st	$t1,44($key)
Packit c4476c
Packit c4476c
	# byte indices of the word just stored, for the next pass
	srlg	$i1,$t1,8
Packit c4476c
	srlg	$i2,$t1,16
Packit c4476c
	srlg	$i3,$t1,24
Packit c4476c
	nr	$t1,$mask
Packit c4476c
	nr	$i1,$mask
Packit c4476c
	nr	$i2,$mask
Packit c4476c
Packit c4476c
	la	$key,24($key)		# key+=6
Packit c4476c
	la	$t3,4($t3)		# i++
Packit c4476c
	j	.L192_loop
Packit c4476c
.align	16
Packit c4476c
.Lnot192:
Packit c4476c
	# key size 256 (the only size left): copy key words 6 and 7,
	# store round count 14 at offset 240, enter the loop 7 times
	llgf	$t0,24($inp)
Packit c4476c
	llgf	$t1,28($inp)
Packit c4476c
	st	$t0,24($key)
Packit c4476c
	st	$t1,28($key)
Packit c4476c
	llill	$mask,0xff
Packit c4476c
	lghi	$t3,0			# i=0
Packit c4476c
	lghi	$rounds,14
Packit c4476c
	st	$rounds,240($key)
Packit c4476c
	lghi	$rounds,7
Packit c4476c
Packit c4476c
	# split rk[7] into four byte indices for the first pass
	srlg	$i1,$t1,8
Packit c4476c
	srlg	$i2,$t1,16
Packit c4476c
	srlg	$i3,$t1,24
Packit c4476c
	nr	$t1,$mask
Packit c4476c
	nr	$i1,$mask
Packit c4476c
	nr	$i2,$mask
Packit c4476c
Packit c4476c
.align	16
Packit c4476c
.L256_loop:
Packit c4476c
	# first part of a pass: four new words stored at offsets 32-44
	la	$t1,0($t1,$tbl)
Packit c4476c
	la	$i1,0($i1,$tbl)
Packit c4476c
	la	$i2,0($i2,$tbl)
Packit c4476c
	la	$i3,0($i3,$tbl)
Packit c4476c
	icm	$t1,2,0($t1)		# Te4[rk[7]>>0]<<8
Packit c4476c
	icm	$t1,4,0($i1)		# Te4[rk[7]>>8]<<16
Packit c4476c
	icm	$t1,8,0($i2)		# Te4[rk[7]>>16]<<24
Packit c4476c
	icm	$t1,1,0($i3)		# Te4[rk[7]>>24]
Packit c4476c
	x	$t1,256($t3,$tbl)	# rcon[i]
Packit c4476c
	xr	$s0,$t1			# rk[8]=rk[0]^...
Packit c4476c
	xr	$s1,$s0			# rk[9]=rk[1]^rk[8]
Packit c4476c
	xr	$s2,$s1			# rk[10]=rk[2]^rk[9]
Packit c4476c
	xr	$s3,$s2			# rk[11]=rk[3]^rk[10]
Packit c4476c
	st	$s0,32($key)
Packit c4476c
	st	$s1,36($key)
Packit c4476c
	st	$s2,40($key)
Packit c4476c
	st	$s3,44($key)
Packit c4476c
	brct	$rounds,.L256_continue
Packit c4476c
	# counter exhausted: leave the round count in a scratch register
	# (read by AES_set_decrypt_key), return zero, restore registers
	lghi	$t0,14
Packit c4476c
	lghi	%r2,0
Packit c4476c
	lm${g}	%r4,%r13,4*$SIZE_T($sp)
Packit c4476c
	br	$ra
Packit c4476c
Packit c4476c
.align	16
Packit c4476c
.L256_continue:
Packit c4476c
	# second part of a pass: byte-substitute rk[11] in place (bytes
	# keep their positions and no rcon is applied here), then chain
	# four more words through offsets 16-28 into offsets 48-60
	lgr	$t1,$s3			# temp=rk[11]
Packit c4476c
	srlg	$i1,$s3,8
Packit c4476c
	srlg	$i2,$s3,16
Packit c4476c
	srlg	$i3,$s3,24
Packit c4476c
	nr	$t1,$mask
Packit c4476c
	nr	$i1,$mask
Packit c4476c
	nr	$i2,$mask
Packit c4476c
	la	$t1,0($t1,$tbl)
Packit c4476c
	la	$i1,0($i1,$tbl)
Packit c4476c
	la	$i2,0($i2,$tbl)
Packit c4476c
	la	$i3,0($i3,$tbl)
Packit c4476c
	llgc	$t1,0($t1)		# Te4[rk[11]>>0]
Packit c4476c
	icm	$t1,2,0($i1)		# Te4[rk[11]>>8]<<8
Packit c4476c
	icm	$t1,4,0($i2)		# Te4[rk[11]>>16]<<16
Packit c4476c
	icm	$t1,8,0($i3)		# Te4[rk[11]>>24]<<24
Packit c4476c
	x	$t1,16($key)		# rk[12]=rk[4]^...
Packit c4476c
	st	$t1,48($key)
Packit c4476c
	x	$t1,20($key)		# rk[13]=rk[5]^rk[12]
Packit c4476c
	st	$t1,52($key)
Packit c4476c
	x	$t1,24($key)		# rk[14]=rk[6]^rk[13]
Packit c4476c
	st	$t1,56($key)
Packit c4476c
	x	$t1,28($key)		# rk[15]=rk[7]^rk[14]
Packit c4476c
	st	$t1,60($key)
Packit c4476c
Packit c4476c
	# byte indices of rk[15] for the next pass
	srlg	$i1,$t1,8
Packit c4476c
	srlg	$i2,$t1,16
Packit c4476c
	srlg	$i3,$t1,24
Packit c4476c
	nr	$t1,$mask
Packit c4476c
	nr	$i1,$mask
Packit c4476c
	nr	$i2,$mask
Packit c4476c
Packit c4476c
	la	$key,32($key)		# key+=8
Packit c4476c
	la	$t3,4($t3)		# i++
Packit c4476c
	j	.L256_loop
Packit c4476c
Packit c4476c
.Lminus1:
Packit c4476c
	# zero pointer argument: return -1
	lghi	%r2,-1
Packit c4476c
	br	$ra
Packit c4476c
.size	AES_set_encrypt_key,.-AES_set_encrypt_key
Packit c4476c
Packit c4476c
# int AES_set_decrypt_key(const unsigned char *in, int bits,
Packit c4476c
# 		 AES_KEY *key) {
#
# Builds the encryption schedule through _s390x_AES_set_encrypt_key and
# then converts it for decryption. A non-zero status from that call is
# returned unchanged in %r2.
Packit c4476c
.globl	AES_set_decrypt_key
Packit c4476c
.type	AES_set_decrypt_key,\@function
Packit c4476c
.align	16
Packit c4476c
AES_set_decrypt_key:
Packit c4476c
	#st${g}	$key,4*$SIZE_T($sp)	# I rely on AES_set_encrypt_key to
Packit c4476c
	st${g}	$ra,14*$SIZE_T($sp)	# save non-volatile registers and $key!
Packit c4476c
	bras	$ra,_s390x_AES_set_encrypt_key
Packit c4476c
	#l${g}	$key,4*$SIZE_T($sp)
Packit c4476c
	l${g}	$ra,14*$SIZE_T($sp)
Packit c4476c
	# propagate any failure status straight back to the caller
	ltgr	%r2,%r2
Packit c4476c
	bnzr	$ra
Packit c4476c
___
Packit c4476c
# Hardware case of AES_set_decrypt_key (only when $softonly is false):
# the register tested here was left by the encrypt-key routine and holds
# either a round count (10/12/14) or a km(c) function code (18/19/20).
$code.=<<___ if (!$softonly);
Packit c4476c
	#l	$t0,240($key)
Packit c4476c
	lhi	$t1,16
Packit c4476c
	cr	$t0,$t1
Packit c4476c
	jl	.Lgo
Packit c4476c
	# a function code: just flag it for decryption and store it back
	oill	$t0,S390X_DECRYPT	# set "decrypt" bit
Packit c4476c
	st	$t0,240($key)
Packit c4476c
	br	$ra
Packit c4476c
___
Packit c4476c
# Software case of AES_set_decrypt_key, step 1: reverse the order of the
# 16-byte round keys in place.
$code.=<<___;
Packit c4476c
.align	16
Packit c4476c
.Lgo:	lgr	$rounds,$t0	#llgf	$rounds,240($key)
Packit c4476c
	# one pointer starts at the first round key, the other at
	# key + rounds*16; rounds/2 swaps are performed
	la	$i1,0($key)
Packit c4476c
	sllg	$i2,$rounds,4
Packit c4476c
	la	$i2,0($i2,$key)
Packit c4476c
	srl	$rounds,1
Packit c4476c
	lghi	$t1,-16
Packit c4476c
Packit c4476c
.align	16
Packit c4476c
.Linv:	lmg	$s0,$s1,0($i1)
Packit c4476c
	lmg	$s2,$s3,0($i2)
Packit c4476c
	stmg	$s0,$s1,0($i2)
Packit c4476c
	stmg	$s2,$s3,0($i1)
Packit c4476c
	# front pointer moves forward, back pointer moves backward
	la	$i1,16($i1)
Packit c4476c
	la	$i2,0($t1,$i2)
Packit c4476c
	brct	$rounds,.Linv
Packit c4476c
___
Packit c4476c
# Software case of AES_set_decrypt_key, step 2: transform every 32-bit
# word of the inner round keys in place. The three index registers are
# no longer needed after the swap loop and are reused for the constants
# 0x80808080, 0x1b1b1b1b and 0xfefefefe.
$mask80=$i1;
Packit c4476c
$mask1b=$i2;
Packit c4476c
$maskfe=$i3;
Packit c4476c
$code.=<<___;
Packit c4476c
	# word count is (rounds-1)*4, starting at byte offset 16
	llgf	$rounds,240($key)
Packit c4476c
	aghi	$rounds,-1
Packit c4476c
	sll	$rounds,2	# (rounds-1)*4
Packit c4476c
	llilh	$mask80,0x8080
Packit c4476c
	llilh	$mask1b,0x1b1b
Packit c4476c
	llilh	$maskfe,0xfefe
Packit c4476c
	oill	$mask80,0x8080
Packit c4476c
	oill	$mask1b,0x1b1b
Packit c4476c
	oill	$maskfe,0xfefe
Packit c4476c
Packit c4476c
.align	16
Packit c4476c
.Lmix:	l	$s0,16($key)	# tp1
Packit c4476c
	# byte-wise doubling of tp1: bytes with the top bit set pick up
	# 0x1b, and the word shifted left by one is masked with 0xfe
	lr	$s1,$s0
Packit c4476c
	ngr	$s1,$mask80
Packit c4476c
	srlg	$t1,$s1,7
Packit c4476c
	slr	$s1,$t1
Packit c4476c
	nr	$s1,$mask1b
Packit c4476c
	sllg	$t1,$s0,1
Packit c4476c
	nr	$t1,$maskfe
Packit c4476c
	xr	$s1,$t1		# tp2
Packit c4476c
Packit c4476c
	# same doubling step applied to tp2
	lr	$s2,$s1
Packit c4476c
	ngr	$s2,$mask80
Packit c4476c
	srlg	$t1,$s2,7
Packit c4476c
	slr	$s2,$t1
Packit c4476c
	nr	$s2,$mask1b
Packit c4476c
	sllg	$t1,$s1,1
Packit c4476c
	nr	$t1,$maskfe
Packit c4476c
	xr	$s2,$t1		# tp4
Packit c4476c
Packit c4476c
	# and once more applied to tp4
	lr	$s3,$s2
Packit c4476c
	ngr	$s3,$mask80
Packit c4476c
	srlg	$t1,$s3,7
Packit c4476c
	slr	$s3,$t1
Packit c4476c
	nr	$s3,$mask1b
Packit c4476c
	sllg	$t1,$s2,1
Packit c4476c
	nr	$t1,$maskfe
Packit c4476c
	xr	$s3,$t1		# tp8
Packit c4476c
Packit c4476c
	# combine tp1/tp2/tp4/tp8 and their rotations into the new word
	xr	$s1,$s0		# tp2^tp1
Packit c4476c
	xr	$s2,$s0		# tp4^tp1
Packit c4476c
	rll	$s0,$s0,24	# = ROTATE(tp1,8)
Packit c4476c
	xr	$s2,$s3		# ^=tp8
Packit c4476c
	xr	$s0,$s1		# ^=tp2^tp1
Packit c4476c
	xr	$s1,$s3		# tp2^tp1^tp8
Packit c4476c
	xr	$s0,$s2		# ^=tp4^tp1^tp8
Packit c4476c
	rll	$s1,$s1,8
Packit c4476c
	rll	$s2,$s2,16
Packit c4476c
	xr	$s0,$s1		# ^= ROTATE(tp8^tp2^tp1,24)
Packit c4476c
	rll	$s3,$s3,24
Packit c4476c
	xr	$s0,$s2    	# ^= ROTATE(tp8^tp4^tp1,16)
Packit c4476c
	xr	$s0,$s3		# ^= ROTATE(tp8,8)
Packit c4476c
Packit c4476c
	# write the word back and step to the next one
	st	$s0,16($key)
Packit c4476c
	la	$key,4($key)
Packit c4476c
	brct	$rounds,.Lmix
Packit c4476c
Packit c4476c
	lm${g}	%r6,%r13,6*$SIZE_T($sp)# as was saved by AES_set_encrypt_key!
Packit c4476c
	lghi	%r2,0
Packit c4476c
	br	$ra
Packit c4476c
.size	AES_set_decrypt_key,.-AES_set_decrypt_key
Packit c4476c
___
Packit c4476c
Packit c4476c
########################################################################
Packit c4476c
# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
Packit c4476c
#                     size_t length, const AES_KEY *key,
Packit c4476c
#                     unsigned char *ivec, const int enc)
#
# Register roles inside the routine, valid after the three-xgr swap at
# its entry: inp=%r2, len=%r3, out=%r4, key=%r5, ivp=%r6.
Packit c4476c
{
Packit c4476c
my $inp="%r2";
Packit c4476c
my $out="%r4";	# length and out are swapped
Packit c4476c
my $len="%r3";
Packit c4476c
my $key="%r5";
Packit c4476c
my $ivp="%r6";
Packit c4476c
Packit c4476c
$code.=<<___;
Packit c4476c
.globl	AES_cbc_encrypt
Packit c4476c
.type	AES_cbc_encrypt,\@function
Packit c4476c
.align	16
Packit c4476c
AES_cbc_encrypt:
Packit c4476c
	xgr	%r3,%r4		# flip %r3 and %r4, out and len
Packit c4476c
	xgr	%r4,%r3
Packit c4476c
	xgr	%r3,%r4
Packit c4476c
___
Packit c4476c
# Hardware (kmc) path of AES_cbc_encrypt; appended only when $softonly
# is false.
$code.=<<___ if (!$softonly);
Packit c4476c
	# the word at offset 240 of the key is below 16 for a software
	# schedule (round count); in that case use the software path
	lhi	%r0,16
Packit c4476c
	cl	%r0,240($key)
Packit c4476c
	jh	.Lcbc_software
Packit c4476c
Packit c4476c
	# parameter block on the stack: ivec at offset 16, then 32 bytes
	# of key material at offset 32
	lg	%r0,0($ivp)	# copy ivec
Packit c4476c
	lg	%r1,8($ivp)
Packit c4476c
	stmg	%r0,%r1,16($sp)
Packit c4476c
	lmg	%r0,%r1,0($key)	# copy key, cover 256 bit
Packit c4476c
	stmg	%r0,%r1,32($sp)
Packit c4476c
	lmg	%r0,%r1,16($key)
Packit c4476c
	stmg	%r0,%r1,48($sp)
Packit c4476c
	l	%r0,240($key)	# load kmc code
Packit c4476c
	lghi	$key,15		# res=len%16, len-=res;
Packit c4476c
	ngr	$key,$len
Packit c4476c
	sl${g}r	$len,$key
Packit c4476c
	la	%r1,16($sp)	# parameter block - ivec || key
Packit c4476c
	# NOTE(review): a logical subtract reports a zero result as
	# condition code 2, so this jz may never be taken; short inputs
	# would then reach .Lkmc_truncated via the ltr/jnz test after
	# kmc instead - confirm against the architecture manual
	jz	.Lkmc_truncated
Packit c4476c
	.long	0xb92f0042	# kmc %r4,%r2
Packit c4476c
	brc	1,.-4		# pay attention to "partial completion"
Packit c4476c
	# a non-zero remainder means a trailing partial block is left
	ltr	$key,$key
Packit c4476c
	jnz	.Lkmc_truncated
Packit c4476c
.Lkmc_done:
Packit c4476c
	lmg	%r0,%r1,16($sp)	# copy ivec to caller
Packit c4476c
	stg	%r0,0($ivp)
Packit c4476c
	stg	%r1,8($ivp)
Packit c4476c
	br	$ra
Packit c4476c
.align	16
Packit c4476c
.Lkmc_truncated:
Packit c4476c
	ahi	$key,-1		# it's the way it's encoded in mvc
Packit c4476c
	tmll	%r0,S390X_DECRYPT
Packit c4476c
	jnz	.Lkmc_truncated_dec
Packit c4476c
	# encrypt: zero a 16-byte stack buffer, copy the remaining input
	# bytes into it with an executed mvc, and run kmc over that block
	lghi	%r1,0
Packit c4476c
	stg	%r1,16*$SIZE_T($sp)
Packit c4476c
	stg	%r1,16*$SIZE_T+8($sp)
Packit c4476c
	bras	%r1,1f
Packit c4476c
	mvc	16*$SIZE_T(1,$sp),0($inp)
Packit c4476c
1:	ex	$key,0(%r1)
Packit c4476c
	la	%r1,16($sp)	# restore parameter block
Packit c4476c
	la	$inp,16*$SIZE_T($sp)
Packit c4476c
	lghi	$len,16
Packit c4476c
	.long	0xb92f0042	# kmc %r4,%r2
Packit c4476c
	j	.Lkmc_done
Packit c4476c
.align	16
Packit c4476c
.Lkmc_truncated_dec:
Packit c4476c
	# decrypt: run kmc on one block into a stack buffer, then copy
	# only the remaining byte count to the real output
	st${g}	$out,4*$SIZE_T($sp)
Packit c4476c
	la	$out,16*$SIZE_T($sp)
Packit c4476c
	lghi	$len,16
Packit c4476c
	.long	0xb92f0042	# kmc %r4,%r2
Packit c4476c
	l${g}	$out,4*$SIZE_T($sp)
Packit c4476c
	bras	%r1,2f
Packit c4476c
	mvc	0(1,$out),16*$SIZE_T($sp)
Packit c4476c
2:	ex	$key,0(%r1)
Packit c4476c
	j	.Lkmc_done
Packit c4476c
.align	16
Packit c4476c
.Lcbc_software:
Packit c4476c
___
Packit c4476c
# Software path of AES_cbc_encrypt: register save, direction test and
# the encrypt loop (always emitted).
$code.=<<___;
Packit c4476c
	stm${g}	$key,$ra,5*$SIZE_T($sp)
Packit c4476c
	# a zero word in the caller's frame selects decryption; this is
	# presumably the enc argument passed on the stack - confirm
	lhi	%r0,0
Packit c4476c
	cl	%r0,`$stdframe+$SIZE_T-4`($sp)
Packit c4476c
	je	.Lcbc_decrypt
Packit c4476c
Packit c4476c
	larl	$tbl,AES_Te
Packit c4476c
Packit c4476c
	# the four state registers start out as the ivec and afterwards
	# carry each ciphertext block into the next iteration
	llgf	$s0,0($ivp)
Packit c4476c
	llgf	$s1,4($ivp)
Packit c4476c
	llgf	$s2,8($ivp)
Packit c4476c
	llgf	$s3,12($ivp)
Packit c4476c
Packit c4476c
	lghi	$t0,16
Packit c4476c
	sl${g}r	$len,$t0
Packit c4476c
	brc	4,.Lcbc_enc_tail	# if borrow
Packit c4476c
.Lcbc_enc_loop:
Packit c4476c
	# save inp/len/out, xor the input block into the state, and
	# encrypt it with the key pointer passed in %r4
	stm${g}	$inp,$out,2*$SIZE_T($sp)
Packit c4476c
	x	$s0,0($inp)
Packit c4476c
	x	$s1,4($inp)
Packit c4476c
	x	$s2,8($inp)
Packit c4476c
	x	$s3,12($inp)
Packit c4476c
	lgr	%r4,$key
Packit c4476c
Packit c4476c
	bras	$ra,_s390x_AES_encrypt
Packit c4476c
Packit c4476c
	# reload inp/len/out/key from the save area and emit the block
	lm${g}	$inp,$key,2*$SIZE_T($sp)
Packit c4476c
	st	$s0,0($out)
Packit c4476c
	st	$s1,4($out)
Packit c4476c
	st	$s2,8($out)
Packit c4476c
	st	$s3,12($out)
Packit c4476c
Packit c4476c
	la	$inp,16($inp)
Packit c4476c
	la	$out,16($out)
Packit c4476c
	lghi	$t0,16
Packit c4476c
	lt${g}r	$len,$len
Packit c4476c
	jz	.Lcbc_enc_done
Packit c4476c
	sl${g}r	$len,$t0
Packit c4476c
	brc	4,.Lcbc_enc_tail	# if borrow
Packit c4476c
	j	.Lcbc_enc_loop
Packit c4476c
.align	16
Packit c4476c
.Lcbc_enc_done:
Packit c4476c
	# write the last ciphertext block back as the new ivec
	l${g}	$ivp,6*$SIZE_T($sp)
Packit c4476c
	st	$s0,0($ivp)
Packit c4476c
	st	$s1,4($ivp)
Packit c4476c
	st	$s2,8($ivp)
Packit c4476c
	st	$s3,12($ivp)
Packit c4476c
Packit c4476c
	lm${g}	%r7,$ra,7*$SIZE_T($sp)
Packit c4476c
	br	$ra
Packit c4476c
Packit c4476c
.align	16
Packit c4476c
.Lcbc_enc_tail:
Packit c4476c
	# fewer than 16 bytes remain: adding 15 to the negative count
	# yields the mvc length code; the partial block is copied into a
	# zeroed stack buffer which then goes through the loop once more
	aghi	$len,15
Packit c4476c
	lghi	$t0,0
Packit c4476c
	stg	$t0,16*$SIZE_T($sp)
Packit c4476c
	stg	$t0,16*$SIZE_T+8($sp)
Packit c4476c
	bras	$t1,3f
Packit c4476c
	mvc	16*$SIZE_T(1,$sp),0($inp)
Packit c4476c
3:	ex	$len,0($t1)
Packit c4476c
	lghi	$len,0
Packit c4476c
	la	$inp,16*$SIZE_T($sp)
Packit c4476c
	j	.Lcbc_enc_loop
Packit c4476c
Packit c4476c
.align	16
Packit c4476c
.Lcbc_decrypt:
Packit c4476c
	larl	$tbl,AES_Td
Packit c4476c
Packit c4476c
	# keep the current chaining value (initially the ivec) in a
	# 16-byte stack slot
	lg	$t0,0($ivp)
Packit c4476c
	lg	$t1,8($ivp)
Packit c4476c
	stmg	$t0,$t1,16*$SIZE_T($sp)
Packit c4476c
Packit c4476c
.Lcbc_dec_loop:
Packit c4476c
	# save inp/len/out, load one ciphertext block and decrypt it
	# with the key pointer passed in %r4
	stm${g}	$inp,$out,2*$SIZE_T($sp)
Packit c4476c
	llgf	$s0,0($inp)
Packit c4476c
	llgf	$s1,4($inp)
Packit c4476c
	llgf	$s2,8($inp)
Packit c4476c
	llgf	$s3,12($inp)
Packit c4476c
	lgr	%r4,$key
Packit c4476c
Packit c4476c
	bras	$ra,_s390x_AES_decrypt
Packit c4476c
Packit c4476c
	# pack the four 32-bit result words into two 64-bit registers
	lm${g}	$inp,$key,2*$SIZE_T($sp)
Packit c4476c
	sllg	$s0,$s0,32
Packit c4476c
	sllg	$s2,$s2,32
Packit c4476c
	lr	$s0,$s1
Packit c4476c
	lr	$s2,$s3
Packit c4476c
Packit c4476c
	# fetch the ciphertext block again (it becomes the next chaining
	# value) and xor the previous chaining value into the result
	lg	$t0,0($inp)
Packit c4476c
	lg	$t1,8($inp)
Packit c4476c
	xg	$s0,16*$SIZE_T($sp)
Packit c4476c
	xg	$s2,16*$SIZE_T+8($sp)
Packit c4476c
	lghi	$s1,16
Packit c4476c
	sl${g}r	$len,$s1
Packit c4476c
	brc	4,.Lcbc_dec_tail	# if borrow
Packit c4476c
	brc	2,.Lcbc_dec_done	# if zero
Packit c4476c
	stg	$s0,0($out)
Packit c4476c
	stg	$s2,8($out)
Packit c4476c
	stmg	$t0,$t1,16*$SIZE_T($sp)
Packit c4476c
Packit c4476c
	la	$inp,16($inp)
Packit c4476c
	la	$out,16($out)
Packit c4476c
	j	.Lcbc_dec_loop
Packit c4476c
Packit c4476c
.Lcbc_dec_done:
Packit c4476c
	stg	$s0,0($out)
Packit c4476c
	stg	$s2,8($out)
Packit c4476c
.Lcbc_dec_exit:
Packit c4476c
	# restore registers (including the ivec pointer) and hand the
	# last ciphertext block back as the new ivec
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
Packit c4476c
	stmg	$t0,$t1,0($ivp)
Packit c4476c
Packit c4476c
	br	$ra
Packit c4476c
Packit c4476c
.align	16
Packit c4476c
.Lcbc_dec_tail:
Packit c4476c
	# partial final block: park the plaintext in the stack slot and
	# copy only the remaining bytes out with an executed mvc
	aghi	$len,15
Packit c4476c
	stg	$s0,16*$SIZE_T($sp)
Packit c4476c
	stg	$s2,16*$SIZE_T+8($sp)
Packit c4476c
	bras	$s1,4f
Packit c4476c
	mvc	0(1,$out),16*$SIZE_T($sp)
Packit c4476c
4:	ex	$len,0($s1)
Packit c4476c
	j	.Lcbc_dec_exit
Packit c4476c
.size	AES_cbc_encrypt,.-AES_cbc_encrypt
Packit c4476c
___
Packit c4476c
}
Packit c4476c
########################################################################
Packit c4476c
# void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
Packit c4476c
#                     size_t blocks, const AES_KEY *key,
Packit c4476c
#                     const unsigned char *ivec)
#
# Register roles inside the routine, valid after the three-xgr swap at
# its entry: inp=%r2, len=%r3 (a block count), out=%r4, key=%r5 (later
# reused as iv0), ivp=%r6, fp=%r7.
Packit c4476c
{
Packit c4476c
my $inp="%r2";
Packit c4476c
my $out="%r4";	# blocks and out are swapped
Packit c4476c
my $len="%r3";
Packit c4476c
my $key="%r5";	my $iv0="%r5";
Packit c4476c
my $ivp="%r6";
Packit c4476c
my $fp ="%r7";
Packit c4476c
Packit c4476c
$code.=<<___;
Packit c4476c
.globl	AES_ctr32_encrypt
Packit c4476c
.type	AES_ctr32_encrypt,\@function
Packit c4476c
.align	16
Packit c4476c
AES_ctr32_encrypt:
Packit c4476c
	xgr	%r3,%r4		# flip %r3 and %r4, $out and $len
Packit c4476c
	xgr	%r4,%r3
Packit c4476c
	xgr	%r3,%r4
Packit c4476c
	llgfr	$len,$len	# safe in ctr32 subroutine even in 64-bit case
Packit c4476c
___
Packit c4476c
# Hardware dispatch and kma path of AES_ctr32_encrypt; appended only
# when $softonly is false.
$code.=<<___ if (!$softonly);
Packit c4476c
	# the word at offset 240 of the key is below 16 for a software
	# schedule (round count); in that case use the software loop
	l	%r0,240($key)
Packit c4476c
	lhi	%r1,16
Packit c4476c
	clr	%r0,%r1
Packit c4476c
	jl	.Lctr32_software
Packit c4476c
Packit c4476c
	st${g}	$s2,10*$SIZE_T($sp)
Packit c4476c
	st${g}	$s3,11*$SIZE_T($sp)
Packit c4476c
Packit c4476c
	clr	$len,%r1		# does work even in 64-bit mode
Packit c4476c
	jle	.Lctr32_nokma		# kma is slower for <= 16 blocks
Packit c4476c
Packit c4476c
	# one-bit mask shifted right by the function code, tested against
	# the KMA capability word
	larl	%r1,OPENSSL_s390xcap_P
Packit c4476c
	lr	$s2,%r0
Packit c4476c
	llihh	$s3,0x8000
Packit c4476c
	srlg	$s3,$s3,0($s2)
Packit c4476c
	ng	$s3,S390X_KMA(%r1)		# check kma capability vector
Packit c4476c
	jz	.Lctr32_nokma
Packit c4476c
Packit c4476c
	# carve a frame plus 112 bytes off the stack; the old stack
	# pointer is kept and stored as the back-chain below
	l${g}hi	%r1,-$stdframe-112
Packit c4476c
	l${g}r	$s3,$sp
Packit c4476c
	la	$sp,0(%r1,$sp)			# prepare parameter block
Packit c4476c
Packit c4476c
	# block count becomes a byte count
	lhi	%r1,0x0600
Packit c4476c
	sllg	$len,$len,4
Packit c4476c
	or	%r0,%r1				# set HS and LAAD flags
Packit c4476c
Packit c4476c
	st${g}	$s3,0($sp)			# backchain
Packit c4476c
	la	%r1,$stdframe($sp)
Packit c4476c
Packit c4476c
	# 32 bytes of key material go to offsets 80-111 of the block
	lmg	$s2,$s3,0($key)			# copy key
Packit c4476c
	stg	$s2,$stdframe+80($sp)
Packit c4476c
	stg	$s3,$stdframe+88($sp)
Packit c4476c
	lmg	$s2,$s3,16($key)
Packit c4476c
	stg	$s2,$stdframe+96($sp)
Packit c4476c
	stg	$s3,$stdframe+104($sp)
Packit c4476c
Packit c4476c
	# iv goes to offsets 64-79 with its low word decremented, and
	# that low word is also stored at offset 12
	lmg	$s2,$s3,0($ivp)			# copy iv
Packit c4476c
	stg	$s2,$stdframe+64($sp)
Packit c4476c
	ahi	$s3,-1				# kma requires counter-1
Packit c4476c
	stg	$s3,$stdframe+72($sp)
Packit c4476c
	st	$s3,$stdframe+12($sp)		# copy counter
Packit c4476c
Packit c4476c
	lghi	$s2,0				# no AAD
Packit c4476c
	lghi	$s3,0
Packit c4476c
Packit c4476c
	.long	0xb929a042	# kma $out,$s2,$inp
Packit c4476c
	brc	1,.-4		# pay attention to "partial completion"
Packit c4476c
Packit c4476c
	# overwrite the key copy, release the frame, restore registers
	stg	%r0,$stdframe+80($sp)		# wipe key
Packit c4476c
	stg	%r0,$stdframe+88($sp)
Packit c4476c
	stg	%r0,$stdframe+96($sp)
Packit c4476c
	stg	%r0,$stdframe+104($sp)
Packit c4476c
	la	$sp,$stdframe+112($sp)
Packit c4476c
Packit c4476c
	lm${g}	$s2,$s3,10*$SIZE_T($sp)
Packit c4476c
	br	$ra
Packit c4476c
.align	16
Packit c4476c
.Lctr32_nokma:
Packit c4476c
	stm${g}	%r6,$s1,6*$SIZE_T($sp)
Packit c4476c
Packit c4476c
	# out is turned into an offset relative to inp, so the xor loop
	# can address both buffers from a single moving pointer
	slgr	$out,$inp
Packit c4476c
	la	%r1,0($key)	# %r1 is permanent copy of $key
Packit c4476c
	lg	$iv0,0($ivp)	# load ivec
Packit c4476c
	lg	$ivp,8($ivp)
Packit c4476c
Packit c4476c
	# prepare and allocate stack frame at the top of 4K page
Packit c4476c
	# with 1K reserved for eventual signal handling
Packit c4476c
	lghi	$s0,-1024-256-16# guarantee at least 256-bytes buffer
Packit c4476c
	lghi	$s1,-4096
Packit c4476c
	algr	$s0,$sp
Packit c4476c
	lgr	$fp,$sp
Packit c4476c
	ngr	$s0,$s1		# align at page boundary
Packit c4476c
	slgr	$fp,$s0		# total buffer size
Packit c4476c
	lgr	$s2,$sp
Packit c4476c
	lghi	$s1,1024+16	# sl[g]fi is extended-immediate facility
Packit c4476c
	slgr	$fp,$s1		# deduct reservation to get usable buffer size
Packit c4476c
	# buffer size is at least 256 and at most 3072+256-16
Packit c4476c
Packit c4476c
	la	$sp,1024($s0)	# alloca
Packit c4476c
	srlg	$fp,$fp,4	# convert bytes to blocks, minimum 16
Packit c4476c
	st${g}	$s2,0($sp)	# back-chain
Packit c4476c
	st${g}	$fp,$SIZE_T($sp)
Packit c4476c
Packit c4476c
	# clamp the per-pass block count to the number of blocks
	# requested and record it in the frame for the final wipe
	slgr	$len,$fp
Packit c4476c
	brc	1,.Lctr32_hw_switch	# not zero, no borrow
Packit c4476c
	algr	$fp,$len	# input is shorter than allocated buffer
Packit c4476c
	lghi	$len,0
Packit c4476c
	st${g}	$fp,$SIZE_T($sp)
Packit c4476c
Packit c4476c
.Lctr32_hw_switch:
Packit c4476c
___
Packit c4476c
# Disabled kmctr variant: the condition ends in "&& 0", so this heredoc
# is never appended to $code. It is kept for reference only.
$code.=<<___ if (!$softonly && 0);# kmctr code was measured to be ~12% slower
Packit c4476c
	# test the KMCTR capability word for the function code, keeping
	# the function code and key pointer intact across the check
	llgfr	$s0,%r0
Packit c4476c
	lgr	$s1,%r1
Packit c4476c
	larl	%r1,OPENSSL_s390xcap_P
Packit c4476c
	llihh	%r0,0x8000	# check if kmctr supports the function code
Packit c4476c
	srlg	%r0,%r0,0($s0)
Packit c4476c
	ng	%r0,S390X_KMCTR(%r1)	# check kmctr capability vector
Packit c4476c
	lgr	%r0,$s0
Packit c4476c
	lgr	%r1,$s1
Packit c4476c
	jz	.Lctr32_km_loop
Packit c4476c
Packit c4476c
####### kmctr code
Packit c4476c
	algr	$out,$inp	# restore $out
Packit c4476c
	lgr	$s1,$len	# $s1 undertakes $len
Packit c4476c
	j	.Lctr32_kmctr_loop
Packit c4476c
.align	16
Packit c4476c
.Lctr32_kmctr_loop:
Packit c4476c
	la	$s2,16($sp)
Packit c4476c
	lgr	$s3,$fp
Packit c4476c
.Lctr32_kmctr_prepare:
Packit c4476c
	# fill the stack buffer with consecutive counter blocks
	stg	$iv0,0($s2)
Packit c4476c
	stg	$ivp,8($s2)
Packit c4476c
	la	$s2,16($s2)
Packit c4476c
	ahi	$ivp,1		# 32-bit increment, preserves upper half
Packit c4476c
	brct	$s3,.Lctr32_kmctr_prepare
Packit c4476c
Packit c4476c
	#la	$inp,0($inp)	# inp
Packit c4476c
	sllg	$len,$fp,4	# len
Packit c4476c
	#la	$out,0($out)	# out
Packit c4476c
	la	$s2,16($sp)	# iv
Packit c4476c
	.long	0xb92da042	# kmctr $out,$s2,$inp
Packit c4476c
	brc	1,.-4		# pay attention to "partial completion"
Packit c4476c
Packit c4476c
	# account for the blocks just processed; run one shorter final
	# pass if fewer than a full buffer remain
	slgr	$s1,$fp
Packit c4476c
	brc	1,.Lctr32_kmctr_loop	# not zero, no borrow
Packit c4476c
	algr	$fp,$s1
Packit c4476c
	lghi	$s1,0
Packit c4476c
	brc	4+1,.Lctr32_kmctr_loop	# not zero
Packit c4476c
Packit c4476c
	l${g}	$sp,0($sp)
Packit c4476c
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
Packit c4476c
	br	$ra
Packit c4476c
.align	16
Packit c4476c
___
Packit c4476c
# km-based counter mode for AES_ctr32_encrypt (only when $softonly is
# false): counter blocks are written to the stack buffer, encrypted in
# place with km, and xor-ed with the input.
$code.=<<___ if (!$softonly);
Packit c4476c
.Lctr32_km_loop:
Packit c4476c
	la	$s2,16($sp)
Packit c4476c
	lgr	$s3,$fp
Packit c4476c
.Lctr32_km_prepare:
Packit c4476c
	# fill the stack buffer with consecutive counter blocks
	stg	$iv0,0($s2)
Packit c4476c
	stg	$ivp,8($s2)
Packit c4476c
	la	$s2,16($s2)
Packit c4476c
	ahi	$ivp,1		# 32-bit increment, preserves upper half
Packit c4476c
	brct	$s3,.Lctr32_km_prepare
Packit c4476c
Packit c4476c
	# encrypt the buffer in place (source and destination coincide)
	la	$s0,16($sp)	# inp
Packit c4476c
	sllg	$s1,$fp,4	# len
Packit c4476c
	la	$s2,16($sp)	# out
Packit c4476c
	.long	0xb92e00a8	# km %r10,%r8
Packit c4476c
	brc	1,.-4		# pay attention to "partial completion"
Packit c4476c
Packit c4476c
	# like out, the buffer address is turned into an offset relative
	# to inp so that a single pointer walks all three areas
	la	$s2,16($sp)
Packit c4476c
	lgr	$s3,$fp
Packit c4476c
	slgr	$s2,$inp
Packit c4476c
.Lctr32_km_xor:
Packit c4476c
	lg	$s0,0($inp)
Packit c4476c
	lg	$s1,8($inp)
Packit c4476c
	xg	$s0,0($s2,$inp)
Packit c4476c
	xg	$s1,8($s2,$inp)
Packit c4476c
	stg	$s0,0($out,$inp)
Packit c4476c
	stg	$s1,8($out,$inp)
Packit c4476c
	la	$inp,16($inp)
Packit c4476c
	brct	$s3,.Lctr32_km_xor
Packit c4476c
Packit c4476c
	# account for the blocks just processed; run one shorter final
	# pass if fewer than a full buffer remain
	slgr	$len,$fp
Packit c4476c
	brc	1,.Lctr32_km_loop	# not zero, no borrow
Packit c4476c
	algr	$fp,$len
Packit c4476c
	lghi	$len,0
Packit c4476c
	brc	4+1,.Lctr32_km_loop	# not zero
Packit c4476c
Packit c4476c
	# reload the back-chain and the recorded block count, then
	# overwrite that many buffer blocks with the back-chain value
	l${g}	$s0,0($sp)
Packit c4476c
	l${g}	$s1,$SIZE_T($sp)
Packit c4476c
	la	$s2,16($sp)
Packit c4476c
.Lctr32_km_zap:
Packit c4476c
	stg	$s0,0($s2)
Packit c4476c
	stg	$s0,8($s2)
Packit c4476c
	la	$s2,16($s2)
Packit c4476c
	brct	$s1,.Lctr32_km_zap
Packit c4476c
Packit c4476c
	# release the buffer by restoring the old stack pointer
	la	$sp,0($s0)
Packit c4476c
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
Packit c4476c
	br	$ra
Packit c4476c
.align	16
Packit c4476c
.Lctr32_software:
Packit c4476c
___
Packit c4476c
# Software counter mode for AES_ctr32_encrypt (always emitted): one
# block per iteration through _s390x_AES_encrypt.
$code.=<<___;
Packit c4476c
	stm${g}	$key,$ra,5*$SIZE_T($sp)
Packit c4476c
	# inp is turned into an offset relative to out; the running
	# counter is the last 32-bit word of the ivec
	sl${g}r	$inp,$out
Packit c4476c
	larl	$tbl,AES_Te
Packit c4476c
	llgf	$t1,12($ivp)
Packit c4476c
Packit c4476c
.Lctr32_loop:
Packit c4476c
	# build the counter block from the first three ivec words plus
	# the running counter, saving the counter across the call
	stm${g}	$inp,$out,2*$SIZE_T($sp)
Packit c4476c
	llgf	$s0,0($ivp)
Packit c4476c
	llgf	$s1,4($ivp)
Packit c4476c
	llgf	$s2,8($ivp)
Packit c4476c
	lgr	$s3,$t1
Packit c4476c
	st	$t1,16*$SIZE_T($sp)
Packit c4476c
	lgr	%r4,$key
Packit c4476c
Packit c4476c
	bras	$ra,_s390x_AES_encrypt
Packit c4476c
Packit c4476c
	# reload the saved registers and counter, xor the keystream with
	# the input block and store the result at out
	lm${g}	$inp,$ivp,2*$SIZE_T($sp)
Packit c4476c
	llgf	$t1,16*$SIZE_T($sp)
Packit c4476c
	x	$s0,0($inp,$out)
Packit c4476c
	x	$s1,4($inp,$out)
Packit c4476c
	x	$s2,8($inp,$out)
Packit c4476c
	x	$s3,12($inp,$out)
Packit c4476c
	stm	$s0,$s3,0($out)
Packit c4476c
Packit c4476c
	la	$out,16($out)
Packit c4476c
	ahi	$t1,1		# 32-bit increment
Packit c4476c
	brct	$len,.Lctr32_loop
Packit c4476c
Packit c4476c
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
Packit c4476c
	br	$ra
Packit c4476c
.size	AES_ctr32_encrypt,.-AES_ctr32_encrypt
Packit c4476c
___
Packit c4476c
}
Packit c4476c
Packit c4476c
########################################################################
Packit c4476c
# void AES_xts_encrypt(const unsigned char *inp, unsigned char *out,
Packit c4476c
#	size_t len, const AES_KEY *key1, const AES_KEY *key2,
Packit c4476c
#	const unsigned char iv[16]);
Packit c4476c
#
Packit c4476c
{
Packit c4476c
my $inp="%r2";
Packit c4476c
my $out="%r4";	# len and out are swapped
Packit c4476c
my $len="%r3";
Packit c4476c
my $key1="%r5";	# $i1
Packit c4476c
my $key2="%r6";	# $i2
Packit c4476c
my $fp="%r7";	# $i3
Packit c4476c
my $tweak=16*$SIZE_T+16;	# or $stdframe-16, bottom of the frame...
Packit c4476c
Packit c4476c
# _s390x_xts_km: internal helper reached from AES_xts_encrypt and
# AES_xts_decrypt via "bras $ra,_s390x_xts_km". Both callers load %r0 from
# 240($key1) and point %r1 at $key1 before the call, and have already
# stored the initial tweak at $tweak($sp).
# This first chunk only emits the symbol type, alignment and entry label.
$code.=<<___;
Packit c4476c
.type	_s390x_xts_km,\@function
Packit c4476c
.align	16
Packit c4476c
_s390x_xts_km:
Packit c4476c
___
Packit c4476c
# Hardware XTS path. NOTE(review): the "if(1)" modifier is always true, so
# this chunk is emitted unconditionally.
# The code tests the bit for (function code + 32) in the capability words at
# S390X_KM(OPENSSL_s390xcap_P) and branches to .Lxts_km_vanilla when it is
# clear. Otherwise it copies 256 bits of key material from $key1 into a
# parameter block placed immediately below the tweak on the stack, issues
# the hand-encoded "km %r4,%r2" (retried while partial completion is
# signalled), reloads the resulting tweak byte-reversed into $s0/$s1, wipes
# the key copy, restores %r0/%r1 and converts $out back to an offset from
# $inp. On return $len holds the saved byte length modulo 16 and the
# condition code of that final "nill" tells the caller whether a partial
# block remains.
$code.=<<___ if(1);
Packit c4476c
	llgfr	$s0,%r0			# put aside the function code
Packit c4476c
	lghi	$s1,0x7f
Packit c4476c
	nr	$s1,%r0
Packit c4476c
	larl	%r1,OPENSSL_s390xcap_P
Packit c4476c
	llihh	%r0,0x8000
Packit c4476c
	srlg	%r0,%r0,32($s1)		# check for 32+function code
Packit c4476c
	ng	%r0,S390X_KM(%r1)	# check km capability vector
Packit c4476c
	lgr	%r0,$s0			# restore the function code
Packit c4476c
	la	%r1,0($key1)		# restore $key1
Packit c4476c
	jz	.Lxts_km_vanilla
Packit c4476c
Packit c4476c
	lmg	$i2,$i3,$tweak($sp)	# put aside the tweak value
Packit c4476c
	algr	$out,$inp
Packit c4476c
Packit c4476c
	oill	%r0,32			# switch to xts function code
Packit c4476c
	aghi	$s1,-18			#
Packit c4476c
	sllg	$s1,$s1,3		# (function code - 18)*8, 0 or 16
Packit c4476c
	la	%r1,$tweak-16($sp)
Packit c4476c
	slgr	%r1,$s1			# parameter block position
Packit c4476c
	lmg	$s0,$s3,0($key1)	# load 256 bits of key material,
Packit c4476c
	stmg	$s0,$s3,0(%r1)		# and copy it to parameter block.
Packit c4476c
					# yes, it contains junk and overlaps
Packit c4476c
					# with the tweak in 128-bit case.
Packit c4476c
					# it's done to avoid conditional
Packit c4476c
					# branch.
Packit c4476c
	stmg	$i2,$i3,$tweak($sp)	# "re-seat" the tweak value
Packit c4476c
Packit c4476c
	.long	0xb92e0042		# km %r4,%r2
Packit c4476c
	brc	1,.-4			# pay attention to "partial completion"
Packit c4476c
Packit c4476c
	lrvg	$s0,$tweak+0($sp)	# load the last tweak
Packit c4476c
	lrvg	$s1,$tweak+8($sp)
Packit c4476c
	stmg	%r0,%r3,$tweak-32($sp)	# wipe copy of the key
Packit c4476c
Packit c4476c
	nill	%r0,0xffdf		# switch back to original function code
Packit c4476c
	la	%r1,0($key1)		# restore pointer to $key1
Packit c4476c
	slgr	$out,$inp
Packit c4476c
Packit c4476c
	llgc	$len,2*$SIZE_T-1($sp)
Packit c4476c
	nill	$len,0x0f		# $len%=16
Packit c4476c
	br	$ra
Packit c4476c
Packit c4476c
.align	16
Packit c4476c
.Lxts_km_vanilla:
Packit c4476c
___
Packit c4476c
# Fallback body of _s390x_xts_km (continues at the .Lxts_km_vanilla label
# emitted just above), followed by the first instructions of
# AES_xts_encrypt.
#
# Fallback outline:
#  - carve a temporary, page-aligned buffer out of the stack, store the old
#    $sp as back-chain and the usable buffer size at $SIZE_T($sp);
#  - per pass: write a vector of ascending tweak values into the buffer
#    while storing input^tweak to the output, run the hand-encoded
#    "km $s2,$s2" in place over the output, then XOR the stored tweaks
#    into the output again (.Lxts_km_xor);
#  - each new tweak is derived by shifting the 128-bit value held in
#    $s1:$s0 left by one bit (algr/alcgr) and XORing 0x87 into the low
#    half when the top bit was set;
#  - repeat until $len is exhausted, then overwrite the buffer with the
#    back-chain value (.Lxts_km_zap) and restore $sp;
#  - set $len to the saved byte length modulo 16 and return if it is zero;
#    otherwise generate one more tweak and return with the zero flag
#    cleared by "ltr".
#
# The chunk ends with the AES_xts_encrypt entry label and the three-XOR
# exchange of %r3 and %r4.
$code.=<<___;
Packit c4476c
	# prepare and allocate stack frame at the top of 4K page
Packit c4476c
	# with 1K reserved for eventual signal handling
Packit c4476c
	lghi	$s0,-1024-256-16# guarantee at least 256-bytes buffer
Packit c4476c
	lghi	$s1,-4096
Packit c4476c
	algr	$s0,$sp
Packit c4476c
	lgr	$fp,$sp
Packit c4476c
	ngr	$s0,$s1		# align at page boundary
Packit c4476c
	slgr	$fp,$s0		# total buffer size
Packit c4476c
	lgr	$s2,$sp
Packit c4476c
	lghi	$s1,1024+16	# sl[g]fi is extended-immediate facility
Packit c4476c
	slgr	$fp,$s1		# deduct reservation to get usable buffer size
Packit c4476c
	# buffer size is at lest 256 and at most 3072+256-16
Packit c4476c
Packit c4476c
	la	$sp,1024($s0)	# alloca
Packit c4476c
	nill	$fp,0xfff0	# round to 16*n
Packit c4476c
	st${g}	$s2,0($sp)	# back-chain
Packit c4476c
	nill	$len,0xfff0	# redundant
Packit c4476c
	st${g}	$fp,$SIZE_T($sp)
Packit c4476c
Packit c4476c
	slgr	$len,$fp
Packit c4476c
	brc	1,.Lxts_km_go	# not zero, no borrow
Packit c4476c
	algr	$fp,$len	# input is shorter than allocated buffer
Packit c4476c
	lghi	$len,0
Packit c4476c
	st${g}	$fp,$SIZE_T($sp)
Packit c4476c
Packit c4476c
.Lxts_km_go:
Packit c4476c
	lrvg	$s0,$tweak+0($s2)	# load the tweak value in little-endian
Packit c4476c
	lrvg	$s1,$tweak+8($s2)
Packit c4476c
Packit c4476c
	la	$s2,16($sp)		# vector of ascending tweak values
Packit c4476c
	slgr	$s2,$inp
Packit c4476c
	srlg	$s3,$fp,4
Packit c4476c
	j	.Lxts_km_start
Packit c4476c
Packit c4476c
.Lxts_km_loop:
Packit c4476c
	la	$s2,16($sp)
Packit c4476c
	slgr	$s2,$inp
Packit c4476c
	srlg	$s3,$fp,4
Packit c4476c
.Lxts_km_prepare:
Packit c4476c
	lghi	$i1,0x87
Packit c4476c
	srag	$i2,$s1,63		# broadcast upper bit
Packit c4476c
	ngr	$i1,$i2			# rem
Packit c4476c
	algr	$s0,$s0
Packit c4476c
	alcgr	$s1,$s1
Packit c4476c
	xgr	$s0,$i1
Packit c4476c
.Lxts_km_start:
Packit c4476c
	lrvgr	$i1,$s0			# flip byte order
Packit c4476c
	lrvgr	$i2,$s1
Packit c4476c
	stg	$i1,0($s2,$inp)
Packit c4476c
	stg	$i2,8($s2,$inp)
Packit c4476c
	xg	$i1,0($inp)
Packit c4476c
	xg	$i2,8($inp)
Packit c4476c
	stg	$i1,0($out,$inp)
Packit c4476c
	stg	$i2,8($out,$inp)
Packit c4476c
	la	$inp,16($inp)
Packit c4476c
	brct	$s3,.Lxts_km_prepare
Packit c4476c
Packit c4476c
	slgr	$inp,$fp		# rewind $inp
Packit c4476c
	la	$s2,0($out,$inp)
Packit c4476c
	lgr	$s3,$fp
Packit c4476c
	.long	0xb92e00aa		# km $s2,$s2
Packit c4476c
	brc	1,.-4			# pay attention to "partial completion"
Packit c4476c
Packit c4476c
	la	$s2,16($sp)
Packit c4476c
	slgr	$s2,$inp
Packit c4476c
	srlg	$s3,$fp,4
Packit c4476c
.Lxts_km_xor:
Packit c4476c
	lg	$i1,0($out,$inp)
Packit c4476c
	lg	$i2,8($out,$inp)
Packit c4476c
	xg	$i1,0($s2,$inp)
Packit c4476c
	xg	$i2,8($s2,$inp)
Packit c4476c
	stg	$i1,0($out,$inp)
Packit c4476c
	stg	$i2,8($out,$inp)
Packit c4476c
	la	$inp,16($inp)
Packit c4476c
	brct	$s3,.Lxts_km_xor
Packit c4476c
Packit c4476c
	slgr	$len,$fp
Packit c4476c
	brc	1,.Lxts_km_loop		# not zero, no borrow
Packit c4476c
	algr	$fp,$len
Packit c4476c
	lghi	$len,0
Packit c4476c
	brc	4+1,.Lxts_km_loop	# not zero
Packit c4476c
Packit c4476c
	l${g}	$i1,0($sp)		# back-chain
Packit c4476c
	llgf	$fp,`2*$SIZE_T-4`($sp)	# bytes used
Packit c4476c
	la	$i2,16($sp)
Packit c4476c
	srlg	$fp,$fp,4
Packit c4476c
.Lxts_km_zap:
Packit c4476c
	stg	$i1,0($i2)
Packit c4476c
	stg	$i1,8($i2)
Packit c4476c
	la	$i2,16($i2)
Packit c4476c
	brct	$fp,.Lxts_km_zap
Packit c4476c
Packit c4476c
	la	$sp,0($i1)
Packit c4476c
	llgc	$len,2*$SIZE_T-1($i1)
Packit c4476c
	nill	$len,0x0f		# $len%=16
Packit c4476c
	bzr	$ra
Packit c4476c
Packit c4476c
	# generate one more tweak...
Packit c4476c
	lghi	$i1,0x87
Packit c4476c
	srag	$i2,$s1,63		# broadcast upper bit
Packit c4476c
	ngr	$i1,$i2			# rem
Packit c4476c
	algr	$s0,$s0
Packit c4476c
	alcgr	$s1,$s1
Packit c4476c
	xgr	$s0,$i1
Packit c4476c
Packit c4476c
	ltr	$len,$len		# clear zero flag
Packit c4476c
	br	$ra
Packit c4476c
.size	_s390x_xts_km,.-_s390x_xts_km
Packit c4476c
Packit c4476c
.globl	AES_xts_encrypt
Packit c4476c
.type	AES_xts_encrypt,\@function
Packit c4476c
.align	16
Packit c4476c
AES_xts_encrypt:
Packit c4476c
	xgr	%r3,%r4			# flip %r3 and %r4, $out and $len
Packit c4476c
	xgr	%r4,%r3
Packit c4476c
	xgr	%r3,%r4
Packit c4476c
___
Packit c4476c
# AES_xts_encrypt, length handling.
# When $SIZE_T is 4, $len is first zero-extended from 32 to 64 bits.
$code.=<<___ if ($SIZE_T==4);
Packit c4476c
	llgfr	$len,$len
Packit c4476c
___
Packit c4476c
# Save the byte length at 1*$SIZE_T($sp) (its low byte is re-read later to
# obtain $len%16), turn $len into a count of whole 16-byte blocks and
# return immediately when that count is zero.
$code.=<<___;
Packit c4476c
	st${g}	$len,1*$SIZE_T($sp)	# save copy of $len
Packit c4476c
	srag	$len,$len,4		# formally wrong, because it expands
Packit c4476c
					# sign byte, but who can afford asking
Packit c4476c
					# to process more than 2^63-1 bytes?
Packit c4476c
					# I use it, because it sets condition
Packit c4476c
					# code...
Packit c4476c
	bcr	8,$ra			# abort if zero (i.e. less than 16)
Packit c4476c
___
Packit c4476c
# AES_xts_encrypt, km-based path; omitted entirely when $softonly is set.
#  - Load the 32-bit word at 240($key2) and branch to .Lxts_enc_software
#    when it is below 16 (unsigned compare).
#    NOTE(review): presumably that word holds a km function code for
#    hardware-capable keys and a smaller round count otherwise; confirm
#    against the key-setup code.
#  - Save $ra and %r6..$s3, turn $len back into a byte count and make $out
#    an offset relative to $inp.
#  - Copy the iv (pointer fetched from $stdframe($sp)) to $tweak($sp) and
#    run the hand-encoded "km $s2,$s2" on it with %r1 pointing at $key2 to
#    produce the initial tweak.
#  - Call _s390x_xts_km for the bulk data; when it returns with the zero
#    flag set there is no partial block and control goes to
#    .Lxts_enc_km_done.
#  - Otherwise step back one block, swap the trailing input bytes with the
#    leading bytes of the last output block (.Lxts_enc_km_steal) and
#    process that block once more using the tweak left in $s0/$s1.
#  - .Lxts_enc_km_done overwrites the tweak slot, restores the saved
#    registers and returns.
# The chunk ends by emitting the .Lxts_enc_software label.
$code.=<<___ if (!$softonly);
Packit c4476c
	llgf	%r0,240($key2)
Packit c4476c
	lhi	%r1,16
Packit c4476c
	clr	%r0,%r1
Packit c4476c
	jl	.Lxts_enc_software
Packit c4476c
Packit c4476c
	st${g}	$ra,5*$SIZE_T($sp)
Packit c4476c
	stm${g}	%r6,$s3,6*$SIZE_T($sp)
Packit c4476c
Packit c4476c
	sllg	$len,$len,4		# $len&=~15
Packit c4476c
	slgr	$out,$inp
Packit c4476c
Packit c4476c
	# generate the tweak value
Packit c4476c
	l${g}	$s3,$stdframe($sp)	# pointer to iv
Packit c4476c
	la	$s2,$tweak($sp)
Packit c4476c
	lmg	$s0,$s1,0($s3)
Packit c4476c
	lghi	$s3,16
Packit c4476c
	stmg	$s0,$s1,0($s2)
Packit c4476c
	la	%r1,0($key2)		# $key2 is not needed anymore
Packit c4476c
	.long	0xb92e00aa		# km $s2,$s2, generate the tweak
Packit c4476c
	brc	1,.-4			# can this happen?
Packit c4476c
Packit c4476c
	l	%r0,240($key1)
Packit c4476c
	la	%r1,0($key1)		# $key1 is not needed anymore
Packit c4476c
	bras	$ra,_s390x_xts_km
Packit c4476c
	jz	.Lxts_enc_km_done
Packit c4476c
Packit c4476c
	aghi	$inp,-16		# take one step back
Packit c4476c
	la	$i3,0($out,$inp)	# put aside real $out
Packit c4476c
.Lxts_enc_km_steal:
Packit c4476c
	llgc	$i1,16($inp)
Packit c4476c
	llgc	$i2,0($out,$inp)
Packit c4476c
	stc	$i1,0($out,$inp)
Packit c4476c
	stc	$i2,16($out,$inp)
Packit c4476c
	la	$inp,1($inp)
Packit c4476c
	brct	$len,.Lxts_enc_km_steal
Packit c4476c
Packit c4476c
	la	$s2,0($i3)
Packit c4476c
	lghi	$s3,16
Packit c4476c
	lrvgr	$i1,$s0			# flip byte order
Packit c4476c
	lrvgr	$i2,$s1
Packit c4476c
	xg	$i1,0($s2)
Packit c4476c
	xg	$i2,8($s2)
Packit c4476c
	stg	$i1,0($s2)
Packit c4476c
	stg	$i2,8($s2)
Packit c4476c
	.long	0xb92e00aa		# km $s2,$s2
Packit c4476c
	brc	1,.-4			# can this happen?
Packit c4476c
	lrvgr	$i1,$s0			# flip byte order
Packit c4476c
	lrvgr	$i2,$s1
Packit c4476c
	xg	$i1,0($i3)
Packit c4476c
	xg	$i2,8($i3)
Packit c4476c
	stg	$i1,0($i3)
Packit c4476c
	stg	$i2,8($i3)
Packit c4476c
Packit c4476c
.Lxts_enc_km_done:
Packit c4476c
	stg	$sp,$tweak+0($sp)	# wipe tweak
Packit c4476c
	stg	$sp,$tweak+8($sp)
Packit c4476c
	l${g}	$ra,5*$SIZE_T($sp)
Packit c4476c
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
Packit c4476c
	br	$ra
Packit c4476c
.align	16
Packit c4476c
.Lxts_enc_software:
Packit c4476c
___
Packit c4476c
# AES_xts_encrypt, table-driven path (always emitted).
#  - Save %r6..$ra and make $out an offset relative to $inp.
#  - Load the iv as four 32-bit words and encrypt it under $key2 with
#    _s390x_AES_encrypt to obtain the initial tweak, kept at $tweak($sp).
#  - .Lxts_enc_enter/.Lxts_enc_loop: XOR the tweak with the input block,
#    encrypt under $key1, XOR with the tweak again and store the result.
#    Before every further block the tweak is reloaded byte-reversed,
#    shifted left by one bit across $s3:$s1 (algr/alcgr) with 0x87 XORed
#    into the low half when the top bit was set, and stored back.
#  - After the last whole block $len is reloaded as the saved byte length
#    modulo 16; if it is non-zero the trailing input bytes are swapped
#    with the leading bytes of the last output block (.Lxts_enc_steal),
#    one more tweak is generated and that block is encrypted again.
#  - .Lxts_enc_done overwrites the tweak slot, restores %r6..$ra and
#    returns.
$code.=<<___;
Packit c4476c
	stm${g}	%r6,$ra,6*$SIZE_T($sp)
Packit c4476c
Packit c4476c
	slgr	$out,$inp
Packit c4476c
Packit c4476c
	l${g}	$s3,$stdframe($sp)	# ivp
Packit c4476c
	llgf	$s0,0($s3)		# load iv
Packit c4476c
	llgf	$s1,4($s3)
Packit c4476c
	llgf	$s2,8($s3)
Packit c4476c
	llgf	$s3,12($s3)
Packit c4476c
	stm${g}	%r2,%r5,2*$SIZE_T($sp)
Packit c4476c
	la	$key,0($key2)
Packit c4476c
	larl	$tbl,AES_Te
Packit c4476c
	bras	$ra,_s390x_AES_encrypt	# generate the tweak
Packit c4476c
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
Packit c4476c
	stm	$s0,$s3,$tweak($sp)	# save the tweak
Packit c4476c
	j	.Lxts_enc_enter
Packit c4476c
Packit c4476c
.align	16
Packit c4476c
.Lxts_enc_loop:
Packit c4476c
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
Packit c4476c
	lrvg	$s3,$tweak+8($sp)
Packit c4476c
	lghi	%r1,0x87
Packit c4476c
	srag	%r0,$s3,63		# broadcast upper bit
Packit c4476c
	ngr	%r1,%r0			# rem
Packit c4476c
	algr	$s1,$s1
Packit c4476c
	alcgr	$s3,$s3
Packit c4476c
	xgr	$s1,%r1
Packit c4476c
	lrvgr	$s1,$s1			# flip byte order
Packit c4476c
	lrvgr	$s3,$s3
Packit c4476c
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
Packit c4476c
	stg	$s1,$tweak+0($sp)	# save the tweak
Packit c4476c
	llgfr	$s1,$s1
Packit c4476c
	srlg	$s2,$s3,32
Packit c4476c
	stg	$s3,$tweak+8($sp)
Packit c4476c
	llgfr	$s3,$s3
Packit c4476c
	la	$inp,16($inp)		# $inp+=16
Packit c4476c
.Lxts_enc_enter:
Packit c4476c
	x	$s0,0($inp)		# ^=*($inp)
Packit c4476c
	x	$s1,4($inp)
Packit c4476c
	x	$s2,8($inp)
Packit c4476c
	x	$s3,12($inp)
Packit c4476c
	stm${g}	%r2,%r3,2*$SIZE_T($sp)	# only two registers are changing
Packit c4476c
	la	$key,0($key1)
Packit c4476c
	bras	$ra,_s390x_AES_encrypt
Packit c4476c
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
Packit c4476c
	x	$s0,$tweak+0($sp)	# ^=tweak
Packit c4476c
	x	$s1,$tweak+4($sp)
Packit c4476c
	x	$s2,$tweak+8($sp)
Packit c4476c
	x	$s3,$tweak+12($sp)
Packit c4476c
	st	$s0,0($out,$inp)
Packit c4476c
	st	$s1,4($out,$inp)
Packit c4476c
	st	$s2,8($out,$inp)
Packit c4476c
	st	$s3,12($out,$inp)
Packit c4476c
	brct${g}	$len,.Lxts_enc_loop
Packit c4476c
Packit c4476c
	llgc	$len,`2*$SIZE_T-1`($sp)
Packit c4476c
	nill	$len,0x0f		# $len%16
Packit c4476c
	jz	.Lxts_enc_done
Packit c4476c
Packit c4476c
	la	$i3,0($inp,$out)	# put aside real $out
Packit c4476c
.Lxts_enc_steal:
Packit c4476c
	llgc	%r0,16($inp)
Packit c4476c
	llgc	%r1,0($out,$inp)
Packit c4476c
	stc	%r0,0($out,$inp)
Packit c4476c
	stc	%r1,16($out,$inp)
Packit c4476c
	la	$inp,1($inp)
Packit c4476c
	brct	$len,.Lxts_enc_steal
Packit c4476c
	la	$out,0($i3)		# restore real $out
Packit c4476c
Packit c4476c
	# generate last tweak...
Packit c4476c
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
Packit c4476c
	lrvg	$s3,$tweak+8($sp)
Packit c4476c
	lghi	%r1,0x87
Packit c4476c
	srag	%r0,$s3,63		# broadcast upper bit
Packit c4476c
	ngr	%r1,%r0			# rem
Packit c4476c
	algr	$s1,$s1
Packit c4476c
	alcgr	$s3,$s3
Packit c4476c
	xgr	$s1,%r1
Packit c4476c
	lrvgr	$s1,$s1			# flip byte order
Packit c4476c
	lrvgr	$s3,$s3
Packit c4476c
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
Packit c4476c
	stg	$s1,$tweak+0($sp)	# save the tweak
Packit c4476c
	llgfr	$s1,$s1
Packit c4476c
	srlg	$s2,$s3,32
Packit c4476c
	stg	$s3,$tweak+8($sp)
Packit c4476c
	llgfr	$s3,$s3
Packit c4476c
Packit c4476c
	x	$s0,0($out)		# ^=*(inp)|stolen cipther-text
Packit c4476c
	x	$s1,4($out)
Packit c4476c
	x	$s2,8($out)
Packit c4476c
	x	$s3,12($out)
Packit c4476c
	st${g}	$out,4*$SIZE_T($sp)
Packit c4476c
	la	$key,0($key1)
Packit c4476c
	bras	$ra,_s390x_AES_encrypt
Packit c4476c
	l${g}	$out,4*$SIZE_T($sp)
Packit c4476c
	x	$s0,`$tweak+0`($sp)	# ^=tweak
Packit c4476c
	x	$s1,`$tweak+4`($sp)
Packit c4476c
	x	$s2,`$tweak+8`($sp)
Packit c4476c
	x	$s3,`$tweak+12`($sp)
Packit c4476c
	st	$s0,0($out)
Packit c4476c
	st	$s1,4($out)
Packit c4476c
	st	$s2,8($out)
Packit c4476c
	st	$s3,12($out)
Packit c4476c
Packit c4476c
.Lxts_enc_done:
Packit c4476c
	stg	$sp,$tweak+0($sp)	# wipe tweak
Packit c4476c
	stg	$sp,$tweak+8($sp)
Packit c4476c
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
Packit c4476c
	br	$ra
Packit c4476c
.size	AES_xts_encrypt,.-AES_xts_encrypt
Packit c4476c
___
Packit c4476c
# void AES_xts_decrypt(const unsigned char *inp, unsigned char *out,
Packit c4476c
#	size_t len, const AES_KEY *key1, const AES_KEY *key2,
Packit c4476c
#	const unsigned char iv[16]);
Packit c4476c
#
Packit c4476c
# AES_xts_decrypt entry: emit the global symbol and exchange %r3 and %r4
# so that $out/$len match the register names declared for this section.
$code.=<<___;
Packit c4476c
.globl	AES_xts_decrypt
Packit c4476c
.type	AES_xts_decrypt,\@function
Packit c4476c
.align	16
Packit c4476c
AES_xts_decrypt:
Packit c4476c
	xgr	%r3,%r4			# flip %r3 and %r4, $out and $len
Packit c4476c
	xgr	%r4,%r3
Packit c4476c
	xgr	%r3,%r4
Packit c4476c
___
Packit c4476c
# When $SIZE_T is 4, $len is zero-extended from 32 to 64 bits.
$code.=<<___ if ($SIZE_T==4);
Packit c4476c
	llgfr	$len,$len
Packit c4476c
___
Packit c4476c
# Save the byte length at 1*$SIZE_T($sp), subtract 16 and return when the
# result is negative. If the remaining length is not a multiple of 16 it
# is kept reduced by one block (that block is handled together with the
# partial tail later); otherwise the 16 is added back.
$code.=<<___;
Packit c4476c
	st${g}	$len,1*$SIZE_T($sp)	# save copy of $len
Packit c4476c
	aghi	$len,-16
Packit c4476c
	bcr	4,$ra			# abort if less than zero. formally
Packit c4476c
					# wrong, because $len is unsigned,
Packit c4476c
					# but who can afford asking to
Packit c4476c
					# process more than 2^63-1 bytes?
Packit c4476c
	tmll	$len,0x0f
Packit c4476c
	jnz	.Lxts_dec_proceed
Packit c4476c
	aghi	$len,16
Packit c4476c
.Lxts_dec_proceed:
Packit c4476c
___
Packit c4476c
# AES_xts_decrypt, km-based path; omitted entirely when $softonly is set.
#  - Load the 32-bit word at 240($key2) and branch to .Lxts_dec_software
#    when it is below 16 (unsigned compare).
#    NOTE(review): presumably that word distinguishes hardware-capable
#    keys from software keys; confirm against the key-setup code.
#  - Save $ra and %r6..$s3, clear the low four bits of $len and make $out
#    an offset relative to $inp.
#  - Copy the iv (pointer fetched from $stdframe($sp)) to $tweak($sp) and
#    run the hand-encoded "km $s2,$s2" on it with %r1 pointing at $key2 to
#    produce the initial tweak.
#  - If $len is now zero, skip the bulk call (.Lxts_dec_km_short): reload
#    $len as the saved byte length modulo 16 and load the tweak directly.
#    Otherwise call _s390x_xts_km; a return with the zero flag set means
#    there is no partial block and control goes to .Lxts_dec_km_done.
#  - For a partial tail, $s2/$s3 keep a byte-reversed copy of the current
#    tweak while $s0/$s1 are advanced to the following tweak
#    (.Lxts_dec_km_2ndtweak). The next block is processed with that second
#    tweak, bytes are swapped with the tail (.Lxts_dec_km_steal), and the
#    resulting block is processed with the tweak kept in $s2/$s3.
#  - .Lxts_dec_km_done overwrites the tweak slot, restores the saved
#    registers and returns.
# The chunk ends by emitting the .Lxts_dec_software label.
$code.=<<___ if (!$softonly);
Packit c4476c
	llgf	%r0,240($key2)
Packit c4476c
	lhi	%r1,16
Packit c4476c
	clr	%r0,%r1
Packit c4476c
	jl	.Lxts_dec_software
Packit c4476c
Packit c4476c
	st${g}	$ra,5*$SIZE_T($sp)
Packit c4476c
	stm${g}	%r6,$s3,6*$SIZE_T($sp)
Packit c4476c
Packit c4476c
	nill	$len,0xfff0		# $len&=~15
Packit c4476c
	slgr	$out,$inp
Packit c4476c
Packit c4476c
	# generate the tweak value
Packit c4476c
	l${g}	$s3,$stdframe($sp)	# pointer to iv
Packit c4476c
	la	$s2,$tweak($sp)
Packit c4476c
	lmg	$s0,$s1,0($s3)
Packit c4476c
	lghi	$s3,16
Packit c4476c
	stmg	$s0,$s1,0($s2)
Packit c4476c
	la	%r1,0($key2)		# $key2 is not needed past this point
Packit c4476c
	.long	0xb92e00aa		# km $s2,$s2, generate the tweak
Packit c4476c
	brc	1,.-4			# can this happen?
Packit c4476c
Packit c4476c
	l	%r0,240($key1)
Packit c4476c
	la	%r1,0($key1)		# $key1 is not needed anymore
Packit c4476c
Packit c4476c
	ltgr	$len,$len
Packit c4476c
	jz	.Lxts_dec_km_short
Packit c4476c
	bras	$ra,_s390x_xts_km
Packit c4476c
	jz	.Lxts_dec_km_done
Packit c4476c
Packit c4476c
	lrvgr	$s2,$s0			# make copy in reverse byte order
Packit c4476c
	lrvgr	$s3,$s1
Packit c4476c
	j	.Lxts_dec_km_2ndtweak
Packit c4476c
Packit c4476c
.Lxts_dec_km_short:
Packit c4476c
	llgc	$len,`2*$SIZE_T-1`($sp)
Packit c4476c
	nill	$len,0x0f		# $len%=16
Packit c4476c
	lrvg	$s0,$tweak+0($sp)	# load the tweak
Packit c4476c
	lrvg	$s1,$tweak+8($sp)
Packit c4476c
	lrvgr	$s2,$s0			# make copy in reverse byte order
Packit c4476c
	lrvgr	$s3,$s1
Packit c4476c
Packit c4476c
.Lxts_dec_km_2ndtweak:
Packit c4476c
	lghi	$i1,0x87
Packit c4476c
	srag	$i2,$s1,63		# broadcast upper bit
Packit c4476c
	ngr	$i1,$i2			# rem
Packit c4476c
	algr	$s0,$s0
Packit c4476c
	alcgr	$s1,$s1
Packit c4476c
	xgr	$s0,$i1
Packit c4476c
	lrvgr	$i1,$s0			# flip byte order
Packit c4476c
	lrvgr	$i2,$s1
Packit c4476c
Packit c4476c
	xg	$i1,0($inp)
Packit c4476c
	xg	$i2,8($inp)
Packit c4476c
	stg	$i1,0($out,$inp)
Packit c4476c
	stg	$i2,8($out,$inp)
Packit c4476c
	la	$i2,0($out,$inp)
Packit c4476c
	lghi	$i3,16
Packit c4476c
	.long	0xb92e0066		# km $i2,$i2
Packit c4476c
	brc	1,.-4			# can this happen?
Packit c4476c
	lrvgr	$i1,$s0
Packit c4476c
	lrvgr	$i2,$s1
Packit c4476c
	xg	$i1,0($out,$inp)
Packit c4476c
	xg	$i2,8($out,$inp)
Packit c4476c
	stg	$i1,0($out,$inp)
Packit c4476c
	stg	$i2,8($out,$inp)
Packit c4476c
Packit c4476c
	la	$i3,0($out,$inp)	# put aside real $out
Packit c4476c
.Lxts_dec_km_steal:
Packit c4476c
	llgc	$i1,16($inp)
Packit c4476c
	llgc	$i2,0($out,$inp)
Packit c4476c
	stc	$i1,0($out,$inp)
Packit c4476c
	stc	$i2,16($out,$inp)
Packit c4476c
	la	$inp,1($inp)
Packit c4476c
	brct	$len,.Lxts_dec_km_steal
Packit c4476c
Packit c4476c
	lgr	$s0,$s2
Packit c4476c
	lgr	$s1,$s3
Packit c4476c
	xg	$s0,0($i3)
Packit c4476c
	xg	$s1,8($i3)
Packit c4476c
	stg	$s0,0($i3)
Packit c4476c
	stg	$s1,8($i3)
Packit c4476c
	la	$s0,0($i3)
Packit c4476c
	lghi	$s1,16
Packit c4476c
	.long	0xb92e0088		# km $s0,$s0
Packit c4476c
	brc	1,.-4			# can this happen?
Packit c4476c
	xg	$s2,0($i3)
Packit c4476c
	xg	$s3,8($i3)
Packit c4476c
	stg	$s2,0($i3)
Packit c4476c
	stg	$s3,8($i3)
Packit c4476c
.Lxts_dec_km_done:
Packit c4476c
	stg	$sp,$tweak+0($sp)	# wipe tweak
Packit c4476c
	stg	$sp,$tweak+8($sp)
Packit c4476c
	l${g}	$ra,5*$SIZE_T($sp)
Packit c4476c
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
Packit c4476c
	br	$ra
Packit c4476c
.align	16
Packit c4476c
.Lxts_dec_software:
Packit c4476c
___
Packit c4476c
# AES_xts_decrypt, table-driven path (always emitted).
#  - Save %r6..$ra, turn $len into a block count and make $out an offset
#    relative to $inp.
#  - Load the iv as four 32-bit words and encrypt it under $key2 with
#    _s390x_AES_encrypt (using AES_Te) to obtain the initial tweak, then
#    switch $tbl to AES_Td for the data blocks. A zero block count goes
#    straight to .Lxts_dec_short.
#  - .Lxts_dec_enter/.Lxts_dec_loop: XOR the tweak with the input block,
#    run _s390x_AES_decrypt under $key1, XOR with the tweak again and
#    store the result. Before every further block the tweak is reloaded
#    byte-reversed, shifted left by one bit across $s3:$s1 (algr/alcgr)
#    with 0x87 XORed into the low half when the top bit was set, and
#    stored back.
#  - After the whole blocks $len is reloaded as the saved byte length
#    modulo 16; zero means done. Otherwise two tweaks are used: the first
#    is saved at $tweak($sp), the second at $tweak-16($sp)
#    (.Lxts_dec_2ndtweak). The next block is decrypted with the second
#    tweak, bytes are swapped with the tail (.Lxts_dec_steal), and the
#    resulting block is decrypted with the first tweak.
#  - Both tweak slots are overwritten before %r6..$ra are restored and the
#    function returns.
$code.=<<___;
Packit c4476c
	stm${g}	%r6,$ra,6*$SIZE_T($sp)
Packit c4476c
Packit c4476c
	srlg	$len,$len,4
Packit c4476c
	slgr	$out,$inp
Packit c4476c
Packit c4476c
	l${g}	$s3,$stdframe($sp)	# ivp
Packit c4476c
	llgf	$s0,0($s3)		# load iv
Packit c4476c
	llgf	$s1,4($s3)
Packit c4476c
	llgf	$s2,8($s3)
Packit c4476c
	llgf	$s3,12($s3)
Packit c4476c
	stm${g}	%r2,%r5,2*$SIZE_T($sp)
Packit c4476c
	la	$key,0($key2)
Packit c4476c
	larl	$tbl,AES_Te
Packit c4476c
	bras	$ra,_s390x_AES_encrypt	# generate the tweak
Packit c4476c
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
Packit c4476c
	larl	$tbl,AES_Td
Packit c4476c
	lt${g}r	$len,$len
Packit c4476c
	stm	$s0,$s3,$tweak($sp)	# save the tweak
Packit c4476c
	jz	.Lxts_dec_short
Packit c4476c
	j	.Lxts_dec_enter
Packit c4476c
Packit c4476c
.align	16
Packit c4476c
.Lxts_dec_loop:
Packit c4476c
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
Packit c4476c
	lrvg	$s3,$tweak+8($sp)
Packit c4476c
	lghi	%r1,0x87
Packit c4476c
	srag	%r0,$s3,63		# broadcast upper bit
Packit c4476c
	ngr	%r1,%r0			# rem
Packit c4476c
	algr	$s1,$s1
Packit c4476c
	alcgr	$s3,$s3
Packit c4476c
	xgr	$s1,%r1
Packit c4476c
	lrvgr	$s1,$s1			# flip byte order
Packit c4476c
	lrvgr	$s3,$s3
Packit c4476c
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
Packit c4476c
	stg	$s1,$tweak+0($sp)	# save the tweak
Packit c4476c
	llgfr	$s1,$s1
Packit c4476c
	srlg	$s2,$s3,32
Packit c4476c
	stg	$s3,$tweak+8($sp)
Packit c4476c
	llgfr	$s3,$s3
Packit c4476c
.Lxts_dec_enter:
Packit c4476c
	x	$s0,0($inp)		# tweak^=*(inp)
Packit c4476c
	x	$s1,4($inp)
Packit c4476c
	x	$s2,8($inp)
Packit c4476c
	x	$s3,12($inp)
Packit c4476c
	stm${g}	%r2,%r3,2*$SIZE_T($sp)	# only two registers are changing
Packit c4476c
	la	$key,0($key1)
Packit c4476c
	bras	$ra,_s390x_AES_decrypt
Packit c4476c
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
Packit c4476c
	x	$s0,$tweak+0($sp)	# ^=tweak
Packit c4476c
	x	$s1,$tweak+4($sp)
Packit c4476c
	x	$s2,$tweak+8($sp)
Packit c4476c
	x	$s3,$tweak+12($sp)
Packit c4476c
	st	$s0,0($out,$inp)
Packit c4476c
	st	$s1,4($out,$inp)
Packit c4476c
	st	$s2,8($out,$inp)
Packit c4476c
	st	$s3,12($out,$inp)
Packit c4476c
	la	$inp,16($inp)
Packit c4476c
	brct${g}	$len,.Lxts_dec_loop
Packit c4476c
Packit c4476c
	llgc	$len,`2*$SIZE_T-1`($sp)
Packit c4476c
	nill	$len,0x0f		# $len%16
Packit c4476c
	jz	.Lxts_dec_done
Packit c4476c
Packit c4476c
	# generate pair of tweaks...
Packit c4476c
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
Packit c4476c
	lrvg	$s3,$tweak+8($sp)
Packit c4476c
	lghi	%r1,0x87
Packit c4476c
	srag	%r0,$s3,63		# broadcast upper bit
Packit c4476c
	ngr	%r1,%r0			# rem
Packit c4476c
	algr	$s1,$s1
Packit c4476c
	alcgr	$s3,$s3
Packit c4476c
	xgr	$s1,%r1
Packit c4476c
	lrvgr	$i2,$s1			# flip byte order
Packit c4476c
	lrvgr	$i3,$s3
Packit c4476c
	stmg	$i2,$i3,$tweak($sp)	# save the 1st tweak
Packit c4476c
	j	.Lxts_dec_2ndtweak
Packit c4476c
Packit c4476c
.align	16
Packit c4476c
.Lxts_dec_short:
Packit c4476c
	llgc	$len,`2*$SIZE_T-1`($sp)
Packit c4476c
	nill	$len,0x0f		# $len%16
Packit c4476c
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
Packit c4476c
	lrvg	$s3,$tweak+8($sp)
Packit c4476c
.Lxts_dec_2ndtweak:
Packit c4476c
	lghi	%r1,0x87
Packit c4476c
	srag	%r0,$s3,63		# broadcast upper bit
Packit c4476c
	ngr	%r1,%r0			# rem
Packit c4476c
	algr	$s1,$s1
Packit c4476c
	alcgr	$s3,$s3
Packit c4476c
	xgr	$s1,%r1
Packit c4476c
	lrvgr	$s1,$s1			# flip byte order
Packit c4476c
	lrvgr	$s3,$s3
Packit c4476c
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
Packit c4476c
	stg	$s1,$tweak-16+0($sp)	# save the 2nd tweak
Packit c4476c
	llgfr	$s1,$s1
Packit c4476c
	srlg	$s2,$s3,32
Packit c4476c
	stg	$s3,$tweak-16+8($sp)
Packit c4476c
	llgfr	$s3,$s3
Packit c4476c
Packit c4476c
	x	$s0,0($inp)		# tweak_the_2nd^=*(inp)
Packit c4476c
	x	$s1,4($inp)
Packit c4476c
	x	$s2,8($inp)
Packit c4476c
	x	$s3,12($inp)
Packit c4476c
	stm${g}	%r2,%r3,2*$SIZE_T($sp)
Packit c4476c
	la	$key,0($key1)
Packit c4476c
	bras	$ra,_s390x_AES_decrypt
Packit c4476c
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
Packit c4476c
	x	$s0,$tweak-16+0($sp)	# ^=tweak_the_2nd
Packit c4476c
	x	$s1,$tweak-16+4($sp)
Packit c4476c
	x	$s2,$tweak-16+8($sp)
Packit c4476c
	x	$s3,$tweak-16+12($sp)
Packit c4476c
	st	$s0,0($out,$inp)
Packit c4476c
	st	$s1,4($out,$inp)
Packit c4476c
	st	$s2,8($out,$inp)
Packit c4476c
	st	$s3,12($out,$inp)
Packit c4476c
Packit c4476c
	la	$i3,0($out,$inp)	# put aside real $out
Packit c4476c
.Lxts_dec_steal:
Packit c4476c
	llgc	%r0,16($inp)
Packit c4476c
	llgc	%r1,0($out,$inp)
Packit c4476c
	stc	%r0,0($out,$inp)
Packit c4476c
	stc	%r1,16($out,$inp)
Packit c4476c
	la	$inp,1($inp)
Packit c4476c
	brct	$len,.Lxts_dec_steal
Packit c4476c
	la	$out,0($i3)		# restore real $out
Packit c4476c
Packit c4476c
	lm	$s0,$s3,$tweak($sp)	# load the 1st tweak
Packit c4476c
	x	$s0,0($out)		# tweak^=*(inp)|stolen cipher-text
Packit c4476c
	x	$s1,4($out)
Packit c4476c
	x	$s2,8($out)
Packit c4476c
	x	$s3,12($out)
Packit c4476c
	st${g}	$out,4*$SIZE_T($sp)
Packit c4476c
	la	$key,0($key1)
Packit c4476c
	bras	$ra,_s390x_AES_decrypt
Packit c4476c
	l${g}	$out,4*$SIZE_T($sp)
Packit c4476c
	x	$s0,$tweak+0($sp)	# ^=tweak
Packit c4476c
	x	$s1,$tweak+4($sp)
Packit c4476c
	x	$s2,$tweak+8($sp)
Packit c4476c
	x	$s3,$tweak+12($sp)
Packit c4476c
	st	$s0,0($out)
Packit c4476c
	st	$s1,4($out)
Packit c4476c
	st	$s2,8($out)
Packit c4476c
	st	$s3,12($out)
Packit c4476c
	stg	$sp,$tweak-16+0($sp)	# wipe 2nd tweak
Packit c4476c
	stg	$sp,$tweak-16+8($sp)
Packit c4476c
.Lxts_dec_done:
Packit c4476c
	stg	$sp,$tweak+0($sp)	# wipe tweak
Packit c4476c
	stg	$sp,$tweak+8($sp)
Packit c4476c
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
Packit c4476c
	br	$ra
Packit c4476c
.size	AES_xts_decrypt,.-AES_xts_decrypt
Packit c4476c
___
Packit c4476c
}
Packit c4476c
# Append the identification string to the generated assembly.
$code.=<<___;
Packit c4476c
.string	"AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
Packit c4476c
___
Packit c4476c
Packit c4476c
# Replace every backtick-quoted span in $code with the result of evaluating
# it as a Perl expression (this is how offsets such as `2*$SIZE_T-1` are
# turned into plain numbers), then write the assembly to STDOUT. The
# explicit close makes a failed write fatal.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
Packit c4476c
print $code;
Packit c4476c
close STDOUT or die "error closing STDOUT: $!";	# force flush