#! /usr/bin/env perl
# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for s390x.

# April 2007.
#
# Performance is >30% better than gcc 3.3 generated code. But the real
# twist is that SHA1 hardware support is detected and utilized. In
# which case performance can reach further >4.5x for larger chunks.

# January 2009.
#
# Optimize Xupdate for amount of memory references and reschedule
# instructions to favour dual-issue z10 pipeline. On z10 hardware is
# "only" ~2.3x faster than software.

# November 2010.
#
# Adapt for -m31 build. If kernel supports what's called "highgprs"
# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
# instructions and achieve "64-bit" performance even in 31-bit legacy
# application context. The feature is not specific to any particular
# processor, as long as it's "z-CPU". Latter implies that the code
# remains z/Architecture specific. On z990 it was measured to perform
# 23% better than code generated by gcc 4.3.

# Function code loaded into %r0 before the kimd instruction is issued; it
# also selects which capability bit is tested. A false value leaves the
# hardware path out of the generated code altogether.
$kimdfunc=1;	# magic function code for kimd instruction

# First command-line argument is the build flavour. Anything containing
# "31" or "32" selects 4-byte pointers and the un-suffixed load/store
# mnemonics; everything else selects 8-byte pointers and the "g" forms.
$flavour = shift;

if ($flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}

# Consume remaining arguments until one looks like a file name
# ("name.ext") and redirect STDOUT to it. The result of open is not
# checked here; the write is verified when STDOUT is closed at the end
# of the script.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";

# Register allocation. $K names whichever constant register the round
# bodies should currently add; it starts out as $K_00_39.
$K_00_39="%r0"; $K=$K_00_39;
$K_40_79="%r1";
# $ctx and $prefetch share %r2: the context pointer is spilled to the
# stack on entry and reloaded after the rounds, so %r2 is free in between.
$ctx="%r2";	$prefetch="%r2";
$inp="%r3";
$len="%r4";

# Working variables; @V is rotated by the callers after every round.
$A="%r5";
$B="%r6";
$C="%r7";
$D="%r8";
$E="%r9";	@V=($A,$B,$C,$D,$E);
$t0="%r10";
$t1="%r11";
# Message-schedule registers; rotated by Xupdate.
@X=("%r12","%r13","%r14");
$sp="%r15";

# $stdframe is the offset at which the 16-word (16*4 byte) message
# schedule buffer starts in the stack frame; $frame is the total amount
# by which the stack pointer is lowered.
$stdframe=16*$SIZE_T+4*8;
$frame=$stdframe+16*4;

# Xupdate($i)
#
# Appends to $code the instructions that make the message-schedule word(s)
# for round $i available, and rotates @X so that the callers' $X[1] names
# the register holding the word for the current round.
#
#   $i == 15     additionally emits the "warm-up" for round 16: preloads
#                $prefetch from the start of the schedule buffer and
#                copies $X[2] into $X[0].
#   odd $i       nothing else is emitted and @X is NOT rotated; words are
#                produced two at a time on even rounds only.
#   even $i < 16 loads 8 bytes (two words) from the input at offset $i*4
#                and places the 32-bit-swapped copy in $X[1].
#   even $i >=16 XORs the 64-bit pairs from the schedule buffer, then
#                rotates each 32-bit half left by one (rll on the low
#                half, rllg by 32 to swap halves, and again).
#   even $i <=70 stores the new pair back into the 16-word circular
#                buffer at $stdframe+4*($i%16) off $sp.
#
# Reads the globals @X, $prefetch, $stdframe, $sp and $inp. Expressions in
# backticks are left in the text and evaluated when the script finishes.
sub Xupdate {
my $i=shift;

$code.=<<___ if ($i==15);
	lg	$prefetch,$stdframe($sp)	### Xupdate(16) warm-up
	lr	$X[0],$X[2]
___
return if ($i&1);	# Xupdate is vectorized and executed every 2nd cycle
$code.=<<___ if ($i<16);
	lg	$X[0],`$i*4`($inp)	### Xload($i)
	rllg	$X[1],$X[0],32
___
$code.=<<___ if ($i>=16);
	xgr	$X[0],$prefetch		### Xupdate($i)
	lg	$prefetch,`$stdframe+4*(($i+2)%16)`($sp)
	xg	$X[0],`$stdframe+4*(($i+8)%16)`($sp)
	xgr	$X[0],$prefetch
	rll	$X[0],$X[0],1
	rllg	$X[1],$X[0],32
	rll	$X[1],$X[1],1
	rllg	$X[0],$X[1],32
	lr	$X[2],$X[1]		# feedback
___
$code.=<<___ if ($i<=70);
	stg	$X[0],`$stdframe+4*($i%16)`($sp)
___
unshift(@X,pop(@X));	# rotate: last register becomes $X[0]
}

# BODY_00_19($i,$a,$b,$c,$d,$e)
#
# Appends one round of the first round group to $code. The arguments after
# $i are the register names currently playing the roles a..e.
#
# Emitted computation:
#   e += K + rol(a,5) + X[i] + (((c ^ d) & b) ^ d)
#   b  = rol(b,30)
#
# $xi is taken from $X[1] BEFORE Xupdate is called, because Xupdate may
# rotate @X. Uses the globals $K, $t0 and $t1.
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $xi=$X[1];

	Xupdate($i);
$code.=<<___;
	alr	$e,$K		### $i
	rll	$t1,$a,5
	lr	$t0,$d
	xr	$t0,$c
	alr	$e,$t1
	nr	$t0,$b
	alr	$e,$xi
	xr	$t0,$d
	rll	$b,$b,30
	alr	$e,$t0
___
}

# BODY_20_39($i,$a,$b,$c,$d,$e)
#
# Appends one "parity" round to $code; the callers use it for rounds
# 20..39 and again for rounds 60..79, with $K switched in between.
#
# Emitted computation:
#   e += K + rol(a,5) + X[i] + (b ^ c ^ d)
#   b  = rol(b,30)
#
# $xi is taken from $X[1] BEFORE Xupdate is called, because Xupdate may
# rotate @X. Uses the globals $K, $t0 and $t1.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $xi=$X[1];

	Xupdate($i);
$code.=<<___;
	alr	$e,$K		### $i
	rll	$t1,$a,5
	lr	$t0,$b
	alr	$e,$t1
	xr	$t0,$c
	alr	$e,$xi
	xr	$t0,$d
	rll	$b,$b,30
	alr	$e,$t0
___
}

# BODY_40_59($i,$a,$b,$c,$d,$e)
#
# Appends one "majority" round to $code.
#
# Emitted computation:
#   e += K + rol(a,5) + X[i] + (((b | c) & d) | (b & c))
#   b  = rol(b,30)
#
# $t1 first carries rol(a,5) and is then reused for (b & c), which is why
# the add of $t1 into $e is emitted before $t1 is reloaded.
#
# $xi is taken from $X[1] BEFORE Xupdate is called, because Xupdate may
# rotate @X. Uses the globals $K, $t0 and $t1.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $xi=$X[1];

	Xupdate($i);
$code.=<<___;
	alr	$e,$K		### $i
	rll	$t1,$a,5
	lr	$t0,$b
	alr	$e,$t1
	or	$t0,$c
	lr	$t1,$b
	nr	$t0,$d
	nr	$t1,$c
	alr	$e,$xi
	or	$t0,$t1
	rll	$b,$b,30
	alr	$e,$t0
___
}

# Read-only data and the entry label. Ktable holds the four 32-bit round
# constants followed by 48 bytes of padding.
$code.=<<___;
#include "s390x_arch.h"

.text
.align	64
.type	Ktable,\@object
Ktable: .long	0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6
	.skip	48	#.long	0,0,0,0,0,0,0,0,0,0,0,0
.size	Ktable,.-Ktable
.globl	sha1_block_data_order
.type	sha1_block_data_order,\@function
sha1_block_data_order:
___
# Hardware path, emitted only when $kimdfunc is true: test the capability
# bit for function $kimdfunc in OPENSSL_s390xcap_P and, if it is set, hand
# the whole job to the kimd instruction (length = $len << 6 bytes),
# re-issuing it while it reports partial completion.
$code.=<<___ if ($kimdfunc);
	larl	%r1,OPENSSL_s390xcap_P
	lg	%r0,S390X_KIMD(%r1)	# check kimd capabilities
	tmhh	%r0,`0x8000>>$kimdfunc`
	jz	.Lsoftware
	lghi	%r0,$kimdfunc
	lgr	%r1,$ctx
	lgr	%r2,$inp
	sllg	%r3,$len,6
	.long	0xb93e0002	# kimd %r0,%r2
	brc	1,.-4		# pay attention to "partial completion"
	br	%r14
.align	16
.Lsoftware:
___
# Software path prologue: save $ctx and %r6-%r15 in the caller's frame,
# lower the stack pointer by $frame and store the old value at 0($sp),
# load the five state words and both 64-bit constant pairs.
$code.=<<___;
	lghi	%r1,-$frame
	st${g}	$ctx,`2*$SIZE_T`($sp)
	stm${g}	%r6,%r15,`6*$SIZE_T`($sp)
	lgr	%r0,$sp
	la	$sp,0(%r1,$sp)
	st${g}	%r0,0($sp)

	larl	$t0,Ktable
	llgf	$A,0($ctx)
	llgf	$B,4($ctx)
	llgf	$C,8($ctx)
	llgf	$D,12($ctx)
	llgf	$E,16($ctx)

	lg	$K_00_39,0($t0)
	lg	$K_40_79,8($t0)

.Lloop:
	rllg	$K_00_39,$K_00_39,32
___
# 80 rounds, fully unrolled. @V is rotated after every round so that each
# round body sees the registers in their new a..e roles. Each constant
# register holds two constants; the rllg by 32 between round groups swaps
# which one sits in the low word that the round bodies add.
# $i is deliberately shared across the four loops.
for ($i=0;$i<20;$i++)	{ BODY_00_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	rllg	$K_00_39,$K_00_39,32
___
for (;$i<40;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }
# From here on the round bodies add the second constant register.
$code.=<<___;	$K=$K_40_79;
	rllg	$K_40_79,$K_40_79,32
___
for (;$i<60;$i++)	{ BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	rllg	$K_40_79,$K_40_79,32
___
for (;$i<80;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }
# Epilogue of one block: reload $ctx (its register was reused as
# $prefetch), advance the input by 64 bytes, add the working variables
# into the state, and loop while $len blocks remain. Afterwards restore
# %r6-%r15 (which also restores the stack pointer) and return.
$code.=<<___;

	l${g}	$ctx,`$frame+2*$SIZE_T`($sp)
	la	$inp,64($inp)
	al	$A,0($ctx)
	al	$B,4($ctx)
	al	$C,8($ctx)
	al	$D,12($ctx)
	al	$E,16($ctx)
	st	$A,0($ctx)
	st	$B,4($ctx)
	st	$C,8($ctx)
	st	$D,12($ctx)
	st	$E,16($ctx)
	brct${g} $len,.Lloop

	lm${g}	%r6,%r15,`$frame+6*$SIZE_T`($sp)
	br	%r14
.size	sha1_block_data_order,.-sha1_block_data_order
.string	"SHA1 block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___

# Replace every `...` span in the accumulated text with the value of the
# Perl expression it contains (offsets, masks and the like).
$code =~ s/\`([^\`]*)\`/eval $1/gem;

print $code;
close STDOUT or die "error closing STDOUT: $!";