Blame crypto/rc4/asm/rc4-s390x.pl

Packit c4476c
#! /usr/bin/env perl
Packit c4476c
# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
Packit c4476c
#
Packit c4476c
# Licensed under the OpenSSL license (the "License").  You may not use
Packit c4476c
# this file except in compliance with the License.  You can obtain a copy
Packit c4476c
# in the file LICENSE in the source distribution or at
Packit c4476c
# https://www.openssl.org/source/license.html
Packit c4476c
Packit c4476c
#
Packit c4476c
# ====================================================================
Packit c4476c
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
Packit c4476c
# project. The module is, however, dual licensed under OpenSSL and
Packit c4476c
# CRYPTOGAMS licenses depending on where you obtain it. For further
Packit c4476c
# details see http://www.openssl.org/~appro/cryptogams/.
Packit c4476c
# ====================================================================
Packit c4476c
#
Packit c4476c
# February 2009
Packit c4476c
#
Packit c4476c
# Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to
Packit c4476c
# "cluster" Address Generation Interlocks, so that one pipeline stall
Packit c4476c
# resolves several dependencies.
Packit c4476c
Packit c4476c
# November 2010.
Packit c4476c
#
Packit c4476c
# Adapt for -m31 build. If kernel supports what's called "highgprs"
Packit c4476c
# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
Packit c4476c
# instructions and achieve "64-bit" performance even in 31-bit legacy
Packit c4476c
# application context. The feature is not specific to any particular
Packit c4476c
# processor, as long as it's "z-CPU". Latter implies that the code
Packit c4476c
# remains z/Architecture specific. On z990 it was measured to perform
Packit c4476c
# 50% better than code generated by gcc 4.3.
Packit c4476c
Packit c4476c
# The first command-line argument selects the build flavour.
$flavour = shift;
Packit c4476c
Packit c4476c
# A flavour containing "31" or "32" selects the 32-bit variant: $SIZE_T (the
# size in bytes of one register save slot, used in the 6*$SIZE_T($sp)
# offsets below) is 4 and $g is empty, so stm${g}/lm${g} expand to stm/lm.
# Any other flavour selects 8-byte slots and the "g" forms stmg/lmg.
if ($flavour =~ /3[12]/) {
Packit c4476c
	$SIZE_T=4;
Packit c4476c
	$g="";
Packit c4476c
} else {
Packit c4476c
	$SIZE_T=8;
Packit c4476c
	$g="g";
Packit c4476c
}
Packit c4476c
Packit c4476c
# Consume command-line arguments until one looks like a file name (it ends in
# a word character, optional word/hyphen characters, a dot and an extension).
# That argument stays in $output; if no argument matches, $output ends up
# false once the argument list is exhausted.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
Packit c4476c
# Redirect STDOUT to the output file only when a file name was found on the
# command line; otherwise keep writing to the inherited STDOUT instead of
# attempting (and failing) to open an empty name.  The three-argument form
# keeps the file name from being interpreted as part of the open mode, and a
# failed open is reported here rather than surfacing later as a close error.
if ($output) {
	open(STDOUT,">",$output) or die "can't open $output: $!";
}
Packit c4476c
Packit c4476c
# $rp is the register the generated routines branch through to return
# ("br $rp"); $sp is the base register of the register save area used by
# the stm/lm instructions below.
$rp="%r14";
Packit c4476c
$sp="%r15";
Packit c4476c
# $code accumulates the assembly text; it is printed once at the end of the
# script.
$code=<<___;
Packit c4476c
.text
Packit c4476c
Packit c4476c
___
Packit c4476c
Packit c4476c
# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
#
# Emits the RC4 routine.  As the generated code uses it, the key structure
# holds the x index in byte 0, the y index in byte 1 and the state table
# from byte 2 onwards.  Input is processed eight bytes per pass of .Loop8
# (len>>3 passes), and the remaining len&7 bytes one at a time in .Loop1.
Packit c4476c
{
Packit c4476c
# Scratch registers: $acc collects key-stream bytes in the unrolled loop
# (and holds the data byte in .Loop1); $cnt counts the 8-byte passes.
$acc="%r0";
Packit c4476c
$cnt="%r1";
Packit c4476c
# Registers named after the prototype's arguments, in prototype order.
$key="%r2";
Packit c4476c
$len="%r3";
Packit c4476c
$inp="%r4";
Packit c4476c
$out="%r5";
Packit c4476c
Packit c4476c
# Two-element register pairs that swap roles after every unrolled step:
# element 0 is the current x index (@XX) and the table value at x (@TX),
# element 1 is the same for the next x.
@XX=("%r6","%r7");
Packit c4476c
@TX=("%r8","%r9");
Packit c4476c
# $YY is the y index; $TY first receives the table value at y and is then
# turned into the table index of the key-stream byte.
$YY="%r10";
Packit c4476c
$TY="%r11";
Packit c4476c
Packit c4476c
# Entry point: %r6-%r11 are stored in the save area here and reloaded at
# .Lexit.
$code.=<<___;
Packit c4476c
.globl	RC4
Packit c4476c
.type	RC4,\@function
Packit c4476c
.align	64
Packit c4476c
RC4:
Packit c4476c
	stm${g}	%r6,%r11,6*$SIZE_T($sp)
Packit c4476c
___
Packit c4476c
# Only in the 31/32-bit flavours: zero-extend the 32-bit length to 64 bits
# before the 64-bit shift and test below.
$code.=<<___ if ($flavour =~ /3[12]/);
Packit c4476c
	llgfr	$len,$len
Packit c4476c
___
Packit c4476c
# Load x and y, advance x by one (mod 256), compute the number of 8-byte
# passes, preload the table value at x, and go to .Lshort when there is no
# complete 8-byte group.
$code.=<<___;
Packit c4476c
	llgc	$XX[0],0($key)
Packit c4476c
	llgc	$YY,1($key)
Packit c4476c
	la	$XX[0],1($XX[0])
Packit c4476c
	nill	$XX[0],0xff
Packit c4476c
	srlg	$cnt,$len,3
Packit c4476c
	ltgr	$cnt,$cnt
Packit c4476c
	llgc	$TX[0],2($XX[0],$key)
Packit c4476c
	jz	.Lshort
Packit c4476c
	j	.Loop8
Packit c4476c
Packit c4476c
.align	64
Packit c4476c
.Loop8:
Packit c4476c
___
Packit c4476c
# Emit eight unrolled key-stream steps.  The output byte of step $i is only
# fetched at the start of step $i+1 (or after the loop, for the last step).
for ($i=0;$i<8;$i++) {
Packit c4476c
# y = (y + S[x]) mod 256; next x = (x + 1) mod 256.
$code.=<<___;
Packit c4476c
	la	$YY,0($YY,$TX[0])	# $i
Packit c4476c
	nill	$YY,255
Packit c4476c
	la	$XX[1],1($XX[0])
Packit c4476c
	nill	$XX[1],255
Packit c4476c
___
Packit c4476c
# Step 1 starts $acc with the key-stream byte produced by step 0.
$code.=<<___ if ($i==1);
Packit c4476c
	llgc	$acc,2($TY,$key)
Packit c4476c
___
Packit c4476c
# Later steps shift $acc left by one byte and insert the previous step's
# key-stream byte.
$code.=<<___ if ($i>1);
Packit c4476c
	sllg	$acc,$acc,8
Packit c4476c
	ic	$acc,2($TY,$key)
Packit c4476c
___
Packit c4476c
# Swap the table entries at x and y and preload the entry at the next x;
# when the next x equals y the preloaded value is replaced by the byte just
# stored at y.  Finally $TY becomes (S[x] + S[y]) mod 256, the table index
# of this step's key-stream byte.
$code.=<<___;
Packit c4476c
	llgc	$TY,2($YY,$key)
Packit c4476c
	stc	$TX[0],2($YY,$key)
Packit c4476c
	llgc	$TX[1],2($XX[1],$key)
Packit c4476c
	stc	$TY,2($XX[0],$key)
Packit c4476c
	cr	$XX[1],$YY
Packit c4476c
	jne	.Lcmov$i
Packit c4476c
	la	$TX[1],0($TX[0])
Packit c4476c
.Lcmov$i:
Packit c4476c
	la	$TY,0($TY,$TX[0])
Packit c4476c
	nill	$TY,255
Packit c4476c
___
Packit c4476c
# Swap the two elements of each pair so that the "next" registers of this
# step are the "current" registers of the following one.
push(@TX,shift(@TX)); push(@XX,shift(@XX));     # "rotate" registers
Packit c4476c
}
Packit c4476c
Packit c4476c
# Tail of .Loop8: insert the eighth key-stream byte, XOR the eight bytes
# with eight input bytes, store them and advance both pointers.  Then
# .Lshort/.Loop1 handle the remaining len&7 bytes one at a time, and .Lexit
# writes x-1 and y back to bytes 0 and 1 of the key structure and reloads
# %r6-%r11 before returning.
$code.=<<___;
Packit c4476c
	lg	$TX[1],0($inp)
Packit c4476c
	sllg	$acc,$acc,8
Packit c4476c
	la	$inp,8($inp)
Packit c4476c
	ic	$acc,2($TY,$key)
Packit c4476c
	xgr	$acc,$TX[1]
Packit c4476c
	stg	$acc,0($out)
Packit c4476c
	la	$out,8($out)
Packit c4476c
	brctg	$cnt,.Loop8
Packit c4476c
Packit c4476c
.Lshort:
Packit c4476c
	lghi	$acc,7
Packit c4476c
	ngr	$len,$acc
Packit c4476c
	jz	.Lexit
Packit c4476c
	j	.Loop1
Packit c4476c
Packit c4476c
.align	16
Packit c4476c
.Loop1:
Packit c4476c
	la	$YY,0($YY,$TX[0])
Packit c4476c
	nill	$YY,255
Packit c4476c
	llgc	$TY,2($YY,$key)
Packit c4476c
	stc	$TX[0],2($YY,$key)
Packit c4476c
	stc	$TY,2($XX[0],$key)
Packit c4476c
	ar	$TY,$TX[0]
Packit c4476c
	ahi	$XX[0],1
Packit c4476c
	nill	$TY,255
Packit c4476c
	nill	$XX[0],255
Packit c4476c
	llgc	$acc,0($inp)
Packit c4476c
	la	$inp,1($inp)
Packit c4476c
	llgc	$TY,2($TY,$key)
Packit c4476c
	llgc	$TX[0],2($XX[0],$key)
Packit c4476c
	xr	$acc,$TY
Packit c4476c
	stc	$acc,0($out)
Packit c4476c
	la	$out,1($out)
Packit c4476c
	brct	$len,.Loop1
Packit c4476c
Packit c4476c
.Lexit:
Packit c4476c
	ahi	$XX[0],-1
Packit c4476c
	stc	$XX[0],0($key)
Packit c4476c
	stc	$YY,1($key)
Packit c4476c
	lm${g}	%r6,%r11,6*$SIZE_T($sp)
Packit c4476c
	br	$rp
Packit c4476c
.size	RC4,.-RC4
Packit c4476c
.string	"RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
Packit c4476c
Packit c4476c
___
Packit c4476c
}
Packit c4476c
Packit c4476c
# void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
#
# Emits the key-schedule routine.  The generated code zeroes the two index
# bytes at the start of the key structure, fills the 256-entry table at
# offset 2 with the values 0..255, and then runs 256 mixing steps that swap
# table entries, cycling through the len input bytes as often as needed.
Packit c4476c
{
Packit c4476c
# $cnt is the loop counter; $idx is the table index (the fill position in
# the first loop, the running swap index in the second).
$cnt="%r0";
Packit c4476c
$idx="%r1";
Packit c4476c
# Registers named after the prototype's arguments, in prototype order.
$key="%r2";
Packit c4476c
$len="%r3";
Packit c4476c
$inp="%r4";
Packit c4476c
# $acc and $dat hold the table and input bytes of the current step; $ikey is
# the table position, counted from -256 up to 0; $iinp is the offset into
# the input key.
$acc="%r5";
Packit c4476c
$dat="%r6";
Packit c4476c
$ikey="%r7";
Packit c4476c
$iinp="%r8";
Packit c4476c
Packit c4476c
# %r6-%r8 are stored in the save area on entry and reloaded at .Ldone.
# .L1stloop writes the identity table; .L2ndloop adds the current table
# entry and input byte to $idx (mod 256), swaps the two table entries, and
# ends once the low eight bits of $ikey are zero.  Whenever $cnt runs
# out, the input offset is reset to 0 and $cnt is reloaded from $len.
$code.=<<___;
Packit c4476c
.globl	RC4_set_key
Packit c4476c
.type	RC4_set_key,\@function
Packit c4476c
.align	64
Packit c4476c
RC4_set_key:
Packit c4476c
	stm${g}	%r6,%r8,6*$SIZE_T($sp)
Packit c4476c
	lhi	$cnt,256
Packit c4476c
	la	$idx,0
Packit c4476c
	sth	$idx,0($key)
Packit c4476c
.align	4
Packit c4476c
.L1stloop:
Packit c4476c
	stc	$idx,2($idx,$key)
Packit c4476c
	la	$idx,1($idx)
Packit c4476c
	brct	$cnt,.L1stloop
Packit c4476c
Packit c4476c
	lghi	$ikey,-256
Packit c4476c
	lr	$cnt,$len
Packit c4476c
	la	$iinp,0
Packit c4476c
	la	$idx,0
Packit c4476c
.align	16
Packit c4476c
.L2ndloop:
Packit c4476c
	llgc	$acc,2+256($ikey,$key)
Packit c4476c
	llgc	$dat,0($iinp,$inp)
Packit c4476c
	la	$idx,0($idx,$acc)
Packit c4476c
	la	$ikey,1($ikey)
Packit c4476c
	la	$idx,0($idx,$dat)
Packit c4476c
	nill	$idx,255
Packit c4476c
	la	$iinp,1($iinp)
Packit c4476c
	tml	$ikey,255
Packit c4476c
	llgc	$dat,2($idx,$key)
Packit c4476c
	stc	$dat,2+256-1($ikey,$key)
Packit c4476c
	stc	$acc,2($idx,$key)
Packit c4476c
	jz	.Ldone
Packit c4476c
	brct	$cnt,.L2ndloop
Packit c4476c
	lr	$cnt,$len
Packit c4476c
	la	$iinp,0
Packit c4476c
	j	.L2ndloop
Packit c4476c
.Ldone:
Packit c4476c
	lm${g}	%r6,%r8,6*$SIZE_T($sp)
Packit c4476c
	br	$rp
Packit c4476c
.size	RC4_set_key,.-RC4_set_key
Packit c4476c
Packit c4476c
___
Packit c4476c
}
Packit c4476c
Packit c4476c
# const char *RC4_options()
#
# Emits RC4_options, which loads the address of .Loptions into %r2 and
# returns; .Loptions labels the string "rc4(8x,char)" placed in .rodata.
# NOTE(review): .Loptions is defined before the .align directive, so the
# label only coincides with the string if .rodata is already 8-byte aligned
# at that point -- confirm.
Packit c4476c
$code.=<<___;
Packit c4476c
.globl	RC4_options
Packit c4476c
.type	RC4_options,\@function
Packit c4476c
.align	16
Packit c4476c
RC4_options:
Packit c4476c
	larl	%r2,.Loptions
Packit c4476c
	br	%r14
Packit c4476c
.size	RC4_options,.-RC4_options
Packit c4476c
.section	.rodata
Packit c4476c
.Loptions:
Packit c4476c
.align	8
Packit c4476c
.string	"rc4(8x,char)"
Packit c4476c
___
Packit c4476c
Packit c4476c
# Write the accumulated assembly text to STDOUT.
print $code;
Packit c4476c
# Closing STDOUT explicitly flushes the buffered output; a failure here
# aborts the script with the system error message.
close STDOUT or die "error closing STDOUT: $!";	# force flush