#! /usr/bin/env perl
# Copyright 2006-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# I let hardware handle unaligned input(*), except on page boundaries
# (see below for details). Otherwise straightforward implementation
# with X vector in register bank.
#
# (*) this means that this module is inappropriate for PPC403? Does
#     anybody know if pre-POWER3 can sustain unaligned load?

# 			-m64	-m32
# ----------------------------------
# PPC970,gcc-4.0.0	+76%	+59%
# Power6,xlc-7		+68%	+33%

# First command-line argument names the target flavour (e.g. linux64le).
$flavour = shift;

# Select the pointer size and the width-specific mnemonics (unsigned
# compare, store-with-update, load, store). A flavour mentioning 64
# wins over one mentioning 32; anything else is rejected.
my $is64 = ($flavour =~ /64/);
die "nonsense $flavour" unless ($is64 || $flavour =~ /32/);

($SIZE_T,$UCMP,$STU,$POP,$PUSH) = $is64 ? (8,"cmpld","stdu","ld", "std")
					: (4,"cmplw","stwu","lwz","stw");

# Offset (relative to the caller's frame) at which the link register
# is stored by the prologue.
$LRSAVE = $is64 ? 2*$SIZE_T : $SIZE_T;

# Define endianness based on flavour
# i.e.: linux64le
$LITTLE_ENDIAN = 0;
$LITTLE_ENDIAN = $SIZE_T if ($flavour =~ /le$/);

# Locate the perlasm translator: first next to this script, then in
# ../../perlasm relative to it. When $0 carries no directory part the
# search falls back to paths relative to the current directory; this is
# spelled out explicitly rather than relying on a stale $1.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through the
# translator; the next command-line argument (output file, may be
# absent) is handed on to it.
#
# Note the low-precedence "or": with "||" the die would bind to the
# command string (always true) and a failed open would pass unnoticed.
my $output = shift;
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";

# Stack frame size, and the offset of the 64-byte scratch area used
# when an input block has to be copied to an aligned location.
$FRAME =24*$SIZE_T+64;
$LOCALS=6*$SIZE_T;

# r0..r5: round constant, stack pointer, TOC, and the three arguments
# (context, input pointer, number of blocks).
($K,$sp,$toc,$ctx,$inp,$num)=map("r$_",0..5);

# Scratch registers for the round functions.
($t0,$t1)=("r15","r6");

# Working variables; @V is rotated by one after every round so the
# same round body can be reused with renamed registers.
($A,$B,$C,$D,$E,$T)=map("r$_",7..12);
@V=($A,$B,$C,$D,$E,$T);

# 16-word message schedule kept entirely in r16..r31.
@X=map("r$_",16..31);

# loadbe(dst, src, temp_reg)
#
# Append code that loads the 32-bit word at memory operand src into
# register dst as a big-endian value. On big-endian targets this is a
# plain lwz. On little-endian targets the word is loaded into temp_reg
# and byte-reversed into dst with one rotate and two rotate-and-insert
# instructions; temp_reg is clobbered and must differ from dst.
sub loadbe {
my ($dst, $src, $temp_reg) = @_;

    if ($LITTLE_ENDIAN) {
	$code.="\tlwz\t$temp_reg,$src\n".
	       "\trotlwi\t$dst,$temp_reg,8\n".
	       "\trlwimi\t$dst,$temp_reg,24,0,7\n".
	       "\trlwimi\t$dst,$temp_reg,24,16,23\n";
    } else {
	$code.="\tlwz\t$dst,$src\n";
    }
}

# BODY_00_19(i, a, b, c, d, e, f)
#
# Append one SHA-1 round for rounds 0..19, using
# F = (b & c) | (~b & d). The new working value is accumulated in f;
# e is reused to hold rol(a,5) and b is rotated by 30 in place.
#
# Rounds 0..14 additionally fetch the next input word (round 0 also
# fetches the first one). From round 15 on the next schedule word
# X[j] = rol(X[j]^X[j+2]^X[j+8]^X[j+13], 1) is computed instead,
# interleaved with the round arithmetic.
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;
# Schedule registers touched by this round (indices taken modulo 16).
my ($Xi,$Xj,$Xj2,$Xj8,$Xj13)=@X[map { $_ % 16 } ($i,$j,$j+2,$j+8,$j+13)];

    if ($i<15) {
	# $f is overwritten by the round below, so until then it is
	# free to serve as the byte-swap temporary for the loads.
	foreach my $n ($i==0 ? ($i,$j) : ($j)) {
		loadbe($X[$n],"`$n*4`($inp)",$f);
	}
	$code.=<<___;
	add	$f,$K,$e
	rotlwi	$e,$a,5
	add	$f,$f,$Xi
	and	$t0,$c,$b
	add	$f,$f,$e
	andc	$t1,$d,$b
	rotlwi	$b,$b,30
	or	$t0,$t0,$t1
	add	$f,$f,$t0
___
    } else {
	$code.=<<___;
	add	$f,$K,$e
	rotlwi	$e,$a,5
	xor	$Xj,$Xj,$Xj2
	add	$f,$f,$Xi
	and	$t0,$c,$b
	xor	$Xj,$Xj,$Xj8
	add	$f,$f,$e
	andc	$t1,$d,$b
	rotlwi	$b,$b,30
	or	$t0,$t0,$t1
	xor	$Xj,$Xj,$Xj13
	add	$f,$f,$t0
	rotlwi	$Xj,$Xj,1
___
    }
}

# BODY_20_39(i, a, b, c, d, e, f)
#
# Append one SHA-1 round using the parity function F = b ^ c ^ d;
# the driver calls it for rounds 20..39 and again for 60..79.
# Rounds below 79 also compute the next schedule word
# X[j] = rol(X[j]^X[j+2]^X[j+8]^X[j+13], 1). Round 79 has no further
# schedule word to produce and instead reloads the five context words
# from memory into r16..r20. Nothing is emitted for i > 79.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;
# Schedule registers touched by this round (indices taken modulo 16).
my ($Xi,$Xj,$Xj2,$Xj8,$Xj13)=@X[map { $_ % 16 } ($i,$j,$j+2,$j+8,$j+13)];

    if ($i==79) {
	$code.=<<___;
	add	$f,$K,$e
	xor	$t0,$b,$d
	rotlwi	$e,$a,5
	lwz	r16,0($ctx)
	add	$f,$f,$Xi
	xor	$t0,$t0,$c
	lwz	r17,4($ctx)
	add	$f,$f,$t0
	rotlwi	$b,$b,30
	lwz	r18,8($ctx)
	lwz	r19,12($ctx)
	add	$f,$f,$e
	lwz	r20,16($ctx)
___
    } elsif ($i<79) {
	$code.=<<___;
	add	$f,$K,$e
	xor	$t0,$b,$d
	rotlwi	$e,$a,5
	xor	$Xj,$Xj,$Xj2
	add	$f,$f,$Xi
	xor	$t0,$t0,$c
	xor	$Xj,$Xj,$Xj8
	add	$f,$f,$t0
	rotlwi	$b,$b,30
	xor	$Xj,$Xj,$Xj13
	add	$f,$f,$e
	rotlwi	$Xj,$Xj,1
___
    }
}

# BODY_40_59(i, a, b, c, d, e, f)
#
# Append one SHA-1 round for rounds 40..59, using the majority
# function computed as F = (b & c) | ((b | c) & d), interleaved with
# the schedule update X[j] = rol(X[j]^X[j+2]^X[j+8]^X[j+13], 1).
# The new working value is accumulated in f; e is reused to hold
# rol(a,5) and b is rotated by 30 in place.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;
# Schedule registers touched by this round (indices taken modulo 16).
my ($Xi,$Xj,$Xj2,$Xj8,$Xj13)=@X[map { $_ % 16 } ($i,$j,$j+2,$j+8,$j+13)];

	# One instruction per entry, emitted in exactly this order.
	$code.=join("",map("\t$_\n",
		"add\t$f,$K,$e",
		"rotlwi\t$e,$a,5",
		"xor\t$Xj,$Xj,$Xj2",
		"add\t$f,$f,$Xi",
		"and\t$t0,$b,$c",
		"xor\t$Xj,$Xj,$Xj8",
		"add\t$f,$f,$e",
		"or\t$t1,$b,$c",
		"rotlwi\t$b,$b,30",
		"xor\t$Xj,$Xj,$Xj13",
		"and\t$t1,$t1,$d",
		"or\t$t0,$t0,$t1",
		"rotlwi\t$Xj,$Xj,1",
		"add\t$f,$f,$t0"));
}

# Public entry point sha1_block_data_order(ctx, inp, num).
#
# The prologue allocates the frame, saves r15..r31 and the link
# register, and loads the five context words into the working
# registers. Word-aligned input is handed to Lsha1_block_private in one
# go. Unaligned input is processed in runs that stay within a page;
# a block that straddles a page boundary is first copied byte-wise to
# the scratch area inside the frame and hashed from there.
$code=<<___;
.machine	"any"
.text

.globl	.sha1_block_data_order
.align	4
.sha1_block_data_order:
	$STU	$sp,-$FRAME($sp)
	mflr	r0
___
# Save r15..r31 in the top 17 pointer-sized slots of the frame.
$code.="\t$PUSH\tr$_,`$FRAME-$SIZE_T*".(32-$_)."`($sp)\n" foreach (15..31);
$code.=<<___;
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
	lwz	$A,0($ctx)
	lwz	$B,4($ctx)
	lwz	$C,8($ctx)
	lwz	$D,12($ctx)
	lwz	$E,16($ctx)
	andi.	r0,$inp,3
	bne	Lunaligned
Laligned:
	mtctr	$num
	bl	Lsha1_block_private
	b	Ldone

; PowerPC specification allows an implementation to be ill-behaved
; upon unaligned access which crosses page boundary. "Better safe
; than sorry" principle makes me treat it specially. But I don't
; look for particular offending word, but rather for 64-byte input
; block which crosses the boundary. Once found that block is aligned
; and hashed separately...
.align	4
Lunaligned:
	subfic	$t1,$inp,4096
	andi.	$t1,$t1,4095	; distance to closest page boundary
	srwi.	$t1,$t1,6	; t1/=64
	beq	Lcross_page
	$UCMP	$num,$t1
	ble	Laligned	; didn't cross the page boundary
	mtctr	$t1
	subfc	$num,$t1,$num
	bl	Lsha1_block_private
Lcross_page:
	li	$t1,16
	mtctr	$t1
	addi	r20,$sp,$LOCALS	; spot within the frame
Lmemcpy:
	lbz	r16,0($inp)
	lbz	r17,1($inp)
	lbz	r18,2($inp)
	lbz	r19,3($inp)
	addi	$inp,$inp,4
	stb	r16,0(r20)
	stb	r17,1(r20)
	stb	r18,2(r20)
	stb	r19,3(r20)
	addi	r20,r20,4
	bdnz	Lmemcpy

	$PUSH	$inp,`$FRAME-$SIZE_T*18`($sp)
	li	$t1,1
	addi	$inp,$sp,$LOCALS
	mtctr	$t1
	bl	Lsha1_block_private
	$POP	$inp,`$FRAME-$SIZE_T*18`($sp)
	addic.	$num,$num,-1
	bne	Lunaligned

Ldone:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
___
# Restore r15..r31 from the slots filled by the prologue.
$code.="\t$POP\tr$_,`$FRAME-$SIZE_T*".(32-$_)."`($sp)\n" foreach (15..31);
$code.=<<___;
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,3,0
	.long	0
___

# This is private block function, which uses tailored calling
# interface, namely upon entry SHA_CTX is pre-loaded to given
# registers and counter register contains amount of chunks to
# digest...
$code.=<<___;
.align	4
Lsha1_block_private:
___

# The 80 rounds run in four stages of 20. Each entry gives the round
# number at which the stage ends, the upper and lower halves of its
# round constant, and the generator for its rounds.
my @stages=(
	[20,"0x5a82","0x7999",\&BODY_00_19],	# K_00_19
	[40,"0x6ed9","0xeba1",\&BODY_20_39],	# K_20_39
	[60,"0x8f1b","0xbcdc",\&BODY_40_59],	# K_40_59
	[80,"0xca62","0xc1d6",\&BODY_20_39],	# K_60_79
);

$i=0;
foreach my $stage (@stages) {
	my ($end,$hi,$lo,$round)=@$stage;

	# Load the stage constant into $K.
	$code.="\tlis\t$K,$hi\n\tori\t$K,$K,$lo\n";

	# Rotating @V after every round renames the working registers
	# so that no data has to be moved between rounds.
	for(;$i<$end;$i++)	{ $round->($i,@V); unshift(@V,pop(@V)); }
}

# Add the round results to the context words reloaded into r16..r20 by
# the last round, store the sums back and make them the working state
# for the next block. The loop repeats while the counter is non-zero.
$code.=<<___;
	add	r16,r16,$E
	add	r17,r17,$T
	add	r18,r18,$A
	add	r19,r19,$B
	add	r20,r20,$C
	stw	r16,0($ctx)
	mr	$A,r16
	stw	r17,4($ctx)
	mr	$B,r17
	stw	r18,8($ctx)
	mr	$C,r18
	stw	r19,12($ctx)
	mr	$D,r19
	stw	r20,16($ctx)
	mr	$E,r20
	addi	$inp,$inp,`16*4`
	bdnz	Lsha1_block_private
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.size	.sha1_block_data_order,.-.sha1_block_data_order
___
# Identification string embedded in the generated object.
$code.=".asciz\t\"SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>\"\n";

# Replace every `expr` in the generated text with the value of the Perl
# expression it encloses (frame offsets and similar arithmetic).
$code =~ s/`([^`]*)`/eval($1)/ge;

# Hand the result to the translator; a failure on its side only
# surfaces when the pipe is closed, hence the check.
print $code;
close STDOUT or die "error closing STDOUT: $!";