Blame crypto/modes/asm/ghash-parisc.pl

Packit c4476c
#! /usr/bin/env perl
Packit c4476c
# Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
Packit c4476c
#
Packit c4476c
# Licensed under the OpenSSL license (the "License").  You may not use
Packit c4476c
# this file except in compliance with the License.  You can obtain a copy
Packit c4476c
# in the file LICENSE in the source distribution or at
Packit c4476c
# https://www.openssl.org/source/license.html
Packit c4476c
Packit c4476c
#
Packit c4476c
# ====================================================================
Packit c4476c
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
Packit c4476c
# project. The module is, however, dual licensed under OpenSSL and
Packit c4476c
# CRYPTOGAMS licenses depending on where you obtain it. For further
Packit c4476c
# details see http://www.openssl.org/~appro/cryptogams/.
Packit c4476c
# ====================================================================
Packit c4476c
#
Packit c4476c
# April 2010
Packit c4476c
#
Packit c4476c
# The module implements "4-bit" GCM GHASH function and underlying
Packit c4476c
# single multiplication operation in GF(2^128). "4-bit" means that it
Packit c4476c
# uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
Packit c4476c
# it processes one byte in 19.6 cycles, which is more than twice as
Packit c4476c
# fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for
Packit c4476c
# 8 cycles, but measured performance on PA-8600 system is ~9 cycles per
Packit c4476c
# processed byte. This is ~2.2x faster than 64-bit code generated by
Packit c4476c
# vendor compiler (which used to be very hard to beat:-).
Packit c4476c
#
Packit c4476c
# Special thanks to polarhome.com for providing HP-UX account.
Packit c4476c
Packit c4476c
# Command line: <flavour> <output file>.
# $flavour selects the 32- vs 64-bit variant further down (it is matched
# against /64/); $output names the file that receives the generated
# assembly.
$flavour = shift;
$output = shift;

# Redirect STDOUT to the output file. Use the three-argument form of open
# so that characters in the file name are never interpreted as a mode,
# and fail loudly instead of silently discarding the generated code when
# the file cannot be created. Without an output name the code is simply
# written to the STDOUT we were started with.
if (defined($output) && length($output)) {
	open STDOUT,'>',$output or die "can't open $output: $!";
}
Packit c4476c
Packit c4476c
# ABI-dependent parameters, picked by whether $flavour mentions "64":
# assembler level, pointer size, frame marker size, return-pointer save
# offset, the store/load mnemonics used by the prologue/epilogue (plain
# and with base-register modification) and the ENTRY_GR count passed to
# .CALLINFO. (For the 32-bit .LEVEL an additional "\n\t.ALLOW\t2.0" was
# once considered and is left disabled.)
($LEVEL,$SIZE_T,$FRAME_MARKER,$SAVED_RP,
 $PUSH,$PUSHMA,$POP,$POPMB,$NREGS) =
    ($flavour =~ /64/)
	? ("2.0W",8,80,16, "std","std,ma","ldd","ldd,mb", 6)
	: ("1.0", 4,48,20, "stw","stwm",  "ldw","ldwm",  11);
Packit c4476c
Packit c4476c
# Stack frame size: ten pointer-sized save slots plus the frame marker
# [+ argument transfer].
$FRAME=$FRAME_MARKER+10*$SIZE_T;

################# volatile registers
($Xi,$Htbl,$inp,$len)=("%r26","%r25","%r24","%r23");	# argument block
($Hhh,$Hll)=($Htbl,"%r22");		# variables; $Hhh shares $Htbl's register
($Zhh,$Zll,$cnt)=("%r21","%r20","%r19");
($rem_4bit,$rem,$mask0xf0)=("%r28","%r29","%r31");

################# preserved registers
($Thh,$Tll,$nlo,$nhi,$byte)=("%r1","%r2","%r3","%r4","%r5");
# The extra "middle" words are only needed when values are handled as
# four 32-bit quantities.
($Zhl,$Zlh,$Hhl,$Hlh,$Thl,$Tlh)=("%r6","%r7","%r8","%r9","%r10","%r11")
					if ($SIZE_T==4);
$rem2="%r6";	# used in PA-RISC 2.0 code
Packit c4476c
Packit c4476c
$code.=<<___;
Packit c4476c
	.LEVEL	$LEVEL
Packit c4476c
	.SPACE	\$TEXT\$
Packit c4476c
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
Packit c4476c
Packit c4476c
	.EXPORT	gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
Packit c4476c
	.ALIGN	64
Packit c4476c
gcm_gmult_4bit
Packit c4476c
	.PROC
Packit c4476c
	.CALLINFO	FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
Packit c4476c
	.ENTRY
Packit c4476c
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
Packit c4476c
	$PUSHMA	%r3,$FRAME(%sp)
Packit c4476c
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
Packit c4476c
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
Packit c4476c
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
Packit c4476c
___
Packit c4476c
$code.=<<___ if ($SIZE_T==4);
Packit c4476c
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
Packit c4476c
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
Packit c4476c
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
Packit c4476c
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
Packit c4476c
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
Packit c4476c
___
Packit c4476c
$code.=<<___;
Packit c4476c
	blr	%r0,$rem_4bit
Packit c4476c
	ldi	3,$rem
Packit c4476c
L\$pic_gmult
Packit c4476c
	andcm	$rem_4bit,$rem,$rem_4bit
Packit c4476c
	addl	$inp,$len,$len
Packit c4476c
	ldo	L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
Packit c4476c
	ldi	0xf0,$mask0xf0
Packit c4476c
___
Packit c4476c
$code.=<<___ if ($SIZE_T==4);
Packit c4476c
	ldi	31,$rem
Packit c4476c
	mtctl	$rem,%cr11
Packit c4476c
	extrd,u,*= $rem,%sar,1,$rem	; executes on PA-RISC 1.0
Packit c4476c
	b	L\$parisc1_gmult
Packit c4476c
	nop
Packit c4476c
___
Packit c4476c

Packit c4476c
$code.=<<___;
Packit c4476c
	ldb	15($Xi),$nlo
Packit c4476c
	ldo	8($Htbl),$Hll
Packit c4476c
Packit c4476c
	and	$mask0xf0,$nlo,$nhi
Packit c4476c
	depd,z	$nlo,59,4,$nlo
Packit c4476c
Packit c4476c
	ldd	$nlo($Hll),$Zll
Packit c4476c
	ldd	$nlo($Hhh),$Zhh
Packit c4476c
Packit c4476c
	depd,z	$Zll,60,4,$rem
Packit c4476c
	shrpd	$Zhh,$Zll,4,$Zll
Packit c4476c
	extrd,u	$Zhh,59,60,$Zhh
Packit c4476c
	ldb	14($Xi),$nlo
Packit c4476c
Packit c4476c
	ldd	$nhi($Hll),$Tll
Packit c4476c
	ldd	$nhi($Hhh),$Thh
Packit c4476c
	and	$mask0xf0,$nlo,$nhi
Packit c4476c
	depd,z	$nlo,59,4,$nlo
Packit c4476c
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
	ldd	$rem($rem_4bit),$rem
Packit c4476c
	b	L\$oop_gmult_pa2
Packit c4476c
	ldi	13,$cnt
Packit c4476c
Packit c4476c
	.ALIGN	8
Packit c4476c
L\$oop_gmult_pa2
Packit c4476c
	xor	$rem,$Zhh,$Zhh		; moved here to work around gas bug
Packit c4476c
	depd,z	$Zll,60,4,$rem
Packit c4476c
Packit c4476c
	shrpd	$Zhh,$Zll,4,$Zll
Packit c4476c
	extrd,u	$Zhh,59,60,$Zhh
Packit c4476c
	ldd	$nlo($Hll),$Tll
Packit c4476c
	ldd	$nlo($Hhh),$Thh
Packit c4476c
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
	ldd	$rem($rem_4bit),$rem
Packit c4476c
Packit c4476c
	xor	$rem,$Zhh,$Zhh
Packit c4476c
	depd,z	$Zll,60,4,$rem
Packit c4476c
	ldbx	$cnt($Xi),$nlo
Packit c4476c
Packit c4476c
	shrpd	$Zhh,$Zll,4,$Zll
Packit c4476c
	extrd,u	$Zhh,59,60,$Zhh
Packit c4476c
	ldd	$nhi($Hll),$Tll
Packit c4476c
	ldd	$nhi($Hhh),$Thh
Packit c4476c
Packit c4476c
	and	$mask0xf0,$nlo,$nhi
Packit c4476c
	depd,z	$nlo,59,4,$nlo
Packit c4476c
	ldd	$rem($rem_4bit),$rem
Packit c4476c
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	addib,uv -1,$cnt,L\$oop_gmult_pa2
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
Packit c4476c
	xor	$rem,$Zhh,$Zhh
Packit c4476c
	depd,z	$Zll,60,4,$rem
Packit c4476c
Packit c4476c
	shrpd	$Zhh,$Zll,4,$Zll
Packit c4476c
	extrd,u	$Zhh,59,60,$Zhh
Packit c4476c
	ldd	$nlo($Hll),$Tll
Packit c4476c
	ldd	$nlo($Hhh),$Thh
Packit c4476c
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
	ldd	$rem($rem_4bit),$rem
Packit c4476c
Packit c4476c
	xor	$rem,$Zhh,$Zhh
Packit c4476c
	depd,z	$Zll,60,4,$rem
Packit c4476c
Packit c4476c
	shrpd	$Zhh,$Zll,4,$Zll
Packit c4476c
	extrd,u	$Zhh,59,60,$Zhh
Packit c4476c
	ldd	$nhi($Hll),$Tll
Packit c4476c
	ldd	$nhi($Hhh),$Thh
Packit c4476c
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
	ldd	$rem($rem_4bit),$rem
Packit c4476c
Packit c4476c
	xor	$rem,$Zhh,$Zhh
Packit c4476c
	std	$Zll,8($Xi)
Packit c4476c
	std	$Zhh,0($Xi)
Packit c4476c
___
Packit c4476c

Packit c4476c
$code.=<<___ if ($SIZE_T==4);
Packit c4476c
	b	L\$done_gmult
Packit c4476c
	nop
Packit c4476c
Packit c4476c
L\$parisc1_gmult
Packit c4476c
	ldb	15($Xi),$nlo
Packit c4476c
	ldo	12($Htbl),$Hll
Packit c4476c
	ldo	8($Htbl),$Hlh
Packit c4476c
	ldo	4($Htbl),$Hhl
Packit c4476c
Packit c4476c
	and	$mask0xf0,$nlo,$nhi
Packit c4476c
	zdep	$nlo,27,4,$nlo
Packit c4476c
Packit c4476c
	ldwx	$nlo($Hll),$Zll
Packit c4476c
	ldwx	$nlo($Hlh),$Zlh
Packit c4476c
	ldwx	$nlo($Hhl),$Zhl
Packit c4476c
	ldwx	$nlo($Hhh),$Zhh
Packit c4476c
	zdep	$Zll,28,4,$rem
Packit c4476c
	ldb	14($Xi),$nlo
Packit c4476c
	ldwx	$rem($rem_4bit),$rem
Packit c4476c
	shrpw	$Zlh,$Zll,4,$Zll
Packit c4476c
	ldwx	$nhi($Hll),$Tll
Packit c4476c
	shrpw	$Zhl,$Zlh,4,$Zlh
Packit c4476c
	ldwx	$nhi($Hlh),$Tlh
Packit c4476c
	shrpw	$Zhh,$Zhl,4,$Zhl
Packit c4476c
	ldwx	$nhi($Hhl),$Thl
Packit c4476c
	extru	$Zhh,27,28,$Zhh
Packit c4476c
	ldwx	$nhi($Hhh),$Thh
Packit c4476c
	xor	$rem,$Zhh,$Zhh
Packit c4476c
	and	$mask0xf0,$nlo,$nhi
Packit c4476c
	zdep	$nlo,27,4,$nlo
Packit c4476c
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	ldwx	$nlo($Hll),$Tll
Packit c4476c
	xor	$Tlh,$Zlh,$Zlh
Packit c4476c
	ldwx	$nlo($Hlh),$Tlh
Packit c4476c
	xor	$Thl,$Zhl,$Zhl
Packit c4476c
	b	L\$oop_gmult_pa1
Packit c4476c
	ldi	13,$cnt
Packit c4476c
Packit c4476c
	.ALIGN	8
Packit c4476c
L\$oop_gmult_pa1
Packit c4476c
	zdep	$Zll,28,4,$rem
Packit c4476c
	ldwx	$nlo($Hhl),$Thl
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
	ldwx	$rem($rem_4bit),$rem
Packit c4476c
	shrpw	$Zlh,$Zll,4,$Zll
Packit c4476c
	ldwx	$nlo($Hhh),$Thh
Packit c4476c
	shrpw	$Zhl,$Zlh,4,$Zlh
Packit c4476c
	ldbx	$cnt($Xi),$nlo
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	ldwx	$nhi($Hll),$Tll
Packit c4476c
	shrpw	$Zhh,$Zhl,4,$Zhl
Packit c4476c
	xor	$Tlh,$Zlh,$Zlh
Packit c4476c
	ldwx	$nhi($Hlh),$Tlh
Packit c4476c
	extru	$Zhh,27,28,$Zhh
Packit c4476c
	xor	$Thl,$Zhl,$Zhl
Packit c4476c
	ldwx	$nhi($Hhl),$Thl
Packit c4476c
	xor	$rem,$Zhh,$Zhh
Packit c4476c
	zdep	$Zll,28,4,$rem
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
	ldwx	$nhi($Hhh),$Thh
Packit c4476c
	shrpw	$Zlh,$Zll,4,$Zll
Packit c4476c
	ldwx	$rem($rem_4bit),$rem
Packit c4476c
	shrpw	$Zhl,$Zlh,4,$Zlh
Packit c4476c
	shrpw	$Zhh,$Zhl,4,$Zhl
Packit c4476c
	and	$mask0xf0,$nlo,$nhi
Packit c4476c
	extru	$Zhh,27,28,$Zhh
Packit c4476c
	zdep	$nlo,27,4,$nlo
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	ldwx	$nlo($Hll),$Tll
Packit c4476c
	xor	$Tlh,$Zlh,$Zlh
Packit c4476c
	ldwx	$nlo($Hlh),$Tlh
Packit c4476c
	xor	$rem,$Zhh,$Zhh
Packit c4476c
	addib,uv -1,$cnt,L\$oop_gmult_pa1
Packit c4476c
	xor	$Thl,$Zhl,$Zhl
Packit c4476c
Packit c4476c
	zdep	$Zll,28,4,$rem
Packit c4476c
	ldwx	$nlo($Hhl),$Thl
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
	ldwx	$rem($rem_4bit),$rem
Packit c4476c
	shrpw	$Zlh,$Zll,4,$Zll
Packit c4476c
	ldwx	$nlo($Hhh),$Thh
Packit c4476c
	shrpw	$Zhl,$Zlh,4,$Zlh
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	ldwx	$nhi($Hll),$Tll
Packit c4476c
	shrpw	$Zhh,$Zhl,4,$Zhl
Packit c4476c
	xor	$Tlh,$Zlh,$Zlh
Packit c4476c
	ldwx	$nhi($Hlh),$Tlh
Packit c4476c
	extru	$Zhh,27,28,$Zhh
Packit c4476c
	xor	$rem,$Zhh,$Zhh
Packit c4476c
	xor	$Thl,$Zhl,$Zhl
Packit c4476c
	ldwx	$nhi($Hhl),$Thl
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
	ldwx	$nhi($Hhh),$Thh
Packit c4476c
	zdep	$Zll,28,4,$rem
Packit c4476c
	ldwx	$rem($rem_4bit),$rem
Packit c4476c
	shrpw	$Zlh,$Zll,4,$Zll
Packit c4476c
	shrpw	$Zhl,$Zlh,4,$Zlh
Packit c4476c
	shrpw	$Zhh,$Zhl,4,$Zhl
Packit c4476c
	extru	$Zhh,27,28,$Zhh
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	xor	$Tlh,$Zlh,$Zlh
Packit c4476c
	xor	$rem,$Zhh,$Zhh
Packit c4476c
	stw	$Zll,12($Xi)
Packit c4476c
	xor	$Thl,$Zhl,$Zhl
Packit c4476c
	stw	$Zlh,8($Xi)
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
	stw	$Zhl,4($Xi)
Packit c4476c
	stw	$Zhh,0($Xi)
Packit c4476c
___
Packit c4476c
$code.=<<___;
Packit c4476c
L\$done_gmult
Packit c4476c
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue
Packit c4476c
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
Packit c4476c
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
Packit c4476c
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
Packit c4476c
___
Packit c4476c
$code.=<<___ if ($SIZE_T==4);
Packit c4476c
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
Packit c4476c
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
Packit c4476c
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
Packit c4476c
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
Packit c4476c
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
Packit c4476c
___
Packit c4476c
$code.=<<___;
Packit c4476c
	bv	(%r2)
Packit c4476c
	.EXIT
Packit c4476c
	$POPMB	-$FRAME(%sp),%r3
Packit c4476c
	.PROCEND
Packit c4476c
Packit c4476c
	.EXPORT	gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
Packit c4476c
	.ALIGN	64
Packit c4476c
gcm_ghash_4bit
Packit c4476c
	.PROC
Packit c4476c
	.CALLINFO	FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=11
Packit c4476c
	.ENTRY
Packit c4476c
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
Packit c4476c
	$PUSHMA	%r3,$FRAME(%sp)
Packit c4476c
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
Packit c4476c
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
Packit c4476c
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
Packit c4476c
___
Packit c4476c
$code.=<<___ if ($SIZE_T==4);
Packit c4476c
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
Packit c4476c
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
Packit c4476c
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
Packit c4476c
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
Packit c4476c
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
Packit c4476c
___
Packit c4476c
$code.=<<___;
Packit c4476c
	blr	%r0,$rem_4bit
Packit c4476c
	ldi	3,$rem
Packit c4476c
L\$pic_ghash
Packit c4476c
	andcm	$rem_4bit,$rem,$rem_4bit
Packit c4476c
	addl	$inp,$len,$len
Packit c4476c
	ldo	L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
Packit c4476c
	ldi	0xf0,$mask0xf0
Packit c4476c
___
Packit c4476c
$code.=<<___ if ($SIZE_T==4);
Packit c4476c
	ldi	31,$rem
Packit c4476c
	mtctl	$rem,%cr11
Packit c4476c
	extrd,u,*= $rem,%sar,1,$rem	; executes on PA-RISC 1.0
Packit c4476c
	b	L\$parisc1_ghash
Packit c4476c
	nop
Packit c4476c
___
Packit c4476c

Packit c4476c
$code.=<<___;
Packit c4476c
	ldb	15($Xi),$nlo
Packit c4476c
	ldo	8($Htbl),$Hll
Packit c4476c
Packit c4476c
L\$outer_ghash_pa2
Packit c4476c
	ldb	15($inp),$nhi
Packit c4476c
	xor	$nhi,$nlo,$nlo
Packit c4476c
	and	$mask0xf0,$nlo,$nhi
Packit c4476c
	depd,z	$nlo,59,4,$nlo
Packit c4476c
Packit c4476c
	ldd	$nlo($Hll),$Zll
Packit c4476c
	ldd	$nlo($Hhh),$Zhh
Packit c4476c
Packit c4476c
	depd,z	$Zll,60,4,$rem
Packit c4476c
	shrpd	$Zhh,$Zll,4,$Zll
Packit c4476c
	extrd,u	$Zhh,59,60,$Zhh
Packit c4476c
	ldb	14($Xi),$nlo
Packit c4476c
	ldb	14($inp),$byte
Packit c4476c
Packit c4476c
	ldd	$nhi($Hll),$Tll
Packit c4476c
	ldd	$nhi($Hhh),$Thh
Packit c4476c
	xor	$byte,$nlo,$nlo
Packit c4476c
	and	$mask0xf0,$nlo,$nhi
Packit c4476c
	depd,z	$nlo,59,4,$nlo
Packit c4476c
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
	ldd	$rem($rem_4bit),$rem
Packit c4476c
	b	L\$oop_ghash_pa2
Packit c4476c
	ldi	13,$cnt
Packit c4476c
Packit c4476c
	.ALIGN	8
Packit c4476c
L\$oop_ghash_pa2
Packit c4476c
	xor	$rem,$Zhh,$Zhh		; moved here to work around gas bug
Packit c4476c
	depd,z	$Zll,60,4,$rem2
Packit c4476c
Packit c4476c
	shrpd	$Zhh,$Zll,4,$Zll
Packit c4476c
	extrd,u	$Zhh,59,60,$Zhh
Packit c4476c
	ldd	$nlo($Hll),$Tll
Packit c4476c
	ldd	$nlo($Hhh),$Thh
Packit c4476c
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
	ldbx	$cnt($Xi),$nlo
Packit c4476c
	ldbx	$cnt($inp),$byte
Packit c4476c
Packit c4476c
	depd,z	$Zll,60,4,$rem
Packit c4476c
	shrpd	$Zhh,$Zll,4,$Zll
Packit c4476c
	ldd	$rem2($rem_4bit),$rem2
Packit c4476c
Packit c4476c
	xor	$rem2,$Zhh,$Zhh
Packit c4476c
	xor	$byte,$nlo,$nlo
Packit c4476c
	ldd	$nhi($Hll),$Tll
Packit c4476c
	ldd	$nhi($Hhh),$Thh
Packit c4476c
Packit c4476c
	and	$mask0xf0,$nlo,$nhi
Packit c4476c
	depd,z	$nlo,59,4,$nlo
Packit c4476c
Packit c4476c
	extrd,u	$Zhh,59,60,$Zhh
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
Packit c4476c
	ldd	$rem($rem_4bit),$rem
Packit c4476c
	addib,uv -1,$cnt,L\$oop_ghash_pa2
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
Packit c4476c
	xor	$rem,$Zhh,$Zhh
Packit c4476c
	depd,z	$Zll,60,4,$rem2
Packit c4476c
Packit c4476c
	shrpd	$Zhh,$Zll,4,$Zll
Packit c4476c
	extrd,u	$Zhh,59,60,$Zhh
Packit c4476c
	ldd	$nlo($Hll),$Tll
Packit c4476c
	ldd	$nlo($Hhh),$Thh
Packit c4476c
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
Packit c4476c
	depd,z	$Zll,60,4,$rem
Packit c4476c
	shrpd	$Zhh,$Zll,4,$Zll
Packit c4476c
	ldd	$rem2($rem_4bit),$rem2
Packit c4476c
Packit c4476c
	xor	$rem2,$Zhh,$Zhh
Packit c4476c
	ldd	$nhi($Hll),$Tll
Packit c4476c
	ldd	$nhi($Hhh),$Thh
Packit c4476c
Packit c4476c
	extrd,u	$Zhh,59,60,$Zhh
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
	ldd	$rem($rem_4bit),$rem
Packit c4476c
Packit c4476c
	xor	$rem,$Zhh,$Zhh
Packit c4476c
	std	$Zll,8($Xi)
Packit c4476c
	ldo	16($inp),$inp
Packit c4476c
	std	$Zhh,0($Xi)
Packit c4476c
	cmpb,*<> $inp,$len,L\$outer_ghash_pa2
Packit c4476c
	copy	$Zll,$nlo
Packit c4476c
___
Packit c4476c

Packit c4476c
$code.=<<___ if ($SIZE_T==4);
Packit c4476c
	b	L\$done_ghash
Packit c4476c
	nop
Packit c4476c
Packit c4476c
L\$parisc1_ghash
Packit c4476c
	ldb	15($Xi),$nlo
Packit c4476c
	ldo	12($Htbl),$Hll
Packit c4476c
	ldo	8($Htbl),$Hlh
Packit c4476c
	ldo	4($Htbl),$Hhl
Packit c4476c
Packit c4476c
L\$outer_ghash_pa1
Packit c4476c
	ldb	15($inp),$byte
Packit c4476c
	xor	$byte,$nlo,$nlo
Packit c4476c
	and	$mask0xf0,$nlo,$nhi
Packit c4476c
	zdep	$nlo,27,4,$nlo
Packit c4476c
Packit c4476c
	ldwx	$nlo($Hll),$Zll
Packit c4476c
	ldwx	$nlo($Hlh),$Zlh
Packit c4476c
	ldwx	$nlo($Hhl),$Zhl
Packit c4476c
	ldwx	$nlo($Hhh),$Zhh
Packit c4476c
	zdep	$Zll,28,4,$rem
Packit c4476c
	ldb	14($Xi),$nlo
Packit c4476c
	ldb	14($inp),$byte
Packit c4476c
	ldwx	$rem($rem_4bit),$rem
Packit c4476c
	shrpw	$Zlh,$Zll,4,$Zll
Packit c4476c
	ldwx	$nhi($Hll),$Tll
Packit c4476c
	shrpw	$Zhl,$Zlh,4,$Zlh
Packit c4476c
	ldwx	$nhi($Hlh),$Tlh
Packit c4476c
	shrpw	$Zhh,$Zhl,4,$Zhl
Packit c4476c
	ldwx	$nhi($Hhl),$Thl
Packit c4476c
	extru	$Zhh,27,28,$Zhh
Packit c4476c
	ldwx	$nhi($Hhh),$Thh
Packit c4476c
	xor	$byte,$nlo,$nlo
Packit c4476c
	xor	$rem,$Zhh,$Zhh
Packit c4476c
	and	$mask0xf0,$nlo,$nhi
Packit c4476c
	zdep	$nlo,27,4,$nlo
Packit c4476c
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	ldwx	$nlo($Hll),$Tll
Packit c4476c
	xor	$Tlh,$Zlh,$Zlh
Packit c4476c
	ldwx	$nlo($Hlh),$Tlh
Packit c4476c
	xor	$Thl,$Zhl,$Zhl
Packit c4476c
	b	L\$oop_ghash_pa1
Packit c4476c
	ldi	13,$cnt
Packit c4476c
Packit c4476c
	.ALIGN	8
Packit c4476c
L\$oop_ghash_pa1
Packit c4476c
	zdep	$Zll,28,4,$rem
Packit c4476c
	ldwx	$nlo($Hhl),$Thl
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
	ldwx	$rem($rem_4bit),$rem
Packit c4476c
	shrpw	$Zlh,$Zll,4,$Zll
Packit c4476c
	ldwx	$nlo($Hhh),$Thh
Packit c4476c
	shrpw	$Zhl,$Zlh,4,$Zlh
Packit c4476c
	ldbx	$cnt($Xi),$nlo
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	ldwx	$nhi($Hll),$Tll
Packit c4476c
	shrpw	$Zhh,$Zhl,4,$Zhl
Packit c4476c
	ldbx	$cnt($inp),$byte
Packit c4476c
	xor	$Tlh,$Zlh,$Zlh
Packit c4476c
	ldwx	$nhi($Hlh),$Tlh
Packit c4476c
	extru	$Zhh,27,28,$Zhh
Packit c4476c
	xor	$Thl,$Zhl,$Zhl
Packit c4476c
	ldwx	$nhi($Hhl),$Thl
Packit c4476c
	xor	$rem,$Zhh,$Zhh
Packit c4476c
	zdep	$Zll,28,4,$rem
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
	ldwx	$nhi($Hhh),$Thh
Packit c4476c
	shrpw	$Zlh,$Zll,4,$Zll
Packit c4476c
	ldwx	$rem($rem_4bit),$rem
Packit c4476c
	shrpw	$Zhl,$Zlh,4,$Zlh
Packit c4476c
	xor	$byte,$nlo,$nlo
Packit c4476c
	shrpw	$Zhh,$Zhl,4,$Zhl
Packit c4476c
	and	$mask0xf0,$nlo,$nhi
Packit c4476c
	extru	$Zhh,27,28,$Zhh
Packit c4476c
	zdep	$nlo,27,4,$nlo
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	ldwx	$nlo($Hll),$Tll
Packit c4476c
	xor	$Tlh,$Zlh,$Zlh
Packit c4476c
	ldwx	$nlo($Hlh),$Tlh
Packit c4476c
	xor	$rem,$Zhh,$Zhh
Packit c4476c
	addib,uv -1,$cnt,L\$oop_ghash_pa1
Packit c4476c
	xor	$Thl,$Zhl,$Zhl
Packit c4476c
Packit c4476c
	zdep	$Zll,28,4,$rem
Packit c4476c
	ldwx	$nlo($Hhl),$Thl
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
	ldwx	$rem($rem_4bit),$rem
Packit c4476c
	shrpw	$Zlh,$Zll,4,$Zll
Packit c4476c
	ldwx	$nlo($Hhh),$Thh
Packit c4476c
	shrpw	$Zhl,$Zlh,4,$Zlh
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	ldwx	$nhi($Hll),$Tll
Packit c4476c
	shrpw	$Zhh,$Zhl,4,$Zhl
Packit c4476c
	xor	$Tlh,$Zlh,$Zlh
Packit c4476c
	ldwx	$nhi($Hlh),$Tlh
Packit c4476c
	extru	$Zhh,27,28,$Zhh
Packit c4476c
	xor	$rem,$Zhh,$Zhh
Packit c4476c
	xor	$Thl,$Zhl,$Zhl
Packit c4476c
	ldwx	$nhi($Hhl),$Thl
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
	ldwx	$nhi($Hhh),$Thh
Packit c4476c
	zdep	$Zll,28,4,$rem
Packit c4476c
	ldwx	$rem($rem_4bit),$rem
Packit c4476c
	shrpw	$Zlh,$Zll,4,$Zll
Packit c4476c
	shrpw	$Zhl,$Zlh,4,$Zlh
Packit c4476c
	shrpw	$Zhh,$Zhl,4,$Zhl
Packit c4476c
	extru	$Zhh,27,28,$Zhh
Packit c4476c
	xor	$Tll,$Zll,$Zll
Packit c4476c
	xor	$Tlh,$Zlh,$Zlh
Packit c4476c
	xor	$rem,$Zhh,$Zhh
Packit c4476c
	stw	$Zll,12($Xi)
Packit c4476c
	xor	$Thl,$Zhl,$Zhl
Packit c4476c
	stw	$Zlh,8($Xi)
Packit c4476c
	xor	$Thh,$Zhh,$Zhh
Packit c4476c
	stw	$Zhl,4($Xi)
Packit c4476c
	ldo	16($inp),$inp
Packit c4476c
	stw	$Zhh,0($Xi)
Packit c4476c
	comb,<>	$inp,$len,L\$outer_ghash_pa1
Packit c4476c
	copy	$Zll,$nlo
Packit c4476c
___
Packit c4476c
$code.=<<___;
Packit c4476c
L\$done_ghash
Packit c4476c
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue
Packit c4476c
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
Packit c4476c
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
Packit c4476c
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
Packit c4476c
___
Packit c4476c
$code.=<<___ if ($SIZE_T==4);
Packit c4476c
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
Packit c4476c
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
Packit c4476c
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
Packit c4476c
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
Packit c4476c
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
Packit c4476c
___
Packit c4476c
$code.=<<___;
Packit c4476c
	bv	(%r2)
Packit c4476c
	.EXIT
Packit c4476c
	$POPMB	-$FRAME(%sp),%r3
Packit c4476c
	.PROCEND
Packit c4476c
Packit c4476c
	.ALIGN	64
Packit c4476c
L\$rem_4bit
Packit c4476c
	.WORD	`0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
Packit c4476c
	.WORD	`0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
Packit c4476c
	.WORD	`0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
Packit c4476c
	.WORD	`0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
Packit c4476c
	.STRINGZ "GHASH for PA-RISC, GRYPTOGAMS by <appro\@openssl.org>"
Packit c4476c
	.ALIGN	64
Packit c4476c
___
Packit c4476c
Packit c4476c
# Explicitly encode PA-RISC 2.0 instructions used in this module, so
Packit c4476c
# that it can be compiled with .LEVEL 1.0. It should be noted that I
Packit c4476c
# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
Packit c4476c
# directive...
Packit c4476c
Packit c4476c
# Hand-encode "ldd" as a raw .WORD so that it can be assembled under
# .LEVEL 1.0.
#   $mod  - completer text following the mnemonic (e.g. "" or ",mb")
#   $args - operand string
# Returns the replacement assembly line; operands that match neither
# recognised form are passed through as a plain "ldd" line.
my $ldd = sub {
  my ($mod,$args) = @_;
  my $orig = "ldd$mod\t$args";
  my $word;

    if (my ($x,$b,$t) =
	    $args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/)	# format 4
    {	# register-indexed form: index(base),target
	$word = (0x03<<26)|($b<<21)|($x<<16)|(3<<6)|$t;
    }
    elsif (my ($disp,$base,$dst) =
	    $args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/)	# format 5
    {	# short-displacement form: disp(base),target
	$word  = (0x03<<26)|($base<<21)|(1<<12)|(3<<6)|$dst;
	$word |= (($disp&0xF)<<17)|(($disp&0x10)<<12);		# encode offset
	$word |= (1<<5)  if ($mod =~ /^,m/);
	$word |= (1<<13) if ($mod =~ /^,mb/);
    }

    return "\t".$orig unless (defined($word));
    return sprintf "\t.WORD\t0x%08x\t; %s",$word,$orig;
};
Packit c4476c
Packit c4476c
# Hand-encode "std" as a raw .WORD so that it can be assembled under
# .LEVEL 1.0.
#   $mod  - completer text following the mnemonic (kept only in the
#           trailing comment / pass-through line)
#   $args - operand string, expected as "%rSRC,DISP(%rBASE)"
# Returns the replacement assembly line; operands in any other form are
# passed through as a plain "std" line.
my $std = sub {
  my ($mod,$args) = @_;
  my $orig = "std$mod\t$args";

    if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
    {	# Displacement bits 3..12 are shifted up by one and bit 13 lands
	# in the lowest bit of the word. The opcode expression previously
	# had an unbalanced parenthesis, which made the whole script fail
	# to compile.
	my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
	sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    else { "\t".$orig; }
};
Packit c4476c
Packit c4476c
# Hand-encode "extrd" as a raw .WORD so that it can be assembled under
# .LEVEL 1.0.
#   $mod  - completer text following the mnemonic
#   $args - operand string, either "%rS,POS,LEN,%rT" or "%rS,%sar,LEN,%rT"
# Returns the replacement assembly line; unrecognised operands are passed
# through as a plain "extrd" line.
my $extrd = sub {
  my ($mod,$args) = @_;
  my $orig = "extrd$mod\t$args";
  my $word;

    # The only completer in use is ",u"; it is implied by the encoding.
    if (my ($src,$pos,$width,$dst) =
	    $args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/)	# format 15
    {	my $len = 32-$width;
	$word  = (0x36<<26)|($src<<21)|($dst<<16);
	$word |= (($pos&0x20)<<6)|(($pos&0x1f)<<5);		# encode pos
	$word |= (($len&0x20)<<7)|($len&0x1f);			# encode len
    }
    elsif (my ($from,$bits,$to) =
	    $args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/)	# format 12
    {	my $len = 32-$bits;
	$word  = (0x34<<26)|($from<<21)|($to<<16)|(2<<11)|(1<<9);
	$word |= (($len&0x20)<<3)|($len&0x1f);			# encode len
	$word |= (1<<13) if ($mod =~ /,\**=/);	# "=" condition requested
    }

    return defined($word)
	? sprintf("\t.WORD\t0x%08x\t; %s",$word,$orig)
	: "\t".$orig;
};
Packit c4476c
Packit c4476c
# Hand-encode "shrpd" as a raw .WORD so that it can be assembled under
# .LEVEL 1.0.
#   $mod  - completer text following the mnemonic
#   $args - operand string, "%rA,%rB,SA,%rT" or "%rA,%rB,%sar,%rT"
# Returns the replacement assembly line; unrecognised operands are passed
# through as a plain "shrpd" line.
my $shrpd = sub {
  my ($mod,$args) = @_;
  my $orig = "shrpd$mod\t$args";
  my $word;

    if (my ($r1,$r2,$sa,$t) =
	    $args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/)	# format 14
    {	# fixed shift amount, stored as 63 minus the amount
	my $cpos = 63-$sa;
	$word  = (0x34<<26)|($r2<<21)|($r1<<16)|(1<<10)|$t;
	$word |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);		# encode sa
    }
    elsif (my ($a,$b,$dst) =
	    $args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/)	# format 11
    {	# shift amount taken from %sar
	$word = (0x34<<26)|($b<<21)|($a<<16)|(1<<9)|$dst;
    }

    return "\t".$orig unless (defined($word));
    return sprintf "\t.WORD\t0x%08x\t; %s",$word,$orig;
};
Packit c4476c
Packit c4476c
# Hand-encode "depd" as a raw .WORD so that it can be assembled under
# .LEVEL 1.0.
#   $mod  - completer text following the mnemonic
#   $args - operand string, expected as "%rS,POS,LEN,%rT"
# Returns the replacement assembly line; unrecognised operands are passed
# through as a plain "depd" line.
my $depd = sub {
  my ($mod,$args) = @_;
  my $orig = "depd$mod\t$args";

    # The only completer in use is ",z"; it is implied by the encoding.
    my ($src,$pos,$width,$dst) =
	$args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/	# format 16
	    or return "\t".$orig;

    my $cpos = 63-$pos;
    my $len  = 32-$width;
    my $word = (0x3c<<26)|($dst<<21)|($src<<16);
    $word |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);		# encode pos
    $word |= (($len&0x20)<<7)|($len&0x1f);			# encode len

    return sprintf "\t.WORD\t0x%08x\t; %s",$word,$orig;
};
Packit c4476c
Packit c4476c
# Translate one instruction into its output form.
#   $mnemonic - instruction name (lower-case letters)
#   $mod      - completer text that directly followed the name
#   $args     - operand string
# If a variable named after the mnemonic holds a code reference, that
# encoder is called with ($mod,$args) and its result is returned;
# otherwise the instruction is re-emitted unchanged.
sub assemble {
  my ($mnemonic,$mod,$args)=@_;
  # Look the encoder up by name: "ldd" -> $ldd, "std" -> $std, ...
  my $encoder = eval("\$$mnemonic");

    return $encoder->($mod,$args) if (ref($encoder) eq 'CODE');
    return "\t$mnemonic$mod\t$args";
}
Packit c4476c
Packit c4476c
# Ask the compiler driver named by $ENV{CC} to identify its assembler;
# remember whether it reports itself as the GNU assembler.
$gnuas = 1
    if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
	=~ /GNU assembler/);

# Post-process the accumulated template one line at a time and print it.
for my $line (split("\n",$code)) {
	# Evaluate the arithmetic enclosed in backticks.
	$line =~ s/\`([^\`]*)\`/eval $1/ge;

	if ($SIZE_T==4) {
		# 32-bit build: give every instruction to assemble(), then
		# drop the "*" markers from the conditions that remain.
		$line =~ s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
		$line =~ s/cmpb,\*/comb,/;
		$line =~ s/,\*/,/;
	}

	if ($SIZE_T==8) {
		if ($gnuas) {
			# Directive spellings for the GNU assembler.
			$line =~ s/(\.LEVEL\s+2\.0)W/$1w/;
			$line =~ s/\.SPACE\s+\$TEXT\$/.text/;
			$line =~ s/\.SUBSPA.*//;
		}
		# 64-bit build returns with "bve" rather than "bv".
		$line =~ s/\bbv\b/bve/;
	}

	print $line,"\n";
}

# Buffered write errors only surface here, so the close must be checked.
close STDOUT or die "error closing STDOUT: $!";