#! /usr/bin/env perl
# Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# October 2015
#
# ChaCha20 for PowerPC/AltiVec.
#
# June 2018
#
# Add VSX 2.07 code path. Original 3xAltiVec+1xIALU is well-suited for
# processors that can't issue more than one vector instruction per
# cycle. But POWER8 (and POWER9) can issue a pair, and vector-only 4x
# interleave would perform better. Incidentally PowerISA 2.07 (first
# implemented by POWER8) defined new usable instructions, hence 4xVSX
# code path...
#
# Performance in cycles per byte out of large buffer.
#
#			IALU/gcc-4.x    3xAltiVec+1xIALU	4xVSX
#
# Freescale e300	13.6/+115%	-			-
# PPC74x0/G4e		6.81/+310%	3.81			-
# PPC970/G5		9.29/+160%	?			-
# POWER7		8.62/+61%	3.35			-
# POWER8		8.70/+51%	2.91			2.09
# POWER9		8.80/+29%	4.44(*)			2.45(**)
#
# (*)	this is trade-off result, it's possible to improve it, but
#	then it would negatively affect all others;
# (**)	POWER9 seems to be "allergic" to mixing vector and integer
#	instructions, which is why switch to vector-only code pays
#	off that much;

# Command line: <flavour> <output>.  The flavour string must contain "64" or
# "32" (ABI word size) and selects little-endian output when it ends in "le".
$flavour = shift;

# Per-ABI parameters:
#   $SIZE_T  - size of a GPR save slot in bytes
#   $LRSAVE  - offset, relative to the caller's frame, at which LR is saved
#   $STU     - store-with-update used to allocate the stack frame
#   $POP/$PUSH - load/store of one full-width GPR
#   $UCMP    - unsigned compare of the native width
if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? 1 : 0;

# Look for ppc-xlate.pl next to this script first, then in ../../perlasm/
# relative to it.  When $0 has no directory component $dir is "" (current
# directory) rather than whatever a previous match left in $1.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# From here on STDOUT is a pipe into the translator; the second command-line
# argument is handed to it unchanged.  Low-precedence "or" is essential:
# with "||" the die attaches to the (always true) command string and a
# failed open would go unnoticed.
open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";

# Stack frame layout for the integer code path: $LOCALS bytes of linkage
# area, a 64-byte scratch block, then 18 save slots for r14-r31.
$LOCALS=6*$SIZE_T;
$FRAME=$LOCALS+64+18*$SIZE_T;	# 64 is for local variables

# Catch-all for calls to undefined subs.  A call such as
# &add("r3","r3","r4") appends the line "\tadd\tr3,r3,r4\n" to $code: the
# sub name (package qualifier stripped) becomes the mnemonic and the
# arguments become the comma-separated operand list.  The first underscore
# in the name is turned into a dot, so &subfe_(...) emits "subfe.".
# Returns the updated $code string (value of the final assignment).
sub AUTOLOAD()		# thunk [simplified] x86-style perlasm
{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
    $code .= "\t$opcode\t".join(',',@_)."\n";
}

# General-purpose register assignment used by the integer code path.
my $sp = "r1";

# Function arguments arrive in r3..r7.
my ($out,$inp,$len,$key,$ctr) = map("r$_",(3..7));

my @x=map("r$_",(16..31));		# 16-word working state, r16..r31
my @d=map("r$_",(11,12,14,15));		# the four counter words
# Scratch registers r7..r10.  Note that @t[0] is r7, i.e. the same register
# as $ctr, so $ctr can only be read before @t[0] is first written.
my @t=map("r$_",(7..10));

# Build the instruction list for four interleaved scalar quarter-rounds.
#
# Arguments are four indices into @x (a0,b0,c0,d0) naming the words of the
# first quarter-round.  The other three index sets are derived by keeping
# each index's group of four ($_&~3) and stepping its position within the
# group by one, modulo 4.  ROUND(0,4,8,12) therefore covers the four columns
# of the 4x4 state and ROUND(0,5,10,15) the four diagonals.
#
# Returns a list of 48 strings.  Each string is a Perl call expression (for
# example "&add (r16,r16,r20)"); the caller evals them one by one, and every
# eval appends one instruction to $code through AUTOLOAD.  Register names
# are interpolated from @x at the time ROUND is called.  The rotate amounts
# are 16, 12, 8 and 7, and the four quarter-rounds are emitted step by step
# in lock-step rather than one after another.
sub ROUND {
my ($a0,$b0,$c0,$d0)=@_;
my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0));
my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1));
my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2));

    (
	# a += b; d ^= a; d <<<= 16
	"&add		(@x[$a0],@x[$a0],@x[$b0])",
	 "&add		(@x[$a1],@x[$a1],@x[$b1])",
	  "&add		(@x[$a2],@x[$a2],@x[$b2])",
	   "&add	(@x[$a3],@x[$a3],@x[$b3])",
	"&xor		(@x[$d0],@x[$d0],@x[$a0])",
	 "&xor		(@x[$d1],@x[$d1],@x[$a1])",
	  "&xor		(@x[$d2],@x[$d2],@x[$a2])",
	   "&xor	(@x[$d3],@x[$d3],@x[$a3])",
	"&rotlwi	(@x[$d0],@x[$d0],16)",
	 "&rotlwi	(@x[$d1],@x[$d1],16)",
	  "&rotlwi	(@x[$d2],@x[$d2],16)",
	   "&rotlwi	(@x[$d3],@x[$d3],16)",

	# c += d; b ^= c; b <<<= 12
	"&add		(@x[$c0],@x[$c0],@x[$d0])",
	 "&add		(@x[$c1],@x[$c1],@x[$d1])",
	  "&add		(@x[$c2],@x[$c2],@x[$d2])",
	   "&add	(@x[$c3],@x[$c3],@x[$d3])",
	"&xor		(@x[$b0],@x[$b0],@x[$c0])",
	 "&xor		(@x[$b1],@x[$b1],@x[$c1])",
	  "&xor		(@x[$b2],@x[$b2],@x[$c2])",
	   "&xor	(@x[$b3],@x[$b3],@x[$c3])",
	"&rotlwi	(@x[$b0],@x[$b0],12)",
	 "&rotlwi	(@x[$b1],@x[$b1],12)",
	  "&rotlwi	(@x[$b2],@x[$b2],12)",
	   "&rotlwi	(@x[$b3],@x[$b3],12)",

	# a += b; d ^= a; d <<<= 8
	"&add		(@x[$a0],@x[$a0],@x[$b0])",
	 "&add		(@x[$a1],@x[$a1],@x[$b1])",
	  "&add		(@x[$a2],@x[$a2],@x[$b2])",
	   "&add	(@x[$a3],@x[$a3],@x[$b3])",
	"&xor		(@x[$d0],@x[$d0],@x[$a0])",
	 "&xor		(@x[$d1],@x[$d1],@x[$a1])",
	  "&xor		(@x[$d2],@x[$d2],@x[$a2])",
	   "&xor	(@x[$d3],@x[$d3],@x[$a3])",
	"&rotlwi	(@x[$d0],@x[$d0],8)",
	 "&rotlwi	(@x[$d1],@x[$d1],8)",
	  "&rotlwi	(@x[$d2],@x[$d2],8)",
	   "&rotlwi	(@x[$d3],@x[$d3],8)",

	# c += d; b ^= c; b <<<= 7
	"&add		(@x[$c0],@x[$c0],@x[$d0])",
	 "&add		(@x[$c1],@x[$c1],@x[$d1])",
	  "&add		(@x[$c2],@x[$c2],@x[$d2])",
	   "&add	(@x[$c3],@x[$c3],@x[$d3])",
	"&xor		(@x[$b0],@x[$b0],@x[$c0])",
	 "&xor		(@x[$b1],@x[$b1],@x[$c1])",
	  "&xor		(@x[$b2],@x[$b2],@x[$c2])",
	   "&xor	(@x[$b3],@x[$b3],@x[$c3])",
	"&rotlwi	(@x[$b0],@x[$b0],7)",
	 "&rotlwi	(@x[$b1],@x[$b1],7)",
	  "&rotlwi	(@x[$b2],@x[$b2],7)",
	   "&rotlwi	(@x[$b3],@x[$b3],7)"
    );
}

# ChaCha20_ctr32_int: integer-only entry point.  It returns immediately when
# $len is zero; otherwise it allocates a $FRAME-byte stack frame, saves
# r14-r31 and the link register, loads the four counter words from $ctr
# into @d, calls __ChaCha20_1x and finally restores everything.
#
# __ChaCha20_1x / Loop_outer: sets up one 16-word state in @x (sigma
# constants synthesized with lis/ori, eight key words loaded from $key,
# counter words copied from @d), keeps a copy of key words 0-3 in @t and
# loads CTR with 10 for the round loop whose body is emitted right after
# this heredoc.
#
# NOTE(review): the `...` expressions are left verbatim in $code here;
# presumably they are evaluated by a later pass outside this chunk.
$code.=<<___;
.machine	"any"
.text

.globl	.ChaCha20_ctr32_int
.align	5
.ChaCha20_ctr32_int:
__ChaCha20_ctr32_int:
	${UCMP}i $len,0
	beqlr-

	$STU	$sp,-$FRAME($sp)
	mflr	r0

	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)

	lwz	@d[0],0($ctr)			# load counter
	lwz	@d[1],4($ctr)
	lwz	@d[2],8($ctr)
	lwz	@d[3],12($ctr)

	bl	__ChaCha20_1x

	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,5,0
	.long	0
.size	.ChaCha20_ctr32_int,.-.ChaCha20_ctr32_int

.align	5
__ChaCha20_1x:
Loop_outer:
	lis	@x[0],0x6170			# synthesize sigma
	lis	@x[1],0x3320
	lis	@x[2],0x7962
	lis	@x[3],0x6b20
	ori	@x[0],@x[0],0x7865
	ori	@x[1],@x[1],0x646e
	ori	@x[2],@x[2],0x2d32
	ori	@x[3],@x[3],0x6574

	li	r0,10				# inner loop counter
	lwz	@x[4],0($key)			# load key
	lwz	@x[5],4($key)
	lwz	@x[6],8($key)
	lwz	@x[7],12($key)
	lwz	@x[8],16($key)
	mr	@x[12],@d[0]			# copy counter
	lwz	@x[9],20($key)
	mr	@x[13],@d[1]
	lwz	@x[10],24($key)
	mr	@x[14],@d[2]
	lwz	@x[11],28($key)
	mr	@x[15],@d[3]

	mr	@t[0],@x[4]
	mr	@t[1],@x[5]
	mr	@t[2],@x[6]
	mr	@t[3],@x[7]

	mtctr	r0
Loop:
___
	# Body of "Loop": one column round followed by one diagonal round.
	# Each string returned by ROUND is eval'ed, which appends one
	# instruction to $code via AUTOLOAD.
	foreach (&ROUND(0, 4, 8,12)) { eval; }
	foreach (&ROUND(0, 5,10,15)) { eval; }
# Close the round loop, then finish the block: subtract 64 from $len (subic
# leaves the borrow in CA; subfe. turns it into 0/-1 in r0 and sets cr0 for
# the "bne Ltail" emitted further down), add the input state (sigma, key
# and counter) back into @x, and increment the block counter in @d[0].
# Key words 4-7 are reloaded from $key into @t because @t held words 0-3.
$code.=<<___;
	bdnz	Loop

	subic	$len,$len,64			# $len-=64
	addi	@x[0],@x[0],0x7865		# accumulate key block
	addi	@x[1],@x[1],0x646e
	addi	@x[2],@x[2],0x2d32
	addi	@x[3],@x[3],0x6574
	addis	@x[0],@x[0],0x6170
	addis	@x[1],@x[1],0x3320
	addis	@x[2],@x[2],0x7962
	addis	@x[3],@x[3],0x6b20

	subfe.	r0,r0,r0			# borrow?-1:0
	add	@x[4],@x[4],@t[0]
	lwz	@t[0],16($key)
	add	@x[5],@x[5],@t[1]
	lwz	@t[1],20($key)
	add	@x[6],@x[6],@t[2]
	lwz	@t[2],24($key)
	add	@x[7],@x[7],@t[3]
	lwz	@t[3],28($key)
	add	@x[8],@x[8],@t[0]
	add	@x[9],@x[9],@t[1]
	add	@x[10],@x[10],@t[2]
	add	@x[11],@x[11],@t[3]

	add	@x[12],@x[12],@d[0]
	add	@x[13],@x[13],@d[1]
	add	@x[14],@x[14],@d[2]
	add	@x[15],@x[15],@d[3]
	addi	@d[0],@d[0],1			# increment counter
___
# Big-endian targets only: byte-swap each of the 16 state words in place.
# For word i the scratch register @t[i&3] holds the original value; the
# word is rotated left by 8 and two rlwimi inserts (original rotated by 24,
# bit ranges 0-7 and 16-23) fix up the remaining two bytes.
if (!$LITTLE_ENDIAN) { for($i=0;$i<16;$i++) {	# flip byte order
$code.=<<___;
	mr	@t[$i&3],@x[$i]
	rotlwi	@x[$i],@x[$i],8
	rlwimi	@x[$i],@t[$i&3],24,0,7
	rlwimi	@x[$i],@t[$i&3],24,16,23
___
} }
# Full-block path: if the earlier "$len -= 64" borrowed, branch to Ltail.
# Otherwise load 64 input bytes with word loads, xor them with the key
# stream in @x, store 64 output bytes, advance $inp/$out and loop back to
# Loop_outer while $len is non-zero (compare issued early, branch late).
#
# Ltail (fewer than 64 bytes left): restore $len, spill the key-stream
# block to the 64-byte scratch area at $LOCALS($sp), xor it into the input
# one byte at a time ($len iterations via CTR), then overwrite the scratch
# area (the value stored is the stack pointer itself) before returning.
$code.=<<___;
	bne	Ltail				# $len-=64 borrowed

	lwz	@t[0],0($inp)			# load input, aligned or not
	lwz	@t[1],4($inp)
	${UCMP}i $len,0				# done already?
	lwz	@t[2],8($inp)
	lwz	@t[3],12($inp)
	xor	@x[0],@x[0],@t[0]		# xor with input
	lwz	@t[0],16($inp)
	xor	@x[1],@x[1],@t[1]
	lwz	@t[1],20($inp)
	xor	@x[2],@x[2],@t[2]
	lwz	@t[2],24($inp)
	xor	@x[3],@x[3],@t[3]
	lwz	@t[3],28($inp)
	xor	@x[4],@x[4],@t[0]
	lwz	@t[0],32($inp)
	xor	@x[5],@x[5],@t[1]
	lwz	@t[1],36($inp)
	xor	@x[6],@x[6],@t[2]
	lwz	@t[2],40($inp)
	xor	@x[7],@x[7],@t[3]
	lwz	@t[3],44($inp)
	xor	@x[8],@x[8],@t[0]
	lwz	@t[0],48($inp)
	xor	@x[9],@x[9],@t[1]
	lwz	@t[1],52($inp)
	xor	@x[10],@x[10],@t[2]
	lwz	@t[2],56($inp)
	xor	@x[11],@x[11],@t[3]
	lwz	@t[3],60($inp)
	xor	@x[12],@x[12],@t[0]
	stw	@x[0],0($out)			# store output, aligned or not
	xor	@x[13],@x[13],@t[1]
	stw	@x[1],4($out)
	xor	@x[14],@x[14],@t[2]
	stw	@x[2],8($out)
	xor	@x[15],@x[15],@t[3]
	stw	@x[3],12($out)
	stw	@x[4],16($out)
	stw	@x[5],20($out)
	stw	@x[6],24($out)
	stw	@x[7],28($out)
	stw	@x[8],32($out)
	stw	@x[9],36($out)
	stw	@x[10],40($out)
	stw	@x[11],44($out)
	stw	@x[12],48($out)
	stw	@x[13],52($out)
	stw	@x[14],56($out)
	addi	$inp,$inp,64
	stw	@x[15],60($out)
	addi	$out,$out,64

	bne	Loop_outer

	blr

.align	4
Ltail:
	addi	$len,$len,64			# restore tail length
	subi	$inp,$inp,1			# prepare for *++ptr
	subi	$out,$out,1
	addi	@t[0],$sp,$LOCALS-1
	mtctr	$len

	stw	@x[0],`$LOCALS+0`($sp)		# save whole block to stack
	stw	@x[1],`$LOCALS+4`($sp)
	stw	@x[2],`$LOCALS+8`($sp)
	stw	@x[3],`$LOCALS+12`($sp)
	stw	@x[4],`$LOCALS+16`($sp)
	stw	@x[5],`$LOCALS+20`($sp)
	stw	@x[6],`$LOCALS+24`($sp)
	stw	@x[7],`$LOCALS+28`($sp)
	stw	@x[8],`$LOCALS+32`($sp)
	stw	@x[9],`$LOCALS+36`($sp)
	stw	@x[10],`$LOCALS+40`($sp)
	stw	@x[11],`$LOCALS+44`($sp)
	stw	@x[12],`$LOCALS+48`($sp)
	stw	@x[13],`$LOCALS+52`($sp)
	stw	@x[14],`$LOCALS+56`($sp)
	stw	@x[15],`$LOCALS+60`($sp)

Loop_tail:					# byte-by-byte loop
	lbzu	@d[0],1($inp)
	lbzu	@x[0],1(@t[0])
	xor	@d[1],@d[0],@x[0]
	stbu	@d[1],1($out)
	bdnz	Loop_tail

	stw	$sp,`$LOCALS+0`($sp)		# wipe block on stack
	stw	$sp,`$LOCALS+4`($sp)
	stw	$sp,`$LOCALS+8`($sp)
	stw	$sp,`$LOCALS+12`($sp)
	stw	$sp,`$LOCALS+16`($sp)
	stw	$sp,`$LOCALS+20`($sp)
	stw	$sp,`$LOCALS+24`($sp)
	stw	$sp,`$LOCALS+28`($sp)
	stw	$sp,`$LOCALS+32`($sp)
	stw	$sp,`$LOCALS+36`($sp)
	stw	$sp,`$LOCALS+40`($sp)
	stw	$sp,`$LOCALS+44`($sp)
	stw	$sp,`$LOCALS+48`($sp)
	stw	$sp,`$LOCALS+52`($sp)
	stw	$sp,`$LOCALS+56`($sp)
	stw	$sp,`$LOCALS+60`($sp)

	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
___

{{{
Packit Service 084de1
# Vector register assignment for the AltiVec code path.
# v0-v11: three independent 4-row states (A/B/C/D rows of blocks 0, 1, 2).
my ($A0,$B0,$C0,$D0,$A1,$B1,$C1,$D1,$A2,$B2,$C2,$D2)
				= map("v$_",(0..11));
my @K				= map("v$_",(12..17));
# Note the gap: $FOUR is v18, $sixteen is v19, $twenty4 is v23.
my ($FOUR,$sixteen,$twenty4)	= map("v$_",(18..19,23));
my ($inpperm,$outperm,$outmask)	= map("v$_",(24..26));
my @D				= map("v$_",(27..31));
# $twelve, $seven, $T0 and $T1 are aliases for @D[0..3] (v27..v30).
my ($twelve,$seven,$T0,$T1) = @D;

# Shadows the integer path's $FRAME inside this scope: same layout plus
# room for saving vector registers.
my $FRAME=$LOCALS+64+10*16+18*$SIZE_T;	# 10*16 is for v23-v31 offload

# Build the instruction list for one vector round on a single block.
#
# Arguments: the names of the four row registers (a,b,c,d) followed by a
# flag $odd (taken off the end of the argument list).  Returns 15 strings,
# each a Perl call expression that the caller evals so that AUTOLOAD
# appends the instruction to $code.
#
# The first twelve entries are the add/xor/rotate steps; rotations by 16
# and 24 are done with vperm using $sixteen/$twenty4, rotations by 12 and 7
# with vrlw using $twelve/$seven.  The last three entries rotate rows c, b
# and d; the amounts for b and d are 12/4 when $odd is false and 4/12 when
# it is true.  The "$odd?4:12" text stays in the string with $odd already
# substituted and is resolved when the string is eval'ed.
# NOTE(review): vrldoi is not defined in this chunk; presumably the
# translator provides it - confirm against ppc-xlate.pl.
sub VMXROUND {
my $odd = pop;
my ($a,$b,$c,$d)=@_;

	(
	"&vadduwm	('$a','$a','$b')",
	"&vxor		('$d','$d','$a')",
	"&vperm		('$d','$d','$d','$sixteen')",

	"&vadduwm	('$c','$c','$d')",
	"&vxor		('$b','$b','$c')",
	"&vrlw		('$b','$b','$twelve')",

	"&vadduwm	('$a','$a','$b')",
	"&vxor		('$d','$d','$a')",
	"&vperm		('$d','$d','$d','$twenty4')",

	"&vadduwm	('$c','$c','$d')",
	"&vxor		('$b','$b','$c')",
	"&vrlw		('$b','$b','$seven')",

	"&vrldoi	('$c','$c',8)",
	"&vrldoi	('$b','$b',$odd?4:12)",
	"&vrldoi	('$d','$d',$odd?12:4)"
	);
}

$code.=<<___;
Packit Service 084de1
Packit Service 084de1
.globl	.ChaCha20_ctr32_vmx
Packit Service 084de1
.align	5
Packit Service 084de1
.ChaCha20_ctr32_vmx:
Packit Service 084de1
	${UCMP}i $len,256
Packit Service 084de1
	blt	__ChaCha20_ctr32_int
Packit Service 084de1
Packit Service 084de1
	$STU	$sp,-$FRAME($sp)
Packit Service 084de1
	mflr	r0
Packit Service 084de1
	li	r10,`15+$LOCALS+64`
Packit Service 084de1
	li	r11,`31+$LOCALS+64`
Packit Service 084de1
	mfspr	r12,256
Packit Service 084de1
	stvx	v23,r10,$sp
Packit Service 084de1
	addi	r10,r10,32
Packit Service 084de1
	stvx	v24,r11,$sp
Packit Service 084de1
	addi	r11,r11,32
Packit Service 084de1
	stvx	v25,r10,$sp
Packit Service 084de1
	addi	r10,r10,32
Packit Service 084de1
	stvx	v26,r11,$sp
Packit Service 084de1
	addi	r11,r11,32
Packit Service 084de1
	stvx	v27,r10,$sp
Packit Service 084de1
	addi	r10,r10,32
Packit Service 084de1
	stvx	v28,r11,$sp
Packit Service 084de1
	addi	r11,r11,32
Packit Service 084de1
	stvx	v29,r10,$sp
Packit Service 084de1
	addi	r10,r10,32
Packit Service 084de1
	stvx	v30,r11,$sp
Packit Service 084de1
	stvx	v31,r10,$sp
Packit Service 084de1
	stw	r12,`$FRAME-$SIZE_T*18-4`($sp)	# save vrsave
Packit Service 084de1
	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
Packit Service 084de1
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
Packit Service 084de1
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
Packit Service 084de1
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
Packit Service 084de1
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
Packit Service 084de1
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
Packit Service 084de1
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
Packit Service 084de1
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
Packit Service 084de1
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
Packit Service 084de1
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
Packit Service 084de1
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
Packit Service 084de1
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
Packit Service 084de1
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
Packit Service 084de1
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
Packit Service 084de1
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
Packit Service 084de1
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
Packit Service 084de1
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
Packit Service 084de1
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
Packit Service 084de1
	li	r12,-4096+511
Packit Service 084de1
	$PUSH	r0, `$FRAME+$LRSAVE`($sp)
Packit Service 084de1
	mtspr	256,r12				# preserve 29 AltiVec registers
Packit Service 084de1
Packit Service 084de1
	bl	Lconsts				# returns pointer Lsigma in r12
Packit Service 084de1
	li	@x[0],16
Packit Service 084de1
	li	@x[1],32
Packit Service 084de1
	li	@x[2],48
Packit Service 084de1
	li	@x[3],64
Packit Service 084de1
	li	@x[4],31			# 31 is not a typo
Packit Service 084de1
	li	@x[5],15			# nor is 15
Packit Service 084de1
Packit Service 084de1
	lvx	@K[1],0,$key			# load key
Packit Service 084de1
	?lvsr	$T0,0,$key			# prepare unaligned load
Packit Service 084de1
	lvx	@K[2],@x[0],$key
Packit Service 084de1
	lvx	@D[0],@x[4],$key
Packit Service 084de1
Packit Service 084de1
	lvx	@K[3],0,$ctr			# load counter
Packit Service 084de1
	?lvsr	$T1,0,$ctr			# prepare unaligned load
Packit Service 084de1
	lvx	@D[1],@x[5],$ctr
Packit Service 084de1
Packit Service 084de1
	lvx	@K[0],0,r12			# load constants
Packit Service 084de1
	lvx	@K[5],@x[0],r12			# one
Packit Service 084de1
	lvx	$FOUR,@x[1],r12
Packit Service 084de1
	lvx	$sixteen,@x[2],r12
Packit Service 084de1
	lvx	$twenty4,@x[3],r12
Packit Service 084de1
Packit Service 084de1
	?vperm	@K[1],@K[2],@K[1],$T0		# align key
Packit Service 084de1
	?vperm	@K[2],@D[0],@K[2],$T0
Packit Service 084de1
	?vperm	@K[3],@D[1],@K[3],$T1		# align counter
Packit Service 084de1
Packit Service 084de1
	lwz	@d[0],0($ctr)			# load counter to GPR
Packit Service 084de1
	lwz	@d[1],4($ctr)
Packit Service 084de1
	vadduwm	@K[3],@K[3],@K[5]		# adjust AltiVec counter
Packit Service 084de1
	lwz	@d[2],8($ctr)
Packit Service 084de1
	vadduwm	@K[4],@K[3],@K[5]
Packit Service 084de1
	lwz	@d[3],12($ctr)
Packit Service 084de1
	vadduwm	@K[5],@K[4],@K[5]
Packit Service 084de1
Packit Service 084de1
	vxor	$T0,$T0,$T0			# 0x00..00
Packit Service 084de1
	vspltisw $outmask,-1			# 0xff..ff
Packit Service 084de1
	?lvsr	$inpperm,0,$inp			# prepare for unaligned load
Packit Service 084de1
	?lvsl	$outperm,0,$out			# prepare for unaligned store
Packit Service 084de1
	?vperm	$outmask,$outmask,$T0,$outperm
Packit Service 084de1
Packit Service 084de1
	be?lvsl	$T0,0,@x[0]			# 0x00..0f
Packit Service 084de1
	be?vspltisb $T1,3			# 0x03..03
Packit Service 084de1
	be?vxor	$T0,$T0,$T1			# swap bytes within words
Packit Service 084de1
	be?vxor	$outperm,$outperm,$T1
Packit Service 084de1
	be?vperm $inpperm,$inpperm,$inpperm,$T0
Packit Service 084de1
Packit Service 084de1
	li	r0,10				# inner loop counter
Packit Service 084de1
	b	Loop_outer_vmx
Packit Service 084de1
Packit Service 084de1
.align	4
Packit Service 084de1
Loop_outer_vmx:
Packit Service 084de1
	lis	@x[0],0x6170			# synthesize sigma
Packit Service 084de1
	lis	@x[1],0x3320
Packit Service 084de1
	 vmr	$A0,@K[0]
Packit Service 084de1
	lis	@x[2],0x7962
Packit Service 084de1
	lis	@x[3],0x6b20
Packit Service 084de1
	 vmr	$A1,@K[0]
Packit Service 084de1
	ori	@x[0],@x[0],0x7865
Packit Service 084de1
	ori	@x[1],@x[1],0x646e
Packit Service 084de1
	 vmr	$A2,@K[0]
Packit Service 084de1
	ori	@x[2],@x[2],0x2d32
Packit Service 084de1
	ori	@x[3],@x[3],0x6574
Packit Service 084de1
	 vmr	$B0,@K[1]
Packit Service 084de1
Packit Service 084de1
	lwz	@x[4],0($key)			# load key to GPR
Packit Service 084de1
	 vmr	$B1,@K[1]
Packit Service 084de1
	lwz	@x[5],4($key)
Packit Service 084de1
	 vmr	$B2,@K[1]
Packit Service 084de1
	lwz	@x[6],8($key)
Packit Service 084de1
	 vmr	$C0,@K[2]
Packit Service 084de1
	lwz	@x[7],12($key)
Packit Service 084de1
	 vmr	$C1,@K[2]
Packit Service 084de1
	lwz	@x[8],16($key)
Packit Service 084de1
	 vmr	$C2,@K[2]
Packit Service 084de1
	mr	@x[12],@d[0]			# copy GPR counter
Packit Service 084de1
	lwz	@x[9],20($key)
Packit Service 084de1
	 vmr	$D0,@K[3]
Packit Service 084de1
	mr	@x[13],@d[1]
Packit Service 084de1
	lwz	@x[10],24($key)
Packit Service 084de1
	 vmr	$D1,@K[4]
Packit Service 084de1
	mr	@x[14],@d[2]
Packit Service 084de1
	lwz	@x[11],28($key)
Packit Service 084de1
	 vmr	$D2,@K[5]
Packit Service 084de1
	mr	@x[15],@d[3]
Packit Service 084de1
Packit Service 084de1
	mr	@t[0],@x[4]
Packit Service 084de1
	mr	@t[1],@x[5]
Packit Service 084de1
	mr	@t[2],@x[6]
Packit Service 084de1
	mr	@t[3],@x[7]
Packit Service 084de1
Packit Service 084de1
	vspltisw $twelve,12			# synthesize constants
Packit Service 084de1
	vspltisw $seven,7
Packit Service 084de1
Packit Service 084de1
	mtctr	r0
Packit Service 084de1
	nop
Packit Service 084de1
Loop_vmx:
Packit Service 084de1
___
Packit Service 084de1
	my @thread0=&VMXROUND($A0,$B0,$C0,$D0,0);
Packit Service 084de1
	my @thread1=&VMXROUND($A1,$B1,$C1,$D1,0);
Packit Service 084de1
	my @thread2=&VMXROUND($A2,$B2,$C2,$D2,0);
Packit Service 084de1
	my @thread3=&ROUND(0,4,8,12);
Packit Service 084de1
Packit Service 084de1
	foreach (@thread0) {
Packit Service 084de1
		eval;
Packit Service 084de1
		eval(shift(@thread1));
Packit Service 084de1
		eval(shift(@thread2));
Packit Service 084de1
Packit Service 084de1
		eval(shift(@thread3));
Packit Service 084de1
		eval(shift(@thread3));
Packit Service 084de1
		eval(shift(@thread3));
Packit Service 084de1
	}
Packit Service 084de1
	foreach (@thread3) { eval; }
Packit Service 084de1
Packit Service 084de1
	@thread0=&VMXROUND($A0,$B0,$C0,$D0,1);
Packit Service 084de1
	@thread1=&VMXROUND($A1,$B1,$C1,$D1,1);
Packit Service 084de1
	@thread2=&VMXROUND($A2,$B2,$C2,$D2,1);
Packit Service 084de1
	@thread3=&ROUND(0,5,10,15);
Packit Service 084de1
Packit Service 084de1
	foreach (@thread0) {
Packit Service 084de1
		eval;
Packit Service 084de1
		eval(shift(@thread1));
Packit Service 084de1
		eval(shift(@thread2));
Packit Service 084de1
Packit Service 084de1
		eval(shift(@thread3));
Packit Service 084de1
		eval(shift(@thread3));
Packit Service 084de1
		eval(shift(@thread3));
Packit Service 084de1
	}
Packit Service 084de1
	foreach (@thread3) { eval; }
Packit Service 084de1
$code.=<<___;
Packit Service 084de1
	bdnz	Loop_vmx
Packit Service 084de1
Packit Service 084de1
	subi	$len,$len,256			# $len-=256
Packit Service 084de1
	addi	@x[0],@x[0],0x7865		# accumulate key block
Packit Service 084de1
	addi	@x[1],@x[1],0x646e
Packit Service 084de1
	addi	@x[2],@x[2],0x2d32
Packit Service 084de1
	addi	@x[3],@x[3],0x6574
Packit Service 084de1
	addis	@x[0],@x[0],0x6170
Packit Service 084de1
	addis	@x[1],@x[1],0x3320
Packit Service 084de1
	addis	@x[2],@x[2],0x7962
Packit Service 084de1
	addis	@x[3],@x[3],0x6b20
Packit Service 084de1
	add	@x[4],@x[4],@t[0]
Packit Service 084de1
	lwz	@t[0],16($key)
Packit Service 084de1
	add	@x[5],@x[5],@t[1]
Packit Service 084de1
	lwz	@t[1],20($key)
Packit Service 084de1
	add	@x[6],@x[6],@t[2]
Packit Service 084de1
	lwz	@t[2],24($key)
Packit Service 084de1
	add	@x[7],@x[7],@t[3]
Packit Service 084de1
	lwz	@t[3],28($key)
Packit Service 084de1
	add	@x[8],@x[8],@t[0]
Packit Service 084de1
	add	@x[9],@x[9],@t[1]
Packit Service 084de1
	add	@x[10],@x[10],@t[2]
Packit Service 084de1
	add	@x[11],@x[11],@t[3]
Packit Service 084de1
	add	@x[12],@x[12],@d[0]
Packit Service 084de1
	add	@x[13],@x[13],@d[1]
Packit Service 084de1
	add	@x[14],@x[14],@d[2]
Packit Service 084de1
	add	@x[15],@x[15],@d[3]
Packit Service 084de1
Packit Service 084de1
	vadduwm	$A0,$A0,@K[0]			# accumulate key block
Packit Service 084de1
	vadduwm	$A1,$A1,@K[0]
Packit Service 084de1
	vadduwm	$A2,$A2,@K[0]
Packit Service 084de1
	vadduwm	$B0,$B0,@K[1]
Packit Service 084de1
	vadduwm	$B1,$B1,@K[1]
Packit Service 084de1
	vadduwm	$B2,$B2,@K[1]
Packit Service 084de1
	vadduwm	$C0,$C0,@K[2]
Packit Service 084de1
	vadduwm	$C1,$C1,@K[2]
Packit Service 084de1
	vadduwm	$C2,$C2,@K[2]
Packit Service 084de1
	vadduwm	$D0,$D0,@K[3]
Packit Service 084de1
	vadduwm	$D1,$D1,@K[4]
Packit Service 084de1
	vadduwm	$D2,$D2,@K[5]
Packit Service 084de1
Packit Service 084de1
	addi	@d[0],@d[0],4			# increment counter
Packit Service 084de1
	vadduwm	@K[3],@K[3],$FOUR
Packit Service 084de1
	vadduwm	@K[4],@K[4],$FOUR
Packit Service 084de1
	vadduwm	@K[5],@K[5],$FOUR
Packit Service 084de1
Packit Service 084de1
___
Packit Service 084de1
if (!$LITTLE_ENDIAN) { for($i=0;$i<16;$i++) {	# flip byte order
Packit Service 084de1
$code.=<<___;
Packit Service 084de1
	mr	@t[$i&3],@x[$i]
Packit Service 084de1
	rotlwi	@x[$i],@x[$i],8
Packit Service 084de1
	rlwimi	@x[$i],@t[$i&3],24,0,7
Packit Service 084de1
	rlwimi	@x[$i],@t[$i&3],24,16,23
Packit Service 084de1
___
Packit Service 084de1
} }
Packit Service 084de1
$code.=<<___;
Packit Service 084de1
	lwz	@t[0],0($inp)			# load input, aligned or not
Packit Service 084de1
	lwz	@t[1],4($inp)
Packit Service 084de1
	lwz	@t[2],8($inp)
Packit Service 084de1
	lwz	@t[3],12($inp)
Packit Service 084de1
	xor	@x[0],@x[0],@t[0]		# xor with input
Packit Service 084de1
	lwz	@t[0],16($inp)
Packit Service 084de1
	xor	@x[1],@x[1],@t[1]
Packit Service 084de1
	lwz	@t[1],20($inp)
Packit Service 084de1
	xor	@x[2],@x[2],@t[2]
Packit Service 084de1
	lwz	@t[2],24($inp)
Packit Service 084de1
	xor	@x[3],@x[3],@t[3]
Packit Service 084de1
	lwz	@t[3],28($inp)
Packit Service 084de1
	xor	@x[4],@x[4],@t[0]
Packit Service 084de1
	lwz	@t[0],32($inp)
Packit Service 084de1
	xor	@x[5],@x[5],@t[1]
Packit Service 084de1
	lwz	@t[1],36($inp)
Packit Service 084de1
	xor	@x[6],@x[6],@t[2]
Packit Service 084de1
	lwz	@t[2],40($inp)
Packit Service 084de1
	xor	@x[7],@x[7],@t[3]
Packit Service 084de1
	lwz	@t[3],44($inp)
Packit Service 084de1
	xor	@x[8],@x[8],@t[0]
Packit Service 084de1
	lwz	@t[0],48($inp)
Packit Service 084de1
	xor	@x[9],@x[9],@t[1]
Packit Service 084de1
	lwz	@t[1],52($inp)
Packit Service 084de1
	xor	@x[10],@x[10],@t[2]
Packit Service 084de1
	lwz	@t[2],56($inp)
Packit Service 084de1
	xor	@x[11],@x[11],@t[3]
Packit Service 084de1
	lwz	@t[3],60($inp)
Packit Service 084de1
	xor	@x[12],@x[12],@t[0]
Packit Service 084de1
	stw	@x[0],0($out)			# store output, aligned or not
Packit Service 084de1
	xor	@x[13],@x[13],@t[1]
Packit Service 084de1
	stw	@x[1],4($out)
Packit Service 084de1
	xor	@x[14],@x[14],@t[2]
Packit Service 084de1
	stw	@x[2],8($out)
Packit Service 084de1
	xor	@x[15],@x[15],@t[3]
Packit Service 084de1
	stw	@x[3],12($out)
Packit Service 084de1
	addi	$inp,$inp,64
Packit Service 084de1
	stw	@x[4],16($out)
Packit Service 084de1
	li	@t[0],16
Packit Service 084de1
	stw	@x[5],20($out)
Packit Service 084de1
	li	@t[1],32
Packit Service 084de1
	stw	@x[6],24($out)
Packit Service 084de1
	li	@t[2],48
Packit Service 084de1
	stw	@x[7],28($out)
Packit Service 084de1
	li	@t[3],64
Packit Service 084de1
	stw	@x[8],32($out)
Packit Service 084de1
	stw	@x[9],36($out)
Packit Service 084de1
	stw	@x[10],40($out)
Packit Service 084de1
	stw	@x[11],44($out)
Packit Service 084de1
	stw	@x[12],48($out)
Packit Service 084de1
	stw	@x[13],52($out)
Packit Service 084de1
	stw	@x[14],56($out)
Packit Service 084de1
	stw	@x[15],60($out)
Packit Service 084de1
	addi	$out,$out,64
Packit Service 084de1
Packit Service 084de1
	lvx	@D[0],0,$inp			# load input
Packit Service 084de1
	lvx	@D[1],@t[0],$inp
Packit Service 084de1
	lvx	@D[2],@t[1],$inp
Packit Service 084de1
	lvx	@D[3],@t[2],$inp
Packit Service 084de1
	lvx	@D[4],@t[3],$inp
Packit Service 084de1
	addi	$inp,$inp,64
Packit Service 084de1
Packit Service 084de1
	?vperm	@D[0],@D[1],@D[0],$inpperm	# align input
Packit Service 084de1
	?vperm	@D[1],@D[2],@D[1],$inpperm
Packit Service 084de1
	?vperm	@D[2],@D[3],@D[2],$inpperm
Packit Service 084de1
	?vperm	@D[3],@D[4],@D[3],$inpperm
Packit Service 084de1
	vxor	$A0,$A0,@D[0]			# xor with input
Packit Service 084de1
	vxor	$B0,$B0,@D[1]
Packit Service 084de1
	lvx	@D[1],@t[0],$inp		# keep loading input
Packit Service 084de1
	vxor	$C0,$C0,@D[2]
Packit Service 084de1
	lvx	@D[2],@t[1],$inp
Packit Service 084de1
	vxor	$D0,$D0,@D[3]
Packit Service 084de1
	lvx	@D[3],@t[2],$inp
Packit Service 084de1
	lvx	@D[0],@t[3],$inp
Packit Service 084de1
	addi	$inp,$inp,64
Packit Service 084de1
	li	@t[3],63			# 63 is not a typo
Packit Service 084de1
	vperm	$A0,$A0,$A0,$outperm		# pre-misalign output
Packit Service 084de1
	vperm	$B0,$B0,$B0,$outperm
Packit Service 084de1
	vperm	$C0,$C0,$C0,$outperm
Packit Service 084de1
	vperm	$D0,$D0,$D0,$outperm
Packit Service 084de1
Packit Service 084de1
	?vperm	@D[4],@D[1],@D[4],$inpperm	# align input
Packit Service 084de1
	?vperm	@D[1],@D[2],@D[1],$inpperm
Packit Service 084de1
	?vperm	@D[2],@D[3],@D[2],$inpperm
Packit Service 084de1
	?vperm	@D[3],@D[0],@D[3],$inpperm
Packit Service 084de1
	vxor	$A1,$A1,@D[4]
Packit Service 084de1
	vxor	$B1,$B1,@D[1]
Packit Service 084de1
	lvx	@D[1],@t[0],$inp		# keep loading input
Packit Service 084de1
	vxor	$C1,$C1,@D[2]
Packit Service 084de1
	lvx	@D[2],@t[1],$inp
Packit Service 084de1
	vxor	$D1,$D1,@D[3]
Packit Service 084de1
	lvx	@D[3],@t[2],$inp
Packit Service 084de1
	lvx	@D[4],@t[3],$inp		# redundant in aligned case
Packit Service 084de1
	addi	$inp,$inp,64
Packit Service 084de1
	vperm	$A1,$A1,$A1,$outperm		# pre-misalign output
Packit Service 084de1
	vperm	$B1,$B1,$B1,$outperm
Packit Service 084de1
	vperm	$C1,$C1,$C1,$outperm
Packit Service 084de1
	vperm	$D1,$D1,$D1,$outperm
Packit Service 084de1
Packit Service 084de1
	?vperm	@D[0],@D[1],@D[0],$inpperm	# align input
Packit Service 084de1
	?vperm	@D[1],@D[2],@D[1],$inpperm
Packit Service 084de1
	?vperm	@D[2],@D[3],@D[2],$inpperm
Packit Service 084de1
	?vperm	@D[3],@D[4],@D[3],$inpperm
Packit Service 084de1
	vxor	$A2,$A2,@D[0]
Packit Service 084de1
	vxor	$B2,$B2,@D[1]
Packit Service 084de1
	vxor	$C2,$C2,@D[2]
Packit Service 084de1
	vxor	$D2,$D2,@D[3]
Packit Service 084de1
	vperm	$A2,$A2,$A2,$outperm		# pre-misalign output
Packit Service 084de1
	vperm	$B2,$B2,$B2,$outperm
Packit Service 084de1
	vperm	$C2,$C2,$C2,$outperm
Packit Service 084de1
	vperm	$D2,$D2,$D2,$outperm
Packit Service 084de1
Packit Service 084de1
	andi.	@x[1],$out,15			# is $out aligned?
Packit Service 084de1
	mr	@x[0],$out
Packit Service 084de1
Packit Service 084de1
	vsel	@D[0],$A0,$B0,$outmask		# collect pre-misaligned output
Packit Service 084de1
	vsel	@D[1],$B0,$C0,$outmask
Packit Service 084de1
	vsel	@D[2],$C0,$D0,$outmask
Packit Service 084de1
	vsel	@D[3],$D0,$A1,$outmask
Packit Service 084de1
	vsel	$B0,$A1,$B1,$outmask
Packit Service 084de1
	vsel	$C0,$B1,$C1,$outmask
Packit Service 084de1
	vsel	$D0,$C1,$D1,$outmask
Packit Service 084de1
	vsel	$A1,$D1,$A2,$outmask
Packit Service 084de1
	vsel	$B1,$A2,$B2,$outmask
Packit Service 084de1
	vsel	$C1,$B2,$C2,$outmask
Packit Service 084de1
	vsel	$D1,$C2,$D2,$outmask
Packit Service 084de1
Packit Service 084de1
	#stvx	$A0,0,$out			# take it easy on the edges
Packit Service 084de1
	stvx	@D[0],@t[0],$out		# store output
Packit Service 084de1
	stvx	@D[1],@t[1],$out
Packit Service 084de1
	stvx	@D[2],@t[2],$out
Packit Service 084de1
	addi	$out,$out,64
Packit Service 084de1
	stvx	@D[3],0,$out
Packit Service 084de1
	stvx	$B0,@t[0],$out
Packit Service 084de1
	stvx	$C0,@t[1],$out
Packit Service 084de1
	stvx	$D0,@t[2],$out
Packit Service 084de1
	addi	$out,$out,64
Packit Service 084de1
	stvx	$A1,0,$out
Packit Service 084de1
	stvx	$B1,@t[0],$out
Packit Service 084de1
	stvx	$C1,@t[1],$out
Packit Service 084de1
	stvx	$D1,@t[2],$out
Packit Service 084de1
	addi	$out,$out,64
Packit Service 084de1
Packit Service 084de1
	beq	Laligned_vmx
Packit Service 084de1
Packit Service 084de1
	sub	@x[2],$out,@x[1]		# in misaligned case edges
Packit Service 084de1
	li	@x[3],0				# are written byte-by-byte
Packit Service 084de1
Lunaligned_tail_vmx:
Packit Service 084de1
	stvebx	$D2,@x[3],@x[2]
Packit Service 084de1
	addi	@x[3],@x[3],1
Packit Service 084de1
	cmpw	@x[3],@x[1]
Packit Service 084de1
	bne	Lunaligned_tail_vmx
Packit Service 084de1
Packit Service 084de1
	sub	@x[2],@x[0],@x[1]
Packit Service 084de1
Lunaligned_head_vmx:
Packit Service 084de1
	stvebx	$A0,@x[1],@x[2]
Packit Service 084de1
	cmpwi	@x[1],15
Packit Service 084de1
	addi	@x[1],@x[1],1
Packit Service 084de1
	bne	Lunaligned_head_vmx
Packit Service 084de1
Packit Service 084de1
	${UCMP}i $len,255			# done with 256-byte blocks yet?
Packit Service 084de1
	bgt	Loop_outer_vmx
Packit Service 084de1
Packit Service 084de1
	b	Ldone_vmx
Packit Service 084de1
Packit Service 084de1
.align	4
Packit Service 084de1
Laligned_vmx:
Packit Service 084de1
	stvx	$A0,0,@x[0]			# head hexaword was not stored
Packit Service 084de1
Packit Service 084de1
	${UCMP}i $len,255			# done with 256-byte blocks yet?
Packit Service 084de1
	bgt	Loop_outer_vmx
Packit Service 084de1
	nop
Packit Service 084de1
Packit Service 084de1
Ldone_vmx:
Packit Service 084de1
	${UCMP}i $len,0				# done yet?
Packit Service 084de1
	bnel	__ChaCha20_1x
Packit Service 084de1
Packit Service 084de1
	lwz	r12,`$FRAME-$SIZE_T*18-4`($sp)	# pull vrsave
Packit Service 084de1
	li	r10,`15+$LOCALS+64`
Packit Service 084de1
	li	r11,`31+$LOCALS+64`
Packit Service 084de1
	mtspr	256,r12				# restore vrsave
Packit Service 084de1
	lvx	v23,r10,$sp
Packit Service 084de1
	addi	r10,r10,32
Packit Service 084de1
	lvx	v24,r11,$sp
Packit Service 084de1
	addi	r11,r11,32
Packit Service 084de1
	lvx	v25,r10,$sp
Packit Service 084de1
	addi	r10,r10,32
Packit Service 084de1
	lvx	v26,r11,$sp
Packit Service 084de1
	addi	r11,r11,32
Packit Service 084de1
	lvx	v27,r10,$sp
Packit Service 084de1
	addi	r10,r10,32
Packit Service 084de1
	lvx	v28,r11,$sp
Packit Service 084de1
	addi	r11,r11,32
Packit Service 084de1
	lvx	v29,r10,$sp
Packit Service 084de1
	addi	r10,r10,32
Packit Service 084de1
	lvx	v30,r11,$sp
Packit Service 084de1
	lvx	v31,r10,$sp
Packit Service 084de1
	$POP	r0, `$FRAME+$LRSAVE`($sp)
Packit Service 084de1
	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
Packit Service 084de1
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
Packit Service 084de1
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
Packit Service 084de1
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
Packit Service 084de1
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
Packit Service 084de1
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
Packit Service 084de1
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
Packit Service 084de1
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
Packit Service 084de1
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
Packit Service 084de1
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
Packit Service 084de1
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
Packit Service 084de1
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
Packit Service 084de1
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
Packit Service 084de1
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
Packit Service 084de1
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
Packit Service 084de1
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
Packit Service 084de1
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
Packit Service 084de1
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
Packit Service 084de1
	mtlr	r0
Packit Service 084de1
	addi	$sp,$sp,$FRAME
Packit Service 084de1
	blr
Packit Service 084de1
	.long	0
Packit Service 084de1
	.byte	0,12,0x04,1,0x80,18,5,0
Packit Service 084de1
	.long	0
Packit Service 084de1
.size	.ChaCha20_ctr32_vmx,.-.ChaCha20_ctr32_vmx
Packit Service 084de1
___
Packit Service 084de1
}}}
Packit Service 084de1
{{{
Packit Service 084de1
# Register allocation for the 4xVSX code path.
#
# v0-v15 hold the 4x4 ChaCha state "smashed" across four blocks: each
# vector carries the same state word of four consecutive blocks.
my ($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3,
    $xc0,$xc1,$xc2,$xc3, $xd0,$xd1,$xd2,$xd3) = map("v$_",(0..15));
# v16-v19: the un-smashed input block (sigma, key halves, counter/nonce).
my @K = map("v$_",(16..19));
# v26: per-lane block counters.
my $CTR = "v26";
# v27-v30: scratch registers.  The rotate counts are aliases of the very
# same registers, which is why the generated code re-synthesizes them at
# the top of every outer-loop iteration, after the scratch uses.
my ($xt0,$xt1,$xt2,$xt3) = map("v$_",(27..30));
my ($sixteen,$twelve,$eight,$seven) = ($xt0,$xt1,$xt2,$xt3);
# v31: byte-swap permutation, only set up by "be?"-prefixed instructions.
my $beperm = "v31";

# Index operands for 16-byte strides: $x00 is the literal 0, r8-r10 are
# loaded with 16, 32 and 48 by the prologue.
my ($x00,$x10,$x20,$x30) = (0, map("r$_",(8..10)));

# 7*16 = six 16-byte save slots for v26-v31 plus one spare slot.  The
# spare covers the slack of the 15+/31+ offsets used with lvx/stvx
# (which ignore the low four address bits) and holds the saved vrsave
# word at $FRAME-4.
my $FRAME=$LOCALS+64+7*16;
Packit Service 084de1
Packit Service 084de1
# Return the perlasm statements for one ChaCha round in which four
# quarter-rounds are issued in lock-step, one instruction of each at a
# time.
#
# Arguments are the v-register indices (a,b,c,d) of the first
# quarter-round, e.g. (0,4,8,12) for a column round or (0,5,10,15) for a
# diagonal round.  The result is a list of 48 strings of the form
# '&mnemonic("vA","vB","vC")' which the caller is expected to eval; the
# subs those strings invoke are resolved elsewhere in the file.
sub VSX_lane_ROUND {
my @quarter=([@_[0..3]]);
# Q2..Q4 use the next register inside each group of four, wrapping
# within the group: n -> (n&~3)+((n+1)&3).
push(@quarter,[map(($_&~3)+(($_+1)&3),@{$quarter[-1]})]) foreach (1..3);
my @x=map("\"v$_\"",(0..15));
my @insns;

	# Each step is [sum, addend, mixed, rotate] with 0..3 selecting
	# a..d, and stands for: sum += addend; mixed ^= sum; mixed <<<= rotate
	foreach my $step ([0,1,3,$sixteen], [2,3,1,$twelve],
			  [0,1,3,$eight],   [2,3,1,$seven]) {
	    my ($sum,$addend,$mixed,$rot)=@$step;

	    foreach my $q (@quarter) {			# Q1..Q4
		push(@insns,sprintf("&vadduwm\t(%s,%s,%s)",
			$x[$q->[$sum]],$x[$q->[$sum]],$x[$q->[$addend]]));
	    }
	    foreach my $q (@quarter) {
		push(@insns,sprintf("&vxor\t(%s,%s,%s)",
			$x[$q->[$mixed]],$x[$q->[$mixed]],$x[$q->[$sum]]));
	    }
	    foreach my $q (@quarter) {
		push(@insns,sprintf("&vrlw\t(%s,%s,'%s')",
			$x[$q->[$mixed]],$x[$q->[$mixed]],$rot));
	    }
	}

	return @insns;
}
Packit Service 084de1
Packit Service 084de1
# .ChaCha20_ctr32_vsx, part 1: prologue, one-time setup and the head of
# the outer loop.
#
# The prologue allocates the frame, saves v26-v31 and vrsave, then
# fetches the constant table through Lconsts.  r12 is advanced by 0x50
# so that it points at the smashed sigma rows; r11=64 past that is the
# {0,1,2,3} row that gives each lane its own counter.  Every outer-loop
# iteration rebuilds the smashed state and the rotate constants, then
# falls into Loop_vsx, which runs ctr (10) double rounds.
$code.=<<___;

.globl	.ChaCha20_ctr32_vsx
.align	5
.ChaCha20_ctr32_vsx:
	$STU	$sp,-$FRAME($sp)
	mflr	r0
	li	r10,`15+$LOCALS+64`
	li	r11,`31+$LOCALS+64`
	mfspr	r12,256
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	stw	r12,`$FRAME-4`($sp)		# save vrsave
	li	r12,-4096+63
	$PUSH	r0, `$FRAME+$LRSAVE`($sp)
	mtspr	256,r12				# preserve 26 AltiVec registers

	bl	Lconsts				# returns pointer Lsigma in r12
	lvx_4w	@K[0],0,r12			# load sigma
	addi	r12,r12,0x50
	li	$x10,16
	li	$x20,32
	li	$x30,48
	li	r11,64

	lvx_4w	@K[1],0,$key			# load key
	lvx_4w	@K[2],$x10,$key
	lvx_4w	@K[3],0,$ctr			# load counter

	vxor	$xt0,$xt0,$xt0
	lvx_4w	$xt1,r11,r12
	vspltw	$CTR,@K[3],0
	vsldoi	@K[3],@K[3],$xt0,4
	vsldoi	@K[3],$xt0,@K[3],12		# clear @K[3].word[0]
	vadduwm	$CTR,$CTR,$xt1

	be?lvsl	$beperm,0,$x10			# 0x00..0f
	be?vspltisb $xt0,3			# 0x03..03
	be?vxor	$beperm,$beperm,$xt0		# swap bytes within words

	li	r0,10				# inner loop counter
	mtctr	r0
	b	Loop_outer_vsx

.align	5
Loop_outer_vsx:
	lvx	$xa0,$x00,r12			# load [smashed] sigma
	lvx	$xa1,$x10,r12
	lvx	$xa2,$x20,r12
	lvx	$xa3,$x30,r12

	vspltw	$xb0,@K[1],0			# smash the key
	vspltw	$xb1,@K[1],1
	vspltw	$xb2,@K[1],2
	vspltw	$xb3,@K[1],3

	vspltw	$xc0,@K[2],0
	vspltw	$xc1,@K[2],1
	vspltw	$xc2,@K[2],2
	vspltw	$xc3,@K[2],3

	vmr	$xd0,$CTR			# smash the counter
	vspltw	$xd1,@K[3],1
	vspltw	$xd2,@K[3],2
	vspltw	$xd3,@K[3],3

	vspltisw $sixteen,-16			# synthesize constants
	vspltisw $twelve,12
	vspltisw $eight,8
	vspltisw $seven,7

Loop_vsx:
___
	# Loop body: one column round followed by one diagonal round.
	foreach (&VSX_lane_ROUND(0, 4, 8,12)) { eval; }
	foreach (&VSX_lane_ROUND(0, 5,10,15)) { eval; }
Packit Service 084de1
# .ChaCha20_ctr32_vsx, part 2: close the round loop, transpose the
# smashed state back into four 64-byte key-stream blocks and consume
# them one at a time.
#
# For each block the input row @K[0..3] is added, bytes are swapped on
# big-endian, and the block is xor-ed with 64 bytes of input.  When fewer
# than 64 bytes remain, Ltail_vsx parks the block on the stack, xors it
# byte by byte and overwrites the stack copy before returning.  The
# compare against 0x40 made ahead of each block also drives the
# following beq/bne, since the intervening instructions leave the
# condition register alone.
$code.=<<___;
	bdnz	Loop_vsx

	vadduwm	$xd0,$xd0,$CTR

	vmrgew	$xt0,$xa0,$xa1			# transpose data
	vmrgew	$xt1,$xa2,$xa3
	vmrgow	$xa0,$xa0,$xa1
	vmrgow	$xa2,$xa2,$xa3
	 vmrgew	$xt2,$xb0,$xb1
	 vmrgew	$xt3,$xb2,$xb3
	vpermdi	$xa1,$xa0,$xa2,0b00
	vpermdi	$xa3,$xa0,$xa2,0b11
	vpermdi	$xa0,$xt0,$xt1,0b00
	vpermdi	$xa2,$xt0,$xt1,0b11

	vmrgow	$xb0,$xb0,$xb1
	vmrgow	$xb2,$xb2,$xb3
	 vmrgew	$xt0,$xc0,$xc1
	 vmrgew	$xt1,$xc2,$xc3
	vpermdi	$xb1,$xb0,$xb2,0b00
	vpermdi	$xb3,$xb0,$xb2,0b11
	vpermdi	$xb0,$xt2,$xt3,0b00
	vpermdi	$xb2,$xt2,$xt3,0b11

	vmrgow	$xc0,$xc0,$xc1
	vmrgow	$xc2,$xc2,$xc3
	 vmrgew	$xt2,$xd0,$xd1
	 vmrgew	$xt3,$xd2,$xd3
	vpermdi	$xc1,$xc0,$xc2,0b00
	vpermdi	$xc3,$xc0,$xc2,0b11
	vpermdi	$xc0,$xt0,$xt1,0b00
	vpermdi	$xc2,$xt0,$xt1,0b11

	vmrgow	$xd0,$xd0,$xd1
	vmrgow	$xd2,$xd2,$xd3
	 vspltisw $xt0,4
	 vadduwm  $CTR,$CTR,$xt0		# next counter value
	vpermdi	$xd1,$xd0,$xd2,0b00
	vpermdi	$xd3,$xd0,$xd2,0b11
	vpermdi	$xd0,$xt2,$xt3,0b00
	vpermdi	$xd2,$xt2,$xt3,0b11

	vadduwm	$xa0,$xa0,@K[0]
	vadduwm	$xb0,$xb0,@K[1]
	vadduwm	$xc0,$xc0,@K[2]
	vadduwm	$xd0,$xd0,@K[3]

	be?vperm $xa0,$xa0,$xa0,$beperm
	be?vperm $xb0,$xb0,$xb0,$beperm
	be?vperm $xc0,$xc0,$xc0,$beperm
	be?vperm $xd0,$xd0,$xd0,$beperm

	${UCMP}i $len,0x40
	blt	Ltail_vsx

	lvx_4w	$xt0,$x00,$inp
	lvx_4w	$xt1,$x10,$inp
	lvx_4w	$xt2,$x20,$inp
	lvx_4w	$xt3,$x30,$inp

	vxor	$xt0,$xt0,$xa0
	vxor	$xt1,$xt1,$xb0
	vxor	$xt2,$xt2,$xc0
	vxor	$xt3,$xt3,$xd0

	stvx_4w	$xt0,$x00,$out
	stvx_4w	$xt1,$x10,$out
	addi	$inp,$inp,0x40
	stvx_4w	$xt2,$x20,$out
	subi	$len,$len,0x40
	stvx_4w	$xt3,$x30,$out
	addi	$out,$out,0x40
	beq	Ldone_vsx

	vadduwm	$xa0,$xa1,@K[0]
	vadduwm	$xb0,$xb1,@K[1]
	vadduwm	$xc0,$xc1,@K[2]
	vadduwm	$xd0,$xd1,@K[3]

	be?vperm $xa0,$xa0,$xa0,$beperm
	be?vperm $xb0,$xb0,$xb0,$beperm
	be?vperm $xc0,$xc0,$xc0,$beperm
	be?vperm $xd0,$xd0,$xd0,$beperm

	${UCMP}i $len,0x40
	blt	Ltail_vsx

	lvx_4w	$xt0,$x00,$inp
	lvx_4w	$xt1,$x10,$inp
	lvx_4w	$xt2,$x20,$inp
	lvx_4w	$xt3,$x30,$inp

	vxor	$xt0,$xt0,$xa0
	vxor	$xt1,$xt1,$xb0
	vxor	$xt2,$xt2,$xc0
	vxor	$xt3,$xt3,$xd0

	stvx_4w	$xt0,$x00,$out
	stvx_4w	$xt1,$x10,$out
	addi	$inp,$inp,0x40
	stvx_4w	$xt2,$x20,$out
	subi	$len,$len,0x40
	stvx_4w	$xt3,$x30,$out
	addi	$out,$out,0x40
	beq	Ldone_vsx

	vadduwm	$xa0,$xa2,@K[0]
	vadduwm	$xb0,$xb2,@K[1]
	vadduwm	$xc0,$xc2,@K[2]
	vadduwm	$xd0,$xd2,@K[3]

	be?vperm $xa0,$xa0,$xa0,$beperm
	be?vperm $xb0,$xb0,$xb0,$beperm
	be?vperm $xc0,$xc0,$xc0,$beperm
	be?vperm $xd0,$xd0,$xd0,$beperm

	${UCMP}i $len,0x40
	blt	Ltail_vsx

	lvx_4w	$xt0,$x00,$inp
	lvx_4w	$xt1,$x10,$inp
	lvx_4w	$xt2,$x20,$inp
	lvx_4w	$xt3,$x30,$inp

	vxor	$xt0,$xt0,$xa0
	vxor	$xt1,$xt1,$xb0
	vxor	$xt2,$xt2,$xc0
	vxor	$xt3,$xt3,$xd0

	stvx_4w	$xt0,$x00,$out
	stvx_4w	$xt1,$x10,$out
	addi	$inp,$inp,0x40
	stvx_4w	$xt2,$x20,$out
	subi	$len,$len,0x40
	stvx_4w	$xt3,$x30,$out
	addi	$out,$out,0x40
	beq	Ldone_vsx

	vadduwm	$xa0,$xa3,@K[0]
	vadduwm	$xb0,$xb3,@K[1]
	vadduwm	$xc0,$xc3,@K[2]
	vadduwm	$xd0,$xd3,@K[3]

	be?vperm $xa0,$xa0,$xa0,$beperm
	be?vperm $xb0,$xb0,$xb0,$beperm
	be?vperm $xc0,$xc0,$xc0,$beperm
	be?vperm $xd0,$xd0,$xd0,$beperm

	${UCMP}i $len,0x40
	blt	Ltail_vsx

	lvx_4w	$xt0,$x00,$inp
	lvx_4w	$xt1,$x10,$inp
	lvx_4w	$xt2,$x20,$inp
	lvx_4w	$xt3,$x30,$inp

	vxor	$xt0,$xt0,$xa0
	vxor	$xt1,$xt1,$xb0
	vxor	$xt2,$xt2,$xc0
	vxor	$xt3,$xt3,$xd0

	stvx_4w	$xt0,$x00,$out
	stvx_4w	$xt1,$x10,$out
	addi	$inp,$inp,0x40
	stvx_4w	$xt2,$x20,$out
	subi	$len,$len,0x40
	stvx_4w	$xt3,$x30,$out
	addi	$out,$out,0x40
	mtctr	r0
	bne	Loop_outer_vsx

Ldone_vsx:
	lwz	r12,`$FRAME-4`($sp)		# pull vrsave
	li	r10,`15+$LOCALS+64`
	li	r11,`31+$LOCALS+64`
	$POP	r0, `$FRAME+$LRSAVE`($sp)
	mtspr	256,r12				# restore vrsave
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr

.align	4
Ltail_vsx:
	addi	r11,$sp,$LOCALS
	mtctr	$len
	stvx_4w	$xa0,$x00,r11			# offload block to stack
	stvx_4w	$xb0,$x10,r11
	stvx_4w	$xc0,$x20,r11
	stvx_4w	$xd0,$x30,r11
	subi	r12,r11,1			# prepare for *++ptr
	subi	$inp,$inp,1
	subi	$out,$out,1

Loop_tail_vsx:
	lbzu	r6,1(r12)
	lbzu	r7,1($inp)
	xor	r6,r6,r7
	stbu	r6,1($out)
	bdnz	Loop_tail_vsx

	stvx_4w	$K[0],$x00,r11			# wipe copy of the block
	stvx_4w	$K[0],$x10,r11
	stvx_4w	$K[0],$x20,r11
	stvx_4w	$K[0],$x30,r11

	b	Ldone_vsx
	.long	0
	.byte	0,12,0x04,1,0x80,0,5,0
	.long	0
.size	.ChaCha20_ctr32_vsx,.-.ChaCha20_ctr32_vsx
___
Packit Service 084de1
}}}
Packit Service 084de1
# Lconsts returns the address of Lsigma in r12 without disturbing the
# caller's link register: bcl 20,31,\$+4 captures its own successor's
# address, and the code is padded to exactly 64 bytes so that adding
# 64-8 lands on Lsigma.
#
# Table layout, as byte offsets from Lsigma:
#   0x00  sigma ("expand 32-byte k")
#   0x10  {1,0,0,0}
#   0x20  {4,0,0,0}
#   0x30  two 16-byte permutation masks in endian-specific order
#         (their consumer is outside this part of the file)
#   0x50  sigma with every word replicated across a vector; the VSX
#         path loads these four rows at the top of each outer loop
#   0x90  {0,1,2,3}, added to the splatted counter by the VSX path
$code.=<<___;
.align	5
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	r12	#vvvvv "distance" between . and Lsigma
	addi	r12,r12,`64-8`
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
	.space	`64-9*4`
Lsigma:
	.long   0x61707865,0x3320646e,0x79622d32,0x6b206574
	.long	1,0,0,0
	.long	4,0,0,0
___
$code.=<<___ 	if ($LITTLE_ENDIAN);
	.long	0x0e0f0c0d,0x0a0b0809,0x06070405,0x02030001
	.long	0x0d0e0f0c,0x090a0b08,0x05060704,0x01020300
___
$code.=<<___ 	if (!$LITTLE_ENDIAN);	# flipped words
	.long	0x02030001,0x06070405,0x0a0b0809,0x0e0f0c0d
	.long	0x01020300,0x05060704,0x090a0b08,0x0d0e0f0c
___
$code.=<<___;
	.long	0x61707865,0x61707865,0x61707865,0x61707865
	.long	0x3320646e,0x3320646e,0x3320646e,0x3320646e
	.long	0x79622d32,0x79622d32,0x79622d32,0x79622d32
	.long	0x6b206574,0x6b206574,0x6b206574,0x6b206574
	.long	0,1,2,3
.asciz  "ChaCha20 for PowerPC/AltiVec, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
___
Packit Service 084de1
Packit Service 084de1
# Post-process the accumulated assembly one line at a time and print it
# to STDOUT.
foreach (split("\n",$code)) {
	# Replace every `expr` with the value of the Perl expression;
	# here-doc interpolation has already turned variables into
	# literals by this point.
	s/\`([^\`]*)\`/eval $1/ge;

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	# The rewrites are chained with "or", so at most one of them is
	# applied to any given line.
	if ($flavour !~ /le$/) {	# big-endian
	    s/be\?//		or	# keep big-endian-only lines
	    s/le\?/#le#/	or	# comment out little-endian-only lines
	    s/\?lvsr/lvsl/	or
	    s/\?lvsl/lvsr/	or
	    # ?vperm: swap the two source operands
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/ or
	    s/vrldoi(\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9]+)/vsldoi$1$2$2 16-$3/;
	} else {			# little-endian
	    s/le\?//		or	# keep little-endian-only lines
	    s/be\?/#be#/	or	# comment out big-endian-only lines
	    s/\?([a-z]+)/$1/	or	# '?' prefix is simply dropped
	    s/vrldoi(\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9]+)/vsldoi$1$2$2 $3/;
	}

	print $_,"\n";
}

# Buffered write errors only surface at close, so check it.
close STDOUT or die "error closing STDOUT: $!";