// crypto/bn/asm/s390x.S
.ident "s390x.S, version 1.1"
// ====================================================================
// Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License").  You may not use
// this file except in compliance with the License.  You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
// ====================================================================

.text

// Symbolic name for %r0.  It is only an alias: every routine that relies on
// it holding 0 loads it explicitly (lghi zero,0) before use.
#define zero	%r0

// BN_ULONG bn_mul_add_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
//
// Computes rp[i] += ap[i]*w for i = 0..len-1 and returns the carry word
// that is left over after the last limb.
//   In:   %r2 = rp, %r3 = ap, %r4 = len (32-bit int), %r5 = w
//   Out:  %r2 = final carry word (0 if len <= 0)
//   Uses: %r0 (zero), %r1 = rp-ap so that 0(%r3,%r1) addresses rp[i],
//         %r6-%r13 (stored at 48(%r15) on entry and reloaded on exit)
//
// mlgr %rN,%r5 multiplies the odd register of the pair (%rN+1) by %r5 and
// leaves the 128-bit product in %rN (high word) : %rN+1 (low word).
// The carry bit is kept in the condition code between the alg/alcgr
// instructions, including across loop iterations, so only instructions
// that leave the condition code untouched (lg, stg, la, lgr, mlgr, brct)
// are placed between them.
.globl	bn_mul_add_words
.type	bn_mul_add_words,@function
.align	4
bn_mul_add_words:
	lghi	zero,0		// zero = 0
	la	%r1,0(%r2)	// put rp aside [to give way to]
	lghi	%r2,0		// return value
	ltgfr	%r4,%r4		// sign-extend len and test it
	bler	%r14		// if (len<=0) return 0;

	stmg	%r6,%r13,48(%r15)
	lghi	%r2,3
	lghi	%r12,0		// carry = 0
	slgr	%r1,%r3		// rp-=ap
	nr	%r2,%r4		// len%4
	sra	%r4,2		// cnt=len/4
	jz	.Loop1_madd	// carry is incidentally cleared if branch taken
	algr	zero,zero	// clear carry

	// Prime the software pipeline: the 4x loop always has the next two
	// ap words loaded and the first product already computed.
	lg	%r7,0(%r3)	// ap[0]
	lg	%r9,8(%r3)	// ap[1]
	mlgr	%r6,%r5		// *=w
	brct	%r4,.Loop4_madd
	j	.Loop4_madd_tail	// cnt was 1: only the tail is needed

.Loop4_madd:
	mlgr	%r8,%r5
	lg	%r11,16(%r3)	// ap[i+2]
	alcgr	%r7,%r12	// +=carry
	alcgr	%r6,zero
	alg	%r7,0(%r3,%r1)	// +=rp[i]
	stg	%r7,0(%r3,%r1)	// rp[i]=

	mlgr	%r10,%r5
	lg	%r13,24(%r3)	// ap[i+3]
	alcgr	%r9,%r6
	alcgr	%r8,zero
	alg	%r9,8(%r3,%r1)
	stg	%r9,8(%r3,%r1)

	mlgr	%r12,%r5
	lg	%r7,32(%r3)	// ap[i+4], for the next iteration
	alcgr	%r11,%r8
	alcgr	%r10,zero
	alg	%r11,16(%r3,%r1)
	stg	%r11,16(%r3,%r1)

	mlgr	%r6,%r5
	lg	%r9,40(%r3)	// ap[i+5], for the next iteration
	alcgr	%r13,%r10
	alcgr	%r12,zero
	alg	%r13,24(%r3,%r1)
	stg	%r13,24(%r3,%r1)

	la	%r3,32(%r3)	// i+=4
	brct	%r4,.Loop4_madd

	// Last group of four: same arithmetic as the loop body, but without
	// the look-ahead loads of ap[i+4] and ap[i+5].
.Loop4_madd_tail:
	mlgr	%r8,%r5
	lg	%r11,16(%r3)
	alcgr	%r7,%r12	// +=carry
	alcgr	%r6,zero
	alg	%r7,0(%r3,%r1)	// +=rp[i]
	stg	%r7,0(%r3,%r1)	// rp[i]=

	mlgr	%r10,%r5
	lg	%r13,24(%r3)
	alcgr	%r9,%r6
	alcgr	%r8,zero
	alg	%r9,8(%r3,%r1)
	stg	%r9,8(%r3,%r1)

	mlgr	%r12,%r5
	alcgr	%r11,%r8
	alcgr	%r10,zero
	alg	%r11,16(%r3,%r1)
	stg	%r11,16(%r3,%r1)

	alcgr	%r13,%r10
	alcgr	%r12,zero
	alg	%r13,24(%r3,%r1)
	stg	%r13,24(%r3,%r1)

	la	%r3,32(%r3)	// i+=4

	la	%r2,1(%r2)	// see if len%4 is zero ...
	brct	%r2,.Loop1_madd	// without touching condition code:-)

.Lend_madd:
	lgr	%r2,zero	// return value
	alcgr	%r2,%r12	// collect even carry bit
	lmg	%r6,%r13,48(%r15)
	br	%r14

	// One word per iteration; %r2 counts the remaining len%4 words.
.Loop1_madd:
	lg	%r7,0(%r3)	// ap[i]
	mlgr	%r6,%r5		// *=w
	alcgr	%r7,%r12	// +=carry
	alcgr	%r6,zero
	alg	%r7,0(%r3,%r1)	// +=rp[i]
	stg	%r7,0(%r3,%r1)	// rp[i]=

	lgr	%r12,%r6	// high word becomes the next carry word
	la	%r3,8(%r3)	// i++
	brct	%r2,.Loop1_madd

	j	.Lend_madd
.size	bn_mul_add_words,.-bn_mul_add_words

// BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
//
// Computes rp[i] = low word of (ap[i]*w + carry) for i = 0..len-1, where
// carry is the high word of the previous product, and returns the final
// carry word.
//   In:   %r2 = rp, %r3 = ap, %r4 = len (32-bit int), %r5 = w
//   Out:  %r2 = final carry word (0 if len <= 0)
//   Uses: %r0 (zero), %r1 = rp, %r2 = byte offset i while looping,
//         %r6-%r10 (stored at 48(%r15) on entry and reloaded on exit)
//
// mlgr %rN,%r5 multiplies %rN+1 by %r5 into %rN (high) : %rN+1 (low).
// The carry bit from each alcgr stays in the condition code and is consumed
// by the next alcgr, so the instructions in between (lg, stg, la, lgr,
// mlgr, brct) are ones that do not change the condition code.
.globl	bn_mul_words
.type	bn_mul_words,@function
.align	4
bn_mul_words:
	lghi	zero,0		// zero = 0
	la	%r1,0(%r2)	// put rp aside
	lghi	%r2,0		// i=0;
	ltgfr	%r4,%r4		// sign-extend len and test it
	bler	%r14		// if (len<=0) return 0;

	stmg	%r6,%r10,48(%r15)
	lghi	%r10,3
	lghi	%r8,0		// carry = 0
	nr	%r10,%r4	// len%4
	sra	%r4,2		// cnt=len/4
	jz	.Loop1_mul	// carry is incidentally cleared if branch taken
	algr	zero,zero	// clear carry

	// Four words per iteration; the high word alternates between %r6
	// and %r8 so each product can be started before the previous high
	// word has been consumed.
.Loop4_mul:
	lg	%r7,0(%r2,%r3)	// ap[i]
	mlgr	%r6,%r5		// *=w
	alcgr	%r7,%r8		// +=carry
	stg	%r7,0(%r2,%r1)	// rp[i]=

	lg	%r9,8(%r2,%r3)
	mlgr	%r8,%r5
	alcgr	%r9,%r6
	stg	%r9,8(%r2,%r1)

	lg	%r7,16(%r2,%r3)
	mlgr	%r6,%r5
	alcgr	%r7,%r8
	stg	%r7,16(%r2,%r1)

	lg	%r9,24(%r2,%r3)
	mlgr	%r8,%r5
	alcgr	%r9,%r6
	stg	%r9,24(%r2,%r1)

	la	%r2,32(%r2)	// i+=4
	brct	%r4,.Loop4_mul

	la	%r10,1(%r10)		// see if len%4 is zero ...
	brct	%r10,.Loop1_mul		// without touching condition code:-)

.Lend_mul:
	alcgr	%r8,zero	// collect carry bit
	lgr	%r2,%r8
	lmg	%r6,%r10,48(%r15)
	br	%r14

	// One word per iteration; %r10 counts the remaining len%4 words.
.Loop1_mul:
	lg	%r7,0(%r2,%r3)	// ap[i]
	mlgr	%r6,%r5		// *=w
	alcgr	%r7,%r8		// +=carry
	stg	%r7,0(%r2,%r1)	// rp[i]=

	lgr	%r8,%r6		// high word becomes the next carry word
	la	%r2,8(%r2)	// i++
	brct	%r10,.Loop1_mul

	j	.Lend_mul
.size	bn_mul_words,.-bn_mul_words

// void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r3,int r4)
//
// For i = 0..len-1 stores the 128-bit square of the word at %r3[i] as two
// words: low word at %r2[2*i], high word at %r2[2*i+1].
//   In:   %r2 = result pointer (2*len words are written),
//         %r3 = source pointer, %r4 = len (32-bit int)
//   Out:  none; returns immediately if len <= 0
//   Uses: %r1 (loop counter), %r6:%r7 (product pair, stored at 48(%r15) on
//         entry and reloaded on exit); %r2 and %r3 are advanced.
// mlgr %r6,%r7 multiplies %r7 by itself into %r6 (high) : %r7 (low).
.globl	bn_sqr_words
.type	bn_sqr_words,@function
.align	4
bn_sqr_words:
	ltgfr	%r4,%r4		// sign-extend len and test it
	bler	%r14		// if (len<=0) return;

	stmg	%r6,%r7,48(%r15)
	srag	%r1,%r4,2	// cnt=len/4
	jz	.Loop1_sqr	// fewer than 4 words: %r4 is already len%4

.Loop4_sqr:
	lg	%r7,0(%r3)
	mlgr	%r6,%r7
	stg	%r7,0(%r2)
	stg	%r6,8(%r2)

	lg	%r7,8(%r3)
	mlgr	%r6,%r7
	stg	%r7,16(%r2)
	stg	%r6,24(%r2)

	lg	%r7,16(%r3)
	mlgr	%r6,%r7
	stg	%r7,32(%r2)
	stg	%r6,40(%r2)

	lg	%r7,24(%r3)
	mlgr	%r6,%r7
	stg	%r7,48(%r2)
	stg	%r6,56(%r2)

	la	%r3,32(%r3)	// 4 source words consumed
	la	%r2,64(%r2)	// 8 result words produced
	brct	%r1,.Loop4_sqr

	lghi	%r1,3
	nr	%r4,%r1		// cnt=len%4
	jz	.Lend_sqr

.Loop1_sqr:
	lg	%r7,0(%r3)
	mlgr	%r6,%r7
	stg	%r7,0(%r2)
	stg	%r6,8(%r2)

	la	%r3,8(%r3)
	la	%r2,16(%r2)
	brct	%r4,.Loop1_sqr

.Lend_sqr:
	lmg	%r6,%r7,48(%r15)
	br	%r14
.size	bn_sqr_words,.-bn_sqr_words

// BN_ULONG bn_div_words(BN_ULONG h,BN_ULONG l,BN_ULONG d);
//
// Divides the 128-bit value h:l by d and returns the 64-bit quotient.
//   In:   %r2 = h (high word), %r3 = l (low word), %r4 = d
//   Out:  %r2 = quotient
// dlgr %r2,%r4 treats the pair %r2:%r3 as the dividend and leaves the
// remainder in %r2 and the quotient in %r3; the quotient is then copied
// to %r2 as the return value.
// NOTE(review): no check is made here for d == 0 or for a quotient that
// does not fit in 64 bits; confirm that callers guarantee h < d.
.globl	bn_div_words
.type	bn_div_words,@function
.align	4
bn_div_words:
	dlgr	%r2,%r4		// %r3 = (h:l)/d, %r2 = (h:l)%d
	lgr	%r2,%r3		// return the quotient
	br	%r14
.size	bn_div_words,.-bn_div_words

// BN_ULONG bn_add_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
//
// Computes rp[i] = ap[i] + bp[i] + carry for i = 0..len-1 and returns the
// carry out of the last word (0 or 1).
//   In:   %r2 = rp, %r3 = ap, %r4 = bp, %r5 = len (32-bit int)
//   Out:  %r2 = carry (0 if len <= 0)
//   Uses: %r0 (scratch word), %r1 = rp, %r2 = byte offset i while looping,
//         %r6 = len%4 counter (stored at 48(%r15) and reloaded on exit)
// The carry lives in the condition code between alcg instructions; lg,
// stg, la, lghi and brct do not disturb it.
.globl	bn_add_words
.type	bn_add_words,@function
.align	4
bn_add_words:
	la	%r1,0(%r2)	// put rp aside
	lghi	%r2,0		// i=0
	ltgfr	%r5,%r5		// sign-extend len and test it
	bler	%r14		// if (len<=0) return 0;

	stg	%r6,48(%r15)
	lghi	%r6,3
	nr	%r6,%r5		// len%4
	sra	%r5,2		// len/4, use sra because it sets condition code
	jz	.Loop1_add	// carry is incidentally cleared if branch taken
	algr	%r2,%r2		// clear carry

.Loop4_add:
	lg	%r0,0(%r2,%r3)
	alcg	%r0,0(%r2,%r4)
	stg	%r0,0(%r2,%r1)
	lg	%r0,8(%r2,%r3)
	alcg	%r0,8(%r2,%r4)
	stg	%r0,8(%r2,%r1)
	lg	%r0,16(%r2,%r3)
	alcg	%r0,16(%r2,%r4)
	stg	%r0,16(%r2,%r1)
	lg	%r0,24(%r2,%r3)
	alcg	%r0,24(%r2,%r4)
	stg	%r0,24(%r2,%r1)

	la	%r2,32(%r2)	// i+=4
	brct	%r5,.Loop4_add

	la	%r6,1(%r6)	// see if len%4 is zero ...
	brct	%r6,.Loop1_add	// without touching condition code:-)

.Lexit_add:
	lghi	%r2,0
	alcgr	%r2,%r2		// return value = 0 + 0 + carry
	lg	%r6,48(%r15)
	br	%r14

	// One word per iteration; %r6 counts the remaining len%4 words.
.Loop1_add:
	lg	%r0,0(%r2,%r3)
	alcg	%r0,0(%r2,%r4)
	stg	%r0,0(%r2,%r1)

	la	%r2,8(%r2)	// i++
	brct	%r6,.Loop1_add

	j	.Lexit_add
.size	bn_add_words,.-bn_add_words

// BN_ULONG bn_sub_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
//
// Computes rp[i] = ap[i] - bp[i] - borrow for i = 0..len-1 and returns the
// borrow out of the last word (0 or 1).
//   In:   %r2 = rp, %r3 = ap, %r4 = bp, %r5 = len (32-bit int)
//   Out:  %r2 = borrow (0 if len <= 0)
//   Uses: %r0 (scratch word), %r1 = rp, %r2 = byte offset i while looping,
//         %r6 = len%4 counter (stored at 48(%r15) and reloaded on exit)
// The borrow lives in the condition code between slbg instructions; lg,
// stg, la, lghi and brct do not disturb it.
.globl	bn_sub_words
.type	bn_sub_words,@function
.align	4
bn_sub_words:
	la	%r1,0(%r2)	// put rp aside
	lghi	%r2,0		// i=0
	ltgfr	%r5,%r5		// sign-extend len and test it
	bler	%r14		// if (len<=0) return 0;

	stg	%r6,48(%r15)
	lghi	%r6,3
	nr	%r6,%r5		// len%4
	sra	%r5,2		// len/4, use sra because it sets condition code
	jnz	.Loop4_sub	// borrow is incidentally cleared if branch taken
	slgr	%r2,%r2		// clear borrow

	// One word per iteration; %r6 counts the remaining len%4 words.
.Loop1_sub:
	lg	%r0,0(%r2,%r3)
	slbg	%r0,0(%r2,%r4)
	stg	%r0,0(%r2,%r1)

	la	%r2,8(%r2)	// i++
	brct	%r6,.Loop1_sub
	j	.Lexit_sub

.Loop4_sub:
	lg	%r0,0(%r2,%r3)
	slbg	%r0,0(%r2,%r4)
	stg	%r0,0(%r2,%r1)
	lg	%r0,8(%r2,%r3)
	slbg	%r0,8(%r2,%r4)
	stg	%r0,8(%r2,%r1)
	lg	%r0,16(%r2,%r3)
	slbg	%r0,16(%r2,%r4)
	stg	%r0,16(%r2,%r1)
	lg	%r0,24(%r2,%r3)
	slbg	%r0,24(%r2,%r4)
	stg	%r0,24(%r2,%r1)

	la	%r2,32(%r2)	// i+=4
	brct	%r5,.Loop4_sub

	la	%r6,1(%r6)	// see if len%4 is zero ...
	brct	%r6,.Loop1_sub	// without touching condition code:-)

.Lexit_sub:
	lghi	%r2,0
	slbgr	%r2,%r2		// 0 - 0 - borrow: 0 or -1
	lcgr	%r2,%r2		// negate to return 0 or 1
	lg	%r6,48(%r15)
	br	%r14
.size	bn_sub_words,.-bn_sub_words

// Registers of the three-word column accumulator used by the comba
// routines.  Callers pass them to the *_add_c macros in rotated order
// (c1,c2,c3), (c2,c3,c1), (c3,c1,c2) so that the lowest word of the
// accumulator is always the first of the three arguments.
#define c1	%r1
#define c2	%r5
#define c3	%r8

// mul_add_c(ai,bi,c1,c2,c3): adds the 128-bit product of the word at
// ai*8(%r3) and the word at bi*8(%r4) to the three-word accumulator
// c3:c2:c1 (c1 is the lowest word).
// Clobbers %r6:%r7 (product pair) and the condition code; expects the
// register named zero to hold 0.  The macro parameters c1,c2,c3 shadow
// the register aliases above within the macro body.
#define mul_add_c(ai,bi,c1,c2,c3)	\
	lg	%r7,ai*8(%r3);		\
	mlg	%r6,bi*8(%r4);		\
	algr	c1,%r7;			\
	alcgr	c2,%r6;			\
	alcgr	c3,zero

// void bn_mul_comba8(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
//
// Multiplies the 8-word operand at %r3 by the 8-word operand at %r4 and
// stores the 16-word product at %r2, least significant word first.
// The product is built column by column: for result word k every partial
// product whose two indices sum to k is added to the three-word accumulator,
// the lowest accumulator word is stored as result word k, that register is
// cleared, and the accumulator roles rotate for the next column.
//   In:   %r2 = result (16 words), %r3 = first operand, %r4 = second operand
//   Uses: %r0 (zero), %r1/%r5/%r8 (c1/c2/c3), %r6:%r7 (product pair);
//         %r6-%r8 are stored at 48(%r15) on entry and reloaded on exit.
.globl	bn_mul_comba8
.type	bn_mul_comba8,@function
.align	4
bn_mul_comba8:
	stmg	%r6,%r8,48(%r15)

	lghi	c1,0
	lghi	c2,0
	lghi	c3,0
	lghi	zero,0

	// column 0
	mul_add_c(0,0,c1,c2,c3);
	stg	c1,0*8(%r2)
	lghi	c1,0

	// column 1
	mul_add_c(0,1,c2,c3,c1);
	mul_add_c(1,0,c2,c3,c1);
	stg	c2,1*8(%r2)
	lghi	c2,0

	// column 2
	mul_add_c(2,0,c3,c1,c2);
	mul_add_c(1,1,c3,c1,c2);
	mul_add_c(0,2,c3,c1,c2);
	stg	c3,2*8(%r2)
	lghi	c3,0

	// column 3
	mul_add_c(0,3,c1,c2,c3);
	mul_add_c(1,2,c1,c2,c3);
	mul_add_c(2,1,c1,c2,c3);
	mul_add_c(3,0,c1,c2,c3);
	stg	c1,3*8(%r2)
	lghi	c1,0

	// column 4
	mul_add_c(4,0,c2,c3,c1);
	mul_add_c(3,1,c2,c3,c1);
	mul_add_c(2,2,c2,c3,c1);
	mul_add_c(1,3,c2,c3,c1);
	mul_add_c(0,4,c2,c3,c1);
	stg	c2,4*8(%r2)
	lghi	c2,0

	// column 5
	mul_add_c(0,5,c3,c1,c2);
	mul_add_c(1,4,c3,c1,c2);
	mul_add_c(2,3,c3,c1,c2);
	mul_add_c(3,2,c3,c1,c2);
	mul_add_c(4,1,c3,c1,c2);
	mul_add_c(5,0,c3,c1,c2);
	stg	c3,5*8(%r2)
	lghi	c3,0

	// column 6
	mul_add_c(6,0,c1,c2,c3);
	mul_add_c(5,1,c1,c2,c3);
	mul_add_c(4,2,c1,c2,c3);
	mul_add_c(3,3,c1,c2,c3);
	mul_add_c(2,4,c1,c2,c3);
	mul_add_c(1,5,c1,c2,c3);
	mul_add_c(0,6,c1,c2,c3);
	stg	c1,6*8(%r2)
	lghi	c1,0

	// column 7
	mul_add_c(0,7,c2,c3,c1);
	mul_add_c(1,6,c2,c3,c1);
	mul_add_c(2,5,c2,c3,c1);
	mul_add_c(3,4,c2,c3,c1);
	mul_add_c(4,3,c2,c3,c1);
	mul_add_c(5,2,c2,c3,c1);
	mul_add_c(6,1,c2,c3,c1);
	mul_add_c(7,0,c2,c3,c1);
	stg	c2,7*8(%r2)
	lghi	c2,0

	// column 8
	mul_add_c(7,1,c3,c1,c2);
	mul_add_c(6,2,c3,c1,c2);
	mul_add_c(5,3,c3,c1,c2);
	mul_add_c(4,4,c3,c1,c2);
	mul_add_c(3,5,c3,c1,c2);
	mul_add_c(2,6,c3,c1,c2);
	mul_add_c(1,7,c3,c1,c2);
	stg	c3,8*8(%r2)
	lghi	c3,0

	// column 9
	mul_add_c(2,7,c1,c2,c3);
	mul_add_c(3,6,c1,c2,c3);
	mul_add_c(4,5,c1,c2,c3);
	mul_add_c(5,4,c1,c2,c3);
	mul_add_c(6,3,c1,c2,c3);
	mul_add_c(7,2,c1,c2,c3);
	stg	c1,9*8(%r2)
	lghi	c1,0

	// column 10
	mul_add_c(7,3,c2,c3,c1);
	mul_add_c(6,4,c2,c3,c1);
	mul_add_c(5,5,c2,c3,c1);
	mul_add_c(4,6,c2,c3,c1);
	mul_add_c(3,7,c2,c3,c1);
	stg	c2,10*8(%r2)
	lghi	c2,0

	// column 11
	mul_add_c(4,7,c3,c1,c2);
	mul_add_c(5,6,c3,c1,c2);
	mul_add_c(6,5,c3,c1,c2);
	mul_add_c(7,4,c3,c1,c2);
	stg	c3,11*8(%r2)
	lghi	c3,0

	// column 12
	mul_add_c(7,5,c1,c2,c3);
	mul_add_c(6,6,c1,c2,c3);
	mul_add_c(5,7,c1,c2,c3);
	stg	c1,12*8(%r2)
	lghi	c1,0

	// column 13
	mul_add_c(6,7,c2,c3,c1);
	mul_add_c(7,6,c2,c3,c1);
	stg	c2,13*8(%r2)
	lghi	c2,0

	// column 14, plus the remaining accumulator word as result word 15
	mul_add_c(7,7,c3,c1,c2);
	stg	c3,14*8(%r2)
	stg	c1,15*8(%r2)

	lmg	%r6,%r8,48(%r15)
	br	%r14
.size	bn_mul_comba8,.-bn_mul_comba8

// void bn_mul_comba4(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
//
// Multiplies the 4-word operand at %r3 by the 4-word operand at %r4 and
// stores the 8-word product at %r2, least significant word first.
// The product is built column by column: for result word k every partial
// product whose two indices sum to k is added to the three-word accumulator,
// the lowest accumulator word is stored as result word k, that register is
// cleared, and the accumulator roles rotate for the next column.
//   In:   %r2 = result (8 words), %r3 = first operand, %r4 = second operand
//   Uses: %r0 (zero), %r1/%r5/%r8 (c1/c2/c3), %r6:%r7 (product pair);
//         %r6-%r8 are stored at 48(%r15) on entry and reloaded on exit.
.globl	bn_mul_comba4
.type	bn_mul_comba4,@function
.align	4
bn_mul_comba4:
	stmg	%r6,%r8,48(%r15)

	lghi	c1,0
	lghi	c2,0
	lghi	c3,0
	lghi	zero,0

	// column 0
	mul_add_c(0,0,c1,c2,c3);
	stg	c1,0*8(%r2)
	lghi	c1,0

	// column 1
	mul_add_c(0,1,c2,c3,c1);
	mul_add_c(1,0,c2,c3,c1);
	stg	c2,1*8(%r2)
	lghi	c2,0

	// column 2
	mul_add_c(2,0,c3,c1,c2);
	mul_add_c(1,1,c3,c1,c2);
	mul_add_c(0,2,c3,c1,c2);
	stg	c3,2*8(%r2)
	lghi	c3,0

	// column 3
	mul_add_c(0,3,c1,c2,c3);
	mul_add_c(1,2,c1,c2,c3);
	mul_add_c(2,1,c1,c2,c3);
	mul_add_c(3,0,c1,c2,c3);
	stg	c1,3*8(%r2)
	lghi	c1,0

	// column 4
	mul_add_c(3,1,c2,c3,c1);
	mul_add_c(2,2,c2,c3,c1);
	mul_add_c(1,3,c2,c3,c1);
	stg	c2,4*8(%r2)
	lghi	c2,0

	// column 5
	mul_add_c(2,3,c3,c1,c2);
	mul_add_c(3,2,c3,c1,c2);
	stg	c3,5*8(%r2)
	lghi	c3,0

	// column 6, plus the remaining accumulator word as result word 7
	mul_add_c(3,3,c1,c2,c3);
	stg	c1,6*8(%r2)
	stg	c2,7*8(%r2)

	// Reload the registers stored on entry.  This must be a load
	// multiple (lmg); a store here would leave %r6-%r8 clobbered for
	// the caller.
	lmg	%r6,%r8,48(%r15)
	br	%r14
.size	bn_mul_comba4,.-bn_mul_comba4

// sqr_add_c(ai,c1,c2,c3): adds the 128-bit square of the word at
// ai*8(%r3) to the three-word accumulator c3:c2:c1 (c1 is the lowest word).
// Clobbers %r6:%r7 (product pair) and the condition code; expects the
// register named zero to hold 0.
#define sqr_add_c(ai,c1,c2,c3)		\
	lg	%r7,ai*8(%r3);		\
	mlgr	%r6,%r7;		\
	algr	c1,%r7;			\
	alcgr	c2,%r6;			\
	alcgr	c3,zero

// sqr_add_c2(ai,aj,c1,c2,c3): adds twice the 128-bit product of the words
// at ai*8(%r3) and aj*8(%r3) to the accumulator c3:c2:c1, by performing
// the three-word addition of the product two times.
// Clobbers %r6:%r7 and the condition code; expects zero to hold 0.
#define sqr_add_c2(ai,aj,c1,c2,c3)	\
	lg	%r7,ai*8(%r3);		\
	mlg	%r6,aj*8(%r3);		\
	algr	c1,%r7;			\
	alcgr	c2,%r6;			\
	alcgr	c3,zero;		\
	algr	c1,%r7;			\
	alcgr	c2,%r6;			\
	alcgr	c3,zero

// void bn_sqr_comba8(BN_ULONG *r2,BN_ULONG *r3);
//
// Squares the 8-word operand at %r3 and stores the 16-word result at %r2,
// least significant word first.
// The result is built column by column: for result word k the square term
// (when k is even) is added once with sqr_add_c and every cross product of
// two different words whose indices sum to k is added twice with
// sqr_add_c2.  After each column the lowest accumulator word is stored,
// that register is cleared, and the accumulator roles rotate.
//   In:   %r2 = result (16 words), %r3 = operand (8 words)
//   Uses: %r0 (zero), %r1/%r5/%r8 (c1/c2/c3), %r6:%r7 (product pair);
//         %r6-%r8 are stored at 48(%r15) on entry and reloaded on exit.
.globl	bn_sqr_comba8
.type	bn_sqr_comba8,@function
.align	4
bn_sqr_comba8:
	stmg	%r6,%r8,48(%r15)

	lghi	c1,0
	lghi	c2,0
	lghi	c3,0
	lghi	zero,0

	// column 0
	sqr_add_c(0,c1,c2,c3);
	stg	c1,0*8(%r2)
	lghi	c1,0

	// column 1
	sqr_add_c2(1,0,c2,c3,c1);
	stg	c2,1*8(%r2)
	lghi	c2,0

	// column 2
	sqr_add_c(1,c3,c1,c2);
	sqr_add_c2(2,0,c3,c1,c2);
	stg	c3,2*8(%r2)
	lghi	c3,0

	// column 3
	sqr_add_c2(3,0,c1,c2,c3);
	sqr_add_c2(2,1,c1,c2,c3);
	stg	c1,3*8(%r2)
	lghi	c1,0

	// column 4
	sqr_add_c(2,c2,c3,c1);
	sqr_add_c2(3,1,c2,c3,c1);
	sqr_add_c2(4,0,c2,c3,c1);
	stg	c2,4*8(%r2)
	lghi	c2,0

	// column 5
	sqr_add_c2(5,0,c3,c1,c2);
	sqr_add_c2(4,1,c3,c1,c2);
	sqr_add_c2(3,2,c3,c1,c2);
	stg	c3,5*8(%r2)
	lghi	c3,0

	// column 6
	sqr_add_c(3,c1,c2,c3);
	sqr_add_c2(4,2,c1,c2,c3);
	sqr_add_c2(5,1,c1,c2,c3);
	sqr_add_c2(6,0,c1,c2,c3);
	stg	c1,6*8(%r2)
	lghi	c1,0

	// column 7
	sqr_add_c2(7,0,c2,c3,c1);
	sqr_add_c2(6,1,c2,c3,c1);
	sqr_add_c2(5,2,c2,c3,c1);
	sqr_add_c2(4,3,c2,c3,c1);
	stg	c2,7*8(%r2)
	lghi	c2,0

	// column 8
	sqr_add_c(4,c3,c1,c2);
	sqr_add_c2(5,3,c3,c1,c2);
	sqr_add_c2(6,2,c3,c1,c2);
	sqr_add_c2(7,1,c3,c1,c2);
	stg	c3,8*8(%r2)
	lghi	c3,0

	// column 9
	sqr_add_c2(7,2,c1,c2,c3);
	sqr_add_c2(6,3,c1,c2,c3);
	sqr_add_c2(5,4,c1,c2,c3);
	stg	c1,9*8(%r2)
	lghi	c1,0

	// column 10
	sqr_add_c(5,c2,c3,c1);
	sqr_add_c2(6,4,c2,c3,c1);
	sqr_add_c2(7,3,c2,c3,c1);
	stg	c2,10*8(%r2)
	lghi	c2,0

	// column 11
	sqr_add_c2(7,4,c3,c1,c2);
	sqr_add_c2(6,5,c3,c1,c2);
	stg	c3,11*8(%r2)
	lghi	c3,0

	// column 12
	sqr_add_c(6,c1,c2,c3);
	sqr_add_c2(7,5,c1,c2,c3);
	stg	c1,12*8(%r2)
	lghi	c1,0

	// column 13
	sqr_add_c2(7,6,c2,c3,c1);
	stg	c2,13*8(%r2)
	lghi	c2,0

	// column 14, plus the remaining accumulator word as result word 15
	sqr_add_c(7,c3,c1,c2);
	stg	c3,14*8(%r2)
	stg	c1,15*8(%r2)

	lmg	%r6,%r8,48(%r15)
	br	%r14
.size	bn_sqr_comba8,.-bn_sqr_comba8

// void bn_sqr_comba4(BN_ULONG *r2,BN_ULONG *r3);
//
// Squares the 4-word operand at %r3 and stores the 8-word result at %r2,
// least significant word first.
// The result is built column by column: for result word k the square term
// (when k is even) is added once with sqr_add_c and every cross product of
// two different words whose indices sum to k is added twice with
// sqr_add_c2.  After each column the lowest accumulator word is stored,
// that register is cleared, and the accumulator roles rotate.
//   In:   %r2 = result (8 words), %r3 = operand (4 words)
//   Uses: %r0 (zero), %r1/%r5/%r8 (c1/c2/c3), %r6:%r7 (product pair);
//         %r6-%r8 are stored at 48(%r15) on entry and reloaded on exit.
.globl bn_sqr_comba4
.type	bn_sqr_comba4,@function
.align	4
bn_sqr_comba4:
	stmg	%r6,%r8,48(%r15)

	lghi	c1,0
	lghi	c2,0
	lghi	c3,0
	lghi	zero,0

	// column 0
	sqr_add_c(0,c1,c2,c3);
	stg	c1,0*8(%r2)
	lghi	c1,0

	// column 1
	sqr_add_c2(1,0,c2,c3,c1);
	stg	c2,1*8(%r2)
	lghi	c2,0

	// column 2
	sqr_add_c(1,c3,c1,c2);
	sqr_add_c2(2,0,c3,c1,c2);
	stg	c3,2*8(%r2)
	lghi	c3,0

	// column 3
	sqr_add_c2(3,0,c1,c2,c3);
	sqr_add_c2(2,1,c1,c2,c3);
	stg	c1,3*8(%r2)
	lghi	c1,0

	// column 4
	sqr_add_c(2,c2,c3,c1);
	sqr_add_c2(3,1,c2,c3,c1);
	stg	c2,4*8(%r2)
	lghi	c2,0

	// column 5
	sqr_add_c2(3,2,c3,c1,c2);
	stg	c3,5*8(%r2)
	lghi	c3,0

	// column 6, plus the remaining accumulator word as result word 7
	sqr_add_c(3,c1,c2,c3);
	stg	c1,6*8(%r2)
	stg	c2,7*8(%r2)

	lmg	%r6,%r8,48(%r15)
	br	%r14
.size	bn_sqr_comba4,.-bn_sqr_comba4