#!/usr/bin/env perl
# Copyright 2017-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# Keccak-1600 for AVX512VL.
#
# December 2017.
#
# This is an adaptation of AVX2 module that reuses register data
# layout, but utilizes new 256-bit AVX512VL instructions. See AVX2
# module for further information on layout.
#
########################################################################
# Numbers are cycles per processed byte out of large message.
#
#			r=1088(*)
#
# Skylake-X		6.4/+47%
#
# (*)	Corresponds to SHA3-256. Percentage after slash is improvement
#	coefficient in comparison to scalar keccak1600-x86_64.pl.

# State registers. The 25 64-bit lanes A[y][x] of the Keccak state live in
# seven 256-bit registers. The bracketed columns below list the lanes held
# by each register from the most significant quadword (left) down to the
# least significant one (right); $A00 carries A[0][0] replicated into all
# four quadwords.
# Digits in variables' names denote right-most coordinates:

my ($A00,	# [0][0] [0][0] [0][0] [0][0]		# %ymm0
    $A01,	# [0][4] [0][3] [0][2] [0][1]		# %ymm1
    $A20,	# [3][0] [1][0] [4][0] [2][0]		# %ymm2
    $A31,	# [2][4] [4][3] [1][2] [3][1]		# %ymm3
    $A21,	# [3][4] [1][3] [4][2] [2][1]		# %ymm4
    $A41,	# [1][4] [2][3] [3][2] [4][1]		# %ymm5
    $A11) =	# [4][4] [3][3] [2][2] [1][1]		# %ymm6
    map("%ymm$_",(0..6));

# We also need to map the magic order into offsets within structure:
#
# @A_jagged is indexed by the linear lane number 5*y+x of A[y][x]. Every
# entry starts out as a [register, quadword] pair in the register layout
# (register 0 is $A00, 1 is $A01, 2 is $A20, 3 is $A31, 4 is $A21,
# 5 is $A41, 6 is $A11; quadword 0 is the least significant one) and is
# then flattened into a byte offset: 32 bytes per register plus 8 bytes
# per quadword.

my @A_jagged = ([0,0], [1,0], [1,1], [1,2], [1,3],	# [0][0..4]
		[2,2], [6,0], [3,1], [4,2], [5,3],	# [1][0..4]
		[2,0], [4,0], [6,1], [5,2], [3,3],	# [2][0..4]
		[2,3], [3,0], [5,1], [6,2], [4,3],	# [3][0..4]
		[2,1], [5,0], [4,1], [3,2], [6,3]);	# [4][0..4]
   @A_jagged = map { 8*($_->[0]*4 + $_->[1]) } @A_jagged;	# ... and now linear

# Scratch registers %ymm7..%ymm15.
my @T = map("%ymm$_",(7..15));
# Theta temporaries. These are aliases of @T[5..8], not extra registers,
# so the generated code reuses the same registers as plain @T[5..8] once
# the Theta values are no longer needed.
my ($C14,$C00,$D00,$D14) = @T[5..8];
# Registers holding the per-lane rotate counts that SHA3_absorb and
# SHA3_squeeze load from the rhotates_left table; one per rotated state
# register ($A00 has no counterpart).
my ($R20,$R01,$R31,$R21,$R41,$R11) = map("%ymm$_",(16..21));

# Emit __KeccakF1600, the local (non-global) permutation routine.
#
# It runs 24 rounds (counter in %eax) over the state held in $A00..$A11,
# each round being Theta, Rho + Pi, Chi and Iota in that order. %r10
# walks the iotas table, advancing 32 bytes (one row) per round. The
# rotate counts in $R20..$R11 are only read here, so the caller must have
# loaded them beforehand. The routine overwrites @T, %r10 and %eax.
#
# vpternlogq with immediate 0x96 is a three-input XOR; the 0xC6 form is
# used for the Chi combination.
$code.=<<___;
.text

.type	__KeccakF1600,\@function
.align	32
__KeccakF1600:
	lea		iotas(%rip),%r10
	mov		\$24,%eax
	jmp		.Loop_avx512vl

.align	32
.Loop_avx512vl:
	######################################### Theta
	vpshufd		\$0b01001110,$A20,$C00
	vpxor		$A31,$A41,$C14
	vpxor		$A11,$A21,@T[2]
	vpternlogq	\$0x96,$A01,$T[2],$C14	# C[1..4]

	vpxor		$A20,$C00,$C00
	vpermq		\$0b01001110,$C00,@T[0]

	vpermq		\$0b10010011,$C14,@T[4]
	vprolq		\$1,$C14,@T[1]		# ROL64(C[1..4],1)

	vpermq		\$0b00111001,@T[1],$D14
	vpxor		@T[4],@T[1],$D00
	vpermq		\$0b00000000,$D00,$D00	# D[0..0] = ROL64(C[1],1) ^ C[4]

	vpternlogq	\$0x96,@T[0],$A00,$C00	# C[0..0]
	vprolq		\$1,$C00,@T[1]		# ROL64(C[0..0],1)

	vpxor		$D00,$A00,$A00		# ^= D[0..0]

	vpblendd	\$0b11000000,@T[1],$D14,$D14
	vpblendd	\$0b00000011,$C00,@T[4],@T[0]

	######################################### Rho + Pi + pre-Chi shuffle
	 vpxor		$D00,$A20,$A20		# ^= D[0..0] from Theta
	vprolvq		$R20,$A20,$A20

	 vpternlogq	\$0x96,@T[0],$D14,$A31	# ^= D[1..4] from Theta
	vprolvq		$R31,$A31,$A31

	 vpternlogq	\$0x96,@T[0],$D14,$A21	# ^= D[1..4] from Theta
	vprolvq		$R21,$A21,$A21

	 vpternlogq	\$0x96,@T[0],$D14,$A41	# ^= D[1..4] from Theta
	vprolvq		$R41,$A41,$A41

	 vpermq		\$0b10001101,$A20,@T[3]	# $A20 -> future $A31
	 vpermq		\$0b10001101,$A31,@T[4]	# $A31 -> future $A21
	 vpternlogq	\$0x96,@T[0],$D14,$A11	# ^= D[1..4] from Theta
	vprolvq		$R11,$A11,@T[1]		# $A11 -> future $A01

	 vpermq		\$0b00011011,$A21,@T[5]	# $A21 -> future $A41
	 vpermq		\$0b01110010,$A41,@T[6]	# $A41 -> future $A11
	 vpternlogq	\$0x96,@T[0],$D14,$A01	# ^= D[1..4] from Theta
	vprolvq		$R01,$A01,@T[2]		# $A01 -> future $A20

	######################################### Chi
	vpblendd	\$0b00001100,@T[6],@T[2],$A31	#               [4][4] [2][0]
	vpblendd	\$0b00001100,@T[2],@T[4],@T[8]	#               [4][0] [2][1]
	 vpblendd	\$0b00001100,@T[4],@T[3],$A41	#               [4][2] [2][4]
	 vpblendd	\$0b00001100,@T[3],@T[2],@T[7]	#               [4][3] [2][0]
	vpblendd	\$0b00110000,@T[4],$A31,$A31	#        [1][3] [4][4] [2][0]
	vpblendd	\$0b00110000,@T[5],@T[8],@T[8]	#        [1][4] [4][0] [2][1]
	 vpblendd	\$0b00110000,@T[2],$A41,$A41	#        [1][0] [4][2] [2][4]
	 vpblendd	\$0b00110000,@T[6],@T[7],@T[7]	#        [1][1] [4][3] [2][0]
	vpblendd	\$0b11000000,@T[5],$A31,$A31	# [3][2] [1][3] [4][4] [2][0]
	vpblendd	\$0b11000000,@T[6],@T[8],@T[8]	# [3][3] [1][4] [4][0] [2][1]
	 vpblendd	\$0b11000000,@T[6],$A41,$A41	# [3][3] [1][0] [4][2] [2][4]
	 vpblendd	\$0b11000000,@T[4],@T[7],@T[7]	# [3][4] [1][1] [4][3] [2][0]
	vpternlogq	\$0xC6,@T[8],@T[3],$A31		# [3][1] [1][2] [4][3] [2][4]
	 vpternlogq	\$0xC6,@T[7],@T[5],$A41		# [3][2] [1][4] [4][1] [2][3]

	vpsrldq		\$8,@T[1],@T[0]
	vpandn		@T[0],@T[1],@T[0]	# tgting  [0][0] [0][0] [0][0] [0][0]

	vpblendd	\$0b00001100,@T[2],@T[5],$A11	#               [4][0] [2][3]
	vpblendd	\$0b00001100,@T[5],@T[3],@T[8]	#               [4][1] [2][4]
	vpblendd	\$0b00110000,@T[3],$A11,$A11	#        [1][2] [4][0] [2][3]
	vpblendd	\$0b00110000,@T[4],@T[8],@T[8]	#        [1][3] [4][1] [2][4]
	vpblendd	\$0b11000000,@T[4],$A11,$A11	# [3][4] [1][2] [4][0] [2][3]
	vpblendd	\$0b11000000,@T[2],@T[8],@T[8]	# [3][0] [1][3] [4][1] [2][4]
	vpternlogq	\$0xC6,@T[8],@T[6],$A11		# [3][3] [1][1] [4][4] [2][2]

	  vpermq	\$0b00011110,@T[1],$A21		# [0][1] [0][2] [0][4] [0][3]
	  vpblendd	\$0b00110000,$A00,$A21,@T[8]	# [0][1] [0][0] [0][4] [0][3]
	  vpermq	\$0b00111001,@T[1],$A01		# [0][1] [0][4] [0][3] [0][2]
	  vpblendd	\$0b11000000,$A00,$A01,$A01	# [0][0] [0][4] [0][3] [0][2]

	vpblendd	\$0b00001100,@T[5],@T[4],$A20	#               [4][1] [2][1]
	vpblendd	\$0b00001100,@T[4],@T[6],@T[7]	#               [4][2] [2][2]
	vpblendd	\$0b00110000,@T[6],$A20,$A20	#        [1][1] [4][1] [2][1]
	vpblendd	\$0b00110000,@T[3],@T[7],@T[7]	#        [1][2] [4][2] [2][2]
	vpblendd	\$0b11000000,@T[3],$A20,$A20	# [3][1] [1][1] [4][1] [2][1]
	vpblendd	\$0b11000000,@T[5],@T[7],@T[7]	# [3][2] [1][2] [4][2] [2][2]
	vpternlogq	\$0xC6,@T[7],@T[2],$A20		# [3][0] [1][0] [4][0] [2][0]

	 vpermq		\$0b00000000,@T[0],@T[0]	# [0][0] [0][0] [0][0] [0][0]
	 vpermq		\$0b00011011,$A31,$A31		# post-Chi shuffle
	 vpermq		\$0b10001101,$A41,$A41
	 vpermq		\$0b01110010,$A11,$A11

	vpblendd	\$0b00001100,@T[3],@T[6],$A21	#               [4][3] [2][2]
	vpblendd	\$0b00001100,@T[6],@T[5],@T[7]	#               [4][4] [2][3]
	vpblendd	\$0b00110000,@T[5],$A21,$A21	#        [1][4] [4][3] [2][2]
	vpblendd	\$0b00110000,@T[2],@T[7],@T[7]	#        [1][0] [4][4] [2][3]
	vpblendd	\$0b11000000,@T[2],$A21,$A21	# [3][0] [1][4] [4][3] [2][2]
	vpblendd	\$0b11000000,@T[3],@T[7],@T[7]	# [3][1] [1][0] [4][4] [2][3]

	vpternlogq	\$0xC6,@T[8],@T[1],$A01		# [0][4] [0][3] [0][2] [0][1]
	vpternlogq	\$0xC6,@T[7],@T[4],$A21		# [3][4] [1][3] [4][2] [2][1]

	######################################### Iota
	vpternlogq	\$0x96,(%r10),@T[0],$A00
	lea		32(%r10),%r10

	dec		%eax
	jnz		.Loop_avx512vl

	ret
.size	__KeccakF1600,.-__KeccakF1600
___
# Registers carrying the arguments of SHA3_absorb and SHA3_squeeze:
# state pointer, data pointer, byte count and block size (%rdi, %rsi,
# %rdx, %rcx are the first four integer argument registers of the
# System V AMD64 calling convention).
my ($A_flat,$inp,$len,$bsz) = ("%rdi","%rsi","%rdx","%rcx");
# SHA3_squeeze receives its output pointer in the register that
# SHA3_absorb uses for the input pointer.
my  $out = $inp;	# in squeeze

# Emit the first part of SHA3_absorb: entry, state load and the head of
# the per-block loop.
#
# - The incoming %rsp is kept in %r11; 240 bytes are reserved below it
#   and %rsp is aligned down to 32 bytes so the aligned vmovdqa stores to
#   the transfer area are valid.
# - $A_flat, $inp and the transfer-area pointer %r10 are all biased by
#   +96, which is why every displacement below carries a matching -96.
# - The state is loaded from memory into $A00..$A11 and the rotate
#   counts from rhotates_left into $R20..$R11.
# - Transfer-area slots 2..6 (one 32-byte slot per register $A20..$A11)
#   are zeroed once. Slots 0 and 1 are not needed, because input lanes
#   0..4 go straight into @T[0] and @T[1].
# - Per block: $len -= $bsz, leaving the loop on borrow; %eax becomes
#   $bsz/8 - 4, the lane counter consumed by the copy code generated
#   right after this heredoc.
$code.=<<___;
.globl	SHA3_absorb
.type	SHA3_absorb,\@function
.align	32
SHA3_absorb:
	mov	%rsp,%r11

	lea	-240(%rsp),%rsp
	and	\$-32,%rsp

	lea	96($A_flat),$A_flat
	lea	96($inp),$inp
	lea	96(%rsp),%r10
	lea	rhotates_left(%rip),%r8

	vzeroupper

	vpbroadcastq	-96($A_flat),$A00	# load A[5][5]
	vmovdqu		8+32*0-96($A_flat),$A01
	vmovdqu		8+32*1-96($A_flat),$A20
	vmovdqu		8+32*2-96($A_flat),$A31
	vmovdqu		8+32*3-96($A_flat),$A21
	vmovdqu		8+32*4-96($A_flat),$A41
	vmovdqu		8+32*5-96($A_flat),$A11

	vmovdqa64	0*32(%r8),$R20		# load "rhotate" indices
	vmovdqa64	1*32(%r8),$R01
	vmovdqa64	2*32(%r8),$R31
	vmovdqa64	3*32(%r8),$R21
	vmovdqa64	4*32(%r8),$R41
	vmovdqa64	5*32(%r8),$R11

	vpxor		@T[0],@T[0],@T[0]
	vmovdqa		@T[0],32*2-96(%r10)	# zero transfer area on stack
	vmovdqa		@T[0],32*3-96(%r10)
	vmovdqa		@T[0],32*4-96(%r10)
	vmovdqa		@T[0],32*5-96(%r10)
	vmovdqa		@T[0],32*6-96(%r10)

.Loop_absorb_avx512vl:
	mov		$bsz,%rax
	sub		$bsz,$len
	jc		.Ldone_absorb_avx512vl

	shr		\$3,%eax
	vpbroadcastq	0-96($inp),@T[0]
	vmovdqu		8-96($inp),@T[1]
	sub		\$4,%eax
___
# Unrolled copy of input lanes 5..24 into the stack transfer area.
# Lanes 0..4 were already fetched into @T[0] and @T[1] by the code
# emitted above. Each step first decrements the lane counter in %eax and
# leaves for .Labsorved_avx512vl once the block has no more lanes;
# otherwise lane $i is moved through %r8 to the slot that matches its
# position in the register layout, as given by $A_jagged[$i].
for my $i (5..24) {
$code.=<<___;
	dec	%eax
	jz	.Labsorved_avx512vl
	mov	8*$i-96($inp),%r8
	mov	%r8,$A_jagged[$i]-96(%r10)
___
}
# Emit the rest of SHA3_absorb and the beginning of SHA3_squeeze.
#
# SHA3_absorb, continued:
# - .Labsorved: advance $inp by one block, XOR the block into the state
#   (@T[0]/@T[1] for lanes 0..4, the transfer area for the rest), run the
#   permutation, restore %r10 (the permutation uses it as its iotas
#   pointer) and loop.
# - .Ldone_absorb: write the state back, restore %rsp from %r11 and
#   return $len + $bsz in %rax. $len has just gone below zero by one
#   block, so this is the count of bytes left unprocessed.
#
# SHA3_squeeze, beginning:
# - Converts $bsz from bytes to lanes, loads the state and the rotate
#   counts, and sets the per-block lane counter %rax.
# - .Loop_squeeze fetches lane [0][0]. Its offset is written explicitly
#   as \$A_jagged[0]: this heredoc sits outside the generator loops, so
#   no loop index is in scope here.
$code.=<<___;
.Labsorved_avx512vl:
	lea	($inp,$bsz),$inp

	vpxor	@T[0],$A00,$A00
	vpxor	@T[1],$A01,$A01
	vpxor	32*2-96(%r10),$A20,$A20
	vpxor	32*3-96(%r10),$A31,$A31
	vpxor	32*4-96(%r10),$A21,$A21
	vpxor	32*5-96(%r10),$A41,$A41
	vpxor	32*6-96(%r10),$A11,$A11

	call	__KeccakF1600

	lea	96(%rsp),%r10
	jmp	.Loop_absorb_avx512vl

.Ldone_absorb_avx512vl:
	vmovq	%xmm0,-96($A_flat)
	vmovdqu	$A01,8+32*0-96($A_flat)
	vmovdqu	$A20,8+32*1-96($A_flat)
	vmovdqu	$A31,8+32*2-96($A_flat)
	vmovdqu	$A21,8+32*3-96($A_flat)
	vmovdqu	$A41,8+32*4-96($A_flat)
	vmovdqu	$A11,8+32*5-96($A_flat)

	vzeroupper

	lea	(%r11),%rsp
	lea	($len,$bsz),%rax		# return value
	ret
.size	SHA3_absorb,.-SHA3_absorb

.globl	SHA3_squeeze
.type	SHA3_squeeze,\@function
.align	32
SHA3_squeeze:
	mov	%rsp,%r11

	lea	96($A_flat),$A_flat
	lea	rhotates_left(%rip),%r8
	shr	\$3,$bsz

	vzeroupper

	vpbroadcastq	-96($A_flat),$A00
	vpxor		@T[0],@T[0],@T[0]
	vmovdqu		8+32*0-96($A_flat),$A01
	vmovdqu		8+32*1-96($A_flat),$A20
	vmovdqu		8+32*2-96($A_flat),$A31
	vmovdqu		8+32*3-96($A_flat),$A21
	vmovdqu		8+32*4-96($A_flat),$A41
	vmovdqu		8+32*5-96($A_flat),$A11

	vmovdqa64	0*32(%r8),$R20		# load "rhotate" indices
	vmovdqa64	1*32(%r8),$R01
	vmovdqa64	2*32(%r8),$R31
	vmovdqa64	3*32(%r8),$R21
	vmovdqa64	4*32(%r8),$R41
	vmovdqa64	5*32(%r8),$R11

	mov	$bsz,%rax

.Loop_squeeze_avx512vl:
	mov	$A_jagged[0]-96($A_flat),%r8
___
# Unrolled output of up to 25 lanes; %r8 already holds the lane to emit.
# - "sub \$8,$len" borrows when fewer than 8 bytes are wanted, in which
#   case the byte-wise tail code finishes the job.
# - The following "je" still tests the flags of that "sub" (mov and lea
#   leave flags untouched): exactly 8 bytes were left, so we are done.
# - %eax counts the lanes left in the current block; at zero the state
#   is permuted again to extend the output.
# - The next lane is fetched with a -120 bias rather than -96. Every lane
#   except [0][0] sits 24 bytes lower in memory than its \@A_jagged
#   offset, because [0][0] takes 8 bytes in memory but a whole 32-byte
#   slot in the register layout.
# - On the last pass the index is 25, past the end of \@A_jagged, so the
#   slice interpolates as an empty string. That load's result is never
#   used: control falls into .Lextend_output_avx512vl, and %r8 is
#   reloaded at .Loop_squeeze_avx512vl. The slice form is kept so the
#   generated text stays exactly as before.
for my $i (0..24) {
$code.=<<___;
	sub	\$8,$len
	jc	.Ltail_squeeze_avx512vl
	mov	%r8,($out)
	lea	8($out),$out
	je	.Ldone_squeeze_avx512vl
	dec	%eax
	je	.Lextend_output_avx512vl
	mov	@A_jagged[$i+1]-120($A_flat),%r8
___
}
# Emit the rest of SHA3_squeeze and the constant tables.
#
# SHA3_squeeze, continued:
# - .Lextend_output: permute the state, write it back to memory (the
#   lane loop reads lanes from memory), reset the lane counter and
#   restart.
# - .Ltail_squeeze: undo the last "sub \$8" and store the remaining
#   1..7 bytes of %r8 one at a time, least significant byte first.
# - .Ldone_squeeze: restore %rsp from %r11 and return.
#
# Tables:
# - rhotates_left: six rows of four rotate counts, in the order they are
#   loaded into $R20,$R01,$R31,$R21,$R41,$R11. The row comments list the
#   lanes in memory order, i.e. least significant quadword first.
# - iotas: 24 rows, one per round; each row is a round constant repeated
#   four times, matching the 32-byte step of the Iota code.
$code.=<<___;
.Lextend_output_avx512vl:
	call	__KeccakF1600

	vmovq	%xmm0,-96($A_flat)
	vmovdqu	$A01,8+32*0-96($A_flat)
	vmovdqu	$A20,8+32*1-96($A_flat)
	vmovdqu	$A31,8+32*2-96($A_flat)
	vmovdqu	$A21,8+32*3-96($A_flat)
	vmovdqu	$A41,8+32*4-96($A_flat)
	vmovdqu	$A11,8+32*5-96($A_flat)

	mov	$bsz,%rax
	jmp	.Loop_squeeze_avx512vl


.Ltail_squeeze_avx512vl:
	add	\$8,$len
.Loop_tail_avx512vl:
	mov	%r8b,($out)
	lea	1($out),$out
	shr	\$8,%r8
	dec	$len
	jnz	.Loop_tail_avx512vl

.Ldone_squeeze_avx512vl:
	vzeroupper

	lea	(%r11),%rsp
	ret
.size	SHA3_squeeze,.-SHA3_squeeze

.align	64
rhotates_left:
	.quad	3,	18,	36,	41	# [2][0] [4][0] [1][0] [3][0]
	.quad	1,	62,	28,	27	# [0][1] [0][2] [0][3] [0][4]
	.quad	45,	6,	56,	39	# [3][1] [1][2] [4][3] [2][4]
	.quad	10,	61,	55,	8	# [2][1] [4][2] [1][3] [3][4]
	.quad	2,	15,	25,	20	# [4][1] [3][2] [2][3] [1][4]
	.quad	44,	43,	21,	14	# [1][1] [2][2] [3][3] [4][4]
iotas:
	.quad	0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001
	.quad	0x0000000000008082, 0x0000000000008082, 0x0000000000008082, 0x0000000000008082
	.quad	0x800000000000808a, 0x800000000000808a, 0x800000000000808a, 0x800000000000808a
	.quad	0x8000000080008000, 0x8000000080008000, 0x8000000080008000, 0x8000000080008000
	.quad	0x000000000000808b, 0x000000000000808b, 0x000000000000808b, 0x000000000000808b
	.quad	0x0000000080000001, 0x0000000080000001, 0x0000000080000001, 0x0000000080000001
	.quad	0x8000000080008081, 0x8000000080008081, 0x8000000080008081, 0x8000000080008081
	.quad	0x8000000000008009, 0x8000000000008009, 0x8000000000008009, 0x8000000000008009
	.quad	0x000000000000008a, 0x000000000000008a, 0x000000000000008a, 0x000000000000008a
	.quad	0x0000000000000088, 0x0000000000000088, 0x0000000000000088, 0x0000000000000088
	.quad	0x0000000080008009, 0x0000000080008009, 0x0000000080008009, 0x0000000080008009
	.quad	0x000000008000000a, 0x000000008000000a, 0x000000008000000a, 0x000000008000000a
	.quad	0x000000008000808b, 0x000000008000808b, 0x000000008000808b, 0x000000008000808b
	.quad	0x800000000000008b, 0x800000000000008b, 0x800000000000008b, 0x800000000000008b
	.quad	0x8000000000008089, 0x8000000000008089, 0x8000000000008089, 0x8000000000008089
	.quad	0x8000000000008003, 0x8000000000008003, 0x8000000000008003, 0x8000000000008003
	.quad	0x8000000000008002, 0x8000000000008002, 0x8000000000008002, 0x8000000000008002
	.quad	0x8000000000000080, 0x8000000000000080, 0x8000000000000080, 0x8000000000000080
	.quad	0x000000000000800a, 0x000000000000800a, 0x000000000000800a, 0x000000000000800a
	.quad	0x800000008000000a, 0x800000008000000a, 0x800000008000000a, 0x800000008000000a
	.quad	0x8000000080008081, 0x8000000080008081, 0x8000000080008081, 0x8000000080008081
	.quad	0x8000000000008080, 0x8000000000008080, 0x8000000000008080, 0x8000000000008080
	.quad	0x0000000080000001, 0x0000000080000001, 0x0000000080000001, 0x0000000080000001
	.quad	0x8000000080008008, 0x8000000080008008, 0x8000000080008008, 0x8000000080008008

.asciz	"Keccak-1600 absorb and squeeze for AVX512VL, CRYPTOGAMS by <appro\@openssl.org>"
___

# Write the generated assembly. The last command-line argument names the
# output file; when none is given the text goes to the existing STDOUT.
# The three-argument open keeps the file name from being parsed as part
# of the mode, and a failure to open is reported at once instead of only
# showing up when STDOUT is closed.
$output = pop;
if (defined $output && length $output) {
    open STDOUT, '>', $output
        or die "can't open $output for writing: $!";
}
print $code;
close STDOUT or die "error closing STDOUT: $!";