Blame crypto/x86cpuid.pl

Packit c4476c
#! /usr/bin/env perl
Packit c4476c
# Copyright 2004-2020 The OpenSSL Project Authors. All Rights Reserved.
Packit c4476c
#
Packit c4476c
# Licensed under the OpenSSL license (the "License").  You may not use
Packit c4476c
# this file except in compliance with the License.  You can obtain a copy
Packit c4476c
# in the file LICENSE in the source distribution or at
Packit c4476c
# https://www.openssl.org/source/license.html
Packit c4476c
Packit c4476c
# Locate the perlasm support directory that sits next to this script and
# load the 32-bit x86 assembler front end from it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC, "${dir}perlasm", "perlasm");
require "x86asm.pl";

# The last command-line argument names the output file.  Everything the
# generator prints to STDOUT is redirected into it.  Use the three-argument
# form of open so the file name is never interpreted as a mode, and fail
# immediately (naming the file) when it cannot be created.
$output = pop;
open OUT,">",$output or die "can't open $output: $!";
*STDOUT=*OUT;

# The first remaining argument is handed to asm_init (defined in x86asm.pl).
&asm_init($ARGV[0]);

# SSE2-dependent code paths are only generated when the build passes
# -DOPENSSL_IA32_SSE2 on the command line.
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
Packit c4476c
Packit c4476c
# OPENSSL_ia32_cpuid: emits a routine that probes the processor with CPUID.
# The routine takes one pointer argument (fetched with wparam(0)).  The dword
# at offset 8 of that buffer is cleared first and, when standard leaf 7 is
# available, receives the leaf-7 EBX extended feature flags (with AVX2
# cleared if the OS has not enabled AVX state).  The adjusted leaf-1 feature
# words are returned in %edx:%eax: %eax is the copy of CPUID.1 EDX and %edx
# is the copy of CPUID.1 ECX with the AMD XOP flag merged into bit 11.
# When CPUID is not available both registers are returned as zero.
&function_begin("OPENSSL_ia32_cpuid");
Packit c4476c
	# CPUID is usable only if software can toggle bit 21 of EFLAGS.
	&xor	("edx","edx");
Packit c4476c
	&pushf	();
Packit c4476c
	&pop	("eax");
Packit c4476c
	&mov	("ecx","eax");
Packit c4476c
	&xor	("eax",1<<21);
Packit c4476c
	&push	("eax");
Packit c4476c
	&popf	();
Packit c4476c
	&pushf	();
Packit c4476c
	&pop	("eax");
Packit c4476c
	&xor	("ecx","eax");
Packit c4476c
	&xor	("eax","eax");
Packit c4476c
	&mov	("esi",&wparam(0));
Packit c4476c
	&mov	(&DWP(8,"esi"),"eax");	# clear extended feature flags
Packit c4476c
	&bt	("ecx",21);
Packit c4476c
	&jnc	(&label("nocpuid"));
Packit c4476c
	# eax is 0 here, so this queries leaf 0 (max leaf and vendor string).
	&cpuid	();
Packit c4476c
	&mov	("edi","eax");		# max value for standard query level
Packit c4476c
Packit c4476c
	# ebp accumulates mismatches against "GenuineIntel"; it ends up 0 only
	# when all three vendor words match.
	&xor	("eax","eax");
Packit c4476c
	&cmp	("ebx",0x756e6547);	# "Genu"
Packit c4476c
	&setne	(&LB("eax"));
Packit c4476c
	&mov	("ebp","eax");
Packit c4476c
	&cmp	("edx",0x49656e69);	# "ineI"
Packit c4476c
	&setne	(&LB("eax"));
Packit c4476c
	&or	("ebp","eax");
Packit c4476c
	&cmp	("ecx",0x6c65746e);	# "ntel"
Packit c4476c
	&setne	(&LB("eax"));
Packit c4476c
	&or	("ebp","eax");		# 0 indicates Intel CPU
Packit c4476c
	&jz	(&label("intel"));
Packit c4476c
Packit c4476c
	# Same scheme for "AuthenticAMD", accumulated in esi.
	&cmp	("ebx",0x68747541);	# "Auth"
Packit c4476c
	&setne	(&LB("eax"));
Packit c4476c
	&mov	("esi","eax");
Packit c4476c
	&cmp	("edx",0x69746E65);	# "enti"
Packit c4476c
	&setne	(&LB("eax"));
Packit c4476c
	&or	("esi","eax");
Packit c4476c
	&cmp	("ecx",0x444D4163);	# "cAMD"
Packit c4476c
	&setne	(&LB("eax"));
Packit c4476c
	&or	("esi","eax");		# 0 indicates AMD CPU
Packit c4476c
	&jnz	(&label("intel"));
Packit c4476c
Packit c4476c
	# AMD specific
Packit c4476c
	&mov	("eax",0x80000000);
Packit c4476c
	&cpuid	();
Packit c4476c
	&cmp	("eax",0x80000001);
Packit c4476c
	&jb	(&label("intel"));
Packit c4476c
	&mov	("esi","eax");		# keep max extended leaf for the check below
Packit c4476c
	&mov	("eax",0x80000001);
Packit c4476c
	&cpuid	();
Packit c4476c
	&or	("ebp","ecx");
Packit c4476c
	&and	("ebp",1<<11|1);	# isolate XOP bit
Packit c4476c
	&cmp	("esi",0x80000008);
Packit c4476c
	&jb	(&label("intel"));
Packit c4476c
Packit c4476c
	&mov	("eax",0x80000008);
Packit c4476c
	&cpuid	();
Packit c4476c
	&movz	("esi",&LB("ecx"));	# number of cores - 1
Packit c4476c
	&inc	("esi");		# number of cores
Packit c4476c
Packit c4476c
	&mov	("eax",1);
Packit c4476c
	&xor	("ecx","ecx");
Packit c4476c
	&cpuid	();
Packit c4476c
	&bt	("edx",28);
Packit c4476c
	&jnc	(&label("generic"));
Packit c4476c
	# Compare bits 23:16 of leaf-1 EBX with the core count; the
	# hyper-threading bit is kept only when that field is larger.
	&shr	("ebx",16);
Packit c4476c
	&and	("ebx",0xff);
Packit c4476c
	&cmp	("ebx","esi");
Packit c4476c
	&ja	(&label("generic"));
Packit c4476c
	&and	("edx",0xefffffff);	# clear hyper-threading bit
Packit c4476c
	&jmp	(&label("generic"));
Packit c4476c
Packit c4476c
&set_label("intel");
Packit c4476c
	# esi defaults to -1 when leaf 4 (cache info) is not available.
	&cmp	("edi",4);
Packit c4476c
	&mov	("esi",-1);
Packit c4476c
	&jb	(&label("nocacheinfo"));
Packit c4476c
Packit c4476c
	&mov	("eax",4);
Packit c4476c
	&mov	("ecx",0);		# query L1D
Packit c4476c
	&cpuid	();
Packit c4476c
	&mov	("esi","eax");
Packit c4476c
	&shr	("esi",14);
Packit c4476c
	&and	("esi",0xfff);		# number of cores -1 per L1D
Packit c4476c
Packit c4476c
&set_label("nocacheinfo");
Packit c4476c
	&mov	("eax",1);
Packit c4476c
	&xor	("ecx","ecx");
Packit c4476c
	&cpuid	();
Packit c4476c
	&and	("edx",0xbfefffff);	# force reserved bits #20, #30 to 0
Packit c4476c
	&cmp	("ebp",0);
Packit c4476c
	&jne	(&label("notintel"));
Packit c4476c
	&or	("edx",1<<30);		# set reserved bit#30 on Intel CPUs
Packit c4476c
	&and	(&HB("eax"),15);	# family ID
Packit c4476c
	&cmp	(&HB("eax"),15);	# P4?
Packit c4476c
	&jne	(&label("notintel"));
Packit c4476c
	&or	("edx",1<<20);		# set reserved bit#20 to engage RC4_CHAR
Packit c4476c
&set_label("notintel");
Packit c4476c
	&bt	("edx",28);		# test hyper-threading bit
Packit c4476c
	&jnc	(&label("generic"));
Packit c4476c
	# Clear the bit first; it is set again below only when esi is non-zero.
	&and	("edx",0xefffffff);
Packit c4476c
	&cmp	("esi",0);
Packit c4476c
	&je	(&label("generic"));
Packit c4476c
Packit c4476c
	&or	("edx",0x10000000);
Packit c4476c
	&shr	("ebx",16);
Packit c4476c
	&cmp	(&LB("ebx"),1);
Packit c4476c
	&ja	(&label("generic"));
Packit c4476c
	&and	("edx",0xefffffff);	# clear hyper-threading bit if not
Packit c4476c
Packit c4476c
&set_label("generic");
Packit c4476c
	&and	("ebp",1<<11);		# isolate AMD XOP flag
Packit c4476c
	&and	("ecx",0xfffff7ff);	# force 11th bit to 0
Packit c4476c
	&mov	("esi","edx");		# %ebp:%esi is copy of %ecx:%edx
Packit c4476c
	&or	("ebp","ecx");		# merge AMD XOP flag
Packit c4476c
Packit c4476c
	# edi still holds the max standard leaf; the mov between cmp and jb
	# does not touch the flags, it just reloads the output pointer.
	&cmp	("edi",7);
Packit c4476c
	&mov	("edi",&wparam(0));
Packit c4476c
	&jb	(&label("no_extended_info"));
Packit c4476c
	&mov	("eax",7);
Packit c4476c
	&xor	("ecx","ecx");
Packit c4476c
	&cpuid	();
Packit c4476c
	&mov	(&DWP(8,"edi"),"ebx");	# save extended feature flag
Packit c4476c
&set_label("no_extended_info");
Packit c4476c
Packit c4476c
	&bt	("ebp",27);		# check OSXSAVE bit
Packit c4476c
	&jnc	(&label("clear_avx"));
Packit c4476c
	# xgetbv with ecx=0; bits 1 and 2 of the result decide which of the
	# clear_xmm / clear_avx / done paths below is taken.
	&xor	("ecx","ecx");
Packit c4476c
	&data_byte(0x0f,0x01,0xd0);	# xgetbv
Packit c4476c
	&and	("eax",6);
Packit c4476c
	&cmp	("eax",6);
Packit c4476c
	&je	(&label("done"));
Packit c4476c
	&cmp	("eax",2);
Packit c4476c
	&je	(&label("clear_avx"));
Packit c4476c
&set_label("clear_xmm");
Packit c4476c
	&and	("ebp",0xfdfffffd);	# clear AESNI and PCLMULQDQ bits
Packit c4476c
	&and	("esi",0xfeffffff);	# clear FXSR
Packit c4476c
&set_label("clear_avx");
Packit c4476c
	&and	("ebp",0xefffe7ff);	# clear AVX, FMA and AMD XOP bits
Packit c4476c
	&and	(&DWP(8,"edi"),0xffffffdf);	# clear AVX2
Packit c4476c
&set_label("done");
Packit c4476c
	# Return value: eax = adjusted EDX word, edx = adjusted ECX word.
	&mov	("eax","esi");
Packit c4476c
	&mov	("edx","ebp");
Packit c4476c
&set_label("nocpuid");
Packit c4476c
&function_end("OPENSSL_ia32_cpuid");
Packit c4476c
Packit c4476c
# Declare OPENSSL_ia32cap_P as an external symbol; the routines below read
# the capability word through it (external_label is defined in x86asm.pl).
&external_label("OPENSSL_ia32cap_P");
Packit c4476c
Packit c4476c
# OPENSSL_rdtsc: emits a routine that returns the time-stamp counter in
# %edx:%eax.  Bit 4 of the first OPENSSL_ia32cap_P word is tested first;
# when it is clear RDTSC is skipped and the routine returns 0.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
Packit c4476c
	&xor	("eax","eax");
Packit c4476c
	&xor	("edx","edx");
Packit c4476c
	&picmeup("ecx","OPENSSL_ia32cap_P");
Packit c4476c
	&bt	(&DWP(0,"ecx"),4);
Packit c4476c
	&jnc	(&label("notsc"));
Packit c4476c
	&rdtsc	();
Packit c4476c
&set_label("notsc");
Packit c4476c
	&ret	();
Packit c4476c
&function_end_B("OPENSSL_rdtsc");
Packit c4476c
Packit c4476c
# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
Packit c4476c
# but it's safe to call it on any [supported] 32-bit platform...
Packit c4476c
# Just check for [non-]zero return value...
Packit c4476c
# OPENSSL_instrument_halt: emits a routine that returns, in %edx:%eax, the
# number of TSC ticks that elapsed across a HLT instruction.  It returns 0
# without halting when the TSC capability bit is clear, when the low two
# bits of %cs are non-zero, or when bit 9 of EFLAGS is clear.
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
Packit c4476c
	&picmeup("ecx","OPENSSL_ia32cap_P");
Packit c4476c
	&bt	(&DWP(0,"ecx"),4);
Packit c4476c
	&jnc	(&label("nohalt"));	# no TSC
Packit c4476c
Packit c4476c
	&data_word(0x9058900e);		# push %cs; pop %eax
Packit c4476c
	&and	("eax",3);
Packit c4476c
	&jnz	(&label("nohalt"));	# not enough privileges
Packit c4476c
Packit c4476c
	&pushf	();
Packit c4476c
	&pop	("eax");
Packit c4476c
	&bt	("eax",9);
Packit c4476c
	&jnc	(&label("nohalt"));	# interrupts are disabled
Packit c4476c
Packit c4476c
	# Save the starting tick on the stack, halt, then read the counter again.
	&rdtsc	();
Packit c4476c
	&push	("edx");
Packit c4476c
	&push	("eax");
Packit c4476c
	&halt	();
Packit c4476c
	&rdtsc	();
Packit c4476c
Packit c4476c
	# 64-bit subtraction: end tick minus the saved start tick.
	&sub	("eax",&DWP(0,"esp"));
Packit c4476c
	&sbb	("edx",&DWP(4,"esp"));
Packit c4476c
	&add	("esp",8);
Packit c4476c
	&ret	();
Packit c4476c
Packit c4476c
&set_label("nohalt");
Packit c4476c
	&xor	("eax","eax");
Packit c4476c
	&xor	("edx","edx");
Packit c4476c
	&ret	();
Packit c4476c
&function_end_B("OPENSSL_instrument_halt");
Packit c4476c
Packit c4476c
# Essentially there is only one use for this function. Under DJGPP:
Packit c4476c
#
Packit c4476c
#	#include <go32.h>
Packit c4476c
#	...
Packit c4476c
#	i=OPENSSL_far_spin(_dos_ds,0x46c);
Packit c4476c
#	...
Packit c4476c
# to obtain the number of spins till closest timer interrupt.
Packit c4476c
Packit c4476c
# OPENSSL_far_spin: emits a routine taking two stack arguments.  The first
# (at 4(%esp)) is loaded into %ds, the second (at 8(%esp)) is used as an
# offset.  The routine counts loop iterations until the dword at that far
# address changes and returns the count in %eax.  It returns 0 without
# spinning when bit 9 of EFLAGS is clear.
&function_begin_B("OPENSSL_far_spin");
Packit c4476c
	&pushf	();
Packit c4476c
	&pop	("eax");
Packit c4476c
	&bt	("eax",9);
Packit c4476c
	&jnc	(&label("nospin"));	# interrupts are disabled
Packit c4476c
Packit c4476c
	# Both arguments are fetched before %ds is pushed and replaced.
	&mov	("eax",&DWP(4,"esp"));
Packit c4476c
	&mov	("ecx",&DWP(8,"esp"));
Packit c4476c
	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
Packit c4476c
	&xor	("eax","eax");
Packit c4476c
	&mov	("edx",&DWP(0,"ecx"));	# initial value to compare against
Packit c4476c
	&jmp	(&label("spin"));
Packit c4476c
Packit c4476c
	&align	(16);
Packit c4476c
&set_label("spin");
Packit c4476c
	&inc	("eax");
Packit c4476c
	&cmp	("edx",&DWP(0,"ecx"));
Packit c4476c
	&je	(&label("spin"));
Packit c4476c
Packit c4476c
	&data_word (0x1f909090);	# pop	%ds
Packit c4476c
	&ret	();
Packit c4476c
Packit c4476c
&set_label("nospin");
Packit c4476c
	&xor	("eax","eax");
Packit c4476c
	&xor	("edx","edx");
Packit c4476c
	&ret	();
Packit c4476c
&function_end_B("OPENSSL_far_spin");
Packit c4476c
Packit c4476c
# OPENSSL_wipe_cpu: emits a routine that scrubs scratch CPU state.
# It zeroes %eax and %edx, then (depending on the capability word) zeroes
# %xmm0-%xmm7 and resets the x87/MMX register bank.  On return %eax holds
# the address 4 bytes above the stack pointer at entry, i.e. the address
# just past the return address.
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&mov	("ecx",&DWP(0,"ecx"));	# ecx = first capability word
	# ecx now holds the capability bits themselves (the SSE2 check below
	# masks the register directly), so the bit test must be done on the
	# register.  Testing &DWP(0,"ecx") would dereference the capability
	# value as if it were an address.
	# NOTE(review): bit 1 is tested here although CPUID.1:EDX bit 0 is the
	# FPU flag; the index is left unchanged - confirm which is intended.
	&bt	("ecx",1);
	&jnc	(&label("no_x87"));
	if ($sse2) {
		&and	("ecx",1<<26|1<<24);	# check SSE2 and FXSR bits
		&cmp	("ecx",1<<26|1<<24);
		&jne	(&label("no_sse2"));
		&pxor	("xmm0","xmm0");
		&pxor	("xmm1","xmm1");
		&pxor	("xmm2","xmm2");
		&pxor	("xmm3","xmm3");
		&pxor	("xmm4","xmm4");
		&pxor	("xmm5","xmm5");
		&pxor	("xmm6","xmm6");
		&pxor	("xmm7","xmm7");
	&set_label("no_sse2");
	}
	# just a bunch of fldz to zap the fp/mm bank followed by finit...
	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
	&lea	("eax",&DWP(4,"esp"));
	&ret	();
&function_end_B("OPENSSL_wipe_cpu");
Packit c4476c
Packit c4476c
# OPENSSL_atomic_add: emits a routine that atomically adds the second stack
# argument to the dword addressed by the first, using a lock cmpxchg retry
# loop, and returns the new value in %eax.
&function_begin_B("OPENSSL_atomic_add");
Packit c4476c
	&mov	("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg
Packit c4476c
	&mov	("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg
Packit c4476c
	&push	("ebx");
Packit c4476c
	&nop	();
Packit c4476c
	&mov	("eax",&DWP(0,"edx"));	# current value, expected by cmpxchg
Packit c4476c
&set_label("spin");
Packit c4476c
	&lea	("ebx",&DWP(0,"eax","ecx"));	# candidate = current + increment
Packit c4476c
	&nop	();
Packit c4476c
	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is involved and is always reloaded
Packit c4476c
	&jne	(&label("spin"));	# value changed underneath us, retry
Packit c4476c
	&mov	("eax","ebx");	# OpenSSL expects the new value
Packit c4476c
	&pop	("ebx");
Packit c4476c
	&ret	();
Packit c4476c
&function_end_B("OPENSSL_atomic_add");
Packit c4476c
Packit c4476c
# OPENSSL_cleanse: emits a routine that writes zero bytes over a buffer.
# wparam(0) is loaded as the buffer pointer and wparam(1) as the byte count.
# Counts below 7 are cleared byte by byte.  Larger counts are cleared byte
# by byte until the pointer is 4-byte aligned, then a dword at a time, and
# any remaining tail goes through the byte loop again.
&function_begin_B("OPENSSL_cleanse");
Packit c4476c
	&mov	("edx",&wparam(0));
Packit c4476c
	&mov	("ecx",&wparam(1));
Packit c4476c
	&xor	("eax","eax");
Packit c4476c
	&cmp	("ecx",7);
Packit c4476c
	&jae	(&label("lot"));
Packit c4476c
	&cmp	("ecx",0);
Packit c4476c
	&je	(&label("ret"));
Packit c4476c
&set_label("little");
Packit c4476c
	&mov	(&BP(0,"edx"),"al");
Packit c4476c
	&sub	("ecx",1);
Packit c4476c
	&lea	("edx",&DWP(1,"edx"));	# lea leaves the flags from sub intact
Packit c4476c
	&jnz	(&label("little"));
Packit c4476c
&set_label("ret");
Packit c4476c
	&ret	();
Packit c4476c
Packit c4476c
&set_label("lot",16);
Packit c4476c
	# Clear single bytes until the pointer is aligned to 4 bytes.
	&test	("edx",3);
Packit c4476c
	&jz	(&label("aligned"));
Packit c4476c
	&mov	(&BP(0,"edx"),"al");
Packit c4476c
	&lea	("ecx",&DWP(-1,"ecx"));
Packit c4476c
	&lea	("edx",&DWP(1,"edx"));
Packit c4476c
	&jmp	(&label("lot"));
Packit c4476c
&set_label("aligned");
Packit c4476c
	&mov	(&DWP(0,"edx"),"eax");
Packit c4476c
	&lea	("ecx",&DWP(-4,"ecx"));
Packit c4476c
	&test	("ecx",-4);		# at least 4 bytes still left?
Packit c4476c
	&lea	("edx",&DWP(4,"edx"));
Packit c4476c
	&jnz	(&label("aligned"));
Packit c4476c
	&cmp	("ecx",0);
Packit c4476c
	&jne	(&label("little"));	# finish the 1..3 byte tail
Packit c4476c
	&ret	();
Packit c4476c
&function_end_B("OPENSSL_cleanse");
Packit c4476c
Packit c4476c
# CRYPTO_memcmp: emits a routine that compares two buffers (wparam(0) and
# wparam(1)) over wparam(2) bytes.  Every byte pair is XORed and the results
# are ORed together, so the loop always runs over the full length.  Returns
# 0 in %eax when the buffers are equal (or the length is 0) and 1 otherwise.
&function_begin_B("CRYPTO_memcmp");
Packit c4476c
	&push	("esi");
Packit c4476c
	&push	("edi");
Packit c4476c
	&mov	("esi",&wparam(0));
Packit c4476c
	&mov	("edi",&wparam(1));
Packit c4476c
	&mov	("ecx",&wparam(2));
Packit c4476c
	&xor	("eax","eax");
Packit c4476c
	&xor	("edx","edx");
Packit c4476c
	&cmp	("ecx",0);
Packit c4476c
	&je	(&label("no_data"));
Packit c4476c
&set_label("loop");
Packit c4476c
	&mov	("dl",&BP(0,"esi"));
Packit c4476c
	&lea	("esi",&DWP(1,"esi"));
Packit c4476c
	&xor	("dl",&BP(0,"edi"));
Packit c4476c
	&lea	("edi",&DWP(1,"edi"));
Packit c4476c
	&or	("al","dl");		# accumulate any difference
Packit c4476c
	&dec	("ecx");
Packit c4476c
	&jnz	(&label("loop"));
Packit c4476c
	# Collapse the accumulated byte to 0 or 1: negating a non-zero value
	# sets the sign bit, which is then shifted down to bit 0.
	&neg	("eax");
Packit c4476c
	&shr	("eax",31);
Packit c4476c
&set_label("no_data");
Packit c4476c
	&pop	("edi");
Packit c4476c
	&pop	("esi");
Packit c4476c
	&ret	();
Packit c4476c
&function_end_B("CRYPTO_memcmp");
Packit c4476c
# OPENSSL_instrument_bus / OPENSSL_instrument_bus2: emit two routines that
# accumulate differences between successive TSC readings into an array of
# dwords, flushing the target cache line (clflush) before each locked add.
# The lexicals below name the registers shared by both routines.  The probing
# code is generated only when $sse2 is set; otherwise, or when the TSC or
# CLFLUSH capability bit (bits 4 and 19) is clear at run time, both routines
# return 0.
{
Packit c4476c
my $lasttick = "esi";
Packit c4476c
my $lastdiff = "ebx";
Packit c4476c
my $out = "edi";
Packit c4476c
my $cnt = "ecx";
Packit c4476c
my $max = "ebp";
Packit c4476c
Packit c4476c
# OPENSSL_instrument_bus: wparam(0) is the output pointer, wparam(1) the
# number of dwords to fill.  Returns wparam(1) on success.
&function_begin("OPENSSL_instrument_bus");
Packit c4476c
    &mov	("eax",0);
Packit c4476c
    if ($sse2) {
Packit c4476c
	&picmeup("edx","OPENSSL_ia32cap_P");
Packit c4476c
	&bt	(&DWP(0,"edx"),4);
Packit c4476c
	&jnc	(&label("nogo"));	# no TSC
Packit c4476c
	&bt	(&DWP(0,"edx"),19);
Packit c4476c
	&jnc	(&label("nogo"));	# no CLFLUSH
Packit c4476c
Packit c4476c
	&mov	($out,&wparam(0));	# load arguments
Packit c4476c
	&mov	($cnt,&wparam(1));
Packit c4476c
Packit c4476c
	# collect 1st tick
Packit c4476c
	&rdtsc	();
Packit c4476c
	&mov	($lasttick,"eax");	# lasttick = tick
Packit c4476c
	&mov	($lastdiff,0);		# lastdiff = 0
Packit c4476c
	&clflush(&DWP(0,$out));
Packit c4476c
	&data_byte(0xf0);		# lock
Packit c4476c
	&add	(&DWP(0,$out),$lastdiff);
Packit c4476c
	&jmp	(&label("loop"));
Packit c4476c
Packit c4476c
&set_label("loop",16);
Packit c4476c
	&rdtsc	();
Packit c4476c
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
Packit c4476c
	&sub	("eax",$lasttick);	# diff
Packit c4476c
	&mov	($lasttick,"edx");	# lasttick = tick
Packit c4476c
	&mov	($lastdiff,"eax");	# lastdiff = diff
Packit c4476c
	&clflush(&DWP(0,$out));
Packit c4476c
	&data_byte(0xf0);		# lock
Packit c4476c
	&add	(&DWP(0,$out),"eax");	# accumulate diff
Packit c4476c
	&lea	($out,&DWP(4,$out));	# ++$out
Packit c4476c
	&sub	($cnt,1);		# --$cnt
Packit c4476c
	&jnz	(&label("loop"));
Packit c4476c
Packit c4476c
	&mov	("eax",&wparam(1));	# return the requested count
Packit c4476c
&set_label("nogo");
Packit c4476c
    }
Packit c4476c
&function_end("OPENSSL_instrument_bus");
Packit c4476c
Packit c4476c
# OPENSSL_instrument_bus2: like the routine above, but takes a third
# argument (wparam(2)) limiting the number of probes, and advances the output
# pointer and count only when a diff differs from the previous one.
# Returns wparam(1) minus the remaining count.
&function_begin("OPENSSL_instrument_bus2");
Packit c4476c
    &mov	("eax",0);
Packit c4476c
    if ($sse2) {
Packit c4476c
	&picmeup("edx","OPENSSL_ia32cap_P");
Packit c4476c
	&bt	(&DWP(0,"edx"),4);
Packit c4476c
	&jnc	(&label("nogo"));	# no TSC
Packit c4476c
	&bt	(&DWP(0,"edx"),19);
Packit c4476c
	&jnc	(&label("nogo"));	# no CLFLUSH
Packit c4476c
Packit c4476c
	&mov	($out,&wparam(0));	# load arguments
Packit c4476c
	&mov	($cnt,&wparam(1));
Packit c4476c
	&mov	($max,&wparam(2));
Packit c4476c
Packit c4476c
	&rdtsc	();			# collect 1st tick
Packit c4476c
	&mov	($lasttick,"eax");	# lasttick = tick
Packit c4476c
	&mov	($lastdiff,0);		# lastdiff = 0
Packit c4476c
Packit c4476c
	&clflush(&DWP(0,$out));
Packit c4476c
	&data_byte(0xf0);		# lock
Packit c4476c
	&add	(&DWP(0,$out),$lastdiff);
Packit c4476c
Packit c4476c
	&rdtsc	();			# collect 1st diff
Packit c4476c
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
Packit c4476c
	&sub	("eax",$lasttick);	# diff
Packit c4476c
	&mov	($lasttick,"edx");	# lasttick = tick
Packit c4476c
	&mov	($lastdiff,"eax");	# lastdiff = diff
Packit c4476c
	&jmp	(&label("loop2"));
Packit c4476c
Packit c4476c
&set_label("loop2",16);
Packit c4476c
	&clflush(&DWP(0,$out));
Packit c4476c
	&data_byte(0xf0);		# lock
Packit c4476c
	&add	(&DWP(0,$out),"eax");	# accumulate diff
Packit c4476c
Packit c4476c
	&sub	($max,1);
Packit c4476c
	&jz	(&label("done2"));	# probe budget exhausted
Packit c4476c
Packit c4476c
	&rdtsc	();
Packit c4476c
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
Packit c4476c
	&sub	("eax",$lasttick);	# diff
Packit c4476c
	&mov	($lasttick,"edx");	# lasttick = tick
Packit c4476c
	&cmp	("eax",$lastdiff);
Packit c4476c
	&mov	($lastdiff,"eax");	# lastdiff = diff
Packit c4476c
	&mov	("edx",0);
Packit c4476c
	&setne	("dl");			# edx = 1 if diff changed, else 0
Packit c4476c
	&sub	($cnt,"edx");		# conditional --$cnt
Packit c4476c
	&lea	($out,&DWP(0,$out,"edx",4));	# conditional ++$out
Packit c4476c
	&jnz	(&label("loop2"));	# flags come from the sub above
Packit c4476c
Packit c4476c
&set_label("done2");
Packit c4476c
	&mov	("eax",&wparam(1));
Packit c4476c
	&sub	("eax",$cnt);
Packit c4476c
&set_label("nogo");
Packit c4476c
    }
Packit c4476c
&function_end("OPENSSL_instrument_bus2");
Packit c4476c
}
Packit c4476c
Packit c4476c
# gen_random($rdop): emits a routine named OPENSSL_ia32_${rdop}_bytes, where
# $rdop is the name of the instruction emitter to call (invoked below with
# "rdrand" and "rdseed").  The generated routine loads wparam(0) as the
# output pointer and wparam(1) as the byte count, fills the buffer 4 bytes
# at a time (the final 1..3 bytes individually), and returns the number of
# bytes written in %eax.  Each word is attempted up to 8 times; if the
# carry flag is never set the routine stops early and returns the count so
# far.  %edx is zeroed before returning.
sub gen_random {
Packit c4476c
my $rdop = shift;
Packit c4476c
&function_begin_B("OPENSSL_ia32_${rdop}_bytes");
Packit c4476c
	&push	("edi");
Packit c4476c
	&push	("ebx");
Packit c4476c
	&xor	("eax","eax");		# return value
Packit c4476c
	&mov	("edi",&wparam(0));
Packit c4476c
	&mov	("ebx",&wparam(1));
Packit c4476c
Packit c4476c
	&cmp	("ebx",0);
Packit c4476c
	&je	(&label("done"));
Packit c4476c
Packit c4476c
	&mov	("ecx",8);		# retry budget for one word
Packit c4476c
&set_label("loop");
Packit c4476c
	&${rdop}("edx");
Packit c4476c
	&jc	(&label("break"));	# carry set: edx holds a value
Packit c4476c
	&loop	(&label("loop"));
Packit c4476c
	&jmp	(&label("done"));	# retries exhausted
Packit c4476c
Packit c4476c
&set_label("break",16);
Packit c4476c
	&cmp	("ebx",4);
Packit c4476c
	&jb	(&label("tail"));
Packit c4476c
	&mov	(&DWP(0,"edi"),"edx");
Packit c4476c
	&lea	("edi",&DWP(4,"edi"));
Packit c4476c
	&add	("eax",4);
Packit c4476c
	&sub	("ebx",4);
Packit c4476c
	&jz	(&label("done"));
Packit c4476c
	&mov	("ecx",8);		# fresh retry budget for the next word
Packit c4476c
	&jmp	(&label("loop"));
Packit c4476c
Packit c4476c
&set_label("tail",16);
Packit c4476c
	# Fewer than 4 bytes left: store the low bytes of edx one at a time.
	&mov	(&BP(0,"edi"),"dl");
Packit c4476c
	&lea	("edi",&DWP(1,"edi"));
Packit c4476c
	&inc	("eax");
Packit c4476c
	&shr	("edx",8);
Packit c4476c
	&dec	("ebx");
Packit c4476c
	&jnz	(&label("tail"));
Packit c4476c
Packit c4476c
&set_label("done");
Packit c4476c
	&xor	("edx","edx");		# Clear random value from registers
Packit c4476c
	&pop	("ebx");
Packit c4476c
	&pop	("edi");
Packit c4476c
	&ret	();
Packit c4476c
&function_end_B("OPENSSL_ia32_${rdop}_bytes");
Packit c4476c
}
Packit c4476c
# Emit OPENSSL_ia32_rdrand_bytes and OPENSSL_ia32_rdseed_bytes.
&gen_random("rdrand");
Packit c4476c
&gen_random("rdseed");
Packit c4476c
Packit c4476c
# initseg and hidden are provided by x86asm.pl.
# NOTE(review): presumably these register OPENSSL_cpuid_setup as an
# initializer and mark both symbols as non-exported - confirm against
# x86asm.pl.
&initseg("OPENSSL_cpuid_setup");
Packit c4476c
Packit c4476c
&hidden("OPENSSL_cpuid_setup");
Packit c4476c
&hidden("OPENSSL_ia32cap_P");
Packit c4476c
Packit c4476c
&asm_finish();
Packit c4476c
Packit c4476c
# STDOUT is the generated assembly file; buffered write errors only surface
# at close, so a failure here must abort the build.
close STDOUT or die "error closing STDOUT: $!";