#! /usr/bin/env perl
# Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# This flag makes the inner loop one cycle longer, but generates
# code that runs 30% faster on the Pentium Pro/II and 44% faster
# on the PIII, while being only 7% slower on the Pentium.
# By default, this flag is on.
$ppro=1;

# Directory part of this script's path (including the trailing
# separator), used to locate the perlasm helper modules.  $1 is only
# meaningful after a successful match, so fall back to an empty prefix
# when the script was started without any directory component.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
require "cbc.pl";

# The last command-line argument names the output file; everything the
# generator prints from here on goes to it.  The three-argument form of
# open() keeps characters in the file name from being interpreted as a
# mode, and the result is checked so that a bad path is reported here
# instead of going unnoticed.
$output=pop;
defined($output) or die "$0: missing output file argument\n";
open(STDOUT,">",$output) or die "can't open $output: $!";

# The first remaining argument is handed to asm_init() unchanged; the
# second argument is true when the last remaining argument is "386".
&asm_init($ARGV[0],$ARGV[$#ARGV] eq "386");
Packit c4476c
Packit c4476c
# Generator configuration.  These are deliberately package globals (no
# "my"): E_CAST applies local() to several of them, which Perl only
# permits on package variables.

# Not referenced anywhere else in the code visible in this file.
$CAST_ROUNDS = 16;

# Registers holding the two data words and the base pointer taken from
# the routine's second argument.
($L, $R, $K) = ("edi", "esi", "ebp");

# Scratch registers used inside each round.
($tmp1, $tmp2, $tmp3, $tmp4) = ("ecx", "ebx", "eax", "edx");

# Symbols of the four externally defined lookup tables.
($S1, $S2, $S3, $S4) = (
    "CAST_S_table0",
    "CAST_S_table1",
    "CAST_S_table2",
    "CAST_S_table3",
);

# Instruction-name triples handed to E_CAST as ($OP1,$OP2,$OP3).
@F1 = ("add", "xor", "sub");
@F2 = ("xor", "sub", "add");
@F3 = ("sub", "add", "xor");
Packit c4476c
Packit c4476c
# Emit the two single-block routines.  The second element of each pair
# is the $enc flag of CAST_encrypt(): true generates the encrypting
# round order, false the decrypting one.
foreach my $variant (["CAST_encrypt",1], ["CAST_decrypt",0]) {
    &CAST_encrypt(@$variant);
}

# Emit the CBC wrapper around the two routines above.  The numeric
# arguments are interpreted by cbc.pl, which is not shown here.
&cbc("CAST_cbc_encrypt","CAST_encrypt","CAST_decrypt",1,4,5,3,-1,-1);

&asm_finish();

# Buffered write errors only surface when the handle is closed, so the
# result of close() is checked.
close(STDOUT) or die "error closing STDOUT: $!";
Packit c4476c
Packit c4476c
# CAST_encrypt(NAME, ENC)
#
# Emits one assembler routine called NAME that loads two 32-bit words
# through the pointer in its first argument, runs them through the
# E_CAST rounds using the data addressed by its second argument, and
# stores the two words back in swapped order.
#
#   NAME  name of the generated routine
#   ENC   true:  rounds are emitted in ascending order (0..15)
#         false: rounds are emitted in descending order (15..0)
#
# The dword at offset 128 from the second argument is the "short key
# flag".  When it is non-zero at run time, rounds 12..15 are skipped in
# either direction.
#
# The call form "&name(...)" is kept throughout: names such as push,
# pop, or and xor are also Perl builtins/operators, and the sigil is
# what selects the user-defined subroutine.
sub CAST_encrypt {
    local($name,$enc)=@_;

    local($win_ex)=<<"EOF";
EXTERN	_CAST_S_table0:DWORD
EXTERN	_CAST_S_table1:DWORD
EXTERN	_CAST_S_table2:DWORD
EXTERN	_CAST_S_table3:DWORD
EOF
    &main::external_label(
			  "CAST_S_table0",
			  "CAST_S_table1",
			  "CAST_S_table2",
			  "CAST_S_table3",
			  );

    &function_begin_B($name,$win_ex);

    &comment("");

    &push("ebp");
    &push("ebx");
    &mov($tmp2,&wparam(0));
    &mov($K,&wparam(1));
    &push("esi");
    &push("edi");

    &comment("Load the 2 words");
    &mov($L,&DWP(0,$tmp2,"",0));
    &mov($R,&DWP(4,$tmp2,"",0));

    &comment('Get short key flag');
    &mov($tmp3,&DWP(128,$K,"",0));
    if($enc) {
	# Keep the flag on the stack; it is popped and tested after
	# round 11.
	&push($tmp3);
    } else {
	# Decryption starts with rounds 15..12, so the flag is tested
	# immediately and those rounds are jumped over when it is set.
	&or($tmp3,$tmp3);
	&jnz(&label('cast_dec_skip'));
    }

    # E_CAST only writes the low byte of $tmp3 when $ppro is off, so
    # the register is cleared once here.
    &xor($tmp3,	$tmp3);

    # Round I uses operation set F1, F2 or F3 for I%3 == 0, 1 or 2.
    # When encrypting, even rounds update $L from $R and odd rounds
    # update $R from $L; when decrypting the parity is reversed.  This
    # reproduces the former hand-unrolled list of E_CAST calls.
    my @ops_for   = (\@F1, \@F2, \@F3);
    my $lr_parity = $enc ? 0 : 1;
    my $emit_rounds = sub {
	foreach my $round (@_) {
	    my ($dst,$src) = ($round % 2 == $lr_parity) ? ($L,$R) : ($R,$L);
	    &E_CAST($round,$S,$dst,$src,$K,@{$ops_for[$round % 3]},
		    $tmp1,$tmp2,$tmp3,$tmp4);
	}
    };

    if ($enc) {
	$emit_rounds->(0 .. 11);
	&comment('test short key flag');
	&pop($tmp4);
	&or($tmp4,$tmp4);
	&jnz(&label('cast_enc_done'));
	$emit_rounds->(12 .. 15);
    } else {
	$emit_rounds->(reverse(12 .. 15));
	&set_label('cast_dec_skip');
	$emit_rounds->(reverse(0 .. 11));
    }

    &set_label('cast_enc_done') if $enc;
# Why the nop? - Ben 17/1/99
    &nop();
    # Store the two words back through the first argument, swapped
    # relative to the order in which they were loaded.
    &mov($tmp3,&wparam(0));
    &mov(&DWP(4,$tmp3,"",0),$L);
    &mov(&DWP(0,$tmp3,"",0),$R);
    &function_end($name);
}
Packit c4476c
Packit c4476c
# E_CAST(I, S, L, R, K, OP1, OP2, OP3, TMP1, TMP2, TMP3, TMP4)
#
# Emits the instructions for one round.
#
#   I            round number; the round reads two dwords at K+8*I and
#                K+8*I+4
#   S            not used by this routine
#   L            register that receives the round result (xor-ed in)
#   R            register supplying the round input (left unmodified)
#   K            base register for the per-round dwords
#   OP1..OP3     instruction names ("add", "xor" or "sub") called as
#                subroutines to combine values
#   TMP1..TMP4   scratch registers
#
# The table symbols $S1..$S4 and the $ppro switch are read from the
# enclosing package.  The order of the emitted instructions is exactly
# that of the original hand-scheduled sequence.
sub E_CAST {
    # local() rather than my() is retained for the parameters so that
    # the dynamic scoping seen by callees is unchanged.
    local($i,$S,$L,$R,$K,$OP1,$OP2,$OP3,$tmp1,$tmp2,$tmp3,$tmp4)=@_;
    # Legacy note from the original author: Ri needs to have 16 pre added.

    my $pair_off = $i*8;

    &comment("round $i");
    &mov($tmp4, &DWP($pair_off,$K,"",1));
    &mov($tmp1, &DWP($pair_off+4,$K,"",1));
    # Combine the first dword with the input word, then rotate left by
    # the low byte of the second dword.
    &$OP1($tmp4, $R);
    &rotl($tmp4, &LB($tmp1));

    # Split the low 16 bits of $tmp4 into two table indices.  &LB/&HB
    # presumably select the low and second-lowest byte sub-registers
    # (e.g. dl/dh) -- they are defined outside this file.
    if (!$ppro) {
	# Fewer instructions, but $tmp1 and $tmp3 are not cleared here:
	# only their low bytes are overwritten.
	# NOTE(review): this relies on the upper bits of both already
	# being zero; $tmp3 is cleared once by the caller, $tmp1 holds
	# the dword loaded from K+8*I+4 -- confirm it fits in a byte.
	&mov($tmp2, $tmp4);			# B
	&movb(&LB($tmp1), &HB($tmp4));		# A	# BAD BAD BAD
	&shr($tmp4, 16);
	&and($tmp2, 0xff);
    } else {
	# Clears the full index registers before the byte moves.
	&xor($tmp1, $tmp1);
	&mov($tmp2, 0xff);
	&movb(&LB($tmp1), &HB($tmp4));		# A
	&and($tmp2, $tmp4);
	&shr($tmp4, 16);
	&xor($tmp3, $tmp3);
    }

    # $tmp4 was shifted right by 16 above; split what remains.
    &movb(&LB($tmp3), &HB($tmp4));		# C	# BAD BAD BAD
    &and($tmp4, 0xff);				# D

    # Look up the first table into $tmp1, then fold the other three
    # lookups into it, each through $tmp2.
    &mov($tmp1, &DWP($S1,"",$tmp1,4));
    foreach my $step ([$S2,$tmp2,$OP2], [$S3,$tmp3,$OP3], [$S4,$tmp4,$OP1]) {
	my ($table,$index,$combine) = @$step;
	&mov($tmp2, &DWP($table,"",$index,4));
	&$combine($tmp1, $tmp2);
    }

    &xor($L, $tmp1);
}
Packit c4476c