C x86/camellia-crypt-internal.asm
ifelse(<
Copyright (C) 2010, Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
modify it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.
or both in parallel, as here.
GNU Nettle is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
>)
C Register usage:
C
C Camellia state, 128-bit value in little endian order, kept in four
C 32-bit registers.
C L0, H0 correspond to D1 in the spec and i0 in the C implementation,
C while L1, H1 correspond to D2/i1.
C NOTE(review): the state presumably lives in eax/ebx/ecx/edx because ROUND
C picks out bytes through LREG and HREG, which are defined outside this
C file - confirm against their definitions.
define(<L0>,<%eax>)
define(<H0>,<%ebx>)
define(<L1>,<%ecx>)
define(<H1>,<%edx>)
C TMP is scratch: table indices in ROUND, intermediates in FL and FLINV,
C and a pointer or counter in the function body.
C KEY points into the subkey array, T holds the base of the lookup tables.
define(<TMP>,<%ebp>)
define(<KEY>,<%esi>)
define(<T>,<%edi>)
C Locals on the stack
C
C Five words at the bottom of the 20 bytes that the function prologue
C reserves below the saved registers.
C FRAME_L0 .. FRAME_H1 hold saved copies of the state words, which ROUND
C xors into its freshly computed output.
C FRAME_CNT is the counter that controls the round loop.
define(<FRAME_L0>, <(%esp)>)
define(<FRAME_H0>, <4(%esp)>)
define(<FRAME_L1>, <8(%esp)>)
define(<FRAME_H1>, <12(%esp)>)
define(<FRAME_CNT>, <16(%esp)>)
C Arguments on stack.
C
C Offsets are relative to %esp after the prologue has run. Below the first
C argument lie the 20 bytes of locals (0..16), the four saved registers
C (20..32) and the return address (36).
C FRAME_LENGTH, FRAME_SRC and FRAME_DST are updated in place while the
C function walks through the blocks.
define(<FRAME_NKEYS>, <40(%esp)>)
define(<FRAME_KEYS>, <44(%esp)>)
define(<FRAME_TABLE>, <48(%esp)>)
define(<FRAME_LENGTH>, <52(%esp)>)
define(<FRAME_DST>, <56(%esp)>)
define(<FRAME_SRC>, <60(%esp)>)
C Table lookup operands.
C
C Each macro takes an index register and expands to a scaled-index memory
C operand for a 4-byte entry relative to the table base in T.
C The four tables are 1024 bytes apart, i.e. 256 four-byte entries each,
C which matches the byte-sized indices that ROUND produces with movzbl.
C NOTE(review): the names presumably mirror the table names used in the
C C implementation - confirm there.
define(<SP1110>, <(T,$1,4)>)
define(<SP0222>, <1024(T,$1,4)>)
define(<SP3033>, <2048(T,$1,4)>)
define(<SP4404>, <3072(T,$1,4)>)
C ROUND(xl, xh, yl, yh, key-offset)
C
C One round: computes a 64-bit value from the four bytes of xl, the four
C bytes of xh (via table lookups) and the subkey at key-offset(KEY), then
C xors it with the saved copy of y found in the stack frame.
C
C xl and xh are rotated 16 bits at the end
C yl and yh are read from stack, and left in registers
C
C The incoming register contents of yl and yh are ignored and overwritten;
C the old y value must already be stored in the matching FRAME_ slots.
C Because the frame slot name is formed by pasting FRAME_ with the argument,
C callers must pass yl and yh quoted, so that the name (not the register it
C expands to) reaches the macro body.
C Clobbers TMP.
C NOTE(review): LREG and HREG are defined outside this file; presumably they
C select the low byte and the second byte of a register (e.g. %al and %ah)
C - confirm against their definitions.
define(<ROUND>, <
C Start yh from the LREG and HREG bytes of xl
movzbl LREG($1), TMP
movl SP1110(TMP), $4
movzbl HREG($1), TMP
xorl SP4404(TMP), $4
C Bring the other half of xl within reach of LREG and HREG
roll <$>16, $1
C Start yl from the LREG and HREG bytes of xh
movzbl LREG($2), TMP
movl SP4404(TMP), $3
movzbl HREG($2), TMP
xorl SP3033(TMP), $3
C Bring the other half of xh within reach of LREG and HREG
roll <$>16, $2
C Fold the remaining two bytes of xl into yh
movzbl LREG($1), TMP
xorl SP3033(TMP), $4
movzbl HREG($1), TMP
xorl SP0222(TMP), $4
C Fold the remaining two bytes of xh into yl
movzbl LREG($2), TMP
xorl SP0222(TMP), $3
movzbl HREG($2), TMP
xorl SP1110(TMP), $3
C Add the subkey: word at key-offset into yh, the following word into yl
xorl $5(KEY), $4
xorl $5 + 4(KEY), $3
C Mix the halves: yh ^= yl, then yl = ror(yl, 8), then yl ^= yh
xorl $3, $4
rorl <$>8, $3
xorl $4, $3
C Xor in the previous y value saved in the frame
xorl FRAME_$3, $3
xorl FRAME_$4, $4
>)
C Six rounds, with inputs and outputs in registers.
C
C Applies ROUND six times, alternating which half of the state is the
C x input and which is the y output, using the six subkeys at offsets
C 0, 8, ..., 40 from KEY. KEY itself is not modified.
C Overwrites FRAME_L0 .. FRAME_H1 and clobbers TMP (through ROUND).
C
C ROUND leaves its x registers rotated by 16 bits and takes the old y value
C from the frame, so each half is stored to the frame while it is still
C unrotated, before the round that uses it as x.
define(<ROUND6>, <
C Save the whole state; the first two rounds read both halves from here
movl L0, FRAME_L0
movl H0, FRAME_H0
movl L1, FRAME_L1
movl H1, FRAME_H1
ROUND(L0,H0,<L1>,<H1>,0)
C Save the new L1, H1 before the next round rotates them
movl L1, FRAME_L1
movl H1, FRAME_H1
ROUND(L1,H1,<L0>,<H0>,8)
C Save the new L0, H0 before the next round rotates them
movl L0, FRAME_L0
movl H0, FRAME_H0
ROUND(L0,H0,<L1>,<H1>,16)
movl L1, FRAME_L1
movl H1, FRAME_H1
ROUND(L1,H1,<L0>,<H0>,24)
movl L0, FRAME_L0
movl H0, FRAME_H0
ROUND(L0,H0,<L1>,<H1>,32)
C No save needed here: no later round reads L1, H1 from the frame
ROUND(L1,H1,<L0>,<H0>,40)
C The last round left L1 and H1 rotated by 16; rotate again to restore them
roll <$>16, L1
roll <$>16, H1
>)
C FL(x0, x1, key-offset)
C
C Keyed mixing of the register pair x0, x1 with the two subkey words at
C key-offset(KEY) and key-offset + 4(KEY):
C   x0 ^= rol(x1 & key[offset + 4], 1)
C   x1 ^= x0 | key[offset]        (using the updated x0)
C Clobbers TMP. FLINV applies the same two updates in the opposite order,
C which undoes FL for the same subkey.
define(<FL>, <
movl $3 + 4(KEY), TMP
andl $2, TMP
roll <$>1, TMP
xorl TMP, $1
movl $3(KEY), TMP
orl $1, TMP
xorl TMP, $2
>)
C FLINV(x0, x1, key-offset)
C
C Keyed mixing of the register pair x0, x1 with the two subkey words at
C key-offset(KEY) and key-offset + 4(KEY):
C   x1 ^= x0 | key[offset]
C   x0 ^= rol(x1 & key[offset + 4], 1)        (using the updated x1)
C Clobbers TMP. These are the two updates of FL applied in the opposite
C order, so FLINV undoes FL for the same subkey.
define(<FLINV>, <
movl $3(KEY), TMP
orl $1, TMP
xorl TMP, $2
movl $3 + 4(KEY), TMP
andl $2, TMP
roll <$>1, TMP
xorl TMP, $1
>)
.file "camellia-crypt-internal.asm"
C _camellia_crypt(unsigned nkeys, const uint64_t *keys,
C const struct camellia_table *T,
C size_t length, uint8_t *dst,
C uint8_t *src)
C
C Processes the data at src in 16-byte blocks and writes the result to dst,
C using the subkeys at keys and the lookup tables at T. All arguments are
C taken from the stack; src is only read.
C
C Does nothing when length is zero. Otherwise whole 16-byte blocks are
C processed until the remaining length, after subtracting 16, is no longer
C above zero (unsigned). NOTE: length is expected to be a multiple of 16;
C a trailing partial block would be read and written as a full block.
C
C Per block: xor with the first subkey, six rounds, then groups of
C FL, FLINV and six more rounds. The group always runs once and repeats
C while the counter, which starts at nkeys - 8 and drops by 8 per group,
C stays above zero; e.g. nkeys = 24 gives two groups, 18 rounds in total.
.text
ALIGN(16)
PROLOGUE(_nettle_camellia_crypt)
C save all registers that need to be saved
pushl %ebx C 32(%esp)
pushl %ebp C 28(%esp)
pushl %esi C 24(%esp)
pushl %edi C 20(%esp)
C Room for the five local words FRAME_L0 .. FRAME_CNT
subl $20, %esp
C Nothing to do for an empty input
movl FRAME_LENGTH, %ebp
testl %ebp,%ebp
jz .Lend
.Lblock_loop:
C Load data, note that we'll happily do unaligned loads
C Each 32-bit word is byte swapped after loading; the first two words of
C the block become H0, L0 and the last two become H1, L1.
movl FRAME_SRC, TMP
movl (TMP), H0
bswap H0
movl 4(TMP), L0
bswap L0
movl 8(TMP), H1
bswap H1
movl 12(TMP), L1
bswap L1
C Advance the source pointer for the next block
addl $16, FRAME_SRC
C Restart from the first subkey for every block
movl FRAME_KEYS, KEY
C Round loop counter: nkeys minus the 8 subkeys used before the loop
movl FRAME_NKEYS, TMP
subl $8, TMP
movl TMP, FRAME_CNT
C Xor the first 64-bit subkey into L0, H0, then step KEY past it
xorl (KEY), L0
xorl 4(KEY), H0
addl $8, KEY
movl FRAME_TABLE, T
C First six rounds, subkeys at 0 .. 40 from KEY
ROUND6
.Lround_loop:
C Step over 8 subkeys: the six just used by ROUND6 plus the two for
C FL and FLINV, which are then addressed with negative offsets
addl $64, KEY
FL(L0, H0, -16)
FLINV(L1, H1, -8)
ROUND6
C The flags from this subtraction decide whether another group follows
subl $8, FRAME_CNT
ja .Lround_loop
C Store the result with the two halves swapped relative to the load order:
C H0, L0 go to the last two words, H1, L1 to the first two.
movl FRAME_DST, TMP
bswap H0
movl H0,8(TMP)
bswap L0
movl L0,12(TMP)
C The subkey following the last six round keys is xored into L1, H1
C before they are stored
xorl 52(KEY), H1
bswap H1
movl H1, 0(TMP)
xorl 48(KEY), L1
bswap L1
movl L1, 4(TMP)
C Advance the destination pointer and count down the remaining length
addl $16, FRAME_DST
subl $16, FRAME_LENGTH
ja .Lblock_loop
.Lend:
C Release the locals and restore the saved registers in reverse push order
addl $20, %esp
popl %edi
popl %esi
popl %ebp
popl %ebx
ret
EPILOGUE(_nettle_camellia_crypt)