dnl  x86 mpn_copyi -- copy limb vector, incrementing.

dnl  Copyright 1999-2002 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C     cycles/limb  startup (approx)
C P5      1.0         35
C P6      0.75        45
C K6      1.0         30
C K7      1.3         65
C P4      1.0        120
C
C (Startup time includes some function call overheads.)


C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C Copy src,size to dst,size, working from low to high addresses.
C
C The code here is very generic and can be expected to be reasonable on all
C the x86 family.
C
C P6 -  An MMX based copy was tried, but was found to be slower than a rep
C       movs in all cases.  The fastest MMX found was 0.8 cycles/limb (when
C       fully aligned).  A rep movs seems to have a startup time of about 15
C       cycles, but doing something special for small sizes could lead to a
C       branch misprediction that would destroy any saving.  For now a plain
C       rep movs seems ok.
C
C K62 - We used to have a big chunk of code doing an MMX copy at 0.56 c/l if
C       aligned or a 1.0 rep movs if not.  But that seemed excessive since
C       it only got an advantage half the time, and even then only showed it
C       above 50 limbs or so.

C Argument slots for mpn_copyi (dst, src, size).  The numbers are presumably
C byte offsets from the stack pointer at entry; defframe and deflit are m4
C macros defined outside this file.  FRAME stays 0 because nothing is pushed.
defframe(PARAM_SIZE,12)
defframe(PARAM_SRC, 8)
defframe(PARAM_DST, 4)
deflit(`FRAME',0)

	TEXT
	ALIGN(32)

	C Register usage in the function body:
	C
	C eax	saved esi
	C ebx
	C ecx	counter
	C edx	saved edi
	C esi	src
	C edi	dst
	C ebp

C mpn_copyi -- copy size limbs from src to dst, low address to high.
C
C esi and edi are needed by the string move, so their incoming values are
C parked in eax and edx rather than on the stack, and put back before the
C return.  The loads and the register saves are interleaved; that ordering
C is kept exactly as it was.

PROLOGUE(mpn_copyi)

	movl	PARAM_SIZE, %ecx	C ecx = number of limbs, the rep count
	movl	%esi, %eax		C park incoming esi

	movl	PARAM_SRC, %esi		C esi = src
	movl	%edi, %edx		C park incoming edi

	movl	PARAM_DST, %edi		C edi = dst

	cld	C better safe than sorry, see mpn/x86/README

	C Move ecx 32-bit limbs from (esi) to (edi), both pointers advancing.
	C A count of zero moves nothing.
	rep
	movsl

	movl	%eax, %esi		C put back incoming esi
	movl	%edx, %edi		C put back incoming edi

	ret

EPILOGUE()