dnl  SPARC64 mpn_gcd_1.

dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for SPARC by Torbjörn
dnl  Granlund.

dnl  Copyright 2000-2002, 2005, 2009, 2011-2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

|
Packit |
5c3484 |
include(`../config.m4')
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
C		  cycles/bit (approx)
C UltraSPARC 1&2:	 5.1
C UltraSPARC 3:		 5.0
C UltraSPARC T1:	11.4
C UltraSPARC T3:	10
C UltraSPARC T4:	 6
C Numbers measured with: speed -CD -s32-64 -t32 mpn_gcd_1
|
Packit |
5c3484 |
|
C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
C
C Entry 0 is emitted explicitly and entries 1 through MASK come from the
C forloop, one byte each.  mpn_gcd_1 indexes the table with a value that has
C been ANDed with MASK, i.e. the low MAXSHIFT bits of a limb.

deflit(MAXSHIFT, 7)
deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))

	RODATA
	TYPE(ctz_table,object)
ctz_table:
	.byte	MAXSHIFT
forloop(i,1,MASK,
`	.byte	m4_count_trailing_zeros(i)
')
	SIZE(ctz_table,.-ctz_table)
|
Packit |
5c3484 |
|
C Threshold of when to call bmod when U is one limb.  Should be about
C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
C
C It is applied as a right-shift count: when n is 1, bmod is called only if
C (u0 >> BMOD_THRES_LOG2) is greater than v0 in an unsigned comparison.
define(`BMOD_THRES_LOG2', 14)
|
Packit |
5c3484 |
|
C INPUT PARAMETERS
C
C The names map to in-registers because mpn_gcd_1 opens with a save and so
C sees its arguments in the %i registers of the new window.
C   up   address of the U operand (the low limb is read from [up+0])
C   n    limb count of U
C   v0   the single-limb V operand
define(`up',    `%i0')
define(`n',     `%i1')
define(`v0',    `%i2')
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
C mpn_gcd_1 -- greatest common divisor of the operand at up (n limbs) and the
C single limb v0.
C
C Inputs, as seen after the save:
C   up  (%i0)  address of U; the low limb is loaded from [up+0]
C   n   (%i1)  limb count of U; any value other than 1 takes the bmod path
C   v0  (%i2)  single-limb V
C
C v0 must be nonzero: L(divide_strip_y) shifts v0 right until its low bit is
C set and would never terminate on zero.
C
C Result, in the %o0 of the caller: the final v shifted left by the number of
C trailing zero bits common to v0 and the low limb of U.
C
C Register use inside the window:
C   %g1       low limb of U
C   %g2       scratch: u0 | v0 while counting twos, then u0 >> BMOD_THRES_LOG2
C   %i4       count of common trailing zero bits (is %o4 after return)
C   %i5       base address of ctz_table, set up by LEA64
C   %o0       u, the value being reduced
C   %g3       low MAXSHIFT bits of the value to be shifted, the table index
C   %g5       shift count fetched from ctz_table
C   %l4, %l2  u with trailing zeros removed, and v - u
C
C Delay-slot instructions are indented by one extra space.  On the annulling
C branches (the ones with ,a) the slot runs only when the branch is taken.

ASM_START()
	REGISTER(%g2,#scratch)
	REGISTER(%g3,#scratch)
PROLOGUE(mpn_gcd_1)
	save	%sp, -192, %sp
	ldx	[up+0], %g1		C U low limb
	mov	-1, %i4			C twos count, biased by -1 for the inc below
	or	v0, %g1, %g2		C x | y

C Count the trailing zero bits shared by u0 and v0.
L(twos):
	inc	%i4
	andcc	%g2, 1, %g0
	bz,a	%xcc, L(twos)
	 srlx	%g2, 1, %g2

C Strip trailing zero bits from v0 so that it is odd.
L(divide_strip_y):
	andcc	v0, 1, %g0
	bz,a	%xcc, L(divide_strip_y)
	 srlx	v0, 1, v0

	cmp	n, 1			C if n > 1 we need
	bnz	%xcc, L(bmod)		C to call bmod_1
	 nop

C Both U and V are single limbs, reduce with bmod if u0 >> v0.
	srlx	%g1, BMOD_THRES_LOG2, %g2
	cmp	%g2, v0
	bleu	%xcc, L(noreduce)
	 mov	%g1, %o0		C u = u0, overwritten below on fall through

C Call mpn_modexact_1c_odd with (up, n, v0, 0); its result in %o0 becomes u.
L(bmod):
	mov	up, %o0
	mov	n, %o1
	mov	v0, %o2
	call	mpn_modexact_1c_odd
	 mov	0, %o3

L(noreduce):

C Load the address of ctz_table into %i5.  g4 is the second register handed
C to LEA64, presumably as scratch (the macro is defined outside this file).
	LEA64(ctz_table, i5, g4)

	cmp	%o0, 0
	bnz	%xcc, L(mid)
	 and	%o0, MASK, %g3		C table index = low MAXSHIFT bits of u

C u is zero, so the answer is v shifted back up by the twos count.  The
C return restores the caller window before its delay slot executes, hence
C v0 (%i2) and the twos count (%i4) are addressed there as %o2 and %o4.
	return	%i7+8
	 sllx	%o2, %o4, %o0		C CAUTION: v0 alias for o2

C Main loop.  Flags come from the subcc below; movcc is move on carry clear,
C which here means v >= u in the unsigned sense.
	ALIGN(16)
L(top):	movcc	%xcc, %l4, v0		C v = min(u,v)
	movcc	%xcc, %l2, %o0		C u = |v - u|
L(mid):	ldub	[%i5+%g3], %g5		C shift count from ctz_table
	brz,a,pn %g3, L(shift_alot)	C low MAXSHIFT bits are all zero
	 srlx	%o0, MAXSHIFT, %o0	C drop MAXSHIFT zero bits, then retry
	srlx	%o0, %g5, %l4		C new u, odd
	subcc	v0, %l4, %l2		C v - u, set flags for branch and movcc
	sub	%l4, v0, %o0		C u - v
	bnz,pt	%xcc, L(top)		C loop until u equals v
	 and	%l2, MASK, %g3		C extract low MAXSHIFT bits from (v-u)

	return	%i7+8
	 sllx	%o2, %o4, %o0		C CAUTION: v0 alias for o2

C Reached with u already shifted right by MAXSHIFT; recompute the table index
C and look again.
L(shift_alot):
	b	L(mid)
	 and	%o0, MASK, %g3		C table index = low MAXSHIFT bits of u
EPILOGUE()