|
Packit |
5c3484 |
dnl x86-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
dnl Copyright 2007, 2008, 2010, 2014 Free Software Foundation, Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
dnl This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
dnl it under the terms of either:
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl * the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
dnl Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
dnl option) any later version.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl or
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl * the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
dnl Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
dnl later version.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl or both in parallel, as here.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
dnl for more details.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
dnl GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
dnl see https://www.gnu.org/licenses/.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
include(`../config.m4')
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C cycles/limb best
|
|
Packit |
5c3484 |
C AMD K8,K9 18
|
|
Packit |
5c3484 |
C AMD K10 18
|
|
Packit |
5c3484 |
C AMD bull
|
|
Packit |
5c3484 |
C AMD pile
|
|
Packit |
5c3484 |
C AMD bobcat
|
|
Packit |
5c3484 |
C AMD jaguar
|
|
Packit |
5c3484 |
C Intel P4 68
|
|
Packit |
5c3484 |
C Intel core 34
|
|
Packit |
5c3484 |
C Intel NHM 30.25
|
|
Packit |
5c3484 |
C Intel SBR 21.3
|
|
Packit |
5c3484 |
C Intel IBR 21.4
|
|
Packit |
5c3484 |
C Intel HWL 20.6
|
|
Packit |
5c3484 |
C Intel BWL
|
|
Packit |
5c3484 |
C Intel atom 73
|
|
Packit |
5c3484 |
C VIA nano 33
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C INPUT PARAMETERS
C The defines below give names to the five argument registers as they are
C laid out for STD64.  The function body visible in this file refers to the
C registers directly rather than through these names.
|
|
Packit |
5c3484 |
C qp: %rdi, copied to rbp in the function and used as the quotient store pointer.
define(`qp', `%rdi')
|
|
Packit |
5c3484 |
C fn: %rsi, copied to r13; added to un when counting quotient limbs.
define(`fn', `%rsi')
|
|
Packit |
5c3484 |
C up_param: %rdx, base of the dividend limbs.
define(`up_param', `%rdx')
|
|
Packit |
5c3484 |
C un_param: %rcx, number of dividend limbs.
define(`un_param', `%rcx')
|
|
Packit |
5c3484 |
C dp: %r8, points at the two divisor limbs (dp[0] and dp[1] are loaded).
define(`dp', `%r8')
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
C Both ABIs are declared as supported; the body uses IFDOS and IFSTD where
C the two differ.
ABI_SUPPORT(DOS64)
|
|
Packit |
5c3484 |
ABI_SUPPORT(STD64)
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
ASM_START()
|
|
Packit |
5c3484 |
TEXT
|
|
Packit |
5c3484 |
C Align the function entry point to 16 bytes.
ALIGN(16)
|
|
Packit |
5c3484 |
C mpn_divrem_2 -- divide an mpn number by a normalized 2-limb number.
C
C Arguments (STD64 registers):
C   %rdi  qp  quotient limbs are stored here, highest index first
C   %rsi  fn  extra low quotient limbs; for these iterations a zero limb is
C             fed in place of a limb read from up
C   %rdx  up  dividend limbs; the two-limb remainder is written back into it
C   %rcx  un  number of dividend limbs
C   %r8   dp  divisor, d0 = dp[0] and d1 = dp[1]
C
C Returns in %rax the value kept in r15 (0 or 1), which the register table
C further down labels msl.  The loop stores un + fn - 2 quotient limbs at
C qp[un+fn-3] down to qp[0].
C
C Calls mpn_invert_limb once with d1 as its argument.
PROLOGUE(mpn_divrem_2)
|
|
Packit |
5c3484 |
C NOTE(review): presumably FUNC_ENTRY(4) maps the four DOS64 register
C arguments onto the STD64 registers; the macro is defined outside this file.
FUNC_ENTRY(4)
|
|
Packit |
5c3484 |
C DOS64 only: fetch the fifth argument (dp) from the stack into r8.
IFDOS(` mov 56(%rsp), %r8 ')
|
|
Packit |
5c3484 |
C Save r15, r14, r13, r12, rbp and rbx; all six are restored at L(end).
C The saves are interleaved with the first argument copies.
push %r15
|
|
Packit |
5c3484 |
push %r14
|
|
Packit |
5c3484 |
push %r13
|
|
Packit |
5c3484 |
push %r12
|
|
Packit |
5c3484 |
C r12 = up + 8*un - 24, so 16(%r12) is up[un-1] and 8(%r12) is up[un-2].
lea -24(%rdx,%rcx,8), %r12 C r12 = &up[un-3]
|
|
Packit |
5c3484 |
C r13 = fn
mov %rsi, %r13
|
|
Packit |
5c3484 |
push %rbp
|
|
Packit |
5c3484 |
C rbp = qp
mov %rdi, %rbp
|
|
Packit |
5c3484 |
push %rbx
|
|
Packit |
5c3484 |
mov 8(%r8), %r11 C d1
|
|
Packit |
5c3484 |
C rbx = up[un-1], the high limb of the running two-limb remainder.
mov 16(%r12), %rbx
|
|
Packit |
5c3484 |
C r8 is overwritten here: from now on it holds d0, not the dp pointer.
mov (%r8), %r8 C d0
|
|
Packit |
5c3484 |
C r10 = up[un-2], the low limb of the running two-limb remainder.
mov 8(%r12), %r10
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
C Most significant quotient limb.  r15 starts at 0 and becomes 1 only when
C the top two dividend limbs rbx:r10 are >= the divisor d1:d0, in which case
C the divisor is subtracted from them once.
xor R32(%r15), R32(%r15)
|
|
Packit |
5c3484 |
C Unsigned compare of d1 against rbx: d1 > rbx means rbx:r10 < d1:d0.
cmp %rbx, %r11
|
|
Packit |
5c3484 |
ja L(2)
|
|
Packit |
5c3484 |
C dl = (d1 < rbx), still using the flags of the compare above.
setb %dl
|
|
Packit |
5c3484 |
cmp %r10, %r8
|
|
Packit |
5c3484 |
C al = (d0 <= r10)
setbe %al
|
|
Packit |
5c3484 |
C The or sets ZF when both conditions are false, which the je below tests.
orb %al, %dl C "orb" form to placate Sun tools
|
|
Packit |
5c3484 |
je L(2)
|
|
Packit |
5c3484 |
inc R32(%r15)
|
|
Packit |
5c3484 |
C rbx:r10 -= d1:d0
sub %r8, %r10
|
|
Packit |
5c3484 |
sbb %r11, %rbx
|
|
Packit |
5c3484 |
L(2):
|
|
Packit |
5c3484 |
C r14 is the index of the next quotient limb to produce.  A negative value
C means the loop has nothing to do, so go straight to the remainder store.
lea -3(%rcx,%r13), %r14 C un + fn - 3
|
|
Packit |
5c3484 |
test %r14, %r14
|
|
Packit |
5c3484 |
js L(end)
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
C Keep d0, the low remainder limb and d1 on the stack across the call, then
C pass d1 as the only argument (rdi for STD64, rcx for DOS64).
push %r8
|
|
Packit |
5c3484 |
push %r10
|
|
Packit |
5c3484 |
push %r11
|
|
Packit |
5c3484 |
IFSTD(` mov %r11, %rdi ')
|
|
Packit |
5c3484 |
IFDOS(` mov %r11, %rcx ')
|
|
Packit |
5c3484 |
C NOTE(review): stack alignment check before the call; the exact meaning of
C the nz condition depends on how the ASSERT macro expands -- confirm there.
ASSERT(nz, `test $15, %rsp')
|
|
Packit |
5c3484 |
CALL( mpn_invert_limb)
|
|
Packit |
5c3484 |
pop %r11
|
|
Packit |
5c3484 |
pop %r10
|
|
Packit |
5c3484 |
pop %r8
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
C Refine the value v returned in rax so that it also accounts for d0.
C rdi holds the working inverse.  rcx:r9 accumulates
C   low(d1 * v) + d0 + high(v * d0)
C with rcx starting at -1 and absorbing the carries.  If rcx ends up
C negative no correction is needed; otherwise the loop at 1: decrements the
C inverse and subtracts d1 from rcx:r9 until rcx turns negative.
mov %r11, %rdx
|
|
Packit |
5c3484 |
mov %rax, %rdi
|
|
Packit |
5c3484 |
C rdx = low limb of d1 * v
imul %rax, %rdx
|
|
Packit |
5c3484 |
mov %rdx, %r9
|
|
Packit |
5c3484 |
C rdx:rax = v * d0
mul %r8
|
|
Packit |
5c3484 |
xor R32(%rcx), R32(%rcx)
|
|
Packit |
5c3484 |
add %r8, %r9
|
|
Packit |
5c3484 |
C rcx = -1 + carry
adc $-1, %rcx
|
|
Packit |
5c3484 |
add %rdx, %r9
|
|
Packit |
5c3484 |
adc $0, %rcx
|
|
Packit |
5c3484 |
js 2f
|
|
Packit |
5c3484 |
1: dec %rdi
|
|
Packit |
5c3484 |
sub %r11, %r9
|
|
Packit |
5c3484 |
C The jns below tests the sign produced by this sbb, not by the dec.
sbb $0, %rcx
|
|
Packit |
5c3484 |
jns 1b
|
|
Packit |
5c3484 |
2:
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
C rbp = &qp[un+fn-3], where the first quotient limb of the loop is stored.
lea (%rbp,%r14,8), %rbp
|
|
Packit |
5c3484 |
mov %r11, %rsi
|
|
Packit |
5c3484 |
neg %rsi C -d1
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
C rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15
|
|
Packit |
5c3484 |
C n2 un -d1 dinv qp d0 q0 d1 up fn msl
|
|
Packit |
5c3484 |
C Register roles inside the loop, as established by the code above:
C   rbx  high limb of the running remainder
C   r10  low limb of the running remainder
C   rsi  -d1
C   rdi  inverse (dinv)
C   rbp  quotient store pointer, stepping downwards
C   r8   d0
C   r9   current quotient limb
C   r11  d1
C   r12  dividend read pointer, stepping downwards
C   r13  fn
C   r14  index of the quotient limb being produced, counts down to 0
C   r15  most significant quotient limb (returned)
C   rax, rcx, rdx are scratch within each iteration

|
Packit |
5c3484 |
ALIGN(16)
|
|
Packit |
5c3484 |
C One quotient limb per iteration.
C Step 1: candidate q = rbx + high(dinv * rbx) + carry out of
C         (r10 + low(dinv * rbx)); that low sum is kept in rcx for the
C         comparison in step 4.
L(top): mov %rdi, %rax C di ncp
|
|
Packit |
5c3484 |
mul %rbx C 0, 17
|
|
Packit |
5c3484 |
mov %r10, %rcx C
|
|
Packit |
5c3484 |
add %rax, %rcx C 4
|
|
Packit |
5c3484 |
adc %rbx, %rdx C 5
|
|
Packit |
5c3484 |
mov %rdx, %r9 C q 6
|
|
Packit |
5c3484 |
C Step 2: start the new remainder.  rbx = r10 - q*d1 (low limb only) and
C         rdx:rax = d0 * q.  r10 is cleared so that a zero limb is used when
C         no dividend limb is fetched below.
imul %rsi, %rdx C 6
|
|
Packit |
5c3484 |
mov %r8, %rax C ncp
|
|
Packit |
5c3484 |
lea (%rdx, %r10), %rbx C n1 -= ... 10
|
|
Packit |
5c3484 |
xor R32(%r10), R32(%r10) C
|
|
Packit |
5c3484 |
mul %r9 C 7
|
|
Packit |
5c3484 |
C Signed compare: when fn > r14 the iteration belongs to the fn extra limbs
C and r10 stays 0; otherwise fetch the next dividend limb and step r12 down.
cmp %r14, %r13 C
|
|
Packit |
5c3484 |
jg L(19) C
|
|
Packit |
5c3484 |
mov (%r12), %r10 C
|
|
Packit |
5c3484 |
sub $8, %r12 C
|
|
Packit |
5c3484 |
C Step 3: rbx:r10 -= d1:d0, then rbx:r10 -= d0 * q.
L(19): sub %r8, %r10 C ncp
|
|
Packit |
5c3484 |
sbb %r11, %rbx C 11
|
|
Packit |
5c3484 |
sub %rax, %r10 C 11
|
|
Packit |
5c3484 |
sbb %rdx, %rbx C 12
|
|
Packit |
5c3484 |
C Step 4: compare rbx with the rcx saved in step 1 (unsigned).
C         No carry (rbx >= rcx): rdx:rax = d1:d0 is added back and q is kept.
C         Carry (rbx < rcx): rdx:rax stays 0 and the adc increments q.
C         The two xors come before the cmp so that they do not disturb the
C         flags consumed by cmovnc and adc.
xor R32(%rax), R32(%rax) C
|
|
Packit |
5c3484 |
xor R32(%rdx), R32(%rdx) C
|
|
Packit |
5c3484 |
cmp %rcx, %rbx C 13
|
|
Packit |
5c3484 |
cmovnc %r8, %rax C 14
|
|
Packit |
5c3484 |
cmovnc %r11, %rdx C 14
|
|
Packit |
5c3484 |
adc $0, %r9 C adjust q 14
|
|
Packit |
5c3484 |
nop
|
|
Packit |
5c3484 |
add %rax, %r10 C 15
|
|
Packit |
5c3484 |
adc %rdx, %rbx C 16
|
|
Packit |
5c3484 |
C Step 5: if rbx >= d1 (unsigned) the remainder may still be >= the
C         divisor; L(fix) decides and jumps back to L(bck).
cmp %r11, %rbx C
|
|
Packit |
5c3484 |
jae L(fix) C
|
|
Packit |
5c3484 |
C Store the quotient limb, step the store pointer down, and loop while the
C decremented index is still non-negative.
L(bck): mov %r9, (%rbp) C
|
|
Packit |
5c3484 |
sub $8, %rbp C
|
|
Packit |
5c3484 |
dec %r14
|
|
Packit |
5c3484 |
jns L(top)
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
C Write the two-limb remainder rbx:r10 relative to the current r12.  r12
C started at &up[un-3] and steps down once per dividend limb fetched in the
C loop, so assuming un >= 2 these stores land in up[0] and up[1].
L(end): mov %r10, 8(%r12)
|
|
Packit |
5c3484 |
mov %rbx, 16(%r12)
|
|
Packit |
5c3484 |
C Restore the saved registers in reverse order of the pushes; the return
C value is copied out of r15 before r15 itself is restored.
pop %rbx
|
|
Packit |
5c3484 |
pop %rbp
|
|
Packit |
5c3484 |
pop %r12
|
|
Packit |
5c3484 |
pop %r13
|
|
Packit |
5c3484 |
pop %r14
|
|
Packit |
5c3484 |
mov %r15, %rax
|
|
Packit |
5c3484 |
pop %r15
|
|
Packit |
5c3484 |
FUNC_EXIT()
|
|
Packit |
5c3484 |
ret
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
C Out-of-line correction, entered with rbx >= d1 and the flags of that
C compare still live.  If rbx > d1, or r10 >= d0, then rbx:r10 >= d1:d0:
C increment q and subtract the divisor once.  Otherwise q is already right.
L(fix): seta %dl
|
|
Packit |
5c3484 |
cmp %r8, %r10
|
|
Packit |
5c3484 |
setae %al
|
|
Packit |
5c3484 |
C ZF from the or is set only when both conditions are false.
orb %dl, %al C "orb" form to placate Sun tools
|
|
Packit |
5c3484 |
je L(bck)
|
|
Packit |
5c3484 |
inc %r9
|
|
Packit |
5c3484 |
sub %r8, %r10
|
|
Packit |
5c3484 |
sbb %r11, %rbx
|
|
Packit |
5c3484 |
jmp L(bck)
|
|
Packit |
5c3484 |
EPILOGUE()
|