|
Packit |
40b132 |
#
|
|
Packit |
40b132 |
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
Packit |
40b132 |
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
Packit |
40b132 |
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
.data
.balign 4
#
# is_sse caches the answer to "may the SSE2 code paths be used?":
#   is_sse <  0 : not probed yet; the dispatch code calls s_mpi_is_sse2
#                 and stores its return value here
#   is_sse == 0 : use the plain x86 instructions
#   is_sse >  0 : use the SSE2 instructions
# The initial value is -1.
#
.type is_sse,@object
.size is_sse,4
is_sse:
        .long   -1
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
#
|
|
Packit |
40b132 |
# sigh, handle the difference between -fPIC and not PIC
|
|
Packit |
40b132 |
# default to pic, since this file seems to be exclusively
|
|
Packit |
40b132 |
# linux right now (solaris uses mpi_i86pc.s and windows uses
|
|
Packit |
40b132 |
# mpi_x86_asm.c)
|
|
Packit |
40b132 |
#
|
|
Packit |
40b132 |
# GET var,reg : load the 32-bit variable \var into \reg.
# PUT reg,var : store \reg into the 32-bit variable \var.
#
# When NO_PIC is defined the variable is addressed absolutely.
# Otherwise (the default) it is addressed as \var@GOTOFF(%ebx), that is,
# relative to the value in %ebx, so %ebx has to hold the GOT base
# address at every place these macros are expanded.
.ifdef NO_PIC
        .macro GET var,reg
        movl    \var,\reg
        .endm
        .macro PUT reg,var
        movl    \reg,\var
        .endm
.else
        .macro GET var,reg
        movl    \var@GOTOFF(%ebx),\reg
        .endm
        .macro PUT reg,var
        movl    \reg,\var@GOTOFF(%ebx)
        .endm
.endif
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
.text

#
# s_mpv_mul_d: multiply the a_len-word vector a by the single word b.
# Words c[0] .. c[a_len-1] receive the low words of the running product
# and c[a_len] receives the final carry word (c is overwritten, not
# accumulated into).  With a_len == 0 only c[0] = 0 is written.
#
# Stack arguments, addressed after the prologue:
#   a      at  8(%ebp)   source vector
#   a_len  at 12(%ebp)   number of 32-bit words in a
#   b      at 16(%ebp)   multiplier word
#   c      at 20(%ebp)   destination vector
#
# Frame (both paths):
#   caller's edi at  -4(%ebp)
#   caller's esi at  -8(%ebp)
#   caller's ebx at -12(%ebp)   (x86 path only)
#
# Entry dispatch on is_sse: zero selects the x86 path, a positive value
# the SSE2 path; a negative value means "not probed yet", in which case
# s_mpi_is_sse2 is called and its result is cached in is_sse first.
#
# Registers, x86 path:
#   edx:eax  product of the current word
#   ebx      carry word
#   ecx      words remaining
#   esi      read pointer into a
#   edi      write pointer into c
# Registers, SSE2 path:
#   mm0 product, mm1 b, mm2 running sum / carry; ecx, esi, edi as above
#
.globl s_mpv_mul_d
.type s_mpv_mul_d,@function
s_mpv_mul_d:
        GET     is_sse,%eax
        test    %eax,%eax
        je      s_mpv_mul_d_x86
        jg      s_mpv_mul_d_sse2
        sub     $12,%esp                # return address + 12 = 16: call site stays 16-byte aligned
        call    s_mpi_is_sse2
        add     $12,%esp
        PUT     %eax,is_sse             # cache the probe result
        test    %eax,%eax
        jg      s_mpv_mul_d_sse2
s_mpv_mul_d_x86:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        push    %ebx
        xor     %ebx,%ebx               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        je      .Lmul_d_x86_done        # a_len == 0: only the carry word is stored
        mov     8(%ebp),%esi            # esi = a
        cld                             # lodsl/stosl must walk upwards
.Lmul_d_x86_loop:
        lodsl                           # eax = *a++
        mov     16(%ebp),%edx           # edx = b
        mull    %edx                    # edx:eax = a_i * b

        add     %ebx,%eax               # add the incoming carry to edx:eax
        adc     $0,%edx
        mov     %edx,%ebx               # high half becomes the next carry

        stosl                           # *c++ = eax
        dec     %ecx                    # --a_len
        jnz     .Lmul_d_x86_loop
.Lmul_d_x86_done:
        mov     %ebx,0(%edi)            # *c = final carry
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret
        nop
s_mpv_mul_d_sse2:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        psubq   %mm2,%mm2               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        movd    16(%ebp),%mm1           # mm1 = b
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        je      .Lmul_d_sse2_done       # a_len == 0: only the carry word is stored
        mov     8(%ebp),%esi            # esi = a
        cld
.Lmul_d_sse2_loop:
        movd    0(%esi),%mm0            # mm0 = *a
        add     $4,%esi                 # a++
        pmuludq %mm1,%mm0               # mm0 = a_i * b (64-bit)
        paddq   %mm0,%mm2               # mm2 = carry + product
        movd    %mm2,0(%edi)            # *c = low 32 bits
        add     $4,%edi                 # c++
        psrlq   $32,%mm2                # high 32 bits become the next carry
        dec     %ecx                    # --a_len
        jnz     .Lmul_d_sse2_loop
.Lmul_d_sse2_done:
        movd    %mm2,0(%edi)            # *c = final carry
        emms                            # leave MMX state before returning
        pop     %esi
        pop     %edi
        leave
        ret
        nop
.size s_mpv_mul_d, .-s_mpv_mul_d
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
#
# s_mpv_mul_d_add: multiply the a_len-word vector a by the single word b
# and add the product into c: for each i < a_len, c[i] receives the low
# word of a[i]*b + c[i] + carry.  The final carry word is stored
# (not added) at c[a_len].  With a_len == 0 only c[0] = 0 is written.
#
# Stack arguments, addressed after the prologue:
#   a      at  8(%ebp)   source vector
#   a_len  at 12(%ebp)   number of 32-bit words in a
#   b      at 16(%ebp)   multiplier word
#   c      at 20(%ebp)   accumulator / destination vector
#
# Frame (both paths):
#   caller's edi at  -4(%ebp)
#   caller's esi at  -8(%ebp)
#   caller's ebx at -12(%ebp)   (x86 path only)
#
# Entry dispatch on is_sse: zero selects the x86 path, a positive value
# the SSE2 path; a negative value means "not probed yet", in which case
# s_mpi_is_sse2 is called and its result is cached in is_sse first.
#
# Registers, x86 path:
#   edx:eax  product of the current word
#   ebx      carry word
#   ecx      words remaining
#   esi      read pointer into a
#   edi      read/write pointer into c
# Registers, SSE2 path:
#   mm0 product / current c word, mm1 b, mm2 running sum / carry
#
.globl s_mpv_mul_d_add
.type s_mpv_mul_d_add,@function
s_mpv_mul_d_add:
        GET     is_sse,%eax
        test    %eax,%eax
        je      s_mpv_mul_d_add_x86
        jg      s_mpv_mul_d_add_sse2
        sub     $12,%esp                # return address + 12 = 16: call site stays 16-byte aligned
        call    s_mpi_is_sse2
        add     $12,%esp
        PUT     %eax,is_sse             # cache the probe result
        test    %eax,%eax
        jg      s_mpv_mul_d_add_sse2
s_mpv_mul_d_add_x86:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        push    %ebx
        xor     %ebx,%ebx               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        je      .Lmul_d_add_x86_done    # a_len == 0: only the carry word is stored
        mov     8(%ebp),%esi            # esi = a
        cld                             # lodsl/stosl must walk upwards
.Lmul_d_add_x86_loop:
        lodsl                           # eax = *a++
        mov     16(%ebp),%edx           # edx = b
        mull    %edx                    # edx:eax = a_i * b

        add     %ebx,%eax               # add the incoming carry to edx:eax
        adc     $0,%edx
        add     0(%edi),%eax            # add the current word of c
        adc     $0,%edx
        mov     %edx,%ebx               # high half becomes the next carry

        stosl                           # *c++ = eax
        dec     %ecx                    # --a_len
        jnz     .Lmul_d_add_x86_loop
.Lmul_d_add_x86_done:
        mov     %ebx,0(%edi)            # *c = final carry
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret
        nop
s_mpv_mul_d_add_sse2:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        psubq   %mm2,%mm2               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        movd    16(%ebp),%mm1           # mm1 = b
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        je      .Lmul_d_add_sse2_done   # a_len == 0: only the carry word is stored
        mov     8(%ebp),%esi            # esi = a
        cld
.Lmul_d_add_sse2_loop:
        movd    0(%esi),%mm0            # mm0 = *a
        add     $4,%esi                 # a++
        pmuludq %mm1,%mm0               # mm0 = a_i * b (64-bit)
        paddq   %mm0,%mm2               # mm2 = carry + product
        movd    0(%edi),%mm0            # mm0 = *c
        paddq   %mm0,%mm2               # mm2 += current word of c
        movd    %mm2,0(%edi)            # *c = low 32 bits
        add     $4,%edi                 # c++
        psrlq   $32,%mm2                # high 32 bits become the next carry
        dec     %ecx                    # --a_len
        jnz     .Lmul_d_add_sse2_loop
.Lmul_d_add_sse2_done:
        movd    %mm2,0(%edi)            # *c = final carry
        emms                            # leave MMX state before returning
        pop     %esi
        pop     %edi
        leave
        ret
        nop
.size s_mpv_mul_d_add, .-s_mpv_mul_d_add
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
#
# s_mpv_mul_d_add_prop: multiply the a_len-word vector a by the single
# word b and add the product into c: for each i < a_len, c[i] receives
# the low word of a[i]*b + c[i] + carry.  A non-zero final carry is then
# added into c[a_len], and any carry out of that addition keeps rippling
# into the following words of c until an addition produces no carry.
# The ripple has no length limit of its own: it stops only when a word
# absorbs the carry.
#
# Stack arguments, addressed after the prologue:
#   a      at  8(%ebp)   source vector
#   a_len  at 12(%ebp)   number of 32-bit words in a
#   b      at 16(%ebp)   multiplier word
#   c      at 20(%ebp)   accumulator vector
#
# Frame (both paths):
#   caller's edi at  -4(%ebp)
#   caller's esi at  -8(%ebp)
#   caller's ebx at -12(%ebp)
#
# Entry dispatch on is_sse: zero selects the x86 path, a positive value
# the SSE2 path; a negative value means "not probed yet", in which case
# s_mpi_is_sse2 is called and its result is cached in is_sse first.
#
# Registers, x86 path:
#   edx:eax  product of the current word
#   ebx      carry word
#   ecx      words remaining
#   esi      read pointer into a
#   edi      read/write pointer into c
# Registers, SSE2 path:
#   mm0 product, mm1 b, mm2 running sum / carry, mm3 current c word,
#   ebx final carry, eax scratch for the ripple
#
.globl s_mpv_mul_d_add_prop
.type s_mpv_mul_d_add_prop,@function
s_mpv_mul_d_add_prop:
        GET     is_sse,%eax
        test    %eax,%eax
        je      s_mpv_mul_d_add_prop_x86
        jg      s_mpv_mul_d_add_prop_sse2
        sub     $12,%esp                # return address + 12 = 16: call site stays 16-byte aligned
        call    s_mpi_is_sse2
        add     $12,%esp
        PUT     %eax,is_sse             # cache the probe result
        test    %eax,%eax
        jg      s_mpv_mul_d_add_prop_sse2
s_mpv_mul_d_add_prop_x86:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        push    %ebx
        xor     %ebx,%ebx               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        je      .Lmul_d_add_prop_x86_tail   # a_len == 0: carry is 0, nothing to do
        cld                             # lodsl/stosl must walk upwards
        mov     8(%ebp),%esi            # esi = a
.Lmul_d_add_prop_x86_loop:
        lodsl                           # eax = *a++
        mov     16(%ebp),%edx           # edx = b
        mull    %edx                    # edx:eax = a_i * b

        add     %ebx,%eax               # add the incoming carry to edx:eax
        adc     $0,%edx
        add     0(%edi),%eax            # add the current word of c
        adc     $0,%edx
        mov     %edx,%ebx               # high half becomes the next carry

        stosl                           # *c++ = eax
        dec     %ecx                    # --a_len
        jnz     .Lmul_d_add_prop_x86_loop
.Lmul_d_add_prop_x86_tail:
        test    %ebx,%ebx               # is the carry zero?
        jz      .Lmul_d_add_prop_x86_out
        mov     0(%edi),%eax            # add the carry word into *c
        add     %ebx,%eax
        stosl                           # *c++ = eax (flags untouched)
        jnc     .Lmul_d_add_prop_x86_out
.Lmul_d_add_prop_x86_ripple:            # CF is set on every entry here
        mov     0(%edi),%eax
        adc     $0,%eax                 # *c + 1
        stosl                           # *c++ = eax (flags untouched)
        jc      .Lmul_d_add_prop_x86_ripple
.Lmul_d_add_prop_x86_out:
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret
        nop
s_mpv_mul_d_add_prop_sse2:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        push    %ebx
        psubq   %mm2,%mm2               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        movd    16(%ebp),%mm1           # mm1 = b
        mov     20(%ebp),%edi           # edi = c
        test    %ecx,%ecx
        je      .Lmul_d_add_prop_sse2_tail  # a_len == 0: carry is 0, nothing to do
        mov     8(%ebp),%esi            # esi = a
        cld                             # the ripple below uses stosl
.Lmul_d_add_prop_sse2_loop:
        movd    0(%esi),%mm0            # mm0 = *a
        movd    0(%edi),%mm3            # mm3 = *c
        add     $4,%esi                 # a++
        pmuludq %mm1,%mm0               # mm0 = a_i * b (64-bit)
        paddq   %mm0,%mm2               # mm2 = carry + product
        paddq   %mm3,%mm2               # mm2 += current word of c
        movd    %mm2,0(%edi)            # *c = low 32 bits
        add     $4,%edi                 # c++
        psrlq   $32,%mm2                # high 32 bits become the next carry
        dec     %ecx                    # --a_len
        jnz     .Lmul_d_add_prop_sse2_loop
.Lmul_d_add_prop_sse2_tail:
        movd    %mm2,%ebx               # ebx = final carry
        test    %ebx,%ebx               # is the carry zero?
        jz      .Lmul_d_add_prop_sse2_out
        mov     0(%edi),%eax            # add the carry word into *c
        add     %ebx,%eax
        stosl                           # *c++ = eax (flags untouched)
        jnc     .Lmul_d_add_prop_sse2_out
.Lmul_d_add_prop_sse2_ripple:           # CF is set on every entry here
        mov     0(%edi),%eax
        adc     $0,%eax                 # *c + 1
        stosl                           # *c++ = eax (flags untouched)
        jc      .Lmul_d_add_prop_sse2_ripple
.Lmul_d_add_prop_sse2_out:
        emms                            # leave MMX state before returning
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret
        nop
.size s_mpv_mul_d_add_prop, .-s_mpv_mul_d_add_prop
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
#
# s_mpv_sqr_add_prop: for each i < a_len, add the 64-bit square of pa[i]
# (plus the carry from the previous step) into the word pair
# ps[2i], ps[2i+1].  A non-zero carry left after the last pair is added
# into the next word of ps, and any carry out of that addition keeps
# rippling upwards until an addition produces no carry.  The ripple has
# no length limit of its own: it stops only when a word absorbs the
# carry.
#
# Stack arguments, addressed after the prologue:
#   pa     at  8(%ebp)   source vector
#   a_len  at 12(%ebp)   number of 32-bit words in pa
#   ps     at 16(%ebp)   accumulator vector
#
# Frame (both paths):
#   caller's edi at  -4(%ebp)
#   caller's esi at  -8(%ebp)
#   caller's ebx at -12(%ebp)
#
# Entry dispatch on is_sse: zero selects the x86 path, a positive value
# the SSE2 path; a negative value means "not probed yet", in which case
# s_mpi_is_sse2 is called and its result is cached in is_sse first.
#
# Registers, x86 path:
#   edx:eax  square of the current word
#   ebx      carry (also scratch for the words read from ps)
#   ecx      words remaining
#   esi      read pointer into pa
#   edi      read/write pointer into ps
# Registers, SSE2 path:
#   mm0 square, mm2 running sum / carry, mm3 current ps word,
#   ebx final carry, eax scratch for the ripple
#

.globl s_mpv_sqr_add_prop
.type s_mpv_sqr_add_prop,@function
s_mpv_sqr_add_prop:
        GET     is_sse,%eax
        test    %eax,%eax
        je      s_mpv_sqr_add_prop_x86
        jg      s_mpv_sqr_add_prop_sse2
        sub     $12,%esp                # return address + 12 = 16: call site stays 16-byte aligned
        call    s_mpi_is_sse2
        add     $12,%esp
        PUT     %eax,is_sse             # cache the probe result
        test    %eax,%eax
        jg      s_mpv_sqr_add_prop_sse2
s_mpv_sqr_add_prop_x86:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        push    %ebx
        xor     %ebx,%ebx               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     16(%ebp),%edi           # edi = ps
        test    %ecx,%ecx
        je      .Lsqr_add_prop_x86_tail # a_len == 0: carry is 0, nothing to do
        cld                             # lodsl/stosl must walk upwards
        mov     8(%ebp),%esi            # esi = pa
.Lsqr_add_prop_x86_loop:
        lodsl                           # eax = *pa++
        mull    %eax                    # edx:eax = a_i * a_i

        add     %ebx,%eax               # add the incoming carry
        adc     $0,%edx
        mov     0(%edi),%ebx
        add     %ebx,%eax               # add the low result word; CF feeds the adc below
        mov     4(%edi),%ebx            # fetch the high result word (mov keeps the flags)
        stosl                           # *ps++ = low word (stosl keeps the flags)
        adc     %ebx,%edx               # add the high result word plus CF
        movl    $0,%ebx                 # has to be mov, not xor: CF is still live
        mov     %edx,%eax
        adc     $0,%ebx                 # ebx = carry out of the pair (0 or 1)
        stosl                           # *ps++ = high word
        dec     %ecx                    # --a_len
        jnz     .Lsqr_add_prop_x86_loop
.Lsqr_add_prop_x86_tail:
        test    %ebx,%ebx               # is the carry zero?
        jz      .Lsqr_add_prop_x86_out
        mov     0(%edi),%eax            # add the carry into *ps
        add     %ebx,%eax
        stosl                           # *ps++ = eax (flags untouched)
        jnc     .Lsqr_add_prop_x86_out
.Lsqr_add_prop_x86_ripple:              # CF is set on every entry here
        mov     0(%edi),%eax
        adc     $0,%eax                 # *ps + 1
        stosl                           # *ps++ = eax (flags untouched)
        jc      .Lsqr_add_prop_x86_ripple
.Lsqr_add_prop_x86_out:
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret
        nop
s_mpv_sqr_add_prop_sse2:
        push    %ebp
        mov     %esp,%ebp
        push    %edi
        push    %esi
        push    %ebx
        psubq   %mm2,%mm2               # carry = 0
        mov     12(%ebp),%ecx           # ecx = a_len
        mov     16(%ebp),%edi           # edi = ps
        test    %ecx,%ecx
        je      .Lsqr_add_prop_sse2_tail    # a_len == 0: carry is 0, nothing to do
        mov     8(%ebp),%esi            # esi = pa
        cld                             # the ripple below uses stosl
.Lsqr_add_prop_sse2_loop:
        movd    0(%esi),%mm0            # mm0 = *pa
        movd    0(%edi),%mm3            # mm3 = low result word
        add     $4,%esi                 # pa++
        pmuludq %mm0,%mm0               # mm0 = a_i * a_i (64-bit)
        paddq   %mm0,%mm2               # mm2 = carry + square
        paddq   %mm3,%mm2               # add the low result word
        movd    4(%edi),%mm3            # mm3 = high result word
        movd    %mm2,0(%edi)            # store the new low word
        psrlq   $32,%mm2
        paddq   %mm3,%mm2               # add the high result word
        movd    %mm2,4(%edi)            # store the new high word
        psrlq   $32,%mm2                # what is left is the next carry
        add     $8,%edi                 # ps += 2
        dec     %ecx                    # --a_len
        jnz     .Lsqr_add_prop_sse2_loop
.Lsqr_add_prop_sse2_tail:
        movd    %mm2,%ebx               # ebx = final carry
        test    %ebx,%ebx               # is the carry zero?
        jz      .Lsqr_add_prop_sse2_out
        mov     0(%edi),%eax            # add the carry into *ps
        add     %ebx,%eax
        stosl                           # *ps++ = eax (flags untouched)
        jnc     .Lsqr_add_prop_sse2_out
.Lsqr_add_prop_sse2_ripple:             # CF is set on every entry here
        mov     0(%edi),%eax
        adc     $0,%eax                 # *ps + 1
        stosl                           # *ps++ = eax (flags untouched)
        jc      .Lsqr_add_prop_sse2_ripple
.Lsqr_add_prop_sse2_out:
        emms                            # leave MMX state before returning
        pop     %ebx
        pop     %esi
        pop     %edi
        leave
        ret
        nop
.size s_mpv_sqr_add_prop, .-s_mpv_sqr_add_prop
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
#
|
|
Packit |
40b132 |
# Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
# so its high bit is 1.  This code is from NSPR.
#
# mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
#                        mp_digit *qp, mp_digit *rp)
#
# Stores the quotient at *qp and the remainder at *rp and returns 0.
# The hardware div instruction raises a divide error when divisor is 0
# or when the quotient does not fit in 32 bits (Nhi >= divisor); no
# check is made here, so the caller has to rule both cases out.
#
# Stack on entry (no frame is set up, nothing is pushed):
#   return address    at  0(%esp)
#   Nhi argument      at  4(%esp)
#   Nlo argument      at  8(%esp)
#   divisor argument  at 12(%esp)
#   qp argument       at 16(%esp)
#   rp argument       at 20(%esp)
#
# Registers:
#   edx:eax  dividend on input to div; eax = quotient, edx = remainder after
#   ecx      scratch pointer (qp, then rp)
#   eax      return value (0)
#

.globl s_mpv_div_2dx1d
.type s_mpv_div_2dx1d,@function
s_mpv_div_2dx1d:
        mov     4(%esp),%edx            # edx = Nhi
        mov     8(%esp),%eax            # eax = Nlo
        divl    12(%esp)                # edx:eax / divisor
        mov     16(%esp),%ecx           # ecx = qp
        mov     %eax,0(%ecx)            # *qp = quotient
        mov     20(%esp),%ecx           # ecx = rp
        mov     %edx,0(%ecx)            # *rp = remainder
        xor     %eax,%eax               # return zero
        ret
        nop
.size s_mpv_div_2dx1d, .-s_mpv_div_2dx1d
|
|
Packit |
40b132 |
|
|
Packit |
40b132 |
# Magic indicating no need for an executable stack
|
|
Packit |
40b132 |
.section .note.GNU-stack, "", @progbits
|
|
Packit |
40b132 |
.previous
|