|
Packit |
6c4009 |
/* Function sincosf vectorized with SSE2.
   Copyright (C) 2014-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
#include <sysdep.h>
|
|
Packit |
6c4009 |
#include "svml_s_wrapper_impl.h"
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
/* Entry point _ZGVbN4vl4l4_sincosf.  Its whole body is the
   WRAPPER_IMPL_SSE2_fFF macro expanded with sincosf as the scalar
   callee.  That macro is not defined in this file; presumably it is
   provided by svml_s_wrapper_impl.h (included above).  */
	.text
ENTRY (_ZGVbN4vl4l4_sincosf)
WRAPPER_IMPL_SSE2_fFF sincosf
END (_ZGVbN4vl4l4_sincosf)
libmvec_hidden_def (_ZGVbN4vl4l4_sincosf)
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
/* SSE2 ISA version as wrapper to scalar (for vector
   function declared with #pragma omp declare simd notinbranch).

   The macro expands to a complete function body (it ends in `ret').
   It calls the scalar \callee once per lane with
     %xmm0 = the lane's float input,
     %rdi  = address of a stack scratch slot for the first output,
     %rsi  = address of a stack scratch slot for the second output,
   and afterwards copies every scratch slot to the per-lane
   destination pointer supplied by the caller.

   Register arguments on entry:
     %xmm0            four float inputs (both ABIs)
     LP64:  %xmm1, %xmm2   four 64-bit first-output pointers
            %xmm3, %xmm4   four 64-bit second-output pointers
     x32:   %xmm1          four 32-bit first-output pointers
            %xmm2          four 32-bit second-output pointers

   Only SSE/SSE2 instructions may be used here: this is the baseline
   variant, so it must not fault on CPUs without later extensions.  */
.macro WRAPPER_IMPL_SSE2_fFF_vvv callee
#ifndef __ILP32__
	/* LP64 frame (120 bytes; with the return address this keeps
	   %rsp 16-byte aligned at every call):
	     0(%rsp)   first-output scratch,  4 floats
	    16(%rsp)   second-output scratch, 4 floats
	    32(%rsp)   first-output pointers,  4 x 8 bytes
	    64(%rsp)   second-output pointers, 4 x 8 bytes
	    96(%rsp)   input vector, 4 floats  */
	subq	$120, %rsp
	cfi_adjust_cfa_offset(120)
	movaps	%xmm0, 96(%rsp)
	movdqa	%xmm1, 32(%rsp)
	movdqa	%xmm2, 48(%rsp)
	movdqa	%xmm3, 64(%rsp)
	movdqa	%xmm4, 80(%rsp)

	/* Lane 0: %xmm0 still holds the input vector, and the scalar
	   argument is its low element.  */
	lea	(%rsp), %rdi
	lea	16(%rsp), %rsi
	call	JUMPTARGET(\callee)

	/* Lane 1.  */
	movss	100(%rsp), %xmm0
	lea	4(%rsp), %rdi
	lea	20(%rsp), %rsi
	call	JUMPTARGET(\callee)

	/* Lane 2.  */
	movss	104(%rsp), %xmm0
	lea	8(%rsp), %rdi
	lea	24(%rsp), %rsi
	call	JUMPTARGET(\callee)

	/* Lane 3.  */
	movss	108(%rsp), %xmm0
	lea	12(%rsp), %rdi
	lea	28(%rsp), %rsi
	call	JUMPTARGET(\callee)

	/* Scatter the first outputs: load the four destination
	   pointers and the four scratch values, then store.  */
	movq	32(%rsp), %rdx
	movq	40(%rsp), %rsi
	movq	48(%rsp), %r8
	movq	56(%rsp), %r10
	movl	(%rsp), %eax
	movl	4(%rsp), %ecx
	movl	8(%rsp), %edi
	movl	12(%rsp), %r9d
	movl	%eax, (%rdx)
	movl	%ecx, (%rsi)
	/* %rax/%rcx are free again: start fetching the second-output
	   pointers while the remaining first outputs are stored.  */
	movq	64(%rsp), %rax
	movq	72(%rsp), %rcx
	movl	%edi, (%r8)
	movl	%r9d, (%r10)

	/* Scatter the second outputs.  */
	movq	80(%rsp), %rdi
	movq	88(%rsp), %r9
	movl	16(%rsp), %r11d
	movl	20(%rsp), %edx
	movl	24(%rsp), %esi
	movl	28(%rsp), %r8d
	movl	%r11d, (%rax)
	movl	%edx, (%rcx)
	movl	%esi, (%rdi)
	movl	%r8d, (%r9)

	addq	$120, %rsp
	cfi_adjust_cfa_offset(-120)
	ret
#else
	/* x32 frame (two pushes + 88 bytes; %esp is 16-byte aligned
	   at every call):
	     0(%esp)   first-output pointers,  4 x 4 bytes
	    16(%esp)   second-output pointers, 4 x 4 bytes
	    32(%esp)   input vector, 4 floats
	    48(%esp)   first-output scratch,  4 floats
	    64(%esp)   second-output scratch, 4 floats
	   %rbx and %rbp are callee-saved, so they keep the scratch base
	   addresses live across the calls.  */
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	pushq	%rbx
	.cfi_def_cfa_offset 24
	.cfi_offset 3, -24
	subl	$88, %esp
	.cfi_def_cfa_offset 112
	leal	64(%rsp), %esi
	movaps	%xmm1, (%esp)
	leal	48(%rsp), %edi
	movaps	%xmm2, 16(%esp)
	movq	%rsi, %rbp		/* %rbp = second-output scratch base */
	movq	%rdi, %rbx		/* %rbx = first-output scratch base */
	movaps	%xmm0, 32(%esp)

	/* Lane 0: %xmm0 still holds the input vector.  */
	call	JUMPTARGET(\callee)

	/* Lanes 1-3.  A scalar movss load is used, as in the LP64 path;
	   a full 16-byte load is not needed to pass one float.  */
	movss	36(%esp), %xmm0
	leal	4(%rbp), %esi
	leal	4(%rbx), %edi
	call	JUMPTARGET(\callee)

	movss	40(%esp), %xmm0
	leal	8(%rbp), %esi
	leal	8(%rbx), %edi
	call	JUMPTARGET(\callee)

	movss	44(%esp), %xmm0
	leal	12(%rbp), %esi
	leal	12(%rbx), %edi
	call	JUMPTARGET(\callee)

	/* Scatter the results.  Each 32-bit destination pointer is read
	   straight from its stack spill with movl.  Do not use pextrd
	   here: it is an SSE4.1 instruction and would raise #UD on an
	   SSE2-only CPU.  */
	movl	(%esp), %eax
	movss	48(%esp), %xmm0
	movss	%xmm0, (%eax)
	movl	4(%esp), %eax
	movss	52(%esp), %xmm0
	movss	%xmm0, (%eax)
	movl	8(%esp), %eax
	movss	56(%esp), %xmm0
	movss	%xmm0, (%eax)
	movl	12(%esp), %eax
	movss	60(%esp), %xmm0
	movss	%xmm0, (%eax)

	movl	16(%esp), %eax
	movss	64(%esp), %xmm0
	movss	%xmm0, (%eax)
	movl	20(%esp), %eax
	movss	68(%esp), %xmm0
	movss	%xmm0, (%eax)
	movl	24(%esp), %eax
	movss	72(%esp), %xmm0
	movss	%xmm0, (%eax)
	movl	28(%esp), %eax
	movss	76(%esp), %xmm0
	movss	%xmm0, (%eax)

	addl	$88, %esp
	.cfi_def_cfa_offset 24
	popq	%rbx
	.cfi_def_cfa_offset 16
	popq	%rbp
	.cfi_def_cfa_offset 8
	ret
#endif
.endm
|
|
Packit |
6c4009 |
|
|
Packit |
6c4009 |
/* Entry point _ZGVbN4vvv_sincosf: the function body is the
   WRAPPER_IMPL_SSE2_fFF_vvv macro expanded with sincosf as the
   scalar callee.  */
ENTRY (_ZGVbN4vvv_sincosf)
WRAPPER_IMPL_SSE2_fFF_vvv sincosf
END (_ZGVbN4vvv_sincosf)

/* The hidden definition is emitted only when USE_MULTIARCH is not
   defined (presumably the multiarch build supplies its own; confirm
   against the multiarch sources).  */
#ifndef USE_MULTIARCH
 libmvec_hidden_def (_ZGVbN4vvv_sincosf)
#endif
|