Blame sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S

/* Function cosf vectorized with AVX-512. KNL and SKX versions.
   Copyright (C) 2014-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
Packit Service 82fcde
Packit Service 82fcde
#include <sysdep.h>
Packit Service 82fcde
#include "svml_s_trig_data.h"
Packit Service 82fcde
#include "svml_s_wrapper_impl.h"
Packit Service 82fcde
Packit Service 82fcde
	.text
Packit Service 82fcde
/* _ZGVeN16v_cosf_knl: cosf for sixteen packed floats (KNL variant).

   In:   %zmm0 = 16 single-precision arguments.
   Out:  %zmm0 = 16 single-precision results.

   Fast path: all lanes are evaluated with the range reduction and
   polynomial described below.  Lanes for which |x| does NOT compare
   less-or-equal to __sRangeReductionVal (compare predicate 22, NLE_UQ,
   so unordered/NaN lanes are included) are flagged in a 16-bit mask.
   If the mask is non-zero, the slow path overwrites the flagged lanes
   of the result with the value returned by scalar cosf.

   Without HAVE_AVX512DQ_ASM_SUPPORT the whole body is the
   WRAPPER_IMPL_AVX512 macro applied to _ZGVdN8v_cosf (macro definition
   is not in this file; presumably from svml_s_wrapper_impl.h).

   Frame (offsets from the 64-byte aligned %rsp, 1280 bytes reserved):
        0 ..  960   %zmm31 .. %zmm16 (64 bytes each, %zmm31 lowest)
     1024 .. 1048   %k7, %k6, %k5, %k4
     1056, 1064     %rdi, %rsi
     1072 .. 1096   %r15, %r14, %r13, %r12
     1152           copy of the input vector
     1216           result vector (patched lane by lane on the slow path)  */
ENTRY (_ZGVeN16v_cosf_knl)
#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
#else
/*
  ALGORITHM DESCRIPTION:

  1) Range reduction to [-Pi/2; +Pi/2] interval
     a) Clear the sign with an AND operation (the absolute value is
        used for the large/special argument check)
     b) Add Pi/2 value to argument X for Cos to Sin transformation
     c) Getting octant Y by 1/Pi multiplication
     d) Add "Right Shifter" value
     e) Treat obtained value as integer for destination sign setting.
        Shift first bit of this value to the last (sign) position
     f) Subtract "Right Shifter"  value
     g) Subtract 0.5 from result for octant correction
     h) Subtract Y*PI from X argument.  This FMA implementation splits
        PI into 3 parts:
        X = X - Y*PI1 - Y*PI2 - Y*PI3;
  2) Polynomial (minimax for sin within [-Pi/2; +Pi/2] interval)
     a) Calculate X^2 = X * X
     b) Calculate polynomial:
        R = X + X * X^2 * (A3 + x^2 * (A5 + .....
  3) Destination sign setting
     a) Set shifted destination sign using XOR operation:
        R = XOR( R, S );
 */
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        /* Align the frame to 64 bytes and reserve the spill area.  */
        andq      $-64, %rsp
        subq      $1280, %rsp
        /* %rdx = base of the constant table.  */
        movq      __svml_s_trig_data@GOTPCREL(%rip), %rdx

/* %zmm6 = X; it accumulates X - Y*PI1 - Y*PI2 in step h) below.  */
        vmovaps   %zmm0, %zmm6
        /* %eax = all-ones pattern, broadcast into flagged lanes below.  */
        movl      $-1, %eax

/* b) Add Pi/2 value to argument X for Cos to Sin transformation */
        vaddps    __sHalfPI(%rdx), %zmm0, %zmm2
        vmovups   __sRShifter(%rdx), %zmm3

/*
  1) Range reduction to [-Pi/2; +Pi/2] interval
  c) Getting octant Y by 1/Pi multiplication
  d) Add "Right Shifter" (0x4B000000) value
     %zmm2 = (X + Pi/2) * InvPI + RShifter
 */
        vfmadd132ps __sInvPI(%rdx), %zmm3, %zmm2
        vmovups     __sPI1_FMA(%rdx), %zmm5

/* f) Subtract "Right Shifter" (0x4B000000) value */
        vsubps    %zmm3, %zmm2, %zmm4
        vmovups   __sA9_FMA(%rdx), %zmm9

/* Check for large and special arguments: %zmm1 = |X| */
        vpandd    __sAbsMask(%rdx), %zmm0, %zmm1

/*
  e) Treat obtained value as integer for destination sign setting.
  Shift first bit of this value to the last (sign) position (S << 31)
 */
        vpslld       $31, %zmm2, %zmm8
        /* %k1 = lanes where |X| is not <= the range-reduction limit
           (predicate 22 = NLE_UQ, true for NaN as well).  */
        vcmpps       $22, __sRangeReductionVal(%rdx), %zmm1, %k1
        /* %zmm12 = all-ones in flagged lanes, zero elsewhere.  */
        vpbroadcastd %eax, %zmm12{%k1}{z}

/* g) Subtract 0.5 from result for octant correction */
        vsubps       __sOneHalf(%rdx), %zmm4, %zmm7
        vptestmd     %zmm12, %zmm12, %k0

/*
  h) Subtract Y*PI from X argument, PI split into 3 parts:
  X = X - Y*PI1 - Y*PI2 - Y*PI3
  The reduced argument ends up in %zmm7.
 */
        vfnmadd231ps %zmm7, %zmm5, %zmm6
        /* %ecx = 16-bit mask of lanes that need the scalar fallback.  */
        kmovw        %k0, %ecx
        vfnmadd231ps __sPI2_FMA(%rdx), %zmm7, %zmm6
        vfnmadd132ps __sPI3_FMA(%rdx), %zmm6, %zmm7

/* a) Calculate X^2 = X * X */
        vmulps    %zmm7, %zmm7, %zmm10

/*
  3) Destination sign setting
    a) Set shifted destination sign using XOR operation:
  R = XOR( R, S );
 */
        vpxord    %zmm8, %zmm7, %zmm11

/*
  b) Calculate polynomial:
  R = X + X * X^2 * (A3 + x^2 * (A5 + x^2 * (A7 + x^2 * (A9))));
 */
        vfmadd213ps __sA7_FMA(%rdx), %zmm10, %zmm9
        vfmadd213ps __sA5_FMA(%rdx), %zmm10, %zmm9
        vfmadd213ps __sA3(%rdx), %zmm10, %zmm9
        vmulps      %zmm10, %zmm9, %zmm1
        vfmadd213ps %zmm11, %zmm11, %zmm1
        /* Any flagged lane?  Then patch those lanes with scalar cosf.  */
        testl       %ecx, %ecx
        jne         .LBL_1_3

/* Common exit: result vector is in %zmm1.  */
.LBL_1_2:
        cfi_remember_state
        vmovaps   %zmm1, %zmm0
        movq      %rbp, %rsp
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret

/* Slow path.  Reached only through the jne above, so the mask in %ecx
   is known to be non-zero here; no further zero check is needed.  */
.LBL_1_3:
        cfi_restore_state
        /* Spill the input and the fast-path result so single lanes can
           be read and overwritten through memory.  */
        vmovups   %zmm0, 1152(%rsp)
        vmovups   %zmm1, 1216(%rsp)

        /* Save the registers that are restored after the scalar calls
           and zero the loop counters (%dl -> %r12b, %eax -> %r14d).
           The table pointer in %rdx is no longer needed.  */
        xorb      %dl, %dl
        kmovw     %k4, 1048(%rsp)
        xorl      %eax, %eax
        kmovw     %k5, 1040(%rsp)
        kmovw     %k6, 1032(%rsp)
        kmovw     %k7, 1024(%rsp)
        vmovups   %zmm16, 960(%rsp)
        vmovups   %zmm17, 896(%rsp)
        vmovups   %zmm18, 832(%rsp)
        vmovups   %zmm19, 768(%rsp)
        vmovups   %zmm20, 704(%rsp)
        vmovups   %zmm21, 640(%rsp)
        vmovups   %zmm22, 576(%rsp)
        vmovups   %zmm23, 512(%rsp)
        vmovups   %zmm24, 448(%rsp)
        vmovups   %zmm25, 384(%rsp)
        vmovups   %zmm26, 320(%rsp)
        vmovups   %zmm27, 256(%rsp)
        vmovups   %zmm28, 192(%rsp)
        vmovups   %zmm29, 128(%rsp)
        vmovups   %zmm30, 64(%rsp)
        vmovups   %zmm31, (%rsp)
        movq      %rsi, 1064(%rsp)
        movq      %rdi, 1056(%rsp)
        movq      %r12, 1096(%rsp)
        cfi_offset_rel_rsp (12, 1096)
        movb      %dl, %r12b
        movq      %r13, 1088(%rsp)
        cfi_offset_rel_rsp (13, 1088)
        movl      %ecx, %r13d
        movq      %r14, 1080(%rsp)
        cfi_offset_rel_rsp (14, 1080)
        movl      %eax, %r14d
        movq      %r15, 1072(%rsp)
        cfi_offset_rel_rsp (15, 1072)
        cfi_remember_state

/* Lane loop, two lanes per iteration.
     %r12b = pair index i
     %r13d = fallback mask (only bits 0..15 can be set: kmovw
             zero-extends the 16-bit mask)
     %r14d = 2*i, bit number of the even lane of the pair
   The counter runs to 16, so bits 16..31 are tested too; they are
   always clear and never trigger a call.  */
.LBL_1_6:
        btl       %r14d, %r13d
        jc        .LBL_1_12

.LBL_1_7:
        /* Odd lane of the pair: bit 2*i + 1 (%esi is scratch).  */
        lea       1(%r14), %esi
        btl       %esi, %r13d
        jc        .LBL_1_10

.LBL_1_8:
        addb      $1, %r12b
        addl      $2, %r14d
        cmpb      $16, %r12b
        jb        .LBL_1_6

        /* Loop done: restore everything saved above, reload the patched
           result vector into %zmm1 and leave through the common exit.  */
        kmovw     1048(%rsp), %k4
        movq      1064(%rsp), %rsi
        kmovw     1040(%rsp), %k5
        movq      1056(%rsp), %rdi
        kmovw     1032(%rsp), %k6
        movq      1096(%rsp), %r12
        cfi_restore (%r12)
        movq      1088(%rsp), %r13
        cfi_restore (%r13)
        kmovw     1024(%rsp), %k7
        vmovups   960(%rsp), %zmm16
        vmovups   896(%rsp), %zmm17
        vmovups   832(%rsp), %zmm18
        vmovups   768(%rsp), %zmm19
        vmovups   704(%rsp), %zmm20
        vmovups   640(%rsp), %zmm21
        vmovups   576(%rsp), %zmm22
        vmovups   512(%rsp), %zmm23
        vmovups   448(%rsp), %zmm24
        vmovups   384(%rsp), %zmm25
        vmovups   320(%rsp), %zmm26
        vmovups   256(%rsp), %zmm27
        vmovups   192(%rsp), %zmm28
        vmovups   128(%rsp), %zmm29
        vmovups   64(%rsp), %zmm30
        vmovups   (%rsp), %zmm31
        movq      1080(%rsp), %r14
        cfi_restore (%r14)
        movq      1072(%rsp), %r15
        cfi_restore (%r15)
        vmovups   1216(%rsp), %zmm1
        jmp       .LBL_1_2

/* Scalar fallback for the odd lane 2*i + 1: input at 1156 + 8*i,
   result at 1220 + 8*i (%r15 = zero-extended i).  */
.LBL_1_10:
        cfi_restore_state
        movzbl    %r12b, %r15d
        vmovss    1156(%rsp,%r15,8), %xmm0
        call      JUMPTARGET(cosf)
        vmovss    %xmm0, 1220(%rsp,%r15,8)
        jmp       .LBL_1_8

/* Scalar fallback for the even lane 2*i: input at 1152 + 8*i,
   result at 1216 + 8*i.  */
.LBL_1_12:
        movzbl    %r12b, %r15d
        vmovss    1152(%rsp,%r15,8), %xmm0
        call      JUMPTARGET(cosf)
        vmovss    %xmm0, 1216(%rsp,%r15,8)
        jmp       .LBL_1_7
#endif
END (_ZGVeN16v_cosf_knl)
Packit Service 82fcde
Packit Service 82fcde
/* _ZGVeN16v_cosf_skx: cosf for sixteen packed floats (SKX variant).

   In:   %zmm0 = 16 single-precision arguments.
   Out:  %zmm0 = 16 single-precision results.

   Fast path: all lanes are evaluated with the range reduction and
   polynomial described below.  %zmm12 starts as all-ones and is
   cleared in every lane where |x| <= __sRangeReductionVal (compare
   predicate 18, LE_OQ); lanes left non-zero (large values, NaN) are
   recomputed on the slow path with scalar cosf.

   Without HAVE_AVX512DQ_ASM_SUPPORT the whole body is the
   WRAPPER_IMPL_AVX512 macro applied to _ZGVdN8v_cosf (macro definition
   is not in this file; presumably from svml_s_wrapper_impl.h).

   Frame (offsets from the 64-byte aligned %rsp, 1280 bytes reserved):
        0 ..  960   %zmm31 .. %zmm16 (64 bytes each, %zmm31 lowest)
     1024 .. 1048   %k7, %k6, %k5, %k4
     1056, 1064     %rdi, %rsi
     1072 .. 1096   %r15, %r14, %r13, %r12
     1152           copy of the input vector
     1216           result vector (patched lane by lane on the slow path)  */
ENTRY (_ZGVeN16v_cosf_skx)
#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
#else
/*
  ALGORITHM DESCRIPTION:

  1) Range reduction to [-Pi/2; +Pi/2] interval
     a) Clear the sign with an AND operation (the absolute value is
        used for the large/special argument check)
     b) Add Pi/2 value to argument X for Cos to Sin transformation
     c) Getting octant Y by 1/Pi multiplication
     d) Add "Right Shifter" value
     e) Treat obtained value as integer for destination sign setting.
        Shift first bit of this value to the last (sign) position
     f) Subtract "Right Shifter"  value
     g) Subtract 0.5 from result for octant correction
     h) Subtract Y*PI from X argument.  This FMA implementation splits
        PI into 3 parts:
        X = X - Y*PI1 - Y*PI2 - Y*PI3;
  2) Polynomial (minimax for sin within [-Pi/2; +Pi/2] interval)
     a) Calculate X^2 = X * X
     b) Calculate polynomial:
        R = X + X * X^2 * (A3 + x^2 * (A5 + .....
  3) Destination sign setting
     a) Set shifted destination sign using XOR operation:
        R = XOR( R, S );
 */
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        /* Align the frame to 64 bytes and reserve the spill area.  */
        andq      $-64, %rsp
        subq      $1280, %rsp
        /* %rax = base of the constant table.  */
        movq      __svml_s_trig_data@GOTPCREL(%rip), %rax

/* %zmm6 = X; it accumulates X - Y*PI1 - Y*PI2 in step h) below.  */
        vmovaps   %zmm0, %zmm6
        /* %zmm12 = all-ones; cleared per lane once the lane is known
           to be within the fast-path range.  */
        vmovups   .L_2il0floatpacket.13(%rip), %zmm12
        vmovups __sRShifter(%rax), %zmm3
        vmovups __sPI1_FMA(%rax), %zmm5
        vmovups __sA9_FMA(%rax), %zmm9

/* b) Add Pi/2 value to argument X for Cos to Sin transformation */
        vaddps __sHalfPI(%rax), %zmm0, %zmm2

/* Check for large and special arguments: %zmm1 = |X| */
        vandps __sAbsMask(%rax), %zmm0, %zmm1

/*
  1) Range reduction to [-Pi/2; +Pi/2] interval
  c) Getting octant Y by 1/Pi multiplication
  d) Add "Right Shifter" (0x4B000000) value
     %zmm2 = (X + Pi/2) * InvPI + RShifter
 */
        vfmadd132ps __sInvPI(%rax), %zmm3, %zmm2
        /* %k1 = lanes where |X| <= the range-reduction limit
           (predicate 18 = LE_OQ, false for NaN).  */
        vcmpps    $18, __sRangeReductionVal(%rax), %zmm1, %k1

/*
  e) Treat obtained value as integer for destination sign setting.
  Shift first bit of this value to the last (sign) position (S << 31)
 */
        vpslld    $31, %zmm2, %zmm8

/* f) Subtract "Right Shifter" (0x4B000000) value */
        vsubps    %zmm3, %zmm2, %zmm4

/* g) Subtract 0.5 from result for octant correction */
        vsubps __sOneHalf(%rax), %zmm4, %zmm7

/*
  h) Subtract Y*PI from X argument, PI split into 3 parts:
  X = X - Y*PI1 - Y*PI2 - Y*PI3
  The reduced argument ends up in %zmm7.
 */
        vfnmadd231ps %zmm7, %zmm5, %zmm6
        vfnmadd231ps __sPI2_FMA(%rax), %zmm7, %zmm6
        vfnmadd132ps __sPI3_FMA(%rax), %zmm6, %zmm7

/* a) Calculate X^2 = X * X */
        vmulps    %zmm7, %zmm7, %zmm10

/*
  3) Destination sign setting
  a) Set shifted destination sign using XOR operation:
  R = XOR( R, S );
 */
        vxorps    %zmm8, %zmm7, %zmm11

/*
  b) Calculate polynomial:
  R = X + X * X^2 * (A3 + x^2 * (A5 + x^2 * (A7 + x^2 * (A9))));
 */
        vfmadd213ps __sA7_FMA(%rax), %zmm10, %zmm9
        vfmadd213ps __sA5_FMA(%rax), %zmm10, %zmm9
        vfmadd213ps __sA3(%rax), %zmm10, %zmm9
        /* In-range lanes (%k1): %zmm12 = ~%zmm1 & %zmm1 = 0.  The other
           lanes keep their all-ones value (merge masking).  */
        vpandnd   %zmm1, %zmm1, %zmm12{%k1}
        vmulps    %zmm10, %zmm9, %zmm1
        /* %k0 = lanes still non-zero, i.e. needing the scalar fallback.  */
        vptestmd  %zmm12, %zmm12, %k0
        vfmadd213ps %zmm11, %zmm11, %zmm1
        kmovw     %k0, %ecx
        /* Any flagged lane?  Then patch those lanes with scalar cosf.  */
        testl     %ecx, %ecx
        jne       .LBL_2_3

/* Common exit: result vector is in %zmm1.  */
.LBL_2_2:
        cfi_remember_state
        vmovaps   %zmm1, %zmm0
        movq      %rbp, %rsp
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret

/* Slow path.  Reached only through the jne above, so the mask in %ecx
   is known to be non-zero here; no further zero check is needed.  */
.LBL_2_3:
        cfi_restore_state
        /* Spill the input and the fast-path result so single lanes can
           be read and overwritten through memory.  */
        vmovups   %zmm0, 1152(%rsp)
        vmovups   %zmm1, 1216(%rsp)

        /* Zero the loop counters (%dl -> %r12b, %eax -> %r14d; the table
           pointer in %rax is no longer needed) and save the registers
           that are restored after the scalar calls.  */
        xorb      %dl, %dl
        xorl      %eax, %eax
        kmovw     %k4, 1048(%rsp)
        kmovw     %k5, 1040(%rsp)
        kmovw     %k6, 1032(%rsp)
        kmovw     %k7, 1024(%rsp)
        vmovups   %zmm16, 960(%rsp)
        vmovups   %zmm17, 896(%rsp)
        vmovups   %zmm18, 832(%rsp)
        vmovups   %zmm19, 768(%rsp)
        vmovups   %zmm20, 704(%rsp)
        vmovups   %zmm21, 640(%rsp)
        vmovups   %zmm22, 576(%rsp)
        vmovups   %zmm23, 512(%rsp)
        vmovups   %zmm24, 448(%rsp)
        vmovups   %zmm25, 384(%rsp)
        vmovups   %zmm26, 320(%rsp)
        vmovups   %zmm27, 256(%rsp)
        vmovups   %zmm28, 192(%rsp)
        vmovups   %zmm29, 128(%rsp)
        vmovups   %zmm30, 64(%rsp)
        vmovups   %zmm31, (%rsp)
        movq      %rsi, 1064(%rsp)
        movq      %rdi, 1056(%rsp)
        movq      %r12, 1096(%rsp)
        cfi_offset_rel_rsp (12, 1096)
        movb      %dl, %r12b
        movq      %r13, 1088(%rsp)
        cfi_offset_rel_rsp (13, 1088)
        movl      %ecx, %r13d
        movq      %r14, 1080(%rsp)
        cfi_offset_rel_rsp (14, 1080)
        movl      %eax, %r14d
        movq      %r15, 1072(%rsp)
        cfi_offset_rel_rsp (15, 1072)
        cfi_remember_state

/* Lane loop, two lanes per iteration.
     %r12b = pair index i
     %r13d = fallback mask (only bits 0..15 can be set: kmovw
             zero-extends the 16-bit mask)
     %r14d = 2*i, bit number of the even lane of the pair
   The counter runs to 16, so bits 16..31 are tested too; they are
   always clear and never trigger a call.  */
.LBL_2_6:
        btl       %r14d, %r13d
        jc        .LBL_2_12
.LBL_2_7:
        /* Odd lane of the pair: bit 2*i + 1 (%esi is scratch).  */
        lea       1(%r14), %esi
        btl       %esi, %r13d
        jc        .LBL_2_10
.LBL_2_8:
        incb      %r12b
        addl      $2, %r14d
        cmpb      $16, %r12b
        jb        .LBL_2_6

        /* Loop done: restore everything saved above, reload the patched
           result vector into %zmm1 and leave through the common exit.  */
        kmovw     1048(%rsp), %k4
        kmovw     1040(%rsp), %k5
        kmovw     1032(%rsp), %k6
        kmovw     1024(%rsp), %k7
        vmovups   960(%rsp), %zmm16
        vmovups   896(%rsp), %zmm17
        vmovups   832(%rsp), %zmm18
        vmovups   768(%rsp), %zmm19
        vmovups   704(%rsp), %zmm20
        vmovups   640(%rsp), %zmm21
        vmovups   576(%rsp), %zmm22
        vmovups   512(%rsp), %zmm23
        vmovups   448(%rsp), %zmm24
        vmovups   384(%rsp), %zmm25
        vmovups   320(%rsp), %zmm26
        vmovups   256(%rsp), %zmm27
        vmovups   192(%rsp), %zmm28
        vmovups   128(%rsp), %zmm29
        vmovups   64(%rsp), %zmm30
        vmovups   (%rsp), %zmm31
        vmovups   1216(%rsp), %zmm1
        movq      1064(%rsp), %rsi
        movq      1056(%rsp), %rdi
        movq      1096(%rsp), %r12
        cfi_restore (%r12)
        movq      1088(%rsp), %r13
        cfi_restore (%r13)
        movq      1080(%rsp), %r14
        cfi_restore (%r14)
        movq      1072(%rsp), %r15
        cfi_restore (%r15)
        jmp       .LBL_2_2

/* Scalar fallback for the odd lane 2*i + 1: input at 1156 + 8*i,
   result at 1220 + 8*i (%r15 = zero-extended i).  The operand is
   loaded once, after vzeroupper; a load placed before it would simply
   be overwritten by this one.  */
.LBL_2_10:
        cfi_restore_state
        movzbl    %r12b, %r15d
        vzeroupper
        vmovss    1156(%rsp,%r15,8), %xmm0
        call      JUMPTARGET(cosf)
        vmovss    %xmm0, 1220(%rsp,%r15,8)
        jmp       .LBL_2_8

/* Scalar fallback for the even lane 2*i: input at 1152 + 8*i,
   result at 1216 + 8*i.  */
.LBL_2_12:
        movzbl    %r12b, %r15d
        vzeroupper
        vmovss    1152(%rsp,%r15,8), %xmm0
        call      JUMPTARGET(cosf)
        vmovss    %xmm0, 1216(%rsp,%r15,8)
        jmp       .LBL_2_7
#endif
END (_ZGVeN16v_cosf_skx)
Packit Service 82fcde
Packit Service 82fcde
	.section .rodata, "a"
/* Sixteen 32-bit words with every bit set (64 bytes).  The SKX routine
   loads this as the initial value of its per-lane "needs scalar
   fallback" vector.  */
.L_2il0floatpacket.13:
	.rept	16
	.long	0xffffffff
	.endr
	.type	.L_2il0floatpacket.13,@object