/* Function cosf vectorized with AVX-512. KNL and SKX versions.
   Copyright (C) 2014-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_s_trig_data.h"
#include "svml_s_wrapper_impl.h"

	.text
ENTRY (_ZGVeN16v_cosf_knl)
#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
#else
/*
  ALGORITHM DESCRIPTION:

  Input:  %zmm0 = 16 packed single-precision arguments X.
  Output: %zmm0 = 16 packed results.

  1) Range reduction to [-Pi/2; +Pi/2] interval
     a) Compute |X| using AND operation; it is only used to detect the
        lanes that must take the scalar fallback (step 4)
     b) Add Pi/2 value to argument X for Cos to Sin transformation
     c) Getting octant Y by 1/Pi multiplication
     d) Add "Right Shifter" value
     e) Treat obtained value as integer for destination sign setting.
        Shift first bit of this value to the last (sign) position
     f) Subtract "Right Shifter"  value
     g) Subtract 0.5 from result for octant correction
     h) Subtract Y*PI from X argument, where PI is split into the 3
        parts used by this FMA variant:
        X = X - Y*PI1 - Y*PI2 - Y*PI3;
  2) Polynomial (minimax for sin within [-Pi/2; +Pi/2] interval)
     a) Calculate X^2 = X * X
     b) Calculate polynomial:
        R = X + X * X^2 * (A3 + x^2 * (A5 + .....
  3) Destination sign setting
     a) Set shifted destination sign using XOR operation:
        R = XOR( R, S );
  4) Every lane for which |X| <= __sRangeReductionVal does not hold
     (this includes NaN) is recomputed with a scalar call to cosf.

  Stack frame (64-byte aligned, 1280 bytes, offsets from %rsp), used
  only on the scalar fallback path:
        0 .. 1023   %zmm31 (at 0) up to %zmm16 (at 960)
     1024 .. 1048   %k7, %k6, %k5, %k4 (8-byte slots)
     1056, 1064     %rdi, %rsi
     1072 .. 1096   %r15, %r14, %r13, %r12
     1152 .. 1215   the 16 input lanes
     1216 .. 1279   the 16 result lanes
 */
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        andq      $-64, %rsp
        subq      $1280, %rsp
        movq      __svml_s_trig_data@GOTPCREL(%rip), %rdx

/* %zmm6 starts as a copy of X and accumulates X - Y*PI1 - Y*PI2.  */
        vmovaps   %zmm0, %zmm6

/* All-ones pattern broadcast below into the out-of-range lanes.  */
        movl      $-1, %eax

/* b) Add Pi/2 value to argument X for Cos to Sin transformation */
        vaddps    __sHalfPI(%rdx), %zmm0, %zmm2
        vmovups   __sRShifter(%rdx), %zmm3

/*
  1) Range reduction to [-Pi/2; +Pi/2] interval
  c) Getting octant Y by 1/Pi multiplication
  d) Add "Right Shifter" (0x4B000000) value
  %zmm2 = %zmm2 * InvPI + RShifter
 */
        vfmadd132ps __sInvPI(%rdx), %zmm3, %zmm2
        vmovups     __sPI1_FMA(%rdx), %zmm5

/* f) Subtract "Right Shifter" (0x4B000000) value */
        vsubps    %zmm3, %zmm2, %zmm4
        vmovups   __sA9_FMA(%rdx), %zmm9

/* Check for large and special arguments: %zmm1 = |X| */
        vpandd    __sAbsMask(%rdx), %zmm0, %zmm1

/*
  e) Treat obtained value as integer for destination sign setting.
  Shift first bit of this value to the last (sign) position (S << 31)
 */
        vpslld       $31, %zmm2, %zmm8

/* Predicate 22 is NLE_UQ: %k1 is set where |X| <= RangeReductionVal
   does not hold, which is also the case for NaN lanes.  */
        vcmpps       $22, __sRangeReductionVal(%rdx), %zmm1, %k1
        vpbroadcastd %eax, %zmm12{%k1}{z}

/* g) Subtract 0.5 from result for octant correction */
        vsubps       __sOneHalf(%rdx), %zmm4, %zmm7

/* %k0 / %ecx = bit mask of the lanes that need the scalar fallback.  */
        vptestmd     %zmm12, %zmm12, %k0

/*
  h) Subtract Y*PI from X argument, PI split into 3 parts:
  X = X - Y*PI1 - Y*PI2 - Y*PI3
  The reduced argument ends up in %zmm7.
 */
        vfnmadd231ps %zmm7, %zmm5, %zmm6
        kmovw        %k0, %ecx
        vfnmadd231ps __sPI2_FMA(%rdx), %zmm7, %zmm6
        vfnmadd132ps __sPI3_FMA(%rdx), %zmm6, %zmm7

/* a) Calculate X^2 = X * X */
        vmulps    %zmm7, %zmm7, %zmm10

/*
  3) Destination sign setting
    a) Set shifted destination sign using XOR operation:
  R = XOR( R, S );
 */
        vpxord    %zmm8, %zmm7, %zmm11

/*
  b) Calculate polynomial:
  R = X + X * X^2 * (A3 + x^2 * (A5 + x^2 * (A7 + x^2 * (A9))));
 */
        vfmadd213ps __sA7_FMA(%rdx), %zmm10, %zmm9
        vfmadd213ps __sA5_FMA(%rdx), %zmm10, %zmm9
        vfmadd213ps __sA3(%rdx), %zmm10, %zmm9
        vmulps      %zmm10, %zmm9, %zmm1
        vfmadd213ps %zmm11, %zmm11, %zmm1

/* Any lane flagged for the scalar fallback?  */
        testl       %ecx, %ecx
        jne         .LBL_1_3

/* Common exit: return the result vector held in %zmm1.  */
.LBL_1_2:
        cfi_remember_state
        vmovaps   %zmm1, %zmm0
        movq      %rbp, %rsp
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret

/* Scalar fallback.  Reached only through the jne above, so %ecx is
   known to be non-zero here.  Spill the inputs and the vector results
   so that single lanes can be read and patched in memory.  */
.LBL_1_3:
        cfi_restore_state
        vmovups   %zmm0, 1152(%rsp)
        vmovups   %zmm1, 1216(%rsp)

/* Save %k4-%k7, %zmm16-%zmm31, %rsi, %rdi and %r12-%r15 around the
   scalar calls, and set up the loop state:
     %r12b = index of the current lane pair (starts at 0)
     %r13d = fallback lane mask
     %r14d = bit number of the even lane of the pair (2 * %r12b)  */
        xorb      %dl, %dl
        kmovw     %k4, 1048(%rsp)
        xorl      %eax, %eax
        kmovw     %k5, 1040(%rsp)
        kmovw     %k6, 1032(%rsp)
        kmovw     %k7, 1024(%rsp)
        vmovups   %zmm16, 960(%rsp)
        vmovups   %zmm17, 896(%rsp)
        vmovups   %zmm18, 832(%rsp)
        vmovups   %zmm19, 768(%rsp)
        vmovups   %zmm20, 704(%rsp)
        vmovups   %zmm21, 640(%rsp)
        vmovups   %zmm22, 576(%rsp)
        vmovups   %zmm23, 512(%rsp)
        vmovups   %zmm24, 448(%rsp)
        vmovups   %zmm25, 384(%rsp)
        vmovups   %zmm26, 320(%rsp)
        vmovups   %zmm27, 256(%rsp)
        vmovups   %zmm28, 192(%rsp)
        vmovups   %zmm29, 128(%rsp)
        vmovups   %zmm30, 64(%rsp)
        vmovups   %zmm31, (%rsp)
        movq      %rsi, 1064(%rsp)
        movq      %rdi, 1056(%rsp)
        movq      %r12, 1096(%rsp)
        cfi_offset_rel_rsp (12, 1096)
        movb      %dl, %r12b
        movq      %r13, 1088(%rsp)
        cfi_offset_rel_rsp (13, 1088)
        movl      %ecx, %r13d
        movq      %r14, 1080(%rsp)
        cfi_offset_rel_rsp (14, 1080)
        movl      %eax, %r14d
        movq      %r15, 1072(%rsp)
        cfi_offset_rel_rsp (15, 1072)
        cfi_remember_state

/* Even lane of the current pair.  */
.LBL_1_6:
        btl       %r14d, %r13d
        jc        .LBL_1_12

/* Odd lane of the current pair.  */
.LBL_1_7:
        lea       1(%r14), %esi
        btl       %esi, %r13d
        jc        .LBL_1_10

/* Advance to the next pair.  16 lanes are 8 pairs; kmovw zero-extends
   the mask, so bits 16..31 of %r13d are always clear and iterating
   further would only index past the 64-byte spill slots.  */
.LBL_1_8:
        addb      $1, %r12b
        addl      $2, %r14d
        cmpb      $8, %r12b
        jb        .LBL_1_6

/* All lanes done: restore the saved registers, reload the patched
   result vector and leave through the common exit.  */
        kmovw     1048(%rsp), %k4
        movq      1064(%rsp), %rsi
        kmovw     1040(%rsp), %k5
        movq      1056(%rsp), %rdi
        kmovw     1032(%rsp), %k6
        movq      1096(%rsp), %r12
        cfi_restore (%r12)
        movq      1088(%rsp), %r13
        cfi_restore (%r13)
        kmovw     1024(%rsp), %k7
        vmovups   960(%rsp), %zmm16
        vmovups   896(%rsp), %zmm17
        vmovups   832(%rsp), %zmm18
        vmovups   768(%rsp), %zmm19
        vmovups   704(%rsp), %zmm20
        vmovups   640(%rsp), %zmm21
        vmovups   576(%rsp), %zmm22
        vmovups   512(%rsp), %zmm23
        vmovups   448(%rsp), %zmm24
        vmovups   384(%rsp), %zmm25
        vmovups   320(%rsp), %zmm26
        vmovups   256(%rsp), %zmm27
        vmovups   192(%rsp), %zmm28
        vmovups   128(%rsp), %zmm29
        vmovups   64(%rsp), %zmm30
        vmovups   (%rsp), %zmm31
        movq      1080(%rsp), %r14
        cfi_restore (%r14)
        movq      1072(%rsp), %r15
        cfi_restore (%r15)
        vmovups   1216(%rsp), %zmm1
        jmp       .LBL_1_2

/* Scalar cosf for the odd lane (2 * %r12b + 1): input at
   1156 + 8 * %r12b, result patched at 1220 + 8 * %r12b.  */
.LBL_1_10:
        cfi_restore_state
        movzbl    %r12b, %r15d
        vmovss    1156(%rsp,%r15,8), %xmm0
        call      JUMPTARGET(cosf)
        vmovss    %xmm0, 1220(%rsp,%r15,8)
        jmp       .LBL_1_8

/* Scalar cosf for the even lane (2 * %r12b): input at
   1152 + 8 * %r12b, result patched at 1216 + 8 * %r12b.  */
.LBL_1_12:
        movzbl    %r12b, %r15d
        vmovss    1152(%rsp,%r15,8), %xmm0
        call      JUMPTARGET(cosf)
        vmovss    %xmm0, 1216(%rsp,%r15,8)
        jmp       .LBL_1_7
#endif
END (_ZGVeN16v_cosf_knl)

ENTRY (_ZGVeN16v_cosf_skx)
#ifndef HAVE_AVX512DQ_ASM_SUPPORT
WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
#else
/*
  ALGORITHM DESCRIPTION:

  Input:  %zmm0 = 16 packed single-precision arguments X.
  Output: %zmm0 = 16 packed results.

  1) Range reduction to [-Pi/2; +Pi/2] interval
     a) Compute |X| using AND operation; it is only used to detect the
        lanes that must take the scalar fallback (step 4)
     b) Add Pi/2 value to argument X for Cos to Sin transformation
     c) Getting octant Y by 1/Pi multiplication
     d) Add "Right Shifter" value
     e) Treat obtained value as integer for destination sign setting.
        Shift first bit of this value to the last (sign) position
     f) Subtract "Right Shifter"  value
     g) Subtract 0.5 from result for octant correction
     h) Subtract Y*PI from X argument, where PI is split into the 3
        parts used by this FMA variant:
        X = X - Y*PI1 - Y*PI2 - Y*PI3;
  2) Polynomial (minimax for sin within [-Pi/2; +Pi/2] interval)
     a) Calculate X^2 = X * X
     b) Calculate polynomial:
        R = X + X * X^2 * (A3 + x^2 * (A5 + .....
  3) Destination sign setting
     a) Set shifted destination sign using XOR operation:
        R = XOR( R, S );
  4) Every lane for which |X| <= __sRangeReductionVal does not hold
     (this includes NaN) is recomputed with a scalar call to cosf.

  Stack frame (64-byte aligned, 1280 bytes, offsets from %rsp), used
  only on the scalar fallback path:
        0 .. 1023   %zmm31 (at 0) up to %zmm16 (at 960)
     1024 .. 1048   %k7, %k6, %k5, %k4 (8-byte slots)
     1056, 1064     %rdi, %rsi
     1072 .. 1096   %r15, %r14, %r13, %r12
     1152 .. 1215   the 16 input lanes
     1216 .. 1279   the 16 result lanes
 */
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        andq      $-64, %rsp
        subq      $1280, %rsp
        movq      __svml_s_trig_data@GOTPCREL(%rip), %rax

/* %zmm6 starts as a copy of X and accumulates X - Y*PI1 - Y*PI2.  */
        vmovaps   %zmm0, %zmm6

/* %zmm12 starts as all-ones in every lane; the in-range lanes are
   cleared further down, leaving the fallback lanes flagged.  */
        vmovups   .L_2il0floatpacket.13(%rip), %zmm12
        vmovups __sRShifter(%rax), %zmm3
        vmovups __sPI1_FMA(%rax), %zmm5
        vmovups __sA9_FMA(%rax), %zmm9

/* b) Add Pi/2 value to argument X for Cos to Sin transformation */
        vaddps __sHalfPI(%rax), %zmm0, %zmm2

/* Check for large and special arguments: %zmm1 = |X| */
        vandps __sAbsMask(%rax), %zmm0, %zmm1

/*
  1) Range reduction to [-Pi/2; +Pi/2] interval
  c) Getting octant Y by 1/Pi multiplication
  d) Add "Right Shifter" (0x4B000000) value
  %zmm2 = %zmm2 * InvPI + RShifter
 */
        vfmadd132ps __sInvPI(%rax), %zmm3, %zmm2

/* Predicate 18 is LE_OQ: %k1 is set where |X| <= RangeReductionVal,
   i.e. for the lanes the vector path handles (never for NaN).  */
        vcmpps    $18, __sRangeReductionVal(%rax), %zmm1, %k1

/*
  e) Treat obtained value as integer for destination sign setting.
  Shift first bit of this value to the last (sign) position (S << 31)
 */
        vpslld    $31, %zmm2, %zmm8

/* f) Subtract "Right Shifter" (0x4B000000) value */
        vsubps    %zmm3, %zmm2, %zmm4

/* g) Subtract 0.5 from result for octant correction */
        vsubps __sOneHalf(%rax), %zmm4, %zmm7

/*
  h) Subtract Y*PI from X argument, PI split into 3 parts:
  X = X - Y*PI1 - Y*PI2 - Y*PI3
  The reduced argument ends up in %zmm7.
 */
        vfnmadd231ps %zmm7, %zmm5, %zmm6
        vfnmadd231ps __sPI2_FMA(%rax), %zmm7, %zmm6
        vfnmadd132ps __sPI3_FMA(%rax), %zmm6, %zmm7

/* a) Calculate X^2 = X * X */
        vmulps    %zmm7, %zmm7, %zmm10

/*
  3) Destination sign setting
  a) Set shifted destination sign using XOR operation:
  R = XOR( R, S );
 */
        vxorps    %zmm8, %zmm7, %zmm11

/*
  b) Calculate polynomial:
  R = X + X * X^2 * (A3 + x^2 * (A5 + x^2 * (A7 + x^2 * (A9))));
 */
        vfmadd213ps __sA7_FMA(%rax), %zmm10, %zmm9
        vfmadd213ps __sA5_FMA(%rax), %zmm10, %zmm9
        vfmadd213ps __sA3(%rax), %zmm10, %zmm9

/* ANDN of a register with itself is zero: clear the in-range (%k1)
   lanes of %zmm12.  Must stay ahead of the vmulps that reuses %zmm1.  */
        vpandnd   %zmm1, %zmm1, %zmm12{%k1}
        vmulps    %zmm10, %zmm9, %zmm1

/* %k0 / %ecx = bit mask of the lanes that need the scalar fallback.  */
        vptestmd  %zmm12, %zmm12, %k0
        vfmadd213ps %zmm11, %zmm11, %zmm1
        kmovw     %k0, %ecx
        testl     %ecx, %ecx
        jne       .LBL_2_3

/* Common exit: return the result vector held in %zmm1.  */
.LBL_2_2:
        cfi_remember_state
        vmovaps   %zmm1, %zmm0
        movq      %rbp, %rsp
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret

/* Scalar fallback.  Reached only through the jne above, so %ecx is
   known to be non-zero here.  Spill the inputs and the vector results
   so that single lanes can be read and patched in memory.  */
.LBL_2_3:
        cfi_restore_state
        vmovups   %zmm0, 1152(%rsp)
        vmovups   %zmm1, 1216(%rsp)

/* Save %k4-%k7, %zmm16-%zmm31, %rsi, %rdi and %r12-%r15 around the
   scalar calls, and set up the loop state:
     %r12b = index of the current lane pair (starts at 0)
     %r13d = fallback lane mask
     %r14d = bit number of the even lane of the pair (2 * %r12b)  */
        xorb      %dl, %dl
        xorl      %eax, %eax
        kmovw     %k4, 1048(%rsp)
        kmovw     %k5, 1040(%rsp)
        kmovw     %k6, 1032(%rsp)
        kmovw     %k7, 1024(%rsp)
        vmovups   %zmm16, 960(%rsp)
        vmovups   %zmm17, 896(%rsp)
        vmovups   %zmm18, 832(%rsp)
        vmovups   %zmm19, 768(%rsp)
        vmovups   %zmm20, 704(%rsp)
        vmovups   %zmm21, 640(%rsp)
        vmovups   %zmm22, 576(%rsp)
        vmovups   %zmm23, 512(%rsp)
        vmovups   %zmm24, 448(%rsp)
        vmovups   %zmm25, 384(%rsp)
        vmovups   %zmm26, 320(%rsp)
        vmovups   %zmm27, 256(%rsp)
        vmovups   %zmm28, 192(%rsp)
        vmovups   %zmm29, 128(%rsp)
        vmovups   %zmm30, 64(%rsp)
        vmovups   %zmm31, (%rsp)
        movq      %rsi, 1064(%rsp)
        movq      %rdi, 1056(%rsp)
        movq      %r12, 1096(%rsp)
        cfi_offset_rel_rsp (12, 1096)
        movb      %dl, %r12b
        movq      %r13, 1088(%rsp)
        cfi_offset_rel_rsp (13, 1088)
        movl      %ecx, %r13d
        movq      %r14, 1080(%rsp)
        cfi_offset_rel_rsp (14, 1080)
        movl      %eax, %r14d
        movq      %r15, 1072(%rsp)
        cfi_offset_rel_rsp (15, 1072)
        cfi_remember_state

/* Even lane of the current pair.  */
.LBL_2_6:
        btl       %r14d, %r13d
        jc        .LBL_2_12

/* Odd lane of the current pair.  */
.LBL_2_7:
        lea       1(%r14), %esi
        btl       %esi, %r13d
        jc        .LBL_2_10

/* Advance to the next pair.  16 lanes are 8 pairs; kmovw zero-extends
   the mask, so bits 16..31 of %r13d are always clear and iterating
   further would only index past the 64-byte spill slots.  */
.LBL_2_8:
        incb      %r12b
        addl      $2, %r14d
        cmpb      $8, %r12b
        jb        .LBL_2_6

/* All lanes done: restore the saved registers, reload the patched
   result vector and leave through the common exit.  */
        kmovw     1048(%rsp), %k4
        kmovw     1040(%rsp), %k5
        kmovw     1032(%rsp), %k6
        kmovw     1024(%rsp), %k7
        vmovups   960(%rsp), %zmm16
        vmovups   896(%rsp), %zmm17
        vmovups   832(%rsp), %zmm18
        vmovups   768(%rsp), %zmm19
        vmovups   704(%rsp), %zmm20
        vmovups   640(%rsp), %zmm21
        vmovups   576(%rsp), %zmm22
        vmovups   512(%rsp), %zmm23
        vmovups   448(%rsp), %zmm24
        vmovups   384(%rsp), %zmm25
        vmovups   320(%rsp), %zmm26
        vmovups   256(%rsp), %zmm27
        vmovups   192(%rsp), %zmm28
        vmovups   128(%rsp), %zmm29
        vmovups   64(%rsp), %zmm30
        vmovups   (%rsp), %zmm31
        vmovups   1216(%rsp), %zmm1
        movq      1064(%rsp), %rsi
        movq      1056(%rsp), %rdi
        movq      1096(%rsp), %r12
        cfi_restore (%r12)
        movq      1088(%rsp), %r13
        cfi_restore (%r13)
        movq      1080(%rsp), %r14
        cfi_restore (%r14)
        movq      1072(%rsp), %r15
        cfi_restore (%r15)
        jmp       .LBL_2_2

/* Scalar cosf for the odd lane (2 * %r12b + 1): input at
   1156 + 8 * %r12b, result patched at 1220 + 8 * %r12b.
   vzeroupper clears the upper vector state before the call; the
   argument is loaded once, after it.  */
.LBL_2_10:
        cfi_restore_state
        movzbl    %r12b, %r15d
        vzeroupper
        vmovss    1156(%rsp,%r15,8), %xmm0
        call      JUMPTARGET(cosf)
        vmovss    %xmm0, 1220(%rsp,%r15,8)
        jmp       .LBL_2_8

/* Scalar cosf for the even lane (2 * %r12b): input at
   1152 + 8 * %r12b, result patched at 1216 + 8 * %r12b.  */
.LBL_2_12:
        movzbl    %r12b, %r15d
        vzeroupper
        vmovss    1152(%rsp,%r15,8), %xmm0
        call      JUMPTARGET(cosf)
        vmovss    %xmm0, 1216(%rsp,%r15,8)
        jmp       .LBL_2_7
#endif
END (_ZGVeN16v_cosf_skx)

Packit 6c4009
	.section .rodata, "a"
Packit 6c4009
.L_2il0floatpacket.13:
Packit 6c4009
	.long	0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
Packit 6c4009
	.type	.L_2il0floatpacket.13,@object