/* memset/bzero with unaligned store and rep stosb
   Copyright (C) 2016-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

/* memset is implemented as:
   1. Use overlapping store to avoid branch.
   2. If size is less than VEC, use integer register stores.
   3. If size is from VEC_SIZE to 2 * VEC_SIZE, use 2 VEC stores.
   4. If size is from 2 * VEC_SIZE to 4 * VEC_SIZE, use 4 VEC stores.
   5. If size is more than 4 * VEC_SIZE, align to 4 * VEC_SIZE with
      4 VEC stores and store 4 * VEC at a time until done.  */
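/* Illustration only, not part of the build: a rough C-level sketch of the
   size dispatch described above, under the assumption that VEC_SZ stands
   for VEC_SIZE (16, 32 or 64) and that the hypothetical helper store_vec
   stands in for a single VMOVU/VMOVA store of the byte pattern held in
   VEC(0).  The small-size case is simplified to a byte loop where the
   assembly below uses overlapping integer-register stores.

   #include <stddef.h>
   #include <stdint.h>

   enum { VEC_SZ = 32 };			// stand-in for VEC_SIZE

   // Stand-in for one VMOVU/VMOVA store of the replicated byte.
   static void store_vec (unsigned char *p, unsigned char c)
   {
     for (size_t i = 0; i < VEC_SZ; i++)
       p[i] = c;
   }

   void *memset_sketch (void *dstp, int c, size_t n)
   {
     unsigned char *dst = dstp;
     unsigned char b = (unsigned char) c;

     if (n < VEC_SZ)
       {
	 // Simplified; the assembly uses overlapping integer stores.
	 for (size_t i = 0; i < n; i++)
	   dst[i] = b;
       }
     else if (n <= 2 * VEC_SZ)
       {
	 // Head and tail stores may overlap; no branch on the exact size.
	 store_vec (dst + n - VEC_SZ, b);
	 store_vec (dst, b);
       }
     else if (n <= 4 * VEC_SZ)
       {
	 store_vec (dst, b);
	 store_vec (dst + VEC_SZ, b);
	 store_vec (dst + n - VEC_SZ, b);
	 store_vec (dst + n - 2 * VEC_SZ, b);
       }
     else
       {
	 // Unaligned stores cover the first and last 4 * VEC_SZ bytes;
	 // the 4 * VEC_SZ aligned middle is filled 4 vectors at a time.
	 uintptr_t beg = ((uintptr_t) dst + 4 * VEC_SZ)
			 & -(uintptr_t) (4 * VEC_SZ);
	 uintptr_t end = ((uintptr_t) dst + n) & -(uintptr_t) (4 * VEC_SZ);
	 for (int i = 0; i < 4; i++)
	   {
	     store_vec (dst + i * VEC_SZ, b);
	     store_vec (dst + n - (i + 1) * VEC_SZ, b);
	   }
	 for (unsigned char *p = (unsigned char *) beg;
	      p != (unsigned char *) end; p += 4 * VEC_SZ)
	   for (int i = 0; i < 4; i++)
	     store_vec (p + i * VEC_SZ, b);
       }
     return dstp;
   }
*/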
#include <sysdep.h>

#ifndef MEMSET_CHK_SYMBOL
# define MEMSET_CHK_SYMBOL(p,s)		MEMSET_SYMBOL(p, s)
#endif

#ifndef WMEMSET_CHK_SYMBOL
# define WMEMSET_CHK_SYMBOL(p,s)	WMEMSET_SYMBOL(p, s)
#endif

#ifndef VZEROUPPER
# if VEC_SIZE > 16
#  define VZEROUPPER			vzeroupper
# else
#  define VZEROUPPER
# endif
#endif

#ifndef VZEROUPPER_SHORT_RETURN
# if VEC_SIZE > 16
#  define VZEROUPPER_SHORT_RETURN	vzeroupper
# else
/* "rep" turns the ret that follows into a 2-byte "rep ret".  */
#  define VZEROUPPER_SHORT_RETURN	rep
# endif
#endif

#ifndef MOVQ
# if VEC_SIZE > 16
#  define MOVQ				vmovq
# else
#  define MOVQ				movq
# endif
#endif

#ifndef SECTION
# error SECTION is not defined!
#endif

	.section SECTION(.text),"ax",@progbits
#if VEC_SIZE == 16 && IS_IN (libc)
ENTRY (__bzero)
	movq	%rdi, %rax /* Set return value.  */
	movq	%rsi, %rdx /* Set n.  */
	pxor	%xmm0, %xmm0
	jmp	L(entry_from_bzero)
END (__bzero)
weak_alias (__bzero, bzero)
#endif

#if IS_IN (libc)
# if defined SHARED
ENTRY_CHK (WMEMSET_CHK_SYMBOL (__wmemset_chk, unaligned))
	cmpq	%rdx, %rcx
	jb	HIDDEN_JUMPTARGET (__chk_fail)
END_CHK (WMEMSET_CHK_SYMBOL (__wmemset_chk, unaligned))
# endif

ENTRY (WMEMSET_SYMBOL (__wmemset, unaligned))
	shlq	$2, %rdx
	WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
	jmp	L(entry_from_bzero)
END (WMEMSET_SYMBOL (__wmemset, unaligned))
#endif

#if defined SHARED && IS_IN (libc)
ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned))
	cmpq	%rdx, %rcx
	jb	HIDDEN_JUMPTARGET (__chk_fail)
END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned))
#endif

ENTRY (MEMSET_SYMBOL (__memset, unaligned))
	MEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
L(entry_from_bzero):
	cmpq	$VEC_SIZE, %rdx
	jb	L(less_vec)
	cmpq	$(VEC_SIZE * 2), %rdx
	ja	L(more_2x_vec)
	/* From VEC_SIZE to 2 * VEC_SIZE.  No branch when size == VEC_SIZE.  */
	VMOVU	%VEC(0), -VEC_SIZE(%rdi,%rdx)
	VMOVU	%VEC(0), (%rdi)
	VZEROUPPER
	ret
#if defined USE_MULTIARCH && IS_IN (libc)
END (MEMSET_SYMBOL (__memset, unaligned))

# if VEC_SIZE == 16
ENTRY (__memset_chk_erms)
	cmpq	%rdx, %rcx
	jb	HIDDEN_JUMPTARGET (__chk_fail)
END (__memset_chk_erms)

/* Only used to measure performance of REP STOSB.  */
ENTRY (__memset_erms)
	/* Skip zero length.  */
	testq	%rdx, %rdx
	jnz	L(stosb)
	movq	%rdi, %rax
	ret
# else
/* Provide a hidden symbol to the debugger.  */
	.hidden	MEMSET_SYMBOL (__memset, erms)
ENTRY (MEMSET_SYMBOL (__memset, erms))
# endif
L(stosb):
	/* Issue vzeroupper before rep stosb.  */
	VZEROUPPER
	movq	%rdx, %rcx	/* Byte count for rep stosb.  */
	movzbl	%sil, %eax	/* Byte value to store.  */
	movq	%rdi, %rdx	/* Save destination; rep stosb advances %rdi.  */
	rep stosb
	movq	%rdx, %rax	/* Return the original destination.  */
	ret
# if VEC_SIZE == 16
END (__memset_erms)
# else
END (MEMSET_SYMBOL (__memset, erms))
# endif

# if defined SHARED && IS_IN (libc)
ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms))
	cmpq	%rdx, %rcx
	jb	HIDDEN_JUMPTARGET (__chk_fail)
END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms))
# endif

ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
	MEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
	cmpq	$VEC_SIZE, %rdx
	jb	L(less_vec)
	cmpq	$(VEC_SIZE * 2), %rdx
	ja	L(stosb_more_2x_vec)
	/* From VEC_SIZE to 2 * VEC_SIZE.  No branch when size == VEC_SIZE.  */
	VMOVU	%VEC(0), -VEC_SIZE(%rdi,%rdx)
	VMOVU	%VEC(0), (%rdi)
	VZEROUPPER
	ret

L(stosb_more_2x_vec):
	/* Use rep stosb for sizes above the threshold.  */
	cmp	__x86_rep_stosb_threshold(%rip), %RDX_LP
	ja	L(stosb)
#endif
L(more_2x_vec):
	cmpq	$(VEC_SIZE * 4), %rdx
	ja	L(loop_start)
	/* From 2 * VEC_SIZE + 1 to 4 * VEC_SIZE: 2 head and 2 tail stores
	   that may overlap.  */
	VMOVU	%VEC(0), (%rdi)
	VMOVU	%VEC(0), VEC_SIZE(%rdi)
	VMOVU	%VEC(0), -VEC_SIZE(%rdi,%rdx)
	VMOVU	%VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx)
L(return):
	VZEROUPPER
	ret

L(loop_start):
	/* More than 4 * VEC_SIZE: store 4 VEC at the start and 4 at the
	   end with unaligned stores, then fill the 4 * VEC_SIZE aligned
	   middle 4 VEC at a time with aligned stores.  */
	leaq	(VEC_SIZE * 4)(%rdi), %rcx
	VMOVU	%VEC(0), (%rdi)
	andq	$-(VEC_SIZE * 4), %rcx
	VMOVU	%VEC(0), -VEC_SIZE(%rdi,%rdx)
	VMOVU	%VEC(0), VEC_SIZE(%rdi)
	VMOVU	%VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx)
	VMOVU	%VEC(0), (VEC_SIZE * 2)(%rdi)
	VMOVU	%VEC(0), -(VEC_SIZE * 3)(%rdi,%rdx)
	VMOVU	%VEC(0), (VEC_SIZE * 3)(%rdi)
	VMOVU	%VEC(0), -(VEC_SIZE * 4)(%rdi,%rdx)
	addq	%rdi, %rdx
	andq	$-(VEC_SIZE * 4), %rdx
	cmpq	%rdx, %rcx
	je	L(return)
L(loop):
	VMOVA	%VEC(0), (%rcx)
	VMOVA	%VEC(0), VEC_SIZE(%rcx)
	VMOVA	%VEC(0), (VEC_SIZE * 2)(%rcx)
	VMOVA	%VEC(0), (VEC_SIZE * 3)(%rcx)
	addq	$(VEC_SIZE * 4), %rcx
	cmpq	%rcx, %rdx
	jne	L(loop)
	VZEROUPPER_SHORT_RETURN
	ret
L(less_vec):
	/* Less than 1 VEC.  */
# if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
#  error Unsupported VEC_SIZE!
# endif
# if VEC_SIZE > 32
	cmpb	$32, %dl
	jae	L(between_32_63)
# endif
# if VEC_SIZE > 16
	cmpb	$16, %dl
	jae	L(between_16_31)
# endif
	/* Copy the byte pattern to %rcx for the integer stores below.  */
	MOVQ	%xmm0, %rcx
	cmpb	$8, %dl
	jae	L(between_8_15)
	cmpb	$4, %dl
	jae	L(between_4_7)
	cmpb	$1, %dl
	ja	L(between_2_3)
	jb	1f
	movb	%cl, (%rdi)
1:
	VZEROUPPER
	ret
# if VEC_SIZE > 32
	/* From 32 to 63.  No branch when size == 32.  */
L(between_32_63):
	vmovdqu	%ymm0, -32(%rdi,%rdx)
	vmovdqu	%ymm0, (%rdi)
	VZEROUPPER
	ret
# endif
# if VEC_SIZE > 16
	/* From 16 to 31.  No branch when size == 16.  */
L(between_16_31):
	vmovdqu	%xmm0, -16(%rdi,%rdx)
	vmovdqu	%xmm0, (%rdi)
	VZEROUPPER
	ret
# endif
	/* From 8 to 15.  No branch when size == 8.  */
L(between_8_15):
	movq	%rcx, -8(%rdi,%rdx)
	movq	%rcx, (%rdi)
	VZEROUPPER
	ret
L(between_4_7):
	/* From 4 to 7.  No branch when size == 4.  */
	movl	%ecx, -4(%rdi,%rdx)
	movl	%ecx, (%rdi)
	VZEROUPPER
	ret
L(between_2_3):
	/* From 2 to 3.  No branch when size == 2.  */
	movw	%cx, -2(%rdi,%rdx)
	movw	%cx, (%rdi)
	VZEROUPPER
	ret
END (MEMSET_SYMBOL (__memset, unaligned_erms))