Blame sysdeps/powerpc/powerpc32/memset.S

Packit Service 82fcde
/* Optimized memset implementation for PowerPC.
Packit Service 82fcde
   Copyright (C) 1997-2018 Free Software Foundation, Inc.
Packit Service 82fcde
   This file is part of the GNU C Library.
Packit Service 82fcde
Packit Service 82fcde
   The GNU C Library is free software; you can redistribute it and/or
Packit Service 82fcde
   modify it under the terms of the GNU Lesser General Public
Packit Service 82fcde
   License as published by the Free Software Foundation; either
Packit Service 82fcde
   version 2.1 of the License, or (at your option) any later version.
Packit Service 82fcde
Packit Service 82fcde
   The GNU C Library is distributed in the hope that it will be useful,
Packit Service 82fcde
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service 82fcde
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Service 82fcde
   Lesser General Public License for more details.
Packit Service 82fcde
Packit Service 82fcde
   You should have received a copy of the GNU Lesser General Public
Packit Service 82fcde
   License along with the GNU C Library; if not, see
Packit Service 82fcde
   <http://www.gnu.org/licenses/>.  */
Packit Service 82fcde
Packit Service 82fcde
#include <sysdep.h>
Packit Service 82fcde
Packit Service 82fcde
/* void * [r3] memset (void *s [r3], int c [r4], size_t n [r5]);
Packit Service 82fcde
   Returns 's'.
Packit Service 82fcde
Packit Service 82fcde
   The memset is done in four sizes: byte (8 bits), word (32 bits),
Packit Service 82fcde
   32-byte blocks (256 bits) and __cache_line_size (128, 256, 1024 bits).
Packit Service 82fcde
   There is a special case for setting whole cache lines to 0, which
Packit Service 82fcde
   takes advantage of the dcbz instruction.  */
Packit Service 82fcde
Packit Service 82fcde
	.section	".text"
Packit Service 82fcde
EALIGN (memset, 5, 1)
Packit Service 82fcde
Packit Service 82fcde
/* Register aliases used throughout memset.  Several names map to the
   same register (r7: rALIGN/rPOS32, r8: rMEMP2/rNEG64/rCLS,
   r9: rNEG32/rGOT/rCLM), so loading one alias destroys the others that
   share its register.  r3 is only ever read in this routine, so it
   still holds 's' when the routine returns.  */
#define rTMP	r0	/* scratch: loop counts, saved link register */
Packit Service 82fcde
#define rRTN	r3	/* initial value of 1st argument */
Packit Service 82fcde
#define rMEMP0	r3	/* original value of 1st arg */
Packit Service 82fcde
#define rCHR	r4	/* char to set in each byte */
Packit Service 82fcde
#define rLEN	r5	/* length of region to set */
Packit Service 82fcde
#define rMEMP	r6	/* address at which we are storing */
Packit Service 82fcde
#define rALIGN	r7	/* number of bytes we are setting now (when aligning) */
Packit Service 82fcde
#define rMEMP2	r8	/* store pointer that walks backwards while aligning to 32 bytes */
Packit Service 82fcde
Packit Service 82fcde
#define rPOS32	r7	/* constant +32 for clearing with dcbz */
Packit Service 82fcde
#define rNEG64	r8	/* constant -64 for clearing with dcbz */
Packit Service 82fcde
#define rNEG32	r9	/* constant -32 for clearing with dcbz */
Packit Service 82fcde
Packit Service 82fcde
#define rGOT	r9	/* Address of the Global Offset Table.  */
Packit Service 82fcde
#define rCLS	r8	/* Cache line size obtained from static.  */
Packit Service 82fcde
#define rCLM	r9	/* Cache line size mask to check for cache alignment.  */
Packit Service 82fcde
Packit Service 82fcde
/* take care of case for size <= 4  */
Packit Service 82fcde
	cmplwi	cr1, rLEN, 4
Packit Service 82fcde
	andi.	rALIGN, rMEMP0, 3	/* cr0.eq <=> s is already word aligned */
Packit Service 82fcde
	mr	rMEMP, rMEMP0	/* work on a copy so r3 keeps the value to return */
Packit Service 82fcde
	ble-	cr1, L(small)
Packit Service 82fcde
/* align to word boundary  */
Packit Service 82fcde
	cmplwi	cr5, rLEN, 31	/* consumed at L(aligned); compares the length before alignment */
Packit Service 82fcde
	rlwimi	rCHR, rCHR, 8, 16, 23	/* low halfword of rCHR = fill byte twice */
Packit Service 82fcde
	beq+	L(aligned)	/* 8th instruction from .align */
Packit Service 82fcde
	mtcrf	0x01, rMEMP0	/* cr7 = low 4 address bits: bit 31 = 1s bit, bit 30 = 2s bit */
Packit Service 82fcde
	subfic	rALIGN, rALIGN, 4	/* rALIGN = 4 - (s & 3): bytes up to the word boundary */
Packit Service 82fcde
	add	rMEMP, rMEMP, rALIGN	/* rMEMP = first word-aligned address */
Packit Service 82fcde
	sub	rLEN, rLEN, rALIGN
Packit Service 82fcde
	bf+	31, L(g0)	/* even address, i.e. s & 3 == 2: one halfword suffices */
Packit Service 82fcde
	stb	rCHR, 0(rMEMP0)
Packit Service 82fcde
	bt	30, L(aligned)	/* s & 3 == 3: that single byte was enough */
Packit Service 82fcde
L(g0):	sth	rCHR, -2(rMEMP)	/* 16th instruction from .align */
Packit Service 82fcde
/* take care of case for size <= 31 (cr5 holds the comparison of the
   length with 31 that was made before any word-alignment bytes were
   subtracted from rLEN) */
Packit Service 82fcde
L(aligned):
Packit Service 82fcde
	mtcrf	0x01, rLEN	/* cr7 = low 4 bits of rLEN for the L(medium) tail code */
Packit Service 82fcde
	rlwimi	rCHR, rCHR, 16, 0, 15	/* copy the low halfword into the high halfword */
Packit Service 82fcde
	ble	cr5, L(medium)
Packit Service 82fcde
/* align to a 32-byte boundary (rMEMP is word aligned here, so among
   the low five address bits only those in 0x1C can be set)...  */
Packit Service 82fcde
	andi.	rALIGN, rMEMP, 0x1C	/* cr0.eq <=> already 32-byte aligned */
Packit Service 82fcde
	subfic	rALIGN, rALIGN, 0x20	/* rALIGN = bytes up to the next 32-byte boundary */
Packit Service 82fcde
	beq	L(caligned)
Packit Service 82fcde
	mtcrf	0x01, rALIGN	/* cr7 bit 28 = 8s bit, bit 29 = 4s bit of rALIGN */
Packit Service 82fcde
	add	rMEMP, rMEMP, rALIGN
Packit Service 82fcde
	sub	rLEN, rLEN, rALIGN
Packit Service 82fcde
	cmplwi	cr1, rALIGN, 0x10	/* cr1.lt <=> fewer than 16 bytes to fill */
Packit Service 82fcde
	mr	rMEMP2, rMEMP	/* fill backwards from the 32-byte boundary */
Packit Service 82fcde
	bf	28, L(a1)	/* skip the 8-byte piece when the 8s bit is clear */
Packit Service 82fcde
	stw	rCHR, -4(rMEMP2)
Packit Service 82fcde
	stwu	rCHR, -8(rMEMP2)
Packit Service 82fcde
L(a1):	blt	cr1, L(a2)
Packit Service 82fcde
	stw	rCHR, -4(rMEMP2) /* 32nd instruction from .align */
Packit Service 82fcde
	stw	rCHR, -8(rMEMP2)
Packit Service 82fcde
	stw	rCHR, -12(rMEMP2)
Packit Service 82fcde
	stwu	rCHR, -16(rMEMP2)
Packit Service 82fcde
L(a2):	bf	29, L(caligned)	/* skip the last word when the 4s bit is clear */
Packit Service 82fcde
	stw	rCHR, -4(rMEMP2)
Packit Service 82fcde
/* now aligned to a 32-byte boundary (which is a whole cache line only
   when the cache line size is 32 bytes).  */
Packit Service 82fcde
L(caligned):
Packit Service 82fcde
	cmplwi	cr1, rCHR, 0	/* cr1.eq <=> the replicated fill word is zero */
Packit Service 82fcde
	clrrwi.	rALIGN, rLEN, 5	/* rALIGN = rLEN & ~31; cr0.eq <=> fewer than 32 bytes left */
Packit Service 82fcde
/* cr7 = low 4 bits of rLEN, consumed later by the tail code at
   L(medium).  */
	mtcrf	0x01, rLEN	/* 40th instruction from .align */
Packit Service 82fcde
Packit Service 82fcde
/* Check if we can use the special case for clearing memory using dcbz.
Packit Service 82fcde
   This requires that we know the correct cache line size for this
Packit Service 82fcde
   processor.  Getting the __cache_line_size may require establishing GOT
Packit Service 82fcde
   addressability, so branch out of line to set this up.  */
Packit Service 82fcde
	beq	cr1, L(checklinesize)
Packit Service 82fcde
Packit Service 82fcde
/* Store blocks of 32-bytes (256-bits) starting on a 32-byte boundary.
Packit Service 82fcde
   Can't assume that rCHR is zero or that the cache line size is either
Packit Service 82fcde
   32-bytes or even known.  */
Packit Service 82fcde
/* On entry: rALIGN = rLEN rounded down to a multiple of 32, cr0 still
   reflects the clrrwi. that computed rALIGN, and rMEMP points at the
   start of the region.  The blocks are written from the highest address
   downwards.  */
L(nondcbz):
Packit Service 82fcde
	srwi	rTMP, rALIGN, 5	/* rTMP = number of 32-byte blocks */
Packit Service 82fcde
	mtctr	rTMP
Packit Service 82fcde
	beq	L(medium)	/* we may not actually get to do a full line */
Packit Service 82fcde
	clrlwi.	rLEN, rLEN, 27	/* rLEN &= 31 (tail bytes); cr0.eq <=> no tail */
Packit Service 82fcde
	add	rMEMP, rMEMP, rALIGN	/* rMEMP = end of the whole-block area */
Packit Service 82fcde
	li	rNEG64, -0x40
Packit Service 82fcde
/* bdz decrements CTR first: with exactly one block the loop is skipped
   and L(cloopdone) stores that block.  */
	bdz	L(cloopdone)	/* 48th instruction from .align */
Packit Service 82fcde
Packit Service 82fcde
/* We can't use dcbz here as we don't know the cache line size.  We can
Packit Service 82fcde
   use "data cache block touch for store", which is safe.  */
Packit Service 82fcde
/* Each iteration touches the address 64 bytes below rMEMP, stores the
   32 bytes just below rMEMP, and the final stwu moves rMEMP down by 32.  */
L(c3):	dcbtst	rNEG64, rMEMP
Packit Service 82fcde
	stw	rCHR, -4(rMEMP)
Packit Service 82fcde
	stw	rCHR, -8(rMEMP)
Packit Service 82fcde
	stw	rCHR, -12(rMEMP)
Packit Service 82fcde
	stw	rCHR, -16(rMEMP)
Packit Service 82fcde
	nop			/* let 601 fetch last 4 instructions of loop */
Packit Service 82fcde
	stw	rCHR, -20(rMEMP)
Packit Service 82fcde
	stw	rCHR, -24(rMEMP) /* 56th instruction from .align */
Packit Service 82fcde
	nop			/* let 601 fetch first 8 instructions of loop */
Packit Service 82fcde
	stw	rCHR, -28(rMEMP)
Packit Service 82fcde
	stwu	rCHR, -32(rMEMP)
Packit Service 82fcde
	bdnz	L(c3)
Packit Service 82fcde
/* Store the last remaining 32-byte block (the lowest-addressed one).  */
L(cloopdone):
Packit Service 82fcde
	stw	rCHR, -4(rMEMP)
Packit Service 82fcde
	stw	rCHR, -8(rMEMP)
Packit Service 82fcde
	stw	rCHR, -12(rMEMP)
Packit Service 82fcde
	stw	rCHR, -16(rMEMP) /* 64th instruction from .align */
Packit Service 82fcde
	stw	rCHR, -20(rMEMP)
Packit Service 82fcde
	cmplwi	cr1, rLEN, 16	/* cr1 is an input of L(medium_tail2) */
Packit Service 82fcde
	stw	rCHR, -24(rMEMP)
Packit Service 82fcde
	stw	rCHR, -28(rMEMP)
Packit Service 82fcde
	stwu	rCHR, -32(rMEMP)
Packit Service 82fcde
	beqlr	/* cr0 from the clrlwi. above: no tail bytes, so return */
Packit Service 82fcde
	add	rMEMP, rMEMP, rALIGN	/* rMEMP was walked back to the start; move to the end again */
Packit Service 82fcde
	b	L(medium_tail2)	/* 72nd instruction from .align */
Packit Service 82fcde
Packit Service 82fcde
	.align	5
Packit Service 82fcde
/* NOTE(review): this nop follows an unconditional branch and nothing
   branches to it; presumably it only offsets L(zloopstart) by one
   instruction for the i-cache layout described below -- confirm.  */
	nop
Packit Service 82fcde
/* Clear cache lines of memory in 128-byte chunks.
Packit Service 82fcde
   This code is optimized for processors with 32-byte cache lines.
Packit Service 82fcde
   It is further optimized for the 601 processor, which requires
Packit Service 82fcde
   some care in how the code is aligned in the i-cache.  */
Packit Service 82fcde
/* On entry: rALIGN = byte count of whole 32-byte lines to clear, rMEMP
   is 32-byte aligned.  rPOS32 shares r7 with rALIGN, so rALIGN is
   consumed (mtcrf, srwi.) before rPOS32 is loaded.  */
L(zloopstart):
Packit Service 82fcde
	clrlwi	rLEN, rLEN, 27	/* rLEN &= 31: bytes left after the whole lines */
Packit Service 82fcde
	mtcrf	0x02, rALIGN	/* cr6 = rALIGN bits 24-27: bit 25 = 0x40, bit 26 = 0x20 */
Packit Service 82fcde
	srwi.	rTMP, rALIGN, 7	/* rTMP = number of 128-byte chunks; cr0.eq <=> none */
Packit Service 82fcde
	mtctr	rTMP
Packit Service 82fcde
	li	rPOS32, 0x20
Packit Service 82fcde
	li	rNEG64, -0x40
Packit Service 82fcde
	cmplwi	cr1, rLEN, 16	/* 8 */
Packit Service 82fcde
	bf	26, L(z0)	/* 0x20 bit clear: no odd single line */
Packit Service 82fcde
	dcbz	0, rMEMP
Packit Service 82fcde
	addi	rMEMP, rMEMP, 0x20
Packit Service 82fcde
L(z0):	li	rNEG32, -0x20
Packit Service 82fcde
	bf	25, L(z1)	/* 0x40 bit clear: no pair of lines */
Packit Service 82fcde
	dcbz	0, rMEMP
Packit Service 82fcde
	dcbz	rPOS32, rMEMP
Packit Service 82fcde
	addi	rMEMP, rMEMP, 0x40 /* 16 */
Packit Service 82fcde
L(z1):	cmplwi	cr5, rLEN, 0	/* cr5.eq <=> no tail bytes */
Packit Service 82fcde
	beq	L(medium)	/* cr0 from srwi.: no 128-byte chunk to clear */
Packit Service 82fcde
/* Clear four 32-byte lines (128 bytes) per iteration.  */
L(zloop):
Packit Service 82fcde
	dcbz	0, rMEMP
Packit Service 82fcde
	dcbz	rPOS32, rMEMP
Packit Service 82fcde
	addi	rMEMP, rMEMP, 0x80
Packit Service 82fcde
	dcbz	rNEG64, rMEMP
Packit Service 82fcde
	dcbz	rNEG32, rMEMP
Packit Service 82fcde
	bdnz	L(zloop)
Packit Service 82fcde
	beqlr	cr5	/* nothing left after the whole lines: return */
Packit Service 82fcde
	b	L(medium_tail2)	/* cr1 (rLEN vs 16) and cr7 are already set up */
Packit Service 82fcde
Packit Service 82fcde
	.align	5
Packit Service 82fcde
L(small):
Packit Service 82fcde
/* Memset of 4 bytes or less.  */
Packit Service 82fcde
/* rMEMP is a copy of s.  Only byte stores are used, so the fill byte
   in rCHR does not need to be replicated on this path.
   NOTE(review): the two nops are presumably instruction-fetch padding
   (other comments in this file tune layout for the 601) -- confirm.  */
	cmplwi	cr5, rLEN, 1	/* cr5: n compared with 1 */
Packit Service 82fcde
	cmplwi	cr1, rLEN, 3	/* cr1: n compared with 3 */
Packit Service 82fcde
	bltlr	cr5	/* n == 0: nothing to store */
Packit Service 82fcde
	stb	rCHR, 0(rMEMP)
Packit Service 82fcde
	beqlr	cr5	/* n == 1 */
Packit Service 82fcde
	nop
Packit Service 82fcde
	stb	rCHR, 1(rMEMP)
Packit Service 82fcde
	bltlr	cr1	/* n == 2 */
Packit Service 82fcde
	stb	rCHR, 2(rMEMP)
Packit Service 82fcde
	beqlr	cr1	/* n == 3 */
Packit Service 82fcde
	nop
Packit Service 82fcde
	stb	rCHR, 3(rMEMP)
Packit Service 82fcde
	blr	/* n == 4 */
Packit Service 82fcde
Packit Service 82fcde
/* Memset of 0-31 bytes.
   State expected from the code that branches here: cr7 holds the low
   four bits of rLEN (mtcrf 0x01, rLEN) and rMEMP points at the start of
   the bytes still to be set.  Callers of L(medium_tail2) have also
   already compared rLEN with 16 into cr1.  The bytes are stored
   backwards from the end: a byte, halfword and word as selected by cr7
   bits 31, 30 and 29, then 16 bytes if rLEN >= 16, then 8 bytes if cr7
   bit 28 is set.  */
Packit Service 82fcde
	.align	5
Packit Service 82fcde
L(medium):
Packit Service 82fcde
	cmplwi	cr1, rLEN, 16
Packit Service 82fcde
L(medium_tail2):
Packit Service 82fcde
	add	rMEMP, rMEMP, rLEN	/* rMEMP = one past the last byte to set */
Packit Service 82fcde
L(medium_tail):
Packit Service 82fcde
	bt-	31, L(medium_31t)	/* odd length: store one byte first */
Packit Service 82fcde
	bt-	30, L(medium_30t)	/* 2s bit set: store a halfword */
Packit Service 82fcde
L(medium_30f):
Packit Service 82fcde
	bt-	29, L(medium_29t)	/* 4s bit set: store a word */
Packit Service 82fcde
L(medium_29f):
Packit Service 82fcde
	bge-	cr1, L(medium_27t)	/* rLEN >= 16: store 16 bytes */
Packit Service 82fcde
	bflr-	28	/* 8s bit clear: done */
Packit Service 82fcde
	stw	rCHR, -4(rMEMP)	/* 8th instruction from .align */
Packit Service 82fcde
	stw	rCHR, -8(rMEMP)
Packit Service 82fcde
	blr
Packit Service 82fcde
Packit Service 82fcde
L(medium_31t):
Packit Service 82fcde
	stbu	rCHR, -1(rMEMP)	/* update form: rMEMP moves down with each piece */
Packit Service 82fcde
	bf-	30, L(medium_30f)
Packit Service 82fcde
L(medium_30t):
Packit Service 82fcde
	sthu	rCHR, -2(rMEMP)
Packit Service 82fcde
	bf-	29, L(medium_29f)
Packit Service 82fcde
L(medium_29t):
Packit Service 82fcde
	stwu	rCHR, -4(rMEMP)
Packit Service 82fcde
	blt-	cr1, L(medium_27f) /* 16th instruction from .align */
Packit Service 82fcde
L(medium_27t):
Packit Service 82fcde
	stw	rCHR, -4(rMEMP)
Packit Service 82fcde
	stw	rCHR, -8(rMEMP)
Packit Service 82fcde
	stw	rCHR, -12(rMEMP)
Packit Service 82fcde
	stwu	rCHR, -16(rMEMP)
Packit Service 82fcde
L(medium_27f):
Packit Service 82fcde
	bflr-	28	/* 8s bit clear: done */
Packit Service 82fcde
L(medium_28t):
Packit Service 82fcde
	stw	rCHR, -4(rMEMP)
Packit Service 82fcde
	stw	rCHR, -8(rMEMP)
Packit Service 82fcde
	blr
Packit Service 82fcde
Packit Service 82fcde
/* Reached from L(caligned) when the fill word is zero.  cr0 still holds
   the result of the clrrwi. there (eq <=> fewer than 32 bytes remain)
   and rALIGN holds rLEN rounded down to a multiple of 32.  */
L(checklinesize):
Packit Service 82fcde
#ifdef SHARED
Packit Service 82fcde
/* Keep the return address in rTMP; it is put back with mtlr after the
   GOT sequence.  NOTE(review): presumably SETUP_GOT_ACCESS overwrites
   LR -- its definition is not in this file, confirm.  */
	mflr	rTMP
Packit Service 82fcde
/* If the remaining length is less than 32 bytes then don't bother getting
Packit Service 82fcde
   the cache line size.  */
Packit Service 82fcde
	beq	L(medium)
Packit Service 82fcde
/* Establishes GOT addressability so we can load __cache_line_size
Packit Service 82fcde
   from static. This value was set from the aux vector during startup.  */
Packit Service 82fcde
	SETUP_GOT_ACCESS(rGOT,got_label)
Packit Service 82fcde
	addis	rGOT,rGOT,__cache_line_size-got_label@ha
Packit Service 82fcde
	lwz	rCLS,__cache_line_size-got_label@l(rGOT)
Packit Service 82fcde
	mtlr	rTMP
Packit Service 82fcde
#else
Packit Service 82fcde
/* Load __cache_line_size from static. This value was set from the
Packit Service 82fcde
   aux vector during startup.  */
Packit Service 82fcde
	lis	rCLS,__cache_line_size@ha
Packit Service 82fcde
/* If the remaining length is less than 32 bytes then don't bother getting
Packit Service 82fcde
   the cache line size.  */
Packit Service 82fcde
	beq	L(medium)
Packit Service 82fcde
	lwz	rCLS,__cache_line_size@l(rCLS)
Packit Service 82fcde
#endif
Packit Service 82fcde
Packit Service 82fcde
/* If the cache line size was not set then go to L(nondcbz), which is
Packit Service 82fcde
   safe for any cache line size.  */
Packit Service 82fcde
	cmplwi	cr1,rCLS,0
Packit Service 82fcde
	beq	cr1,L(nondcbz)
Packit Service 82fcde
Packit Service 82fcde
/* If the cache line size is 32 bytes then go to L(zloopstart),
Packit Service 82fcde
   which is coded specifically for 32-byte lines (and 601).  */
Packit Service 82fcde
	cmplwi	cr1,rCLS,32
Packit Service 82fcde
	beq	cr1,L(zloopstart)
Packit Service 82fcde
Packit Service 82fcde
/* Now we know the cache line size and it is not 32-bytes.  However
Packit Service 82fcde
   we may not yet be aligned to the cache line and may have a partial
Packit Service 82fcde
   line to fill.  Touch it 1st to fetch the cache line.  */
Packit Service 82fcde
	dcbtst	0,rMEMP
Packit Service 82fcde
Packit Service 82fcde
/* rCLM = line size - 1.  NOTE(review): this is a valid alignment mask
   only if the line size is a power of two, which is assumed and not
   checked here.  */
	addi	rCLM,rCLS,-1
Packit Service 82fcde
L(getCacheAligned):
Packit Service 82fcde
	cmplwi	cr1,rLEN,32
Packit Service 82fcde
	and.	rTMP,rCLM,rMEMP	/* cr0.eq <=> rMEMP is on a cache-line boundary */
Packit Service 82fcde
	blt	cr1,L(handletail32)	/* fewer than 32 bytes left */
Packit Service 82fcde
	beq	L(cacheAligned)
Packit Service 82fcde
/* We are not aligned to start of a cache line yet.  Store 32-byte
Packit Service 82fcde
   of data and test again.  */
Packit Service 82fcde
	addi	rMEMP,rMEMP,32
Packit Service 82fcde
	addi	rLEN,rLEN,-32
Packit Service 82fcde
	stw	rCHR,-32(rMEMP)
Packit Service 82fcde
	stw	rCHR,-28(rMEMP)
Packit Service 82fcde
	stw	rCHR,-24(rMEMP)
Packit Service 82fcde
	stw	rCHR,-20(rMEMP)
Packit Service 82fcde
	stw	rCHR,-16(rMEMP)
Packit Service 82fcde
	stw	rCHR,-12(rMEMP)
Packit Service 82fcde
	stw	rCHR,-8(rMEMP)
Packit Service 82fcde
	stw	rCHR,-4(rMEMP)
Packit Service 82fcde
	b	L(getCacheAligned)
Packit Service 82fcde
Packit Service 82fcde
/* Now we are aligned to the cache line and can use dcbz.  */
Packit Service 82fcde
/* Clear one whole cache line per iteration while at least rCLS bytes
   remain.  */
L(cacheAligned):
Packit Service 82fcde
	cmplw	cr1,rLEN,rCLS
Packit Service 82fcde
	blt	cr1,L(handletail32)
Packit Service 82fcde
	dcbz	0,rMEMP
Packit Service 82fcde
	subf	rLEN,rCLS,rLEN	/* rLEN -= rCLS */
Packit Service 82fcde
	add	rMEMP,rMEMP,rCLS
Packit Service 82fcde
	b	L(cacheAligned)
Packit Service 82fcde
Packit Service 82fcde
/* We are here because the cache line size was set, it was not
Packit Service 82fcde
   32-bytes, and the remainder (rLEN) is now less than the actual cache
Packit Service 82fcde
   line size (or fewer than 32 bytes were left before a cache-line
   boundary was reached).  Set up the preconditions for L(nondcbz) and
   go there to
Packit Service 82fcde
   store the remaining bytes.  */
Packit Service 82fcde
L(handletail32):
Packit Service 82fcde
	clrrwi.	rALIGN, rLEN, 5	/* rALIGN = rLEN & ~31 and cr0, as L(nondcbz) expects */
Packit Service 82fcde
	b	L(nondcbz)
Packit Service 82fcde
Packit Service 82fcde
END (memset)
Packit Service 82fcde
libc_hidden_builtin_def (memset)