Blame sysdeps/ia64/memcmp.S

Packit Service 82fcde
/* Optimized version of the standard memcmp() function.
Packit Service 82fcde
   This file is part of the GNU C Library.
Packit Service 82fcde
   Copyright (C) 2000-2018 Free Software Foundation, Inc.
Packit Service 82fcde
   Contributed by Dan Pop <Dan.Pop@cern.ch>.
Packit Service 82fcde
Packit Service 82fcde
   The GNU C Library is free software; you can redistribute it and/or
Packit Service 82fcde
   modify it under the terms of the GNU Lesser General Public
Packit Service 82fcde
   License as published by the Free Software Foundation; either
Packit Service 82fcde
   version 2.1 of the License, or (at your option) any later version.
Packit Service 82fcde
Packit Service 82fcde
   The GNU C Library is distributed in the hope that it will be useful,
Packit Service 82fcde
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service 82fcde
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Service 82fcde
   Lesser General Public License for more details.
Packit Service 82fcde
Packit Service 82fcde
   You should have received a copy of the GNU Lesser General Public
Packit Service 82fcde
   License along with the GNU C Library; if not, see
Packit Service 82fcde
   <http://www.gnu.org/licenses/>.  */
Packit Service 82fcde
Packit Service 82fcde
/* Return: 0 in ret0 if the two regions are equal, a negative value if
   s1 (in0) compares lower than s2 (in1), a positive value otherwise.
Packit Service 82fcde
Packit Service 82fcde
   Inputs:
Packit Service 82fcde
        in0:    dest (aka s1)
Packit Service 82fcde
        in1:    src  (aka s2)
Packit Service 82fcde
        in2:    byte count
Packit Service 82fcde
Packit Service 82fcde
   In this form, it assumes little endian mode.  For big endian mode,
Packit Service 82fcde
   the two shifts in .l2 must be inverted:
Packit Service 82fcde
Packit Service 82fcde
	shl   	tmp1[0] = r[1 + MEMLAT], sh1   // tmp1 = w0 << sh1
Packit Service 82fcde
	shr.u   tmp2[0] = r[0 + MEMLAT], sh2   // tmp2 = w1 >> sh2
Packit Service 82fcde
Packit Service 82fcde
   and all the mux1 instructions should be replaced by plain mov's.  */
Packit Service 82fcde
Packit Service 82fcde
#include <sysdep.h>
Packit Service 82fcde
#undef ret
Packit Service 82fcde
Packit Service 82fcde
/* Tuning constants.
   OP_T_THRES: lengths <= this value skip the word loops and are compared
               one byte at a time.
   OPSIZ:      word size in bytes.  The visible code uses the literals
               8 / -8 / 3 directly; OPSIZ itself appears only in comments.
   MEMLAT:     number of pipeline stages between a load and the first use
               of the loaded value in the software-pipelined loops
               (presumably the assumed load latency -- TODO confirm).  */
#define OP_T_THRES 	16
Packit Service 82fcde
#define OPSIZ 		8
Packit Service 82fcde
#define MEMLAT		2
Packit Service 82fcde
Packit Service 82fcde
/* Static register assignments.
   start:      not referenced by the code visible in this file.
   saved_pr:   copy of pr taken on entry, restored on exit.
   saved_lc:   copy of ar.lc taken on entry, restored on exit.
   dest:       running pointer into s1 (initialised from in0).
   src:        running pointer into s2 (initialised from in1).
   len:        bytes still to be compared (initialised from in2).
   asrc:       src rounded down to a multiple of 8; used for aligned loads.
   tmp:        scratch.
   value1:     byte or word taken from the src (s2) side.
   value2:     byte or word taken from the dest (s1) side.
   sh2, sh1:   bit shift counts: sh1 = 8 * (src % 8), sh2 = 64 - sh1.
   loopcnt:    iteration count minus one, as loaded into ar.lc.  */
#define start		r15
Packit Service 82fcde
#define saved_pr	r17
Packit Service 82fcde
#define saved_lc	r18
Packit Service 82fcde
#define dest		r19
Packit Service 82fcde
#define src		r20
Packit Service 82fcde
#define len		r21
Packit Service 82fcde
#define asrc		r22
Packit Service 82fcde
#define tmp		r23
Packit Service 82fcde
#define value1		r24
Packit Service 82fcde
#define value2		r25
Packit Service 82fcde
#define sh2		r28
Packit Service 82fcde
#define	sh1		r29
Packit Service 82fcde
#define loopcnt		r30
Packit Service 82fcde
Packit Service 82fcde
// memcmp: compare in2 bytes starting at in0 (s1) with in2 bytes starting
// at in1 (s2).
//
// Result in ret0:
//   0                if no differing byte is found;
//   value2 - value1  (s1 byte minus s2 byte) when the mismatch is found by
//                    one of the byte loops (.l1, .l4);
//   -1 or 1          when the mismatch is found by one of the 8-byte loops
//                    (.l2, .l3), from an unsigned compare of the
//                    byte-reversed words.
//
// Outline:
//   1. If len <= OP_T_THRES, only the byte loop (.cmpfew / .l4) runs.
//   2. Otherwise -dest % 8 leading bytes are compared (.l1) so that dest
//      becomes 8-byte aligned.
//   3. len / 8 words are compared: by .l3 if src is then 8-byte aligned as
//      well, else by .l2, which rebuilds each src word from two aligned
//      loads with a pair of shifts.
//   4. The remaining len % 8 bytes are compared by .cmpfew / .l4.
//
// pr and ar.lc are saved on entry and restored on both exit paths.
ENTRY(memcmp)
Packit Service 82fcde
	.prologue
Packit Service 82fcde
	// 3 input, 37 local and 0 output registers; 40 registers rotate.
	alloc 	r2 = ar.pfs, 3, 37, 0, 40
Packit Service 82fcde
Packit Service 82fcde
	// Names for the rotating registers used by the pipelined loops:
	//   r[]    words loaded from the src side
	//   q[]    words loaded from the dest side
	//   tmp1[] / tmp2[]  shifted halves of a realigned src word (.l2)
	//   val[]  realigned src word (.l2)
	//   p[]    stage predicates
	.rotr	r[MEMLAT + 2], q[MEMLAT + 5], tmp1[4], tmp2[4], val[2]
Packit Service 82fcde
	.rotp	p[MEMLAT + 4 + 1]
Packit Service 82fcde
Packit Service 82fcde
	mov	ret0 = r0		// by default return value = 0
Packit Service 82fcde
	.save pr, saved_pr
Packit Service 82fcde
	mov	saved_pr = pr		// save the predicate registers
Packit Service 82fcde
	.save ar.lc, saved_lc
Packit Service 82fcde
        mov 	saved_lc = ar.lc	// save the loop counter
Packit Service 82fcde
	.body
Packit Service 82fcde
	mov 	dest = in0		// dest
Packit Service 82fcde
	mov 	src = in1		// src
Packit Service 82fcde
	mov	len = in2		// len
Packit Service 82fcde
	sub	tmp = r0, in0		// tmp = -dest
Packit Service 82fcde
	;;
Packit Service 82fcde
	and	loopcnt = 7, tmp		// loopcnt = -dest % 8
Packit Service 82fcde
	cmp.ge	p6, p0 = OP_T_THRES, len	// is len <= OP_T_THRES
Packit Service 82fcde
(p6)	br.cond.spnt	.cmpfew			// compare byte by byte
Packit Service 82fcde
	;;
Packit Service 82fcde
	// Skip the head loop when dest is already 8-byte aligned.
	cmp.eq	p6, p0 = loopcnt, r0
Packit Service 82fcde
(p6)	br.cond.sptk .dest_aligned
Packit Service 82fcde
	sub	len = len, loopcnt	// len -= -dest % 8
Packit Service 82fcde
	adds	loopcnt = -1, loopcnt	// --loopcnt
Packit Service 82fcde
	;;
Packit Service 82fcde
	mov	ar.lc = loopcnt
Packit Service 82fcde
.l1:					// compare -dest % 8 bytes
Packit Service 82fcde
	ld1	value1 = [src], 1	// value1 = *src++
Packit Service 82fcde
	ld1	value2 = [dest], 1	// value2 = *dest++
Packit Service 82fcde
	;;
Packit Service 82fcde
	cmp.ne	p6, p0 = value1, value2
Packit Service 82fcde
(p6)	br.cond.spnt .done		// mismatch: .done computes the result
Packit Service 82fcde
	br.cloop.dptk .l1
Packit Service 82fcde
// dest is 8-byte aligned here.  Split len into a word count (loopcnt) and
// a byte remainder (len), and work out how misaligned src is.
.dest_aligned:
Packit Service 82fcde
	and	sh1 = 7, src 		// sh1 = src % 8
Packit Service 82fcde
	and	tmp = -8, len   	// tmp = len & -OPSIZ
Packit Service 82fcde
	and	asrc = -8, src		// asrc = src & -OPSIZ  -- align src
Packit Service 82fcde
	shr.u	loopcnt = len, 3	// loopcnt = len / 8
Packit Service 82fcde
	and	len = 7, len ;;		// len = len % 8
Packit Service 82fcde
	shl	sh1 = sh1, 3		// sh1 = 8 * (src % 8)
Packit Service 82fcde
	adds	loopcnt = -1, loopcnt	// --loopcnt
Packit Service 82fcde
	// Only bit 16 is set, so p16 (= p[0]) is 1 and the other rotating
	// predicates are 0: the pipeline starts with just stage 0 active.
	mov     pr.rot = 1 << 16 ;;	// set rotating predicates
Packit Service 82fcde
	sub	sh2 = 64, sh1		// sh2 = 64 - sh1
Packit Service 82fcde
	mov	ar.lc = loopcnt		// set LC
Packit Service 82fcde
	cmp.eq  p6, p0 = sh1, r0 	// is the src aligned?
Packit Service 82fcde
(p6)    br.cond.sptk .src_aligned
Packit Service 82fcde
	// Unaligned src: .l2 reads the src side through asrc only, so src is
	// advanced here past the words .l2 will cover; .cmpfew then resumes
	// from src for the trailing bytes.
	add	src = src, tmp		// src += len & -OPSIZ
Packit Service 82fcde
	mov	ar.ec = MEMLAT + 4 + 1 	// four more passes needed
Packit Service 82fcde
	ld8	r[1] = [asrc], 8 ;;	// r[1] = w0
Packit Service 82fcde
	.align	32
Packit Service 82fcde
Packit Service 82fcde
// We enter this loop with p6 cleared by the above comparison
Packit Service 82fcde
Packit Service 82fcde
// Software-pipelined loop for unaligned src, one 8-byte word per iteration:
//   stage 0:          load the next aligned src word and the next dest word
//   stage MEMLAT:     shift the two neighbouring aligned src words
//   stage MEMLAT + 3: OR the shifted halves into the realigned src word
//   stage MEMLAT + 4: compare it with the dest word loaded at stage 0
// On a mismatch p6 is set and control leaves through .l2exit.
.l2:
Packit Service 82fcde
(p[0])		ld8	r[0] = [asrc], 8		// r[0] = w1
Packit Service 82fcde
(p[0])		ld8	q[0] = [dest], 8
Packit Service 82fcde
(p[MEMLAT])	shr.u	tmp1[0] = r[1 + MEMLAT], sh1	// tmp1 = w0 >> sh1
Packit Service 82fcde
(p[MEMLAT])	shl	tmp2[0] = r[0 + MEMLAT], sh2  	// tmp2 = w1 << sh2
Packit Service 82fcde
(p[MEMLAT+4])	cmp.ne	p6, p0 = q[MEMLAT + 4], val[1]
Packit Service 82fcde
(p[MEMLAT+3])	or	val[0] = tmp1[3], tmp2[3] 	// val = tmp1 | tmp2
Packit Service 82fcde
(p6)		br.cond.spnt .l2exit
Packit Service 82fcde
		br.ctop.sptk    .l2
Packit Service 82fcde
		br.cond.sptk .cmpfew	// all words equal: compare the tail bytes
Packit Service 82fcde
// Mismatch exit of .l3: pick up the differing words, byte-reversed, then
// clear p6 so that the predicated mux1 pair at .l2exit is skipped.
.l3exit:
Packit Service 82fcde
	mux1	value1 = r[MEMLAT], @rev
Packit Service 82fcde
	mux1	value2 = q[MEMLAT], @rev
Packit Service 82fcde
	cmp.ne	p6, p0 = r0, r0	;;	// clear p6
Packit Service 82fcde
// Mismatch exit of .l2 (entered with p6 set) and common tail of .l3exit
// (entered with p6 clear).  The words are byte-reversed so that, in the
// little endian mode this file assumes, the byte at the lowest address
// becomes the most significant one; the unsigned compare is then decided
// by the first differing byte.
.l2exit:
Packit Service 82fcde
(p6)	mux1	value1 = val[1], @rev
Packit Service 82fcde
(p6)	mux1	value2 = q[MEMLAT + 4], @rev ;;
Packit Service 82fcde
	// p6 = dest-side word < src-side word (unsigned), p7 = the opposite.
	cmp.ltu	p6, p7 = value2, value1 ;;
Packit Service 82fcde
(p6)	mov	ret0 = -1
Packit Service 82fcde
(p7)	mov	ret0 = 1
Packit Service 82fcde
	mov     pr = saved_pr, -1    	// restore the predicate registers
Packit Service 82fcde
	mov 	ar.lc = saved_lc	// restore the loop counter
Packit Service 82fcde
	br.ret.sptk.many b0
Packit Service 82fcde
// Both pointers are 8-byte aligned: no shifting is needed, so the pipeline
// is only MEMLAT + 1 stages deep.
.src_aligned:
Packit Service 82fcde
	cmp.ne	p6, p0 = r0, r0		// clear p6
Packit Service 82fcde
	mov     ar.ec = MEMLAT + 1 ;;	// set EC
Packit Service 82fcde
// Software-pipelined loop for aligned src: load one word from each side at
// stage 0 and compare the pair at stage MEMLAT.
.l3:
Packit Service 82fcde
(p[0])		ld8	r[0] = [src], 8
Packit Service 82fcde
(p[0])		ld8	q[0] = [dest], 8
Packit Service 82fcde
(p[MEMLAT])	cmp.ne	p6, p0 = r[MEMLAT], q[MEMLAT]
Packit Service 82fcde
(p6)		br.cond.spnt .l3exit
Packit Service 82fcde
		br.ctop.dptk .l3 ;;
Packit Service 82fcde
// Byte-by-byte comparison of the len bytes that remain at src / dest.
// Reached directly for short inputs, or after a word loop found no
// difference.
.cmpfew:
Packit Service 82fcde
	cmp.eq	p6, p0 = len, r0	// is len == 0 ?
Packit Service 82fcde
	adds	len = -1, len		// --len;
Packit Service 82fcde
(p6)	br.cond.spnt	.restore_and_exit ;;
Packit Service 82fcde
	mov	ar.lc = len
Packit Service 82fcde
.l4:
Packit Service 82fcde
	ld1	value1 = [src], 1	// value1 = *src++
Packit Service 82fcde
	ld1	value2 = [dest], 1	// value2 = *dest++
Packit Service 82fcde
	;;
Packit Service 82fcde
	cmp.ne	p6, p0 = value1, value2
Packit Service 82fcde
(p6)	br.cond.spnt	.done
Packit Service 82fcde
	br.cloop.dptk	.l4 ;;
Packit Service 82fcde
// Reached with p6 set from a byte-loop mismatch, or with p6 clear when .l4
// runs to completion; in the latter case ret0 keeps its initial 0.
.done:
Packit Service 82fcde
(p6)	sub	ret0 = value2, value1	// don't execute it if falling thru
Packit Service 82fcde
.restore_and_exit:
Packit Service 82fcde
	mov     pr = saved_pr, -1    	// restore the predicate registers
Packit Service 82fcde
	mov 	ar.lc = saved_lc	// restore the loop counter
Packit Service 82fcde
	br.ret.sptk.many b0
Packit Service 82fcde
END(memcmp)
Packit Service 82fcde
Packit Service 82fcde
// NOTE(review): weak_alias and libc_hidden_builtin_def are macros defined
// outside this file.  Presumably the first makes bcmp a weak alias of
// memcmp and the second sets up the hidden internal definition of memcmp;
// confirm against their definitions.
weak_alias (memcmp, bcmp)
Packit Service 82fcde
libc_hidden_builtin_def (memcmp)