Blame sysdeps/s390/multiarch/strrchr-vx.S

Packit Bot 0c2104
/* Vector optimized 32/64 bit S/390 version of strrchr.
Packit Bot 0c2104
   Copyright (C) 2015-2018 Free Software Foundation, Inc.
Packit Bot 0c2104
   This file is part of the GNU C Library.
Packit Bot 0c2104
Packit Bot 0c2104
   The GNU C Library is free software; you can redistribute it and/or
Packit Bot 0c2104
   modify it under the terms of the GNU Lesser General Public
Packit Bot 0c2104
   License as published by the Free Software Foundation; either
Packit Bot 0c2104
   version 2.1 of the License, or (at your option) any later version.
Packit Bot 0c2104
Packit Bot 0c2104
   The GNU C Library is distributed in the hope that it will be useful,
Packit Bot 0c2104
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Bot 0c2104
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Bot 0c2104
   Lesser General Public License for more details.
Packit Bot 0c2104
Packit Bot 0c2104
   You should have received a copy of the GNU Lesser General Public
Packit Bot 0c2104
   License along with the GNU C Library; if not, see
Packit Bot 0c2104
   <http://www.gnu.org/licenses/>.  */
Packit Bot 0c2104
Packit Bot 0c2104
#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
Packit Bot 0c2104
Packit Bot 0c2104
# include "sysdep.h"
Packit Bot 0c2104
# include "asm-syntax.h"
Packit Bot 0c2104
Packit Bot 0c2104
	.text
Packit Bot 0c2104
Packit Bot 0c2104
/* char *strrchr (const char *s, int c)
   Locate the last character c in string.

   Register usage:
   -r0=loaded bytes in first part of s.
   -r1=offset (relative to s) of the part of s that contains the last
       occurrence of c found so far, or -1 if c was not found yet.
   -r2=s
   -r3=c
   -r4=tmp
   -r5=current_len
   -v16=part of s
   -v17=index of found element
   -v18=replicated c
   -v19=part of s with last occurrence of c.
   -v20=permute pattern
*/
Packit Bot 0c2104
ENTRY(__strrchr_vx)
	.machine "z13"
	.machinemode "zarch_nohighgprs"

	/* In:  %r2 = s, %r3 = c (only its low byte is searched for).
	   Out: %r2 = address of the last c in s, or 0 if c does not occur.
	   Registers written: %r0-%r5 and %v16-%v20.

	   Strategy: s is scanned front to back in parts of up to 16 bytes.
	   Whenever a part contains c, the part is saved in v19 and its
	   offset in %r1.  Once the terminating zero is reached, the bytes
	   of v19 that do not belong to s are shifted out, v19 is reversed
	   and the first c of the reversed part gives the last c of s.  */

	vlbb	%v16,0(%r2),6	/* Load s until next 4k-byte boundary.  */
	lcbb	%r0,0(%r2),6	/* Get bytes to 4k-byte boundary or 16.  */

	vlvgb	%v18,%r3,0	/* Generate vector which elements are all c.
				   if c > 255, c will be truncated.  */
	vrepb	%v18,%v18,0

	lghi	%r1,-1		/* Currently no c found.  */
	lghi	%r5,0		/* current_len = 0.  */

	vfeezbs	%v17,%v16,%v18	/* Find element equal or zero.  */
	vlgvb	%r4,%v17,7	/* Load byte index of c/zero or 16.  */
	clrjl	%r4,%r0,.Lfound_first_part /* Found c/zero in loaded bytes.  */
.Lalign:
	/* Align s to 16 byte.  */
	risbgn	%r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2, i.e. %r2 & 15.  */
	lghi	%r5,16		/* current_len = 16.  */
	slr	%r5,%r4		/* Compute bytes to 16bytes boundary.  */

.Lloop:
	/* Four 16-byte parts per iteration.  %r5 is only advanced after
	   the fourth part; the .Lfound16/32/48 entries add the missing
	   multiples of 16 for a hit in parts two to four.  */
	vl	%v16,0(%r5,%r2) /* Load s.  */
	vfeezbs	%v17,%v16,%v18	/* Find element equal with zero search.  */
	jno	.Lfound		/* Found c/zero (cc=0|1|2).  */
	vl	%v16,16(%r5,%r2)
	vfeezbs	%v17,%v16,%v18
	jno	.Lfound16
	vl	%v16,32(%r5,%r2)
	vfeezbs	%v17,%v16,%v18
	jno	.Lfound32
	vl	%v16,48(%r5,%r2)
	vfeezbs	%v17,%v16,%v18
	jno	.Lfound48

	aghi	%r5,64
	j	.Lloop		/* No character and no zero -> loop.  */

.Lfound48:
	la	%r5,16(%r5)	/* Use la since aghi would clobber cc.  */
.Lfound32:
	la	%r5,16(%r5)
.Lfound16:
	la	%r5,16(%r5)
.Lfound:
	/* The condition code of the last vfeezbs is still valid here.  */
	je	.Lzero		/* Found zero, but no c before that zero.  */
	/* Save this part of s to check for further matches after reaching
	   the end of the complete string.  */
	vlr	%v19,%v16
	lgr	%r1,%r5		/* Remember the offset of the saved part.  */

	jh	.Lzero		/* Found a zero after the found c.  */
	aghi	%r5,16		/* Start search of next part of s.  */
	j	.Lloop

.Lfound_first_part:
	/* This code is only executed if the found c/zero is within loaded
	   bytes. If no c/zero was found (cc==3) the found index = 16, thus
	   this code is not called.
	   Resulting condition code of vector find element equal:
	   cc==0: no c, found zero
	   cc==1: c found, no zero
	   cc==2: c found, found zero after c
	   cc==3: no c, no zero (this case can be ignored).  */
	je	.Lzero		/* Found zero, but no c before that zero.  */

	locgrne	%r1,%r5		/* Mark c as found in first part of s
				   (%r5 is 0 here).  */
	vlr	%v19,%v16

	jl	.Lalign		/* No zero (e.g. if vr was fully loaded)
				   -> Align and loop afterwards.  */

	/* Found a zero in vr. If vr was not fully loaded due to block
	   boundary, the bytes behind the loaded ones do not belong to s,
	   so the zero indicated by the condition code may lie outside of
	   the loaded bytes. Search the zero again and check its index.  */

	vfenezb	%v17,%v16,%v16	/* Find zero.  */
	vlgvb	%r4,%v17,7	/* Load byte index of zero or 16.  */
	clrjl	%r4,%r0,.Lzero	/* Zero within loaded bytes -> end.  */
	j	.Lalign		/* Align and loop afterwards.  */

.Lend_searched_zero:
	/* The searched character is the terminating zero itself.  */
	vlgvb	%r4,%v17,7	/* Load byte index of zero.  */
	algr	%r5,%r4
	la	%r2,0(%r5,%r2)	/* Return pointer to zero.  */
	br	%r14

.Lzero:
	/* Reached end of string. Check if one c was found before.
	   Only the low byte of c is tested, matching the truncation done
	   when v18 was built, so a c whose low byte is zero is handled as
	   a search for the terminating zero.  */
	tmll	%r3,255		/* cc=0 if the low byte of c is zero.  */
	jz	.Lend_searched_zero /* Found zero and c is zero.  */

	cgfi	%r1,-1		/* No c found -> return NULL.  */
	locghie	%r2,0
	ber	%r14

	larl	%r3,.Lpermute_mask /* Load permute mask.  */
	vl	%v20,0(%r3)

	/* c was found and is part of v19.  */
	vfenezb	%v17,%v19,%v19	/* Find zero.  */
	vlgvb	%r4,%v17,7	/* Load byte index of zero or 16.  */

	/* %r1 is the offset of the part saved in v19; it is 0 only for the
	   first part, which may hold fewer than 16 loaded bytes (%r0).  */
	clgfi	%r1,0		/* Loaded byte count in v19 is 16, ...  */
	lochine	%r0,16		/* ... if v19 is not the first part of s.  */
	ahi	%r0,-1		/* Convert byte count to highest index.  */

	clr	%r0,%r4
	locrl	%r4,%r0		/* r4 = min (zero-index, highest-index).  */

	/* Right-shift of v19 to mask bytes after zero.  */
	clije	%r4,15,.Lzero_permute /* No shift is needed if highest index
					 in vr is 15.  */
	lhi	%r0,15
	slr	%r0,%r4		/* Compute byte count for vector shift right.  */
	sll	%r0,3		/* Convert to bit count.  */
	vlvgb	%v17,%r0,7
	vsrlb	%v19,%v19,%v17	/* Vector shift right by byte by number of bytes
				   specified in bits 1-4 of byte 7 in v17.   */

	/* Reverse bytes in v19.  */
.Lzero_permute:
	vperm	%v19,%v19,%v19,%v20 /* Permute v19 to reversed order.  */

	/* Find c in reversed v19.  */
	vfeeb	%v19,%v19,%v18	/* Find c.  */
	la	%r2,0(%r1,%r2)	/* %r2 = address of the saved part in s.  */
	vlgvb	%r3,%v19,7	/* Load byte index of c.  */

	/* Compute index in real s and return.  */
	slgr	%r4,%r3		/* Index of last c within the saved part.  */
	la	%r2,0(%r4,%r2)	/* Return pointer to last c.  */
	br	%r14
.Lpermute_mask:
	/* Byte selectors 15..0 for vperm: reverses the 16 bytes of a vr.  */
	.byte	0x0F,0x0E,0x0D,0x0C,0x0B,0x0A,0x09,0x08
	.byte	0x07,0x06,0x05,0x04,0x03,0x02,0x01,0x00
END(__strrchr_vx)
Packit Bot 0c2104
#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */