Blame sysdeps/s390/multiarch/wcscspn-vx.S

Packit Service 1c5418
/* Vector optimized 32/64 bit S/390 version of wcscspn.
Packit Service 2a648e
   Copyright (C) 2015-2018 Free Software Foundation, Inc.
Packit Service 2a648e
   This file is part of the GNU C Library.
Packit Service 2a648e
Packit Service 2a648e
   The GNU C Library is free software; you can redistribute it and/or
Packit Service 2a648e
   modify it under the terms of the GNU Lesser General Public
Packit Service 2a648e
   License as published by the Free Software Foundation; either
Packit Service 2a648e
   version 2.1 of the License, or (at your option) any later version.
Packit Service 2a648e
Packit Service 2a648e
   The GNU C Library is distributed in the hope that it will be useful,
Packit Service 2a648e
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service 2a648e
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Service 2a648e
   Lesser General Public License for more details.
Packit Service 2a648e
Packit Service 2a648e
   You should have received a copy of the GNU Lesser General Public
Packit Service 2a648e
   License along with the GNU C Library; if not, see
Packit Service 2a648e
   <http://www.gnu.org/licenses/>.  */
Packit Service 2a648e
Packit Service 1c5418
#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
Packit Service 2a648e
Packit Service 2a648e
# include "sysdep.h"
Packit Service 2a648e
# include "asm-syntax.h"
Packit Service 2a648e
Packit Service 2a648e
	.text
Packit Service 2a648e
Packit Service 1c5418
/* size_t wcscspn (const wchar_t *s, const wchar_t * reject)
Packit Service 1c5418
   The wcscspn() function calculates the length of the initial segment
Packit Service 2a648e
   of s which consists entirely of characters not in reject.
Packit Service 2a648e
Packit Service 2a648e
   This method checks the length of reject string. If it fits entirely
Packit Service 2a648e
   in one vector register, a fast algorithm is used, which does not need
Packit Service 2a648e
   to check multiple parts of reject-string. Otherwise a slower full
Packit Service 2a648e
   check of reject-string is used.
Packit Service 2a648e
Packit Service 2a648e
   register overview:
Packit Service 2a648e
   r3:  pointer to start of reject-string
Packit Service 2a648e
   r2:  pointer to start of search-string
Packit Service 2a648e
   r0:  loaded byte count of vlbb search-string
Packit Service 2a648e
   r4:  found byte index
Packit Service 2a648e
   r1:  current return len
Packit Service 2a648e
   v16: search-string
Packit Service 2a648e
   v17: reject-string
Packit Service 2a648e
   v18: temp-vreg
Packit Service 2a648e
Packit Service 2a648e
   ONLY FOR SLOW:
Packit Service 2a648e
   v19: first part of reject-string (saved for a fast reload)
Packit Service 2a648e
   v20: zero for preparing reject-vector
Packit Service 2a648e
   v21: global mask; 1 indicates a match between
Packit Service 2a648e
	search-string-vreg and any reject-character
Packit Service 2a648e
   v22: current mask; 1 indicates a match between
Packit Service 2a648e
	search-string-vreg and any reject-character in current reject-vreg
   v24: all ones; compared with the global mask to find the first match
Packit Service 2a648e
   v30, v31: for re-/storing registers r6, r8, r9
Packit Service 2a648e
   r5:  current len of reject-string
Packit Service 2a648e
   r6:  zero-index in search-string or 16 if no zero
Packit Service 2a648e
	or min(zero-index, loaded byte count)
Packit Service 2a648e
   r8:  >0, if former reject-string-part contains a zero,
Packit Service 2a648e
			otherwise =0;
Packit Service 2a648e
   r9:  loaded byte count of vlbb reject-string
Packit Service 2a648e
*/
Packit Service 1c5418
/* Entry point.  An s that is not 4-byte aligned is handed to the
   common-code variant.  Otherwise the first part of reject is loaded
   into v17 and control goes to .Lfast if reject (including its
   terminating zero) fits in one vector register, else to .Lslow.  */
ENTRY(__wcscspn_vx)
Packit Service 2a648e
	.machine "z13"
Packit Service 2a648e
	.machinemode "zarch_nohighgprs"
Packit Service 2a648e
Packit Service 1c5418
	tmll	%r2,3		/* Test if s is 4-byte aligned.  */
Packit Service 1c5418
	jne	.Lfallback	/* And use common-code variant if not.  */
Packit Service 1c5418
Packit Service 2a648e
	/*
Packit Service 2a648e
	  Check if reject-string fits in one vreg:
Packit Service 2a648e
	  ----------------------------------------
Packit Service 2a648e
	*/
Packit Service 1c5418
	vlbb	%v17,0(%r3),0	/* Load reject up to the block boundary.  */
Packit Service 1c5418
	lcbb	%r0,0(%r3),0	/* r0 = loaded byte count of reject.  */
Packit Service 2a648e
	jo	.Lcheck_onbb	/* Special case if reject
Packit Service 2a648e
				   lies on block-boundary.  */
Packit Service 1c5418
Packit Service 2a648e
.Lcheck_notonbb:
Packit Service 1c5418
	lghi	%r1,0		/* Zero out current len (byte count).  */
Packit Service 1c5418
	vistrfs	%v17,%v17	/* Fill with zeros after first zero.  */
Packit Service 2a648e
	je	.Lfast		/* Zero found -> reject fits in one vreg.  */
Packit Service 2a648e
	j	.Lslow		/* No zero -> reject exceeds one vreg.  */
Packit Service 2a648e
Packit Service 2a648e
Packit Service 2a648e
.Lcheck_onbb:
Packit Service 2a648e
	/* Reject lies on block-boundary.  */
	/* 65532 = 0xfffc: round the loaded byte count down to a multiple
	   of 4, the size of one wchar_t element.  */
Packit Service 1c5418
	nill	%r0,65532	/* Recognize only fully loaded characters.  */
Packit Service 1c5418
	je	.Lcheck_onbb2	/* Reload vr, if we loaded no full wchar_t.  */
Packit Service 1c5418
	vfenezf	%v18,%v17,%v17	/* Search zero in loaded reject bytes.  */
Packit Service 2a648e
	vlgvb	%r4,%v18,7	/* Get index of zero or 16 if not found.  */
Packit Service 2a648e
	clrjl	%r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count ->
Packit Service 2a648e
					    Reject fits in one vreg;
Packit Service 2a648e
					    Fill with zeros and proceed
Packit Service 2a648e
					    with FAST.  */
Packit Service 1c5418
.Lcheck_onbb2:
	/* No terminating zero within the fully loaded characters, so reject
	   continues beyond them: load a full 16-byte vector instead.  */
Packit Service 2a648e
	vl	%v17,0(%r3)	/* Load reject, which exceeds loaded bytes.  */
Packit Service 2a648e
	j	.Lcheck_notonbb /* Check if reject fits in one vreg.  */
Packit Service 2a648e
Packit Service 2a648e
Packit Service 2a648e
	/*
Packit Service 2a648e
	  Search s for reject in one vreg
Packit Service 2a648e
	  -------------------------------
Packit Service 2a648e
	*/
Packit Service 2a648e
.Lfast:
Packit Service 2a648e
	/* On entry the complete reject-string is in v17 and the remaining
	   bytes are zero.  r1 (current len in bytes) is zero.  The first
	   part of s is loaded without crossing a 4k-byte boundary, then s
	   is processed in 16-byte aligned vectors.  */
Packit Service 2a648e
Packit Service 2a648e
	vlbb	%v16,0(%r2),6	/* Load s until next 4k-byte boundary.  */
Packit Service 2a648e
	lcbb	%r0,0(%r2),6	/* Get bytes to 4k-byte boundary or 16.  */
Packit Service 2a648e
Packit Service 1c5418
	vfaezfs	%v18,%v16,%v17,0 /* Find first element in v16
Packit Service 2a648e
				    equal to any in v17
Packit Service 2a648e
				    or first zero element.  */
Packit Service 2a648e
	vlgvb	%r4,%v18,7	/* Load byte index of found element.  */
Packit Service 2a648e
	clrjl	%r4,%r0,.Lfast_loop_found2 /* If found index is within loaded
Packit Service 2a648e
					       bytes, return with found element
Packit Service 2a648e
					       index (r1 is still zero).  */
Packit Service 2a648e
Packit Service 2a648e
	/* Align s to 16 byte.  */
Packit Service 2a648e
	risbgn	%r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15.  */
Packit Service 2a648e
	lghi	%r1,16		/* current_len = 16.  */
Packit Service 2a648e
	slr	%r1,%r4		/* Compute bytes to 16bytes boundary.  */
Packit Service 2a648e
Packit Service 2a648e
	/* Process s in 16byte aligned loop.  The loop body is unrolled
	   four times, r1 advances by 64 bytes per iteration.  */
Packit Service 2a648e
.Lfast_loop:
Packit Service 2a648e
	vl	%v16,0(%r1,%r2)	/* Load search-string.  */
Packit Service 1c5418
	vfaezfs	%v18,%v16,%v17,0 /* Find first element in v16 equal to any
Packit Service 2a648e
				    in v17 or first zero element.  */
Packit Service 2a648e
	jno	.Lfast_loop_found /* Leave loop on equal or zero element.  */
Packit Service 2a648e
Packit Service 2a648e
	vl	%v16,16(%r1,%r2)
Packit Service 1c5418
	vfaezfs	%v18,%v16,%v17,0
Packit Service 2a648e
	jno	.Lfast_loop_found16
Packit Service 2a648e
Packit Service 2a648e
	vl	%v16,32(%r1,%r2)
Packit Service 1c5418
	vfaezfs	%v18,%v16,%v17,0
Packit Service 2a648e
	jno	.Lfast_loop_found32
Packit Service 2a648e
Packit Service 2a648e
	vl	%v16,48(%r1,%r2)
Packit Service 1c5418
	vfaezfs	%v18,%v16,%v17,0
Packit Service 2a648e
	jno	.Lfast_loop_found48
Packit Service 2a648e
Packit Service 2a648e
	aghi	%r1,64
Packit Service 2a648e
	j	.Lfast_loop	/* Loop if no element was equal to reject
Packit Service 2a648e
				   and no zero was found.  */
Packit Service 2a648e
Packit Service 2a648e
	/* Found equal or zero element.  The labels below fall through into
	   each other, adding 16 bytes to current len for every vector that
	   was skipped within the unrolled loop body.  */
Packit Service 2a648e
.Lfast_loop_found48:
Packit Service 2a648e
	aghi	%r1,16
Packit Service 2a648e
.Lfast_loop_found32:
Packit Service 2a648e
	aghi	%r1,16
Packit Service 2a648e
.Lfast_loop_found16:
Packit Service 2a648e
	aghi	%r1,16
Packit Service 2a648e
.Lfast_loop_found:
Packit Service 2a648e
	vlgvb	%r4,%v18,7	/* Load byte index of found element or zero.  */
Packit Service 2a648e
.Lfast_loop_found2:
Packit Service 2a648e
	algrk	%r2,%r1,%r4	/* Add found index to current len.  */
Packit Service 1c5418
	srlg	%r2,%r2,2	/* Convert byte-count to character-count.  */
Packit Service 2a648e
	br	%r14		/* Return character count in r2.  */
Packit Service 2a648e
Packit Service 2a648e
Packit Service 2a648e
Packit Service 2a648e
	/*
Packit Service 2a648e
	  Search s for reject in multiple vregs
Packit Service 2a648e
	  -------------------------------------
Packit Service 2a648e
	*/
Packit Service 2a648e
.Lslow:
Packit Service 2a648e
	/* Save registers.  r6, r8 and r9 are kept in v30/v31 and reloaded
	   at .Lslow_end (presumably because they are call-saved in the
	   s390 ABI; verify against the ABI supplement).  */
Packit Service 2a648e
	vlvgg	%v30,%r6,0
Packit Service 2a648e
	vlvgp	%v31,%r8,%r9
Packit Service 2a648e
Packit Service 2a648e
	/* Reject in v17 without zero.  */
Packit Service 2a648e
	vlr	%v19,%v17	/* Save first reject-part for a fast reload.  */
Packit Service 2a648e
	vzero	%v20		/* Zero for preparing reject-vector.  */
Packit Service 2a648e
	vone	%v24		/* One for checking result of former
Packit Service 2a648e
				   string-part.  */
Packit Service 2a648e
Packit Service 2a648e
	/* Align s to 16 byte.  */
Packit Service 2a648e
	risbg	%r4,%r2,60,128+63,0 /* Test if s is aligned and
Packit Service 2a648e
				       %r4 = bits 60-63 'and' 15.   */
Packit Service 2a648e
	je	.Lslow_loop_str /* If s is aligned, loop aligned.  */
	/* s is not 16-byte aligned: process the bytes up to the next
	   16-byte boundary as a first, shorter string-part.  */
Packit Service 2a648e
	lghi	%r0,15
Packit Service 2a648e
	slr	%r0,%r4		/* Compute highest index to load (15-x).  */
Packit Service 1c5418
	vll	%v16,%r0,0(%r2) /* Load up to 16byte boundary (vll needs
Packit Service 2a648e
				   highest index, remaining bytes are 0).  */
Packit Service 2a648e
	ahi	%r0,1		/* Work with loaded byte count.  */
Packit Service 2a648e
	vzero	%v21		/* Zero out global mask.  */
Packit Service 2a648e
	lghi	%r5,0		/* Set current len of reject-string to zero.  */
Packit Service 1c5418
	vfenezf	%v18,%v16,%v16	/* Find zero in current string-part.  */
Packit Service 2a648e
	lghi	%r8,0		/* There is no zero in first reject-part.  */
Packit Service 2a648e
	vlgvb	%r6,%v18,7	/* Load byte index of zero or 16 if no zero.  */
Packit Service 2a648e
	clije	%r6,0,.Lslow_end /* If first element is zero -> return 0.  */
Packit Service 2a648e
	clr	%r0,%r6		/* cc==1 if loaded byte count < zero-index.  */
Packit Service 2a648e
	locrl	%r6,%r0		/* Load on cc==1; zero-index = lbc.  */
Packit Service 2a648e
	j	.Lslow_loop_acc	/* Check this string-part against reject.  */
Packit Service 2a648e
Packit Service 2a648e
Packit Service 2a648e
	/* Process s in 16byte aligned loop.  .Lslow_next_str is reached
	   after the current string-part was compared with every part of
	   reject; .Lslow_loop_str loads the next string-part.  */
Packit Service 2a648e
.Lslow_next_str:
Packit Service 2a648e
	/* Check results of former processed str-part.  */
Packit Service 1c5418
	vfeef	%v18,%v21,%v24	/* Find first equal match in global mask
Packit Service 2a648e
				   (ones in element).  */
Packit Service 2a648e
	vlgvb	%r4,%v18,7	/* Get index of first one (=equal) or 16.  */
Packit Service 2a648e
	/* Equal-index < min(zero-index, loaded byte count)
Packit Service 2a648e
	   -> Return current len plus equal-index.  */
Packit Service 2a648e
	clrjl	%r4,%r6,.Lslow_index_found
Packit Service 2a648e
	/* Zero-index < loaded byte count
Packit Service 2a648e
	   -> Former str-part was last str-part
Packit Service 1c5418
	   -> Return current len plus zero-index  */
Packit Service 2a648e
	clrjl	%r6,%r0,.Lslow_end_not_found
Packit Service 2a648e
Packit Service 1c5418
	/* All elements are zero (=no match) -> proceed with next str-part.  */
Packit Service 2a648e
	vlr	%v17,%v19	/* Load first part of reject (no zero).  */
Packit Service 2a648e
	algfr	%r1,%r0		/* Add loaded byte count to current len.  */
Packit Service 2a648e
Packit Service 2a648e
.Lslow_loop_str:
Packit Service 2a648e
	vl	%v16,0(%r1,%r2)	/* Load search-string.  */
Packit Service 2a648e
	lghi	%r0,16		/* Loaded byte count is 16.  */
Packit Service 2a648e
	vzero	%v21		/* Zero out global mask.  */
Packit Service 2a648e
	lghi	%r5,0		/* Set current len of reject to zero.  */
Packit Service 1c5418
	vfenezf	%v18,%v16,%v16	/* Find zero in current string-part.  */
Packit Service 2a648e
	lghi	%r8,0		/* There is no zero in first reject-part.  */
Packit Service 2a648e
	vlgvb	%r6,%v18,7	/* Load byte index of zero or 16 if no zero.  */
Packit Service 2a648e
	clije	%r6,0,.Lslow_end /* If first element is zero (end of string)
Packit Service 2a648e
				     -> Return current length.  */
Packit Service 2a648e
Packit Service 2a648e
.Lslow_loop_acc:
	/* Compare the current string-part (v16) with the current
	   reject-part (v17) and merge the result into the global mask.  */
Packit Service 1c5418
	vfaef	%v22,%v16,%v17,4 /* Create matching-mask (1 in mask ->
Packit Service 2a648e
				    Character matches any rejected character in
Packit Service 2a648e
				    this reject-string-part) IN=0, RT=1.  */
Packit Service 1c5418
	vlgvf	%r4,%v22,0	/* Get result of first element.  */
Packit Service 2a648e
	/* First element is equal to any rejected characters?
Packit Service 1c5418
	   (All other parts of reject cannot lead to a match before this one)
Packit Service 2a648e
	   -> Return current len, which is pointing to this element.  */
Packit Service 2a648e
	clijh	%r4,0,.Lslow_end
Packit Service 2a648e
	vo	%v21,%v21,%v22	/* Global-mask = global-|matching-mask.  */
Packit Service 2a648e
	/* Proceed with next reject-part until end of reject is reached.  */
Packit Service 2a648e
Packit Service 2a648e
Packit Service 2a648e
.Lslow_next_acc:
	/* Load and prepare the next 16-byte part of reject, or evaluate the
	   global mask if the previous part already held the end of reject.  */
Packit Service 2a648e
	clijh	%r8,0,.Lslow_next_str /* There was a zero in last reject-part
Packit Service 2a648e
					  -> Whole reject was compared;
Packit Service 2a648e
					     evaluate the global mask.  */
Packit Service 2a648e
	vlbb	%v17,16(%r5,%r3),6 /* Load next reject part.  */
Packit Service 2a648e
	aghi	%r5,16		/* Increment current len of reject-string.  */
Packit Service 2a648e
	lcbb	%r9,0(%r5,%r3),6 /* Get loaded byte count of reject-string.  */
Packit Service 2a648e
	jo	.Lslow_next_acc_onbb /* Jump away if reject-string is
Packit Service 2a648e
					 on block-boundary.  */
Packit Service 2a648e
.Lslow_next_acc_notonbb:
Packit Service 1c5418
	vistrfs	%v17,%v17	/* Fill with zeros after first zero.  */
Packit Service 2a648e
	jo	.Lslow_loop_acc /* No zero found -> no preparation needed.  */
Packit Service 2a648e
Packit Service 2a648e
.Lslow_next_acc_prepare_zero:
Packit Service 2a648e
	/* Zero in reject-part: fill zeros with first-reject-character.
	   Presumably this keeps the zero padding from matching zero
	   elements of the string-part in .Lslow_loop_acc - verify.  */
Packit Service 1c5418
	vlgvf	%r8,%v17,0	/* Load first element of reject-part.  */
Packit Service 2a648e
	clije	%r8,0,.Lslow_next_str /* Process next str-part if first
Packit Service 2a648e
					  character in this part of reject
Packit Service 2a648e
					  is a zero.  */
Packit Service 2a648e
	/* r8>0 -> zero found in this reject-part.  */
Packit Service 1c5418
	vrepf	%v18,%v17,0	/* Replicate first char across all chars.  */
Packit Service 1c5418
	vceqf	%v22,%v20,%v17	/* Create a mask (v22) of null chars
Packit Service 2a648e
				   by comparing with 0 (v20).  */
Packit Service 2a648e
	vsel	%v17,%v18,%v17,%v22 /* Replace null chars with first char.  */
Packit Service 2a648e
	j	.Lslow_loop_acc /* Reject-string part is prepared.  */
Packit Service 2a648e
Packit Service 2a648e
.Lslow_next_acc_onbb:
	/* Reject-part lies on block-boundary, only r9 bytes were loaded.
	   65532 = 0xfffc rounds r9 down to a multiple of 4, the size of
	   one wchar_t element.  */
Packit Service 1c5418
	nill	%r9,65532	/* Recognize only fully loaded characters.  */
Packit Service 1c5418
	je	.Lslow_next_acc_onbb2 /* Reload vr, if no full wchar_t
Packit Service 1c5418
					  loaded.  */
Packit Service 1c5418
	vfenezf	%v18,%v17,%v17	/* Find zero in loaded bytes of reject part.  */
Packit Service 2a648e
	vlgvb	%r8,%v18,7	/* Load byte index of zero.  */
Packit Service 2a648e
	clrjl	%r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes
Packit Service 2a648e
						    -> Prepare vreg.  */
Packit Service 1c5418
.Lslow_next_acc_onbb2:
Packit Service 2a648e
	vl	%v17,0(%r5,%r3)	/* Load over boundary ...  */
Packit Service 2a648e
	lghi	%r8,0		/* r8=0 -> no zero in this part of reject,
Packit Service 2a648e
				   check for zero is in jump-target.  */
Packit Service 2a648e
	j	.Lslow_next_acc_notonbb /* ... and search for zero in
Packit Service 2a648e
					    fully loaded vreg again.  */
Packit Service 2a648e
Packit Service 2a648e
.Lslow_end_not_found:
	/* End of s reached without a match: result is the length of s.  */
Packit Service 2a648e
	algfr	%r1,%r6		/* Add zero-index to current len.  */
Packit Service 2a648e
	j	.Lslow_end
Packit Service 2a648e
.Lslow_index_found:
Packit Service 2a648e
	algfr	%r1,%r4		/* Add found index of char to current len.  */
Packit Service 2a648e
.Lslow_end:
	/* Common exit of the slow path: r1 holds the result in bytes.  */
Packit Service 1c5418
	srlg	%r2,%r1,2	/* Convert byte-count to character-count.  */
Packit Service 2a648e
	/* Restore registers.  */
Packit Service 2a648e
	vlgvg	%r6,%v30,0	/* r6 was saved in element 0 of v30.  */
Packit Service 2a648e
	vlgvg	%r8,%v31,0	/* r8 and r9 were saved as a pair in v31.  */
Packit Service 2a648e
	vlgvg	%r9,%v31,1
Packit Service 2a648e
	br	%r14		/* Return character count in r2.  */
Packit Service 1c5418
.Lfallback:
	/* Reached from the entry check when s is not 4-byte aligned: jump
	   to the common-code variant.  No register has been modified up to
	   this point, so the arguments in r2 and r3 are passed on as-is.  */
Packit Service 1c5418
	jg	__wcscspn_c
Packit Service 1c5418
END(__wcscspn_vx)
Packit Service 1c5418
#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */