Blame sysdeps/s390/multiarch/wcsspn-vx.S

Packit Bot 0c2104
/* Vector optimized 32/64 bit S/390 version of wcsspn.
Packit Bot 0c2104
   Copyright (C) 2015-2018 Free Software Foundation, Inc.
Packit Bot 0c2104
   This file is part of the GNU C Library.
Packit Bot 0c2104
Packit Bot 0c2104
   The GNU C Library is free software; you can redistribute it and/or
Packit Bot 0c2104
   modify it under the terms of the GNU Lesser General Public
Packit Bot 0c2104
   License as published by the Free Software Foundation; either
Packit Bot 0c2104
   version 2.1 of the License, or (at your option) any later version.
Packit Bot 0c2104
Packit Bot 0c2104
   The GNU C Library is distributed in the hope that it will be useful,
Packit Bot 0c2104
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Bot 0c2104
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Bot 0c2104
   Lesser General Public License for more details.
Packit Bot 0c2104
Packit Bot 0c2104
   You should have received a copy of the GNU Lesser General Public
Packit Bot 0c2104
   License along with the GNU C Library; if not, see
Packit Bot 0c2104
   <http://www.gnu.org/licenses/>.  */
Packit Bot 0c2104
Packit Bot 0c2104
#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
Packit Bot 0c2104
Packit Bot 0c2104
# include "sysdep.h"
Packit Bot 0c2104
# include "asm-syntax.h"
Packit Bot 0c2104
Packit Bot 0c2104
	.text
Packit Bot 0c2104
Packit Bot 0c2104
/* size_t wcsspn (const wchar_t *s, const wchar_t * accept)
Packit Bot 0c2104
   The wcsspn() function calculates the length of the initial segment
Packit Bot 0c2104
   of s which consists entirely of characters in accept.
Packit Bot 0c2104
Packit Bot 0c2104
   This method checks the length of accept string. If it fits entirely
Packit Bot 0c2104
   in one vector register, a fast algorithm is used, which does not need
Packit Bot 0c2104
   to check multiple parts of accept-string. Otherwise a slower full
Packit Bot 0c2104
   check of accept-string is used.
Packit Bot 0c2104
Packit Bot 0c2104
   register overview:
Packit Bot 0c2104
   r3:  pointer to start of accept-string
Packit Bot 0c2104
   r2:  pointer to start of search-string
Packit Bot 0c2104
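   r4:  loaded byte count of vl/vll search-string (slow path); also used as
	scratch for the loaded byte count of accept and the 16-byte offset of s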
Packit Bot 0c2104
   r0:  found byte index
Packit Bot 0c2104
   r1:  current return len of s
Packit Bot 0c2104
   v16: search-string
Packit Bot 0c2104
   v17: accept-string
Packit Bot 0c2104
   v18: temp-vreg
Packit Bot 0c2104
Packit Bot 0c2104
   ONLY FOR SLOW:
Packit Bot 0c2104
   v19: first accept-string
Packit Bot 0c2104
   v20: zero for preparing acc-vector
Packit Bot 0c2104
   v21: global mask; 1 indicates a match between
Packit Bot 0c2104
	search-string-vreg and any accept-character
Packit Bot 0c2104
   v22: current mask; 1 indicates a match between
Packit Bot 0c2104
	search-string-vreg and any accept-character in current acc-vreg
Packit Bot 0c2104
   v30, v31: for re-/storing registers r6, r8, r9
Packit Bot 0c2104
   r5:  current len of accept-string
Packit Bot 0c2104
   r6:	zero-index in search-string or 16 if no zero
Packit Bot 0c2104
	or min(zero-index, loaded byte count)
Packit Bot 0c2104
   r8:	>0, if former accept-string-part contains a zero,
Packit Bot 0c2104
	otherwise =0;
Packit Bot 0c2104
   r9: loaded byte count of vlbb accept-string
Packit Bot 0c2104
*/
Packit Bot 0c2104
ENTRY(__wcsspn_vx)
Packit Bot 0c2104
	.machine "z13"
Packit Bot 0c2104
	.machinemode "zarch_nohighgprs"
Packit Bot 0c2104
Packit Bot 0c2104
	/* In:  %r2 = s (search-string), %r3 = accept-string.
	   Out: %r2 = number of leading characters of s that occur in accept.
	   The vector code below is only used if s is 4-byte aligned;
	   otherwise control is handed to __wcsspn_c.  */
	tmll	%r2,3		/* Test if s is 4-byte aligned?  */
Packit Bot 0c2104
	jne	.Lfallback	/* And use common-code variant if not.  */
Packit Bot 0c2104
Packit Bot 0c2104
	/*
Packit Bot 0c2104
	  Check if accept-string fits in one vreg:
Packit Bot 0c2104
	  ----------------------------------------
Packit Bot 0c2104
	*/
Packit Bot 0c2104
	vlbb	%v17,0(%r3),6	/* Load accept until next 4k-byte boundary.  */
Packit Bot 0c2104
	lcbb	%r4,0(%r3),6	/* Get bytes to 4k-byte boundary or 16.  */
Packit Bot 0c2104
	jo	.Lcheck_onbb	/* Special case if accept lies
Packit Bot 0c2104
				   on block-boundary.  */
Packit Bot 0c2104
.Lcheck_notonbb:
Packit Bot 0c2104
	vistrfs	%v17,%v17	/* Fill with zeros after first zero.  */
Packit Bot 0c2104
	je	.Lfast		/* Zero found -> accept fits in one vreg.  */
Packit Bot 0c2104
	j	.Lslow		/* No zero -> accept exceeds one vreg.  */
Packit Bot 0c2104
Packit Bot 0c2104
.Lcheck_onbb:
Packit Bot 0c2104
	/* Accept lies on block-boundary.  */
Packit Bot 0c2104
	nill	%r4,65532	/* Recognize only fully loaded characters
				   (65532 = 0xfffc: round the byte count
				   down to a multiple of 4).  */
Packit Bot 0c2104
	je	.Lcheck_onbb2	/* Reload vr if no full wchar_t.  */
Packit Bot 0c2104
	vfenezf	%v18,%v17,%v17	/* Search zero in loaded accept bytes.  */
Packit Bot 0c2104
	vlgvb	%r0,%v18,7	/* Get index of zero or 16 if not found.  */
Packit Bot 0c2104
	clrjl	%r0,%r4,.Lcheck_notonbb /* Zero index < loaded bytes count ->
Packit Bot 0c2104
					    Accept fits in one vreg;
Packit Bot 0c2104
					    Fill with zeros and proceed
Packit Bot 0c2104
					    with FAST.  */
Packit Bot 0c2104
.Lcheck_onbb2:
Packit Bot 0c2104
	vl	%v17,0(%r3)	/* Load accept, which exceeds loaded bytes.  */
Packit Bot 0c2104
	j	.Lcheck_notonbb /* Check if accept fits in one vreg.  */
Packit Bot 0c2104
Packit Bot 0c2104
Packit Bot 0c2104
	/*
Packit Bot 0c2104
	  Search s for accept in one vreg
Packit Bot 0c2104
	  -------------------------------
Packit Bot 0c2104
	*/
Packit Bot 0c2104
.Lfast:
Packit Bot 0c2104
	/* Complete accept-string in v17 and remaining bytes are zero.  */
Packit Bot 0c2104
Packit Bot 0c2104
	vlbb	%v16,0(%r2),6	/* Load s until next 4k-byte boundary.  */
Packit Bot 0c2104
	lcbb	%r1,0(%r2),6	/* Get bytes to 4k-byte boundary or 16.  */
Packit Bot 0c2104
Packit Bot 0c2104
	vfaezfs	%v16,%v16,%v17,8 /* Find first element in v16
Packit Bot 0c2104
				    unequal to any in v17
Packit Bot 0c2104
				    or first zero element.  */
Packit Bot 0c2104
Packit Bot 0c2104
	vlgvb	%r0,%v16,7	/* Load byte index of found element.  */
Packit Bot 0c2104
	/* If found index is within loaded bytes (%r0 < %r1),
Packit Bot 0c2104
	   return with found element index (=equal count).  */
Packit Bot 0c2104
	clr	%r0,%r1
Packit Bot 0c2104
	/* The srlg below must not disturb the condition code of the clr:
	   both locgrl and blr still depend on it.  */
	srlg	%r0,%r0,2	/* Convert byte-count to character-count.  */
Packit Bot 0c2104
	locgrl	%r2,%r0
Packit Bot 0c2104
	blr	%r14
Packit Bot 0c2104
Packit Bot 0c2104
	/* Align s to 16 byte.  */
Packit Bot 0c2104
	risbgn	%r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15.  */
Packit Bot 0c2104
	lghi	%r1,16		/* current_len = 16.  */
Packit Bot 0c2104
	slr	%r1,%r4		/* Compute bytes to 16bytes boundary.  */
Packit Bot 0c2104
Packit Bot 0c2104
	/* Loop is unrolled four times; %r1 is the byte offset into s of the
	   first of the four vectors handled per iteration.  */
.Lfast_loop:
Packit Bot 0c2104
	vl	%v16,0(%r1,%r2)	/* Load search-string.  */
Packit Bot 0c2104
	vfaezfs	%v16,%v16,%v17,8 /* Find first element in v16
Packit Bot 0c2104
				    unequal to any in v17
Packit Bot 0c2104
				    or first zero element.  */
Packit Bot 0c2104
	jno	.Lfast_loop_found
Packit Bot 0c2104
	vl	%v16,16(%r1,%r2)
Packit Bot 0c2104
	vfaezfs	%v16,%v16,%v17,8
Packit Bot 0c2104
	jno	.Lfast_loop_found16
Packit Bot 0c2104
	vl	%v16,32(%r1,%r2)
Packit Bot 0c2104
	vfaezfs	%v16,%v16,%v17,8
Packit Bot 0c2104
	jno	.Lfast_loop_found32
Packit Bot 0c2104
	vl	%v16,48(%r1,%r2)
Packit Bot 0c2104
	vfaezfs	%v16,%v16,%v17,8
Packit Bot 0c2104
	jno	.Lfast_loop_found48
Packit Bot 0c2104
Packit Bot 0c2104
	aghi	%r1,64
Packit Bot 0c2104
	j	.Lfast_loop	/* Loop if no element was unequal to accept
Packit Bot 0c2104
				   and not zero.  */
Packit Bot 0c2104
Packit Bot 0c2104
	/* Found unequal or zero element.
	   The labels fall through into each other, adding 16 per skipped
	   vector, so that %r1 ends up as the byte offset of the vector
	   in which the element was found.  */
Packit Bot 0c2104
.Lfast_loop_found48:
Packit Bot 0c2104
	aghi	%r1,16
Packit Bot 0c2104
.Lfast_loop_found32:
Packit Bot 0c2104
	aghi	%r1,16
Packit Bot 0c2104
.Lfast_loop_found16:
Packit Bot 0c2104
	aghi	%r1,16
Packit Bot 0c2104
.Lfast_loop_found:
Packit Bot 0c2104
	vlgvb	%r0,%v16,7	/* Load byte index of found element.  */
Packit Bot 0c2104
	algrk	%r2,%r1,%r0	/* And add it to current len.  */
Packit Bot 0c2104
	srlg	%r2,%r2,2	/* Convert byte-count to character-count.  */
Packit Bot 0c2104
	br	%r14
Packit Bot 0c2104
Packit Bot 0c2104
Packit Bot 0c2104
	/*
Packit Bot 0c2104
	  Search s for accept in multiple vregs
Packit Bot 0c2104
	  -------------------------------------
Packit Bot 0c2104
	*/
Packit Bot 0c2104
.Lslow:
Packit Bot 0c2104
	/* Save registers (%r6 in v30, %r8 and %r9 in v31); they are
	   restored from there in .Lslow_add_lbc_end.  */
Packit Bot 0c2104
	vlvgg	%v30,%r6,0
Packit Bot 0c2104
	vlvgp	%v31,%r8,%r9
Packit Bot 0c2104
	lghi	%r1,0		/* Zero out current len.  */
Packit Bot 0c2104
Packit Bot 0c2104
	/* accept in v17 without zero.  */
Packit Bot 0c2104
	vlr	%v19,%v17	/* Save first acc-part for a fast reload.  */
Packit Bot 0c2104
	vzero	%v20		/* Zero for preparing acc-vector.  */
Packit Bot 0c2104
Packit Bot 0c2104
	/* Align s to 16 byte.  */
Packit Bot 0c2104
	risbg	%r0,%r2,60,128+63,0 /* Test if s is aligned and
Packit Bot 0c2104
				     %r0 = bits 60-63 'and' 15.  */
Packit Bot 0c2104
	je	.Lslow_loop_str /* If s is aligned, loop aligned */
Packit Bot 0c2104
	lghi	%r4,15
Packit Bot 0c2104
	slr	%r4,%r0		/* Compute highest index to load (15-x).  */
Packit Bot 0c2104
	vll	%v16,%r4,0(%r2) /* Load up to 16byte boundary (vll needs
Packit Bot 0c2104
				   highest index, remaining bytes are 0).  */
Packit Bot 0c2104
	aghi	%r4,1		/* Work with loaded byte count.  */
Packit Bot 0c2104
	vzero	%v21		/* Zero out global mask.  */
Packit Bot 0c2104
	lghi	%r5,0		/* Set current len of accept-string to zero.  */
Packit Bot 0c2104
	vfenezf	%v18,%v16,%v16	/* Find zero in current string-part.  */
Packit Bot 0c2104
	lghi	%r8,0		/* There is no zero in first accept-part.  */
Packit Bot 0c2104
	vlgvb	%r6,%v18,7	/* Load byte index of zero or 16
Packit Bot 0c2104
				   if there is no zero.  */
Packit Bot 0c2104
	/* %r6 = min(zero-index, loaded byte count).  */
	clr	%r4,%r6		/* cc==1 if loaded byte count < zero-index.  */
Packit Bot 0c2104
	locrl	%r6,%r4		/* Load on cc==1.  */
Packit Bot 0c2104
	j	.Lslow_loop_acc
Packit Bot 0c2104
Packit Bot 0c2104
	/* Process s in 16byte aligned loop.  */
Packit Bot 0c2104
.Lslow_next_str:
Packit Bot 0c2104
	vlr	%v17,%v19	/* Load first part of accept (no zero).  */
Packit Bot 0c2104
	algfr	%r1,%r4		/* Add loaded byte count to current len.  */
Packit Bot 0c2104
.Lslow_loop_str:
Packit Bot 0c2104
	vl	%v16,0(%r1,%r2)	/* Load search-string.  */
Packit Bot 0c2104
	lghi	%r4,16		/* Loaded byte count is 16.  */
Packit Bot 0c2104
	vzero	%v21		/* Zero out global mask.  */
Packit Bot 0c2104
	lghi	%r5,0		/* Set current len of accept-string to zero.  */
Packit Bot 0c2104
	vfenezf	%v18,%v16,%v16	/* Find zero in current string-part.  */
Packit Bot 0c2104
	lghi	%r8,0		/* There is no zero in first accept-part.  */
Packit Bot 0c2104
	vlgvb	%r6,%v18,7	/* Load byte index of zero or 16 if no zero.  */
Packit Bot 0c2104
Packit Bot 0c2104
	/* Compare the current string-part (v16) with the current
	   accept-part (v17) and accumulate the matches in v21.  */
.Lslow_loop_acc:
Packit Bot 0c2104
	vfaef	%v22,%v16,%v17,4 /* Create matching-mask (1 in mask ->
Packit Bot 0c2104
				    character matches any accepted character in
Packit Bot 0c2104
				    this accept-string-part) IN=0, RT=1.  */
Packit Bot 0c2104
	vo	%v21,%v21,%v22	/* global-mask = global- | matching-mask.  */
Packit Bot 0c2104
	vfenezf	%v18,%v21,%v21	/* Find first zero in global-mask.  */
Packit Bot 0c2104
	vlgvb	%r0,%v18,7	/* Get first found zero-index
Packit Bot 0c2104
				   (= first mismatch).  */
Packit Bot 0c2104
	clrjl	%r0,%r6,.Lslow_next_acc /* Mismatch-index < min(lbc,zero-index)
Packit Bot 0c2104
					   -> Process this string-part
Packit Bot 0c2104
					      with next acc-part.  */
Packit Bot 0c2104
	clrjhe	%r0,%r4,.Lslow_next_str /* Found-index >= loaded byte count
Packit Bot 0c2104
					   -> All loaded bytes are matching
Packit Bot 0c2104
					      any accept-character
Packit Bot 0c2104
					      and are not zero.  */
Packit Bot 0c2104
	/* All bytes are matching any characters in accept-string
Packit Bot 0c2104
	   and search-string is fully processed (found-index == zero-index).  */
Packit Bot 0c2104
	/* Common exit of the slow path: result = (%r1 + %r0) / 4.  */
.Lslow_add_lbc_end:
Packit Bot 0c2104
	algrk	%r2,%r1,%r0	/* Add matching characters to current len.  */
Packit Bot 0c2104
	srlg	%r2,%r2,2	/* Convert byte-count to character-count.  */
Packit Bot 0c2104
	/* Restore registers.  */
Packit Bot 0c2104
	vlgvg	%r6,%v30,0
Packit Bot 0c2104
	vlgvg	%r8,%v31,0
Packit Bot 0c2104
	vlgvg	%r9,%v31,1
Packit Bot 0c2104
	br	%r14
Packit Bot 0c2104
Packit Bot 0c2104
.Lslow_next_acc:
Packit Bot 0c2104
	clijh	%r8,0,.Lslow_add_lbc_end /* There was a zero in last acc-part
Packit Bot 0c2104
					    -> Add found index to current len
Packit Bot 0c2104
					       and end.  */
Packit Bot 0c2104
	/* %r5 is still the offset of the previous accept-part here, hence
	   the displacement 16; after the aghi below, 0(%r5,%r3) names
	   the same address.  */
	vlbb	%v17,16(%r5,%r3),6 /* Load next accept part.  */
Packit Bot 0c2104
	aghi	%r5,16		/* Increment current len of accept-string.  */
Packit Bot 0c2104
	lcbb	%r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string.  */
Packit Bot 0c2104
	jo	.Lslow_next_acc_onbb /* Jump away if accept-string is
Packit Bot 0c2104
					on block-boundary.  */
Packit Bot 0c2104
.Lslow_next_acc_notonbb:
Packit Bot 0c2104
	vistrfs	%v17,%v17	/* Fill with zeros after first zero.  */
Packit Bot 0c2104
	jo	.Lslow_loop_acc /* No zero found -> no preparation needed.  */
Packit Bot 0c2104
Packit Bot 0c2104
.Lslow_next_acc_prepare_zero:
Packit Bot 0c2104
	/* Zero in accept-part: fill zeros with first-accept-character.  */
Packit Bot 0c2104
	vlgvf	%r8,%v17,0	/* Load first element of acc-part.  */
Packit Bot 0c2104
	clije	%r8,0,.Lslow_add_lbc_end /* End if zero is first character
Packit Bot 0c2104
					     in this part of accept-string.  */
Packit Bot 0c2104
	/* r8>0 -> zero found in this acc-part.  */
Packit Bot 0c2104
	vrepf	%v18,%v17,0	/* Replicate first char across all chars.  */
Packit Bot 0c2104
	vceqf	%v22,%v20,%v17	/* Create a mask (v22) of null chars
Packit Bot 0c2104
				   by comparing with 0 (v20).  */
Packit Bot 0c2104
	vsel	%v17,%v18,%v17,%v22 /* Replace null chars with first char.  */
Packit Bot 0c2104
	j	.Lslow_loop_acc /* Accept part is prepared -> process.  */
Packit Bot 0c2104
Packit Bot 0c2104
.Lslow_next_acc_onbb:
Packit Bot 0c2104
	nill	%r9,65532	/* Recognize only fully loaded characters
				   (65532 = 0xfffc: round the byte count
				   down to a multiple of 4).  */
Packit Bot 0c2104
	je	.Lslow_next_acc_onbb2 /* Reload vr, if we loaded no full
Packit Bot 0c2104
					  wchar_t.  */
Packit Bot 0c2104
	vfenezf	%v18,%v17,%v17	/* Find zero in loaded bytes of accept part.  */
Packit Bot 0c2104
	vlgvb	%r8,%v18,7	/* Load byte index of zero.  */
Packit Bot 0c2104
	clrjl	%r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes
Packit Bot 0c2104
						   -> Prepare vreg.  */
Packit Bot 0c2104
.Lslow_next_acc_onbb2:
Packit Bot 0c2104
	vl	%v17,0(%r5,%r3)	/* Load over boundary ...  */
Packit Bot 0c2104
	lghi	%r8,0		/* r8=0 -> no zero in this part of acc,
Packit Bot 0c2104
				   check for zero is in jump-target.  */
Packit Bot 0c2104
	j	.Lslow_next_acc_notonbb /* ... and search for zero in
Packit Bot 0c2104
					   fully loaded vreg again.  */
Packit Bot 0c2104
.Lfallback:
Packit Bot 0c2104
	/* s is not 4-byte aligned: continue in __wcsspn_c with the
	   argument registers untouched.  */
	jg	__wcsspn_c
Packit Bot 0c2104
END(__wcsspn_vx)
Packit Bot 0c2104
#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */