Blame sysdeps/s390/multiarch/wcsspn-vx.S

Packit Service 6de65a
/* Vector optimized 32/64 bit S/390 version of wcsspn.
Packit Service 6de65a
   Copyright (C) 2015-2018 Free Software Foundation, Inc.
Packit Service 6de65a
   This file is part of the GNU C Library.
Packit Service 6de65a
Packit Service 6de65a
   The GNU C Library is free software; you can redistribute it and/or
Packit Service 6de65a
   modify it under the terms of the GNU Lesser General Public
Packit Service 6de65a
   License as published by the Free Software Foundation; either
Packit Service 6de65a
   version 2.1 of the License, or (at your option) any later version.
Packit Service 6de65a
Packit Service 6de65a
   The GNU C Library is distributed in the hope that it will be useful,
Packit Service 6de65a
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service 6de65a
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Service 6de65a
   Lesser General Public License for more details.
Packit Service 6de65a
Packit Service 6de65a
   You should have received a copy of the GNU Lesser General Public
Packit Service 6de65a
   License along with the GNU C Library; if not, see
Packit Service 6de65a
   <http://www.gnu.org/licenses/>.  */
Packit Service 6de65a
Packit Service 6de65a
#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
Packit Service 6de65a
Packit Service 6de65a
# include "sysdep.h"
Packit Service 6de65a
# include "asm-syntax.h"
Packit Service 6de65a
Packit Service 6de65a
	.text
Packit Service 6de65a
Packit Service 6de65a
/* size_t wcsspn (const wchar_t *s, const wchar_t * accept)
Packit Service 6de65a
   The wcsspn() function calculates the length of the initial segment
Packit Service 6de65a
   of s which consists entirely of characters in accept.
Packit Service 6de65a
Packit Service 6de65a
   This method checks the length of accept string. If it fits entirely
Packit Service 6de65a
   in one vector register, a fast algorithm is used, which does not need
Packit Service 6de65a
   to check multiple parts of accept-string. Otherwise a slower full
Packit Service 6de65a
   check of accept-string is used.
Packit Service 6de65a
Packit Service 6de65a
   register overview:
Packit Service 6de65a
   r3:  pointer to start of accept-string
Packit Service 6de65a
   r2:  pointer to start of search-string
Packit Service 6de65a
   r4:  loaded byte count of accept-string (lcbb, initial check) or of
	search-string (vll/vl, slow path); s 'and' 15 in the fast path
Packit Service 6de65a
   r0:  found byte index
Packit Service 6de65a
   r1:  current return len of s
Packit Service 6de65a
   v16: search-string
Packit Service 6de65a
   v17: accept-string
Packit Service 6de65a
   v18: temp-vreg
Packit Service 6de65a
Packit Service 6de65a
   ONLY FOR SLOW:
Packit Service 6de65a
   v19: first accept-string
Packit Service 6de65a
   v20: zero for preparing acc-vector
Packit Service 6de65a
   v21: global mask; 1 indicates a match between
Packit Service 6de65a
	search-string-vreg and any accept-character
Packit Service 6de65a
   v22: current mask; 1 indicates a match between
Packit Service 6de65a
	search-string-vreg and any accept-character in current acc-vreg
Packit Service 6de65a
   v30, v31: for re-/storing registers r6, r8, r9
Packit Service 6de65a
   r5:  current len of accept-string
Packit Service 6de65a
   r6:	zero-index in search-string or 16 if no zero
Packit Service 6de65a
	or min(zero-index, loaded byte count)
Packit Service 6de65a
   r8:	>0, if former accept-string-part contains a zero,
Packit Service 6de65a
	otherwise =0;
Packit Service 6de65a
   r9: loaded byte count of vlbb accept-string
Packit Service 6de65a
*/
Packit Service 6de65a
ENTRY(__wcsspn_vx)
Packit Service 6de65a
	.machine "z13"
Packit Service 6de65a
	.machinemode "zarch_nohighgprs"
Packit Service 6de65a
Packit Service 6de65a
	/* In:  %r2 = s, %r3 = accept.
	   Out: %r2 = number of leading wchar_t elements of s that occur
		in accept.  */
	tmll	%r2,3		/* Test if s is 4-byte aligned?  */
Packit Service 6de65a
	jne	.Lfallback	/* And use common-code variant if not.  */
Packit Service 6de65a
Packit Service 6de65a
	/*
Packit Service 6de65a
	  Check if accept-string fits in one vreg:
Packit Service 6de65a
	  ----------------------------------------
Packit Service 6de65a
	*/
Packit Service 6de65a
	vlbb	%v17,0(%r3),6	/* Load accept.  */
Packit Service 6de65a
	lcbb	%r4,0(%r3),6
Packit Service 6de65a
	jo	.Lcheck_onbb	/* Special case if accept lies
Packit Service 6de65a
				   on block-boundary.  */
Packit Service 6de65a
.Lcheck_notonbb:
Packit Service 6de65a
	vistrfs	%v17,%v17	/* Fill with zeros after first zero.  */
Packit Service 6de65a
	je	.Lfast		/* Zero found -> accept fits in one vreg.  */
Packit Service 6de65a
	j	.Lslow		/* No zero -> accept exceeds one vreg.  */
Packit Service 6de65a
Packit Service 6de65a
.Lcheck_onbb:
Packit Service 6de65a
	/* Accept lies on block-boundary.  */
Packit Service 6de65a
	nill	%r4,65532	/* Recognize only fully loaded characters
				   (65532 = 0xfffc clears the low two bits
				   of the loaded byte count).  */
Packit Service 6de65a
	je	.Lcheck_onbb2	/* Reload vr if no full wchar_t.  */
Packit Service 6de65a
	vfenezf	%v18,%v17,%v17	/* Search zero in loaded accept bytes.  */
Packit Service 6de65a
	vlgvb	%r0,%v18,7	/* Get index of zero or 16 if not found.  */
Packit Service 6de65a
	clrjl	%r0,%r4,.Lcheck_notonbb /* Zero index < loaded bytes count ->
Packit Service 6de65a
					    Accept fits in one vreg;
Packit Service 6de65a
					    Fill with zeros and proceed
Packit Service 6de65a
					    with FAST.  */
Packit Service 6de65a
.Lcheck_onbb2:
Packit Service 6de65a
	vl	%v17,0(%r3)	/* Load accept, which exceeds loaded bytes.  */
Packit Service 6de65a
	j	.Lcheck_notonbb /* Check if accept fits in one vreg.  */
Packit Service 6de65a
Packit Service 6de65a
Packit Service 6de65a
	/*
Packit Service 6de65a
	  Search s for accept in one vreg
Packit Service 6de65a
	  -------------------------------
Packit Service 6de65a
	*/
Packit Service 6de65a
.Lfast:
Packit Service 6de65a
	/* Complete accept-string in v17 and remaining bytes are zero.  */
Packit Service 6de65a
Packit Service 6de65a
	vlbb	%v16,0(%r2),6	/* Load s until next 4k-byte boundary.  */
Packit Service 6de65a
	lcbb	%r1,0(%r2),6	/* Get bytes to 4k-byte boundary or 16.  */
Packit Service 6de65a
Packit Service 6de65a
	vfaezfs	%v16,%v16,%v17,8 /* Find first element in v16
Packit Service 6de65a
				    unequal to any in v17
Packit Service 6de65a
				    or first zero element.  */
Packit Service 6de65a
Packit Service 6de65a
	vlgvb	%r0,%v16,7	/* Load byte index of found element.  */
Packit Service 6de65a
	/* If found index is within loaded bytes (%r0 < %r1),
Packit Service 6de65a
	   return with found element index (=equal count).  */
Packit Service 6de65a
	clr	%r0,%r1
Packit Service 6de65a
	srlg	%r0,%r0,2	/* Convert byte-count to character-count.
				   The cc set by clr above is still consumed
				   by locgrl/blr below, so nothing that
				   changes the cc may be placed in between.  */
Packit Service 6de65a
	locgrl	%r2,%r0
Packit Service 6de65a
	blr	%r14
Packit Service 6de65a
Packit Service 6de65a
	/* Align s to 16 byte.  */
Packit Service 6de65a
	risbgn	%r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15.  */
Packit Service 6de65a
	lghi	%r1,16		/* current_len = 16.  */
Packit Service 6de65a
	slr	%r1,%r4		/* Compute bytes to 16bytes boundary.  */
Packit Service 6de65a
Packit Service 6de65a
	/* Loop is unrolled four times: 64 bytes of s are checked per
	   iteration, %r1 is the byte offset from s of the first vector.  */
.Lfast_loop:
Packit Service 6de65a
	vl	%v16,0(%r1,%r2)	/* Load search-string.  */
Packit Service 6de65a
	vfaezfs	%v16,%v16,%v17,8 /* Find first element in v16
Packit Service 6de65a
				    unequal to any in v17
Packit Service 6de65a
				    or first zero element.  */
Packit Service 6de65a
	jno	.Lfast_loop_found
Packit Service 6de65a
	vl	%v16,16(%r1,%r2)
Packit Service 6de65a
	vfaezfs	%v16,%v16,%v17,8
Packit Service 6de65a
	jno	.Lfast_loop_found16
Packit Service 6de65a
	vl	%v16,32(%r1,%r2)
Packit Service 6de65a
	vfaezfs	%v16,%v16,%v17,8
Packit Service 6de65a
	jno	.Lfast_loop_found32
Packit Service 6de65a
	vl	%v16,48(%r1,%r2)
Packit Service 6de65a
	vfaezfs	%v16,%v16,%v17,8
Packit Service 6de65a
	jno	.Lfast_loop_found48
Packit Service 6de65a
Packit Service 6de65a
	aghi	%r1,64
Packit Service 6de65a
	j	.Lfast_loop	/* Loop if no element was unequal to accept
Packit Service 6de65a
				   and not zero.  */
Packit Service 6de65a
Packit Service 6de65a
	/* Found unequal or zero element.
	   The labels below fall through into each other: each one adds
	   16 to %r1, so entering at found48/found32/found16 adds
	   48/32/16, i.e. the offset of the vector that was just checked.  */
Packit Service 6de65a
.Lfast_loop_found48:
Packit Service 6de65a
	aghi	%r1,16
Packit Service 6de65a
.Lfast_loop_found32:
Packit Service 6de65a
	aghi	%r1,16
Packit Service 6de65a
.Lfast_loop_found16:
Packit Service 6de65a
	aghi	%r1,16
Packit Service 6de65a
.Lfast_loop_found:
Packit Service 6de65a
	vlgvb	%r0,%v16,7	/* Load byte index of found element.  */
Packit Service 6de65a
	algrk	%r2,%r1,%r0	/* And add it to current len.  */
Packit Service 6de65a
	srlg	%r2,%r2,2	/* Convert byte-count to character-count.  */
Packit Service 6de65a
	br	%r14
Packit Service 6de65a
Packit Service 6de65a
Packit Service 6de65a
	/*
Packit Service 6de65a
	  Search s for accept in multiple vregs
Packit Service 6de65a
	  -------------------------------------
Packit Service 6de65a
	*/
Packit Service 6de65a
.Lslow:
Packit Service 6de65a
	/* Save registers: %r6, %r8 and %r9 are parked in %v30/%v31 and
	   restored from there in .Lslow_add_lbc_end before returning.  */
Packit Service 6de65a
	vlvgg	%v30,%r6,0
Packit Service 6de65a
	vlvgp	%v31,%r8,%r9
Packit Service 6de65a
	lghi	%r1,0		/* Zero out current len.  */
Packit Service 6de65a
Packit Service 6de65a
	/* accept in v17 without zero.  */
Packit Service 6de65a
	vlr	%v19,%v17	/* Save first acc-part for a fast reload.  */
Packit Service 6de65a
	vzero	%v20		/* Zero for preparing acc-vector.  */
Packit Service 6de65a
Packit Service 6de65a
	/* Align s to 16 byte.  */
Packit Service 6de65a
	risbg	%r0,%r2,60,128+63,0 /* Test if s is aligned and
Packit Service 6de65a
				     %r0 = bits 60-63 'and' 15.  */
Packit Service 6de65a
	je	.Lslow_loop_str /* If s is aligned, loop aligned */
Packit Service 6de65a
	lghi	%r4,15
Packit Service 6de65a
	slr	%r4,%r0		/* Compute highest index to load (15-x).  */
Packit Service 6de65a
	vll	%v16,%r4,0(%r2) /* Load up to 16byte boundary (vll needs
Packit Service 6de65a
				   highest index, remaining bytes are 0).  */
Packit Service 6de65a
	aghi	%r4,1		/* Work with loaded byte count.  */
Packit Service 6de65a
	vzero	%v21		/* Zero out global mask.  */
Packit Service 6de65a
	lghi	%r5,0		/* Set current len of accept-string to zero.  */
Packit Service 6de65a
	vfenezf	%v18,%v16,%v16	/* Find zero in current string-part.  */
Packit Service 6de65a
	lghi	%r8,0		/* There is no zero in first accept-part.  */
Packit Service 6de65a
	vlgvb	%r6,%v18,7	/* Load byte index of zero or 16
Packit Service 6de65a
				   if there is no zero.  */
Packit Service 6de65a
	clr	%r4,%r6		/* cc==1 if loaded byte count < zero-index.  */
Packit Service 6de65a
	locrl	%r6,%r4		/* Load on cc==1, so that
				   %r6 = min(zero-index, loaded byte count).  */
Packit Service 6de65a
	j	.Lslow_loop_acc
Packit Service 6de65a
Packit Service 6de65a
	/* Process s in 16byte aligned loop.  */
Packit Service 6de65a
.Lslow_next_str:
Packit Service 6de65a
	vlr	%v17,%v19	/* Load first part of accept (no zero).  */
Packit Service 6de65a
	algfr	%r1,%r4		/* Add loaded byte count to current len.  */
Packit Service 6de65a
.Lslow_loop_str:
Packit Service 6de65a
	vl	%v16,0(%r1,%r2)	/* Load search-string.  */
Packit Service 6de65a
	lghi	%r4,16		/* Loaded byte count is 16.  */
Packit Service 6de65a
	vzero	%v21		/* Zero out global mask.  */
Packit Service 6de65a
	lghi	%r5,0		/* Set current len of accept-string to zero.  */
Packit Service 6de65a
	vfenezf	%v18,%v16,%v16	/* Find zero in current string-part.  */
Packit Service 6de65a
	lghi	%r8,0		/* There is no zero in first accept-part.  */
Packit Service 6de65a
	vlgvb	%r6,%v18,7	/* Load byte index of zero or 16 if no zero.  */
Packit Service 6de65a
Packit Service 6de65a
	/* Inner loop: match the current part of s (v16) against one part
	   of accept (v17) per iteration and accumulate the result in the
	   global mask (v21).  */
.Lslow_loop_acc:
Packit Service 6de65a
	vfaef	%v22,%v16,%v17,4 /* Create matching-mask (1 in mask ->
Packit Service 6de65a
				    character matches any accepted character in
Packit Service 6de65a
				    this accept-string-part) IN=0, RT=1.  */
Packit Service 6de65a
	vo	%v21,%v21,%v22	/* global-mask = global- | matching-mask.  */
Packit Service 6de65a
	vfenezf	%v18,%v21,%v21	/* Find first zero in global-mask.  */
Packit Service 6de65a
	vlgvb	%r0,%v18,7	/* Get first found zero-index
Packit Service 6de65a
				   (= first mismatch).  */
Packit Service 6de65a
	clrjl	%r0,%r6,.Lslow_next_acc /* Mismatch-index < min(lbc,zero-index)
Packit Service 6de65a
					   -> Process this string-part
Packit Service 6de65a
					      with next acc-part.  */
Packit Service 6de65a
	clrjhe	%r0,%r4,.Lslow_next_str /* Found-index >= loaded byte count
Packit Service 6de65a
					   -> All loaded bytes are matching
Packit Service 6de65a
					      any accept-character
Packit Service 6de65a
					      and are not zero.  */
Packit Service 6de65a
	/* All bytes are matching any characters in accept-string
Packit Service 6de65a
	   and search-string is fully processed (found-index == zero-index).  */
Packit Service 6de65a
	/* Common exit of the slow path: result is (%r1 + %r0) / 4.  */
.Lslow_add_lbc_end:
Packit Service 6de65a
	algrk	%r2,%r1,%r0	/* Add matching characters to current len.  */
Packit Service 6de65a
	srlg	%r2,%r2,2	/* Convert byte-count to character-count.  */
Packit Service 6de65a
	/* Restore registers.  */
Packit Service 6de65a
	vlgvg	%r6,%v30,0
Packit Service 6de65a
	vlgvg	%r8,%v31,0
Packit Service 6de65a
	vlgvg	%r9,%v31,1
Packit Service 6de65a
	br	%r14
Packit Service 6de65a
Packit Service 6de65a
.Lslow_next_acc:
Packit Service 6de65a
	clijh	%r8,0,.Lslow_add_lbc_end /* There was a zero in last acc-part
Packit Service 6de65a
					    -> Add found index to current len
Packit Service 6de65a
					       and end.  */
Packit Service 6de65a
	vlbb	%v17,16(%r5,%r3),6 /* Load next accept part.  */
Packit Service 6de65a
	aghi	%r5,16		/* Increment current len of accept-string.  */
Packit Service 6de65a
	lcbb	%r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string.  */
Packit Service 6de65a
	jo	.Lslow_next_acc_onbb /* Jump away if accept-string is
Packit Service 6de65a
					on block-boundary.  */
Packit Service 6de65a
.Lslow_next_acc_notonbb:
Packit Service 6de65a
	vistrfs	%v17,%v17	/* Fill with zeros after first zero.  */
Packit Service 6de65a
	jo	.Lslow_loop_acc /* No zero found -> no preparation needed.  */
Packit Service 6de65a
Packit Service 6de65a
.Lslow_next_acc_prepare_zero:
Packit Service 6de65a
	/* Zero in accept-part: fill zeros with first-accept-character.  */
Packit Service 6de65a
	vlgvf	%r8,%v17,0	/* Load first element of acc-part.  */
Packit Service 6de65a
	clije	%r8,0,.Lslow_add_lbc_end /* End if zero is first character
Packit Service 6de65a
					     in this part of accept-string.  */
Packit Service 6de65a
	/* r8>0 -> zero found in this acc-part.  */
Packit Service 6de65a
	vrepf	%v18,%v17,0	/* Replicate first char across all chars.  */
Packit Service 6de65a
	vceqf	%v22,%v20,%v17	/* Create a mask (v22) of null chars
Packit Service 6de65a
				   by comparing with 0 (v20).  */
Packit Service 6de65a
	vsel	%v17,%v18,%v17,%v22 /* Replace null chars with first char.  */
Packit Service 6de65a
	j	.Lslow_loop_acc /* Accept part is prepared -> process.  */
Packit Service 6de65a
Packit Service 6de65a
.Lslow_next_acc_onbb:
Packit Service 6de65a
	nill	%r9,65532	/* Recognize only fully loaded characters
				   (65532 = 0xfffc clears the low two bits
				   of the loaded byte count).  */
Packit Service 6de65a
	je	.Lslow_next_acc_onbb2 /* Reload vr, if we loaded no full
Packit Service 6de65a
					  wchar_t.  */
Packit Service 6de65a
	vfenezf	%v18,%v17,%v17	/* Find zero in loaded bytes of accept part.  */
Packit Service 6de65a
	vlgvb	%r8,%v18,7	/* Load byte index of zero.  */
Packit Service 6de65a
	clrjl	%r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes
Packit Service 6de65a
						   -> Prepare vreg.  */
Packit Service 6de65a
.Lslow_next_acc_onbb2:
Packit Service 6de65a
	vl	%v17,0(%r5,%r3)	/* Load over boundary ...  */
Packit Service 6de65a
	lghi	%r8,0		/* r8=0 -> no zero in this part of acc,
Packit Service 6de65a
				   check for zero is in jump-target.  */
Packit Service 6de65a
	j	.Lslow_next_acc_notonbb /* ... and search for zero in
Packit Service 6de65a
					   fully loaded vreg again.  */
Packit Service 6de65a
.Lfallback:
Packit Service 6de65a
	/* s is not 4-byte aligned: branch to the common-code variant.
	   Only tmll has been executed so far, so %r2/%r3 still hold the
	   original arguments.  */
	jg	__wcsspn_c
Packit Service 6de65a
END(__wcsspn_vx)
Packit Service 6de65a
#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */