Blame sysdeps/s390/multiarch/wcspbrk-vx.S

Packit Service 93dc7a
/* Vector optimized 32/64 bit S/390 version of wcspbrk.
Packit df9632
   Copyright (C) 2015-2018 Free Software Foundation, Inc.
Packit df9632
   This file is part of the GNU C Library.
Packit df9632
Packit df9632
   The GNU C Library is free software; you can redistribute it and/or
Packit df9632
   modify it under the terms of the GNU Lesser General Public
Packit df9632
   License as published by the Free Software Foundation; either
Packit df9632
   version 2.1 of the License, or (at your option) any later version.
Packit df9632
Packit df9632
   The GNU C Library is distributed in the hope that it will be useful,
Packit df9632
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit df9632
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit df9632
   Lesser General Public License for more details.
Packit df9632
Packit df9632
   You should have received a copy of the GNU Lesser General Public
Packit df9632
   License along with the GNU C Library; if not, see
Packit df9632
   <http://www.gnu.org/licenses/>.  */
Packit df9632
Packit Service 93dc7a
#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
Packit df9632
Packit df9632
# include "sysdep.h"
Packit df9632
# include "asm-syntax.h"
Packit df9632
Packit df9632
	.text
Packit df9632
Packit Service 93dc7a
/* wchar_t *wcspbrk (const wchar_t *s, const wchar_t * accept)
Packit Service 93dc7a
   The wcspbrk() function locates the first occurrence in the string s
Packit df9632
   of any of the characters in the string accept and returns a pointer
Packit df9632
   to that character or NULL if not found.
Packit df9632
Packit df9632
   This method checks the length of accept string. If it fits entirely
Packit df9632
   in one vector register, a fast algorithm is used, which does not need
Packit df9632
   to check multiple parts of accept-string. Otherwise a slower full
Packit df9632
   check of accept-string is used.
Packit df9632
Packit df9632
   register overview:
Packit df9632
   r3:  pointer to start of accept-string
Packit df9632
   r2:  pointer to start of search-string
Packit df9632
   r0:  loaded byte count of vlbb search-string (32bit unsigned)
Packit df9632
   r4:  found byte index (32bit unsigned)
Packit df9632
   r1:  current return len (64bit unsigned)
Packit df9632
   v16: search-string
Packit df9632
   v17: accept-string
Packit df9632
   v18: temp-vreg
Packit df9632
Packit df9632
   ONLY FOR SLOW:
Packit df9632
   v19: first accept-string
Packit df9632
   v20: zero for preparing acc-vector
Packit df9632
   v21: global mask; 1 indicates a match between
Packit df9632
	search-string-vreg and any accept-character
Packit df9632
   v22: current mask; 1 indicates a match between
Packit df9632
	search-string-vreg and any accept-character in current acc-vreg
Packit df9632
   v24: one for result-checking of former string-part
Packit df9632
   v30, v31: for re-/storing registers r6, r8, r9
Packit df9632
   r5:  current len of accept-string
Packit df9632
   r6:  zero-index in search-string or 16 if no zero
Packit df9632
	or min(zero-index, loaded byte count)
Packit df9632
   r8:  >0, if former accept-string-part contains a zero,
Packit df9632
	otherwise =0;
Packit df9632
   r9:  loaded byte count of vlbb accept-string
Packit df9632
*/
Packit Service 93dc7a
ENTRY(__wcspbrk_vx)
Packit df9632
	.machine "z13"
Packit df9632
	.machinemode "zarch_nohighgprs"
Packit df9632
Packit Service 93dc7a
	/* The vector code below is only used for a 4-byte aligned s;
	   any other alignment is handed over to .Lfallback.  */
	tmll	%r2,3		/* Test if s is 4-byte aligned.  */
Packit Service 93dc7a
	jne	.Lfallback	/* And use common-code variant if not.  */
Packit Service 93dc7a
Packit df9632
	/*
	  Check if accept-string fits in one vreg:
	  ----------------------------------------
	  Result: branch to .Lfast with the complete, zero-padded accept in
	  v17, or branch to .Lslow with the first 16 bytes of accept
	  (containing no zero) in v17.  An empty accept returns null.
	*/
Packit df9632
	vlbb	%v17,0(%r3),6	/* Load accept until next 4k-byte boundary.  */
Packit df9632
	lcbb	%r0,0(%r3),6	/* Get bytes to 4k-byte boundary or 16.  */
Packit df9632
	jo	.Lcheck_onbb	/* Special case if accept lies
				   on block-boundary.  */
Packit Service 93dc7a
Packit df9632
.Lcheck_notonbb:
Packit Service 93dc7a
	lghi	%r1,0		/* Zero out current len.  */
Packit Service 93dc7a
	vlgvf	%r0,%v17,0	/* Get first element.  */
Packit Service 93dc7a
	clije	%r0,0,.Lfast_end_null /* Return null if accept is empty.  */
Packit Service 93dc7a
Packit Service 93dc7a
	vistrfs	%v17,%v17	/* Fill with zeros after first zero.  */
Packit df9632
	je	.Lfast		/* Zero found -> accept fits in one vreg.  */
Packit df9632
	j	.Lslow		/* No zero -> accept exceeds one vreg.  */
Packit df9632
Packit df9632
Packit df9632
.Lcheck_onbb:
Packit df9632
	/* Accept lies on block-boundary: r0 holds the number of bytes that
	   were loaded in front of the boundary (less than 16).  */
Packit Service 93dc7a
	nill	%r0,65532	/* Recognize only fully loaded characters:
				   65532 = 0xfffc clears the two low bits,
				   i.e. rounds down to a multiple of 4.  */
Packit Service 93dc7a
	je	.Lcheck_onbb2	/* Reload vr, if we loaded no full wchar_t.  */
Packit Service 93dc7a
	vfenezf	%v18,%v17,%v17	/* Search zero in loaded accept bytes.  */
Packit df9632
	vlgvb	%r4,%v18,7	/* Get index of zero or 16 if not found.  */
Packit df9632
	clrjl	%r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count ->
					    accept fits in one vreg;
					    Fill with zeros and proceed
					    with FAST.  */
Packit Service 93dc7a
.Lcheck_onbb2:
Packit df9632
	/* No zero within the bytes in front of the boundary, so accept
	   continues behind it and a full 16-byte load is done.  */
	vl	%v17,0(%r3)	/* Load accept, which exceeds loaded bytes.  */
Packit df9632
	j	.Lcheck_notonbb /* Check if accept fits in one vreg.  */
Packit df9632
Packit df9632
Packit df9632
	/*
Packit df9632
	  Search s for accept in one vreg
Packit df9632
	  -------------------------------
Packit df9632
	*/
Packit df9632
.Lfast:
Packit df9632
	/* Fast path: the complete accept-string is in v17 and the remaining
	   bytes are zero.  r1 (current len) is zero on entry.  */
Packit df9632
Packit df9632
	vlbb	%v16,0(%r2),6	/* Load s until next 4k-byte boundary.  */
Packit df9632
	lcbb	%r0,0(%r2),6	/* Get bytes to 4k-byte boundary or 16.  */
Packit df9632
Packit Service 93dc7a
	vfaezfs	%v18,%v16,%v17,0 /* Find first element in v16 equal to any
				    in v17 or first zero element.  */
Packit df9632
	vlgvb	%r4,%v18,7	/* Load byte index of found element.  */
Packit df9632
	/* If found index is within loaded bytes, finish with this index;
	   current len (r1) is still zero, so r4 is the byte offset from s.  */
Packit df9632
	clrjl	%r4,%r0,.Lfast_loop_found2
Packit df9632
Packit df9632
	/* Align s to 16 byte.  */
Packit df9632
	risbgn	%r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15.  */
Packit df9632
	lghi	%r1,16		/* current_len = 16.  */
Packit df9632
	slr	%r1,%r4		/* Compute bytes to 16bytes boundary.  */
Packit df9632
Packit df9632
	/* Main loop, unrolled four times: each step loads 16 bytes of s at
	   offset r1 (+0/16/32/48) and leaves the loop as soon as an accepted
	   character or a zero element is seen.  */
.Lfast_loop:
Packit df9632
	vl	%v16,0(%r1,%r2)	/* Load search-string.  */
Packit Service 93dc7a
	vfaezfs	%v18,%v16,%v17,0 /* Find first element in v16 equal to any
				    in v17 or first zero element.  */
Packit df9632
	jno	.Lfast_loop_found
Packit df9632
Packit df9632
	vl	%v16,16(%r1,%r2)
Packit Service 93dc7a
	vfaezfs	%v18,%v16,%v17,0
Packit df9632
	jno	.Lfast_loop_found16
Packit df9632
Packit df9632
	vl	%v16,32(%r1,%r2)
Packit Service 93dc7a
	vfaezfs	%v18,%v16,%v17,0
Packit df9632
	jno	.Lfast_loop_found32
Packit df9632
Packit df9632
	vl	%v16,48(%r1,%r2)
Packit Service 93dc7a
	vfaezfs	%v18,%v16,%v17,0
Packit df9632
	jno	.Lfast_loop_found48
Packit df9632
Packit df9632
	aghi	%r1,64
Packit df9632
	j	.Lfast_loop	/* Loop if no element was equal to any accept
				   character and no zero was found.  */
Packit df9632
Packit df9632
	/* Found equal or zero element.  The labels below fall through into
	   each other so that r1 is advanced by 48, 32, 16 or 0 bytes,
	   matching the unrolled step that left the loop.  */
Packit df9632
.Lfast_loop_found48:
Packit df9632
	aghi	%r1,16
Packit df9632
.Lfast_loop_found32:
Packit df9632
	aghi	%r1,16
Packit df9632
.Lfast_loop_found16:
Packit df9632
	aghi	%r1,16
Packit df9632
.Lfast_loop_found:
Packit df9632
	vlgvb	%r4,%v18,7	/* Load byte index of found element.  */
Packit df9632
.Lfast_loop_found2:
Packit Service 93dc7a
	/* r4 = byte index of the found element within v16,
	   r1 = byte offset of v16 relative to s.  */
	srlg	%r5,%r4,2	/* Convert byte-index to character-index.  */
Packit Service 93dc7a
	vlgvf	%r0,%v16,0(%r5)	/* Get found element.  */
Packit df9632
	clije	%r0,0,.Lfast_end_null /* Found element is the terminating zero
					 -> return null, no accept-char
					 found.  */
Packit df9632
	algfr	%r1,%r4		/* Add found index of char to current len.  */
Packit df9632
	la	%r2,0(%r1,%r2)	/* And return pointer to first equal char.  */
Packit df9632
	br	%r14
Packit df9632
Packit df9632
.Lfast_end_null:
Packit df9632
	lghi	%r2,0		/* Return null if no character is equal.  */
Packit df9632
	br	%r14
Packit df9632
Packit df9632
Packit df9632
Packit df9632
Packit df9632
	/*
Packit df9632
	  Search s for accept in multiple vregs
Packit df9632
	  -------------------------------------
Packit df9632
	*/
Packit df9632
.Lslow:
Packit df9632
	/* Slow path: accept does not fit into one vreg.  Every part of s is
	   compared against all 16-byte parts of accept; the matches are
	   collected in the global mask v21 and evaluated in .Lslow_next_str.
	   r1 (current len) is zero on entry.  */
	/* Save registers r6, r8 and r9 in v30/v31; they are restored at
	   .Lslow_end.  */
Packit df9632
	vlvgg	%v30,%r6,0
Packit df9632
	vlvgp	%v31,%r8,%r9
Packit df9632
Packit Service 93dc7a
	/* Accept in v17 without zero.  */
Packit df9632
	vlr	%v19,%v17	/* Save first acc-part for a fast reload.  */
Packit df9632
	vzero	%v20		/* Zero for preparing acc-vector.  */
Packit df9632
	vone	%v24		/* One for checking result of former string.  */
Packit df9632
Packit df9632
	/* Align s to 16 byte.  */
Packit df9632
	risbg	%r4,%r2,60,128+63,0 /* Test if s is aligned and
				       %r4 = bits 60-63 'and' 15.  */
Packit df9632
	je	.Lslow_loop_str /* If s is aligned, loop aligned.  */
Packit df9632
	lghi	%r0,15
Packit df9632
	slr	%r0,%r4		/* Compute highest index to load (15-x).  */
Packit Service 93dc7a
	vll	%v16,%r0,0(%r2) /* Load up to 16byte boundary;
				   needs highest index, left bytes are 0.  */
Packit df9632
	ahi	%r0,1		/* Work with loaded byte count.  */
Packit df9632
	vzero	%v21		/* Zero out global mask.  */
Packit df9632
	lghi	%r5,0		/* Set current len of accept-string to zero.  */
Packit Service 93dc7a
	vfenezf	%v18,%v16,%v16	/* Find zero in current string-part.  */
Packit df9632
	lghi	%r8,0		/* There is no zero in first accept-part.  */
Packit df9632
	vlgvb	%r6,%v18,7	/* Load byte index of zero or 16 if no zero.  */
Packit df9632
	clije	%r6,0,.Lslow_end_null /* If first element is zero
					  (end of string) -> return null.  */
Packit df9632
	/* r6 = min(zero-index, loaded byte count).  */
	clr	%r0,%r6		/* cc==1 if loaded byte count < zero-index.  */
Packit df9632
	locrl	%r6,%r0		/* Load on cc==1; zero-index = lbc.  */
Packit df9632
	j	.Lslow_loop_acc
Packit df9632
Packit df9632
Packit df9632
	/* Process s in 16byte aligned loop.  */
Packit df9632
.Lslow_next_str:
Packit df9632
	/* Check results of former processed str-part.  */
Packit Service 93dc7a
	vfeef	%v18,%v21,%v24	/* Find first equal match in global mask
				   (ones in element).  */
Packit df9632
	vlgvb	%r4,%v18,7	/* Get index of first one (=equal)
				   or 16 if no match.  */
Packit df9632
	/* Equal-index < min(zero-index, loaded byte count)
	   -> return pointer to equal element.  */
Packit df9632
	clrjl	%r4,%r6,.Lslow_index_found
Packit df9632
	/* Zero-index < loaded byte count
	   -> former str-part was last str-part
	   -> return null.  */
Packit df9632
	clrjl	%r6,%r0,.Lslow_end_null
Packit df9632
	/* All elements are zero (=no match) -> proceed with next str-part.  */
Packit df9632
Packit df9632
	vlr	%v17,%v19	/* Load first part of accept (no zero).  */
Packit df9632
	algfr	%r1,%r0		/* Add loaded byte count to current len.  */
Packit df9632
Packit df9632
.Lslow_loop_str:
Packit df9632
	vl	%v16,0(%r1,%r2)	/* Load search-string.  */
Packit df9632
	lghi	%r0,16		/* Loaded byte count is 16.  */
Packit df9632
	vzero	%v21		/* Zero out global mask.  */
Packit df9632
	lghi	%r5,0		/* Set current len of accept to zero.  */
Packit Service 93dc7a
	vfenezf	%v18,%v16,%v16	/* Find zero in current string-part.  */
Packit df9632
	lghi	%r8,0		/* There is no zero in first accept-part.  */
Packit df9632
	vlgvb	%r6,%v18,7	/* Load byte index of zero or 16 if no zero.  */
Packit df9632
	clije	%r6,0,.Lslow_end_null /* If first element is zero
					  (end of string) -> return null.  */
Packit df9632
Packit df9632
.Lslow_loop_acc:
Packit Service 93dc7a
	vfaef	%v22,%v16,%v17,4 /* Create matching-mask (1 in mask ->
				    Character matches any accepted character in
				    this accept-string-part) IN=0, RT=1.  */
Packit Service 93dc7a
	vlgvf	%r4,%v22,0	/* Get result of first element.  */
Packit df9632
	/* First element is equal to any accepted characters
	   (all other parts of accept cannot lead to a match before this one)
	   -> current len is pointing to first element
	   -> return found.  */
Packit df9632
	clijh	%r4,0,.Lslow_end_found
Packit df9632
	vo	%v21,%v21,%v22	/* Global-mask = global-|matching-mask.  */
Packit df9632
	/* Proceed with next acc until end of acc is reached.  */
Packit df9632
Packit df9632
Packit df9632
.Lslow_next_acc:
Packit df9632
	clijh	%r8,0,.Lslow_next_str /* There was a zero in the last acc-part
					  -> all parts of accept are processed;
					     check the results for this
					     string-part.  */
Packit df9632
	vlbb	%v17,16(%r5,%r3),6 /* Load next accept part.  */
Packit df9632
	aghi	%r5,16		/* Increment current len of accept-string.  */
Packit df9632
	lcbb	%r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string.  */
Packit df9632
	jo	.Lslow_next_acc_onbb /* Jump away if accept-string is
					 on block-boundary.  */
Packit df9632
.Lslow_next_acc_notonbb:
Packit Service 93dc7a
	vistrfs	%v17,%v17	/* Fill with zeros after first zero.  */
Packit df9632
	jo	.Lslow_loop_acc /* No zero found -> no preparation needed.  */
Packit df9632
Packit df9632
.Lslow_next_acc_prepare_zero:
Packit df9632
	/* Zero in accept-part: fill zeros with first-accept-character.  */
Packit Service 93dc7a
	vlgvf	%r8,%v17,0	/* Load first element of acc-part.  */
Packit df9632
	clije	%r8,0,.Lslow_next_str /* Proceed with next string-part,
					  if first char in this part of accept
					  is a zero.  */
Packit df9632
	/* r8>0 -> zero found in this acc-part.  */
Packit Service 93dc7a
	vrepf	%v18,%v17,0	/* Replicate first char across all chars.  */
Packit Service 93dc7a
	vceqf	%v22,%v20,%v17	/* Create a mask (v22) of null chars
				   by comparing with 0 (v20).  */
Packit df9632
	vsel	%v17,%v18,%v17,%v22 /* Replace null chars with first char.  */
Packit df9632
	j	.Lslow_loop_acc /* Accept part is prepared -> process.  */
Packit df9632
Packit df9632
.Lslow_next_acc_onbb:
Packit Service 93dc7a
	/* Next accept part lies on block-boundary: r9 holds the number of
	   bytes that were loaded in front of the boundary.  */
	nill	%r9,65532	/* Recognize only fully loaded characters:
				   65532 = 0xfffc rounds down to a multiple
				   of 4.  */
Packit Service 93dc7a
	je	.Lslow_next_acc_onbb2 /* Reload vr, if no full wchar_t.  */
Packit Service 93dc7a
	vfenezf	%v18,%v17,%v17	/* Find zero in loaded bytes of accept part.  */
Packit df9632
	vlgvb	%r8,%v18,7	/* Load byte index of zero.  */
Packit df9632
	clrjl	%r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes
						    -> Prepare vreg.  */
Packit Service 93dc7a
.Lslow_next_acc_onbb2:
Packit df9632
	vl	%v17,0(%r5,%r3)	/* Load over boundary ...  */
Packit df9632
	lghi	%r8,0		/* r8=0 -> no zero in this part of acc,
				   check for zero is in jump-target.  */
Packit df9632
	j	.Lslow_next_acc_notonbb /* ... and search for zero in
					    fully loaded vreg again.  */
Packit df9632
Packit df9632
.Lslow_end_null:
Packit df9632
	lghi	%r1,0		/* Return null if no character is equal.  */
Packit df9632
	j	.Lslow_end
Packit df9632
Packit df9632
	/* NOTE(review): no branch visible in this file targets
	   .Lslow_loop_found, and the preceding instruction is an
	   unconditional jump, so this label looks unreachable - confirm
	   before relying on it or removing it.  */
.Lslow_loop_found:
Packit df9632
	vlgvb	%r4,%v18,7	/* Load byte index of found element.  */
Packit Service 93dc7a
	srlg	%r5,%r4,2	/* Convert byte-index to character-index.  */
Packit Service 93dc7a
	vlgvf	%r0,%v16,0(%r5)	/* Get found element.  */
Packit df9632
	clije	%r0,0,.Lslow_end_null /* Return null if no acc-char found.  */
Packit df9632
Packit df9632
.Lslow_index_found:
Packit df9632
	algfr	%r1,%r4		/* Add found index of char to current len.  */
Packit df9632
.Lslow_end_found:
Packit df9632
	la	%r1,0(%r1,%r2)	/* And return pointer to first equal char.  */
Packit df9632
Packit df9632
.Lslow_end:
Packit df9632
	/* Common exit of the slow path: r1 holds the return value
	   (pointer to the found character or null).  */
	/* Restore registers.  */
Packit df9632
	vlgvg	%r6,%v30,0
Packit df9632
	vlgvg	%r8,%v31,0
Packit df9632
	vlgvg	%r9,%v31,1
Packit df9632
	lgr	%r2,%r1
Packit df9632
	br	%r14
Packit Service 93dc7a
.Lfallback:
Packit Service 93dc7a
	/* Reached from the alignment test at function entry when s is not
	   4-byte aligned: jump to the common-code variant __wcspbrk_c.
	   r2, r3 and r14 are still unchanged at this point.  */
	jg	__wcspbrk_c
Packit Service 93dc7a
END(__wcspbrk_vx)
Packit Service 93dc7a
#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */