Blame sysdeps/s390/wcspbrk-vx.S

Packit Service dd846c
/* Vector optimized 32/64 bit S/390 version of wcspbrk.
Packit Service 82fcde
   Copyright (C) 2015-2018 Free Software Foundation, Inc.
Packit Service 82fcde
   This file is part of the GNU C Library.
Packit Service 82fcde
Packit Service 82fcde
   The GNU C Library is free software; you can redistribute it and/or
Packit Service 82fcde
   modify it under the terms of the GNU Lesser General Public
Packit Service 82fcde
   License as published by the Free Software Foundation; either
Packit Service 82fcde
   version 2.1 of the License, or (at your option) any later version.
Packit Service 82fcde
Packit Service 82fcde
   The GNU C Library is distributed in the hope that it will be useful,
Packit Service 82fcde
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service 82fcde
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Service 82fcde
   Lesser General Public License for more details.
Packit Service 82fcde
Packit Service 82fcde
   You should have received a copy of the GNU Lesser General Public
Packit Service 82fcde
   License along with the GNU C Library; if not, see
Packit Service 82fcde
   <http://www.gnu.org/licenses/>.  */
Packit Service 82fcde
Packit Service 553098
#include <ifunc-wcspbrk.h>
Packit Service 553098
#if HAVE_WCSPBRK_Z13
Packit Service 82fcde
Packit Service 82fcde
# include "sysdep.h"
Packit Service 82fcde
# include "asm-syntax.h"
Packit Service 82fcde
Packit Service 82fcde
	.text
Packit Service 82fcde
Packit Service dd846c
/* wchar_t *wcspbrk (const wchar_t *s, const wchar_t * accept)
Packit Service dd846c
   The  wcspbrk()  function locates the first occurrence in the string s
Packit Service 82fcde
   of any of the characters in the string accept and returns a pointer
Packit Service 82fcde
   to that character or NULL if not found.
Packit Service 82fcde
Packit Service 82fcde
   This method checks the length of accept string. If it fits entirely
Packit Service 82fcde
   in one vector register, a fast algorithm is used, which does not need
Packit Service 82fcde
   to check multiple parts of accept-string. Otherwise a slower full
Packit Service 82fcde
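   check of accept-string is used.  If s is not 4-byte aligned, neither
   vector algorithm is used; the common-code variant (WCSPBRK_C) is
   called instead.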
Packit Service 82fcde
Packit Service 82fcde
   register overview:
Packit Service 82fcde
   r3:  pointer to start of accept-string
Packit Service 82fcde
   r2:  pointer to start of search-string
Packit Service 82fcde
   r0:  loaded byte count of vlbb search-string (32bit unsigned)
Packit Service 82fcde
   r4:  found byte index (32bit unsigned)
Packit Service 82fcde
   r1:  current return len (64bit unsigned)
Packit Service 82fcde
   v16: search-string
Packit Service 82fcde
   v17: accept-string
Packit Service 82fcde
   v18: temp-vreg
Packit Service 82fcde
Packit Service 82fcde
   ONLY FOR SLOW:
Packit Service 82fcde
   v19: first accept-string
Packit Service 82fcde
   v20: zero for preparing acc-vector
Packit Service 82fcde
   v21: global mask; 1 indicates a match between
Packit Service 82fcde
	search-string-vreg and any accept-character
Packit Service 82fcde
   v22: current mask; 1 indicates a match between
Packit Service 82fcde
	search-string-vreg and any accept-character in current acc-vreg
Packit Service 82fcde
   v24: one for result-checking of former string-part
Packit Service 82fcde
   v30, v31: for re-/storing registers r6, r8, r9
Packit Service 82fcde
   r5:  current len of accept-string
Packit Service 82fcde
   r6:  zero-index in search-string or 16 if no zero
Packit Service 82fcde
	or min(zero-index, loaded byte count)
Packit Service 82fcde
   r8:  >0, if former accept-string-part contains a zero,
Packit Service 82fcde
	otherwise =0;
Packit Service 82fcde
   r9:  loaded byte count of vlbb accept-string
Packit Service 82fcde
*/
Packit Service 553098
ENTRY(WCSPBRK_Z13)
Packit Service 82fcde
	.machine "z13"
Packit Service 82fcde
	.machinemode "zarch_nohighgprs"
Packit Service 82fcde
Packit Service dd846c
	/* In:  %r2 = s, %r3 = accept.
	   Out: %r2 = pointer to the first character of s that occurs in
		accept, or 0 if there is none.
	   Three paths: .Lfallback (s not 4-byte aligned), .Lfast (accept
	   fits in one vreg) and .Lslow (accept spans several vregs).  */
	tmll	%r2,3		/* Test if s is 4-byte aligned?  */
Packit Service dd846c
	jne	.Lfallback	/* And use common-code variant if not.  */
Packit Service dd846c
Packit Service 82fcde
	/*
Packit Service 82fcde
	  Check if accept-string fits in one vreg:
Packit Service 82fcde
	  ----------------------------------------
Packit Service 82fcde
	*/
Packit Service 82fcde
	vlbb	%v17,0(%r3),6	/* Load accept.  */
Packit Service 82fcde
	lcbb	%r0,0(%r3),6
Packit Service 82fcde
	jo	.Lcheck_onbb	/* Special case if accept lies
Packit Service 82fcde
				   on block-boundary (fewer than 16 bytes
				   were loaded).  */
Packit Service dd846c
Packit Service 82fcde
.Lcheck_notonbb:
Packit Service dd846c
	lghi	%r1,0		/* Zero out current len.  */
Packit Service dd846c
	vlgvf	%r0,%v17,0	/* Get first element.  */
Packit Service dd846c
	clije	%r0,0,.Lfast_end_null /* Return null if accept is empty.  */
Packit Service dd846c
Packit Service dd846c
	vistrfs	%v17,%v17	/* Fill with zeros after first zero.  */
Packit Service 82fcde
	je	.Lfast		/* Zero found -> accept fits in one vreg.  */
Packit Service 82fcde
	j	.Lslow		/* No zero -> accept exceeds one vreg  */
Packit Service 82fcde
Packit Service 82fcde
Packit Service 82fcde
.Lcheck_onbb:
Packit Service 82fcde
	/* Accept lies on block-boundary.  */
Packit Service dd846c
	nill	%r0,65532	/* Recognize only fully loaded characters
				   (65532 = 0xfffc rounds the loaded byte
				   count down to a multiple of 4).  */
Packit Service dd846c
	je	.Lcheck_onbb2	/* Reload vr, if we loaded no full wchar_t.  */
Packit Service dd846c
	vfenezf	%v18,%v17,%v17	/* Search zero in loaded accept bytes.  */
Packit Service 82fcde
	vlgvb	%r4,%v18,7	/* Get index of zero or 16 if not found.  */
Packit Service 82fcde
	clrjl	%r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count ->
Packit Service dd846c
					    accept fits in one vreg;
Packit Service 82fcde
					    Fill with zeros and proceed
Packit Service 82fcde
					    with FAST.  */
Packit Service dd846c
.Lcheck_onbb2:
Packit Service 82fcde
	vl	%v17,0(%r3)	/* Load accept, which exceeds loaded bytes.  */
Packit Service 82fcde
	j	.Lcheck_notonbb /* Check if accept fits in one vreg.  */
Packit Service 82fcde
Packit Service 82fcde
Packit Service 82fcde
	/*
Packit Service 82fcde
	  Search s for accept in one vreg
Packit Service 82fcde
	  -------------------------------
Packit Service 82fcde
	*/
Packit Service 82fcde
.Lfast:
Packit Service 82fcde
	/* Complete accept-string in v17 and remaining bytes are zero.
	   %r1 (current len) is still 0 here, see .Lcheck_notonbb.  */
Packit Service 82fcde
Packit Service 82fcde
	vlbb	%v16,0(%r2),6	/* Load s until next 4k-byte boundary.  */
Packit Service 82fcde
	lcbb	%r0,0(%r2),6	/* Get bytes to 4k-byte boundary or 16.  */
Packit Service 82fcde
Packit Service dd846c
	vfaezfs	%v18,%v16,%v17,0 /* Find first element in v16 equal to any
Packit Service 82fcde
				    in v17 or first zero element.  */
Packit Service 82fcde
	vlgvb	%r4,%v18,7	/* Load byte index of found element.  */
Packit Service 82fcde
	/* If found index is within loaded bytes, finish with this
Packit Service 82fcde
	   byte index (either a match or the end of s).  */
Packit Service 82fcde
	clrjl	%r4,%r0,.Lfast_loop_found2
Packit Service 82fcde
Packit Service 82fcde
	/* Align s to 16 byte.  */
Packit Service 82fcde
	risbgn	%r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15.  */
Packit Service 82fcde
	lghi	%r1,16		/* current_len = 16.  */
Packit Service 82fcde
	slr	%r1,%r4		/* Compute bytes to 16bytes boundary.  */
Packit Service 82fcde
Packit Service 82fcde
	/* Four times unrolled; each step checks 16 bytes of s and leaves
	   the loop unless cc is 3 (neither a match nor a zero found).  */
.Lfast_loop:
Packit Service 82fcde
	vl	%v16,0(%r1,%r2)	/* Load search-string.  */
Packit Service dd846c
	vfaezfs	%v18,%v16,%v17,0 /* Find first element in v16 equal to any
Packit Service 82fcde
				    in v17 or first zero element.  */
Packit Service 82fcde
	jno	.Lfast_loop_found
Packit Service 82fcde
Packit Service 82fcde
	vl	%v16,16(%r1,%r2)
Packit Service dd846c
	vfaezfs	%v18,%v16,%v17,0
Packit Service 82fcde
	jno	.Lfast_loop_found16
Packit Service 82fcde
Packit Service 82fcde
	vl	%v16,32(%r1,%r2)
Packit Service dd846c
	vfaezfs	%v18,%v16,%v17,0
Packit Service 82fcde
	jno	.Lfast_loop_found32
Packit Service 82fcde
Packit Service 82fcde
	vl	%v16,48(%r1,%r2)
Packit Service dd846c
	vfaezfs	%v18,%v16,%v17,0
Packit Service 82fcde
	jno	.Lfast_loop_found48
Packit Service 82fcde
Packit Service 82fcde
	aghi	%r1,64
Packit Service 82fcde
	j	.Lfast_loop	/* Loop if no element was equal to any
Packit Service 82fcde
				   accept-character and none was zero.  */
Packit Service 82fcde
Packit Service 82fcde
	/* Found equal or zero element.  The labels below fall through so
	   that %r1 is advanced by 48, 32, 16 or 0 bytes.  */
Packit Service 82fcde
.Lfast_loop_found48:
Packit Service 82fcde
	aghi	%r1,16
Packit Service 82fcde
.Lfast_loop_found32:
Packit Service 82fcde
	aghi	%r1,16
Packit Service 82fcde
.Lfast_loop_found16:
Packit Service 82fcde
	aghi	%r1,16
Packit Service 82fcde
.Lfast_loop_found:
Packit Service 82fcde
	vlgvb	%r4,%v18,7	/* Load byte index of found element.  */
Packit Service 82fcde
.Lfast_loop_found2:
Packit Service dd846c
	srlg	%r5,%r4,2	/* Convert byte-index to character-index.  */
Packit Service dd846c
	vlgvf	%r0,%v16,0(%r5)	/* Get found element.  */
Packit Service 82fcde
	clije	%r0,0,.Lfast_end_null /* Found element is zero (end of s)
					  -> no accept-char found,
					  return null.  */
Packit Service 82fcde
	algfr	%r1,%r4		/* Add found index of char to current len.  */
Packit Service 82fcde
	la	%r2,0(%r1,%r2)	/* And return pointer to first equal char.  */
Packit Service 82fcde
	br	%r14
Packit Service 82fcde
Packit Service 82fcde
.Lfast_end_null:
Packit Service 82fcde
	lghi	%r2,0		/* Return null if no character is equal.  */
Packit Service 82fcde
	br	%r14
Packit Service 82fcde
Packit Service 82fcde
Packit Service 82fcde
Packit Service 82fcde
Packit Service 82fcde
	/*
Packit Service 82fcde
	  Search s for accept in multiple vregs
Packit Service 82fcde
	  -------------------------------------
Packit Service 82fcde
	*/
Packit Service 82fcde
.Lslow:
Packit Service 82fcde
	/* Save registers r6, r8, r9 in v30/v31; they are restored at
	   .Lslow_end.  */
Packit Service 82fcde
	vlvgg	%v30,%r6,0
Packit Service 82fcde
	vlvgp	%v31,%r8,%r9
Packit Service 82fcde
Packit Service dd846c
	/* Accept in v17 without zero */
Packit Service 82fcde
	vlr	%v19,%v17	/* Save first acc-part for a fast reload.  */
Packit Service 82fcde
	vzero	%v20		/* Zero for preparing acc-vector.  */
Packit Service 82fcde
	vone	%v24		/* One for checking result of former string.  */
Packit Service 82fcde
Packit Service 82fcde
	/* Align s to 16 byte.  */
Packit Service 82fcde
	risbg	%r4,%r2,60,128+63,0 /* Test if s is aligned and
Packit Service 82fcde
				       %r4 = bits 60-63 'and' 15.  */
Packit Service 82fcde
	je	.Lslow_loop_str /* If s is aligned, loop aligned.  */
Packit Service 82fcde
	lghi	%r0,15
Packit Service 82fcde
	slr	%r0,%r4		/* Compute highest index to load (15-x).  */
Packit Service dd846c
	vll	%v16,%r0,0(%r2) /* Load up to 16byte boundary;
Packit Service dd846c
				   needs highest index, left bytes are 0.  */
Packit Service 82fcde
	ahi	%r0,1		/* Work with loaded byte count.  */
Packit Service 82fcde
	vzero	%v21		/* Zero out global mask.  */
Packit Service 82fcde
	lghi	%r5,0		/* Set current len of accept-string to zero.  */
Packit Service dd846c
	vfenezf	%v18,%v16,%v16	/* Find zero in current string-part.  */
Packit Service 82fcde
	lghi	%r8,0		/* There is no zero in first accept-part.  */
Packit Service 82fcde
	vlgvb	%r6,%v18,7	/* Load byte index of zero or 16 if no zero.  */
Packit Service 82fcde
	clije	%r6,0,.Lslow_end_null /* If first element is zero
Packit Service 82fcde
					  (end of string) -> return null */
Packit Service 82fcde
	clr	%r0,%r6		/* cc==1 if loaded byte count < zero-index.  */
Packit Service 82fcde
	locrl	%r6,%r0		/* Load on cc==1; zero-index = lbc.  */
Packit Service 82fcde
	j	.Lslow_loop_acc
Packit Service 82fcde
Packit Service 82fcde
Packit Service 82fcde
	/* Process s in 16byte aligned loop.  */
Packit Service 82fcde
.Lslow_next_str:
Packit Service 82fcde
	/* Check results of former processed str-part.  */
Packit Service dd846c
	vfeef	%v18,%v21,%v24	/* Find first equal match in global mask
Packit Service 82fcde
				   (ones in element).  */
Packit Service 82fcde
	vlgvb	%r4,%v18,7	/* Get index of first one (=equal)
Packit Service 82fcde
				   or 16 if no match.  */
Packit Service 82fcde
	/* Equal-index < min(zero-index, loaded byte count)
Packit Service 82fcde
	   -> return pointer to equal element.  */
Packit Service 82fcde
	clrjl	%r4,%r6,.Lslow_index_found
Packit Service 82fcde
	/* Zero-index < loaded byte count
Packit Service 82fcde
	   -> former str-part was last str-part
Packit Service 82fcde
	   -> return null */
Packit Service 82fcde
	clrjl	%r6,%r0,.Lslow_end_null
Packit Service 82fcde
	/* All elements are zero (=no match) -> proceed with next str-part.  */
Packit Service 82fcde
Packit Service 82fcde
	vlr	%v17,%v19	/* Load first part of accept (no zero).  */
Packit Service 82fcde
	algfr	%r1,%r0		/* Add loaded byte count to current len.  */
Packit Service 82fcde
Packit Service 82fcde
.Lslow_loop_str:
Packit Service 82fcde
	vl	%v16,0(%r1,%r2)	/* Load search-string */
Packit Service 82fcde
	lghi	%r0,16		/* Loaded byte count is 16.  */
Packit Service 82fcde
	vzero	%v21		/* Zero out global mask.  */
Packit Service 82fcde
	lghi	%r5,0		/* Set current len of accept to zero.  */
Packit Service dd846c
	vfenezf	%v18,%v16,%v16	/* Find zero in current string-part.  */
Packit Service 82fcde
	lghi	%r8,0		/* There is no zero in first accept-part.  */
Packit Service 82fcde
	vlgvb	%r6,%v18,7	/* Load byte index of zero or 16 if no zero.  */
Packit Service 82fcde
	clije	%r6,0,.Lslow_end_null /* If first element is zero
Packit Service 82fcde
					  (end of string) -> return null.  */
Packit Service 82fcde
Packit Service 82fcde
.Lslow_loop_acc:
Packit Service dd846c
	vfaef	%v22,%v16,%v17,4 /* Create matching-mask (1 in mask ->
Packit Service 82fcde
				    Character matches any accepted character in
Packit Service 82fcde
				    this accept-string-part) IN=0, RT=1.  */
Packit Service dd846c
	vlgvf	%r4,%v22,0	/* Get result of first element.  */
Packit Service 82fcde
	/* First element is equal to any accepted characters
Packit Service 82fcde
	   (all other parts of accept cannot lead to a match before this one)
Packit Service 82fcde
	   -> current len is pointing to first element
Packit Service dd846c
	   -> return found */
Packit Service 82fcde
	clijh	%r4,0,.Lslow_end_found
Packit Service 82fcde
	vo	%v21,%v21,%v22	/* Global-mask = global-|matching-mask.  */
Packit Service 82fcde
	/* Proceed with next acc until end of acc is reached.  */
Packit Service 82fcde
Packit Service 82fcde
Packit Service 82fcde
.Lslow_next_acc:
Packit Service 82fcde
	clijh	%r8,0,.Lslow_next_str /* There was a zero in the last acc-part
Packit Service dd846c
					  -> all of accept was processed;
Packit Service 82fcde
					     evaluate the global mask.  */
Packit Service 82fcde
	vlbb	%v17,16(%r5,%r3),6 /* Load next accept part.  */
Packit Service 82fcde
	aghi	%r5,16		/* Increment current len of accept-string.  */
Packit Service 82fcde
	lcbb	%r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string.  */
Packit Service 82fcde
	jo	.Lslow_next_acc_onbb /* Jump away if accept-string is
Packit Service 82fcde
					 on block-boundary.  */
Packit Service 82fcde
.Lslow_next_acc_notonbb:
Packit Service dd846c
	vistrfs	%v17,%v17	/* Fill with zeros after first zero.  */
Packit Service 82fcde
	jo	.Lslow_loop_acc /* No zero found -> no preparation needed.  */
Packit Service 82fcde
Packit Service 82fcde
.Lslow_next_acc_prepare_zero:
Packit Service 82fcde
	/* Zero in accept-part: fill zeros with first-accept-character.  */
Packit Service dd846c
	vlgvf	%r8,%v17,0	/* Load first element of acc-part.  */
Packit Service 82fcde
	clije	%r8,0,.Lslow_next_str /* Proceed with next string-part,
Packit Service dd846c
					  If first char in this part of accept
Packit Service 82fcde
					  is a zero.  */
Packit Service 82fcde
	/* r8>0 -> zero found in this acc-part.  */
Packit Service dd846c
	vrepf	%v18,%v17,0	/* Replicate first char across all chars.  */
Packit Service dd846c
	vceqf	%v22,%v20,%v17	/* Create a mask (v22) of null chars
Packit Service 82fcde
				   by comparing with 0 (v20).  */
Packit Service 82fcde
	vsel	%v17,%v18,%v17,%v22 /* Replace null chars with first char.  */
Packit Service 82fcde
	j	.Lslow_loop_acc /* Accept part is prepared -> process.  */
Packit Service 82fcde
Packit Service 82fcde
.Lslow_next_acc_onbb:
Packit Service dd846c
	nill	%r9,65532	/* Recognize only fully loaded characters
				   (65532 = 0xfffc rounds the loaded byte
				   count down to a multiple of 4).  */
Packit Service dd846c
	je	.Lslow_next_acc_onbb2 /* Reload vr, if no full wchar_t.  */
Packit Service dd846c
	vfenezf	%v18,%v17,%v17	/* Find zero in loaded bytes of accept part.  */
Packit Service 82fcde
	vlgvb	%r8,%v18,7	/* Load byte index of zero.  */
Packit Service 82fcde
	clrjl	%r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes
Packit Service 82fcde
						    -> Prepare vreg.  */
Packit Service dd846c
.Lslow_next_acc_onbb2:
Packit Service 82fcde
	vl	%v17,0(%r5,%r3)	/* Load over boundary ...  */
Packit Service 82fcde
	lghi	%r8,0		/* r8=0 -> no zero in this part of acc,
Packit Service 82fcde
				   check for zero is in jump-target.  */
Packit Service 82fcde
	j	.Lslow_next_acc_notonbb /* ... and search for zero in
Packit Service 82fcde
					    fully loaded vreg again.  */
Packit Service 82fcde
Packit Service 82fcde
.Lslow_end_null:
Packit Service 82fcde
	lghi	%r1,0		/* Return null if no character is equal.  */
Packit Service 82fcde
	j	.Lslow_end
Packit Service 82fcde
Packit Service 82fcde
	/* NOTE(review): no branch to .Lslow_loop_found is visible in this
	   file and the preceding instruction is an unconditional jump, so
	   the four instructions below look unreachable - confirm before
	   relying on or removing them.  */
.Lslow_loop_found:
Packit Service 82fcde
	vlgvb	%r4,%v18,7	/* Load byte index of found element.  */
Packit Service dd846c
	srlg	%r5,%r4,2	/* Convert byte-index to character-index.  */
Packit Service dd846c
	vlgvf	%r0,%v16,0(%r5)	/* Get found element.  */
Packit Service 82fcde
	clije	%r0,0,.Lslow_end_null /* Return null if no acc-char found.  */
Packit Service 82fcde
Packit Service 82fcde
.Lslow_index_found:
Packit Service 82fcde
	algfr	%r1,%r4		/* Add found index of char to current len.  */
Packit Service 82fcde
.Lslow_end_found:
Packit Service 82fcde
	la	%r1,0(%r1,%r2)	/* And return pointer to first equal char.  */
Packit Service 82fcde
Packit Service 82fcde
.Lslow_end:
Packit Service 82fcde
	/* Restore registers.  The result is kept in %r1 up to here and
	   moved to the return register %r2 last.  */
Packit Service 82fcde
	vlgvg	%r6,%v30,0
Packit Service 82fcde
	vlgvg	%r8,%v31,0
Packit Service 82fcde
	vlgvg	%r9,%v31,1
Packit Service 82fcde
	lgr	%r2,%r1
Packit Service 82fcde
	br	%r14
Packit Service dd846c
.Lfallback:
Packit Service 553098
	/* s is not 4-byte aligned: jump to the common-code variant with
	   %r2, %r3 and %r14 still unmodified.  */
	jg	WCSPBRK_C
Packit Service 553098
END(WCSPBRK_Z13)
Packit Service 553098
Packit Service 553098
# if ! HAVE_WCSPBRK_IFUNC
/* Without ifunc selection, WCSPBRK_Z13 is also made available under the
   public name wcspbrk (strong_alias is defined outside this file).  */
Packit Service 553098
strong_alias (WCSPBRK_Z13, wcspbrk)
Packit Service 553098
# endif
Packit Service 553098
Packit Service 553098
/* NOTE(review): __GI_wcspbrk is presumably the libc-internal name used
   for calls from within a shared libc - confirm against the definitions
   of HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT and IS_IN.  */
# if defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT \
Packit Service 553098
	&& defined SHARED && IS_IN (libc)
Packit Service 553098
strong_alias (WCSPBRK_Z13, __GI_wcspbrk)
Packit Service 553098
# endif
Packit Service 553098
#endif