hjl / source-git / glibc

Forked from source-git/glibc 3 years ago
Clone

Blame sysdeps/s390/strpbrk-vx.S

Packit 6c4009
/* Vector optimized 32/64 bit S/390 version of strpbrk.
Packit 6c4009
   Copyright (C) 2015-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 492e9e
#include <ifunc-strpbrk.h>
Packit 492e9e
Packit 492e9e
#if HAVE_STRPBRK_Z13
Packit 6c4009
Packit 6c4009
# include "sysdep.h"
Packit 6c4009
# include "asm-syntax.h"
Packit 6c4009
Packit 6c4009
	.text
Packit 6c4009
Packit 6c4009
/* char *strpbrk (const char *s, const char * accept)
Packit 6c4009
   The  strpbrk()  function locates the first occurrence in the string s
Packit 6c4009
   of any of the characters in the string accept and returns a pointer
Packit 6c4009
   to that character or NULL if not found.
Packit 6c4009
Packit 6c4009
   This method checks the length of accept string. If it fits entirely
Packit 6c4009
   in one vector register, a fast algorithm is used, which does not need
Packit 6c4009
   to check multiple parts of accept-string. Otherwise a slower full
Packit 6c4009
   check of accept-string is used.
Packit 6c4009
Packit 6c4009
   register overview:
Packit 6c4009
   r3:  pointer to start of accept-string
Packit 6c4009
   r2:  pointer to start of search-string
Packit 6c4009
   r0:  loaded byte count of vlbb search-string (32bit unsigned)
Packit 6c4009
   r4:  found byte index (32bit unsigned)
Packit 6c4009
   r1:  current return len (64bit unsigned)
Packit 6c4009
   v16: search-string
Packit 6c4009
   v17: accept-string
Packit 6c4009
   v18: temp-vreg
Packit 6c4009
Packit 6c4009
   ONLY FOR SLOW:
Packit 6c4009
   v19: first accept-string
Packit 6c4009
   v20: zero for preparing acc-vector
Packit 6c4009
   v21: global mask; 1 indicates a match between
Packit 6c4009
	search-string-vreg and any accept-character
Packit 6c4009
   v22: current mask; 1 indicates a match between
Packit 6c4009
	search-string-vreg and any accept-character in current acc-vreg
Packit 6c4009
   v24: one for result-checking of former string-part
Packit 6c4009
   v30, v31: for re-/storing registers r6, r8, r9
Packit 6c4009
   r5:  current len of accept-string
Packit 6c4009
   r6:  zero-index in search-string or 16 if no zero
Packit 6c4009
	or min(zero-index, loaded byte count)
Packit 6c4009
   r8:  >0, if former accept-string-part contains a zero,
Packit 6c4009
	otherwise =0;
Packit 6c4009
   r9:  loaded byte count of vlbb accept-string
Packit 6c4009
*/
Packit 492e9e
ENTRY(STRPBRK_Z13)
	/* In:  %r2 = s (search-string), %r3 = accept.
	   Out: %r2 = pointer to the first byte of s which is equal to any
		byte of accept, or 0 (null) if there is none.
	   Returns via %r14.  */
Packit 6c4009
	.machine "z13"
Packit 6c4009
	.machinemode "zarch_nohighgprs"
Packit 6c4009
Packit 6c4009
	/*
Packit 6c4009
	  Check if accept-string fits in one vreg:
Packit 6c4009
	  ----------------------------------------
Packit 6c4009
	*/
Packit 6c4009
	vlbb	%v17,0(%r3),6	/* Load accept.  */
Packit 6c4009
	lghi	%r1,0		/* Zero out current len.  */
Packit 6c4009
	vlgvb	%r0,%v17,0	/* Get first element.  */
Packit 6c4009
	clije	%r0,0,.Lfast_end_null /* Return null if accept is empty.  */
Packit 6c4009
	lcbb	%r0,0(%r3),6	/* Get loaded byte count of accept.  */
Packit 6c4009
	jo	.Lcheck_onbb	/* Special case if accept lies
Packit 6c4009
				   on block-boundary.  */
Packit 6c4009
.Lcheck_notonbb:
Packit 6c4009
	vistrbs	%v17,%v17	/* Fill with zeros after first zero.  */
Packit 6c4009
	je	.Lfast		/* Zero found -> accept fits in one vreg.  */
Packit 6c4009
	j	.Lslow		/* No zero -> accept exceeds one vreg  */
Packit 6c4009
Packit 6c4009
Packit 6c4009
.Lcheck_onbb:
Packit 6c4009
	/* Accept lies on block-boundary.  */
Packit 6c4009
	vfenezb	%v18,%v17,%v17	/* Search zero in loaded accept bytes.  */
Packit 6c4009
	vlgvb	%r4,%v18,7	/* Get index of zero or 16 if not found.  */
Packit 6c4009
	clrjl	%r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count ->
Packit 6c4009
					    Accept fits in one vreg;
Packit 6c4009
					    Fill with zeros and proceed
Packit 6c4009
					    with FAST.  */
Packit 6c4009
	vl	%v17,0(%r3)	/* Load accept, which exceeds loaded bytes.  */
Packit 6c4009
	j	.Lcheck_notonbb /* Check if accept fits in one vreg.  */
Packit 6c4009
Packit 6c4009
Packit 6c4009
	/*
Packit 6c4009
	  Search s for accept in one vreg
Packit 6c4009
	  -------------------------------
Packit 6c4009
	*/
Packit 6c4009
.Lfast:
Packit 6c4009
	/* Complete accept-string in v17 and remaining bytes are zero.  */
Packit 6c4009
Packit 6c4009
	vlbb	%v16,0(%r2),6	/* Load s until next 4k-byte boundary.  */
Packit 6c4009
	lcbb	%r0,0(%r2),6	/* Get bytes to 4k-byte boundary or 16.  */
Packit 6c4009
Packit 6c4009
	vfaezbs	%v18,%v16,%v17,0 /* Find first element in v16 equal to any
Packit 6c4009
				    in v17 or first zero element.  */
Packit 6c4009
Packit 6c4009
	vlgvb	%r4,%v18,7	/* Load byte index of found element.  */
Packit 6c4009
	/* If found index is within loaded bytes, return with found
Packit 6c4009
	   element index (=equal count).  */
Packit 6c4009
	clrjl	%r4,%r0,.Lfast_loop_found2
Packit 6c4009
Packit 6c4009
	/* Align s to 16 byte.  */
Packit 6c4009
	risbgn	%r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15.  */
Packit 6c4009
	lghi	%r1,16		/* current_len = 16.  */
Packit 6c4009
	slr	%r1,%r4		/* Compute bytes to 16bytes boundary.  */
Packit 6c4009
Packit 6c4009
	/* Process s in 16byte aligned loop.  */
Packit 6c4009
.Lfast_loop:
Packit 6c4009
	vl	%v16,0(%r1,%r2)	/* Load search-string.  */
Packit 6c4009
	vfaezbs	%v18,%v16,%v17,0 /* Find first element in v16 equal to any
Packit 6c4009
				    in v17 or first zero element.  */
Packit 6c4009
	jno	.Lfast_loop_found
Packit 6c4009
Packit 6c4009
	vl	%v16,16(%r1,%r2)	/* Unrolled: same check at +16, +32, +48.  */
Packit 6c4009
	vfaezbs	%v18,%v16,%v17,0
Packit 6c4009
	jno	.Lfast_loop_found16
Packit 6c4009
Packit 6c4009
	vl	%v16,32(%r1,%r2)
Packit 6c4009
	vfaezbs	%v18,%v16,%v17,0
Packit 6c4009
	jno	.Lfast_loop_found32
Packit 6c4009
Packit 6c4009
	vl	%v16,48(%r1,%r2)
Packit 6c4009
	vfaezbs	%v18,%v16,%v17,0
Packit 6c4009
	jno	.Lfast_loop_found48
Packit 6c4009
Packit 6c4009
	aghi	%r1,64		/* Advance current len by the 64 checked bytes.  */
Packit 6c4009
	j	.Lfast_loop	/* Loop if no element was equal to any in
Packit 6c4009
				   accept and none was zero.  */
Packit 6c4009
Packit 6c4009
	/* Found equal or zero element.  */
Packit 6c4009
.Lfast_loop_found48:
Packit 6c4009
	aghi	%r1,16		/* Fall through: adds 16 per skipped part.  */
Packit 6c4009
.Lfast_loop_found32:
Packit 6c4009
	aghi	%r1,16
Packit 6c4009
.Lfast_loop_found16:
Packit 6c4009
	aghi	%r1,16
Packit 6c4009
.Lfast_loop_found:
Packit 6c4009
	vlgvb	%r4,%v18,7	/* Load byte index of found element.  */
Packit 6c4009
.Lfast_loop_found2:
Packit 6c4009
	vlgvb	%r0,%v16,0(%r4)	/* Get found element.  */
Packit 6c4009
	clije	%r0,0,.Lfast_end_null /* Found element is zero (end of s)
					 -> no accept-char found,
					    return null.  */
Packit 6c4009
	algfr	%r1,%r4		/* Add found index of char to current len.  */
Packit 6c4009
	la	%r2,0(%r1,%r2)	/* And return pointer to first equal char.  */
Packit 6c4009
	br	%r14
Packit 6c4009
Packit 6c4009
.Lfast_end_null:
Packit 6c4009
	lghi	%r2,0		/* Return null if no character is equal.  */
Packit 6c4009
	br	%r14
Packit 6c4009
Packit 6c4009
Packit 6c4009
Packit 6c4009
Packit 6c4009
	/*
Packit 6c4009
	  Search s for accept in multiple vregs
Packit 6c4009
	  -------------------------------------
Packit 6c4009
	*/
Packit 6c4009
.Lslow:
Packit 6c4009
	/* Save registers.  */
Packit 6c4009
	vlvgg	%v30,%r6,0	/* Keep r6 in v30.  */
Packit 6c4009
	vlvgp	%v31,%r8,%r9	/* Keep r8 and r9 in v31.  */
Packit 6c4009
Packit 6c4009
	/* accept in v17 without zero.  */
Packit 6c4009
	vlr	%v19,%v17	/* Save first acc-part for a fast reload.  */
Packit 6c4009
	vzero	%v20		/* Zero for preparing acc-vector.  */
Packit 6c4009
	vone	%v24		/* One for checking result of former string.  */
Packit 6c4009
Packit 6c4009
	/* Align s to 16 byte.  */
Packit 6c4009
	risbg	%r4,%r2,60,128+63,0 /* Test if s is aligned and
Packit 6c4009
				       %r4 = bits 60-63 'and' 15.  */
Packit 6c4009
	je	.Lslow_loop_str /* If s is aligned, loop aligned.  */
Packit 6c4009
	lghi	%r0,15
Packit 6c4009
	slr	%r0,%r4		/* Compute highest index to load (15-x).  */
Packit 6c4009
	vll	%v16,%r0,0(%r2) /* Load up to 16 byte boundary (vll needs
Packit 6c4009
				   highest index, remaining bytes are 0).  */
Packit 6c4009
	ahi	%r0,1		/* Work with loaded byte count.  */
Packit 6c4009
	vzero	%v21		/* Zero out global mask.  */
Packit 6c4009
	lghi	%r5,0		/* Set current len of accept-string to zero.  */
Packit 6c4009
	vfenezb	%v18,%v16,%v16	/* Find zero in current string-part.  */
Packit 6c4009
	lghi	%r8,0		/* There is no zero in first accept-part.  */
Packit 6c4009
	vlgvb	%r6,%v18,7	/* Load byte index of zero or 16 if no zero.  */
Packit 6c4009
	clije	%r6,0,.Lslow_end_null /* If first element is zero
Packit 6c4009
					  (end of string) -> return null */
Packit 6c4009
	clr	%r0,%r6		/* cc==1 if loaded byte count < zero-index.  */
Packit 6c4009
	locrl	%r6,%r0		/* Load on cc==1; zero-index = lbc.  */
Packit 6c4009
	j	.Lslow_loop_acc
Packit 6c4009
Packit 6c4009
Packit 6c4009
	/* Process s in 16byte aligned loop.  */
Packit 6c4009
.Lslow_next_str:
Packit 6c4009
	/* Check results of former processed str-part.  */
Packit 6c4009
	vfeeb	%v18,%v21,%v24	/* Find first equal match in global mask
Packit 6c4009
				   (ones in element).  */
Packit 6c4009
	vlgvb	%r4,%v18,7	/* Get index of first one (=equal)
Packit 6c4009
				   or 16 if no match.  */
Packit 6c4009
	/* Equal-index < min(zero-index, loaded byte count)
Packit 6c4009
	   -> return pointer to equal element.  */
Packit 6c4009
	clrjl	%r4,%r6,.Lslow_index_found
Packit 6c4009
	/* Zero-index < loaded byte count
Packit 6c4009
	   -> former str-part was last str-part
Packit 6c4009
	   -> return null */
Packit 6c4009
	clrjl	%r6,%r0,.Lslow_end_null
Packit 6c4009
	/* All elements are zero (=no match) -> proceed with next str-part.  */
Packit 6c4009
Packit 6c4009
	vlr	%v17,%v19	/* Load first part of accept (no zero).  */
Packit 6c4009
	algfr	%r1,%r0		/* Add loaded byte count to current len.  */
Packit 6c4009
Packit 6c4009
.Lslow_loop_str:
Packit 6c4009
	vl	%v16,0(%r1,%r2)	/* Load search-string */
Packit 6c4009
	lghi	%r0,16		/* Loaded byte count is 16.  */
Packit 6c4009
	vzero	%v21		/* Zero out global mask.  */
Packit 6c4009
	lghi	%r5,0		/* Set current len of accept to zero.  */
Packit 6c4009
	vfenezb	%v18,%v16,%v16	/* Find zero in current string-part.  */
Packit 6c4009
	lghi	%r8,0		/* There is no zero in first accept-part.  */
Packit 6c4009
	vlgvb	%r6,%v18,7	/* Load byte index of zero or 16 if no zero.  */
Packit 6c4009
	clije	%r6,0,.Lslow_end_null /* If first element is zero
Packit 6c4009
					  (end of string) -> return null.  */
Packit 6c4009
Packit 6c4009
.Lslow_loop_acc:
Packit 6c4009
	vfaeb	%v22,%v16,%v17,4 /* Create matching-mask (1 in mask ->
Packit 6c4009
				    Character matches any accepted character in
Packit 6c4009
				    this accept-string-part) IN=0, RT=1.  */
Packit 6c4009
	vlgvb	%r4,%v22,0	/* Get result of first element.  */
Packit 6c4009
	/* First element is equal to any accepted characters
Packit 6c4009
	   (all other parts of accept cannot lead to a match before this one)
Packit 6c4009
	   -> current len is pointing to first element
Packit 6c4009
	   -> return found  */
Packit 6c4009
	clijh	%r4,0,.Lslow_end_found
Packit 6c4009
	vo	%v21,%v21,%v22	/* Global-mask = global-|matching-mask.  */
Packit 6c4009
	/* Proceed with next acc until end of acc is reached.  */
Packit 6c4009
Packit 6c4009
Packit 6c4009
.Lslow_next_acc:
Packit 6c4009
	clijh	%r8,0,.Lslow_next_str /* There was a zero in the last acc-part
Packit 6c4009
					  -> all of accept was processed;
Packit 6c4009
					     check results of this str-part.  */
Packit 6c4009
	vlbb	%v17,16(%r5,%r3),6 /* Load next accept part.  */
Packit 6c4009
	aghi	%r5,16		/* Increment current len of accept-string.  */
Packit 6c4009
	lcbb	%r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string.  */
Packit 6c4009
	jo	.Lslow_next_acc_onbb /* Jump away if accept-string is
Packit 6c4009
					 on block-boundary.  */
Packit 6c4009
.Lslow_next_acc_notonbb:
Packit 6c4009
	vistrbs	%v17,%v17	/* Fill with zeros after first zero.  */
Packit 6c4009
	jo	.Lslow_loop_acc /* No zero found -> no preparation needed.  */
Packit 6c4009
Packit 6c4009
.Lslow_next_acc_prepare_zero:
Packit 6c4009
	/* Zero in accept-part: fill zeros with first-accept-character.  */
Packit 6c4009
	vlgvb	%r8,%v17,0	/* Load first element of acc-part.  */
Packit 6c4009
	clije	%r8,0,.Lslow_next_str /* Proceed with next string-part,
Packit 6c4009
					  if first char in this part of accept
Packit 6c4009
					  is a zero.  */
Packit 6c4009
	/* r8>0 -> zero found in this acc-part.  */
Packit 6c4009
	vrepb	%v18,%v17,0	/* Replicate first char across all chars.  */
Packit 6c4009
	vceqb	%v22,%v20,%v17	/* Create a mask (v22) of null chars
Packit 6c4009
				   by comparing with 0 (v20).  */
Packit 6c4009
	vsel	%v17,%v18,%v17,%v22 /* Replace null chars with first char.  */
Packit 6c4009
	j	.Lslow_loop_acc /* Accept part is prepared -> process.  */
Packit 6c4009
Packit 6c4009
.Lslow_next_acc_onbb:
Packit 6c4009
	vfenezb	%v18,%v17,%v17	/* Find zero in loaded bytes of accept part.  */
Packit 6c4009
	vlgvb	%r8,%v18,7	/* Load byte index of zero.  */
Packit 6c4009
	clrjl	%r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes
Packit 6c4009
						    -> Prepare vreg.  */
Packit 6c4009
	vl	%v17,0(%r5,%r3)	/* Load over boundary ...  */
Packit 6c4009
	lghi	%r8,0		/* r8=0 -> no zero in this part of acc,
Packit 6c4009
				   check for zero is in jump-target.  */
Packit 6c4009
	j	.Lslow_next_acc_notonbb /* ... and search for zero in
Packit 6c4009
					    fully loaded vreg again.  */
Packit 6c4009
Packit 6c4009
.Lslow_end_null:
Packit 6c4009
	lghi	%r1,0		/* Return null if no character is equal.  */
Packit 6c4009
	j	.Lslow_end
Packit 6c4009
Packit 6c4009
	/* NOTE(review): no branch in this function targets
	   .Lslow_loop_found and the instruction before it is an
	   unconditional jump, so the next three instructions look
	   unreachable -- confirm before removing.  */
.Lslow_loop_found:
Packit 6c4009
	vlgvb	%r4,%v18,7	/* Load byte index of found element.  */
Packit 6c4009
	vlgvb	%r0,%v16,0(%r4)	/* Get found element.  */
Packit 6c4009
	clije	%r0,0,.Lslow_end_null /* Return null if no acc-char found.  */
Packit 6c4009
Packit 6c4009
.Lslow_index_found:
Packit 6c4009
	algfr	%r1,%r4		/* Add found index of char to current len.  */
Packit 6c4009
.Lslow_end_found:
Packit 6c4009
	la	%r1,0(%r1,%r2)	/* And return pointer to first equal char.  */
Packit 6c4009
Packit 6c4009
.Lslow_end:
Packit 6c4009
	/* Restore registers.  */
Packit 6c4009
	vlgvg	%r6,%v30,0
Packit 6c4009
	vlgvg	%r8,%v31,0
Packit 6c4009
	vlgvg	%r9,%v31,1
Packit 6c4009
	lgr	%r2,%r1		/* Result (pointer or null) goes to r2.  */
Packit 6c4009
	br	%r14
Packit 492e9e
END(STRPBRK_Z13)
Packit 492e9e
Packit 492e9e
/* Make strpbrk a strong alias of STRPBRK_Z13 when HAVE_STRPBRK_IFUNC is
   not set.  NOTE(review): presumably this is the build where no ifunc
   resolver selects the variant -- confirm in ifunc-strpbrk.h.  */
# if ! HAVE_STRPBRK_IFUNC
Packit 492e9e
strong_alias (STRPBRK_Z13, strpbrk)
Packit 492e9e
# endif
Packit 492e9e
Packit 492e9e
/* Make __GI_strpbrk a strong alias of STRPBRK_Z13 in a shared libc build
   when HAVE_STRPBRK_C is not set.  NOTE(review): presumably __GI_strpbrk
   is the libc-internal name and no C variant provides it here -- confirm
   in ifunc-strpbrk.h.  */
# if ! HAVE_STRPBRK_C && defined SHARED && IS_IN (libc)
Packit 492e9e
strong_alias (STRPBRK_Z13, __GI_strpbrk)
Packit 492e9e
# endif
Packit 492e9e
Packit 492e9e
#endif /* HAVE_STRPBRK_Z13  */