/* Vector optimized 32/64 bit S/390 version of strstr.
   Copyright (C) 2019 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <ifunc-strstr.h>
/* Everything below is only assembled if the arch13 variant is wanted.  */
#if HAVE_STRSTR_ARCH13
# include "sysdep.h"
# include "asm-syntax.h"
	.text

/* char *strstr (const char *haystack=r2, const char *needle=r3)
   Locate a substring.

   Returns in r2 the address of the first occurrence of needle in
   haystack, haystack itself if needle is the empty string, or zero if
   needle does not occur.  Needles longer than 9 bytes are handed over
   to STRSTR_Z13.

   Register usage:
   r1:  number of bytes up to the next 4k block boundary (lcbb result),
	afterwards the highest byte index for vll.
   r2:  current position in haystack; return value.
   r3:  needle.
   r4:  length of needle (16 if no zero was found in the first 16 bytes);
	at the end the index of the match within the current part.
   r5:  min-skip-partial-match-index (see below).
   v16: current part of haystack.
   v17: index of the first zero in v16 (block boundary path only).
   v18: needle.
   v19: byte element 7 contains the length of needle.
   v20: result of vstrs; byte element 7 contains the match index.
   v21: byte element 7 contains the highest byte index loaded by vll.  */
ENTRY(STRSTR_ARCH13)
	.machine "arch13"
	.machinemode "zarch_nohighgprs"
	lcbb	%r1,0(%r3),6
	jo	.Lneedle_on_bb	/* Needle on block-boundary?  */
	/* The needle can have any alignment, thus no alignment hint is
	   passed to vl.  */
	vl	%v18,0(%r3)	/* Load needle.  */
	vfenezb %v19,%v18,%v18	/* v19[7] contains the length of needle.  */
.Lneedle_loaded:
	vlgvb	%r4,%v19,7	/* Get index of zero or 16 if not found.  */
	lghi	%r5,17		/* See below: min-skip-partial-match-index.  */
	cgibe	%r4,0,0(%r14)	/* Test if needle is zero and return.  */

	/* The vstrs instruction is able to handle needles up to a length of 16,
	   but then we may have to load the next part of haystack with a
	   small offset.  This will be slow - see examples:
	   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
	   needle   =  mmmmmmmmmmmmmma0
	   => needle_len=15; vstrs reports a partial match; haystack+=2
	   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
	   needle   =        mmmmmmmma0000000
	   => needle_len=9; vstrs reports a partial match; haystack+=8  */
# if ! HAVE_STRSTR_Z13
#  error The arch13 variant of strstr needs the z13 variant of strstr!
# endif
	clgfi	%r4,9
	/* STRSTR_Z13 is not defined in this file.  Use the long relative
	   branch so that the target is reachable independent of where the
	   fallback is finally placed.  */
	jgh	STRSTR_Z13

	/* In case of a partial match, the vstrs instruction returns the index
	   of the partial match in a vector-register.  Then we have to
	   reload the string at the "current-position plus this index" and run
	   vstrs again in order to determine if it was a full match or no match.
	   Transferring this index from vr to gr, computing the haystack-address
	   and loading with vl is quite slow as all instructions have data
	   dependencies.  Thus we assume that a partial match is always at the
	   first possible index and just load the next part of haystack from
	   there instead of waiting until the correct index is computed:
	   min-skip-partial-match-index = (16 - n_len) + 1  */
	sgr	%r5,%r4

	/* Main loop: Process four parts of 16 bytes of haystack per
	   iteration.  */
.Lloop:
	lcbb	%r1,0(%r2),6
	jo	.Lloop_haystack_on_bb	/* Haystack on block-boundary?  */
	vl	%v16,0(%r2)		/* Load next part of haystack.  */
.Lloop_haystack_loaded:
	/* Vector string search with zero search (cc=0 => no match).  */
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc
	lcbb	%r1,16(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb16
	vl	%v16,16(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc16
	lcbb	%r1,32(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb32
	vl	%v16,32(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc32
	lcbb	%r1,48(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb48
	vl	%v16,48(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc48
	la	%r2,64(%r2)
	j	.Lloop

	/* Fall-through chain: Adjust r2 to the part of haystack which was
	   processed by the vstrs that set the nonzero cc.  la does not change
	   the condition code.  */
.Lloop_vstrs_nonzero_cc48:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc32:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc16:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc:
	jh	.Lend_match_found /* cc == 2 (full match)  */
	jl	.Lend_no_match	/* cc == 1 (no match, end of string)  */
	/* cc == 3 (partial match) See above: min-skip-partial-match-index!  */
	lcbb	%r1,0(%r5,%r2),6
	la	%r2,0(%r5,%r2)
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	/* Partial match loop: As long as vstrs reports a partial match,
	   advance by min-skip-partial-match-index and search again.  If there
	   is neither a match nor a partial match (cc == 0), continue with the
	   main loop.  */
.Lloop_vstrs_nonzero_cc_loop:
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	j	.Lloop_vstrs_nonzero_cc_loop

.Lend_no_match:
	lghi	%r2,0		/* Return zero.  */
	br	%r14
.Lend_match_found:
	vlgvb	%r4,%v20,7	/* Index of the match in current part.  */
	la	%r2,0(%r4,%r2)	/* Return pointer to the match.  */
	br	%r14

	/* Fall-through chain: Adjust r2 to the part of haystack for which
	   lcbb reported the block boundary.  */
.Lloop_haystack_on_bb48:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb32:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb16:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb:
	/* Haystack located on page-boundary.  */
	ahi	%r1,-1		/* vll needs highest index instead of count.  */
	vll	%v16,%r1,0(%r2)
	vlvgb	%v21,%r1,7
	vfenezb	%v17,%v16,%v16	/* Search zero in loaded haystack bytes.  */
	veclb	%v17,%v21		/* Zero index <= loaded byte index?  */
	jle	.Lloop_haystack_loaded	/* -> v16 contains full haystack.  */
	vl	%v16,0(%r2)	/* Load haystack beyond page boundary.  */
	j	.Lloop_haystack_loaded

.Lneedle_on_bb:
	/* Needle located on page-boundary.  */
	ahi	%r1,-1		/* vll needs highest index instead of count.  */
	vll	%v18,%r1,0(%r3)
	vlvgb	%v21,%r1,7
	vfenezb	%v19,%v18,%v18	/* Search zero in loaded needle bytes.  */
	veclb	%v19,%v21	/* Zero index <= max loaded byte index?  */
	jle	.Lneedle_loaded	/* -> v18 contains full needle.  */
	/* The needle continues beyond the boundary.  The full needle has to
	   be reloaded into v18 as this is the register used by the vstrs
	   instructions.  Otherwise the needle would stay truncated at the
	   boundary.  */
	vl	%v18,0(%r3)	/* Load needle beyond page boundary.  */
	vfenezb	%v19,%v18,%v18	/* v19[7] contains the length of needle.  */
	j	.Lneedle_loaded
END(STRSTR_ARCH13)

# if ! HAVE_STRSTR_IFUNC
/* Without ifunc support, this variant is exported directly as strstr.  */
strong_alias (STRSTR_ARCH13, strstr)
# endif

# if STRSTR_Z13_ONLY_USED_AS_FALLBACK && defined SHARED && IS_IN (libc)
/* Inside the shared libc, also provide this variant as __GI_strstr
   (presumably the libc-internal name of strstr - see ifunc-strstr.h).  */
strong_alias (STRSTR_ARCH13, __GI_strstr)
# endif
#endif