Blame sysdeps/s390/strstr-arch13.S

Packit Service 691421
/* Vector optimized 32/64 bit S/390 version of strstr.
Packit Service 691421
   Copyright (C) 2019 Free Software Foundation, Inc.
Packit Service 691421
   This file is part of the GNU C Library.
Packit Service 691421
Packit Service 691421
   The GNU C Library is free software; you can redistribute it and/or
Packit Service 691421
   modify it under the terms of the GNU Lesser General Public
Packit Service 691421
   License as published by the Free Software Foundation; either
Packit Service 691421
   version 2.1 of the License, or (at your option) any later version.
Packit Service 691421
Packit Service 691421
   The GNU C Library is distributed in the hope that it will be useful,
Packit Service 691421
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service 691421
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Service 691421
   Lesser General Public License for more details.
Packit Service 691421
Packit Service 691421
   You should have received a copy of the GNU Lesser General Public
Packit Service 691421
   License along with the GNU C Library; if not, see
Packit Service 691421
   <http://www.gnu.org/licenses/>.  */
Packit Service 691421
Packit Service 691421
#include <ifunc-strstr.h>
Packit Service 691421
#if HAVE_STRSTR_ARCH13
Packit Service 691421
# include "sysdep.h"
Packit Service 691421
# include "asm-syntax.h"
Packit Service 691421
	.text
Packit Service 691421
Packit Service 691421
/* char *strstr (const char *haystack=r2, const char *needle=r3)
   Locate a substring.

   Returns in r2 the address of the first occurrence of needle in
   haystack, haystack itself if needle is empty, or 0 if there is no
   match.  Needles longer than 9 bytes are handed over to STRSTR_Z13.

   Register usage:
   r1  = byte count delivered by lcbb for the current load address;
	 decremented by one to serve as highest index for vll.
   r2  = current haystack position / return value.
   r3  = needle.
   r4  = needle length (index of the first zero byte in v18 or 16 if
	 there is none); reused as match index in .Lend_match_found.
   r5  = min-skip-partial-match-index = 17 - needle length.
   v16 = currently loaded part of haystack.
   v17 = index of the first zero byte in a partially loaded haystack.
   v18 = needle.
   v19 = needle length in byte element 7.
   v20 = result of vstrs; byte element 7 is the index of the match.
   v21 = highest loaded byte index in byte element 7 (only used on the
	 block-boundary paths).  */
ENTRY(STRSTR_ARCH13)
	.machine "arch13"
	.machinemode "zarch_nohighgprs"
	lcbb	%r1,0(%r3),6
	jo	.Lneedle_on_bb	/* Needle on block-boundary?  */
	/* No alignment hint is given: the needle can start at any address.  */
	vl	%v18,0(%r3)	/* Load needle.  */
	vfenezb %v19,%v18,%v18	/* v19[7] contains the length of needle.  */
.Lneedle_loaded:
	vlgvb	%r4,%v19,7	/* Get index of zero or 16 if not found.  */
	lghi	%r5,17		/* See below: min-skip-partial-match-index.  */
	/* Empty needle: return immediately with r2 still pointing to the
	   start of haystack.  */
	cgibe	%r4,0,0(%r14)

	/* The vstrs instruction is able to handle needles up to a length of 16,
	   but then we may have to load the next part of haystack with a
	   small offset.  This will be slow - see examples:
	   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
	   needle   =  mmmmmmmmmmmmmma0
	   => needle_len=15; vstrs reports a partial match; haystack+=2
	   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
	   needle   =        mmmmmmmma0000000
	   => needle_len=9; vstrs reports a partial match; haystack+=8

	   Therefore needles longer than 9 bytes (this includes needles
	   without a zero byte in the first 16 bytes, r4 == 16) are passed
	   to the z13 variant.  r2 and r3 are still unmodified here.  */
# if ! HAVE_STRSTR_Z13
#  error The arch13 variant of strstr needs the z13 variant of strstr!
# endif
	clgfi	%r4,9
	/* STRSTR_Z13 is not defined in this file, thus its distance is not
	   known here.  Use the long relative branch (brcl) instead of the
	   16 bit relative form.  */
	jgh	STRSTR_Z13

	/* In case of a partial match, the vstrs instruction returns the index
	   of the partial match in a vector-register.  Then we have to
	   reload the string at the "current-position plus this index" and run
	   vstrs again in order to determine if it was a full match or no match.
	   Transferring this index from vr to gr, computing the haystack-address
	   and loading with vl is quite slow as all instructions have data
	   dependencies.  Thus we assume that a partial match is always at the
	   first possible index and just load the next part of haystack from
	   there instead of waiting until the correct index is computed:
	   min-skip-partial-match-index = (16 - n_len) + 1  */
	sgr	%r5,%r4		/* r5 = 17 - needle length.  */

	/* Main loop: four 16 byte parts of haystack per iteration.  Before
	   each vl, lcbb checks whether a full 16 byte load stays within the
	   current block.  */
.Lloop:
	lcbb	%r1,0(%r2),6
	jo	.Lloop_haystack_on_bb	/* Haystack on block-boundary?  */
	vl	%v16,0(%r2)		/* Load next part of haystack.  */
.Lloop_haystack_loaded:
	/* Vector string search with zero search (cc=0 => no match).  */
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc
	lcbb	%r1,16(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb16
	vl	%v16,16(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc16
	lcbb	%r1,32(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb32
	vl	%v16,32(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc32
	lcbb	%r1,48(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb48
	vl	%v16,48(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc48
	la	%r2,64(%r2)
	j	.Lloop

	/* The labels below fall through into each other so that r2 is
	   advanced to the part of haystack which produced the nonzero cc.
	   la does not change the condition code, thus the cc of vstrs is
	   still valid at .Lloop_vstrs_nonzero_cc.  */
.Lloop_vstrs_nonzero_cc48:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc32:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc16:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc:
	jh	.Lend_match_found /* cc == 2 (full match)  */
	jl	.Lend_no_match	/* cc == 1 (no match, end of string)  */
	/* cc == 3 (partial match) See above: min-skip-partial-match-index!  */
	lcbb	%r1,0(%r5,%r2),6
	la	%r2,0(%r5,%r2)	/* Advance; keeps the cc of lcbb.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	/* Unrolled re-check after a partial match: advance haystack by r5
	   as long as vstrs keeps reporting partial matches (cc == 3) and
	   return to the main loop on cc == 0.  */
.Lloop_vstrs_nonzero_cc_loop:
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	j	.Lloop_vstrs_nonzero_cc_loop

.Lend_no_match:
	lghi	%r2,0		/* Return NULL.  */
	br	%r14
.Lend_match_found:
	vlgvb	%r4,%v20,7	/* Index of the match within v16.  */
	la	%r2,0(%r4,%r2)	/* Return current position + index.  */
	br	%r14

	/* Entered from the unrolled main loop; the fall-through chain
	   advances r2 to the part of haystack which is on the boundary.  */
.Lloop_haystack_on_bb48:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb32:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb16:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb:
	/* Haystack located on page-boundary.  Load only the bytes up to the
	   boundary first.  The full 16 bytes are loaded only if the
	   haystack does not end within those bytes.  */
	ahi	%r1,-1		/* vll needs highest index instead of count.  */
	vll	%v16,%r1,0(%r2)
	vlvgb	%v21,%r1,7
	vfenezb	%v17,%v16,%v16	/* Search zero in loaded haystack bytes.  */
	veclb	%v17,%v21		/* Zero index <= loaded byte index?  */
	jle	.Lloop_haystack_loaded	/* -> v16 contains full haystack.  */
	vl	%v16,0(%r2)	/* Load haystack beyond page boundary.  */
	j	.Lloop_haystack_loaded

.Lneedle_on_bb:
	/* Needle located on page-boundary.  Same scheme as for the haystack:
	   partial load first, full load only if no zero was found within
	   the loaded bytes.  */
	ahi	%r1,-1		/* vll needs highest index instead of count.  */
	vll	%v18,%r1,0(%r3)
	vlvgb	%v21,%r1,7
	vfenezb	%v19,%v18,%v18	/* Search zero in loaded needle bytes.  */
	veclb	%v19,%v21	/* Zero index <= max loaded byte index?  */
	jle	.Lneedle_loaded	/* -> v18 contains full needle.  */
	vl	%v18,0(%r3)	/* Load needle beyond page boundary.  */
	vfenezb	%v19,%v18,%v18	/* Recompute needle length on full load.  */
	j	.Lneedle_loaded
END(STRSTR_ARCH13)
Packit Service 691421
Packit Service 691421
/* If HAVE_STRSTR_IFUNC is not set, make strstr itself a strong alias of
   this variant.  */
# if ! HAVE_STRSTR_IFUNC
Packit Service 691421
strong_alias (STRSTR_ARCH13, strstr)
Packit Service 691421
# endif
Packit Service 691421
Packit Service 691421
/* In the shared libc, if STRSTR_Z13_ONLY_USED_AS_FALLBACK is set, also
   make __GI_strstr a strong alias of this variant.
   NOTE(review): __GI_strstr is presumably the libc-internal symbol used
   for calls from within libc - confirm.  */
# if STRSTR_Z13_ONLY_USED_AS_FALLBACK && defined SHARED && IS_IN (libc)
Packit Service 691421
strong_alias (STRSTR_ARCH13, __GI_strstr)
Packit Service 691421
# endif
Packit Service 691421
#endif