Blame sysdeps/s390/strstr-arch13.S

/* Vector optimized 32/64 bit S/390 version of strstr.
   Copyright (C) 2019 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
Packit Service 40e629
#include <ifunc-strstr.h>
Packit Service 40e629
#if HAVE_STRSTR_ARCH13
Packit Service 40e629
# include "sysdep.h"
Packit Service 40e629
# include "asm-syntax.h"
Packit Service 40e629
	.text
Packit Service 40e629
Packit Service 40e629
/* char *strstr (const char *haystack=r2, const char *needle=r3)
   Locate a substring.

   Returns in r2 the address of the first occurrence of needle in
   haystack, haystack itself if needle is empty, or 0 if there is no
   occurrence.  Needles longer than 9 bytes are handed over to
   STRSTR_Z13 (tail-jump, r2/r3/r14 still untouched at that point).

   Register usage:
   r1:  Byte count delivered by lcbb (bytes up to the block boundary,
	at most 16); decremented to the highest index for vll.
   r2:  Current haystack position / return value.
   r3:  Needle pointer.
   r4:  Needle length (index of the zero byte within the first 16
	needle bytes, 16 if none); reused for the match index at the end.
   r5:  min-skip-partial-match-index = 17 - needle length.
   r14: Return address.
   v16: Currently inspected 16 bytes of haystack.
   v17: Index of the first zero byte in a partially loaded haystack.
   v18: Needle.
   v19: Needle length in byte 7 (as expected by vstrs).
   v20: Result of vstrs; byte 7 holds the match index.
   v21: Highest loaded byte index in byte 7 (block-boundary paths).  */
ENTRY(STRSTR_ARCH13)
	.machine "arch13"
	.machinemode "zarch_nohighgprs"
	/* lcbb with code 6 counts the bytes up to the next 4K boundary
	   (max. 16) and sets cc 3 if fewer than 16 are left.  */
	lcbb	%r1,0(%r3),6
	jo	.Lneedle_on_bb	/* Needle on block-boundary?  */
	/* No alignment hint here: needle may have any alignment.  */
	vl	%v18,0(%r3)	/* Load needle.  */
	vfenezb %v19,%v18,%v18	/* v19[7] contains the length of needle.  */
.Lneedle_loaded:
	vlgvb	%r4,%v19,7	/* Get index of zero or 16 if not found.  */
	lghi	%r5,17		/* See below: min-skip-partial-match-index.  */
	/* Empty needle: return with r2 still pointing to haystack.  */
	cgibe	%r4,0,0(%r14)	/* Test if needle is zero and return.  */

	/* The vstrs instruction is able to handle needles up to a length of 16,
	   but then we may have to load the next part of haystack with a
	   small offset.  This will be slow - see examples:
	   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
	   needle   =  mmmmmmmmmmmmmma0
	   => needle_len=15; vstrs reports a partial match; haystack+=2
	   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
	   needle   =        mmmmmmmma0000000
	   => needle_len=9; vstrs reports a partial match; haystack+=8  */
# if ! HAVE_STRSTR_Z13
#  error The arch13 variant of strstr needs the z13 variant of strstr!
# endif
	clgfi	%r4,9
	jh	STRSTR_Z13	/* Needle longer than 9 bytes: use z13 variant.  */

	/* In case of a partial match, the vstrs instruction returns the index
	   of the partial match in a vector-register.  Then we have to
	   reload the string at the "current-position plus this index" and run
	   vstrs again in order to determine if it was a full match or no match.
	   Transferring this index from vr to gr, computing the
	   haystack-address and loading with vl is quite slow as all
	   instructions have data dependencies.  Thus we assume that a partial
	   match is always at the first possible index and just load the next
	   part of haystack from there instead of waiting until the correct
	   index is computed:
	   min-skip-partial-match-index = (16 - n_len) + 1  */
	sgr	%r5,%r4		/* r5 = 17 - needle length.  */

	/* Main loop, unrolled four times: inspect 16 haystack bytes per
	   step at offsets 0, 16, 32 and 48 from r2.  r2 itself is only
	   advanced once per 64 bytes or when leaving the fast path.  */
.Lloop:
	lcbb	%r1,0(%r2),6
	jo	.Lloop_haystack_on_bb	/* Haystack on block-boundary?  */
	vl	%v16,0(%r2)		/* Load next part of haystack.  */
.Lloop_haystack_loaded:
	/* Vector string search with zero search (cc=0 => no match).  */
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc
	lcbb	%r1,16(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb16
	vl	%v16,16(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc16
	lcbb	%r1,32(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb32
	vl	%v16,32(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc32
	lcbb	%r1,48(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb48
	vl	%v16,48(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc48
	la	%r2,64(%r2)
	j	.Lloop

	/* vstrs reported something for the part at offset 48/32/16/0.
	   The labels fall through so that r2 is advanced to the start of
	   that part.  la leaves the condition code of vstrs untouched.  */
.Lloop_vstrs_nonzero_cc48:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc32:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc16:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc:
	jh	.Lend_match_found /* cc == 2 (full match)  */
	jl	.Lend_no_match	/* cc == 1 (no match, end of string)  */
	/* cc == 3 (partial match) See above: min-skip-partial-match-index!  */
	lcbb	%r1,0(%r5,%r2),6
	la	%r2,0(%r5,%r2)	/* Keeps the cc of lcbb for the jo below.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	/* Partial-match loop, unrolled four times.  At each step the cc of
	   the preceding vstrs decides: 2 => match, 1 => end of string,
	   0 => skip r5 bytes and go back to the main loop,
	   3 => skip r5 bytes and search again right here.  */
.Lloop_vstrs_nonzero_cc_loop:
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)	/* cc is still the one of vstrs.  */
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	j	.Lloop_vstrs_nonzero_cc_loop

.Lend_no_match:
	lghi	%r2,0		/* Return NULL.  */
	br	%r14
.Lend_match_found:
	vlgvb	%r4,%v20,7	/* Index of the match within v16.  */
	la	%r2,0(%r4,%r2)	/* Return pointer to the match.  */
	br	%r14

	/* Block-boundary variants for the parts at offset 48/32/16/0.  As
	   above, the labels fall through to advance r2 to the start of the
	   affected part; the search then continues from there.  */
.Lloop_haystack_on_bb48:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb32:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb16:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb:
	/* Haystack located on page-boundary.  Load only the bytes in front
	   of the boundary and check whether the string ends there.  Only if
	   it does not, the bytes behind the boundary are accessed.  */
	ahi	%r1,-1		/* vll needs highest index instead of count.  */
	vll	%v16,%r1,0(%r2)
	vlvgb	%v21,%r1,7
	vfenezb	%v17,%v16,%v16	/* Search zero in loaded haystack bytes.  */
	veclb	%v17,%v21		/* Zero index <= loaded byte index?  */
	jle	.Lloop_haystack_loaded	/* -> v16 contains full haystack.  */
	vl	%v16,0(%r2)	/* Load haystack beyond page boundary.  */
	j	.Lloop_haystack_loaded

.Lneedle_on_bb:
	/* Needle located on page-boundary.  Same scheme as for the haystack:
	   first load only up to the boundary and look for the zero byte.  */
	ahi	%r1,-1		/* vll needs highest index instead of count.  */
	vll	%v18,%r1,0(%r3)
	vlvgb	%v21,%r1,7
	vfenezb	%v19,%v18,%v18	/* Search zero in loaded needle bytes.  */
	veclb	%v19,%v21	/* Zero index <= max loaded byte index?  */
	jle	.Lneedle_loaded	/* -> v18 contains full needle.  */
	vl	%v18,0(%r3)	/* Load needle beyond page boundary.  */
	vfenezb	%v19,%v18,%v18	/* Recompute the length on all 16 bytes.  */
	j	.Lneedle_loaded
END(STRSTR_ARCH13)

/* If HAVE_STRSTR_IFUNC is not set, this variant is exported directly
   under the name strstr.  */
# if ! HAVE_STRSTR_IFUNC
strong_alias (STRSTR_ARCH13, strstr)
# endif

/* If the z13 variant is only used as fallback of this one, this variant
   also provides __GI_strstr in the shared libc (presumably the symbol
   used for libc-internal calls - confirm against the ifunc headers).  */
# if STRSTR_Z13_ONLY_USED_AS_FALLBACK && defined SHARED && IS_IN (libc)
strong_alias (STRSTR_ARCH13, __GI_strstr)
# endif
Packit Service 40e629
#endif