/* Vector optimized 32/64 bit S/390 version of strstr.
   Copyright (C) 2019 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <ifunc-strstr.h>
/* Everything below is only assembled if HAVE_STRSTR_ARCH13 is nonzero
   (presumably defined by ifunc-strstr.h - verify there).  */
#if HAVE_STRSTR_ARCH13
# include "sysdep.h"
# include "asm-syntax.h"
	.text
/* char *strstr (const char *haystack=r2, const char *needle=r3)
   Locate a substring.

   Returns in r2 a pointer to the first occurrence of needle within
   haystack, haystack itself if needle is empty, or NULL if needle does
   not occur.  Needles longer than 9 bytes are handed over to STRSTR_Z13.
   No stack frame is set up; the function returns via r14.

   Register usage:
   r1:  lcbb result (number of bytes up to the block boundary, at most 16);
	decremented to the highest index before it is passed to vll.
   r2:  current position in haystack / return value.
   r3:  needle.
   r4:  needle length; in .Lend_match_found the match index within v16.
   r5:  min-skip-partial-match-index = 17 - needle length.
   v16: the part of haystack which is currently inspected.
   v17: index of the first zero in a partially loaded haystack.
   v18: needle.
   v19: needle length in byte 7 (passed to vstrs).
   v20: vstrs result; byte 7 is read as the index of the match.
   v21: highest index loaded by vll in byte 7.

   Condition codes of vstrs as used below:
   0: no match, 1: no match and end of string, 2: full match,
   3: partial match.  */
ENTRY(STRSTR_ARCH13)
	.machine "arch13"
	.machinemode "zarch_nohighgprs"
	/* lcbb sets cc 3 ("jo") if fewer than 16 bytes are left before the
	   boundary.  In this case the needle is loaded with vll first in order
	   to not touch the following block without need.  */
	lcbb	%r1,0(%r3),6
	jo	.Lneedle_on_bb	/* Needle on block-boundary?  */
	/* NOTE(review): this is the only vl in this function which passes a
	   third operand - confirm that the value 6 is intended here.  */
	vl	%v18,0(%r3),6	/* Load needle.  */
	vfenezb	%v19,%v18,%v18	/* v19[7] contains the length of needle.  */
.Lneedle_loaded:
	vlgvb	%r4,%v19,7	/* Get index of zero or 16 if not found.  */
	lghi	%r5,17		/* See below: min-skip-partial-match-index.  */
	/* An empty needle matches at the start: r2 still points to haystack.  */
	cgibe	%r4,0,0(%r14)	/* Return haystack if needle is empty.  */

	/* The vstrs instruction is able to handle needles up to a length of 16,
	   but then we may have to load the next part of haystack with a
	   small offset.  This will be slow - see examples:
	   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
	   needle   =  mmmmmmmmmmmmmma0
	   => needle_len=15; vstrs reports a partial match; haystack+=2
	   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
	   needle   =        mmmmmmmma0000000
	   => needle_len=9; vstrs reports a partial match; haystack+=8  */
# if ! HAVE_STRSTR_Z13
#  error The arch13 variant of strstr needs the z13 variant of strstr!
# endif
	/* r2, r3 and r14 are still unchanged here, thus the z13 variant can be
	   entered with a plain jump.  This also covers r4 == 16, i.e. no zero
	   was found within the first 16 bytes of needle.  */
	clgfi	%r4,9
	jh	STRSTR_Z13

	/* In case of a partial match, the vstrs instruction returns the index
	   of the partial match in a vector-register.  Then we have to
	   reload the string at the "current-position plus this index" and run
	   vstrs again in order to determine if it was a full match or no match.
	   Transferring this index from vr to gr, compute the haystack-address
	   and loading with vl is quite slow as all instructions have data
	   dependencies.  Thus we assume, that a partial match is always at the
	   first possible index and just load the next part of haystack from
	   there instead of waiting until the correct index is computed:
	   min-skip-partial-match-index = (16 - n_len) + 1  */
	sgr	%r5,%r4		/* r5 = 17 - n_len.  */

	/* Main loop, unrolled four times: haystack is inspected in steps of
	   16 bytes at the offsets 0, 16, 32 and 48.  r2 is advanced only once
	   per 64 bytes; the exit labels below add the missing multiples
	   of 16.  */
.Lloop:
	lcbb	%r1,0(%r2),6
	jo	.Lloop_haystack_on_bb	/* Haystack on block-boundary?  */
	vl	%v16,0(%r2)		/* Load next part of haystack.  */
.Lloop_haystack_loaded:
	/* Vector string search with zero search (cc=0 => no match).  */
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc
	lcbb	%r1,16(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb16
	vl	%v16,16(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc16
	lcbb	%r1,32(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb32
	vl	%v16,32(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc32
	lcbb	%r1,48(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb48
	vl	%v16,48(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc48
	la	%r2,64(%r2)
	j	.Lloop

	/* vstrs returned a nonzero cc at offset 48, 32, 16 or 0.  Falling
	   through the labels lets r2 point to the part of haystack which is
	   in v16.  la does not change the condition code, thus the cc of
	   vstrs is still valid at .Lloop_vstrs_nonzero_cc.  */
.Lloop_vstrs_nonzero_cc48:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc32:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc16:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc:
	jh	.Lend_match_found /* cc == 2 (full match)  */
	jl	.Lend_no_match	/* cc == 1 (no match, end of string)  */
	/* cc == 3 (partial match) See above: min-skip-partial-match-index!  */
	/* The lcbb is done before r2 is advanced; the following la keeps the
	   cc of lcbb for the jo.  */
	lcbb	%r1,0(%r5,%r2),6
	la	%r2,0(%r5,%r2)
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	/* Partial-match loop: after each vstrs, r2 is advanced by r5.  With
	   cc == 0 the main loop is resumed, with cc == 3 the next part of
	   haystack is loaded and searched here.  The la between the branches
	   does not change the cc of vstrs.  */
.Lloop_vstrs_nonzero_cc_loop:
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	j	.Lloop_vstrs_nonzero_cc_loop

.Lend_no_match:
	lghi	%r2,0		/* Return NULL.  */
	br	%r14
.Lend_match_found:
	vlgvb	%r4,%v20,7	/* Index of the match within v16.  */
	la	%r2,0(%r4,%r2)	/* Return the address of the match.  */
	br	%r14

	/* Fewer than 16 bytes are left before the boundary at offset 48, 32,
	   16 or 0.  Falling through the labels lets r2 point to the part of
	   haystack which has to be loaded; r1 still is the lcbb result.  */
.Lloop_haystack_on_bb48:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb32:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb16:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb:
	/* Haystack located on page-boundary.  */
	ahi	%r1,-1		/* vll needs highest index instead of count.  */
	vll	%v16,%r1,0(%r2)
	vlvgb	%v21,%r1,7
	vfenezb	%v17,%v16,%v16	/* Search zero in loaded haystack bytes.  */
	veclb	%v17,%v21		/* Zero index <= loaded byte index?  */
	jle	.Lloop_haystack_loaded	/* -> v16 contains full haystack.  */
	/* No zero within the loaded bytes: haystack continues behind the
	   boundary, thus the full 16 bytes are loaded.  */
	vl	%v16,0(%r2)	/* Load haystack beyond page boundary.  */
	j	.Lloop_haystack_loaded

.Lneedle_on_bb:
	/* Needle located on page-boundary.  */
	ahi	%r1,-1		/* vll needs highest index instead of count.  */
	vll	%v18,%r1,0(%r3)
	vlvgb	%v21,%r1,7
	vfenezb	%v19,%v18,%v18	/* Search zero in loaded needle bytes.  */
	veclb	%v19,%v21	/* Zero index <= max loaded byte index?  */
	jle	.Lneedle_loaded	/* -> v18 contains full needle.  */
	/* No zero within the loaded bytes: needle continues behind the
	   boundary.  The full needle has to end up in v18 and its length has to
	   be recomputed in v19 as both are used by .Lneedle_loaded and vstrs.  */
	vl	%v18,0(%r3)	/* Load needle beyond page boundary.  */
	vfenezb	%v19,%v18,%v18
	j	.Lneedle_loaded
END(STRSTR_ARCH13)
/* Without ifunc support, this implementation is exported as strstr.  */
# if ! HAVE_STRSTR_IFUNC
strong_alias (STRSTR_ARCH13, strstr)
# endif

/* In the shared libc, if the z13 variant is only used as fallback, this
   implementation also provides __GI_strstr (presumably the symbol used by
   libc-internal callers - verify against ifunc-strstr.h).  */
# if STRSTR_Z13_ONLY_USED_AS_FALLBACK && defined SHARED && IS_IN (libc)
strong_alias (STRSTR_ARCH13, __GI_strstr)
# endif
#endif