Blame sysdeps/s390/strstr-arch13.S

Packit Service 357bf9
/* Vector optimized 32/64 bit S/390 version of strstr.
Packit Service 357bf9
   Copyright (C) 2019 Free Software Foundation, Inc.
Packit Service 357bf9
   This file is part of the GNU C Library.
Packit Service 357bf9
Packit Service 357bf9
   The GNU C Library is free software; you can redistribute it and/or
Packit Service 357bf9
   modify it under the terms of the GNU Lesser General Public
Packit Service 357bf9
   License as published by the Free Software Foundation; either
Packit Service 357bf9
   version 2.1 of the License, or (at your option) any later version.
Packit Service 357bf9
Packit Service 357bf9
   The GNU C Library is distributed in the hope that it will be useful,
Packit Service 357bf9
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service 357bf9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Service 357bf9
   Lesser General Public License for more details.
Packit Service 357bf9
Packit Service 357bf9
   You should have received a copy of the GNU Lesser General Public
Packit Service 357bf9
   License along with the GNU C Library; if not, see
Packit Service 357bf9
   <http://www.gnu.org/licenses/>.  */
Packit Service 357bf9
Packit Service 357bf9
#include <ifunc-strstr.h>
Packit Service 357bf9
#if HAVE_STRSTR_ARCH13
Packit Service 357bf9
# include "sysdep.h"
Packit Service 357bf9
# include "asm-syntax.h"
Packit Service 357bf9
	.text
Packit Service 357bf9
Packit Service 357bf9
/* char *strstr (const char *haystack=r2, const char *needle=r3)
   Locate a substring.

   Returns in r2 a pointer to the first occurrence of needle in haystack,
   haystack itself if needle is the empty string, or NULL if there is no
   match.  Needles longer than 9 bytes are handed over to STRSTR_Z13.

   Register usage:
   r1  = result of lcbb (byte count up to the block boundary)
   r2  = current position in haystack; return value
   r3  = needle
   r4  = needle length (index of zero or 16); at the end: match index
   r5  = min-skip-partial-match-index = 17 - needle length
   v16 = current part of haystack
   v17 = zero index within a partially loaded part of haystack
   v18 = needle
   v19 = needle length in byte 7
   v20 = result of vstrs (match index in byte 7)
   v21 = highest loaded byte index of a partial load in byte 7  */
ENTRY(STRSTR_ARCH13)
	.machine "arch13"
	.machinemode "zarch_nohighgprs"
	lcbb	%r1,0(%r3),6
	jo	.Lneedle_on_bb	/* Needle on block-boundary?  */
	/* The needle can have any alignment.  Thus load it without an
	   alignment hint - just like all other vl instructions below.  */
	vl	%v18,0(%r3)	/* Load needle.  */
	vfenezb %v19,%v18,%v18	/* v19[7] contains the length of needle.  */
.Lneedle_loaded:
	vlgvb	%r4,%v19,7	/* Get index of zero or 16 if not found.  */
	lghi	%r5,17		/* See below: min-skip-partial-match-index.  */
	cgibe	%r4,0,0(%r14)	/* Empty needle: return haystack (r2).  */

	/* The vstrs instruction is able to handle needles up to a length of 16,
	   but then we may have to load the next part of haystack with a
	   small offset.  This will be slow - see examples:
	   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
	   needle   =  mmmmmmmmmmmmmma0
	   => needle_len=15; vstrs reports a partial match; haystack+=2
	   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
	   needle   =        mmmmmmmma0000000
	   => needle_len=9; vstrs reports a partial match; haystack+=8
	   Thus needles longer than 9 bytes are processed by the z13 variant.  */
# if ! HAVE_STRSTR_Z13
#  error The arch13 variant of strstr needs the z13 variant of strstr!
# endif
	clgfi	%r4,9
	jh	STRSTR_Z13	/* Tail call; r2 and r3 are still unchanged.  */

	/* In case of a partial match, the vstrs instruction returns the index
	   of the partial match in a vector-register.  Then we have to
	   reload the string at the "current-position plus this index" and run
	   vstrs again in order to determine if it was a full match or no match.
	   Transferring this index from vr to gr, computing the haystack-address
	   and loading with vl is quite slow as all instructions have data
	   dependencies.  Thus we assume that a partial match is always at the
	   first possible index and just load the next part of haystack from
	   there instead of waiting until the correct index is computed:
	   min-skip-partial-match-index = (16 - n_len) + 1  */
	sgr	%r5,%r4		/* r5 = 17 - n_len.  */

	/* Main loop: Process 4 x 16 bytes of haystack per iteration as long
	   as vstrs reports cc == 0.  */
.Lloop:
	lcbb	%r1,0(%r2),6
	jo	.Lloop_haystack_on_bb	/* Haystack on block-boundary?  */
	vl	%v16,0(%r2)		/* Load next part of haystack.  */
.Lloop_haystack_loaded:
	/* Vector string search with zero search (cc=0 => no match).  */
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc
	lcbb	%r1,16(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb16
	vl	%v16,16(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc16
	lcbb	%r1,32(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb32
	vl	%v16,32(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc32
	lcbb	%r1,48(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb48
	vl	%v16,48(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc48
	la	%r2,64(%r2)
	j	.Lloop

	/* The labels below fall through in order to advance r2 to the part
	   of haystack which was processed by the last vstrs.  Note: la does
	   not change the condition code set by vstrs.  */
.Lloop_vstrs_nonzero_cc48:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc32:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc16:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc:
	jh	.Lend_match_found /* cc == 2 (full match)  */
	jl	.Lend_no_match	/* cc == 1 (no match, end of string)  */
	/* cc == 3 (partial match) See above: min-skip-partial-match-index!  */
	lcbb	%r1,0(%r5,%r2),6
	la	%r2,0(%r5,%r2)
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	/* Partial-match loop (unrolled 3 times): As long as vstrs reports a
	   partial match (cc == 3), advance haystack by
	   min-skip-partial-match-index and search again.  On cc == 0
	   continue with the main loop at the already advanced position.  */
.Lloop_vstrs_nonzero_cc_loop:
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	j	.Lloop_vstrs_nonzero_cc_loop

.Lend_no_match:
	lghi	%r2,0		/* Return NULL.  */
	br	%r14
.Lend_match_found:
	vlgvb	%r4,%v20,7	/* Get index of the match reported by vstrs.  */
	la	%r2,0(%r4,%r2)	/* Return current position + match index.  */
	br	%r14

	/* The labels below fall through in order to advance r2 to the part
	   of haystack which is located on the block-boundary.  */
.Lloop_haystack_on_bb48:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb32:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb16:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb:
	/* Haystack located on page-boundary.  */
	ahi	%r1,-1		/* vll needs highest index instead of count.  */
	vll	%v16,%r1,0(%r2)
	vlvgb	%v21,%r1,7
	vfenezb	%v17,%v16,%v16	/* Search zero in loaded haystack bytes.  */
	veclb	%v17,%v21		/* Zero index <= loaded byte index?  */
	jle	.Lloop_haystack_loaded	/* -> v16 contains full haystack.  */
	vl	%v16,0(%r2)	/* Load haystack beyond page boundary.  */
	j	.Lloop_haystack_loaded

.Lneedle_on_bb:
	/* Needle located on page-boundary.  */
	ahi	%r1,-1		/* vll needs highest index instead of count.  */
	vll	%v18,%r1,0(%r3)
	vlvgb	%v21,%r1,7
	vfenezb	%v19,%v18,%v18	/* Search zero in loaded needle bytes.  */
	veclb	%v19,%v21	/* Zero index <= max loaded byte index?  */
	jle	.Lneedle_loaded	/* -> v18 contains full needle.  */
	/* No zero found before the boundary: The needle continues on the next
	   page, thus it is safe to load the full 16 bytes.  Needle and its
	   length have to end up in v18/v19 as expected by .Lneedle_loaded.  */
	vl	%v18,0(%r3)	/* Load needle beyond page boundary.  */
	vfenezb	%v19,%v18,%v18
	j	.Lneedle_loaded
END(STRSTR_ARCH13)
Packit Service 357bf9
Packit Service 357bf9
/* If strstr is not selected via ifunc, the arch13 variant is exported
   directly under the name strstr.  */
# if ! HAVE_STRSTR_IFUNC
Packit Service 357bf9
strong_alias (STRSTR_ARCH13, strstr)
Packit Service 357bf9
# endif
Packit Service 357bf9
Packit Service 357bf9
/* If the z13 variant is only used as fallback in a shared libc, the arch13
   variant also provides __GI_strstr.
   NOTE(review): __GI_strstr is presumably the libc-internal (hidden) name
   of strstr - confirm against the ifunc-strstr.h / libc-symbols.h setup.  */
# if STRSTR_Z13_ONLY_USED_AS_FALLBACK && defined SHARED && IS_IN (libc)
Packit Service 357bf9
strong_alias (STRSTR_ARCH13, __GI_strstr)
Packit Service 357bf9
# endif
Packit Service 357bf9
#endif