Blame sysdeps/s390/strstr-arch13.S

Packit Bot 76a31f
/* Vector optimized 32/64 bit S/390 version of strstr.
Packit Bot 76a31f
   Copyright (C) 2019 Free Software Foundation, Inc.
Packit Bot 76a31f
   This file is part of the GNU C Library.
Packit Bot 76a31f
Packit Bot 76a31f
   The GNU C Library is free software; you can redistribute it and/or
Packit Bot 76a31f
   modify it under the terms of the GNU Lesser General Public
Packit Bot 76a31f
   License as published by the Free Software Foundation; either
Packit Bot 76a31f
   version 2.1 of the License, or (at your option) any later version.
Packit Bot 76a31f
Packit Bot 76a31f
   The GNU C Library is distributed in the hope that it will be useful,
Packit Bot 76a31f
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Bot 76a31f
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit Bot 76a31f
   Lesser General Public License for more details.
Packit Bot 76a31f
Packit Bot 76a31f
   You should have received a copy of the GNU Lesser General Public
Packit Bot 76a31f
   License along with the GNU C Library; if not, see
Packit Bot 76a31f
   <http://www.gnu.org/licenses/>.  */
Packit Bot 76a31f
Packit Bot 76a31f
#include <ifunc-strstr.h>
Packit Bot 76a31f
#if HAVE_STRSTR_ARCH13
Packit Bot 76a31f
# include "sysdep.h"
Packit Bot 76a31f
# include "asm-syntax.h"
Packit Bot 76a31f
	.text
Packit Bot 76a31f
Packit Bot 76a31f
/* char *strstr (const char *haystack=r2, const char *needle=r3)
Packit Bot 76a31f
   Locate a substring.

   Overview of the code below:
   - An empty needle returns haystack (r2 is left untouched).
   - Needles longer than 9 bytes (including those without a zero byte
     within the first 16 bytes) are handed over to STRSTR_Z13 by a jump.
   - Otherwise haystack is scanned in 16-byte parts with vstrs.  The
     result in r2 is the address of the match or 0 if there is none.

   Registers as used below:
   r1  = byte count delivered by lcbb, afterwards highest index for vll.
   r2  = current haystack position / return value.
   r3  = needle.
   r4  = needle length (v19[7]); reused for the match index (v20[7]).
   r5  = min-skip-partial-match-index = 17 - needle length.
   v16 = current part of haystack.
   v17 = zero search result for v16 on the block-boundary path.
   v18 = needle.
   v19 = zero search result for the needle, i.e. its length in v19[7].
   v20 = result of vstrs; v20[7] is the index of the match.
   v21 = highest loaded byte index in v21[7] for the veclb comparison.  */
Packit Bot 76a31f
ENTRY(STRSTR_ARCH13)
Packit Bot 76a31f
	.machine "arch13"
Packit Bot 76a31f
	.machinemode "zarch_nohighgprs"
Packit Bot 76a31f
	/* r1 = number of bytes from needle up to the block boundary (at most
	   16).  The branch on cc 3 (jo) is taken if a 16-byte load at needle
	   would cross that boundary.  */
	lcbb	%r1,0(%r3),6
Packit Bot 76a31f
	jo	.Lneedle_on_bb	/* Needle on block-boundary?  */
Packit Bot 76a31f
	/* NOTE(review): the third operand of this vl is the optional
	   alignment-hint field; every other vl in this function passes none.
	   Confirm that the value 6 is intended here.  */
	vl	%v18,0(%r3),6	/* Load needle.  */
Packit Bot 76a31f
	vfenezb %v19,%v18,%v18	/* v19[7] contains the length of needle.  */
Packit Bot 76a31f
.Lneedle_loaded:
Packit Bot 76a31f
	/* Here v18 holds the needle and v19[7] its length (both paths, the
	   direct load above and .Lneedle_on_bb, end up here).  */
	vlgvb	%r4,%v19,7	/* Get index of zero or 16 if not found.  */
Packit Bot 76a31f
	lghi	%r5,17		/* See below: min-skip-partial-match-index.  */
Packit Bot 76a31f
	cgibe	%r4,0,0(%r14)	/* Empty needle: return haystack (r2 untouched).  */
Packit Bot 76a31f
Packit Bot 76a31f
	/* The vstrs instruction is able to handle needles up to a length of 16,
Packit Bot 76a31f
	   but then we may have to load the next part of haystack with a
Packit Bot 76a31f
	   small offset.  This will be slow - see examples:
Packit Bot 76a31f
	   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
Packit Bot 76a31f
	   needle   =  mmmmmmmmmmmmmma0
Packit Bot 76a31f
	   => needle_len=15; vstrs reports a partial match; haystack+=2
Packit Bot 76a31f
	   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
Packit Bot 76a31f
	   needle   =        mmmmmmmma0000000
Packit Bot 76a31f
	   => needle_len=9; vstrs reports a partial match; haystack+=8  */
Packit Bot 76a31f
# if ! HAVE_STRSTR_Z13
Packit Bot 76a31f
#  error The arch13 variant of strstr needs the z13 variant of strstr!
Packit Bot 76a31f
# endif
Packit Bot 76a31f
	/* Needle length > 9 (unsigned compare): continue in STRSTR_Z13.  r2, r3
	   and r14 have not been modified up to this point, thus this is a
	   plain jump to the other variant with the original arguments.  */
	clgfi	%r4,9
Packit Bot 76a31f
	jh	STRSTR_Z13
Packit Bot 76a31f
Packit Bot 76a31f
	/* In case of a partial match, the vstrs instruction returns the index
Packit Bot 76a31f
	   of the partial match in a vector-register.  Then we have to
Packit Bot 76a31f
	   reload the string at the "current-position plus this index" and run
Packit Bot 76a31f
	   vstrs again in order to determine if it was a full match or no match.
Packit Bot 76a31f
	   Transferring this index from vr to gr, compute the haystack-address
Packit Bot 76a31f
	   and loading with vl is quite slow as all instructions have data
Packit Bot 76a31f
	   dependencies.  Thus we assume, that a partial match is always at the
Packit Bot 76a31f
	   first possible index and just load the next part of haystack from
Packit Bot 76a31f
	   there instead of waiting until the correct index is computed:
Packit Bot 76a31f
	   min-skip-partial-match-index = (16 - n_len) + 1  */
Packit Bot 76a31f
	sgr	%r5,%r4		/* r5 = 17 - needle length.  */
Packit Bot 76a31f
Packit Bot 76a31f
	/* Main loop, unrolled four times.  Each step first checks with lcbb
	   whether a 16-byte load at the given offset would cross the block
	   boundary, then loads 16 bytes of haystack and runs vstrs.  r2 is
	   only advanced (by 64) after all four steps ended with cc 0.  */
.Lloop:
Packit Bot 76a31f
	lcbb	%r1,0(%r2),6
Packit Bot 76a31f
	jo	.Lloop_haystack_on_bb	/* Haystack on block-boundary?  */
Packit Bot 76a31f
	vl	%v16,0(%r2)		/* Load next part of haystack.  */
Packit Bot 76a31f
.Lloop_haystack_loaded:
Packit Bot 76a31f
	/* Vector string search with zero search (cc=0 => no match).  */
Packit Bot 76a31f
	vstrs	%v20,%v16,%v18,%v19,0,2
Packit Bot 76a31f
	jne	.Lloop_vstrs_nonzero_cc
Packit Bot 76a31f
	lcbb	%r1,16(%r2),6		/* Next part of haystack.  */
Packit Bot 76a31f
	jo	.Lloop_haystack_on_bb16
Packit Bot 76a31f
	vl	%v16,16(%r2)
Packit Bot 76a31f
	vstrs	%v20,%v16,%v18,%v19,0,2
Packit Bot 76a31f
	jne	.Lloop_vstrs_nonzero_cc16
Packit Bot 76a31f
	lcbb	%r1,32(%r2),6		/* Next part of haystack.  */
Packit Bot 76a31f
	jo	.Lloop_haystack_on_bb32
Packit Bot 76a31f
	vl	%v16,32(%r2)
Packit Bot 76a31f
	vstrs	%v20,%v16,%v18,%v19,0,2
Packit Bot 76a31f
	jne	.Lloop_vstrs_nonzero_cc32
Packit Bot 76a31f
	lcbb	%r1,48(%r2),6		/* Next part of haystack.  */
Packit Bot 76a31f
	jo	.Lloop_haystack_on_bb48
Packit Bot 76a31f
	vl	%v16,48(%r2)
Packit Bot 76a31f
	vstrs	%v20,%v16,%v18,%v19,0,2
Packit Bot 76a31f
	jne	.Lloop_vstrs_nonzero_cc48
Packit Bot 76a31f
	la	%r2,64(%r2)
Packit Bot 76a31f
	j	.Lloop
Packit Bot 76a31f
Packit Bot 76a31f
	/* Fall-through chain: add 48, 32 or 16 to r2 so that it addresses the
	   part of haystack which produced the nonzero cc.  The branches at
	   .Lloop_vstrs_nonzero_cc still test the cc set by vstrs, i.e. the
	   code relies on la not changing the condition code.  */
.Lloop_vstrs_nonzero_cc48:
Packit Bot 76a31f
	la	%r2,16(%r2)
Packit Bot 76a31f
.Lloop_vstrs_nonzero_cc32:
Packit Bot 76a31f
	la	%r2,16(%r2)
Packit Bot 76a31f
.Lloop_vstrs_nonzero_cc16:
Packit Bot 76a31f
	la	%r2,16(%r2)
Packit Bot 76a31f
.Lloop_vstrs_nonzero_cc:
Packit Bot 76a31f
	jh	.Lend_match_found /* cc == 2 (full match)  */
Packit Bot 76a31f
	jl	.Lend_no_match	/* cc == 1 (no match, end of string)  */
Packit Bot 76a31f
	/* cc == 3 (partial match) See above: min-skip-partial-match-index!  */
Packit Bot 76a31f
	/* Advance haystack by r5.  The jo below tests the cc set by lcbb for
	   the new position; the la in between leaves it untouched.  */
	lcbb	%r1,0(%r5,%r2),6
Packit Bot 76a31f
	la	%r2,0(%r5,%r2)
Packit Bot 76a31f
	jo	.Lloop_haystack_on_bb
Packit Bot 76a31f
	vl	%v16,0(%r2)
Packit Bot 76a31f
	vstrs	%v20,%v16,%v18,%v19,0,2
Packit Bot 76a31f
	/* Partial-match loop, unrolled three times.  Per step: cc 2 -> match
	   found, cc 1 -> no match, otherwise r2 += r5 and then cc 0 -> back
	   to the main loop, cc 3 (partial match again) -> load and search at
	   the new position.  */
.Lloop_vstrs_nonzero_cc_loop:
Packit Bot 76a31f
	jh	.Lend_match_found
Packit Bot 76a31f
	jl	.Lend_no_match
Packit Bot 76a31f
	la	%r2,0(%r5,%r2)
Packit Bot 76a31f
	je	.Lloop
Packit Bot 76a31f
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
Packit Bot 76a31f
	jo	.Lloop_haystack_on_bb
Packit Bot 76a31f
	vl	%v16,0(%r2)
Packit Bot 76a31f
	vstrs	%v20,%v16,%v18,%v19,0,2
Packit Bot 76a31f
	jh	.Lend_match_found
Packit Bot 76a31f
	jl	.Lend_no_match
Packit Bot 76a31f
	la	%r2,0(%r5,%r2)
Packit Bot 76a31f
	je	.Lloop
Packit Bot 76a31f
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
Packit Bot 76a31f
	jo	.Lloop_haystack_on_bb
Packit Bot 76a31f
	vl	%v16,0(%r2)
Packit Bot 76a31f
	vstrs	%v20,%v16,%v18,%v19,0,2
Packit Bot 76a31f
	jh	.Lend_match_found
Packit Bot 76a31f
	jl	.Lend_no_match
Packit Bot 76a31f
	la	%r2,0(%r5,%r2)
Packit Bot 76a31f
	je	.Lloop
Packit Bot 76a31f
	lcbb	%r1,0(%r2),6		/* Next part of haystack.  */
Packit Bot 76a31f
	jo	.Lloop_haystack_on_bb
Packit Bot 76a31f
	vl	%v16,0(%r2)
Packit Bot 76a31f
	vstrs	%v20,%v16,%v18,%v19,0,2
Packit Bot 76a31f
	j	.Lloop_vstrs_nonzero_cc_loop
Packit Bot 76a31f
Packit Bot 76a31f
.Lend_no_match:
Packit Bot 76a31f
	lghi	%r2,0		/* Return 0: needle not found.  */
Packit Bot 76a31f
	br	%r14
Packit Bot 76a31f
.Lend_match_found:
Packit Bot 76a31f
	/* Return current haystack position plus the index from v20[7].  */
	vlgvb	%r4,%v20,7
Packit Bot 76a31f
	la	%r2,0(%r4,%r2)
Packit Bot 76a31f
	br	%r14
Packit Bot 76a31f
Packit Bot 76a31f
	/* Entered from the unrolled main loop: first add 48, 32 or 16 to r2
	   so that it addresses the part which is located on the boundary.  */
.Lloop_haystack_on_bb48:
Packit Bot 76a31f
	la	%r2,16(%r2)
Packit Bot 76a31f
.Lloop_haystack_on_bb32:
Packit Bot 76a31f
	la	%r2,16(%r2)
Packit Bot 76a31f
.Lloop_haystack_on_bb16:
Packit Bot 76a31f
	la	%r2,16(%r2)
Packit Bot 76a31f
.Lloop_haystack_on_bb:
Packit Bot 76a31f
	/* Haystack located on page-boundary.  */
Packit Bot 76a31f
	/* r1 still holds the byte count from the preceding lcbb.  Load only
	   those bytes with vll and check whether the string ends within them.
	   Only if it does not, the full 16 bytes beyond the boundary are
	   loaded.  */
	ahi	%r1,-1		/* vll needs highest index instead of count.  */
Packit Bot 76a31f
	vll	%v16,%r1,0(%r2)
Packit Bot 76a31f
	vlvgb	%v21,%r1,7
Packit Bot 76a31f
	vfenezb	%v17,%v16,%v16	/* Search zero in loaded haystack bytes.  */
Packit Bot 76a31f
	veclb	%v17,%v21		/* Zero index <= loaded byte index?  */
Packit Bot 76a31f
	jle	.Lloop_haystack_loaded	/* -> v16 contains full haystack.  */
Packit Bot 76a31f
	vl	%v16,0(%r2)	/* Load haystack beyond page boundary.  */
Packit Bot 76a31f
	j	.Lloop_haystack_loaded
Packit Bot 76a31f
Packit Bot 76a31f
.Lneedle_on_bb:
Packit Bot 76a31f
	/* Needle located on page-boundary.  */
Packit Bot 76a31f
	/* Same scheme as for the haystack: load the bytes up to the boundary
	   and only load beyond it if the needle does not end before.  */
	ahi	%r1,-1		/* vll needs highest index instead of count.  */
Packit Bot 76a31f
	vll	%v18,%r1,0(%r3)
Packit Bot 76a31f
	vlvgb	%v21,%r1,7
Packit Bot 76a31f
	vfenezb	%v19,%v18,%v18	/* Search zero in loaded needle bytes.  */
Packit Bot 76a31f
	veclb	%v19,%v21	/* Zero index <= max loaded byte index?  */
Packit Bot 76a31f
	jle	.Lneedle_loaded	/* -> v18 contains full needle.  */
Packit Bot c1b9f3
	vl	%v18,0(%r3)	/* Load needle beyond page boundary.  */
Packit Bot 76a31f
	vfenezb	%v19,%v18,%v18	/* Recompute needle length for the full load.  */
Packit Bot 76a31f
	j	.Lneedle_loaded
Packit Bot 76a31f
END(STRSTR_ARCH13)
Packit Bot 76a31f
Packit Bot 76a31f
/* Without ifunc support, STRSTR_ARCH13 is aliased to the plain strstr
   symbol (strong_alias is a macro defined outside of this file).  */
# if ! HAVE_STRSTR_IFUNC
Packit Bot 76a31f
strong_alias (STRSTR_ARCH13, strstr)
Packit Bot 76a31f
# endif
Packit Bot 76a31f
Packit Bot 76a31f
/* In a shared libc build where the z13 variant only serves as fallback,
   STRSTR_ARCH13 is additionally aliased to __GI_strstr.
   NOTE(review): __GI_strstr is presumably the libc-internal name of
   strstr - confirm against the headers defining these macros.  */
# if STRSTR_Z13_ONLY_USED_AS_FALLBACK && defined SHARED && IS_IN (libc)
Packit Bot 76a31f
strong_alias (STRSTR_ARCH13, __GI_strstr)
Packit Bot 76a31f
# endif
Packit Bot 76a31f
#endif