Blame sysdeps/arm/memcpy.S

Packit 6c4009
/* Copyright (C) 2006-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
/* Thumb requires excessive IT insns here.  */
Packit 6c4009
#define NO_THUMB
Packit 6c4009
#include <sysdep.h>
Packit 6c4009
#include <arm-features.h>
Packit 6c4009
Packit 6c4009
/*
 * PLD() wraps data preload instructions.  Its argument is emitted only
 * on architectures that support preload (ARM V5TE and above); on the
 * older architectures listed below PLD() expands to nothing.
 */
#if (defined (__ARM_ARCH_2__) || defined (__ARM_ARCH_3__) \
     || defined (__ARM_ARCH_3M__) || defined (__ARM_ARCH_4__) \
     || defined (__ARM_ARCH_4T__) || defined (__ARM_ARCH_5__) \
     || defined (__ARM_ARCH_5T__))
#define PLD(code...)
#else
#define PLD(code...)    code
#endif
Packit 6c4009
Packit 6c4009
/*
 * CALGN() wraps the optional code that cacheline aligns the source
 * pointer.  It is compiled out: experiments on the architectures tested
 * (StrongARM and XScale) did not show it to be worthwhile.  That might
 * be different in the future; change the condition below to enable it.
 */
#if 0
#define CALGN(code...)        code
#else
#define CALGN(code...)
#endif
Packit 6c4009
Packit 6c4009
/*
 * Endian independent shift mnemonics for moving bytes within registers.
 * PULL and PUSH are always opposite directions; which one is the
 * logical right shift depends on the byte order of the target.
 */
#ifdef __ARMEB__
#define PULL            lsl
#define PUSH            lsr
#else
#define PULL            lsr
#define PUSH            lsl
#endif
Packit 6c4009
Packit 6c4009
		.text
Packit 6c4009
		.syntax unified
Packit 6c4009
Packit 6c4009
/* Prototype: void *memcpy(void *dest, const void *src, size_t n);

   Forward copy of n bytes from src to dest.

   On entry:  r0 = dest, r1 = src, r2 = n.
   On exit:   r0 = dest as passed in (it is saved by the first push
	      and reloaded by the final pop).
   Registers: r3 and ip are used as scratch.  r4-r8, r10 and lr are
	      used as well, but are saved on the stack and restored
	      before returning.

   r2 is kept biased while copying: it holds "bytes left - 4" after
   the first subtraction and "bytes left - 32" around the 32-byte
   block loops.  The bias is a multiple of 4 (respectively 32), so
   the low bits of r2 always equal the low bits of the number of
   bytes left.  The word tail tests bits 4:2 and the byte tail tests
   bits 1:0 directly, and the bias never has to be undone.

   n is a size_t, so every branch that tests the outcome of a
   subtraction from r2 uses an unsigned condition (blo/bhs).  Signed
   conditions (blt/bge) would treat a length with the top bit set as
   negative and drop to the byte tail almost immediately instead of
   performing the copy (glibc BZ #25620).  */

ENTRY(memcpy)

		/* Save the return value (dest), one scratch register and
		   the return address.  */
		push	{r0, r4, lr}
		cfi_adjust_cfa_offset (12)
		cfi_rel_offset (r4, 4)
		cfi_rel_offset (lr, 8)

		cfi_remember_state

		/* Fewer than 4 bytes: only the byte tail is needed.  */
		subs	r2, r2, #4
		blo	8f
		ands	ip, r0, #3
	PLD(	pld	[r1, #0]		)
		bne	9f			@ dest is not word aligned
		ands	ip, r1, #3
		bne	10f			@ dest aligned, src is not

		/* Both pointers are word aligned.  r2 becomes
		   "bytes left - 32"; with fewer than 32 bytes go straight
		   to the word tail.  The push in between does not touch
		   the flags set by the subs.  */
1:		subs	r2, r2, #(28)
		push	{r5 - r8}
		cfi_adjust_cfa_offset (16)
		cfi_rel_offset (r5, 0)
		cfi_rel_offset (r6, 4)
		cfi_rel_offset (r7, 8)
		cfi_rel_offset (r8, 12)
		blo	5f

	CALGN(	ands	ip, r1, #31		)
	CALGN(	rsb	r3, ip, #32		)
	CALGN(	sbcsne	r4, r3, r2		)  @ C is always set here
	CALGN(	bcs	2f			)
	CALGN(	adr	r4, 6f			)
	CALGN(	subs	r2, r2, r3		)  @ C gets set
#ifndef ARM_ALWAYS_BX
	CALGN(	add	pc, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
#else
	CALGN(	add	r4, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
	CALGN(	bx	r4			)
#endif

		/* The compare only decides whether the two extra
		   look-ahead preloads are issued before entering the
		   loop; r2 itself is left untouched so that the loop can
		   use a plain unsigned test.  */
	PLD(	pld	[r1, #0]		)
2:	PLD(	cmp	r2, #96			)
	PLD(	pld	[r1, #28]		)
	PLD(	blo	4f			)
	PLD(	pld	[r1, #60]		)
	PLD(	pld	[r1, #92]		)

		/* Copy 32 bytes per iteration.  The subs leaves the carry
		   set (no borrow) exactly when at least another 32 bytes
		   remain after this block.  */
3:	PLD(	pld	[r1, #124]		)
4:		ldmia	r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
		subs	r2, r2, #32
		stmia	r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
		bhs	3b

		/* Word tail: bits 4:2 of r2 give the number of bytes
		   (0, 4, ..., 28) still to be copied as whole words.
		   ip = 32 - that count; jumping ip / 4 slots into the
		   8-slot ladder at 6 (one nop followed by seven loads)
		   executes exactly the loads that are still needed.
		   With no whole word left, skip both ladders.  */
5:		ands	ip, r2, #28
		rsb	ip, ip, #32
#ifndef ARM_ALWAYS_BX
		/* C is always clear here.  */
		addne	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		b	7f
#else
		beq	7f
		push	{r10}
		cfi_adjust_cfa_offset (4)
		cfi_rel_offset (r10, 0)
0:		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		/* If alignment is not perfect, then there will be some
		   padding (nop) instructions between this BX and label 6.
		   The computation above assumed that two instructions
		   later is exactly the right spot.  */
		add	r10, #(6f - (0b + PC_OFS))
		bx	r10
#endif
		.p2align ARM_BX_ALIGN_LOG2
6:		nop
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r3, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r4, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r5, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r6, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r7, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r8, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	lr, [r1], #4

		/* Same computed jump, this time into the matching ladder
		   of stores at 66; ip is unchanged since the loads.  */
#ifndef ARM_ALWAYS_BX
		add	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		nop
#else
0:		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		/* If alignment is not perfect, then there will be some
		   padding (nop) instructions between this BX and label 66.
		   The computation above assumed that two instructions
		   later is exactly the right spot.  */
		add	r10, #(66f - (0b + PC_OFS))
		bx	r10
#endif
		.p2align ARM_BX_ALIGN_LOG2
66:		nop
		.p2align ARM_BX_ALIGN_LOG2
		str	r3, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r4, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r5, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r6, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r7, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r8, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	lr, [r0], #4

#ifdef ARM_ALWAYS_BX
		pop	{r10}
		cfi_adjust_cfa_offset (-4)
		cfi_restore (r10)
#endif

	CALGN(	bcs	2b			)

7:		pop	{r5 - r8}
		cfi_adjust_cfa_offset (-16)
		cfi_restore (r5)
		cfi_restore (r6)
		cfi_restore (r7)
		cfi_restore (r8)

		/* Byte tail: 0 to 3 bytes are left and their count is in
		   bits 1:0 of r2.  The shift leaves Z clear when bit 0
		   was set (copy one byte) and puts bit 1 into the carry
		   (copy two bytes).  */
8:		movs	r2, r2, lsl #31
		ldrbne	r3, [r1], #1
		ldrbcs	r4, [r1], #1
		ldrbcs	ip, [r1]
		strbne	r3, [r0], #1
		strbcs	r4, [r0], #1
		strbcs	ip, [r0]

		/* Reload the original dest into r0 and return.  */
#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
     || defined (ARM_ALWAYS_BX))
		pop	{r0, r4, lr}
		cfi_adjust_cfa_offset (-12)
		cfi_restore (r4)
		cfi_restore (lr)
		bx      lr
#else
		pop	{r0, r4, pc}
#endif

		cfi_restore_state

		/* dest is not word aligned: ip = dest & 3 on entry.  Copy
		   4 - ip bytes (1 to 3) so that dest becomes aligned.  If
		   that leaves fewer than 4 bytes, finish in the byte
		   tail; otherwise continue according to the alignment of
		   src.  */
9:		rsb	ip, ip, #4
		cmp	ip, #2
		ldrbgt	r3, [r1], #1
		ldrbge	r4, [r1], #1
		ldrb	lr, [r1], #1
		strbgt	r3, [r0], #1
		strbge	r4, [r0], #1
		subs	r2, r2, ip
		strb	lr, [r0], #1
		blo	8b
		ands	ip, r1, #3
		beq	1b

		/* dest is word aligned, src is not: ip = src & 3 (1 to 3).
		   Round src down to a word boundary, load the first word
		   into lr and select the copy loop whose shift amounts
		   match the misalignment: fall through for ip == 1,
		   label 17 for ip == 2, label 18 for ip == 3.  */
10:		bic	r1, r1, #3
		cmp	ip, #2
		ldr	lr, [r1], #4
		beq	17f
		bgt	18f


		/* Copy from a misaligned source using aligned word loads.
		   lr always holds the most recently loaded source word.
		   Each output word is the not yet consumed part of the
		   previous source word (shifted by \pull bits) combined
		   with the leading part of the next one (shifted by
		   \push bits); the two shift counts add up to 32.  */
		.macro	forward_copy_shift pull push

		/* r2 becomes "bytes left - 32"; with fewer than 32 bytes
		   skip the block loop.  */
		subs	r2, r2, #28
		blo	14f

	CALGN(	ands	ip, r1, #31		)
	CALGN(	rsb	ip, ip, #32		)
	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
	CALGN(	subcc	r2, r2, ip		)
	CALGN(	bcc	15f			)

11:		push	{r5 - r8, r10}
		cfi_adjust_cfa_offset (20)
		cfi_rel_offset (r5, 0)
		cfi_rel_offset (r6, 4)
		cfi_rel_offset (r7, 8)
		cfi_rel_offset (r8, 12)
		cfi_rel_offset (r10, 16)

		/* As in the aligned loop, the compare only gates the
		   extra look-ahead preloads and leaves r2 untouched.  */
	PLD(	pld	[r1, #0]		)
	PLD(	cmp	r2, #96			)
	PLD(	pld	[r1, #28]		)
	PLD(	blo	13f			)
	PLD(	pld	[r1, #60]		)
	PLD(	pld	[r1, #92]		)

		/* Load eight source words, merge neighbouring words and
		   store eight output words per iteration.  None of the
		   instructions between the subs and the final branch sets
		   the flags, so bhs still tests the subtraction: carry
		   set means at least another 32 bytes remain.  */
12:	PLD(	pld	[r1, #124]		)
13:		ldmia	r1!, {r4, r5, r6, r7}
		mov	r3, lr, PULL #\pull
		subs	r2, r2, #32
		ldmia	r1!, {r8, r10, ip, lr}
		orr	r3, r3, r4, PUSH #\push
		mov	r4, r4, PULL #\pull
		orr	r4, r4, r5, PUSH #\push
		mov	r5, r5, PULL #\pull
		orr	r5, r5, r6, PUSH #\push
		mov	r6, r6, PULL #\pull
		orr	r6, r6, r7, PUSH #\push
		mov	r7, r7, PULL #\pull
		orr	r7, r7, r8, PUSH #\push
		mov	r8, r8, PULL #\pull
		orr	r8, r8, r10, PUSH #\push
		mov	r10, r10, PULL #\pull
		orr	r10, r10, ip, PUSH #\push
		mov	ip, ip, PULL #\pull
		orr	ip, ip, lr, PUSH #\push
		stmia	r0!, {r3, r4, r5, r6, r7, r8, r10, ip}
		bhs	12b

		pop	{r5 - r8, r10}
		cfi_adjust_cfa_offset (-20)
		cfi_restore (r5)
		cfi_restore (r6)
		cfi_restore (r7)
		cfi_restore (r8)
		cfi_restore (r10)

		/* Word tail: ip = bytes (0, 4, ..., 28) still to be
		   stored as whole words, taken from bits 4:2 of r2.  */
14:		ands	ip, r2, #28
		beq	16f

		/* One merged output word per iteration.  */
15:		mov	r3, lr, PULL #\pull
		ldr	lr, [r1], #4
		subs	ip, ip, #4
		orr	r3, r3, lr, PUSH #\push
		str	r3, [r0], #4
		bgt	15b
	CALGN(	cmp	r2, #0			)
	CALGN(	bge	11b			)

		/* r1 points just past the word held in lr, of which the
		   last \push / 8 bytes have not been stored yet.  Step
		   r1 back onto them and let the byte tail finish.  */
16:		sub	r1, r1, #(\push / 8)
		b	8b

		.endm


		/* src & 3 == 1 (fall through from label 10).  */
		forward_copy_shift	pull=8	push=24

		/* src & 3 == 2.  */
17:		forward_copy_shift	pull=16	push=16

		/* src & 3 == 3.  */
18:		forward_copy_shift	pull=24	push=8

END(memcpy)
Packit 6c4009
libc_hidden_builtin_def (memcpy)