dnl  IA-64 mpn_lshiftc.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2000-2005, 2010 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library. If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C           cycles/limb
C Itanium:      ?
C Itanium 2:    1.25

C This code is scheduled deeply since the plain shift instructions shr and shl
C have a latency of 4 (on Itanium) or 3 (on Itanium 2). Poor scheduling of
C these instructions causes a 10 cycle replay trap on Itanium.

C The ld8 scheduling should probably be decreased to make the function smaller.
C Good lfetch will make sure we never stall anyway.

C We should actually issue the first ld8 at cycle 0, and the first BSH/FSH pair
C at cycle 2. Judicious use of predicates could allow us to issue more ld8's
C in the prologue.


C INPUT PARAMETERS
C   rp   (r32)  destination pointer; result limbs are written through it (st8)
C   up   (r33)  source pointer; operand limbs are read through it (ld8)
C   n    (r34)  number of 8-byte limbs; the register is later reused to hold
C               the loop trip count (n-1) >> 2
C   cnt  (r35)  shift count in bits
define(`rp', `r32')
define(`up', `r33')
define(`n', `r34')
define(`cnt',`r35')

C Complementary shift count; the entry code sets it to 64 - cnt.
define(`tnc',`r9')

C FSH shifts a limb up by cnt; BSH extracts, with a logical right shift by
C tnc, the bits that move into the limb above.
define(`FSH',`shl')
define(`BSH',`shr.u')
C Post-increment applied to up and rp by every ld8/st8: both pointers walk
C downwards one 8-byte limb at a time.
define(`UPD',`-8')
C Initial offset of the lfetch pointer (r11) relative to the most significant
C source limb.
define(`POFF',`-512')
C Post-increment of the lfetch pointer, applied once per main-loop pass
C (a pass handles four 8-byte limbs).
define(`PUPD',`-32')
C Name handed to EPILOGUE().
define(`func',`mpn_lshiftc')

C mpn_lshiftc -- entry, setup and dispatch.
C
C Shifts the n-limb operand at up left by cnt bits and stores the bitwise
C complement of every result limb at rp.  The bits shifted out of the most
C significant limb are returned, uncomplemented, in r8.
C
C Limbs are processed from the highest address downwards.  No check is made
C for n = 0.  NOTE(review): the code presumably relies on the usual mpn shift
C contract (n >= 1, 1 <= cnt <= 63) -- confirm against the GMP documentation.
C
C Register use set up here:
C   r2   saved ar.lc (restored before returning)
C   r10  most significant source limb
C   r11  lfetch pointer, POFF bytes from the most significant source limb
C   r14  n mod 4, selects the feed-in code
C   r15  n - 1
C   tnc  64 - cnt
ASM_START()
PROLOGUE(mpn_lshiftc)
	.prologue
	.save	ar.lc, r2
	.body
ifdef(`HAVE_ABI_32',
`	addp4	rp = 0, rp		C			M I
	addp4	up = 0, up		C			M I
	sxt4	n = n			C			M I
	nop.m		0
	nop.m		0
	zxt4	cnt = cnt		C			I
	;;
')

C Group 1: r14 = n mod 4, save ar.lc, r15 = n - 1, tnc = 64 - cnt.
C Group 2: p6/p7/p8 = (n mod 4 is 1/2/3), n = (n - 1) >> 2, and up/rp are
C          advanced by 8*(n-1) bytes so that they address the top limbs.
C Group 3: start the lfetch pointer, load the top limb into r10, set the
C          loop counter, and branch on n mod 4.  With none of p6/p7/p8 set
C          (n mod 4 == 0) execution falls through to .Lb00.
 {.mmi;	nop	0			C			M I
	and	r14 = 3, n		C			M I
	mov.i	r2 = ar.lc		C			I0
}{.mmi;	add	r15 = -1, n		C			M I
	sub	tnc = 64, cnt		C			M I
	nop	0
	;;
}{.mmi;	cmp.eq	p6, p0 = 1, r14		C			M I
	cmp.eq	p7, p0 = 2, r14		C			M I
	shr.u	n = r15, 2		C			I0
}{.mmi;	cmp.eq	p8, p0 = 3, r14		C			M I
	shladd	up = r15, 3, up		C			M I
	shladd	rp = r15, 3, rp		C			M I
	;;
}{.mmi;	add	r11 = POFF, up		C			M I
	ld8	r10 = [up], UPD		C			M01
	mov.i	ar.lc = n		C			I0
}{.bbb;
   (p6)	br.dptk	.Lb01
   (p7)	br.dptk	.Lb10
   (p8)	br.dptk	.Lb11
	;; }

C Feed-in code for n mod 4 == 0 (reached by falling through the dispatch).
C On entry r10 holds the most significant limb and ar.lc = (n - 1) >> 2.
C Three more limbs are loaded and r8 (the return value) is set to the bits
C shifted out of the top limb.  Each br.cloop consumes one loop count:
C   n = 4     finish through the wind-down chain at .Lr4
C   n = 8     L(gt4), then finish at .Lr8
C   n >= 12   L(gt4), L(gt8), then join the main loop at .LL00
C A result limb is formed as (limb FSH cnt) or (next lower limb BSH tnc);
C "sub r31 = -1, x" then yields the bitwise complement of x for the store.
.Lb00:
	ld8	r19 = [up], UPD
	;;
	ld8	r16 = [up], UPD
	;;
	ld8	r17 = [up], UPD
	BSH	r8 = r10, tnc
	br.cloop.dptk	L(gt4)
	;;
	FSH	r24 = r10, cnt
	BSH	r25 = r19, tnc
	;;
	FSH	r26 = r19, cnt
	BSH	r27 = r16, tnc
	;;
	FSH	r20 = r16, cnt
	BSH	r21 = r17, tnc
	;;
	or	r14 = r25, r24
	FSH	r22 = r17, cnt
	;;
	or	r15 = r27, r26
	sub	r31 = -1, r14
	br	.Lr4

C More than 4 limbs: keep loading while the first results are formed.
L(gt4):
 {.mmi;	nop	0
	nop	0
	FSH	r24 = r10, cnt
}{.mmi;	ld8	r18 = [up], UPD
	nop	0
	BSH	r25 = r19, tnc
	;; }
 {.mmi;	nop	0
	nop	0
	FSH	r26 = r19, cnt
}{.mmi;	ld8	r19 = [up], UPD
	nop	0
	BSH	r27 = r16, tnc
	;; }
 {.mmi;	nop	0
	nop	0
	FSH	r20 = r16, cnt
}{.mmi;	ld8	r16 = [up], UPD
	nop	0
	BSH	r21 = r17, tnc
	;; }
 {.mmi;	nop	0
	or	r14 = r25, r24
	FSH	r22 = r17, cnt
}{.mib;	ld8	r17 = [up], UPD
	BSH	r23 = r18, tnc
	br.cloop.dptk	L(gt8)
	;; }
 {.mmi;	nop	0
	or	r15 = r27, r26
	FSH	r24 = r18, cnt
}{.mib;	sub	r31 = -1, r14
	BSH	r25 = r19, tnc
	br	.Lr8 }

C More than 8 limbs: issue one more load and enter the main loop.
L(gt8):
	or	r15 = r27, r26
	FSH	r24 = r18, cnt
	ld8	r18 = [up], UPD
	sub	r31 = -1, r14
	BSH	r25 = r19, tnc
	br	.LL00

C Feed-in code for n mod 4 == 1.
C On entry r10 holds the most significant limb and ar.lc = (n - 1) >> 2.
C r8 (the return value) is set to the bits shifted out of the top limb.
C Each br.cloop consumes one loop count:
C   n = 1     complement the single shifted limb and finish at .Lr1
C   n = 5     L(gt1), then finish through the wind-down chain at .Lr5
C   n >= 9    L(gt1), L(gt5), then join the main loop at L(end)
C A result limb is formed as (limb FSH cnt) or (next lower limb BSH tnc);
C "sub r31 = -1, x" then yields the bitwise complement of x for the store.
.Lb01:
	br.cloop.dptk	L(gt1)
	;;
	BSH	r8 = r10, tnc
	FSH	r22 = r10, cnt
	;;
	sub	r31 = -1, r22
	br	.Lr1
	;;
C More than 1 limb: load the next four limbs.
L(gt1):
	ld8	r18 = [up], UPD
	BSH	r8 = r10, tnc
	FSH	r22 = r10, cnt
	;;
	ld8	r19 = [up], UPD
	;;
	ld8	r16 = [up], UPD
	;;
	ld8	r17 = [up], UPD
	BSH	r23 = r18, tnc
	br.cloop.dptk	L(gt5)
	;;
	nop	0
	FSH	r24 = r18, cnt
	BSH	r25 = r19, tnc
	;;
	nop	0
	FSH	r26 = r19, cnt
	BSH	r27 = r16, tnc
	;;
	or	r15 = r23, r22
	FSH	r20 = r16, cnt
	BSH	r21 = r17, tnc
	;;
	or	r14 = r25, r24
	FSH	r22 = r17, cnt
	sub	r31 = -1, r15
	br	.Lr5

C More than 5 limbs: load four more limbs while the first results are
C formed, then enter the main loop at its closing branch.
L(gt5):
 {.mmi;	nop	0
	nop	0
	FSH	r24 = r18, cnt
}{.mmi;	ld8	r18 = [up], UPD
	nop	0
	BSH	r25 = r19, tnc
	;; }
 {.mmi;	nop	0
	nop	0
	FSH	r26 = r19, cnt
}{.mmi;	ld8	r19 = [up], UPD
	nop	0
	BSH	r27 = r16, tnc
	;; }
 {.mmi;	nop	0
	or	r15 = r23, r22
	FSH	r20 = r16, cnt
}{.mmi;	ld8	r16 = [up], UPD
	nop	0
	BSH	r21 = r17, tnc
	;; }
 {.mmi;	or	r14 = r25, r24
	sub	r31 = -1, r15
	FSH	r22 = r17, cnt
}{.mib;	ld8	r17 = [up], UPD
	BSH	r23 = r18, tnc
	br	L(end)
	;; }

C Feed-in code for n mod 4 == 2.
C On entry r10 holds the most significant limb and ar.lc = (n - 1) >> 2.
C r8 (the return value) is set to the bits shifted out of the top limb.
C Each br.cloop consumes one loop count:
C   n = 2     finish through the wind-down chain at .Lr2
C   n = 6     L(gt2), then finish at .Lr6
C   n >= 10   L(gt2), L(gt6), then join the main loop at .LL10
C A result limb is formed as (limb FSH cnt) or (next lower limb BSH tnc);
C "sub r31 = -1, x" then yields the bitwise complement of x for the store.
.Lb10:
	ld8	r17 = [up], UPD
	br.cloop.dptk	L(gt2)
	;;
	BSH	r8 = r10, tnc
	FSH	r20 = r10, cnt
	;;
	BSH	r21 = r17, tnc
	FSH	r22 = r17, cnt
	;;
	or	r14 = r21, r20
	;;
	sub	r31 = -1, r14
	br	.Lr2
	;;
C More than 2 limbs: load the next four limbs.
L(gt2):
	ld8	r18 = [up], UPD
	BSH	r8 = r10, tnc
	FSH	r20 = r10, cnt
	;;
	ld8	r19 = [up], UPD
	;;
	ld8	r16 = [up], UPD
	BSH	r21 = r17, tnc
	FSH	r22 = r17, cnt
	;;
	ld8	r17 = [up], UPD
	BSH	r23 = r18, tnc
	br.cloop.dptk	L(gt6)
	;;
	nop	0
	FSH	r24 = r18, cnt
	BSH	r25 = r19, tnc
	;;
	or	r14 = r21, r20
	FSH	r26 = r19, cnt
	BSH	r27 = r16, tnc
	;;
 {.mmi;	nop	0
	or	r15 = r23, r22
	FSH	r20 = r16, cnt
}{.mib;	sub	r31 = -1, r14
	BSH	r21 = r17, tnc
	br	.Lr6
	;; }
C More than 6 limbs: load three more limbs, then enter the main loop.
L(gt6):
 {.mmi;	nop	0
	nop	0
	FSH	r24 = r18, cnt
}{.mmi;	ld8	r18 = [up], UPD
	nop	0
	BSH	r25 = r19, tnc
	;; }
 {.mmi;	nop	0
	or	r14 = r21, r20
	FSH	r26 = r19, cnt
}{.mmi;	ld8	r19 = [up], UPD
	nop	0
	BSH	r27 = r16, tnc
	;; }
 {.mmi;	or	r15 = r23, r22
	sub	r31 = -1, r14
	FSH	r20 = r16, cnt
}{.mib;	ld8	r16 = [up], UPD
	BSH	r21 = r17, tnc
	br	.LL10
}

C Feed-in code for n mod 4 == 3.
C On entry r10 holds the most significant limb and ar.lc = (n - 1) >> 2.
C r8 (the return value) is set to the bits shifted out of the top limb.
C Each br.cloop consumes one loop count:
C   n = 3     finish through the wind-down chain at .Lr3
C   n = 7     L(gt3), then finish at .Lr7
C   n >= 11   L(gt3), L(gt7), then join the main loop at .LL11
C A result limb is formed as (limb FSH cnt) or (next lower limb BSH tnc);
C "sub r31 = -1, x" then yields the bitwise complement of x for the store.
.Lb11:
	ld8	r16 = [up], UPD
	;;
	ld8	r17 = [up], UPD
	BSH	r8 = r10, tnc
	FSH	r26 = r10, cnt
	br.cloop.dptk	L(gt3)
	;;
	BSH	r27 = r16, tnc
	;;
	FSH	r20 = r16, cnt
	BSH	r21 = r17, tnc
	;;
	FSH	r22 = r17, cnt
	;;
	or	r15 = r27, r26
	;;
	or	r14 = r21, r20
	sub	r31 = -1, r15
	br	.Lr3
	;;
C More than 3 limbs: load the next four limbs.
L(gt3):
	ld8	r18 = [up], UPD
	;;
	ld8	r19 = [up], UPD
	BSH	r27 = r16, tnc
	;;
 {.mmi;	nop	0
	nop	0
	FSH	r20 = r16, cnt
}{.mmi;	ld8	r16 = [up], UPD
	nop	0
	BSH	r21 = r17, tnc
	;;
}{.mmi;	nop	0
	nop	0
	FSH	r22 = r17, cnt
}{.mib;	ld8	r17 = [up], UPD
	BSH	r23 = r18, tnc
	br.cloop.dptk	L(gt7)
	;; }
	or	r15 = r27, r26
	FSH	r24 = r18, cnt
	BSH	r25 = r19, tnc
	;;
 {.mmi;	nop	0
	or	r14 = r21, r20
	FSH	r26 = r19, cnt
}{.mib;	sub	r31 = -1, r15
	BSH	r27 = r16, tnc
	br	.Lr7
}
C More than 7 limbs: load two more limbs, then enter the main loop.
L(gt7):
 {.mmi;	nop	0
	or	r15 = r27, r26
	FSH	r24 = r18, cnt
}{.mmi;	ld8	r18 = [up], UPD
	nop	0
	BSH	r25 = r19, tnc
	;; }
 {.mmi;	or	r14 = r21, r20
	sub	r31 = -1, r15
	FSH	r26 = r19, cnt
}{.mib;	ld8	r19 = [up], UPD
	BSH	r27 = r16, tnc
	br	.LL11
}

C *** MAIN LOOP START ***
C One pass handles four limbs.  Each of the four two-bundle groups
C   - stores the complemented limb staged in r31 and steps rp down,
C   - ORs the two shifted halves of an upcoming result limb (into r14 or r15,
C     used alternately),
C   - shifts a previously loaded limb with FSH and BSH,
C   - loads the next source limb and steps up down,
C   - complements the previous OR result into r31 ("sub r31 = -1, x").
C r16-r19 hold loaded limbs and r20-r27 their shifted halves.
C .LL00, .LL11, .LL10 and L(end) are the entry points used by the feed-in
C code.  At L(end) the lfetch pointer r11 is stepped by PUPD, and br.cloop
C repeats the loop until the count in ar.lc is used up.
	ALIGN(32)
L(top):
.LL01:
 {.mmi;	st8	[rp] = r31, UPD		C M2
	or	r15 = r27, r26		C M3
	FSH	r24 = r18, cnt		C I0
}{.mmi;	ld8	r18 = [up], UPD		C M0
	sub	r31 = -1, r14		C M1
	BSH	r25 = r19, tnc		C I1
	;; }
.LL00:
 {.mmi;	st8	[rp] = r31, UPD
	or	r14 = r21, r20
	FSH	r26 = r19, cnt
}{.mmi;	ld8	r19 = [up], UPD
	sub	r31 = -1, r15
	BSH	r27 = r16, tnc
	;; }
.LL11:
 {.mmi;	st8	[rp] = r31, UPD
	or	r15 = r23, r22
	FSH	r20 = r16, cnt
}{.mmi;	ld8	r16 = [up], UPD
	sub	r31 = -1, r14
	BSH	r21 = r17, tnc
	;; }
.LL10:
 {.mmi;	st8	[rp] = r31, UPD
	or	r14 = r25, r24
	FSH	r22 = r17, cnt
}{.mmi;	ld8	r17 = [up], UPD
	sub	r31 = -1, r15
	BSH	r23 = r18, tnc
	;; }
L(end):	lfetch		[r11], PUPD
	br.cloop.dptk	L(top)
C *** MAIN LOOP END ***

C Wind-down: no further loads are issued; the limbs already in registers are
C shifted, combined, complemented and stored.  The first group below is
C reached by falling out of the main loop.  Label .LrK is the entry point at
C which K limbs remain to be stored; the feed-in code jumps here directly
C for operands of 1 to 8 limbs.
 {.mmi;	st8	[rp] = r31, UPD
	or	r15 = r27, r26
	FSH	r24 = r18, cnt
}{.mib;	sub	r31 = -1, r14
	BSH	r25 = r19, tnc
	nop	0
	;; }
.Lr8:
 {.mmi;	st8	[rp] = r31, UPD
	or	r14 = r21, r20
	FSH	r26 = r19, cnt
}{.mib;	sub	r31 = -1, r15
	BSH	r27 = r16, tnc
	nop	0
	;; }
.Lr7:
 {.mmi;	st8	[rp] = r31, UPD
	or	r15 = r23, r22
	FSH	r20 = r16, cnt
}{.mib;	sub	r31 = -1, r14
	BSH	r21 = r17, tnc
	nop	0
	;; }
.Lr6:	st8	[rp] = r31, UPD
	or	r14 = r25, r24
	FSH	r22 = r17, cnt
	sub	r31 = -1, r15
	;;
.Lr5:	st8	[rp] = r31, UPD
	or	r15 = r27, r26
	sub	r31 = -1, r14
	;;
.Lr4:	st8	[rp] = r31, UPD
	or	r14 = r21, r20
	sub	r31 = -1, r15
	;;
.Lr3:	st8	[rp] = r31, UPD
	sub	r31 = -1, r14
	;;
C The least significant result limb has no lower neighbour: it is just the
C complement of r22, the lowest source limb shifted up by cnt.
.Lr2:	st8	[rp] = r31, UPD
	sub	r31 = -1, r22
	;;
C Store the last limb, restore the loop counter saved in r2, and return
C (r8 was set by the feed-in code).
.Lr1:	st8	[rp] = r31, UPD		C			M23
	mov	ar.lc = r2		C			I0
	br.ret.sptk.many b0		C			B
EPILOGUE(func)
ASM_END()