|
Packit |
030a23 |
/*
|
|
Packit |
030a23 |
* Copyright © 2008 Mozilla Corporation
|
|
Packit |
030a23 |
* Copyright © 2010 Nokia Corporation
|
|
Packit |
030a23 |
*
|
|
Packit |
030a23 |
* Permission to use, copy, modify, distribute, and sell this software and its
|
|
Packit |
030a23 |
* documentation for any purpose is hereby granted without fee, provided that
|
|
Packit |
030a23 |
* the above copyright notice appear in all copies and that both that
|
|
Packit |
030a23 |
* copyright notice and this permission notice appear in supporting
|
|
Packit |
030a23 |
* documentation, and that the name of Mozilla Corporation not be used in
|
|
Packit |
030a23 |
* advertising or publicity pertaining to distribution of the software without
|
|
Packit |
030a23 |
* specific, written prior permission. Mozilla Corporation makes no
|
|
Packit |
030a23 |
* representations about the suitability of this software for any purpose. It
|
|
Packit |
030a23 |
* is provided "as is" without express or implied warranty.
|
|
Packit |
030a23 |
*
|
|
Packit |
030a23 |
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
|
|
Packit |
030a23 |
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|
Packit |
030a23 |
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
Packit |
030a23 |
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
Packit |
030a23 |
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
|
|
Packit |
030a23 |
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
|
|
Packit |
030a23 |
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
|
|
Packit |
030a23 |
* SOFTWARE.
|
|
Packit |
030a23 |
*
|
|
Packit |
030a23 |
* Author: Jeff Muizelaar (jeff@infidigm.net)
|
|
Packit |
030a23 |
*
|
|
Packit |
030a23 |
*/
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/* Prevent the stack from becoming executable */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

/*
 * Assembler setup for everything that follows in this file:
 *  .text              - emit into the code section
 *  .arch armv6        - accept instructions up to ARMv6 while assembling
 *  .object_arch armv4 - but record ARMv4 as the architecture in the object
 *                       file attributes
 *                       NOTE(review): presumably so that the object can be
 *                       linked into builds targeting older cores, with the
 *                       routines selected at run time - confirm
 *  .arm               - ARM (not Thumb) instruction encoding
 *  .altmacro          - alternate macro syntax; the macros in this file
 *                       refer to their parameters by bare name and glue
 *                       them to mnemonics with '&'
 *  .p2align 2         - align to a 4-byte boundary
 */
.text
.arch armv6
.object_arch armv4
.arm
.altmacro
.p2align 2
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
#include "pixman-arm-asm.h"
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * Note: This code is only using armv5te instructions (not even armv6),
 *       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
 *       be split into a few variants, tuned for each microarchitecture.
 *
 * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
 * have efficient write combining), it needs to be changed to use 16-byte
 * aligned writes using STM instruction.
 *
 * Nearest scanline scaler macro template uses the following arguments:
 *  fname                     - name of the function to generate
 *  bpp_shift                 - (1 << bpp_shift) is the size of pixel in bytes
 *  t                         - type suffix for LDR/STR instructions
 *  prefetch_distance         - prefetch in the source image by that many
 *                              pixels ahead
 *  prefetch_braking_distance - stop prefetching when that many pixels are
 *                              remaining before the end of scanline
 *
 * Inputs of the generated function, as read by the code below:
 *  r0        W               - number of pixels to write
 *  r1        DST             - destination pointer, advanced by one pixel
 *                              after every store
 *  r2        SRC             - source base; pixels are loaded from
 *                              SRC + byte offset derived from VX
 *  r3        VX              - source x coordinate with 16 fractional bits
 *  [sp]      UNIT_X          - amount added to VX for every output pixel
 *  [sp, #4]  SRC_WIDTH_FIXED - subtracted from VX, repeatedly, whenever VX
 *                              is not negative after a step
 *
 * After every step VX is therefore left negative.
 * NOTE(review): this suggests callers pass SRC biased past the pixels so
 * that negative offsets address the scanline - confirm against the C caller.
 *
 * Clobbers r0-r3, ip and the flags. r4-r8 and r10 are saved on entry and
 * restored on exit. r10 is not otherwise used by this code.
 * NOTE(review): r10 presumably only pads the push to a multiple of 8 bytes -
 * confirm.
 */

.macro generate_nearest_scanline_func fname, bpp_shift, t,      \
                                      prefetch_distance,        \
                                      prefetch_braking_distance

pixman_asm_function fname
	W		.req	r0
	DST		.req	r1
	SRC		.req	r2
	VX		.req	r3
	UNIT_X		.req	ip
	TMP1		.req	r4
	TMP2		.req	r5
	VXMASK		.req	r6
	PF_OFFS		.req	r7
	SRC_WIDTH_FIXED	.req	r8

	/* first stack argument, fetched before sp moves */
	ldr	UNIT_X, [sp]
	push	{r4, r5, r6, r7, r8, r10}
	/* mask that clears the low bpp_shift bits of a byte offset */
	mvn	VXMASK, #((1 << bpp_shift) - 1)
	/* second stack argument: 6 pushed registers (24 bytes) + 4 */
	ldr	SRC_WIDTH_FIXED, [sp, #28]

	/* define helper macro */
	/*
	 * scale_2_pixels copies two pixels. It is software pipelined: on
	 * entry TMP1 must already hold the byte offset of the first pixel,
	 * and on exit TMP1 holds the byte offset of the pixel that follows
	 * the pair. Each half
	 *   - loads the pixel whose offset was computed earlier,
	 *   - computes the next offset, (VX >> 16) << bpp_shift, into the
	 *     other temporary,
	 *   - steps VX by UNIT_X (setting the flags),
	 *   - stores the pixel and advances DST (flags untouched),
	 *   - while VX is not negative, subtracts SRC_WIDTH_FIXED from it.
	 */
	.macro	scale_2_pixels
		ldr&t	TMP1, [SRC, TMP1]
		and	TMP2, VXMASK, VX, asr #(16 - bpp_shift)
		adds	VX, VX, UNIT_X
		str&t	TMP1, [DST], #(1 << bpp_shift)
9:		subpls	VX, VX, SRC_WIDTH_FIXED
		bpl	9b

		ldr&t	TMP2, [SRC, TMP2]
		and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)
		adds	VX, VX, UNIT_X
		str&t	TMP2, [DST], #(1 << bpp_shift)
9:		subpls	VX, VX, SRC_WIDTH_FIXED
		bpl	9b
	.endm

	/* now do the scaling */
	/* prime the pipeline: offset of the first pixel, then step VX */
	and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)
	adds	VX, VX, UNIT_X
9:	subpls	VX, VX, SRC_WIDTH_FIXED
	bpl	9b
	/*
	 * Bias W so that it stays non-negative only while at least
	 * 8 + prefetch_braking_distance pixels remain.
	 */
	subs	W, W, #(8 + prefetch_braking_distance)
	blt	2f
	/* calculate prefetch offset */
	/* PF_OFFS = VX + UNIT_X * prefetch_distance */
	mov	PF_OFFS, #prefetch_distance
	mla	PF_OFFS, UNIT_X, PF_OFFS, VX
1:	/* main loop, process 8 pixels per iteration with prefetch */
	pld	[SRC, PF_OFFS, asr #(16 - bpp_shift)]
	/*
	 * Advance the prefetch position by 8 pixels. The destination is
	 * spelled out instead of using the two-operand shorthand; the
	 * operation is the same.
	 */
	add	PF_OFFS, PF_OFFS, UNIT_X, lsl #3
	scale_2_pixels
	scale_2_pixels
	scale_2_pixels
	scale_2_pixels
	subs	W, W, #8
	bge	1b
2:
	/*
	 * Undo the prefetch bias and apply a bias of 4 instead
	 * (the immediate is negative, so this adds to W).
	 */
	subs	W, W, #(4 - 8 - prefetch_braking_distance)
	blt	2f
1:	/* process the remaining pixels */
	scale_2_pixels
	scale_2_pixels
	subs	W, W, #4
	bge	1b
2:
	/*
	 * W is now (pixels left - 4), so its two low bits equal those of
	 * the number of pixels left (0..3).
	 */
	tst	W, #2
	beq	2f
	scale_2_pixels
2:
	/* last odd pixel: its offset is already waiting in TMP1 */
	tst	W, #1
	ldrne&t	TMP1, [SRC, TMP1]
	strne&t	TMP1, [DST]
	/* cleanup helper macro */
	.purgem	scale_2_pixels
	.unreq	DST
	.unreq	SRC
	.unreq	W
	.unreq	VX
	.unreq	UNIT_X
	.unreq	TMP1
	.unreq	TMP2
	.unreq	VXMASK
	.unreq	PF_OFFS
	.unreq	SRC_WIDTH_FIXED
	/* return */
	pop	{r4, r5, r6, r7, r8, r10}
	bx	lr
.endfunc
.endm
|
|
Packit |
030a23 |
|
|
Packit |
030a23 |
/*
 * 0565 -> 0565 variant: bpp_shift 1 (2-byte pixels), 'h' suffix so that
 * pixels are moved with LDRH/STRH, prefetch 80 pixels ahead, stop
 * prefetching 32 pixels before the end of the scanline.
 */
generate_nearest_scanline_func \
    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32

/*
 * 8888 -> 8888 variant: bpp_shift 2 (4-byte pixels), empty suffix so that
 * pixels are moved with plain LDR/STR, prefetch 48 pixels ahead, stop
 * prefetching 32 pixels before the end of the scanline.
 */
generate_nearest_scanline_func \
    pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32
|