/*
 * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20 | ||
#include "libavutil/arm/asm.S"
22 | ||
@ No-op macro: accepts and discards any arguments.  Used as the default
@ value of sum8's \rsb parameter so that the "negate the coefficient"
@ slots expand to nothing when negation is not wanted.
.macro skip args:vararg
.endm
25 | ||
@ sum8: 64-bit multiply-accumulate of eight coefficient/sample pairs.
@   \lo:\hi  running 64-bit accumulator (updated via smlal)
@   \w       coefficient base pointer (read at stride 4*64 bytes,
@            starting at word offset \offs; \w itself is not modified)
@   \p       sample pointer; pre-incremented by 4 on entry and then read
@            at stride 4*64 bytes (writeback only on the first load)
@   \t1-\t4  scratch registers
@   \rsb     either "rsb" (negate each coefficient before the multiply)
@            or the default "skip" (leave coefficients as-is)
@ Loads are interleaved with the multiplies to hide load latency; the
@ .irpc unrolls iterations 1..6 in pairs and iteration 7 is peeled.
.macro sum8 lo, hi, w, p, t1, t2, t3, t4, rsb=skip, offs=0
        ldr     \t1, [\w, #4*\offs]             @ first coefficient
        ldr     \t2, [\p, #4]!                  @ first sample (advance \p)
        \rsb    \t1, \t1, #0                    @ optional negate
.irpc i, 135
        ldr     \t3, [\w, #4*64*\i+4*\offs]
        ldr     \t4, [\p, #4*64*\i]
        smlal   \lo, \hi, \t1, \t2
        \rsb    \t3, \t3, #0
        ldr     \t1, [\w, #4*64*(\i+1)+4*\offs]
        ldr     \t2, [\p, #4*64*(\i+1)]
        smlal   \lo, \hi, \t3, \t4
        \rsb    \t1, \t1, #0
.endr
        ldr     \t3, [\w, #4*64*7+4*\offs]      @ last pair, peeled
        ldr     \t4, [\p, #4*64*7]
        smlal   \lo, \hi, \t1, \t2
        \rsb    \t3, \t3, #0
        smlal   \lo, \hi, \t3, \t4
.endm
46 | ||
@ round: convert the 64-bit fixed-point accumulator \lo:\hi into a
@ saturated 16-bit sample in \rd, keeping the low 24 fractional bits
@ in \lo:\hi so rounding error carries into the next accumulation.
.macro round rd, lo, hi
        lsr     \rd, \lo, #24                   @ integer part, low half
        bic     \lo, \lo, #0xff000000           @ keep 24-bit remainder in \lo
        orr     \rd, \rd, \hi, lsl #8           @ merge high half: \rd = sum >> 24
        mov     \hi, #0                         @ remainder's high word is now 0
        ssat    \rd, #16, \rd                   @ clamp to signed 16-bit
.endm
54 | ||
@ void ff_mpadsp_apply_window_fixed_armv6(int32_t *synth_buf,   // r0
@                                         int32_t *window,      // r1
@                                         int     *dither_state,// r2
@                                         int16_t *samples,     // r3
@                                         int      incr)        // [sp]
@ ARMv6 fixed-point MPEG audio synthesis window.  Produces 32 output
@ samples: one at j=0, 15 symmetric pairs written forward from
@ "samples" and backward from "samples2", and a final one at j=16.
@ *dither_state carries the 24-bit rounding remainder between calls
@ (see the round macro).  strh_post/strh_dpost (from asm.S) store a
@ halfword and post-increment/-decrement the pointer by a register —
@ presumably by the register value in bytes; confirm against asm.S.
function ff_mpadsp_apply_window_fixed_armv6, export=1
        push    {r2,r4-r11,lr}          @ r2 (dither_state) saved for final store

        @ Mirror the first 32 words of synth_buf to synth_buf[512..543]
        @ so the strided reads below never need to wrap the buffer.
        add     r4, r0, #4*512          @ synth_buf + 512
.rept 4
        ldm     r0!, {r5-r12}
        stm     r4!, {r5-r12}
.endr

        ldr     r4, [sp, #40]           @ incr (5th arg; 10 regs pushed = 40 bytes)
        sub     r0, r0, #4*17           @ synth_buf + 16 (as a pre-increment ptr)
        ldr     r8, [r2]                @ sum:low  = *dither_state
        add     r2, r0, #4*32           @ synth_buf + 48 (pre-increment ptr)
        rsb     r5, r4, r4, lsl #5      @ 31 * incr
        lsl     r4, r4, #1              @ 2 * incr (byte stride for int16 stores)
        asr     r9, r8, #31             @ sum:high = sign-extension of state
        add     r5, r3, r5, lsl #1      @ samples2 = samples + 31*incr entries
        add     r6, r1, #4*32           @ w2 = window + 32
        str     r4, [sp, #40]           @ stash 2*incr; reloaded into r12 in loop

        @ j = 0: two 8-term MACs, second one with negated coefficients.
        sum8    r8, r9, r1, r0, r10, r11, r12, lr
        sum8    r8, r9, r1, r2, r10, r11, r12, lr, rsb, 32
        round   r10, r8, r9
        strh_post r10, r3, r4           @ *samples = sample; samples += incr

        @ Main loop: 15 iterations, each producing the pair (j, 32-j).
        @ r8:r9 accumulates the forward sample, r4:r7 the mirrored one.
        mov     lr, #15
1:
        ldr     r12, [r0, #4]!          @ *++p  (synth sample)
        ldr     r11, [r6, #-4]!         @ *--w2 (mirrored coefficient)
        ldr     r10, [r1, #4]!          @ *++w  (forward coefficient)
.irpc i, 0246
.if \i
        ldr     r11, [r6, #4*64*\i]
        ldr     r10, [r1, #4*64*\i]
.endif
        rsb     r11, r11, #0            @ mirrored coeff is negated
        smlal   r8, r9, r10, r12
        ldr     r10, [r0, #4*64*(\i+1)]
.ifeq \i
        smull   r4, r7, r11, r12        @ first term starts the second sum
.else
        smlal   r4, r7, r11, r12
.endif
        ldr     r11, [r6, #4*64*(\i+1)]
        ldr     r12, [r1, #4*64*(\i+1)]
        rsb     r11, r11, #0
        smlal   r8, r9, r12, r10
.iflt \i-6
        ldr     r12, [r0, #4*64*(\i+2)] @ preload next synth sample
.else
        ldr     r12, [r2, #-4]!         @ last pass: preload *--p2 for 2nd .irpc
.endif
        smlal   r4, r7, r11, r10
.endr
        @ Second half: same pattern over window+32 / synth_buf+48,
        @ with the synth samples (r12/r10 from p2) negated instead.
.irpc i, 0246
        ldr     r10, [r1, #4*64*\i+4*32]
        rsb     r12, r12, #0
        ldr     r11, [r6, #4*64*\i+4*32]
        smlal   r8, r9, r10, r12
        ldr     r10, [r2, #4*64*(\i+1)]
        smlal   r4, r7, r11, r12
        ldr     r12, [r1, #4*64*(\i+1)+4*32]
        rsb     r10, r10, #0
        ldr     r11, [r6, #4*64*(\i+1)+4*32]
        smlal   r8, r9, r12, r10
.iflt \i-6
        ldr     r12, [r2, #4*64*(\i+2)]
.else
        ldr     r12, [sp, #40]          @ reload 2*incr for the stores below
.endif
        smlal   r4, r7, r11, r10
.endr
        round   r10, r8, r9             @ forward sample; remainder stays in r8:r9
        adds    r8, r8, r4              @ fold mirrored sum onto the remainder
        adc     r9, r9, r7
        strh_post r10, r3, r12          @ *samples = r10;  samples  += incr
        round   r11, r8, r9             @ mirrored sample
        subs    lr, lr, #1
        strh_dpost r11, r5, r12         @ *samples2 = r11; samples2 -= incr
        bgt     1b

        @ j = 16: single 8-term MAC with negated coefficients, offset 33.
        sum8    r8, r9, r1, r0, r10, r11, r12, lr, rsb, 33
        pop     {r4}                    @ r4 = dither_state (the r2 pushed above)
        round   r10, r8, r9
        str     r8, [r4]                @ save 24-bit remainder for the next call
        strh    r10, [r3]               @ final sample (no pointer advance needed)

        pop     {r4-r11,pc}
endfunc