Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (c) 2010 Mans Rullgard <mans@mansr.com> | |
3 | * | |
4 | * This file is part of FFmpeg. | |
5 | * | |
6 | * FFmpeg is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * FFmpeg is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with FFmpeg; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | */ | |
20 | ||
21 | #include "libavutil/arm/asm.S" | |
22 | ||
@ ---------------------------------------------------------------------------
@ ff_synth_filter_float_neon
@
@ Calls ff_imdct_half_neon on `in`, writing at synth_buf + offset, then forms
@ four groups of 16 windowed sums over synth_buf (named a, b, c, d in the
@ inline comments), 4 lanes at a time:
@   a, b start from synth_buf2[0..15] / synth_buf2[16..31], are scaled by
@        `scale` and stored to out[0..15] / out[16..31];
@   c, d start from zero and are stored back to synth_buf2[0..15] / [16..31].
@
@ Inputs as the code uses them (names taken from the inline comments):
@   r0        not touched before the call; passed straight to ff_imdct_half_neon
@   r1        synth_buf base (4-byte elements)
@   r2        pointer to synth_buf_offset; read, then updated in place
@   r3        synth_buf2
@   stack+0   window
@   stack+4   out
@   stack+8   in
@   scale     read from d0[0]; NOVFP builds load it from stack+12
@
@ Every NEON load/store carries a :128 qualifier, so synth_buf + offset,
@ synth_buf2, window and out must all be 16-byte aligned.
@ Returns via pop into pc; r4-r11 are restored.
@ ---------------------------------------------------------------------------
23 | function ff_synth_filter_float_neon, export=1 | |
@ Save r3 (synth_buf2) along with r4-r11 and lr: 10 words, so the caller's
@ stack arguments start at [sp, #10*4] until r3 is popped again below.
24 | push {r3-r11,lr} | |
25 | ||
@ r4 = offset; r1 = synth_buf + offset (lsl #2: offset counts 4-byte elements).
26 | ldr r4, [r2] @ synth_buf_offset | |
27 | add r1, r1, r4, lsl #2 @ synth_buf | |
@ r12 = (offset - 32) & 511 (bfc clears bits 9..31); this is the new offset
@ written back through r2 by the str below.
28 | sub r12, r4, #32 | |
29 | bfc r12, #9, #23 | |
@ r4 = offset & ~63; compared with the inner-loop counter to decide when the
@ synth_buf pointers must be rewound.
30 | bic r4, r4, #63 | |
31 | str r12, [r2] | |
32 | ||
@ Third stack argument: 10 saved words + 2. r2 is free now that the offset
@ has been stored, and becomes an argument of the call below.
33 | ldr r2, [sp, #12*4] @ in | |
@ Keep synth_buf + offset in r9; it is needed again after the call.
34 | mov r9, r1 @ synth_buf | |
35 | ||
@ Call ff_imdct_half_neon with r0 as received, r1 = synth_buf + offset, r2 = in.
@ NOTE(review): VFP / NOVFP line prefixes are macros not defined in this file
@ (presumably from the included asm.S); they look like hard-float vs.
@ soft-float ABI selectors - confirm. On VFP builds d0, whose lane 0 is used
@ as scale further down, is preserved across the call.
36 | VFP vpush {d0} | |
37 | bl X(ff_imdct_half_neon) | |
38 | VFP vpop {d0} | |
@ Reload r3 (synth_buf2) saved at entry. Nine saved words remain, so the
@ stack arguments now start at [sp, #9*4].
39 | pop {r3} | |
40 | ||
41 | ldr r5, [sp, #9*4] @ window | |
42 | ldr r2, [sp, #10*4] @ out | |
43 | NOVFP vldr s0, [sp, #12*4] @ scale | |
@ r8 = r9 + 12 elements: first address of the group that is read lane-reversed.
44 | add r8, r9, #12*4 | |
45 | ||
@ lr = post-increment stride for every load in the inner loop: 64 elements.
46 | mov lr, #64*4 | |
@ r1 = outer pass counter: 4 passes, each producing 4 lanes of a, b, c and d.
47 | mov r1, #4 | |
48 | 1: |
@ Outer pass. With i = 4 * pass number and sb = synth_buf + offset:
@   r9  = sb + i          r10 = sb + 16 + i      (read forwards)
@   r8  = sb + 12 - i     r11 = sb + 28 - i      (read lane-reversed)
@   r5, r0, r6, r7 = window + i, + 16 + i, + 32 + i, + 48 + i
49 | add r10, r9, #16*4 @ synth_buf | |
50 | add r11, r8, #16*4 | |
51 | add r0, r5, #16*4 @ window | |
52 | add r6, r5, #32*4 | |
53 | add r7, r5, #48*4 | |
54 | ||
@ Accumulators: a (q10) = synth_buf2[i..i+3], b (q1) = synth_buf2[16+i..19+i],
@ c (q2) = d (q3) = 0. r3 is left pointing at the b slot.
55 | vld1.32 {q10}, [r3,:128] @ a | |
56 | add r3, r3, #16*4 | |
57 | vld1.32 {q1}, [r3,:128] @ b | |
58 | vmov.f32 q2, #0.0 @ c | |
59 | vmov.f32 q3, #0.0 @ d | |
60 | ||
@ r12 = elements left to cover; 64 are consumed per iteration (8 iterations).
61 | mov r12, #512 | |
62 | 2: |
@ Inner loop, loads interleaved with the multiply-accumulates:
@   a -= window[r5] * reversed(synth_buf[r8])      (q8  * q9)
@   b += window[r0] * synth_buf[r9]                (q11 * q12)
@   c += window[r6] * synth_buf[r10]               (q8  * q9,  reloaded)
@   d += window[r7] * reversed(synth_buf[r11])     (q11 * q12, reloaded)
@ vrev64.32 only swaps the two lanes inside each d register; multiplying the
@ low half of the window by the high half of the data and vice versa
@ (d16*d19, d17*d18 and d22*d25, d23*d24) completes the 4-lane reversal.
63 | vld1.32 {q9}, [r8, :128], lr | |
64 | vrev64.32 q9, q9 | |
65 | vld1.32 {q8}, [r5, :128], lr | |
66 | vmls.f32 d20, d16, d19 | |
67 | vld1.32 {q11}, [r0, :128], lr | |
68 | vmls.f32 d21, d17, d18 | |
69 | vld1.32 {q12}, [r9, :128], lr | |
70 | vmla.f32 d2, d22, d24 | |
71 | vld1.32 {q8}, [r6, :128], lr | |
72 | vmla.f32 d3, d23, d25 | |
73 | vld1.32 {q9}, [r10,:128], lr | |
74 | vmla.f32 d4, d16, d18 | |
75 | vld1.32 {q12}, [r11,:128], lr | |
76 | vmla.f32 d5, d17, d19 | |
77 | vrev64.32 q12, q12 | |
78 | vld1.32 {q11}, [r7, :128], lr | |
79 | vmla.f32 d6, d22, d25 | |
80 | vmla.f32 d7, d23, d24 | |
@ Done once all 512 elements are covered. Otherwise, when the remaining count
@ equals offset & ~63, pull the four synth_buf pointers back by 512 elements
@ before continuing.
@ NOTE(review): presumably synth_buf is a 512-element ring buffer (consistent
@ with the & 511 applied to the stored offset) - confirm against the caller.
81 | subs r12, r12, #64 | |
82 | beq 3f | |
83 | cmp r12, r4 | |
84 | bne 2b | |
85 | sub r8, r8, #512*4 | |
86 | sub r9, r9, #512*4 | |
87 | sub r10, r10, #512*4 | |
88 | sub r11, r11, #512*4 | |
89 | b 2b | |
90 | 3: |
@ Write results for this pass:
@   synth_buf2[16+i..] = d, synth_buf2[i..] = c   (r3 stepped back to the a slot)
@   out[i..] = a * scale,   out[16+i..] = b * scale (r2 left at out + 16 + i)
91 | vmul.f32 q8, q10, d0[0] | |
92 | vmul.f32 q9, q1, d0[0] | |
93 | vst1.32 {q3}, [r3,:128] | |
94 | sub r3, r3, #16*4 | |
95 | vst1.32 {q2}, [r3,:128] | |
96 | vst1.32 {q8}, [r2,:128] | |
97 | add r2, r2, #16*4 | |
98 | vst1.32 {q9}, [r2,:128] | |
99 | ||
@ Return after the fourth pass. r3 was already popped above, so r4-r11 plus
@ pc consume exactly the nine saved words that remain.
100 | subs r1, r1, #1 | |
101 | it eq | |
102 | popeq {r4-r11,pc} | |
103 | ||
@ If r4 == 0 the inner loop never took its rewind path (it leaves through beq
@ before the compare once r12 reaches 0), so r8/r9 have advanced 8 * 64
@ elements and are pulled back here. For any other r4 the rewind ran once and
@ the pointers are already back. r10/r11 are recomputed at label 1.
104 | cmp r4, #0 | |
105 | itt eq | |
106 | subeq r8, r8, #512*4 | |
107 | subeq r9, r9, #512*4 | |
@ window pointer: undo the 8 * 64-element advance made by the inner loop.
108 | sub r5, r5, #512*4 | |
@ Step to the next four lanes. out was advanced by 16 above, so -12 nets +4;
@ r8 steps backwards because its group is consumed in reversed order.
109 | sub r2, r2, #12*4 @ out | |
110 | add r3, r3, #4*4 @ synth_buf2 | |
111 | add r5, r5, #4*4 @ window | |
112 | add r9, r9, #4*4 @ synth_buf | |
113 | sub r8, r8, #4*4 @ synth_buf | |
114 | b 1b | |
115 | endfunc |