Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (c) 2011 Mans Rullgard <mans@mansr.com> | |
3 | * | |
4 | * This file is part of FFmpeg. | |
5 | * | |
6 | * FFmpeg is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * FFmpeg is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with FFmpeg; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | */ | |
20 | ||
21 | #include "libavutil/arm/asm.S" | |
22 | ||
23 | function ff_ac3_max_msb_abs_int16_neon, export=1 /* In: r0 = pointer to int16 data, r1 = element count. Out: r0 = unsigned max over four 16-bit lanes of the OR of all abs() values. The loop tests its counter at the bottom, so one pass of 16 elements always runs. */ | |
24 | vmov.i16 q0, #0 /* q0: OR accumulator fed by the first load of each pass */ | |
25 | vmov.i16 q2, #0 /* q2: second, independent OR accumulator */ | |
26 | 1: vld1.16 {q1}, [r0,:128]! /* load 8 x int16 and advance r0 by 16 bytes; :128 requires a 16-byte aligned address */ | |
27 | vabs.s16 q1, q1 | |
28 | vld1.16 {q3}, [r0,:128]! /* next 8 x int16 */ | |
29 | vabs.s16 q3, q3 | |
30 | vorr q0, q0, q1 | |
31 | vorr q2, q2, q3 | |
32 | subs r1, r1, #16 /* 16 elements consumed per pass */ | |
33 | bgt 1b | |
34 | vorr q0, q0, q2 /* merge the two accumulators */ | |
35 | vorr d0, d0, d1 /* fold 8 lanes down to 4 */ | |
36 | vpmax.u16 d0, d0, d0 /* pairwise unsigned max: 4 lanes reduced to 2 distinct values */ | |
37 | vpmax.u16 d0, d0, d0 /* second pairwise max leaves the overall max in every lane */ | |
38 | vmov.u16 r0, d0[0] /* zero-extend lane 0 into r0 */ | |
39 | bx lr | |
40 | endfunc | |
41 | ||
42 | function ff_ac3_exponent_min_neon, export=1 /* In: r0 = pointer to the first row of bytes, r1 = number of further rows to compare, r2 = bytes per row to process. Each byte at r0 is replaced in place by the unsigned minimum of itself and the bytes at the same offset in the r1 rows that follow at 256-byte strides. */ | |
43 | cmp r1, #0 | |
44 | it eq /* IT block so the conditional return below is valid in Thumb-2 */ | |
45 | bxeq lr /* nothing to compare against: return with the data untouched */ | |
46 | push {lr} /* lr is reused as the inner loop counter */ | |
47 | mov r12, #256 /* byte stride between successive rows */ | |
48 | 1: | |
49 | vld1.8 {q0}, [r0,:128] /* running minimum starts as the 16 bytes at r0; no writeback, r0 is stored to below */ | |
50 | mov lr, r1 /* inner counter = number of rows to compare */ | |
51 | add r3, r0, #256 /* r3 = same offset in the next row */ | |
52 | 2: vld1.8 {q1}, [r3,:128], r12 /* load 16 bytes, then step r3 forward by one row */ | |
53 | subs lr, lr, #1 | |
54 | vmin.u8 q0, q0, q1 /* does not alter the flags set by subs above */ | |
55 | bgt 2b | |
56 | subs r2, r2, #16 /* 16 bytes finished; the store below leaves these flags intact */ | |
57 | vst1.8 {q0}, [r0,:128]! /* write the minima back and advance r0 by 16 bytes */ | |
58 | bgt 1b | |
59 | pop {pc} /* return through the saved lr */ | |
60 | endfunc | |
61 | ||
62 | function ff_ac3_lshift_int16_neon, export=1 /* In: r0 = pointer to int16 data (shifted in place), r1 = element count, r2 = shift amount. Processes 8 elements per pass and always runs at least one pass. */ | |
63 | vdup.16 q0, r2 /* broadcast the shift amount to all eight 16-bit lanes */ | |
64 | 1: vld1.16 {q1}, [r0,:128] /* load 8 x int16 without writeback; :128 requires a 16-byte aligned address */ | |
65 | vshl.s16 q1, q1, q0 /* per-lane shift by the count held in q0 */ | |
66 | vst1.16 {q1}, [r0,:128]! /* store over the source and advance r0 by 16 bytes */ | |
67 | subs r1, r1, #8 | |
68 | bgt 1b | |
69 | bx lr | |
70 | endfunc | |
71 | ||
72 | function ff_ac3_rshift_int32_neon, export=1 /* In: r0 = pointer to int32 data (shifted in place), r1 = element count, r2 = right-shift amount. Processes 4 elements per pass and always runs at least one pass. */ | |
73 | rsb r2, r2, #0 /* negate: VSHL by a negative register count shifts right */ | |
74 | vdup.32 q0, r2 /* broadcast the negated count to all four 32-bit lanes */ | |
75 | 1: vld1.32 {q1}, [r0,:128] /* load 4 x int32 without writeback; :128 requires a 16-byte aligned address */ | |
76 | vshl.s32 q1, q1, q0 /* signed per-lane shift by the count held in q0 */ | |
77 | vst1.32 {q1}, [r0,:128]! /* store over the source and advance r0 by 16 bytes */ | |
78 | subs r1, r1, #4 | |
79 | bgt 1b | |
80 | bx lr | |
81 | endfunc | |
82 | ||
83 | function ff_float_to_fixed24_neon, export=1 /* In: r0 = destination (int32), r1 = source (float), r2 = element count. Converts each float to signed fixed point with 24 fractional bits, 16 elements per pass; at least one pass always runs. Both pointers carry a :128 hint and so must be 16-byte aligned. */ | |
84 | 1: vld1.32 {q0-q1}, [r1,:128]! /* load 8 floats and advance r1 by 32 bytes */ | |
85 | vcvt.s32.f32 q0, q0, #24 /* float to s32 with 24 fraction bits */ | |
86 | vld1.32 {q2-q3}, [r1,:128]! /* next 8 floats; loads and stores are interleaved with the conversions, presumably to hide latency */ | |
87 | vcvt.s32.f32 q1, q1, #24 | |
88 | vcvt.s32.f32 q2, q2, #24 | |
89 | vst1.32 {q0-q1}, [r0,:128]! /* store the first 8 results and advance r0 by 32 bytes */ | |
90 | vcvt.s32.f32 q3, q3, #24 | |
91 | vst1.32 {q2-q3}, [r0,:128]! /* store the remaining 8 results */ | |
92 | subs r2, r2, #16 | |
93 | bgt 1b | |
94 | bx lr | |
95 | endfunc | |
96 | ||
97 | function ff_ac3_extract_exponents_neon, export=1 /* In: r0 = destination (one byte per value), r1 = source int32 values, r2 = element count. For each value writes clz(abs(value)) - 8 truncated to a byte; 4 elements per pass, at least one pass always runs. */ | |
98 | vmov.i32 q15, #8 /* constant 8 in every 32-bit lane */ | |
99 | 1: | |
100 | vld1.32 {q0}, [r1,:128]! /* load 4 x int32 and advance r1 by 16 bytes; :128 requires a 16-byte aligned address */ | |
101 | vabs.s32 q1, q0 | |
102 | vclz.i32 q3, q1 /* count leading zeros of the magnitude (32 for a zero input) */ | |
103 | vsub.i32 q3, q3, q15 /* subtract 8, so a zero input yields 24 */ | |
104 | vmovn.i32 d6, q3 /* narrow 32 to 16 bits: four results now fill d6 */ | |
105 | vmovn.i16 d6, q3 /* narrow 16 to 8 bits: the four results are the low 4 bytes of d6; the upper 4 bytes come from stale d7 and are not stored */ | |
106 | vst1.32 {d6[0]}, [r0,:32]! /* store those 4 bytes and advance r0 by 4; :32 requires a 4-byte aligned address */ | |
107 | subs r2, r2, #4 | |
108 | bgt 1b | |
109 | bx lr | |
110 | endfunc | |
111 | ||
112 | function ff_apply_window_int16_neon, export=1 /* In: r0 = output, r1 = input, r2 = window, r3 = length in int16 elements. Multiplies the input by a symmetric window: the window is read forwards once and applied to the front of the buffer as-is and to the back of the buffer mirrored. 16 elements per pass (8 front, 8 back); at least one pass always runs. */ | |
113 | push {r4,lr} /* r4 and lr are used as the backward-moving pointers */ | |
114 | add r4, r1, r3, lsl #1 /* r4 = input + length * 2 bytes (one past the end) */ | |
115 | add lr, r0, r3, lsl #1 /* lr = output + length * 2 bytes (one past the end) */ | |
116 | sub r4, r4, #16 /* step back to the last 8 input elements */ | |
117 | sub lr, lr, #16 /* step back to the last 8 output elements */ | |
118 | mov r12, #-16 /* post-increment that walks the back pointers towards the start */ | |
119 | 1: | |
120 | vld1.16 {q0}, [r1,:128]! /* 8 input elements from the front */ | |
121 | vld1.16 {q2}, [r2,:128]! /* 8 window coefficients */ | |
122 | vld1.16 {q1}, [r4,:128], r12 /* 8 input elements from the back, then move r4 down by 16 bytes */ | |
123 | vrev64.16 q3, q2 /* reverse the 16-bit lanes inside each 64-bit half of the window */ | |
124 | vqrdmulh.s16 q0, q0, q2 /* front = saturating rounding doubling multiply, high half, of input and window */ | |
125 | vqrdmulh.s16 d2, d2, d7 /* back low half uses the reversed HIGH window half: swapping halves completes the 8-lane reversal */ | |
126 | vqrdmulh.s16 d3, d3, d6 /* back high half uses the reversed LOW window half */ | |
127 | vst1.16 {q0}, [r0,:128]! /* store the front results */ | |
128 | vst1.16 {q1}, [lr,:128], r12 /* store the back results, then move lr down by 16 bytes */ | |
129 | subs r3, r3, #16 /* 8 front + 8 back elements done */ | |
130 | bgt 1b | |
131 | ||
132 | pop {r4,pc} /* restore r4 and return through the saved lr */ | |
133 | endfunc | |
134 | ||
135 | function ff_ac3_sum_square_butterfly_int32_neon, export=1 /* In: r0 = output array of four 64-bit sums, r1 = first int32 input (a), r2 = second int32 input (b), r3 = element count. Out at r0: sum of a*a, sum of b*b, sum of (a+b)*(a+b), sum of (a-b)*(a-b), in that order. 2 elements per pass; at least one pass always runs. */ | |
136 | vmov.i64 q0, #0 /* q0: two 64-bit partial sums of a*a */ | |
137 | vmov.i64 q1, #0 /* q1: partial sums of b*b */ | |
138 | vmov.i64 q2, #0 /* q2: partial sums of (a+b) squared */ | |
139 | vmov.i64 q3, #0 /* q3: partial sums of (a-b) squared */ | |
140 | 1: | |
141 | vld1.32 {d16}, [r1]! /* two values of a; no alignment hint on these loads */ | |
142 | vld1.32 {d17}, [r2]! /* two values of b */ | |
143 | vadd.s32 d18, d16, d17 /* a + b, computed in 32 bits */ | |
144 | vsub.s32 d19, d16, d17 /* a - b, computed in 32 bits */ | |
145 | vmlal.s32 q0, d16, d16 /* widening multiply-accumulate: 32 x 32 into 64-bit lanes */ | |
146 | vmlal.s32 q1, d17, d17 | |
147 | vmlal.s32 q2, d18, d18 | |
148 | vmlal.s32 q3, d19, d19 | |
149 | subs r3, r3, #2 | |
150 | bgt 1b | |
151 | vadd.s64 d0, d0, d1 /* combine the two partial sums of q0; the order of these four adds matters because each one overwrites a register the previous add has already consumed */ | |
152 | vadd.s64 d1, d2, d3 /* q1 total, placed next to the q0 total */ | |
153 | vadd.s64 d2, d4, d5 /* q2 total */ | |
154 | vadd.s64 d3, d6, d7 /* q3 total */ | |
155 | vst1.64 {q0-q1}, [r0] /* write the four 64-bit totals */ | |
156 | bx lr | |
157 | endfunc | |
158 | ||
159 | function ff_ac3_sum_square_butterfly_float_neon, export=1 /* In: r0 = output array of four floats, r1 = first float input (a), r2 = second float input (b), r3 = element count. Out at r0: sum of a*a, sum of b*b, sum of (a+b)*(a+b), sum of (a-b)*(a-b), in that order. 2 elements per pass; at least one pass always runs. */ | |
160 | vmov.f32 q0, #0.0 /* d0: partial sums of a*a, d1: partial sums of b*b */ | |
161 | vmov.f32 q1, #0.0 /* d2: partial sums of (a+b) squared, d3: partial sums of (a-b) squared */ | |
162 | 1: | |
163 | vld1.32 {d16}, [r1]! /* two values of a; no alignment hint on these loads */ | |
164 | vld1.32 {d17}, [r2]! /* two values of b */ | |
165 | vadd.f32 d18, d16, d17 /* a + b */ | |
166 | vsub.f32 d19, d16, d17 /* a - b */ | |
167 | vmla.f32 d0, d16, d16 | |
168 | vmla.f32 d1, d17, d17 | |
169 | vmla.f32 d2, d18, d18 | |
170 | vmla.f32 d3, d19, d19 | |
171 | subs r3, r3, #2 | |
172 | bgt 1b | |
173 | vpadd.f32 d0, d0, d1 /* pairwise add: d0 = { total a*a, total b*b } */ | |
174 | vpadd.f32 d1, d2, d3 /* d1 = { total (a+b) squared, total (a-b) squared } */ | |
175 | vst1.32 {q0}, [r0] /* write the four float totals */ | |
176 | bx lr | |
177 | endfunc | |