Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libavcodec / arm / ac3dsp_neon.S
CommitLineData
2ba45a60
DM
/*
 * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
21#include "libavutil/arm/asm.S"
22
/*
 * ff_ac3_max_msb_abs_int16_neon
 *
 * In:    r0 = pointer to signed 16-bit samples (loads carry a 128-bit
 *             alignment hint)
 *        r1 = sample count; 16 samples are consumed per iteration
 * Out:   r0 = zero-extended 16-bit value built from the absolute values
 *             of all samples read
 * Clobb: r1, q0-q3, flags
 *
 * The counter is tested after the loop body, so at least 16 samples are
 * always read (callers presumably pass a positive multiple of 16 --
 * TODO confirm against the C caller).
 *
 * The absolute values are ORed into per-lane accumulators, and the final
 * step takes the unsigned MAXIMUM of the four remaining lanes rather than
 * their OR.  The highest set bit is the same either way, but the lower
 * bits of the result can differ from a plain OR over all inputs.
 */
function ff_ac3_max_msb_abs_int16_neon, export=1
        vmov.i16        q0,  #0                 @ OR accumulator, even blocks
        vmov.i16        q2,  #0                 @ OR accumulator, odd blocks
1:      vld1.16         {q1},     [r0,:128]!
        vabs.s16        q1,  q1
        vld1.16         {q3},     [r0,:128]!
        vabs.s16        q3,  q3
        vorr            q0,  q0,  q1
        vorr            q2,  q2,  q3
        subs            r1,  r1,  #16           @ 2 x 8 samples done
        bgt             1b
        vorr            q0,  q0,  q2            @ merge the two accumulators
        vorr            d0,  d0,  d1            @ 8 lanes -> 4 lanes
        vpmax.u16       d0,  d0,  d0            @ 4 lanes -> 2 distinct maxima
        vpmax.u16       d0,  d0,  d0            @ lane 0 = max of all 4 lanes
        vmov.u16        r0,  d0[0]
        bx              lr
endfunc
41
/*
 * ff_ac3_exponent_min_neon
 *
 * In:    r0 = pointer to the first row of bytes; updated in place
 *             (accesses carry a 128-bit alignment hint)
 *        r1 = number of following rows to fold into the first one;
 *             the function returns immediately when this is 0
 *        r2 = number of bytes per row to process, 16 per outer iteration
 * Out:   none (first row overwritten with the column-wise unsigned minimum
 *        of itself and the r1 rows that follow it)
 * Clobb: r0, r2, r3, r12, q0, q1, flags
 *
 * Rows are 256 bytes apart (presumably the per-block coefficient capacity
 * used by the caller -- TODO confirm).  Both loops test their counter
 * after the body, so r2 <= 0 still processes one 16-byte column group.
 */
function ff_ac3_exponent_min_neon, export=1
        cmp             r1,  #0
        it              eq
        bxeq            lr                      @ nothing to fold in
        push            {lr}                    @ lr doubles as inner counter
        mov             r12, #256               @ row stride in bytes
1:
        vld1.8          {q0},     [r0,:128]     @ running minimum = row 0
        mov             lr,  r1                 @ rows left for this column
        add             r3,  r0,  #256          @ r3 -> same column, row 1
2:      vld1.8          {q1},     [r3,:128], r12
        subs            lr,  lr,  #1
        vmin.u8         q0,  q0,  q1
        bgt             2b
        subs            r2,  r2,  #16
        vst1.8          {q0},     [r0,:128]!    @ write back, next 16 columns
        bgt             1b
        pop             {pc}
endfunc
61
/*
 * ff_ac3_lshift_int16_neon
 *
 * In:    r0 = pointer to signed 16-bit samples, shifted in place
 *             (accesses carry a 128-bit alignment hint)
 *        r1 = sample count; 8 samples are processed per iteration
 *        r2 = shift amount, broadcast to every lane
 * Out:   none
 * Clobb: r0, r1, q0, q1, flags
 *
 * VSHL with a register operand reads a signed shift from the low byte of
 * each lane; the value is used as passed, so a non-negative r2 shifts
 * left.  The counter is tested after the body, so at least 8 samples are
 * always touched.
 */
function ff_ac3_lshift_int16_neon, export=1
        vdup.16         q0,  r2                 @ per-lane shift count
1:      vld1.16         {q1},     [r0,:128]
        vshl.s16        q1,  q1,  q0
        vst1.16         {q1},     [r0,:128]!
        subs            r1,  r1,  #8
        bgt             1b
        bx              lr
endfunc
71
/*
 * ff_ac3_rshift_int32_neon
 *
 * In:    r0 = pointer to signed 32-bit samples, shifted in place
 *             (accesses carry a 128-bit alignment hint)
 *        r1 = sample count; 4 samples are processed per iteration
 *        r2 = right-shift amount
 * Out:   none
 * Clobb: r0, r1, r2, q0, q1, flags
 *
 * There is no register-operand right shift in NEON, so the amount is
 * negated and fed to VSHL.S32, which treats a negative per-lane count as
 * a truncating arithmetic right shift.  The counter is tested after the
 * body, so at least 4 samples are always touched.
 */
function ff_ac3_rshift_int32_neon, export=1
        rsb             r2,  r2,  #0            @ r2 = -shift
        vdup.32         q0,  r2
1:      vld1.32         {q1},     [r0,:128]
        vshl.s32        q1,  q1,  q0            @ negative count => shift right
        vst1.32         {q1},     [r0,:128]!
        subs            r1,  r1,  #4
        bgt             1b
        bx              lr
endfunc
82
/*
 * ff_float_to_fixed24_neon
 *
 * In:    r0 = destination, signed 32-bit fixed-point values
 *        r1 = source, single-precision floats
 *        r2 = element count; 16 elements are converted per iteration
 *        (all loads and stores carry a 128-bit alignment hint)
 * Out:   none
 * Clobb: r0, r1, r2, q0-q3, flags
 *
 * Each float is converted with VCVT.S32.F32 using 24 fraction bits, i.e.
 * the result is the input scaled by 2^24 as a signed 32-bit integer.
 * Loads, conversions and stores are interleaved; the order is kept as
 * found.  The counter is tested after the body, so at least 16 elements
 * are always converted.
 */
function ff_float_to_fixed24_neon, export=1
1:      vld1.32         {q0-q1},  [r1,:128]!
        vcvt.s32.f32    q0,  q0,  #24
        vld1.32         {q2-q3},  [r1,:128]!
        vcvt.s32.f32    q1,  q1,  #24
        vcvt.s32.f32    q2,  q2,  #24
        vst1.32         {q0-q1},  [r0,:128]!
        vcvt.s32.f32    q3,  q3,  #24
        vst1.32         {q2-q3},  [r0,:128]!
        subs            r2,  r2,  #16
        bgt             1b
        bx              lr
endfunc
96
/*
 * ff_ac3_extract_exponents_neon
 *
 * In:    r0 = destination, one byte per input value (stores carry a
 *             32-bit alignment hint)
 *        r1 = source, signed 32-bit values (loads carry a 128-bit
 *             alignment hint)
 *        r2 = element count; 4 elements are processed per iteration
 * Out:   none
 * Clobb: r0, r1, r2, q0, q1, q3, q15, flags
 *
 * For every input x the byte written is  clz(|x|) - 8, so x == 0 yields
 * 24.  Inputs presumably fit in 24 bits so the result is never negative
 * -- TODO confirm with the caller.  The counter is tested after the body,
 * so at least 4 elements are always processed.
 */
function ff_ac3_extract_exponents_neon, export=1
        vmov.i32        q15, #8                 @ bias subtracted from clz
1:
        vld1.32         {q0},     [r1,:128]!
        vabs.s32        q1,  q0
        vclz.i32        q3,  q1
        vsub.i32        q3,  q3,  q15
        vmovn.i32       d6,  q3                 @ 4 x 32 bit -> 4 x 16 bit
        vmovn.i16       d6,  q3                 @ low 4 bytes of d6 = results
        vst1.32         {d6[0]},  [r0,:32]!     @ store exactly those 4 bytes
        subs            r2,  r2,  #4
        bgt             1b
        bx              lr
endfunc
111
/*
 * ff_apply_window_int16_neon
 *
 * In:    r0 = output, signed 16-bit samples
 *        r1 = input, signed 16-bit samples
 *        r2 = window coefficients, signed 16-bit, read front to back
 *        r3 = sample count; 16 samples are produced per iteration
 *        (all loads and stores carry a 128-bit alignment hint)
 * Out:   none
 * Clobb: r0-r3, r12, q0-q3, flags (r4 and lr are saved and restored)
 *
 * Each iteration multiplies 8 samples from the front of the buffer and
 * 8 samples from the back by the same 8 window coefficients, the back
 * group using them in reversed order, so only one half of a symmetric
 * window is read.  Products use VQRDMULH (saturating, rounding, doubling,
 * high half).  The counter is tested after the body, so at least one
 * iteration always runs.
 */
function ff_apply_window_int16_neon, export=1
        push            {r4,lr}
        add             r4,  r1,  r3,  lsl #1   @ r4 = input  + count * 2
        add             lr,  r0,  r3,  lsl #1   @ lr = output + count * 2
        sub             r4,  r4,  #16           @ -> last 8 input samples
        sub             lr,  lr,  #16           @ -> last 8 output samples
        mov             r12, #-16               @ backward stride for r4/lr
1:
        vld1.16         {q0},     [r1,:128]!    @ front samples
        vld1.16         {q2},     [r2,:128]!    @ window w[0..7]
        vld1.16         {q1},     [r4,:128], r12 @ back samples
        vrev64.16       q3,  q2                 @ reverse within each half
        vqrdmulh.s16    q0,  q0,  q2
        vqrdmulh.s16    d2,  d2,  d7            @ swapped halves complete the
        vqrdmulh.s16    d3,  d3,  d6            @ full 8-lane reversal
        vst1.16         {q0},     [r0,:128]!
        vst1.16         {q1},     [lr,:128], r12
        subs            r3,  r3,  #16
        bgt             1b

        pop             {r4,pc}
endfunc
134
/*
 * ff_ac3_sum_square_butterfly_int32_neon
 *
 * In:    r0 = destination for four signed 64-bit sums
 *        r1 = first input, signed 32-bit values  (a)
 *        r2 = second input, signed 32-bit values (b)
 *        r3 = element count; 2 elements are processed per iteration
 *        (no alignment hints are used on any access)
 * Out:   [r0] = { sum a*a, sum b*b, sum (a+b)^2, sum (a-b)^2 }
 * Clobb: r1, r2, r3, q0-q3, q8, q9, flags
 *
 * a+b and a-b are formed with non-saturating 32-bit arithmetic before
 * being squared, so they wrap if the inputs use the full 32-bit range.
 * Squares are accumulated in 64 bits.  The counter is tested after the
 * body, so at least 2 elements are always read.
 */
function ff_ac3_sum_square_butterfly_int32_neon, export=1
        vmov.i64        q0,  #0                 @ sum a*a      (2 partials)
        vmov.i64        q1,  #0                 @ sum b*b
        vmov.i64        q2,  #0                 @ sum (a+b)^2
        vmov.i64        q3,  #0                 @ sum (a-b)^2
1:
        vld1.32         {d16},    [r1]!
        vld1.32         {d17},    [r2]!
        vadd.s32        d18, d16, d17
        vsub.s32        d19, d16, d17
        vmlal.s32       q0,  d16, d16
        vmlal.s32       q1,  d17, d17
        vmlal.s32       q2,  d18, d18
        vmlal.s32       q3,  d19, d19
        subs            r3,  r3,  #2
        bgt             1b
        vadd.s64        d0,  d0,  d1            @ fold the two partial sums
        vadd.s64        d1,  d2,  d3            @ and pack the four totals
        vadd.s64        d2,  d4,  d5            @ into d0..d3
        vadd.s64        d3,  d6,  d7
        vst1.64         {q0-q1},  [r0]
        bx              lr
endfunc
158
/*
 * ff_ac3_sum_square_butterfly_float_neon
 *
 * In:    r0 = destination for four single-precision sums
 *        r1 = first input, single-precision floats  (a)
 *        r2 = second input, single-precision floats (b)
 *        r3 = element count; 2 elements are processed per iteration
 *        (no alignment hints are used on any access)
 * Out:   [r0] = { sum a*a, sum b*b, sum (a+b)^2, sum (a-b)^2 }
 * Clobb: r1, r2, r3, q0, q1, q8, q9, flags
 *
 * Each sum is kept as two partial accumulators (one per lane) that are
 * added together only at the end, so the summation order differs from a
 * plain sequential loop.  The counter is tested after the body, so at
 * least 2 elements are always read.
 */
function ff_ac3_sum_square_butterfly_float_neon, export=1
        vmov.f32        q0,  #0.0               @ d0: sum a*a, d1: sum b*b
        vmov.f32        q1,  #0.0               @ d2: (a+b)^2, d3: (a-b)^2
1:
        vld1.32         {d16},    [r1]!
        vld1.32         {d17},    [r2]!
        vadd.f32        d18, d16, d17
        vsub.f32        d19, d16, d17
        vmla.f32        d0,  d16, d16
        vmla.f32        d1,  d17, d17
        vmla.f32        d2,  d18, d18
        vmla.f32        d3,  d19, d19
        subs            r3,  r3,  #2
        bgt             1b
        vpadd.f32       d0,  d0,  d1            @ fold lane pairs: sums 0, 1
        vpadd.f32       d1,  d2,  d3            @ fold lane pairs: sums 2, 3
        vst1.32         {q0},     [r0]
        bx              lr
endfunc