/*
 * ARM NEON optimised DSP functions
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

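/*
 * Half-pel pixel operations for hpeldsp.  The code below treats
 * r0 as the destination block, r1 as the source pixels, r2 as the
 * line size (stride) and r3 as the height in rows, i.e. the usual
 * op_pixels_func(block, pixels, line_size, h) argument order.
 *
 * pixels16: copy a 16-byte-wide block, four rows per loop iteration.
 * With avg=1 each row is additionally averaged (vrhadd.u8, i.e.
 * (a + b + 1) >> 1) with the data already at the destination.
 */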
.macro  pixels16        rnd=1, avg=0
  .if \avg
        mov             r12, r0
  .endif
1:      vld1.8          {q0}, [r1], r2
        vld1.8          {q1}, [r1], r2
        vld1.8          {q2}, [r1], r2
        pld             [r1, r2, lsl #2]
        vld1.8          {q3}, [r1], r2
        pld             [r1]
        pld             [r1, r2]
        pld             [r1, r2, lsl #1]
  .if \avg
        vld1.8          {q8}, [r12,:128], r2
        vrhadd.u8       q0, q0, q8
        vld1.8          {q9}, [r12,:128], r2
        vrhadd.u8       q1, q1, q9
        vld1.8          {q10}, [r12,:128], r2
        vrhadd.u8       q2, q2, q10
        vld1.8          {q11}, [r12,:128], r2
        vrhadd.u8       q3, q3, q11
  .endif
        subs            r3, r3, #4
        vst1.64         {q0}, [r0,:128], r2
        vst1.64         {q1}, [r0,:128], r2
        vst1.64         {q2}, [r0,:128], r2
        vst1.64         {q3}, [r0,:128], r2
        bne             1b
        bx              lr
.endm

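/*
 * pixels16_x2: horizontal half-pel interpolation of a 16-byte-wide
 * block, two rows per iteration.  vext.8 builds the same row offset by
 * one pixel, and "avg" (vrhadd.u8 for the rounding variant, vhadd.u8
 * for _no_rnd) averages it with the unshifted row.
 */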
.macro  pixels16_x2     rnd=1, avg=0
1:      vld1.8          {d0-d2}, [r1], r2
        vld1.8          {d4-d6}, [r1], r2
        pld             [r1]
        pld             [r1, r2]
        subs            r3, r3, #2
        vext.8          q1, q0, q1, #1
        avg             q0, q0, q1
        vext.8          q3, q2, q3, #1
        avg             q2, q2, q3
  .if \avg
        vld1.8          {q1}, [r0,:128], r2
        vld1.8          {q3}, [r0,:128]
        vrhadd.u8       q0, q0, q1
        vrhadd.u8       q2, q2, q3
        sub             r0, r0, r2
  .endif
        vst1.8          {q0}, [r0,:128], r2
        vst1.8          {q2}, [r0,:128], r2
        bne             1b
        bx              lr
.endm

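/*
 * pixels16_y2: vertical half-pel interpolation; each output row is the
 * average of two consecutive source rows.  The loop produces two rows
 * per iteration and the final pair is handled after the branch.
 */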
.macro  pixels16_y2     rnd=1, avg=0
        sub             r3, r3, #2
        vld1.8          {q0}, [r1], r2
        vld1.8          {q1}, [r1], r2
1:      subs            r3, r3, #2
        avg             q2, q0, q1
        vld1.8          {q0}, [r1], r2
        avg             q3, q0, q1
        vld1.8          {q1}, [r1], r2
        pld             [r1]
        pld             [r1, r2]
  .if \avg
        vld1.8          {q8}, [r0,:128], r2
        vld1.8          {q9}, [r0,:128]
        vrhadd.u8       q2, q2, q8
        vrhadd.u8       q3, q3, q9
        sub             r0, r0, r2
  .endif
        vst1.8          {q2}, [r0,:128], r2
        vst1.8          {q3}, [r0,:128], r2
        bne             1b

        avg             q2, q0, q1
        vld1.8          {q0}, [r1], r2
        avg             q3, q0, q1
  .if \avg
        vld1.8          {q8}, [r0,:128], r2
        vld1.8          {q9}, [r0,:128]
        vrhadd.u8       q2, q2, q8
        vrhadd.u8       q3, q3, q9
        sub             r0, r0, r2
  .endif
        vst1.8          {q2}, [r0,:128], r2
        vst1.8          {q3}, [r0,:128], r2

        bx              lr
.endm

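/*
 * pixels16_xy2: half-pel interpolation in both directions.  Horizontally
 * adjacent bytes are widened and summed with vaddl.u8, two such row sums
 * are added, and "shrn" narrows the result by 2 bits: vrshrn.u16 gives
 * (sum + 2) >> 2 in the rounding variant, while the _no_rnd variant uses
 * vshrn.u16 plus a bias of 1 (the NRND-prefixed adds), i.e. (sum + 1) >> 2.
 */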
.macro  pixels16_xy2    rnd=1, avg=0
        sub             r3, r3, #2
        vld1.8          {d0-d2}, [r1], r2
        vld1.8          {d4-d6}, [r1], r2
NRND    vmov.i16        q13, #1
        pld             [r1]
        pld             [r1, r2]
        vext.8          q1, q0, q1, #1
        vext.8          q3, q2, q3, #1
        vaddl.u8        q8, d0, d2
        vaddl.u8        q10, d1, d3
        vaddl.u8        q9, d4, d6
        vaddl.u8        q11, d5, d7
1:      subs            r3, r3, #2
        vld1.8          {d0-d2}, [r1], r2
        vadd.u16        q12, q8, q9
        pld             [r1]
NRND    vadd.u16        q12, q12, q13
        vext.8          q15, q0, q1, #1
        vadd.u16        q1, q10, q11
        shrn            d28, q12, #2
NRND    vadd.u16        q1, q1, q13
        shrn            d29, q1, #2
  .if \avg
        vld1.8          {q8}, [r0,:128]
        vrhadd.u8       q14, q14, q8
  .endif
        vaddl.u8        q8, d0, d30
        vld1.8          {d2-d4}, [r1], r2
        vaddl.u8        q10, d1, d31
        vst1.8          {q14}, [r0,:128], r2
        vadd.u16        q12, q8, q9
        pld             [r1, r2]
NRND    vadd.u16        q12, q12, q13
        vext.8          q2, q1, q2, #1
        vadd.u16        q0, q10, q11
        shrn            d30, q12, #2
NRND    vadd.u16        q0, q0, q13
        shrn            d31, q0, #2
  .if \avg
        vld1.8          {q9}, [r0,:128]
        vrhadd.u8       q15, q15, q9
  .endif
        vaddl.u8        q9, d2, d4
        vaddl.u8        q11, d3, d5
        vst1.8          {q15}, [r0,:128], r2
        bgt             1b

        vld1.8          {d0-d2}, [r1], r2
        vadd.u16        q12, q8, q9
NRND    vadd.u16        q12, q12, q13
        vext.8          q15, q0, q1, #1
        vadd.u16        q1, q10, q11
        shrn            d28, q12, #2
NRND    vadd.u16        q1, q1, q13
        shrn            d29, q1, #2
  .if \avg
        vld1.8          {q8}, [r0,:128]
        vrhadd.u8       q14, q14, q8
  .endif
        vaddl.u8        q8, d0, d30
        vaddl.u8        q10, d1, d31
        vst1.8          {q14}, [r0,:128], r2
        vadd.u16        q12, q8, q9
NRND    vadd.u16        q12, q12, q13
        vadd.u16        q0, q10, q11
        shrn            d30, q12, #2
NRND    vadd.u16        q0, q0, q13
        shrn            d31, q0, #2
  .if \avg
        vld1.8          {q9}, [r0,:128]
        vrhadd.u8       q15, q15, q9
  .endif
        vst1.8          {q15}, [r0,:128], r2

        bx              lr
.endm

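/*
 * pixels8: 8-byte-wide counterpart of pixels16; copies (or, with avg=1,
 * averages into the destination) four rows per loop iteration using
 * d registers.
 */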
.macro  pixels8         rnd=1, avg=0
1:      vld1.8          {d0}, [r1], r2
        vld1.8          {d1}, [r1], r2
        vld1.8          {d2}, [r1], r2
        pld             [r1, r2, lsl #2]
        vld1.8          {d3}, [r1], r2
        pld             [r1]
        pld             [r1, r2]
        pld             [r1, r2, lsl #1]
  .if \avg
        vld1.8          {d4}, [r0,:64], r2
        vrhadd.u8       d0, d0, d4
        vld1.8          {d5}, [r0,:64], r2
        vrhadd.u8       d1, d1, d5
        vld1.8          {d6}, [r0,:64], r2
        vrhadd.u8       d2, d2, d6
        vld1.8          {d7}, [r0,:64], r2
        vrhadd.u8       d3, d3, d7
        sub             r0, r0, r2, lsl #2
  .endif
        subs            r3, r3, #4
        vst1.8          {d0}, [r0,:64], r2
        vst1.8          {d1}, [r0,:64], r2
        vst1.8          {d2}, [r0,:64], r2
        vst1.8          {d3}, [r0,:64], r2
        bne             1b
        bx              lr
.endm

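/*
 * pixels8_x2: horizontal half-pel interpolation for 8-byte-wide blocks,
 * two rows per iteration.  The vswp packs both rows into q0 and both
 * one-pixel-shifted rows into q1 so that a single "avg" covers two
 * output rows.
 */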
.macro  pixels8_x2      rnd=1, avg=0
1:      vld1.8          {q0}, [r1], r2
        vext.8          d1, d0, d1, #1
        vld1.8          {q1}, [r1], r2
        vext.8          d3, d2, d3, #1
        pld             [r1]
        pld             [r1, r2]
        subs            r3, r3, #2
        vswp            d1, d2
        avg             q0, q0, q1
  .if \avg
        vld1.8          {d4}, [r0,:64], r2
        vld1.8          {d5}, [r0,:64]
        vrhadd.u8       q0, q0, q2
        sub             r0, r0, r2
  .endif
        vst1.8          {d0}, [r0,:64], r2
        vst1.8          {d1}, [r0,:64], r2
        bne             1b
        bx              lr
.endm

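/*
 * pixels8_y2: vertical half-pel interpolation for 8-byte-wide blocks;
 * each output row is the average of two consecutive source rows, with
 * the last pair handled after the loop.
 */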
.macro  pixels8_y2      rnd=1, avg=0
        sub             r3, r3, #2
        vld1.8          {d0}, [r1], r2
        vld1.8          {d1}, [r1], r2
1:      subs            r3, r3, #2
        avg             d4, d0, d1
        vld1.8          {d0}, [r1], r2
        avg             d5, d0, d1
        vld1.8          {d1}, [r1], r2
        pld             [r1]
        pld             [r1, r2]
  .if \avg
        vld1.8          {d2}, [r0,:64], r2
        vld1.8          {d3}, [r0,:64]
        vrhadd.u8       q2, q2, q1
        sub             r0, r0, r2
  .endif
        vst1.8          {d4}, [r0,:64], r2
        vst1.8          {d5}, [r0,:64], r2
        bne             1b

        avg             d4, d0, d1
        vld1.8          {d0}, [r1], r2
        avg             d5, d0, d1
  .if \avg
        vld1.8          {d2}, [r0,:64], r2
        vld1.8          {d3}, [r0,:64]
        vrhadd.u8       q2, q2, q1
        sub             r0, r0, r2
  .endif
        vst1.8          {d4}, [r0,:64], r2
        vst1.8          {d5}, [r0,:64], r2

        bx              lr
.endm

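/*
 * pixels8_xy2: 8-byte-wide counterpart of pixels16_xy2.  The widened
 * horizontal pair sums are kept in q8/q9, and consecutive sums are
 * combined and narrowed by 2 bits ("shrn") to form each output row.
 */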
.macro  pixels8_xy2     rnd=1, avg=0
        sub             r3, r3, #2
        vld1.8          {q0}, [r1], r2
        vld1.8          {q1}, [r1], r2
NRND    vmov.i16        q11, #1
        pld             [r1]
        pld             [r1, r2]
        vext.8          d4, d0, d1, #1
        vext.8          d6, d2, d3, #1
        vaddl.u8        q8, d0, d4
        vaddl.u8        q9, d2, d6
1:      subs            r3, r3, #2
        vld1.8          {q0}, [r1], r2
        pld             [r1]
        vadd.u16        q10, q8, q9
        vext.8          d4, d0, d1, #1
NRND    vadd.u16        q10, q10, q11
        vaddl.u8        q8, d0, d4
        shrn            d5, q10, #2
        vld1.8          {q1}, [r1], r2
        vadd.u16        q10, q8, q9
        pld             [r1, r2]
  .if \avg
        vld1.8          {d7}, [r0,:64]
        vrhadd.u8       d5, d5, d7
  .endif
NRND    vadd.u16        q10, q10, q11
        vst1.8          {d5}, [r0,:64], r2
        shrn            d7, q10, #2
  .if \avg
        vld1.8          {d5}, [r0,:64]
        vrhadd.u8       d7, d7, d5
  .endif
        vext.8          d6, d2, d3, #1
        vaddl.u8        q9, d2, d6
        vst1.8          {d7}, [r0,:64], r2
        bgt             1b

        vld1.8          {q0}, [r1], r2
        vadd.u16        q10, q8, q9
        vext.8          d4, d0, d1, #1
NRND    vadd.u16        q10, q10, q11
        vaddl.u8        q8, d0, d4
        shrn            d5, q10, #2
        vadd.u16        q10, q8, q9
  .if \avg
        vld1.8          {d7}, [r0,:64]
        vrhadd.u8       d5, d5, d7
  .endif
NRND    vadd.u16        q10, q10, q11
        vst1.8          {d5}, [r0,:64], r2
        shrn            d7, q10, #2
  .if \avg
        vld1.8          {d5}, [r0,:64]
        vrhadd.u8       d7, d7, d5
  .endif
        vst1.8          {d7}, [r0,:64], r2

        bx              lr
.endm

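/*
 * pixfunc: instantiate one of the macros above as an exported function
 * ff_<pfx><name><suf>_neon.  The rnd parameter selects the rounding
 * variant (avg = vrhadd.u8, shrn = vrshrn.u16, NRND lines dropped) or
 * the no-rounding variant (avg = vhadd.u8, shrn = vshrn.u16, NRND lines
 * emitted).
 */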
.macro  pixfunc         pfx, name, suf, rnd=1, avg=0
  .if \rnd
    .macro avg  rd, rn, rm
        vrhadd.u8       \rd, \rn, \rm
    .endm
    .macro shrn rd, rn, rm
        vrshrn.u16      \rd, \rn, \rm
    .endm
    .macro NRND insn:vararg
    .endm
  .else
    .macro avg  rd, rn, rm
        vhadd.u8        \rd, \rn, \rm
    .endm
    .macro shrn rd, rn, rm
        vshrn.u16       \rd, \rn, \rm
    .endm
    .macro NRND insn:vararg
        \insn
    .endm
  .endif
function ff_\pfx\name\suf\()_neon, export=1
        \name           \rnd, \avg
endfunc
        .purgem         avg
        .purgem         shrn
        .purgem         NRND
.endm

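/*
 * pixfunc2: emit both the rounding and the _no_rnd flavour of a function.
 */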
.macro  pixfunc2        pfx, name, avg=0
        pixfunc         \pfx, \name, rnd=1, avg=\avg
        pixfunc         \pfx, \name, _no_rnd, rnd=0, avg=\avg
.endm

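/*
 * The h264 qpel mc00 cases are plain copies (or averages), so these
 * wrappers only load the block height into r3 and then fall through
 * into the pixels16/pixels8 function emitted directly below them; the
 * missing return before endfunc is intentional.  The same pattern is
 * used for the other mc00 entry points further down.
 */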
function ff_put_h264_qpel16_mc00_neon, export=1
        mov             r3, #16
endfunc

        pixfunc         put_, pixels16,     avg=0
        pixfunc2        put_, pixels16_x2,  avg=0
        pixfunc2        put_, pixels16_y2,  avg=0
        pixfunc2        put_, pixels16_xy2, avg=0

function ff_avg_h264_qpel16_mc00_neon, export=1
        mov             r3, #16
endfunc

        pixfunc         avg_, pixels16,     avg=1
        pixfunc2        avg_, pixels16_x2,  avg=1
        pixfunc2        avg_, pixels16_y2,  avg=1
        pixfunc2        avg_, pixels16_xy2, avg=1

function ff_put_h264_qpel8_mc00_neon, export=1
        mov             r3, #8
endfunc

        pixfunc         put_, pixels8,     avg=0
        pixfunc2        put_, pixels8_x2,  avg=0
        pixfunc2        put_, pixels8_y2,  avg=0
        pixfunc2        put_, pixels8_xy2, avg=0

function ff_avg_h264_qpel8_mc00_neon, export=1
        mov             r3, #8
endfunc

        pixfunc         avg_, pixels8,     avg=1
        pixfunc         avg_, pixels8_x2,  avg=1
        pixfunc         avg_, pixels8_y2,  avg=1
        pixfunc         avg_, pixels8_xy2, avg=1