Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libavcodec / arm / mlpdsp_armv6.S
CommitLineData
2ba45a60
DM
/*
 * Copyright (c) 2014 RISC OS Open Ltd
 * Author: Ben Avison <bavison@riscosopen.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
22#include "libavutil/arm/asm.S"
23
// loadregoffsh2 group, index, base, offgroup, offindex
//
// Front end for loadregoffsh2_.  The index and offindex arguments may be
// arbitrary absolute expressions; they are evaluated to plain numbers with
// the .altmacro %(expr) operator before being handed on, so that the helper
// can paste them onto the group / offgroup name prefixes.
// Alternate macro mode is switched off again before returning.
.macro loadregoffsh2 group, index, base, offgroup, offindex
        .altmacro
        loadregoffsh2_ \group, %(\index), \base, \offgroup, %(\offindex)
        .noaltmacro
.endm
29
// loadregoffsh2_ group, index, base, offgroup, offindex
//
// Emits a single word load whose destination is the register alias named
// <group><index> and whose address is <base> plus the register alias named
// <offgroup><offindex> shifted left by 2 (i.e. scaled by 4 bytes).
// index and offindex must already be literal numbers.
.macro loadregoffsh2_ group, index, base, offgroup, offindex
        ldr     \group\index, [\base, \offgroup\offindex, lsl #2]
.endm
33
// eorlslreg check, data, group, index
//
// Front end for eorlslreg_.  The index argument may be an arbitrary absolute
// expression; it is evaluated to a plain number with the .altmacro %(expr)
// operator so that the helper can paste it onto the group name prefix.
// Alternate macro mode is switched off again before returning.
.macro eorlslreg check, data, group, index
        .altmacro
        eorlslreg_ \check, \data, \group, %(\index)
        .noaltmacro
.endm
39
// eorlslreg_ check, data, group, index
//
// Emits: check ^= data << Rs, where Rs is the register alias named
// <group><index>.  index must already be a literal number.
.macro eorlslreg_ check, data, group, index
        eor     \check, \check, \data, lsl \group\index
.endm
43
// decr_modulo var, by, modulus
//
// Assembly-time countdown; emits no instructions.  Subtracts "by" from the
// assembler symbol "var" and, if the result is exactly zero, reloads the
// symbol with "modulus".  Only an exact zero wraps: a subtraction that
// overshoots leaves the symbol negative.
.macro decr_modulo var, by, modulus
        .set    \var, \var - \by
        .if     \var == 0
        .set    \var, \modulus
        .endif
.endm
50
// load_group1 size, channels, r0, r1, r2, r3, pointer_dead=0
//
// Loads the first "size" consecutive words of a four-word group from [IN].
//   size == 2 : r0, r1 are loaded with LDRD and IN is post-incremented by
//               (size + 8 - channels) words.
//   otherwise : treated as size == 4; r0-r3 are loaded with LDM.
//               - If IDX1 > 4 or channels == 8, IN is written back past
//                 the four words just read.
//               - Otherwise IN is advanced by (4 + 8 - channels) words with
//                 a separate ADD, which is omitted when pointer_dead is
//                 non-zero (the caller will not use IN again).
// Relies on the caller having defined the IN register alias and the
// assembly-time symbol IDX1; IDX1 is then stepped down by "size", wrapping
// to "channels" (see decr_modulo).
// NOTE(review): the "+ 8 - channels" term presumably skips the unused tail
// of an 8-word input row (other code in this file steps IN by #8*4) -
// confirm against the caller's buffer layout.
        .macro  load_group1  size, channels, r0, r1, r2, r3, pointer_dead=0
  .if \size == 2
        ldrd    \r0, \r1, [IN], #(\size + 8 - \channels) * 4
  .else // size == 4
   .if IDX1 > 4 || \channels==8
        ldm     IN!, {\r0, \r1, \r2, \r3}
   .else
        ldm     IN, {\r0, \r1, \r2, \r3}
    .if !\pointer_dead
        add     IN, IN, #(4 + 8 - \channels) * 4
    .endif
   .endif
  .endif
        decr_modulo IDX1, \size, \channels
        .endm
66
// load_group2 size, channels, r0, r1, r2, r3, pointer_dead=0
//
// Loads the remaining words of a four-word group into r2, r3.  Only
// size == 2 emits code; for any other size nothing is loaded.
//   IDX1 > 2  : LDM with writeback, IN advances by the two words read.
//   otherwise : LDRD, IN post-incremented by (size + 8 - channels) words.
// r0 and r1 are accepted for symmetry with load_group1 but are not used.
// pointer_dead is referenced only by the alternative sequence that is kept
// commented out below (the lines marked //A); the active code ignores it.
// Relies on the caller having defined the IN register alias and the
// assembly-time symbol IDX1; IDX1 is always stepped down by "size",
// wrapping to "channels" (see decr_modulo), even when nothing was loaded.
        .macro  load_group2  size, channels, r0, r1, r2, r3, pointer_dead=0
  .if \size == 2
   .if IDX1 > 2
        ldm     IN!, {\r2, \r3}
   .else
//A   .ifc \r2, ip
//A    .if \pointer_dead
//A       ldm     IN, {\r2, \r3}
//A    .else
//A       ldr     \r2, [IN], #4
//A       ldr     \r3, [IN], #(\size - 1 + 8 - \channels) * 4
//A    .endif
//A   .else
        ldrd    \r2, \r3, [IN], #(\size + 8 - \channels) * 4
//A   .endif
   .endif
  .endif
        decr_modulo IDX1, \size, \channels
        .endm
86
/*
 * implement_pack inorder, channels, shift
 *
 * Emits at most one exported function, specialised at assembly time:
 *
 *   inorder   non-zero: input words are read sequentially through IN.
 *             zero:     each input word is fetched at [IN + index * 4],
 *                       the index bytes coming from the ch_assign array
 *                       whose pointer is the first stack argument.
 *   channels  words per sample; the entry code below has dedicated paths
 *             for 2, 6 and "anything else", the latter written for 8.
 *   shift     a number for a constant left shift baked into the code, or
 *             the token "mixed" for per-channel shifts read from the
 *             output_shift array whose pointer is the second stack argument.
 *
 * Function emitted:
 *   inorder, mixed     ff_mlp_pack_output_inorder_<channels>ch_mixedshift_armv6
 *   inorder, constant  ff_mlp_pack_output_inorder_<channels>ch_<shift>shift_armv6
 *   !inorder, constant ff_mlp_pack_output_outoforder_<channels>ch_<shift>shift_armv6
 *                      (only assembled when CONFIG_THUMB is 0)
 *   !inorder, mixed    nothing - this case is not handled
 *
 * Common behaviour of the generated functions:
 *   - Register arguments: a1 = CHECK, a2 = COUNT, a3 = IN, a4 = OUT.
 *   - If COUNT is not a multiple of SAMPLES_PER_LOOP the function branches
 *     to X(ff_mlp_pack_output) with all arguments untouched.
 *   - If COUNT is zero the function returns immediately.
 *   - Four words at a time are loaded, shifted left, XOR-folded into CHECK
 *     and stored through OUT with writeback.  CHECK is left in a1 on return.
 *
 * IDX1 and IDX2 are assembly-time symbols (not registers): IDX1 counts the
 * words of the current sample still to be loaded, IDX2 the words still to
 * be folded into CHECK.  Both count down from "channels" via decr_modulo.
 */
.macro implement_pack inorder, channels, shift
.if \inorder
.ifc \shift, mixed

// ---------------------------------------------------------------------
// In-order input, per-channel ("mixed") shifts
// ---------------------------------------------------------------------

CHECK   .req    a1
COUNT   .req    a2
IN      .req    a3
OUT     .req    a4
DAT0    .req    v1
DAT1    .req    v2
DAT2    .req    v3
DAT3    .req    v4
SHIFT0  .req    v5
SHIFT1  .req    v6
SHIFT2  .req    sl
SHIFT3  .req    fp
SHIFT4  .req    ip
SHIFT5  .req    lr

        // Process one group of four words: load, shift each word by the
        // shift of the channel it belongs to, fold into CHECK, store.
        .macro  output4words
        .set    SIZE_GROUP1, IDX1
  .if SIZE_GROUP1 > 4
        .set    SIZE_GROUP1, 4
  .endif
        .set    SIZE_GROUP2, 4 - SIZE_GROUP1
        load_group1  SIZE_GROUP1, \channels, DAT0, DAT1, DAT2, DAT3
        load_group2  SIZE_GROUP2, \channels, DAT0, DAT1, DAT2, DAT3
  .if \channels == 2
        // two samples per group: the same two shifts apply twice
        lsl     DAT0, SHIFT0
        lsl     DAT1, SHIFT1
        lsl     DAT2, SHIFT0
        lsl     DAT3, SHIFT1
  .elseif \channels == 6
        // the group's position inside the 6-word sample selects the shifts
   .if IDX2 == 6
        lsl     DAT0, SHIFT0
        lsl     DAT1, SHIFT1
        lsl     DAT2, SHIFT2
        lsl     DAT3, SHIFT3
   .elseif IDX2 == 2
        lsl     DAT0, SHIFT4
        lsl     DAT1, SHIFT5
        lsl     DAT2, SHIFT0
        lsl     DAT3, SHIFT1
   .else // IDX2 == 4
        lsl     DAT0, SHIFT2
        lsl     DAT1, SHIFT3
        lsl     DAT2, SHIFT4
        lsl     DAT3, SHIFT5
   .endif
  .elseif \channels == 8
        // SHIFT4 / SHIFT5 each hold four packed byte-wide shifts; unpack
        // the half that applies to this group into SHIFT0-SHIFT3
   .if IDX2 == 8
        uxtb    SHIFT0, SHIFT4, ror #0
        uxtb    SHIFT1, SHIFT4, ror #8
        uxtb    SHIFT2, SHIFT4, ror #16
        uxtb    SHIFT3, SHIFT4, ror #24
   .else
        uxtb    SHIFT0, SHIFT5, ror #0
        uxtb    SHIFT1, SHIFT5, ror #8
        uxtb    SHIFT2, SHIFT5, ror #16
        uxtb    SHIFT3, SHIFT5, ror #24
   .endif
        lsl     DAT0, SHIFT0
        lsl     DAT1, SHIFT1
        lsl     DAT2, SHIFT2
        lsl     DAT3, SHIFT3
  .endif
        // (channels - IDX2) is the channel number of the even word of each
        // pair, so the right shift applied is 8 minus the channel number
        eor     CHECK, CHECK, DAT0, lsr #8 - (\channels - IDX2)
        eor     CHECK, CHECK, DAT1, lsr #7 - (\channels - IDX2)
        decr_modulo IDX2, 2, \channels
        eor     CHECK, CHECK, DAT2, lsr #8 - (\channels - IDX2)
        eor     CHECK, CHECK, DAT3, lsr #7 - (\channels - IDX2)
        decr_modulo IDX2, 2, \channels
        stm     OUT!, {DAT0 - DAT3}
        .endm

        .set    WORDS_PER_LOOP, \channels  // calculate LCM (channels, 4)
  .if (WORDS_PER_LOOP % 2) == 0
        .set    WORDS_PER_LOOP, WORDS_PER_LOOP / 2
  .endif
  .if (WORDS_PER_LOOP % 2) == 0
        .set    WORDS_PER_LOOP, WORDS_PER_LOOP / 2
  .endif
        .set    WORDS_PER_LOOP, WORDS_PER_LOOP * 4
        .set    SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels

function ff_mlp_pack_output_inorder_\channels\()ch_mixedshift_armv6, export=1
  .if SAMPLES_PER_LOOP > 1
        tst     COUNT, #SAMPLES_PER_LOOP - 1  // always seems to be in practice
        it      ne
        bne     X(ff_mlp_pack_output)         // but just in case, branch to C implementation if not
  .endif
        teq     COUNT, #0
        it      eq
        bxeq    lr
        push    {v1-v6,sl,fp,lr}
        ldr     SHIFT0, [sp, #(9+1)*4]        // get output_shift from stack (9 words were just pushed)
        ldr     SHIFT1, =0x08080808
        ldr     SHIFT4, [SHIFT0]              // first four shift bytes, packed
  .if \channels == 2
        uadd8   SHIFT4, SHIFT4, SHIFT1        // increase all shifts by 8
        uxtb    SHIFT0, SHIFT4, ror #0
        uxtb    SHIFT1, SHIFT4, ror #8
  .else
        ldr     SHIFT5, [SHIFT0, #4]          // next four shift bytes, packed
        uadd8   SHIFT4, SHIFT4, SHIFT1        // increase all shifts by 8
        uadd8   SHIFT5, SHIFT5, SHIFT1
   .if \channels == 6
        // six shifts fit in SHIFT0-SHIFT5, so unpack them once up front
        uxtb    SHIFT0, SHIFT4, ror #0
        uxtb    SHIFT1, SHIFT4, ror #8
        uxtb    SHIFT2, SHIFT4, ror #16
        uxtb    SHIFT3, SHIFT4, ror #24
        uxtb    SHIFT4, SHIFT5, ror #0
        uxtb    SHIFT5, SHIFT5, ror #8
   .endif
  .endif
        .set    IDX1, \channels
        .set    IDX2, \channels
0:
  .rept WORDS_PER_LOOP / 4
        output4words
  .endr
        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
        bne     0b
        pop     {v1-v6,sl,fp,pc}
        .ltorg
endfunc
        .purgem output4words

        .unreq  CHECK
        .unreq  COUNT
        .unreq  IN
        .unreq  OUT
        .unreq  DAT0
        .unreq  DAT1
        .unreq  DAT2
        .unreq  DAT3
        .unreq  SHIFT0
        .unreq  SHIFT1
        .unreq  SHIFT2
        .unreq  SHIFT3
        .unreq  SHIFT4
        .unreq  SHIFT5

.else // not mixed

// ---------------------------------------------------------------------
// In-order input, constant shift
// ---------------------------------------------------------------------

CHECK   .req    a1
COUNT   .req    a2
IN      .req    a3
OUT     .req    a4
DAT0    .req    v1
DAT1    .req    v2
DAT2    .req    v3
DAT3    .req    v4
DAT4    .req    v5
DAT5    .req    v6
DAT6    .req    sl  // use these rather than the otherwise unused
DAT7    .req    fp  // ip and lr so that we can load them using LDRD

        // One step over two register sets:
        //   head != 0 : load the next four words into r0-r3 and shift them
        //   tail != 0 : fold the previously loaded r4-r7 into CHECK and
        //               store them
        // The head and tail instructions are interleaved.
        .macro  output4words  tail, head, r0, r1, r2, r3, r4, r5, r6, r7, pointer_dead=0
  .if \head
        .set    SIZE_GROUP1, IDX1
   .if SIZE_GROUP1 > 4
        .set    SIZE_GROUP1, 4
   .endif
        .set    SIZE_GROUP2, 4 - SIZE_GROUP1
        load_group1  SIZE_GROUP1, \channels, \r0, \r1, \r2, \r3, \pointer_dead
  .endif
  .if \tail
        eor     CHECK, CHECK, \r4, lsr #8 - (\channels - IDX2)
        eor     CHECK, CHECK, \r5, lsr #7 - (\channels - IDX2)
        decr_modulo IDX2, 2, \channels
  .endif
  .if \head
        load_group2  SIZE_GROUP2, \channels, \r0, \r1, \r2, \r3, \pointer_dead
  .endif
  .if \tail
        eor     CHECK, CHECK, \r6, lsr #8 - (\channels - IDX2)
        eor     CHECK, CHECK, \r7, lsr #7 - (\channels - IDX2)
        decr_modulo IDX2, 2, \channels
        stm     OUT!, {\r4, \r5, \r6, \r7}
  .endif
  .if \head
        lsl     \r0, #8 + \shift
        lsl     \r1, #8 + \shift
        lsl     \r2, #8 + \shift
        lsl     \r3, #8 + \shift
  .endif
        .endm

        .set    WORDS_PER_LOOP, \channels  // calculate LCM (channels, 8)
  .if (WORDS_PER_LOOP % 2) == 0
        .set    WORDS_PER_LOOP, WORDS_PER_LOOP / 2
  .endif
  .if (WORDS_PER_LOOP % 2) == 0
        .set    WORDS_PER_LOOP, WORDS_PER_LOOP / 2
  .endif
  .if (WORDS_PER_LOOP % 2) == 0
        .set    WORDS_PER_LOOP, WORDS_PER_LOOP / 2
  .endif
        .set    WORDS_PER_LOOP, WORDS_PER_LOOP * 8
        .set    SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels

function ff_mlp_pack_output_inorder_\channels\()ch_\shift\()shift_armv6, export=1
  .if SAMPLES_PER_LOOP > 1
        tst     COUNT, #SAMPLES_PER_LOOP - 1  // always seems to be in practice
        it      ne
        bne     X(ff_mlp_pack_output)         // but just in case, branch to C implementation if not
  .endif
        // COUNT is pre-decremented: from here on it holds the number of
        // samples remaining after the iteration currently in progress
        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
        it      lo
        bxlo    lr                            // COUNT was 0: nothing to do
        push    {v1-v6,sl,fp,lr}
        .set    IDX1, \channels
        .set    IDX2, \channels
        // prime the pipeline: load and shift the first four words only
        output4words 0, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
        // Z still reflects the most recent SUBS on COUNT (nothing emitted
        // since sets flags); when no samples remain after this iteration,
        // go straight to the final iteration at 1:
0:      beq     1f
  .rept WORDS_PER_LOOP / 8
        output4words 1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3
        output4words 1, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
  .endr
        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
        bne     0b
1:
        // final iteration: same as the loop body, except that the last load
        // does not need to advance IN and the last step only drains
  .rept WORDS_PER_LOOP / 8 - 1
        output4words 1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3
        output4words 1, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
  .endr
        output4words 1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3, pointer_dead=1
        output4words 1, 0, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
        pop     {v1-v6,sl,fp,pc}
endfunc
        .purgem output4words

        .unreq  CHECK
        .unreq  COUNT
        .unreq  IN
        .unreq  OUT
        .unreq  DAT0
        .unreq  DAT1
        .unreq  DAT2
        .unreq  DAT3
        .unreq  DAT4
        .unreq  DAT5
        .unreq  DAT6
        .unreq  DAT7

.endif // mixed
.else // not inorder
.ifc \shift, mixed

// This case not currently handled

.else // not mixed

// ---------------------------------------------------------------------
// Out-of-order input (indexed through ch_assign), constant shift
// ---------------------------------------------------------------------

#if !CONFIG_THUMB

CHECK   .req    a1
COUNT   .req    a2
IN      .req    a3
OUT     .req    a4
DAT0    .req    v1
DAT1    .req    v2
DAT2    .req    v3
DAT3    .req    v4
CHAN0   .req    v5
CHAN1   .req    v6
CHAN2   .req    sl
CHAN3   .req    fp
CHAN4   .req    ip
CHAN5   .req    lr

        // Process one group of four words: fetch each through its channel
        // index, shift, fold into CHECK, store.  IN is stepped by 8 words
        // each time the last word of a sample has been fetched.
        .macro  output4words
  .if \channels == 8
        // CHAN4 / CHAN5 each hold four packed byte-wide indices; unpack
        // the half that applies to this group into CHAN0-CHAN3
   .if IDX1 == 8
        uxtb    CHAN0, CHAN4, ror #0
        uxtb    CHAN1, CHAN4, ror #8
        uxtb    CHAN2, CHAN4, ror #16
        uxtb    CHAN3, CHAN4, ror #24
   .else
        uxtb    CHAN0, CHAN5, ror #0
        uxtb    CHAN1, CHAN5, ror #8
        uxtb    CHAN2, CHAN5, ror #16
        uxtb    CHAN3, CHAN5, ror #24
   .endif
        ldr     DAT0, [IN, CHAN0, lsl #2]
        ldr     DAT1, [IN, CHAN1, lsl #2]
        ldr     DAT2, [IN, CHAN2, lsl #2]
        ldr     DAT3, [IN, CHAN3, lsl #2]
   .if IDX1 == 4
        add     IN, IN, #8*4
   .endif
        decr_modulo IDX1, 4, \channels
  .else
        .set    SIZE_GROUP1, IDX1
   .if SIZE_GROUP1 > 4
        .set    SIZE_GROUP1, 4
   .endif
        .set    SIZE_GROUP2, 4 - SIZE_GROUP1
   .if SIZE_GROUP1 == 2
        loadregoffsh2  DAT, 0, IN, CHAN, 0 + (\channels - IDX1)
        loadregoffsh2  DAT, 1, IN, CHAN, 1 + (\channels - IDX1)
        add     IN, IN, #8*4
   .else // SIZE_GROUP1 == 4
        loadregoffsh2  DAT, 0, IN, CHAN, 0 + (\channels - IDX1)
        loadregoffsh2  DAT, 1, IN, CHAN, 1 + (\channels - IDX1)
        loadregoffsh2  DAT, 2, IN, CHAN, 2 + (\channels - IDX1)
        loadregoffsh2  DAT, 3, IN, CHAN, 3 + (\channels - IDX1)
    .if IDX1 == 4
        add     IN, IN, #8*4
    .endif
   .endif
        decr_modulo IDX1, SIZE_GROUP1, \channels
   .if SIZE_GROUP2 == 2
        loadregoffsh2  DAT, 2, IN, CHAN, 0 + (\channels - IDX1)
        loadregoffsh2  DAT, 3, IN, CHAN, 1 + (\channels - IDX1)
    .if IDX1 == 2
        add     IN, IN, #8*4
    .endif
   .endif
        decr_modulo IDX1, SIZE_GROUP2, \channels
  .endif
  .if \channels == 8 // in this case we can corrupt CHAN0-3
        // CHAN0-3 are re-derived from CHAN4/CHAN5 for every group, so they
        // can be overwritten with (8 - index) for use as right-shift counts
        rsb     CHAN0, CHAN0, #8
        rsb     CHAN1, CHAN1, #8
        rsb     CHAN2, CHAN2, #8
        rsb     CHAN3, CHAN3, #8
        lsl     DAT0, #8 + \shift
        lsl     DAT1, #8 + \shift
        lsl     DAT2, #8 + \shift
        lsl     DAT3, #8 + \shift
        eor     CHECK, CHECK, DAT0, lsr CHAN0
        eor     CHECK, CHECK, DAT1, lsr CHAN1
        eor     CHECK, CHECK, DAT2, lsr CHAN2
        eor     CHECK, CHECK, DAT3, lsr CHAN3
  .else
        // CHAN0-5 must stay intact here, so instead: apply the shift, clear
        // the top byte, fold in with a left shift by the index register,
        // then move the data up by the remaining 8 bits
   .if \shift != 0
        lsl     DAT0, #\shift
        lsl     DAT1, #\shift
        lsl     DAT2, #\shift
        lsl     DAT3, #\shift
   .endif
        bic     DAT0, DAT0, #0xff000000
        bic     DAT1, DAT1, #0xff000000
        bic     DAT2, DAT2, #0xff000000
        bic     DAT3, DAT3, #0xff000000
        eorlslreg  CHECK, DAT0, CHAN, 0 + (\channels - IDX2)
        eorlslreg  CHECK, DAT1, CHAN, 1 + (\channels - IDX2)
        decr_modulo IDX2, 2, \channels
        eorlslreg  CHECK, DAT2, CHAN, 0 + (\channels - IDX2)
        eorlslreg  CHECK, DAT3, CHAN, 1 + (\channels - IDX2)
        decr_modulo IDX2, 2, \channels
        lsl     DAT0, #8
        lsl     DAT1, #8
        lsl     DAT2, #8
        lsl     DAT3, #8
  .endif
        stm     OUT!, {DAT0 - DAT3}
        .endm

        .set    WORDS_PER_LOOP, \channels  // calculate LCM (channels, 4)
  .if (WORDS_PER_LOOP % 2) == 0
        .set    WORDS_PER_LOOP, WORDS_PER_LOOP / 2
  .endif
  .if (WORDS_PER_LOOP % 2) == 0
        .set    WORDS_PER_LOOP, WORDS_PER_LOOP / 2
  .endif
        .set    WORDS_PER_LOOP, WORDS_PER_LOOP * 4
        .set    SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels

function ff_mlp_pack_output_outoforder_\channels\()ch_\shift\()shift_armv6, export=1
  .if SAMPLES_PER_LOOP > 1
        tst     COUNT, #SAMPLES_PER_LOOP - 1  // always seems to be in practice
        it      ne
        bne     X(ff_mlp_pack_output)         // but just in case, branch to C implementation if not
  .endif
        teq     COUNT, #0
        it      eq
        bxeq    lr
        push    {v1-v6,sl,fp,lr}
        ldr     CHAN0, [sp, #(9+0)*4]         // get ch_assign from stack (9 words were just pushed)
        ldr     CHAN4, [CHAN0]                // first four index bytes, packed
  .if \channels == 2
        uxtb    CHAN0, CHAN4, ror #0
        uxtb    CHAN1, CHAN4, ror #8
  .else
        ldr     CHAN5, [CHAN0, #4]            // next four index bytes, packed
   .if \channels == 6
        // six indices fit in CHAN0-CHAN5, so unpack them once up front
        uxtb    CHAN0, CHAN4, ror #0
        uxtb    CHAN1, CHAN4, ror #8
        uxtb    CHAN2, CHAN4, ror #16
        uxtb    CHAN3, CHAN4, ror #24
        uxtb    CHAN4, CHAN5, ror #0
        uxtb    CHAN5, CHAN5, ror #8
   .endif
  .endif
        .set    IDX1, \channels
        .set    IDX2, \channels
0:
  .rept WORDS_PER_LOOP / 4
        output4words
  .endr
        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
        bne     0b
        pop     {v1-v6,sl,fp,pc}
        .ltorg
endfunc
        .purgem output4words

        .unreq  CHECK
        .unreq  COUNT
        .unreq  IN
        .unreq  OUT
        .unreq  DAT0
        .unreq  DAT1
        .unreq  DAT2
        .unreq  DAT3
        .unreq  CHAN0
        .unreq  CHAN1
        .unreq  CHAN2
        .unreq  CHAN3
        .unreq  CHAN4
        .unreq  CHAN5

#endif // !CONFIG_THUMB

.endif // mixed
.endif // inorder
.endm // implement_pack
515
// pack_channels inorder, channels
//
// Instantiates implement_pack for the given ordering and channel count once
// for each constant shift 0 to 5, and once more for the "mixed" shift case.
.macro pack_channels inorder, channels
        implement_pack \inorder, \channels, 0
        implement_pack \inorder, \channels, 1
        implement_pack \inorder, \channels, 2
        implement_pack \inorder, \channels, 3
        implement_pack \inorder, \channels, 4
        implement_pack \inorder, \channels, 5
        implement_pack \inorder, \channels, mixed
.endm
525
// pack_order inorder
//
// Instantiates pack_channels for the given ordering with 2, 6 and 8 channels.
.macro pack_order inorder
        pack_channels \inorder, 2
        pack_channels \inorder, 6
        pack_channels \inorder, 8
.endm
531
// Emit every variant: the out-of-order functions first (inorder = 0),
// then the in-order ones (inorder = 1).
        pack_order 0
        pack_order 1