Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libavcodec / arm / hpeldsp_arm.S
CommitLineData
2ba45a60
DM
@
@ ARMv4-optimized halfpel functions
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
@
@ This file is part of FFmpeg.
@
@ FFmpeg is free software; you can redistribute it and/or
@ modify it under the terms of the GNU Lesser General Public
@ License as published by the Free Software Foundation; either
@ version 2.1 of the License, or (at your option) any later version.
@
@ FFmpeg is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
@ Lesser General Public License for more details.
@
@ You should have received a copy of the GNU Lesser General Public
@ License along with FFmpeg; if not, write to the Free Software
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@
21
#include "config.h"
#include "libavutil/arm/asm.S"

/*
 * When HAVE_ARMV5TE_EXTERNAL is false, the preload mnemonic is defined as
 * the assembler comment character, so every preload line in this file
 * turns into a comment and assembles to nothing.
 */
#if !HAVE_ARMV5TE_EXTERNAL
#define pld @
#endif
28
@ ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
@ Pull four words out of the five-word window Rn0..Rn4, starting
@ shift bytes into Rn0:
@     Rdk = (Rnk >> (8 * shift)) | (Rn(k+1) << (32 - 8 * shift))
@ Taking the first bytes from the low end of each word matches words
@ loaded in little-endian byte order (NOTE(review): confirm for
@ big-endian builds).
@ All four mov instructions run before any orr, so no Rd may be the
@ same register as Rn1..Rn4.  The uses visible here pass shift = 1,
@ 2 or 3; shift = 0 would ask for an lsl by 32.
.macro  ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        mov             \Rd0, \Rn0, lsr #(\shift * 8)
        mov             \Rd1, \Rn1, lsr #(\shift * 8)
        mov             \Rd2, \Rn2, lsr #(\shift * 8)
        mov             \Rd3, \Rn3, lsr #(\shift * 8)
        orr             \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
        orr             \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
        orr             \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
        orr             \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
.endm
@ ALIGN_DWORD shift, R0, R1, R2
@ In-place variant: shifts the three-word window R0..R1..R2 down by
@ shift bytes, leaving the two result words in R0 and R1:
@     R0 = (R0 >> (8 * shift)) | (R1 << (32 - 8 * shift))
@     R1 = (R1 >> (8 * shift)) | (R2 << (32 - 8 * shift))
@ R2 is only read.  R0 must be finished before R1 is overwritten,
@ which is why the two mov/orr pairs are not interleaved.
.macro  ALIGN_DWORD shift, R0, R1, R2
        mov             \R0, \R0, lsr #(\shift * 8)
        orr             \R0, \R0, \R1, lsl #(32 - \shift * 8)
        mov             \R1, \R1, lsr #(\shift * 8)
        orr             \R1, \R1, \R2, lsl #(32 - \shift * 8)
.endm
@ ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
@ Two-word extract with separate destinations:
@     Rdst0 = (Rsrc0 >> (8 * shift)) | (Rsrc1 << (32 - 8 * shift))
@     Rdst1 = (Rsrc1 >> (8 * shift)) | (Rsrc2 << (32 - 8 * shift))
@ Rsrc0 is read only by the first mov, so Rdst1 may be the same
@ register as Rsrc0 (the align == 3 case of RND_XY2_IT does this).
@ Neither destination may alias Rsrc1 or Rsrc2.
.macro  ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        mov             \Rdst0, \Rsrc0, lsr #(\shift * 8)
        mov             \Rdst1, \Rsrc1, lsr #(\shift * 8)
        orr             \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
        orr             \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
.endm
51
@ RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@ Average of packed bytes, done on two words at once:
@     Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
@ Clearing bit 0 of every byte before the shift keeps a bit from
@ sliding into the byte below it.
@   Rmask    must hold 0xFEFEFEFE
@   Rn0, Rn1 are destroyed (they receive Rn | Rm)
@   Rm0, Rm1 are preserved
@   Rd0, Rd1 must not be any of the Rn or Rm registers, since both
@            are written before the inputs are last read
.macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        eor             \Rd0, \Rn0, \Rm0
        eor             \Rd1, \Rn1, \Rm1
        orr             \Rn0, \Rn0, \Rm0
        orr             \Rn1, \Rn1, \Rm1
        and             \Rd0, \Rd0, \Rmask
        and             \Rd1, \Rd1, \Rmask
        sub             \Rd0, \Rn0, \Rd0, lsr #1
        sub             \Rd1, \Rn1, \Rd1, lsr #1
.endm
65
@ NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@ Average of packed bytes, done on two words at once:
@     Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
@ (the code adds; it differs from RND_AVG32 in using and/add where
@ that macro uses orr/sub).
@   Rmask    must hold 0xFEFEFEFE
@   Rn0, Rn1 are destroyed (they receive Rn & Rm)
@   Rm0, Rm1 are preserved
@   Rd0, Rd1 must not be any of the Rn or Rm registers, since both
@            are written before the inputs are last read
.macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        eor             \Rd0, \Rn0, \Rm0
        eor             \Rd1, \Rn1, \Rm1
        and             \Rn0, \Rn0, \Rm0
        and             \Rn1, \Rn1, \Rm1
        and             \Rd0, \Rd0, \Rmask
        and             \Rd1, \Rd1, \Rmask
        add             \Rd0, \Rn0, \Rd0, lsr #1
        add             \Rd1, \Rn1, \Rd1, lsr #1
.endm
79
@ JMP_ALIGN tmp, reg
@ Four-way dispatch on the low two bits of the pointer in reg.
@ reg is rounded down to a word boundary, then control goes to the
@ next numeric local label:
@     1:  reg was word aligned
@     2:  reg was 1 byte past a word boundary
@     3:  reg was 2 bytes past
@     4:  reg was 3 bytes past
@ The code following the macro has to define labels 1 to 4.
@ Clobbers tmp and the condition flags.
.macro  JMP_ALIGN tmp, reg
        ands            \tmp, \reg, #3
        bic             \reg, \reg, #3
        beq             1f
        subs            \tmp, \tmp, #1
        beq             2f
        subs            \tmp, \tmp, #1
        beq             3f
        b               4f
.endm
90
@ ----------------------------------------------------------------
@ Copy a block 16 bytes wide and h rows high.
@   r0 = block      destination, word aligned
@   r1 = pixels     source, any byte alignment
@   r2 = line_size  added to both pointers after each row
@   r3 = h          row count; tested only after a row has been
@                   copied, so it has to be at least 1
@ JMP_ALIGN word-aligns r1 and selects one of four loops, one per
@ byte offset of the source.  The unaligned loops load five words
@ and shift them into place with ALIGN_QWORD_D.
function ff_put_pixels16_arm, export=1, align=5
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld             [r1]
        push            {r4-r11, lr}
        JMP_ALIGN       r5,  r1
1:      @ source offset 0: plain four-word copy
        ldm             r1,  {r4-r7}
        add             r1,  r1,  r2
        stm             r0,  {r4-r7}
        pld             [r1]
        subs            r3,  r3,  #1
        add             r0,  r0,  r2
        bne             1b
        pop             {r4-r11, pc}
        .align 5
2:      @ source offset 1
        ldm             r1,  {r4-r8}
        add             r1,  r1,  r2
        ALIGN_QWORD_D   1,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
        pld             [r1]
        subs            r3,  r3,  #1
        stm             r0,  {r9-r12}
        add             r0,  r0,  r2
        bne             2b
        pop             {r4-r11, pc}
        .align 5
3:      @ source offset 2
        ldm             r1,  {r4-r8}
        add             r1,  r1,  r2
        ALIGN_QWORD_D   2,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
        pld             [r1]
        subs            r3,  r3,  #1
        stm             r0,  {r9-r12}
        add             r0,  r0,  r2
        bne             3b
        pop             {r4-r11, pc}
        .align 5
4:      @ source offset 3
        ldm             r1,  {r4-r8}
        add             r1,  r1,  r2
        ALIGN_QWORD_D   3,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
        pld             [r1]
        subs            r3,  r3,  #1
        stm             r0,  {r9-r12}
        add             r0,  r0,  r2
        bne             4b
        pop             {r4-r11,pc}
endfunc
141
@ ----------------------------------------------------------------
@ Copy a block 8 bytes wide and h rows high.
@   r0 = block      destination, word aligned
@   r1 = pixels     source, any byte alignment
@   r2 = line_size  added to both pointers after each row
@   r3 = h          row count; tested only after a row has been
@                   copied, so it has to be at least 1
@ JMP_ALIGN word-aligns r1 and selects one of four loops, one per
@ byte offset of the source.  The unaligned loops load a third word
@ into r12 and realign in place with ALIGN_DWORD.
function ff_put_pixels8_arm, export=1, align=5
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld             [r1]
        push            {r4-r5,lr}
        JMP_ALIGN       r5,  r1
1:      @ source offset 0: plain two-word copy
        ldm             r1,  {r4-r5}
        add             r1,  r1,  r2
        subs            r3,  r3,  #1
        pld             [r1]
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             1b
        pop             {r4-r5,pc}
        .align 5
2:      @ source offset 1
        ldm             r1,  {r4-r5, r12}
        add             r1,  r1,  r2
        ALIGN_DWORD     1,   r4,  r5,  r12
        pld             [r1]
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             2b
        pop             {r4-r5,pc}
        .align 5
3:      @ source offset 2
        ldm             r1,  {r4-r5, r12}
        add             r1,  r1,  r2
        ALIGN_DWORD     2,   r4,  r5,  r12
        pld             [r1]
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             3b
        pop             {r4-r5,pc}
        .align 5
4:      @ source offset 3
        ldm             r1,  {r4-r5, r12}
        add             r1,  r1,  r2
        ALIGN_DWORD     3,   r4,  r5,  r12
        pld             [r1]
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             4b
        pop             {r4-r5,pc}
endfunc
192
@ ----------------------------------------------------------------
@ Horizontal half-pel, 8 bytes wide: each output byte is the
@ RND_AVG32 average of source bytes x and x + 1.
@   r0 = block      destination, word aligned
@   r1 = pixels     source, any byte alignment
@   r2 = line_size  added to both pointers after each row
@   r3 = h          row count, at least 1 (tested after each row)
@   r12             holds the 0xFEFEFEFE mask for RND_AVG32
@ Every loop loads three words from the word-aligned source and
@ builds the two 8-byte operands (source offset n and n + 1) from
@ them; an operand at offset 0 or 4 is used straight from the
@ loaded registers.
function ff_put_pixels8_x2_arm, export=1, align=5
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld             [r1]
        push            {r4-r10,lr}
        ldr             r12, =0xfefefefe
        JMP_ALIGN       r5,  r1
1:      @ source offset 0: average bytes 0.. (r4,r5) with bytes 1..
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
        pld             [r1]
        RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             1b
        pop             {r4-r10,pc}
        .align 5
2:      @ source offset 1: average bytes 1.. with bytes 2..
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
        ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
        pld             [r1]
        RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             2b
        pop             {r4-r10,pc}
        .align 5
3:      @ source offset 2: average bytes 2.. with bytes 3..
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
        ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
        pld             [r1]
        RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             3b
        pop             {r4-r10,pc}
        .align 5
4:      @ source offset 3: average bytes 3.. with bytes 4.. (r5,r10)
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
        pld             [r1]
        RND_AVG32       r8,  r9,  r6,  r7,  r5,  r10, r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             4b
        pop             {r4-r10,pc}
endfunc
251
@ Horizontal half-pel, 8 bytes wide, using NO_RND_AVG32: each output
@ byte is that average of source bytes x and x + 1.
@   r0 = block      destination, word aligned
@   r1 = pixels     source, any byte alignment
@   r2 = line_size  added to both pointers after each row
@   r3 = h          row count, at least 1 (tested after each row)
@   r12             holds the 0xFEFEFEFE mask for NO_RND_AVG32
@ Every loop loads three words from the word-aligned source and
@ builds the two 8-byte operands (source offset n and n + 1) from
@ them; an operand at offset 0 or 4 is used straight from the
@ loaded registers.
function ff_put_no_rnd_pixels8_x2_arm, export=1, align=5
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld             [r1]
        push            {r4-r10,lr}
        ldr             r12, =0xfefefefe
        JMP_ALIGN       r5,  r1
1:      @ source offset 0: average bytes 0.. (r4,r5) with bytes 1..
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
        pld             [r1]
        NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             1b
        pop             {r4-r10,pc}
        .align 5
2:      @ source offset 1: average bytes 1.. with bytes 2..
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
        ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
        pld             [r1]
        NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             2b
        pop             {r4-r10,pc}
        .align 5
3:      @ source offset 2: average bytes 2.. with bytes 3..
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
        ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
        pld             [r1]
        NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             3b
        pop             {r4-r10,pc}
        .align 5
4:      @ source offset 3: average bytes 3.. with bytes 4.. (r5,r10)
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
        pld             [r1]
        NO_RND_AVG32    r8,  r9,  r6,  r7,  r5,  r10, r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             4b
        pop             {r4-r10,pc}
endfunc
309
310
@ ----------------------------------------------------------------
@ Vertical half-pel, 8 bytes wide: each output row is the RND_AVG32
@ average of a source row and the row below it.
@   r0 = block      destination, word aligned
@   r1 = pixels     source, any byte alignment
@   r2 = line_size  added to both pointers after each row
@   r3 = h          halved on entry; every loop pass emits two rows.
@                   An odd h therefore loses its last row, and the
@                   count is tested only after a pass, so h is
@                   expected to be even and at least 2.
@   r12             holds the 0xFEFEFEFE mask for RND_AVG32
@ Two row buffers alternate roles.  RND_AVG32 destroys its Rn pair
@ and keeps its Rm pair, so the older row is always passed as Rn and
@ the newer row survives to be averaged with the next load.
function ff_put_pixels8_y2_arm, export=1, align=5
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld             [r1]
        push            {r4-r11,lr}
        mov             r3,  r3,  lsr #1
        ldr             r12, =0xfefefefe
        JMP_ALIGN       r5,  r1
1:      @ source offset 0: rows alternate between r4,r5 and r6,r7
        ldm             r1,  {r4-r5}
        add             r1,  r1,  r2
6:      ldm             r1,  {r6-r7}
        add             r1,  r1,  r2
        pld             [r1]
        RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
        ldm             r1,  {r4-r5}
        add             r1,  r1,  r2
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        pld             [r1]
        RND_AVG32       r8,  r9,  r6,  r7,  r4,  r5,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11,pc}
        .align 5
2:      @ source offset 1: rows alternate between r4,r5 and r7,r8
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     1,   r4,  r5,  r6
6:      ldm             r1,  {r7-r9}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     1,   r7,  r8,  r9
        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     1,   r4,  r5,  r6
        subs            r3,  r3,  #1
        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11,pc}
        .align 5
3:      @ source offset 2
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     2,   r4,  r5,  r6
6:      ldm             r1,  {r7-r9}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     2,   r7,  r8,  r9
        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     2,   r4,  r5,  r6
        subs            r3,  r3,  #1
        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11,pc}
        .align 5
4:      @ source offset 3
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     3,   r4,  r5,  r6
6:      ldm             r1,  {r7-r9}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     3,   r7,  r8,  r9
        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     3,   r4,  r5,  r6
        subs            r3,  r3,  #1
        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11,pc}
endfunc
408
@ Vertical half-pel, 8 bytes wide, using NO_RND_AVG32: each output
@ row is that average of a source row and the row below it.
@   r0 = block      destination, word aligned
@   r1 = pixels     source, any byte alignment
@   r2 = line_size  added to both pointers after each row
@   r3 = h          halved on entry; every loop pass emits two rows.
@                   An odd h therefore loses its last row, and the
@                   count is tested only after a pass, so h is
@                   expected to be even and at least 2.
@   r12             holds the 0xFEFEFEFE mask for NO_RND_AVG32
@ Two row buffers alternate roles.  NO_RND_AVG32 destroys its Rn
@ pair and keeps its Rm pair, so the older row is always passed as
@ Rn and the newer row survives to be averaged with the next load.
function ff_put_no_rnd_pixels8_y2_arm, export=1, align=5
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld             [r1]
        push            {r4-r11,lr}
        mov             r3,  r3,  lsr #1
        ldr             r12, =0xfefefefe
        JMP_ALIGN       r5,  r1
1:      @ source offset 0: rows alternate between r4,r5 and r6,r7
        ldm             r1,  {r4-r5}
        add             r1,  r1,  r2
6:      ldm             r1,  {r6-r7}
        add             r1,  r1,  r2
        pld             [r1]
        NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
        ldm             r1,  {r4-r5}
        add             r1,  r1,  r2
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        pld             [r1]
        NO_RND_AVG32    r8,  r9,  r6,  r7,  r4,  r5,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11,pc}
        .align 5
2:      @ source offset 1: rows alternate between r4,r5 and r7,r8
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     1,   r4,  r5,  r6
6:      ldm             r1,  {r7-r9}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     1,   r7,  r8,  r9
        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     1,   r4,  r5,  r6
        subs            r3,  r3,  #1
        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11,pc}
        .align 5
3:      @ source offset 2
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     2,   r4,  r5,  r6
6:      ldm             r1,  {r7-r9}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     2,   r7,  r8,  r9
        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     2,   r4,  r5,  r6
        subs            r3,  r3,  #1
        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11,pc}
        .align 5
4:      @ source offset 3
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     3,   r4,  r5,  r6
6:      ldm             r1,  {r7-r9}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     3,   r7,  r8,  r9
        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     3,   r4,  r5,  r6
        subs            r3,  r3,  #1
        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11,pc}
endfunc
505
506 .ltorg
507
@ ----------------------------------------------------------------
@ RND_XY2_IT align, rnd
@ One source row of the 2-D half-pel filter.  Loads the row at r1,
@ advances r1 by r2, and forms two 8-byte operands a (r4,r5) and
@ b (r6,r7), one byte apart, for source byte offset align (0..3).
@ Each byte is then split into its low 2 and high 6 bits and the two
@ operands are summed separately:
@     r8,  r9  = (a & 0x03030303) + (b & 0x03030303)  [+ rounding]
@     r10, r11 = ((a >> 2) & 0x3f3f3f3f) + ((b >> 2) & 0x3f3f3f3f)
@ The rounding term is added only when r3 is even on entry, so only
@ one of any two consecutive rows carries it.  It is
@ 0x03030303 & (0x03030303 rnd 1): 0x02020202 for rnd = lsl and
@ 0x01010101 for rnd = lsr.
@ Finishes with subs r3, r3, 1; the flags are left for the caller.
@ Clobbers r4-r12 and r14.
.macro  RND_XY2_IT align, rnd
        @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
        @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
.if \align == 0
        ldm             r1,  {r6-r8}
.elseif \align == 3
        ldm             r1,  {r5-r7}
.else
        ldm             r1,  {r8-r10}
.endif
        add             r1,  r1,  r2
        pld             [r1]
.if \align == 0
        ALIGN_DWORD_D   1,   r4,  r5,  r6,  r7,  r8
.elseif \align == 1
        ALIGN_DWORD_D   1,   r4,  r5,  r8,  r9,  r10
        ALIGN_DWORD_D   2,   r6,  r7,  r8,  r9,  r10
.elseif \align == 2
        ALIGN_DWORD_D   2,   r4,  r5,  r8,  r9,  r10
        ALIGN_DWORD_D   3,   r6,  r7,  r8,  r9,  r10
.elseif \align == 3
        ALIGN_DWORD_D   3,   r4,  r5,  r5,  r6,  r7
.endif
        ldr             r14, =0x03030303
        tst             r3,  #1
        and             r8,  r4,  r14
        and             r9,  r5,  r14
        and             r10, r6,  r14
        and             r11, r7,  r14
        it              eq
        andeq           r14, r14, r14, \rnd #1
        add             r8,  r8,  r10
        add             r9,  r9,  r11
        ldr             r12, =0xfcfcfcfc >> 2
        itt             eq
        addeq           r8,  r8,  r14
        addeq           r9,  r9,  r14
        and             r4,  r12, r4,  lsr #2
        and             r5,  r12, r5,  lsr #2
        and             r6,  r12, r6,  lsr #2
        and             r7,  r12, r7,  lsr #2
        add             r10, r4,  r6
        add             r11, r5,  r7
        subs            r3,  r3,  #1
.endm
554
@ RND_XY2_EXPAND align, rnd
@ Complete row loop of the 2-D half-pel filter for one source
@ alignment.  The first RND_XY2_IT primes the sums for row 0.  Each
@ pass at label 6 then parks the previous row sums on the stack,
@ computes the next row, pops the old sums into r4-r7 and combines:
@     out = (((l_prev + l_cur) >> 2) & 0x0f0f0f0f) + h_prev + h_cur
@ where l is the low-bit sum (r8,r9) and h the high-bit sum
@ (r10,r11) produced by RND_XY2_IT.
@ The bge consumes the flags set by the subs that ends RND_XY2_IT;
@ nothing in between alters them.  With the extra decrement from the
@ priming step this stores h rows and reads h + 1 source rows.
@ Ends by popping r4-r11 and pc, so the enclosing function must have
@ pushed r4-r11 and lr.
.macro  RND_XY2_EXPAND align, rnd
        RND_XY2_IT      \align, \rnd
6:      push            {r8-r11}
        RND_XY2_IT      \align, \rnd
        pop             {r4-r7}
        add             r4,  r4,  r8
        add             r5,  r5,  r9
        ldr             r14, =0x0f0f0f0f
        add             r6,  r6,  r10
        add             r7,  r7,  r11
        and             r4,  r14, r4,  lsr #2
        and             r5,  r14, r5,  lsr #2
        add             r4,  r4,  r6
        add             r5,  r5,  r7
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bge             6b
        pop             {r4-r11,pc}
.endm
574
@ 2-D half-pel, 8 bytes wide: each output byte combines source bytes
@ x and x + 1 of two consecutive rows, with the rounding term
@ selected by rnd = lsl in RND_XY2_IT.
@   r0 = block      destination, word aligned
@   r1 = pixels     source, any byte alignment
@   r2 = line_size  added to both pointers after each row
@   r3 = h          row count
@ JMP_ALIGN word-aligns r1 and selects the expansion for the source
@ byte offset.  Each RND_XY2_EXPAND ends with its own return, so no
@ expansion falls through into the next.
function ff_put_pixels8_xy2_arm, export=1, align=5
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld             [r1]
        push            {r4-r11,lr} @ R14 is also called LR
        JMP_ALIGN       r5,  r1
1:      RND_XY2_EXPAND  0, lsl
        .align 5
2:      RND_XY2_EXPAND  1, lsl
        .align 5
3:      RND_XY2_EXPAND  2, lsl
        .align 5
4:      RND_XY2_EXPAND  3, lsl
endfunc
589
@ 2-D half-pel, 8 bytes wide: each output byte combines source bytes
@ x and x + 1 of two consecutive rows, with the smaller rounding
@ term selected by rnd = lsr in RND_XY2_IT.
@   r0 = block      destination, word aligned
@   r1 = pixels     source, any byte alignment
@   r2 = line_size  added to both pointers after each row
@   r3 = h          row count
@ JMP_ALIGN word-aligns r1 and selects the expansion for the source
@ byte offset.  Each RND_XY2_EXPAND ends with its own return, so no
@ expansion falls through into the next.
function ff_put_no_rnd_pixels8_xy2_arm, export=1, align=5
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld             [r1]
        push            {r4-r11,lr}
        JMP_ALIGN       r5,  r1
1:      RND_XY2_EXPAND  0, lsr
        .align 5
2:      RND_XY2_EXPAND  1, lsr
        .align 5
3:      RND_XY2_EXPAND  2, lsr
        .align 5
4:      RND_XY2_EXPAND  3, lsr
endfunc