Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | @ |
2 | @ ARMv4-optimized halfpel functions | |
3 | @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp> | |
4 | @ | |
5 | @ This file is part of FFmpeg. | |
6 | @ | |
7 | @ FFmpeg is free software; you can redistribute it and/or | |
8 | @ modify it under the terms of the GNU Lesser General Public | |
9 | @ License as published by the Free Software Foundation; either | |
10 | @ version 2.1 of the License, or (at your option) any later version. | |
11 | @ | |
12 | @ FFmpeg is distributed in the hope that it will be useful, | |
13 | @ but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | @ Lesser General Public License for more details. | |
16 | @ | |
17 | @ You should have received a copy of the GNU Lesser General Public | |
18 | @ License along with FFmpeg; if not, write to the Free Software | |
19 | @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | @ | |
21 | ||
22 | #include "config.h" | |
23 | #include "libavutil/arm/asm.S" | |
24 | ||
25 | #if !HAVE_ARMV5TE_EXTERNAL /* when HAVE_ARMV5TE_EXTERNAL is 0, pld is redefined as the @ comment marker, so every pld line below turns into a comment */ | |
26 | #define pld @ | |
27 | #endif /* !HAVE_ARMV5TE_EXTERNAL */ | |
28 | ||
29 | .macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4 @ Rd0-Rd3 = the four words that start shift bytes into the five words Rn0-Rn4; the Rn registers are only read. shift is 1, 2 or 3 at every call site in this file. NOTE(review): pairing a right shift with the low bytes of the next word matches memory order only for little-endian loads; confirm | |
30 | mov \Rd0, \Rn0, lsr #(\shift * 8) @ step 1: shift each source word right by shift bytes, leaving its top bytes zero | |
31 | mov \Rd1, \Rn1, lsr #(\shift * 8) | |
32 | mov \Rd2, \Rn2, lsr #(\shift * 8) | |
33 | mov \Rd3, \Rn3, lsr #(\shift * 8) | |
34 | orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8) @ step 2: fill the vacated top bytes with the low bytes of the following source word | |
35 | orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8) | |
36 | orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8) | |
37 | orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8) @ the fifth word Rn4 only contributes its low bytes | |
38 | .endm | |
39 | .macro ALIGN_DWORD shift, R0, R1, R2 @ in-place variant: R0 and R1 are overwritten with the two words that start shift bytes into R0:R1:R2; R2 is only read | |
40 | mov \R0, \R0, lsr #(\shift * 8) | |
41 | orr \R0, \R0, \R1, lsl #(32 - \shift * 8) @ must run before R1 is shifted: R0 needs the original low bytes of R1 | |
42 | mov \R1, \R1, lsr #(\shift * 8) | |
43 | orr \R1, \R1, \R2, lsl #(32 - \shift * 8) | |
44 | .endm | |
45 | .macro ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2 @ Rdst0, Rdst1 = the two words that start shift bytes into Rsrc0:Rsrc1:Rsrc2, written to separate destination registers | |
46 | mov \Rdst0, \Rsrc0, lsr #(\shift * 8) @ Rsrc0 is read only here, which is why RND_XY2_IT can pass the same register as Rdst1 and Rsrc0 | |
47 | mov \Rdst1, \Rsrc1, lsr #(\shift * 8) | |
48 | orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8)) @ top bytes come from the low bytes of the next source word | |
49 | orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8)) | |
50 | .endm | |
51 | ||
52 | .macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask @ byte-wise average of two word pairs, rounding halves up: Rd0 = avg(Rn0, Rm0), Rd1 = avg(Rn1, Rm1) | |
53 | @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1) | |
54 | @ Rmask = 0xFEFEFEFE (must be loaded by the caller) | |
55 | @ Rn0 and Rn1 are destroyed (overwritten with the OR); Rm0 and Rm1 are preserved | |
56 | eor \Rd0, \Rn0, \Rm0 @ bits in which the two inputs differ | |
57 | eor \Rd1, \Rn1, \Rm1 | |
58 | orr \Rn0, \Rn0, \Rm0 | |
59 | orr \Rn1, \Rn1, \Rm1 | |
60 | and \Rd0, \Rd0, \Rmask @ clear bit 0 of every byte so the shift below cannot move a bit into the neighbouring byte | |
61 | and \Rd1, \Rd1, \Rmask | |
62 | sub \Rd0, \Rn0, \Rd0, lsr #1 | |
63 | sub \Rd1, \Rn1, \Rd1, lsr #1 | |
64 | .endm | |
65 | ||
66 | .macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask @ byte-wise average of two word pairs, rounding halves down: Rd0 = avg(Rn0, Rm0), Rd1 = avg(Rn1, Rm1) | |
67 | @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1) | |
68 | @ Rmask = 0xFEFEFEFE (must be loaded by the caller) | |
69 | @ Rn0 and Rn1 are destroyed (overwritten with the AND); Rm0 and Rm1 are preserved | |
70 | eor \Rd0, \Rn0, \Rm0 @ bits in which the two inputs differ | |
71 | eor \Rd1, \Rn1, \Rm1 | |
72 | and \Rn0, \Rn0, \Rm0 | |
73 | and \Rn1, \Rn1, \Rm1 | |
74 | and \Rd0, \Rd0, \Rmask @ clear bit 0 of every byte so the shift below cannot move a bit into the neighbouring byte | |
75 | and \Rd1, \Rd1, \Rmask | |
76 | add \Rd0, \Rn0, \Rd0, lsr #1 @ common bits plus half of the differing bits | |
77 | add \Rd1, \Rn1, \Rd1, lsr #1 | |
78 | .endm | |
79 | ||
80 | .macro JMP_ALIGN tmp, reg @ dispatch on the byte offset of reg within its word: branches forward to local label 1, 2, 3 or 4 for offset 0, 1, 2 or 3. The code following the macro must define those four labels. tmp is clobbered and reg is rounded down to a word boundary | |
81 | ands \tmp, \reg, #3 @ tmp = offset within the word; Z is set when reg is already word aligned | |
82 | bic \reg, \reg, #3 @ no s suffix, so the flags from ands survive for the beq below | |
83 | beq 1f | |
84 | subs \tmp, \tmp, #1 | |
85 | beq 2f | |
86 | subs \tmp, \tmp, #1 | |
87 | beq 3f | |
88 | b 4f @ only offset 3 is left | |
89 | .endm | |
90 | ||
91 | @ ---------------------------------------------------------------- | |
92 | function ff_put_pixels16_arm, export=1, align=5 | |
93 | @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
94 | @ block = word aligned, pixels = unaligned. Copies rows of 16 bytes from pixels to block, advancing both pointers by line_size after each row. As used below: r0 = block, r1 = pixels, r2 = line_size, r3 = h. The row counter is tested after each row, so h is expected to be at least 1. | |
95 | pld [r1] | |
96 | push {r4-r11, lr} | |
97 | JMP_ALIGN r5, r1 @ r1 is rounded down to a word boundary; continue at 1/2/3/4 for a source byte offset of 0/1/2/3 | |
98 | 1: @ offset 0: four source words are stored unchanged | |
99 | ldm r1, {r4-r7} | |
100 | add r1, r1, r2 | |
101 | stm r0, {r4-r7} | |
102 | pld [r1] @ hint for the next source row | |
103 | subs r3, r3, #1 | |
104 | add r0, r0, r2 @ add leaves the flags from subs intact for the bne | |
105 | bne 1b | |
106 | pop {r4-r11, pc} @ return: the saved lr is popped straight into pc | |
107 | .align 5 | |
108 | 2: @ offset 1: load five words and realign them by one byte | |
109 | ldm r1, {r4-r8} | |
110 | add r1, r1, r2 | |
111 | ALIGN_QWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 @ r9-r12 = the 16 bytes starting one byte into r4-r8 | |
112 | pld [r1] | |
113 | subs r3, r3, #1 | |
114 | stm r0, {r9-r12} @ stm and add do not change the flags set by subs | |
115 | add r0, r0, r2 | |
116 | bne 2b | |
117 | pop {r4-r11, pc} | |
118 | .align 5 | |
119 | 3: @ offset 2: same loop with a two byte realignment | |
120 | ldm r1, {r4-r8} | |
121 | add r1, r1, r2 | |
122 | ALIGN_QWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 | |
123 | pld [r1] | |
124 | subs r3, r3, #1 | |
125 | stm r0, {r9-r12} | |
126 | add r0, r0, r2 | |
127 | bne 3b | |
128 | pop {r4-r11, pc} | |
129 | .align 5 | |
130 | 4: @ offset 3: same loop with a three byte realignment | |
131 | ldm r1, {r4-r8} | |
132 | add r1, r1, r2 | |
133 | ALIGN_QWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 | |
134 | pld [r1] | |
135 | subs r3, r3, #1 | |
136 | stm r0, {r9-r12} | |
137 | add r0, r0, r2 | |
138 | bne 4b | |
139 | pop {r4-r11,pc} | |
140 | endfunc | |
141 | ||
142 | @ ---------------------------------------------------------------- | |
143 | function ff_put_pixels8_arm, export=1, align=5 | |
144 | @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
145 | @ block = word aligned, pixels = unaligned. Copies rows of 8 bytes from pixels to block, advancing both pointers by line_size after each row. As used below: r0 = block, r1 = pixels, r2 = line_size, r3 = h. The row counter is tested after each row, so h is expected to be at least 1. | |
146 | pld [r1] | |
147 | push {r4-r5,lr} @ r12 is used as a third data register below without being saved | |
148 | JMP_ALIGN r5, r1 @ r1 is rounded down to a word boundary; continue at 1/2/3/4 for a source byte offset of 0/1/2/3 | |
149 | 1: @ offset 0: two source words are stored unchanged | |
150 | ldm r1, {r4-r5} | |
151 | add r1, r1, r2 | |
152 | subs r3, r3, #1 | |
153 | pld [r1] | |
154 | stm r0, {r4-r5} @ stm and add do not change the flags set by subs | |
155 | add r0, r0, r2 | |
156 | bne 1b | |
157 | pop {r4-r5,pc} @ return: the saved lr is popped straight into pc | |
158 | .align 5 | |
159 | 2: @ offset 1: load three words and realign in place | |
160 | ldm r1, {r4-r5, r12} | |
161 | add r1, r1, r2 | |
162 | ALIGN_DWORD 1, r4, r5, r12 @ r4, r5 = the 8 bytes starting one byte into r4:r5:r12 | |
163 | pld [r1] | |
164 | subs r3, r3, #1 | |
165 | stm r0, {r4-r5} | |
166 | add r0, r0, r2 | |
167 | bne 2b | |
168 | pop {r4-r5,pc} | |
169 | .align 5 | |
170 | 3: @ offset 2: same loop with a two byte realignment | |
171 | ldm r1, {r4-r5, r12} | |
172 | add r1, r1, r2 | |
173 | ALIGN_DWORD 2, r4, r5, r12 | |
174 | pld [r1] | |
175 | subs r3, r3, #1 | |
176 | stm r0, {r4-r5} | |
177 | add r0, r0, r2 | |
178 | bne 3b | |
179 | pop {r4-r5,pc} | |
180 | .align 5 | |
181 | 4: @ offset 3: same loop with a three byte realignment | |
182 | ldm r1, {r4-r5, r12} | |
183 | add r1, r1, r2 | |
184 | ALIGN_DWORD 3, r4, r5, r12 | |
185 | pld [r1] | |
186 | subs r3, r3, #1 | |
187 | stm r0, {r4-r5} | |
188 | add r0, r0, r2 | |
189 | bne 4b | |
190 | pop {r4-r5,pc} | |
191 | endfunc | |
192 | ||
193 | @ ---------------------------------------------------------------- | |
194 | function ff_put_pixels8_x2_arm, export=1, align=5 | |
195 | @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
196 | @ block = word aligned, pixels = unaligned. For each row, every one of the 8 output bytes is the RND_AVG32 average (halves round up) of a source byte and the byte to its right, so 9 source bytes are used per row. As used below: r0 = block, r1 = pixels, r2 = line_size, r3 = h. The row counter is tested after each row, so h is expected to be at least 1. | |
197 | pld [r1] | |
198 | push {r4-r10,lr} | |
199 | ldr r12, =0xfefefefe @ Rmask argument for RND_AVG32 | |
200 | JMP_ALIGN r5, r1 @ r1 is rounded down to a word boundary; continue at 1/2/3/4 for a source byte offset of 0/1/2/3 | |
201 | 1: @ offset 0: r4, r5 already hold the row; only the right-hand neighbours need a shift | |
202 | ldm r1, {r4-r5, r10} | |
203 | add r1, r1, r2 | |
204 | ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 @ r6, r7 = the row shifted by one byte | |
205 | pld [r1] | |
206 | RND_AVG32 r8, r9, r4, r5, r6, r7, r12 | |
207 | subs r3, r3, #1 | |
208 | stm r0, {r8-r9} @ stm and add do not change the flags set by subs | |
209 | add r0, r0, r2 | |
210 | bne 1b | |
211 | pop {r4-r10,pc} @ return: the saved lr is popped straight into pc | |
212 | .align 5 | |
213 | 2: @ offset 1: the row starts at byte 1 and its neighbours at byte 2 | |
214 | ldm r1, {r4-r5, r10} | |
215 | add r1, r1, r2 | |
216 | ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 | |
217 | ALIGN_DWORD_D 2, r8, r9, r4, r5, r10 | |
218 | pld [r1] | |
219 | RND_AVG32 r4, r5, r6, r7, r8, r9, r12 @ the raw words in r4, r5 are no longer needed, so they receive the result | |
220 | subs r3, r3, #1 | |
221 | stm r0, {r4-r5} | |
222 | add r0, r0, r2 | |
223 | bne 2b | |
224 | pop {r4-r10,pc} | |
225 | .align 5 | |
226 | 3: @ offset 2: the row starts at byte 2 and its neighbours at byte 3 | |
227 | ldm r1, {r4-r5, r10} | |
228 | add r1, r1, r2 | |
229 | ALIGN_DWORD_D 2, r6, r7, r4, r5, r10 | |
230 | ALIGN_DWORD_D 3, r8, r9, r4, r5, r10 | |
231 | pld [r1] | |
232 | RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | |
233 | subs r3, r3, #1 | |
234 | stm r0, {r4-r5} | |
235 | add r0, r0, r2 | |
236 | bne 3b | |
237 | pop {r4-r10,pc} | |
238 | .align 5 | |
239 | 4: @ offset 3: the row starts at byte 3; its neighbours start at byte 4, which is exactly the second and third loaded words | |
240 | ldm r1, {r4-r5, r10} | |
241 | add r1, r1, r2 | |
242 | ALIGN_DWORD_D 3, r6, r7, r4, r5, r10 | |
243 | pld [r1] | |
244 | RND_AVG32 r8, r9, r6, r7, r5, r10, r12 @ r5, r10 are used unshifted as the right-hand neighbours | |
245 | subs r3, r3, #1 | |
246 | stm r0, {r8-r9} | |
247 | add r0, r0, r2 | |
248 | bne 4b | |
249 | pop {r4-r10,pc} | |
250 | endfunc | |
251 | ||
252 | function ff_put_no_rnd_pixels8_x2_arm, export=1, align=5 | |
253 | @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
254 | @ block = word aligned, pixels = unaligned. Same structure as ff_put_pixels8_x2_arm but uses NO_RND_AVG32, so each output byte is the average of a source byte and the byte to its right with halves rounded down. As used below: r0 = block, r1 = pixels, r2 = line_size, r3 = h. The row counter is tested after each row, so h is expected to be at least 1. | |
255 | pld [r1] | |
256 | push {r4-r10,lr} | |
257 | ldr r12, =0xfefefefe @ Rmask argument for NO_RND_AVG32 | |
258 | JMP_ALIGN r5, r1 @ r1 is rounded down to a word boundary; continue at 1/2/3/4 for a source byte offset of 0/1/2/3 | |
259 | 1: @ offset 0: r4, r5 already hold the row; only the right-hand neighbours need a shift | |
260 | ldm r1, {r4-r5, r10} | |
261 | add r1, r1, r2 | |
262 | ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 @ r6, r7 = the row shifted by one byte | |
263 | pld [r1] | |
264 | NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 | |
265 | subs r3, r3, #1 | |
266 | stm r0, {r8-r9} @ stm and add do not change the flags set by subs | |
267 | add r0, r0, r2 | |
268 | bne 1b | |
269 | pop {r4-r10,pc} @ return: the saved lr is popped straight into pc | |
270 | .align 5 | |
271 | 2: @ offset 1: the row starts at byte 1 and its neighbours at byte 2 | |
272 | ldm r1, {r4-r5, r10} | |
273 | add r1, r1, r2 | |
274 | ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 | |
275 | ALIGN_DWORD_D 2, r8, r9, r4, r5, r10 | |
276 | pld [r1] | |
277 | NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 @ the raw words in r4, r5 are no longer needed, so they receive the result | |
278 | subs r3, r3, #1 | |
279 | stm r0, {r4-r5} | |
280 | add r0, r0, r2 | |
281 | bne 2b | |
282 | pop {r4-r10,pc} | |
283 | .align 5 | |
284 | 3: @ offset 2: the row starts at byte 2 and its neighbours at byte 3 | |
285 | ldm r1, {r4-r5, r10} | |
286 | add r1, r1, r2 | |
287 | ALIGN_DWORD_D 2, r6, r7, r4, r5, r10 | |
288 | ALIGN_DWORD_D 3, r8, r9, r4, r5, r10 | |
289 | pld [r1] | |
290 | NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | |
291 | subs r3, r3, #1 | |
292 | stm r0, {r4-r5} | |
293 | add r0, r0, r2 | |
294 | bne 3b | |
295 | pop {r4-r10,pc} | |
296 | .align 5 | |
297 | 4: @ offset 3: the row starts at byte 3; its neighbours start at byte 4, which is exactly the second and third loaded words | |
298 | ldm r1, {r4-r5, r10} | |
299 | add r1, r1, r2 | |
300 | ALIGN_DWORD_D 3, r6, r7, r4, r5, r10 | |
301 | pld [r1] | |
302 | NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12 @ r5, r10 are used unshifted as the right-hand neighbours | |
303 | subs r3, r3, #1 | |
304 | stm r0, {r8-r9} | |
305 | add r0, r0, r2 | |
306 | bne 4b | |
307 | pop {r4-r10,pc} | |
308 | endfunc | |
309 | ||
310 | ||
311 | @ ---------------------------------------------------------------- | |
312 | function ff_put_pixels8_y2_arm, export=1, align=5 | |
313 | @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
314 | @ block = word aligned, pixels = unaligned. Each 8 byte output row is the RND_AVG32 average (halves round up) of a source row and the source row below it. As used below: r0 = block, r1 = pixels, r2 = line_size, r3 = h. h is halved on entry and every loop pass writes two rows, so an odd h would lose its last row and h below 2 is not handled; presumably callers always pass an even h of at least 2 (confirm). | |
315 | pld [r1] | |
316 | push {r4-r11,lr} | |
317 | mov r3, r3, lsr #1 @ r3 = number of two-row loop passes | |
318 | ldr r12, =0xfefefefe @ Rmask argument for RND_AVG32 | |
319 | JMP_ALIGN r5, r1 @ r1 is rounded down to a word boundary; continue at 1/2/3/4 for a source byte offset of 0/1/2/3 | |
320 | 1: @ offset 0: rows are used exactly as loaded | |
321 | ldm r1, {r4-r5} @ first source row, loaded once before the loop | |
322 | add r1, r1, r2 | |
323 | 6: ldm r1, {r6-r7} @ loop entry: r4, r5 hold the upper row, r6, r7 receive the row below | |
324 | add r1, r1, r2 | |
325 | pld [r1] | |
326 | RND_AVG32 r8, r9, r4, r5, r6, r7, r12 @ destroys r4, r5 and keeps r6, r7 for the next average | |
327 | ldm r1, {r4-r5} @ next row down, reusing the registers just destroyed | |
328 | add r1, r1, r2 | |
329 | stm r0, {r8-r9} | |
330 | add r0, r0, r2 | |
331 | pld [r1] | |
332 | RND_AVG32 r8, r9, r6, r7, r4, r5, r12 @ destroys r6, r7 and keeps r4, r5 as the upper row of the next pass | |
333 | subs r3, r3, #1 | |
334 | stm r0, {r8-r9} @ stm and add do not change the flags set by subs | |
335 | add r0, r0, r2 | |
336 | bne 6b | |
337 | pop {r4-r11,pc} @ return: the saved lr is popped straight into pc | |
338 | .align 5 | |
339 | 2: @ offset 1: every row is loaded as three words and realigned by one byte before averaging | |
340 | ldm r1, {r4-r6} | |
341 | add r1, r1, r2 | |
342 | pld [r1] | |
343 | ALIGN_DWORD 1, r4, r5, r6 @ first row, realigned in place into r4, r5 | |
344 | 6: ldm r1, {r7-r9} | |
345 | add r1, r1, r2 | |
346 | pld [r1] | |
347 | ALIGN_DWORD 1, r7, r8, r9 | |
348 | RND_AVG32 r10, r11, r4, r5, r7, r8, r12 @ destroys r4, r5 and keeps r7, r8 | |
349 | stm r0, {r10-r11} | |
350 | add r0, r0, r2 | |
351 | ldm r1, {r4-r6} | |
352 | add r1, r1, r2 | |
353 | pld [r1] | |
354 | ALIGN_DWORD 1, r4, r5, r6 | |
355 | subs r3, r3, #1 | |
356 | RND_AVG32 r10, r11, r7, r8, r4, r5, r12 @ no instruction in the macro sets flags, so the subs result still reaches the bne; r4, r5 survive for the next pass | |
357 | stm r0, {r10-r11} | |
358 | add r0, r0, r2 | |
359 | bne 6b | |
360 | pop {r4-r11,pc} | |
361 | .align 5 | |
362 | 3: @ offset 2: same loop with a two byte realignment | |
363 | ldm r1, {r4-r6} | |
364 | add r1, r1, r2 | |
365 | pld [r1] | |
366 | ALIGN_DWORD 2, r4, r5, r6 | |
367 | 6: ldm r1, {r7-r9} | |
368 | add r1, r1, r2 | |
369 | pld [r1] | |
370 | ALIGN_DWORD 2, r7, r8, r9 | |
371 | RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | |
372 | stm r0, {r10-r11} | |
373 | add r0, r0, r2 | |
374 | ldm r1, {r4-r6} | |
375 | add r1, r1, r2 | |
376 | pld [r1] | |
377 | ALIGN_DWORD 2, r4, r5, r6 | |
378 | subs r3, r3, #1 | |
379 | RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
380 | stm r0, {r10-r11} | |
381 | add r0, r0, r2 | |
382 | bne 6b | |
383 | pop {r4-r11,pc} | |
384 | .align 5 | |
385 | 4: @ offset 3: same loop with a three byte realignment | |
386 | ldm r1, {r4-r6} | |
387 | add r1, r1, r2 | |
388 | pld [r1] | |
389 | ALIGN_DWORD 3, r4, r5, r6 | |
390 | 6: ldm r1, {r7-r9} | |
391 | add r1, r1, r2 | |
392 | pld [r1] | |
393 | ALIGN_DWORD 3, r7, r8, r9 | |
394 | RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | |
395 | stm r0, {r10-r11} | |
396 | add r0, r0, r2 | |
397 | ldm r1, {r4-r6} | |
398 | add r1, r1, r2 | |
399 | pld [r1] | |
400 | ALIGN_DWORD 3, r4, r5, r6 | |
401 | subs r3, r3, #1 | |
402 | RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
403 | stm r0, {r10-r11} | |
404 | add r0, r0, r2 | |
405 | bne 6b | |
406 | pop {r4-r11,pc} | |
407 | endfunc | |
408 | ||
409 | function ff_put_no_rnd_pixels8_y2_arm, export=1, align=5 | |
410 | @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
411 | @ block = word aligned, pixels = unaligned. Same structure as ff_put_pixels8_y2_arm but uses NO_RND_AVG32, so each output row is the average of a source row and the row below it with halves rounded down. As used below: r0 = block, r1 = pixels, r2 = line_size, r3 = h. h is halved on entry and every loop pass writes two rows, so an odd h would lose its last row and h below 2 is not handled; presumably callers always pass an even h of at least 2 (confirm). | |
412 | pld [r1] | |
413 | push {r4-r11,lr} | |
414 | mov r3, r3, lsr #1 @ r3 = number of two-row loop passes | |
415 | ldr r12, =0xfefefefe @ Rmask argument for NO_RND_AVG32 | |
416 | JMP_ALIGN r5, r1 @ r1 is rounded down to a word boundary; continue at 1/2/3/4 for a source byte offset of 0/1/2/3 | |
417 | 1: @ offset 0: rows are used exactly as loaded | |
418 | ldm r1, {r4-r5} @ first source row, loaded once before the loop | |
419 | add r1, r1, r2 | |
420 | 6: ldm r1, {r6-r7} @ loop entry: r4, r5 hold the upper row, r6, r7 receive the row below | |
421 | add r1, r1, r2 | |
422 | pld [r1] | |
423 | NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 @ destroys r4, r5 and keeps r6, r7 for the next average | |
424 | ldm r1, {r4-r5} @ next row down, reusing the registers just destroyed | |
425 | add r1, r1, r2 | |
426 | stm r0, {r8-r9} | |
427 | add r0, r0, r2 | |
428 | pld [r1] | |
429 | NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12 @ destroys r6, r7 and keeps r4, r5 as the upper row of the next pass | |
430 | subs r3, r3, #1 | |
431 | stm r0, {r8-r9} @ stm and add do not change the flags set by subs | |
432 | add r0, r0, r2 | |
433 | bne 6b | |
434 | pop {r4-r11,pc} @ return: the saved lr is popped straight into pc | |
435 | .align 5 | |
436 | 2: @ offset 1: every row is loaded as three words and realigned by one byte before averaging | |
437 | ldm r1, {r4-r6} | |
438 | add r1, r1, r2 | |
439 | pld [r1] | |
440 | ALIGN_DWORD 1, r4, r5, r6 @ first row, realigned in place into r4, r5 | |
441 | 6: ldm r1, {r7-r9} | |
442 | add r1, r1, r2 | |
443 | pld [r1] | |
444 | ALIGN_DWORD 1, r7, r8, r9 | |
445 | NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 @ destroys r4, r5 and keeps r7, r8 | |
446 | stm r0, {r10-r11} | |
447 | add r0, r0, r2 | |
448 | ldm r1, {r4-r6} | |
449 | add r1, r1, r2 | |
450 | pld [r1] | |
451 | ALIGN_DWORD 1, r4, r5, r6 | |
452 | subs r3, r3, #1 | |
453 | NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 @ no instruction in the macro sets flags, so the subs result still reaches the bne; r4, r5 survive for the next pass | |
454 | stm r0, {r10-r11} | |
455 | add r0, r0, r2 | |
456 | bne 6b | |
457 | pop {r4-r11,pc} | |
458 | .align 5 | |
459 | 3: @ offset 2: same loop with a two byte realignment | |
460 | ldm r1, {r4-r6} | |
461 | add r1, r1, r2 | |
462 | pld [r1] | |
463 | ALIGN_DWORD 2, r4, r5, r6 | |
464 | 6: ldm r1, {r7-r9} | |
465 | add r1, r1, r2 | |
466 | pld [r1] | |
467 | ALIGN_DWORD 2, r7, r8, r9 | |
468 | NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | |
469 | stm r0, {r10-r11} | |
470 | add r0, r0, r2 | |
471 | ldm r1, {r4-r6} | |
472 | add r1, r1, r2 | |
473 | pld [r1] | |
474 | ALIGN_DWORD 2, r4, r5, r6 | |
475 | subs r3, r3, #1 | |
476 | NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
477 | stm r0, {r10-r11} | |
478 | add r0, r0, r2 | |
479 | bne 6b | |
480 | pop {r4-r11,pc} | |
481 | .align 5 | |
482 | 4: @ offset 3: same loop with a three byte realignment | |
483 | ldm r1, {r4-r6} | |
484 | add r1, r1, r2 | |
485 | pld [r1] | |
486 | ALIGN_DWORD 3, r4, r5, r6 | |
487 | 6: ldm r1, {r7-r9} | |
488 | add r1, r1, r2 | |
489 | pld [r1] | |
490 | ALIGN_DWORD 3, r7, r8, r9 | |
491 | NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | |
492 | stm r0, {r10-r11} | |
493 | add r0, r0, r2 | |
494 | ldm r1, {r4-r6} | |
495 | add r1, r1, r2 | |
496 | pld [r1] | |
497 | ALIGN_DWORD 3, r4, r5, r6 | |
498 | subs r3, r3, #1 | |
499 | NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
500 | stm r0, {r10-r11} | |
501 | add r0, r0, r2 | |
502 | bne 6b | |
503 | pop {r4-r11,pc} | |
504 | endfunc | |
505 | ||
506 | .ltorg @ emit here the literal pool collected so far, i.e. the 0xfefefefe constants requested with ldr rX, =value by the functions above | |
507 | ||
508 | @ ---------------------------------------------------------------- | |
509 | .macro RND_XY2_IT align, rnd @ one source row of the xy2 filter. Loads the row at r1 and advances r1 by r2, then forms a = 8 bytes at the pixel position and b = 8 bytes one position to the right. Results: r8, r9 = per-byte sum of the low 2 bits of a and b (plus the rounding constant when r3 is even), r10, r11 = per-byte sum of the high 6 bits. Ends by decrementing r3 with flags set. Clobbers r4-r7, r12, r14. align is the source byte offset 0..3; rnd is the shift operator (lsl or lsr) that selects the rounding constant | |
510 | @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202) | |
511 | @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2) | |
512 | .if \align == 0 | |
513 | ldm r1, {r6-r8} @ loaded straight into r6, r7 because the aligned row is b-free input a only after the shift below produces b in r4, r5 | |
514 | .elseif \align == 3 | |
515 | ldm r1, {r5-r7} @ second and third words land in r6, r7, which is already the row at byte offset 4 | |
516 | .else | |
517 | ldm r1, {r8-r10} | |
518 | .endif | |
519 | add r1, r1, r2 | |
520 | pld [r1] | |
521 | .if \align == 0 | |
522 | ALIGN_DWORD_D 1, r4, r5, r6, r7, r8 @ r4, r5 = row at offset 1; r6, r7 = row at offset 0 | |
523 | .elseif \align == 1 | |
524 | ALIGN_DWORD_D 1, r4, r5, r8, r9, r10 | |
525 | ALIGN_DWORD_D 2, r6, r7, r8, r9, r10 | |
526 | .elseif \align == 2 | |
527 | ALIGN_DWORD_D 2, r4, r5, r8, r9, r10 | |
528 | ALIGN_DWORD_D 3, r6, r7, r8, r9, r10 | |
529 | .elseif \align == 3 | |
530 | ALIGN_DWORD_D 3, r4, r5, r5, r6, r7 @ r5 is both second destination and first source; safe because the macro reads the first source before writing the second destination | |
531 | .endif | |
532 | ldr r14, =0x03030303 | |
533 | tst r3, #1 @ Z set when r3 is even; none of the instructions up to the addeq pair changes the flags | |
534 | and r8, r4, r14 | |
535 | and r9, r5, r14 | |
536 | and r10, r6, r14 | |
537 | and r11, r7, r14 | |
538 | it eq | |
539 | andeq r14, r14, r14, \rnd #1 @ rounding constant: 0x03030303 AND its shifted copy gives 0x02020202 for lsl and 0x01010101 for lsr | |
540 | add r8, r8, r10 | |
541 | add r9, r9, r11 | |
542 | ldr r12, =0xfcfcfcfc >> 2 @ r12 = 0x3f3f3f3f, applied after the shift instead of masking with 0xfcfcfcfc before it | |
543 | itt eq | |
544 | addeq r8, r8, r14 @ the constant is added only on even r3, so exactly one row of each consecutive pair carries it | |
545 | addeq r9, r9, r14 | |
546 | and r4, r12, r4, lsr #2 | |
547 | and r5, r12, r5, lsr #2 | |
548 | and r6, r12, r6, lsr #2 | |
549 | and r7, r12, r7, lsr #2 | |
550 | add r10, r4, r6 | |
551 | add r11, r5, r7 | |
552 | subs r3, r3, #1 @ these flags are consumed by the bge in RND_XY2_EXPAND | |
553 | .endm | |
554 | ||
555 | .macro RND_XY2_EXPAND align, rnd @ complete xy2 loop for one source alignment: combines the RND_XY2_IT sums of each pair of consecutive source rows into one 8 byte output row at r0, then returns from the enclosing function | |
556 | RND_XY2_IT \align, \rnd @ prime the loop with the sums of the first source row | |
557 | 6: push {r8-r11} @ park the previous row sums on the stack, since RND_XY2_IT overwrites r4-r11 | |
558 | RND_XY2_IT \align, \rnd | |
559 | pop {r4-r7} @ previous row: r4, r5 = low-bit sums, r6, r7 = high-bit sums | |
560 | add r4, r4, r8 | |
561 | add r5, r5, r9 | |
562 | ldr r14, =0x0f0f0f0f | |
563 | add r6, r6, r10 | |
564 | add r7, r7, r11 | |
565 | and r4, r14, r4, lsr #2 @ divide the low-bit total by 4 and mask off bits shifted in from the neighbouring byte | |
566 | and r5, r14, r5, lsr #2 | |
567 | add r4, r4, r6 | |
568 | add r5, r5, r7 | |
569 | stm r0, {r4-r5} | |
570 | add r0, r0, r2 | |
571 | bge 6b @ tests the subs at the end of RND_XY2_IT (nothing after it sets flags); loops until r3 becomes negative | |
572 | pop {r4-r11,pc} @ return; matches the push {r4-r11,lr} done by the functions that expand this macro | |
573 | .endm | |
574 | ||
575 | function ff_put_pixels8_xy2_arm, export=1, align=5 | |
576 | @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
577 | @ block = word aligned, pixels = unaligned. Writes 8 byte rows built by RND_XY2_EXPAND from each source byte, its right-hand neighbour and the same two bytes one row down; the lsl argument selects the 0x02020202 rounding constant. As used by the macros: r0 = block, r1 = pixels, r2 = line_size, r3 = h. | |
578 | pld [r1] | |
579 | push {r4-r11,lr} @ R14 is also called LR; it is saved here and then used as a scratch register by the macros | |
580 | JMP_ALIGN r5, r1 @ r1 is rounded down to a word boundary; continue at 1/2/3/4 for a source byte offset of 0/1/2/3 | |
581 | 1: RND_XY2_EXPAND 0, lsl @ each expansion ends with its own pop into pc, so control never falls through to the next label | |
582 | .align 5 | |
583 | 2: RND_XY2_EXPAND 1, lsl | |
584 | .align 5 | |
585 | 3: RND_XY2_EXPAND 2, lsl | |
586 | .align 5 | |
587 | 4: RND_XY2_EXPAND 3, lsl | |
588 | endfunc | |
589 | ||
590 | function ff_put_no_rnd_pixels8_xy2_arm, export=1, align=5 | |
591 | @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
592 | @ block = word aligned, pixels = unaligned. Same as ff_put_pixels8_xy2_arm except that the lsr argument makes RND_XY2_IT use the smaller rounding constant 0x01010101. As used by the macros: r0 = block, r1 = pixels, r2 = line_size, r3 = h. | |
593 | pld [r1] | |
594 | push {r4-r11,lr} @ lr (r14) is saved because the macros use r14 as a scratch register | |
595 | JMP_ALIGN r5, r1 @ r1 is rounded down to a word boundary; continue at 1/2/3/4 for a source byte offset of 0/1/2/3 | |
596 | 1: RND_XY2_EXPAND 0, lsr @ each expansion ends with its own pop into pc, so control never falls through to the next label | |
597 | .align 5 | |
598 | 2: RND_XY2_EXPAND 1, lsr | |
599 | .align 5 | |
600 | 3: RND_XY2_EXPAND 2, lsr | |
601 | .align 5 | |
602 | 4: RND_XY2_EXPAND 3, lsr | |
603 | endfunc | |