/*
 * Simple IDCT
 *
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 * Copyright (c) 2006 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

#define W1 22725  /* cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W2 21407  /* cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W3 19266  /* cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W4 16383  /* cos(4*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W5 12873  /* cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W6  8867  /* cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W7  4520  /* cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define ROW_SHIFT 11
#define COL_SHIFT 20

/* Coefficient pairs packed for the dual-16-bit multiplies (smulxy). */
#define W13 (W1 | (W3 << 16))
#define W26 (W2 | (W6 << 16))
#define W57 (W5 | (W7 << 16))
/*
 * 1-D inverse transform of one row of eight 16-bit coefficients,
 * in place at a1.  Uses the ARMv5TE 16x16 multiplies on the packed
 * coefficient pairs W13/W26/W57.  Rows that are zero except for the
 * DC term take a fast path.  Clobbers a2-a4, v1-v7, fp, ip, lr.
 */
function idct_row_armv5te
        str    lr, [sp, #-4]!

        ldrd   v1, v2, [a1, #8]         /* v1 = row[5:4], v2 = row[7:6] */
        ldrd   a3, a4, [a1]             /* a3 = row[1:0], a4 = row[3:2] */
        orrs   v1, v1, v2               /* Z: rows 4..7 all zero (v1 = 0) */
        itt    eq
        cmpeq  v1, a4                   /* ... and row[3:2] zero */
        cmpeq  v1, a3, lsr #16          /* ... and row[1] zero */
        beq    row_dc_only              /* only DC left -> fast path */

        /* even part: v1..v4 = a0..a3 from row[0] and row[2] */
        mov    v1, #(1<<(ROW_SHIFT-1))
        mov    ip, #16384
        sub    ip, ip, #1               /* ip = W4 */
        smlabb v1, ip, a3, v1           /* v1 = W4*row[0]+(1<<(RS-1)) */
        ldr    ip, =W26                 /* ip = W2 | (W6 << 16) */
        smultb a2, ip, a4               /* a2 = W6*row[2] */
        smulbb lr, ip, a4               /* lr = W2*row[2] */
        add    v2, v1, a2
        sub    v3, v1, a2
        sub    v4, v1, lr
        add    v1, v1, lr

        /* odd part from row[1] and row[3]: v5..fp = b0..b3 terms */
        ldr    ip, =W13                 /* ip = W1 | (W3 << 16) */
        ldr    lr, =W57                 /* lr = W5 | (W7 << 16) */
        smulbt v5, ip, a3
        smultt v6, lr, a4
        smlatt v5, ip, a4, v5           /* v5 = W1*row[1] + W3*row[3] */
        smultt a2, ip, a3
        smulbt v7, lr, a3
        sub    v6, v6, a2               /* v6 = W7*row[3] - W3*row[1] */
        smulbt a2, ip, a4
        smultt fp, lr, a3
        sub    v7, v7, a2               /* v7 = W5*row[1] - W1*row[3] */
        smulbt a2, lr, a4
        ldrd   a3, a4, [a1, #8]         /* a3=row[5:4] a4=row[7:6] */
        sub    fp, fp, a2               /* fp = W7*row[1] - W5*row[3] */

        orrs   a2, a3, a4
        beq    1f                       /* rows 4..7 zero: skip second half */

        /* fold row[5] and row[7] into the odd part */
        smlabt v5, lr, a3, v5           /* v5 += W5*row[5] */
        smlabt v6, ip, a3, v6           /* v6 += W1*row[5] */
        smlatt v5, lr, a4, v5           /* v5 += W7*row[7] */
        smlabt v6, lr, a4, v6           /* v6 += W5*row[7] */
        smlatt v7, lr, a3, v7           /* v7 += W7*row[5] */
        smlatt fp, ip, a3, fp           /* fp += W3*row[5] */
        smulbt a2, ip, a4
        smlatt v7, ip, a4, v7           /* v7 += W3*row[7] */
        sub    fp, fp, a2               /* fp -= W1*row[7] */

        /* fold row[4] and row[6] into the even part */
        ldr    ip, =W26                 /* ip = W2 | (W6 << 16) */
        mov    a2, #16384
        sub    a2, a2, #1               /* a2 = W4 */
        smulbb a2, a2, a3               /* a2 = W4*row[4] */
        smultb lr, ip, a4               /* lr = W6*row[6] */
        add    v1, v1, a2               /* v1 += W4*row[4] */
        add    v1, v1, lr               /* v1 += W6*row[6] */
        add    v4, v4, a2               /* v4 += W4*row[4] */
        sub    v4, v4, lr               /* v4 -= W6*row[6] */
        smulbb lr, ip, a4               /* lr = W2*row[6] */
        sub    v2, v2, a2               /* v2 -= W4*row[4] */
        sub    v2, v2, lr               /* v2 -= W2*row[6] */
        sub    v3, v3, a2               /* v3 -= W4*row[4] */
        add    v3, v3, lr               /* v3 += W2*row[6] */

        /*
         * Butterfly, descale by ROW_SHIFT and pack pairs of 16-bit
         * results.  lsr leaves the sign bits of the 21-bit result in
         * bits 16-20, so clear them (bic) before or-ing in the second
         * value shifted up to the high half.
         */
1:      add    a2, v1, v5               /* row[0] */
        mov    a3, a2, lsr #11
        bic    a3, a3, #0x1f0000
        sub    a2, v2, v6               /* row[1] */
        mov    a2, a2, lsr #11
        add    a3, a3, a2, lsl #16
        add    a2, v3, v7               /* row[2] */
        mov    a4, a2, lsr #11
        bic    a4, a4, #0x1f0000
        add    a2, v4, fp               /* row[3] */
        mov    a2, a2, lsr #11
        add    a4, a4, a2, lsl #16
        strd   a3, a4, [a1]

        sub    a2, v4, fp               /* row[4] */
        mov    a3, a2, lsr #11
        bic    a3, a3, #0x1f0000
        sub    a2, v3, v7               /* row[5] */
        mov    a2, a2, lsr #11
        add    a3, a3, a2, lsl #16
        add    a2, v2, v6               /* row[6] */
        mov    a4, a2, lsr #11
        bic    a4, a4, #0x1f0000
        sub    a2, v1, v5               /* row[7] */
        mov    a2, a2, lsr #11
        add    a4, a4, a2, lsl #16
        strd   a3, a4, [a1, #8]

        ldr    pc, [sp], #4

row_dc_only:
        /* All AC terms zero: every output is row[0] << 3 (~ W4*dc >> 11).
           Replicate dc into both halves, then shift; the bic keeps the
           low half's bits from shifting across into the high half. */
        orr    a3, a3, a3, lsl #16
        bic    a3, a3, #0xe000
        mov    a3, a3, lsl #3
        mov    a4, a3
        strd   a3, a4, [a1]
        strd   a3, a4, [a1, #8]

        ldr    pc, [sp], #4
endfunc
146 | ||
/*
 * 1-D inverse transform of two adjacent columns (interleaved in the
 * low/high 16-bit halves of each 32-bit load; rows are 16 bytes apart
 * from a1).  The even part (inputs 0/2/4/6) is computed first and saved
 * on the stack with stmfd; the odd part is then computed from inputs
 * 1/3/5/7 and left in registers:
 *   stack (low to high): v1,v3,v5,v7 = a0..a3 of col 0,
 *                        v2,v4,v6,fp = a0..a3 of col 1
 *   registers at end:    v1 = b0, v3 = -b1, v5 = b2, v7 = b3 (col 0),
 *                        v2,v4,v6,fp likewise for col 1
 * Clobbers a2-a4, ip, lr and pushes 8 words the caller must pop.
 */
.macro idct_col
        ldr    a4, [a1]                 /* a4 = col[1:0] */
        mov    ip, #16384
        sub    ip, ip, #1               /* ip = W4 */
#if 0
        mov    v1, #(1<<(COL_SHIFT-1))
        smlabt v2, ip, a4, v1           /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */
        smlabb v1, ip, a4, v1           /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */
        ldr    a4, [a1, #(16*4)]
#else
        /* add the rounding bias before the W4 multiply, then multiply
           by W4 = 2^14 - 1 via shift-and-subtract */
        mov    v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */
        add    v2, v1, a4, asr #16      /* v2 = bias/W4 + col 1 input */
        rsb    v2, v2, v2, lsl #14      /* v2 *= 2^14 - 1 (= W4) */
        mov    a4, a4, lsl #16
        add    v1, v1, a4, asr #16      /* same for col 0 (sign-extended) */
        ldr    a4, [a1, #(16*4)]
        rsb    v1, v1, v1, lsl #14
#endif

        /* even part: fold in input 4 ... */
        smulbb lr, ip, a4               /* lr = W4*col0[4] */
        smulbt a3, ip, a4               /* a3 = W4*col1[4] */
        sub    v3, v1, lr
        sub    v5, v1, lr
        add    v7, v1, lr
        add    v1, v1, lr
        sub    v4, v2, a3
        sub    v6, v2, a3
        add    fp, v2, a3
        ldr    ip, =W26
        ldr    a4, [a1, #(16*2)]
        add    v2, v2, a3

        /* ... input 2 ... */
        smulbb lr, ip, a4               /* W2*col0[2] */
        smultb a3, ip, a4               /* W6*col0[2] */
        add    v1, v1, lr
        sub    v7, v7, lr
        add    v3, v3, a3
        sub    v5, v5, a3
        smulbt lr, ip, a4               /* W2*col1[2] */
        smultt a3, ip, a4               /* W6*col1[2] */
        add    v2, v2, lr
        sub    fp, fp, lr
        add    v4, v4, a3
        ldr    a4, [a1, #(16*6)]
        sub    v6, v6, a3

        /* ... and input 6 */
        smultb lr, ip, a4               /* W6*col0[6] */
        smulbb a3, ip, a4               /* W2*col0[6] */
        add    v1, v1, lr
        sub    v7, v7, lr
        sub    v3, v3, a3
        add    v5, v5, a3
        smultt lr, ip, a4               /* W6*col1[6] */
        smulbt a3, ip, a4               /* W2*col1[6] */
        add    v2, v2, lr
        sub    fp, fp, lr
        sub    v4, v4, a3
        add    v6, v6, a3

        stmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp} /* save even part */

        /* odd part, input 1 ... */
        ldr    ip, =W13
        ldr    a4, [a1, #(16*1)]
        ldr    lr, =W57
        smulbb v1, ip, a4               /* W1*col0[1] */
        smultb v3, ip, a4               /* W3*col0[1] */
        smulbb v5, lr, a4               /* W5*col0[1] */
        smultb v7, lr, a4               /* W7*col0[1] */
        smulbt v2, ip, a4
        smultt v4, ip, a4
        smulbt v6, lr, a4
        smultt fp, lr, a4
        rsb    v4, v4, #0               /* v3/v4 accumulate -b1 */
        ldr    a4, [a1, #(16*3)]
        rsb    v3, v3, #0

        /* ... input 3 ... */
        smlatb v1, ip, a4, v1           /* += W3*col0[3] */
        smlatb v3, lr, a4, v3           /* += W7*col0[3] */
        smulbb a3, ip, a4
        smulbb a2, lr, a4
        sub    v5, v5, a3               /* -= W1*col0[3] */
        sub    v7, v7, a2               /* -= W5*col0[3] */
        smlatt v2, ip, a4, v2
        smlatt v4, lr, a4, v4
        smulbt a3, ip, a4
        smulbt a2, lr, a4
        sub    v6, v6, a3
        ldr    a4, [a1, #(16*5)]
        sub    fp, fp, a2

        /* ... input 5 ... */
        smlabb v1, lr, a4, v1           /* += W5*col0[5] */
        smlabb v3, ip, a4, v3           /* += W1*col0[5] */
        smlatb v5, lr, a4, v5           /* += W7*col0[5] */
        smlatb v7, ip, a4, v7           /* += W3*col0[5] */
        smlabt v2, lr, a4, v2
        smlabt v4, ip, a4, v4
        smlatt v6, lr, a4, v6
        ldr    a3, [a1, #(16*7)]
        smlatt fp, ip, a4, fp

        /* ... and input 7 */
        smlatb v1, lr, a3, v1           /* += W7*col0[7] */
        smlabb v3, lr, a3, v3           /* += W5*col0[7] */
        smlatb v5, ip, a3, v5           /* += W3*col0[7] */
        smulbb a4, ip, a3
        smlatt v2, lr, a3, v2
        sub    v7, v7, a4               /* -= W1*col0[7] */
        smlabt v4, lr, a3, v4
        smulbt a4, ip, a3
        smlatt v6, ip, a3, v6
        sub    fp, fp, a4
.endm
258 | ||
/*
 * Column transform, results written back into the coefficient block.
 * Pops the even part saved by idct_col pair by pair (a3 = col 0,
 * a4 = col 1), combines with the odd part left in v1..fp, descales by
 * COL_SHIFT and stores packed 16-bit pairs.  Outputs are produced in
 * the order rows 0,7 / 1,6 / 2,5 / 3,4.
 *
 * The col-0 value is shifted with lsr #20 and, when negative, the sign
 * bits 12-15 of the 16-bit result are patched back with orrmi; the
 * col-1 value uses asr #20 since its high bits are discarded by lsl.
 */
function idct_col_armv5te
        str    lr, [sp, #-4]!

        idct_col

        ldmfd  sp!, {a3, a4}            /* even part a0 (cols 0 and 1) */
        adds   a2, a3, v1               /* row 0 = a0 + b0 */
        mov    a2, a2, lsr #20
        it     mi
        orrmi  a2, a2, #0xf000          /* fix sign bits after lsr */
        add    ip, a4, v2
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1]
        subs   a3, a3, v1               /* row 7 = a0 - b0 */
        mov    a2, a3, lsr #20
        it     mi
        orrmi  a2, a2, #0xf000
        sub    a4, a4, v2
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        ldmfd  sp!, {a3, a4}            /* even part a1 */
        str    a2, [a1, #(16*7)]

        subs   a2, a3, v3               /* row 1 = a1 + b1 (v3 = -b1) */
        mov    a2, a2, lsr #20
        it     mi
        orrmi  a2, a2, #0xf000
        sub    ip, a4, v4
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1, #(16*1)]
        adds   a3, a3, v3               /* row 6 = a1 - b1 */
        mov    a2, a3, lsr #20
        it     mi
        orrmi  a2, a2, #0xf000
        add    a4, a4, v4
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        ldmfd  sp!, {a3, a4}            /* even part a2 */
        str    a2, [a1, #(16*6)]

        adds   a2, a3, v5               /* row 2 = a2 + b2 */
        mov    a2, a2, lsr #20
        it     mi
        orrmi  a2, a2, #0xf000
        add    ip, a4, v6
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1, #(16*2)]
        subs   a3, a3, v5               /* row 5 = a2 - b2 */
        mov    a2, a3, lsr #20
        it     mi
        orrmi  a2, a2, #0xf000
        sub    a4, a4, v6
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        ldmfd  sp!, {a3, a4}            /* even part a3 */
        str    a2, [a1, #(16*5)]

        adds   a2, a3, v7               /* row 3 = a3 + b3 */
        mov    a2, a2, lsr #20
        it     mi
        orrmi  a2, a2, #0xf000
        add    ip, a4, fp
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1, #(16*3)]
        subs   a3, a3, v7               /* row 4 = a3 - b3 */
        mov    a2, a3, lsr #20
        it     mi
        orrmi  a2, a2, #0xf000
        sub    a4, a4, fp
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        str    a2, [a1, #(16*4)]

        ldr    pc, [sp], #4
endfunc
338 | ||
/* dst = clamp(src, 0, 255).  \src may include a shifted operand,
   e.g. "a2, asr #20".  Uses the N flag from movs for the low clamp. */
.macro clip dst, src:vararg
        movs   \dst, \src
        it     mi
        movmi  \dst, #0                 /* negative -> 0 */
        cmp    \dst, #255
        it     gt
        movgt  \dst, #255               /* > 255 -> 255 */
.endm
347 | ||
/* dst = clamp(sum of \src operands, 0, 255) — add-then-saturate,
   used to add the IDCT result to an existing pixel value. */
.macro aclip dst, src:vararg
        adds   \dst, \src
        it     mi
        movmi  \dst, #0                 /* negative -> 0 */
        cmp    \dst, #255
        it     gt
        movgt  \dst, #255               /* > 255 -> 255 */
.endm
356 | ||
/*
 * Column transform, results stored as clamped 8-bit pixels (put).
 * After the local lr push and idct_col's 8-word stmfd, the caller's
 * saved arguments sit at [sp+28] = dest and [sp+32] = line_size
 * (offsets valid after the first ldmfd below).  The saved dest is
 * advanced by 2 for the next column pair.  Rows are stored in the
 * order 0,7,1,6,2,5,3,4; strh_pre / strh_dpre (asm.S macros) are
 * pre-incremented / pre-decremented halfword stores.
 */
function idct_col_put_armv5te
        str    lr, [sp, #-4]!

        idct_col

        ldmfd  sp!, {a3, a4}            /* even part a0 (cols 0/1) */
        ldr    lr, [sp, #32]            /* lr = line_size */
        add    a2, a3, v1               /* row 0, col 0 */
        clip   a2, a2, asr #20
        add    ip, a4, v2               /* row 0, col 1 */
        clip   ip, ip, asr #20
        orr    a2, a2, ip, lsl #8       /* pack two pixels */
        sub    a3, a3, v1               /* row 7, col 0 */
        clip   a3, a3, asr #20
        sub    a4, a4, v2               /* row 7, col 1 */
        clip   a4, a4, asr #20
        ldr    v1, [sp, #28]            /* v1 = dest */
        strh   a2, [v1]                 /* store row 0 */
        add    a2, v1, #2
        str    a2, [sp, #28]            /* saved dest += 2 for next pair */
        orr    a2, a3, a4, lsl #8
        rsb    v2, lr, lr, lsl #3       /* v2 = 7*line_size */
        ldmfd  sp!, {a3, a4}            /* even part a1 */
        strh_pre a2, v2, v1             /* store row 7 (v2 = dest+7*ls) */

        sub    a2, a3, v3               /* row 1 (v3 = -b1) */
        clip   a2, a2, asr #20
        sub    ip, a4, v4
        clip   ip, ip, asr #20
        orr    a2, a2, ip, lsl #8
        strh_pre a2, v1, lr             /* store row 1 (v1 += ls) */
        add    a3, a3, v3               /* row 6 */
        clip   a2, a3, asr #20
        add    a4, a4, v4
        clip   a4, a4, asr #20
        orr    a2, a2, a4, lsl #8
        ldmfd  sp!, {a3, a4}            /* even part a2 */
        strh_dpre a2, v2, lr            /* store row 6 (v2 -= ls) */

        add    a2, a3, v5               /* row 2 */
        clip   a2, a2, asr #20
        add    ip, a4, v6
        clip   ip, ip, asr #20
        orr    a2, a2, ip, lsl #8
        strh_pre a2, v1, lr             /* store row 2 */
        sub    a3, a3, v5               /* row 5 */
        clip   a2, a3, asr #20
        sub    a4, a4, v6
        clip   a4, a4, asr #20
        orr    a2, a2, a4, lsl #8
        ldmfd  sp!, {a3, a4}            /* even part a3 */
        strh_dpre a2, v2, lr            /* store row 5 */

        add    a2, a3, v7               /* row 3 */
        clip   a2, a2, asr #20
        add    ip, a4, fp
        clip   ip, ip, asr #20
        orr    a2, a2, ip, lsl #8
        strh   a2, [v1, lr]             /* store row 3 (no writeback) */
        sub    a3, a3, v7               /* row 4 */
        clip   a2, a3, asr #20
        sub    a4, a4, fp
        clip   a4, a4, asr #20
        orr    a2, a2, a4, lsl #8
        strh_dpre a2, v2, lr            /* store row 4 */

        ldr    pc, [sp], #4
endfunc
425 | ||
/*
 * Column transform, results added to the existing 8-bit pixels and
 * clamped (add).  After the local lr push and idct_col's stmfd the
 * caller's saved arguments are at [sp+36] = dest, [sp+40] = line_size
 * (offsets shrink by 8 after each ldmfd pair pop).  lr walks the
 * destination downwards from row 0, v2 walks upwards from row 7;
 * ldrh_pre / ldrh_dpre (asm.S macros) are pre-incremented /
 * pre-decremented halfword loads.  The saved dest is advanced by 2
 * for the next column pair.
 */
function idct_col_add_armv5te
        str    lr, [sp, #-4]!

        idct_col

        ldr    lr, [sp, #36]            /* lr = dest */

        ldmfd  sp!, {a3, a4}            /* even part a0 (cols 0/1) */
        ldrh   ip, [lr]                 /* ip = two pixels of row 0 */
        add    a2, a3, v1               /* row 0, col 0 */
        sub    a3, a3, v1               /* row 7, col 0 */
        and    v1, ip, #255             /* low pixel */
        aclip  a2, v1, a2, asr #20      /* pixel + idct, clamped */
        add    v1, a4, v2               /* row 0, col 1 */
        mov    v1, v1, asr #20
        aclip  v1, v1, ip, lsr #8       /* high pixel */
        orr    a2, a2, v1, lsl #8
        ldr    v1, [sp, #32]            /* v1 = line_size */
        sub    a4, a4, v2               /* row 7, col 1 */
        rsb    v2, v1, v1, lsl #3       /* v2 = 7*line_size */
        ldrh_pre ip, v2, lr             /* ip = row 7 pixels (v2 = dest+7*ls) */
        strh   a2, [lr]                 /* store row 0 */
        and    a2, ip, #255
        aclip  a3, a2, a3, asr #20
        mov    a4, a4, asr #20
        aclip  a4, a4, ip, lsr #8
        add    a2, lr, #2
        str    a2, [sp, #28]            /* saved dest += 2 for next pair */
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]                 /* store row 7 */

        ldmfd  sp!, {a3, a4}            /* even part a1 */
        ldrh_pre ip, lr, v1             /* row 1 pixels (lr += ls) */
        sub    a2, a3, v3               /* row 1 (v3 = -b1) */
        add    a3, a3, v3               /* row 6 */
        and    v3, ip, #255
        aclip  a2, v3, a2, asr #20
        sub    v3, a4, v4
        mov    v3, v3, asr #20
        aclip  v3, v3, ip, lsr #8
        orr    a2, a2, v3, lsl #8
        add    a4, a4, v4
        ldrh_dpre ip, v2, v1            /* row 6 pixels (v2 -= ls) */
        strh   a2, [lr]                 /* store row 1 */
        and    a2, ip, #255
        aclip  a3, a2, a3, asr #20
        mov    a4, a4, asr #20
        aclip  a4, a4, ip, lsr #8
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]                 /* store row 6 */

        ldmfd  sp!, {a3, a4}            /* even part a2 */
        ldrh_pre ip, lr, v1             /* row 2 pixels */
        add    a2, a3, v5               /* row 2 */
        sub    a3, a3, v5               /* row 5 */
        and    v3, ip, #255
        aclip  a2, v3, a2, asr #20
        add    v3, a4, v6
        mov    v3, v3, asr #20
        aclip  v3, v3, ip, lsr #8
        orr    a2, a2, v3, lsl #8
        sub    a4, a4, v6
        ldrh_dpre ip, v2, v1            /* row 5 pixels */
        strh   a2, [lr]                 /* store row 2 */
        and    a2, ip, #255
        aclip  a3, a2, a3, asr #20
        mov    a4, a4, asr #20
        aclip  a4, a4, ip, lsr #8
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]                 /* store row 5 */

        ldmfd  sp!, {a3, a4}            /* even part a3 */
        ldrh_pre ip, lr, v1             /* row 3 pixels */
        add    a2, a3, v7               /* row 3 */
        sub    a3, a3, v7               /* row 4 */
        and    v3, ip, #255
        aclip  a2, v3, a2, asr #20
        add    v3, a4, fp
        mov    v3, v3, asr #20
        aclip  v3, v3, ip, lsr #8
        orr    a2, a2, v3, lsl #8
        sub    a4, a4, fp
        ldrh_dpre ip, v2, v1            /* row 4 pixels */
        strh   a2, [lr]                 /* store row 3 */
        and    a2, ip, #255
        aclip  a3, a2, a3, asr #20
        mov    a4, a4, asr #20
        aclip  a4, a4, ip, lsr #8
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]                 /* store row 4 */

        ldr    pc, [sp], #4
endfunc
519 | ||
/*
 * In-place 2-D inverse DCT of an 8x8 block of 16-bit coefficients.
 * a1 = block.  One pass over the eight rows (16 bytes apart), then
 * one pass over the columns, two columns at a time (4 bytes apart).
 */
function ff_simple_idct_armv5te, export=1
        stmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr}

        /* row pass */
.rept 7
        bl     idct_row_armv5te
        add    a1, a1, #16
.endr
        bl     idct_row_armv5te

        sub    a1, a1, #(16*7)          /* rewind to the first row */

        /* column pass */
.rept 3
        bl     idct_col_armv5te
        add    a1, a1, #4
.endr
        bl     idct_col_armv5te

        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
endfunc
551 | ||
/*
 * 2-D IDCT of the 8x8 block at a3, result added to the pixels at a1
 * with a2 = line size.  dest and line_size are kept on the stack where
 * idct_col_add_armv5te expects to find them.
 */
function ff_simple_idct_add_armv5te, export=1
        stmfd  sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}

        mov    a1, a3                   /* work on the coefficient block */

        /* row pass */
.rept 7
        bl     idct_row_armv5te
        add    a1, a1, #16
.endr
        bl     idct_row_armv5te

        sub    a1, a1, #(16*7)          /* rewind to the first row */

        /* column pass, adding into the destination pixels */
.rept 3
        bl     idct_col_add_armv5te
        add    a1, a1, #4
.endr
        bl     idct_col_add_armv5te

        add    sp, sp, #8               /* drop saved dest/line_size */
        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
endfunc
586 | ||
/*
 * 2-D IDCT of the 8x8 block at a3, result stored as clamped 8-bit
 * pixels at a1 with a2 = line size.  dest and line_size are kept on
 * the stack where idct_col_put_armv5te expects to find them.
 */
function ff_simple_idct_put_armv5te, export=1
        stmfd  sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}

        mov    a1, a3                   /* work on the coefficient block */

        /* row pass */
.rept 7
        bl     idct_row_armv5te
        add    a1, a1, #16
.endr
        bl     idct_row_armv5te

        sub    a1, a1, #(16*7)          /* rewind to the first row */

        /* column pass, writing the destination pixels */
.rept 3
        bl     idct_col_put_armv5te
        add    a1, a1, #4
.endr
        bl     idct_col_put_armv5te

        add    sp, sp, #8               /* drop saved dest/line_size */
        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
endfunc