3 * MIPS Technologies, Inc., California.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * Authors: Darko Laus (darko@mips.com)
30 * Djordje Pesut (djordje@mips.com)
31 * Mirjana Vulin (mvulin@mips.com)
33 * This file is part of FFmpeg.
35 * FFmpeg is free software; you can redistribute it and/or
36 * modify it under the terms of the GNU Lesser General Public
37 * License as published by the Free Software Foundation; either
38 * version 2.1 of the License, or (at your option) any later version.
40 * FFmpeg is distributed in the hope that it will be useful,
41 * but WITHOUT ANY WARRANTY; without even the implied warranty of
42 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
43 * Lesser General Public License for more details.
45 * You should have received a copy of the GNU Lesser General Public
46 * License along with FFmpeg; if not, write to the Free Software
47 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
52 * Reference: libavcodec/aacdec.c
55 #include "libavcodec/aac.h"
56 #include "aacdec_mips.h"
57 #include "libavcodec/aactab.h"
58 #include "libavcodec/sinewin.h"
/*
 * One step of a linear congruential generator: previous_val is multiplied
 * by 1664525 and 1013904223 is added, all in unsigned arithmetic. The result
 * initializes a union that overlays an unsigned member (u) and an int
 * member (s).
 * NOTE(review): the statement that returns a value is not visible in this
 * listing; presumably the int member is returned -- confirm.
 */
61 static av_always_inline
int lcg_random(unsigned previous_val
)
63 union { unsigned u
; int s
; } v
= { previous_val
* 1664525u + 1013904223 };
/*
 * MIPS version of the inverse MDCT + windowing step for one channel element.
 *
 * Visible data flow: sce->coeffs is handed to imdct_half together with
 * ac->buf_mdct; ac->buf_mdct and sce->saved are then passed, with a window
 * table, to fdsp.vector_fmul_window targeting sce->ret; finally pointers
 * into sce->saved are set up as the targets of the last section.
 * Plain block copies are done by hand-written assembly loops that move
 * eight 32-bit words per iteration.
 *
 * NOTE(review): a number of lines of this function are not visible in this
 * listing (braces, else keywords, asm statement openers/closers, loop
 * labels, clobber lists, some declarations and the [dst] operand bindings).
 * The comments below describe only what the visible lines establish.
 */
67 static void imdct_and_windowing_mips(AACContext
*ac
, SingleChannelElement
*sce
)
69 IndividualChannelStream
*ics
= &sce
->ics
;
70 float *in
= sce
->coeffs
;
71 float *out
= sce
->ret
;
72 float *saved
= sce
->saved
;
/*
 * Window tables: swindow is selected by use_kb_window[0], the two *_prev
 * tables by use_kb_window[1] (index 1 presumably describes the previous
 * frame -- confirm against the struct definition).
 */
73 const float *swindow
= ics
->use_kb_window
[0] ? ff_aac_kbd_short_128
: ff_sine_128
;
74 const float *lwindow_prev
= ics
->use_kb_window
[1] ? ff_aac_kbd_long_1024
: ff_sine_1024
;
75 const float *swindow_prev
= ics
->use_kb_window
[1] ? ff_aac_kbd_short_128
: ff_sine_128
;
/* Scratch buffer passed to imdct_half next to the coefficient pointer. */
76 float *buf
= ac
->buf_mdct
;
/*
 * Transform step. For EIGHT_SHORT_SEQUENCE, i runs 0, 128, ..., 896, so
 * mdct_small.imdct_half is called eight times on 128-element slices.
 */
79 if (ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
) {
80 for (i
= 0; i
< 1024; i
+= 128)
81 ac
->mdct_small
.imdct_half(&ac
->mdct_small
, buf
+ i
, in
+ i
);
/*
 * Single call on ac->mdct over the whole buffer (presumably the else
 * branch; the keyword itself is not visible here).
 */
83 ac
->mdct
.imdct_half(&ac
->mdct
, buf
, in
);
86 * NOTE: To simplify the overlapping code, all 'meaningless' short to long
87 * and long to short transitions are considered to be short to short
88 * transitions. This leaves just two cases (long to long and short to short)
89 * with a little special sauce for EIGHT_SHORT_SEQUENCE.
/*
 * Taken when window_sequence[1] is ONLY_LONG or LONG_STOP and
 * window_sequence[0] is ONLY_LONG or LONG_START: one 512-length
 * vector_fmul_window call with the long previous window.
 */
91 if ((ics
->window_sequence
[1] == ONLY_LONG_SEQUENCE
|| ics
->window_sequence
[1] == LONG_STOP_SEQUENCE
) &&
92 (ics
->window_sequence
[0] == ONLY_LONG_SEQUENCE
|| ics
->window_sequence
[0] == LONG_START_SEQUENCE
)) {
93 ac
->fdsp
.vector_fmul_window( out
, saved
, buf
, lwindow_prev
, 512);
/*
 * Word-copy loop: loop_end = src + 1792 bytes (448 32-bit words). Each pass
 * loads eight words from src, advances src by 32, stores the words at dst,
 * and the addiu following the bne (executed in the branch delay slot under
 * .set noreorder) advances dst by 32.
 * NOTE(review): the declarations of buf1/loop_end and the [dst] binding are
 * not visible here; presumably this belongs to the other branch of the
 * test above -- confirm.
 */
98 int temp0
, temp1
, temp2
, temp3
, temp4
, temp5
, temp6
, temp7
;
101 /* loop unrolled 8 times */
104 ".set noreorder \n\t"
105 "addiu %[loop_end], %[src], 1792 \n\t"
107 "lw %[temp0], 0(%[src]) \n\t"
108 "lw %[temp1], 4(%[src]) \n\t"
109 "lw %[temp2], 8(%[src]) \n\t"
110 "lw %[temp3], 12(%[src]) \n\t"
111 "lw %[temp4], 16(%[src]) \n\t"
112 "lw %[temp5], 20(%[src]) \n\t"
113 "lw %[temp6], 24(%[src]) \n\t"
114 "lw %[temp7], 28(%[src]) \n\t"
115 "addiu %[src], %[src], 32 \n\t"
116 "sw %[temp0], 0(%[dst]) \n\t"
117 "sw %[temp1], 4(%[dst]) \n\t"
118 "sw %[temp2], 8(%[dst]) \n\t"
119 "sw %[temp3], 12(%[dst]) \n\t"
120 "sw %[temp4], 16(%[dst]) \n\t"
121 "sw %[temp5], 20(%[dst]) \n\t"
122 "sw %[temp6], 24(%[dst]) \n\t"
123 "sw %[temp7], 28(%[dst]) \n\t"
124 "bne %[src], %[loop_end], 1b \n\t"
125 " addiu %[dst], %[dst], 32 \n\t"
128 : [temp0
]"=&r"(temp0
), [temp1
]"=&r"(temp1
),
129 [temp2
]"=&r"(temp2
), [temp3
]"=&r"(temp3
),
130 [temp4
]"=&r"(temp4
), [temp5
]"=&r"(temp5
),
131 [temp6
]"=&r"(temp6
), [temp7
]"=&r"(temp7
),
132 [loop_end
]"=&r"(loop_end
), [src
]"+r"(buf1
),
/*
 * Eight-short overlap. The pointers below address out from offset 448,
 * saved at offsets 63 and 448, the two short window tables from both ends
 * (offset 0 and offset 64 + 63), and buf at offsets 63, 64 and 128 + 63.
 */
139 if (ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
) {
144 float temp0
, temp1
, temp2
, temp3
;
145 float *dst0
= out
+ 448 + 0*128;
146 float *dst1
= dst0
+ 64 + 63;
147 float *dst2
= saved
+ 63;
148 float *win0
= (float*)swindow
;
149 float *win1
= win0
+ 64 + 63;
150 float *win0_prev
= (float*)swindow_prev
;
151 float *win1_prev
= win0_prev
+ 64 + 63;
152 float *src0_prev
= saved
+ 448;
153 float *src1_prev
= buf
+ 0*128 + 63;
154 float *src0
= buf
+ 0*128 + 64;
155 float *src1
= buf
+ 1*128 + 63;
/*
 * 64 iterations. Every visible store has the form
 *   dst0[k] = a * wj - b * wi;   dst1[k] (or dst2[0]) = a * wi + b * wj;
 * with (a, b) being (temp0, temp1) or (temp2, temp3), at offsets
 * k = 0, 128, 256, 384 and 512 (the last one pairing dst0[512] with dst2[0]).
 * NOTE(review): the loads of wi/wj, the reloads of temp0..temp3 and the
 * pointer updates are on lines not visible in this listing.
 */
157 for(i
= 0; i
< 64; i
++)
159 temp0
= src0_prev
[0];
160 temp1
= src1_prev
[0];
165 dst0
[0] = temp0
* wj
- temp1
* wi
;
166 dst1
[0] = temp0
* wi
+ temp1
* wj
;
173 dst0
[128] = temp2
* wj
- temp3
* wi
;
174 dst1
[128] = temp2
* wi
+ temp3
* wj
;
178 dst0
[256] = temp0
* wj
- temp1
* wi
;
179 dst1
[256] = temp0
* wi
+ temp1
* wj
;
180 dst0
[384] = temp2
* wj
- temp3
* wi
;
181 dst1
[384] = temp2
* wi
+ temp3
* wj
;
185 dst0
[512] = temp0
* wj
- temp1
* wi
;
186 dst2
[0] = temp0
* wi
+ temp1
* wj
;
/*
 * Presumably the non-EIGHT_SHORT alternative (the branch keyword is not
 * visible): one 64-length vector_fmul_window call at offset 448 using the
 * previous short window.
 */
202 ac
->fdsp
.vector_fmul_window(out
+ 448, saved
+ 448, buf
, swindow_prev
, 64);
/*
 * Word-copy loop of 1792 bytes (448 words) reading from buf + 64; buf2
 * (out + 576) is presumably the [dst] operand, whose binding is not visible.
 */
204 float *buf1
= buf
+ 64;
205 float *buf2
= out
+ 576;
206 int temp0
, temp1
, temp2
, temp3
, temp4
, temp5
, temp6
, temp7
;
209 /* loop unrolled 8 times */
212 ".set noreorder \n\t"
213 "addiu %[loop_end], %[src], 1792 \n\t"
215 "lw %[temp0], 0(%[src]) \n\t"
216 "lw %[temp1], 4(%[src]) \n\t"
217 "lw %[temp2], 8(%[src]) \n\t"
218 "lw %[temp3], 12(%[src]) \n\t"
219 "lw %[temp4], 16(%[src]) \n\t"
220 "lw %[temp5], 20(%[src]) \n\t"
221 "lw %[temp6], 24(%[src]) \n\t"
222 "lw %[temp7], 28(%[src]) \n\t"
223 "addiu %[src], %[src], 32 \n\t"
224 "sw %[temp0], 0(%[dst]) \n\t"
225 "sw %[temp1], 4(%[dst]) \n\t"
226 "sw %[temp2], 8(%[dst]) \n\t"
227 "sw %[temp3], 12(%[dst]) \n\t"
228 "sw %[temp4], 16(%[dst]) \n\t"
229 "sw %[temp5], 20(%[dst]) \n\t"
230 "sw %[temp6], 24(%[dst]) \n\t"
231 "sw %[temp7], 28(%[dst]) \n\t"
232 "bne %[src], %[loop_end], 1b \n\t"
233 " addiu %[dst], %[dst], 32 \n\t"
236 : [temp0
]"=&r"(temp0
), [temp1
]"=&r"(temp1
),
237 [temp2
]"=&r"(temp2
), [temp3
]"=&r"(temp3
),
238 [temp4
]"=&r"(temp4
), [temp5
]"=&r"(temp5
),
239 [temp6
]"=&r"(temp6
), [temp7
]"=&r"(temp7
),
240 [loop_end
]"=&r"(loop_end
), [src
]"+r"(buf1
),
/*
 * Final section: every visible target from here on is inside sce->saved.
 * Eight-short case: three 64-length vector_fmul_window calls write at
 * saved + 64, + 192 and + 320 from the 5th..8th 128-element slices of buf,
 * followed by a 256-byte (64-word) copy starting at buf + 7*128 + 64.
 */
250 if (ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
) {
251 ac
->fdsp
.vector_fmul_window(saved
+ 64, buf
+ 4*128 + 64, buf
+ 5*128, swindow
, 64);
252 ac
->fdsp
.vector_fmul_window(saved
+ 192, buf
+ 5*128 + 64, buf
+ 6*128, swindow
, 64);
253 ac
->fdsp
.vector_fmul_window(saved
+ 320, buf
+ 6*128 + 64, buf
+ 7*128, swindow
, 64);
/* buf2 (saved + 448) is presumably the [dst] operand of the copy below. */
255 float *buf1
= buf
+ 7*128 + 64;
256 float *buf2
= saved
+ 448;
257 int temp0
, temp1
, temp2
, temp3
, temp4
, temp5
, temp6
, temp7
;
260 /* loop unrolled 8 times */
263 ".set noreorder \n\t"
264 "addiu %[loop_end], %[src], 256 \n\t"
266 "lw %[temp0], 0(%[src]) \n\t"
267 "lw %[temp1], 4(%[src]) \n\t"
268 "lw %[temp2], 8(%[src]) \n\t"
269 "lw %[temp3], 12(%[src]) \n\t"
270 "lw %[temp4], 16(%[src]) \n\t"
271 "lw %[temp5], 20(%[src]) \n\t"
272 "lw %[temp6], 24(%[src]) \n\t"
273 "lw %[temp7], 28(%[src]) \n\t"
274 "addiu %[src], %[src], 32 \n\t"
275 "sw %[temp0], 0(%[dst]) \n\t"
276 "sw %[temp1], 4(%[dst]) \n\t"
277 "sw %[temp2], 8(%[dst]) \n\t"
278 "sw %[temp3], 12(%[dst]) \n\t"
279 "sw %[temp4], 16(%[dst]) \n\t"
280 "sw %[temp5], 20(%[dst]) \n\t"
281 "sw %[temp6], 24(%[dst]) \n\t"
282 "sw %[temp7], 28(%[dst]) \n\t"
283 "bne %[src], %[loop_end], 1b \n\t"
284 " addiu %[dst], %[dst], 32 \n\t"
287 : [temp0
]"=&r"(temp0
), [temp1
]"=&r"(temp1
),
288 [temp2
]"=&r"(temp2
), [temp3
]"=&r"(temp3
),
289 [temp4
]"=&r"(temp4
), [temp5
]"=&r"(temp5
),
290 [temp6
]"=&r"(temp6
), [temp7
]"=&r"(temp7
),
291 [loop_end
]"=&r"(loop_end
), [src
]"+r"(buf1
),
/*
 * LONG_START case: a 1792-byte (448-word) copy reading from buf + 512,
 * then a 256-byte (64-word) copy reading from buf + 7*128 + 64.
 * NOTE(review): the destination of the first copy is declared on a line
 * not visible here.
 */
297 } else if (ics
->window_sequence
[0] == LONG_START_SEQUENCE
) {
298 float *buf1
= buf
+ 512;
300 int temp0
, temp1
, temp2
, temp3
, temp4
, temp5
, temp6
, temp7
;
303 /* loop unrolled 8 times */
306 ".set noreorder \n\t"
307 "addiu %[loop_end], %[src], 1792 \n\t"
309 "lw %[temp0], 0(%[src]) \n\t"
310 "lw %[temp1], 4(%[src]) \n\t"
311 "lw %[temp2], 8(%[src]) \n\t"
312 "lw %[temp3], 12(%[src]) \n\t"
313 "lw %[temp4], 16(%[src]) \n\t"
314 "lw %[temp5], 20(%[src]) \n\t"
315 "lw %[temp6], 24(%[src]) \n\t"
316 "lw %[temp7], 28(%[src]) \n\t"
317 "addiu %[src], %[src], 32 \n\t"
318 "sw %[temp0], 0(%[dst]) \n\t"
319 "sw %[temp1], 4(%[dst]) \n\t"
320 "sw %[temp2], 8(%[dst]) \n\t"
321 "sw %[temp3], 12(%[dst]) \n\t"
322 "sw %[temp4], 16(%[dst]) \n\t"
323 "sw %[temp5], 20(%[dst]) \n\t"
324 "sw %[temp6], 24(%[dst]) \n\t"
325 "sw %[temp7], 28(%[dst]) \n\t"
326 "bne %[src], %[loop_end], 1b \n\t"
327 " addiu %[dst], %[dst], 32 \n\t"
330 : [temp0
]"=&r"(temp0
), [temp1
]"=&r"(temp1
),
331 [temp2
]"=&r"(temp2
), [temp3
]"=&r"(temp3
),
332 [temp4
]"=&r"(temp4
), [temp5
]"=&r"(temp5
),
333 [temp6
]"=&r"(temp6
), [temp7
]"=&r"(temp7
),
334 [loop_end
]"=&r"(loop_end
), [src
]"+r"(buf1
),
/* Second copy of this branch; buf2 (saved + 448) is presumably its [dst]. */
340 float *buf1
= buf
+ 7*128 + 64;
341 float *buf2
= saved
+ 448;
342 int temp0
, temp1
, temp2
, temp3
, temp4
, temp5
, temp6
, temp7
;
345 /* loop unrolled 8 times */
348 ".set noreorder \n\t"
349 "addiu %[loop_end], %[src], 256 \n\t"
351 "lw %[temp0], 0(%[src]) \n\t"
352 "lw %[temp1], 4(%[src]) \n\t"
353 "lw %[temp2], 8(%[src]) \n\t"
354 "lw %[temp3], 12(%[src]) \n\t"
355 "lw %[temp4], 16(%[src]) \n\t"
356 "lw %[temp5], 20(%[src]) \n\t"
357 "lw %[temp6], 24(%[src]) \n\t"
358 "lw %[temp7], 28(%[src]) \n\t"
359 "addiu %[src], %[src], 32 \n\t"
360 "sw %[temp0], 0(%[dst]) \n\t"
361 "sw %[temp1], 4(%[dst]) \n\t"
362 "sw %[temp2], 8(%[dst]) \n\t"
363 "sw %[temp3], 12(%[dst]) \n\t"
364 "sw %[temp4], 16(%[dst]) \n\t"
365 "sw %[temp5], 20(%[dst]) \n\t"
366 "sw %[temp6], 24(%[dst]) \n\t"
367 "sw %[temp7], 28(%[dst]) \n\t"
368 "bne %[src], %[loop_end], 1b \n\t"
369 " addiu %[dst], %[dst], 32 \n\t"
372 : [temp0
]"=&r"(temp0
), [temp1
]"=&r"(temp1
),
373 [temp2
]"=&r"(temp2
), [temp3
]"=&r"(temp3
),
374 [temp4
]"=&r"(temp4
), [temp5
]"=&r"(temp5
),
375 [temp6
]"=&r"(temp6
), [temp7
]"=&r"(temp7
),
376 [loop_end
]"=&r"(loop_end
), [src
]"+r"(buf1
),
/*
 * Remaining window sequences: one 2048-byte (512-word) copy reading from
 * buf + 512; the destination is declared on a line not visible here.
 */
382 } else { // LONG_STOP or ONLY_LONG
383 float *buf1
= buf
+ 512;
385 int temp0
, temp1
, temp2
, temp3
, temp4
, temp5
, temp6
, temp7
;
388 /* loop unrolled 8 times */
391 ".set noreorder \n\t"
392 "addiu %[loop_end], %[src], 2048 \n\t"
394 "lw %[temp0], 0(%[src]) \n\t"
395 "lw %[temp1], 4(%[src]) \n\t"
396 "lw %[temp2], 8(%[src]) \n\t"
397 "lw %[temp3], 12(%[src]) \n\t"
398 "lw %[temp4], 16(%[src]) \n\t"
399 "lw %[temp5], 20(%[src]) \n\t"
400 "lw %[temp6], 24(%[src]) \n\t"
401 "lw %[temp7], 28(%[src]) \n\t"
402 "addiu %[src], %[src], 32 \n\t"
403 "sw %[temp0], 0(%[dst]) \n\t"
404 "sw %[temp1], 4(%[dst]) \n\t"
405 "sw %[temp2], 8(%[dst]) \n\t"
406 "sw %[temp3], 12(%[dst]) \n\t"
407 "sw %[temp4], 16(%[dst]) \n\t"
408 "sw %[temp5], 20(%[dst]) \n\t"
409 "sw %[temp6], 24(%[dst]) \n\t"
410 "sw %[temp7], 28(%[dst]) \n\t"
411 "bne %[src], %[loop_end], 1b \n\t"
412 " addiu %[dst], %[dst], 32 \n\t"
415 : [temp0
]"=&r"(temp0
), [temp1
]"=&r"(temp1
),
416 [temp2
]"=&r"(temp2
), [temp3
]"=&r"(temp3
),
417 [temp4
]"=&r"(temp4
), [temp5
]"=&r"(temp5
),
418 [temp6
]"=&r"(temp6
), [temp7
]"=&r"(temp7
),
419 [loop_end
]"=&r"(loop_end
), [src
]"+r"(buf1
),
/*
 * MIPS version of the long-term-prediction step for one channel element.
 *
 * Does its work only when window_sequence[0] is not EIGHT_SHORT_SEQUENCE:
 *  1. fills sce->ret (predTime) with sce->ltp_state samples, offset by
 *     2048 - ltp->lag and scaled by ltp->coef;
 *  2. zero-fills the rest of predTime up to index 2048 with "sw $0" stores;
 *  3. calls ac->windowing_and_mdct_ltp with ac->buf_mdct (predFreq) and
 *     predTime, then ac->apply_tns on predFreq when sce->tns.present is set;
 *  4. adds predFreq into sce->coeffs band by band, using swb_offset for the
 *     band boundaries.
 *
 * NOTE(review): some lines are not visible in this listing (braces, the
 * declarations of i/j/k/sfb/p_predTime, asm statement openers/closers and
 * at least two guarding conditions); see the notes below.
 */
427 static void apply_ltp_mips(AACContext
*ac
, SingleChannelElement
*sce
)
429 const LongTermPrediction
*ltp
= &sce
->ics
.ltp
;
430 const uint16_t *offsets
= sce
->ics
.swb_offset
;
434 if (sce
->ics
.window_sequence
[0] != EIGHT_SHORT_SEQUENCE
) {
435 float *predTime
= sce
->ret
;
436 float *predFreq
= ac
->buf_mdct
;
438 int16_t num_samples
= 2048;
/*
 * NOTE(review): this reassignment is presumably guarded by a test on
 * ltp->lag located on a line not visible here -- confirm.
 * j counts whole groups of four trailing words to clear, k the 0..3
 * left-over words, so 4 * j + k == 2048 - num_samples.
 */
441 num_samples
= ltp
->lag
+ 1024;
442 j
= (2048 - num_samples
) >> 2;
443 k
= (2048 - num_samples
) & 3;
444 p_predTime
= &predTime
[num_samples
];
/* Scaled copy of the prediction history into predTime. */
446 for (i
= 0; i
< num_samples
; i
++)
447 predTime
[i
] = sce
->ltp_state
[i
+ 2048 - ltp
->lag
] * ltp
->coef
;
/* Clear four words per pass and advance p_predTime by 16 bytes. */
448 for (i
= 0; i
< j
; i
++) {
450 /* loop unrolled 4 times */
452 "sw $0, 0(%[p_predTime]) \n\t"
453 "sw $0, 4(%[p_predTime]) \n\t"
454 "sw $0, 8(%[p_predTime]) \n\t"
455 "sw $0, 12(%[p_predTime]) \n\t"
456 "addiu %[p_predTime], %[p_predTime], 16 \n\t"
458 : [p_predTime
]"+r"(p_predTime
)
/* Clear the remaining k words one at a time. */
463 for (i
= 0; i
< k
; i
++) {
466 "sw $0, 0(%[p_predTime]) \n\t"
467 "addiu %[p_predTime], %[p_predTime], 4 \n\t"
469 : [p_predTime
]"+r"(p_predTime
)
/* Hand the time-domain prediction to the context's windowing + MDCT hook. */
475 ac
->windowing_and_mdct_ltp(ac
, predFreq
, predTime
, &sce
->ics
);
477 if (sce
->tns
.present
)
478 ac
->apply_tns(predFreq
, &sce
->tns
, &sce
->ics
, 0);
/*
 * Accumulate the prediction into the coefficients for bands
 * 0 .. min(max_sfb, MAX_LTP_LONG_SFB) - 1.
 * NOTE(review): a per-band condition may sit between the two loop headers
 * on a line not visible here -- confirm.
 */
480 for (sfb
= 0; sfb
< FFMIN(sce
->ics
.max_sfb
, MAX_LTP_LONG_SFB
); sfb
++)
482 for (i
= offsets
[sfb
]; i
< offsets
[sfb
+ 1]; i
++)
483 sce
->coeffs
[i
] += predFreq
[i
];
/*
 * MIPS version of the long-term-prediction state update for one channel.
 *
 * sce->coeffs is used as a work area (saved_ltp). Depending on
 * window_sequence[0] it is filled from sce->saved, from zeros, and from
 * products of ac->buf_mdct samples (read backwards from index 1023) with
 * window coefficients (read backwards from the middle of the table).
 * The closing assembly block then performs three 4096-byte copies:
 *   sce->ltp_state + 1024 -> sce->ltp_state
 *   sce->ret              -> sce->ltp_state + 1024
 *   saved_ltp             -> sce->ltp_state + 2048
 *
 * NOTE(review): some lines are not visible in this listing (braces, the
 * declaration of i, one buf declaration, asm statement openers/closers,
 * loop labels, clobber lists and a few operand bindings). Confirm that the
 * asm statements carry a "memory" clobber, since they write through
 * pointers.
 */
488 static void update_ltp_mips(AACContext
*ac
, SingleChannelElement
*sce
)
490 IndividualChannelStream
*ics
= &sce
->ics
;
491 float *saved
= sce
->saved
;
492 float *saved_ltp
= sce
->coeffs
;
493 const float *lwindow
= ics
->use_kb_window
[0] ? ff_aac_kbd_long_1024
: ff_sine_1024
;
494 const float *swindow
= ics
->use_kb_window
[0] ? ff_aac_kbd_short_128
: ff_sine_128
;
/*
 * NOTE(review): the loop_end* variables hold addresses (pointer values
 * cast to int below, or computed by addiu from a pointer operand); an int
 * cannot hold a full address where pointers are wider than int -- confirm
 * the target ABI.
 */
496 int loop_end
, loop_end1
, loop_end2
;
/*
 * NOTE(review): temp0..temp7 are declared float but are bound with "=&r"
 * constraints in the lw/sw copy loops and with "=&f" in the lwc1/mul.s
 * loops -- confirm this is intended.
 */
497 float temp0
, temp1
, temp2
, temp3
, temp4
, temp5
, temp6
, temp7
, temp8
, temp9
, temp10
, temp11
;
499 if (ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
) {
501 float *buf0
= saved_ltp
;
502 float *p_saved_ltp
= saved_ltp
+ 576;
503 float *ptr1
= &saved_ltp
[512];
504 float *ptr2
= &ac
->buf_mdct
[1023];
505 float *ptr3
= (float*)&swindow
[63];
/* End address of the zero-fill loop: 448 floats past saved_ltp + 576. */
506 loop_end1
= (int)(p_saved_ltp
+ 448);
/*
 * 2048-byte (512-word) copy, eight words per pass; src is bound to buf.
 * NOTE(review): the declaration of buf and the [dst] binding are not
 * visible here (buf0 is presumably the destination).
 */
508 /* loop unrolled 8 times */
511 ".set noreorder \n\t"
512 "addiu %[loop_end], %[src], 2048 \n\t"
514 "lw %[temp0], 0(%[src]) \n\t"
515 "lw %[temp1], 4(%[src]) \n\t"
516 "lw %[temp2], 8(%[src]) \n\t"
517 "lw %[temp3], 12(%[src]) \n\t"
518 "lw %[temp4], 16(%[src]) \n\t"
519 "lw %[temp5], 20(%[src]) \n\t"
520 "lw %[temp6], 24(%[src]) \n\t"
521 "lw %[temp7], 28(%[src]) \n\t"
522 "addiu %[src], %[src], 32 \n\t"
523 "sw %[temp0], 0(%[dst]) \n\t"
524 "sw %[temp1], 4(%[dst]) \n\t"
525 "sw %[temp2], 8(%[dst]) \n\t"
526 "sw %[temp3], 12(%[dst]) \n\t"
527 "sw %[temp4], 16(%[dst]) \n\t"
528 "sw %[temp5], 20(%[dst]) \n\t"
529 "sw %[temp6], 24(%[dst]) \n\t"
530 "sw %[temp7], 28(%[dst]) \n\t"
531 "bne %[src], %[loop_end], 1b \n\t"
532 " addiu %[dst], %[dst], 32 \n\t"
535 : [temp0
]"=&r"(temp0
), [temp1
]"=&r"(temp1
),
536 [temp2
]"=&r"(temp2
), [temp3
]"=&r"(temp3
),
537 [temp4
]"=&r"(temp4
), [temp5
]"=&r"(temp5
),
538 [temp6
]"=&r"(temp6
), [temp7
]"=&r"(temp7
),
539 [loop_end
]"=&r"(loop_end
), [src
]"+r"(buf
),
/*
 * Zero-fill: eight "sw $0" stores per pass, p_saved_ltp advancing by 32
 * bytes until it equals loop_end1.
 */
545 /* loop unrolled 8 times */
548 "sw $0, 0(%[p_saved_ltp]) \n\t"
549 "sw $0, 4(%[p_saved_ltp]) \n\t"
550 "sw $0, 8(%[p_saved_ltp]) \n\t"
551 "sw $0, 12(%[p_saved_ltp]) \n\t"
552 "sw $0, 16(%[p_saved_ltp]) \n\t"
553 "sw $0, 20(%[p_saved_ltp]) \n\t"
554 "sw $0, 24(%[p_saved_ltp]) \n\t"
555 "sw $0, 28(%[p_saved_ltp]) \n\t"
556 "addiu %[p_saved_ltp], %[p_saved_ltp], 32 \n\t"
557 "bne %[p_saved_ltp], %[loop_end1], 1b \n\t"
559 : [p_saved_ltp
]"+r"(p_saved_ltp
)
560 : [loop_end1
]"r"(loop_end1
)
564 ac
->fdsp
.vector_fmul_reverse(saved_ltp
+ 448, ac
->buf_mdct
+ 960, &swindow
[64], 64);
/*
 * 16 passes x 4 products: each pass loads four floats walking down from
 * ptr2 and four from ptr3, multiplies them pairwise and stores the four
 * products walking up from ptr1, i.e.
 *   saved_ltp[512 + n] = buf_mdct[1023 - n] * swindow[63 - n], n = 0..63.
 */
565 for (i
= 0; i
< 16; i
++){
566 /* loop unrolled 4 times */
568 "lwc1 %[temp0], 0(%[ptr2]) \n\t"
569 "lwc1 %[temp1], -4(%[ptr2]) \n\t"
570 "lwc1 %[temp2], -8(%[ptr2]) \n\t"
571 "lwc1 %[temp3], -12(%[ptr2]) \n\t"
572 "lwc1 %[temp4], 0(%[ptr3]) \n\t"
573 "lwc1 %[temp5], -4(%[ptr3]) \n\t"
574 "lwc1 %[temp6], -8(%[ptr3]) \n\t"
575 "lwc1 %[temp7], -12(%[ptr3]) \n\t"
576 "mul.s %[temp8], %[temp0], %[temp4] \n\t"
577 "mul.s %[temp9], %[temp1], %[temp5] \n\t"
578 "mul.s %[temp10], %[temp2], %[temp6] \n\t"
579 "mul.s %[temp11], %[temp3], %[temp7] \n\t"
580 "swc1 %[temp8], 0(%[ptr1]) \n\t"
581 "swc1 %[temp9], 4(%[ptr1]) \n\t"
582 "swc1 %[temp10], 8(%[ptr1]) \n\t"
583 "swc1 %[temp11], 12(%[ptr1]) \n\t"
584 "addiu %[ptr1], %[ptr1], 16 \n\t"
585 "addiu %[ptr2], %[ptr2], -16 \n\t"
586 "addiu %[ptr3], %[ptr3], -16 \n\t"
588 : [temp0
]"=&f"(temp0
), [temp1
]"=&f"(temp1
),
589 [temp2
]"=&f"(temp2
), [temp3
]"=&f"(temp3
),
590 [temp4
]"=&f"(temp4
), [temp5
]"=&f"(temp5
),
591 [temp6
]"=&f"(temp6
), [temp7
]"=&f"(temp7
),
592 [temp8
]"=&f"(temp8
), [temp9
]"=&f"(temp9
),
593 [temp10
]"=&f"(temp10
), [temp11
]"=&f"(temp11
),
594 [ptr1
]"+r"(ptr1
), [ptr2
]"+r"(ptr2
), [ptr3
]"+r"(ptr3
)
599 } else if (ics
->window_sequence
[0] == LONG_START_SEQUENCE
) {
600 float *buff0
= saved
;
601 float *buff1
= saved_ltp
;
602 float *ptr1
= &saved_ltp
[512];
603 float *ptr2
= &ac
->buf_mdct
[1023];
604 float *ptr3
= (float*)&swindow
[63];
/* The copy below stops when src reaches saved + 448 (448 floats). */
605 loop_end
= (int)(saved
+ 448);
/*
 * Combined copy and zero-fill: each pass copies eight words from src
 * (saved) to dst (saved_ltp) and also stores eight zero words at
 * dst + 2304 bytes, i.e. 576 words further into saved_ltp.
 */
607 /* loop unrolled 8 times */
610 ".set noreorder \n\t"
612 "lw %[temp0], 0(%[src]) \n\t"
613 "lw %[temp1], 4(%[src]) \n\t"
614 "lw %[temp2], 8(%[src]) \n\t"
615 "lw %[temp3], 12(%[src]) \n\t"
616 "lw %[temp4], 16(%[src]) \n\t"
617 "lw %[temp5], 20(%[src]) \n\t"
618 "lw %[temp6], 24(%[src]) \n\t"
619 "lw %[temp7], 28(%[src]) \n\t"
620 "addiu %[src], %[src], 32 \n\t"
621 "sw %[temp0], 0(%[dst]) \n\t"
622 "sw %[temp1], 4(%[dst]) \n\t"
623 "sw %[temp2], 8(%[dst]) \n\t"
624 "sw %[temp3], 12(%[dst]) \n\t"
625 "sw %[temp4], 16(%[dst]) \n\t"
626 "sw %[temp5], 20(%[dst]) \n\t"
627 "sw %[temp6], 24(%[dst]) \n\t"
628 "sw %[temp7], 28(%[dst]) \n\t"
629 "sw $0, 2304(%[dst]) \n\t"
630 "sw $0, 2308(%[dst]) \n\t"
631 "sw $0, 2312(%[dst]) \n\t"
632 "sw $0, 2316(%[dst]) \n\t"
633 "sw $0, 2320(%[dst]) \n\t"
634 "sw $0, 2324(%[dst]) \n\t"
635 "sw $0, 2328(%[dst]) \n\t"
636 "sw $0, 2332(%[dst]) \n\t"
637 "bne %[src], %[loop_end], 1b \n\t"
638 " addiu %[dst], %[dst], 32 \n\t"
641 : [temp0
]"=&r"(temp0
), [temp1
]"=&r"(temp1
),
642 [temp2
]"=&r"(temp2
), [temp3
]"=&r"(temp3
),
643 [temp4
]"=&r"(temp4
), [temp5
]"=&r"(temp5
),
644 [temp6
]"=&r"(temp6
), [temp7
]"=&r"(temp7
),
645 [src
]"+r"(buff0
), [dst
]"+r"(buff1
)
646 : [loop_end
]"r"(loop_end
)
649 ac
->fdsp
.vector_fmul_reverse(saved_ltp
+ 448, ac
->buf_mdct
+ 960, &swindow
[64], 64);
/* Same 16 x 4 reversed multiply as in the eight-short branch. */
650 for (i
= 0; i
< 16; i
++){
651 /* loop unrolled 4 times: four loads per operand, four products, four stores per pass */
653 "lwc1 %[temp0], 0(%[ptr2]) \n\t"
654 "lwc1 %[temp1], -4(%[ptr2]) \n\t"
655 "lwc1 %[temp2], -8(%[ptr2]) \n\t"
656 "lwc1 %[temp3], -12(%[ptr2]) \n\t"
657 "lwc1 %[temp4], 0(%[ptr3]) \n\t"
658 "lwc1 %[temp5], -4(%[ptr3]) \n\t"
659 "lwc1 %[temp6], -8(%[ptr3]) \n\t"
660 "lwc1 %[temp7], -12(%[ptr3]) \n\t"
661 "mul.s %[temp8], %[temp0], %[temp4] \n\t"
662 "mul.s %[temp9], %[temp1], %[temp5] \n\t"
663 "mul.s %[temp10], %[temp2], %[temp6] \n\t"
664 "mul.s %[temp11], %[temp3], %[temp7] \n\t"
665 "swc1 %[temp8], 0(%[ptr1]) \n\t"
666 "swc1 %[temp9], 4(%[ptr1]) \n\t"
667 "swc1 %[temp10], 8(%[ptr1]) \n\t"
668 "swc1 %[temp11], 12(%[ptr1]) \n\t"
669 "addiu %[ptr1], %[ptr1], 16 \n\t"
670 "addiu %[ptr2], %[ptr2], -16 \n\t"
671 "addiu %[ptr3], %[ptr3], -16 \n\t"
673 : [temp0
]"=&f"(temp0
), [temp1
]"=&f"(temp1
),
674 [temp2
]"=&f"(temp2
), [temp3
]"=&f"(temp3
),
675 [temp4
]"=&f"(temp4
), [temp5
]"=&f"(temp5
),
676 [temp6
]"=&f"(temp6
), [temp7
]"=&f"(temp7
),
677 [temp8
]"=&f"(temp8
), [temp9
]"=&f"(temp9
),
678 [temp10
]"=&f"(temp10
), [temp11
]"=&f"(temp11
),
679 [ptr1
]"+r"(ptr1
), [ptr2
]"+r"(ptr2
), [ptr3
]"+r"(ptr3
)
684 } else { // LONG_STOP or ONLY_LONG
685 float *ptr1
, *ptr2
, *ptr3
;
/* First half of saved_ltp comes from the fdsp helper with the long window. */
686 ac
->fdsp
.vector_fmul_reverse(saved_ltp
, ac
->buf_mdct
+ 512, &lwindow
[512], 512);
688 ptr1
= &saved_ltp
[512];
689 ptr2
= &ac
->buf_mdct
[1023];
690 ptr3
= (float*)&lwindow
[511];
/*
 * Second half: 128 passes x 4 products,
 *   saved_ltp[512 + n] = buf_mdct[1023 - n] * lwindow[511 - n], n = 0..511.
 * NOTE(review): the ptr3 operand binding is on a line not visible here.
 */
692 for (i
= 0; i
< 512; i
+=4){
693 /* loop unrolled 4 times */
695 "lwc1 %[temp0], 0(%[ptr2]) \n\t"
696 "lwc1 %[temp1], -4(%[ptr2]) \n\t"
697 "lwc1 %[temp2], -8(%[ptr2]) \n\t"
698 "lwc1 %[temp3], -12(%[ptr2]) \n\t"
699 "lwc1 %[temp4], 0(%[ptr3]) \n\t"
700 "lwc1 %[temp5], -4(%[ptr3]) \n\t"
701 "lwc1 %[temp6], -8(%[ptr3]) \n\t"
702 "lwc1 %[temp7], -12(%[ptr3]) \n\t"
703 "mul.s %[temp8], %[temp0], %[temp4] \n\t"
704 "mul.s %[temp9], %[temp1], %[temp5] \n\t"
705 "mul.s %[temp10], %[temp2], %[temp6] \n\t"
706 "mul.s %[temp11], %[temp3], %[temp7] \n\t"
707 "swc1 %[temp8], 0(%[ptr1]) \n\t"
708 "swc1 %[temp9], 4(%[ptr1]) \n\t"
709 "swc1 %[temp10], 8(%[ptr1]) \n\t"
710 "swc1 %[temp11], 12(%[ptr1]) \n\t"
711 "addiu %[ptr1], %[ptr1], 16 \n\t"
712 "addiu %[ptr2], %[ptr2], -16 \n\t"
713 "addiu %[ptr3], %[ptr3], -16 \n\t"
715 : [temp0
]"=&f"(temp0
), [temp1
]"=&f"(temp1
),
716 [temp2
]"=&f"(temp2
), [temp3
]"=&f"(temp3
),
717 [temp4
]"=&f"(temp4
), [temp5
]"=&f"(temp5
),
718 [temp6
]"=&f"(temp6
), [temp7
]"=&f"(temp7
),
719 [temp8
]"=&f"(temp8
), [temp9
]"=&f"(temp9
),
720 [temp10
]"=&f"(temp10
), [temp11
]"=&f"(temp11
),
721 [ptr1
]"+r"(ptr1
), [ptr2
]"+r"(ptr2
),
/*
 * Source/destination pairs for the three copies that follow:
 *   buf1 -> buf2 : ltp_state + 1024 -> ltp_state
 *   buf3 -> buf4 : ret              -> ltp_state + 1024
 *   buf5 -> buf6 : saved_ltp        -> ltp_state + 2048
 */
730 float *buf1
= sce
->ltp_state
+1024;
731 float *buf2
= sce
->ltp_state
;
732 float *buf3
= sce
->ret
;
733 float *buf4
= sce
->ltp_state
+1024;
734 float *buf5
= saved_ltp
;
735 float *buf6
= sce
->ltp_state
+2048;
/*
 * Three back-to-back copy loops of 4096 bytes (1024 words) each. All end
 * addresses are computed up front; the first copy must finish before the
 * second overwrites ltp_state + 1024, which the sequential loops ensure.
 */
737 /* loops unrolled 8 times */
740 ".set noreorder \n\t"
741 "addiu %[loop_end], %[src], 4096 \n\t"
742 "addiu %[loop_end1], %[src1], 4096 \n\t"
743 "addiu %[loop_end2], %[src2], 4096 \n\t"
745 "lw %[temp0], 0(%[src]) \n\t"
746 "lw %[temp1], 4(%[src]) \n\t"
747 "lw %[temp2], 8(%[src]) \n\t"
748 "lw %[temp3], 12(%[src]) \n\t"
749 "lw %[temp4], 16(%[src]) \n\t"
750 "lw %[temp5], 20(%[src]) \n\t"
751 "lw %[temp6], 24(%[src]) \n\t"
752 "lw %[temp7], 28(%[src]) \n\t"
753 "addiu %[src], %[src], 32 \n\t"
754 "sw %[temp0], 0(%[dst]) \n\t"
755 "sw %[temp1], 4(%[dst]) \n\t"
756 "sw %[temp2], 8(%[dst]) \n\t"
757 "sw %[temp3], 12(%[dst]) \n\t"
758 "sw %[temp4], 16(%[dst]) \n\t"
759 "sw %[temp5], 20(%[dst]) \n\t"
760 "sw %[temp6], 24(%[dst]) \n\t"
761 "sw %[temp7], 28(%[dst]) \n\t"
762 "bne %[src], %[loop_end], 1b \n\t"
763 " addiu %[dst], %[dst], 32 \n\t"
765 "lw %[temp0], 0(%[src1]) \n\t"
766 "lw %[temp1], 4(%[src1]) \n\t"
767 "lw %[temp2], 8(%[src1]) \n\t"
768 "lw %[temp3], 12(%[src1]) \n\t"
769 "lw %[temp4], 16(%[src1]) \n\t"
770 "lw %[temp5], 20(%[src1]) \n\t"
771 "lw %[temp6], 24(%[src1]) \n\t"
772 "lw %[temp7], 28(%[src1]) \n\t"
773 "addiu %[src1], %[src1], 32 \n\t"
774 "sw %[temp0], 0(%[dst1]) \n\t"
775 "sw %[temp1], 4(%[dst1]) \n\t"
776 "sw %[temp2], 8(%[dst1]) \n\t"
777 "sw %[temp3], 12(%[dst1]) \n\t"
778 "sw %[temp4], 16(%[dst1]) \n\t"
779 "sw %[temp5], 20(%[dst1]) \n\t"
780 "sw %[temp6], 24(%[dst1]) \n\t"
781 "sw %[temp7], 28(%[dst1]) \n\t"
782 "bne %[src1], %[loop_end1], 2b \n\t"
783 " addiu %[dst1], %[dst1], 32 \n\t"
785 "lw %[temp0], 0(%[src2]) \n\t"
786 "lw %[temp1], 4(%[src2]) \n\t"
787 "lw %[temp2], 8(%[src2]) \n\t"
788 "lw %[temp3], 12(%[src2]) \n\t"
789 "lw %[temp4], 16(%[src2]) \n\t"
790 "lw %[temp5], 20(%[src2]) \n\t"
791 "lw %[temp6], 24(%[src2]) \n\t"
792 "lw %[temp7], 28(%[src2]) \n\t"
793 "addiu %[src2], %[src2], 32 \n\t"
794 "sw %[temp0], 0(%[dst2]) \n\t"
795 "sw %[temp1], 4(%[dst2]) \n\t"
796 "sw %[temp2], 8(%[dst2]) \n\t"
797 "sw %[temp3], 12(%[dst2]) \n\t"
798 "sw %[temp4], 16(%[dst2]) \n\t"
799 "sw %[temp5], 20(%[dst2]) \n\t"
800 "sw %[temp6], 24(%[dst2]) \n\t"
801 "sw %[temp7], 28(%[dst2]) \n\t"
802 "bne %[src2], %[loop_end2], 3b \n\t"
803 " addiu %[dst2], %[dst2], 32 \n\t"
806 : [temp0
]"=&r"(temp0
), [temp1
]"=&r"(temp1
),
807 [temp2
]"=&r"(temp2
), [temp3
]"=&r"(temp3
),
808 [temp4
]"=&r"(temp4
), [temp5
]"=&r"(temp5
),
809 [temp6
]"=&r"(temp6
), [temp7
]"=&r"(temp7
),
810 [loop_end
]"=&r"(loop_end
), [loop_end1
]"=&r"(loop_end1
),
811 [loop_end2
]"=&r"(loop_end2
), [src
]"+r"(buf1
),
812 [dst
]"+r"(buf2
), [src1
]"+r"(buf3
), [dst1
]"+r"(buf4
),
813 [src2
]"+r"(buf5
), [dst2
]"+r"(buf6
)
819 #endif /* HAVE_MIPSFPU */
820 #endif /* HAVE_INLINE_ASM */
/*
 * Install the MIPS implementations defined in this file into the decoder
 * context: c->imdct_and_windowing, c->apply_ltp and c->update_ltp are
 * pointed at the corresponding *_mips functions.
 * NOTE(review): the trailing #endif comments indicate that the assignments
 * sit inside HAVE_INLINE_ASM / HAVE_MIPSFPU conditionals whose opening #if
 * lines (and the function's braces) are not visible in this listing.
 */
822 void ff_aacdec_init_mips(AACContext
*c
)
825 c
->imdct_and_windowing
= imdct_and_windowing_mips
;
826 c
->apply_ltp
= apply_ltp_mips
;
828 c
->update_ltp
= update_ltp_mips
;
829 #endif /* HAVE_MIPSFPU */
830 #endif /* HAVE_INLINE_ASM */