Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libavcodec / mips / aacdec_mips.c
CommitLineData
2ba45a60
DM
1/*
2 * Copyright (c) 2012
3 * MIPS Technologies, Inc., California.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * Authors: Darko Laus (darko@mips.com)
30 * Djordje Pesut (djordje@mips.com)
31 * Mirjana Vulin (mvulin@mips.com)
32 *
33 * This file is part of FFmpeg.
34 *
35 * FFmpeg is free software; you can redistribute it and/or
36 * modify it under the terms of the GNU Lesser General Public
37 * License as published by the Free Software Foundation; either
38 * version 2.1 of the License, or (at your option) any later version.
39 *
40 * FFmpeg is distributed in the hope that it will be useful,
41 * but WITHOUT ANY WARRANTY; without even the implied warranty of
42 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
43 * Lesser General Public License for more details.
44 *
45 * You should have received a copy of the GNU Lesser General Public
46 * License along with FFmpeg; if not, write to the Free Software
47 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
48 */
49
50/**
51 * @file
52 * Reference: libavcodec/aacdec.c
53 */
54
55#include "libavcodec/aac.h"
56#include "aacdec_mips.h"
57#include "libavcodec/aactab.h"
58#include "libavcodec/sinewin.h"
59
60#if HAVE_INLINE_ASM
/**
 * Advance a linear congruential generator by one step.
 *
 * The next state is previous_val * 1664525 + 1013904223, evaluated in
 * unsigned arithmetic so that it wraps instead of overflowing.
 *
 * @param previous_val current generator state
 * @return the next state, with its bit pattern reinterpreted as a signed int
 */
static av_always_inline int lcg_random(unsigned previous_val)
{
    union {
        unsigned u;
        int      s;
    } bits;

    bits.u = previous_val * 1664525u + 1013904223;
    return bits.s;
}
66
67static void imdct_and_windowing_mips(AACContext *ac, SingleChannelElement *sce)
68{
69 IndividualChannelStream *ics = &sce->ics;
70 float *in = sce->coeffs;
71 float *out = sce->ret;
72 float *saved = sce->saved;
73 const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
74 const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
75 const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
76 float *buf = ac->buf_mdct;
77 int i;
78
79 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
80 for (i = 0; i < 1024; i += 128)
81 ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
82 } else
83 ac->mdct.imdct_half(&ac->mdct, buf, in);
84
85 /* window overlapping
86 * NOTE: To simplify the overlapping code, all 'meaningless' short to long
87 * and long to short transitions are considered to be short to short
88 * transitions. This leaves just two cases (long to long and short to short)
89 * with a little special sauce for EIGHT_SHORT_SEQUENCE.
90 */
91 if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
92 (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
93 ac->fdsp.vector_fmul_window( out, saved, buf, lwindow_prev, 512);
94 } else {
95 {
96 float *buf1 = saved;
97 float *buf2 = out;
98 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
99 int loop_end;
100
101 /* loop unrolled 8 times */
102 __asm__ volatile (
103 ".set push \n\t"
104 ".set noreorder \n\t"
105 "addiu %[loop_end], %[src], 1792 \n\t"
106 "1: \n\t"
107 "lw %[temp0], 0(%[src]) \n\t"
108 "lw %[temp1], 4(%[src]) \n\t"
109 "lw %[temp2], 8(%[src]) \n\t"
110 "lw %[temp3], 12(%[src]) \n\t"
111 "lw %[temp4], 16(%[src]) \n\t"
112 "lw %[temp5], 20(%[src]) \n\t"
113 "lw %[temp6], 24(%[src]) \n\t"
114 "lw %[temp7], 28(%[src]) \n\t"
115 "addiu %[src], %[src], 32 \n\t"
116 "sw %[temp0], 0(%[dst]) \n\t"
117 "sw %[temp1], 4(%[dst]) \n\t"
118 "sw %[temp2], 8(%[dst]) \n\t"
119 "sw %[temp3], 12(%[dst]) \n\t"
120 "sw %[temp4], 16(%[dst]) \n\t"
121 "sw %[temp5], 20(%[dst]) \n\t"
122 "sw %[temp6], 24(%[dst]) \n\t"
123 "sw %[temp7], 28(%[dst]) \n\t"
124 "bne %[src], %[loop_end], 1b \n\t"
125 " addiu %[dst], %[dst], 32 \n\t"
126 ".set pop \n\t"
127
128 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
129 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
130 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
131 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
132 [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
133 [dst]"+r"(buf2)
134 :
135 : "memory"
136 );
137 }
138
139 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
140 {
141 float wi;
142 float wj;
143 int i;
144 float temp0, temp1, temp2, temp3;
145 float *dst0 = out + 448 + 0*128;
146 float *dst1 = dst0 + 64 + 63;
147 float *dst2 = saved + 63;
148 float *win0 = (float*)swindow;
149 float *win1 = win0 + 64 + 63;
150 float *win0_prev = (float*)swindow_prev;
151 float *win1_prev = win0_prev + 64 + 63;
152 float *src0_prev = saved + 448;
153 float *src1_prev = buf + 0*128 + 63;
154 float *src0 = buf + 0*128 + 64;
155 float *src1 = buf + 1*128 + 63;
156
157 for(i = 0; i < 64; i++)
158 {
159 temp0 = src0_prev[0];
160 temp1 = src1_prev[0];
161 wi = *win0_prev;
162 wj = *win1_prev;
163 temp2 = src0[0];
164 temp3 = src1[0];
165 dst0[0] = temp0 * wj - temp1 * wi;
166 dst1[0] = temp0 * wi + temp1 * wj;
167
168 wi = *win0;
169 wj = *win1;
170
171 temp0 = src0[128];
172 temp1 = src1[128];
173 dst0[128] = temp2 * wj - temp3 * wi;
174 dst1[128] = temp2 * wi + temp3 * wj;
175
176 temp2 = src0[256];
177 temp3 = src1[256];
178 dst0[256] = temp0 * wj - temp1 * wi;
179 dst1[256] = temp0 * wi + temp1 * wj;
180 dst0[384] = temp2 * wj - temp3 * wi;
181 dst1[384] = temp2 * wi + temp3 * wj;
182
183 temp0 = src0[384];
184 temp1 = src1[384];
185 dst0[512] = temp0 * wj - temp1 * wi;
186 dst2[0] = temp0 * wi + temp1 * wj;
187
188 src0++;
189 src1--;
190 src0_prev++;
191 src1_prev--;
192 win0++;
193 win1--;
194 win0_prev++;
195 win1_prev--;
196 dst0++;
197 dst1--;
198 dst2--;
199 }
200 }
201 } else {
202 ac->fdsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64);
203 {
204 float *buf1 = buf + 64;
205 float *buf2 = out + 576;
206 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
207 int loop_end;
208
209 /* loop unrolled 8 times */
210 __asm__ volatile (
211 ".set push \n\t"
212 ".set noreorder \n\t"
213 "addiu %[loop_end], %[src], 1792 \n\t"
214 "1: \n\t"
215 "lw %[temp0], 0(%[src]) \n\t"
216 "lw %[temp1], 4(%[src]) \n\t"
217 "lw %[temp2], 8(%[src]) \n\t"
218 "lw %[temp3], 12(%[src]) \n\t"
219 "lw %[temp4], 16(%[src]) \n\t"
220 "lw %[temp5], 20(%[src]) \n\t"
221 "lw %[temp6], 24(%[src]) \n\t"
222 "lw %[temp7], 28(%[src]) \n\t"
223 "addiu %[src], %[src], 32 \n\t"
224 "sw %[temp0], 0(%[dst]) \n\t"
225 "sw %[temp1], 4(%[dst]) \n\t"
226 "sw %[temp2], 8(%[dst]) \n\t"
227 "sw %[temp3], 12(%[dst]) \n\t"
228 "sw %[temp4], 16(%[dst]) \n\t"
229 "sw %[temp5], 20(%[dst]) \n\t"
230 "sw %[temp6], 24(%[dst]) \n\t"
231 "sw %[temp7], 28(%[dst]) \n\t"
232 "bne %[src], %[loop_end], 1b \n\t"
233 " addiu %[dst], %[dst], 32 \n\t"
234 ".set pop \n\t"
235
236 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
237 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
238 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
239 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
240 [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
241 [dst]"+r"(buf2)
242 :
243 : "memory"
244 );
245 }
246 }
247 }
248
249 // buffer update
250 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
251 ac->fdsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64);
252 ac->fdsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
253 ac->fdsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
254 {
255 float *buf1 = buf + 7*128 + 64;
256 float *buf2 = saved + 448;
257 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
258 int loop_end;
259
260 /* loop unrolled 8 times */
261 __asm__ volatile (
262 ".set push \n\t"
263 ".set noreorder \n\t"
264 "addiu %[loop_end], %[src], 256 \n\t"
265 "1: \n\t"
266 "lw %[temp0], 0(%[src]) \n\t"
267 "lw %[temp1], 4(%[src]) \n\t"
268 "lw %[temp2], 8(%[src]) \n\t"
269 "lw %[temp3], 12(%[src]) \n\t"
270 "lw %[temp4], 16(%[src]) \n\t"
271 "lw %[temp5], 20(%[src]) \n\t"
272 "lw %[temp6], 24(%[src]) \n\t"
273 "lw %[temp7], 28(%[src]) \n\t"
274 "addiu %[src], %[src], 32 \n\t"
275 "sw %[temp0], 0(%[dst]) \n\t"
276 "sw %[temp1], 4(%[dst]) \n\t"
277 "sw %[temp2], 8(%[dst]) \n\t"
278 "sw %[temp3], 12(%[dst]) \n\t"
279 "sw %[temp4], 16(%[dst]) \n\t"
280 "sw %[temp5], 20(%[dst]) \n\t"
281 "sw %[temp6], 24(%[dst]) \n\t"
282 "sw %[temp7], 28(%[dst]) \n\t"
283 "bne %[src], %[loop_end], 1b \n\t"
284 " addiu %[dst], %[dst], 32 \n\t"
285 ".set pop \n\t"
286
287 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
288 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
289 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
290 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
291 [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
292 [dst]"+r"(buf2)
293 :
294 : "memory"
295 );
296 }
297 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
298 float *buf1 = buf + 512;
299 float *buf2 = saved;
300 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
301 int loop_end;
302
303 /* loop unrolled 8 times */
304 __asm__ volatile (
305 ".set push \n\t"
306 ".set noreorder \n\t"
307 "addiu %[loop_end], %[src], 1792 \n\t"
308 "1: \n\t"
309 "lw %[temp0], 0(%[src]) \n\t"
310 "lw %[temp1], 4(%[src]) \n\t"
311 "lw %[temp2], 8(%[src]) \n\t"
312 "lw %[temp3], 12(%[src]) \n\t"
313 "lw %[temp4], 16(%[src]) \n\t"
314 "lw %[temp5], 20(%[src]) \n\t"
315 "lw %[temp6], 24(%[src]) \n\t"
316 "lw %[temp7], 28(%[src]) \n\t"
317 "addiu %[src], %[src], 32 \n\t"
318 "sw %[temp0], 0(%[dst]) \n\t"
319 "sw %[temp1], 4(%[dst]) \n\t"
320 "sw %[temp2], 8(%[dst]) \n\t"
321 "sw %[temp3], 12(%[dst]) \n\t"
322 "sw %[temp4], 16(%[dst]) \n\t"
323 "sw %[temp5], 20(%[dst]) \n\t"
324 "sw %[temp6], 24(%[dst]) \n\t"
325 "sw %[temp7], 28(%[dst]) \n\t"
326 "bne %[src], %[loop_end], 1b \n\t"
327 " addiu %[dst], %[dst], 32 \n\t"
328 ".set pop \n\t"
329
330 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
331 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
332 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
333 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
334 [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
335 [dst]"+r"(buf2)
336 :
337 : "memory"
338 );
339 {
340 float *buf1 = buf + 7*128 + 64;
341 float *buf2 = saved + 448;
342 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
343 int loop_end;
344
345 /* loop unrolled 8 times */
346 __asm__ volatile (
347 ".set push \n\t"
348 ".set noreorder \n\t"
349 "addiu %[loop_end], %[src], 256 \n\t"
350 "1: \n\t"
351 "lw %[temp0], 0(%[src]) \n\t"
352 "lw %[temp1], 4(%[src]) \n\t"
353 "lw %[temp2], 8(%[src]) \n\t"
354 "lw %[temp3], 12(%[src]) \n\t"
355 "lw %[temp4], 16(%[src]) \n\t"
356 "lw %[temp5], 20(%[src]) \n\t"
357 "lw %[temp6], 24(%[src]) \n\t"
358 "lw %[temp7], 28(%[src]) \n\t"
359 "addiu %[src], %[src], 32 \n\t"
360 "sw %[temp0], 0(%[dst]) \n\t"
361 "sw %[temp1], 4(%[dst]) \n\t"
362 "sw %[temp2], 8(%[dst]) \n\t"
363 "sw %[temp3], 12(%[dst]) \n\t"
364 "sw %[temp4], 16(%[dst]) \n\t"
365 "sw %[temp5], 20(%[dst]) \n\t"
366 "sw %[temp6], 24(%[dst]) \n\t"
367 "sw %[temp7], 28(%[dst]) \n\t"
368 "bne %[src], %[loop_end], 1b \n\t"
369 " addiu %[dst], %[dst], 32 \n\t"
370 ".set pop \n\t"
371
372 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
373 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
374 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
375 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
376 [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
377 [dst]"+r"(buf2)
378 :
379 : "memory"
380 );
381 }
382 } else { // LONG_STOP or ONLY_LONG
383 float *buf1 = buf + 512;
384 float *buf2 = saved;
385 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
386 int loop_end;
387
388 /* loop unrolled 8 times */
389 __asm__ volatile (
390 ".set push \n\t"
391 ".set noreorder \n\t"
392 "addiu %[loop_end], %[src], 2048 \n\t"
393 "1: \n\t"
394 "lw %[temp0], 0(%[src]) \n\t"
395 "lw %[temp1], 4(%[src]) \n\t"
396 "lw %[temp2], 8(%[src]) \n\t"
397 "lw %[temp3], 12(%[src]) \n\t"
398 "lw %[temp4], 16(%[src]) \n\t"
399 "lw %[temp5], 20(%[src]) \n\t"
400 "lw %[temp6], 24(%[src]) \n\t"
401 "lw %[temp7], 28(%[src]) \n\t"
402 "addiu %[src], %[src], 32 \n\t"
403 "sw %[temp0], 0(%[dst]) \n\t"
404 "sw %[temp1], 4(%[dst]) \n\t"
405 "sw %[temp2], 8(%[dst]) \n\t"
406 "sw %[temp3], 12(%[dst]) \n\t"
407 "sw %[temp4], 16(%[dst]) \n\t"
408 "sw %[temp5], 20(%[dst]) \n\t"
409 "sw %[temp6], 24(%[dst]) \n\t"
410 "sw %[temp7], 28(%[dst]) \n\t"
411 "bne %[src], %[loop_end], 1b \n\t"
412 " addiu %[dst], %[dst], 32 \n\t"
413 ".set pop \n\t"
414
415 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
416 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
417 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
418 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
419 [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
420 [dst]"+r"(buf2)
421 :
422 : "memory"
423 );
424 }
425}
426
427static void apply_ltp_mips(AACContext *ac, SingleChannelElement *sce)
428{
429 const LongTermPrediction *ltp = &sce->ics.ltp;
430 const uint16_t *offsets = sce->ics.swb_offset;
431 int i, sfb;
432 int j, k;
433
434 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
435 float *predTime = sce->ret;
436 float *predFreq = ac->buf_mdct;
437 float *p_predTime;
438 int16_t num_samples = 2048;
439
440 if (ltp->lag < 1024)
441 num_samples = ltp->lag + 1024;
442 j = (2048 - num_samples) >> 2;
443 k = (2048 - num_samples) & 3;
444 p_predTime = &predTime[num_samples];
445
446 for (i = 0; i < num_samples; i++)
447 predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
448 for (i = 0; i < j; i++) {
449
450 /* loop unrolled 4 times */
451 __asm__ volatile (
452 "sw $0, 0(%[p_predTime]) \n\t"
453 "sw $0, 4(%[p_predTime]) \n\t"
454 "sw $0, 8(%[p_predTime]) \n\t"
455 "sw $0, 12(%[p_predTime]) \n\t"
456 "addiu %[p_predTime], %[p_predTime], 16 \n\t"
457
458 : [p_predTime]"+r"(p_predTime)
459 :
460 : "memory"
461 );
462 }
463 for (i = 0; i < k; i++) {
464
465 __asm__ volatile (
466 "sw $0, 0(%[p_predTime]) \n\t"
467 "addiu %[p_predTime], %[p_predTime], 4 \n\t"
468
469 : [p_predTime]"+r"(p_predTime)
470 :
471 : "memory"
472 );
473 }
474
475 ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
476
477 if (sce->tns.present)
478 ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0);
479
480 for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
481 if (ltp->used[sfb])
482 for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
483 sce->coeffs[i] += predFreq[i];
484 }
485}
486
487#if HAVE_MIPSFPU
488static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce)
489{
490 IndividualChannelStream *ics = &sce->ics;
491 float *saved = sce->saved;
492 float *saved_ltp = sce->coeffs;
493 const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
494 const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
495 int i;
496 int loop_end, loop_end1, loop_end2;
497 float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11;
498
499 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
500 float *buf = saved;
501 float *buf0 = saved_ltp;
502 float *p_saved_ltp = saved_ltp + 576;
503 float *ptr1 = &saved_ltp[512];
504 float *ptr2 = &ac->buf_mdct[1023];
505 float *ptr3 = (float*)&swindow[63];
506 loop_end1 = (int)(p_saved_ltp + 448);
507
508 /* loop unrolled 8 times */
509 __asm__ volatile (
510 ".set push \n\t"
511 ".set noreorder \n\t"
512 "addiu %[loop_end], %[src], 2048 \n\t"
513 "1: \n\t"
514 "lw %[temp0], 0(%[src]) \n\t"
515 "lw %[temp1], 4(%[src]) \n\t"
516 "lw %[temp2], 8(%[src]) \n\t"
517 "lw %[temp3], 12(%[src]) \n\t"
518 "lw %[temp4], 16(%[src]) \n\t"
519 "lw %[temp5], 20(%[src]) \n\t"
520 "lw %[temp6], 24(%[src]) \n\t"
521 "lw %[temp7], 28(%[src]) \n\t"
522 "addiu %[src], %[src], 32 \n\t"
523 "sw %[temp0], 0(%[dst]) \n\t"
524 "sw %[temp1], 4(%[dst]) \n\t"
525 "sw %[temp2], 8(%[dst]) \n\t"
526 "sw %[temp3], 12(%[dst]) \n\t"
527 "sw %[temp4], 16(%[dst]) \n\t"
528 "sw %[temp5], 20(%[dst]) \n\t"
529 "sw %[temp6], 24(%[dst]) \n\t"
530 "sw %[temp7], 28(%[dst]) \n\t"
531 "bne %[src], %[loop_end], 1b \n\t"
532 " addiu %[dst], %[dst], 32 \n\t"
533 ".set pop \n\t"
534
535 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
536 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
537 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
538 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
539 [loop_end]"=&r"(loop_end), [src]"+r"(buf),
540 [dst]"+r"(buf0)
541 :
542 : "memory"
543 );
544
545 /* loop unrolled 8 times */
546 __asm__ volatile (
547 "1: \n\t"
548 "sw $0, 0(%[p_saved_ltp]) \n\t"
549 "sw $0, 4(%[p_saved_ltp]) \n\t"
550 "sw $0, 8(%[p_saved_ltp]) \n\t"
551 "sw $0, 12(%[p_saved_ltp]) \n\t"
552 "sw $0, 16(%[p_saved_ltp]) \n\t"
553 "sw $0, 20(%[p_saved_ltp]) \n\t"
554 "sw $0, 24(%[p_saved_ltp]) \n\t"
555 "sw $0, 28(%[p_saved_ltp]) \n\t"
556 "addiu %[p_saved_ltp], %[p_saved_ltp], 32 \n\t"
557 "bne %[p_saved_ltp], %[loop_end1], 1b \n\t"
558
559 : [p_saved_ltp]"+r"(p_saved_ltp)
560 : [loop_end1]"r"(loop_end1)
561 : "memory"
562 );
563
564 ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
565 for (i = 0; i < 16; i++){
566 /* loop unrolled 4 times */
567 __asm__ volatile (
568 "lwc1 %[temp0], 0(%[ptr2]) \n\t"
569 "lwc1 %[temp1], -4(%[ptr2]) \n\t"
570 "lwc1 %[temp2], -8(%[ptr2]) \n\t"
571 "lwc1 %[temp3], -12(%[ptr2]) \n\t"
572 "lwc1 %[temp4], 0(%[ptr3]) \n\t"
573 "lwc1 %[temp5], -4(%[ptr3]) \n\t"
574 "lwc1 %[temp6], -8(%[ptr3]) \n\t"
575 "lwc1 %[temp7], -12(%[ptr3]) \n\t"
576 "mul.s %[temp8], %[temp0], %[temp4] \n\t"
577 "mul.s %[temp9], %[temp1], %[temp5] \n\t"
578 "mul.s %[temp10], %[temp2], %[temp6] \n\t"
579 "mul.s %[temp11], %[temp3], %[temp7] \n\t"
580 "swc1 %[temp8], 0(%[ptr1]) \n\t"
581 "swc1 %[temp9], 4(%[ptr1]) \n\t"
582 "swc1 %[temp10], 8(%[ptr1]) \n\t"
583 "swc1 %[temp11], 12(%[ptr1]) \n\t"
584 "addiu %[ptr1], %[ptr1], 16 \n\t"
585 "addiu %[ptr2], %[ptr2], -16 \n\t"
586 "addiu %[ptr3], %[ptr3], -16 \n\t"
587
588 : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
589 [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
590 [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
591 [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
592 [temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
593 [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
594 [ptr1]"+r"(ptr1), [ptr2]"+r"(ptr2), [ptr3]"+r"(ptr3)
595 :
596 : "memory"
597 );
598 }
599 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
600 float *buff0 = saved;
601 float *buff1 = saved_ltp;
602 float *ptr1 = &saved_ltp[512];
603 float *ptr2 = &ac->buf_mdct[1023];
604 float *ptr3 = (float*)&swindow[63];
605 loop_end = (int)(saved + 448);
606
607 /* loop unrolled 8 times */
608 __asm__ volatile (
609 ".set push \n\t"
610 ".set noreorder \n\t"
611 "1: \n\t"
612 "lw %[temp0], 0(%[src]) \n\t"
613 "lw %[temp1], 4(%[src]) \n\t"
614 "lw %[temp2], 8(%[src]) \n\t"
615 "lw %[temp3], 12(%[src]) \n\t"
616 "lw %[temp4], 16(%[src]) \n\t"
617 "lw %[temp5], 20(%[src]) \n\t"
618 "lw %[temp6], 24(%[src]) \n\t"
619 "lw %[temp7], 28(%[src]) \n\t"
620 "addiu %[src], %[src], 32 \n\t"
621 "sw %[temp0], 0(%[dst]) \n\t"
622 "sw %[temp1], 4(%[dst]) \n\t"
623 "sw %[temp2], 8(%[dst]) \n\t"
624 "sw %[temp3], 12(%[dst]) \n\t"
625 "sw %[temp4], 16(%[dst]) \n\t"
626 "sw %[temp5], 20(%[dst]) \n\t"
627 "sw %[temp6], 24(%[dst]) \n\t"
628 "sw %[temp7], 28(%[dst]) \n\t"
629 "sw $0, 2304(%[dst]) \n\t"
630 "sw $0, 2308(%[dst]) \n\t"
631 "sw $0, 2312(%[dst]) \n\t"
632 "sw $0, 2316(%[dst]) \n\t"
633 "sw $0, 2320(%[dst]) \n\t"
634 "sw $0, 2324(%[dst]) \n\t"
635 "sw $0, 2328(%[dst]) \n\t"
636 "sw $0, 2332(%[dst]) \n\t"
637 "bne %[src], %[loop_end], 1b \n\t"
638 " addiu %[dst], %[dst], 32 \n\t"
639 ".set pop \n\t"
640
641 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
642 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
643 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
644 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
645 [src]"+r"(buff0), [dst]"+r"(buff1)
646 : [loop_end]"r"(loop_end)
647 : "memory"
648 );
649 ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
650 for (i = 0; i < 16; i++){
651 /* loop unrolled 8 times */
652 __asm__ volatile (
653 "lwc1 %[temp0], 0(%[ptr2]) \n\t"
654 "lwc1 %[temp1], -4(%[ptr2]) \n\t"
655 "lwc1 %[temp2], -8(%[ptr2]) \n\t"
656 "lwc1 %[temp3], -12(%[ptr2]) \n\t"
657 "lwc1 %[temp4], 0(%[ptr3]) \n\t"
658 "lwc1 %[temp5], -4(%[ptr3]) \n\t"
659 "lwc1 %[temp6], -8(%[ptr3]) \n\t"
660 "lwc1 %[temp7], -12(%[ptr3]) \n\t"
661 "mul.s %[temp8], %[temp0], %[temp4] \n\t"
662 "mul.s %[temp9], %[temp1], %[temp5] \n\t"
663 "mul.s %[temp10], %[temp2], %[temp6] \n\t"
664 "mul.s %[temp11], %[temp3], %[temp7] \n\t"
665 "swc1 %[temp8], 0(%[ptr1]) \n\t"
666 "swc1 %[temp9], 4(%[ptr1]) \n\t"
667 "swc1 %[temp10], 8(%[ptr1]) \n\t"
668 "swc1 %[temp11], 12(%[ptr1]) \n\t"
669 "addiu %[ptr1], %[ptr1], 16 \n\t"
670 "addiu %[ptr2], %[ptr2], -16 \n\t"
671 "addiu %[ptr3], %[ptr3], -16 \n\t"
672
673 : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
674 [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
675 [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
676 [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
677 [temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
678 [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
679 [ptr1]"+r"(ptr1), [ptr2]"+r"(ptr2), [ptr3]"+r"(ptr3)
680 :
681 : "memory"
682 );
683 }
684 } else { // LONG_STOP or ONLY_LONG
685 float *ptr1, *ptr2, *ptr3;
686 ac->fdsp.vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512);
687
688 ptr1 = &saved_ltp[512];
689 ptr2 = &ac->buf_mdct[1023];
690 ptr3 = (float*)&lwindow[511];
691
692 for (i = 0; i < 512; i+=4){
693 /* loop unrolled 4 times */
694 __asm__ volatile (
695 "lwc1 %[temp0], 0(%[ptr2]) \n\t"
696 "lwc1 %[temp1], -4(%[ptr2]) \n\t"
697 "lwc1 %[temp2], -8(%[ptr2]) \n\t"
698 "lwc1 %[temp3], -12(%[ptr2]) \n\t"
699 "lwc1 %[temp4], 0(%[ptr3]) \n\t"
700 "lwc1 %[temp5], -4(%[ptr3]) \n\t"
701 "lwc1 %[temp6], -8(%[ptr3]) \n\t"
702 "lwc1 %[temp7], -12(%[ptr3]) \n\t"
703 "mul.s %[temp8], %[temp0], %[temp4] \n\t"
704 "mul.s %[temp9], %[temp1], %[temp5] \n\t"
705 "mul.s %[temp10], %[temp2], %[temp6] \n\t"
706 "mul.s %[temp11], %[temp3], %[temp7] \n\t"
707 "swc1 %[temp8], 0(%[ptr1]) \n\t"
708 "swc1 %[temp9], 4(%[ptr1]) \n\t"
709 "swc1 %[temp10], 8(%[ptr1]) \n\t"
710 "swc1 %[temp11], 12(%[ptr1]) \n\t"
711 "addiu %[ptr1], %[ptr1], 16 \n\t"
712 "addiu %[ptr2], %[ptr2], -16 \n\t"
713 "addiu %[ptr3], %[ptr3], -16 \n\t"
714
715 : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
716 [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
717 [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
718 [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
719 [temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
720 [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
721 [ptr1]"+r"(ptr1), [ptr2]"+r"(ptr2),
722 [ptr3]"+r"(ptr3)
723 :
724 : "memory"
725 );
726 }
727 }
728
729 {
730 float *buf1 = sce->ltp_state+1024;
731 float *buf2 = sce->ltp_state;
732 float *buf3 = sce->ret;
733 float *buf4 = sce->ltp_state+1024;
734 float *buf5 = saved_ltp;
735 float *buf6 = sce->ltp_state+2048;
736
737 /* loops unrolled 8 times */
738 __asm__ volatile (
739 ".set push \n\t"
740 ".set noreorder \n\t"
741 "addiu %[loop_end], %[src], 4096 \n\t"
742 "addiu %[loop_end1], %[src1], 4096 \n\t"
743 "addiu %[loop_end2], %[src2], 4096 \n\t"
744 "1: \n\t"
745 "lw %[temp0], 0(%[src]) \n\t"
746 "lw %[temp1], 4(%[src]) \n\t"
747 "lw %[temp2], 8(%[src]) \n\t"
748 "lw %[temp3], 12(%[src]) \n\t"
749 "lw %[temp4], 16(%[src]) \n\t"
750 "lw %[temp5], 20(%[src]) \n\t"
751 "lw %[temp6], 24(%[src]) \n\t"
752 "lw %[temp7], 28(%[src]) \n\t"
753 "addiu %[src], %[src], 32 \n\t"
754 "sw %[temp0], 0(%[dst]) \n\t"
755 "sw %[temp1], 4(%[dst]) \n\t"
756 "sw %[temp2], 8(%[dst]) \n\t"
757 "sw %[temp3], 12(%[dst]) \n\t"
758 "sw %[temp4], 16(%[dst]) \n\t"
759 "sw %[temp5], 20(%[dst]) \n\t"
760 "sw %[temp6], 24(%[dst]) \n\t"
761 "sw %[temp7], 28(%[dst]) \n\t"
762 "bne %[src], %[loop_end], 1b \n\t"
763 " addiu %[dst], %[dst], 32 \n\t"
764 "2: \n\t"
765 "lw %[temp0], 0(%[src1]) \n\t"
766 "lw %[temp1], 4(%[src1]) \n\t"
767 "lw %[temp2], 8(%[src1]) \n\t"
768 "lw %[temp3], 12(%[src1]) \n\t"
769 "lw %[temp4], 16(%[src1]) \n\t"
770 "lw %[temp5], 20(%[src1]) \n\t"
771 "lw %[temp6], 24(%[src1]) \n\t"
772 "lw %[temp7], 28(%[src1]) \n\t"
773 "addiu %[src1], %[src1], 32 \n\t"
774 "sw %[temp0], 0(%[dst1]) \n\t"
775 "sw %[temp1], 4(%[dst1]) \n\t"
776 "sw %[temp2], 8(%[dst1]) \n\t"
777 "sw %[temp3], 12(%[dst1]) \n\t"
778 "sw %[temp4], 16(%[dst1]) \n\t"
779 "sw %[temp5], 20(%[dst1]) \n\t"
780 "sw %[temp6], 24(%[dst1]) \n\t"
781 "sw %[temp7], 28(%[dst1]) \n\t"
782 "bne %[src1], %[loop_end1], 2b \n\t"
783 " addiu %[dst1], %[dst1], 32 \n\t"
784 "3: \n\t"
785 "lw %[temp0], 0(%[src2]) \n\t"
786 "lw %[temp1], 4(%[src2]) \n\t"
787 "lw %[temp2], 8(%[src2]) \n\t"
788 "lw %[temp3], 12(%[src2]) \n\t"
789 "lw %[temp4], 16(%[src2]) \n\t"
790 "lw %[temp5], 20(%[src2]) \n\t"
791 "lw %[temp6], 24(%[src2]) \n\t"
792 "lw %[temp7], 28(%[src2]) \n\t"
793 "addiu %[src2], %[src2], 32 \n\t"
794 "sw %[temp0], 0(%[dst2]) \n\t"
795 "sw %[temp1], 4(%[dst2]) \n\t"
796 "sw %[temp2], 8(%[dst2]) \n\t"
797 "sw %[temp3], 12(%[dst2]) \n\t"
798 "sw %[temp4], 16(%[dst2]) \n\t"
799 "sw %[temp5], 20(%[dst2]) \n\t"
800 "sw %[temp6], 24(%[dst2]) \n\t"
801 "sw %[temp7], 28(%[dst2]) \n\t"
802 "bne %[src2], %[loop_end2], 3b \n\t"
803 " addiu %[dst2], %[dst2], 32 \n\t"
804 ".set pop \n\t"
805
806 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
807 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
808 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
809 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
810 [loop_end]"=&r"(loop_end), [loop_end1]"=&r"(loop_end1),
811 [loop_end2]"=&r"(loop_end2), [src]"+r"(buf1),
812 [dst]"+r"(buf2), [src1]"+r"(buf3), [dst1]"+r"(buf4),
813 [src2]"+r"(buf5), [dst2]"+r"(buf6)
814 :
815 : "memory"
816 );
817 }
818}
819#endif /* HAVE_MIPSFPU */
820#endif /* HAVE_INLINE_ASM */
821
822void ff_aacdec_init_mips(AACContext *c)
823{
824#if HAVE_INLINE_ASM
825 c->imdct_and_windowing = imdct_and_windowing_mips;
826 c->apply_ltp = apply_ltp_mips;
827#if HAVE_MIPSFPU
828 c->update_ltp = update_ltp_mips;
829#endif /* HAVE_MIPSFPU */
830#endif /* HAVE_INLINE_ASM */
831}