3 * MIPS Technologies, Inc., California.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * Authors: Djordje Pesut (djordje@mips.com)
30 * Mirjana Vulin (mvulin@mips.com)
32 * This file is part of FFmpeg.
34 * FFmpeg is free software; you can redistribute it and/or
35 * modify it under the terms of the GNU Lesser General Public
36 * License as published by the Free Software Foundation; either
37 * version 2.1 of the License, or (at your option) any later version.
39 * FFmpeg is distributed in the hope that it will be useful,
40 * but WITHOUT ANY WARRANTY; without even the implied warranty of
41 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
42 * Lesser General Public License for more details.
44 * You should have received a copy of the GNU Lesser General Public
45 * License along with FFmpeg; if not, write to the Free Software
46 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
51 * Reference: libavcodec/aacsbr.c
54 #include "libavcodec/aac.h"
55 #include "libavcodec/aacsbr.h"
57 #define ENVELOPE_ADJUSTMENT_OFFSET 2
/*
 * sbr_lf_gen_mips: fills X_low (float [32][40][2]) from W
 * (const float [2][32][32][2]) using MIPS integer loads/stores.
 *
 * Steps visible in this listing:
 *  1. Zero-fill: stores $0 to eight 32-bit words per pass, starting at
 *     &X_low[0][0][0] and looping until the pointer equals loop_end, which
 *     is 2560 floats past the start (32*40*2 = 2560, the element count of
 *     X_low).  The trailing addiu subtracts 10240 bytes (2560 * 4).
 *  2. For k < sbr->kx[1]: copies word pairs from p_w (initially
 *     &W[buf_idx][0][0][0]) to p_x_low (initially &X_low[0][8][0]).
 *  3. For k < sbr->kx[0]: the same copy pattern from p_w1 (initially
 *     &W[1-buf_idx][24][0][0]) to p_x1_low.
 *
 * The data is moved bit-for-bit with lw/sw; no floating-point arithmetic is
 * performed on it here.
 *
 * NOTE(review): this listing appears to be missing lines (the asm statement
 * open/close, the "1:" label targeted by "1b", brace lines, the declarations
 * of i, k and buf_idx, any pointer adjustment between k iterations, and the
 * return statement).  Confirm against the complete file before editing.
 */
60 static int sbr_lf_gen_mips(AACContext
*ac
, SpectralBandReplication
*sbr
,
61 float X_low
[32][40][2], const float W
[2][32][32][2],
65 int temp0
, temp1
, temp2
, temp3
, temp4
, temp5
, temp6
, temp7
;
66 float *p_x_low
= &X_low
[0][8][0];
67 float *p_w
= (float*)&W
[buf_idx
][0][0][0];
68 float *p_x1_low
= &X_low
[0][0][0];
69 float *p_w1
= (float*)&W
[1-buf_idx
][24][0][0];
71 float *loop_end
=p_x1_low
+ 2560;
/* Zero-fill pass: 8 words (32 bytes) cleared per iteration until
 * p_x1_low reaches loop_end; the final addiu then moves p_x1_low back by
 * 10240 bytes, the same distance the loop advanced it. */
73 /* loop unrolled 8 times */
76 "sw $0, 0(%[p_x1_low]) \n\t"
77 "sw $0, 4(%[p_x1_low]) \n\t"
78 "sw $0, 8(%[p_x1_low]) \n\t"
79 "sw $0, 12(%[p_x1_low]) \n\t"
80 "sw $0, 16(%[p_x1_low]) \n\t"
81 "sw $0, 20(%[p_x1_low]) \n\t"
82 "sw $0, 24(%[p_x1_low]) \n\t"
83 "sw $0, 28(%[p_x1_low]) \n\t"
84 "addiu %[p_x1_low], %[p_x1_low], 32 \n\t"
85 "bne %[p_x1_low], %[loop_end], 1b \n\t"
86 "addiu %[p_x1_low], %[p_x1_low], -10240 \n\t"
88 : [p_x1_low
]"+r"(p_x1_low
)
89 : [loop_end
]"r"(loop_end
)
/* First copy: each asm body reads four two-word pairs from p_w at byte
 * offsets 0, 256, 512 and 768 (256 bytes is one [32][2] float row of W)
 * and writes them to 32 contiguous bytes at p_x_low.  p_w then advances
 * 1024 bytes (four rows) and p_x_low 32 bytes.  The inner loop runs 8
 * times (i = 0, 4, ..., 28), i.e. 32 pairs per value of k. */
93 for (k
= 0; k
< sbr
->kx
[1]; k
++) {
94 for (i
= 0; i
< 32; i
+=4) {
95 /* loop unrolled 4 times */
97 "lw %[temp0], 0(%[p_w]) \n\t"
98 "lw %[temp1], 4(%[p_w]) \n\t"
99 "lw %[temp2], 256(%[p_w]) \n\t"
100 "lw %[temp3], 260(%[p_w]) \n\t"
101 "lw %[temp4], 512(%[p_w]) \n\t"
102 "lw %[temp5], 516(%[p_w]) \n\t"
103 "lw %[temp6], 768(%[p_w]) \n\t"
104 "lw %[temp7], 772(%[p_w]) \n\t"
105 "sw %[temp0], 0(%[p_x_low]) \n\t"
106 "sw %[temp1], 4(%[p_x_low]) \n\t"
107 "sw %[temp2], 8(%[p_x_low]) \n\t"
108 "sw %[temp3], 12(%[p_x_low]) \n\t"
109 "sw %[temp4], 16(%[p_x_low]) \n\t"
110 "sw %[temp5], 20(%[p_x_low]) \n\t"
111 "sw %[temp6], 24(%[p_x_low]) \n\t"
112 "sw %[temp7], 28(%[p_x_low]) \n\t"
113 "addiu %[p_x_low], %[p_x_low], 32 \n\t"
114 "addiu %[p_w], %[p_w], 1024 \n\t"
116 : [temp0
]"=&r"(temp0
), [temp1
]"=&r"(temp1
),
117 [temp2
]"=&r"(temp2
), [temp3
]"=&r"(temp3
),
118 [temp4
]"=&r"(temp4
), [temp5
]"=&r"(temp5
),
119 [temp6
]"=&r"(temp6
), [temp7
]"=&r"(temp7
),
120 [p_w
]"+r"(p_w
), [p_x_low
]"+r"(p_x_low
)
/* Second copy: identical load/store pattern, but reading from p_w1 and
 * writing to p_x1_low.  The inner loop runs twice, so 8 pairs are copied
 * per value of k. */
129 for (k
= 0; k
< sbr
->kx
[0]; k
++) {
130 for (i
= 0; i
< 2; i
++) {
132 /* loop unrolled 4 times */
134 "lw %[temp0], 0(%[p_w1]) \n\t"
135 "lw %[temp1], 4(%[p_w1]) \n\t"
136 "lw %[temp2], 256(%[p_w1]) \n\t"
137 "lw %[temp3], 260(%[p_w1]) \n\t"
138 "lw %[temp4], 512(%[p_w1]) \n\t"
139 "lw %[temp5], 516(%[p_w1]) \n\t"
140 "lw %[temp6], 768(%[p_w1]) \n\t"
141 "lw %[temp7], 772(%[p_w1]) \n\t"
142 "sw %[temp0], 0(%[p_x1_low]) \n\t"
143 "sw %[temp1], 4(%[p_x1_low]) \n\t"
144 "sw %[temp2], 8(%[p_x1_low]) \n\t"
145 "sw %[temp3], 12(%[p_x1_low]) \n\t"
146 "sw %[temp4], 16(%[p_x1_low]) \n\t"
147 "sw %[temp5], 20(%[p_x1_low]) \n\t"
148 "sw %[temp6], 24(%[p_x1_low]) \n\t"
149 "sw %[temp7], 28(%[p_x1_low]) \n\t"
150 "addiu %[p_x1_low], %[p_x1_low], 32 \n\t"
151 "addiu %[p_w1], %[p_w1], 1024 \n\t"
153 : [temp0
]"=&r"(temp0
), [temp1
]"=&r"(temp1
),
154 [temp2
]"=&r"(temp2
), [temp3
]"=&r"(temp3
),
155 [temp4
]"=&r"(temp4
), [temp5
]"=&r"(temp5
),
156 [temp6
]"=&r"(temp6
), [temp7
]"=&r"(temp7
),
157 [p_w1
]"+r"(p_w1
), [p_x1_low
]"+r"(p_x1_low
)
/*
 * sbr_x_gen_mips: fills X (float [2][38][64]) from X_low, Y0 and Y1 using
 * MIPS integer loads/stores.
 *
 * Layout facts used by the byte offsets below (4-byte float assumed):
 *  - X holds 2*38*64 = 4864 floats = 19456 bytes; one X[.] plane is
 *    38*64*4 = 9728 bytes; one X[.][i] row is 64*4 = 256 bytes.
 *  - one Y0/Y1 [64][2] row is 512 bytes; one [2] pair is 8 bytes.
 *
 * Every copy loop loads two consecutive words from the source and stores the
 * first at 0(x1) and the second at 9728(x1), i.e. at the same position in
 * the two planes of X, then advances x1 by one 256-byte row.
 *
 * Steps visible in this listing:
 *  1. Zero-fill of X, 8 words per pass, until x1 equals j.
 *  2. Loops "2"/"3": i counts from 0 up to i_Temp, reading from X_low
 *     (k < kx[0]) and then Y0 (k < kx[0] + m[0]).
 *  3. Loops "4"/"5": i counts from i_Temp up to temp3 / temp2, reading from
 *     X_low (k < kx[1]) and then Y1 (k < kx[1] + m[1]).
 *
 * Each asm loop tests its counter with bne at the bottom, so a loop body
 * runs at least once whenever its asm statement is reached.
 *
 * NOTE(review): this listing appears to be missing lines, including the
 * declarations of i, k, i_f and j, the assignments to temp2 and temp3, the
 * numeric labels targeted by 1b..5b, the asm open/close lines, braces, some
 * pointer re-initialisations of x1, and the return statement.  Any guard
 * around the i_Temp loops (for i_Temp == 0) is likewise not visible.
 * Confirm against the complete file before editing.
 */
168 static int sbr_x_gen_mips(SpectralBandReplication
*sbr
, float X
[2][38][64],
169 const float Y0
[38][64][2], const float Y1
[38][64][2],
170 const float X_low
[32][40][2], int ch
)
174 int temp0
, temp1
, temp2
, temp3
;
175 const float *X_low1
, *Y01
, *Y11
;
176 float *x1
=&X
[0][0][0];
178 const int i_Temp
= FFMAX(2*sbr
->data
[ch
].t_env_num_env_old
- i_f
, 0);
/* Zero-fill pass over X; the trailing addiu moves x1 back by 19456 bytes,
 * which is the byte size of X. */
180 /* loop unrolled 8 times */
183 "sw $0, 0(%[x1]) \n\t"
184 "sw $0, 4(%[x1]) \n\t"
185 "sw $0, 8(%[x1]) \n\t"
186 "sw $0, 12(%[x1]) \n\t"
187 "sw $0, 16(%[x1]) \n\t"
188 "sw $0, 20(%[x1]) \n\t"
189 "sw $0, 24(%[x1]) \n\t"
190 "sw $0, 28(%[x1]) \n\t"
191 "addiu %[x1], %[x1], 32 \n\t"
192 "bne %[x1], %[j], 1b \n\t"
193 "addiu %[x1], %[x1], -19456 \n\t"
/* Copy from X_low starting at column 2; the source advances one 8-byte
 * pair per step, i runs 0..i_Temp. */
202 X_low1
=&X_low
[0][2][0];
204 for (k
= 0; k
< sbr
->kx
[0]; k
++) {
207 "move %[i], $zero \n\t"
209 "lw %[temp0], 0(%[X_low1]) \n\t"
210 "lw %[temp1], 4(%[X_low1]) \n\t"
211 "sw %[temp0], 0(%[x1]) \n\t"
212 "sw %[temp1], 9728(%[x1]) \n\t"
213 "addiu %[x1], %[x1], 256 \n\t"
214 "addiu %[X_low1], %[X_low1], 8 \n\t"
215 "addiu %[i], %[i], 1 \n\t"
216 "bne %[i], %[i_Temp], 2b \n\t"
218 : [x1
]"+r"(x1
), [X_low1
]"+r"(X_low1
), [i
]"=&r"(i
),
219 [temp0
]"=&r"(temp0
), [temp1
]"=&r"(temp1
)
220 : [i_Temp
]"r"(i_Temp
)
/* Undo the i_Temp pairs consumed (2 floats each) and add 80 floats, the
 * size of one X_low[k] row ([40][2]). */
224 X_low1
-=(i_Temp
<<1)-80;
/* Copy from Y0 starting at row 32; the source advances one 512-byte row
 * per step, i runs 0..i_Temp.  k continues from the previous loop. */
228 Y01
=(float*)&Y0
[32][k
][0];
230 for (; k
< sbr
->kx
[0] + sbr
->m
[0]; k
++) {
232 "move %[i], $zero \n\t"
234 "lw %[temp0], 0(%[Y01]) \n\t"
235 "lw %[temp1], 4(%[Y01]) \n\t"
236 "sw %[temp0], 0(%[x1]) \n\t"
237 "sw %[temp1], 9728(%[x1]) \n\t"
238 "addiu %[x1], %[x1], 256 \n\t"
239 "addiu %[Y01], %[Y01], 512 \n\t"
240 "addiu %[i], %[i], 1 \n\t"
241 "bne %[i], %[i_Temp], 3b \n\t"
243 : [x1
]"+r"(x1
), [Y01
]"+r"(Y01
), [i
]"=&r"(i
),
244 [temp0
]"=&r"(temp0
), [temp1
]"=&r"(temp1
)
245 : [i_Temp
]"r"(i_Temp
)
/* Copy from X_low starting at column i_Temp+2; i runs i_Temp..temp3.
 * NOTE(review): the output constraint list below ends with a comma, so at
 * least one operand line appears to be missing from this listing. */
254 X_low1
=&X_low
[0][i_Temp
+2][0];
257 for (k
= 0; k
< sbr
->kx
[1]; k
++) {
260 "move %[i], %[i_Temp] \n\t"
262 "lw %[temp0], 0(%[X_low1]) \n\t"
263 "lw %[temp1], 4(%[X_low1]) \n\t"
264 "sw %[temp0], 0(%[x1]) \n\t"
265 "sw %[temp1], 9728(%[x1]) \n\t"
266 "addiu %[x1], %[x1], 256 \n\t"
267 "addiu %[X_low1], %[X_low1], 8 \n\t"
268 "addiu %[i], %[i], 1 \n\t"
269 "bne %[i], %[temp3], 4b \n\t"
271 : [x1
]"+r"(x1
), [X_low1
]"+r"(X_low1
), [i
]"=&r"(i
),
272 [temp0
]"=&r"(temp0
), [temp1
]"=&r"(temp1
),
274 : [i_Temp
]"r"(i_Temp
), [temp3
]"r"(temp3
)
/* Rewind x1 by (38 - i_Temp) rows of 64 floats and step one float forward
 * (next k); rewind X_low1 by the pairs consumed and add one 80-float row. */
277 x1
-= ((38-i_Temp
)<<6)-1;
278 X_low1
-= ((38-i_Temp
)<<1)- 80;
/* Copy from Y1 starting at row i_Temp; i runs i_Temp..temp2.
 * NOTE(review): temp3 is listed as an input here although the visible asm
 * text only references temp2; the temp2 operand line is not visible. */
282 Y11
=&Y1
[i_Temp
][k
][0];
285 for (; k
< sbr
->kx
[1] + sbr
->m
[1]; k
++) {
288 "move %[i], %[i_Temp] \n\t"
290 "lw %[temp0], 0(%[Y11]) \n\t"
291 "lw %[temp1], 4(%[Y11]) \n\t"
292 "sw %[temp0], 0(%[x1]) \n\t"
293 "sw %[temp1], 9728(%[x1]) \n\t"
294 "addiu %[x1], %[x1], 256 \n\t"
295 "addiu %[Y11], %[Y11], 512 \n\t"
296 "addiu %[i], %[i], 1 \n\t"
297 "bne %[i], %[temp2], 5b \n\t"
299 : [x1
]"+r"(x1
), [Y11
]"+r"(Y11
), [i
]"=&r"(i
),
300 [temp0
]"=&r"(temp0
), [temp1
]"=&r"(temp1
)
301 : [i_Temp
]"r"(i_Temp
), [temp3
]"r"(temp3
),
/* Rewind by (32 - i_Temp) rows: 64 floats per row of X, 128 floats per row
 * of Y1; then step to the next k (1 float in X, 2 floats in Y1). */
306 x1
-= ((32-i_Temp
)<<6)-1;
307 Y11
-= ((32-i_Temp
)<<7)-2;
/*
 * sbr_hf_assemble_mips: builds Y1 (float [38][64][2]) from X_high using the
 * per-envelope gain, noise and sinusoid tables held in sbr and ch_data.
 *
 * Steps visible in this listing:
 *  1. Seeds the g_temp / q_temp history rows: either from sbr->gain[0] and
 *     sbr->q_m[0] (h_SL rows), or by copying four rows from index
 *     2*t_env_num_env_old to index 2*t_env[0].  The condition selecting
 *     between these two paths is not visible here.
 *  2. For every envelope e and time slot i in
 *     [2*t_env[e], 2*t_env[e+1]) copies `size` words from pok / pok1 into
 *     g_temp[h_SL + i] / q_temp[h_SL + i], four words at a time with a
 *     one-word tail loop for size & 3.
 *  3. For every envelope and time slot again: optionally smooths gains with
 *     h_smooth, applies sbr->dsp.hf_g_filt, then either
 *     sbr->dsp.hf_apply_noise[indexsine] or the sinusoid addition written
 *     in asm below, and advances indexnoise / indexsine.
 *  4. Writes indexnoise and indexsine back to ch_data.
 *
 * h_SL is 4 when sbr->bs_smoothing_mode is zero and 0 otherwise.
 *
 * NOTE(review): this listing appears to be missing lines, including the
 * e_a parameter, the declarations of e, i, j, m and size, the h_smooth
 * initialiser, the initialisation of pok / pok1 / g_filt / q_filt, asm
 * open/close lines, else branches and braces.  Confirm against the
 * complete file before editing.
 */
313 static void sbr_hf_assemble_mips(float Y1
[38][64][2],
314 const float X_high
[64][40][2],
315 SpectralBandReplication
*sbr
, SBRData
*ch_data
,
319 const int h_SL
= 4 * !sbr
->bs_smoothing_mode
;
320 const int kx
= sbr
->kx
[1];
321 const int m_max
= sbr
->m
[1];
322 static const float h_smooth
[5] = {
330 float (*g_temp
)[48] = ch_data
->g_temp
, (*q_temp
)[48] = ch_data
->q_temp
;
331 int indexnoise
= ch_data
->f_indexnoise
;
332 int indexsine
= ch_data
->f_indexsine
;
333 float *g_temp1
, *q_temp1
, *pok
, *pok1
;
/* NOTE(review): temp1..temp4 are declared float but are bound to "=&r"
 * (general-purpose register) constraints and moved with lw/sw in the copy
 * loops below; they only carry raw 32-bit words there. */
334 float temp1
, temp2
, temp3
, temp4
;
/* History seeding, variant A: fill h_SL rows from the first envelope. */
338 for (i
= 0; i
< h_SL
; i
++) {
339 memcpy(g_temp
[i
+ 2*ch_data
->t_env
[0]], sbr
->gain
[0], m_max
* sizeof(sbr
->gain
[0][0]));
340 memcpy(q_temp
[i
+ 2*ch_data
->t_env
[0]], sbr
->q_m
[0], m_max
* sizeof(sbr
->q_m
[0][0]));
/* History seeding, variant B: carry four rows over from the position
 * given by t_env_num_env_old. */
343 memcpy(g_temp
[2*ch_data
->t_env
[0]], g_temp
[2*ch_data
->t_env_num_env_old
], 4*sizeof(g_temp
[0]));
344 memcpy(q_temp
[2*ch_data
->t_env
[0]], q_temp
[2*ch_data
->t_env_num_env_old
], 4*sizeof(q_temp
[0]));
/* Per-slot copy of gain (pok) and noise level (pok1) rows into the
 * history buffers. */
347 for (e
= 0; e
< ch_data
->bs_num_env
; e
++) {
348 for (i
= 2 * ch_data
->t_env
[e
]; i
< 2 * ch_data
->t_env
[e
+ 1]; i
++) {
349 g_temp1
= g_temp
[h_SL
+ i
];
351 q_temp1
= q_temp
[h_SL
+ i
];
354 /* loop unrolled 4 times */
355 for (j
=0; j
<(size
>>2); j
++) {
357 "lw %[temp1], 0(%[pok]) \n\t"
358 "lw %[temp2], 4(%[pok]) \n\t"
359 "lw %[temp3], 8(%[pok]) \n\t"
360 "lw %[temp4], 12(%[pok]) \n\t"
361 "sw %[temp1], 0(%[g_temp1]) \n\t"
362 "sw %[temp2], 4(%[g_temp1]) \n\t"
363 "sw %[temp3], 8(%[g_temp1]) \n\t"
364 "sw %[temp4], 12(%[g_temp1]) \n\t"
365 "lw %[temp1], 0(%[pok1]) \n\t"
366 "lw %[temp2], 4(%[pok1]) \n\t"
367 "lw %[temp3], 8(%[pok1]) \n\t"
368 "lw %[temp4], 12(%[pok1]) \n\t"
369 "sw %[temp1], 0(%[q_temp1]) \n\t"
370 "sw %[temp2], 4(%[q_temp1]) \n\t"
371 "sw %[temp3], 8(%[q_temp1]) \n\t"
372 "sw %[temp4], 12(%[q_temp1]) \n\t"
373 "addiu %[pok], %[pok], 16 \n\t"
374 "addiu %[g_temp1], %[g_temp1], 16 \n\t"
375 "addiu %[pok1], %[pok1], 16 \n\t"
376 "addiu %[q_temp1], %[q_temp1], 16 \n\t"
378 : [temp1
]"=&r"(temp1
), [temp2
]"=&r"(temp2
),
379 [temp3
]"=&r"(temp3
), [temp4
]"=&r"(temp4
),
380 [pok
]"+r"(pok
), [g_temp1
]"+r"(g_temp1
),
381 [pok1
]"+r"(pok1
), [q_temp1
]"+r"(q_temp1
)
/* Tail: remaining size & 3 words, one per iteration.  temp3 and temp4 are
 * listed as outputs but are not referenced by the asm text of this loop. */
387 for (j
=0; j
<(size
&3); j
++) {
389 "lw %[temp1], 0(%[pok]) \n\t"
390 "lw %[temp2], 0(%[pok1]) \n\t"
391 "sw %[temp1], 0(%[g_temp1]) \n\t"
392 "sw %[temp2], 0(%[q_temp1]) \n\t"
393 "addiu %[pok], %[pok], 4 \n\t"
394 "addiu %[g_temp1], %[g_temp1], 4 \n\t"
395 "addiu %[pok1], %[pok1], 4 \n\t"
396 "addiu %[q_temp1], %[q_temp1], 4 \n\t"
398 : [temp1
]"=&r"(temp1
), [temp2
]"=&r"(temp2
),
399 [temp3
]"=&r"(temp3
), [temp4
]"=&r"(temp4
),
400 [pok
]"+r"(pok
), [g_temp1
]"+r"(g_temp1
),
401 [pok1
]"+r"(pok1
), [q_temp1
]"+r"(q_temp1
)
/* Assembly pass: filter, then add noise or sinusoids, per time slot. */
409 for (e
= 0; e
< ch_data
->bs_num_env
; e
++) {
410 for (i
= 2 * ch_data
->t_env
[e
]; i
< 2 * ch_data
->t_env
[e
+ 1]; i
++) {
411 LOCAL_ALIGNED_16(float, g_filt_tab
, [48]);
412 LOCAL_ALIGNED_16(float, q_filt_tab
, [48]);
413 float *g_filt
, *q_filt
;
/* Smoothing is applied only when enabled (h_SL != 0) and e is neither of
 * the two envelopes named by e_a; it accumulates h_SL + 1 history rows
 * weighted by h_smooth. */
415 if (h_SL
&& e
!= e_a
[0] && e
!= e_a
[1]) {
419 for (m
= 0; m
< m_max
; m
++) {
420 const int idx1
= i
+ h_SL
;
424 for (j
= 0; j
<= h_SL
; j
++) {
425 g_filt
[m
] += g_temp
[idx1
- j
][m
] * h_smooth
[j
];
426 q_filt
[m
] += q_temp
[idx1
- j
][m
] * h_smooth
[j
];
/* Unsmoothed path: use the history row for this slot directly. */
430 g_filt
= g_temp
[i
+ h_SL
];
434 sbr
->dsp
.hf_g_filt(Y1
[i
] + kx
, X_high
+ kx
, g_filt
, m_max
,
435 i
+ ENVELOPE_ADJUSTMENT_OFFSET
);
437 if (e
!= e_a
[0] && e
!= e_a
[1]) {
438 sbr
->dsp
.hf_apply_noise
[indexsine
](Y1
[i
] + kx
, sbr
->s_m
[e
],
/* Sinusoid addition.  idx (0 or 1) selects which component of each
 * Y1[i][kx + m] pair is written; A is +1 or -1; B equals A when idx is 0
 * and -A when idx is 1.  The asm handles two values of m per pass
 * (scaled by A_f and B_f respectively); the scalar statement after it
 * handles a single element using A.
 * NOTE(review): temp2 is declared as an asm input here while the loads
 * that set temp2/temp3 are not visible in this listing - confirm the
 * operand directions against the complete file. */
442 int idx
= indexsine
&1;
443 int A
= (1-((indexsine
+(kx
& 1))&2));
444 int B
= (A
^(-idx
)) + idx
;
445 float *out
= &Y1
[i
][kx
][idx
];
446 float *in
= sbr
->s_m
[e
];
447 float temp0
, temp1
, temp2
, temp3
, temp4
, temp5
;
448 float A_f
= (float)A
;
449 float B_f
= (float)B
;
451 for (m
= 0; m
+1 < m_max
; m
+=2) {
457 "lwc1 %[temp0], 0(%[in]) \n\t"
458 "lwc1 %[temp1], 4(%[in]) \n\t"
459 "madd.s %[temp4], %[temp2], %[temp0], %[A_f] \n\t"
460 "madd.s %[temp5], %[temp3], %[temp1], %[B_f] \n\t"
461 "swc1 %[temp4], 0(%[out]) \n\t"
462 "swc1 %[temp5], 8(%[out]) \n\t"
463 "addiu %[in], %[in], 8 \n\t"
464 "addiu %[out], %[out], 16 \n\t"
466 : [temp0
]"=&f" (temp0
), [temp1
]"=&f"(temp1
),
467 [temp4
]"=&f" (temp4
), [temp5
]"=&f"(temp5
),
468 [in
]"+r"(in
), [out
]"+r"(out
)
469 : [A_f
]"f"(A_f
), [B_f
]"f"(B_f
), [temp2
]"f"(temp2
),
475 out
[2*m
] += in
[m
] * A
;
/* Advance the running indices: indexnoise wraps at 512 entries,
 * indexsine cycles through 0..3. */
477 indexnoise
= (indexnoise
+ m_max
) & 0x1ff;
478 indexsine
= (indexsine
+ 1) & 3;
/* Persist the running indices for the next call. */
481 ch_data
->f_indexnoise
= indexnoise
;
482 ch_data
->f_indexsine
= indexsine
;
/*
 * sbr_hf_inverse_filter_mips: for each k in [0, k0) runs
 * dsp->autocorrelate on X_low[k] and derives the complex coefficient pairs
 * alpha1[k] and alpha0[k] from the resulting phi values, using MIPS FPU
 * instructions.
 *
 * Byte offsets into phi used by the first asm statement, assuming phi is
 * laid out as float [3][2][2] (TODO confirm against LOCAL_ALIGNED_16):
 *   40 -> phi[2][1][0]   16 -> phi[1][0][0]
 *   24 -> phi[1][1][0]   28 -> phi[1][1][1]
 *    0 -> phi[0][0][0]    4 -> phi[0][0][1]
 *    8 -> phi[0][1][0]   12 -> phi[0][1][1]
 *
 * Steps visible in this listing:
 *  1. dk = phi[2][1][0] * phi[1][0][0]
 *          - (phi[1][1][0]^2 + phi[1][1][1]^2) / c
 *  2. alpha1[k] is computed from the loaded phi values, divided by dk and
 *     stored through alpha_1.
 *  3. alpha0[k] is computed from alpha1[k] and the loaded phi values,
 *     divided by temp1 (phi[1][0][0]), negated and stored through alpha_0.
 *  4. res1 / res2 are the sums of squares of the two components of
 *     alpha1[k] / alpha0[k]; they are compared against 16.0f.
 *
 * NOTE(review): this listing appears to be missing lines, including the
 * declarations of k and dk, the value assigned to c, the conditions that
 * select each asm statement (and any zero-denominator handling), the asm
 * open/close lines, and the body of the final if.  Confirm against the
 * complete file before editing.
 */
485 static void sbr_hf_inverse_filter_mips(SBRDSPContext
*dsp
,
486 float (*alpha0
)[2], float (*alpha1
)[2],
487 const float X_low
[32][40][2], int k0
)
490 float temp0
, temp1
, temp2
, temp3
, temp4
, temp5
, temp6
, temp7
, c
;
491 float *phi1
, *alpha_1
, *alpha_0
, res1
, res2
, temp_real
, temp_im
;
495 for (k
= 0; k
< k0
; k
++) {
496 LOCAL_ALIGNED_16(float, phi
, [3], [2][2]);
498 phi1
= &phi
[0][0][0];
499 alpha_1
= &alpha1
[k
][0];
500 alpha_0
= &alpha0
[k
][0];
501 dsp
->autocorrelate(X_low
[k
], phi
);
/* Load phi values and compute dk.  temp0 is loaded twice: first from
 * offset 40 (used in the dk product), then from offset 12 for the later
 * statements. */
504 "lwc1 %[temp0], 40(%[phi1]) \n\t"
505 "lwc1 %[temp1], 16(%[phi1]) \n\t"
506 "lwc1 %[temp2], 24(%[phi1]) \n\t"
507 "lwc1 %[temp3], 28(%[phi1]) \n\t"
508 "mul.s %[dk], %[temp0], %[temp1] \n\t"
509 "lwc1 %[temp4], 0(%[phi1]) \n\t"
510 "mul.s %[res2], %[temp2], %[temp2] \n\t"
511 "lwc1 %[temp5], 4(%[phi1]) \n\t"
512 "madd.s %[res2], %[res2], %[temp3], %[temp3] \n\t"
513 "lwc1 %[temp6], 8(%[phi1]) \n\t"
514 "div.s %[res2], %[res2], %[c] \n\t"
515 "lwc1 %[temp0], 12(%[phi1]) \n\t"
516 "sub.s %[dk], %[dk], %[res2] \n\t"
518 : [temp0
]"=&f"(temp0
), [temp1
]"=&f"(temp1
), [temp2
]"=&f"(temp2
),
519 [temp3
]"=&f"(temp3
), [temp4
]"=&f"(temp4
), [temp5
]"=&f"(temp5
),
520 [temp6
]"=&f"(temp6
), [res2
]"=&f"(res2
), [dk
]"=&f"(dk
)
521 : [phi1
]"r"(phi1
), [c
]"f"(c
)
/* alpha1[k]: real and imaginary parts are each divided by dk, so this
 * statement must not be reached with dk == 0 (guard not visible here). */
530 "mul.s %[temp_real], %[temp4], %[temp2] \n\t"
531 "nmsub.s %[temp_real], %[temp_real], %[temp5], %[temp3] \n\t"
532 "nmsub.s %[temp_real], %[temp_real], %[temp6], %[temp1] \n\t"
533 "mul.s %[temp_im], %[temp4], %[temp3] \n\t"
534 "madd.s %[temp_im], %[temp_im], %[temp5], %[temp2] \n\t"
535 "nmsub.s %[temp_im], %[temp_im], %[temp0], %[temp1] \n\t"
536 "div.s %[temp_real], %[temp_real], %[dk] \n\t"
537 "div.s %[temp_im], %[temp_im], %[dk] \n\t"
538 "swc1 %[temp_real], 0(%[alpha_1]) \n\t"
539 "swc1 %[temp_im], 4(%[alpha_1]) \n\t"
541 : [temp_real
]"=&f" (temp_real
), [temp_im
]"=&f"(temp_im
)
542 : [phi1
]"r"(phi1
), [temp0
]"f"(temp0
), [temp1
]"f"(temp1
),
543 [temp2
]"f"(temp2
), [temp3
]"f"(temp3
), [temp4
]"f"(temp4
),
544 [temp5
]"f"(temp5
), [temp6
]"f"(temp6
),
545 [alpha_1
]"r"(alpha_1
), [dk
]"f"(dk
)
/* alpha0[k]: reloads alpha1[k] from memory, combines it with the phi
 * values, divides by temp1 and negates.  res1 and res2 are listed as
 * outputs but are not referenced by this statement's asm text. */
555 "lwc1 %[temp6], 0(%[alpha_1]) \n\t"
556 "lwc1 %[temp7], 4(%[alpha_1]) \n\t"
557 "mul.s %[temp_real], %[temp6], %[temp2] \n\t"
558 "add.s %[temp_real], %[temp_real], %[temp4] \n\t"
559 "madd.s %[temp_real], %[temp_real], %[temp7], %[temp3] \n\t"
560 "mul.s %[temp_im], %[temp7], %[temp2] \n\t"
561 "add.s %[temp_im], %[temp_im], %[temp5] \n\t"
562 "nmsub.s %[temp_im], %[temp_im], %[temp6], %[temp3] \n\t"
563 "div.s %[temp_real], %[temp_real], %[temp1] \n\t"
564 "div.s %[temp_im], %[temp_im], %[temp1] \n\t"
565 "neg.s %[temp_real], %[temp_real] \n\t"
566 "neg.s %[temp_im], %[temp_im] \n\t"
567 "swc1 %[temp_real], 0(%[alpha_0]) \n\t"
568 "swc1 %[temp_im], 4(%[alpha_0]) \n\t"
570 : [temp_real
]"=&f"(temp_real
), [temp_im
]"=&f"(temp_im
),
571 [temp6
]"=&f"(temp6
), [temp7
]"=&f"(temp7
),
572 [res1
]"=&f"(res1
), [res2
]"=&f"(res2
)
573 : [alpha_1
]"r"(alpha_1
), [alpha_0
]"r"(alpha_0
),
574 [temp0
]"f"(temp0
), [temp1
]"f"(temp1
), [temp2
]"f"(temp2
),
575 [temp3
]"f"(temp3
), [temp4
]"f"(temp4
), [temp5
]"f"(temp5
)
/* Sum of squares of both stored coefficient pairs: res1 for alpha1[k],
 * res2 for alpha0[k]. */
581 "lwc1 %[temp1], 0(%[alpha_1]) \n\t"
582 "lwc1 %[temp2], 4(%[alpha_1]) \n\t"
583 "lwc1 %[temp_real], 0(%[alpha_0]) \n\t"
584 "lwc1 %[temp_im], 4(%[alpha_0]) \n\t"
585 "mul.s %[res1], %[temp1], %[temp1] \n\t"
586 "madd.s %[res1], %[res1], %[temp2], %[temp2] \n\t"
587 "mul.s %[res2], %[temp_real], %[temp_real] \n\t"
588 "madd.s %[res2], %[res2], %[temp_im], %[temp_im] \n\t"
590 : [temp_real
]"=&f"(temp_real
), [temp_im
]"=&f"(temp_im
),
591 [temp1
]"=&f"(temp1
), [temp2
]"=&f"(temp2
),
592 [res1
]"=&f"(res1
), [res2
]"=&f"(res2
)
593 : [alpha_1
]"r"(alpha_1
), [alpha_0
]"r"(alpha_0
)
/* Threshold test on either sum of squares reaching 16; the action taken
 * is not visible in this listing. */
597 if (res1
>= 16.0f
|| res2
>= 16.0f
) {
605 #endif /* HAVE_MIPSFPU */
606 #endif /* HAVE_INLINE_ASM */
/*
 * ff_aacsbr_func_ptr_init_mips: installs the MIPS implementations defined
 * in this file into the function-pointer slots of the AACSBRContext c.
 *
 * sbr_lf_gen and sbr_x_gen are assigned first; sbr_hf_inverse_filter and
 * sbr_hf_assemble follow.  The trailing #endif comments indicate the
 * assignments sit inside HAVE_INLINE_ASM, with HAVE_MIPSFPU closing first.
 *
 * NOTE(review): the opening #if lines and the function braces are not
 * visible in this listing, so the exact extent of each conditional region
 * should be confirmed against the complete file.
 */
608 void ff_aacsbr_func_ptr_init_mips(AACSBRContext
*c
)
611 c
->sbr_lf_gen
= sbr_lf_gen_mips
;
612 c
->sbr_x_gen
= sbr_x_gen_mips
;
614 c
->sbr_hf_inverse_filter
= sbr_hf_inverse_filter_mips
;
615 c
->sbr_hf_assemble
= sbr_hf_assemble_mips
;
616 #endif /* HAVE_MIPSFPU */
617 #endif /* HAVE_INLINE_ASM */