3 * MIPS Technologies, Inc., California.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * Author: Stanislav Ocovaj (socovaj@mips.com)
30 * Szabolcs Pal (sabolc@mips.com)
32 * AAC coefficients encoder optimized for MIPS floating-point architecture
34 * This file is part of FFmpeg.
36 * FFmpeg is free software; you can redistribute it and/or
37 * modify it under the terms of the GNU Lesser General Public
38 * License as published by the Free Software Foundation; either
39 * version 2.1 of the License, or (at your option) any later version.
41 * FFmpeg is distributed in the hope that it will be useful,
42 * but WITHOUT ANY WARRANTY; without even the implied warranty of
43 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
44 * Lesser General Public License for more details.
46 * You should have received a copy of the GNU Lesser General Public
47 * License along with FFmpeg; if not, write to the Free Software
48 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
53 * Reference: libavcodec/aaccoder.c
56 #include "libavutil/libm.h"
59 #include "libavutil/mathematics.h"
60 #include "libavcodec/avcodec.h"
61 #include "libavcodec/put_bits.h"
62 #include "libavcodec/aac.h"
63 #include "libavcodec/aacenc.h"
64 #include "libavcodec/aactab.h"
67 typedef struct BandCodingPath
{
/*
 * 64-entry table of small bit counts: entries 0..30 hold 5, entries 31..62
 * hold 10 and entry 63 holds 15. It is element 0 of run_value_bits.
 * NOTE(review): presumably the cost in bits of coding a run length for long
 * windows -- confirm against the code that indexes run_value_bits.
 */
73 static const uint8_t run_value_bits_long
[64] = {
74 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
75 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10,
76 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
77 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
/*
 * 16-entry table of small bit counts: entries 0..6 hold 3, entries 7..14
 * hold 6 and entry 15 holds 9. It is element 1 of run_value_bits.
 * NOTE(review): presumably the short-window counterpart of
 * run_value_bits_long -- confirm.
 */
80 static const uint8_t run_value_bits_short
[16] = {
81 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
/*
 * Two-element selector over the run-length bit tables:
 * index 0 -> run_value_bits_long, index 1 -> run_value_bits_short.
 */
84 static const uint8_t * const run_value_bits
[2] = {
85 run_value_bits_long
, run_value_bits_short
/*
 * 81-entry (3^4) table. Each entry equals the number of non-zero digits in
 * the four-digit base-3 representation of its index (e.g. index 40 = 1111
 * in base 3 -> 4). get_band_numbits_UQUAD_mips adds uquad_sign_bits[curidx]
 * to the running bit count next to p_bits[curidx].
 * NOTE(review): i.e. presumably one sign bit per non-zero coefficient of an
 * unsigned quad -- confirm against the curidx computation.
 */
88 static const uint8_t uquad_sign_bits
[81] = {
89 0, 1, 1, 1, 2, 2, 1, 2, 2,
90 1, 2, 2, 2, 3, 3, 2, 3, 3,
91 1, 2, 2, 2, 3, 3, 2, 3, 3,
92 1, 2, 2, 2, 3, 3, 2, 3, 3,
93 2, 3, 3, 3, 4, 4, 3, 4, 4,
94 2, 3, 3, 3, 4, 4, 3, 4, 4,
95 1, 2, 2, 2, 3, 3, 2, 3, 3,
96 2, 3, 3, 3, 4, 4, 3, 4, 4,
97 2, 3, 3, 3, 4, 4, 3, 4, 4
/*
 * 64-entry (8 x 8) table laid out as 8 rows of 8. Entry [8 * r + c] is the
 * number of non-zero values among r and c (0, 1 or 2).
 * get_band_numbits_UPAIR7_mips adds entries of this table to its bit count.
 * NOTE(review): presumably the sign-bit count for an unsigned pair with
 * magnitudes 0..7 -- confirm against the curidx computation.
 */
100 static const uint8_t upair7_sign_bits
[64] = {
101 0, 1, 1, 1, 1, 1, 1, 1,
102 1, 2, 2, 2, 2, 2, 2, 2,
103 1, 2, 2, 2, 2, 2, 2, 2,
104 1, 2, 2, 2, 2, 2, 2, 2,
105 1, 2, 2, 2, 2, 2, 2, 2,
106 1, 2, 2, 2, 2, 2, 2, 2,
107 1, 2, 2, 2, 2, 2, 2, 2,
108 1, 2, 2, 2, 2, 2, 2, 2,
/*
 * 169-entry (13 x 13) table laid out as 13 rows of 13. Entry [13 * r + c]
 * is the number of non-zero values among r and c (0, 1 or 2).
 * get_band_numbits_UPAIR12_mips adds entries of this table to its bit count.
 * NOTE(review): presumably the sign-bit count for an unsigned pair with
 * magnitudes 0..12 -- confirm against the curidx computation.
 */
111 static const uint8_t upair12_sign_bits
[169] = {
112 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
115 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
116 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
117 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
118 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
119 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
122 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
123 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
124 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/*
 * 289-entry (17 x 17) table laid out as 17 rows of 17. Entry [17 * r + c]
 * is the number of non-zero values among r and c (0, 1 or 2).
 * NOTE(review): presumably the sign-bit count for the escape codebook, whose
 * quantized magnitudes are clamped to 0..16 -- confirm against its users.
 */
127 static const uint8_t esc_sign_bits
[289] = {
128 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
130 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
131 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
132 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
133 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
134 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
135 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
136 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
137 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
138 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
139 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
140 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
141 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
143 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
144 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/**
 * Vector helper writing to out from in, walking size elements in steps of 4.
 *
 * @param out  destination buffer
 * @param in   source buffer (read-only)
 * @param size element count; the loop advances by 4, so callers presumably
 *             pass a multiple of 4 -- TODO confirm
 *
 * NOTE(review): the name suggests out[k] = |in[k]|^(3/4); the per-element
 * arithmetic is not part of the lines shown here -- confirm before relying
 * on it.
 */
147 static void abs_pow34_v(float *out
, const float *in
, const int size
) {
/* the whole body is compiled only when USE_REALLY_FULL_SEARCH is undefined */
148 #ifndef USE_REALLY_FULL_SEARCH
/* four float temporaries, one per element handled in each iteration */
151 float ax
, bx
, cx
, dx
;
153 for (i
= 0; i
< size
; i
+= 4) {
174 #endif /* USE_REALLY_FULL_SEARCH */
/**
 * Scan a group of windows and track the largest value seen in maxval.
 *
 * @param group_len number of windows scanned (outer loop bound)
 * @param swb_size  number of coefficients scanned per window (inner loop bound)
 * @param scaled    coefficient buffer; window w2 starts at scaled[w2 * 128]
 *
 * NOTE(review): the initial value of maxval and the return statement are not
 * among the lines shown here; presumably maxval is returned -- confirm.
 */
177 static float find_max_val(int group_len
, int swb_size
, const float *scaled
) {
180 for (w2
= 0; w2
< group_len
; w2
++) {
181 for (i
= 0; i
< swb_size
; i
++) {
/* consecutive windows are 128 floats apart */
182 maxval
= FFMAX(maxval
, scaled
[w2
*128+i
]);
/**
 * Pick a codebook number cb from the quantized maximum value of a band.
 *
 * @param maxval largest (scaled) coefficient value of the band
 * @param sf     scalefactor index; selects Q from ff_aac_pow2sf_tab
 *
 * Visible mapping of qmaxval to cb: 0 -> 0, 1 -> 1, 2 -> 3, <= 4 -> 5,
 * <= 7 -> 7, <= 12 -> 9.
 * NOTE(review): the branch for qmaxval > 12, the declaration of qmaxval/cb
 * and the return statement are not among the lines shown here -- confirm.
 */
188 static int find_min_book(float maxval
, int sf
) {
189 float Q
= ff_aac_pow2sf_tab
[POW_SF2_ZERO
- sf
+ SCALE_ONE_POS
- SCALE_DIV_512
];
/* sqrt(Q * sqrt(Q)) == Q^(3/4) */
190 float Q34
= sqrtf(Q
* sqrtf(Q
));
/* same 0.4054 rounding offset as used by the quantizing loops in this file */
192 qmaxval
= maxval
* Q34
+ 0.4054f
;
193 if (qmaxval
== 0) cb
= 0;
194 else if (qmaxval
== 1) cb
= 1;
195 else if (qmaxval
== 2) cb
= 3;
196 else if (qmaxval
<= 4) cb
= 5;
197 else if (qmaxval
<= 7) cb
= 7;
198 else if (qmaxval
<= 12) cb
= 9;
204 * Functions developed from template function and optimized for quantizing and encoding band
/**
 * Quantize a band four coefficients at a time and write one codeword per
 * group of four with put_bits() (signed-quad handler of
 * quantize_and_encode_band_cost_arr).
 *
 * @param s         encoder context; s->scoefs is passed to abs_pow34_v() as
 *                  its output buffer
 * @param pb        bitstream writer receiving the codewords
 * @param in        input coefficients; their raw sign bits are read in the asm
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients, consumed in steps of 4
 * @param scale_idx scalefactor index; selects Q34 from ff_aac_pow34sf_tab
 * @param cb        codebook number; tables are taken at index cb - 1
 * @param lambda    not referenced by the statements shown here
 * @param uplim     not referenced by the statements shown here
 *
 * NOTE(review): quantize_and_encode_band_mips passes NULL for scaled, yet
 * scaled[i] is read below; presumably scaled is re-pointed at s->scoefs
 * right after abs_pow34_v() on a line not shown here -- confirm.
 */
206 static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext
*s
,
207 PutBitContext
*pb
, const float *in
,
208 const float *scaled
, int size
, int scale_idx
,
209 int cb
, const float lambda
, const float uplim
,
212 const float Q34
= ff_aac_pow34sf_tab
[POW_SF2_ZERO
- scale_idx
+ SCALE_ONE_POS
- SCALE_DIV_512
];
214 int qc1
, qc2
, qc3
, qc4
;
/* per-codebook code lengths and code values */
216 uint8_t *p_bits
= (uint8_t *)ff_aac_spectral_bits
[cb
-1];
217 uint16_t *p_codes
= (uint16_t *)ff_aac_spectral_codes
[cb
-1];
219 abs_pow34_v(s
->scoefs
, in
, size
);
221 for (i
= 0; i
< size
; i
+= 4) {
/* integer view of in[i..i+3], used only to fetch the sign bits in the asm */
223 int *in_int
= (int *)&in
[i
];
/* float -> int assignment truncates the scaled, offset value */
225 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
226 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
227 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
228 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
232 ".set noreorder \n\t"
/* qcN = (0 < qcN): collapse every quantized magnitude to 0 or 1 */
234 "slt %[qc1], $zero, %[qc1] \n\t"
235 "slt %[qc2], $zero, %[qc2] \n\t"
236 "slt %[qc3], $zero, %[qc3] \n\t"
237 "slt %[qc4], $zero, %[qc4] \n\t"
/* load the raw words of in[i..i+3] and keep only bit 31 (the sign bit) */
238 "lw $t0, 0(%[in_int]) \n\t"
239 "lw $t1, 4(%[in_int]) \n\t"
240 "lw $t2, 8(%[in_int]) \n\t"
241 "lw $t3, 12(%[in_int]) \n\t"
242 "srl $t0, $t0, 31 \n\t"
243 "srl $t1, $t1, 31 \n\t"
244 "srl $t2, $t2, 31 \n\t"
245 "srl $t3, $t3, 31 \n\t"
/* $t4..$t7 = -qcN; movn replaces qcN by its negation when the sign bit is set */
246 "subu $t4, $zero, %[qc1] \n\t"
247 "subu $t5, $zero, %[qc2] \n\t"
248 "subu $t6, $zero, %[qc3] \n\t"
249 "subu $t7, $zero, %[qc4] \n\t"
250 "movn %[qc1], $t4, $t0 \n\t"
251 "movn %[qc2], $t5, $t1 \n\t"
252 "movn %[qc3], $t6, $t2 \n\t"
253 "movn %[qc4], $t7, $t3 \n\t"
/* operands: qc1..qc4 are read-write, in_int is read-only, $t0-$t7 are clobbered */
257 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
258 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
)
259 : [in_int
]"r"(in_int
)
260 : "t0", "t1", "t2", "t3",
261 "t4", "t5", "t6", "t7",
/* write the codeword p_codes[curidx] using p_bits[curidx] bits */
274 put_bits(pb
, p_bits
[curidx
], p_codes
[curidx
]);
/**
 * Quantize a band four coefficients at a time, clamp each magnitude to 2 and
 * write one codeword followed by the sign bits of the non-zero coefficients
 * (unsigned-quad handler of quantize_and_encode_band_cost_arr).
 *
 * @param s         encoder context; s->scoefs is passed to abs_pow34_v() as
 *                  its output buffer
 * @param pb        bitstream writer receiving the codewords
 * @param in        input coefficients; their raw sign bits are read in the asm
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients, consumed in steps of 4
 * @param scale_idx scalefactor index; selects Q34 from ff_aac_pow34sf_tab
 * @param cb        codebook number; tables are taken at index cb - 1
 * @param lambda    not referenced by the statements shown here
 * @param uplim     not referenced by the statements shown here
 *
 * NOTE(review): quantize_and_encode_band_mips passes NULL for scaled, yet
 * scaled[i] is read below; presumably scaled is re-pointed at s->scoefs
 * right after abs_pow34_v() on a line not shown here -- confirm.
 */
278 static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext
*s
,
279 PutBitContext
*pb
, const float *in
,
280 const float *scaled
, int size
, int scale_idx
,
281 int cb
, const float lambda
, const float uplim
,
284 const float Q34
= ff_aac_pow34sf_tab
[POW_SF2_ZERO
- scale_idx
+ SCALE_ONE_POS
- SCALE_DIV_512
];
286 int qc1
, qc2
, qc3
, qc4
;
/* per-codebook code lengths and code values */
288 uint8_t *p_bits
= (uint8_t *)ff_aac_spectral_bits
[cb
-1];
289 uint16_t *p_codes
= (uint16_t *)ff_aac_spectral_codes
[cb
-1];
291 abs_pow34_v(s
->scoefs
, in
, size
);
293 for (i
= 0; i
< size
; i
+= 4) {
/* sign: packed sign bits; count: number of non-zero quantized values */
294 int curidx
, sign
, count
;
/* integer view of in[i..i+3], used only to fetch the sign bits in the asm */
295 int *in_int
= (int *)&in
[i
];
297 unsigned int v_codes
;
/* float -> int assignment truncates the scaled, offset value */
299 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
300 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
301 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
302 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
306 ".set noreorder \n\t"
/* $t4 = 2 (clamp limit), sign = 0 */
308 "ori $t4, $zero, 2 \n\t"
309 "ori %[sign], $zero, 0 \n\t"
/* qcN = min(qcN, 2): $tN = (2 < qcN), movn stores 2 when that flag is set */
310 "slt $t0, $t4, %[qc1] \n\t"
311 "slt $t1, $t4, %[qc2] \n\t"
312 "slt $t2, $t4, %[qc3] \n\t"
313 "slt $t3, $t4, %[qc4] \n\t"
314 "movn %[qc1], $t4, $t0 \n\t"
315 "movn %[qc2], $t4, $t1 \n\t"
316 "movn %[qc3], $t4, $t2 \n\t"
317 "movn %[qc4], $t4, $t3 \n\t"
/* raw words of in[i..i+3]; (word < 0) is 1 exactly when the sign bit is set */
318 "lw $t0, 0(%[in_int]) \n\t"
319 "lw $t1, 4(%[in_int]) \n\t"
320 "lw $t2, 8(%[in_int]) \n\t"
321 "lw $t3, 12(%[in_int]) \n\t"
/*
 * For each coefficient in order: if qcN != 0, sign = (sign << 1) | signbit.
 * The count of non-zero values is accumulated in between:
 * count = (0 < qc1) + (0 < qc2) + (0 < qc3) + (0 < qc4).
 */
322 "slt $t0, $t0, $zero \n\t"
323 "movn %[sign], $t0, %[qc1] \n\t"
324 "slt $t1, $t1, $zero \n\t"
325 "slt $t2, $t2, $zero \n\t"
326 "slt $t3, $t3, $zero \n\t"
327 "sll $t0, %[sign], 1 \n\t"
328 "or $t0, $t0, $t1 \n\t"
329 "movn %[sign], $t0, %[qc2] \n\t"
330 "slt $t4, $zero, %[qc1] \n\t"
331 "slt $t1, $zero, %[qc2] \n\t"
332 "slt %[count], $zero, %[qc3] \n\t"
333 "sll $t0, %[sign], 1 \n\t"
334 "or $t0, $t0, $t2 \n\t"
335 "movn %[sign], $t0, %[qc3] \n\t"
336 "slt $t2, $zero, %[qc4] \n\t"
337 "addu %[count], %[count], $t4 \n\t"
338 "addu %[count], %[count], $t1 \n\t"
339 "sll $t0, %[sign], 1 \n\t"
340 "or $t0, $t0, $t3 \n\t"
341 "movn %[sign], $t0, %[qc4] \n\t"
342 "addu %[count], %[count], $t2 \n\t"
/* operands: qc1..qc4 read-write; sign and count early-clobber outputs; in_int input */
346 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
347 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
),
348 [sign
]"=&r"(sign
), [count
]"=&r"(count
)
349 : [in_int
]"r"(in_int
)
350 : "t0", "t1", "t2", "t3", "t4",
/* codeword shifted up by count, with the low count bits of sign appended */
362 v_codes
= (p_codes
[curidx
] << count
) | (sign
& ((1 << count
) - 1));
363 v_bits
= p_bits
[curidx
] + count
;
364 put_bits(pb
, v_bits
, v_codes
);
/**
 * Quantize a band four coefficients at a time, clamp each magnitude to 4,
 * re-apply the input signs and write two codewords (one per pair) with a
 * single put_bits() call (signed-pair handler of
 * quantize_and_encode_band_cost_arr).
 *
 * @param s         encoder context; s->scoefs is passed to abs_pow34_v() as
 *                  its output buffer
 * @param pb        bitstream writer receiving the codewords
 * @param in        input coefficients; their raw sign bits are read in the asm
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients, consumed in steps of 4
 * @param scale_idx scalefactor index; selects Q34 from ff_aac_pow34sf_tab
 * @param cb        codebook number; tables are taken at index cb - 1
 * @param lambda    not referenced by the statements shown here
 * @param uplim     not referenced by the statements shown here
 *
 * NOTE(review): quantize_and_encode_band_mips passes NULL for scaled, yet
 * scaled[i] is read below; presumably scaled is re-pointed at s->scoefs
 * right after abs_pow34_v() on a line not shown here -- confirm.
 */
368 static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext
*s
,
369 PutBitContext
*pb
, const float *in
,
370 const float *scaled
, int size
, int scale_idx
,
371 int cb
, const float lambda
, const float uplim
,
374 const float Q34
= ff_aac_pow34sf_tab
[POW_SF2_ZERO
- scale_idx
+ SCALE_ONE_POS
- SCALE_DIV_512
];
376 int qc1
, qc2
, qc3
, qc4
;
/* per-codebook code lengths and code values */
378 uint8_t *p_bits
= (uint8_t *)ff_aac_spectral_bits
[cb
-1];
379 uint16_t *p_codes
= (uint16_t *)ff_aac_spectral_codes
[cb
-1];
381 abs_pow34_v(s
->scoefs
, in
, size
);
383 for (i
= 0; i
< size
; i
+= 4) {
/* integer view of in[i..i+3], used only to fetch the sign bits in the asm */
385 int *in_int
= (int *)&in
[i
];
387 unsigned int v_codes
;
/* float -> int assignment truncates the scaled, offset value */
389 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
390 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
391 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
392 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
396 ".set noreorder \n\t"
/* qcN = min(qcN, 4): $tN = (4 < qcN), movn stores 4 when that flag is set */
398 "ori $t4, $zero, 4 \n\t"
399 "slt $t0, $t4, %[qc1] \n\t"
400 "slt $t1, $t4, %[qc2] \n\t"
401 "slt $t2, $t4, %[qc3] \n\t"
402 "slt $t3, $t4, %[qc4] \n\t"
403 "movn %[qc1], $t4, $t0 \n\t"
404 "movn %[qc2], $t4, $t1 \n\t"
405 "movn %[qc3], $t4, $t2 \n\t"
406 "movn %[qc4], $t4, $t3 \n\t"
/* load the raw words of in[i..i+3] and keep only bit 31 (the sign bit) */
407 "lw $t0, 0(%[in_int]) \n\t"
408 "lw $t1, 4(%[in_int]) \n\t"
409 "lw $t2, 8(%[in_int]) \n\t"
410 "lw $t3, 12(%[in_int]) \n\t"
411 "srl $t0, $t0, 31 \n\t"
412 "srl $t1, $t1, 31 \n\t"
413 "srl $t2, $t2, 31 \n\t"
414 "srl $t3, $t3, 31 \n\t"
/* $t4..$t7 = -qcN; movn replaces qcN by its negation when the sign bit is set */
415 "subu $t4, $zero, %[qc1] \n\t"
416 "subu $t5, $zero, %[qc2] \n\t"
417 "subu $t6, $zero, %[qc3] \n\t"
418 "subu $t7, $zero, %[qc4] \n\t"
419 "movn %[qc1], $t4, $t0 \n\t"
420 "movn %[qc2], $t5, $t1 \n\t"
421 "movn %[qc3], $t6, $t2 \n\t"
422 "movn %[qc4], $t7, $t3 \n\t"
/* operands: qc1..qc4 are read-write, in_int is read-only, $t0-$t7 are clobbered */
426 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
427 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
)
428 : [in_int
]"r"(in_int
)
429 : "t0", "t1", "t2", "t3",
430 "t4", "t5", "t6", "t7",
/* first codeword placed above the second; both emitted with one put_bits() */
440 v_codes
= (p_codes
[curidx
] << p_bits
[curidx2
]) | (p_codes
[curidx2
]);
441 v_bits
= p_bits
[curidx
] + p_bits
[curidx2
];
442 put_bits(pb
, v_bits
, v_codes
);
/**
 * Quantize a band four coefficients at a time, clamp each magnitude to 7 and
 * write, for each of the two pairs, a codeword followed by the sign bits of
 * its non-zero coefficients (unsigned-pair handler of
 * quantize_and_encode_band_cost_arr).
 *
 * @param s         encoder context; s->scoefs is passed to abs_pow34_v() as
 *                  its output buffer
 * @param pb        bitstream writer receiving the codewords
 * @param in        input coefficients; their raw sign bits are read in the asm
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients, consumed in steps of 4
 * @param scale_idx scalefactor index; selects Q34 from ff_aac_pow34sf_tab
 * @param cb        codebook number; tables are taken at index cb - 1
 * @param lambda    not referenced by the statements shown here
 * @param uplim     not referenced by the statements shown here
 *
 * NOTE(review): quantize_and_encode_band_mips passes NULL for scaled, yet
 * scaled[i] is read below; presumably scaled is re-pointed at s->scoefs
 * right after abs_pow34_v() on a line not shown here -- confirm.
 */
446 static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext
*s
,
447 PutBitContext
*pb
, const float *in
,
448 const float *scaled
, int size
, int scale_idx
,
449 int cb
, const float lambda
, const float uplim
,
452 const float Q34
= ff_aac_pow34sf_tab
[POW_SF2_ZERO
- scale_idx
+ SCALE_ONE_POS
- SCALE_DIV_512
];
454 int qc1
, qc2
, qc3
, qc4
;
/* per-codebook code lengths and code values */
456 uint8_t *p_bits
= (uint8_t*) ff_aac_spectral_bits
[cb
-1];
457 uint16_t *p_codes
= (uint16_t*)ff_aac_spectral_codes
[cb
-1];
459 abs_pow34_v(s
->scoefs
, in
, size
);
461 for (i
= 0; i
< size
; i
+= 4) {
/* sign1/count1 belong to (qc1, qc2); sign2/count2 belong to (qc3, qc4) */
462 int curidx
, sign1
, count1
, sign2
, count2
;
/* integer view of in[i..i+3], used only to fetch the sign bits in the asm */
463 int *in_int
= (int *)&in
[i
];
465 unsigned int v_codes
;
/* float -> int assignment truncates the scaled, offset value */
467 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
468 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
469 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
470 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
474 ".set noreorder \n\t"
/* $t4 = 7 (clamp limit), sign1 = sign2 = 0 */
476 "ori $t4, $zero, 7 \n\t"
477 "ori %[sign1], $zero, 0 \n\t"
478 "ori %[sign2], $zero, 0 \n\t"
/* qcN = min(qcN, 7): $tN = (7 < qcN), movn stores 7 when that flag is set */
479 "slt $t0, $t4, %[qc1] \n\t"
480 "slt $t1, $t4, %[qc2] \n\t"
481 "slt $t2, $t4, %[qc3] \n\t"
482 "slt $t3, $t4, %[qc4] \n\t"
483 "movn %[qc1], $t4, $t0 \n\t"
484 "movn %[qc2], $t4, $t1 \n\t"
485 "movn %[qc3], $t4, $t2 \n\t"
486 "movn %[qc4], $t4, $t3 \n\t"
/* raw words of in[i..i+3]; (word < 0) is 1 exactly when the sign bit is set */
487 "lw $t0, 0(%[in_int]) \n\t"
488 "lw $t1, 4(%[in_int]) \n\t"
489 "lw $t2, 8(%[in_int]) \n\t"
490 "lw $t3, 12(%[in_int]) \n\t"
/*
 * Per pair, for each coefficient in order: if qcN != 0,
 * signX = (signX << 1) | signbit. sign1 covers in[i], in[i+1];
 * sign2 covers in[i+2], in[i+3].
 */
491 "slt $t0, $t0, $zero \n\t"
492 "movn %[sign1], $t0, %[qc1] \n\t"
493 "slt $t2, $t2, $zero \n\t"
494 "movn %[sign2], $t2, %[qc3] \n\t"
495 "slt $t1, $t1, $zero \n\t"
496 "sll $t0, %[sign1], 1 \n\t"
497 "or $t0, $t0, $t1 \n\t"
498 "movn %[sign1], $t0, %[qc2] \n\t"
499 "slt $t3, $t3, $zero \n\t"
500 "sll $t0, %[sign2], 1 \n\t"
501 "or $t0, $t0, $t3 \n\t"
502 "movn %[sign2], $t0, %[qc4] \n\t"
/* count1 = (0 < qc1) + (0 < qc2); count2 = (0 < qc3) + (0 < qc4) */
503 "slt %[count1], $zero, %[qc1] \n\t"
504 "slt $t1, $zero, %[qc2] \n\t"
505 "slt %[count2], $zero, %[qc3] \n\t"
506 "slt $t2, $zero, %[qc4] \n\t"
507 "addu %[count1], %[count1], $t1 \n\t"
508 "addu %[count2], %[count2], $t2 \n\t"
/* operands: qc1..qc4 read-write; sign/count early-clobber outputs; in_int input */
512 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
513 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
),
514 [sign1
]"=&r"(sign1
), [count1
]"=&r"(count1
),
515 [sign2
]"=&r"(sign2
), [count2
]"=&r"(count2
)
516 : [in_int
]"r"(in_int
)
517 : "t0", "t1", "t2", "t3", "t4",
/* first pair: codeword shifted up by count1, sign bits in the low bits */
524 v_codes
= (p_codes
[curidx
] << count1
) | sign1
;
525 v_bits
= p_bits
[curidx
] + count1
;
526 put_bits(pb
, v_bits
, v_codes
);
/*
 * second pair, same layout with count2/sign2.
 * NOTE(review): curidx is presumably recomputed for the second pair on
 * lines not shown here -- confirm.
 */
531 v_codes
= (p_codes
[curidx
] << count2
) | sign2
;
532 v_bits
= p_bits
[curidx
] + count2
;
533 put_bits(pb
, v_bits
, v_codes
);
/**
 * Quantize a band four coefficients at a time, clamp each magnitude to 12
 * and write, for each of the two pairs, a codeword followed by the sign bits
 * of its non-zero coefficients (unsigned-pair handler of
 * quantize_and_encode_band_cost_arr).
 *
 * @param s         encoder context; s->scoefs is passed to abs_pow34_v() as
 *                  its output buffer
 * @param pb        bitstream writer receiving the codewords
 * @param in        input coefficients; their raw sign bits are read in the asm
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients, consumed in steps of 4
 * @param scale_idx scalefactor index; selects Q34 from ff_aac_pow34sf_tab
 * @param cb        codebook number; tables are taken at index cb - 1
 * @param lambda    not referenced by the statements shown here
 * @param uplim     not referenced by the statements shown here
 *
 * NOTE(review): quantize_and_encode_band_mips passes NULL for scaled, yet
 * scaled[i] is read below; presumably scaled is re-pointed at s->scoefs
 * right after abs_pow34_v() on a line not shown here -- confirm.
 */
537 static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext
*s
,
538 PutBitContext
*pb
, const float *in
,
539 const float *scaled
, int size
, int scale_idx
,
540 int cb
, const float lambda
, const float uplim
,
543 const float Q34
= ff_aac_pow34sf_tab
[POW_SF2_ZERO
- scale_idx
+ SCALE_ONE_POS
- SCALE_DIV_512
];
545 int qc1
, qc2
, qc3
, qc4
;
/* per-codebook code lengths and code values */
547 uint8_t *p_bits
= (uint8_t*) ff_aac_spectral_bits
[cb
-1];
548 uint16_t *p_codes
= (uint16_t*)ff_aac_spectral_codes
[cb
-1];
550 abs_pow34_v(s
->scoefs
, in
, size
);
552 for (i
= 0; i
< size
; i
+= 4) {
/* sign1/count1 belong to (qc1, qc2); sign2/count2 belong to (qc3, qc4) */
553 int curidx
, sign1
, count1
, sign2
, count2
;
/* integer view of in[i..i+3], used only to fetch the sign bits in the asm */
554 int *in_int
= (int *)&in
[i
];
556 unsigned int v_codes
;
/* float -> int assignment truncates the scaled, offset value */
558 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
559 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
560 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
561 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
565 ".set noreorder \n\t"
/* $t4 = 12 (clamp limit), sign1 = sign2 = 0 */
567 "ori $t4, $zero, 12 \n\t"
568 "ori %[sign1], $zero, 0 \n\t"
569 "ori %[sign2], $zero, 0 \n\t"
/* qcN = min(qcN, 12): $tN = (12 < qcN), movn stores 12 when that flag is set */
570 "slt $t0, $t4, %[qc1] \n\t"
571 "slt $t1, $t4, %[qc2] \n\t"
572 "slt $t2, $t4, %[qc3] \n\t"
573 "slt $t3, $t4, %[qc4] \n\t"
574 "movn %[qc1], $t4, $t0 \n\t"
575 "movn %[qc2], $t4, $t1 \n\t"
576 "movn %[qc3], $t4, $t2 \n\t"
577 "movn %[qc4], $t4, $t3 \n\t"
/* raw words of in[i..i+3]; (word < 0) is 1 exactly when the sign bit is set */
578 "lw $t0, 0(%[in_int]) \n\t"
579 "lw $t1, 4(%[in_int]) \n\t"
580 "lw $t2, 8(%[in_int]) \n\t"
581 "lw $t3, 12(%[in_int]) \n\t"
/*
 * Per pair, for each coefficient in order: if qcN != 0,
 * signX = (signX << 1) | signbit. sign1 covers in[i], in[i+1];
 * sign2 covers in[i+2], in[i+3].
 */
582 "slt $t0, $t0, $zero \n\t"
583 "movn %[sign1], $t0, %[qc1] \n\t"
584 "slt $t2, $t2, $zero \n\t"
585 "movn %[sign2], $t2, %[qc3] \n\t"
586 "slt $t1, $t1, $zero \n\t"
587 "sll $t0, %[sign1], 1 \n\t"
588 "or $t0, $t0, $t1 \n\t"
589 "movn %[sign1], $t0, %[qc2] \n\t"
590 "slt $t3, $t3, $zero \n\t"
591 "sll $t0, %[sign2], 1 \n\t"
592 "or $t0, $t0, $t3 \n\t"
593 "movn %[sign2], $t0, %[qc4] \n\t"
/* count1 = (0 < qc1) + (0 < qc2); count2 = (0 < qc3) + (0 < qc4) */
594 "slt %[count1], $zero, %[qc1] \n\t"
595 "slt $t1, $zero, %[qc2] \n\t"
596 "slt %[count2], $zero, %[qc3] \n\t"
597 "slt $t2, $zero, %[qc4] \n\t"
598 "addu %[count1], %[count1], $t1 \n\t"
599 "addu %[count2], %[count2], $t2 \n\t"
/* operands: qc1..qc4 read-write; sign/count early-clobber outputs; in_int input */
603 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
604 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
),
605 [sign1
]"=&r"(sign1
), [count1
]"=&r"(count1
),
606 [sign2
]"=&r"(sign2
), [count2
]"=&r"(count2
)
607 : [in_int
]"r"(in_int
)
608 : "t0", "t1", "t2", "t3", "t4",
/* first pair: codeword shifted up by count1, sign bits in the low bits */
615 v_codes
= (p_codes
[curidx
] << count1
) | sign1
;
616 v_bits
= p_bits
[curidx
] + count1
;
617 put_bits(pb
, v_bits
, v_codes
);
/*
 * second pair, same layout with count2/sign2.
 * NOTE(review): curidx is presumably recomputed for the second pair on
 * lines not shown here -- confirm.
 */
622 v_codes
= (p_codes
[curidx
] << count2
) | sign2
;
623 v_bits
= p_bits
[curidx
] + count2
;
624 put_bits(pb
, v_bits
, v_codes
);
/**
 * Quantize a band four coefficients at a time for the escape handler of
 * quantize_and_encode_band_cost_arr. Two loops are present: the first only
 * writes codeword + sign bits per pair; the second additionally keeps the
 * unclamped magnitudes in c1..c4 and appends an escape sequence for every
 * component whose codebook vector entry equals 64.0f.
 *
 * @param s         encoder context; s->scoefs is passed to abs_pow34_v() as
 *                  its output buffer
 * @param pb        bitstream writer receiving the codewords
 * @param in        input coefficients; their raw sign bits are read in the asm
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients, consumed in steps of 4
 * @param scale_idx scalefactor index; selects Q34 from ff_aac_pow34sf_tab
 * @param cb        codebook number; tables are taken at index cb - 1
 * @param lambda    not referenced by the statements shown here
 * @param uplim     not referenced by the statements shown here
 *
 * NOTE(review): the condition selecting between the two loops is not among
 * the lines shown here (presumably a test on cb) -- confirm.
 * NOTE(review): quantize_and_encode_band_mips passes NULL for scaled, yet
 * scaled[i] is read below; presumably scaled is re-pointed at s->scoefs
 * right after abs_pow34_v() on a line not shown here -- confirm.
 */
628 static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext
*s
,
629 PutBitContext
*pb
, const float *in
,
630 const float *scaled
, int size
, int scale_idx
,
631 int cb
, const float lambda
, const float uplim
,
634 const float Q34
= ff_aac_pow34sf_tab
[POW_SF2_ZERO
- scale_idx
+ SCALE_ONE_POS
- SCALE_DIV_512
];
636 int qc1
, qc2
, qc3
, qc4
;
/* per-codebook code lengths, code values and codebook vectors */
638 uint8_t *p_bits
= (uint8_t* )ff_aac_spectral_bits
[cb
-1];
639 uint16_t *p_codes
= (uint16_t*)ff_aac_spectral_codes
[cb
-1];
640 float *p_vectors
= (float* )ff_aac_codebook_vectors
[cb
-1];
642 abs_pow34_v(s
->scoefs
, in
, size
);
/* ---- first loop: codeword + sign bits only, no escape sequences ---- */
646 for (i
= 0; i
< size
; i
+= 4) {
/* curidx/sign1/count1: pair (qc1, qc2); curidx2/sign2/count2: pair (qc3, qc4) */
647 int curidx
, curidx2
, sign1
, count1
, sign2
, count2
;
/* integer view of in[i..i+3], used only to fetch the sign bits in the asm */
648 int *in_int
= (int *)&in
[i
];
650 unsigned int v_codes
;
/* float -> int assignment truncates the scaled, offset value */
652 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
653 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
654 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
655 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
659 ".set noreorder \n\t"
/* $t4 = 16 (clamp limit), sign1 = sign2 = 0 */
661 "ori $t4, $zero, 16 \n\t"
662 "ori %[sign1], $zero, 0 \n\t"
663 "ori %[sign2], $zero, 0 \n\t"
/* qcN = min(qcN, 16): $tN = (16 < qcN), movn stores 16 when that flag is set */
664 "slt $t0, $t4, %[qc1] \n\t"
665 "slt $t1, $t4, %[qc2] \n\t"
666 "slt $t2, $t4, %[qc3] \n\t"
667 "slt $t3, $t4, %[qc4] \n\t"
668 "movn %[qc1], $t4, $t0 \n\t"
669 "movn %[qc2], $t4, $t1 \n\t"
670 "movn %[qc3], $t4, $t2 \n\t"
671 "movn %[qc4], $t4, $t3 \n\t"
/* raw words of in[i..i+3]; (word < 0) is 1 exactly when the sign bit is set */
672 "lw $t0, 0(%[in_int]) \n\t"
673 "lw $t1, 4(%[in_int]) \n\t"
674 "lw $t2, 8(%[in_int]) \n\t"
675 "lw $t3, 12(%[in_int]) \n\t"
/* per pair, in order: if qcN != 0, signX = (signX << 1) | signbit */
676 "slt $t0, $t0, $zero \n\t"
677 "movn %[sign1], $t0, %[qc1] \n\t"
678 "slt $t2, $t2, $zero \n\t"
679 "movn %[sign2], $t2, %[qc3] \n\t"
680 "slt $t1, $t1, $zero \n\t"
681 "sll $t0, %[sign1], 1 \n\t"
682 "or $t0, $t0, $t1 \n\t"
683 "movn %[sign1], $t0, %[qc2] \n\t"
684 "slt $t3, $t3, $zero \n\t"
685 "sll $t0, %[sign2], 1 \n\t"
686 "or $t0, $t0, $t3 \n\t"
687 "movn %[sign2], $t0, %[qc4] \n\t"
/* count1 = (0 < qc1) + (0 < qc2); count2 = (0 < qc3) + (0 < qc4) */
688 "slt %[count1], $zero, %[qc1] \n\t"
689 "slt $t1, $zero, %[qc2] \n\t"
690 "slt %[count2], $zero, %[qc3] \n\t"
691 "slt $t2, $zero, %[qc4] \n\t"
692 "addu %[count1], %[count1], $t1 \n\t"
693 "addu %[count2], %[count2], $t2 \n\t"
/* operands: qc1..qc4 read-write; sign/count early-clobber outputs; in_int input */
697 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
698 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
),
699 [sign1
]"=&r"(sign1
), [count1
]"=&r"(count1
),
700 [sign2
]"=&r"(sign2
), [count2
]"=&r"(count2
)
701 : [in_int
]"r"(in_int
)
702 : "t0", "t1", "t2", "t3", "t4",
/* first pair: codeword shifted up by count1, sign bits in the low bits */
711 v_codes
= (p_codes
[curidx
] << count1
) | sign1
;
712 v_bits
= p_bits
[curidx
] + count1
;
713 put_bits(pb
, v_bits
, v_codes
);
/* second pair: same layout with curidx2 / count2 / sign2 */
715 v_codes
= (p_codes
[curidx2
] << count2
) | sign2
;
716 v_bits
= p_bits
[curidx2
] + count2
;
717 put_bits(pb
, v_bits
, v_codes
);
/* ---- second loop: as above, plus escape sequences built from c1..c4 ---- */
720 for (i
= 0; i
< size
; i
+= 4) {
721 int curidx
, curidx2
, sign1
, count1
, sign2
, count2
;
/* integer view of in[i..i+3], used only to fetch the sign bits in the asm */
722 int *in_int
= (int *)&in
[i
];
724 unsigned int v_codes
;
/* float -> int assignment truncates the scaled, offset value */
727 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
728 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
729 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
730 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
734 ".set noreorder \n\t"
/* $t4 = 16 (clamp limit), sign1 = sign2 = 0 */
736 "ori $t4, $zero, 16 \n\t"
737 "ori %[sign1], $zero, 0 \n\t"
738 "ori %[sign2], $zero, 0 \n\t"
/*
 * cN = qcN shifted left by 18 with saturation (shll_s.w), then shifted
 * right logically by 18. Taken before qcN is clamped to 16, so cN keeps the
 * full magnitude; for non-negative qcN the saturation caps it at
 * 0x7FFFFFFF >> 18 = 8191.
 */
739 "shll_s.w %[c1], %[qc1], 18 \n\t"
740 "shll_s.w %[c2], %[qc2], 18 \n\t"
741 "shll_s.w %[c3], %[qc3], 18 \n\t"
742 "shll_s.w %[c4], %[qc4], 18 \n\t"
743 "srl %[c1], %[c1], 18 \n\t"
744 "srl %[c2], %[c2], 18 \n\t"
745 "srl %[c3], %[c3], 18 \n\t"
746 "srl %[c4], %[c4], 18 \n\t"
/* qcN = min(qcN, 16): $tN = (16 < qcN), movn stores 16 when that flag is set */
747 "slt $t0, $t4, %[qc1] \n\t"
748 "slt $t1, $t4, %[qc2] \n\t"
749 "slt $t2, $t4, %[qc3] \n\t"
750 "slt $t3, $t4, %[qc4] \n\t"
751 "movn %[qc1], $t4, $t0 \n\t"
752 "movn %[qc2], $t4, $t1 \n\t"
753 "movn %[qc3], $t4, $t2 \n\t"
754 "movn %[qc4], $t4, $t3 \n\t"
/* raw words of in[i..i+3]; (word < 0) is 1 exactly when the sign bit is set */
755 "lw $t0, 0(%[in_int]) \n\t"
756 "lw $t1, 4(%[in_int]) \n\t"
757 "lw $t2, 8(%[in_int]) \n\t"
758 "lw $t3, 12(%[in_int]) \n\t"
/* per pair, in order: if qcN != 0, signX = (signX << 1) | signbit */
759 "slt $t0, $t0, $zero \n\t"
760 "movn %[sign1], $t0, %[qc1] \n\t"
761 "slt $t2, $t2, $zero \n\t"
762 "movn %[sign2], $t2, %[qc3] \n\t"
763 "slt $t1, $t1, $zero \n\t"
764 "sll $t0, %[sign1], 1 \n\t"
765 "or $t0, $t0, $t1 \n\t"
766 "movn %[sign1], $t0, %[qc2] \n\t"
767 "slt $t3, $t3, $zero \n\t"
768 "sll $t0, %[sign2], 1 \n\t"
769 "or $t0, $t0, $t3 \n\t"
770 "movn %[sign2], $t0, %[qc4] \n\t"
/* count1 = (0 < qc1) + (0 < qc2); count2 = (0 < qc3) + (0 < qc4) */
771 "slt %[count1], $zero, %[qc1] \n\t"
772 "slt $t1, $zero, %[qc2] \n\t"
773 "slt %[count2], $zero, %[qc3] \n\t"
774 "slt $t2, $zero, %[qc4] \n\t"
775 "addu %[count1], %[count1], $t1 \n\t"
776 "addu %[count2], %[count2], $t2 \n\t"
/* operands: qc1..qc4 read-write; sign/count/c1..c4 early-clobber outputs; in_int input */
780 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
781 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
),
782 [sign1
]"=&r"(sign1
), [count1
]"=&r"(count1
),
783 [sign2
]"=&r"(sign2
), [count2
]"=&r"(count2
),
784 [c1
]"=&r"(c1
), [c2
]"=&r"(c2
),
785 [c3
]"=&r"(c3
), [c4
]"=&r"(c4
)
786 : [in_int
]"r"(in_int
)
787 : "t0", "t1", "t2", "t3", "t4",
/* first pair: codeword shifted up by count1, sign bits in the low bits */
797 v_codes
= (p_codes
[curidx
] << count1
) | sign1
;
798 v_bits
= p_bits
[curidx
] + count1
;
799 put_bits(pb
, v_bits
, v_codes
);
/*
 * A vector component equal to 64.0f marks an escaped value. The escape
 * sequence is a (len - 3)-bit prefix, (1 << (len - 3)) - 2, followed by the
 * low len bits of cN, i.e. 2 * len - 3 bits in total, with
 * len = av_log2(cN).
 */
801 if (p_vectors
[curidx
*2 ] == 64.0f
) {
802 int len
= av_log2(c1
);
803 v_codes
= (((1 << (len
- 3)) - 2) << len
) | (c1
& ((1 << len
) - 1));
804 put_bits(pb
, len
* 2 - 3, v_codes
);
/* second component of the first pair */
806 if (p_vectors
[curidx
*2+1] == 64.0f
) {
807 int len
= av_log2(c2
);
808 v_codes
= (((1 << (len
- 3)) - 2) << len
) | (c2
& ((1 << len
) - 1));
809 put_bits(pb
, len
*2-3, v_codes
);
/* second pair: codeword shifted up by count2, sign bits in the low bits */
812 v_codes
= (p_codes
[curidx2
] << count2
) | sign2
;
813 v_bits
= p_bits
[curidx2
] + count2
;
814 put_bits(pb
, v_bits
, v_codes
);
/* escape sequences for the two components of the second pair */
816 if (p_vectors
[curidx2
*2 ] == 64.0f
) {
817 int len
= av_log2(c3
);
818 v_codes
= (((1 << (len
- 3)) - 2) << len
) | (c3
& ((1 << len
) - 1));
819 put_bits(pb
, len
* 2 - 3, v_codes
);
821 if (p_vectors
[curidx2
*2+1] == 64.0f
) {
822 int len
= av_log2(c4
);
823 v_codes
= (((1 << (len
- 3)) - 2) << len
) | (c4
& ((1 << len
) - 1));
824 put_bits(pb
, len
* 2 - 3, v_codes
);
/*
 * Table of per-codebook encode handlers, indexed with cb by the
 * quantize_and_encode_band_cost() macro. Every handler except the last one
 * listed here occupies two consecutive slots.
 * NOTE(review): the end of the parameter list, the opening of the
 * initializer and any entry preceding the first SQUAD slot are not among the
 * lines shown here; confirm which cb value maps to which slot.
 */
830 static void (*const quantize_and_encode_band_cost_arr
[])(struct AACEncContext
*s
,
831 PutBitContext
*pb
, const float *in
,
832 const float *scaled
, int size
, int scale_idx
,
833 int cb
, const float lambda
, const float uplim
,
836 quantize_and_encode_band_cost_SQUAD_mips
,
837 quantize_and_encode_band_cost_SQUAD_mips
,
838 quantize_and_encode_band_cost_UQUAD_mips
,
839 quantize_and_encode_band_cost_UQUAD_mips
,
840 quantize_and_encode_band_cost_SPAIR_mips
,
841 quantize_and_encode_band_cost_SPAIR_mips
,
842 quantize_and_encode_band_cost_UPAIR7_mips
,
843 quantize_and_encode_band_cost_UPAIR7_mips
,
844 quantize_and_encode_band_cost_UPAIR12_mips
,
845 quantize_and_encode_band_cost_UPAIR12_mips
,
846 quantize_and_encode_band_cost_ESC_mips
,
/*
 * Dispatch macro: forwards its arguments to the handler stored at
 * quantize_and_encode_band_cost_arr[cb]. cb is used unparenthesized as the
 * array index, so pass a plain expression.
 */
849 #define quantize_and_encode_band_cost( \
850 s, pb, in, scaled, size, scale_idx, cb, \
851 lambda, uplim, bits) \
852 quantize_and_encode_band_cost_arr[cb]( \
853 s, pb, in, scaled, size, scale_idx, cb, \
/**
 * Encode one band by dispatching through quantize_and_encode_band_cost()
 * on cb.
 *
 * @param s         encoder context, forwarded to the handler
 * @param pb        bitstream writer, forwarded to the handler
 * @param in        input coefficients
 * @param size      number of coefficients
 * @param scale_idx scalefactor index
 * @param cb        codebook number selecting the handler
 * @param lambda    forwarded unchanged
 */
856 static void quantize_and_encode_band_mips(struct AACEncContext
*s
, PutBitContext
*pb
,
857 const float *in
, int size
, int scale_idx
,
858 int cb
, const float lambda
)
/* the handlers receive NULL as their scaled argument */
860 quantize_and_encode_band_cost(s
, pb
, in
, NULL
, size
, scale_idx
, cb
, lambda
,
865 * Functions developed from template function and optimized for getting the number of bits
/**
 * Bit-count handler sharing the parameter list of the other
 * get_band_numbits_*_mips functions.
 *
 * NOTE(review): only the parameter list is among the lines shown here;
 * presumably this is the handler for the zero codebook and reports 0 bits --
 * confirm.
 */
867 static float get_band_numbits_ZERO_mips(struct AACEncContext
*s
,
868 PutBitContext
*pb
, const float *in
,
869 const float *scaled
, int size
, int scale_idx
,
870 int cb
, const float lambda
, const float uplim
,
/**
 * Accumulate in curbits the number of bits needed to code a band, four
 * coefficients at a time, without writing anything (signed-quad variant:
 * same quantization as quantize_and_encode_band_cost_SQUAD_mips).
 *
 * @param s         encoder context; not referenced by the statements shown
 * @param pb        bitstream writer; not referenced by the statements shown
 * @param in        input coefficients; their raw sign bits are read in the asm
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients, consumed in steps of 4
 * @param scale_idx scalefactor index; selects Q34 from ff_aac_pow34sf_tab
 * @param cb        codebook number; the length table is taken at index cb - 1
 * @param lambda    not referenced by the statements shown here
 * @param uplim     not referenced by the statements shown here
 *
 * NOTE(review): the declaration of curbits, the curidx computation and the
 * return statement are not among the lines shown here -- confirm.
 */
876 static float get_band_numbits_SQUAD_mips(struct AACEncContext
*s
,
877 PutBitContext
*pb
, const float *in
,
878 const float *scaled
, int size
, int scale_idx
,
879 int cb
, const float lambda
, const float uplim
,
882 const float Q34
= ff_aac_pow34sf_tab
[POW_SF2_ZERO
- scale_idx
+ SCALE_ONE_POS
- SCALE_DIV_512
];
884 int qc1
, qc2
, qc3
, qc4
;
/* per-codebook code lengths */
887 uint8_t *p_bits
= (uint8_t *)ff_aac_spectral_bits
[cb
-1];
889 for (i
= 0; i
< size
; i
+= 4) {
/* integer view of in[i..i+3], used only to fetch the sign bits in the asm */
891 int *in_int
= (int *)&in
[i
];
/* float -> int assignment truncates the scaled, offset value */
893 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
894 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
895 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
896 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
900 ".set noreorder \n\t"
/* qcN = (0 < qcN): collapse every quantized magnitude to 0 or 1 */
902 "slt %[qc1], $zero, %[qc1] \n\t"
903 "slt %[qc2], $zero, %[qc2] \n\t"
904 "slt %[qc3], $zero, %[qc3] \n\t"
905 "slt %[qc4], $zero, %[qc4] \n\t"
/* load the raw words of in[i..i+3] and keep only bit 31 (the sign bit) */
906 "lw $t0, 0(%[in_int]) \n\t"
907 "lw $t1, 4(%[in_int]) \n\t"
908 "lw $t2, 8(%[in_int]) \n\t"
909 "lw $t3, 12(%[in_int]) \n\t"
910 "srl $t0, $t0, 31 \n\t"
911 "srl $t1, $t1, 31 \n\t"
912 "srl $t2, $t2, 31 \n\t"
913 "srl $t3, $t3, 31 \n\t"
/* $t4..$t7 = -qcN; movn replaces qcN by its negation when the sign bit is set */
914 "subu $t4, $zero, %[qc1] \n\t"
915 "subu $t5, $zero, %[qc2] \n\t"
916 "subu $t6, $zero, %[qc3] \n\t"
917 "subu $t7, $zero, %[qc4] \n\t"
918 "movn %[qc1], $t4, $t0 \n\t"
919 "movn %[qc2], $t5, $t1 \n\t"
920 "movn %[qc3], $t6, $t2 \n\t"
921 "movn %[qc4], $t7, $t3 \n\t"
/* operands: qc1..qc4 are read-write, in_int is read-only, $t0-$t7 are clobbered */
925 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
926 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
)
927 : [in_int
]"r"(in_int
)
928 : "t0", "t1", "t2", "t3",
929 "t4", "t5", "t6", "t7",
/* add the code length of this quad */
942 curbits
+= p_bits
[curidx
];
/**
 * Accumulate in curbits the number of bits needed to code a band, four
 * coefficients at a time, without writing anything (unsigned-quad variant:
 * magnitudes clamped to 2, sign bits counted through uquad_sign_bits).
 *
 * @param s         encoder context; not referenced by the statements shown
 * @param pb        bitstream writer; not referenced by the statements shown
 * @param in        not referenced by the statements shown here
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients, consumed in steps of 4
 * @param scale_idx scalefactor index; selects Q34 from ff_aac_pow34sf_tab
 * @param cb        codebook number; the length table is taken at index cb - 1
 * @param lambda    not referenced by the statements shown here
 * @param uplim     not referenced by the statements shown here
 *
 * NOTE(review): the declaration of curbits, the curidx computation and the
 * return statement are not among the lines shown here -- confirm.
 */
947 static float get_band_numbits_UQUAD_mips(struct AACEncContext
*s
,
948 PutBitContext
*pb
, const float *in
,
949 const float *scaled
, int size
, int scale_idx
,
950 int cb
, const float lambda
, const float uplim
,
953 const float Q34
= ff_aac_pow34sf_tab
[POW_SF2_ZERO
- scale_idx
+ SCALE_ONE_POS
- SCALE_DIV_512
];
956 int qc1
, qc2
, qc3
, qc4
;
/* per-codebook code lengths */
958 uint8_t *p_bits
= (uint8_t *)ff_aac_spectral_bits
[cb
-1];
960 for (i
= 0; i
< size
; i
+= 4) {
/* float -> int assignment truncates the scaled, offset value */
963 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
964 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
965 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
966 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
970 ".set noreorder \n\t"
/* qcN = min(qcN, 2): $tN = (2 < qcN), movn stores 2 when that flag is set */
972 "ori $t4, $zero, 2 \n\t"
973 "slt $t0, $t4, %[qc1] \n\t"
974 "slt $t1, $t4, %[qc2] \n\t"
975 "slt $t2, $t4, %[qc3] \n\t"
976 "slt $t3, $t4, %[qc4] \n\t"
977 "movn %[qc1], $t4, $t0 \n\t"
978 "movn %[qc2], $t4, $t1 \n\t"
979 "movn %[qc3], $t4, $t2 \n\t"
980 "movn %[qc4], $t4, $t3 \n\t"
/* operands: qc1..qc4 are read-write; $t0-$t4 are clobbered */
984 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
985 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
)
987 : "t0", "t1", "t2", "t3", "t4"
/* code length plus the sign-bit count looked up for the same index */
998 curbits
+= p_bits
[curidx
];
999 curbits
+= uquad_sign_bits
[curidx
];
/**
 * Accumulate in curbits the number of bits needed to code a band, four
 * coefficients (two pairs) at a time, without writing anything (signed-pair
 * variant: magnitudes clamped to 4, input signs re-applied).
 *
 * @param s         encoder context; not referenced by the statements shown
 * @param pb        bitstream writer; not referenced by the statements shown
 * @param in        input coefficients; their raw sign bits are read in the asm
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients, consumed in steps of 4
 * @param scale_idx scalefactor index; selects Q34 from ff_aac_pow34sf_tab
 * @param cb        codebook number; the length table is taken at index cb - 1
 * @param lambda    not referenced by the statements shown here
 * @param uplim     not referenced by the statements shown here
 *
 * NOTE(review): the declaration of curbits, most of the curidx/curidx2
 * computation and the return statement are not among the lines shown here
 * -- confirm.
 */
1004 static float get_band_numbits_SPAIR_mips(struct AACEncContext
*s
,
1005 PutBitContext
*pb
, const float *in
,
1006 const float *scaled
, int size
, int scale_idx
,
1007 int cb
, const float lambda
, const float uplim
,
1010 const float Q34
= ff_aac_pow34sf_tab
[POW_SF2_ZERO
- scale_idx
+ SCALE_ONE_POS
- SCALE_DIV_512
];
1012 int qc1
, qc2
, qc3
, qc4
;
/* per-codebook code lengths */
1015 uint8_t *p_bits
= (uint8_t*)ff_aac_spectral_bits
[cb
-1];
1017 for (i
= 0; i
< size
; i
+= 4) {
/* one table index per pair */
1018 int curidx
, curidx2
;
/* integer view of in[i..i+3], used only to fetch the sign bits in the asm */
1019 int *in_int
= (int *)&in
[i
];
/* float -> int assignment truncates the scaled, offset value */
1021 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
1022 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
1023 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
1024 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
1028 ".set noreorder \n\t"
/* qcN = min(qcN, 4): $tN = (4 < qcN), movn stores 4 when that flag is set */
1030 "ori $t4, $zero, 4 \n\t"
1031 "slt $t0, $t4, %[qc1] \n\t"
1032 "slt $t1, $t4, %[qc2] \n\t"
1033 "slt $t2, $t4, %[qc3] \n\t"
1034 "slt $t3, $t4, %[qc4] \n\t"
1035 "movn %[qc1], $t4, $t0 \n\t"
1036 "movn %[qc2], $t4, $t1 \n\t"
1037 "movn %[qc3], $t4, $t2 \n\t"
1038 "movn %[qc4], $t4, $t3 \n\t"
/* load the raw words of in[i..i+3] and keep only bit 31 (the sign bit) */
1039 "lw $t0, 0(%[in_int]) \n\t"
1040 "lw $t1, 4(%[in_int]) \n\t"
1041 "lw $t2, 8(%[in_int]) \n\t"
1042 "lw $t3, 12(%[in_int]) \n\t"
1043 "srl $t0, $t0, 31 \n\t"
1044 "srl $t1, $t1, 31 \n\t"
1045 "srl $t2, $t2, 31 \n\t"
1046 "srl $t3, $t3, 31 \n\t"
/* $t4..$t7 = -qcN; movn replaces qcN by its negation when the sign bit is set */
1047 "subu $t4, $zero, %[qc1] \n\t"
1048 "subu $t5, $zero, %[qc2] \n\t"
1049 "subu $t6, $zero, %[qc3] \n\t"
1050 "subu $t7, $zero, %[qc4] \n\t"
1051 "movn %[qc1], $t4, $t0 \n\t"
1052 "movn %[qc2], $t5, $t1 \n\t"
1053 "movn %[qc3], $t6, $t2 \n\t"
1054 "movn %[qc4], $t7, $t3 \n\t"
/* operands: qc1..qc4 are read-write, in_int is read-only, $t0-$t7 are clobbered */
1058 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
1059 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
)
1060 : [in_int
]"r"(in_int
)
1061 : "t0", "t1", "t2", "t3",
1062 "t4", "t5", "t6", "t7",
/*
 * NOTE(review): 40 == 9 * 4 + 4, presumably the bias that maps two signed
 * values in -4..4 onto a 9 x 9 table index; the statement giving curidx2
 * its initial value is not shown here -- confirm.
 */
1070 curidx2
+= qc4
+ 40;
/* code lengths of both pairs */
1072 curbits
+= p_bits
[curidx
] + p_bits
[curidx2
];
/**
 * Accumulate in curbits the number of bits needed to code a band, four
 * coefficients (two pairs) at a time, without writing anything
 * (unsigned-pair variant: magnitudes clamped to 7, sign bits counted through
 * upair7_sign_bits).
 *
 * @param s         encoder context; not referenced by the statements shown
 * @param pb        bitstream writer; not referenced by the statements shown
 * @param in        not referenced by the statements shown here
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients, consumed in steps of 4
 * @param scale_idx scalefactor index; selects Q34 from ff_aac_pow34sf_tab
 * @param cb        codebook number; the length table is taken at index cb - 1
 * @param lambda    not referenced by the statements shown here
 * @param uplim     not referenced by the statements shown here
 *
 * NOTE(review): the declaration of curbits, the curidx/curidx2 computation,
 * part of the final sum and the return statement are not among the lines
 * shown here -- confirm.
 */
1077 static float get_band_numbits_UPAIR7_mips(struct AACEncContext
*s
,
1078 PutBitContext
*pb
, const float *in
,
1079 const float *scaled
, int size
, int scale_idx
,
1080 int cb
, const float lambda
, const float uplim
,
1083 const float Q34
= ff_aac_pow34sf_tab
[POW_SF2_ZERO
- scale_idx
+ SCALE_ONE_POS
- SCALE_DIV_512
];
1085 int qc1
, qc2
, qc3
, qc4
;
/* per-codebook code lengths */
1088 uint8_t *p_bits
= (uint8_t *)ff_aac_spectral_bits
[cb
-1];
1090 for (i
= 0; i
< size
; i
+= 4) {
/* one table index per pair */
1091 int curidx
, curidx2
;
/* float -> int assignment truncates the scaled, offset value */
1093 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
1094 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
1095 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
1096 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
1100 ".set noreorder \n\t"
/* qcN = min(qcN, 7): $tN = (7 < qcN), movn stores 7 when that flag is set */
1102 "ori $t4, $zero, 7 \n\t"
1103 "slt $t0, $t4, %[qc1] \n\t"
1104 "slt $t1, $t4, %[qc2] \n\t"
1105 "slt $t2, $t4, %[qc3] \n\t"
1106 "slt $t3, $t4, %[qc4] \n\t"
1107 "movn %[qc1], $t4, $t0 \n\t"
1108 "movn %[qc2], $t4, $t1 \n\t"
1109 "movn %[qc3], $t4, $t2 \n\t"
1110 "movn %[qc4], $t4, $t3 \n\t"
/* operands: qc1..qc4 are read-write; $t0-$t4 are clobbered */
1114 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
1115 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
)
1117 : "t0", "t1", "t2", "t3", "t4"
/* code length and sign-bit count of the first pair, sign-bit count of the second */
1126 curbits
+= p_bits
[curidx
] +
1127 upair7_sign_bits
[curidx
] +
1129 upair7_sign_bits
[curidx2
];
/**
 * Accumulate in curbits the number of bits needed to code a band, four
 * coefficients (two pairs) at a time, without writing anything
 * (unsigned-pair variant: magnitudes clamped to 12, sign bits counted
 * through upair12_sign_bits).
 *
 * @param s         encoder context; not referenced by the statements shown
 * @param pb        bitstream writer; not referenced by the statements shown
 * @param in        not referenced by the statements shown here
 * @param scaled    values that are multiplied by Q34 and quantized
 * @param size      number of coefficients, consumed in steps of 4
 * @param scale_idx scalefactor index; selects Q34 from ff_aac_pow34sf_tab
 * @param cb        codebook number; the length table is taken at index cb - 1
 * @param lambda    not referenced by the statements shown here
 * @param uplim     not referenced by the statements shown here
 *
 * NOTE(review): the declaration of curbits, the curidx/curidx2 computation,
 * part of the final sum and the return statement are not among the lines
 * shown here -- confirm.
 */
1134 static float get_band_numbits_UPAIR12_mips(struct AACEncContext
*s
,
1135 PutBitContext
*pb
, const float *in
,
1136 const float *scaled
, int size
, int scale_idx
,
1137 int cb
, const float lambda
, const float uplim
,
1140 const float Q34
= ff_aac_pow34sf_tab
[POW_SF2_ZERO
- scale_idx
+ SCALE_ONE_POS
- SCALE_DIV_512
];
1142 int qc1
, qc2
, qc3
, qc4
;
/* per-codebook code lengths */
1145 uint8_t *p_bits
= (uint8_t *)ff_aac_spectral_bits
[cb
-1];
1147 for (i
= 0; i
< size
; i
+= 4) {
/* one table index per pair */
1148 int curidx
, curidx2
;
/* float -> int assignment truncates the scaled, offset value */
1150 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
1151 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
1152 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
1153 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
1157 ".set noreorder \n\t"
/* qcN = min(qcN, 12): $tN = (12 < qcN), movn stores 12 when that flag is set */
1159 "ori $t4, $zero, 12 \n\t"
1160 "slt $t0, $t4, %[qc1] \n\t"
1161 "slt $t1, $t4, %[qc2] \n\t"
1162 "slt $t2, $t4, %[qc3] \n\t"
1163 "slt $t3, $t4, %[qc4] \n\t"
1164 "movn %[qc1], $t4, $t0 \n\t"
1165 "movn %[qc2], $t4, $t1 \n\t"
1166 "movn %[qc3], $t4, $t2 \n\t"
1167 "movn %[qc4], $t4, $t3 \n\t"
/* operands: qc1..qc4 are read-write; $t0-$t4 are clobbered */
1171 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
1172 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
)
1174 : "t0", "t1", "t2", "t3", "t4"
/* code length of the first pair plus the sign-bit counts of both pairs */
1183 curbits
+= p_bits
[curidx
] +
1185 upair12_sign_bits
[curidx
] +
1186 upair12_sign_bits
[curidx2
];
1191 static float get_band_numbits_ESC_mips(struct AACEncContext
*s
,
1192 PutBitContext
*pb
, const float *in
,
1193 const float *scaled
, int size
, int scale_idx
,
1194 int cb
, const float lambda
, const float uplim
,
1197 const float Q34
= ff_aac_pow34sf_tab
[POW_SF2_ZERO
- scale_idx
+ SCALE_ONE_POS
- SCALE_DIV_512
];
1199 int qc1
, qc2
, qc3
, qc4
;
1202 uint8_t *p_bits
= (uint8_t*)ff_aac_spectral_bits
[cb
-1];
1204 for (i
= 0; i
< size
; i
+= 4) {
1205 int curidx
, curidx2
;
1206 int cond0
, cond1
, cond2
, cond3
;
1209 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
1210 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
1211 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
1212 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
1216 ".set noreorder \n\t"
1218 "ori $t4, $zero, 15 \n\t"
1219 "ori $t5, $zero, 16 \n\t"
1220 "shll_s.w %[c1], %[qc1], 18 \n\t"
1221 "shll_s.w %[c2], %[qc2], 18 \n\t"
1222 "shll_s.w %[c3], %[qc3], 18 \n\t"
1223 "shll_s.w %[c4], %[qc4], 18 \n\t"
1224 "srl %[c1], %[c1], 18 \n\t"
1225 "srl %[c2], %[c2], 18 \n\t"
1226 "srl %[c3], %[c3], 18 \n\t"
1227 "srl %[c4], %[c4], 18 \n\t"
1228 "slt %[cond0], $t4, %[qc1] \n\t"
1229 "slt %[cond1], $t4, %[qc2] \n\t"
1230 "slt %[cond2], $t4, %[qc3] \n\t"
1231 "slt %[cond3], $t4, %[qc4] \n\t"
1232 "movn %[qc1], $t5, %[cond0] \n\t"
1233 "movn %[qc2], $t5, %[cond1] \n\t"
1234 "movn %[qc3], $t5, %[cond2] \n\t"
1235 "movn %[qc4], $t5, %[cond3] \n\t"
1236 "ori $t5, $zero, 31 \n\t"
1237 "clz %[c1], %[c1] \n\t"
1238 "clz %[c2], %[c2] \n\t"
1239 "clz %[c3], %[c3] \n\t"
1240 "clz %[c4], %[c4] \n\t"
1241 "subu %[c1], $t5, %[c1] \n\t"
1242 "subu %[c2], $t5, %[c2] \n\t"
1243 "subu %[c3], $t5, %[c3] \n\t"
1244 "subu %[c4], $t5, %[c4] \n\t"
1245 "sll %[c1], %[c1], 1 \n\t"
1246 "sll %[c2], %[c2], 1 \n\t"
1247 "sll %[c3], %[c3], 1 \n\t"
1248 "sll %[c4], %[c4], 1 \n\t"
1249 "addiu %[c1], %[c1], -3 \n\t"
1250 "addiu %[c2], %[c2], -3 \n\t"
1251 "addiu %[c3], %[c3], -3 \n\t"
1252 "addiu %[c4], %[c4], -3 \n\t"
1253 "subu %[cond0], $zero, %[cond0] \n\t"
1254 "subu %[cond1], $zero, %[cond1] \n\t"
1255 "subu %[cond2], $zero, %[cond2] \n\t"
1256 "subu %[cond3], $zero, %[cond3] \n\t"
1257 "and %[c1], %[c1], %[cond0] \n\t"
1258 "and %[c2], %[c2], %[cond1] \n\t"
1259 "and %[c3], %[c3], %[cond2] \n\t"
1260 "and %[c4], %[c4], %[cond3] \n\t"
1264 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
1265 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
),
1266 [cond0
]"=&r"(cond0
), [cond1
]"=&r"(cond1
),
1267 [cond2
]"=&r"(cond2
), [cond3
]"=&r"(cond3
),
1268 [c1
]"=&r"(c1
), [c2
]"=&r"(c2
),
1269 [c3
]"=&r"(c3
), [c4
]"=&r"(c4
)
1280 curbits
+= p_bits
[curidx
];
1281 curbits
+= esc_sign_bits
[curidx
];
1282 curbits
+= p_bits
[curidx2
];
1283 curbits
+= esc_sign_bits
[curidx2
];
1293 static float (*const get_band_numbits_arr
[])(struct AACEncContext
*s
,
1294 PutBitContext
*pb
, const float *in
,
1295 const float *scaled
, int size
, int scale_idx
,
1296 int cb
, const float lambda
, const float uplim
,
1298 get_band_numbits_ZERO_mips
,
1299 get_band_numbits_SQUAD_mips
,
1300 get_band_numbits_SQUAD_mips
,
1301 get_band_numbits_UQUAD_mips
,
1302 get_band_numbits_UQUAD_mips
,
1303 get_band_numbits_SPAIR_mips
,
1304 get_band_numbits_SPAIR_mips
,
1305 get_band_numbits_UPAIR7_mips
,
1306 get_band_numbits_UPAIR7_mips
,
1307 get_band_numbits_UPAIR12_mips
,
1308 get_band_numbits_UPAIR12_mips
,
1309 get_band_numbits_ESC_mips
,
/* Forward a bit-count request to the routine registered for codebook cb.
 * Note that cb is expanded twice (table index and argument). */
#define get_band_numbits(s, pb, in, scaled, size, scale_idx, cb,            \
                         lambda, uplim, bits)                               \
    get_band_numbits_arr[cb](s, pb, in, scaled, size, scale_idx, cb,        \
                             lambda, uplim, bits)
1319 static float quantize_band_cost_bits(struct AACEncContext
*s
, const float *in
,
1320 const float *scaled
, int size
, int scale_idx
,
1321 int cb
, const float lambda
, const float uplim
,
1324 return get_band_numbits(s
, NULL
, in
, scaled
, size
, scale_idx
, cb
, lambda
, uplim
, bits
);
1328 * Functions developed from template function and optimized for getting the band cost
1331 static float get_band_cost_ZERO_mips(struct AACEncContext
*s
,
1332 PutBitContext
*pb
, const float *in
,
1333 const float *scaled
, int size
, int scale_idx
,
1334 int cb
, const float lambda
, const float uplim
,
1340 for (i
= 0; i
< size
; i
+= 4) {
1341 cost
+= in
[i
] * in
[i
];
1342 cost
+= in
[i
+1] * in
[i
+1];
1343 cost
+= in
[i
+2] * in
[i
+2];
1344 cost
+= in
[i
+3] * in
[i
+3];
1348 return cost
* lambda
;
1351 static float get_band_cost_SQUAD_mips(struct AACEncContext
*s
,
1352 PutBitContext
*pb
, const float *in
,
1353 const float *scaled
, int size
, int scale_idx
,
1354 int cb
, const float lambda
, const float uplim
,
1357 const float Q34
= ff_aac_pow34sf_tab
[POW_SF2_ZERO
- scale_idx
+ SCALE_ONE_POS
- SCALE_DIV_512
];
1358 const float IQ
= ff_aac_pow2sf_tab
[POW_SF2_ZERO
+ scale_idx
- SCALE_ONE_POS
+ SCALE_DIV_512
];
1361 int qc1
, qc2
, qc3
, qc4
;
1364 uint8_t *p_bits
= (uint8_t *)ff_aac_spectral_bits
[cb
-1];
1365 float *p_codes
= (float *)ff_aac_codebook_vectors
[cb
-1];
1367 for (i
= 0; i
< size
; i
+= 4) {
1370 int *in_int
= (int *)&in
[i
];
1371 float *in_pos
= (float *)&in
[i
];
1372 float di0
, di1
, di2
, di3
;
1374 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
1375 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
1376 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
1377 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
1381 ".set noreorder \n\t"
1383 "slt %[qc1], $zero, %[qc1] \n\t"
1384 "slt %[qc2], $zero, %[qc2] \n\t"
1385 "slt %[qc3], $zero, %[qc3] \n\t"
1386 "slt %[qc4], $zero, %[qc4] \n\t"
1387 "lw $t0, 0(%[in_int]) \n\t"
1388 "lw $t1, 4(%[in_int]) \n\t"
1389 "lw $t2, 8(%[in_int]) \n\t"
1390 "lw $t3, 12(%[in_int]) \n\t"
1391 "srl $t0, $t0, 31 \n\t"
1392 "srl $t1, $t1, 31 \n\t"
1393 "srl $t2, $t2, 31 \n\t"
1394 "srl $t3, $t3, 31 \n\t"
1395 "subu $t4, $zero, %[qc1] \n\t"
1396 "subu $t5, $zero, %[qc2] \n\t"
1397 "subu $t6, $zero, %[qc3] \n\t"
1398 "subu $t7, $zero, %[qc4] \n\t"
1399 "movn %[qc1], $t4, $t0 \n\t"
1400 "movn %[qc2], $t5, $t1 \n\t"
1401 "movn %[qc3], $t6, $t2 \n\t"
1402 "movn %[qc4], $t7, $t3 \n\t"
1406 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
1407 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
)
1408 : [in_int
]"r"(in_int
)
1409 : "t0", "t1", "t2", "t3",
1410 "t4", "t5", "t6", "t7",
1423 curbits
+= p_bits
[curidx
];
1424 vec
= &p_codes
[curidx
*4];
1428 ".set noreorder \n\t"
1430 "lwc1 $f0, 0(%[in_pos]) \n\t"
1431 "lwc1 $f1, 0(%[vec]) \n\t"
1432 "lwc1 $f2, 4(%[in_pos]) \n\t"
1433 "lwc1 $f3, 4(%[vec]) \n\t"
1434 "lwc1 $f4, 8(%[in_pos]) \n\t"
1435 "lwc1 $f5, 8(%[vec]) \n\t"
1436 "lwc1 $f6, 12(%[in_pos]) \n\t"
1437 "lwc1 $f7, 12(%[vec]) \n\t"
1438 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1439 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1440 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1441 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1445 : [di0
]"=&f"(di0
), [di1
]"=&f"(di1
),
1446 [di2
]"=&f"(di2
), [di3
]"=&f"(di3
)
1447 : [in_pos
]"r"(in_pos
), [vec
]"r"(vec
),
1449 : "$f0", "$f1", "$f2", "$f3",
1450 "$f4", "$f5", "$f6", "$f7",
1454 cost
+= di0
* di0
+ di1
* di1
1455 + di2
* di2
+ di3
* di3
;
1460 return cost
* lambda
+ curbits
;
1463 static float get_band_cost_UQUAD_mips(struct AACEncContext
*s
,
1464 PutBitContext
*pb
, const float *in
,
1465 const float *scaled
, int size
, int scale_idx
,
1466 int cb
, const float lambda
, const float uplim
,
1469 const float Q34
= ff_aac_pow34sf_tab
[POW_SF2_ZERO
- scale_idx
+ SCALE_ONE_POS
- SCALE_DIV_512
];
1470 const float IQ
= ff_aac_pow2sf_tab
[POW_SF2_ZERO
+ scale_idx
- SCALE_ONE_POS
+ SCALE_DIV_512
];
1474 int qc1
, qc2
, qc3
, qc4
;
1476 uint8_t *p_bits
= (uint8_t*)ff_aac_spectral_bits
[cb
-1];
1477 float *p_codes
= (float *)ff_aac_codebook_vectors
[cb
-1];
1479 for (i
= 0; i
< size
; i
+= 4) {
1482 float *in_pos
= (float *)&in
[i
];
1483 float di0
, di1
, di2
, di3
;
1485 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
1486 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
1487 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
1488 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
1492 ".set noreorder \n\t"
1494 "ori $t4, $zero, 2 \n\t"
1495 "slt $t0, $t4, %[qc1] \n\t"
1496 "slt $t1, $t4, %[qc2] \n\t"
1497 "slt $t2, $t4, %[qc3] \n\t"
1498 "slt $t3, $t4, %[qc4] \n\t"
1499 "movn %[qc1], $t4, $t0 \n\t"
1500 "movn %[qc2], $t4, $t1 \n\t"
1501 "movn %[qc3], $t4, $t2 \n\t"
1502 "movn %[qc4], $t4, $t3 \n\t"
1506 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
1507 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
)
1509 : "t0", "t1", "t2", "t3", "t4"
1520 curbits
+= p_bits
[curidx
];
1521 curbits
+= uquad_sign_bits
[curidx
];
1522 vec
= &p_codes
[curidx
*4];
1526 ".set noreorder \n\t"
1528 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1529 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1530 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1531 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1532 "abs.s %[di0], %[di0] \n\t"
1533 "abs.s %[di1], %[di1] \n\t"
1534 "abs.s %[di2], %[di2] \n\t"
1535 "abs.s %[di3], %[di3] \n\t"
1536 "lwc1 $f0, 0(%[vec]) \n\t"
1537 "lwc1 $f1, 4(%[vec]) \n\t"
1538 "lwc1 $f2, 8(%[vec]) \n\t"
1539 "lwc1 $f3, 12(%[vec]) \n\t"
1540 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1541 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1542 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1543 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1547 : [di0
]"=&f"(di0
), [di1
]"=&f"(di1
),
1548 [di2
]"=&f"(di2
), [di3
]"=&f"(di3
)
1549 : [in_pos
]"r"(in_pos
), [vec
]"r"(vec
),
1551 : "$f0", "$f1", "$f2", "$f3",
1555 cost
+= di0
* di0
+ di1
* di1
1556 + di2
* di2
+ di3
* di3
;
1561 return cost
* lambda
+ curbits
;
1564 static float get_band_cost_SPAIR_mips(struct AACEncContext
*s
,
1565 PutBitContext
*pb
, const float *in
,
1566 const float *scaled
, int size
, int scale_idx
,
1567 int cb
, const float lambda
, const float uplim
,
1570 const float Q34
= ff_aac_pow34sf_tab
[POW_SF2_ZERO
- scale_idx
+ SCALE_ONE_POS
- SCALE_DIV_512
];
1571 const float IQ
= ff_aac_pow2sf_tab
[POW_SF2_ZERO
+ scale_idx
- SCALE_ONE_POS
+ SCALE_DIV_512
];
1574 int qc1
, qc2
, qc3
, qc4
;
1577 uint8_t *p_bits
= (uint8_t *)ff_aac_spectral_bits
[cb
-1];
1578 float *p_codes
= (float *)ff_aac_codebook_vectors
[cb
-1];
1580 for (i
= 0; i
< size
; i
+= 4) {
1581 const float *vec
, *vec2
;
1582 int curidx
, curidx2
;
1583 int *in_int
= (int *)&in
[i
];
1584 float *in_pos
= (float *)&in
[i
];
1585 float di0
, di1
, di2
, di3
;
1587 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
1588 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
1589 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
1590 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
1594 ".set noreorder \n\t"
1596 "ori $t4, $zero, 4 \n\t"
1597 "slt $t0, $t4, %[qc1] \n\t"
1598 "slt $t1, $t4, %[qc2] \n\t"
1599 "slt $t2, $t4, %[qc3] \n\t"
1600 "slt $t3, $t4, %[qc4] \n\t"
1601 "movn %[qc1], $t4, $t0 \n\t"
1602 "movn %[qc2], $t4, $t1 \n\t"
1603 "movn %[qc3], $t4, $t2 \n\t"
1604 "movn %[qc4], $t4, $t3 \n\t"
1605 "lw $t0, 0(%[in_int]) \n\t"
1606 "lw $t1, 4(%[in_int]) \n\t"
1607 "lw $t2, 8(%[in_int]) \n\t"
1608 "lw $t3, 12(%[in_int]) \n\t"
1609 "srl $t0, $t0, 31 \n\t"
1610 "srl $t1, $t1, 31 \n\t"
1611 "srl $t2, $t2, 31 \n\t"
1612 "srl $t3, $t3, 31 \n\t"
1613 "subu $t4, $zero, %[qc1] \n\t"
1614 "subu $t5, $zero, %[qc2] \n\t"
1615 "subu $t6, $zero, %[qc3] \n\t"
1616 "subu $t7, $zero, %[qc4] \n\t"
1617 "movn %[qc1], $t4, $t0 \n\t"
1618 "movn %[qc2], $t5, $t1 \n\t"
1619 "movn %[qc3], $t6, $t2 \n\t"
1620 "movn %[qc4], $t7, $t3 \n\t"
1624 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
1625 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
)
1626 : [in_int
]"r"(in_int
)
1627 : "t0", "t1", "t2", "t3",
1628 "t4", "t5", "t6", "t7",
1636 curidx2
+= qc4
+ 40;
1638 curbits
+= p_bits
[curidx
];
1639 curbits
+= p_bits
[curidx2
];
1641 vec
= &p_codes
[curidx
*2];
1642 vec2
= &p_codes
[curidx2
*2];
1646 ".set noreorder \n\t"
1648 "lwc1 $f0, 0(%[in_pos]) \n\t"
1649 "lwc1 $f1, 0(%[vec]) \n\t"
1650 "lwc1 $f2, 4(%[in_pos]) \n\t"
1651 "lwc1 $f3, 4(%[vec]) \n\t"
1652 "lwc1 $f4, 8(%[in_pos]) \n\t"
1653 "lwc1 $f5, 0(%[vec2]) \n\t"
1654 "lwc1 $f6, 12(%[in_pos]) \n\t"
1655 "lwc1 $f7, 4(%[vec2]) \n\t"
1656 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1657 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1658 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1659 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1663 : [di0
]"=&f"(di0
), [di1
]"=&f"(di1
),
1664 [di2
]"=&f"(di2
), [di3
]"=&f"(di3
)
1665 : [in_pos
]"r"(in_pos
), [vec
]"r"(vec
),
1666 [vec2
]"r"(vec2
), [IQ
]"f"(IQ
)
1667 : "$f0", "$f1", "$f2", "$f3",
1668 "$f4", "$f5", "$f6", "$f7",
1672 cost
+= di0
* di0
+ di1
* di1
1673 + di2
* di2
+ di3
* di3
;
1678 return cost
* lambda
+ curbits
;
1681 static float get_band_cost_UPAIR7_mips(struct AACEncContext
*s
,
1682 PutBitContext
*pb
, const float *in
,
1683 const float *scaled
, int size
, int scale_idx
,
1684 int cb
, const float lambda
, const float uplim
,
1687 const float Q34
= ff_aac_pow34sf_tab
[POW_SF2_ZERO
- scale_idx
+ SCALE_ONE_POS
- SCALE_DIV_512
];
1688 const float IQ
= ff_aac_pow2sf_tab
[POW_SF2_ZERO
+ scale_idx
- SCALE_ONE_POS
+ SCALE_DIV_512
];
1691 int qc1
, qc2
, qc3
, qc4
;
1694 uint8_t *p_bits
= (uint8_t *)ff_aac_spectral_bits
[cb
-1];
1695 float *p_codes
= (float *)ff_aac_codebook_vectors
[cb
-1];
1697 for (i
= 0; i
< size
; i
+= 4) {
1698 const float *vec
, *vec2
;
1699 int curidx
, curidx2
, sign1
, count1
, sign2
, count2
;
1700 int *in_int
= (int *)&in
[i
];
1701 float *in_pos
= (float *)&in
[i
];
1702 float di0
, di1
, di2
, di3
;
1704 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
1705 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
1706 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
1707 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
1711 ".set noreorder \n\t"
1713 "ori $t4, $zero, 7 \n\t"
1714 "ori %[sign1], $zero, 0 \n\t"
1715 "ori %[sign2], $zero, 0 \n\t"
1716 "slt $t0, $t4, %[qc1] \n\t"
1717 "slt $t1, $t4, %[qc2] \n\t"
1718 "slt $t2, $t4, %[qc3] \n\t"
1719 "slt $t3, $t4, %[qc4] \n\t"
1720 "movn %[qc1], $t4, $t0 \n\t"
1721 "movn %[qc2], $t4, $t1 \n\t"
1722 "movn %[qc3], $t4, $t2 \n\t"
1723 "movn %[qc4], $t4, $t3 \n\t"
1724 "lw $t0, 0(%[in_int]) \n\t"
1725 "lw $t1, 4(%[in_int]) \n\t"
1726 "lw $t2, 8(%[in_int]) \n\t"
1727 "lw $t3, 12(%[in_int]) \n\t"
1728 "slt $t0, $t0, $zero \n\t"
1729 "movn %[sign1], $t0, %[qc1] \n\t"
1730 "slt $t2, $t2, $zero \n\t"
1731 "movn %[sign2], $t2, %[qc3] \n\t"
1732 "slt $t1, $t1, $zero \n\t"
1733 "sll $t0, %[sign1], 1 \n\t"
1734 "or $t0, $t0, $t1 \n\t"
1735 "movn %[sign1], $t0, %[qc2] \n\t"
1736 "slt $t3, $t3, $zero \n\t"
1737 "sll $t0, %[sign2], 1 \n\t"
1738 "or $t0, $t0, $t3 \n\t"
1739 "movn %[sign2], $t0, %[qc4] \n\t"
1740 "slt %[count1], $zero, %[qc1] \n\t"
1741 "slt $t1, $zero, %[qc2] \n\t"
1742 "slt %[count2], $zero, %[qc3] \n\t"
1743 "slt $t2, $zero, %[qc4] \n\t"
1744 "addu %[count1], %[count1], $t1 \n\t"
1745 "addu %[count2], %[count2], $t2 \n\t"
1749 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
1750 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
),
1751 [sign1
]"=&r"(sign1
), [count1
]"=&r"(count1
),
1752 [sign2
]"=&r"(sign2
), [count2
]"=&r"(count2
)
1753 : [in_int
]"r"(in_int
)
1754 : "t0", "t1", "t2", "t3", "t4",
1764 curbits
+= p_bits
[curidx
];
1765 curbits
+= upair7_sign_bits
[curidx
];
1766 vec
= &p_codes
[curidx
*2];
1768 curbits
+= p_bits
[curidx2
];
1769 curbits
+= upair7_sign_bits
[curidx2
];
1770 vec2
= &p_codes
[curidx2
*2];
1774 ".set noreorder \n\t"
1776 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1777 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1778 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1779 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1780 "abs.s %[di0], %[di0] \n\t"
1781 "abs.s %[di1], %[di1] \n\t"
1782 "abs.s %[di2], %[di2] \n\t"
1783 "abs.s %[di3], %[di3] \n\t"
1784 "lwc1 $f0, 0(%[vec]) \n\t"
1785 "lwc1 $f1, 4(%[vec]) \n\t"
1786 "lwc1 $f2, 0(%[vec2]) \n\t"
1787 "lwc1 $f3, 4(%[vec2]) \n\t"
1788 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1789 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1790 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1791 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1795 : [di0
]"=&f"(di0
), [di1
]"=&f"(di1
),
1796 [di2
]"=&f"(di2
), [di3
]"=&f"(di3
)
1797 : [in_pos
]"r"(in_pos
), [vec
]"r"(vec
),
1798 [vec2
]"r"(vec2
), [IQ
]"f"(IQ
)
1799 : "$f0", "$f1", "$f2", "$f3",
1803 cost
+= di0
* di0
+ di1
* di1
1804 + di2
* di2
+ di3
* di3
;
1809 return cost
* lambda
+ curbits
;
1812 static float get_band_cost_UPAIR12_mips(struct AACEncContext
*s
,
1813 PutBitContext
*pb
, const float *in
,
1814 const float *scaled
, int size
, int scale_idx
,
1815 int cb
, const float lambda
, const float uplim
,
1818 const float Q34
= ff_aac_pow34sf_tab
[POW_SF2_ZERO
- scale_idx
+ SCALE_ONE_POS
- SCALE_DIV_512
];
1819 const float IQ
= ff_aac_pow2sf_tab
[POW_SF2_ZERO
+ scale_idx
- SCALE_ONE_POS
+ SCALE_DIV_512
];
1822 int qc1
, qc2
, qc3
, qc4
;
1825 uint8_t *p_bits
= (uint8_t *)ff_aac_spectral_bits
[cb
-1];
1826 float *p_codes
= (float *)ff_aac_codebook_vectors
[cb
-1];
1828 for (i
= 0; i
< size
; i
+= 4) {
1829 const float *vec
, *vec2
;
1830 int curidx
, curidx2
;
1831 int sign1
, count1
, sign2
, count2
;
1832 int *in_int
= (int *)&in
[i
];
1833 float *in_pos
= (float *)&in
[i
];
1834 float di0
, di1
, di2
, di3
;
1836 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
1837 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
1838 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
1839 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
1843 ".set noreorder \n\t"
1845 "ori $t4, $zero, 12 \n\t"
1846 "ori %[sign1], $zero, 0 \n\t"
1847 "ori %[sign2], $zero, 0 \n\t"
1848 "slt $t0, $t4, %[qc1] \n\t"
1849 "slt $t1, $t4, %[qc2] \n\t"
1850 "slt $t2, $t4, %[qc3] \n\t"
1851 "slt $t3, $t4, %[qc4] \n\t"
1852 "movn %[qc1], $t4, $t0 \n\t"
1853 "movn %[qc2], $t4, $t1 \n\t"
1854 "movn %[qc3], $t4, $t2 \n\t"
1855 "movn %[qc4], $t4, $t3 \n\t"
1856 "lw $t0, 0(%[in_int]) \n\t"
1857 "lw $t1, 4(%[in_int]) \n\t"
1858 "lw $t2, 8(%[in_int]) \n\t"
1859 "lw $t3, 12(%[in_int]) \n\t"
1860 "slt $t0, $t0, $zero \n\t"
1861 "movn %[sign1], $t0, %[qc1] \n\t"
1862 "slt $t2, $t2, $zero \n\t"
1863 "movn %[sign2], $t2, %[qc3] \n\t"
1864 "slt $t1, $t1, $zero \n\t"
1865 "sll $t0, %[sign1], 1 \n\t"
1866 "or $t0, $t0, $t1 \n\t"
1867 "movn %[sign1], $t0, %[qc2] \n\t"
1868 "slt $t3, $t3, $zero \n\t"
1869 "sll $t0, %[sign2], 1 \n\t"
1870 "or $t0, $t0, $t3 \n\t"
1871 "movn %[sign2], $t0, %[qc4] \n\t"
1872 "slt %[count1], $zero, %[qc1] \n\t"
1873 "slt $t1, $zero, %[qc2] \n\t"
1874 "slt %[count2], $zero, %[qc3] \n\t"
1875 "slt $t2, $zero, %[qc4] \n\t"
1876 "addu %[count1], %[count1], $t1 \n\t"
1877 "addu %[count2], %[count2], $t2 \n\t"
1881 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
1882 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
),
1883 [sign1
]"=&r"(sign1
), [count1
]"=&r"(count1
),
1884 [sign2
]"=&r"(sign2
), [count2
]"=&r"(count2
)
1885 : [in_int
]"r"(in_int
)
1886 : "t0", "t1", "t2", "t3", "t4",
1896 curbits
+= p_bits
[curidx
];
1897 curbits
+= p_bits
[curidx2
];
1898 curbits
+= upair12_sign_bits
[curidx
];
1899 curbits
+= upair12_sign_bits
[curidx2
];
1900 vec
= &p_codes
[curidx
*2];
1901 vec2
= &p_codes
[curidx2
*2];
1905 ".set noreorder \n\t"
1907 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1908 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1909 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1910 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1911 "abs.s %[di0], %[di0] \n\t"
1912 "abs.s %[di1], %[di1] \n\t"
1913 "abs.s %[di2], %[di2] \n\t"
1914 "abs.s %[di3], %[di3] \n\t"
1915 "lwc1 $f0, 0(%[vec]) \n\t"
1916 "lwc1 $f1, 4(%[vec]) \n\t"
1917 "lwc1 $f2, 0(%[vec2]) \n\t"
1918 "lwc1 $f3, 4(%[vec2]) \n\t"
1919 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1920 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1921 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1922 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1926 : [di0
]"=&f"(di0
), [di1
]"=&f"(di1
),
1927 [di2
]"=&f"(di2
), [di3
]"=&f"(di3
)
1928 : [in_pos
]"r"(in_pos
), [vec
]"r"(vec
),
1929 [vec2
]"r"(vec2
), [IQ
]"f"(IQ
)
1930 : "$f0", "$f1", "$f2", "$f3",
1934 cost
+= di0
* di0
+ di1
* di1
1935 + di2
* di2
+ di3
* di3
;
1940 return cost
* lambda
+ curbits
;
1943 static float get_band_cost_ESC_mips(struct AACEncContext
*s
,
1944 PutBitContext
*pb
, const float *in
,
1945 const float *scaled
, int size
, int scale_idx
,
1946 int cb
, const float lambda
, const float uplim
,
1949 const float Q34
= ff_aac_pow34sf_tab
[POW_SF2_ZERO
- scale_idx
+ SCALE_ONE_POS
- SCALE_DIV_512
];
1950 const float IQ
= ff_aac_pow2sf_tab
[POW_SF2_ZERO
+ scale_idx
- SCALE_ONE_POS
+ SCALE_DIV_512
];
1951 const float CLIPPED_ESCAPE
= 165140.0f
* IQ
;
1954 int qc1
, qc2
, qc3
, qc4
;
1957 uint8_t *p_bits
= (uint8_t*)ff_aac_spectral_bits
[cb
-1];
1958 float *p_codes
= (float* )ff_aac_codebook_vectors
[cb
-1];
1960 for (i
= 0; i
< size
; i
+= 4) {
1961 const float *vec
, *vec2
;
1962 int curidx
, curidx2
;
1963 float t1
, t2
, t3
, t4
;
1964 float di1
, di2
, di3
, di4
;
1965 int cond0
, cond1
, cond2
, cond3
;
1968 qc1
= scaled
[i
] * Q34
+ 0.4054f
;
1969 qc2
= scaled
[i
+1] * Q34
+ 0.4054f
;
1970 qc3
= scaled
[i
+2] * Q34
+ 0.4054f
;
1971 qc4
= scaled
[i
+3] * Q34
+ 0.4054f
;
1975 ".set noreorder \n\t"
1977 "ori $t4, $zero, 15 \n\t"
1978 "ori $t5, $zero, 16 \n\t"
1979 "shll_s.w %[c1], %[qc1], 18 \n\t"
1980 "shll_s.w %[c2], %[qc2], 18 \n\t"
1981 "shll_s.w %[c3], %[qc3], 18 \n\t"
1982 "shll_s.w %[c4], %[qc4], 18 \n\t"
1983 "srl %[c1], %[c1], 18 \n\t"
1984 "srl %[c2], %[c2], 18 \n\t"
1985 "srl %[c3], %[c3], 18 \n\t"
1986 "srl %[c4], %[c4], 18 \n\t"
1987 "slt %[cond0], $t4, %[qc1] \n\t"
1988 "slt %[cond1], $t4, %[qc2] \n\t"
1989 "slt %[cond2], $t4, %[qc3] \n\t"
1990 "slt %[cond3], $t4, %[qc4] \n\t"
1991 "movn %[qc1], $t5, %[cond0] \n\t"
1992 "movn %[qc2], $t5, %[cond1] \n\t"
1993 "movn %[qc3], $t5, %[cond2] \n\t"
1994 "movn %[qc4], $t5, %[cond3] \n\t"
1998 : [qc1
]"+r"(qc1
), [qc2
]"+r"(qc2
),
1999 [qc3
]"+r"(qc3
), [qc4
]"+r"(qc4
),
2000 [cond0
]"=&r"(cond0
), [cond1
]"=&r"(cond1
),
2001 [cond2
]"=&r"(cond2
), [cond3
]"=&r"(cond3
),
2002 [c1
]"=&r"(c1
), [c2
]"=&r"(c2
),
2003 [c3
]"=&r"(c3
), [c4
]"=&r"(c4
)
2014 curbits
+= p_bits
[curidx
];
2015 curbits
+= esc_sign_bits
[curidx
];
2016 vec
= &p_codes
[curidx
*2];
2018 curbits
+= p_bits
[curidx2
];
2019 curbits
+= esc_sign_bits
[curidx2
];
2020 vec2
= &p_codes
[curidx2
*2];
2022 curbits
+= (av_log2(c1
) * 2 - 3) & (-cond0
);
2023 curbits
+= (av_log2(c2
) * 2 - 3) & (-cond1
);
2024 curbits
+= (av_log2(c3
) * 2 - 3) & (-cond2
);
2025 curbits
+= (av_log2(c4
) * 2 - 3) & (-cond3
);
2028 t2
= fabsf(in
[i
+1]);
2029 t3
= fabsf(in
[i
+2]);
2030 t4
= fabsf(in
[i
+3]);
2033 if (t1
>= CLIPPED_ESCAPE
) {
2034 di1
= t1
- CLIPPED_ESCAPE
;
2036 di1
= t1
- c1
* cbrtf(c1
) * IQ
;
2039 di1
= t1
- vec
[0] * IQ
;
2042 if (t2
>= CLIPPED_ESCAPE
) {
2043 di2
= t2
- CLIPPED_ESCAPE
;
2045 di2
= t2
- c2
* cbrtf(c2
) * IQ
;
2048 di2
= t2
- vec
[1] * IQ
;
2051 if (t3
>= CLIPPED_ESCAPE
) {
2052 di3
= t3
- CLIPPED_ESCAPE
;
2054 di3
= t3
- c3
* cbrtf(c3
) * IQ
;
2057 di3
= t3
- vec2
[0] * IQ
;
2060 if (t4
>= CLIPPED_ESCAPE
) {
2061 di4
= t4
- CLIPPED_ESCAPE
;
2063 di4
= t4
- c4
* cbrtf(c4
) * IQ
;
2066 di4
= t4
- vec2
[1]*IQ
;
2068 cost
+= di1
* di1
+ di2
* di2
2069 + di3
* di3
+ di4
* di4
;
2074 return cost
* lambda
+ curbits
;
2077 static float (*const get_band_cost_arr
[])(struct AACEncContext
*s
,
2078 PutBitContext
*pb
, const float *in
,
2079 const float *scaled
, int size
, int scale_idx
,
2080 int cb
, const float lambda
, const float uplim
,
2082 get_band_cost_ZERO_mips
,
2083 get_band_cost_SQUAD_mips
,
2084 get_band_cost_SQUAD_mips
,
2085 get_band_cost_UQUAD_mips
,
2086 get_band_cost_UQUAD_mips
,
2087 get_band_cost_SPAIR_mips
,
2088 get_band_cost_SPAIR_mips
,
2089 get_band_cost_UPAIR7_mips
,
2090 get_band_cost_UPAIR7_mips
,
2091 get_band_cost_UPAIR12_mips
,
2092 get_band_cost_UPAIR12_mips
,
2093 get_band_cost_ESC_mips
,
/* Forward a cost request to the routine registered for codebook cb.
 * Note that cb is expanded twice (table index and argument). */
#define get_band_cost(s, pb, in, scaled, size, scale_idx, cb,               \
                      lambda, uplim, bits)                                  \
    get_band_cost_arr[cb](s, pb, in, scaled, size, scale_idx, cb,           \
                          lambda, uplim, bits)
2103 static float quantize_band_cost(struct AACEncContext
*s
, const float *in
,
2104 const float *scaled
, int size
, int scale_idx
,
2105 int cb
, const float lambda
, const float uplim
,
2108 return get_band_cost(s
, NULL
, in
, scaled
, size
, scale_idx
, cb
, lambda
, uplim
, bits
);
2111 static void search_for_quantizers_twoloop_mips(AVCodecContext
*avctx
,
2113 SingleChannelElement
*sce
,
2116 int start
= 0, i
, w
, w2
, g
;
2117 int destbits
= avctx
->bit_rate
* 1024.0 / avctx
->sample_rate
/ avctx
->channels
;
2118 float dists
[128] = { 0 }, uplims
[128];
2120 int fflag
, minscaler
;
2123 float minthr
= INFINITY
;
2125 destbits
= FFMIN(destbits
, 5800);
2126 for (w
= 0; w
< sce
->ics
.num_windows
; w
+= sce
->ics
.group_len
[w
]) {
2127 for (g
= 0; g
< sce
->ics
.num_swb
; g
++) {
2130 for (w2
= 0; w2
< sce
->ics
.group_len
[w
]; w2
++) {
2131 FFPsyBand
*band
= &s
->psy
.ch
[s
->cur_channel
].psy_bands
[(w
+w2
)*16+g
];
2132 uplim
+= band
->threshold
;
2133 if (band
->energy
<= band
->threshold
|| band
->threshold
== 0.0f
) {
2134 sce
->zeroes
[(w
+w2
)*16+g
] = 1;
2139 uplims
[w
*16+g
] = uplim
*512;
2140 sce
->zeroes
[w
*16+g
] = !nz
;
2142 minthr
= FFMIN(minthr
, uplim
);
2146 for (w
= 0; w
< sce
->ics
.num_windows
; w
+= sce
->ics
.group_len
[w
]) {
2147 for (g
= 0; g
< sce
->ics
.num_swb
; g
++) {
2148 if (sce
->zeroes
[w
*16+g
]) {
2149 sce
->sf_idx
[w
*16+g
] = SCALE_ONE_POS
;
2152 sce
->sf_idx
[w
*16+g
] = SCALE_ONE_POS
+ FFMIN(log2f(uplims
[w
*16+g
]/minthr
)*4,59);
2158 abs_pow34_v(s
->scoefs
, sce
->coeffs
, 1024);
2160 for (w
= 0; w
< sce
->ics
.num_windows
; w
+= sce
->ics
.group_len
[w
]) {
2162 for (g
= 0; g
< sce
->ics
.num_swb
; g
++) {
2163 const float *scaled
= s
->scoefs
+ start
;
2164 maxvals
[w
*16+g
] = find_max_val(sce
->ics
.group_len
[w
], sce
->ics
.swb_sizes
[g
], scaled
);
2165 start
+= sce
->ics
.swb_sizes
[g
];
2171 minscaler
= sce
->sf_idx
[0];
2172 qstep
= its
? 1 : 32;
2179 for (w
= 0; w
< sce
->ics
.num_windows
; w
+= sce
->ics
.group_len
[w
]) {
2181 for (g
= 0; g
< sce
->ics
.num_swb
; g
++) {
2182 const float *coefs
= sce
->coeffs
+ start
;
2183 const float *scaled
= s
->scoefs
+ start
;
2187 if (sce
->zeroes
[w
*16+g
] || sce
->sf_idx
[w
*16+g
] >= 218) {
2188 start
+= sce
->ics
.swb_sizes
[g
];
2191 minscaler
= FFMIN(minscaler
, sce
->sf_idx
[w
*16+g
]);
2192 cb
= find_min_book(maxvals
[w
*16+g
], sce
->sf_idx
[w
*16+g
]);
2193 for (w2
= 0; w2
< sce
->ics
.group_len
[w
]; w2
++) {
2195 bits
+= quantize_band_cost_bits(s
, coefs
+ w2
*128,
2197 sce
->ics
.swb_sizes
[g
],
2198 sce
->sf_idx
[w
*16+g
],
2205 bits
+= ff_aac_scalefactor_bits
[sce
->sf_idx
[w
*16+g
] - prev
+ SCALE_DIFF_ZERO
];
2208 start
+= sce
->ics
.swb_sizes
[g
];
2209 prev
= sce
->sf_idx
[w
*16+g
];
2214 for (w
= 0; w
< sce
->ics
.num_windows
; w
+= sce
->ics
.group_len
[w
]) {
2216 for (g
= 0; g
< sce
->ics
.num_swb
; g
++) {
2217 const float *coefs
= sce
->coeffs
+ start
;
2218 const float *scaled
= s
->scoefs
+ start
;
2223 if (sce
->zeroes
[w
*16+g
] || sce
->sf_idx
[w
*16+g
] >= 218) {
2224 start
+= sce
->ics
.swb_sizes
[g
];
2227 minscaler
= FFMIN(minscaler
, sce
->sf_idx
[w
*16+g
]);
2228 cb
= find_min_book(maxvals
[w
*16+g
], sce
->sf_idx
[w
*16+g
]);
2229 for (w2
= 0; w2
< sce
->ics
.group_len
[w
]; w2
++) {
2231 dist
+= quantize_band_cost(s
, coefs
+ w2
*128,
2233 sce
->ics
.swb_sizes
[g
],
2234 sce
->sf_idx
[w
*16+g
],
2241 dists
[w
*16+g
] = dist
- bits
;
2243 bits
+= ff_aac_scalefactor_bits
[sce
->sf_idx
[w
*16+g
] - prev
+ SCALE_DIFF_ZERO
];
2246 start
+= sce
->ics
.swb_sizes
[g
];
2247 prev
= sce
->sf_idx
[w
*16+g
];
2251 if (tbits
> destbits
) {
2252 for (i
= 0; i
< 128; i
++)
2253 if (sce
->sf_idx
[i
] < 218 - qstep
)
2254 sce
->sf_idx
[i
] += qstep
;
2256 for (i
= 0; i
< 128; i
++)
2257 if (sce
->sf_idx
[i
] > 60 - qstep
)
2258 sce
->sf_idx
[i
] -= qstep
;
2261 if (!qstep
&& tbits
> destbits
*1.02 && sce
->sf_idx
[0] < 217)
2266 minscaler
= av_clip(minscaler
, 60, 255 - SCALE_MAX_DIFF
);
2267 for (w
= 0; w
< sce
->ics
.num_windows
; w
+= sce
->ics
.group_len
[w
]) {
2268 for (g
= 0; g
< sce
->ics
.num_swb
; g
++) {
2269 int prevsc
= sce
->sf_idx
[w
*16+g
];
2270 if (dists
[w
*16+g
] > uplims
[w
*16+g
] && sce
->sf_idx
[w
*16+g
] > 60) {
2271 if (find_min_book(maxvals
[w
*16+g
], sce
->sf_idx
[w
*16+g
]-1))
2272 sce
->sf_idx
[w
*16+g
]--;
2274 sce
->sf_idx
[w
*16+g
]-=2;
2276 sce
->sf_idx
[w
*16+g
] = av_clip(sce
->sf_idx
[w
*16+g
], minscaler
, minscaler
+ SCALE_MAX_DIFF
);
2277 sce
->sf_idx
[w
*16+g
] = FFMIN(sce
->sf_idx
[w
*16+g
], 219);
2278 if (sce
->sf_idx
[w
*16+g
] != prevsc
)
2280 sce
->band_type
[w
*16+g
] = find_min_book(maxvals
[w
*16+g
], sce
->sf_idx
[w
*16+g
]);
2284 } while (fflag
&& its
< 10);
2287 static void search_for_ms_mips(AACEncContext
*s
, ChannelElement
*cpe
,
2290 int start
= 0, i
, w
, w2
, g
;
2291 float M
[128], S
[128];
2292 float *L34
= s
->scoefs
, *R34
= s
->scoefs
+ 128, *M34
= s
->scoefs
+ 128*2, *S34
= s
->scoefs
+ 128*3;
2293 SingleChannelElement
*sce0
= &cpe
->ch
[0];
2294 SingleChannelElement
*sce1
= &cpe
->ch
[1];
2295 if (!cpe
->common_window
)
2297 for (w
= 0; w
< sce0
->ics
.num_windows
; w
+= sce0
->ics
.group_len
[w
]) {
2298 for (g
= 0; g
< sce0
->ics
.num_swb
; g
++) {
2299 if (!cpe
->ch
[0].zeroes
[w
*16+g
] && !cpe
->ch
[1].zeroes
[w
*16+g
]) {
2300 float dist1
= 0.0f
, dist2
= 0.0f
;
2301 for (w2
= 0; w2
< sce0
->ics
.group_len
[w
]; w2
++) {
2302 FFPsyBand
*band0
= &s
->psy
.ch
[s
->cur_channel
+0].psy_bands
[(w
+w2
)*16+g
];
2303 FFPsyBand
*band1
= &s
->psy
.ch
[s
->cur_channel
+1].psy_bands
[(w
+w2
)*16+g
];
2304 float minthr
= FFMIN(band0
->threshold
, band1
->threshold
);
2305 float maxthr
= FFMAX(band0
->threshold
, band1
->threshold
);
2306 for (i
= 0; i
< sce0
->ics
.swb_sizes
[g
]; i
+=4) {
2307 M
[i
] = (sce0
->coeffs
[start
+w2
*128+i
]
2308 + sce1
->coeffs
[start
+w2
*128+i
]) * 0.5;
2309 M
[i
+1] = (sce0
->coeffs
[start
+w2
*128+i
+1]
2310 + sce1
->coeffs
[start
+w2
*128+i
+1]) * 0.5;
2311 M
[i
+2] = (sce0
->coeffs
[start
+w2
*128+i
+2]
2312 + sce1
->coeffs
[start
+w2
*128+i
+2]) * 0.5;
2313 M
[i
+3] = (sce0
->coeffs
[start
+w2
*128+i
+3]
2314 + sce1
->coeffs
[start
+w2
*128+i
+3]) * 0.5;
2317 - sce1
->coeffs
[start
+w2
*128+i
];
2319 - sce1
->coeffs
[start
+w2
*128+i
+1];
2321 - sce1
->coeffs
[start
+w2
*128+i
+2];
2323 - sce1
->coeffs
[start
+w2
*128+i
+3];
2325 abs_pow34_v(L34
, sce0
->coeffs
+start
+w2
*128, sce0
->ics
.swb_sizes
[g
]);
2326 abs_pow34_v(R34
, sce1
->coeffs
+start
+w2
*128, sce0
->ics
.swb_sizes
[g
]);
2327 abs_pow34_v(M34
, M
, sce0
->ics
.swb_sizes
[g
]);
2328 abs_pow34_v(S34
, S
, sce0
->ics
.swb_sizes
[g
]);
2329 dist1
+= quantize_band_cost(s
, sce0
->coeffs
+ start
+ w2
*128,
2331 sce0
->ics
.swb_sizes
[g
],
2332 sce0
->sf_idx
[(w
+w2
)*16+g
],
2333 sce0
->band_type
[(w
+w2
)*16+g
],
2334 lambda
/ band0
->threshold
, INFINITY
, NULL
);
2335 dist1
+= quantize_band_cost(s
, sce1
->coeffs
+ start
+ w2
*128,
2337 sce1
->ics
.swb_sizes
[g
],
2338 sce1
->sf_idx
[(w
+w2
)*16+g
],
2339 sce1
->band_type
[(w
+w2
)*16+g
],
2340 lambda
/ band1
->threshold
, INFINITY
, NULL
);
2341 dist2
+= quantize_band_cost(s
, M
,
2343 sce0
->ics
.swb_sizes
[g
],
2344 sce0
->sf_idx
[(w
+w2
)*16+g
],
2345 sce0
->band_type
[(w
+w2
)*16+g
],
2346 lambda
/ maxthr
, INFINITY
, NULL
);
2347 dist2
+= quantize_band_cost(s
, S
,
2349 sce1
->ics
.swb_sizes
[g
],
2350 sce1
->sf_idx
[(w
+w2
)*16+g
],
2351 sce1
->band_type
[(w
+w2
)*16+g
],
2352 lambda
/ minthr
, INFINITY
, NULL
);
2354 cpe
->ms_mask
[w
*16+g
] = dist2
< dist1
;
2356 start
+= sce0
->ics
.swb_sizes
[g
];
2360 #endif /*HAVE_MIPSFPU */
2362 static void codebook_trellis_rate_mips(AACEncContext
*s
, SingleChannelElement
*sce
,
2363 int win
, int group_len
, const float lambda
)
2365 BandCodingPath path
[120][12];
2366 int w
, swb
, cb
, start
, size
;
2368 const int max_sfb
= sce
->ics
.max_sfb
;
2369 const int run_bits
= sce
->ics
.num_windows
== 1 ? 5 : 3;
2370 const int run_esc
= (1 << run_bits
) - 1;
2371 int idx
, ppos
, count
;
2372 int stackrun
[120], stackcb
[120], stack_len
;
2373 float next_minbits
= INFINITY
;
2376 abs_pow34_v(s
->scoefs
, sce
->coeffs
, 1024);
2378 for (cb
= 0; cb
< 12; cb
++) {
2379 path
[0][cb
].cost
= run_bits
+4;
2380 path
[0][cb
].prev_idx
= -1;
2381 path
[0][cb
].run
= 0;
2383 for (swb
= 0; swb
< max_sfb
; swb
++) {
2384 size
= sce
->ics
.swb_sizes
[swb
];
2385 if (sce
->zeroes
[win
*16 + swb
]) {
2386 float cost_stay_here
= path
[swb
][0].cost
;
2387 float cost_get_here
= next_minbits
+ run_bits
+ 4;
2388 if ( run_value_bits
[sce
->ics
.num_windows
== 8][path
[swb
][0].run
]
2389 != run_value_bits
[sce
->ics
.num_windows
== 8][path
[swb
][0].run
+1])
2390 cost_stay_here
+= run_bits
;
2391 if (cost_get_here
< cost_stay_here
) {
2392 path
[swb
+1][0].prev_idx
= next_mincb
;
2393 path
[swb
+1][0].cost
= cost_get_here
;
2394 path
[swb
+1][0].run
= 1;
2396 path
[swb
+1][0].prev_idx
= 0;
2397 path
[swb
+1][0].cost
= cost_stay_here
;
2398 path
[swb
+1][0].run
= path
[swb
][0].run
+ 1;
2400 next_minbits
= path
[swb
+1][0].cost
;
2402 for (cb
= 1; cb
< 12; cb
++) {
2403 path
[swb
+1][cb
].cost
= 61450;
2404 path
[swb
+1][cb
].prev_idx
= -1;
2405 path
[swb
+1][cb
].run
= 0;
2408 float minbits
= next_minbits
;
2409 int mincb
= next_mincb
;
2410 int startcb
= sce
->band_type
[win
*16+swb
];
2411 next_minbits
= INFINITY
;
2413 for (cb
= 0; cb
< startcb
; cb
++) {
2414 path
[swb
+1][cb
].cost
= 61450;
2415 path
[swb
+1][cb
].prev_idx
= -1;
2416 path
[swb
+1][cb
].run
= 0;
2418 for (cb
= startcb
; cb
< 12; cb
++) {
2419 float cost_stay_here
, cost_get_here
;
2421 for (w
= 0; w
< group_len
; w
++) {
2422 bits
+= quantize_band_cost_bits(s
, sce
->coeffs
+ start
+ w
*128,
2423 s
->scoefs
+ start
+ w
*128, size
,
2424 sce
->sf_idx
[(win
+w
)*16+swb
], cb
,
2427 cost_stay_here
= path
[swb
][cb
].cost
+ bits
;
2428 cost_get_here
= minbits
+ bits
+ run_bits
+ 4;
2429 if ( run_value_bits
[sce
->ics
.num_windows
== 8][path
[swb
][cb
].run
]
2430 != run_value_bits
[sce
->ics
.num_windows
== 8][path
[swb
][cb
].run
+1])
2431 cost_stay_here
+= run_bits
;
2432 if (cost_get_here
< cost_stay_here
) {
2433 path
[swb
+1][cb
].prev_idx
= mincb
;
2434 path
[swb
+1][cb
].cost
= cost_get_here
;
2435 path
[swb
+1][cb
].run
= 1;
2437 path
[swb
+1][cb
].prev_idx
= cb
;
2438 path
[swb
+1][cb
].cost
= cost_stay_here
;
2439 path
[swb
+1][cb
].run
= path
[swb
][cb
].run
+ 1;
2441 if (path
[swb
+1][cb
].cost
< next_minbits
) {
2442 next_minbits
= path
[swb
+1][cb
].cost
;
2447 start
+= sce
->ics
.swb_sizes
[swb
];
2452 for (cb
= 1; cb
< 12; cb
++)
2453 if (path
[max_sfb
][cb
].cost
< path
[max_sfb
][idx
].cost
)
2457 av_assert1(idx
>= 0);
2459 stackrun
[stack_len
] = path
[ppos
][cb
].run
;
2460 stackcb
[stack_len
] = cb
;
2461 idx
= path
[ppos
-path
[ppos
][cb
].run
+1][cb
].prev_idx
;
2462 ppos
-= path
[ppos
][cb
].run
;
2467 for (i
= stack_len
- 1; i
>= 0; i
--) {
2468 put_bits(&s
->pb
, 4, stackcb
[i
]);
2469 count
= stackrun
[i
];
2470 memset(sce
->zeroes
+ win
*16 + start
, !stackcb
[i
], count
);
2471 for (j
= 0; j
< count
; j
++) {
2472 sce
->band_type
[win
*16 + start
] = stackcb
[i
];
2475 while (count
>= run_esc
) {
2476 put_bits(&s
->pb
, run_bits
, run_esc
);
2479 put_bits(&s
->pb
, run_bits
, count
);
2482 #endif /* HAVE_INLINE_ASM */
2484 void ff_aac_coder_init_mips(AACEncContext
*c
) {
2486 AACCoefficientsEncoder
*e
= c
->coder
;
2487 int option
= c
->options
.aac_coder
;
2490 e
->quantize_and_encode_band
= quantize_and_encode_band_mips
;
2491 e
->encode_window_bands_info
= codebook_trellis_rate_mips
;
2493 e
->search_for_quantizers
= search_for_quantizers_twoloop_mips
;
2494 e
->search_for_ms
= search_for_ms_mips
;
2495 #endif /* HAVE_MIPSFPU */
2497 #endif /* HAVE_INLINE_ASM */