3 * Copyright (C) 2008 Konstantin Shishkov
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 /***********************************
29 * add sane pulse detection
30 * add temporal noise shaping
31 ***********************************/
33 #include "libavutil/float_dsp.h"
34 #include "libavutil/opt.h"
38 #include "mpeg4audio.h"
48 #define AAC_MAX_CHANNELS 6
50 #define ERROR_IF(cond, ...) \
52 av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
53 return AVERROR(EINVAL); \
56 float ff_aac_pow34sf_tab
[428];
58 static const uint8_t swb_size_1024_96
[] = {
59 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
60 12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
61 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
64 static const uint8_t swb_size_1024_64
[] = {
65 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
66 12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
67 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
70 static const uint8_t swb_size_1024_48
[] = {
71 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
72 12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
73 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
77 static const uint8_t swb_size_1024_32
[] = {
78 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
79 12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
80 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
83 static const uint8_t swb_size_1024_24
[] = {
84 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
85 12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
86 32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
89 static const uint8_t swb_size_1024_16
[] = {
90 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
91 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
92 32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
95 static const uint8_t swb_size_1024_8
[] = {
96 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
97 16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
98 32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
101 static const uint8_t *swb_size_1024
[] = {
102 swb_size_1024_96
, swb_size_1024_96
, swb_size_1024_64
,
103 swb_size_1024_48
, swb_size_1024_48
, swb_size_1024_32
,
104 swb_size_1024_24
, swb_size_1024_24
, swb_size_1024_16
,
105 swb_size_1024_16
, swb_size_1024_16
, swb_size_1024_8
108 static const uint8_t swb_size_128_96
[] = {
109 4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
112 static const uint8_t swb_size_128_48
[] = {
113 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
116 static const uint8_t swb_size_128_24
[] = {
117 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
120 static const uint8_t swb_size_128_16
[] = {
121 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
124 static const uint8_t swb_size_128_8
[] = {
125 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
128 static const uint8_t *swb_size_128
[] = {
129 /* the last entry on the following row is swb_size_128_64 but is a
130 duplicate of swb_size_128_96 */
131 swb_size_128_96
, swb_size_128_96
, swb_size_128_96
,
132 swb_size_128_48
, swb_size_128_48
, swb_size_128_48
,
133 swb_size_128_24
, swb_size_128_24
, swb_size_128_16
,
134 swb_size_128_16
, swb_size_128_16
, swb_size_128_8
137 /** default channel configurations */
138 static const uint8_t aac_chan_configs
[6][5] = {
139 {1, TYPE_SCE
}, // 1 channel - single channel element
140 {1, TYPE_CPE
}, // 2 channels - channel pair
141 {2, TYPE_SCE
, TYPE_CPE
}, // 3 channels - center + stereo
142 {3, TYPE_SCE
, TYPE_CPE
, TYPE_SCE
}, // 4 channels - front center + stereo + back center
143 {3, TYPE_SCE
, TYPE_CPE
, TYPE_CPE
}, // 5 channels - front center + stereo + back stereo
144 {4, TYPE_SCE
, TYPE_CPE
, TYPE_CPE
, TYPE_LFE
}, // 6 channels - front center + stereo + back stereo + LFE
148 * Table to remap channels from libavcodec's default order to AAC order.
150 static const uint8_t aac_chan_maps
[AAC_MAX_CHANNELS
][AAC_MAX_CHANNELS
] = {
156 { 2, 0, 1, 4, 5, 3 },
160 * Make AAC audio config object.
161 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
163 static void put_audio_specific_config(AVCodecContext
*avctx
)
166 AACEncContext
*s
= avctx
->priv_data
;
168 init_put_bits(&pb
, avctx
->extradata
, avctx
->extradata_size
*8);
169 put_bits(&pb
, 5, 2); //object type - AAC-LC
170 put_bits(&pb
, 4, s
->samplerate_index
); //sample rate index
171 put_bits(&pb
, 4, s
->channels
);
173 put_bits(&pb
, 1, 0); //frame length - 1024 samples
174 put_bits(&pb
, 1, 0); //does not depend on core coder
175 put_bits(&pb
, 1, 0); //is not extension
177 //Explicitly Mark SBR absent
178 put_bits(&pb
, 11, 0x2b7); //sync extension
179 put_bits(&pb
, 5, AOT_SBR
);
184 #define WINDOW_FUNC(type) \
185 static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
186 SingleChannelElement *sce, \
189 WINDOW_FUNC(only_long
)
191 const float *lwindow
= sce
->ics
.use_kb_window
[0] ? ff_aac_kbd_long_1024
: ff_sine_1024
;
192 const float *pwindow
= sce
->ics
.use_kb_window
[1] ? ff_aac_kbd_long_1024
: ff_sine_1024
;
193 float *out
= sce
->ret_buf
;
195 fdsp
->vector_fmul (out
, audio
, lwindow
, 1024);
196 fdsp
->vector_fmul_reverse(out
+ 1024, audio
+ 1024, pwindow
, 1024);
199 WINDOW_FUNC(long_start
)
201 const float *lwindow
= sce
->ics
.use_kb_window
[1] ? ff_aac_kbd_long_1024
: ff_sine_1024
;
202 const float *swindow
= sce
->ics
.use_kb_window
[0] ? ff_aac_kbd_short_128
: ff_sine_128
;
203 float *out
= sce
->ret_buf
;
205 fdsp
->vector_fmul(out
, audio
, lwindow
, 1024);
206 memcpy(out
+ 1024, audio
+ 1024, sizeof(out
[0]) * 448);
207 fdsp
->vector_fmul_reverse(out
+ 1024 + 448, audio
+ 1024 + 448, swindow
, 128);
208 memset(out
+ 1024 + 576, 0, sizeof(out
[0]) * 448);
211 WINDOW_FUNC(long_stop
)
213 const float *lwindow
= sce
->ics
.use_kb_window
[0] ? ff_aac_kbd_long_1024
: ff_sine_1024
;
214 const float *swindow
= sce
->ics
.use_kb_window
[1] ? ff_aac_kbd_short_128
: ff_sine_128
;
215 float *out
= sce
->ret_buf
;
217 memset(out
, 0, sizeof(out
[0]) * 448);
218 fdsp
->vector_fmul(out
+ 448, audio
+ 448, swindow
, 128);
219 memcpy(out
+ 576, audio
+ 576, sizeof(out
[0]) * 448);
220 fdsp
->vector_fmul_reverse(out
+ 1024, audio
+ 1024, lwindow
, 1024);
223 WINDOW_FUNC(eight_short
)
225 const float *swindow
= sce
->ics
.use_kb_window
[0] ? ff_aac_kbd_short_128
: ff_sine_128
;
226 const float *pwindow
= sce
->ics
.use_kb_window
[1] ? ff_aac_kbd_short_128
: ff_sine_128
;
227 const float *in
= audio
+ 448;
228 float *out
= sce
->ret_buf
;
231 for (w
= 0; w
< 8; w
++) {
232 fdsp
->vector_fmul (out
, in
, w
? pwindow
: swindow
, 128);
235 fdsp
->vector_fmul_reverse(out
, in
, swindow
, 128);
240 static void (*const apply_window
[4])(AVFloatDSPContext
*fdsp
,
241 SingleChannelElement
*sce
,
242 const float *audio
) = {
243 [ONLY_LONG_SEQUENCE
] = apply_only_long_window
,
244 [LONG_START_SEQUENCE
] = apply_long_start_window
,
245 [EIGHT_SHORT_SEQUENCE
] = apply_eight_short_window
,
246 [LONG_STOP_SEQUENCE
] = apply_long_stop_window
249 static void apply_window_and_mdct(AACEncContext
*s
, SingleChannelElement
*sce
,
253 float *output
= sce
->ret_buf
;
255 apply_window
[sce
->ics
.window_sequence
[0]](s
->fdsp
, sce
, audio
);
257 if (sce
->ics
.window_sequence
[0] != EIGHT_SHORT_SEQUENCE
)
258 s
->mdct1024
.mdct_calc(&s
->mdct1024
, sce
->coeffs
, output
);
260 for (i
= 0; i
< 1024; i
+= 128)
261 s
->mdct128
.mdct_calc(&s
->mdct128
, sce
->coeffs
+ i
, output
+ i
*2);
262 memcpy(audio
, audio
+ 1024, sizeof(audio
[0]) * 1024);
266 * Encode ics_info element.
267 * @see Table 4.6 (syntax of ics_info)
269 static void put_ics_info(AACEncContext
*s
, IndividualChannelStream
*info
)
273 put_bits(&s
->pb
, 1, 0); // ics_reserved bit
274 put_bits(&s
->pb
, 2, info
->window_sequence
[0]);
275 put_bits(&s
->pb
, 1, info
->use_kb_window
[0]);
276 if (info
->window_sequence
[0] != EIGHT_SHORT_SEQUENCE
) {
277 put_bits(&s
->pb
, 6, info
->max_sfb
);
278 put_bits(&s
->pb
, 1, 0); // no prediction
280 put_bits(&s
->pb
, 4, info
->max_sfb
);
281 for (w
= 1; w
< 8; w
++)
282 put_bits(&s
->pb
, 1, !info
->group_len
[w
]);
288 * @see 4.6.8.1 "Joint Coding - M/S Stereo"
290 static void encode_ms_info(PutBitContext
*pb
, ChannelElement
*cpe
)
294 put_bits(pb
, 2, cpe
->ms_mode
);
295 if (cpe
->ms_mode
== 1)
296 for (w
= 0; w
< cpe
->ch
[0].ics
.num_windows
; w
+= cpe
->ch
[0].ics
.group_len
[w
])
297 for (i
= 0; i
< cpe
->ch
[0].ics
.max_sfb
; i
++)
298 put_bits(pb
, 1, cpe
->ms_mask
[w
*16 + i
]);
302 * Produce integer coefficients from scalefactors provided by the model.
304 static void adjust_frame_information(ChannelElement
*cpe
, int chans
)
307 int start
, maxsfb
, cmaxsfb
;
309 for (ch
= 0; ch
< chans
; ch
++) {
310 IndividualChannelStream
*ics
= &cpe
->ch
[ch
].ics
;
313 cpe
->ch
[ch
].pulse
.num_pulse
= 0;
314 for (w
= 0; w
< ics
->num_windows
*16; w
+= 16) {
315 for (g
= 0; g
< ics
->num_swb
; g
++) {
317 if (cpe
->common_window
&& !ch
&& cpe
->ms_mask
[w
+ g
]) {
318 for (i
= 0; i
< ics
->swb_sizes
[g
]; i
++) {
319 cpe
->ch
[0].coeffs
[start
+i
] = (cpe
->ch
[0].coeffs
[start
+i
] + cpe
->ch
[1].coeffs
[start
+i
]) / 2.0;
320 cpe
->ch
[1].coeffs
[start
+i
] = cpe
->ch
[0].coeffs
[start
+i
] - cpe
->ch
[1].coeffs
[start
+i
];
323 start
+= ics
->swb_sizes
[g
];
325 for (cmaxsfb
= ics
->num_swb
; cmaxsfb
> 0 && cpe
->ch
[ch
].zeroes
[w
+cmaxsfb
-1]; cmaxsfb
--)
327 maxsfb
= FFMAX(maxsfb
, cmaxsfb
);
329 ics
->max_sfb
= maxsfb
;
331 //adjust zero bands for window groups
332 for (w
= 0; w
< ics
->num_windows
; w
+= ics
->group_len
[w
]) {
333 for (g
= 0; g
< ics
->max_sfb
; g
++) {
335 for (w2
= w
; w2
< w
+ ics
->group_len
[w
]; w2
++) {
336 if (!cpe
->ch
[ch
].zeroes
[w2
*16 + g
]) {
341 cpe
->ch
[ch
].zeroes
[w
*16 + g
] = i
;
346 if (chans
> 1 && cpe
->common_window
) {
347 IndividualChannelStream
*ics0
= &cpe
->ch
[0].ics
;
348 IndividualChannelStream
*ics1
= &cpe
->ch
[1].ics
;
350 ics0
->max_sfb
= FFMAX(ics0
->max_sfb
, ics1
->max_sfb
);
351 ics1
->max_sfb
= ics0
->max_sfb
;
352 for (w
= 0; w
< ics0
->num_windows
*16; w
+= 16)
353 for (i
= 0; i
< ics0
->max_sfb
; i
++)
354 if (cpe
->ms_mask
[w
+i
])
356 if (msc
== 0 || ics0
->max_sfb
== 0)
359 cpe
->ms_mode
= msc
< ics0
->max_sfb
* ics0
->num_windows
? 1 : 2;
364 * Encode scalefactor band coding type.
366 static void encode_band_info(AACEncContext
*s
, SingleChannelElement
*sce
)
370 for (w
= 0; w
< sce
->ics
.num_windows
; w
+= sce
->ics
.group_len
[w
])
371 s
->coder
->encode_window_bands_info(s
, sce
, w
, sce
->ics
.group_len
[w
], s
->lambda
);
375 * Encode scalefactors.
377 static void encode_scale_factors(AVCodecContext
*avctx
, AACEncContext
*s
,
378 SingleChannelElement
*sce
)
380 int off
= sce
->sf_idx
[0], diff
;
383 for (w
= 0; w
< sce
->ics
.num_windows
; w
+= sce
->ics
.group_len
[w
]) {
384 for (i
= 0; i
< sce
->ics
.max_sfb
; i
++) {
385 if (!sce
->zeroes
[w
*16 + i
]) {
386 diff
= sce
->sf_idx
[w
*16 + i
] - off
+ SCALE_DIFF_ZERO
;
387 av_assert0(diff
>= 0 && diff
<= 120);
388 off
= sce
->sf_idx
[w
*16 + i
];
389 put_bits(&s
->pb
, ff_aac_scalefactor_bits
[diff
], ff_aac_scalefactor_code
[diff
]);
398 static void encode_pulses(AACEncContext
*s
, Pulse
*pulse
)
402 put_bits(&s
->pb
, 1, !!pulse
->num_pulse
);
403 if (!pulse
->num_pulse
)
406 put_bits(&s
->pb
, 2, pulse
->num_pulse
- 1);
407 put_bits(&s
->pb
, 6, pulse
->start
);
408 for (i
= 0; i
< pulse
->num_pulse
; i
++) {
409 put_bits(&s
->pb
, 5, pulse
->pos
[i
]);
410 put_bits(&s
->pb
, 4, pulse
->amp
[i
]);
415 * Encode spectral coefficients processed by psychoacoustic model.
417 static void encode_spectral_coeffs(AACEncContext
*s
, SingleChannelElement
*sce
)
421 for (w
= 0; w
< sce
->ics
.num_windows
; w
+= sce
->ics
.group_len
[w
]) {
423 for (i
= 0; i
< sce
->ics
.max_sfb
; i
++) {
424 if (sce
->zeroes
[w
*16 + i
]) {
425 start
+= sce
->ics
.swb_sizes
[i
];
428 for (w2
= w
; w2
< w
+ sce
->ics
.group_len
[w
]; w2
++)
429 s
->coder
->quantize_and_encode_band(s
, &s
->pb
, sce
->coeffs
+ start
+ w2
*128,
430 sce
->ics
.swb_sizes
[i
],
431 sce
->sf_idx
[w
*16 + i
],
432 sce
->band_type
[w
*16 + i
],
434 start
+= sce
->ics
.swb_sizes
[i
];
440 * Encode one channel of audio data.
442 static int encode_individual_channel(AVCodecContext
*avctx
, AACEncContext
*s
,
443 SingleChannelElement
*sce
,
446 put_bits(&s
->pb
, 8, sce
->sf_idx
[0]);
448 put_ics_info(s
, &sce
->ics
);
449 encode_band_info(s
, sce
);
450 encode_scale_factors(avctx
, s
, sce
);
451 encode_pulses(s
, &sce
->pulse
);
452 put_bits(&s
->pb
, 1, 0); //tns
453 put_bits(&s
->pb
, 1, 0); //ssr
454 encode_spectral_coeffs(s
, sce
);
459 * Write some auxiliary information about the created AAC file.
461 static void put_bitstream_info(AACEncContext
*s
, const char *name
)
463 int i
, namelen
, padbits
;
465 namelen
= strlen(name
) + 2;
466 put_bits(&s
->pb
, 3, TYPE_FIL
);
467 put_bits(&s
->pb
, 4, FFMIN(namelen
, 15));
469 put_bits(&s
->pb
, 8, namelen
- 14);
470 put_bits(&s
->pb
, 4, 0); //extension type - filler
471 padbits
= -put_bits_count(&s
->pb
) & 7;
472 avpriv_align_put_bits(&s
->pb
);
473 for (i
= 0; i
< namelen
- 2; i
++)
474 put_bits(&s
->pb
, 8, name
[i
]);
475 put_bits(&s
->pb
, 12 - padbits
, 0);
479 * Copy input samples.
480 * Channels are reordered from libavcodec's default order to AAC order.
482 static void copy_input_samples(AACEncContext
*s
, const AVFrame
*frame
)
485 int end
= 2048 + (frame
? frame
->nb_samples
: 0);
486 const uint8_t *channel_map
= aac_chan_maps
[s
->channels
- 1];
488 /* copy and remap input samples */
489 for (ch
= 0; ch
< s
->channels
; ch
++) {
490 /* copy last 1024 samples of previous frame to the start of the current frame */
491 memcpy(&s
->planar_samples
[ch
][1024], &s
->planar_samples
[ch
][2048], 1024 * sizeof(s
->planar_samples
[0][0]));
493 /* copy new samples and zero any remaining samples */
495 memcpy(&s
->planar_samples
[ch
][2048],
496 frame
->extended_data
[channel_map
[ch
]],
497 frame
->nb_samples
* sizeof(s
->planar_samples
[0][0]));
499 memset(&s
->planar_samples
[ch
][end
], 0,
500 (3072 - end
) * sizeof(s
->planar_samples
[0][0]));
504 static int aac_encode_frame(AVCodecContext
*avctx
, AVPacket
*avpkt
,
505 const AVFrame
*frame
, int *got_packet_ptr
)
507 AACEncContext
*s
= avctx
->priv_data
;
508 float **samples
= s
->planar_samples
, *samples2
, *la
, *overlap
;
510 int i
, ch
, w
, g
, chans
, tag
, start_ch
, ret
;
511 int chan_el_counter
[4];
512 FFPsyWindowInfo windows
[AAC_MAX_CHANNELS
];
514 if (s
->last_frame
== 2)
517 /* add current frame to queue */
519 if ((ret
= ff_af_queue_add(&s
->afq
, frame
)) < 0)
523 copy_input_samples(s
, frame
);
525 ff_psy_preprocess(s
->psypp
, s
->planar_samples
, s
->channels
);
527 if (!avctx
->frame_number
)
531 for (i
= 0; i
< s
->chan_map
[0]; i
++) {
532 FFPsyWindowInfo
* wi
= windows
+ start_ch
;
533 tag
= s
->chan_map
[i
+1];
534 chans
= tag
== TYPE_CPE
? 2 : 1;
536 for (ch
= 0; ch
< chans
; ch
++) {
537 IndividualChannelStream
*ics
= &cpe
->ch
[ch
].ics
;
538 int cur_channel
= start_ch
+ ch
;
539 overlap
= &samples
[cur_channel
][0];
540 samples2
= overlap
+ 1024;
541 la
= samples2
+ (448+64);
544 if (tag
== TYPE_LFE
) {
545 wi
[ch
].window_type
[0] = ONLY_LONG_SEQUENCE
;
546 wi
[ch
].window_shape
= 0;
547 wi
[ch
].num_windows
= 1;
548 wi
[ch
].grouping
[0] = 1;
550 /* Only the lowest 12 coefficients are used in a LFE channel.
551 * The expression below results in only the bottom 8 coefficients
552 * being used for 11.025kHz to 16kHz sample rates.
554 ics
->num_swb
= s
->samplerate_index
>= 8 ? 1 : 3;
556 wi
[ch
] = s
->psy
.model
->window(&s
->psy
, samples2
, la
, cur_channel
,
557 ics
->window_sequence
[0]);
559 ics
->window_sequence
[1] = ics
->window_sequence
[0];
560 ics
->window_sequence
[0] = wi
[ch
].window_type
[0];
561 ics
->use_kb_window
[1] = ics
->use_kb_window
[0];
562 ics
->use_kb_window
[0] = wi
[ch
].window_shape
;
563 ics
->num_windows
= wi
[ch
].num_windows
;
564 ics
->swb_sizes
= s
->psy
.bands
[ics
->num_windows
== 8];
565 ics
->num_swb
= tag
== TYPE_LFE
? ics
->num_swb
: s
->psy
.num_bands
[ics
->num_windows
== 8];
566 for (w
= 0; w
< ics
->num_windows
; w
++)
567 ics
->group_len
[w
] = wi
[ch
].grouping
[w
];
569 apply_window_and_mdct(s
, &cpe
->ch
[ch
], overlap
);
570 if (isnan(cpe
->ch
->coeffs
[0])) {
571 av_log(avctx
, AV_LOG_ERROR
, "Input contains NaN\n");
572 return AVERROR(EINVAL
);
577 if ((ret
= ff_alloc_packet2(avctx
, avpkt
, 8192 * s
->channels
)) < 0)
582 init_put_bits(&s
->pb
, avpkt
->data
, avpkt
->size
);
584 if ((avctx
->frame_number
& 0xFF)==1 && !(avctx
->flags
& CODEC_FLAG_BITEXACT
))
585 put_bitstream_info(s
, LIBAVCODEC_IDENT
);
587 memset(chan_el_counter
, 0, sizeof(chan_el_counter
));
588 for (i
= 0; i
< s
->chan_map
[0]; i
++) {
589 FFPsyWindowInfo
* wi
= windows
+ start_ch
;
590 const float *coeffs
[2];
591 tag
= s
->chan_map
[i
+1];
592 chans
= tag
== TYPE_CPE
? 2 : 1;
594 put_bits(&s
->pb
, 3, tag
);
595 put_bits(&s
->pb
, 4, chan_el_counter
[tag
]++);
596 for (ch
= 0; ch
< chans
; ch
++)
597 coeffs
[ch
] = cpe
->ch
[ch
].coeffs
;
598 s
->psy
.model
->analyze(&s
->psy
, start_ch
, coeffs
, wi
);
599 for (ch
= 0; ch
< chans
; ch
++) {
600 s
->cur_channel
= start_ch
+ ch
;
601 s
->coder
->search_for_quantizers(avctx
, s
, &cpe
->ch
[ch
], s
->lambda
);
603 cpe
->common_window
= 0;
605 && wi
[0].window_type
[0] == wi
[1].window_type
[0]
606 && wi
[0].window_shape
== wi
[1].window_shape
) {
608 cpe
->common_window
= 1;
609 for (w
= 0; w
< wi
[0].num_windows
; w
++) {
610 if (wi
[0].grouping
[w
] != wi
[1].grouping
[w
]) {
611 cpe
->common_window
= 0;
616 s
->cur_channel
= start_ch
;
617 if (s
->options
.stereo_mode
&& cpe
->common_window
) {
618 if (s
->options
.stereo_mode
> 0) {
619 IndividualChannelStream
*ics
= &cpe
->ch
[0].ics
;
620 for (w
= 0; w
< ics
->num_windows
; w
+= ics
->group_len
[w
])
621 for (g
= 0; g
< ics
->num_swb
; g
++)
622 cpe
->ms_mask
[w
*16+g
] = 1;
623 } else if (s
->coder
->search_for_ms
) {
624 s
->coder
->search_for_ms(s
, cpe
, s
->lambda
);
627 adjust_frame_information(cpe
, chans
);
629 put_bits(&s
->pb
, 1, cpe
->common_window
);
630 if (cpe
->common_window
) {
631 put_ics_info(s
, &cpe
->ch
[0].ics
);
632 encode_ms_info(&s
->pb
, cpe
);
635 for (ch
= 0; ch
< chans
; ch
++) {
636 s
->cur_channel
= start_ch
+ ch
;
637 encode_individual_channel(avctx
, s
, &cpe
->ch
[ch
], cpe
->common_window
);
642 frame_bits
= put_bits_count(&s
->pb
);
643 if (frame_bits
<= 6144 * s
->channels
- 3) {
644 s
->psy
.bitres
.bits
= frame_bits
/ s
->channels
;
648 s
->lambda
*= avctx
->bit_rate
* 1024.0f
/ avctx
->sample_rate
/ frame_bits
;
652 put_bits(&s
->pb
, 3, TYPE_END
);
653 flush_put_bits(&s
->pb
);
654 avctx
->frame_bits
= put_bits_count(&s
->pb
);
656 // rate control stuff
657 if (!(avctx
->flags
& CODEC_FLAG_QSCALE
)) {
658 float ratio
= avctx
->bit_rate
* 1024.0f
/ avctx
->sample_rate
/ avctx
->frame_bits
;
660 s
->lambda
= FFMIN(s
->lambda
, 65536.f
);
666 ff_af_queue_remove(&s
->afq
, avctx
->frame_size
, &avpkt
->pts
,
669 avpkt
->size
= put_bits_count(&s
->pb
) >> 3;
674 static av_cold
int aac_encode_end(AVCodecContext
*avctx
)
676 AACEncContext
*s
= avctx
->priv_data
;
678 ff_mdct_end(&s
->mdct1024
);
679 ff_mdct_end(&s
->mdct128
);
682 ff_psy_preprocess_end(s
->psypp
);
683 av_freep(&s
->buffer
.samples
);
686 ff_af_queue_close(&s
->afq
);
690 static av_cold
int dsp_init(AVCodecContext
*avctx
, AACEncContext
*s
)
694 s
->fdsp
= avpriv_float_dsp_alloc(avctx
->flags
& CODEC_FLAG_BITEXACT
);
696 return AVERROR(ENOMEM
);
699 ff_kbd_window_init(ff_aac_kbd_long_1024
, 4.0, 1024);
700 ff_kbd_window_init(ff_aac_kbd_short_128
, 6.0, 128);
701 ff_init_ff_sine_windows(10);
702 ff_init_ff_sine_windows(7);
704 if (ret
= ff_mdct_init(&s
->mdct1024
, 11, 0, 32768.0))
706 if (ret
= ff_mdct_init(&s
->mdct128
, 8, 0, 32768.0))
712 static av_cold
int alloc_buffers(AVCodecContext
*avctx
, AACEncContext
*s
)
715 FF_ALLOCZ_ARRAY_OR_GOTO(avctx
, s
->buffer
.samples
, s
->channels
, 3 * 1024 * sizeof(s
->buffer
.samples
[0]), alloc_fail
);
716 FF_ALLOCZ_ARRAY_OR_GOTO(avctx
, s
->cpe
, s
->chan_map
[0], sizeof(ChannelElement
), alloc_fail
);
717 FF_ALLOCZ_OR_GOTO(avctx
, avctx
->extradata
, 5 + FF_INPUT_BUFFER_PADDING_SIZE
, alloc_fail
);
719 for(ch
= 0; ch
< s
->channels
; ch
++)
720 s
->planar_samples
[ch
] = s
->buffer
.samples
+ 3 * 1024 * ch
;
724 return AVERROR(ENOMEM
);
727 static av_cold
int aac_encode_init(AVCodecContext
*avctx
)
729 AACEncContext
*s
= avctx
->priv_data
;
731 const uint8_t *sizes
[2];
732 uint8_t grouping
[AAC_MAX_CHANNELS
];
735 avctx
->frame_size
= 1024;
737 for (i
= 0; i
< 16; i
++)
738 if (avctx
->sample_rate
== avpriv_mpeg4audio_sample_rates
[i
])
741 s
->channels
= avctx
->channels
;
744 "Unsupported sample rate %d\n", avctx
->sample_rate
);
745 ERROR_IF(s
->channels
> AAC_MAX_CHANNELS
,
746 "Unsupported number of channels: %d\n", s
->channels
);
747 ERROR_IF(avctx
->profile
!= FF_PROFILE_UNKNOWN
&& avctx
->profile
!= FF_PROFILE_AAC_LOW
,
748 "Unsupported profile %d\n", avctx
->profile
);
749 ERROR_IF(1024.0 * avctx
->bit_rate
/ avctx
->sample_rate
> 6144 * s
->channels
,
750 "Too many bits per frame requested\n");
752 s
->samplerate_index
= i
;
754 s
->chan_map
= aac_chan_configs
[s
->channels
-1];
756 if (ret
= dsp_init(avctx
, s
))
759 if (ret
= alloc_buffers(avctx
, s
))
762 avctx
->extradata_size
= 5;
763 put_audio_specific_config(avctx
);
765 sizes
[0] = swb_size_1024
[i
];
766 sizes
[1] = swb_size_128
[i
];
767 lengths
[0] = ff_aac_num_swb_1024
[i
];
768 lengths
[1] = ff_aac_num_swb_128
[i
];
769 for (i
= 0; i
< s
->chan_map
[0]; i
++)
770 grouping
[i
] = s
->chan_map
[i
+ 1] == TYPE_CPE
;
771 if (ret
= ff_psy_init(&s
->psy
, avctx
, 2, sizes
, lengths
, s
->chan_map
[0], grouping
))
773 s
->psypp
= ff_psy_preprocess_init(avctx
);
774 s
->coder
= &ff_aac_coders
[s
->options
.aac_coder
];
777 ff_aac_coder_init_mips(s
);
779 s
->lambda
= avctx
->global_quality
> 0 ? avctx
->global_quality
: 120;
783 for (i
= 0; i
< 428; i
++)
784 ff_aac_pow34sf_tab
[i
] = sqrt(ff_aac_pow2sf_tab
[i
] * sqrt(ff_aac_pow2sf_tab
[i
]));
786 avctx
->initial_padding
= 1024;
787 ff_af_queue_init(avctx
, &s
->afq
);
791 aac_encode_end(avctx
);
795 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
796 static const AVOption aacenc_options
[] = {
797 {"stereo_mode", "Stereo coding method", offsetof(AACEncContext
, options
.stereo_mode
), AV_OPT_TYPE_INT
, {.i64
= 0}, -1, 1, AACENC_FLAGS
, "stereo_mode"},
798 {"auto", "Selected by the Encoder", 0, AV_OPT_TYPE_CONST
, {.i64
= -1 }, INT_MIN
, INT_MAX
, AACENC_FLAGS
, "stereo_mode"},
799 {"ms_off", "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST
, {.i64
= 0 }, INT_MIN
, INT_MAX
, AACENC_FLAGS
, "stereo_mode"},
800 {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST
, {.i64
= 1 }, INT_MIN
, INT_MAX
, AACENC_FLAGS
, "stereo_mode"},
801 {"aac_coder", "", offsetof(AACEncContext
, options
.aac_coder
), AV_OPT_TYPE_INT
, {.i64
= AAC_CODER_TWOLOOP
}, 0, AAC_CODER_NB
-1, AACENC_FLAGS
, "aac_coder"},
802 {"faac", "FAAC-inspired method", 0, AV_OPT_TYPE_CONST
, {.i64
= AAC_CODER_FAAC
}, INT_MIN
, INT_MAX
, AACENC_FLAGS
, "aac_coder"},
803 {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST
, {.i64
= AAC_CODER_ANMR
}, INT_MIN
, INT_MAX
, AACENC_FLAGS
, "aac_coder"},
804 {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST
, {.i64
= AAC_CODER_TWOLOOP
}, INT_MIN
, INT_MAX
, AACENC_FLAGS
, "aac_coder"},
805 {"fast", "Constant quantizer", 0, AV_OPT_TYPE_CONST
, {.i64
= AAC_CODER_FAST
}, INT_MIN
, INT_MAX
, AACENC_FLAGS
, "aac_coder"},
809 static const AVClass aacenc_class
= {
811 av_default_item_name
,
813 LIBAVUTIL_VERSION_INT
,
816 /* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
818 static const int mpeg4audio_sample_rates
[16] = {
819 96000, 88200, 64000, 48000, 44100, 32000,
820 24000, 22050, 16000, 12000, 11025, 8000, 7350
823 AVCodec ff_aac_encoder
= {
825 .long_name
= NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
826 .type
= AVMEDIA_TYPE_AUDIO
,
827 .id
= AV_CODEC_ID_AAC
,
828 .priv_data_size
= sizeof(AACEncContext
),
829 .init
= aac_encode_init
,
830 .encode2
= aac_encode_frame
,
831 .close
= aac_encode_end
,
832 .supported_samplerates
= mpeg4audio_sample_rates
,
833 .capabilities
= CODEC_CAP_SMALL_LAST_FRAME
| CODEC_CAP_DELAY
|
834 CODEC_CAP_EXPERIMENTAL
,
835 .sample_fmts
= (const enum AVSampleFormat
[]){ AV_SAMPLE_FMT_FLTP
,
836 AV_SAMPLE_FMT_NONE
},
837 .priv_class
= &aacenc_class
,