3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
5 * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
8 * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
9 * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
11 * This file is part of FFmpeg.
13 * FFmpeg is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Lesser General Public
15 * License as published by the Free Software Foundation; either
16 * version 2.1 of the License, or (at your option) any later version.
18 * FFmpeg is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * Lesser General Public License for more details.
23 * You should have received a copy of the GNU Lesser General Public
24 * License along with FFmpeg; if not, write to the Free Software
25 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
31 * @author Oded Shimon ( ods15 ods15 dyndns org )
32 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
39 * N (code in SoC repo) gain control
41 * Y window shapes - standard
42 * N window shapes - Low Delay
43 * Y filterbank - standard
44 * N (code in SoC repo) filterbank - Scalable Sample Rate
45 * Y Temporal Noise Shaping
46 * Y Long Term Prediction
49 * Y frequency domain prediction
50 * Y Perceptual Noise Substitution
52 * N Scalable Inverse AAC Quantization
53 * N Frequency Selective Switch
55 * Y quantization & coding - AAC
56 * N quantization & coding - TwinVQ
57 * N quantization & coding - BSAC
58 * N AAC Error Resilience tools
59 * N Error Resilience payload syntax
60 * N Error Protection tool
62 * N Silence Compression
65 * N Structured Audio tools
66 * N Structured Audio Sample Bank Format
68 * N Harmonic and Individual Lines plus Noise
69 * N Text-To-Speech Interface
70 * Y Spectral Band Replication
71 * Y (not in this code) Layer-1
72 * Y (not in this code) Layer-2
73 * Y (not in this code) Layer-3
74 * N SinuSoidal Coding (Transient, Sinusoid, Noise)
76 * N Direct Stream Transfer
77 * Y Enhanced AAC Low Delay (ER AAC ELD)
79 * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
80 * - HE AAC v2 comprises LC AAC with Spectral Band Replication and
84 #include "libavutil/float_dsp.h"
85 #include "libavutil/opt.h"
90 #include "fmtconvert.h"
97 #include "aacdectab.h"
98 #include "cbrt_tablegen.h"
101 #include "mpeg4audio.h"
102 #include "aacadtsdec.h"
103 #include "libavutil/intfloat.h"
112 # include "arm/aac.h"
114 # include "mips/aacdec_mips.h"
117 static VLC vlc_scalefactors
;
118 static VLC vlc_spectral
[11];
120 static int output_configure(AACContext
*ac
,
121 uint8_t layout_map
[MAX_ELEM_ID
*4][3], int tags
,
122 enum OCStatus oc_type
, int get_new_frame
);
124 #define overread_err "Input buffer exhausted before END element found\n"
126 static int count_channels(uint8_t (*layout
)[3], int tags
)
129 for (i
= 0; i
< tags
; i
++) {
130 int syn_ele
= layout
[i
][0];
131 int pos
= layout
[i
][2];
132 sum
+= (1 + (syn_ele
== TYPE_CPE
)) *
133 (pos
!= AAC_CHANNEL_OFF
&& pos
!= AAC_CHANNEL_CC
);
139 * Check for the channel element in the current channel position configuration.
140 * If it exists, make sure the appropriate element is allocated and map the
141 * channel order to match the internal FFmpeg channel layout.
143 * @param che_pos current channel position configuration
144 * @param type channel element type
145 * @param id channel element id
146 * @param channels count of the number of channels in the configuration
148 * @return Returns error status. 0 - OK, !0 - error
150 static av_cold
int che_configure(AACContext
*ac
,
151 enum ChannelPosition che_pos
,
152 int type
, int id
, int *channels
)
154 if (*channels
>= MAX_CHANNELS
)
155 return AVERROR_INVALIDDATA
;
157 if (!ac
->che
[type
][id
]) {
158 if (!(ac
->che
[type
][id
] = av_mallocz(sizeof(ChannelElement
))))
159 return AVERROR(ENOMEM
);
160 ff_aac_sbr_ctx_init(ac
, &ac
->che
[type
][id
]->sbr
);
162 if (type
!= TYPE_CCE
) {
163 if (*channels
>= MAX_CHANNELS
- (type
== TYPE_CPE
|| (type
== TYPE_SCE
&& ac
->oc
[1].m4ac
.ps
== 1))) {
164 av_log(ac
->avctx
, AV_LOG_ERROR
, "Too many channels\n");
165 return AVERROR_INVALIDDATA
;
167 ac
->output_element
[(*channels
)++] = &ac
->che
[type
][id
]->ch
[0];
168 if (type
== TYPE_CPE
||
169 (type
== TYPE_SCE
&& ac
->oc
[1].m4ac
.ps
== 1)) {
170 ac
->output_element
[(*channels
)++] = &ac
->che
[type
][id
]->ch
[1];
174 if (ac
->che
[type
][id
])
175 ff_aac_sbr_ctx_close(&ac
->che
[type
][id
]->sbr
);
176 av_freep(&ac
->che
[type
][id
]);
181 static int frame_configure_elements(AVCodecContext
*avctx
)
183 AACContext
*ac
= avctx
->priv_data
;
184 int type
, id
, ch
, ret
;
186 /* set channel pointers to internal buffers by default */
187 for (type
= 0; type
< 4; type
++) {
188 for (id
= 0; id
< MAX_ELEM_ID
; id
++) {
189 ChannelElement
*che
= ac
->che
[type
][id
];
191 che
->ch
[0].ret
= che
->ch
[0].ret_buf
;
192 che
->ch
[1].ret
= che
->ch
[1].ret_buf
;
197 /* get output buffer */
198 av_frame_unref(ac
->frame
);
199 if (!avctx
->channels
)
202 ac
->frame
->nb_samples
= 2048;
203 if ((ret
= ff_get_buffer(avctx
, ac
->frame
, 0)) < 0)
206 /* map output channel pointers to AVFrame data */
207 for (ch
= 0; ch
< avctx
->channels
; ch
++) {
208 if (ac
->output_element
[ch
])
209 ac
->output_element
[ch
]->ret
= (float *)ac
->frame
->extended_data
[ch
];
/**
 * One layout-map element together with the output channel mask it maps to.
 * NOTE(review): the syn_ele/elem_id declarations were absent from this
 * listing; they are restored from their uses as designated initialisers and
 * from being copied back into the uint8_t layout map - confirm the types.
 */
struct elem_to_channel {
    uint64_t av_position;   /* AV_CH_* mask, or UINT64_MAX when unassigned */
    uint8_t  syn_ele;       /* syntax element type (layout_map[][0]) */
    uint8_t  elem_id;       /* element id (layout_map[][1]) */
    uint8_t  aac_position;  /* AAC_CHANNEL_* position (layout_map[][2]) */
};
222 static int assign_pair(struct elem_to_channel e2c_vec
[MAX_ELEM_ID
],
223 uint8_t (*layout_map
)[3], int offset
, uint64_t left
,
224 uint64_t right
, int pos
)
226 if (layout_map
[offset
][0] == TYPE_CPE
) {
227 e2c_vec
[offset
] = (struct elem_to_channel
) {
228 .av_position
= left
| right
,
230 .elem_id
= layout_map
[offset
][1],
235 e2c_vec
[offset
] = (struct elem_to_channel
) {
238 .elem_id
= layout_map
[offset
][1],
241 e2c_vec
[offset
+ 1] = (struct elem_to_channel
) {
242 .av_position
= right
,
244 .elem_id
= layout_map
[offset
+ 1][1],
251 static int count_paired_channels(uint8_t (*layout_map
)[3], int tags
, int pos
,
254 int num_pos_channels
= 0;
258 for (i
= *current
; i
< tags
; i
++) {
259 if (layout_map
[i
][2] != pos
)
261 if (layout_map
[i
][0] == TYPE_CPE
) {
263 if (pos
== AAC_CHANNEL_FRONT
&& !first_cpe
) {
269 num_pos_channels
+= 2;
277 ((pos
== AAC_CHANNEL_FRONT
&& first_cpe
) || pos
== AAC_CHANNEL_SIDE
))
280 return num_pos_channels
;
283 static uint64_t sniff_channel_order(uint8_t (*layout_map
)[3], int tags
)
285 int i
, n
, total_non_cc_elements
;
286 struct elem_to_channel e2c_vec
[4 * MAX_ELEM_ID
] = { { 0 } };
287 int num_front_channels
, num_side_channels
, num_back_channels
;
290 if (FF_ARRAY_ELEMS(e2c_vec
) < tags
)
295 count_paired_channels(layout_map
, tags
, AAC_CHANNEL_FRONT
, &i
);
296 if (num_front_channels
< 0)
299 count_paired_channels(layout_map
, tags
, AAC_CHANNEL_SIDE
, &i
);
300 if (num_side_channels
< 0)
303 count_paired_channels(layout_map
, tags
, AAC_CHANNEL_BACK
, &i
);
304 if (num_back_channels
< 0)
308 if (num_front_channels
& 1) {
309 e2c_vec
[i
] = (struct elem_to_channel
) {
310 .av_position
= AV_CH_FRONT_CENTER
,
312 .elem_id
= layout_map
[i
][1],
313 .aac_position
= AAC_CHANNEL_FRONT
316 num_front_channels
--;
318 if (num_front_channels
>= 4) {
319 i
+= assign_pair(e2c_vec
, layout_map
, i
,
320 AV_CH_FRONT_LEFT_OF_CENTER
,
321 AV_CH_FRONT_RIGHT_OF_CENTER
,
323 num_front_channels
-= 2;
325 if (num_front_channels
>= 2) {
326 i
+= assign_pair(e2c_vec
, layout_map
, i
,
330 num_front_channels
-= 2;
332 while (num_front_channels
>= 2) {
333 i
+= assign_pair(e2c_vec
, layout_map
, i
,
337 num_front_channels
-= 2;
340 if (num_side_channels
>= 2) {
341 i
+= assign_pair(e2c_vec
, layout_map
, i
,
345 num_side_channels
-= 2;
347 while (num_side_channels
>= 2) {
348 i
+= assign_pair(e2c_vec
, layout_map
, i
,
352 num_side_channels
-= 2;
355 while (num_back_channels
>= 4) {
356 i
+= assign_pair(e2c_vec
, layout_map
, i
,
360 num_back_channels
-= 2;
362 if (num_back_channels
>= 2) {
363 i
+= assign_pair(e2c_vec
, layout_map
, i
,
367 num_back_channels
-= 2;
369 if (num_back_channels
) {
370 e2c_vec
[i
] = (struct elem_to_channel
) {
371 .av_position
= AV_CH_BACK_CENTER
,
373 .elem_id
= layout_map
[i
][1],
374 .aac_position
= AAC_CHANNEL_BACK
380 if (i
< tags
&& layout_map
[i
][2] == AAC_CHANNEL_LFE
) {
381 e2c_vec
[i
] = (struct elem_to_channel
) {
382 .av_position
= AV_CH_LOW_FREQUENCY
,
384 .elem_id
= layout_map
[i
][1],
385 .aac_position
= AAC_CHANNEL_LFE
389 while (i
< tags
&& layout_map
[i
][2] == AAC_CHANNEL_LFE
) {
390 e2c_vec
[i
] = (struct elem_to_channel
) {
391 .av_position
= UINT64_MAX
,
393 .elem_id
= layout_map
[i
][1],
394 .aac_position
= AAC_CHANNEL_LFE
399 // Must choose a stable sort
400 total_non_cc_elements
= n
= i
;
403 for (i
= 1; i
< n
; i
++)
404 if (e2c_vec
[i
- 1].av_position
> e2c_vec
[i
].av_position
) {
405 FFSWAP(struct elem_to_channel
, e2c_vec
[i
- 1], e2c_vec
[i
]);
412 for (i
= 0; i
< total_non_cc_elements
; i
++) {
413 layout_map
[i
][0] = e2c_vec
[i
].syn_ele
;
414 layout_map
[i
][1] = e2c_vec
[i
].elem_id
;
415 layout_map
[i
][2] = e2c_vec
[i
].aac_position
;
416 if (e2c_vec
[i
].av_position
!= UINT64_MAX
) {
417 layout
|= e2c_vec
[i
].av_position
;
425 * Save current output configuration if and only if it has been locked.
427 static void push_output_configuration(AACContext
*ac
) {
428 if (ac
->oc
[1].status
== OC_LOCKED
) {
429 ac
->oc
[0] = ac
->oc
[1];
431 ac
->oc
[1].status
= OC_NONE
;
435 * Restore the previous output configuration if and only if the current
436 * configuration is unlocked.
438 static void pop_output_configuration(AACContext
*ac
) {
439 if (ac
->oc
[1].status
!= OC_LOCKED
&& ac
->oc
[0].status
!= OC_NONE
) {
440 ac
->oc
[1] = ac
->oc
[0];
441 ac
->avctx
->channels
= ac
->oc
[1].channels
;
442 ac
->avctx
->channel_layout
= ac
->oc
[1].channel_layout
;
443 output_configure(ac
, ac
->oc
[1].layout_map
, ac
->oc
[1].layout_map_tags
,
444 ac
->oc
[1].status
, 0);
449 * Configure output channel order based on the current program
450 * configuration element.
452 * @return Returns error status. 0 - OK, !0 - error
454 static int output_configure(AACContext
*ac
,
455 uint8_t layout_map
[MAX_ELEM_ID
* 4][3], int tags
,
456 enum OCStatus oc_type
, int get_new_frame
)
458 AVCodecContext
*avctx
= ac
->avctx
;
459 int i
, channels
= 0, ret
;
462 if (ac
->oc
[1].layout_map
!= layout_map
) {
463 memcpy(ac
->oc
[1].layout_map
, layout_map
, tags
* sizeof(layout_map
[0]));
464 ac
->oc
[1].layout_map_tags
= tags
;
467 // Try to sniff a reasonable channel order, otherwise output the
468 // channels in the order the PCE declared them.
469 if (avctx
->request_channel_layout
!= AV_CH_LAYOUT_NATIVE
)
470 layout
= sniff_channel_order(layout_map
, tags
);
471 for (i
= 0; i
< tags
; i
++) {
472 int type
= layout_map
[i
][0];
473 int id
= layout_map
[i
][1];
474 int position
= layout_map
[i
][2];
475 // Allocate or free elements depending on if they are in the
476 // current program configuration.
477 ret
= che_configure(ac
, position
, type
, id
, &channels
);
481 if (ac
->oc
[1].m4ac
.ps
== 1 && channels
== 2) {
482 if (layout
== AV_CH_FRONT_CENTER
) {
483 layout
= AV_CH_FRONT_LEFT
|AV_CH_FRONT_RIGHT
;
489 memcpy(ac
->tag_che_map
, ac
->che
, 4 * MAX_ELEM_ID
* sizeof(ac
->che
[0][0]));
490 if (layout
) avctx
->channel_layout
= layout
;
491 ac
->oc
[1].channel_layout
= layout
;
492 avctx
->channels
= ac
->oc
[1].channels
= channels
;
493 ac
->oc
[1].status
= oc_type
;
496 if ((ret
= frame_configure_elements(ac
->avctx
)) < 0)
503 static void flush(AVCodecContext
*avctx
)
505 AACContext
*ac
= avctx
->priv_data
;
508 for (type
= 3; type
>= 0; type
--) {
509 for (i
= 0; i
< MAX_ELEM_ID
; i
++) {
510 ChannelElement
*che
= ac
->che
[type
][i
];
512 for (j
= 0; j
<= 1; j
++) {
513 memset(che
->ch
[j
].saved
, 0, sizeof(che
->ch
[j
].saved
));
521 * Set up channel positions based on a default channel configuration
522 * as specified in table 1.17.
524 * @return Returns error status. 0 - OK, !0 - error
526 static int set_default_channel_config(AVCodecContext
*avctx
,
527 uint8_t (*layout_map
)[3],
531 if (channel_config
< 1 || channel_config
> 7) {
532 av_log(avctx
, AV_LOG_ERROR
,
533 "invalid default channel configuration (%d)\n",
535 return AVERROR_INVALIDDATA
;
537 *tags
= tags_per_config
[channel_config
];
538 memcpy(layout_map
, aac_channel_layout_map
[channel_config
- 1],
539 *tags
* sizeof(*layout_map
));
542 * AAC specification has 7.1(wide) as a default layout for 8-channel streams.
543 * However, at least Nero AAC encoder encodes 7.1 streams using the default
544 * channel config 7, mapping the side channels of the original audio stream
545 * to the second AAC_CHANNEL_FRONT pair in the AAC stream. Similarly, e.g. FAAD
546 * decodes the second AAC_CHANNEL_FRONT pair as side channels, therefore decoding
547 * the incorrect streams as if they were correct (and as the encoder intended).
549 * As actual intended 7.1(wide) streams are very rare, default to assuming a
550 * 7.1 layout was intended.
552 if (channel_config
== 7 && avctx
->strict_std_compliance
< FF_COMPLIANCE_STRICT
) {
553 av_log(avctx
, AV_LOG_INFO
, "Assuming an incorrectly encoded 7.1 channel layout"
554 " instead of a spec-compliant 7.1(wide) layout, use -strict %d to decode"
555 " according to the specification instead.\n", FF_COMPLIANCE_STRICT
);
556 layout_map
[2][2] = AAC_CHANNEL_SIDE
;
562 static ChannelElement
*get_che(AACContext
*ac
, int type
, int elem_id
)
564 /* For PCE based channel configurations map the channels solely based
566 if (!ac
->oc
[1].m4ac
.chan_config
) {
567 return ac
->tag_che_map
[type
][elem_id
];
569 // Allow single CPE stereo files to be signalled with mono configuration.
570 if (!ac
->tags_mapped
&& type
== TYPE_CPE
&&
571 ac
->oc
[1].m4ac
.chan_config
== 1) {
572 uint8_t layout_map
[MAX_ELEM_ID
*4][3];
574 push_output_configuration(ac
);
576 av_log(ac
->avctx
, AV_LOG_DEBUG
, "mono with CPE\n");
578 if (set_default_channel_config(ac
->avctx
, layout_map
,
579 &layout_map_tags
, 2) < 0)
581 if (output_configure(ac
, layout_map
, layout_map_tags
,
582 OC_TRIAL_FRAME
, 1) < 0)
585 ac
->oc
[1].m4ac
.chan_config
= 2;
586 ac
->oc
[1].m4ac
.ps
= 0;
589 if (!ac
->tags_mapped
&& type
== TYPE_SCE
&&
590 ac
->oc
[1].m4ac
.chan_config
== 2) {
591 uint8_t layout_map
[MAX_ELEM_ID
* 4][3];
593 push_output_configuration(ac
);
595 av_log(ac
->avctx
, AV_LOG_DEBUG
, "stereo with SCE\n");
597 if (set_default_channel_config(ac
->avctx
, layout_map
,
598 &layout_map_tags
, 1) < 0)
600 if (output_configure(ac
, layout_map
, layout_map_tags
,
601 OC_TRIAL_FRAME
, 1) < 0)
604 ac
->oc
[1].m4ac
.chan_config
= 1;
605 if (ac
->oc
[1].m4ac
.sbr
)
606 ac
->oc
[1].m4ac
.ps
= -1;
608 /* For indexed channel configurations map the channels solely based
610 switch (ac
->oc
[1].m4ac
.chan_config
) {
612 if (ac
->tags_mapped
== 3 && type
== TYPE_CPE
) {
614 return ac
->tag_che_map
[TYPE_CPE
][elem_id
] = ac
->che
[TYPE_CPE
][2];
617 /* Some streams incorrectly code 5.1 audio as
618 * SCE[0] CPE[0] CPE[1] SCE[1]
620 * SCE[0] CPE[0] CPE[1] LFE[0].
621 * If we seem to have encountered such a stream, transfer
622 * the LFE[0] element to the SCE[1]'s mapping */
623 if (ac
->tags_mapped
== tags_per_config
[ac
->oc
[1].m4ac
.chan_config
] - 1 && (type
== TYPE_LFE
|| type
== TYPE_SCE
)) {
624 if (!ac
->warned_remapping_once
&& (type
!= TYPE_LFE
|| elem_id
!= 0)) {
625 av_log(ac
->avctx
, AV_LOG_WARNING
,
626 "This stream seems to incorrectly report its last channel as %s[%d], mapping to LFE[0]\n",
627 type
== TYPE_SCE
? "SCE" : "LFE", elem_id
);
628 ac
->warned_remapping_once
++;
631 return ac
->tag_che_map
[type
][elem_id
] = ac
->che
[TYPE_LFE
][0];
634 if (ac
->tags_mapped
== 2 && type
== TYPE_CPE
) {
636 return ac
->tag_che_map
[TYPE_CPE
][elem_id
] = ac
->che
[TYPE_CPE
][1];
639 /* Some streams incorrectly code 4.0 audio as
640 * SCE[0] CPE[0] LFE[0]
642 * SCE[0] CPE[0] SCE[1].
643 * If we seem to have encountered such a stream, transfer
644 * the SCE[1] element to the LFE[0]'s mapping */
645 if (ac
->tags_mapped
== tags_per_config
[ac
->oc
[1].m4ac
.chan_config
] - 1 && (type
== TYPE_LFE
|| type
== TYPE_SCE
)) {
646 if (!ac
->warned_remapping_once
&& (type
!= TYPE_SCE
|| elem_id
!= 1)) {
647 av_log(ac
->avctx
, AV_LOG_WARNING
,
648 "This stream seems to incorrectly report its last channel as %s[%d], mapping to SCE[1]\n",
649 type
== TYPE_SCE
? "SCE" : "LFE", elem_id
);
650 ac
->warned_remapping_once
++;
653 return ac
->tag_che_map
[type
][elem_id
] = ac
->che
[TYPE_SCE
][1];
655 if (ac
->tags_mapped
== 2 &&
656 ac
->oc
[1].m4ac
.chan_config
== 4 &&
659 return ac
->tag_che_map
[TYPE_SCE
][elem_id
] = ac
->che
[TYPE_SCE
][1];
663 if (ac
->tags_mapped
== (ac
->oc
[1].m4ac
.chan_config
!= 2) &&
666 return ac
->tag_che_map
[TYPE_CPE
][elem_id
] = ac
->che
[TYPE_CPE
][0];
667 } else if (ac
->oc
[1].m4ac
.chan_config
== 2) {
671 if (!ac
->tags_mapped
&& type
== TYPE_SCE
) {
673 return ac
->tag_che_map
[TYPE_SCE
][elem_id
] = ac
->che
[TYPE_SCE
][0];
681 * Decode an array of 4 bit element IDs, optionally interleaved with a
682 * stereo/mono switching bit.
684 * @param type speaker type/position for these channels
686 static void decode_channel_map(uint8_t layout_map
[][3],
687 enum ChannelPosition type
,
688 GetBitContext
*gb
, int n
)
691 enum RawDataBlockType syn_ele
;
693 case AAC_CHANNEL_FRONT
:
694 case AAC_CHANNEL_BACK
:
695 case AAC_CHANNEL_SIDE
:
696 syn_ele
= get_bits1(gb
);
702 case AAC_CHANNEL_LFE
:
706 // AAC_CHANNEL_OFF has no channel map
709 layout_map
[0][0] = syn_ele
;
710 layout_map
[0][1] = get_bits(gb
, 4);
711 layout_map
[0][2] = type
;
717 * Decode program configuration element; reference: table 4.2.
719 * @return Returns error status. 0 - OK, !0 - error
721 static int decode_pce(AVCodecContext
*avctx
, MPEG4AudioConfig
*m4ac
,
722 uint8_t (*layout_map
)[3],
725 int num_front
, num_side
, num_back
, num_lfe
, num_assoc_data
, num_cc
;
730 skip_bits(gb
, 2); // object_type
732 sampling_index
= get_bits(gb
, 4);
733 if (m4ac
->sampling_index
!= sampling_index
)
734 av_log(avctx
, AV_LOG_WARNING
,
735 "Sample rate index in program config element does not "
736 "match the sample rate index configured by the container.\n");
738 num_front
= get_bits(gb
, 4);
739 num_side
= get_bits(gb
, 4);
740 num_back
= get_bits(gb
, 4);
741 num_lfe
= get_bits(gb
, 2);
742 num_assoc_data
= get_bits(gb
, 3);
743 num_cc
= get_bits(gb
, 4);
746 skip_bits(gb
, 4); // mono_mixdown_tag
748 skip_bits(gb
, 4); // stereo_mixdown_tag
751 skip_bits(gb
, 3); // mixdown_coeff_index and pseudo_surround
753 if (get_bits_left(gb
) < 4 * (num_front
+ num_side
+ num_back
+ num_lfe
+ num_assoc_data
+ num_cc
)) {
754 av_log(avctx
, AV_LOG_ERROR
, "decode_pce: " overread_err
);
757 decode_channel_map(layout_map
, AAC_CHANNEL_FRONT
, gb
, num_front
);
759 decode_channel_map(layout_map
+ tags
, AAC_CHANNEL_SIDE
, gb
, num_side
);
761 decode_channel_map(layout_map
+ tags
, AAC_CHANNEL_BACK
, gb
, num_back
);
763 decode_channel_map(layout_map
+ tags
, AAC_CHANNEL_LFE
, gb
, num_lfe
);
766 skip_bits_long(gb
, 4 * num_assoc_data
);
768 decode_channel_map(layout_map
+ tags
, AAC_CHANNEL_CC
, gb
, num_cc
);
773 /* comment field, first byte is length */
774 comment_len
= get_bits(gb
, 8) * 8;
775 if (get_bits_left(gb
) < comment_len
) {
776 av_log(avctx
, AV_LOG_ERROR
, "decode_pce: " overread_err
);
777 return AVERROR_INVALIDDATA
;
779 skip_bits_long(gb
, comment_len
);
784 * Decode GA "General Audio" specific configuration; reference: table 4.1.
786 * @param ac pointer to AACContext, may be null
787 * @param avctx pointer to AVCCodecContext, used for logging
789 * @return Returns error status. 0 - OK, !0 - error
791 static int decode_ga_specific_config(AACContext
*ac
, AVCodecContext
*avctx
,
793 MPEG4AudioConfig
*m4ac
,
796 int extension_flag
, ret
, ep_config
, res_flags
;
797 uint8_t layout_map
[MAX_ELEM_ID
*4][3];
800 if (get_bits1(gb
)) { // frameLengthFlag
801 avpriv_request_sample(avctx
, "960/120 MDCT window");
802 return AVERROR_PATCHWELCOME
;
805 if (get_bits1(gb
)) // dependsOnCoreCoder
806 skip_bits(gb
, 14); // coreCoderDelay
807 extension_flag
= get_bits1(gb
);
809 if (m4ac
->object_type
== AOT_AAC_SCALABLE
||
810 m4ac
->object_type
== AOT_ER_AAC_SCALABLE
)
811 skip_bits(gb
, 3); // layerNr
813 if (channel_config
== 0) {
814 skip_bits(gb
, 4); // element_instance_tag
815 tags
= decode_pce(avctx
, m4ac
, layout_map
, gb
);
819 if ((ret
= set_default_channel_config(avctx
, layout_map
,
820 &tags
, channel_config
)))
824 if (count_channels(layout_map
, tags
) > 1) {
826 } else if (m4ac
->sbr
== 1 && m4ac
->ps
== -1)
829 if (ac
&& (ret
= output_configure(ac
, layout_map
, tags
, OC_GLOBAL_HDR
, 0)))
832 if (extension_flag
) {
833 switch (m4ac
->object_type
) {
835 skip_bits(gb
, 5); // numOfSubFrame
836 skip_bits(gb
, 11); // layer_length
840 case AOT_ER_AAC_SCALABLE
:
842 res_flags
= get_bits(gb
, 3);
844 avpriv_report_missing_feature(avctx
,
845 "AAC data resilience (flags %x)",
847 return AVERROR_PATCHWELCOME
;
851 skip_bits1(gb
); // extensionFlag3 (TBD in version 3)
853 switch (m4ac
->object_type
) {
856 case AOT_ER_AAC_SCALABLE
:
858 ep_config
= get_bits(gb
, 2);
860 avpriv_report_missing_feature(avctx
,
861 "epConfig %d", ep_config
);
862 return AVERROR_PATCHWELCOME
;
868 static int decode_eld_specific_config(AACContext
*ac
, AVCodecContext
*avctx
,
870 MPEG4AudioConfig
*m4ac
,
873 int ret
, ep_config
, res_flags
;
874 uint8_t layout_map
[MAX_ELEM_ID
*4][3];
876 const int ELDEXT_TERM
= 0;
881 if (get_bits1(gb
)) { // frameLengthFlag
882 avpriv_request_sample(avctx
, "960/120 MDCT window");
883 return AVERROR_PATCHWELCOME
;
886 res_flags
= get_bits(gb
, 3);
888 avpriv_report_missing_feature(avctx
,
889 "AAC data resilience (flags %x)",
891 return AVERROR_PATCHWELCOME
;
894 if (get_bits1(gb
)) { // ldSbrPresentFlag
895 avpriv_report_missing_feature(avctx
,
897 return AVERROR_PATCHWELCOME
;
900 while (get_bits(gb
, 4) != ELDEXT_TERM
) {
901 int len
= get_bits(gb
, 4);
903 len
+= get_bits(gb
, 8);
905 len
+= get_bits(gb
, 16);
906 if (get_bits_left(gb
) < len
* 8 + 4) {
907 av_log(ac
->avctx
, AV_LOG_ERROR
, overread_err
);
908 return AVERROR_INVALIDDATA
;
910 skip_bits_long(gb
, 8 * len
);
913 if ((ret
= set_default_channel_config(avctx
, layout_map
,
914 &tags
, channel_config
)))
917 if (ac
&& (ret
= output_configure(ac
, layout_map
, tags
, OC_GLOBAL_HDR
, 0)))
920 ep_config
= get_bits(gb
, 2);
922 avpriv_report_missing_feature(avctx
,
923 "epConfig %d", ep_config
);
924 return AVERROR_PATCHWELCOME
;
930 * Decode audio specific configuration; reference: table 1.13.
932 * @param ac pointer to AACContext, may be null
933 * @param avctx pointer to AVCCodecContext, used for logging
934 * @param m4ac pointer to MPEG4AudioConfig, used for parsing
935 * @param data pointer to buffer holding an audio specific config
936 * @param bit_size size of audio specific config or data in bits
937 * @param sync_extension look for an appended sync extension
939 * @return Returns error status or number of consumed bits. <0 - error
941 static int decode_audio_specific_config(AACContext
*ac
,
942 AVCodecContext
*avctx
,
943 MPEG4AudioConfig
*m4ac
,
944 const uint8_t *data
, int bit_size
,
950 av_dlog(avctx
, "audio specific config size %d\n", bit_size
>> 3);
951 for (i
= 0; i
< bit_size
>> 3; i
++)
952 av_dlog(avctx
, "%02x ", data
[i
]);
953 av_dlog(avctx
, "\n");
955 if ((ret
= init_get_bits(&gb
, data
, bit_size
)) < 0)
958 if ((i
= avpriv_mpeg4audio_get_config(m4ac
, data
, bit_size
,
959 sync_extension
)) < 0)
960 return AVERROR_INVALIDDATA
;
961 if (m4ac
->sampling_index
> 12) {
962 av_log(avctx
, AV_LOG_ERROR
,
963 "invalid sampling rate index %d\n",
964 m4ac
->sampling_index
);
965 return AVERROR_INVALIDDATA
;
967 if (m4ac
->object_type
== AOT_ER_AAC_LD
&&
968 (m4ac
->sampling_index
< 3 || m4ac
->sampling_index
> 7)) {
969 av_log(avctx
, AV_LOG_ERROR
,
970 "invalid low delay sampling rate index %d\n",
971 m4ac
->sampling_index
);
972 return AVERROR_INVALIDDATA
;
975 skip_bits_long(&gb
, i
);
977 switch (m4ac
->object_type
) {
983 if ((ret
= decode_ga_specific_config(ac
, avctx
, &gb
,
984 m4ac
, m4ac
->chan_config
)) < 0)
988 if ((ret
= decode_eld_specific_config(ac
, avctx
, &gb
,
989 m4ac
, m4ac
->chan_config
)) < 0)
993 avpriv_report_missing_feature(avctx
,
994 "Audio object type %s%d",
995 m4ac
->sbr
== 1 ? "SBR+" : "",
997 return AVERROR(ENOSYS
);
1001 "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n",
1002 m4ac
->object_type
, m4ac
->chan_config
, m4ac
->sampling_index
,
1003 m4ac
->sample_rate
, m4ac
->sbr
,
1006 return get_bits_count(&gb
);
1010 * linear congruential pseudorandom number generator
1012 * @param previous_val pointer to the current state of the generator
1014 * @return Returns a 32-bit pseudorandom integer
1016 static av_always_inline
int lcg_random(unsigned previous_val
)
1018 union { unsigned u
; int s
; } v
= { previous_val
* 1664525u + 1013904223 };
1022 static av_always_inline
void reset_predict_state(PredictorState
*ps
)
1032 static void reset_all_predictors(PredictorState
*ps
)
1035 for (i
= 0; i
< MAX_PREDICTORS
; i
++)
1036 reset_predict_state(&ps
[i
]);
/**
 * Map a sampling rate to a sampling index in the range 0..11.
 *
 * Index k is chosen for the first (largest) threshold that rate reaches;
 * anything below the last threshold maps to 11.
 * NOTE(review): the final fallthrough return was missing from this listing;
 * 11 continues the visible 0..10 sequence - confirm.
 *
 * @param rate sampling rate, as held in avctx->sample_rate
 *
 * @return sampling index, 0 (highest rates) to 11 (lowest rates)
 */
static int sample_rate_idx (int rate)
{
    /* Lower bound of the rate range belonging to each index. */
    static const int lower_bound[] = {
        92017, 75132, 55426, 46009, 37566, 27713,
        23004, 18783, 13856, 11502,  9391
    };
    const int count = (int)(sizeof(lower_bound) / sizeof(lower_bound[0]));
    int idx;

    for (idx = 0; idx < count; idx++) {
        if (lower_bound[idx] <= rate)
            return idx;
    }

    return 11;
}
1055 static void reset_predictor_group(PredictorState
*ps
, int group_num
)
1058 for (i
= group_num
- 1; i
< MAX_PREDICTORS
; i
+= 30)
1059 reset_predict_state(&ps
[i
]);
1062 #define AAC_INIT_VLC_STATIC(num, size) \
1063 INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
1064 ff_aac_spectral_bits[num], sizeof(ff_aac_spectral_bits[num][0]), \
1065 sizeof(ff_aac_spectral_bits[num][0]), \
1066 ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), \
1067 sizeof(ff_aac_spectral_codes[num][0]), \
1070 static void aacdec_init(AACContext
*ac
);
1072 static av_cold
int aac_decode_init(AVCodecContext
*avctx
)
1074 AACContext
*ac
= avctx
->priv_data
;
1078 ac
->oc
[1].m4ac
.sample_rate
= avctx
->sample_rate
;
1082 avctx
->sample_fmt
= AV_SAMPLE_FMT_FLTP
;
1084 if (avctx
->extradata_size
> 0) {
1085 if ((ret
= decode_audio_specific_config(ac
, ac
->avctx
, &ac
->oc
[1].m4ac
,
1087 avctx
->extradata_size
* 8,
1092 uint8_t layout_map
[MAX_ELEM_ID
*4][3];
1093 int layout_map_tags
;
1095 sr
= sample_rate_idx(avctx
->sample_rate
);
1096 ac
->oc
[1].m4ac
.sampling_index
= sr
;
1097 ac
->oc
[1].m4ac
.channels
= avctx
->channels
;
1098 ac
->oc
[1].m4ac
.sbr
= -1;
1099 ac
->oc
[1].m4ac
.ps
= -1;
1101 for (i
= 0; i
< FF_ARRAY_ELEMS(ff_mpeg4audio_channels
); i
++)
1102 if (ff_mpeg4audio_channels
[i
] == avctx
->channels
)
1104 if (i
== FF_ARRAY_ELEMS(ff_mpeg4audio_channels
)) {
1107 ac
->oc
[1].m4ac
.chan_config
= i
;
1109 if (ac
->oc
[1].m4ac
.chan_config
) {
1110 int ret
= set_default_channel_config(avctx
, layout_map
,
1111 &layout_map_tags
, ac
->oc
[1].m4ac
.chan_config
);
1113 output_configure(ac
, layout_map
, layout_map_tags
,
1115 else if (avctx
->err_recognition
& AV_EF_EXPLODE
)
1116 return AVERROR_INVALIDDATA
;
1120 if (avctx
->channels
> MAX_CHANNELS
) {
1121 av_log(avctx
, AV_LOG_ERROR
, "Too many channels\n");
1122 return AVERROR_INVALIDDATA
;
1125 AAC_INIT_VLC_STATIC( 0, 304);
1126 AAC_INIT_VLC_STATIC( 1, 270);
1127 AAC_INIT_VLC_STATIC( 2, 550);
1128 AAC_INIT_VLC_STATIC( 3, 300);
1129 AAC_INIT_VLC_STATIC( 4, 328);
1130 AAC_INIT_VLC_STATIC( 5, 294);
1131 AAC_INIT_VLC_STATIC( 6, 306);
1132 AAC_INIT_VLC_STATIC( 7, 268);
1133 AAC_INIT_VLC_STATIC( 8, 510);
1134 AAC_INIT_VLC_STATIC( 9, 366);
1135 AAC_INIT_VLC_STATIC(10, 462);
1139 ff_fmt_convert_init(&ac
->fmt_conv
, avctx
);
1140 ac
->fdsp
= avpriv_float_dsp_alloc(avctx
->flags
& CODEC_FLAG_BITEXACT
);
1142 return AVERROR(ENOMEM
);
1145 ac
->random_state
= 0x1f2e3d4c;
1149 INIT_VLC_STATIC(&vlc_scalefactors
, 7,
1150 FF_ARRAY_ELEMS(ff_aac_scalefactor_code
),
1151 ff_aac_scalefactor_bits
,
1152 sizeof(ff_aac_scalefactor_bits
[0]),
1153 sizeof(ff_aac_scalefactor_bits
[0]),
1154 ff_aac_scalefactor_code
,
1155 sizeof(ff_aac_scalefactor_code
[0]),
1156 sizeof(ff_aac_scalefactor_code
[0]),
1159 ff_mdct_init(&ac
->mdct
, 11, 1, 1.0 / (32768.0 * 1024.0));
1160 ff_mdct_init(&ac
->mdct_ld
, 10, 1, 1.0 / (32768.0 * 512.0));
1161 ff_mdct_init(&ac
->mdct_small
, 8, 1, 1.0 / (32768.0 * 128.0));
1162 ff_mdct_init(&ac
->mdct_ltp
, 11, 0, -2.0 * 32768.0);
1163 // window initialization
1164 ff_kbd_window_init(ff_aac_kbd_long_1024
, 4.0, 1024);
1165 ff_kbd_window_init(ff_aac_kbd_short_128
, 6.0, 128);
1166 ff_init_ff_sine_windows(10);
1167 ff_init_ff_sine_windows( 9);
1168 ff_init_ff_sine_windows( 7);
1176 * Skip data_stream_element; reference: table 4.10.
1178 static int skip_data_stream_element(AACContext
*ac
, GetBitContext
*gb
)
1180 int byte_align
= get_bits1(gb
);
1181 int count
= get_bits(gb
, 8);
1183 count
+= get_bits(gb
, 8);
1187 if (get_bits_left(gb
) < 8 * count
) {
1188 av_log(ac
->avctx
, AV_LOG_ERROR
, "skip_data_stream_element: "overread_err
);
1189 return AVERROR_INVALIDDATA
;
1191 skip_bits_long(gb
, 8 * count
);
1195 static int decode_prediction(AACContext
*ac
, IndividualChannelStream
*ics
,
1199 if (get_bits1(gb
)) {
1200 ics
->predictor_reset_group
= get_bits(gb
, 5);
1201 if (ics
->predictor_reset_group
== 0 ||
1202 ics
->predictor_reset_group
> 30) {
1203 av_log(ac
->avctx
, AV_LOG_ERROR
,
1204 "Invalid Predictor Reset Group.\n");
1205 return AVERROR_INVALIDDATA
;
1208 for (sfb
= 0; sfb
< FFMIN(ics
->max_sfb
, ff_aac_pred_sfb_max
[ac
->oc
[1].m4ac
.sampling_index
]); sfb
++) {
1209 ics
->prediction_used
[sfb
] = get_bits1(gb
);
1215 * Decode Long Term Prediction data; reference: table 4.xx.
1217 static void decode_ltp(LongTermPrediction
*ltp
,
1218 GetBitContext
*gb
, uint8_t max_sfb
)
1222 ltp
->lag
= get_bits(gb
, 11);
1223 ltp
->coef
= ltp_coef
[get_bits(gb
, 3)];
1224 for (sfb
= 0; sfb
< FFMIN(max_sfb
, MAX_LTP_LONG_SFB
); sfb
++)
1225 ltp
->used
[sfb
] = get_bits1(gb
);
1229 * Decode Individual Channel Stream info; reference: table 4.6.
1231 static int decode_ics_info(AACContext
*ac
, IndividualChannelStream
*ics
,
1234 int aot
= ac
->oc
[1].m4ac
.object_type
;
1235 if (aot
!= AOT_ER_AAC_ELD
) {
1236 if (get_bits1(gb
)) {
1237 av_log(ac
->avctx
, AV_LOG_ERROR
, "Reserved bit set.\n");
1238 return AVERROR_INVALIDDATA
;
1240 ics
->window_sequence
[1] = ics
->window_sequence
[0];
1241 ics
->window_sequence
[0] = get_bits(gb
, 2);
1242 if (aot
== AOT_ER_AAC_LD
&&
1243 ics
->window_sequence
[0] != ONLY_LONG_SEQUENCE
) {
1244 av_log(ac
->avctx
, AV_LOG_ERROR
,
1245 "AAC LD is only defined for ONLY_LONG_SEQUENCE but "
1246 "window sequence %d found.\n", ics
->window_sequence
[0]);
1247 ics
->window_sequence
[0] = ONLY_LONG_SEQUENCE
;
1248 return AVERROR_INVALIDDATA
;
1250 ics
->use_kb_window
[1] = ics
->use_kb_window
[0];
1251 ics
->use_kb_window
[0] = get_bits1(gb
);
1253 ics
->num_window_groups
= 1;
1254 ics
->group_len
[0] = 1;
1255 if (ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
) {
1257 ics
->max_sfb
= get_bits(gb
, 4);
1258 for (i
= 0; i
< 7; i
++) {
1259 if (get_bits1(gb
)) {
1260 ics
->group_len
[ics
->num_window_groups
- 1]++;
1262 ics
->num_window_groups
++;
1263 ics
->group_len
[ics
->num_window_groups
- 1] = 1;
1266 ics
->num_windows
= 8;
1267 ics
->swb_offset
= ff_swb_offset_128
[ac
->oc
[1].m4ac
.sampling_index
];
1268 ics
->num_swb
= ff_aac_num_swb_128
[ac
->oc
[1].m4ac
.sampling_index
];
1269 ics
->tns_max_bands
= ff_tns_max_bands_128
[ac
->oc
[1].m4ac
.sampling_index
];
1270 ics
->predictor_present
= 0;
1272 ics
->max_sfb
= get_bits(gb
, 6);
1273 ics
->num_windows
= 1;
1274 if (aot
== AOT_ER_AAC_LD
|| aot
== AOT_ER_AAC_ELD
) {
1275 ics
->swb_offset
= ff_swb_offset_512
[ac
->oc
[1].m4ac
.sampling_index
];
1276 ics
->num_swb
= ff_aac_num_swb_512
[ac
->oc
[1].m4ac
.sampling_index
];
1277 ics
->tns_max_bands
= ff_tns_max_bands_512
[ac
->oc
[1].m4ac
.sampling_index
];
1278 if (!ics
->num_swb
|| !ics
->swb_offset
)
1281 ics
->swb_offset
= ff_swb_offset_1024
[ac
->oc
[1].m4ac
.sampling_index
];
1282 ics
->num_swb
= ff_aac_num_swb_1024
[ac
->oc
[1].m4ac
.sampling_index
];
1283 ics
->tns_max_bands
= ff_tns_max_bands_1024
[ac
->oc
[1].m4ac
.sampling_index
];
1285 if (aot
!= AOT_ER_AAC_ELD
) {
1286 ics
->predictor_present
= get_bits1(gb
);
1287 ics
->predictor_reset_group
= 0;
1289 if (ics
->predictor_present
) {
1290 if (aot
== AOT_AAC_MAIN
) {
1291 if (decode_prediction(ac
, ics
, gb
)) {
1294 } else if (aot
== AOT_AAC_LC
||
1295 aot
== AOT_ER_AAC_LC
) {
1296 av_log(ac
->avctx
, AV_LOG_ERROR
,
1297 "Prediction is not allowed in AAC-LC.\n");
1300 if (aot
== AOT_ER_AAC_LD
) {
1301 av_log(ac
->avctx
, AV_LOG_ERROR
,
1302 "LTP in ER AAC LD not yet implemented.\n");
1303 return AVERROR_PATCHWELCOME
;
1305 if ((ics
->ltp
.present
= get_bits(gb
, 1)))
1306 decode_ltp(&ics
->ltp
, gb
, ics
->max_sfb
);
1311 if (ics
->max_sfb
> ics
->num_swb
) {
1312 av_log(ac
->avctx
, AV_LOG_ERROR
,
1313 "Number of scalefactor bands in group (%d) "
1314 "exceeds limit (%d).\n",
1315 ics
->max_sfb
, ics
->num_swb
);
1322 return AVERROR_INVALIDDATA
;
1326 * Decode band types (section_data payload); reference: table 4.46.
1328 * @param band_type array of the used band type
1329 * @param band_type_run_end array of the last scalefactor band of a band type run
1331 * @return Returns error status. 0 - OK, !0 - error
1333 static int decode_band_types(AACContext
*ac
, enum BandType band_type
[120],
1334 int band_type_run_end
[120], GetBitContext
*gb
,
1335 IndividualChannelStream
*ics
)
1338 const int bits
= (ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
) ? 3 : 5;
1339 for (g
= 0; g
< ics
->num_window_groups
; g
++) {
1341 while (k
< ics
->max_sfb
) {
1342 uint8_t sect_end
= k
;
1344 int sect_band_type
= get_bits(gb
, 4);
1345 if (sect_band_type
== 12) {
1346 av_log(ac
->avctx
, AV_LOG_ERROR
, "invalid band type\n");
1347 return AVERROR_INVALIDDATA
;
1350 sect_len_incr
= get_bits(gb
, bits
);
1351 sect_end
+= sect_len_incr
;
1352 if (get_bits_left(gb
) < 0) {
1353 av_log(ac
->avctx
, AV_LOG_ERROR
, "decode_band_types: "overread_err
);
1354 return AVERROR_INVALIDDATA
;
1356 if (sect_end
> ics
->max_sfb
) {
1357 av_log(ac
->avctx
, AV_LOG_ERROR
,
1358 "Number of bands (%d) exceeds limit (%d).\n",
1359 sect_end
, ics
->max_sfb
);
1360 return AVERROR_INVALIDDATA
;
1362 } while (sect_len_incr
== (1 << bits
) - 1);
1363 for (; k
< sect_end
; k
++) {
1364 band_type
[idx
] = sect_band_type
;
1365 band_type_run_end
[idx
++] = sect_end
;
1373 * Decode scalefactors; reference: table 4.47.
1375 * @param global_gain first scalefactor value as scalefactors are differentially coded
1376 * @param band_type array of the used band type
1377 * @param band_type_run_end array of the last scalefactor band of a band type run
1378 * @param sf array of scalefactors or intensity stereo positions
1380 * @return Returns error status. 0 - OK, !0 - error
1382 static int decode_scalefactors(AACContext
*ac
, float sf
[120], GetBitContext
*gb
,
1383 unsigned int global_gain
,
1384 IndividualChannelStream
*ics
,
1385 enum BandType band_type
[120],
1386 int band_type_run_end
[120])
1389 int offset
[3] = { global_gain
, global_gain
- 90, 0 };
1392 for (g
= 0; g
< ics
->num_window_groups
; g
++) {
1393 for (i
= 0; i
< ics
->max_sfb
;) {
1394 int run_end
= band_type_run_end
[idx
];
1395 if (band_type
[idx
] == ZERO_BT
) {
1396 for (; i
< run_end
; i
++, idx
++)
1398 } else if ((band_type
[idx
] == INTENSITY_BT
) ||
1399 (band_type
[idx
] == INTENSITY_BT2
)) {
1400 for (; i
< run_end
; i
++, idx
++) {
1401 offset
[2] += get_vlc2(gb
, vlc_scalefactors
.table
, 7, 3) - 60;
1402 clipped_offset
= av_clip(offset
[2], -155, 100);
1403 if (offset
[2] != clipped_offset
) {
1404 avpriv_request_sample(ac
->avctx
,
1405 "If you heard an audible artifact, there may be a bug in the decoder. "
1406 "Clipped intensity stereo position (%d -> %d)",
1407 offset
[2], clipped_offset
);
1409 sf
[idx
] = ff_aac_pow2sf_tab
[-clipped_offset
+ POW_SF2_ZERO
];
1411 } else if (band_type
[idx
] == NOISE_BT
) {
1412 for (; i
< run_end
; i
++, idx
++) {
1413 if (noise_flag
-- > 0)
1414 offset
[1] += get_bits(gb
, 9) - 256;
1416 offset
[1] += get_vlc2(gb
, vlc_scalefactors
.table
, 7, 3) - 60;
1417 clipped_offset
= av_clip(offset
[1], -100, 155);
1418 if (offset
[1] != clipped_offset
) {
1419 avpriv_request_sample(ac
->avctx
,
1420 "If you heard an audible artifact, there may be a bug in the decoder. "
1421 "Clipped noise gain (%d -> %d)",
1422 offset
[1], clipped_offset
);
1424 sf
[idx
] = -ff_aac_pow2sf_tab
[clipped_offset
+ POW_SF2_ZERO
];
1427 for (; i
< run_end
; i
++, idx
++) {
1428 offset
[0] += get_vlc2(gb
, vlc_scalefactors
.table
, 7, 3) - 60;
1429 if (offset
[0] > 255U) {
1430 av_log(ac
->avctx
, AV_LOG_ERROR
,
1431 "Scalefactor (%d) out of range.\n", offset
[0]);
1432 return AVERROR_INVALIDDATA
;
1434 sf
[idx
] = -ff_aac_pow2sf_tab
[offset
[0] - 100 + POW_SF2_ZERO
];
1443 * Decode pulse data; reference: table 4.7.
1445 static int decode_pulses(Pulse
*pulse
, GetBitContext
*gb
,
1446 const uint16_t *swb_offset
, int num_swb
)
1449 pulse
->num_pulse
= get_bits(gb
, 2) + 1;
1450 pulse_swb
= get_bits(gb
, 6);
1451 if (pulse_swb
>= num_swb
)
1453 pulse
->pos
[0] = swb_offset
[pulse_swb
];
1454 pulse
->pos
[0] += get_bits(gb
, 5);
1455 if (pulse
->pos
[0] >= swb_offset
[num_swb
])
1457 pulse
->amp
[0] = get_bits(gb
, 4);
1458 for (i
= 1; i
< pulse
->num_pulse
; i
++) {
1459 pulse
->pos
[i
] = get_bits(gb
, 5) + pulse
->pos
[i
- 1];
1460 if (pulse
->pos
[i
] >= swb_offset
[num_swb
])
1462 pulse
->amp
[i
] = get_bits(gb
, 4);
1468 * Decode Temporal Noise Shaping data; reference: table 4.48.
1470 * @return Returns error status. 0 - OK, !0 - error
1472 static int decode_tns(AACContext
*ac
, TemporalNoiseShaping
*tns
,
1473 GetBitContext
*gb
, const IndividualChannelStream
*ics
)
1475 int w
, filt
, i
, coef_len
, coef_res
, coef_compress
;
1476 const int is8
= ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
;
1477 const int tns_max_order
= is8
? 7 : ac
->oc
[1].m4ac
.object_type
== AOT_AAC_MAIN
? 20 : 12;
1478 for (w
= 0; w
< ics
->num_windows
; w
++) {
1479 if ((tns
->n_filt
[w
] = get_bits(gb
, 2 - is8
))) {
1480 coef_res
= get_bits1(gb
);
1482 for (filt
= 0; filt
< tns
->n_filt
[w
]; filt
++) {
1484 tns
->length
[w
][filt
] = get_bits(gb
, 6 - 2 * is8
);
1486 if ((tns
->order
[w
][filt
] = get_bits(gb
, 5 - 2 * is8
)) > tns_max_order
) {
1487 av_log(ac
->avctx
, AV_LOG_ERROR
,
1488 "TNS filter order %d is greater than maximum %d.\n",
1489 tns
->order
[w
][filt
], tns_max_order
);
1490 tns
->order
[w
][filt
] = 0;
1491 return AVERROR_INVALIDDATA
;
1493 if (tns
->order
[w
][filt
]) {
1494 tns
->direction
[w
][filt
] = get_bits1(gb
);
1495 coef_compress
= get_bits1(gb
);
1496 coef_len
= coef_res
+ 3 - coef_compress
;
1497 tmp2_idx
= 2 * coef_compress
+ coef_res
;
1499 for (i
= 0; i
< tns
->order
[w
][filt
]; i
++)
1500 tns
->coef
[w
][filt
][i
] = tns_tmp2_map
[tmp2_idx
][get_bits(gb
, coef_len
)];
1509 * Decode Mid/Side data; reference: table 4.54.
1511 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
1512 * [1] mask is decoded from bitstream; [2] mask is all 1s;
1513 * [3] reserved for scalable AAC
1515 static void decode_mid_side_stereo(ChannelElement
*cpe
, GetBitContext
*gb
,
1519 int max_idx
= cpe
->ch
[0].ics
.num_window_groups
* cpe
->ch
[0].ics
.max_sfb
;
1520 if (ms_present
== 1) {
1521 for (idx
= 0; idx
< max_idx
; idx
++)
1522 cpe
->ms_mask
[idx
] = get_bits1(gb
);
1523 } else if (ms_present
== 2) {
1524 memset(cpe
->ms_mask
, 1, max_idx
* sizeof(cpe
->ms_mask
[0]));
/**
 * Write two scaled codebook values to dst.
 *
 * The two 4-bit fields in bits 0-3 and 4-7 of idx each select one entry of
 * v; both entries are multiplied by *scale.
 *
 * @param   dst     output position; two floats are written
 * @param   v       value table, indexed 0..15
 * @param   idx     packed indices (only the low 8 bits are used)
 * @param   scale   pointer to the scale factor
 *
 * @return  Returns dst advanced past the two written values
 */
static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    float s = *scale;
    *dst++ = v[idx    & 15] * s;
    *dst++ = v[idx>>4 & 15] * s;
    return dst;
}
/**
 * Write four scaled codebook values to dst.
 *
 * The four 2-bit fields in bits 0-1, 2-3, 4-5 and 6-7 of idx each select
 * one entry of v; every entry is multiplied by *scale.
 *
 * @param   dst     output position; four floats are written
 * @param   v       value table, indexed 0..3
 * @param   idx     packed indices (only the low 8 bits are used)
 * @param   scale   pointer to the scale factor
 *
 * @return  Returns dst advanced past the four written values
 */
static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    float s = *scale;
    *dst++ = v[idx    & 3] * s;
    *dst++ = v[idx>>2 & 3] * s;
    *dst++ = v[idx>>4 & 3] * s;
    *dst++ = v[idx>>6 & 3] * s;
    return dst;
}
1553 static inline float *VMUL2S(float *dst
, const float *v
, unsigned idx
,
1554 unsigned sign
, const float *scale
)
1556 union av_intfloat32 s0
, s1
;
1558 s0
.f
= s1
.f
= *scale
;
1559 s0
.i
^= sign
>> 1 << 31;
1562 *dst
++ = v
[idx
& 15] * s0
.f
;
1563 *dst
++ = v
[idx
>>4 & 15] * s1
.f
;
1570 static inline float *VMUL4S(float *dst
, const float *v
, unsigned idx
,
1571 unsigned sign
, const float *scale
)
1573 unsigned nz
= idx
>> 12;
1574 union av_intfloat32 s
= { .f
= *scale
};
1575 union av_intfloat32 t
;
1577 t
.i
= s
.i
^ (sign
& 1U<<31);
1578 *dst
++ = v
[idx
& 3] * t
.f
;
1580 sign
<<= nz
& 1; nz
>>= 1;
1581 t
.i
= s
.i
^ (sign
& 1U<<31);
1582 *dst
++ = v
[idx
>>2 & 3] * t
.f
;
1584 sign
<<= nz
& 1; nz
>>= 1;
1585 t
.i
= s
.i
^ (sign
& 1U<<31);
1586 *dst
++ = v
[idx
>>4 & 3] * t
.f
;
1589 t
.i
= s
.i
^ (sign
& 1U<<31);
1590 *dst
++ = v
[idx
>>6 & 3] * t
.f
;
1597 * Decode spectral data; reference: table 4.50.
1598 * Dequantize and scale spectral data; reference: 4.6.3.3.
1600 * @param coef array of dequantized, scaled spectral data
1601 * @param sf array of scalefactors or intensity stereo positions
1602 * @param pulse_present set if pulses are present
1603 * @param pulse pointer to pulse data struct
1604 * @param band_type array of the used band type
1606 * @return Returns error status. 0 - OK, !0 - error
1608 static int decode_spectrum_and_dequant(AACContext
*ac
, float coef
[1024],
1609 GetBitContext
*gb
, const float sf
[120],
1610 int pulse_present
, const Pulse
*pulse
,
1611 const IndividualChannelStream
*ics
,
1612 enum BandType band_type
[120])
1614 int i
, k
, g
, idx
= 0;
1615 const int c
= 1024 / ics
->num_windows
;
1616 const uint16_t *offsets
= ics
->swb_offset
;
1617 float *coef_base
= coef
;
1619 for (g
= 0; g
< ics
->num_windows
; g
++)
1620 memset(coef
+ g
* 128 + offsets
[ics
->max_sfb
], 0,
1621 sizeof(float) * (c
- offsets
[ics
->max_sfb
]));
1623 for (g
= 0; g
< ics
->num_window_groups
; g
++) {
1624 unsigned g_len
= ics
->group_len
[g
];
1626 for (i
= 0; i
< ics
->max_sfb
; i
++, idx
++) {
1627 const unsigned cbt_m1
= band_type
[idx
] - 1;
1628 float *cfo
= coef
+ offsets
[i
];
1629 int off_len
= offsets
[i
+ 1] - offsets
[i
];
1632 if (cbt_m1
>= INTENSITY_BT2
- 1) {
1633 for (group
= 0; group
< g_len
; group
++, cfo
+=128) {
1634 memset(cfo
, 0, off_len
* sizeof(float));
1636 } else if (cbt_m1
== NOISE_BT
- 1) {
1637 for (group
= 0; group
< g_len
; group
++, cfo
+=128) {
1641 for (k
= 0; k
< off_len
; k
++) {
1642 ac
->random_state
= lcg_random(ac
->random_state
);
1643 cfo
[k
] = ac
->random_state
;
1646 band_energy
= ac
->fdsp
->scalarproduct_float(cfo
, cfo
, off_len
);
1647 scale
= sf
[idx
] / sqrtf(band_energy
);
1648 ac
->fdsp
->vector_fmul_scalar(cfo
, cfo
, scale
, off_len
);
1651 const float *vq
= ff_aac_codebook_vector_vals
[cbt_m1
];
1652 const uint16_t *cb_vector_idx
= ff_aac_codebook_vector_idx
[cbt_m1
];
1653 VLC_TYPE (*vlc_tab
)[2] = vlc_spectral
[cbt_m1
].table
;
1654 OPEN_READER(re
, gb
);
1656 switch (cbt_m1
>> 1) {
1658 for (group
= 0; group
< g_len
; group
++, cfo
+=128) {
1666 UPDATE_CACHE(re
, gb
);
1667 GET_VLC(code
, re
, gb
, vlc_tab
, 8, 2);
1668 cb_idx
= cb_vector_idx
[code
];
1669 cf
= VMUL4(cf
, vq
, cb_idx
, sf
+ idx
);
1675 for (group
= 0; group
< g_len
; group
++, cfo
+=128) {
1685 UPDATE_CACHE(re
, gb
);
1686 GET_VLC(code
, re
, gb
, vlc_tab
, 8, 2);
1687 cb_idx
= cb_vector_idx
[code
];
1688 nnz
= cb_idx
>> 8 & 15;
1689 bits
= nnz
? GET_CACHE(re
, gb
) : 0;
1690 LAST_SKIP_BITS(re
, gb
, nnz
);
1691 cf
= VMUL4S(cf
, vq
, cb_idx
, bits
, sf
+ idx
);
1697 for (group
= 0; group
< g_len
; group
++, cfo
+=128) {
1705 UPDATE_CACHE(re
, gb
);
1706 GET_VLC(code
, re
, gb
, vlc_tab
, 8, 2);
1707 cb_idx
= cb_vector_idx
[code
];
1708 cf
= VMUL2(cf
, vq
, cb_idx
, sf
+ idx
);
1715 for (group
= 0; group
< g_len
; group
++, cfo
+=128) {
1725 UPDATE_CACHE(re
, gb
);
1726 GET_VLC(code
, re
, gb
, vlc_tab
, 8, 2);
1727 cb_idx
= cb_vector_idx
[code
];
1728 nnz
= cb_idx
>> 8 & 15;
1729 sign
= nnz
? SHOW_UBITS(re
, gb
, nnz
) << (cb_idx
>> 12) : 0;
1730 LAST_SKIP_BITS(re
, gb
, nnz
);
1731 cf
= VMUL2S(cf
, vq
, cb_idx
, sign
, sf
+ idx
);
1737 for (group
= 0; group
< g_len
; group
++, cfo
+=128) {
1739 uint32_t *icf
= (uint32_t *) cf
;
1749 UPDATE_CACHE(re
, gb
);
1750 GET_VLC(code
, re
, gb
, vlc_tab
, 8, 2);
1758 cb_idx
= cb_vector_idx
[code
];
1761 bits
= SHOW_UBITS(re
, gb
, nnz
) << (32-nnz
);
1762 LAST_SKIP_BITS(re
, gb
, nnz
);
1764 for (j
= 0; j
< 2; j
++) {
1768 /* The total length of escape_sequence must be < 22 bits according
1769 to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1770 UPDATE_CACHE(re
, gb
);
1771 b
= GET_CACHE(re
, gb
);
1772 b
= 31 - av_log2(~b
);
1775 av_log(ac
->avctx
, AV_LOG_ERROR
, "error in spectral data, ESC overflow\n");
1776 return AVERROR_INVALIDDATA
;
1779 SKIP_BITS(re
, gb
, b
+ 1);
1781 n
= (1 << b
) + SHOW_UBITS(re
, gb
, b
);
1782 LAST_SKIP_BITS(re
, gb
, b
);
1783 *icf
++ = cbrt_tab
[n
] | (bits
& 1U<<31);
1786 unsigned v
= ((const uint32_t*)vq
)[cb_idx
& 15];
1787 *icf
++ = (bits
& 1U<<31) | v
;
1794 ac
->fdsp
->vector_fmul_scalar(cfo
, cfo
, sf
[idx
], off_len
);
1798 CLOSE_READER(re
, gb
);
1804 if (pulse_present
) {
1806 for (i
= 0; i
< pulse
->num_pulse
; i
++) {
1807 float co
= coef_base
[ pulse
->pos
[i
] ];
1808 while (offsets
[idx
+ 1] <= pulse
->pos
[i
])
1810 if (band_type
[idx
] != NOISE_BT
&& sf
[idx
]) {
1811 float ico
= -pulse
->amp
[i
];
1814 ico
= co
/ sqrtf(sqrtf(fabsf(co
))) + (co
> 0 ? -ico
: ico
);
1816 coef_base
[ pulse
->pos
[i
] ] = cbrtf(fabsf(ico
)) * ico
* sf
[idx
];
1823 static av_always_inline
float flt16_round(float pf
)
1825 union av_intfloat32 tmp
;
1827 tmp
.i
= (tmp
.i
+ 0x00008000U
) & 0xFFFF0000U
;
1831 static av_always_inline
float flt16_even(float pf
)
1833 union av_intfloat32 tmp
;
1835 tmp
.i
= (tmp
.i
+ 0x00007FFFU
+ (tmp
.i
& 0x00010000U
>> 16)) & 0xFFFF0000U
;
1839 static av_always_inline
float flt16_trunc(float pf
)
1841 union av_intfloat32 pun
;
1843 pun
.i
&= 0xFFFF0000U
;
1847 static av_always_inline
void predict(PredictorState
*ps
, float *coef
,
1850 const float a
= 0.953125; // 61.0 / 64
1851 const float alpha
= 0.90625; // 29.0 / 32
1855 float r0
= ps
->r0
, r1
= ps
->r1
;
1856 float cor0
= ps
->cor0
, cor1
= ps
->cor1
;
1857 float var0
= ps
->var0
, var1
= ps
->var1
;
1859 k1
= var0
> 1 ? cor0
* flt16_even(a
/ var0
) : 0;
1860 k2
= var1
> 1 ? cor1
* flt16_even(a
/ var1
) : 0;
1862 pv
= flt16_round(k1
* r0
+ k2
* r1
);
1869 ps
->cor1
= flt16_trunc(alpha
* cor1
+ r1
* e1
);
1870 ps
->var1
= flt16_trunc(alpha
* var1
+ 0.5f
* (r1
* r1
+ e1
* e1
));
1871 ps
->cor0
= flt16_trunc(alpha
* cor0
+ r0
* e0
);
1872 ps
->var0
= flt16_trunc(alpha
* var0
+ 0.5f
* (r0
* r0
+ e0
* e0
));
1874 ps
->r1
= flt16_trunc(a
* (r0
- k1
* e0
));
1875 ps
->r0
= flt16_trunc(a
* e0
);
1879 * Apply AAC-Main style frequency domain prediction.
1881 static void apply_prediction(AACContext
*ac
, SingleChannelElement
*sce
)
1885 if (!sce
->ics
.predictor_initialized
) {
1886 reset_all_predictors(sce
->predictor_state
);
1887 sce
->ics
.predictor_initialized
= 1;
1890 if (sce
->ics
.window_sequence
[0] != EIGHT_SHORT_SEQUENCE
) {
1892 sfb
< ff_aac_pred_sfb_max
[ac
->oc
[1].m4ac
.sampling_index
];
1894 for (k
= sce
->ics
.swb_offset
[sfb
];
1895 k
< sce
->ics
.swb_offset
[sfb
+ 1];
1897 predict(&sce
->predictor_state
[k
], &sce
->coeffs
[k
],
1898 sce
->ics
.predictor_present
&&
1899 sce
->ics
.prediction_used
[sfb
]);
1902 if (sce
->ics
.predictor_reset_group
)
1903 reset_predictor_group(sce
->predictor_state
,
1904 sce
->ics
.predictor_reset_group
);
1906 reset_all_predictors(sce
->predictor_state
);
1910 * Decode an individual_channel_stream payload; reference: table 4.44.
1912 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
1913 * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1915 * @return Returns error status. 0 - OK, !0 - error
1917 static int decode_ics(AACContext
*ac
, SingleChannelElement
*sce
,
1918 GetBitContext
*gb
, int common_window
, int scale_flag
)
1921 TemporalNoiseShaping
*tns
= &sce
->tns
;
1922 IndividualChannelStream
*ics
= &sce
->ics
;
1923 float *out
= sce
->coeffs
;
1924 int global_gain
, eld_syntax
, er_syntax
, pulse_present
= 0;
1927 eld_syntax
= ac
->oc
[1].m4ac
.object_type
== AOT_ER_AAC_ELD
;
1928 er_syntax
= ac
->oc
[1].m4ac
.object_type
== AOT_ER_AAC_LC
||
1929 ac
->oc
[1].m4ac
.object_type
== AOT_ER_AAC_LTP
||
1930 ac
->oc
[1].m4ac
.object_type
== AOT_ER_AAC_LD
||
1931 ac
->oc
[1].m4ac
.object_type
== AOT_ER_AAC_ELD
;
1933 /* This assignment is to silence a GCC warning about the variable being used
1934 * uninitialized when in fact it always is.
1936 pulse
.num_pulse
= 0;
1938 global_gain
= get_bits(gb
, 8);
1940 if (!common_window
&& !scale_flag
) {
1941 if (decode_ics_info(ac
, ics
, gb
) < 0)
1942 return AVERROR_INVALIDDATA
;
1945 if ((ret
= decode_band_types(ac
, sce
->band_type
,
1946 sce
->band_type_run_end
, gb
, ics
)) < 0)
1948 if ((ret
= decode_scalefactors(ac
, sce
->sf
, gb
, global_gain
, ics
,
1949 sce
->band_type
, sce
->band_type_run_end
)) < 0)
1954 if (!eld_syntax
&& (pulse_present
= get_bits1(gb
))) {
1955 if (ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
) {
1956 av_log(ac
->avctx
, AV_LOG_ERROR
,
1957 "Pulse tool not allowed in eight short sequence.\n");
1958 return AVERROR_INVALIDDATA
;
1960 if (decode_pulses(&pulse
, gb
, ics
->swb_offset
, ics
->num_swb
)) {
1961 av_log(ac
->avctx
, AV_LOG_ERROR
,
1962 "Pulse data corrupt or invalid.\n");
1963 return AVERROR_INVALIDDATA
;
1966 tns
->present
= get_bits1(gb
);
1967 if (tns
->present
&& !er_syntax
)
1968 if (decode_tns(ac
, tns
, gb
, ics
) < 0)
1969 return AVERROR_INVALIDDATA
;
1970 if (!eld_syntax
&& get_bits1(gb
)) {
1971 avpriv_request_sample(ac
->avctx
, "SSR");
1972 return AVERROR_PATCHWELCOME
;
1974 // I see no textual basis in the spec for this occurring after SSR gain
1975 // control, but this is what both reference and real implementations do
1976 if (tns
->present
&& er_syntax
)
1977 if (decode_tns(ac
, tns
, gb
, ics
) < 0)
1978 return AVERROR_INVALIDDATA
;
1981 if (decode_spectrum_and_dequant(ac
, out
, gb
, sce
->sf
, pulse_present
,
1982 &pulse
, ics
, sce
->band_type
) < 0)
1983 return AVERROR_INVALIDDATA
;
1985 if (ac
->oc
[1].m4ac
.object_type
== AOT_AAC_MAIN
&& !common_window
)
1986 apply_prediction(ac
, sce
);
1992 * Mid/Side stereo decoding; reference: 4.6.8.1.3.
1994 static void apply_mid_side_stereo(AACContext
*ac
, ChannelElement
*cpe
)
1996 const IndividualChannelStream
*ics
= &cpe
->ch
[0].ics
;
1997 float *ch0
= cpe
->ch
[0].coeffs
;
1998 float *ch1
= cpe
->ch
[1].coeffs
;
1999 int g
, i
, group
, idx
= 0;
2000 const uint16_t *offsets
= ics
->swb_offset
;
2001 for (g
= 0; g
< ics
->num_window_groups
; g
++) {
2002 for (i
= 0; i
< ics
->max_sfb
; i
++, idx
++) {
2003 if (cpe
->ms_mask
[idx
] &&
2004 cpe
->ch
[0].band_type
[idx
] < NOISE_BT
&&
2005 cpe
->ch
[1].band_type
[idx
] < NOISE_BT
) {
2006 for (group
= 0; group
< ics
->group_len
[g
]; group
++) {
2007 ac
->fdsp
->butterflies_float(ch0
+ group
* 128 + offsets
[i
],
2008 ch1
+ group
* 128 + offsets
[i
],
2009 offsets
[i
+1] - offsets
[i
]);
2013 ch0
+= ics
->group_len
[g
] * 128;
2014 ch1
+= ics
->group_len
[g
] * 128;
2019 * intensity stereo decoding; reference: 4.6.8.2.3
2021 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
2022 * [1] mask is decoded from bitstream; [2] mask is all 1s;
2023 * [3] reserved for scalable AAC
2025 static void apply_intensity_stereo(AACContext
*ac
,
2026 ChannelElement
*cpe
, int ms_present
)
2028 const IndividualChannelStream
*ics
= &cpe
->ch
[1].ics
;
2029 SingleChannelElement
*sce1
= &cpe
->ch
[1];
2030 float *coef0
= cpe
->ch
[0].coeffs
, *coef1
= cpe
->ch
[1].coeffs
;
2031 const uint16_t *offsets
= ics
->swb_offset
;
2032 int g
, group
, i
, idx
= 0;
2035 for (g
= 0; g
< ics
->num_window_groups
; g
++) {
2036 for (i
= 0; i
< ics
->max_sfb
;) {
2037 if (sce1
->band_type
[idx
] == INTENSITY_BT
||
2038 sce1
->band_type
[idx
] == INTENSITY_BT2
) {
2039 const int bt_run_end
= sce1
->band_type_run_end
[idx
];
2040 for (; i
< bt_run_end
; i
++, idx
++) {
2041 c
= -1 + 2 * (sce1
->band_type
[idx
] - 14);
2043 c
*= 1 - 2 * cpe
->ms_mask
[idx
];
2044 scale
= c
* sce1
->sf
[idx
];
2045 for (group
= 0; group
< ics
->group_len
[g
]; group
++)
2046 ac
->fdsp
->vector_fmul_scalar(coef1
+ group
* 128 + offsets
[i
],
2047 coef0
+ group
* 128 + offsets
[i
],
2049 offsets
[i
+ 1] - offsets
[i
]);
2052 int bt_run_end
= sce1
->band_type_run_end
[idx
];
2053 idx
+= bt_run_end
- i
;
2057 coef0
+= ics
->group_len
[g
] * 128;
2058 coef1
+= ics
->group_len
[g
] * 128;
2063 * Decode a channel_pair_element; reference: table 4.4.
2065 * @return Returns error status. 0 - OK, !0 - error
2067 static int decode_cpe(AACContext
*ac
, GetBitContext
*gb
, ChannelElement
*cpe
)
2069 int i
, ret
, common_window
, ms_present
= 0;
2070 int eld_syntax
= ac
->oc
[1].m4ac
.object_type
== AOT_ER_AAC_ELD
;
2072 common_window
= eld_syntax
|| get_bits1(gb
);
2073 if (common_window
) {
2074 if (decode_ics_info(ac
, &cpe
->ch
[0].ics
, gb
))
2075 return AVERROR_INVALIDDATA
;
2076 i
= cpe
->ch
[1].ics
.use_kb_window
[0];
2077 cpe
->ch
[1].ics
= cpe
->ch
[0].ics
;
2078 cpe
->ch
[1].ics
.use_kb_window
[1] = i
;
2079 if (cpe
->ch
[1].ics
.predictor_present
&&
2080 (ac
->oc
[1].m4ac
.object_type
!= AOT_AAC_MAIN
))
2081 if ((cpe
->ch
[1].ics
.ltp
.present
= get_bits(gb
, 1)))
2082 decode_ltp(&cpe
->ch
[1].ics
.ltp
, gb
, cpe
->ch
[1].ics
.max_sfb
);
2083 ms_present
= get_bits(gb
, 2);
2084 if (ms_present
== 3) {
2085 av_log(ac
->avctx
, AV_LOG_ERROR
, "ms_present = 3 is reserved.\n");
2086 return AVERROR_INVALIDDATA
;
2087 } else if (ms_present
)
2088 decode_mid_side_stereo(cpe
, gb
, ms_present
);
2090 if ((ret
= decode_ics(ac
, &cpe
->ch
[0], gb
, common_window
, 0)))
2092 if ((ret
= decode_ics(ac
, &cpe
->ch
[1], gb
, common_window
, 0)))
2095 if (common_window
) {
2097 apply_mid_side_stereo(ac
, cpe
);
2098 if (ac
->oc
[1].m4ac
.object_type
== AOT_AAC_MAIN
) {
2099 apply_prediction(ac
, &cpe
->ch
[0]);
2100 apply_prediction(ac
, &cpe
->ch
[1]);
2104 apply_intensity_stereo(ac
, cpe
, ms_present
);
2108 static const float cce_scale
[] = {
2109 1.09050773266525765921, //2^(1/8)
2110 1.18920711500272106672, //2^(1/4)
2116 * Decode coupling_channel_element; reference: table 4.8.
2118 * @return Returns error status. 0 - OK, !0 - error
2120 static int decode_cce(AACContext
*ac
, GetBitContext
*gb
, ChannelElement
*che
)
2126 SingleChannelElement
*sce
= &che
->ch
[0];
2127 ChannelCoupling
*coup
= &che
->coup
;
2129 coup
->coupling_point
= 2 * get_bits1(gb
);
2130 coup
->num_coupled
= get_bits(gb
, 3);
2131 for (c
= 0; c
<= coup
->num_coupled
; c
++) {
2133 coup
->type
[c
] = get_bits1(gb
) ? TYPE_CPE
: TYPE_SCE
;
2134 coup
->id_select
[c
] = get_bits(gb
, 4);
2135 if (coup
->type
[c
] == TYPE_CPE
) {
2136 coup
->ch_select
[c
] = get_bits(gb
, 2);
2137 if (coup
->ch_select
[c
] == 3)
2140 coup
->ch_select
[c
] = 2;
2142 coup
->coupling_point
+= get_bits1(gb
) || (coup
->coupling_point
>> 1);
2144 sign
= get_bits(gb
, 1);
2145 scale
= cce_scale
[get_bits(gb
, 2)];
2147 if ((ret
= decode_ics(ac
, sce
, gb
, 0, 0)))
2150 for (c
= 0; c
< num_gain
; c
++) {
2154 float gain_cache
= 1.0;
2156 cge
= coup
->coupling_point
== AFTER_IMDCT
? 1 : get_bits1(gb
);
2157 gain
= cge
? get_vlc2(gb
, vlc_scalefactors
.table
, 7, 3) - 60: 0;
2158 gain_cache
= powf(scale
, -gain
);
2160 if (coup
->coupling_point
== AFTER_IMDCT
) {
2161 coup
->gain
[c
][0] = gain_cache
;
2163 for (g
= 0; g
< sce
->ics
.num_window_groups
; g
++) {
2164 for (sfb
= 0; sfb
< sce
->ics
.max_sfb
; sfb
++, idx
++) {
2165 if (sce
->band_type
[idx
] != ZERO_BT
) {
2167 int t
= get_vlc2(gb
, vlc_scalefactors
.table
, 7, 3) - 60;
2175 gain_cache
= powf(scale
, -t
) * s
;
2178 coup
->gain
[c
][idx
] = gain_cache
;
2188 * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
2190 * @return Returns number of bytes consumed.
2192 static int decode_drc_channel_exclusions(DynamicRangeControl
*che_drc
,
2196 int num_excl_chan
= 0;
2199 for (i
= 0; i
< 7; i
++)
2200 che_drc
->exclude_mask
[num_excl_chan
++] = get_bits1(gb
);
2201 } while (num_excl_chan
< MAX_CHANNELS
- 7 && get_bits1(gb
));
2203 return num_excl_chan
/ 7;
2207 * Decode dynamic range information; reference: table 4.52.
2209 * @return Returns number of bytes consumed.
2211 static int decode_dynamic_range(DynamicRangeControl
*che_drc
,
2215 int drc_num_bands
= 1;
2218 /* pce_tag_present? */
2219 if (get_bits1(gb
)) {
2220 che_drc
->pce_instance_tag
= get_bits(gb
, 4);
2221 skip_bits(gb
, 4); // tag_reserved_bits
2225 /* excluded_chns_present? */
2226 if (get_bits1(gb
)) {
2227 n
+= decode_drc_channel_exclusions(che_drc
, gb
);
2230 /* drc_bands_present? */
2231 if (get_bits1(gb
)) {
2232 che_drc
->band_incr
= get_bits(gb
, 4);
2233 che_drc
->interpolation_scheme
= get_bits(gb
, 4);
2235 drc_num_bands
+= che_drc
->band_incr
;
2236 for (i
= 0; i
< drc_num_bands
; i
++) {
2237 che_drc
->band_top
[i
] = get_bits(gb
, 8);
2242 /* prog_ref_level_present? */
2243 if (get_bits1(gb
)) {
2244 che_drc
->prog_ref_level
= get_bits(gb
, 7);
2245 skip_bits1(gb
); // prog_ref_level_reserved_bits
2249 for (i
= 0; i
< drc_num_bands
; i
++) {
2250 che_drc
->dyn_rng_sgn
[i
] = get_bits1(gb
);
2251 che_drc
->dyn_rng_ctl
[i
] = get_bits(gb
, 7);
/*
 * Inspect a fill payload of len bits for an encoder identification string.
 *
 * 13 bits are read and discarded, then whole bytes are copied into buf while
 * at least 8 bits of len remain and one byte of buf is still free. When
 * FF_DEBUG_PICT_INFO is set the buffer is logged. If the text matches
 * "libfaac <major>.<minor>", avctx->internal->skip_samples is set to 1024.
 * Whatever is left of len is skipped at the end.
 *
 * NOTE(review): the declaration of buf and the statement that terminates the
 * string are not among the lines shown here; confirm both before relying on
 * buf being a NUL-terminated string.
 */
2258 static int decode_fill(AACContext
*ac
, GetBitContext
*gb
, int len
) {
2260 int i
, major
, minor
;
2265 get_bits(gb
, 13); len
-= 13;
// Copy at most sizeof(buf) - 1 bytes; len is counted down in bits.
2267 for(i
=0; i
+1<sizeof(buf
) && len
>=8; i
++, len
-=8)
2268 buf
[i
] = get_bits(gb
, 8);
2271 if (ac
->avctx
->debug
& FF_DEBUG_PICT_INFO
)
2272 av_log(ac
->avctx
, AV_LOG_DEBUG
, "FILL:%s\n", buf
);
// Both version numbers must parse for the libfaac adjustment to apply.
2274 if (sscanf(buf
, "libfaac %d.%d", &major
, &minor
) == 2){
2275 ac
->avctx
->internal
->skip_samples
= 1024;
2279 skip_bits_long(gb
, len
);
2285 * Decode extension data (incomplete); reference: table 4.51.
2287 * @param cnt length of TYPE_FIL syntactic element in bytes
2289 * @return Returns number of bytes consumed
/*
 * Decode one extension payload of cnt bytes.
 *
 * A 4-bit extension type is read first (and logged when FF_DEBUG_STARTCODE
 * is set), which is why the skip paths below consume 8 * cnt - 4 bits.
 *
 * SBR data:
 *   - an error is logged when SBR is found before the first channel
 *     element, when SBR is signalled as not present, or when implicit SBR
 *     first appears after the configuration is locked; the latter two also
 *     skip the payload;
 *   - if PS is still undecided (-1), the configuration is not yet locked
 *     and the stream is mono, both SBR and PS are enabled, the profile
 *     becomes HE-AACv2 and the output is reconfigured;
 *   - the remaining assignment enables SBR and sets the HE-AAC profile;
 *   - the payload is then handed to ff_decode_sbr_extension().
 * Dynamic range data goes to decode_dynamic_range(), fill data to
 * decode_fill(), and data elements are skipped.
 *
 * NOTE(review): several case labels, the crc_flag/res declarations and the
 * break/return statements are not among the lines shown here, so the exact
 * fall-through structure should be confirmed against the full source.
 */
2291 static int decode_extension_payload(AACContext
*ac
, GetBitContext
*gb
, int cnt
,
2292 ChannelElement
*che
, enum RawDataBlockType elem_type
)
2296 int type
= get_bits(gb
, 4);
2298 if (ac
->avctx
->debug
& FF_DEBUG_STARTCODE
)
2299 av_log(ac
->avctx
, AV_LOG_DEBUG
, "extension type: %d len:%d\n", type
, cnt
);
2301 switch (type
) { // extension type
2302 case EXT_SBR_DATA_CRC
:
2306 av_log(ac
->avctx
, AV_LOG_ERROR
, "SBR was found before the first channel element.\n");
2308 } else if (!ac
->oc
[1].m4ac
.sbr
) {
2309 av_log(ac
->avctx
, AV_LOG_ERROR
, "SBR signaled to be not-present but was found in the bitstream.\n");
2310 skip_bits_long(gb
, 8 * cnt
- 4);
2312 } else if (ac
->oc
[1].m4ac
.sbr
== -1 && ac
->oc
[1].status
== OC_LOCKED
) {
2313 av_log(ac
->avctx
, AV_LOG_ERROR
, "Implicit SBR was found with a first occurrence after the first frame.\n");
2314 skip_bits_long(gb
, 8 * cnt
- 4);
// PS undecided, configuration not locked, mono stream: enable SBR and PS.
2316 } else if (ac
->oc
[1].m4ac
.ps
== -1 && ac
->oc
[1].status
< OC_LOCKED
&& ac
->avctx
->channels
== 1) {
2317 ac
->oc
[1].m4ac
.sbr
= 1;
2318 ac
->oc
[1].m4ac
.ps
= 1;
2319 ac
->avctx
->profile
= FF_PROFILE_AAC_HE_V2
;
2320 output_configure(ac
, ac
->oc
[1].layout_map
, ac
->oc
[1].layout_map_tags
,
2321 ac
->oc
[1].status
, 1);
2323 ac
->oc
[1].m4ac
.sbr
= 1;
2324 ac
->avctx
->profile
= FF_PROFILE_AAC_HE
;
2326 res
= ff_decode_sbr_extension(ac
, &che
->sbr
, gb
, crc_flag
, cnt
, elem_type
);
2328 case EXT_DYNAMIC_RANGE
:
2329 res
= decode_dynamic_range(&ac
->che_drc
, gb
);
2332 decode_fill(ac
, gb
, 8 * cnt
- 4);
2335 case EXT_DATA_ELEMENT
:
2337 skip_bits_long(gb
, 8 * cnt
- 4);
2344 * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
2346 * @param decode 1 if tool is used normally, 0 if tool is used in LTP.
2347 * @param coef spectral coefficients
/*
 * Run the Temporal Noise Shaping filters of *tns over coef[].
 *
 * mmm is the smaller of ics->tns_max_bands and ics->max_sfb and caps the
 * band indices used to look up coefficient offsets. For every window and
 * every filter in that window:
 *   - bottom is moved down by the filter length (not below 0);
 *   - the stored filter coefficients are converted with compute_lpc_coefs()
 *     into lpc[];
 *   - start/end come from ics->swb_offset[] at the capped band indices, and
 *     the filter is skipped when the range is empty;
 *   - tns->direction[w][filt] is tested to pick the traversal set-up.
 *
 * Two filter loops are visible. The first subtracts lpc-weighted earlier
 * outputs from coef[start] in place. The second adds lpc-weighted entries
 * of the history buffer tmp[] to coef[start] and then shifts tmp[] by one.
 *
 * NOTE(review): the lines that choose between the two loops, the inc/start
 * set-up inside the direction test and the assignment of top are not among
 * the lines shown here; presumably the decode argument selects the loop.
 * Confirm against the full source.
 */
2349 static void apply_tns(float coef
[1024], TemporalNoiseShaping
*tns
,
2350 IndividualChannelStream
*ics
, int decode
)
2352 const int mmm
= FFMIN(ics
->tns_max_bands
, ics
->max_sfb
);
2354 int bottom
, top
, order
, start
, end
, size
, inc
;
2355 float lpc
[TNS_MAX_ORDER
];
2356 float tmp
[TNS_MAX_ORDER
+1];
2358 for (w
= 0; w
< ics
->num_windows
; w
++) {
2359 bottom
= ics
->num_swb
;
2360 for (filt
= 0; filt
< tns
->n_filt
[w
]; filt
++) {
2362 bottom
= FFMAX(0, top
- tns
->length
[w
][filt
]);
2363 order
= tns
->order
[w
][filt
];
2368 compute_lpc_coefs(tns
->coef
[w
][filt
], order
, lpc
, 0, 0, 0);
// Band indices are capped at mmm before the offset lookup.
2370 start
= ics
->swb_offset
[FFMIN(bottom
, mmm
)];
2371 end
= ics
->swb_offset
[FFMIN( top
, mmm
)];
2372 if ((size
= end
- start
) <= 0)
2374 if (tns
->direction
[w
][filt
]) {
// In-place recursion: each output depends on up to `order` earlier outputs.
2384 for (m
= 0; m
< size
; m
++, start
+= inc
)
2385 for (i
= 1; i
<= FFMIN(m
, order
); i
++)
2386 coef
[start
] -= coef
[start
- i
* inc
] * lpc
[i
- 1];
// Variant working from the history buffer tmp[], shifted after every sample.
2389 for (m
= 0; m
< size
; m
++, start
+= inc
) {
2390 tmp
[0] = coef
[start
];
2391 for (i
= 1; i
<= FFMIN(m
, order
); i
++)
2392 coef
[start
] += tmp
[i
] * lpc
[i
- 1];
2393 for (i
= order
; i
> 0; i
--)
2394 tmp
[i
] = tmp
[i
- 1];
2402 * Apply windowing and MDCT to obtain the spectral
2403 * coefficient from the predicted sample by LTP.
/*
 * Window the 2048-sample LTP prediction in `in` (modified in place) and
 * transform it with ac->mdct_ltp into `out`.
 *
 * Window tables are chosen per half: use_kb_window[1] selects the tables
 * applied to the first 1024 samples (the *_prev pointers), use_kb_window[0]
 * those applied to the second 1024 samples; a set flag picks the KBD table,
 * otherwise the sine table.
 *
 * First half: the 1024-point window is applied when the sequence is not
 * LONG_STOP_SEQUENCE; the other visible path zeroes the first 448 samples
 * and applies the 128-point window at offset 448.
 * Second half: the 1024-point window (reversed) is applied when the sequence
 * is not LONG_START_SEQUENCE; the other visible path applies the reversed
 * 128-point window at offset 1024 + 448 and zeroes the last 448 samples.
 *
 * NOTE(review): the `else` lines joining each pair of paths are not among
 * the lines shown here.
 */
2405 static void windowing_and_mdct_ltp(AACContext
*ac
, float *out
,
2406 float *in
, IndividualChannelStream
*ics
)
2408 const float *lwindow
= ics
->use_kb_window
[0] ? ff_aac_kbd_long_1024
: ff_sine_1024
;
2409 const float *swindow
= ics
->use_kb_window
[0] ? ff_aac_kbd_short_128
: ff_sine_128
;
2410 const float *lwindow_prev
= ics
->use_kb_window
[1] ? ff_aac_kbd_long_1024
: ff_sine_1024
;
2411 const float *swindow_prev
= ics
->use_kb_window
[1] ? ff_aac_kbd_short_128
: ff_sine_128
;
2413 if (ics
->window_sequence
[0] != LONG_STOP_SEQUENCE
) {
2414 ac
->fdsp
->vector_fmul(in
, in
, lwindow_prev
, 1024);
2416 memset(in
, 0, 448 * sizeof(float));
2417 ac
->fdsp
->vector_fmul(in
+ 448, in
+ 448, swindow_prev
, 128);
2419 if (ics
->window_sequence
[0] != LONG_START_SEQUENCE
) {
2420 ac
->fdsp
->vector_fmul_reverse(in
+ 1024, in
+ 1024, lwindow
, 1024);
2422 ac
->fdsp
->vector_fmul_reverse(in
+ 1024 + 448, in
+ 1024 + 448, swindow
, 128);
2423 memset(in
+ 1024 + 576, 0, 448 * sizeof(float));
2425 ac
->mdct_ltp
.mdct_calc(&ac
->mdct_ltp
, out
, in
);
2429 * Apply the long term prediction
/*
 * Add the long-term prediction to the spectral coefficients of *sce.
 *
 * Only sequences other than EIGHT_SHORT_SEQUENCE are handled:
 *   1. A time-domain prediction is built in sce->ret from sce->ltp_state,
 *      read at offset (2048 - lag) and scaled by ltp->coef. 2048 samples
 *      are produced, or lag + 1024 when lag < 1024; the remainder up to
 *      2048 is zeroed.
 *   2. ac->windowing_and_mdct_ltp() turns it into spectral values in
 *      ac->buf_mdct.
 *   3. If TNS is present it is applied to the prediction with decode = 0.
 *   4. For each scale-factor band below min(max_sfb, MAX_LTP_LONG_SFB) the
 *      predicted spectrum is added to sce->coeffs over that band's range.
 *
 * NOTE(review): original line 2454, between the band loop and the
 * coefficient loop, is not among the lines shown here; it presumably
 * restricts the addition to bands that use LTP. Confirm.
 */
2431 static void apply_ltp(AACContext
*ac
, SingleChannelElement
*sce
)
2433 const LongTermPrediction
*ltp
= &sce
->ics
.ltp
;
2434 const uint16_t *offsets
= sce
->ics
.swb_offset
;
2437 if (sce
->ics
.window_sequence
[0] != EIGHT_SHORT_SEQUENCE
) {
2438 float *predTime
= sce
->ret
;
2439 float *predFreq
= ac
->buf_mdct
;
2440 int16_t num_samples
= 2048;
2442 if (ltp
->lag
< 1024)
2443 num_samples
= ltp
->lag
+ 1024;
2444 for (i
= 0; i
< num_samples
; i
++)
2445 predTime
[i
] = sce
->ltp_state
[i
+ 2048 - ltp
->lag
] * ltp
->coef
;
// i equals num_samples here; zero whatever was not predicted.
2446 memset(&predTime
[i
], 0, (2048 - i
) * sizeof(float));
2448 ac
->windowing_and_mdct_ltp(ac
, predFreq
, predTime
, &sce
->ics
);
2450 if (sce
->tns
.present
)
2451 ac
->apply_tns(predFreq
, &sce
->tns
, &sce
->ics
, 0);
2453 for (sfb
= 0; sfb
< FFMIN(sce
->ics
.max_sfb
, MAX_LTP_LONG_SFB
); sfb
++)
2455 for (i
= offsets
[sfb
]; i
< offsets
[sfb
+ 1]; i
++)
2456 sce
->coeffs
[i
] += predFreq
[i
];
2461 * Update the LTP buffer for next frame
/*
 * Refresh sce->ltp_state after a frame has been decoded.
 *
 * sce->coeffs is reused as scratch (saved_ltp) to build 1024 samples from
 * sce->saved and ac->buf_mdct. The construction depends on
 * window_sequence[0]:
 *   - EIGHT_SHORT_SEQUENCE: the first 512 samples are copied from
 *     sce->saved, 448 samples from offset 576 are zeroed, and 64 + 64
 *     samples around offset 448..575 are windowed from the end of buf_mdct
 *     with the short window;
 *   - LONG_START_SEQUENCE: as above, except that the first 448 samples are
 *     copied from buf_mdct + 512;
 *   - otherwise: 512 + 512 samples are windowed from buf_mdct with the long
 *     window.
 * Note that the EIGHT_SHORT copy of 512 samples is partly overwritten by the
 * windowed samples written at offset 448 afterwards.
 *
 * ltp_state is then shifted: samples 1024..2047 move to 0..1023, sce->ret
 * is stored at 1024..2047 and saved_ltp at 2048..3071.
 */
2463 static void update_ltp(AACContext
*ac
, SingleChannelElement
*sce
)
2465 IndividualChannelStream
*ics
= &sce
->ics
;
2466 float *saved
= sce
->saved
;
2467 float *saved_ltp
= sce
->coeffs
;
2468 const float *lwindow
= ics
->use_kb_window
[0] ? ff_aac_kbd_long_1024
: ff_sine_1024
;
2469 const float *swindow
= ics
->use_kb_window
[0] ? ff_aac_kbd_short_128
: ff_sine_128
;
2472 if (ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
) {
2473 memcpy(saved_ltp
, saved
, 512 * sizeof(float));
2474 memset(saved_ltp
+ 576, 0, 448 * sizeof(float));
2475 ac
->fdsp
->vector_fmul_reverse(saved_ltp
+ 448, ac
->buf_mdct
+ 960, &swindow
[64], 64);
2476 for (i
= 0; i
< 64; i
++)
2477 saved_ltp
[i
+ 512] = ac
->buf_mdct
[1023 - i
] * swindow
[63 - i
];
2478 } else if (ics
->window_sequence
[0] == LONG_START_SEQUENCE
) {
2479 memcpy(saved_ltp
, ac
->buf_mdct
+ 512, 448 * sizeof(float));
2480 memset(saved_ltp
+ 576, 0, 448 * sizeof(float));
2481 ac
->fdsp
->vector_fmul_reverse(saved_ltp
+ 448, ac
->buf_mdct
+ 960, &swindow
[64], 64);
2482 for (i
= 0; i
< 64; i
++)
2483 saved_ltp
[i
+ 512] = ac
->buf_mdct
[1023 - i
] * swindow
[63 - i
];
2484 } else { // LONG_STOP or ONLY_LONG
2485 ac
->fdsp
->vector_fmul_reverse(saved_ltp
, ac
->buf_mdct
+ 512, &lwindow
[512], 512);
2486 for (i
= 0; i
< 512; i
++)
2487 saved_ltp
[i
+ 512] = ac
->buf_mdct
[1023 - i
] * lwindow
[511 - i
];
// Slide the history: the two copied regions below do not overlap.
2490 memcpy(sce
->ltp_state
, sce
->ltp_state
+1024, 1024 * sizeof(*sce
->ltp_state
));
2491 memcpy(sce
->ltp_state
+1024, sce
->ret
, 1024 * sizeof(*sce
->ltp_state
));
2492 memcpy(sce
->ltp_state
+2048, saved_ltp
, 1024 * sizeof(*sce
->ltp_state
));
2496 * Conduct IMDCT and windowing.
/*
 * Inverse-transform sce->coeffs and overlap-add with the previous frame.
 *
 * Working buffers: buf = ac->buf_mdct (IMDCT output), temp = ac->temp,
 * out = sce->ret (this frame's samples) and saved = sce->saved (overlap
 * carried to the next frame). swindow follows use_kb_window[0]; the *_prev
 * windows follow use_kb_window[1].
 *
 * Transform: for EIGHT_SHORT_SEQUENCE, eight half-IMDCTs with
 * ac->mdct_small are run at a stride of 128 coefficients; the other visible
 * call runs a single half-IMDCT with ac->mdct.
 *
 * Overlap with the previous frame: when the previous sequence is ONLY_LONG
 * or LONG_STOP and the current one is ONLY_LONG or LONG_START, a single
 * 512-tap long-window overlap is used. Otherwise the first 448 samples are
 * copied from saved and short-window overlaps follow: five of them for
 * EIGHT_SHORT_SEQUENCE (the fifth goes through temp, and only its first 64
 * samples are copied to out), or one overlap plus a 448-sample copy from buf
 * in the other visible path.
 *
 * Overlap for the next frame: EIGHT_SHORT_SEQUENCE stores the second half
 * of temp, three more short-window overlaps and the last 64 samples of buf;
 * LONG_START_SEQUENCE stores 448 samples from buf + 512 plus the last 64
 * samples of buf; otherwise 512 samples from buf + 512 are stored.
 *
 * NOTE(review): several `else` lines are not among the lines shown here, so
 * the pairing of the branches described above should be confirmed.
 */
2498 static void imdct_and_windowing(AACContext
*ac
, SingleChannelElement
*sce
)
2500 IndividualChannelStream
*ics
= &sce
->ics
;
2501 float *in
= sce
->coeffs
;
2502 float *out
= sce
->ret
;
2503 float *saved
= sce
->saved
;
2504 const float *swindow
= ics
->use_kb_window
[0] ? ff_aac_kbd_short_128
: ff_sine_128
;
2505 const float *lwindow_prev
= ics
->use_kb_window
[1] ? ff_aac_kbd_long_1024
: ff_sine_1024
;
2506 const float *swindow_prev
= ics
->use_kb_window
[1] ? ff_aac_kbd_short_128
: ff_sine_128
;
2507 float *buf
= ac
->buf_mdct
;
2508 float *temp
= ac
->temp
;
2512 if (ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
) {
2513 for (i
= 0; i
< 1024; i
+= 128)
2514 ac
->mdct_small
.imdct_half(&ac
->mdct_small
, buf
+ i
, in
+ i
);
2516 ac
->mdct
.imdct_half(&ac
->mdct
, buf
, in
);
2518 /* window overlapping
2519 * NOTE: To simplify the overlapping code, all 'meaningless' short to long
2520 * and long to short transitions are considered to be short to short
2521 * transitions. This leaves just two cases (long to long and short to short)
2522 * with a little special sauce for EIGHT_SHORT_SEQUENCE.
2524 if ((ics
->window_sequence
[1] == ONLY_LONG_SEQUENCE
|| ics
->window_sequence
[1] == LONG_STOP_SEQUENCE
) &&
2525 (ics
->window_sequence
[0] == ONLY_LONG_SEQUENCE
|| ics
->window_sequence
[0] == LONG_START_SEQUENCE
)) {
2526 ac
->fdsp
->vector_fmul_window( out
, saved
, buf
, lwindow_prev
, 512);
2528 memcpy( out
, saved
, 448 * sizeof(float));
2530 if (ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
) {
2531 ac
->fdsp
->vector_fmul_window(out
+ 448 + 0*128, saved
+ 448, buf
+ 0*128, swindow_prev
, 64);
2532 ac
->fdsp
->vector_fmul_window(out
+ 448 + 1*128, buf
+ 0*128 + 64, buf
+ 1*128, swindow
, 64);
2533 ac
->fdsp
->vector_fmul_window(out
+ 448 + 2*128, buf
+ 1*128 + 64, buf
+ 2*128, swindow
, 64);
2534 ac
->fdsp
->vector_fmul_window(out
+ 448 + 3*128, buf
+ 2*128 + 64, buf
+ 3*128, swindow
, 64);
2535 ac
->fdsp
->vector_fmul_window(temp
, buf
+ 3*128 + 64, buf
+ 4*128, swindow
, 64);
2536 memcpy( out
+ 448 + 4*128, temp
, 64 * sizeof(float));
2538 ac
->fdsp
->vector_fmul_window(out
+ 448, saved
+ 448, buf
, swindow_prev
, 64);
2539 memcpy( out
+ 576, buf
+ 64, 448 * sizeof(float));
2544 if (ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
) {
2545 memcpy( saved
, temp
+ 64, 64 * sizeof(float));
2546 ac
->fdsp
->vector_fmul_window(saved
+ 64, buf
+ 4*128 + 64, buf
+ 5*128, swindow
, 64);
2547 ac
->fdsp
->vector_fmul_window(saved
+ 192, buf
+ 5*128 + 64, buf
+ 6*128, swindow
, 64);
2548 ac
->fdsp
->vector_fmul_window(saved
+ 320, buf
+ 6*128 + 64, buf
+ 7*128, swindow
, 64);
2549 memcpy( saved
+ 448, buf
+ 7*128 + 64, 64 * sizeof(float));
2550 } else if (ics
->window_sequence
[0] == LONG_START_SEQUENCE
) {
2551 memcpy( saved
, buf
+ 512, 448 * sizeof(float));
2552 memcpy( saved
+ 448, buf
+ 7*128 + 64, 64 * sizeof(float));
2553 } else { // LONG_STOP or ONLY_LONG
2554 memcpy( saved
, buf
+ 512, 512 * sizeof(float));
/*
 * IMDCT and overlap-add for the low-delay frame layout.
 *
 * The half-IMDCT is called through the function pointer stored in ac->mdct
 * but operates on the ac->mdct_ld context; its output goes to ac->buf_mdct.
 *
 * Overlap with the previous frame (previous samples are in sce->saved):
 *   - when use_kb_window[1] is set, a low-overlap sine window is used:
 *     192 samples are copied from saved, 128 samples are overlapped with
 *     ff_sine_128, and 192 samples are copied from buf + 64;
 *   - the other visible path overlaps all 512 samples with ff_sine_512.
 * Finally 256 samples from buf + 256 are stored in saved for the next frame.
 *
 * NOTE(review): the `else` line between the two overlap paths is not among
 * the lines shown here.
 */
2558 static void imdct_and_windowing_ld(AACContext
*ac
, SingleChannelElement
*sce
)
2560 IndividualChannelStream
*ics
= &sce
->ics
;
2561 float *in
= sce
->coeffs
;
2562 float *out
= sce
->ret
;
2563 float *saved
= sce
->saved
;
2564 float *buf
= ac
->buf_mdct
;
2567 ac
->mdct
.imdct_half(&ac
->mdct_ld
, buf
, in
);
2569 // window overlapping
2570 if (ics
->use_kb_window
[1]) {
2571 // AAC LD uses a low overlap sine window instead of a KBD window
2572 memcpy(out
, saved
, 192 * sizeof(float));
2573 ac
->fdsp
->vector_fmul_window(out
+ 192, saved
+ 192, buf
, ff_sine_128
, 64);
2574 memcpy( out
+ 320, buf
+ 64, 192 * sizeof(float));
2576 ac
->fdsp
->vector_fmul_window(out
, saved
, buf
, ff_sine_512
, 256);
2580 memcpy(saved
, buf
+ 256, 256 * sizeof(float));
/*
 * Inverse transform and windowing for the enhanced-low-delay frame layout.
 *
 * n is the frame length (n2 = n / 2, n4 = n / 4).
 *   1. The input coefficients are permuted and sign-flipped in place, in
 *      pairs taken from both ends of the array, so that a conventional
 *      half-IMDCT can be used (see the paper cited below).
 *   2. The half-IMDCT is run on the ac->mdct_ld context into ac->buf_mdct.
 *   3. Each output sample is a sum of the current buf value and up to three
 *      values from sce->saved, each weighted by an entry of
 *      ff_aac_eld_window. Three loops cover output ranges [0, n4),
 *      [n4, n4 + n2) and [n2 + n4, n).
 *   4. The history in saved is shifted up by n samples (memmove, because the
 *      regions overlap) and the new buf is stored at its start.
 *
 * NOTE(review): the declaration of n, the body of the loop after the
 * transform and the first term of the last window sum are not among the
 * lines shown here.
 */
2583 static void imdct_and_windowing_eld(AACContext
*ac
, SingleChannelElement
*sce
)
2585 float *in
= sce
->coeffs
;
2586 float *out
= sce
->ret
;
2587 float *saved
= sce
->saved
;
2588 const float *const window
= ff_aac_eld_window
;
2589 float *buf
= ac
->buf_mdct
;
2592 const int n2
= n
>> 1;
2593 const int n4
= n
>> 2;
2595 // Inverse transform, mapped to the conventional IMDCT by
2596 // Chivukula, R.K.; Reznik, Y.A.; Devarajan, V.,
2597 // "Efficient algorithms for MPEG-4 AAC-ELD, AAC-LD and AAC-LC filterbanks,"
2598 // International Conference on Audio, Language and Image Processing, ICALIP 2008.
2599 // URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=4590245&isnumber=4589950
2600 for (i
= 0; i
< n2
; i
+=2) {
2602 temp
= in
[i
]; in
[i
] = -in
[n
- 1 - i
]; in
[n
- 1 - i
] = temp
;
2603 temp
= -in
[i
+ 1]; in
[i
+ 1] = in
[n
- 2 - i
]; in
[n
- 2 - i
] = temp
;
2605 ac
->mdct
.imdct_half(&ac
->mdct_ld
, buf
, in
);
2606 for (i
= 0; i
< n
; i
+=2) {
2609 // Like with the regular IMDCT at this point we still have the middle half
2610 // of a transform but with even symmetry on the left and odd symmetry on
2613 // window overlapping
2614 // The spec says to use samples [0..511] but the reference decoder uses
2615 // samples [128..639].
2616 for (i
= n4
; i
< n2
; i
++) {
2617 out
[i
- n4
] = buf
[n2
- 1 - i
] * window
[i
- n4
] +
2618 saved
[ i
+ n2
] * window
[i
+ n
- n4
] +
2619 -saved
[ n
+ n2
- 1 - i
] * window
[i
+ 2*n
- n4
] +
2620 -saved
[2*n
+ n2
+ i
] * window
[i
+ 3*n
- n4
];
2622 for (i
= 0; i
< n2
; i
++) {
2623 out
[n4
+ i
] = buf
[i
] * window
[i
+ n2
- n4
] +
2624 -saved
[ n
- 1 - i
] * window
[i
+ n2
+ n
- n4
] +
2625 -saved
[ n
+ i
] * window
[i
+ n2
+ 2*n
- n4
] +
2626 saved
[2*n
+ n
- 1 - i
] * window
[i
+ n2
+ 3*n
- n4
];
2628 for (i
= 0; i
< n4
; i
++) {
2629 out
[n2
+ n4
+ i
] = buf
[ i
+ n2
] * window
[i
+ n
- n4
] +
2630 -saved
[ n2
- 1 - i
] * window
[i
+ 2*n
- n4
] +
2631 -saved
[ n
+ n2
+ i
] * window
[i
+ 3*n
- n4
];
// Age the history by one frame, then store the newest transform output.
2635 memmove(saved
+ n
, saved
, 2 * n
* sizeof(float));
2636 memcpy( saved
, buf
, n
* sizeof(float));
2640 * Apply dependent channel coupling (applied before IMDCT).
2642 * @param index index into coupling gain array
/*
 * Add the spectral coefficients of coupling channel element *cce, scaled per
 * band, to the coefficients of *target.
 *
 * The band layout is taken from the coupling channel's own ICS
 * (cce->ch[0].ics). If the object type is AOT_AAC_LTP an error is logged.
 * Otherwise, for each window group and each scale-factor band whose band
 * type is not ZERO_BT, the gain cce->coup.gain[index][idx] is applied to
 * every coefficient of that band in every window of the group (windows are
 * 128 coefficients apart). idx counts bands across all groups. After each
 * group, dest and src advance by group_len * 128 coefficients.
 *
 * NOTE(review): the statement following the LTP error message is not among
 * the lines shown here; presumably the function returns early. Confirm.
 */
2644 static void apply_dependent_coupling(AACContext
*ac
,
2645 SingleChannelElement
*target
,
2646 ChannelElement
*cce
, int index
)
2648 IndividualChannelStream
*ics
= &cce
->ch
[0].ics
;
2649 const uint16_t *offsets
= ics
->swb_offset
;
2650 float *dest
= target
->coeffs
;
2651 const float *src
= cce
->ch
[0].coeffs
;
2652 int g
, i
, group
, k
, idx
= 0;
2653 if (ac
->oc
[1].m4ac
.object_type
== AOT_AAC_LTP
) {
2654 av_log(ac
->avctx
, AV_LOG_ERROR
,
2655 "Dependent coupling is not supported together with LTP\n");
2658 for (g
= 0; g
< ics
->num_window_groups
; g
++) {
2659 for (i
= 0; i
< ics
->max_sfb
; i
++, idx
++) {
2660 if (cce
->ch
[0].band_type
[idx
] != ZERO_BT
) {
2661 const float gain
= cce
->coup
.gain
[index
][idx
];
2662 for (group
= 0; group
< ics
->group_len
[g
]; group
++) {
2663 for (k
= offsets
[i
]; k
< offsets
[i
+ 1]; k
++) {
2665 dest
[group
* 128 + k
] += gain
* src
[group
* 128 + k
];
// Step both pointers past the windows of the group just processed.
2670 dest
+= ics
->group_len
[g
] * 128;
2671 src
+= ics
->group_len
[g
] * 128;
2676 * Apply independent channel coupling (applied after IMDCT).
2678 * @param index index into coupling gain array
/*
 * Add the time-domain output of coupling channel element *cce to the output
 * of *target, scaled by a single gain.
 *
 * The gain is cce->coup.gain[index][0]. The number of samples processed is
 * 1024, doubled to 2048 when ac->oc[1].m4ac.sbr equals 1.
 */
2680 static void apply_independent_coupling(AACContext
*ac
,
2681 SingleChannelElement
*target
,
2682 ChannelElement
*cce
, int index
)
2685 const float gain
= cce
->coup
.gain
[index
][0];
2686 const float *src
= cce
->ch
[0].ret
;
2687 float *dest
= target
->ret
;
// The comparison yields 0 or 1, so len is 1024 or 2048.
2688 const int len
= 1024 << (ac
->oc
[1].m4ac
.sbr
== 1);
2690 for (i
= 0; i
< len
; i
++)
2691 dest
[i
] += gain
* src
[i
];
2695 * channel coupling transformation interface
2697 * @param apply_coupling_method pointer to (in)dependent coupling function
/*
 * Apply every matching coupling channel element to channel element *cc.
 *
 * All MAX_ELEM_ID slots of ac->che[TYPE_CCE] are scanned. A coupling element
 * is considered when it exists and its coupling point equals
 * coupling_point. Each of its coupled targets (indices 0..num_coupled
 * inclusive) is compared with (type, elem_id):
 *   - on a match, apply_coupling_method is called for cc->ch[0] unless
 *     ch_select is 1, and for cc->ch[1] unless ch_select is 2;
 *   - on a mismatch, index is advanced by 1, or by 2 when ch_select is 3,
 *     so that the gain index stays aligned with the gain lists.
 *
 * NOTE(review): the declaration of index and the statement controlled by
 * the `ch_select != 0` test are not among the lines shown here.
 */
2699 static void apply_channel_coupling(AACContext
*ac
, ChannelElement
*cc
,
2700 enum RawDataBlockType type
, int elem_id
,
2701 enum CouplingPoint coupling_point
,
2702 void (*apply_coupling_method
)(AACContext
*ac
, SingleChannelElement
*target
, ChannelElement
*cce
, int index
))
2706 for (i
= 0; i
< MAX_ELEM_ID
; i
++) {
2707 ChannelElement
*cce
= ac
->che
[TYPE_CCE
][i
];
2710 if (cce
&& cce
->coup
.coupling_point
== coupling_point
) {
2711 ChannelCoupling
*coup
= &cce
->coup
;
// Note the inclusive bound: num_coupled + 1 targets are examined.
2713 for (c
= 0; c
<= coup
->num_coupled
; c
++) {
2714 if (coup
->type
[c
] == type
&& coup
->id_select
[c
] == elem_id
) {
2715 if (coup
->ch_select
[c
] != 1) {
2716 apply_coupling_method(ac
, &cc
->ch
[0], cce
, index
);
2717 if (coup
->ch_select
[c
] != 0)
2720 if (coup
->ch_select
[c
] != 2)
2721 apply_coupling_method(ac
, &cc
->ch
[1], cce
, index
++);
2723 index
+= 1 + (coup
->ch_select
[c
] == 3);
2730 * Convert spectral data to float samples, applying all supported tools as appropriate.
/*
 * Turn the decoded spectra of all present channel elements into samples.
 *
 * The IMDCT/windowing routine is selected from the object type: the
 * low-delay routine, the enhanced-low-delay routine (AOT_ER_AAC_ELD), or
 * the routine stored in ac->imdct_and_windowing.
 *
 * Element types are visited from 3 down to 0, and within each type all
 * MAX_ELEM_ID ids. For every element that exists and is marked present, in
 * this order:
 *   1. dependent coupling at BEFORE_TNS (types up to TYPE_CPE);
 *   2. for AOT_AAC_LTP with predictor_present, LTP on ch[0], and on ch[1]
 *      for TYPE_CPE, where ltp.present is set;
 *   3. TNS on each channel that has it;
 *   4. dependent coupling at BETWEEN_TNS_AND_IMDCT (types up to TYPE_CPE);
 *   5. IMDCT/windowing, plus an LTP state update for AOT_AAC_LTP, unless
 *      the element is a coupling element whose coupling point is not
 *      AFTER_IMDCT; ch[1] is processed for TYPE_CPE; SBR is applied when
 *      m4ac.sbr > 0;
 *   6. independent coupling at AFTER_IMDCT (types up to TYPE_CCE).
 * The verbose "missing" message is the remaining visible statement for
 * elements that fail the present test.
 *
 * NOTE(review): the case labels of the switch and the lines around the
 * "missing" message are not among the lines shown here.
 */
2732 static void spectral_to_sample(AACContext
*ac
)
2735 void (*imdct_and_window
)(AACContext
*ac
, SingleChannelElement
*sce
);
2736 switch (ac
->oc
[1].m4ac
.object_type
) {
2738 imdct_and_window
= imdct_and_windowing_ld
;
2740 case AOT_ER_AAC_ELD
:
2741 imdct_and_window
= imdct_and_windowing_eld
;
2744 imdct_and_window
= ac
->imdct_and_windowing
;
2746 for (type
= 3; type
>= 0; type
--) {
2747 for (i
= 0; i
< MAX_ELEM_ID
; i
++) {
2748 ChannelElement
*che
= ac
->che
[type
][i
];
2749 if (che
&& che
->present
) {
2750 if (type
<= TYPE_CPE
)
2751 apply_channel_coupling(ac
, che
, type
, i
, BEFORE_TNS
, apply_dependent_coupling
);
2752 if (ac
->oc
[1].m4ac
.object_type
== AOT_AAC_LTP
) {
2753 if (che
->ch
[0].ics
.predictor_present
) {
2754 if (che
->ch
[0].ics
.ltp
.present
)
2755 ac
->apply_ltp(ac
, &che
->ch
[0]);
2756 if (che
->ch
[1].ics
.ltp
.present
&& type
== TYPE_CPE
)
2757 ac
->apply_ltp(ac
, &che
->ch
[1]);
2760 if (che
->ch
[0].tns
.present
)
2761 ac
->apply_tns(che
->ch
[0].coeffs
, &che
->ch
[0].tns
, &che
->ch
[0].ics
, 1);
2762 if (che
->ch
[1].tns
.present
)
2763 ac
->apply_tns(che
->ch
[1].coeffs
, &che
->ch
[1].tns
, &che
->ch
[1].ics
, 1);
2764 if (type
<= TYPE_CPE
)
2765 apply_channel_coupling(ac
, che
, type
, i
, BETWEEN_TNS_AND_IMDCT
, apply_dependent_coupling
);
// Coupling elements are only transformed when they couple after the IMDCT.
2766 if (type
!= TYPE_CCE
|| che
->coup
.coupling_point
== AFTER_IMDCT
) {
2767 imdct_and_window(ac
, &che
->ch
[0]);
2768 if (ac
->oc
[1].m4ac
.object_type
== AOT_AAC_LTP
)
2769 ac
->update_ltp(ac
, &che
->ch
[0]);
2770 if (type
== TYPE_CPE
) {
2771 imdct_and_window(ac
, &che
->ch
[1]);
2772 if (ac
->oc
[1].m4ac
.object_type
== AOT_AAC_LTP
)
2773 ac
->update_ltp(ac
, &che
->ch
[1]);
2775 if (ac
->oc
[1].m4ac
.sbr
> 0) {
2776 ff_sbr_apply(ac
, &che
->sbr
, type
, che
->ch
[0].ret
, che
->ch
[1].ret
);
2779 if (type
<= TYPE_CCE
)
2780 apply_channel_coupling(ac
, che
, type
, i
, AFTER_IMDCT
, apply_independent_coupling
);
2783 av_log(ac
->avctx
, AV_LOG_VERBOSE
, "ChannelElement %d.%d missing \n", type
, i
);
/*
 * Parse an ADTS frame header and update the trial output configuration
 * ac->oc[1] from it.
 *
 * Visible steps:
 *   - the header is parsed by avpriv_aac_parse_header() into hdr_info;
 *   - a one-time missing-feature report is issued when the frame carries a
 *     number of raw data blocks other than one;
 *   - the current configuration is pushed with push_output_configuration();
 *   - with a non-zero chan_config, the default channel layout for it is
 *     looked up and applied via output_configure(), using a status of at
 *     least OC_TRIAL_FRAME;
 *   - the other visible path stores chan_config = 0 and, when dmono_mode is
 *     set and ac->oc[0].status is OC_NONE, builds a layout of two front
 *     SCEs (ids 0 and 1) and configures that;
 *   - sample_rate, sampling_index and object_type are copied from the
 *     header;
 *   - sbr and ps are reset to -1 (undecided) unless ac->oc[0] is locked with
 *     the same chan_config and sample_rate as the header.
 *
 * NOTE(review): the error-return statements, the `else` line between the
 * two chan_config paths and the statement controlled by the crc_absent test
 * are not among the lines shown here.
 */
2789 static int parse_adts_frame_header(AACContext
*ac
, GetBitContext
*gb
)
2792 AACADTSHeaderInfo hdr_info
;
2793 uint8_t layout_map
[MAX_ELEM_ID
*4][3];
2794 int layout_map_tags
, ret
;
2796 size
= avpriv_aac_parse_header(gb
, &hdr_info
);
2798 if (!ac
->warned_num_aac_frames
&& hdr_info
.num_aac_frames
!= 1) {
2799 // This is 2 for "VLB " audio in NSV files.
2800 // See samples/nsv/vlb_audio.
2801 avpriv_report_missing_feature(ac
->avctx
,
2802 "More than one AAC RDB per ADTS frame");
2803 ac
->warned_num_aac_frames
= 1;
2805 push_output_configuration(ac
);
2806 if (hdr_info
.chan_config
) {
2807 ac
->oc
[1].m4ac
.chan_config
= hdr_info
.chan_config
;
2808 if ((ret
= set_default_channel_config(ac
->avctx
,
2811 hdr_info
.chan_config
)) < 0)
2813 if ((ret
= output_configure(ac
, layout_map
, layout_map_tags
,
2814 FFMAX(ac
->oc
[1].status
,
2815 OC_TRIAL_FRAME
), 0)) < 0)
2818 ac
->oc
[1].m4ac
.chan_config
= 0;
2820 * dual mono frames in Japanese DTV can have chan_config 0
2821 * WITHOUT specifying PCE.
2822 * thus, set dual mono as default.
2824 if (ac
->dmono_mode
&& ac
->oc
[0].status
== OC_NONE
) {
2825 layout_map_tags
= 2;
2826 layout_map
[0][0] = layout_map
[1][0] = TYPE_SCE
;
2827 layout_map
[0][2] = layout_map
[1][2] = AAC_CHANNEL_FRONT
;
2828 layout_map
[0][1] = 0;
2829 layout_map
[1][1] = 1;
2830 if (output_configure(ac
, layout_map
, layout_map_tags
,
2835 ac
->oc
[1].m4ac
.sample_rate
= hdr_info
.sample_rate
;
2836 ac
->oc
[1].m4ac
.sampling_index
= hdr_info
.sampling_index
;
2837 ac
->oc
[1].m4ac
.object_type
= hdr_info
.object_type
;
2838 if (ac
->oc
[0].status
!= OC_LOCKED
||
2839 ac
->oc
[0].m4ac
.chan_config
!= hdr_info
.chan_config
||
2840 ac
->oc
[0].m4ac
.sample_rate
!= hdr_info
.sample_rate
) {
2841 ac
->oc
[1].m4ac
.sbr
= -1;
2842 ac
->oc
[1].m4ac
.ps
= -1;
2844 if (!hdr_info
.crc_absent
)
/*
 * Decode one frame of an error-resilient AAC object type.
 *
 * Unlike the regular path, no element type/id is read from the bitstream:
 * the elements are taken from the default layout for chan_config
 * (aac_channel_layout_map[chan_config - 1]), tags_per_config[chan_config]
 * of them.
 *
 * Visible steps: the object type is tested for LD/ELD; elements are
 * configured with frame_configure_elements(); the profile is set to
 * object_type - 1; a chan_config outside 0..7 is rejected with a sample
 * request; each element is looked up with get_che() (a missing one is an
 * error) and decoded with decode_ics() or decode_cpe() according to its
 * type; spectral_to_sample() converts the result; the frame's nb_samples
 * and sample_rate are set; the remaining bits of gb are skipped.
 *
 * NOTE(review): the case labels, the statements controlled by the two aot
 * tests and the return statements are not among the lines shown here.
 * NOTE(review): chan_config == 0 passes the range check while the layout
 * table is indexed with chan_config - 1; this is only safe if
 * tags_per_config[0] is 0 so the loop does not run. Confirm.
 */
2850 static int aac_decode_er_frame(AVCodecContext
*avctx
, void *data
,
2851 int *got_frame_ptr
, GetBitContext
*gb
)
2853 AACContext
*ac
= avctx
->priv_data
;
2854 ChannelElement
*che
;
2857 int chan_config
= ac
->oc
[1].m4ac
.chan_config
;
2858 int aot
= ac
->oc
[1].m4ac
.object_type
;
2860 if (aot
== AOT_ER_AAC_LD
|| aot
== AOT_ER_AAC_ELD
)
2865 if ((err
= frame_configure_elements(avctx
)) < 0)
2868 // The FF_PROFILE_AAC_* defines are all object_type - 1
2869 // This may lead to an undefined profile being signaled
2870 ac
->avctx
->profile
= ac
->oc
[1].m4ac
.object_type
- 1;
2872 ac
->tags_mapped
= 0;
2874 if (chan_config
< 0 || chan_config
>= 8) {
2875 avpriv_request_sample(avctx
, "Unknown ER channel configuration %d",
2876 ac
->oc
[1].m4ac
.chan_config
);
2877 return AVERROR_INVALIDDATA
;
2879 for (i
= 0; i
< tags_per_config
[chan_config
]; i
++) {
2880 const int elem_type
= aac_channel_layout_map
[chan_config
-1][i
][0];
2881 const int elem_id
= aac_channel_layout_map
[chan_config
-1][i
][1];
2882 if (!(che
=get_che(ac
, elem_type
, elem_id
))) {
2883 av_log(ac
->avctx
, AV_LOG_ERROR
,
2884 "channel element %d.%d is not allocated\n",
2885 elem_type
, elem_id
);
2886 return AVERROR_INVALIDDATA
;
2889 if (aot
!= AOT_ER_AAC_ELD
)
2891 switch (elem_type
) {
2893 err
= decode_ics(ac
, &che
->ch
[0], gb
, 0, 0);
2896 err
= decode_cpe(ac
, gb
, che
);
2899 err
= decode_ics(ac
, &che
->ch
[0], gb
, 0, 0);
2906 spectral_to_sample(ac
);
2908 ac
->frame
->nb_samples
= samples
;
2909 ac
->frame
->sample_rate
= avctx
->sample_rate
;
2912 skip_bits_long(gb
, get_bits_left(gb
));
/*
 * Decode one raw AAC frame from gb.
 *
 * Visible steps:
 *   - if the next 12 bits are 0xfff, an ADTS header is parsed with
 *     parse_adts_frame_header(); a sampling index above 12 is rejected;
 *   - elements are configured with frame_configure_elements() and the
 *     profile is set to object_type - 1;
 *   - syntactic elements are read until TYPE_END: a 3-bit element type and
 *     a 4-bit element id. For types below TYPE_DSE the channel element is
 *     looked up with get_che() and a missing one is an error. The payload
 *     is dispatched to decode_ics(), decode_cpe(), decode_cce(),
 *     skip_data_stream_element(), decode_pce() followed by
 *     output_configure(), or decode_extension_payload();
 *   - for fill elements the 4-bit count is extended by a further 8-bit
 *     value minus one, and the payload must fit in the remaining bits;
 *   - after each element fewer than 3 remaining bits is an overread error;
 *   - spectral_to_sample() produces the output samples;
 *   - multiplier is 1 only when SBR is active and ext_sample_rate exceeds
 *     sample_rate; samples and the reported sample rate are shifted by it,
 *     and the first 32-bit little-endian field of the skip-samples side
 *     data is doubled in place;
 *   - with an output configuration and audio found, avctx->sample_rate and
 *     frame_size are updated and the configuration becomes OC_LOCKED;
 *   - for dual mono (dmono_mode set, two SCEs, stereo layout) one data
 *     plane pointer of the output frame is replaced by the other, depending
 *     on dmono_mode;
 *   - pop_output_configuration() is the last visible call; presumably it is
 *     on the failure path, confirm.
 *
 * NOTE(review): the case labels of the element switch, the error jumps and
 * several condition lines are not among the lines shown here.
 */
2916 static int aac_decode_frame_int(AVCodecContext
*avctx
, void *data
,
2917 int *got_frame_ptr
, GetBitContext
*gb
, AVPacket
*avpkt
)
2919 AACContext
*ac
= avctx
->priv_data
;
2920 ChannelElement
*che
= NULL
, *che_prev
= NULL
;
2921 enum RawDataBlockType elem_type
, elem_type_prev
= TYPE_END
;
2923 int samples
= 0, multiplier
, audio_found
= 0, pce_found
= 0;
2924 int is_dmono
, sce_count
= 0;
2928 if (show_bits(gb
, 12) == 0xfff) {
2929 if ((err
= parse_adts_frame_header(ac
, gb
)) < 0) {
2930 av_log(avctx
, AV_LOG_ERROR
, "Error decoding AAC frame header.\n");
2933 if (ac
->oc
[1].m4ac
.sampling_index
> 12) {
2934 av_log(ac
->avctx
, AV_LOG_ERROR
, "invalid sampling rate index %d\n", ac
->oc
[1].m4ac
.sampling_index
);
2935 err
= AVERROR_INVALIDDATA
;
2940 if ((err
= frame_configure_elements(avctx
)) < 0)
2943 // The FF_PROFILE_AAC_* defines are all object_type - 1
2944 // This may lead to an undefined profile being signaled
2945 ac
->avctx
->profile
= ac
->oc
[1].m4ac
.object_type
- 1;
2947 ac
->tags_mapped
= 0;
2949 while ((elem_type
= get_bits(gb
, 3)) != TYPE_END
) {
2950 elem_id
= get_bits(gb
, 4);
2952 if (avctx
->debug
& FF_DEBUG_STARTCODE
)
2953 av_log(avctx
, AV_LOG_DEBUG
, "Elem type:%x id:%x\n", elem_type
, elem_id
);
2955 if (elem_type
< TYPE_DSE
) {
2956 if (!(che
=get_che(ac
, elem_type
, elem_id
))) {
2957 av_log(ac
->avctx
, AV_LOG_ERROR
, "channel element %d.%d is not allocated\n",
2958 elem_type
, elem_id
);
2959 err
= AVERROR_INVALIDDATA
;
2966 switch (elem_type
) {
2969 err
= decode_ics(ac
, &che
->ch
[0], gb
, 0, 0);
2975 err
= decode_cpe(ac
, gb
, che
);
2980 err
= decode_cce(ac
, gb
, che
);
2984 err
= decode_ics(ac
, &che
->ch
[0], gb
, 0, 0);
2989 err
= skip_data_stream_element(ac
, gb
);
2993 uint8_t layout_map
[MAX_ELEM_ID
*4][3];
2995 push_output_configuration(ac
);
2996 tags
= decode_pce(avctx
, &ac
->oc
[1].m4ac
, layout_map
, gb
);
3002 av_log(avctx
, AV_LOG_ERROR
,
3003 "Not evaluating a further program_config_element as this construct is dubious at best.\n");
3005 err
= output_configure(ac
, layout_map
, tags
, OC_TRIAL_PCE
, 1);
3007 ac
->oc
[1].m4ac
.chan_config
= 0;
// elem_id is reused as the fill payload length in bytes from here on.
3015 elem_id
+= get_bits(gb
, 8) - 1;
3016 if (get_bits_left(gb
) < 8 * elem_id
) {
3017 av_log(avctx
, AV_LOG_ERROR
, "TYPE_FIL: "overread_err
);
3018 err
= AVERROR_INVALIDDATA
;
3022 elem_id
-= decode_extension_payload(ac
, gb
, elem_id
, che_prev
, elem_type_prev
);
3023 err
= 0; /* FIXME */
3027 err
= AVERROR_BUG
; /* should not happen, but keeps compiler happy */
3032 elem_type_prev
= elem_type
;
3037 if (get_bits_left(gb
) < 3) {
3038 av_log(avctx
, AV_LOG_ERROR
, overread_err
);
3039 err
= AVERROR_INVALIDDATA
;
3044 spectral_to_sample(ac
);
// The comparison yields 0 or 1; multiplier is used as a shift count below.
3046 multiplier
= (ac
->oc
[1].m4ac
.sbr
== 1) ? ac
->oc
[1].m4ac
.ext_sample_rate
> ac
->oc
[1].m4ac
.sample_rate
: 0;
3047 samples
<<= multiplier
;
3049 if (ac
->oc
[1].status
&& audio_found
) {
3050 avctx
->sample_rate
= ac
->oc
[1].m4ac
.sample_rate
<< multiplier
;
3051 avctx
->frame_size
= samples
;
3052 ac
->oc
[1].status
= OC_LOCKED
;
3057 const uint8_t *side
= av_packet_get_side_data(avpkt
, AV_PKT_DATA_SKIP_SAMPLES
, &side_size
);
3058 if (side
&& side_size
>=4)
3059 AV_WL32(side
, 2*AV_RL32(side
));
3062 *got_frame_ptr
= !!samples
;
3064 ac
->frame
->nb_samples
= samples
;
3065 ac
->frame
->sample_rate
= avctx
->sample_rate
;
3067 av_frame_unref(ac
->frame
);
3068 *got_frame_ptr
= !!samples
;
3070 /* for dual-mono audio (SCE + SCE) */
3071 is_dmono
= ac
->dmono_mode
&& sce_count
== 2 &&
3072 ac
->oc
[1].channel_layout
== (AV_CH_FRONT_LEFT
| AV_CH_FRONT_RIGHT
);
3074 if (ac
->dmono_mode
== 1)
3075 ((AVFrame
*)data
)->data
[1] =((AVFrame
*)data
)->data
[0];
3076 else if (ac
->dmono_mode
== 2)
3077 ((AVFrame
*)data
)->data
[0] =((AVFrame
*)data
)->data
[1];
3082 pop_output_configuration(ac
);
/*
 * Packet-level entry point of the AAC decoder.
 *
 * Visible steps:
 *   - side data for new extradata and for Japanese dual-mono signalling is
 *     fetched from the packet;
 *   - the new-extradata branch is guarded by `&& 0`, so it never runs; if
 *     enabled it would replace avctx->extradata and re-parse the audio
 *     specific config, restoring the previous configuration on failure;
 *   - when dual-mono side data is present, dmono_mode becomes
 *     1 + its first byte; a non-negative force_dmono_mode overrides it;
 *   - packets with buf_size >= INT_MAX / 8 are rejected so that the size in
 *     bits fits in an int;
 *   - a bit reader is set up over the packet and decoding is dispatched by
 *     object type to aac_decode_er_frame() or aac_decode_frame_int();
 *   - buf_consumed is the bit position rounded up to whole bytes; the bytes
 *     after it are scanned for a non-zero value.
 *
 * The return expression yields buf_consumed when the scan stopped before
 * the end of the packet (presumably at a non-zero byte; the loop exit
 * statement is not visible), and buf_size otherwise.
 *
 * NOTE(review): some case labels, error returns and declarations are not
 * among the lines shown here.
 */
3086 static int aac_decode_frame(AVCodecContext
*avctx
, void *data
,
3087 int *got_frame_ptr
, AVPacket
*avpkt
)
3089 AACContext
*ac
= avctx
->priv_data
;
3090 const uint8_t *buf
= avpkt
->data
;
3091 int buf_size
= avpkt
->size
;
3096 int new_extradata_size
;
3097 const uint8_t *new_extradata
= av_packet_get_side_data(avpkt
,
3098 AV_PKT_DATA_NEW_EXTRADATA
,
3099 &new_extradata_size
);
3100 int jp_dualmono_size
;
3101 const uint8_t *jp_dualmono
= av_packet_get_side_data(avpkt
,
3102 AV_PKT_DATA_JP_DUALMONO
,
// The constant 0 disables this branch.
3105 if (new_extradata
&& 0) {
3106 av_free(avctx
->extradata
);
3107 avctx
->extradata
= av_mallocz(new_extradata_size
+
3108 FF_INPUT_BUFFER_PADDING_SIZE
);
3109 if (!avctx
->extradata
)
3110 return AVERROR(ENOMEM
);
3111 avctx
->extradata_size
= new_extradata_size
;
3112 memcpy(avctx
->extradata
, new_extradata
, new_extradata_size
);
3113 push_output_configuration(ac
);
3114 if (decode_audio_specific_config(ac
, ac
->avctx
, &ac
->oc
[1].m4ac
,
3116 avctx
->extradata_size
*8, 1) < 0) {
3117 pop_output_configuration(ac
);
3118 return AVERROR_INVALIDDATA
;
3123 if (jp_dualmono
&& jp_dualmono_size
> 0)
3124 ac
->dmono_mode
= 1 + *jp_dualmono
;
3125 if (ac
->force_dmono_mode
>= 0)
3126 ac
->dmono_mode
= ac
->force_dmono_mode
;
// Guard the buf_size * 8 multiplication below against int overflow.
3128 if (INT_MAX
/ 8 <= buf_size
)
3129 return AVERROR_INVALIDDATA
;
3131 if ((err
= init_get_bits(&gb
, buf
, buf_size
* 8)) < 0)
3134 switch (ac
->oc
[1].m4ac
.object_type
) {
3136 case AOT_ER_AAC_LTP
:
3138 case AOT_ER_AAC_ELD
:
3139 err
= aac_decode_er_frame(avctx
, data
, got_frame_ptr
, &gb
);
3142 err
= aac_decode_frame_int(avctx
, data
, got_frame_ptr
, &gb
, avpkt
);
3147 buf_consumed
= (get_bits_count(&gb
) + 7) >> 3;
3148 for (buf_offset
= buf_consumed
; buf_offset
< buf_size
; buf_offset
++)
3149 if (buf
[buf_offset
])
3152 return buf_size
> buf_offset
? buf_consumed
: buf_size
;
/*
 * Release everything owned by the decoder context.
 *
 * For every element id and each of the four element types, the SBR context
 * of an existing channel element is closed before the element itself is
 * freed with av_freep(). The four MDCT contexts (mdct, mdct_small, mdct_ld,
 * mdct_ltp) are then torn down and ac->fdsp is freed.
 */
3155 static av_cold
int aac_decode_close(AVCodecContext
*avctx
)
3157 AACContext
*ac
= avctx
->priv_data
;
3160 for (i
= 0; i
< MAX_ELEM_ID
; i
++) {
3161 for (type
= 0; type
< 4; type
++) {
// Close the SBR context only for elements that were allocated.
3162 if (ac
->che
[type
][i
])
3163 ff_aac_sbr_ctx_close(&ac
->che
[type
][i
]->sbr
);
3164 av_freep(&ac
->che
[type
][i
]);
3168 ff_mdct_end(&ac
->mdct
);
3169 ff_mdct_end(&ac
->mdct_small
);
3170 ff_mdct_end(&ac
->mdct_ld
);
3171 ff_mdct_end(&ac
->mdct_ltp
);
3172 av_freep(&ac
->fdsp
);
// Value compared against the first 11 bits of every packet in latm_decode_frame().
3177 #define LOAS_SYNC_WORD 0x2b7 ///< 11 bits LOAS sync word
/*
 * State of the LATM decoder. The AAC decoder context is the first member
 * shown, and the LATM code reaches it through &latmctx->aac_ctx. The
 * remaining fields hold values parsed from the stream mux configuration.
 */
3179 struct LATMContext
{
3180 AACContext aac_ctx
; ///< containing AACContext
3181 int initialized
; ///< initialized after a valid extradata was seen
3184 int audio_mux_version_A
; ///< LATM syntax version
3185 int frame_length_type
; ///< 0/1 variable/fixed frame length
3186 int frame_length
; ///< frame length for fixed frame length
/*
 * Read a variable-length LATM value: a 2-bit field gives the size of the
 * value as (field + 1) bytes, i.e. 8 to 32 bits, and the value itself
 * follows.
 */
3189 static inline uint32_t latm_get_value(GetBitContext
*b
)
3191 int length
= get_bits(b
, 2);
3193 return get_bits_long(b
, (length
+1)*8);
/*
 * Parse an audio specific config embedded in a LATM stream and, when the
 * configuration is new or has changed, store it as the codec extradata.
 *
 * Visible steps:
 *   - asclen is either clamped to the remaining bits or set to all
 *     remaining bits (the condition choosing between the two is not
 *     visible);
 *   - a config that does not start on a byte boundary is rejected with
 *     AVERROR_PATCHWELCOME;
 *   - decode_audio_specific_config() parses into a local m4ac with a NULL
 *     decoder context, reading from the bit reader's buffer at the config's
 *     byte offset; a negative result is an error;
 *   - if the context is not initialized yet, or sample_rate or chan_config
 *     differ from ac->oc[1].m4ac, a message is logged, initialized is
 *     cleared, and the first esize = ceil(bits_consumed / 8) bytes of the
 *     config are copied to avctx->extradata followed by zeroed padding. The
 *     buffer is only reallocated when the existing one is smaller than
 *     esize;
 *   - the consumed bits are skipped in gb and their count is returned.
 *
 * NOTE(review): the condition guarding the visible unconditional-looking
 * `return AVERROR_INVALIDDATA` (original line 3218) is not among the lines
 * shown here.
 */
3196 static int latm_decode_audio_specific_config(struct LATMContext
*latmctx
,
3197 GetBitContext
*gb
, int asclen
)
3199 AACContext
*ac
= &latmctx
->aac_ctx
;
3200 AVCodecContext
*avctx
= ac
->avctx
;
3201 MPEG4AudioConfig m4ac
= { 0 };
3202 int config_start_bit
= get_bits_count(gb
);
3203 int sync_extension
= 0;
3204 int bits_consumed
, esize
;
3208 asclen
= FFMIN(asclen
, get_bits_left(gb
));
3210 asclen
= get_bits_left(gb
);
3212 if (config_start_bit
% 8) {
3213 avpriv_request_sample(latmctx
->aac_ctx
.avctx
,
3214 "Non-byte-aligned audio-specific config");
3215 return AVERROR_PATCHWELCOME
;
3218 return AVERROR_INVALIDDATA
;
3219 bits_consumed
= decode_audio_specific_config(NULL
, avctx
, &m4ac
,
3220 gb
->buffer
+ (config_start_bit
/ 8),
3221 asclen
, sync_extension
);
3223 if (bits_consumed
< 0)
3224 return AVERROR_INVALIDDATA
;
3226 if (!latmctx
->initialized
||
3227 ac
->oc
[1].m4ac
.sample_rate
!= m4ac
.sample_rate
||
3228 ac
->oc
[1].m4ac
.chan_config
!= m4ac
.chan_config
) {
3230 if(latmctx
->initialized
) {
3231 av_log(avctx
, AV_LOG_INFO
, "audio config changed\n");
3233 av_log(avctx
, AV_LOG_DEBUG
, "initializing latmctx\n");
3235 latmctx
->initialized
= 0;
// Size of the config in whole bytes, rounded up.
3237 esize
= (bits_consumed
+7) / 8;
3239 if (avctx
->extradata_size
< esize
) {
3240 av_free(avctx
->extradata
);
3241 avctx
->extradata
= av_malloc(esize
+ FF_INPUT_BUFFER_PADDING_SIZE
);
3242 if (!avctx
->extradata
)
3243 return AVERROR(ENOMEM
);
3246 avctx
->extradata_size
= esize
;
3247 memcpy(avctx
->extradata
, gb
->buffer
+ (config_start_bit
/8), esize
);
3248 memset(avctx
->extradata
+esize
, 0, FF_INPUT_BUFFER_PADDING_SIZE
);
3250 skip_bits_long(gb
, bits_consumed
);
3252 return bits_consumed
;
/*
 * Parse a LATM stream mux configuration.
 *
 * The first bit is audio_mux_version; when it is set a second bit gives
 * audio_mux_version_A (otherwise that stays 0). The remaining fields are
 * only parsed when audio_mux_version_A is 0:
 *   - for mux version 1, a variable-length value (taraFullness) is read and
 *     ignored;
 *   - allStreamSameTimeFraming (1 bit) and numSubFrames (6 bits) are
 *     skipped;
 *   - a non-zero numPrograms (4 bits) or numLayer (3 bits) is rejected with
 *     AVERROR_PATCHWELCOME;
 *   - the audio specific config is parsed: for mux version 0 with an
 *     unknown length (0), otherwise with an explicit length ascLen, after
 *     which ascLen bits are skipped (the helper's return value is
 *     presumably subtracted from ascLen first; that line is not visible);
 *   - frame_length_type (3 bits) selects which length fields follow: an
 *     8-bit buffer fullness, a 9-bit frame_length, a 6-bit CELP table index
 *     or a 1-bit HVXC table index;
 *   - optional other-data and CRC fields are consumed.
 *
 * NOTE(review): the case labels of the frame_length_type switch and the
 * other-data loop body are not among the lines shown here.
 */
3255 static int read_stream_mux_config(struct LATMContext
*latmctx
,
3258 int ret
, audio_mux_version
= get_bits(gb
, 1);
3260 latmctx
->audio_mux_version_A
= 0;
3261 if (audio_mux_version
)
3262 latmctx
->audio_mux_version_A
= get_bits(gb
, 1);
3264 if (!latmctx
->audio_mux_version_A
) {
3266 if (audio_mux_version
)
3267 latm_get_value(gb
); // taraFullness
3269 skip_bits(gb
, 1); // allStreamSameTimeFraming
3270 skip_bits(gb
, 6); // numSubFrames
3272 if (get_bits(gb
, 4)) { // numPrograms
3273 avpriv_request_sample(latmctx
->aac_ctx
.avctx
, "Multiple programs");
3274 return AVERROR_PATCHWELCOME
;
3277 // for each program (which there is only one in DVB)
3279 // for each layer (which there is only one in DVB)
3280 if (get_bits(gb
, 3)) { // numLayer
3281 avpriv_request_sample(latmctx
->aac_ctx
.avctx
, "Multiple layers");
3282 return AVERROR_PATCHWELCOME
;
3285 // for all but first stream: use_same_config = get_bits(gb, 1);
3286 if (!audio_mux_version
) {
3287 if ((ret
= latm_decode_audio_specific_config(latmctx
, gb
, 0)) < 0)
3290 int ascLen
= latm_get_value(gb
);
3291 if ((ret
= latm_decode_audio_specific_config(latmctx
, gb
, ascLen
)) < 0)
3294 skip_bits_long(gb
, ascLen
);
3297 latmctx
->frame_length_type
= get_bits(gb
, 3);
3298 switch (latmctx
->frame_length_type
) {
3300 skip_bits(gb
, 8); // latmBufferFullness
3303 latmctx
->frame_length
= get_bits(gb
, 9);
3308 skip_bits(gb
, 6); // CELP frame length table index
3312 skip_bits(gb
, 1); // HVXC frame length table index
3316 if (get_bits(gb
, 1)) { // other data
3317 if (audio_mux_version
) {
3318 latm_get_value(gb
); // other_data_bits
3322 esc
= get_bits(gb
, 1);
3328 if (get_bits(gb
, 1)) // crc present
3329 skip_bits(gb
, 8); // config_crc
/*
 * Determine the payload length for the current frame.
 *
 *   - frame_length_type 0: the length is the sum of consecutive 8-bit
 *     values, ending with the first value that is not 255;
 *   - frame_length_type 1: the fixed ctx->frame_length is returned;
 *   - frame_length_type 3, 5 or 7: a 2-bit coded length is skipped.
 *
 * NOTE(review): the value returned for types other than 0 and 1 is not
 * among the lines shown here.
 */
3335 static int read_payload_length_info(struct LATMContext
*ctx
, GetBitContext
*gb
)
3339 if (ctx
->frame_length_type
== 0) {
3340 int mux_slot_length
= 0;
3342 tmp
= get_bits(gb
, 8);
3343 mux_slot_length
+= tmp
;
// A byte of 255 means another length byte follows.
3344 } while (tmp
== 255);
3345 return mux_slot_length
;
3346 } else if (ctx
->frame_length_type
== 1) {
3347 return ctx
->frame_length
;
3348 } else if (ctx
->frame_length_type
== 3 ||
3349 ctx
->frame_length_type
== 5 ||
3350 ctx
->frame_length_type
== 7) {
3351 skip_bits(gb
, 2); // mux_slot_length_coded
/*
 * Parse the head of an audio mux element, up to the start of the payload.
 *
 * The first bit says whether the previous stream mux configuration is
 * reused. If not, a new one is parsed with read_stream_mux_config(). If it
 * is reused but no extradata exists yet, AVERROR(EAGAIN) is returned
 * because no decoder configuration has been seen.
 *
 * For audio_mux_version_A == 0 the payload length in bytes is read and
 * checked against the remaining bits: a payload longer than what is left
 * is an incomplete frame, and more than 256 surplus bits is reported as a
 * frame length mismatch. Both are AVERROR_INVALIDDATA.
 */
3356 static int read_audio_mux_element(struct LATMContext
*latmctx
,
3360 uint8_t use_same_mux
= get_bits(gb
, 1);
3361 if (!use_same_mux
) {
3362 if ((err
= read_stream_mux_config(latmctx
, gb
)) < 0)
3364 } else if (!latmctx
->aac_ctx
.avctx
->extradata
) {
3365 av_log(latmctx
->aac_ctx
.avctx
, AV_LOG_DEBUG
,
3366 "no decoder config found\n");
3367 return AVERROR(EAGAIN
);
3369 if (latmctx
->audio_mux_version_A
== 0) {
3370 int mux_slot_length_bytes
= read_payload_length_info(latmctx
, gb
);
3371 if (mux_slot_length_bytes
* 8 > get_bits_left(gb
)) {
3372 av_log(latmctx
->aac_ctx
.avctx
, AV_LOG_ERROR
, "incomplete frame\n");
3373 return AVERROR_INVALIDDATA
;
// Up to 256 bits of trailing data are tolerated.
3374 } else if (mux_slot_length_bytes
* 8 + 256 < get_bits_left(gb
)) {
3375 av_log(latmctx
->aac_ctx
.avctx
, AV_LOG_ERROR
,
3376 "frame length mismatch %d << %d\n",
3377 mux_slot_length_bytes
* 8, get_bits_left(gb
));
3378 return AVERROR_INVALIDDATA
;
/*
 * Packet-level entry point of the LATM/LOAS decoder.
 *
 * Visible steps:
 *   - a bit reader is set up over the packet;
 *   - the first 11 bits must equal LOAS_SYNC_WORD;
 *   - muxlength is the following 13-bit value plus 3 and must not exceed
 *     the packet size;
 *   - the audio mux element header is parsed with read_audio_mux_element();
 *   - if the context is not initialized, the extradata is parsed with
 *     decode_audio_specific_config() into the AAC context (the previous
 *     configuration is restored on failure) and initialized is set;
 *   - a payload starting with the 12-bit ADTS sync pattern 0xfff is
 *     rejected;
 *   - the payload is decoded with aac_decode_frame_int().
 *
 * NOTE(review): the statements for the missing-extradata case, the error
 * returns and the final return value are not among the lines shown here.
 */
3385 static int latm_decode_frame(AVCodecContext
*avctx
, void *out
,
3386 int *got_frame_ptr
, AVPacket
*avpkt
)
3388 struct LATMContext
*latmctx
= avctx
->priv_data
;
3392 if ((err
= init_get_bits8(&gb
, avpkt
->data
, avpkt
->size
)) < 0)
3395 // check for LOAS sync word
3396 if (get_bits(&gb
, 11) != LOAS_SYNC_WORD
)
3397 return AVERROR_INVALIDDATA
;
3399 muxlength
= get_bits(&gb
, 13) + 3;
3400 // not enough data, the parser should have sorted this out
3401 if (muxlength
> avpkt
->size
)
3402 return AVERROR_INVALIDDATA
;
3404 if ((err
= read_audio_mux_element(latmctx
, &gb
)) < 0)
3407 if (!latmctx
->initialized
) {
3408 if (!avctx
->extradata
) {
3412 push_output_configuration(&latmctx
->aac_ctx
);
3413 if ((err
= decode_audio_specific_config(
3414 &latmctx
->aac_ctx
, avctx
, &latmctx
->aac_ctx
.oc
[1].m4ac
,
3415 avctx
->extradata
, avctx
->extradata_size
*8, 1)) < 0) {
3416 pop_output_configuration(&latmctx
->aac_ctx
);
3419 latmctx
->initialized
= 1;
3423 if (show_bits(&gb
, 12) == 0xfff) {
3424 av_log(latmctx
->aac_ctx
.avctx
, AV_LOG_ERROR
,
3425 "ADTS header detected, probably as result of configuration "
3427 return AVERROR_INVALIDDATA
;
3430 if ((err
= aac_decode_frame_int(avctx
, out
, got_frame_ptr
, &gb
, avpkt
)) < 0)
/*
 * Initialize the LATM decoder by running aac_decode_init() on the same
 * codec context. When extradata was supplied, the context is marked
 * initialized only if that call returned 0.
 */
3436 static av_cold
int latm_decode_init(AVCodecContext
*avctx
)
3438 struct LATMContext
*latmctx
= avctx
->priv_data
;
3439 int ret
= aac_decode_init(avctx
);
3441 if (avctx
->extradata_size
> 0)
3442 latmctx
->initialized
= !ret
;
3447 static void aacdec_init(AACContext
*c
)
3449 c
->imdct_and_windowing
= imdct_and_windowing
;
3450 c
->apply_ltp
= apply_ltp
;
3451 c
->apply_tns
= apply_tns
;
3452 c
->windowing_and_mdct_ltp
= windowing_and_mdct_ltp
;
3453 c
->update_ltp
= update_ltp
;
3456 ff_aacdec_init_mips(c
);
3459 * AVOptions for Japanese DTV specific extensions (ADTS only)
3461 #define AACDEC_FLAGS AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
3462 static const AVOption options
[] = {
3463 {"dual_mono_mode", "Select the channel to decode for dual mono",
3464 offsetof(AACContext
, force_dmono_mode
), AV_OPT_TYPE_INT
, {.i64
=-1}, -1, 2,
3465 AACDEC_FLAGS
, "dual_mono_mode"},
3467 {"auto", "autoselection", 0, AV_OPT_TYPE_CONST
, {.i64
=-1}, INT_MIN
, INT_MAX
, AACDEC_FLAGS
, "dual_mono_mode"},
3468 {"main", "Select Main/Left channel", 0, AV_OPT_TYPE_CONST
, {.i64
= 1}, INT_MIN
, INT_MAX
, AACDEC_FLAGS
, "dual_mono_mode"},
3469 {"sub" , "Select Sub/Right channel", 0, AV_OPT_TYPE_CONST
, {.i64
= 2}, INT_MIN
, INT_MAX
, AACDEC_FLAGS
, "dual_mono_mode"},
3470 {"both", "Select both channels", 0, AV_OPT_TYPE_CONST
, {.i64
= 0}, INT_MIN
, INT_MAX
, AACDEC_FLAGS
, "dual_mono_mode"},
3475 static const AVClass aac_decoder_class
= {
3476 .class_name
= "AAC decoder",
3477 .item_name
= av_default_item_name
,
3479 .version
= LIBAVUTIL_VERSION_INT
,
3482 static const AVProfile profiles
[] = {
3483 { FF_PROFILE_AAC_MAIN
, "Main" },
3484 { FF_PROFILE_AAC_LOW
, "LC" },
3485 { FF_PROFILE_AAC_SSR
, "SSR" },
3486 { FF_PROFILE_AAC_LTP
, "LTP" },
3487 { FF_PROFILE_AAC_HE
, "HE-AAC" },
3488 { FF_PROFILE_AAC_HE_V2
, "HE-AACv2" },
3489 { FF_PROFILE_AAC_LD
, "LD" },
3490 { FF_PROFILE_AAC_ELD
, "ELD" },
3491 { FF_PROFILE_UNKNOWN
},
3494 AVCodec ff_aac_decoder
= {
3496 .long_name
= NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
3497 .type
= AVMEDIA_TYPE_AUDIO
,
3498 .id
= AV_CODEC_ID_AAC
,
3499 .priv_data_size
= sizeof(AACContext
),
3500 .init
= aac_decode_init
,
3501 .close
= aac_decode_close
,
3502 .decode
= aac_decode_frame
,
3503 .sample_fmts
= (const enum AVSampleFormat
[]) {
3504 AV_SAMPLE_FMT_FLTP
, AV_SAMPLE_FMT_NONE
3506 .capabilities
= CODEC_CAP_CHANNEL_CONF
| CODEC_CAP_DR1
,
3507 .channel_layouts
= aac_channel_layout
,
3509 .priv_class
= &aac_decoder_class
,
3510 .profiles
= profiles
,
3514 Note: This decoder filter is intended to decode LATM streams transferred
3515 in MPEG transport streams which only contain one program.
3516 To do a more complex LATM demuxing a separate LATM demuxer should be used.
3518 AVCodec ff_aac_latm_decoder
= {
3520 .long_name
= NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
3521 .type
= AVMEDIA_TYPE_AUDIO
,
3522 .id
= AV_CODEC_ID_AAC_LATM
,
3523 .priv_data_size
= sizeof(struct LATMContext
),
3524 .init
= latm_decode_init
,
3525 .close
= aac_decode_close
,
3526 .decode
= latm_decode_frame
,
3527 .sample_fmts
= (const enum AVSampleFormat
[]) {
3528 AV_SAMPLE_FMT_FLTP
, AV_SAMPLE_FMT_NONE
3530 .capabilities
= CODEC_CAP_CHANNEL_CONF
| CODEC_CAP_DR1
,
3531 .channel_layouts
= aac_channel_layout
,
3533 .profiles
= profiles
,