3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
5 * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
8 * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
9 * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
11 * This file is part of FFmpeg.
13 * FFmpeg is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Lesser General Public
15 * License as published by the Free Software Foundation; either
16 * version 2.1 of the License, or (at your option) any later version.
18 * FFmpeg is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * Lesser General Public License for more details.
23 * You should have received a copy of the GNU Lesser General Public
24 * License along with FFmpeg; if not, write to the Free Software
25 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
31 * @author Oded Shimon ( ods15 ods15 dyndns org )
32 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
39 * N (code in SoC repo) gain control
41 * Y window shapes - standard
42 * N window shapes - Low Delay
43 * Y filterbank - standard
44 * N (code in SoC repo) filterbank - Scalable Sample Rate
45 * Y Temporal Noise Shaping
46 * Y Long Term Prediction
49 * Y frequency domain prediction
50 * Y Perceptual Noise Substitution
52 * N Scalable Inverse AAC Quantization
53 * N Frequency Selective Switch
55 * Y quantization & coding - AAC
56 * N quantization & coding - TwinVQ
57 * N quantization & coding - BSAC
58 * N AAC Error Resilience tools
59 * N Error Resilience payload syntax
60 * N Error Protection tool
62 * N Silence Compression
65 * N Structured Audio tools
66 * N Structured Audio Sample Bank Format
68 * N Harmonic and Individual Lines plus Noise
69 * N Text-To-Speech Interface
70 * Y Spectral Band Replication
71 * Y (not in this code) Layer-1
72 * Y (not in this code) Layer-2
73 * Y (not in this code) Layer-3
74 * N SinuSoidal Coding (Transient, Sinusoid, Noise)
76 * N Direct Stream Transfer
77 * Y Enhanced AAC Low Delay (ER AAC ELD)
79 * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
80 * - HE AAC v2 comprises LC AAC with Spectral Band Replication and Parametric Stereo.
84 #include "libavutil/float_dsp.h"
85 #include "libavutil/opt.h"
90 #include "fmtconvert.h"
97 #include "aacdectab.h"
98 #include "cbrt_tablegen.h"
101 #include "mpeg4audio.h"
102 #include "aacadtsdec.h"
103 #include "libavutil/intfloat.h"
112 # include "arm/aac.h"
114 # include "mips/aacdec_mips.h"
117 static VLC vlc_scalefactors
;
118 static VLC vlc_spectral
[11];
120 static int output_configure(AACContext
*ac
,
121 uint8_t layout_map
[MAX_ELEM_ID
*4][3], int tags
,
122 enum OCStatus oc_type
, int get_new_frame
);
124 #define overread_err "Input buffer exhausted before END element found\n"
126 static int count_channels(uint8_t (*layout
)[3], int tags
)
129 for (i
= 0; i
< tags
; i
++) {
130 int syn_ele
= layout
[i
][0];
131 int pos
= layout
[i
][2];
132 sum
+= (1 + (syn_ele
== TYPE_CPE
)) *
133 (pos
!= AAC_CHANNEL_OFF
&& pos
!= AAC_CHANNEL_CC
);
139 * Check for the channel element in the current channel position configuration.
140 * If it exists, make sure the appropriate element is allocated and map the
141 * channel order to match the internal FFmpeg channel layout.
143 * @param che_pos current channel position configuration
144 * @param type channel element type
145 * @param id channel element id
146 * @param channels count of the number of channels in the configuration
148 * @return Returns error status. 0 - OK, !0 - error
150 static av_cold
int che_configure(AACContext
*ac
,
151 enum ChannelPosition che_pos
,
152 int type
, int id
, int *channels
)
154 if (*channels
>= MAX_CHANNELS
)
155 return AVERROR_INVALIDDATA
;
157 if (!ac
->che
[type
][id
]) {
158 if (!(ac
->che
[type
][id
] = av_mallocz(sizeof(ChannelElement
))))
159 return AVERROR(ENOMEM
);
160 ff_aac_sbr_ctx_init(ac
, &ac
->che
[type
][id
]->sbr
);
162 if (type
!= TYPE_CCE
) {
163 if (*channels
>= MAX_CHANNELS
- (type
== TYPE_CPE
|| (type
== TYPE_SCE
&& ac
->oc
[1].m4ac
.ps
== 1))) {
164 av_log(ac
->avctx
, AV_LOG_ERROR
, "Too many channels\n");
165 return AVERROR_INVALIDDATA
;
167 ac
->output_element
[(*channels
)++] = &ac
->che
[type
][id
]->ch
[0];
168 if (type
== TYPE_CPE
||
169 (type
== TYPE_SCE
&& ac
->oc
[1].m4ac
.ps
== 1)) {
170 ac
->output_element
[(*channels
)++] = &ac
->che
[type
][id
]->ch
[1];
174 if (ac
->che
[type
][id
])
175 ff_aac_sbr_ctx_close(&ac
->che
[type
][id
]->sbr
);
176 av_freep(&ac
->che
[type
][id
]);
181 static int frame_configure_elements(AVCodecContext
*avctx
)
183 AACContext
*ac
= avctx
->priv_data
;
184 int type
, id
, ch
, ret
;
186 /* set channel pointers to internal buffers by default */
187 for (type
= 0; type
< 4; type
++) {
188 for (id
= 0; id
< MAX_ELEM_ID
; id
++) {
189 ChannelElement
*che
= ac
->che
[type
][id
];
191 che
->ch
[0].ret
= che
->ch
[0].ret_buf
;
192 che
->ch
[1].ret
= che
->ch
[1].ret_buf
;
197 /* get output buffer */
198 av_frame_unref(ac
->frame
);
199 if (!avctx
->channels
)
202 ac
->frame
->nb_samples
= 2048;
203 if ((ret
= ff_get_buffer(avctx
, ac
->frame
, 0)) < 0)
206 /* map output channel pointers to AVFrame data */
207 for (ch
= 0; ch
< avctx
->channels
; ch
++) {
208 if (ac
->output_element
[ch
])
209 ac
->output_element
[ch
]->ret
= (float *)ac
->frame
->extended_data
[ch
];
/**
 * Association between one syntactic element of a layout map and the
 * output channel position(s) it is assigned to.
 */
struct elem_to_channel {
    uint64_t av_position;   /* channel mask bit(s); UINT64_MAX marks an
                             * element without a layout position */
    uint8_t  syn_ele;       /* element type, written back to layout_map[i][0] */
    uint8_t  elem_id;       /* element id, written back to layout_map[i][1] */
    uint8_t  aac_position;  /* AAC position, written back to layout_map[i][2] */
};
222 static int assign_pair(struct elem_to_channel e2c_vec
[MAX_ELEM_ID
],
223 uint8_t (*layout_map
)[3], int offset
, uint64_t left
,
224 uint64_t right
, int pos
)
226 if (layout_map
[offset
][0] == TYPE_CPE
) {
227 e2c_vec
[offset
] = (struct elem_to_channel
) {
228 .av_position
= left
| right
,
230 .elem_id
= layout_map
[offset
][1],
235 e2c_vec
[offset
] = (struct elem_to_channel
) {
238 .elem_id
= layout_map
[offset
][1],
241 e2c_vec
[offset
+ 1] = (struct elem_to_channel
) {
242 .av_position
= right
,
244 .elem_id
= layout_map
[offset
+ 1][1],
251 static int count_paired_channels(uint8_t (*layout_map
)[3], int tags
, int pos
,
254 int num_pos_channels
= 0;
258 for (i
= *current
; i
< tags
; i
++) {
259 if (layout_map
[i
][2] != pos
)
261 if (layout_map
[i
][0] == TYPE_CPE
) {
263 if (pos
== AAC_CHANNEL_FRONT
&& !first_cpe
) {
269 num_pos_channels
+= 2;
277 ((pos
== AAC_CHANNEL_FRONT
&& first_cpe
) || pos
== AAC_CHANNEL_SIDE
))
280 return num_pos_channels
;
283 static uint64_t sniff_channel_order(uint8_t (*layout_map
)[3], int tags
)
285 int i
, n
, total_non_cc_elements
;
286 struct elem_to_channel e2c_vec
[4 * MAX_ELEM_ID
] = { { 0 } };
287 int num_front_channels
, num_side_channels
, num_back_channels
;
290 if (FF_ARRAY_ELEMS(e2c_vec
) < tags
)
295 count_paired_channels(layout_map
, tags
, AAC_CHANNEL_FRONT
, &i
);
296 if (num_front_channels
< 0)
299 count_paired_channels(layout_map
, tags
, AAC_CHANNEL_SIDE
, &i
);
300 if (num_side_channels
< 0)
303 count_paired_channels(layout_map
, tags
, AAC_CHANNEL_BACK
, &i
);
304 if (num_back_channels
< 0)
308 if (num_front_channels
& 1) {
309 e2c_vec
[i
] = (struct elem_to_channel
) {
310 .av_position
= AV_CH_FRONT_CENTER
,
312 .elem_id
= layout_map
[i
][1],
313 .aac_position
= AAC_CHANNEL_FRONT
316 num_front_channels
--;
318 if (num_front_channels
>= 4) {
319 i
+= assign_pair(e2c_vec
, layout_map
, i
,
320 AV_CH_FRONT_LEFT_OF_CENTER
,
321 AV_CH_FRONT_RIGHT_OF_CENTER
,
323 num_front_channels
-= 2;
325 if (num_front_channels
>= 2) {
326 i
+= assign_pair(e2c_vec
, layout_map
, i
,
330 num_front_channels
-= 2;
332 while (num_front_channels
>= 2) {
333 i
+= assign_pair(e2c_vec
, layout_map
, i
,
337 num_front_channels
-= 2;
340 if (num_side_channels
>= 2) {
341 i
+= assign_pair(e2c_vec
, layout_map
, i
,
345 num_side_channels
-= 2;
347 while (num_side_channels
>= 2) {
348 i
+= assign_pair(e2c_vec
, layout_map
, i
,
352 num_side_channels
-= 2;
355 while (num_back_channels
>= 4) {
356 i
+= assign_pair(e2c_vec
, layout_map
, i
,
360 num_back_channels
-= 2;
362 if (num_back_channels
>= 2) {
363 i
+= assign_pair(e2c_vec
, layout_map
, i
,
367 num_back_channels
-= 2;
369 if (num_back_channels
) {
370 e2c_vec
[i
] = (struct elem_to_channel
) {
371 .av_position
= AV_CH_BACK_CENTER
,
373 .elem_id
= layout_map
[i
][1],
374 .aac_position
= AAC_CHANNEL_BACK
380 if (i
< tags
&& layout_map
[i
][2] == AAC_CHANNEL_LFE
) {
381 e2c_vec
[i
] = (struct elem_to_channel
) {
382 .av_position
= AV_CH_LOW_FREQUENCY
,
384 .elem_id
= layout_map
[i
][1],
385 .aac_position
= AAC_CHANNEL_LFE
389 while (i
< tags
&& layout_map
[i
][2] == AAC_CHANNEL_LFE
) {
390 e2c_vec
[i
] = (struct elem_to_channel
) {
391 .av_position
= UINT64_MAX
,
393 .elem_id
= layout_map
[i
][1],
394 .aac_position
= AAC_CHANNEL_LFE
399 // Must choose a stable sort
400 total_non_cc_elements
= n
= i
;
403 for (i
= 1; i
< n
; i
++)
404 if (e2c_vec
[i
- 1].av_position
> e2c_vec
[i
].av_position
) {
405 FFSWAP(struct elem_to_channel
, e2c_vec
[i
- 1], e2c_vec
[i
]);
412 for (i
= 0; i
< total_non_cc_elements
; i
++) {
413 layout_map
[i
][0] = e2c_vec
[i
].syn_ele
;
414 layout_map
[i
][1] = e2c_vec
[i
].elem_id
;
415 layout_map
[i
][2] = e2c_vec
[i
].aac_position
;
416 if (e2c_vec
[i
].av_position
!= UINT64_MAX
) {
417 layout
|= e2c_vec
[i
].av_position
;
425 * Save current output configuration if and only if it has been locked.
427 static void push_output_configuration(AACContext
*ac
) {
428 if (ac
->oc
[1].status
== OC_LOCKED
) {
429 ac
->oc
[0] = ac
->oc
[1];
431 ac
->oc
[1].status
= OC_NONE
;
435 * Restore the previous output configuration if and only if the current
436 * configuration is unlocked.
438 static void pop_output_configuration(AACContext
*ac
) {
439 if (ac
->oc
[1].status
!= OC_LOCKED
&& ac
->oc
[0].status
!= OC_NONE
) {
440 ac
->oc
[1] = ac
->oc
[0];
441 ac
->avctx
->channels
= ac
->oc
[1].channels
;
442 ac
->avctx
->channel_layout
= ac
->oc
[1].channel_layout
;
443 output_configure(ac
, ac
->oc
[1].layout_map
, ac
->oc
[1].layout_map_tags
,
444 ac
->oc
[1].status
, 0);
449 * Configure output channel order based on the current program
450 * configuration element.
452 * @return Returns error status. 0 - OK, !0 - error
454 static int output_configure(AACContext
*ac
,
455 uint8_t layout_map
[MAX_ELEM_ID
* 4][3], int tags
,
456 enum OCStatus oc_type
, int get_new_frame
)
458 AVCodecContext
*avctx
= ac
->avctx
;
459 int i
, channels
= 0, ret
;
462 if (ac
->oc
[1].layout_map
!= layout_map
) {
463 memcpy(ac
->oc
[1].layout_map
, layout_map
, tags
* sizeof(layout_map
[0]));
464 ac
->oc
[1].layout_map_tags
= tags
;
467 // Try to sniff a reasonable channel order, otherwise output the
468 // channels in the order the PCE declared them.
469 if (avctx
->request_channel_layout
!= AV_CH_LAYOUT_NATIVE
)
470 layout
= sniff_channel_order(layout_map
, tags
);
471 for (i
= 0; i
< tags
; i
++) {
472 int type
= layout_map
[i
][0];
473 int id
= layout_map
[i
][1];
474 int position
= layout_map
[i
][2];
475 // Allocate or free elements depending on if they are in the
476 // current program configuration.
477 ret
= che_configure(ac
, position
, type
, id
, &channels
);
481 if (ac
->oc
[1].m4ac
.ps
== 1 && channels
== 2) {
482 if (layout
== AV_CH_FRONT_CENTER
) {
483 layout
= AV_CH_FRONT_LEFT
|AV_CH_FRONT_RIGHT
;
489 memcpy(ac
->tag_che_map
, ac
->che
, 4 * MAX_ELEM_ID
* sizeof(ac
->che
[0][0]));
490 if (layout
) avctx
->channel_layout
= layout
;
491 ac
->oc
[1].channel_layout
= layout
;
492 avctx
->channels
= ac
->oc
[1].channels
= channels
;
493 ac
->oc
[1].status
= oc_type
;
496 if ((ret
= frame_configure_elements(ac
->avctx
)) < 0)
503 static void flush(AVCodecContext
*avctx
)
505 AACContext
*ac
= avctx
->priv_data
;
508 for (type
= 3; type
>= 0; type
--) {
509 for (i
= 0; i
< MAX_ELEM_ID
; i
++) {
510 ChannelElement
*che
= ac
->che
[type
][i
];
512 for (j
= 0; j
<= 1; j
++) {
513 memset(che
->ch
[j
].saved
, 0, sizeof(che
->ch
[j
].saved
));
521 * Set up channel positions based on a default channel configuration
522 * as specified in table 1.17.
524 * @return Returns error status. 0 - OK, !0 - error
526 static int set_default_channel_config(AVCodecContext
*avctx
,
527 uint8_t (*layout_map
)[3],
531 if (channel_config
< 1 || channel_config
> 7) {
532 av_log(avctx
, AV_LOG_ERROR
,
533 "invalid default channel configuration (%d)\n",
535 return AVERROR_INVALIDDATA
;
537 *tags
= tags_per_config
[channel_config
];
538 memcpy(layout_map
, aac_channel_layout_map
[channel_config
- 1],
539 *tags
* sizeof(*layout_map
));
542 * AAC specification has 7.1(wide) as a default layout for 8-channel streams.
543 * However, at least Nero AAC encoder encodes 7.1 streams using the default
544 * channel config 7, mapping the side channels of the original audio stream
545 * to the second AAC_CHANNEL_FRONT pair in the AAC stream. Similarly, e.g. FAAD
546 * decodes the second AAC_CHANNEL_FRONT pair as side channels, therefore decoding
547 * the incorrect streams as if they were correct (and as the encoder intended).
549 * As actual intended 7.1(wide) streams are very rare, default to assuming a
550 * 7.1 layout was intended.
552 if (channel_config
== 7 && avctx
->strict_std_compliance
< FF_COMPLIANCE_STRICT
) {
553 av_log(avctx
, AV_LOG_INFO
, "Assuming an incorrectly encoded 7.1 channel layout"
554 " instead of a spec-compliant 7.1(wide) layout, use -strict %d to decode"
555 " according to the specification instead.\n", FF_COMPLIANCE_STRICT
);
556 layout_map
[2][2] = AAC_CHANNEL_SIDE
;
562 static ChannelElement
*get_che(AACContext
*ac
, int type
, int elem_id
)
564 /* For PCE based channel configurations map the channels solely based on tags. */
566 if (!ac
->oc
[1].m4ac
.chan_config
) {
567 return ac
->tag_che_map
[type
][elem_id
];
569 // Allow single CPE stereo files to be signalled with mono configuration.
570 if (!ac
->tags_mapped
&& type
== TYPE_CPE
&&
571 ac
->oc
[1].m4ac
.chan_config
== 1) {
572 uint8_t layout_map
[MAX_ELEM_ID
*4][3];
574 push_output_configuration(ac
);
576 av_log(ac
->avctx
, AV_LOG_DEBUG
, "mono with CPE\n");
578 if (set_default_channel_config(ac
->avctx
, layout_map
,
579 &layout_map_tags
, 2) < 0)
581 if (output_configure(ac
, layout_map
, layout_map_tags
,
582 OC_TRIAL_FRAME
, 1) < 0)
585 ac
->oc
[1].m4ac
.chan_config
= 2;
586 ac
->oc
[1].m4ac
.ps
= 0;
589 if (!ac
->tags_mapped
&& type
== TYPE_SCE
&&
590 ac
->oc
[1].m4ac
.chan_config
== 2) {
591 uint8_t layout_map
[MAX_ELEM_ID
* 4][3];
593 push_output_configuration(ac
);
595 av_log(ac
->avctx
, AV_LOG_DEBUG
, "stereo with SCE\n");
597 if (set_default_channel_config(ac
->avctx
, layout_map
,
598 &layout_map_tags
, 1) < 0)
600 if (output_configure(ac
, layout_map
, layout_map_tags
,
601 OC_TRIAL_FRAME
, 1) < 0)
604 ac
->oc
[1].m4ac
.chan_config
= 1;
605 if (ac
->oc
[1].m4ac
.sbr
)
606 ac
->oc
[1].m4ac
.ps
= -1;
608 /* For indexed channel configurations map the channels solely based on position. */
610 switch (ac
->oc
[1].m4ac
.chan_config
) {
612 if (ac
->tags_mapped
== 3 && type
== TYPE_CPE
) {
614 return ac
->tag_che_map
[TYPE_CPE
][elem_id
] = ac
->che
[TYPE_CPE
][2];
617 /* Some streams incorrectly code 5.1 audio as
618 * SCE[0] CPE[0] CPE[1] SCE[1]
620 * instead of SCE[0] CPE[0] CPE[1] LFE[0].
621 * If we seem to have encountered such a stream, transfer
622 * the LFE[0] element to the SCE[1]'s mapping */
623 if (ac
->tags_mapped
== tags_per_config
[ac
->oc
[1].m4ac
.chan_config
] - 1 && (type
== TYPE_LFE
|| type
== TYPE_SCE
)) {
625 return ac
->tag_che_map
[type
][elem_id
] = ac
->che
[TYPE_LFE
][0];
628 if (ac
->tags_mapped
== 2 && type
== TYPE_CPE
) {
630 return ac
->tag_che_map
[TYPE_CPE
][elem_id
] = ac
->che
[TYPE_CPE
][1];
633 if (ac
->tags_mapped
== 2 &&
634 ac
->oc
[1].m4ac
.chan_config
== 4 &&
637 return ac
->tag_che_map
[TYPE_SCE
][elem_id
] = ac
->che
[TYPE_SCE
][1];
641 if (ac
->tags_mapped
== (ac
->oc
[1].m4ac
.chan_config
!= 2) &&
644 return ac
->tag_che_map
[TYPE_CPE
][elem_id
] = ac
->che
[TYPE_CPE
][0];
645 } else if (ac
->oc
[1].m4ac
.chan_config
== 2) {
649 if (!ac
->tags_mapped
&& type
== TYPE_SCE
) {
651 return ac
->tag_che_map
[TYPE_SCE
][elem_id
] = ac
->che
[TYPE_SCE
][0];
659 * Decode an array of 4 bit element IDs, optionally interleaved with a
660 * stereo/mono switching bit.
662 * @param type speaker type/position for these channels
664 static void decode_channel_map(uint8_t layout_map
[][3],
665 enum ChannelPosition type
,
666 GetBitContext
*gb
, int n
)
669 enum RawDataBlockType syn_ele
;
671 case AAC_CHANNEL_FRONT
:
672 case AAC_CHANNEL_BACK
:
673 case AAC_CHANNEL_SIDE
:
674 syn_ele
= get_bits1(gb
);
680 case AAC_CHANNEL_LFE
:
686 layout_map
[0][0] = syn_ele
;
687 layout_map
[0][1] = get_bits(gb
, 4);
688 layout_map
[0][2] = type
;
694 * Decode program configuration element; reference: table 4.2.
696 * @return Returns error status. 0 - OK, !0 - error
698 static int decode_pce(AVCodecContext
*avctx
, MPEG4AudioConfig
*m4ac
,
699 uint8_t (*layout_map
)[3],
702 int num_front
, num_side
, num_back
, num_lfe
, num_assoc_data
, num_cc
;
707 skip_bits(gb
, 2); // object_type
709 sampling_index
= get_bits(gb
, 4);
710 if (m4ac
->sampling_index
!= sampling_index
)
711 av_log(avctx
, AV_LOG_WARNING
,
712 "Sample rate index in program config element does not "
713 "match the sample rate index configured by the container.\n");
715 num_front
= get_bits(gb
, 4);
716 num_side
= get_bits(gb
, 4);
717 num_back
= get_bits(gb
, 4);
718 num_lfe
= get_bits(gb
, 2);
719 num_assoc_data
= get_bits(gb
, 3);
720 num_cc
= get_bits(gb
, 4);
723 skip_bits(gb
, 4); // mono_mixdown_tag
725 skip_bits(gb
, 4); // stereo_mixdown_tag
728 skip_bits(gb
, 3); // mixdown_coeff_index and pseudo_surround
730 if (get_bits_left(gb
) < 4 * (num_front
+ num_side
+ num_back
+ num_lfe
+ num_assoc_data
+ num_cc
)) {
731 av_log(avctx
, AV_LOG_ERROR
, "decode_pce: " overread_err
);
734 decode_channel_map(layout_map
, AAC_CHANNEL_FRONT
, gb
, num_front
);
736 decode_channel_map(layout_map
+ tags
, AAC_CHANNEL_SIDE
, gb
, num_side
);
738 decode_channel_map(layout_map
+ tags
, AAC_CHANNEL_BACK
, gb
, num_back
);
740 decode_channel_map(layout_map
+ tags
, AAC_CHANNEL_LFE
, gb
, num_lfe
);
743 skip_bits_long(gb
, 4 * num_assoc_data
);
745 decode_channel_map(layout_map
+ tags
, AAC_CHANNEL_CC
, gb
, num_cc
);
750 /* comment field, first byte is length */
751 comment_len
= get_bits(gb
, 8) * 8;
752 if (get_bits_left(gb
) < comment_len
) {
753 av_log(avctx
, AV_LOG_ERROR
, "decode_pce: " overread_err
);
754 return AVERROR_INVALIDDATA
;
756 skip_bits_long(gb
, comment_len
);
761 * Decode GA "General Audio" specific configuration; reference: table 4.1.
763 * @param ac pointer to AACContext, may be null
764 * @param avctx pointer to AVCodecContext, used for logging
766 * @return Returns error status. 0 - OK, !0 - error
768 static int decode_ga_specific_config(AACContext
*ac
, AVCodecContext
*avctx
,
770 MPEG4AudioConfig
*m4ac
,
773 int extension_flag
, ret
, ep_config
, res_flags
;
774 uint8_t layout_map
[MAX_ELEM_ID
*4][3];
777 if (get_bits1(gb
)) { // frameLengthFlag
778 avpriv_request_sample(avctx
, "960/120 MDCT window");
779 return AVERROR_PATCHWELCOME
;
782 if (get_bits1(gb
)) // dependsOnCoreCoder
783 skip_bits(gb
, 14); // coreCoderDelay
784 extension_flag
= get_bits1(gb
);
786 if (m4ac
->object_type
== AOT_AAC_SCALABLE
||
787 m4ac
->object_type
== AOT_ER_AAC_SCALABLE
)
788 skip_bits(gb
, 3); // layerNr
790 if (channel_config
== 0) {
791 skip_bits(gb
, 4); // element_instance_tag
792 tags
= decode_pce(avctx
, m4ac
, layout_map
, gb
);
796 if ((ret
= set_default_channel_config(avctx
, layout_map
,
797 &tags
, channel_config
)))
801 if (count_channels(layout_map
, tags
) > 1) {
803 } else if (m4ac
->sbr
== 1 && m4ac
->ps
== -1)
806 if (ac
&& (ret
= output_configure(ac
, layout_map
, tags
, OC_GLOBAL_HDR
, 0)))
809 if (extension_flag
) {
810 switch (m4ac
->object_type
) {
812 skip_bits(gb
, 5); // numOfSubFrame
813 skip_bits(gb
, 11); // layer_length
817 case AOT_ER_AAC_SCALABLE
:
819 res_flags
= get_bits(gb
, 3);
821 avpriv_report_missing_feature(avctx
,
822 "AAC data resilience (flags %x)",
824 return AVERROR_PATCHWELCOME
;
828 skip_bits1(gb
); // extensionFlag3 (TBD in version 3)
830 switch (m4ac
->object_type
) {
833 case AOT_ER_AAC_SCALABLE
:
835 ep_config
= get_bits(gb
, 2);
837 avpriv_report_missing_feature(avctx
,
838 "epConfig %d", ep_config
);
839 return AVERROR_PATCHWELCOME
;
845 static int decode_eld_specific_config(AACContext
*ac
, AVCodecContext
*avctx
,
847 MPEG4AudioConfig
*m4ac
,
850 int ret
, ep_config
, res_flags
;
851 uint8_t layout_map
[MAX_ELEM_ID
*4][3];
853 const int ELDEXT_TERM
= 0;
858 if (get_bits1(gb
)) { // frameLengthFlag
859 avpriv_request_sample(avctx
, "960/120 MDCT window");
860 return AVERROR_PATCHWELCOME
;
863 res_flags
= get_bits(gb
, 3);
865 avpriv_report_missing_feature(avctx
,
866 "AAC data resilience (flags %x)",
868 return AVERROR_PATCHWELCOME
;
871 if (get_bits1(gb
)) { // ldSbrPresentFlag
872 avpriv_report_missing_feature(avctx
,
874 return AVERROR_PATCHWELCOME
;
877 while (get_bits(gb
, 4) != ELDEXT_TERM
) {
878 int len
= get_bits(gb
, 4);
880 len
+= get_bits(gb
, 8);
882 len
+= get_bits(gb
, 16);
883 if (get_bits_left(gb
) < len
* 8 + 4) {
884 av_log(ac
->avctx
, AV_LOG_ERROR
, overread_err
);
885 return AVERROR_INVALIDDATA
;
887 skip_bits_long(gb
, 8 * len
);
890 if ((ret
= set_default_channel_config(avctx
, layout_map
,
891 &tags
, channel_config
)))
894 if (ac
&& (ret
= output_configure(ac
, layout_map
, tags
, OC_GLOBAL_HDR
, 0)))
897 ep_config
= get_bits(gb
, 2);
899 avpriv_report_missing_feature(avctx
,
900 "epConfig %d", ep_config
);
901 return AVERROR_PATCHWELCOME
;
907 * Decode audio specific configuration; reference: table 1.13.
909 * @param ac pointer to AACContext, may be null
910 * @param avctx pointer to AVCodecContext, used for logging
911 * @param m4ac pointer to MPEG4AudioConfig, used for parsing
912 * @param data pointer to buffer holding an audio specific config
913 * @param bit_size size of audio specific config or data in bits
914 * @param sync_extension look for an appended sync extension
916 * @return Returns error status or number of consumed bits. <0 - error
918 static int decode_audio_specific_config(AACContext
*ac
,
919 AVCodecContext
*avctx
,
920 MPEG4AudioConfig
*m4ac
,
921 const uint8_t *data
, int bit_size
,
927 av_dlog(avctx
, "audio specific config size %d\n", bit_size
>> 3);
928 for (i
= 0; i
< bit_size
>> 3; i
++)
929 av_dlog(avctx
, "%02x ", data
[i
]);
930 av_dlog(avctx
, "\n");
932 if ((ret
= init_get_bits(&gb
, data
, bit_size
)) < 0)
935 if ((i
= avpriv_mpeg4audio_get_config(m4ac
, data
, bit_size
,
936 sync_extension
)) < 0)
937 return AVERROR_INVALIDDATA
;
938 if (m4ac
->sampling_index
> 12) {
939 av_log(avctx
, AV_LOG_ERROR
,
940 "invalid sampling rate index %d\n",
941 m4ac
->sampling_index
);
942 return AVERROR_INVALIDDATA
;
944 if (m4ac
->object_type
== AOT_ER_AAC_LD
&&
945 (m4ac
->sampling_index
< 3 || m4ac
->sampling_index
> 7)) {
946 av_log(avctx
, AV_LOG_ERROR
,
947 "invalid low delay sampling rate index %d\n",
948 m4ac
->sampling_index
);
949 return AVERROR_INVALIDDATA
;
952 skip_bits_long(&gb
, i
);
954 switch (m4ac
->object_type
) {
960 if ((ret
= decode_ga_specific_config(ac
, avctx
, &gb
,
961 m4ac
, m4ac
->chan_config
)) < 0)
965 if ((ret
= decode_eld_specific_config(ac
, avctx
, &gb
,
966 m4ac
, m4ac
->chan_config
)) < 0)
970 avpriv_report_missing_feature(avctx
,
971 "Audio object type %s%d",
972 m4ac
->sbr
== 1 ? "SBR+" : "",
974 return AVERROR(ENOSYS
);
978 "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n",
979 m4ac
->object_type
, m4ac
->chan_config
, m4ac
->sampling_index
,
980 m4ac
->sample_rate
, m4ac
->sbr
,
983 return get_bits_count(&gb
);
987 * linear congruential pseudorandom number generator
989 * @param previous_val pointer to the current state of the generator
991 * @return Returns a 32-bit pseudorandom integer
993 static av_always_inline
int lcg_random(unsigned previous_val
)
995 union { unsigned u
; int s
; } v
= { previous_val
* 1664525u + 1013904223 };
999 static av_always_inline
void reset_predict_state(PredictorState
*ps
)
1009 static void reset_all_predictors(PredictorState
*ps
)
1012 for (i
= 0; i
< MAX_PREDICTORS
; i
++)
1013 reset_predict_state(&ps
[i
]);
/**
 * Map a sample rate in Hz to a sampling frequency index.
 *
 * The thresholds are lower bounds: the first threshold that rate reaches
 * selects the index. Rates below the lowest threshold (9391 Hz) fall
 * through to index 11.
 *
 * @param rate sample rate in Hz
 *
 * @return sampling frequency index in the range 0..11
 */
static int sample_rate_idx (int rate)
{
    if (92017 <= rate) return 0;
    if (75132 <= rate) return 1;
    if (55426 <= rate) return 2;
    if (46009 <= rate) return 3;
    if (37566 <= rate) return 4;
    if (27713 <= rate) return 5;
    if (23004 <= rate) return 6;
    if (18783 <= rate) return 7;
    if (13856 <= rate) return 8;
    if (11502 <= rate) return 9;
    if (9391  <= rate) return 10;
    return 11;
}
1032 static void reset_predictor_group(PredictorState
*ps
, int group_num
)
1035 for (i
= group_num
- 1; i
< MAX_PREDICTORS
; i
+= 30)
1036 reset_predict_state(&ps
[i
]);
/* Initialise the static VLC table for spectral codebook `num`, using a
 * static table of `size` entries. The bits and codes arrays are passed with
 * their element size as both wrap and size arguments. No trailing semicolon
 * is included; call sites supply it. */
#define AAC_INIT_VLC_STATIC(num, size)                                     \
    INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num],     \
        ff_aac_spectral_bits[num], sizeof(ff_aac_spectral_bits[num][0]),   \
                                   sizeof(ff_aac_spectral_bits[num][0]),   \
        ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), \
                                    sizeof(ff_aac_spectral_codes[num][0]), \
        size)
1047 static void aacdec_init(AACContext
*ac
);
1049 static av_cold
int aac_decode_init(AVCodecContext
*avctx
)
1051 AACContext
*ac
= avctx
->priv_data
;
1055 ac
->oc
[1].m4ac
.sample_rate
= avctx
->sample_rate
;
1059 avctx
->sample_fmt
= AV_SAMPLE_FMT_FLTP
;
1061 if (avctx
->extradata_size
> 0) {
1062 if ((ret
= decode_audio_specific_config(ac
, ac
->avctx
, &ac
->oc
[1].m4ac
,
1064 avctx
->extradata_size
* 8,
1069 uint8_t layout_map
[MAX_ELEM_ID
*4][3];
1070 int layout_map_tags
;
1072 sr
= sample_rate_idx(avctx
->sample_rate
);
1073 ac
->oc
[1].m4ac
.sampling_index
= sr
;
1074 ac
->oc
[1].m4ac
.channels
= avctx
->channels
;
1075 ac
->oc
[1].m4ac
.sbr
= -1;
1076 ac
->oc
[1].m4ac
.ps
= -1;
1078 for (i
= 0; i
< FF_ARRAY_ELEMS(ff_mpeg4audio_channels
); i
++)
1079 if (ff_mpeg4audio_channels
[i
] == avctx
->channels
)
1081 if (i
== FF_ARRAY_ELEMS(ff_mpeg4audio_channels
)) {
1084 ac
->oc
[1].m4ac
.chan_config
= i
;
1086 if (ac
->oc
[1].m4ac
.chan_config
) {
1087 int ret
= set_default_channel_config(avctx
, layout_map
,
1088 &layout_map_tags
, ac
->oc
[1].m4ac
.chan_config
);
1090 output_configure(ac
, layout_map
, layout_map_tags
,
1092 else if (avctx
->err_recognition
& AV_EF_EXPLODE
)
1093 return AVERROR_INVALIDDATA
;
1097 if (avctx
->channels
> MAX_CHANNELS
) {
1098 av_log(avctx
, AV_LOG_ERROR
, "Too many channels\n");
1099 return AVERROR_INVALIDDATA
;
1102 AAC_INIT_VLC_STATIC( 0, 304);
1103 AAC_INIT_VLC_STATIC( 1, 270);
1104 AAC_INIT_VLC_STATIC( 2, 550);
1105 AAC_INIT_VLC_STATIC( 3, 300);
1106 AAC_INIT_VLC_STATIC( 4, 328);
1107 AAC_INIT_VLC_STATIC( 5, 294);
1108 AAC_INIT_VLC_STATIC( 6, 306);
1109 AAC_INIT_VLC_STATIC( 7, 268);
1110 AAC_INIT_VLC_STATIC( 8, 510);
1111 AAC_INIT_VLC_STATIC( 9, 366);
1112 AAC_INIT_VLC_STATIC(10, 462);
1116 ff_fmt_convert_init(&ac
->fmt_conv
, avctx
);
1117 avpriv_float_dsp_init(&ac
->fdsp
, avctx
->flags
& CODEC_FLAG_BITEXACT
);
1119 ac
->random_state
= 0x1f2e3d4c;
1123 INIT_VLC_STATIC(&vlc_scalefactors
, 7,
1124 FF_ARRAY_ELEMS(ff_aac_scalefactor_code
),
1125 ff_aac_scalefactor_bits
,
1126 sizeof(ff_aac_scalefactor_bits
[0]),
1127 sizeof(ff_aac_scalefactor_bits
[0]),
1128 ff_aac_scalefactor_code
,
1129 sizeof(ff_aac_scalefactor_code
[0]),
1130 sizeof(ff_aac_scalefactor_code
[0]),
1133 ff_mdct_init(&ac
->mdct
, 11, 1, 1.0 / (32768.0 * 1024.0));
1134 ff_mdct_init(&ac
->mdct_ld
, 10, 1, 1.0 / (32768.0 * 512.0));
1135 ff_mdct_init(&ac
->mdct_small
, 8, 1, 1.0 / (32768.0 * 128.0));
1136 ff_mdct_init(&ac
->mdct_ltp
, 11, 0, -2.0 * 32768.0);
1137 // window initialization
1138 ff_kbd_window_init(ff_aac_kbd_long_1024
, 4.0, 1024);
1139 ff_kbd_window_init(ff_aac_kbd_short_128
, 6.0, 128);
1140 ff_init_ff_sine_windows(10);
1141 ff_init_ff_sine_windows( 9);
1142 ff_init_ff_sine_windows( 7);
1150 * Skip data_stream_element; reference: table 4.10.
1152 static int skip_data_stream_element(AACContext
*ac
, GetBitContext
*gb
)
1154 int byte_align
= get_bits1(gb
);
1155 int count
= get_bits(gb
, 8);
1157 count
+= get_bits(gb
, 8);
1161 if (get_bits_left(gb
) < 8 * count
) {
1162 av_log(ac
->avctx
, AV_LOG_ERROR
, "skip_data_stream_element: "overread_err
);
1163 return AVERROR_INVALIDDATA
;
1165 skip_bits_long(gb
, 8 * count
);
1169 static int decode_prediction(AACContext
*ac
, IndividualChannelStream
*ics
,
1173 if (get_bits1(gb
)) {
1174 ics
->predictor_reset_group
= get_bits(gb
, 5);
1175 if (ics
->predictor_reset_group
== 0 ||
1176 ics
->predictor_reset_group
> 30) {
1177 av_log(ac
->avctx
, AV_LOG_ERROR
,
1178 "Invalid Predictor Reset Group.\n");
1179 return AVERROR_INVALIDDATA
;
1182 for (sfb
= 0; sfb
< FFMIN(ics
->max_sfb
, ff_aac_pred_sfb_max
[ac
->oc
[1].m4ac
.sampling_index
]); sfb
++) {
1183 ics
->prediction_used
[sfb
] = get_bits1(gb
);
1189 * Decode Long Term Prediction data; reference: table 4.xx.
1191 static void decode_ltp(LongTermPrediction
*ltp
,
1192 GetBitContext
*gb
, uint8_t max_sfb
)
1196 ltp
->lag
= get_bits(gb
, 11);
1197 ltp
->coef
= ltp_coef
[get_bits(gb
, 3)];
1198 for (sfb
= 0; sfb
< FFMIN(max_sfb
, MAX_LTP_LONG_SFB
); sfb
++)
1199 ltp
->used
[sfb
] = get_bits1(gb
);
1203 * Decode Individual Channel Stream info; reference: table 4.6.
1205 static int decode_ics_info(AACContext
*ac
, IndividualChannelStream
*ics
,
1208 int aot
= ac
->oc
[1].m4ac
.object_type
;
1209 if (aot
!= AOT_ER_AAC_ELD
) {
1210 if (get_bits1(gb
)) {
1211 av_log(ac
->avctx
, AV_LOG_ERROR
, "Reserved bit set.\n");
1212 return AVERROR_INVALIDDATA
;
1214 ics
->window_sequence
[1] = ics
->window_sequence
[0];
1215 ics
->window_sequence
[0] = get_bits(gb
, 2);
1216 if (aot
== AOT_ER_AAC_LD
&&
1217 ics
->window_sequence
[0] != ONLY_LONG_SEQUENCE
) {
1218 av_log(ac
->avctx
, AV_LOG_ERROR
,
1219 "AAC LD is only defined for ONLY_LONG_SEQUENCE but "
1220 "window sequence %d found.\n", ics
->window_sequence
[0]);
1221 ics
->window_sequence
[0] = ONLY_LONG_SEQUENCE
;
1222 return AVERROR_INVALIDDATA
;
1224 ics
->use_kb_window
[1] = ics
->use_kb_window
[0];
1225 ics
->use_kb_window
[0] = get_bits1(gb
);
1227 ics
->num_window_groups
= 1;
1228 ics
->group_len
[0] = 1;
1229 if (ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
) {
1231 ics
->max_sfb
= get_bits(gb
, 4);
1232 for (i
= 0; i
< 7; i
++) {
1233 if (get_bits1(gb
)) {
1234 ics
->group_len
[ics
->num_window_groups
- 1]++;
1236 ics
->num_window_groups
++;
1237 ics
->group_len
[ics
->num_window_groups
- 1] = 1;
1240 ics
->num_windows
= 8;
1241 ics
->swb_offset
= ff_swb_offset_128
[ac
->oc
[1].m4ac
.sampling_index
];
1242 ics
->num_swb
= ff_aac_num_swb_128
[ac
->oc
[1].m4ac
.sampling_index
];
1243 ics
->tns_max_bands
= ff_tns_max_bands_128
[ac
->oc
[1].m4ac
.sampling_index
];
1244 ics
->predictor_present
= 0;
1246 ics
->max_sfb
= get_bits(gb
, 6);
1247 ics
->num_windows
= 1;
1248 if (aot
== AOT_ER_AAC_LD
|| aot
== AOT_ER_AAC_ELD
) {
1249 ics
->swb_offset
= ff_swb_offset_512
[ac
->oc
[1].m4ac
.sampling_index
];
1250 ics
->num_swb
= ff_aac_num_swb_512
[ac
->oc
[1].m4ac
.sampling_index
];
1251 ics
->tns_max_bands
= ff_tns_max_bands_512
[ac
->oc
[1].m4ac
.sampling_index
];
1252 if (!ics
->num_swb
|| !ics
->swb_offset
)
1255 ics
->swb_offset
= ff_swb_offset_1024
[ac
->oc
[1].m4ac
.sampling_index
];
1256 ics
->num_swb
= ff_aac_num_swb_1024
[ac
->oc
[1].m4ac
.sampling_index
];
1257 ics
->tns_max_bands
= ff_tns_max_bands_1024
[ac
->oc
[1].m4ac
.sampling_index
];
1259 if (aot
!= AOT_ER_AAC_ELD
) {
1260 ics
->predictor_present
= get_bits1(gb
);
1261 ics
->predictor_reset_group
= 0;
1263 if (ics
->predictor_present
) {
1264 if (aot
== AOT_AAC_MAIN
) {
1265 if (decode_prediction(ac
, ics
, gb
)) {
1268 } else if (aot
== AOT_AAC_LC
||
1269 aot
== AOT_ER_AAC_LC
) {
1270 av_log(ac
->avctx
, AV_LOG_ERROR
,
1271 "Prediction is not allowed in AAC-LC.\n");
1274 if (aot
== AOT_ER_AAC_LD
) {
1275 av_log(ac
->avctx
, AV_LOG_ERROR
,
1276 "LTP in ER AAC LD not yet implemented.\n");
1277 return AVERROR_PATCHWELCOME
;
1279 if ((ics
->ltp
.present
= get_bits(gb
, 1)))
1280 decode_ltp(&ics
->ltp
, gb
, ics
->max_sfb
);
1285 if (ics
->max_sfb
> ics
->num_swb
) {
1286 av_log(ac
->avctx
, AV_LOG_ERROR
,
1287 "Number of scalefactor bands in group (%d) "
1288 "exceeds limit (%d).\n",
1289 ics
->max_sfb
, ics
->num_swb
);
1296 return AVERROR_INVALIDDATA
;
1300 * Decode band types (section_data payload); reference: table 4.46.
1302 * @param band_type array of the used band type
1303 * @param band_type_run_end array of the last scalefactor band of a band type run
1305 * @return Returns error status. 0 - OK, !0 - error
1307 static int decode_band_types(AACContext
*ac
, enum BandType band_type
[120],
1308 int band_type_run_end
[120], GetBitContext
*gb
,
1309 IndividualChannelStream
*ics
)
1312 const int bits
= (ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
) ? 3 : 5;
1313 for (g
= 0; g
< ics
->num_window_groups
; g
++) {
1315 while (k
< ics
->max_sfb
) {
1316 uint8_t sect_end
= k
;
1318 int sect_band_type
= get_bits(gb
, 4);
1319 if (sect_band_type
== 12) {
1320 av_log(ac
->avctx
, AV_LOG_ERROR
, "invalid band type\n");
1321 return AVERROR_INVALIDDATA
;
1324 sect_len_incr
= get_bits(gb
, bits
);
1325 sect_end
+= sect_len_incr
;
1326 if (get_bits_left(gb
) < 0) {
1327 av_log(ac
->avctx
, AV_LOG_ERROR
, "decode_band_types: "overread_err
);
1328 return AVERROR_INVALIDDATA
;
1330 if (sect_end
> ics
->max_sfb
) {
1331 av_log(ac
->avctx
, AV_LOG_ERROR
,
1332 "Number of bands (%d) exceeds limit (%d).\n",
1333 sect_end
, ics
->max_sfb
);
1334 return AVERROR_INVALIDDATA
;
1336 } while (sect_len_incr
== (1 << bits
) - 1);
1337 for (; k
< sect_end
; k
++) {
1338 band_type
[idx
] = sect_band_type
;
1339 band_type_run_end
[idx
++] = sect_end
;
1347 * Decode scalefactors; reference: table 4.47.
1349 * @param global_gain first scalefactor value as scalefactors are differentially coded
1350 * @param band_type array of the used band type
1351 * @param band_type_run_end array of the last scalefactor band of a band type run
1352 * @param sf array of scalefactors or intensity stereo positions
1354 * @return Returns error status. 0 - OK, !0 - error
1356 static int decode_scalefactors(AACContext
*ac
, float sf
[120], GetBitContext
*gb
,
1357 unsigned int global_gain
,
1358 IndividualChannelStream
*ics
,
1359 enum BandType band_type
[120],
1360 int band_type_run_end
[120])
1363 int offset
[3] = { global_gain
, global_gain
- 90, 0 };
1366 for (g
= 0; g
< ics
->num_window_groups
; g
++) {
1367 for (i
= 0; i
< ics
->max_sfb
;) {
1368 int run_end
= band_type_run_end
[idx
];
1369 if (band_type
[idx
] == ZERO_BT
) {
1370 for (; i
< run_end
; i
++, idx
++)
1372 } else if ((band_type
[idx
] == INTENSITY_BT
) ||
1373 (band_type
[idx
] == INTENSITY_BT2
)) {
1374 for (; i
< run_end
; i
++, idx
++) {
1375 offset
[2] += get_vlc2(gb
, vlc_scalefactors
.table
, 7, 3) - 60;
1376 clipped_offset
= av_clip(offset
[2], -155, 100);
1377 if (offset
[2] != clipped_offset
) {
1378 avpriv_request_sample(ac
->avctx
,
1379 "If you heard an audible artifact, there may be a bug in the decoder. "
1380 "Clipped intensity stereo position (%d -> %d)",
1381 offset
[2], clipped_offset
);
1383 sf
[idx
] = ff_aac_pow2sf_tab
[-clipped_offset
+ POW_SF2_ZERO
];
1385 } else if (band_type
[idx
] == NOISE_BT
) {
1386 for (; i
< run_end
; i
++, idx
++) {
1387 if (noise_flag
-- > 0)
1388 offset
[1] += get_bits(gb
, 9) - 256;
1390 offset
[1] += get_vlc2(gb
, vlc_scalefactors
.table
, 7, 3) - 60;
1391 clipped_offset
= av_clip(offset
[1], -100, 155);
1392 if (offset
[1] != clipped_offset
) {
1393 avpriv_request_sample(ac
->avctx
,
1394 "If you heard an audible artifact, there may be a bug in the decoder. "
1395 "Clipped noise gain (%d -> %d)",
1396 offset
[1], clipped_offset
);
1398 sf
[idx
] = -ff_aac_pow2sf_tab
[clipped_offset
+ POW_SF2_ZERO
];
1401 for (; i
< run_end
; i
++, idx
++) {
1402 offset
[0] += get_vlc2(gb
, vlc_scalefactors
.table
, 7, 3) - 60;
1403 if (offset
[0] > 255U) {
1404 av_log(ac
->avctx
, AV_LOG_ERROR
,
1405 "Scalefactor (%d) out of range.\n", offset
[0]);
1406 return AVERROR_INVALIDDATA
;
1408 sf
[idx
] = -ff_aac_pow2sf_tab
[offset
[0] - 100 + POW_SF2_ZERO
];
1417 * Decode pulse data; reference: table 4.7.
1419 static int decode_pulses(Pulse
*pulse
, GetBitContext
*gb
,
1420 const uint16_t *swb_offset
, int num_swb
)
1423 pulse
->num_pulse
= get_bits(gb
, 2) + 1;
1424 pulse_swb
= get_bits(gb
, 6);
1425 if (pulse_swb
>= num_swb
)
1427 pulse
->pos
[0] = swb_offset
[pulse_swb
];
1428 pulse
->pos
[0] += get_bits(gb
, 5);
1429 if (pulse
->pos
[0] >= swb_offset
[num_swb
])
1431 pulse
->amp
[0] = get_bits(gb
, 4);
1432 for (i
= 1; i
< pulse
->num_pulse
; i
++) {
1433 pulse
->pos
[i
] = get_bits(gb
, 5) + pulse
->pos
[i
- 1];
1434 if (pulse
->pos
[i
] >= swb_offset
[num_swb
])
1436 pulse
->amp
[i
] = get_bits(gb
, 4);
1442 * Decode Temporal Noise Shaping data; reference: table 4.48.
1444 * @return Returns error status. 0 - OK, !0 - error
1446 static int decode_tns(AACContext
*ac
, TemporalNoiseShaping
*tns
,
1447 GetBitContext
*gb
, const IndividualChannelStream
*ics
)
1449 int w
, filt
, i
, coef_len
, coef_res
, coef_compress
;
1450 const int is8
= ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
;
1451 const int tns_max_order
= is8
? 7 : ac
->oc
[1].m4ac
.object_type
== AOT_AAC_MAIN
? 20 : 12;
1452 for (w
= 0; w
< ics
->num_windows
; w
++) {
1453 if ((tns
->n_filt
[w
] = get_bits(gb
, 2 - is8
))) {
1454 coef_res
= get_bits1(gb
);
1456 for (filt
= 0; filt
< tns
->n_filt
[w
]; filt
++) {
1458 tns
->length
[w
][filt
] = get_bits(gb
, 6 - 2 * is8
);
1460 if ((tns
->order
[w
][filt
] = get_bits(gb
, 5 - 2 * is8
)) > tns_max_order
) {
1461 av_log(ac
->avctx
, AV_LOG_ERROR
,
1462 "TNS filter order %d is greater than maximum %d.\n",
1463 tns
->order
[w
][filt
], tns_max_order
);
1464 tns
->order
[w
][filt
] = 0;
1465 return AVERROR_INVALIDDATA
;
1467 if (tns
->order
[w
][filt
]) {
1468 tns
->direction
[w
][filt
] = get_bits1(gb
);
1469 coef_compress
= get_bits1(gb
);
1470 coef_len
= coef_res
+ 3 - coef_compress
;
1471 tmp2_idx
= 2 * coef_compress
+ coef_res
;
1473 for (i
= 0; i
< tns
->order
[w
][filt
]; i
++)
1474 tns
->coef
[w
][filt
][i
] = tns_tmp2_map
[tmp2_idx
][get_bits(gb
, coef_len
)];
1483 * Decode Mid/Side data; reference: table 4.54.
1485 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
1486 * [1] mask is decoded from bitstream; [2] mask is all 1s;
1487 * [3] reserved for scalable AAC
1489 static void decode_mid_side_stereo(ChannelElement
*cpe
, GetBitContext
*gb
,
1493 if (ms_present
== 1) {
1495 idx
< cpe
->ch
[0].ics
.num_window_groups
* cpe
->ch
[0].ics
.max_sfb
;
1497 cpe
->ms_mask
[idx
] = get_bits1(gb
);
1498 } else if (ms_present
== 2) {
1499 memset(cpe
->ms_mask
, 1, sizeof(cpe
->ms_mask
[0]) * cpe
->ch
[0].ics
.num_window_groups
* cpe
->ch
[0].ics
.max_sfb
);
/**
 * Write two scaled table values to dst.
 *
 * idx packs two 4-bit table indices: bits 0-3 select the first value and
 * bits 4-7 the second.  Each selected entry of v is multiplied by *scale.
 *
 * @param dst   output position; two floats are written
 * @param v     lookup table, indexed with values 0..15
 * @param idx   packed indices
 * @param scale pointer to the common scale factor
 *
 * @return dst advanced past the two written values
 */
static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    float s = *scale;
    *dst++ = v[idx    & 15] * s;
    *dst++ = v[idx>>4 & 15] * s;
    return dst;
}
/**
 * Write four scaled table values to dst.
 *
 * idx packs four 2-bit table indices in its low byte, lowest pair first.
 * Each selected entry of v is multiplied by *scale.
 *
 * @param dst   output position; four floats are written
 * @param v     lookup table, indexed with values 0..3
 * @param idx   packed indices
 * @param scale pointer to the common scale factor
 *
 * @return dst advanced past the four written values
 */
static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    float s = *scale;
    *dst++ = v[idx    & 3] * s;
    *dst++ = v[idx>>2 & 3] * s;
    *dst++ = v[idx>>4 & 3] * s;
    *dst++ = v[idx>>6 & 3] * s;
    return dst;
}
1528 static inline float *VMUL2S(float *dst
, const float *v
, unsigned idx
,
1529 unsigned sign
, const float *scale
)
1531 union av_intfloat32 s0
, s1
;
1533 s0
.f
= s1
.f
= *scale
;
1534 s0
.i
^= sign
>> 1 << 31;
1537 *dst
++ = v
[idx
& 15] * s0
.f
;
1538 *dst
++ = v
[idx
>>4 & 15] * s1
.f
;
1545 static inline float *VMUL4S(float *dst
, const float *v
, unsigned idx
,
1546 unsigned sign
, const float *scale
)
1548 unsigned nz
= idx
>> 12;
1549 union av_intfloat32 s
= { .f
= *scale
};
1550 union av_intfloat32 t
;
1552 t
.i
= s
.i
^ (sign
& 1U<<31);
1553 *dst
++ = v
[idx
& 3] * t
.f
;
1555 sign
<<= nz
& 1; nz
>>= 1;
1556 t
.i
= s
.i
^ (sign
& 1U<<31);
1557 *dst
++ = v
[idx
>>2 & 3] * t
.f
;
1559 sign
<<= nz
& 1; nz
>>= 1;
1560 t
.i
= s
.i
^ (sign
& 1U<<31);
1561 *dst
++ = v
[idx
>>4 & 3] * t
.f
;
1564 t
.i
= s
.i
^ (sign
& 1U<<31);
1565 *dst
++ = v
[idx
>>6 & 3] * t
.f
;
1572 * Decode spectral data; reference: table 4.50.
1573 * Dequantize and scale spectral data; reference: 4.6.3.3.
1575 * @param coef array of dequantized, scaled spectral data
1576 * @param sf array of scalefactors or intensity stereo positions
1577 * @param pulse_present set if pulses are present
1578 * @param pulse pointer to pulse data struct
1579 * @param band_type array of the used band type
1581 * @return Returns error status. 0 - OK, !0 - error
1583 static int decode_spectrum_and_dequant(AACContext
*ac
, float coef
[1024],
1584 GetBitContext
*gb
, const float sf
[120],
1585 int pulse_present
, const Pulse
*pulse
,
1586 const IndividualChannelStream
*ics
,
1587 enum BandType band_type
[120])
1589 int i
, k
, g
, idx
= 0;
1590 const int c
= 1024 / ics
->num_windows
;
1591 const uint16_t *offsets
= ics
->swb_offset
;
1592 float *coef_base
= coef
;
1594 for (g
= 0; g
< ics
->num_windows
; g
++)
1595 memset(coef
+ g
* 128 + offsets
[ics
->max_sfb
], 0,
1596 sizeof(float) * (c
- offsets
[ics
->max_sfb
]));
1598 for (g
= 0; g
< ics
->num_window_groups
; g
++) {
1599 unsigned g_len
= ics
->group_len
[g
];
1601 for (i
= 0; i
< ics
->max_sfb
; i
++, idx
++) {
1602 const unsigned cbt_m1
= band_type
[idx
] - 1;
1603 float *cfo
= coef
+ offsets
[i
];
1604 int off_len
= offsets
[i
+ 1] - offsets
[i
];
1607 if (cbt_m1
>= INTENSITY_BT2
- 1) {
1608 for (group
= 0; group
< g_len
; group
++, cfo
+=128) {
1609 memset(cfo
, 0, off_len
* sizeof(float));
1611 } else if (cbt_m1
== NOISE_BT
- 1) {
1612 for (group
= 0; group
< g_len
; group
++, cfo
+=128) {
1616 for (k
= 0; k
< off_len
; k
++) {
1617 ac
->random_state
= lcg_random(ac
->random_state
);
1618 cfo
[k
] = ac
->random_state
;
1621 band_energy
= ac
->fdsp
.scalarproduct_float(cfo
, cfo
, off_len
);
1622 scale
= sf
[idx
] / sqrtf(band_energy
);
1623 ac
->fdsp
.vector_fmul_scalar(cfo
, cfo
, scale
, off_len
);
1626 const float *vq
= ff_aac_codebook_vector_vals
[cbt_m1
];
1627 const uint16_t *cb_vector_idx
= ff_aac_codebook_vector_idx
[cbt_m1
];
1628 VLC_TYPE (*vlc_tab
)[2] = vlc_spectral
[cbt_m1
].table
;
1629 OPEN_READER(re
, gb
);
1631 switch (cbt_m1
>> 1) {
1633 for (group
= 0; group
< g_len
; group
++, cfo
+=128) {
1641 UPDATE_CACHE(re
, gb
);
1642 GET_VLC(code
, re
, gb
, vlc_tab
, 8, 2);
1643 cb_idx
= cb_vector_idx
[code
];
1644 cf
= VMUL4(cf
, vq
, cb_idx
, sf
+ idx
);
1650 for (group
= 0; group
< g_len
; group
++, cfo
+=128) {
1660 UPDATE_CACHE(re
, gb
);
1661 GET_VLC(code
, re
, gb
, vlc_tab
, 8, 2);
1662 cb_idx
= cb_vector_idx
[code
];
1663 nnz
= cb_idx
>> 8 & 15;
1664 bits
= nnz
? GET_CACHE(re
, gb
) : 0;
1665 LAST_SKIP_BITS(re
, gb
, nnz
);
1666 cf
= VMUL4S(cf
, vq
, cb_idx
, bits
, sf
+ idx
);
1672 for (group
= 0; group
< g_len
; group
++, cfo
+=128) {
1680 UPDATE_CACHE(re
, gb
);
1681 GET_VLC(code
, re
, gb
, vlc_tab
, 8, 2);
1682 cb_idx
= cb_vector_idx
[code
];
1683 cf
= VMUL2(cf
, vq
, cb_idx
, sf
+ idx
);
1690 for (group
= 0; group
< g_len
; group
++, cfo
+=128) {
1700 UPDATE_CACHE(re
, gb
);
1701 GET_VLC(code
, re
, gb
, vlc_tab
, 8, 2);
1702 cb_idx
= cb_vector_idx
[code
];
1703 nnz
= cb_idx
>> 8 & 15;
1704 sign
= nnz
? SHOW_UBITS(re
, gb
, nnz
) << (cb_idx
>> 12) : 0;
1705 LAST_SKIP_BITS(re
, gb
, nnz
);
1706 cf
= VMUL2S(cf
, vq
, cb_idx
, sign
, sf
+ idx
);
1712 for (group
= 0; group
< g_len
; group
++, cfo
+=128) {
1714 uint32_t *icf
= (uint32_t *) cf
;
1724 UPDATE_CACHE(re
, gb
);
1725 GET_VLC(code
, re
, gb
, vlc_tab
, 8, 2);
1733 cb_idx
= cb_vector_idx
[code
];
1736 bits
= SHOW_UBITS(re
, gb
, nnz
) << (32-nnz
);
1737 LAST_SKIP_BITS(re
, gb
, nnz
);
1739 for (j
= 0; j
< 2; j
++) {
1743 /* The total length of escape_sequence must be < 22 bits according
1744 to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1745 UPDATE_CACHE(re
, gb
);
1746 b
= GET_CACHE(re
, gb
);
1747 b
= 31 - av_log2(~b
);
1750 av_log(ac
->avctx
, AV_LOG_ERROR
, "error in spectral data, ESC overflow\n");
1751 return AVERROR_INVALIDDATA
;
1754 SKIP_BITS(re
, gb
, b
+ 1);
1756 n
= (1 << b
) + SHOW_UBITS(re
, gb
, b
);
1757 LAST_SKIP_BITS(re
, gb
, b
);
1758 *icf
++ = cbrt_tab
[n
] | (bits
& 1U<<31);
1761 unsigned v
= ((const uint32_t*)vq
)[cb_idx
& 15];
1762 *icf
++ = (bits
& 1U<<31) | v
;
1769 ac
->fdsp
.vector_fmul_scalar(cfo
, cfo
, sf
[idx
], off_len
);
1773 CLOSE_READER(re
, gb
);
1779 if (pulse_present
) {
1781 for (i
= 0; i
< pulse
->num_pulse
; i
++) {
1782 float co
= coef_base
[ pulse
->pos
[i
] ];
1783 while (offsets
[idx
+ 1] <= pulse
->pos
[i
])
1785 if (band_type
[idx
] != NOISE_BT
&& sf
[idx
]) {
1786 float ico
= -pulse
->amp
[i
];
1789 ico
= co
/ sqrtf(sqrtf(fabsf(co
))) + (co
> 0 ? -ico
: ico
);
1791 coef_base
[ pulse
->pos
[i
] ] = cbrtf(fabsf(ico
)) * ico
* sf
[idx
];
1798 static av_always_inline
float flt16_round(float pf
)
1800 union av_intfloat32 tmp
;
1802 tmp
.i
= (tmp
.i
+ 0x00008000U
) & 0xFFFF0000U
;
1806 static av_always_inline
float flt16_even(float pf
)
1808 union av_intfloat32 tmp
;
1810 tmp
.i
= (tmp
.i
+ 0x00007FFFU
+ (tmp
.i
& 0x00010000U
>> 16)) & 0xFFFF0000U
;
1814 static av_always_inline
float flt16_trunc(float pf
)
1816 union av_intfloat32 pun
;
1818 pun
.i
&= 0xFFFF0000U
;
1822 static av_always_inline
void predict(PredictorState
*ps
, float *coef
,
1825 const float a
= 0.953125; // 61.0 / 64
1826 const float alpha
= 0.90625; // 29.0 / 32
1830 float r0
= ps
->r0
, r1
= ps
->r1
;
1831 float cor0
= ps
->cor0
, cor1
= ps
->cor1
;
1832 float var0
= ps
->var0
, var1
= ps
->var1
;
1834 k1
= var0
> 1 ? cor0
* flt16_even(a
/ var0
) : 0;
1835 k2
= var1
> 1 ? cor1
* flt16_even(a
/ var1
) : 0;
1837 pv
= flt16_round(k1
* r0
+ k2
* r1
);
1844 ps
->cor1
= flt16_trunc(alpha
* cor1
+ r1
* e1
);
1845 ps
->var1
= flt16_trunc(alpha
* var1
+ 0.5f
* (r1
* r1
+ e1
* e1
));
1846 ps
->cor0
= flt16_trunc(alpha
* cor0
+ r0
* e0
);
1847 ps
->var0
= flt16_trunc(alpha
* var0
+ 0.5f
* (r0
* r0
+ e0
* e0
));
1849 ps
->r1
= flt16_trunc(a
* (r0
- k1
* e0
));
1850 ps
->r0
= flt16_trunc(a
* e0
);
1854 * Apply AAC-Main style frequency domain prediction.
1856 static void apply_prediction(AACContext
*ac
, SingleChannelElement
*sce
)
1860 if (!sce
->ics
.predictor_initialized
) {
1861 reset_all_predictors(sce
->predictor_state
);
1862 sce
->ics
.predictor_initialized
= 1;
1865 if (sce
->ics
.window_sequence
[0] != EIGHT_SHORT_SEQUENCE
) {
1867 sfb
< ff_aac_pred_sfb_max
[ac
->oc
[1].m4ac
.sampling_index
];
1869 for (k
= sce
->ics
.swb_offset
[sfb
];
1870 k
< sce
->ics
.swb_offset
[sfb
+ 1];
1872 predict(&sce
->predictor_state
[k
], &sce
->coeffs
[k
],
1873 sce
->ics
.predictor_present
&&
1874 sce
->ics
.prediction_used
[sfb
]);
1877 if (sce
->ics
.predictor_reset_group
)
1878 reset_predictor_group(sce
->predictor_state
,
1879 sce
->ics
.predictor_reset_group
);
1881 reset_all_predictors(sce
->predictor_state
);
1885 * Decode an individual_channel_stream payload; reference: table 4.44.
1887 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
1888 * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
1890 * @return Returns error status. 0 - OK, !0 - error
1892 static int decode_ics(AACContext
*ac
, SingleChannelElement
*sce
,
1893 GetBitContext
*gb
, int common_window
, int scale_flag
)
1896 TemporalNoiseShaping
*tns
= &sce
->tns
;
1897 IndividualChannelStream
*ics
= &sce
->ics
;
1898 float *out
= sce
->coeffs
;
1899 int global_gain
, eld_syntax
, er_syntax
, pulse_present
= 0;
1902 eld_syntax
= ac
->oc
[1].m4ac
.object_type
== AOT_ER_AAC_ELD
;
1903 er_syntax
= ac
->oc
[1].m4ac
.object_type
== AOT_ER_AAC_LC
||
1904 ac
->oc
[1].m4ac
.object_type
== AOT_ER_AAC_LTP
||
1905 ac
->oc
[1].m4ac
.object_type
== AOT_ER_AAC_LD
||
1906 ac
->oc
[1].m4ac
.object_type
== AOT_ER_AAC_ELD
;
1908 /* This assignment is to silence a GCC warning about the variable being used
1909 * uninitialized when in fact it always is.
1911 pulse
.num_pulse
= 0;
1913 global_gain
= get_bits(gb
, 8);
1915 if (!common_window
&& !scale_flag
) {
1916 if (decode_ics_info(ac
, ics
, gb
) < 0)
1917 return AVERROR_INVALIDDATA
;
1920 if ((ret
= decode_band_types(ac
, sce
->band_type
,
1921 sce
->band_type_run_end
, gb
, ics
)) < 0)
1923 if ((ret
= decode_scalefactors(ac
, sce
->sf
, gb
, global_gain
, ics
,
1924 sce
->band_type
, sce
->band_type_run_end
)) < 0)
1929 if (!eld_syntax
&& (pulse_present
= get_bits1(gb
))) {
1930 if (ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
) {
1931 av_log(ac
->avctx
, AV_LOG_ERROR
,
1932 "Pulse tool not allowed in eight short sequence.\n");
1933 return AVERROR_INVALIDDATA
;
1935 if (decode_pulses(&pulse
, gb
, ics
->swb_offset
, ics
->num_swb
)) {
1936 av_log(ac
->avctx
, AV_LOG_ERROR
,
1937 "Pulse data corrupt or invalid.\n");
1938 return AVERROR_INVALIDDATA
;
1941 tns
->present
= get_bits1(gb
);
1942 if (tns
->present
&& !er_syntax
)
1943 if (decode_tns(ac
, tns
, gb
, ics
) < 0)
1944 return AVERROR_INVALIDDATA
;
1945 if (!eld_syntax
&& get_bits1(gb
)) {
1946 avpriv_request_sample(ac
->avctx
, "SSR");
1947 return AVERROR_PATCHWELCOME
;
1949 // I see no textual basis in the spec for this occurring after SSR gain
1950 // control, but this is what both reference and real implementations do
1951 if (tns
->present
&& er_syntax
)
1952 if (decode_tns(ac
, tns
, gb
, ics
) < 0)
1953 return AVERROR_INVALIDDATA
;
1956 if (decode_spectrum_and_dequant(ac
, out
, gb
, sce
->sf
, pulse_present
,
1957 &pulse
, ics
, sce
->band_type
) < 0)
1958 return AVERROR_INVALIDDATA
;
1960 if (ac
->oc
[1].m4ac
.object_type
== AOT_AAC_MAIN
&& !common_window
)
1961 apply_prediction(ac
, sce
);
1967 * Mid/Side stereo decoding; reference: 4.6.8.1.3.
1969 static void apply_mid_side_stereo(AACContext
*ac
, ChannelElement
*cpe
)
1971 const IndividualChannelStream
*ics
= &cpe
->ch
[0].ics
;
1972 float *ch0
= cpe
->ch
[0].coeffs
;
1973 float *ch1
= cpe
->ch
[1].coeffs
;
1974 int g
, i
, group
, idx
= 0;
1975 const uint16_t *offsets
= ics
->swb_offset
;
1976 for (g
= 0; g
< ics
->num_window_groups
; g
++) {
1977 for (i
= 0; i
< ics
->max_sfb
; i
++, idx
++) {
1978 if (cpe
->ms_mask
[idx
] &&
1979 cpe
->ch
[0].band_type
[idx
] < NOISE_BT
&&
1980 cpe
->ch
[1].band_type
[idx
] < NOISE_BT
) {
1981 for (group
= 0; group
< ics
->group_len
[g
]; group
++) {
1982 ac
->fdsp
.butterflies_float(ch0
+ group
* 128 + offsets
[i
],
1983 ch1
+ group
* 128 + offsets
[i
],
1984 offsets
[i
+1] - offsets
[i
]);
1988 ch0
+= ics
->group_len
[g
] * 128;
1989 ch1
+= ics
->group_len
[g
] * 128;
1994 * intensity stereo decoding; reference: 4.6.8.2.3
1996 * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
1997 * [1] mask is decoded from bitstream; [2] mask is all 1s;
1998 * [3] reserved for scalable AAC
2000 static void apply_intensity_stereo(AACContext
*ac
,
2001 ChannelElement
*cpe
, int ms_present
)
2003 const IndividualChannelStream
*ics
= &cpe
->ch
[1].ics
;
2004 SingleChannelElement
*sce1
= &cpe
->ch
[1];
2005 float *coef0
= cpe
->ch
[0].coeffs
, *coef1
= cpe
->ch
[1].coeffs
;
2006 const uint16_t *offsets
= ics
->swb_offset
;
2007 int g
, group
, i
, idx
= 0;
2010 for (g
= 0; g
< ics
->num_window_groups
; g
++) {
2011 for (i
= 0; i
< ics
->max_sfb
;) {
2012 if (sce1
->band_type
[idx
] == INTENSITY_BT
||
2013 sce1
->band_type
[idx
] == INTENSITY_BT2
) {
2014 const int bt_run_end
= sce1
->band_type_run_end
[idx
];
2015 for (; i
< bt_run_end
; i
++, idx
++) {
2016 c
= -1 + 2 * (sce1
->band_type
[idx
] - 14);
2018 c
*= 1 - 2 * cpe
->ms_mask
[idx
];
2019 scale
= c
* sce1
->sf
[idx
];
2020 for (group
= 0; group
< ics
->group_len
[g
]; group
++)
2021 ac
->fdsp
.vector_fmul_scalar(coef1
+ group
* 128 + offsets
[i
],
2022 coef0
+ group
* 128 + offsets
[i
],
2024 offsets
[i
+ 1] - offsets
[i
]);
2027 int bt_run_end
= sce1
->band_type_run_end
[idx
];
2028 idx
+= bt_run_end
- i
;
2032 coef0
+= ics
->group_len
[g
] * 128;
2033 coef1
+= ics
->group_len
[g
] * 128;
2038 * Decode a channel_pair_element; reference: table 4.4.
2040 * @return Returns error status. 0 - OK, !0 - error
2042 static int decode_cpe(AACContext
*ac
, GetBitContext
*gb
, ChannelElement
*cpe
)
2044 int i
, ret
, common_window
, ms_present
= 0;
2045 int eld_syntax
= ac
->oc
[1].m4ac
.object_type
== AOT_ER_AAC_ELD
;
2047 common_window
= eld_syntax
|| get_bits1(gb
);
2048 if (common_window
) {
2049 if (decode_ics_info(ac
, &cpe
->ch
[0].ics
, gb
))
2050 return AVERROR_INVALIDDATA
;
2051 i
= cpe
->ch
[1].ics
.use_kb_window
[0];
2052 cpe
->ch
[1].ics
= cpe
->ch
[0].ics
;
2053 cpe
->ch
[1].ics
.use_kb_window
[1] = i
;
2054 if (cpe
->ch
[1].ics
.predictor_present
&&
2055 (ac
->oc
[1].m4ac
.object_type
!= AOT_AAC_MAIN
))
2056 if ((cpe
->ch
[1].ics
.ltp
.present
= get_bits(gb
, 1)))
2057 decode_ltp(&cpe
->ch
[1].ics
.ltp
, gb
, cpe
->ch
[1].ics
.max_sfb
);
2058 ms_present
= get_bits(gb
, 2);
2059 if (ms_present
== 3) {
2060 av_log(ac
->avctx
, AV_LOG_ERROR
, "ms_present = 3 is reserved.\n");
2061 return AVERROR_INVALIDDATA
;
2062 } else if (ms_present
)
2063 decode_mid_side_stereo(cpe
, gb
, ms_present
);
2065 if ((ret
= decode_ics(ac
, &cpe
->ch
[0], gb
, common_window
, 0)))
2067 if ((ret
= decode_ics(ac
, &cpe
->ch
[1], gb
, common_window
, 0)))
2070 if (common_window
) {
2072 apply_mid_side_stereo(ac
, cpe
);
2073 if (ac
->oc
[1].m4ac
.object_type
== AOT_AAC_MAIN
) {
2074 apply_prediction(ac
, &cpe
->ch
[0]);
2075 apply_prediction(ac
, &cpe
->ch
[1]);
2079 apply_intensity_stereo(ac
, cpe
, ms_present
);
2083 static const float cce_scale
[] = {
2084 1.09050773266525765921, //2^(1/8)
2085 1.18920711500272106672, //2^(1/4)
2091 * Decode coupling_channel_element; reference: table 4.8.
2093 * @return Returns error status. 0 - OK, !0 - error
2095 static int decode_cce(AACContext
*ac
, GetBitContext
*gb
, ChannelElement
*che
)
2101 SingleChannelElement
*sce
= &che
->ch
[0];
2102 ChannelCoupling
*coup
= &che
->coup
;
2104 coup
->coupling_point
= 2 * get_bits1(gb
);
2105 coup
->num_coupled
= get_bits(gb
, 3);
2106 for (c
= 0; c
<= coup
->num_coupled
; c
++) {
2108 coup
->type
[c
] = get_bits1(gb
) ? TYPE_CPE
: TYPE_SCE
;
2109 coup
->id_select
[c
] = get_bits(gb
, 4);
2110 if (coup
->type
[c
] == TYPE_CPE
) {
2111 coup
->ch_select
[c
] = get_bits(gb
, 2);
2112 if (coup
->ch_select
[c
] == 3)
2115 coup
->ch_select
[c
] = 2;
2117 coup
->coupling_point
+= get_bits1(gb
) || (coup
->coupling_point
>> 1);
2119 sign
= get_bits(gb
, 1);
2120 scale
= cce_scale
[get_bits(gb
, 2)];
2122 if ((ret
= decode_ics(ac
, sce
, gb
, 0, 0)))
2125 for (c
= 0; c
< num_gain
; c
++) {
2129 float gain_cache
= 1.0;
2131 cge
= coup
->coupling_point
== AFTER_IMDCT
? 1 : get_bits1(gb
);
2132 gain
= cge
? get_vlc2(gb
, vlc_scalefactors
.table
, 7, 3) - 60: 0;
2133 gain_cache
= powf(scale
, -gain
);
2135 if (coup
->coupling_point
== AFTER_IMDCT
) {
2136 coup
->gain
[c
][0] = gain_cache
;
2138 for (g
= 0; g
< sce
->ics
.num_window_groups
; g
++) {
2139 for (sfb
= 0; sfb
< sce
->ics
.max_sfb
; sfb
++, idx
++) {
2140 if (sce
->band_type
[idx
] != ZERO_BT
) {
2142 int t
= get_vlc2(gb
, vlc_scalefactors
.table
, 7, 3) - 60;
2150 gain_cache
= powf(scale
, -t
) * s
;
2153 coup
->gain
[c
][idx
] = gain_cache
;
2163 * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
2165 * @return Returns number of bytes consumed.
2167 static int decode_drc_channel_exclusions(DynamicRangeControl
*che_drc
,
2171 int num_excl_chan
= 0;
2174 for (i
= 0; i
< 7; i
++)
2175 che_drc
->exclude_mask
[num_excl_chan
++] = get_bits1(gb
);
2176 } while (num_excl_chan
< MAX_CHANNELS
- 7 && get_bits1(gb
));
2178 return num_excl_chan
/ 7;
2182 * Decode dynamic range information; reference: table 4.52.
2184 * @return Returns number of bytes consumed.
2186 static int decode_dynamic_range(DynamicRangeControl
*che_drc
,
2190 int drc_num_bands
= 1;
2193 /* pce_tag_present? */
2194 if (get_bits1(gb
)) {
2195 che_drc
->pce_instance_tag
= get_bits(gb
, 4);
2196 skip_bits(gb
, 4); // tag_reserved_bits
2200 /* excluded_chns_present? */
2201 if (get_bits1(gb
)) {
2202 n
+= decode_drc_channel_exclusions(che_drc
, gb
);
2205 /* drc_bands_present? */
2206 if (get_bits1(gb
)) {
2207 che_drc
->band_incr
= get_bits(gb
, 4);
2208 che_drc
->interpolation_scheme
= get_bits(gb
, 4);
2210 drc_num_bands
+= che_drc
->band_incr
;
2211 for (i
= 0; i
< drc_num_bands
; i
++) {
2212 che_drc
->band_top
[i
] = get_bits(gb
, 8);
2217 /* prog_ref_level_present? */
2218 if (get_bits1(gb
)) {
2219 che_drc
->prog_ref_level
= get_bits(gb
, 7);
2220 skip_bits1(gb
); // prog_ref_level_reserved_bits
2224 for (i
= 0; i
< drc_num_bands
; i
++) {
2225 che_drc
->dyn_rng_sgn
[i
] = get_bits1(gb
);
2226 che_drc
->dyn_rng_ctl
[i
] = get_bits(gb
, 7);
2233 static int decode_fill(AACContext
*ac
, GetBitContext
*gb
, int len
) {
2235 int i
, major
, minor
;
2240 get_bits(gb
, 13); len
-= 13;
2242 for(i
=0; i
+1<sizeof(buf
) && len
>=8; i
++, len
-=8)
2243 buf
[i
] = get_bits(gb
, 8);
2246 if (ac
->avctx
->debug
& FF_DEBUG_PICT_INFO
)
2247 av_log(ac
->avctx
, AV_LOG_DEBUG
, "FILL:%s\n", buf
);
2249 if (sscanf(buf
, "libfaac %d.%d", &major
, &minor
) == 2){
2250 ac
->avctx
->internal
->skip_samples
= 1024;
2254 skip_bits_long(gb
, len
);
2260 * Decode extension data (incomplete); reference: table 4.51.
2262 * @param cnt length of TYPE_FIL syntactic element in bytes
2264 * @return Returns number of bytes consumed
2266 static int decode_extension_payload(AACContext
*ac
, GetBitContext
*gb
, int cnt
,
2267 ChannelElement
*che
, enum RawDataBlockType elem_type
)
2271 switch (get_bits(gb
, 4)) { // extension type
2272 case EXT_SBR_DATA_CRC
:
2276 av_log(ac
->avctx
, AV_LOG_ERROR
, "SBR was found before the first channel element.\n");
2278 } else if (!ac
->oc
[1].m4ac
.sbr
) {
2279 av_log(ac
->avctx
, AV_LOG_ERROR
, "SBR signaled to be not-present but was found in the bitstream.\n");
2280 skip_bits_long(gb
, 8 * cnt
- 4);
2282 } else if (ac
->oc
[1].m4ac
.sbr
== -1 && ac
->oc
[1].status
== OC_LOCKED
) {
2283 av_log(ac
->avctx
, AV_LOG_ERROR
, "Implicit SBR was found with a first occurrence after the first frame.\n");
2284 skip_bits_long(gb
, 8 * cnt
- 4);
2286 } else if (ac
->oc
[1].m4ac
.ps
== -1 && ac
->oc
[1].status
< OC_LOCKED
&& ac
->avctx
->channels
== 1) {
2287 ac
->oc
[1].m4ac
.sbr
= 1;
2288 ac
->oc
[1].m4ac
.ps
= 1;
2289 ac
->avctx
->profile
= FF_PROFILE_AAC_HE_V2
;
2290 output_configure(ac
, ac
->oc
[1].layout_map
, ac
->oc
[1].layout_map_tags
,
2291 ac
->oc
[1].status
, 1);
2293 ac
->oc
[1].m4ac
.sbr
= 1;
2294 ac
->avctx
->profile
= FF_PROFILE_AAC_HE
;
2296 res
= ff_decode_sbr_extension(ac
, &che
->sbr
, gb
, crc_flag
, cnt
, elem_type
);
2298 case EXT_DYNAMIC_RANGE
:
2299 res
= decode_dynamic_range(&ac
->che_drc
, gb
);
2302 decode_fill(ac
, gb
, 8 * cnt
- 4);
2305 case EXT_DATA_ELEMENT
:
2307 skip_bits_long(gb
, 8 * cnt
- 4);
2314 * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
2316 * @param decode 1 if tool is used normally, 0 if tool is used in LTP.
2317 * @param coef spectral coefficients
2319 static void apply_tns(float coef
[1024], TemporalNoiseShaping
*tns
,
2320 IndividualChannelStream
*ics
, int decode
)
2322 const int mmm
= FFMIN(ics
->tns_max_bands
, ics
->max_sfb
);
2324 int bottom
, top
, order
, start
, end
, size
, inc
;
2325 float lpc
[TNS_MAX_ORDER
];
2326 float tmp
[TNS_MAX_ORDER
+1];
2328 for (w
= 0; w
< ics
->num_windows
; w
++) {
2329 bottom
= ics
->num_swb
;
2330 for (filt
= 0; filt
< tns
->n_filt
[w
]; filt
++) {
2332 bottom
= FFMAX(0, top
- tns
->length
[w
][filt
]);
2333 order
= tns
->order
[w
][filt
];
2338 compute_lpc_coefs(tns
->coef
[w
][filt
], order
, lpc
, 0, 0, 0);
2340 start
= ics
->swb_offset
[FFMIN(bottom
, mmm
)];
2341 end
= ics
->swb_offset
[FFMIN( top
, mmm
)];
2342 if ((size
= end
- start
) <= 0)
2344 if (tns
->direction
[w
][filt
]) {
2354 for (m
= 0; m
< size
; m
++, start
+= inc
)
2355 for (i
= 1; i
<= FFMIN(m
, order
); i
++)
2356 coef
[start
] -= coef
[start
- i
* inc
] * lpc
[i
- 1];
2359 for (m
= 0; m
< size
; m
++, start
+= inc
) {
2360 tmp
[0] = coef
[start
];
2361 for (i
= 1; i
<= FFMIN(m
, order
); i
++)
2362 coef
[start
] += tmp
[i
] * lpc
[i
- 1];
2363 for (i
= order
; i
> 0; i
--)
2364 tmp
[i
] = tmp
[i
- 1];
2372 * Apply windowing and MDCT to obtain the spectral
2373 * coefficient from the predicted sample by LTP.
2375 static void windowing_and_mdct_ltp(AACContext
*ac
, float *out
,
2376 float *in
, IndividualChannelStream
*ics
)
2378 const float *lwindow
= ics
->use_kb_window
[0] ? ff_aac_kbd_long_1024
: ff_sine_1024
;
2379 const float *swindow
= ics
->use_kb_window
[0] ? ff_aac_kbd_short_128
: ff_sine_128
;
2380 const float *lwindow_prev
= ics
->use_kb_window
[1] ? ff_aac_kbd_long_1024
: ff_sine_1024
;
2381 const float *swindow_prev
= ics
->use_kb_window
[1] ? ff_aac_kbd_short_128
: ff_sine_128
;
2383 if (ics
->window_sequence
[0] != LONG_STOP_SEQUENCE
) {
2384 ac
->fdsp
.vector_fmul(in
, in
, lwindow_prev
, 1024);
2386 memset(in
, 0, 448 * sizeof(float));
2387 ac
->fdsp
.vector_fmul(in
+ 448, in
+ 448, swindow_prev
, 128);
2389 if (ics
->window_sequence
[0] != LONG_START_SEQUENCE
) {
2390 ac
->fdsp
.vector_fmul_reverse(in
+ 1024, in
+ 1024, lwindow
, 1024);
2392 ac
->fdsp
.vector_fmul_reverse(in
+ 1024 + 448, in
+ 1024 + 448, swindow
, 128);
2393 memset(in
+ 1024 + 576, 0, 448 * sizeof(float));
2395 ac
->mdct_ltp
.mdct_calc(&ac
->mdct_ltp
, out
, in
);
2399 * Apply the long term prediction
2401 static void apply_ltp(AACContext
*ac
, SingleChannelElement
*sce
)
2403 const LongTermPrediction
*ltp
= &sce
->ics
.ltp
;
2404 const uint16_t *offsets
= sce
->ics
.swb_offset
;
2407 if (sce
->ics
.window_sequence
[0] != EIGHT_SHORT_SEQUENCE
) {
2408 float *predTime
= sce
->ret
;
2409 float *predFreq
= ac
->buf_mdct
;
2410 int16_t num_samples
= 2048;
2412 if (ltp
->lag
< 1024)
2413 num_samples
= ltp
->lag
+ 1024;
2414 for (i
= 0; i
< num_samples
; i
++)
2415 predTime
[i
] = sce
->ltp_state
[i
+ 2048 - ltp
->lag
] * ltp
->coef
;
2416 memset(&predTime
[i
], 0, (2048 - i
) * sizeof(float));
2418 ac
->windowing_and_mdct_ltp(ac
, predFreq
, predTime
, &sce
->ics
);
2420 if (sce
->tns
.present
)
2421 ac
->apply_tns(predFreq
, &sce
->tns
, &sce
->ics
, 0);
2423 for (sfb
= 0; sfb
< FFMIN(sce
->ics
.max_sfb
, MAX_LTP_LONG_SFB
); sfb
++)
2425 for (i
= offsets
[sfb
]; i
< offsets
[sfb
+ 1]; i
++)
2426 sce
->coeffs
[i
] += predFreq
[i
];
2431 * Update the LTP buffer for next frame
2433 static void update_ltp(AACContext
*ac
, SingleChannelElement
*sce
)
2435 IndividualChannelStream
*ics
= &sce
->ics
;
2436 float *saved
= sce
->saved
;
2437 float *saved_ltp
= sce
->coeffs
;
2438 const float *lwindow
= ics
->use_kb_window
[0] ? ff_aac_kbd_long_1024
: ff_sine_1024
;
2439 const float *swindow
= ics
->use_kb_window
[0] ? ff_aac_kbd_short_128
: ff_sine_128
;
2442 if (ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
) {
2443 memcpy(saved_ltp
, saved
, 512 * sizeof(float));
2444 memset(saved_ltp
+ 576, 0, 448 * sizeof(float));
2445 ac
->fdsp
.vector_fmul_reverse(saved_ltp
+ 448, ac
->buf_mdct
+ 960, &swindow
[64], 64);
2446 for (i
= 0; i
< 64; i
++)
2447 saved_ltp
[i
+ 512] = ac
->buf_mdct
[1023 - i
] * swindow
[63 - i
];
2448 } else if (ics
->window_sequence
[0] == LONG_START_SEQUENCE
) {
2449 memcpy(saved_ltp
, ac
->buf_mdct
+ 512, 448 * sizeof(float));
2450 memset(saved_ltp
+ 576, 0, 448 * sizeof(float));
2451 ac
->fdsp
.vector_fmul_reverse(saved_ltp
+ 448, ac
->buf_mdct
+ 960, &swindow
[64], 64);
2452 for (i
= 0; i
< 64; i
++)
2453 saved_ltp
[i
+ 512] = ac
->buf_mdct
[1023 - i
] * swindow
[63 - i
];
2454 } else { // LONG_STOP or ONLY_LONG
2455 ac
->fdsp
.vector_fmul_reverse(saved_ltp
, ac
->buf_mdct
+ 512, &lwindow
[512], 512);
2456 for (i
= 0; i
< 512; i
++)
2457 saved_ltp
[i
+ 512] = ac
->buf_mdct
[1023 - i
] * lwindow
[511 - i
];
2460 memcpy(sce
->ltp_state
, sce
->ltp_state
+1024, 1024 * sizeof(*sce
->ltp_state
));
2461 memcpy(sce
->ltp_state
+1024, sce
->ret
, 1024 * sizeof(*sce
->ltp_state
));
2462 memcpy(sce
->ltp_state
+2048, saved_ltp
, 1024 * sizeof(*sce
->ltp_state
));
2466 * Conduct IMDCT and windowing.
2468 static void imdct_and_windowing(AACContext
*ac
, SingleChannelElement
*sce
)
2470 IndividualChannelStream
*ics
= &sce
->ics
;
2471 float *in
= sce
->coeffs
;
2472 float *out
= sce
->ret
;
2473 float *saved
= sce
->saved
;
2474 const float *swindow
= ics
->use_kb_window
[0] ? ff_aac_kbd_short_128
: ff_sine_128
;
2475 const float *lwindow_prev
= ics
->use_kb_window
[1] ? ff_aac_kbd_long_1024
: ff_sine_1024
;
2476 const float *swindow_prev
= ics
->use_kb_window
[1] ? ff_aac_kbd_short_128
: ff_sine_128
;
2477 float *buf
= ac
->buf_mdct
;
2478 float *temp
= ac
->temp
;
2482 if (ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
) {
2483 for (i
= 0; i
< 1024; i
+= 128)
2484 ac
->mdct_small
.imdct_half(&ac
->mdct_small
, buf
+ i
, in
+ i
);
2486 ac
->mdct
.imdct_half(&ac
->mdct
, buf
, in
);
2488 /* window overlapping
2489 * NOTE: To simplify the overlapping code, all 'meaningless' short to long
2490 * and long to short transitions are considered to be short to short
2491 * transitions. This leaves just two cases (long to long and short to short)
2492 * with a little special sauce for EIGHT_SHORT_SEQUENCE.
2494 if ((ics
->window_sequence
[1] == ONLY_LONG_SEQUENCE
|| ics
->window_sequence
[1] == LONG_STOP_SEQUENCE
) &&
2495 (ics
->window_sequence
[0] == ONLY_LONG_SEQUENCE
|| ics
->window_sequence
[0] == LONG_START_SEQUENCE
)) {
2496 ac
->fdsp
.vector_fmul_window( out
, saved
, buf
, lwindow_prev
, 512);
2498 memcpy( out
, saved
, 448 * sizeof(float));
2500 if (ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
) {
2501 ac
->fdsp
.vector_fmul_window(out
+ 448 + 0*128, saved
+ 448, buf
+ 0*128, swindow_prev
, 64);
2502 ac
->fdsp
.vector_fmul_window(out
+ 448 + 1*128, buf
+ 0*128 + 64, buf
+ 1*128, swindow
, 64);
2503 ac
->fdsp
.vector_fmul_window(out
+ 448 + 2*128, buf
+ 1*128 + 64, buf
+ 2*128, swindow
, 64);
2504 ac
->fdsp
.vector_fmul_window(out
+ 448 + 3*128, buf
+ 2*128 + 64, buf
+ 3*128, swindow
, 64);
2505 ac
->fdsp
.vector_fmul_window(temp
, buf
+ 3*128 + 64, buf
+ 4*128, swindow
, 64);
2506 memcpy( out
+ 448 + 4*128, temp
, 64 * sizeof(float));
2508 ac
->fdsp
.vector_fmul_window(out
+ 448, saved
+ 448, buf
, swindow_prev
, 64);
2509 memcpy( out
+ 576, buf
+ 64, 448 * sizeof(float));
2514 if (ics
->window_sequence
[0] == EIGHT_SHORT_SEQUENCE
) {
2515 memcpy( saved
, temp
+ 64, 64 * sizeof(float));
2516 ac
->fdsp
.vector_fmul_window(saved
+ 64, buf
+ 4*128 + 64, buf
+ 5*128, swindow
, 64);
2517 ac
->fdsp
.vector_fmul_window(saved
+ 192, buf
+ 5*128 + 64, buf
+ 6*128, swindow
, 64);
2518 ac
->fdsp
.vector_fmul_window(saved
+ 320, buf
+ 6*128 + 64, buf
+ 7*128, swindow
, 64);
2519 memcpy( saved
+ 448, buf
+ 7*128 + 64, 64 * sizeof(float));
2520 } else if (ics
->window_sequence
[0] == LONG_START_SEQUENCE
) {
2521 memcpy( saved
, buf
+ 512, 448 * sizeof(float));
2522 memcpy( saved
+ 448, buf
+ 7*128 + 64, 64 * sizeof(float));
2523 } else { // LONG_STOP or ONLY_LONG
2524 memcpy( saved
, buf
+ 512, 512 * sizeof(float));
2528 static void imdct_and_windowing_ld(AACContext
*ac
, SingleChannelElement
*sce
)
2530 IndividualChannelStream
*ics
= &sce
->ics
;
2531 float *in
= sce
->coeffs
;
2532 float *out
= sce
->ret
;
2533 float *saved
= sce
->saved
;
2534 float *buf
= ac
->buf_mdct
;
2537 ac
->mdct
.imdct_half(&ac
->mdct_ld
, buf
, in
);
2539 // window overlapping
2540 if (ics
->use_kb_window
[1]) {
2541 // AAC LD uses a low overlap sine window instead of a KBD window
2542 memcpy(out
, saved
, 192 * sizeof(float));
2543 ac
->fdsp
.vector_fmul_window(out
+ 192, saved
+ 192, buf
, ff_sine_128
, 64);
2544 memcpy( out
+ 320, buf
+ 64, 192 * sizeof(float));
2546 ac
->fdsp
.vector_fmul_window(out
, saved
, buf
, ff_sine_512
, 256);
2550 memcpy(saved
, buf
+ 256, 256 * sizeof(float));
2553 static void imdct_and_windowing_eld(AACContext
*ac
, SingleChannelElement
*sce
)
2555 float *in
= sce
->coeffs
;
2556 float *out
= sce
->ret
;
2557 float *saved
= sce
->saved
;
2558 const float *const window
= ff_aac_eld_window
;
2559 float *buf
= ac
->buf_mdct
;
2562 const int n2
= n
>> 1;
2563 const int n4
= n
>> 2;
2565 // Inverse transform, mapped to the conventional IMDCT by
2566 // Chivukula, R.K.; Reznik, Y.A.; Devarajan, V.,
2567 // "Efficient algorithms for MPEG-4 AAC-ELD, AAC-LD and AAC-LC filterbanks,"
2568 // International Conference on Audio, Language and Image Processing, ICALIP 2008.
2569 // URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=4590245&isnumber=4589950
2570 for (i
= 0; i
< n2
; i
+=2) {
2572 temp
= in
[i
]; in
[i
] = -in
[n
- 1 - i
]; in
[n
- 1 - i
] = temp
;
2573 temp
= -in
[i
+ 1]; in
[i
+ 1] = in
[n
- 2 - i
]; in
[n
- 2 - i
] = temp
;
2575 ac
->mdct
.imdct_half(&ac
->mdct_ld
, buf
, in
);
2576 for (i
= 0; i
< n
; i
+=2) {
2579 // Like with the regular IMDCT at this point we still have the middle half
2580 // of a transform but with even symmetry on the left and odd symmetry on
2583 // window overlapping
2584 // The spec says to use samples [0..511] but the reference decoder uses
2585 // samples [128..639].
2586 for (i
= n4
; i
< n2
; i
++) {
2587 out
[i
- n4
] = buf
[n2
- 1 - i
] * window
[i
- n4
] +
2588 saved
[ i
+ n2
] * window
[i
+ n
- n4
] +
2589 -saved
[ n
+ n2
- 1 - i
] * window
[i
+ 2*n
- n4
] +
2590 -saved
[2*n
+ n2
+ i
] * window
[i
+ 3*n
- n4
];
2592 for (i
= 0; i
< n2
; i
++) {
2593 out
[n4
+ i
] = buf
[i
] * window
[i
+ n2
- n4
] +
2594 -saved
[ n
- 1 - i
] * window
[i
+ n2
+ n
- n4
] +
2595 -saved
[ n
+ i
] * window
[i
+ n2
+ 2*n
- n4
] +
2596 saved
[2*n
+ n
- 1 - i
] * window
[i
+ n2
+ 3*n
- n4
];
2598 for (i
= 0; i
< n4
; i
++) {
2599 out
[n2
+ n4
+ i
] = buf
[ i
+ n2
] * window
[i
+ n
- n4
] +
2600 -saved
[ n2
- 1 - i
] * window
[i
+ 2*n
- n4
] +
2601 -saved
[ n
+ n2
+ i
] * window
[i
+ 3*n
- n4
];
2605 memmove(saved
+ n
, saved
, 2 * n
* sizeof(float));
2606 memcpy( saved
, buf
, n
* sizeof(float));
2610 * Apply dependent channel coupling (applied before IMDCT).
2612 * @param index index into coupling gain array
2614 static void apply_dependent_coupling(AACContext
*ac
,
2615 SingleChannelElement
*target
,
2616 ChannelElement
*cce
, int index
)
2618 IndividualChannelStream
*ics
= &cce
->ch
[0].ics
;
2619 const uint16_t *offsets
= ics
->swb_offset
;
2620 float *dest
= target
->coeffs
;
2621 const float *src
= cce
->ch
[0].coeffs
;
2622 int g
, i
, group
, k
, idx
= 0;
2623 if (ac
->oc
[1].m4ac
.object_type
== AOT_AAC_LTP
) {
2624 av_log(ac
->avctx
, AV_LOG_ERROR
,
2625 "Dependent coupling is not supported together with LTP\n");
2628 for (g
= 0; g
< ics
->num_window_groups
; g
++) {
2629 for (i
= 0; i
< ics
->max_sfb
; i
++, idx
++) {
2630 if (cce
->ch
[0].band_type
[idx
] != ZERO_BT
) {
2631 const float gain
= cce
->coup
.gain
[index
][idx
];
2632 for (group
= 0; group
< ics
->group_len
[g
]; group
++) {
2633 for (k
= offsets
[i
]; k
< offsets
[i
+ 1]; k
++) {
2635 dest
[group
* 128 + k
] += gain
* src
[group
* 128 + k
];
2640 dest
+= ics
->group_len
[g
] * 128;
2641 src
+= ics
->group_len
[g
] * 128;
2646 * Apply independent channel coupling (applied after IMDCT).
2648 * @param index index into coupling gain array
2650 static void apply_independent_coupling(AACContext
*ac
,
2651 SingleChannelElement
*target
,
2652 ChannelElement
*cce
, int index
)
2655 const float gain
= cce
->coup
.gain
[index
][0];
2656 const float *src
= cce
->ch
[0].ret
;
2657 float *dest
= target
->ret
;
2658 const int len
= 1024 << (ac
->oc
[1].m4ac
.sbr
== 1);
2660 for (i
= 0; i
< len
; i
++)
2661 dest
[i
] += gain
* src
[i
];
2665 * channel coupling transformation interface
2667 * @param apply_coupling_method pointer to (in)dependent coupling function
2669 static void apply_channel_coupling(AACContext
*ac
, ChannelElement
*cc
,
2670 enum RawDataBlockType type
, int elem_id
,
2671 enum CouplingPoint coupling_point
,
2672 void (*apply_coupling_method
)(AACContext
*ac
, SingleChannelElement
*target
, ChannelElement
*cce
, int index
))
2676 for (i
= 0; i
< MAX_ELEM_ID
; i
++) {
2677 ChannelElement
*cce
= ac
->che
[TYPE_CCE
][i
];
2680 if (cce
&& cce
->coup
.coupling_point
== coupling_point
) {
2681 ChannelCoupling
*coup
= &cce
->coup
;
2683 for (c
= 0; c
<= coup
->num_coupled
; c
++) {
2684 if (coup
->type
[c
] == type
&& coup
->id_select
[c
] == elem_id
) {
2685 if (coup
->ch_select
[c
] != 1) {
2686 apply_coupling_method(ac
, &cc
->ch
[0], cce
, index
);
2687 if (coup
->ch_select
[c
] != 0)
2690 if (coup
->ch_select
[c
] != 2)
2691 apply_coupling_method(ac
, &cc
->ch
[1], cce
, index
++);
2693 index
+= 1 + (coup
->ch_select
[c
] == 3);
2700 * Convert spectral data to float samples, applying all supported tools as appropriate.
2702 static void spectral_to_sample(AACContext
*ac
)
2705 void (*imdct_and_window
)(AACContext
*ac
, SingleChannelElement
*sce
);
2706 switch (ac
->oc
[1].m4ac
.object_type
) {
2708 imdct_and_window
= imdct_and_windowing_ld
;
2710 case AOT_ER_AAC_ELD
:
2711 imdct_and_window
= imdct_and_windowing_eld
;
2714 imdct_and_window
= ac
->imdct_and_windowing
;
2716 for (type
= 3; type
>= 0; type
--) {
2717 for (i
= 0; i
< MAX_ELEM_ID
; i
++) {
2718 ChannelElement
*che
= ac
->che
[type
][i
];
2720 if (type
<= TYPE_CPE
)
2721 apply_channel_coupling(ac
, che
, type
, i
, BEFORE_TNS
, apply_dependent_coupling
);
2722 if (ac
->oc
[1].m4ac
.object_type
== AOT_AAC_LTP
) {
2723 if (che
->ch
[0].ics
.predictor_present
) {
2724 if (che
->ch
[0].ics
.ltp
.present
)
2725 ac
->apply_ltp(ac
, &che
->ch
[0]);
2726 if (che
->ch
[1].ics
.ltp
.present
&& type
== TYPE_CPE
)
2727 ac
->apply_ltp(ac
, &che
->ch
[1]);
2730 if (che
->ch
[0].tns
.present
)
2731 ac
->apply_tns(che
->ch
[0].coeffs
, &che
->ch
[0].tns
, &che
->ch
[0].ics
, 1);
2732 if (che
->ch
[1].tns
.present
)
2733 ac
->apply_tns(che
->ch
[1].coeffs
, &che
->ch
[1].tns
, &che
->ch
[1].ics
, 1);
2734 if (type
<= TYPE_CPE
)
2735 apply_channel_coupling(ac
, che
, type
, i
, BETWEEN_TNS_AND_IMDCT
, apply_dependent_coupling
);
2736 if (type
!= TYPE_CCE
|| che
->coup
.coupling_point
== AFTER_IMDCT
) {
2737 imdct_and_window(ac
, &che
->ch
[0]);
2738 if (ac
->oc
[1].m4ac
.object_type
== AOT_AAC_LTP
)
2739 ac
->update_ltp(ac
, &che
->ch
[0]);
2740 if (type
== TYPE_CPE
) {
2741 imdct_and_window(ac
, &che
->ch
[1]);
2742 if (ac
->oc
[1].m4ac
.object_type
== AOT_AAC_LTP
)
2743 ac
->update_ltp(ac
, &che
->ch
[1]);
2745 if (ac
->oc
[1].m4ac
.sbr
> 0) {
2746 ff_sbr_apply(ac
, &che
->sbr
, type
, che
->ch
[0].ret
, che
->ch
[1].ret
);
2749 if (type
<= TYPE_CCE
)
2750 apply_channel_coupling(ac
, che
, type
, i
, AFTER_IMDCT
, apply_independent_coupling
);
2756 static int parse_adts_frame_header(AACContext
*ac
, GetBitContext
*gb
)
2759 AACADTSHeaderInfo hdr_info
;
2760 uint8_t layout_map
[MAX_ELEM_ID
*4][3];
2761 int layout_map_tags
, ret
;
2763 size
= avpriv_aac_parse_header(gb
, &hdr_info
);
2765 if (!ac
->warned_num_aac_frames
&& hdr_info
.num_aac_frames
!= 1) {
2766 // This is 2 for "VLB " audio in NSV files.
2767 // See samples/nsv/vlb_audio.
2768 avpriv_report_missing_feature(ac
->avctx
,
2769 "More than one AAC RDB per ADTS frame");
2770 ac
->warned_num_aac_frames
= 1;
2772 push_output_configuration(ac
);
2773 if (hdr_info
.chan_config
) {
2774 ac
->oc
[1].m4ac
.chan_config
= hdr_info
.chan_config
;
2775 if ((ret
= set_default_channel_config(ac
->avctx
,
2778 hdr_info
.chan_config
)) < 0)
2780 if ((ret
= output_configure(ac
, layout_map
, layout_map_tags
,
2781 FFMAX(ac
->oc
[1].status
,
2782 OC_TRIAL_FRAME
), 0)) < 0)
2785 ac
->oc
[1].m4ac
.chan_config
= 0;
2787 * dual mono frames in Japanese DTV can have chan_config 0
2788 * WITHOUT specifying PCE.
2789 * thus, set dual mono as default.
2791 if (ac
->dmono_mode
&& ac
->oc
[0].status
== OC_NONE
) {
2792 layout_map_tags
= 2;
2793 layout_map
[0][0] = layout_map
[1][0] = TYPE_SCE
;
2794 layout_map
[0][2] = layout_map
[1][2] = AAC_CHANNEL_FRONT
;
2795 layout_map
[0][1] = 0;
2796 layout_map
[1][1] = 1;
2797 if (output_configure(ac
, layout_map
, layout_map_tags
,
2802 ac
->oc
[1].m4ac
.sample_rate
= hdr_info
.sample_rate
;
2803 ac
->oc
[1].m4ac
.sampling_index
= hdr_info
.sampling_index
;
2804 ac
->oc
[1].m4ac
.object_type
= hdr_info
.object_type
;
2805 if (ac
->oc
[0].status
!= OC_LOCKED
||
2806 ac
->oc
[0].m4ac
.chan_config
!= hdr_info
.chan_config
||
2807 ac
->oc
[0].m4ac
.sample_rate
!= hdr_info
.sample_rate
) {
2808 ac
->oc
[1].m4ac
.sbr
= -1;
2809 ac
->oc
[1].m4ac
.ps
= -1;
2811 if (!hdr_info
.crc_absent
)
2817 static int aac_decode_er_frame(AVCodecContext
*avctx
, void *data
,
2818 int *got_frame_ptr
, GetBitContext
*gb
)
2820 AACContext
*ac
= avctx
->priv_data
;
2821 ChannelElement
*che
;
2824 int chan_config
= ac
->oc
[1].m4ac
.chan_config
;
2825 int aot
= ac
->oc
[1].m4ac
.object_type
;
2827 if (aot
== AOT_ER_AAC_LD
|| aot
== AOT_ER_AAC_ELD
)
2832 if ((err
= frame_configure_elements(avctx
)) < 0)
2835 // The FF_PROFILE_AAC_* defines are all object_type - 1
2836 // This may lead to an undefined profile being signaled
2837 ac
->avctx
->profile
= ac
->oc
[1].m4ac
.object_type
- 1;
2839 ac
->tags_mapped
= 0;
2841 if (chan_config
< 0 || chan_config
>= 8) {
2842 avpriv_request_sample(avctx
, "Unknown ER channel configuration %d",
2843 ac
->oc
[1].m4ac
.chan_config
);
2844 return AVERROR_INVALIDDATA
;
2846 for (i
= 0; i
< tags_per_config
[chan_config
]; i
++) {
2847 const int elem_type
= aac_channel_layout_map
[chan_config
-1][i
][0];
2848 const int elem_id
= aac_channel_layout_map
[chan_config
-1][i
][1];
2849 if (!(che
=get_che(ac
, elem_type
, elem_id
))) {
2850 av_log(ac
->avctx
, AV_LOG_ERROR
,
2851 "channel element %d.%d is not allocated\n",
2852 elem_type
, elem_id
);
2853 return AVERROR_INVALIDDATA
;
2855 if (aot
!= AOT_ER_AAC_ELD
)
2857 switch (elem_type
) {
2859 err
= decode_ics(ac
, &che
->ch
[0], gb
, 0, 0);
2862 err
= decode_cpe(ac
, gb
, che
);
2865 err
= decode_ics(ac
, &che
->ch
[0], gb
, 0, 0);
2872 spectral_to_sample(ac
);
2874 ac
->frame
->nb_samples
= samples
;
2875 ac
->frame
->sample_rate
= avctx
->sample_rate
;
2878 skip_bits_long(gb
, get_bits_left(gb
));
2882 static int aac_decode_frame_int(AVCodecContext
*avctx
, void *data
,
2883 int *got_frame_ptr
, GetBitContext
*gb
, AVPacket
*avpkt
)
2885 AACContext
*ac
= avctx
->priv_data
;
2886 ChannelElement
*che
= NULL
, *che_prev
= NULL
;
2887 enum RawDataBlockType elem_type
, elem_type_prev
= TYPE_END
;
2889 int samples
= 0, multiplier
, audio_found
= 0, pce_found
= 0;
2890 int is_dmono
, sce_count
= 0;
2894 if (show_bits(gb
, 12) == 0xfff) {
2895 if ((err
= parse_adts_frame_header(ac
, gb
)) < 0) {
2896 av_log(avctx
, AV_LOG_ERROR
, "Error decoding AAC frame header.\n");
2899 if (ac
->oc
[1].m4ac
.sampling_index
> 12) {
2900 av_log(ac
->avctx
, AV_LOG_ERROR
, "invalid sampling rate index %d\n", ac
->oc
[1].m4ac
.sampling_index
);
2901 err
= AVERROR_INVALIDDATA
;
2906 if ((err
= frame_configure_elements(avctx
)) < 0)
2909 // The FF_PROFILE_AAC_* defines are all object_type - 1
2910 // This may lead to an undefined profile being signaled
2911 ac
->avctx
->profile
= ac
->oc
[1].m4ac
.object_type
- 1;
2913 ac
->tags_mapped
= 0;
2915 while ((elem_type
= get_bits(gb
, 3)) != TYPE_END
) {
2916 elem_id
= get_bits(gb
, 4);
2918 if (elem_type
< TYPE_DSE
) {
2919 if (!(che
=get_che(ac
, elem_type
, elem_id
))) {
2920 av_log(ac
->avctx
, AV_LOG_ERROR
, "channel element %d.%d is not allocated\n",
2921 elem_type
, elem_id
);
2922 err
= AVERROR_INVALIDDATA
;
2928 switch (elem_type
) {
2931 err
= decode_ics(ac
, &che
->ch
[0], gb
, 0, 0);
2937 err
= decode_cpe(ac
, gb
, che
);
2942 err
= decode_cce(ac
, gb
, che
);
2946 err
= decode_ics(ac
, &che
->ch
[0], gb
, 0, 0);
2951 err
= skip_data_stream_element(ac
, gb
);
2955 uint8_t layout_map
[MAX_ELEM_ID
*4][3];
2957 push_output_configuration(ac
);
2958 tags
= decode_pce(avctx
, &ac
->oc
[1].m4ac
, layout_map
, gb
);
2964 av_log(avctx
, AV_LOG_ERROR
,
2965 "Not evaluating a further program_config_element as this construct is dubious at best.\n");
2967 err
= output_configure(ac
, layout_map
, tags
, OC_TRIAL_PCE
, 1);
2969 ac
->oc
[1].m4ac
.chan_config
= 0;
2977 elem_id
+= get_bits(gb
, 8) - 1;
2978 if (get_bits_left(gb
) < 8 * elem_id
) {
2979 av_log(avctx
, AV_LOG_ERROR
, "TYPE_FIL: "overread_err
);
2980 err
= AVERROR_INVALIDDATA
;
2984 elem_id
-= decode_extension_payload(ac
, gb
, elem_id
, che_prev
, elem_type_prev
);
2985 err
= 0; /* FIXME */
2989 err
= AVERROR_BUG
; /* should not happen, but keeps compiler happy */
2994 elem_type_prev
= elem_type
;
2999 if (get_bits_left(gb
) < 3) {
3000 av_log(avctx
, AV_LOG_ERROR
, overread_err
);
3001 err
= AVERROR_INVALIDDATA
;
3006 spectral_to_sample(ac
);
3008 multiplier
= (ac
->oc
[1].m4ac
.sbr
== 1) ? ac
->oc
[1].m4ac
.ext_sample_rate
> ac
->oc
[1].m4ac
.sample_rate
: 0;
3009 samples
<<= multiplier
;
3011 if (ac
->oc
[1].status
&& audio_found
) {
3012 avctx
->sample_rate
= ac
->oc
[1].m4ac
.sample_rate
<< multiplier
;
3013 avctx
->frame_size
= samples
;
3014 ac
->oc
[1].status
= OC_LOCKED
;
3019 const uint8_t *side
= av_packet_get_side_data(avpkt
, AV_PKT_DATA_SKIP_SAMPLES
, &side_size
);
3020 if (side
&& side_size
>=4)
3021 AV_WL32(side
, 2*AV_RL32(side
));
3024 *got_frame_ptr
= !!samples
;
3026 ac
->frame
->nb_samples
= samples
;
3027 ac
->frame
->sample_rate
= avctx
->sample_rate
;
3029 av_frame_unref(ac
->frame
);
3030 *got_frame_ptr
= !!samples
;
3032 /* for dual-mono audio (SCE + SCE) */
3033 is_dmono
= ac
->dmono_mode
&& sce_count
== 2 &&
3034 ac
->oc
[1].channel_layout
== (AV_CH_FRONT_LEFT
| AV_CH_FRONT_RIGHT
);
3036 if (ac
->dmono_mode
== 1)
3037 ((AVFrame
*)data
)->data
[1] =((AVFrame
*)data
)->data
[0];
3038 else if (ac
->dmono_mode
== 2)
3039 ((AVFrame
*)data
)->data
[0] =((AVFrame
*)data
)->data
[1];
3044 pop_output_configuration(ac
);
3048 static int aac_decode_frame(AVCodecContext
*avctx
, void *data
,
3049 int *got_frame_ptr
, AVPacket
*avpkt
)
3051 AACContext
*ac
= avctx
->priv_data
;
3052 const uint8_t *buf
= avpkt
->data
;
3053 int buf_size
= avpkt
->size
;
3058 int new_extradata_size
;
3059 const uint8_t *new_extradata
= av_packet_get_side_data(avpkt
,
3060 AV_PKT_DATA_NEW_EXTRADATA
,
3061 &new_extradata_size
);
3062 int jp_dualmono_size
;
3063 const uint8_t *jp_dualmono
= av_packet_get_side_data(avpkt
,
3064 AV_PKT_DATA_JP_DUALMONO
,
3067 if (new_extradata
&& 0) {
3068 av_free(avctx
->extradata
);
3069 avctx
->extradata
= av_mallocz(new_extradata_size
+
3070 FF_INPUT_BUFFER_PADDING_SIZE
);
3071 if (!avctx
->extradata
)
3072 return AVERROR(ENOMEM
);
3073 avctx
->extradata_size
= new_extradata_size
;
3074 memcpy(avctx
->extradata
, new_extradata
, new_extradata_size
);
3075 push_output_configuration(ac
);
3076 if (decode_audio_specific_config(ac
, ac
->avctx
, &ac
->oc
[1].m4ac
,
3078 avctx
->extradata_size
*8, 1) < 0) {
3079 pop_output_configuration(ac
);
3080 return AVERROR_INVALIDDATA
;
3085 if (jp_dualmono
&& jp_dualmono_size
> 0)
3086 ac
->dmono_mode
= 1 + *jp_dualmono
;
3087 if (ac
->force_dmono_mode
>= 0)
3088 ac
->dmono_mode
= ac
->force_dmono_mode
;
3090 if (INT_MAX
/ 8 <= buf_size
)
3091 return AVERROR_INVALIDDATA
;
3093 if ((err
= init_get_bits(&gb
, buf
, buf_size
* 8)) < 0)
3096 switch (ac
->oc
[1].m4ac
.object_type
) {
3098 case AOT_ER_AAC_LTP
:
3100 case AOT_ER_AAC_ELD
:
3101 err
= aac_decode_er_frame(avctx
, data
, got_frame_ptr
, &gb
);
3104 err
= aac_decode_frame_int(avctx
, data
, got_frame_ptr
, &gb
, avpkt
);
3109 buf_consumed
= (get_bits_count(&gb
) + 7) >> 3;
3110 for (buf_offset
= buf_consumed
; buf_offset
< buf_size
; buf_offset
++)
3111 if (buf
[buf_offset
])
3114 return buf_size
> buf_offset
? buf_consumed
: buf_size
;
3117 static av_cold
int aac_decode_close(AVCodecContext
*avctx
)
3119 AACContext
*ac
= avctx
->priv_data
;
3122 for (i
= 0; i
< MAX_ELEM_ID
; i
++) {
3123 for (type
= 0; type
< 4; type
++) {
3124 if (ac
->che
[type
][i
])
3125 ff_aac_sbr_ctx_close(&ac
->che
[type
][i
]->sbr
);
3126 av_freep(&ac
->che
[type
][i
]);
3130 ff_mdct_end(&ac
->mdct
);
3131 ff_mdct_end(&ac
->mdct_small
);
3132 ff_mdct_end(&ac
->mdct_ld
);
3133 ff_mdct_end(&ac
->mdct_ltp
);
3138 #define LOAS_SYNC_WORD 0x2b7 ///< 11 bits LOAS sync word
3140 struct LATMContext
{
3141 AACContext aac_ctx
; ///< containing AACContext
3142 int initialized
; ///< initialized after a valid extradata was seen
3145 int audio_mux_version_A
; ///< LATM syntax version
3146 int frame_length_type
; ///< 0/1 variable/fixed frame length
3147 int frame_length
; ///< frame length for fixed frame length
3150 static inline uint32_t latm_get_value(GetBitContext
*b
)
3152 int length
= get_bits(b
, 2);
3154 return get_bits_long(b
, (length
+1)*8);
3157 static int latm_decode_audio_specific_config(struct LATMContext
*latmctx
,
3158 GetBitContext
*gb
, int asclen
)
3160 AACContext
*ac
= &latmctx
->aac_ctx
;
3161 AVCodecContext
*avctx
= ac
->avctx
;
3162 MPEG4AudioConfig m4ac
= { 0 };
3163 int config_start_bit
= get_bits_count(gb
);
3164 int sync_extension
= 0;
3165 int bits_consumed
, esize
;
3169 asclen
= FFMIN(asclen
, get_bits_left(gb
));
3171 asclen
= get_bits_left(gb
);
3173 if (config_start_bit
% 8) {
3174 avpriv_request_sample(latmctx
->aac_ctx
.avctx
,
3175 "Non-byte-aligned audio-specific config");
3176 return AVERROR_PATCHWELCOME
;
3179 return AVERROR_INVALIDDATA
;
3180 bits_consumed
= decode_audio_specific_config(NULL
, avctx
, &m4ac
,
3181 gb
->buffer
+ (config_start_bit
/ 8),
3182 asclen
, sync_extension
);
3184 if (bits_consumed
< 0)
3185 return AVERROR_INVALIDDATA
;
3187 if (!latmctx
->initialized
||
3188 ac
->oc
[1].m4ac
.sample_rate
!= m4ac
.sample_rate
||
3189 ac
->oc
[1].m4ac
.chan_config
!= m4ac
.chan_config
) {
3191 if(latmctx
->initialized
) {
3192 av_log(avctx
, AV_LOG_INFO
, "audio config changed\n");
3194 av_log(avctx
, AV_LOG_DEBUG
, "initializing latmctx\n");
3196 latmctx
->initialized
= 0;
3198 esize
= (bits_consumed
+7) / 8;
3200 if (avctx
->extradata_size
< esize
) {
3201 av_free(avctx
->extradata
);
3202 avctx
->extradata
= av_malloc(esize
+ FF_INPUT_BUFFER_PADDING_SIZE
);
3203 if (!avctx
->extradata
)
3204 return AVERROR(ENOMEM
);
3207 avctx
->extradata_size
= esize
;
3208 memcpy(avctx
->extradata
, gb
->buffer
+ (config_start_bit
/8), esize
);
3209 memset(avctx
->extradata
+esize
, 0, FF_INPUT_BUFFER_PADDING_SIZE
);
3211 skip_bits_long(gb
, bits_consumed
);
3213 return bits_consumed
;
3216 static int read_stream_mux_config(struct LATMContext
*latmctx
,
3219 int ret
, audio_mux_version
= get_bits(gb
, 1);
3221 latmctx
->audio_mux_version_A
= 0;
3222 if (audio_mux_version
)
3223 latmctx
->audio_mux_version_A
= get_bits(gb
, 1);
3225 if (!latmctx
->audio_mux_version_A
) {
3227 if (audio_mux_version
)
3228 latm_get_value(gb
); // taraFullness
3230 skip_bits(gb
, 1); // allStreamSameTimeFraming
3231 skip_bits(gb
, 6); // numSubFrames
3233 if (get_bits(gb
, 4)) { // numPrograms
3234 avpriv_request_sample(latmctx
->aac_ctx
.avctx
, "Multiple programs");
3235 return AVERROR_PATCHWELCOME
;
3238 // for each program (which there is only one in DVB)
3240 // for each layer (which there is only one in DVB)
3241 if (get_bits(gb
, 3)) { // numLayer
3242 avpriv_request_sample(latmctx
->aac_ctx
.avctx
, "Multiple layers");
3243 return AVERROR_PATCHWELCOME
;
3246 // for all but first stream: use_same_config = get_bits(gb, 1);
3247 if (!audio_mux_version
) {
3248 if ((ret
= latm_decode_audio_specific_config(latmctx
, gb
, 0)) < 0)
3251 int ascLen
= latm_get_value(gb
);
3252 if ((ret
= latm_decode_audio_specific_config(latmctx
, gb
, ascLen
)) < 0)
3255 skip_bits_long(gb
, ascLen
);
3258 latmctx
->frame_length_type
= get_bits(gb
, 3);
3259 switch (latmctx
->frame_length_type
) {
3261 skip_bits(gb
, 8); // latmBufferFullness
3264 latmctx
->frame_length
= get_bits(gb
, 9);
3269 skip_bits(gb
, 6); // CELP frame length table index
3273 skip_bits(gb
, 1); // HVXC frame length table index
3277 if (get_bits(gb
, 1)) { // other data
3278 if (audio_mux_version
) {
3279 latm_get_value(gb
); // other_data_bits
3283 esc
= get_bits(gb
, 1);
3289 if (get_bits(gb
, 1)) // crc present
3290 skip_bits(gb
, 8); // config_crc
3296 static int read_payload_length_info(struct LATMContext
*ctx
, GetBitContext
*gb
)
3300 if (ctx
->frame_length_type
== 0) {
3301 int mux_slot_length
= 0;
3303 tmp
= get_bits(gb
, 8);
3304 mux_slot_length
+= tmp
;
3305 } while (tmp
== 255);
3306 return mux_slot_length
;
3307 } else if (ctx
->frame_length_type
== 1) {
3308 return ctx
->frame_length
;
3309 } else if (ctx
->frame_length_type
== 3 ||
3310 ctx
->frame_length_type
== 5 ||
3311 ctx
->frame_length_type
== 7) {
3312 skip_bits(gb
, 2); // mux_slot_length_coded
3317 static int read_audio_mux_element(struct LATMContext
*latmctx
,
3321 uint8_t use_same_mux
= get_bits(gb
, 1);
3322 if (!use_same_mux
) {
3323 if ((err
= read_stream_mux_config(latmctx
, gb
)) < 0)
3325 } else if (!latmctx
->aac_ctx
.avctx
->extradata
) {
3326 av_log(latmctx
->aac_ctx
.avctx
, AV_LOG_DEBUG
,
3327 "no decoder config found\n");
3328 return AVERROR(EAGAIN
);
3330 if (latmctx
->audio_mux_version_A
== 0) {
3331 int mux_slot_length_bytes
= read_payload_length_info(latmctx
, gb
);
3332 if (mux_slot_length_bytes
* 8 > get_bits_left(gb
)) {
3333 av_log(latmctx
->aac_ctx
.avctx
, AV_LOG_ERROR
, "incomplete frame\n");
3334 return AVERROR_INVALIDDATA
;
3335 } else if (mux_slot_length_bytes
* 8 + 256 < get_bits_left(gb
)) {
3336 av_log(latmctx
->aac_ctx
.avctx
, AV_LOG_ERROR
,
3337 "frame length mismatch %d << %d\n",
3338 mux_slot_length_bytes
* 8, get_bits_left(gb
));
3339 return AVERROR_INVALIDDATA
;
3346 static int latm_decode_frame(AVCodecContext
*avctx
, void *out
,
3347 int *got_frame_ptr
, AVPacket
*avpkt
)
3349 struct LATMContext
*latmctx
= avctx
->priv_data
;
3353 if ((err
= init_get_bits8(&gb
, avpkt
->data
, avpkt
->size
)) < 0)
3356 // check for LOAS sync word
3357 if (get_bits(&gb
, 11) != LOAS_SYNC_WORD
)
3358 return AVERROR_INVALIDDATA
;
3360 muxlength
= get_bits(&gb
, 13) + 3;
3361 // not enough data, the parser should have sorted this out
3362 if (muxlength
> avpkt
->size
)
3363 return AVERROR_INVALIDDATA
;
3365 if ((err
= read_audio_mux_element(latmctx
, &gb
)) < 0)
3368 if (!latmctx
->initialized
) {
3369 if (!avctx
->extradata
) {
3373 push_output_configuration(&latmctx
->aac_ctx
);
3374 if ((err
= decode_audio_specific_config(
3375 &latmctx
->aac_ctx
, avctx
, &latmctx
->aac_ctx
.oc
[1].m4ac
,
3376 avctx
->extradata
, avctx
->extradata_size
*8, 1)) < 0) {
3377 pop_output_configuration(&latmctx
->aac_ctx
);
3380 latmctx
->initialized
= 1;
3384 if (show_bits(&gb
, 12) == 0xfff) {
3385 av_log(latmctx
->aac_ctx
.avctx
, AV_LOG_ERROR
,
3386 "ADTS header detected, probably as result of configuration "
3388 return AVERROR_INVALIDDATA
;
3391 if ((err
= aac_decode_frame_int(avctx
, out
, got_frame_ptr
, &gb
, avpkt
)) < 0)
/**
 * Initialize the LATM decoder.
 *
 * Calls aac_decode_init() on the context. When extradata is present
 * (extradata_size > 0), the LATM context's initialized flag is set to 1 if
 * that call returned 0 and to 0 otherwise; without extradata the flag is
 * left untouched here.
 *
 * @param avctx codec context; its priv_data is used as the struct LATMContext
 *
 * NOTE(review): the function's braces and return statement are missing from
 * this listing, so the returned value is not documented here.
 */
3397 static av_cold
int latm_decode_init(AVCodecContext
*avctx
)
3399 struct LATMContext
*latmctx
= avctx
->priv_data
;
3400 int ret
= aac_decode_init(avctx
);
/* Only a context that came with extradata can count as configured now. */
3402 if (avctx
->extradata_size
> 0)
3403 latmctx
->initialized
= !ret
;
/**
 * Populate the per-context hooks of an AACContext.
 *
 * Each of the five fields below is assigned the identically named symbol
 * (presumably the generic C implementation of that processing step), after
 * which ff_aacdec_init_mips() is called on the context.
 *
 * @param c AAC decoder context whose hook fields are written
 *
 * NOTE(review): the line directly before the ff_aacdec_init_mips() call is
 * missing from this listing, so whether that call is conditional cannot be
 * determined here.
 */
3408 static void aacdec_init(AACContext
*c
)
3410 c
->imdct_and_windowing
= imdct_and_windowing
;
3411 c
->apply_ltp
= apply_ltp
;
3412 c
->apply_tns
= apply_tns
;
3413 c
->windowing_and_mdct_ltp
= windowing_and_mdct_ltp
;
3414 c
->update_ltp
= update_ltp
;
3417 ff_aacdec_init_mips(c
);
3420 * AVOptions for Japanese DTV specific extensions (ADTS only)
/* Option flags shared by the AAC decoder's AVOption entries. The replacement
 * list is parenthesized so the macro always expands to a single operand,
 * whatever operators surround it at the point of use. */
#define AACDEC_FLAGS (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
/*
 * AVOption table for the decoder.
 *
 * The first entry exposes "dual_mono_mode", an integer option stored in
 * AACContext.force_dmono_mode with default -1; the following -1 and 2 are
 * the min/max slots of the AVOption initializer. The remaining entries are
 * named constants belonging to the same "dual_mono_mode" unit:
 *   auto = -1, main = 1, sub = 2, both = 0.
 *
 * NOTE(review): the table terminator and closing brace are missing from
 * this listing.
 */
3423 static const AVOption options
[] = {
3424 {"dual_mono_mode", "Select the channel to decode for dual mono",
3425 offsetof(AACContext
, force_dmono_mode
), AV_OPT_TYPE_INT
, {.i64
=-1}, -1, 2,
3426 AACDEC_FLAGS
, "dual_mono_mode"},
/* Named values accepted for dual_mono_mode. */
3428 {"auto", "autoselection", 0, AV_OPT_TYPE_CONST
, {.i64
=-1}, INT_MIN
, INT_MAX
, AACDEC_FLAGS
, "dual_mono_mode"},
3429 {"main", "Select Main/Left channel", 0, AV_OPT_TYPE_CONST
, {.i64
= 1}, INT_MIN
, INT_MAX
, AACDEC_FLAGS
, "dual_mono_mode"},
3430 {"sub" , "Select Sub/Right channel", 0, AV_OPT_TYPE_CONST
, {.i64
= 2}, INT_MIN
, INT_MAX
, AACDEC_FLAGS
, "dual_mono_mode"},
3431 {"both", "Select both channels", 0, AV_OPT_TYPE_CONST
, {.i64
= 0}, INT_MIN
, INT_MAX
, AACDEC_FLAGS
, "dual_mono_mode"},
/*
 * AVClass for the AAC decoder: class name "AAC decoder", the default
 * item-name callback, and the libavutil version constant. It is referenced
 * by ff_aac_decoder through its priv_class field.
 *
 * NOTE(review): one initializer line (between item_name and version) and the
 * closing brace are missing from this listing.
 */
3436 static const AVClass aac_decoder_class
= {
3437 .class_name
= "AAC decoder",
3438 .item_name
= av_default_item_name
,
3440 .version
= LIBAVUTIL_VERSION_INT
,
/*
 * Codec descriptor for the AAC decoder (AV_CODEC_ID_AAC, audio).
 *
 * Private data is one AACContext; init / close / decode are wired to
 * aac_decode_init / aac_decode_close / aac_decode_frame. The only listed
 * sample format is AV_SAMPLE_FMT_FLTP (list ends with AV_SAMPLE_FMT_NONE),
 * channel layouts come from aac_channel_layout, and the private AVClass is
 * aac_decoder_class.
 *
 * NOTE(review): several initializer lines and the closing brace are missing
 * from this listing.
 */
3443 AVCodec ff_aac_decoder
= {
3445 .long_name
= NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
3446 .type
= AVMEDIA_TYPE_AUDIO
,
3447 .id
= AV_CODEC_ID_AAC
,
3448 .priv_data_size
= sizeof(AACContext
),
3449 .init
= aac_decode_init
,
3450 .close
= aac_decode_close
,
3451 .decode
= aac_decode_frame
,
3452 .sample_fmts
= (const enum AVSampleFormat
[]) {
3453 AV_SAMPLE_FMT_FLTP
, AV_SAMPLE_FMT_NONE
3455 .capabilities
= CODEC_CAP_CHANNEL_CONF
| CODEC_CAP_DR1
,
3456 .channel_layouts
= aac_channel_layout
,
3458 .priv_class
= &aac_decoder_class
,
3462 Note: This decoder filter is intended to decode LATM streams transferred
3463 in MPEG transport streams which only contain one program.
3464 For more complex LATM demuxing, a separate LATM demuxer should be used.
3466 AVCodec ff_aac_latm_decoder
= {
3468 .long_name
= NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
3469 .type
= AVMEDIA_TYPE_AUDIO
,
3470 .id
= AV_CODEC_ID_AAC_LATM
,
3471 .priv_data_size
= sizeof(struct LATMContext
),
3472 .init
= latm_decode_init
,
3473 .close
= aac_decode_close
,
3474 .decode
= latm_decode_frame
,
3475 .sample_fmts
= (const enum AVSampleFormat
[]) {
3476 AV_SAMPLE_FMT_FLTP
, AV_SAMPLE_FMT_NONE
3478 .capabilities
= CODEC_CAP_CHANNEL_CONF
| CODEC_CAP_DR1
,
3479 .channel_layouts
= aac_channel_layout
,