/*
 * ATRAC3+ compatible decoder
 *
 * Copyright (c) 2010-2013 Maxim Poliakovski
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/**
 * @file
 * Sony ATRAC3+ compatible decoder.
 *
 * Container formats used to store its data:
 * RIFF WAV (.at3) and Sony OpenMG (.oma, .aa3).
 *
 * Technical description of this codec can be found here:
 * http://wiki.multimedia.cx/index.php?title=ATRAC3plus
 *
 * Kudos to Benjamin Larsson and Michael Karcher
 * for their precious technical help!
 */
#include <stdint.h>
#include <string.h>

#include "libavutil/channel_layout.h"
#include "libavutil/float_dsp.h"
#include "atrac3plus.h"
48 typedef struct ATRAC3PContext
{
50 AVFloatDSPContext fdsp
;
52 DECLARE_ALIGNED(32, float, samples
)[2][ATRAC3P_FRAME_SAMPLES
]; ///< quantized MDCT spectrum
53 DECLARE_ALIGNED(32, float, mdct_buf
)[2][ATRAC3P_FRAME_SAMPLES
]; ///< output of the IMDCT
54 DECLARE_ALIGNED(32, float, time_buf
)[2][ATRAC3P_FRAME_SAMPLES
]; ///< output of the gain compensation
55 DECLARE_ALIGNED(32, float, outp_buf
)[2][ATRAC3P_FRAME_SAMPLES
];
57 AtracGCContext gainc_ctx
; ///< gain compensation context
59 FFTContext ipqf_dct_ctx
; ///< IDCT context used by IPQF
61 Atrac3pChanUnitCtx
*ch_units
; ///< global channel units
63 int num_channel_blocks
; ///< number of channel blocks
64 uint8_t channel_blocks
[5]; ///< channel configuration descriptor
65 uint64_t my_channel_layout
; ///< current channel layout
68 static av_cold
int atrac3p_decode_close(AVCodecContext
*avctx
)
70 av_free(((ATRAC3PContext
*)(avctx
->priv_data
))->ch_units
);
75 static av_cold
int set_channel_params(ATRAC3PContext
*ctx
,
76 AVCodecContext
*avctx
)
78 memset(ctx
->channel_blocks
, 0, sizeof(ctx
->channel_blocks
));
80 switch (avctx
->channels
) {
82 if (avctx
->channel_layout
!= AV_CH_FRONT_LEFT
)
83 avctx
->channel_layout
= AV_CH_LAYOUT_MONO
;
85 ctx
->num_channel_blocks
= 1;
86 ctx
->channel_blocks
[0] = CH_UNIT_MONO
;
89 avctx
->channel_layout
= AV_CH_LAYOUT_STEREO
;
90 ctx
->num_channel_blocks
= 1;
91 ctx
->channel_blocks
[0] = CH_UNIT_STEREO
;
94 avctx
->channel_layout
= AV_CH_LAYOUT_SURROUND
;
95 ctx
->num_channel_blocks
= 2;
96 ctx
->channel_blocks
[0] = CH_UNIT_STEREO
;
97 ctx
->channel_blocks
[1] = CH_UNIT_MONO
;
100 avctx
->channel_layout
= AV_CH_LAYOUT_4POINT0
;
101 ctx
->num_channel_blocks
= 3;
102 ctx
->channel_blocks
[0] = CH_UNIT_STEREO
;
103 ctx
->channel_blocks
[1] = CH_UNIT_MONO
;
104 ctx
->channel_blocks
[2] = CH_UNIT_MONO
;
107 avctx
->channel_layout
= AV_CH_LAYOUT_5POINT1_BACK
;
108 ctx
->num_channel_blocks
= 4;
109 ctx
->channel_blocks
[0] = CH_UNIT_STEREO
;
110 ctx
->channel_blocks
[1] = CH_UNIT_MONO
;
111 ctx
->channel_blocks
[2] = CH_UNIT_STEREO
;
112 ctx
->channel_blocks
[3] = CH_UNIT_MONO
;
115 avctx
->channel_layout
= AV_CH_LAYOUT_6POINT1_BACK
;
116 ctx
->num_channel_blocks
= 5;
117 ctx
->channel_blocks
[0] = CH_UNIT_STEREO
;
118 ctx
->channel_blocks
[1] = CH_UNIT_MONO
;
119 ctx
->channel_blocks
[2] = CH_UNIT_STEREO
;
120 ctx
->channel_blocks
[3] = CH_UNIT_MONO
;
121 ctx
->channel_blocks
[4] = CH_UNIT_MONO
;
124 avctx
->channel_layout
= AV_CH_LAYOUT_7POINT1
;
125 ctx
->num_channel_blocks
= 5;
126 ctx
->channel_blocks
[0] = CH_UNIT_STEREO
;
127 ctx
->channel_blocks
[1] = CH_UNIT_MONO
;
128 ctx
->channel_blocks
[2] = CH_UNIT_STEREO
;
129 ctx
->channel_blocks
[3] = CH_UNIT_STEREO
;
130 ctx
->channel_blocks
[4] = CH_UNIT_MONO
;
133 av_log(avctx
, AV_LOG_ERROR
,
134 "Unsupported channel count: %d!\n", avctx
->channels
);
135 return AVERROR_INVALIDDATA
;
141 static av_cold
int atrac3p_decode_init(AVCodecContext
*avctx
)
143 ATRAC3PContext
*ctx
= avctx
->priv_data
;
146 if (!avctx
->block_align
) {
147 av_log(avctx
, AV_LOG_ERROR
, "block_align is not set\n");
148 return AVERROR(EINVAL
);
151 ff_atrac3p_init_vlcs();
153 avpriv_float_dsp_init(&ctx
->fdsp
, avctx
->flags
& CODEC_FLAG_BITEXACT
);
155 /* initialize IPQF */
156 ff_mdct_init(&ctx
->ipqf_dct_ctx
, 5, 1, 32.0 / 32768.0);
158 ff_atrac3p_init_imdct(avctx
, &ctx
->mdct_ctx
);
160 ff_atrac_init_gain_compensation(&ctx
->gainc_ctx
, 6, 2);
162 ff_atrac3p_init_wave_synth();
164 if ((ret
= set_channel_params(ctx
, avctx
)) < 0)
167 ctx
->my_channel_layout
= avctx
->channel_layout
;
169 ctx
->ch_units
= av_mallocz_array(ctx
->num_channel_blocks
, sizeof(*ctx
->ch_units
));
171 if (!ctx
->ch_units
) {
172 atrac3p_decode_close(avctx
);
173 return AVERROR(ENOMEM
);
176 for (i
= 0; i
< ctx
->num_channel_blocks
; i
++) {
177 for (ch
= 0; ch
< 2; ch
++) {
178 ctx
->ch_units
[i
].channels
[ch
].ch_num
= ch
;
179 ctx
->ch_units
[i
].channels
[ch
].wnd_shape
= &ctx
->ch_units
[i
].channels
[ch
].wnd_shape_hist
[0][0];
180 ctx
->ch_units
[i
].channels
[ch
].wnd_shape_prev
= &ctx
->ch_units
[i
].channels
[ch
].wnd_shape_hist
[1][0];
181 ctx
->ch_units
[i
].channels
[ch
].gain_data
= &ctx
->ch_units
[i
].channels
[ch
].gain_data_hist
[0][0];
182 ctx
->ch_units
[i
].channels
[ch
].gain_data_prev
= &ctx
->ch_units
[i
].channels
[ch
].gain_data_hist
[1][0];
183 ctx
->ch_units
[i
].channels
[ch
].tones_info
= &ctx
->ch_units
[i
].channels
[ch
].tones_info_hist
[0][0];
184 ctx
->ch_units
[i
].channels
[ch
].tones_info_prev
= &ctx
->ch_units
[i
].channels
[ch
].tones_info_hist
[1][0];
187 ctx
->ch_units
[i
].waves_info
= &ctx
->ch_units
[i
].wave_synth_hist
[0];
188 ctx
->ch_units
[i
].waves_info_prev
= &ctx
->ch_units
[i
].wave_synth_hist
[1];
191 avctx
->sample_fmt
= AV_SAMPLE_FMT_FLTP
;
196 static void decode_residual_spectrum(Atrac3pChanUnitCtx
*ctx
,
197 float out
[2][ATRAC3P_FRAME_SAMPLES
],
199 AVCodecContext
*avctx
)
201 int i
, sb
, ch
, qu
, nspeclines
, RNG_index
;
204 /* calculate RNG table index for each subband */
205 int sb_RNG_index
[ATRAC3P_SUBBANDS
] = { 0 };
207 if (ctx
->mute_flag
) {
208 for (ch
= 0; ch
< num_channels
; ch
++)
209 memset(out
[ch
], 0, ATRAC3P_FRAME_SAMPLES
* sizeof(*out
[ch
]));
213 for (qu
= 0, RNG_index
= 0; qu
< ctx
->used_quant_units
; qu
++)
214 RNG_index
+= ctx
->channels
[0].qu_sf_idx
[qu
] +
215 ctx
->channels
[1].qu_sf_idx
[qu
];
217 for (sb
= 0; sb
< ctx
->num_coded_subbands
; sb
++, RNG_index
+= 128)
218 sb_RNG_index
[sb
] = RNG_index
& 0x3FC;
220 /* inverse quant and power compensation */
221 for (ch
= 0; ch
< num_channels
; ch
++) {
222 /* clear channel's residual spectrum */
223 memset(out
[ch
], 0, ATRAC3P_FRAME_SAMPLES
* sizeof(*out
[ch
]));
225 for (qu
= 0; qu
< ctx
->used_quant_units
; qu
++) {
226 src
= &ctx
->channels
[ch
].spectrum
[ff_atrac3p_qu_to_spec_pos
[qu
]];
227 dst
= &out
[ch
][ff_atrac3p_qu_to_spec_pos
[qu
]];
228 nspeclines
= ff_atrac3p_qu_to_spec_pos
[qu
+ 1] -
229 ff_atrac3p_qu_to_spec_pos
[qu
];
231 if (ctx
->channels
[ch
].qu_wordlen
[qu
] > 0) {
232 q
= ff_atrac3p_sf_tab
[ctx
->channels
[ch
].qu_sf_idx
[qu
]] *
233 ff_atrac3p_mant_tab
[ctx
->channels
[ch
].qu_wordlen
[qu
]];
234 for (i
= 0; i
< nspeclines
; i
++)
239 for (sb
= 0; sb
< ctx
->num_coded_subbands
; sb
++)
240 ff_atrac3p_power_compensation(ctx
, ch
, &out
[ch
][0],
241 sb_RNG_index
[sb
], sb
);
244 if (ctx
->unit_type
== CH_UNIT_STEREO
) {
245 for (sb
= 0; sb
< ctx
->num_coded_subbands
; sb
++) {
246 if (ctx
->swap_channels
[sb
]) {
247 for (i
= 0; i
< ATRAC3P_SUBBAND_SAMPLES
; i
++)
248 FFSWAP(float, out
[0][sb
* ATRAC3P_SUBBAND_SAMPLES
+ i
],
249 out
[1][sb
* ATRAC3P_SUBBAND_SAMPLES
+ i
]);
252 /* flip coefficients' sign if requested */
253 if (ctx
->negate_coeffs
[sb
])
254 for (i
= 0; i
< ATRAC3P_SUBBAND_SAMPLES
; i
++)
255 out
[1][sb
* ATRAC3P_SUBBAND_SAMPLES
+ i
] = -(out
[1][sb
* ATRAC3P_SUBBAND_SAMPLES
+ i
]);
260 static void reconstruct_frame(ATRAC3PContext
*ctx
, Atrac3pChanUnitCtx
*ch_unit
,
261 int num_channels
, AVCodecContext
*avctx
)
265 for (ch
= 0; ch
< num_channels
; ch
++) {
266 for (sb
= 0; sb
< ch_unit
->num_subbands
; sb
++) {
267 /* inverse transform and windowing */
268 ff_atrac3p_imdct(&ctx
->fdsp
, &ctx
->mdct_ctx
,
269 &ctx
->samples
[ch
][sb
* ATRAC3P_SUBBAND_SAMPLES
],
270 &ctx
->mdct_buf
[ch
][sb
* ATRAC3P_SUBBAND_SAMPLES
],
271 (ch_unit
->channels
[ch
].wnd_shape_prev
[sb
] << 1) +
272 ch_unit
->channels
[ch
].wnd_shape
[sb
], sb
);
274 /* gain compensation and overlapping */
275 ff_atrac_gain_compensation(&ctx
->gainc_ctx
,
276 &ctx
->mdct_buf
[ch
][sb
* ATRAC3P_SUBBAND_SAMPLES
],
277 &ch_unit
->prev_buf
[ch
][sb
* ATRAC3P_SUBBAND_SAMPLES
],
278 &ch_unit
->channels
[ch
].gain_data_prev
[sb
],
279 &ch_unit
->channels
[ch
].gain_data
[sb
],
280 ATRAC3P_SUBBAND_SAMPLES
,
281 &ctx
->time_buf
[ch
][sb
* ATRAC3P_SUBBAND_SAMPLES
]);
284 /* zero unused subbands in both output and overlapping buffers */
285 memset(&ch_unit
->prev_buf
[ch
][ch_unit
->num_subbands
* ATRAC3P_SUBBAND_SAMPLES
],
287 (ATRAC3P_SUBBANDS
- ch_unit
->num_subbands
) *
288 ATRAC3P_SUBBAND_SAMPLES
*
289 sizeof(ch_unit
->prev_buf
[ch
][ch_unit
->num_subbands
* ATRAC3P_SUBBAND_SAMPLES
]));
290 memset(&ctx
->time_buf
[ch
][ch_unit
->num_subbands
* ATRAC3P_SUBBAND_SAMPLES
],
292 (ATRAC3P_SUBBANDS
- ch_unit
->num_subbands
) *
293 ATRAC3P_SUBBAND_SAMPLES
*
294 sizeof(ctx
->time_buf
[ch
][ch_unit
->num_subbands
* ATRAC3P_SUBBAND_SAMPLES
]));
296 /* resynthesize and add tonal signal */
297 if (ch_unit
->waves_info
->tones_present
||
298 ch_unit
->waves_info_prev
->tones_present
) {
299 for (sb
= 0; sb
< ch_unit
->num_subbands
; sb
++)
300 if (ch_unit
->channels
[ch
].tones_info
[sb
].num_wavs
||
301 ch_unit
->channels
[ch
].tones_info_prev
[sb
].num_wavs
) {
302 ff_atrac3p_generate_tones(ch_unit
, &ctx
->fdsp
, ch
, sb
,
303 &ctx
->time_buf
[ch
][sb
* 128]);
307 /* subband synthesis and acoustic signal output */
308 ff_atrac3p_ipqf(&ctx
->ipqf_dct_ctx
, &ch_unit
->ipqf_ctx
[ch
],
309 &ctx
->time_buf
[ch
][0], &ctx
->outp_buf
[ch
][0]);
312 /* swap window shape and gain control buffers. */
313 for (ch
= 0; ch
< num_channels
; ch
++) {
314 FFSWAP(uint8_t *, ch_unit
->channels
[ch
].wnd_shape
,
315 ch_unit
->channels
[ch
].wnd_shape_prev
);
316 FFSWAP(AtracGainInfo
*, ch_unit
->channels
[ch
].gain_data
,
317 ch_unit
->channels
[ch
].gain_data_prev
);
318 FFSWAP(Atrac3pWavesData
*, ch_unit
->channels
[ch
].tones_info
,
319 ch_unit
->channels
[ch
].tones_info_prev
);
322 FFSWAP(Atrac3pWaveSynthParams
*, ch_unit
->waves_info
, ch_unit
->waves_info_prev
);
325 static int atrac3p_decode_frame(AVCodecContext
*avctx
, void *data
,
326 int *got_frame_ptr
, AVPacket
*avpkt
)
328 ATRAC3PContext
*ctx
= avctx
->priv_data
;
329 AVFrame
*frame
= data
;
330 int i
, ret
, ch_unit_id
, ch_block
= 0, out_ch_index
= 0, channels_to_process
;
331 float **samples_p
= (float **)frame
->extended_data
;
333 frame
->nb_samples
= ATRAC3P_FRAME_SAMPLES
;
334 if ((ret
= ff_get_buffer(avctx
, frame
, 0)) < 0) {
335 av_log(avctx
, AV_LOG_ERROR
, "get_buffer() failed\n");
339 if ((ret
= init_get_bits8(&ctx
->gb
, avpkt
->data
, avpkt
->size
)) < 0)
342 if (get_bits1(&ctx
->gb
)) {
343 av_log(avctx
, AV_LOG_ERROR
, "Invalid start bit!\n");
344 return AVERROR_INVALIDDATA
;
347 while (get_bits_left(&ctx
->gb
) >= 2 &&
348 (ch_unit_id
= get_bits(&ctx
->gb
, 2)) != CH_UNIT_TERMINATOR
) {
349 if (ch_unit_id
== CH_UNIT_EXTENSION
) {
350 avpriv_report_missing_feature(avctx
, "Channel unit extension");
351 return AVERROR_PATCHWELCOME
;
353 if (ch_block
>= ctx
->num_channel_blocks
||
354 ctx
->channel_blocks
[ch_block
] != ch_unit_id
) {
355 av_log(avctx
, AV_LOG_ERROR
,
356 "Frame data doesn't match channel configuration!\n");
357 return AVERROR_INVALIDDATA
;
360 ctx
->ch_units
[ch_block
].unit_type
= ch_unit_id
;
361 channels_to_process
= ch_unit_id
+ 1;
363 if ((ret
= ff_atrac3p_decode_channel_unit(&ctx
->gb
,
364 &ctx
->ch_units
[ch_block
],
369 decode_residual_spectrum(&ctx
->ch_units
[ch_block
], ctx
->samples
,
370 channels_to_process
, avctx
);
371 reconstruct_frame(ctx
, &ctx
->ch_units
[ch_block
],
372 channels_to_process
, avctx
);
374 for (i
= 0; i
< channels_to_process
; i
++)
375 memcpy(samples_p
[out_ch_index
+ i
], ctx
->outp_buf
[i
],
376 ATRAC3P_FRAME_SAMPLES
* sizeof(**samples_p
));
379 out_ch_index
+= channels_to_process
;
384 return avctx
->block_align
;
387 AVCodec ff_atrac3p_decoder
= {
388 .name
= "atrac3plus",
389 .long_name
= NULL_IF_CONFIG_SMALL("ATRAC3+ (Adaptive TRansform Acoustic Coding 3+)"),
390 .type
= AVMEDIA_TYPE_AUDIO
,
391 .id
= AV_CODEC_ID_ATRAC3P
,
392 .priv_data_size
= sizeof(ATRAC3PContext
),
393 .init
= atrac3p_decode_init
,
394 .close
= atrac3p_decode_close
,
395 .decode
= atrac3p_decode_frame
,