Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * ATRAC3 compatible decoder | |
3 | * Copyright (c) 2006-2008 Maxim Poliakovski | |
4 | * Copyright (c) 2006-2008 Benjamin Larsson | |
5 | * | |
6 | * This file is part of FFmpeg. | |
7 | * | |
8 | * FFmpeg is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License as published by the Free Software Foundation; either | |
11 | * version 2.1 of the License, or (at your option) any later version. | |
12 | * | |
13 | * FFmpeg is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * Lesser General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU Lesser General Public | |
19 | * License along with FFmpeg; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 | */ | |
22 | ||
23 | /** | |
24 | * @file | |
25 | * ATRAC3 compatible decoder. | |
26 | * This decoder handles Sony's ATRAC3 data. | |
27 | * | |
28 | * Container formats used to store ATRAC3 data: | |
29 | * RealMedia (.rm), RIFF WAV (.wav, .at3), Sony OpenMG (.oma, .aa3). | |
30 | * | |
31 | * To use this decoder, a calling application must supply the extradata | |
32 | * bytes provided in the containers above. | |
33 | */ | |
34 | ||
35 | #include <math.h> | |
36 | #include <stddef.h> | |
37 | #include <stdio.h> | |
38 | ||
39 | #include "libavutil/attributes.h" | |
40 | #include "libavutil/float_dsp.h" | |
41 | #include "libavutil/libm.h" | |
42 | #include "avcodec.h" | |
43 | #include "bytestream.h" | |
44 | #include "fft.h" | |
45 | #include "fmtconvert.h" | |
46 | #include "get_bits.h" | |
47 | #include "internal.h" | |
48 | ||
49 | #include "atrac.h" | |
50 | #include "atrac3data.h" | |
51 | ||
52 | #define JOINT_STEREO 0x12 | |
53 | #define STEREO 0x2 | |
54 | ||
55 | #define SAMPLES_PER_FRAME 1024 | |
56 | #define MDCT_SIZE 512 | |
57 | ||
58 | typedef struct GainBlock { | |
59 | AtracGainInfo g_block[4]; | |
60 | } GainBlock; | |
61 | ||
/** A single decoded tonal component. */
typedef struct TonalComponent {
    int   pos;        ///< index of the first spectral line the component is added to
    int   num_coefs;  ///< number of valid entries in coef[]
    float coef[8];    ///< dequantized coefficients
} TonalComponent;
67 | ||
68 | typedef struct ChannelUnit { | |
69 | int bands_coded; | |
70 | int num_components; | |
71 | float prev_frame[SAMPLES_PER_FRAME]; | |
72 | int gc_blk_switch; | |
73 | TonalComponent components[64]; | |
74 | GainBlock gain_block[2]; | |
75 | ||
76 | DECLARE_ALIGNED(32, float, spectrum)[SAMPLES_PER_FRAME]; | |
77 | DECLARE_ALIGNED(32, float, imdct_buf)[SAMPLES_PER_FRAME]; | |
78 | ||
79 | float delay_buf1[46]; ///<qmf delay buffers | |
80 | float delay_buf2[46]; | |
81 | float delay_buf3[46]; | |
82 | } ChannelUnit; | |
83 | ||
84 | typedef struct ATRAC3Context { | |
85 | GetBitContext gb; | |
86 | //@{ | |
87 | /** stream data */ | |
88 | int coding_mode; | |
89 | ||
90 | ChannelUnit *units; | |
91 | //@} | |
92 | //@{ | |
93 | /** joint-stereo related variables */ | |
94 | int matrix_coeff_index_prev[4]; | |
95 | int matrix_coeff_index_now[4]; | |
96 | int matrix_coeff_index_next[4]; | |
97 | int weighting_delay[6]; | |
98 | //@} | |
99 | //@{ | |
100 | /** data buffers */ | |
101 | uint8_t *decoded_bytes_buffer; | |
102 | float temp_buf[1070]; | |
103 | //@} | |
104 | //@{ | |
105 | /** extradata */ | |
106 | int scrambled_stream; | |
107 | //@} | |
108 | ||
109 | AtracGCContext gainc_ctx; | |
110 | FFTContext mdct_ctx; | |
111 | FmtConvertContext fmt_conv; | |
f6fa7814 | 112 | AVFloatDSPContext *fdsp; |
2ba45a60 DM |
113 | } ATRAC3Context; |
114 | ||
115 | static DECLARE_ALIGNED(32, float, mdct_window)[MDCT_SIZE]; | |
116 | static VLC_TYPE atrac3_vlc_table[4096][2]; | |
117 | static VLC spectral_coeff_tab[7]; | |
118 | ||
119 | /** | |
120 | * Regular 512 points IMDCT without overlapping, with the exception of the | |
121 | * swapping of odd bands caused by the reverse spectra of the QMF. | |
122 | * | |
123 | * @param odd_band 1 if the band is an odd band | |
124 | */ | |
125 | static void imlt(ATRAC3Context *q, float *input, float *output, int odd_band) | |
126 | { | |
127 | int i; | |
128 | ||
129 | if (odd_band) { | |
130 | /** | |
131 | * Reverse the odd bands before IMDCT, this is an effect of the QMF | |
132 | * transform or it gives better compression to do it this way. | |
133 | * FIXME: It should be possible to handle this in imdct_calc | |
134 | * for that to happen a modification of the prerotation step of | |
135 | * all SIMD code and C code is needed. | |
136 | * Or fix the functions before so they generate a pre reversed spectrum. | |
137 | */ | |
138 | for (i = 0; i < 128; i++) | |
139 | FFSWAP(float, input[i], input[255 - i]); | |
140 | } | |
141 | ||
142 | q->mdct_ctx.imdct_calc(&q->mdct_ctx, output, input); | |
143 | ||
144 | /* Perform windowing on the output. */ | |
f6fa7814 | 145 | q->fdsp->vector_fmul(output, output, mdct_window, MDCT_SIZE); |
2ba45a60 DM |
146 | } |
147 | ||
148 | /* | |
149 | * indata descrambling, only used for data coming from the rm container | |
150 | */ | |
151 | static int decode_bytes(const uint8_t *input, uint8_t *out, int bytes) | |
152 | { | |
153 | int i, off; | |
154 | uint32_t c; | |
155 | const uint32_t *buf; | |
156 | uint32_t *output = (uint32_t *)out; | |
157 | ||
158 | off = (intptr_t)input & 3; | |
159 | buf = (const uint32_t *)(input - off); | |
160 | if (off) | |
161 | c = av_be2ne32((0x537F6103U >> (off * 8)) | (0x537F6103U << (32 - (off * 8)))); | |
162 | else | |
163 | c = av_be2ne32(0x537F6103U); | |
164 | bytes += 3 + off; | |
165 | for (i = 0; i < bytes / 4; i++) | |
166 | output[i] = c ^ buf[i]; | |
167 | ||
168 | if (off) | |
169 | avpriv_request_sample(NULL, "Offset of %d", off); | |
170 | ||
171 | return off; | |
172 | } | |
173 | ||
174 | static av_cold void init_imdct_window(void) | |
175 | { | |
176 | int i, j; | |
177 | ||
178 | /* generate the mdct window, for details see | |
179 | * http://wiki.multimedia.cx/index.php?title=RealAudio_atrc#Windows */ | |
180 | for (i = 0, j = 255; i < 128; i++, j--) { | |
181 | float wi = sin(((i + 0.5) / 256.0 - 0.5) * M_PI) + 1.0; | |
182 | float wj = sin(((j + 0.5) / 256.0 - 0.5) * M_PI) + 1.0; | |
183 | float w = 0.5 * (wi * wi + wj * wj); | |
184 | mdct_window[i] = mdct_window[511 - i] = wi / w; | |
185 | mdct_window[j] = mdct_window[511 - j] = wj / w; | |
186 | } | |
187 | } | |
188 | ||
189 | static av_cold int atrac3_decode_close(AVCodecContext *avctx) | |
190 | { | |
191 | ATRAC3Context *q = avctx->priv_data; | |
192 | ||
f6fa7814 DM |
193 | av_freep(&q->units); |
194 | av_freep(&q->decoded_bytes_buffer); | |
195 | av_freep(&q->fdsp); | |
2ba45a60 DM |
196 | |
197 | ff_mdct_end(&q->mdct_ctx); | |
198 | ||
199 | return 0; | |
200 | } | |
201 | ||
202 | /** | |
203 | * Mantissa decoding | |
204 | * | |
205 | * @param selector which table the output values are coded with | |
206 | * @param coding_flag constant length coding or variable length coding | |
207 | * @param mantissas mantissa output table | |
208 | * @param num_codes number of values to get | |
209 | */ | |
210 | static void read_quant_spectral_coeffs(GetBitContext *gb, int selector, | |
211 | int coding_flag, int *mantissas, | |
212 | int num_codes) | |
213 | { | |
214 | int i, code, huff_symb; | |
215 | ||
216 | if (selector == 1) | |
217 | num_codes /= 2; | |
218 | ||
219 | if (coding_flag != 0) { | |
220 | /* constant length coding (CLC) */ | |
221 | int num_bits = clc_length_tab[selector]; | |
222 | ||
223 | if (selector > 1) { | |
224 | for (i = 0; i < num_codes; i++) { | |
225 | if (num_bits) | |
226 | code = get_sbits(gb, num_bits); | |
227 | else | |
228 | code = 0; | |
229 | mantissas[i] = code; | |
230 | } | |
231 | } else { | |
232 | for (i = 0; i < num_codes; i++) { | |
233 | if (num_bits) | |
234 | code = get_bits(gb, num_bits); // num_bits is always 4 in this case | |
235 | else | |
236 | code = 0; | |
237 | mantissas[i * 2 ] = mantissa_clc_tab[code >> 2]; | |
238 | mantissas[i * 2 + 1] = mantissa_clc_tab[code & 3]; | |
239 | } | |
240 | } | |
241 | } else { | |
242 | /* variable length coding (VLC) */ | |
243 | if (selector != 1) { | |
244 | for (i = 0; i < num_codes; i++) { | |
245 | huff_symb = get_vlc2(gb, spectral_coeff_tab[selector-1].table, | |
246 | spectral_coeff_tab[selector-1].bits, 3); | |
247 | huff_symb += 1; | |
248 | code = huff_symb >> 1; | |
249 | if (huff_symb & 1) | |
250 | code = -code; | |
251 | mantissas[i] = code; | |
252 | } | |
253 | } else { | |
254 | for (i = 0; i < num_codes; i++) { | |
255 | huff_symb = get_vlc2(gb, spectral_coeff_tab[selector - 1].table, | |
256 | spectral_coeff_tab[selector - 1].bits, 3); | |
257 | mantissas[i * 2 ] = mantissa_vlc_tab[huff_symb * 2 ]; | |
258 | mantissas[i * 2 + 1] = mantissa_vlc_tab[huff_symb * 2 + 1]; | |
259 | } | |
260 | } | |
261 | } | |
262 | } | |
263 | ||
264 | /** | |
265 | * Restore the quantized band spectrum coefficients | |
266 | * | |
267 | * @return subband count, fix for broken specification/files | |
268 | */ | |
269 | static int decode_spectrum(GetBitContext *gb, float *output) | |
270 | { | |
271 | int num_subbands, coding_mode, i, j, first, last, subband_size; | |
272 | int subband_vlc_index[32], sf_index[32]; | |
273 | int mantissas[128]; | |
274 | float scale_factor; | |
275 | ||
276 | num_subbands = get_bits(gb, 5); // number of coded subbands | |
277 | coding_mode = get_bits1(gb); // coding Mode: 0 - VLC/ 1-CLC | |
278 | ||
279 | /* get the VLC selector table for the subbands, 0 means not coded */ | |
280 | for (i = 0; i <= num_subbands; i++) | |
281 | subband_vlc_index[i] = get_bits(gb, 3); | |
282 | ||
283 | /* read the scale factor indexes from the stream */ | |
284 | for (i = 0; i <= num_subbands; i++) { | |
285 | if (subband_vlc_index[i] != 0) | |
286 | sf_index[i] = get_bits(gb, 6); | |
287 | } | |
288 | ||
289 | for (i = 0; i <= num_subbands; i++) { | |
290 | first = subband_tab[i ]; | |
291 | last = subband_tab[i + 1]; | |
292 | ||
293 | subband_size = last - first; | |
294 | ||
295 | if (subband_vlc_index[i] != 0) { | |
296 | /* decode spectral coefficients for this subband */ | |
297 | /* TODO: This can be done faster is several blocks share the | |
298 | * same VLC selector (subband_vlc_index) */ | |
299 | read_quant_spectral_coeffs(gb, subband_vlc_index[i], coding_mode, | |
300 | mantissas, subband_size); | |
301 | ||
302 | /* decode the scale factor for this subband */ | |
303 | scale_factor = ff_atrac_sf_table[sf_index[i]] * | |
304 | inv_max_quant[subband_vlc_index[i]]; | |
305 | ||
306 | /* inverse quantize the coefficients */ | |
307 | for (j = 0; first < last; first++, j++) | |
308 | output[first] = mantissas[j] * scale_factor; | |
309 | } else { | |
310 | /* this subband was not coded, so zero the entire subband */ | |
311 | memset(output + first, 0, subband_size * sizeof(*output)); | |
312 | } | |
313 | } | |
314 | ||
315 | /* clear the subbands that were not coded */ | |
316 | first = subband_tab[i]; | |
317 | memset(output + first, 0, (SAMPLES_PER_FRAME - first) * sizeof(*output)); | |
318 | return num_subbands; | |
319 | } | |
320 | ||
321 | /** | |
322 | * Restore the quantized tonal components | |
323 | * | |
324 | * @param components tonal components | |
325 | * @param num_bands number of coded bands | |
326 | */ | |
327 | static int decode_tonal_components(GetBitContext *gb, | |
328 | TonalComponent *components, int num_bands) | |
329 | { | |
330 | int i, b, c, m; | |
331 | int nb_components, coding_mode_selector, coding_mode; | |
332 | int band_flags[4], mantissa[8]; | |
333 | int component_count = 0; | |
334 | ||
335 | nb_components = get_bits(gb, 5); | |
336 | ||
337 | /* no tonal components */ | |
338 | if (nb_components == 0) | |
339 | return 0; | |
340 | ||
341 | coding_mode_selector = get_bits(gb, 2); | |
342 | if (coding_mode_selector == 2) | |
343 | return AVERROR_INVALIDDATA; | |
344 | ||
345 | coding_mode = coding_mode_selector & 1; | |
346 | ||
347 | for (i = 0; i < nb_components; i++) { | |
348 | int coded_values_per_component, quant_step_index; | |
349 | ||
350 | for (b = 0; b <= num_bands; b++) | |
351 | band_flags[b] = get_bits1(gb); | |
352 | ||
353 | coded_values_per_component = get_bits(gb, 3); | |
354 | ||
355 | quant_step_index = get_bits(gb, 3); | |
356 | if (quant_step_index <= 1) | |
357 | return AVERROR_INVALIDDATA; | |
358 | ||
359 | if (coding_mode_selector == 3) | |
360 | coding_mode = get_bits1(gb); | |
361 | ||
362 | for (b = 0; b < (num_bands + 1) * 4; b++) { | |
363 | int coded_components; | |
364 | ||
365 | if (band_flags[b >> 2] == 0) | |
366 | continue; | |
367 | ||
368 | coded_components = get_bits(gb, 3); | |
369 | ||
370 | for (c = 0; c < coded_components; c++) { | |
371 | TonalComponent *cmp = &components[component_count]; | |
372 | int sf_index, coded_values, max_coded_values; | |
373 | float scale_factor; | |
374 | ||
375 | sf_index = get_bits(gb, 6); | |
376 | if (component_count >= 64) | |
377 | return AVERROR_INVALIDDATA; | |
378 | ||
379 | cmp->pos = b * 64 + get_bits(gb, 6); | |
380 | ||
381 | max_coded_values = SAMPLES_PER_FRAME - cmp->pos; | |
382 | coded_values = coded_values_per_component + 1; | |
383 | coded_values = FFMIN(max_coded_values, coded_values); | |
384 | ||
385 | scale_factor = ff_atrac_sf_table[sf_index] * | |
386 | inv_max_quant[quant_step_index]; | |
387 | ||
388 | read_quant_spectral_coeffs(gb, quant_step_index, coding_mode, | |
389 | mantissa, coded_values); | |
390 | ||
391 | cmp->num_coefs = coded_values; | |
392 | ||
393 | /* inverse quant */ | |
394 | for (m = 0; m < coded_values; m++) | |
395 | cmp->coef[m] = mantissa[m] * scale_factor; | |
396 | ||
397 | component_count++; | |
398 | } | |
399 | } | |
400 | } | |
401 | ||
402 | return component_count; | |
403 | } | |
404 | ||
405 | /** | |
406 | * Decode gain parameters for the coded bands | |
407 | * | |
408 | * @param block the gainblock for the current band | |
409 | * @param num_bands amount of coded bands | |
410 | */ | |
411 | static int decode_gain_control(GetBitContext *gb, GainBlock *block, | |
412 | int num_bands) | |
413 | { | |
414 | int b, j; | |
415 | int *level, *loc; | |
416 | ||
417 | AtracGainInfo *gain = block->g_block; | |
418 | ||
419 | for (b = 0; b <= num_bands; b++) { | |
420 | gain[b].num_points = get_bits(gb, 3); | |
421 | level = gain[b].lev_code; | |
422 | loc = gain[b].loc_code; | |
423 | ||
424 | for (j = 0; j < gain[b].num_points; j++) { | |
425 | level[j] = get_bits(gb, 4); | |
426 | loc[j] = get_bits(gb, 5); | |
427 | if (j && loc[j] <= loc[j - 1]) | |
428 | return AVERROR_INVALIDDATA; | |
429 | } | |
430 | } | |
431 | ||
432 | /* Clear the unused blocks. */ | |
433 | for (; b < 4 ; b++) | |
434 | gain[b].num_points = 0; | |
435 | ||
436 | return 0; | |
437 | } | |
438 | ||
439 | /** | |
440 | * Combine the tonal band spectrum and regular band spectrum | |
441 | * | |
442 | * @param spectrum output spectrum buffer | |
443 | * @param num_components number of tonal components | |
444 | * @param components tonal components for this band | |
445 | * @return position of the last tonal coefficient | |
446 | */ | |
447 | static int add_tonal_components(float *spectrum, int num_components, | |
448 | TonalComponent *components) | |
449 | { | |
450 | int i, j, last_pos = -1; | |
451 | float *input, *output; | |
452 | ||
453 | for (i = 0; i < num_components; i++) { | |
454 | last_pos = FFMAX(components[i].pos + components[i].num_coefs, last_pos); | |
455 | input = components[i].coef; | |
456 | output = &spectrum[components[i].pos]; | |
457 | ||
458 | for (j = 0; j < components[i].num_coefs; j++) | |
459 | output[j] += input[j]; | |
460 | } | |
461 | ||
462 | return last_pos; | |
463 | } | |
464 | ||
/* Linear interpolation from `from` (step 0) towards `to` (step 8) in steps
 * of one eighth. Arguments may be evaluated more than once. */
#define INTERPOLATE(from, to, step)                                        \
    ((from) + (step) * 0.125 * ((to) - (from)))
467 | ||
468 | static void reverse_matrixing(float *su1, float *su2, int *prev_code, | |
469 | int *curr_code) | |
470 | { | |
471 | int i, nsample, band; | |
472 | float mc1_l, mc1_r, mc2_l, mc2_r; | |
473 | ||
474 | for (i = 0, band = 0; band < 4 * 256; band += 256, i++) { | |
475 | int s1 = prev_code[i]; | |
476 | int s2 = curr_code[i]; | |
477 | nsample = band; | |
478 | ||
479 | if (s1 != s2) { | |
480 | /* Selector value changed, interpolation needed. */ | |
481 | mc1_l = matrix_coeffs[s1 * 2 ]; | |
482 | mc1_r = matrix_coeffs[s1 * 2 + 1]; | |
483 | mc2_l = matrix_coeffs[s2 * 2 ]; | |
484 | mc2_r = matrix_coeffs[s2 * 2 + 1]; | |
485 | ||
486 | /* Interpolation is done over the first eight samples. */ | |
487 | for (; nsample < band + 8; nsample++) { | |
488 | float c1 = su1[nsample]; | |
489 | float c2 = su2[nsample]; | |
490 | c2 = c1 * INTERPOLATE(mc1_l, mc2_l, nsample - band) + | |
491 | c2 * INTERPOLATE(mc1_r, mc2_r, nsample - band); | |
492 | su1[nsample] = c2; | |
493 | su2[nsample] = c1 * 2.0 - c2; | |
494 | } | |
495 | } | |
496 | ||
497 | /* Apply the matrix without interpolation. */ | |
498 | switch (s2) { | |
499 | case 0: /* M/S decoding */ | |
500 | for (; nsample < band + 256; nsample++) { | |
501 | float c1 = su1[nsample]; | |
502 | float c2 = su2[nsample]; | |
503 | su1[nsample] = c2 * 2.0; | |
504 | su2[nsample] = (c1 - c2) * 2.0; | |
505 | } | |
506 | break; | |
507 | case 1: | |
508 | for (; nsample < band + 256; nsample++) { | |
509 | float c1 = su1[nsample]; | |
510 | float c2 = su2[nsample]; | |
511 | su1[nsample] = (c1 + c2) * 2.0; | |
512 | su2[nsample] = c2 * -2.0; | |
513 | } | |
514 | break; | |
515 | case 2: | |
516 | case 3: | |
517 | for (; nsample < band + 256; nsample++) { | |
518 | float c1 = su1[nsample]; | |
519 | float c2 = su2[nsample]; | |
520 | su1[nsample] = c1 + c2; | |
521 | su2[nsample] = c1 - c2; | |
522 | } | |
523 | break; | |
524 | default: | |
525 | av_assert1(0); | |
526 | } | |
527 | } | |
528 | } | |
529 | ||
530 | static void get_channel_weights(int index, int flag, float ch[2]) | |
531 | { | |
532 | if (index == 7) { | |
533 | ch[0] = 1.0; | |
534 | ch[1] = 1.0; | |
535 | } else { | |
536 | ch[0] = (index & 7) / 7.0; | |
537 | ch[1] = sqrt(2 - ch[0] * ch[0]); | |
538 | if (flag) | |
539 | FFSWAP(float, ch[0], ch[1]); | |
540 | } | |
541 | } | |
542 | ||
/**
 * Apply the joint-stereo channel weights, in place, to bands 1..3.
 * Band 0 is left untouched. Nothing is done when both weight indexes
 * (p3[1] and p3[3]) are 7.
 *
 * @param su1 samples of the first channel
 * @param su2 samples of the second channel
 * @param p3  four entries: flag and index of the first weight pair followed
 *            by flag and index of the second weight pair
 */
static void channel_weighting(float *su1, float *su2, int *p3)
{
    /* w[x][y] y=0 is left y=1 is right */
    float w[2][2];
    int band, k;

    if (p3[1] == 7 && p3[3] == 7)
        return;

    get_channel_weights(p3[1], p3[0], w[0]);
    get_channel_weights(p3[3], p3[2], w[1]);

    for (band = 1; band < 4; band++) {
        float *ch1 = su1 + band * 256;
        float *ch2 = su2 + band * 256;

        /* interpolated weights for the first eight samples of the band */
        for (k = 0; k < 8; k++) {
            ch1[k] *= INTERPOLATE(w[0][0], w[0][1], k);
            ch2[k] *= INTERPOLATE(w[1][0], w[1][1], k);
        }

        /* constant weights for the remainder of the band */
        for (; k < 256; k++) {
            ch1[k] *= w[1][0];
            ch2[k] *= w[1][1];
        }
    }
}
565 | ||
566 | /** | |
567 | * Decode a Sound Unit | |
568 | * | |
569 | * @param snd the channel unit to be used | |
570 | * @param output the decoded samples before IQMF in float representation | |
571 | * @param channel_num channel number | |
572 | * @param coding_mode the coding mode (JOINT_STEREO or regular stereo/mono) | |
573 | */ | |
574 | static int decode_channel_sound_unit(ATRAC3Context *q, GetBitContext *gb, | |
575 | ChannelUnit *snd, float *output, | |
576 | int channel_num, int coding_mode) | |
577 | { | |
578 | int band, ret, num_subbands, last_tonal, num_bands; | |
579 | GainBlock *gain1 = &snd->gain_block[ snd->gc_blk_switch]; | |
580 | GainBlock *gain2 = &snd->gain_block[1 - snd->gc_blk_switch]; | |
581 | ||
582 | if (coding_mode == JOINT_STEREO && channel_num == 1) { | |
583 | if (get_bits(gb, 2) != 3) { | |
584 | av_log(NULL,AV_LOG_ERROR,"JS mono Sound Unit id != 3.\n"); | |
585 | return AVERROR_INVALIDDATA; | |
586 | } | |
587 | } else { | |
588 | if (get_bits(gb, 6) != 0x28) { | |
589 | av_log(NULL,AV_LOG_ERROR,"Sound Unit id != 0x28.\n"); | |
590 | return AVERROR_INVALIDDATA; | |
591 | } | |
592 | } | |
593 | ||
594 | /* number of coded QMF bands */ | |
595 | snd->bands_coded = get_bits(gb, 2); | |
596 | ||
597 | ret = decode_gain_control(gb, gain2, snd->bands_coded); | |
598 | if (ret) | |
599 | return ret; | |
600 | ||
601 | snd->num_components = decode_tonal_components(gb, snd->components, | |
602 | snd->bands_coded); | |
603 | if (snd->num_components < 0) | |
604 | return snd->num_components; | |
605 | ||
606 | num_subbands = decode_spectrum(gb, snd->spectrum); | |
607 | ||
608 | /* Merge the decoded spectrum and tonal components. */ | |
609 | last_tonal = add_tonal_components(snd->spectrum, snd->num_components, | |
610 | snd->components); | |
611 | ||
612 | ||
613 | /* calculate number of used MLT/QMF bands according to the amount of coded | |
614 | spectral lines */ | |
615 | num_bands = (subband_tab[num_subbands] - 1) >> 8; | |
616 | if (last_tonal >= 0) | |
617 | num_bands = FFMAX((last_tonal + 256) >> 8, num_bands); | |
618 | ||
619 | ||
620 | /* Reconstruct time domain samples. */ | |
621 | for (band = 0; band < 4; band++) { | |
622 | /* Perform the IMDCT step without overlapping. */ | |
623 | if (band <= num_bands) | |
624 | imlt(q, &snd->spectrum[band * 256], snd->imdct_buf, band & 1); | |
625 | else | |
626 | memset(snd->imdct_buf, 0, 512 * sizeof(*snd->imdct_buf)); | |
627 | ||
628 | /* gain compensation and overlapping */ | |
629 | ff_atrac_gain_compensation(&q->gainc_ctx, snd->imdct_buf, | |
630 | &snd->prev_frame[band * 256], | |
631 | &gain1->g_block[band], &gain2->g_block[band], | |
632 | 256, &output[band * 256]); | |
633 | } | |
634 | ||
635 | /* Swap the gain control buffers for the next frame. */ | |
636 | snd->gc_blk_switch ^= 1; | |
637 | ||
638 | return 0; | |
639 | } | |
640 | ||
641 | static int decode_frame(AVCodecContext *avctx, const uint8_t *databuf, | |
642 | float **out_samples) | |
643 | { | |
644 | ATRAC3Context *q = avctx->priv_data; | |
645 | int ret, i; | |
646 | uint8_t *ptr1; | |
647 | ||
648 | if (q->coding_mode == JOINT_STEREO) { | |
649 | /* channel coupling mode */ | |
650 | /* decode Sound Unit 1 */ | |
651 | init_get_bits(&q->gb, databuf, avctx->block_align * 8); | |
652 | ||
653 | ret = decode_channel_sound_unit(q, &q->gb, q->units, out_samples[0], 0, | |
654 | JOINT_STEREO); | |
655 | if (ret != 0) | |
656 | return ret; | |
657 | ||
658 | /* Framedata of the su2 in the joint-stereo mode is encoded in | |
659 | * reverse byte order so we need to swap it first. */ | |
660 | if (databuf == q->decoded_bytes_buffer) { | |
661 | uint8_t *ptr2 = q->decoded_bytes_buffer + avctx->block_align - 1; | |
662 | ptr1 = q->decoded_bytes_buffer; | |
663 | for (i = 0; i < avctx->block_align / 2; i++, ptr1++, ptr2--) | |
664 | FFSWAP(uint8_t, *ptr1, *ptr2); | |
665 | } else { | |
666 | const uint8_t *ptr2 = databuf + avctx->block_align - 1; | |
667 | for (i = 0; i < avctx->block_align; i++) | |
668 | q->decoded_bytes_buffer[i] = *ptr2--; | |
669 | } | |
670 | ||
671 | /* Skip the sync codes (0xF8). */ | |
672 | ptr1 = q->decoded_bytes_buffer; | |
673 | for (i = 4; *ptr1 == 0xF8; i++, ptr1++) { | |
674 | if (i >= avctx->block_align) | |
675 | return AVERROR_INVALIDDATA; | |
676 | } | |
677 | ||
678 | ||
679 | /* set the bitstream reader at the start of the second Sound Unit*/ | |
680 | init_get_bits8(&q->gb, ptr1, q->decoded_bytes_buffer + avctx->block_align - ptr1); | |
681 | ||
682 | /* Fill the Weighting coeffs delay buffer */ | |
683 | memmove(q->weighting_delay, &q->weighting_delay[2], | |
684 | 4 * sizeof(*q->weighting_delay)); | |
685 | q->weighting_delay[4] = get_bits1(&q->gb); | |
686 | q->weighting_delay[5] = get_bits(&q->gb, 3); | |
687 | ||
688 | for (i = 0; i < 4; i++) { | |
689 | q->matrix_coeff_index_prev[i] = q->matrix_coeff_index_now[i]; | |
690 | q->matrix_coeff_index_now[i] = q->matrix_coeff_index_next[i]; | |
691 | q->matrix_coeff_index_next[i] = get_bits(&q->gb, 2); | |
692 | } | |
693 | ||
694 | /* Decode Sound Unit 2. */ | |
695 | ret = decode_channel_sound_unit(q, &q->gb, &q->units[1], | |
696 | out_samples[1], 1, JOINT_STEREO); | |
697 | if (ret != 0) | |
698 | return ret; | |
699 | ||
700 | /* Reconstruct the channel coefficients. */ | |
701 | reverse_matrixing(out_samples[0], out_samples[1], | |
702 | q->matrix_coeff_index_prev, | |
703 | q->matrix_coeff_index_now); | |
704 | ||
705 | channel_weighting(out_samples[0], out_samples[1], q->weighting_delay); | |
706 | } else { | |
707 | /* normal stereo mode or mono */ | |
708 | /* Decode the channel sound units. */ | |
709 | for (i = 0; i < avctx->channels; i++) { | |
710 | /* Set the bitstream reader at the start of a channel sound unit. */ | |
711 | init_get_bits(&q->gb, | |
712 | databuf + i * avctx->block_align / avctx->channels, | |
713 | avctx->block_align * 8 / avctx->channels); | |
714 | ||
715 | ret = decode_channel_sound_unit(q, &q->gb, &q->units[i], | |
716 | out_samples[i], i, q->coding_mode); | |
717 | if (ret != 0) | |
718 | return ret; | |
719 | } | |
720 | } | |
721 | ||
722 | /* Apply the iQMF synthesis filter. */ | |
723 | for (i = 0; i < avctx->channels; i++) { | |
724 | float *p1 = out_samples[i]; | |
725 | float *p2 = p1 + 256; | |
726 | float *p3 = p2 + 256; | |
727 | float *p4 = p3 + 256; | |
728 | ff_atrac_iqmf(p1, p2, 256, p1, q->units[i].delay_buf1, q->temp_buf); | |
729 | ff_atrac_iqmf(p4, p3, 256, p3, q->units[i].delay_buf2, q->temp_buf); | |
730 | ff_atrac_iqmf(p1, p3, 512, p1, q->units[i].delay_buf3, q->temp_buf); | |
731 | } | |
732 | ||
733 | return 0; | |
734 | } | |
735 | ||
736 | static int atrac3_decode_frame(AVCodecContext *avctx, void *data, | |
737 | int *got_frame_ptr, AVPacket *avpkt) | |
738 | { | |
739 | AVFrame *frame = data; | |
740 | const uint8_t *buf = avpkt->data; | |
741 | int buf_size = avpkt->size; | |
742 | ATRAC3Context *q = avctx->priv_data; | |
743 | int ret; | |
744 | const uint8_t *databuf; | |
745 | ||
746 | if (buf_size < avctx->block_align) { | |
747 | av_log(avctx, AV_LOG_ERROR, | |
748 | "Frame too small (%d bytes). Truncated file?\n", buf_size); | |
749 | return AVERROR_INVALIDDATA; | |
750 | } | |
751 | ||
752 | /* get output buffer */ | |
753 | frame->nb_samples = SAMPLES_PER_FRAME; | |
754 | if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) | |
755 | return ret; | |
756 | ||
757 | /* Check if we need to descramble and what buffer to pass on. */ | |
758 | if (q->scrambled_stream) { | |
759 | decode_bytes(buf, q->decoded_bytes_buffer, avctx->block_align); | |
760 | databuf = q->decoded_bytes_buffer; | |
761 | } else { | |
762 | databuf = buf; | |
763 | } | |
764 | ||
765 | ret = decode_frame(avctx, databuf, (float **)frame->extended_data); | |
766 | if (ret) { | |
767 | av_log(NULL, AV_LOG_ERROR, "Frame decoding error!\n"); | |
768 | return ret; | |
769 | } | |
770 | ||
771 | *got_frame_ptr = 1; | |
772 | ||
773 | return avctx->block_align; | |
774 | } | |
775 | ||
776 | static av_cold void atrac3_init_static_data(void) | |
777 | { | |
778 | int i; | |
779 | ||
780 | init_imdct_window(); | |
781 | ff_atrac_generate_tables(); | |
782 | ||
783 | /* Initialize the VLC tables. */ | |
784 | for (i = 0; i < 7; i++) { | |
785 | spectral_coeff_tab[i].table = &atrac3_vlc_table[atrac3_vlc_offs[i]]; | |
786 | spectral_coeff_tab[i].table_allocated = atrac3_vlc_offs[i + 1] - | |
787 | atrac3_vlc_offs[i ]; | |
788 | init_vlc(&spectral_coeff_tab[i], 9, huff_tab_sizes[i], | |
789 | huff_bits[i], 1, 1, | |
790 | huff_codes[i], 1, 1, INIT_VLC_USE_NEW_STATIC); | |
791 | } | |
792 | } | |
793 | ||
794 | static av_cold int atrac3_decode_init(AVCodecContext *avctx) | |
795 | { | |
796 | static int static_init_done; | |
797 | int i, ret; | |
798 | int version, delay, samples_per_frame, frame_factor; | |
799 | const uint8_t *edata_ptr = avctx->extradata; | |
800 | ATRAC3Context *q = avctx->priv_data; | |
801 | ||
802 | if (avctx->channels <= 0 || avctx->channels > 2) { | |
803 | av_log(avctx, AV_LOG_ERROR, "Channel configuration error!\n"); | |
804 | return AVERROR(EINVAL); | |
805 | } | |
806 | ||
807 | if (!static_init_done) | |
808 | atrac3_init_static_data(); | |
809 | static_init_done = 1; | |
810 | ||
811 | /* Take care of the codec-specific extradata. */ | |
812 | if (avctx->extradata_size == 14) { | |
813 | /* Parse the extradata, WAV format */ | |
814 | av_log(avctx, AV_LOG_DEBUG, "[0-1] %d\n", | |
815 | bytestream_get_le16(&edata_ptr)); // Unknown value always 1 | |
816 | edata_ptr += 4; // samples per channel | |
817 | q->coding_mode = bytestream_get_le16(&edata_ptr); | |
818 | av_log(avctx, AV_LOG_DEBUG,"[8-9] %d\n", | |
819 | bytestream_get_le16(&edata_ptr)); //Dupe of coding mode | |
820 | frame_factor = bytestream_get_le16(&edata_ptr); // Unknown always 1 | |
821 | av_log(avctx, AV_LOG_DEBUG,"[12-13] %d\n", | |
822 | bytestream_get_le16(&edata_ptr)); // Unknown always 0 | |
823 | ||
824 | /* setup */ | |
825 | samples_per_frame = SAMPLES_PER_FRAME * avctx->channels; | |
826 | version = 4; | |
827 | delay = 0x88E; | |
828 | q->coding_mode = q->coding_mode ? JOINT_STEREO : STEREO; | |
829 | q->scrambled_stream = 0; | |
830 | ||
831 | if (avctx->block_align != 96 * avctx->channels * frame_factor && | |
832 | avctx->block_align != 152 * avctx->channels * frame_factor && | |
833 | avctx->block_align != 192 * avctx->channels * frame_factor) { | |
834 | av_log(avctx, AV_LOG_ERROR, "Unknown frame/channel/frame_factor " | |
835 | "configuration %d/%d/%d\n", avctx->block_align, | |
836 | avctx->channels, frame_factor); | |
837 | return AVERROR_INVALIDDATA; | |
838 | } | |
839 | } else if (avctx->extradata_size == 12 || avctx->extradata_size == 10) { | |
840 | /* Parse the extradata, RM format. */ | |
841 | version = bytestream_get_be32(&edata_ptr); | |
842 | samples_per_frame = bytestream_get_be16(&edata_ptr); | |
843 | delay = bytestream_get_be16(&edata_ptr); | |
844 | q->coding_mode = bytestream_get_be16(&edata_ptr); | |
845 | q->scrambled_stream = 1; | |
846 | ||
847 | } else { | |
848 | av_log(NULL, AV_LOG_ERROR, "Unknown extradata size %d.\n", | |
849 | avctx->extradata_size); | |
850 | return AVERROR(EINVAL); | |
851 | } | |
852 | ||
853 | /* Check the extradata */ | |
854 | ||
855 | if (version != 4) { | |
856 | av_log(avctx, AV_LOG_ERROR, "Version %d != 4.\n", version); | |
857 | return AVERROR_INVALIDDATA; | |
858 | } | |
859 | ||
860 | if (samples_per_frame != SAMPLES_PER_FRAME && | |
861 | samples_per_frame != SAMPLES_PER_FRAME * 2) { | |
862 | av_log(avctx, AV_LOG_ERROR, "Unknown amount of samples per frame %d.\n", | |
863 | samples_per_frame); | |
864 | return AVERROR_INVALIDDATA; | |
865 | } | |
866 | ||
867 | if (delay != 0x88E) { | |
868 | av_log(avctx, AV_LOG_ERROR, "Unknown amount of delay %x != 0x88E.\n", | |
869 | delay); | |
870 | return AVERROR_INVALIDDATA; | |
871 | } | |
872 | ||
873 | if (q->coding_mode == STEREO) | |
874 | av_log(avctx, AV_LOG_DEBUG, "Normal stereo detected.\n"); | |
875 | else if (q->coding_mode == JOINT_STEREO) { | |
876 | if (avctx->channels != 2) { | |
877 | av_log(avctx, AV_LOG_ERROR, "Invalid coding mode\n"); | |
878 | return AVERROR_INVALIDDATA; | |
879 | } | |
880 | av_log(avctx, AV_LOG_DEBUG, "Joint stereo detected.\n"); | |
881 | } else { | |
882 | av_log(avctx, AV_LOG_ERROR, "Unknown channel coding mode %x!\n", | |
883 | q->coding_mode); | |
884 | return AVERROR_INVALIDDATA; | |
885 | } | |
886 | ||
887 | if (avctx->block_align >= UINT_MAX / 2) | |
888 | return AVERROR(EINVAL); | |
889 | ||
890 | q->decoded_bytes_buffer = av_mallocz(FFALIGN(avctx->block_align, 4) + | |
891 | FF_INPUT_BUFFER_PADDING_SIZE); | |
892 | if (!q->decoded_bytes_buffer) | |
893 | return AVERROR(ENOMEM); | |
894 | ||
895 | avctx->sample_fmt = AV_SAMPLE_FMT_FLTP; | |
896 | ||
897 | /* initialize the MDCT transform */ | |
898 | if ((ret = ff_mdct_init(&q->mdct_ctx, 9, 1, 1.0 / 32768)) < 0) { | |
899 | av_log(avctx, AV_LOG_ERROR, "Error initializing MDCT\n"); | |
900 | av_freep(&q->decoded_bytes_buffer); | |
901 | return ret; | |
902 | } | |
903 | ||
904 | /* init the joint-stereo decoding data */ | |
905 | q->weighting_delay[0] = 0; | |
906 | q->weighting_delay[1] = 7; | |
907 | q->weighting_delay[2] = 0; | |
908 | q->weighting_delay[3] = 7; | |
909 | q->weighting_delay[4] = 0; | |
910 | q->weighting_delay[5] = 7; | |
911 | ||
912 | for (i = 0; i < 4; i++) { | |
913 | q->matrix_coeff_index_prev[i] = 3; | |
914 | q->matrix_coeff_index_now[i] = 3; | |
915 | q->matrix_coeff_index_next[i] = 3; | |
916 | } | |
917 | ||
918 | ff_atrac_init_gain_compensation(&q->gainc_ctx, 4, 3); | |
f6fa7814 | 919 | q->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT); |
2ba45a60 DM |
920 | ff_fmt_convert_init(&q->fmt_conv, avctx); |
921 | ||
922 | q->units = av_mallocz_array(avctx->channels, sizeof(*q->units)); | |
f6fa7814 | 923 | if (!q->units || !q->fdsp) { |
2ba45a60 DM |
924 | atrac3_decode_close(avctx); |
925 | return AVERROR(ENOMEM); | |
926 | } | |
927 | ||
928 | return 0; | |
929 | } | |
930 | ||
931 | AVCodec ff_atrac3_decoder = { | |
932 | .name = "atrac3", | |
933 | .long_name = NULL_IF_CONFIG_SMALL("ATRAC3 (Adaptive TRansform Acoustic Coding 3)"), | |
934 | .type = AVMEDIA_TYPE_AUDIO, | |
935 | .id = AV_CODEC_ID_ATRAC3, | |
936 | .priv_data_size = sizeof(ATRAC3Context), | |
937 | .init = atrac3_decode_init, | |
938 | .close = atrac3_decode_close, | |
939 | .decode = atrac3_decode_frame, | |
940 | .capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1, | |
941 | .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, | |
942 | AV_SAMPLE_FMT_NONE }, | |
943 | }; |