Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * DCA encoder | |
3 | * Copyright (C) 2008-2012 Alexander E. Patrakov | |
4 | * 2010 Benjamin Larsson | |
5 | * 2011 Xiang Wang | |
6 | * | |
7 | * This file is part of FFmpeg. | |
8 | * | |
9 | * FFmpeg is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU Lesser General Public | |
11 | * License as published by the Free Software Foundation; either | |
12 | * version 2.1 of the License, or (at your option) any later version. | |
13 | * | |
14 | * FFmpeg is distributed in the hope that it will be useful, | |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | * Lesser General Public License for more details. | |
18 | * | |
19 | * You should have received a copy of the GNU Lesser General Public | |
20 | * License along with FFmpeg; if not, write to the Free Software | |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 | */ | |
23 | ||
24 | #include "libavutil/avassert.h" | |
25 | #include "libavutil/channel_layout.h" | |
26 | #include "libavutil/common.h" | |
27 | #include "avcodec.h" | |
28 | #include "dca.h" | |
29 | #include "dcadata.h" | |
30 | #include "dcaenc.h" | |
31 | #include "internal.h" | |
32 | #include "mathops.h" | |
33 | #include "put_bits.h" | |
34 | ||
/* Limits of the emitted stream. */
#define MAX_CHANNELS        6       /* size of the per-channel arrays */
#define DCA_MAX_FRAME_SIZE  16384   /* upper bound on a frame, in bytes */
#define DCA_HEADER_SIZE     13
#define DCA_LFE_SAMPLES     8       /* decimated LFE samples per frame */

/* Frame geometry: each subsubframe carries 8 samples per subband. */
#define DCA_SUBBANDS        32
#define SUBFRAMES           1
#define SUBSUBFRAMES        2
#define SUBBAND_SAMPLES     (SUBFRAMES * SUBSUBFRAMES * 8)
#define AUBANDS             25      /* number of auditory filter bands */
45 | ||
46 | typedef struct DCAContext { | |
47 | PutBitContext pb; | |
48 | int frame_size; | |
49 | int frame_bits; | |
50 | int fullband_channels; | |
51 | int channels; | |
52 | int lfe_channel; | |
53 | int samplerate_index; | |
54 | int bitrate_index; | |
55 | int channel_config; | |
56 | const int32_t *band_interpolation; | |
57 | const int32_t *band_spectrum; | |
58 | int lfe_scale_factor; | |
59 | softfloat lfe_quant; | |
60 | int32_t lfe_peak_cb; | |
61 | ||
62 | int32_t history[512][MAX_CHANNELS]; /* This is a circular buffer */ | |
63 | int32_t subband[SUBBAND_SAMPLES][DCA_SUBBANDS][MAX_CHANNELS]; | |
64 | int32_t quantized[SUBBAND_SAMPLES][DCA_SUBBANDS][MAX_CHANNELS]; | |
65 | int32_t peak_cb[DCA_SUBBANDS][MAX_CHANNELS]; | |
66 | int32_t downsampled_lfe[DCA_LFE_SAMPLES]; | |
67 | int32_t masking_curve_cb[SUBSUBFRAMES][256]; | |
68 | int abits[DCA_SUBBANDS][MAX_CHANNELS]; | |
69 | int scale_factor[DCA_SUBBANDS][MAX_CHANNELS]; | |
70 | softfloat quant[DCA_SUBBANDS][MAX_CHANNELS]; | |
71 | int32_t eff_masking_curve_cb[256]; | |
72 | int32_t band_masking_cb[32]; | |
73 | int32_t worst_quantization_noise; | |
74 | int32_t worst_noise_ever; | |
75 | int consumed_bits; | |
76 | } DCAContext; | |
77 | ||
78 | static int32_t cos_table[2048]; | |
79 | static int32_t band_interpolation[2][512]; | |
80 | static int32_t band_spectrum[2][8]; | |
81 | static int32_t auf[9][AUBANDS][256]; | |
82 | static int32_t cb_to_add[256]; | |
83 | static int32_t cb_to_level[2048]; | |
84 | static int32_t lfe_fir_64i[512]; | |
85 | ||
86 | /* Transfer function of outer and middle ear, Hz -> dB */ | |
87 | static double hom(double f) | |
88 | { | |
89 | double f1 = f / 1000; | |
90 | ||
91 | return -3.64 * pow(f1, -0.8) | |
92 | + 6.8 * exp(-0.6 * (f1 - 3.4) * (f1 - 3.4)) | |
93 | - 6.0 * exp(-0.15 * (f1 - 8.7) * (f1 - 8.7)) | |
94 | - 0.0006 * (f1 * f1) * (f1 * f1); | |
95 | } | |
96 | ||
97 | static double gammafilter(int i, double f) | |
98 | { | |
99 | double h = (f - fc[i]) / erb[i]; | |
100 | ||
101 | h = 1 + h * h; | |
102 | h = 1 / (h * h); | |
103 | return 20 * log10(h); | |
104 | } | |
105 | ||
106 | static int encode_init(AVCodecContext *avctx) | |
107 | { | |
108 | DCAContext *c = avctx->priv_data; | |
109 | uint64_t layout = avctx->channel_layout; | |
110 | int i, min_frame_bits; | |
111 | ||
112 | c->fullband_channels = c->channels = avctx->channels; | |
113 | c->lfe_channel = (avctx->channels == 3 || avctx->channels == 6); | |
114 | c->band_interpolation = band_interpolation[1]; | |
115 | c->band_spectrum = band_spectrum[1]; | |
116 | c->worst_quantization_noise = -2047; | |
117 | c->worst_noise_ever = -2047; | |
118 | ||
119 | if (!layout) { | |
120 | av_log(avctx, AV_LOG_WARNING, "No channel layout specified. The " | |
121 | "encoder will guess the layout, but it " | |
122 | "might be incorrect.\n"); | |
123 | layout = av_get_default_channel_layout(avctx->channels); | |
124 | } | |
125 | switch (layout) { | |
126 | case AV_CH_LAYOUT_MONO: c->channel_config = 0; break; | |
127 | case AV_CH_LAYOUT_STEREO: c->channel_config = 2; break; | |
128 | case AV_CH_LAYOUT_2_2: c->channel_config = 8; break; | |
129 | case AV_CH_LAYOUT_5POINT0: c->channel_config = 9; break; | |
130 | case AV_CH_LAYOUT_5POINT1: c->channel_config = 9; break; | |
131 | default: | |
132 | av_log(avctx, AV_LOG_ERROR, "Unsupported channel layout!\n"); | |
133 | return AVERROR_PATCHWELCOME; | |
134 | } | |
135 | ||
136 | if (c->lfe_channel) | |
137 | c->fullband_channels--; | |
138 | ||
139 | for (i = 0; i < 9; i++) { | |
140 | if (sample_rates[i] == avctx->sample_rate) | |
141 | break; | |
142 | } | |
143 | if (i == 9) | |
144 | return AVERROR(EINVAL); | |
145 | c->samplerate_index = i; | |
146 | ||
147 | if (avctx->bit_rate < 32000 || avctx->bit_rate > 3840000) { | |
148 | av_log(avctx, AV_LOG_ERROR, "Bit rate %i not supported.", avctx->bit_rate); | |
149 | return AVERROR(EINVAL); | |
150 | } | |
151 | for (i = 0; dca_bit_rates[i] < avctx->bit_rate; i++) | |
152 | ; | |
153 | c->bitrate_index = i; | |
154 | avctx->bit_rate = dca_bit_rates[i]; | |
155 | c->frame_bits = FFALIGN((avctx->bit_rate * 512 + avctx->sample_rate - 1) / avctx->sample_rate, 32); | |
156 | min_frame_bits = 132 + (493 + 28 * 32) * c->fullband_channels + c->lfe_channel * 72; | |
157 | if (c->frame_bits < min_frame_bits || c->frame_bits > (DCA_MAX_FRAME_SIZE << 3)) | |
158 | return AVERROR(EINVAL); | |
159 | ||
160 | c->frame_size = (c->frame_bits + 7) / 8; | |
161 | ||
162 | avctx->frame_size = 32 * SUBBAND_SAMPLES; | |
163 | ||
164 | if (!cos_table[0]) { | |
165 | int j, k; | |
166 | ||
167 | for (i = 0; i < 2048; i++) { | |
168 | cos_table[i] = (int32_t)(0x7fffffff * cos(M_PI * i / 1024)); | |
169 | cb_to_level[i] = (int32_t)(0x7fffffff * pow(10, -0.005 * i)); | |
170 | } | |
171 | ||
172 | /* FIXME: probably incorrect */ | |
173 | for (i = 0; i < 256; i++) { | |
174 | lfe_fir_64i[i] = (int32_t)(0x01ffffff * lfe_fir_64[i]); | |
175 | lfe_fir_64i[511 - i] = (int32_t)(0x01ffffff * lfe_fir_64[i]); | |
176 | } | |
177 | ||
178 | for (i = 0; i < 512; i++) { | |
179 | band_interpolation[0][i] = (int32_t)(0x1000000000ULL * fir_32bands_perfect[i]); | |
180 | band_interpolation[1][i] = (int32_t)(0x1000000000ULL * fir_32bands_nonperfect[i]); | |
181 | } | |
182 | ||
183 | for (i = 0; i < 9; i++) { | |
184 | for (j = 0; j < AUBANDS; j++) { | |
185 | for (k = 0; k < 256; k++) { | |
186 | double freq = sample_rates[i] * (k + 0.5) / 512; | |
187 | ||
188 | auf[i][j][k] = (int32_t)(10 * (hom(freq) + gammafilter(j, freq))); | |
189 | } | |
190 | } | |
191 | } | |
192 | ||
193 | for (i = 0; i < 256; i++) { | |
194 | double add = 1 + pow(10, -0.01 * i); | |
195 | cb_to_add[i] = (int32_t)(100 * log10(add)); | |
196 | } | |
197 | for (j = 0; j < 8; j++) { | |
198 | double accum = 0; | |
199 | for (i = 0; i < 512; i++) { | |
200 | double reconst = fir_32bands_perfect[i] * ((i & 64) ? (-1) : 1); | |
201 | accum += reconst * cos(2 * M_PI * (i + 0.5 - 256) * (j + 0.5) / 512); | |
202 | } | |
203 | band_spectrum[0][j] = (int32_t)(200 * log10(accum)); | |
204 | } | |
205 | for (j = 0; j < 8; j++) { | |
206 | double accum = 0; | |
207 | for (i = 0; i < 512; i++) { | |
208 | double reconst = fir_32bands_nonperfect[i] * ((i & 64) ? (-1) : 1); | |
209 | accum += reconst * cos(2 * M_PI * (i + 0.5 - 256) * (j + 0.5) / 512); | |
210 | } | |
211 | band_spectrum[1][j] = (int32_t)(200 * log10(accum)); | |
212 | } | |
213 | } | |
214 | return 0; | |
215 | } | |
216 | ||
217 | static inline int32_t cos_t(int x) | |
218 | { | |
219 | return cos_table[x & 2047]; | |
220 | } | |
221 | ||
/**
 * Fixed-point sine, obtained from cos_t() shifted back by a quarter
 * period (512 of 2048 steps).
 */
static inline int32_t sin_t(int x)
{
    const int shifted = x - 512;

    return cos_t(shifted);
}
226 | ||
/** Halve a value, adding 1 before the shift so the result is rounded. */
static inline int32_t half32(int32_t a)
{
    const int32_t biased = a + 1;

    return biased >> 1;
}
231 | ||
/**
 * Multiply two 32-bit values and return the upper 32 bits of the 64-bit
 * product, after adding 0x80000000 for rounding.
 */
static inline int32_t mul32(int32_t a, int32_t b)
{
    const int64_t product = (int64_t)a * b;
    const int64_t rounded = product + 0x80000000ULL;

    return rounded >> 32;
}
237 | ||
238 | static void subband_transform(DCAContext *c, const int32_t *input) | |
239 | { | |
240 | int ch, subs, i, k, j; | |
241 | ||
242 | for (ch = 0; ch < c->fullband_channels; ch++) { | |
243 | /* History is copied because it is also needed for PSY */ | |
244 | int32_t hist[512]; | |
245 | int hist_start = 0; | |
246 | ||
247 | for (i = 0; i < 512; i++) | |
248 | hist[i] = c->history[i][ch]; | |
249 | ||
250 | for (subs = 0; subs < SUBBAND_SAMPLES; subs++) { | |
251 | int32_t accum[64]; | |
252 | int32_t resp; | |
253 | int band; | |
254 | ||
255 | /* Calculate the convolutions at once */ | |
256 | for (i = 0; i < 64; i++) | |
257 | accum[i] = 0; | |
258 | ||
259 | for (k = 0, i = hist_start, j = 0; | |
260 | i < 512; k = (k + 1) & 63, i++, j++) | |
261 | accum[k] += mul32(hist[i], c->band_interpolation[j]); | |
262 | for (i = 0; i < hist_start; k = (k + 1) & 63, i++, j++) | |
263 | accum[k] += mul32(hist[i], c->band_interpolation[j]); | |
264 | ||
265 | for (k = 16; k < 32; k++) | |
266 | accum[k] = accum[k] - accum[31 - k]; | |
267 | for (k = 32; k < 48; k++) | |
268 | accum[k] = accum[k] + accum[95 - k]; | |
269 | ||
270 | for (band = 0; band < 32; band++) { | |
271 | resp = 0; | |
272 | for (i = 16; i < 48; i++) { | |
273 | int s = (2 * band + 1) * (2 * (i + 16) + 1); | |
274 | resp += mul32(accum[i], cos_t(s << 3)) >> 3; | |
275 | } | |
276 | ||
277 | c->subband[subs][band][ch] = ((band + 1) & 2) ? -resp : resp; | |
278 | } | |
279 | ||
280 | /* Copy in 32 new samples from input */ | |
281 | for (i = 0; i < 32; i++) | |
282 | hist[i + hist_start] = input[(subs * 32 + i) * c->channels + ch]; | |
283 | hist_start = (hist_start + 32) & 511; | |
284 | } | |
285 | } | |
286 | } | |
287 | ||
288 | static void lfe_downsample(DCAContext *c, const int32_t *input) | |
289 | { | |
290 | /* FIXME: make 128x LFE downsampling possible */ | |
291 | int i, j, lfes; | |
292 | int32_t hist[512]; | |
293 | int32_t accum; | |
294 | int hist_start = 0; | |
295 | ||
296 | for (i = 0; i < 512; i++) | |
297 | hist[i] = c->history[i][c->channels - 1]; | |
298 | ||
299 | for (lfes = 0; lfes < DCA_LFE_SAMPLES; lfes++) { | |
300 | /* Calculate the convolution */ | |
301 | accum = 0; | |
302 | ||
303 | for (i = hist_start, j = 0; i < 512; i++, j++) | |
304 | accum += mul32(hist[i], lfe_fir_64i[j]); | |
305 | for (i = 0; i < hist_start; i++, j++) | |
306 | accum += mul32(hist[i], lfe_fir_64i[j]); | |
307 | ||
308 | c->downsampled_lfe[lfes] = accum; | |
309 | ||
310 | /* Copy in 64 new samples from input */ | |
311 | for (i = 0; i < 64; i++) | |
312 | hist[i + hist_start] = input[(lfes * 64 + i) * c->channels + c->channels - 1]; | |
313 | ||
314 | hist_start = (hist_start + 64) & 511; | |
315 | } | |
316 | } | |
317 | ||
/** Complex number with 32-bit fixed-point components. */
typedef struct cplx32 {
    int32_t re;     /* real part */
    int32_t im;     /* imaginary part */
} cplx32;
322 | ||
323 | static void fft(const int32_t in[2 * 256], cplx32 out[256]) | |
324 | { | |
325 | cplx32 buf[256], rin[256], rout[256]; | |
326 | int i, j, k, l; | |
327 | ||
328 | /* do two transforms in parallel */ | |
329 | for (i = 0; i < 256; i++) { | |
330 | /* Apply the Hann window */ | |
331 | rin[i].re = mul32(in[2 * i], 0x3fffffff - (cos_t(8 * i + 2) >> 1)); | |
332 | rin[i].im = mul32(in[2 * i + 1], 0x3fffffff - (cos_t(8 * i + 6) >> 1)); | |
333 | } | |
334 | /* pre-rotation */ | |
335 | for (i = 0; i < 256; i++) { | |
336 | buf[i].re = mul32(cos_t(4 * i + 2), rin[i].re) | |
337 | - mul32(sin_t(4 * i + 2), rin[i].im); | |
338 | buf[i].im = mul32(cos_t(4 * i + 2), rin[i].im) | |
339 | + mul32(sin_t(4 * i + 2), rin[i].re); | |
340 | } | |
341 | ||
342 | for (j = 256, l = 1; j != 1; j >>= 1, l <<= 1) { | |
343 | for (k = 0; k < 256; k += j) { | |
344 | for (i = k; i < k + j / 2; i++) { | |
345 | cplx32 sum, diff; | |
346 | int t = 8 * l * i; | |
347 | ||
348 | sum.re = buf[i].re + buf[i + j / 2].re; | |
349 | sum.im = buf[i].im + buf[i + j / 2].im; | |
350 | ||
351 | diff.re = buf[i].re - buf[i + j / 2].re; | |
352 | diff.im = buf[i].im - buf[i + j / 2].im; | |
353 | ||
354 | buf[i].re = half32(sum.re); | |
355 | buf[i].im = half32(sum.im); | |
356 | ||
357 | buf[i + j / 2].re = mul32(diff.re, cos_t(t)) | |
358 | - mul32(diff.im, sin_t(t)); | |
359 | buf[i + j / 2].im = mul32(diff.im, cos_t(t)) | |
360 | + mul32(diff.re, sin_t(t)); | |
361 | } | |
362 | } | |
363 | } | |
364 | /* post-rotation */ | |
365 | for (i = 0; i < 256; i++) { | |
366 | int b = ff_reverse[i]; | |
367 | rout[i].re = mul32(buf[b].re, cos_t(4 * i)) | |
368 | - mul32(buf[b].im, sin_t(4 * i)); | |
369 | rout[i].im = mul32(buf[b].im, cos_t(4 * i)) | |
370 | + mul32(buf[b].re, sin_t(4 * i)); | |
371 | } | |
372 | for (i = 0; i < 256; i++) { | |
373 | /* separate the results of the two transforms */ | |
374 | cplx32 o1, o2; | |
375 | ||
376 | o1.re = rout[i].re - rout[255 - i].re; | |
377 | o1.im = rout[i].im + rout[255 - i].im; | |
378 | ||
379 | o2.re = rout[i].im - rout[255 - i].im; | |
380 | o2.im = -rout[i].re - rout[255 - i].re; | |
381 | ||
382 | /* combine them into one long transform */ | |
383 | out[i].re = mul32( o1.re + o2.re, cos_t(2 * i + 1)) | |
384 | + mul32( o1.im - o2.im, sin_t(2 * i + 1)); | |
385 | out[i].im = mul32( o1.im + o2.im, cos_t(2 * i + 1)) | |
386 | + mul32(-o1.re + o2.re, sin_t(2 * i + 1)); | |
387 | } | |
388 | } | |
389 | ||
390 | static int32_t get_cb(int32_t in) | |
391 | { | |
392 | int i, res; | |
393 | ||
394 | res = 0; | |
395 | if (in < 0) | |
396 | in = -in; | |
397 | for (i = 1024; i > 0; i >>= 1) { | |
398 | if (cb_to_level[i + res] >= in) | |
399 | res += i; | |
400 | } | |
401 | return -res; | |
402 | } | |
403 | ||
404 | static int32_t add_cb(int32_t a, int32_t b) | |
405 | { | |
406 | if (a < b) | |
407 | FFSWAP(int32_t, a, b); | |
408 | ||
409 | if (a - b >= 256) | |
410 | return a; | |
411 | return a + cb_to_add[a - b]; | |
412 | } | |
413 | ||
414 | static void adjust_jnd(int samplerate_index, | |
415 | const int32_t in[512], int32_t out_cb[256]) | |
416 | { | |
417 | int32_t power[256]; | |
418 | cplx32 out[256]; | |
419 | int32_t out_cb_unnorm[256]; | |
420 | int32_t denom; | |
421 | const int32_t ca_cb = -1114; | |
422 | const int32_t cs_cb = 928; | |
423 | int i, j; | |
424 | ||
425 | fft(in, out); | |
426 | ||
427 | for (j = 0; j < 256; j++) { | |
428 | power[j] = add_cb(get_cb(out[j].re), get_cb(out[j].im)); | |
429 | out_cb_unnorm[j] = -2047; /* and can only grow */ | |
430 | } | |
431 | ||
432 | for (i = 0; i < AUBANDS; i++) { | |
433 | denom = ca_cb; /* and can only grow */ | |
434 | for (j = 0; j < 256; j++) | |
435 | denom = add_cb(denom, power[j] + auf[samplerate_index][i][j]); | |
436 | for (j = 0; j < 256; j++) | |
437 | out_cb_unnorm[j] = add_cb(out_cb_unnorm[j], | |
438 | -denom + auf[samplerate_index][i][j]); | |
439 | } | |
440 | ||
441 | for (j = 0; j < 256; j++) | |
442 | out_cb[j] = add_cb(out_cb[j], -out_cb_unnorm[j] - ca_cb - cs_cb); | |
443 | } | |
444 | ||
445 | typedef void (*walk_band_t)(DCAContext *c, int band1, int band2, int f, | |
446 | int32_t spectrum1, int32_t spectrum2, int channel, | |
447 | int32_t * arg); | |
448 | ||
449 | static void walk_band_low(DCAContext *c, int band, int channel, | |
450 | walk_band_t walk, int32_t *arg) | |
451 | { | |
452 | int f; | |
453 | ||
454 | if (band == 0) { | |
455 | for (f = 0; f < 4; f++) | |
456 | walk(c, 0, 0, f, 0, -2047, channel, arg); | |
457 | } else { | |
458 | for (f = 0; f < 8; f++) | |
459 | walk(c, band, band - 1, 8 * band - 4 + f, | |
460 | c->band_spectrum[7 - f], c->band_spectrum[f], channel, arg); | |
461 | } | |
462 | } | |
463 | ||
464 | static void walk_band_high(DCAContext *c, int band, int channel, | |
465 | walk_band_t walk, int32_t *arg) | |
466 | { | |
467 | int f; | |
468 | ||
469 | if (band == 31) { | |
470 | for (f = 0; f < 4; f++) | |
471 | walk(c, 31, 31, 256 - 4 + f, 0, -2047, channel, arg); | |
472 | } else { | |
473 | for (f = 0; f < 8; f++) | |
474 | walk(c, band, band + 1, 8 * band + 4 + f, | |
475 | c->band_spectrum[f], c->band_spectrum[7 - f], channel, arg); | |
476 | } | |
477 | } | |
478 | ||
479 | static void update_band_masking(DCAContext *c, int band1, int band2, | |
480 | int f, int32_t spectrum1, int32_t spectrum2, | |
481 | int channel, int32_t * arg) | |
482 | { | |
483 | int32_t value = c->eff_masking_curve_cb[f] - spectrum1; | |
484 | ||
485 | if (value < c->band_masking_cb[band1]) | |
486 | c->band_masking_cb[band1] = value; | |
487 | } | |
488 | ||
489 | static void calc_masking(DCAContext *c, const int32_t *input) | |
490 | { | |
491 | int i, k, band, ch, ssf; | |
492 | int32_t data[512]; | |
493 | ||
494 | for (i = 0; i < 256; i++) | |
495 | for (ssf = 0; ssf < SUBSUBFRAMES; ssf++) | |
496 | c->masking_curve_cb[ssf][i] = -2047; | |
497 | ||
498 | for (ssf = 0; ssf < SUBSUBFRAMES; ssf++) | |
499 | for (ch = 0; ch < c->fullband_channels; ch++) { | |
500 | for (i = 0, k = 128 + 256 * ssf; k < 512; i++, k++) | |
501 | data[i] = c->history[k][ch]; | |
502 | for (k -= 512; i < 512; i++, k++) | |
503 | data[i] = input[k * c->channels + ch]; | |
504 | adjust_jnd(c->samplerate_index, data, c->masking_curve_cb[ssf]); | |
505 | } | |
506 | for (i = 0; i < 256; i++) { | |
507 | int32_t m = 2048; | |
508 | ||
509 | for (ssf = 0; ssf < SUBSUBFRAMES; ssf++) | |
510 | if (c->masking_curve_cb[ssf][i] < m) | |
511 | m = c->masking_curve_cb[ssf][i]; | |
512 | c->eff_masking_curve_cb[i] = m; | |
513 | } | |
514 | ||
515 | for (band = 0; band < 32; band++) { | |
516 | c->band_masking_cb[band] = 2048; | |
517 | walk_band_low(c, band, 0, update_band_masking, NULL); | |
518 | walk_band_high(c, band, 0, update_band_masking, NULL); | |
519 | } | |
520 | } | |
521 | ||
522 | static void find_peaks(DCAContext *c) | |
523 | { | |
524 | int band, ch; | |
525 | ||
526 | for (band = 0; band < 32; band++) | |
527 | for (ch = 0; ch < c->fullband_channels; ch++) { | |
528 | int sample; | |
529 | int32_t m = 0; | |
530 | ||
531 | for (sample = 0; sample < SUBBAND_SAMPLES; sample++) { | |
532 | int32_t s = abs(c->subband[sample][band][ch]); | |
533 | if (m < s) | |
534 | m = s; | |
535 | } | |
536 | c->peak_cb[band][ch] = get_cb(m); | |
537 | } | |
538 | ||
539 | if (c->lfe_channel) { | |
540 | int sample; | |
541 | int32_t m = 0; | |
542 | ||
543 | for (sample = 0; sample < DCA_LFE_SAMPLES; sample++) | |
544 | if (m < abs(c->downsampled_lfe[sample])) | |
545 | m = abs(c->downsampled_lfe[sample]); | |
546 | c->lfe_peak_cb = get_cb(m); | |
547 | } | |
548 | } | |
549 | ||
/* Step by which assign_bits() widens its search for the noise level. */
static const int snr_fudge = 128;

/* Bit flags returned by init_quantization_noise(). */
enum {
    USED_1ABITS  = 1,   /* some band was given abits == 1 */
    USED_NABITS  = 2,   /* some band was given an intermediate abits value */
    USED_26ABITS = 4,   /* some band was given abits == 26 */
};
554 | ||
555 | static int init_quantization_noise(DCAContext *c, int noise) | |
556 | { | |
557 | int ch, band, ret = 0; | |
558 | ||
559 | c->consumed_bits = 132 + 493 * c->fullband_channels; | |
560 | if (c->lfe_channel) | |
561 | c->consumed_bits += 72; | |
562 | ||
563 | /* attempt to guess the bit distribution based on the prevoius frame */ | |
564 | for (ch = 0; ch < c->fullband_channels; ch++) { | |
565 | for (band = 0; band < 32; band++) { | |
566 | int snr_cb = c->peak_cb[band][ch] - c->band_masking_cb[band] - noise; | |
567 | ||
568 | if (snr_cb >= 1312) { | |
569 | c->abits[band][ch] = 26; | |
570 | ret |= USED_26ABITS; | |
571 | } else if (snr_cb >= 222) { | |
572 | c->abits[band][ch] = 8 + mul32(snr_cb - 222, 69000000); | |
573 | ret |= USED_NABITS; | |
574 | } else if (snr_cb >= 0) { | |
575 | c->abits[band][ch] = 2 + mul32(snr_cb, 106000000); | |
576 | ret |= USED_NABITS; | |
577 | } else { | |
578 | c->abits[band][ch] = 1; | |
579 | ret |= USED_1ABITS; | |
580 | } | |
581 | } | |
582 | } | |
583 | ||
584 | for (band = 0; band < 32; band++) | |
585 | for (ch = 0; ch < c->fullband_channels; ch++) { | |
586 | c->consumed_bits += bit_consumption[c->abits[band][ch]]; | |
587 | } | |
588 | ||
589 | return ret; | |
590 | } | |
591 | ||
592 | static void assign_bits(DCAContext *c) | |
593 | { | |
594 | /* Find the bounds where the binary search should work */ | |
595 | int low, high, down; | |
596 | int used_abits = 0; | |
597 | ||
598 | init_quantization_noise(c, c->worst_quantization_noise); | |
599 | low = high = c->worst_quantization_noise; | |
600 | if (c->consumed_bits > c->frame_bits) { | |
601 | while (c->consumed_bits > c->frame_bits) { | |
602 | av_assert0(used_abits != USED_1ABITS); | |
603 | low = high; | |
604 | high += snr_fudge; | |
605 | used_abits = init_quantization_noise(c, high); | |
606 | } | |
607 | } else { | |
608 | while (c->consumed_bits <= c->frame_bits) { | |
609 | high = low; | |
610 | if (used_abits == USED_26ABITS) | |
611 | goto out; /* The requested bitrate is too high, pad with zeros */ | |
612 | low -= snr_fudge; | |
613 | used_abits = init_quantization_noise(c, low); | |
614 | } | |
615 | } | |
616 | ||
617 | /* Now do a binary search between low and high to see what fits */ | |
618 | for (down = snr_fudge >> 1; down; down >>= 1) { | |
619 | init_quantization_noise(c, high - down); | |
620 | if (c->consumed_bits <= c->frame_bits) | |
621 | high -= down; | |
622 | } | |
623 | init_quantization_noise(c, high); | |
624 | out: | |
625 | c->worst_quantization_noise = high; | |
626 | if (high > c->worst_noise_ever) | |
627 | c->worst_noise_ever = high; | |
628 | } | |
629 | ||
630 | static void shift_history(DCAContext *c, const int32_t *input) | |
631 | { | |
632 | int k, ch; | |
633 | ||
634 | for (k = 0; k < 512; k++) | |
635 | for (ch = 0; ch < c->channels; ch++) | |
636 | c->history[k][ch] = input[k * c->channels + ch]; | |
637 | } | |
638 | ||
639 | static int32_t quantize_value(int32_t value, softfloat quant) | |
640 | { | |
641 | int32_t offset = 1 << (quant.e - 1); | |
642 | ||
643 | value = mul32(value, quant.m) + offset; | |
644 | value = value >> quant.e; | |
645 | return value; | |
646 | } | |
647 | ||
648 | static int calc_one_scale(int32_t peak_cb, int abits, softfloat *quant) | |
649 | { | |
650 | int32_t peak; | |
651 | int our_nscale, try_remove; | |
652 | softfloat our_quant; | |
653 | ||
654 | av_assert0(peak_cb <= 0); | |
655 | av_assert0(peak_cb >= -2047); | |
656 | ||
657 | our_nscale = 127; | |
658 | peak = cb_to_level[-peak_cb]; | |
659 | ||
660 | for (try_remove = 64; try_remove > 0; try_remove >>= 1) { | |
661 | if (scalefactor_inv[our_nscale - try_remove].e + stepsize_inv[abits].e <= 17) | |
662 | continue; | |
663 | our_quant.m = mul32(scalefactor_inv[our_nscale - try_remove].m, stepsize_inv[abits].m); | |
664 | our_quant.e = scalefactor_inv[our_nscale - try_remove].e + stepsize_inv[abits].e - 17; | |
665 | if ((quant_levels[abits] - 1) / 2 < quantize_value(peak, our_quant)) | |
666 | continue; | |
667 | our_nscale -= try_remove; | |
668 | } | |
669 | ||
670 | if (our_nscale >= 125) | |
671 | our_nscale = 124; | |
672 | ||
673 | quant->m = mul32(scalefactor_inv[our_nscale].m, stepsize_inv[abits].m); | |
674 | quant->e = scalefactor_inv[our_nscale].e + stepsize_inv[abits].e - 17; | |
675 | av_assert0((quant_levels[abits] - 1) / 2 >= quantize_value(peak, *quant)); | |
676 | ||
677 | return our_nscale; | |
678 | } | |
679 | ||
680 | static void calc_scales(DCAContext *c) | |
681 | { | |
682 | int band, ch; | |
683 | ||
684 | for (band = 0; band < 32; band++) | |
685 | for (ch = 0; ch < c->fullband_channels; ch++) | |
686 | c->scale_factor[band][ch] = calc_one_scale(c->peak_cb[band][ch], | |
687 | c->abits[band][ch], | |
688 | &c->quant[band][ch]); | |
689 | ||
690 | if (c->lfe_channel) | |
691 | c->lfe_scale_factor = calc_one_scale(c->lfe_peak_cb, 11, &c->lfe_quant); | |
692 | } | |
693 | ||
694 | static void quantize_all(DCAContext *c) | |
695 | { | |
696 | int sample, band, ch; | |
697 | ||
698 | for (sample = 0; sample < SUBBAND_SAMPLES; sample++) | |
699 | for (band = 0; band < 32; band++) | |
700 | for (ch = 0; ch < c->fullband_channels; ch++) | |
701 | c->quantized[sample][band][ch] = quantize_value(c->subband[sample][band][ch], c->quant[band][ch]); | |
702 | } | |
703 | ||
704 | static void put_frame_header(DCAContext *c) | |
705 | { | |
706 | /* SYNC */ | |
707 | put_bits(&c->pb, 16, 0x7ffe); | |
708 | put_bits(&c->pb, 16, 0x8001); | |
709 | ||
710 | /* Frame type: normal */ | |
711 | put_bits(&c->pb, 1, 1); | |
712 | ||
713 | /* Deficit sample count: none */ | |
714 | put_bits(&c->pb, 5, 31); | |
715 | ||
716 | /* CRC is not present */ | |
717 | put_bits(&c->pb, 1, 0); | |
718 | ||
719 | /* Number of PCM sample blocks */ | |
720 | put_bits(&c->pb, 7, SUBBAND_SAMPLES - 1); | |
721 | ||
722 | /* Primary frame byte size */ | |
723 | put_bits(&c->pb, 14, c->frame_size - 1); | |
724 | ||
725 | /* Audio channel arrangement */ | |
726 | put_bits(&c->pb, 6, c->channel_config); | |
727 | ||
728 | /* Core audio sampling frequency */ | |
729 | put_bits(&c->pb, 4, bitstream_sfreq[c->samplerate_index]); | |
730 | ||
731 | /* Transmission bit rate */ | |
732 | put_bits(&c->pb, 5, c->bitrate_index); | |
733 | ||
734 | /* Embedded down mix: disabled */ | |
735 | put_bits(&c->pb, 1, 0); | |
736 | ||
737 | /* Embedded dynamic range flag: not present */ | |
738 | put_bits(&c->pb, 1, 0); | |
739 | ||
740 | /* Embedded time stamp flag: not present */ | |
741 | put_bits(&c->pb, 1, 0); | |
742 | ||
743 | /* Auxiliary data flag: not present */ | |
744 | put_bits(&c->pb, 1, 0); | |
745 | ||
746 | /* HDCD source: no */ | |
747 | put_bits(&c->pb, 1, 0); | |
748 | ||
749 | /* Extension audio ID: N/A */ | |
750 | put_bits(&c->pb, 3, 0); | |
751 | ||
752 | /* Extended audio data: not present */ | |
753 | put_bits(&c->pb, 1, 0); | |
754 | ||
755 | /* Audio sync word insertion flag: after each sub-frame */ | |
756 | put_bits(&c->pb, 1, 0); | |
757 | ||
758 | /* Low frequency effects flag: not present or 64x subsampling */ | |
759 | put_bits(&c->pb, 2, c->lfe_channel ? 2 : 0); | |
760 | ||
761 | /* Predictor history switch flag: on */ | |
762 | put_bits(&c->pb, 1, 1); | |
763 | ||
764 | /* No CRC */ | |
765 | /* Multirate interpolator switch: non-perfect reconstruction */ | |
766 | put_bits(&c->pb, 1, 0); | |
767 | ||
768 | /* Encoder software revision: 7 */ | |
769 | put_bits(&c->pb, 4, 7); | |
770 | ||
771 | /* Copy history: 0 */ | |
772 | put_bits(&c->pb, 2, 0); | |
773 | ||
774 | /* Source PCM resolution: 16 bits, not DTS ES */ | |
775 | put_bits(&c->pb, 3, 0); | |
776 | ||
777 | /* Front sum/difference coding: no */ | |
778 | put_bits(&c->pb, 1, 0); | |
779 | ||
780 | /* Surrounds sum/difference coding: no */ | |
781 | put_bits(&c->pb, 1, 0); | |
782 | ||
783 | /* Dialog normalization: 0 dB */ | |
784 | put_bits(&c->pb, 4, 0); | |
785 | } | |
786 | ||
787 | static void put_primary_audio_header(DCAContext *c) | |
788 | { | |
789 | static const int bitlen[11] = { 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3 }; | |
790 | static const int thr[11] = { 0, 1, 3, 3, 3, 3, 7, 7, 7, 7, 7 }; | |
791 | ||
792 | int ch, i; | |
793 | /* Number of subframes */ | |
794 | put_bits(&c->pb, 4, SUBFRAMES - 1); | |
795 | ||
796 | /* Number of primary audio channels */ | |
797 | put_bits(&c->pb, 3, c->fullband_channels - 1); | |
798 | ||
799 | /* Subband activity count */ | |
800 | for (ch = 0; ch < c->fullband_channels; ch++) | |
801 | put_bits(&c->pb, 5, DCA_SUBBANDS - 2); | |
802 | ||
803 | /* High frequency VQ start subband */ | |
804 | for (ch = 0; ch < c->fullband_channels; ch++) | |
805 | put_bits(&c->pb, 5, DCA_SUBBANDS - 1); | |
806 | ||
807 | /* Joint intensity coding index: 0, 0 */ | |
808 | for (ch = 0; ch < c->fullband_channels; ch++) | |
809 | put_bits(&c->pb, 3, 0); | |
810 | ||
811 | /* Transient mode codebook: A4, A4 (arbitrary) */ | |
812 | for (ch = 0; ch < c->fullband_channels; ch++) | |
813 | put_bits(&c->pb, 2, 0); | |
814 | ||
815 | /* Scale factor code book: 7 bit linear, 7-bit sqrt table (for each channel) */ | |
816 | for (ch = 0; ch < c->fullband_channels; ch++) | |
817 | put_bits(&c->pb, 3, 6); | |
818 | ||
819 | /* Bit allocation quantizer select: linear 5-bit */ | |
820 | for (ch = 0; ch < c->fullband_channels; ch++) | |
821 | put_bits(&c->pb, 3, 6); | |
822 | ||
823 | /* Quantization index codebook select: dummy data | |
824 | to avoid transmission of scale factor adjustment */ | |
825 | for (i = 1; i < 11; i++) | |
826 | for (ch = 0; ch < c->fullband_channels; ch++) | |
827 | put_bits(&c->pb, bitlen[i], thr[i]); | |
828 | ||
829 | /* Scale factor adjustment index: not transmitted */ | |
830 | /* Audio header CRC check word: not transmitted */ | |
831 | } | |
832 | ||
833 | static void put_subframe_samples(DCAContext *c, int ss, int band, int ch) | |
834 | { | |
835 | if (c->abits[band][ch] <= 7) { | |
836 | int sum, i, j; | |
837 | for (i = 0; i < 8; i += 4) { | |
838 | sum = 0; | |
839 | for (j = 3; j >= 0; j--) { | |
840 | sum *= quant_levels[c->abits[band][ch]]; | |
841 | sum += c->quantized[ss * 8 + i + j][band][ch]; | |
842 | sum += (quant_levels[c->abits[band][ch]] - 1) / 2; | |
843 | } | |
844 | put_bits(&c->pb, bit_consumption[c->abits[band][ch]] / 4, sum); | |
845 | } | |
846 | } else { | |
847 | int i; | |
848 | for (i = 0; i < 8; i++) { | |
849 | int bits = bit_consumption[c->abits[band][ch]] / 16; | |
850 | int32_t mask = (1 << bits) - 1; | |
851 | put_bits(&c->pb, bits, c->quantized[ss * 8 + i][band][ch] & mask); | |
852 | } | |
853 | } | |
854 | } | |
855 | ||
856 | static void put_subframe(DCAContext *c, int subframe) | |
857 | { | |
858 | int i, band, ss, ch; | |
859 | ||
860 | /* Subsubframes count */ | |
861 | put_bits(&c->pb, 2, SUBSUBFRAMES -1); | |
862 | ||
863 | /* Partial subsubframe sample count: dummy */ | |
864 | put_bits(&c->pb, 3, 0); | |
865 | ||
866 | /* Prediction mode: no ADPCM, in each channel and subband */ | |
867 | for (ch = 0; ch < c->fullband_channels; ch++) | |
868 | for (band = 0; band < DCA_SUBBANDS; band++) | |
869 | put_bits(&c->pb, 1, 0); | |
870 | ||
871 | /* Prediction VQ address: not transmitted */ | |
872 | /* Bit allocation index */ | |
873 | for (ch = 0; ch < c->fullband_channels; ch++) | |
874 | for (band = 0; band < DCA_SUBBANDS; band++) | |
875 | put_bits(&c->pb, 5, c->abits[band][ch]); | |
876 | ||
877 | if (SUBSUBFRAMES > 1) { | |
878 | /* Transition mode: none for each channel and subband */ | |
879 | for (ch = 0; ch < c->fullband_channels; ch++) | |
880 | for (band = 0; band < DCA_SUBBANDS; band++) | |
881 | put_bits(&c->pb, 1, 0); /* codebook A4 */ | |
882 | } | |
883 | ||
884 | /* Scale factors */ | |
885 | for (ch = 0; ch < c->fullband_channels; ch++) | |
886 | for (band = 0; band < DCA_SUBBANDS; band++) | |
887 | put_bits(&c->pb, 7, c->scale_factor[band][ch]); | |
888 | ||
889 | /* Joint subband scale factor codebook select: not transmitted */ | |
890 | /* Scale factors for joint subband coding: not transmitted */ | |
891 | /* Stereo down-mix coefficients: not transmitted */ | |
892 | /* Dynamic range coefficient: not transmitted */ | |
893 | /* Stde information CRC check word: not transmitted */ | |
894 | /* VQ encoded high frequency subbands: not transmitted */ | |
895 | ||
896 | /* LFE data: 8 samples and scalefactor */ | |
897 | if (c->lfe_channel) { | |
898 | for (i = 0; i < DCA_LFE_SAMPLES; i++) | |
899 | put_bits(&c->pb, 8, quantize_value(c->downsampled_lfe[i], c->lfe_quant) & 0xff); | |
900 | put_bits(&c->pb, 8, c->lfe_scale_factor); | |
901 | } | |
902 | ||
903 | /* Audio data (subsubframes) */ | |
904 | for (ss = 0; ss < SUBSUBFRAMES ; ss++) | |
905 | for (ch = 0; ch < c->fullband_channels; ch++) | |
906 | for (band = 0; band < DCA_SUBBANDS; band++) | |
907 | put_subframe_samples(c, ss, band, ch); | |
908 | ||
909 | /* DSYNC */ | |
910 | put_bits(&c->pb, 16, 0xffff); | |
911 | } | |
912 | ||
913 | static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt, | |
914 | const AVFrame *frame, int *got_packet_ptr) | |
915 | { | |
916 | DCAContext *c = avctx->priv_data; | |
917 | const int32_t *samples; | |
918 | int ret, i; | |
919 | ||
920 | if ((ret = ff_alloc_packet2(avctx, avpkt, c->frame_size )) < 0) | |
921 | return ret; | |
922 | ||
923 | samples = (const int32_t *)frame->data[0]; | |
924 | ||
925 | subband_transform(c, samples); | |
926 | if (c->lfe_channel) | |
927 | lfe_downsample(c, samples); | |
928 | ||
929 | calc_masking(c, samples); | |
930 | find_peaks(c); | |
931 | assign_bits(c); | |
932 | calc_scales(c); | |
933 | quantize_all(c); | |
934 | shift_history(c, samples); | |
935 | ||
936 | init_put_bits(&c->pb, avpkt->data, avpkt->size); | |
937 | put_frame_header(c); | |
938 | put_primary_audio_header(c); | |
939 | for (i = 0; i < SUBFRAMES; i++) | |
940 | put_subframe(c, i); | |
941 | ||
942 | flush_put_bits(&c->pb); | |
943 | ||
944 | avpkt->pts = frame->pts; | |
945 | avpkt->duration = ff_samples_to_time_base(avctx, frame->nb_samples); | |
946 | avpkt->size = c->frame_size + 1; | |
947 | *got_packet_ptr = 1; | |
948 | return 0; | |
949 | } | |
950 | ||
951 | static const AVCodecDefault defaults[] = { | |
952 | { "b", "1411200" }, | |
953 | { NULL }, | |
954 | }; | |
955 | ||
956 | AVCodec ff_dca_encoder = { | |
957 | .name = "dca", | |
958 | .long_name = NULL_IF_CONFIG_SMALL("DCA (DTS Coherent Acoustics)"), | |
959 | .type = AVMEDIA_TYPE_AUDIO, | |
960 | .id = AV_CODEC_ID_DTS, | |
961 | .priv_data_size = sizeof(DCAContext), | |
962 | .init = encode_init, | |
963 | .encode2 = encode_frame, | |
964 | .capabilities = CODEC_CAP_EXPERIMENTAL, | |
965 | .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S32, | |
966 | AV_SAMPLE_FMT_NONE }, | |
967 | .supported_samplerates = sample_rates, | |
968 | .channel_layouts = (const uint64_t[]) { AV_CH_LAYOUT_MONO, | |
969 | AV_CH_LAYOUT_STEREO, | |
970 | AV_CH_LAYOUT_2_2, | |
971 | AV_CH_LAYOUT_5POINT0, | |
972 | AV_CH_LAYOUT_5POINT1, | |
973 | 0 }, | |
974 | .defaults = defaults, | |
975 | }; |