Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libavcodec / nellymoserenc.c
CommitLineData
2ba45a60
DM
/*
 * Nellymoser encoder
 * This code is developed as part of Google Summer of Code 2008 Program.
 *
 * Copyright (c) 2008 Bartlomiej Wolowiec
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23
/**
 * @file
 * Nellymoser encoder
 * by Bartlomiej Wolowiec
 *
 * Generic codec information: libavcodec/nellymoserdec.c
 *
 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
 * (Copyright Joseph Artsimovich and UAB "DKD")
 *
 * For more information about the Nellymoser format, visit:
 * http://wiki.multimedia.cx/index.php?title=Nellymoser
 */
37
38#include "libavutil/common.h"
39#include "libavutil/float_dsp.h"
40#include "libavutil/mathematics.h"
41
42#include "audio_frame_queue.h"
43#include "avcodec.h"
44#include "fft.h"
45#include "internal.h"
46#include "nellymoser.h"
47#include "sinewin.h"
48
49#define BITSTREAM_WRITER_LE
50#include "put_bits.h"
51
/** Number of entries in pow_table; encode_block() indexes it with power_idx & 0x7FF. */
#define POW_TABLE_SIZE (1<<11)
/** Exponent bias added when pow_table is filled and compensated by the shift in encode_block(). */
#define POW_TABLE_OFFSET 3
/** Row length of the per-band opt/path tables used by the trellis exponent search. */
#define OPT_SIZE ((1<<15) + 3000)
55
56typedef struct NellyMoserEncodeContext {
57 AVCodecContext *avctx;
58 int last_frame;
59 AVFloatDSPContext fdsp;
60 FFTContext mdct_ctx;
61 AudioFrameQueue afq;
62 DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
63 DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
64 DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN]; ///< sample buffer
65 float (*opt )[OPT_SIZE];
66 uint8_t (*path)[OPT_SIZE];
67} NellyMoserEncodeContext;
68
69static float pow_table[POW_TABLE_SIZE]; ///< -pow(2, -i / 2048.0 - 3.0);
70
/**
 * First-guess lookup for the band 0 exponent.
 * get_exponent_greedy() indexes it with clip((lrintf(val) >> 8) - 20, 0, 95)
 * and uses the result as an index into ff_nelly_init_table.
 */
static const uint8_t sf_lut[96] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
     5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
    15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
    27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
    41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
    54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
};
79
/**
 * First-guess lookup for the exponent deltas of bands 1 and up.
 * get_exponent_greedy() indexes it with clip((lrintf(val) >> 8) + 37, 0, 77)
 * and uses the result as an index into ff_nelly_delta_table.
 */
static const uint8_t sf_delta_lut[78] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
     4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
    13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
    23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
    28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
};
87
/**
 * First-guess quantiser index tables for 1 to 6 bits, concatenated.
 * The section for a given bit count spans
 * quant_lut_offset[bits] .. quant_lut_offset[bits + 1] - 1.
 */
static const uint8_t quant_lut[230] = {
    /* 1 bit */
     0,

    /* 2 bits */
     0,  1,  2,

    /* 3 bits */
     0,  1,  2,  3,  4,  5,  6,

    /* 4 bits */
     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
    12, 13, 13, 13, 14,

    /* 5 bits */
     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
    30,

    /* 6 bits */
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
    61, 61, 61, 61, 62,
};
114
/*
 * Per-bit-count parameters for the quant_lut lookup in encode_block():
 * the lookup position is coeff * quant_lut_mul[bits] + quant_lut_add[bits],
 * clipped to [quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1].
 */
static const float   quant_lut_mul[7]    = { 0.0, 0.0, 2.0, 2.0,  5.0, 12.0,  36.6 };
static const float   quant_lut_add[7]    = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
118
119static void apply_mdct(NellyMoserEncodeContext *s)
120{
121 float *in0 = s->buf;
122 float *in1 = s->buf + NELLY_BUF_LEN;
123 float *in2 = s->buf + 2 * NELLY_BUF_LEN;
124
125 s->fdsp.vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN);
126 s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
127 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
128
129 s->fdsp.vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN);
130 s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
131 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
132}
133
134static av_cold int encode_end(AVCodecContext *avctx)
135{
136 NellyMoserEncodeContext *s = avctx->priv_data;
137
138 ff_mdct_end(&s->mdct_ctx);
139
140 if (s->avctx->trellis) {
141 av_free(s->opt);
142 av_free(s->path);
143 }
144 ff_af_queue_close(&s->afq);
145
146 return 0;
147}
148
149static av_cold int encode_init(AVCodecContext *avctx)
150{
151 NellyMoserEncodeContext *s = avctx->priv_data;
152 int i, ret;
153
154 if (avctx->channels != 1) {
155 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
156 return AVERROR(EINVAL);
157 }
158
159 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
160 avctx->sample_rate != 11025 &&
161 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
162 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
163 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
164 return AVERROR(EINVAL);
165 }
166
167 avctx->frame_size = NELLY_SAMPLES;
168 avctx->delay = NELLY_BUF_LEN;
169 ff_af_queue_init(avctx, &s->afq);
170 s->avctx = avctx;
171 if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
172 goto error;
173 avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
174
175 /* Generate overlap window */
176 ff_init_ff_sine_windows(7);
177 for (i = 0; i < POW_TABLE_SIZE; i++)
178 pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
179
180 if (s->avctx->trellis) {
181 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
182 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
183 if (!s->opt || !s->path) {
184 ret = AVERROR(ENOMEM);
185 goto error;
186 }
187 }
188
189 return 0;
190error:
191 encode_end(avctx);
192 return ret;
193}
194
195#define find_best(val, table, LUT, LUT_add, LUT_size) \
196 best_idx = \
197 LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
198 if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
199 best_idx++;
200
201static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
202{
203 int band, best_idx, power_idx = 0;
204 float power_candidate;
205
206 //base exponent
207 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
208 idx_table[0] = best_idx;
209 power_idx = ff_nelly_init_table[best_idx];
210
211 for (band = 1; band < NELLY_BANDS; band++) {
212 power_candidate = cand[band] - power_idx;
213 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
214 idx_table[band] = best_idx;
215 power_idx += ff_nelly_delta_table[best_idx];
216 }
217}
218
/**
 * Squared difference between two exponent values.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number; currently unused
 * @return (x - y) squared
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    return diff * diff;
}
225
226static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
227{
228 int i, j, band, best_idx;
229 float power_candidate, best_val;
230
231 float (*opt )[OPT_SIZE] = s->opt ;
232 uint8_t(*path)[OPT_SIZE] = s->path;
233
234 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
235 opt[0][i] = INFINITY;
236 }
237
238 for (i = 0; i < 64; i++) {
239 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
240 path[0][ff_nelly_init_table[i]] = i;
241 }
242
243 for (band = 1; band < NELLY_BANDS; band++) {
244 int q, c = 0;
245 float tmp;
246 int idx_min, idx_max, idx;
247 power_candidate = cand[band];
248 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
249 idx_min = FFMAX(0, cand[band] - q);
250 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
251 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
252 if ( isinf(opt[band - 1][i]) )
253 continue;
254 for (j = 0; j < 32; j++) {
255 idx = i + ff_nelly_delta_table[j];
256 if (idx > idx_max)
257 break;
258 if (idx >= idx_min) {
259 tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
260 if (opt[band][idx] > tmp) {
261 opt[band][idx] = tmp;
262 path[band][idx] = j;
263 c = 1;
264 }
265 }
266 }
267 }
268 }
269 assert(c); //FIXME
270 }
271
272 best_val = INFINITY;
273 best_idx = -1;
274 band = NELLY_BANDS - 1;
275 for (i = 0; i < OPT_SIZE; i++) {
276 if (best_val > opt[band][i]) {
277 best_val = opt[band][i];
278 best_idx = i;
279 }
280 }
281 for (band = NELLY_BANDS - 1; band >= 0; band--) {
282 idx_table[band] = path[band][best_idx];
283 if (band) {
284 best_idx -= ff_nelly_delta_table[path[band][best_idx]];
285 }
286 }
287}
288
289/**
290 * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
291 * @param s encoder context
292 * @param output output buffer
293 * @param output_size size of output buffer
294 */
295static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
296{
297 PutBitContext pb;
298 int i, j, band, block, best_idx, power_idx = 0;
299 float power_val, coeff, coeff_sum;
300 float pows[NELLY_FILL_LEN];
301 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
302 float cand[NELLY_BANDS];
303
304 apply_mdct(s);
305
306 init_put_bits(&pb, output, output_size * 8);
307
308 i = 0;
309 for (band = 0; band < NELLY_BANDS; band++) {
310 coeff_sum = 0;
311 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
312 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
313 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
314 }
315 cand[band] =
316 log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
317 }
318
319 if (s->avctx->trellis) {
320 get_exponent_dynamic(s, cand, idx_table);
321 } else {
322 get_exponent_greedy(s, cand, idx_table);
323 }
324
325 i = 0;
326 for (band = 0; band < NELLY_BANDS; band++) {
327 if (band) {
328 power_idx += ff_nelly_delta_table[idx_table[band]];
329 put_bits(&pb, 5, idx_table[band]);
330 } else {
331 power_idx = ff_nelly_init_table[idx_table[0]];
332 put_bits(&pb, 6, idx_table[0]);
333 }
334 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
335 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
336 s->mdct_out[i] *= power_val;
337 s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
338 pows[i] = power_idx;
339 }
340 }
341
342 ff_nelly_get_sample_bits(pows, bits);
343
344 for (block = 0; block < 2; block++) {
345 for (i = 0; i < NELLY_FILL_LEN; i++) {
346 if (bits[i] > 0) {
347 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
348 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
349 best_idx =
350 quant_lut[av_clip (
351 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
352 quant_lut_offset[bits[i]],
353 quant_lut_offset[bits[i]+1] - 1
354 )];
355 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
356 best_idx++;
357
358 put_bits(&pb, bits[i], best_idx);
359 }
360 }
361 if (!block)
362 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
363 }
364
365 flush_put_bits(&pb);
366 memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
367}
368
369static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
370 const AVFrame *frame, int *got_packet_ptr)
371{
372 NellyMoserEncodeContext *s = avctx->priv_data;
373 int ret;
374
375 if (s->last_frame)
376 return 0;
377
378 memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
379 if (frame) {
380 memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
381 frame->nb_samples * sizeof(*s->buf));
382 if (frame->nb_samples < NELLY_SAMPLES) {
383 memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
384 (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
385 if (frame->nb_samples >= NELLY_BUF_LEN)
386 s->last_frame = 1;
387 }
388 if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
389 return ret;
390 } else {
391 memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
392 s->last_frame = 1;
393 }
394
395 if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN)) < 0)
396 return ret;
397 encode_block(s, avpkt->data, avpkt->size);
398
399 /* Get the next frame pts/duration */
400 ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
401 &avpkt->duration);
402
403 *got_packet_ptr = 1;
404 return 0;
405}
406
407AVCodec ff_nellymoser_encoder = {
408 .name = "nellymoser",
409 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
410 .type = AVMEDIA_TYPE_AUDIO,
411 .id = AV_CODEC_ID_NELLYMOSER,
412 .priv_data_size = sizeof(NellyMoserEncodeContext),
413 .init = encode_init,
414 .encode2 = encode_frame,
415 .close = encode_end,
416 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
417 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
418 AV_SAMPLE_FMT_NONE },
419};