Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * WMA compatible encoder | |
3 | * Copyright (c) 2007 Michael Niedermayer | |
4 | * | |
5 | * This file is part of FFmpeg. | |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * FFmpeg is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with FFmpeg; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
21 | ||
22 | #include "libavutil/attributes.h" | |
23 | ||
24 | #include "avcodec.h" | |
25 | #include "internal.h" | |
26 | #include "wma.h" | |
27 | #include "libavutil/avassert.h" | |
28 | ||
29 | ||
30 | static av_cold int encode_init(AVCodecContext *avctx) | |
31 | { | |
32 | WMACodecContext *s = avctx->priv_data; | |
33 | int i, flags1, flags2, block_align; | |
34 | uint8_t *extradata; | |
35 | ||
36 | s->avctx = avctx; | |
37 | ||
38 | if (avctx->channels > MAX_CHANNELS) { | |
39 | av_log(avctx, AV_LOG_ERROR, | |
40 | "too many channels: got %i, need %i or fewer\n", | |
41 | avctx->channels, MAX_CHANNELS); | |
42 | return AVERROR(EINVAL); | |
43 | } | |
44 | ||
45 | if (avctx->sample_rate > 48000) { | |
46 | av_log(avctx, AV_LOG_ERROR, "sample rate is too high: %d > 48kHz\n", | |
47 | avctx->sample_rate); | |
48 | return AVERROR(EINVAL); | |
49 | } | |
50 | ||
51 | if (avctx->bit_rate < 24 * 1000) { | |
52 | av_log(avctx, AV_LOG_ERROR, | |
53 | "bitrate too low: got %i, need 24000 or higher\n", | |
54 | avctx->bit_rate); | |
55 | return AVERROR(EINVAL); | |
56 | } | |
57 | ||
58 | /* extract flag infos */ | |
59 | flags1 = 0; | |
60 | flags2 = 1; | |
61 | if (avctx->codec->id == AV_CODEC_ID_WMAV1) { | |
62 | extradata = av_malloc(4); | |
63 | avctx->extradata_size = 4; | |
64 | AV_WL16(extradata, flags1); | |
65 | AV_WL16(extradata + 2, flags2); | |
66 | } else if (avctx->codec->id == AV_CODEC_ID_WMAV2) { | |
67 | extradata = av_mallocz(10); | |
68 | avctx->extradata_size = 10; | |
69 | AV_WL32(extradata, flags1); | |
70 | AV_WL16(extradata + 4, flags2); | |
71 | } else { | |
72 | av_assert0(0); | |
73 | } | |
74 | avctx->extradata = extradata; | |
75 | s->use_exp_vlc = flags2 & 0x0001; | |
76 | s->use_bit_reservoir = flags2 & 0x0002; | |
77 | s->use_variable_block_len = flags2 & 0x0004; | |
78 | if (avctx->channels == 2) | |
79 | s->ms_stereo = 1; | |
80 | ||
81 | ff_wma_init(avctx, flags2); | |
82 | ||
83 | /* init MDCT */ | |
84 | for (i = 0; i < s->nb_block_sizes; i++) | |
85 | ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 0, 1.0); | |
86 | ||
87 | block_align = avctx->bit_rate * (int64_t) s->frame_len / | |
88 | (avctx->sample_rate * 8); | |
89 | block_align = FFMIN(block_align, MAX_CODED_SUPERFRAME_SIZE); | |
90 | avctx->block_align = block_align; | |
91 | ||
92 | avctx->frame_size = | |
93 | avctx->delay = s->frame_len; | |
94 | ||
95 | return 0; | |
96 | } | |
97 | ||
98 | static void apply_window_and_mdct(AVCodecContext *avctx, const AVFrame *frame) | |
99 | { | |
100 | WMACodecContext *s = avctx->priv_data; | |
101 | float **audio = (float **) frame->extended_data; | |
102 | int len = frame->nb_samples; | |
103 | int window_index = s->frame_len_bits - s->block_len_bits; | |
104 | FFTContext *mdct = &s->mdct_ctx[window_index]; | |
105 | int ch; | |
106 | const float *win = s->windows[window_index]; | |
107 | int window_len = 1 << s->block_len_bits; | |
108 | float n = 2.0 * 32768.0 / window_len; | |
109 | ||
110 | for (ch = 0; ch < avctx->channels; ch++) { | |
111 | memcpy(s->output, s->frame_out[ch], window_len * sizeof(*s->output)); | |
112 | s->fdsp.vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len); | |
113 | s->fdsp.vector_fmul_reverse(&s->output[window_len], s->frame_out[ch], | |
114 | win, len); | |
115 | s->fdsp.vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len); | |
116 | mdct->mdct_calc(mdct, s->coefs[ch], s->output); | |
117 | } | |
118 | } | |
119 | ||
120 | // FIXME use for decoding too | |
121 | static void init_exp(WMACodecContext *s, int ch, const int *exp_param) | |
122 | { | |
123 | int n; | |
124 | const uint16_t *ptr; | |
125 | float v, *q, max_scale, *q_end; | |
126 | ||
127 | ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits]; | |
128 | q = s->exponents[ch]; | |
129 | q_end = q + s->block_len; | |
130 | max_scale = 0; | |
131 | while (q < q_end) { | |
132 | /* XXX: use a table */ | |
133 | v = pow(10, *exp_param++ *(1.0 / 16.0)); | |
134 | max_scale = FFMAX(max_scale, v); | |
135 | n = *ptr++; | |
136 | do { | |
137 | *q++ = v; | |
138 | } while (--n); | |
139 | } | |
140 | s->max_exponent[ch] = max_scale; | |
141 | } | |
142 | ||
143 | static void encode_exp_vlc(WMACodecContext *s, int ch, const int *exp_param) | |
144 | { | |
145 | int last_exp; | |
146 | const uint16_t *ptr; | |
147 | float *q, *q_end; | |
148 | ||
149 | ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits]; | |
150 | q = s->exponents[ch]; | |
151 | q_end = q + s->block_len; | |
152 | if (s->version == 1) { | |
153 | last_exp = *exp_param++; | |
154 | av_assert0(last_exp - 10 >= 0 && last_exp - 10 < 32); | |
155 | put_bits(&s->pb, 5, last_exp - 10); | |
156 | q += *ptr++; | |
157 | } else | |
158 | last_exp = 36; | |
159 | while (q < q_end) { | |
160 | int exp = *exp_param++; | |
161 | int code = exp - last_exp + 60; | |
162 | av_assert1(code >= 0 && code < 120); | |
163 | put_bits(&s->pb, ff_aac_scalefactor_bits[code], | |
164 | ff_aac_scalefactor_code[code]); | |
165 | /* XXX: use a table */ | |
166 | q += *ptr++; | |
167 | last_exp = exp; | |
168 | } | |
169 | } | |
170 | ||
171 | static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE], | |
172 | int total_gain) | |
173 | { | |
174 | int v, bsize, ch, coef_nb_bits, parse_exponents; | |
175 | float mdct_norm; | |
176 | int nb_coefs[MAX_CHANNELS]; | |
177 | static const int fixed_exp[25] = { | |
178 | 20, 20, 20, 20, 20, | |
179 | 20, 20, 20, 20, 20, | |
180 | 20, 20, 20, 20, 20, | |
181 | 20, 20, 20, 20, 20, | |
182 | 20, 20, 20, 20, 20 | |
183 | }; | |
184 | ||
185 | // FIXME remove duplication relative to decoder | |
186 | if (s->use_variable_block_len) { | |
187 | av_assert0(0); // FIXME not implemented | |
188 | } else { | |
189 | /* fixed block len */ | |
190 | s->next_block_len_bits = s->frame_len_bits; | |
191 | s->prev_block_len_bits = s->frame_len_bits; | |
192 | s->block_len_bits = s->frame_len_bits; | |
193 | } | |
194 | ||
195 | s->block_len = 1 << s->block_len_bits; | |
196 | // av_assert0((s->block_pos + s->block_len) <= s->frame_len); | |
197 | bsize = s->frame_len_bits - s->block_len_bits; | |
198 | ||
199 | // FIXME factor | |
200 | v = s->coefs_end[bsize] - s->coefs_start; | |
201 | for (ch = 0; ch < s->avctx->channels; ch++) | |
202 | nb_coefs[ch] = v; | |
203 | { | |
204 | int n4 = s->block_len / 2; | |
205 | mdct_norm = 1.0 / (float) n4; | |
206 | if (s->version == 1) | |
207 | mdct_norm *= sqrt(n4); | |
208 | } | |
209 | ||
210 | if (s->avctx->channels == 2) | |
211 | put_bits(&s->pb, 1, !!s->ms_stereo); | |
212 | ||
213 | for (ch = 0; ch < s->avctx->channels; ch++) { | |
214 | // FIXME only set channel_coded when needed, instead of always | |
215 | s->channel_coded[ch] = 1; | |
216 | if (s->channel_coded[ch]) | |
217 | init_exp(s, ch, fixed_exp); | |
218 | } | |
219 | ||
220 | for (ch = 0; ch < s->avctx->channels; ch++) { | |
221 | if (s->channel_coded[ch]) { | |
222 | WMACoef *coefs1; | |
223 | float *coefs, *exponents, mult; | |
224 | int i, n; | |
225 | ||
226 | coefs1 = s->coefs1[ch]; | |
227 | exponents = s->exponents[ch]; | |
228 | mult = pow(10, total_gain * 0.05) / s->max_exponent[ch]; | |
229 | mult *= mdct_norm; | |
230 | coefs = src_coefs[ch]; | |
231 | if (s->use_noise_coding && 0) { | |
232 | av_assert0(0); // FIXME not implemented | |
233 | } else { | |
234 | coefs += s->coefs_start; | |
235 | n = nb_coefs[ch]; | |
236 | for (i = 0; i < n; i++) { | |
237 | double t = *coefs++ / (exponents[i] * mult); | |
238 | if (t < -32768 || t > 32767) | |
239 | return -1; | |
240 | ||
241 | coefs1[i] = lrint(t); | |
242 | } | |
243 | } | |
244 | } | |
245 | } | |
246 | ||
247 | v = 0; | |
248 | for (ch = 0; ch < s->avctx->channels; ch++) { | |
249 | int a = s->channel_coded[ch]; | |
250 | put_bits(&s->pb, 1, a); | |
251 | v |= a; | |
252 | } | |
253 | ||
254 | if (!v) | |
255 | return 1; | |
256 | ||
257 | for (v = total_gain - 1; v >= 127; v -= 127) | |
258 | put_bits(&s->pb, 7, 127); | |
259 | put_bits(&s->pb, 7, v); | |
260 | ||
261 | coef_nb_bits = ff_wma_total_gain_to_bits(total_gain); | |
262 | ||
263 | if (s->use_noise_coding) { | |
264 | for (ch = 0; ch < s->avctx->channels; ch++) { | |
265 | if (s->channel_coded[ch]) { | |
266 | int i, n; | |
267 | n = s->exponent_high_sizes[bsize]; | |
268 | for (i = 0; i < n; i++) { | |
269 | put_bits(&s->pb, 1, s->high_band_coded[ch][i] = 0); | |
270 | if (0) | |
271 | nb_coefs[ch] -= s->exponent_high_bands[bsize][i]; | |
272 | } | |
273 | } | |
274 | } | |
275 | } | |
276 | ||
277 | parse_exponents = 1; | |
278 | if (s->block_len_bits != s->frame_len_bits) | |
279 | put_bits(&s->pb, 1, parse_exponents); | |
280 | ||
281 | if (parse_exponents) { | |
282 | for (ch = 0; ch < s->avctx->channels; ch++) { | |
283 | if (s->channel_coded[ch]) { | |
284 | if (s->use_exp_vlc) { | |
285 | encode_exp_vlc(s, ch, fixed_exp); | |
286 | } else { | |
287 | av_assert0(0); // FIXME not implemented | |
288 | // encode_exp_lsp(s, ch); | |
289 | } | |
290 | } | |
291 | } | |
292 | } else | |
293 | av_assert0(0); // FIXME not implemented | |
294 | ||
295 | for (ch = 0; ch < s->avctx->channels; ch++) { | |
296 | if (s->channel_coded[ch]) { | |
297 | int run, tindex; | |
298 | WMACoef *ptr, *eptr; | |
299 | tindex = (ch == 1 && s->ms_stereo); | |
300 | ptr = &s->coefs1[ch][0]; | |
301 | eptr = ptr + nb_coefs[ch]; | |
302 | ||
303 | run = 0; | |
304 | for (; ptr < eptr; ptr++) { | |
305 | if (*ptr) { | |
306 | int level = *ptr; | |
307 | int abs_level = FFABS(level); | |
308 | int code = 0; | |
309 | if (abs_level <= s->coef_vlcs[tindex]->max_level) | |
310 | if (run < s->coef_vlcs[tindex]->levels[abs_level - 1]) | |
311 | code = run + s->int_table[tindex][abs_level - 1]; | |
312 | ||
313 | av_assert2(code < s->coef_vlcs[tindex]->n); | |
314 | put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[code], | |
315 | s->coef_vlcs[tindex]->huffcodes[code]); | |
316 | ||
317 | if (code == 0) { | |
318 | if (1 << coef_nb_bits <= abs_level) | |
319 | return -1; | |
320 | ||
321 | put_bits(&s->pb, coef_nb_bits, abs_level); | |
322 | put_bits(&s->pb, s->frame_len_bits, run); | |
323 | } | |
324 | // FIXME the sign is flipped somewhere | |
325 | put_bits(&s->pb, 1, level < 0); | |
326 | run = 0; | |
327 | } else | |
328 | run++; | |
329 | } | |
330 | if (run) | |
331 | put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[1], | |
332 | s->coef_vlcs[tindex]->huffcodes[1]); | |
333 | } | |
334 | if (s->version == 1 && s->avctx->channels >= 2) | |
335 | avpriv_align_put_bits(&s->pb); | |
336 | } | |
337 | return 0; | |
338 | } | |
339 | ||
340 | static int encode_frame(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE], | |
341 | uint8_t *buf, int buf_size, int total_gain) | |
342 | { | |
343 | init_put_bits(&s->pb, buf, buf_size); | |
344 | ||
345 | if (s->use_bit_reservoir) | |
346 | av_assert0(0); // FIXME not implemented | |
347 | else if (encode_block(s, src_coefs, total_gain) < 0) | |
348 | return INT_MAX; | |
349 | ||
350 | avpriv_align_put_bits(&s->pb); | |
351 | ||
352 | return put_bits_count(&s->pb) / 8 - s->avctx->block_align; | |
353 | } | |
354 | ||
355 | static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt, | |
356 | const AVFrame *frame, int *got_packet_ptr) | |
357 | { | |
358 | WMACodecContext *s = avctx->priv_data; | |
359 | int i, total_gain, ret, error; | |
360 | ||
361 | s->block_len_bits = s->frame_len_bits; // required by non variable block len | |
362 | s->block_len = 1 << s->block_len_bits; | |
363 | ||
364 | apply_window_and_mdct(avctx, frame); | |
365 | ||
366 | if (s->ms_stereo) { | |
367 | float a, b; | |
368 | int i; | |
369 | ||
370 | for (i = 0; i < s->block_len; i++) { | |
371 | a = s->coefs[0][i] * 0.5; | |
372 | b = s->coefs[1][i] * 0.5; | |
373 | s->coefs[0][i] = a + b; | |
374 | s->coefs[1][i] = a - b; | |
375 | } | |
376 | } | |
377 | ||
378 | if ((ret = ff_alloc_packet2(avctx, avpkt, 2 * MAX_CODED_SUPERFRAME_SIZE)) < 0) | |
379 | return ret; | |
380 | ||
381 | total_gain = 128; | |
382 | for (i = 64; i; i >>= 1) { | |
383 | error = encode_frame(s, s->coefs, avpkt->data, avpkt->size, | |
384 | total_gain - i); | |
385 | if (error <= 0) | |
386 | total_gain -= i; | |
387 | } | |
388 | ||
389 | while(total_gain <= 128 && error > 0) | |
390 | error = encode_frame(s, s->coefs, avpkt->data, avpkt->size, total_gain++); | |
391 | if (error > 0) { | |
392 | av_log(avctx, AV_LOG_ERROR, "Invalid input data or requested bitrate too low, cannot encode\n"); | |
393 | avpkt->size = 0; | |
394 | return AVERROR(EINVAL); | |
395 | } | |
396 | av_assert0((put_bits_count(&s->pb) & 7) == 0); | |
397 | i= avctx->block_align - (put_bits_count(&s->pb)+7)/8; | |
398 | av_assert0(i>=0); | |
399 | while(i--) | |
400 | put_bits(&s->pb, 8, 'N'); | |
401 | ||
402 | flush_put_bits(&s->pb); | |
403 | av_assert0(put_bits_ptr(&s->pb) - s->pb.buf == avctx->block_align); | |
404 | ||
405 | if (frame->pts != AV_NOPTS_VALUE) | |
406 | avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay); | |
407 | ||
408 | avpkt->size = avctx->block_align; | |
409 | *got_packet_ptr = 1; | |
410 | return 0; | |
411 | } | |
412 | ||
#if CONFIG_WMAV1_ENCODER
/* Input sample formats accepted by the WMA v1 encoder. */
static const enum AVSampleFormat wmav1_sample_fmts[] = {
    AV_SAMPLE_FMT_FLTP,
    AV_SAMPLE_FMT_NONE
};

/* Codec registration entry for the WMA v1 encoder. */
AVCodec ff_wmav1_encoder = {
    .name           = "wmav1",
    .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = AV_CODEC_ID_WMAV1,
    .priv_data_size = sizeof(WMACodecContext),
    .init           = encode_init,
    .encode2        = encode_superframe,
    .close          = ff_wma_end,
    .sample_fmts    = wmav1_sample_fmts,
};
#endif
#if CONFIG_WMAV2_ENCODER
/* Input sample formats accepted by the WMA v2 encoder. */
static const enum AVSampleFormat wmav2_sample_fmts[] = {
    AV_SAMPLE_FMT_FLTP,
    AV_SAMPLE_FMT_NONE
};

/* Codec registration entry for the WMA v2 encoder. */
AVCodec ff_wmav2_encoder = {
    .name           = "wmav2",
    .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = AV_CODEC_ID_WMAV2,
    .priv_data_size = sizeof(WMACodecContext),
    .init           = encode_init,
    .encode2        = encode_superframe,
    .close          = ff_wma_end,
    .sample_fmts    = wmav2_sample_fmts,
};
#endif