Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Simple free lossless/lossy audio codec | |
3 | * Copyright (c) 2004 Alex Beregszaszi | |
4 | * | |
5 | * This file is part of FFmpeg. | |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * FFmpeg is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with FFmpeg; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
21 | #include "avcodec.h" | |
22 | #include "get_bits.h" | |
23 | #include "golomb.h" | |
24 | #include "internal.h" | |
25 | #include "rangecoder.h" | |
26 | ||
27 | ||
28 | /** | |
29 | * @file | |
30 | * Simple free lossless/lossy audio codec | |
31 | * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk) | |
32 | * Written and designed by Alex Beregszaszi | |
33 | * | |
34 | * TODO: | |
35 | * - CABAC put/get_symbol | |
36 | * - independent quantizer for channels | |
37 | * - >2 channels support | |
38 | * - more decorrelation types | |
39 | * - more tap_quant tests | |
40 | * - selectable intlist writers/readers (bonk-style, golomb, cabac) | |
41 | */ | |
42 | ||
/* Largest channel count for which per-channel buffers exist in SonicContext. */
#define MAX_CHANNELS 2

/* Inter-channel decorrelation modes kept in SonicContext.decorrelation. */
#define MID_SIDE   0
#define LEFT_SIDE  1
#define RIGHT_SIDE 2
48 | ||
49 | typedef struct SonicContext { | |
50 | int version; | |
51 | int minor_version; | |
52 | int lossless, decorrelation; | |
53 | ||
54 | int num_taps, downsampling; | |
55 | double quantization; | |
56 | ||
57 | int channels, samplerate, block_align, frame_size; | |
58 | ||
59 | int *tap_quant; | |
60 | int *int_samples; | |
61 | int *coded_samples[MAX_CHANNELS]; | |
62 | ||
63 | // for encoding | |
64 | int *tail; | |
65 | int tail_size; | |
66 | int *window; | |
67 | int window_size; | |
68 | ||
69 | // for decoding | |
70 | int *predictor_k; | |
71 | int *predictor_state[MAX_CHANNELS]; | |
72 | } SonicContext; | |
73 | ||
/* Fixed-point precision of the lattice predictor coefficients. */
#define LATTICE_SHIFT   10
#define LATTICE_FACTOR  (1 << LATTICE_SHIFT)

/* Extra low-order bits carried by samples in lossy mode. */
#define SAMPLE_SHIFT    4
#define SAMPLE_FACTOR   (1 << SAMPLE_SHIFT)

/* Tuning constants of the lossy encoder's rate control. */
#define BASE_QUANT      0.6
#define RATE_VARIATION  3.0
81 | ||
/**
 * Right-shift a by b bits after adding half of the divisor, i.e. divide by
 * 2^b with rounding. b must be at least 1.
 */
static inline int shift(int a,int b)
{
    const int half   = 1 << (b - 1);
    const int biased = a + half;

    return biased >> b;
}
86 | ||
/**
 * Right-shift a by b bits, then add one when a is negative.
 */
static inline int shift_down(int a,int b)
{
    int shifted = a >> b;

    if (a < 0)
        shifted++;

    return shifted;
}
91 | ||
92 | static av_always_inline av_flatten void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){ | |
93 | int i; | |
94 | ||
95 | #define put_rac(C,S,B) \ | |
96 | do{\ | |
97 | if(rc_stat){\ | |
98 | rc_stat[*(S)][B]++;\ | |
99 | rc_stat2[(S)-state][B]++;\ | |
100 | }\ | |
101 | put_rac(C,S,B);\ | |
102 | }while(0) | |
103 | ||
104 | if(v){ | |
105 | const int a= FFABS(v); | |
106 | const int e= av_log2(a); | |
107 | put_rac(c, state+0, 0); | |
108 | if(e<=9){ | |
109 | for(i=0; i<e; i++){ | |
110 | put_rac(c, state+1+i, 1); //1..10 | |
111 | } | |
112 | put_rac(c, state+1+i, 0); | |
113 | ||
114 | for(i=e-1; i>=0; i--){ | |
115 | put_rac(c, state+22+i, (a>>i)&1); //22..31 | |
116 | } | |
117 | ||
118 | if(is_signed) | |
119 | put_rac(c, state+11 + e, v < 0); //11..21 | |
120 | }else{ | |
121 | for(i=0; i<e; i++){ | |
122 | put_rac(c, state+1+FFMIN(i,9), 1); //1..10 | |
123 | } | |
124 | put_rac(c, state+1+9, 0); | |
125 | ||
126 | for(i=e-1; i>=0; i--){ | |
127 | put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31 | |
128 | } | |
129 | ||
130 | if(is_signed) | |
131 | put_rac(c, state+11 + 10, v < 0); //11..21 | |
132 | } | |
133 | }else{ | |
134 | put_rac(c, state+0, 1); | |
135 | } | |
136 | #undef put_rac | |
137 | } | |
138 | ||
139 | static inline av_flatten int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ | |
140 | if(get_rac(c, state+0)) | |
141 | return 0; | |
142 | else{ | |
143 | int i, e, a; | |
144 | e= 0; | |
145 | while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10 | |
146 | e++; | |
147 | } | |
148 | ||
149 | a= 1; | |
150 | for(i=e-1; i>=0; i--){ | |
151 | a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31 | |
152 | } | |
153 | ||
154 | e= -(is_signed && get_rac(c, state+11 + FFMIN(e, 10))); //11..21 | |
155 | return (a^e)-e; | |
156 | } | |
157 | } | |
158 | ||
159 | #if 1 | |
160 | static inline int intlist_write(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part) | |
161 | { | |
162 | int i; | |
163 | ||
164 | for (i = 0; i < entries; i++) | |
165 | put_symbol(c, state, buf[i], 1, NULL, NULL); | |
166 | ||
167 | return 1; | |
168 | } | |
169 | ||
170 | static inline int intlist_read(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part) | |
171 | { | |
172 | int i; | |
173 | ||
174 | for (i = 0; i < entries; i++) | |
175 | buf[i] = get_symbol(c, state, 1); | |
176 | ||
177 | return 1; | |
178 | } | |
179 | #elif 1 | |
180 | static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part) | |
181 | { | |
182 | int i; | |
183 | ||
184 | for (i = 0; i < entries; i++) | |
185 | set_se_golomb(pb, buf[i]); | |
186 | ||
187 | return 1; | |
188 | } | |
189 | ||
190 | static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part) | |
191 | { | |
192 | int i; | |
193 | ||
194 | for (i = 0; i < entries; i++) | |
195 | buf[i] = get_se_golomb(gb); | |
196 | ||
197 | return 1; | |
198 | } | |
199 | ||
200 | #else | |
201 | ||
202 | #define ADAPT_LEVEL 8 | |
203 | ||
/**
 * Number of bits needed to represent x: 0 for x == 0, otherwise the position
 * of the highest set bit plus one.
 */
static int bits_to_store(uint64_t x)
{
    int res;

    for (res = 0; x != 0; x >>= 1)
        res++;

    return res;
}
215 | ||
216 | static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max) | |
217 | { | |
218 | int i, bits; | |
219 | ||
220 | if (!max) | |
221 | return; | |
222 | ||
223 | bits = bits_to_store(max); | |
224 | ||
225 | for (i = 0; i < bits-1; i++) | |
226 | put_bits(pb, 1, value & (1 << i)); | |
227 | ||
228 | if ( (value | (1 << (bits-1))) <= max) | |
229 | put_bits(pb, 1, value & (1 << (bits-1))); | |
230 | } | |
231 | ||
232 | static unsigned int read_uint_max(GetBitContext *gb, int max) | |
233 | { | |
234 | int i, bits, value = 0; | |
235 | ||
236 | if (!max) | |
237 | return 0; | |
238 | ||
239 | bits = bits_to_store(max); | |
240 | ||
241 | for (i = 0; i < bits-1; i++) | |
242 | if (get_bits1(gb)) | |
243 | value += 1 << i; | |
244 | ||
245 | if ( (value | (1<<(bits-1))) <= max) | |
246 | if (get_bits1(gb)) | |
247 | value += 1 << (bits-1); | |
248 | ||
249 | return value; | |
250 | } | |
251 | ||
252 | static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part) | |
253 | { | |
254 | int i, j, x = 0, low_bits = 0, max = 0; | |
255 | int step = 256, pos = 0, dominant = 0, any = 0; | |
256 | int *copy, *bits; | |
257 | ||
258 | copy = av_calloc(entries, sizeof(*copy)); | |
259 | if (!copy) | |
260 | return AVERROR(ENOMEM); | |
261 | ||
262 | if (base_2_part) | |
263 | { | |
264 | int energy = 0; | |
265 | ||
266 | for (i = 0; i < entries; i++) | |
267 | energy += abs(buf[i]); | |
268 | ||
269 | low_bits = bits_to_store(energy / (entries * 2)); | |
270 | if (low_bits > 15) | |
271 | low_bits = 15; | |
272 | ||
273 | put_bits(pb, 4, low_bits); | |
274 | } | |
275 | ||
276 | for (i = 0; i < entries; i++) | |
277 | { | |
278 | put_bits(pb, low_bits, abs(buf[i])); | |
279 | copy[i] = abs(buf[i]) >> low_bits; | |
280 | if (copy[i] > max) | |
281 | max = abs(copy[i]); | |
282 | } | |
283 | ||
284 | bits = av_calloc(entries*max, sizeof(*bits)); | |
285 | if (!bits) | |
286 | { | |
287 | av_free(copy); | |
288 | return AVERROR(ENOMEM); | |
289 | } | |
290 | ||
291 | for (i = 0; i <= max; i++) | |
292 | { | |
293 | for (j = 0; j < entries; j++) | |
294 | if (copy[j] >= i) | |
295 | bits[x++] = copy[j] > i; | |
296 | } | |
297 | ||
298 | // store bitstream | |
299 | while (pos < x) | |
300 | { | |
301 | int steplet = step >> 8; | |
302 | ||
303 | if (pos + steplet > x) | |
304 | steplet = x - pos; | |
305 | ||
306 | for (i = 0; i < steplet; i++) | |
307 | if (bits[i+pos] != dominant) | |
308 | any = 1; | |
309 | ||
310 | put_bits(pb, 1, any); | |
311 | ||
312 | if (!any) | |
313 | { | |
314 | pos += steplet; | |
315 | step += step / ADAPT_LEVEL; | |
316 | } | |
317 | else | |
318 | { | |
319 | int interloper = 0; | |
320 | ||
321 | while (((pos + interloper) < x) && (bits[pos + interloper] == dominant)) | |
322 | interloper++; | |
323 | ||
324 | // note change | |
325 | write_uint_max(pb, interloper, (step >> 8) - 1); | |
326 | ||
327 | pos += interloper + 1; | |
328 | step -= step / ADAPT_LEVEL; | |
329 | } | |
330 | ||
331 | if (step < 256) | |
332 | { | |
333 | step = 65536 / step; | |
334 | dominant = !dominant; | |
335 | } | |
336 | } | |
337 | ||
338 | // store signs | |
339 | for (i = 0; i < entries; i++) | |
340 | if (buf[i]) | |
341 | put_bits(pb, 1, buf[i] < 0); | |
342 | ||
343 | av_free(bits); | |
344 | av_free(copy); | |
345 | ||
346 | return 0; | |
347 | } | |
348 | ||
349 | static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part) | |
350 | { | |
351 | int i, low_bits = 0, x = 0; | |
352 | int n_zeros = 0, step = 256, dominant = 0; | |
353 | int pos = 0, level = 0; | |
354 | int *bits = av_calloc(entries, sizeof(*bits)); | |
355 | ||
356 | if (!bits) | |
357 | return AVERROR(ENOMEM); | |
358 | ||
359 | if (base_2_part) | |
360 | { | |
361 | low_bits = get_bits(gb, 4); | |
362 | ||
363 | if (low_bits) | |
364 | for (i = 0; i < entries; i++) | |
365 | buf[i] = get_bits(gb, low_bits); | |
366 | } | |
367 | ||
368 | // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits); | |
369 | ||
370 | while (n_zeros < entries) | |
371 | { | |
372 | int steplet = step >> 8; | |
373 | ||
374 | if (!get_bits1(gb)) | |
375 | { | |
376 | for (i = 0; i < steplet; i++) | |
377 | bits[x++] = dominant; | |
378 | ||
379 | if (!dominant) | |
380 | n_zeros += steplet; | |
381 | ||
382 | step += step / ADAPT_LEVEL; | |
383 | } | |
384 | else | |
385 | { | |
386 | int actual_run = read_uint_max(gb, steplet-1); | |
387 | ||
388 | // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run); | |
389 | ||
390 | for (i = 0; i < actual_run; i++) | |
391 | bits[x++] = dominant; | |
392 | ||
393 | bits[x++] = !dominant; | |
394 | ||
395 | if (!dominant) | |
396 | n_zeros += actual_run; | |
397 | else | |
398 | n_zeros++; | |
399 | ||
400 | step -= step / ADAPT_LEVEL; | |
401 | } | |
402 | ||
403 | if (step < 256) | |
404 | { | |
405 | step = 65536 / step; | |
406 | dominant = !dominant; | |
407 | } | |
408 | } | |
409 | ||
410 | // reconstruct unsigned values | |
411 | n_zeros = 0; | |
412 | for (i = 0; n_zeros < entries; i++) | |
413 | { | |
414 | while(1) | |
415 | { | |
416 | if (pos >= entries) | |
417 | { | |
418 | pos = 0; | |
419 | level += 1 << low_bits; | |
420 | } | |
421 | ||
422 | if (buf[pos] >= level) | |
423 | break; | |
424 | ||
425 | pos++; | |
426 | } | |
427 | ||
428 | if (bits[i]) | |
429 | buf[pos] += 1 << low_bits; | |
430 | else | |
431 | n_zeros++; | |
432 | ||
433 | pos++; | |
434 | } | |
435 | av_free(bits); | |
436 | ||
437 | // read signs | |
438 | for (i = 0; i < entries; i++) | |
439 | if (buf[i] && get_bits1(gb)) | |
440 | buf[i] = -buf[i]; | |
441 | ||
442 | // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos); | |
443 | ||
444 | return 0; | |
445 | } | |
446 | #endif | |
447 | ||
448 | static void predictor_init_state(int *k, int *state, int order) | |
449 | { | |
450 | int i; | |
451 | ||
452 | for (i = order-2; i >= 0; i--) | |
453 | { | |
454 | int j, p, x = state[i]; | |
455 | ||
456 | for (j = 0, p = i+1; p < order; j++,p++) | |
457 | { | |
458 | int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT); | |
459 | state[p] += shift_down(k[j]*x, LATTICE_SHIFT); | |
460 | x = tmp; | |
461 | } | |
462 | } | |
463 | } | |
464 | ||
465 | static int predictor_calc_error(int *k, int *state, int order, int error) | |
466 | { | |
467 | int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT); | |
468 | ||
469 | #if 1 | |
470 | int *k_ptr = &(k[order-2]), | |
471 | *state_ptr = &(state[order-2]); | |
472 | for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--) | |
473 | { | |
474 | int k_value = *k_ptr, state_value = *state_ptr; | |
475 | x -= shift_down(k_value * state_value, LATTICE_SHIFT); | |
476 | state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT); | |
477 | } | |
478 | #else | |
479 | for (i = order-2; i >= 0; i--) | |
480 | { | |
481 | x -= shift_down(k[i] * state[i], LATTICE_SHIFT); | |
482 | state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT); | |
483 | } | |
484 | #endif | |
485 | ||
486 | // don't drift too far, to avoid overflows | |
487 | if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16); | |
488 | if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16); | |
489 | ||
490 | state[0] = x; | |
491 | ||
492 | return x; | |
493 | } | |
494 | ||
495 | #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER | |
496 | // Heavily modified Levinson-Durbin algorithm which | |
497 | // copes better with quantization, and calculates the | |
498 | // actual whitened result as it goes. | |
499 | ||
500 | static void modified_levinson_durbin(int *window, int window_entries, | |
501 | int *out, int out_entries, int channels, int *tap_quant) | |
502 | { | |
503 | int i; | |
504 | int *state = av_calloc(window_entries, sizeof(*state)); | |
505 | ||
506 | memcpy(state, window, 4* window_entries); | |
507 | ||
508 | for (i = 0; i < out_entries; i++) | |
509 | { | |
510 | int step = (i+1)*channels, k, j; | |
511 | double xx = 0.0, xy = 0.0; | |
512 | #if 1 | |
513 | int *x_ptr = &(window[step]); | |
514 | int *state_ptr = &(state[0]); | |
515 | j = window_entries - step; | |
516 | for (;j>0;j--,x_ptr++,state_ptr++) | |
517 | { | |
518 | double x_value = *x_ptr; | |
519 | double state_value = *state_ptr; | |
520 | xx += state_value*state_value; | |
521 | xy += x_value*state_value; | |
522 | } | |
523 | #else | |
524 | for (j = 0; j <= (window_entries - step); j++); | |
525 | { | |
526 | double stepval = window[step+j]; | |
527 | double stateval = window[j]; | |
528 | // xx += (double)window[j]*(double)window[j]; | |
529 | // xy += (double)window[step+j]*(double)window[j]; | |
530 | xx += stateval*stateval; | |
531 | xy += stepval*stateval; | |
532 | } | |
533 | #endif | |
534 | if (xx == 0.0) | |
535 | k = 0; | |
536 | else | |
537 | k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5)); | |
538 | ||
539 | if (k > (LATTICE_FACTOR/tap_quant[i])) | |
540 | k = LATTICE_FACTOR/tap_quant[i]; | |
541 | if (-k > (LATTICE_FACTOR/tap_quant[i])) | |
542 | k = -(LATTICE_FACTOR/tap_quant[i]); | |
543 | ||
544 | out[i] = k; | |
545 | k *= tap_quant[i]; | |
546 | ||
547 | #if 1 | |
548 | x_ptr = &(window[step]); | |
549 | state_ptr = &(state[0]); | |
550 | j = window_entries - step; | |
551 | for (;j>0;j--,x_ptr++,state_ptr++) | |
552 | { | |
553 | int x_value = *x_ptr; | |
554 | int state_value = *state_ptr; | |
555 | *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT); | |
556 | *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT); | |
557 | } | |
558 | #else | |
559 | for (j=0; j <= (window_entries - step); j++) | |
560 | { | |
561 | int stepval = window[step+j]; | |
562 | int stateval=state[j]; | |
563 | window[step+j] += shift_down(k * stateval, LATTICE_SHIFT); | |
564 | state[j] += shift_down(k * stepval, LATTICE_SHIFT); | |
565 | } | |
566 | #endif | |
567 | } | |
568 | ||
569 | av_free(state); | |
570 | } | |
571 | ||
/**
 * Map a sample rate in Hz to the index stored in the extradata header.
 *
 * @return index 0..8, or AVERROR(EINVAL) for a rate that has no index
 */
static inline int code_samplerate(int samplerate)
{
    static const int known_rates[] = {
        44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
    };
    const int count = sizeof(known_rates) / sizeof(known_rates[0]);
    int idx;

    for (idx = 0; idx < count; idx++)
        if (known_rates[idx] == samplerate)
            return idx;

    return AVERROR(EINVAL);
}
588 | ||
589 | static av_cold int sonic_encode_init(AVCodecContext *avctx) | |
590 | { | |
591 | SonicContext *s = avctx->priv_data; | |
592 | PutBitContext pb; | |
593 | int i; | |
594 | ||
595 | s->version = 2; | |
596 | ||
597 | if (avctx->channels > MAX_CHANNELS) | |
598 | { | |
599 | av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n"); | |
600 | return AVERROR(EINVAL); /* only stereo or mono for now */ | |
601 | } | |
602 | ||
603 | if (avctx->channels == 2) | |
604 | s->decorrelation = MID_SIDE; | |
605 | else | |
606 | s->decorrelation = 3; | |
607 | ||
608 | if (avctx->codec->id == AV_CODEC_ID_SONIC_LS) | |
609 | { | |
610 | s->lossless = 1; | |
611 | s->num_taps = 32; | |
612 | s->downsampling = 1; | |
613 | s->quantization = 0.0; | |
614 | } | |
615 | else | |
616 | { | |
617 | s->num_taps = 128; | |
618 | s->downsampling = 2; | |
619 | s->quantization = 1.0; | |
620 | } | |
621 | ||
622 | // max tap 2048 | |
623 | if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) { | |
624 | av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n"); | |
625 | return AVERROR_INVALIDDATA; | |
626 | } | |
627 | ||
628 | // generate taps | |
629 | s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant)); | |
630 | for (i = 0; i < s->num_taps; i++) | |
631 | s->tap_quant[i] = ff_sqrt(i+1); | |
632 | ||
633 | s->channels = avctx->channels; | |
634 | s->samplerate = avctx->sample_rate; | |
635 | ||
636 | s->block_align = 2048LL*s->samplerate/(44100*s->downsampling); | |
637 | s->frame_size = s->channels*s->block_align*s->downsampling; | |
638 | ||
639 | s->tail_size = s->num_taps*s->channels; | |
640 | s->tail = av_calloc(s->tail_size, sizeof(*s->tail)); | |
641 | if (!s->tail) | |
642 | return AVERROR(ENOMEM); | |
643 | ||
644 | s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) ); | |
645 | if (!s->predictor_k) | |
646 | return AVERROR(ENOMEM); | |
647 | ||
648 | for (i = 0; i < s->channels; i++) | |
649 | { | |
650 | s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples)); | |
651 | if (!s->coded_samples[i]) | |
652 | return AVERROR(ENOMEM); | |
653 | } | |
654 | ||
655 | s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples)); | |
656 | ||
657 | s->window_size = ((2*s->tail_size)+s->frame_size); | |
658 | s->window = av_calloc(s->window_size, sizeof(*s->window)); | |
659 | if (!s->window) | |
660 | return AVERROR(ENOMEM); | |
661 | ||
662 | avctx->extradata = av_mallocz(16); | |
663 | if (!avctx->extradata) | |
664 | return AVERROR(ENOMEM); | |
665 | init_put_bits(&pb, avctx->extradata, 16*8); | |
666 | ||
667 | put_bits(&pb, 2, s->version); // version | |
668 | if (s->version >= 1) | |
669 | { | |
670 | if (s->version >= 2) { | |
671 | put_bits(&pb, 8, s->version); | |
672 | put_bits(&pb, 8, s->minor_version); | |
673 | } | |
674 | put_bits(&pb, 2, s->channels); | |
675 | put_bits(&pb, 4, code_samplerate(s->samplerate)); | |
676 | } | |
677 | put_bits(&pb, 1, s->lossless); | |
678 | if (!s->lossless) | |
679 | put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision | |
680 | put_bits(&pb, 2, s->decorrelation); | |
681 | put_bits(&pb, 2, s->downsampling); | |
682 | put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024 | |
683 | put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table | |
684 | ||
685 | flush_put_bits(&pb); | |
686 | avctx->extradata_size = put_bits_count(&pb)/8; | |
687 | ||
688 | av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n", | |
689 | s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling); | |
690 | ||
691 | avctx->frame_size = s->block_align*s->downsampling; | |
692 | ||
693 | return 0; | |
694 | } | |
695 | ||
696 | static av_cold int sonic_encode_close(AVCodecContext *avctx) | |
697 | { | |
698 | SonicContext *s = avctx->priv_data; | |
699 | int i; | |
700 | ||
701 | for (i = 0; i < s->channels; i++) | |
702 | av_freep(&s->coded_samples[i]); | |
703 | ||
704 | av_freep(&s->predictor_k); | |
705 | av_freep(&s->tail); | |
706 | av_freep(&s->tap_quant); | |
707 | av_freep(&s->window); | |
708 | av_freep(&s->int_samples); | |
709 | ||
710 | return 0; | |
711 | } | |
712 | ||
713 | static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, | |
714 | const AVFrame *frame, int *got_packet_ptr) | |
715 | { | |
716 | SonicContext *s = avctx->priv_data; | |
717 | RangeCoder c; | |
718 | int i, j, ch, quant = 0, x = 0; | |
719 | int ret; | |
720 | const short *samples = (const int16_t*)frame->data[0]; | |
721 | uint8_t state[32]; | |
722 | ||
723 | if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000)) < 0) | |
724 | return ret; | |
725 | ||
726 | ff_init_range_encoder(&c, avpkt->data, avpkt->size); | |
727 | ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8); | |
728 | memset(state, 128, sizeof(state)); | |
729 | ||
730 | // short -> internal | |
731 | for (i = 0; i < s->frame_size; i++) | |
732 | s->int_samples[i] = samples[i]; | |
733 | ||
734 | if (!s->lossless) | |
735 | for (i = 0; i < s->frame_size; i++) | |
736 | s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT; | |
737 | ||
738 | switch(s->decorrelation) | |
739 | { | |
740 | case MID_SIDE: | |
741 | for (i = 0; i < s->frame_size; i += s->channels) | |
742 | { | |
743 | s->int_samples[i] += s->int_samples[i+1]; | |
744 | s->int_samples[i+1] -= shift(s->int_samples[i], 1); | |
745 | } | |
746 | break; | |
747 | case LEFT_SIDE: | |
748 | for (i = 0; i < s->frame_size; i += s->channels) | |
749 | s->int_samples[i+1] -= s->int_samples[i]; | |
750 | break; | |
751 | case RIGHT_SIDE: | |
752 | for (i = 0; i < s->frame_size; i += s->channels) | |
753 | s->int_samples[i] -= s->int_samples[i+1]; | |
754 | break; | |
755 | } | |
756 | ||
757 | memset(s->window, 0, 4* s->window_size); | |
758 | ||
759 | for (i = 0; i < s->tail_size; i++) | |
760 | s->window[x++] = s->tail[i]; | |
761 | ||
762 | for (i = 0; i < s->frame_size; i++) | |
763 | s->window[x++] = s->int_samples[i]; | |
764 | ||
765 | for (i = 0; i < s->tail_size; i++) | |
766 | s->window[x++] = 0; | |
767 | ||
768 | for (i = 0; i < s->tail_size; i++) | |
769 | s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i]; | |
770 | ||
771 | // generate taps | |
772 | modified_levinson_durbin(s->window, s->window_size, | |
773 | s->predictor_k, s->num_taps, s->channels, s->tap_quant); | |
774 | if ((ret = intlist_write(&c, state, s->predictor_k, s->num_taps, 0)) < 0) | |
775 | return ret; | |
776 | ||
777 | for (ch = 0; ch < s->channels; ch++) | |
778 | { | |
779 | x = s->tail_size+ch; | |
780 | for (i = 0; i < s->block_align; i++) | |
781 | { | |
782 | int sum = 0; | |
783 | for (j = 0; j < s->downsampling; j++, x += s->channels) | |
784 | sum += s->window[x]; | |
785 | s->coded_samples[ch][i] = sum; | |
786 | } | |
787 | } | |
788 | ||
789 | // simple rate control code | |
790 | if (!s->lossless) | |
791 | { | |
792 | double energy1 = 0.0, energy2 = 0.0; | |
793 | for (ch = 0; ch < s->channels; ch++) | |
794 | { | |
795 | for (i = 0; i < s->block_align; i++) | |
796 | { | |
797 | double sample = s->coded_samples[ch][i]; | |
798 | energy2 += sample*sample; | |
799 | energy1 += fabs(sample); | |
800 | } | |
801 | } | |
802 | ||
803 | energy2 = sqrt(energy2/(s->channels*s->block_align)); | |
804 | energy1 = M_SQRT2*energy1/(s->channels*s->block_align); | |
805 | ||
806 | // increase bitrate when samples are like a gaussian distribution | |
807 | // reduce bitrate when samples are like a two-tailed exponential distribution | |
808 | ||
809 | if (energy2 > energy1) | |
810 | energy2 += (energy2-energy1)*RATE_VARIATION; | |
811 | ||
812 | quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR); | |
813 | // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2); | |
814 | ||
815 | quant = av_clip(quant, 1, 65534); | |
816 | ||
817 | put_symbol(&c, state, quant, 0, NULL, NULL); | |
818 | ||
819 | quant *= SAMPLE_FACTOR; | |
820 | } | |
821 | ||
822 | // write out coded samples | |
823 | for (ch = 0; ch < s->channels; ch++) | |
824 | { | |
825 | if (!s->lossless) | |
826 | for (i = 0; i < s->block_align; i++) | |
827 | s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant); | |
828 | ||
829 | if ((ret = intlist_write(&c, state, s->coded_samples[ch], s->block_align, 1)) < 0) | |
830 | return ret; | |
831 | } | |
832 | ||
833 | // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8); | |
834 | ||
835 | avpkt->size = ff_rac_terminate(&c); | |
836 | *got_packet_ptr = 1; | |
837 | return 0; | |
838 | ||
839 | } | |
840 | #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */ | |
841 | ||
842 | #if CONFIG_SONIC_DECODER | |
/* Sample rate in Hz for each rate index stored in the extradata header. */
static const int samplerate_table[] = {
    44100, 22050, 11025,
    96000, 48000, 32000,
    24000, 16000,  8000,
};
845 | ||
846 | static av_cold int sonic_decode_init(AVCodecContext *avctx) | |
847 | { | |
848 | SonicContext *s = avctx->priv_data; | |
849 | GetBitContext gb; | |
850 | int i; | |
851 | ||
852 | s->channels = avctx->channels; | |
853 | s->samplerate = avctx->sample_rate; | |
854 | ||
855 | if (!avctx->extradata) | |
856 | { | |
857 | av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n"); | |
858 | return AVERROR_INVALIDDATA; | |
859 | } | |
860 | ||
861 | init_get_bits8(&gb, avctx->extradata, avctx->extradata_size); | |
862 | ||
863 | s->version = get_bits(&gb, 2); | |
864 | if (s->version >= 2) { | |
865 | s->version = get_bits(&gb, 8); | |
866 | s->minor_version = get_bits(&gb, 8); | |
867 | } | |
868 | if (s->version != 2) | |
869 | { | |
870 | av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n"); | |
871 | return AVERROR_INVALIDDATA; | |
872 | } | |
873 | ||
874 | if (s->version >= 1) | |
875 | { | |
876 | s->channels = get_bits(&gb, 2); | |
877 | s->samplerate = samplerate_table[get_bits(&gb, 4)]; | |
878 | av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n", | |
879 | s->channels, s->samplerate); | |
880 | } | |
881 | ||
882 | if (s->channels > MAX_CHANNELS) | |
883 | { | |
884 | av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n"); | |
885 | return AVERROR_INVALIDDATA; | |
886 | } | |
887 | ||
888 | s->lossless = get_bits1(&gb); | |
889 | if (!s->lossless) | |
890 | skip_bits(&gb, 3); // XXX FIXME | |
891 | s->decorrelation = get_bits(&gb, 2); | |
892 | if (s->decorrelation != 3 && s->channels != 2) { | |
893 | av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation); | |
894 | return AVERROR_INVALIDDATA; | |
895 | } | |
896 | ||
897 | s->downsampling = get_bits(&gb, 2); | |
898 | if (!s->downsampling) { | |
899 | av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n"); | |
900 | return AVERROR_INVALIDDATA; | |
901 | } | |
902 | ||
903 | s->num_taps = (get_bits(&gb, 5)+1)<<5; | |
904 | if (get_bits1(&gb)) // XXX FIXME | |
905 | av_log(avctx, AV_LOG_INFO, "Custom quant table\n"); | |
906 | ||
907 | s->block_align = 2048LL*s->samplerate/(44100*s->downsampling); | |
908 | s->frame_size = s->channels*s->block_align*s->downsampling; | |
909 | // avctx->frame_size = s->block_align; | |
910 | ||
911 | av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n", | |
912 | s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling); | |
913 | ||
914 | // generate taps | |
915 | s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant)); | |
916 | for (i = 0; i < s->num_taps; i++) | |
917 | s->tap_quant[i] = ff_sqrt(i+1); | |
918 | ||
919 | s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k)); | |
920 | ||
921 | for (i = 0; i < s->channels; i++) | |
922 | { | |
923 | s->predictor_state[i] = av_calloc(s->num_taps, sizeof(**s->predictor_state)); | |
924 | if (!s->predictor_state[i]) | |
925 | return AVERROR(ENOMEM); | |
926 | } | |
927 | ||
928 | for (i = 0; i < s->channels; i++) | |
929 | { | |
930 | s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples)); | |
931 | if (!s->coded_samples[i]) | |
932 | return AVERROR(ENOMEM); | |
933 | } | |
934 | s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples)); | |
935 | ||
936 | avctx->sample_fmt = AV_SAMPLE_FMT_S16; | |
937 | return 0; | |
938 | } | |
939 | ||
940 | static av_cold int sonic_decode_close(AVCodecContext *avctx) | |
941 | { | |
942 | SonicContext *s = avctx->priv_data; | |
943 | int i; | |
944 | ||
945 | av_freep(&s->int_samples); | |
946 | av_freep(&s->tap_quant); | |
947 | av_freep(&s->predictor_k); | |
948 | ||
949 | for (i = 0; i < s->channels; i++) | |
950 | { | |
951 | av_freep(&s->predictor_state[i]); | |
952 | av_freep(&s->coded_samples[i]); | |
953 | } | |
954 | ||
955 | return 0; | |
956 | } | |
957 | ||
958 | static int sonic_decode_frame(AVCodecContext *avctx, | |
959 | void *data, int *got_frame_ptr, | |
960 | AVPacket *avpkt) | |
961 | { | |
962 | const uint8_t *buf = avpkt->data; | |
963 | int buf_size = avpkt->size; | |
964 | SonicContext *s = avctx->priv_data; | |
965 | RangeCoder c; | |
966 | uint8_t state[32]; | |
967 | int i, quant, ch, j, ret; | |
968 | int16_t *samples; | |
969 | AVFrame *frame = data; | |
970 | ||
971 | if (buf_size == 0) return 0; | |
972 | ||
973 | frame->nb_samples = s->frame_size / avctx->channels; | |
974 | if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) | |
975 | return ret; | |
976 | samples = (int16_t *)frame->data[0]; | |
977 | ||
978 | // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size); | |
979 | ||
980 | memset(state, 128, sizeof(state)); | |
981 | ff_init_range_decoder(&c, buf, buf_size); | |
982 | ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8); | |
983 | ||
984 | intlist_read(&c, state, s->predictor_k, s->num_taps, 0); | |
985 | ||
986 | // dequantize | |
987 | for (i = 0; i < s->num_taps; i++) | |
988 | s->predictor_k[i] *= s->tap_quant[i]; | |
989 | ||
990 | if (s->lossless) | |
991 | quant = 1; | |
992 | else | |
993 | quant = get_symbol(&c, state, 0) * SAMPLE_FACTOR; | |
994 | ||
995 | // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant); | |
996 | ||
997 | for (ch = 0; ch < s->channels; ch++) | |
998 | { | |
999 | int x = ch; | |
1000 | ||
1001 | predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps); | |
1002 | ||
1003 | intlist_read(&c, state, s->coded_samples[ch], s->block_align, 1); | |
1004 | ||
1005 | for (i = 0; i < s->block_align; i++) | |
1006 | { | |
1007 | for (j = 0; j < s->downsampling - 1; j++) | |
1008 | { | |
1009 | s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0); | |
1010 | x += s->channels; | |
1011 | } | |
1012 | ||
1013 | s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant); | |
1014 | x += s->channels; | |
1015 | } | |
1016 | ||
1017 | for (i = 0; i < s->num_taps; i++) | |
1018 | s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels]; | |
1019 | } | |
1020 | ||
1021 | switch(s->decorrelation) | |
1022 | { | |
1023 | case MID_SIDE: | |
1024 | for (i = 0; i < s->frame_size; i += s->channels) | |
1025 | { | |
1026 | s->int_samples[i+1] += shift(s->int_samples[i], 1); | |
1027 | s->int_samples[i] -= s->int_samples[i+1]; | |
1028 | } | |
1029 | break; | |
1030 | case LEFT_SIDE: | |
1031 | for (i = 0; i < s->frame_size; i += s->channels) | |
1032 | s->int_samples[i+1] += s->int_samples[i]; | |
1033 | break; | |
1034 | case RIGHT_SIDE: | |
1035 | for (i = 0; i < s->frame_size; i += s->channels) | |
1036 | s->int_samples[i] += s->int_samples[i+1]; | |
1037 | break; | |
1038 | } | |
1039 | ||
1040 | if (!s->lossless) | |
1041 | for (i = 0; i < s->frame_size; i++) | |
1042 | s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT); | |
1043 | ||
1044 | // internal -> short | |
1045 | for (i = 0; i < s->frame_size; i++) | |
1046 | samples[i] = av_clip_int16(s->int_samples[i]); | |
1047 | ||
1048 | *got_frame_ptr = 1; | |
1049 | ||
1050 | return buf_size; | |
1051 | } | |
1052 | ||
1053 | AVCodec ff_sonic_decoder = { | |
1054 | .name = "sonic", | |
1055 | .long_name = NULL_IF_CONFIG_SMALL("Sonic"), | |
1056 | .type = AVMEDIA_TYPE_AUDIO, | |
1057 | .id = AV_CODEC_ID_SONIC, | |
1058 | .priv_data_size = sizeof(SonicContext), | |
1059 | .init = sonic_decode_init, | |
1060 | .close = sonic_decode_close, | |
1061 | .decode = sonic_decode_frame, | |
1062 | .capabilities = CODEC_CAP_DR1 | CODEC_CAP_EXPERIMENTAL, | |
1063 | }; | |
1064 | #endif /* CONFIG_SONIC_DECODER */ | |
1065 | ||
1066 | #if CONFIG_SONIC_ENCODER | |
1067 | AVCodec ff_sonic_encoder = { | |
1068 | .name = "sonic", | |
1069 | .long_name = NULL_IF_CONFIG_SMALL("Sonic"), | |
1070 | .type = AVMEDIA_TYPE_AUDIO, | |
1071 | .id = AV_CODEC_ID_SONIC, | |
1072 | .priv_data_size = sizeof(SonicContext), | |
1073 | .init = sonic_encode_init, | |
1074 | .encode2 = sonic_encode_frame, | |
1075 | .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE }, | |
1076 | .capabilities = CODEC_CAP_EXPERIMENTAL, | |
1077 | .close = sonic_encode_close, | |
1078 | }; | |
1079 | #endif | |
1080 | ||
1081 | #if CONFIG_SONIC_LS_ENCODER | |
1082 | AVCodec ff_sonic_ls_encoder = { | |
1083 | .name = "sonicls", | |
1084 | .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"), | |
1085 | .type = AVMEDIA_TYPE_AUDIO, | |
1086 | .id = AV_CODEC_ID_SONIC_LS, | |
1087 | .priv_data_size = sizeof(SonicContext), | |
1088 | .init = sonic_encode_init, | |
1089 | .encode2 = sonic_encode_frame, | |
1090 | .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE }, | |
1091 | .capabilities = CODEC_CAP_EXPERIMENTAL, | |
1092 | .close = sonic_encode_close, | |
1093 | }; | |
1094 | #endif |