3 * Copyright (c) 2008 Jaikrishnan Menon <realityman@gmx.net>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "alac_data.h"
/* Samples per channel in a full frame. Dimensions sample_buf/predictor_buf;
 * frames shorter than this are budgeted 32 extra header bits in
 * get_max_frame_size(). */
29 #define DEFAULT_FRAME_SIZE 4096
/* Byte size of the extradata blob allocated and filled in alac_encode_init(). */
30 #define ALAC_EXTRADATA_SIZE 36
/* NOTE(review): the two sizes below are not referenced anywhere in the
 * visible part of this file -- confirm whether they are still needed. */
31 #define ALAC_FRAME_HEADER_SIZE 55
32 #define ALAC_FRAME_FOOTER_SIZE 3
/* 9-bit marker that encode_scalar() emits before writing a value directly. */
34 #define ALAC_ESCAPE_CODE 0x1FF
/* Largest prediction order accepted by alac_encode_init(); also sizes
 * AlacLPCContext.lpc_coeff (ALAC_MAX_LPC_ORDER + 1 entries). */
35 #define ALAC_MAX_LPC_ORDER 30
/* Prediction-order range used when the caller supplies a negative value. */
36 #define DEFAULT_MAX_PRED_ORDER 6
37 #define DEFAULT_MIN_PRED_ORDER 4
/* Precision and maximum shift arguments handed to ff_lpc_calc_coefs(). */
38 #define ALAC_MAX_LPC_PRECISION 9
39 #define ALAC_MAX_LPC_SHIFT 9
/* Stereo decorrelation modes. LEFT_RIGHT, LEFT_SIDE and RIGHT_SIDE appear as
 * case labels in alac_stereo_decorrelation(); the values 0..3 line up with
 * the score[] indices in estimate_stereo_mode(). */
41 #define ALAC_CHMODE_LEFT_RIGHT 0
42 #define ALAC_CHMODE_LEFT_SIDE 1
43 #define ALAC_CHMODE_RIGHT_SIDE 2
44 #define ALAC_CHMODE_MID_SIDE 3
/*
 * Rice coder parameters.
 * NOTE(review): the member list is absent from this listing. Elsewhere the
 * file reads history_mult, initial_history, k_modifier and rice_modifier
 * through s->rc, which is presumably an instance of this struct -- confirm
 * against the upstream file.
 */
46 typedef struct RiceContext
{
/*
 * Linear-prediction parameters for one channel (AlacEncodeContext holds two).
 * NOTE(review): only lpc_coeff is visible here; lpc_order and lpc_quant are
 * accessed on this type elsewhere in the file, so their declarations appear
 * to be missing from this listing.
 */
53 typedef struct AlacLPCContext
{
/* Predictor coefficient table, ALAC_MAX_LPC_ORDER + 1 entries. */
55 int lpc_coeff
[ALAC_MAX_LPC_ORDER
+1];
/*
 * Private encoder state (installed as the codec's priv_data).
 * NOTE(review): the code also uses s->extra_bits, s->pbctx, s->rc and
 * s->lpc_ctx, none of which are declared in the visible part of this struct;
 * the listing appears to have dropped those member lines.
 */
59 typedef struct AlacEncodeContext
{
60 int frame_size
; /**< current frame size */
61 int verbatim
; /**< current frame verbatim mode flag */
/* Clipped to 0..2 in alac_encode_init(); level 1 selects the fixed
 * coefficient set in calc_predictor_params(). */
62 int compression_level
;
/* Order range handed to ff_lpc_calc_coefs(). */
63 int min_prediction_order
;
64 int max_prediction_order
;
/* Result of get_max_frame_size() for a full-size frame; stored in extradata. */
65 int max_coded_frame_size
;
/* Bit width used for sign_extend() and for escaped values in encode_scalar(). */
66 int write_sample_size
;
/* Per-channel working copy of the input samples. */
68 int32_t sample_buf
[2][DEFAULT_FRAME_SIZE
];
/* Per-channel prediction residuals; write_element() also borrows it to hold
 * the extracted low "extra" bits before prediction runs. */
69 int32_t predictor_buf
[2][DEFAULT_FRAME_SIZE
];
/* Stereo decorrelation parameters, each written as an 8-bit field. */
70 int interlacing_shift
;
71 int interlacing_leftweight
;
/* Predictor parameters, one entry per channel of the current element. */
74 AlacLPCContext lpc
[2];
/* Owning codec context (sample format, bit depth, channel count). */
76 AVCodecContext
*avctx
;
/*
 * Copy one frame of input into s->sample_buf, one input plane per channel
 * (samples[ch]), converting every sample to int32_t.
 *
 * s        encoder context; frame_size samples are copied per channel
 * channels number of channels to copy
 * samples  per-channel input pointers, read as int32_t or int16_t depending
 *          on the sample format
 *
 * NOTE(review): the opening brace, the declarations of ch/i, the end of the
 * macro and the else keyword are not present in this listing.
 */
80 static void init_sample_buffers(AlacEncodeContext
*s
, int channels
,
81 uint8_t const *samples
[2])
/* Number of unused low bits: container width in bits minus the real depth. */
84 int shift
= av_get_bytes_per_sample(s
->avctx
->sample_fmt
) * 8 -
85 s
->avctx
->bits_per_raw_sample
;
/* Typed copy loop; each sample is right-shifted by `shift` on the way in. */
87 #define COPY_SAMPLES(type) do { \
88 for (ch = 0; ch < channels; ch++) { \
89 int32_t *bptr = s->sample_buf[ch]; \
90 const type *sptr = (const type *)samples[ch]; \
91 for (i = 0; i < s->frame_size; i++) \
92 bptr[i] = sptr[i] >> shift; \
96 if (s
->avctx
->sample_fmt
== AV_SAMPLE_FMT_S32P
)
97 COPY_SAMPLES(int32_t);
/* presumably the else branch (any format other than S32P) -- confirm */
99 COPY_SAMPLES(int16_t);
/*
 * Write one value x to the bitstream using parameter k.
 *
 * s                 encoder context (bit writer s->pbctx, Rice limits s->rc)
 * x                 value to encode
 * k                 requested code parameter, capped at s->rc.k_modifier
 * write_sample_size bit width used when x is written directly after the
 *                   escape code
 *
 * NOTE(review): the computation of q and r and the conditions that select
 * between the put_bits() calls below are not present in this listing.
 */
102 static void encode_scalar(AlacEncodeContext
*s
, int x
,
103 int k
, int write_sample_size
)
107 k
= FFMIN(k
, s
->rc
.k_modifier
);
/* divisor is 2^k - 1 */
108 divisor
= (1<<k
) - 1;
113 // write escape code and sample value directly
114 put_bits(&s
->pbctx
, 9, ALAC_ESCAPE_CODE
);
115 put_bits(&s
->pbctx
, write_sample_size
, x
);
/* prefix: q one-bits ... */
118 put_bits(&s
->pbctx
, q
, (1<<q
) - 1);
/* ... followed by a single zero bit */
119 put_bits(&s
->pbctx
, 1, 0);
/* suffix: either r+1 in k bits, or a zero field of k-1 bits */
123 put_bits(&s
->pbctx
, k
, r
+1);
125 put_bits(&s
->pbctx
, k
-1, 0);
/*
 * Write the header of one element to s->pbctx.
 *
 * The fixed fields below total 3 + 4 + 12 + 1 + 2 + 1 = 23 bits, plus a
 * 32-bit sample count; this matches the 23 + 32 budget in
 * get_max_frame_size().
 *
 * s        encoder context (frame_size, extra_bits and verbatim are read)
 * element  element type, written in 3 bits
 * instance element instance, written in 4 bits
 *
 * NOTE(review): the declaration of `instance`, the declaration/assignment of
 * encode_fs and the condition guarding the final put_bits32() are not present
 * in this listing.
 */
130 static void write_element_header(AlacEncodeContext
*s
,
131 enum AlacRawDataBlockType element
,
136 if (s
->frame_size
< DEFAULT_FRAME_SIZE
)
139 put_bits(&s
->pbctx
, 3, element
); // element type
140 put_bits(&s
->pbctx
, 4, instance
); // element instance
141 put_bits(&s
->pbctx
, 12, 0); // unused header bits
142 put_bits(&s
->pbctx
, 1, encode_fs
); // Sample count is in the header
143 put_bits(&s
->pbctx
, 2, s
->extra_bits
>> 3); // Extra bytes (for 24-bit)
144 put_bits(&s
->pbctx
, 1, s
->verbatim
); // Audio block is verbatim
146 put_bits32(&s
->pbctx
, s
->frame_size
); // No. of samples in the frame
/*
 * Choose the predictor order, quantisation shift and coefficients for
 * channel ch and store them in s->lpc[ch].
 *
 * s   encoder context
 * ch  index into s->lpc and s->sample_buf
 *
 * NOTE(review): the declaration of opt_order, the else keyword and one
 * argument line of the ff_lpc_calc_coefs() call are not present in this
 * listing.
 */
149 static void calc_predictor_params(AlacEncodeContext
*s
, int ch
)
151 int32_t coefs
[MAX_LPC_ORDER
][MAX_LPC_ORDER
];
152 int shift
[MAX_LPC_ORDER
];
/* compression level 1: fixed order-6 predictor, no analysis of the input */
155 if (s
->compression_level
== 1) {
156 s
->lpc
[ch
].lpc_order
= 6;
157 s
->lpc
[ch
].lpc_quant
= 6;
158 s
->lpc
[ch
].lpc_coeff
[0] = 160;
159 s
->lpc
[ch
].lpc_coeff
[1] = -190;
160 s
->lpc
[ch
].lpc_coeff
[2] = 170;
161 s
->lpc
[ch
].lpc_coeff
[3] = -130;
162 s
->lpc
[ch
].lpc_coeff
[4] = 80;
163 s
->lpc
[ch
].lpc_coeff
[5] = -25;
/* otherwise derive the parameters from the samples themselves */
165 opt_order
= ff_lpc_calc_coefs(&s
->lpc_ctx
, s
->sample_buf
[ch
],
167 s
->min_prediction_order
,
168 s
->max_prediction_order
,
169 ALAC_MAX_LPC_PRECISION
, coefs
, shift
,
170 FF_LPC_TYPE_LEVINSON
, 0,
171 ORDER_METHOD_EST
, ALAC_MAX_LPC_SHIFT
, 1);
/* keep the row and shift that belong to the returned order */
173 s
->lpc
[ch
].lpc_order
= opt_order
;
174 s
->lpc
[ch
].lpc_quant
= shift
[opt_order
-1];
/* NOTE(review): coefs is int32_t while lpc_coeff is int and the length uses
 * sizeof(int); this copy is only correct where int is 32 bits wide. */
175 memcpy(s
->lpc
[ch
].lpc_coeff
, coefs
[opt_order
-1], opt_order
*sizeof(int));
/*
 * Score the four stereo modes for a pair of channels.
 *
 * left_ch, right_ch  channel samples, n entries each
 * n                  number of samples; the loop starts at index 2
 *
 * The caller uses the result as one of the ALAC_CHMODE_* values, so the
 * score[] index corresponds to the mode number.
 *
 * NOTE(review): the local declarations, the accumulation of sum[0] and
 * sum[1] (presumably the absolute values of lt and rt), the initialisation
 * and update of `best` and the return statement are not present in this
 * listing.
 */
179 static int estimate_stereo_mode(int32_t *left_ch
, int32_t *right_ch
, int n
)
186 /* calculate sum of 2nd order residual for each channel */
187 sum
[0] = sum
[1] = sum
[2] = sum
[3] = 0;
188 for (i
= 2; i
< n
; i
++) {
/* second-order difference x[i] - 2*x[i-1] + x[i-2] of each channel */
189 lt
= left_ch
[i
] - 2 * left_ch
[i
- 1] + left_ch
[i
- 2];
190 rt
= right_ch
[i
] - 2 * right_ch
[i
- 1] + right_ch
[i
- 2];
/* sum[2]: halved sum of the two channels, sum[3]: their difference */
191 sum
[2] += FFABS((lt
+ rt
) >> 1);
192 sum
[3] += FFABS(lt
- rt
);
197 /* calculate score for each mode */
198 score
[0] = sum
[0] + sum
[1];
199 score
[1] = sum
[0] + sum
[3];
200 score
[2] = sum
[1] + sum
[3];
201 score
[3] = sum
[2] + sum
[3];
203 /* return mode with lowest score */
/* strict '<' keeps the lower-numbered mode when two scores tie */
205 for (i
= 1; i
< 4; i
++) {
206 if (score
[i
] < score
[best
])
/*
 * Decorrelate the two channels held in s->sample_buf in place, using the
 * mode chosen by estimate_stereo_mode(), and record the matching
 * interlacing_leftweight / interlacing_shift pair in s.
 *
 * NOTE(review): the switch statement line, the break statements, the label of
 * the last branch, the declaration of tmp and the lines that load tmp are
 * not present in this listing.
 */
212 static void alac_stereo_decorrelation(AlacEncodeContext
*s
)
214 int32_t *left
= s
->sample_buf
[0], *right
= s
->sample_buf
[1];
215 int i
, mode
, n
= s
->frame_size
;
218 mode
= estimate_stereo_mode(left
, right
, n
);
/* channels left untouched: weight 0, shift 0 */
221 case ALAC_CHMODE_LEFT_RIGHT
:
222 s
->interlacing_leftweight
= 0;
223 s
->interlacing_shift
= 0;
/* second channel becomes left - right; first channel is unchanged */
225 case ALAC_CHMODE_LEFT_SIDE
:
226 for (i
= 0; i
< n
; i
++)
227 right
[i
] = left
[i
] - right
[i
];
228 s
->interlacing_leftweight
= 1;
229 s
->interlacing_shift
= 0;
/* second channel becomes left - right; first channel is rebuilt from tmp
 * plus the sign (>> 31) of that difference */
231 case ALAC_CHMODE_RIGHT_SIDE
:
232 for (i
= 0; i
< n
; i
++) {
234 right
[i
] = left
[i
] - right
[i
];
235 left
[i
] = tmp
+ (right
[i
] >> 31);
237 s
->interlacing_leftweight
= 1;
238 s
->interlacing_shift
= 31;
/* presumably the remaining (mid/side) branch: first channel becomes the
 * halved sum, second channel the difference -- confirm the label */
241 for (i
= 0; i
< n
; i
++) {
243 left
[i
] = (tmp
+ right
[i
]) >> 1;
244 right
[i
] = tmp
- right
[i
];
246 s
->interlacing_leftweight
= 1;
247 s
->interlacing_shift
= 1;
/*
 * Run the predictor over channel ch: read s->sample_buf[ch] and write the
 * residuals to s->predictor_buf[ch].
 *
 * The parameters are taken as a by-value copy of s->lpc[ch], so the
 * coefficient adjustments made inside the loop do not modify s->lpc[ch].
 *
 * NOTE(review): several lines are not present in this listing (declaration
 * of i, the early return, the coefficient factor of the inner product, the
 * statements between the shift and the residual, the guard around the
 * adaptation block, and the loop tail). The sample indices inside the main
 * loop are relative to `samples`, so that pointer is presumably advanced once
 * per iteration -- confirm against the upstream file.
 */
252 static void alac_linear_predictor(AlacEncodeContext
*s
, int ch
)
255 AlacLPCContext lpc
= s
->lpc
[ch
];
256 int32_t *residual
= s
->predictor_buf
[ch
];
/* order 31 is handled as a plain first-order difference.
 * NOTE(review): no visible code path stores 31 in lpc_order. */
258 if (lpc
.lpc_order
== 31) {
259 residual
[0] = s
->sample_buf
[ch
][0];
261 for (i
= 1; i
< s
->frame_size
; i
++) {
262 residual
[i
] = s
->sample_buf
[ch
][i
] -
263 s
->sample_buf
[ch
][i
- 1];
269 // generalised linear predictor
271 if (lpc
.lpc_order
> 0) {
272 int32_t *samples
= s
->sample_buf
[ch
];
274 // generate warm-up samples
275 residual
[0] = samples
[0];
276 for (i
= 1; i
<= lpc
.lpc_order
; i
++)
277 residual
[i
] = sign_extend(samples
[i
] - samples
[i
-1], s
->write_sample_size
);
279 // perform lpc on remaining samples
280 for (i
= lpc
.lpc_order
+ 1; i
< s
->frame_size
; i
++) {
/* sum starts at half of the quantisation step (rounding term).
 * NOTE(review): the shift count is negative if lpc_quant is 0, which is
 * undefined behaviour -- confirm lpc_quant is always >= 1 here. */
281 int sum
= 1 << (lpc
.lpc_quant
- 1), res_val
, j
;
283 for (j
= 0; j
< lpc
.lpc_order
; j
++) {
284 sum
+= (samples
[lpc
.lpc_order
-j
] - samples
[0]) *
288 sum
>>= lpc
.lpc_quant
;
/* residual is wrapped to write_sample_size bits */
290 residual
[i
] = sign_extend(samples
[lpc
.lpc_order
+1] - sum
,
291 s
->write_sample_size
);
292 res_val
= residual
[i
];
/* coefficient adaptation: walk from the highest index down while the
 * remaining error res_val keeps the sign it started with */
295 int index
= lpc
.lpc_order
- 1;
296 int neg
= (res_val
< 0);
298 while (index
>= 0 && (neg
? (res_val
< 0) : (res_val
> 0))) {
299 int val
= samples
[0] - samples
[lpc
.lpc_order
- index
];
300 int sign
= (val
? FFSIGN(val
) : 0);
305 lpc
.lpc_coeff
[index
] -= sign
;
307 res_val
-= (val
>> lpc
.lpc_quant
) * (lpc
.lpc_order
- index
);
/*
 * Entropy-code the residuals of channel ch (s->predictor_buf[ch]) through
 * encode_scalar(), adapting the code parameter k from a running `history`
 * value that starts at s->rc.initial_history.
 *
 * NOTE(review): the declaration of x, the statements that advance
 * samples/i/block_size, and the history and sign_modifier resets are not
 * present in this listing.
 */
316 static void alac_entropy_coder(AlacEncodeContext
*s
, int ch
)
318 unsigned int history
= s
->rc
.initial_history
;
319 int sign_modifier
= 0, i
, k
;
320 int32_t *samples
= s
->predictor_buf
[ch
];
/* the for header has no increment expression; i is advanced in the body */
322 for (i
= 0; i
< s
->frame_size
;) {
325 k
= av_log2((history
>> 9) + 3);
327 x
= -2 * (*samples
) -1;
333 encode_scalar(s
, x
- sign_modifier
, k
, s
->write_sample_size
);
/* history += x * mult - (history * mult >> 9) */
335 history
+= x
* s
->rc
.history_mult
-
336 ((history
* s
->rc
.history_mult
) >> 9);
/* low history: count the run of zero residuals that follows and code its
 * length (block_size) with a 16-bit escape width */
342 if (history
< 128 && i
< s
->frame_size
) {
343 unsigned int block_size
= 0;
345 k
= 7 - av_log2(history
) + ((history
+ 16) >> 6);
/* NOTE(review): *samples is dereferenced before i < s->frame_size is
 * tested. If samples advances together with i (those lines are not visible
 * here), the last evaluation reads one element past the frame's samples --
 * consider testing the bound first. */
347 while (*samples
== 0 && i
< s
->frame_size
) {
352 encode_scalar(s
, block_size
, k
, 16);
353 sign_modifier
= (block_size
<= 0xFFFF);
/*
 * Encode one element (one or two channels) into s->pbctx.
 *
 * s         encoder context
 * element   element type; TYPE_CPE means two channels, anything else one
 * instance  element instance number, forwarded to write_element_header()
 * samples0  input plane of the first channel
 * samples1  input plane of the second channel (write_frame() passes NULL
 *           for TYPE_SCE)
 *
 * Two paths are visible: one writes the input samples directly with
 * put_sbits(), the other runs extra-bit extraction, stereo decorrelation,
 * prediction and entropy coding.
 *
 * NOTE(review): the declarations of i/j/channels, the conditions selecting
 * between the paths (presumably s->verbatim, s->extra_bits and channels == 2),
 * the else keywords and a few continuation lines are not present in this
 * listing.
 */
360 static void write_element(AlacEncodeContext
*s
,
361 enum AlacRawDataBlockType element
, int instance
,
362 const uint8_t *samples0
, const uint8_t *samples1
)
364 uint8_t const *samples
[2] = { samples0
, samples1
};
/* always 0 in this function, so the prediction_type == 15 pass below is
 * never taken */
366 int prediction_type
= 0;
367 PutBitContext
*pb
= &s
->pbctx
;
369 channels
= element
== TYPE_CPE
? 2 : 1;
372 write_element_header(s
, element
, instance
);
373 /* samples are channel-interleaved in verbatim mode */
374 if (s
->avctx
->sample_fmt
== AV_SAMPLE_FMT_S32P
) {
/* drop the unused low bits of the 32-bit container */
375 int shift
= 32 - s
->avctx
->bits_per_raw_sample
;
376 int32_t const *samples_s32
[2] = { (const int32_t *)samples0
,
377 (const int32_t *)samples1
};
378 for (i
= 0; i
< s
->frame_size
; i
++)
379 for (j
= 0; j
< channels
; j
++)
380 put_sbits(pb
, s
->avctx
->bits_per_raw_sample
,
381 samples_s32
[j
][i
] >> shift
);
383 int16_t const *samples_s16
[2] = { (const int16_t *)samples0
,
384 (const int16_t *)samples1
};
385 for (i
= 0; i
< s
->frame_size
; i
++)
386 for (j
= 0; j
< channels
; j
++)
387 put_sbits(pb
, s
->avctx
->bits_per_raw_sample
,
/* compressed path starts here; the tail of this expression is not visible */
391 s
->write_sample_size
= s
->avctx
->bits_per_raw_sample
- s
->extra_bits
+
394 init_sample_buffers(s
, channels
, samples
);
395 write_element_header(s
, element
, instance
);
397 // extract extra bits if needed
/* the low extra_bits of every sample are parked in predictor_buf and
 * removed from sample_buf */
399 uint32_t mask
= (1 << s
->extra_bits
) - 1;
400 for (j
= 0; j
< channels
; j
++) {
401 int32_t *extra
= s
->predictor_buf
[j
];
402 int32_t *smp
= s
->sample_buf
[j
];
403 for (i
= 0; i
< s
->frame_size
; i
++) {
404 extra
[i
] = smp
[i
] & mask
;
405 smp
[i
] >>= s
->extra_bits
;
411 alac_stereo_decorrelation(s
);
413 s
->interlacing_shift
= s
->interlacing_leftweight
= 0;
414 put_bits(pb
, 8, s
->interlacing_shift
);
415 put_bits(pb
, 8, s
->interlacing_leftweight
);
/* per-channel predictor description: 4 + 4 + 3 + 5 bits followed by
 * lpc_order signed 16-bit coefficients */
417 for (i
= 0; i
< channels
; i
++) {
418 calc_predictor_params(s
, i
);
420 put_bits(pb
, 4, prediction_type
);
421 put_bits(pb
, 4, s
->lpc
[i
].lpc_quant
);
423 put_bits(pb
, 3, s
->rc
.rice_modifier
);
424 put_bits(pb
, 5, s
->lpc
[i
].lpc_order
);
425 // predictor coeff. table
426 for (j
= 0; j
< s
->lpc
[i
].lpc_order
; j
++)
427 put_sbits(pb
, 16, s
->lpc
[i
].lpc_coeff
[j
]);
430 // write extra bits if needed
/* must run before the predictor loop below, which overwrites predictor_buf */
432 for (i
= 0; i
< s
->frame_size
; i
++) {
433 for (j
= 0; j
< channels
; j
++) {
434 put_bits(pb
, s
->extra_bits
, s
->predictor_buf
[j
][i
]);
439 // apply lpc and entropy coding to audio samples
440 for (i
= 0; i
< channels
; i
++) {
441 alac_linear_predictor(s
, i
);
443 // TODO: determine when this will actually help. for now it's not used.
444 if (prediction_type
== 15) {
445 // 2nd pass 1st order filter
446 int32_t *residual
= s
->predictor_buf
[i
];
447 for (j
= s
->frame_size
- 1; j
> 0; j
--)
448 residual
[j
] -= residual
[j
- 1];
450 alac_entropy_coder(s
, i
);
/*
 * Encode a whole frame into avpkt->data: one write_element() call per
 * element of the channel layout, followed by a 3-bit TYPE_END marker.
 *
 * s        encoder context
 * avpkt    output packet; its data/size initialise the bit writer
 * samples  per-channel input planes, indexed through the layout's channel map
 *
 * Returns put_bits_count(pb) >> 3, i.e. the bit count divided by 8.
 *
 * NOTE(review): the else keyword, the updates of ch/element/sce/cpe and any
 * flush of the bit writer are not present in this listing.
 */
455 static int write_frame(AlacEncodeContext
*s
, AVPacket
*avpkt
,
456 uint8_t * const *samples
)
458 PutBitContext
*pb
= &s
->pbctx
;
/* both tables are indexed by channel count - 1 */
459 const enum AlacRawDataBlockType
*ch_elements
= ff_alac_channel_elements
[s
->avctx
->channels
- 1];
460 const uint8_t *ch_map
= ff_alac_channel_layout_offsets
[s
->avctx
->channels
- 1];
461 int ch
, element
, sce
, cpe
;
463 init_put_bits(pb
, avpkt
->data
, avpkt
->size
);
465 ch
= element
= sce
= cpe
= 0;
466 while (ch
< s
->avctx
->channels
) {
/* a channel pair consumes two consecutive channel-map entries */
467 if (ch_elements
[element
] == TYPE_CPE
) {
468 write_element(s
, TYPE_CPE
, cpe
, samples
[ch_map
[ch
]],
469 samples
[ch_map
[ch
+ 1]]);
/* single channel: no second plane */
473 write_element(s
, TYPE_SCE
, sce
, samples
[ch_map
[ch
]], NULL
);
480 put_bits(pb
, 3, TYPE_END
);
483 return put_bits_count(pb
) >> 3;
/*
 * Size in bytes of a frame whose samples are stored uncompressed.
 *
 * frame_size  samples per channel
 * ch          channel count
 * bps         bits per sample
 *
 * The bit total is the header (23 bits, plus 32 when frame_size is below
 * DEFAULT_FRAME_SIZE), bps * ch * frame_size sample bits and 3 further bits,
 * rounded up to a multiple of 8 and divided by 8.
 */
486 static av_always_inline
int get_max_frame_size(int frame_size
, int ch
, int bps
)
/* the comparison yields 0 or 1, so this is either 23 or 55 */
488 int header_bits
= 23 + 32 * (frame_size
< DEFAULT_FRAME_SIZE
);
489 return FFALIGN(header_bits
+ bps
* ch
* frame_size
+ 3, 8) / 8;
/*
 * Codec close callback: tears down the LPC context and frees the extradata
 * allocated by alac_encode_init(). Also called from alac_encode_init()
 * itself.
 *
 * NOTE(review): the return statement is not present in this listing.
 */
492 static av_cold
int alac_encode_close(AVCodecContext
*avctx
)
494 AlacEncodeContext
*s
= avctx
->priv_data
;
495 ff_lpc_end(&s
->lpc_ctx
);
496 av_freep(&avctx
->extradata
);
/* keep the size consistent with the freed pointer */
497 avctx
->extradata_size
= 0;
/*
 * Codec init callback: fixes the frame size and bit depth, sets the Rice
 * defaults, builds the 36-byte extradata blob, validates the prediction-order
 * range and initialises the LPC context.
 *
 * NOTE(review): the declaration of ret, the else keywords, the goto/return
 * statements, the error label, the assignment of s->avctx and one argument
 * line of the get_max_frame_size() call are not present in this listing. The
 * alac_encode_close() call near the end is presumably the shared error path.
 */
501 static av_cold
int alac_encode_init(AVCodecContext
*avctx
)
503 AlacEncodeContext
*s
= avctx
->priv_data
;
505 uint8_t *alac_extradata
;
507 avctx
->frame_size
= s
->frame_size
= DEFAULT_FRAME_SIZE
;
/* S32P input is always coded as 24 bits; a different caller-supplied depth
 * only produces a warning */
509 if (avctx
->sample_fmt
== AV_SAMPLE_FMT_S32P
) {
510 if (avctx
->bits_per_raw_sample
!= 24)
511 av_log(avctx
, AV_LOG_WARNING
, "encoding as 24 bits-per-sample\n");
512 avctx
->bits_per_raw_sample
= 24;
/* presumably the branch for every other sample format -- confirm */
514 avctx
->bits_per_raw_sample
= 16;
518 // Set default compression level
519 if (avctx
->compression_level
== FF_COMPRESSION_DEFAULT
)
520 s
->compression_level
= 2;
/* explicit levels are clipped into 0..2 */
522 s
->compression_level
= av_clip(avctx
->compression_level
, 0, 2);
524 // Initialize default Rice parameters
525 s
->rc
.history_mult
= 40;
526 s
->rc
.initial_history
= 10;
527 s
->rc
.k_modifier
= 14;
528 s
->rc
.rice_modifier
= 4;
530 s
->max_coded_frame_size
= get_max_frame_size(avctx
->frame_size
,
532 avctx
->bits_per_raw_sample
);
/* zero-filled, so extradata bytes not written below stay 0 */
534 avctx
->extradata
= av_mallocz(ALAC_EXTRADATA_SIZE
+ FF_INPUT_BUFFER_PADDING_SIZE
);
535 if (!avctx
->extradata
) {
536 ret
= AVERROR(ENOMEM
);
539 avctx
->extradata_size
= ALAC_EXTRADATA_SIZE
;
/* extradata byte offsets written below: 0 blob size, 4 'alac' tag,
 * 12 frame size, 17 bit depth, 21 channel count, 24 max coded frame size,
 * 28 bitrate, 32 sample rate */
541 alac_extradata
= avctx
->extradata
;
542 AV_WB32(alac_extradata
, ALAC_EXTRADATA_SIZE
);
543 AV_WB32(alac_extradata
+4, MKBETAG('a','l','a','c'));
544 AV_WB32(alac_extradata
+12, avctx
->frame_size
);
545 AV_WB8 (alac_extradata
+17, avctx
->bits_per_raw_sample
);
546 AV_WB8 (alac_extradata
+21, avctx
->channels
);
547 AV_WB32(alac_extradata
+24, s
->max_coded_frame_size
);
/* NOTE(review): the product is evaluated in int; presumably the supported
 * rates and layouts keep it in range -- confirm. */
548 AV_WB32(alac_extradata
+28,
549 avctx
->sample_rate
* avctx
->channels
* avctx
->bits_per_raw_sample
); // average bitrate
550 AV_WB32(alac_extradata
+32, avctx
->sample_rate
);
552 // Set relevant extradata fields
/* Rice parameters at offsets 18..20 are written only for levels above 0 */
553 if (s
->compression_level
> 0) {
554 AV_WB8(alac_extradata
+18, s
->rc
.history_mult
);
555 AV_WB8(alac_extradata
+19, s
->rc
.initial_history
);
556 AV_WB8(alac_extradata
+20, s
->rc
.k_modifier
);
/* a negative caller value keeps the default; otherwise the value must lie in
 * MIN_LPC_ORDER..ALAC_MAX_LPC_ORDER */
559 s
->min_prediction_order
= DEFAULT_MIN_PRED_ORDER
;
560 if (avctx
->min_prediction_order
>= 0) {
561 if (avctx
->min_prediction_order
< MIN_LPC_ORDER
||
562 avctx
->min_prediction_order
> ALAC_MAX_LPC_ORDER
) {
563 av_log(avctx
, AV_LOG_ERROR
, "invalid min prediction order: %d\n",
564 avctx
->min_prediction_order
);
565 ret
= AVERROR(EINVAL
);
569 s
->min_prediction_order
= avctx
->min_prediction_order
;
/* same rule for the maximum order */
572 s
->max_prediction_order
= DEFAULT_MAX_PRED_ORDER
;
573 if (avctx
->max_prediction_order
>= 0) {
574 if (avctx
->max_prediction_order
< MIN_LPC_ORDER
||
575 avctx
->max_prediction_order
> ALAC_MAX_LPC_ORDER
) {
576 av_log(avctx
, AV_LOG_ERROR
, "invalid max prediction order: %d\n",
577 avctx
->max_prediction_order
);
578 ret
= AVERROR(EINVAL
);
582 s
->max_prediction_order
= avctx
->max_prediction_order
;
/* the resulting range must not be inverted */
585 if (s
->max_prediction_order
< s
->min_prediction_order
) {
586 av_log(avctx
, AV_LOG_ERROR
,
587 "invalid prediction orders: min=%d max=%d\n",
588 s
->min_prediction_order
, s
->max_prediction_order
);
589 ret
= AVERROR(EINVAL
);
595 if ((ret
= ff_lpc_init(&s
->lpc_ctx
, avctx
->frame_size
,
596 s
->max_prediction_order
,
597 FF_LPC_TYPE_LEVINSON
)) < 0) {
/* releases whatever was set up above (extradata, LPC context) */
603 alac_encode_close(avctx
);
/*
 * Encode callback: encodes one AVFrame into avpkt.
 *
 * avctx           codec context
 * avpkt           output packet, allocated here via ff_alloc_packet2()
 * frame           input audio; nb_samples becomes the current frame size
 * got_packet_ptr  output flag (its assignment is not visible in this listing)
 *
 * The frame is written once with write_frame(); if the result is larger than
 * max_frame_size it is written a second time (in verbatim mode, according to
 * the comment below).
 *
 * NOTE(review): the else keywords, the error return after
 * ff_alloc_packet2(), the assignments to s->verbatim, the verbatim branch of
 * the extra_bits setup and the final return are not present in this listing.
 */
607 static int alac_encode_frame(AVCodecContext
*avctx
, AVPacket
*avpkt
,
608 const AVFrame
*frame
, int *got_packet_ptr
)
610 AlacEncodeContext
*s
= avctx
->priv_data
;
611 int out_bytes
, max_frame_size
, ret
;
613 s
->frame_size
= frame
->nb_samples
;
/* a short frame needs its own bound; otherwise the value cached by
 * alac_encode_init() is used */
615 if (frame
->nb_samples
< DEFAULT_FRAME_SIZE
)
616 max_frame_size
= get_max_frame_size(s
->frame_size
, avctx
->channels
,
617 avctx
->bits_per_raw_sample
);
619 max_frame_size
= s
->max_coded_frame_size
;
/* NOTE(review): the packet holds 2 * max_frame_size bytes, while the first
 * write_frame() pass is only compared against max_frame_size afterwards.
 * Nothing visible here bounds that first pass to the buffer -- confirm how
 * the bit writer behaves on overflow. */
621 if ((ret
= ff_alloc_packet2(avctx
, avpkt
, 2 * max_frame_size
)) < 0)
624 /* use verbatim mode for compression_level 0 */
625 if (s
->compression_level
) {
/* bits above 16 are carried separately as "extra bits" */
627 s
->extra_bits
= avctx
->bits_per_raw_sample
- 16;
633 out_bytes
= write_frame(s
, avpkt
, frame
->extended_data
);
635 if (out_bytes
> max_frame_size
) {
636 /* frame too large. use verbatim mode */
639 out_bytes
= write_frame(s
, avpkt
, frame
->extended_data
);
/* shrink the packet to the number of bytes actually written */
642 avpkt
->size
= out_bytes
;
/*
 * Codec registration record for the ALAC encoder, wiring the init/encode/close
 * callbacks defined in this file.
 *
 * NOTE(review): the .name entry, one entry of the sample format list and the
 * closing brace are not present in this listing.
 */
647 AVCodec ff_alac_encoder
= {
649 .long_name
= NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
650 .type
= AVMEDIA_TYPE_AUDIO
,
651 .id
= AV_CODEC_ID_ALAC
,
652 .priv_data_size
= sizeof(AlacEncodeContext
),
653 .init
= alac_encode_init
,
654 .encode2
= alac_encode_frame
,
655 .close
= alac_encode_close
,
/* alac_encode_frame() handles frames shorter than DEFAULT_FRAME_SIZE */
656 .capabilities
= CODEC_CAP_SMALL_LAST_FRAME
,
657 .channel_layouts
= ff_alac_channel_layouts
,
/* list is terminated by AV_SAMPLE_FMT_NONE */
658 .sample_fmts
= (const enum AVSampleFormat
[]){ AV_SAMPLE_FMT_S32P
,
660 AV_SAMPLE_FMT_NONE
},