/*
 * Real Audio 1.0 (14.4K) encoder
 * Copyright (c) 2010 Francesco Lavra <francescolavra@interfree.it>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Real Audio 1.0 (14.4K) encoder
 * @author Francesco Lavra <francescolavra@interfree.it>
 */

#include <float.h>
#include <string.h>

#include "audio_frame_queue.h"
#include "celp_filters.h"
38 static av_cold
int ra144_encode_close(AVCodecContext
*avctx
)
40 RA144Context
*ractx
= avctx
->priv_data
;
41 ff_lpc_end(&ractx
->lpc_ctx
);
42 ff_af_queue_close(&ractx
->afq
);
47 static av_cold
int ra144_encode_init(AVCodecContext
* avctx
)
52 if (avctx
->channels
!= 1) {
53 av_log(avctx
, AV_LOG_ERROR
, "invalid number of channels: %d\n",
57 avctx
->frame_size
= NBLOCKS
* BLOCKSIZE
;
58 avctx
->initial_padding
= avctx
->frame_size
;
59 avctx
->bit_rate
= 8000;
60 ractx
= avctx
->priv_data
;
61 ractx
->lpc_coef
[0] = ractx
->lpc_tables
[0];
62 ractx
->lpc_coef
[1] = ractx
->lpc_tables
[1];
64 ff_audiodsp_init(&ractx
->adsp
);
65 ret
= ff_lpc_init(&ractx
->lpc_ctx
, avctx
->frame_size
, LPC_ORDER
,
66 FF_LPC_TYPE_LEVINSON
);
70 ff_af_queue_init(avctx
, &ractx
->afq
);
74 ra144_encode_close(avctx
);
/**
 * Quantize a value by searching a sorted table for the element with the
 * nearest value
 *
 * The table must be sorted in ascending order and contain at least one
 * element. When the value lies exactly halfway between two neighbouring
 * entries, the higher index is returned.
 *
 * @param value value to quantize
 * @param table array containing the quantization table
 * @param size size of the quantization table
 * @return index of the quantization table corresponding to the element with the
 *         nearest value
 */
static int quantize(int value, const int16_t *table, unsigned int size)
{
    unsigned int low = 0, high = size - 1;

    while (1) {
        unsigned int index = (low + high) >> 1;
        int error = table[index] - value;

        /*
         * The interval has shrunk to [low, high] with high - low <= 1.
         * Here error == table[low] - value, so the comparison below is
         * table[high] - value > value - table[low], i.e. "low is closer".
         */
        if (index == low)
            return table[high] + error > value ? low : high;
        if (error > 0) {
            high = index;
        } else {
            low = index;
        }
    }
}
109 * Orthogonalize a vector to another vector
111 * @param v vector to orthogonalize
112 * @param u vector against which orthogonalization is performed
114 static void orthogonalize(float *v
, const float *u
)
117 float num
= 0, den
= 0;
119 for (i
= 0; i
< BLOCKSIZE
; i
++) {
124 for (i
= 0; i
< BLOCKSIZE
; i
++)
130 * Calculate match score and gain of an LPC-filtered vector with respect to
131 * input data, possibly othogonalizing it to up to 2 other vectors
133 * @param work array used to calculate the filtered vector
134 * @param coefs coefficients of the LPC filter
135 * @param vect original vector
136 * @param ortho1 first vector against which orthogonalization is performed
137 * @param ortho2 second vector against which orthogonalization is performed
138 * @param data input data
139 * @param score pointer to variable where match score is returned
140 * @param gain pointer to variable where gain is returned
142 static void get_match_score(float *work
, const float *coefs
, float *vect
,
143 const float *ortho1
, const float *ortho2
,
144 const float *data
, float *score
, float *gain
)
149 ff_celp_lp_synthesis_filterf(work
, coefs
, vect
, BLOCKSIZE
, LPC_ORDER
);
151 orthogonalize(work
, ortho1
);
153 orthogonalize(work
, ortho2
);
155 for (i
= 0; i
< BLOCKSIZE
; i
++) {
156 g
+= work
[i
] * work
[i
];
157 c
+= data
[i
] * work
[i
];
169 * Create a vector from the adaptive codebook at a given lag value
171 * @param vect array where vector is stored
172 * @param cb adaptive codebook
173 * @param lag lag value
175 static void create_adapt_vect(float *vect
, const int16_t *cb
, int lag
)
179 cb
+= BUFFERSIZE
- lag
;
180 for (i
= 0; i
< FFMIN(BLOCKSIZE
, lag
); i
++)
183 for (i
= 0; i
< BLOCKSIZE
- lag
; i
++)
184 vect
[lag
+ i
] = cb
[i
];
189 * Search the adaptive codebook for the best entry and gain and remove its
190 * contribution from input data
192 * @param adapt_cb array from which the adaptive codebook is extracted
193 * @param work array used to calculate LPC-filtered vectors
194 * @param coefs coefficients of the LPC filter
195 * @param data input data
196 * @return index of the best entry of the adaptive codebook
198 static int adaptive_cb_search(const int16_t *adapt_cb
, float *work
,
199 const float *coefs
, float *data
)
201 int i
, av_uninit(best_vect
);
202 float score
, gain
, best_score
, av_uninit(best_gain
);
203 float exc
[BLOCKSIZE
];
205 gain
= best_score
= 0;
206 for (i
= BLOCKSIZE
/ 2; i
<= BUFFERSIZE
; i
++) {
207 create_adapt_vect(exc
, adapt_cb
, i
);
208 get_match_score(work
, coefs
, exc
, NULL
, NULL
, data
, &score
, &gain
);
209 if (score
> best_score
) {
219 * Re-calculate the filtered vector from the vector with maximum match score
220 * and remove its contribution from input data.
222 create_adapt_vect(exc
, adapt_cb
, best_vect
);
223 ff_celp_lp_synthesis_filterf(work
, coefs
, exc
, BLOCKSIZE
, LPC_ORDER
);
224 for (i
= 0; i
< BLOCKSIZE
; i
++)
225 data
[i
] -= best_gain
* work
[i
];
226 return best_vect
- BLOCKSIZE
/ 2 + 1;
231 * Find the best vector of a fixed codebook by applying an LPC filter to
232 * codebook entries, possibly othogonalizing them to up to 2 other vectors and
233 * matching the results with input data
235 * @param work array used to calculate the filtered vectors
236 * @param coefs coefficients of the LPC filter
237 * @param cb fixed codebook
238 * @param ortho1 first vector against which orthogonalization is performed
239 * @param ortho2 second vector against which orthogonalization is performed
240 * @param data input data
241 * @param idx pointer to variable where the index of the best codebook entry is
243 * @param gain pointer to variable where the gain of the best codebook entry is
246 static void find_best_vect(float *work
, const float *coefs
,
247 const int8_t cb
[][BLOCKSIZE
], const float *ortho1
,
248 const float *ortho2
, float *data
, int *idx
,
252 float g
, score
, best_score
;
253 float vect
[BLOCKSIZE
];
255 *idx
= *gain
= best_score
= 0;
256 for (i
= 0; i
< FIXED_CB_SIZE
; i
++) {
257 for (j
= 0; j
< BLOCKSIZE
; j
++)
259 get_match_score(work
, coefs
, vect
, ortho1
, ortho2
, data
, &score
, &g
);
260 if (score
> best_score
) {
270 * Search the two fixed codebooks for the best entry and gain
272 * @param work array used to calculate LPC-filtered vectors
273 * @param coefs coefficients of the LPC filter
274 * @param data input data
275 * @param cba_idx index of the best entry of the adaptive codebook
276 * @param cb1_idx pointer to variable where the index of the best entry of the
277 * first fixed codebook is returned
278 * @param cb2_idx pointer to variable where the index of the best entry of the
279 * second fixed codebook is returned
281 static void fixed_cb_search(float *work
, const float *coefs
, float *data
,
282 int cba_idx
, int *cb1_idx
, int *cb2_idx
)
286 float cba_vect
[BLOCKSIZE
], cb1_vect
[BLOCKSIZE
];
287 float vect
[BLOCKSIZE
];
290 * The filtered vector from the adaptive codebook can be retrieved from
291 * work, because this function is called just after adaptive_cb_search().
294 memcpy(cba_vect
, work
, sizeof(cba_vect
));
296 find_best_vect(work
, coefs
, ff_cb1_vects
, cba_idx
? cba_vect
: NULL
, NULL
,
297 data
, cb1_idx
, &gain
);
300 * Re-calculate the filtered vector from the vector with maximum match score
301 * and remove its contribution from input data.
304 for (i
= 0; i
< BLOCKSIZE
; i
++)
305 vect
[i
] = ff_cb1_vects
[*cb1_idx
][i
];
306 ff_celp_lp_synthesis_filterf(work
, coefs
, vect
, BLOCKSIZE
, LPC_ORDER
);
308 orthogonalize(work
, cba_vect
);
309 for (i
= 0; i
< BLOCKSIZE
; i
++)
310 data
[i
] -= gain
* work
[i
];
311 memcpy(cb1_vect
, work
, sizeof(cb1_vect
));
316 find_best_vect(work
, coefs
, ff_cb2_vects
, cba_idx
? cba_vect
: NULL
,
317 ortho_cb1
? cb1_vect
: NULL
, data
, cb2_idx
, &gain
);
322 * Encode a subblock of the current frame
324 * @param ractx encoder context
325 * @param sblock_data input data of the subblock
326 * @param lpc_coefs coefficients of the LPC filter
327 * @param rms RMS of the reflection coefficients
328 * @param pb pointer to PutBitContext of the current frame
330 static void ra144_encode_subblock(RA144Context
*ractx
,
331 const int16_t *sblock_data
,
332 const int16_t *lpc_coefs
, unsigned int rms
,
335 float data
[BLOCKSIZE
] = { 0 }, work
[LPC_ORDER
+ BLOCKSIZE
];
336 float coefs
[LPC_ORDER
];
337 float zero
[BLOCKSIZE
], cba
[BLOCKSIZE
], cb1
[BLOCKSIZE
], cb2
[BLOCKSIZE
];
338 int cba_idx
, cb1_idx
, cb2_idx
, gain
;
342 float error
, best_error
;
344 for (i
= 0; i
< LPC_ORDER
; i
++) {
345 work
[i
] = ractx
->curr_sblock
[BLOCKSIZE
+ i
];
346 coefs
[i
] = lpc_coefs
[i
] * (1/4096.0);
350 * Calculate the zero-input response of the LPC filter and subtract it from
353 ff_celp_lp_synthesis_filterf(work
+ LPC_ORDER
, coefs
, data
, BLOCKSIZE
,
355 for (i
= 0; i
< BLOCKSIZE
; i
++) {
356 zero
[i
] = work
[LPC_ORDER
+ i
];
357 data
[i
] = sblock_data
[i
] - zero
[i
];
361 * Codebook search is performed without taking into account the contribution
362 * of the previous subblock, since it has been just subtracted from input
365 memset(work
, 0, LPC_ORDER
* sizeof(*work
));
367 cba_idx
= adaptive_cb_search(ractx
->adapt_cb
, work
+ LPC_ORDER
, coefs
,
371 * The filtered vector from the adaptive codebook can be retrieved from
372 * work, see implementation of adaptive_cb_search().
374 memcpy(cba
, work
+ LPC_ORDER
, sizeof(cba
));
376 ff_copy_and_dup(ractx
->buffer_a
, ractx
->adapt_cb
, cba_idx
+ BLOCKSIZE
/ 2 - 1);
377 m
[0] = (ff_irms(&ractx
->adsp
, ractx
->buffer_a
) * rms
) >> 12;
379 fixed_cb_search(work
+ LPC_ORDER
, coefs
, data
, cba_idx
, &cb1_idx
, &cb2_idx
);
380 for (i
= 0; i
< BLOCKSIZE
; i
++) {
381 cb1
[i
] = ff_cb1_vects
[cb1_idx
][i
];
382 cb2
[i
] = ff_cb2_vects
[cb2_idx
][i
];
384 ff_celp_lp_synthesis_filterf(work
+ LPC_ORDER
, coefs
, cb1
, BLOCKSIZE
,
386 memcpy(cb1
, work
+ LPC_ORDER
, sizeof(cb1
));
387 m
[1] = (ff_cb1_base
[cb1_idx
] * rms
) >> 8;
388 ff_celp_lp_synthesis_filterf(work
+ LPC_ORDER
, coefs
, cb2
, BLOCKSIZE
,
390 memcpy(cb2
, work
+ LPC_ORDER
, sizeof(cb2
));
391 m
[2] = (ff_cb2_base
[cb2_idx
] * rms
) >> 8;
392 best_error
= FLT_MAX
;
394 for (n
= 0; n
< 256; n
++) {
395 g
[1] = ((ff_gain_val_tab
[n
][1] * m
[1]) >> ff_gain_exp_tab
[n
]) *
397 g
[2] = ((ff_gain_val_tab
[n
][2] * m
[2]) >> ff_gain_exp_tab
[n
]) *
401 g
[0] = ((ff_gain_val_tab
[n
][0] * m
[0]) >> ff_gain_exp_tab
[n
]) *
403 for (i
= 0; i
< BLOCKSIZE
; i
++) {
404 data
[i
] = zero
[i
] + g
[0] * cba
[i
] + g
[1] * cb1
[i
] +
406 error
+= (data
[i
] - sblock_data
[i
]) *
407 (data
[i
] - sblock_data
[i
]);
410 for (i
= 0; i
< BLOCKSIZE
; i
++) {
411 data
[i
] = zero
[i
] + g
[1] * cb1
[i
] + g
[2] * cb2
[i
];
412 error
+= (data
[i
] - sblock_data
[i
]) *
413 (data
[i
] - sblock_data
[i
]);
416 if (error
< best_error
) {
421 put_bits(pb
, 7, cba_idx
);
422 put_bits(pb
, 8, gain
);
423 put_bits(pb
, 7, cb1_idx
);
424 put_bits(pb
, 7, cb2_idx
);
425 ff_subblock_synthesis(ractx
, lpc_coefs
, cba_idx
, cb1_idx
, cb2_idx
, rms
,
430 static int ra144_encode_frame(AVCodecContext
*avctx
, AVPacket
*avpkt
,
431 const AVFrame
*frame
, int *got_packet_ptr
)
433 static const uint8_t sizes
[LPC_ORDER
] = {64, 32, 32, 16, 16, 8, 8, 8, 8, 4};
434 static const uint8_t bit_sizes
[LPC_ORDER
] = {6, 5, 5, 4, 4, 3, 3, 3, 3, 2};
435 RA144Context
*ractx
= avctx
->priv_data
;
437 int32_t lpc_data
[NBLOCKS
* BLOCKSIZE
];
438 int32_t lpc_coefs
[LPC_ORDER
][MAX_LPC_ORDER
];
439 int shift
[LPC_ORDER
];
440 int16_t block_coefs
[NBLOCKS
][LPC_ORDER
];
441 int lpc_refl
[LPC_ORDER
]; /**< reflection coefficients of the frame */
442 unsigned int refl_rms
[NBLOCKS
]; /**< RMS of the reflection coefficients */
443 const int16_t *samples
= frame
? (const int16_t *)frame
->data
[0] : NULL
;
447 if (ractx
->last_frame
)
450 if ((ret
= ff_alloc_packet2(avctx
, avpkt
, FRAME_SIZE
)) < 0)
454 * Since the LPC coefficients are calculated on a frame centered over the
455 * fourth subframe, to encode a given frame, data from the next frame is
456 * needed. In each call to this function, the previous frame (whose data are
457 * saved in the encoder context) is encoded, and data from the current frame
458 * are saved in the encoder context to be used in the next function call.
460 for (i
= 0; i
< (2 * BLOCKSIZE
+ BLOCKSIZE
/ 2); i
++) {
461 lpc_data
[i
] = ractx
->curr_block
[BLOCKSIZE
+ BLOCKSIZE
/ 2 + i
];
462 energy
+= (lpc_data
[i
] * lpc_data
[i
]) >> 4;
466 for (j
= 0; j
< frame
->nb_samples
&& i
< NBLOCKS
* BLOCKSIZE
; i
++, j
++) {
467 lpc_data
[i
] = samples
[j
] >> 2;
468 energy
+= (lpc_data
[i
] * lpc_data
[i
]) >> 4;
471 if (i
< NBLOCKS
* BLOCKSIZE
)
472 memset(&lpc_data
[i
], 0, (NBLOCKS
* BLOCKSIZE
- i
) * sizeof(*lpc_data
));
473 energy
= ff_energy_tab
[quantize(ff_t_sqrt(energy
>> 5) >> 10, ff_energy_tab
,
476 ff_lpc_calc_coefs(&ractx
->lpc_ctx
, lpc_data
, NBLOCKS
* BLOCKSIZE
, LPC_ORDER
,
477 LPC_ORDER
, 16, lpc_coefs
, shift
, FF_LPC_TYPE_LEVINSON
,
478 0, ORDER_METHOD_EST
, 12, 0);
479 for (i
= 0; i
< LPC_ORDER
; i
++)
480 block_coefs
[NBLOCKS
- 1][i
] = -(lpc_coefs
[LPC_ORDER
- 1][i
] <<
481 (12 - shift
[LPC_ORDER
- 1]));
484 * TODO: apply perceptual weighting of the input speech through bandwidth
485 * expansion of the LPC filter.
488 if (ff_eval_refl(lpc_refl
, block_coefs
[NBLOCKS
- 1], avctx
)) {
490 * The filter is unstable: use the coefficients of the previous frame.
492 ff_int_to_int16(block_coefs
[NBLOCKS
- 1], ractx
->lpc_coef
[1]);
493 if (ff_eval_refl(lpc_refl
, block_coefs
[NBLOCKS
- 1], avctx
)) {
494 /* the filter is still unstable. set reflection coeffs to zero. */
495 memset(lpc_refl
, 0, sizeof(lpc_refl
));
498 init_put_bits(&pb
, avpkt
->data
, avpkt
->size
);
499 for (i
= 0; i
< LPC_ORDER
; i
++) {
500 idx
= quantize(lpc_refl
[i
], ff_lpc_refl_cb
[i
], sizes
[i
]);
501 put_bits(&pb
, bit_sizes
[i
], idx
);
502 lpc_refl
[i
] = ff_lpc_refl_cb
[i
][idx
];
504 ractx
->lpc_refl_rms
[0] = ff_rms(lpc_refl
);
505 ff_eval_coefs(ractx
->lpc_coef
[0], lpc_refl
);
506 refl_rms
[0] = ff_interp(ractx
, block_coefs
[0], 1, 1, ractx
->old_energy
);
507 refl_rms
[1] = ff_interp(ractx
, block_coefs
[1], 2,
508 energy
<= ractx
->old_energy
,
509 ff_t_sqrt(energy
* ractx
->old_energy
) >> 12);
510 refl_rms
[2] = ff_interp(ractx
, block_coefs
[2], 3, 0, energy
);
511 refl_rms
[3] = ff_rescale_rms(ractx
->lpc_refl_rms
[0], energy
);
512 ff_int_to_int16(block_coefs
[NBLOCKS
- 1], ractx
->lpc_coef
[0]);
513 put_bits(&pb
, 5, quantize(energy
, ff_energy_tab
, 32));
514 for (i
= 0; i
< NBLOCKS
; i
++)
515 ra144_encode_subblock(ractx
, ractx
->curr_block
+ i
* BLOCKSIZE
,
516 block_coefs
[i
], refl_rms
[i
], &pb
);
518 ractx
->old_energy
= energy
;
519 ractx
->lpc_refl_rms
[1] = ractx
->lpc_refl_rms
[0];
520 FFSWAP(unsigned int *, ractx
->lpc_coef
[0], ractx
->lpc_coef
[1]);
522 /* copy input samples to current block for processing in next call */
525 for (; i
< frame
->nb_samples
; i
++)
526 ractx
->curr_block
[i
] = samples
[i
] >> 2;
528 if ((ret
= ff_af_queue_add(&ractx
->afq
, frame
)) < 0)
531 ractx
->last_frame
= 1;
532 memset(&ractx
->curr_block
[i
], 0,
533 (NBLOCKS
* BLOCKSIZE
- i
) * sizeof(*ractx
->curr_block
));
535 /* Get the next frame pts/duration */
536 ff_af_queue_remove(&ractx
->afq
, avctx
->frame_size
, &avpkt
->pts
,
539 avpkt
->size
= FRAME_SIZE
;
545 AVCodec ff_ra_144_encoder
= {
547 .long_name
= NULL_IF_CONFIG_SMALL("RealAudio 1.0 (14.4K)"),
548 .type
= AVMEDIA_TYPE_AUDIO
,
549 .id
= AV_CODEC_ID_RA_144
,
550 .priv_data_size
= sizeof(RA144Context
),
551 .init
= ra144_encode_init
,
552 .encode2
= ra144_encode_frame
,
553 .close
= ra144_encode_close
,
554 .capabilities
= CODEC_CAP_DELAY
| CODEC_CAP_SMALL_LAST_FRAME
,
555 .sample_fmts
= (const enum AVSampleFormat
[]){ AV_SAMPLE_FMT_S16
,
556 AV_SAMPLE_FMT_NONE
},
557 .supported_samplerates
= (const int[]){ 8000, 0 },
558 .channel_layouts
= (const uint64_t[]) { AV_CH_LAYOUT_MONO
, 0 },