2 * G.723.1 compatible decoder
3 * Copyright (c) 2006 Benjamin Larsson
4 * Copyright (c) 2010 Mohamed Naufal Basheer
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * G.723.1 compatible decoder
28 #define BITSTREAM_READER_LE
29 #include "libavutil/channel_layout.h"
30 #include "libavutil/mem.h"
31 #include "libavutil/opt.h"
34 #include "acelp_vectors.h"
35 #include "celp_filters.h"
36 #include "celp_math.h"
37 #include "g723_1_data.h"
40 #define CNG_RANDOM_SEED 12345
42 typedef struct g723_1_context
{
45 G723_1_Subframe subframe
[4];
46 enum FrameType cur_frame_type
;
47 enum FrameType past_frame_type
;
49 uint8_t lsp_index
[LSP_BANDS
];
53 int16_t prev_lsp
[LPC_ORDER
];
54 int16_t sid_lsp
[LPC_ORDER
];
55 int16_t prev_excitation
[PITCH_MAX
];
56 int16_t excitation
[PITCH_MAX
+ FRAME_LEN
+ 4];
57 int16_t synth_mem
[LPC_ORDER
];
58 int16_t fir_mem
[LPC_ORDER
];
59 int iir_mem
[LPC_ORDER
];
68 int pf_gain
; ///< formant postfilter
69 ///< gain scaling unit memory
72 int16_t audio
[FRAME_LEN
+ LPC_ORDER
+ PITCH_MAX
+ 4];
73 int16_t prev_data
[HALF_FRAME_LEN
];
74 int16_t prev_weight_sig
[PITCH_MAX
];
77 int16_t hpf_fir_mem
; ///< highpass filter fir
78 int hpf_iir_mem
; ///< and iir memories
79 int16_t perf_fir_mem
[LPC_ORDER
]; ///< perceptual filter fir
80 int16_t perf_iir_mem
[LPC_ORDER
]; ///< and iir memories
82 int16_t harmonic_mem
[PITCH_MAX
];
85 static av_cold
int g723_1_decode_init(AVCodecContext
*avctx
)
87 G723_1_Context
*p
= avctx
->priv_data
;
89 avctx
->channel_layout
= AV_CH_LAYOUT_MONO
;
90 avctx
->sample_fmt
= AV_SAMPLE_FMT_S16
;
94 memcpy(p
->prev_lsp
, dc_lsp
, LPC_ORDER
* sizeof(*p
->prev_lsp
));
95 memcpy(p
->sid_lsp
, dc_lsp
, LPC_ORDER
* sizeof(*p
->sid_lsp
));
97 p
->cng_random_seed
= CNG_RANDOM_SEED
;
98 p
->past_frame_type
= SID_FRAME
;
104 * Unpack the frame into parameters.
106 * @param p the context
107 * @param buf pointer to the input buffer
108 * @param buf_size size of the input buffer
110 static int unpack_bitstream(G723_1_Context
*p
, const uint8_t *buf
,
115 int temp
, info_bits
, i
;
117 init_get_bits(&gb
, buf
, buf_size
* 8);
119 /* Extract frame type and rate info */
120 info_bits
= get_bits(&gb
, 2);
122 if (info_bits
== 3) {
123 p
->cur_frame_type
= UNTRANSMITTED_FRAME
;
127 /* Extract 24 bit lsp indices, 8 bit for each band */
128 p
->lsp_index
[2] = get_bits(&gb
, 8);
129 p
->lsp_index
[1] = get_bits(&gb
, 8);
130 p
->lsp_index
[0] = get_bits(&gb
, 8);
132 if (info_bits
== 2) {
133 p
->cur_frame_type
= SID_FRAME
;
134 p
->subframe
[0].amp_index
= get_bits(&gb
, 6);
138 /* Extract the info common to both rates */
139 p
->cur_rate
= info_bits
? RATE_5300
: RATE_6300
;
140 p
->cur_frame_type
= ACTIVE_FRAME
;
142 p
->pitch_lag
[0] = get_bits(&gb
, 7);
143 if (p
->pitch_lag
[0] > 123) /* test if forbidden code */
145 p
->pitch_lag
[0] += PITCH_MIN
;
146 p
->subframe
[1].ad_cb_lag
= get_bits(&gb
, 2);
148 p
->pitch_lag
[1] = get_bits(&gb
, 7);
149 if (p
->pitch_lag
[1] > 123)
151 p
->pitch_lag
[1] += PITCH_MIN
;
152 p
->subframe
[3].ad_cb_lag
= get_bits(&gb
, 2);
153 p
->subframe
[0].ad_cb_lag
= 1;
154 p
->subframe
[2].ad_cb_lag
= 1;
156 for (i
= 0; i
< SUBFRAMES
; i
++) {
157 /* Extract combined gain */
158 temp
= get_bits(&gb
, 12);
160 p
->subframe
[i
].dirac_train
= 0;
161 if (p
->cur_rate
== RATE_6300
&& p
->pitch_lag
[i
>> 1] < SUBFRAME_LEN
- 2) {
162 p
->subframe
[i
].dirac_train
= temp
>> 11;
166 p
->subframe
[i
].ad_cb_gain
= FASTDIV(temp
, GAIN_LEVELS
);
167 if (p
->subframe
[i
].ad_cb_gain
< ad_cb_len
) {
168 p
->subframe
[i
].amp_index
= temp
- p
->subframe
[i
].ad_cb_gain
*
175 p
->subframe
[0].grid_index
= get_bits1(&gb
);
176 p
->subframe
[1].grid_index
= get_bits1(&gb
);
177 p
->subframe
[2].grid_index
= get_bits1(&gb
);
178 p
->subframe
[3].grid_index
= get_bits1(&gb
);
180 if (p
->cur_rate
== RATE_6300
) {
181 skip_bits1(&gb
); /* skip reserved bit */
183 /* Compute pulse_pos index using the 13-bit combined position index */
184 temp
= get_bits(&gb
, 13);
185 p
->subframe
[0].pulse_pos
= temp
/ 810;
187 temp
-= p
->subframe
[0].pulse_pos
* 810;
188 p
->subframe
[1].pulse_pos
= FASTDIV(temp
, 90);
190 temp
-= p
->subframe
[1].pulse_pos
* 90;
191 p
->subframe
[2].pulse_pos
= FASTDIV(temp
, 9);
192 p
->subframe
[3].pulse_pos
= temp
- p
->subframe
[2].pulse_pos
* 9;
194 p
->subframe
[0].pulse_pos
= (p
->subframe
[0].pulse_pos
<< 16) +
196 p
->subframe
[1].pulse_pos
= (p
->subframe
[1].pulse_pos
<< 14) +
198 p
->subframe
[2].pulse_pos
= (p
->subframe
[2].pulse_pos
<< 16) +
200 p
->subframe
[3].pulse_pos
= (p
->subframe
[3].pulse_pos
<< 14) +
203 p
->subframe
[0].pulse_sign
= get_bits(&gb
, 6);
204 p
->subframe
[1].pulse_sign
= get_bits(&gb
, 5);
205 p
->subframe
[2].pulse_sign
= get_bits(&gb
, 6);
206 p
->subframe
[3].pulse_sign
= get_bits(&gb
, 5);
207 } else { /* 5300 bps */
208 p
->subframe
[0].pulse_pos
= get_bits(&gb
, 12);
209 p
->subframe
[1].pulse_pos
= get_bits(&gb
, 12);
210 p
->subframe
[2].pulse_pos
= get_bits(&gb
, 12);
211 p
->subframe
[3].pulse_pos
= get_bits(&gb
, 12);
213 p
->subframe
[0].pulse_sign
= get_bits(&gb
, 4);
214 p
->subframe
[1].pulse_sign
= get_bits(&gb
, 4);
215 p
->subframe
[2].pulse_sign
= get_bits(&gb
, 4);
216 p
->subframe
[3].pulse_sign
= get_bits(&gb
, 4);
223 * Bitexact implementation of sqrt(val/2).
225 static int16_t square_root(unsigned val
)
227 av_assert2(!(val
& 0x80000000));
229 return (ff_sqrt(val
<< 1) >> 1) & (~1);
233 * Calculate the number of left-shifts required for normalizing the input.
235 * @param num input number
236 * @param width width of the input, 15 or 31 bits
238 static int normalize_bits(int num
, int width
)
240 return width
- av_log2(num
) - 1;
243 #define normalize_bits_int16(num) normalize_bits(num, 15)
244 #define normalize_bits_int32(num) normalize_bits(num, 31)
247 * Scale vector contents based on the largest of their absolutes.
249 static int scale_vector(int16_t *dst
, const int16_t *vector
, int length
)
254 for (i
= 0; i
< length
; i
++)
255 max
|= FFABS(vector
[i
]);
257 bits
= 14 - av_log2_16bit(max
);
258 bits
= FFMAX(bits
, 0);
260 for (i
= 0; i
< length
; i
++)
261 dst
[i
] = vector
[i
] << bits
>> 3;
267 * Perform inverse quantization of LSP frequencies.
269 * @param cur_lsp the current LSP vector
270 * @param prev_lsp the previous LSP vector
271 * @param lsp_index VQ indices
272 * @param bad_frame bad frame flag
274 static void inverse_quant(int16_t *cur_lsp
, int16_t *prev_lsp
,
275 uint8_t *lsp_index
, int bad_frame
)
278 int i
, j
, temp
, stable
;
280 /* Check for frame erasure */
287 lsp_index
[0] = lsp_index
[1] = lsp_index
[2] = 0;
290 /* Get the VQ table entry corresponding to the transmitted index */
291 cur_lsp
[0] = lsp_band0
[lsp_index
[0]][0];
292 cur_lsp
[1] = lsp_band0
[lsp_index
[0]][1];
293 cur_lsp
[2] = lsp_band0
[lsp_index
[0]][2];
294 cur_lsp
[3] = lsp_band1
[lsp_index
[1]][0];
295 cur_lsp
[4] = lsp_band1
[lsp_index
[1]][1];
296 cur_lsp
[5] = lsp_band1
[lsp_index
[1]][2];
297 cur_lsp
[6] = lsp_band2
[lsp_index
[2]][0];
298 cur_lsp
[7] = lsp_band2
[lsp_index
[2]][1];
299 cur_lsp
[8] = lsp_band2
[lsp_index
[2]][2];
300 cur_lsp
[9] = lsp_band2
[lsp_index
[2]][3];
302 /* Add predicted vector & DC component to the previously quantized vector */
303 for (i
= 0; i
< LPC_ORDER
; i
++) {
304 temp
= ((prev_lsp
[i
] - dc_lsp
[i
]) * pred
+ (1 << 14)) >> 15;
305 cur_lsp
[i
] += dc_lsp
[i
] + temp
;
308 for (i
= 0; i
< LPC_ORDER
; i
++) {
309 cur_lsp
[0] = FFMAX(cur_lsp
[0], 0x180);
310 cur_lsp
[LPC_ORDER
- 1] = FFMIN(cur_lsp
[LPC_ORDER
- 1], 0x7e00);
312 /* Stability check */
313 for (j
= 1; j
< LPC_ORDER
; j
++) {
314 temp
= min_dist
+ cur_lsp
[j
- 1] - cur_lsp
[j
];
317 cur_lsp
[j
- 1] -= temp
;
322 for (j
= 1; j
< LPC_ORDER
; j
++) {
323 temp
= cur_lsp
[j
- 1] + min_dist
- cur_lsp
[j
] - 4;
333 memcpy(cur_lsp
, prev_lsp
, LPC_ORDER
* sizeof(*cur_lsp
));
337 * Bitexact implementation of 2ab scaled by 1/2^16.
339 * @param a 32 bit multiplicand
340 * @param b 16 bit multiplier
342 #define MULL2(a, b) \
346 * Convert LSP frequencies to LPC coefficients.
348 * @param lpc buffer for LPC coefficients
350 static void lsp2lpc(int16_t *lpc
)
352 int f1
[LPC_ORDER
/ 2 + 1];
353 int f2
[LPC_ORDER
/ 2 + 1];
356 /* Calculate negative cosine */
357 for (j
= 0; j
< LPC_ORDER
; j
++) {
358 int index
= (lpc
[j
] >> 7) & 0x1FF;
359 int offset
= lpc
[j
] & 0x7f;
360 int temp1
= cos_tab
[index
] << 16;
361 int temp2
= (cos_tab
[index
+ 1] - cos_tab
[index
]) *
362 ((offset
<< 8) + 0x80) << 1;
364 lpc
[j
] = -(av_sat_dadd32(1 << 15, temp1
+ temp2
) >> 16);
368 * Compute sum and difference polynomial coefficients
369 * (bitexact alternative to lsp2poly() in lsp.c)
371 /* Initialize with values in Q28 */
373 f1
[1] = (lpc
[0] << 14) + (lpc
[2] << 14);
374 f1
[2] = lpc
[0] * lpc
[2] + (2 << 28);
377 f2
[1] = (lpc
[1] << 14) + (lpc
[3] << 14);
378 f2
[2] = lpc
[1] * lpc
[3] + (2 << 28);
381 * Calculate and scale the coefficients by 1/2 in
382 * each iteration for a final scaling factor of Q25
384 for (i
= 2; i
< LPC_ORDER
/ 2; i
++) {
385 f1
[i
+ 1] = f1
[i
- 1] + MULL2(f1
[i
], lpc
[2 * i
]);
386 f2
[i
+ 1] = f2
[i
- 1] + MULL2(f2
[i
], lpc
[2 * i
+ 1]);
388 for (j
= i
; j
>= 2; j
--) {
389 f1
[j
] = MULL2(f1
[j
- 1], lpc
[2 * i
]) +
390 (f1
[j
] >> 1) + (f1
[j
- 2] >> 1);
391 f2
[j
] = MULL2(f2
[j
- 1], lpc
[2 * i
+ 1]) +
392 (f2
[j
] >> 1) + (f2
[j
- 2] >> 1);
397 f1
[1] = ((lpc
[2 * i
] << 16 >> i
) + f1
[1]) >> 1;
398 f2
[1] = ((lpc
[2 * i
+ 1] << 16 >> i
) + f2
[1]) >> 1;
401 /* Convert polynomial coefficients to LPC coefficients */
402 for (i
= 0; i
< LPC_ORDER
/ 2; i
++) {
403 int64_t ff1
= f1
[i
+ 1] + f1
[i
];
404 int64_t ff2
= f2
[i
+ 1] - f2
[i
];
406 lpc
[i
] = av_clipl_int32(((ff1
+ ff2
) << 3) + (1 << 15)) >> 16;
407 lpc
[LPC_ORDER
- i
- 1] = av_clipl_int32(((ff1
- ff2
) << 3) +
413 * Quantize LSP frequencies by interpolation and convert them to
414 * the corresponding LPC coefficients.
416 * @param lpc buffer for LPC coefficients
417 * @param cur_lsp the current LSP vector
418 * @param prev_lsp the previous LSP vector
420 static void lsp_interpolate(int16_t *lpc
, int16_t *cur_lsp
, int16_t *prev_lsp
)
423 int16_t *lpc_ptr
= lpc
;
425 /* cur_lsp * 0.25 + prev_lsp * 0.75 */
426 ff_acelp_weighted_vector_sum(lpc
, cur_lsp
, prev_lsp
,
427 4096, 12288, 1 << 13, 14, LPC_ORDER
);
428 ff_acelp_weighted_vector_sum(lpc
+ LPC_ORDER
, cur_lsp
, prev_lsp
,
429 8192, 8192, 1 << 13, 14, LPC_ORDER
);
430 ff_acelp_weighted_vector_sum(lpc
+ 2 * LPC_ORDER
, cur_lsp
, prev_lsp
,
431 12288, 4096, 1 << 13, 14, LPC_ORDER
);
432 memcpy(lpc
+ 3 * LPC_ORDER
, cur_lsp
, LPC_ORDER
* sizeof(*lpc
));
434 for (i
= 0; i
< SUBFRAMES
; i
++) {
436 lpc_ptr
+= LPC_ORDER
;
441 * Generate a train of dirac functions with period as pitch lag.
443 static void gen_dirac_train(int16_t *buf
, int pitch_lag
)
445 int16_t vector
[SUBFRAME_LEN
];
448 memcpy(vector
, buf
, SUBFRAME_LEN
* sizeof(*vector
));
449 for (i
= pitch_lag
; i
< SUBFRAME_LEN
; i
+= pitch_lag
) {
450 for (j
= 0; j
< SUBFRAME_LEN
- i
; j
++)
451 buf
[i
+ j
] += vector
[j
];
456 * Generate fixed codebook excitation vector.
458 * @param vector decoded excitation vector
459 * @param subfrm current subframe
460 * @param cur_rate current bitrate
461 * @param pitch_lag closed loop pitch lag
462 * @param index current subframe index
464 static void gen_fcb_excitation(int16_t *vector
, G723_1_Subframe
*subfrm
,
465 enum Rate cur_rate
, int pitch_lag
, int index
)
469 memset(vector
, 0, SUBFRAME_LEN
* sizeof(*vector
));
471 if (cur_rate
== RATE_6300
) {
472 if (subfrm
->pulse_pos
>= max_pos
[index
])
475 /* Decode amplitudes and positions */
476 j
= PULSE_MAX
- pulses
[index
];
477 temp
= subfrm
->pulse_pos
;
478 for (i
= 0; i
< SUBFRAME_LEN
/ GRID_SIZE
; i
++) {
479 temp
-= combinatorial_table
[j
][i
];
482 temp
+= combinatorial_table
[j
++][i
];
483 if (subfrm
->pulse_sign
& (1 << (PULSE_MAX
- j
))) {
484 vector
[subfrm
->grid_index
+ GRID_SIZE
* i
] =
485 -fixed_cb_gain
[subfrm
->amp_index
];
487 vector
[subfrm
->grid_index
+ GRID_SIZE
* i
] =
488 fixed_cb_gain
[subfrm
->amp_index
];
493 if (subfrm
->dirac_train
== 1)
494 gen_dirac_train(vector
, pitch_lag
);
495 } else { /* 5300 bps */
496 int cb_gain
= fixed_cb_gain
[subfrm
->amp_index
];
497 int cb_shift
= subfrm
->grid_index
;
498 int cb_sign
= subfrm
->pulse_sign
;
499 int cb_pos
= subfrm
->pulse_pos
;
500 int offset
, beta
, lag
;
502 for (i
= 0; i
< 8; i
+= 2) {
503 offset
= ((cb_pos
& 7) << 3) + cb_shift
+ i
;
504 vector
[offset
] = (cb_sign
& 1) ? cb_gain
: -cb_gain
;
509 /* Enhance harmonic components */
510 lag
= pitch_contrib
[subfrm
->ad_cb_gain
<< 1] + pitch_lag
+
511 subfrm
->ad_cb_lag
- 1;
512 beta
= pitch_contrib
[(subfrm
->ad_cb_gain
<< 1) + 1];
514 if (lag
< SUBFRAME_LEN
- 2) {
515 for (i
= lag
; i
< SUBFRAME_LEN
; i
++)
516 vector
[i
] += beta
* vector
[i
- lag
] >> 15;
522 * Get delayed contribution from the previous excitation vector.
524 static void get_residual(int16_t *residual
, int16_t *prev_excitation
, int lag
)
526 int offset
= PITCH_MAX
- PITCH_ORDER
/ 2 - lag
;
529 residual
[0] = prev_excitation
[offset
];
530 residual
[1] = prev_excitation
[offset
+ 1];
533 for (i
= 2; i
< SUBFRAME_LEN
+ PITCH_ORDER
- 1; i
++)
534 residual
[i
] = prev_excitation
[offset
+ (i
- 2) % lag
];
537 static int dot_product(const int16_t *a
, const int16_t *b
, int length
)
539 int sum
= ff_dot_product(a
,b
,length
);
540 return av_sat_add32(sum
, sum
);
544 * Generate adaptive codebook excitation.
546 static void gen_acb_excitation(int16_t *vector
, int16_t *prev_excitation
,
547 int pitch_lag
, G723_1_Subframe
*subfrm
,
550 int16_t residual
[SUBFRAME_LEN
+ PITCH_ORDER
- 1];
551 const int16_t *cb_ptr
;
552 int lag
= pitch_lag
+ subfrm
->ad_cb_lag
- 1;
557 get_residual(residual
, prev_excitation
, lag
);
559 /* Select quantization table */
560 if (cur_rate
== RATE_6300
&& pitch_lag
< SUBFRAME_LEN
- 2) {
561 cb_ptr
= adaptive_cb_gain85
;
563 cb_ptr
= adaptive_cb_gain170
;
565 /* Calculate adaptive vector */
566 cb_ptr
+= subfrm
->ad_cb_gain
* 20;
567 for (i
= 0; i
< SUBFRAME_LEN
; i
++) {
568 sum
= ff_dot_product(residual
+ i
, cb_ptr
, PITCH_ORDER
);
569 vector
[i
] = av_sat_dadd32(1 << 15, av_sat_add32(sum
, sum
)) >> 16;
574 * Estimate maximum auto-correlation around pitch lag.
576 * @param buf buffer with offset applied
577 * @param offset offset of the excitation vector
578 * @param ccr_max pointer to the maximum auto-correlation
579 * @param pitch_lag decoded pitch lag
580 * @param length length of autocorrelation
581 * @param dir forward lag(1) / backward lag(-1)
583 static int autocorr_max(const int16_t *buf
, int offset
, int *ccr_max
,
584 int pitch_lag
, int length
, int dir
)
586 int limit
, ccr
, lag
= 0;
589 pitch_lag
= FFMIN(PITCH_MAX
- 3, pitch_lag
);
591 limit
= FFMIN(FRAME_LEN
+ PITCH_MAX
- offset
- length
, pitch_lag
+ 3);
593 limit
= pitch_lag
+ 3;
595 for (i
= pitch_lag
- 3; i
<= limit
; i
++) {
596 ccr
= dot_product(buf
, buf
+ dir
* i
, length
);
598 if (ccr
> *ccr_max
) {
607 * Calculate pitch postfilter optimal and scaling gains.
609 * @param lag pitch postfilter forward/backward lag
610 * @param ppf pitch postfilter parameters
611 * @param cur_rate current bitrate
612 * @param tgt_eng target energy
613 * @param ccr cross-correlation
614 * @param res_eng residual energy
616 static void comp_ppf_gains(int lag
, PPFParam
*ppf
, enum Rate cur_rate
,
617 int tgt_eng
, int ccr
, int res_eng
)
619 int pf_residual
; /* square of postfiltered residual */
624 temp1
= tgt_eng
* res_eng
>> 1;
625 temp2
= ccr
* ccr
<< 1;
628 if (ccr
>= res_eng
) {
629 ppf
->opt_gain
= ppf_gain_weight
[cur_rate
];
631 ppf
->opt_gain
= (ccr
<< 15) / res_eng
*
632 ppf_gain_weight
[cur_rate
] >> 15;
634 /* pf_res^2 = tgt_eng + 2*ccr*gain + res_eng*gain^2 */
635 temp1
= (tgt_eng
<< 15) + (ccr
* ppf
->opt_gain
<< 1);
636 temp2
= (ppf
->opt_gain
* ppf
->opt_gain
>> 15) * res_eng
;
637 pf_residual
= av_sat_add32(temp1
, temp2
+ (1 << 15)) >> 16;
639 if (tgt_eng
>= pf_residual
<< 1) {
642 temp1
= (tgt_eng
<< 14) / pf_residual
;
645 /* scaling_gain = sqrt(tgt_eng/pf_res^2) */
646 ppf
->sc_gain
= square_root(temp1
<< 16);
649 ppf
->sc_gain
= 0x7fff;
652 ppf
->opt_gain
= av_clip_int16(ppf
->opt_gain
* ppf
->sc_gain
>> 15);
656 * Calculate pitch postfilter parameters.
658 * @param p the context
659 * @param offset offset of the excitation vector
660 * @param pitch_lag decoded pitch lag
661 * @param ppf pitch postfilter parameters
662 * @param cur_rate current bitrate
664 static void comp_ppf_coeff(G723_1_Context
*p
, int offset
, int pitch_lag
,
665 PPFParam
*ppf
, enum Rate cur_rate
)
674 * 1 - forward cross-correlation
675 * 2 - forward residual energy
676 * 3 - backward cross-correlation
677 * 4 - backward residual energy
679 int energy
[5] = {0, 0, 0, 0, 0};
680 int16_t *buf
= p
->audio
+ LPC_ORDER
+ offset
;
681 int fwd_lag
= autocorr_max(buf
, offset
, &energy
[1], pitch_lag
,
683 int back_lag
= autocorr_max(buf
, offset
, &energy
[3], pitch_lag
,
688 ppf
->sc_gain
= 0x7fff;
690 /* Case 0, Section 3.6 */
691 if (!back_lag
&& !fwd_lag
)
694 /* Compute target energy */
695 energy
[0] = dot_product(buf
, buf
, SUBFRAME_LEN
);
697 /* Compute forward residual energy */
699 energy
[2] = dot_product(buf
+ fwd_lag
, buf
+ fwd_lag
, SUBFRAME_LEN
);
701 /* Compute backward residual energy */
703 energy
[4] = dot_product(buf
- back_lag
, buf
- back_lag
, SUBFRAME_LEN
);
705 /* Normalize and shorten */
707 for (i
= 0; i
< 5; i
++)
708 temp1
= FFMAX(energy
[i
], temp1
);
710 scale
= normalize_bits(temp1
, 31);
711 for (i
= 0; i
< 5; i
++)
712 energy
[i
] = (energy
[i
] << scale
) >> 16;
714 if (fwd_lag
&& !back_lag
) { /* Case 1 */
715 comp_ppf_gains(fwd_lag
, ppf
, cur_rate
, energy
[0], energy
[1],
717 } else if (!fwd_lag
) { /* Case 2 */
718 comp_ppf_gains(-back_lag
, ppf
, cur_rate
, energy
[0], energy
[3],
720 } else { /* Case 3 */
723 * Select the largest of energy[1]^2/energy[2]
724 * and energy[3]^2/energy[4]
726 temp1
= energy
[4] * ((energy
[1] * energy
[1] + (1 << 14)) >> 15);
727 temp2
= energy
[2] * ((energy
[3] * energy
[3] + (1 << 14)) >> 15);
728 if (temp1
>= temp2
) {
729 comp_ppf_gains(fwd_lag
, ppf
, cur_rate
, energy
[0], energy
[1],
732 comp_ppf_gains(-back_lag
, ppf
, cur_rate
, energy
[0], energy
[3],
739 * Classify frames as voiced/unvoiced.
741 * @param p the context
742 * @param pitch_lag decoded pitch_lag
743 * @param exc_eng excitation energy estimation
744 * @param scale scaling factor of exc_eng
746 * @return residual interpolation index if voiced, 0 otherwise
748 static int comp_interp_index(G723_1_Context
*p
, int pitch_lag
,
749 int *exc_eng
, int *scale
)
751 int offset
= PITCH_MAX
+ 2 * SUBFRAME_LEN
;
752 int16_t *buf
= p
->audio
+ LPC_ORDER
;
754 int index
, ccr
, tgt_eng
, best_eng
, temp
;
756 *scale
= scale_vector(buf
, p
->excitation
, FRAME_LEN
+ PITCH_MAX
);
759 /* Compute maximum backward cross-correlation */
761 index
= autocorr_max(buf
, offset
, &ccr
, pitch_lag
, SUBFRAME_LEN
* 2, -1);
762 ccr
= av_sat_add32(ccr
, 1 << 15) >> 16;
764 /* Compute target energy */
765 tgt_eng
= dot_product(buf
, buf
, SUBFRAME_LEN
* 2);
766 *exc_eng
= av_sat_add32(tgt_eng
, 1 << 15) >> 16;
771 /* Compute best energy */
772 best_eng
= dot_product(buf
- index
, buf
- index
, SUBFRAME_LEN
* 2);
773 best_eng
= av_sat_add32(best_eng
, 1 << 15) >> 16;
775 temp
= best_eng
* *exc_eng
>> 3;
777 if (temp
< ccr
* ccr
) {
784 * Peform residual interpolation based on frame classification.
786 * @param buf decoded excitation vector
787 * @param out output vector
788 * @param lag decoded pitch lag
789 * @param gain interpolated gain
790 * @param rseed seed for random number generator
792 static void residual_interp(int16_t *buf
, int16_t *out
, int lag
,
793 int gain
, int *rseed
)
796 if (lag
) { /* Voiced */
797 int16_t *vector_ptr
= buf
+ PITCH_MAX
;
799 for (i
= 0; i
< lag
; i
++)
800 out
[i
] = vector_ptr
[i
- lag
] * 3 >> 2;
801 av_memcpy_backptr((uint8_t*)(out
+ lag
), lag
* sizeof(*out
),
802 (FRAME_LEN
- lag
) * sizeof(*out
));
803 } else { /* Unvoiced */
804 for (i
= 0; i
< FRAME_LEN
; i
++) {
805 *rseed
= *rseed
* 521 + 259;
806 out
[i
] = gain
* *rseed
>> 15;
808 memset(buf
, 0, (FRAME_LEN
+ PITCH_MAX
) * sizeof(*buf
));
813 * Perform IIR filtering.
815 * @param fir_coef FIR coefficients
816 * @param iir_coef IIR coefficients
817 * @param src source vector
818 * @param dest destination vector
819 * @param width width of the output, 16 bits(0) / 32 bits(1)
821 #define iir_filter(fir_coef, iir_coef, src, dest, width)\
824 int res_shift = 16 & ~-(width);\
825 int in_shift = 16 - res_shift;\
827 for (m = 0; m < SUBFRAME_LEN; m++) {\
829 for (n = 1; n <= LPC_ORDER; n++) {\
830 filter -= (fir_coef)[n - 1] * (src)[m - n] -\
831 (iir_coef)[n - 1] * ((dest)[m - n] >> in_shift);\
834 (dest)[m] = av_clipl_int32(((src)[m] << 16) + (filter << 3) +\
835 (1 << 15)) >> res_shift;\
840 * Adjust gain of postfiltered signal.
842 * @param p the context
843 * @param buf postfiltered output vector
844 * @param energy input energy coefficient
846 static void gain_scale(G723_1_Context
*p
, int16_t * buf
, int energy
)
848 int num
, denom
, gain
, bits1
, bits2
;
853 for (i
= 0; i
< SUBFRAME_LEN
; i
++) {
854 int temp
= buf
[i
] >> 2;
856 denom
= av_sat_dadd32(denom
, temp
);
860 bits1
= normalize_bits(num
, 31);
861 bits2
= normalize_bits(denom
, 31);
862 num
= num
<< bits1
>> 1;
865 bits2
= 5 + bits1
- bits2
;
866 bits2
= FFMAX(0, bits2
);
868 gain
= (num
>> 1) / (denom
>> 16);
869 gain
= square_root(gain
<< 16 >> bits2
);
874 for (i
= 0; i
< SUBFRAME_LEN
; i
++) {
875 p
->pf_gain
= (15 * p
->pf_gain
+ gain
+ (1 << 3)) >> 4;
876 buf
[i
] = av_clip_int16((buf
[i
] * (p
->pf_gain
+ (p
->pf_gain
>> 4)) +
882 * Perform formant filtering.
884 * @param p the context
885 * @param lpc quantized lpc coefficients
886 * @param buf input buffer
887 * @param dst output buffer
889 static void formant_postfilter(G723_1_Context
*p
, int16_t *lpc
,
890 int16_t *buf
, int16_t *dst
)
892 int16_t filter_coef
[2][LPC_ORDER
];
893 int filter_signal
[LPC_ORDER
+ FRAME_LEN
], *signal_ptr
;
896 memcpy(buf
, p
->fir_mem
, LPC_ORDER
* sizeof(*buf
));
897 memcpy(filter_signal
, p
->iir_mem
, LPC_ORDER
* sizeof(*filter_signal
));
899 for (i
= LPC_ORDER
, j
= 0; j
< SUBFRAMES
; i
+= SUBFRAME_LEN
, j
++) {
900 for (k
= 0; k
< LPC_ORDER
; k
++) {
901 filter_coef
[0][k
] = (-lpc
[k
] * postfilter_tbl
[0][k
] +
903 filter_coef
[1][k
] = (-lpc
[k
] * postfilter_tbl
[1][k
] +
906 iir_filter(filter_coef
[0], filter_coef
[1], buf
+ i
,
907 filter_signal
+ i
, 1);
911 memcpy(p
->fir_mem
, buf
+ FRAME_LEN
, LPC_ORDER
* sizeof(int16_t));
912 memcpy(p
->iir_mem
, filter_signal
+ FRAME_LEN
, LPC_ORDER
* sizeof(int));
915 signal_ptr
= filter_signal
+ LPC_ORDER
;
916 for (i
= 0; i
< SUBFRAMES
; i
++) {
922 scale
= scale_vector(dst
, buf
, SUBFRAME_LEN
);
924 /* Compute auto correlation coefficients */
925 auto_corr
[0] = dot_product(dst
, dst
+ 1, SUBFRAME_LEN
- 1);
926 auto_corr
[1] = dot_product(dst
, dst
, SUBFRAME_LEN
);
928 /* Compute reflection coefficient */
929 temp
= auto_corr
[1] >> 16;
931 temp
= (auto_corr
[0] >> 2) / temp
;
933 p
->reflection_coef
= (3 * p
->reflection_coef
+ temp
+ 2) >> 2;
934 temp
= -p
->reflection_coef
>> 1 & ~3;
936 /* Compensation filter */
937 for (j
= 0; j
< SUBFRAME_LEN
; j
++) {
938 dst
[j
] = av_sat_dadd32(signal_ptr
[j
],
939 (signal_ptr
[j
- 1] >> 16) * temp
) >> 16;
942 /* Compute normalized signal energy */
943 temp
= 2 * scale
+ 4;
945 energy
= av_clipl_int32((int64_t)auto_corr
[1] << -temp
);
947 energy
= auto_corr
[1] >> temp
;
949 gain_scale(p
, dst
, energy
);
952 signal_ptr
+= SUBFRAME_LEN
;
957 static int sid_gain_to_lsp_index(int gain
)
961 else if (gain
< 0x20)
962 return gain
- 8 << 7;
964 return gain
- 20 << 8;
967 static inline int cng_rand(int *state
, int base
)
969 *state
= (*state
* 521 + 259) & 0xFFFF;
970 return (*state
& 0x7FFF) * base
>> 15;
973 static int estimate_sid_gain(G723_1_Context
*p
)
975 int i
, shift
, seg
, seg2
, t
, val
, val_add
, x
, y
;
977 shift
= 16 - p
->cur_gain
* 2;
979 t
= p
->sid_gain
<< shift
;
981 t
= p
->sid_gain
>> -shift
;
982 x
= t
* cng_filt
[0] >> 16;
984 if (x
>= cng_bseg
[2])
987 if (x
>= cng_bseg
[1]) {
992 seg
= (x
>= cng_bseg
[0]);
994 seg2
= FFMIN(seg
, 3);
998 for (i
= 0; i
< shift
; i
++) {
999 t
= seg
* 32 + (val
<< seg2
);
1008 t
= seg
* 32 + (val
<< seg2
);
1011 t
= seg
* 32 + (val
+ 1 << seg2
);
1013 val
= (seg2
- 1 << 4) + val
;
1017 t
= seg
* 32 + (val
- 1 << seg2
);
1019 val
= (seg2
- 1 << 4) + val
;
1027 static void generate_noise(G723_1_Context
*p
)
1031 int signs
[SUBFRAMES
/ 2 * 11], pos
[SUBFRAMES
/ 2 * 11];
1032 int tmp
[SUBFRAME_LEN
* 2];
1033 int16_t *vector_ptr
;
1035 int b0
, c
, delta
, x
, shift
;
1037 p
->pitch_lag
[0] = cng_rand(&p
->cng_random_seed
, 21) + 123;
1038 p
->pitch_lag
[1] = cng_rand(&p
->cng_random_seed
, 19) + 123;
1040 for (i
= 0; i
< SUBFRAMES
; i
++) {
1041 p
->subframe
[i
].ad_cb_gain
= cng_rand(&p
->cng_random_seed
, 50) + 1;
1042 p
->subframe
[i
].ad_cb_lag
= cng_adaptive_cb_lag
[i
];
1045 for (i
= 0; i
< SUBFRAMES
/ 2; i
++) {
1046 t
= cng_rand(&p
->cng_random_seed
, 1 << 13);
1048 off
[i
* 2 + 1] = ((t
>> 1) & 1) + SUBFRAME_LEN
;
1050 for (j
= 0; j
< 11; j
++) {
1051 signs
[i
* 11 + j
] = (t
& 1) * 2 - 1 << 14;
1057 for (i
= 0; i
< SUBFRAMES
; i
++) {
1058 for (j
= 0; j
< SUBFRAME_LEN
/ 2; j
++)
1060 t
= SUBFRAME_LEN
/ 2;
1061 for (j
= 0; j
< pulses
[i
]; j
++, idx
++) {
1062 int idx2
= cng_rand(&p
->cng_random_seed
, t
);
1064 pos
[idx
] = tmp
[idx2
] * 2 + off
[i
];
1065 tmp
[idx2
] = tmp
[--t
];
1069 vector_ptr
= p
->audio
+ LPC_ORDER
;
1070 memcpy(vector_ptr
, p
->prev_excitation
,
1071 PITCH_MAX
* sizeof(*p
->excitation
));
1072 for (i
= 0; i
< SUBFRAMES
; i
+= 2) {
1073 gen_acb_excitation(vector_ptr
, vector_ptr
,
1074 p
->pitch_lag
[i
>> 1], &p
->subframe
[i
],
1076 gen_acb_excitation(vector_ptr
+ SUBFRAME_LEN
,
1077 vector_ptr
+ SUBFRAME_LEN
,
1078 p
->pitch_lag
[i
>> 1], &p
->subframe
[i
+ 1],
1082 for (j
= 0; j
< SUBFRAME_LEN
* 2; j
++)
1083 t
|= FFABS(vector_ptr
[j
]);
1084 t
= FFMIN(t
, 0x7FFF);
1088 shift
= -10 + av_log2(t
);
1094 for (j
= 0; j
< SUBFRAME_LEN
* 2; j
++) {
1095 t
= vector_ptr
[j
] << -shift
;
1100 for (j
= 0; j
< SUBFRAME_LEN
* 2; j
++) {
1101 t
= vector_ptr
[j
] >> shift
;
1108 for (j
= 0; j
< 11; j
++)
1109 b0
+= tmp
[pos
[(i
/ 2) * 11 + j
]] * signs
[(i
/ 2) * 11 + j
];
1110 b0
= b0
* 2 * 2979LL + (1 << 29) >> 30; // approximated division by 11
1112 c
= p
->cur_gain
* (p
->cur_gain
* SUBFRAME_LEN
>> 5);
1113 if (shift
* 2 + 3 >= 0)
1114 c
>>= shift
* 2 + 3;
1116 c
<<= -(shift
* 2 + 3);
1117 c
= (av_clipl_int32(sum
<< 1) - c
) * 2979LL >> 15;
1119 delta
= b0
* b0
* 2 - c
;
1123 delta
= square_root(delta
);
1126 if (FFABS(t
) < FFABS(x
))
1134 x
= av_clip(x
, -10000, 10000);
1136 for (j
= 0; j
< 11; j
++) {
1137 idx
= (i
/ 2) * 11 + j
;
1138 vector_ptr
[pos
[idx
]] = av_clip_int16(vector_ptr
[pos
[idx
]] +
1139 (x
* signs
[idx
] >> 15));
1142 /* copy decoded data to serve as a history for the next decoded subframes */
1143 memcpy(vector_ptr
+ PITCH_MAX
, vector_ptr
,
1144 sizeof(*vector_ptr
) * SUBFRAME_LEN
* 2);
1145 vector_ptr
+= SUBFRAME_LEN
* 2;
1147 /* Save the excitation for the next frame */
1148 memcpy(p
->prev_excitation
, p
->audio
+ LPC_ORDER
+ FRAME_LEN
,
1149 PITCH_MAX
* sizeof(*p
->excitation
));
1152 static int g723_1_decode_frame(AVCodecContext
*avctx
, void *data
,
1153 int *got_frame_ptr
, AVPacket
*avpkt
)
1155 G723_1_Context
*p
= avctx
->priv_data
;
1156 AVFrame
*frame
= data
;
1157 const uint8_t *buf
= avpkt
->data
;
1158 int buf_size
= avpkt
->size
;
1159 int dec_mode
= buf
[0] & 3;
1161 PPFParam ppf
[SUBFRAMES
];
1162 int16_t cur_lsp
[LPC_ORDER
];
1163 int16_t lpc
[SUBFRAMES
* LPC_ORDER
];
1164 int16_t acb_vector
[SUBFRAME_LEN
];
1166 int bad_frame
= 0, i
, j
, ret
;
1167 int16_t *audio
= p
->audio
;
1169 if (buf_size
< frame_size
[dec_mode
]) {
1171 av_log(avctx
, AV_LOG_WARNING
,
1172 "Expected %d bytes, got %d - skipping packet\n",
1173 frame_size
[dec_mode
], buf_size
);
1178 if (unpack_bitstream(p
, buf
, buf_size
) < 0) {
1180 if (p
->past_frame_type
== ACTIVE_FRAME
)
1181 p
->cur_frame_type
= ACTIVE_FRAME
;
1183 p
->cur_frame_type
= UNTRANSMITTED_FRAME
;
1186 frame
->nb_samples
= FRAME_LEN
;
1187 if ((ret
= ff_get_buffer(avctx
, frame
, 0)) < 0)
1190 out
= (int16_t *)frame
->data
[0];
1192 if (p
->cur_frame_type
== ACTIVE_FRAME
) {
1194 p
->erased_frames
= 0;
1195 else if (p
->erased_frames
!= 3)
1198 inverse_quant(cur_lsp
, p
->prev_lsp
, p
->lsp_index
, bad_frame
);
1199 lsp_interpolate(lpc
, cur_lsp
, p
->prev_lsp
);
1201 /* Save the lsp_vector for the next frame */
1202 memcpy(p
->prev_lsp
, cur_lsp
, LPC_ORDER
* sizeof(*p
->prev_lsp
));
1204 /* Generate the excitation for the frame */
1205 memcpy(p
->excitation
, p
->prev_excitation
,
1206 PITCH_MAX
* sizeof(*p
->excitation
));
1207 if (!p
->erased_frames
) {
1208 int16_t *vector_ptr
= p
->excitation
+ PITCH_MAX
;
1210 /* Update interpolation gain memory */
1211 p
->interp_gain
= fixed_cb_gain
[(p
->subframe
[2].amp_index
+
1212 p
->subframe
[3].amp_index
) >> 1];
1213 for (i
= 0; i
< SUBFRAMES
; i
++) {
1214 gen_fcb_excitation(vector_ptr
, &p
->subframe
[i
], p
->cur_rate
,
1215 p
->pitch_lag
[i
>> 1], i
);
1216 gen_acb_excitation(acb_vector
, &p
->excitation
[SUBFRAME_LEN
* i
],
1217 p
->pitch_lag
[i
>> 1], &p
->subframe
[i
],
1219 /* Get the total excitation */
1220 for (j
= 0; j
< SUBFRAME_LEN
; j
++) {
1221 int v
= av_clip_int16(vector_ptr
[j
] << 1);
1222 vector_ptr
[j
] = av_clip_int16(v
+ acb_vector
[j
]);
1224 vector_ptr
+= SUBFRAME_LEN
;
1227 vector_ptr
= p
->excitation
+ PITCH_MAX
;
1229 p
->interp_index
= comp_interp_index(p
, p
->pitch_lag
[1],
1230 &p
->sid_gain
, &p
->cur_gain
);
1232 /* Peform pitch postfiltering */
1233 if (p
->postfilter
) {
1235 for (j
= 0; j
< SUBFRAMES
; i
+= SUBFRAME_LEN
, j
++)
1236 comp_ppf_coeff(p
, i
, p
->pitch_lag
[j
>> 1],
1237 ppf
+ j
, p
->cur_rate
);
1239 for (i
= 0, j
= 0; j
< SUBFRAMES
; i
+= SUBFRAME_LEN
, j
++)
1240 ff_acelp_weighted_vector_sum(p
->audio
+ LPC_ORDER
+ i
,
1242 vector_ptr
+ i
+ ppf
[j
].index
,
1245 1 << 14, 15, SUBFRAME_LEN
);
1247 audio
= vector_ptr
- LPC_ORDER
;
1250 /* Save the excitation for the next frame */
1251 memcpy(p
->prev_excitation
, p
->excitation
+ FRAME_LEN
,
1252 PITCH_MAX
* sizeof(*p
->excitation
));
1254 p
->interp_gain
= (p
->interp_gain
* 3 + 2) >> 2;
1255 if (p
->erased_frames
== 3) {
1257 memset(p
->excitation
, 0,
1258 (FRAME_LEN
+ PITCH_MAX
) * sizeof(*p
->excitation
));
1259 memset(p
->prev_excitation
, 0,
1260 PITCH_MAX
* sizeof(*p
->excitation
));
1261 memset(frame
->data
[0], 0,
1262 (FRAME_LEN
+ LPC_ORDER
) * sizeof(int16_t));
1264 int16_t *buf
= p
->audio
+ LPC_ORDER
;
1266 /* Regenerate frame */
1267 residual_interp(p
->excitation
, buf
, p
->interp_index
,
1268 p
->interp_gain
, &p
->random_seed
);
1270 /* Save the excitation for the next frame */
1271 memcpy(p
->prev_excitation
, buf
+ (FRAME_LEN
- PITCH_MAX
),
1272 PITCH_MAX
* sizeof(*p
->excitation
));
1275 p
->cng_random_seed
= CNG_RANDOM_SEED
;
1277 if (p
->cur_frame_type
== SID_FRAME
) {
1278 p
->sid_gain
= sid_gain_to_lsp_index(p
->subframe
[0].amp_index
);
1279 inverse_quant(p
->sid_lsp
, p
->prev_lsp
, p
->lsp_index
, 0);
1280 } else if (p
->past_frame_type
== ACTIVE_FRAME
) {
1281 p
->sid_gain
= estimate_sid_gain(p
);
1284 if (p
->past_frame_type
== ACTIVE_FRAME
)
1285 p
->cur_gain
= p
->sid_gain
;
1287 p
->cur_gain
= (p
->cur_gain
* 7 + p
->sid_gain
) >> 3;
1289 lsp_interpolate(lpc
, p
->sid_lsp
, p
->prev_lsp
);
1290 /* Save the lsp_vector for the next frame */
1291 memcpy(p
->prev_lsp
, p
->sid_lsp
, LPC_ORDER
* sizeof(*p
->prev_lsp
));
1294 p
->past_frame_type
= p
->cur_frame_type
;
1296 memcpy(p
->audio
, p
->synth_mem
, LPC_ORDER
* sizeof(*p
->audio
));
1297 for (i
= LPC_ORDER
, j
= 0; j
< SUBFRAMES
; i
+= SUBFRAME_LEN
, j
++)
1298 ff_celp_lp_synthesis_filter(p
->audio
+ i
, &lpc
[j
* LPC_ORDER
],
1299 audio
+ i
, SUBFRAME_LEN
, LPC_ORDER
,
1301 memcpy(p
->synth_mem
, p
->audio
+ FRAME_LEN
, LPC_ORDER
* sizeof(*p
->audio
));
1303 if (p
->postfilter
) {
1304 formant_postfilter(p
, lpc
, p
->audio
, out
);
1305 } else { // if output is not postfiltered it should be scaled by 2
1306 for (i
= 0; i
< FRAME_LEN
; i
++)
1307 out
[i
] = av_clip_int16(p
->audio
[LPC_ORDER
+ i
] << 1);
1312 return frame_size
[dec_mode
];
1315 #define OFFSET(x) offsetof(G723_1_Context, x)
1316 #define AD AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM
1318 static const AVOption options
[] = {
1319 { "postfilter", "postfilter on/off", OFFSET(postfilter
), AV_OPT_TYPE_INT
,
1320 { .i64
= 1 }, 0, 1, AD
},
1325 static const AVClass g723_1dec_class
= {
1326 .class_name
= "G.723.1 decoder",
1327 .item_name
= av_default_item_name
,
1329 .version
= LIBAVUTIL_VERSION_INT
,
1332 AVCodec ff_g723_1_decoder
= {
1334 .long_name
= NULL_IF_CONFIG_SMALL("G.723.1"),
1335 .type
= AVMEDIA_TYPE_AUDIO
,
1336 .id
= AV_CODEC_ID_G723_1
,
1337 .priv_data_size
= sizeof(G723_1_Context
),
1338 .init
= g723_1_decode_init
,
1339 .decode
= g723_1_decode_frame
,
1340 .capabilities
= CODEC_CAP_SUBFRAMES
| CODEC_CAP_DR1
,
1341 .priv_class
= &g723_1dec_class
,
1344 #if CONFIG_G723_1_ENCODER
1345 #define BITSTREAM_WRITER_LE
1346 #include "put_bits.h"
1348 static av_cold
int g723_1_encode_init(AVCodecContext
*avctx
)
1350 G723_1_Context
*p
= avctx
->priv_data
;
1352 if (avctx
->sample_rate
!= 8000) {
1353 av_log(avctx
, AV_LOG_ERROR
, "Only 8000Hz sample rate supported\n");
1357 if (avctx
->channels
!= 1) {
1358 av_log(avctx
, AV_LOG_ERROR
, "Only mono supported\n");
1359 return AVERROR(EINVAL
);
1362 if (avctx
->bit_rate
== 6300) {
1363 p
->cur_rate
= RATE_6300
;
1364 } else if (avctx
->bit_rate
== 5300) {
1365 av_log(avctx
, AV_LOG_ERROR
, "Bitrate not supported yet, use 6.3k\n");
1366 return AVERROR_PATCHWELCOME
;
1368 av_log(avctx
, AV_LOG_ERROR
,
1369 "Bitrate not supported, use 6.3k\n");
1370 return AVERROR(EINVAL
);
1372 avctx
->frame_size
= 240;
1373 memcpy(p
->prev_lsp
, dc_lsp
, LPC_ORDER
* sizeof(int16_t));
1379 * Remove DC component from the input signal.
1381 * @param buf input signal
1382 * @param fir zero memory
1383 * @param iir pole memory
1385 static void highpass_filter(int16_t *buf
, int16_t *fir
, int *iir
)
1388 for (i
= 0; i
< FRAME_LEN
; i
++) {
1389 *iir
= (buf
[i
] << 15) + ((-*fir
) << 15) + MULL2(*iir
, 0x7f00);
1391 buf
[i
] = av_clipl_int32((int64_t)*iir
+ (1 << 15)) >> 16;
1396 * Estimate autocorrelation of the input vector.
1398 * @param buf input buffer
1399 * @param autocorr autocorrelation coefficients vector
1401 static void comp_autocorr(int16_t *buf
, int16_t *autocorr
)
1404 int16_t vector
[LPC_FRAME
];
1406 scale_vector(vector
, buf
, LPC_FRAME
);
1408 /* Apply the Hamming window */
1409 for (i
= 0; i
< LPC_FRAME
; i
++)
1410 vector
[i
] = (vector
[i
] * hamming_window
[i
] + (1 << 14)) >> 15;
1412 /* Compute the first autocorrelation coefficient */
1413 temp
= ff_dot_product(vector
, vector
, LPC_FRAME
);
1415 /* Apply a white noise correlation factor of (1025/1024) */
1419 scale
= normalize_bits_int32(temp
);
1420 autocorr
[0] = av_clipl_int32((int64_t)(temp
<< scale
) +
1423 /* Compute the remaining coefficients */
1425 memset(autocorr
+ 1, 0, LPC_ORDER
* sizeof(int16_t));
1427 for (i
= 1; i
<= LPC_ORDER
; i
++) {
1428 temp
= ff_dot_product(vector
, vector
+ i
, LPC_FRAME
- i
);
1429 temp
= MULL2((temp
<< scale
), binomial_window
[i
- 1]);
1430 autocorr
[i
] = av_clipl_int32((int64_t)temp
+ (1 << 15)) >> 16;
1436 * Use Levinson-Durbin recursion to compute LPC coefficients from
1437 * autocorrelation values.
1439 * @param lpc LPC coefficients vector
1440 * @param autocorr autocorrelation coefficients vector
1441 * @param error prediction error
1443 static void levinson_durbin(int16_t *lpc
, int16_t *autocorr
, int16_t error
)
1445 int16_t vector
[LPC_ORDER
];
1446 int16_t partial_corr
;
1449 memset(lpc
, 0, LPC_ORDER
* sizeof(int16_t));
1451 for (i
= 0; i
< LPC_ORDER
; i
++) {
1452 /* Compute the partial correlation coefficient */
1454 for (j
= 0; j
< i
; j
++)
1455 temp
-= lpc
[j
] * autocorr
[i
- j
- 1];
1456 temp
= ((autocorr
[i
] << 13) + temp
) << 3;
1458 if (FFABS(temp
) >= (error
<< 16))
1461 partial_corr
= temp
/ (error
<< 1);
1463 lpc
[i
] = av_clipl_int32((int64_t)(partial_corr
<< 14) +
1466 /* Update the prediction error */
1467 temp
= MULL2(temp
, partial_corr
);
1468 error
= av_clipl_int32((int64_t)(error
<< 16) - temp
+
1471 memcpy(vector
, lpc
, i
* sizeof(int16_t));
1472 for (j
= 0; j
< i
; j
++) {
1473 temp
= partial_corr
* vector
[i
- j
- 1] << 1;
1474 lpc
[j
] = av_clipl_int32((int64_t)(lpc
[j
] << 16) - temp
+
1481 * Calculate LPC coefficients for the current frame.
1483 * @param buf current frame
1484 * @param prev_data 2 trailing subframes of the previous frame
1485 * @param lpc LPC coefficients vector
1487 static void comp_lpc_coeff(int16_t *buf
, int16_t *lpc
)
1489 int16_t autocorr
[(LPC_ORDER
+ 1) * SUBFRAMES
];
1490 int16_t *autocorr_ptr
= autocorr
;
1491 int16_t *lpc_ptr
= lpc
;
1494 for (i
= 0, j
= 0; j
< SUBFRAMES
; i
+= SUBFRAME_LEN
, j
++) {
1495 comp_autocorr(buf
+ i
, autocorr_ptr
);
1496 levinson_durbin(lpc_ptr
, autocorr_ptr
+ 1, autocorr_ptr
[0]);
1498 lpc_ptr
+= LPC_ORDER
;
1499 autocorr_ptr
+= LPC_ORDER
+ 1;
1503 static void lpc2lsp(int16_t *lpc
, int16_t *prev_lsp
, int16_t *lsp
)
1505 int f
[LPC_ORDER
+ 2]; ///< coefficients of the sum and difference
1506 ///< polynomials (F1, F2) ordered as
1507 ///< f1[0], f2[0], ...., f1[5], f2[5]
1509 int max
, shift
, cur_val
, prev_val
, count
, p
;
1513 /* Initialize f1[0] and f2[0] to 1 in Q25 */
1514 for (i
= 0; i
< LPC_ORDER
; i
++)
1515 lsp
[i
] = (lpc
[i
] * bandwidth_expand
[i
] + (1 << 14)) >> 15;
1517 /* Apply bandwidth expansion on the LPC coefficients */
1518 f
[0] = f
[1] = 1 << 25;
1520 /* Compute the remaining coefficients */
1521 for (i
= 0; i
< LPC_ORDER
/ 2; i
++) {
1523 f
[2 * i
+ 2] = -f
[2 * i
] - ((lsp
[i
] + lsp
[LPC_ORDER
- 1 - i
]) << 12);
1525 f
[2 * i
+ 3] = f
[2 * i
+ 1] - ((lsp
[i
] - lsp
[LPC_ORDER
- 1 - i
]) << 12);
1528 /* Divide f1[5] and f2[5] by 2 for use in polynomial evaluation */
1530 f
[LPC_ORDER
+ 1] >>= 1;
1532 /* Normalize and shorten */
1534 for (i
= 1; i
< LPC_ORDER
+ 2; i
++)
1535 max
= FFMAX(max
, FFABS(f
[i
]));
1537 shift
= normalize_bits_int32(max
);
1539 for (i
= 0; i
< LPC_ORDER
+ 2; i
++)
1540 f
[i
] = av_clipl_int32((int64_t)(f
[i
] << shift
) + (1 << 15)) >> 16;
1543 * Evaluate F1 and F2 at uniform intervals of pi/256 along the
1544 * unit circle and check for zero crossings.
1548 for (i
= 0; i
<= LPC_ORDER
/ 2; i
++)
1549 temp
+= f
[2 * i
] * cos_tab
[0];
1550 prev_val
= av_clipl_int32(temp
<< 1);
1552 for ( i
= 1; i
< COS_TBL_SIZE
/ 2; i
++) {
1555 for (j
= 0; j
<= LPC_ORDER
/ 2; j
++)
1556 temp
+= f
[LPC_ORDER
- 2 * j
+ p
] * cos_tab
[i
* j
% COS_TBL_SIZE
];
1557 cur_val
= av_clipl_int32(temp
<< 1);
1559 /* Check for sign change, indicating a zero crossing */
1560 if ((cur_val
^ prev_val
) < 0) {
1561 int abs_cur
= FFABS(cur_val
);
1562 int abs_prev
= FFABS(prev_val
);
1563 int sum
= abs_cur
+ abs_prev
;
1565 shift
= normalize_bits_int32(sum
);
1567 abs_prev
= abs_prev
<< shift
>> 8;
1568 lsp
[count
++] = ((i
- 1) << 7) + (abs_prev
>> 1) / (sum
>> 16);
1570 if (count
== LPC_ORDER
)
1573 /* Switch between sum and difference polynomials */
1578 for (j
= 0; j
<= LPC_ORDER
/ 2; j
++){
1579 temp
+= f
[LPC_ORDER
- 2 * j
+ p
] *
1580 cos_tab
[i
* j
% COS_TBL_SIZE
];
1582 cur_val
= av_clipl_int32(temp
<<1);
1587 if (count
!= LPC_ORDER
)
1588 memcpy(lsp
, prev_lsp
, LPC_ORDER
* sizeof(int16_t));
1592 * Quantize the current LSP subvector.
1594 * @param num band number
1595 * @param offset offset of the current subvector in an LPC_ORDER vector
1596 * @param size size of the current subvector
1598 #define get_index(num, offset, size) \
1600 int error, max = -1;\
1603 for (i = 0; i < LSP_CB_SIZE; i++) {\
1604 for (j = 0; j < size; j++){\
1605 temp[j] = (weight[j + (offset)] * lsp_band##num[i][j] +\
1608 error = dot_product(lsp + (offset), temp, size) << 1;\
1609 error -= dot_product(lsp_band##num[i], temp, size);\
1612 lsp_index[num] = i;\
1618 * Vector quantize the LSP frequencies.
1620 * @param lsp the current lsp vector
1621 * @param prev_lsp the previous lsp vector
1623 static void lsp_quantize(uint8_t *lsp_index
, int16_t *lsp
, int16_t *prev_lsp
)
1625 int16_t weight
[LPC_ORDER
];
1629 /* Calculate the VQ weighting vector */
1630 weight
[0] = (1 << 20) / (lsp
[1] - lsp
[0]);
1631 weight
[LPC_ORDER
- 1] = (1 << 20) /
1632 (lsp
[LPC_ORDER
- 1] - lsp
[LPC_ORDER
- 2]);
1634 for (i
= 1; i
< LPC_ORDER
- 1; i
++) {
1635 min
= FFMIN(lsp
[i
] - lsp
[i
- 1], lsp
[i
+ 1] - lsp
[i
]);
1637 weight
[i
] = (1 << 20) / min
;
1639 weight
[i
] = INT16_MAX
;
1644 for (i
= 0; i
< LPC_ORDER
; i
++)
1645 max
= FFMAX(weight
[i
], max
);
1647 shift
= normalize_bits_int16(max
);
1648 for (i
= 0; i
< LPC_ORDER
; i
++) {
1649 weight
[i
] <<= shift
;
1652 /* Compute the VQ target vector */
1653 for (i
= 0; i
< LPC_ORDER
; i
++) {
1654 lsp
[i
] -= dc_lsp
[i
] +
1655 (((prev_lsp
[i
] - dc_lsp
[i
]) * 12288 + (1 << 14)) >> 15);
1664 * Apply the formant perceptual weighting filter.
1666 * @param flt_coef filter coefficients
1667 * @param unq_lpc unquantized lpc vector
1669 static void perceptual_filter(G723_1_Context
*p
, int16_t *flt_coef
,
1670 int16_t *unq_lpc
, int16_t *buf
)
1672 int16_t vector
[FRAME_LEN
+ LPC_ORDER
];
1675 memcpy(buf
, p
->iir_mem
, sizeof(int16_t) * LPC_ORDER
);
1676 memcpy(vector
, p
->fir_mem
, sizeof(int16_t) * LPC_ORDER
);
1677 memcpy(vector
+ LPC_ORDER
, buf
+ LPC_ORDER
, sizeof(int16_t) * FRAME_LEN
);
1679 for (i
= LPC_ORDER
, j
= 0; j
< SUBFRAMES
; i
+= SUBFRAME_LEN
, j
++) {
1680 for (k
= 0; k
< LPC_ORDER
; k
++) {
1681 flt_coef
[k
+ 2 * l
] = (unq_lpc
[k
+ l
] * percept_flt_tbl
[0][k
] +
1683 flt_coef
[k
+ 2 * l
+ LPC_ORDER
] = (unq_lpc
[k
+ l
] *
1684 percept_flt_tbl
[1][k
] +
1687 iir_filter(flt_coef
+ 2 * l
, flt_coef
+ 2 * l
+ LPC_ORDER
, vector
+ i
,
1691 memcpy(p
->iir_mem
, buf
+ FRAME_LEN
, sizeof(int16_t) * LPC_ORDER
);
1692 memcpy(p
->fir_mem
, vector
+ FRAME_LEN
, sizeof(int16_t) * LPC_ORDER
);
1696 * Estimate the open loop pitch period.
1698 * @param buf perceptually weighted speech
1699 * @param start estimation is carried out from this position
1701 static int estimate_pitch(int16_t *buf
, int start
)
1704 int max_ccr
= 0x4000;
1705 int max_eng
= 0x7fff;
1706 int index
= PITCH_MIN
;
1707 int offset
= start
- PITCH_MIN
+ 1;
1709 int ccr
, eng
, orig_eng
, ccr_eng
, exp
;
1714 orig_eng
= ff_dot_product(buf
+ offset
, buf
+ offset
, HALF_FRAME_LEN
);
1716 for (i
= PITCH_MIN
; i
<= PITCH_MAX
- 3; i
++) {
1719 /* Update energy and compute correlation */
1720 orig_eng
+= buf
[offset
] * buf
[offset
] -
1721 buf
[offset
+ HALF_FRAME_LEN
] * buf
[offset
+ HALF_FRAME_LEN
];
1722 ccr
= ff_dot_product(buf
+ start
, buf
+ offset
, HALF_FRAME_LEN
);
1726 /* Split into mantissa and exponent to maintain precision */
1727 exp
= normalize_bits_int32(ccr
);
1728 ccr
= av_clipl_int32((int64_t)(ccr
<< exp
) + (1 << 15)) >> 16;
1731 temp
= normalize_bits_int32(ccr
);
1732 ccr
= ccr
<< temp
>> 16;
1735 temp
= normalize_bits_int32(orig_eng
);
1736 eng
= av_clipl_int32((int64_t)(orig_eng
<< temp
) + (1 << 15)) >> 16;
1746 if (exp
+ 1 < max_exp
)
1749 /* Equalize exponents before comparison */
1750 if (exp
+ 1 == max_exp
)
1751 temp
= max_ccr
>> 1;
1754 ccr_eng
= ccr
* max_eng
;
1755 diff
= ccr_eng
- eng
* temp
;
1756 if (diff
> 0 && (i
- index
< PITCH_MIN
|| diff
> ccr_eng
>> 2)) {
1768 * Compute harmonic noise filter parameters.
1770 * @param buf perceptually weighted speech
1771 * @param pitch_lag open loop pitch period
1772 * @param hf harmonic filter parameters
1774 static void comp_harmonic_coeff(int16_t *buf
, int16_t pitch_lag
, HFParam
*hf
)
1776 int ccr
, eng
, max_ccr
, max_eng
;
1781 for (i
= 0, j
= pitch_lag
- 3; j
<= pitch_lag
+ 3; i
++, j
++) {
1782 /* Compute residual energy */
1783 energy
[i
<< 1] = ff_dot_product(buf
- j
, buf
- j
, SUBFRAME_LEN
);
1784 /* Compute correlation */
1785 energy
[(i
<< 1) + 1] = ff_dot_product(buf
, buf
- j
, SUBFRAME_LEN
);
1788 /* Compute target energy */
1789 energy
[14] = ff_dot_product(buf
, buf
, SUBFRAME_LEN
);
1793 for (i
= 0; i
< 15; i
++)
1794 max
= FFMAX(max
, FFABS(energy
[i
]));
1796 exp
= normalize_bits_int32(max
);
1797 for (i
= 0; i
< 15; i
++) {
1798 energy
[i
] = av_clipl_int32((int64_t)(energy
[i
] << exp
) +
1807 for (i
= 0; i
<= 6; i
++) {
1808 eng
= energy
[i
<< 1];
1809 ccr
= energy
[(i
<< 1) + 1];
1814 ccr
= (ccr
* ccr
+ (1 << 14)) >> 15;
1815 diff
= ccr
* max_eng
- eng
* max_ccr
;
1823 if (hf
->index
== -1) {
1824 hf
->index
= pitch_lag
;
1828 eng
= energy
[14] * max_eng
;
1829 eng
= (eng
>> 2) + (eng
>> 3);
1830 ccr
= energy
[(hf
->index
<< 1) + 1] * energy
[(hf
->index
<< 1) + 1];
1832 eng
= energy
[(hf
->index
<< 1) + 1];
1837 hf
->gain
= ((eng
<< 15) / max_eng
* 0x2800 + (1 << 14)) >> 15;
1839 hf
->index
+= pitch_lag
- 3;
1843 * Apply the harmonic noise shaping filter.
1845 * @param hf filter parameters
1847 static void harmonic_filter(HFParam
*hf
, const int16_t *src
, int16_t *dest
)
1851 for (i
= 0; i
< SUBFRAME_LEN
; i
++) {
1852 int64_t temp
= hf
->gain
* src
[i
- hf
->index
] << 1;
1853 dest
[i
] = av_clipl_int32((src
[i
] << 16) - temp
+ (1 << 15)) >> 16;
1857 static void harmonic_noise_sub(HFParam
*hf
, const int16_t *src
, int16_t *dest
)
1860 for (i
= 0; i
< SUBFRAME_LEN
; i
++) {
1861 int64_t temp
= hf
->gain
* src
[i
- hf
->index
] << 1;
1862 dest
[i
] = av_clipl_int32(((dest
[i
] - src
[i
]) << 16) + temp
+
1869 * Combined synthesis and formant perceptual weighting filer.
1871 * @param qnt_lpc quantized lpc coefficients
1872 * @param perf_lpc perceptual filter coefficients
1873 * @param perf_fir perceptual filter fir memory
1874 * @param perf_iir perceptual filter iir memory
1875 * @param scale the filter output will be scaled by 2^scale
1877 static void synth_percept_filter(int16_t *qnt_lpc
, int16_t *perf_lpc
,
1878 int16_t *perf_fir
, int16_t *perf_iir
,
1879 const int16_t *src
, int16_t *dest
, int scale
)
1882 int16_t buf_16
[SUBFRAME_LEN
+ LPC_ORDER
];
1883 int64_t buf
[SUBFRAME_LEN
];
1885 int16_t *bptr_16
= buf_16
+ LPC_ORDER
;
1887 memcpy(buf_16
, perf_fir
, sizeof(int16_t) * LPC_ORDER
);
1888 memcpy(dest
- LPC_ORDER
, perf_iir
, sizeof(int16_t) * LPC_ORDER
);
1890 for (i
= 0; i
< SUBFRAME_LEN
; i
++) {
1892 for (j
= 1; j
<= LPC_ORDER
; j
++)
1893 temp
-= qnt_lpc
[j
- 1] * bptr_16
[i
- j
];
1895 buf
[i
] = (src
[i
] << 15) + (temp
<< 3);
1896 bptr_16
[i
] = av_clipl_int32(buf
[i
] + (1 << 15)) >> 16;
1899 for (i
= 0; i
< SUBFRAME_LEN
; i
++) {
1900 int64_t fir
= 0, iir
= 0;
1901 for (j
= 1; j
<= LPC_ORDER
; j
++) {
1902 fir
-= perf_lpc
[j
- 1] * bptr_16
[i
- j
];
1903 iir
+= perf_lpc
[j
+ LPC_ORDER
- 1] * dest
[i
- j
];
1905 dest
[i
] = av_clipl_int32(((buf
[i
] + (fir
<< 3)) << scale
) + (iir
<< 3) +
1908 memcpy(perf_fir
, buf_16
+ SUBFRAME_LEN
, sizeof(int16_t) * LPC_ORDER
);
1909 memcpy(perf_iir
, dest
+ SUBFRAME_LEN
- LPC_ORDER
,
1910 sizeof(int16_t) * LPC_ORDER
);
1914 * Compute the adaptive codebook contribution.
1916 * @param buf input signal
1917 * @param index the current subframe index
1919 static void acb_search(G723_1_Context
*p
, int16_t *residual
,
1920 int16_t *impulse_resp
, const int16_t *buf
,
1924 int16_t flt_buf
[PITCH_ORDER
][SUBFRAME_LEN
];
1926 const int16_t *cb_tbl
= adaptive_cb_gain85
;
1928 int ccr_buf
[PITCH_ORDER
* SUBFRAMES
<< 2];
1930 int pitch_lag
= p
->pitch_lag
[index
>> 1];
1933 int odd_frame
= index
& 1;
1934 int iter
= 3 + odd_frame
;
1938 int i
, j
, k
, l
, max
;
1942 if (pitch_lag
== PITCH_MIN
)
1945 pitch_lag
= FFMIN(pitch_lag
, PITCH_MAX
- 5);
1948 for (i
= 0; i
< iter
; i
++) {
1949 get_residual(residual
, p
->prev_excitation
, pitch_lag
+ i
- 1);
1951 for (j
= 0; j
< SUBFRAME_LEN
; j
++) {
1953 for (k
= 0; k
<= j
; k
++)
1954 temp
+= residual
[PITCH_ORDER
- 1 + k
] * impulse_resp
[j
- k
];
1955 flt_buf
[PITCH_ORDER
- 1][j
] = av_clipl_int32((temp
<< 1) +
1959 for (j
= PITCH_ORDER
- 2; j
>= 0; j
--) {
1960 flt_buf
[j
][0] = ((residual
[j
] << 13) + (1 << 14)) >> 15;
1961 for (k
= 1; k
< SUBFRAME_LEN
; k
++) {
1962 temp
= (flt_buf
[j
+ 1][k
- 1] << 15) +
1963 residual
[j
] * impulse_resp
[k
];
1964 flt_buf
[j
][k
] = av_clipl_int32((temp
<< 1) + (1 << 15)) >> 16;
1968 /* Compute crosscorrelation with the signal */
1969 for (j
= 0; j
< PITCH_ORDER
; j
++) {
1970 temp
= ff_dot_product(buf
, flt_buf
[j
], SUBFRAME_LEN
);
1971 ccr_buf
[count
++] = av_clipl_int32(temp
<< 1);
1974 /* Compute energies */
1975 for (j
= 0; j
< PITCH_ORDER
; j
++) {
1976 ccr_buf
[count
++] = dot_product(flt_buf
[j
], flt_buf
[j
],
1980 for (j
= 1; j
< PITCH_ORDER
; j
++) {
1981 for (k
= 0; k
< j
; k
++) {
1982 temp
= ff_dot_product(flt_buf
[j
], flt_buf
[k
], SUBFRAME_LEN
);
1983 ccr_buf
[count
++] = av_clipl_int32(temp
<<2);
1988 /* Normalize and shorten */
1990 for (i
= 0; i
< 20 * iter
; i
++)
1991 max
= FFMAX(max
, FFABS(ccr_buf
[i
]));
1993 temp
= normalize_bits_int32(max
);
1995 for (i
= 0; i
< 20 * iter
; i
++){
1996 ccr_buf
[i
] = av_clipl_int32((int64_t)(ccr_buf
[i
] << temp
) +
2001 for (i
= 0; i
< iter
; i
++) {
2002 /* Select quantization table */
2003 if (!odd_frame
&& pitch_lag
+ i
- 1 >= SUBFRAME_LEN
- 2 ||
2004 odd_frame
&& pitch_lag
>= SUBFRAME_LEN
- 2) {
2005 cb_tbl
= adaptive_cb_gain170
;
2009 for (j
= 0, k
= 0; j
< tbl_size
; j
++, k
+= 20) {
2011 for (l
= 0; l
< 20; l
++)
2012 temp
+= ccr_buf
[20 * i
+ l
] * cb_tbl
[k
+ l
];
2013 temp
= av_clipl_int32(temp
);
2024 pitch_lag
+= acb_lag
- 1;
2028 p
->pitch_lag
[index
>> 1] = pitch_lag
;
2029 p
->subframe
[index
].ad_cb_lag
= acb_lag
;
2030 p
->subframe
[index
].ad_cb_gain
= acb_gain
;
2034 * Subtract the adaptive codebook contribution from the input
2035 * to obtain the residual.
2037 * @param buf target vector
2039 static void sub_acb_contrib(const int16_t *residual
, const int16_t *impulse_resp
,
2043 /* Subtract adaptive CB contribution to obtain the residual */
2044 for (i
= 0; i
< SUBFRAME_LEN
; i
++) {
2045 int64_t temp
= buf
[i
] << 14;
2046 for (j
= 0; j
<= i
; j
++)
2047 temp
-= residual
[j
] * impulse_resp
[i
- j
];
2049 buf
[i
] = av_clipl_int32((temp
<< 2) + (1 << 15)) >> 16;
2054 * Quantize the residual signal using the fixed codebook (MP-MLQ).
2056 * @param optim optimized fixed codebook parameters
2057 * @param buf excitation vector
2059 static void get_fcb_param(FCBParam
*optim
, int16_t *impulse_resp
,
2060 int16_t *buf
, int pulse_cnt
, int pitch_lag
)
2063 int16_t impulse_r
[SUBFRAME_LEN
];
2064 int16_t temp_corr
[SUBFRAME_LEN
];
2065 int16_t impulse_corr
[SUBFRAME_LEN
];
2067 int ccr1
[SUBFRAME_LEN
];
2068 int ccr2
[SUBFRAME_LEN
];
2069 int amp
, err
, max
, max_amp_index
, min
, scale
, i
, j
, k
, l
;
2073 /* Update impulse response */
2074 memcpy(impulse_r
, impulse_resp
, sizeof(int16_t) * SUBFRAME_LEN
);
2075 param
.dirac_train
= 0;
2076 if (pitch_lag
< SUBFRAME_LEN
- 2) {
2077 param
.dirac_train
= 1;
2078 gen_dirac_train(impulse_r
, pitch_lag
);
2081 for (i
= 0; i
< SUBFRAME_LEN
; i
++)
2082 temp_corr
[i
] = impulse_r
[i
] >> 1;
2084 /* Compute impulse response autocorrelation */
2085 temp
= dot_product(temp_corr
, temp_corr
, SUBFRAME_LEN
);
2087 scale
= normalize_bits_int32(temp
);
2088 impulse_corr
[0] = av_clipl_int32((temp
<< scale
) + (1 << 15)) >> 16;
2090 for (i
= 1; i
< SUBFRAME_LEN
; i
++) {
2091 temp
= dot_product(temp_corr
+ i
, temp_corr
, SUBFRAME_LEN
- i
);
2092 impulse_corr
[i
] = av_clipl_int32((temp
<< scale
) + (1 << 15)) >> 16;
2095 /* Compute crosscorrelation of impulse response with residual signal */
2097 for (i
= 0; i
< SUBFRAME_LEN
; i
++){
2098 temp
= dot_product(buf
+ i
, impulse_r
, SUBFRAME_LEN
- i
);
2100 ccr1
[i
] = temp
>> -scale
;
2102 ccr1
[i
] = av_clipl_int32(temp
<< scale
);
2106 for (i
= 0; i
< GRID_SIZE
; i
++) {
2107 /* Maximize the crosscorrelation */
2109 for (j
= i
; j
< SUBFRAME_LEN
; j
+= GRID_SIZE
) {
2110 temp
= FFABS(ccr1
[j
]);
2113 param
.pulse_pos
[0] = j
;
2117 /* Quantize the gain (max crosscorrelation/impulse_corr[0]) */
2120 max_amp_index
= GAIN_LEVELS
- 2;
2121 for (j
= max_amp_index
; j
>= 2; j
--) {
2122 temp
= av_clipl_int32((int64_t)fixed_cb_gain
[j
] *
2123 impulse_corr
[0] << 1);
2124 temp
= FFABS(temp
- amp
);
2132 /* Select additional gain values */
2133 for (j
= 1; j
< 5; j
++) {
2134 for (k
= i
; k
< SUBFRAME_LEN
; k
+= GRID_SIZE
) {
2138 param
.amp_index
= max_amp_index
+ j
- 2;
2139 amp
= fixed_cb_gain
[param
.amp_index
];
2141 param
.pulse_sign
[0] = (ccr2
[param
.pulse_pos
[0]] < 0) ? -amp
: amp
;
2142 temp_corr
[param
.pulse_pos
[0]] = 1;
2144 for (k
= 1; k
< pulse_cnt
; k
++) {
2146 for (l
= i
; l
< SUBFRAME_LEN
; l
+= GRID_SIZE
) {
2149 temp
= impulse_corr
[FFABS(l
- param
.pulse_pos
[k
- 1])];
2150 temp
= av_clipl_int32((int64_t)temp
*
2151 param
.pulse_sign
[k
- 1] << 1);
2153 temp
= FFABS(ccr2
[l
]);
2156 param
.pulse_pos
[k
] = l
;
2160 param
.pulse_sign
[k
] = (ccr2
[param
.pulse_pos
[k
]] < 0) ?
2162 temp_corr
[param
.pulse_pos
[k
]] = 1;
2165 /* Create the error vector */
2166 memset(temp_corr
, 0, sizeof(int16_t) * SUBFRAME_LEN
);
2168 for (k
= 0; k
< pulse_cnt
; k
++)
2169 temp_corr
[param
.pulse_pos
[k
]] = param
.pulse_sign
[k
];
2171 for (k
= SUBFRAME_LEN
- 1; k
>= 0; k
--) {
2173 for (l
= 0; l
<= k
; l
++) {
2174 int prod
= av_clipl_int32((int64_t)temp_corr
[l
] *
2175 impulse_r
[k
- l
] << 1);
2176 temp
= av_clipl_int32(temp
+ prod
);
2178 temp_corr
[k
] = temp
<< 2 >> 16;
2181 /* Compute square of error */
2183 for (k
= 0; k
< SUBFRAME_LEN
; k
++) {
2185 prod
= av_clipl_int32((int64_t)buf
[k
] * temp_corr
[k
] << 1);
2186 err
= av_clipl_int32(err
- prod
);
2187 prod
= av_clipl_int32((int64_t)temp_corr
[k
] * temp_corr
[k
]);
2188 err
= av_clipl_int32(err
+ prod
);
2192 if (err
< optim
->min_err
) {
2193 optim
->min_err
= err
;
2194 optim
->grid_index
= i
;
2195 optim
->amp_index
= param
.amp_index
;
2196 optim
->dirac_train
= param
.dirac_train
;
2198 for (k
= 0; k
< pulse_cnt
; k
++) {
2199 optim
->pulse_sign
[k
] = param
.pulse_sign
[k
];
2200 optim
->pulse_pos
[k
] = param
.pulse_pos
[k
];
2208 * Encode the pulse position and gain of the current subframe.
2210 * @param optim optimized fixed CB parameters
2211 * @param buf excitation vector
2213 static void pack_fcb_param(G723_1_Subframe
*subfrm
, FCBParam
*optim
,
2214 int16_t *buf
, int pulse_cnt
)
2218 j
= PULSE_MAX
- pulse_cnt
;
2220 subfrm
->pulse_sign
= 0;
2221 subfrm
->pulse_pos
= 0;
2223 for (i
= 0; i
< SUBFRAME_LEN
>> 1; i
++) {
2224 int val
= buf
[optim
->grid_index
+ (i
<< 1)];
2226 subfrm
->pulse_pos
+= combinatorial_table
[j
][i
];
2228 subfrm
->pulse_sign
<<= 1;
2229 if (val
< 0) subfrm
->pulse_sign
++;
2232 if (j
== PULSE_MAX
) break;
2235 subfrm
->amp_index
= optim
->amp_index
;
2236 subfrm
->grid_index
= optim
->grid_index
;
2237 subfrm
->dirac_train
= optim
->dirac_train
;
2241 * Compute the fixed codebook excitation.
2243 * @param buf target vector
2244 * @param impulse_resp impulse response of the combined filter
2246 static void fcb_search(G723_1_Context
*p
, int16_t *impulse_resp
,
2247 int16_t *buf
, int index
)
2250 int pulse_cnt
= pulses
[index
];
2253 optim
.min_err
= 1 << 30;
2254 get_fcb_param(&optim
, impulse_resp
, buf
, pulse_cnt
, SUBFRAME_LEN
);
2256 if (p
->pitch_lag
[index
>> 1] < SUBFRAME_LEN
- 2) {
2257 get_fcb_param(&optim
, impulse_resp
, buf
, pulse_cnt
,
2258 p
->pitch_lag
[index
>> 1]);
2261 /* Reconstruct the excitation */
2262 memset(buf
, 0, sizeof(int16_t) * SUBFRAME_LEN
);
2263 for (i
= 0; i
< pulse_cnt
; i
++)
2264 buf
[optim
.pulse_pos
[i
]] = optim
.pulse_sign
[i
];
2266 pack_fcb_param(&p
->subframe
[index
], &optim
, buf
, pulse_cnt
);
2268 if (optim
.dirac_train
)
2269 gen_dirac_train(buf
, p
->pitch_lag
[index
>> 1]);
2273 * Pack the frame parameters into output bitstream.
2275 * @param frame output buffer
2276 * @param size size of the buffer
2278 static int pack_bitstream(G723_1_Context
*p
, unsigned char *frame
, int size
)
2281 int info_bits
, i
, temp
;
2283 init_put_bits(&pb
, frame
, size
);
2285 if (p
->cur_rate
== RATE_6300
) {
2287 put_bits(&pb
, 2, info_bits
);
2291 put_bits(&pb
, 8, p
->lsp_index
[2]);
2292 put_bits(&pb
, 8, p
->lsp_index
[1]);
2293 put_bits(&pb
, 8, p
->lsp_index
[0]);
2295 put_bits(&pb
, 7, p
->pitch_lag
[0] - PITCH_MIN
);
2296 put_bits(&pb
, 2, p
->subframe
[1].ad_cb_lag
);
2297 put_bits(&pb
, 7, p
->pitch_lag
[1] - PITCH_MIN
);
2298 put_bits(&pb
, 2, p
->subframe
[3].ad_cb_lag
);
2300 /* Write 12 bit combined gain */
2301 for (i
= 0; i
< SUBFRAMES
; i
++) {
2302 temp
= p
->subframe
[i
].ad_cb_gain
* GAIN_LEVELS
+
2303 p
->subframe
[i
].amp_index
;
2304 if (p
->cur_rate
== RATE_6300
)
2305 temp
+= p
->subframe
[i
].dirac_train
<< 11;
2306 put_bits(&pb
, 12, temp
);
2309 put_bits(&pb
, 1, p
->subframe
[0].grid_index
);
2310 put_bits(&pb
, 1, p
->subframe
[1].grid_index
);
2311 put_bits(&pb
, 1, p
->subframe
[2].grid_index
);
2312 put_bits(&pb
, 1, p
->subframe
[3].grid_index
);
2314 if (p
->cur_rate
== RATE_6300
) {
2315 skip_put_bits(&pb
, 1); /* reserved bit */
2317 /* Write 13 bit combined position index */
2318 temp
= (p
->subframe
[0].pulse_pos
>> 16) * 810 +
2319 (p
->subframe
[1].pulse_pos
>> 14) * 90 +
2320 (p
->subframe
[2].pulse_pos
>> 16) * 9 +
2321 (p
->subframe
[3].pulse_pos
>> 14);
2322 put_bits(&pb
, 13, temp
);
2324 put_bits(&pb
, 16, p
->subframe
[0].pulse_pos
& 0xffff);
2325 put_bits(&pb
, 14, p
->subframe
[1].pulse_pos
& 0x3fff);
2326 put_bits(&pb
, 16, p
->subframe
[2].pulse_pos
& 0xffff);
2327 put_bits(&pb
, 14, p
->subframe
[3].pulse_pos
& 0x3fff);
2329 put_bits(&pb
, 6, p
->subframe
[0].pulse_sign
);
2330 put_bits(&pb
, 5, p
->subframe
[1].pulse_sign
);
2331 put_bits(&pb
, 6, p
->subframe
[2].pulse_sign
);
2332 put_bits(&pb
, 5, p
->subframe
[3].pulse_sign
);
2335 flush_put_bits(&pb
);
2336 return frame_size
[info_bits
];
2339 static int g723_1_encode_frame(AVCodecContext
*avctx
, AVPacket
*avpkt
,
2340 const AVFrame
*frame
, int *got_packet_ptr
)
2342 G723_1_Context
*p
= avctx
->priv_data
;
2343 int16_t unq_lpc
[LPC_ORDER
* SUBFRAMES
];
2344 int16_t qnt_lpc
[LPC_ORDER
* SUBFRAMES
];
2345 int16_t cur_lsp
[LPC_ORDER
];
2346 int16_t weighted_lpc
[LPC_ORDER
* SUBFRAMES
<< 1];
2347 int16_t vector
[FRAME_LEN
+ PITCH_MAX
];
2349 int16_t *in_orig
= av_memdup(frame
->data
[0], frame
->nb_samples
* sizeof(int16_t));
2350 int16_t *in
= in_orig
;
2356 return AVERROR(ENOMEM
);
2358 highpass_filter(in
, &p
->hpf_fir_mem
, &p
->hpf_iir_mem
);
2360 memcpy(vector
, p
->prev_data
, HALF_FRAME_LEN
* sizeof(int16_t));
2361 memcpy(vector
+ HALF_FRAME_LEN
, in
, FRAME_LEN
* sizeof(int16_t));
2363 comp_lpc_coeff(vector
, unq_lpc
);
2364 lpc2lsp(&unq_lpc
[LPC_ORDER
* 3], p
->prev_lsp
, cur_lsp
);
2365 lsp_quantize(p
->lsp_index
, cur_lsp
, p
->prev_lsp
);
2368 memcpy(vector
+ LPC_ORDER
, p
->prev_data
+ SUBFRAME_LEN
,
2369 sizeof(int16_t) * SUBFRAME_LEN
);
2370 memcpy(vector
+ LPC_ORDER
+ SUBFRAME_LEN
, in
,
2371 sizeof(int16_t) * (HALF_FRAME_LEN
+ SUBFRAME_LEN
));
2372 memcpy(p
->prev_data
, in
+ HALF_FRAME_LEN
,
2373 sizeof(int16_t) * HALF_FRAME_LEN
);
2374 memcpy(in
, vector
+ LPC_ORDER
, sizeof(int16_t) * FRAME_LEN
);
2376 perceptual_filter(p
, weighted_lpc
, unq_lpc
, vector
);
2378 memcpy(in
, vector
+ LPC_ORDER
, sizeof(int16_t) * FRAME_LEN
);
2379 memcpy(vector
, p
->prev_weight_sig
, sizeof(int16_t) * PITCH_MAX
);
2380 memcpy(vector
+ PITCH_MAX
, in
, sizeof(int16_t) * FRAME_LEN
);
2382 scale_vector(vector
, vector
, FRAME_LEN
+ PITCH_MAX
);
2384 p
->pitch_lag
[0] = estimate_pitch(vector
, PITCH_MAX
);
2385 p
->pitch_lag
[1] = estimate_pitch(vector
, PITCH_MAX
+ HALF_FRAME_LEN
);
2387 for (i
= PITCH_MAX
, j
= 0; j
< SUBFRAMES
; i
+= SUBFRAME_LEN
, j
++)
2388 comp_harmonic_coeff(vector
+ i
, p
->pitch_lag
[j
>> 1], hf
+ j
);
2390 memcpy(vector
, p
->prev_weight_sig
, sizeof(int16_t) * PITCH_MAX
);
2391 memcpy(vector
+ PITCH_MAX
, in
, sizeof(int16_t) * FRAME_LEN
);
2392 memcpy(p
->prev_weight_sig
, vector
+ FRAME_LEN
, sizeof(int16_t) * PITCH_MAX
);
2394 for (i
= 0, j
= 0; j
< SUBFRAMES
; i
+= SUBFRAME_LEN
, j
++)
2395 harmonic_filter(hf
+ j
, vector
+ PITCH_MAX
+ i
, in
+ i
);
2397 inverse_quant(cur_lsp
, p
->prev_lsp
, p
->lsp_index
, 0);
2398 lsp_interpolate(qnt_lpc
, cur_lsp
, p
->prev_lsp
);
2400 memcpy(p
->prev_lsp
, cur_lsp
, sizeof(int16_t) * LPC_ORDER
);
2403 for (i
= 0; i
< SUBFRAMES
; i
++) {
2404 int16_t impulse_resp
[SUBFRAME_LEN
];
2405 int16_t residual
[SUBFRAME_LEN
+ PITCH_ORDER
- 1];
2406 int16_t flt_in
[SUBFRAME_LEN
];
2407 int16_t zero
[LPC_ORDER
], fir
[LPC_ORDER
], iir
[LPC_ORDER
];
2410 * Compute the combined impulse response of the synthesis filter,
2411 * formant perceptual weighting filter and harmonic noise shaping filter
2413 memset(zero
, 0, sizeof(int16_t) * LPC_ORDER
);
2414 memset(vector
, 0, sizeof(int16_t) * PITCH_MAX
);
2415 memset(flt_in
, 0, sizeof(int16_t) * SUBFRAME_LEN
);
2417 flt_in
[0] = 1 << 13; /* Unit impulse */
2418 synth_percept_filter(qnt_lpc
+ offset
, weighted_lpc
+ (offset
<< 1),
2419 zero
, zero
, flt_in
, vector
+ PITCH_MAX
, 1);
2420 harmonic_filter(hf
+ i
, vector
+ PITCH_MAX
, impulse_resp
);
2422 /* Compute the combined zero input response */
2424 memcpy(fir
, p
->perf_fir_mem
, sizeof(int16_t) * LPC_ORDER
);
2425 memcpy(iir
, p
->perf_iir_mem
, sizeof(int16_t) * LPC_ORDER
);
2427 synth_percept_filter(qnt_lpc
+ offset
, weighted_lpc
+ (offset
<< 1),
2428 fir
, iir
, flt_in
, vector
+ PITCH_MAX
, 0);
2429 memcpy(vector
, p
->harmonic_mem
, sizeof(int16_t) * PITCH_MAX
);
2430 harmonic_noise_sub(hf
+ i
, vector
+ PITCH_MAX
, in
);
2432 acb_search(p
, residual
, impulse_resp
, in
, i
);
2433 gen_acb_excitation(residual
, p
->prev_excitation
,p
->pitch_lag
[i
>> 1],
2434 &p
->subframe
[i
], p
->cur_rate
);
2435 sub_acb_contrib(residual
, impulse_resp
, in
);
2437 fcb_search(p
, impulse_resp
, in
, i
);
2439 /* Reconstruct the excitation */
2440 gen_acb_excitation(impulse_resp
, p
->prev_excitation
, p
->pitch_lag
[i
>> 1],
2441 &p
->subframe
[i
], RATE_6300
);
2443 memmove(p
->prev_excitation
, p
->prev_excitation
+ SUBFRAME_LEN
,
2444 sizeof(int16_t) * (PITCH_MAX
- SUBFRAME_LEN
));
2445 for (j
= 0; j
< SUBFRAME_LEN
; j
++)
2446 in
[j
] = av_clip_int16((in
[j
] << 1) + impulse_resp
[j
]);
2447 memcpy(p
->prev_excitation
+ PITCH_MAX
- SUBFRAME_LEN
, in
,
2448 sizeof(int16_t) * SUBFRAME_LEN
);
2450 /* Update filter memories */
2451 synth_percept_filter(qnt_lpc
+ offset
, weighted_lpc
+ (offset
<< 1),
2452 p
->perf_fir_mem
, p
->perf_iir_mem
,
2453 in
, vector
+ PITCH_MAX
, 0);
2454 memmove(p
->harmonic_mem
, p
->harmonic_mem
+ SUBFRAME_LEN
,
2455 sizeof(int16_t) * (PITCH_MAX
- SUBFRAME_LEN
));
2456 memcpy(p
->harmonic_mem
+ PITCH_MAX
- SUBFRAME_LEN
, vector
+ PITCH_MAX
,
2457 sizeof(int16_t) * SUBFRAME_LEN
);
2460 offset
+= LPC_ORDER
;
2463 av_freep(&in_orig
); in
= NULL
;
2465 if ((ret
= ff_alloc_packet2(avctx
, avpkt
, 24)) < 0)
2468 *got_packet_ptr
= 1;
2469 avpkt
->size
= pack_bitstream(p
, avpkt
->data
, avpkt
->size
);
2473 AVCodec ff_g723_1_encoder
= {
2475 .long_name
= NULL_IF_CONFIG_SMALL("G.723.1"),
2476 .type
= AVMEDIA_TYPE_AUDIO
,
2477 .id
= AV_CODEC_ID_G723_1
,
2478 .priv_data_size
= sizeof(G723_1_Context
),
2479 .init
= g723_1_encode_init
,
2480 .encode2
= g723_1_encode_frame
,
2481 .sample_fmts
= (const enum AVSampleFormat
[]){AV_SAMPLE_FMT_S16
,
2482 AV_SAMPLE_FMT_NONE
},