Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * AMR wideband decoder | |
3 | * Copyright (c) 2010 Marcelo Galvao Povoa | |
4 | * | |
5 | * This file is part of FFmpeg. | |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * FFmpeg is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A particular PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with FFmpeg; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
21 | ||
22 | /** | |
23 | * @file | |
24 | * AMR wideband decoder | |
25 | */ | |
26 | ||
27 | #include "libavutil/channel_layout.h" | |
28 | #include "libavutil/common.h" | |
29 | #include "libavutil/float_dsp.h" | |
30 | #include "libavutil/lfg.h" | |
31 | ||
32 | #include "avcodec.h" | |
33 | #include "lsp.h" | |
34 | #include "celp_filters.h" | |
35 | #include "celp_math.h" | |
36 | #include "acelp_filters.h" | |
37 | #include "acelp_vectors.h" | |
38 | #include "acelp_pitch_delay.h" | |
39 | #include "internal.h" | |
40 | ||
41 | #define AMR_USE_16BIT_TABLES | |
42 | #include "amr.h" | |
43 | ||
44 | #include "amrwbdata.h" | |
45 | #include "mips/amrwbdec_mips.h" | |
46 | ||
47 | typedef struct { | |
48 | AMRWBFrame frame; ///< AMRWB parameters decoded from bitstream | |
49 | enum Mode fr_cur_mode; ///< mode index of current frame | |
50 | uint8_t fr_quality; ///< frame quality index (FQI) | |
51 | float isf_cur[LP_ORDER]; ///< working ISF vector from current frame | |
52 | float isf_q_past[LP_ORDER]; ///< quantized ISF vector of the previous frame | |
53 | float isf_past_final[LP_ORDER]; ///< final processed ISF vector of the previous frame | |
54 | double isp[4][LP_ORDER]; ///< ISP vectors from current frame | |
55 | double isp_sub4_past[LP_ORDER]; ///< ISP vector for the 4th subframe of the previous frame | |
56 | ||
57 | float lp_coef[4][LP_ORDER]; ///< Linear Prediction Coefficients from ISP vector | |
58 | ||
59 | uint8_t base_pitch_lag; ///< integer part of pitch lag for the next relative subframe | |
60 | uint8_t pitch_lag_int; ///< integer part of pitch lag of the previous subframe | |
61 | ||
62 | float excitation_buf[AMRWB_P_DELAY_MAX + LP_ORDER + 2 + AMRWB_SFR_SIZE]; ///< current excitation and all necessary excitation history | |
63 | float *excitation; ///< points to current excitation in excitation_buf[] | |
64 | ||
65 | float pitch_vector[AMRWB_SFR_SIZE]; ///< adaptive codebook (pitch) vector for current subframe | |
66 | float fixed_vector[AMRWB_SFR_SIZE]; ///< algebraic codebook (fixed) vector for current subframe | |
67 | ||
68 | float prediction_error[4]; ///< quantified prediction errors {20log10(^gamma_gc)} for previous four subframes | |
69 | float pitch_gain[6]; ///< quantified pitch gains for the current and previous five subframes | |
70 | float fixed_gain[2]; ///< quantified fixed gains for the current and previous subframes | |
71 | ||
72 | float tilt_coef; ///< {beta_1} related to the voicing of the previous subframe | |
73 | ||
74 | float prev_sparse_fixed_gain; ///< previous fixed gain; used by anti-sparseness to determine "onset" | |
75 | uint8_t prev_ir_filter_nr; ///< previous impulse response filter "impNr": 0 - strong, 1 - medium, 2 - none | |
76 | float prev_tr_gain; ///< previous initial gain used by noise enhancer for threshold | |
77 | ||
78 | float samples_az[LP_ORDER + AMRWB_SFR_SIZE]; ///< low-band samples and memory from synthesis at 12.8kHz | |
79 | float samples_up[UPS_MEM_SIZE + AMRWB_SFR_SIZE]; ///< low-band samples and memory processed for upsampling | |
80 | float samples_hb[LP_ORDER_16k + AMRWB_SFR_SIZE_16k]; ///< high-band samples and memory from synthesis at 16kHz | |
81 | ||
82 | float hpf_31_mem[2], hpf_400_mem[2]; ///< previous values in the high pass filters | |
83 | float demph_mem[1]; ///< previous value in the de-emphasis filter | |
84 | float bpf_6_7_mem[HB_FIR_SIZE]; ///< previous values in the high-band band pass filter | |
85 | float lpf_7_mem[HB_FIR_SIZE]; ///< previous values in the high-band low pass filter | |
86 | ||
87 | AVLFG prng; ///< random number generator for white noise excitation | |
88 | uint8_t first_frame; ///< flag active during decoding of the first frame | |
89 | ACELPFContext acelpf_ctx; ///< context for filters for ACELP-based codecs | |
90 | ACELPVContext acelpv_ctx; ///< context for vector operations for ACELP-based codecs | |
91 | CELPFContext celpf_ctx; ///< context for filters for CELP-based codecs | |
92 | CELPMContext celpm_ctx; ///< context for fixed point math operations | |
93 | ||
94 | } AMRWBContext; | |
95 | ||
96 | static av_cold int amrwb_decode_init(AVCodecContext *avctx) | |
97 | { | |
98 | AMRWBContext *ctx = avctx->priv_data; | |
99 | int i; | |
100 | ||
101 | if (avctx->channels > 1) { | |
102 | avpriv_report_missing_feature(avctx, "multi-channel AMR"); | |
103 | return AVERROR_PATCHWELCOME; | |
104 | } | |
105 | ||
106 | avctx->channels = 1; | |
107 | avctx->channel_layout = AV_CH_LAYOUT_MONO; | |
108 | if (!avctx->sample_rate) | |
109 | avctx->sample_rate = 16000; | |
110 | avctx->sample_fmt = AV_SAMPLE_FMT_FLT; | |
111 | ||
112 | av_lfg_init(&ctx->prng, 1); | |
113 | ||
114 | ctx->excitation = &ctx->excitation_buf[AMRWB_P_DELAY_MAX + LP_ORDER + 1]; | |
115 | ctx->first_frame = 1; | |
116 | ||
117 | for (i = 0; i < LP_ORDER; i++) | |
118 | ctx->isf_past_final[i] = isf_init[i] * (1.0f / (1 << 15)); | |
119 | ||
120 | for (i = 0; i < 4; i++) | |
121 | ctx->prediction_error[i] = MIN_ENERGY; | |
122 | ||
123 | ff_acelp_filter_init(&ctx->acelpf_ctx); | |
124 | ff_acelp_vectors_init(&ctx->acelpv_ctx); | |
125 | ff_celp_filter_init(&ctx->celpf_ctx); | |
126 | ff_celp_math_init(&ctx->celpm_ctx); | |
127 | ||
128 | return 0; | |
129 | } | |
130 | ||
131 | /** | |
132 | * Decode the frame header in the "MIME/storage" format. This format | |
133 | * is simpler and does not carry the auxiliary frame information. | |
134 | * | |
135 | * @param[in] ctx The Context | |
136 | * @param[in] buf Pointer to the input buffer | |
137 | * | |
138 | * @return The decoded header length in bytes | |
139 | */ | |
140 | static int decode_mime_header(AMRWBContext *ctx, const uint8_t *buf) | |
141 | { | |
142 | /* Decode frame header (1st octet) */ | |
143 | ctx->fr_cur_mode = buf[0] >> 3 & 0x0F; | |
144 | ctx->fr_quality = (buf[0] & 0x4) == 0x4; | |
145 | ||
146 | return 1; | |
147 | } | |
148 | ||
149 | /** | |
150 | * Decode quantized ISF vectors using 36-bit indexes (6K60 mode only). | |
151 | * | |
152 | * @param[in] ind Array of 5 indexes | |
153 | * @param[out] isf_q Buffer for isf_q[LP_ORDER] | |
154 | * | |
155 | */ | |
156 | static void decode_isf_indices_36b(uint16_t *ind, float *isf_q) | |
157 | { | |
158 | int i; | |
159 | ||
160 | for (i = 0; i < 9; i++) | |
161 | isf_q[i] = dico1_isf[ind[0]][i] * (1.0f / (1 << 15)); | |
162 | ||
163 | for (i = 0; i < 7; i++) | |
164 | isf_q[i + 9] = dico2_isf[ind[1]][i] * (1.0f / (1 << 15)); | |
165 | ||
166 | for (i = 0; i < 5; i++) | |
167 | isf_q[i] += dico21_isf_36b[ind[2]][i] * (1.0f / (1 << 15)); | |
168 | ||
169 | for (i = 0; i < 4; i++) | |
170 | isf_q[i + 5] += dico22_isf_36b[ind[3]][i] * (1.0f / (1 << 15)); | |
171 | ||
172 | for (i = 0; i < 7; i++) | |
173 | isf_q[i + 9] += dico23_isf_36b[ind[4]][i] * (1.0f / (1 << 15)); | |
174 | } | |
175 | ||
176 | /** | |
177 | * Decode quantized ISF vectors using 46-bit indexes (except 6K60 mode). | |
178 | * | |
179 | * @param[in] ind Array of 7 indexes | |
180 | * @param[out] isf_q Buffer for isf_q[LP_ORDER] | |
181 | * | |
182 | */ | |
183 | static void decode_isf_indices_46b(uint16_t *ind, float *isf_q) | |
184 | { | |
185 | int i; | |
186 | ||
187 | for (i = 0; i < 9; i++) | |
188 | isf_q[i] = dico1_isf[ind[0]][i] * (1.0f / (1 << 15)); | |
189 | ||
190 | for (i = 0; i < 7; i++) | |
191 | isf_q[i + 9] = dico2_isf[ind[1]][i] * (1.0f / (1 << 15)); | |
192 | ||
193 | for (i = 0; i < 3; i++) | |
194 | isf_q[i] += dico21_isf[ind[2]][i] * (1.0f / (1 << 15)); | |
195 | ||
196 | for (i = 0; i < 3; i++) | |
197 | isf_q[i + 3] += dico22_isf[ind[3]][i] * (1.0f / (1 << 15)); | |
198 | ||
199 | for (i = 0; i < 3; i++) | |
200 | isf_q[i + 6] += dico23_isf[ind[4]][i] * (1.0f / (1 << 15)); | |
201 | ||
202 | for (i = 0; i < 3; i++) | |
203 | isf_q[i + 9] += dico24_isf[ind[5]][i] * (1.0f / (1 << 15)); | |
204 | ||
205 | for (i = 0; i < 4; i++) | |
206 | isf_q[i + 12] += dico25_isf[ind[6]][i] * (1.0f / (1 << 15)); | |
207 | } | |
208 | ||
209 | /** | |
210 | * Apply mean and past ISF values using the prediction factor. | |
211 | * Updates past ISF vector. | |
212 | * | |
213 | * @param[in,out] isf_q Current quantized ISF | |
214 | * @param[in,out] isf_past Past quantized ISF | |
215 | * | |
216 | */ | |
217 | static void isf_add_mean_and_past(float *isf_q, float *isf_past) | |
218 | { | |
219 | int i; | |
220 | float tmp; | |
221 | ||
222 | for (i = 0; i < LP_ORDER; i++) { | |
223 | tmp = isf_q[i]; | |
224 | isf_q[i] += isf_mean[i] * (1.0f / (1 << 15)); | |
225 | isf_q[i] += PRED_FACTOR * isf_past[i]; | |
226 | isf_past[i] = tmp; | |
227 | } | |
228 | } | |
229 | ||
230 | /** | |
231 | * Interpolate the fourth ISP vector from current and past frames | |
232 | * to obtain an ISP vector for each subframe. | |
233 | * | |
234 | * @param[in,out] isp_q ISPs for each subframe | |
235 | * @param[in] isp4_past Past ISP for subframe 4 | |
236 | */ | |
237 | static void interpolate_isp(double isp_q[4][LP_ORDER], const double *isp4_past) | |
238 | { | |
239 | int i, k; | |
240 | ||
241 | for (k = 0; k < 3; k++) { | |
242 | float c = isfp_inter[k]; | |
243 | for (i = 0; i < LP_ORDER; i++) | |
244 | isp_q[k][i] = (1.0 - c) * isp4_past[i] + c * isp_q[3][i]; | |
245 | } | |
246 | } | |
247 | ||
248 | /** | |
249 | * Decode an adaptive codebook index into pitch lag (except 6k60, 8k85 modes). | |
250 | * Calculate integer lag and fractional lag always using 1/4 resolution. | |
251 | * In 1st and 3rd subframes the index is relative to last subframe integer lag. | |
252 | * | |
253 | * @param[out] lag_int Decoded integer pitch lag | |
254 | * @param[out] lag_frac Decoded fractional pitch lag | |
255 | * @param[in] pitch_index Adaptive codebook pitch index | |
256 | * @param[in,out] base_lag_int Base integer lag used in relative subframes | |
257 | * @param[in] subframe Current subframe index (0 to 3) | |
258 | */ | |
259 | static void decode_pitch_lag_high(int *lag_int, int *lag_frac, int pitch_index, | |
260 | uint8_t *base_lag_int, int subframe) | |
261 | { | |
262 | if (subframe == 0 || subframe == 2) { | |
263 | if (pitch_index < 376) { | |
264 | *lag_int = (pitch_index + 137) >> 2; | |
265 | *lag_frac = pitch_index - (*lag_int << 2) + 136; | |
266 | } else if (pitch_index < 440) { | |
267 | *lag_int = (pitch_index + 257 - 376) >> 1; | |
268 | *lag_frac = (pitch_index - (*lag_int << 1) + 256 - 376) << 1; | |
269 | /* the actual resolution is 1/2 but expressed as 1/4 */ | |
270 | } else { | |
271 | *lag_int = pitch_index - 280; | |
272 | *lag_frac = 0; | |
273 | } | |
274 | /* minimum lag for next subframe */ | |
275 | *base_lag_int = av_clip(*lag_int - 8 - (*lag_frac < 0), | |
276 | AMRWB_P_DELAY_MIN, AMRWB_P_DELAY_MAX - 15); | |
277 | // XXX: the spec states clearly that *base_lag_int should be | |
278 | // the nearest integer to *lag_int (minus 8), but the ref code | |
279 | // actually always uses its floor, I'm following the latter | |
280 | } else { | |
281 | *lag_int = (pitch_index + 1) >> 2; | |
282 | *lag_frac = pitch_index - (*lag_int << 2); | |
283 | *lag_int += *base_lag_int; | |
284 | } | |
285 | } | |
286 | ||
287 | /** | |
288 | * Decode an adaptive codebook index into pitch lag for 8k85 and 6k60 modes. | |
289 | * The description is analogous to decode_pitch_lag_high, but in 6k60 the | |
290 | * relative index is used for all subframes except the first. | |
291 | */ | |
292 | static void decode_pitch_lag_low(int *lag_int, int *lag_frac, int pitch_index, | |
293 | uint8_t *base_lag_int, int subframe, enum Mode mode) | |
294 | { | |
295 | if (subframe == 0 || (subframe == 2 && mode != MODE_6k60)) { | |
296 | if (pitch_index < 116) { | |
297 | *lag_int = (pitch_index + 69) >> 1; | |
298 | *lag_frac = (pitch_index - (*lag_int << 1) + 68) << 1; | |
299 | } else { | |
300 | *lag_int = pitch_index - 24; | |
301 | *lag_frac = 0; | |
302 | } | |
303 | // XXX: same problem as before | |
304 | *base_lag_int = av_clip(*lag_int - 8 - (*lag_frac < 0), | |
305 | AMRWB_P_DELAY_MIN, AMRWB_P_DELAY_MAX - 15); | |
306 | } else { | |
307 | *lag_int = (pitch_index + 1) >> 1; | |
308 | *lag_frac = (pitch_index - (*lag_int << 1)) << 1; | |
309 | *lag_int += *base_lag_int; | |
310 | } | |
311 | } | |
312 | ||
313 | /** | |
314 | * Find the pitch vector by interpolating the past excitation at the | |
315 | * pitch delay, which is obtained in this function. | |
316 | * | |
317 | * @param[in,out] ctx The context | |
318 | * @param[in] amr_subframe Current subframe data | |
319 | * @param[in] subframe Current subframe index (0 to 3) | |
320 | */ | |
321 | static void decode_pitch_vector(AMRWBContext *ctx, | |
322 | const AMRWBSubFrame *amr_subframe, | |
323 | const int subframe) | |
324 | { | |
325 | int pitch_lag_int, pitch_lag_frac; | |
326 | int i; | |
327 | float *exc = ctx->excitation; | |
328 | enum Mode mode = ctx->fr_cur_mode; | |
329 | ||
330 | if (mode <= MODE_8k85) { | |
331 | decode_pitch_lag_low(&pitch_lag_int, &pitch_lag_frac, amr_subframe->adap, | |
332 | &ctx->base_pitch_lag, subframe, mode); | |
333 | } else | |
334 | decode_pitch_lag_high(&pitch_lag_int, &pitch_lag_frac, amr_subframe->adap, | |
335 | &ctx->base_pitch_lag, subframe); | |
336 | ||
337 | ctx->pitch_lag_int = pitch_lag_int; | |
338 | pitch_lag_int += pitch_lag_frac > 0; | |
339 | ||
340 | /* Calculate the pitch vector by interpolating the past excitation at the | |
341 | pitch lag using a hamming windowed sinc function */ | |
342 | ctx->acelpf_ctx.acelp_interpolatef(exc, | |
343 | exc + 1 - pitch_lag_int, | |
344 | ac_inter, 4, | |
345 | pitch_lag_frac + (pitch_lag_frac > 0 ? 0 : 4), | |
346 | LP_ORDER, AMRWB_SFR_SIZE + 1); | |
347 | ||
348 | /* Check which pitch signal path should be used | |
349 | * 6k60 and 8k85 modes have the ltp flag set to 0 */ | |
350 | if (amr_subframe->ltp) { | |
351 | memcpy(ctx->pitch_vector, exc, AMRWB_SFR_SIZE * sizeof(float)); | |
352 | } else { | |
353 | for (i = 0; i < AMRWB_SFR_SIZE; i++) | |
354 | ctx->pitch_vector[i] = 0.18 * exc[i - 1] + 0.64 * exc[i] + | |
355 | 0.18 * exc[i + 1]; | |
356 | memcpy(exc, ctx->pitch_vector, AMRWB_SFR_SIZE * sizeof(float)); | |
357 | } | |
358 | } | |
359 | ||
360 | /** Get x bits in the index interval [lsb,lsb+len-1] inclusive */ | |
361 | #define BIT_STR(x,lsb,len) (((x) >> (lsb)) & ((1 << (len)) - 1)) | |
362 | ||
363 | /** Get the bit at specified position */ | |
364 | #define BIT_POS(x, p) (((x) >> (p)) & 1) | |
365 | ||
366 | /** | |
367 | * The next six functions decode_[i]p_track decode exactly i pulses | |
368 | * positions and amplitudes (-1 or 1) in a subframe track using | |
369 | * an encoded pulse indexing (TS 26.190 section 5.8.2). | |
370 | * | |
371 | * The results are given in out[], in which a negative number means | |
372 | * amplitude -1 and vice versa (i.e., ampl(x) = x / abs(x) ). | |
373 | * | |
374 | * @param[out] out Output buffer (writes i elements) | |
375 | * @param[in] code Pulse index (no. of bits varies, see below) | |
376 | * @param[in] m (log2) Number of potential positions | |
377 | * @param[in] off Offset for decoded positions | |
378 | */ | |
379 | static inline void decode_1p_track(int *out, int code, int m, int off) | |
380 | { | |
381 | int pos = BIT_STR(code, 0, m) + off; ///code: m+1 bits | |
382 | ||
383 | out[0] = BIT_POS(code, m) ? -pos : pos; | |
384 | } | |
385 | ||
386 | static inline void decode_2p_track(int *out, int code, int m, int off) ///code: 2m+1 bits | |
387 | { | |
388 | int pos0 = BIT_STR(code, m, m) + off; | |
389 | int pos1 = BIT_STR(code, 0, m) + off; | |
390 | ||
391 | out[0] = BIT_POS(code, 2*m) ? -pos0 : pos0; | |
392 | out[1] = BIT_POS(code, 2*m) ? -pos1 : pos1; | |
393 | out[1] = pos0 > pos1 ? -out[1] : out[1]; | |
394 | } | |
395 | ||
396 | static void decode_3p_track(int *out, int code, int m, int off) ///code: 3m+1 bits | |
397 | { | |
398 | int half_2p = BIT_POS(code, 2*m - 1) << (m - 1); | |
399 | ||
400 | decode_2p_track(out, BIT_STR(code, 0, 2*m - 1), | |
401 | m - 1, off + half_2p); | |
402 | decode_1p_track(out + 2, BIT_STR(code, 2*m, m + 1), m, off); | |
403 | } | |
404 | ||
405 | static void decode_4p_track(int *out, int code, int m, int off) ///code: 4m bits | |
406 | { | |
407 | int half_4p, subhalf_2p; | |
408 | int b_offset = 1 << (m - 1); | |
409 | ||
410 | switch (BIT_STR(code, 4*m - 2, 2)) { /* case ID (2 bits) */ | |
411 | case 0: /* 0 pulses in A, 4 pulses in B or vice versa */ | |
412 | half_4p = BIT_POS(code, 4*m - 3) << (m - 1); // which has 4 pulses | |
413 | subhalf_2p = BIT_POS(code, 2*m - 3) << (m - 2); | |
414 | ||
415 | decode_2p_track(out, BIT_STR(code, 0, 2*m - 3), | |
416 | m - 2, off + half_4p + subhalf_2p); | |
417 | decode_2p_track(out + 2, BIT_STR(code, 2*m - 2, 2*m - 1), | |
418 | m - 1, off + half_4p); | |
419 | break; | |
420 | case 1: /* 1 pulse in A, 3 pulses in B */ | |
421 | decode_1p_track(out, BIT_STR(code, 3*m - 2, m), | |
422 | m - 1, off); | |
423 | decode_3p_track(out + 1, BIT_STR(code, 0, 3*m - 2), | |
424 | m - 1, off + b_offset); | |
425 | break; | |
426 | case 2: /* 2 pulses in each half */ | |
427 | decode_2p_track(out, BIT_STR(code, 2*m - 1, 2*m - 1), | |
428 | m - 1, off); | |
429 | decode_2p_track(out + 2, BIT_STR(code, 0, 2*m - 1), | |
430 | m - 1, off + b_offset); | |
431 | break; | |
432 | case 3: /* 3 pulses in A, 1 pulse in B */ | |
433 | decode_3p_track(out, BIT_STR(code, m, 3*m - 2), | |
434 | m - 1, off); | |
435 | decode_1p_track(out + 3, BIT_STR(code, 0, m), | |
436 | m - 1, off + b_offset); | |
437 | break; | |
438 | } | |
439 | } | |
440 | ||
441 | static void decode_5p_track(int *out, int code, int m, int off) ///code: 5m bits | |
442 | { | |
443 | int half_3p = BIT_POS(code, 5*m - 1) << (m - 1); | |
444 | ||
445 | decode_3p_track(out, BIT_STR(code, 2*m + 1, 3*m - 2), | |
446 | m - 1, off + half_3p); | |
447 | ||
448 | decode_2p_track(out + 3, BIT_STR(code, 0, 2*m + 1), m, off); | |
449 | } | |
450 | ||
451 | static void decode_6p_track(int *out, int code, int m, int off) ///code: 6m-2 bits | |
452 | { | |
453 | int b_offset = 1 << (m - 1); | |
454 | /* which half has more pulses in cases 0 to 2 */ | |
455 | int half_more = BIT_POS(code, 6*m - 5) << (m - 1); | |
456 | int half_other = b_offset - half_more; | |
457 | ||
458 | switch (BIT_STR(code, 6*m - 4, 2)) { /* case ID (2 bits) */ | |
459 | case 0: /* 0 pulses in A, 6 pulses in B or vice versa */ | |
460 | decode_1p_track(out, BIT_STR(code, 0, m), | |
461 | m - 1, off + half_more); | |
462 | decode_5p_track(out + 1, BIT_STR(code, m, 5*m - 5), | |
463 | m - 1, off + half_more); | |
464 | break; | |
465 | case 1: /* 1 pulse in A, 5 pulses in B or vice versa */ | |
466 | decode_1p_track(out, BIT_STR(code, 0, m), | |
467 | m - 1, off + half_other); | |
468 | decode_5p_track(out + 1, BIT_STR(code, m, 5*m - 5), | |
469 | m - 1, off + half_more); | |
470 | break; | |
471 | case 2: /* 2 pulses in A, 4 pulses in B or vice versa */ | |
472 | decode_2p_track(out, BIT_STR(code, 0, 2*m - 1), | |
473 | m - 1, off + half_other); | |
474 | decode_4p_track(out + 2, BIT_STR(code, 2*m - 1, 4*m - 4), | |
475 | m - 1, off + half_more); | |
476 | break; | |
477 | case 3: /* 3 pulses in A, 3 pulses in B */ | |
478 | decode_3p_track(out, BIT_STR(code, 3*m - 2, 3*m - 2), | |
479 | m - 1, off); | |
480 | decode_3p_track(out + 3, BIT_STR(code, 0, 3*m - 2), | |
481 | m - 1, off + b_offset); | |
482 | break; | |
483 | } | |
484 | } | |
485 | ||
486 | /** | |
487 | * Decode the algebraic codebook index to pulse positions and signs, | |
488 | * then construct the algebraic codebook vector. | |
489 | * | |
490 | * @param[out] fixed_vector Buffer for the fixed codebook excitation | |
491 | * @param[in] pulse_hi MSBs part of the pulse index array (higher modes only) | |
492 | * @param[in] pulse_lo LSBs part of the pulse index array | |
493 | * @param[in] mode Mode of the current frame | |
494 | */ | |
495 | static void decode_fixed_vector(float *fixed_vector, const uint16_t *pulse_hi, | |
496 | const uint16_t *pulse_lo, const enum Mode mode) | |
497 | { | |
498 | /* sig_pos stores for each track the decoded pulse position indexes | |
499 | * (1-based) multiplied by its corresponding amplitude (+1 or -1) */ | |
500 | int sig_pos[4][6]; | |
501 | int spacing = (mode == MODE_6k60) ? 2 : 4; | |
502 | int i, j; | |
503 | ||
504 | switch (mode) { | |
505 | case MODE_6k60: | |
506 | for (i = 0; i < 2; i++) | |
507 | decode_1p_track(sig_pos[i], pulse_lo[i], 5, 1); | |
508 | break; | |
509 | case MODE_8k85: | |
510 | for (i = 0; i < 4; i++) | |
511 | decode_1p_track(sig_pos[i], pulse_lo[i], 4, 1); | |
512 | break; | |
513 | case MODE_12k65: | |
514 | for (i = 0; i < 4; i++) | |
515 | decode_2p_track(sig_pos[i], pulse_lo[i], 4, 1); | |
516 | break; | |
517 | case MODE_14k25: | |
518 | for (i = 0; i < 2; i++) | |
519 | decode_3p_track(sig_pos[i], pulse_lo[i], 4, 1); | |
520 | for (i = 2; i < 4; i++) | |
521 | decode_2p_track(sig_pos[i], pulse_lo[i], 4, 1); | |
522 | break; | |
523 | case MODE_15k85: | |
524 | for (i = 0; i < 4; i++) | |
525 | decode_3p_track(sig_pos[i], pulse_lo[i], 4, 1); | |
526 | break; | |
527 | case MODE_18k25: | |
528 | for (i = 0; i < 4; i++) | |
529 | decode_4p_track(sig_pos[i], (int) pulse_lo[i] + | |
530 | ((int) pulse_hi[i] << 14), 4, 1); | |
531 | break; | |
532 | case MODE_19k85: | |
533 | for (i = 0; i < 2; i++) | |
534 | decode_5p_track(sig_pos[i], (int) pulse_lo[i] + | |
535 | ((int) pulse_hi[i] << 10), 4, 1); | |
536 | for (i = 2; i < 4; i++) | |
537 | decode_4p_track(sig_pos[i], (int) pulse_lo[i] + | |
538 | ((int) pulse_hi[i] << 14), 4, 1); | |
539 | break; | |
540 | case MODE_23k05: | |
541 | case MODE_23k85: | |
542 | for (i = 0; i < 4; i++) | |
543 | decode_6p_track(sig_pos[i], (int) pulse_lo[i] + | |
544 | ((int) pulse_hi[i] << 11), 4, 1); | |
545 | break; | |
546 | } | |
547 | ||
548 | memset(fixed_vector, 0, sizeof(float) * AMRWB_SFR_SIZE); | |
549 | ||
550 | for (i = 0; i < 4; i++) | |
551 | for (j = 0; j < pulses_nb_per_mode_tr[mode][i]; j++) { | |
552 | int pos = (FFABS(sig_pos[i][j]) - 1) * spacing + i; | |
553 | ||
554 | fixed_vector[pos] += sig_pos[i][j] < 0 ? -1.0 : 1.0; | |
555 | } | |
556 | } | |
557 | ||
558 | /** | |
559 | * Decode pitch gain and fixed gain correction factor. | |
560 | * | |
561 | * @param[in] vq_gain Vector-quantized index for gains | |
562 | * @param[in] mode Mode of the current frame | |
563 | * @param[out] fixed_gain_factor Decoded fixed gain correction factor | |
564 | * @param[out] pitch_gain Decoded pitch gain | |
565 | */ | |
566 | static void decode_gains(const uint8_t vq_gain, const enum Mode mode, | |
567 | float *fixed_gain_factor, float *pitch_gain) | |
568 | { | |
569 | const int16_t *gains = (mode <= MODE_8k85 ? qua_gain_6b[vq_gain] : | |
570 | qua_gain_7b[vq_gain]); | |
571 | ||
572 | *pitch_gain = gains[0] * (1.0f / (1 << 14)); | |
573 | *fixed_gain_factor = gains[1] * (1.0f / (1 << 11)); | |
574 | } | |
575 | ||
576 | /** | |
577 | * Apply pitch sharpening filters to the fixed codebook vector. | |
578 | * | |
579 | * @param[in] ctx The context | |
580 | * @param[in,out] fixed_vector Fixed codebook excitation | |
581 | */ | |
582 | // XXX: Spec states this procedure should be applied when the pitch | |
583 | // lag is less than 64, but this checking seems absent in reference and AMR-NB | |
584 | static void pitch_sharpening(AMRWBContext *ctx, float *fixed_vector) | |
585 | { | |
586 | int i; | |
587 | ||
588 | /* Tilt part */ | |
589 | for (i = AMRWB_SFR_SIZE - 1; i != 0; i--) | |
590 | fixed_vector[i] -= fixed_vector[i - 1] * ctx->tilt_coef; | |
591 | ||
592 | /* Periodicity enhancement part */ | |
593 | for (i = ctx->pitch_lag_int; i < AMRWB_SFR_SIZE; i++) | |
594 | fixed_vector[i] += fixed_vector[i - ctx->pitch_lag_int] * 0.85; | |
595 | } | |
596 | ||
597 | /** | |
598 | * Calculate the voicing factor (-1.0 = unvoiced to 1.0 = voiced). | |
599 | * | |
600 | * @param[in] p_vector, f_vector Pitch and fixed excitation vectors | |
601 | * @param[in] p_gain, f_gain Pitch and fixed gains | |
602 | * @param[in] ctx The context | |
603 | */ | |
604 | // XXX: There is something wrong with the precision here! The magnitudes | |
605 | // of the energies are not correct. Please check the reference code carefully | |
606 | static float voice_factor(float *p_vector, float p_gain, | |
607 | float *f_vector, float f_gain, | |
608 | CELPMContext *ctx) | |
609 | { | |
610 | double p_ener = (double) ctx->dot_productf(p_vector, p_vector, | |
611 | AMRWB_SFR_SIZE) * | |
612 | p_gain * p_gain; | |
613 | double f_ener = (double) ctx->dot_productf(f_vector, f_vector, | |
614 | AMRWB_SFR_SIZE) * | |
615 | f_gain * f_gain; | |
616 | ||
617 | return (p_ener - f_ener) / (p_ener + f_ener); | |
618 | } | |
619 | ||
620 | /** | |
621 | * Reduce fixed vector sparseness by smoothing with one of three IR filters, | |
622 | * also known as "adaptive phase dispersion". | |
623 | * | |
624 | * @param[in] ctx The context | |
625 | * @param[in,out] fixed_vector Unfiltered fixed vector | |
626 | * @param[out] buf Space for modified vector if necessary | |
627 | * | |
628 | * @return The potentially overwritten filtered fixed vector address | |
629 | */ | |
630 | static float *anti_sparseness(AMRWBContext *ctx, | |
631 | float *fixed_vector, float *buf) | |
632 | { | |
633 | int ir_filter_nr; | |
634 | ||
635 | if (ctx->fr_cur_mode > MODE_8k85) // no filtering in higher modes | |
636 | return fixed_vector; | |
637 | ||
638 | if (ctx->pitch_gain[0] < 0.6) { | |
639 | ir_filter_nr = 0; // strong filtering | |
640 | } else if (ctx->pitch_gain[0] < 0.9) { | |
641 | ir_filter_nr = 1; // medium filtering | |
642 | } else | |
643 | ir_filter_nr = 2; // no filtering | |
644 | ||
645 | /* detect 'onset' */ | |
646 | if (ctx->fixed_gain[0] > 3.0 * ctx->fixed_gain[1]) { | |
647 | if (ir_filter_nr < 2) | |
648 | ir_filter_nr++; | |
649 | } else { | |
650 | int i, count = 0; | |
651 | ||
652 | for (i = 0; i < 6; i++) | |
653 | if (ctx->pitch_gain[i] < 0.6) | |
654 | count++; | |
655 | ||
656 | if (count > 2) | |
657 | ir_filter_nr = 0; | |
658 | ||
659 | if (ir_filter_nr > ctx->prev_ir_filter_nr + 1) | |
660 | ir_filter_nr--; | |
661 | } | |
662 | ||
663 | /* update ir filter strength history */ | |
664 | ctx->prev_ir_filter_nr = ir_filter_nr; | |
665 | ||
666 | ir_filter_nr += (ctx->fr_cur_mode == MODE_8k85); | |
667 | ||
668 | if (ir_filter_nr < 2) { | |
669 | int i; | |
670 | const float *coef = ir_filters_lookup[ir_filter_nr]; | |
671 | ||
672 | /* Circular convolution code in the reference | |
673 | * decoder was modified to avoid using one | |
674 | * extra array. The filtered vector is given by: | |
675 | * | |
676 | * c2(n) = sum(i,0,len-1){ c(i) * coef( (n - i + len) % len ) } | |
677 | */ | |
678 | ||
679 | memset(buf, 0, sizeof(float) * AMRWB_SFR_SIZE); | |
680 | for (i = 0; i < AMRWB_SFR_SIZE; i++) | |
681 | if (fixed_vector[i]) | |
682 | ff_celp_circ_addf(buf, buf, coef, i, fixed_vector[i], | |
683 | AMRWB_SFR_SIZE); | |
684 | fixed_vector = buf; | |
685 | } | |
686 | ||
687 | return fixed_vector; | |
688 | } | |
689 | ||
690 | /** | |
691 | * Calculate a stability factor {teta} based on distance between | |
692 | * current and past isf. A value of 1 shows maximum signal stability. | |
693 | */ | |
694 | static float stability_factor(const float *isf, const float *isf_past) | |
695 | { | |
696 | int i; | |
697 | float acc = 0.0; | |
698 | ||
699 | for (i = 0; i < LP_ORDER - 1; i++) | |
700 | acc += (isf[i] - isf_past[i]) * (isf[i] - isf_past[i]); | |
701 | ||
702 | // XXX: This part is not so clear from the reference code | |
703 | // the result is more accurate changing the "/ 256" to "* 512" | |
704 | return FFMAX(0.0, 1.25 - acc * 0.8 * 512); | |
705 | } | |
706 | ||
707 | /** | |
708 | * Apply a non-linear fixed gain smoothing in order to reduce | |
709 | * fluctuation in the energy of excitation. | |
710 | * | |
711 | * @param[in] fixed_gain Unsmoothed fixed gain | |
712 | * @param[in,out] prev_tr_gain Previous threshold gain (updated) | |
713 | * @param[in] voice_fac Frame voicing factor | |
714 | * @param[in] stab_fac Frame stability factor | |
715 | * | |
716 | * @return The smoothed gain | |
717 | */ | |
718 | static float noise_enhancer(float fixed_gain, float *prev_tr_gain, | |
719 | float voice_fac, float stab_fac) | |
720 | { | |
721 | float sm_fac = 0.5 * (1 - voice_fac) * stab_fac; | |
722 | float g0; | |
723 | ||
724 | // XXX: the following fixed-point constants used to in(de)crement | |
725 | // gain by 1.5dB were taken from the reference code, maybe it could | |
726 | // be simpler | |
727 | if (fixed_gain < *prev_tr_gain) { | |
728 | g0 = FFMIN(*prev_tr_gain, fixed_gain + fixed_gain * | |
729 | (6226 * (1.0f / (1 << 15)))); // +1.5 dB | |
730 | } else | |
731 | g0 = FFMAX(*prev_tr_gain, fixed_gain * | |
732 | (27536 * (1.0f / (1 << 15)))); // -1.5 dB | |
733 | ||
734 | *prev_tr_gain = g0; // update next frame threshold | |
735 | ||
736 | return sm_fac * g0 + (1 - sm_fac) * fixed_gain; | |
737 | } | |
738 | ||
739 | /** | |
740 | * Filter the fixed_vector to emphasize the higher frequencies. | |
741 | * | |
742 | * @param[in,out] fixed_vector Fixed codebook vector | |
743 | * @param[in] voice_fac Frame voicing factor | |
744 | */ | |
745 | static void pitch_enhancer(float *fixed_vector, float voice_fac) | |
746 | { | |
747 | int i; | |
748 | float cpe = 0.125 * (1 + voice_fac); | |
749 | float last = fixed_vector[0]; // holds c(i - 1) | |
750 | ||
751 | fixed_vector[0] -= cpe * fixed_vector[1]; | |
752 | ||
753 | for (i = 1; i < AMRWB_SFR_SIZE - 1; i++) { | |
754 | float cur = fixed_vector[i]; | |
755 | ||
756 | fixed_vector[i] -= cpe * (last + fixed_vector[i + 1]); | |
757 | last = cur; | |
758 | } | |
759 | ||
760 | fixed_vector[AMRWB_SFR_SIZE - 1] -= cpe * last; | |
761 | } | |
762 | ||
763 | /** | |
764 | * Conduct 16th order linear predictive coding synthesis from excitation. | |
765 | * | |
766 | * @param[in] ctx Pointer to the AMRWBContext | |
767 | * @param[in] lpc Pointer to the LPC coefficients | |
768 | * @param[out] excitation Buffer for synthesis final excitation | |
769 | * @param[in] fixed_gain Fixed codebook gain for synthesis | |
770 | * @param[in] fixed_vector Algebraic codebook vector | |
771 | * @param[in,out] samples Pointer to the output samples and memory | |
772 | */ | |
773 | static void synthesis(AMRWBContext *ctx, float *lpc, float *excitation, | |
774 | float fixed_gain, const float *fixed_vector, | |
775 | float *samples) | |
776 | { | |
777 | ctx->acelpv_ctx.weighted_vector_sumf(excitation, ctx->pitch_vector, fixed_vector, | |
778 | ctx->pitch_gain[0], fixed_gain, AMRWB_SFR_SIZE); | |
779 | ||
780 | /* emphasize pitch vector contribution in low bitrate modes */ | |
781 | if (ctx->pitch_gain[0] > 0.5 && ctx->fr_cur_mode <= MODE_8k85) { | |
782 | int i; | |
783 | float energy = ctx->celpm_ctx.dot_productf(excitation, excitation, | |
784 | AMRWB_SFR_SIZE); | |
785 | ||
786 | // XXX: Weird part in both ref code and spec. A unknown parameter | |
787 | // {beta} seems to be identical to the current pitch gain | |
788 | float pitch_factor = 0.25 * ctx->pitch_gain[0] * ctx->pitch_gain[0]; | |
789 | ||
790 | for (i = 0; i < AMRWB_SFR_SIZE; i++) | |
791 | excitation[i] += pitch_factor * ctx->pitch_vector[i]; | |
792 | ||
793 | ff_scale_vector_to_given_sum_of_squares(excitation, excitation, | |
794 | energy, AMRWB_SFR_SIZE); | |
795 | } | |
796 | ||
797 | ctx->celpf_ctx.celp_lp_synthesis_filterf(samples, lpc, excitation, | |
798 | AMRWB_SFR_SIZE, LP_ORDER); | |
799 | } | |
800 | ||
801 | /** | |
802 | * Apply to synthesis a de-emphasis filter of the form: | |
803 | * H(z) = 1 / (1 - m * z^-1) | |
804 | * | |
805 | * @param[out] out Output buffer | |
806 | * @param[in] in Input samples array with in[-1] | |
807 | * @param[in] m Filter coefficient | |
808 | * @param[in,out] mem State from last filtering | |
809 | */ | |
810 | static void de_emphasis(float *out, float *in, float m, float mem[1]) | |
811 | { | |
812 | int i; | |
813 | ||
814 | out[0] = in[0] + m * mem[0]; | |
815 | ||
816 | for (i = 1; i < AMRWB_SFR_SIZE; i++) | |
817 | out[i] = in[i] + out[i - 1] * m; | |
818 | ||
819 | mem[0] = out[AMRWB_SFR_SIZE - 1]; | |
820 | } | |
821 | ||
822 | /** | |
823 | * Upsample a signal by 5/4 ratio (from 12.8kHz to 16kHz) using | |
824 | * a FIR interpolation filter. Uses past data from before *in address. | |
825 | * | |
826 | * @param[out] out Buffer for interpolated signal | |
827 | * @param[in] in Current signal data (length 0.8*o_size) | |
828 | * @param[in] o_size Output signal length | |
829 | * @param[in] ctx The context | |
830 | */ | |
831 | static void upsample_5_4(float *out, const float *in, int o_size, CELPMContext *ctx) | |
832 | { | |
833 | const float *in0 = in - UPS_FIR_SIZE + 1; | |
834 | int i, j, k; | |
835 | int int_part = 0, frac_part; | |
836 | ||
837 | i = 0; | |
838 | for (j = 0; j < o_size / 5; j++) { | |
839 | out[i] = in[int_part]; | |
840 | frac_part = 4; | |
841 | i++; | |
842 | ||
843 | for (k = 1; k < 5; k++) { | |
844 | out[i] = ctx->dot_productf(in0 + int_part, | |
845 | upsample_fir[4 - frac_part], | |
846 | UPS_MEM_SIZE); | |
847 | int_part++; | |
848 | frac_part--; | |
849 | i++; | |
850 | } | |
851 | } | |
852 | } | |
853 | ||
854 | /** | |
855 | * Calculate the high-band gain based on encoded index (23k85 mode) or | |
856 | * on the low-band speech signal and the Voice Activity Detection flag. | |
857 | * | |
858 | * @param[in] ctx The context | |
859 | * @param[in] synth LB speech synthesis at 12.8k | |
860 | * @param[in] hb_idx Gain index for mode 23k85 only | |
861 | * @param[in] vad VAD flag for the frame | |
862 | */ | |
863 | static float find_hb_gain(AMRWBContext *ctx, const float *synth, | |
864 | uint16_t hb_idx, uint8_t vad) | |
865 | { | |
866 | int wsp = (vad > 0); | |
867 | float tilt; | |
868 | ||
869 | if (ctx->fr_cur_mode == MODE_23k85) | |
870 | return qua_hb_gain[hb_idx] * (1.0f / (1 << 14)); | |
871 | ||
872 | tilt = ctx->celpm_ctx.dot_productf(synth, synth + 1, AMRWB_SFR_SIZE - 1) / | |
873 | ctx->celpm_ctx.dot_productf(synth, synth, AMRWB_SFR_SIZE); | |
874 | ||
875 | /* return gain bounded by [0.1, 1.0] */ | |
876 | return av_clipf((1.0 - FFMAX(0.0, tilt)) * (1.25 - 0.25 * wsp), 0.1, 1.0); | |
877 | } | |
878 | ||
879 | /** | |
880 | * Generate the high-band excitation with the same energy from the lower | |
881 | * one and scaled by the given gain. | |
882 | * | |
883 | * @param[in] ctx The context | |
884 | * @param[out] hb_exc Buffer for the excitation | |
885 | * @param[in] synth_exc Low-band excitation used for synthesis | |
886 | * @param[in] hb_gain Wanted excitation gain | |
887 | */ | |
888 | static void scaled_hb_excitation(AMRWBContext *ctx, float *hb_exc, | |
889 | const float *synth_exc, float hb_gain) | |
890 | { | |
891 | int i; | |
892 | float energy = ctx->celpm_ctx.dot_productf(synth_exc, synth_exc, | |
893 | AMRWB_SFR_SIZE); | |
894 | ||
895 | /* Generate a white-noise excitation */ | |
896 | for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) | |
897 | hb_exc[i] = 32768.0 - (uint16_t) av_lfg_get(&ctx->prng); | |
898 | ||
899 | ff_scale_vector_to_given_sum_of_squares(hb_exc, hb_exc, | |
900 | energy * hb_gain * hb_gain, | |
901 | AMRWB_SFR_SIZE_16k); | |
902 | } | |
903 | ||
904 | /** | |
905 | * Calculate the auto-correlation for the ISF difference vector. | |
906 | */ | |
907 | static float auto_correlation(float *diff_isf, float mean, int lag) | |
908 | { | |
909 | int i; | |
910 | float sum = 0.0; | |
911 | ||
912 | for (i = 7; i < LP_ORDER - 2; i++) { | |
913 | float prod = (diff_isf[i] - mean) * (diff_isf[i - lag] - mean); | |
914 | sum += prod * prod; | |
915 | } | |
916 | return sum; | |
917 | } | |
918 | ||
919 | /** | |
920 | * Extrapolate a ISF vector to the 16kHz range (20th order LP) | |
921 | * used at mode 6k60 LP filter for the high frequency band. | |
922 | * | |
923 | * @param[out] isf Buffer for extrapolated isf; contains LP_ORDER | |
924 | * values on input | |
925 | */ | |
926 | static void extrapolate_isf(float isf[LP_ORDER_16k]) | |
927 | { | |
928 | float diff_isf[LP_ORDER - 2], diff_mean; | |
929 | float corr_lag[3]; | |
930 | float est, scale; | |
931 | int i, j, i_max_corr; | |
932 | ||
933 | isf[LP_ORDER_16k - 1] = isf[LP_ORDER - 1]; | |
934 | ||
935 | /* Calculate the difference vector */ | |
936 | for (i = 0; i < LP_ORDER - 2; i++) | |
937 | diff_isf[i] = isf[i + 1] - isf[i]; | |
938 | ||
939 | diff_mean = 0.0; | |
940 | for (i = 2; i < LP_ORDER - 2; i++) | |
941 | diff_mean += diff_isf[i] * (1.0f / (LP_ORDER - 4)); | |
942 | ||
943 | /* Find which is the maximum autocorrelation */ | |
944 | i_max_corr = 0; | |
945 | for (i = 0; i < 3; i++) { | |
946 | corr_lag[i] = auto_correlation(diff_isf, diff_mean, i + 2); | |
947 | ||
948 | if (corr_lag[i] > corr_lag[i_max_corr]) | |
949 | i_max_corr = i; | |
950 | } | |
951 | i_max_corr++; | |
952 | ||
953 | for (i = LP_ORDER - 1; i < LP_ORDER_16k - 1; i++) | |
954 | isf[i] = isf[i - 1] + isf[i - 1 - i_max_corr] | |
955 | - isf[i - 2 - i_max_corr]; | |
956 | ||
957 | /* Calculate an estimate for ISF(18) and scale ISF based on the error */ | |
958 | est = 7965 + (isf[2] - isf[3] - isf[4]) / 6.0; | |
959 | scale = 0.5 * (FFMIN(est, 7600) - isf[LP_ORDER - 2]) / | |
960 | (isf[LP_ORDER_16k - 2] - isf[LP_ORDER - 2]); | |
961 | ||
962 | for (i = LP_ORDER - 1, j = 0; i < LP_ORDER_16k - 1; i++, j++) | |
963 | diff_isf[j] = scale * (isf[i] - isf[i - 1]); | |
964 | ||
965 | /* Stability insurance */ | |
966 | for (i = 1; i < LP_ORDER_16k - LP_ORDER; i++) | |
967 | if (diff_isf[i] + diff_isf[i - 1] < 5.0) { | |
968 | if (diff_isf[i] > diff_isf[i - 1]) { | |
969 | diff_isf[i - 1] = 5.0 - diff_isf[i]; | |
970 | } else | |
971 | diff_isf[i] = 5.0 - diff_isf[i - 1]; | |
972 | } | |
973 | ||
974 | for (i = LP_ORDER - 1, j = 0; i < LP_ORDER_16k - 1; i++, j++) | |
975 | isf[i] = isf[i - 1] + diff_isf[j] * (1.0f / (1 << 15)); | |
976 | ||
977 | /* Scale the ISF vector for 16000 Hz */ | |
978 | for (i = 0; i < LP_ORDER_16k - 1; i++) | |
979 | isf[i] *= 0.8; | |
980 | } | |
981 | ||
982 | /** | |
983 | * Spectral expand the LP coefficients using the equation: | |
984 | * y[i] = x[i] * (gamma ** i) | |
985 | * | |
986 | * @param[out] out Output buffer (may use input array) | |
987 | * @param[in] lpc LP coefficients array | |
988 | * @param[in] gamma Weighting factor | |
989 | * @param[in] size LP array size | |
990 | */ | |
991 | static void lpc_weighting(float *out, const float *lpc, float gamma, int size) | |
992 | { | |
993 | int i; | |
994 | float fac = gamma; | |
995 | ||
996 | for (i = 0; i < size; i++) { | |
997 | out[i] = lpc[i] * fac; | |
998 | fac *= gamma; | |
999 | } | |
1000 | } | |
1001 | ||
1002 | /** | |
1003 | * Conduct 20th order linear predictive coding synthesis for the high | |
1004 | * frequency band excitation at 16kHz. | |
1005 | * | |
1006 | * @param[in] ctx The context | |
1007 | * @param[in] subframe Current subframe index (0 to 3) | |
1008 | * @param[in,out] samples Pointer to the output speech samples | |
1009 | * @param[in] exc Generated white-noise scaled excitation | |
1010 | * @param[in] isf Current frame isf vector | |
1011 | * @param[in] isf_past Past frame final isf vector | |
1012 | */ | |
1013 | static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples, | |
1014 | const float *exc, const float *isf, const float *isf_past) | |
1015 | { | |
1016 | float hb_lpc[LP_ORDER_16k]; | |
1017 | enum Mode mode = ctx->fr_cur_mode; | |
1018 | ||
1019 | if (mode == MODE_6k60) { | |
1020 | float e_isf[LP_ORDER_16k]; // ISF vector for extrapolation | |
1021 | double e_isp[LP_ORDER_16k]; | |
1022 | ||
1023 | ctx->acelpv_ctx.weighted_vector_sumf(e_isf, isf_past, isf, isfp_inter[subframe], | |
1024 | 1.0 - isfp_inter[subframe], LP_ORDER); | |
1025 | ||
1026 | extrapolate_isf(e_isf); | |
1027 | ||
1028 | e_isf[LP_ORDER_16k - 1] *= 2.0; | |
1029 | ff_acelp_lsf2lspd(e_isp, e_isf, LP_ORDER_16k); | |
1030 | ff_amrwb_lsp2lpc(e_isp, hb_lpc, LP_ORDER_16k); | |
1031 | ||
1032 | lpc_weighting(hb_lpc, hb_lpc, 0.9, LP_ORDER_16k); | |
1033 | } else { | |
1034 | lpc_weighting(hb_lpc, ctx->lp_coef[subframe], 0.6, LP_ORDER); | |
1035 | } | |
1036 | ||
1037 | ctx->celpf_ctx.celp_lp_synthesis_filterf(samples, hb_lpc, exc, AMRWB_SFR_SIZE_16k, | |
1038 | (mode == MODE_6k60) ? LP_ORDER_16k : LP_ORDER); | |
1039 | } | |
1040 | ||
1041 | /** | |
1042 | * Apply a 15th order filter to high-band samples. | |
1043 | * The filter characteristic depends on the given coefficients. | |
1044 | * | |
1045 | * @param[out] out Buffer for filtered output | |
1046 | * @param[in] fir_coef Filter coefficients | |
1047 | * @param[in,out] mem State from last filtering (updated) | |
1048 | * @param[in] in Input speech data (high-band) | |
1049 | * | |
1050 | * @remark It is safe to pass the same array in in and out parameters | |
1051 | */ | |
1052 | ||
1053 | #ifndef hb_fir_filter | |
1054 | static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1], | |
1055 | float mem[HB_FIR_SIZE], const float *in) | |
1056 | { | |
1057 | int i, j; | |
1058 | float data[AMRWB_SFR_SIZE_16k + HB_FIR_SIZE]; // past and current samples | |
1059 | ||
1060 | memcpy(data, mem, HB_FIR_SIZE * sizeof(float)); | |
1061 | memcpy(data + HB_FIR_SIZE, in, AMRWB_SFR_SIZE_16k * sizeof(float)); | |
1062 | ||
1063 | for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) { | |
1064 | out[i] = 0.0; | |
1065 | for (j = 0; j <= HB_FIR_SIZE; j++) | |
1066 | out[i] += data[i + j] * fir_coef[j]; | |
1067 | } | |
1068 | ||
1069 | memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float)); | |
1070 | } | |
1071 | #endif /* hb_fir_filter */ | |
1072 | ||
1073 | /** | |
1074 | * Update context state before the next subframe. | |
1075 | */ | |
1076 | static void update_sub_state(AMRWBContext *ctx) | |
1077 | { | |
1078 | memmove(&ctx->excitation_buf[0], &ctx->excitation_buf[AMRWB_SFR_SIZE], | |
1079 | (AMRWB_P_DELAY_MAX + LP_ORDER + 1) * sizeof(float)); | |
1080 | ||
1081 | memmove(&ctx->pitch_gain[1], &ctx->pitch_gain[0], 5 * sizeof(float)); | |
1082 | memmove(&ctx->fixed_gain[1], &ctx->fixed_gain[0], 1 * sizeof(float)); | |
1083 | ||
1084 | memmove(&ctx->samples_az[0], &ctx->samples_az[AMRWB_SFR_SIZE], | |
1085 | LP_ORDER * sizeof(float)); | |
1086 | memmove(&ctx->samples_up[0], &ctx->samples_up[AMRWB_SFR_SIZE], | |
1087 | UPS_MEM_SIZE * sizeof(float)); | |
1088 | memmove(&ctx->samples_hb[0], &ctx->samples_hb[AMRWB_SFR_SIZE_16k], | |
1089 | LP_ORDER_16k * sizeof(float)); | |
1090 | } | |
1091 | ||
1092 | static int amrwb_decode_frame(AVCodecContext *avctx, void *data, | |
1093 | int *got_frame_ptr, AVPacket *avpkt) | |
1094 | { | |
1095 | AMRWBContext *ctx = avctx->priv_data; | |
1096 | AVFrame *frame = data; | |
1097 | AMRWBFrame *cf = &ctx->frame; | |
1098 | const uint8_t *buf = avpkt->data; | |
1099 | int buf_size = avpkt->size; | |
1100 | int expected_fr_size, header_size; | |
1101 | float *buf_out; | |
1102 | float spare_vector[AMRWB_SFR_SIZE]; // extra stack space to hold result from anti-sparseness processing | |
1103 | float fixed_gain_factor; // fixed gain correction factor (gamma) | |
1104 | float *synth_fixed_vector; // pointer to the fixed vector that synthesis should use | |
1105 | float synth_fixed_gain; // the fixed gain that synthesis should use | |
1106 | float voice_fac, stab_fac; // parameters used for gain smoothing | |
1107 | float synth_exc[AMRWB_SFR_SIZE]; // post-processed excitation for synthesis | |
1108 | float hb_exc[AMRWB_SFR_SIZE_16k]; // excitation for the high frequency band | |
1109 | float hb_samples[AMRWB_SFR_SIZE_16k]; // filtered high-band samples from synthesis | |
1110 | float hb_gain; | |
1111 | int sub, i, ret; | |
1112 | ||
1113 | /* get output buffer */ | |
1114 | frame->nb_samples = 4 * AMRWB_SFR_SIZE_16k; | |
1115 | if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) | |
1116 | return ret; | |
1117 | buf_out = (float *)frame->data[0]; | |
1118 | ||
1119 | header_size = decode_mime_header(ctx, buf); | |
1120 | if (ctx->fr_cur_mode > MODE_SID) { | |
1121 | av_log(avctx, AV_LOG_ERROR, | |
1122 | "Invalid mode %d\n", ctx->fr_cur_mode); | |
1123 | return AVERROR_INVALIDDATA; | |
1124 | } | |
1125 | expected_fr_size = ((cf_sizes_wb[ctx->fr_cur_mode] + 7) >> 3) + 1; | |
1126 | ||
1127 | if (buf_size < expected_fr_size) { | |
1128 | av_log(avctx, AV_LOG_ERROR, | |
1129 | "Frame too small (%d bytes). Truncated file?\n", buf_size); | |
1130 | *got_frame_ptr = 0; | |
1131 | return AVERROR_INVALIDDATA; | |
1132 | } | |
1133 | ||
1134 | if (!ctx->fr_quality || ctx->fr_cur_mode > MODE_SID) | |
1135 | av_log(avctx, AV_LOG_ERROR, "Encountered a bad or corrupted frame\n"); | |
1136 | ||
1137 | if (ctx->fr_cur_mode == MODE_SID) { /* Comfort noise frame */ | |
1138 | avpriv_request_sample(avctx, "SID mode"); | |
1139 | return AVERROR_PATCHWELCOME; | |
1140 | } | |
1141 | ||
1142 | ff_amr_bit_reorder((uint16_t *) &ctx->frame, sizeof(AMRWBFrame), | |
1143 | buf + header_size, amr_bit_orderings_by_mode[ctx->fr_cur_mode]); | |
1144 | ||
1145 | /* Decode the quantized ISF vector */ | |
1146 | if (ctx->fr_cur_mode == MODE_6k60) { | |
1147 | decode_isf_indices_36b(cf->isp_id, ctx->isf_cur); | |
1148 | } else { | |
1149 | decode_isf_indices_46b(cf->isp_id, ctx->isf_cur); | |
1150 | } | |
1151 | ||
1152 | isf_add_mean_and_past(ctx->isf_cur, ctx->isf_q_past); | |
1153 | ff_set_min_dist_lsf(ctx->isf_cur, MIN_ISF_SPACING, LP_ORDER - 1); | |
1154 | ||
1155 | stab_fac = stability_factor(ctx->isf_cur, ctx->isf_past_final); | |
1156 | ||
1157 | ctx->isf_cur[LP_ORDER - 1] *= 2.0; | |
1158 | ff_acelp_lsf2lspd(ctx->isp[3], ctx->isf_cur, LP_ORDER); | |
1159 | ||
1160 | /* Generate a ISP vector for each subframe */ | |
1161 | if (ctx->first_frame) { | |
1162 | ctx->first_frame = 0; | |
1163 | memcpy(ctx->isp_sub4_past, ctx->isp[3], LP_ORDER * sizeof(double)); | |
1164 | } | |
1165 | interpolate_isp(ctx->isp, ctx->isp_sub4_past); | |
1166 | ||
1167 | for (sub = 0; sub < 4; sub++) | |
1168 | ff_amrwb_lsp2lpc(ctx->isp[sub], ctx->lp_coef[sub], LP_ORDER); | |
1169 | ||
1170 | for (sub = 0; sub < 4; sub++) { | |
1171 | const AMRWBSubFrame *cur_subframe = &cf->subframe[sub]; | |
1172 | float *sub_buf = buf_out + sub * AMRWB_SFR_SIZE_16k; | |
1173 | ||
1174 | /* Decode adaptive codebook (pitch vector) */ | |
1175 | decode_pitch_vector(ctx, cur_subframe, sub); | |
1176 | /* Decode innovative codebook (fixed vector) */ | |
1177 | decode_fixed_vector(ctx->fixed_vector, cur_subframe->pul_ih, | |
1178 | cur_subframe->pul_il, ctx->fr_cur_mode); | |
1179 | ||
1180 | pitch_sharpening(ctx, ctx->fixed_vector); | |
1181 | ||
1182 | decode_gains(cur_subframe->vq_gain, ctx->fr_cur_mode, | |
1183 | &fixed_gain_factor, &ctx->pitch_gain[0]); | |
1184 | ||
1185 | ctx->fixed_gain[0] = | |
1186 | ff_amr_set_fixed_gain(fixed_gain_factor, | |
1187 | ctx->celpm_ctx.dot_productf(ctx->fixed_vector, | |
1188 | ctx->fixed_vector, | |
1189 | AMRWB_SFR_SIZE) / | |
1190 | AMRWB_SFR_SIZE, | |
1191 | ctx->prediction_error, | |
1192 | ENERGY_MEAN, energy_pred_fac); | |
1193 | ||
1194 | /* Calculate voice factor and store tilt for next subframe */ | |
1195 | voice_fac = voice_factor(ctx->pitch_vector, ctx->pitch_gain[0], | |
1196 | ctx->fixed_vector, ctx->fixed_gain[0], | |
1197 | &ctx->celpm_ctx); | |
1198 | ctx->tilt_coef = voice_fac * 0.25 + 0.25; | |
1199 | ||
1200 | /* Construct current excitation */ | |
1201 | for (i = 0; i < AMRWB_SFR_SIZE; i++) { | |
1202 | ctx->excitation[i] *= ctx->pitch_gain[0]; | |
1203 | ctx->excitation[i] += ctx->fixed_gain[0] * ctx->fixed_vector[i]; | |
1204 | ctx->excitation[i] = truncf(ctx->excitation[i]); | |
1205 | } | |
1206 | ||
1207 | /* Post-processing of excitation elements */ | |
1208 | synth_fixed_gain = noise_enhancer(ctx->fixed_gain[0], &ctx->prev_tr_gain, | |
1209 | voice_fac, stab_fac); | |
1210 | ||
1211 | synth_fixed_vector = anti_sparseness(ctx, ctx->fixed_vector, | |
1212 | spare_vector); | |
1213 | ||
1214 | pitch_enhancer(synth_fixed_vector, voice_fac); | |
1215 | ||
1216 | synthesis(ctx, ctx->lp_coef[sub], synth_exc, synth_fixed_gain, | |
1217 | synth_fixed_vector, &ctx->samples_az[LP_ORDER]); | |
1218 | ||
1219 | /* Synthesis speech post-processing */ | |
1220 | de_emphasis(&ctx->samples_up[UPS_MEM_SIZE], | |
1221 | &ctx->samples_az[LP_ORDER], PREEMPH_FAC, ctx->demph_mem); | |
1222 | ||
1223 | ctx->acelpf_ctx.acelp_apply_order_2_transfer_function(&ctx->samples_up[UPS_MEM_SIZE], | |
1224 | &ctx->samples_up[UPS_MEM_SIZE], hpf_zeros, hpf_31_poles, | |
1225 | hpf_31_gain, ctx->hpf_31_mem, AMRWB_SFR_SIZE); | |
1226 | ||
1227 | upsample_5_4(sub_buf, &ctx->samples_up[UPS_FIR_SIZE], | |
1228 | AMRWB_SFR_SIZE_16k, &ctx->celpm_ctx); | |
1229 | ||
1230 | /* High frequency band (6.4 - 7.0 kHz) generation part */ | |
1231 | ctx->acelpf_ctx.acelp_apply_order_2_transfer_function(hb_samples, | |
1232 | &ctx->samples_up[UPS_MEM_SIZE], hpf_zeros, hpf_400_poles, | |
1233 | hpf_400_gain, ctx->hpf_400_mem, AMRWB_SFR_SIZE); | |
1234 | ||
1235 | hb_gain = find_hb_gain(ctx, hb_samples, | |
1236 | cur_subframe->hb_gain, cf->vad); | |
1237 | ||
1238 | scaled_hb_excitation(ctx, hb_exc, synth_exc, hb_gain); | |
1239 | ||
1240 | hb_synthesis(ctx, sub, &ctx->samples_hb[LP_ORDER_16k], | |
1241 | hb_exc, ctx->isf_cur, ctx->isf_past_final); | |
1242 | ||
1243 | /* High-band post-processing filters */ | |
1244 | hb_fir_filter(hb_samples, bpf_6_7_coef, ctx->bpf_6_7_mem, | |
1245 | &ctx->samples_hb[LP_ORDER_16k]); | |
1246 | ||
1247 | if (ctx->fr_cur_mode == MODE_23k85) | |
1248 | hb_fir_filter(hb_samples, lpf_7_coef, ctx->lpf_7_mem, | |
1249 | hb_samples); | |
1250 | ||
1251 | /* Add the low and high frequency bands */ | |
1252 | for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) | |
1253 | sub_buf[i] = (sub_buf[i] + hb_samples[i]) * (1.0f / (1 << 15)); | |
1254 | ||
1255 | /* Update buffers and history */ | |
1256 | update_sub_state(ctx); | |
1257 | } | |
1258 | ||
1259 | /* update state for next frame */ | |
1260 | memcpy(ctx->isp_sub4_past, ctx->isp[3], LP_ORDER * sizeof(ctx->isp[3][0])); | |
1261 | memcpy(ctx->isf_past_final, ctx->isf_cur, LP_ORDER * sizeof(float)); | |
1262 | ||
1263 | *got_frame_ptr = 1; | |
1264 | ||
1265 | return expected_fr_size; | |
1266 | } | |
1267 | ||
1268 | AVCodec ff_amrwb_decoder = { | |
1269 | .name = "amrwb", | |
1270 | .long_name = NULL_IF_CONFIG_SMALL("AMR-WB (Adaptive Multi-Rate WideBand)"), | |
1271 | .type = AVMEDIA_TYPE_AUDIO, | |
1272 | .id = AV_CODEC_ID_AMR_WB, | |
1273 | .priv_data_size = sizeof(AMRWBContext), | |
1274 | .init = amrwb_decode_init, | |
1275 | .decode = amrwb_decode_frame, | |
1276 | .capabilities = CODEC_CAP_DR1, | |
1277 | .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT, | |
1278 | AV_SAMPLE_FMT_NONE }, | |
1279 | }; |