/*
 * Voxware MetaSound decoder
 * Copyright (c) 2013 Konstantin Shishkov
 * based on TwinVQ decoder
 * Copyright (c) 2009 Vitor Sessak
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <inttypes.h>
#include <math.h>
#include <stdint.h>

#define BITSTREAM_READER_LE
#include "libavutil/channel_layout.h"
#include "libavutil/float_dsp.h"
#include "metasound_data.h"
/**
 * Add a train of scaled peaks to a signal.
 *
 * Consecutive samples of shape are multiplied by ppc_gain and accumulated
 * into speech. The first peak covers speech[0 .. width / 2 - 1]; every
 * following peak is width samples wide and centred on
 * (int)(i * period + 0.5) for i = 1, 2, ...
 *
 * @param period   distance between peak centres, in samples
 * @param width    number of samples per peak; nothing is added if it is
 *                 not positive
 * @param shape    peak samples, len entries
 * @param ppc_gain factor applied to every shape sample
 * @param speech   buffer the peaks are accumulated into; the caller must
 *                 make sure every touched index is valid
 * @param len      number of entries in shape
 */
static void add_peak(float period, int width, const float *shape,
                     float ppc_gain, float *speech, int len)
{
    int i, j, center;
    const float *shape_end = shape + len;

    /* A zero width would make ROUNDED_DIV() below divide by zero; a
     * negative one makes every loop empty anyway. */
    if (width <= 0)
        return;

    // First peak centered around zero
    for (i = 0; i < width / 2; i++)
        speech[i] += ppc_gain * *shape++;

    for (i = 1; i < ROUNDED_DIV(len, width); i++) {
        center = (int)(i * period + 0.5);
        for (j = -width / 2; j < (width + 1) / 2; j++)
            speech[j + center] += ppc_gain * *shape++;
    }

    // For the last block, be careful not to go beyond the end of the buffer
    center = (int)(i * period + 0.5);
    for (j = -width / 2; j < (width + 1) / 2 && shape < shape_end; j++)
        speech[j + center] += ppc_gain * *shape++;
}
63 static void decode_ppc(TwinVQContext
*tctx
, int period_coef
, int g_coef
,
64 const float *shape
, float *speech
)
66 const TwinVQModeTab
*mtab
= tctx
->mtab
;
67 int isampf
= tctx
->avctx
->sample_rate
/ 1000;
68 int ibps
= tctx
->avctx
->bit_rate
/ (1000 * tctx
->avctx
->channels
);
71 float ratio
= (float)mtab
->size
/ isampf
;
72 float min_period
, max_period
, period_range
, period
;
75 float pgain_base
, pgain_step
, ppc_gain
;
77 if (tctx
->avctx
->channels
== 1) {
78 min_period
= log2(ratio
* 0.2);
79 max_period
= min_period
+ log2(6);
81 min_period
= (int)(ratio
* 0.2 * 400 + 0.5) / 400.0;
82 max_period
= (int)(ratio
* 0.2 * 400 * 6 + 0.5) / 400.0;
84 period_range
= max_period
- min_period
;
85 period
= min_period
+ period_coef
* period_range
/
86 ((1 << mtab
->ppc_period_bit
) - 1);
87 if (tctx
->avctx
->channels
== 1)
88 period
= powf(2.0, period
);
90 period
= (int)(period
* 400 + 0.5) / 400.0;
93 case 8: some_mult
= 2.0; break;
94 case 11: some_mult
= 3.0; break;
95 case 16: some_mult
= 3.0; break;
96 case 22: some_mult
= ibps
== 32 ? 2.0 : 4.0; break;
97 case 44: some_mult
= 8.0; break;
98 default: some_mult
= 4.0;
101 width
= (int)(some_mult
/ (mtab
->size
/ period
) * mtab
->ppc_shape_len
);
102 if (isampf
== 22 && ibps
== 32)
103 width
= (int)((2.0 / period
+ 1) * width
+ 0.5);
105 pgain_base
= tctx
->avctx
->channels
== 2 ? 25000.0 : 20000.0;
106 pgain_step
= pgain_base
/ ((1 << mtab
->pgain_bit
) - 1);
107 ppc_gain
= 1.0 / 8192 *
108 twinvq_mulawinv(pgain_step
* g_coef
+ pgain_step
/ 2,
109 pgain_base
, TWINVQ_PGAIN_MU
);
111 add_peak(period
, width
, shape
, ppc_gain
, speech
, mtab
->ppc_shape_len
);
114 static void dec_bark_env(TwinVQContext
*tctx
, const uint8_t *in
, int use_hist
,
115 int ch
, float *out
, float gain
,
116 enum TwinVQFrameType ftype
)
118 const TwinVQModeTab
*mtab
= tctx
->mtab
;
120 float *hist
= tctx
->bark_hist
[ftype
][ch
];
121 float val
= ((const float []) { 0.4, 0.35, 0.28 })[ftype
];
122 int bark_n_coef
= mtab
->fmode
[ftype
].bark_n_coef
;
123 int fw_cb_len
= mtab
->fmode
[ftype
].bark_env_size
/ bark_n_coef
;
126 if (tctx
->avctx
->channels
== 1)
128 for (i
= 0; i
< fw_cb_len
; i
++)
129 for (j
= 0; j
< bark_n_coef
; j
++, idx
++) {
130 float tmp2
= mtab
->fmode
[ftype
].bark_cb
[fw_cb_len
* in
[j
] + i
] *
134 if (tctx
->avctx
->channels
== 1)
136 tmp2
+ val
* hist
[idx
] + 1.0 : tmp2
+ 1.0;
138 st
= use_hist
? (1.0 - val
) * tmp2
+ val
* hist
[idx
] + 1.0
145 twinvq_memset_float(out
, st
* gain
,
146 mtab
->fmode
[ftype
].bark_tab
[idx
]);
147 out
+= mtab
->fmode
[ftype
].bark_tab
[idx
];
151 static void read_cb_data(TwinVQContext
*tctx
, GetBitContext
*gb
,
152 uint8_t *dst
, enum TwinVQFrameType ftype
)
156 for (i
= 0; i
< tctx
->n_div
[ftype
]; i
++) {
157 int bs_second_part
= (i
>= tctx
->bits_main_spec_change
[ftype
]);
159 *dst
++ = get_bits(gb
, tctx
->bits_main_spec
[0][ftype
][bs_second_part
]);
160 *dst
++ = get_bits(gb
, tctx
->bits_main_spec
[1][ftype
][bs_second_part
]);
164 static int metasound_read_bitstream(AVCodecContext
*avctx
, TwinVQContext
*tctx
,
165 const uint8_t *buf
, int buf_size
)
167 TwinVQFrameData
*bits
;
168 const TwinVQModeTab
*mtab
= tctx
->mtab
;
169 int channels
= tctx
->avctx
->channels
;
174 init_get_bits(&gb
, buf
, buf_size
* 8);
176 for (tctx
->cur_frame
= 0; tctx
->cur_frame
< tctx
->frames_per_packet
;
178 bits
= tctx
->bits
+ tctx
->cur_frame
;
180 bits
->window_type
= get_bits(&gb
, TWINVQ_WINDOW_TYPE_BITS
);
182 if (bits
->window_type
> 8) {
183 av_log(avctx
, AV_LOG_ERROR
, "Invalid window type, broken sample?\n");
184 return AVERROR_INVALIDDATA
;
187 bits
->ftype
= ff_twinvq_wtype_to_ftype_table
[tctx
->bits
[tctx
->cur_frame
].window_type
];
189 sub
= mtab
->fmode
[bits
->ftype
].sub
;
191 if (bits
->ftype
!= TWINVQ_FT_SHORT
&& !tctx
->is_6kbps
)
194 read_cb_data(tctx
, &gb
, bits
->main_coeffs
, bits
->ftype
);
196 for (i
= 0; i
< channels
; i
++)
197 for (j
= 0; j
< sub
; j
++)
198 for (k
= 0; k
< mtab
->fmode
[bits
->ftype
].bark_n_coef
; k
++)
199 bits
->bark1
[i
][j
][k
] =
200 get_bits(&gb
, mtab
->fmode
[bits
->ftype
].bark_n_bit
);
202 for (i
= 0; i
< channels
; i
++)
203 for (j
= 0; j
< sub
; j
++)
204 bits
->bark_use_hist
[i
][j
] = get_bits1(&gb
);
206 if (bits
->ftype
== TWINVQ_FT_LONG
) {
207 for (i
= 0; i
< channels
; i
++)
208 bits
->gain_bits
[i
] = get_bits(&gb
, TWINVQ_GAIN_BITS
);
210 for (i
= 0; i
< channels
; i
++) {
211 bits
->gain_bits
[i
] = get_bits(&gb
, TWINVQ_GAIN_BITS
);
212 for (j
= 0; j
< sub
; j
++)
213 bits
->sub_gain_bits
[i
* sub
+ j
] =
214 get_bits(&gb
, TWINVQ_SUB_GAIN_BITS
);
218 for (i
= 0; i
< channels
; i
++) {
219 bits
->lpc_hist_idx
[i
] = get_bits(&gb
, mtab
->lsp_bit0
);
220 bits
->lpc_idx1
[i
] = get_bits(&gb
, mtab
->lsp_bit1
);
222 for (j
= 0; j
< mtab
->lsp_split
; j
++)
223 bits
->lpc_idx2
[i
][j
] = get_bits(&gb
, mtab
->lsp_bit2
);
226 if (bits
->ftype
== TWINVQ_FT_LONG
) {
227 read_cb_data(tctx
, &gb
, bits
->ppc_coeffs
, 3);
228 for (i
= 0; i
< channels
; i
++) {
229 bits
->p_coef
[i
] = get_bits(&gb
, mtab
->ppc_period_bit
);
230 bits
->g_coef
[i
] = get_bits(&gb
, mtab
->pgain_bit
);
234 // subframes are aligned to nibbles
235 if (get_bits_count(&gb
) & 3)
236 skip_bits(&gb
, 4 - (get_bits_count(&gb
) & 3));
239 return (get_bits_count(&gb
) + 7) / 8;
/**
 * Stream parameters associated with one codec tag.
 *
 * NOTE(review): the field list was lost in the damaged listing. Names and
 * order are rebuilt from the codec_props initialisers and from the
 * props-> accesses in metasound_decode_init(); the exact field types
 * should be confirmed.
 */
typedef struct MetasoundProps {
    uint32_t tag;         ///< tag read from the extradata
    int      bit_rate;    ///< multiplied by 1000 to give avctx->bit_rate
    int      channels;    ///< copied to avctx->channels
    int      sample_rate; ///< copied to avctx->sample_rate
} MetasoundProps;
249 static const MetasoundProps codec_props
[] = {
250 { MKTAG('V','X','0','3'), 6, 1, 8000 },
251 { MKTAG('V','X','0','4'), 12, 2, 8000 },
253 { MKTAG('V','O','X','i'), 8, 1, 8000 },
254 { MKTAG('V','O','X','j'), 10, 1, 11025 },
255 { MKTAG('V','O','X','k'), 16, 1, 16000 },
256 { MKTAG('V','O','X','L'), 24, 1, 22050 },
257 { MKTAG('V','O','X','q'), 32, 1, 44100 },
258 { MKTAG('V','O','X','r'), 40, 1, 44100 },
259 { MKTAG('V','O','X','s'), 48, 1, 44100 },
260 { MKTAG('V','O','X','t'), 16, 2, 8000 },
261 { MKTAG('V','O','X','u'), 20, 2, 11025 },
262 { MKTAG('V','O','X','v'), 32, 2, 16000 },
263 { MKTAG('V','O','X','w'), 48, 2, 22050 },
264 { MKTAG('V','O','X','x'), 64, 2, 44100 },
265 { MKTAG('V','O','X','y'), 80, 2, 44100 },
266 { MKTAG('V','O','X','z'), 96, 2, 44100 },
271 static av_cold
int metasound_decode_init(AVCodecContext
*avctx
)
274 TwinVQContext
*tctx
= avctx
->priv_data
;
276 const MetasoundProps
*props
= codec_props
;
278 if (!avctx
->extradata
|| avctx
->extradata_size
< 16) {
279 av_log(avctx
, AV_LOG_ERROR
, "Missing or incomplete extradata\n");
280 return AVERROR_INVALIDDATA
;
283 tag
= AV_RL32(avctx
->extradata
+ 12);
287 av_log(avctx
, AV_LOG_ERROR
, "Could not find tag %08"PRIX32
"\n", tag
);
288 return AVERROR_INVALIDDATA
;
290 if (props
->tag
== tag
) {
291 avctx
->sample_rate
= props
->sample_rate
;
292 avctx
->channels
= props
->channels
;
293 avctx
->bit_rate
= props
->bit_rate
* 1000;
294 isampf
= avctx
->sample_rate
/ 1000;
300 if (avctx
->channels
<= 0 || avctx
->channels
> TWINVQ_CHANNELS_MAX
) {
301 av_log(avctx
, AV_LOG_ERROR
, "Unsupported number of channels: %i\n",
303 return AVERROR_INVALIDDATA
;
305 avctx
->channel_layout
= avctx
->channels
== 1 ? AV_CH_LAYOUT_MONO
306 : AV_CH_LAYOUT_STEREO
;
308 ibps
= avctx
->bit_rate
/ (1000 * avctx
->channels
);
310 switch ((avctx
->channels
<< 16) + (isampf
<< 8) + ibps
) {
311 case (1 << 16) + ( 8 << 8) + 6:
312 tctx
->mtab
= &ff_metasound_mode0806
;
314 case (2 << 16) + ( 8 << 8) + 6:
315 tctx
->mtab
= &ff_metasound_mode0806s
;
317 case (1 << 16) + ( 8 << 8) + 8:
318 tctx
->mtab
= &ff_metasound_mode0808
;
320 case (2 << 16) + ( 8 << 8) + 8:
321 tctx
->mtab
= &ff_metasound_mode0808s
;
323 case (1 << 16) + (11 << 8) + 10:
324 tctx
->mtab
= &ff_metasound_mode1110
;
326 case (2 << 16) + (11 << 8) + 10:
327 tctx
->mtab
= &ff_metasound_mode1110s
;
329 case (1 << 16) + (16 << 8) + 16:
330 tctx
->mtab
= &ff_metasound_mode1616
;
332 case (2 << 16) + (16 << 8) + 16:
333 tctx
->mtab
= &ff_metasound_mode1616s
;
335 case (1 << 16) + (22 << 8) + 24:
336 tctx
->mtab
= &ff_metasound_mode2224
;
338 case (2 << 16) + (22 << 8) + 24:
339 tctx
->mtab
= &ff_metasound_mode2224s
;
341 case (1 << 16) + (44 << 8) + 32:
342 tctx
->mtab
= &ff_metasound_mode4432
;
344 case (2 << 16) + (44 << 8) + 32:
345 tctx
->mtab
= &ff_metasound_mode4432s
;
347 case (1 << 16) + (44 << 8) + 40:
348 tctx
->mtab
= &ff_metasound_mode4440
;
350 case (2 << 16) + (44 << 8) + 40:
351 tctx
->mtab
= &ff_metasound_mode4440s
;
353 case (1 << 16) + (44 << 8) + 48:
354 tctx
->mtab
= &ff_metasound_mode4448
;
356 case (2 << 16) + (44 << 8) + 48:
357 tctx
->mtab
= &ff_metasound_mode4448s
;
360 av_log(avctx
, AV_LOG_ERROR
,
361 "This version does not support %d kHz - %d kbit/s/ch mode.\n",
363 return AVERROR(ENOSYS
);
366 tctx
->codec
= TWINVQ_CODEC_METASOUND
;
367 tctx
->read_bitstream
= metasound_read_bitstream
;
368 tctx
->dec_bark_env
= dec_bark_env
;
369 tctx
->decode_ppc
= decode_ppc
;
370 tctx
->frame_size
= avctx
->bit_rate
* tctx
->mtab
->size
371 / avctx
->sample_rate
;
372 tctx
->is_6kbps
= ibps
== 6;
374 return ff_twinvq_decode_init(avctx
);
377 AVCodec ff_metasound_decoder
= {
379 .long_name
= NULL_IF_CONFIG_SMALL("Voxware MetaSound"),
380 .type
= AVMEDIA_TYPE_AUDIO
,
381 .id
= AV_CODEC_ID_METASOUND
,
382 .priv_data_size
= sizeof(TwinVQContext
),
383 .init
= metasound_decode_init
,
384 .close
= ff_twinvq_decode_close
,
385 .decode
= ff_twinvq_decode_frame
,
386 .capabilities
= CODEC_CAP_DR1
,
387 .sample_fmts
= (const enum AVSampleFormat
[]) { AV_SAMPLE_FMT_FLTP
,
388 AV_SAMPLE_FMT_NONE
},