| 1 | /* |
| 2 | * DSP Group TrueSpeech compatible decoder |
| 3 | * Copyright (c) 2005 Konstantin Shishkov |
| 4 | * |
| 5 | * This file is part of FFmpeg. |
| 6 | * |
| 7 | * FFmpeg is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU Lesser General Public |
| 9 | * License as published by the Free Software Foundation; either |
| 10 | * version 2.1 of the License, or (at your option) any later version. |
| 11 | * |
| 12 | * FFmpeg is distributed in the hope that it will be useful, |
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | * Lesser General Public License for more details. |
| 16 | * |
| 17 | * You should have received a copy of the GNU Lesser General Public |
| 18 | * License along with FFmpeg; if not, write to the Free Software |
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 20 | */ |
| 21 | |
| 22 | #include "libavutil/channel_layout.h" |
| 23 | #include "libavutil/intreadwrite.h" |
| 24 | #include "avcodec.h" |
| 25 | #include "bswapdsp.h" |
| 26 | #include "get_bits.h" |
| 27 | #include "internal.h" |
| 28 | |
| 29 | #include "truespeech_data.h" |
| 30 | /** |
| 31 | * @file |
| 32 | * TrueSpeech decoder. |
| 33 | */ |
| 34 | |
| 35 | /** |
| 36 | * TrueSpeech decoder context |
| 37 | */ |
| 38 | typedef struct { |
| 39 | BswapDSPContext bdsp; |
| 40 | /* input data */ |
| 41 | DECLARE_ALIGNED(16, uint8_t, buffer)[32]; |
| 42 | int16_t vector[8]; ///< input vector: 5/5/4/4/4/3/3/3 |
| 43 | int offset1[2]; ///< 8-bit value, used in one copying offset |
| 44 | int offset2[4]; ///< 7-bit value, encodes offsets for copying and for two-point filter |
| 45 | int pulseoff[4]; ///< 4-bit offset of pulse values block |
| 46 | int pulsepos[4]; ///< 27-bit variable, encodes 7 pulse positions |
| 47 | int pulseval[4]; ///< 7x2-bit pulse values |
| 48 | int flag; ///< 1-bit flag, shows how to choose filters |
| 49 | /* temporary data */ |
| 50 | int filtbuf[146]; // some big vector used for storing filters |
| 51 | int prevfilt[8]; // filter from previous frame |
| 52 | int16_t tmp1[8]; // coefficients for adding to out |
| 53 | int16_t tmp2[8]; // coefficients for adding to out |
| 54 | int16_t tmp3[8]; // coefficients for adding to out |
| 55 | int16_t cvector[8]; // correlated input vector |
| 56 | int filtval; // gain value for one function |
| 57 | int16_t newvec[60]; // tmp vector |
| 58 | int16_t filters[32]; // filters for every subframe |
| 59 | } TSContext; |
| 60 | |
| 61 | static av_cold int truespeech_decode_init(AVCodecContext * avctx) |
| 62 | { |
| 63 | TSContext *c = avctx->priv_data; |
| 64 | |
| 65 | if (avctx->channels != 1) { |
| 66 | avpriv_request_sample(avctx, "Channel count %d", avctx->channels); |
| 67 | return AVERROR_PATCHWELCOME; |
| 68 | } |
| 69 | |
| 70 | avctx->channel_layout = AV_CH_LAYOUT_MONO; |
| 71 | avctx->sample_fmt = AV_SAMPLE_FMT_S16; |
| 72 | |
| 73 | ff_bswapdsp_init(&c->bdsp); |
| 74 | |
| 75 | return 0; |
| 76 | } |
| 77 | |
| 78 | static void truespeech_read_frame(TSContext *dec, const uint8_t *input) |
| 79 | { |
| 80 | GetBitContext gb; |
| 81 | |
| 82 | dec->bdsp.bswap_buf((uint32_t *) dec->buffer, (const uint32_t *) input, 8); |
| 83 | init_get_bits(&gb, dec->buffer, 32 * 8); |
| 84 | |
| 85 | dec->vector[7] = ts_codebook[7][get_bits(&gb, 3)]; |
| 86 | dec->vector[6] = ts_codebook[6][get_bits(&gb, 3)]; |
| 87 | dec->vector[5] = ts_codebook[5][get_bits(&gb, 3)]; |
| 88 | dec->vector[4] = ts_codebook[4][get_bits(&gb, 4)]; |
| 89 | dec->vector[3] = ts_codebook[3][get_bits(&gb, 4)]; |
| 90 | dec->vector[2] = ts_codebook[2][get_bits(&gb, 4)]; |
| 91 | dec->vector[1] = ts_codebook[1][get_bits(&gb, 5)]; |
| 92 | dec->vector[0] = ts_codebook[0][get_bits(&gb, 5)]; |
| 93 | dec->flag = get_bits1(&gb); |
| 94 | |
| 95 | dec->offset1[0] = get_bits(&gb, 4) << 4; |
| 96 | dec->offset2[3] = get_bits(&gb, 7); |
| 97 | dec->offset2[2] = get_bits(&gb, 7); |
| 98 | dec->offset2[1] = get_bits(&gb, 7); |
| 99 | dec->offset2[0] = get_bits(&gb, 7); |
| 100 | |
| 101 | dec->offset1[1] = get_bits(&gb, 4); |
| 102 | dec->pulseval[1] = get_bits(&gb, 14); |
| 103 | dec->pulseval[0] = get_bits(&gb, 14); |
| 104 | |
| 105 | dec->offset1[1] |= get_bits(&gb, 4) << 4; |
| 106 | dec->pulseval[3] = get_bits(&gb, 14); |
| 107 | dec->pulseval[2] = get_bits(&gb, 14); |
| 108 | |
| 109 | dec->offset1[0] |= get_bits1(&gb); |
| 110 | dec->pulsepos[0] = get_bits_long(&gb, 27); |
| 111 | dec->pulseoff[0] = get_bits(&gb, 4); |
| 112 | |
| 113 | dec->offset1[0] |= get_bits1(&gb) << 1; |
| 114 | dec->pulsepos[1] = get_bits_long(&gb, 27); |
| 115 | dec->pulseoff[1] = get_bits(&gb, 4); |
| 116 | |
| 117 | dec->offset1[0] |= get_bits1(&gb) << 2; |
| 118 | dec->pulsepos[2] = get_bits_long(&gb, 27); |
| 119 | dec->pulseoff[2] = get_bits(&gb, 4); |
| 120 | |
| 121 | dec->offset1[0] |= get_bits1(&gb) << 3; |
| 122 | dec->pulsepos[3] = get_bits_long(&gb, 27); |
| 123 | dec->pulseoff[3] = get_bits(&gb, 4); |
| 124 | } |
| 125 | |
| 126 | static void truespeech_correlate_filter(TSContext *dec) |
| 127 | { |
| 128 | int16_t tmp[8]; |
| 129 | int i, j; |
| 130 | |
| 131 | for(i = 0; i < 8; i++){ |
| 132 | if(i > 0){ |
| 133 | memcpy(tmp, dec->cvector, i * sizeof(*tmp)); |
| 134 | for(j = 0; j < i; j++) |
| 135 | dec->cvector[j] = ((tmp[i - j - 1] * dec->vector[i]) + |
| 136 | (dec->cvector[j] << 15) + 0x4000) >> 15; |
| 137 | } |
| 138 | dec->cvector[i] = (8 - dec->vector[i]) >> 3; |
| 139 | } |
| 140 | for(i = 0; i < 8; i++) |
| 141 | dec->cvector[i] = (dec->cvector[i] * ts_decay_994_1000[i]) >> 15; |
| 142 | |
| 143 | dec->filtval = dec->vector[0]; |
| 144 | } |
| 145 | |
| 146 | static void truespeech_filters_merge(TSContext *dec) |
| 147 | { |
| 148 | int i; |
| 149 | |
| 150 | if(!dec->flag){ |
| 151 | for(i = 0; i < 8; i++){ |
| 152 | dec->filters[i + 0] = dec->prevfilt[i]; |
| 153 | dec->filters[i + 8] = dec->prevfilt[i]; |
| 154 | } |
| 155 | }else{ |
| 156 | for(i = 0; i < 8; i++){ |
| 157 | dec->filters[i + 0]=(dec->cvector[i] * 21846 + dec->prevfilt[i] * 10923 + 16384) >> 15; |
| 158 | dec->filters[i + 8]=(dec->cvector[i] * 10923 + dec->prevfilt[i] * 21846 + 16384) >> 15; |
| 159 | } |
| 160 | } |
| 161 | for(i = 0; i < 8; i++){ |
| 162 | dec->filters[i + 16] = dec->cvector[i]; |
| 163 | dec->filters[i + 24] = dec->cvector[i]; |
| 164 | } |
| 165 | } |
| 166 | |
| 167 | static void truespeech_apply_twopoint_filter(TSContext *dec, int quart) |
| 168 | { |
| 169 | int16_t tmp[146 + 60], *ptr0, *ptr1; |
| 170 | const int16_t *filter; |
| 171 | int i, t, off; |
| 172 | |
| 173 | t = dec->offset2[quart]; |
| 174 | if(t == 127){ |
| 175 | memset(dec->newvec, 0, 60 * sizeof(*dec->newvec)); |
| 176 | return; |
| 177 | } |
| 178 | for(i = 0; i < 146; i++) |
| 179 | tmp[i] = dec->filtbuf[i]; |
| 180 | off = (t / 25) + dec->offset1[quart >> 1] + 18; |
| 181 | off = av_clip(off, 0, 145); |
| 182 | ptr0 = tmp + 145 - off; |
| 183 | ptr1 = tmp + 146; |
| 184 | filter = ts_order2_coeffs + (t % 25) * 2; |
| 185 | for(i = 0; i < 60; i++){ |
| 186 | t = (ptr0[0] * filter[0] + ptr0[1] * filter[1] + 0x2000) >> 14; |
| 187 | ptr0++; |
| 188 | dec->newvec[i] = t; |
| 189 | ptr1[i] = t; |
| 190 | } |
| 191 | } |
| 192 | |
| 193 | static void truespeech_place_pulses(TSContext *dec, int16_t *out, int quart) |
| 194 | { |
| 195 | int16_t tmp[7]; |
| 196 | int i, j, t; |
| 197 | const int16_t *ptr1; |
| 198 | int16_t *ptr2; |
| 199 | int coef; |
| 200 | |
| 201 | memset(out, 0, 60 * sizeof(*out)); |
| 202 | for(i = 0; i < 7; i++) { |
| 203 | t = dec->pulseval[quart] & 3; |
| 204 | dec->pulseval[quart] >>= 2; |
| 205 | tmp[6 - i] = ts_pulse_scales[dec->pulseoff[quart] * 4 + t]; |
| 206 | } |
| 207 | |
| 208 | coef = dec->pulsepos[quart] >> 15; |
| 209 | ptr1 = ts_pulse_values + 30; |
| 210 | ptr2 = tmp; |
| 211 | for(i = 0, j = 3; (i < 30) && (j > 0); i++){ |
| 212 | t = *ptr1++; |
| 213 | if(coef >= t) |
| 214 | coef -= t; |
| 215 | else{ |
| 216 | out[i] = *ptr2++; |
| 217 | ptr1 += 30; |
| 218 | j--; |
| 219 | } |
| 220 | } |
| 221 | coef = dec->pulsepos[quart] & 0x7FFF; |
| 222 | ptr1 = ts_pulse_values; |
| 223 | for(i = 30, j = 4; (i < 60) && (j > 0); i++){ |
| 224 | t = *ptr1++; |
| 225 | if(coef >= t) |
| 226 | coef -= t; |
| 227 | else{ |
| 228 | out[i] = *ptr2++; |
| 229 | ptr1 += 30; |
| 230 | j--; |
| 231 | } |
| 232 | } |
| 233 | |
| 234 | } |
| 235 | |
| 236 | static void truespeech_update_filters(TSContext *dec, int16_t *out, int quart) |
| 237 | { |
| 238 | int i; |
| 239 | |
| 240 | memmove(dec->filtbuf, &dec->filtbuf[60], 86 * sizeof(*dec->filtbuf)); |
| 241 | for(i = 0; i < 60; i++){ |
| 242 | dec->filtbuf[i + 86] = out[i] + dec->newvec[i] - (dec->newvec[i] >> 3); |
| 243 | out[i] += dec->newvec[i]; |
| 244 | } |
| 245 | } |
| 246 | |
| 247 | static void truespeech_synth(TSContext *dec, int16_t *out, int quart) |
| 248 | { |
| 249 | int i,k; |
| 250 | int t[8]; |
| 251 | int16_t *ptr0, *ptr1; |
| 252 | |
| 253 | ptr0 = dec->tmp1; |
| 254 | ptr1 = dec->filters + quart * 8; |
| 255 | for(i = 0; i < 60; i++){ |
| 256 | int sum = 0; |
| 257 | for(k = 0; k < 8; k++) |
| 258 | sum += ptr0[k] * ptr1[k]; |
| 259 | sum = (sum + (out[i] << 12) + 0x800) >> 12; |
| 260 | out[i] = av_clip(sum, -0x7FFE, 0x7FFE); |
| 261 | for(k = 7; k > 0; k--) |
| 262 | ptr0[k] = ptr0[k - 1]; |
| 263 | ptr0[0] = out[i]; |
| 264 | } |
| 265 | |
| 266 | for(i = 0; i < 8; i++) |
| 267 | t[i] = (ts_decay_35_64[i] * ptr1[i]) >> 15; |
| 268 | |
| 269 | ptr0 = dec->tmp2; |
| 270 | for(i = 0; i < 60; i++){ |
| 271 | int sum = 0; |
| 272 | for(k = 0; k < 8; k++) |
| 273 | sum += ptr0[k] * t[k]; |
| 274 | for(k = 7; k > 0; k--) |
| 275 | ptr0[k] = ptr0[k - 1]; |
| 276 | ptr0[0] = out[i]; |
| 277 | out[i] = ((out[i] << 12) - sum) >> 12; |
| 278 | } |
| 279 | |
| 280 | for(i = 0; i < 8; i++) |
| 281 | t[i] = (ts_decay_3_4[i] * ptr1[i]) >> 15; |
| 282 | |
| 283 | ptr0 = dec->tmp3; |
| 284 | for(i = 0; i < 60; i++){ |
| 285 | int sum = out[i] << 12; |
| 286 | for(k = 0; k < 8; k++) |
| 287 | sum += ptr0[k] * t[k]; |
| 288 | for(k = 7; k > 0; k--) |
| 289 | ptr0[k] = ptr0[k - 1]; |
| 290 | ptr0[0] = av_clip((sum + 0x800) >> 12, -0x7FFE, 0x7FFE); |
| 291 | |
| 292 | sum = ((ptr0[1] * (dec->filtval - (dec->filtval >> 2))) >> 4) + sum; |
| 293 | sum = sum - (sum >> 3); |
| 294 | out[i] = av_clip((sum + 0x800) >> 12, -0x7FFE, 0x7FFE); |
| 295 | } |
| 296 | } |
| 297 | |
| 298 | static void truespeech_save_prevvec(TSContext *c) |
| 299 | { |
| 300 | int i; |
| 301 | |
| 302 | for(i = 0; i < 8; i++) |
| 303 | c->prevfilt[i] = c->cvector[i]; |
| 304 | } |
| 305 | |
| 306 | static int truespeech_decode_frame(AVCodecContext *avctx, void *data, |
| 307 | int *got_frame_ptr, AVPacket *avpkt) |
| 308 | { |
| 309 | AVFrame *frame = data; |
| 310 | const uint8_t *buf = avpkt->data; |
| 311 | int buf_size = avpkt->size; |
| 312 | TSContext *c = avctx->priv_data; |
| 313 | |
| 314 | int i, j; |
| 315 | int16_t *samples; |
| 316 | int iterations, ret; |
| 317 | |
| 318 | iterations = buf_size / 32; |
| 319 | |
| 320 | if (!iterations) { |
| 321 | av_log(avctx, AV_LOG_ERROR, |
| 322 | "Too small input buffer (%d bytes), need at least 32 bytes\n", buf_size); |
| 323 | return -1; |
| 324 | } |
| 325 | |
| 326 | /* get output buffer */ |
| 327 | frame->nb_samples = iterations * 240; |
| 328 | if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) |
| 329 | return ret; |
| 330 | samples = (int16_t *)frame->data[0]; |
| 331 | |
| 332 | memset(samples, 0, iterations * 240 * sizeof(*samples)); |
| 333 | |
| 334 | for(j = 0; j < iterations; j++) { |
| 335 | truespeech_read_frame(c, buf); |
| 336 | buf += 32; |
| 337 | |
| 338 | truespeech_correlate_filter(c); |
| 339 | truespeech_filters_merge(c); |
| 340 | |
| 341 | for(i = 0; i < 4; i++) { |
| 342 | truespeech_apply_twopoint_filter(c, i); |
| 343 | truespeech_place_pulses (c, samples, i); |
| 344 | truespeech_update_filters(c, samples, i); |
| 345 | truespeech_synth (c, samples, i); |
| 346 | samples += 60; |
| 347 | } |
| 348 | |
| 349 | truespeech_save_prevvec(c); |
| 350 | } |
| 351 | |
| 352 | *got_frame_ptr = 1; |
| 353 | |
| 354 | return buf_size; |
| 355 | } |
| 356 | |
| 357 | AVCodec ff_truespeech_decoder = { |
| 358 | .name = "truespeech", |
| 359 | .long_name = NULL_IF_CONFIG_SMALL("DSP Group TrueSpeech"), |
| 360 | .type = AVMEDIA_TYPE_AUDIO, |
| 361 | .id = AV_CODEC_ID_TRUESPEECH, |
| 362 | .priv_data_size = sizeof(TSContext), |
| 363 | .init = truespeech_decode_init, |
| 364 | .decode = truespeech_decode_frame, |
| 365 | .capabilities = CODEC_CAP_DR1, |
| 366 | }; |