| 1 | /* |
| 2 | * Copyright (c) CMU 1993 Computer Science, Speech Group |
| 3 | * Chengxiang Lu and Alex Hauptmann |
| 4 | * Copyright (c) 2005 Steve Underwood <steveu at coppice.org> |
| 5 | * Copyright (c) 2009 Kenan Gillet |
| 6 | * Copyright (c) 2010 Martin Storsjo |
| 7 | * |
| 8 | * This file is part of FFmpeg. |
| 9 | * |
| 10 | * FFmpeg is free software; you can redistribute it and/or |
| 11 | * modify it under the terms of the GNU Lesser General Public |
| 12 | * License as published by the Free Software Foundation; either |
| 13 | * version 2.1 of the License, or (at your option) any later version. |
| 14 | * |
| 15 | * FFmpeg is distributed in the hope that it will be useful, |
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 18 | * Lesser General Public License for more details. |
| 19 | * |
| 20 | * You should have received a copy of the GNU Lesser General Public |
| 21 | * License along with FFmpeg; if not, write to the Free Software |
| 22 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 23 | */ |
| 24 | |
| 25 | /** |
| 26 | * @file |
| 27 | * G.722 ADPCM audio encoder |
| 28 | */ |
| 29 | |
| 30 | #include "libavutil/avassert.h" |
| 31 | #include "avcodec.h" |
| 32 | #include "internal.h" |
| 33 | #include "g722.h" |
| 34 | #include "libavutil/common.h" |
| 35 | |
| 36 | #define FREEZE_INTERVAL 128 |
| 37 | |
| 38 | /* This is an arbitrary value. Allowing insanely large values leads to strange |
| 39 | problems, so we limit it to a reasonable value */ |
| 40 | #define MAX_FRAME_SIZE 32768 |
| 41 | |
| 42 | /* We clip the value of avctx->trellis to prevent data type overflows and |
| 43 | undefined behavior. Using larger values is insanely slow anyway. */ |
| 44 | #define MIN_TRELLIS 0 |
| 45 | #define MAX_TRELLIS 16 |
| 46 | |
| 47 | static av_cold int g722_encode_close(AVCodecContext *avctx) |
| 48 | { |
| 49 | G722Context *c = avctx->priv_data; |
| 50 | int i; |
| 51 | for (i = 0; i < 2; i++) { |
| 52 | av_freep(&c->paths[i]); |
| 53 | av_freep(&c->node_buf[i]); |
| 54 | av_freep(&c->nodep_buf[i]); |
| 55 | } |
| 56 | return 0; |
| 57 | } |
| 58 | |
| 59 | static av_cold int g722_encode_init(AVCodecContext * avctx) |
| 60 | { |
| 61 | G722Context *c = avctx->priv_data; |
| 62 | int ret; |
| 63 | |
| 64 | if (avctx->channels != 1) { |
| 65 | av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n"); |
| 66 | return AVERROR_INVALIDDATA; |
| 67 | } |
| 68 | |
| 69 | c->band[0].scale_factor = 8; |
| 70 | c->band[1].scale_factor = 2; |
| 71 | c->prev_samples_pos = 22; |
| 72 | |
| 73 | if (avctx->trellis) { |
| 74 | int frontier = 1 << avctx->trellis; |
| 75 | int max_paths = frontier * FREEZE_INTERVAL; |
| 76 | int i; |
| 77 | for (i = 0; i < 2; i++) { |
| 78 | c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths)); |
| 79 | c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf)); |
| 80 | c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf)); |
| 81 | if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) { |
| 82 | ret = AVERROR(ENOMEM); |
| 83 | goto error; |
| 84 | } |
| 85 | } |
| 86 | } |
| 87 | |
| 88 | if (avctx->frame_size) { |
| 89 | /* validate frame size */ |
| 90 | if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) { |
| 91 | int new_frame_size; |
| 92 | |
| 93 | if (avctx->frame_size == 1) |
| 94 | new_frame_size = 2; |
| 95 | else if (avctx->frame_size > MAX_FRAME_SIZE) |
| 96 | new_frame_size = MAX_FRAME_SIZE; |
| 97 | else |
| 98 | new_frame_size = avctx->frame_size - 1; |
| 99 | |
| 100 | av_log(avctx, AV_LOG_WARNING, "Requested frame size is not " |
| 101 | "allowed. Using %d instead of %d\n", new_frame_size, |
| 102 | avctx->frame_size); |
| 103 | avctx->frame_size = new_frame_size; |
| 104 | } |
| 105 | } else { |
| 106 | /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is |
| 107 | a common packet size for VoIP applications */ |
| 108 | avctx->frame_size = 320; |
| 109 | } |
| 110 | avctx->initial_padding = 22; |
| 111 | |
| 112 | if (avctx->trellis) { |
| 113 | /* validate trellis */ |
| 114 | if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) { |
| 115 | int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS); |
| 116 | av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not " |
| 117 | "allowed. Using %d instead of %d\n", new_trellis, |
| 118 | avctx->trellis); |
| 119 | avctx->trellis = new_trellis; |
| 120 | } |
| 121 | } |
| 122 | |
| 123 | return 0; |
| 124 | error: |
| 125 | g722_encode_close(avctx); |
| 126 | return ret; |
| 127 | } |
| 128 | |
| 129 | static const int16_t low_quant[33] = { |
| 130 | 35, 72, 110, 150, 190, 233, 276, 323, |
| 131 | 370, 422, 473, 530, 587, 650, 714, 786, |
| 132 | 858, 940, 1023, 1121, 1219, 1339, 1458, 1612, |
| 133 | 1765, 1980, 2195, 2557, 2919 |
| 134 | }; |
| 135 | |
| 136 | static inline void filter_samples(G722Context *c, const int16_t *samples, |
| 137 | int *xlow, int *xhigh) |
| 138 | { |
| 139 | int xout1, xout2; |
| 140 | c->prev_samples[c->prev_samples_pos++] = samples[0]; |
| 141 | c->prev_samples[c->prev_samples_pos++] = samples[1]; |
| 142 | ff_g722_apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2); |
| 143 | *xlow = xout1 + xout2 >> 14; |
| 144 | *xhigh = xout1 - xout2 >> 14; |
| 145 | if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) { |
| 146 | memmove(c->prev_samples, |
| 147 | c->prev_samples + c->prev_samples_pos - 22, |
| 148 | 22 * sizeof(c->prev_samples[0])); |
| 149 | c->prev_samples_pos = 22; |
| 150 | } |
| 151 | } |
| 152 | |
| 153 | static inline int encode_high(const struct G722Band *state, int xhigh) |
| 154 | { |
| 155 | int diff = av_clip_int16(xhigh - state->s_predictor); |
| 156 | int pred = 141 * state->scale_factor >> 8; |
| 157 | /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */ |
| 158 | return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0); |
| 159 | } |
| 160 | |
| 161 | static inline int encode_low(const struct G722Band* state, int xlow) |
| 162 | { |
| 163 | int diff = av_clip_int16(xlow - state->s_predictor); |
| 164 | /* = diff >= 0 ? diff : -(diff + 1) */ |
| 165 | int limit = diff ^ (diff >> (sizeof(diff)*8-1)); |
| 166 | int i = 0; |
| 167 | limit = limit + 1 << 10; |
| 168 | if (limit > low_quant[8] * state->scale_factor) |
| 169 | i = 9; |
| 170 | while (i < 29 && limit > low_quant[i] * state->scale_factor) |
| 171 | i++; |
| 172 | return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i; |
| 173 | } |
| 174 | |
| 175 | static void g722_encode_trellis(G722Context *c, int trellis, |
| 176 | uint8_t *dst, int nb_samples, |
| 177 | const int16_t *samples) |
| 178 | { |
| 179 | int i, j, k; |
| 180 | int frontier = 1 << trellis; |
| 181 | struct TrellisNode **nodes[2]; |
| 182 | struct TrellisNode **nodes_next[2]; |
| 183 | int pathn[2] = {0, 0}, froze = -1; |
| 184 | struct TrellisPath *p[2]; |
| 185 | |
| 186 | for (i = 0; i < 2; i++) { |
| 187 | nodes[i] = c->nodep_buf[i]; |
| 188 | nodes_next[i] = c->nodep_buf[i] + frontier; |
| 189 | memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i])); |
| 190 | nodes[i][0] = c->node_buf[i] + frontier; |
| 191 | nodes[i][0]->ssd = 0; |
| 192 | nodes[i][0]->path = 0; |
| 193 | nodes[i][0]->state = c->band[i]; |
| 194 | } |
| 195 | |
| 196 | for (i = 0; i < nb_samples >> 1; i++) { |
| 197 | int xlow, xhigh; |
| 198 | struct TrellisNode *next[2]; |
| 199 | int heap_pos[2] = {0, 0}; |
| 200 | |
| 201 | for (j = 0; j < 2; j++) { |
| 202 | next[j] = c->node_buf[j] + frontier*(i & 1); |
| 203 | memset(nodes_next[j], 0, frontier * sizeof(**nodes_next)); |
| 204 | } |
| 205 | |
| 206 | filter_samples(c, &samples[2*i], &xlow, &xhigh); |
| 207 | |
| 208 | for (j = 0; j < frontier && nodes[0][j]; j++) { |
| 209 | /* Only k >> 2 affects the future adaptive state, therefore testing |
| 210 | * small steps that don't change k >> 2 is useless, the original |
| 211 | * value from encode_low is better than them. Since we step k |
| 212 | * in steps of 4, make sure range is a multiple of 4, so that |
| 213 | * we don't miss the original value from encode_low. */ |
| 214 | int range = j < frontier/2 ? 4 : 0; |
| 215 | struct TrellisNode *cur_node = nodes[0][j]; |
| 216 | |
| 217 | int ilow = encode_low(&cur_node->state, xlow); |
| 218 | |
| 219 | for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) { |
| 220 | int decoded, dec_diff, pos; |
| 221 | uint32_t ssd; |
| 222 | struct TrellisNode* node; |
| 223 | |
| 224 | if (k < 0) |
| 225 | continue; |
| 226 | |
| 227 | decoded = av_clip((cur_node->state.scale_factor * |
| 228 | ff_g722_low_inv_quant6[k] >> 10) |
| 229 | + cur_node->state.s_predictor, -16384, 16383); |
| 230 | dec_diff = xlow - decoded; |
| 231 | |
| 232 | #define STORE_NODE(index, UPDATE, VALUE)\ |
| 233 | ssd = cur_node->ssd + dec_diff*dec_diff;\ |
| 234 | /* Check for wraparound. Using 64 bit ssd counters would \ |
| 235 | * be simpler, but is slower on x86 32 bit. */\ |
| 236 | if (ssd < cur_node->ssd)\ |
| 237 | continue;\ |
| 238 | if (heap_pos[index] < frontier) {\ |
| 239 | pos = heap_pos[index]++;\ |
| 240 | av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\ |
| 241 | node = nodes_next[index][pos] = next[index]++;\ |
| 242 | node->path = pathn[index]++;\ |
| 243 | } else {\ |
| 244 | /* Try to replace one of the leaf nodes with the new \ |
| 245 | * one, but not always testing the same leaf position */\ |
| 246 | pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\ |
| 247 | if (ssd >= nodes_next[index][pos]->ssd)\ |
| 248 | continue;\ |
| 249 | heap_pos[index]++;\ |
| 250 | node = nodes_next[index][pos];\ |
| 251 | }\ |
| 252 | node->ssd = ssd;\ |
| 253 | node->state = cur_node->state;\ |
| 254 | UPDATE;\ |
| 255 | c->paths[index][node->path].value = VALUE;\ |
| 256 | c->paths[index][node->path].prev = cur_node->path;\ |
| 257 | /* Sift the newly inserted node up in the heap to restore \ |
| 258 | * the heap property */\ |
| 259 | while (pos > 0) {\ |
| 260 | int parent = (pos - 1) >> 1;\ |
| 261 | if (nodes_next[index][parent]->ssd <= ssd)\ |
| 262 | break;\ |
| 263 | FFSWAP(struct TrellisNode*, nodes_next[index][parent],\ |
| 264 | nodes_next[index][pos]);\ |
| 265 | pos = parent;\ |
| 266 | } |
| 267 | STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k); |
| 268 | } |
| 269 | } |
| 270 | |
| 271 | for (j = 0; j < frontier && nodes[1][j]; j++) { |
| 272 | int ihigh; |
| 273 | struct TrellisNode *cur_node = nodes[1][j]; |
| 274 | |
| 275 | /* We don't try to get any initial guess for ihigh via |
| 276 | * encode_high - since there's only 4 possible values, test |
| 277 | * them all. Testing all of these gives a much, much larger |
| 278 | * gain than testing a larger range around ilow. */ |
| 279 | for (ihigh = 0; ihigh < 4; ihigh++) { |
| 280 | int dhigh, decoded, dec_diff, pos; |
| 281 | uint32_t ssd; |
| 282 | struct TrellisNode* node; |
| 283 | |
| 284 | dhigh = cur_node->state.scale_factor * |
| 285 | ff_g722_high_inv_quant[ihigh] >> 10; |
| 286 | decoded = av_clip(dhigh + cur_node->state.s_predictor, |
| 287 | -16384, 16383); |
| 288 | dec_diff = xhigh - decoded; |
| 289 | |
| 290 | STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh); |
| 291 | } |
| 292 | } |
| 293 | |
| 294 | for (j = 0; j < 2; j++) { |
| 295 | FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]); |
| 296 | |
| 297 | if (nodes[j][0]->ssd > (1 << 16)) { |
| 298 | for (k = 1; k < frontier && nodes[j][k]; k++) |
| 299 | nodes[j][k]->ssd -= nodes[j][0]->ssd; |
| 300 | nodes[j][0]->ssd = 0; |
| 301 | } |
| 302 | } |
| 303 | |
| 304 | if (i == froze + FREEZE_INTERVAL) { |
| 305 | p[0] = &c->paths[0][nodes[0][0]->path]; |
| 306 | p[1] = &c->paths[1][nodes[1][0]->path]; |
| 307 | for (j = i; j > froze; j--) { |
| 308 | dst[j] = p[1]->value << 6 | p[0]->value; |
| 309 | p[0] = &c->paths[0][p[0]->prev]; |
| 310 | p[1] = &c->paths[1][p[1]->prev]; |
| 311 | } |
| 312 | froze = i; |
| 313 | pathn[0] = pathn[1] = 0; |
| 314 | memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes)); |
| 315 | memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes)); |
| 316 | } |
| 317 | } |
| 318 | |
| 319 | p[0] = &c->paths[0][nodes[0][0]->path]; |
| 320 | p[1] = &c->paths[1][nodes[1][0]->path]; |
| 321 | for (j = i; j > froze; j--) { |
| 322 | dst[j] = p[1]->value << 6 | p[0]->value; |
| 323 | p[0] = &c->paths[0][p[0]->prev]; |
| 324 | p[1] = &c->paths[1][p[1]->prev]; |
| 325 | } |
| 326 | c->band[0] = nodes[0][0]->state; |
| 327 | c->band[1] = nodes[1][0]->state; |
| 328 | } |
| 329 | |
| 330 | static av_always_inline void encode_byte(G722Context *c, uint8_t *dst, |
| 331 | const int16_t *samples) |
| 332 | { |
| 333 | int xlow, xhigh, ilow, ihigh; |
| 334 | filter_samples(c, samples, &xlow, &xhigh); |
| 335 | ihigh = encode_high(&c->band[1], xhigh); |
| 336 | ilow = encode_low (&c->band[0], xlow); |
| 337 | ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor * |
| 338 | ff_g722_high_inv_quant[ihigh] >> 10, ihigh); |
| 339 | ff_g722_update_low_predictor(&c->band[0], ilow >> 2); |
| 340 | *dst = ihigh << 6 | ilow; |
| 341 | } |
| 342 | |
| 343 | static void g722_encode_no_trellis(G722Context *c, |
| 344 | uint8_t *dst, int nb_samples, |
| 345 | const int16_t *samples) |
| 346 | { |
| 347 | int i; |
| 348 | for (i = 0; i < nb_samples; i += 2) |
| 349 | encode_byte(c, dst++, &samples[i]); |
| 350 | } |
| 351 | |
| 352 | static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, |
| 353 | const AVFrame *frame, int *got_packet_ptr) |
| 354 | { |
| 355 | G722Context *c = avctx->priv_data; |
| 356 | const int16_t *samples = (const int16_t *)frame->data[0]; |
| 357 | int nb_samples, out_size, ret; |
| 358 | |
| 359 | out_size = (frame->nb_samples + 1) / 2; |
| 360 | if ((ret = ff_alloc_packet2(avctx, avpkt, out_size)) < 0) |
| 361 | return ret; |
| 362 | |
| 363 | nb_samples = frame->nb_samples - (frame->nb_samples & 1); |
| 364 | |
| 365 | if (avctx->trellis) |
| 366 | g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples); |
| 367 | else |
| 368 | g722_encode_no_trellis(c, avpkt->data, nb_samples, samples); |
| 369 | |
| 370 | /* handle last frame with odd frame_size */ |
| 371 | if (nb_samples < frame->nb_samples) { |
| 372 | int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] }; |
| 373 | encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples); |
| 374 | } |
| 375 | |
| 376 | if (frame->pts != AV_NOPTS_VALUE) |
| 377 | avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding); |
| 378 | *got_packet_ptr = 1; |
| 379 | return 0; |
| 380 | } |
| 381 | |
| 382 | AVCodec ff_adpcm_g722_encoder = { |
| 383 | .name = "g722", |
| 384 | .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"), |
| 385 | .type = AVMEDIA_TYPE_AUDIO, |
| 386 | .id = AV_CODEC_ID_ADPCM_G722, |
| 387 | .priv_data_size = sizeof(G722Context), |
| 388 | .init = g722_encode_init, |
| 389 | .close = g722_encode_close, |
| 390 | .encode2 = g722_encode_frame, |
| 391 | .capabilities = CODEC_CAP_SMALL_LAST_FRAME, |
| 392 | .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, |
| 393 | AV_SAMPLE_FMT_NONE }, |
| 394 | }; |