/*
 * Copyright (c) 2012 Pavel Koshevoy <pkoshevoy at gmail dot com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * tempo scaling audio filter -- an implementation of the WSOLA algorithm
 *
 * Based on the MIT-licensed yaeAudioTempoFilter.h and yaeAudioFragment.h
 * from the Apprentice Video player by Pavel Koshevoy.
 * https://sourceforge.net/projects/apprenticevideo/
 *
 * An explanation of the SOLA algorithm is available at
 * http://www.surina.net/article/time-and-pitch-scaling.html
 *
 * WSOLA is very similar to SOLA; the one major difference between the
 * two is that SOLA shifts audio fragments along the output stream,
 * whereas WSOLA shifts audio fragments along the input stream.
 *
 * The advantage of the WSOLA algorithm is that the overlap region size
 * is always the same, therefore the blending function is constant and
 * can be precomputed.
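 *
 * For example (illustrative numbers): with a 1024 sample window the
 * output position always advances by window / 2 = 512 samples per
 * fragment, while at tempo 1.25 the nominal input position advances by
 * tempo * window / 2 = 640 samples, so input is consumed 1.25x faster
 * than output is produced (see yae_advance_to_next_frag below).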
 */

#include <float.h>
#include "libavcodec/avfft.h"
#include "libavutil/avassert.h"
#include "libavutil/avstring.h"
#include "libavutil/channel_layout.h"
#include "libavutil/eval.h"
#include "libavutil/opt.h"
#include "libavutil/samplefmt.h"
#include "avfilter.h"
#include "audio.h"
#include "internal.h"

/**
 * A fragment of audio waveform
 */
typedef struct {
    // index of the first sample of this fragment in the overall waveform;
    // 0: input sample position
    // 1: output sample position
    int64_t position[2];

    // original packed multi-channel samples:
    uint8_t *data;

    // number of samples in this fragment:
    int nsamples;

    // rDFT transform of the down-mixed mono fragment, used for
    // fast waveform alignment via correlation in the frequency domain:
    FFTSample *xdat;
} AudioFragment;

/**
 * Filter state machine states
 */
typedef enum {
    YAE_LOAD_FRAGMENT,
    YAE_ADJUST_POSITION,
    YAE_RELOAD_FRAGMENT,
    YAE_OUTPUT_OVERLAP_ADD,
    YAE_FLUSH_OUTPUT,
} FilterState;
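
/*
 * Typical state flow, for orientation (a sketch of yae_apply below):
 * YAE_LOAD_FRAGMENT -> YAE_ADJUST_POSITION -> YAE_RELOAD_FRAGMENT (only
 * when the position was corrected) -> YAE_OUTPUT_OVERLAP_ADD -> back to
 * YAE_LOAD_FRAGMENT; YAE_FLUSH_OUTPUT is entered once at EOF
 * (see yae_flush).
 */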

/**
 * Filter state machine
 */
typedef struct {
    const AVClass *class;

    // ring-buffer of input samples, necessary because sometimes
    // the input fragment position may be adjusted backwards:
    uint8_t *buffer;

    // ring-buffer maximum capacity, expressed in samples:
    int ring;

    // ring-buffer housekeeping:
    int size;
    int head;
    int tail;

    // 0: input sample position corresponding to the ring buffer tail
    // 1: output sample position
    int64_t position[2];

    // sample format:
    enum AVSampleFormat format;

    // number of channels:
    int channels;

    // number of bytes to skip from one sample to the next, across all
    // channels; stride = (number-of-channels * bits-per-sample-per-channel) / 8
    int stride;

    // fragment window size, power-of-two integer:
    int window;

    // Hann window coefficients, for feathering
    // (blending) the overlapping fragment region:
    float *hann;

    // tempo scaling factor:
    double tempo;

    // a snapshot of previous fragment input and output position values
    // captured when the tempo scale factor was set most recently:
    int64_t origin[2];

    // current/previous fragment ring-buffer:
    AudioFragment frag[2];

    // current fragment index:
    uint64_t nfrag;

    // current state:
    FilterState state;

    // for fast correlation calculation in the frequency domain:
    RDFTContext *real_to_complex;
    RDFTContext *complex_to_real;
    FFTSample *correlation;

    // for managing AVFilterPad.request_frame and AVFilterPad.filter_frame
    AVFrame *dst_buffer;
    uint8_t *dst;
    uint8_t *dst_end;
    uint64_t nsamples_in;
    uint64_t nsamples_out;
} ATempoContext;

#define OFFSET(x) offsetof(ATempoContext, x)

static const AVOption atempo_options[] = {
    { "tempo", "set tempo scale factor",
      OFFSET(tempo), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 0.5, 2.0,
      AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM },
    { NULL }
};

AVFILTER_DEFINE_CLASS(atempo);

inline static AudioFragment *yae_curr_frag(ATempoContext *atempo)
{
    return &atempo->frag[atempo->nfrag % 2];
}

inline static AudioFragment *yae_prev_frag(ATempoContext *atempo)
{
    return &atempo->frag[(atempo->nfrag + 1) % 2];
}

/**
 * Reset filter to initial state, do not deallocate existing local buffers.
 */
static void yae_clear(ATempoContext *atempo)
{
    atempo->size = 0;
    atempo->head = 0;
    atempo->tail = 0;

    atempo->nfrag = 0;
    atempo->state = YAE_LOAD_FRAGMENT;

    atempo->position[0] = 0;
    atempo->position[1] = 0;

    atempo->origin[0] = 0;
    atempo->origin[1] = 0;

    atempo->frag[0].position[0] = 0;
    atempo->frag[0].position[1] = 0;
    atempo->frag[0].nsamples = 0;

    atempo->frag[1].position[0] = 0;
    atempo->frag[1].position[1] = 0;
    atempo->frag[1].nsamples = 0;

    // shift the position of the 1st fragment left by half a window
    // so that no re-normalization is required for
    // the left half of the 1st fragment:
    atempo->frag[0].position[0] = -(int64_t)(atempo->window / 2);
    atempo->frag[0].position[1] = -(int64_t)(atempo->window / 2);

    av_frame_free(&atempo->dst_buffer);
    atempo->dst = NULL;
    atempo->dst_end = NULL;

    atempo->nsamples_in = 0;
    atempo->nsamples_out = 0;
}

/**
 * Reset filter to initial state and deallocate all buffers.
 */
static void yae_release_buffers(ATempoContext *atempo)
{
    yae_clear(atempo);

    av_freep(&atempo->frag[0].data);
    av_freep(&atempo->frag[1].data);
    av_freep(&atempo->frag[0].xdat);
    av_freep(&atempo->frag[1].xdat);

    av_freep(&atempo->buffer);
    av_freep(&atempo->hann);
    av_freep(&atempo->correlation);

    av_rdft_end(atempo->real_to_complex);
    atempo->real_to_complex = NULL;

    av_rdft_end(atempo->complex_to_real);
    atempo->complex_to_real = NULL;
}

/* av_realloc is not aligned enough; fortunately, the data does not need to
 * be preserved */
#define RE_MALLOC_OR_FAIL(field, field_size)    \
    do {                                        \
        av_freep(&field);                       \
        field = av_malloc(field_size);          \
        if (!field) {                           \
            yae_release_buffers(atempo);        \
            return AVERROR(ENOMEM);             \
        }                                       \
    } while (0)

/**
 * Prepare filter for processing audio data of given format,
 * sample rate and number of channels.
 */
static int yae_reset(ATempoContext *atempo,
                     enum AVSampleFormat format,
                     int sample_rate,
                     int channels)
{
    const int sample_size = av_get_bytes_per_sample(format);
    uint32_t nlevels = 0;
    uint32_t pot;
    int i;

    atempo->format = format;
    atempo->channels = channels;
    atempo->stride = sample_size * channels;

    // pick a segment window size (~1/24 of a second):
    atempo->window = sample_rate / 24;

    // adjust window size to be a power-of-two integer:
    nlevels = av_log2(atempo->window);
    pot = 1 << nlevels;
    av_assert0(pot <= atempo->window);

    if (pot < atempo->window) {
        atempo->window = pot * 2;
        nlevels++;
    }
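
    // e.g. (a worked trace): at 44100 Hz, window = 44100 / 24 = 1837;
    // av_log2(1837) = 10, so pot = 1024 < 1837 and the window is
    // rounded up to 2048 samples with nlevels = 11.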

    // initialize audio fragment buffers:
    RE_MALLOC_OR_FAIL(atempo->frag[0].data, atempo->window * atempo->stride);
    RE_MALLOC_OR_FAIL(atempo->frag[1].data, atempo->window * atempo->stride);
    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, atempo->window * sizeof(FFTComplex));
    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, atempo->window * sizeof(FFTComplex));
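
    // note: each xdat buffer holds atempo->window FFTComplex values --
    // that is 2 * window FFTSamples -- enough for the zero-padded
    // 2^(nlevels + 1) point rDFT set up below, since correlating two
    // window-long fragments requires a transform of twice their length.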

    // initialize rDFT contexts:
    av_rdft_end(atempo->real_to_complex);
    atempo->real_to_complex = NULL;

    av_rdft_end(atempo->complex_to_real);
    atempo->complex_to_real = NULL;

    atempo->real_to_complex = av_rdft_init(nlevels + 1, DFT_R2C);
    if (!atempo->real_to_complex) {
        yae_release_buffers(atempo);
        return AVERROR(ENOMEM);
    }

    atempo->complex_to_real = av_rdft_init(nlevels + 1, IDFT_C2R);
    if (!atempo->complex_to_real) {
        yae_release_buffers(atempo);
        return AVERROR(ENOMEM);
    }

    RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window * sizeof(FFTComplex));

    atempo->ring = atempo->window * 3;
    RE_MALLOC_OR_FAIL(atempo->buffer, atempo->ring * atempo->stride);

    // initialize the Hann window function:
    RE_MALLOC_OR_FAIL(atempo->hann, atempo->window * sizeof(float));

    for (i = 0; i < atempo->window; i++) {
        double t = (double)i / (double)(atempo->window - 1);
        double h = 0.5 * (1.0 - cos(2.0 * M_PI * t));
        atempo->hann[i] = (float)h;
    }
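
    // note: the continuous Hann window satisfies h(t) + h(t + 1/2) = 1,
    // which is why the half-window overlap-add in yae_blend needs no
    // re-normalization (the discrete window above is complementary only
    // approximately, due to the window - 1 denominator).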

    yae_clear(atempo);
    return 0;
}

static int yae_set_tempo(AVFilterContext *ctx, const char *arg_tempo)
{
    const AudioFragment *prev;
    ATempoContext *atempo = ctx->priv;
    char *tail = NULL;
    double tempo = av_strtod(arg_tempo, &tail);

    if (tail && *tail) {
        av_log(ctx, AV_LOG_ERROR, "Invalid tempo value '%s'\n", arg_tempo);
        return AVERROR(EINVAL);
    }

    if (tempo < 0.5 || tempo > 2.0) {
        av_log(ctx, AV_LOG_ERROR, "Tempo value %f exceeds [0.5, 2.0] range\n",
               tempo);
        return AVERROR(EINVAL);
    }

    prev = yae_prev_frag(atempo);
    atempo->origin[0] = prev->position[0] + atempo->window / 2;
    atempo->origin[1] = prev->position[1] + atempo->window / 2;
    atempo->tempo = tempo;
    return 0;
}

/**
 * A helper macro for initializing the complex data buffer with scalar
 * data of a given type.
 */
#define yae_init_xdat(scalar_type, scalar_max)                          \
    do {                                                                \
        const uint8_t *src_end = src +                                  \
            frag->nsamples * atempo->channels * sizeof(scalar_type);    \
                                                                        \
        FFTSample *xdat = frag->xdat;                                   \
        scalar_type tmp;                                                \
                                                                        \
        if (atempo->channels == 1) {                                    \
            for (; src < src_end; xdat++) {                             \
                tmp = *(const scalar_type *)src;                        \
                src += sizeof(scalar_type);                             \
                                                                        \
                *xdat = (FFTSample)tmp;                                 \
            }                                                           \
        } else {                                                        \
            FFTSample s, max, ti, si;                                   \
            int i;                                                      \
                                                                        \
            for (; src < src_end; xdat++) {                             \
                tmp = *(const scalar_type *)src;                        \
                src += sizeof(scalar_type);                             \
                                                                        \
                max = (FFTSample)tmp;                                   \
                s = FFMIN((FFTSample)scalar_max,                        \
                          (FFTSample)fabsf(max));                       \
                                                                        \
                for (i = 1; i < atempo->channels; i++) {                \
                    tmp = *(const scalar_type *)src;                    \
                    src += sizeof(scalar_type);                         \
                                                                        \
                    ti = (FFTSample)tmp;                                \
                    si = FFMIN((FFTSample)scalar_max,                   \
                               (FFTSample)fabsf(ti));                   \
                                                                        \
                    if (s < si) {                                       \
                        s = si;                                         \
                        max = ti;                                       \
                    }                                                   \
                }                                                       \
                                                                        \
                *xdat = max;                                            \
            }                                                           \
        }                                                               \
    } while (0)
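
/*
 * Note: the "down-mix" performed by yae_init_xdat is not an average;
 * for each multi-channel sample it keeps the (signed) value of the
 * channel whose magnitude -- clipped to scalar_max for the comparison --
 * is largest.  E.g. a stereo int16 frame { 1000, -3000 } contributes
 * -3000 to xdat.
 */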

/**
 * Initialize complex data buffer of a given audio fragment
 * with down-mixed mono data of appropriate scalar type.
 */
static void yae_downmix(ATempoContext *atempo, AudioFragment *frag)
{
    // shortcuts:
    const uint8_t *src = frag->data;

    // init the complex data buffer used for FFT and correlation:
    memset(frag->xdat, 0, sizeof(FFTComplex) * atempo->window);

    if (atempo->format == AV_SAMPLE_FMT_U8) {
        yae_init_xdat(uint8_t, 127);
    } else if (atempo->format == AV_SAMPLE_FMT_S16) {
        yae_init_xdat(int16_t, 32767);
    } else if (atempo->format == AV_SAMPLE_FMT_S32) {
        yae_init_xdat(int, 2147483647);
    } else if (atempo->format == AV_SAMPLE_FMT_FLT) {
        yae_init_xdat(float, 1);
    } else if (atempo->format == AV_SAMPLE_FMT_DBL) {
        yae_init_xdat(double, 1);
    }
}

/**
 * Populate the internal data buffer on an as-needed basis.
 *
 * @return
 *   0 if requested data was already available or was successfully loaded,
 *   AVERROR(EAGAIN) if more input data is required.
 */
static int yae_load_data(ATempoContext *atempo,
                         const uint8_t **src_ref,
                         const uint8_t *src_end,
                         int64_t stop_here)
{
    // shortcut:
    const uint8_t *src = *src_ref;
    const int read_size = stop_here - atempo->position[0];

    if (stop_here <= atempo->position[0]) {
        return 0;
    }

    // samples are not expected to be skipped:
    av_assert0(read_size <= atempo->ring);

    while (atempo->position[0] < stop_here && src < src_end) {
        int src_samples = (src_end - src) / atempo->stride;

        // load data piece-wise, in order to avoid complicating the logic:
        int nsamples = FFMIN(read_size, src_samples);
        int na;
        int nb;

        nsamples = FFMIN(nsamples, atempo->ring);
        na = FFMIN(nsamples, atempo->ring - atempo->tail);
        nb = FFMIN(nsamples - na, atempo->ring);
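        // na samples fit between the current tail and the end of the
        // ring buffer; the remaining nb samples wrap around to the start: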

        if (na) {
            uint8_t *a = atempo->buffer + atempo->tail * atempo->stride;
            memcpy(a, src, na * atempo->stride);

            src += na * atempo->stride;
            atempo->position[0] += na;

            atempo->size = FFMIN(atempo->size + na, atempo->ring);
            atempo->tail = (atempo->tail + na) % atempo->ring;
            atempo->head =
                atempo->size < atempo->ring ?
                atempo->tail - atempo->size :
                atempo->tail;
        }

        if (nb) {
            uint8_t *b = atempo->buffer;
            memcpy(b, src, nb * atempo->stride);

            src += nb * atempo->stride;
            atempo->position[0] += nb;

            atempo->size = FFMIN(atempo->size + nb, atempo->ring);
            atempo->tail = (atempo->tail + nb) % atempo->ring;
            atempo->head =
                atempo->size < atempo->ring ?
                atempo->tail - atempo->size :
                atempo->tail;
        }
    }

    // pass back the updated source buffer pointer:
    *src_ref = src;

    // sanity check:
    av_assert0(atempo->position[0] <= stop_here);

    return atempo->position[0] == stop_here ? 0 : AVERROR(EAGAIN);
}

/**
 * Populate current audio fragment data buffer.
 *
 * @return
 *   0 when the fragment is ready,
 *   AVERROR(EAGAIN) if more input data is required.
 */
static int yae_load_frag(ATempoContext *atempo,
                         const uint8_t **src_ref,
                         const uint8_t *src_end)
{
    // shortcuts:
    AudioFragment *frag = yae_curr_frag(atempo);
    uint8_t *dst;
    int64_t missing, start, zeros;
    uint32_t nsamples;
    const uint8_t *a, *b;
    int i0, i1, n0, n1, na, nb;

    int64_t stop_here = frag->position[0] + atempo->window;
    if (src_ref && yae_load_data(atempo, src_ref, src_end, stop_here) != 0) {
        return AVERROR(EAGAIN);
    }

    // calculate the number of samples we don't have:
    missing =
        stop_here > atempo->position[0] ?
        stop_here - atempo->position[0] : 0;

    nsamples =
        missing < (int64_t)atempo->window ?
        (uint32_t)(atempo->window - missing) : 0;

    // setup the output buffer:
    frag->nsamples = nsamples;
    dst = frag->data;

    start = atempo->position[0] - atempo->size;
    zeros = 0;

    if (frag->position[0] < start) {
        // what we don't have we substitute with zeros:
        zeros = FFMIN(start - frag->position[0], (int64_t)nsamples);
        av_assert0(zeros != nsamples);

        memset(dst, 0, zeros * atempo->stride);
        dst += zeros * atempo->stride;
    }

    if (zeros == nsamples) {
        return 0;
    }

    // get the remaining data from the ring buffer:
    na = (atempo->head < atempo->tail ?
          atempo->tail - atempo->head :
          atempo->ring - atempo->head);

    nb = atempo->head < atempo->tail ? 0 : atempo->tail;

    // sanity check:
    av_assert0(nsamples <= zeros + na + nb);

    a = atempo->buffer + atempo->head * atempo->stride;
    b = atempo->buffer;

    i0 = frag->position[0] + zeros - start;
    i1 = i0 < na ? 0 : i0 - na;

    n0 = i0 < na ? FFMIN(na - i0, (int)(nsamples - zeros)) : 0;
    n1 = nsamples - zeros - n0;

    if (n0) {
        memcpy(dst, a + i0 * atempo->stride, n0 * atempo->stride);
        dst += n0 * atempo->stride;
    }

    if (n1) {
        memcpy(dst, b + i1 * atempo->stride, n1 * atempo->stride);
    }

    return 0;
}

/**
 * Prepare for loading next audio fragment.
 */
static void yae_advance_to_next_frag(ATempoContext *atempo)
{
    const double fragment_step = atempo->tempo * (double)(atempo->window / 2);

    const AudioFragment *prev;
    AudioFragment *frag;

    atempo->nfrag++;
    prev = yae_prev_frag(atempo);
    frag = yae_curr_frag(atempo);

    frag->position[0] = prev->position[0] + (int64_t)fragment_step;
    frag->position[1] = prev->position[1] + atempo->window / 2;
    frag->nsamples = 0;
}

/**
 * Calculate cross-correlation via rDFT.
 *
 * Multiply two vectors of complex numbers (the result of the real_to_complex
 * rDFT) and transform back via the complex_to_real rDFT.
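 *
 * For reference, this relies on the cross-correlation theorem (one
 * common convention): xcorr(a, b) = IDFT(DFT(a) . conj(DFT(b))), so a
 * single element-wise spectral product replaces an O(N^2) time-domain
 * sum; the loop below computes xc = xa * conj(xb) per frequency bin.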
 */
static void yae_xcorr_via_rdft(FFTSample *xcorr,
                               RDFTContext *complex_to_real,
                               const FFTComplex *xa,
                               const FFTComplex *xb,
                               const int window)
{
    FFTComplex *xc = (FFTComplex *)xcorr;
    int i;

    // NOTE: the first element requires special care -- given Y = rDFT(X),
    // Im(Y[0]) and Im(Y[N/2]) are always zero, therefore av_rdft_calc
    // stores Re(Y[N/2]) in place of Im(Y[0]).

    xc->re = xa->re * xb->re;
    xc->im = xa->im * xb->im;
    xa++;
    xb++;
    xc++;

    for (i = 1; i < window; i++, xa++, xb++, xc++) {
        xc->re = (xa->re * xb->re + xa->im * xb->im);
        xc->im = (xa->im * xb->re - xa->re * xb->im);
    }

    // apply inverse rDFT:
    av_rdft_calc(complex_to_real, xcorr);
}

/**
 * Calculate the alignment offset for the given fragment
 * relative to the previous fragment.
 *
 * @return alignment offset of current fragment relative to previous.
 */
static int yae_align(AudioFragment *frag,
                     const AudioFragment *prev,
                     const int window,
                     const int delta_max,
                     const int drift,
                     FFTSample *correlation,
                     RDFTContext *complex_to_real)
{
    int best_offset = -drift;
    FFTSample best_metric = -FLT_MAX;
    FFTSample *xcorr;

    int i0;
    int i1;
    int i;

    yae_xcorr_via_rdft(correlation,
                       complex_to_real,
                       (const FFTComplex *)prev->xdat,
                       (const FFTComplex *)frag->xdat,
                       window);

    // identify search window boundaries:
    i0 = FFMAX(window / 2 - delta_max - drift, 0);
    i0 = FFMIN(i0, window);

    i1 = FFMIN(window / 2 + delta_max - drift, window - window / 16);
    i1 = FFMAX(i1, 0);

    // identify cross-correlation peaks within search window:
    xcorr = correlation + i0;

    for (i = i0; i < i1; i++, xcorr++) {
        FFTSample metric = *xcorr;

        // normalize:
        FFTSample drifti = (FFTSample)(drift + i);
        metric *= drifti * (FFTSample)(i - i0) * (FFTSample)(i1 - i);

        if (metric > best_metric) {
            best_metric = metric;
            best_offset = i - window / 2;
        }
    }

    return best_offset;
}

/**
 * Adjust the current fragment position for better alignment
 * with the previous fragment.
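 *
 * Drift, computed below, measures how far the actual output position
 * has run ahead of (positive) or behind (negative) where it would
 * ideally be given the input consumed since the tempo was last set;
 * yae_align shifts its search window by -drift to steer the alignment
 * back on track.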
 *
 * @return alignment correction.
 */
static int yae_adjust_position(ATempoContext *atempo)
{
    const AudioFragment *prev = yae_prev_frag(atempo);
    AudioFragment *frag = yae_curr_frag(atempo);

    const double prev_output_position =
        (double)(prev->position[1] - atempo->origin[1] + atempo->window / 2);

    const double ideal_output_position =
        (double)(prev->position[0] - atempo->origin[0] + atempo->window / 2) /
        atempo->tempo;

    const int drift = (int)(prev_output_position - ideal_output_position);

    const int delta_max = atempo->window / 2;
    const int correction = yae_align(frag,
                                     prev,
                                     atempo->window,
                                     delta_max,
                                     drift,
                                     atempo->correlation,
                                     atempo->complex_to_real);

    if (correction) {
        // adjust fragment position:
        frag->position[0] -= correction;

        // clear so that the fragment can be reloaded:
        frag->nsamples = 0;
    }

    return correction;
}

/**
 * A helper macro for blending the overlap region of the previous
 * and current audio fragments.
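 *
 * Each output sample is the weighted sum t0 * w0 + t1 * w1, where w0
 * ramps down along the tail of the previous fragment and w1 ramps up
 * along the head of the current one; the Hann weights are complementary
 * (w0 + w1 ~= 1 across the half-window overlap), so no re-normalization
 * is needed.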
 */
#define yae_blend(scalar_type)                                          \
    do {                                                                \
        const scalar_type *aaa = (const scalar_type *)a;                \
        const scalar_type *bbb = (const scalar_type *)b;                \
                                                                        \
        scalar_type *out = (scalar_type *)dst;                          \
        scalar_type *out_end = (scalar_type *)dst_end;                  \
        int64_t i;                                                      \
                                                                        \
        for (i = 0; i < overlap && out < out_end;                       \
             i++, atempo->position[1]++, wa++, wb++) {                  \
            float w0 = *wa;                                             \
            float w1 = *wb;                                             \
            int j;                                                      \
                                                                        \
            for (j = 0; j < atempo->channels;                           \
                 j++, aaa++, bbb++, out++) {                            \
                float t0 = (float)*aaa;                                 \
                float t1 = (float)*bbb;                                 \
                                                                        \
                *out =                                                  \
                    frag->position[0] + i < 0 ?                         \
                    *aaa :                                              \
                    (scalar_type)(t0 * w0 + t1 * w1);                   \
            }                                                           \
        }                                                               \
        dst = (uint8_t *)out;                                           \
    } while (0)

/**
 * Blend the overlap region of the previous and current audio fragments
 * and output the results to the given destination buffer.
 *
 * @return
 *   0 if the overlap region was completely stored in the dst buffer,
 *   AVERROR(EAGAIN) if more destination buffer space is required.
 */
static int yae_overlap_add(ATempoContext *atempo,
                           uint8_t **dst_ref,
                           uint8_t *dst_end)
{
    // shortcuts:
    const AudioFragment *prev = yae_prev_frag(atempo);
    const AudioFragment *frag = yae_curr_frag(atempo);

    const int64_t start_here = FFMAX(atempo->position[1],
                                     frag->position[1]);

    const int64_t stop_here = FFMIN(prev->position[1] + prev->nsamples,
                                    frag->position[1] + frag->nsamples);

    const int64_t overlap = stop_here - start_here;

    const int64_t ia = start_here - prev->position[1];
    const int64_t ib = start_here - frag->position[1];

    const float *wa = atempo->hann + ia;
    const float *wb = atempo->hann + ib;

    const uint8_t *a = prev->data + ia * atempo->stride;
    const uint8_t *b = frag->data + ib * atempo->stride;

    uint8_t *dst = *dst_ref;

    av_assert0(start_here <= stop_here &&
               frag->position[1] <= start_here &&
               overlap <= frag->nsamples);

    if (atempo->format == AV_SAMPLE_FMT_U8) {
        yae_blend(uint8_t);
    } else if (atempo->format == AV_SAMPLE_FMT_S16) {
        yae_blend(int16_t);
    } else if (atempo->format == AV_SAMPLE_FMT_S32) {
        yae_blend(int);
    } else if (atempo->format == AV_SAMPLE_FMT_FLT) {
        yae_blend(float);
    } else if (atempo->format == AV_SAMPLE_FMT_DBL) {
        yae_blend(double);
    }

    // pass back the updated destination buffer pointer:
    *dst_ref = dst;

    return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
}

/**
 * Feed as much data to the filter as it is able to consume
 * and receive as much processed data in the destination buffer
 * as it is able to produce or store.
 */
static void
yae_apply(ATempoContext *atempo,
          const uint8_t **src_ref,
          const uint8_t *src_end,
          uint8_t **dst_ref,
          uint8_t *dst_end)
{
    while (1) {
        if (atempo->state == YAE_LOAD_FRAGMENT) {
            // load additional data for the current fragment:
            if (yae_load_frag(atempo, src_ref, src_end) != 0) {
                break;
            }

            // down-mix to mono:
            yae_downmix(atempo, yae_curr_frag(atempo));

            // apply rDFT:
            av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);

            // must load the second fragment before alignment can start:
            if (!atempo->nfrag) {
                yae_advance_to_next_frag(atempo);
                continue;
            }

            atempo->state = YAE_ADJUST_POSITION;
        }

        if (atempo->state == YAE_ADJUST_POSITION) {
            // adjust position for better alignment:
            if (yae_adjust_position(atempo)) {
                // reload the fragment at the corrected position, so that
                // the Hann window blending does not require normalization:
                atempo->state = YAE_RELOAD_FRAGMENT;
            } else {
                atempo->state = YAE_OUTPUT_OVERLAP_ADD;
            }
        }

        if (atempo->state == YAE_RELOAD_FRAGMENT) {
            // load additional data if necessary due to position adjustment:
            if (yae_load_frag(atempo, src_ref, src_end) != 0) {
                break;
            }

            // down-mix to mono:
            yae_downmix(atempo, yae_curr_frag(atempo));

            // apply rDFT:
            av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);

            atempo->state = YAE_OUTPUT_OVERLAP_ADD;
        }

        if (atempo->state == YAE_OUTPUT_OVERLAP_ADD) {
            // overlap-add and output the result:
            if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
                break;
            }

            // advance to the next fragment, repeat:
            yae_advance_to_next_frag(atempo);
            atempo->state = YAE_LOAD_FRAGMENT;
        }
    }
}

/**
 * Flush any buffered data from the filter.
 *
 * @return
 *   0 if all data was completely stored in the dst buffer,
 *   AVERROR(EAGAIN) if more destination buffer space is required.
 */
static int yae_flush(ATempoContext *atempo,
                     uint8_t **dst_ref,
                     uint8_t *dst_end)
{
    AudioFragment *frag = yae_curr_frag(atempo);
    int64_t overlap_end;
    int64_t start_here;
    int64_t stop_here;
    int64_t offset;

    const uint8_t *src;
    uint8_t *dst;

    int src_size;
    int dst_size;
    int nbytes;

    atempo->state = YAE_FLUSH_OUTPUT;

    if (atempo->position[0] == frag->position[0] + frag->nsamples &&
        atempo->position[1] == frag->position[1] + frag->nsamples) {
        // the current fragment is already flushed:
        return 0;
    }

    if (frag->position[0] + frag->nsamples < atempo->position[0]) {
        // finish loading the current (possibly partial) fragment:
        yae_load_frag(atempo, NULL, NULL);

        if (atempo->nfrag) {
            // down-mix to mono:
            yae_downmix(atempo, frag);

            // apply rDFT:
            av_rdft_calc(atempo->real_to_complex, frag->xdat);

            // align current fragment to previous fragment:
            if (yae_adjust_position(atempo)) {
                // reload the current fragment due to adjusted position:
                yae_load_frag(atempo, NULL, NULL);
            }
        }
    }

    // flush the overlap region:
    overlap_end = frag->position[1] + FFMIN(atempo->window / 2,
                                            frag->nsamples);

    while (atempo->position[1] < overlap_end) {
        if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
            return AVERROR(EAGAIN);
        }
    }

    // check whether all of the input samples have been consumed:
    if (frag->position[0] + frag->nsamples < atempo->position[0]) {
        yae_advance_to_next_frag(atempo);
        return AVERROR(EAGAIN);
    }

    // flush the remainder of the current fragment:
    start_here = FFMAX(atempo->position[1], overlap_end);
    stop_here = frag->position[1] + frag->nsamples;
    offset = start_here - frag->position[1];
    av_assert0(start_here <= stop_here && frag->position[1] <= start_here);

    src = frag->data + offset * atempo->stride;
    dst = (uint8_t *)*dst_ref;

    src_size = (int)(stop_here - start_here) * atempo->stride;
    dst_size = dst_end - dst;
    nbytes = FFMIN(src_size, dst_size);

    memcpy(dst, src, nbytes);
    dst += nbytes;

    atempo->position[1] += (nbytes / atempo->stride);

    // pass back the updated destination buffer pointer:
    *dst_ref = (uint8_t *)dst;

    return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
}

static av_cold int init(AVFilterContext *ctx)
{
    ATempoContext *atempo = ctx->priv;
    atempo->format = AV_SAMPLE_FMT_NONE;
    atempo->state = YAE_LOAD_FRAGMENT;
    return 0;
}

static av_cold void uninit(AVFilterContext *ctx)
{
    ATempoContext *atempo = ctx->priv;
    yae_release_buffers(atempo);
}

static int query_formats(AVFilterContext *ctx)
{
    AVFilterChannelLayouts *layouts = NULL;
    AVFilterFormats *formats = NULL;

    // WSOLA necessitates an internal sliding-window ring buffer
    // for the incoming audio stream.
    //
    // Planar sample formats are too cumbersome to store in a ring
    // buffer, therefore they are not supported.
    //
    static const enum AVSampleFormat sample_fmts[] = {
        AV_SAMPLE_FMT_U8,
        AV_SAMPLE_FMT_S16,
        AV_SAMPLE_FMT_S32,
        AV_SAMPLE_FMT_FLT,
        AV_SAMPLE_FMT_DBL,
        AV_SAMPLE_FMT_NONE
    };

    layouts = ff_all_channel_layouts();
    if (!layouts) {
        return AVERROR(ENOMEM);
    }
    ff_set_common_channel_layouts(ctx, layouts);

    formats = ff_make_format_list(sample_fmts);
    if (!formats) {
        return AVERROR(ENOMEM);
    }
    ff_set_common_formats(ctx, formats);

    formats = ff_all_samplerates();
    if (!formats) {
        return AVERROR(ENOMEM);
    }
    ff_set_common_samplerates(ctx, formats);

    return 0;
}

static int config_props(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    ATempoContext *atempo = ctx->priv;

    enum AVSampleFormat format = inlink->format;
    int sample_rate = (int)inlink->sample_rate;
    int channels = av_get_channel_layout_nb_channels(inlink->channel_layout);

    ctx->outputs[0]->flags |= FF_LINK_FLAG_REQUEST_LOOP;

    return yae_reset(atempo, format, sample_rate, channels);
}

static int push_samples(ATempoContext *atempo,
                        AVFilterLink *outlink,
                        int n_out)
{
    int ret;

    atempo->dst_buffer->sample_rate = outlink->sample_rate;
    atempo->dst_buffer->nb_samples = n_out;

    // adjust the PTS:
    atempo->dst_buffer->pts =
        av_rescale_q(atempo->nsamples_out,
                     (AVRational){ 1, outlink->sample_rate },
                     outlink->time_base);

    ret = ff_filter_frame(outlink, atempo->dst_buffer);
    atempo->dst_buffer = NULL;
    atempo->dst = NULL;
    atempo->dst_end = NULL;
    if (ret < 0)
        return ret;

    atempo->nsamples_out += n_out;
    return 0;
}

static int filter_frame(AVFilterLink *inlink, AVFrame *src_buffer)
{
    AVFilterContext *ctx = inlink->dst;
    ATempoContext *atempo = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];

    int ret = 0;
    int n_in = src_buffer->nb_samples;
    int n_out = (int)(0.5 + ((double)n_in) / atempo->tempo);

    const uint8_t *src = src_buffer->data[0];
    const uint8_t *src_end = src + n_in * atempo->stride;

    while (src < src_end) {
        if (!atempo->dst_buffer) {
            atempo->dst_buffer = ff_get_audio_buffer(outlink, n_out);
            if (!atempo->dst_buffer)
                return AVERROR(ENOMEM);
            av_frame_copy_props(atempo->dst_buffer, src_buffer);

            atempo->dst = atempo->dst_buffer->data[0];
            atempo->dst_end = atempo->dst + n_out * atempo->stride;
        }

        yae_apply(atempo, &src, src_end, &atempo->dst, atempo->dst_end);

        if (atempo->dst == atempo->dst_end) {
            int n_samples = ((atempo->dst - atempo->dst_buffer->data[0]) /
                             atempo->stride);
            ret = push_samples(atempo, outlink, n_samples);
            if (ret < 0)
                goto end;
        }
    }

    atempo->nsamples_in += n_in;
end:
    av_frame_free(&src_buffer);
    return ret;
}

static int request_frame(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    ATempoContext *atempo = ctx->priv;
    int ret;

    ret = ff_request_frame(ctx->inputs[0]);

    if (ret == AVERROR_EOF) {
        // flush the filter:
        int n_max = atempo->ring;
        int n_out;
        int err = AVERROR(EAGAIN);

        while (err == AVERROR(EAGAIN)) {
            if (!atempo->dst_buffer) {
                atempo->dst_buffer = ff_get_audio_buffer(outlink, n_max);
                if (!atempo->dst_buffer)
                    return AVERROR(ENOMEM);

                atempo->dst = atempo->dst_buffer->data[0];
                atempo->dst_end = atempo->dst + n_max * atempo->stride;
            }

            err = yae_flush(atempo, &atempo->dst, atempo->dst_end);

            n_out = ((atempo->dst - atempo->dst_buffer->data[0]) /
                     atempo->stride);

            if (n_out) {
                ret = push_samples(atempo, outlink, n_out);
            }
        }

        av_frame_free(&atempo->dst_buffer);
        atempo->dst = NULL;
        atempo->dst_end = NULL;

        return AVERROR_EOF;
    }

    return ret;
}

static int process_command(AVFilterContext *ctx,
                           const char *cmd,
                           const char *arg,
                           char *res,
                           int res_len,
                           int flags)
{
    return !strcmp(cmd, "tempo") ? yae_set_tempo(ctx, arg) : AVERROR(ENOSYS);
}
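
/*
 * For reference, the "tempo" command handled above can be driven from
 * the command line with the asendcmd filter, e.g. (an illustrative
 * invocation):
 *
 *   ffmpeg -i in.wav -af "asendcmd=c='4.0 atempo tempo 1.5',atempo" out.wav
 *
 * which switches the tempo scale factor to 1.5 at t = 4 seconds.
 */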

static const AVFilterPad atempo_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_AUDIO,
        .filter_frame = filter_frame,
        .config_props = config_props,
    },
    { NULL }
};

static const AVFilterPad atempo_outputs[] = {
    {
        .name          = "default",
        .request_frame = request_frame,
        .type          = AVMEDIA_TYPE_AUDIO,
    },
    { NULL }
};

AVFilter ff_af_atempo = {
    .name            = "atempo",
    .description     = NULL_IF_CONFIG_SMALL("Adjust audio tempo."),
    .init            = init,
    .uninit          = uninit,
    .query_formats   = query_formats,
    .process_command = process_command,
    .priv_size       = sizeof(ATempoContext),
    .priv_class      = &atempo_class,
    .inputs          = atempo_inputs,
    .outputs         = atempo_outputs,
};