| 1 | /* |
| 2 | * This file is part of FFmpeg. |
| 3 | * |
| 4 | * FFmpeg is free software; you can redistribute it and/or |
| 5 | * modify it under the terms of the GNU Lesser General Public |
| 6 | * License as published by the Free Software Foundation; either |
| 7 | * version 2.1 of the License, or (at your option) any later version. |
| 8 | * |
| 9 | * FFmpeg is distributed in the hope that it will be useful, |
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 12 | * Lesser General Public License for more details. |
| 13 | * |
| 14 | * You should have received a copy of the GNU Lesser General Public |
| 15 | * License along with FFmpeg; if not, write to the Free Software |
| 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 17 | */ |
| 18 | |
| 19 | #include <stdint.h> |
| 20 | |
| 21 | #include "libavresample/avresample.h" |
| 22 | #include "libavutil/attributes.h" |
| 23 | #include "libavutil/audio_fifo.h" |
| 24 | #include "libavutil/common.h" |
| 25 | #include "libavutil/mathematics.h" |
| 26 | #include "libavutil/opt.h" |
| 27 | #include "libavutil/samplefmt.h" |
| 28 | |
| 29 | #include "audio.h" |
| 30 | #include "avfilter.h" |
| 31 | #include "internal.h" |
| 32 | |
| 33 | typedef struct ASyncContext { |
| 34 | const AVClass *class; |
| 35 | |
| 36 | AVAudioResampleContext *avr; |
| 37 | int64_t pts; ///< timestamp in samples of the first sample in fifo |
| 38 | int min_delta; ///< pad/trim min threshold in samples |
| 39 | int first_frame; ///< 1 until filter_frame() has processed at least 1 frame with a pts != AV_NOPTS_VALUE |
| 40 | int64_t first_pts; ///< user-specified first expected pts, in samples |
| 41 | int comp; ///< current resample compensation |
| 42 | |
| 43 | /* options */ |
| 44 | int resample; |
| 45 | float min_delta_sec; |
| 46 | int max_comp; |
| 47 | |
| 48 | /* set by filter_frame() to signal an output frame to request_frame() */ |
| 49 | int got_output; |
| 50 | } ASyncContext; |
| 51 | |
| 52 | #define OFFSET(x) offsetof(ASyncContext, x) |
| 53 | #define A AV_OPT_FLAG_AUDIO_PARAM |
| 54 | #define F AV_OPT_FLAG_FILTERING_PARAM |
| 55 | static const AVOption asyncts_options[] = { |
| 56 | { "compensate", "Stretch/squeeze the data to make it match the timestamps", OFFSET(resample), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, A|F }, |
| 57 | { "min_delta", "Minimum difference between timestamps and audio data " |
| 58 | "(in seconds) to trigger padding/trimmin the data.", OFFSET(min_delta_sec), AV_OPT_TYPE_FLOAT, { .dbl = 0.1 }, 0, INT_MAX, A|F }, |
| 59 | { "max_comp", "Maximum compensation in samples per second.", OFFSET(max_comp), AV_OPT_TYPE_INT, { .i64 = 500 }, 0, INT_MAX, A|F }, |
| 60 | { "first_pts", "Assume the first pts should be this value.", OFFSET(first_pts), AV_OPT_TYPE_INT64, { .i64 = AV_NOPTS_VALUE }, INT64_MIN, INT64_MAX, A|F }, |
| 61 | { NULL } |
| 62 | }; |
| 63 | |
| 64 | AVFILTER_DEFINE_CLASS(asyncts); |
| 65 | |
| 66 | static av_cold int init(AVFilterContext *ctx) |
| 67 | { |
| 68 | ASyncContext *s = ctx->priv; |
| 69 | |
| 70 | s->pts = AV_NOPTS_VALUE; |
| 71 | s->first_frame = 1; |
| 72 | |
| 73 | return 0; |
| 74 | } |
| 75 | |
| 76 | static av_cold void uninit(AVFilterContext *ctx) |
| 77 | { |
| 78 | ASyncContext *s = ctx->priv; |
| 79 | |
| 80 | if (s->avr) { |
| 81 | avresample_close(s->avr); |
| 82 | avresample_free(&s->avr); |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | static int config_props(AVFilterLink *link) |
| 87 | { |
| 88 | ASyncContext *s = link->src->priv; |
| 89 | int ret; |
| 90 | |
| 91 | s->min_delta = s->min_delta_sec * link->sample_rate; |
| 92 | link->time_base = (AVRational){1, link->sample_rate}; |
| 93 | |
| 94 | s->avr = avresample_alloc_context(); |
| 95 | if (!s->avr) |
| 96 | return AVERROR(ENOMEM); |
| 97 | |
| 98 | av_opt_set_int(s->avr, "in_channel_layout", link->channel_layout, 0); |
| 99 | av_opt_set_int(s->avr, "out_channel_layout", link->channel_layout, 0); |
| 100 | av_opt_set_int(s->avr, "in_sample_fmt", link->format, 0); |
| 101 | av_opt_set_int(s->avr, "out_sample_fmt", link->format, 0); |
| 102 | av_opt_set_int(s->avr, "in_sample_rate", link->sample_rate, 0); |
| 103 | av_opt_set_int(s->avr, "out_sample_rate", link->sample_rate, 0); |
| 104 | |
| 105 | if (s->resample) |
| 106 | av_opt_set_int(s->avr, "force_resampling", 1, 0); |
| 107 | |
| 108 | if ((ret = avresample_open(s->avr)) < 0) |
| 109 | return ret; |
| 110 | |
| 111 | return 0; |
| 112 | } |
| 113 | |
| 114 | /* get amount of data currently buffered, in samples */ |
| 115 | static int64_t get_delay(ASyncContext *s) |
| 116 | { |
| 117 | return avresample_available(s->avr) + avresample_get_delay(s->avr); |
| 118 | } |
| 119 | |
| 120 | static void handle_trimming(AVFilterContext *ctx) |
| 121 | { |
| 122 | ASyncContext *s = ctx->priv; |
| 123 | |
| 124 | if (s->pts < s->first_pts) { |
| 125 | int delta = FFMIN(s->first_pts - s->pts, avresample_available(s->avr)); |
| 126 | av_log(ctx, AV_LOG_VERBOSE, "Trimming %d samples from start\n", |
| 127 | delta); |
| 128 | avresample_read(s->avr, NULL, delta); |
| 129 | s->pts += delta; |
| 130 | } else if (s->first_frame) |
| 131 | s->pts = s->first_pts; |
| 132 | } |
| 133 | |
| 134 | static int request_frame(AVFilterLink *link) |
| 135 | { |
| 136 | AVFilterContext *ctx = link->src; |
| 137 | ASyncContext *s = ctx->priv; |
| 138 | int ret = 0; |
| 139 | int nb_samples; |
| 140 | |
| 141 | s->got_output = 0; |
| 142 | while (ret >= 0 && !s->got_output) |
| 143 | ret = ff_request_frame(ctx->inputs[0]); |
| 144 | |
| 145 | /* flush the fifo */ |
| 146 | if (ret == AVERROR_EOF) { |
| 147 | if (s->first_pts != AV_NOPTS_VALUE) |
| 148 | handle_trimming(ctx); |
| 149 | |
| 150 | if (nb_samples = get_delay(s)) { |
| 151 | AVFrame *buf = ff_get_audio_buffer(link, nb_samples); |
| 152 | if (!buf) |
| 153 | return AVERROR(ENOMEM); |
| 154 | ret = avresample_convert(s->avr, buf->extended_data, |
| 155 | buf->linesize[0], nb_samples, NULL, 0, 0); |
| 156 | if (ret <= 0) { |
| 157 | av_frame_free(&buf); |
| 158 | return (ret < 0) ? ret : AVERROR_EOF; |
| 159 | } |
| 160 | |
| 161 | buf->pts = s->pts; |
| 162 | return ff_filter_frame(link, buf); |
| 163 | } |
| 164 | } |
| 165 | |
| 166 | return ret; |
| 167 | } |
| 168 | |
| 169 | static int write_to_fifo(ASyncContext *s, AVFrame *buf) |
| 170 | { |
| 171 | int ret = avresample_convert(s->avr, NULL, 0, 0, buf->extended_data, |
| 172 | buf->linesize[0], buf->nb_samples); |
| 173 | av_frame_free(&buf); |
| 174 | return ret; |
| 175 | } |
| 176 | |
| 177 | static int filter_frame(AVFilterLink *inlink, AVFrame *buf) |
| 178 | { |
| 179 | AVFilterContext *ctx = inlink->dst; |
| 180 | ASyncContext *s = ctx->priv; |
| 181 | AVFilterLink *outlink = ctx->outputs[0]; |
| 182 | int nb_channels = av_get_channel_layout_nb_channels(buf->channel_layout); |
| 183 | int64_t pts = (buf->pts == AV_NOPTS_VALUE) ? buf->pts : |
| 184 | av_rescale_q(buf->pts, inlink->time_base, outlink->time_base); |
| 185 | int out_size, ret; |
| 186 | int64_t delta; |
| 187 | int64_t new_pts; |
| 188 | |
| 189 | /* buffer data until we get the next timestamp */ |
| 190 | if (s->pts == AV_NOPTS_VALUE || pts == AV_NOPTS_VALUE) { |
| 191 | if (pts != AV_NOPTS_VALUE) { |
| 192 | s->pts = pts - get_delay(s); |
| 193 | } |
| 194 | return write_to_fifo(s, buf); |
| 195 | } |
| 196 | |
| 197 | if (s->first_pts != AV_NOPTS_VALUE) { |
| 198 | handle_trimming(ctx); |
| 199 | if (!avresample_available(s->avr)) |
| 200 | return write_to_fifo(s, buf); |
| 201 | } |
| 202 | |
| 203 | /* when we have two timestamps, compute how many samples would we have |
| 204 | * to add/remove to get proper sync between data and timestamps */ |
| 205 | delta = pts - s->pts - get_delay(s); |
| 206 | out_size = avresample_available(s->avr); |
| 207 | |
| 208 | if (labs(delta) > s->min_delta || |
| 209 | (s->first_frame && delta && s->first_pts != AV_NOPTS_VALUE)) { |
| 210 | av_log(ctx, AV_LOG_VERBOSE, "Discontinuity - %"PRId64" samples.\n", delta); |
| 211 | out_size = av_clipl_int32((int64_t)out_size + delta); |
| 212 | } else { |
| 213 | if (s->resample) { |
| 214 | // adjust the compensation if delta is non-zero |
| 215 | int delay = get_delay(s); |
| 216 | int comp = s->comp + av_clip(delta * inlink->sample_rate / delay, |
| 217 | -s->max_comp, s->max_comp); |
| 218 | if (comp != s->comp) { |
| 219 | av_log(ctx, AV_LOG_VERBOSE, "Compensating %d samples per second.\n", comp); |
| 220 | if (avresample_set_compensation(s->avr, comp, inlink->sample_rate) == 0) { |
| 221 | s->comp = comp; |
| 222 | } |
| 223 | } |
| 224 | } |
| 225 | // adjust PTS to avoid monotonicity errors with input PTS jitter |
| 226 | pts -= delta; |
| 227 | delta = 0; |
| 228 | } |
| 229 | |
| 230 | if (out_size > 0) { |
| 231 | AVFrame *buf_out = ff_get_audio_buffer(outlink, out_size); |
| 232 | if (!buf_out) { |
| 233 | ret = AVERROR(ENOMEM); |
| 234 | goto fail; |
| 235 | } |
| 236 | |
| 237 | if (s->first_frame && delta > 0) { |
| 238 | int planar = av_sample_fmt_is_planar(buf_out->format); |
| 239 | int planes = planar ? nb_channels : 1; |
| 240 | int block_size = av_get_bytes_per_sample(buf_out->format) * |
| 241 | (planar ? 1 : nb_channels); |
| 242 | |
| 243 | int ch; |
| 244 | |
| 245 | av_samples_set_silence(buf_out->extended_data, 0, delta, |
| 246 | nb_channels, buf->format); |
| 247 | |
| 248 | for (ch = 0; ch < planes; ch++) |
| 249 | buf_out->extended_data[ch] += delta * block_size; |
| 250 | |
| 251 | avresample_read(s->avr, buf_out->extended_data, out_size); |
| 252 | |
| 253 | for (ch = 0; ch < planes; ch++) |
| 254 | buf_out->extended_data[ch] -= delta * block_size; |
| 255 | } else { |
| 256 | avresample_read(s->avr, buf_out->extended_data, out_size); |
| 257 | |
| 258 | if (delta > 0) { |
| 259 | av_samples_set_silence(buf_out->extended_data, out_size - delta, |
| 260 | delta, nb_channels, buf->format); |
| 261 | } |
| 262 | } |
| 263 | buf_out->pts = s->pts; |
| 264 | ret = ff_filter_frame(outlink, buf_out); |
| 265 | if (ret < 0) |
| 266 | goto fail; |
| 267 | s->got_output = 1; |
| 268 | } else if (avresample_available(s->avr)) { |
| 269 | av_log(ctx, AV_LOG_WARNING, "Non-monotonous timestamps, dropping " |
| 270 | "whole buffer.\n"); |
| 271 | } |
| 272 | |
| 273 | /* drain any remaining buffered data */ |
| 274 | avresample_read(s->avr, NULL, avresample_available(s->avr)); |
| 275 | |
| 276 | new_pts = pts - avresample_get_delay(s->avr); |
| 277 | /* check for s->pts monotonicity */ |
| 278 | if (new_pts > s->pts) { |
| 279 | s->pts = new_pts; |
| 280 | ret = avresample_convert(s->avr, NULL, 0, 0, buf->extended_data, |
| 281 | buf->linesize[0], buf->nb_samples); |
| 282 | } else { |
| 283 | av_log(ctx, AV_LOG_WARNING, "Non-monotonous timestamps, dropping " |
| 284 | "whole buffer.\n"); |
| 285 | ret = 0; |
| 286 | } |
| 287 | |
| 288 | s->first_frame = 0; |
| 289 | fail: |
| 290 | av_frame_free(&buf); |
| 291 | |
| 292 | return ret; |
| 293 | } |
| 294 | |
| 295 | static const AVFilterPad avfilter_af_asyncts_inputs[] = { |
| 296 | { |
| 297 | .name = "default", |
| 298 | .type = AVMEDIA_TYPE_AUDIO, |
| 299 | .filter_frame = filter_frame |
| 300 | }, |
| 301 | { NULL } |
| 302 | }; |
| 303 | |
| 304 | static const AVFilterPad avfilter_af_asyncts_outputs[] = { |
| 305 | { |
| 306 | .name = "default", |
| 307 | .type = AVMEDIA_TYPE_AUDIO, |
| 308 | .config_props = config_props, |
| 309 | .request_frame = request_frame |
| 310 | }, |
| 311 | { NULL } |
| 312 | }; |
| 313 | |
| 314 | AVFilter ff_af_asyncts = { |
| 315 | .name = "asyncts", |
| 316 | .description = NULL_IF_CONFIG_SMALL("Sync audio data to timestamps"), |
| 317 | .init = init, |
| 318 | .uninit = uninit, |
| 319 | .priv_size = sizeof(ASyncContext), |
| 320 | .priv_class = &asyncts_class, |
| 321 | .inputs = avfilter_af_asyncts_inputs, |
| 322 | .outputs = avfilter_af_asyncts_outputs, |
| 323 | }; |