Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (c) 2012 Nicolas George | |
3 | * | |
4 | * This file is part of FFmpeg. | |
5 | * | |
6 | * FFmpeg is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * FFmpeg is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
14 | * See the GNU Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public License | |
17 | * along with FFmpeg; if not, write to the Free Software Foundation, Inc., | |
18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | */ | |
20 | ||
21 | /** | |
22 | * @file | |
23 | * concat audio-video filter | |
24 | */ | |
25 | ||
26 | #include "libavutil/avassert.h" | |
27 | #include "libavutil/avstring.h" | |
28 | #include "libavutil/channel_layout.h" | |
29 | #include "libavutil/opt.h" | |
30 | #include "avfilter.h" | |
31 | #define FF_BUFQUEUE_SIZE 256 | |
32 | #include "bufferqueue.h" | |
33 | #include "internal.h" | |
34 | #include "video.h" | |
35 | #include "audio.h" | |
36 | ||
37 | #define TYPE_ALL 2 | |
38 | ||
39 | typedef struct { | |
40 | const AVClass *class; | |
41 | unsigned nb_streams[TYPE_ALL]; /**< number of out streams of each type */ | |
42 | unsigned nb_segments; | |
43 | unsigned cur_idx; /**< index of the first input of current segment */ | |
44 | int64_t delta_ts; /**< timestamp to add to produce output timestamps */ | |
45 | unsigned nb_in_active; /**< number of active inputs in current segment */ | |
46 | unsigned unsafe; | |
47 | struct concat_in { | |
48 | int64_t pts; | |
49 | int64_t nb_frames; | |
50 | unsigned eof; | |
51 | struct FFBufQueue queue; | |
52 | } *in; | |
53 | } ConcatContext; | |
54 | ||
55 | #define OFFSET(x) offsetof(ConcatContext, x) | |
56 | #define A AV_OPT_FLAG_AUDIO_PARAM | |
57 | #define F AV_OPT_FLAG_FILTERING_PARAM | |
58 | #define V AV_OPT_FLAG_VIDEO_PARAM | |
59 | ||
60 | static const AVOption concat_options[] = { | |
61 | { "n", "specify the number of segments", OFFSET(nb_segments), | |
f6fa7814 | 62 | AV_OPT_TYPE_INT, { .i64 = 2 }, 1, INT_MAX, V|A|F}, |
2ba45a60 DM |
63 | { "v", "specify the number of video streams", |
64 | OFFSET(nb_streams[AVMEDIA_TYPE_VIDEO]), | |
65 | AV_OPT_TYPE_INT, { .i64 = 1 }, 0, INT_MAX, V|F }, | |
66 | { "a", "specify the number of audio streams", | |
67 | OFFSET(nb_streams[AVMEDIA_TYPE_AUDIO]), | |
68 | AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, A|F}, | |
69 | { "unsafe", "enable unsafe mode", | |
70 | OFFSET(unsafe), | |
71 | AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, V|A|F}, | |
72 | { NULL } | |
73 | }; | |
74 | ||
75 | AVFILTER_DEFINE_CLASS(concat); | |
76 | ||
77 | static int query_formats(AVFilterContext *ctx) | |
78 | { | |
79 | ConcatContext *cat = ctx->priv; | |
80 | unsigned type, nb_str, idx0 = 0, idx, str, seg; | |
81 | AVFilterFormats *formats, *rates = NULL; | |
82 | AVFilterChannelLayouts *layouts = NULL; | |
83 | ||
84 | for (type = 0; type < TYPE_ALL; type++) { | |
85 | nb_str = cat->nb_streams[type]; | |
86 | for (str = 0; str < nb_str; str++) { | |
87 | idx = idx0; | |
88 | ||
89 | /* Set the output formats */ | |
90 | formats = ff_all_formats(type); | |
91 | if (!formats) | |
92 | return AVERROR(ENOMEM); | |
93 | ff_formats_ref(formats, &ctx->outputs[idx]->in_formats); | |
94 | if (type == AVMEDIA_TYPE_AUDIO) { | |
95 | rates = ff_all_samplerates(); | |
96 | if (!rates) | |
97 | return AVERROR(ENOMEM); | |
98 | ff_formats_ref(rates, &ctx->outputs[idx]->in_samplerates); | |
99 | layouts = ff_all_channel_layouts(); | |
100 | if (!layouts) | |
101 | return AVERROR(ENOMEM); | |
102 | ff_channel_layouts_ref(layouts, &ctx->outputs[idx]->in_channel_layouts); | |
103 | } | |
104 | ||
105 | /* Set the same formats for each corresponding input */ | |
106 | for (seg = 0; seg < cat->nb_segments; seg++) { | |
107 | ff_formats_ref(formats, &ctx->inputs[idx]->out_formats); | |
108 | if (type == AVMEDIA_TYPE_AUDIO) { | |
109 | ff_formats_ref(rates, &ctx->inputs[idx]->out_samplerates); | |
110 | ff_channel_layouts_ref(layouts, &ctx->inputs[idx]->out_channel_layouts); | |
111 | } | |
112 | idx += ctx->nb_outputs; | |
113 | } | |
114 | ||
115 | idx0++; | |
116 | } | |
117 | } | |
118 | return 0; | |
119 | } | |
120 | ||
121 | static int config_output(AVFilterLink *outlink) | |
122 | { | |
123 | AVFilterContext *ctx = outlink->src; | |
124 | ConcatContext *cat = ctx->priv; | |
125 | unsigned out_no = FF_OUTLINK_IDX(outlink); | |
126 | unsigned in_no = out_no, seg; | |
127 | AVFilterLink *inlink = ctx->inputs[in_no]; | |
128 | ||
129 | /* enhancement: find a common one */ | |
130 | outlink->time_base = AV_TIME_BASE_Q; | |
131 | outlink->w = inlink->w; | |
132 | outlink->h = inlink->h; | |
133 | outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; | |
134 | outlink->format = inlink->format; | |
135 | for (seg = 1; seg < cat->nb_segments; seg++) { | |
136 | inlink = ctx->inputs[in_no += ctx->nb_outputs]; | |
137 | if (!outlink->sample_aspect_ratio.num) | |
138 | outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; | |
139 | /* possible enhancement: unsafe mode, do not check */ | |
140 | if (outlink->w != inlink->w || | |
141 | outlink->h != inlink->h || | |
142 | outlink->sample_aspect_ratio.num != inlink->sample_aspect_ratio.num && | |
143 | inlink->sample_aspect_ratio.num || | |
144 | outlink->sample_aspect_ratio.den != inlink->sample_aspect_ratio.den) { | |
145 | av_log(ctx, AV_LOG_ERROR, "Input link %s parameters " | |
146 | "(size %dx%d, SAR %d:%d) do not match the corresponding " | |
147 | "output link %s parameters (%dx%d, SAR %d:%d)\n", | |
148 | ctx->input_pads[in_no].name, inlink->w, inlink->h, | |
149 | inlink->sample_aspect_ratio.num, | |
150 | inlink->sample_aspect_ratio.den, | |
151 | ctx->input_pads[out_no].name, outlink->w, outlink->h, | |
152 | outlink->sample_aspect_ratio.num, | |
153 | outlink->sample_aspect_ratio.den); | |
154 | if (!cat->unsafe) | |
155 | return AVERROR(EINVAL); | |
156 | } | |
157 | } | |
158 | ||
159 | return 0; | |
160 | } | |
161 | ||
162 | static int push_frame(AVFilterContext *ctx, unsigned in_no, AVFrame *buf) | |
163 | { | |
164 | ConcatContext *cat = ctx->priv; | |
165 | unsigned out_no = in_no % ctx->nb_outputs; | |
166 | AVFilterLink * inlink = ctx-> inputs[ in_no]; | |
167 | AVFilterLink *outlink = ctx->outputs[out_no]; | |
168 | struct concat_in *in = &cat->in[in_no]; | |
169 | ||
170 | buf->pts = av_rescale_q(buf->pts, inlink->time_base, outlink->time_base); | |
171 | in->pts = buf->pts; | |
172 | in->nb_frames++; | |
173 | /* add duration to input PTS */ | |
174 | if (inlink->sample_rate) | |
175 | /* use number of audio samples */ | |
176 | in->pts += av_rescale_q(buf->nb_samples, | |
177 | av_make_q(1, inlink->sample_rate), | |
178 | outlink->time_base); | |
179 | else if (in->nb_frames >= 2) | |
180 | /* use mean duration */ | |
181 | in->pts = av_rescale(in->pts, in->nb_frames, in->nb_frames - 1); | |
182 | ||
183 | buf->pts += cat->delta_ts; | |
184 | return ff_filter_frame(outlink, buf); | |
185 | } | |
186 | ||
187 | static int process_frame(AVFilterLink *inlink, AVFrame *buf) | |
188 | { | |
189 | AVFilterContext *ctx = inlink->dst; | |
190 | ConcatContext *cat = ctx->priv; | |
191 | unsigned in_no = FF_INLINK_IDX(inlink); | |
192 | ||
193 | if (in_no < cat->cur_idx) { | |
194 | av_log(ctx, AV_LOG_ERROR, "Frame after EOF on input %s\n", | |
195 | ctx->input_pads[in_no].name); | |
196 | av_frame_free(&buf); | |
197 | } else if (in_no >= cat->cur_idx + ctx->nb_outputs) { | |
198 | ff_bufqueue_add(ctx, &cat->in[in_no].queue, buf); | |
199 | } else { | |
200 | return push_frame(ctx, in_no, buf); | |
201 | } | |
202 | return 0; | |
203 | } | |
204 | ||
205 | static AVFrame *get_video_buffer(AVFilterLink *inlink, int w, int h) | |
206 | { | |
207 | AVFilterContext *ctx = inlink->dst; | |
208 | unsigned in_no = FF_INLINK_IDX(inlink); | |
209 | AVFilterLink *outlink = ctx->outputs[in_no % ctx->nb_outputs]; | |
210 | ||
211 | return ff_get_video_buffer(outlink, w, h); | |
212 | } | |
213 | ||
214 | static AVFrame *get_audio_buffer(AVFilterLink *inlink, int nb_samples) | |
215 | { | |
216 | AVFilterContext *ctx = inlink->dst; | |
217 | unsigned in_no = FF_INLINK_IDX(inlink); | |
218 | AVFilterLink *outlink = ctx->outputs[in_no % ctx->nb_outputs]; | |
219 | ||
220 | return ff_get_audio_buffer(outlink, nb_samples); | |
221 | } | |
222 | ||
223 | static int filter_frame(AVFilterLink *inlink, AVFrame *buf) | |
224 | { | |
225 | return process_frame(inlink, buf); | |
226 | } | |
227 | ||
228 | static void close_input(AVFilterContext *ctx, unsigned in_no) | |
229 | { | |
230 | ConcatContext *cat = ctx->priv; | |
231 | ||
232 | cat->in[in_no].eof = 1; | |
233 | cat->nb_in_active--; | |
234 | av_log(ctx, AV_LOG_VERBOSE, "EOF on %s, %d streams left in segment.\n", | |
235 | ctx->input_pads[in_no].name, cat->nb_in_active); | |
236 | } | |
237 | ||
238 | static void find_next_delta_ts(AVFilterContext *ctx, int64_t *seg_delta) | |
239 | { | |
240 | ConcatContext *cat = ctx->priv; | |
241 | unsigned i = cat->cur_idx; | |
242 | unsigned imax = i + ctx->nb_outputs; | |
243 | int64_t pts; | |
244 | ||
245 | pts = cat->in[i++].pts; | |
246 | for (; i < imax; i++) | |
247 | pts = FFMAX(pts, cat->in[i].pts); | |
248 | cat->delta_ts += pts; | |
249 | *seg_delta = pts; | |
250 | } | |
251 | ||
252 | static int send_silence(AVFilterContext *ctx, unsigned in_no, unsigned out_no, | |
253 | int64_t seg_delta) | |
254 | { | |
255 | ConcatContext *cat = ctx->priv; | |
256 | AVFilterLink *outlink = ctx->outputs[out_no]; | |
257 | int64_t base_pts = cat->in[in_no].pts + cat->delta_ts - seg_delta; | |
258 | int64_t nb_samples, sent = 0; | |
259 | int frame_nb_samples, ret; | |
260 | AVRational rate_tb = { 1, ctx->inputs[in_no]->sample_rate }; | |
261 | AVFrame *buf; | |
262 | int nb_channels = av_get_channel_layout_nb_channels(outlink->channel_layout); | |
263 | ||
264 | if (!rate_tb.den) | |
265 | return AVERROR_BUG; | |
266 | nb_samples = av_rescale_q(seg_delta - cat->in[in_no].pts, | |
267 | outlink->time_base, rate_tb); | |
268 | frame_nb_samples = FFMAX(9600, rate_tb.den / 5); /* arbitrary */ | |
269 | while (nb_samples) { | |
270 | frame_nb_samples = FFMIN(frame_nb_samples, nb_samples); | |
271 | buf = ff_get_audio_buffer(outlink, frame_nb_samples); | |
272 | if (!buf) | |
273 | return AVERROR(ENOMEM); | |
274 | av_samples_set_silence(buf->extended_data, 0, frame_nb_samples, | |
275 | nb_channels, outlink->format); | |
276 | buf->pts = base_pts + av_rescale_q(sent, rate_tb, outlink->time_base); | |
277 | ret = ff_filter_frame(outlink, buf); | |
278 | if (ret < 0) | |
279 | return ret; | |
280 | sent += frame_nb_samples; | |
281 | nb_samples -= frame_nb_samples; | |
282 | } | |
283 | return 0; | |
284 | } | |
285 | ||
286 | static int flush_segment(AVFilterContext *ctx) | |
287 | { | |
288 | int ret; | |
289 | ConcatContext *cat = ctx->priv; | |
290 | unsigned str, str_max; | |
291 | int64_t seg_delta; | |
292 | ||
293 | find_next_delta_ts(ctx, &seg_delta); | |
294 | cat->cur_idx += ctx->nb_outputs; | |
295 | cat->nb_in_active = ctx->nb_outputs; | |
296 | av_log(ctx, AV_LOG_VERBOSE, "Segment finished at pts=%"PRId64"\n", | |
297 | cat->delta_ts); | |
298 | ||
299 | if (cat->cur_idx < ctx->nb_inputs) { | |
300 | /* pad audio streams with silence */ | |
301 | str = cat->nb_streams[AVMEDIA_TYPE_VIDEO]; | |
302 | str_max = str + cat->nb_streams[AVMEDIA_TYPE_AUDIO]; | |
303 | for (; str < str_max; str++) { | |
304 | ret = send_silence(ctx, cat->cur_idx - ctx->nb_outputs + str, str, | |
305 | seg_delta); | |
306 | if (ret < 0) | |
307 | return ret; | |
308 | } | |
309 | /* flush queued buffers */ | |
310 | /* possible enhancement: flush in PTS order */ | |
311 | str_max = cat->cur_idx + ctx->nb_outputs; | |
312 | for (str = cat->cur_idx; str < str_max; str++) { | |
313 | while (cat->in[str].queue.available) { | |
314 | ret = push_frame(ctx, str, ff_bufqueue_get(&cat->in[str].queue)); | |
315 | if (ret < 0) | |
316 | return ret; | |
317 | } | |
318 | } | |
319 | } | |
320 | return 0; | |
321 | } | |
322 | ||
323 | static int request_frame(AVFilterLink *outlink) | |
324 | { | |
325 | AVFilterContext *ctx = outlink->src; | |
326 | ConcatContext *cat = ctx->priv; | |
327 | unsigned out_no = FF_OUTLINK_IDX(outlink); | |
328 | unsigned in_no = out_no + cat->cur_idx; | |
329 | unsigned str, str_max; | |
330 | int ret; | |
331 | ||
332 | while (1) { | |
333 | if (in_no >= ctx->nb_inputs) | |
334 | return AVERROR_EOF; | |
335 | if (!cat->in[in_no].eof) { | |
336 | ret = ff_request_frame(ctx->inputs[in_no]); | |
337 | if (ret != AVERROR_EOF) | |
338 | return ret; | |
339 | close_input(ctx, in_no); | |
340 | } | |
341 | /* cycle on all inputs to finish the segment */ | |
342 | /* possible enhancement: request in PTS order */ | |
343 | str_max = cat->cur_idx + ctx->nb_outputs - 1; | |
344 | for (str = cat->cur_idx; cat->nb_in_active; | |
345 | str = str == str_max ? cat->cur_idx : str + 1) { | |
346 | if (cat->in[str].eof) | |
347 | continue; | |
348 | ret = ff_request_frame(ctx->inputs[str]); | |
349 | if (ret == AVERROR_EOF) | |
350 | close_input(ctx, str); | |
351 | else if (ret < 0) | |
352 | return ret; | |
353 | } | |
354 | ret = flush_segment(ctx); | |
355 | if (ret < 0) | |
356 | return ret; | |
357 | in_no += ctx->nb_outputs; | |
358 | } | |
359 | } | |
360 | ||
361 | static av_cold int init(AVFilterContext *ctx) | |
362 | { | |
363 | ConcatContext *cat = ctx->priv; | |
364 | unsigned seg, type, str; | |
365 | ||
366 | /* create input pads */ | |
367 | for (seg = 0; seg < cat->nb_segments; seg++) { | |
368 | for (type = 0; type < TYPE_ALL; type++) { | |
369 | for (str = 0; str < cat->nb_streams[type]; str++) { | |
370 | AVFilterPad pad = { | |
371 | .type = type, | |
372 | .get_video_buffer = get_video_buffer, | |
373 | .get_audio_buffer = get_audio_buffer, | |
374 | .filter_frame = filter_frame, | |
375 | }; | |
376 | pad.name = av_asprintf("in%d:%c%d", seg, "va"[type], str); | |
377 | ff_insert_inpad(ctx, ctx->nb_inputs, &pad); | |
378 | } | |
379 | } | |
380 | } | |
381 | /* create output pads */ | |
382 | for (type = 0; type < TYPE_ALL; type++) { | |
383 | for (str = 0; str < cat->nb_streams[type]; str++) { | |
384 | AVFilterPad pad = { | |
385 | .type = type, | |
386 | .config_props = config_output, | |
387 | .request_frame = request_frame, | |
388 | }; | |
389 | pad.name = av_asprintf("out:%c%d", "va"[type], str); | |
390 | ff_insert_outpad(ctx, ctx->nb_outputs, &pad); | |
391 | } | |
392 | } | |
393 | ||
394 | cat->in = av_calloc(ctx->nb_inputs, sizeof(*cat->in)); | |
395 | if (!cat->in) | |
396 | return AVERROR(ENOMEM); | |
397 | cat->nb_in_active = ctx->nb_outputs; | |
398 | return 0; | |
399 | } | |
400 | ||
401 | static av_cold void uninit(AVFilterContext *ctx) | |
402 | { | |
403 | ConcatContext *cat = ctx->priv; | |
404 | unsigned i; | |
405 | ||
406 | for (i = 0; i < ctx->nb_inputs; i++) { | |
407 | av_freep(&ctx->input_pads[i].name); | |
408 | ff_bufqueue_discard_all(&cat->in[i].queue); | |
409 | } | |
410 | for (i = 0; i < ctx->nb_outputs; i++) | |
411 | av_freep(&ctx->output_pads[i].name); | |
f6fa7814 | 412 | av_freep(&cat->in); |
2ba45a60 DM |
413 | } |
414 | ||
415 | AVFilter ff_avf_concat = { | |
416 | .name = "concat", | |
417 | .description = NULL_IF_CONFIG_SMALL("Concatenate audio and video streams."), | |
418 | .init = init, | |
419 | .uninit = uninit, | |
420 | .query_formats = query_formats, | |
421 | .priv_size = sizeof(ConcatContext), | |
422 | .inputs = NULL, | |
423 | .outputs = NULL, | |
424 | .priv_class = &concat_class, | |
425 | .flags = AVFILTER_FLAG_DYNAMIC_INPUTS | AVFILTER_FLAG_DYNAMIC_OUTPUTS, | |
426 | }; |