Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (c) 2011 Stefano Sabatini | |
3 | * | |
4 | * This file is part of FFmpeg. | |
5 | * | |
6 | * FFmpeg is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * FFmpeg is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with FFmpeg; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | */ | |
20 | ||
21 | /** | |
22 | * @file | |
23 | * filter for selecting which frame passes in the filterchain | |
24 | */ | |
25 | ||
26 | #include "libavutil/avstring.h" | |
27 | #include "libavutil/eval.h" | |
28 | #include "libavutil/fifo.h" | |
29 | #include "libavutil/internal.h" | |
30 | #include "libavutil/opt.h" | |
31 | #include "libavutil/pixelutils.h" | |
32 | #include "avfilter.h" | |
33 | #include "audio.h" | |
34 | #include "formats.h" | |
35 | #include "internal.h" | |
36 | #include "video.h" | |
37 | ||
/**
 * Identifiers that may appear in the selection expression.
 *
 * The table is handed to av_expr_parse(). The position of every name has to
 * mirror the matching entry of enum var_name, which is what indexes
 * SelectContext.var_values. The list is NULL-terminated.
 */
static const char *const var_names[] = {
    /* stream time base */
    "TB",                   /* timebase */

    /* timestamps in TB units */
    "pts",                  /* original pts in the file of the frame */
    "start_pts",            /* first PTS in the stream, expressed in TB units */
    "prev_pts",             /* previous frame PTS */
    "prev_selected_pts",    /* previous selected frame PTS */

    /* timestamps in seconds */
    "t",                    /* timestamp expressed in seconds */
    "start_t",              /* first PTS in the stream, expressed in seconds */
    "prev_t",               /* previous frame time */
    "prev_selected_t",      /* previously selected time */

    /* picture type of the frame, followed by the constants to compare with */
    "pict_type",            /* the type of picture in the movie */
    "I",
    "P",
    "B",
    "S",
    "SI",
    "SP",
    "BI",
    "PICT_TYPE_I",
    "PICT_TYPE_P",
    "PICT_TYPE_B",
    "PICT_TYPE_S",
    "PICT_TYPE_SI",
    "PICT_TYPE_SP",
    "PICT_TYPE_BI",

    /* interlacing of the frame, followed by the constants to compare with */
    "interlace_type",       /* the frame interlace type */
    "PROGRESSIVE",
    "TOPFIRST",
    "BOTTOMFIRST",

    /* audio-only values */
    "consumed_samples_n",   /* number of samples consumed by the filter (only audio) */
    "samples_n",            /* number of samples in the current frame (only audio) */
    "sample_rate",          /* sample rate (only audio) */

    /* frame counters */
    "n",                    /* frame number (starting from zero) */
    "selected_n",           /* selected frame number (starting from zero) */
    "prev_selected_n",      /* number of the last selected frame */

    /* packet level information */
    "key",                  /* tell if the frame is a key frame */
    "pos",                  /* original position in the file of the frame */

    /* scene change score (filled in only when scene detection is enabled) */
    "scene",

    NULL
};
87 | ||
/**
 * Indices into SelectContext.var_values.
 *
 * Each enumerator sits at the same position as its textual name in
 * var_names[], so the two lists have to be kept in the same order.
 */
enum var_name {
    VAR_TB = 0,                 /* "TB" */

    VAR_PTS,                    /* "pts" */
    VAR_START_PTS,              /* "start_pts" */
    VAR_PREV_PTS,               /* "prev_pts" */
    VAR_PREV_SELECTED_PTS,      /* "prev_selected_pts" */

    VAR_T,                      /* "t" */
    VAR_START_T,                /* "start_t" */
    VAR_PREV_T,                 /* "prev_t" */
    VAR_PREV_SELECTED_T,        /* "prev_selected_t" */

    VAR_PICT_TYPE,              /* "pict_type" */
    VAR_I,                      /* "I" */
    VAR_P,                      /* "P" */
    VAR_B,                      /* "B" */
    VAR_S,                      /* "S" */
    VAR_SI,                     /* "SI" */
    VAR_SP,                     /* "SP" */
    VAR_BI,                     /* "BI" */
    VAR_PICT_TYPE_I,            /* "PICT_TYPE_I" */
    VAR_PICT_TYPE_P,            /* "PICT_TYPE_P" */
    VAR_PICT_TYPE_B,            /* "PICT_TYPE_B" */
    VAR_PICT_TYPE_S,            /* "PICT_TYPE_S" */
    VAR_PICT_TYPE_SI,           /* "PICT_TYPE_SI" */
    VAR_PICT_TYPE_SP,           /* "PICT_TYPE_SP" */
    VAR_PICT_TYPE_BI,           /* "PICT_TYPE_BI" */

    VAR_INTERLACE_TYPE,         /* "interlace_type" */
    VAR_INTERLACE_TYPE_P,       /* "PROGRESSIVE" */
    VAR_INTERLACE_TYPE_T,       /* "TOPFIRST" */
    VAR_INTERLACE_TYPE_B,       /* "BOTTOMFIRST" */

    VAR_CONSUMED_SAMPLES_N,     /* "consumed_samples_n" */
    VAR_SAMPLES_N,              /* "samples_n" */
    VAR_SAMPLE_RATE,            /* "sample_rate" */

    VAR_N,                      /* "n" */
    VAR_SELECTED_N,             /* "selected_n" */
    VAR_PREV_SELECTED_N,        /* "prev_selected_n" */

    VAR_KEY,                    /* "key" */
    VAR_POS,                    /* "pos" */

    VAR_SCENE,                  /* "scene" */

    VAR_VARS_NB                 /* number of variables, not a variable itself */
};
137 | ||
138 | typedef struct SelectContext { | |
139 | const AVClass *class; | |
140 | char *expr_str; | |
141 | AVExpr *expr; | |
142 | double var_values[VAR_VARS_NB]; | |
143 | int do_scene_detect; ///< 1 if the expression requires scene detection variables, 0 otherwise | |
144 | av_pixelutils_sad_fn sad; ///< Sum of the absolute difference function (scene detect only) | |
145 | double prev_mafd; ///< previous MAFD (scene detect only) | |
146 | AVFrame *prev_picref; ///< previous frame (scene detect only) | |
147 | double select; | |
148 | int select_out; ///< mark the selected output pad index | |
149 | int nb_outputs; | |
150 | } SelectContext; | |
151 | ||
/** Offset of a SelectContext member, as needed by the AVOption entries. */
#define OFFSET(x) offsetof(SelectContext, x)

/**
 * Emit the option table filt_name##_options.
 *
 * "expr"/"e" both store into expr_str (default "1"); "outputs"/"n" both store
 * into nb_outputs (default 1, range 1..INT_MAX). FLAGS is applied to every
 * entry so the same table can be instantiated for audio and for video.
 */
#define DEFINE_OPTIONS(filt_name, FLAGS)                                        \
static const AVOption filt_name##_options[] = {                                 \
    { "expr", "set an expression to use for selecting frames",                  \
      OFFSET(expr_str), AV_OPT_TYPE_STRING, { .str = "1" }, .flags=FLAGS },     \
    { "e",    "set an expression to use for selecting frames",                  \
      OFFSET(expr_str), AV_OPT_TYPE_STRING, { .str = "1" }, .flags=FLAGS },     \
    { "outputs", "set the number of outputs",                                   \
      OFFSET(nb_outputs), AV_OPT_TYPE_INT, {.i64 = 1}, 1, INT_MAX, .flags=FLAGS }, \
    { "n",       "set the number of outputs",                                   \
      OFFSET(nb_outputs), AV_OPT_TYPE_INT, {.i64 = 1}, 1, INT_MAX, .flags=FLAGS }, \
    { NULL }                                                                    \
}
161 | ||
162 | static int request_frame(AVFilterLink *outlink); | |
163 | ||
164 | static av_cold int init(AVFilterContext *ctx) | |
165 | { | |
166 | SelectContext *select = ctx->priv; | |
167 | int i, ret; | |
168 | ||
169 | if ((ret = av_expr_parse(&select->expr, select->expr_str, | |
170 | var_names, NULL, NULL, NULL, NULL, 0, ctx)) < 0) { | |
171 | av_log(ctx, AV_LOG_ERROR, "Error while parsing expression '%s'\n", | |
172 | select->expr_str); | |
173 | return ret; | |
174 | } | |
175 | select->do_scene_detect = !!strstr(select->expr_str, "scene"); | |
176 | ||
177 | for (i = 0; i < select->nb_outputs; i++) { | |
178 | AVFilterPad pad = { 0 }; | |
179 | ||
180 | pad.name = av_asprintf("output%d", i); | |
181 | if (!pad.name) | |
182 | return AVERROR(ENOMEM); | |
183 | pad.type = ctx->filter->inputs[0].type; | |
184 | pad.request_frame = request_frame; | |
185 | ff_insert_outpad(ctx, i, &pad); | |
186 | } | |
187 | ||
188 | return 0; | |
189 | } | |
190 | ||
/* Values stored in the "interlace_type" variable and exposed to expressions
 * as PROGRESSIVE, TOPFIRST and BOTTOMFIRST. */
enum {
    INTERLACE_TYPE_P = 0,   /* progressive frame */
    INTERLACE_TYPE_T = 1,   /* interlaced, top field first */
    INTERLACE_TYPE_B = 2    /* interlaced, bottom field first */
};
194 | ||
195 | static int config_input(AVFilterLink *inlink) | |
196 | { | |
197 | SelectContext *select = inlink->dst->priv; | |
198 | ||
199 | select->var_values[VAR_N] = 0.0; | |
200 | select->var_values[VAR_SELECTED_N] = 0.0; | |
201 | ||
202 | select->var_values[VAR_TB] = av_q2d(inlink->time_base); | |
203 | ||
204 | select->var_values[VAR_PREV_PTS] = NAN; | |
205 | select->var_values[VAR_PREV_SELECTED_PTS] = NAN; | |
206 | select->var_values[VAR_PREV_SELECTED_T] = NAN; | |
207 | select->var_values[VAR_PREV_T] = NAN; | |
208 | select->var_values[VAR_START_PTS] = NAN; | |
209 | select->var_values[VAR_START_T] = NAN; | |
210 | ||
211 | select->var_values[VAR_I] = AV_PICTURE_TYPE_I; | |
212 | select->var_values[VAR_P] = AV_PICTURE_TYPE_P; | |
213 | select->var_values[VAR_B] = AV_PICTURE_TYPE_B; | |
214 | select->var_values[VAR_SI] = AV_PICTURE_TYPE_SI; | |
215 | select->var_values[VAR_SP] = AV_PICTURE_TYPE_SP; | |
216 | select->var_values[VAR_BI] = AV_PICTURE_TYPE_BI; | |
217 | select->var_values[VAR_PICT_TYPE_I] = AV_PICTURE_TYPE_I; | |
218 | select->var_values[VAR_PICT_TYPE_P] = AV_PICTURE_TYPE_P; | |
219 | select->var_values[VAR_PICT_TYPE_B] = AV_PICTURE_TYPE_B; | |
220 | select->var_values[VAR_PICT_TYPE_SI] = AV_PICTURE_TYPE_SI; | |
221 | select->var_values[VAR_PICT_TYPE_SP] = AV_PICTURE_TYPE_SP; | |
222 | select->var_values[VAR_PICT_TYPE_BI] = AV_PICTURE_TYPE_BI; | |
223 | ||
224 | select->var_values[VAR_INTERLACE_TYPE_P] = INTERLACE_TYPE_P; | |
225 | select->var_values[VAR_INTERLACE_TYPE_T] = INTERLACE_TYPE_T; | |
226 | select->var_values[VAR_INTERLACE_TYPE_B] = INTERLACE_TYPE_B; | |
227 | ||
228 | select->var_values[VAR_PICT_TYPE] = NAN; | |
229 | select->var_values[VAR_INTERLACE_TYPE] = NAN; | |
230 | select->var_values[VAR_SCENE] = NAN; | |
231 | select->var_values[VAR_CONSUMED_SAMPLES_N] = NAN; | |
232 | select->var_values[VAR_SAMPLES_N] = NAN; | |
233 | ||
234 | select->var_values[VAR_SAMPLE_RATE] = | |
235 | inlink->type == AVMEDIA_TYPE_AUDIO ? inlink->sample_rate : NAN; | |
236 | ||
237 | if (select->do_scene_detect) { | |
238 | select->sad = av_pixelutils_get_sad_fn(3, 3, 2, select); // 8x8 both sources aligned | |
239 | if (!select->sad) | |
240 | return AVERROR(EINVAL); | |
241 | } | |
242 | return 0; | |
243 | } | |
244 | ||
245 | static double get_scene_score(AVFilterContext *ctx, AVFrame *frame) | |
246 | { | |
247 | double ret = 0; | |
248 | SelectContext *select = ctx->priv; | |
249 | AVFrame *prev_picref = select->prev_picref; | |
250 | ||
251 | if (prev_picref && | |
252 | frame->height == prev_picref->height && | |
253 | frame->width == prev_picref->width) { | |
254 | int x, y, nb_sad = 0; | |
255 | int64_t sad = 0; | |
256 | double mafd, diff; | |
257 | uint8_t *p1 = frame->data[0]; | |
258 | uint8_t *p2 = prev_picref->data[0]; | |
259 | const int p1_linesize = frame->linesize[0]; | |
260 | const int p2_linesize = prev_picref->linesize[0]; | |
261 | ||
262 | for (y = 0; y < frame->height - 7; y += 8) { | |
263 | for (x = 0; x < frame->width*3 - 7; x += 8) { | |
264 | sad += select->sad(p1 + x, p1_linesize, p2 + x, p2_linesize); | |
265 | nb_sad += 8 * 8; | |
266 | } | |
267 | p1 += 8 * p1_linesize; | |
268 | p2 += 8 * p2_linesize; | |
269 | } | |
270 | emms_c(); | |
271 | mafd = nb_sad ? (double)sad / nb_sad : 0; | |
272 | diff = fabs(mafd - select->prev_mafd); | |
273 | ret = av_clipf(FFMIN(mafd, diff) / 100., 0, 1); | |
274 | select->prev_mafd = mafd; | |
275 | av_frame_free(&prev_picref); | |
276 | } | |
277 | select->prev_picref = av_frame_clone(frame); | |
278 | return ret; | |
279 | } | |
280 | ||
281 | #define D2TS(d) (isnan(d) ? AV_NOPTS_VALUE : (int64_t)(d)) | |
282 | #define TS2D(ts) ((ts) == AV_NOPTS_VALUE ? NAN : (double)(ts)) | |
283 | ||
284 | static void select_frame(AVFilterContext *ctx, AVFrame *frame) | |
285 | { | |
286 | SelectContext *select = ctx->priv; | |
287 | AVFilterLink *inlink = ctx->inputs[0]; | |
288 | double res; | |
289 | ||
290 | if (isnan(select->var_values[VAR_START_PTS])) | |
291 | select->var_values[VAR_START_PTS] = TS2D(frame->pts); | |
292 | if (isnan(select->var_values[VAR_START_T])) | |
293 | select->var_values[VAR_START_T] = TS2D(frame->pts) * av_q2d(inlink->time_base); | |
294 | ||
295 | select->var_values[VAR_N ] = inlink->frame_count; | |
296 | select->var_values[VAR_PTS] = TS2D(frame->pts); | |
297 | select->var_values[VAR_T ] = TS2D(frame->pts) * av_q2d(inlink->time_base); | |
298 | select->var_values[VAR_POS] = av_frame_get_pkt_pos(frame) == -1 ? NAN : av_frame_get_pkt_pos(frame); | |
299 | select->var_values[VAR_KEY] = frame->key_frame; | |
300 | ||
301 | switch (inlink->type) { | |
302 | case AVMEDIA_TYPE_AUDIO: | |
303 | select->var_values[VAR_SAMPLES_N] = frame->nb_samples; | |
304 | break; | |
305 | ||
306 | case AVMEDIA_TYPE_VIDEO: | |
307 | select->var_values[VAR_INTERLACE_TYPE] = | |
308 | !frame->interlaced_frame ? INTERLACE_TYPE_P : | |
309 | frame->top_field_first ? INTERLACE_TYPE_T : INTERLACE_TYPE_B; | |
310 | select->var_values[VAR_PICT_TYPE] = frame->pict_type; | |
311 | if (select->do_scene_detect) { | |
312 | char buf[32]; | |
313 | select->var_values[VAR_SCENE] = get_scene_score(ctx, frame); | |
314 | // TODO: document metadata | |
315 | snprintf(buf, sizeof(buf), "%f", select->var_values[VAR_SCENE]); | |
316 | av_dict_set(avpriv_frame_get_metadatap(frame), "lavfi.scene_score", buf, 0); | |
317 | } | |
318 | break; | |
319 | } | |
320 | ||
321 | select->select = res = av_expr_eval(select->expr, select->var_values, NULL); | |
322 | av_log(inlink->dst, AV_LOG_DEBUG, | |
323 | "n:%f pts:%f t:%f key:%d", | |
324 | select->var_values[VAR_N], | |
325 | select->var_values[VAR_PTS], | |
326 | select->var_values[VAR_T], | |
327 | frame->key_frame); | |
328 | ||
329 | switch (inlink->type) { | |
330 | case AVMEDIA_TYPE_VIDEO: | |
331 | av_log(inlink->dst, AV_LOG_DEBUG, " interlace_type:%c pict_type:%c scene:%f", | |
332 | (!frame->interlaced_frame) ? 'P' : | |
333 | frame->top_field_first ? 'T' : 'B', | |
334 | av_get_picture_type_char(frame->pict_type), | |
335 | select->var_values[VAR_SCENE]); | |
336 | break; | |
337 | case AVMEDIA_TYPE_AUDIO: | |
338 | av_log(inlink->dst, AV_LOG_DEBUG, " samples_n:%d consumed_samples_n:%f", | |
339 | frame->nb_samples, | |
340 | select->var_values[VAR_CONSUMED_SAMPLES_N]); | |
341 | break; | |
342 | } | |
343 | ||
344 | if (res == 0) { | |
345 | select->select_out = -1; /* drop */ | |
346 | } else if (isnan(res) || res < 0) { | |
347 | select->select_out = 0; /* first output */ | |
348 | } else { | |
349 | select->select_out = FFMIN(ceilf(res)-1, select->nb_outputs-1); /* other outputs */ | |
350 | } | |
351 | ||
352 | av_log(inlink->dst, AV_LOG_DEBUG, " -> select:%f select_out:%d\n", res, select->select_out); | |
353 | ||
354 | if (res) { | |
355 | select->var_values[VAR_PREV_SELECTED_N] = select->var_values[VAR_N]; | |
356 | select->var_values[VAR_PREV_SELECTED_PTS] = select->var_values[VAR_PTS]; | |
357 | select->var_values[VAR_PREV_SELECTED_T] = select->var_values[VAR_T]; | |
358 | select->var_values[VAR_SELECTED_N] += 1.0; | |
359 | if (inlink->type == AVMEDIA_TYPE_AUDIO) | |
360 | select->var_values[VAR_CONSUMED_SAMPLES_N] += frame->nb_samples; | |
361 | } | |
362 | ||
363 | select->var_values[VAR_PREV_PTS] = select->var_values[VAR_PTS]; | |
364 | select->var_values[VAR_PREV_T] = select->var_values[VAR_T]; | |
365 | } | |
366 | ||
367 | static int filter_frame(AVFilterLink *inlink, AVFrame *frame) | |
368 | { | |
369 | AVFilterContext *ctx = inlink->dst; | |
370 | SelectContext *select = ctx->priv; | |
371 | ||
372 | select_frame(ctx, frame); | |
373 | if (select->select) | |
374 | return ff_filter_frame(ctx->outputs[select->select_out], frame); | |
375 | ||
376 | av_frame_free(&frame); | |
377 | return 0; | |
378 | } | |
379 | ||
380 | static int request_frame(AVFilterLink *outlink) | |
381 | { | |
382 | AVFilterContext *ctx = outlink->src; | |
383 | SelectContext *select = ctx->priv; | |
384 | AVFilterLink *inlink = outlink->src->inputs[0]; | |
385 | int out_no = FF_OUTLINK_IDX(outlink); | |
386 | ||
387 | do { | |
388 | int ret = ff_request_frame(inlink); | |
389 | if (ret < 0) | |
390 | return ret; | |
391 | } while (select->select_out != out_no); | |
392 | ||
393 | return 0; | |
394 | } | |
395 | ||
396 | static av_cold void uninit(AVFilterContext *ctx) | |
397 | { | |
398 | SelectContext *select = ctx->priv; | |
399 | int i; | |
400 | ||
401 | av_expr_free(select->expr); | |
402 | select->expr = NULL; | |
403 | ||
404 | for (i = 0; i < ctx->nb_outputs; i++) | |
405 | av_freep(&ctx->output_pads[i].name); | |
406 | ||
407 | if (select->do_scene_detect) { | |
408 | av_frame_free(&select->prev_picref); | |
409 | } | |
410 | } | |
411 | ||
412 | static int query_formats(AVFilterContext *ctx) | |
413 | { | |
414 | SelectContext *select = ctx->priv; | |
415 | ||
416 | if (!select->do_scene_detect) { | |
417 | return ff_default_query_formats(ctx); | |
418 | } else { | |
419 | static const enum AVPixelFormat pix_fmts[] = { | |
420 | AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24, | |
421 | AV_PIX_FMT_NONE | |
422 | }; | |
423 | ff_set_common_formats(ctx, ff_make_format_list(pix_fmts)); | |
424 | } | |
425 | return 0; | |
426 | } | |
427 | ||
#if CONFIG_ASELECT_FILTER

DEFINE_OPTIONS(aselect, AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM);
AVFILTER_DEFINE_CLASS(aselect);

/**
 * Initialise the audio variant: run the shared init() and then reject
 * expressions that enabled scene detection.
 *
 * @return 0 on success, the init() error code, or AVERROR(EINVAL) when the
 *         expression triggered scene detection
 */
static av_cold int aselect_init(AVFilterContext *ctx)
{
    SelectContext *s = ctx->priv;
    int err = init(ctx);

    if (err < 0)
        return err;

    if (!s->do_scene_detect)
        return 0;

    av_log(ctx, AV_LOG_ERROR, "Scene detection is ignored in aselect filter\n");
    return AVERROR(EINVAL);
}

/* Single audio input; the output pads are created dynamically in init(). */
static const AVFilterPad avfilter_af_aselect_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_AUDIO,
        .filter_frame = filter_frame,
        .config_props = config_input,
    },
    { NULL }
};

AVFilter ff_af_aselect = {
    .name        = "aselect",
    .description = NULL_IF_CONFIG_SMALL("Select audio frames to pass in output."),
    .priv_size   = sizeof(SelectContext),
    .priv_class  = &aselect_class,
    .init        = aselect_init,
    .uninit      = uninit,
    .inputs      = avfilter_af_aselect_inputs,
    .flags       = AVFILTER_FLAG_DYNAMIC_OUTPUTS,
};
#endif /* CONFIG_ASELECT_FILTER */
470 | ||
#if CONFIG_SELECT_FILTER

DEFINE_OPTIONS(select, AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM);
AVFILTER_DEFINE_CLASS(select);

/**
 * Initialise the video variant; it needs nothing beyond the shared init().
 *
 * @return 0 on success, the negative init() error code otherwise
 */
static av_cold int select_init(AVFilterContext *ctx)
{
    const int err = init(ctx);

    return err < 0 ? err : 0;
}

/* Single video input; the output pads are created dynamically in init(). */
static const AVFilterPad avfilter_vf_select_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
        .config_props = config_input,
    },
    { NULL }
};

AVFilter ff_vf_select = {
    .name          = "select",
    .description   = NULL_IF_CONFIG_SMALL("Select video frames to pass in output."),
    .priv_size     = sizeof(SelectContext),
    .priv_class    = &select_class,
    .init          = select_init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .inputs        = avfilter_vf_select_inputs,
    .flags         = AVFILTER_FLAG_DYNAMIC_OUTPUTS,
};
#endif /* CONFIG_SELECT_FILTER */