ffmpeg/libavfilter/vf_decimate.c

   1 /*
   2  * Copyright (c) 2012 Fredrik Mellbin
   3  * Copyright (c) 2013 Clément Bœsch
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #include "libavutil/opt.h"
  23 #include "libavutil/pixdesc.h"
  24 #include "libavutil/timestamp.h"
  25 #include "avfilter.h"
  26 #include "internal.h"
  27
   28 #define INPUT_MAIN     0  ///< index of the main input pad; also indexes got_frame[] and the eof bitmask
   29 #define INPUT_CLEANSRC 1  ///< index of the clean source input pad, inserted only when ppsrc is set
  30
   31 struct qitem {
          /* One slot of the cycle window: a frame queued from the main input
           * together with the metrics calc_diffs() stored for it. */
   32     AVFrame *frame;         ///< frame queued from the main input
   33     int64_t maxbdiff;       ///< largest summed difference over any 2x2 group of adjacent cells
   34     int64_t totdiff;        ///< sum of the differences of all cells
   35 };
  36
   37 typedef struct {
          /* Private state of one decimate filter instance. */
   38     const AVClass *class;
   39     struct qitem *queue;    ///< window of cycle frames and the associated data diff
   40     int fid;                ///< current frame id in the queue
   41     int filled;             ///< 1 if the queue is filled, 0 otherwise
   42     AVFrame *last;          ///< last frame from the previous queue
   43     AVFrame **clean_src;    ///< frame queue for the clean source
   44     int got_frame[2];       ///< frame request flag for each input stream
   45     double ts_unit;         ///< timestamp units for the output frames
   46     int64_t start_pts;      ///< base for output timestamps
   47     uint32_t eof;           ///< bitmask for end of stream
   48     int hsub, vsub;         ///< chroma subsampling values
   49     int depth;              ///< bit depth of the first component of the input pixel format
   50     int nxblocks, nyblocks; ///< number of metric cells (blockx/2 by blocky/2 pixels) per row / per column
   51     int bdiffsize;          ///< number of entries in bdiffs (nxblocks * nyblocks)
   52     int64_t *bdiffs;        ///< scratch buffer of per-cell differences, rewritten by every calc_diffs() call
   53
   54     /* options */
   55     int cycle;              ///< size of the frame window; one frame per full window is dropped
   56     double dupthresh_flt;   ///< user value of the duplicate threshold (0..100)
   57     double scthresh_flt;    ///< user value of the scene change threshold (0..100)
   58     int64_t dupthresh;      ///< duplicate threshold scaled in config_input(), compared with maxbdiff
   59     int64_t scthresh;       ///< scene change threshold scaled in config_input(), compared with totdiff
   60     int blockx, blocky;     ///< metric block size; decimate_init() requires powers of two
   61     int ppsrc;              ///< non-zero if a second (clean source) input is used
   62     int chroma;             ///< non-zero if the chroma planes take part in the metrics
   63 } DecimateContext;
  64
   65 #define OFFSET(x) offsetof(DecimateContext, x) /* byte offset of option field x inside the private context */
   66 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
   67
      /* User-settable options. Each entry lists name, help text, destination
       * field, type, default value, accepted minimum and maximum, and flags.
       * blockx and blocky must additionally be powers of two; that constraint is
       * checked in decimate_init() rather than here. */
   68 static const AVOption decimate_options[] = {
   69     { "cycle",     "set the number of frame from which one will be dropped", OFFSET(cycle), AV_OPT_TYPE_INT, {.i64 = 5}, 2, 25, FLAGS },
   70     { "dupthresh", "set duplicate threshold",    OFFSET(dupthresh_flt), AV_OPT_TYPE_DOUBLE, {.dbl =  1.1}, 0, 100, FLAGS },
   71     { "scthresh",  "set scene change threshold", OFFSET(scthresh_flt),  AV_OPT_TYPE_DOUBLE, {.dbl = 15.0}, 0, 100, FLAGS },
   72     { "blockx",    "set the size of the x-axis blocks used during metric calculations", OFFSET(blockx), AV_OPT_TYPE_INT, {.i64 = 32}, 4, 1<<9, FLAGS },
   73     { "blocky",    "set the size of the y-axis blocks used during metric calculations", OFFSET(blocky), AV_OPT_TYPE_INT, {.i64 = 32}, 4, 1<<9, FLAGS },
   74     { "ppsrc",     "mark main input as a pre-processed input and activate clean source input stream", OFFSET(ppsrc), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
   75     { "chroma",    "set whether or not chroma is considered in the metric calculations", OFFSET(chroma), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS },
   76     { NULL }
   77 };
   78
      /* Defines decimate_class, which the filter definition references as its priv_class. */
   79 AVFILTER_DEFINE_CLASS(decimate);
  80
  81 static void calc_diffs(const DecimateContext *dm, struct qitem *q,
  82                        const AVFrame *f1, const AVFrame *f2)
  83 {
  84     int64_t maxdiff = -1;
  85     int64_t *bdiffs = dm->bdiffs;
  86     int plane, i, j;
  87
  88     memset(bdiffs, 0, dm->bdiffsize * sizeof(*bdiffs));
  89
  90     for (plane = 0; plane < (dm->chroma && f1->data[2] ? 3 : 1); plane++) {
  91         int x, y, xl;
  92         const int linesize1 = f1->linesize[plane];
  93         const int linesize2 = f2->linesize[plane];
  94         const uint8_t *f1p = f1->data[plane];
  95         const uint8_t *f2p = f2->data[plane];
  96         int width    = plane ? FF_CEIL_RSHIFT(f1->width,  dm->hsub) : f1->width;
  97         int height   = plane ? FF_CEIL_RSHIFT(f1->height, dm->vsub) : f1->height;
  98         int hblockx  = dm->blockx / 2;
  99         int hblocky  = dm->blocky / 2;
 100
 101         if (plane) {
 102             hblockx >>= dm->hsub;
 103             hblocky >>= dm->vsub;
 104         }
 105
 106         for (y = 0; y < height; y++) {
 107             int ydest = y / hblocky;
 108             int xdest = 0;
 109
 110 #define CALC_DIFF(nbits) do {                               \
 111     for (x = 0; x < width; x += hblockx) {                  \
 112         int64_t acc = 0;                                    \
 113         int m = FFMIN(width, x + hblockx);                  \
 114         for (xl = x; xl < m; xl++)                          \
 115             acc += abs(((const uint##nbits##_t *)f1p)[xl] - \
 116                        ((const uint##nbits##_t *)f2p)[xl]); \
 117         bdiffs[ydest * dm->nxblocks + xdest] += acc;        \
 118         xdest++;                                            \
 119     }                                                       \
 120 } while (0)
 121             if (dm->depth == 8) CALC_DIFF(8);
 122             else                CALC_DIFF(16);
 123
 124             f1p += linesize1;
 125             f2p += linesize2;
 126         }
 127     }
 128
 129     for (i = 0; i < dm->nyblocks - 1; i++) {
 130         for (j = 0; j < dm->nxblocks - 1; j++) {
 131             int64_t tmp = bdiffs[      i * dm->nxblocks + j    ]
 132                         + bdiffs[      i * dm->nxblocks + j + 1]
 133                         + bdiffs[(i + 1) * dm->nxblocks + j    ]
 134                         + bdiffs[(i + 1) * dm->nxblocks + j + 1];
 135             if (tmp > maxdiff)
 136                 maxdiff = tmp;
 137         }
 138     }
 139
 140     q->totdiff = 0;
 141     for (i = 0; i < dm->bdiffsize; i++)
 142         q->totdiff += bdiffs[i];
 143     q->maxbdiff = maxdiff;
 144 }
 145
 146 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 147 {
 148     int scpos = -1, duppos = -1;
 149     int drop = INT_MIN, i, lowest = 0, ret;
 150     AVFilterContext *ctx  = inlink->dst;
 151     AVFilterLink *outlink = ctx->outputs[0];
 152     DecimateContext *dm   = ctx->priv;
 153     AVFrame *prv;
 154
 155     /* update frames queue(s) */
 156     if (FF_INLINK_IDX(inlink) == INPUT_MAIN) {
 157         dm->queue[dm->fid].frame = in;
 158         dm->got_frame[INPUT_MAIN] = 1;
 159     } else {
 160         dm->clean_src[dm->fid] = in;
 161         dm->got_frame[INPUT_CLEANSRC] = 1;
 162     }
 163     if (!dm->got_frame[INPUT_MAIN] || (dm->ppsrc && !dm->got_frame[INPUT_CLEANSRC]))
 164         return 0;
 165     dm->got_frame[INPUT_MAIN] = dm->got_frame[INPUT_CLEANSRC] = 0;
 166
 167     if (in) {
 168         /* update frame metrics */
 169         prv = dm->fid ? dm->queue[dm->fid - 1].frame : dm->last;
 170         if (!prv)
 171             prv = in;
 172         calc_diffs(dm, &dm->queue[dm->fid], prv, in);
 173         if (++dm->fid != dm->cycle)
 174             return 0;
 175         av_frame_free(&dm->last);
 176         dm->last = av_frame_clone(in);
 177         dm->fid = 0;
 178
 179         /* we have a complete cycle, select the frame to drop */
 180         lowest = 0;
 181         for (i = 0; i < dm->cycle; i++) {
 182             if (dm->queue[i].totdiff > dm->scthresh)
 183                 scpos = i;
 184             if (dm->queue[i].maxbdiff < dm->queue[lowest].maxbdiff)
 185                 lowest = i;
 186         }
 187         if (dm->queue[lowest].maxbdiff < dm->dupthresh)
 188             duppos = lowest;
 189         drop = scpos >= 0 && duppos < 0 ? scpos : lowest;
 190     }
 191
 192     /* metrics debug */
 193     if (av_log_get_level() >= AV_LOG_DEBUG) {
 194         av_log(ctx, AV_LOG_DEBUG, "1/%d frame drop:\n", dm->cycle);
 195         for (i = 0; i < dm->cycle && dm->queue[i].frame; i++) {
 196             av_log(ctx, AV_LOG_DEBUG,"  #%d: totdiff=%08"PRIx64" maxbdiff=%08"PRIx64"%s%s%s%s\n",
 197                    i + 1, dm->queue[i].totdiff, dm->queue[i].maxbdiff,
 198                    i == scpos  ? " sc"     : "",
 199                    i == duppos ? " dup"    : "",
 200                    i == lowest ? " lowest" : "",
 201                    i == drop   ? " [DROP]" : "");
 202         }
 203     }
 204
 205     /* push all frames except the drop */
 206     ret = 0;
 207     for (i = 0; i < dm->cycle && dm->queue[i].frame; i++) {
 208         if (i == drop) {
 209             if (dm->ppsrc)
 210                 av_frame_free(&dm->clean_src[i]);
 211             av_frame_free(&dm->queue[i].frame);
 212         } else {
 213             AVFrame *frame = dm->queue[i].frame;
 214             if (frame->pts != AV_NOPTS_VALUE && dm->start_pts == AV_NOPTS_VALUE)
 215                 dm->start_pts = frame->pts;
 216             if (dm->ppsrc) {
 217                 av_frame_free(&frame);
 218                 frame = dm->clean_src[i];
 219             }
 220             frame->pts = outlink->frame_count * dm->ts_unit +
 221                          (dm->start_pts == AV_NOPTS_VALUE ? 0 : dm->start_pts);
 222             ret = ff_filter_frame(outlink, frame);
 223             if (ret < 0)
 224                 break;
 225         }
 226     }
 227
 228     return ret;
 229 }
 230
 231 static int config_input(AVFilterLink *inlink)
 232 {
 233     int max_value;
 234     AVFilterContext *ctx = inlink->dst;
 235     DecimateContext *dm = ctx->priv;
 236     const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
 237     const int w = inlink->w;
 238     const int h = inlink->h;
 239
 240     dm->hsub      = pix_desc->log2_chroma_w;
 241     dm->vsub      = pix_desc->log2_chroma_h;
 242     dm->depth     = pix_desc->comp[0].depth_minus1 + 1;
 243     max_value     = (1 << dm->depth) - 1;
 244     dm->scthresh  = (int64_t)(((int64_t)max_value *          w * h          * dm->scthresh_flt)  / 100);
 245     dm->dupthresh = (int64_t)(((int64_t)max_value * dm->blockx * dm->blocky * dm->dupthresh_flt) / 100);
 246     dm->nxblocks  = (w + dm->blockx/2 - 1) / (dm->blockx/2);
 247     dm->nyblocks  = (h + dm->blocky/2 - 1) / (dm->blocky/2);
 248     dm->bdiffsize = dm->nxblocks * dm->nyblocks;
 249     dm->bdiffs    = av_malloc_array(dm->bdiffsize, sizeof(*dm->bdiffs));
 250     dm->queue     = av_calloc(dm->cycle, sizeof(*dm->queue));
 251
 252     if (!dm->bdiffs || !dm->queue)
 253         return AVERROR(ENOMEM);
 254
 255     if (dm->ppsrc) {
 256         dm->clean_src = av_calloc(dm->cycle, sizeof(*dm->clean_src));
 257         if (!dm->clean_src)
 258             return AVERROR(ENOMEM);
 259     }
 260
 261     return 0;
 262 }
 263
 264 static av_cold int decimate_init(AVFilterContext *ctx)
 265 {
 266     DecimateContext *dm = ctx->priv;
 267     AVFilterPad pad = {
 268         .name         = av_strdup("main"),
 269         .type         = AVMEDIA_TYPE_VIDEO,
 270         .filter_frame = filter_frame,
 271         .config_props = config_input,
 272     };
 273
 274     if (!pad.name)
 275         return AVERROR(ENOMEM);
 276     ff_insert_inpad(ctx, INPUT_MAIN, &pad);
 277
 278     if (dm->ppsrc) {
 279         pad.name = av_strdup("clean_src");
 280         pad.config_props = NULL;
 281         if (!pad.name)
 282             return AVERROR(ENOMEM);
 283         ff_insert_inpad(ctx, INPUT_CLEANSRC, &pad);
 284     }
 285
 286     if ((dm->blockx & (dm->blockx - 1)) ||
 287         (dm->blocky & (dm->blocky - 1))) {
 288         av_log(ctx, AV_LOG_ERROR, "blockx and blocky settings must be power of two\n");
 289         return AVERROR(EINVAL);
 290     }
 291
 292     dm->start_pts = AV_NOPTS_VALUE;
 293
 294     return 0;
 295 }
 296
 297 static av_cold void decimate_uninit(AVFilterContext *ctx)
 298 {
 299     int i;
 300     DecimateContext *dm = ctx->priv;
 301
 302     av_frame_free(&dm->last);
 303     av_freep(&dm->bdiffs);
 304     av_freep(&dm->queue);
 305     av_freep(&dm->clean_src);
 306     for (i = 0; i < ctx->nb_inputs; i++)
 307         av_freep(&ctx->input_pads[i].name);
 308 }
 309
 310 static int request_inlink(AVFilterContext *ctx, int lid)
 311 {
 312     int ret = 0;
 313     DecimateContext *dm = ctx->priv;
 314
 315     if (!dm->got_frame[lid]) {
 316         AVFilterLink *inlink = ctx->inputs[lid];
 317         ret = ff_request_frame(inlink);
 318         if (ret == AVERROR_EOF) { // flushing
 319             dm->eof |= 1 << lid;
 320             ret = filter_frame(inlink, NULL);
 321         }
 322     }
 323     return ret;
 324 }
 325
 326 static int request_frame(AVFilterLink *outlink)
 327 {
 328     int ret;
 329     AVFilterContext *ctx = outlink->src;
 330     DecimateContext *dm = ctx->priv;
 331     const uint32_t eof_mask = 1<<INPUT_MAIN | dm->ppsrc<<INPUT_CLEANSRC;
 332
 333     if ((dm->eof & eof_mask) == eof_mask) // flush done?
 334         return AVERROR_EOF;
 335     if ((ret = request_inlink(ctx, INPUT_MAIN)) < 0)
 336         return ret;
 337     if (dm->ppsrc && (ret = request_inlink(ctx, INPUT_CLEANSRC)) < 0)
 338         return ret;
 339     return 0;
 340 }
 341
 342 static int query_formats(AVFilterContext *ctx)
 343 {
 344     static const enum AVPixelFormat pix_fmts[] = {
 345 #define PF_NOALPHA(suf) AV_PIX_FMT_YUV420##suf,  AV_PIX_FMT_YUV422##suf,  AV_PIX_FMT_YUV444##suf
 346 #define PF_ALPHA(suf)   AV_PIX_FMT_YUVA420##suf, AV_PIX_FMT_YUVA422##suf, AV_PIX_FMT_YUVA444##suf
 347 #define PF(suf)         PF_NOALPHA(suf), PF_ALPHA(suf)
 348         PF(P), PF(P9), PF(P10), PF_NOALPHA(P12), PF_NOALPHA(P14), PF(P16),
 349         AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
 350         AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY16,
 351         AV_PIX_FMT_NONE
 352     };
 353     ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
 354     return 0;
 355 }
 356
 357 static int config_output(AVFilterLink *outlink)
 358 {
 359     AVFilterContext *ctx = outlink->src;
 360     DecimateContext *dm = ctx->priv;
 361     const AVFilterLink *inlink =
 362         ctx->inputs[dm->ppsrc ? INPUT_CLEANSRC : INPUT_MAIN];
 363     AVRational fps = inlink->frame_rate;
 364
 365     if (!fps.num || !fps.den) {
 366         av_log(ctx, AV_LOG_ERROR, "The input needs a constant frame rate; "
 367                "current rate of %d/%d is invalid\n", fps.num, fps.den);
 368         return AVERROR(EINVAL);
 369     }
 370     fps = av_mul_q(fps, (AVRational){dm->cycle - 1, dm->cycle});
 371     av_log(ctx, AV_LOG_VERBOSE, "FPS: %d/%d -> %d/%d\n",
 372            inlink->frame_rate.num, inlink->frame_rate.den, fps.num, fps.den);
 373     outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
 374     outlink->time_base  = inlink->time_base;
 375     outlink->frame_rate = fps;
 376     outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
 377     outlink->w = inlink->w;
 378     outlink->h = inlink->h;
 379     dm->ts_unit = av_q2d(av_inv_q(av_mul_q(fps, outlink->time_base)));
 380     return 0;
 381 }
 382
  383 static const AVFilterPad decimate_outputs[] = {
          /* The single video output: frames are pulled through request_frame()
           * and the link properties are set by config_output(). */
  384     {
  385         .name          = "default",
  386         .type          = AVMEDIA_TYPE_VIDEO,
  387         .request_frame = request_frame,
  388         .config_props  = config_output,
  389     },
  390     { NULL }
  391 };
 392
  393 AVFilter ff_vf_decimate = {
          /* Filter definition. No static input pad array is listed here:
           * decimate_init() inserts the input pad(s) at runtime, one for the
           * main input and a second one when the ppsrc option is set. */
  394     .name          = "decimate",
  395     .description   = NULL_IF_CONFIG_SMALL("Decimate frames (post field matching filter)."),
  396     .init          = decimate_init,
  397     .uninit        = decimate_uninit,
  398     .priv_size     = sizeof(DecimateContext),
  399     .query_formats = query_formats,
  400     .outputs       = decimate_outputs,
  401     .priv_class    = &decimate_class,
  402     .flags         = AVFILTER_FLAG_DYNAMIC_INPUTS,
  403 };