2 * Copyright (C) 2010 Georg Martius <georg.martius@web.de>
3 * Copyright (C) 2010 Daniel G. Taylor <dan@programmer-art.org>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * fast deshake / depan video filter
26 * SAD block-matching motion compensation to fix small changes in
27 * horizontal and/or vertical shift. This filter helps remove camera shake
28 * from hand-holding a camera, bumping a tripod, moving on a vehicle, etc.
31 * - For each frame with one previous reference frame
32 * - For each block in the frame
33 * - If contrast > threshold then find likely motion vector
34 * - For all found motion vectors
35 * - Find most common, store as global motion vector
36 * - Find most likely rotation angle
37 * - Transform image along global motion
40 * - Fill frame edges based on previous/next reference frames
41 * - Fill frame edges by stretching image near the edges?
42 * - Can this be done quickly and look decent?
44 * Dark Shikari links to http://wiki.videolan.org/SoC_x264_2010#GPU_Motion_Estimation_2
45 * for an algorithm similar to what could be used here to get the gmv
46 * It requires only a couple diamond searches + fast downscaling
48 * Special thanks to Jason Kotenko for his help with the algorithm and my
49 * inability to see simple errors in C code.
56 #include "libavutil/common.h"
57 #include "libavutil/mem.h"
58 #include "libavutil/opt.h"
59 #include "libavutil/pixdesc.h"
62 #include "deshake_opencl.h"
64 #define CHROMA_WIDTH(link) (-((-(link)->w) >> av_pix_fmt_desc_get((link)->format)->log2_chroma_w))
65 #define CHROMA_HEIGHT(link) (-((-(link)->h) >> av_pix_fmt_desc_get((link)->format)->log2_chroma_h))
67 #define OFFSET(x) offsetof(DeshakeContext, x)
68 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
70 static const AVOption deshake_options
[] = {
71 { "x", "set x for the rectangular search area", OFFSET(cx
), AV_OPT_TYPE_INT
, {.i64
=-1}, -1, INT_MAX
, .flags
= FLAGS
},
72 { "y", "set y for the rectangular search area", OFFSET(cy
), AV_OPT_TYPE_INT
, {.i64
=-1}, -1, INT_MAX
, .flags
= FLAGS
},
73 { "w", "set width for the rectangular search area", OFFSET(cw
), AV_OPT_TYPE_INT
, {.i64
=-1}, -1, INT_MAX
, .flags
= FLAGS
},
74 { "h", "set height for the rectangular search area", OFFSET(ch
), AV_OPT_TYPE_INT
, {.i64
=-1}, -1, INT_MAX
, .flags
= FLAGS
},
75 { "rx", "set x for the rectangular search area", OFFSET(rx
), AV_OPT_TYPE_INT
, {.i64
=16}, 0, MAX_R
, .flags
= FLAGS
},
76 { "ry", "set y for the rectangular search area", OFFSET(ry
), AV_OPT_TYPE_INT
, {.i64
=16}, 0, MAX_R
, .flags
= FLAGS
},
77 { "edge", "set edge mode", OFFSET(edge
), AV_OPT_TYPE_INT
, {.i64
=FILL_MIRROR
}, FILL_BLANK
, FILL_COUNT
-1, FLAGS
, "edge"},
78 { "blank", "fill zeroes at blank locations", 0, AV_OPT_TYPE_CONST
, {.i64
=FILL_BLANK
}, INT_MIN
, INT_MAX
, FLAGS
, "edge" },
79 { "original", "original image at blank locations", 0, AV_OPT_TYPE_CONST
, {.i64
=FILL_ORIGINAL
}, INT_MIN
, INT_MAX
, FLAGS
, "edge" },
80 { "clamp", "extruded edge value at blank locations", 0, AV_OPT_TYPE_CONST
, {.i64
=FILL_CLAMP
}, INT_MIN
, INT_MAX
, FLAGS
, "edge" },
81 { "mirror", "mirrored edge at blank locations", 0, AV_OPT_TYPE_CONST
, {.i64
=FILL_MIRROR
}, INT_MIN
, INT_MAX
, FLAGS
, "edge" },
82 { "blocksize", "set motion search blocksize", OFFSET(blocksize
), AV_OPT_TYPE_INT
, {.i64
=8}, 4, 128, .flags
= FLAGS
},
83 { "contrast", "set contrast threshold for blocks", OFFSET(contrast
), AV_OPT_TYPE_INT
, {.i64
=125}, 1, 255, .flags
= FLAGS
},
84 { "search", "set search strategy", OFFSET(search
), AV_OPT_TYPE_INT
, {.i64
=EXHAUSTIVE
}, EXHAUSTIVE
, SEARCH_COUNT
-1, FLAGS
, "smode" },
85 { "exhaustive", "exhaustive search", 0, AV_OPT_TYPE_CONST
, {.i64
=EXHAUSTIVE
}, INT_MIN
, INT_MAX
, FLAGS
, "smode" },
86 { "less", "less exhaustive search", 0, AV_OPT_TYPE_CONST
, {.i64
=SMART_EXHAUSTIVE
}, INT_MIN
, INT_MAX
, FLAGS
, "smode" },
87 { "filename", "set motion search detailed log file name", OFFSET(filename
), AV_OPT_TYPE_STRING
, {.str
=NULL
}, .flags
= FLAGS
},
88 { "opencl", "use OpenCL filtering capabilities", OFFSET(opencl
), AV_OPT_TYPE_INT
, {.i64
=0}, 0, 1, .flags
= FLAGS
},
92 AVFILTER_DEFINE_CLASS(deshake
);
94 static int cmp(const double *a
, const double *b
)
96 return *a
< *b
? -1 : ( *a
> *b
? 1 : 0 );
100 * Cleaned mean (cuts off 20% of values to remove outliers and then averages)
102 static double clean_mean(double *values
, int count
)
108 qsort(values
, count
, sizeof(double), (void*)cmp
);
110 for (x
= cut
; x
< count
- cut
; x
++) {
114 return mean
/ (count
- cut
* 2);
118 * Find the most likely shift in motion between two frames for a given
119 * macroblock. Test each block against several shifts given by the rx
120 * and ry attributes. Searches using a simple matrix of those shifts and
121 * chooses the most likely shift by the smallest difference in blocks.
123 static void find_block_motion(DeshakeContext
*deshake
, uint8_t *src1
,
124 uint8_t *src2
, int cx
, int cy
, int stride
,
129 int smallest
= INT_MAX
;
132 #define CMP(i, j) deshake->sad(src1 + cy * stride + cx, stride,\
133 src2 + (j) * stride + (i), stride)
135 if (deshake
->search
== EXHAUSTIVE
) {
136 // Compare every possible position - this is sloooow!
137 for (y
= -deshake
->ry
; y
<= deshake
->ry
; y
++) {
138 for (x
= -deshake
->rx
; x
<= deshake
->rx
; x
++) {
139 diff
= CMP(cx
- x
, cy
- y
);
140 if (diff
< smallest
) {
147 } else if (deshake
->search
== SMART_EXHAUSTIVE
) {
148 // Compare every other possible position and find the best match
149 for (y
= -deshake
->ry
+ 1; y
< deshake
->ry
; y
+= 2) {
150 for (x
= -deshake
->rx
+ 1; x
< deshake
->rx
; x
+= 2) {
151 diff
= CMP(cx
- x
, cy
- y
);
152 if (diff
< smallest
) {
160 // Hone in on the specific best match around the match we found above
164 for (y
= tmp2
- 1; y
<= tmp2
+ 1; y
++) {
165 for (x
= tmp
- 1; x
<= tmp
+ 1; x
++) {
166 if (x
== tmp
&& y
== tmp2
)
169 diff
= CMP(cx
- x
, cy
- y
);
170 if (diff
< smallest
) {
179 if (smallest
> 512) {
184 //av_log(NULL, AV_LOG_ERROR, "%d\n", smallest);
185 //av_log(NULL, AV_LOG_ERROR, "Final: (%d, %d) = %d x %d\n", cx, cy, mv->x, mv->y);
189 * Find the contrast of a given block. When searching for global motion we
190 * really only care about the high contrast blocks, so using this method we
191 * can actually skip blocks we don't care much about.
193 static int block_contrast(uint8_t *src
, int x
, int y
, int stride
, int blocksize
)
199 for (i
= 0; i
<= blocksize
* 2; i
++) {
200 // We use a width of 16 here to match the sad function
201 for (j
= 0; j
<= 15; j
++) {
202 pos
= (y
- i
) * stride
+ (x
- j
);
203 if (src
[pos
] < lowest
)
205 else if (src
[pos
] > highest
) {
211 return highest
- lowest
;
215 * Find the rotation for a given block.
217 static double block_angle(int x
, int y
, int cx
, int cy
, IntMotionVector
*shift
)
221 a1
= atan2(y
- cy
, x
- cx
);
222 a2
= atan2(y
- cy
+ shift
->y
, x
- cx
+ shift
->x
);
226 return (diff
> M_PI
) ? diff
- 2 * M_PI
:
227 (diff
< -M_PI
) ? diff
+ 2 * M_PI
:
232 * Find the estimated global motion for a scene given the most likely shift
233 * for each block in the frame. The global motion is estimated to be the
234 * same as the motion from most blocks in the frame, so if most blocks
235 * move one pixel to the right and two pixels down, this would yield a
236 * motion vector (1, -2).
238 static void find_motion(DeshakeContext
*deshake
, uint8_t *src1
, uint8_t *src2
,
239 int width
, int height
, int stride
, Transform
*t
)
242 IntMotionVector mv
= {0, 0};
243 int count_max_value
= 0;
247 int center_x
= 0, center_y
= 0;
250 av_fast_malloc(&deshake
->angles
, &deshake
->angles_size
, width
* height
/ (16 * deshake
->blocksize
) * sizeof(*deshake
->angles
));
252 // Reset counts to zero
253 for (x
= 0; x
< deshake
->rx
* 2 + 1; x
++) {
254 for (y
= 0; y
< deshake
->ry
* 2 + 1; y
++) {
255 deshake
->counts
[x
][y
] = 0;
260 // Find motion for every block and store the motion vector in the counts
261 for (y
= deshake
->ry
; y
< height
- deshake
->ry
- (deshake
->blocksize
* 2); y
+= deshake
->blocksize
* 2) {
262 // We use a width of 16 here to match the sad function
263 for (x
= deshake
->rx
; x
< width
- deshake
->rx
- 16; x
+= 16) {
264 // If the contrast is too low, just skip this block as it probably
265 // won't be very useful to us.
266 contrast
= block_contrast(src2
, x
, y
, stride
, deshake
->blocksize
);
267 if (contrast
> deshake
->contrast
) {
268 //av_log(NULL, AV_LOG_ERROR, "%d\n", contrast);
269 find_block_motion(deshake
, src1
, src2
, x
, y
, stride
, &mv
);
270 if (mv
.x
!= -1 && mv
.y
!= -1) {
271 deshake
->counts
[mv
.x
+ deshake
->rx
][mv
.y
+ deshake
->ry
] += 1;
272 if (x
> deshake
->rx
&& y
> deshake
->ry
)
273 deshake
->angles
[pos
++] = block_angle(x
, y
, 0, 0, &mv
);
285 t
->angle
= clean_mean(deshake
->angles
, pos
);
286 if (t
->angle
< 0.001)
292 // Find the most common motion vector in the frame and use it as the gmv
293 for (y
= deshake
->ry
* 2; y
>= 0; y
--) {
294 for (x
= 0; x
< deshake
->rx
* 2 + 1; x
++) {
295 //av_log(NULL, AV_LOG_ERROR, "%5d ", deshake->counts[x][y]);
296 if (deshake
->counts
[x
][y
] > count_max_value
) {
297 t
->vec
.x
= x
- deshake
->rx
;
298 t
->vec
.y
= y
- deshake
->ry
;
299 count_max_value
= deshake
->counts
[x
][y
];
302 //av_log(NULL, AV_LOG_ERROR, "\n");
305 p_x
= (center_x
- width
/ 2.0);
306 p_y
= (center_y
- height
/ 2.0);
307 t
->vec
.x
+= (cos(t
->angle
)-1)*p_x
- sin(t
->angle
)*p_y
;
308 t
->vec
.y
+= sin(t
->angle
)*p_x
+ (cos(t
->angle
)-1)*p_y
;
310 // Clamp max shift & rotation?
311 t
->vec
.x
= av_clipf(t
->vec
.x
, -deshake
->rx
* 2, deshake
->rx
* 2);
312 t
->vec
.y
= av_clipf(t
->vec
.y
, -deshake
->ry
* 2, deshake
->ry
* 2);
313 t
->angle
= av_clipf(t
->angle
, -0.1, 0.1);
315 //av_log(NULL, AV_LOG_ERROR, "%d x %d\n", avg->x, avg->y);
318 static int deshake_transform_c(AVFilterContext
*ctx
,
319 int width
, int height
, int cw
, int ch
,
320 const float *matrix_y
, const float *matrix_uv
,
321 enum InterpolateMethod interpolate
,
322 enum FillMethod fill
, AVFrame
*in
, AVFrame
*out
)
325 const float *matrixs
[3];
326 int plane_w
[3], plane_h
[3];
327 matrixs
[0] = matrix_y
;
328 matrixs
[1] = matrixs
[2] = matrix_uv
;
330 plane_w
[1] = plane_w
[2] = cw
;
332 plane_h
[1] = plane_h
[2] = ch
;
334 for (i
= 0; i
< 3; i
++) {
335 // Transform the luma and chroma planes
336 ret
= avfilter_transform(in
->data
[i
], out
->data
[i
], in
->linesize
[i
], out
->linesize
[i
],
337 plane_w
[i
], plane_h
[i
], matrixs
[i
], interpolate
, fill
);
344 static av_cold
int init(AVFilterContext
*ctx
)
347 DeshakeContext
*deshake
= ctx
->priv
;
349 deshake
->sad
= av_pixelutils_get_sad_fn(4, 4, 1, deshake
); // 16x16, 2nd source unaligned
351 return AVERROR(EINVAL
);
353 deshake
->refcount
= 20; // XXX: add to options?
354 deshake
->blocksize
/= 2;
355 deshake
->blocksize
= av_clip(deshake
->blocksize
, 4, 128);
357 if (deshake
->rx
% 16) {
358 av_log(ctx
, AV_LOG_ERROR
, "rx must be a multiple of 16\n");
359 return AVERROR_PATCHWELCOME
;
362 if (deshake
->filename
)
363 deshake
->fp
= fopen(deshake
->filename
, "w");
365 fwrite("Ori x, Avg x, Fin x, Ori y, Avg y, Fin y, Ori angle, Avg angle, Fin angle, Ori zoom, Avg zoom, Fin zoom\n", sizeof(char), 104, deshake
->fp
);
367 // Quadword align left edge of box for MMX code, adjust width if necessary
368 // to keep right margin
369 if (deshake
->cx
> 0) {
370 deshake
->cw
+= deshake
->cx
- (deshake
->cx
& ~15);
373 deshake
->transform
= deshake_transform_c
;
374 if (!CONFIG_OPENCL
&& deshake
->opencl
) {
375 av_log(ctx
, AV_LOG_ERROR
, "OpenCL support was not enabled in this build, cannot be selected\n");
376 return AVERROR(EINVAL
);
379 if (CONFIG_OPENCL
&& deshake
->opencl
) {
380 deshake
->transform
= ff_opencl_transform
;
381 ret
= ff_opencl_deshake_init(ctx
);
385 av_log(ctx
, AV_LOG_VERBOSE
, "cx: %d, cy: %d, cw: %d, ch: %d, rx: %d, ry: %d, edge: %d blocksize: %d contrast: %d search: %d\n",
386 deshake
->cx
, deshake
->cy
, deshake
->cw
, deshake
->ch
,
387 deshake
->rx
, deshake
->ry
, deshake
->edge
, deshake
->blocksize
* 2, deshake
->contrast
, deshake
->search
);
392 static int query_formats(AVFilterContext
*ctx
)
394 static const enum AVPixelFormat pix_fmts
[] = {
395 AV_PIX_FMT_YUV420P
, AV_PIX_FMT_YUV422P
, AV_PIX_FMT_YUV444P
, AV_PIX_FMT_YUV410P
,
396 AV_PIX_FMT_YUV411P
, AV_PIX_FMT_YUV440P
, AV_PIX_FMT_YUVJ420P
, AV_PIX_FMT_YUVJ422P
,
397 AV_PIX_FMT_YUVJ444P
, AV_PIX_FMT_YUVJ440P
, AV_PIX_FMT_NONE
400 ff_set_common_formats(ctx
, ff_make_format_list(pix_fmts
));
405 static int config_props(AVFilterLink
*link
)
407 DeshakeContext
*deshake
= link
->dst
->priv
;
410 deshake
->last
.vec
.x
= 0;
411 deshake
->last
.vec
.y
= 0;
412 deshake
->last
.angle
= 0;
413 deshake
->last
.zoom
= 0;
418 static av_cold
void uninit(AVFilterContext
*ctx
)
420 DeshakeContext
*deshake
= ctx
->priv
;
421 if (CONFIG_OPENCL
&& deshake
->opencl
) {
422 ff_opencl_deshake_uninit(ctx
);
424 av_frame_free(&deshake
->ref
);
425 av_freep(&deshake
->angles
);
426 deshake
->angles_size
= 0;
431 static int filter_frame(AVFilterLink
*link
, AVFrame
*in
)
433 DeshakeContext
*deshake
= link
->dst
->priv
;
434 AVFilterLink
*outlink
= link
->dst
->outputs
[0];
436 Transform t
= {{0},0}, orig
= {{0},0};
437 float matrix_y
[9], matrix_uv
[9];
438 float alpha
= 2.0 / deshake
->refcount
;
442 out
= ff_get_video_buffer(outlink
, outlink
->w
, outlink
->h
);
445 return AVERROR(ENOMEM
);
447 av_frame_copy_props(out
, in
);
449 if (CONFIG_OPENCL
&& deshake
->opencl
) {
450 ret
= ff_opencl_deshake_process_inout_buf(link
->dst
,in
, out
);
455 if (deshake
->cx
< 0 || deshake
->cy
< 0 || deshake
->cw
< 0 || deshake
->ch
< 0) {
456 // Find the most likely global motion for the current frame
457 find_motion(deshake
, (deshake
->ref
== NULL
) ? in
->data
[0] : deshake
->ref
->data
[0], in
->data
[0], link
->w
, link
->h
, in
->linesize
[0], &t
);
459 uint8_t *src1
= (deshake
->ref
== NULL
) ? in
->data
[0] : deshake
->ref
->data
[0];
460 uint8_t *src2
= in
->data
[0];
462 deshake
->cx
= FFMIN(deshake
->cx
, link
->w
);
463 deshake
->cy
= FFMIN(deshake
->cy
, link
->h
);
465 if ((unsigned)deshake
->cx
+ (unsigned)deshake
->cw
> link
->w
) deshake
->cw
= link
->w
- deshake
->cx
;
466 if ((unsigned)deshake
->cy
+ (unsigned)deshake
->ch
> link
->h
) deshake
->ch
= link
->h
- deshake
->cy
;
468 // Quadword align right margin
471 src1
+= deshake
->cy
* in
->linesize
[0] + deshake
->cx
;
472 src2
+= deshake
->cy
* in
->linesize
[0] + deshake
->cx
;
474 find_motion(deshake
, src1
, src2
, deshake
->cw
, deshake
->ch
, in
->linesize
[0], &t
);
478 // Copy transform so we can output it later to compare to the smoothed value
479 orig
.vec
.x
= t
.vec
.x
;
480 orig
.vec
.y
= t
.vec
.y
;
481 orig
.angle
= t
.angle
;
484 // Generate a one-sided moving exponential average
485 deshake
->avg
.vec
.x
= alpha
* t
.vec
.x
+ (1.0 - alpha
) * deshake
->avg
.vec
.x
;
486 deshake
->avg
.vec
.y
= alpha
* t
.vec
.y
+ (1.0 - alpha
) * deshake
->avg
.vec
.y
;
487 deshake
->avg
.angle
= alpha
* t
.angle
+ (1.0 - alpha
) * deshake
->avg
.angle
;
488 deshake
->avg
.zoom
= alpha
* t
.zoom
+ (1.0 - alpha
) * deshake
->avg
.zoom
;
490 // Remove the average from the current motion to detect the motion that
491 // is not on purpose, just as jitter from bumping the camera
492 t
.vec
.x
-= deshake
->avg
.vec
.x
;
493 t
.vec
.y
-= deshake
->avg
.vec
.y
;
494 t
.angle
-= deshake
->avg
.angle
;
495 t
.zoom
-= deshake
->avg
.zoom
;
497 // Invert the motion to undo it
502 // Write statistics to file
504 snprintf(tmp
, 256, "%f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f\n", orig
.vec
.x
, deshake
->avg
.vec
.x
, t
.vec
.x
, orig
.vec
.y
, deshake
->avg
.vec
.y
, t
.vec
.y
, orig
.angle
, deshake
->avg
.angle
, t
.angle
, orig
.zoom
, deshake
->avg
.zoom
, t
.zoom
);
505 fwrite(tmp
, sizeof(char), strlen(tmp
), deshake
->fp
);
508 // Turn relative current frame motion into absolute by adding it to the
509 // last absolute motion
510 t
.vec
.x
+= deshake
->last
.vec
.x
;
511 t
.vec
.y
+= deshake
->last
.vec
.y
;
512 t
.angle
+= deshake
->last
.angle
;
513 t
.zoom
+= deshake
->last
.zoom
;
515 // Shrink motion by 10% to keep things centered in the camera frame
520 // Store the last absolute motion information
521 deshake
->last
.vec
.x
= t
.vec
.x
;
522 deshake
->last
.vec
.y
= t
.vec
.y
;
523 deshake
->last
.angle
= t
.angle
;
524 deshake
->last
.zoom
= t
.zoom
;
526 // Generate a luma transformation matrix
527 avfilter_get_matrix(t
.vec
.x
, t
.vec
.y
, t
.angle
, 1.0 + t
.zoom
/ 100.0, matrix_y
);
528 // Generate a chroma transformation matrix
529 avfilter_get_matrix(t
.vec
.x
/ (link
->w
/ CHROMA_WIDTH(link
)), t
.vec
.y
/ (link
->h
/ CHROMA_HEIGHT(link
)), t
.angle
, 1.0 + t
.zoom
/ 100.0, matrix_uv
);
530 // Transform the luma and chroma planes
531 ret
= deshake
->transform(link
->dst
, link
->w
, link
->h
, CHROMA_WIDTH(link
), CHROMA_HEIGHT(link
),
532 matrix_y
, matrix_uv
, INTERPOLATE_BILINEAR
, deshake
->edge
, in
, out
);
534 // Cleanup the old reference frame
535 av_frame_free(&deshake
->ref
);
540 // Store the current frame as the reference frame for calculating the
541 // motion of the next frame
544 return ff_filter_frame(outlink
, out
);
547 static const AVFilterPad deshake_inputs
[] = {
550 .type
= AVMEDIA_TYPE_VIDEO
,
551 .filter_frame
= filter_frame
,
552 .config_props
= config_props
,
557 static const AVFilterPad deshake_outputs
[] = {
560 .type
= AVMEDIA_TYPE_VIDEO
,
565 AVFilter ff_vf_deshake
= {
567 .description
= NULL_IF_CONFIG_SMALL("Stabilize shaky video."),
568 .priv_size
= sizeof(DeshakeContext
),
571 .query_formats
= query_formats
,
572 .inputs
= deshake_inputs
,
573 .outputs
= deshake_outputs
,
574 .priv_class
= &deshake_class
,