src/transform.c

   1 /*
   2  *  transform.c
   3  *
   4  *  Copyright (C) Georg Martius - June 2007 - 2011
   5  *   georg dot martius at web dot de
   6  *
   7  *  This file is part of vid.stab video stabilization library
   8  *
   9  *  vid.stab is free software; you can redistribute it and/or modify
  10  *  it under the terms of the GNU General Public License,
  11  *  as published by the Free Software Foundation; either version 2, or
  12  *  (at your option) any later version.
  13  *
  14  *  vid.stab is distributed in the hope that it will be useful,
  15  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  *  GNU General Public License for more details.
  18  *
  19  *  You should have received a copy of the GNU General Public License
  20  *  along with GNU Make; see the file COPYING.  If not, write to
  21  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  22  *
  23  */
  24
  25 #include "transform.h"
  26 #include "transform_internal.h"
  27 #include "transformtype_operations.h"
  28
  29 #include "transformfixedpoint.h"
  30 #ifdef TESTING
  31 #include "transformfloat.h"
  32 #endif
  33
  34 #include <math.h>
  35 #include <libgen.h>
  36 #include <string.h>
  37
  38 const char* interpol_type_names[5] = {"No (0)", "Linear (1)", "Bi-Linear (2)",
  39                                       "Bi-Cubic (3)"};
  40
  41 const char* getInterpolationTypeName(VSInterpolType type){
  42   if (type >= VS_Zero && type < VS_NBInterPolTypes)
  43     return interpol_type_names[(int) type];
  44   else
  45     return "unknown";
  46 }
  47
  48 // default initialization: attention the ffmpeg filter cannot call it
  49 VSTransformConfig vsTransformGetDefaultConfig(const char* modName){
  50   VSTransformConfig conf;
  51   /* Options */
  52   conf.maxShift           = -1;
  53   conf.maxAngle           = -1;
  54   conf.crop               = VSKeepBorder;
  55   conf.relative           = 1;
  56   conf.invert             = 0;
  57   conf.smoothing          = 15;
  58   conf.zoom               = 0;
  59   conf.optZoom            = 1;
  60   conf.zoomSpeed          = 0.25;
  61   conf.interpolType       = VS_BiLinear;
  62   conf.verbose            = 0;
  63   conf.modName            = modName;
  64   conf.simpleMotionCalculation = 0;
  65   conf.storeTransforms    = 0;
  66   conf.smoothZoom         = 0;
  67   conf.camPathAlgo        = VSOptimalL1;
  68   return conf;
  69 }
  70
  71 void vsTransformGetConfig(VSTransformConfig* conf, const VSTransformData* td){
  72   if(td && conf)
  73     *conf = td->conf;
  74 }
  75
  76 const VSFrameInfo* vsTransformGetSrcFrameInfo(const VSTransformData* td){
  77   return &td->fiSrc;
  78 }
  79
  80 const VSFrameInfo* vsTransformGetDestFrameInfo(const VSTransformData* td){
  81   return &td->fiDest;
  82 }
  83
  84
  85 int vsTransformDataInit(VSTransformData* td, const VSTransformConfig* conf,
  86                         const VSFrameInfo* fi_src, const VSFrameInfo* fi_dest){
  87   td->conf = *conf;
  88
  89   td->fiSrc = *fi_src;
  90   td->fiDest = *fi_dest;
  91
  92   vsFrameNull(&td->src);
  93   td->srcMalloced = 0;
  94
  95   vsFrameNull(&td->destbuf);
  96   vsFrameNull(&td->dest);
  97
  98   if (td->conf.maxShift > td->fiDest.width/2)
  99     td->conf.maxShift = td->fiDest.width/2;
 100   if (td->conf.maxShift > td->fiDest.height/2)
 101     td->conf.maxShift = td->fiDest.height/2;
 102
 103   td->conf.interpolType = VS_MAX(VS_MIN(td->conf.interpolType,VS_BiCubic),VS_Zero);
 104
 105   // not yet implemented
 106   if(td->conf.camPathAlgo==VSOptimalL1) td->conf.camPathAlgo=VSGaussian;
 107
 108   switch(td->conf.interpolType){
 109    case VS_Zero:     td->interpolate = &interpolateZero; break;
 110    case VS_Linear:   td->interpolate = &interpolateLin; break;
 111    case VS_BiLinear: td->interpolate = &interpolateBiLin; break;
 112    case VS_BiCubic:  td->interpolate = &interpolateBiCub; break;
 113    default: td->interpolate = &interpolateBiLin;
 114   }
 115 #ifdef TESTING
 116   switch(td->conf.interpolType){
 117    case VS_Zero:     td->_FLT(interpolate) = &_FLT(interpolateZero); break;
 118    case VS_Linear:   td->_FLT(interpolate) = &_FLT(interpolateLin); break;
 119    case VS_BiLinear: td->_FLT(interpolate) = &_FLT(interpolateBiLin); break;
 120    case VS_BiCubic:  td->_FLT(interpolate) = &_FLT(interpolateBiCub); break;
 121    default: td->_FLT(interpolate)          = &_FLT(interpolateBiLin);
 122   }
 123
 124 #endif
 125   return VS_OK;
 126 }
 127
 128 void vsTransformDataCleanup(VSTransformData* td){
 129   if (td->srcMalloced && !vsFrameIsNull(&td->src)) {
 130     vsFrameFree(&td->src);
 131   }
 132   if (td->conf.crop == VSKeepBorder && !vsFrameIsNull(&td->destbuf)) {
 133     vsFrameFree(&td->destbuf);
 134   }
 135 }
 136
 137 int vsTransformPrepare(VSTransformData* td, const VSFrame* src, VSFrame* dest){
 138   // we first copy the frame to td->src and then overwrite the destination
 139   // with the transformed version
 140   td->dest = *dest;
 141   if(src==dest || td->srcMalloced){ // in place operation: we have to copy the src first
 142     if(vsFrameIsNull(&td->src)) {
 143       vsFrameAllocate(&td->src,&td->fiSrc);
 144       td->srcMalloced = 1;
 145     }
 146     if (vsFrameIsNull(&td->src)) {
 147       vs_log_error(td->conf.modName, "vs_malloc failed\n");
 148       return VS_ERROR;
 149     }
 150     vsFrameCopy(&td->src, src, &td->fiSrc);
 151   }else{ // otherwise no copy needed
 152     td->src=*src;
 153   }
 154   if (td->conf.crop == VSKeepBorder) {
 155     if(vsFrameIsNull(&td->destbuf)) {
 156       // if we keep the borders, we need a second buffer to store
 157       //  the previous stabilized frame, so we use destbuf
 158       vsFrameAllocate(&td->destbuf,&td->fiDest);
 159       if (vsFrameIsNull(&td->destbuf)) {
 160         vs_log_error(td->conf.modName, "vs_malloc failed\n");
 161         return VS_ERROR;
 162       }
 163       // if we keep borders, save first frame into the background buffer (destbuf)
 164       vsFrameCopy(&td->destbuf, src, &td->fiSrc); // here we have to take care
 165     }
 166   }else{ // otherwise we directly operate on the destination
 167     td->destbuf = *dest;
 168   }
 169   return VS_OK;
 170 }
 171
 172 int vsDoTransform(VSTransformData* td, VSTransform t){
 173   if (td->fiSrc.pFormat < PF_PACKED)
 174     return transformPlanar(td, t);
 175   else
 176     return transformPacked(td, t);
 177 }
 178
 179
 180 int vsTransformFinish(VSTransformData* td){
 181   if(td->conf.crop == VSKeepBorder){
 182     // we have to store our result to video buffer
 183     // note: destbuf stores stabilized frame to be the default for next frame
 184     vsFrameCopy(&td->dest, &td->destbuf, &td->fiSrc);
 185   }
 186   return VS_OK;
 187 }
 188
 189
 190 VSTransform vsGetNextTransform(const VSTransformData* td, VSTransformations* trans){
 191   if(trans->len <=0 ) return null_transform();
 192   if (trans->current >= trans->len) {
 193     trans->current = trans->len;
 194     if(!trans->warned_end)
 195       vs_log_warn(td->conf.modName, "not enough transforms found, use last transformation!\n");
 196     trans->warned_end = 1;
 197   }else{
 198     trans->current++;
 199   }
 200   return trans->ts[trans->current-1];
 201 }
 202
 203 void vsTransformationsInit(VSTransformations* trans){
 204   trans->ts = 0;
 205   trans->len = 0;
 206   trans->current = 0;
 207   trans->warned_end = 0;
 208 }
 209
 210 void vsTransformationsCleanup(VSTransformations* trans){
 211   if (trans->ts) {
 212     vs_free(trans->ts);
 213     trans->ts = NULL;
 214   }
 215   trans->len=0;
 216 }
 217
 218 /*
 219  *  This is actually the core algorithm for canceling the jiggle in the
 220  *  movie. We have different implementations which are patched here.
 221  */
 222 int cameraPathOptimization(VSTransformData* td, VSTransformations* trans){
 223   switch(td->conf.camPathAlgo){
 224    case VSAvg: return cameraPathAvg(td,trans);
 225    case VSOptimalL1: // not yet implenented
 226    case VSGaussian: return cameraPathGaussian(td,trans);
 227 //   case VSOptimalL1: return cameraPathOptimalL1(td,trans);
 228   }
 229   return VS_ERROR;
 230 }
 231
 232 /*
 233  *  We perform a low-pass filter on the camera path.
 234  *  This supports slow camera movemen, but in a smooth fasion.
 235  *  Here we use gaussian filter (gaussian kernel) lowpass filter
 236  */
 237 int cameraPathGaussian(VSTransformData* td, VSTransformations* trans){
 238   VSTransform* ts = trans->ts;
 239   if (trans->len < 1)
 240     return VS_ERROR;
 241   if (td->conf.verbose & VS_DEBUG) {
 242     vs_log_msg(td->conf.modName, "Preprocess transforms:");
 243   }
 244
 245   /* relative to absolute (integrate transformations) */
 246   if (td->conf.relative) {
 247     VSTransform t = ts[0];
 248     for (int i = 1; i < trans->len; i++) {
 249       ts[i] = add_transforms(&ts[i], &t);
 250       t = ts[i];
 251     }
 252   }
 253
 254   if (td->conf.smoothing>0) {
 255     VSTransform* ts2 = vs_malloc(sizeof(VSTransform) * trans->len);
 256     memcpy(ts2, ts, sizeof(VSTransform) * trans->len);
 257     int s = td->conf.smoothing * 2 + 1;
 258     VSArray kernel = vs_array_new(s);
 259     // initialize gaussian kernel
 260     int mu        = td->conf.smoothing;
 261     double sigma2 = sqr(mu/2.0);
 262     for(int i=0; i<=mu; i++){
 263       kernel.dat[i] = kernel.dat[s-i-1] = exp(-sqr(i-mu)/sigma2);
 264     }
 265     // vs_array_print(kernel, stdout);
 266
 267     for (int i = 0; i < trans->len; i++) {
 268       // make a convolution:
 269       double weightsum=0;
 270       VSTransform avg = null_transform();
 271       for(int k=0; k<s; k++){
 272         int idx = i+k-mu;
 273         if(idx>=0 && idx<trans->len){
 274           if(unlikely(0 && ts2[idx].extra==1)){ // deal with scene cuts or bad frames
 275             if(k<mu) { // in the past of our frame: ignore everthing before
 276               avg=null_transform();
 277               weightsum=0;
 278               continue;
 279             }else{           //current frame or in future: stop here
 280               if(k==mu)      //for current frame: ignore completely
 281                 weightsum=0;
 282               break;
 283             }
 284           }
 285           weightsum+=kernel.dat[k];
 286           avg=add_transforms_(avg, mult_transform(&ts2[idx], kernel.dat[k]));
 287         }
 288       }
 289       if(weightsum>0){
 290         avg = mult_transform(&avg, 1.0/weightsum);
 291
 292         // high frequency must be transformed away
 293         ts[i] = sub_transforms(&ts[i], &avg);
 294       }
 295       if (td->conf.verbose & VS_DEBUG) {
 296         vs_log_msg(td->conf.modName,
 297                    " avg: %5lf, %5lf, %5lf extra: %i weightsum %5lf",
 298                    avg.x, avg.y, avg.alpha, ts[i].extra, weightsum
 299                   );
 300       }
 301     }
 302   }
 303   return VS_OK;
 304 }
 305
 306 /*
 307  *  We perform a low-pass filter in terms of transformations.
 308  *  This supports slow camera movement (low frequency), but in a smooth fasion.
 309  *  Here a simple average based filter
 310  */
 311 int cameraPathAvg(VSTransformData* td, VSTransformations* trans){
 312   VSTransform* ts = trans->ts;
 313
 314   if (trans->len < 1)
 315     return VS_ERROR;
 316   if (td->conf.verbose & VS_DEBUG) {
 317    vs_log_msg(td->conf.modName, "Preprocess transforms:");
 318   }
 319   if (td->conf.smoothing>0) {
 320     /* smoothing */
 321     VSTransform* ts2 = vs_malloc(sizeof(VSTransform) * trans->len);
 322     memcpy(ts2, ts, sizeof(VSTransform) * trans->len);
 323
 324     /*  we will do a sliding average with minimal update
 325      *   \hat x_{n/2} = x_1+x_2 + .. + x_n
 326      *   \hat x_{n/2+1} = x_2+x_3 + .. + x_{n+1} = x_{n/2} - x_1 + x_{n+1}
 327      *   avg = \hat x / n
 328      */
 329     int s = td->conf.smoothing * 2 + 1;
 330     VSTransform null = null_transform();
 331     /* avg is the average over [-smoothing, smoothing] transforms
 332        around the current point */
 333     VSTransform avg;
 334     /* avg2 is a sliding average over the filtered signal! (only to past)
 335      *  with smoothing * 2 horizon to kill offsets */
 336     VSTransform avg2 = null_transform();
 337     double tau = 1.0/(2 * s);
 338     /* initialise sliding sum with hypothetic sum centered around
 339      * -1st element. We have two choices:
 340      * a) assume the camera is not moving at the beginning
 341      * b) assume that the camera moves and we use the first transforms
 342      */
 343     VSTransform s_sum = null;
 344     for (int i = 0; i < td->conf.smoothing; i++){
 345       s_sum = add_transforms(&s_sum, i < trans->len ? &ts2[i]:&null);
 346     }
 347     mult_transform(&s_sum, 2); // choice b (comment out for choice a)
 348
 349     for (int i = 0; i < trans->len; i++) {
 350       VSTransform* old = ((i - td->conf.smoothing - 1) < 0)
 351         ? &null : &ts2[(i - td->conf.smoothing - 1)];
 352       VSTransform* new = ((i + td->conf.smoothing) >= trans->len)
 353         ? &null : &ts2[(i + td->conf.smoothing)];
 354       s_sum = sub_transforms(&s_sum, old);
 355       s_sum = add_transforms(&s_sum, new);
 356
 357       avg = mult_transform(&s_sum, 1.0/s);
 358
 359       /* lowpass filter:
 360        * meaning high frequency must be transformed away
 361        */
 362       ts[i] = sub_transforms(&ts2[i], &avg);
 363       /* kill accumulating offset in the filtered signal*/
 364       avg2 = add_transforms_(mult_transform(&avg2, 1 - tau),
 365                              mult_transform(&ts[i], tau));
 366       ts[i] = sub_transforms(&ts[i], &avg2);
 367
 368       if (td->conf.verbose & VS_DEBUG) {
 369         vs_log_msg(td->conf.modName,
 370                    "s_sum: %5lf %5lf %5lf, ts: %5lf, %5lf, %5lf\n",
 371                    s_sum.x, s_sum.y, s_sum.alpha,
 372                    ts[i].x, ts[i].y, ts[i].alpha);
 373         vs_log_msg(td->conf.modName,
 374                    "  avg: %5lf, %5lf, %5lf avg2: %5lf, %5lf, %5lf",
 375                    avg.x, avg.y, avg.alpha,
 376                    avg2.x, avg2.y, avg2.alpha);
 377       }
 378     }
 379     vs_free(ts2);
 380   }
 381   /* relative to absolute */
 382   if (td->conf.relative) {
 383     VSTransform t = ts[0];
 384     for (int i = 1; i < trans->len; i++) {
 385       ts[i] = add_transforms(&ts[i], &t);
 386       t = ts[i];
 387     }
 388   }
 389   return VS_OK;
 390 }
 391
 392
 393 /**
 394  * vsPreprocessTransforms: camera path optimization, relative to absolute conversion,
 395  *  and cropping of too large transforms.
 396  *
 397  * Parameters:
 398  *            td: transform private data structure
 399  *         trans: list of transformations (changed)
 400  * Return value:
 401  *     1 for success and 0 for failure
 402  * Preconditions:
 403  *     None
 404  * Side effects:
 405  *     td->trans will be modified
 406  */
 407 int vsPreprocessTransforms(VSTransformData* td, VSTransformations* trans)
 408 {
 409   // works inplace on trans
 410   if(cameraPathOptimization(td, trans)!=VS_OK) return VS_ERROR;
 411   VSTransform* ts = trans->ts;
 412   /*  invert? */
 413   if (td->conf.invert) {
 414     for (int i = 0; i < trans->len; i++) {
 415       ts[i] = mult_transform(&ts[i], -1);
 416     }
 417   }
 418
 419   /* crop at maximal shift */
 420   if (td->conf.maxShift != -1)
 421     for (int i = 0; i < trans->len; i++) {
 422       ts[i].x     = VS_CLAMP(ts[i].x, -td->conf.maxShift, td->conf.maxShift);
 423       ts[i].y     = VS_CLAMP(ts[i].y, -td->conf.maxShift, td->conf.maxShift);
 424     }
 425   if (td->conf.maxAngle != - 1.0)
 426     for (int i = 0; i < trans->len; i++)
 427       ts[i].alpha = VS_CLAMP(ts[i].alpha, -td->conf.maxAngle, td->conf.maxAngle);
 428
 429   /* Calc optimal zoom (1)
 430    *  cheap algo is to only consider translations
 431    *  uses cleaned max and min to eliminate 99% of transforms
 432    */
 433   if (td->conf.optZoom == 1 && trans->len > 1){
 434     VSTransform min_t, max_t;
 435     cleanmaxmin_xy_transform(ts, trans->len, 1, &min_t, &max_t);  // 99% of all transformations
 436     // the zoom value only for x
 437     double zx = 2*VS_MAX(max_t.x,fabs(min_t.x))/td->fiSrc.width;
 438     // the zoom value only for y
 439     double zy = 2*VS_MAX(max_t.y,fabs(min_t.y))/td->fiSrc.height;
 440     td->conf.zoom += 100 * VS_MAX(zx,zy); // use maximum
 441     td->conf.zoom = VS_CLAMP(td->conf.zoom,-60,60);
 442     vs_log_info(td->conf.modName, "Final zoom: %lf\n", td->conf.zoom);
 443   }
 444   /* Calc optimal zoom (2)
 445    *  sliding average to zoom only as much as needed also using rotation angles
 446    *  the baseline zoom is the mean required zoom + global zoom
 447    *  in order to avoid too much zooming in and out
 448    */
 449   if (td->conf.optZoom == 2 && trans->len > 1){
 450     double* zooms=(double*)vs_zalloc(sizeof(double)*trans->len);
 451     int w = td->fiSrc.width;
 452     int h = td->fiSrc.height;
 453     double req;
 454     double meanzoom;
 455     for (int i = 0; i < trans->len; i++) {
 456       zooms[i] = transform_get_required_zoom(&ts[i], w, h);
 457     }
 458     meanzoom = mean(zooms, trans->len) + td->conf.zoom; // add global zoom
 459     // forward - propagation (to make the zooming smooth)
 460     req = meanzoom;
 461     for (int i = 0; i < trans->len; i++) {
 462       req = VS_MAX(req, zooms[i]);
 463       ts[i].zoom=VS_MAX(ts[i].zoom,req);
 464       req= VS_MAX(meanzoom, req - td->conf.zoomSpeed); // zoom-out each frame
 465     }
 466     // backward - propagation
 467     req = meanzoom;
 468     for (int i = trans->len-1; i >= 0; i--) {
 469       req = VS_MAX(req, zooms[i]);
 470       ts[i].zoom=VS_MAX(ts[i].zoom,req);
 471       req= VS_MAX(meanzoom, req - td->conf.zoomSpeed);
 472     }
 473     vs_free(zooms);
 474   }else if (td->conf.zoom != 0){ /* apply global zoom */
 475     for (int i = 0; i < trans->len; i++)
 476       ts[i].zoom += td->conf.zoom;
 477   }
 478
 479   return VS_OK;
 480 }
 481
 482
 483 /**
 484  * vsLowPassTransforms: single step smoothing of transforms, using only the past.
 485  *  see also vsPreprocessTransforms. Here only relative transformations are
 486  *  considered (produced by motiondetection). Also cropping of too large transforms.
 487  *
 488  * Parameters:
 489  *            td: transform private data structure
 490  *           mem: memory for sliding average transformation
 491  *         trans: current transform (from previous to current frame)
 492  * Return value:
 493  *         new transformation for current frame
 494  * Preconditions:
 495  *     None
 496  */
 497 VSTransform vsLowPassTransforms(VSTransformData* td, VSSlidingAvgTrans* mem,
 498                                 const VSTransform* trans)
 499 {
 500
 501   if (!mem->initialized){
 502     // use the first transformation as the average camera movement
 503     mem->avg=*trans;
 504     mem->initialized=1;
 505     mem->zoomavg=0.0;
 506     mem->accum = null_transform();
 507     return mem->accum;
 508   }else{
 509     double s = 1.0/(td->conf.smoothing + 1);
 510     double tau = 1.0/(3.0 * (td->conf.smoothing + 1));
 511     if(td->conf.smoothing>0){
 512       // otherwise do the sliding window
 513       mem->avg = add_transforms_(mult_transform(&mem->avg, 1 - s),
 514                                  mult_transform(trans, s));
 515     }else{
 516       mem->avg = *trans;
 517     }
 518
 519     /* lowpass filter:
 520      * meaning high frequency must be transformed away
 521      */
 522     VSTransform newtrans = sub_transforms(trans, &mem->avg);
 523
 524     /* relative to absolute */
 525     if (td->conf.relative) {
 526       newtrans = add_transforms(&newtrans, &mem->accum);
 527       mem->accum = newtrans;
 528       if(td->conf.smoothing>0){
 529         // kill accumulating effects
 530         mem->accum = mult_transform(&mem->accum, 1.0 - tau);
 531       }
 532     }
 533
 534     /* crop at maximal shift */
 535     if (td->conf.maxShift != -1){
 536       newtrans.x     = VS_CLAMP(newtrans.x, -td->conf.maxShift, td->conf.maxShift);
 537       newtrans.y     = VS_CLAMP(newtrans.y, -td->conf.maxShift, td->conf.maxShift);
 538     }
 539     if (td->conf.maxAngle != - 1.0)
 540       newtrans.alpha = VS_CLAMP(newtrans.alpha, -td->conf.maxAngle, td->conf.maxAngle);
 541
 542     /* Calc sliding optimal zoom
 543      *  cheap algo is to only consider translations and to sliding avg
 544      */
 545     if (td->conf.optZoom != 0 && td->conf.smoothing > 0){
 546       // the zoom value only for x
 547       double zx = 2*newtrans.x/td->fiSrc.width;
 548       // the zoom value only for y
 549       double zy = 2*newtrans.y/td->fiSrc.height;
 550       double reqzoom = 100* VS_MAX(fabs(zx),fabs(zy)); // maximum is requried zoom
 551       mem->zoomavg = (mem->zoomavg*(1-s) + reqzoom*s);
 552       // since we only use past it is good to aniticipate
 553       //  and zoom a little in any case (so set td->zoom to 2 or so)
 554       newtrans.zoom = mem->zoomavg;
 555     }
 556     if (td->conf.zoom != 0){
 557       newtrans.zoom += td->conf.zoom;
 558     }
 559     return newtrans;
 560   }
 561 }
 562
 563 /*
 564  * Local variables:
 565  *   c-file-style: "stroustrup"
 566  *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
 567  *   indent-tabs-mode: nil
 568  *   c-basic-offset: 2 t
 569  * End:
 570  *
 571  * vim: expandtab shiftwidth=2:
 572  */