| 1 | /* |
| 2 | * motiondetect.c |
| 3 | * |
| 4 | * Copyright (C) Georg Martius - February 1007-2011 |
| 5 | * georg dot martius at web dot de |
| 6 | * Copyright (C) Alexey Osipov - Jule 2011 |
| 7 | * simba at lerlan dot ru |
| 8 | * speed optimizations (threshold, spiral, SSE, asm) |
| 9 | * |
| 10 | * This file is part of vid.stab video stabilization library |
| 11 | * |
| 12 | * vid.stab is free software; you can redistribute it and/or modify |
| 13 | * it under the terms of the GNU General Public License, |
| 14 | * as published by the Free Software Foundation; either version 2, or |
| 15 | * (at your option) any later version. |
| 16 | * |
| 17 | * vid.stab is distributed in the hope that it will be useful, |
| 18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | * GNU General Public License for more details. |
| 21 | * |
| 22 | * You should have received a copy of the GNU General Public License |
| 23 | * along with GNU Make; see the file COPYING. If not, write to |
| 24 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. |
| 25 | * |
| 26 | */ |
| 27 | #include "motiondetect.h" |
| 28 | #include "motiondetect_internal.h" |
| 29 | #include "motiondetect_opt.h" |
| 30 | #include <math.h> |
| 31 | #include <limits.h> |
| 32 | #include <stdint.h> |
| 33 | #include <string.h> |
| 34 | #include <assert.h> |
| 35 | |
| 36 | #ifdef USE_OMP |
| 37 | #include <omp.h> |
| 38 | #endif |
| 39 | |
| 40 | #include "boxblur.h" |
| 41 | #include "vidstabdefines.h" |
| 42 | #include "localmotion2transform.h" |
| 43 | #include "transformtype_operations.h" |
| 44 | #include "transformtype_operations.h" |
| 45 | |
| 46 | #define USE_SPIRAL_FIELD_CALC |
| 47 | |
| 48 | |
| 49 | /* internal data structures */ |
| 50 | |
| 51 | // structure that contains the contrast and the index of a field |
| 52 | typedef struct _contrast_idx { |
| 53 | double contrast; |
| 54 | int index; |
| 55 | } contrast_idx; |
| 56 | |
| 57 | |
| 58 | VSMotionDetectConfig vsMotionDetectGetDefaultConfig(const char* modName){ |
| 59 | VSMotionDetectConfig conf; |
| 60 | conf.stepSize = 6; |
| 61 | conf.accuracy = 15; |
| 62 | conf.shakiness = 5; |
| 63 | conf.virtualTripod = 0; |
| 64 | conf.contrastThreshold = 0.25; |
| 65 | conf.show = 0; |
| 66 | conf.modName = modName; |
| 67 | return conf; |
| 68 | } |
| 69 | |
| 70 | void vsMotionDetectGetConfig(VSMotionDetectConfig* conf, const VSMotionDetect* md){ |
| 71 | if(md && conf) |
| 72 | *conf = md->conf; |
| 73 | } |
| 74 | |
| 75 | const VSFrameInfo* vsMotionDetectGetFrameInfo(const VSMotionDetect* md){ |
| 76 | return &md->fi; |
| 77 | } |
| 78 | |
| 79 | |
| 80 | int vsMotionDetectInit(VSMotionDetect* md, const VSMotionDetectConfig* conf, const VSFrameInfo* fi){ |
| 81 | assert(md && fi); |
| 82 | md->conf = *conf; |
| 83 | md->fi = *fi; |
| 84 | |
| 85 | if(fi->pFormat<=PF_NONE || fi->pFormat==PF_PACKED || fi->pFormat>=PF_NUMBER) { |
| 86 | vs_log_warn(md->conf.modName, "unsupported Pixel Format (%i)\n", |
| 87 | md->fi.pFormat); |
| 88 | return VS_ERROR; |
| 89 | } |
| 90 | |
| 91 | vsFrameAllocate(&md->prev, &md->fi); |
| 92 | if (vsFrameIsNull(&md->prev)) { |
| 93 | vs_log_error(md->conf.modName, "malloc failed"); |
| 94 | return VS_ERROR; |
| 95 | } |
| 96 | |
| 97 | vsFrameNull(&md->curr); |
| 98 | vsFrameNull(&md->currorig); |
| 99 | vsFrameNull(&md->currtmp); |
| 100 | md->hasSeenOneFrame = 0; |
| 101 | md->frameNum = 0; |
| 102 | |
| 103 | // TODO: get rid of shakiness parameter in the long run |
| 104 | md->conf.shakiness = VS_MIN(10,VS_MAX(1,md->conf.shakiness)); |
| 105 | md->conf.accuracy = VS_MIN(15,VS_MAX(1,md->conf.accuracy)); |
| 106 | if (md->conf.accuracy < md->conf.shakiness / 2) { |
| 107 | vs_log_info(md->conf.modName, "Accuracy should not be lower than shakiness/2 -- fixed"); |
| 108 | md->conf.accuracy = md->conf.shakiness / 2; |
| 109 | } |
| 110 | if (md->conf.accuracy > 9 && md->conf.stepSize > 6) { |
| 111 | vs_log_info(md->conf.modName, "For high accuracy use lower stepsize -- set to 6 now"); |
| 112 | md->conf.stepSize = 6; // maybe 4 |
| 113 | } |
| 114 | |
| 115 | int minDimension = VS_MIN(md->fi.width, md->fi.height); |
| 116 | // shift: shakiness 1: height/40; 10: height/4 |
| 117 | // md->maxShift = VS_MAX(4,(minDimension*md->conf.shakiness)/40); |
| 118 | // size: shakiness 1: height/40; 10: height/6 (clipped) |
| 119 | // md->fieldSize = VS_MAX(4,VS_MIN(minDimension/6, (minDimension*md->conf.shakiness)/40)); |
| 120 | |
| 121 | // fixed size and shift now |
| 122 | int maxShift = VS_MAX(16, minDimension/7); |
| 123 | int fieldSize = VS_MAX(16, minDimension/10); |
| 124 | int fieldSizeFine = VS_MAX(6, minDimension/60); |
| 125 | #if defined(USE_SSE2) || defined(USE_SSE2_ASM) |
| 126 | fieldSize = (fieldSize / 16 + 1) * 16; |
| 127 | fieldSizeFine = (fieldSizeFine / 16 + 1) * 16; |
| 128 | #endif |
| 129 | if (!initFields(md, &md->fieldscoarse, fieldSize, maxShift, md->conf.stepSize, |
| 130 | 1, 0, md->conf.contrastThreshold)) { |
| 131 | return VS_ERROR; |
| 132 | } |
| 133 | // for the fine check we use a smaller size and smaller maximal shift (=size) |
| 134 | if (!initFields(md, &md->fieldsfine, fieldSizeFine, fieldSizeFine, |
| 135 | 2, 1, fieldSizeFine, md->conf.contrastThreshold/2)) { |
| 136 | return VS_ERROR; |
| 137 | } |
| 138 | |
| 139 | vsFrameAllocate(&md->curr,&md->fi); |
| 140 | vsFrameAllocate(&md->currtmp, &md->fi); |
| 141 | |
| 142 | md->initialized = 2; |
| 143 | return VS_OK; |
| 144 | } |
| 145 | |
| 146 | void vsMotionDetectionCleanup(VSMotionDetect* md) { |
| 147 | if(md->fieldscoarse.fields) { |
| 148 | vs_free(md->fieldscoarse.fields); |
| 149 | md->fieldscoarse.fields=0; |
| 150 | } |
| 151 | if(md->fieldsfine.fields) { |
| 152 | vs_free(md->fieldsfine.fields); |
| 153 | md->fieldsfine.fields=0; |
| 154 | } |
| 155 | vsFrameFree(&md->prev); |
| 156 | vsFrameFree(&md->curr); |
| 157 | vsFrameFree(&md->currtmp); |
| 158 | |
| 159 | md->initialized = 0; |
| 160 | } |
| 161 | |
| 162 | // returns true if match of local motion is better than threshold |
| 163 | short lm_match_better(void* thresh, void* lm){ |
| 164 | if(((LocalMotion*)lm)->match <= *((double*)thresh)) |
| 165 | return 1; |
| 166 | else |
| 167 | return 0; |
| 168 | } |
| 169 | |
| 170 | int vsMotionDetection(VSMotionDetect* md, LocalMotions* motions, VSFrame *frame) { |
| 171 | assert(md->initialized==2); |
| 172 | |
| 173 | md->currorig = *frame; |
| 174 | // smoothen image to do better motion detection |
| 175 | // (larger stepsize or eventually gradient descent (need higher resolution)) |
| 176 | if (md->fi.pFormat > PF_PACKED) { |
| 177 | // we could calculate a grayscale version and use the PLANAR stuff afterwards |
| 178 | // so far smoothing is only implemented for PLANAR |
| 179 | vsFrameCopy(&md->curr, frame, &md->fi); |
| 180 | } else { |
| 181 | // box-kernel smoothing (plain average of pixels), which is fine for us |
| 182 | boxblurPlanar(&md->curr, frame, &md->currtmp, &md->fi, md->conf.stepSize*1/*1.4*/, |
| 183 | BoxBlurNoColor); |
| 184 | // two times yields tent-kernel smoothing, which may be better, but I don't |
| 185 | // think we need it |
| 186 | //boxblurPlanar(md->curr, md->curr, md->currtmp, &md->fi, md->stepSize*1, |
| 187 | // BoxBlurNoColor); |
| 188 | } |
| 189 | |
| 190 | if (md->hasSeenOneFrame) { |
| 191 | LocalMotions motionscoarse; |
| 192 | LocalMotions motionsfine; |
| 193 | vs_vector_init(&motionsfine,0); |
| 194 | // md->curr = frame; |
| 195 | if (md->fi.pFormat > PF_PACKED) { |
| 196 | motionscoarse = calcTransFields(md, &md->fieldscoarse, |
| 197 | calcFieldTransPacked, contrastSubImgPacked); |
| 198 | } else { // PLANAR |
| 199 | motionscoarse = calcTransFields(md, &md->fieldscoarse, |
| 200 | calcFieldTransPlanar, contrastSubImgPlanar); |
| 201 | } |
| 202 | int num_motions = vs_vector_size(&motionscoarse); |
| 203 | if (num_motions < 1) { |
| 204 | vs_log_warn(md->conf.modName, "too low contrast. \ |
| 205 | (no translations are detected in frame %i)\n", md->frameNum); |
| 206 | }else{ |
| 207 | // calc transformation and perform another scan with small fields |
| 208 | VSTransform t = vsSimpleMotionsToTransform(md->fi, md->conf.modName, &motionscoarse); |
| 209 | md->fieldsfine.offset = t; |
| 210 | md->fieldsfine.useOffset = 1; |
| 211 | LocalMotions motions2; |
| 212 | if (md->fi.pFormat > PF_PACKED) { |
| 213 | motions2 = calcTransFields(md, &md->fieldsfine, |
| 214 | calcFieldTransPacked, contrastSubImgPacked); |
| 215 | } else { // PLANAR |
| 216 | motions2 = calcTransFields(md, &md->fieldsfine, |
| 217 | calcFieldTransPlanar, contrastSubImgPlanar); |
| 218 | } |
| 219 | // through out those with bad match (worse than mean of coarse scan) |
| 220 | VSArray matchQualities1 = localmotionsGetMatch(&motionscoarse); |
| 221 | double meanMatch = cleanmean(matchQualities1.dat, matchQualities1.len, NULL, NULL); |
| 222 | motionsfine = vs_vector_filter(&motions2, lm_match_better, &meanMatch); |
| 223 | if(0){ |
| 224 | printf("\nMatches: mean: %f | ", meanMatch); |
| 225 | vs_array_print(matchQualities1, stdout); |
| 226 | printf("\n fine: "); |
| 227 | VSArray matchQualities2 = localmotionsGetMatch(&motions2); |
| 228 | vs_array_print(matchQualities2, stdout); |
| 229 | printf("\n"); |
| 230 | } |
| 231 | } |
| 232 | if (md->conf.show) { // draw fields and transforms into frame. |
| 233 | int num_motions_fine = vs_vector_size(&motionsfine); |
| 234 | // this has to be done one after another to handle possible overlap |
| 235 | if (md->conf.show > 1) { |
| 236 | for (int i = 0; i < num_motions; i++) |
| 237 | drawFieldScanArea(md, LMGet(&motionscoarse,i), md->fieldscoarse.maxShift); |
| 238 | } |
| 239 | for (int i = 0; i < num_motions; i++) |
| 240 | drawField(md, LMGet(&motionscoarse,i), 1); |
| 241 | for (int i = 0; i < num_motions_fine; i++) |
| 242 | drawField(md, LMGet(&motionsfine,i), 0); |
| 243 | for (int i = 0; i < num_motions; i++) |
| 244 | drawFieldTrans(md, LMGet(&motionscoarse,i),180); |
| 245 | for (int i = 0; i < num_motions_fine; i++) |
| 246 | drawFieldTrans(md, LMGet(&motionsfine,i), 64); |
| 247 | } |
| 248 | *motions = vs_vector_concat(&motionscoarse,&motionsfine); |
| 249 | //*motions = motionscoarse; |
| 250 | //*motions = motionsfine; |
| 251 | } else { |
| 252 | vs_vector_init(motions,1); // dummy vector |
| 253 | md->hasSeenOneFrame = 1; |
| 254 | } |
| 255 | |
| 256 | // for tripod we keep a certain reference frame |
| 257 | if(md->conf.virtualTripod < 1 || md->frameNum < md->conf.virtualTripod) |
| 258 | // copy current frame (smoothed) to prev for next frame comparison |
| 259 | vsFrameCopy(&md->prev, &md->curr, &md->fi); |
| 260 | md->frameNum++; |
| 261 | return VS_OK; |
| 262 | } |
| 263 | |
| 264 | |
| 265 | /** initialise measurement fields on the frame. |
| 266 | The size of the fields and the maxshift is used to |
| 267 | calculate an optimal distribution in the frame. |
| 268 | if border is set then they are placed savely away from the border for maxShift |
| 269 | */ |
| 270 | |
| 271 | int initFields(VSMotionDetect* md, VSMotionDetectFields* fs, |
| 272 | int size, int maxShift, int stepSize, |
| 273 | short keepBorder, int spacing, double contrastThreshold) { |
| 274 | fs->fieldSize = size; |
| 275 | fs->maxShift = maxShift; |
| 276 | fs->stepSize = stepSize; |
| 277 | fs->useOffset = 0; |
| 278 | fs->contrastThreshold = contrastThreshold; |
| 279 | |
| 280 | int rows = VS_MAX(3,(md->fi.height - fs->maxShift*2)/(size+spacing)-1); |
| 281 | int cols = VS_MAX(3,(md->fi.width - fs->maxShift*2)/(size+spacing)-1); |
| 282 | // make sure that the remaining rows have the same length |
| 283 | fs->fieldNum = rows * cols; |
| 284 | fs->fieldRows = rows; |
| 285 | |
| 286 | if (!(fs->fields = (Field*) vs_malloc(sizeof(Field) * fs->fieldNum))) { |
| 287 | vs_log_error(md->conf.modName, "malloc failed!\n"); |
| 288 | return 0; |
| 289 | } else { |
| 290 | int i, j; |
| 291 | int border=fs->stepSize; |
| 292 | // the border is the amount by which the field centers |
| 293 | // have to be away from the image boundary |
| 294 | // (stepsize is added in case shift is increased through stepsize) |
| 295 | if(keepBorder) |
| 296 | border = size / 2 + fs->maxShift + fs->stepSize; |
| 297 | int step_x = (md->fi.width - 2 * border) / VS_MAX(cols-1,1); |
| 298 | int step_y = (md->fi.height - 2 * border) / VS_MAX(rows-1,1); |
| 299 | for (j = 0; j < rows; j++) { |
| 300 | for (i = 0; i < cols; i++) { |
| 301 | int idx = j * cols + i; |
| 302 | fs->fields[idx].x = border + i * step_x; |
| 303 | fs->fields[idx].y = border + j * step_y; |
| 304 | fs->fields[idx].size = size; |
| 305 | } |
| 306 | } |
| 307 | } |
| 308 | fs->maxFields = (md->conf.accuracy) * fs->fieldNum / 15; |
| 309 | vs_log_info(md->conf.modName, "Fieldsize: %i, Maximal translation: %i pixel\n", |
| 310 | fs->fieldSize, fs->maxShift); |
| 311 | vs_log_info(md->conf.modName, "Number of used measurement fields: %i out of %i\n", |
| 312 | fs->maxFields, fs->fieldNum); |
| 313 | |
| 314 | return 1; |
| 315 | } |
| 316 | |
| 317 | /** \see contrastSubImg*/ |
| 318 | double contrastSubImgPlanar(VSMotionDetect* md, const Field* field) { |
| 319 | #ifdef USE_SSE2 |
| 320 | return contrastSubImg1_SSE(md->curr.data[0], field, md->curr.linesize[0],md->fi.height); |
| 321 | #else |
| 322 | return contrastSubImg(md->curr.data[0],field,md->curr.linesize[0],md->fi.height,1); |
| 323 | #endif |
| 324 | |
| 325 | } |
| 326 | |
| 327 | /** |
| 328 | \see contrastSubImg_Michelson three times called with bytesPerPixel=3 |
| 329 | for all channels |
| 330 | */ |
| 331 | double contrastSubImgPacked(VSMotionDetect* md, const Field* field) { |
| 332 | unsigned char* const I = md->curr.data[0]; |
| 333 | int linesize2 = md->curr.linesize[0]/3; // linesize in pixels |
| 334 | return (contrastSubImg(I, field, linesize2, md->fi.height, 3) |
| 335 | + contrastSubImg(I + 1, field, linesize2, md->fi.height, 3) |
| 336 | + contrastSubImg(I + 2, field, linesize2, md->fi.height, 3)) / 3; |
| 337 | } |
| 338 | |
| 339 | /** |
| 340 | calculates Michelson-contrast in the given small part of the given image |
| 341 | to be more compatible with the absolute difference formula this is scaled by 0.1 |
| 342 | |
| 343 | \param I pointer to framebuffer |
| 344 | \param field Field specifies position(center) and size of subimage |
| 345 | \param width width of frame (linesize in pixels) |
| 346 | \param height height of frame |
| 347 | \param bytesPerPixel calc contrast for only for first channel |
| 348 | */ |
| 349 | double contrastSubImg(unsigned char* const I, const Field* field, int width, |
| 350 | int height, int bytesPerPixel) { |
| 351 | int k, j; |
| 352 | unsigned char* p = NULL; |
| 353 | int s2 = field->size / 2; |
| 354 | unsigned char mini = 255; |
| 355 | unsigned char maxi = 0; |
| 356 | |
| 357 | p = I + ((field->x - s2) + (field->y - s2) * width) * bytesPerPixel; |
| 358 | for (j = 0; j < field->size; j++) { |
| 359 | for (k = 0; k < field->size; k++) { |
| 360 | mini = (mini < *p) ? mini : *p; |
| 361 | maxi = (maxi > *p) ? maxi : *p; |
| 362 | p += bytesPerPixel; |
| 363 | } |
| 364 | p += (width - field->size) * bytesPerPixel; |
| 365 | } |
| 366 | return (maxi - mini) / (maxi + mini + 0.1); // +0.1 to avoid division by 0 |
| 367 | } |
| 368 | |
| 369 | /* calculates the optimal transformation for one field in Planar frames |
| 370 | * (only luminance) |
| 371 | */ |
| 372 | LocalMotion calcFieldTransPlanar(VSMotionDetect* md, VSMotionDetectFields* fs, |
| 373 | const Field* field, int fieldnum) { |
| 374 | int tx = 0; |
| 375 | int ty = 0; |
| 376 | uint8_t *Y_c = md->curr.data[0], *Y_p = md->prev.data[0]; |
| 377 | int linesize_c = md->curr.linesize[0], linesize_p = md->prev.linesize[0]; |
| 378 | // we only use the luminance part of the image |
| 379 | int i, j; |
| 380 | int stepSize = fs->stepSize; |
| 381 | int maxShift = fs->maxShift; |
| 382 | Vec offset = { 0, 0}; |
| 383 | LocalMotion lm = null_localmotion(); |
| 384 | if(fs->useOffset){ |
| 385 | // Todo: we could put the preparedtransform into fs |
| 386 | PreparedTransform pt = prepare_transform(&fs->offset, &md->fi); |
| 387 | Vec fieldpos = {field->x, field->y}; |
| 388 | offset = sub_vec(transform_vec(&pt, &fieldpos), fieldpos); |
| 389 | // is the field still in the frame |
| 390 | int s2 = field->size/2; |
| 391 | if(unlikely(fieldpos.x+offset.x-s2-maxShift-stepSize < 0 || |
| 392 | fieldpos.x+offset.x+s2+maxShift+stepSize >= md->fi.width || |
| 393 | fieldpos.y+offset.y-s2-maxShift-stepSize < 0 || |
| 394 | fieldpos.y+offset.y+s2+maxShift+stepSize >= md->fi.height)){ |
| 395 | lm.match=-1; |
| 396 | return lm; |
| 397 | } |
| 398 | } |
| 399 | |
| 400 | #ifdef STABVERBOSE |
| 401 | // printf("%i %i %f\n", md->frameNum, fieldnum, contr); |
| 402 | FILE *f = NULL; |
| 403 | char buffer[32]; |
| 404 | vs_snprintf(buffer, sizeof(buffer), "f%04i_%02i.dat", md->frameNum, fieldnum); |
| 405 | f = fopen(buffer, "w"); |
| 406 | fprintf(f, "# splot \"%s\"\n", buffer); |
| 407 | #endif |
| 408 | |
| 409 | #ifdef USE_SPIRAL_FIELD_CALC |
| 410 | unsigned int minerror = UINT_MAX; |
| 411 | |
| 412 | // check all positions by outgoing spiral |
| 413 | i = 0; j = 0; |
| 414 | int limit = 1; |
| 415 | int step = 0; |
| 416 | int dir = 0; |
| 417 | while (j >= -maxShift && j <= maxShift && i >= -maxShift && i <= maxShift) { |
| 418 | unsigned int error = compareSubImg(Y_c, Y_p, field, linesize_c, linesize_p, |
| 419 | md->fi.height, 1, i + offset.x, j + offset.y, |
| 420 | minerror); |
| 421 | |
| 422 | if (error < minerror) { |
| 423 | minerror = error; |
| 424 | tx = i; |
| 425 | ty = j; |
| 426 | } |
| 427 | |
| 428 | //spiral indexing... |
| 429 | step++; |
| 430 | switch (dir) { |
| 431 | case 0: |
| 432 | i += stepSize; |
| 433 | if (step == limit) { |
| 434 | dir = 1; |
| 435 | step = 0; |
| 436 | } |
| 437 | break; |
| 438 | case 1: |
| 439 | j += stepSize; |
| 440 | if (step == limit) { |
| 441 | dir = 2; |
| 442 | step = 0; |
| 443 | limit++; |
| 444 | } |
| 445 | break; |
| 446 | case 2: |
| 447 | i -= stepSize; |
| 448 | if (step == limit) { |
| 449 | dir = 3; |
| 450 | step = 0; |
| 451 | } |
| 452 | break; |
| 453 | case 3: |
| 454 | j -= stepSize; |
| 455 | if (step == limit) { |
| 456 | dir = 0; |
| 457 | step = 0; |
| 458 | limit++; |
| 459 | } |
| 460 | break; |
| 461 | } |
| 462 | } |
| 463 | #else |
| 464 | /* Here we improve speed by checking first the most probable position |
| 465 | then the search paths are most effectively cut. (0,0) is a simple start |
| 466 | */ |
| 467 | unsigned int minerror = compareSubImg(Y_c, Y_p, field, linesize_c, linesize_p, |
| 468 | md->fi.height, 1, 0, 0, UINT_MAX); |
| 469 | // check all positions... |
| 470 | for (i = -maxShift; i <= maxShift; i += stepSize) { |
| 471 | for (j = -maxShift; j <= maxShift; j += stepSize) { |
| 472 | if( i==0 && j==0 ) |
| 473 | continue; //no need to check this since already done |
| 474 | unsigned int error = compareSubImg(Y_c, Y_p, field, linesize_c, linesize_p, |
| 475 | md->fi.height, 1, i+offset.x, j+offset.y, minerror); |
| 476 | if (error < minerror) { |
| 477 | minerror = error; |
| 478 | tx = i; |
| 479 | ty = j; |
| 480 | } |
| 481 | #ifdef STABVERBOSE |
| 482 | fprintf(f, "%i %i %f\n", i, j, error); |
| 483 | #endif |
| 484 | } |
| 485 | } |
| 486 | |
| 487 | #endif |
| 488 | |
| 489 | while(stepSize > 1) {// make fine grain check around the best match |
| 490 | int txc = tx; // save the shifts |
| 491 | int tyc = ty; |
| 492 | int newStepSize = stepSize/2; |
| 493 | int r = stepSize - newStepSize; |
| 494 | for (i = txc - r; i <= txc + r; i += newStepSize) { |
| 495 | for (j = tyc - r; j <= tyc + r; j += newStepSize) { |
| 496 | if (i == txc && j == tyc) |
| 497 | continue; //no need to check this since already done |
| 498 | unsigned int error = compareSubImg(Y_c, Y_p, field, linesize_c, linesize_p, |
| 499 | md->fi.height, 1, i+offset.x, j+offset.y, minerror); |
| 500 | #ifdef STABVERBOSE |
| 501 | fprintf(f, "%i %i %f\n", i, j, error); |
| 502 | #endif |
| 503 | if (error < minerror) { |
| 504 | minerror = error; |
| 505 | tx = i; |
| 506 | ty = j; |
| 507 | } |
| 508 | } |
| 509 | } |
| 510 | stepSize /= 2; |
| 511 | } |
| 512 | #ifdef STABVERBOSE |
| 513 | fclose(f); |
| 514 | vs_log_msg(md->modName, "Minerror: %f\n", minerror); |
| 515 | #endif |
| 516 | |
| 517 | if (unlikely(fabs(tx) >= maxShift + stepSize - 1 || |
| 518 | fabs(ty) >= maxShift + stepSize)) { |
| 519 | #ifdef STABVERBOSE |
| 520 | vs_log_msg(md->modName, "maximal shift "); |
| 521 | #endif |
| 522 | lm.match =-1.0; // to be kicked out |
| 523 | return lm; |
| 524 | } |
| 525 | lm.f = *field; |
| 526 | lm.v.x = tx + offset.x; |
| 527 | lm.v.y = ty + offset.y; |
| 528 | lm.match = ((double) minerror)/(field->size*field->size); |
| 529 | return lm; |
| 530 | } |
| 531 | |
| 532 | /* calculates the optimal transformation for one field in Packed |
| 533 | * slower than the Planar version because it uses all three color channels |
| 534 | */ |
| 535 | LocalMotion calcFieldTransPacked(VSMotionDetect* md, VSMotionDetectFields* fs, |
| 536 | const Field* field, int fieldnum) { |
| 537 | int tx = 0; |
| 538 | int ty = 0; |
| 539 | uint8_t *I_c = md->curr.data[0], *I_p = md->prev.data[0]; |
| 540 | int width1 = md->curr.linesize[0]/3; // linesize in pixels |
| 541 | int width2 = md->prev.linesize[0]/3; // linesize in pixels |
| 542 | int i, j; |
| 543 | int stepSize = fs->stepSize; |
| 544 | int maxShift = fs->maxShift; |
| 545 | |
| 546 | Vec offset = { 0, 0}; |
| 547 | LocalMotion lm = null_localmotion(); |
| 548 | if(fs->useOffset){ |
| 549 | PreparedTransform pt = prepare_transform(&fs->offset, &md->fi); |
| 550 | offset = transform_vec(&pt, (Vec*)field); |
| 551 | // is the field still in the frame |
| 552 | if(unlikely(offset.x-maxShift-stepSize < 0 || offset.x+maxShift+stepSize >= md->fi.width || |
| 553 | offset.y-maxShift-stepSize < 0 || offset.y+maxShift+stepSize >= md->fi.height)){ |
| 554 | lm.match=-1; |
| 555 | return lm; |
| 556 | } |
| 557 | } |
| 558 | |
| 559 | /* Here we improve speed by checking first the most probable position |
| 560 | then the search paths are most effectively cut. (0,0) is a simple start |
| 561 | */ |
| 562 | unsigned int minerror = compareSubImg(I_c, I_p, field, width1, width2, md->fi.height, |
| 563 | 3, offset.x, offset.y, UINT_MAX); |
| 564 | // check all positions... |
| 565 | for (i = -maxShift; i <= maxShift; i += stepSize) { |
| 566 | for (j = -maxShift; j <= maxShift; j += stepSize) { |
| 567 | if( i==0 && j==0 ) |
| 568 | continue; //no need to check this since already done |
| 569 | unsigned int error = compareSubImg(I_c, I_p, field, width1, width2, |
| 570 | md->fi.height, 3, i + offset.x, j + offset.y, minerror); |
| 571 | if (error < minerror) { |
| 572 | minerror = error; |
| 573 | tx = i; |
| 574 | ty = j; |
| 575 | } |
| 576 | } |
| 577 | } |
| 578 | if (stepSize > 1) { // make fine grain check around the best match |
| 579 | int txc = tx; // save the shifts |
| 580 | int tyc = ty; |
| 581 | int r = stepSize - 1; |
| 582 | for (i = txc - r; i <= txc + r; i += 1) { |
| 583 | for (j = tyc - r; j <= tyc + r; j += 1) { |
| 584 | if (i == txc && j == tyc) |
| 585 | continue; //no need to check this since already done |
| 586 | unsigned int error = compareSubImg(I_c, I_p, field, width1, width2, |
| 587 | md->fi.height, 3, i + offset.x, j + offset.y, minerror); |
| 588 | if (error < minerror) { |
| 589 | minerror = error; |
| 590 | tx = i; |
| 591 | ty = j; |
| 592 | } |
| 593 | } |
| 594 | } |
| 595 | } |
| 596 | |
| 597 | if (fabs(tx) >= maxShift + stepSize - 1 || fabs(ty) >= maxShift + stepSize - 1) { |
| 598 | #ifdef STABVERBOSE |
| 599 | vs_log_msg(md->modName, "maximal shift "); |
| 600 | #endif |
| 601 | lm.match = -1; |
| 602 | return lm; |
| 603 | } |
| 604 | lm.f = *field; |
| 605 | lm.v.x = tx + offset.x; |
| 606 | lm.v.y = ty + offset.y; |
| 607 | lm.match = ((double)minerror)/(field->size*field->size); |
| 608 | return lm; |
| 609 | } |
| 610 | |
| 611 | /* compares contrast_idx structures respect to the contrast |
| 612 | (for sort function) |
| 613 | */ |
| 614 | int cmp_contrast_idx(const void *ci1, const void* ci2) { |
| 615 | double a = ((contrast_idx*) ci1)->contrast; |
| 616 | double b = ((contrast_idx*) ci2)->contrast; |
| 617 | return a < b ? 1 : (a > b ? -1 : 0); |
| 618 | } |
| 619 | |
| 620 | /* select only the best 'maxfields' fields |
| 621 | first calc contrasts then select from each part of the |
| 622 | frame some fields |
| 623 | We may simplify here by using random. People want high quality, so typically we use all. |
| 624 | */ |
| 625 | VSVector selectfields(VSMotionDetect* md, VSMotionDetectFields* fs, |
| 626 | contrastSubImgFunc contrastfunc) { |
| 627 | int i, j; |
| 628 | VSVector goodflds; |
| 629 | contrast_idx *ci = |
| 630 | (contrast_idx*) vs_malloc(sizeof(contrast_idx) * fs->fieldNum); |
| 631 | vs_vector_init(&goodflds, fs->fieldNum); |
| 632 | |
| 633 | // we split all fields into row+1 segments and take from each segment |
| 634 | // the best fields |
| 635 | int numsegms = (fs->fieldRows + 1); |
| 636 | int segmlen = fs->fieldNum / (fs->fieldRows + 1) + 1; |
| 637 | // split the frame list into rows+1 segments |
| 638 | contrast_idx *ci_segms = |
| 639 | (contrast_idx*) vs_malloc(sizeof(contrast_idx) * fs->fieldNum); |
| 640 | int remaining = 0; |
| 641 | // calculate contrast for each field |
| 642 | // #pragma omp parallel for shared(ci,md) no speedup because to short |
| 643 | for (i = 0; i < fs->fieldNum; i++) { |
| 644 | ci[i].contrast = contrastfunc(md, &fs->fields[i]); |
| 645 | ci[i].index = i; |
| 646 | if (ci[i].contrast < fs->contrastThreshold) |
| 647 | ci[i].contrast = 0; |
| 648 | // else printf("%i %lf\n", ci[i].index, ci[i].contrast); |
| 649 | } |
| 650 | |
| 651 | memcpy(ci_segms, ci, sizeof(contrast_idx) * fs->fieldNum); |
| 652 | // get best fields from each segment |
| 653 | for (i = 0; i < numsegms; i++) { |
| 654 | int startindex = segmlen * i; |
| 655 | int endindex = segmlen * (i + 1); |
| 656 | endindex = endindex > fs->fieldNum ? fs->fieldNum : endindex; |
| 657 | //printf("Segment: %i: %i-%i\n", i, startindex, endindex); |
| 658 | |
| 659 | // sort within segment |
| 660 | qsort(ci_segms + startindex, endindex - startindex, |
| 661 | sizeof(contrast_idx), cmp_contrast_idx); |
| 662 | // take maxfields/numsegms |
| 663 | for (j = 0; j < fs->maxFields / numsegms; j++) { |
| 664 | if (startindex + j >= endindex) |
| 665 | continue; |
| 666 | // printf("%i %lf\n", ci_segms[startindex+j].index, |
| 667 | // ci_segms[startindex+j].contrast); |
| 668 | if (ci_segms[startindex + j].contrast > 0) { |
| 669 | vs_vector_append_dup(&goodflds, &ci[ci_segms[startindex+j].index], |
| 670 | sizeof(contrast_idx)); |
| 671 | // don't consider them in the later selection process |
| 672 | ci_segms[startindex + j].contrast = 0; |
| 673 | } |
| 674 | } |
| 675 | } |
| 676 | // check whether enough fields are selected |
| 677 | // printf("Phase2: %i\n", vs_list_size(goodflds)); |
| 678 | remaining = fs->maxFields - vs_vector_size(&goodflds); |
| 679 | if (remaining > 0) { |
| 680 | // take the remaining from the leftovers |
| 681 | qsort(ci_segms, fs->fieldNum, sizeof(contrast_idx), cmp_contrast_idx); |
| 682 | for (j = 0; j < remaining; j++) { |
| 683 | if (ci_segms[j].contrast > 0) { |
| 684 | vs_vector_append_dup(&goodflds, &ci_segms[j], sizeof(contrast_idx)); |
| 685 | } |
| 686 | } |
| 687 | } |
| 688 | // printf("Ende: %i\n", vs_list_size(goodflds)); |
| 689 | vs_free(ci); |
| 690 | vs_free(ci_segms); |
| 691 | return goodflds; |
| 692 | } |
| 693 | |
| 694 | /* tries to register current frame onto previous frame. |
| 695 | * Algorithm: |
| 696 | * discards fields with low contrast |
| 697 | * select maxfields fields according to their contrast |
| 698 | * check theses fields for vertical and horizontal transformation |
| 699 | * use minimal difference of all possible positions |
| 700 | */ |
| 701 | LocalMotions calcTransFields(VSMotionDetect* md, |
| 702 | VSMotionDetectFields* fields, |
| 703 | calcFieldTransFunc fieldfunc, |
| 704 | contrastSubImgFunc contrastfunc) { |
| 705 | LocalMotions localmotions; |
| 706 | vs_vector_init(&localmotions,fields->maxFields); |
| 707 | |
| 708 | #ifdef STABVERBOSE |
| 709 | FILE *file = NULL; |
| 710 | char buffer[32]; |
| 711 | vs_snprintf(buffer, sizeof(buffer), "k%04i.dat", md->frameNum); |
| 712 | file = fopen(buffer, "w"); |
| 713 | fprintf(file, "# plot \"%s\" w l, \"\" every 2:1:0\n", buffer); |
| 714 | #endif |
| 715 | |
| 716 | VSVector goodflds = selectfields(md, fields, contrastfunc); |
| 717 | // use all "good" fields and calculate optimal match to previous frame |
| 718 | #ifdef USE_OMP |
| 719 | #pragma omp parallel for shared(goodflds, md, localmotions, fs) // does not bring speedup |
| 720 | #endif |
| 721 | for(int index=0; index < vs_vector_size(&goodflds); index++){ |
| 722 | int i = ((contrast_idx*)vs_vector_get(&goodflds,index))->index; |
| 723 | LocalMotion m; |
| 724 | m = fieldfunc(md, fields, &fields->fields[i], i); // e.g. calcFieldTransPlanar |
| 725 | if(m.match >= 0){ |
| 726 | m.contrast = ((contrast_idx*)vs_vector_get(&goodflds,index))->contrast; |
| 727 | #ifdef STABVERBOSE |
| 728 | fprintf(file, "%i %i\n%f %f %f %f\n \n\n", m.f.x, m.f.y, |
| 729 | m.f.x + m.v.x, m.f.y + m.v.y, m.match, m.contrast); |
| 730 | #endif |
| 731 | vs_vector_append_dup(&localmotions, &m, sizeof(LocalMotion)); |
| 732 | } |
| 733 | } |
| 734 | vs_vector_del(&goodflds); |
| 735 | |
| 736 | #ifdef STABVERBOSE |
| 737 | fclose(file); |
| 738 | #endif |
| 739 | return localmotions; |
| 740 | } |
| 741 | |
| 742 | |
| 743 | |
| 744 | |
| 745 | |
| 746 | /** draws the field scanning area */ |
| 747 | void drawFieldScanArea(VSMotionDetect* md, const LocalMotion* lm, int maxShift) { |
| 748 | if (md->fi.pFormat > PF_PACKED) |
| 749 | return; |
| 750 | drawRectangle(md->currorig.data[0], md->currorig.linesize[0], md->fi.height, 1, lm->f.x, lm->f.y, |
| 751 | lm->f.size + 2 * maxShift, lm->f.size + 2 * maxShift, 80); |
| 752 | } |
| 753 | |
| 754 | /** draws the field */ |
| 755 | void drawField(VSMotionDetect* md, const LocalMotion* lm, short box) { |
| 756 | if (md->fi.pFormat > PF_PACKED) |
| 757 | return; |
| 758 | if(box) |
| 759 | drawBox(md->currorig.data[0], md->currorig.linesize[0], md->fi.height, 1, |
| 760 | lm->f.x, lm->f.y, lm->f.size, lm->f.size, /*lm->match >100 ? 100 :*/ 40); |
| 761 | else |
| 762 | drawRectangle(md->currorig.data[0], md->currorig.linesize[0], md->fi.height, 1, |
| 763 | lm->f.x, lm->f.y, lm->f.size, lm->f.size, /*lm->match >100 ? 100 :*/ 40); |
| 764 | } |
| 765 | |
| 766 | /** draws the transform data of this field */ |
| 767 | void drawFieldTrans(VSMotionDetect* md, const LocalMotion* lm, int color) { |
| 768 | if (md->fi.pFormat > PF_PACKED) |
| 769 | return; |
| 770 | Vec end = add_vec(field_to_vec(lm->f),lm->v); |
| 771 | drawBox(md->currorig.data[0], md->currorig.linesize[0], md->fi.height, 1, |
| 772 | lm->f.x, lm->f.y, 5, 5, 0); // draw center |
| 773 | drawBox(md->currorig.data[0], md->currorig.linesize[0], md->fi.height, 1, |
| 774 | lm->f.x + lm->v.x, lm->f.y + lm->v.y, 5, 5, 250); // draw translation |
| 775 | drawLine(md->currorig.data[0], md->currorig.linesize[0], md->fi.height, 1, |
| 776 | (Vec*)&lm->f, &end, 3, color); |
| 777 | |
| 778 | } |
| 779 | |
| 780 | /** |
| 781 | * draws a box at the given position x,y (center) in the given color |
| 782 | (the same for all channels) |
| 783 | */ |
| 784 | void drawBox(unsigned char* I, int width, int height, int bytesPerPixel, int x, |
| 785 | int y, int sizex, int sizey, unsigned char color) { |
| 786 | |
| 787 | unsigned char* p = NULL; |
| 788 | int j, k; |
| 789 | p = I + ((x - sizex / 2) + (y - sizey / 2) * width) * bytesPerPixel; |
| 790 | for (j = 0; j < sizey; j++) { |
| 791 | for (k = 0; k < sizex * bytesPerPixel; k++) { |
| 792 | *p = color; |
| 793 | p++; |
| 794 | } |
| 795 | p += (width - sizex) * bytesPerPixel; |
| 796 | } |
| 797 | } |
| 798 | |
| 799 | /** |
| 800 | * draws a rectangle (not filled) at the given position x,y (center) in the given color |
| 801 | at the first channel |
| 802 | */ |
| 803 | void drawRectangle(unsigned char* I, int width, int height, int bytesPerPixel, int x, |
| 804 | int y, int sizex, int sizey, unsigned char color) { |
| 805 | |
| 806 | unsigned char* p; |
| 807 | int k; |
| 808 | p = I + ((x - sizex / 2) + (y - sizey / 2) * width) * bytesPerPixel; |
| 809 | for (k = 0; k < sizex; k++) { *p = color; p+= bytesPerPixel; } // upper line |
| 810 | p = I + ((x - sizex / 2) + (y + sizey / 2) * width) * bytesPerPixel; |
| 811 | for (k = 0; k < sizex; k++) { *p = color; p+= bytesPerPixel; } // lower line |
| 812 | p = I + ((x - sizex / 2) + (y - sizey / 2) * width) * bytesPerPixel; |
| 813 | for (k = 0; k < sizey; k++) { *p = color; p+= width * bytesPerPixel; } // left line |
| 814 | p = I + ((x + sizex / 2) + (y - sizey / 2) * width) * bytesPerPixel; |
| 815 | for (k = 0; k < sizey; k++) { *p = color; p+= width * bytesPerPixel; } // right line |
| 816 | } |
| 817 | |
| 818 | /** |
| 819 | * draws a line from a to b with given thickness(not filled) at the given position x,y (center) in the given color |
| 820 | at the first channel |
| 821 | */ |
| 822 | void drawLine(unsigned char* I, int width, int height, int bytesPerPixel, |
| 823 | Vec* a, Vec* b, int thickness, unsigned char color) { |
| 824 | unsigned char* p; |
| 825 | Vec div = sub_vec(*b,*a); |
| 826 | if(div.y==0){ // horizontal line |
| 827 | if(div.x<0) {*a=*b; div.x*=-1;} |
| 828 | for(int r=-thickness/2; r<=thickness/2; r++){ |
| 829 | p = I + ((a->x) + (a->y+r) * width) * bytesPerPixel; |
| 830 | for (int k = 0; k <= div.x; k++) { *p = color; p+= bytesPerPixel; } |
| 831 | } |
| 832 | }else{ |
| 833 | if(div.x==0){ // vertical line |
| 834 | if(div.y<0) {*a=*b; div.y*=-1;} |
| 835 | for(int r=-thickness/2; r<=thickness/2; r++){ |
| 836 | p = I + ((a->x+r) + (a->y) * width) * bytesPerPixel; |
| 837 | for (int k = 0; k <= div.y; k++) { *p = color; p+= width * bytesPerPixel; } |
| 838 | } |
| 839 | }else{ |
| 840 | double m = (double)div.x/(double)div.y; |
| 841 | int horlen = thickness + fabs(m); |
| 842 | for( int c=0; c<= abs(div.y); c++){ |
| 843 | int dy = div.y<0 ? -c : c; |
| 844 | int x = a->x + m*dy - horlen/2; |
| 845 | p = I + (x + (a->y+dy) * width) * bytesPerPixel; |
| 846 | for( int k=0; k<= horlen; k++){ *p = color; p+= bytesPerPixel; } |
| 847 | } |
| 848 | } |
| 849 | } |
| 850 | } |
| 851 | |
| 852 | |
| 853 | // void addTrans(VSMotionDetect* md, VSTransform sl) { |
| 854 | // if (!md->transs) { |
| 855 | // md->transs = vs_list_new(0); |
| 856 | // } |
| 857 | // vs_list_append_dup(md->transs, &sl, sizeof(sl)); |
| 858 | // } |
| 859 | |
| 860 | // VSTransform getLastVSTransform(VSMotionDetect* md){ |
| 861 | // if (!md->transs || !md->transs->head) { |
| 862 | // return null_transform(); |
| 863 | // } |
| 864 | // return *((VSTransform*)md->transs->tail); |
| 865 | // } |
| 866 | |
| 867 | |
| 868 | //#ifdef TESTING |
| 869 | /// plain C implementation of compareSubImg (without ORC) |
| 870 | unsigned int compareSubImg_thr(unsigned char* const I1, unsigned char* const I2, |
| 871 | const Field* field, int width1, int width2, int height, |
| 872 | int bytesPerPixel, int d_x, int d_y, |
| 873 | unsigned int threshold) { |
| 874 | int k, j; |
| 875 | unsigned char* p1 = NULL; |
| 876 | unsigned char* p2 = NULL; |
| 877 | int s2 = field->size / 2; |
| 878 | unsigned int sum = 0; |
| 879 | |
| 880 | p1 = I1 + ((field->x - s2) + (field->y - s2) * width1) * bytesPerPixel; |
| 881 | p2 = I2 + ((field->x - s2 + d_x) + (field->y - s2 + d_y) * width2) |
| 882 | * bytesPerPixel; |
| 883 | for (j = 0; j < field->size; j++) { |
| 884 | for (k = 0; k < field->size * bytesPerPixel; k++) { |
| 885 | sum += abs((int) *p1 - (int) *p2); |
| 886 | p1++; |
| 887 | p2++; |
| 888 | } |
| 889 | if( sum > threshold) // no need to calculate any longer: worse than the best match |
| 890 | break; |
| 891 | p1 += (width1 - field->size) * bytesPerPixel; |
| 892 | p2 += (width2 - field->size) * bytesPerPixel; |
| 893 | } |
| 894 | return sum; |
| 895 | } |
| 896 | |
| 897 | |
| 898 | /* |
| 899 | * Local variables: |
| 900 | * c-file-style: "stroustrup" |
| 901 | * c-file-offsets: ((case-label . *) (statement-case-intro . *)) |
| 902 | * indent-tabs-mode: nil |
| 903 | * tab-width: 2 |
| 904 | * c-basic-offset: 2 t |
| 905 | * End: |
| 906 | * |
| 907 | */ |