[deb_vid.stab.git] / src / transformfixedpoint.c

/*
 *  transformfixedpoint.c
 *
 *  Fixed point implementation of image transformations (see also transformfloat.c/h)
 *
 *  Copyright (C) Georg Martius - June 2011
 *   georg dot martius at web dot de
 *
 *  This file is part of vid.stab video stabilization library
 *
 *  vid.stab is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License,
 *  as published by the Free Software Foundation; either version 2, or
 *  (at your option) any later version.
 *
 *  vid.stab is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *
 */
#include "transformfixedpoint.h"
#include "transform.h"
#include "transformtype_operations.h"

// the orc code does not work at the moment (BUG in ORC?)
// #include "orc/transformorc.h"

//#include <math.h>
//#include <libgen.h>

/*
 * Fixed-point conversion helpers.
 *   fp8  : value scaled by 2^8
 *   fp16 : value scaled by 2^16
 *
 * Integer -> fixed-point conversions multiply instead of shifting so that
 * negative inputs are well defined (left-shifting a negative value is
 * undefined behaviour in C). The result is identical for all inputs for
 * which the shift was defined.
 *
 * NOTE(review): fToFp8/fToFp16 scale by 0xFF/0xFFFF rather than by
 * 2^8/2^16. This is kept unchanged because the transformation results
 * depend on the exact factor -- confirm whether this is intended.
 */
#define iToFp8(v)  ((v)*(1<<8))
#define fToFp8(v)  ((int32_t)((v)*((float)0xFF)))
#define iToFp16(v) ((v)*(1<<16))
#define fToFp16(v) ((int32_t)((v)*((double)0xFFFF)))
#define fp16To8(v) ((v)>>8)
//#define fp16To8(v) ( (v) && 0x80 == 1 ? ((v)>>8 + 1) : ((v)>>8) )
#define fp24To8(v) ((v)>>16)

/* fixed-point -> integer (truncating) and -> floating point */
#define fp8ToI(v)  ((v)>>8)
#define fp16ToI(v) ((v)>>16)
#define fp8ToF(v)  ((v)/((double)(1<<8)))
#define fp16ToF(v) ((v)/((double)(1<<16)))

/* fixed-point -> integer with rounding: add one half, then drop the
 * fractional bits (8 bits for fp8, 16 bits for fp16) */
// #define fp8ToIRound(v) ( (((v)>>7) & 0x1) == 0 ? ((v)>>8) : ((v)>>8)+1 )
#define fp8_0_5 (1<<7)
#define fp8ToIRound(v) (((v) + fp8_0_5) >> 8)
//#define fp16ToIRound(v) ( (((v)>>15) & 0x1) == 0 ? ((v)>>16) : ((v)>>16)+1 )
#define fp16_0_5 (1<<15)
#define fp16ToIRound(v) (((v) + fp16_0_5) >> 16)

/** interpolateBiLinBorder: bi-linear interpolation that can also be used at
    and outside of the image border. The other interpolation functions fall
    back to it for coordinates near the border, see interpolate.

    If all four neighbouring pixels are inside the image a normal bi-linear
    blend is computed. Otherwise the nearest pixel inside the image is used:
    it is replicated for up to w=10 pixels beyond the image and then faded
    linearly into def over the following w pixels.

    rv:            destination pixel
    x,y:           source coordinates (16.16 fixed point)
    img:           source plane, img_linesize is passed on to PIX/PIXEL
    width,height:  dimension of the plane
    def:           value used far outside of the image
*/
inline void interpolateBiLinBorder(uint8_t *rv, fp16 x, fp16 y,
                                   const uint8_t *img, int img_linesize,
                                   int32_t width, int32_t height, uint8_t def)
{
  int32_t left   = fp16ToI(x);
  int32_t top    = fp16ToI(y);
  int32_t right  = left + 1;
  int32_t bottom = top + 1;

  if (left >= 0 && right < width && top >= 0 && bottom < height) {
    // regular case: all four neighbours are available
    short p_rb = PIXEL(img, img_linesize, right, bottom, width, height, def);
    short p_rt = PIXEL(img, img_linesize, right, top,    width, height, def);
    short p_lb = PIXEL(img, img_linesize, left,  bottom, width, height, def);
    short p_lt = PIXEL(img, img_linesize, left,  top,    width, height, def);
    fp16 fx_l = iToFp16(left);
    fp16 fx_r = iToFp16(right);
    fp16 fy_t = iToFp16(top);
    fp16 fy_b = iToFp16(bottom);
    // interpolate along x on the lower and on the upper row
    fp16 lower = fp16To8(p_rb*(x - fx_l) + p_lb*(fx_r - x));
    fp16 upper = fp16To8(p_rt*(x - fx_l) + p_lt*(fx_r - x));
    // blend the two rows along y
    fp16 s = lower*fp16To8(y - fy_t) + upper*fp16To8(fy_b - y) + 1;
    *rv = fp16ToIRound(s);
    return;
  }

  // border case
  int32_t w = 10; // number of pixels to blur out the border pixel outwards
  // how far (beyond w pixels) the position lies outside of each edge
  int32_t over_left   = - w - left;
  int32_t over_top    = - w - top;
  int32_t over_right  = right - w - width;
  int32_t over_bottom = bottom - w - height;
  int32_t outside = VS_MAX(over_right, over_bottom);
  outside = VS_MAX(over_top, outside);
  outside = VS_MAX(over_left, outside);
  // blend factor limited to 0..w
  int32_t c = VS_MAX(VS_MIN(outside, w), 0);
  // pixel at border of source image (coordinates clamped into the image)
  int32_t bx = VS_MAX(VS_MIN(left, width-1),0);
  int32_t by = VS_MAX(VS_MIN(top, height-1),0);
  short val_border = PIX(img, img_linesize, bx, by);
  *rv = (def * c + val_border * (w - c)) / w;
}

/** taken from http://en.wikipedia.org/wiki/Bicubic_interpolation for alpha=-0.5
    in matrix notation:
    a0-a3 are the neighboring points where the target point is between a1 and a2
    t is the point of interpolation (position between a1 and a2), a value between 0 and 1

                        | 0, 2, 0, 0 |  |a0|
                        |-1, 0, 1, 0 |  |a1|
    1/2 * (1,t,t^2,t^3) | 2,-5, 4,-1 |  |a2|
                        |-1, 3,-3, 1 |  |a3|
*/
/* inline static short bicub_kernel(fp16 t, short a0, short a1, short a2, short a3){ */
/*   // (2*a1 + t*((-a0+a2) + t*((2*a0-5*a1+4*a2-a3) + t*(-a0+3*a1-3*a2+a3) )) ) / 2; */
/*   return ((iToFp16(2*a1) + t*(-a0+a2 */
/*             + fp16ToI(t*((2*a0-5*a1+4*a2-a3) */
/*              + fp16ToI(t*(-a0+3*a1-3*a2+a3)) )) ) */
/*      ) ) >> 17; */
/* } */

/* One-dimensional cubic interpolation between a1 and a2 (with outer
   neighbours a0 and a3) at the fixed-point position t, evaluated as
     (2*a1 + t*(c1 + t*(c2 + t*c3))) / 2
   in Horner form. Every intermediate product is rounded back to an integer
   (adding 1/2) to limit truncation errors. */
inline static short bicub_kernel(fp16 t, short a0, short a1, short a2, short a3){
  const int c1 = -a0+a2;                 // linear coefficient
  const int c2 = 2*a0-5*a1+4*a2-a3;      // quadratic coefficient
  const int c3 = -a0+3*a1-3*a2+a3;       // cubic coefficient

  fp16 acc = fp16ToIRound(t*c3);
  acc = fp16ToIRound(t*(c2 + acc));
  acc = iToFp16(2*a1) + t*(c1 + acc);
  // halve (the formula's final division by 2) and round to integer
  return fp16ToIRound(acc >> 1);
}

/** interpolateBiCub: bi-cubic interpolation function using 4x4 pixel, see interpolate

    Positions whose 4x4 neighbourhood is not completely inside the image are
    delegated to interpolateBiLinBorder. Otherwise four horizontal cubic
    interpolations (one per row) are followed by a vertical one.

    The cubic kernel can overshoot the range of its inputs in both
    directions, so the result is clamped to 0..255 before it is stored.
    Without the lower clamp a negative value would wrap around to a bright
    pixel when converted to uint8_t.

    rv:            destination pixel
    x,y:           source coordinates (16.16 fixed point)
    img:           source plane, img_linesize is passed on to PIX
    width,height:  dimension of the plane
    def:           default value, only used by the border fallback
*/
inline void interpolateBiCub(uint8_t *rv, fp16 x, fp16 y,
                             const uint8_t *img, int img_linesize,
                             int width, int height, uint8_t def)
{
  // do a simple linear interpolation at the border
  int32_t ix_f = fp16ToI(x);
  int32_t iy_f = fp16ToI(y);
  if (unlikely(ix_f < 1 || ix_f > width - 3 || iy_f < 1 || iy_f > height - 3)) {
    interpolateBiLinBorder(rv, x, y, img, img_linesize, width, height, def);
  } else {
    fp16 x_f = iToFp16(ix_f);
    fp16 y_f = iToFp16(iy_f);
    fp16 tx  = x-x_f;
    // horizontal pass over the rows iy_f-1 .. iy_f+2
    short rows[4];
    int j;
    for (j = 0; j < 4; j++) {
      int32_t iy = iy_f - 1 + j;
      rows[j] = bicub_kernel(tx,
                             PIX(img, img_linesize, ix_f-1, iy),
                             PIX(img, img_linesize, ix_f,   iy),
                             PIX(img, img_linesize, ix_f+1, iy),
                             PIX(img, img_linesize, ix_f+2, iy));
    }
    // vertical pass
    short res = bicub_kernel(y-y_f, rows[0], rows[1], rows[2], rows[3]);
    // clamp over- and undershoot of the kernel
    *rv = res < 0 ? 0 : (res > 255 ? 255 : res);
  }
}


/** interpolateBiLin: bi-linear interpolation function, see interpolate

    Blends the four pixels surrounding (x,y). If not all of them are inside
    the image the work is handed to interpolateBiLinBorder.

    rv:            destination pixel
    x,y:           source coordinates (16.16 fixed point)
    img:           source plane, img_linesize is passed on to PIX
    width,height:  dimension of the plane
    def:           default value, only used by the border fallback
*/
inline void interpolateBiLin(uint8_t *rv, fp16 x, fp16 y,
                             const uint8_t *img, int img_linesize,
                             int32_t width, int32_t height, uint8_t def)
{
  int32_t col0 = fp16ToI(x);
  int32_t row0 = fp16ToI(y);

  if (unlikely(col0 < 0 || col0 > width - 2 || row0 < 0 || row0 > height - 2)) {
    interpolateBiLinBorder(rv, x, y, img, img_linesize, width, height, def);
    return;
  }

  int32_t col1 = col0 + 1;
  int32_t row1 = row0 + 1;
  short p11 = PIX(img, img_linesize, col1, row1);
  short p10 = PIX(img, img_linesize, col1, row0);
  short p01 = PIX(img, img_linesize, col0, row1);
  short p00 = PIX(img, img_linesize, col0, row0);
  fp16 fx0 = iToFp16(col0);
  fp16 fx1 = iToFp16(col1);
  fp16 fy0 = iToFp16(row0);
  fp16 fy1 = iToFp16(row1);
  // interpolate along x on both rows, then blend the rows along y
  fp16 lower = fp16To8(p11*(x - fx0) + p01*(fx1 - x));
  fp16 upper = fp16To8(p10*(x - fx0) + p00*(fx1 - x));
  fp16 s = lower*fp16To8(y - fy0) + upper*fp16To8(fy1 - y);
  // it is underestimated due to truncation, so we add one
  short res = fp16ToI(s);
  if (res < 255)
    *rv = res+1;
  else
    *rv = 255;
}

/** interpolateLin: linear (only x) interpolation function, see interpolate

    The row is chosen by rounding y to the nearest integer; within that row
    the two pixels left and right of x are blended. Pixels outside of the
    image are replaced by def (via PIXEL).

    rv:            destination pixel
    x,y:           source coordinates (16.16 fixed point)
    img:           source plane, img_linesize is passed on to PIXEL
    width,height:  dimension of the plane
    def:           default value for pixels outside of the image
*/
inline void interpolateLin(uint8_t *rv, fp16 x, fp16 y,
                           const uint8_t *img, int img_linesize,
                           int width, int height, uint8_t def)
{
  int     row   = fp16ToIRound(y);
  int32_t col_l = fp16ToI(x);
  int32_t col_r = col_l + 1;
  fp16    fx_l  = iToFp16(col_l);
  fp16    fx_r  = iToFp16(col_r);

  short p_r = PIXEL(img, img_linesize, col_r, row, width, height, def);
  short p_l = PIXEL(img, img_linesize, col_l, row, width, height, def);
  // weight each pixel by the distance of x to the opposite pixel
  fp16 s    = p_r*(x - fx_l) + p_l*(fx_r - x);
  short res = fp16ToI(s);
  if (res < 255)
    *rv = res;
  else
    *rv = 255;
}

/** interpolateZero: nearest neighbor interpolation function, see interpolate

    Rounds x and y to the nearest integer position and copies that pixel;
    def is stored if the position lies outside of the image (via PIXEL).
*/
inline void interpolateZero(uint8_t *rv, fp16 x, fp16 y,
                            const uint8_t *img, int img_linesize,
                            int width, int height, uint8_t def)
{
  int32_t col = fp16ToIRound(x);
  int32_t row = fp16ToIRound(y);
  *rv = (uint8_t) PIXEL(img, img_linesize, col, row, width, height, def);
}


/**
 * interpolateN: Bi-linear interpolation function for N channel image.
 *
 * Parameters:
 *             rv: destination pixel (call by reference)
 *            x,y: the source coordinates in the image img. Note this
 *                 are real-value coordinates (16.16 fixed point),
 *                 that's why we interpolate
 *            img: source image
 *   img_linesize: line size of img, passed on to PIXN
 *   width,height: dimension of image
 *              N: number of channels
 *        channel: channel number (0..N-1)
 *            def: default value if coordinates are out of range
 * Return value:  None
 *
 * Positions in the last column/row of the image have no right/lower
 * neighbour. PIXN performs no range check, so these neighbours are clamped
 * to the last column/row (edge replication) instead of reading past the
 * image.
 */
inline void interpolateN(uint8_t *rv, fp16 x, fp16 y,
                         const uint8_t *img, int img_linesize,
                         int width, int height,
                         uint8_t N, uint8_t channel,
                         uint8_t def)
{
  int32_t ix_f = fp16ToI(x);
  int32_t iy_f = fp16ToI(y);
  if (ix_f < 0 || ix_f > width-1 || iy_f < 0 || iy_f > height - 1) {
    *rv = def;
  } else {
    // right/lower neighbour, clamped so that we never leave the image
    int32_t ix_c = ix_f + 1 < width  ? ix_f + 1 : ix_f;
    int32_t iy_c = iy_f + 1 < height ? iy_f + 1 : iy_f;
    short v1 = PIXN(img, img_linesize, ix_c, iy_c, N, channel);
    short v2 = PIXN(img, img_linesize, ix_c, iy_f, N, channel);
    short v3 = PIXN(img, img_linesize, ix_f, iy_c, N, channel);
    short v4 = PIXN(img, img_linesize, ix_f, iy_f, N, channel);
    // the weights always span exactly one pixel, also when the
    //  neighbour index was clamped
    fp16 x_f = iToFp16(ix_f);
    fp16 x_c = iToFp16(ix_f + 1);
    fp16 y_f = iToFp16(iy_f);
    fp16 y_c = iToFp16(iy_f + 1);
    fp16 s  = fp16To8(v1*(x - x_f)+v3*(x_c - x))*fp16To8(y - y_f) +
      fp16To8(v2*(x - x_f) + v4*(x_c - x))*fp16To8(y_c - y);
    *rv = fp16ToIRound(s);
  }
}


/**
 * transformPacked: applies current transformation to frame
 * Parameters:
 *         td: private data structure of this filter
 *          t: transformation to apply (translation x/y, rotation alpha, zoom)
 * Return value:
 *         VS_OK (this function has no failure path)
 * Preconditions:
 *  The frame must be in Packed format
 *
 * All coordinates are computed in 16.16 fixed point (fp16).
 * If td->conf.crop is set, destination pixels whose source position is out
 * of range are set to 16, otherwise they keep their current value.
 */
int transformPacked(VSTransformData* td, VSTransform t)
{
  int x = 0, y = 0, k = 0;
  uint8_t *D_1, *D_2;

  D_1  = td->src.data[0];
  D_2  = td->destbuf.data[0];
  fp16 c_s_x = iToFp16(td->fiSrc.width/2);
  fp16 c_s_y = iToFp16(td->fiSrc.height/2);
  int32_t c_d_x = td->fiDest.width/2;
  int32_t c_d_y = td->fiDest.height/2;

  /* for each pixel in the destination image we calc the source
   * coordinate and make an interpolation:
   *      p_d = c_d + M(p_s - c_s) + t
   * where p are the points, c the center coordinate,
   *  _s source and _d destination,
   *  t the translation, and M the rotation matrix
   *      p_s = M^{-1}(p_d - c_d - t) + c_s
   */
  float z     = 1.0-t.zoom/100.0;
  fp16 zcos_a = fToFp16(z*cos(-t.alpha)); // scaled cos
  fp16 zsin_a = fToFp16(z*sin(-t.alpha)); // scaled sin
  fp16  c_tx    = c_s_x - fToFp16(t.x);
  fp16  c_ty    = c_s_y - fToFp16(t.y);
  int channels = td->fiSrc.bytesPerPixel;
  /* All channels */
  for (y = 0; y < td->fiDest.height; y++) {
    int32_t y_d1 = (y - c_d_y);
    for (x = 0; x < td->fiDest.width; x++) {
      int32_t x_d1 = (x - c_d_x);
      fp16 x_s  =  zcos_a * x_d1 + zsin_a * y_d1 + c_tx;
      fp16 y_s  = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty;

      for (k = 0; k < channels; k++) { // iterate over colors
        // every pixel occupies `channels` consecutive bytes, so the pixel
        //  index has to be scaled; otherwise channel k of pixel x would
        //  alias channel 0 of pixel x+k
        // NOTE(review): the row term is unchanged; confirm that linesize
        //  has the same meaning here as in PIXN
        uint8_t *dest = &D_2[x * channels + y * td->destbuf.linesize[0]+k];
        interpolateN(dest, x_s, y_s, D_1, td->src.linesize[0],
                     td->fiSrc.width, td->fiSrc.height,
                     channels, k, td->conf.crop ? 16 : *dest);
      }
    }
  }
  return VS_OK;
}

/**
 * transformPlanar: applies current transformation to frame
 *
 * Parameters:
 *         td: private data structure of this filter
 *          t: transformation to apply (translation x/y, rotation alpha, zoom)
 * Return value:
 *         VS_OK (this function has no failure path)
 * Preconditions:
 *  The frame must be in Planar format
 *
 * Fixed-point format: source coordinates as well as the scaled sine and
 *  cosine are 32 bit integers in 16.16 format (fp16)
 *
 * An all-zero transformation is treated as identity: the source frame is
 *  copied to the destination buffer unless vsFramesEqual reports that both
 *  are already equal.
 * If td->conf.crop is set, pixels without source get the value 0 in
 *  plane 0 and 0x80 in all other planes; otherwise they keep their value.
 */
int transformPlanar(VSTransformData* td, VSTransform t)
{
  if (t.alpha==0 && t.x==0 && t.y==0 && t.zoom == 0){
    // identity: at most a plain copy is required
    if(!vsFramesEqual(&td->src,&td->destbuf))
      vsFrameCopy(&td->destbuf, &td->src, &td->fiSrc);
    return VS_OK;
  }

  // values that are the same for all planes
  float z     = 1.0-t.zoom/100.0;
  fp16 zcos_a = fToFp16(z*cos(-t.alpha)); // scaled cos
  fp16 zsin_a = fToFp16(z*sin(-t.alpha)); // scaled sin
  int32_t shift_x = fToFp16(t.x);         // translation at full resolution
  int32_t shift_y = fToFp16(t.y);

  int plane;
  for(plane=0; plane< td->fiSrc.planes; plane++){
    uint8_t *src_plane  = td->src.data[plane];
    uint8_t *dest_plane = td->destbuf.data[plane];
    // subsampling of this plane, used as shift amount below
    int wsub = vsGetPlaneWidthSubS(&td->fiSrc,plane);
    int hsub = vsGetPlaneHeightSubS(&td->fiSrc,plane);
    int dw = CHROMA_SIZE(td->fiDest.width , wsub);
    int dh = CHROMA_SIZE(td->fiDest.height, hsub);
    int sw = CHROMA_SIZE(td->fiSrc.width  , wsub);
    int sh = CHROMA_SIZE(td->fiSrc.height , hsub);
    uint8_t black = plane==0 ? 0 : 0x80;

    // center of the destination plane
    int32_t c_d_x = dw / 2;
    int32_t c_d_y = dh / 2;
    // center of the source plane minus the translation (scaled to the plane)
    fp16  c_tx    = iToFp16(sw / 2) - (shift_x >> wsub);
    fp16  c_ty    = iToFp16(sh / 2) - (shift_y >> hsub);

    /* for each pixel in the destination image we calc the source
     * coordinate and make an interpolation:
     *      p_d = c_d + M(p_s - c_s) + t
     * where p are the points, c the center coordinate,
     *  _s source and _d destination,
     *  t the translation, and M the rotation and scaling matrix
     *      p_s = M^{-1}(p_d - c_d - t) + c_s
     */
    int32_t x, y;
    for (y = 0; y < dh; y++) {
      // rows in the outer loop: swapping of the loops brought 15% performance gain
      int32_t y_d1 = (y - c_d_y);
      uint8_t *dest_row = dest_plane + y * td->destbuf.linesize[plane];
      for (x = 0; x < dw; x++) {
        int32_t x_d1 = (x - c_d_x);
        fp16 x_s  =  zcos_a * x_d1 + zsin_a * y_d1 + c_tx;
        fp16 y_s  = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty;
        uint8_t *dest = dest_row + x;
        // inlining the interpolation function would bring 10%
        //  (but then we cannot use the function pointer anymore...)
        td->interpolate(dest, x_s, y_s, src_plane,
                        td->src.linesize[plane], sw, sh,
                        td->conf.crop ? black : *dest);
      }
    }
  }

  return VS_OK;
}


/* /\** TESTING */
/*  * transformPlanar_orc: applies current transformation to frame */
/*  * */
/*  * Parameters: */
/*  *         td: private data structure of this filter */
/*  * Return value:  */
/*  *         0 for failture, 1 for success */
/*  * Preconditions: */
/*  *  The frame must be in Planar format */
/*  * */
/*  * Fixed-point format 32 bit integer: */
/*  *  for image coords we use val<<8 */
/*  *  for angle and zoom we use val<<16 */
/*  * */
/*  *\/ */
/* int transformPlanar_orc(VSTransformData* td, VSTransform t) */
/* { */
/*     int32_t x = 0, y = 0; */
/*     uint8_t *Y_1, *Y_2, *Cb_1, *Cb_2, *Cr_1, *Cr_2; */

/*     if (t.alpha==0 && t.x==0 && t.y==0 && t.zoom == 0) return VS_OK; // noop */

/*     Y_1  = td->src;   */
/*     Y_2  = td->destbuf;   */
/*     Cb_1 = td->src + td->fiSrc.width * td->fiSrc.height; */
/*     Cb_2 = td->destbuf + td->fiDest.width * td->fiDest.height; */
/*     Cr_1 = td->src + 5*td->fiSrc.width * td->fiSrc.height/4; */
/*     Cr_2 = td->destbuf + 5*td->fiDest.width * td->fiDest.height/4; */
/*     fp16 c_s_x = iToFp16(td->fiSrc.width / 2); */
/*     fp16 c_s_y = iToFp16(td->fiSrc.height / 2); */
/*     int32_t c_d_x = td->fiDest.width / 2; */
/*     int32_t c_d_y = td->fiDest.height / 2;     */

/*     float z     = 1.0-t.zoom/100.0; */
/*     fp16 zcos_a = fToFp16(z*cos(-t.alpha)); // scaled cos */
/*     fp16 zsin_a = fToFp16(z*sin(-t.alpha)); // scaled sin */
/*     fp16  c_tx    = c_s_x - fToFp16(t.x); */
/*     fp16  c_ty    = c_s_y - fToFp16(t.y); */

/*     /\* for each pixel in the destination image we calc the source */
/*      * coordinate and make an interpolation:  */
/*      *      p_d = c_d + M(p_s - c_s) + t  */
/*      * where p are the points, c the center coordinate,  */
/*      *  _s source and _d destination,  */
/*      *  t the translation, and M the rotation and scaling matrix */
/*      *      p_s = M^{-1}(p_d - c_d - t) + c_s */
/*      *\/ */
/*     /\* Luminance channel *\/ */
/*     fp16* x_ss = (fp16*)malloc(sizeof(fp16)*td->fiDest.width); */
/*     fp16* y_ss = (fp16*)malloc(sizeof(fp16)*td->fiDest.width);     */
/*     int32_t* xs = (int32_t*)malloc(sizeof(int32_t)*td->fiDest.width);         */
/*     for (x = 0; x < td->fiDest.width; x++) { // this can go to td */
/*       xs[x]=x; */
/*     } */

/*     for (y = 0; y < td->fiDest.height; y++) { */
/*       int32_t y_d1 = (y - c_d_y);   */
/*       fp16 sin_y   = zsin_a * y_d1; */
/*       fp16 cos_y   = zcos_a * y_d1; */
/*       for (x = 0; x < td->fiDest.width; x++) { */
/*         int32_t x_d1 = (xs[x] - c_d_x); */
/*         //x_ss[x]  =  zcos_a * x_d1 + zsin_a * y_d1 + c_tx; */
/*   y_ss[x]  = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty; */
/*       } */
/*       transform_one_line_optimized1 (x_ss, y_ss, xs, y_d1, c_d_x,  */
/*              c_tx, c_ty, zcos_a, zsin_a, sin_y, cos_y,  */
/*              td->fiDest.width); */
/*       // transform_one_line_optimized (x_ss, y_ss, xs, y_d1, c_d_x,  */
/*       //             c_tx, c_ty, zcos_a, zsin_a, td->fiDest.width); */

/*       for (x = 0; x < td->fiDest.width; x++) { */
/*   uint8_t *dest = &Y_2[x + y * td->fiDest.width]; */
/*   td->interpolate(dest, x_ss[x], y_ss[x], Y_1,  */
/*         td->fiSrc.width, td->fiSrc.height,  */
/*         td->crop ? 16 : *dest); */
/*       } */
/*     } */

/*     /\* Color channels *\/ */
/*     int32_t ws2 = td->fiSrc.width/2; */
/*     int32_t wd2 = td->fiDest.width/2; */
/*     int32_t hs2 = td->fiSrc.height/2; */
/*     int32_t hd2 = td->fiDest.height/2; */
/*     fp16 c_tx2   = c_tx/2; */
/*     fp16 c_ty2   = c_ty/2; */

/*     for (y = 0; y < hd2; y++) { */
/*       int32_t y_d1 = y - (c_d_y)/2; */
/*       for (x = 0; x < wd2; x++) { */
/*   int32_t x_d1 = x - (c_d_x)/2; */
/*   fp16 x_s  =  zcos_a * x_d1 + zsin_a * y_d1 + c_tx2; */
/*   fp16 y_s  = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty2;  */
/*   uint8_t *dest = &Cr_2[x + y * wd2]; */
/*   td->interpolate(dest, x_s, y_s, Cr_1, ws2, hs2,  */
/*         td->crop ? 128 : *dest); */
/*   dest = &Cb_2[x + y * wd2]; */
/*   td->interpolate(dest, x_s, y_s, Cb_1, ws2, hs2,  */
/*         td->crop ? 128 : *dest); */
/*       } */
/*     } */

/*     return VS_OK; */
/* } */

/*
  some debugging stuff
  FILE* f1 = fopen("transFP.pos","w");
  fprintf(f1,"%i,%i:\t %f,%f\n", x, y, x_s / (float)(1<<16), y_s / (float)(1<<16));
  fclose(f1);

*/


/*
 * Local variables:
 *   c-file-style: "stroustrup"
 *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
 *   indent-tabs-mode: nil
 *   c-basic-offset: 2 t
 *
 * End:
 *
 * vim: expandtab shiftwidth=2:
 */
Commit	Line	Data
	1	/*
	2	* transformfixedpoint.c
	3	*
	4	* Fixed point implementation of image transformations (see also transformfloat.c/h)
	5	*
	6	* Copyright (C) Georg Martius - June 2011
	7	* georg dot martius at web dot de
	8	*
	9	* This file is part of vid.stab video stabilization library
	10	*
	11	* vid.stab is free software; you can redistribute it and/or modify
	12	* it under the terms of the GNU General Public License,
	13	* as published by the Free Software Foundation; either version 2, or
	14	* (at your option) any later version.
	15	*
	16	* vid.stab is distributed in the hope that it will be useful,
	17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	19	* GNU General Public License for more details.
	20	*
	21	* You should have received a copy of the GNU General Public License
	22	* along with GNU Make; see the file COPYING. If not, write to
	23	* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
	24	*
	25	*
	26	*/
	27	#include "transformfixedpoint.h"
	28	#include "transform.h"
	29	#include "transformtype_operations.h"
	30
	31	// the orc code does not work at the moment (BUG in ORC?)
	32	// #include "orc/transformorc.h"
	33
	34	//#include <math.h>
	35	//#include <libgen.h>
	36
	37	#define iToFp8(v) ((v)<<8)
	38	#define fToFp8(v) ((int32_t)((v)*((float)0xFF)))
	39	#define iToFp16(v) ((v)<<16)
	40	#define fToFp16(v) ((int32_t)((v)*((double)0xFFFF)))
	41	#define fp16To8(v) ((v)>>8)
	42	//#define fp16To8(v) ( (v) && 0x80 == 1 ? ((v)>>8 + 1) : ((v)>>8) )
	43	#define fp24To8(v) ((v)>>16)
	44
	45	#define fp8ToI(v) ((v)>>8)
	46	#define fp16ToI(v) ((v)>>16)
	47	#define fp8ToF(v) ((v)/((double)(1<<8)))
	48	#define fp16ToF(v) ((v)/((double)(1<<16)))
	49
	50	// #define fp8ToIRound(v) ( (((v)>>7) & 0x1) == 0 ? ((v)>>8) : ((v)>>8)+1 )
	51	#define fp8_0_5 (1<<7)
	52	#define fp8ToIRound(v) (((v) + fp8_0_5) >> 7)
	53	//#define fp16ToIRound(v) ( (((v)>>15) & 0x1) == 0 ? ((v)>>16) : ((v)>>16)+1 )
	54	#define fp16_0_5 (1<<15)
	55	#define fp16ToIRound(v) (((v) + fp16_0_5) >> 16)
	56
	57	/** interpolateBiLinBorder: bi-linear interpolation function that also works at the border.
	58	This is used by many other interpolation methods at and outsize the border, see interpolate */
	59	inline void interpolateBiLinBorder(uint8_t *rv, fp16 x, fp16 y,
	60	const uint8_t *img, int img_linesize,
	61	int32_t width, int32_t height, uint8_t def)
	62	{
	63	int32_t ix_f = fp16ToI(x);
	64	int32_t iy_f = fp16ToI(y);
	65	int32_t ix_c = ix_f + 1;
	66	int32_t iy_c = iy_f + 1;
	67	if (ix_f < 0 \|\| ix_c >= width \|\| iy_f < 0 \|\| iy_c >= height) {
	68	int32_t w = 10; // number of pixels to blur out the border pixel outwards
	69	int32_t xl = - w - ix_f;
	70	int32_t yl = - w - iy_f;
	71	int32_t xh = ix_c - w - width;
	72	int32_t yh = iy_c - w - height;
	73	int32_t c = VS_MAX(VS_MIN(VS_MAX(xl, VS_MAX(yl, VS_MAX(xh, yh))),w),0);
	74	// pixel at border of source image
	75	short val_border = PIX(img, img_linesize, VS_MAX(VS_MIN(ix_f, width-1),0),
	76	VS_MAX(VS_MIN(iy_f, height-1),0));
	77	rv = (def c + val_border * (w - c)) / w;
	78	}else{
	79	short v1 = PIXEL(img, img_linesize, ix_c, iy_c, width, height, def);
	80	short v2 = PIXEL(img, img_linesize, ix_c, iy_f, width, height, def);
	81	short v3 = PIXEL(img, img_linesize, ix_f, iy_c, width, height, def);
	82	short v4 = PIXEL(img, img_linesize, ix_f, iy_f, width, height, def);
	83	fp16 x_f = iToFp16(ix_f);
	84	fp16 x_c = iToFp16(ix_c);
	85	fp16 y_f = iToFp16(iy_f);
	86	fp16 y_c = iToFp16(iy_c);
	87	fp16 s = fp16To8(v1(x - x_f)+v3(x_c - x))*fp16To8(y - y_f) +
	88	fp16To8(v2(x - x_f) + v4(x_c - x))*fp16To8(y_c - y) + 1;
	89	*rv = fp16ToIRound(s);
	90	}
	91	}
	92
	93	/** taken from http://en.wikipedia.org/wiki/Bicubic_interpolation for alpha=-0.5
	94	in matrix notation:
	95	a0-a3 are the neigthboring points where the target point is between a1 and a2
	96	t is the point of interpolation (position between a1 and a2) value between 0 and 1
	97	\| 0, 2, 0, 0 \| \|a0\|
	98	\|-1, 0, 1, 0 \| \|a1\|
	99	(1,t,t^2,t^3) \| 2,-5, 4,-1 \| \|a2\|
	100	\|-1, 3,-3, 1 \| \|a3\|
	101	*/
	102	/* inline static short bicub_kernel(fp16 t, short a0, short a1, short a2, short a3){ */
	103	/* // (2a1 + t((-a0+a2) + t((2a0-5a1+4a2-a3) + t(-a0+3a1-3a2+a3) )) ) / 2; /
	104	/* return ((iToFp16(2a1) + t(-a0+a2 */
	105	/* + fp16ToI(t((2a0-5a1+4a2-a3) */
	106	/* + fp16ToI(t(-a0+3a1-3a2+a3)) )) ) /
	107	/* ) ) >> 17; */
	108	/* } */
	109
	110	inline static short bicub_kernel(fp16 t, short a0, short a1, short a2, short a3){
	111	// (2a1 + t((-a0+a2) + t((2a0-5a1+4a2-a3) + t(-a0+3a1-3*a2+a3) )) ) / 2;
	112	// we add 1/2 because of truncation errors
	113	return fp16ToIRound((iToFp16(2a1) + t(-a0+a2
	114	+ fp16ToIRound(t((2a0-5a1+4a2-a3)
	115	+ fp16ToIRound(t(-a0+3a1-3*a2+a3)) )) )
	116	) >> 1);
	117	}
	118
	119	/** interpolateBiCub: bi-cubic interpolation function using 4x4 pixel, see interpolate */
	120	inline void interpolateBiCub(uint8_t *rv, fp16 x, fp16 y,
	121	const uint8_t *img, int img_linesize,
	122	int width, int height, uint8_t def)
	123	{
	124	// do a simple linear interpolation at the border
	125	int32_t ix_f = fp16ToI(x);
	126	int32_t iy_f = fp16ToI(y);
	127	if (unlikely(ix_f < 1 \|\| ix_f > width - 3 \|\| iy_f < 1 \|\| iy_f > height - 3)) {
	128	interpolateBiLinBorder(rv, x, y, img, img_linesize, width, height, def);
	129	} else {
	130	fp16 x_f = iToFp16(ix_f);
	131	fp16 y_f = iToFp16(iy_f);
	132	fp16 tx = x-x_f;
	133	short v1 = bicub_kernel(tx,
	134	PIX(img, img_linesize, ix_f-1, iy_f-1),
	135	PIX(img, img_linesize, ix_f, iy_f-1),
	136	PIX(img, img_linesize, ix_f+1, iy_f-1),
	137	PIX(img, img_linesize, ix_f+2, iy_f-1));
	138	short v2 = bicub_kernel(tx,
	139	PIX(img, img_linesize, ix_f-1, iy_f),
	140	PIX(img, img_linesize, ix_f, iy_f),
	141	PIX(img, img_linesize, ix_f+1, iy_f),
	142	PIX(img, img_linesize, ix_f+2, iy_f));
	143	short v3 = bicub_kernel(tx,
	144	PIX(img, img_linesize, ix_f-1, iy_f+1),
	145	PIX(img, img_linesize, ix_f, iy_f+1),
	146	PIX(img, img_linesize, ix_f+1, iy_f+1),
	147	PIX(img, img_linesize, ix_f+2, iy_f+1));
	148	short v4 = bicub_kernel(tx,
	149	PIX(img, img_linesize, ix_f-1, iy_f+2),
	150	PIX(img, img_linesize, ix_f, iy_f+2),
	151	PIX(img, img_linesize, ix_f+1, iy_f+2),
	152	PIX(img, img_linesize, ix_f+2, iy_f+2));
	153	short res = bicub_kernel(y-y_f, v1, v2, v3, v4);
	154	*rv = res < 255 ? res : 255;
	155	}
	156	}
	157
	158
	159	/** interpolateBiLin: bi-linear interpolation function, see interpolate */
	160	inline void interpolateBiLin(uint8_t *rv, fp16 x, fp16 y,
	161	const uint8_t *img, int img_linesize,
	162	int32_t width, int32_t height, uint8_t def)
	163	{
	164	int32_t ix_f = fp16ToI(x);
	165	int32_t iy_f = fp16ToI(y);
	166	if (unlikely(ix_f < 0 \|\| ix_f > width - 2 \|\| iy_f < 0 \|\| iy_f > height - 2)) {
	167	interpolateBiLinBorder(rv, x, y, img, img_linesize, width, height, def);
	168	} else {
	169	int32_t ix_c = ix_f + 1;
	170	int32_t iy_c = iy_f + 1;
	171	short v1 = PIX(img, img_linesize, ix_c, iy_c);
	172	short v2 = PIX(img, img_linesize, ix_c, iy_f);
	173	short v3 = PIX(img, img_linesize, ix_f, iy_c);
	174	short v4 = PIX(img, img_linesize, ix_f, iy_f);
	175	fp16 x_f = iToFp16(ix_f);
	176	fp16 x_c = iToFp16(ix_c);
	177	fp16 y_f = iToFp16(iy_f);
	178	fp16 y_c = iToFp16(iy_c);
	179	fp16 s = fp16To8(v1(x - x_f) + v3(x_c - x))*fp16To8(y - y_f) +
	180	fp16To8(v2(x - x_f) + v4(x_c - x))*fp16To8(y_c - y);
	181	// it is underestimated due to truncation, so we add one
	182	short res = fp16ToI(s);
	183	*rv = res < 255 ? res+1 : 255;
	184	}
	185	}
	186
	187	/** interpolateLin: linear (only x) interpolation function, see interpolate */
	188	inline void interpolateLin(uint8_t *rv, fp16 x, fp16 y,
	189	const uint8_t *img, int img_linesize,
	190	int width, int height, uint8_t def)
	191	{
	192	int32_t ix_f = fp16ToI(x);
	193	int32_t ix_c = ix_f + 1;
	194	fp16 x_c = iToFp16(ix_c);
	195	fp16 x_f = iToFp16(ix_f);
	196	int y_n = fp16ToIRound(y);
	197
	198	short v1 = PIXEL(img, img_linesize, ix_c, y_n, width, height, def);
	199	short v2 = PIXEL(img, img_linesize, ix_f, y_n, width, height, def);
	200	fp16 s = v1(x - x_f) + v2(x_c - x);
	201	short res = fp16ToI(s);
	202	*rv = res < 255 ? res : 255;
	203	}
	204
	205	/** interpolateZero: nearest neighbor interpolation function, see interpolate */
	206	inline void interpolateZero(uint8_t *rv, fp16 x, fp16 y,
	207	const uint8_t *img, int img_linesize,
	208	int width, int height, uint8_t def)
	209	{
	210	int32_t ix_n = fp16ToIRound(x);
	211	int32_t iy_n = fp16ToIRound(y);
	212	*rv = (uint8_t) PIXEL(img, img_linesize, ix_n, iy_n, width, height, def);
	213	}
	214
	215
	216	/**
	217	* interpolateN: Bi-linear interpolation function for N channel image.
	218	*
	219	* Parameters:
	220	* rv: destination pixel (call by reference)
	221	* x,y: the source coordinates in the image img. Note this
	222	* are real-value coordinates, that's why we interpolate
	223	* img: source image
	224	* width,height: dimension of image
	225	* N: number of channels
	226	* channel: channel number (0..N-1)
	227	* def: default value if coordinates are out of range
	228	* Return value: None
	229	*/
	230	inline void interpolateN(uint8_t *rv, fp16 x, fp16 y,
	231	const uint8_t *img, int img_linesize,
	232	int width, int height,
	233	uint8_t N, uint8_t channel,
	234	uint8_t def)
	235	{
	236	int32_t ix_f = fp16ToI(x);
	237	int32_t iy_f = fp16ToI(y);
	238	if (ix_f < 0 \|\| ix_f > width-1 \|\| iy_f < 0 \|\| iy_f > height - 1) {
	239	*rv = def;
	240	} else {
	241	int32_t ix_c = ix_f + 1;
	242	int32_t iy_c = iy_f + 1;
	243	short v1 = PIXN(img, img_linesize, ix_c, iy_c, N, channel);
	244	short v2 = PIXN(img, img_linesize, ix_c, iy_f, N, channel);
	245	short v3 = PIXN(img, img_linesize, ix_f, iy_c, N, channel);
	246	short v4 = PIXN(img, img_linesize, ix_f, iy_f, N, channel);
	247	fp16 x_f = iToFp16(ix_f);
	248	fp16 x_c = iToFp16(ix_c);
	249	fp16 y_f = iToFp16(iy_f);
	250	fp16 y_c = iToFp16(iy_c);
	251	fp16 s = fp16To8(v1(x - x_f)+v3(x_c - x))*fp16To8(y - y_f) +
	252	fp16To8(v2(x - x_f) + v4(x_c - x))*fp16To8(y_c - y);
	253	*rv = fp16ToIRound(s);
	254	}
	255	}
	256
	257
	258	/**
	259	* transformPacked: applies current transformation to frame
	260	* Parameters:
	261	* td: private data structure of this filter
	262	* Return value:
	263	* 0 for failture, 1 for success
	264	* Preconditions:
	265	* The frame must be in Packed format
	266	*/
	267	int transformPacked(VSTransformData* td, VSTransform t)
	268	{
	269	int x = 0, y = 0, k = 0;
	270	uint8_t D_1, D_2;
	271
	272	D_1 = td->src.data[0];
	273	D_2 = td->destbuf.data[0];
	274	fp16 c_s_x = iToFp16(td->fiSrc.width/2);
	275	fp16 c_s_y = iToFp16(td->fiSrc.height/2);
	276	int32_t c_d_x = td->fiDest.width/2;
	277	int32_t c_d_y = td->fiDest.height/2;
	278
	279	/* for each pixel in the destination image we calc the source
	280	* coordinate and make an interpolation:
	281	* p_d = c_d + M(p_s - c_s) + t
	282	* where p are the points, c the center coordinate,
	283	* _s source and _d destination,
	284	* t the translation, and M the rotation matrix
	285	* p_s = M^{-1}(p_d - c_d - t) + c_s
	286	*/
	287	float z = 1.0-t.zoom/100.0;
	288	fp16 zcos_a = fToFp16(z*cos(-t.alpha)); // scaled cos
	289	fp16 zsin_a = fToFp16(z*sin(-t.alpha)); // scaled sin
	290	fp16 c_tx = c_s_x - fToFp16(t.x);
	291	fp16 c_ty = c_s_y - fToFp16(t.y);
	292	int channels = td->fiSrc.bytesPerPixel;
	293	/* All channels */
	294	for (y = 0; y < td->fiDest.height; y++) {
	295	int32_t y_d1 = (y - c_d_y);
	296	for (x = 0; x < td->fiDest.width; x++) {
	297	int32_t x_d1 = (x - c_d_x);
	298	fp16 x_s = zcos_a * x_d1 + zsin_a * y_d1 + c_tx;
	299	fp16 y_s = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty;
	300
	301	for (k = 0; k < channels; k++) { // iterate over colors
	302	uint8_t dest = &D_2[x + y td->destbuf.linesize[0]+k];
	303	interpolateN(dest, x_s, y_s, D_1, td->src.linesize[0],
	304	td->fiSrc.width, td->fiSrc.height,
	305	channels, k, td->conf.crop ? 16 : *dest);
	306	}
	307	}
	308	}
	309	return VS_OK;
	310	}
	311
	312	/**
	313	* transformPlanar: applies current transformation to frame
	314	*
	315	* Parameters:
	316	* td: private data structure of this filter
	317	* Return value:
	318	* 0 for failture, 1 for success
	319	* Preconditions:
	320	* The frame must be in Planar format
	321	*
	322	* Fixed-point format 32 bit integer:
	323	* for image coords we use val<<8
	324	* for angle and zoom we use val<<16
	325	*
	326	*/
	327	int transformPlanar(VSTransformData* td, VSTransform t)
	328	{
	329	int32_t x = 0, y = 0;
	330	uint8_t dat_1, dat_2;
	331
	332	if (t.alpha==0 && t.x==0 && t.y==0 && t.zoom == 0){
	333	if(vsFramesEqual(&td->src,&td->destbuf))
	334	return VS_OK; // noop
	335	else {
	336	vsFrameCopy(&td->destbuf, &td->src, &td->fiSrc);
	337	return VS_OK;
	338	}
	339	}
	340
	341	int plane;
	342	for(plane=0; plane< td->fiSrc.planes; plane++){
	343	dat_1 = td->src.data[plane];
	344	dat_2 = td->destbuf.data[plane];
	345	int wsub = vsGetPlaneWidthSubS(&td->fiSrc,plane);
	346	int hsub = vsGetPlaneHeightSubS(&td->fiSrc,plane);
	347	int dw = CHROMA_SIZE(td->fiDest.width , wsub);
	348	int dh = CHROMA_SIZE(td->fiDest.height, hsub);
	349	int sw = CHROMA_SIZE(td->fiSrc.width , wsub);
	350	int sh = CHROMA_SIZE(td->fiSrc.height , hsub);
	351	uint8_t black = plane==0 ? 0 : 0x80;
	352
	353	fp16 c_s_x = iToFp16(sw / 2);
	354	fp16 c_s_y = iToFp16(sh / 2);
	355	int32_t c_d_x = dw / 2;
	356	int32_t c_d_y = dh / 2;
	357
	358	float z = 1.0-t.zoom/100.0;
	359	fp16 zcos_a = fToFp16(z*cos(-t.alpha)); // scaled cos
	360	fp16 zsin_a = fToFp16(z*sin(-t.alpha)); // scaled sin
	361	fp16 c_tx = c_s_x - (fToFp16(t.x) >> wsub);
	362	fp16 c_ty = c_s_y - (fToFp16(t.y) >> hsub);
	363
	364	/* for each pixel in the destination image we calc the source
	365	* coordinate and make an interpolation:
	366	* p_d = c_d + M(p_s - c_s) + t
	367	* where p are the points, c the center coordinate,
	368	* _s source and _d destination,
	369	* t the translation, and M the rotation and scaling matrix
	370	* p_s = M^{-1}(p_d - c_d - t) + c_s
	371	*/
	372	for (y = 0; y < dh; y++) {
	373	// swapping of the loops brought 15% performace gain
	374	int32_t y_d1 = (y - c_d_y);
	375	for (x = 0; x < dw; x++) {
	376	int32_t x_d1 = (x - c_d_x);
	377	fp16 x_s = zcos_a * x_d1 + zsin_a * y_d1 + c_tx;
	378	fp16 y_s = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty;
	379	uint8_t dest = &dat_2[x + y td->destbuf.linesize[plane]];
	380	// inlining the interpolation function would bring 10%
	381	// (but then we cannot use the function pointer anymore...)
	382	td->interpolate(dest, x_s, y_s, dat_1,
	383	td->src.linesize[plane], sw, sh,
	384	td->conf.crop ? black : *dest);
	385	}
	386	}
	387	}
	388
	389	return VS_OK;
	390	}
	391
	392
	393
	394	/* /\** TESTING */
	395	/* * transformPlanar_orc: applies current transformation to frame */
	396	/* * */
	397	/* * Parameters: */
	398	/* * td: private data structure of this filter */
	399	/* * Return value: */
	400	/* * 0 for failture, 1 for success */
	401	/* * Preconditions: */
	402	/* * The frame must be in Planar format */
	403	/* * */
	404	/* * Fixed-point format 32 bit integer: */
	405	/* * for image coords we use val<<8 */
	406	/* * for angle and zoom we use val<<16 */
	407	/* * */
	408	/* *\/ */
	409	/* int transformPlanar_orc(VSTransformData* td, VSTransform t) */
	410	/* { */
	411	/* int32_t x = 0, y = 0; */
	412	/* uint8_t *Y_1, *Y_2, *Cb_1, *Cb_2, *Cr_1, *Cr_2; */
	413
	414	/* if (t.alpha==0 && t.x==0 && t.y==0 && t.zoom == 0) return VS_OK; // noop */
	415
	416	/* Y_1 = td->src; */
	417	/* Y_2 = td->destbuf; */
	418	/* Cb_1 = td->src + td->fiSrc.width * td->fiSrc.height; */
	419	/* Cb_2 = td->destbuf + td->fiDest.width * td->fiDest.height; */
	420	/* Cr_1 = td->src + 5*td->fiSrc.width * td->fiSrc.height/4; */
	421	/* Cr_2 = td->destbuf + 5*td->fiDest.width * td->fiDest.height/4; */
	422	/* fp16 c_s_x = iToFp16(td->fiSrc.width / 2); */
	423	/* fp16 c_s_y = iToFp16(td->fiSrc.height / 2); */
	424	/* int32_t c_d_x = td->fiDest.width / 2; */
	425	/* int32_t c_d_y = td->fiDest.height / 2; */
	426
	427	/* float z = 1.0-t.zoom/100.0; */
	428	/* fp16 zcos_a = fToFp16(z*cos(-t.alpha)); // scaled cos */
	429	/* fp16 zsin_a = fToFp16(z*sin(-t.alpha)); // scaled sin */
	430	/* fp16 c_tx = c_s_x - fToFp16(t.x); */
	431	/* fp16 c_ty = c_s_y - fToFp16(t.y); */
	432
	433	/* /\* for each pixel in the destination image we calc the source */
	434	/* * coordinate and make an interpolation: */
	435	/* * p_d = c_d + M(p_s - c_s) + t */
	436	/* * where p are the points, c the center coordinate, */
	437	/* * _s source and _d destination, */
	438	/* * t the translation, and M the rotation and scaling matrix */
	439	/* * p_s = M^{-1}(p_d - c_d - t) + c_s */
	440	/* *\/ */
	441	/* /\* Luminance channel *\/ */
	442	/* fp16* x_ss = (fp16*)malloc(sizeof(fp16)*td->fiDest.width); */
	443	/* fp16* y_ss = (fp16*)malloc(sizeof(fp16)*td->fiDest.width); */
	444	/* int32_t* xs = (int32_t*)malloc(sizeof(int32_t)*td->fiDest.width); */
	445	/* for (x = 0; x < td->fiDest.width; x++) { // this can go to td */
	446	/* xs[x]=x; */
	447	/* } */
	448
	449	/* for (y = 0; y < td->fiDest.height; y++) { */
	450	/* int32_t y_d1 = (y - c_d_y); */
	451	/* fp16 sin_y = zsin_a * y_d1; */
	452	/* fp16 cos_y = zcos_a * y_d1; */
	453	/* for (x = 0; x < td->fiDest.width; x++) { */
	454	/* int32_t x_d1 = (xs[x] - c_d_x); */
	455	/* //x_ss[x] = zcos_a * x_d1 + zsin_a * y_d1 + c_tx; */
	456	/* y_ss[x] = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty; */
	457	/* } */
	458	/* transform_one_line_optimized1 (x_ss, y_ss, xs, y_d1, c_d_x, */
	459	/* c_tx, c_ty, zcos_a, zsin_a, sin_y, cos_y, */
	460	/* td->fiDest.width); */
	461	/* // transform_one_line_optimized (x_ss, y_ss, xs, y_d1, c_d_x, */
	462	/* // c_tx, c_ty, zcos_a, zsin_a, td->fiDest.width); */
	463
	464	/* for (x = 0; x < td->fiDest.width; x++) { */
	465	/* uint8_t *dest = &Y_2[x + y * td->fiDest.width]; */
	466	/* td->interpolate(dest, x_ss[x], y_ss[x], Y_1, */
	467	/* td->fiSrc.width, td->fiSrc.height, */
	468	/* td->crop ? 16 : *dest); */
	469	/* } */
	470	/* } */
	471
	472	/* /\* Color channels *\/ */
	473	/* int32_t ws2 = td->fiSrc.width/2; */
	474	/* int32_t wd2 = td->fiDest.width/2; */
	475	/* int32_t hs2 = td->fiSrc.height/2; */
	476	/* int32_t hd2 = td->fiDest.height/2; */
	477	/* fp16 c_tx2 = c_tx/2; */
	478	/* fp16 c_ty2 = c_ty/2; */
	479
	480	/* for (y = 0; y < hd2; y++) { */
	481	/* int32_t y_d1 = y - (c_d_y)/2; */
	482	/* for (x = 0; x < wd2; x++) { */
	483	/* int32_t x_d1 = x - (c_d_x)/2; */
	484	/* fp16 x_s = zcos_a * x_d1 + zsin_a * y_d1 + c_tx2; */
	485	/* fp16 y_s = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty2; */
	486	/* uint8_t *dest = &Cr_2[x + y * wd2]; */
	487	/* td->interpolate(dest, x_s, y_s, Cr_1, ws2, hs2, */
	488	/* td->crop ? 128 : *dest); */
	489	/* dest = &Cb_2[x + y * wd2]; */
	490	/* td->interpolate(dest, x_s, y_s, Cb_1, ws2, hs2, */
	491	/* td->crop ? 128 : *dest); */
	492	/* } */
	493	/* } */
	494
	495	/* return VS_OK; */
	496	/* } */
	497
	498	/*
	499	some debugging stuff
	500	FILE* f1 = fopen("transFP.pos","w");
	501	fprintf(f1,"%i,%i:\t %f,%f\n", x, y, x_s / (float)(1<<16), y_s / (float)(1<<16));
	502	fclose(f1);
	503
	504	*/
	505
	506
	507
	508	/*
	509	* Local variables:
	510	* c-file-style: "stroustrup"
	511	* c-file-offsets: ((case-label . *) (statement-case-intro . *))
	512	* indent-tabs-mode: nil
	513	* c-basic-offset: 2 t
	514	*
	515	* End:
	516	*
	517	* vim: expandtab shiftwidth=2:
	518	*/