[deb_vid.stab.git] / src / transformfixedpoint.c

/*
 *  transformfixedpoint.c
 *
 *  Fixed point implementation of image transformations (see also transformfloat.c/h)
 *
 *  Copyright (C) Georg Martius - June 2011
 *   georg dot martius at web dot de
 *
 *  This file is part of vid.stab video stabilization library
 *
 *  vid.stab is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License,
 *  as published by the Free Software Foundation; either version 2, or
 *  (at your option) any later version.
 *
 *  vid.stab is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *
 */
#include "transformfixedpoint.h"
#include "transform.h"
#include "transformtype_operations.h"

// the orc code does not work at the moment (BUG in ORC?)
// #include "orc/transformorc.h"

//#include <math.h>
//#include <libgen.h>

/*
 * Fixed-point conversion helpers.
 *
 *  fp8  : value scaled by 2^8  (8 fractional bits)
 *  fp16 : value scaled by 2^16 (16 fractional bits)
 *
 * The float->fixed conversions scale by exactly 2^8 / 2^16 so that 1.0 maps
 * to the fixed-point representation of one and converting back with
 * fp8ToI/fp16ToI (or fp8ToF/fp16ToF) round-trips whole numbers.
 *
 * The int->fixed conversions multiply instead of shifting left because a
 * left shift of a negative value is undefined behaviour in C.
 *
 * The fixed->int conversions shift right; for negative operands this relies
 * on the compiler implementing an arithmetic shift (implementation-defined).
 */
#define iToFp8(v)  ((v) * (1<<8))
#define fToFp8(v)  ((int32_t)((v)*((float)(1<<8))))
#define iToFp16(v) ((v) * (1<<16))
#define fToFp16(v) ((int32_t)((v)*((double)(1<<16))))
/* drop 8 (resp. 16) fractional bits without converting to an integer value */
#define fp16To8(v) ((v)>>8)
#define fp24To8(v) ((v)>>16)

/* truncating conversions back to integer / floating point */
#define fp8ToI(v)  ((v)>>8)
#define fp16ToI(v) ((v)>>16)
#define fp8ToF(v)  ((v)/((double)(1<<8)))
#define fp16ToF(v) ((v)/((double)(1<<16)))

/* rounding conversions: add one half, then drop the fractional bits */
#define fp8_0_5 (1<<7)
#define fp8ToIRound(v) (((v) + fp8_0_5) >> 8)
#define fp16_0_5 (1<<15)
#define fp16ToIRound(v) (((v) + fp16_0_5) >> 16)

/** interpolateBiLinBorder: bi-linear interpolation function that also works
    at and outside the image border, see interpolate.

    If the 2x2 neighbourhood around (x,y) lies completely inside the image the
    four pixels are blended bi-linearly. Otherwise the nearest border pixel is
    cross-faded with the default value def over a band of 10 pixels, so that
    the image fades out instead of ending abruptly.

    rv:            destination pixel (call by reference)
    x,y:           source coordinates in fp16 fixed point
    img:           source image plane
    img_linesize:  line stride of img as expected by PIX/PIXEL
    width,height:  dimension of the image plane
    def:           default value used outside of the image */
inline void interpolateBiLinBorder(uint8_t *rv, fp16 x, fp16 y,
                                   const uint8_t *img, int img_linesize,
                                   int32_t width, int32_t height, uint8_t def)
{
  const int32_t x0 = fp16ToI(x);
  const int32_t y0 = fp16ToI(y);
  const int32_t x1 = x0 + 1;
  const int32_t y1 = y0 + 1;

  if (x0 >= 0 && x1 < width && y0 >= 0 && y1 < height) {
    // whole 2x2 neighbourhood is inside: ordinary bi-linear blend
    short p11 = PIXEL(img, img_linesize, x1, y1, width, height, def);
    short p10 = PIXEL(img, img_linesize, x1, y0, width, height, def);
    short p01 = PIXEL(img, img_linesize, x0, y1, width, height, def);
    short p00 = PIXEL(img, img_linesize, x0, y0, width, height, def);
    // fractional distances to the four neighbours
    fp16 wx1 = x - iToFp16(x0);
    fp16 wx0 = iToFp16(x1) - x;
    fp16 wy1 = y - iToFp16(y0);
    fp16 wy0 = iToFp16(y1) - y;
    fp16 s   = fp16To8(p11*wx1 + p01*wx0)*fp16To8(wy1) +
      fp16To8(p10*wx1 + p00*wx0)*fp16To8(wy0) + 1;
    *rv = fp16ToIRound(s);
    return;
  }

  // number of pixels to blur out the border pixel outwards
  const int32_t w = 10;
  // largest overshoot beyond the fade band over all four sides
  int32_t c = - w - x0;
  if (- w - y0 > c)        c = - w - y0;
  if (x1 - w - width > c)  c = x1 - w - width;
  if (y1 - w - height > c) c = y1 - w - height;
  // limit the blend factor to [0, w]
  if (c > w) c = w;
  if (c < 0) c = 0;

  // pixel at border of source image: clamp the coordinates into the image
  int32_t bx = x0;
  if (bx > width - 1)  bx = width - 1;
  if (bx < 0)          bx = 0;
  int32_t by = y0;
  if (by > height - 1) by = height - 1;
  if (by < 0)          by = 0;
  short val_border = PIX(img, img_linesize, bx, by);

  *rv = (def * c + val_border * (w - c)) / w;
}

/** taken from http://en.wikipedia.org/wiki/Bicubic_interpolation for alpha=-0.5
    in matrix notation (the kernel below additionally divides the product by 2):
    a0-a3 are the neighboring points where the target point is between a1 and a2
    t is the point of interpolation (position between a1 and a2), value between 0 and 1
                  | 0, 2, 0, 0 |  |a0|
                  |-1, 0, 1, 0 |  |a1|
    (1,t,t^2,t^3) | 2,-5, 4,-1 |  |a2|
                  |-1, 3,-3, 1 |  |a3|
*/
/* inline static short bicub_kernel(fp16 t, short a0, short a1, short a2, short a3){ */
/*   // (2*a1 + t*((-a0+a2) + t*((2*a0-5*a1+4*a2-a3) + t*(-a0+3*a1-3*a2+a3) )) ) / 2; */
/*   return ((iToFp16(2*a1) + t*(-a0+a2 */
/*             + fp16ToI(t*((2*a0-5*a1+4*a2-a3) */
/*              + fp16ToI(t*(-a0+3*a1-3*a2+a3)) )) ) */
/*      ) ) >> 17; */
/* } */

inline static short bicub_kernel(fp16 t, short a0, short a1, short a2, short a3){
  // Evaluates
  //   (2*a1 + t*((-a0+a2) + t*((2*a0-5*a1+4*a2-a3) + t*(-a0+3*a1-3*a2+a3) )) ) / 2
  // from the innermost bracket outwards. t is an fp16 fraction, so every
  // product with t is brought back to integer scale with rounding
  // (adding 1/2 compensates for truncation errors).
  const fp16 cubic = fp16ToIRound(t*(-a0+3*a1-3*a2+a3));
  const fp16 quad  = fp16ToIRound(t*((2*a0-5*a1+4*a2-a3) + cubic));
  // the outermost sum is kept in fp16 scale
  const fp16 sum   = iToFp16(2*a1) + t*(-a0+a2 + quad);
  // halve, then round to the nearest integer
  return fp16ToIRound(sum >> 1);
}

/** interpolateBiCub: bi-cubic interpolation function using 4x4 pixel, see interpolate.

    Near the border, where the 4x4 neighbourhood does not fit into the image,
    the function falls back to interpolateBiLinBorder.

    rv:            destination pixel (call by reference)
    x,y:           source coordinates in fp16 fixed point
    img:           source image plane
    img_linesize:  line stride of img as expected by PIX
    width,height:  dimension of the image plane
    def:           default value, only used by the border fallback */
inline void interpolateBiCub(uint8_t *rv, fp16 x, fp16 y,
                             const uint8_t *img, int img_linesize,
                             int width, int height, uint8_t def)
{
  // do a simple linear interpolation at the border
  int32_t ix_f = fp16ToI(x);
  int32_t iy_f = fp16ToI(y);
  if (unlikely(ix_f < 1 || ix_f > width - 3 || iy_f < 1 || iy_f > height - 3)) {
    interpolateBiLinBorder(rv, x, y, img, img_linesize, width, height, def);
  } else {
    fp16 tx = x - iToFp16(ix_f);
    fp16 ty = y - iToFp16(iy_f);
    // horizontal pass: one kernel evaluation for each of the four rows
    short rows[4];
    int r;
    for (r = 0; r < 4; r++) {
      int32_t iy = iy_f - 1 + r;
      rows[r] = bicub_kernel(tx,
                             PIX(img, img_linesize, ix_f-1, iy),
                             PIX(img, img_linesize, ix_f,   iy),
                             PIX(img, img_linesize, ix_f+1, iy),
                             PIX(img, img_linesize, ix_f+2, iy));
    }
    // vertical pass over the four intermediate values
    short res = bicub_kernel(ty, rows[0], rows[1], rows[2], rows[3]);
    // The cubic kernel has negative lobes and can over- AND undershoot the
    // 0..255 range. Clamp on both sides: storing a negative short into a
    // uint8_t would wrap around to a bright value.
    *rv = res < 0 ? 0 : (res < 255 ? res : 255);
  }
}


/** interpolateBiLin: bi-linear interpolation function, see interpolate.

    Blends the 2x2 pixel neighbourhood around (x,y). If that neighbourhood
    does not lie completely inside the image the call is forwarded to
    interpolateBiLinBorder.

    rv:            destination pixel (call by reference)
    x,y:           source coordinates in fp16 fixed point
    img:           source image plane
    img_linesize:  line stride of img as expected by PIX
    width,height:  dimension of the image plane
    def:           default value, only used by the border fallback */
inline void interpolateBiLin(uint8_t *rv, fp16 x, fp16 y,
                             const uint8_t *img, int img_linesize,
                             int32_t width, int32_t height, uint8_t def)
{
  const int32_t x0 = fp16ToI(x);
  const int32_t y0 = fp16ToI(y);
  if (unlikely(x0 < 0 || x0 > width - 2 || y0 < 0 || y0 > height - 2)) {
    interpolateBiLinBorder(rv, x, y, img, img_linesize, width, height, def);
    return;
  }

  const int32_t x1 = x0 + 1;
  const int32_t y1 = y0 + 1;
  // the four neighbours, named p<x offset><y offset>
  short p11 = PIX(img, img_linesize, x1, y1);
  short p10 = PIX(img, img_linesize, x1, y0);
  short p01 = PIX(img, img_linesize, x0, y1);
  short p00 = PIX(img, img_linesize, x0, y0);
  // fractional distances to the neighbours
  fp16 wx1 = x - iToFp16(x0);
  fp16 wx0 = iToFp16(x1) - x;
  fp16 wy1 = y - iToFp16(y0);
  fp16 wy0 = iToFp16(y1) - y;
  fp16 s   = fp16To8(p11*wx1 + p01*wx0)*fp16To8(wy1) +
    fp16To8(p10*wx1 + p00*wx0)*fp16To8(wy0);

  short res = fp16ToI(s);
  // it is underestimated due to truncation, so we add one (saturating at 255)
  if (res < 255)
    *rv = res+1;
  else
    *rv = 255;
}

/** interpolateLin: linear (only x) interpolation function, see interpolate.

    The row is chosen by rounding y; within that row the two horizontally
    adjacent pixels around x are blended. Pixels are fetched through PIXEL
    with def as the fallback value.

    rv:            destination pixel (call by reference)
    x,y:           source coordinates in fp16 fixed point
    img:           source image plane
    img_linesize:  line stride of img as expected by PIXEL
    width,height:  dimension of the image plane
    def:           default value passed to PIXEL */
inline void interpolateLin(uint8_t *rv, fp16 x, fp16 y,
                           const uint8_t *img, int img_linesize,
                           int width, int height, uint8_t def)
{
  const int32_t x0  = fp16ToI(x);
  const int32_t x1  = x0 + 1;
  const int     row = fp16ToIRound(y);

  // horizontal neighbours in the selected row
  short right = PIXEL(img, img_linesize, x1, row, width, height, def);
  short left  = PIXEL(img, img_linesize, x0, row, width, height, def);

  // weight each neighbour by the distance to the opposite one
  fp16 w_right = x - iToFp16(x0);
  fp16 w_left  = iToFp16(x1) - x;
  fp16 s       = right*w_right + left*w_left;

  short res = fp16ToI(s);
  if (res < 255)
    *rv = res;
  else
    *rv = 255;
}

/** interpolateZero: nearest neighbor interpolation function, see interpolate.

    Rounds both coordinates to the nearest integer and fetches that pixel
    through PIXEL, with def as the fallback value.

    rv:            destination pixel (call by reference)
    x,y:           source coordinates in fp16 fixed point
    img:           source image plane
    img_linesize:  line stride of img as expected by PIXEL
    width,height:  dimension of the image plane
    def:           default value passed to PIXEL */
inline void interpolateZero(uint8_t *rv, fp16 x, fp16 y,
                            const uint8_t *img, int img_linesize,
                            int width, int height, uint8_t def)
{
  const int32_t col = fp16ToIRound(x);
  const int32_t row = fp16ToIRound(y);
  const uint8_t nearest =
    (uint8_t) PIXEL(img, img_linesize, col, row, width, height, def);
  *rv = nearest;
}


/**
 * interpolateN: Bi-linear interpolation function for N channel image.
 *
 * Parameters:
 *             rv: destination pixel (call by reference)
 *            x,y: the source coordinates in the image img (fp16 fixed
 *                 point). Note these are real-valued coordinates, that's
 *                 why we interpolate
 *            img: source image
 *   img_linesize: line stride of img as expected by PIXN
 *   width,height: dimension of image
 *              N: number of channels
 *        channel: channel number (0..N-1)
 *            def: default value if coordinates are out of range
 * Return value:  None
 */
inline void interpolateN(uint8_t *rv, fp16 x, fp16 y,
                         const uint8_t *img, int img_linesize,
                         int width, int height,
                         uint8_t N, uint8_t channel,
                         uint8_t def)
{
  int32_t ix_f = fp16ToI(x);
  int32_t iy_f = fp16ToI(y);
  if (ix_f < 0 || ix_f > width-1 || iy_f < 0 || iy_f > height - 1) {
    *rv = def;
  } else {
    // The range check above admits the last column and the last row. Their
    // right/lower neighbours would lie outside of the image, so the
    // neighbour index is clamped to the last valid one instead of reading
    // past the end of the line (or of the buffer).
    int32_t ix_c = ix_f + 1 < width  ? ix_f + 1 : ix_f;
    int32_t iy_c = iy_f + 1 < height ? iy_f + 1 : iy_f;
    short v1 = PIXN(img, img_linesize, ix_c, iy_c, N, channel);
    short v2 = PIXN(img, img_linesize, ix_c, iy_f, N, channel);
    short v3 = PIXN(img, img_linesize, ix_f, iy_c, N, channel);
    short v4 = PIXN(img, img_linesize, ix_f, iy_f, N, channel);
    // the weights always refer to the unclamped neighbour position
    fp16 x_f = iToFp16(ix_f);
    fp16 x_c = iToFp16(ix_f + 1);
    fp16 y_f = iToFp16(iy_f);
    fp16 y_c = iToFp16(iy_f + 1);
    fp16 s  = fp16To8(v1*(x - x_f)+v3*(x_c - x))*fp16To8(y - y_f) +
      fp16To8(v2*(x - x_f) + v4*(x_c - x))*fp16To8(y_c - y);
    *rv = fp16ToIRound(s);
  }
}


/**
 * transformPacked: applies current transformation to frame
 * Parameters:
 *         td: private data structure of this filter
 *          t: transformation to apply (x, y, alpha and zoom are used)
 * Return value:
 *         VS_OK
 * Preconditions:
 *  The frame must be in Packed format
 */
int transformPacked(VSTransformData* td, VSTransform t)
{
  int x = 0, y = 0, k = 0;
  const uint8_t *D_1 = td->src.data[0];
  uint8_t *D_2       = td->destbuf.data[0];

  fp16 c_s_x = iToFp16(td->fiSrc.width/2);
  fp16 c_s_y = iToFp16(td->fiSrc.height/2);
  int32_t c_d_x = td->fiDest.width/2;
  int32_t c_d_y = td->fiDest.height/2;

  /* for each pixel in the destination image we calc the source
   * coordinate and make an interpolation:
   *      p_d = c_d + M(p_s - c_s) + t
   * where p are the points, c the center coordinate,
   *  _s source and _d destination,
   *  t the translation, and M the rotation matrix
   *      p_s = M^{-1}(p_d - c_d - t) + c_s
   */
  float z     = 1.0-t.zoom/100.0;
  fp16 zcos_a = fToFp16(z*cos(-t.alpha)); // scaled cos
  fp16 zsin_a = fToFp16(z*sin(-t.alpha)); // scaled sin
  fp16  c_tx    = c_s_x - fToFp16(t.x);
  fp16  c_ty    = c_s_y - fToFp16(t.y);
  int channels = td->fiSrc.bytesPerPixel;
  /* All channels */
  for (y = 0; y < td->fiDest.height; y++) {
    int32_t y_d1 = (y - c_d_y);
    // NOTE(review): assumes destbuf.linesize[0] is the row stride in bytes
    //  -- confirm against the code that sets up the frame.
    uint8_t *dest_row = D_2 + y * td->destbuf.linesize[0];
    for (x = 0; x < td->fiDest.width; x++) {
      int32_t x_d1 = (x - c_d_x);
      fp16 x_s  =  zcos_a * x_d1 + zsin_a * y_d1 + c_tx;
      fp16 y_s  = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty;
      // In a packed frame the channels of one pixel are stored next to each
      // other, so pixel x starts at offset x*channels within the row.
      // (Indexing with x + k made channel k of pixel x overlap with
      //  channel 0 of pixel x+k.)
      uint8_t *dest_pixel = dest_row + x * channels;

      for (k = 0; k < channels; k++) { // iterate over colors
        uint8_t *dest = dest_pixel + k;
        // without cropping the previous content of dest is kept for
        //  source coordinates outside of the image
        interpolateN(dest, x_s, y_s, D_1, td->src.linesize[0],
                     td->fiSrc.width, td->fiSrc.height,
                     channels, k, td->conf.crop ? 16 : *dest);
      }
    }
  }
  return VS_OK;
}

/**
 * transformPlanar: applies current transformation to frame
 *
 * Parameters:
 *         td: private data structure of this filter
 *          t: transformation to apply (x, y, alpha and zoom are used)
 * Return value:
 *         VS_OK
 * Preconditions:
 *  The frame must be in Planar format
 *
 * Fixed-point format:
 *  source coordinates as well as the scaled sine and cosine are held in
 *  fp16 values (see iToFp16 / fToFp16)
 *
 */
int transformPlanar(VSTransformData* td, VSTransform t)
{
  // identity transformation: at most a plain copy is needed
  if (t.alpha==0 && t.x==0 && t.y==0 && t.zoom == 0){
    if(!vsFramesEqual(&td->src,&td->destbuf))
      vsFrameCopy(&td->destbuf, &td->src, &td->fiSrc);
    return VS_OK;
  }

  // rotation, zoom and translation are the same for all planes
  const float z         = 1.0-t.zoom/100.0;
  const fp16 zcos_a     = fToFp16(z*cos(-t.alpha)); // scaled cos
  const fp16 zsin_a     = fToFp16(z*sin(-t.alpha)); // scaled sin
  const int32_t shift_x = fToFp16(t.x);
  const int32_t shift_y = fToFp16(t.y);

  int plane;
  for(plane=0; plane< td->fiSrc.planes; plane++){
    uint8_t *src_plane = td->src.data[plane];
    uint8_t *dst_plane = td->destbuf.data[plane];
    // per-plane subsampling shifts and the resulting plane dimensions
    const int wsub = vsGetPlaneWidthSubS(&td->fiSrc,plane);
    const int hsub = vsGetPlaneHeightSubS(&td->fiSrc,plane);
    const int dw = CHROMA_SIZE(td->fiDest.width , wsub);
    const int dh = CHROMA_SIZE(td->fiDest.height, hsub);
    const int sw = CHROMA_SIZE(td->fiSrc.width  , wsub);
    const int sh = CHROMA_SIZE(td->fiSrc.height , hsub);
    // fill value for cropped areas: 0 for the first plane, 0x80 otherwise
    const uint8_t black = plane==0 ? 0 : 0x80;

    // destination center (integer) and source center minus the translation
    //  (fp16); the translation is scaled down by the plane's subsampling
    const int32_t c_d_x = dw / 2;
    const int32_t c_d_y = dh / 2;
    const fp16 c_tx = iToFp16(sw / 2) - (shift_x >> wsub);
    const fp16 c_ty = iToFp16(sh / 2) - (shift_y >> hsub);

    /* for each pixel in the destination image we calc the source
     * coordinate and make an interpolation:
     *      p_d = c_d + M(p_s - c_s) + t
     * where p are the points, c the center coordinate,
     *  _s source and _d destination,
     *  t the translation, and M the rotation and scaling matrix
     *      p_s = M^{-1}(p_d - c_d - t) + c_s
     */
    int32_t x, y;
    for (y = 0; y < dh; y++) {
      // rows in the outer loop (swapping of the loops brought 15%
      //  performance gain)
      const int32_t y_d1 = (y - c_d_y);
      uint8_t *dst_row = dst_plane + y * td->destbuf.linesize[plane];
      for (x = 0; x < dw; x++) {
        const int32_t x_d1 = (x - c_d_x);
        const fp16 x_s  =  zcos_a * x_d1 + zsin_a * y_d1 + c_tx;
        const fp16 y_s  = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty;
        uint8_t *dest = dst_row + x;
        // inlining the interpolation function would bring 10%
        //  (but then we cannot use the function pointer anymore...)
        // without cropping the previous content of dest is the default
        td->interpolate(dest, x_s, y_s, src_plane,
                        td->src.linesize[plane], sw, sh,
                        td->conf.crop ? black : *dest);
      }
    }
  }

  return VS_OK;
}


/* /\** TESTING */
/*  * transformPlanar_orc: applies current transformation to frame */
/*  * */
/*  * Parameters: */
/*  *         td: private data structure of this filter */
/*  * Return value:  */
/*  *         0 for failture, 1 for success */
/*  * Preconditions: */
/*  *  The frame must be in Planar format */
/*  * */
/*  * Fixed-point format 32 bit integer: */
/*  *  for image coords we use val<<8 */
/*  *  for angle and zoom we use val<<16 */
/*  * */
/*  *\/ */
/* int transformPlanar_orc(VSTransformData* td, VSTransform t) */
/* { */
/*     int32_t x = 0, y = 0; */
/*     uint8_t *Y_1, *Y_2, *Cb_1, *Cb_2, *Cr_1, *Cr_2; */

/*     if (t.alpha==0 && t.x==0 && t.y==0 && t.zoom == 0) return VS_OK; // noop */

/*     Y_1  = td->src;   */
/*     Y_2  = td->destbuf;   */
/*     Cb_1 = td->src + td->fiSrc.width * td->fiSrc.height; */
/*     Cb_2 = td->destbuf + td->fiDest.width * td->fiDest.height; */
/*     Cr_1 = td->src + 5*td->fiSrc.width * td->fiSrc.height/4; */
/*     Cr_2 = td->destbuf + 5*td->fiDest.width * td->fiDest.height/4; */
/*     fp16 c_s_x = iToFp16(td->fiSrc.width / 2); */
/*     fp16 c_s_y = iToFp16(td->fiSrc.height / 2); */
/*     int32_t c_d_x = td->fiDest.width / 2; */
/*     int32_t c_d_y = td->fiDest.height / 2;     */

/*     float z     = 1.0-t.zoom/100.0; */
/*     fp16 zcos_a = fToFp16(z*cos(-t.alpha)); // scaled cos */
/*     fp16 zsin_a = fToFp16(z*sin(-t.alpha)); // scaled sin */
/*     fp16  c_tx    = c_s_x - fToFp16(t.x); */
/*     fp16  c_ty    = c_s_y - fToFp16(t.y); */

/*     /\* for each pixel in the destination image we calc the source */
/*      * coordinate and make an interpolation:  */
/*      *      p_d = c_d + M(p_s - c_s) + t  */
/*      * where p are the points, c the center coordinate,  */
/*      *  _s source and _d destination,  */
/*      *  t the translation, and M the rotation and scaling matrix */
/*      *      p_s = M^{-1}(p_d - c_d - t) + c_s */
/*      *\/ */
/*     /\* Luminance channel *\/ */
/*     fp16* x_ss = (fp16*)malloc(sizeof(fp16)*td->fiDest.width); */
/*     fp16* y_ss = (fp16*)malloc(sizeof(fp16)*td->fiDest.width);     */
/*     int32_t* xs = (int32_t*)malloc(sizeof(int32_t)*td->fiDest.width);         */
/*     for (x = 0; x < td->fiDest.width; x++) { // this can go to td */
/*       xs[x]=x; */
/*     } */

/*     for (y = 0; y < td->fiDest.height; y++) { */
/*       int32_t y_d1 = (y - c_d_y);   */
/*       fp16 sin_y   = zsin_a * y_d1; */
/*       fp16 cos_y   = zcos_a * y_d1; */
/*       for (x = 0; x < td->fiDest.width; x++) { */
/*         int32_t x_d1 = (xs[x] - c_d_x); */
/*         //x_ss[x]  =  zcos_a * x_d1 + zsin_a * y_d1 + c_tx; */
/*   y_ss[x]  = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty; */
/*       } */
/*       transform_one_line_optimized1 (x_ss, y_ss, xs, y_d1, c_d_x,  */
/*              c_tx, c_ty, zcos_a, zsin_a, sin_y, cos_y,  */
/*              td->fiDest.width); */
/*       // transform_one_line_optimized (x_ss, y_ss, xs, y_d1, c_d_x,  */
/*       //             c_tx, c_ty, zcos_a, zsin_a, td->fiDest.width); */

/*       for (x = 0; x < td->fiDest.width; x++) { */
/*   uint8_t *dest = &Y_2[x + y * td->fiDest.width]; */
/*   td->interpolate(dest, x_ss[x], y_ss[x], Y_1,  */
/*         td->fiSrc.width, td->fiSrc.height,  */
/*         td->crop ? 16 : *dest); */
/*       } */
/*     } */

/*     /\* Color channels *\/ */
/*     int32_t ws2 = td->fiSrc.width/2; */
/*     int32_t wd2 = td->fiDest.width/2; */
/*     int32_t hs2 = td->fiSrc.height/2; */
/*     int32_t hd2 = td->fiDest.height/2; */
/*     fp16 c_tx2   = c_tx/2; */
/*     fp16 c_ty2   = c_ty/2; */

/*     for (y = 0; y < hd2; y++) { */
/*       int32_t y_d1 = y - (c_d_y)/2; */
/*       for (x = 0; x < wd2; x++) { */
/*   int32_t x_d1 = x - (c_d_x)/2; */
/*   fp16 x_s  =  zcos_a * x_d1 + zsin_a * y_d1 + c_tx2; */
/*   fp16 y_s  = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty2;  */
/*   uint8_t *dest = &Cr_2[x + y * wd2]; */
/*   td->interpolate(dest, x_s, y_s, Cr_1, ws2, hs2,  */
/*         td->crop ? 128 : *dest); */
/*   dest = &Cb_2[x + y * wd2]; */
/*   td->interpolate(dest, x_s, y_s, Cb_1, ws2, hs2,  */
/*         td->crop ? 128 : *dest); */
/*       } */
/*     } */

/*     return VS_OK; */
/* } */

/*
  some debugging stuff
  FILE* f1 = fopen("transFP.pos","w");
  fprintf(f1,"%i,%i:\t %f,%f\n", x, y, x_s / (float)(1<<16), y_s / (float)(1<<16));
  fclose(f1);

*/


/*
 * Local variables:
 *   c-file-style: "stroustrup"
 *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
 *   indent-tabs-mode: nil
 *   c-basic-offset: 2 t
 *
 * End:
 *
 * vim: expandtab shiftwidth=2:
 */
Commit	Line	Data
80f575fc DM	1	/*
	2	* transformfixedpoint.c
	3	*
	4	* Fixed point implementation of image transformations (see also transformfloat.c/h)
	5	*
	6	* Copyright (C) Georg Martius - June 2011
	7	* georg dot martius at web dot de
	8	*
	9	* This file is part of vid.stab video stabilization library
	10	*
	11	* vid.stab is free software; you can redistribute it and/or modify
	12	* it under the terms of the GNU General Public License,
	13	* as published by the Free Software Foundation; either version 2, or
	14	* (at your option) any later version.
	15	*
	16	* vid.stab is distributed in the hope that it will be useful,
	17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	19	* GNU General Public License for more details.
	20	*
	21	* You should have received a copy of the GNU General Public License
	22	* along with GNU Make; see the file COPYING. If not, write to
	23	* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
	24	*
	25	*
	26	*/
	27	#include "transformfixedpoint.h"
	28	#include "transform.h"
	29	#include "transformtype_operations.h"
	30
	31	// the orc code does not work at the moment (BUG in ORC?)
	32	// #include "orc/transformorc.h"
	33
	34	//#include <math.h>
	35	//#include <libgen.h>
	36
	37	#define iToFp8(v) ((v)<<8)
	38	#define fToFp8(v) ((int32_t)((v)*((float)0xFF)))
	39	#define iToFp16(v) ((v)<<16)
	40	#define fToFp16(v) ((int32_t)((v)*((double)0xFFFF)))
	41	#define fp16To8(v) ((v)>>8)
	42	//#define fp16To8(v) ( (v) && 0x80 == 1 ? ((v)>>8 + 1) : ((v)>>8) )
	43	#define fp24To8(v) ((v)>>16)
	44
	45	#define fp8ToI(v) ((v)>>8)
	46	#define fp16ToI(v) ((v)>>16)
	47	#define fp8ToF(v) ((v)/((double)(1<<8)))
	48	#define fp16ToF(v) ((v)/((double)(1<<16)))
	49
	50	// #define fp8ToIRound(v) ( (((v)>>7) & 0x1) == 0 ? ((v)>>8) : ((v)>>8)+1 )
	51	#define fp8_0_5 (1<<7)
	52	#define fp8ToIRound(v) (((v) + fp8_0_5) >> 7)
	53	//#define fp16ToIRound(v) ( (((v)>>15) & 0x1) == 0 ? ((v)>>16) : ((v)>>16)+1 )
	54	#define fp16_0_5 (1<<15)
	55	#define fp16ToIRound(v) (((v) + fp16_0_5) >> 16)
	56
	57	/** interpolateBiLinBorder: bi-linear interpolation function that also works at the border.
	58	This is used by many other interpolation methods at and outsize the border, see interpolate */
	59	inline void interpolateBiLinBorder(uint8_t *rv, fp16 x, fp16 y,
	60	const uint8_t *img, int img_linesize,
	61	int32_t width, int32_t height, uint8_t def)
	62	{
	63	int32_t ix_f = fp16ToI(x);
	64	int32_t iy_f = fp16ToI(y);
65	int32_t ix_c = ix_f + 1;
66	int32_t iy_c = iy_f + 1;
67	if (ix_f < 0 \|\| ix_c >= width \|\| iy_f < 0 \|\| iy_c >= height) {
68	int32_t w = 10; // number of pixels to blur out the border pixel outwards
69	int32_t xl = - w - ix_f;
70	int32_t yl = - w - iy_f;
71	int32_t xh = ix_c - w - width;
72	int32_t yh = iy_c - w - height;
73	int32_t c = VS_MAX(VS_MIN(VS_MAX(xl, VS_MAX(yl, VS_MAX(xh, yh))),w),0);
74	// pixel at border of source image
75	short val_border = PIX(img, img_linesize, VS_MAX(VS_MIN(ix_f, width-1),0),
76	VS_MAX(VS_MIN(iy_f, height-1),0));
77	rv = (def c + val_border * (w - c)) / w;
78	}else{
79	short v1 = PIXEL(img, img_linesize, ix_c, iy_c, width, height, def);
80	short v2 = PIXEL(img, img_linesize, ix_c, iy_f, width, height, def);
81	short v3 = PIXEL(img, img_linesize, ix_f, iy_c, width, height, def);
82	short v4 = PIXEL(img, img_linesize, ix_f, iy_f, width, height, def);
83	fp16 x_f = iToFp16(ix_f);
84	fp16 x_c = iToFp16(ix_c);
85	fp16 y_f = iToFp16(iy_f);
86	fp16 y_c = iToFp16(iy_c);
87	fp16 s = fp16To8(v1(x - x_f)+v3(x_c - x))*fp16To8(y - y_f) +
88	fp16To8(v2(x - x_f) + v4(x_c - x))*fp16To8(y_c - y) + 1;
89	*rv = fp16ToIRound(s);
90	}
91	}
92
93	/** taken from http://en.wikipedia.org/wiki/Bicubic_interpolation for alpha=-0.5
94	in matrix notation:
95	a0-a3 are the neigthboring points where the target point is between a1 and a2
96	t is the point of interpolation (position between a1 and a2) value between 0 and 1
97	\| 0, 2, 0, 0 \| \|a0\|
98	\|-1, 0, 1, 0 \| \|a1\|
99	(1,t,t^2,t^3) \| 2,-5, 4,-1 \| \|a2\|
100	\|-1, 3,-3, 1 \| \|a3\|
101	*/
102	/* inline static short bicub_kernel(fp16 t, short a0, short a1, short a2, short a3){ */
103	/* // (2a1 + t((-a0+a2) + t((2a0-5a1+4a2-a3) + t(-a0+3a1-3a2+a3) )) ) / 2; /
104	/* return ((iToFp16(2a1) + t(-a0+a2 */
105	/* + fp16ToI(t((2a0-5a1+4a2-a3) */
106	/* + fp16ToI(t(-a0+3a1-3a2+a3)) )) ) /
107	/* ) ) >> 17; */
108	/* } */
109
110	inline static short bicub_kernel(fp16 t, short a0, short a1, short a2, short a3){
111	// (2a1 + t((-a0+a2) + t((2a0-5a1+4a2-a3) + t(-a0+3a1-3*a2+a3) )) ) / 2;
112	// we add 1/2 because of truncation errors
113	return fp16ToIRound((iToFp16(2a1) + t(-a0+a2
114	+ fp16ToIRound(t((2a0-5a1+4a2-a3)
115	+ fp16ToIRound(t(-a0+3a1-3*a2+a3)) )) )
116	) >> 1);
117	}
118
119	/** interpolateBiCub: bi-cubic interpolation function using 4x4 pixel, see interpolate */
120	inline void interpolateBiCub(uint8_t *rv, fp16 x, fp16 y,
121	const uint8_t *img, int img_linesize,
122	int width, int height, uint8_t def)
123	{
124	// do a simple linear interpolation at the border
125	int32_t ix_f = fp16ToI(x);
126	int32_t iy_f = fp16ToI(y);
127	if (unlikely(ix_f < 1 \|\| ix_f > width - 3 \|\| iy_f < 1 \|\| iy_f > height - 3)) {
128	interpolateBiLinBorder(rv, x, y, img, img_linesize, width, height, def);
129	} else {
130	fp16 x_f = iToFp16(ix_f);
131	fp16 y_f = iToFp16(iy_f);
132	fp16 tx = x-x_f;
133	short v1 = bicub_kernel(tx,
134	PIX(img, img_linesize, ix_f-1, iy_f-1),
135	PIX(img, img_linesize, ix_f, iy_f-1),
136	PIX(img, img_linesize, ix_f+1, iy_f-1),
137	PIX(img, img_linesize, ix_f+2, iy_f-1));
138	short v2 = bicub_kernel(tx,
139	PIX(img, img_linesize, ix_f-1, iy_f),
140	PIX(img, img_linesize, ix_f, iy_f),
141	PIX(img, img_linesize, ix_f+1, iy_f),
142	PIX(img, img_linesize, ix_f+2, iy_f));
143	short v3 = bicub_kernel(tx,
144	PIX(img, img_linesize, ix_f-1, iy_f+1),
145	PIX(img, img_linesize, ix_f, iy_f+1),
146	PIX(img, img_linesize, ix_f+1, iy_f+1),
147	PIX(img, img_linesize, ix_f+2, iy_f+1));
148	short v4 = bicub_kernel(tx,
149	PIX(img, img_linesize, ix_f-1, iy_f+2),
150	PIX(img, img_linesize, ix_f, iy_f+2),
151	PIX(img, img_linesize, ix_f+1, iy_f+2),
152	PIX(img, img_linesize, ix_f+2, iy_f+2));
153	short res = bicub_kernel(y-y_f, v1, v2, v3, v4);
154	*rv = res < 255 ? res : 255;
155	}
156	}
157
158
159	/** interpolateBiLin: bi-linear interpolation function, see interpolate */
160	inline void interpolateBiLin(uint8_t *rv, fp16 x, fp16 y,
161	const uint8_t *img, int img_linesize,
162	int32_t width, int32_t height, uint8_t def)
163	{
164	int32_t ix_f = fp16ToI(x);
165	int32_t iy_f = fp16ToI(y);
166	if (unlikely(ix_f < 0 \|\| ix_f > width - 2 \|\| iy_f < 0 \|\| iy_f > height - 2)) {
167	interpolateBiLinBorder(rv, x, y, img, img_linesize, width, height, def);
168	} else {
169	int32_t ix_c = ix_f + 1;
170	int32_t iy_c = iy_f + 1;
171	short v1 = PIX(img, img_linesize, ix_c, iy_c);
172	short v2 = PIX(img, img_linesize, ix_c, iy_f);
173	short v3 = PIX(img, img_linesize, ix_f, iy_c);
174	short v4 = PIX(img, img_linesize, ix_f, iy_f);
175	fp16 x_f = iToFp16(ix_f);
176	fp16 x_c = iToFp16(ix_c);
177	fp16 y_f = iToFp16(iy_f);
178	fp16 y_c = iToFp16(iy_c);
179	fp16 s = fp16To8(v1(x - x_f) + v3(x_c - x))*fp16To8(y - y_f) +
180	fp16To8(v2(x - x_f) + v4(x_c - x))*fp16To8(y_c - y);
181	// it is underestimated due to truncation, so we add one
182	short res = fp16ToI(s);
183	*rv = res < 255 ? res+1 : 255;
184	}
185	}
186
187	/** interpolateLin: linear (only x) interpolation function, see interpolate */
188	inline void interpolateLin(uint8_t *rv, fp16 x, fp16 y,
189	const uint8_t *img, int img_linesize,
190	int width, int height, uint8_t def)
191	{
192	int32_t ix_f = fp16ToI(x);
193	int32_t ix_c = ix_f + 1;
194	fp16 x_c = iToFp16(ix_c);
195	fp16 x_f = iToFp16(ix_f);
196	int y_n = fp16ToIRound(y);
197
198	short v1 = PIXEL(img, img_linesize, ix_c, y_n, width, height, def);
199	short v2 = PIXEL(img, img_linesize, ix_f, y_n, width, height, def);
200	fp16 s = v1(x - x_f) + v2(x_c - x);
201	short res = fp16ToI(s);
202	*rv = res < 255 ? res : 255;
203	}
204
205	/** interpolateZero: nearest neighbor interpolation function, see interpolate */
206	inline void interpolateZero(uint8_t *rv, fp16 x, fp16 y,
207	const uint8_t *img, int img_linesize,
208	int width, int height, uint8_t def)
209	{
210	int32_t ix_n = fp16ToIRound(x);
211	int32_t iy_n = fp16ToIRound(y);
212	*rv = (uint8_t) PIXEL(img, img_linesize, ix_n, iy_n, width, height, def);
213	}
214
215
216	/**
217	* interpolateN: Bi-linear interpolation function for N channel image.
218	*
219	* Parameters:
220	* rv: destination pixel (call by reference)
221	* x,y: the source coordinates in the image img. Note this
222	* are real-value coordinates, that's why we interpolate
223	* img: source image
224	* width,height: dimension of image
225	* N: number of channels
226	* channel: channel number (0..N-1)
227	* def: default value if coordinates are out of range
228	* Return value: None
229	*/
230	inline void interpolateN(uint8_t *rv, fp16 x, fp16 y,
231	const uint8_t *img, int img_linesize,
232	int width, int height,
233	uint8_t N, uint8_t channel,
234	uint8_t def)
235	{
236	int32_t ix_f = fp16ToI(x);
237	int32_t iy_f = fp16ToI(y);
238	if (ix_f < 0 \|\| ix_f > width-1 \|\| iy_f < 0 \|\| iy_f > height - 1) {
239	*rv = def;
240	} else {
241	int32_t ix_c = ix_f + 1;
242	int32_t iy_c = iy_f + 1;
243	short v1 = PIXN(img, img_linesize, ix_c, iy_c, N, channel);
244	short v2 = PIXN(img, img_linesize, ix_c, iy_f, N, channel);
245	short v3 = PIXN(img, img_linesize, ix_f, iy_c, N, channel);
246	short v4 = PIXN(img, img_linesize, ix_f, iy_f, N, channel);
247	fp16 x_f = iToFp16(ix_f);
248	fp16 x_c = iToFp16(ix_c);
249	fp16 y_f = iToFp16(iy_f);
250	fp16 y_c = iToFp16(iy_c);
251	fp16 s = fp16To8(v1(x - x_f)+v3(x_c - x))*fp16To8(y - y_f) +
252	fp16To8(v2(x - x_f) + v4(x_c - x))*fp16To8(y_c - y);
253	*rv = fp16ToIRound(s);
254	}
255	}
256
257
258	/**
259	* transformPacked: applies current transformation to frame
260	* Parameters:
261	* td: private data structure of this filter
262	* Return value:
263	* 0 for failture, 1 for success
264	* Preconditions:
265	* The frame must be in Packed format
266	*/
267	int transformPacked(VSTransformData* td, VSTransform t)
268	{
269	int x = 0, y = 0, k = 0;
270	uint8_t D_1, D_2;
271
272	D_1 = td->src.data[0];
273	D_2 = td->destbuf.data[0];
274	fp16 c_s_x = iToFp16(td->fiSrc.width/2);
275	fp16 c_s_y = iToFp16(td->fiSrc.height/2);
276	int32_t c_d_x = td->fiDest.width/2;
277	int32_t c_d_y = td->fiDest.height/2;
278
279	/* for each pixel in the destination image we calc the source
280	* coordinate and make an interpolation:
281	* p_d = c_d + M(p_s - c_s) + t
282	* where p are the points, c the center coordinate,
283	* _s source and _d destination,
284	* t the translation, and M the rotation matrix
285	* p_s = M^{-1}(p_d - c_d - t) + c_s
286	*/
287	float z = 1.0-t.zoom/100.0;
288	fp16 zcos_a = fToFp16(z*cos(-t.alpha)); // scaled cos
289	fp16 zsin_a = fToFp16(z*sin(-t.alpha)); // scaled sin
290	fp16 c_tx = c_s_x - fToFp16(t.x);
291	fp16 c_ty = c_s_y - fToFp16(t.y);
292	int channels = td->fiSrc.bytesPerPixel;
293	/* All channels */
294	for (y = 0; y < td->fiDest.height; y++) {
295	int32_t y_d1 = (y - c_d_y);
296	for (x = 0; x < td->fiDest.width; x++) {
297	int32_t x_d1 = (x - c_d_x);
298	fp16 x_s = zcos_a * x_d1 + zsin_a * y_d1 + c_tx;
299	fp16 y_s = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty;
300
301	for (k = 0; k < channels; k++) { // iterate over colors
302	uint8_t dest = &D_2[x + y td->destbuf.linesize[0]+k];
303	interpolateN(dest, x_s, y_s, D_1, td->src.linesize[0],
304	td->fiSrc.width, td->fiSrc.height,
305	channels, k, td->conf.crop ? 16 : *dest);
306	}
307	}
308	}
309	return VS_OK;
310	}
311
312	/**
313	* transformPlanar: applies current transformation to frame
314	*
315	* Parameters:
316	* td: private data structure of this filter
317	* Return value:
318	* 0 for failture, 1 for success
319	* Preconditions:
320	* The frame must be in Planar format
321	*
322	* Fixed-point format 32 bit integer:
323	* for image coords we use val<<8
324	* for angle and zoom we use val<<16
325	*
326	*/
327	int transformPlanar(VSTransformData* td, VSTransform t)
328	{
329	int32_t x = 0, y = 0;
330	uint8_t dat_1, dat_2;
331
332	if (t.alpha==0 && t.x==0 && t.y==0 && t.zoom == 0){
333	if(vsFramesEqual(&td->src,&td->destbuf))
334	return VS_OK; // noop
335	else {
336	vsFrameCopy(&td->destbuf, &td->src, &td->fiSrc);
337	return VS_OK;
338	}
339	}
340
341	int plane;
342	for(plane=0; plane< td->fiSrc.planes; plane++){
343	dat_1 = td->src.data[plane];
344	dat_2 = td->destbuf.data[plane];
345	int wsub = vsGetPlaneWidthSubS(&td->fiSrc,plane);
346	int hsub = vsGetPlaneHeightSubS(&td->fiSrc,plane);
347	int dw = CHROMA_SIZE(td->fiDest.width , wsub);
348	int dh = CHROMA_SIZE(td->fiDest.height, hsub);
349	int sw = CHROMA_SIZE(td->fiSrc.width , wsub);
350	int sh = CHROMA_SIZE(td->fiSrc.height , hsub);
351	uint8_t black = plane==0 ? 0 : 0x80;
352
353	fp16 c_s_x = iToFp16(sw / 2);
354	fp16 c_s_y = iToFp16(sh / 2);
355	int32_t c_d_x = dw / 2;
356	int32_t c_d_y = dh / 2;
357
358	float z = 1.0-t.zoom/100.0;
359	fp16 zcos_a = fToFp16(z*cos(-t.alpha)); // scaled cos
360	fp16 zsin_a = fToFp16(z*sin(-t.alpha)); // scaled sin
361	fp16 c_tx = c_s_x - (fToFp16(t.x) >> wsub);
362	fp16 c_ty = c_s_y - (fToFp16(t.y) >> hsub);
363
364	/* for each pixel in the destination image we calc the source
365	* coordinate and make an interpolation:
366	* p_d = c_d + M(p_s - c_s) + t
367	* where p are the points, c the center coordinate,
368	* _s source and _d destination,
369	* t the translation, and M the rotation and scaling matrix
370	* p_s = M^{-1}(p_d - c_d - t) + c_s
371	*/
372	for (y = 0; y < dh; y++) {
373	// swapping of the loops brought 15% performace gain
374	int32_t y_d1 = (y - c_d_y);
375	for (x = 0; x < dw; x++) {
376	int32_t x_d1 = (x - c_d_x);
377	fp16 x_s = zcos_a * x_d1 + zsin_a * y_d1 + c_tx;
378	fp16 y_s = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty;
379	uint8_t dest = &dat_2[x + y td->destbuf.linesize[plane]];
380	// inlining the interpolation function would bring 10%
381	// (but then we cannot use the function pointer anymore...)
382	td->interpolate(dest, x_s, y_s, dat_1,
383	td->src.linesize[plane], sw, sh,
384	td->conf.crop ? black : *dest);
385	}
386	}
387	}
388
389	return VS_OK;
390	}
391
392
393
394	/* /\** TESTING */
395	/* * transformPlanar_orc: applies current transformation to frame */
396	/* * */
397	/* * Parameters: */
398	/* * td: private data structure of this filter */
399	/* * Return value: */
400	/* * 0 for failure, 1 for success */
401	/* * Preconditions: */
402	/* * The frame must be in Planar format */
403	/* * */
404	/* * Fixed-point format 32 bit integer: */
405	/* * for image coords we use val<<8 */
406	/* * for angle and zoom we use val<<16 */
407	/* * */
408	/* *\/ */
409	/* int transformPlanar_orc(VSTransformData* td, VSTransform t) */
410	/* { */
411	/* int32_t x = 0, y = 0; */
412	/* uint8_t *Y_1, *Y_2, *Cb_1, *Cb_2, *Cr_1, *Cr_2; */
413
414	/* if (t.alpha==0 && t.x==0 && t.y==0 && t.zoom == 0) return VS_OK; // noop */
415
416	/* Y_1 = td->src; */
417	/* Y_2 = td->destbuf; */
418	/* Cb_1 = td->src + td->fiSrc.width * td->fiSrc.height; */
419	/* Cb_2 = td->destbuf + td->fiDest.width * td->fiDest.height; */
420	/* Cr_1 = td->src + 5*td->fiSrc.width * td->fiSrc.height/4; */
421	/* Cr_2 = td->destbuf + 5*td->fiDest.width * td->fiDest.height/4; */
422	/* fp16 c_s_x = iToFp16(td->fiSrc.width / 2); */
423	/* fp16 c_s_y = iToFp16(td->fiSrc.height / 2); */
424	/* int32_t c_d_x = td->fiDest.width / 2; */
425	/* int32_t c_d_y = td->fiDest.height / 2; */
426
427	/* float z = 1.0-t.zoom/100.0; */
428	/* fp16 zcos_a = fToFp16(z*cos(-t.alpha)); // scaled cos */
429	/* fp16 zsin_a = fToFp16(z*sin(-t.alpha)); // scaled sin */
430	/* fp16 c_tx = c_s_x - fToFp16(t.x); */
431	/* fp16 c_ty = c_s_y - fToFp16(t.y); */
432
433	/* /\* for each pixel in the destination image we calc the source */
434	/* * coordinate and make an interpolation: */
435	/* * p_d = c_d + M(p_s - c_s) + t */
436	/* * where p are the points, c the center coordinate, */
437	/* * _s source and _d destination, */
438	/* * t the translation, and M the rotation and scaling matrix */
439	/* * p_s = M^{-1}(p_d - c_d - t) + c_s */
440	/* *\/ */
441	/* /\* Luminance channel *\/ */
442	/* fp16* x_ss = (fp16*)malloc(sizeof(fp16)*td->fiDest.width); */
443	/* fp16* y_ss = (fp16*)malloc(sizeof(fp16)*td->fiDest.width); */
444	/* int32_t* xs = (int32_t*)malloc(sizeof(int32_t)*td->fiDest.width); */
445	/* for (x = 0; x < td->fiDest.width; x++) { // this can go to td */
446	/* xs[x]=x; */
447	/* } */
448
449	/* for (y = 0; y < td->fiDest.height; y++) { */
450	/* int32_t y_d1 = (y - c_d_y); */
451	/* fp16 sin_y = zsin_a * y_d1; */
452	/* fp16 cos_y = zcos_a * y_d1; */
453	/* for (x = 0; x < td->fiDest.width; x++) { */
454	/* int32_t x_d1 = (xs[x] - c_d_x); */
455	/* //x_ss[x] = zcos_a * x_d1 + zsin_a * y_d1 + c_tx; */
456	/* y_ss[x] = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty; */
457	/* } */
458	/* transform_one_line_optimized1 (x_ss, y_ss, xs, y_d1, c_d_x, */
459	/* c_tx, c_ty, zcos_a, zsin_a, sin_y, cos_y, */
460	/* td->fiDest.width); */
461	/* // transform_one_line_optimized (x_ss, y_ss, xs, y_d1, c_d_x, */
462	/* // c_tx, c_ty, zcos_a, zsin_a, td->fiDest.width); */
463
464	/* for (x = 0; x < td->fiDest.width; x++) { */
465	/* uint8_t *dest = &Y_2[x + y * td->fiDest.width]; */
466	/* td->interpolate(dest, x_ss[x], y_ss[x], Y_1, */
467	/* td->fiSrc.width, td->fiSrc.height, */
468	/* td->crop ? 16 : *dest); */
469	/* } */
470	/* } */
471
472	/* /\* Color channels *\/ */
473	/* int32_t ws2 = td->fiSrc.width/2; */
474	/* int32_t wd2 = td->fiDest.width/2; */
475	/* int32_t hs2 = td->fiSrc.height/2; */
476	/* int32_t hd2 = td->fiDest.height/2; */
477	/* fp16 c_tx2 = c_tx/2; */
478	/* fp16 c_ty2 = c_ty/2; */
479
480	/* for (y = 0; y < hd2; y++) { */
481	/* int32_t y_d1 = y - (c_d_y)/2; */
482	/* for (x = 0; x < wd2; x++) { */
483	/* int32_t x_d1 = x - (c_d_x)/2; */
484	/* fp16 x_s = zcos_a * x_d1 + zsin_a * y_d1 + c_tx2; */
485	/* fp16 y_s = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty2; */
486	/* uint8_t *dest = &Cr_2[x + y * wd2]; */
487	/* td->interpolate(dest, x_s, y_s, Cr_1, ws2, hs2, */
488	/* td->crop ? 128 : *dest); */
489	/* dest = &Cb_2[x + y * wd2]; */
490	/* td->interpolate(dest, x_s, y_s, Cb_1, ws2, hs2, */
491	/* td->crop ? 128 : *dest); */
492	/* } */
493	/* } */
494
495	/* return VS_OK; */
496	/* } */
497
498	/*
499	some debugging stuff
500	FILE* f1 = fopen("transFP.pos","w");
501	fprintf(f1,"%i,%i:\t %f,%f\n", x, y, x_s / (float)(1<<16), y_s / (float)(1<<16));
502	fclose(f1);
503
504	*/
505
506
507
508	/*
509	* Local variables:
510	* c-file-style: "stroustrup"
511	* c-file-offsets: ((case-label . *) (statement-case-intro . *))
512	* indent-tabs-mode: nil
513	* c-basic-offset: 2 t
514	*
515	* End:
516	*
517	* vim: expandtab shiftwidth=2:
518	*/