src/orc/transformorc.orc

   1 ################################################################################
   2 # Optimized functions
   3 ################################################################################
   4
   5 # Hint: use only one space between opcode and operands (and also between them)
   6
   7 # transform_one_line_optimized: rotation and translation of one line
   8 #
   9 # for (x = 0; x < td->fiDest.width; x++) {
  10 #       int32_t x_d1 = (xs[x] - c_d_x);
  11 #       x_ss[x]  =  zcos_a * x_d1 + zsin_a * y_d1 + c_tx;
  12 #       y_ss[x]  = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty;
  13 # }
  14 # y_d1 is a per-call parameter; only xs varies from element to element.
  15 # Each product is formed with mulll; sums and differences use addl/subl.
  16 .function transform_one_line_optimized
  17 .dest 4 x_ss int32_t # fp16
  18 .dest 4 y_ss int32_t # fp16
  19 .source 4 xs int32_t
  20 .param 4 y_d1 int32_t
  21 .param 4 c_d_x int32_t
  22 .param 4 c_tx int32_t # fp16
  23 .param 4 c_ty int32_t # fp16
  24 .param 4 zcos_a int32_t # fp16
  25 .param 4 zsin_a int32_t # fp16
  26 .temp 4 dx
  27 .temp 4 acc
  28 .temp 4 term
  29 # dx = xs - c_d_x feeds both outputs: x_ss first, then y_ss
  30 subl dx xs c_d_x
  31 mulll term y_d1 zsin_a
  32 mulll acc dx zcos_a
  33 addl acc acc term
  34 addl x_ss acc c_tx
  35 mulll term dx zsin_a
  36 mulll acc y_d1 zcos_a
  37 subl acc acc term
  38 addl y_ss acc c_ty
  39 # x-only kernel: x_ss[x] = zcos_a * (xs[x] - c_d_x) + sin_y + c_tx
  40 # y_d1, c_ty, zsin_a and cos_y are unused; kept so the signature is unchanged.
  41 .function transform_one_line_optimized1
  42 .dest 4 x_ss int32_t    # fp16
  43 .source 4 xs int32_t
  44 .param 4 y_d1 int32_t
  45 .param 4 c_d_x int32_t
  46 .param 4 c_tx int32_t   # fp16
  47 .param 4 c_ty int32_t   # fp16
  48 .param 4 zcos_a int32_t # fp16
  49 .param 4 zsin_a int32_t # fp16
  50 .param 4 sin_y int32_t # fp16
  51 .param 4 cos_y int32_t # fp16
  52 .temp 4 x_d1
  53 .temp 4 tmp1
  54
  55 subl x_d1, xs, c_d_x
  56 mulll tmp1, x_d1, zcos_a
  57 addl tmp1, tmp1, sin_y
  58 addl x_ss, tmp1, c_tx
  59 # No y_ss destination is declared, so the y half is not computed here.
  60 # It would be -zsin_a * x_d1 + cos_y, and needs its own
  61 # ".dest 4 y_ss int32_t" before the disabled store below can be enabled.
  62 # addl y_ss, tmp1, c_ty
  63
  64
  65