| 1 | ################################################################################ |
| 2 | # Optimized functions |
| 3 | ################################################################################ |
| 4 | |
| 5 | # Hint: use only one space between opcode and operands (and also between them) |
| 6 | |
| 7 | # Rotation and Translation of one line |
| 8 | |
| 9 | # for (x = 0; x < td->fiDest.width; x++) { |
| 10 | # int32_t x_d1 = (xs[x] - c_d_x); |
| 11 | # x_ss[x] = zcos_a * x_d1 + zsin_a * y_d1 + c_tx; |
| 12 | # y_ss[x] = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty; |
| 13 | # } |
| 14 | |
| 15 | |
# transform_one_line_optimized
#
# For every element of xs, writes one rotated and translated coordinate pair:
#   dx   = xs - c_d_x
#   x_ss = zcos_a * dx + zsin_a * y_d1 + c_tx
#   y_ss = zcos_a * y_d1 - zsin_a * dx + c_ty
#
# Values tagged "fp16" below are treated as fixed point by the author's
# annotation; the kernel itself only does plain 32-bit integer arithmetic
# (mulll keeps the low 32 bits of each product).
.function transform_one_line_optimized
.dest 4 x_ss int32_t # fp16
.dest 4 y_ss int32_t # fp16
.source 4 xs int32_t
.param 4 y_d1 int32_t
.param 4 c_d_x int32_t
.param 4 c_tx int32_t # fp16
.param 4 c_ty int32_t # fp16
.param 4 zcos_a int32_t # fp16
.param 4 zsin_a int32_t # fp16
.temp 4 dx
.temp 4 acc
.temp 4 prod

# Horizontal offset of the current element from c_d_x.
subl dx, xs, c_d_x

# x_ss: cosine term plus sine term, then the x translation.
mulll acc, zcos_a, dx
mulll prod, zsin_a, y_d1
addl acc, acc, prod
addl x_ss, acc, c_tx

# y_ss: cosine term minus sine term, then the y translation.
mulll acc, zcos_a, y_d1
mulll prod, zsin_a, dx
subl acc, acc, prod
addl y_ss, acc, c_ty
| 39 | |
| 40 | |
# transform_one_line_optimized1
#
# Variant that produces only the x coordinate of each element:
#   x_d1 = xs - c_d_x
#   x_ss = x_d1 * zcos_a + sin_y + c_tx
#
# sin_y takes the place of a per-element sine product; presumably the caller
# precomputes it once per line (e.g. as zsin_a * y_d1) -- confirm against
# the call site.
#
# The parameters y_d1, c_ty, zsin_a and cos_y are not read by this kernel.
# They stay declared so the generated function signature does not change.
#
# NOTE(review): an earlier revision also evaluated
#   -(x_d1 * zsin_a) + cos_y
# into a temporary, but the result was never stored: there is no y_ss
# destination and the final store was commented out. That dead per-element
# work has been removed. To finish the y path, add a ".dest 4 y_ss int32_t"
# declaration, recompute the expression above, and add c_ty to *that* result
# (the commented-out store read the value from before cos_y was added).
.function transform_one_line_optimized1
.dest 4 x_ss int32_t # fp16
.source 4 xs int32_t
.param 4 y_d1 int32_t
.param 4 c_d_x int32_t
.param 4 c_tx int32_t # fp16
.param 4 c_ty int32_t # fp16
.param 4 zcos_a int32_t # fp16
.param 4 zsin_a int32_t # fp16
.param 4 sin_y int32_t # fp16
.param 4 cos_y int32_t # fp16
.temp 4 x_d1
.temp 4 tmp1

# Horizontal offset of the current element from c_d_x.
subl x_d1, xs, c_d_x

# x_ss = x_d1 * zcos_a + sin_y + c_tx
mulll tmp1, x_d1, zcos_a
addl tmp1, tmp1, sin_y
addl x_ss, tmp1, c_tx
| 63 | |
| 64 | |
| 65 | |