Commit | Line | Data |
---|---|---|
80f575fc DM |
1 | ################################################################################ |
2 | # Optimized functions | |
3 | ################################################################################ | |
4 | ||
5 | # Hint: use only one space between opcode and operands (and also between them) | |
6 | ||
7 | # Rotation and Translation of one line | |
8 | ||
9 | # for (x = 0; x < td->fiDest.width; x++) { | |
10 | # int32_t x_d1 = (xs[x] - c_d_x); | |
11 | # x_ss[x] = zcos_a * x_d1 + zsin_a * y_d1 + c_tx; | |
12 | # y_ss[x] = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty; | |
13 | # } | |
14 | ||
15 | ||
16 | .function transform_one_line_optimized | |
17 | .dest 4 x_ss int32_t # fp16 | |
18 | .dest 4 y_ss int32_t # fp16 | |
19 | .source 4 xs int32_t | |
20 | .param 4 y_d1 int32_t | |
21 | .param 4 c_d_x int32_t | |
22 | .param 4 c_tx int32_t # fp16 | |
23 | .param 4 c_ty int32_t # fp16 | |
24 | .param 4 zcos_a int32_t # fp16 | |
25 | .param 4 zsin_a int32_t # fp16 | |
26 | .temp 4 dx | |
27 | .temp 4 acc | |
28 | .temp 4 term | |
29 | # dx = xs - c_d_x; x_ss = zcos_a*dx + zsin_a*y_d1 + c_tx; y_ss = zcos_a*y_d1 - zsin_a*dx + c_ty | |
30 | subl dx, xs, c_d_x | |
31 | mulll term, y_d1, zsin_a | |
32 | mulll acc, dx, zcos_a | |
33 | addl acc, acc, term | |
34 | addl x_ss, acc, c_tx | |
35 | mulll term, dx, zsin_a | |
36 | mulll acc, y_d1, zcos_a | |
37 | subl acc, acc, term | |
38 | addl y_ss, acc, c_ty | |
39 | ||
40 | ||
41 | .function transform_one_line_optimized1 | |
42 | .dest 4 x_ss int32_t # fp16 | |
43 | .source 4 xs int32_t | |
44 | .param 4 y_d1 int32_t # not referenced by this variant | |
45 | .param 4 c_d_x int32_t | |
46 | .param 4 c_tx int32_t # fp16 | |
47 | .param 4 c_ty int32_t # fp16, not referenced (no y_ss destination) | |
48 | .param 4 zcos_a int32_t # fp16 | |
49 | .param 4 zsin_a int32_t # fp16, not referenced (only the y half would need it) | |
50 | .param 4 sin_y int32_t # fp16, added unchanged to the x sum | |
51 | .param 4 cos_y int32_t # fp16, not referenced (only the y half would need it) | |
52 | .temp 4 x_d1 | |
53 | .temp 4 tmp1 | |
54 | # x_ss[x] = zcos_a * (xs[x] - c_d_x) + sin_y + c_tx | |
55 | subl x_d1, xs, c_d_x | |
56 | mulll tmp1, x_d1, zcos_a | |
57 | addl tmp1, tmp1, sin_y | |
58 | addl x_ss, tmp1, c_tx | |
59 | # Only x_ss is produced. No y_ss destination is declared, so the y half | |
60 | # (cos_y - zsin_a * x_d1, formerly computed into a temp and then discarded) | |
61 | # is omitted. To restore it: declare ".dest 4 y_ss int32_t", compute that value, | |
62 | # and add c_ty to it (the old disabled store added c_ty to tmp1, not to the sum). | |
63 | ||
64 | ||
65 |