Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Alpha optimized DSP utils | |
3 | * Copyright (c) 2002 Falk Hueffner <falk@debian.org> | |
4 | * | |
5 | * This file is part of FFmpeg. | |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * FFmpeg is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with FFmpeg; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
21 | ||
22 | #include "regdef.h" | |
23 | ||
24 | /* Some nicer register names: extra temporaries next to t0-t9. */ | |
25 | #define ta t10 | |
26 | #define tb t11 | |
27 | #define tc t12 | |
28 | #define td AT /* the assembler temporary, used directly: see .set noat below */ | |
29 | /* Danger: these overlap with the argument list and the return value. The code visible below keeps line_size in a3, h in a4 and the running sum in v0, and only ever uses te. */ | |
30 | #define te a5 | |
31 | #define tf a4 | |
32 | #define tg a3 | |
33 | #define th v0 | |
34 | ||
35 | .set noat # td is AT, so the assembler must not treat it as its own scratch register | |
36 | .set noreorder # the instruction order below is scheduled by hand | |
37 | .arch pca56 # target CPU named in the routine header; the code uses the MVI instruction perr | |
38 | .text | |
39 | ||
40 | /***************************************************************************** | |
41 | * int pix_abs16x16_mvi_asm(<unused>, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |
42 | * Returns in v0 the sum of the perr results (absolute byte differences) of h rows of 16 bytes. | |
43 | * a0 is never read and is reused as scratch. NOTE(review): presumably a context pointer; confirm | |
44 | * it and the type of h against the C prototype. a1 = pix1, read with ldq, so expected to be 8-byte | |
45 | * aligned; a2 = pix2, any alignment; a3 = line_size, assumed to be a multiple of 8; a4 = h, which | |
46 | * must be a nonzero multiple of 4 (a multiple of 2 is enough when pix2 is unaligned). | |
47 | * This code is written with a pca56 in mind. For ev6, one should really take the increased latency | |
48 | * of 3 cycles for MVI instructions into account. Loads are issued well ahead of the first use of | |
49 | * their register, because accessing a register before it has been fetched from memory stalls the CPU. | |
50 | */ | |
51 | .align 4 | |
52 | .globl pix_abs16x16_mvi_asm | |
53 | .ent pix_abs16x16_mvi_asm | |
54 | pix_abs16x16_mvi_asm: | |
55 | .frame sp, 0, ra, 0 # frame size 0, return address in ra | |
56 | .prologue 0 | |
57 | ||
58 | and a2, 7, t0 # t0 = pix2 & 7 | |
59 | clr v0 # running sum = 0 | |
60 | beq t0, $aligned # pix2 is 8-byte aligned: plain ldq path, 4 lines per iteration | |
61 | .align 4 | |
62 | $unaligned: | |
63 | /* Registers: | |
64 | line 0: | |
65 | t0: left_u -> left lo -> left | |
66 | t1: mid | |
67 | t2: right_u -> right hi -> right | |
68 | t3: ref left | |
69 | t4: ref right | |
70 | line 1: | |
71 | t5: left_u -> left lo -> left | |
72 | t6: mid | |
73 | t7: right_u -> right hi -> right | |
74 | t8: ref left | |
75 | t9: ref right | |
76 | temp: | |
77 | ta: left hi | |
78 | tb: right lo | |
79 | tc: error left | |
80 | td: error right */ | |
81 | ||
82 | /* load line 0: three quads covering the 16 unaligned pix2 bytes, two aligned pix1 quads */ | |
83 | ldq_u t0, 0(a2) # left_u | |
84 | ldq_u t1, 8(a2) # mid | |
85 | ldq_u t2, 16(a2) # right_u | |
86 | ldq t3, 0(a1) # ref left | |
87 | ldq t4, 8(a1) # ref right | |
88 | addq a1, a3, a1 # pix1 += line_size | |
89 | addq a2, a3, a2 # pix2 += line_size | |
90 | /* load line 1 */ | |
91 | ldq_u t5, 0(a2) # left_u | |
92 | ldq_u t6, 8(a2) # mid | |
93 | ldq_u t7, 16(a2) # right_u | |
94 | ldq t8, 0(a1) # ref left | |
95 | ldq t9, 8(a1) # ref right | |
96 | addq a1, a3, a1 # pix1 += line_size | |
97 | addq a2, a3, a2 # pix2 += line_size | |
98 | /* calc line 0: the ext* byte offset is a2 & 7; a2 has already moved on, which is harmless as long as line_size is a multiple of 8 */ | |
99 | extql t0, a2, t0 # left lo | |
100 | extqh t1, a2, ta # left hi | |
101 | extql t1, a2, tb # right lo | |
102 | or t0, ta, t0 # left | |
103 | extqh t2, a2, t2 # right hi | |
104 | perr t3, t0, tc # error left | |
105 | or t2, tb, t2 # right | |
106 | perr t4, t2, td # error right | |
107 | addq v0, tc, v0 # add error left | |
108 | addq v0, td, v0 # add error right | |
109 | /* calc line 1 */ | |
110 | extql t5, a2, t5 # left lo | |
111 | extqh t6, a2, ta # left hi | |
112 | extql t6, a2, tb # right lo | |
113 | or t5, ta, t5 # left | |
114 | extqh t7, a2, t7 # right hi | |
115 | perr t8, t5, tc # error left | |
116 | or t7, tb, t7 # right | |
117 | perr t9, t7, td # error right | |
118 | addq v0, tc, v0 # add error left | |
119 | addq v0, td, v0 # add error right | |
120 | /* loop */ | |
121 | subq a4, 2, a4 # h -= 2 | |
122 | bne a4, $unaligned # leaves the loop only when h reaches exactly 0 | |
123 | ret # result in v0 | |
124 | ||
125 | .align 4 | |
126 | $aligned: | |
127 | /* load line 0 */ | |
128 | ldq t0, 0(a2) # left | |
129 | ldq t1, 8(a2) # right | |
130 | addq a2, a3, a2 # pix2 += line_size | |
131 | ldq t2, 0(a1) # ref left | |
132 | ldq t3, 8(a1) # ref right | |
133 | addq a1, a3, a1 # pix1 += line_size | |
134 | /* load line 1 */ | |
135 | ldq t4, 0(a2) # left | |
136 | ldq t5, 8(a2) # right | |
137 | addq a2, a3, a2 # pix2 += line_size | |
138 | ldq t6, 0(a1) # ref left | |
139 | ldq t7, 8(a1) # ref right | |
140 | addq a1, a3, a1 # pix1 += line_size | |
141 | /* load line 2 */ | |
142 | ldq t8, 0(a2) # left | |
143 | ldq t9, 8(a2) # right | |
144 | addq a2, a3, a2 # pix2 += line_size | |
145 | ldq ta, 0(a1) # ref left | |
146 | ldq tb, 8(a1) # ref right | |
147 | addq a1, a3, a1 # pix1 += line_size | |
148 | /* load line 3 */ | |
149 | ldq tc, 0(a2) # left | |
150 | ldq td, 8(a2) # right | |
151 | addq a2, a3, a2 # pix2 += line_size | |
152 | ldq te, 0(a1) # ref left | |
153 | ldq a0, 8(a1) # ref right (a0 is never read as an argument, so it serves as scratch) | |
154 | /* calc line 0 */ | |
155 | perr t0, t2, t0 # error left | |
156 | addq a1, a3, a1 # pix1 += line_size (for line 3, placed between the perr instructions) | |
157 | perr t1, t3, t1 # error right | |
158 | addq v0, t0, v0 # add error left | |
159 | /* calc line 1 */ | |
160 | perr t4, t6, t0 # error left | |
161 | addq v0, t1, v0 # add error right | |
162 | perr t5, t7, t1 # error right | |
163 | addq v0, t0, v0 # add error left | |
164 | /* calc line 2 */ | |
165 | perr t8, ta, t0 # error left | |
166 | addq v0, t1, v0 # add error right | |
167 | perr t9, tb, t1 # error right | |
168 | addq v0, t0, v0 # add error left | |
169 | /* calc line 3 */ | |
170 | perr tc, te, t0 # error left | |
171 | addq v0, t1, v0 # add error right | |
172 | perr td, a0, t1 # error right | |
173 | addq v0, t0, v0 # add error left | |
174 | addq v0, t1, v0 # add error right | |
175 | /* loop */ | |
176 | subq a4, 4, a4 # h -= 4 | |
177 | bne a4, $aligned # leaves the loop only when h reaches exactly 0 | |
178 | ret # result in v0 | |
179 | .end pix_abs16x16_mvi_asm |