Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> | |
3 | * | |
4 | * This file is part of FFmpeg. | |
5 | * | |
6 | * FFmpeg is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * FFmpeg is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with FFmpeg; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | */ | |
20 | ||
21 | #include "libavutil/arm/asm.S" | |
22 | ||
23 | function ff_pix_abs16_armv6, export=1 /* Sum of absolute byte differences between the 16-byte rows at r1 and r2, both stepped by r3 after each row; the total is returned in r0. */ | |
24 | ldr r0, [sp] /* r0 = row counter taken from the first stack word; the incoming r0 is overwritten unread. NOTE(review): presumably the 5th C argument (block height) per AAPCS - confirm against the C prototype. */ | |
25 | push {r4-r9, lr} /* save the callee-saved registers used below plus the return address */ | |
26 | mov r12, #0 /* r12 = accumulator A (words 0 and 2 of each row) */ | |
27 | mov lr, #0 /* lr = accumulator B (words 1 and 3); presumably split in two to shorten the dependency chain between back-to-back usada8 */ | |
28 | ldm r1, {r4-r7} /* r4-r7 = the 16 bytes of the first r1 row */ | |
29 | ldr r8, [r2] /* r8 = word 0 of the first r2 row */ | |
30 | 1: /* per-row loop; on entry r4-r7 and r8 are already loaded for this row */ | |
31 | ldr r9, [r2, #4] /* r9 = r2 word 1 */ | |
32 | pld [r1, r3] /* preload hint for the next r1 row */ | |
33 | usada8 r12, r4, r8, r12 /* A += sum over the 4 byte lanes of abs(r4 byte - r8 byte) */ | |
34 | ldr r8, [r2, #8] /* r8 = r2 word 2 */ | |
35 | pld [r2, r3] /* preload hint for the next r2 row */ | |
36 | usada8 lr, r5, r9, lr /* B += SAD of word 1 */ | |
37 | ldr r9, [r2, #12] /* r9 = r2 word 3 */ | |
38 | usada8 r12, r6, r8, r12 /* A += SAD of word 2 */ | |
39 | subs r0, r0, #1 /* count this row; the usada8 that follows does not write N/Z/C/V, so beq tests this result */ | |
40 | usada8 lr, r7, r9, lr /* B += SAD of word 3 */ | |
41 | beq 2f /* leave only when the counter is exactly zero, so the count must be nonzero on entry */ | |
42 | add r1, r1, r3 /* step r1 to the next row */ | |
43 | ldm r1, {r4-r7} /* load the next r1 row */ | |
44 | add r2, r2, r3 /* step r2 to the next row */ | |
45 | ldr r8, [r2] /* word 0 of the next r2 row, ready for the loop head */ | |
46 | b 1b | |
47 | 2: | |
48 | add r0, r12, lr /* return value = A + B */ | |
49 | pop {r4-r9, pc} /* restore registers and return by popping the saved lr into pc */ | |
50 | endfunc | |
51 | ||
52 | function ff_pix_abs16_x2_armv6, export=1 /* Sum of absolute differences between each 16-byte r1 row and the per-byte rounded-up average of the r2 row with the same row shifted by one byte; 17 bytes are read from r2 per row. Total is returned in r0. */ | |
53 | ldr r12, [sp] /* r12 = row counter from the first stack word. NOTE(review): presumably the 5th C argument (block height) per AAPCS - confirm against the C prototype. */ | |
54 | push {r4-r11, lr} /* save the callee-saved registers used below plus the return address */ | |
55 | mov r0, #0 /* r0 = running total; the incoming r0 is discarded unread */ | |
56 | mov lr, #1 | |
57 | orr lr, lr, lr, lsl #8 | |
58 | orr lr, lr, lr, lsl #16 /* lr = 0x01010101: bit 0 of every byte lane */ | |
59 | 1: /* one row per iteration */ | |
60 | ldr r8, [r2] /* r8 = r2 word 0 */ | |
61 | ldr r9, [r2, #4] /* r9 = r2 word 1 */ | |
62 | lsr r10, r8, #8 | |
63 | ldr r4, [r1] /* r4 = r1 word 0 */ | |
64 | lsr r6, r9, #8 | |
65 | orr r10, r10, r9, lsl #24 /* r10 = (r8 >> 8) with the low byte of r9 on top, i.e. r2 bytes 1-4 assuming little-endian byte order */ | |
66 | ldr r5, [r2, #8] /* r5 = r2 word 2 */ | |
67 | eor r11, r8, r10 | |
68 | uhadd8 r7, r8, r10 /* per-byte (a + b) >> 1, rounded down */ | |
69 | orr r6, r6, r5, lsl #24 /* r6 = word 1 shifted by one byte in the same way */ | |
70 | and r11, r11, lr /* per-byte (a ^ b) & 1: the bit dropped by the halving add */ | |
71 | uadd8 r7, r7, r11 /* r7 = per-byte (a + b + 1) >> 1 */ | |
72 | ldr r8, [r1, #4] /* r8 is free again: r1 word 1 */ | |
73 | usada8 r0, r4, r7, r0 /* total += SAD of r1 word 0 against the averaged word */ | |
74 | eor r7, r9, r6 | |
75 | lsr r10, r5, #8 | |
76 | and r7, r7, lr /* rounding bits for word 1 */ | |
77 | uhadd8 r4, r9, r6 | |
78 | ldr r6, [r2, #12] /* r6 = r2 word 3 */ | |
79 | uadd8 r4, r4, r7 /* r4 = rounded-up average for word 1 */ | |
80 | pld [r1, r3] /* preload hint for the next r1 row */ | |
81 | orr r10, r10, r6, lsl #24 /* r10 = word 2 shifted by one byte */ | |
82 | usada8 r0, r8, r4, r0 /* total += SAD of word 1 */ | |
83 | ldr r4, [r1, #8] /* r4 = r1 word 2 */ | |
84 | eor r11, r5, r10 | |
85 | ldrb r7, [r2, #16] /* 17th byte of the r2 row, needed to shift word 3 */ | |
86 | and r11, r11, lr /* rounding bits for word 2 */ | |
87 | uhadd8 r8, r5, r10 | |
88 | ldr r5, [r1, #12] /* r5 is free again: r1 word 3 */ | |
89 | uadd8 r8, r8, r11 /* r8 = rounded-up average for word 2 */ | |
90 | pld [r2, r3] /* preload hint for the next r2 row */ | |
91 | lsr r10, r6, #8 | |
92 | usada8 r0, r4, r8, r0 /* total += SAD of word 2 */ | |
93 | orr r10, r10, r7, lsl #24 /* r10 = word 3 shifted by one byte, topped up with the 17th byte */ | |
94 | subs r12, r12, #1 /* count this row; nothing later in the loop writes N/Z/C/V, so bgt tests this result */ | |
95 | eor r11, r6, r10 | |
96 | add r1, r1, r3 /* step r1 to the next row */ | |
97 | uhadd8 r9, r6, r10 | |
98 | and r11, r11, lr /* rounding bits for word 3 */ | |
99 | uadd8 r9, r9, r11 /* r9 = rounded-up average for word 3 */ | |
100 | add r2, r2, r3 /* step r2 to the next row */ | |
101 | usada8 r0, r5, r9, r0 /* total += SAD of word 3 */ | |
102 | bgt 1b /* repeat while the counter is still positive (signed); the body always runs at least once */ | |
103 | ||
104 | pop {r4-r11, pc} /* restore registers and return; the result is already in r0 */ | |
105 | endfunc | |
106 | ||
107 | .macro usad_y2 p0, p1, p2, p3, n0, n1, n2, n3 /* Process one 16-byte row: r0 += SAD of the r1 row against the per-byte rounded-up average of \p0-\p3 and the four words read from r2. On exit \n0-\n3 hold the r2 words just read, \p0-\p3 are clobbered, and r1 and r2 have advanced by r3. lr is used as the per-byte rounding mask (0x01010101 in the visible caller). No instruction here writes N/Z/C/V. */ | |
108 | ldr \n0, [r2] /* \n0 = current r2 word 0 */ | |
109 | eor \n1, \p0, \n0 | |
110 | uhadd8 \p0, \p0, \n0 /* per-byte (a + b) >> 1, rounded down */ | |
111 | and \n1, \n1, lr /* per-byte (a ^ b) & 1: the bit dropped by the halving add */ | |
112 | ldr \n2, [r1] /* \n2 temporarily holds r1 word 0 */ | |
113 | uadd8 \p0, \p0, \n1 /* \p0 = per-byte (a + b + 1) >> 1 */ | |
114 | ldr \n1, [r2, #4] /* \n1 = current r2 word 1 (its use as a temporary is over) */ | |
115 | usada8 r0, \p0, \n2, r0 /* total += SAD of word 0 */ | |
116 | pld [r1, r3] /* preload hint for the next r1 row */ | |
117 | eor \n3, \p1, \n1 | |
118 | uhadd8 \p1, \p1, \n1 | |
119 | and \n3, \n3, lr /* rounding bits for word 1 */ | |
120 | ldr \p0, [r1, #4] /* \p0 is dead, reused for r1 word 1 */ | |
121 | uadd8 \p1, \p1, \n3 /* \p1 = rounded-up average for word 1 */ | |
122 | ldr \n2, [r2, #8] /* \n2 = current r2 word 2 */ | |
123 | usada8 r0, \p1, \p0, r0 /* total += SAD of word 1 */ | |
124 | pld [r2, r3] /* preload hint for the next r2 row */ | |
125 | eor \p0, \p2, \n2 | |
126 | uhadd8 \p2, \p2, \n2 | |
127 | and \p0, \p0, lr /* rounding bits for word 2 */ | |
128 | ldr \p1, [r1, #8] /* \p1 is dead, reused for r1 word 2 */ | |
129 | uadd8 \p2, \p2, \p0 /* \p2 = rounded-up average for word 2 */ | |
130 | ldr \n3, [r2, #12] /* \n3 = current r2 word 3 */ | |
131 | usada8 r0, \p2, \p1, r0 /* total += SAD of word 2 */ | |
132 | eor \p1, \p3, \n3 | |
133 | uhadd8 \p3, \p3, \n3 | |
134 | and \p1, \p1, lr /* rounding bits for word 3 */ | |
135 | ldr \p0, [r1, #12] /* r1 word 3 */ | |
136 | uadd8 \p3, \p3, \p1 /* \p3 = rounded-up average for word 3 */ | |
137 | add r1, r1, r3 /* step r1 to the next row */ | |
138 | usada8 r0, \p3, \p0, r0 /* total += SAD of word 3 */ | |
139 | add r2, r2, r3 /* step r2 to the next row */ | |
140 | .endm | |
141 | ||
142 | function ff_pix_abs16_y2_armv6, export=1 /* Sum of absolute differences between each 16-byte r1 row and the per-byte rounded-up average of two consecutive r2 rows (one r3 step apart); total is returned in r0. */ | |
143 | pld [r1] /* preload hints for the first rows */ | |
144 | pld [r2] | |
145 | ldr r12, [sp] /* r12 = row counter from the first stack word. NOTE(review): presumably the 5th C argument (block height) per AAPCS - confirm against the C prototype. */ | |
146 | push {r4-r11, lr} /* save the callee-saved registers used below plus the return address */ | |
147 | mov r0, #0 /* r0 = running total; the incoming r0 is discarded unread */ | |
148 | mov lr, #1 | |
149 | orr lr, lr, lr, lsl #8 | |
150 | orr lr, lr, lr, lsl #16 /* lr = 0x01010101: rounding mask used by usad_y2 */ | |
151 | ldr r4, [r2] /* r4-r7 = first r2 row, the "previous row" for the first usad_y2 */ | |
152 | ldr r5, [r2, #4] | |
153 | ldr r6, [r2, #8] | |
154 | ldr r7, [r2, #12] | |
155 | add r2, r2, r3 /* r2 now points at the second r2 row */ | |
156 | 1: /* two rows per iteration */ | |
157 | usad_y2 r4, r5, r6, r7, r8, r9, r10, r11 /* previous row in r4-r7; leaves the row just read in r8-r11 */ | |
158 | subs r12, r12, #2 /* count both rows of this iteration; usad_y2 writes no N/Z/C/V, so bgt tests this result */ | |
159 | usad_y2 r8, r9, r10, r11, r4, r5, r6, r7 /* roles swapped: previous row in r8-r11, new row left in r4-r7 for the next pass */ | |
160 | bgt 1b /* NOTE(review): rows are always consumed in pairs, so an odd count processes one row more than the count - presumably callers pass an even height; confirm */ | |
161 | ||
162 | pop {r4-r11, pc} /* restore registers and return; the result is already in r0 */ | |
163 | endfunc | |
164 | ||
165 | function ff_pix_abs8_armv6, export=1 /* Sum of absolute byte differences between the 8-byte rows at r1 and r2, both stepped by r3 per row; two rows are handled per loop pass and the total is returned in r0. */ | |
166 | pld [r2, r3] /* preload hint for the second r2 row */ | |
167 | ldr r12, [sp] /* r12 = row counter from the first stack word. NOTE(review): presumably the 5th C argument (block height) per AAPCS - confirm against the C prototype. */ | |
168 | push {r4-r9, lr} /* save the callee-saved registers used below plus the return address */ | |
169 | mov r0, #0 /* r0 = accumulator for word 0 of each row; the incoming r0 is discarded unread */ | |
170 | mov lr, #0 /* lr = accumulator for word 1 of each row */ | |
171 | ldrd_post r4, r5, r1, r3 /* NOTE(review): ldrd_post and ldr_post are macros defined outside this view (presumably libavutil/arm/asm.S); they are read here as "load, then add the last operand to the base register" - confirm. r4,r5 = first r1 row. */ | |
172 | 1: /* on entry r4,r5 hold the r1 row that pairs with the r2 row at [r2] */ | |
173 | subs r12, r12, #2 /* count two rows; tested by beq below, assuming the *_post macros leave N/Z/C/V alone */ | |
174 | ldr r7, [r2, #4] /* r2 word 1, read before ldr_post moves r2 */ | |
175 | ldr_post r6, r2, r3 /* r2 word 0, then step r2 */ | |
176 | ldrd_post r8, r9, r1, r3 /* r8,r9 = second r1 row of the pair */ | |
177 | usada8 r0, r4, r6, r0 /* first row, word 0 */ | |
178 | pld [r2, r3] /* preload hint one row ahead of r2 */ | |
179 | usada8 lr, r5, r7, lr /* first row, word 1 */ | |
180 | ldr r7, [r2, #4] /* second r2 row, word 1 */ | |
181 | ldr_post r6, r2, r3 /* second r2 row, word 0, then step r2 */ | |
182 | beq 2f /* counter reached exactly zero: finish the pending second row at 2: without loading more; the count must therefore be even and nonzero */ | |
183 | ldrd_post r4, r5, r1, r3 /* first r1 row of the next pair */ | |
184 | usada8 r0, r8, r6, r0 /* second row, word 0 */ | |
185 | pld [r2, r3] | |
186 | usada8 lr, r9, r7, lr /* second row, word 1 */ | |
187 | b 1b | |
188 | 2: /* tail: accumulate the last loaded row */ | |
189 | usada8 r0, r8, r6, r0 | |
190 | usada8 lr, r9, r7, lr | |
191 | add r0, r0, lr /* return value = sum of both accumulators */ | |
192 | pop {r4-r9, pc} /* restore registers and return */ | |
193 | endfunc | |
194 | ||
195 | function ff_sse16_armv6, export=1 /* Sum of squared byte differences between the 16-byte rows at r1 and r2, both stepped by r3 per row; the total is returned in r0. */ | |
196 | ldr r12, [sp] /* r12 = row counter from the first stack word. NOTE(review): presumably the 5th C argument (block height) per AAPCS - confirm against the C prototype. */ | |
197 | push {r4-r9, lr} /* save the callee-saved registers used below plus the return address */ | |
198 | mov r0, #0 /* r0 = running total; the incoming r0 is discarded unread */ | |
199 | 1: /* one row per iteration */ | |
200 | ldrd r4, r5, [r1] /* r4,r5 = r1 words 0 and 1 */ | |
201 | ldr r8, [r2] /* r8 = r2 word 0 */ | |
202 | uxtb16 lr, r4 /* register bytes 0 and 2 (bits 7:0 and 23:16) zero-extended into two halfwords */ | |
203 | uxtb16 r4, r4, ror #8 /* register bytes 1 and 3, via the rotate */ | |
204 | uxtb16 r9, r8 | |
205 | uxtb16 r8, r8, ror #8 | |
206 | ldr r7, [r2, #4] /* r7 = r2 word 1 */ | |
207 | usub16 lr, lr, r9 /* per-halfword difference of two 0..255 values; fits a signed 16-bit lane */ | |
208 | usub16 r4, r4, r8 | |
209 | smlad r0, lr, lr, r0 /* total += lo*lo + hi*hi, lanes taken as signed halfwords */ | |
210 | uxtb16 r6, r5 /* word 1: even bytes of r1 */ | |
211 | uxtb16 lr, r5, ror #8 /* word 1: odd bytes of r1 */ | |
212 | uxtb16 r8, r7 | |
213 | uxtb16 r9, r7, ror #8 | |
214 | smlad r0, r4, r4, r0 /* word 0, odd bytes */ | |
215 | ldrd r4, r5, [r1, #8] /* r4,r5 = r1 words 2 and 3 */ | |
216 | usub16 r6, r6, r8 | |
217 | usub16 r8, lr, r9 | |
218 | ldr r7, [r2, #8] /* r7 = r2 word 2 */ | |
219 | smlad r0, r6, r6, r0 /* word 1, even bytes */ | |
220 | uxtb16 lr, r4 | |
221 | uxtb16 r4, r4, ror #8 | |
222 | uxtb16 r9, r7 | |
223 | uxtb16 r7, r7, ror #8 | |
224 | smlad r0, r8, r8, r0 /* word 1, odd bytes */ | |
225 | ldr r8, [r2, #12] /* r8 = r2 word 3 */ | |
226 | usub16 lr, lr, r9 | |
227 | usub16 r4, r4, r7 | |
228 | smlad r0, lr, lr, r0 /* word 2, even bytes */ | |
229 | uxtb16 r6, r5 | |
230 | uxtb16 r5, r5, ror #8 | |
231 | uxtb16 r9, r8 | |
232 | uxtb16 r8, r8, ror #8 | |
233 | smlad r0, r4, r4, r0 /* word 2, odd bytes */ | |
234 | usub16 r6, r6, r9 | |
235 | usub16 r5, r5, r8 | |
236 | smlad r0, r6, r6, r0 /* word 3, even bytes */ | |
237 | add r1, r1, r3 /* step r1 to the next row */ | |
238 | add r2, r2, r3 /* step r2 to the next row */ | |
239 | subs r12, r12, #1 /* count this row; the smlad that follows does not write N/Z/C/V, so bgt tests this result */ | |
240 | smlad r0, r5, r5, r0 /* word 3, odd bytes */ | |
241 | bgt 1b /* repeat while the counter is still positive (signed); the body always runs at least once */ | |
242 | ||
243 | pop {r4-r9, pc} /* restore registers and return; the result is already in r0 */ | |
244 | endfunc | |