Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> | |
3 | * | |
4 | * This file is part of FFmpeg. | |
5 | * | |
6 | * FFmpeg is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * FFmpeg is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with FFmpeg; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | */ | |
20 | ||
21 | #include "libavutil/arm/asm.S" | |
22 | ||
/*
 * call_2x_pixels type, subp
 *
 * Emits an exported function ff_<type>_pixels16<subp>_armv6 built from two
 * invocations of ff_<type>_pixels8<subp>_armv6: the first with the incoming
 * r0-r3, the second with r0 and r1 both advanced by 8 bytes and r2/r3 as
 * they were on entry.
 *
 * function, endfunc and X() are macros from libavutil/arm/asm.S, which is
 * not visible in this file.
 *
 * NOTE(review): five registers are pushed, so sp is at an odd word offset
 * during the first call - confirm none of the callees depend on 8-byte
 * stack alignment.
 */
23 | .macro call_2x_pixels type, subp | |
24 | function ff_\type\()_pixels16\subp\()_armv6, export=1 | |
/* Keep the original arguments and return address; the callee may change r0-r3. */
25 | push {r0-r3, lr} | |
/* First pass over the columns starting at r0 / r1. */
26 | bl X(ff_\type\()_pixels8\subp\()_armv6) | |
27 | pop {r0-r3, lr} | |
/* Second pass starts 8 bytes further along in both buffers. */
28 | add r0, r0, #8 | |
29 | add r1, r1, #8 | |
/* Tail call: lr was restored above, so the callee returns to our caller. */
30 | b X(ff_\type\()_pixels8\subp\()_armv6) | |
31 | endfunc | |
32 | .endm | |
33 | ||
/*
 * Instantiate the 16-wide wrappers. With the macro above these expand to:
 *   ff_avg_pixels16_armv6            -> 2 x ff_avg_pixels8_armv6
 *   ff_put_pixels16_x2_armv6         -> 2 x ff_put_pixels8_x2_armv6
 *   ff_put_pixels16_y2_armv6         -> 2 x ff_put_pixels8_y2_armv6
 *   ff_put_pixels16_x2_no_rnd_armv6  -> 2 x ff_put_pixels8_x2_no_rnd_armv6
 *   ff_put_pixels16_y2_no_rnd_armv6  -> 2 x ff_put_pixels8_y2_no_rnd_armv6
 * The plain ff_put_pixels16_armv6 is not generated here; it has its own
 * hand-written body below.
 */
34 | call_2x_pixels avg | |
35 | call_2x_pixels put, _x2 | |
36 | call_2x_pixels put, _y2 | |
37 | call_2x_pixels put, _x2_no_rnd | |
38 | call_2x_pixels put, _y2_no_rnd | |
39 | ||
/*
 * ff_put_pixels16_armv6
 *
 * Copies rows of 16 bytes from the buffer at r1 to the buffer at r0.
 *   r0  destination pointer (written with strd / strd_post)
 *   r1  source pointer (read with ldr / ldr_post)
 *   r2  offset used by the *_post macros to step both pointers to the next row
 *   r3  row counter; it is decremented by 2 per iteration and the loop only
 *       exits when it reaches exactly zero, so it must be a non-zero
 *       multiple of 2
 * NOTE(review): the register roles are read off the usage below; the C
 * prototype is not visible in this file - confirm against the caller.
 * ldr_post / strd_post come from libavutil/arm/asm.S (not shown);
 * presumably they access [base] and then add the last operand to base.
 * NOTE(review): strd is used on r0, so the destination presumably has to be
 * suitably aligned - confirm.
 */
40 | function ff_put_pixels16_armv6, export=1 | |
/* r4-r11 hold two full rows; lr is left untouched, hence the bx lr return. */
41 | push {r4-r11} | |
42 | 1: | |
/* First row: words 1-3 are fetched before word 0 so that the load of word 0 can advance r1. */
43 | ldr r5, [r1, #4] | |
44 | ldr r6, [r1, #8] | |
45 | ldr r7, [r1, #12] | |
46 | ldr_post r4, r1, r2 | |
/* Stores of the first row are interleaved with the loads of the second row (r8-r11). */
47 | strd r6, r7, [r0, #8] | |
48 | ldr r9, [r1, #4] | |
49 | strd_post r4, r5, r0, r2 | |
50 | ldr r10, [r1, #8] | |
51 | ldr r11, [r1, #12] | |
52 | ldr_post r8, r1, r2 | |
53 | strd r10, r11, [r0, #8] | |
/* Two rows done; the flags set here are consumed by the bne below. */
54 | subs r3, r3, #2 | |
55 | strd_post r8, r9, r0, r2 | |
56 | bne 1b | |
57 | ||
58 | pop {r4-r11} | |
59 | bx lr | |
60 | endfunc | |
61 | ||
/*
 * ff_put_pixels8_armv6
 *
 * Copies rows of 8 bytes from the buffer at r1 to the buffer at r0, two
 * rows per loop iteration.
 *   r0  destination pointer (written with strd_post)
 *   r1  source pointer (read with ldr / ldr_post)
 *   r2  offset used by the *_post macros to step both pointers to the next row
 *   r3  row counter; decremented by 2 per iteration and tested for exactly
 *       zero, so it must be a non-zero multiple of 2
 * NOTE(review): register roles are inferred from the usage below; the C
 * prototype is not visible here - confirm against the caller.
 * ldr_post / strd_post come from libavutil/arm/asm.S (not shown);
 * presumably they access [base] and then add the last operand to base.
 */
62 | function ff_put_pixels8_armv6, export=1 | |
63 | push {r4-r7} | |
64 | 1: | |
/* First row in r4/r5: the high word is read first so the low-word load can advance r1. */
65 | ldr r5, [r1, #4] | |
66 | ldr_post r4, r1, r2 | |
/* Second row in r6/r7, interleaved with the store of the first row. */
67 | ldr r7, [r1, #4] | |
68 | strd_post r4, r5, r0, r2 | |
69 | ldr_post r6, r1, r2 | |
/* Flags from this subs drive the bne at the bottom of the loop. */
70 | subs r3, r3, #2 | |
71 | strd_post r6, r7, r0, r2 | |
72 | bne 1b | |
73 | ||
74 | pop {r4-r7} | |
75 | bx lr | |
76 | endfunc | |
77 | ||
/*
 * ff_put_pixels8_x2_armv6
 *
 * For each row, writes 8 bytes to r0 where every output byte is the rounded
 * average of a source byte and the byte one position after it:
 *   out[i] = (src[i] + src[i + 1] + 1) >> 1,  i = 0..7
 * Nine source bytes (offsets 0..8) are therefore read per row.
 *   r0  destination pointer      r2  offset between consecutive rows
 *   r1  source pointer           r3  row counter, non-zero multiple of 2
 * NOTE(review): register roles are inferred from the usage below; the C
 * prototype is not visible here - confirm against the caller.
 *
 * Rounding trick: uhadd8 yields floor((a + b) / 2) in each byte lane; the
 * bit it drops is (a EOR b) AND 1, which is isolated with the 0x01010101
 * mask in r12 and added back with uadd8.
 *
 * The description of r6/r10 as "bytes 1..4" assumes little-endian byte
 * order; the loads at offset 5 are not word aligned, so unaligned word
 * loads must be permitted.
 * ldr_pre / strd_post come from libavutil/arm/asm.S (not shown); ldr_pre
 * presumably adds r2 to r1 before loading.
 */
78 | function ff_put_pixels8_x2_armv6, export=1 | |
/* lr (r14) is used as a scratch register below, so it is saved and popped into pc on exit. */
79 | push {r4-r11, lr} | |
/* r12 = 0x01010101: one in the least significant bit of every byte lane. */
80 | mov r12, #1 | |
81 | orr r12, r12, r12, lsl #8 | |
82 | orr r12, r12, r12, lsl #16 | |
83 | 1: | |
/* First row: r4 = bytes 0..3, r5 = bytes 4..7, r7 = bytes 5..8. */
84 | ldr r4, [r1] | |
85 | subs r3, r3, #2 | |
86 | ldr r5, [r1, #4] | |
87 | ldr r7, [r1, #5] | |
/* r6 = r4 shifted down one byte with the low byte of r5 on top, i.e. bytes 1..4. */
88 | lsr r6, r4, #8 | |
/* Second row: r8 / r9 / r11 / r10 mirror r4 / r5 / r7 / r6. */
89 | ldr_pre r8, r1, r2 | |
90 | orr r6, r6, r5, lsl #24 | |
91 | ldr r9, [r1, #4] | |
92 | ldr r11, [r1, #5] | |
93 | lsr r10, r8, #8 | |
/* Step r1 past the second row, ready for the next iteration. */
94 | add r1, r1, r2 | |
95 | orr r10, r10, r9, lsl #24 | |
/* First row: truncated average plus the rounding bit; r14 and r6 hold the EOR terms. */
96 | eor r14, r4, r6 | |
97 | uhadd8 r4, r4, r6 | |
98 | eor r6, r5, r7 | |
99 | uhadd8 r5, r5, r7 | |
100 | and r14, r14, r12 | |
101 | and r6, r6, r12 | |
102 | uadd8 r4, r4, r14 | |
/* Second row, same computation, interleaved with the tail of the first. */
103 | eor r14, r8, r10 | |
104 | uadd8 r5, r5, r6 | |
105 | eor r6, r9, r11 | |
106 | uhadd8 r8, r8, r10 | |
107 | and r14, r14, r12 | |
108 | uhadd8 r9, r9, r11 | |
109 | and r6, r6, r12 | |
110 | uadd8 r8, r8, r14 | |
111 | strd_post r4, r5, r0, r2 | |
112 | uadd8 r9, r9, r6 | |
113 | strd_post r8, r9, r0, r2 | |
/* Branches on the flags set by the subs near the top of the loop. */
114 | bne 1b | |
115 | ||
116 | pop {r4-r11, pc} | |
117 | endfunc | |
118 | ||
/*
 * ff_put_pixels8_y2_armv6
 *
 * For each output row, writes 8 bytes to r0 where every byte is the rounded
 * average of the source byte in the current row and the byte at the same
 * position one row further on:
 *   out[y][i] = (src[y][i] + src[y + 1][i] + 1) >> 1,  i = 0..7
 *   r0  destination pointer      r2  offset between consecutive rows
 *   r1  source pointer           r3  row counter, non-zero multiple of 2
 * NOTE(review): register roles are inferred from the usage below; the C
 * prototype is not visible here - confirm against the caller.
 *
 * Two source rows are kept in registers (r4/r5 and r6/r7) and reused, so
 * each iteration loads at most two new rows for two output rows.
 * Rounding: uhadd8 gives floor((a + b) / 2) per byte; the dropped bit,
 * (a EOR b) AND 1, is isolated with the 0x01010101 mask in r12 and added
 * back with uadd8.
 * ldr_pre / ldrc_pre / strd_post come from libavutil/arm/asm.S (not shown);
 * ldr_pre presumably adds r2 to r1 before loading, and ldrc_pre is
 * presumably its conditional form.
 */
119 | function ff_put_pixels8_y2_armv6, export=1 | |
120 | push {r4-r11} | |
/* r12 = 0x01010101: one in the least significant bit of every byte lane. */
121 | mov r12, #1 | |
122 | orr r12, r12, r12, lsl #8 | |
123 | orr r12, r12, r12, lsl #16 | |
/* Prime the pipeline: r4/r5 = first source row, r6/r7 = second source row. */
124 | ldr r4, [r1] | |
125 | ldr r5, [r1, #4] | |
126 | ldr_pre r6, r1, r2 | |
127 | ldr r7, [r1, #4] | |
128 | 1: | |
/* The flags set here are used by ldrc_pre ne, ldrne and the final bne. */
129 | subs r3, r3, #2 | |
/* First output row (r8/r9) = rounded average of rows r4/r5 and r6/r7. */
130 | uhadd8 r8, r4, r6 | |
131 | eor r10, r4, r6 | |
132 | uhadd8 r9, r5, r7 | |
133 | eor r11, r5, r7 | |
134 | and r10, r10, r12 | |
/* r4/r5 are no longer needed: reload them with the next source row. */
135 | ldr_pre r4, r1, r2 | |
136 | uadd8 r8, r8, r10 | |
137 | and r11, r11, r12 | |
138 | uadd8 r9, r9, r11 | |
139 | ldr r5, [r1, #4] | |
/* Second output row (r10/r11) = rounded average of the new r4/r5 and r6/r7. */
/* r6/r7 are overwritten with the EOR terms here; they are reloaded below if needed. */
140 | uhadd8 r10, r4, r6 | |
141 | eor r6, r4, r6 | |
142 | uhadd8 r11, r5, r7 | |
143 | and r6, r6, r12 | |
144 | eor r7, r5, r7 | |
145 | uadd8 r10, r10, r6 | |
146 | and r7, r7, r12 | |
/* Only when more rows remain (ne): fetch the following source row into r6/r7. */
147 | ldrc_pre ne, r6, r1, r2 | |
148 | uadd8 r11, r11, r7 | |
149 | strd_post r8, r9, r0, r2 | |
150 | it ne | |
151 | ldrne r7, [r1, #4] | |
152 | strd_post r10, r11, r0, r2 | |
153 | bne 1b | |
154 | ||
155 | pop {r4-r11} | |
156 | bx lr | |
157 | endfunc | |
158 | ||
/*
 * ff_put_pixels8_x2_no_rnd_armv6
 *
 * For each row, writes 8 bytes to r0 where every output byte is the
 * truncated average of a source byte and the byte one position after it:
 *   out[i] = (src[i] + src[i + 1]) >> 1,  i = 0..7
 * Only uhadd8 is used, so no rounding bit is added.
 * Nine source bytes (offsets 0..8) are read per row.
 *   r0  destination pointer      r2  offset between consecutive rows
 *   r1  source pointer           r3  row counter, non-zero multiple of 2
 * NOTE(review): register roles are inferred from the usage below; the C
 * prototype is not visible here - confirm against the caller.
 *
 * The description of r6/r12 as "bytes 1..4" assumes little-endian byte
 * order; the loads at offset 5 are not word aligned, so unaligned word
 * loads must be permitted.
 * ldr_pre comes from libavutil/arm/asm.S (not shown); presumably it adds
 * r2 to r1 before loading.
 */
159 | function ff_put_pixels8_x2_no_rnd_armv6, export=1 | |
/* r14 (lr) is used as a data register below, so it is saved and popped into pc on exit. */
160 | push {r4-r9, lr} | |
161 | 1: | |
/* The flags set here are consumed by the bne at the bottom of the loop. */
162 | subs r3, r3, #2 | |
/* First row: r4 = bytes 0..3, r5 = bytes 4..7, r7 = bytes 5..8. */
163 | ldr r4, [r1] | |
164 | ldr r5, [r1, #4] | |
165 | ldr r7, [r1, #5] | |
/* Second row: r8 = bytes 0..3, r9 = bytes 4..7, r14 = bytes 5..8. */
166 | ldr_pre r8, r1, r2 | |
167 | ldr r9, [r1, #4] | |
168 | ldr r14, [r1, #5] | |
/* Step r1 past the second row, ready for the next iteration. */
169 | add r1, r1, r2 | |
/* r6 and r12 = bytes 1..4 of the first and second row, assembled from the aligned words. */
170 | lsr r6, r4, #8 | |
171 | orr r6, r6, r5, lsl #24 | |
172 | lsr r12, r8, #8 | |
173 | orr r12, r12, r9, lsl #24 | |
/* Per-byte truncated averages of each word with its one-byte-shifted neighbour. */
174 | uhadd8 r4, r4, r6 | |
175 | uhadd8 r5, r5, r7 | |
176 | uhadd8 r8, r8, r12 | |
177 | uhadd8 r9, r9, r14 | |
/* Store both rows, stepping r0 by r2 after each. */
178 | stm r0, {r4,r5} | |
179 | add r0, r0, r2 | |
180 | stm r0, {r8,r9} | |
181 | add r0, r0, r2 | |
182 | bne 1b | |
183 | ||
184 | pop {r4-r9, pc} | |
185 | endfunc | |
186 | ||
/*
 * ff_put_pixels8_y2_no_rnd_armv6
 *
 * For each output row, writes 8 bytes to r0 where every byte is the
 * truncated average of the source byte in the current row and the byte at
 * the same position one row further on:
 *   out[y][i] = (src[y][i] + src[y + 1][i]) >> 1,  i = 0..7
 * Only uhadd8 is used, so no rounding bit is added.
 *   r0  destination pointer      r2  offset between consecutive rows
 *   r1  source pointer           r3  row counter, non-zero multiple of 2
 * NOTE(review): register roles are inferred from the usage below; the C
 * prototype is not visible here - confirm against the caller.
 *
 * ldr_pre / ldrc_pre come from libavutil/arm/asm.S (not shown); ldr_pre
 * presumably adds r2 to r1 before loading, and ldrc_pre is presumably its
 * conditional form.
 */
187 | function ff_put_pixels8_y2_no_rnd_armv6, export=1 | |
/* r14 (lr) is used as a data register below, so it is saved and popped into pc on exit. */
188 | push {r4-r9, lr} | |
/* Prime the pipeline: r4/r5 = first source row, r6/r7 = second source row. */
189 | ldr r4, [r1] | |
190 | ldr r5, [r1, #4] | |
191 | ldr_pre r6, r1, r2 | |
192 | ldr r7, [r1, #4] | |
193 | 1: | |
/* The flags set here are used by ldrc_pre ne, ldrne and the final bne. */
194 | subs r3, r3, #2 | |
/* First output row (r8/r9) from rows r4/r5 and r6/r7; r4/r5 are then reloaded with the next row. */
195 | uhadd8 r8, r4, r6 | |
196 | ldr_pre r4, r1, r2 | |
197 | uhadd8 r9, r5, r7 | |
198 | ldr r5, [r1, #4] | |
/* Second output row (r12/r14) from the new r4/r5 and r6/r7. */
199 | uhadd8 r12, r4, r6 | |
/* Only when more rows remain (ne): fetch the following source row into r6/r7. */
200 | ldrc_pre ne, r6, r1, r2 | |
201 | uhadd8 r14, r5, r7 | |
202 | it ne | |
203 | ldrne r7, [r1, #4] | |
/* Store both rows, stepping r0 by r2 after each. */
204 | stm r0, {r8,r9} | |
205 | add r0, r0, r2 | |
206 | stm r0, {r12,r14} | |
207 | add r0, r0, r2 | |
208 | bne 1b | |
209 | ||
210 | pop {r4-r9, pc} | |
211 | endfunc | |
212 | ||
/*
 * ff_avg_pixels8_armv6
 *
 * For each row, replaces the 8 bytes at r0 with the rounded per-byte
 * average of what is already stored there and the 8 bytes at r1:
 *   dst[i] = (dst[i] + src[i] + 1) >> 1,  i = 0..7
 *   r0  destination pointer (read with ldrd / ldrd_reg, written with strd_post)
 *   r1  source pointer
 *   r2  offset between consecutive rows
 *   r3  row counter; decremented by 2 and tested for exactly zero, so it
 *       must be a non-zero multiple of 2
 * NOTE(review): register roles are inferred from the usage below; the C
 * prototype is not visible here - confirm against the caller.
 *
 * Rounding: uhadd8 gives floor((a + b) / 2) per byte; the dropped bit,
 * (a EOR b) AND 1, is isolated with the 0x01010101 mask in lr and added
 * back with uadd8.
 *
 * The loop is software pipelined: the destination and source words for the
 * next row are fetched while the current row is still being finished.
 * ldr_post / strd_post / ldrd_reg come from libavutil/arm/asm.S (not
 * shown); ldrd_reg presumably loads a doubleword from [r0 + r2] without
 * changing r0.
 */
213 | function ff_avg_pixels8_armv6, export=1 | |
/* Prefetch hint for the second source row. */
214 | pld [r1, r2] | |
215 | push {r4-r10, lr} | |
/* lr = 0x01010101: one in the least significant bit of every byte lane. */
216 | mov lr, #1 | |
217 | orr lr, lr, lr, lsl #8 | |
218 | orr lr, lr, lr, lsl #16 | |
/* Prime the pipeline: r4/r5 = first destination row, r9/r10 = first source row. */
219 | ldrd r4, r5, [r0] | |
220 | ldr r10, [r1, #4] | |
221 | ldr_post r9, r1, r2 | |
/* Count the first pair of rows; on the first pass the beq 2f below tests these flags. */
222 | subs r3, r3, #2 | |
223 | 1: | |
224 | pld [r1, r2] | |
/* First row of the pair: average r4/r5 (destination) with r9/r10 (source). */
225 | eor r8, r4, r9 | |
226 | uhadd8 r4, r4, r9 | |
227 | eor r12, r5, r10 | |
/* r0 has not advanced yet, so this fetches the destination row after the current one. */
228 | ldrd_reg r6, r7, r0, r2 | |
229 | uhadd8 r5, r5, r10 | |
230 | and r8, r8, lr | |
/* r9/r10 are reloaded with the next source row once their old values are consumed. */
231 | ldr r10, [r1, #4] | |
232 | and r12, r12, lr | |
233 | uadd8 r4, r4, r8 | |
234 | ldr_post r9, r1, r2 | |
/* Second row of the pair: start averaging r6/r7 with the new r9/r10. */
235 | eor r8, r6, r9 | |
236 | uadd8 r5, r5, r12 | |
237 | pld [r1, r2, lsl #1] | |
238 | eor r12, r7, r10 | |
239 | uhadd8 r6, r6, r9 | |
240 | strd_post r4, r5, r0, r2 | |
241 | uhadd8 r7, r7, r10 | |
/* Counter reached zero: finish the second row at 2: without fetching further rows. */
242 | beq 2f | |
243 | and r8, r8, lr | |
/* More rows remain: fetch the next destination and source rows for the following iteration. */
244 | ldrd_reg r4, r5, r0, r2 | |
245 | uadd8 r6, r6, r8 | |
246 | ldr r10, [r1, #4] | |
247 | and r12, r12, lr | |
/* Count the next pair of rows; the beq 2f of the next iteration tests these flags. */
248 | subs r3, r3, #2 | |
249 | uadd8 r7, r7, r12 | |
250 | ldr_post r9, r1, r2 | |
251 | strd_post r6, r7, r0, r2 | |
252 | b 1b | |
253 | 2: | |
/* Exit tail: add the rounding bits to the last row and store it. */
254 | and r8, r8, lr | |
255 | and r12, r12, lr | |
256 | uadd8 r6, r6, r8 | |
257 | uadd8 r7, r7, r12 | |
258 | strd_post r6, r7, r0, r2 | |
259 | ||
260 | pop {r4-r10, pc} | |
261 | endfunc | |