Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libavcodec / arm / hpeldsp_armv6.S
CommitLineData
2ba45a60
DM
/*
 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
21#include "libavutil/arm/asm.S"
22
/*
 * call_2x_pixels type, subp
 *
 * Emits an exported function ff_<type>_pixels16<subp>_armv6 that processes
 * a 16-byte-wide block by running the 8-byte-wide routine
 * ff_<type>_pixels8<subp>_armv6 twice: first at the incoming r0/r1, then
 * with both pointers advanced by 8 bytes.
 *
 * r0-r3 and lr are saved around the first call and restored afterwards, so
 * the second pass starts from the original arguments.  The second pass is a
 * tail call (plain branch), so the 8-wide routine returns straight to the
 * caller of the 16-wide function.
 *
 * X() is provided by asm.S; presumably it applies the platform symbol
 * prefix -- confirm there.
 *
 * NOTE(review): the push stores five words, so sp is only 4-byte aligned
 * while the first call runs.  The 8-wide routines in this file use the
 * stack only for push/pop, which looks harmless -- confirm if a callee is
 * ever changed to need 8-byte stack alignment.
 */
.macro  call_2x_pixels  type, subp
function ff_\type\()_pixels16\subp\()_armv6, export=1
        push            {r0-r3, lr}
        bl              X(ff_\type\()_pixels8\subp\()_armv6)
        pop             {r0-r3, lr}
        add             r0,  r0,  #8
        add             r1,  r1,  #8
        b               X(ff_\type\()_pixels8\subp\()_armv6)
endfunc
.endm
33
/*
 * 16-wide entry points generated with call_2x_pixels (defined above).
 * Each one forwards twice to the 8-wide function of the same kind:
 *
 *   ff_avg_pixels16_armv6
 *   ff_put_pixels16_x2_armv6
 *   ff_put_pixels16_y2_armv6
 *   ff_put_pixels16_x2_no_rnd_armv6
 *   ff_put_pixels16_y2_no_rnd_armv6
 */
call_2x_pixels          avg
call_2x_pixels          put, _x2
call_2x_pixels          put, _y2
call_2x_pixels          put, _x2_no_rnd
call_2x_pixels          put, _y2_no_rnd
39
/*
 * ff_put_pixels16_armv6
 *
 * Copies rows of 16 bytes from the source to the destination, two rows per
 * loop iteration.
 *
 * In:   r0 = destination pointer (written with strd)
 *       r1 = source pointer (read with word loads)
 *       r2 = byte offset added to both pointers after each row
 *       r3 = row count; 2 is subtracted per iteration and the loop exits
 *            only on exactly zero, so it must be a positive even number
 * Clobbers: r0, r1, r3, flags.  r4-r11 are saved and restored.
 *
 * Presumably matches the FFmpeg op_pixels_func prototype
 * (block, pixels, line_size, h) -- confirm against hpeldsp.h.
 * ldr_post / strd_post come from asm.S; their use here implies a
 * post-indexed access that advances the base register by r2 -- confirm.
 */
function ff_put_pixels16_armv6, export=1
        push            {r4-r11}
1:
        ldr             r5,  [r1, #4]               @ row A, bytes 4-7
        ldr             r6,  [r1, #8]               @ row A, bytes 8-11
        ldr             r7,  [r1, #12]              @ row A, bytes 12-15
        ldr_post        r4,  r1,  r2                @ row A, bytes 0-3; next row
        strd            r6,  r7,  [r0, #8]          @ upper half of row A
        ldr             r9,  [r1, #4]               @ row B, bytes 4-7
        strd_post       r4,  r5,  r0,  r2           @ lower half of row A; next row
        ldr             r10, [r1, #8]               @ row B, bytes 8-11
        ldr             r11, [r1, #12]              @ row B, bytes 12-15
        ldr_post        r8,  r1,  r2                @ row B, bytes 0-3; next row
        strd            r10, r11, [r0, #8]          @ upper half of row B
        subs            r3,  r3,  #2                @ two rows done
        strd_post       r8,  r9,  r0,  r2           @ lower half of row B; next row
        bne             1b

        pop             {r4-r11}
        bx              lr
endfunc
61
/*
 * ff_put_pixels8_armv6
 *
 * Copies rows of 8 bytes from the source to the destination, two rows per
 * loop iteration.
 *
 * In:   r0 = destination pointer (written with strd_post)
 *       r1 = source pointer (read with word loads)
 *       r2 = byte offset added to both pointers after each row
 *       r3 = row count; 2 is subtracted per iteration and the loop exits
 *            only on exactly zero, so it must be a positive even number
 * Clobbers: r0, r1, r3, flags.  r4-r7 are saved and restored.
 *
 * ldr_post / strd_post come from asm.S; their use here implies a
 * post-indexed access that advances the base register by r2 -- confirm.
 */
function ff_put_pixels8_armv6, export=1
        push            {r4-r7}
1:
        ldr             r5,  [r1, #4]               @ row A, bytes 4-7
        ldr_post        r4,  r1,  r2                @ row A, bytes 0-3; next row
        ldr             r7,  [r1, #4]               @ row B, bytes 4-7
        strd_post       r4,  r5,  r0,  r2           @ store row A; next row
        ldr_post        r6,  r1,  r2                @ row B, bytes 0-3; next row
        subs            r3,  r3,  #2                @ two rows done
        strd_post       r6,  r7,  r0,  r2           @ store row B; next row
        bne             1b

        pop             {r4-r7}
        bx              lr
endfunc
77
/*
 * ff_put_pixels8_x2_armv6
 *
 * For each row, writes 8 bytes where output byte i is the rounded average
 * (a + b + 1) >> 1 of source bytes i and i + 1.  Nine source bytes are
 * read per row.  Two rows are handled per loop iteration.
 *
 * In:   r0 = destination pointer
 *       r1 = source pointer
 *       r2 = byte offset between rows (applied to both pointers)
 *       r3 = row count; 2 is subtracted per iteration and the loop exits
 *            only on exactly zero, so it must be a positive even number
 * Clobbers: r0, r1, r3, r12, flags (and the GE bits via uadd8).
 *           r4-r11 are saved and restored; lr is used as scratch, which is
 *           why it is pushed and the function returns through pop {pc}.
 *
 * Rounding: uhadd8 yields (a + b) >> 1 per byte and drops the low bit of
 * the sum.  That bit equals (a ^ b) & 1, so it is isolated with the
 * 0x01010101 mask and added back with uadd8.
 *
 * The "bytes 1-4" word is assembled from two aligned-offset loads with a
 * shift/orr, which is correct for a little-endian byte order; bytes 5-8
 * are fetched with a load at offset 5.
 *
 * ldr_pre / strd_post come from asm.S; their use here implies ldr_pre
 * advances r1 by r2 before loading and strd_post advances r0 by r2 after
 * storing -- confirm.
 */
function ff_put_pixels8_x2_armv6, export=1
        push            {r4-r11, lr}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16      @ r12 = 0x01010101
1:
        ldr             r4,  [r1]                   @ row A, bytes 0-3
        subs            r3,  r3,  #2                @ flags live until bne below
        ldr             r5,  [r1, #4]               @ row A, bytes 4-7
        ldr             r7,  [r1, #5]               @ row A, bytes 5-8
        lsr             r6,  r4,  #8
        ldr_pre         r8,  r1,  r2                @ row B, bytes 0-3
        orr             r6,  r6,  r5,  lsl #24      @ row A, bytes 1-4
        ldr             r9,  [r1, #4]               @ row B, bytes 4-7
        ldr             r11, [r1, #5]               @ row B, bytes 5-8
        lsr             r10, r8,  #8
        add             r1,  r1,  r2                @ step to the next row pair
        orr             r10, r10, r9,  lsl #24      @ row B, bytes 1-4
        eor             r14, r4,  r6
        uhadd8          r4,  r4,  r6                @ truncated average
        eor             r6,  r5,  r7
        uhadd8          r5,  r5,  r7
        and             r14, r14, r12               @ dropped low bits
        and             r6,  r6,  r12
        uadd8           r4,  r4,  r14               @ add rounding bit
        eor             r14, r8,  r10
        uadd8           r5,  r5,  r6
        eor             r6,  r9,  r11
        uhadd8          r8,  r8,  r10
        and             r14, r14, r12
        uhadd8          r9,  r9,  r11
        and             r6,  r6,  r12
        uadd8           r8,  r8,  r14
        strd_post       r4,  r5,  r0,  r2           @ store row A result
        uadd8           r9,  r9,  r6
        strd_post       r8,  r9,  r0,  r2           @ store row B result
        bne             1b

        pop             {r4-r11, pc}
endfunc
118
/*
 * ff_put_pixels8_y2_armv6
 *
 * For each output row, writes 8 bytes where each byte is the rounded
 * average (a + b + 1) >> 1 of the source byte in the current row and the
 * byte directly below it in the next row.  Two output rows are produced
 * per loop iteration.
 *
 * In:   r0 = destination pointer
 *       r1 = source pointer
 *       r2 = byte offset between rows (applied to both pointers)
 *       r3 = row count; 2 is subtracted per iteration and the loop exits
 *            only on exactly zero, so it must be a positive even number
 * Clobbers: r0, r1, r3, r12, flags (and the GE bits via uadd8).
 *           r4-r11 are saved and restored.
 *
 * Register roles at the top of the loop:
 *       r4/r5 = even source row, r6/r7 = the odd row below it.
 * The loads that refill r6/r7 at the end of an iteration are conditional
 * on "ne", so no row past the last one needed is read.
 *
 * Rounding: uhadd8 yields (a + b) >> 1 per byte; the dropped bit is
 * (a ^ b) & 1, isolated with the 0x01010101 mask and added back by uadd8.
 *
 * ldr_pre / ldrc_pre / strd_post come from asm.S; their use here implies
 * pre-indexed loads that advance r1 by r2 (ldrc_pre being the conditional
 * form) and a post-indexed store that advances r0 by r2 -- confirm.
 */
function ff_put_pixels8_y2_armv6, export=1
        push            {r4-r11}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16      @ r12 = 0x01010101
        ldr             r4,  [r1]                   @ row 0, bytes 0-3
        ldr             r5,  [r1, #4]               @ row 0, bytes 4-7
        ldr_pre         r6,  r1,  r2                @ row 1, bytes 0-3
        ldr             r7,  [r1, #4]               @ row 1, bytes 4-7
1:
        subs            r3,  r3,  #2                @ flags live until bne below
        uhadd8          r8,  r4,  r6                @ first output row, truncated
        eor             r10, r4,  r6
        uhadd8          r9,  r5,  r7
        eor             r11, r5,  r7
        and             r10, r10, r12
        ldr_pre         r4,  r1,  r2                @ next even row, bytes 0-3
        uadd8           r8,  r8,  r10               @ add rounding bit
        and             r11, r11, r12
        uadd8           r9,  r9,  r11
        ldr             r5,  [r1, #4]               @ next even row, bytes 4-7
        uhadd8          r10, r4,  r6                @ second output row, truncated
        eor             r6,  r4,  r6
        uhadd8          r11, r5,  r7
        and             r6,  r6,  r12
        eor             r7,  r5,  r7
        uadd8           r10, r10, r6
        and             r7,  r7,  r12
        ldrc_pre        ne,  r6,  r1,  r2           @ next odd row, only if looping
        uadd8           r11, r11, r7
        strd_post       r8,  r9,  r0,  r2           @ store first output row
        it              ne
        ldrne           r7,  [r1, #4]
        strd_post       r10, r11, r0,  r2           @ store second output row
        bne             1b

        pop             {r4-r11}
        bx              lr
endfunc
158
/*
 * ff_put_pixels8_x2_no_rnd_armv6
 *
 * For each row, writes 8 bytes where output byte i is the truncated
 * average (a + b) >> 1 of source bytes i and i + 1 (uhadd8 with no
 * rounding correction).  Nine source bytes are read per row.  Two rows
 * are handled per loop iteration.
 *
 * In:   r0 = destination pointer
 *       r1 = source pointer
 *       r2 = byte offset between rows (applied to both pointers)
 *       r3 = row count; 2 is subtracted per iteration and the loop exits
 *            only on exactly zero, so it must be a positive even number
 * Clobbers: r0, r1, r3, r12, flags.  r4-r9 are saved and restored; lr is
 *           used as scratch, which is why it is pushed and the function
 *           returns through pop {pc}.
 *
 * The "bytes 1-4" word is assembled from two loads with a shift/orr, which
 * is correct for a little-endian byte order; bytes 5-8 are fetched with a
 * load at offset 5.
 *
 * ldr_pre comes from asm.S; its use here implies it advances r1 by r2
 * before loading -- confirm.
 */
function ff_put_pixels8_x2_no_rnd_armv6, export=1
        push            {r4-r9, lr}
1:
        subs            r3,  r3,  #2                @ flags live until bne below
        ldr             r4,  [r1]                   @ row A, bytes 0-3
        ldr             r5,  [r1, #4]               @ row A, bytes 4-7
        ldr             r7,  [r1, #5]               @ row A, bytes 5-8
        ldr_pre         r8,  r1,  r2                @ row B, bytes 0-3
        ldr             r9,  [r1, #4]               @ row B, bytes 4-7
        ldr             r14, [r1, #5]               @ row B, bytes 5-8
        add             r1,  r1,  r2                @ step to the next row pair
        lsr             r6,  r4,  #8
        orr             r6,  r6,  r5,  lsl #24      @ row A, bytes 1-4
        lsr             r12, r8,  #8
        orr             r12, r12, r9,  lsl #24      @ row B, bytes 1-4
        uhadd8          r4,  r4,  r6
        uhadd8          r5,  r5,  r7
        uhadd8          r8,  r8,  r12
        uhadd8          r9,  r9,  r14
        stm             r0,  {r4,r5}                @ store row A result
        add             r0,  r0,  r2
        stm             r0,  {r8,r9}                @ store row B result
        add             r0,  r0,  r2
        bne             1b

        pop             {r4-r9, pc}
endfunc
186
/*
 * ff_put_pixels8_y2_no_rnd_armv6
 *
 * For each output row, writes 8 bytes where each byte is the truncated
 * average (a + b) >> 1 of the source byte in the current row and the byte
 * directly below it in the next row (uhadd8 with no rounding correction).
 * Two output rows are produced per loop iteration.
 *
 * In:   r0 = destination pointer
 *       r1 = source pointer
 *       r2 = byte offset between rows (applied to both pointers)
 *       r3 = row count; 2 is subtracted per iteration and the loop exits
 *            only on exactly zero, so it must be a positive even number
 * Clobbers: r0, r1, r3, r12, flags.  r4-r9 are saved and restored; lr is
 *           used as scratch, which is why it is pushed and the function
 *           returns through pop {pc}.
 *
 * Register roles at the top of the loop:
 *       r4/r5 = even source row, r6/r7 = the odd row below it.
 * The loads that refill r6/r7 are conditional on "ne", so no row past the
 * last one needed is read.
 *
 * ldr_pre / ldrc_pre come from asm.S; their use here implies pre-indexed
 * loads that advance r1 by r2 (ldrc_pre being the conditional form) --
 * confirm.
 */
function ff_put_pixels8_y2_no_rnd_armv6, export=1
        push            {r4-r9, lr}
        ldr             r4,  [r1]                   @ row 0, bytes 0-3
        ldr             r5,  [r1, #4]               @ row 0, bytes 4-7
        ldr_pre         r6,  r1,  r2                @ row 1, bytes 0-3
        ldr             r7,  [r1, #4]               @ row 1, bytes 4-7
1:
        subs            r3,  r3,  #2                @ flags live until bne below
        uhadd8          r8,  r4,  r6                @ first output row, bytes 0-3
        ldr_pre         r4,  r1,  r2                @ next even row, bytes 0-3
        uhadd8          r9,  r5,  r7                @ first output row, bytes 4-7
        ldr             r5,  [r1, #4]               @ next even row, bytes 4-7
        uhadd8          r12, r4,  r6                @ second output row, bytes 0-3
        ldrc_pre        ne,  r6,  r1,  r2           @ next odd row, only if looping
        uhadd8          r14, r5,  r7                @ second output row, bytes 4-7
        it              ne
        ldrne           r7,  [r1, #4]
        stm             r0,  {r8,r9}                @ store first output row
        add             r0,  r0,  r2
        stm             r0,  {r12,r14}              @ store second output row
        add             r0,  r0,  r2
        bne             1b

        pop             {r4-r9, pc}
endfunc
212
/*
 * ff_avg_pixels8_armv6
 *
 * For each row, replaces 8 destination bytes with the rounded average
 * (d + s + 1) >> 1 of the existing destination byte d and the source
 * byte s.  Two rows are handled per pass through the loop, with the loads
 * for the following row interleaved with the arithmetic of the current one.
 *
 * In:   r0 = destination pointer (read with ldrd, written with strd_post)
 *       r1 = source pointer
 *       r2 = byte offset between rows (applied to both pointers)
 *       r3 = row count; 2 is subtracted per pass and the exit branch is
 *            taken only on exactly zero, so it must be a positive even
 *            number
 * Clobbers: r0, r1, r3, r12, flags (and the GE bits via uadd8).
 *           r4-r10 are saved and restored; lr holds the 0x01010101 mask,
 *           which is why it is pushed and the function returns through
 *           pop {pc}.
 *
 * Rounding: uhadd8 yields (a + b) >> 1 per byte; the dropped bit is
 * (a ^ b) & 1, isolated with the mask in lr and added back by uadd8.
 *
 * Flag lifetime: the subs before label 1 (first pass) or inside the loop
 * (later passes) sets the flags tested by beq; the instructions written
 * out between them do not update NZCV.
 *
 * ldr_post / strd_post / ldrd_reg come from asm.S; their use here implies
 * post-indexed accesses that advance the base by r2, and a doubleword load
 * from r0 + r2 without writeback -- confirm.
 */
function ff_avg_pixels8_armv6, export=1
        pld             [r1, r2]
        push            {r4-r10, lr}
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8
        orr             lr,  lr,  lr,  lsl #16      @ lr = 0x01010101
        ldrd            r4,  r5,  [r0]              @ dst row A
        ldr             r10, [r1, #4]               @ src row A, bytes 4-7
        ldr_post        r9,  r1,  r2                @ src row A, bytes 0-3; next row
        subs            r3,  r3,  #2                @ tested by beq below
1:
        pld             [r1, r2]
        eor             r8,  r4,  r9
        uhadd8          r4,  r4,  r9                @ row A, truncated average
        eor             r12, r5,  r10
        ldrd_reg        r6,  r7,  r0,  r2           @ dst row B
        uhadd8          r5,  r5,  r10
        and             r8,  r8,  lr                @ dropped low bits
        ldr             r10, [r1, #4]               @ src row B, bytes 4-7
        and             r12, r12, lr
        uadd8           r4,  r4,  r8                @ add rounding bit
        ldr_post        r9,  r1,  r2                @ src row B, bytes 0-3; next row
        eor             r8,  r6,  r9
        uadd8           r5,  r5,  r12
        pld             [r1, r2, lsl #1]
        eor             r12, r7,  r10
        uhadd8          r6,  r6,  r9                @ row B, truncated average
        strd_post       r4,  r5,  r0,  r2           @ store row A; next row
        uhadd8          r7,  r7,  r10
        beq             2f                          @ last pair: finish without loading more
        and             r8,  r8,  lr
        ldrd_reg        r4,  r5,  r0,  r2           @ dst row A of the next pair
        uadd8           r6,  r6,  r8
        ldr             r10, [r1, #4]               @ src row A of the next pair, bytes 4-7
        and             r12, r12, lr
        subs            r3,  r3,  #2                @ tested by beq on the next pass
        uadd8           r7,  r7,  r12
        ldr_post        r9,  r1,  r2                @ src row A of the next pair, bytes 0-3
        strd_post       r6,  r7,  r0,  r2           @ store row B; next row
        b               1b
2:
        and             r8,  r8,  lr
        and             r12, r12, lr
        uadd8           r6,  r6,  r8
        uadd8           r7,  r7,  r12
        strd_post       r6,  r7,  r0,  r2           @ store final row B

        pop             {r4-r10, pc}
endfunc