Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libavcodec / arm / me_cmp_armv6.S
CommitLineData
2ba45a60
DM
1/*
2 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include "libavutil/arm/asm.S"
22
/*
 * ff_pix_abs16_armv6: sum of absolute byte differences between two blocks
 * that are 16 bytes wide.
 *
 * In:   r1, r2 = start of the two blocks
 *       r3     = byte offset added to both pointers after each row
 *       [sp]   = number of rows (first stack word at entry)
 *       r0     = not read; it is overwritten by the row count
 * Out:  r0     = total of all absolute differences
 *
 * r1 is read with ldm and r2 with individual ldr instructions.
 * NOTE(review): this suggests r1 must be word aligned while r2 need not
 * be -- confirm against the callers.
 * The counter is tested with beq after the decrement, so a row count of 0
 * is not handled; the routine assumes at least one row.
 */
23function ff_pix_abs16_armv6, export=1
24 ldr r0, [sp] /* r0 = row count; read before push changes sp */
25 push {r4-r9, lr}
26 mov r12, #0 /* r12 = partial sum for words 0 and 2 of each row */
27 mov lr, #0 /* lr = partial sum for words 1 and 3 of each row */
28 ldm r1, {r4-r7} /* r4-r7 = first 16-byte row at r1 */
29 ldr r8, [r2] /* r8 = word 0 of the first row at r2 */
301: /* per-row loop; r4-r7 and r8 are already loaded on entry */
31 ldr r9, [r2, #4]
32 pld [r1, r3] /* preload hint for the next row at r1 */
33 usada8 r12, r4, r8, r12 /* r12 += sum over 4 bytes of |r4 - r8| */
34 ldr r8, [r2, #8]
35 pld [r2, r3] /* preload hint for the next row at r2 */
36 usada8 lr, r5, r9, lr
37 ldr r9, [r2, #12]
38 usada8 r12, r6, r8, r12
39 subs r0, r0, #1 /* one row consumed; Z is set when none remain */
40 usada8 lr, r7, r9, lr /* leaves the flags from subs untouched */
41 beq 2f
/* Another row remains: advance both pointers and reload r4-r7 and r8. */
42 add r1, r1, r3
43 ldm r1, {r4-r7}
44 add r2, r2, r3
45 ldr r8, [r2]
46 b 1b
472:
48 add r0, r12, lr /* combine the two partial sums into the result */
49 pop {r4-r9, pc} /* restore saved registers and return */
50endfunc
51
/*
 * ff_pix_abs16_x2_armv6: sum of absolute byte differences between a row of
 * 16 bytes at r1 and the rounded average of each byte at r2 with the byte
 * that follows it.
 *
 * In:   r1     = first block, 16 bytes read per row
 *       r2     = second block, 17 bytes read per row (four words + ldrb)
 *       r3     = byte offset added to both pointers after each row
 *       [sp]   = number of rows (first stack word at entry)
 *       r0     = not read; it is cleared and used as the accumulator
 * Out:  r0     = total of all absolute differences
 *
 * For each word, the neighbour word is built as (w >> 8) | (next << 24).
 * NOTE(review): that is the row starting one byte later only when words are
 * loaded little-endian -- confirm that this file is little-endian only.
 * The rounded average is formed per byte as
 *     uhadd8(a, b) + ((a ^ b) & 1)  ==  (a + b + 1) >> 1
 * The loop body runs before the counter is tested, so at least one row is
 * always processed.
 */
52function ff_pix_abs16_x2_armv6, export=1
53 ldr r12, [sp] /* r12 = row count; read before push changes sp */
54 push {r4-r11, lr}
55 mov r0, #0 /* r0 = running sum and return value */
56 mov lr, #1
57 orr lr, lr, lr, lsl #8
58 orr lr, lr, lr, lsl #16 /* lr = 0x01010101, bit 0 of every byte lane */
591: /* per-row loop */
60 ldr r8, [r2] /* r8 = r2 word 0 */
61 ldr r9, [r2, #4] /* r9 = r2 word 1 */
62 lsr r10, r8, #8
63 ldr r4, [r1] /* r4 = r1 word 0 */
64 lsr r6, r9, #8
65 orr r10, r10, r9, lsl #24 /* r10 = neighbour of word 0 */
66 ldr r5, [r2, #8] /* r5 = r2 word 2 */
67 eor r11, r8, r10
68 uhadd8 r7, r8, r10 /* per byte (a + b) >> 1 */
69 orr r6, r6, r5, lsl #24 /* r6 = neighbour of word 1 */
70 and r11, r11, lr /* per byte rounding bit (a ^ b) & 1 */
71 uadd8 r7, r7, r11 /* r7 = rounded average for word 0 */
72 ldr r8, [r1, #4] /* r8 is reused: r1 word 1 */
73 usada8 r0, r4, r7, r0 /* accumulate word 0 */
74 eor r7, r9, r6
75 lsr r10, r5, #8
76 and r7, r7, lr
77 uhadd8 r4, r9, r6
78 ldr r6, [r2, #12] /* r6 is reused: r2 word 3 */
79 uadd8 r4, r4, r7 /* r4 = rounded average for word 1 */
80 pld [r1, r3] /* preload hint for the next row at r1 */
81 orr r10, r10, r6, lsl #24 /* r10 = neighbour of word 2 */
82 usada8 r0, r8, r4, r0 /* accumulate word 1 */
83 ldr r4, [r1, #8] /* r4 is reused: r1 word 2 */
84 eor r11, r5, r10
85 ldrb r7, [r2, #16] /* 17th byte, completes the neighbour of word 3 */
86 and r11, r11, lr
87 uhadd8 r8, r5, r10
88 ldr r5, [r1, #12] /* r5 is reused: r1 word 3 */
89 uadd8 r8, r8, r11 /* r8 = rounded average for word 2 */
90 pld [r2, r3] /* preload hint for the next row at r2 */
91 lsr r10, r6, #8
92 usada8 r0, r4, r8, r0 /* accumulate word 2 */
93 orr r10, r10, r7, lsl #24 /* r10 = neighbour of word 3 */
94 subs r12, r12, #1 /* one row consumed; flags are used by bgt below */
95 eor r11, r6, r10
96 add r1, r1, r3 /* advance r1 to the next row */
97 uhadd8 r9, r6, r10
98 and r11, r11, lr
99 uadd8 r9, r9, r11 /* r9 = rounded average for word 3 */
100 add r2, r2, r3 /* advance r2 to the next row */
101 usada8 r0, r5, r9, r0 /* accumulate word 3 */
102 bgt 1b /* none of the instructions since subs set the flags */
103
104 pop {r4-r11, pc} /* restore saved registers and return, result in r0 */
105endfunc
106
/*
 * usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
 *
 * Handles one row of 16 bytes: loads the row at r2, averages it per byte
 * (rounding up) with the row already held in p0-p3, and adds the absolute
 * differences between that average and the row at r1 to r0.
 *
 * Expects:  p0-p3 = the four words of the previously loaded r2 row
 *           lr    = 0x01010101 (mask selecting bit 0 of every byte)
 *           r0    = running sum
 *           r3    = byte offset added to r1 and r2 at the end
 * Leaves:   n0-n3 = the four words of the row just loaded from r2
 *           p0-p3 = clobbered (reused as scratch once consumed)
 *           r1, r2 advanced by r3; r0 updated
 *
 * The n and p registers are recycled as temporaries part-way through, so
 * the instruction order matters.  The rounded average is formed per byte as
 *     uhadd8(a, b) + ((a ^ b) & 1)  ==  (a + b + 1) >> 1
 */
107.macro usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
108 ldr \n0, [r2] /* n0 = new r2 word 0 */
109 eor \n1, \p0, \n0 /* n1 used as scratch here */
110 uhadd8 \p0, \p0, \n0 /* per byte (prev + new) >> 1 */
111 and \n1, \n1, lr /* per byte rounding bit */
112 ldr \n2, [r1] /* n2 used as scratch: r1 word 0 */
113 uadd8 \p0, \p0, \n1 /* p0 = rounded average for word 0 */
114 ldr \n1, [r2, #4] /* n1 = new r2 word 1 */
115 usada8 r0, \p0, \n2, r0 /* accumulate word 0 */
116 pld [r1, r3] /* preload hint for the next row at r1 */
117 eor \n3, \p1, \n1 /* n3 used as scratch here */
118 uhadd8 \p1, \p1, \n1
119 and \n3, \n3, lr
120 ldr \p0, [r1, #4] /* p0 is free again: r1 word 1 */
121 uadd8 \p1, \p1, \n3 /* p1 = rounded average for word 1 */
122 ldr \n2, [r2, #8] /* n2 = new r2 word 2 */
123 usada8 r0, \p1, \p0, r0 /* accumulate word 1 */
124 pld [r2, r3] /* preload hint for the next row at r2 */
125 eor \p0, \p2, \n2 /* p0 used as scratch here */
126 uhadd8 \p2, \p2, \n2
127 and \p0, \p0, lr
128 ldr \p1, [r1, #8] /* p1 is free again: r1 word 2 */
129 uadd8 \p2, \p2, \p0 /* p2 = rounded average for word 2 */
130 ldr \n3, [r2, #12] /* n3 = new r2 word 3 */
131 usada8 r0, \p2, \p1, r0 /* accumulate word 2 */
132 eor \p1, \p3, \n3 /* p1 used as scratch here */
133 uhadd8 \p3, \p3, \n3
134 and \p1, \p1, lr
135 ldr \p0, [r1, #12] /* r1 word 3 */
136 uadd8 \p3, \p3, \p1 /* p3 = rounded average for word 3 */
137 add r1, r1, r3 /* advance r1 to the next row */
138 usada8 r0, \p3, \p0, r0 /* accumulate word 3 */
139 add r2, r2, r3 /* advance r2 to the next row */
140.endm
141
/*
 * ff_pix_abs16_y2_armv6: sum of absolute byte differences between rows of
 * 16 bytes at r1 and the rounded average of two consecutive rows at r2.
 * The per-row work is done by the usad_y2 macro.
 *
 * In:   r1     = first block, 16 bytes read per row
 *       r2     = second block; one row more than the row count is read,
 *                because every output row averages a row with its successor
 *       r3     = byte offset between consecutive rows
 *       [sp]   = number of rows (first stack word at entry)
 *       r0     = not read; it is cleared and used as the accumulator
 * Out:  r0     = total of all absolute differences
 *
 * Each loop pass expands usad_y2 twice and subtracts 2 from the counter, so
 * rows are always processed in pairs: an odd count processes one extra row
 * and at least two rows are processed regardless of the count.
 */
142function ff_pix_abs16_y2_armv6, export=1
143 pld [r1] /* preload hints for the first rows */
144 pld [r2]
145 ldr r12, [sp] /* r12 = row count; read before push changes sp */
146 push {r4-r11, lr}
147 mov r0, #0 /* r0 = running sum and return value */
148 mov lr, #1
149 orr lr, lr, lr, lsl #8
150 orr lr, lr, lr, lsl #16 /* lr = 0x01010101, the mask usad_y2 expects */
/* Prime r4-r7 with the first r2 row and step r2 to the second row. */
151 ldr r4, [r2]
152 ldr r5, [r2, #4]
153 ldr r6, [r2, #8]
154 ldr r7, [r2, #12]
155 add r2, r2, r3
1561:
/* Previous row in r4-r7; the newly loaded row ends up in r8-r11. */
157 usad_y2 r4, r5, r6, r7, r8, r9, r10, r11
158 subs r12, r12, #2 /* two rows per pass; flags are used by bgt below */
/* Register sets swapped: previous row in r8-r11, new row into r4-r7. */
159 usad_y2 r8, r9, r10, r11, r4, r5, r6, r7
160 bgt 1b /* usad_y2 contains no flag-setting instructions */
161
162 pop {r4-r11, pc} /* restore saved registers and return, result in r0 */
163endfunc
164
/*
 * ff_pix_abs8_armv6: sum of absolute byte differences between two blocks
 * that are 8 bytes wide, handled two rows per loop pass.
 *
 * In:   r1, r2 = start of the two blocks
 *       r3     = passed to the post-indexing macros and used as pld offset
 *       [sp]   = number of rows (first stack word at entry)
 *       r0     = not read; it is cleared and used as an accumulator
 * Out:  r0     = total of all absolute differences
 *
 * ldrd_post and ldr_post are macros defined outside this file section.
 * NOTE(review): they presumably load from the pointer operand and then add
 * the last operand (r3) to it, and presumably leave the flags alone, since
 * beq below relies on the flags set by subs -- confirm in asm.S.
 * The counter is decremented by 2 and tested with beq, so the routine
 * assumes an even, non-zero row count.
 */
165function ff_pix_abs8_armv6, export=1
166 pld [r2, r3] /* preload hint for r2 + r3 */
167 ldr r12, [sp] /* r12 = row count; read before push changes sp */
168 push {r4-r9, lr}
169 mov r0, #0 /* r0 = partial sum for the low word of each row */
170 mov lr, #0 /* lr = partial sum for the high word of each row */
171 ldrd_post r4, r5, r1, r3 /* r4, r5 = first row at r1 */
1721: /* loop: r4, r5 already hold the r1 row for the first row of the pair */
173 subs r12, r12, #2 /* two rows per pass; Z is tested by beq below */
174 ldr r7, [r2, #4] /* high word of the r2 row */
175 ldr_post r6, r2, r3 /* low word of the r2 row */
176 ldrd_post r8, r9, r1, r3 /* r8, r9 = r1 row for the second row of the pair */
177 usada8 r0, r4, r6, r0 /* first row, low word */
178 pld [r2, r3]
179 usada8 lr, r5, r7, lr /* first row, high word */
180 ldr r7, [r2, #4] /* r2 row for the second row of the pair */
181 ldr_post r6, r2, r3
182 beq 2f /* last pair: finish without loading another r1 row */
183 ldrd_post r4, r5, r1, r3 /* r1 row for the next pair */
184 usada8 r0, r8, r6, r0 /* second row, low word */
185 pld [r2, r3]
186 usada8 lr, r9, r7, lr /* second row, high word */
187 b 1b
1882:
/* Accumulate the second row of the final pair and combine the sums. */
189 usada8 r0, r8, r6, r0
190 usada8 lr, r9, r7, lr
191 add r0, r0, lr
192 pop {r4-r9, pc} /* restore saved registers and return */
193endfunc
194
/*
 * ff_sse16_armv6: sum of squared byte differences between two blocks that
 * are 16 bytes wide.
 *
 * In:   r1, r2 = start of the two blocks
 *       r3     = byte offset added to both pointers after each row
 *       [sp]   = number of rows (first stack word at entry)
 *       r0     = not read; it is cleared and used as the accumulator
 * Out:  r0     = total of all squared differences
 *
 * Each 32-bit word is split with uxtb16 into two registers holding its
 * even and odd bytes zero-extended to halfwords.  usub16 then yields two
 * halfword differences, and smlad with the same register as both operands
 * adds the squares of both halfwords to r0.
 * r1 is read with ldrd and r2 with ldr.
 * NOTE(review): this suggests a stricter alignment requirement on r1 than
 * on r2 -- confirm against the callers.
 * The loop body runs before the counter is tested, so at least one row is
 * always processed.
 */
195function ff_sse16_armv6, export=1
196 ldr r12, [sp] /* r12 = row count; read before push changes sp */
197 push {r4-r9, lr}
198 mov r0, #0 /* r0 = running sum and return value */
1991: /* per-row loop */
200 ldrd r4, r5, [r1] /* r4, r5 = r1 bytes 0-7 */
201 ldr r8, [r2] /* r8 = r2 bytes 0-3 */
202 uxtb16 lr, r4 /* bytes 0 and 2 as halfwords */
203 uxtb16 r4, r4, ror #8 /* bytes 1 and 3 as halfwords */
204 uxtb16 r9, r8
205 uxtb16 r8, r8, ror #8
206 ldr r7, [r2, #4] /* r7 = r2 bytes 4-7 */
207 usub16 lr, lr, r9 /* two halfword differences */
208 usub16 r4, r4, r8
209 smlad r0, lr, lr, r0 /* r0 += d0*d0 + d2*d2 */
210 uxtb16 r6, r5
211 uxtb16 lr, r5, ror #8
212 uxtb16 r8, r7
213 uxtb16 r9, r7, ror #8
214 smlad r0, r4, r4, r0 /* r0 += d1*d1 + d3*d3 */
215 ldrd r4, r5, [r1, #8] /* r4, r5 = r1 bytes 8-15 */
216 usub16 r6, r6, r8
217 usub16 r8, lr, r9
218 ldr r7, [r2, #8] /* r7 = r2 bytes 8-11 */
219 smlad r0, r6, r6, r0
220 uxtb16 lr, r4
221 uxtb16 r4, r4, ror #8
222 uxtb16 r9, r7
223 uxtb16 r7, r7, ror #8
224 smlad r0, r8, r8, r0
225 ldr r8, [r2, #12] /* r8 = r2 bytes 12-15 */
226 usub16 lr, lr, r9
227 usub16 r4, r4, r7
228 smlad r0, lr, lr, r0
229 uxtb16 r6, r5
230 uxtb16 r5, r5, ror #8
231 uxtb16 r9, r8
232 uxtb16 r8, r8, ror #8
233 smlad r0, r4, r4, r0
234 usub16 r6, r6, r9
235 usub16 r5, r5, r8
236 smlad r0, r6, r6, r0
237 add r1, r1, r3 /* advance r1 to the next row */
238 add r2, r2, r3 /* advance r2 to the next row */
239 subs r12, r12, #1 /* one row consumed; flags are used by bgt below */
240 smlad r0, r5, r5, r0 /* last pair of the row */
241 bgt 1b
242
243 pop {r4-r9, pc} /* restore saved registers and return, result in r0 */
244endfunc