Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libavcodec / aarch64 / h264qpel_neon.S
CommitLineData
2ba45a60
DM
/*
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
22#include "libavutil/aarch64/asm.S"
23#include "neon.S"
24
        /* H.264 qpel MC */
26
// lowpass_const r
// Loads the two filter multipliers used by the lowpass_8* macros into v6.
// The scratch register is built as (20 << 16) | 5, so after the lane move
// v6.H[0] = 5 and v6.H[1] = 20.
//   r: 32-bit general-purpose scratch register, overwritten
.macro  lowpass_const   r
        movz            \r,  #20, lsl #16       // upper halfword = 20
        movk            \r,  #5                 // lower halfword = 5, upper kept
        mov             v6.S[0], \r
.endm
32
// lowpass_8 r0, r1, r2, r3, d0, d1, narrow=1
// Horizontal 6-tap filter over two rows of bytes, 8 outputs per row.
// Each row is the 16-byte concatenation of an 8B register pair (r0:r1 and
// r2:r3). For output i the macro computes
//     (s[i] + s[i+5]) + 20 * (s[i+2] + s[i+3]) - 5 * (s[i+1] + s[i+4])
// using the multipliers in v6.H[1] (20) and v6.H[0] (5), which must have been
// set up beforehand (see lowpass_const).
//   d0, d1: destinations for row 0 / row 1. Call sites in this file alias
//           d0 with r0 and d1 with r1 or r2; each destination is first
//           written only after the last read of the source it may alias.
//   narrow: when non-zero the 16-bit sums are rounded, shifted right by 5 and
//           saturated to unsigned bytes (sqrshrun); otherwise the 16-bit
//           sums are left in d0.8H / d1.8H.
// The two rows are interleaved instruction by instruction.
//trashes v0-v5
.macro  lowpass_8       r0,  r1,  r2,  r3,  d0,  d1,  narrow=1
        ext             v2.8B,      \r0\().8B, \r1\().8B, #2
        ext             v3.8B,      \r0\().8B, \r1\().8B, #3
        uaddl           v2.8H,      v2.8B,     v3.8B        // row0: s[i+2] + s[i+3]
        ext             v4.8B,      \r0\().8B, \r1\().8B, #1
        ext             v5.8B,      \r0\().8B, \r1\().8B, #4
        uaddl           v4.8H,      v4.8B,     v5.8B        // row0: s[i+1] + s[i+4]
        ext             v1.8B,      \r0\().8B, \r1\().8B, #5
        uaddl           \d0\().8H,  \r0\().8B, v1.8B        // row0: s[i] + s[i+5]
        ext             v0.8B,      \r2\().8B, \r3\().8B, #2
        mla             \d0\().8H,  v2.8H,     v6.H[1]      // row0: += 20 * inner pair
        ext             v1.8B,      \r2\().8B, \r3\().8B, #3
        uaddl           v0.8H,      v0.8B,     v1.8B        // row1: s[i+2] + s[i+3]
        ext             v1.8B,      \r2\().8B, \r3\().8B, #1
        mls             \d0\().8H,  v4.8H,     v6.H[0]      // row0: -= 5 * middle pair
        ext             v3.8B,      \r2\().8B, \r3\().8B, #4
        uaddl           v1.8H,      v1.8B,     v3.8B        // row1: s[i+1] + s[i+4]
        ext             v2.8B,      \r2\().8B, \r3\().8B, #5
        uaddl           \d1\().8H,  \r2\().8B, v2.8B        // row1: s[i] + s[i+5]
        mla             \d1\().8H,  v0.8H,     v6.H[1]      // row1: += 20 * inner pair
        mls             \d1\().8H,  v1.8H,     v6.H[0]      // row1: -= 5 * middle pair
  .if \narrow
        sqrshrun        \d0\().8B,  \d0\().8H, #5
        sqrshrun        \d1\().8B,  \d1\().8H, #5
  .endif
.endm
60
// lowpass_8H r0, r1
// In-place horizontal 6-tap filter over two rows, each held in one 16B
// register. The shifted taps are taken by rotating the register with ext and
// using the low 8 bytes, so bytes 0..12 of each input contribute. The result
// replaces the input register as eight unrounded 16-bit sums:
//     (s[i] + s[i+5]) + 20 * (s[i+2] + s[i+3]) - 5 * (s[i+1] + s[i+4])
// No shift or narrowing is applied here. Multipliers are read from v6.H[1]
// (20) and v6.H[0] (5).
//trashes v0-v5, v7, v30-v31
.macro  lowpass_8H      r0,  r1
        ext             v0.16B,     \r0\().16B, \r0\().16B, #2
        ext             v1.16B,     \r0\().16B, \r0\().16B, #3
        uaddl           v0.8H,      v0.8B,      v1.8B       // r0: s[i+2] + s[i+3]
        ext             v2.16B,     \r0\().16B, \r0\().16B, #1
        ext             v3.16B,     \r0\().16B, \r0\().16B, #4
        uaddl           v2.8H,      v2.8B,      v3.8B       // r0: s[i+1] + s[i+4]
        ext             v30.16B,    \r0\().16B, \r0\().16B, #5
        uaddl           \r0\().8H,  \r0\().8B,  v30.8B      // r0: s[i] + s[i+5]
        ext             v4.16B,     \r1\().16B, \r1\().16B, #2
        mla             \r0\().8H,  v0.8H,      v6.H[1]
        ext             v5.16B,     \r1\().16B, \r1\().16B, #3
        uaddl           v4.8H,      v4.8B,      v5.8B       // r1: s[i+2] + s[i+3]
        ext             v7.16B,     \r1\().16B, \r1\().16B, #1
        mls             \r0\().8H,  v2.8H,      v6.H[0]
        ext             v0.16B,     \r1\().16B, \r1\().16B, #4
        uaddl           v7.8H,      v7.8B,      v0.8B       // r1: s[i+1] + s[i+4]
        ext             v31.16B,    \r1\().16B, \r1\().16B, #5
        uaddl           \r1\().8H,  \r1\().8B,  v31.8B      // r1: s[i] + s[i+5]
        mla             \r1\().8H,  v4.8H,      v6.H[1]
        mls             \r1\().8H,  v7.8H,      v6.H[0]
.endm
84
// lowpass_8_1 r0, r1, d0, narrow=1
// Single-row variant of lowpass_8: filters the 16-byte row r0:r1 (two 8B
// registers) into d0 with the same 6-tap expression
//     (s[i] + s[i+5]) + 20 * (s[i+2] + s[i+3]) - 5 * (s[i+1] + s[i+4])
// and, when narrow is non-zero, rounds, shifts right by 5 and saturates to
// unsigned bytes. Multipliers come from v6.H[1] (20) and v6.H[0] (5).
// This macro is not invoked anywhere in the visible part of this file.
// trashes v2-v5, v30
.macro  lowpass_8_1     r0,  r1,  d0,  narrow=1
        ext             v2.8B,      \r0\().8B, \r1\().8B, #2
        ext             v3.8B,      \r0\().8B, \r1\().8B, #3
        uaddl           v2.8H,      v2.8B,     v3.8B        // s[i+2] + s[i+3]
        ext             v4.8B,      \r0\().8B, \r1\().8B, #1
        ext             v5.8B,      \r0\().8B, \r1\().8B, #4
        uaddl           v4.8H,      v4.8B,     v5.8B        // s[i+1] + s[i+4]
        ext             v30.8B,     \r0\().8B, \r1\().8B, #5
        uaddl           \d0\().8H,  \r0\().8B, v30.8B       // s[i] + s[i+5]
        mla             \d0\().8H,  v2.8H,     v6.H[1]
        mls             \d0\().8H,  v4.8H,     v6.H[0]
  .if \narrow
        sqrshrun        \d0\().8B,  \d0\().8H, #5
  .endif
.endm
101
// lowpass_8.16 r0, r1, r2
// Second-stage 6-tap filter over signed 16-bit intermediates. r0:r1 is read
// as 16 consecutive halfwords x[0..15]; halfwords 0..12 are used. For each of
// the 8 outputs the macro forms, in 32-bit precision,
//     20 * (x[i+2] + x[i+3]) + (x[i] + x[i+5]) - 5 * (x[i+1] + x[i+4])
// where the multiplications are done with shifts (16a + 4a, b + 4b). The sum
// is rounded and shifted right by 10, then saturated to unsigned bytes and
// written to r2.8B. r2 may be the same register as r0 (as at the call sites
// in this file).
// trashes v0-v7 and r1. Note that v6 is overwritten, so the multipliers
// loaded by lowpass_const are lost after this macro.
.macro lowpass_8.16     r0, r1, r2
        ext             v1.16B,     \r0\().16B, \r1\().16B, #4      // x[i+2]
        ext             v0.16B,     \r0\().16B, \r1\().16B, #6      // x[i+3]
        saddl           v5.4S,      v1.4H,      v0.4H               // inner pair, low half
        ext             v2.16B,     \r0\().16B, \r1\().16B, #2      // x[i+1]
        saddl2          v1.4S,      v1.8H,      v0.8H               // inner pair, high half
        ext             v3.16B,     \r0\().16B, \r1\().16B, #8      // x[i+4]
        saddl           v6.4S,      v2.4H,      v3.4H               // middle pair, low half
        ext             \r1\().16B, \r0\().16B, \r1\().16B, #10     // x[i+5]
        saddl2          v2.4S,      v2.8H,      v3.8H               // middle pair, high half
        saddl           v0.4S,      \r0\().4H,  \r1\().4H           // outer pair, low half
        saddl2          v4.4S,      \r0\().8H,  \r1\().8H           // outer pair, high half

        // low half: v5 = 20 * inner, v6 = 5 * middle
        shl             v3.4S,  v5.4S,  #4
        shl             v5.4S,  v5.4S,  #2
        shl             v7.4S,  v6.4S,  #2
        add             v5.4S,  v5.4S,  v3.4S
        add             v6.4S,  v6.4S,  v7.4S

        // high half: v1 = 20 * inner, v2 = 5 * middle
        shl             v3.4S,  v1.4S,  #4
        shl             v1.4S,  v1.4S,  #2
        shl             v7.4S,  v2.4S,  #2
        add             v1.4S,  v1.4S,  v3.4S
        add             v2.4S,  v2.4S,  v7.4S

        add             v5.4S,  v5.4S,  v0.4S
        sub             v5.4S,  v5.4S,  v6.4S

        add             v1.4S,  v1.4S,  v4.4S
        sub             v1.4S,  v1.4S,  v2.4S

        rshrn           v5.4H,  v5.4S,  #10
        rshrn2          v5.8H,  v1.4S,  #10

        sqxtun          \r2\().8B,  v5.8H
.endm
139
// put_h264_qpel16_h_lowpass_neon_packed
// Horizontal lowpass of a 16-wide, 16-row area, done as two 8-column passes
// of put_h264_qpel8_h_lowpass_neon with a destination stride of 8. x0 is not
// rewound between the passes, so the right 8 columns are stored directly
// after the 16 rows of the left 8 columns.
//   In:       x0 = dst buffer, x1 = src, x2 = src stride
//             v6 must already hold the filter multipliers (lowpass_const)
//   Clobbers: x3 (set to 8), x4 (holds the return address), x12, and the
//             registers used by put_h264_qpel8_h_lowpass_neon
function put_h264_qpel16_h_lowpass_neon_packed
        mov             x4,  x30                // keep return address across bl
        mov             x12, #16                // row count for the left half
        mov             x3,  #8                 // packed destination stride
        bl              put_h264_qpel8_h_lowpass_neon
        sub             x1,  x1,  x2, lsl #4    // src back up 16 rows
        add             x1,  x1,  #8            // move to the right 8 columns
        mov             x12, #16
        mov             x30, x4
        b               put_h264_qpel8_h_lowpass_neon   // tail call, returns to our caller
endfunc
151
// h264_qpel_h_lowpass type
// Emits <type>_h264_qpel16_h_lowpass_neon and <type>_h264_qpel8_h_lowpass_neon
// (type is put or avg).
//
// <type>_h264_qpel8_h_lowpass_neon
//   In:  x0 = dst, x1 = src, x2 = src stride, x3 = dst stride,
//        x12 = number of rows (two rows are processed per loop iteration and
//              the loop ends when the count reaches zero)
//        v6 = filter multipliers (lowpass_const)
//   Filters 8 output bytes per row with lowpass_8. For avg the result is
//   combined with the bytes already at dst using a rounding halving add.
//   On return x0 and x1 have advanced by the processed rows.
//
// <type>_h264_qpel16_h_lowpass_neon
//   Same inputs except x12, which is set here. Runs the 8-wide routine on the
//   left 8 columns for 16 rows, rewinds x0/x1 by 16 rows, steps 8 bytes to
//   the right, and then continues straight into the 8-wide routine placed
//   directly after it (no branch is emitted, so the order of the two
//   functions must be kept). The return address is parked in x13.
.macro  h264_qpel_h_lowpass type
function \type\()_h264_qpel16_h_lowpass_neon
        mov             x13, x30
        mov             x12, #16
        bl              \type\()_h264_qpel8_h_lowpass_neon
        sub             x0,  x0,  x3, lsl #4    // dst back up 16 rows
        sub             x1,  x1,  x2, lsl #4    // src back up 16 rows
        add             x0,  x0,  #8
        add             x1,  x1,  #8
        mov             x12, #16
        mov             x30, x13
        // falls through into the 8-wide routine for the right half
endfunc

function \type\()_h264_qpel8_h_lowpass_neon
1:      ld1             {v28.8B, v29.8B}, [x1], x2
        ld1             {v16.8B, v17.8B}, [x1], x2
        subs            x12, x12, #2            // flags consumed by b.ne below
        lowpass_8       v28, v29, v16, v17, v28, v16
  .ifc \type,avg
        ld1             {v2.8B},    [x0], x3
        urhadd          v28.8B, v28.8B,  v2.8B
        ld1             {v3.8B},    [x0]
        urhadd          v16.8B, v16.8B, v3.8B
        sub             x0,  x0,  x3            // undo the single post-increment
  .endif
        st1             {v28.8B},    [x0], x3
        st1             {v16.8B},    [x0], x3
        b.ne            1b
        ret
endfunc
.endm

        h264_qpel_h_lowpass put
        h264_qpel_h_lowpass avg
186
// h264_qpel_h_lowpass_l2 type
// Emits <type>_h264_qpel16_h_lowpass_l2_neon and
// <type>_h264_qpel8_h_lowpass_l2_neon (type is put or avg).
//
// <type>_h264_qpel8_h_lowpass_l2_neon
//   In:  x0 = dst, x1 = src for the filter, x3 = second source,
//        x2 = stride, applied to x0, x1 and x3 alike,
//        x12 = number of rows (two per loop iteration),
//        v6 = filter multipliers (lowpass_const)
//   Each output row is the rounding average of the lowpass_8 result and the
//   8 bytes read from the second source. For avg that value is averaged once
//   more with the bytes already at dst.
//
// <type>_h264_qpel16_h_lowpass_l2_neon
//   Runs the 8-wide routine on the left 8 columns for 16 rows, rewinds
//   x0/x1/x3 by 16 rows, steps 8 bytes right and continues straight into the
//   8-wide routine placed directly after it (no branch is emitted). The
//   return address is parked in x13.
.macro  h264_qpel_h_lowpass_l2 type
function \type\()_h264_qpel16_h_lowpass_l2_neon
        mov             x13, x30
        mov             x12, #16
        bl              \type\()_h264_qpel8_h_lowpass_l2_neon
        sub             x0,  x0,  x2, lsl #4
        sub             x1,  x1,  x2, lsl #4
        sub             x3,  x3,  x2, lsl #4
        add             x0,  x0,  #8
        add             x1,  x1,  #8
        add             x3,  x3,  #8
        mov             x12, #16
        mov             x30, x13
        // falls through into the 8-wide routine for the right half
endfunc

function \type\()_h264_qpel8_h_lowpass_l2_neon
1:      ld1             {v26.8B, v27.8B}, [x1], x2
        ld1             {v16.8B, v17.8B}, [x1], x2
        ld1             {v28.8B},     [x3], x2
        ld1             {v29.8B},     [x3], x2
        subs            x12, x12, #2            // flags consumed by b.ne below
        lowpass_8       v26, v27, v16, v17, v26, v27
        urhadd          v26.8B, v26.8B, v28.8B
        urhadd          v27.8B, v27.8B, v29.8B
  .ifc \type,avg
        ld1             {v2.8B},      [x0], x2
        urhadd          v26.8B, v26.8B, v2.8B
        ld1             {v3.8B},      [x0]
        urhadd          v27.8B, v27.8B, v3.8B
        sub             x0,  x0,  x2            // undo the single post-increment
  .endif
        st1             {v26.8B},     [x0], x2
        st1             {v27.8B},     [x0], x2
        b.ne            1b
        ret
endfunc
.endm

        h264_qpel_h_lowpass_l2 put
        h264_qpel_h_lowpass_l2 avg
227
// put_h264_qpel16_v_lowpass_neon_packed
// Vertical lowpass of a 16x16 area as four calls to
// put_h264_qpel8_v_lowpass_neon with a destination stride of 8. x0 is never
// rewound, so the four 8x8 results are stored back to back in the order
// top-left, bottom-left, top-right, bottom-right.
//   In:       x0 = dst buffer, x1 = src, x3 = src stride
//             v6 must already hold the filter multipliers (lowpass_const)
//   Clobbers: x2 (set to 8), x4 (holds the return address), and the registers
//             used by put_h264_qpel8_v_lowpass_neon
function put_h264_qpel16_v_lowpass_neon_packed
        mov             x4,  x30
        mov             x2,  #8                 // packed destination stride
        bl              put_h264_qpel8_v_lowpass_neon
        sub             x1,  x1,  x3, lsl #2    // callee left x1 12 rows down; go to row 8
        bl              put_h264_qpel8_v_lowpass_neon
        sub             x1,  x1,  x3, lsl #4    // x1 is 20 rows down here:
        sub             x1,  x1,  x3, lsl #2    //   back to the first row
        add             x1,  x1,  #8            //   and over to the right 8 columns
        bl              put_h264_qpel8_v_lowpass_neon
        sub             x1,  x1,  x3, lsl #2
        mov             x30, x4
        b               put_h264_qpel8_v_lowpass_neon   // tail call, returns to our caller
endfunc
242
// h264_qpel_v_lowpass type
// Emits <type>_h264_qpel16_v_lowpass_neon and <type>_h264_qpel8_v_lowpass_neon
// (type is put or avg).
//
// <type>_h264_qpel8_v_lowpass_neon
//   In:  x0 = dst, x1 = first of the 13 source rows, x2 = dst stride,
//        x3 = src stride, v6 = filter multipliers (lowpass_const)
//   Loads 13 rows of 8 bytes, transposes them so that each register pair
//   holds one source column, runs the row filter lowpass_8 on the columns,
//   transposes back and stores 8 rows of 8 bytes. For avg the result is
//   first combined with the bytes already at dst (rounding halving add).
//   On return x1 points at the 13th source row (12 rows past its entry value)
//   and x0 has advanced by 8 rows.
//
// <type>_h264_qpel16_v_lowpass_neon
//   Covers a 16x16 area with four 8x8 calls: top-left, bottom-left,
//   top-right, and finally bottom-right by continuing straight into the 8x8
//   routine placed directly after it (no branch is emitted). The return
//   address is parked in x4.
.macro  h264_qpel_v_lowpass type
function \type\()_h264_qpel16_v_lowpass_neon
        mov             x4,  x30
        bl              \type\()_h264_qpel8_v_lowpass_neon
        sub             x1,  x1,  x3, lsl #2    // 12 rows down -> row 8
        bl              \type\()_h264_qpel8_v_lowpass_neon
        sub             x0,  x0,  x2, lsl #4    // dst back up 16 rows
        add             x0,  x0,  #8
        sub             x1,  x1,  x3, lsl #4    // src is 20 rows down here:
        sub             x1,  x1,  x3, lsl #2    //   back to the first row
        add             x1,  x1,  #8
        bl              \type\()_h264_qpel8_v_lowpass_neon
        sub             x1,  x1,  x3, lsl #2
        mov             x30, x4
        // falls through into the 8x8 routine for the last quadrant
endfunc

function \type\()_h264_qpel8_v_lowpass_neon
        ld1             {v16.8B}, [x1], x3
        ld1             {v18.8B}, [x1], x3
        ld1             {v20.8B}, [x1], x3
        ld1             {v22.8B}, [x1], x3
        ld1             {v24.8B}, [x1], x3
        ld1             {v26.8B}, [x1], x3
        ld1             {v28.8B}, [x1], x3
        ld1             {v30.8B}, [x1], x3
        ld1             {v17.8B}, [x1], x3
        ld1             {v19.8B}, [x1], x3
        ld1             {v21.8B}, [x1], x3
        ld1             {v23.8B}, [x1], x3
        ld1             {v25.8B}, [x1]          // 13th row, no post-increment

        // rows 0-7 and rows 8-12 are transposed separately; v27, v29 and v31
        // were not loaded, so part of the second transpose output is filler
        transpose_8x8B  v16, v18, v20, v22, v24, v26, v28, v30, v0,  v1
        transpose_8x8B  v17, v19, v21, v23, v25, v27, v29, v31, v0,  v1
        lowpass_8       v16, v17, v18, v19, v16, v17
        lowpass_8       v20, v21, v22, v23, v18, v19
        lowpass_8       v24, v25, v26, v27, v20, v21
        lowpass_8       v28, v29, v30, v31, v22, v23
        transpose_8x8B  v16, v17, v18, v19, v20, v21, v22, v23, v0,  v1

  .ifc \type,avg
        ld1             {v24.8B},  [x0], x2
        urhadd          v16.8B, v16.8B, v24.8B
        ld1             {v25.8B},  [x0], x2
        urhadd          v17.8B, v17.8B, v25.8B
        ld1             {v26.8B},  [x0], x2
        urhadd          v18.8B, v18.8B, v26.8B
        ld1             {v27.8B},  [x0], x2
        urhadd          v19.8B, v19.8B, v27.8B
        ld1             {v28.8B},  [x0], x2
        urhadd          v20.8B, v20.8B, v28.8B
        ld1             {v29.8B},  [x0], x2
        urhadd          v21.8B, v21.8B, v29.8B
        ld1             {v30.8B},  [x0], x2
        urhadd          v22.8B, v22.8B, v30.8B
        ld1             {v31.8B},  [x0], x2
        urhadd          v23.8B, v23.8B, v31.8B
        sub             x0,  x0,  x2,  lsl #3   // rewind dst by the 8 rows just read
  .endif

        st1             {v16.8B}, [x0], x2
        st1             {v17.8B}, [x0], x2
        st1             {v18.8B}, [x0], x2
        st1             {v19.8B}, [x0], x2
        st1             {v20.8B}, [x0], x2
        st1             {v21.8B}, [x0], x2
        st1             {v22.8B}, [x0], x2
        st1             {v23.8B}, [x0], x2

        ret
endfunc
.endm

        h264_qpel_v_lowpass put
        h264_qpel_v_lowpass avg
317
// h264_qpel_v_lowpass_l2 type
// Emits <type>_h264_qpel16_v_lowpass_l2_neon and
// <type>_h264_qpel8_v_lowpass_l2_neon (type is put or avg).
//
// <type>_h264_qpel8_v_lowpass_l2_neon
//   In:  x0 = dst, x1 = first of the 13 source rows,
//        x3 = stride used for both the source rows and dst,
//        x12 = second source, x2 = stride of the second source,
//        v6 = filter multipliers (lowpass_const)
//   Computes the same vertical filter as the plain v_lowpass routine
//   (transpose, lowpass_8 on the columns, transpose back), then takes the
//   rounding average with 8 rows read from the second source. For avg that
//   value is averaged once more with the bytes already at dst.
//   On return x1 is 12 rows past its entry value; x0 and x12 have advanced
//   by 8 rows.
//
// <type>_h264_qpel16_v_lowpass_l2_neon
//   Covers a 16x16 area with four 8x8 calls: top-left, bottom-left,
//   top-right, and finally bottom-right by continuing straight into the 8x8
//   routine placed directly after it (no branch is emitted). The return
//   address is parked in x4.
.macro  h264_qpel_v_lowpass_l2 type
function \type\()_h264_qpel16_v_lowpass_l2_neon
        mov             x4,  x30
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        sub             x1,  x1,  x3, lsl #2    // 12 rows down -> row 8
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        sub             x0,  x0,  x3, lsl #4    // dst back up 16 rows
        sub             x12, x12, x2, lsl #4    // second source back up 16 rows
        add             x0,  x0,  #8
        add             x12, x12, #8
        sub             x1,  x1,  x3, lsl #4    // src is 20 rows down here:
        sub             x1,  x1,  x3, lsl #2    //   back to the first row
        add             x1,  x1,  #8
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        sub             x1,  x1,  x3, lsl #2
        mov             x30, x4
        // falls through into the 8x8 routine for the last quadrant
endfunc

function \type\()_h264_qpel8_v_lowpass_l2_neon
        ld1             {v16.8B}, [x1], x3
        ld1             {v18.8B}, [x1], x3
        ld1             {v20.8B}, [x1], x3
        ld1             {v22.8B}, [x1], x3
        ld1             {v24.8B}, [x1], x3
        ld1             {v26.8B}, [x1], x3
        ld1             {v28.8B}, [x1], x3
        ld1             {v30.8B}, [x1], x3
        ld1             {v17.8B}, [x1], x3
        ld1             {v19.8B}, [x1], x3
        ld1             {v21.8B}, [x1], x3
        ld1             {v23.8B}, [x1], x3
        ld1             {v25.8B}, [x1]          // 13th row, no post-increment

        transpose_8x8B  v16, v18, v20, v22, v24, v26, v28, v30, v0,  v1
        transpose_8x8B  v17, v19, v21, v23, v25, v27, v29, v31, v0,  v1
        lowpass_8       v16, v17, v18, v19, v16, v17
        lowpass_8       v20, v21, v22, v23, v18, v19
        lowpass_8       v24, v25, v26, v27, v20, v21
        lowpass_8       v28, v29, v30, v31, v22, v23
        transpose_8x8B  v16, v17, v18, v19, v20, v21, v22, v23, v0,  v1

        // average with the second source, loads interleaved with the math
        ld1             {v24.8B},  [x12], x2
        ld1             {v25.8B},  [x12], x2
        ld1             {v26.8B},  [x12], x2
        ld1             {v27.8B},  [x12], x2
        ld1             {v28.8B},  [x12], x2
        urhadd          v16.8B, v24.8B, v16.8B
        urhadd          v17.8B, v25.8B, v17.8B
        ld1             {v29.8B},  [x12], x2
        urhadd          v18.8B, v26.8B, v18.8B
        urhadd          v19.8B, v27.8B, v19.8B
        ld1             {v30.8B},  [x12], x2
        urhadd          v20.8B, v28.8B, v20.8B
        urhadd          v21.8B, v29.8B, v21.8B
        ld1             {v31.8B},  [x12], x2
        urhadd          v22.8B, v30.8B, v22.8B
        urhadd          v23.8B, v31.8B, v23.8B

  .ifc \type,avg
        ld1             {v24.8B},  [x0], x3
        urhadd          v16.8B, v16.8B, v24.8B
        ld1             {v25.8B},  [x0], x3
        urhadd          v17.8B, v17.8B, v25.8B
        ld1             {v26.8B},  [x0], x3
        urhadd          v18.8B, v18.8B, v26.8B
        ld1             {v27.8B},  [x0], x3
        urhadd          v19.8B, v19.8B, v27.8B
        ld1             {v28.8B},  [x0], x3
        urhadd          v20.8B, v20.8B, v28.8B
        ld1             {v29.8B},  [x0], x3
        urhadd          v21.8B, v21.8B, v29.8B
        ld1             {v30.8B},  [x0], x3
        urhadd          v22.8B, v22.8B, v30.8B
        ld1             {v31.8B},  [x0], x3
        urhadd          v23.8B, v23.8B, v31.8B
        sub             x0,  x0,  x3,  lsl #3   // rewind dst by the 8 rows just read
  .endif

        st1             {v16.8B}, [x0], x3
        st1             {v17.8B}, [x0], x3
        st1             {v18.8B}, [x0], x3
        st1             {v19.8B}, [x0], x3
        st1             {v20.8B}, [x0], x3
        st1             {v21.8B}, [x0], x3
        st1             {v22.8B}, [x0], x3
        st1             {v23.8B}, [x0], x3

        ret
endfunc
.endm

        h264_qpel_v_lowpass_l2 put
        h264_qpel_v_lowpass_l2 avg
411
// put_h264_qpel8_hv_lowpass_neon_top
// Two-stage (horizontal then vertical) lowpass for one 8x8 block; the result
// is left in registers for the caller to store.
//   In:       x1 = first of the 13 source rows, x3 = src stride
//   Out:      v16-v23 = the 8 output rows, 8 bytes each
//             x1 points at the 13th source row (12 rows past its entry value)
//   Clobbers: x12/w12 (used by lowpass_const), v0-v7, v24-v31
// Stage 1 loads 13 rows of 16 bytes and filters each row in place to 16-bit
// sums with lowpass_8H. Stage 2 transposes the 16-bit data and filters each
// column with lowpass_8.16, which rounds, shifts by 10 and saturates to
// bytes; a final byte transpose restores row order.
// The multipliers are loaded here, so callers do not need to set up v6. They
// are only needed by stage 1: lowpass_8.16 overwrites v6.
function put_h264_qpel8_hv_lowpass_neon_top
        lowpass_const   w12
        ld1             {v16.8H}, [x1], x3
        ld1             {v17.8H}, [x1], x3
        ld1             {v18.8H}, [x1], x3
        ld1             {v19.8H}, [x1], x3
        ld1             {v20.8H}, [x1], x3
        ld1             {v21.8H}, [x1], x3
        ld1             {v22.8H}, [x1], x3
        ld1             {v23.8H}, [x1], x3
        ld1             {v24.8H}, [x1], x3
        ld1             {v25.8H}, [x1], x3
        ld1             {v26.8H}, [x1], x3
        ld1             {v27.8H}, [x1], x3
        ld1             {v28.8H}, [x1]          // 13th row, no post-increment
        lowpass_8H      v16, v17
        lowpass_8H      v18, v19
        lowpass_8H      v20, v21
        lowpass_8H      v22, v23
        lowpass_8H      v24, v25
        lowpass_8H      v26, v27
        lowpass_8H      v28, v29                // v29 was not loaded; its result is filler

        // After the second transpose each of v24-v31 holds rows 8..12 of one
        // column in lanes 0..4. lowpass_8.16 reads halfwords 0..12 of the
        // register pair, so the filler lanes 5..7 are never used.
        transpose_8x8H  v16, v17, v18, v19, v20, v21, v22, v23, v0,  v1
        transpose_8x8H  v24, v25, v26, v27, v28, v29, v30, v31, v0,  v1

        lowpass_8.16    v16, v24, v16
        lowpass_8.16    v17, v25, v17

        lowpass_8.16    v18, v26, v18
        lowpass_8.16    v19, v27, v19

        lowpass_8.16    v20, v28, v20
        lowpass_8.16    v21, v29, v21

        lowpass_8.16    v22, v30, v22
        lowpass_8.16    v23, v31, v23

        transpose_8x8B  v16, v17, v18, v19, v20, v21, v22, v23, v0,  v1

        ret
endfunc
454
// h264_qpel8_hv_lowpass type
// Emits <type>_h264_qpel8_hv_lowpass_neon (type is put or avg).
//   In:       x0 = dst, x1 = first of the 13 source rows, x2 = dst stride,
//             x3 = src stride
//   Clobbers: x10 (holds the return address) plus everything clobbered by
//             put_h264_qpel8_hv_lowpass_neon_top
// Calls the shared _top routine, which leaves the 8 filtered rows in
// v16-v23, then stores them. For avg the rows are first combined with the
// bytes already at dst using a rounding halving add. x0 ends 8 rows further
// down.
.macro  h264_qpel8_hv_lowpass type
function \type\()_h264_qpel8_hv_lowpass_neon
        mov             x10, x30
        bl              put_h264_qpel8_hv_lowpass_neon_top
  .ifc \type,avg
        ld1             {v0.8B},      [x0], x2
        urhadd          v16.8B, v16.8B, v0.8B
        ld1             {v1.8B},      [x0], x2
        urhadd          v17.8B, v17.8B, v1.8B
        ld1             {v2.8B},      [x0], x2
        urhadd          v18.8B, v18.8B, v2.8B
        ld1             {v3.8B},      [x0], x2
        urhadd          v19.8B, v19.8B, v3.8B
        ld1             {v4.8B},      [x0], x2
        urhadd          v20.8B, v20.8B, v4.8B
        ld1             {v5.8B},      [x0], x2
        urhadd          v21.8B, v21.8B, v5.8B
        ld1             {v6.8B},      [x0], x2
        urhadd          v22.8B, v22.8B, v6.8B
        ld1             {v7.8B},      [x0], x2
        urhadd          v23.8B, v23.8B, v7.8B
        sub             x0,  x0,  x2,  lsl #3   // rewind dst by the 8 rows just read
  .endif

        st1             {v16.8B},     [x0], x2
        st1             {v17.8B},     [x0], x2
        st1             {v18.8B},     [x0], x2
        st1             {v19.8B},     [x0], x2
        st1             {v20.8B},     [x0], x2
        st1             {v21.8B},     [x0], x2
        st1             {v22.8B},     [x0], x2
        st1             {v23.8B},     [x0], x2

        ret             x10
endfunc
.endm

        h264_qpel8_hv_lowpass put
        h264_qpel8_hv_lowpass avg
494
// h264_qpel8_hv_lowpass_l2 type
// Emits <type>_h264_qpel8_hv_lowpass_l2_neon (type is put or avg).
//   In:       x0 = dst, x1 = first of the 13 source rows,
//             x2 = second source, read as 8 consecutive rows of 8 bytes
//                  (64 bytes; x2 is advanced past them),
//             x3 = stride used for both the source rows and dst
//   Clobbers: x10 (holds the return address) plus everything clobbered by
//             put_h264_qpel8_hv_lowpass_neon_top
// Calls the shared _top routine (result in v16-v23), takes the rounding
// average with the second source into v0-v7 and stores those. For avg the
// value is averaged once more with the bytes already at dst.
.macro  h264_qpel8_hv_lowpass_l2 type
function \type\()_h264_qpel8_hv_lowpass_l2_neon
        mov             x10, x30
        bl              put_h264_qpel8_hv_lowpass_neon_top

        ld1             {v0.8B, v1.8B},  [x2], #16
        ld1             {v2.8B, v3.8B},  [x2], #16
        urhadd          v0.8B,  v0.8B,  v16.8B
        urhadd          v1.8B,  v1.8B,  v17.8B
        ld1             {v4.8B, v5.8B},  [x2], #16
        urhadd          v2.8B,  v2.8B,  v18.8B
        urhadd          v3.8B,  v3.8B,  v19.8B
        ld1             {v6.8B, v7.8B},  [x2], #16
        urhadd          v4.8B,  v4.8B,  v20.8B
        urhadd          v5.8B,  v5.8B,  v21.8B
        urhadd          v6.8B,  v6.8B,  v22.8B
        urhadd          v7.8B,  v7.8B,  v23.8B
  .ifc \type,avg
        ld1             {v16.8B},     [x0], x3
        urhadd          v0.8B,  v0.8B,  v16.8B
        ld1             {v17.8B},     [x0], x3
        urhadd          v1.8B,  v1.8B,  v17.8B
        ld1             {v18.8B},     [x0], x3
        urhadd          v2.8B,  v2.8B,  v18.8B
        ld1             {v19.8B},     [x0], x3
        urhadd          v3.8B,  v3.8B,  v19.8B
        ld1             {v20.8B},     [x0], x3
        urhadd          v4.8B,  v4.8B,  v20.8B
        ld1             {v21.8B},     [x0], x3
        urhadd          v5.8B,  v5.8B,  v21.8B
        ld1             {v22.8B},     [x0], x3
        urhadd          v6.8B,  v6.8B,  v22.8B
        ld1             {v23.8B},     [x0], x3
        urhadd          v7.8B,  v7.8B,  v23.8B
        sub             x0,  x0,  x3,  lsl #3   // rewind dst by the 8 rows just read
  .endif
        st1             {v0.8B},      [x0], x3
        st1             {v1.8B},      [x0], x3
        st1             {v2.8B},      [x0], x3
        st1             {v3.8B},      [x0], x3
        st1             {v4.8B},      [x0], x3
        st1             {v5.8B},      [x0], x3
        st1             {v6.8B},      [x0], x3
        st1             {v7.8B},      [x0], x3

        ret             x10
endfunc
.endm

        h264_qpel8_hv_lowpass_l2 put
        h264_qpel8_hv_lowpass_l2 avg
546
// h264_qpel16_hv type
// Emits the 16x16 wrappers <type>_h264_qpel16_hv_lowpass_neon and
// <type>_h264_qpel16_hv_lowpass_l2_neon (type is put or avg). Each covers the
// area with four calls to the matching 8x8 routine in the order top-left,
// bottom-left, top-right, bottom-right; the last call is a tail call. The
// return address is parked in x13 (the 8x8 routines use x10 for theirs).
//
// <type>_h264_qpel16_hv_lowpass_neon
//   In: x0 = dst, x1 = first source row, x2 = dst stride, x3 = src stride
//
// <type>_h264_qpel16_hv_lowpass_l2_neon
//   In: x0 = dst, x1 = first source row, x3 = stride for source and dst,
//       x4 = address 256 bytes past the start of the second-source buffer
//            (x2 is derived from it here)
//   The second source is consumed linearly, 64 bytes per 8x8 call, in the
//   same quadrant order as the calls are made.
.macro  h264_qpel16_hv  type
function \type\()_h264_qpel16_hv_lowpass_neon
        mov             x13, x30
        bl              \type\()_h264_qpel8_hv_lowpass_neon
        sub             x1,  x1,  x3, lsl #2    // 12 rows down -> row 8
        bl              \type\()_h264_qpel8_hv_lowpass_neon
        sub             x1,  x1,  x3, lsl #4    // src is 20 rows down here:
        sub             x1,  x1,  x3, lsl #2    //   back to the first row
        add             x1,  x1,  #8
        sub             x0,  x0,  x2, lsl #4    // dst back up 16 rows
        add             x0,  x0,  #8
        bl              \type\()_h264_qpel8_hv_lowpass_neon
        sub             x1,  x1,  x3, lsl #2
        mov             x30, x13
        b               \type\()_h264_qpel8_hv_lowpass_neon
endfunc

function \type\()_h264_qpel16_hv_lowpass_l2_neon
        mov             x13, x30
        sub             x2,  x4,  #256          // start of the second-source buffer
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
        sub             x1,  x1,  x3, lsl #2
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
        sub             x1,  x1,  x3, lsl #4
        sub             x1,  x1,  x3, lsl #2
        add             x1,  x1,  #8
        sub             x0,  x0,  x3, lsl #4
        add             x0,  x0,  #8
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
        sub             x1,  x1,  x3, lsl #2
        mov             x30, x13
        b               \type\()_h264_qpel8_hv_lowpass_l2_neon
endfunc
.endm

        h264_qpel16_hv put
        h264_qpel16_hv avg
584
// h264_qpel8 type
// Emits the exported 8x8 entry points ff_<type>_h264_qpel8_mcXY_neon for
// XY in {10,20,30,01,11,21,31,02,12,22,32,03,13,23,33} (type is put or avg).
//
// Register use shared by the entry points, as consumed by the helpers:
//   In:  x0 = dst, x1 = src, x2 = stride
//   x14 = saved return address (entry points that use bl)
//   x8 / x9 = dst and a source position kept for the second pass
//   x11 = sp on entry, restored before returning
// Several entry points only set up x1/x9/x12 differently and then branch to
// an internal label (<type>_h264_qpel8_mc01/mc11/mc21/mc12) inside another
// entry point; those labels expect x14 (and x8/x9 where used) to be set.
// Temporary buffers live on the stack and are sized in multiples of 16.
.macro  h264_qpel8      type
// mc10: average of the horizontal filter and the source pixels themselves.
function ff_\type\()_h264_qpel8_mc10_neon, export=1
        lowpass_const   w3
        mov             x3,  x1                 // second source = src
        sub             x1,  x1,  #2            // filter window starts 2 bytes left
        mov             x12, #8                 // 8 rows
        b               \type\()_h264_qpel8_h_lowpass_l2_neon
endfunc

// mc20: horizontal filter only.
function ff_\type\()_h264_qpel8_mc20_neon, export=1
        lowpass_const   w3
        sub             x1,  x1,  #2
        mov             x3,  x2                 // dst stride = stride
        mov             x12, #8
        b               \type\()_h264_qpel8_h_lowpass_neon
endfunc

// mc30: average of the horizontal filter and the source one byte to the right.
function ff_\type\()_h264_qpel8_mc30_neon, export=1
        lowpass_const   w3
        add             x3,  x1,  #1            // second source = src + 1
        sub             x1,  x1,  #2
        mov             x12, #8
        b               \type\()_h264_qpel8_h_lowpass_l2_neon
endfunc

// mc01: average of the vertical filter and the source pixels themselves.
// The internal label is also the target of mc03, which passes a different
// second source in x12.
function ff_\type\()_h264_qpel8_mc01_neon, export=1
        mov             x14, x30
        mov             x12, x1                 // second source = src
\type\()_h264_qpel8_mc01:
        lowpass_const   w3
        mov             x3,  x2
        sub             x1,  x1,  x2, lsl #1    // filter window starts 2 rows up
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        ret             x14
endfunc

// mc11: horizontal filter (rows starting at x1) into a 64-byte stack buffer,
// then the vertical filter (columns at x9) averaged with that buffer.
// The internal label is shared by mc31, mc13 and mc33, which differ only in
// the x1/x9 values they pass.
function ff_\type\()_h264_qpel8_mc11_neon, export=1
        mov             x14, x30
        mov             x8,  x0
        mov             x9,  x1
\type\()_h264_qpel8_mc11:
        lowpass_const   w3
        mov             x11, sp
        sub             sp,  sp,  #64           // 8 rows x 8 bytes
        mov             x0,  sp
        sub             x1,  x1,  #2
        mov             x3,  #8                 // stride of the stack buffer
        mov             x12, #8
        bl              put_h264_qpel8_h_lowpass_neon
        mov             x0,  x8
        mov             x3,  x2
        mov             x12, sp                 // second source = stack buffer
        sub             x1,  x9,  x2, lsl #1
        mov             x2,  #8                 // stride of the second source
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        mov             sp,  x11
        ret             x14
endfunc

// mc21: horizontal filter (rows starting at x1) into a stack buffer, then
// the two-stage hv filter (block at x9) averaged with that buffer.
// The internal label is shared by mc23.
function ff_\type\()_h264_qpel8_mc21_neon, export=1
        mov             x14, x30
        mov             x8,  x0
        mov             x9,  x1
\type\()_h264_qpel8_mc21:
        lowpass_const   w3
        mov             x11, sp
        sub             sp,  sp,  #(8*8+16*12)  // only the first 64 bytes are written here
        sub             x1,  x1,  #2
        mov             x3,  #8
        mov             x0,  sp
        mov             x12, #8
        bl              put_h264_qpel8_h_lowpass_neon
        mov             x4,  x0                 // x0 now points just past the 64 bytes
        mov             x0,  x8
        sub             x1,  x9,  x2, lsl #1
        sub             x1,  x1,  #2
        mov             x3,  x2
        sub             x2,  x4,  #64           // second source = start of the buffer
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
        mov             sp,  x11
        ret             x14
endfunc

// mc31: as mc11 with the vertical pass taken one byte to the right.
function ff_\type\()_h264_qpel8_mc31_neon, export=1
        add             x1,  x1,  #1
        mov             x14, x30
        mov             x8,  x0
        mov             x9,  x1                 // x9 = src + 1
        sub             x1,  x1,  #1            // x1 = src
        b               \type\()_h264_qpel8_mc11
endfunc

// mc02: vertical filter only.
function ff_\type\()_h264_qpel8_mc02_neon, export=1
        mov             x14, x30
        lowpass_const   w3
        sub             x1,  x1,  x2, lsl #1
        mov             x3,  x2                 // src stride = stride
        bl              \type\()_h264_qpel8_v_lowpass_neon
        ret             x14
endfunc

// mc12: vertical filter (columns at x1) into a stack buffer, then the
// two-stage hv filter (block at x9) averaged with that buffer.
// The internal label is shared by mc32.
function ff_\type\()_h264_qpel8_mc12_neon, export=1
        mov             x14, x30
        mov             x8,  x0
        mov             x9,  x1
\type\()_h264_qpel8_mc12:
        lowpass_const   w3
        mov             x11, sp
        sub             sp,  sp,  #(8*8+16*12)  // only the first 64 bytes are written here
        sub             x1,  x1,  x2, lsl #1
        mov             x3,  x2
        mov             x2,  #8                 // stride of the stack buffer
        mov             x0,  sp
        bl              put_h264_qpel8_v_lowpass_neon
        mov             x4,  x0                 // x0 now points just past the 64 bytes
        mov             x0,  x8
        sub             x1,  x9,  x3, lsl #1
        sub             x1,  x1,  #2
        sub             x2,  x4,  #64           // second source = start of the buffer
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
        mov             sp,  x11
        ret             x14
endfunc

// mc22: two-stage hv filter only.
// NOTE(review): sp is not changed between the save and the restore below,
// and no helper on this path is seen to move it; the pair looks redundant.
function ff_\type\()_h264_qpel8_mc22_neon, export=1
        mov             x14, x30
        mov             x11, sp
        sub             x1,  x1,  x2, lsl #1
        sub             x1,  x1,  #2
        mov             x3,  x2
        bl              \type\()_h264_qpel8_hv_lowpass_neon
        mov             sp,  x11
        ret             x14
endfunc

// mc32: as mc12 with the vertical pass taken one byte to the right.
function ff_\type\()_h264_qpel8_mc32_neon, export=1
        mov             x14, x30
        mov             x8,  x0
        mov             x9,  x1
        add             x1,  x1,  #1
        b               \type\()_h264_qpel8_mc12
endfunc

// mc03: as mc01 with the second source taken one row down.
function ff_\type\()_h264_qpel8_mc03_neon, export=1
        mov             x14, x30
        add             x12, x1,  x2
        b               \type\()_h264_qpel8_mc01
endfunc

// mc13: as mc11 with the horizontal pass taken one row down.
function ff_\type\()_h264_qpel8_mc13_neon, export=1
        mov             x14, x30
        mov             x8,  x0
        mov             x9,  x1
        add             x1,  x1,  x2
        b               \type\()_h264_qpel8_mc11
endfunc

// mc23: as mc21 with the horizontal pass taken one row down.
function ff_\type\()_h264_qpel8_mc23_neon, export=1
        mov             x14, x30
        mov             x8,  x0
        mov             x9,  x1
        add             x1,  x1,  x2
        b               \type\()_h264_qpel8_mc21
endfunc

// mc33: as mc11 with the vertical pass one byte to the right and the
// horizontal pass one row down.
function ff_\type\()_h264_qpel8_mc33_neon, export=1
        add             x1,  x1,  #1
        mov             x14, x30
        mov             x8,  x0
        mov             x9,  x1                 // x9 = src + 1
        add             x1,  x1,  x2
        sub             x1,  x1,  #1            // x1 = src + stride
        b               \type\()_h264_qpel8_mc11
endfunc
.endm

        h264_qpel8 put
        h264_qpel8 avg
763
// h264_qpel16 type
// Emits the exported 16x16 entry points ff_<type>_h264_qpel16_mcXY_neon for
// XY in {10,20,30,01,11,21,31,02,12,22,32,03,13,23,33} (type is put or avg).
// They mirror the 8x8 entry points but call the 16-wide helpers, which set
// the row count themselves.
//
// Register use shared by the entry points, as consumed by the helpers:
//   In:  x0 = dst, x1 = src, x2 = stride
//   x14 = saved return address (entry points that use bl)
//   x8 / x9 = dst and a source position kept for the second pass
//   x11 = sp on entry, restored before returning
// Several entry points only set up x1/x9/x12 differently and then branch to
// an internal label (<type>_h264_qpel16_mc01/mc11/mc21/mc12) inside another
// entry point; those labels expect x14 (and x8/x9 where used) to be set.
// Temporary buffers live on the stack and are sized in multiples of 16.
.macro  h264_qpel16     type
// mc10: average of the horizontal filter and the source pixels themselves.
function ff_\type\()_h264_qpel16_mc10_neon, export=1
        lowpass_const   w3
        mov             x3,  x1                 // second source = src
        sub             x1,  x1,  #2            // filter window starts 2 bytes left
        b               \type\()_h264_qpel16_h_lowpass_l2_neon
endfunc

// mc20: horizontal filter only.
function ff_\type\()_h264_qpel16_mc20_neon, export=1
        lowpass_const   w3
        sub             x1,  x1,  #2
        mov             x3,  x2                 // dst stride = stride
        b               \type\()_h264_qpel16_h_lowpass_neon
endfunc

// mc30: average of the horizontal filter and the source one byte to the right.
function ff_\type\()_h264_qpel16_mc30_neon, export=1
        lowpass_const   w3
        add             x3,  x1,  #1            // second source = src + 1
        sub             x1,  x1,  #2
        b               \type\()_h264_qpel16_h_lowpass_l2_neon
endfunc

// mc01: average of the vertical filter and the source pixels themselves.
// The internal label is also the target of mc03, which passes a different
// second source in x12.
function ff_\type\()_h264_qpel16_mc01_neon, export=1
        mov             x14, x30
        mov             x12, x1                 // second source = src
\type\()_h264_qpel16_mc01:
        lowpass_const   w3
        mov             x3,  x2
        sub             x1,  x1,  x2, lsl #1    // filter window starts 2 rows up
        bl              \type\()_h264_qpel16_v_lowpass_l2_neon
        ret             x14
endfunc

// mc11: horizontal filter (rows starting at x1) into a 256-byte stack
// buffer, then the vertical filter (columns at x9) averaged with that buffer.
// The internal label is shared by mc31, mc13 and mc33, which differ only in
// the x1/x9 values they pass.
function ff_\type\()_h264_qpel16_mc11_neon, export=1
        mov             x14, x30
        mov             x8,  x0
        mov             x9,  x1
\type\()_h264_qpel16_mc11:
        lowpass_const   w3
        mov             x11, sp
        sub             sp,  sp,  #256          // 16 rows x 16 bytes
        mov             x0,  sp
        sub             x1,  x1,  #2
        mov             x3,  #16                // stride of the stack buffer
        bl              put_h264_qpel16_h_lowpass_neon
        mov             x0,  x8
        mov             x3,  x2
        mov             x12, sp                 // second source = stack buffer
        sub             x1,  x9,  x2, lsl #1
        mov             x2,  #16                // stride of the second source
        bl              \type\()_h264_qpel16_v_lowpass_l2_neon
        mov             sp,  x11
        ret             x14
endfunc

// mc21: horizontal filter (rows starting at x1) into a packed stack buffer,
// then the two-stage hv filter (block at x9) averaged with that buffer.
// The internal label is shared by mc23.
function ff_\type\()_h264_qpel16_mc21_neon, export=1
        mov             x14, x30
        mov             x8,  x0
        mov             x9,  x1
\type\()_h264_qpel16_mc21:
        lowpass_const   w3
        mov             x11, sp
        sub             sp,  sp,  #(16*16+16*12)        // only the first 256 bytes are written here
        sub             x1,  x1,  #2
        mov             x0,  sp
        bl              put_h264_qpel16_h_lowpass_neon_packed
        mov             x4,  x0                 // end of the packed data; callee subtracts 256
        mov             x0,  x8
        sub             x1,  x9,  x2, lsl #1
        sub             x1,  x1,  #2
        mov             x3,  x2
        bl              \type\()_h264_qpel16_hv_lowpass_l2_neon
        mov             sp,  x11
        ret             x14
endfunc

// mc31: as mc11 with the vertical pass taken one byte to the right.
function ff_\type\()_h264_qpel16_mc31_neon, export=1
        add             x1,  x1,  #1
        mov             x14, x30
        mov             x8,  x0
        mov             x9,  x1                 // x9 = src + 1
        sub             x1,  x1,  #1            // x1 = src
        b               \type\()_h264_qpel16_mc11
endfunc

// mc02: vertical filter only.
function ff_\type\()_h264_qpel16_mc02_neon, export=1
        mov             x14, x30
        lowpass_const   w3
        sub             x1,  x1,  x2, lsl #1
        mov             x3,  x2                 // src stride = stride
        bl              \type\()_h264_qpel16_v_lowpass_neon
        ret             x14
endfunc

// mc12: vertical filter (columns at x1) into a packed stack buffer, then the
// two-stage hv filter (block at x9) averaged with that buffer.
// The internal label is shared by mc32.
function ff_\type\()_h264_qpel16_mc12_neon, export=1
        mov             x14, x30
        mov             x8,  x0
        mov             x9,  x1
\type\()_h264_qpel16_mc12:
        lowpass_const   w3
        mov             x11, sp
        sub             sp,  sp,  #(16*16+16*12)        // only the first 256 bytes are written here
        sub             x1,  x1,  x2, lsl #1
        mov             x0,  sp
        mov             x3,  x2
        bl              put_h264_qpel16_v_lowpass_neon_packed
        mov             x4,  x0                 // end of the packed data; callee subtracts 256
        mov             x0,  x8
        sub             x1,  x9,  x3, lsl #1
        sub             x1,  x1,  #2
        mov             x2,  x3                 // the callee recomputes x2 from x4
        bl              \type\()_h264_qpel16_hv_lowpass_l2_neon
        mov             sp,  x11
        ret             x14
endfunc

// mc22: two-stage hv filter only.
// NOTE(review): the hv helper reloads the multipliers itself and sp is not
// changed on this path, so lowpass_const and the sp save/restore below look
// redundant; they are kept unchanged.
function ff_\type\()_h264_qpel16_mc22_neon, export=1
        mov             x14, x30
        lowpass_const   w3
        mov             x11, sp
        sub             x1,  x1,  x2, lsl #1
        sub             x1,  x1,  #2
        mov             x3,  x2
        bl              \type\()_h264_qpel16_hv_lowpass_neon
        mov             sp,  x11 // restore stack
        ret             x14
endfunc

// mc32: as mc12 with the vertical pass taken one byte to the right.
function ff_\type\()_h264_qpel16_mc32_neon, export=1
        mov             x14, x30
        mov             x8,  x0
        mov             x9,  x1
        add             x1,  x1,  #1
        b               \type\()_h264_qpel16_mc12
endfunc

// mc03: as mc01 with the second source taken one row down.
function ff_\type\()_h264_qpel16_mc03_neon, export=1
        mov             x14, x30
        add             x12, x1,  x2
        b               \type\()_h264_qpel16_mc01
endfunc

// mc13: as mc11 with the horizontal pass taken one row down.
function ff_\type\()_h264_qpel16_mc13_neon, export=1
        mov             x14, x30
        mov             x8,  x0
        mov             x9,  x1
        add             x1,  x1,  x2
        b               \type\()_h264_qpel16_mc11
endfunc

// mc23: as mc21 with the horizontal pass taken one row down.
function ff_\type\()_h264_qpel16_mc23_neon, export=1
        mov             x14, x30
        mov             x8,  x0
        mov             x9,  x1
        add             x1,  x1,  x2
        b               \type\()_h264_qpel16_mc21
endfunc

// mc33: as mc11 with the vertical pass one byte to the right and the
// horizontal pass one row down.
function ff_\type\()_h264_qpel16_mc33_neon, export=1
        add             x1,  x1,  #1
        mov             x14, x30
        mov             x8,  x0
        mov             x9,  x1                 // x9 = src + 1
        add             x1,  x1,  x2
        sub             x1,  x1,  #1            // x1 = src + stride
        b               \type\()_h264_qpel16_mc11
endfunc
.endm

        h264_qpel16 put
        h264_qpel16 avg