Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (c) 2010 Mans Rullgard <mans@mansr.com> | |
3 | * | |
4 | * This file is part of FFmpeg. | |
5 | * | |
6 | * FFmpeg is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * FFmpeg is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with FFmpeg; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | */ | |
20 | ||
21 | #include "libavutil/arm/asm.S" | |
22 | ||
23 | .macro vp6_edge_filter /* VP6 edge filter core. In: q8..q11 = pixels p[-2*s], p[-s], p[0], p[s] (u8), r2 = t. Out: q9 = p[-s] + v, q10 = p[0] - v, saturated to u8; only bytes 0-11 are filtered. Overwrites q0-q3, q8, q12-q15. */ | |
24 | vdup.16 q3, r2 @ t | |
25 | vmov.i16 q13, #1 /* constant 1 in each 16-bit lane */ | |
26 | vsubl.u8 q0, d20, d18 @ p[ 0] - p[-s] | |
27 | vsubl.u8 q1, d16, d22 @ p[-2*s] - p[ s] | |
28 | vsubl.u8 q14, d21, d19 /* bytes 8-15: p[0] - p[-s] */ | |
29 | vsubl.u8 q15, d17, d23 /* bytes 8-15: p[-2*s] - p[s] */ | |
30 | vadd.i16 q2, q0, q0 @ 2*(p[0]-p[-s]) | |
31 | vadd.i16 d29, d28, d28 /* from here the upper path works on 4 lanes only (d28/d30 = bytes 8-11): 2*(p[0]-p[-s]) */ | |
32 | vadd.i16 q0, q0, q1 @ p[0]-p[-s] + p[-2*s]-p[s] | |
33 | vadd.i16 d28, d28, d30 /* bytes 8-11: p[0]-p[-s] + p[-2*s]-p[s] */ | |
34 | vadd.i16 q0, q0, q2 @ 3*(p[0]-p[-s]) + p[-2*s]-p[s] | |
35 | vadd.i16 d28, d28, d29 /* bytes 8-11: 3*(p[0]-p[-s]) + p[-2*s]-p[s] */ | |
36 | vrshr.s16 q0, q0, #3 @ v | |
37 | vrshr.s16 d28, d28, #3 /* bytes 8-11: v (rounding shift) */ | |
38 | vsub.i16 q8, q3, q13 @ t-1 | |
39 | vabs.s16 q1, q0 @ V | |
40 | vshr.s16 q2, q0, #15 @ s | |
41 | vabs.s16 d30, d28 /* bytes 8-11: V = abs(v) */ | |
42 | vshr.s16 d29, d28, #15 /* bytes 8-11: s = v >> 15, i.e. 0 or -1 */ | |
43 | vsub.i16 q12, q1, q3 @ V-t | |
44 | vsub.i16 d31, d30, d6 /* bytes 8-11: V-t */ | |
45 | vsub.i16 q12, q12, q13 @ V-t-1 | |
46 | vsub.i16 d31, d31, d26 /* bytes 8-11: V-t-1 */ | |
47 | vcge.u16 q12, q12, q8 @ V-t-1 >= t-1 | |
48 | vcge.u16 d31, d31, d16 /* bytes 8-11: same unsigned compare, d16 = t-1; result is an all-ones or all-zeros lane mask */ | |
49 | vadd.i16 q13, q3, q3 @ 2*t | |
50 | vadd.i16 d16, d6, d6 /* bytes 8-11: 2*t; d16 becomes scratch after its last read as t-1 on the previous line */ | |
51 | vsub.i16 q13, q13, q1 @ 2*t - V | |
52 | vsub.i16 d16, d16, d30 /* bytes 8-11: 2*t - V */ | |
53 | vadd.i16 q13, q13, q2 @ += s | |
54 | vadd.i16 d16, d16, d29 /* bytes 8-11: += s */ | |
55 | veor q13, q13, q2 @ ^= s | |
56 | veor d16, d16, d29 /* bytes 8-11: ^= s; (x + s) ^ s negates x when s is -1 */ | |
57 | vbif q0, q13, q12 /* where the compare mask is clear, replace v with the adjusted value in q13; elsewhere keep v */ | |
58 | vbif d28, d16, d31 /* bytes 8-11: same select using d16 and mask d31 */ | |
59 | vmovl.u8 q1, d20 /* widen p[0] bytes 0-7 to 16 bits */ | |
60 | vmovl.u8 q15, d21 /* widen p[0] bytes 8-15 to 16 bits */ | |
61 | vaddw.u8 q2, q0, d18 /* p[-s] + v for bytes 0-7 */ | |
62 | vaddw.u8 q3, q14, d19 /* p[-s] + v for bytes 8-11 in d6; d7 is built from d29 (s), not from a filter value */ | |
63 | vsub.i16 q1, q1, q0 /* p[0] - v for bytes 0-7 */ | |
64 | vsub.i16 d30, d30, d28 /* p[0] - v for bytes 8-11; d31 keeps the widened p[0] bytes 12-15 */ | |
65 | vqmovun.s16 d18, q2 /* narrow to u8 with unsigned saturation: new p[-s] bytes 0-7 */ | |
66 | vqmovun.s16 d19, q3 /* new p[-s]; only the low 4 bytes come from filtered lanes */ | |
67 | vqmovun.s16 d20, q1 /* new p[0] bytes 0-7 */ | |
68 | vqmovun.s16 d21, q15 /* new p[0]; only the low 4 bytes come from filtered lanes */ | |
69 | .endm | |
70 | ||
71 | function ff_vp6_edge_filter_ver_neon, export=1 /* Filters across a horizontal edge: rewrites 12 bytes in each of rows p[-s] and p[0]. r0 = address of p[0], r1 = s (byte offset between rows), r2 = t (read by vp6_edge_filter). NOTE(review): C prototype is not visible here - confirm argument types against the caller. */ | |
72 | sub r0, r0, r1, lsl #1 /* r0 -= 2*s: start at row p[-2*s] */ | |
73 | vld1.8 {q8}, [r0], r1 @ p[-2*s] | |
74 | vld1.8 {q9}, [r0], r1 @ p[-s] | |
75 | vld1.8 {q10}, [r0], r1 @ p[0] | |
76 | vld1.8 {q11}, [r0] @ p[s] | |
77 | vp6_edge_filter /* leaves the new p[-s] in d18/d19 and the new p[0] in d20/d21 */ | |
78 | sub r0, r0, r1, lsl #1 /* last load had no writeback, so r0 is at row p[s]; step back to row p[-s] */ | |
79 | sub r1, r1, #8 /* r1 = s - 8, compensating for the 8-byte writeback of the d18/d20 stores */ | |
80 | vst1.8 {d18}, [r0]! /* row p[-s], bytes 0-7; r0 += 8 */ | |
81 | vst1.32 {d19[0]}, [r0], r1 /* row p[-s], bytes 8-11; r0 += s - 8, i.e. start of row p[0] */ | |
82 | vst1.8 {d20}, [r0]! /* row p[0], bytes 0-7; r0 += 8 */ | |
83 | vst1.32 {d21[0]}, [r0] /* row p[0], bytes 8-11 */ | |
84 | bx lr | |
85 | endfunc | |
86 | ||
87 | function ff_vp6_edge_filter_hor_neon, export=1 /* Filters across a vertical edge: rewrites bytes p[-1] and p[0] in each of 12 consecutive rows. r0 = address of p[0] in the first row, r1 = byte offset between rows, r2 = t (read by vp6_edge_filter). NOTE(review): C prototype is not visible here - confirm argument types against the caller. */ | |
88 | sub r3, r0, #1 /* r3 = store pointer, at p[-1] of the first row */ | |
89 | sub r0, r0, #2 /* r0 = load pointer, at p[-2] of the first row */ | |
90 | vld1.32 {d16[0]}, [r0], r1 /* rows 0-3: 4 bytes p[-2..1] of each row into 32-bit lane 0 of d16/d18/d20/d22 */ | |
91 | vld1.32 {d18[0]}, [r0], r1 | |
92 | vld1.32 {d20[0]}, [r0], r1 | |
93 | vld1.32 {d22[0]}, [r0], r1 | |
94 | vld1.32 {d16[1]}, [r0], r1 /* rows 4-7: into 32-bit lane 1 of d16/d18/d20/d22 */ | |
95 | vld1.32 {d18[1]}, [r0], r1 | |
96 | vld1.32 {d20[1]}, [r0], r1 | |
97 | vld1.32 {d22[1]}, [r0], r1 | |
98 | vld1.32 {d17[0]}, [r0], r1 /* rows 8-11: into 32-bit lane 0 of d17/d19/d21/d23; lane 1 of these is never loaded and never stored */ | |
99 | vld1.32 {d19[0]}, [r0], r1 | |
100 | vld1.32 {d21[0]}, [r0], r1 | |
101 | vld1.32 {d23[0]}, [r0], r1 | |
102 | vtrn.8 q8, q9 /* these four vtrn transpose each 4x4 byte block, giving q8..q11 = columns p[-2], p[-1], p[0], p[1] with one byte per row */ | |
103 | vtrn.8 q10, q11 | |
104 | vtrn.16 q8, q10 | |
105 | vtrn.16 q9, q11 | |
106 | vp6_edge_filter /* leaves the new p[-1] column in q9 and the new p[0] column in q10 */ | |
107 | vtrn.8 q9, q10 /* pair up p[-1] and p[0] of each row: 16-bit lanes of q9 hold even rows, lanes of q10 hold odd rows */ | |
108 | vst1.16 {d18[0]}, [r3], r1 /* each store writes the 2-byte pair p[-1], p[0] of one row, then advances r3 by one row; this is row 0 */ | |
109 | vst1.16 {d20[0]}, [r3], r1 /* row 1 */ | |
110 | vst1.16 {d18[1]}, [r3], r1 /* row 2 */ | |
111 | vst1.16 {d20[1]}, [r3], r1 /* row 3 */ | |
112 | vst1.16 {d18[2]}, [r3], r1 /* row 4 */ | |
113 | vst1.16 {d20[2]}, [r3], r1 /* row 5 */ | |
114 | vst1.16 {d18[3]}, [r3], r1 /* row 6 */ | |
115 | vst1.16 {d20[3]}, [r3], r1 /* row 7 */ | |
116 | vst1.16 {d19[0]}, [r3], r1 /* row 8 */ | |
117 | vst1.16 {d21[0]}, [r3], r1 /* row 9 */ | |
118 | vst1.16 {d19[1]}, [r3], r1 /* row 10 */ | |
119 | vst1.16 {d21[1]}, [r3], r1 /* row 11 */ | |
120 | bx lr | |
121 | endfunc |