Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | ;***************************************************************************** |
2 | ;* x86-optimized functions for yadif filter | |
3 | ;* | |
4 | ;* Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at> | |
5 | ;* Copyright (c) 2013 Daniel Kang <daniel.d.kang@gmail.com> | |
6 | ;* | |
7 | ;* This file is part of FFmpeg. | |
8 | ;* | |
9 | ;* FFmpeg is free software; you can redistribute it and/or | |
10 | ;* modify it under the terms of the GNU Lesser General Public | |
11 | ;* License as published by the Free Software Foundation; either | |
12 | ;* version 2.1 of the License, or (at your option) any later version. | |
13 | ;* | |
14 | ;* FFmpeg is distributed in the hope that it will be useful, | |
15 | ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | ;* Lesser General Public License for more details. | |
18 | ;* | |
19 | ;* You should have received a copy of the GNU Lesser General Public | |
20 | ;* License along with FFmpeg; if not, write to the Free Software | |
21 | ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 | ;****************************************************************************** | |
23 | ||
24 | %include "libavutil/x86/x86util.asm" | |
25 | ||
26 | SECTION_RODATA | |
27 | ||
; pb_1: sixteen bytes of 1. CHECK ANDs with it to keep the low bit of each byte.
; pw_1: eight words of 1. CHECK2 adds it and FILTER subtracts it, per word.
28 | pb_1: times 16 db 1 |
29 | pw_1: times 8 dw 1 |
30 | ||
31 | SECTION .text | |
32 | ||
; CHECK %1, %2
; Evaluates one candidate pair of rows for the search done in FILTER.
;   %1 = byte offset added to curq+t1
;   %2 = byte offset added to curq+t0
; Expects m7 == 0 (it is the zero operand of every punpcklbw below).
; Leaves:
;   m5 = truncating byte average (a+b)>>1 of the two rows, shifted by one
;        byte and widened to words
;   m2 = sum, as words, of the byte-wise |a-b| at three shift positions
;        (unshifted, shifted by 1, shifted by 2)
; Overwrites m3 and m4.
; NOTE(review): RSHIFT is defined in x86util.asm, not in this file; it is
; assumed to shift the whole register right by the given number of bytes.
33 | %macro CHECK 2 |
34 | movu m2, [curq+t1+%1] |
35 | movu m3, [curq+t0+%2] |
36 | mova m4, m2 |
37 | mova m5, m2 |
; pavgb rounds up; subtracting the low bit of a^b turns it into (a+b)>>1.
38 | pxor m4, m3 |
39 | pavgb m5, m3 |
40 | pand m4, [pb_1] |
41 | psubusb m5, m4 |
42 | RSHIFT m5, 1 |
43 | punpcklbw m5, m7 |
; m2 = |a-b| per unsigned byte: the larger of the two saturating differences.
44 | mova m4, m2 |
45 | psubusb m2, m3 |
46 | psubusb m3, m4 |
47 | pmaxub m2, m3 |
; Widen the unshifted, 1-shifted and 2-shifted differences and add them.
48 | mova m3, m2 |
49 | mova m4, m2 |
50 | RSHIFT m3, 1 |
51 | RSHIFT m4, 2 |
52 | punpcklbw m2, m7 |
53 | punpcklbw m3, m7 |
54 | punpcklbw m4, m7 |
55 | paddw m2, m3 |
56 | paddw m2, m4 |
57 | %endmacro |
58 | ||
; CHECK1
; Folds the candidate left by CHECK into the running best, per word lane.
;   in:  m0 = current score, m1 = current value,
;        m2 = candidate score, m5 = candidate value
;   out: m0 = min(m0, m2) (signed words)
;        m1 = m5 in lanes where the old m0 was greater than m2, else unchanged
;        m6 = the comparison mask (all ones where the candidate won);
;             CHECK2 reads it
; Overwrites m3 and m5.
59 | %macro CHECK1 0 |
60 | mova m3, m0 |
61 | pcmpgtw m3, m2 |
62 | pminsw m0, m2 |
63 | mova m6, m3 |
; Mask-select: m1 = (mask & m5) | (~mask & m1).
64 | pand m5, m3 |
65 | pandn m3, m1 |
66 | por m3, m5 |
67 | mova m1, m3 |
68 | %endmacro |
69 | ||
; CHECK2
; Same fold as CHECK1, but first penalises the candidate score using the
; mask CHECK1 left in m6.
;   in:  m0/m1 = current score/value, m2/m5 = candidate score/value,
;        m6 = mask from the preceding CHECK1
;   out: m0, m1 updated as in CHECK1; m6 is not refreshed with the new mask
; Overwrites m2, m3, m5 and m6.
70 | %macro CHECK2 0 |
; mask+1 is 0 where the previous candidate won and 1 where it did not.
; Shifted left by 14 that is 0 or 0x4000, added with signed saturation.
71 | paddw m6, [pw_1] |
72 | psllw m6, 14 |
73 | paddsw m2, m6 |
74 | mova m3, m0 |
75 | pcmpgtw m3, m2 |
76 | pminsw m0, m2 |
; Mask-select: m1 = (mask & m5) | (~mask & m1).
77 | pand m5, m3 |
78 | pandn m3, m1 |
79 | por m3, m5 |
80 | mova m1, m3 |
81 | %endmacro |
82 | ||
; LOAD %1, %2
; Loads from memory operand %2 into register %1 and interleaves the low bytes
; with m7. With m7 == 0 this zero-extends those bytes to words.
; NOTE(review): movh is provided by the included macro package; it is assumed
; to load half a register's worth of bytes (mmsize/2).
83 | %macro LOAD 2 |
84 | movh %1, %2 |
85 | punpcklbw %1, m7 |
86 | %endmacro |
87 | ||
; FILTER %1, %2, %3
; Body of the line filter: one loop that writes mmsize/2 output bytes per pass.
;   %1 = suffix that makes the .loop/.end labels unique per expansion
;   %2, %3 = the two pointer registers whose rows are averaged
;            (YADIF passes prevq,curq or curq,nextq)
; Uses dstq/prevq/curq/nextq, the row offsets t0 and t1, and four 16-byte
; stack slots:
;   [rsp+ 0] = c    = row at curq+t1, as words
;   [rsp+16] = d    = ([%2] + [%3]) >> 1
;   [rsp+32] = e    = row at curq+t0, as words
;   [rsp+48] = diff = largest of the three halved differences computed below
; NOTE(review): ABS1 comes from x86util.asm; it is assumed to leave the
; per-word absolute value in its first operand, using the second as scratch.
88 | %macro FILTER 3 |
89 | .loop%1: |
; m7 is the zero register for LOAD and CHECK. It is re-cleared on every pass
; because the block before .end overwrites it.
90 | pxor m7, m7 |
91 | LOAD m0, [curq+t1] |
92 | LOAD m1, [curq+t0] |
93 | LOAD m2, [%2] |
94 | LOAD m3, [%3] |
95 | mova m4, m3 |
96 | paddw m3, m2 |
97 | psraw m3, 1 |
98 | mova [rsp+ 0], m0 |
99 | mova [rsp+16], m3 |
100 | mova [rsp+32], m1 |
; m2 = |[%2] - [%3]|
101 | psubw m2, m4 |
102 | ABS1 m2, m4 |
; m3 = |prev[t1] - c| + |prev[t0] - e|
103 | LOAD m3, [prevq+t1] |
104 | LOAD m4, [prevq+t0] |
105 | psubw m3, m0 |
106 | psubw m4, m1 |
107 | ABS1 m3, m5 |
108 | ABS1 m4, m5 |
109 | paddw m3, m4 |
110 | psrlw m2, 1 |
111 | psrlw m3, 1 |
112 | pmaxsw m2, m3 |
; m3 = |next[t1] - c| + |next[t0] - e|
113 | LOAD m3, [nextq+t1] |
114 | LOAD m4, [nextq+t0] |
115 | psubw m3, m0 |
116 | psubw m4, m1 |
117 | ABS1 m3, m5 |
118 | ABS1 m4, m5 |
119 | paddw m3, m4 |
120 | psrlw m3, 1 |
121 | pmaxsw m2, m3 |
122 | mova [rsp+48], m2 |
123 | ||
; Starting candidate: m1 = (c + e) >> 1, m0 = |c - e|.
124 | paddw m1, m0 |
125 | paddw m0, m0 |
126 | psubw m0, m1 |
127 | psrlw m1, 1 |
128 | ABS1 m0, m2 |
129 | ||
; Add the byte-wise |cur[t1] - cur[t0]| at one byte to the left (load at -1)
; and one byte to the right (same data shifted down by two bytes), then
; subtract 1 from the total.
130 | movu m2, [curq+t1-1] |
131 | movu m3, [curq+t0-1] |
132 | mova m4, m2 |
133 | psubusb m2, m3 |
134 | psubusb m3, m4 |
135 | pmaxub m2, m3 |
136 | %if mmsize == 16 |
137 | mova m3, m2 |
138 | psrldq m3, 2 |
139 | %else |
140 | pshufw m3, m2, q0021 |
141 | %endif |
142 | punpcklbw m2, m7 |
143 | punpcklbw m3, m7 |
144 | paddw m0, m2 |
145 | paddw m0, m3 |
146 | psubw m0, [pw_1] |
147 | ||
; Try four offset pairs, two on each side. Each CHECK2 candidate is
; penalised in the lanes where the CHECK1 candidate just before it did not win.
148 | CHECK -2, 0 |
149 | CHECK1 |
150 | CHECK -3, 1 |
151 | CHECK2 |
152 | CHECK 0, -2 |
153 | CHECK1 |
154 | CHECK 1, -3 |
155 | CHECK2 |
156 | ||
; m6 = diff. r8m is argument index 8, which the cglobal line in YADIF names
; `mode`; for mode >= 2 the extra bound below is skipped.
157 | mova m6, [rsp+48] |
158 | cmp DWORD r8m, 2 |
159 | jge .end%1 |
; b = ([%2+2*t1] + [%3+2*t1]) >> 1, f = ([%2+2*t0] + [%3+2*t0]) >> 1
160 | LOAD m2, [%2+t1*2] |
161 | LOAD m4, [%3+t1*2] |
162 | LOAD m3, [%2+t0*2] |
163 | LOAD m5, [%3+t0*2] |
164 | paddw m2, m4 |
165 | paddw m3, m5 |
166 | psrlw m2, 1 |
167 | psrlw m3, 1 |
; m7 stops being zero here (it now holds e) and m0 is reused as scratch.
168 | mova m4, [rsp+ 0] |
169 | mova m5, [rsp+16] |
170 | mova m7, [rsp+32] |
171 | psubw m2, m4 |
172 | psubw m3, m7 |
173 | mova m0, m5 |
174 | psubw m5, m4 |
175 | psubw m0, m7 |
; m2 = max(min(b-c, f-e), d-c, d-e); m3 = min(max(b-c, f-e), d-c, d-e)
176 | mova m4, m2 |
177 | pminsw m2, m3 |
178 | pmaxsw m3, m4 |
179 | pmaxsw m2, m5 |
180 | pminsw m3, m5 |
181 | pmaxsw m2, m0 |
182 | pminsw m3, m0 |
; diff = max(diff, m3, -m2)
183 | pxor m4, m4 |
184 | pmaxsw m6, m3 |
185 | psubw m4, m2 |
186 | pmaxsw m6, m4 |
187 | ||
188 | .end%1: |
; Clamp the selected value m1 to [d - diff, d + diff], then pack to bytes with
; unsigned saturation.
189 | mova m2, [rsp+16] |
190 | mova m3, m2 |
191 | psubw m2, m6 |
192 | paddw m3, m6 |
193 | pmaxsw m1, m2 |
194 | pminsw m1, m3 |
195 | packuswb m1, m1 |
196 | ||
; Store the result and advance all four pointers by the same mmsize/2 step.
197 | movh [dstq], m1 |
198 | add dstq, mmsize/2 |
199 | add prevq, mmsize/2 |
200 | add curq, mmsize/2 |
201 | add nextq, mmsize/2 |
; r4m is argument index 4 (named `w` in YADIF); the count is decremented in
; its memory slot and the loop repeats while it stays positive.
202 | sub DWORD r4m, mmsize/2 |
203 | jg .loop%1 |
204 | %endmacro |
205 | ||
; YADIF
; Emits yadif_filter_line for the instruction set selected by the preceding
; INIT_* line. The cglobal line names the arguments, in order:
;   dst, prev, cur, next, w, prefs, mrefs, parity, mode
; and requests 80 bytes of stack; FILTER addresses [rsp+0]..[rsp+63] of it.
; NOTE(review): cglobal, DECLARE_REG_TMP and the rNm/rNmp accessors come from
; the included macro package. The two cglobal variants differ only in the
; second numeric field (6 vs 7), presumably the general-purpose register count.
206 | %macro YADIF 0 |
207 | %if ARCH_X86_32 |
208 | cglobal yadif_filter_line, 4, 6, 8, 80, dst, prev, cur, next, w, prefs, \ |
209 | mrefs, parity, mode |
210 | %else |
211 | cglobal yadif_filter_line, 4, 7, 8, 80, dst, prev, cur, next, w, prefs, \ |
212 | mrefs, parity, mode |
213 | %endif |
; Put argument 5 (prefs) and argument 6 (mrefs) in registers and make them the
; temporaries t0 and t1 used by CHECK and FILTER. The 64-bit path
; sign-extends the 32-bit values before they are used in addressing.
214 | %if ARCH_X86_32 |
215 | mov r4, r5mp |
216 | mov r5, r6mp |
217 | DECLARE_REG_TMP 4,5 |
218 | %else |
219 | movsxd r5, DWORD r5m |
220 | movsxd r6, DWORD r6m |
221 | DECLARE_REG_TMP 5,6 |
222 | %endif |
223 | ||
; parity picks the row pair averaged by FILTER: non-zero uses prev/cur,
; zero uses cur/next.
224 | cmp DWORD paritym, 0 |
225 | je .parity0 |
226 | FILTER 1, prevq, curq |
227 | jmp .ret |
228 | ||
229 | .parity0: |
230 | FILTER 0, curq, nextq |
231 | ||
232 | .ret: |
233 | RET |
234 | %endmacro |
235 | ||
; Expand YADIF once per target: SSSE3 and SSE2 with XMM registers everywhere,
; plus an MMX (mmxext) version on 32-bit x86 only.
; NOTE(review): INIT_XMM and INIT_MMX come from the included macro package;
; they are assumed to set mmsize and the mN register mapping for what follows.
236 | INIT_XMM ssse3 |
237 | YADIF |
238 | INIT_XMM sse2 |
239 | YADIF |
240 | %if ARCH_X86_32 |
241 | INIT_MMX mmxext |
242 | YADIF |
243 | %endif |