Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libavfilter / x86 / vf_yadif.asm
CommitLineData
2ba45a60
DM
1;*****************************************************************************
2;* x86-optimized functions for yadif filter
3;*
4;* Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
5;* Copyright (c) 2013 Daniel Kang <daniel.d.kang@gmail.com>
6;*
7;* This file is part of FFmpeg.
8;*
9;* FFmpeg is free software; you can redistribute it and/or
10;* modify it under the terms of the GNU Lesser General Public
11;* License as published by the Free Software Foundation; either
12;* version 2.1 of the License, or (at your option) any later version.
13;*
14;* FFmpeg is distributed in the hope that it will be useful,
15;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17;* Lesser General Public License for more details.
18;*
19;* You should have received a copy of the GNU Lesser General Public
20;* License along with FFmpeg; if not, write to the Free Software
21;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22;******************************************************************************
23
24%include "libavutil/x86/x86util.asm"
25
; Read-only constants referenced by the macros below.
26SECTION_RODATA
27
; pb_1: sixteen bytes, each 1. CHECK ANDs with it to isolate the low bit of
; every byte.
28pb_1: times 16 db 1
; pw_1: eight 16-bit words, each 1. Used as a word-wise +1 / -1 operand by
; CHECK2 and FILTER.
29pw_1: times 8 dw 1
30
; Everything after this point is code.
31SECTION .text
32
; CHECK %1, %2
; Evaluates one interpolation candidate between the two rows of the current
; picture addressed by curq+t1 and curq+t0.
;   %1 = byte displacement added to [curq+t1]
;   %2 = byte displacement added to [curq+t0]
; Requires m7 == 0 (it supplies the zero bytes for punpcklbw).
; Results (low half of the register widened to 16-bit words):
;   m5 = candidate prediction: truncating byte average of the two loads,
;        taken one byte above the load position
;   m2 = candidate score: sum of the byte-wise absolute differences at the
;        load position and at the next two bytes
; Overwrites m3 and m4. m0, m1, m6 and m7 are not written.
; NOTE(review): RSHIFT comes from x86util.asm; the comments below assume it
; shifts the whole register right by the given number of bytes - confirm.
33%macro CHECK 2
; Unaligned loads of the two rows at the requested displacements.
34 movu m2, [curq+t1+%1]
35 movu m3, [curq+t0+%2]
36 mova m4, m2
37 mova m5, m2
; pavgb rounds up, (a+b+1)>>1. The low bit of a^b is set exactly when a+b is
; odd, so subtracting it yields the truncating average (a+b)>>1.
38 pxor m4, m3
39 pavgb m5, m3
40 pand m4, [pb_1]
41 psubusb m5, m4
; Drop the lowest byte, then zero-extend the low half to words.
42 RSHIFT m5, 1
43 punpcklbw m5, m7
; Byte-wise |a-b|: of the two saturating differences one is 0 and the other
; is the absolute difference, so their unsigned max is |a-b|.
44 mova m4, m2
45 psubusb m2, m3
46 psubusb m3, m4
47 pmaxub m2, m3
; Make copies shifted by one and two bytes, widen all three to words and add
; them: each word lane ends up with the sum of three adjacent differences.
48 mova m3, m2
49 mova m4, m2
50 RSHIFT m3, 1
51 RSHIFT m4, 2
52 punpcklbw m2, m7
53 punpcklbw m3, m7
54 punpcklbw m4, m7
55 paddw m2, m3
56 paddw m2, m4
57%endmacro
58
; CHECK1
; Per word lane, accepts the candidate left behind by CHECK wherever its score
; is lower than the best score so far.
; In:  m0 = best score, m1 = best prediction,
;      m2 = candidate score, m5 = candidate prediction
; Out: m0 = signed min(m0, m2)
;      m1 = m5 in lanes where the old m0 > m2 (signed), unchanged elsewhere
;      m6 = that comparison mask (all ones where the candidate won); CHECK2
;           reads it
; Overwrites m3 and m5.
59%macro CHECK1 0
; m3 = mask of lanes where best score > candidate score.
60 mova m3, m0
61 pcmpgtw m3, m2
62 pminsw m0, m2
; Keep a copy of the mask before m3 is turned into the blended result.
63 mova m6, m3
; Blend: (candidate & mask) | (previous best & ~mask).
64 pand m5, m3
65 pandn m3, m1
66 por m3, m5
67 mova m1, m3
68%endmacro
69
; CHECK2
; Same selection as CHECK1, but first penalises the candidate in every lane
; where the mask in m6 is zero, i.e. where the preceding CHECK1 did not accept
; its candidate.
; In:  m0 = best score, m1 = best prediction,
;      m2 = candidate score, m5 = candidate prediction,
;      m6 = mask written by the preceding CHECK1
; Out: m0 = signed min(m0, penalised m2), m1 = blended prediction
; Overwrites m2, m3, m5 and m6.
70%macro CHECK2 0
; Mask lanes are -1 (accepted) or 0 (rejected): adding 1 gives 0 or 1, and the
; shift turns that into 0 or 0x4000.
71 paddw m6, [pw_1]
72 psllw m6, 14
; Signed saturating add of the penalty to the candidate score.
73 paddsw m2, m6
; From here on identical to CHECK1, except that the mask is not saved.
74 mova m3, m0
75 pcmpgtw m3, m2
76 pminsw m0, m2
77 pand m5, m3
78 pandn m3, m1
79 por m3, m5
80 mova m1, m3
81%endmacro
82
; LOAD %1, %2
; Loads bytes from memory operand %2 into register %1 and widens them to
; 16-bit words by interleaving with m7.
; Requires m7 == 0 for the result to be a zero-extension.
; NOTE(review): movh is an x86inc alias; presumably it loads half a register
; (mmsize/2 bytes) - confirm against x86inc.asm.
83%macro LOAD 2
84 movh %1, %2
85 punpcklbw %1, m7
86%endmacro
87
; FILTER %1, %2, %3
; Main loop of the line filter; each iteration produces mmsize/2 output bytes.
;   %1 = suffix that makes the .loop/.end labels unique per expansion
;   %2, %3 = pointer registers of the two reference lines that are averaged
;            (YADIF passes prevq,curq or curq,nextq)
; Uses dstq, prevq, curq, nextq, the row offsets t0/t1, the stack-resident
; arguments r4m (remaining width) and r8m (mode), and 64 bytes of scratch at
; [rsp].
; Scratch layout (16-bit words):
;   [rsp+ 0] = row at curq+t1
;   [rsp+16] = average of the %2 and %3 lines
;   [rsp+32] = row at curq+t0
;   [rsp+48] = temporal difference bound
; Clobbers m0-m7 and advances dstq/prevq/curq/nextq.
; NOTE(review): ABS1 and RSHIFT come from x86util.asm; comments assume
; "ABS1 a, tmp" leaves |a| per word lane in a - confirm.
88%macro FILTER 3
89.loop%1:
; m7 is the zero register for LOAD and CHECK; it is re-zeroed on every
; iteration because the mode < 2 path below overwrites it.
90 pxor m7, m7
; m0 / m1 = the two neighbouring rows of the current picture.
91 LOAD m0, [curq+t1]
92 LOAD m1, [curq+t0]
; m2 / m3 = the two reference lines at the output position.
93 LOAD m2, [%2]
94 LOAD m3, [%3]
; m3 = (%2 + %3) >> 1, m4 keeps the unmodified %3 line.
95 mova m4, m3
96 paddw m3, m2
97 psraw m3, 1
; Spill the values that are needed again after the CHECK sequence.
98 mova [rsp+ 0], m0
99 mova [rsp+16], m3
100 mova [rsp+32], m1
; m2 = |%2 - %3|
101 psubw m2, m4
102 ABS1 m2, m4
; m3 = |prev[t1] - cur[t1]| + |prev[t0] - cur[t0]|
103 LOAD m3, [prevq+t1]
104 LOAD m4, [prevq+t0]
105 psubw m3, m0
106 psubw m4, m1
107 ABS1 m3, m5
108 ABS1 m4, m5
109 paddw m3, m4
; Halve both measures and keep the larger one in m2.
110 psrlw m2, 1
111 psrlw m3, 1
112 pmaxsw m2, m3
; Same measure against the next picture; m2 = max of all three.
113 LOAD m3, [nextq+t1]
114 LOAD m4, [nextq+t0]
115 psubw m3, m0
116 psubw m4, m1
117 ABS1 m3, m5
118 ABS1 m4, m5
119 paddw m3, m4
120 psrlw m3, 1
121 pmaxsw m2, m3
; Save the temporal difference bound; m2 and m6 are reused by the CHECKs.
122 mova [rsp+48], m2
123
; m1 = (cur[t1] + cur[t0]) >> 1 : the initial spatial prediction.
; m0 = |cur[t1] - cur[t0]|, computed as |2*a - (a+b)|.
124 paddw m1, m0
125 paddw m0, m0
126 psubw m0, m1
127 psrlw m1, 1
128 ABS1 m0, m2
129
; Byte-wise |cur[t1-1+i] - cur[t0-1+i]| starting one byte left of the output
; position.
130 movu m2, [curq+t1-1]
131 movu m3, [curq+t0-1]
132 mova m4, m2
133 psubusb m2, m3
134 psubusb m3, m4
135 pmaxub m2, m3
; m3 = the same differences moved down by two bytes, so that after widening
; m2 holds the left-neighbour difference and m3 the right-neighbour one.
136%if mmsize == 16
137 mova m3, m2
138 psrldq m3, 2
139%else
; MMX path: a word shuffle stands in for the two-byte shift; only the low two
; result words are consumed by the punpcklbw below.
140 pshufw m3, m2, q0021
141%endif
142 punpcklbw m2, m7
143 punpcklbw m3, m7
; m0 = initial spatial score = left + centre + right differences, minus 1.
144 paddw m0, m2
145 paddw m0, m3
146 psubw m0, [pw_1]
147
; Try four candidates, two per direction. Each CHECK2 candidate is penalised
; in lanes where the CHECK1 candidate just before it was not accepted.
; m0 = best score, m1 = best prediction throughout.
148 CHECK -2, 0
149 CHECK1
150 CHECK -3, 1
151 CHECK2
152 CHECK 0, -2
153 CHECK1
154 CHECK 1, -3
155 CHECK2
156
; m6 = temporal difference bound saved above.
157 mova m6, [rsp+48]
; r8m is the ninth argument, named mode in the cglobal list. For mode >= 2 the
; extra widening of the bound below is skipped.
158 cmp DWORD r8m, 2
159 jge .end%1
; m2 = (%2[2*t1] + %3[2*t1]) >> 1, m3 = (%2[2*t0] + %3[2*t0]) >> 1 :
; averages of the reference lines two rows away in each direction.
160 LOAD m2, [%2+t1*2]
161 LOAD m4, [%3+t1*2]
162 LOAD m3, [%2+t0*2]
163 LOAD m5, [%3+t0*2]
164 paddw m2, m4
165 paddw m3, m5
166 psrlw m2, 1
167 psrlw m3, 1
; Reload the spilled rows. m7 stops being zero here; the loop head clears it.
168 mova m4, [rsp+ 0]
169 mova m5, [rsp+16]
170 mova m7, [rsp+32]
; With a = cur[t1], d = reference average, e = cur[t0]:
; m2 = far(t1) - a, m3 = far(t0) - e, m5 = d - a, m0 = d - e.
171 psubw m2, m4
172 psubw m3, m7
173 mova m0, m5
174 psubw m5, m4
175 psubw m0, m7
; m2 = max(d-e, d-a, min(m2, m3)); m3 = min(d-e, d-a, max(m2, m3)).
176 mova m4, m2
177 pminsw m2, m3
178 pmaxsw m3, m4
179 pmaxsw m2, m5
180 pminsw m3, m5
181 pmaxsw m2, m0
182 pminsw m3, m0
; bound = max(bound, m3, -m2)
183 pxor m4, m4
184 pmaxsw m6, m3
185 psubw m4, m2
186 pmaxsw m6, m4
187
188.end%1:
; Clamp the spatial prediction m1 to [d - bound, d + bound], where d is the
; reference average spilled at [rsp+16].
189 mova m2, [rsp+16]
190 mova m3, m2
191 psubw m2, m6
192 paddw m3, m6
193 pmaxsw m1, m2
194 pminsw m1, m3
; Pack the words back to bytes with unsigned saturation.
195 packuswb m1, m1
196
; Store the result and step every line pointer by the bytes just produced.
197 movh [dstq], m1
198 add dstq, mmsize/2
199 add prevq, mmsize/2
200 add curq, mmsize/2
201 add nextq, mmsize/2
; r4m is the fifth argument, named w in the cglobal list. The test is at the
; bottom, so the body runs at least once and the width is in effect rounded up
; to a multiple of mmsize/2.
202 sub DWORD r4m, mmsize/2
203 jg .loop%1
204%endmacro
205
; YADIF
; Emits one yadif_filter_line function for the instruction set selected by
; the preceding INIT_* macro.
; Named arguments, in order: dst, prev, cur, next, w, prefs, mrefs, parity,
; mode.
; NOTE(review): the numeric cglobal fields are read per the usual x86inc
; convention (arguments loaded into registers, GPRs used, SIMD registers used,
; stack bytes) - confirm against x86inc.asm. FILTER uses 64 of the 80 stack
; bytes requested here.
206%macro YADIF 0
; The 32-bit and 64-bit builds differ only in the third field (6 vs 7).
207%if ARCH_X86_32
208cglobal yadif_filter_line, 4, 6, 8, 80, dst, prev, cur, next, w, prefs, \
209 mrefs, parity, mode
210%else
211cglobal yadif_filter_line, 4, 7, 8, 80, dst, prev, cur, next, w, prefs, \
212 mrefs, parity, mode
213%endif
; Fetch prefs (argument index 5) and mrefs (argument index 6) into registers
; and name them t0 and t1. In both branches t0 ends up holding prefs and t1
; holds mrefs.
214%if ARCH_X86_32
; 32-bit: r4 and r5 are used for prefs and mrefs; w is only ever touched
; through its memory operand r4m inside FILTER.
215 mov r4, r5mp
216 mov r5, r6mp
217 DECLARE_REG_TMP 4,5
218%else
; 64-bit: both offsets are sign-extended from 32 bits so they can be used in
; address arithmetic.
219 movsxd r5, DWORD r5m
220 movsxd r6, DWORD r6m
221 DECLARE_REG_TMP 5,6
222%endif
223
; parity selects which pair of lines FILTER averages:
;   parity != 0 -> prev and cur
;   parity == 0 -> cur and next
224 cmp DWORD paritym, 0
225 je .parity0
226 FILTER 1, prevq, curq
227 jmp .ret
228
229.parity0:
230 FILTER 0, curq, nextq
231
232.ret:
233 RET
234%endmacro
235
; Expand YADIF once per supported instruction set. The XMM variants are built
; for SSSE3 and SSE2; the MMX variant is built for 32-bit x86 only.
; NOTE(review): INIT_XMM / INIT_MMX are x86inc macros; presumably they set
; mmsize and the symbol suffix of each cglobal - confirm against x86inc.asm.
236INIT_XMM ssse3
237YADIF
238INIT_XMM sse2
239YADIF
240%if ARCH_X86_32
241INIT_MMX mmxext
242YADIF
243%endif