Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libavfilter / x86 / vf_pullup.asm
CommitLineData
2ba45a60
DM
;*****************************************************************************
;* x86-optimized functions for pullup filter
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License along
;* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
;* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
;******************************************************************************
20
21%include "libavutil/x86/x86util.asm"
22
23SECTION_TEXT
24
INIT_MMX mmx
;------------------------------------------------------------------------------
; pullup_filter_diff(first, second, size)
;
; Sums |first[x] - second[x]| over a block 8 bytes wide and 4 rows tall.
; Both pointers are advanced by `size` bytes after every row. The 32-bit
; total is returned in eax.
;
; Register roles:
;   m7  = all zeroes (upper half for the byte->word / word->dword unpacks)
;   m4  = four 16-bit running sums (at most 2 * 255 * 4 = 2040 per lane)
;   r3d = rows left to process
;   r4d = scratch for the final horizontal add
;
; NOTE(review): no emms is executed here; presumably the caller resets the
; MMX/x87 state - confirm.
;------------------------------------------------------------------------------
cglobal pullup_filter_diff, 3, 5, 8, first, second, size
    pxor        m7, m7
    pxor        m4, m4
    mov         r3d, 4

.loop:
    movq        m0, [firstq]
    movq        m1, [secondq]
    add         firstq, sizeq
    add         secondq, sizeq

    movq        m2, m0
    psubusb     m2, m1              ; max(first - second, 0) per byte
    psubusb     m1, m0              ; max(second - first, 0) per byte
    por         m2, m1              ; one side is always 0 => |first - second|

    movq        m0, m2
    punpcklbw   m0, m7              ; bytes 0..3 widened to words
    punpckhbw   m2, m7              ; bytes 4..7 widened to words
    paddw       m4, m0
    paddw       m4, m2

    dec         r3d
    jnz         .loop

    ; horizontal add: 4 words -> 2 dwords -> 1 dword in eax
    movq        m3, m4
    punpckhwd   m3, m7
    punpcklwd   m4, m7
    paddd       m4, m3
    movd        eax, m4
    psrlq       m4, 32
    movd        r4d, m4
    add         eax, r4d
    RET
62
;------------------------------------------------------------------------------
; PULLUP_COMB_ACC unpack, centre, neighbour_a, neighbour_b
;
; Widens four bytes (selected by the unpack instruction given as %1) of the
; three 8-byte rows %2, %3, %4 to words and adds
;     |2 * centre - (neighbour_a + neighbour_b)|
; to the four 16-bit lanes of m6. Expects m7 == 0; clobbers m0, m1, m2.
;------------------------------------------------------------------------------
%macro PULLUP_COMB_ACC 4
    movq        m0, %2
    movq        m1, %3
    movq        m2, %4
    %1          m0, m7
    %1          m1, m7
    %1          m2, m7
    paddw       m0, m0              ; 2 * centre
    paddw       m1, m2              ; neighbour_a + neighbour_b
    movq        m2, m0
    psubusw     m0, m1              ; max(2c - n, 0)
    psubusw     m1, m2              ; max(n - 2c, 0)
    paddw       m6, m0              ; one of the two is 0, so together
    paddw       m6, m1              ; they add the absolute difference
%endmacro

INIT_MMX mmx
;------------------------------------------------------------------------------
; pullup_filter_comb(first, second, size)
;
; For each of 4 rows (8 bytes wide, rows `size` bytes apart) accumulates
;     |2 * first[x]  - second[x - size] - second[x]|
;   + |2 * second[x] - first[x]         - first[x + size]|
; and returns the 32-bit total in eax.
;
; Register roles:
;   m7  = all zeroes
;   m6  = four 16-bit running sums (at most 4 * 510 * 4 = 8160 per lane)
;   r3d = rows left to process
;   r4d = scratch for the final horizontal add
; secondq is moved back one row up front, so inside the loop [secondq] is the
; row above the current `second` row and [secondq+sizeq] is the current one.
;
; NOTE(review): no emms is executed here; presumably the caller resets the
; MMX/x87 state - confirm.
;------------------------------------------------------------------------------
cglobal pullup_filter_comb, 3, 5, 8, first, second, size
    pxor        m7, m7
    pxor        m6, m6
    sub         secondq, sizeq
    mov         r3d, 4

.loop:
    ; current `first` row against the two `second` rows around it
    PULLUP_COMB_ACC punpcklbw, [firstq], [secondq], [secondq+sizeq]
    PULLUP_COMB_ACC punpckhbw, [firstq], [secondq], [secondq+sizeq]

    ; current `second` row against the two `first` rows around it
    PULLUP_COMB_ACC punpcklbw, [secondq+sizeq], [firstq], [firstq+sizeq]
    PULLUP_COMB_ACC punpckhbw, [secondq+sizeq], [firstq], [firstq+sizeq]

    add         firstq, sizeq
    add         secondq, sizeq
    dec         r3d
    jnz         .loop

    ; horizontal add: 4 words -> 2 dwords -> 1 dword in eax
    movq        m5, m6
    punpckhwd   m5, m7
    punpcklwd   m6, m7
    paddd       m6, m5
    movd        eax, m6
    psrlq       m6, 32
    movd        r4d, m6
    add         eax, r4d
    RET
141
INIT_MMX mmx
;------------------------------------------------------------------------------
; pullup_filter_var(first, second, size)
;
; Sums |first[x] - first[x + size]| over 3 consecutive row pairs of an
; 8-byte-wide block, multiplies the total by 4 and returns it in eax.
; `second` is part of the argument list but is never read here.
;
; Register roles:
;   m7  = all zeroes
;   m4  = four 16-bit running sums (at most 2 * 255 * 3 = 1530 per lane)
;   r3d = row pairs left to process
;   r4d = scratch for the final horizontal add
;
; NOTE(review): the final *4 presumably puts the result on the same scale as
; pullup_filter_comb - confirm against the C reference.
; NOTE(review): no emms is executed here; presumably the caller resets the
; MMX/x87 state - confirm.
;------------------------------------------------------------------------------
cglobal pullup_filter_var, 3, 5, 8, first, second, size
    pxor        m7, m7
    pxor        m4, m4
    mov         r3d, 3

.loop:
    movq        m0, [firstq]
    movq        m1, [firstq+sizeq]
    add         firstq, sizeq

    movq        m2, m0
    psubusb     m2, m1              ; max(row - next_row, 0) per byte
    psubusb     m1, m0              ; max(next_row - row, 0) per byte
    por         m2, m1              ; one side is always 0 => |row - next_row|

    movq        m0, m2
    punpcklbw   m0, m7              ; bytes 0..3 widened to words
    punpckhbw   m2, m7              ; bytes 4..7 widened to words
    paddw       m4, m0
    paddw       m4, m2

    dec         r3d
    jnz         .loop

    ; horizontal add: 4 words -> 2 dwords -> 1 dword in eax
    movq        m3, m4
    punpckhwd   m3, m7
    punpcklwd   m4, m7
    paddd       m4, m3
    movd        eax, m4
    psrlq       m4, 32
    movd        r4d, m4
    add         eax, r4d
    shl         eax, 2              ; result = 4 * sum
    RET