Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | ;***************************************************************************** |
2 | ;* x86-optimized functions for pullup filter | |
3 | ;* | |
4 | ;* This file is part of FFmpeg. | |
5 | ;* | |
6 | ;* FFmpeg is free software; you can redistribute it and/or modify | |
7 | ;* it under the terms of the GNU General Public License as published by | |
8 | ;* the Free Software Foundation; either version 2 of the License, or | |
9 | ;* (at your option) any later version. | |
10 | ;* | |
11 | ;* FFmpeg is distributed in the hope that it will be useful, | |
12 | ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | ;* GNU General Public License for more details. | |
15 | ;* | |
16 | ;* You should have received a copy of the GNU General Public License along | |
17 | ;* with FFmpeg; if not, write to the Free Software Foundation, Inc., | |
18 | ;* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
19 | ;****************************************************************************** | |
20 | ||
21 | %include "libavutil/x86/x86util.asm" | |
22 | ||
23 | SECTION_TEXT | |
24 | ||
;-----------------------------------------------------------------------------
; pullup_filter_diff(first, second, size)
;
; Sums the absolute differences between bytes read through `first` and bytes
; read through `second`. Four iterations are run; each one reads 8 bytes from
; each pointer and then advances both pointers by `size`, so 32 byte pairs are
; compared in total. The sum is left in eax.
;
; Per the x86inc cglobal convention the numbers below request 3 arguments,
; 5 general-purpose registers (r3 and r4 are used as scratch) and 8 vector
; registers.
; NOTE(review): no emms is visible in this block; presumably the caller is
; responsible for clearing MMX state - confirm.
;-----------------------------------------------------------------------------
25 | INIT_MMX mmx | |
26 | cglobal pullup_filter_diff, 3, 5, 8, first, second, size | |
; r3 = number of iterations left, m4 = running sums (four 16-bit lanes),
; m7 = constant zero used to widen bytes and words.
27 | mov r3, 4 | |
28 | pxor m4, m4 | |
29 | pxor m7, m7 | |
30 | ||
31 | .loop: | |
; Load the same 8 bytes of `first` twice (m0, m2) and 8 bytes of `second` (m1);
; both pointers are stepped by `size` right after they have been read.
32 | movq m0, [firstq] | |
33 | movq m2, [firstq] | |
34 | add firstq, sizeq | |
35 | movq m1, [secondq] | |
36 | add secondq, sizeq | |
; Unsigned saturating subtraction in both directions: m2 = max(first-second, 0)
; and m1 = max(second-first, 0). For every byte one of the two is zero and the
; other is the absolute difference.
37 | psubusb m2, m1 | |
38 | psubusb m1, m0 | |
; Keep copies so that both the low and the high four bytes of each result can
; be zero-extended to 16-bit words.
39 | movq m0, m2 | |
40 | movq m3, m1 | |
41 | punpcklbw m0, m7 | |
42 | punpcklbw m1, m7 | |
43 | punpckhbw m2, m7 | |
44 | punpckhbw m3, m7 | |
; Accumulate all four widened vectors. Each 16-bit lane receives two absolute
; differences per iteration (8 in total, each at most 255), so it cannot wrap.
45 | paddw m4, m0 | |
46 | paddw m4, m1 | |
47 | paddw m4, m2 | |
48 | paddw m4, m3 | |
49 | ||
50 | dec r3 | |
51 | jnz .loop | |
52 | ||
; Horizontal reduction: widen the four word sums to dwords, add them pairwise,
; then add the two remaining dwords into eax.
53 | movq m3, m4 | |
54 | punpcklwd m4, m7 | |
55 | punpckhwd m3, m7 | |
56 | paddd m3, m4 | |
57 | movd eax, m3 | |
58 | psrlq m3, 32 | |
59 | movd r4d, m3 | |
60 | add eax, r4d | |
61 | RET | |
62 | ||
;-----------------------------------------------------------------------------
; pullup_filter_comb(first, second, size)
;
; Runs four iterations over 8-byte groups that are `size` bytes apart. With
;   a      = [firstq]           a_next = [firstq+sizeq]
;   b_prev = [secondq]          b      = [secondq+sizeq]
; (secondq is moved back by `size` before the loop, so b is the group the
; caller's `second` pointer addressed and b_prev is the one before it), every
; iteration accumulates, for each of the 8 byte positions:
;   |2*a - (b_prev + b)|  +  |2*b - (a + a_next)|
; The total over all iterations is left in eax.
;
; Note that the loop reads one group before `second` (b_prev in the first
; iteration) and one group past the fourth group of `first` (a_next in the
; last iteration).
; NOTE(review): no emms is visible in this block; presumably the caller is
; responsible for clearing MMX state - confirm.
;-----------------------------------------------------------------------------
63 | INIT_MMX mmx | |
64 | cglobal pullup_filter_comb, 3, 5, 8, first, second, size | |
; r3 = iterations left, m6 = running sums (four 16-bit lanes), m7 = zero.
65 | mov r3, 4 | |
66 | pxor m6, m6 | |
67 | pxor m7, m7 | |
; Bias `second` one step back so that [secondq] / [secondq+sizeq] address the
; previous / current group.
68 | sub secondq, sizeq | |
69 | ||
70 | .loop: | |
; --- low 4 bytes: |2*a - (b_prev + b)| ---
; Bytes are zero-extended to words first; the absolute value is formed by
; adding the two saturating differences (one of them is always zero).
71 | movq m0, [firstq] | |
72 | movq m1, [secondq] | |
73 | punpcklbw m0, m7 | |
74 | movq m2, [secondq+sizeq] | |
75 | punpcklbw m1, m7 | |
76 | punpcklbw m2, m7 | |
77 | paddw m0, m0 | |
78 | paddw m1, m2 | |
79 | movq m2, m0 | |
80 | psubusw m0, m1 | |
81 | psubusw m1, m2 | |
82 | paddw m6, m0 | |
83 | paddw m6, m1 | |
84 | ||
; --- high 4 bytes: |2*a - (b_prev + b)| ---
85 | movq m0, [firstq] | |
86 | movq m1, [secondq] | |
87 | punpckhbw m0, m7 | |
88 | movq m2, [secondq+sizeq] | |
89 | punpckhbw m1, m7 | |
90 | punpckhbw m2, m7 | |
91 | paddw m0, m0 | |
92 | paddw m1, m2 | |
93 | movq m2, m0 | |
94 | psubusw m0, m1 | |
95 | psubusw m1, m2 | |
96 | paddw m6, m0 | |
97 | paddw m6, m1 | |
98 | ||
; --- low 4 bytes: |2*b - (a + a_next)| ---
99 | movq m0, [secondq+sizeq] | |
100 | movq m1, [firstq] | |
101 | punpcklbw m0, m7 | |
102 | movq m2, [firstq+sizeq] | |
103 | punpcklbw m1, m7 | |
104 | punpcklbw m2, m7 | |
105 | paddw m0, m0 | |
106 | paddw m1, m2 | |
107 | movq m2, m0 | |
108 | psubusw m0, m1 | |
109 | psubusw m1, m2 | |
110 | paddw m6, m0 | |
111 | paddw m6, m1 | |
112 | ||
; --- high 4 bytes: |2*b - (a + a_next)| ---
113 | movq m0, [secondq+sizeq] | |
114 | movq m1, [firstq] | |
115 | punpckhbw m0, m7 | |
116 | movq m2, [firstq+sizeq] | |
117 | punpckhbw m1, m7 | |
118 | punpckhbw m2, m7 | |
119 | paddw m0, m0 | |
120 | paddw m1, m2 | |
121 | movq m2, m0 | |
122 | psubusw m0, m1 | |
123 | psubusw m1, m2 | |
124 | paddw m6, m0 | |
125 | paddw m6, m1 | |
126 | ||
; Step both pointers to the next group. Each 16-bit lane of m6 has received
; four values of at most 510 in this iteration (16 values overall), so the
; lanes cannot wrap.
127 | add firstq, sizeq | |
128 | add secondq, sizeq | |
129 | dec r3 | |
130 | jnz .loop | |
131 | ||
; Horizontal reduction: widen the four word sums to dwords, add them pairwise,
; then add the two remaining dwords into eax.
132 | movq m5, m6 | |
133 | punpcklwd m6, m7 | |
134 | punpckhwd m5, m7 | |
135 | paddd m5, m6 | |
136 | movd eax, m5 | |
137 | psrlq m5, 32 | |
138 | movd r4d, m5 | |
139 | add eax, r4d | |
140 | RET | |
141 | ||
;-----------------------------------------------------------------------------
; pullup_filter_var(first, second, size)
;
; Sums the absolute differences between each 8-byte group read through `first`
; and the group `size` bytes after it. Three iterations are run, so three
; consecutive pairs of groups (four groups in total) are compared. The sum is
; multiplied by 4 and left in eax.
;
; `second` is declared as an argument but is never read in this block.
; NOTE(review): no emms is visible in this block; presumably the caller is
; responsible for clearing MMX state - confirm.
;-----------------------------------------------------------------------------
142 | INIT_MMX mmx | |
143 | cglobal pullup_filter_var, 3, 5, 8, first, second, size | |
; r3 = iterations left, m4 = running sums (four 16-bit lanes), m7 = zero.
144 | mov r3, 3 | |
145 | pxor m4, m4 | |
146 | pxor m7, m7 | |
147 | ||
148 | .loop: | |
; m0 and m2 both hold the current group, m1 the following one; firstq is then
; stepped so that the following group becomes the current one next time.
149 | movq m0, [firstq] | |
150 | movq m2, [firstq] | |
151 | movq m1, [firstq+sizeq] | |
152 | add firstq, sizeq | |
; Saturating subtraction in both directions: for every byte one result is zero
; and the other is the absolute difference.
153 | psubusb m2, m1 | |
154 | psubusb m1, m0 | |
; Zero-extend the low and high halves of both results to words and accumulate.
; Each lane gets two values of at most 255 per iteration, so it cannot wrap.
155 | movq m0, m2 | |
156 | movq m3, m1 | |
157 | punpcklbw m0, m7 | |
158 | punpcklbw m1, m7 | |
159 | punpckhbw m2, m7 | |
160 | punpckhbw m3, m7 | |
161 | paddw m4, m0 | |
162 | paddw m4, m1 | |
163 | paddw m4, m2 | |
164 | paddw m4, m3 | |
165 | ||
166 | dec r3 | |
167 | jnz .loop | |
168 | ||
; Horizontal reduction into eax, as word sums -> dword sums -> single dword.
169 | movq m3, m4 | |
170 | punpcklwd m4, m7 | |
171 | punpckhwd m3, m7 | |
172 | paddd m3, m4 | |
173 | movd eax, m3 | |
174 | psrlq m3, 32 | |
175 | movd r4d, m3 | |
176 | add eax, r4d | |
; Scale the final sum by 4.
177 | shl eax, 2 | |
178 | RET |