Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | /***************************************************************************** |
2 | * pixel.h: x86 pixel metrics | |
3 | ***************************************************************************** | |
4 | * Copyright (C) 2003-2013 x264 project | |
5 | * | |
6 | * Authors: Laurent Aimar <fenrir@via.ecp.fr> | |
7 | * Loren Merritt <lorenm@u.washington.edu> | |
8 | * Fiona Glaser <fiona@x264.com> | |
9 | * | |
10 | * This program is free software; you can redistribute it and/or modify | |
11 | * it under the terms of the GNU General Public License as published by | |
12 | * the Free Software Foundation; either version 2 of the License, or | |
13 | * (at your option) any later version. | |
14 | * | |
15 | * This program is distributed in the hope that it will be useful, | |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | * GNU General Public License for more details. | |
19 | * | |
20 | * You should have received a copy of the GNU General Public License | |
21 | * along with this program; if not, write to the Free Software | |
22 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
23 | * | |
24 | * This program is also available under a commercial proprietary license. | |
25 | * For more information, contact us at license @ x265.com. | |
26 | *****************************************************************************/ | |
27 | ||
28 | #ifndef X265_I386_PIXEL_H | |
29 | #define X265_I386_PIXEL_H | |
30 | ||
/*
 * DECL_PIXELS(ret, name, suffix, args)
 *
 * Expands to one prototype per block size listed below, each of the form
 *     ret x265_pixel_<name>_<W>x<H>_<suffix> args;
 * `args` must be a complete, parenthesised parameter list.
 *
 * Every generated prototype carries its own ';', so an invocation must not be
 * followed by another one.  Sizes are listed once each, ordered by width then
 * height.  The final line deliberately has no line continuation, so the macro
 * ends here regardless of what follows it.
 */
#define DECL_PIXELS(ret, name, suffix, args) \
    ret x265_pixel_ ## name ## _4x4_ ## suffix args; \
    ret x265_pixel_ ## name ## _4x8_ ## suffix args; \
    ret x265_pixel_ ## name ## _4x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _8x4_ ## suffix args; \
    ret x265_pixel_ ## name ## _8x8_ ## suffix args; \
    ret x265_pixel_ ## name ## _8x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _8x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _12x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x4_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x8_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x12_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x64_ ## suffix args; \
    ret x265_pixel_ ## name ## _24x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x8_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x24_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x64_ ## suffix args; \
    ret x265_pixel_ ## name ## _48x64_ ## suffix args; \
    ret x265_pixel_ ## name ## _64x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _64x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _64x48_ ## suffix args; \
    ret x265_pixel_ ## name ## _64x64_ ## suffix args;
58 | ||
/* Declares the whole DECL_PIXELS size family for `name` under one
 * implementation suffix (e.g. sse2).  Each prototype returns int and takes
 * (const pixel*, intptr_t, const pixel*, intptr_t). */
#define DECL_X1(name, suffix) \
    DECL_PIXELS(int, name, suffix, \
                (const pixel*, intptr_t, const pixel*, intptr_t))
72b9787e JB |
61 | |
/* Same size family as DECL_X1, but both buffer arguments are const int16_t*:
 * int f(const int16_t*, intptr_t, const int16_t*, intptr_t). */
#define DECL_X1_SS(name, suffix) \
    DECL_PIXELS(int, name, suffix, \
                (const int16_t*, intptr_t, const int16_t*, intptr_t))
72b9787e JB |
64 | |
/* Mixed variant of DECL_X1: first buffer is const int16_t*, second is
 * const pixel*:
 * int f(const int16_t*, intptr_t, const pixel*, intptr_t). */
#define DECL_X1_SP(name, suffix) \
    DECL_PIXELS(int, name, suffix, \
                (const int16_t*, intptr_t, const pixel*, intptr_t))
72b9787e JB |
67 | |
/* Declares two void-returning size families for `name`:
 *   <name>_x3: four const pixel* arguments, an intptr_t and an int32_t*
 *   <name>_x4: five const pixel* arguments, an intptr_t and an int32_t*
 */
#define DECL_X4(name, suffix) \
    DECL_PIXELS(void, name ## _x3, suffix, \
                (const pixel*, const pixel*, const pixel*, const pixel*, \
                 intptr_t, int32_t*)) \
    DECL_PIXELS(void, name ## _x4, suffix, \
                (const pixel*, const pixel*, const pixel*, const pixel*, \
                 const pixel*, intptr_t, int32_t*))
72b9787e JB |
71 | |
72 | /* sad-a.asm */ | |
73 | DECL_X1(sad, mmx2) | |
74 | DECL_X1(sad, sse2) | |
75 | DECL_X4(sad, sse2_misalign) | |
76 | DECL_X1(sad, sse3) | |
77 | DECL_X1(sad, sse2_aligned) | |
78 | DECL_X1(sad, ssse3) | |
79 | DECL_X1(sad, ssse3_aligned) | |
80 | DECL_X1(sad, avx2) | |
81 | DECL_X1(sad, avx2_aligned) | |
82 | DECL_X4(sad, mmx2) | |
83 | DECL_X4(sad, sse2) | |
84 | DECL_X4(sad, sse3) | |
85 | DECL_X4(sad, ssse3) | |
86 | DECL_X4(sad, avx) | |
87 | DECL_X4(sad, avx2) | |
88 | DECL_X1(sad, cache32_mmx2); | |
89 | DECL_X1(sad, cache64_mmx2); | |
90 | DECL_X1(sad, cache64_sse2); | |
91 | DECL_X1(sad, cache64_ssse3); | |
92 | DECL_X4(sad, cache32_mmx2); | |
93 | DECL_X4(sad, cache64_mmx2); | |
94 | DECL_X4(sad, cache64_sse2); | |
95 | DECL_X4(sad, cache64_ssse3); | |
96 | ||
97 | /* pixel-a.asm */ | |
98 | DECL_X1(satd, mmx2) | |
99 | DECL_X1(satd, sse2) | |
100 | DECL_X1(satd, ssse3) | |
101 | DECL_X1(satd, ssse3_atom) | |
102 | DECL_X1(satd, sse4) | |
103 | DECL_X1(satd, avx) | |
104 | DECL_X1(satd, xop) | |
105 | DECL_X1(satd, avx2) | |
b53f7c52 JB |
106 | int x265_pixel_satd_8x32_sse2(const pixel*, intptr_t, const pixel*, intptr_t); |
107 | int x265_pixel_satd_16x4_sse2(const pixel*, intptr_t, const pixel*, intptr_t); | |
108 | int x265_pixel_satd_16x12_sse2(const pixel*, intptr_t, const pixel*, intptr_t); | |
109 | int x265_pixel_satd_16x32_sse2(const pixel*, intptr_t, const pixel*, intptr_t); | |
110 | int x265_pixel_satd_16x64_sse2(const pixel*, intptr_t, const pixel*, intptr_t); | |
72b9787e JB |
111 | |
112 | DECL_X1(sa8d, mmx2) | |
113 | DECL_X1(sa8d, sse2) | |
114 | DECL_X1(sa8d, ssse3) | |
115 | DECL_X1(sa8d, ssse3_atom) | |
116 | DECL_X1(sa8d, sse4) | |
117 | DECL_X1(sa8d, avx) | |
118 | DECL_X1(sa8d, xop) | |
119 | DECL_X1(sa8d, avx2) | |
120 | ||
121 | /* ssd-a.asm */ | |
122 | DECL_X1(ssd, mmx) | |
123 | DECL_X1(ssd, mmx2) | |
124 | DECL_X1(ssd, sse2slow) | |
125 | DECL_X1(ssd, sse2) | |
126 | DECL_X1(ssd, ssse3) | |
127 | DECL_X1(ssd, avx) | |
128 | DECL_X1(ssd, xop) | |
129 | DECL_X1(ssd, avx2) | |
130 | DECL_X1_SS(ssd_ss, mmx) | |
131 | DECL_X1_SS(ssd_ss, mmx2) | |
132 | DECL_X1_SS(ssd_ss, sse2slow) | |
133 | DECL_X1_SS(ssd_ss, sse2) | |
134 | DECL_X1_SS(ssd_ss, ssse3) | |
135 | DECL_X1_SS(ssd_ss, sse4) | |
136 | DECL_X1_SS(ssd_ss, avx) | |
137 | DECL_X1_SS(ssd_ss, xop) | |
138 | DECL_X1_SS(ssd_ss, avx2) | |
139 | DECL_X1_SP(ssd_sp, sse4) | |
/* DECL_HEVC_SSD(suffix)
 *
 * Expands to sixteen prototypes
 *     int x265_pixel_ssd_<W>x<H>_<suffix>(const pixel*, intptr_t,
 *                                          const pixel*, intptr_t);
 * for the sizes listed below (ordered by width, then height).  Each prototype
 * ends with its own ';', so invocations take no trailing semicolon. */
#define DECL_HEVC_SSD(suffix) \
    int x265_pixel_ssd_8x4_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
    int x265_pixel_ssd_8x8_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
    int x265_pixel_ssd_8x16_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
    int x265_pixel_ssd_8x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
    int x265_pixel_ssd_16x4_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
    int x265_pixel_ssd_16x8_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
    int x265_pixel_ssd_16x12_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
    int x265_pixel_ssd_16x16_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
    int x265_pixel_ssd_16x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
    int x265_pixel_ssd_16x64_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
    int x265_pixel_ssd_24x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
    int x265_pixel_ssd_32x8_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
    int x265_pixel_ssd_32x16_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
    int x265_pixel_ssd_32x24_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
    int x265_pixel_ssd_32x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
    int x265_pixel_ssd_32x64_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t);
72b9787e JB |
157 | DECL_HEVC_SSD(sse2) |
158 | DECL_HEVC_SSD(ssse3) | |
159 | DECL_HEVC_SSD(avx) | |
160 | ||
b53f7c52 JB |
161 | int x265_pixel_ssd_12x16_sse4(const pixel*, intptr_t, const pixel*, intptr_t); |
162 | int x265_pixel_ssd_24x32_sse4(const pixel*, intptr_t, const pixel*, intptr_t); | |
163 | int x265_pixel_ssd_48x64_sse4(const pixel*, intptr_t, const pixel*, intptr_t); | |
164 | int x265_pixel_ssd_64x16_sse4(const pixel*, intptr_t, const pixel*, intptr_t); | |
165 | int x265_pixel_ssd_64x32_sse4(const pixel*, intptr_t, const pixel*, intptr_t); | |
166 | int x265_pixel_ssd_64x48_sse4(const pixel*, intptr_t, const pixel*, intptr_t); | |
167 | int x265_pixel_ssd_64x64_sse4(const pixel*, intptr_t, const pixel*, intptr_t); | |
72b9787e | 168 | |
b53f7c52 JB |
/* ssd_s prototypes: a single const int16_t* buffer plus an intptr_t,
 * returning int.  The number in the name (4/8/16/32) presumably is the square
 * block dimension -- TODO confirm against the assembly. */
int x265_pixel_ssd_s_4_sse2(const int16_t*, intptr_t);
int x265_pixel_ssd_s_8_sse2(const int16_t*, intptr_t);
int x265_pixel_ssd_s_16_sse2(const int16_t*, intptr_t);
int x265_pixel_ssd_s_32_sse2(const int16_t*, intptr_t);
int x265_pixel_ssd_s_32_avx2(const int16_t*, intptr_t);
72b9787e JB |
174 | |
/* ADDAVG(func): declares
 *     void x265_<func>_sse4(const int16_t*, const int16_t*, pixel*,
 *                           intptr_t, intptr_t, intptr_t);
 * The expansion ends with ';', so invocations take no trailing semicolon. */
#define ADDAVG(func) \
    void x265_ ## func ## _sse4(const int16_t*, const int16_t*, pixel*, \
                                intptr_t, intptr_t, intptr_t);
72b9787e JB |
177 | ADDAVG(addAvg_2x4) |
178 | ADDAVG(addAvg_2x8) | |
179 | ADDAVG(addAvg_4x2); | |
180 | ADDAVG(addAvg_4x4) | |
181 | ADDAVG(addAvg_4x8) | |
182 | ADDAVG(addAvg_4x16) | |
183 | ADDAVG(addAvg_6x8) | |
184 | ADDAVG(addAvg_8x2) | |
185 | ADDAVG(addAvg_8x4) | |
186 | ADDAVG(addAvg_8x6) | |
187 | ADDAVG(addAvg_8x8) | |
188 | ADDAVG(addAvg_8x16) | |
189 | ADDAVG(addAvg_8x32) | |
190 | ADDAVG(addAvg_12x16) | |
191 | ADDAVG(addAvg_16x4) | |
192 | ADDAVG(addAvg_16x8) | |
193 | ADDAVG(addAvg_16x12) | |
194 | ADDAVG(addAvg_16x16) | |
195 | ADDAVG(addAvg_16x32) | |
196 | ADDAVG(addAvg_16x64) | |
197 | ADDAVG(addAvg_24x32) | |
198 | ADDAVG(addAvg_32x8) | |
199 | ADDAVG(addAvg_32x16) | |
200 | ADDAVG(addAvg_32x24) | |
201 | ADDAVG(addAvg_32x32) | |
202 | ADDAVG(addAvg_32x64) | |
203 | ADDAVG(addAvg_48x64) | |
204 | ADDAVG(addAvg_64x16) | |
205 | ADDAVG(addAvg_64x32) | |
206 | ADDAVG(addAvg_64x48) | |
207 | ADDAVG(addAvg_64x64) | |
208 | ||
209 | ADDAVG(addAvg_2x16) | |
210 | ADDAVG(addAvg_4x32) | |
211 | ADDAVG(addAvg_6x16) | |
212 | ADDAVG(addAvg_8x12) | |
213 | ADDAVG(addAvg_8x64) | |
214 | ADDAVG(addAvg_12x32) | |
215 | ADDAVG(addAvg_16x24) | |
216 | ADDAVG(addAvg_24x64) | |
217 | ADDAVG(addAvg_32x48) | |
218 | ||
b53f7c52 JB |
219 | void x265_downShift_16_sse2(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask); |
220 | void x265_upShift_8_sse4(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift); | |
72b9787e JB |
221 | |
/* Remove every helper macro this header defines so that none of them leaks
 * into translation units that include it. */
#undef DECL_PIXELS
#undef DECL_X1
#undef DECL_X1_SS
#undef DECL_X1_SP
#undef DECL_X4
#undef DECL_HEVC_SSD
#undef ADDAVG
226 | ||
227 | #endif // ifndef X265_I386_PIXEL_H |