Imported Upstream version 1.4+222+hg5f9f7194267b
[deb_x265.git] / source / common / x86 / pixel.h
1 /*****************************************************************************
2 * pixel.h: x86 pixel metrics
3 *****************************************************************************
4 * Copyright (C) 2003-2013 x264 project
5 *
6 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7 * Loren Merritt <lorenm@u.washington.edu>
8 * Fiona Glaser <fiona@x264.com>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
23 *
24 * This program is also available under a commercial proprietary license.
25 * For more information, contact us at license @ x265.com.
26 *****************************************************************************/
27
28 #ifndef X265_I386_PIXEL_H
29 #define X265_I386_PIXEL_H
30
/* Declares one prototype of the form
 *
 *     ret x265_pixel_<name>_<W>x<H>_<suffix> args;
 *
 * for every block size listed below.
 *
 *   ret    - return type shared by all generated prototypes
 *   name   - primitive name, pasted after "x265_pixel_"
 *   suffix - trailing name component (call sites in this file pass
 *            instruction-set tags such as sse2 or avx2)
 *   args   - parenthesised parameter list, pasted verbatim
 *
 * Every generated prototype is terminated with ';', so call sites must not
 * append a semicolon of their own.  Each size is listed exactly once, and the
 * last line has no line continuation so the macro body ends with it. */
#define DECL_PIXELS(ret, name, suffix, args) \
    ret x265_pixel_ ## name ## _4x4_ ## suffix args; \
    ret x265_pixel_ ## name ## _4x8_ ## suffix args; \
    ret x265_pixel_ ## name ## _4x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _8x4_ ## suffix args; \
    ret x265_pixel_ ## name ## _8x8_ ## suffix args; \
    ret x265_pixel_ ## name ## _8x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _8x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _12x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x4_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x8_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x12_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x64_ ## suffix args; \
    ret x265_pixel_ ## name ## _24x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x8_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x24_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x64_ ## suffix args; \
    ret x265_pixel_ ## name ## _48x64_ ## suffix args; \
    ret x265_pixel_ ## name ## _64x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _64x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _64x48_ ## suffix args; \
    ret x265_pixel_ ## name ## _64x64_ ## suffix args;

/* Two-operand primitives.  Each macro forwards to DECL_PIXELS with an int
 * return type and a parameter list made of two (pointer, intptr_t) pairs;
 * the three variants differ only in the pointee types.
 * NOTE(review): the intptr_t arguments are presumably row strides -- confirm
 * against the assembly implementations. */

/* Both operands are const pixel*. */
#define DECL_X1(name, suffix) \
    DECL_PIXELS(int, name, suffix, \
                (const pixel*, intptr_t, const pixel*, intptr_t))

/* Both operands are const int16_t*. */
#define DECL_X1_SS(name, suffix) \
    DECL_PIXELS(int, name, suffix, \
                (const int16_t*, intptr_t, const int16_t*, intptr_t))

/* First operand is const int16_t*, second is const pixel*. */
#define DECL_X1_SP(name, suffix) \
    DECL_PIXELS(int, name, suffix, \
                (const int16_t*, intptr_t, const pixel*, intptr_t))
67
/* Declares the <name>_x3 and <name>_x4 variants of a primitive for every
 * block size handled by DECL_PIXELS.  Both return void:
 *
 *   <name>_x3: (4 x const pixel*, intptr_t, int32_t*)
 *   <name>_x4: (5 x const pixel*, intptr_t, int32_t*)
 *
 * Despite its name, the macro emits both families in one expansion. */
#define DECL_X4(name, suffix) \
    DECL_PIXELS(void, name ## _x3, suffix, \
                (const pixel*, const pixel*, const pixel*, const pixel*, \
                 intptr_t, int32_t*)) \
    DECL_PIXELS(void, name ## _x4, suffix, \
                (const pixel*, const pixel*, const pixel*, const pixel*, \
                 const pixel*, intptr_t, int32_t*))
71
72 /* sad-a.asm */
73 DECL_X1(sad, mmx2)
74 DECL_X1(sad, sse2)
75 DECL_X4(sad, sse2_misalign)
76 DECL_X1(sad, sse3)
77 DECL_X1(sad, sse2_aligned)
78 DECL_X1(sad, ssse3)
79 DECL_X1(sad, ssse3_aligned)
80 DECL_X1(sad, avx2)
81 DECL_X1(sad, avx2_aligned)
82 DECL_X4(sad, mmx2)
83 DECL_X4(sad, sse2)
84 DECL_X4(sad, sse3)
85 DECL_X4(sad, ssse3)
86 DECL_X4(sad, avx)
87 DECL_X4(sad, avx2)
88 DECL_X1(sad, cache32_mmx2);
89 DECL_X1(sad, cache64_mmx2);
90 DECL_X1(sad, cache64_sse2);
91 DECL_X1(sad, cache64_ssse3);
92 DECL_X4(sad, cache32_mmx2);
93 DECL_X4(sad, cache64_mmx2);
94 DECL_X4(sad, cache64_sse2);
95 DECL_X4(sad, cache64_ssse3);
96
97 /* pixel-a.asm */
98 DECL_X1(satd, mmx2)
99 DECL_X1(satd, sse2)
100 DECL_X1(satd, ssse3)
101 DECL_X1(satd, ssse3_atom)
102 DECL_X1(satd, sse4)
103 DECL_X1(satd, avx)
104 DECL_X1(satd, xop)
105 DECL_X1(satd, avx2)
106 int x265_pixel_satd_8x32_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
107 int x265_pixel_satd_16x4_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
108 int x265_pixel_satd_16x12_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
109 int x265_pixel_satd_16x32_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
110 int x265_pixel_satd_16x64_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
111
112 DECL_X1(sa8d, mmx2)
113 DECL_X1(sa8d, sse2)
114 DECL_X1(sa8d, ssse3)
115 DECL_X1(sa8d, ssse3_atom)
116 DECL_X1(sa8d, sse4)
117 DECL_X1(sa8d, avx)
118 DECL_X1(sa8d, xop)
119 DECL_X1(sa8d, avx2)
120
121 /* ssd-a.asm */
122 DECL_X1(ssd, mmx)
123 DECL_X1(ssd, mmx2)
124 DECL_X1(ssd, sse2slow)
125 DECL_X1(ssd, sse2)
126 DECL_X1(ssd, ssse3)
127 DECL_X1(ssd, avx)
128 DECL_X1(ssd, xop)
129 DECL_X1(ssd, avx2)
130 DECL_X1_SS(ssd_ss, mmx)
131 DECL_X1_SS(ssd_ss, mmx2)
132 DECL_X1_SS(ssd_ss, sse2slow)
133 DECL_X1_SS(ssd_ss, sse2)
134 DECL_X1_SS(ssd_ss, ssse3)
135 DECL_X1_SS(ssd_ss, sse4)
136 DECL_X1_SS(ssd_ss, avx)
137 DECL_X1_SS(ssd_ss, xop)
138 DECL_X1_SS(ssd_ss, avx2)
139 DECL_X1_SP(ssd_sp, sse4)
140 #define DECL_HEVC_SSD(suffix) \
141 int x265_pixel_ssd_32x64_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
142 int x265_pixel_ssd_16x64_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
143 int x265_pixel_ssd_32x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
144 int x265_pixel_ssd_32x16_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
145 int x265_pixel_ssd_16x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
146 int x265_pixel_ssd_32x24_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
147 int x265_pixel_ssd_24x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
148 int x265_pixel_ssd_32x8_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
149 int x265_pixel_ssd_8x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
150 int x265_pixel_ssd_16x16_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
151 int x265_pixel_ssd_16x8_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
152 int x265_pixel_ssd_8x16_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
153 int x265_pixel_ssd_16x12_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
154 int x265_pixel_ssd_16x4_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
155 int x265_pixel_ssd_8x8_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
156 int x265_pixel_ssd_8x4_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t);
157 DECL_HEVC_SSD(sse2)
158 DECL_HEVC_SSD(ssse3)
159 DECL_HEVC_SSD(avx)
160
161 int x265_pixel_ssd_12x16_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
162 int x265_pixel_ssd_24x32_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
163 int x265_pixel_ssd_48x64_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
164 int x265_pixel_ssd_64x16_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
165 int x265_pixel_ssd_64x32_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
166 int x265_pixel_ssd_64x48_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
167 int x265_pixel_ssd_64x64_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
168
169 int x265_pixel_ssd_s_4_sse2(const int16_t*, intptr_t);
170 int x265_pixel_ssd_s_8_sse2(const int16_t*, intptr_t);
171 int x265_pixel_ssd_s_16_sse2(const int16_t*, intptr_t);
172 int x265_pixel_ssd_s_32_sse2(const int16_t*, intptr_t);
173 int x265_pixel_ssd_s_32_avx2(const int16_t*, intptr_t);
174
175 #define ADDAVG(func) \
176 void x265_ ## func ## _sse4(const int16_t*, const int16_t*, pixel*, intptr_t, intptr_t, intptr_t);
177 ADDAVG(addAvg_2x4)
178 ADDAVG(addAvg_2x8)
179 ADDAVG(addAvg_4x2);
180 ADDAVG(addAvg_4x4)
181 ADDAVG(addAvg_4x8)
182 ADDAVG(addAvg_4x16)
183 ADDAVG(addAvg_6x8)
184 ADDAVG(addAvg_8x2)
185 ADDAVG(addAvg_8x4)
186 ADDAVG(addAvg_8x6)
187 ADDAVG(addAvg_8x8)
188 ADDAVG(addAvg_8x16)
189 ADDAVG(addAvg_8x32)
190 ADDAVG(addAvg_12x16)
191 ADDAVG(addAvg_16x4)
192 ADDAVG(addAvg_16x8)
193 ADDAVG(addAvg_16x12)
194 ADDAVG(addAvg_16x16)
195 ADDAVG(addAvg_16x32)
196 ADDAVG(addAvg_16x64)
197 ADDAVG(addAvg_24x32)
198 ADDAVG(addAvg_32x8)
199 ADDAVG(addAvg_32x16)
200 ADDAVG(addAvg_32x24)
201 ADDAVG(addAvg_32x32)
202 ADDAVG(addAvg_32x64)
203 ADDAVG(addAvg_48x64)
204 ADDAVG(addAvg_64x16)
205 ADDAVG(addAvg_64x32)
206 ADDAVG(addAvg_64x48)
207 ADDAVG(addAvg_64x64)
208
209 ADDAVG(addAvg_2x16)
210 ADDAVG(addAvg_4x32)
211 ADDAVG(addAvg_6x16)
212 ADDAVG(addAvg_8x12)
213 ADDAVG(addAvg_8x64)
214 ADDAVG(addAvg_12x32)
215 ADDAVG(addAvg_16x24)
216 ADDAVG(addAvg_24x64)
217 ADDAVG(addAvg_32x48)
218
219 void x265_downShift_16_sse2(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);
220 void x265_upShift_8_sse4(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
221
/* Drop every declaration-helper macro defined in this header so that none of
 * them leaks into translation units that include it. */
#undef DECL_PIXELS
#undef DECL_HEVC_SSD
#undef DECL_X1
#undef DECL_X1_SS
#undef DECL_X1_SP
#undef DECL_X4
#undef ADDAVG
226
227 #endif // ifndef X265_I386_PIXEL_H