Fix cut&paste typo in changelog.
[deb_x265.git] / source / common / x86 / pixel.h
... / ...
CommitLineData
/*****************************************************************************
 * pixel.h: x86 pixel metrics
 *****************************************************************************
 * Copyright (C) 2003-2013 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Fiona Glaser <fiona@x264.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/
27
28#ifndef X265_I386_PIXEL_H
29#define X265_I386_PIXEL_H
30
/* Declares one prototype per block size for a family of pixel primitives.
 *
 *   ret    - return type of every generated prototype
 *   name   - primitive name, pasted after "x265_pixel_"
 *   suffix - trailing part of the symbol name, pasted after the block size
 *   args   - parenthesised parameter list shared by all prototypes
 *
 * Each generated symbol has the form  x265_pixel_<name>_<W>x<H>_<suffix>.
 * Every prototype, including the last, is terminated with ';' inside the
 * macro, so invocations must NOT be followed by a semicolon of their own.
 * Each size is listed exactly once, in ascending width/height order. */
#define DECL_PIXELS(ret, name, suffix, args) \
    ret x265_pixel_ ## name ## _4x4_ ## suffix args; \
    ret x265_pixel_ ## name ## _4x8_ ## suffix args; \
    ret x265_pixel_ ## name ## _4x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _8x4_ ## suffix args; \
    ret x265_pixel_ ## name ## _8x8_ ## suffix args; \
    ret x265_pixel_ ## name ## _8x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _8x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _12x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x4_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x8_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x12_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x64_ ## suffix args; \
    ret x265_pixel_ ## name ## _24x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x8_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x24_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x64_ ## suffix args; \
    ret x265_pixel_ ## name ## _48x64_ ## suffix args; \
    ret x265_pixel_ ## name ## _64x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _64x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _64x48_ ## suffix args; \
    ret x265_pixel_ ## name ## _64x64_ ## suffix args;

/* Signature wrappers around DECL_PIXELS.  Each one fixes the return type and
 * parameter list and forwards `name` and `suffix` unchanged, so a single
 * invocation declares the primitive for every block size DECL_PIXELS lists. */

/* int f(const pixel*, intptr_t, const pixel*, intptr_t) */
#define DECL_X1(name, suffix) \
    DECL_PIXELS(int, name, suffix, (const pixel*, intptr_t, const pixel*, intptr_t))

/* int f(const int16_t*, intptr_t, const int16_t*, intptr_t) */
#define DECL_X1_SS(name, suffix) \
    DECL_PIXELS(int, name, suffix, (const int16_t*, intptr_t, const int16_t*, intptr_t))

/* int f(const int16_t*, intptr_t, const pixel*, intptr_t) */
#define DECL_X1_SP(name, suffix) \
    DECL_PIXELS(int, name, suffix, (const int16_t*, intptr_t, const pixel*, intptr_t))

/* Declares two void-returning families in one go:
 *   <name>_x3 : four pixel pointers, an intptr_t and an int32_t* parameter
 *   <name>_x4 : five pixel pointers, an intptr_t and an int32_t* parameter
 * NOTE(review): presumably the int32_t* receives one result per candidate
 * block - confirm against the assembly implementations. */
#define DECL_X4(name, suffix) \
    DECL_PIXELS(void, name ## _x3, suffix, (const pixel*, const pixel*, const pixel*, const pixel*, intptr_t, int32_t*)) \
    DECL_PIXELS(void, name ## _x4, suffix, (const pixel*, const pixel*, const pixel*, const pixel*, const pixel*, intptr_t, int32_t*))
71
72/* sad-a.asm */
73DECL_X1(sad, mmx2)
74DECL_X1(sad, sse2)
75DECL_X4(sad, sse2_misalign)
76DECL_X1(sad, sse3)
77DECL_X1(sad, sse2_aligned)
78DECL_X1(sad, ssse3)
79DECL_X1(sad, ssse3_aligned)
80DECL_X1(sad, avx2)
81DECL_X1(sad, avx2_aligned)
82DECL_X4(sad, mmx2)
83DECL_X4(sad, sse2)
84DECL_X4(sad, sse3)
85DECL_X4(sad, ssse3)
86DECL_X4(sad, avx)
87DECL_X4(sad, avx2)
88DECL_X1(sad, cache32_mmx2);
89DECL_X1(sad, cache64_mmx2);
90DECL_X1(sad, cache64_sse2);
91DECL_X1(sad, cache64_ssse3);
92DECL_X4(sad, cache32_mmx2);
93DECL_X4(sad, cache64_mmx2);
94DECL_X4(sad, cache64_sse2);
95DECL_X4(sad, cache64_ssse3);
96
97/* pixel-a.asm */
98DECL_X1(satd, mmx2)
99DECL_X1(satd, sse2)
100DECL_X1(satd, ssse3)
101DECL_X1(satd, ssse3_atom)
102DECL_X1(satd, sse4)
103DECL_X1(satd, avx)
104DECL_X1(satd, xop)
105DECL_X1(satd, avx2)
106int x265_pixel_satd_8x32_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
107int x265_pixel_satd_16x4_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
108int x265_pixel_satd_16x12_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
109int x265_pixel_satd_16x32_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
110int x265_pixel_satd_16x64_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
111
112DECL_X1(sa8d, mmx2)
113DECL_X1(sa8d, sse2)
114DECL_X1(sa8d, ssse3)
115DECL_X1(sa8d, ssse3_atom)
116DECL_X1(sa8d, sse4)
117DECL_X1(sa8d, avx)
118DECL_X1(sa8d, xop)
119DECL_X1(sa8d, avx2)
120
121/* ssd-a.asm */
122DECL_X1(ssd, mmx)
123DECL_X1(ssd, mmx2)
124DECL_X1(ssd, sse2slow)
125DECL_X1(ssd, sse2)
126DECL_X1(ssd, ssse3)
127DECL_X1(ssd, avx)
128DECL_X1(ssd, xop)
129DECL_X1(ssd, avx2)
130DECL_X1_SS(ssd_ss, mmx)
131DECL_X1_SS(ssd_ss, mmx2)
132DECL_X1_SS(ssd_ss, sse2slow)
133DECL_X1_SS(ssd_ss, sse2)
134DECL_X1_SS(ssd_ss, ssse3)
135DECL_X1_SS(ssd_ss, sse4)
136DECL_X1_SS(ssd_ss, avx)
137DECL_X1_SS(ssd_ss, xop)
138DECL_X1_SS(ssd_ss, avx2)
139DECL_X1_SP(ssd_sp, sse4)
140#define DECL_HEVC_SSD(suffix) \
141 int x265_pixel_ssd_32x64_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
142 int x265_pixel_ssd_16x64_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
143 int x265_pixel_ssd_32x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
144 int x265_pixel_ssd_32x16_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
145 int x265_pixel_ssd_16x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
146 int x265_pixel_ssd_32x24_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
147 int x265_pixel_ssd_24x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
148 int x265_pixel_ssd_32x8_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
149 int x265_pixel_ssd_8x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
150 int x265_pixel_ssd_16x16_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
151 int x265_pixel_ssd_16x8_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
152 int x265_pixel_ssd_8x16_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
153 int x265_pixel_ssd_16x12_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
154 int x265_pixel_ssd_16x4_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
155 int x265_pixel_ssd_8x8_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
156 int x265_pixel_ssd_8x4_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t);
157DECL_HEVC_SSD(sse2)
158DECL_HEVC_SSD(ssse3)
159DECL_HEVC_SSD(avx)
160
161int x265_pixel_ssd_12x16_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
162int x265_pixel_ssd_24x32_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
163int x265_pixel_ssd_48x64_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
164int x265_pixel_ssd_64x16_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
165int x265_pixel_ssd_64x32_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
166int x265_pixel_ssd_64x48_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
167int x265_pixel_ssd_64x64_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
168
/* "ssd_s" prototypes: each takes a single const int16_t* plus an intptr_t and
 * returns int.  The number in the symbol name (4/8/16/32) selects the variant.
 * NOTE(review): presumably the number is the square block dimension and the
 * intptr_t is the row stride - confirm against the assembly. */
int x265_pixel_ssd_s_4_sse2(const int16_t*, intptr_t);
int x265_pixel_ssd_s_8_sse2(const int16_t*, intptr_t);
int x265_pixel_ssd_s_16_sse2(const int16_t*, intptr_t);
int x265_pixel_ssd_s_32_sse2(const int16_t*, intptr_t);
int x265_pixel_ssd_s_32_avx2(const int16_t*, intptr_t);
174
175#define ADDAVG(func) \
176 void x265_ ## func ## _sse4(const int16_t*, const int16_t*, pixel*, intptr_t, intptr_t, intptr_t);
177ADDAVG(addAvg_2x4)
178ADDAVG(addAvg_2x8)
179ADDAVG(addAvg_4x2);
180ADDAVG(addAvg_4x4)
181ADDAVG(addAvg_4x8)
182ADDAVG(addAvg_4x16)
183ADDAVG(addAvg_6x8)
184ADDAVG(addAvg_8x2)
185ADDAVG(addAvg_8x4)
186ADDAVG(addAvg_8x6)
187ADDAVG(addAvg_8x8)
188ADDAVG(addAvg_8x16)
189ADDAVG(addAvg_8x32)
190ADDAVG(addAvg_12x16)
191ADDAVG(addAvg_16x4)
192ADDAVG(addAvg_16x8)
193ADDAVG(addAvg_16x12)
194ADDAVG(addAvg_16x16)
195ADDAVG(addAvg_16x32)
196ADDAVG(addAvg_16x64)
197ADDAVG(addAvg_24x32)
198ADDAVG(addAvg_32x8)
199ADDAVG(addAvg_32x16)
200ADDAVG(addAvg_32x24)
201ADDAVG(addAvg_32x32)
202ADDAVG(addAvg_32x64)
203ADDAVG(addAvg_48x64)
204ADDAVG(addAvg_64x16)
205ADDAVG(addAvg_64x32)
206ADDAVG(addAvg_64x48)
207ADDAVG(addAvg_64x64)
208
209ADDAVG(addAvg_2x16)
210ADDAVG(addAvg_4x32)
211ADDAVG(addAvg_6x16)
212ADDAVG(addAvg_8x12)
213ADDAVG(addAvg_8x64)
214ADDAVG(addAvg_12x32)
215ADDAVG(addAvg_16x24)
216ADDAVG(addAvg_24x64)
217ADDAVG(addAvg_32x48)
218
219void x265_downShift_16_sse2(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);
220void x265_upShift_8_sse4(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
221
/* Drop every declaration helper defined by this header so that none of them
 * leaks into (or collides with a macro of the same name in) the including
 * translation unit. */
#undef DECL_PIXELS
#undef DECL_HEVC_SSD
#undef DECL_X1
#undef DECL_X1_SS
#undef DECL_X1_SP
#undef DECL_X4
#undef ADDAVG
226
227#endif // ifndef X265_I386_PIXEL_H