/*****************************************************************************
 * Copyright (C) 2013 x265 project
 *
 * Authors: Steve Borho <steve@borho.org>
 *          Praveen Kumar Tiwari <praveen@multicorewareinc.com>
 *          Min Chen <chenm003@163.com> <min.chen@multicorewareinc.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/
27 #include "primitives.h"
33 #include "pixel-util.h"
35 #include "ipfilter8.h"
36 #include "loopfilter.h"
37 #include "blockcopy8.h"
38 #include "intrapred.h"
/* Cumulative primitive-table initialisers.
 *
 * INITn_NAME(name1, name2, cpu) expands to n statements of the form
 *
 *     p.name1[LUMA_WxH] = x265_pixel_<name2>_WxH<cpu>;
 *
 * Each level invokes the previous one and appends a single block size, in
 * this order: 16x16, 16x8 (INIT2), 8x16, 8x8 (INIT4), 8x4 (INIT5),
 * 4x8 (INIT6), 4x4 (INIT7), 4x16 (INIT8).
 *
 *   name1  member array of `p` that receives the pointers
 *   name2  middle part of the routine name
 *   cpu    suffix pasted verbatim onto the routine name; no underscore is
 *          inserted, so the argument has to carry its own (e.g. _mmx2)
 *
 * An object named `p` must be in scope where the macro is expanded.  Every
 * generated statement already ends in ';'.
 *
 * INITn(name, cpu) is shorthand for INITn_NAME(name, name, cpu). */
#define INIT2_NAME(name1, name2, cpu) \
    p.name1[LUMA_16x16] = x265_pixel_ ## name2 ## _16x16 ## cpu; \
    p.name1[LUMA_16x8] = x265_pixel_ ## name2 ## _16x8 ## cpu;
#define INIT4_NAME(name1, name2, cpu) \
    INIT2_NAME(name1, name2, cpu) \
    p.name1[LUMA_8x16] = x265_pixel_ ## name2 ## _8x16 ## cpu; \
    p.name1[LUMA_8x8] = x265_pixel_ ## name2 ## _8x8 ## cpu;
#define INIT5_NAME(name1, name2, cpu) \
    INIT4_NAME(name1, name2, cpu) \
    p.name1[LUMA_8x4] = x265_pixel_ ## name2 ## _8x4 ## cpu;
#define INIT6_NAME(name1, name2, cpu) \
    INIT5_NAME(name1, name2, cpu) \
    p.name1[LUMA_4x8] = x265_pixel_ ## name2 ## _4x8 ## cpu;
#define INIT7_NAME(name1, name2, cpu) \
    INIT6_NAME(name1, name2, cpu) \
    p.name1[LUMA_4x4] = x265_pixel_ ## name2 ## _4x4 ## cpu;
#define INIT8_NAME(name1, name2, cpu) \
    INIT7_NAME(name1, name2, cpu) \
    p.name1[LUMA_4x16] = x265_pixel_ ## name2 ## _4x16 ## cpu;
#define INIT2(name, cpu) INIT2_NAME(name, name, cpu)
#define INIT4(name, cpu) INIT4_NAME(name, name, cpu)
#define INIT5(name, cpu) INIT5_NAME(name, name, cpu)
#define INIT6(name, cpu) INIT6_NAME(name, name, cpu)
#define INIT7(name, cpu) INIT7_NAME(name, name, cpu)
#define INIT8(name, cpu) INIT8_NAME(name, name, cpu)
/* HEVC_SATD(cpu): fills 24 slots of p.satd[] (every LUMA_WxH listed below)
 * with the matching x265_pixel_satd_WxH_<cpu> routine.
 *
 * The underscore before the suffix is supplied by the macro, so `cpu` is
 * passed without one (e.g. sse4).  An object named `p` must be in scope at
 * the expansion site.  The expansion already ends in ';'. */
#define HEVC_SATD(cpu) \
    p.satd[LUMA_4x8] = x265_pixel_satd_4x8_ ## cpu; \
    p.satd[LUMA_4x16] = x265_pixel_satd_4x16_ ## cpu; \
    p.satd[LUMA_8x4] = x265_pixel_satd_8x4_ ## cpu; \
    p.satd[LUMA_8x8] = x265_pixel_satd_8x8_ ## cpu; \
    p.satd[LUMA_8x16] = x265_pixel_satd_8x16_ ## cpu; \
    p.satd[LUMA_8x32] = x265_pixel_satd_8x32_ ## cpu; \
    p.satd[LUMA_12x16] = x265_pixel_satd_12x16_ ## cpu; \
    p.satd[LUMA_16x4] = x265_pixel_satd_16x4_ ## cpu; \
    p.satd[LUMA_16x8] = x265_pixel_satd_16x8_ ## cpu; \
    p.satd[LUMA_16x12] = x265_pixel_satd_16x12_ ## cpu; \
    p.satd[LUMA_16x16] = x265_pixel_satd_16x16_ ## cpu; \
    p.satd[LUMA_16x32] = x265_pixel_satd_16x32_ ## cpu; \
    p.satd[LUMA_16x64] = x265_pixel_satd_16x64_ ## cpu; \
    p.satd[LUMA_24x32] = x265_pixel_satd_24x32_ ## cpu; \
    p.satd[LUMA_32x8] = x265_pixel_satd_32x8_ ## cpu; \
    p.satd[LUMA_32x16] = x265_pixel_satd_32x16_ ## cpu; \
    p.satd[LUMA_32x24] = x265_pixel_satd_32x24_ ## cpu; \
    p.satd[LUMA_32x32] = x265_pixel_satd_32x32_ ## cpu; \
    p.satd[LUMA_32x64] = x265_pixel_satd_32x64_ ## cpu; \
    p.satd[LUMA_48x64] = x265_pixel_satd_48x64_ ## cpu; \
    p.satd[LUMA_64x16] = x265_pixel_satd_64x16_ ## cpu; \
    p.satd[LUMA_64x32] = x265_pixel_satd_64x32_ ## cpu; \
    p.satd[LUMA_64x48] = x265_pixel_satd_64x48_ ## cpu; \
    p.satd[LUMA_64x64] = x265_pixel_satd_64x64_ ## cpu;
/* SAD_X3(cpu): fills 16 slots of p.sad_x3[] (block widths 16 and above, as
 * listed) with the matching x265_pixel_sad_x3_WxH_<cpu> routine.
 *
 * NOTE(review): the `#define` line for this table was missing, which left
 * the assignments below dangling after the previous macro.  The name SAD_X3
 * is restored by analogy with the SAD_X4 macro that follows - confirm it
 * against the call sites.
 *
 * The underscore before the suffix is supplied by the macro.  An object
 * named `p` must be in scope at the expansion site.  The last statement has
 * no trailing ';' - the call site provides it: SAD_X3(ssse3); */
#define SAD_X3(cpu) \
    p.sad_x3[LUMA_16x8] = x265_pixel_sad_x3_16x8_ ## cpu; \
    p.sad_x3[LUMA_16x12] = x265_pixel_sad_x3_16x12_ ## cpu; \
    p.sad_x3[LUMA_16x16] = x265_pixel_sad_x3_16x16_ ## cpu; \
    p.sad_x3[LUMA_16x32] = x265_pixel_sad_x3_16x32_ ## cpu; \
    p.sad_x3[LUMA_16x64] = x265_pixel_sad_x3_16x64_ ## cpu; \
    p.sad_x3[LUMA_32x8] = x265_pixel_sad_x3_32x8_ ## cpu; \
    p.sad_x3[LUMA_32x16] = x265_pixel_sad_x3_32x16_ ## cpu; \
    p.sad_x3[LUMA_32x24] = x265_pixel_sad_x3_32x24_ ## cpu; \
    p.sad_x3[LUMA_32x32] = x265_pixel_sad_x3_32x32_ ## cpu; \
    p.sad_x3[LUMA_32x64] = x265_pixel_sad_x3_32x64_ ## cpu; \
    p.sad_x3[LUMA_24x32] = x265_pixel_sad_x3_24x32_ ## cpu; \
    p.sad_x3[LUMA_48x64] = x265_pixel_sad_x3_48x64_ ## cpu; \
    p.sad_x3[LUMA_64x16] = x265_pixel_sad_x3_64x16_ ## cpu; \
    p.sad_x3[LUMA_64x32] = x265_pixel_sad_x3_64x32_ ## cpu; \
    p.sad_x3[LUMA_64x48] = x265_pixel_sad_x3_64x48_ ## cpu; \
    p.sad_x3[LUMA_64x64] = x265_pixel_sad_x3_64x64_ ## cpu
/* SAD_X4(cpu): fills 16 slots of p.sad_x4[] (block widths 16 and above, as
 * listed) with the matching x265_pixel_sad_x4_WxH_<cpu> routine.
 *
 * The underscore before the suffix is supplied by the macro.  An object
 * named `p` must be in scope at the expansion site.  The last statement has
 * no trailing ';' - the call site provides it: SAD_X4(ssse3); */
#define SAD_X4(cpu) \
    p.sad_x4[LUMA_16x8] = x265_pixel_sad_x4_16x8_ ## cpu; \
    p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_ ## cpu; \
    p.sad_x4[LUMA_16x16] = x265_pixel_sad_x4_16x16_ ## cpu; \
    p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_ ## cpu; \
    p.sad_x4[LUMA_16x64] = x265_pixel_sad_x4_16x64_ ## cpu; \
    p.sad_x4[LUMA_32x8] = x265_pixel_sad_x4_32x8_ ## cpu; \
    p.sad_x4[LUMA_32x16] = x265_pixel_sad_x4_32x16_ ## cpu; \
    p.sad_x4[LUMA_32x24] = x265_pixel_sad_x4_32x24_ ## cpu; \
    p.sad_x4[LUMA_32x32] = x265_pixel_sad_x4_32x32_ ## cpu; \
    p.sad_x4[LUMA_32x64] = x265_pixel_sad_x4_32x64_ ## cpu; \
    p.sad_x4[LUMA_24x32] = x265_pixel_sad_x4_24x32_ ## cpu; \
    p.sad_x4[LUMA_48x64] = x265_pixel_sad_x4_48x64_ ## cpu; \
    p.sad_x4[LUMA_64x16] = x265_pixel_sad_x4_64x16_ ## cpu; \
    p.sad_x4[LUMA_64x32] = x265_pixel_sad_x4_64x32_ ## cpu; \
    p.sad_x4[LUMA_64x48] = x265_pixel_sad_x4_64x48_ ## cpu; \
    p.sad_x4[LUMA_64x64] = x265_pixel_sad_x4_64x64_ ## cpu
/* SAD(cpu): fills 17 slots of p.sad[] (the LUMA_WxH sizes listed below)
 * with the matching x265_pixel_sad_WxH_<cpu> routine.
 *
 * NOTE(review): the `#define` line for this table was missing, which left
 * the assignments below dangling after SAD_X4.  The name SAD is restored to
 * follow the SAD_X3 / SAD_X4 naming - confirm it against the call sites.
 *
 * The underscore before the suffix is supplied by the macro.  An object
 * named `p` must be in scope at the expansion site.  The last statement has
 * no trailing ';' - the call site provides it: SAD(sse2); */
#define SAD(cpu) \
    p.sad[LUMA_8x32] = x265_pixel_sad_8x32_ ## cpu; \
    p.sad[LUMA_16x4] = x265_pixel_sad_16x4_ ## cpu; \
    p.sad[LUMA_16x12] = x265_pixel_sad_16x12_ ## cpu; \
    p.sad[LUMA_16x32] = x265_pixel_sad_16x32_ ## cpu; \
    p.sad[LUMA_16x64] = x265_pixel_sad_16x64_ ## cpu; \
    p.sad[LUMA_32x8] = x265_pixel_sad_32x8_ ## cpu; \
    p.sad[LUMA_32x16] = x265_pixel_sad_32x16_ ## cpu; \
    p.sad[LUMA_32x24] = x265_pixel_sad_32x24_ ## cpu; \
    p.sad[LUMA_32x32] = x265_pixel_sad_32x32_ ## cpu; \
    p.sad[LUMA_32x64] = x265_pixel_sad_32x64_ ## cpu; \
    p.sad[LUMA_64x16] = x265_pixel_sad_64x16_ ## cpu; \
    p.sad[LUMA_64x32] = x265_pixel_sad_64x32_ ## cpu; \
    p.sad[LUMA_64x48] = x265_pixel_sad_64x48_ ## cpu; \
    p.sad[LUMA_64x64] = x265_pixel_sad_64x64_ ## cpu; \
    p.sad[LUMA_48x64] = x265_pixel_sad_48x64_ ## cpu; \
    p.sad[LUMA_24x32] = x265_pixel_sad_24x32_ ## cpu; \
    p.sad[LUMA_12x16] = x265_pixel_sad_12x16_ ## cpu
/* ASSGN_SSE(cpu): fills 15 slots of p.sse_pp[] with the matching
 * x265_pixel_ssd_WxH_<cpu> routine.  Note the naming split: the table
 * member is called sse_pp while the routines are named ssd.
 *
 * The underscore before the suffix is supplied by the macro.  An object
 * named `p` must be in scope at the expansion site.  The last statement has
 * no trailing ';' - the call site provides it: ASSGN_SSE(sse2); */
#define ASSGN_SSE(cpu) \
    p.sse_pp[LUMA_8x8] = x265_pixel_ssd_8x8_ ## cpu; \
    p.sse_pp[LUMA_8x4] = x265_pixel_ssd_8x4_ ## cpu; \
    p.sse_pp[LUMA_16x16] = x265_pixel_ssd_16x16_ ## cpu; \
    p.sse_pp[LUMA_16x4] = x265_pixel_ssd_16x4_ ## cpu; \
    p.sse_pp[LUMA_16x8] = x265_pixel_ssd_16x8_ ## cpu; \
    p.sse_pp[LUMA_8x16] = x265_pixel_ssd_8x16_ ## cpu; \
    p.sse_pp[LUMA_16x12] = x265_pixel_ssd_16x12_ ## cpu; \
    p.sse_pp[LUMA_32x32] = x265_pixel_ssd_32x32_ ## cpu; \
    p.sse_pp[LUMA_32x16] = x265_pixel_ssd_32x16_ ## cpu; \
    p.sse_pp[LUMA_16x32] = x265_pixel_ssd_16x32_ ## cpu; \
    p.sse_pp[LUMA_8x32] = x265_pixel_ssd_8x32_ ## cpu; \
    p.sse_pp[LUMA_32x8] = x265_pixel_ssd_32x8_ ## cpu; \
    p.sse_pp[LUMA_32x24] = x265_pixel_ssd_32x24_ ## cpu; \
    p.sse_pp[LUMA_32x64] = x265_pixel_ssd_32x64_ ## cpu; \
    p.sse_pp[LUMA_16x64] = x265_pixel_ssd_16x64_ ## cpu
/* ASSGN_SSE_SS(cpu): fills 25 slots of p.sse_ss[] (LUMA_4x4 through
 * LUMA_64x64, as listed) with the matching x265_pixel_ssd_ss_WxH_<cpu>
 * routine.
 *
 * The underscore before the suffix is supplied by the macro.  An object
 * named `p` must be in scope at the expansion site.  The expansion already
 * ends in ';'. */
#define ASSGN_SSE_SS(cpu) \
    p.sse_ss[LUMA_4x4] = x265_pixel_ssd_ss_4x4_ ## cpu; \
    p.sse_ss[LUMA_4x8] = x265_pixel_ssd_ss_4x8_ ## cpu; \
    p.sse_ss[LUMA_4x16] = x265_pixel_ssd_ss_4x16_ ## cpu; \
    p.sse_ss[LUMA_8x4] = x265_pixel_ssd_ss_8x4_ ## cpu; \
    p.sse_ss[LUMA_8x8] = x265_pixel_ssd_ss_8x8_ ## cpu; \
    p.sse_ss[LUMA_8x16] = x265_pixel_ssd_ss_8x16_ ## cpu; \
    p.sse_ss[LUMA_8x32] = x265_pixel_ssd_ss_8x32_ ## cpu; \
    p.sse_ss[LUMA_12x16] = x265_pixel_ssd_ss_12x16_ ## cpu; \
    p.sse_ss[LUMA_16x4] = x265_pixel_ssd_ss_16x4_ ## cpu; \
    p.sse_ss[LUMA_16x8] = x265_pixel_ssd_ss_16x8_ ## cpu; \
    p.sse_ss[LUMA_16x12] = x265_pixel_ssd_ss_16x12_ ## cpu; \
    p.sse_ss[LUMA_16x16] = x265_pixel_ssd_ss_16x16_ ## cpu; \
    p.sse_ss[LUMA_16x32] = x265_pixel_ssd_ss_16x32_ ## cpu; \
    p.sse_ss[LUMA_16x64] = x265_pixel_ssd_ss_16x64_ ## cpu; \
    p.sse_ss[LUMA_24x32] = x265_pixel_ssd_ss_24x32_ ## cpu; \
    p.sse_ss[LUMA_32x8] = x265_pixel_ssd_ss_32x8_ ## cpu; \
    p.sse_ss[LUMA_32x16] = x265_pixel_ssd_ss_32x16_ ## cpu; \
    p.sse_ss[LUMA_32x24] = x265_pixel_ssd_ss_32x24_ ## cpu; \
    p.sse_ss[LUMA_32x32] = x265_pixel_ssd_ss_32x32_ ## cpu; \
    p.sse_ss[LUMA_32x64] = x265_pixel_ssd_ss_32x64_ ## cpu; \
    p.sse_ss[LUMA_48x64] = x265_pixel_ssd_ss_48x64_ ## cpu; \
    p.sse_ss[LUMA_64x16] = x265_pixel_ssd_ss_64x16_ ## cpu; \
    p.sse_ss[LUMA_64x32] = x265_pixel_ssd_ss_64x32_ ## cpu; \
    p.sse_ss[LUMA_64x48] = x265_pixel_ssd_ss_64x48_ ## cpu; \
    p.sse_ss[LUMA_64x64] = x265_pixel_ssd_ss_64x64_ ## cpu;
/* SA8D_INTER_FROM_BLOCK(cpu): fills 24 slots of p.sa8d_inter[].
 *
 * Not every slot receives an sa8d routine: the entries for 4x8, 8x4, 4x16,
 * 16x4, 12x16 and 16x12 are assigned x265_pixel_satd_WxH_<cpu>; all other
 * sizes get x265_pixel_sa8d_WxH_<cpu>.
 *
 * The underscore before the suffix is supplied by the macro.  An object
 * named `p` must be in scope at the expansion site.  The expansion already
 * ends in ';'. */
#define SA8D_INTER_FROM_BLOCK(cpu) \
    p.sa8d_inter[LUMA_4x8] = x265_pixel_satd_4x8_ ## cpu; \
    p.sa8d_inter[LUMA_8x4] = x265_pixel_satd_8x4_ ## cpu; \
    p.sa8d_inter[LUMA_4x16] = x265_pixel_satd_4x16_ ## cpu; \
    p.sa8d_inter[LUMA_16x4] = x265_pixel_satd_16x4_ ## cpu; \
    p.sa8d_inter[LUMA_12x16] = x265_pixel_satd_12x16_ ## cpu; \
    p.sa8d_inter[LUMA_8x8] = x265_pixel_sa8d_8x8_ ## cpu; \
    p.sa8d_inter[LUMA_16x16] = x265_pixel_sa8d_16x16_ ## cpu; \
    p.sa8d_inter[LUMA_16x12] = x265_pixel_satd_16x12_ ## cpu; \
    p.sa8d_inter[LUMA_16x8] = x265_pixel_sa8d_16x8_ ## cpu; \
    p.sa8d_inter[LUMA_8x16] = x265_pixel_sa8d_8x16_ ## cpu; \
    p.sa8d_inter[LUMA_32x24] = x265_pixel_sa8d_32x24_ ## cpu; \
    p.sa8d_inter[LUMA_24x32] = x265_pixel_sa8d_24x32_ ## cpu; \
    p.sa8d_inter[LUMA_32x8] = x265_pixel_sa8d_32x8_ ## cpu; \
    p.sa8d_inter[LUMA_8x32] = x265_pixel_sa8d_8x32_ ## cpu; \
    p.sa8d_inter[LUMA_32x32] = x265_pixel_sa8d_32x32_ ## cpu; \
    p.sa8d_inter[LUMA_32x16] = x265_pixel_sa8d_32x16_ ## cpu; \
    p.sa8d_inter[LUMA_16x32] = x265_pixel_sa8d_16x32_ ## cpu; \
    p.sa8d_inter[LUMA_64x64] = x265_pixel_sa8d_64x64_ ## cpu; \
    p.sa8d_inter[LUMA_64x32] = x265_pixel_sa8d_64x32_ ## cpu; \
    p.sa8d_inter[LUMA_32x64] = x265_pixel_sa8d_32x64_ ## cpu; \
    p.sa8d_inter[LUMA_64x48] = x265_pixel_sa8d_64x48_ ## cpu; \
    p.sa8d_inter[LUMA_48x64] = x265_pixel_sa8d_48x64_ ## cpu; \
    p.sa8d_inter[LUMA_64x16] = x265_pixel_sa8d_64x16_ ## cpu; \
    p.sa8d_inter[LUMA_16x64] = x265_pixel_sa8d_16x64_ ## cpu;
/* PIXEL_AVG(cpu): fills the 22 slots of p.pixelavg_pp[] whose block width
 * is 8 or more (LUMA_64x64 down to LUMA_8x4, as listed) with the matching
 * x265_pixel_avg_WxH_<cpu> routine.
 *
 * The underscore before the suffix is supplied by the macro.  An object
 * named `p` must be in scope at the expansion site.  The expansion already
 * ends in ';'. */
#define PIXEL_AVG(cpu) \
    p.pixelavg_pp[LUMA_64x64] = x265_pixel_avg_64x64_ ## cpu; \
    p.pixelavg_pp[LUMA_64x48] = x265_pixel_avg_64x48_ ## cpu; \
    p.pixelavg_pp[LUMA_64x32] = x265_pixel_avg_64x32_ ## cpu; \
    p.pixelavg_pp[LUMA_64x16] = x265_pixel_avg_64x16_ ## cpu; \
    p.pixelavg_pp[LUMA_48x64] = x265_pixel_avg_48x64_ ## cpu; \
    p.pixelavg_pp[LUMA_32x64] = x265_pixel_avg_32x64_ ## cpu; \
    p.pixelavg_pp[LUMA_32x32] = x265_pixel_avg_32x32_ ## cpu; \
    p.pixelavg_pp[LUMA_32x24] = x265_pixel_avg_32x24_ ## cpu; \
    p.pixelavg_pp[LUMA_32x16] = x265_pixel_avg_32x16_ ## cpu; \
    p.pixelavg_pp[LUMA_32x8] = x265_pixel_avg_32x8_ ## cpu; \
    p.pixelavg_pp[LUMA_24x32] = x265_pixel_avg_24x32_ ## cpu; \
    p.pixelavg_pp[LUMA_16x64] = x265_pixel_avg_16x64_ ## cpu; \
    p.pixelavg_pp[LUMA_16x32] = x265_pixel_avg_16x32_ ## cpu; \
    p.pixelavg_pp[LUMA_16x16] = x265_pixel_avg_16x16_ ## cpu; \
    p.pixelavg_pp[LUMA_16x12] = x265_pixel_avg_16x12_ ## cpu; \
    p.pixelavg_pp[LUMA_16x8] = x265_pixel_avg_16x8_ ## cpu; \
    p.pixelavg_pp[LUMA_16x4] = x265_pixel_avg_16x4_ ## cpu; \
    p.pixelavg_pp[LUMA_12x16] = x265_pixel_avg_12x16_ ## cpu; \
    p.pixelavg_pp[LUMA_8x32] = x265_pixel_avg_8x32_ ## cpu; \
    p.pixelavg_pp[LUMA_8x16] = x265_pixel_avg_8x16_ ## cpu; \
    p.pixelavg_pp[LUMA_8x8] = x265_pixel_avg_8x8_ ## cpu; \
    p.pixelavg_pp[LUMA_8x4] = x265_pixel_avg_8x4_ ## cpu;

/* PIXEL_AVG_W4(cpu): companion to PIXEL_AVG for the three width-4 slots
 * (4x4, 4x8, 4x16) of p.pixelavg_pp[].  Kept separate so the two groups can
 * be installed with different `cpu` suffixes.  Same conventions as above. */
#define PIXEL_AVG_W4(cpu) \
    p.pixelavg_pp[LUMA_4x4] = x265_pixel_avg_4x4_ ## cpu; \
    p.pixelavg_pp[LUMA_4x8] = x265_pixel_avg_4x8_ ## cpu; \
    p.pixelavg_pp[LUMA_4x16] = x265_pixel_avg_4x16_ ## cpu;
/* Per-block-size chroma interpolation setters.
 *
 * Each macro takes a width W, a height H and a `cpu` suffix and assigns
 * x265_interp_4tap_<dir>_<kind>_WxH<cpu> routines into the filter tables of
 * p.chroma[<colour space>].  `cpu` is pasted verbatim, with no underscore
 * inserted, so the argument has to carry its own (e.g. _sse4).
 *
 * The three colour-space variants differ only in the outer index and in the
 * block-size enumerator used for the inner index:
 *
 *     _420 -> p.chroma[X265_CSP_I420], indexed by CHROMA_WxH
 *     _422 -> p.chroma[X265_CSP_I422], indexed by CHROMA422_WxH
 *     _444 -> p.chroma[X265_CSP_I444], indexed by LUMA_WxH
 *
 * The routine name is built from W and H alone and is the same for all
 * three.  An object named `p` must be in scope at the expansion site; every
 * generated statement already ends in ';'. */

/* SETUP_CHROMA_FUNC_DEF_*: sets filter_hpp, filter_hps, filter_vpp and
 * filter_vps from the horiz_pp, horiz_ps, vert_pp and vert_ps routines. */
#define SETUP_CHROMA_FUNC_DEF_420(W, H, cpu) \
    p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
    p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
    p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
    p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu;

#define SETUP_CHROMA_FUNC_DEF_422(W, H, cpu) \
    p.chroma[X265_CSP_I422].filter_hpp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
    p.chroma[X265_CSP_I422].filter_hps[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
    p.chroma[X265_CSP_I422].filter_vpp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
    p.chroma[X265_CSP_I422].filter_vps[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu;

#define SETUP_CHROMA_FUNC_DEF_444(W, H, cpu) \
    p.chroma[X265_CSP_I444].filter_hpp[LUMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
    p.chroma[X265_CSP_I444].filter_hps[LUMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
    p.chroma[X265_CSP_I444].filter_vpp[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
    p.chroma[X265_CSP_I444].filter_vps[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu;

/* SETUP_CHROMA_SP_FUNC_DEF_*: sets filter_vsp from the vert_sp routine. */
#define SETUP_CHROMA_SP_FUNC_DEF_420(W, H, cpu) \
    p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;

#define SETUP_CHROMA_SP_FUNC_DEF_422(W, H, cpu) \
    p.chroma[X265_CSP_I422].filter_vsp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;

#define SETUP_CHROMA_SP_FUNC_DEF_444(W, H, cpu) \
    p.chroma[X265_CSP_I444].filter_vsp[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;

/* SETUP_CHROMA_SS_FUNC_DEF_*: sets filter_vss from the vert_ss routine. */
#define SETUP_CHROMA_SS_FUNC_DEF_420(W, H, cpu) \
    p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu;

#define SETUP_CHROMA_SS_FUNC_DEF_422(W, H, cpu) \
    p.chroma[X265_CSP_I422].filter_vss[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu;

#define SETUP_CHROMA_SS_FUNC_DEF_444(W, H, cpu) \
    p.chroma[X265_CSP_I444].filter_vss[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu;
/* CHROMA_FILTERS_420(cpu): invokes SETUP_CHROMA_FUNC_DEF_420(W, H, cpu) for
 * each of the 24 block sizes listed, from 4x4 up to 8x32.
 *
 * `cpu` is forwarded unchanged to the per-size macro.  Every line adds its
 * own ';' after the invocation. */
#define CHROMA_FILTERS_420(cpu) \
    SETUP_CHROMA_FUNC_DEF_420(4, 4, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(4, 2, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(2, 4, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(8, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(8, 4, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(4, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(8, 6, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(6, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(8, 2, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(2, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(16, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(16, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(8, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(16, 12, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(12, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(16, 4, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(4, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(32, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(32, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(16, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(32, 24, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(24, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(32, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(8, 32, cpu);
/* CHROMA_FILTERS_422(cpu): invokes SETUP_CHROMA_FUNC_DEF_422(W, H, cpu) for
 * each of the 24 block sizes listed, from 4x8 up to 8x64.
 *
 * `cpu` is forwarded unchanged to the per-size macro.  Every line adds its
 * own ';' after the invocation. */
#define CHROMA_FILTERS_422(cpu) \
    SETUP_CHROMA_FUNC_DEF_422(4, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(4, 4, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(2, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(8, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(8, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(4, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(8, 12, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(6, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(8, 4, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(2, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(16, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(16, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(8, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(16, 24, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(12, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(16, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(4, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(32, 64, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(32, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(16, 64, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(32, 48, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(24, 64, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(32, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(8, 64, cpu);
/* CHROMA_FILTERS_444(cpu): invokes SETUP_CHROMA_FUNC_DEF_444(W, H, cpu) for
 * each of the 24 block sizes listed, from 8x8 up to 16x64.  No 4x4 entry is
 * present in this table.
 *
 * `cpu` is forwarded unchanged to the per-size macro.  Every line adds its
 * own ';' after the invocation. */
#define CHROMA_FILTERS_444(cpu) \
    SETUP_CHROMA_FUNC_DEF_444(8, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(8, 4, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(4, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(16, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(16, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(8, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(16, 12, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(12, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(16, 4, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(4, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(32, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(32, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(16, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(32, 24, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(24, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(32, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(8, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(64, 64, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(64, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(32, 64, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(64, 48, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(48, 64, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(64, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(16, 64, cpu);
/* 4:2:0 chroma vertical "sp" filter tables, split into two macros so the
 * two groups can be installed with different `cpu` suffixes.  Both invoke
 * SETUP_CHROMA_SP_FUNC_DEF_420(W, H, cpu) once per listed size and add
 * their own ';' after each invocation. */

/* CHROMA_SP_FILTERS_SSE4_420(cpu): the 18 sizes whose width is not 8. */
#define CHROMA_SP_FILTERS_SSE4_420(cpu) \
    SETUP_CHROMA_SP_FUNC_DEF_420(4, 4, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(4, 2, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(2, 4, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(4, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(6, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(2, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(16, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(16, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(16, 12, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(12, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(16, 4, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(4, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(32, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(32, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(16, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(32, 24, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(24, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(32, 8, cpu);

/* CHROMA_SP_FILTERS_420(cpu): the 6 width-8 sizes (8x2 .. 8x32). */
#define CHROMA_SP_FILTERS_420(cpu) \
    SETUP_CHROMA_SP_FUNC_DEF_420(8, 2, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(8, 4, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(8, 6, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(8, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(8, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(8, 32, cpu);
/* 4:2:2 chroma vertical "sp" filter tables, split into two macros so the
 * two groups can be installed with different `cpu` suffixes.  Both invoke
 * SETUP_CHROMA_SP_FUNC_DEF_422(W, H, cpu) once per listed size and add
 * their own ';' after each invocation. */

/* CHROMA_SP_FILTERS_SSE4_422(cpu): the 18 sizes whose width is not 8. */
#define CHROMA_SP_FILTERS_SSE4_422(cpu) \
    SETUP_CHROMA_SP_FUNC_DEF_422(4, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(4, 4, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(2, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(4, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(6, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(2, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(16, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(16, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(16, 24, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(12, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(16, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(4, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(32, 64, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(32, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(16, 64, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(32, 48, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(24, 64, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(32, 16, cpu);

/* CHROMA_SP_FILTERS_422(cpu): the 6 width-8 sizes (8x4 .. 8x64). */
#define CHROMA_SP_FILTERS_422(cpu) \
    SETUP_CHROMA_SP_FUNC_DEF_422(8, 4, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(8, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(8, 12, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(8, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(8, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(8, 64, cpu);
/* 4:4:4 chroma vertical "sp" filter tables, split into two macros so the
 * two groups can be installed with different `cpu` suffixes.  Both invoke
 * SETUP_CHROMA_SP_FUNC_DEF_444(W, H, cpu) once per listed size and add
 * their own ';' after each invocation. */

/* CHROMA_SP_FILTERS_SSE4_444(cpu): the 20 sizes whose width is not 8. */
#define CHROMA_SP_FILTERS_SSE4_444(cpu) \
    SETUP_CHROMA_SP_FUNC_DEF_444(4, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(16, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(16, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(16, 12, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(12, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(16, 4, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(4, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(32, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(32, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(16, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(32, 24, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(24, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(32, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(64, 64, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(64, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(32, 64, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(64, 48, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(48, 64, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(64, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(16, 64, cpu);

/* CHROMA_SP_FILTERS_444(cpu): the 4 width-8 sizes (8x8, 8x4, 8x16, 8x32). */
#define CHROMA_SP_FILTERS_444(cpu) \
    SETUP_CHROMA_SP_FUNC_DEF_444(8, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(8, 4, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(8, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(8, 32, cpu);
/* 4:2:0 chroma vertical "ss" filter tables, split into two macros so the
 * two groups can be installed with different `cpu` suffixes.  Both invoke
 * SETUP_CHROMA_SS_FUNC_DEF_420(W, H, cpu) once per listed size and add
 * their own ';' after each invocation. */

/* CHROMA_SS_FILTERS_420(cpu): 21 sizes - everything except 2x4, 2x8, 6x8. */
#define CHROMA_SS_FILTERS_420(cpu) \
    SETUP_CHROMA_SS_FUNC_DEF_420(4, 4, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(4, 2, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(8, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(8, 4, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(4, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(8, 6, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(8, 2, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(16, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(16, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(8, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(16, 12, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(12, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(16, 4, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(4, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(32, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(32, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(16, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(32, 24, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(24, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(32, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(8, 32, cpu);

/* CHROMA_SS_FILTERS_SSE4_420(cpu): the remaining 3 sizes (2x4, 2x8, 6x8). */
#define CHROMA_SS_FILTERS_SSE4_420(cpu) \
    SETUP_CHROMA_SS_FUNC_DEF_420(2, 4, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(2, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(6, 8, cpu);
/* 4:2:2 chroma vertical "ss" filter tables, split into two macros so the
 * two groups can be installed with different `cpu` suffixes.  Both invoke
 * SETUP_CHROMA_SS_FUNC_DEF_422(W, H, cpu) once per listed size and add
 * their own ';' after each invocation. */

/* CHROMA_SS_FILTERS_422(cpu): 21 sizes - everything except 2x8, 2x16, 6x16. */
#define CHROMA_SS_FILTERS_422(cpu) \
    SETUP_CHROMA_SS_FUNC_DEF_422(4, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(4, 4, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(8, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(8, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(4, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(8, 12, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(8, 4, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(16, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(16, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(8, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(16, 24, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(12, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(16, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(4, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(32, 64, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(32, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(16, 64, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(32, 48, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(24, 64, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(32, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(8, 64, cpu);

/* CHROMA_SS_FILTERS_SSE4_422(cpu): the remaining 3 sizes (2x8, 2x16, 6x16). */
#define CHROMA_SS_FILTERS_SSE4_422(cpu) \
    SETUP_CHROMA_SS_FUNC_DEF_422(2, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(2, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(6, 16, cpu);
/* CHROMA_SS_FILTERS_444(cpu): invokes SETUP_CHROMA_SS_FUNC_DEF_444(W, H, cpu)
 * for each of the 24 block sizes listed, from 8x8 up to 16x64.  No 4x4
 * entry is present in this table.
 *
 * `cpu` is forwarded unchanged to the per-size macro.  Every line adds its
 * own ';' after the invocation. */
#define CHROMA_SS_FILTERS_444(cpu) \
    SETUP_CHROMA_SS_FUNC_DEF_444(8, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(8, 4, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(4, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(16, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(16, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(8, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(16, 12, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(12, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(16, 4, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(4, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(32, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(32, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(16, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(32, 24, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(24, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(32, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(8, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(64, 64, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(64, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(32, 64, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(64, 48, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(48, 64, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(64, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(16, 64, cpu);
/* SETUP_LUMA_FUNC_DEF(W, H, cpu): installs the 8-tap luma interpolation
 * routines x265_interp_8tap_<dir>_<kind>_WxH<cpu> for one block size into
 * p.luma_hpp / luma_hps / luma_vpp / luma_vps, indexed by LUMA_WxH.
 *
 * When HIGH_BIT_DEPTH is non-zero the macro additionally sets p.luma_vsp
 * from the vert_sp routine; otherwise only the four tables above are
 * written.
 *
 * The `#else` between the two definitions had been lost.  Without it both
 * definitions sat inside the `#if`, so the macro was undefined when
 * HIGH_BIT_DEPTH is 0, and the four-assignment version redefined the
 * five-assignment one when it is non-zero.
 *
 * `cpu` is pasted verbatim (no underscore inserted).  An object named `p`
 * must be in scope at the expansion site; every generated statement already
 * ends in ';'. */
#if HIGH_BIT_DEPTH // temporary, until all 10bit functions are completed
#define SETUP_LUMA_FUNC_DEF(W, H, cpu) \
    p.luma_hpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu; \
    p.luma_hps[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu; \
    p.luma_vpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
    p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu; \
    p.luma_vsp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu;
#else
#define SETUP_LUMA_FUNC_DEF(W, H, cpu) \
    p.luma_hpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu; \
    p.luma_hps[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu; \
    p.luma_vpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
    p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu;
#endif // if HIGH_BIT_DEPTH
/* Per-block-size luma setters.  Each takes a width W, a height H and a
 * `cpu` suffix that is pasted verbatim onto the routine name (no underscore
 * inserted), and writes slot LUMA_WxH of one or two tables in `p`, which
 * must be in scope at the expansion site.  Every generated statement
 * already ends in ';'. */

/* Sets p.luma_sub_ps and p.luma_add_ps from x265_pixel_sub_ps_WxH<cpu> and
 * x265_pixel_add_ps_WxH<cpu>. */
#define SETUP_LUMA_SUB_FUNC_DEF(W, H, cpu) \
    p.luma_sub_ps[LUMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \
    p.luma_add_ps[LUMA_ ## W ## x ## H] = x265_pixel_add_ps_ ## W ## x ## H ## cpu;

/* Sets p.luma_vsp from x265_interp_8tap_vert_sp_WxH<cpu>. */
#define SETUP_LUMA_SP_FUNC_DEF(W, H, cpu) \
    p.luma_vsp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu;

/* Sets p.luma_vss from x265_interp_8tap_vert_ss_WxH<cpu>. */
#define SETUP_LUMA_SS_FUNC_DEF(W, H, cpu) \
    p.luma_vss[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ss_ ## W ## x ## H ## cpu;

/* Sets p.luma_copy_<type> from x265_blockcopy_<type>_WxH<cpu>; `type` is
 * pasted into both the member name and the routine name. */
#define SETUP_LUMA_BLOCKCOPY(type, W, H, cpu) \
    p.luma_copy_ ## type[LUMA_ ## W ## x ## H] = x265_blockcopy_ ## type ## _ ## W ## x ## H ## cpu;
/* SETUP_CHROMA_BLOCKCOPY(type, W, H, cpu): assigns
 * x265_blockcopy_<type>_WxH<cpu> to slot CHROMA_WxH of
 * p.chroma[X265_CSP_I420].copy_<type>.  `type` is pasted into both the
 * member name and the routine name; `cpu` is pasted verbatim (no underscore
 * inserted).  An object named `p` must be in scope at the expansion site. */
#define SETUP_CHROMA_BLOCKCOPY(type, W, H, cpu) \
    p.chroma[X265_CSP_I420].copy_ ## type[CHROMA_ ## W ## x ## H] = x265_blockcopy_ ## type ## _ ## W ## x ## H ## cpu;

/* CHROMA_BLOCKCOPY(type, cpu): invokes SETUP_CHROMA_BLOCKCOPY for each of
 * the 24 block sizes listed, from 2x4 up to 32x32. */
#define CHROMA_BLOCKCOPY(type, cpu) \
    SETUP_CHROMA_BLOCKCOPY(type, 2, 4, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 2, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 4, 2, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 4, 4, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 4, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 4, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 6, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 8, 2, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 8, 4, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 8, 6, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 8, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 8, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 8, 32, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 12, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 16, 4, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 16, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 16, 12, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 16, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 16, 32, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 24, 32, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 32, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 32, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 32, 24, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 32, 32, cpu);
/* SETUP_CHROMA_BLOCKCOPY_422(type, W, H, cpu): assigns
 * x265_blockcopy_<type>_WxH<cpu> to slot CHROMA422_WxH of
 * p.chroma[X265_CSP_I422].copy_<type>.  `type` is pasted into both the
 * member name and the routine name; `cpu` is pasted verbatim (no underscore
 * inserted).  An object named `p` must be in scope at the expansion site. */
#define SETUP_CHROMA_BLOCKCOPY_422(type, W, H, cpu) \
    p.chroma[X265_CSP_I422].copy_ ## type[CHROMA422_ ## W ## x ## H] = x265_blockcopy_ ## type ## _ ## W ## x ## H ## cpu;

/* CHROMA_BLOCKCOPY_422(type, cpu): invokes SETUP_CHROMA_BLOCKCOPY_422 for
 * each of the 24 block sizes listed, from 2x8 up to 32x64. */
#define CHROMA_BLOCKCOPY_422(type, cpu) \
    SETUP_CHROMA_BLOCKCOPY_422(type, 2, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 2, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 4, 4, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 4, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 4, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 4, 32, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 6, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 8, 4, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 8, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 8, 12, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 8, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 8, 32, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 8, 64, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 12, 32, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 16, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 16, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 16, 24, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 16, 32, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 16, 64, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 24, 64, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 32, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 32, 32, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 32, 48, cpu); \
    SETUP_CHROMA_BLOCKCOPY_422(type, 32, 64, cpu);
/* LUMA_BLOCKCOPY(type, cpu): invokes SETUP_LUMA_BLOCKCOPY(type, W, H, cpu)
 * for each of the 25 luma block sizes listed, from 4x4 up to 16x64.
 *
 * `type` and `cpu` are forwarded unchanged to the per-size macro.  Every
 * line adds its own ';' after the invocation. */
#define LUMA_BLOCKCOPY(type, cpu) \
    SETUP_LUMA_BLOCKCOPY(type, 4, 4, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 8, 8, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 8, 4, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 4, 8, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 16, 16, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 16, 8, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 8, 16, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 16, 12, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 12, 16, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 16, 4, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 4, 16, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 32, 32, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 32, 16, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 16, 32, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 32, 24, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 24, 32, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 32, 8, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 8, 32, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 64, 64, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 64, 32, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 32, 64, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 64, 48, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 48, 64, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 64, 16, cpu); \
    SETUP_LUMA_BLOCKCOPY(type, 16, 64, cpu);
/* SETUP_CHROMA_BLOCKCOPY_SP(W, H, cpu): assigns x265_blockcopy_sp_WxH<cpu>
 * to slot CHROMA_WxH of p.chroma[X265_CSP_I420].copy_sp.  `cpu` is pasted
 * verbatim (no underscore inserted).  An object named `p` must be in scope
 * at the expansion site. */
#define SETUP_CHROMA_BLOCKCOPY_SP(W, H, cpu) \
    p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] = x265_blockcopy_sp_ ## W ## x ## H ## cpu;

/* CHROMA_BLOCKCOPY_SP(cpu): invokes SETUP_CHROMA_BLOCKCOPY_SP for each of
 * the 24 block sizes listed, from 2x4 up to 32x32. */
#define CHROMA_BLOCKCOPY_SP(cpu) \
    SETUP_CHROMA_BLOCKCOPY_SP(2, 4, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(2, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(4, 2, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(4, 4, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(4, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(4, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(6, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(8, 2, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(8, 4, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(8, 6, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(8, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(8, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(8, 32, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(12, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(16, 4, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(16, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(16, 12, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(16, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(16, 32, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(24, 32, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(32, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(32, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(32, 24, cpu); \
    SETUP_CHROMA_BLOCKCOPY_SP(32, 32, cpu);
/* I422 counterpart of SETUP_CHROMA_BLOCKCOPY_SP: assigns the same
 * x265_blockcopy_sp_<W>x<H><cpu> symbol family, but into the I422 copy_sp
 * table, indexed by CHROMA422_<W>x<H>.  Requires `p` in scope. */
670 #define SETUP_CHROMA_BLOCKCOPY_SP_422(W, H, cpu) \
671 p.chroma[X265_CSP_I422].copy_sp[CHROMA422_ ## W ## x ## H] = x265_blockcopy_sp_ ## W ## x ## H ## cpu;
/* Expands SETUP_CHROMA_BLOCKCOPY_SP_422 for every I422 chroma block size
 * listed below (2x8 through 32x64), all with the same `cpu` suffix. */
673 #define CHROMA_BLOCKCOPY_SP_422(cpu) \
674 SETUP_CHROMA_BLOCKCOPY_SP_422(2, 8, cpu); \
675 SETUP_CHROMA_BLOCKCOPY_SP_422(2, 16, cpu); \
676 SETUP_CHROMA_BLOCKCOPY_SP_422(4, 4, cpu); \
677 SETUP_CHROMA_BLOCKCOPY_SP_422(4, 8, cpu); \
678 SETUP_CHROMA_BLOCKCOPY_SP_422(4, 16, cpu); \
679 SETUP_CHROMA_BLOCKCOPY_SP_422(4, 32, cpu); \
680 SETUP_CHROMA_BLOCKCOPY_SP_422(6, 16, cpu); \
681 SETUP_CHROMA_BLOCKCOPY_SP_422(8, 4, cpu); \
682 SETUP_CHROMA_BLOCKCOPY_SP_422(8, 8, cpu); \
683 SETUP_CHROMA_BLOCKCOPY_SP_422(8, 12, cpu); \
684 SETUP_CHROMA_BLOCKCOPY_SP_422(8, 16, cpu); \
685 SETUP_CHROMA_BLOCKCOPY_SP_422(8, 32, cpu); \
686 SETUP_CHROMA_BLOCKCOPY_SP_422(8, 64, cpu); \
687 SETUP_CHROMA_BLOCKCOPY_SP_422(12, 32, cpu); \
688 SETUP_CHROMA_BLOCKCOPY_SP_422(16, 8, cpu); \
689 SETUP_CHROMA_BLOCKCOPY_SP_422(16, 16, cpu); \
690 SETUP_CHROMA_BLOCKCOPY_SP_422(16, 24, cpu); \
691 SETUP_CHROMA_BLOCKCOPY_SP_422(16, 32, cpu); \
692 SETUP_CHROMA_BLOCKCOPY_SP_422(16, 64, cpu); \
693 SETUP_CHROMA_BLOCKCOPY_SP_422(24, 64, cpu); \
694 SETUP_CHROMA_BLOCKCOPY_SP_422(32, 16, cpu); \
695 SETUP_CHROMA_BLOCKCOPY_SP_422(32, 32, cpu); \
696 SETUP_CHROMA_BLOCKCOPY_SP_422(32, 48, cpu); \
697 SETUP_CHROMA_BLOCKCOPY_SP_422(32, 64, cpu);
/* Fills two I420 table slots for block size W x H: sub_ps receives
 * x265_pixel_sub_ps_<W>x<H><cpu> and add_ps receives
 * x265_pixel_add_ps_<W>x<H><cpu>.  Despite the name, both the subtract and
 * the add entry are set.  Requires `p` in scope. */
699 #define SETUP_CHROMA_PIXELSUB(W, H, cpu) \
700 p.chroma[X265_CSP_I420].sub_ps[CHROMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \
701 p.chroma[X265_CSP_I420].add_ps[CHROMA_ ## W ## x ## H] = x265_pixel_add_ps_ ## W ## x ## H ## cpu;
/* Expands SETUP_CHROMA_PIXELSUB for the four square I420 chroma sizes
 * 4x4, 8x8, 16x16 and 32x32. */
703 #define CHROMA_PIXELSUB_PS(cpu) \
704 SETUP_CHROMA_PIXELSUB(4, 4, cpu); \
705 SETUP_CHROMA_PIXELSUB(8, 8, cpu); \
706 SETUP_CHROMA_PIXELSUB(16, 16, cpu); \
707 SETUP_CHROMA_PIXELSUB(32, 32, cpu);
/* I422 counterpart of SETUP_CHROMA_PIXELSUB: sets both the sub_ps and the
 * add_ps slot CHROMA422_<W>x<H> of the I422 table to the matching
 * x265_pixel_{sub,add}_ps_<W>x<H><cpu> symbols.  Requires `p` in scope. */
709 #define SETUP_CHROMA_PIXELSUB_422(W, H, cpu) \
710 p.chroma[X265_CSP_I422].sub_ps[CHROMA422_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \
711 p.chroma[X265_CSP_I422].add_ps[CHROMA422_ ## W ## x ## H] = x265_pixel_add_ps_ ## W ## x ## H ## cpu;
/* Expands SETUP_CHROMA_PIXELSUB_422 for the four I422 chroma sizes
 * 4x8, 8x16, 16x32 and 32x64 (height is twice the width in each). */
713 #define CHROMA_PIXELSUB_PS_422(cpu) \
714 SETUP_CHROMA_PIXELSUB_422(4, 8, cpu); \
715 SETUP_CHROMA_PIXELSUB_422(8, 16, cpu); \
716 SETUP_CHROMA_PIXELSUB_422(16, 32, cpu); \
717 SETUP_CHROMA_PIXELSUB_422(32, 64, cpu);
/* Expands SETUP_LUMA_FUNC_DEF (not defined in this part of the file) once
 * for each luma block size listed below, from 4x4 up to 64x64, forwarding
 * the `cpu` suffix to every expansion. */
719 #define LUMA_FILTERS(cpu) \
720 SETUP_LUMA_FUNC_DEF(4, 4, cpu); \
721 SETUP_LUMA_FUNC_DEF(8, 8, cpu); \
722 SETUP_LUMA_FUNC_DEF(8, 4, cpu); \
723 SETUP_LUMA_FUNC_DEF(4, 8, cpu); \
724 SETUP_LUMA_FUNC_DEF(16, 16, cpu); \
725 SETUP_LUMA_FUNC_DEF(16, 8, cpu); \
726 SETUP_LUMA_FUNC_DEF(8, 16, cpu); \
727 SETUP_LUMA_FUNC_DEF(16, 12, cpu); \
728 SETUP_LUMA_FUNC_DEF(12, 16, cpu); \
729 SETUP_LUMA_FUNC_DEF(16, 4, cpu); \
730 SETUP_LUMA_FUNC_DEF(4, 16, cpu); \
731 SETUP_LUMA_FUNC_DEF(32, 32, cpu); \
732 SETUP_LUMA_FUNC_DEF(32, 16, cpu); \
733 SETUP_LUMA_FUNC_DEF(16, 32, cpu); \
734 SETUP_LUMA_FUNC_DEF(32, 24, cpu); \
735 SETUP_LUMA_FUNC_DEF(24, 32, cpu); \
736 SETUP_LUMA_FUNC_DEF(32, 8, cpu); \
737 SETUP_LUMA_FUNC_DEF(8, 32, cpu); \
738 SETUP_LUMA_FUNC_DEF(64, 64, cpu); \
739 SETUP_LUMA_FUNC_DEF(64, 32, cpu); \
740 SETUP_LUMA_FUNC_DEF(32, 64, cpu); \
741 SETUP_LUMA_FUNC_DEF(64, 48, cpu); \
742 SETUP_LUMA_FUNC_DEF(48, 64, cpu); \
743 SETUP_LUMA_FUNC_DEF(64, 16, cpu); \
744 SETUP_LUMA_FUNC_DEF(16, 64, cpu);
/* Expands SETUP_LUMA_SUB_FUNC_DEF (not defined in this part of the file)
 * for the five square luma sizes 4x4, 8x8, 16x16, 32x32 and 64x64. */
746 #define LUMA_PIXELSUB(cpu) \
747 SETUP_LUMA_SUB_FUNC_DEF(4, 4, cpu); \
748 SETUP_LUMA_SUB_FUNC_DEF(8, 8, cpu); \
749 SETUP_LUMA_SUB_FUNC_DEF(16, 16, cpu); \
750 SETUP_LUMA_SUB_FUNC_DEF(32, 32, cpu); \
751 SETUP_LUMA_SUB_FUNC_DEF(64, 64, cpu);
/* Expands SETUP_LUMA_SP_FUNC_DEF (not defined in this part of the file) once
 * for each luma block size listed below, from 4x4 up to 64x64, forwarding
 * the `cpu` suffix to every expansion. */
753 #define LUMA_SP_FILTERS(cpu) \
754 SETUP_LUMA_SP_FUNC_DEF(4, 4, cpu); \
755 SETUP_LUMA_SP_FUNC_DEF(8, 8, cpu); \
756 SETUP_LUMA_SP_FUNC_DEF(8, 4, cpu); \
757 SETUP_LUMA_SP_FUNC_DEF(4, 8, cpu); \
758 SETUP_LUMA_SP_FUNC_DEF(16, 16, cpu); \
759 SETUP_LUMA_SP_FUNC_DEF(16, 8, cpu); \
760 SETUP_LUMA_SP_FUNC_DEF(8, 16, cpu); \
761 SETUP_LUMA_SP_FUNC_DEF(16, 12, cpu); \
762 SETUP_LUMA_SP_FUNC_DEF(12, 16, cpu); \
763 SETUP_LUMA_SP_FUNC_DEF(16, 4, cpu); \
764 SETUP_LUMA_SP_FUNC_DEF(4, 16, cpu); \
765 SETUP_LUMA_SP_FUNC_DEF(32, 32, cpu); \
766 SETUP_LUMA_SP_FUNC_DEF(32, 16, cpu); \
767 SETUP_LUMA_SP_FUNC_DEF(16, 32, cpu); \
768 SETUP_LUMA_SP_FUNC_DEF(32, 24, cpu); \
769 SETUP_LUMA_SP_FUNC_DEF(24, 32, cpu); \
770 SETUP_LUMA_SP_FUNC_DEF(32, 8, cpu); \
771 SETUP_LUMA_SP_FUNC_DEF(8, 32, cpu); \
772 SETUP_LUMA_SP_FUNC_DEF(64, 64, cpu); \
773 SETUP_LUMA_SP_FUNC_DEF(64, 32, cpu); \
774 SETUP_LUMA_SP_FUNC_DEF(32, 64, cpu); \
775 SETUP_LUMA_SP_FUNC_DEF(64, 48, cpu); \
776 SETUP_LUMA_SP_FUNC_DEF(48, 64, cpu); \
777 SETUP_LUMA_SP_FUNC_DEF(64, 16, cpu); \
778 SETUP_LUMA_SP_FUNC_DEF(16, 64, cpu);
/* Expands SETUP_LUMA_SS_FUNC_DEF (not defined in this part of the file) once
 * for each luma block size listed below, from 4x4 up to 64x64, forwarding
 * the `cpu` suffix to every expansion. */
780 #define LUMA_SS_FILTERS(cpu) \
781 SETUP_LUMA_SS_FUNC_DEF(4, 4, cpu); \
782 SETUP_LUMA_SS_FUNC_DEF(8, 8, cpu); \
783 SETUP_LUMA_SS_FUNC_DEF(8, 4, cpu); \
784 SETUP_LUMA_SS_FUNC_DEF(4, 8, cpu); \
785 SETUP_LUMA_SS_FUNC_DEF(16, 16, cpu); \
786 SETUP_LUMA_SS_FUNC_DEF(16, 8, cpu); \
787 SETUP_LUMA_SS_FUNC_DEF(8, 16, cpu); \
788 SETUP_LUMA_SS_FUNC_DEF(16, 12, cpu); \
789 SETUP_LUMA_SS_FUNC_DEF(12, 16, cpu); \
790 SETUP_LUMA_SS_FUNC_DEF(16, 4, cpu); \
791 SETUP_LUMA_SS_FUNC_DEF(4, 16, cpu); \
792 SETUP_LUMA_SS_FUNC_DEF(32, 32, cpu); \
793 SETUP_LUMA_SS_FUNC_DEF(32, 16, cpu); \
794 SETUP_LUMA_SS_FUNC_DEF(16, 32, cpu); \
795 SETUP_LUMA_SS_FUNC_DEF(32, 24, cpu); \
796 SETUP_LUMA_SS_FUNC_DEF(24, 32, cpu); \
797 SETUP_LUMA_SS_FUNC_DEF(32, 8, cpu); \
798 SETUP_LUMA_SS_FUNC_DEF(8, 32, cpu); \
799 SETUP_LUMA_SS_FUNC_DEF(64, 64, cpu); \
800 SETUP_LUMA_SS_FUNC_DEF(64, 32, cpu); \
801 SETUP_LUMA_SS_FUNC_DEF(32, 64, cpu); \
802 SETUP_LUMA_SS_FUNC_DEF(64, 48, cpu); \
803 SETUP_LUMA_SS_FUNC_DEF(48, 64, cpu); \
804 SETUP_LUMA_SS_FUNC_DEF(64, 16, cpu); \
805 SETUP_LUMA_SS_FUNC_DEF(16, 64, cpu);
/* Assigns the symbol x265_pixel_var_<W>x<H><cpu> to p.var[BLOCK_<W>x<H>].
 * Note the index uses the BLOCK_ enumerators, not LUMA_.  Requires `p` in
 * scope. */
807 #define SETUP_PIXEL_VAR_DEF(W, H, cpu) \
808 p.var[BLOCK_ ## W ## x ## H] = x265_pixel_var_ ## W ## x ## H ## cpu;
/* Expands SETUP_PIXEL_VAR_DEF for the square sizes 8x8, 16x16, 32x32 and
 * 64x64.  There is no 4x4 entry in this list. */
810 #define LUMA_VAR(cpu) \
811 SETUP_PIXEL_VAR_DEF(8, 8, cpu); \
812 SETUP_PIXEL_VAR_DEF(16, 16, cpu); \
813 SETUP_PIXEL_VAR_DEF(32, 32, cpu); \
814 SETUP_PIXEL_VAR_DEF(64, 64, cpu);
/* Assigns the symbol x265_pixel_ssd_sp_<W>x<H><cpu> to
 * p.sse_sp[LUMA_<W>x<H>].  The table field is spelled "sse" while the symbol
 * is spelled "ssd".  Requires `p` in scope. */
816 #define SETUP_PIXEL_SSE_SP_DEF(W, H, cpu) \
817 p.sse_sp[LUMA_ ## W ## x ## H] = x265_pixel_ssd_sp_ ## W ## x ## H ## cpu;
/* Expands SETUP_PIXEL_SSE_SP_DEF once for each luma block size listed below,
 * from 4x4 up to 64x64, forwarding the `cpu` suffix to every expansion. */
819 #define LUMA_SSE_SP(cpu) \
820 SETUP_PIXEL_SSE_SP_DEF(4, 4, cpu); \
821 SETUP_PIXEL_SSE_SP_DEF(8, 8, cpu); \
822 SETUP_PIXEL_SSE_SP_DEF(8, 4, cpu); \
823 SETUP_PIXEL_SSE_SP_DEF(4, 8, cpu); \
824 SETUP_PIXEL_SSE_SP_DEF(16, 16, cpu); \
825 SETUP_PIXEL_SSE_SP_DEF(16, 8, cpu); \
826 SETUP_PIXEL_SSE_SP_DEF(8, 16, cpu); \
827 SETUP_PIXEL_SSE_SP_DEF(16, 12, cpu); \
828 SETUP_PIXEL_SSE_SP_DEF(12, 16, cpu); \
829 SETUP_PIXEL_SSE_SP_DEF(16, 4, cpu); \
830 SETUP_PIXEL_SSE_SP_DEF(4, 16, cpu); \
831 SETUP_PIXEL_SSE_SP_DEF(32, 32, cpu); \
832 SETUP_PIXEL_SSE_SP_DEF(32, 16, cpu); \
833 SETUP_PIXEL_SSE_SP_DEF(16, 32, cpu); \
834 SETUP_PIXEL_SSE_SP_DEF(32, 24, cpu); \
835 SETUP_PIXEL_SSE_SP_DEF(24, 32, cpu); \
836 SETUP_PIXEL_SSE_SP_DEF(32, 8, cpu); \
837 SETUP_PIXEL_SSE_SP_DEF(8, 32, cpu); \
838 SETUP_PIXEL_SSE_SP_DEF(64, 64, cpu); \
839 SETUP_PIXEL_SSE_SP_DEF(64, 32, cpu); \
840 SETUP_PIXEL_SSE_SP_DEF(32, 64, cpu); \
841 SETUP_PIXEL_SSE_SP_DEF(64, 48, cpu); \
842 SETUP_PIXEL_SSE_SP_DEF(48, 64, cpu); \
843 SETUP_PIXEL_SSE_SP_DEF(64, 16, cpu); \
844 SETUP_PIXEL_SSE_SP_DEF(16, 64, cpu);
/* Assigns the symbol x265_addAvg_<W>x<H><cpu> to
 * p.luma_addAvg[LUMA_<W>x<H>].  Requires `p` in scope. */
846 #define SETUP_LUMA_ADDAVG_FUNC_DEF(W, H, cpu) \
847 p.luma_addAvg[LUMA_ ## W ## x ## H] = x265_addAvg_ ## W ## x ## H ## cpu;
/* Expands SETUP_LUMA_ADDAVG_FUNC_DEF once for each luma block size listed
 * below, from 4x4 up to 64x64, forwarding the `cpu` suffix to every
 * expansion.
 *
 * The final entry deliberately has no trailing backslash: a continuation
 * there would splice whatever line follows this macro into its body. */
849 #define LUMA_ADDAVG(cpu) \
850 SETUP_LUMA_ADDAVG_FUNC_DEF(4, 4, cpu); \
851 SETUP_LUMA_ADDAVG_FUNC_DEF(4, 8, cpu); \
852 SETUP_LUMA_ADDAVG_FUNC_DEF(4, 16, cpu); \
853 SETUP_LUMA_ADDAVG_FUNC_DEF(8, 4, cpu); \
854 SETUP_LUMA_ADDAVG_FUNC_DEF(8, 8, cpu); \
855 SETUP_LUMA_ADDAVG_FUNC_DEF(8, 16, cpu); \
856 SETUP_LUMA_ADDAVG_FUNC_DEF(8, 32, cpu); \
857 SETUP_LUMA_ADDAVG_FUNC_DEF(12, 16, cpu); \
858 SETUP_LUMA_ADDAVG_FUNC_DEF(16, 4, cpu); \
859 SETUP_LUMA_ADDAVG_FUNC_DEF(16, 8, cpu); \
860 SETUP_LUMA_ADDAVG_FUNC_DEF(16, 12, cpu); \
861 SETUP_LUMA_ADDAVG_FUNC_DEF(16, 16, cpu); \
862 SETUP_LUMA_ADDAVG_FUNC_DEF(16, 32, cpu); \
863 SETUP_LUMA_ADDAVG_FUNC_DEF(24, 32, cpu); \
864 SETUP_LUMA_ADDAVG_FUNC_DEF(16, 64, cpu); \
865 SETUP_LUMA_ADDAVG_FUNC_DEF(32, 8, cpu); \
866 SETUP_LUMA_ADDAVG_FUNC_DEF(32, 16, cpu); \
867 SETUP_LUMA_ADDAVG_FUNC_DEF(32, 24, cpu); \
868 SETUP_LUMA_ADDAVG_FUNC_DEF(32, 32, cpu); \
869 SETUP_LUMA_ADDAVG_FUNC_DEF(32, 64, cpu); \
870 SETUP_LUMA_ADDAVG_FUNC_DEF(48, 64, cpu); \
871 SETUP_LUMA_ADDAVG_FUNC_DEF(64, 16, cpu); \
872 SETUP_LUMA_ADDAVG_FUNC_DEF(64, 32, cpu); \
873 SETUP_LUMA_ADDAVG_FUNC_DEF(64, 48, cpu); \
874 SETUP_LUMA_ADDAVG_FUNC_DEF(64, 64, cpu);
/* Assigns the symbol x265_addAvg_<W>x<H><cpu> to the I420 addAvg table slot
 * CHROMA_<W>x<H>.  Requires `p` in scope. */
876 #define SETUP_CHROMA_ADDAVG_FUNC_DEF(W, H, cpu) \
877 p.chroma[X265_CSP_I420].addAvg[CHROMA_ ## W ## x ## H] = x265_addAvg_ ## W ## x ## H ## cpu;
/* Expands SETUP_CHROMA_ADDAVG_FUNC_DEF for every I420 chroma block size
 * listed below (2x4 through 32x32), all with the same `cpu` suffix. */
879 #define CHROMA_ADDAVG(cpu) \
880 SETUP_CHROMA_ADDAVG_FUNC_DEF(2, 4, cpu); \
881 SETUP_CHROMA_ADDAVG_FUNC_DEF(2, 8, cpu); \
882 SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 2, cpu); \
883 SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 4, cpu); \
884 SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 8, cpu); \
885 SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 16, cpu); \
886 SETUP_CHROMA_ADDAVG_FUNC_DEF(6, 8, cpu); \
887 SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 2, cpu); \
888 SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 4, cpu); \
889 SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 6, cpu); \
890 SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 8, cpu); \
891 SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 16, cpu); \
892 SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 32, cpu); \
893 SETUP_CHROMA_ADDAVG_FUNC_DEF(12, 16, cpu); \
894 SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 4, cpu); \
895 SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 8, cpu); \
896 SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 12, cpu); \
897 SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 16, cpu); \
898 SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 32, cpu); \
899 SETUP_CHROMA_ADDAVG_FUNC_DEF(24, 32, cpu); \
900 SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 8, cpu); \
901 SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 16, cpu); \
902 SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 24, cpu); \
903 SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 32, cpu);
/* I422 counterpart of SETUP_CHROMA_ADDAVG_FUNC_DEF: assigns
 * x265_addAvg_<W>x<H><cpu> to the I422 addAvg table slot CHROMA422_<W>x<H>.
 * Requires `p` in scope. */
905 #define SETUP_CHROMA_ADDAVG_FUNC_DEF_422(W, H, cpu) \
906 p.chroma[X265_CSP_I422].addAvg[CHROMA422_ ## W ## x ## H] = x265_addAvg_ ## W ## x ## H ## cpu;
/* Expands SETUP_CHROMA_ADDAVG_FUNC_DEF_422 for every I422 chroma block size
 * listed below (2x8 through 32x64), all with the same `cpu` suffix. */
908 #define CHROMA_ADDAVG_422(cpu) \
909 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(2, 8, cpu); \
910 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(2, 16, cpu); \
911 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(4, 4, cpu); \
912 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(4, 8, cpu); \
913 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(4, 16, cpu); \
914 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(4, 32, cpu); \
915 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(6, 16, cpu); \
916 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(8, 4, cpu); \
917 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(8, 8, cpu); \
918 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(8, 12, cpu); \
919 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(8, 16, cpu); \
920 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(8, 32, cpu); \
921 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(8, 64, cpu); \
922 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(12, 32, cpu); \
923 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(16, 8, cpu); \
924 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(16, 16, cpu); \
925 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(16, 24, cpu); \
926 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(16, 32, cpu); \
927 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(16, 64, cpu); \
928 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(24, 64, cpu); \
929 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(32, 16, cpu); \
930 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(32, 32, cpu); \
931 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(32, 48, cpu); \
932 SETUP_CHROMA_ADDAVG_FUNC_DEF_422(32, 64, cpu);
/* For intra mode `mode`, fills the 4x4, 8x8, 16x16 and 32x32 slots of
 * p.intra_pred with x265_intra_pred_ang{4,8,16,32}_<fno>_<cpu>.
 * `fno` is the number pasted into the symbol name and need not equal `mode`.
 * Unlike the other SETUP_ macros in this file, this one inserts the
 * underscore before `cpu` itself, so `cpu` is passed without a leading
 * underscore.  Requires `p` in scope. */
934 #define SETUP_INTRA_ANG_COMMON(mode, fno, cpu) \
935 p.intra_pred[mode][BLOCK_4x4] = x265_intra_pred_ang4_ ## fno ## _ ## cpu; \
936 p.intra_pred[mode][BLOCK_8x8] = x265_intra_pred_ang8_ ## fno ## _ ## cpu; \
937 p.intra_pred[mode][BLOCK_16x16] = x265_intra_pred_ang16_ ## fno ## _ ## cpu; \
938 p.intra_pred[mode][BLOCK_32x32] = x265_intra_pred_ang32_ ## fno ## _ ## cpu;
/* Like SETUP_INTRA_ANG_COMMON but without the 4x4 entry: fills only the
 * 8x8, 16x16 and 32x32 slots of p.intra_pred[mode] with
 * x265_intra_pred_ang{8,16,32}_<fno>_<cpu>.  Requires `p` in scope. */
940 #define SETUP_INTRA_ANG(mode, fno, cpu) \
941 p.intra_pred[mode][BLOCK_8x8] = x265_intra_pred_ang8_ ## fno ## _ ## cpu; \
942 p.intra_pred[mode][BLOCK_16x16] = x265_intra_pred_ang16_ ## fno ## _ ## cpu; \
943 p.intra_pred[mode][BLOCK_32x32] = x265_intra_pred_ang32_ ## fno ## _ ## cpu;
/* Fills only the 4x4 slot of p.intra_pred[mode] with
 * x265_intra_pred_ang4_<fno>_<cpu>.  Requires `p` in scope. */
945 #define SETUP_INTRA_ANG4(mode, fno, cpu) \
946 p.intra_pred[mode][BLOCK_4x4] = x265_intra_pred_ang4_ ## fno ## _ ## cpu;
/* Fills only the 16x16 and 32x32 slots of p.intra_pred[mode] with
 * x265_intra_pred_ang{16,32}_<fno>_<cpu>.  Requires `p` in scope. */
948 #define SETUP_INTRA_ANG16_32(mode, fno, cpu) \
949 p.intra_pred[mode][BLOCK_16x16] = x265_intra_pred_ang16_ ## fno ## _ ## cpu; \
950 p.intra_pred[mode][BLOCK_32x32] = x265_intra_pred_ang32_ ## fno ## _ ## cpu;
/* Fills only the 4x4 and 8x8 slots of p.intra_pred[mode] with
 * x265_intra_pred_ang{4,8}_<fno>_<cpu>.  Requires `p` in scope. */
952 #define SETUP_INTRA_ANG4_8(mode, fno, cpu) \
953 p.intra_pred[mode][BLOCK_4x4] = x265_intra_pred_ang4_ ## fno ## _ ## cpu; \
954 p.intra_pred[mode][BLOCK_8x8] = x265_intra_pred_ang8_ ## fno ## _ ## cpu;
/* Sets all four block sizes for intra modes 2 and 34.  Both modes are given
 * the function number 2, i.e. mode 34 reuses the *_2_<cpu> symbols. */
956 #define INTRA_ANG_SSSE3(cpu) \
957 SETUP_INTRA_ANG_COMMON(2, 2, cpu); \
958 SETUP_INTRA_ANG_COMMON(34, 2, cpu);
/* Sets all four block sizes for intra modes 3 through 18.  Each mode uses
 * the function number equal to its own mode number. */
960 #define INTRA_ANG_SSE4_COMMON(cpu) \
961 SETUP_INTRA_ANG_COMMON(3, 3, cpu); \
962 SETUP_INTRA_ANG_COMMON(4, 4, cpu); \
963 SETUP_INTRA_ANG_COMMON(5, 5, cpu); \
964 SETUP_INTRA_ANG_COMMON(6, 6, cpu); \
965 SETUP_INTRA_ANG_COMMON(7, 7, cpu); \
966 SETUP_INTRA_ANG_COMMON(8, 8, cpu); \
967 SETUP_INTRA_ANG_COMMON(9, 9, cpu); \
968 SETUP_INTRA_ANG_COMMON(10, 10, cpu); \
969 SETUP_INTRA_ANG_COMMON(11, 11, cpu); \
970 SETUP_INTRA_ANG_COMMON(12, 12, cpu); \
971 SETUP_INTRA_ANG_COMMON(13, 13, cpu); \
972 SETUP_INTRA_ANG_COMMON(14, 14, cpu); \
973 SETUP_INTRA_ANG_COMMON(15, 15, cpu); \
974 SETUP_INTRA_ANG_COMMON(16, 16, cpu); \
975 SETUP_INTRA_ANG_COMMON(17, 17, cpu); \
976 SETUP_INTRA_ANG_COMMON(18, 18, cpu);
/* Sets intra modes 19 through 33 in two passes:
 *  - SETUP_INTRA_ANG fills 8x8, 16x16 and 32x32 with the function number
 *    equal to the mode;
 *  - SETUP_INTRA_ANG4 fills 4x4 with function number (36 - mode), except
 *    mode 26, which uses its own number 26.
 * NOTE(review): the 36 - mode mapping presumably reuses the kernel of the
 * mirrored prediction direction for 4x4 -- confirm against the asm sources. */
978 #define INTRA_ANG_SSE4_HIGH(cpu) \
979 SETUP_INTRA_ANG(19, 19, cpu); \
980 SETUP_INTRA_ANG(20, 20, cpu); \
981 SETUP_INTRA_ANG(21, 21, cpu); \
982 SETUP_INTRA_ANG(22, 22, cpu); \
983 SETUP_INTRA_ANG(23, 23, cpu); \
984 SETUP_INTRA_ANG(24, 24, cpu); \
985 SETUP_INTRA_ANG(25, 25, cpu); \
986 SETUP_INTRA_ANG(26, 26, cpu); \
987 SETUP_INTRA_ANG(27, 27, cpu); \
988 SETUP_INTRA_ANG(28, 28, cpu); \
989 SETUP_INTRA_ANG(29, 29, cpu); \
990 SETUP_INTRA_ANG(30, 30, cpu); \
991 SETUP_INTRA_ANG(31, 31, cpu); \
992 SETUP_INTRA_ANG(32, 32, cpu); \
993 SETUP_INTRA_ANG(33, 33, cpu); \
994 SETUP_INTRA_ANG4(19, 17, cpu); \
995 SETUP_INTRA_ANG4(20, 16, cpu); \
996 SETUP_INTRA_ANG4(21, 15, cpu); \
997 SETUP_INTRA_ANG4(22, 14, cpu); \
998 SETUP_INTRA_ANG4(23, 13, cpu); \
999 SETUP_INTRA_ANG4(24, 12, cpu); \
1000 SETUP_INTRA_ANG4(25, 11, cpu); \
1001 SETUP_INTRA_ANG4(26, 26, cpu); \
1002 SETUP_INTRA_ANG4(27, 9, cpu); \
1003 SETUP_INTRA_ANG4(28, 8, cpu); \
1004 SETUP_INTRA_ANG4(29, 7, cpu); \
1005 SETUP_INTRA_ANG4(30, 6, cpu); \
1006 SETUP_INTRA_ANG4(31, 5, cpu); \
1007 SETUP_INTRA_ANG4(32, 4, cpu); \
1008 SETUP_INTRA_ANG4(33, 3, cpu);
/* Sets intra modes 19 through 33 in two passes:
 *  - SETUP_INTRA_ANG4_8 fills 4x4 and 8x8 with function number (36 - mode),
 *    except mode 26, which uses its own number 26;
 *  - SETUP_INTRA_ANG16_32 fills 16x16 and 32x32 with the function number
 *    equal to the mode.
 * Differs from INTRA_ANG_SSE4_HIGH in that the 8x8 entries also use the
 * remapped function numbers. */
1010 #define INTRA_ANG_SSE4(cpu) \
1011 SETUP_INTRA_ANG4_8(19, 17, cpu); \
1012 SETUP_INTRA_ANG4_8(20, 16, cpu); \
1013 SETUP_INTRA_ANG4_8(21, 15, cpu); \
1014 SETUP_INTRA_ANG4_8(22, 14, cpu); \
1015 SETUP_INTRA_ANG4_8(23, 13, cpu); \
1016 SETUP_INTRA_ANG4_8(24, 12, cpu); \
1017 SETUP_INTRA_ANG4_8(25, 11, cpu); \
1018 SETUP_INTRA_ANG4_8(26, 26, cpu); \
1019 SETUP_INTRA_ANG4_8(27, 9, cpu); \
1020 SETUP_INTRA_ANG4_8(28, 8, cpu); \
1021 SETUP_INTRA_ANG4_8(29, 7, cpu); \
1022 SETUP_INTRA_ANG4_8(30, 6, cpu); \
1023 SETUP_INTRA_ANG4_8(31, 5, cpu); \
1024 SETUP_INTRA_ANG4_8(32, 4, cpu); \
1025 SETUP_INTRA_ANG4_8(33, 3, cpu); \
1026 SETUP_INTRA_ANG16_32(19, 19, cpu); \
1027 SETUP_INTRA_ANG16_32(20, 20, cpu); \
1028 SETUP_INTRA_ANG16_32(21, 21, cpu); \
1029 SETUP_INTRA_ANG16_32(22, 22, cpu); \
1030 SETUP_INTRA_ANG16_32(23, 23, cpu); \
1031 SETUP_INTRA_ANG16_32(24, 24, cpu); \
1032 SETUP_INTRA_ANG16_32(25, 25, cpu); \
1033 SETUP_INTRA_ANG16_32(26, 26, cpu); \
1034 SETUP_INTRA_ANG16_32(27, 27, cpu); \
1035 SETUP_INTRA_ANG16_32(28, 28, cpu); \
1036 SETUP_INTRA_ANG16_32(29, 29, cpu); \
1037 SETUP_INTRA_ANG16_32(30, 30, cpu); \
1038 SETUP_INTRA_ANG16_32(31, 31, cpu); \
1039 SETUP_INTRA_ANG16_32(32, 32, cpu); \
1040 SETUP_INTRA_ANG16_32(33, 33, cpu);
/* Fills the four I420 vertical-filter table slots for block size W x H:
 * filter_vss, filter_vpp, filter_vps and filter_vsp each receive the
 * matching x265_interp_4tap_vert_{ss,pp,ps,sp}_<W>x<H><cpu> symbol.
 * Requires `p` in scope. */
1042 #define SETUP_CHROMA_VERT_FUNC_DEF(W, H, cpu) \
1043 p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \
1044 p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
1045 p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
1046 p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
/* Expands SETUP_CHROMA_VERT_FUNC_DEF for the I420 chroma block sizes listed
 * below.  The 2x4, 2x8, 4x2 and 6x8 sizes are not in this list; they are
 * covered by CHROMA_VERT_FILTERS_SSE4. */
1048 #define CHROMA_VERT_FILTERS(cpu) \
1049 SETUP_CHROMA_VERT_FUNC_DEF(4, 4, cpu); \
1050 SETUP_CHROMA_VERT_FUNC_DEF(8, 8, cpu); \
1051 SETUP_CHROMA_VERT_FUNC_DEF(8, 4, cpu); \
1052 SETUP_CHROMA_VERT_FUNC_DEF(4, 8, cpu); \
1053 SETUP_CHROMA_VERT_FUNC_DEF(8, 6, cpu); \
1054 SETUP_CHROMA_VERT_FUNC_DEF(8, 2, cpu); \
1055 SETUP_CHROMA_VERT_FUNC_DEF(16, 16, cpu); \
1056 SETUP_CHROMA_VERT_FUNC_DEF(16, 8, cpu); \
1057 SETUP_CHROMA_VERT_FUNC_DEF(8, 16, cpu); \
1058 SETUP_CHROMA_VERT_FUNC_DEF(16, 12, cpu); \
1059 SETUP_CHROMA_VERT_FUNC_DEF(12, 16, cpu); \
1060 SETUP_CHROMA_VERT_FUNC_DEF(16, 4, cpu); \
1061 SETUP_CHROMA_VERT_FUNC_DEF(4, 16, cpu); \
1062 SETUP_CHROMA_VERT_FUNC_DEF(32, 32, cpu); \
1063 SETUP_CHROMA_VERT_FUNC_DEF(32, 16, cpu); \
1064 SETUP_CHROMA_VERT_FUNC_DEF(16, 32, cpu); \
1065 SETUP_CHROMA_VERT_FUNC_DEF(32, 24, cpu); \
1066 SETUP_CHROMA_VERT_FUNC_DEF(24, 32, cpu); \
1067 SETUP_CHROMA_VERT_FUNC_DEF(32, 8, cpu); \
1068 SETUP_CHROMA_VERT_FUNC_DEF(8, 32, cpu);
/* Expands SETUP_CHROMA_VERT_FUNC_DEF for the four small I420 sizes that
 * CHROMA_VERT_FILTERS leaves out: 2x4, 2x8, 4x2 and 6x8. */
1070 #define CHROMA_VERT_FILTERS_SSE4(cpu) \
1071 SETUP_CHROMA_VERT_FUNC_DEF(2, 4, cpu); \
1072 SETUP_CHROMA_VERT_FUNC_DEF(2, 8, cpu); \
1073 SETUP_CHROMA_VERT_FUNC_DEF(4, 2, cpu); \
1074 SETUP_CHROMA_VERT_FUNC_DEF(6, 8, cpu);
/* I422 counterpart of SETUP_CHROMA_VERT_FUNC_DEF: fills filter_vss,
 * filter_vpp, filter_vps and filter_vsp in the I422 table, indexed by
 * CHROMA422_<W>x<H>, with the x265_interp_4tap_vert_*_<W>x<H><cpu> symbols.
 * Requires `p` in scope. */
1076 #define SETUP_CHROMA_VERT_FUNC_DEF_422(W, H, cpu) \
1077 p.chroma[X265_CSP_I422].filter_vss[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \
1078 p.chroma[X265_CSP_I422].filter_vpp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
1079 p.chroma[X265_CSP_I422].filter_vps[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
1080 p.chroma[X265_CSP_I422].filter_vsp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
/* Expands SETUP_CHROMA_VERT_FUNC_DEF_422 for the I422 chroma block sizes
 * listed below.  The 2x8, 2x16, 4x4 and 6x16 sizes are not in this list;
 * they are covered by CHROMA_VERT_FILTERS_SSE4_422. */
1082 #define CHROMA_VERT_FILTERS_422(cpu) \
1083 SETUP_CHROMA_VERT_FUNC_DEF_422(4, 8, cpu); \
1084 SETUP_CHROMA_VERT_FUNC_DEF_422(8, 16, cpu); \
1085 SETUP_CHROMA_VERT_FUNC_DEF_422(8, 8, cpu); \
1086 SETUP_CHROMA_VERT_FUNC_DEF_422(4, 16, cpu); \
1087 SETUP_CHROMA_VERT_FUNC_DEF_422(8, 12, cpu); \
1088 SETUP_CHROMA_VERT_FUNC_DEF_422(8, 4, cpu); \
1089 SETUP_CHROMA_VERT_FUNC_DEF_422(16, 32, cpu); \
1090 SETUP_CHROMA_VERT_FUNC_DEF_422(16, 16, cpu); \
1091 SETUP_CHROMA_VERT_FUNC_DEF_422(8, 32, cpu); \
1092 SETUP_CHROMA_VERT_FUNC_DEF_422(16, 24, cpu); \
1093 SETUP_CHROMA_VERT_FUNC_DEF_422(12, 32, cpu); \
1094 SETUP_CHROMA_VERT_FUNC_DEF_422(16, 8, cpu); \
1095 SETUP_CHROMA_VERT_FUNC_DEF_422(4, 32, cpu); \
1096 SETUP_CHROMA_VERT_FUNC_DEF_422(32, 64, cpu); \
1097 SETUP_CHROMA_VERT_FUNC_DEF_422(32, 32, cpu); \
1098 SETUP_CHROMA_VERT_FUNC_DEF_422(16, 64, cpu); \
1099 SETUP_CHROMA_VERT_FUNC_DEF_422(32, 48, cpu); \
1100 SETUP_CHROMA_VERT_FUNC_DEF_422(24, 64, cpu); \
1101 SETUP_CHROMA_VERT_FUNC_DEF_422(32, 16, cpu); \
1102 SETUP_CHROMA_VERT_FUNC_DEF_422(8, 64, cpu);
/* Expands SETUP_CHROMA_VERT_FUNC_DEF_422 for the four small I422 sizes that
 * CHROMA_VERT_FILTERS_422 leaves out: 2x8, 2x16, 4x4 and 6x16. */
1104 #define CHROMA_VERT_FILTERS_SSE4_422(cpu) \
1105 SETUP_CHROMA_VERT_FUNC_DEF_422(2, 8, cpu); \
1106 SETUP_CHROMA_VERT_FUNC_DEF_422(2, 16, cpu); \
1107 SETUP_CHROMA_VERT_FUNC_DEF_422(4, 4, cpu); \
1108 SETUP_CHROMA_VERT_FUNC_DEF_422(6, 16, cpu);
/* I444 counterpart of SETUP_CHROMA_VERT_FUNC_DEF: fills filter_vss,
 * filter_vpp, filter_vps and filter_vsp in the I444 chroma table with the
 * x265_interp_4tap_vert_*_<W>x<H><cpu> symbols.  The table is indexed with
 * the LUMA_<W>x<H> enumerators here, not CHROMA_.  Requires `p` in scope. */
1110 #define SETUP_CHROMA_VERT_FUNC_DEF_444(W, H, cpu) \
1111 p.chroma[X265_CSP_I444].filter_vss[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \
1112 p.chroma[X265_CSP_I444].filter_vpp[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
1113 p.chroma[X265_CSP_I444].filter_vps[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
1114 p.chroma[X265_CSP_I444].filter_vsp[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
/* Expands SETUP_CHROMA_VERT_FUNC_DEF_444 for the block sizes listed below,
 * from 8x8 up to 64x64.  There is no 4x4 entry in this list. */
1116 #define CHROMA_VERT_FILTERS_444(cpu) \
1117 SETUP_CHROMA_VERT_FUNC_DEF_444(8, 8, cpu); \
1118 SETUP_CHROMA_VERT_FUNC_DEF_444(8, 4, cpu); \
1119 SETUP_CHROMA_VERT_FUNC_DEF_444(4, 8, cpu); \
1120 SETUP_CHROMA_VERT_FUNC_DEF_444(16, 16, cpu); \
1121 SETUP_CHROMA_VERT_FUNC_DEF_444(16, 8, cpu); \
1122 SETUP_CHROMA_VERT_FUNC_DEF_444(8, 16, cpu); \
1123 SETUP_CHROMA_VERT_FUNC_DEF_444(16, 12, cpu); \
1124 SETUP_CHROMA_VERT_FUNC_DEF_444(12, 16, cpu); \
1125 SETUP_CHROMA_VERT_FUNC_DEF_444(16, 4, cpu); \
1126 SETUP_CHROMA_VERT_FUNC_DEF_444(4, 16, cpu); \
1127 SETUP_CHROMA_VERT_FUNC_DEF_444(32, 32, cpu); \
1128 SETUP_CHROMA_VERT_FUNC_DEF_444(32, 16, cpu); \
1129 SETUP_CHROMA_VERT_FUNC_DEF_444(16, 32, cpu); \
1130 SETUP_CHROMA_VERT_FUNC_DEF_444(32, 24, cpu); \
1131 SETUP_CHROMA_VERT_FUNC_DEF_444(24, 32, cpu); \
1132 SETUP_CHROMA_VERT_FUNC_DEF_444(32, 8, cpu); \
1133 SETUP_CHROMA_VERT_FUNC_DEF_444(8, 32, cpu); \
1134 SETUP_CHROMA_VERT_FUNC_DEF_444(64, 64, cpu); \
1135 SETUP_CHROMA_VERT_FUNC_DEF_444(64, 32, cpu); \
1136 SETUP_CHROMA_VERT_FUNC_DEF_444(32, 64, cpu); \
1137 SETUP_CHROMA_VERT_FUNC_DEF_444(64, 48, cpu); \
1138 SETUP_CHROMA_VERT_FUNC_DEF_444(48, 64, cpu); \
1139 SETUP_CHROMA_VERT_FUNC_DEF_444(64, 16, cpu); \
1140 SETUP_CHROMA_VERT_FUNC_DEF_444(16, 64, cpu);
/* Fills the two I420 horizontal-filter table slots for block size W x H:
 * filter_hpp and filter_hps receive
 * x265_interp_4tap_horiz_{pp,ps}_<W>x<H><cpu>.  Requires `p` in scope. */
1142 #define SETUP_CHROMA_HORIZ_FUNC_DEF(W, H, cpu) \
1143 p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
1144 p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu;
/* Expands SETUP_CHROMA_HORIZ_FUNC_DEF for every I420 chroma block size
 * listed below, including the small 2x4, 4x2, 2x8 and 6x8 sizes. */
1146 #define CHROMA_HORIZ_FILTERS(cpu) \
1147 SETUP_CHROMA_HORIZ_FUNC_DEF(4, 4, cpu); \
1148 SETUP_CHROMA_HORIZ_FUNC_DEF(4, 2, cpu); \
1149 SETUP_CHROMA_HORIZ_FUNC_DEF(2, 4, cpu); \
1150 SETUP_CHROMA_HORIZ_FUNC_DEF(8, 8, cpu); \
1151 SETUP_CHROMA_HORIZ_FUNC_DEF(8, 4, cpu); \
1152 SETUP_CHROMA_HORIZ_FUNC_DEF(4, 8, cpu); \
1153 SETUP_CHROMA_HORIZ_FUNC_DEF(8, 6, cpu); \
1154 SETUP_CHROMA_HORIZ_FUNC_DEF(6, 8, cpu); \
1155 SETUP_CHROMA_HORIZ_FUNC_DEF(8, 2, cpu); \
1156 SETUP_CHROMA_HORIZ_FUNC_DEF(2, 8, cpu); \
1157 SETUP_CHROMA_HORIZ_FUNC_DEF(16, 16, cpu); \
1158 SETUP_CHROMA_HORIZ_FUNC_DEF(16, 8, cpu); \
1159 SETUP_CHROMA_HORIZ_FUNC_DEF(8, 16, cpu); \
1160 SETUP_CHROMA_HORIZ_FUNC_DEF(16, 12, cpu); \
1161 SETUP_CHROMA_HORIZ_FUNC_DEF(12, 16, cpu); \
1162 SETUP_CHROMA_HORIZ_FUNC_DEF(16, 4, cpu); \
1163 SETUP_CHROMA_HORIZ_FUNC_DEF(4, 16, cpu); \
1164 SETUP_CHROMA_HORIZ_FUNC_DEF(32, 32, cpu); \
1165 SETUP_CHROMA_HORIZ_FUNC_DEF(32, 16, cpu); \
1166 SETUP_CHROMA_HORIZ_FUNC_DEF(16, 32, cpu); \
1167 SETUP_CHROMA_HORIZ_FUNC_DEF(32, 24, cpu); \
1168 SETUP_CHROMA_HORIZ_FUNC_DEF(24, 32, cpu); \
1169 SETUP_CHROMA_HORIZ_FUNC_DEF(32, 8, cpu); \
1170 SETUP_CHROMA_HORIZ_FUNC_DEF(8, 32, cpu);
/* I422 counterpart of SETUP_CHROMA_HORIZ_FUNC_DEF: fills filter_hpp and
 * filter_hps in the I422 table, indexed by CHROMA422_<W>x<H>, with the
 * x265_interp_4tap_horiz_{pp,ps}_<W>x<H><cpu> symbols.  Requires `p` in
 * scope. */
1172 #define SETUP_CHROMA_HORIZ_FUNC_DEF_422(W, H, cpu) \
1173 p.chroma[X265_CSP_I422].filter_hpp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
1174 p.chroma[X265_CSP_I422].filter_hps[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu;
/* Expands SETUP_CHROMA_HORIZ_FUNC_DEF_422 for every I422 chroma block size
 * listed below, including the small 2x8, 4x4, 2x16 and 6x16 sizes. */
1176 #define CHROMA_HORIZ_FILTERS_422(cpu) \
1177 SETUP_CHROMA_HORIZ_FUNC_DEF_422(4, 8, cpu); \
1178 SETUP_CHROMA_HORIZ_FUNC_DEF_422(4, 4, cpu); \
1179 SETUP_CHROMA_HORIZ_FUNC_DEF_422(2, 8, cpu); \
1180 SETUP_CHROMA_HORIZ_FUNC_DEF_422(8, 16, cpu); \
1181 SETUP_CHROMA_HORIZ_FUNC_DEF_422(8, 8, cpu); \
1182 SETUP_CHROMA_HORIZ_FUNC_DEF_422(4, 16, cpu); \
1183 SETUP_CHROMA_HORIZ_FUNC_DEF_422(8, 12, cpu); \
1184 SETUP_CHROMA_HORIZ_FUNC_DEF_422(6, 16, cpu); \
1185 SETUP_CHROMA_HORIZ_FUNC_DEF_422(8, 4, cpu); \
1186 SETUP_CHROMA_HORIZ_FUNC_DEF_422(2, 16, cpu); \
1187 SETUP_CHROMA_HORIZ_FUNC_DEF_422(16, 32, cpu); \
1188 SETUP_CHROMA_HORIZ_FUNC_DEF_422(16, 16, cpu); \
1189 SETUP_CHROMA_HORIZ_FUNC_DEF_422(8, 32, cpu); \
1190 SETUP_CHROMA_HORIZ_FUNC_DEF_422(16, 24, cpu); \
1191 SETUP_CHROMA_HORIZ_FUNC_DEF_422(12, 32, cpu); \
1192 SETUP_CHROMA_HORIZ_FUNC_DEF_422(16, 8, cpu); \
1193 SETUP_CHROMA_HORIZ_FUNC_DEF_422(4, 32, cpu); \
1194 SETUP_CHROMA_HORIZ_FUNC_DEF_422(32, 64, cpu); \
1195 SETUP_CHROMA_HORIZ_FUNC_DEF_422(32, 32, cpu); \
1196 SETUP_CHROMA_HORIZ_FUNC_DEF_422(16, 64, cpu); \
1197 SETUP_CHROMA_HORIZ_FUNC_DEF_422(32, 48, cpu); \
1198 SETUP_CHROMA_HORIZ_FUNC_DEF_422(24, 64, cpu); \
1199 SETUP_CHROMA_HORIZ_FUNC_DEF_422(32, 16, cpu); \
1200 SETUP_CHROMA_HORIZ_FUNC_DEF_422(8, 64, cpu);
/* I444 counterpart of SETUP_CHROMA_HORIZ_FUNC_DEF: fills filter_hpp and
 * filter_hps in the I444 chroma table with the
 * x265_interp_4tap_horiz_{pp,ps}_<W>x<H><cpu> symbols.  The table is indexed
 * with the LUMA_<W>x<H> enumerators here, not CHROMA_.  Requires `p` in
 * scope. */
1202 #define SETUP_CHROMA_HORIZ_FUNC_DEF_444(W, H, cpu) \
1203 p.chroma[X265_CSP_I444].filter_hpp[LUMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
1204 p.chroma[X265_CSP_I444].filter_hps[LUMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu;
/* Expands SETUP_CHROMA_HORIZ_FUNC_DEF_444 for the block sizes listed below,
 * from 8x8 up to 64x64.  There is no 4x4 entry in this list. */
1206 #define CHROMA_HORIZ_FILTERS_444(cpu) \
1207 SETUP_CHROMA_HORIZ_FUNC_DEF_444(8, 8, cpu); \
1208 SETUP_CHROMA_HORIZ_FUNC_DEF_444(8, 4, cpu); \
1209 SETUP_CHROMA_HORIZ_FUNC_DEF_444(4, 8, cpu); \
1210 SETUP_CHROMA_HORIZ_FUNC_DEF_444(16, 16, cpu); \
1211 SETUP_CHROMA_HORIZ_FUNC_DEF_444(16, 8, cpu); \
1212 SETUP_CHROMA_HORIZ_FUNC_DEF_444(8, 16, cpu); \
1213 SETUP_CHROMA_HORIZ_FUNC_DEF_444(16, 12, cpu); \
1214 SETUP_CHROMA_HORIZ_FUNC_DEF_444(12, 16, cpu); \
1215 SETUP_CHROMA_HORIZ_FUNC_DEF_444(16, 4, cpu); \
1216 SETUP_CHROMA_HORIZ_FUNC_DEF_444(4, 16, cpu); \
1217 SETUP_CHROMA_HORIZ_FUNC_DEF_444(32, 32, cpu); \
1218 SETUP_CHROMA_HORIZ_FUNC_DEF_444(32, 16, cpu); \
1219 SETUP_CHROMA_HORIZ_FUNC_DEF_444(16, 32, cpu); \
1220 SETUP_CHROMA_HORIZ_FUNC_DEF_444(32, 24, cpu); \
1221 SETUP_CHROMA_HORIZ_FUNC_DEF_444(24, 32, cpu); \
1222 SETUP_CHROMA_HORIZ_FUNC_DEF_444(32, 8, cpu); \
1223 SETUP_CHROMA_HORIZ_FUNC_DEF_444(8, 32, cpu); \
1224 SETUP_CHROMA_HORIZ_FUNC_DEF_444(64, 64, cpu); \
1225 SETUP_CHROMA_HORIZ_FUNC_DEF_444(64, 32, cpu); \
1226 SETUP_CHROMA_HORIZ_FUNC_DEF_444(32, 64, cpu); \
1227 SETUP_CHROMA_HORIZ_FUNC_DEF_444(64, 48, cpu); \
1228 SETUP_CHROMA_HORIZ_FUNC_DEF_444(48, 64, cpu); \
1229 SETUP_CHROMA_HORIZ_FUNC_DEF_444(64, 16, cpu); \
1230 SETUP_CHROMA_HORIZ_FUNC_DEF_444(16, 64, cpu);
1233 // private x265 namespace
1236 /* Very similar to CRef in intrapred.cpp, except it uses optimized primitives */
// Runs the intra_pred primitive for every mode from 2 to 34 on a block whose
// side is size = 1 << log2Size, using table index sizeIdx = log2Size - 2.
// Mode m's output starts at dest + (m - 2) * size * size.
//   dest          - output area holding one size*size slot per mode
//   above0/left0  - reference pointers used when the filter flag is clear
//   above1/left1  - reference pointers used when the filter flag is set
//   bLuma         - forwarded unchanged to the intra_pred primitive
// NOTE(review): the braces and any condition choosing between the
// "predict into buffer, then transpose" path and the "predict directly into
// out" path are not visible in this listing -- confirm against the full
// source before relying on this description.
1237 template<int log2Size
>
1238 void intra_allangs(pixel
*dest
, pixel
*above0
, pixel
*left0
, pixel
*above1
, pixel
*left1
, int bLuma
)
1240 const int size
= 1 << log2Size
;
1241 const int sizeIdx
= log2Size
- 2;
// Scratch area of 32 * 32 pixels, enough for the largest (32x32) block.
1242 ALIGN_VAR_32(pixel
, buffer
[32 * 32]);
1244 for (int mode
= 2; mode
<= 34; mode
++)
// Pick the "1" reference pointers when g_intraFilterFlags[mode] has the bit
// for this block size set, otherwise the "0" pointers.
1246 pixel
*left
= (g_intraFilterFlags
[mode
] & size
? left1
: left0
);
1247 pixel
*above
= (g_intraFilterFlags
[mode
] & size
? above1
: above0
);
// (mode - 2) << (2 * log2Size) equals (mode - 2) * size * size.
1248 pixel
*out
= dest
+ ((mode
- 2) << (log2Size
* 2));
// Path A: predict into the scratch buffer, then transpose it into out.
1252 primitives
.intra_pred
[mode
][sizeIdx
](buffer
, size
, left
, above
, mode
, bLuma
);
1253 primitives
.transpose
[sizeIdx
](out
, buffer
, size
);
// Path B: predict straight into out, no transpose.
1256 primitives
.intra_pred
[mode
][sizeIdx
](out
, size
, left
, above
, mode
, bLuma
);
1261 void Setup_Assembly_Primitives(EncoderPrimitives
&p
, int cpuMask
)
1264 if (cpuMask
& X265_CPU_SSE2
)
1272 p
.satd
[LUMA_4x4
] = x265_pixel_satd_4x4_mmx2
;
1274 p
.sa8d_inter
[LUMA_4x4
] = x265_pixel_satd_4x4_mmx2
;
1275 SA8D_INTER_FROM_BLOCK(sse2
);
1276 p
.sa8d_inter
[LUMA_8x8
] = x265_pixel_sa8d_8x8_sse2
;
1277 p
.sa8d_inter
[LUMA_16x16
] = x265_pixel_sa8d_16x16_sse2
;
1279 p
.sse_ss
[LUMA_4x4
] = x265_pixel_ssd_ss_4x4_mmx2
;
1280 p
.sse_ss
[LUMA_4x8
] = x265_pixel_ssd_ss_4x8_mmx2
;
1281 p
.sse_ss
[LUMA_4x16
] = x265_pixel_ssd_ss_4x16_mmx2
;
1282 p
.sse_ss
[LUMA_8x4
] = x265_pixel_ssd_ss_8x4_sse2
;
1283 p
.sse_ss
[LUMA_8x8
] = x265_pixel_ssd_ss_8x8_sse2
;
1284 p
.sse_ss
[LUMA_8x16
] = x265_pixel_ssd_ss_8x16_sse2
;
1285 p
.sse_ss
[LUMA_8x32
] = x265_pixel_ssd_ss_8x32_sse2
;
1286 p
.sse_ss
[LUMA_12x16
] = x265_pixel_ssd_ss_12x16_sse2
;
1287 p
.sse_ss
[LUMA_16x4
] = x265_pixel_ssd_ss_16x4_sse2
;
1288 p
.sse_ss
[LUMA_16x8
] = x265_pixel_ssd_ss_16x8_sse2
;
1289 p
.sse_ss
[LUMA_16x12
] = x265_pixel_ssd_ss_16x12_sse2
;
1290 p
.sse_ss
[LUMA_16x16
] = x265_pixel_ssd_ss_16x16_sse2
;
1291 p
.sse_ss
[LUMA_16x32
] = x265_pixel_ssd_ss_16x32_sse2
;
1292 p
.sse_ss
[LUMA_16x64
] = x265_pixel_ssd_ss_16x64_sse2
;
1293 p
.sse_ss
[LUMA_24x32
] = x265_pixel_ssd_ss_24x32_sse2
;
1294 p
.sse_ss
[LUMA_32x8
] = x265_pixel_ssd_ss_32x8_sse2
;
1295 p
.sse_ss
[LUMA_32x16
] = x265_pixel_ssd_ss_32x16_sse2
;
1296 p
.sse_ss
[LUMA_32x24
] = x265_pixel_ssd_ss_32x24_sse2
;
1297 p
.sse_ss
[LUMA_32x32
] = x265_pixel_ssd_ss_32x32_sse2
;
1298 p
.sse_ss
[LUMA_32x64
] = x265_pixel_ssd_ss_32x64_sse2
;
1299 p
.sse_ss
[LUMA_48x64
] = x265_pixel_ssd_ss_48x64_sse2
;
1300 p
.sse_ss
[LUMA_64x16
] = x265_pixel_ssd_ss_64x16_sse2
;
1301 p
.sse_ss
[LUMA_64x32
] = x265_pixel_ssd_ss_64x32_sse2
;
1302 p
.sse_ss
[LUMA_64x48
] = x265_pixel_ssd_ss_64x48_sse2
;
1303 p
.sse_ss
[LUMA_64x64
] = x265_pixel_ssd_ss_64x64_sse2
;
1305 p
.transpose
[BLOCK_4x4
] = x265_transpose4_sse2
;
1306 p
.transpose
[BLOCK_8x8
] = x265_transpose8_sse2
;
1307 p
.transpose
[BLOCK_16x16
] = x265_transpose16_sse2
;
1308 p
.transpose
[BLOCK_32x32
] = x265_transpose32_sse2
;
1309 p
.transpose
[BLOCK_64x64
] = x265_transpose64_sse2
;
1311 p
.ssim_4x4x2_core
= x265_pixel_ssim_4x4x2_core_sse2
;
1312 p
.ssim_end_4
= x265_pixel_ssim_end4_sse2
;
1318 p
.sad_x3
[LUMA_4x4
] = x265_pixel_sad_x3_4x4_mmx2
;
1319 p
.sad_x3
[LUMA_4x8
] = x265_pixel_sad_x3_4x8_mmx2
;
1320 p
.sad_x3
[LUMA_4x16
] = x265_pixel_sad_x3_4x16_mmx2
;
1321 p
.sad_x3
[LUMA_8x4
] = x265_pixel_sad_x3_8x4_sse2
;
1322 p
.sad_x3
[LUMA_8x8
] = x265_pixel_sad_x3_8x8_sse2
;
1323 p
.sad_x3
[LUMA_8x16
] = x265_pixel_sad_x3_8x16_sse2
;
1324 p
.sad_x3
[LUMA_8x32
] = x265_pixel_sad_x3_8x32_sse2
;
1325 p
.sad_x3
[LUMA_16x4
] = x265_pixel_sad_x3_16x4_sse2
;
1326 p
.sad_x3
[LUMA_12x16
] = x265_pixel_sad_x3_12x16_mmx2
;
1329 p
.sad_x4
[LUMA_4x4
] = x265_pixel_sad_x4_4x4_mmx2
;
1330 p
.sad_x4
[LUMA_4x8
] = x265_pixel_sad_x4_4x8_mmx2
;
1331 p
.sad_x4
[LUMA_4x16
] = x265_pixel_sad_x4_4x16_mmx2
;
1332 p
.sad_x4
[LUMA_8x4
] = x265_pixel_sad_x4_8x4_sse2
;
1333 p
.sad_x4
[LUMA_8x8
] = x265_pixel_sad_x4_8x8_sse2
;
1334 p
.sad_x4
[LUMA_8x16
] = x265_pixel_sad_x4_8x16_sse2
;
1335 p
.sad_x4
[LUMA_8x32
] = x265_pixel_sad_x4_8x32_sse2
;
1336 p
.sad_x4
[LUMA_16x4
] = x265_pixel_sad_x4_16x4_sse2
;
1337 p
.sad_x4
[LUMA_12x16
] = x265_pixel_sad_x4_12x16_mmx2
;
1339 p
.cpy2Dto1D_shl
[BLOCK_4x4
] = x265_cpy2Dto1D_shl_4_sse2
;
1340 p
.cpy2Dto1D_shl
[BLOCK_8x8
] = x265_cpy2Dto1D_shl_8_sse2
;
1341 p
.cpy2Dto1D_shl
[BLOCK_16x16
] = x265_cpy2Dto1D_shl_16_sse2
;
1342 p
.cpy2Dto1D_shl
[BLOCK_32x32
] = x265_cpy2Dto1D_shl_32_sse2
;
1343 p
.cpy2Dto1D_shr
[BLOCK_4x4
] = x265_cpy2Dto1D_shr_4_sse2
;
1344 p
.cpy2Dto1D_shr
[BLOCK_8x8
] = x265_cpy2Dto1D_shr_8_sse2
;
1345 p
.cpy2Dto1D_shr
[BLOCK_16x16
] = x265_cpy2Dto1D_shr_16_sse2
;
1346 p
.cpy2Dto1D_shr
[BLOCK_32x32
] = x265_cpy2Dto1D_shr_32_sse2
;
1347 p
.cpy1Dto2D_shl
[BLOCK_4x4
] = x265_cpy1Dto2D_shl_4_sse2
;
1348 p
.cpy1Dto2D_shl
[BLOCK_8x8
] = x265_cpy1Dto2D_shl_8_sse2
;
1349 p
.cpy1Dto2D_shl
[BLOCK_16x16
] = x265_cpy1Dto2D_shl_16_sse2
;
1350 p
.cpy1Dto2D_shl
[BLOCK_32x32
] = x265_cpy1Dto2D_shl_32_sse2
;
1351 p
.cpy1Dto2D_shr
[BLOCK_4x4
] = x265_cpy1Dto2D_shr_4_sse2
;
1352 p
.cpy1Dto2D_shr
[BLOCK_8x8
] = x265_cpy1Dto2D_shr_8_sse2
;
1353 p
.cpy1Dto2D_shr
[BLOCK_16x16
] = x265_cpy1Dto2D_shr_16_sse2
;
1354 p
.cpy1Dto2D_shr
[BLOCK_32x32
] = x265_cpy1Dto2D_shr_32_sse2
;
1356 CHROMA_PIXELSUB_PS(_sse2
);
1357 CHROMA_PIXELSUB_PS_422(_sse2
);
1358 LUMA_PIXELSUB(_sse2
);
1360 CHROMA_BLOCKCOPY(ss
, _sse2
);
1361 CHROMA_BLOCKCOPY_422(ss
, _sse2
);
1362 LUMA_BLOCKCOPY(ss
, _sse2
);
1364 CHROMA_VERT_FILTERS(_sse2
);
1365 CHROMA_VERT_FILTERS_422(_sse2
);
1366 CHROMA_VERT_FILTERS_444(_sse2
);
1367 p
.luma_p2s
= x265_luma_p2s_sse2
;
1368 p
.chroma
[X265_CSP_I420
].p2s
= x265_chroma_p2s_sse2
;
1369 p
.chroma
[X265_CSP_I422
].p2s
= x265_chroma_p2s_sse2
;
1370 p
.chroma
[X265_CSP_I444
].p2s
= x265_luma_p2s_sse2
; // for i444 , chroma_p2s can be replaced by luma_p2s
1372 p
.blockfill_s
[BLOCK_4x4
] = x265_blockfill_s_4x4_sse2
;
1373 p
.blockfill_s
[BLOCK_8x8
] = x265_blockfill_s_8x8_sse2
;
1374 p
.blockfill_s
[BLOCK_16x16
] = x265_blockfill_s_16x16_sse2
;
1375 p
.blockfill_s
[BLOCK_32x32
] = x265_blockfill_s_32x32_sse2
;
1377 // TODO: overflow on 12-bits mode!
1378 p
.ssd_s
[BLOCK_4x4
] = x265_pixel_ssd_s_4_sse2
;
1379 p
.ssd_s
[BLOCK_8x8
] = x265_pixel_ssd_s_8_sse2
;
1380 p
.ssd_s
[BLOCK_16x16
] = x265_pixel_ssd_s_16_sse2
;
1381 p
.ssd_s
[BLOCK_32x32
] = x265_pixel_ssd_s_32_sse2
;
1383 p
.calcresidual
[BLOCK_4x4
] = x265_getResidual4_sse2
;
1384 p
.calcresidual
[BLOCK_8x8
] = x265_getResidual8_sse2
;
1385 p
.calcresidual
[BLOCK_16x16
] = x265_getResidual16_sse2
;
1386 p
.calcresidual
[BLOCK_32x32
] = x265_getResidual32_sse2
;
1388 p
.dct
[DCT_4x4
] = x265_dct4_sse2
;
1389 p
.idct
[IDCT_4x4
] = x265_idct4_sse2
;
1391 p
.idct
[IDCT_8x8
] = x265_idct8_sse2
;
1393 p
.idct
[IDST_4x4
] = x265_idst4_sse2
;
1395 LUMA_SS_FILTERS(_sse2
);
1397 if (cpuMask
& X265_CPU_SSSE3
)
1399 p
.scale1D_128to64
= x265_scale1D_128to64_ssse3
;
1400 p
.scale2D_64to32
= x265_scale2D_64to32_ssse3
;
1402 INTRA_ANG_SSSE3(ssse3
);
1404 p
.dct
[DST_4x4
] = x265_dst4_ssse3
;
1405 p
.idct
[IDCT_8x8
] = x265_idct8_ssse3
;
1406 p
.count_nonzero
= x265_count_nonzero_ssse3
;
1408 if (cpuMask
& X265_CPU_SSE4
)
1411 CHROMA_ADDAVG(_sse4
);
1412 CHROMA_ADDAVG_422(_sse4
);
1413 LUMA_FILTERS(_sse4
);
1414 CHROMA_HORIZ_FILTERS(_sse4
);
1415 CHROMA_VERT_FILTERS_SSE4(_sse4
);
1416 CHROMA_HORIZ_FILTERS_422(_sse4
);
1417 CHROMA_VERT_FILTERS_SSE4_422(_sse4
);
1418 CHROMA_HORIZ_FILTERS_444(_sse4
);
1420 p
.dct
[DCT_8x8
] = x265_dct8_sse4
;
1421 p
.quant
= x265_quant_sse4
;
1422 p
.nquant
= x265_nquant_sse4
;
1423 p
.dequant_normal
= x265_dequant_normal_sse4
;
1424 p
.intra_pred
[0][BLOCK_4x4
] = x265_intra_pred_planar4_sse4
;
1425 p
.intra_pred
[0][BLOCK_8x8
] = x265_intra_pred_planar8_sse4
;
1426 p
.intra_pred
[0][BLOCK_16x16
] = x265_intra_pred_planar16_sse4
;
1427 p
.intra_pred
[0][BLOCK_32x32
] = x265_intra_pred_planar32_sse4
;
1429 p
.intra_pred
[1][BLOCK_4x4
] = x265_intra_pred_dc4_sse4
;
1430 p
.intra_pred
[1][BLOCK_8x8
] = x265_intra_pred_dc8_sse4
;
1431 p
.intra_pred
[1][BLOCK_16x16
] = x265_intra_pred_dc16_sse4
;
1432 p
.intra_pred
[1][BLOCK_32x32
] = x265_intra_pred_dc32_sse4
;
1433 p
.planecopy_cp
= x265_upShift_8_sse4
;
1435 INTRA_ANG_SSE4_COMMON(sse4
);
1436 INTRA_ANG_SSE4_HIGH(sse4
);
1438 if (cpuMask
& X265_CPU_XOP
)
1440 p
.frameInitLowres
= x265_frame_init_lowres_core_xop
;
1441 SA8D_INTER_FROM_BLOCK(xop
);
1445 if (cpuMask
& X265_CPU_AVX2
)
1447 p
.dct
[DCT_4x4
] = x265_dct4_avx2
;
1448 p
.quant
= x265_quant_avx2
;
1449 p
.nquant
= x265_nquant_avx2
;
1450 p
.dequant_normal
= x265_dequant_normal_avx2
;
1451 p
.scale1D_128to64
= x265_scale1D_128to64_avx2
;
1452 p
.cpy1Dto2D_shl
[BLOCK_4x4
] = x265_cpy1Dto2D_shl_4_avx2
;
1453 p
.cpy1Dto2D_shl
[BLOCK_8x8
] = x265_cpy1Dto2D_shl_8_avx2
;
1454 p
.cpy1Dto2D_shl
[BLOCK_16x16
] = x265_cpy1Dto2D_shl_16_avx2
;
1455 p
.cpy1Dto2D_shl
[BLOCK_32x32
] = x265_cpy1Dto2D_shl_32_avx2
;
1456 p
.cpy1Dto2D_shr
[BLOCK_4x4
] = x265_cpy1Dto2D_shr_4_avx2
;
1457 p
.cpy1Dto2D_shr
[BLOCK_8x8
] = x265_cpy1Dto2D_shr_8_avx2
;
1458 p
.cpy1Dto2D_shr
[BLOCK_16x16
] = x265_cpy1Dto2D_shr_16_avx2
;
1459 p
.cpy1Dto2D_shr
[BLOCK_32x32
] = x265_cpy1Dto2D_shr_32_avx2
;
1461 p
.dct
[DCT_8x8
] = x265_dct8_avx2
;
1462 p
.dct
[DCT_16x16
] = x265_dct16_avx2
;
1463 p
.dct
[DCT_32x32
] = x265_dct32_avx2
;
1464 p
.idct
[IDCT_4x4
] = x265_idct4_avx2
;
1465 p
.idct
[IDCT_8x8
] = x265_idct8_avx2
;
1466 p
.idct
[IDCT_16x16
] = x265_idct16_avx2
;
1467 p
.idct
[IDCT_32x32
] = x265_idct32_avx2
;
1468 p
.transpose
[BLOCK_8x8
] = x265_transpose8_avx2
;
1469 p
.transpose
[BLOCK_16x16
] = x265_transpose16_avx2
;
1470 p
.transpose
[BLOCK_32x32
] = x265_transpose32_avx2
;
1471 p
.transpose
[BLOCK_64x64
] = x265_transpose64_avx2
;
1474 /* at HIGH_BIT_DEPTH, pixel == short so we can reuse a number of primitives */
1475 for (int i
= 0; i
< NUM_LUMA_PARTITIONS
; i
++)
1477 p
.sse_pp
[i
] = (pixelcmp_t
)p
.sse_ss
[i
];
1478 p
.sse_sp
[i
] = (pixelcmp_sp_t
)p
.sse_ss
[i
];
1481 for (int i
= 0; i
< NUM_LUMA_PARTITIONS
; i
++)
1483 p
.luma_copy_ps
[i
] = (copy_ps_t
)p
.luma_copy_ss
[i
];
1484 p
.luma_copy_sp
[i
] = (copy_sp_t
)p
.luma_copy_ss
[i
];
1485 p
.luma_copy_pp
[i
] = (copy_pp_t
)p
.luma_copy_ss
[i
];
1488 for (int i
= 0; i
< NUM_CHROMA_PARTITIONS
; i
++)
1490 p
.chroma
[X265_CSP_I420
].copy_ps
[i
] = (copy_ps_t
)p
.chroma
[X265_CSP_I420
].copy_ss
[i
];
1491 p
.chroma
[X265_CSP_I420
].copy_sp
[i
] = (copy_sp_t
)p
.chroma
[X265_CSP_I420
].copy_ss
[i
];
1492 p
.chroma
[X265_CSP_I420
].copy_pp
[i
] = (copy_pp_t
)p
.chroma
[X265_CSP_I420
].copy_ss
[i
];
1495 for (int i
= 0; i
< NUM_CHROMA_PARTITIONS
; i
++)
1497 p
.chroma
[X265_CSP_I422
].copy_ps
[i
] = (copy_ps_t
)p
.chroma
[X265_CSP_I422
].copy_ss
[i
];
1498 p
.chroma
[X265_CSP_I422
].copy_sp
[i
] = (copy_sp_t
)p
.chroma
[X265_CSP_I422
].copy_ss
[i
];
1499 p
.chroma
[X265_CSP_I422
].copy_pp
[i
] = (copy_pp_t
)p
.chroma
[X265_CSP_I422
].copy_ss
[i
];
1502 if (p
.intra_pred
[0][0] && p
.transpose
[0])
1504 p
.intra_pred_allangs
[BLOCK_4x4
] = intra_allangs
<2>;
1505 p
.intra_pred_allangs
[BLOCK_8x8
] = intra_allangs
<3>;
1506 p
.intra_pred_allangs
[BLOCK_16x16
] = intra_allangs
<4>;
1507 p
.intra_pred_allangs
[BLOCK_32x32
] = intra_allangs
<5>;
1510 #else // if HIGH_BIT_DEPTH
1511 if (cpuMask
& X265_CPU_SSE2
)
1513 INIT8_NAME(sse_pp
, ssd
, _mmx
);
1515 INIT8(sad_x3
, _mmx2
);
1516 INIT8(sad_x4
, _mmx2
);
1517 p
.satd
[LUMA_4x4
] = x265_pixel_satd_4x4_mmx2
;
1518 p
.sa8d_inter
[LUMA_4x4
] = x265_pixel_satd_4x4_mmx2
;
1519 p
.frameInitLowres
= x265_frame_init_lowres_core_mmx2
;
1530 INIT2(sad_x3
, _sse2
);
1531 INIT2(sad_x4
, _sse2
);
1534 CHROMA_BLOCKCOPY(ss
, _sse2
);
1535 CHROMA_BLOCKCOPY(pp
, _sse2
);
1536 CHROMA_BLOCKCOPY_422(ss
, _sse2
);
1537 CHROMA_BLOCKCOPY_422(pp
, _sse2
);
1538 LUMA_BLOCKCOPY(ss
, _sse2
);
1539 LUMA_BLOCKCOPY(pp
, _sse2
);
1540 LUMA_BLOCKCOPY(sp
, _sse2
);
1541 CHROMA_BLOCKCOPY_SP(_sse2
);
1542 CHROMA_BLOCKCOPY_SP_422(_sse2
);
1544 CHROMA_SS_FILTERS_420(_sse2
);
1545 CHROMA_SS_FILTERS_422(_sse2
);
1546 CHROMA_SS_FILTERS_444(_sse2
);
1547 CHROMA_SP_FILTERS_420(_sse2
);
1548 CHROMA_SP_FILTERS_422(_sse2
);
1549 CHROMA_SP_FILTERS_444(_sse2
);
1550 LUMA_SS_FILTERS(_sse2
);
1552 // This function pointer initialization is temporary and will be removed
1553 // later with macro definitions. It is used to avoid linker errors
1554 // until all partitions are coded, and to allow committing smaller patches that are easier to review.
1557 p
.blockfill_s
[BLOCK_4x4
] = x265_blockfill_s_4x4_sse2
;
1558 p
.blockfill_s
[BLOCK_8x8
] = x265_blockfill_s_8x8_sse2
;
1559 p
.blockfill_s
[BLOCK_16x16
] = x265_blockfill_s_16x16_sse2
;
1560 p
.blockfill_s
[BLOCK_32x32
] = x265_blockfill_s_32x32_sse2
;
1562 p
.ssd_s
[BLOCK_4x4
] = x265_pixel_ssd_s_4_sse2
;
1563 p
.ssd_s
[BLOCK_8x8
] = x265_pixel_ssd_s_8_sse2
;
1564 p
.ssd_s
[BLOCK_16x16
] = x265_pixel_ssd_s_16_sse2
;
1565 p
.ssd_s
[BLOCK_32x32
] = x265_pixel_ssd_s_32_sse2
;
1567 p
.frameInitLowres
= x265_frame_init_lowres_core_sse2
;
1568 SA8D_INTER_FROM_BLOCK(sse2
);
1570 p
.cpy2Dto1D_shl
[BLOCK_4x4
] = x265_cpy2Dto1D_shl_4_sse2
;
1571 p
.cpy2Dto1D_shl
[BLOCK_8x8
] = x265_cpy2Dto1D_shl_8_sse2
;
1572 p
.cpy2Dto1D_shl
[BLOCK_16x16
] = x265_cpy2Dto1D_shl_16_sse2
;
1573 p
.cpy2Dto1D_shl
[BLOCK_32x32
] = x265_cpy2Dto1D_shl_32_sse2
;
1574 p
.cpy2Dto1D_shr
[BLOCK_4x4
] = x265_cpy2Dto1D_shr_4_sse2
;
1575 p
.cpy2Dto1D_shr
[BLOCK_8x8
] = x265_cpy2Dto1D_shr_8_sse2
;
1576 p
.cpy2Dto1D_shr
[BLOCK_16x16
] = x265_cpy2Dto1D_shr_16_sse2
;
1577 p
.cpy2Dto1D_shr
[BLOCK_32x32
] = x265_cpy2Dto1D_shr_32_sse2
;
1578 p
.cpy1Dto2D_shl
[BLOCK_4x4
] = x265_cpy1Dto2D_shl_4_sse2
;
1579 p
.cpy1Dto2D_shl
[BLOCK_8x8
] = x265_cpy1Dto2D_shl_8_sse2
;
1580 p
.cpy1Dto2D_shl
[BLOCK_16x16
] = x265_cpy1Dto2D_shl_16_sse2
;
1581 p
.cpy1Dto2D_shl
[BLOCK_32x32
] = x265_cpy1Dto2D_shl_32_sse2
;
1582 p
.cpy1Dto2D_shr
[BLOCK_4x4
] = x265_cpy1Dto2D_shr_4_sse2
;
1583 p
.cpy1Dto2D_shr
[BLOCK_8x8
] = x265_cpy1Dto2D_shr_8_sse2
;
1584 p
.cpy1Dto2D_shr
[BLOCK_16x16
] = x265_cpy1Dto2D_shr_16_sse2
;
1585 p
.cpy1Dto2D_shr
[BLOCK_32x32
] = x265_cpy1Dto2D_shr_32_sse2
;
1587 p
.calcresidual
[BLOCK_4x4
] = x265_getResidual4_sse2
;
1588 p
.calcresidual
[BLOCK_8x8
] = x265_getResidual8_sse2
;
1589 p
.transpose
[BLOCK_4x4
] = x265_transpose4_sse2
;
1590 p
.transpose
[BLOCK_8x8
] = x265_transpose8_sse2
;
1591 p
.transpose
[BLOCK_16x16
] = x265_transpose16_sse2
;
1592 p
.transpose
[BLOCK_32x32
] = x265_transpose32_sse2
;
1593 p
.transpose
[BLOCK_64x64
] = x265_transpose64_sse2
;
1594 p
.ssim_4x4x2_core
= x265_pixel_ssim_4x4x2_core_sse2
;
1595 p
.ssim_end_4
= x265_pixel_ssim_end4_sse2
;
1597 p
.dct
[DCT_4x4
] = x265_dct4_sse2
;
1598 p
.idct
[IDCT_4x4
] = x265_idct4_sse2
;
1600 p
.idct
[IDCT_8x8
] = x265_idct8_sse2
;
1602 p
.idct
[IDST_4x4
] = x265_idst4_sse2
;
1604 p
.planecopy_sp
= x265_downShift_16_sse2
;
1606 if (cpuMask
& X265_CPU_SSSE3
)
1608 p
.frameInitLowres
= x265_frame_init_lowres_core_ssse3
;
1609 SA8D_INTER_FROM_BLOCK(ssse3
);
1610 p
.sse_pp
[LUMA_4x4
] = x265_pixel_ssd_4x4_ssse3
;
1613 PIXEL_AVG_W4(ssse3
);
1615 INTRA_ANG_SSSE3(ssse3
);
1617 p
.scale1D_128to64
= x265_scale1D_128to64_ssse3
;
1618 p
.scale2D_64to32
= x265_scale2D_64to32_ssse3
;
1621 p
.sad_x4
[LUMA_8x4
] = x265_pixel_sad_x4_8x4_ssse3
;
1622 p
.sad_x4
[LUMA_8x8
] = x265_pixel_sad_x4_8x8_ssse3
;
1623 p
.sad_x3
[LUMA_8x16
] = x265_pixel_sad_x3_8x16_ssse3
;
1624 p
.sad_x4
[LUMA_8x16
] = x265_pixel_sad_x4_8x16_ssse3
;
1625 p
.sad_x3
[LUMA_8x32
] = x265_pixel_sad_x3_8x32_ssse3
;
1626 p
.sad_x4
[LUMA_8x32
] = x265_pixel_sad_x4_8x32_ssse3
;
1628 p
.sad_x3
[LUMA_12x16
] = x265_pixel_sad_x3_12x16_ssse3
;
1629 p
.sad_x4
[LUMA_12x16
] = x265_pixel_sad_x4_12x16_ssse3
;
1631 p
.luma_hvpp
[LUMA_8x8
] = x265_interp_8tap_hv_pp_8x8_ssse3
;
1632 p
.luma_p2s
= x265_luma_p2s_ssse3
;
1633 p
.chroma
[X265_CSP_I420
].p2s
= x265_chroma_p2s_ssse3
;
1634 p
.chroma
[X265_CSP_I422
].p2s
= x265_chroma_p2s_ssse3
;
1635 p
.chroma
[X265_CSP_I444
].p2s
= x265_luma_p2s_ssse3
; // for i444, chroma_p2s can use luma_p2s
1637 p
.dct
[DST_4x4
] = x265_dst4_ssse3
;
1638 p
.idct
[IDCT_8x8
] = x265_idct8_ssse3
;
1639 p
.count_nonzero
= x265_count_nonzero_ssse3
;
1641 if (cpuMask
& X265_CPU_SSE4
)
1643 p
.saoCuOrgE0
= x265_saoCuOrgE0_sse4
;
1646 CHROMA_ADDAVG(_sse4
);
1647 CHROMA_ADDAVG_422(_sse4
);
1649 // TODO: check POPCNT flag!
1650 p
.copy_cnt
[BLOCK_4x4
] = x265_copy_cnt_4_sse4
;
1651 p
.copy_cnt
[BLOCK_8x8
] = x265_copy_cnt_8_sse4
;
1652 p
.copy_cnt
[BLOCK_16x16
] = x265_copy_cnt_16_sse4
;
1653 p
.copy_cnt
[BLOCK_32x32
] = x265_copy_cnt_32_sse4
;
1656 SA8D_INTER_FROM_BLOCK(sse4
);
1658 p
.sse_pp
[LUMA_12x16
] = x265_pixel_ssd_12x16_sse4
;
1659 p
.sse_pp
[LUMA_24x32
] = x265_pixel_ssd_24x32_sse4
;
1660 p
.sse_pp
[LUMA_48x64
] = x265_pixel_ssd_48x64_sse4
;
1661 p
.sse_pp
[LUMA_64x16
] = x265_pixel_ssd_64x16_sse4
;
1662 p
.sse_pp
[LUMA_64x32
] = x265_pixel_ssd_64x32_sse4
;
1663 p
.sse_pp
[LUMA_64x48
] = x265_pixel_ssd_64x48_sse4
;
1664 p
.sse_pp
[LUMA_64x64
] = x265_pixel_ssd_64x64_sse4
;
1668 CHROMA_PIXELSUB_PS(_sse4
);
1669 CHROMA_PIXELSUB_PS_422(_sse4
);
1670 LUMA_PIXELSUB(_sse4
);
1672 CHROMA_FILTERS_420(_sse4
);
1673 CHROMA_FILTERS_422(_sse4
);
1674 CHROMA_FILTERS_444(_sse4
);
1675 CHROMA_SS_FILTERS_SSE4_420(_sse4
);
1676 CHROMA_SS_FILTERS_SSE4_422(_sse4
);
1677 CHROMA_SP_FILTERS_SSE4_420(_sse4
);
1678 CHROMA_SP_FILTERS_SSE4_422(_sse4
);
1679 CHROMA_SP_FILTERS_SSE4_444(_sse4
);
1680 LUMA_SP_FILTERS(_sse4
);
1681 LUMA_FILTERS(_sse4
);
1684 p
.chroma
[X265_CSP_I420
].copy_sp
[CHROMA_2x4
] = x265_blockcopy_sp_2x4_sse4
;
1685 p
.chroma
[X265_CSP_I420
].copy_sp
[CHROMA_2x8
] = x265_blockcopy_sp_2x8_sse4
;
1686 p
.chroma
[X265_CSP_I420
].copy_sp
[CHROMA_6x8
] = x265_blockcopy_sp_6x8_sse4
;
1687 CHROMA_BLOCKCOPY(ps
, _sse4
);
1688 CHROMA_BLOCKCOPY_422(ps
, _sse4
);
1689 LUMA_BLOCKCOPY(ps
, _sse4
);
1691 p
.calcresidual
[BLOCK_16x16
] = x265_getResidual16_sse4
;
1692 p
.calcresidual
[BLOCK_32x32
] = x265_getResidual32_sse4
;
1693 p
.quant
= x265_quant_sse4
;
1694 p
.nquant
= x265_nquant_sse4
;
1695 p
.dequant_normal
= x265_dequant_normal_sse4
;
1696 p
.weight_pp
= x265_weight_pp_sse4
;
1697 p
.weight_sp
= x265_weight_sp_sse4
;
1698 p
.intra_pred
[0][BLOCK_4x4
] = x265_intra_pred_planar4_sse4
;
1699 p
.intra_pred
[0][BLOCK_8x8
] = x265_intra_pred_planar8_sse4
;
1700 p
.intra_pred
[0][BLOCK_16x16
] = x265_intra_pred_planar16_sse4
;
1701 p
.intra_pred
[0][BLOCK_32x32
] = x265_intra_pred_planar32_sse4
;
1703 p
.intra_pred_allangs
[BLOCK_4x4
] = x265_all_angs_pred_4x4_sse4
;
1704 p
.intra_pred_allangs
[BLOCK_8x8
] = x265_all_angs_pred_8x8_sse4
;
1705 p
.intra_pred_allangs
[BLOCK_16x16
] = x265_all_angs_pred_16x16_sse4
;
1706 p
.intra_pred_allangs
[BLOCK_32x32
] = x265_all_angs_pred_32x32_sse4
;
1708 p
.intra_pred
[1][BLOCK_4x4
] = x265_intra_pred_dc4_sse4
;
1709 p
.intra_pred
[1][BLOCK_8x8
] = x265_intra_pred_dc8_sse4
;
1710 p
.intra_pred
[1][BLOCK_16x16
] = x265_intra_pred_dc16_sse4
;
1711 p
.intra_pred
[1][BLOCK_32x32
] = x265_intra_pred_dc32_sse4
;
1713 INTRA_ANG_SSE4_COMMON(sse4
);
1714 INTRA_ANG_SSE4(sse4
);
1716 p
.dct
[DCT_8x8
] = x265_dct8_sse4
;
1717 // p.denoiseDct = x265_denoise_dct_sse4;
1719 if (cpuMask
& X265_CPU_AVX
)
1721 p
.frameInitLowres
= x265_frame_init_lowres_core_avx
;
1723 SA8D_INTER_FROM_BLOCK(avx
);
1729 p
.sad_x3
[LUMA_12x16
] = x265_pixel_sad_x3_12x16_avx
;
1730 p
.sad_x4
[LUMA_12x16
] = x265_pixel_sad_x4_12x16_avx
;
1731 p
.sad_x3
[LUMA_16x4
] = x265_pixel_sad_x3_16x4_avx
;
1732 p
.sad_x4
[LUMA_16x4
] = x265_pixel_sad_x4_16x4_avx
;
1734 p
.ssim_4x4x2_core
= x265_pixel_ssim_4x4x2_core_avx
;
1735 p
.ssim_end_4
= x265_pixel_ssim_end4_avx
;
1736 p
.luma_copy_ss
[LUMA_64x16
] = x265_blockcopy_ss_64x16_avx
;
1737 p
.luma_copy_ss
[LUMA_64x32
] = x265_blockcopy_ss_64x32_avx
;
1738 p
.luma_copy_ss
[LUMA_64x48
] = x265_blockcopy_ss_64x48_avx
;
1739 p
.luma_copy_ss
[LUMA_64x64
] = x265_blockcopy_ss_64x64_avx
;
1741 p
.chroma
[X265_CSP_I420
].copy_pp
[CHROMA_32x8
] = x265_blockcopy_pp_32x8_avx
;
1742 p
.luma_copy_pp
[LUMA_32x8
] = x265_blockcopy_pp_32x8_avx
;
1744 p
.chroma
[X265_CSP_I420
].copy_pp
[CHROMA_32x16
] = x265_blockcopy_pp_32x16_avx
;
1745 p
.chroma
[X265_CSP_I422
].copy_pp
[CHROMA422_32x16
] = x265_blockcopy_pp_32x16_avx
;
1746 p
.luma_copy_pp
[LUMA_32x16
] = x265_blockcopy_pp_32x16_avx
;
1748 p
.chroma
[X265_CSP_I420
].copy_pp
[CHROMA_32x24
] = x265_blockcopy_pp_32x24_avx
;
1749 p
.luma_copy_pp
[LUMA_32x24
] = x265_blockcopy_pp_32x24_avx
;
1751 p
.chroma
[X265_CSP_I420
].copy_pp
[CHROMA_32x32
] = x265_blockcopy_pp_32x32_avx
;
1752 p
.chroma
[X265_CSP_I422
].copy_pp
[CHROMA422_32x32
] = x265_blockcopy_pp_32x32_avx
;
1753 p
.luma_copy_pp
[LUMA_32x32
] = x265_blockcopy_pp_32x32_avx
;
1755 p
.chroma
[X265_CSP_I422
].copy_pp
[CHROMA422_32x48
] = x265_blockcopy_pp_32x48_avx
;
1757 p
.chroma
[X265_CSP_I422
].copy_pp
[CHROMA422_32x64
] = x265_blockcopy_pp_32x64_avx
;
1758 p
.luma_copy_pp
[LUMA_32x64
] = x265_blockcopy_pp_32x64_avx
;
1760 if (cpuMask
& X265_CPU_XOP
)
1762 p
.frameInitLowres
= x265_frame_init_lowres_core_xop
;
1763 SA8D_INTER_FROM_BLOCK(xop
);
1765 INIT5_NAME(sse_pp
, ssd
, _xop
);
1768 if (cpuMask
& X265_CPU_AVX2
)
1770 INIT2(sad_x4
, _avx2
);
1772 INIT2_NAME(sse_pp
, ssd
, _avx2
);
1773 p
.sad_x4
[LUMA_16x12
] = x265_pixel_sad_x4_16x12_avx2
;
1774 p
.sad_x4
[LUMA_16x32
] = x265_pixel_sad_x4_16x32_avx2
;
1775 p
.ssd_s
[BLOCK_32x32
] = x265_pixel_ssd_s_32_avx2
;
1777 /* Need to update assembly code as per changed interface of the copy_cnt primitive, once
1778 * code is updated, avx2 version will be enabled */
1780 p
.copy_cnt
[BLOCK_8x8
] = x265_copy_cnt_8_avx2
;
1781 p
.copy_cnt
[BLOCK_16x16
] = x265_copy_cnt_16_avx2
;
1782 p
.copy_cnt
[BLOCK_32x32
] = x265_copy_cnt_32_avx2
;
1784 p
.blockfill_s
[BLOCK_16x16
] = x265_blockfill_s_16x16_avx2
;
1785 p
.blockfill_s
[BLOCK_32x32
] = x265_blockfill_s_32x32_avx2
;
1787 p
.cpy1Dto2D_shl
[BLOCK_4x4
] = x265_cpy1Dto2D_shl_4_avx2
;
1788 p
.cpy1Dto2D_shl
[BLOCK_8x8
] = x265_cpy1Dto2D_shl_8_avx2
;
1789 p
.cpy1Dto2D_shl
[BLOCK_16x16
] = x265_cpy1Dto2D_shl_16_avx2
;
1790 p
.cpy1Dto2D_shl
[BLOCK_32x32
] = x265_cpy1Dto2D_shl_32_avx2
;
1791 p
.cpy1Dto2D_shr
[BLOCK_4x4
] = x265_cpy1Dto2D_shr_4_avx2
;
1792 p
.cpy1Dto2D_shr
[BLOCK_8x8
] = x265_cpy1Dto2D_shr_8_avx2
;
1793 p
.cpy1Dto2D_shr
[BLOCK_16x16
] = x265_cpy1Dto2D_shr_16_avx2
;
1794 p
.cpy1Dto2D_shr
[BLOCK_32x32
] = x265_cpy1Dto2D_shr_32_avx2
;
1796 // p.denoiseDct = x265_denoise_dct_avx2;
1797 p
.dct
[DCT_4x4
] = x265_dct4_avx2
;
1798 p
.quant
= x265_quant_avx2
;
1799 p
.nquant
= x265_nquant_avx2
;
1800 p
.dequant_normal
= x265_dequant_normal_avx2
;
1802 p
.chroma
[X265_CSP_I420
].copy_ss
[CHROMA_16x4
] = x265_blockcopy_ss_16x4_avx
;
1803 p
.chroma
[X265_CSP_I420
].copy_ss
[CHROMA_16x12
] = x265_blockcopy_ss_16x12_avx
;
1804 p
.chroma
[X265_CSP_I420
].copy_ss
[CHROMA_16x8
] = x265_blockcopy_ss_16x8_avx
;
1805 p
.chroma
[X265_CSP_I420
].copy_ss
[CHROMA_16x16
] = x265_blockcopy_ss_16x16_avx
;
1806 p
.chroma
[X265_CSP_I420
].copy_ss
[CHROMA_16x32
] = x265_blockcopy_ss_16x32_avx
;
1807 p
.chroma
[X265_CSP_I422
].copy_ss
[CHROMA422_16x8
] = x265_blockcopy_ss_16x8_avx
;
1808 p
.chroma
[X265_CSP_I422
].copy_ss
[CHROMA422_16x16
] = x265_blockcopy_ss_16x16_avx
;
1809 p
.chroma
[X265_CSP_I422
].copy_ss
[CHROMA422_16x24
] = x265_blockcopy_ss_16x24_avx
;
1810 p
.chroma
[X265_CSP_I422
].copy_ss
[CHROMA422_16x32
] = x265_blockcopy_ss_16x32_avx
;
1811 p
.chroma
[X265_CSP_I422
].copy_ss
[CHROMA422_16x64
] = x265_blockcopy_ss_16x64_avx
;
1812 p
.scale1D_128to64
= x265_scale1D_128to64_avx2
;
1814 p
.weight_pp
= x265_weight_pp_avx2
;
1818 p
.dct
[DCT_8x8
] = x265_dct8_avx2
;
1819 p
.dct
[DCT_16x16
] = x265_dct16_avx2
;
1820 p
.dct
[DCT_32x32
] = x265_dct32_avx2
;
1821 p
.idct
[IDCT_4x4
] = x265_idct4_avx2
;
1822 p
.idct
[IDCT_8x8
] = x265_idct8_avx2
;
1823 p
.idct
[IDCT_16x16
] = x265_idct16_avx2
;
1824 p
.idct
[IDCT_32x32
] = x265_idct32_avx2
;
1826 p
.transpose
[BLOCK_8x8
] = x265_transpose8_avx2
;
1827 p
.transpose
[BLOCK_16x16
] = x265_transpose16_avx2
;
1828 p
.transpose
[BLOCK_32x32
] = x265_transpose32_avx2
;
1829 p
.transpose
[BLOCK_64x64
] = x265_transpose64_avx2
;
1831 p
.luma_vpp
[LUMA_12x16
] = x265_interp_8tap_vert_pp_12x16_avx2
;
1833 p
.luma_vpp
[LUMA_16x4
] = x265_interp_8tap_vert_pp_16x4_avx2
;
1834 p
.luma_vpp
[LUMA_16x8
] = x265_interp_8tap_vert_pp_16x8_avx2
;
1835 p
.luma_vpp
[LUMA_16x12
] = x265_interp_8tap_vert_pp_16x12_avx2
;
1836 p
.luma_vpp
[LUMA_16x16
] = x265_interp_8tap_vert_pp_16x16_avx2
;
1837 p
.luma_vpp
[LUMA_16x32
] = x265_interp_8tap_vert_pp_16x32_avx2
;
1838 p
.luma_vpp
[LUMA_16x64
] = x265_interp_8tap_vert_pp_16x64_avx2
;
1840 p
.luma_vpp
[LUMA_24x32
] = x265_interp_8tap_vert_pp_24x32_avx2
;
1842 p
.luma_vpp
[LUMA_32x8
] = x265_interp_8tap_vert_pp_32x8_avx2
;
1843 p
.luma_vpp
[LUMA_32x16
] = x265_interp_8tap_vert_pp_32x16_avx2
;
1844 p
.luma_vpp
[LUMA_32x24
] = x265_interp_8tap_vert_pp_32x24_avx2
;
1845 p
.luma_vpp
[LUMA_32x32
] = x265_interp_8tap_vert_pp_32x32_avx2
;
1846 p
.luma_vpp
[LUMA_32x64
] = x265_interp_8tap_vert_pp_32x64_avx2
;
1848 p
.luma_vpp
[LUMA_48x64
] = x265_interp_8tap_vert_pp_48x64_avx2
;
1850 p
.luma_vpp
[LUMA_64x16
] = x265_interp_8tap_vert_pp_64x16_avx2
;
1851 p
.luma_vpp
[LUMA_64x32
] = x265_interp_8tap_vert_pp_64x32_avx2
;
1852 p
.luma_vpp
[LUMA_64x48
] = x265_interp_8tap_vert_pp_64x48_avx2
;
1853 p
.luma_vpp
[LUMA_64x64
] = x265_interp_8tap_vert_pp_64x64_avx2
;
1855 p
.luma_hpp
[LUMA_4x4
] = x265_interp_8tap_horiz_pp_4x4_avx2
;
1857 p
.luma_hpp
[LUMA_8x4
] = x265_interp_8tap_horiz_pp_8x4_avx2
;
1858 p
.luma_hpp
[LUMA_8x8
] = x265_interp_8tap_horiz_pp_8x8_avx2
;
1859 p
.luma_hpp
[LUMA_8x16
] = x265_interp_8tap_horiz_pp_8x16_avx2
;
1860 p
.luma_hpp
[LUMA_8x32
] = x265_interp_8tap_horiz_pp_8x32_avx2
;
1862 p
.luma_hpp
[LUMA_16x4
] = x265_interp_8tap_horiz_pp_16x4_avx2
;
1863 p
.luma_hpp
[LUMA_16x8
] = x265_interp_8tap_horiz_pp_16x8_avx2
;
1864 p
.luma_hpp
[LUMA_16x12
] = x265_interp_8tap_horiz_pp_16x12_avx2
;
1865 p
.luma_hpp
[LUMA_16x16
] = x265_interp_8tap_horiz_pp_16x16_avx2
;
1866 p
.luma_hpp
[LUMA_16x32
] = x265_interp_8tap_horiz_pp_16x32_avx2
;
1867 p
.luma_hpp
[LUMA_16x64
] = x265_interp_8tap_horiz_pp_16x64_avx2
;
1869 p
.luma_hpp
[LUMA_32x8
] = x265_interp_8tap_horiz_pp_32x8_avx2
;
1870 p
.luma_hpp
[LUMA_32x16
] = x265_interp_8tap_horiz_pp_32x16_avx2
;
1871 p
.luma_hpp
[LUMA_32x24
] = x265_interp_8tap_horiz_pp_32x24_avx2
;
1872 p
.luma_hpp
[LUMA_32x32
] = x265_interp_8tap_horiz_pp_32x32_avx2
;
1873 p
.luma_hpp
[LUMA_32x64
] = x265_interp_8tap_horiz_pp_32x64_avx2
;
1875 p
.luma_hpp
[LUMA_64x64
] = x265_interp_8tap_horiz_pp_64x64_avx2
;
1876 p
.luma_hpp
[LUMA_64x48
] = x265_interp_8tap_horiz_pp_64x48_avx2
;
1877 p
.luma_hpp
[LUMA_64x32
] = x265_interp_8tap_horiz_pp_64x32_avx2
;
1878 p
.luma_hpp
[LUMA_64x16
] = x265_interp_8tap_horiz_pp_64x16_avx2
;
1880 p
.luma_hpp
[LUMA_48x64
] = x265_interp_8tap_horiz_pp_48x64_avx2
;
1882 p
.chroma
[X265_CSP_I420
].filter_hpp
[CHROMA_8x8
] = x265_interp_4tap_horiz_pp_8x8_avx2
;
1883 p
.chroma
[X265_CSP_I420
].filter_hpp
[CHROMA_4x4
] = x265_interp_4tap_horiz_pp_4x4_avx2
;
1884 p
.chroma
[X265_CSP_I420
].filter_hpp
[CHROMA_32x32
] = x265_interp_4tap_horiz_pp_32x32_avx2
;
1885 p
.chroma
[X265_CSP_I420
].filter_hpp
[CHROMA_16x16
] = x265_interp_4tap_horiz_pp_16x16_avx2
;
1887 p
.luma_vpp
[LUMA_4x4
] = x265_interp_8tap_vert_pp_4x4_avx2
;
1889 p
.luma_vpp
[LUMA_8x4
] = x265_interp_8tap_vert_pp_8x4_avx2
;
1890 p
.luma_vpp
[LUMA_8x8
] = x265_interp_8tap_vert_pp_8x8_avx2
;
1891 p
.luma_vpp
[LUMA_8x16
] = x265_interp_8tap_vert_pp_8x16_avx2
;
1892 p
.luma_vpp
[LUMA_8x32
] = x265_interp_8tap_vert_pp_8x32_avx2
;
1895 p
.chroma
[X265_CSP_I420
].filter_vpp
[CHROMA_4x4
] = x265_interp_4tap_vert_pp_4x4_avx2
;
1896 p
.chroma
[X265_CSP_I420
].filter_vpp
[CHROMA_8x8
] = x265_interp_4tap_vert_pp_8x8_avx2
;
1899 p
.chroma
[X265_CSP_I422
].filter_vpp
[CHROMA422_4x4
] = x265_interp_4tap_vert_pp_4x4_avx2
;
1901 p
.luma_vps
[LUMA_4x4
] = x265_interp_8tap_vert_ps_4x4_avx2
;
1904 p
.chroma
[X265_CSP_I420
].filter_vpp
[CHROMA_16x16
] = x265_interp_4tap_vert_pp_16x16_avx2
;
1905 p
.chroma
[X265_CSP_I420
].filter_vpp
[CHROMA_32x32
] = x265_interp_4tap_vert_pp_32x32_avx2
;
1908 #endif // if HIGH_BIT_DEPTH
1913 #ifdef __INTEL_COMPILER
1915 /* Agner's patch to Intel's CPU dispatcher from pages 131-132 of
1916 * http://agner.org/optimize/optimizing_cpp.pdf (2011-01-30)
1917 * adapted to x265's cpu schema. */
1919 // Global variable indicating cpu
1920 int __intel_cpu_indicator
= 0;
1921 // CPU dispatcher function
1922 void x265_intel_cpu_indicator_init(void)
1924 uint32_t cpu
= x265::cpu_detect();
1926 if (cpu
& X265_CPU_AVX
)
1927 __intel_cpu_indicator
= 0x20000;
1928 else if (cpu
& X265_CPU_SSE42
)
1929 __intel_cpu_indicator
= 0x8000;
1930 else if (cpu
& X265_CPU_SSE4
)
1931 __intel_cpu_indicator
= 0x2000;
1932 else if (cpu
& X265_CPU_SSSE3
)
1933 __intel_cpu_indicator
= 0x1000;
1934 else if (cpu
& X265_CPU_SSE3
)
1935 __intel_cpu_indicator
= 0x800;
1936 else if (cpu
& X265_CPU_SSE2
&& !(cpu
& X265_CPU_SSE2_IS_SLOW
))
1937 __intel_cpu_indicator
= 0x200;
1938 else if (cpu
& X265_CPU_SSE
)
1939 __intel_cpu_indicator
= 0x80;
1940 else if (cpu
& X265_CPU_MMX2
)
1941 __intel_cpu_indicator
= 8;
1943 __intel_cpu_indicator
= 1;
1946 /* __intel_cpu_indicator_init appears to have a non-standard calling convention that
1947 * assumes certain registers aren't preserved, so we'll route it through a function
1948 * that backs up all the registers. */
1949 void __intel_cpu_indicator_init(void)
1951 x265_safe_intel_cpu_indicator_init();
1954 #else // ifdef __INTEL_COMPILER
/* No-op variant compiled when __INTEL_COMPILER is not defined: there is no
 * Intel CPU indicator to set up, so the function body is intentionally empty. */
void x265_intel_cpu_indicator_init(void)
{
}
1957 #endif // ifdef __INTEL_COMPILER