Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | /***************************************************************************** |
2 | * Copyright (C) 2013 x265 project | |
3 | * | |
4 | * Authors: Steve Borho <steve@borho.org> | |
5 | * Praveen Kumar Tiwari <praveen@multicorewareinc.com> | |
6 | * Min Chen <chenm003@163.com> <min.chen@multicorewareinc.com> | |
7 | * | |
8 | * This program is free software; you can redistribute it and/or modify | |
9 | * it under the terms of the GNU General Public License as published by | |
10 | * the Free Software Foundation; either version 2 of the License, or | |
11 | * (at your option) any later version. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU General Public License | |
19 | * along with this program; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
21 | * | |
22 | * This program is also available under a commercial proprietary license. | |
23 | * For more information, contact us at license @ x265.com. | |
24 | *****************************************************************************/ | |
25 | ||
26 | #include "common.h" | |
27 | #include "primitives.h" | |
28 | #include "x265.h" | |
29 | #include "cpu.h" | |
30 | ||
31 | extern "C" { | |
32 | #include "pixel.h" | |
33 | #include "pixel-util.h" | |
34 | #include "mc.h" | |
35 | #include "ipfilter8.h" | |
36 | #include "loopfilter.h" | |
37 | #include "blockcopy8.h" | |
38 | #include "intrapred.h" | |
39 | #include "dct8.h" | |
40 | } | |
41 | ||
/* INITn_NAME(name1, name2, cpu)
 *
 * Store x265_pixel_<name2>_WxH<cpu> into p.name1[LUMA_WxH] for n block sizes.
 * `cpu` is pasted verbatim after the size, so it has to bring its own
 * separator.  Every macro lists the sizes it adds and then hands over to the
 * next smaller macro; the full chain covers 16x16, 16x8, 8x16, 8x8, 8x4, 4x8,
 * 4x4 and 4x16.  A table object named `p` must be in scope where the macro is
 * expanded, and each expansion already ends with its own ';'. */
#define INIT2_NAME(name1, name2, cpu) \
    p.name1[LUMA_16x8]  = x265_pixel_ ## name2 ## _16x8 ## cpu; \
    p.name1[LUMA_16x16] = x265_pixel_ ## name2 ## _16x16 ## cpu;

#define INIT4_NAME(name1, name2, cpu) \
    p.name1[LUMA_8x8]   = x265_pixel_ ## name2 ## _8x8 ## cpu; \
    p.name1[LUMA_8x16]  = x265_pixel_ ## name2 ## _8x16 ## cpu; \
    INIT2_NAME(name1, name2, cpu)

#define INIT5_NAME(name1, name2, cpu) \
    p.name1[LUMA_8x4]   = x265_pixel_ ## name2 ## _8x4 ## cpu; \
    INIT4_NAME(name1, name2, cpu)

#define INIT6_NAME(name1, name2, cpu) \
    p.name1[LUMA_4x8]   = x265_pixel_ ## name2 ## _4x8 ## cpu; \
    INIT5_NAME(name1, name2, cpu)

#define INIT7_NAME(name1, name2, cpu) \
    p.name1[LUMA_4x4]   = x265_pixel_ ## name2 ## _4x4 ## cpu; \
    INIT6_NAME(name1, name2, cpu)

#define INIT8_NAME(name1, name2, cpu) \
    p.name1[LUMA_4x16]  = x265_pixel_ ## name2 ## _4x16 ## cpu; \
    INIT7_NAME(name1, name2, cpu)
/* INITn(prim, cpu): shorthand for INITn_NAME when the table member and the
 * function-name fragment are spelled identically, i.e. it forwards `prim`
 * twice and `cpu` unchanged to the matching INITn_NAME macro. */
#define INIT8(prim, cpu) INIT8_NAME(prim, prim, cpu)
#define INIT7(prim, cpu) INIT7_NAME(prim, prim, cpu)
#define INIT6(prim, cpu) INIT6_NAME(prim, prim, cpu)
#define INIT5(prim, cpu) INIT5_NAME(prim, prim, cpu)
#define INIT4(prim, cpu) INIT4_NAME(prim, prim, cpu)
#define INIT2(prim, cpu) INIT2_NAME(prim, prim, cpu)
67 | ||
/* HEVC_SATD(cpu)
 *
 * Points the 24 p.satd[] slots listed below at the matching
 * x265_pixel_satd_WxH_<cpu> symbols.  The macro supplies the underscore in
 * front of `cpu` itself.  A table object named `p` must be in scope at the
 * expansion site; the expansion already ends with ';'. */
#define HEVC_SATD(cpu) \
    p.satd[LUMA_64x64] = x265_pixel_satd_64x64_ ## cpu; \
    p.satd[LUMA_64x48] = x265_pixel_satd_64x48_ ## cpu; \
    p.satd[LUMA_64x32] = x265_pixel_satd_64x32_ ## cpu; \
    p.satd[LUMA_64x16] = x265_pixel_satd_64x16_ ## cpu; \
    p.satd[LUMA_48x64] = x265_pixel_satd_48x64_ ## cpu; \
    p.satd[LUMA_32x64] = x265_pixel_satd_32x64_ ## cpu; \
    p.satd[LUMA_32x32] = x265_pixel_satd_32x32_ ## cpu; \
    p.satd[LUMA_32x24] = x265_pixel_satd_32x24_ ## cpu; \
    p.satd[LUMA_32x16] = x265_pixel_satd_32x16_ ## cpu; \
    p.satd[LUMA_32x8]  = x265_pixel_satd_32x8_ ## cpu; \
    p.satd[LUMA_24x32] = x265_pixel_satd_24x32_ ## cpu; \
    p.satd[LUMA_16x64] = x265_pixel_satd_16x64_ ## cpu; \
    p.satd[LUMA_16x32] = x265_pixel_satd_16x32_ ## cpu; \
    p.satd[LUMA_16x16] = x265_pixel_satd_16x16_ ## cpu; \
    p.satd[LUMA_16x12] = x265_pixel_satd_16x12_ ## cpu; \
    p.satd[LUMA_16x8]  = x265_pixel_satd_16x8_ ## cpu; \
    p.satd[LUMA_16x4]  = x265_pixel_satd_16x4_ ## cpu; \
    p.satd[LUMA_12x16] = x265_pixel_satd_12x16_ ## cpu; \
    p.satd[LUMA_8x32]  = x265_pixel_satd_8x32_ ## cpu; \
    p.satd[LUMA_8x16]  = x265_pixel_satd_8x16_ ## cpu; \
    p.satd[LUMA_8x8]   = x265_pixel_satd_8x8_ ## cpu; \
    p.satd[LUMA_8x4]   = x265_pixel_satd_8x4_ ## cpu; \
    p.satd[LUMA_4x16]  = x265_pixel_satd_4x16_ ## cpu; \
    p.satd[LUMA_4x8]   = x265_pixel_satd_4x8_ ## cpu;
93 | ||
/* SAD_X3(cpu)
 *
 * Points 16 p.sad_x3[] slots (block widths 16, 24, 32, 48 and 64) at the
 * matching x265_pixel_sad_x3_WxH_<cpu> symbols; the underscore before `cpu`
 * is supplied by the macro.  Needs `p` in scope.  The last assignment is left
 * unterminated, so the caller writes the closing ';'. */
#define SAD_X3(cpu) \
    p.sad_x3[LUMA_64x64] = x265_pixel_sad_x3_64x64_ ## cpu; \
    p.sad_x3[LUMA_64x48] = x265_pixel_sad_x3_64x48_ ## cpu; \
    p.sad_x3[LUMA_64x32] = x265_pixel_sad_x3_64x32_ ## cpu; \
    p.sad_x3[LUMA_64x16] = x265_pixel_sad_x3_64x16_ ## cpu; \
    p.sad_x3[LUMA_48x64] = x265_pixel_sad_x3_48x64_ ## cpu; \
    p.sad_x3[LUMA_32x64] = x265_pixel_sad_x3_32x64_ ## cpu; \
    p.sad_x3[LUMA_32x32] = x265_pixel_sad_x3_32x32_ ## cpu; \
    p.sad_x3[LUMA_32x24] = x265_pixel_sad_x3_32x24_ ## cpu; \
    p.sad_x3[LUMA_32x16] = x265_pixel_sad_x3_32x16_ ## cpu; \
    p.sad_x3[LUMA_32x8]  = x265_pixel_sad_x3_32x8_ ## cpu; \
    p.sad_x3[LUMA_24x32] = x265_pixel_sad_x3_24x32_ ## cpu; \
    p.sad_x3[LUMA_16x64] = x265_pixel_sad_x3_16x64_ ## cpu; \
    p.sad_x3[LUMA_16x32] = x265_pixel_sad_x3_16x32_ ## cpu; \
    p.sad_x3[LUMA_16x16] = x265_pixel_sad_x3_16x16_ ## cpu; \
    p.sad_x3[LUMA_16x12] = x265_pixel_sad_x3_16x12_ ## cpu; \
    p.sad_x3[LUMA_16x8]  = x265_pixel_sad_x3_16x8_ ## cpu
111 | ||
/* SAD_X4(cpu)
 *
 * Points 16 p.sad_x4[] slots (block widths 16, 24, 32, 48 and 64) at the
 * matching x265_pixel_sad_x4_WxH_<cpu> symbols; the underscore before `cpu`
 * is supplied by the macro.  Needs `p` in scope.  The last assignment is left
 * unterminated, so the caller writes the closing ';'. */
#define SAD_X4(cpu) \
    p.sad_x4[LUMA_64x64] = x265_pixel_sad_x4_64x64_ ## cpu; \
    p.sad_x4[LUMA_64x48] = x265_pixel_sad_x4_64x48_ ## cpu; \
    p.sad_x4[LUMA_64x32] = x265_pixel_sad_x4_64x32_ ## cpu; \
    p.sad_x4[LUMA_64x16] = x265_pixel_sad_x4_64x16_ ## cpu; \
    p.sad_x4[LUMA_48x64] = x265_pixel_sad_x4_48x64_ ## cpu; \
    p.sad_x4[LUMA_32x64] = x265_pixel_sad_x4_32x64_ ## cpu; \
    p.sad_x4[LUMA_32x32] = x265_pixel_sad_x4_32x32_ ## cpu; \
    p.sad_x4[LUMA_32x24] = x265_pixel_sad_x4_32x24_ ## cpu; \
    p.sad_x4[LUMA_32x16] = x265_pixel_sad_x4_32x16_ ## cpu; \
    p.sad_x4[LUMA_32x8]  = x265_pixel_sad_x4_32x8_ ## cpu; \
    p.sad_x4[LUMA_24x32] = x265_pixel_sad_x4_24x32_ ## cpu; \
    p.sad_x4[LUMA_16x64] = x265_pixel_sad_x4_16x64_ ## cpu; \
    p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_ ## cpu; \
    p.sad_x4[LUMA_16x16] = x265_pixel_sad_x4_16x16_ ## cpu; \
    p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_ ## cpu; \
    p.sad_x4[LUMA_16x8]  = x265_pixel_sad_x4_16x8_ ## cpu
129 | ||
/* SAD(cpu)
 *
 * Points 17 p.sad[] slots at the matching x265_pixel_sad_WxH_<cpu> symbols;
 * the underscore before `cpu` is supplied by the macro.  The list does not
 * include 16x16, 16x8 or any 4-wide size.  Needs `p` in scope.  The last
 * assignment is left unterminated, so the caller writes the closing ';'. */
#define SAD(cpu) \
    p.sad[LUMA_64x64] = x265_pixel_sad_64x64_ ## cpu; \
    p.sad[LUMA_64x48] = x265_pixel_sad_64x48_ ## cpu; \
    p.sad[LUMA_64x32] = x265_pixel_sad_64x32_ ## cpu; \
    p.sad[LUMA_64x16] = x265_pixel_sad_64x16_ ## cpu; \
    p.sad[LUMA_48x64] = x265_pixel_sad_48x64_ ## cpu; \
    p.sad[LUMA_32x64] = x265_pixel_sad_32x64_ ## cpu; \
    p.sad[LUMA_32x32] = x265_pixel_sad_32x32_ ## cpu; \
    p.sad[LUMA_32x24] = x265_pixel_sad_32x24_ ## cpu; \
    p.sad[LUMA_32x16] = x265_pixel_sad_32x16_ ## cpu; \
    p.sad[LUMA_32x8]  = x265_pixel_sad_32x8_ ## cpu; \
    p.sad[LUMA_24x32] = x265_pixel_sad_24x32_ ## cpu; \
    p.sad[LUMA_16x64] = x265_pixel_sad_16x64_ ## cpu; \
    p.sad[LUMA_16x32] = x265_pixel_sad_16x32_ ## cpu; \
    p.sad[LUMA_16x12] = x265_pixel_sad_16x12_ ## cpu; \
    p.sad[LUMA_16x4]  = x265_pixel_sad_16x4_ ## cpu; \
    p.sad[LUMA_12x16] = x265_pixel_sad_12x16_ ## cpu; \
    p.sad[LUMA_8x32]  = x265_pixel_sad_8x32_ ## cpu
148 | ||
/* ASSGN_SSE(cpu)
 *
 * Points 15 p.sse_pp[] slots (block widths 8, 16 and 32) at the matching
 * x265_pixel_ssd_WxH_<cpu> symbols; the underscore before `cpu` is supplied
 * by the macro.  Needs `p` in scope.  The last assignment is left
 * unterminated, so the caller writes the closing ';'. */
#define ASSGN_SSE(cpu) \
    p.sse_pp[LUMA_8x4]   = x265_pixel_ssd_8x4_ ## cpu; \
    p.sse_pp[LUMA_8x8]   = x265_pixel_ssd_8x8_ ## cpu; \
    p.sse_pp[LUMA_8x16]  = x265_pixel_ssd_8x16_ ## cpu; \
    p.sse_pp[LUMA_8x32]  = x265_pixel_ssd_8x32_ ## cpu; \
    p.sse_pp[LUMA_16x4]  = x265_pixel_ssd_16x4_ ## cpu; \
    p.sse_pp[LUMA_16x8]  = x265_pixel_ssd_16x8_ ## cpu; \
    p.sse_pp[LUMA_16x12] = x265_pixel_ssd_16x12_ ## cpu; \
    p.sse_pp[LUMA_16x16] = x265_pixel_ssd_16x16_ ## cpu; \
    p.sse_pp[LUMA_16x32] = x265_pixel_ssd_16x32_ ## cpu; \
    p.sse_pp[LUMA_16x64] = x265_pixel_ssd_16x64_ ## cpu; \
    p.sse_pp[LUMA_32x8]  = x265_pixel_ssd_32x8_ ## cpu; \
    p.sse_pp[LUMA_32x16] = x265_pixel_ssd_32x16_ ## cpu; \
    p.sse_pp[LUMA_32x24] = x265_pixel_ssd_32x24_ ## cpu; \
    p.sse_pp[LUMA_32x32] = x265_pixel_ssd_32x32_ ## cpu; \
    p.sse_pp[LUMA_32x64] = x265_pixel_ssd_32x64_ ## cpu
165 | ||
/* ASSGN_SSE_SS(cpu)
 *
 * Points the 25 p.sse_ss[] slots from 4x4 up to 64x64 at the matching
 * x265_pixel_ssd_ss_WxH_<cpu> symbols; the underscore before `cpu` is
 * supplied by the macro.  Needs `p` in scope; the expansion already ends
 * with ';'. */
#define ASSGN_SSE_SS(cpu) \
    p.sse_ss[LUMA_64x64] = x265_pixel_ssd_ss_64x64_ ## cpu; \
    p.sse_ss[LUMA_64x48] = x265_pixel_ssd_ss_64x48_ ## cpu; \
    p.sse_ss[LUMA_64x32] = x265_pixel_ssd_ss_64x32_ ## cpu; \
    p.sse_ss[LUMA_64x16] = x265_pixel_ssd_ss_64x16_ ## cpu; \
    p.sse_ss[LUMA_48x64] = x265_pixel_ssd_ss_48x64_ ## cpu; \
    p.sse_ss[LUMA_32x64] = x265_pixel_ssd_ss_32x64_ ## cpu; \
    p.sse_ss[LUMA_32x32] = x265_pixel_ssd_ss_32x32_ ## cpu; \
    p.sse_ss[LUMA_32x24] = x265_pixel_ssd_ss_32x24_ ## cpu; \
    p.sse_ss[LUMA_32x16] = x265_pixel_ssd_ss_32x16_ ## cpu; \
    p.sse_ss[LUMA_32x8]  = x265_pixel_ssd_ss_32x8_ ## cpu; \
    p.sse_ss[LUMA_24x32] = x265_pixel_ssd_ss_24x32_ ## cpu; \
    p.sse_ss[LUMA_16x64] = x265_pixel_ssd_ss_16x64_ ## cpu; \
    p.sse_ss[LUMA_16x32] = x265_pixel_ssd_ss_16x32_ ## cpu; \
    p.sse_ss[LUMA_16x16] = x265_pixel_ssd_ss_16x16_ ## cpu; \
    p.sse_ss[LUMA_16x12] = x265_pixel_ssd_ss_16x12_ ## cpu; \
    p.sse_ss[LUMA_16x8]  = x265_pixel_ssd_ss_16x8_ ## cpu; \
    p.sse_ss[LUMA_16x4]  = x265_pixel_ssd_ss_16x4_ ## cpu; \
    p.sse_ss[LUMA_12x16] = x265_pixel_ssd_ss_12x16_ ## cpu; \
    p.sse_ss[LUMA_8x32]  = x265_pixel_ssd_ss_8x32_ ## cpu; \
    p.sse_ss[LUMA_8x16]  = x265_pixel_ssd_ss_8x16_ ## cpu; \
    p.sse_ss[LUMA_8x8]   = x265_pixel_ssd_ss_8x8_ ## cpu; \
    p.sse_ss[LUMA_8x4]   = x265_pixel_ssd_ss_8x4_ ## cpu; \
    p.sse_ss[LUMA_4x16]  = x265_pixel_ssd_ss_4x16_ ## cpu; \
    p.sse_ss[LUMA_4x8]   = x265_pixel_ssd_ss_4x8_ ## cpu; \
    p.sse_ss[LUMA_4x4]   = x265_pixel_ssd_ss_4x4_ ## cpu;
192 | ||
/* SA8D_INTER_FROM_BLOCK(cpu)
 *
 * Fills 24 p.sa8d_inter[] slots.  Six sizes (4x8, 4x16, 8x4, 12x16, 16x4 and
 * 16x12) are wired to the x265_pixel_satd_WxH_<cpu> symbols; the remaining
 * eighteen use x265_pixel_sa8d_WxH_<cpu>.  The underscore before `cpu` is
 * supplied by the macro.  Needs `p` in scope; the expansion already ends
 * with ';'. */
#define SA8D_INTER_FROM_BLOCK(cpu) \
    p.sa8d_inter[LUMA_4x8]   = x265_pixel_satd_4x8_ ## cpu; \
    p.sa8d_inter[LUMA_4x16]  = x265_pixel_satd_4x16_ ## cpu; \
    p.sa8d_inter[LUMA_8x4]   = x265_pixel_satd_8x4_ ## cpu; \
    p.sa8d_inter[LUMA_12x16] = x265_pixel_satd_12x16_ ## cpu; \
    p.sa8d_inter[LUMA_16x4]  = x265_pixel_satd_16x4_ ## cpu; \
    p.sa8d_inter[LUMA_16x12] = x265_pixel_satd_16x12_ ## cpu; \
    p.sa8d_inter[LUMA_8x8]   = x265_pixel_sa8d_8x8_ ## cpu; \
    p.sa8d_inter[LUMA_8x16]  = x265_pixel_sa8d_8x16_ ## cpu; \
    p.sa8d_inter[LUMA_8x32]  = x265_pixel_sa8d_8x32_ ## cpu; \
    p.sa8d_inter[LUMA_16x8]  = x265_pixel_sa8d_16x8_ ## cpu; \
    p.sa8d_inter[LUMA_16x16] = x265_pixel_sa8d_16x16_ ## cpu; \
    p.sa8d_inter[LUMA_16x32] = x265_pixel_sa8d_16x32_ ## cpu; \
    p.sa8d_inter[LUMA_16x64] = x265_pixel_sa8d_16x64_ ## cpu; \
    p.sa8d_inter[LUMA_24x32] = x265_pixel_sa8d_24x32_ ## cpu; \
    p.sa8d_inter[LUMA_32x8]  = x265_pixel_sa8d_32x8_ ## cpu; \
    p.sa8d_inter[LUMA_32x16] = x265_pixel_sa8d_32x16_ ## cpu; \
    p.sa8d_inter[LUMA_32x24] = x265_pixel_sa8d_32x24_ ## cpu; \
    p.sa8d_inter[LUMA_32x32] = x265_pixel_sa8d_32x32_ ## cpu; \
    p.sa8d_inter[LUMA_32x64] = x265_pixel_sa8d_32x64_ ## cpu; \
    p.sa8d_inter[LUMA_48x64] = x265_pixel_sa8d_48x64_ ## cpu; \
    p.sa8d_inter[LUMA_64x16] = x265_pixel_sa8d_64x16_ ## cpu; \
    p.sa8d_inter[LUMA_64x32] = x265_pixel_sa8d_64x32_ ## cpu; \
    p.sa8d_inter[LUMA_64x48] = x265_pixel_sa8d_64x48_ ## cpu; \
    p.sa8d_inter[LUMA_64x64] = x265_pixel_sa8d_64x64_ ## cpu;
218 | ||
/* PIXEL_AVG(cpu)
 *
 * Points 22 p.pixelavg_pp[] slots (every listed size from 8x4 up to 64x64;
 * no 4-wide sizes) at the matching x265_pixel_avg_WxH_<cpu> symbols.  The
 * underscore before `cpu` is supplied by the macro.  Needs `p` in scope; the
 * expansion already ends with ';'. */
#define PIXEL_AVG(cpu) \
    p.pixelavg_pp[LUMA_8x4]   = x265_pixel_avg_8x4_ ## cpu; \
    p.pixelavg_pp[LUMA_8x8]   = x265_pixel_avg_8x8_ ## cpu; \
    p.pixelavg_pp[LUMA_8x16]  = x265_pixel_avg_8x16_ ## cpu; \
    p.pixelavg_pp[LUMA_8x32]  = x265_pixel_avg_8x32_ ## cpu; \
    p.pixelavg_pp[LUMA_12x16] = x265_pixel_avg_12x16_ ## cpu; \
    p.pixelavg_pp[LUMA_16x4]  = x265_pixel_avg_16x4_ ## cpu; \
    p.pixelavg_pp[LUMA_16x8]  = x265_pixel_avg_16x8_ ## cpu; \
    p.pixelavg_pp[LUMA_16x12] = x265_pixel_avg_16x12_ ## cpu; \
    p.pixelavg_pp[LUMA_16x16] = x265_pixel_avg_16x16_ ## cpu; \
    p.pixelavg_pp[LUMA_16x32] = x265_pixel_avg_16x32_ ## cpu; \
    p.pixelavg_pp[LUMA_16x64] = x265_pixel_avg_16x64_ ## cpu; \
    p.pixelavg_pp[LUMA_24x32] = x265_pixel_avg_24x32_ ## cpu; \
    p.pixelavg_pp[LUMA_32x8]  = x265_pixel_avg_32x8_ ## cpu; \
    p.pixelavg_pp[LUMA_32x16] = x265_pixel_avg_32x16_ ## cpu; \
    p.pixelavg_pp[LUMA_32x24] = x265_pixel_avg_32x24_ ## cpu; \
    p.pixelavg_pp[LUMA_32x32] = x265_pixel_avg_32x32_ ## cpu; \
    p.pixelavg_pp[LUMA_32x64] = x265_pixel_avg_32x64_ ## cpu; \
    p.pixelavg_pp[LUMA_48x64] = x265_pixel_avg_48x64_ ## cpu; \
    p.pixelavg_pp[LUMA_64x16] = x265_pixel_avg_64x16_ ## cpu; \
    p.pixelavg_pp[LUMA_64x32] = x265_pixel_avg_64x32_ ## cpu; \
    p.pixelavg_pp[LUMA_64x48] = x265_pixel_avg_64x48_ ## cpu; \
    p.pixelavg_pp[LUMA_64x64] = x265_pixel_avg_64x64_ ## cpu;
242 | ||
/* PIXEL_AVG_W4(cpu): the 4-wide entries of p.pixelavg_pp[] (4x4, 4x8, 4x16),
 * each set to x265_pixel_avg_4xH_<cpu>.  The underscore before `cpu` is
 * supplied by the macro.  Needs `p` in scope; the expansion already ends
 * with ';'. */
#define PIXEL_AVG_W4(cpu) \
    p.pixelavg_pp[LUMA_4x16] = x265_pixel_avg_4x16_ ## cpu; \
    p.pixelavg_pp[LUMA_4x8]  = x265_pixel_avg_4x8_ ## cpu; \
    p.pixelavg_pp[LUMA_4x4]  = x265_pixel_avg_4x4_ ## cpu;
247 | ||
/* SETUP_CHROMA_FUNC_DEF_{420,422,444}(W, H, cpu)
 *
 * Install the four x265_interp_4tap_{horiz,vert}_{pp,ps}_WxH<cpu> symbols
 * for one block size into p.chroma[<csp>].filter_{hpp,hps,vpp,vps}.  The
 * three variants differ only in the colour-space index and in the prefix of
 * the slot enumerator: CHROMA_ for I420, CHROMA422_ for I422 and LUMA_ for
 * I444.  `cpu` is pasted verbatim after the size.  Needs `p` in scope; each
 * expansion already ends with ';'. */
#define SETUP_CHROMA_FUNC_DEF_420(W, H, cpu) \
    p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
    p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
    p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
    p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu;

#define SETUP_CHROMA_FUNC_DEF_422(W, H, cpu) \
    p.chroma[X265_CSP_I422].filter_vps[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
    p.chroma[X265_CSP_I422].filter_vpp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
    p.chroma[X265_CSP_I422].filter_hps[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
    p.chroma[X265_CSP_I422].filter_hpp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu;

#define SETUP_CHROMA_FUNC_DEF_444(W, H, cpu) \
    p.chroma[X265_CSP_I444].filter_vps[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
    p.chroma[X265_CSP_I444].filter_vpp[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
    p.chroma[X265_CSP_I444].filter_hps[LUMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
    p.chroma[X265_CSP_I444].filter_hpp[LUMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu;
265 | ||
/* Single-slot chroma vertical-filter setters.
 *
 *   SETUP_CHROMA_SP_FUNC_DEF_<csp>(W, H, cpu) writes
 *       p.chroma[<csp>].filter_vsp[...] = x265_interp_4tap_vert_sp_WxH<cpu>
 *   SETUP_CHROMA_SS_FUNC_DEF_<csp>(W, H, cpu) writes
 *       p.chroma[<csp>].filter_vss[...] = x265_interp_4tap_vert_ss_WxH<cpu>
 *
 * The slot enumerator prefix is CHROMA_ for I420, CHROMA422_ for I422 and
 * LUMA_ for I444.  `cpu` is pasted verbatim after the size.  Needs `p` in
 * scope; each expansion already ends with ';'. */
#define SETUP_CHROMA_SP_FUNC_DEF_420(W, H, cpu) \
    p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = \
        x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;

#define SETUP_CHROMA_SS_FUNC_DEF_420(W, H, cpu) \
    p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] = \
        x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu;

#define SETUP_CHROMA_SP_FUNC_DEF_422(W, H, cpu) \
    p.chroma[X265_CSP_I422].filter_vsp[CHROMA422_ ## W ## x ## H] = \
        x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;

#define SETUP_CHROMA_SS_FUNC_DEF_422(W, H, cpu) \
    p.chroma[X265_CSP_I422].filter_vss[CHROMA422_ ## W ## x ## H] = \
        x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu;

#define SETUP_CHROMA_SP_FUNC_DEF_444(W, H, cpu) \
    p.chroma[X265_CSP_I444].filter_vsp[LUMA_ ## W ## x ## H] = \
        x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;

#define SETUP_CHROMA_SS_FUNC_DEF_444(W, H, cpu) \
    p.chroma[X265_CSP_I444].filter_vss[LUMA_ ## W ## x ## H] = \
        x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu;
283 | ||
/* CHROMA_FILTERS_420(cpu)
 *
 * Expands SETUP_CHROMA_FUNC_DEF_420 once for each of the 24 block sizes
 * listed below (2x4 up to 32x32), forwarding `cpu` unchanged.  The calls are
 * listed by ascending width, then ascending height. */
#define CHROMA_FILTERS_420(cpu) \
    SETUP_CHROMA_FUNC_DEF_420(2, 4, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(2, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(4, 2, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(4, 4, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(4, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(4, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(6, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(8, 2, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(8, 4, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(8, 6, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(8, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(8, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(8, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(12, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(16, 4, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(16, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(16, 12, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(16, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(16, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(24, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(32, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(32, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(32, 24, cpu); \
    SETUP_CHROMA_FUNC_DEF_420(32, 32, cpu);
309 | ||
/* CHROMA_FILTERS_422(cpu)
 *
 * Expands SETUP_CHROMA_FUNC_DEF_422 once for each of the 24 block sizes
 * listed below (2x8 up to 32x64), forwarding `cpu` unchanged.  The calls are
 * listed by ascending width, then ascending height. */
#define CHROMA_FILTERS_422(cpu) \
    SETUP_CHROMA_FUNC_DEF_422(2, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(2, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(4, 4, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(4, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(4, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(4, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(6, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(8, 4, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(8, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(8, 12, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(8, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(8, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(8, 64, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(12, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(16, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(16, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(16, 24, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(16, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(16, 64, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(24, 64, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(32, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(32, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(32, 48, cpu); \
    SETUP_CHROMA_FUNC_DEF_422(32, 64, cpu);
335 | ||
/* CHROMA_FILTERS_444(cpu)
 *
 * Expands SETUP_CHROMA_FUNC_DEF_444 once for each of the 24 block sizes
 * listed below (4x8 up to 64x64), forwarding `cpu` unchanged.  The calls are
 * listed by ascending width, then ascending height. */
#define CHROMA_FILTERS_444(cpu) \
    SETUP_CHROMA_FUNC_DEF_444(4, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(4, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(8, 4, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(8, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(8, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(8, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(12, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(16, 4, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(16, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(16, 12, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(16, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(16, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(16, 64, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(24, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(32, 8, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(32, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(32, 24, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(32, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(32, 64, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(48, 64, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(64, 16, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(64, 32, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(64, 48, cpu); \
    SETUP_CHROMA_FUNC_DEF_444(64, 64, cpu);
361 | ||
/* CHROMA_SP_FILTERS_SSE4_420(cpu) / CHROMA_SP_FILTERS_420(cpu)
 *
 * Both expand SETUP_CHROMA_SP_FUNC_DEF_420 over a fixed list of block sizes,
 * forwarding `cpu` unchanged.  The _SSE4_ list holds the 18 sizes whose width
 * is not 8; the plain list holds the six 8-wide sizes.  The two lists do not
 * overlap. */
#define CHROMA_SP_FILTERS_SSE4_420(cpu) \
    SETUP_CHROMA_SP_FUNC_DEF_420(2, 4, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(2, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(4, 2, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(4, 4, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(4, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(4, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(6, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(12, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(16, 4, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(16, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(16, 12, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(16, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(16, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(24, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(32, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(32, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(32, 24, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(32, 32, cpu);

#define CHROMA_SP_FILTERS_420(cpu) \
    SETUP_CHROMA_SP_FUNC_DEF_420(8, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(8, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(8, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(8, 6, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(8, 4, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_420(8, 2, cpu);
389 | ||
/* CHROMA_SP_FILTERS_SSE4_422(cpu) / CHROMA_SP_FILTERS_422(cpu)
 *
 * Both expand SETUP_CHROMA_SP_FUNC_DEF_422 over a fixed list of block sizes,
 * forwarding `cpu` unchanged.  The _SSE4_ list holds the 18 sizes whose width
 * is not 8; the plain list holds the six 8-wide sizes.  The two lists do not
 * overlap. */
#define CHROMA_SP_FILTERS_SSE4_422(cpu) \
    SETUP_CHROMA_SP_FUNC_DEF_422(2, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(2, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(4, 4, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(4, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(4, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(4, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(6, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(12, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(16, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(16, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(16, 24, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(16, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(16, 64, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(24, 64, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(32, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(32, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(32, 48, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(32, 64, cpu);

#define CHROMA_SP_FILTERS_422(cpu) \
    SETUP_CHROMA_SP_FUNC_DEF_422(8, 64, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(8, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(8, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(8, 12, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(8, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_422(8, 4, cpu);
417 | ||
/* CHROMA_SP_FILTERS_SSE4_444(cpu) / CHROMA_SP_FILTERS_444(cpu)
 *
 * Both expand SETUP_CHROMA_SP_FUNC_DEF_444 over a fixed list of block sizes,
 * forwarding `cpu` unchanged.  The _SSE4_ list holds 20 sizes, none of them
 * 8 wide; the plain list holds the four 8-wide sizes.  The two lists do not
 * overlap. */
#define CHROMA_SP_FILTERS_SSE4_444(cpu) \
    SETUP_CHROMA_SP_FUNC_DEF_444(4, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(4, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(12, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(16, 4, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(16, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(16, 12, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(16, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(16, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(16, 64, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(24, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(32, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(32, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(32, 24, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(32, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(32, 64, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(48, 64, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(64, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(64, 32, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(64, 48, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(64, 64, cpu);

#define CHROMA_SP_FILTERS_444(cpu) \
    SETUP_CHROMA_SP_FUNC_DEF_444(8, 4, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(8, 8, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(8, 16, cpu); \
    SETUP_CHROMA_SP_FUNC_DEF_444(8, 32, cpu);
445 | ||
/* CHROMA_SS_FILTERS_420(cpu) / CHROMA_SS_FILTERS_SSE4_420(cpu)
 *
 * Both expand SETUP_CHROMA_SS_FUNC_DEF_420 over a fixed list of block sizes,
 * forwarding `cpu` unchanged.  The plain list holds 21 sizes of width 4 and
 * above (excluding 6x8); the _SSE4_ list holds the remaining three sizes
 * 2x4, 2x8 and 6x8.  The two lists do not overlap. */
#define CHROMA_SS_FILTERS_420(cpu) \
    SETUP_CHROMA_SS_FUNC_DEF_420(4, 2, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(4, 4, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(4, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(4, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(8, 2, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(8, 4, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(8, 6, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(8, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(8, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(8, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(12, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(16, 4, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(16, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(16, 12, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(16, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(16, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(24, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(32, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(32, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(32, 24, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(32, 32, cpu);

#define CHROMA_SS_FILTERS_SSE4_420(cpu) \
    SETUP_CHROMA_SS_FUNC_DEF_420(6, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(2, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_420(2, 4, cpu);
473 | ||
/* CHROMA_SS_FILTERS_422(cpu) / CHROMA_SS_FILTERS_SSE4_422(cpu)
 *
 * Both expand SETUP_CHROMA_SS_FUNC_DEF_422 over a fixed list of block sizes,
 * forwarding `cpu` unchanged.  The plain list holds 21 sizes of width 4 and
 * above (excluding 6x16); the _SSE4_ list holds the remaining three sizes
 * 2x8, 2x16 and 6x16.  The two lists do not overlap. */
#define CHROMA_SS_FILTERS_422(cpu) \
    SETUP_CHROMA_SS_FUNC_DEF_422(4, 4, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(4, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(4, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(4, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(8, 4, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(8, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(8, 12, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(8, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(8, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(8, 64, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(12, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(16, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(16, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(16, 24, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(16, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(16, 64, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(24, 64, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(32, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(32, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(32, 48, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(32, 64, cpu);

#define CHROMA_SS_FILTERS_SSE4_422(cpu) \
    SETUP_CHROMA_SS_FUNC_DEF_422(6, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(2, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_422(2, 8, cpu);
501 | ||
/* CHROMA_SS_FILTERS_444(cpu)
 *
 * Expands SETUP_CHROMA_SS_FUNC_DEF_444 once for each of the 24 block sizes
 * listed below (4x8 up to 64x64), forwarding `cpu` unchanged.  The calls are
 * listed by ascending width, then ascending height. */
#define CHROMA_SS_FILTERS_444(cpu) \
    SETUP_CHROMA_SS_FUNC_DEF_444(4, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(4, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(8, 4, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(8, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(8, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(8, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(12, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(16, 4, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(16, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(16, 12, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(16, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(16, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(16, 64, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(24, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(32, 8, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(32, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(32, 24, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(32, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(32, 64, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(48, 64, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(64, 16, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(64, 32, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(64, 48, cpu); \
    SETUP_CHROMA_SS_FUNC_DEF_444(64, 64, cpu);
527 | ||
/* SETUP_LUMA_FUNC_DEF(W, H, cpu)
 *
 * Installs the x265_interp_8tap_* symbols for one WxH block into the
 * p.luma_hpp / luma_hps / luma_vpp / luma_vps tables at index LUMA_WxH.
 * `cpu` is pasted verbatim after the size.  When HIGH_BIT_DEPTH is non-zero
 * the macro also fills p.luma_vsp; the original source marks that split as
 * temporary, until all 10-bit functions are completed.  Needs `p` in scope;
 * the expansion already ends with ';'. */
#if HIGH_BIT_DEPTH
#define SETUP_LUMA_FUNC_DEF(W, H, cpu) \
    p.luma_vsp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu; \
    p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu; \
    p.luma_vpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
    p.luma_hps[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu; \
    p.luma_hpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu;
#else
#define SETUP_LUMA_FUNC_DEF(W, H, cpu) \
    p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu; \
    p.luma_vpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
    p.luma_hps[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu; \
    p.luma_hpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu;
#endif // if HIGH_BIT_DEPTH
542 | ||
/* Single-size luma table setters.  All index their table with LUMA_WxH and
 * paste `cpu` verbatim after the size; all need `p` in scope and already end
 * with ';'.
 *
 *   SETUP_LUMA_SUB_FUNC_DEF  p.luma_add_ps <- x265_pixel_add_ps_WxH<cpu>
 *                            p.luma_sub_ps <- x265_pixel_sub_ps_WxH<cpu>
 *   SETUP_LUMA_SP_FUNC_DEF   p.luma_vsp    <- x265_interp_8tap_vert_sp_WxH<cpu>
 *   SETUP_LUMA_SS_FUNC_DEF   p.luma_vss    <- x265_interp_8tap_vert_ss_WxH<cpu>
 *   SETUP_LUMA_BLOCKCOPY     p.luma_copy_<type> <- x265_blockcopy_<type>_WxH<cpu>
 */
#define SETUP_LUMA_SUB_FUNC_DEF(W, H, cpu) \
    p.luma_add_ps[LUMA_ ## W ## x ## H] = x265_pixel_add_ps_ ## W ## x ## H ## cpu; \
    p.luma_sub_ps[LUMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu;

#define SETUP_LUMA_SP_FUNC_DEF(W, H, cpu) \
    p.luma_vsp[LUMA_ ## W ## x ## H] = \
        x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu;

#define SETUP_LUMA_SS_FUNC_DEF(W, H, cpu) \
    p.luma_vss[LUMA_ ## W ## x ## H] = \
        x265_interp_8tap_vert_ss_ ## W ## x ## H ## cpu;

#define SETUP_LUMA_BLOCKCOPY(type, W, H, cpu) \
    p.luma_copy_ ## type[LUMA_ ## W ## x ## H] = \
        x265_blockcopy_ ## type ## _ ## W ## x ## H ## cpu;
555 | ||
/* SETUP_CHROMA_BLOCKCOPY(type, W, H, cpu)
 *
 * Writes x265_blockcopy_<type>_WxH<cpu> into
 * p.chroma[X265_CSP_I420].copy_<type>[CHROMA_WxH].  `cpu` is pasted verbatim
 * after the size.  Needs `p` in scope; the expansion already ends with ';'. */
#define SETUP_CHROMA_BLOCKCOPY(type, W, H, cpu) \
    p.chroma[X265_CSP_I420].copy_ ## type[CHROMA_ ## W ## x ## H] = \
        x265_blockcopy_ ## type ## _ ## W ## x ## H ## cpu;

/* CHROMA_BLOCKCOPY(type, cpu)
 *
 * Applies SETUP_CHROMA_BLOCKCOPY to each of the 24 block sizes listed below
 * (2x4 up to 32x32), forwarding `type` and `cpu` unchanged.  The calls are
 * listed from the largest size down to the smallest. */
#define CHROMA_BLOCKCOPY(type, cpu) \
    SETUP_CHROMA_BLOCKCOPY(type, 32, 32, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 32, 24, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 32, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 32, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 24, 32, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 16, 32, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 16, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 16, 12, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 16, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 16, 4, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 12, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 8, 32, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 8, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 8, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 8, 6, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 8, 4, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 8, 2, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 6, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 4, 16, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 4, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 4, 4, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 4, 2, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 2, 8, cpu); \
    SETUP_CHROMA_BLOCKCOPY(type, 2, 4, cpu);
584 | ||
// For one W x H size, stores x265_blockcopy_<type>_WxH<cpu> into the
// copy_<type> table of p.chroma[X265_CSP_I422] at index CHROMA422_WxH.
585 | #define SETUP_CHROMA_BLOCKCOPY_422(type, W, H, cpu) \ | |
586 | p.chroma[X265_CSP_I422].copy_ ## type[CHROMA422_ ## W ## x ## H] = x265_blockcopy_ ## type ## _ ## W ## x ## H ## cpu; | |
587 | ||
// Expands SETUP_CHROMA_BLOCKCOPY_422(type, W, H, cpu) once for every W x H pair
// listed below (2x8 through 32x64), forwarding `type` and `cpu` unchanged.
588 | #define CHROMA_BLOCKCOPY_422(type, cpu) \ | |
589 | SETUP_CHROMA_BLOCKCOPY_422(type, 2, 8, cpu); \ | |
590 | SETUP_CHROMA_BLOCKCOPY_422(type, 2, 16, cpu); \ | |
591 | SETUP_CHROMA_BLOCKCOPY_422(type, 4, 4, cpu); \ | |
592 | SETUP_CHROMA_BLOCKCOPY_422(type, 4, 8, cpu); \ | |
593 | SETUP_CHROMA_BLOCKCOPY_422(type, 4, 16, cpu); \ | |
594 | SETUP_CHROMA_BLOCKCOPY_422(type, 4, 32, cpu); \ | |
595 | SETUP_CHROMA_BLOCKCOPY_422(type, 6, 16, cpu); \ | |
596 | SETUP_CHROMA_BLOCKCOPY_422(type, 8, 4, cpu); \ | |
597 | SETUP_CHROMA_BLOCKCOPY_422(type, 8, 8, cpu); \ | |
598 | SETUP_CHROMA_BLOCKCOPY_422(type, 8, 12, cpu); \ | |
599 | SETUP_CHROMA_BLOCKCOPY_422(type, 8, 16, cpu); \ | |
600 | SETUP_CHROMA_BLOCKCOPY_422(type, 8, 32, cpu); \ | |
601 | SETUP_CHROMA_BLOCKCOPY_422(type, 8, 64, cpu); \ | |
602 | SETUP_CHROMA_BLOCKCOPY_422(type, 12, 32, cpu); \ | |
603 | SETUP_CHROMA_BLOCKCOPY_422(type, 16, 8, cpu); \ | |
604 | SETUP_CHROMA_BLOCKCOPY_422(type, 16, 16, cpu); \ | |
605 | SETUP_CHROMA_BLOCKCOPY_422(type, 16, 24, cpu); \ | |
606 | SETUP_CHROMA_BLOCKCOPY_422(type, 16, 32, cpu); \ | |
607 | SETUP_CHROMA_BLOCKCOPY_422(type, 16, 64, cpu); \ | |
608 | SETUP_CHROMA_BLOCKCOPY_422(type, 24, 64, cpu); \ | |
609 | SETUP_CHROMA_BLOCKCOPY_422(type, 32, 16, cpu); \ | |
610 | SETUP_CHROMA_BLOCKCOPY_422(type, 32, 32, cpu); \ | |
611 | SETUP_CHROMA_BLOCKCOPY_422(type, 32, 48, cpu); \ | |
612 | SETUP_CHROMA_BLOCKCOPY_422(type, 32, 64, cpu); | |
613 | ||
// Expands SETUP_LUMA_BLOCKCOPY(type, W, H, cpu) once for every W x H pair
// listed below (4x4 through 64x64 and their rectangular variants),
// forwarding `type` and `cpu` unchanged.
614 | #define LUMA_BLOCKCOPY(type, cpu) \ | |
615 | SETUP_LUMA_BLOCKCOPY(type, 4, 4, cpu); \ | |
616 | SETUP_LUMA_BLOCKCOPY(type, 8, 8, cpu); \ | |
617 | SETUP_LUMA_BLOCKCOPY(type, 8, 4, cpu); \ | |
618 | SETUP_LUMA_BLOCKCOPY(type, 4, 8, cpu); \ | |
619 | SETUP_LUMA_BLOCKCOPY(type, 16, 16, cpu); \ | |
620 | SETUP_LUMA_BLOCKCOPY(type, 16, 8, cpu); \ | |
621 | SETUP_LUMA_BLOCKCOPY(type, 8, 16, cpu); \ | |
622 | SETUP_LUMA_BLOCKCOPY(type, 16, 12, cpu); \ | |
623 | SETUP_LUMA_BLOCKCOPY(type, 12, 16, cpu); \ | |
624 | SETUP_LUMA_BLOCKCOPY(type, 16, 4, cpu); \ | |
625 | SETUP_LUMA_BLOCKCOPY(type, 4, 16, cpu); \ | |
626 | SETUP_LUMA_BLOCKCOPY(type, 32, 32, cpu); \ | |
627 | SETUP_LUMA_BLOCKCOPY(type, 32, 16, cpu); \ | |
628 | SETUP_LUMA_BLOCKCOPY(type, 16, 32, cpu); \ | |
629 | SETUP_LUMA_BLOCKCOPY(type, 32, 24, cpu); \ | |
630 | SETUP_LUMA_BLOCKCOPY(type, 24, 32, cpu); \ | |
631 | SETUP_LUMA_BLOCKCOPY(type, 32, 8, cpu); \ | |
632 | SETUP_LUMA_BLOCKCOPY(type, 8, 32, cpu); \ | |
633 | SETUP_LUMA_BLOCKCOPY(type, 64, 64, cpu); \ | |
634 | SETUP_LUMA_BLOCKCOPY(type, 64, 32, cpu); \ | |
635 | SETUP_LUMA_BLOCKCOPY(type, 32, 64, cpu); \ | |
636 | SETUP_LUMA_BLOCKCOPY(type, 64, 48, cpu); \ | |
637 | SETUP_LUMA_BLOCKCOPY(type, 48, 64, cpu); \ | |
638 | SETUP_LUMA_BLOCKCOPY(type, 64, 16, cpu); \ | |
639 | SETUP_LUMA_BLOCKCOPY(type, 16, 64, cpu); | |
640 | ||
// For one W x H size, stores x265_blockcopy_sp_WxH<cpu> into
// p.chroma[X265_CSP_I420].copy_sp[CHROMA_WxH].
641 | #define SETUP_CHROMA_BLOCKCOPY_SP(W, H, cpu) \ | |
642 | p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] = x265_blockcopy_sp_ ## W ## x ## H ## cpu; | |
643 | ||
// Expands SETUP_CHROMA_BLOCKCOPY_SP(W, H, cpu) once for every W x H pair
// listed below (2x4 through 32x32), forwarding `cpu` unchanged.
644 | #define CHROMA_BLOCKCOPY_SP(cpu) \ | |
645 | SETUP_CHROMA_BLOCKCOPY_SP(2, 4, cpu); \ | |
646 | SETUP_CHROMA_BLOCKCOPY_SP(2, 8, cpu); \ | |
647 | SETUP_CHROMA_BLOCKCOPY_SP(4, 2, cpu); \ | |
648 | SETUP_CHROMA_BLOCKCOPY_SP(4, 4, cpu); \ | |
649 | SETUP_CHROMA_BLOCKCOPY_SP(4, 8, cpu); \ | |
650 | SETUP_CHROMA_BLOCKCOPY_SP(4, 16, cpu); \ | |
651 | SETUP_CHROMA_BLOCKCOPY_SP(6, 8, cpu); \ | |
652 | SETUP_CHROMA_BLOCKCOPY_SP(8, 2, cpu); \ | |
653 | SETUP_CHROMA_BLOCKCOPY_SP(8, 4, cpu); \ | |
654 | SETUP_CHROMA_BLOCKCOPY_SP(8, 6, cpu); \ | |
655 | SETUP_CHROMA_BLOCKCOPY_SP(8, 8, cpu); \ | |
656 | SETUP_CHROMA_BLOCKCOPY_SP(8, 16, cpu); \ | |
657 | SETUP_CHROMA_BLOCKCOPY_SP(8, 32, cpu); \ | |
658 | SETUP_CHROMA_BLOCKCOPY_SP(12, 16, cpu); \ | |
659 | SETUP_CHROMA_BLOCKCOPY_SP(16, 4, cpu); \ | |
660 | SETUP_CHROMA_BLOCKCOPY_SP(16, 8, cpu); \ | |
661 | SETUP_CHROMA_BLOCKCOPY_SP(16, 12, cpu); \ | |
662 | SETUP_CHROMA_BLOCKCOPY_SP(16, 16, cpu); \ | |
663 | SETUP_CHROMA_BLOCKCOPY_SP(16, 32, cpu); \ | |
664 | SETUP_CHROMA_BLOCKCOPY_SP(24, 32, cpu); \ | |
665 | SETUP_CHROMA_BLOCKCOPY_SP(32, 8, cpu); \ | |
666 | SETUP_CHROMA_BLOCKCOPY_SP(32, 16, cpu); \ | |
667 | SETUP_CHROMA_BLOCKCOPY_SP(32, 24, cpu); \ | |
668 | SETUP_CHROMA_BLOCKCOPY_SP(32, 32, cpu); | |
669 | ||
// For one W x H size, stores x265_blockcopy_sp_WxH<cpu> into
// p.chroma[X265_CSP_I422].copy_sp[CHROMA422_WxH].
670 | #define SETUP_CHROMA_BLOCKCOPY_SP_422(W, H, cpu) \ | |
671 | p.chroma[X265_CSP_I422].copy_sp[CHROMA422_ ## W ## x ## H] = x265_blockcopy_sp_ ## W ## x ## H ## cpu; | |
672 | ||
// Expands SETUP_CHROMA_BLOCKCOPY_SP_422(W, H, cpu) once for every W x H pair
// listed below (2x8 through 32x64), forwarding `cpu` unchanged.
673 | #define CHROMA_BLOCKCOPY_SP_422(cpu) \ | |
674 | SETUP_CHROMA_BLOCKCOPY_SP_422(2, 8, cpu); \ | |
675 | SETUP_CHROMA_BLOCKCOPY_SP_422(2, 16, cpu); \ | |
676 | SETUP_CHROMA_BLOCKCOPY_SP_422(4, 4, cpu); \ | |
677 | SETUP_CHROMA_BLOCKCOPY_SP_422(4, 8, cpu); \ | |
678 | SETUP_CHROMA_BLOCKCOPY_SP_422(4, 16, cpu); \ | |
679 | SETUP_CHROMA_BLOCKCOPY_SP_422(4, 32, cpu); \ | |
680 | SETUP_CHROMA_BLOCKCOPY_SP_422(6, 16, cpu); \ | |
681 | SETUP_CHROMA_BLOCKCOPY_SP_422(8, 4, cpu); \ | |
682 | SETUP_CHROMA_BLOCKCOPY_SP_422(8, 8, cpu); \ | |
683 | SETUP_CHROMA_BLOCKCOPY_SP_422(8, 12, cpu); \ | |
684 | SETUP_CHROMA_BLOCKCOPY_SP_422(8, 16, cpu); \ | |
685 | SETUP_CHROMA_BLOCKCOPY_SP_422(8, 32, cpu); \ | |
686 | SETUP_CHROMA_BLOCKCOPY_SP_422(8, 64, cpu); \ | |
687 | SETUP_CHROMA_BLOCKCOPY_SP_422(12, 32, cpu); \ | |
688 | SETUP_CHROMA_BLOCKCOPY_SP_422(16, 8, cpu); \ | |
689 | SETUP_CHROMA_BLOCKCOPY_SP_422(16, 16, cpu); \ | |
690 | SETUP_CHROMA_BLOCKCOPY_SP_422(16, 24, cpu); \ | |
691 | SETUP_CHROMA_BLOCKCOPY_SP_422(16, 32, cpu); \ | |
692 | SETUP_CHROMA_BLOCKCOPY_SP_422(16, 64, cpu); \ | |
693 | SETUP_CHROMA_BLOCKCOPY_SP_422(24, 64, cpu); \ | |
694 | SETUP_CHROMA_BLOCKCOPY_SP_422(32, 16, cpu); \ | |
695 | SETUP_CHROMA_BLOCKCOPY_SP_422(32, 32, cpu); \ | |
696 | SETUP_CHROMA_BLOCKCOPY_SP_422(32, 48, cpu); \ | |
697 | SETUP_CHROMA_BLOCKCOPY_SP_422(32, 64, cpu); | |
698 | ||
// For one W x H size, stores x265_pixel_sub_ps_WxH<cpu> and
// x265_pixel_add_ps_WxH<cpu> into the sub_ps / add_ps tables of
// p.chroma[X265_CSP_I420] at index CHROMA_WxH.
699 | #define SETUP_CHROMA_PIXELSUB(W, H, cpu) \ | |
700 | p.chroma[X265_CSP_I420].sub_ps[CHROMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \ | |
701 | p.chroma[X265_CSP_I420].add_ps[CHROMA_ ## W ## x ## H] = x265_pixel_add_ps_ ## W ## x ## H ## cpu; | |
702 | ||
// Expands SETUP_CHROMA_PIXELSUB for the square sizes 4x4, 8x8, 16x16 and 32x32.
703 | #define CHROMA_PIXELSUB_PS(cpu) \ | |
704 | SETUP_CHROMA_PIXELSUB(4, 4, cpu); \ | |
705 | SETUP_CHROMA_PIXELSUB(8, 8, cpu); \ | |
706 | SETUP_CHROMA_PIXELSUB(16, 16, cpu); \ | |
707 | SETUP_CHROMA_PIXELSUB(32, 32, cpu); | |
708 | ||
// For one W x H size, stores x265_pixel_sub_ps_WxH<cpu> and
// x265_pixel_add_ps_WxH<cpu> into the sub_ps / add_ps tables of
// p.chroma[X265_CSP_I422] at index CHROMA422_WxH.
709 | #define SETUP_CHROMA_PIXELSUB_422(W, H, cpu) \ | |
710 | p.chroma[X265_CSP_I422].sub_ps[CHROMA422_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \ | |
711 | p.chroma[X265_CSP_I422].add_ps[CHROMA422_ ## W ## x ## H] = x265_pixel_add_ps_ ## W ## x ## H ## cpu; | |
712 | ||
// Expands SETUP_CHROMA_PIXELSUB_422 for the sizes 4x8, 8x16, 16x32 and 32x64.
713 | #define CHROMA_PIXELSUB_PS_422(cpu) \ | |
714 | SETUP_CHROMA_PIXELSUB_422(4, 8, cpu); \ | |
715 | SETUP_CHROMA_PIXELSUB_422(8, 16, cpu); \ | |
716 | SETUP_CHROMA_PIXELSUB_422(16, 32, cpu); \ | |
717 | SETUP_CHROMA_PIXELSUB_422(32, 64, cpu); | |
718 | ||
// Expands SETUP_LUMA_FUNC_DEF(W, H, cpu) once for every W x H pair listed
// below (4x4 through 64x64 and their rectangular variants).
719 | #define LUMA_FILTERS(cpu) \ | |
720 | SETUP_LUMA_FUNC_DEF(4, 4, cpu); \ | |
721 | SETUP_LUMA_FUNC_DEF(8, 8, cpu); \ | |
722 | SETUP_LUMA_FUNC_DEF(8, 4, cpu); \ | |
723 | SETUP_LUMA_FUNC_DEF(4, 8, cpu); \ | |
724 | SETUP_LUMA_FUNC_DEF(16, 16, cpu); \ | |
725 | SETUP_LUMA_FUNC_DEF(16, 8, cpu); \ | |
726 | SETUP_LUMA_FUNC_DEF(8, 16, cpu); \ | |
727 | SETUP_LUMA_FUNC_DEF(16, 12, cpu); \ | |
728 | SETUP_LUMA_FUNC_DEF(12, 16, cpu); \ | |
729 | SETUP_LUMA_FUNC_DEF(16, 4, cpu); \ | |
730 | SETUP_LUMA_FUNC_DEF(4, 16, cpu); \ | |
731 | SETUP_LUMA_FUNC_DEF(32, 32, cpu); \ | |
732 | SETUP_LUMA_FUNC_DEF(32, 16, cpu); \ | |
733 | SETUP_LUMA_FUNC_DEF(16, 32, cpu); \ | |
734 | SETUP_LUMA_FUNC_DEF(32, 24, cpu); \ | |
735 | SETUP_LUMA_FUNC_DEF(24, 32, cpu); \ | |
736 | SETUP_LUMA_FUNC_DEF(32, 8, cpu); \ | |
737 | SETUP_LUMA_FUNC_DEF(8, 32, cpu); \ | |
738 | SETUP_LUMA_FUNC_DEF(64, 64, cpu); \ | |
739 | SETUP_LUMA_FUNC_DEF(64, 32, cpu); \ | |
740 | SETUP_LUMA_FUNC_DEF(32, 64, cpu); \ | |
741 | SETUP_LUMA_FUNC_DEF(64, 48, cpu); \ | |
742 | SETUP_LUMA_FUNC_DEF(48, 64, cpu); \ | |
743 | SETUP_LUMA_FUNC_DEF(64, 16, cpu); \ | |
744 | SETUP_LUMA_FUNC_DEF(16, 64, cpu); | |
745 | ||
// Expands SETUP_LUMA_SUB_FUNC_DEF for the square sizes 4x4, 8x8, 16x16,
// 32x32 and 64x64.
746 | #define LUMA_PIXELSUB(cpu) \ | |
747 | SETUP_LUMA_SUB_FUNC_DEF(4, 4, cpu); \ | |
748 | SETUP_LUMA_SUB_FUNC_DEF(8, 8, cpu); \ | |
749 | SETUP_LUMA_SUB_FUNC_DEF(16, 16, cpu); \ | |
750 | SETUP_LUMA_SUB_FUNC_DEF(32, 32, cpu); \ | |
751 | SETUP_LUMA_SUB_FUNC_DEF(64, 64, cpu); | |
752 | ||
// Expands SETUP_LUMA_SP_FUNC_DEF(W, H, cpu) once for every W x H pair listed
// below (4x4 through 64x64 and their rectangular variants).
753 | #define LUMA_SP_FILTERS(cpu) \ | |
754 | SETUP_LUMA_SP_FUNC_DEF(4, 4, cpu); \ | |
755 | SETUP_LUMA_SP_FUNC_DEF(8, 8, cpu); \ | |
756 | SETUP_LUMA_SP_FUNC_DEF(8, 4, cpu); \ | |
757 | SETUP_LUMA_SP_FUNC_DEF(4, 8, cpu); \ | |
758 | SETUP_LUMA_SP_FUNC_DEF(16, 16, cpu); \ | |
759 | SETUP_LUMA_SP_FUNC_DEF(16, 8, cpu); \ | |
760 | SETUP_LUMA_SP_FUNC_DEF(8, 16, cpu); \ | |
761 | SETUP_LUMA_SP_FUNC_DEF(16, 12, cpu); \ | |
762 | SETUP_LUMA_SP_FUNC_DEF(12, 16, cpu); \ | |
763 | SETUP_LUMA_SP_FUNC_DEF(16, 4, cpu); \ | |
764 | SETUP_LUMA_SP_FUNC_DEF(4, 16, cpu); \ | |
765 | SETUP_LUMA_SP_FUNC_DEF(32, 32, cpu); \ | |
766 | SETUP_LUMA_SP_FUNC_DEF(32, 16, cpu); \ | |
767 | SETUP_LUMA_SP_FUNC_DEF(16, 32, cpu); \ | |
768 | SETUP_LUMA_SP_FUNC_DEF(32, 24, cpu); \ | |
769 | SETUP_LUMA_SP_FUNC_DEF(24, 32, cpu); \ | |
770 | SETUP_LUMA_SP_FUNC_DEF(32, 8, cpu); \ | |
771 | SETUP_LUMA_SP_FUNC_DEF(8, 32, cpu); \ | |
772 | SETUP_LUMA_SP_FUNC_DEF(64, 64, cpu); \ | |
773 | SETUP_LUMA_SP_FUNC_DEF(64, 32, cpu); \ | |
774 | SETUP_LUMA_SP_FUNC_DEF(32, 64, cpu); \ | |
775 | SETUP_LUMA_SP_FUNC_DEF(64, 48, cpu); \ | |
776 | SETUP_LUMA_SP_FUNC_DEF(48, 64, cpu); \ | |
777 | SETUP_LUMA_SP_FUNC_DEF(64, 16, cpu); \ | |
778 | SETUP_LUMA_SP_FUNC_DEF(16, 64, cpu); | |
779 | ||
// Expands SETUP_LUMA_SS_FUNC_DEF(W, H, cpu) once for every W x H pair listed
// below (4x4 through 64x64 and their rectangular variants).
780 | #define LUMA_SS_FILTERS(cpu) \ | |
781 | SETUP_LUMA_SS_FUNC_DEF(4, 4, cpu); \ | |
782 | SETUP_LUMA_SS_FUNC_DEF(8, 8, cpu); \ | |
783 | SETUP_LUMA_SS_FUNC_DEF(8, 4, cpu); \ | |
784 | SETUP_LUMA_SS_FUNC_DEF(4, 8, cpu); \ | |
785 | SETUP_LUMA_SS_FUNC_DEF(16, 16, cpu); \ | |
786 | SETUP_LUMA_SS_FUNC_DEF(16, 8, cpu); \ | |
787 | SETUP_LUMA_SS_FUNC_DEF(8, 16, cpu); \ | |
788 | SETUP_LUMA_SS_FUNC_DEF(16, 12, cpu); \ | |
789 | SETUP_LUMA_SS_FUNC_DEF(12, 16, cpu); \ | |
790 | SETUP_LUMA_SS_FUNC_DEF(16, 4, cpu); \ | |
791 | SETUP_LUMA_SS_FUNC_DEF(4, 16, cpu); \ | |
792 | SETUP_LUMA_SS_FUNC_DEF(32, 32, cpu); \ | |
793 | SETUP_LUMA_SS_FUNC_DEF(32, 16, cpu); \ | |
794 | SETUP_LUMA_SS_FUNC_DEF(16, 32, cpu); \ | |
795 | SETUP_LUMA_SS_FUNC_DEF(32, 24, cpu); \ | |
796 | SETUP_LUMA_SS_FUNC_DEF(24, 32, cpu); \ | |
797 | SETUP_LUMA_SS_FUNC_DEF(32, 8, cpu); \ | |
798 | SETUP_LUMA_SS_FUNC_DEF(8, 32, cpu); \ | |
799 | SETUP_LUMA_SS_FUNC_DEF(64, 64, cpu); \ | |
800 | SETUP_LUMA_SS_FUNC_DEF(64, 32, cpu); \ | |
801 | SETUP_LUMA_SS_FUNC_DEF(32, 64, cpu); \ | |
802 | SETUP_LUMA_SS_FUNC_DEF(64, 48, cpu); \ | |
803 | SETUP_LUMA_SS_FUNC_DEF(48, 64, cpu); \ | |
804 | SETUP_LUMA_SS_FUNC_DEF(64, 16, cpu); \ | |
805 | SETUP_LUMA_SS_FUNC_DEF(16, 64, cpu); | |
806 | ||
// For one W x H size, stores x265_pixel_var_WxH<cpu> into p.var[BLOCK_WxH].
// Note the index uses the BLOCK_ prefix rather than LUMA_.
807 | #define SETUP_PIXEL_VAR_DEF(W, H, cpu) \ | |
808 | p.var[BLOCK_ ## W ## x ## H] = x265_pixel_var_ ## W ## x ## H ## cpu; | |
809 | ||
// Expands SETUP_PIXEL_VAR_DEF for the square sizes 8x8, 16x16, 32x32 and 64x64.
810 | #define LUMA_VAR(cpu) \ | |
811 | SETUP_PIXEL_VAR_DEF(8, 8, cpu); \ | |
812 | SETUP_PIXEL_VAR_DEF(16, 16, cpu); \ | |
813 | SETUP_PIXEL_VAR_DEF(32, 32, cpu); \ | |
814 | SETUP_PIXEL_VAR_DEF(64, 64, cpu); | |
815 | ||
// For one W x H size, stores x265_pixel_ssd_sp_WxH<cpu> into
// p.sse_sp[LUMA_WxH]. The table member is named "sse" while the routine
// family is named "ssd".
816 | #define SETUP_PIXEL_SSE_SP_DEF(W, H, cpu) \ | |
817 | p.sse_sp[LUMA_ ## W ## x ## H] = x265_pixel_ssd_sp_ ## W ## x ## H ## cpu; | |
818 | ||
// Expands SETUP_PIXEL_SSE_SP_DEF(W, H, cpu) once for every W x H pair listed
// below (4x4 through 64x64 and their rectangular variants).
819 | #define LUMA_SSE_SP(cpu) \ | |
820 | SETUP_PIXEL_SSE_SP_DEF(4, 4, cpu); \ | |
821 | SETUP_PIXEL_SSE_SP_DEF(8, 8, cpu); \ | |
822 | SETUP_PIXEL_SSE_SP_DEF(8, 4, cpu); \ | |
823 | SETUP_PIXEL_SSE_SP_DEF(4, 8, cpu); \ | |
824 | SETUP_PIXEL_SSE_SP_DEF(16, 16, cpu); \ | |
825 | SETUP_PIXEL_SSE_SP_DEF(16, 8, cpu); \ | |
826 | SETUP_PIXEL_SSE_SP_DEF(8, 16, cpu); \ | |
827 | SETUP_PIXEL_SSE_SP_DEF(16, 12, cpu); \ | |
828 | SETUP_PIXEL_SSE_SP_DEF(12, 16, cpu); \ | |
829 | SETUP_PIXEL_SSE_SP_DEF(16, 4, cpu); \ | |
830 | SETUP_PIXEL_SSE_SP_DEF(4, 16, cpu); \ | |
831 | SETUP_PIXEL_SSE_SP_DEF(32, 32, cpu); \ | |
832 | SETUP_PIXEL_SSE_SP_DEF(32, 16, cpu); \ | |
833 | SETUP_PIXEL_SSE_SP_DEF(16, 32, cpu); \ | |
834 | SETUP_PIXEL_SSE_SP_DEF(32, 24, cpu); \ | |
835 | SETUP_PIXEL_SSE_SP_DEF(24, 32, cpu); \ | |
836 | SETUP_PIXEL_SSE_SP_DEF(32, 8, cpu); \ | |
837 | SETUP_PIXEL_SSE_SP_DEF(8, 32, cpu); \ | |
838 | SETUP_PIXEL_SSE_SP_DEF(64, 64, cpu); \ | |
839 | SETUP_PIXEL_SSE_SP_DEF(64, 32, cpu); \ | |
840 | SETUP_PIXEL_SSE_SP_DEF(32, 64, cpu); \ | |
841 | SETUP_PIXEL_SSE_SP_DEF(64, 48, cpu); \ | |
842 | SETUP_PIXEL_SSE_SP_DEF(48, 64, cpu); \ | |
843 | SETUP_PIXEL_SSE_SP_DEF(64, 16, cpu); \ | |
844 | SETUP_PIXEL_SSE_SP_DEF(16, 64, cpu); | |
845 | ||
// For one W x H size, stores x265_addAvg_WxH<cpu> into p.luma_addAvg[LUMA_WxH].
846 | #define SETUP_LUMA_ADDAVG_FUNC_DEF(W, H, cpu) \ | |
847 | p.luma_addAvg[LUMA_ ## W ## x ## H] = x265_addAvg_ ## W ## x ## H ## cpu; | |
848 | ||
849 | #define LUMA_ADDAVG(cpu) \ | |
850 | SETUP_LUMA_ADDAVG_FUNC_DEF(4, 4, cpu); \ | |
851 | SETUP_LUMA_ADDAVG_FUNC_DEF(4, 8, cpu); \ | |
852 | SETUP_LUMA_ADDAVG_FUNC_DEF(4, 16, cpu); \ | |
853 | SETUP_LUMA_ADDAVG_FUNC_DEF(8, 4, cpu); \ | |
854 | SETUP_LUMA_ADDAVG_FUNC_DEF(8, 8, cpu); \ | |
855 | SETUP_LUMA_ADDAVG_FUNC_DEF(8, 16, cpu); \ | |
856 | SETUP_LUMA_ADDAVG_FUNC_DEF(8, 32, cpu); \ | |
857 | SETUP_LUMA_ADDAVG_FUNC_DEF(12, 16, cpu); \ | |
858 | SETUP_LUMA_ADDAVG_FUNC_DEF(16, 4, cpu); \ | |
859 | SETUP_LUMA_ADDAVG_FUNC_DEF(16, 8, cpu); \ | |
860 | SETUP_LUMA_ADDAVG_FUNC_DEF(16, 12, cpu); \ | |
861 | SETUP_LUMA_ADDAVG_FUNC_DEF(16, 16, cpu); \ | |
862 | SETUP_LUMA_ADDAVG_FUNC_DEF(16, 32, cpu); \ | |
863 | SETUP_LUMA_ADDAVG_FUNC_DEF(24, 32, cpu); \ | |
864 | SETUP_LUMA_ADDAVG_FUNC_DEF(16, 64, cpu); \ | |
865 | SETUP_LUMA_ADDAVG_FUNC_DEF(32, 8, cpu); \ | |
866 | SETUP_LUMA_ADDAVG_FUNC_DEF(32, 16, cpu); \ | |
867 | SETUP_LUMA_ADDAVG_FUNC_DEF(32, 24, cpu); \ | |
868 | SETUP_LUMA_ADDAVG_FUNC_DEF(32, 32, cpu); \ | |
869 | SETUP_LUMA_ADDAVG_FUNC_DEF(32, 64, cpu); \ | |
870 | SETUP_LUMA_ADDAVG_FUNC_DEF(48, 64, cpu); \ | |
871 | SETUP_LUMA_ADDAVG_FUNC_DEF(64, 16, cpu); \ | |
872 | SETUP_LUMA_ADDAVG_FUNC_DEF(64, 32, cpu); \ | |
873 | SETUP_LUMA_ADDAVG_FUNC_DEF(64, 48, cpu); \ | |
874 | SETUP_LUMA_ADDAVG_FUNC_DEF(64, 64, cpu); \ | |
875 | ||
// For one W x H size, stores x265_addAvg_WxH<cpu> into
// p.chroma[X265_CSP_I420].addAvg[CHROMA_WxH].
876 | #define SETUP_CHROMA_ADDAVG_FUNC_DEF(W, H, cpu) \ | |
877 | p.chroma[X265_CSP_I420].addAvg[CHROMA_ ## W ## x ## H] = x265_addAvg_ ## W ## x ## H ## cpu; | |
878 | ||
// Expands SETUP_CHROMA_ADDAVG_FUNC_DEF(W, H, cpu) once for every W x H pair
// listed below (2x4 through 32x32), forwarding `cpu` unchanged.
879 | #define CHROMA_ADDAVG(cpu) \ | |
880 | SETUP_CHROMA_ADDAVG_FUNC_DEF(2, 4, cpu); \ | |
881 | SETUP_CHROMA_ADDAVG_FUNC_DEF(2, 8, cpu); \ | |
882 | SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 2, cpu); \ | |
883 | SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 4, cpu); \ | |
884 | SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 8, cpu); \ | |
885 | SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 16, cpu); \ | |
886 | SETUP_CHROMA_ADDAVG_FUNC_DEF(6, 8, cpu); \ | |
887 | SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 2, cpu); \ | |
888 | SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 4, cpu); \ | |
889 | SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 6, cpu); \ | |
890 | SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 8, cpu); \ | |
891 | SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 16, cpu); \ | |
892 | SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 32, cpu); \ | |
893 | SETUP_CHROMA_ADDAVG_FUNC_DEF(12, 16, cpu); \ | |
894 | SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 4, cpu); \ | |
895 | SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 8, cpu); \ | |
896 | SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 12, cpu); \ | |
897 | SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 16, cpu); \ | |
898 | SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 32, cpu); \ | |
899 | SETUP_CHROMA_ADDAVG_FUNC_DEF(24, 32, cpu); \ | |
900 | SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 8, cpu); \ | |
901 | SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 16, cpu); \ | |
902 | SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 24, cpu); \ | |
903 | SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 32, cpu); | |
904 | ||
// For one W x H size, stores x265_addAvg_WxH<cpu> into
// p.chroma[X265_CSP_I422].addAvg[CHROMA422_WxH].
905 | #define SETUP_CHROMA_ADDAVG_FUNC_DEF_422(W, H, cpu) \ | |
906 | p.chroma[X265_CSP_I422].addAvg[CHROMA422_ ## W ## x ## H] = x265_addAvg_ ## W ## x ## H ## cpu; | |
907 | ||
// Expands SETUP_CHROMA_ADDAVG_FUNC_DEF_422(W, H, cpu) once for every W x H
// pair listed below (2x8 through 32x64), forwarding `cpu` unchanged.
908 | #define CHROMA_ADDAVG_422(cpu) \ | |
909 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(2, 8, cpu); \ | |
910 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(2, 16, cpu); \ | |
911 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(4, 4, cpu); \ | |
912 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(4, 8, cpu); \ | |
913 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(4, 16, cpu); \ | |
914 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(4, 32, cpu); \ | |
915 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(6, 16, cpu); \ | |
916 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(8, 4, cpu); \ | |
917 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(8, 8, cpu); \ | |
918 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(8, 12, cpu); \ | |
919 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(8, 16, cpu); \ | |
920 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(8, 32, cpu); \ | |
921 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(8, 64, cpu); \ | |
922 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(12, 32, cpu); \ | |
923 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(16, 8, cpu); \ | |
924 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(16, 16, cpu); \ | |
925 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(16, 24, cpu); \ | |
926 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(16, 32, cpu); \ | |
927 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(16, 64, cpu); \ | |
928 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(24, 64, cpu); \ | |
929 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(32, 16, cpu); \ | |
930 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(32, 32, cpu); \ | |
931 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(32, 48, cpu); \ | |
932 | SETUP_CHROMA_ADDAVG_FUNC_DEF_422(32, 64, cpu); | |
933 | ||
// For table row `mode`, stores x265_intra_pred_ang{4,8,16,32}_<fno>_<cpu>
// into p.intra_pred[mode] at BLOCK_4x4, BLOCK_8x8, BLOCK_16x16 and BLOCK_32x32.
// `fno` is the number embedded in the routine name and may differ from `mode`.
// Unlike the W x H macros in this file, an underscore is inserted before
// `cpu`, so the argument is passed without a leading underscore.
934 | #define SETUP_INTRA_ANG_COMMON(mode, fno, cpu) \ | |
935 | p.intra_pred[mode][BLOCK_4x4] = x265_intra_pred_ang4_ ## fno ## _ ## cpu; \ | |
936 | p.intra_pred[mode][BLOCK_8x8] = x265_intra_pred_ang8_ ## fno ## _ ## cpu; \ | |
937 | p.intra_pred[mode][BLOCK_16x16] = x265_intra_pred_ang16_ ## fno ## _ ## cpu; \ | |
938 | p.intra_pred[mode][BLOCK_32x32] = x265_intra_pred_ang32_ ## fno ## _ ## cpu; | |
939 | ||
// For table row `mode`, stores x265_intra_pred_ang{8,16,32}_<fno>_<cpu> into
// p.intra_pred[mode] at BLOCK_8x8, BLOCK_16x16 and BLOCK_32x32.
// The BLOCK_4x4 entry is not touched. An underscore is inserted before `cpu`.
940 | #define SETUP_INTRA_ANG(mode, fno, cpu) \ | |
941 | p.intra_pred[mode][BLOCK_8x8] = x265_intra_pred_ang8_ ## fno ## _ ## cpu; \ | |
942 | p.intra_pred[mode][BLOCK_16x16] = x265_intra_pred_ang16_ ## fno ## _ ## cpu; \ | |
943 | p.intra_pred[mode][BLOCK_32x32] = x265_intra_pred_ang32_ ## fno ## _ ## cpu; | |
944 | ||
// For table row `mode`, stores x265_intra_pred_ang4_<fno>_<cpu> into
// p.intra_pred[mode][BLOCK_4x4] only. An underscore is inserted before `cpu`.
945 | #define SETUP_INTRA_ANG4(mode, fno, cpu) \ | |
946 | p.intra_pred[mode][BLOCK_4x4] = x265_intra_pred_ang4_ ## fno ## _ ## cpu; | |
947 | ||
// For table row `mode`, stores x265_intra_pred_ang{16,32}_<fno>_<cpu> into
// p.intra_pred[mode] at BLOCK_16x16 and BLOCK_32x32 only.
// An underscore is inserted before `cpu`.
948 | #define SETUP_INTRA_ANG16_32(mode, fno, cpu) \ | |
949 | p.intra_pred[mode][BLOCK_16x16] = x265_intra_pred_ang16_ ## fno ## _ ## cpu; \ | |
950 | p.intra_pred[mode][BLOCK_32x32] = x265_intra_pred_ang32_ ## fno ## _ ## cpu; | |
951 | ||
// For table row `mode`, stores x265_intra_pred_ang{4,8}_<fno>_<cpu> into
// p.intra_pred[mode] at BLOCK_4x4 and BLOCK_8x8 only.
// An underscore is inserted before `cpu`.
952 | #define SETUP_INTRA_ANG4_8(mode, fno, cpu) \ | |
953 | p.intra_pred[mode][BLOCK_4x4] = x265_intra_pred_ang4_ ## fno ## _ ## cpu; \ | |
954 | p.intra_pred[mode][BLOCK_8x8] = x265_intra_pred_ang8_ ## fno ## _ ## cpu; | |
955 | ||
// Fills rows 2 and 34 of p.intra_pred for all four block sizes; both rows use
// the routines numbered 2 (fno = 2).
956 | #define INTRA_ANG_SSSE3(cpu) \ | |
957 | SETUP_INTRA_ANG_COMMON(2, 2, cpu); \ | |
958 | SETUP_INTRA_ANG_COMMON(34, 2, cpu); | |
959 | ||
// Fills rows 3 through 18 of p.intra_pred for all four block sizes, each row
// using the routine with the same number (fno == mode).
960 | #define INTRA_ANG_SSE4_COMMON(cpu) \ | |
961 | SETUP_INTRA_ANG_COMMON(3, 3, cpu); \ | |
962 | SETUP_INTRA_ANG_COMMON(4, 4, cpu); \ | |
963 | SETUP_INTRA_ANG_COMMON(5, 5, cpu); \ | |
964 | SETUP_INTRA_ANG_COMMON(6, 6, cpu); \ | |
965 | SETUP_INTRA_ANG_COMMON(7, 7, cpu); \ | |
966 | SETUP_INTRA_ANG_COMMON(8, 8, cpu); \ | |
967 | SETUP_INTRA_ANG_COMMON(9, 9, cpu); \ | |
968 | SETUP_INTRA_ANG_COMMON(10, 10, cpu); \ | |
969 | SETUP_INTRA_ANG_COMMON(11, 11, cpu); \ | |
970 | SETUP_INTRA_ANG_COMMON(12, 12, cpu); \ | |
971 | SETUP_INTRA_ANG_COMMON(13, 13, cpu); \ | |
972 | SETUP_INTRA_ANG_COMMON(14, 14, cpu); \ | |
973 | SETUP_INTRA_ANG_COMMON(15, 15, cpu); \ | |
974 | SETUP_INTRA_ANG_COMMON(16, 16, cpu); \ | |
975 | SETUP_INTRA_ANG_COMMON(17, 17, cpu); \ | |
976 | SETUP_INTRA_ANG_COMMON(18, 18, cpu); | |
977 | ||
// Fills rows 19 through 33 of p.intra_pred.
//  - 8x8, 16x16 and 32x32 entries use the routine with the same number as the
//    row (SETUP_INTRA_ANG, fno == mode).
//  - The 4x4 entry uses routine number (36 - mode), i.e. 19 -> 17 ... 33 -> 3,
//    except row 26, which uses routine 26.
978 | #define INTRA_ANG_SSE4_HIGH(cpu) \ | |
979 | SETUP_INTRA_ANG(19, 19, cpu); \ | |
980 | SETUP_INTRA_ANG(20, 20, cpu); \ | |
981 | SETUP_INTRA_ANG(21, 21, cpu); \ | |
982 | SETUP_INTRA_ANG(22, 22, cpu); \ | |
983 | SETUP_INTRA_ANG(23, 23, cpu); \ | |
984 | SETUP_INTRA_ANG(24, 24, cpu); \ | |
985 | SETUP_INTRA_ANG(25, 25, cpu); \ | |
986 | SETUP_INTRA_ANG(26, 26, cpu); \ | |
987 | SETUP_INTRA_ANG(27, 27, cpu); \ | |
988 | SETUP_INTRA_ANG(28, 28, cpu); \ | |
989 | SETUP_INTRA_ANG(29, 29, cpu); \ | |
990 | SETUP_INTRA_ANG(30, 30, cpu); \ | |
991 | SETUP_INTRA_ANG(31, 31, cpu); \ | |
992 | SETUP_INTRA_ANG(32, 32, cpu); \ | |
993 | SETUP_INTRA_ANG(33, 33, cpu); \ | |
994 | SETUP_INTRA_ANG4(19, 17, cpu); \ | |
995 | SETUP_INTRA_ANG4(20, 16, cpu); \ | |
996 | SETUP_INTRA_ANG4(21, 15, cpu); \ | |
997 | SETUP_INTRA_ANG4(22, 14, cpu); \ | |
998 | SETUP_INTRA_ANG4(23, 13, cpu); \ | |
999 | SETUP_INTRA_ANG4(24, 12, cpu); \ | |
1000 | SETUP_INTRA_ANG4(25, 11, cpu); \ | |
1001 | SETUP_INTRA_ANG4(26, 26, cpu); \ | |
1002 | SETUP_INTRA_ANG4(27, 9, cpu); \ | |
1003 | SETUP_INTRA_ANG4(28, 8, cpu); \ | |
1004 | SETUP_INTRA_ANG4(29, 7, cpu); \ | |
1005 | SETUP_INTRA_ANG4(30, 6, cpu); \ | |
1006 | SETUP_INTRA_ANG4(31, 5, cpu); \ | |
1007 | SETUP_INTRA_ANG4(32, 4, cpu); \ | |
1008 | SETUP_INTRA_ANG4(33, 3, cpu); | |
1009 | ||
// Fills rows 19 through 33 of p.intra_pred.
//  - 4x4 and 8x8 entries use routine number (36 - mode), i.e. 19 -> 17 ...
//    33 -> 3, except row 26, which uses routine 26 (SETUP_INTRA_ANG4_8).
//  - 16x16 and 32x32 entries use the routine with the same number as the row
//    (SETUP_INTRA_ANG16_32, fno == mode).
1010 | #define INTRA_ANG_SSE4(cpu) \ | |
1011 | SETUP_INTRA_ANG4_8(19, 17, cpu); \ | |
1012 | SETUP_INTRA_ANG4_8(20, 16, cpu); \ | |
1013 | SETUP_INTRA_ANG4_8(21, 15, cpu); \ | |
1014 | SETUP_INTRA_ANG4_8(22, 14, cpu); \ | |
1015 | SETUP_INTRA_ANG4_8(23, 13, cpu); \ | |
1016 | SETUP_INTRA_ANG4_8(24, 12, cpu); \ | |
1017 | SETUP_INTRA_ANG4_8(25, 11, cpu); \ | |
1018 | SETUP_INTRA_ANG4_8(26, 26, cpu); \ | |
1019 | SETUP_INTRA_ANG4_8(27, 9, cpu); \ | |
1020 | SETUP_INTRA_ANG4_8(28, 8, cpu); \ | |
1021 | SETUP_INTRA_ANG4_8(29, 7, cpu); \ | |
1022 | SETUP_INTRA_ANG4_8(30, 6, cpu); \ | |
1023 | SETUP_INTRA_ANG4_8(31, 5, cpu); \ | |
1024 | SETUP_INTRA_ANG4_8(32, 4, cpu); \ | |
1025 | SETUP_INTRA_ANG4_8(33, 3, cpu); \ | |
1026 | SETUP_INTRA_ANG16_32(19, 19, cpu); \ | |
1027 | SETUP_INTRA_ANG16_32(20, 20, cpu); \ | |
1028 | SETUP_INTRA_ANG16_32(21, 21, cpu); \ | |
1029 | SETUP_INTRA_ANG16_32(22, 22, cpu); \ | |
1030 | SETUP_INTRA_ANG16_32(23, 23, cpu); \ | |
1031 | SETUP_INTRA_ANG16_32(24, 24, cpu); \ | |
1032 | SETUP_INTRA_ANG16_32(25, 25, cpu); \ | |
1033 | SETUP_INTRA_ANG16_32(26, 26, cpu); \ | |
1034 | SETUP_INTRA_ANG16_32(27, 27, cpu); \ | |
1035 | SETUP_INTRA_ANG16_32(28, 28, cpu); \ | |
1036 | SETUP_INTRA_ANG16_32(29, 29, cpu); \ | |
1037 | SETUP_INTRA_ANG16_32(30, 30, cpu); \ | |
1038 | SETUP_INTRA_ANG16_32(31, 31, cpu); \ | |
1039 | SETUP_INTRA_ANG16_32(32, 32, cpu); \ | |
1040 | SETUP_INTRA_ANG16_32(33, 33, cpu); | |
1041 | ||
// For one W x H size, stores the x265_interp_4tap_vert_{ss,pp,ps,sp}_WxH<cpu>
// routines into the filter_vss / filter_vpp / filter_vps / filter_vsp tables
// of p.chroma[X265_CSP_I420] at index CHROMA_WxH.
1042 | #define SETUP_CHROMA_VERT_FUNC_DEF(W, H, cpu) \ | |
1043 | p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \ | |
1044 | p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \ | |
1045 | p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \ | |
1046 | p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu; | |
1047 | ||
// Expands SETUP_CHROMA_VERT_FUNC_DEF(W, H, cpu) once for every W x H pair
// listed below. The 2x4, 2x8, 4x2 and 6x8 sizes are not in this list; they are
// covered by CHROMA_VERT_FILTERS_SSE4.
1048 | #define CHROMA_VERT_FILTERS(cpu) \ | |
1049 | SETUP_CHROMA_VERT_FUNC_DEF(4, 4, cpu); \ | |
1050 | SETUP_CHROMA_VERT_FUNC_DEF(8, 8, cpu); \ | |
1051 | SETUP_CHROMA_VERT_FUNC_DEF(8, 4, cpu); \ | |
1052 | SETUP_CHROMA_VERT_FUNC_DEF(4, 8, cpu); \ | |
1053 | SETUP_CHROMA_VERT_FUNC_DEF(8, 6, cpu); \ | |
1054 | SETUP_CHROMA_VERT_FUNC_DEF(8, 2, cpu); \ | |
1055 | SETUP_CHROMA_VERT_FUNC_DEF(16, 16, cpu); \ | |
1056 | SETUP_CHROMA_VERT_FUNC_DEF(16, 8, cpu); \ | |
1057 | SETUP_CHROMA_VERT_FUNC_DEF(8, 16, cpu); \ | |
1058 | SETUP_CHROMA_VERT_FUNC_DEF(16, 12, cpu); \ | |
1059 | SETUP_CHROMA_VERT_FUNC_DEF(12, 16, cpu); \ | |
1060 | SETUP_CHROMA_VERT_FUNC_DEF(16, 4, cpu); \ | |
1061 | SETUP_CHROMA_VERT_FUNC_DEF(4, 16, cpu); \ | |
1062 | SETUP_CHROMA_VERT_FUNC_DEF(32, 32, cpu); \ | |
1063 | SETUP_CHROMA_VERT_FUNC_DEF(32, 16, cpu); \ | |
1064 | SETUP_CHROMA_VERT_FUNC_DEF(16, 32, cpu); \ | |
1065 | SETUP_CHROMA_VERT_FUNC_DEF(32, 24, cpu); \ | |
1066 | SETUP_CHROMA_VERT_FUNC_DEF(24, 32, cpu); \ | |
1067 | SETUP_CHROMA_VERT_FUNC_DEF(32, 8, cpu); \ | |
1068 | SETUP_CHROMA_VERT_FUNC_DEF(8, 32, cpu); | |
1069 | ||
// Expands SETUP_CHROMA_VERT_FUNC_DEF for the sizes 2x4, 2x8, 4x2 and 6x8,
// which CHROMA_VERT_FILTERS does not list.
1070 | #define CHROMA_VERT_FILTERS_SSE4(cpu) \ | |
1071 | SETUP_CHROMA_VERT_FUNC_DEF(2, 4, cpu); \ | |
1072 | SETUP_CHROMA_VERT_FUNC_DEF(2, 8, cpu); \ | |
1073 | SETUP_CHROMA_VERT_FUNC_DEF(4, 2, cpu); \ | |
1074 | SETUP_CHROMA_VERT_FUNC_DEF(6, 8, cpu); | |
1075 | ||
// For one W x H size, stores the x265_interp_4tap_vert_{ss,pp,ps,sp}_WxH<cpu>
// routines into the filter_vss / filter_vpp / filter_vps / filter_vsp tables
// of p.chroma[X265_CSP_I422] at index CHROMA422_WxH.
1076 | #define SETUP_CHROMA_VERT_FUNC_DEF_422(W, H, cpu) \ | |
1077 | p.chroma[X265_CSP_I422].filter_vss[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \ | |
1078 | p.chroma[X265_CSP_I422].filter_vpp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \ | |
1079 | p.chroma[X265_CSP_I422].filter_vps[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \ | |
1080 | p.chroma[X265_CSP_I422].filter_vsp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu; | |
1081 | ||
// Expands SETUP_CHROMA_VERT_FUNC_DEF_422(W, H, cpu) once for every W x H pair
// listed below. The 2x8, 2x16, 4x4 and 6x16 sizes are not in this list; they
// are covered by CHROMA_VERT_FILTERS_SSE4_422.
1082 | #define CHROMA_VERT_FILTERS_422(cpu) \ | |
1083 | SETUP_CHROMA_VERT_FUNC_DEF_422(4, 8, cpu); \ | |
1084 | SETUP_CHROMA_VERT_FUNC_DEF_422(8, 16, cpu); \ | |
1085 | SETUP_CHROMA_VERT_FUNC_DEF_422(8, 8, cpu); \ | |
1086 | SETUP_CHROMA_VERT_FUNC_DEF_422(4, 16, cpu); \ | |
1087 | SETUP_CHROMA_VERT_FUNC_DEF_422(8, 12, cpu); \ | |
1088 | SETUP_CHROMA_VERT_FUNC_DEF_422(8, 4, cpu); \ | |
1089 | SETUP_CHROMA_VERT_FUNC_DEF_422(16, 32, cpu); \ | |
1090 | SETUP_CHROMA_VERT_FUNC_DEF_422(16, 16, cpu); \ | |
1091 | SETUP_CHROMA_VERT_FUNC_DEF_422(8, 32, cpu); \ | |
1092 | SETUP_CHROMA_VERT_FUNC_DEF_422(16, 24, cpu); \ | |
1093 | SETUP_CHROMA_VERT_FUNC_DEF_422(12, 32, cpu); \ | |
1094 | SETUP_CHROMA_VERT_FUNC_DEF_422(16, 8, cpu); \ | |
1095 | SETUP_CHROMA_VERT_FUNC_DEF_422(4, 32, cpu); \ | |
1096 | SETUP_CHROMA_VERT_FUNC_DEF_422(32, 64, cpu); \ | |
1097 | SETUP_CHROMA_VERT_FUNC_DEF_422(32, 32, cpu); \ | |
1098 | SETUP_CHROMA_VERT_FUNC_DEF_422(16, 64, cpu); \ | |
1099 | SETUP_CHROMA_VERT_FUNC_DEF_422(32, 48, cpu); \ | |
1100 | SETUP_CHROMA_VERT_FUNC_DEF_422(24, 64, cpu); \ | |
1101 | SETUP_CHROMA_VERT_FUNC_DEF_422(32, 16, cpu); \ | |
1102 | SETUP_CHROMA_VERT_FUNC_DEF_422(8, 64, cpu); | |
1103 | ||
// Expands SETUP_CHROMA_VERT_FUNC_DEF_422 for the sizes 2x8, 2x16, 4x4 and
// 6x16, which CHROMA_VERT_FILTERS_422 does not list.
1104 | #define CHROMA_VERT_FILTERS_SSE4_422(cpu) \ | |
1105 | SETUP_CHROMA_VERT_FUNC_DEF_422(2, 8, cpu); \ | |
1106 | SETUP_CHROMA_VERT_FUNC_DEF_422(2, 16, cpu); \ | |
1107 | SETUP_CHROMA_VERT_FUNC_DEF_422(4, 4, cpu); \ | |
1108 | SETUP_CHROMA_VERT_FUNC_DEF_422(6, 16, cpu); | |
1109 | ||
// For one W x H size, stores the x265_interp_4tap_vert_{ss,pp,ps,sp}_WxH<cpu>
// routines into the filter_vss / filter_vpp / filter_vps / filter_vsp tables
// of p.chroma[X265_CSP_I444]. These tables are indexed with LUMA_WxH rather
// than a CHROMA_ constant.
1110 | #define SETUP_CHROMA_VERT_FUNC_DEF_444(W, H, cpu) \ | |
1111 | p.chroma[X265_CSP_I444].filter_vss[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \ | |
1112 | p.chroma[X265_CSP_I444].filter_vpp[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \ | |
1113 | p.chroma[X265_CSP_I444].filter_vps[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \ | |
1114 | p.chroma[X265_CSP_I444].filter_vsp[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu; | |
1115 | ||
// Expands SETUP_CHROMA_VERT_FUNC_DEF_444(W, H, cpu) once for every W x H pair
// listed below (8x8 through 64x64 and their rectangular variants).
// NOTE(review): 4x4 is not in this list, unlike LUMA_FILTERS; presumably it is
// registered elsewhere - confirm.
1116 | #define CHROMA_VERT_FILTERS_444(cpu) \ | |
1117 | SETUP_CHROMA_VERT_FUNC_DEF_444(8, 8, cpu); \ | |
1118 | SETUP_CHROMA_VERT_FUNC_DEF_444(8, 4, cpu); \ | |
1119 | SETUP_CHROMA_VERT_FUNC_DEF_444(4, 8, cpu); \ | |
1120 | SETUP_CHROMA_VERT_FUNC_DEF_444(16, 16, cpu); \ | |
1121 | SETUP_CHROMA_VERT_FUNC_DEF_444(16, 8, cpu); \ | |
1122 | SETUP_CHROMA_VERT_FUNC_DEF_444(8, 16, cpu); \ | |
1123 | SETUP_CHROMA_VERT_FUNC_DEF_444(16, 12, cpu); \ | |
1124 | SETUP_CHROMA_VERT_FUNC_DEF_444(12, 16, cpu); \ | |
1125 | SETUP_CHROMA_VERT_FUNC_DEF_444(16, 4, cpu); \ | |
1126 | SETUP_CHROMA_VERT_FUNC_DEF_444(4, 16, cpu); \ | |
1127 | SETUP_CHROMA_VERT_FUNC_DEF_444(32, 32, cpu); \ | |
1128 | SETUP_CHROMA_VERT_FUNC_DEF_444(32, 16, cpu); \ | |
1129 | SETUP_CHROMA_VERT_FUNC_DEF_444(16, 32, cpu); \ | |
1130 | SETUP_CHROMA_VERT_FUNC_DEF_444(32, 24, cpu); \ | |
1131 | SETUP_CHROMA_VERT_FUNC_DEF_444(24, 32, cpu); \ | |
1132 | SETUP_CHROMA_VERT_FUNC_DEF_444(32, 8, cpu); \ | |
1133 | SETUP_CHROMA_VERT_FUNC_DEF_444(8, 32, cpu); \ | |
1134 | SETUP_CHROMA_VERT_FUNC_DEF_444(64, 64, cpu); \ | |
1135 | SETUP_CHROMA_VERT_FUNC_DEF_444(64, 32, cpu); \ | |
1136 | SETUP_CHROMA_VERT_FUNC_DEF_444(32, 64, cpu); \ | |
1137 | SETUP_CHROMA_VERT_FUNC_DEF_444(64, 48, cpu); \ | |
1138 | SETUP_CHROMA_VERT_FUNC_DEF_444(48, 64, cpu); \ | |
1139 | SETUP_CHROMA_VERT_FUNC_DEF_444(64, 16, cpu); \ | |
1140 | SETUP_CHROMA_VERT_FUNC_DEF_444(16, 64, cpu); | |
1141 | ||
// For one W x H size, stores x265_interp_4tap_horiz_pp_WxH<cpu> and
// x265_interp_4tap_horiz_ps_WxH<cpu> into the filter_hpp / filter_hps tables
// of p.chroma[X265_CSP_I420] at index CHROMA_WxH.
1142 | #define SETUP_CHROMA_HORIZ_FUNC_DEF(W, H, cpu) \ | |
1143 | p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \ | |
1144 | p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; | |
1145 | ||
// Expands SETUP_CHROMA_HORIZ_FUNC_DEF(W, H, cpu) once for every W x H pair
// listed below (2x4 through 32x32), forwarding `cpu` unchanged.
1146 | #define CHROMA_HORIZ_FILTERS(cpu) \ | |
1147 | SETUP_CHROMA_HORIZ_FUNC_DEF(4, 4, cpu); \ | |
1148 | SETUP_CHROMA_HORIZ_FUNC_DEF(4, 2, cpu); \ | |
1149 | SETUP_CHROMA_HORIZ_FUNC_DEF(2, 4, cpu); \ | |
1150 | SETUP_CHROMA_HORIZ_FUNC_DEF(8, 8, cpu); \ | |
1151 | SETUP_CHROMA_HORIZ_FUNC_DEF(8, 4, cpu); \ | |
1152 | SETUP_CHROMA_HORIZ_FUNC_DEF(4, 8, cpu); \ | |
1153 | SETUP_CHROMA_HORIZ_FUNC_DEF(8, 6, cpu); \ | |
1154 | SETUP_CHROMA_HORIZ_FUNC_DEF(6, 8, cpu); \ | |
1155 | SETUP_CHROMA_HORIZ_FUNC_DEF(8, 2, cpu); \ | |
1156 | SETUP_CHROMA_HORIZ_FUNC_DEF(2, 8, cpu); \ | |
1157 | SETUP_CHROMA_HORIZ_FUNC_DEF(16, 16, cpu); \ | |
1158 | SETUP_CHROMA_HORIZ_FUNC_DEF(16, 8, cpu); \ | |
1159 | SETUP_CHROMA_HORIZ_FUNC_DEF(8, 16, cpu); \ | |
1160 | SETUP_CHROMA_HORIZ_FUNC_DEF(16, 12, cpu); \ | |
1161 | SETUP_CHROMA_HORIZ_FUNC_DEF(12, 16, cpu); \ | |
1162 | SETUP_CHROMA_HORIZ_FUNC_DEF(16, 4, cpu); \ | |
1163 | SETUP_CHROMA_HORIZ_FUNC_DEF(4, 16, cpu); \ | |
1164 | SETUP_CHROMA_HORIZ_FUNC_DEF(32, 32, cpu); \ | |
1165 | SETUP_CHROMA_HORIZ_FUNC_DEF(32, 16, cpu); \ | |
1166 | SETUP_CHROMA_HORIZ_FUNC_DEF(16, 32, cpu); \ | |
1167 | SETUP_CHROMA_HORIZ_FUNC_DEF(32, 24, cpu); \ | |
1168 | SETUP_CHROMA_HORIZ_FUNC_DEF(24, 32, cpu); \ | |
1169 | SETUP_CHROMA_HORIZ_FUNC_DEF(32, 8, cpu); \ | |
1170 | SETUP_CHROMA_HORIZ_FUNC_DEF(8, 32, cpu); | |
1171 | ||
// For one W x H size, stores x265_interp_4tap_horiz_pp_WxH<cpu> and
// x265_interp_4tap_horiz_ps_WxH<cpu> into the filter_hpp / filter_hps tables
// of p.chroma[X265_CSP_I422] at index CHROMA422_WxH.
1172 | #define SETUP_CHROMA_HORIZ_FUNC_DEF_422(W, H, cpu) \ | |
1173 | p.chroma[X265_CSP_I422].filter_hpp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \ | |
1174 | p.chroma[X265_CSP_I422].filter_hps[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; | |
1175 | ||
// Expands SETUP_CHROMA_HORIZ_FUNC_DEF_422(W, H, cpu) once for every W x H pair
// listed below (2x8 through 32x64), forwarding `cpu` unchanged.
1176 | #define CHROMA_HORIZ_FILTERS_422(cpu) \ | |
1177 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(4, 8, cpu); \ | |
1178 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(4, 4, cpu); \ | |
1179 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(2, 8, cpu); \ | |
1180 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(8, 16, cpu); \ | |
1181 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(8, 8, cpu); \ | |
1182 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(4, 16, cpu); \ | |
1183 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(8, 12, cpu); \ | |
1184 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(6, 16, cpu); \ | |
1185 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(8, 4, cpu); \ | |
1186 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(2, 16, cpu); \ | |
1187 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(16, 32, cpu); \ | |
1188 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(16, 16, cpu); \ | |
1189 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(8, 32, cpu); \ | |
1190 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(16, 24, cpu); \ | |
1191 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(12, 32, cpu); \ | |
1192 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(16, 8, cpu); \ | |
1193 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(4, 32, cpu); \ | |
1194 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(32, 64, cpu); \ | |
1195 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(32, 32, cpu); \ | |
1196 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(16, 64, cpu); \ | |
1197 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(32, 48, cpu); \ | |
1198 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(24, 64, cpu); \ | |
1199 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(32, 16, cpu); \ | |
1200 | SETUP_CHROMA_HORIZ_FUNC_DEF_422(8, 64, cpu); | |
1201 | ||
/* Hooks up one WIDTH x HEIGHT partition of the 4:4:4 chroma table.
 *
 * Expands to two assignments on a variable named `p` that must be in scope at
 * the expansion site: the symbols x265_interp_4tap_horiz_ps_<W>x<H><SUFFIX>
 * and x265_interp_4tap_horiz_pp_<W>x<H><SUFFIX> are stored in the filter_hps
 * and filter_hpp arrays of p.chroma[X265_CSP_I444].  Unlike the 4:2:2 helper,
 * the slot index here is the LUMA_<W>x<H> enumerator.  SUFFIX is pasted
 * verbatim onto the symbol name, so it has to carry its own leading
 * underscore (e.g. _sse4). */
#define SETUP_CHROMA_HORIZ_FUNC_DEF_444(WIDTH, HEIGHT, SUFFIX) \
    p.chroma[X265_CSP_I444].filter_hps[LUMA_ ## WIDTH ## x ## HEIGHT] = x265_interp_4tap_horiz_ps_ ## WIDTH ## x ## HEIGHT ## SUFFIX; \
    p.chroma[X265_CSP_I444].filter_hpp[LUMA_ ## WIDTH ## x ## HEIGHT] = x265_interp_4tap_horiz_pp_ ## WIDTH ## x ## HEIGHT ## SUFFIX;
1205 | ||
/* Invokes SETUP_CHROMA_HORIZ_FUNC_DEF_444 once for each of the 24 partition
 * sizes listed below, forwarding SUFFIX unchanged.  Entries are grouped by
 * width, then by height; every entry targets a different table slot, so the
 * listing order has no effect on the result. */
#define CHROMA_HORIZ_FILTERS_444(SUFFIX) \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(4, 8, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(4, 16, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(8, 4, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(8, 8, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(8, 16, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(8, 32, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(12, 16, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(16, 4, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(16, 8, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(16, 12, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(16, 16, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(16, 32, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(16, 64, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(24, 32, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(32, 8, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(32, 16, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(32, 24, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(32, 32, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(32, 64, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(48, 64, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(64, 16, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(64, 32, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(64, 48, SUFFIX); \
    SETUP_CHROMA_HORIZ_FUNC_DEF_444(64, 64, SUFFIX);
1231 | ||
1232 | namespace x265 { | |
1233 | // private x265 namespace | |
1234 | ||
#if HIGH_BIT_DEPTH
/* Produces the predictions for intra modes 2 through 34 in a single call by
 * dispatching to the per-mode entries of primitives.intra_pred (the original
 * note says this mirrors the C reference in intrapred.cpp, but goes through
 * the optimized primitive table).
 *
 * log2Size        - template parameter; the block edge is 1 << log2Size and
 *                   the primitive tables are indexed with log2Size - 2
 * dest            - output; mode m is written at offset (m - 2) * size * size
 * above0 / left0  - neighbour arrays used when the filter flag for the mode
 *                   is clear for this block size
 * above1 / left1  - neighbour arrays used when g_intraFilterFlags[mode] has
 *                   the bit equal to the block size set
 * bLuma           - forwarded untouched to every intra_pred call
 *
 * Modes below 18 are predicted into an aligned scratch block and then run
 * through primitives.transpose into dest; modes 18 and above are predicted
 * straight into dest. */
template<int log2Size>
void intra_allangs(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, int bLuma)
{
    const int sizeIdx = log2Size - 2;
    const int size = 1 << log2Size;
    ALIGN_VAR_32(pixel, scratch[32 * 32]);

    for (int mode = 2; mode <= 34; mode++)
    {
        // One flag lookup decides which pair of neighbour arrays is used.
        const bool useAlternate = (g_intraFilterFlags[mode] & size) != 0;
        pixel *refAbove = useAlternate ? above1 : above0;
        pixel *refLeft = useAlternate ? left1 : left0;

        // Each mode owns a size * size slice of the output.
        pixel *slice = dest + (mode - 2) * (size * size);

        if (mode >= 18)
        {
            primitives.intra_pred[mode][sizeIdx](slice, size, refLeft, refAbove, mode, bLuma);
            continue;
        }

        // Modes 2..17: predict into the scratch block, then transpose it out.
        primitives.intra_pred[mode][sizeIdx](scratch, size, refLeft, refAbove, mode, bLuma);
        primitives.transpose[sizeIdx](slice, scratch, size);
    }
}
#endif
1260 | ||
1261 | void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) | |
1262 | { | |
1263 | #if HIGH_BIT_DEPTH | |
1264 | if (cpuMask & X265_CPU_SSE2) | |
1265 | { | |
1266 | INIT8(sad, _mmx2); | |
1267 | INIT2(sad, _sse2); | |
1268 | SAD(sse2); | |
1269 | ||
1270 | INIT6(satd, _sse2); | |
1271 | HEVC_SATD(sse2); | |
1272 | p.satd[LUMA_4x4] = x265_pixel_satd_4x4_mmx2; | |
1273 | ||
1274 | p.sa8d_inter[LUMA_4x4] = x265_pixel_satd_4x4_mmx2; | |
1275 | SA8D_INTER_FROM_BLOCK(sse2); | |
1276 | p.sa8d_inter[LUMA_8x8] = x265_pixel_sa8d_8x8_sse2; | |
1277 | p.sa8d_inter[LUMA_16x16] = x265_pixel_sa8d_16x16_sse2; | |
1278 | ||
1279 | p.sse_ss[LUMA_4x4] = x265_pixel_ssd_ss_4x4_mmx2; | |
1280 | p.sse_ss[LUMA_4x8] = x265_pixel_ssd_ss_4x8_mmx2; | |
1281 | p.sse_ss[LUMA_4x16] = x265_pixel_ssd_ss_4x16_mmx2; | |
1282 | p.sse_ss[LUMA_8x4] = x265_pixel_ssd_ss_8x4_sse2; | |
1283 | p.sse_ss[LUMA_8x8] = x265_pixel_ssd_ss_8x8_sse2; | |
1284 | p.sse_ss[LUMA_8x16] = x265_pixel_ssd_ss_8x16_sse2; | |
1285 | p.sse_ss[LUMA_8x32] = x265_pixel_ssd_ss_8x32_sse2; | |
1286 | p.sse_ss[LUMA_12x16] = x265_pixel_ssd_ss_12x16_sse2; | |
1287 | p.sse_ss[LUMA_16x4] = x265_pixel_ssd_ss_16x4_sse2; | |
1288 | p.sse_ss[LUMA_16x8] = x265_pixel_ssd_ss_16x8_sse2; | |
1289 | p.sse_ss[LUMA_16x12] = x265_pixel_ssd_ss_16x12_sse2; | |
1290 | p.sse_ss[LUMA_16x16] = x265_pixel_ssd_ss_16x16_sse2; | |
1291 | p.sse_ss[LUMA_16x32] = x265_pixel_ssd_ss_16x32_sse2; | |
1292 | p.sse_ss[LUMA_16x64] = x265_pixel_ssd_ss_16x64_sse2; | |
1293 | p.sse_ss[LUMA_24x32] = x265_pixel_ssd_ss_24x32_sse2; | |
1294 | p.sse_ss[LUMA_32x8] = x265_pixel_ssd_ss_32x8_sse2; | |
1295 | p.sse_ss[LUMA_32x16] = x265_pixel_ssd_ss_32x16_sse2; | |
1296 | p.sse_ss[LUMA_32x24] = x265_pixel_ssd_ss_32x24_sse2; | |
1297 | p.sse_ss[LUMA_32x32] = x265_pixel_ssd_ss_32x32_sse2; | |
1298 | p.sse_ss[LUMA_32x64] = x265_pixel_ssd_ss_32x64_sse2; | |
1299 | p.sse_ss[LUMA_48x64] = x265_pixel_ssd_ss_48x64_sse2; | |
1300 | p.sse_ss[LUMA_64x16] = x265_pixel_ssd_ss_64x16_sse2; | |
1301 | p.sse_ss[LUMA_64x32] = x265_pixel_ssd_ss_64x32_sse2; | |
1302 | p.sse_ss[LUMA_64x48] = x265_pixel_ssd_ss_64x48_sse2; | |
1303 | p.sse_ss[LUMA_64x64] = x265_pixel_ssd_ss_64x64_sse2; | |
1304 | ||
1305 | p.transpose[BLOCK_4x4] = x265_transpose4_sse2; | |
1306 | p.transpose[BLOCK_8x8] = x265_transpose8_sse2; | |
1307 | p.transpose[BLOCK_16x16] = x265_transpose16_sse2; | |
1308 | p.transpose[BLOCK_32x32] = x265_transpose32_sse2; | |
1309 | p.transpose[BLOCK_64x64] = x265_transpose64_sse2; | |
1310 | ||
1311 | p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_sse2; | |
1312 | p.ssim_end_4 = x265_pixel_ssim_end4_sse2; | |
1313 | PIXEL_AVG(sse2); | |
1314 | PIXEL_AVG_W4(mmx2); | |
1315 | LUMA_VAR(_sse2); | |
1316 | ||
1317 | SAD_X3(sse2); | |
1318 | p.sad_x3[LUMA_4x4] = x265_pixel_sad_x3_4x4_mmx2; | |
1319 | p.sad_x3[LUMA_4x8] = x265_pixel_sad_x3_4x8_mmx2; | |
1320 | p.sad_x3[LUMA_4x16] = x265_pixel_sad_x3_4x16_mmx2; | |
1321 | p.sad_x3[LUMA_8x4] = x265_pixel_sad_x3_8x4_sse2; | |
1322 | p.sad_x3[LUMA_8x8] = x265_pixel_sad_x3_8x8_sse2; | |
1323 | p.sad_x3[LUMA_8x16] = x265_pixel_sad_x3_8x16_sse2; | |
1324 | p.sad_x3[LUMA_8x32] = x265_pixel_sad_x3_8x32_sse2; | |
1325 | p.sad_x3[LUMA_16x4] = x265_pixel_sad_x3_16x4_sse2; | |
1326 | p.sad_x3[LUMA_12x16] = x265_pixel_sad_x3_12x16_mmx2; | |
1327 | ||
1328 | SAD_X4(sse2); | |
1329 | p.sad_x4[LUMA_4x4] = x265_pixel_sad_x4_4x4_mmx2; | |
1330 | p.sad_x4[LUMA_4x8] = x265_pixel_sad_x4_4x8_mmx2; | |
1331 | p.sad_x4[LUMA_4x16] = x265_pixel_sad_x4_4x16_mmx2; | |
1332 | p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_sse2; | |
1333 | p.sad_x4[LUMA_8x8] = x265_pixel_sad_x4_8x8_sse2; | |
1334 | p.sad_x4[LUMA_8x16] = x265_pixel_sad_x4_8x16_sse2; | |
1335 | p.sad_x4[LUMA_8x32] = x265_pixel_sad_x4_8x32_sse2; | |
1336 | p.sad_x4[LUMA_16x4] = x265_pixel_sad_x4_16x4_sse2; | |
1337 | p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_mmx2; | |
1338 | ||
b53f7c52 JB |
1339 | p.cpy2Dto1D_shl[BLOCK_4x4] = x265_cpy2Dto1D_shl_4_sse2; |
1340 | p.cpy2Dto1D_shl[BLOCK_8x8] = x265_cpy2Dto1D_shl_8_sse2; | |
1341 | p.cpy2Dto1D_shl[BLOCK_16x16] = x265_cpy2Dto1D_shl_16_sse2; | |
1342 | p.cpy2Dto1D_shl[BLOCK_32x32] = x265_cpy2Dto1D_shl_32_sse2; | |
1343 | p.cpy2Dto1D_shr[BLOCK_4x4] = x265_cpy2Dto1D_shr_4_sse2; | |
1344 | p.cpy2Dto1D_shr[BLOCK_8x8] = x265_cpy2Dto1D_shr_8_sse2; | |
1345 | p.cpy2Dto1D_shr[BLOCK_16x16] = x265_cpy2Dto1D_shr_16_sse2; | |
1346 | p.cpy2Dto1D_shr[BLOCK_32x32] = x265_cpy2Dto1D_shr_32_sse2; | |
1347 | p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_sse2; | |
1348 | p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_sse2; | |
1349 | p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_sse2; | |
1350 | p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_sse2; | |
1351 | p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_sse2; | |
1352 | p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_sse2; | |
1353 | p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_sse2; | |
1354 | p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_sse2; | |
72b9787e JB |
1355 | |
1356 | CHROMA_PIXELSUB_PS(_sse2); | |
1357 | CHROMA_PIXELSUB_PS_422(_sse2); | |
1358 | LUMA_PIXELSUB(_sse2); | |
1359 | ||
1360 | CHROMA_BLOCKCOPY(ss, _sse2); | |
1361 | CHROMA_BLOCKCOPY_422(ss, _sse2); | |
1362 | LUMA_BLOCKCOPY(ss, _sse2); | |
1363 | ||
1364 | CHROMA_VERT_FILTERS(_sse2); | |
1365 | CHROMA_VERT_FILTERS_422(_sse2); | |
1366 | CHROMA_VERT_FILTERS_444(_sse2); | |
1367 | p.luma_p2s = x265_luma_p2s_sse2; | |
b53f7c52 JB |
1368 | p.chroma[X265_CSP_I420].p2s = x265_chroma_p2s_sse2; |
1369 | p.chroma[X265_CSP_I422].p2s = x265_chroma_p2s_sse2; | |
1370 | p.chroma[X265_CSP_I444].p2s = x265_luma_p2s_sse2; // for i444 , chroma_p2s can be replaced by luma_p2s | |
72b9787e JB |
1371 | |
1372 | p.blockfill_s[BLOCK_4x4] = x265_blockfill_s_4x4_sse2; | |
1373 | p.blockfill_s[BLOCK_8x8] = x265_blockfill_s_8x8_sse2; | |
1374 | p.blockfill_s[BLOCK_16x16] = x265_blockfill_s_16x16_sse2; | |
1375 | p.blockfill_s[BLOCK_32x32] = x265_blockfill_s_32x32_sse2; | |
1376 | ||
1377 | // TODO: overflow on 12-bits mode! | |
1378 | p.ssd_s[BLOCK_4x4] = x265_pixel_ssd_s_4_sse2; | |
1379 | p.ssd_s[BLOCK_8x8] = x265_pixel_ssd_s_8_sse2; | |
1380 | p.ssd_s[BLOCK_16x16] = x265_pixel_ssd_s_16_sse2; | |
1381 | p.ssd_s[BLOCK_32x32] = x265_pixel_ssd_s_32_sse2; | |
1382 | ||
1383 | p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2; | |
1384 | p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2; | |
1385 | p.calcresidual[BLOCK_16x16] = x265_getResidual16_sse2; | |
1386 | p.calcresidual[BLOCK_32x32] = x265_getResidual32_sse2; | |
1387 | ||
1388 | p.dct[DCT_4x4] = x265_dct4_sse2; | |
1389 | p.idct[IDCT_4x4] = x265_idct4_sse2; | |
b53f7c52 JB |
1390 | #if X86_64 |
1391 | p.idct[IDCT_8x8] = x265_idct8_sse2; | |
1392 | #endif | |
72b9787e JB |
1393 | p.idct[IDST_4x4] = x265_idst4_sse2; |
1394 | ||
1395 | LUMA_SS_FILTERS(_sse2); | |
1396 | } | |
1397 | if (cpuMask & X265_CPU_SSSE3) | |
1398 | { | |
1399 | p.scale1D_128to64 = x265_scale1D_128to64_ssse3; | |
1400 | p.scale2D_64to32 = x265_scale2D_64to32_ssse3; | |
1401 | ||
1402 | INTRA_ANG_SSSE3(ssse3); | |
1403 | ||
1404 | p.dct[DST_4x4] = x265_dst4_ssse3; | |
1405 | p.idct[IDCT_8x8] = x265_idct8_ssse3; | |
1406 | p.count_nonzero = x265_count_nonzero_ssse3; | |
1407 | } | |
1408 | if (cpuMask & X265_CPU_SSE4) | |
1409 | { | |
1410 | LUMA_ADDAVG(_sse4); | |
1411 | CHROMA_ADDAVG(_sse4); | |
1412 | CHROMA_ADDAVG_422(_sse4); | |
1413 | LUMA_FILTERS(_sse4); | |
1414 | CHROMA_HORIZ_FILTERS(_sse4); | |
1415 | CHROMA_VERT_FILTERS_SSE4(_sse4); | |
1416 | CHROMA_HORIZ_FILTERS_422(_sse4); | |
1417 | CHROMA_VERT_FILTERS_SSE4_422(_sse4); | |
1418 | CHROMA_HORIZ_FILTERS_444(_sse4); | |
1419 | ||
1420 | p.dct[DCT_8x8] = x265_dct8_sse4; | |
1421 | p.quant = x265_quant_sse4; | |
1422 | p.nquant = x265_nquant_sse4; | |
1423 | p.dequant_normal = x265_dequant_normal_sse4; | |
72b9787e JB |
1424 | p.intra_pred[0][BLOCK_4x4] = x265_intra_pred_planar4_sse4; |
1425 | p.intra_pred[0][BLOCK_8x8] = x265_intra_pred_planar8_sse4; | |
1426 | p.intra_pred[0][BLOCK_16x16] = x265_intra_pred_planar16_sse4; | |
1427 | p.intra_pred[0][BLOCK_32x32] = x265_intra_pred_planar32_sse4; | |
1428 | ||
1429 | p.intra_pred[1][BLOCK_4x4] = x265_intra_pred_dc4_sse4; | |
1430 | p.intra_pred[1][BLOCK_8x8] = x265_intra_pred_dc8_sse4; | |
1431 | p.intra_pred[1][BLOCK_16x16] = x265_intra_pred_dc16_sse4; | |
1432 | p.intra_pred[1][BLOCK_32x32] = x265_intra_pred_dc32_sse4; | |
1433 | p.planecopy_cp = x265_upShift_8_sse4; | |
1434 | ||
1435 | INTRA_ANG_SSE4_COMMON(sse4); | |
1436 | INTRA_ANG_SSE4_HIGH(sse4); | |
1437 | } | |
1438 | if (cpuMask & X265_CPU_XOP) | |
1439 | { | |
b53f7c52 | 1440 | p.frameInitLowres = x265_frame_init_lowres_core_xop; |
72b9787e JB |
1441 | SA8D_INTER_FROM_BLOCK(xop); |
1442 | INIT7(satd, _xop); | |
1443 | HEVC_SATD(xop); | |
1444 | } | |
1445 | if (cpuMask & X265_CPU_AVX2) | |
1446 | { | |
1447 | p.dct[DCT_4x4] = x265_dct4_avx2; | |
1448 | p.quant = x265_quant_avx2; | |
1449 | p.nquant = x265_nquant_avx2; | |
1450 | p.dequant_normal = x265_dequant_normal_avx2; | |
1451 | p.scale1D_128to64 = x265_scale1D_128to64_avx2; | |
b53f7c52 JB |
1452 | p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_avx2; |
1453 | p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_avx2; | |
1454 | p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_avx2; | |
1455 | p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_avx2; | |
1456 | p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_avx2; | |
1457 | p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_avx2; | |
1458 | p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_avx2; | |
1459 | p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_avx2; | |
72b9787e JB |
1460 | #if X86_64 |
1461 | p.dct[DCT_8x8] = x265_dct8_avx2; | |
1462 | p.dct[DCT_16x16] = x265_dct16_avx2; | |
1463 | p.dct[DCT_32x32] = x265_dct32_avx2; | |
1464 | p.idct[IDCT_4x4] = x265_idct4_avx2; | |
1465 | p.idct[IDCT_8x8] = x265_idct8_avx2; | |
1466 | p.idct[IDCT_16x16] = x265_idct16_avx2; | |
1467 | p.idct[IDCT_32x32] = x265_idct32_avx2; | |
72b9787e JB |
1468 | p.transpose[BLOCK_8x8] = x265_transpose8_avx2; |
1469 | p.transpose[BLOCK_16x16] = x265_transpose16_avx2; | |
1470 | p.transpose[BLOCK_32x32] = x265_transpose32_avx2; | |
1471 | p.transpose[BLOCK_64x64] = x265_transpose64_avx2; | |
1472 | #endif | |
1473 | } | |
1474 | /* at HIGH_BIT_DEPTH, pixel == short so we can reuse a number of primitives */ | |
1475 | for (int i = 0; i < NUM_LUMA_PARTITIONS; i++) | |
1476 | { | |
1477 | p.sse_pp[i] = (pixelcmp_t)p.sse_ss[i]; | |
1478 | p.sse_sp[i] = (pixelcmp_sp_t)p.sse_ss[i]; | |
1479 | } | |
1480 | ||
1481 | for (int i = 0; i < NUM_LUMA_PARTITIONS; i++) | |
1482 | { | |
1483 | p.luma_copy_ps[i] = (copy_ps_t)p.luma_copy_ss[i]; | |
1484 | p.luma_copy_sp[i] = (copy_sp_t)p.luma_copy_ss[i]; | |
1485 | p.luma_copy_pp[i] = (copy_pp_t)p.luma_copy_ss[i]; | |
1486 | } | |
1487 | ||
1488 | for (int i = 0; i < NUM_CHROMA_PARTITIONS; i++) | |
1489 | { | |
1490 | p.chroma[X265_CSP_I420].copy_ps[i] = (copy_ps_t)p.chroma[X265_CSP_I420].copy_ss[i]; | |
1491 | p.chroma[X265_CSP_I420].copy_sp[i] = (copy_sp_t)p.chroma[X265_CSP_I420].copy_ss[i]; | |
1492 | p.chroma[X265_CSP_I420].copy_pp[i] = (copy_pp_t)p.chroma[X265_CSP_I420].copy_ss[i]; | |
1493 | } | |
1494 | ||
1495 | for (int i = 0; i < NUM_CHROMA_PARTITIONS; i++) | |
1496 | { | |
1497 | p.chroma[X265_CSP_I422].copy_ps[i] = (copy_ps_t)p.chroma[X265_CSP_I422].copy_ss[i]; | |
1498 | p.chroma[X265_CSP_I422].copy_sp[i] = (copy_sp_t)p.chroma[X265_CSP_I422].copy_ss[i]; | |
1499 | p.chroma[X265_CSP_I422].copy_pp[i] = (copy_pp_t)p.chroma[X265_CSP_I422].copy_ss[i]; | |
1500 | } | |
1501 | ||
1502 | if (p.intra_pred[0][0] && p.transpose[0]) | |
1503 | { | |
1504 | p.intra_pred_allangs[BLOCK_4x4] = intra_allangs<2>; | |
1505 | p.intra_pred_allangs[BLOCK_8x8] = intra_allangs<3>; | |
1506 | p.intra_pred_allangs[BLOCK_16x16] = intra_allangs<4>; | |
1507 | p.intra_pred_allangs[BLOCK_32x32] = intra_allangs<5>; | |
1508 | } | |
1509 | ||
1510 | #else // if HIGH_BIT_DEPTH | |
1511 | if (cpuMask & X265_CPU_SSE2) | |
1512 | { | |
1513 | INIT8_NAME(sse_pp, ssd, _mmx); | |
1514 | INIT8(sad, _mmx2); | |
1515 | INIT8(sad_x3, _mmx2); | |
1516 | INIT8(sad_x4, _mmx2); | |
1517 | p.satd[LUMA_4x4] = x265_pixel_satd_4x4_mmx2; | |
1518 | p.sa8d_inter[LUMA_4x4] = x265_pixel_satd_4x4_mmx2; | |
b53f7c52 | 1519 | p.frameInitLowres = x265_frame_init_lowres_core_mmx2; |
72b9787e JB |
1520 | |
1521 | PIXEL_AVG(sse2); | |
1522 | PIXEL_AVG_W4(mmx2); | |
1523 | ||
1524 | LUMA_VAR(_sse2); | |
1525 | ||
1526 | ASSGN_SSE(sse2); | |
1527 | ASSGN_SSE_SS(sse2); | |
1528 | INIT2(sad, _sse2); | |
1529 | SAD(sse2); | |
1530 | INIT2(sad_x3, _sse2); | |
1531 | INIT2(sad_x4, _sse2); | |
1532 | HEVC_SATD(sse2); | |
1533 | ||
1534 | CHROMA_BLOCKCOPY(ss, _sse2); | |
1535 | CHROMA_BLOCKCOPY(pp, _sse2); | |
1536 | CHROMA_BLOCKCOPY_422(ss, _sse2); | |
1537 | CHROMA_BLOCKCOPY_422(pp, _sse2); | |
1538 | LUMA_BLOCKCOPY(ss, _sse2); | |
1539 | LUMA_BLOCKCOPY(pp, _sse2); | |
1540 | LUMA_BLOCKCOPY(sp, _sse2); | |
1541 | CHROMA_BLOCKCOPY_SP(_sse2); | |
1542 | CHROMA_BLOCKCOPY_SP_422(_sse2); | |
1543 | ||
1544 | CHROMA_SS_FILTERS_420(_sse2); | |
1545 | CHROMA_SS_FILTERS_422(_sse2); | |
1546 | CHROMA_SS_FILTERS_444(_sse2); | |
1547 | CHROMA_SP_FILTERS_420(_sse2); | |
1548 | CHROMA_SP_FILTERS_422(_sse2); | |
1549 | CHROMA_SP_FILTERS_444(_sse2); | |
1550 | LUMA_SS_FILTERS(_sse2); | |
1551 | ||
1552 | // This function pointer initialization is temporary will be removed | |
1553 | // later with macro definitions. It is used to avoid linker errors | |
1554 | // until all partitions are coded and commit smaller patches, easier to | |
1555 | // review. | |
1556 | ||
1557 | p.blockfill_s[BLOCK_4x4] = x265_blockfill_s_4x4_sse2; | |
1558 | p.blockfill_s[BLOCK_8x8] = x265_blockfill_s_8x8_sse2; | |
1559 | p.blockfill_s[BLOCK_16x16] = x265_blockfill_s_16x16_sse2; | |
1560 | p.blockfill_s[BLOCK_32x32] = x265_blockfill_s_32x32_sse2; | |
1561 | ||
1562 | p.ssd_s[BLOCK_4x4] = x265_pixel_ssd_s_4_sse2; | |
1563 | p.ssd_s[BLOCK_8x8] = x265_pixel_ssd_s_8_sse2; | |
1564 | p.ssd_s[BLOCK_16x16] = x265_pixel_ssd_s_16_sse2; | |
1565 | p.ssd_s[BLOCK_32x32] = x265_pixel_ssd_s_32_sse2; | |
1566 | ||
b53f7c52 | 1567 | p.frameInitLowres = x265_frame_init_lowres_core_sse2; |
72b9787e JB |
1568 | SA8D_INTER_FROM_BLOCK(sse2); |
1569 | ||
b53f7c52 JB |
1570 | p.cpy2Dto1D_shl[BLOCK_4x4] = x265_cpy2Dto1D_shl_4_sse2; |
1571 | p.cpy2Dto1D_shl[BLOCK_8x8] = x265_cpy2Dto1D_shl_8_sse2; | |
1572 | p.cpy2Dto1D_shl[BLOCK_16x16] = x265_cpy2Dto1D_shl_16_sse2; | |
1573 | p.cpy2Dto1D_shl[BLOCK_32x32] = x265_cpy2Dto1D_shl_32_sse2; | |
1574 | p.cpy2Dto1D_shr[BLOCK_4x4] = x265_cpy2Dto1D_shr_4_sse2; | |
1575 | p.cpy2Dto1D_shr[BLOCK_8x8] = x265_cpy2Dto1D_shr_8_sse2; | |
1576 | p.cpy2Dto1D_shr[BLOCK_16x16] = x265_cpy2Dto1D_shr_16_sse2; | |
1577 | p.cpy2Dto1D_shr[BLOCK_32x32] = x265_cpy2Dto1D_shr_32_sse2; | |
1578 | p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_sse2; | |
1579 | p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_sse2; | |
1580 | p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_sse2; | |
1581 | p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_sse2; | |
1582 | p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_sse2; | |
1583 | p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_sse2; | |
1584 | p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_sse2; | |
1585 | p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_sse2; | |
1586 | ||
72b9787e JB |
1587 | p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2; |
1588 | p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2; | |
1589 | p.transpose[BLOCK_4x4] = x265_transpose4_sse2; | |
1590 | p.transpose[BLOCK_8x8] = x265_transpose8_sse2; | |
1591 | p.transpose[BLOCK_16x16] = x265_transpose16_sse2; | |
1592 | p.transpose[BLOCK_32x32] = x265_transpose32_sse2; | |
1593 | p.transpose[BLOCK_64x64] = x265_transpose64_sse2; | |
1594 | p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_sse2; | |
1595 | p.ssim_end_4 = x265_pixel_ssim_end4_sse2; | |
b53f7c52 | 1596 | |
72b9787e JB |
1597 | p.dct[DCT_4x4] = x265_dct4_sse2; |
1598 | p.idct[IDCT_4x4] = x265_idct4_sse2; | |
b53f7c52 JB |
1599 | #if X86_64 |
1600 | p.idct[IDCT_8x8] = x265_idct8_sse2; | |
1601 | #endif | |
72b9787e | 1602 | p.idct[IDST_4x4] = x265_idst4_sse2; |
b53f7c52 | 1603 | |
72b9787e | 1604 | p.planecopy_sp = x265_downShift_16_sse2; |
72b9787e JB |
1605 | } |
1606 | if (cpuMask & X265_CPU_SSSE3) | |
1607 | { | |
b53f7c52 | 1608 | p.frameInitLowres = x265_frame_init_lowres_core_ssse3; |
72b9787e JB |
1609 | SA8D_INTER_FROM_BLOCK(ssse3); |
1610 | p.sse_pp[LUMA_4x4] = x265_pixel_ssd_4x4_ssse3; | |
1611 | ASSGN_SSE(ssse3); | |
1612 | PIXEL_AVG(ssse3); | |
1613 | PIXEL_AVG_W4(ssse3); | |
1614 | ||
1615 | INTRA_ANG_SSSE3(ssse3); | |
1616 | ||
1617 | p.scale1D_128to64 = x265_scale1D_128to64_ssse3; | |
1618 | p.scale2D_64to32 = x265_scale2D_64to32_ssse3; | |
1619 | SAD_X3(ssse3); | |
1620 | SAD_X4(ssse3); | |
1621 | p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_ssse3; | |
1622 | p.sad_x4[LUMA_8x8] = x265_pixel_sad_x4_8x8_ssse3; | |
1623 | p.sad_x3[LUMA_8x16] = x265_pixel_sad_x3_8x16_ssse3; | |
1624 | p.sad_x4[LUMA_8x16] = x265_pixel_sad_x4_8x16_ssse3; | |
1625 | p.sad_x3[LUMA_8x32] = x265_pixel_sad_x3_8x32_ssse3; | |
1626 | p.sad_x4[LUMA_8x32] = x265_pixel_sad_x4_8x32_ssse3; | |
1627 | ||
1628 | p.sad_x3[LUMA_12x16] = x265_pixel_sad_x3_12x16_ssse3; | |
1629 | p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_ssse3; | |
1630 | ||
1631 | p.luma_hvpp[LUMA_8x8] = x265_interp_8tap_hv_pp_8x8_ssse3; | |
1632 | p.luma_p2s = x265_luma_p2s_ssse3; | |
b53f7c52 JB |
1633 | p.chroma[X265_CSP_I420].p2s = x265_chroma_p2s_ssse3; |
1634 | p.chroma[X265_CSP_I422].p2s = x265_chroma_p2s_ssse3; | |
1635 | p.chroma[X265_CSP_I444].p2s = x265_luma_p2s_ssse3; // for i444, chroma_p2s can use luma_p2s | |
72b9787e JB |
1636 | |
1637 | p.dct[DST_4x4] = x265_dst4_ssse3; | |
1638 | p.idct[IDCT_8x8] = x265_idct8_ssse3; | |
1639 | p.count_nonzero = x265_count_nonzero_ssse3; | |
1640 | } | |
1641 | if (cpuMask & X265_CPU_SSE4) | |
1642 | { | |
1643 | p.saoCuOrgE0 = x265_saoCuOrgE0_sse4; | |
1644 | ||
1645 | LUMA_ADDAVG(_sse4); | |
1646 | CHROMA_ADDAVG(_sse4); | |
1647 | CHROMA_ADDAVG_422(_sse4); | |
72b9787e JB |
1648 | |
1649 | // TODO: check POPCNT flag! | |
1650 | p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_sse4; | |
1651 | p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_sse4; | |
1652 | p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_sse4; | |
1653 | p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_sse4; | |
1654 | ||
1655 | HEVC_SATD(sse4); | |
1656 | SA8D_INTER_FROM_BLOCK(sse4); | |
1657 | ||
1658 | p.sse_pp[LUMA_12x16] = x265_pixel_ssd_12x16_sse4; | |
1659 | p.sse_pp[LUMA_24x32] = x265_pixel_ssd_24x32_sse4; | |
1660 | p.sse_pp[LUMA_48x64] = x265_pixel_ssd_48x64_sse4; | |
1661 | p.sse_pp[LUMA_64x16] = x265_pixel_ssd_64x16_sse4; | |
1662 | p.sse_pp[LUMA_64x32] = x265_pixel_ssd_64x32_sse4; | |
1663 | p.sse_pp[LUMA_64x48] = x265_pixel_ssd_64x48_sse4; | |
1664 | p.sse_pp[LUMA_64x64] = x265_pixel_ssd_64x64_sse4; | |
1665 | ||
1666 | LUMA_SSE_SP(_sse4); | |
1667 | ||
1668 | CHROMA_PIXELSUB_PS(_sse4); | |
1669 | CHROMA_PIXELSUB_PS_422(_sse4); | |
1670 | LUMA_PIXELSUB(_sse4); | |
1671 | ||
1672 | CHROMA_FILTERS_420(_sse4); | |
1673 | CHROMA_FILTERS_422(_sse4); | |
1674 | CHROMA_FILTERS_444(_sse4); | |
1675 | CHROMA_SS_FILTERS_SSE4_420(_sse4); | |
1676 | CHROMA_SS_FILTERS_SSE4_422(_sse4); | |
1677 | CHROMA_SP_FILTERS_SSE4_420(_sse4); | |
1678 | CHROMA_SP_FILTERS_SSE4_422(_sse4); | |
1679 | CHROMA_SP_FILTERS_SSE4_444(_sse4); | |
1680 | LUMA_SP_FILTERS(_sse4); | |
1681 | LUMA_FILTERS(_sse4); | |
1682 | ASSGN_SSE_SS(sse4); | |
1683 | ||
1684 | p.chroma[X265_CSP_I420].copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4; | |
1685 | p.chroma[X265_CSP_I420].copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse4; | |
1686 | p.chroma[X265_CSP_I420].copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse4; | |
1687 | CHROMA_BLOCKCOPY(ps, _sse4); | |
1688 | CHROMA_BLOCKCOPY_422(ps, _sse4); | |
1689 | LUMA_BLOCKCOPY(ps, _sse4); | |
1690 | ||
1691 | p.calcresidual[BLOCK_16x16] = x265_getResidual16_sse4; | |
1692 | p.calcresidual[BLOCK_32x32] = x265_getResidual32_sse4; | |
1693 | p.quant = x265_quant_sse4; | |
1694 | p.nquant = x265_nquant_sse4; | |
1695 | p.dequant_normal = x265_dequant_normal_sse4; | |
1696 | p.weight_pp = x265_weight_pp_sse4; | |
1697 | p.weight_sp = x265_weight_sp_sse4; | |
1698 | p.intra_pred[0][BLOCK_4x4] = x265_intra_pred_planar4_sse4; | |
1699 | p.intra_pred[0][BLOCK_8x8] = x265_intra_pred_planar8_sse4; | |
1700 | p.intra_pred[0][BLOCK_16x16] = x265_intra_pred_planar16_sse4; | |
1701 | p.intra_pred[0][BLOCK_32x32] = x265_intra_pred_planar32_sse4; | |
1702 | ||
1703 | p.intra_pred_allangs[BLOCK_4x4] = x265_all_angs_pred_4x4_sse4; | |
1704 | p.intra_pred_allangs[BLOCK_8x8] = x265_all_angs_pred_8x8_sse4; | |
1705 | p.intra_pred_allangs[BLOCK_16x16] = x265_all_angs_pred_16x16_sse4; | |
1706 | p.intra_pred_allangs[BLOCK_32x32] = x265_all_angs_pred_32x32_sse4; | |
1707 | ||
1708 | p.intra_pred[1][BLOCK_4x4] = x265_intra_pred_dc4_sse4; | |
1709 | p.intra_pred[1][BLOCK_8x8] = x265_intra_pred_dc8_sse4; | |
1710 | p.intra_pred[1][BLOCK_16x16] = x265_intra_pred_dc16_sse4; | |
1711 | p.intra_pred[1][BLOCK_32x32] = x265_intra_pred_dc32_sse4; | |
1712 | ||
1713 | INTRA_ANG_SSE4_COMMON(sse4); | |
1714 | INTRA_ANG_SSE4(sse4); | |
1715 | ||
1716 | p.dct[DCT_8x8] = x265_dct8_sse4; | |
b53f7c52 | 1717 | // p.denoiseDct = x265_denoise_dct_sse4; |
72b9787e JB |
1718 | } |
1719 | if (cpuMask & X265_CPU_AVX) | |
1720 | { | |
b53f7c52 | 1721 | p.frameInitLowres = x265_frame_init_lowres_core_avx; |
72b9787e JB |
1722 | HEVC_SATD(avx); |
1723 | SA8D_INTER_FROM_BLOCK(avx); | |
1724 | ASSGN_SSE(avx); | |
1725 | ||
1726 | ASSGN_SSE_SS(avx); | |
1727 | SAD_X3(avx); | |
1728 | SAD_X4(avx); | |
1729 | p.sad_x3[LUMA_12x16] = x265_pixel_sad_x3_12x16_avx; | |
1730 | p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_avx; | |
1731 | p.sad_x3[LUMA_16x4] = x265_pixel_sad_x3_16x4_avx; | |
1732 | p.sad_x4[LUMA_16x4] = x265_pixel_sad_x4_16x4_avx; | |
1733 | ||
1734 | p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_avx; | |
1735 | p.ssim_end_4 = x265_pixel_ssim_end4_avx; | |
1736 | p.luma_copy_ss[LUMA_64x16] = x265_blockcopy_ss_64x16_avx; | |
1737 | p.luma_copy_ss[LUMA_64x32] = x265_blockcopy_ss_64x32_avx; | |
1738 | p.luma_copy_ss[LUMA_64x48] = x265_blockcopy_ss_64x48_avx; | |
1739 | p.luma_copy_ss[LUMA_64x64] = x265_blockcopy_ss_64x64_avx; | |
1740 | ||
1741 | p.chroma[X265_CSP_I420].copy_pp[CHROMA_32x8] = x265_blockcopy_pp_32x8_avx; | |
1742 | p.luma_copy_pp[LUMA_32x8] = x265_blockcopy_pp_32x8_avx; | |
1743 | ||
1744 | p.chroma[X265_CSP_I420].copy_pp[CHROMA_32x16] = x265_blockcopy_pp_32x16_avx; | |
1745 | p.chroma[X265_CSP_I422].copy_pp[CHROMA422_32x16] = x265_blockcopy_pp_32x16_avx; | |
1746 | p.luma_copy_pp[LUMA_32x16] = x265_blockcopy_pp_32x16_avx; | |
1747 | ||
1748 | p.chroma[X265_CSP_I420].copy_pp[CHROMA_32x24] = x265_blockcopy_pp_32x24_avx; | |
1749 | p.luma_copy_pp[LUMA_32x24] = x265_blockcopy_pp_32x24_avx; | |
1750 | ||
1751 | p.chroma[X265_CSP_I420].copy_pp[CHROMA_32x32] = x265_blockcopy_pp_32x32_avx; | |
1752 | p.chroma[X265_CSP_I422].copy_pp[CHROMA422_32x32] = x265_blockcopy_pp_32x32_avx; | |
1753 | p.luma_copy_pp[LUMA_32x32] = x265_blockcopy_pp_32x32_avx; | |
1754 | ||
1755 | p.chroma[X265_CSP_I422].copy_pp[CHROMA422_32x48] = x265_blockcopy_pp_32x48_avx; | |
1756 | ||
1757 | p.chroma[X265_CSP_I422].copy_pp[CHROMA422_32x64] = x265_blockcopy_pp_32x64_avx; | |
1758 | p.luma_copy_pp[LUMA_32x64] = x265_blockcopy_pp_32x64_avx; | |
1759 | } | |
1760 | if (cpuMask & X265_CPU_XOP) | |
1761 | { | |
b53f7c52 | 1762 | p.frameInitLowres = x265_frame_init_lowres_core_xop; |
72b9787e JB |
1763 | SA8D_INTER_FROM_BLOCK(xop); |
1764 | INIT7(satd, _xop); | |
1765 | INIT5_NAME(sse_pp, ssd, _xop); | |
1766 | HEVC_SATD(xop); | |
1767 | } | |
1768 | if (cpuMask & X265_CPU_AVX2) | |
1769 | { | |
1770 | INIT2(sad_x4, _avx2); | |
1771 | INIT4(satd, _avx2); | |
1772 | INIT2_NAME(sse_pp, ssd, _avx2); | |
1773 | p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_avx2; | |
1774 | p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_avx2; | |
1775 | p.ssd_s[BLOCK_32x32] = x265_pixel_ssd_s_32_avx2; | |
1776 | ||
1777 | /* Need to update assembly code as per changed interface of the copy_cnt primitive, once | |
1778 | * code is updated, avx2 version will be enabled */ | |
1779 | ||
1780 | p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_avx2; | |
1781 | p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_avx2; | |
1782 | p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_avx2; | |
1783 | ||
1784 | p.blockfill_s[BLOCK_16x16] = x265_blockfill_s_16x16_avx2; | |
1785 | p.blockfill_s[BLOCK_32x32] = x265_blockfill_s_32x32_avx2; | |
1786 | ||
b53f7c52 JB |
1787 | p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_avx2; |
1788 | p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_avx2; | |
1789 | p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_avx2; | |
1790 | p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_avx2; | |
1791 | p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_avx2; | |
1792 | p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_avx2; | |
1793 | p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_avx2; | |
1794 | p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_avx2; | |
1795 | ||
1796 | // p.denoiseDct = x265_denoise_dct_avx2; | |
72b9787e JB |
1797 | p.dct[DCT_4x4] = x265_dct4_avx2; |
1798 | p.quant = x265_quant_avx2; | |
1799 | p.nquant = x265_nquant_avx2; | |
1800 | p.dequant_normal = x265_dequant_normal_avx2; | |
b53f7c52 | 1801 | |
72b9787e JB |
1802 | p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x4] = x265_blockcopy_ss_16x4_avx; |
1803 | p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x12] = x265_blockcopy_ss_16x12_avx; | |
1804 | p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x8] = x265_blockcopy_ss_16x8_avx; | |
1805 | p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x16] = x265_blockcopy_ss_16x16_avx; | |
1806 | p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x32] = x265_blockcopy_ss_16x32_avx; | |
1807 | p.chroma[X265_CSP_I422].copy_ss[CHROMA422_16x8] = x265_blockcopy_ss_16x8_avx; | |
1808 | p.chroma[X265_CSP_I422].copy_ss[CHROMA422_16x16] = x265_blockcopy_ss_16x16_avx; | |
1809 | p.chroma[X265_CSP_I422].copy_ss[CHROMA422_16x24] = x265_blockcopy_ss_16x24_avx; | |
1810 | p.chroma[X265_CSP_I422].copy_ss[CHROMA422_16x32] = x265_blockcopy_ss_16x32_avx; | |
1811 | p.chroma[X265_CSP_I422].copy_ss[CHROMA422_16x64] = x265_blockcopy_ss_16x64_avx; | |
1812 | p.scale1D_128to64 = x265_scale1D_128to64_avx2; | |
1813 | ||
1814 | p.weight_pp = x265_weight_pp_avx2; | |
1815 | ||
1816 | #if X86_64 | |
b53f7c52 | 1817 | |
72b9787e JB |
1818 | p.dct[DCT_8x8] = x265_dct8_avx2; |
1819 | p.dct[DCT_16x16] = x265_dct16_avx2; | |
1820 | p.dct[DCT_32x32] = x265_dct32_avx2; | |
1821 | p.idct[IDCT_4x4] = x265_idct4_avx2; | |
1822 | p.idct[IDCT_8x8] = x265_idct8_avx2; | |
1823 | p.idct[IDCT_16x16] = x265_idct16_avx2; | |
1824 | p.idct[IDCT_32x32] = x265_idct32_avx2; | |
1825 | ||
1826 | p.transpose[BLOCK_8x8] = x265_transpose8_avx2; | |
1827 | p.transpose[BLOCK_16x16] = x265_transpose16_avx2; | |
1828 | p.transpose[BLOCK_32x32] = x265_transpose32_avx2; | |
1829 | p.transpose[BLOCK_64x64] = x265_transpose64_avx2; | |
b53f7c52 JB |
1830 | |
1831 | p.luma_vpp[LUMA_12x16] = x265_interp_8tap_vert_pp_12x16_avx2; | |
1832 | ||
1833 | p.luma_vpp[LUMA_16x4] = x265_interp_8tap_vert_pp_16x4_avx2; | |
1834 | p.luma_vpp[LUMA_16x8] = x265_interp_8tap_vert_pp_16x8_avx2; | |
1835 | p.luma_vpp[LUMA_16x12] = x265_interp_8tap_vert_pp_16x12_avx2; | |
1836 | p.luma_vpp[LUMA_16x16] = x265_interp_8tap_vert_pp_16x16_avx2; | |
1837 | p.luma_vpp[LUMA_16x32] = x265_interp_8tap_vert_pp_16x32_avx2; | |
1838 | p.luma_vpp[LUMA_16x64] = x265_interp_8tap_vert_pp_16x64_avx2; | |
1839 | ||
1840 | p.luma_vpp[LUMA_24x32] = x265_interp_8tap_vert_pp_24x32_avx2; | |
1841 | ||
1842 | p.luma_vpp[LUMA_32x8] = x265_interp_8tap_vert_pp_32x8_avx2; | |
1843 | p.luma_vpp[LUMA_32x16] = x265_interp_8tap_vert_pp_32x16_avx2; | |
1844 | p.luma_vpp[LUMA_32x24] = x265_interp_8tap_vert_pp_32x24_avx2; | |
1845 | p.luma_vpp[LUMA_32x32] = x265_interp_8tap_vert_pp_32x32_avx2; | |
1846 | p.luma_vpp[LUMA_32x64] = x265_interp_8tap_vert_pp_32x64_avx2; | |
1847 | ||
1848 | p.luma_vpp[LUMA_48x64] = x265_interp_8tap_vert_pp_48x64_avx2; | |
1849 | ||
1850 | p.luma_vpp[LUMA_64x16] = x265_interp_8tap_vert_pp_64x16_avx2; | |
1851 | p.luma_vpp[LUMA_64x32] = x265_interp_8tap_vert_pp_64x32_avx2; | |
1852 | p.luma_vpp[LUMA_64x48] = x265_interp_8tap_vert_pp_64x48_avx2; | |
1853 | p.luma_vpp[LUMA_64x64] = x265_interp_8tap_vert_pp_64x64_avx2; | |
72b9787e JB |
1854 | #endif |
1855 | p.luma_hpp[LUMA_4x4] = x265_interp_8tap_horiz_pp_4x4_avx2; | |
b53f7c52 JB |
1856 | |
1857 | p.luma_hpp[LUMA_8x4] = x265_interp_8tap_horiz_pp_8x4_avx2; | |
1858 | p.luma_hpp[LUMA_8x8] = x265_interp_8tap_horiz_pp_8x8_avx2; | |
1859 | p.luma_hpp[LUMA_8x16] = x265_interp_8tap_horiz_pp_8x16_avx2; | |
1860 | p.luma_hpp[LUMA_8x32] = x265_interp_8tap_horiz_pp_8x32_avx2; | |
1861 | ||
1862 | p.luma_hpp[LUMA_16x4] = x265_interp_8tap_horiz_pp_16x4_avx2; | |
1863 | p.luma_hpp[LUMA_16x8] = x265_interp_8tap_horiz_pp_16x8_avx2; | |
1864 | p.luma_hpp[LUMA_16x12] = x265_interp_8tap_horiz_pp_16x12_avx2; | |
1865 | p.luma_hpp[LUMA_16x16] = x265_interp_8tap_horiz_pp_16x16_avx2; | |
1866 | p.luma_hpp[LUMA_16x32] = x265_interp_8tap_horiz_pp_16x32_avx2; | |
1867 | p.luma_hpp[LUMA_16x64] = x265_interp_8tap_horiz_pp_16x64_avx2; | |
1868 | ||
1869 | p.luma_hpp[LUMA_32x8] = x265_interp_8tap_horiz_pp_32x8_avx2; | |
1870 | p.luma_hpp[LUMA_32x16] = x265_interp_8tap_horiz_pp_32x16_avx2; | |
1871 | p.luma_hpp[LUMA_32x24] = x265_interp_8tap_horiz_pp_32x24_avx2; | |
1872 | p.luma_hpp[LUMA_32x32] = x265_interp_8tap_horiz_pp_32x32_avx2; | |
1873 | p.luma_hpp[LUMA_32x64] = x265_interp_8tap_horiz_pp_32x64_avx2; | |
1874 | ||
1875 | p.luma_hpp[LUMA_64x64] = x265_interp_8tap_horiz_pp_64x64_avx2; | |
1876 | p.luma_hpp[LUMA_64x48] = x265_interp_8tap_horiz_pp_64x48_avx2; | |
1877 | p.luma_hpp[LUMA_64x32] = x265_interp_8tap_horiz_pp_64x32_avx2; | |
1878 | p.luma_hpp[LUMA_64x16] = x265_interp_8tap_horiz_pp_64x16_avx2; | |
1879 | ||
1880 | p.luma_hpp[LUMA_48x64] = x265_interp_8tap_horiz_pp_48x64_avx2; | |
1881 | ||
1882 | p.chroma[X265_CSP_I420].filter_hpp[CHROMA_8x8] = x265_interp_4tap_horiz_pp_8x8_avx2; | |
1883 | p.chroma[X265_CSP_I420].filter_hpp[CHROMA_4x4] = x265_interp_4tap_horiz_pp_4x4_avx2; | |
1884 | p.chroma[X265_CSP_I420].filter_hpp[CHROMA_32x32] = x265_interp_4tap_horiz_pp_32x32_avx2; | |
1885 | p.chroma[X265_CSP_I420].filter_hpp[CHROMA_16x16] = x265_interp_4tap_horiz_pp_16x16_avx2; | |
1886 | ||
1887 | p.luma_vpp[LUMA_4x4] = x265_interp_8tap_vert_pp_4x4_avx2; | |
1888 | ||
1889 | p.luma_vpp[LUMA_8x4] = x265_interp_8tap_vert_pp_8x4_avx2; | |
1890 | p.luma_vpp[LUMA_8x8] = x265_interp_8tap_vert_pp_8x8_avx2; | |
1891 | p.luma_vpp[LUMA_8x16] = x265_interp_8tap_vert_pp_8x16_avx2; | |
1892 | p.luma_vpp[LUMA_8x32] = x265_interp_8tap_vert_pp_8x32_avx2; | |
1893 | ||
1894 | // color space i420 | |
1895 | p.chroma[X265_CSP_I420].filter_vpp[CHROMA_4x4] = x265_interp_4tap_vert_pp_4x4_avx2; | |
1896 | p.chroma[X265_CSP_I420].filter_vpp[CHROMA_8x8] = x265_interp_4tap_vert_pp_8x8_avx2; | |
1897 | ||
1898 | // color space i422 | |
1899 | p.chroma[X265_CSP_I422].filter_vpp[CHROMA422_4x4] = x265_interp_4tap_vert_pp_4x4_avx2; | |
1900 | ||
1901 | p.luma_vps[LUMA_4x4] = x265_interp_8tap_vert_ps_4x4_avx2; | |
1902 | ||
1903 | #if X86_64 | |
1904 | p.chroma[X265_CSP_I420].filter_vpp[CHROMA_16x16] = x265_interp_4tap_vert_pp_16x16_avx2; | |
1905 | p.chroma[X265_CSP_I420].filter_vpp[CHROMA_32x32] = x265_interp_4tap_vert_pp_32x32_avx2; | |
1906 | #endif | |
72b9787e JB |
1907 | } |
1908 | #endif // if HIGH_BIT_DEPTH | |
1909 | } | |
1910 | } | |
1911 | ||
1912 | extern "C" { | |
1913 | #ifdef __INTEL_COMPILER | |
1914 | ||
1915 | /* Agner's patch to Intel's CPU dispatcher from pages 131-132 of | |
1916 | * http://agner.org/optimize/optimizing_cpp.pdf (2011-01-30) | |
1917 | * adapted to x265's cpu schema. */ | |
1918 | ||
1919 | // Global variable indicating cpu | |
1920 | int __intel_cpu_indicator = 0; | |
1921 | // CPU dispatcher function | |
1922 | void x265_intel_cpu_indicator_init(void) | |
1923 | { | |
1924 | uint32_t cpu = x265::cpu_detect(); | |
1925 | ||
1926 | if (cpu & X265_CPU_AVX) | |
1927 | __intel_cpu_indicator = 0x20000; | |
1928 | else if (cpu & X265_CPU_SSE42) | |
1929 | __intel_cpu_indicator = 0x8000; | |
1930 | else if (cpu & X265_CPU_SSE4) | |
1931 | __intel_cpu_indicator = 0x2000; | |
1932 | else if (cpu & X265_CPU_SSSE3) | |
1933 | __intel_cpu_indicator = 0x1000; | |
1934 | else if (cpu & X265_CPU_SSE3) | |
1935 | __intel_cpu_indicator = 0x800; | |
1936 | else if (cpu & X265_CPU_SSE2 && !(cpu & X265_CPU_SSE2_IS_SLOW)) | |
1937 | __intel_cpu_indicator = 0x200; | |
1938 | else if (cpu & X265_CPU_SSE) | |
1939 | __intel_cpu_indicator = 0x80; | |
1940 | else if (cpu & X265_CPU_MMX2) | |
1941 | __intel_cpu_indicator = 8; | |
1942 | else | |
1943 | __intel_cpu_indicator = 1; | |
1944 | } | |
1945 | ||
1946 | /* __intel_cpu_indicator_init appears to have a non-standard calling convention that | |
1947 | * assumes certain registers aren't preserved, so we'll route it through a function | |
1948 | * that backs up all the registers. */ | |
1949 | void __intel_cpu_indicator_init(void) | |
1950 | { | |
1951 | x265_safe_intel_cpu_indicator_init(); | |
1952 | } | |
1953 | ||
1954 | #else // ifdef __INTEL_COMPILER | |
/* Build without the Intel compiler: there is no __intel_cpu_indicator to
 * set up, so the initialiser is an empty function. */
void x265_intel_cpu_indicator_init(void)
{
}
1956 | ||
1957 | #endif // ifdef __INTEL_COMPILER | |
1958 | } |