Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * DSP utils | |
3 | * Copyright (c) 2000, 2001 Fabrice Bellard | |
4 | * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |
5 | * | |
6 | * This file is part of FFmpeg. | |
7 | * | |
8 | * FFmpeg is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License as published by the Free Software Foundation; either | |
11 | * version 2.1 of the License, or (at your option) any later version. | |
12 | * | |
13 | * FFmpeg is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * Lesser General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU Lesser General Public | |
19 | * License along with FFmpeg; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 | */ | |
22 | ||
23 | #include "libavutil/attributes.h" | |
24 | #include "libavutil/internal.h" | |
25 | #include "avcodec.h" | |
26 | #include "copy_block.h" | |
27 | #include "simple_idct.h" | |
28 | #include "me_cmp.h" | |
29 | #include "mpegvideo.h" | |
30 | #include "config.h" | |
31 | ||
/* Table of squares addressed with a bias of 256: the SSE routines in this
 * file index it as (ff_square_tab + 256)[a - b]. It is all zeros until
 * ff_me_cmp_init_static() fills it in. */
uint32_t ff_square_tab[512] = { 0 };
33 | ||
34 | static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |
f6fa7814 | 35 | ptrdiff_t stride, int h) |
2ba45a60 DM |
36 | { |
37 | int s = 0, i; | |
38 | uint32_t *sq = ff_square_tab + 256; | |
39 | ||
40 | for (i = 0; i < h; i++) { | |
41 | s += sq[pix1[0] - pix2[0]]; | |
42 | s += sq[pix1[1] - pix2[1]]; | |
43 | s += sq[pix1[2] - pix2[2]]; | |
44 | s += sq[pix1[3] - pix2[3]]; | |
f6fa7814 DM |
45 | pix1 += stride; |
46 | pix2 += stride; | |
2ba45a60 DM |
47 | } |
48 | return s; | |
49 | } | |
50 | ||
51 | static int sse8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |
f6fa7814 | 52 | ptrdiff_t stride, int h) |
2ba45a60 DM |
53 | { |
54 | int s = 0, i; | |
55 | uint32_t *sq = ff_square_tab + 256; | |
56 | ||
57 | for (i = 0; i < h; i++) { | |
58 | s += sq[pix1[0] - pix2[0]]; | |
59 | s += sq[pix1[1] - pix2[1]]; | |
60 | s += sq[pix1[2] - pix2[2]]; | |
61 | s += sq[pix1[3] - pix2[3]]; | |
62 | s += sq[pix1[4] - pix2[4]]; | |
63 | s += sq[pix1[5] - pix2[5]]; | |
64 | s += sq[pix1[6] - pix2[6]]; | |
65 | s += sq[pix1[7] - pix2[7]]; | |
f6fa7814 DM |
66 | pix1 += stride; |
67 | pix2 += stride; | |
2ba45a60 DM |
68 | } |
69 | return s; | |
70 | } | |
71 | ||
72 | static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |
f6fa7814 | 73 | ptrdiff_t stride, int h) |
2ba45a60 DM |
74 | { |
75 | int s = 0, i; | |
76 | uint32_t *sq = ff_square_tab + 256; | |
77 | ||
78 | for (i = 0; i < h; i++) { | |
79 | s += sq[pix1[0] - pix2[0]]; | |
80 | s += sq[pix1[1] - pix2[1]]; | |
81 | s += sq[pix1[2] - pix2[2]]; | |
82 | s += sq[pix1[3] - pix2[3]]; | |
83 | s += sq[pix1[4] - pix2[4]]; | |
84 | s += sq[pix1[5] - pix2[5]]; | |
85 | s += sq[pix1[6] - pix2[6]]; | |
86 | s += sq[pix1[7] - pix2[7]]; | |
87 | s += sq[pix1[8] - pix2[8]]; | |
88 | s += sq[pix1[9] - pix2[9]]; | |
89 | s += sq[pix1[10] - pix2[10]]; | |
90 | s += sq[pix1[11] - pix2[11]]; | |
91 | s += sq[pix1[12] - pix2[12]]; | |
92 | s += sq[pix1[13] - pix2[13]]; | |
93 | s += sq[pix1[14] - pix2[14]]; | |
94 | s += sq[pix1[15] - pix2[15]]; | |
95 | ||
f6fa7814 DM |
96 | pix1 += stride; |
97 | pix2 += stride; | |
2ba45a60 DM |
98 | } |
99 | return s; | |
100 | } | |
101 | ||
/**
 * Sum of the absolute values of the 64 coefficients in block.
 *
 * @param block array of at least 64 coefficients
 * @return the sum of |block[k]| for k in 0..63
 */
static int sum_abs_dctelem_c(int16_t *block)
{
    int total = 0;
    int k;

    for (k = 0; k < 64; k++) {
        const int coeff = block[k];

        total += coeff >= 0 ? coeff : -coeff;
    }
    return total;
}
110 | ||
/* Rounded-up average of two / four values. Every argument is parenthesized
 * so that operators with lower precedence than '+' (|, &, <<, ?:) inside an
 * argument cannot re-associate with the surrounding sum. */
#define avg2(a, b) (((a) + (b) + 1) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
113 | ||
114 | static inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |
f6fa7814 | 115 | ptrdiff_t stride, int h) |
2ba45a60 DM |
116 | { |
117 | int s = 0, i; | |
118 | ||
119 | for (i = 0; i < h; i++) { | |
120 | s += abs(pix1[0] - pix2[0]); | |
121 | s += abs(pix1[1] - pix2[1]); | |
122 | s += abs(pix1[2] - pix2[2]); | |
123 | s += abs(pix1[3] - pix2[3]); | |
124 | s += abs(pix1[4] - pix2[4]); | |
125 | s += abs(pix1[5] - pix2[5]); | |
126 | s += abs(pix1[6] - pix2[6]); | |
127 | s += abs(pix1[7] - pix2[7]); | |
128 | s += abs(pix1[8] - pix2[8]); | |
129 | s += abs(pix1[9] - pix2[9]); | |
130 | s += abs(pix1[10] - pix2[10]); | |
131 | s += abs(pix1[11] - pix2[11]); | |
132 | s += abs(pix1[12] - pix2[12]); | |
133 | s += abs(pix1[13] - pix2[13]); | |
134 | s += abs(pix1[14] - pix2[14]); | |
135 | s += abs(pix1[15] - pix2[15]); | |
f6fa7814 DM |
136 | pix1 += stride; |
137 | pix2 += stride; | |
2ba45a60 DM |
138 | } |
139 | return s; | |
140 | } | |
141 | ||
142 | static int pix_abs16_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |
f6fa7814 | 143 | ptrdiff_t stride, int h) |
2ba45a60 DM |
144 | { |
145 | int s = 0, i; | |
146 | ||
147 | for (i = 0; i < h; i++) { | |
148 | s += abs(pix1[0] - avg2(pix2[0], pix2[1])); | |
149 | s += abs(pix1[1] - avg2(pix2[1], pix2[2])); | |
150 | s += abs(pix1[2] - avg2(pix2[2], pix2[3])); | |
151 | s += abs(pix1[3] - avg2(pix2[3], pix2[4])); | |
152 | s += abs(pix1[4] - avg2(pix2[4], pix2[5])); | |
153 | s += abs(pix1[5] - avg2(pix2[5], pix2[6])); | |
154 | s += abs(pix1[6] - avg2(pix2[6], pix2[7])); | |
155 | s += abs(pix1[7] - avg2(pix2[7], pix2[8])); | |
156 | s += abs(pix1[8] - avg2(pix2[8], pix2[9])); | |
157 | s += abs(pix1[9] - avg2(pix2[9], pix2[10])); | |
158 | s += abs(pix1[10] - avg2(pix2[10], pix2[11])); | |
159 | s += abs(pix1[11] - avg2(pix2[11], pix2[12])); | |
160 | s += abs(pix1[12] - avg2(pix2[12], pix2[13])); | |
161 | s += abs(pix1[13] - avg2(pix2[13], pix2[14])); | |
162 | s += abs(pix1[14] - avg2(pix2[14], pix2[15])); | |
163 | s += abs(pix1[15] - avg2(pix2[15], pix2[16])); | |
f6fa7814 DM |
164 | pix1 += stride; |
165 | pix2 += stride; | |
2ba45a60 DM |
166 | } |
167 | return s; | |
168 | } | |
169 | ||
170 | static int pix_abs16_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |
f6fa7814 | 171 | ptrdiff_t stride, int h) |
2ba45a60 DM |
172 | { |
173 | int s = 0, i; | |
f6fa7814 | 174 | uint8_t *pix3 = pix2 + stride; |
2ba45a60 DM |
175 | |
176 | for (i = 0; i < h; i++) { | |
177 | s += abs(pix1[0] - avg2(pix2[0], pix3[0])); | |
178 | s += abs(pix1[1] - avg2(pix2[1], pix3[1])); | |
179 | s += abs(pix1[2] - avg2(pix2[2], pix3[2])); | |
180 | s += abs(pix1[3] - avg2(pix2[3], pix3[3])); | |
181 | s += abs(pix1[4] - avg2(pix2[4], pix3[4])); | |
182 | s += abs(pix1[5] - avg2(pix2[5], pix3[5])); | |
183 | s += abs(pix1[6] - avg2(pix2[6], pix3[6])); | |
184 | s += abs(pix1[7] - avg2(pix2[7], pix3[7])); | |
185 | s += abs(pix1[8] - avg2(pix2[8], pix3[8])); | |
186 | s += abs(pix1[9] - avg2(pix2[9], pix3[9])); | |
187 | s += abs(pix1[10] - avg2(pix2[10], pix3[10])); | |
188 | s += abs(pix1[11] - avg2(pix2[11], pix3[11])); | |
189 | s += abs(pix1[12] - avg2(pix2[12], pix3[12])); | |
190 | s += abs(pix1[13] - avg2(pix2[13], pix3[13])); | |
191 | s += abs(pix1[14] - avg2(pix2[14], pix3[14])); | |
192 | s += abs(pix1[15] - avg2(pix2[15], pix3[15])); | |
f6fa7814 DM |
193 | pix1 += stride; |
194 | pix2 += stride; | |
195 | pix3 += stride; | |
2ba45a60 DM |
196 | } |
197 | return s; | |
198 | } | |
199 | ||
200 | static int pix_abs16_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |
f6fa7814 | 201 | ptrdiff_t stride, int h) |
2ba45a60 DM |
202 | { |
203 | int s = 0, i; | |
f6fa7814 | 204 | uint8_t *pix3 = pix2 + stride; |
2ba45a60 DM |
205 | |
206 | for (i = 0; i < h; i++) { | |
207 | s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); | |
208 | s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); | |
209 | s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); | |
210 | s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); | |
211 | s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); | |
212 | s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6])); | |
213 | s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7])); | |
214 | s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8])); | |
215 | s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9])); | |
216 | s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10])); | |
217 | s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11])); | |
218 | s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12])); | |
219 | s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13])); | |
220 | s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14])); | |
221 | s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15])); | |
222 | s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16])); | |
f6fa7814 DM |
223 | pix1 += stride; |
224 | pix2 += stride; | |
225 | pix3 += stride; | |
2ba45a60 DM |
226 | } |
227 | return s; | |
228 | } | |
229 | ||
230 | static inline int pix_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |
f6fa7814 | 231 | ptrdiff_t stride, int h) |
2ba45a60 DM |
232 | { |
233 | int s = 0, i; | |
234 | ||
235 | for (i = 0; i < h; i++) { | |
236 | s += abs(pix1[0] - pix2[0]); | |
237 | s += abs(pix1[1] - pix2[1]); | |
238 | s += abs(pix1[2] - pix2[2]); | |
239 | s += abs(pix1[3] - pix2[3]); | |
240 | s += abs(pix1[4] - pix2[4]); | |
241 | s += abs(pix1[5] - pix2[5]); | |
242 | s += abs(pix1[6] - pix2[6]); | |
243 | s += abs(pix1[7] - pix2[7]); | |
f6fa7814 DM |
244 | pix1 += stride; |
245 | pix2 += stride; | |
2ba45a60 DM |
246 | } |
247 | return s; | |
248 | } | |
249 | ||
250 | static int pix_abs8_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |
f6fa7814 | 251 | ptrdiff_t stride, int h) |
2ba45a60 DM |
252 | { |
253 | int s = 0, i; | |
254 | ||
255 | for (i = 0; i < h; i++) { | |
256 | s += abs(pix1[0] - avg2(pix2[0], pix2[1])); | |
257 | s += abs(pix1[1] - avg2(pix2[1], pix2[2])); | |
258 | s += abs(pix1[2] - avg2(pix2[2], pix2[3])); | |
259 | s += abs(pix1[3] - avg2(pix2[3], pix2[4])); | |
260 | s += abs(pix1[4] - avg2(pix2[4], pix2[5])); | |
261 | s += abs(pix1[5] - avg2(pix2[5], pix2[6])); | |
262 | s += abs(pix1[6] - avg2(pix2[6], pix2[7])); | |
263 | s += abs(pix1[7] - avg2(pix2[7], pix2[8])); | |
f6fa7814 DM |
264 | pix1 += stride; |
265 | pix2 += stride; | |
2ba45a60 DM |
266 | } |
267 | return s; | |
268 | } | |
269 | ||
270 | static int pix_abs8_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |
f6fa7814 | 271 | ptrdiff_t stride, int h) |
2ba45a60 DM |
272 | { |
273 | int s = 0, i; | |
f6fa7814 | 274 | uint8_t *pix3 = pix2 + stride; |
2ba45a60 DM |
275 | |
276 | for (i = 0; i < h; i++) { | |
277 | s += abs(pix1[0] - avg2(pix2[0], pix3[0])); | |
278 | s += abs(pix1[1] - avg2(pix2[1], pix3[1])); | |
279 | s += abs(pix1[2] - avg2(pix2[2], pix3[2])); | |
280 | s += abs(pix1[3] - avg2(pix2[3], pix3[3])); | |
281 | s += abs(pix1[4] - avg2(pix2[4], pix3[4])); | |
282 | s += abs(pix1[5] - avg2(pix2[5], pix3[5])); | |
283 | s += abs(pix1[6] - avg2(pix2[6], pix3[6])); | |
284 | s += abs(pix1[7] - avg2(pix2[7], pix3[7])); | |
f6fa7814 DM |
285 | pix1 += stride; |
286 | pix2 += stride; | |
287 | pix3 += stride; | |
2ba45a60 DM |
288 | } |
289 | return s; | |
290 | } | |
291 | ||
292 | static int pix_abs8_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |
f6fa7814 | 293 | ptrdiff_t stride, int h) |
2ba45a60 DM |
294 | { |
295 | int s = 0, i; | |
f6fa7814 | 296 | uint8_t *pix3 = pix2 + stride; |
2ba45a60 DM |
297 | |
298 | for (i = 0; i < h; i++) { | |
299 | s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); | |
300 | s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); | |
301 | s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); | |
302 | s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); | |
303 | s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); | |
304 | s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6])); | |
305 | s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7])); | |
306 | s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8])); | |
f6fa7814 DM |
307 | pix1 += stride; |
308 | pix2 += stride; | |
309 | pix3 += stride; | |
2ba45a60 DM |
310 | } |
311 | return s; | |
312 | } | |
313 | ||
f6fa7814 DM |
314 | static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, |
315 | ptrdiff_t stride, int h) | |
2ba45a60 DM |
316 | { |
317 | int score1 = 0, score2 = 0, x, y; | |
318 | ||
319 | for (y = 0; y < h; y++) { | |
320 | for (x = 0; x < 16; x++) | |
321 | score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]); | |
322 | if (y + 1 < h) { | |
323 | for (x = 0; x < 15; x++) | |
324 | score2 += FFABS(s1[x] - s1[x + stride] - | |
325 | s1[x + 1] + s1[x + stride + 1]) - | |
326 | FFABS(s2[x] - s2[x + stride] - | |
327 | s2[x + 1] + s2[x + stride + 1]); | |
328 | } | |
329 | s1 += stride; | |
330 | s2 += stride; | |
331 | } | |
332 | ||
333 | if (c) | |
334 | return score1 + FFABS(score2) * c->avctx->nsse_weight; | |
335 | else | |
336 | return score1 + FFABS(score2) * 8; | |
337 | } | |
338 | ||
f6fa7814 DM |
339 | static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, |
340 | ptrdiff_t stride, int h) | |
2ba45a60 DM |
341 | { |
342 | int score1 = 0, score2 = 0, x, y; | |
343 | ||
344 | for (y = 0; y < h; y++) { | |
345 | for (x = 0; x < 8; x++) | |
346 | score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]); | |
347 | if (y + 1 < h) { | |
348 | for (x = 0; x < 7; x++) | |
349 | score2 += FFABS(s1[x] - s1[x + stride] - | |
350 | s1[x + 1] + s1[x + stride + 1]) - | |
351 | FFABS(s2[x] - s2[x + stride] - | |
352 | s2[x + 1] + s2[x + stride + 1]); | |
353 | } | |
354 | s1 += stride; | |
355 | s2 += stride; | |
356 | } | |
357 | ||
358 | if (c) | |
359 | return score1 + FFABS(score2) * c->avctx->nsse_weight; | |
360 | else | |
361 | return score1 + FFABS(score2) * 8; | |
362 | } | |
363 | ||
364 | static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b, | |
f6fa7814 | 365 | ptrdiff_t stride, int h) |
2ba45a60 DM |
366 | { |
367 | return 0; | |
368 | } | |
369 | ||
370 | void ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type) | |
371 | { | |
372 | int i; | |
373 | ||
374 | memset(cmp, 0, sizeof(void *) * 6); | |
375 | ||
376 | for (i = 0; i < 6; i++) { | |
377 | switch (type & 0xFF) { | |
378 | case FF_CMP_SAD: | |
379 | cmp[i] = c->sad[i]; | |
380 | break; | |
381 | case FF_CMP_SATD: | |
382 | cmp[i] = c->hadamard8_diff[i]; | |
383 | break; | |
384 | case FF_CMP_SSE: | |
385 | cmp[i] = c->sse[i]; | |
386 | break; | |
387 | case FF_CMP_DCT: | |
388 | cmp[i] = c->dct_sad[i]; | |
389 | break; | |
390 | case FF_CMP_DCT264: | |
391 | cmp[i] = c->dct264_sad[i]; | |
392 | break; | |
393 | case FF_CMP_DCTMAX: | |
394 | cmp[i] = c->dct_max[i]; | |
395 | break; | |
396 | case FF_CMP_PSNR: | |
397 | cmp[i] = c->quant_psnr[i]; | |
398 | break; | |
399 | case FF_CMP_BIT: | |
400 | cmp[i] = c->bit[i]; | |
401 | break; | |
402 | case FF_CMP_RD: | |
403 | cmp[i] = c->rd[i]; | |
404 | break; | |
405 | case FF_CMP_VSAD: | |
406 | cmp[i] = c->vsad[i]; | |
407 | break; | |
408 | case FF_CMP_VSSE: | |
409 | cmp[i] = c->vsse[i]; | |
410 | break; | |
411 | case FF_CMP_ZERO: | |
412 | cmp[i] = zero_cmp; | |
413 | break; | |
414 | case FF_CMP_NSSE: | |
415 | cmp[i] = c->nsse[i]; | |
416 | break; | |
417 | #if CONFIG_DWT | |
418 | case FF_CMP_W53: | |
419 | cmp[i]= c->w53[i]; | |
420 | break; | |
421 | case FF_CMP_W97: | |
422 | cmp[i]= c->w97[i]; | |
423 | break; | |
424 | #endif | |
425 | default: | |
426 | av_log(NULL, AV_LOG_ERROR, | |
427 | "internal error in cmp function selection\n"); | |
428 | } | |
429 | } | |
430 | } | |
431 | ||
/* o1 = i1 + i2, o2 = i1 - i2.
 * Wrapped in do { } while (0) so that the macro behaves as one statement,
 * including under an unbraced if/else. i1 and i2 are evaluated twice. */
#define BUTTERFLY2(o1, o2, i1, i2)              \
    do {                                        \
        (o1) = (i1) + (i2);                     \
        (o2) = (i1) - (i2);                     \
    } while (0)

/* In-place butterfly: (x, y) becomes (x + y, x - y).
 * The temporaries carry a distinctive name so that arguments called
 * a or b are not captured by the macro's own locals. */
#define BUTTERFLY1(x, y)                        \
    do {                                        \
        const int bfly_a_ = (x);                \
        const int bfly_b_ = (y);                \
        (x) = bfly_a_ + bfly_b_;                \
        (y) = bfly_a_ - bfly_b_;                \
    } while (0)

/* |x + y| + |x - y|, without storing the butterfly outputs. */
#define BUTTERFLYA(x, y) (FFABS((x) + (y)) + FFABS((x) - (y)))
446 | ||
447 | static int hadamard8_diff8x8_c(MpegEncContext *s, uint8_t *dst, | |
f6fa7814 | 448 | uint8_t *src, ptrdiff_t stride, int h) |
2ba45a60 DM |
449 | { |
450 | int i, temp[64], sum = 0; | |
451 | ||
452 | av_assert2(h == 8); | |
453 | ||
454 | for (i = 0; i < 8; i++) { | |
455 | // FIXME: try pointer walks | |
456 | BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1], | |
457 | src[stride * i + 0] - dst[stride * i + 0], | |
458 | src[stride * i + 1] - dst[stride * i + 1]); | |
459 | BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3], | |
460 | src[stride * i + 2] - dst[stride * i + 2], | |
461 | src[stride * i + 3] - dst[stride * i + 3]); | |
462 | BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5], | |
463 | src[stride * i + 4] - dst[stride * i + 4], | |
464 | src[stride * i + 5] - dst[stride * i + 5]); | |
465 | BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7], | |
466 | src[stride * i + 6] - dst[stride * i + 6], | |
467 | src[stride * i + 7] - dst[stride * i + 7]); | |
468 | ||
469 | BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]); | |
470 | BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]); | |
471 | BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]); | |
472 | BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]); | |
473 | ||
474 | BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]); | |
475 | BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]); | |
476 | BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]); | |
477 | BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]); | |
478 | } | |
479 | ||
480 | for (i = 0; i < 8; i++) { | |
481 | BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]); | |
482 | BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]); | |
483 | BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]); | |
484 | BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]); | |
485 | ||
486 | BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]); | |
487 | BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]); | |
488 | BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]); | |
489 | BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]); | |
490 | ||
491 | sum += BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i]) + | |
492 | BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i]) + | |
493 | BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i]) + | |
494 | BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]); | |
495 | } | |
496 | return sum; | |
497 | } | |
498 | ||
499 | static int hadamard8_intra8x8_c(MpegEncContext *s, uint8_t *src, | |
f6fa7814 | 500 | uint8_t *dummy, ptrdiff_t stride, int h) |
2ba45a60 DM |
501 | { |
502 | int i, temp[64], sum = 0; | |
503 | ||
504 | av_assert2(h == 8); | |
505 | ||
506 | for (i = 0; i < 8; i++) { | |
507 | // FIXME: try pointer walks | |
508 | BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1], | |
509 | src[stride * i + 0], src[stride * i + 1]); | |
510 | BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3], | |
511 | src[stride * i + 2], src[stride * i + 3]); | |
512 | BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5], | |
513 | src[stride * i + 4], src[stride * i + 5]); | |
514 | BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7], | |
515 | src[stride * i + 6], src[stride * i + 7]); | |
516 | ||
517 | BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]); | |
518 | BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]); | |
519 | BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]); | |
520 | BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]); | |
521 | ||
522 | BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]); | |
523 | BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]); | |
524 | BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]); | |
525 | BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]); | |
526 | } | |
527 | ||
528 | for (i = 0; i < 8; i++) { | |
529 | BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]); | |
530 | BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]); | |
531 | BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]); | |
532 | BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]); | |
533 | ||
534 | BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]); | |
535 | BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]); | |
536 | BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]); | |
537 | BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]); | |
538 | ||
539 | sum += | |
540 | BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i]) | |
541 | + BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i]) | |
542 | + BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i]) | |
543 | + BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]); | |
544 | } | |
545 | ||
546 | sum -= FFABS(temp[8 * 0] + temp[8 * 4]); // -mean | |
547 | ||
548 | return sum; | |
549 | } | |
550 | ||
551 | static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1, | |
f6fa7814 | 552 | uint8_t *src2, ptrdiff_t stride, int h) |
2ba45a60 DM |
553 | { |
554 | LOCAL_ALIGNED_16(int16_t, temp, [64]); | |
555 | ||
556 | av_assert2(h == 8); | |
557 | ||
558 | s->pdsp.diff_pixels(temp, src1, src2, stride); | |
559 | s->fdsp.fdct(temp); | |
560 | return s->mecc.sum_abs_dctelem(temp); | |
561 | } | |
562 | ||
#if CONFIG_GPL
/* One 8-point integer transform pass. The caller defines SRC(k) to read
 * input k and DST(k, v) to consume output k. All eight inputs are read
 * before the first DST, so SRC and DST may refer to the same storage. */
#define DCT8_1D                                             \
    {                                                       \
        const int in0 = SRC(0), in1 = SRC(1);               \
        const int in2 = SRC(2), in3 = SRC(3);               \
        const int in4 = SRC(4), in5 = SRC(5);               \
        const int in6 = SRC(6), in7 = SRC(7);               \
        const int p07 = in0 + in7, m07 = in0 - in7;         \
        const int p16 = in1 + in6, m16 = in1 - in6;         \
        const int p25 = in2 + in5, m25 = in2 - in5;         \
        const int p34 = in3 + in4, m34 = in3 - in4;         \
        const int t0  = p07 + p34;                          \
        const int t1  = p16 + p25;                          \
        const int t2  = p07 - p34;                          \
        const int t3  = p16 - p25;                          \
        const int t4  = m16 + m25 + (m07 + (m07 >> 1));     \
        const int t5  = m07 - m34 - (m25 + (m25 >> 1));     \
        const int t6  = m07 + m34 - (m16 + (m16 >> 1));     \
        const int t7  = m16 - m25 + (m34 + (m34 >> 1));     \
        DST(0, t0 + t1);                                    \
        DST(1, t4 + (t7 >> 2));                             \
        DST(2, t2 + (t3 >> 1));                             \
        DST(3, t5 + (t6 >> 2));                             \
        DST(4, t0 - t1);                                    \
        DST(5, t6 - (t5 >> 2));                             \
        DST(6, (t2 >> 1) - t3);                             \
        DST(7, (t4 >> 2) - t7);                             \
    }

/**
 * Take the 8x8 pixel difference of src1 and src2, apply DCT8_1D along the
 * rows and then along the columns, and return the sum of the absolute
 * values of the final outputs.
 *
 * NOTE(review): presumably the H.264-style 8x8 integer transform, going by
 * the name; the code itself does not say.
 *
 * @param s      provides pdsp.diff_pixels
 * @param stride row distance passed on to diff_pixels
 * @param h      unused
 */
static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1,
                           uint8_t *src2, ptrdiff_t stride, int h)
{
    int16_t coef[8][8];
    int total = 0;
    int i;

    s->pdsp.diff_pixels(coef[0], src1, src2, stride);

    /* Row pass, transformed in place. */
#define SRC(x) coef[i][x]
#define DST(x, v) coef[i][x] = v
    for (i = 0; i < 8; i++)
        DCT8_1D
#undef SRC
#undef DST

    /* Column pass: outputs are accumulated, not stored. */
#define SRC(x) coef[x][i]
#define DST(x, v) total += FFABS(v)
    for (i = 0; i < 8; i++)
        DCT8_1D
#undef SRC
#undef DST
    return total;
}
#endif
616 | ||
617 | static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1, | |
f6fa7814 | 618 | uint8_t *src2, ptrdiff_t stride, int h) |
2ba45a60 DM |
619 | { |
620 | LOCAL_ALIGNED_16(int16_t, temp, [64]); | |
621 | int sum = 0, i; | |
622 | ||
623 | av_assert2(h == 8); | |
624 | ||
625 | s->pdsp.diff_pixels(temp, src1, src2, stride); | |
626 | s->fdsp.fdct(temp); | |
627 | ||
628 | for (i = 0; i < 64; i++) | |
629 | sum = FFMAX(sum, FFABS(temp[i])); | |
630 | ||
631 | return sum; | |
632 | } | |
633 | ||
634 | static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1, | |
f6fa7814 | 635 | uint8_t *src2, ptrdiff_t stride, int h) |
2ba45a60 DM |
636 | { |
637 | LOCAL_ALIGNED_16(int16_t, temp, [64 * 2]); | |
638 | int16_t *const bak = temp + 64; | |
639 | int sum = 0, i; | |
640 | ||
641 | av_assert2(h == 8); | |
642 | s->mb_intra = 0; | |
643 | ||
644 | s->pdsp.diff_pixels(temp, src1, src2, stride); | |
645 | ||
646 | memcpy(bak, temp, 64 * sizeof(int16_t)); | |
647 | ||
648 | s->block_last_index[0 /* FIXME */] = | |
649 | s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i); | |
650 | s->dct_unquantize_inter(s, temp, 0, s->qscale); | |
651 | ff_simple_idct_8(temp); // FIXME | |
652 | ||
653 | for (i = 0; i < 64; i++) | |
654 | sum += (temp[i] - bak[i]) * (temp[i] - bak[i]); | |
655 | ||
656 | return sum; | |
657 | } | |
658 | ||
659 | static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, | |
f6fa7814 | 660 | ptrdiff_t stride, int h) |
2ba45a60 DM |
661 | { |
662 | const uint8_t *scantable = s->intra_scantable.permutated; | |
663 | LOCAL_ALIGNED_16(int16_t, temp, [64]); | |
664 | LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]); | |
665 | LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]); | |
666 | int i, last, run, bits, level, distortion, start_i; | |
667 | const int esc_length = s->ac_esc_length; | |
668 | uint8_t *length, *last_length; | |
669 | ||
670 | av_assert2(h == 8); | |
671 | ||
672 | copy_block8(lsrc1, src1, 8, stride, 8); | |
673 | copy_block8(lsrc2, src2, 8, stride, 8); | |
674 | ||
675 | s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8); | |
676 | ||
677 | s->block_last_index[0 /* FIXME */] = | |
678 | last = | |
679 | s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i); | |
680 | ||
681 | bits = 0; | |
682 | ||
683 | if (s->mb_intra) { | |
684 | start_i = 1; | |
685 | length = s->intra_ac_vlc_length; | |
686 | last_length = s->intra_ac_vlc_last_length; | |
687 | bits += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma | |
688 | } else { | |
689 | start_i = 0; | |
690 | length = s->inter_ac_vlc_length; | |
691 | last_length = s->inter_ac_vlc_last_length; | |
692 | } | |
693 | ||
694 | if (last >= start_i) { | |
695 | run = 0; | |
696 | for (i = start_i; i < last; i++) { | |
697 | int j = scantable[i]; | |
698 | level = temp[j]; | |
699 | ||
700 | if (level) { | |
701 | level += 64; | |
702 | if ((level & (~127)) == 0) | |
703 | bits += length[UNI_AC_ENC_INDEX(run, level)]; | |
704 | else | |
705 | bits += esc_length; | |
706 | run = 0; | |
707 | } else | |
708 | run++; | |
709 | } | |
710 | i = scantable[last]; | |
711 | ||
712 | level = temp[i] + 64; | |
713 | ||
714 | av_assert2(level - 64); | |
715 | ||
716 | if ((level & (~127)) == 0) { | |
717 | bits += last_length[UNI_AC_ENC_INDEX(run, level)]; | |
718 | } else | |
719 | bits += esc_length; | |
720 | } | |
721 | ||
722 | if (last >= 0) { | |
723 | if (s->mb_intra) | |
724 | s->dct_unquantize_intra(s, temp, 0, s->qscale); | |
725 | else | |
726 | s->dct_unquantize_inter(s, temp, 0, s->qscale); | |
727 | } | |
728 | ||
729 | s->idsp.idct_add(lsrc2, 8, temp); | |
730 | ||
731 | distortion = s->mecc.sse[1](NULL, lsrc2, lsrc1, 8, 8); | |
732 | ||
733 | return distortion + ((bits * s->qscale * s->qscale * 109 + 64) >> 7); | |
734 | } | |
735 | ||
736 | static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, | |
f6fa7814 | 737 | ptrdiff_t stride, int h) |
2ba45a60 DM |
738 | { |
739 | const uint8_t *scantable = s->intra_scantable.permutated; | |
740 | LOCAL_ALIGNED_16(int16_t, temp, [64]); | |
741 | int i, last, run, bits, level, start_i; | |
742 | const int esc_length = s->ac_esc_length; | |
743 | uint8_t *length, *last_length; | |
744 | ||
745 | av_assert2(h == 8); | |
746 | ||
747 | s->pdsp.diff_pixels(temp, src1, src2, stride); | |
748 | ||
749 | s->block_last_index[0 /* FIXME */] = | |
750 | last = | |
751 | s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i); | |
752 | ||
753 | bits = 0; | |
754 | ||
755 | if (s->mb_intra) { | |
756 | start_i = 1; | |
757 | length = s->intra_ac_vlc_length; | |
758 | last_length = s->intra_ac_vlc_last_length; | |
759 | bits += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma | |
760 | } else { | |
761 | start_i = 0; | |
762 | length = s->inter_ac_vlc_length; | |
763 | last_length = s->inter_ac_vlc_last_length; | |
764 | } | |
765 | ||
766 | if (last >= start_i) { | |
767 | run = 0; | |
768 | for (i = start_i; i < last; i++) { | |
769 | int j = scantable[i]; | |
770 | level = temp[j]; | |
771 | ||
772 | if (level) { | |
773 | level += 64; | |
774 | if ((level & (~127)) == 0) | |
775 | bits += length[UNI_AC_ENC_INDEX(run, level)]; | |
776 | else | |
777 | bits += esc_length; | |
778 | run = 0; | |
779 | } else | |
780 | run++; | |
781 | } | |
782 | i = scantable[last]; | |
783 | ||
784 | level = temp[i] + 64; | |
785 | ||
786 | av_assert2(level - 64); | |
787 | ||
788 | if ((level & (~127)) == 0) | |
789 | bits += last_length[UNI_AC_ENC_INDEX(run, level)]; | |
790 | else | |
791 | bits += esc_length; | |
792 | } | |
793 | ||
794 | return bits; | |
795 | } | |
796 | ||
797 | #define VSAD_INTRA(size) \ | |
798 | static int vsad_intra ## size ## _c(MpegEncContext *c, \ | |
799 | uint8_t *s, uint8_t *dummy, \ | |
f6fa7814 | 800 | ptrdiff_t stride, int h) \ |
2ba45a60 DM |
801 | { \ |
802 | int score = 0, x, y; \ | |
803 | \ | |
804 | for (y = 1; y < h; y++) { \ | |
805 | for (x = 0; x < size; x += 4) { \ | |
806 | score += FFABS(s[x] - s[x + stride]) + \ | |
807 | FFABS(s[x + 1] - s[x + stride + 1]) + \ | |
808 | FFABS(s[x + 2] - s[x + 2 + stride]) + \ | |
809 | FFABS(s[x + 3] - s[x + 3 + stride]); \ | |
810 | } \ | |
811 | s += stride; \ | |
812 | } \ | |
813 | \ | |
814 | return score; \ | |
815 | } | |
816 | VSAD_INTRA(8) | |
817 | VSAD_INTRA(16) | |
818 | ||
819 | #define VSAD(size) \ | |
820 | static int vsad ## size ## _c(MpegEncContext *c, \ | |
821 | uint8_t *s1, uint8_t *s2, \ | |
f6fa7814 | 822 | ptrdiff_t stride, int h) \ |
2ba45a60 DM |
823 | { \ |
824 | int score = 0, x, y; \ | |
825 | \ | |
826 | for (y = 1; y < h; y++) { \ | |
827 | for (x = 0; x < size; x++) \ | |
828 | score += FFABS(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]); \ | |
829 | s1 += stride; \ | |
830 | s2 += stride; \ | |
831 | } \ | |
832 | \ | |
833 | return score; \ | |
834 | } | |
835 | VSAD(8) | |
836 | VSAD(16) | |
837 | ||
838 | #define SQ(a) ((a) * (a)) | |
839 | #define VSSE_INTRA(size) \ | |
840 | static int vsse_intra ## size ## _c(MpegEncContext *c, \ | |
841 | uint8_t *s, uint8_t *dummy, \ | |
f6fa7814 | 842 | ptrdiff_t stride, int h) \ |
2ba45a60 DM |
843 | { \ |
844 | int score = 0, x, y; \ | |
845 | \ | |
846 | for (y = 1; y < h; y++) { \ | |
847 | for (x = 0; x < size; x += 4) { \ | |
848 | score += SQ(s[x] - s[x + stride]) + \ | |
849 | SQ(s[x + 1] - s[x + stride + 1]) + \ | |
850 | SQ(s[x + 2] - s[x + stride + 2]) + \ | |
851 | SQ(s[x + 3] - s[x + stride + 3]); \ | |
852 | } \ | |
853 | s += stride; \ | |
854 | } \ | |
855 | \ | |
856 | return score; \ | |
857 | } | |
858 | VSSE_INTRA(8) | |
859 | VSSE_INTRA(16) | |
860 | ||
861 | #define VSSE(size) \ | |
862 | static int vsse ## size ## _c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, \ | |
f6fa7814 | 863 | ptrdiff_t stride, int h) \ |
2ba45a60 DM |
864 | { \ |
865 | int score = 0, x, y; \ | |
866 | \ | |
867 | for (y = 1; y < h; y++) { \ | |
868 | for (x = 0; x < size; x++) \ | |
869 | score += SQ(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]); \ | |
870 | s1 += stride; \ | |
871 | s2 += stride; \ | |
872 | } \ | |
873 | \ | |
874 | return score; \ | |
875 | } | |
876 | VSSE(8) | |
877 | VSSE(16) | |
878 | ||
879 | #define WRAPPER8_16_SQ(name8, name16) \ | |
880 | static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src, \ | |
f6fa7814 | 881 | ptrdiff_t stride, int h) \ |
2ba45a60 DM |
882 | { \ |
883 | int score = 0; \ | |
884 | \ | |
885 | score += name8(s, dst, src, stride, 8); \ | |
886 | score += name8(s, dst + 8, src + 8, stride, 8); \ | |
887 | if (h == 16) { \ | |
888 | dst += 8 * stride; \ | |
889 | src += 8 * stride; \ | |
890 | score += name8(s, dst, src, stride, 8); \ | |
891 | score += name8(s, dst + 8, src + 8, stride, 8); \ | |
892 | } \ | |
893 | return score; \ | |
894 | } | |
895 | ||
896 | WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) | |
897 | WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c) | |
898 | WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c) | |
899 | #if CONFIG_GPL | |
900 | WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c) | |
901 | #endif | |
902 | WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c) | |
903 | WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) | |
904 | WRAPPER8_16_SQ(rd8x8_c, rd16_c) | |
905 | WRAPPER8_16_SQ(bit8x8_c, bit16_c) | |
906 | ||
907 | av_cold void ff_me_cmp_init_static(void) | |
908 | { | |
909 | int i; | |
910 | ||
911 | for (i = 0; i < 512; i++) | |
912 | ff_square_tab[i] = (i - 256) * (i - 256); | |
913 | } | |
914 | ||
915 | int ff_check_alignment(void) | |
916 | { | |
917 | static int did_fail = 0; | |
918 | LOCAL_ALIGNED_16(int, aligned, [4]); | |
919 | ||
920 | if ((intptr_t)aligned & 15) { | |
921 | if (!did_fail) { | |
922 | #if HAVE_MMX || HAVE_ALTIVEC | |
923 | av_log(NULL, AV_LOG_ERROR, | |
924 | "Compiler did not align stack variables. Libavcodec has been miscompiled\n" | |
925 | "and may be very slow or crash. This is not a bug in libavcodec,\n" | |
926 | "but in the compiler. You may try recompiling using gcc >= 4.2.\n" | |
927 | "Do not report crashes to FFmpeg developers.\n"); | |
928 | #endif | |
929 | did_fail=1; | |
930 | } | |
931 | return -1; | |
932 | } | |
933 | return 0; | |
934 | } | |
935 | ||
936 | av_cold void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx) | |
937 | { | |
938 | ff_check_alignment(); | |
939 | ||
940 | c->sum_abs_dctelem = sum_abs_dctelem_c; | |
941 | ||
942 | /* TODO [0] 16 [1] 8 */ | |
943 | c->pix_abs[0][0] = pix_abs16_c; | |
944 | c->pix_abs[0][1] = pix_abs16_x2_c; | |
945 | c->pix_abs[0][2] = pix_abs16_y2_c; | |
946 | c->pix_abs[0][3] = pix_abs16_xy2_c; | |
947 | c->pix_abs[1][0] = pix_abs8_c; | |
948 | c->pix_abs[1][1] = pix_abs8_x2_c; | |
949 | c->pix_abs[1][2] = pix_abs8_y2_c; | |
950 | c->pix_abs[1][3] = pix_abs8_xy2_c; | |
951 | ||
952 | #define SET_CMP_FUNC(name) \ | |
953 | c->name[0] = name ## 16_c; \ | |
954 | c->name[1] = name ## 8x8_c; | |
955 | ||
956 | SET_CMP_FUNC(hadamard8_diff) | |
957 | c->hadamard8_diff[4] = hadamard8_intra16_c; | |
958 | c->hadamard8_diff[5] = hadamard8_intra8x8_c; | |
959 | SET_CMP_FUNC(dct_sad) | |
960 | SET_CMP_FUNC(dct_max) | |
961 | #if CONFIG_GPL | |
962 | SET_CMP_FUNC(dct264_sad) | |
963 | #endif | |
964 | c->sad[0] = pix_abs16_c; | |
965 | c->sad[1] = pix_abs8_c; | |
966 | c->sse[0] = sse16_c; | |
967 | c->sse[1] = sse8_c; | |
968 | c->sse[2] = sse4_c; | |
969 | SET_CMP_FUNC(quant_psnr) | |
970 | SET_CMP_FUNC(rd) | |
971 | SET_CMP_FUNC(bit) | |
972 | c->vsad[0] = vsad16_c; | |
973 | c->vsad[1] = vsad8_c; | |
974 | c->vsad[4] = vsad_intra16_c; | |
975 | c->vsad[5] = vsad_intra8_c; | |
976 | c->vsse[0] = vsse16_c; | |
977 | c->vsse[1] = vsse8_c; | |
978 | c->vsse[4] = vsse_intra16_c; | |
979 | c->vsse[5] = vsse_intra8_c; | |
980 | c->nsse[0] = nsse16_c; | |
981 | c->nsse[1] = nsse8_c; | |
982 | #if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER | |
983 | ff_dsputil_init_dwt(c); | |
984 | #endif | |
985 | ||
986 | if (ARCH_ALPHA) | |
987 | ff_me_cmp_init_alpha(c, avctx); | |
988 | if (ARCH_ARM) | |
989 | ff_me_cmp_init_arm(c, avctx); | |
990 | if (ARCH_PPC) | |
991 | ff_me_cmp_init_ppc(c, avctx); | |
992 | if (ARCH_X86) | |
993 | ff_me_cmp_init_x86(c, avctx); | |
994 | } |