/*
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/attributes.h"
#include "libavutil/internal.h"
#include "avcodec.h"
#include "copy_block.h"
#include "simple_idct.h"
#include "me_cmp.h"
#include "mpegvideo.h"

uint32_t ff_square_tab[512] = { 0, };

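/*
 * ff_square_tab[256 + d] caches d * d for d in [-256, 255]; the SSE
 * functions below address it through a pointer biased by +256, so a
 * possibly negative pixel difference can be used directly as an index.
 * A minimal sketch of the idiom (illustration only):
 *
 *     const uint32_t *sq = ff_square_tab + 256;
 *     int d = pix1[0] - pix2[0];  // in [-255, 255]
 *     s    += sq[d];              // == d * d without a multiply
 */
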
static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                  ptrdiff_t stride, int h)
{
    int s = 0, i;
    uint32_t *sq = ff_square_tab + 256;

    for (i = 0; i < h; i++) {
        s    += sq[pix1[0] - pix2[0]];
        s    += sq[pix1[1] - pix2[1]];
        s    += sq[pix1[2] - pix2[2]];
        s    += sq[pix1[3] - pix2[3]];
        pix1 += stride;
        pix2 += stride;
    }
    return s;
}

static int sse8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                  ptrdiff_t stride, int h)
{
    int s = 0, i;
    uint32_t *sq = ff_square_tab + 256;

    for (i = 0; i < h; i++) {
        s    += sq[pix1[0] - pix2[0]];
        s    += sq[pix1[1] - pix2[1]];
        s    += sq[pix1[2] - pix2[2]];
        s    += sq[pix1[3] - pix2[3]];
        s    += sq[pix1[4] - pix2[4]];
        s    += sq[pix1[5] - pix2[5]];
        s    += sq[pix1[6] - pix2[6]];
        s    += sq[pix1[7] - pix2[7]];
        pix1 += stride;
        pix2 += stride;
    }
    return s;
}

static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                   ptrdiff_t stride, int h)
{
    int s = 0, i;
    uint32_t *sq = ff_square_tab + 256;

    for (i = 0; i < h; i++) {
        s    += sq[pix1[0]  - pix2[0]];
        s    += sq[pix1[1]  - pix2[1]];
        s    += sq[pix1[2]  - pix2[2]];
        s    += sq[pix1[3]  - pix2[3]];
        s    += sq[pix1[4]  - pix2[4]];
        s    += sq[pix1[5]  - pix2[5]];
        s    += sq[pix1[6]  - pix2[6]];
        s    += sq[pix1[7]  - pix2[7]];
        s    += sq[pix1[8]  - pix2[8]];
        s    += sq[pix1[9]  - pix2[9]];
        s    += sq[pix1[10] - pix2[10]];
        s    += sq[pix1[11] - pix2[11]];
        s    += sq[pix1[12] - pix2[12]];
        s    += sq[pix1[13] - pix2[13]];
        s    += sq[pix1[14] - pix2[14]];
        s    += sq[pix1[15] - pix2[15]];
        pix1 += stride;
        pix2 += stride;
    }
    return s;
}

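/*
 * Worked example for the sse*_c family: for two rows of four pixels
 * whose differences are 1, 2, 3 and 4, sse4_c returns
 * 2 * (1 + 4 + 9 + 16) = 60; sse8_c and sse16_c are the same per-row
 * loop unrolled to wider blocks.
 */
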
static int sum_abs_dctelem_c(int16_t *block)
{
    int sum = 0, i;

    for (i = 0; i < 64; i++)
        sum += FFABS(block[i]);
    return sum;
}

#define avg2(a, b)       ((a + b + 1) >> 1)
#define avg4(a, b, c, d) ((a + b + c + d + 2) >> 2)

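/*
 * avg2()/avg4() are the rounded 2- and 4-tap averages used to build
 * half-pel samples on the fly, matching the rounding of the motion
 * compensation interpolation: avg2(1, 2) == 2 and avg4(1, 1, 2, 2) == 2,
 * i.e. the +1/+2 bias rounds the .5 cases up. The *_x2/_y2/_xy2 SAD
 * functions below compare against these averaged predictions.
 */
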
static inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                              ptrdiff_t stride, int h)
{
    int s = 0, i;

    for (i = 0; i < h; i++) {
        s    += abs(pix1[0]  - pix2[0]);
        s    += abs(pix1[1]  - pix2[1]);
        s    += abs(pix1[2]  - pix2[2]);
        s    += abs(pix1[3]  - pix2[3]);
        s    += abs(pix1[4]  - pix2[4]);
        s    += abs(pix1[5]  - pix2[5]);
        s    += abs(pix1[6]  - pix2[6]);
        s    += abs(pix1[7]  - pix2[7]);
        s    += abs(pix1[8]  - pix2[8]);
        s    += abs(pix1[9]  - pix2[9]);
        s    += abs(pix1[10] - pix2[10]);
        s    += abs(pix1[11] - pix2[11]);
        s    += abs(pix1[12] - pix2[12]);
        s    += abs(pix1[13] - pix2[13]);
        s    += abs(pix1[14] - pix2[14]);
        s    += abs(pix1[15] - pix2[15]);
        pix1 += stride;
        pix2 += stride;
    }
    return s;
}

static int pix_abs16_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                          ptrdiff_t stride, int h)
{
    int s = 0, i;

    for (i = 0; i < h; i++) {
        s    += abs(pix1[0]  - avg2(pix2[0],  pix2[1]));
        s    += abs(pix1[1]  - avg2(pix2[1],  pix2[2]));
        s    += abs(pix1[2]  - avg2(pix2[2],  pix2[3]));
        s    += abs(pix1[3]  - avg2(pix2[3],  pix2[4]));
        s    += abs(pix1[4]  - avg2(pix2[4],  pix2[5]));
        s    += abs(pix1[5]  - avg2(pix2[5],  pix2[6]));
        s    += abs(pix1[6]  - avg2(pix2[6],  pix2[7]));
        s    += abs(pix1[7]  - avg2(pix2[7],  pix2[8]));
        s    += abs(pix1[8]  - avg2(pix2[8],  pix2[9]));
        s    += abs(pix1[9]  - avg2(pix2[9],  pix2[10]));
        s    += abs(pix1[10] - avg2(pix2[10], pix2[11]));
        s    += abs(pix1[11] - avg2(pix2[11], pix2[12]));
        s    += abs(pix1[12] - avg2(pix2[12], pix2[13]));
        s    += abs(pix1[13] - avg2(pix2[13], pix2[14]));
        s    += abs(pix1[14] - avg2(pix2[14], pix2[15]));
        s    += abs(pix1[15] - avg2(pix2[15], pix2[16]));
        pix1 += stride;
        pix2 += stride;
    }
    return s;
}

static int pix_abs16_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                          ptrdiff_t stride, int h)
{
    int s = 0, i;
    uint8_t *pix3 = pix2 + stride;

    for (i = 0; i < h; i++) {
        s    += abs(pix1[0]  - avg2(pix2[0],  pix3[0]));
        s    += abs(pix1[1]  - avg2(pix2[1],  pix3[1]));
        s    += abs(pix1[2]  - avg2(pix2[2],  pix3[2]));
        s    += abs(pix1[3]  - avg2(pix2[3],  pix3[3]));
        s    += abs(pix1[4]  - avg2(pix2[4],  pix3[4]));
        s    += abs(pix1[5]  - avg2(pix2[5],  pix3[5]));
        s    += abs(pix1[6]  - avg2(pix2[6],  pix3[6]));
        s    += abs(pix1[7]  - avg2(pix2[7],  pix3[7]));
        s    += abs(pix1[8]  - avg2(pix2[8],  pix3[8]));
        s    += abs(pix1[9]  - avg2(pix2[9],  pix3[9]));
        s    += abs(pix1[10] - avg2(pix2[10], pix3[10]));
        s    += abs(pix1[11] - avg2(pix2[11], pix3[11]));
        s    += abs(pix1[12] - avg2(pix2[12], pix3[12]));
        s    += abs(pix1[13] - avg2(pix2[13], pix3[13]));
        s    += abs(pix1[14] - avg2(pix2[14], pix3[14]));
        s    += abs(pix1[15] - avg2(pix2[15], pix3[15]));
        pix1 += stride;
        pix2 += stride;
        pix3 += stride;
    }
    return s;
}

static int pix_abs16_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                           ptrdiff_t stride, int h)
{
    int s = 0, i;
    uint8_t *pix3 = pix2 + stride;

    for (i = 0; i < h; i++) {
        s    += abs(pix1[0]  - avg4(pix2[0],  pix2[1],  pix3[0],  pix3[1]));
        s    += abs(pix1[1]  - avg4(pix2[1],  pix2[2],  pix3[1],  pix3[2]));
        s    += abs(pix1[2]  - avg4(pix2[2],  pix2[3],  pix3[2],  pix3[3]));
        s    += abs(pix1[3]  - avg4(pix2[3],  pix2[4],  pix3[3],  pix3[4]));
        s    += abs(pix1[4]  - avg4(pix2[4],  pix2[5],  pix3[4],  pix3[5]));
        s    += abs(pix1[5]  - avg4(pix2[5],  pix2[6],  pix3[5],  pix3[6]));
        s    += abs(pix1[6]  - avg4(pix2[6],  pix2[7],  pix3[6],  pix3[7]));
        s    += abs(pix1[7]  - avg4(pix2[7],  pix2[8],  pix3[7],  pix3[8]));
        s    += abs(pix1[8]  - avg4(pix2[8],  pix2[9],  pix3[8],  pix3[9]));
        s    += abs(pix1[9]  - avg4(pix2[9],  pix2[10], pix3[9],  pix3[10]));
        s    += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
        s    += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
        s    += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
        s    += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
        s    += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
        s    += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
        pix1 += stride;
        pix2 += stride;
        pix3 += stride;
    }
    return s;
}

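/*
 * The _x2, _y2 and _xy2 variants score pix1 against the reference
 * shifted by half a pixel horizontally, vertically or diagonally,
 * synthesizing the half-pel samples with avg2()/avg4(); together with
 * the full-pel functions they cover the sub-pel candidate positions
 * tried during motion estimation.
 */
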
static inline int pix_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                             ptrdiff_t stride, int h)
{
    int s = 0, i;

    for (i = 0; i < h; i++) {
        s    += abs(pix1[0] - pix2[0]);
        s    += abs(pix1[1] - pix2[1]);
        s    += abs(pix1[2] - pix2[2]);
        s    += abs(pix1[3] - pix2[3]);
        s    += abs(pix1[4] - pix2[4]);
        s    += abs(pix1[5] - pix2[5]);
        s    += abs(pix1[6] - pix2[6]);
        s    += abs(pix1[7] - pix2[7]);
        pix1 += stride;
        pix2 += stride;
    }
    return s;
}

static int pix_abs8_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                         ptrdiff_t stride, int h)
{
    int s = 0, i;

    for (i = 0; i < h; i++) {
        s    += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s    += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s    += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s    += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s    += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s    += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s    += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s    += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        pix1 += stride;
        pix2 += stride;
    }
    return s;
}

static int pix_abs8_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                         ptrdiff_t stride, int h)
{
    int s = 0, i;
    uint8_t *pix3 = pix2 + stride;

    for (i = 0; i < h; i++) {
        s    += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s    += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s    += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s    += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s    += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s    += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s    += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s    += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        pix1 += stride;
        pix2 += stride;
        pix3 += stride;
    }
    return s;
}

static int pix_abs8_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                          ptrdiff_t stride, int h)
{
    int s = 0, i;
    uint8_t *pix3 = pix2 + stride;

    for (i = 0; i < h; i++) {
        s    += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s    += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s    += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s    += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s    += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s    += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s    += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s    += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        pix1 += stride;
        pix2 += stride;
        pix3 += stride;
    }
    return s;
}

static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
                    ptrdiff_t stride, int h)
{
    int score1 = 0, score2 = 0, x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 16; x++)
            score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
        if (y + 1 < h) {
            for (x = 0; x < 15; x++)
                score2 += FFABS(s1[x]     - s1[x + stride] -
                                s1[x + 1] + s1[x + stride + 1]) -
                          FFABS(s2[x]     - s2[x + stride] -
                                s2[x + 1] + s2[x + stride + 1]);
        }
        s1 += stride;
        s2 += stride;
    }

    if (c)
        return score1 + FFABS(score2) * c->avctx->nsse_weight;
    else
        return score1 + FFABS(score2) * 8;
}

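/*
 * NSSE is a noise-preserving SSE: score1 is plain SSE, while score2
 * compares the local 2x2 gradient (texture) of the two blocks, so
 * candidates that smooth away noise or detail are penalized even when
 * their raw SSE is low. The weight comes from AVCodecContext.nsse_weight
 * (default 8); a hypothetical caller could tune it like:
 *
 *     avctx->nsse_weight = 16; // penalize texture loss more strongly
 */
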
static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
                   ptrdiff_t stride, int h)
{
    int score1 = 0, score2 = 0, x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
        if (y + 1 < h) {
            for (x = 0; x < 7; x++)
                score2 += FFABS(s1[x]     - s1[x + stride] -
                                s1[x + 1] + s1[x + stride + 1]) -
                          FFABS(s2[x]     - s2[x + stride] -
                                s2[x + 1] + s2[x + stride + 1]);
        }
        s1 += stride;
        s2 += stride;
    }

    if (c)
        return score1 + FFABS(score2) * c->avctx->nsse_weight;
    else
        return score1 + FFABS(score2) * 8;
}

static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b,
                    ptrdiff_t stride, int h)
{
    return 0;
}

void ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type)
{
    int i;

    memset(cmp, 0, sizeof(void *) * 6);

    for (i = 0; i < 6; i++) {
        switch (type & 0xFF) {
        case FF_CMP_SAD:
            cmp[i] = c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i] = c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i] = c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i] = c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i] = c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i] = c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i] = c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i] = c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i] = c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i] = c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i] = c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i] = zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i] = c->nsse[i];
            break;
#if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER
        case FF_CMP_W53:
            cmp[i] = c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i] = c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,
                   "internal error in cmp function selection\n");
        }
    }
}

#define BUTTERFLY2(o1, o2, i1, i2)              \
    o1 = (i1) + (i2);                           \
    o2 = (i1) - (i2);

#define BUTTERFLY1(x, y)                        \
    {                                           \
        int a, b;                               \
        a = x;                                  \
        b = y;                                  \
        x = a + b;                              \
        y = a - b;                              \
    }

#define BUTTERFLYA(x, y) (FFABS((x) + (y)) + FFABS((x) - (y)))

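/*
 * One butterfly maps (a, b) to (a + b, a - b); applying it across
 * strides 1, 2 and 4 in both directions yields an 8x8 Hadamard
 * transform. For example, BUTTERFLY1 on x = 3, y = 1 leaves x = 4,
 * y = 2, and BUTTERFLYA(3, 1) == |3 + 1| + |3 - 1| == 6 fuses the last
 * butterfly stage with the absolute-value summation of SATD.
 */
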
static int hadamard8_diff8x8_c(MpegEncContext *s, uint8_t *dst,
                               uint8_t *src, ptrdiff_t stride, int h)
{
    int i, temp[64], sum = 0;

    av_assert2(h == 8);

    for (i = 0; i < 8; i++) {
        // FIXME: try pointer walks
        BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1],
                   src[stride * i + 0] - dst[stride * i + 0],
                   src[stride * i + 1] - dst[stride * i + 1]);
        BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3],
                   src[stride * i + 2] - dst[stride * i + 2],
                   src[stride * i + 3] - dst[stride * i + 3]);
        BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5],
                   src[stride * i + 4] - dst[stride * i + 4],
                   src[stride * i + 5] - dst[stride * i + 5]);
        BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7],
                   src[stride * i + 6] - dst[stride * i + 6],
                   src[stride * i + 7] - dst[stride * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]);
        BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]);
        BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]);
    }

    for (i = 0; i < 8; i++) {
        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]);
        BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]);
        BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]);

        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]);
        BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]);
        BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]);

        sum += BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i]) +
               BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i]) +
               BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i]) +
               BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
    }
    return sum;
}

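/*
 * hadamard8_diff8x8_c is the SATD metric selected via FF_CMP_SATD: it
 * transforms the 8x8 residual (src - dst) and sums the magnitudes of
 * the transform coefficients, which tracks the coded cost of a
 * candidate considerably better than plain SAD at comparable speed.
 */
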
static int hadamard8_intra8x8_c(MpegEncContext *s, uint8_t *src,
                                uint8_t *dummy, ptrdiff_t stride, int h)
{
    int i, temp[64], sum = 0;

    av_assert2(h == 8);

    for (i = 0; i < 8; i++) {
        // FIXME: try pointer walks
        BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1],
                   src[stride * i + 0], src[stride * i + 1]);
        BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3],
                   src[stride * i + 2], src[stride * i + 3]);
        BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5],
                   src[stride * i + 4], src[stride * i + 5]);
        BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7],
                   src[stride * i + 6], src[stride * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]);
        BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]);
        BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]);
    }

    for (i = 0; i < 8; i++) {
        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]);
        BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]);
        BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]);

        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]);
        BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]);
        BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]);

        sum +=
            BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i])
            + BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i])
            + BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i])
            + BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
    }

    sum -= FFABS(temp[8 * 0] + temp[8 * 4]); // -mean

    return sum;
}

static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1,
                        uint8_t *src2, ptrdiff_t stride, int h)
{
    LOCAL_ALIGNED_16(int16_t, temp, [64]);

    av_assert2(h == 8);

    s->pdsp.diff_pixels(temp, src1, src2, stride);
    s->fdsp.fdct(temp);
    return s->mecc.sum_abs_dctelem(temp);
}

#if CONFIG_GPL
#define DCT8_1D                                         \
    {                                                   \
        const int s07 = SRC(0) + SRC(7);                \
        const int s16 = SRC(1) + SRC(6);                \
        const int s25 = SRC(2) + SRC(5);                \
        const int s34 = SRC(3) + SRC(4);                \
        const int a0  = s07 + s34;                      \
        const int a1  = s16 + s25;                      \
        const int a2  = s07 - s34;                      \
        const int a3  = s16 - s25;                      \
        const int d07 = SRC(0) - SRC(7);                \
        const int d16 = SRC(1) - SRC(6);                \
        const int d25 = SRC(2) - SRC(5);                \
        const int d34 = SRC(3) - SRC(4);                \
        const int a4  = d16 + d25 + (d07 + (d07 >> 1)); \
        const int a5  = d07 - d34 - (d25 + (d25 >> 1)); \
        const int a6  = d07 + d34 - (d16 + (d16 >> 1)); \
        const int a7  = d16 - d25 + (d34 + (d34 >> 1)); \
        DST(0, a0 + a1);                                \
        DST(1, a4 + (a7 >> 2));                         \
        DST(2, a2 + (a3 >> 1));                         \
        DST(3, a5 + (a6 >> 2));                         \
        DST(4, a0 - a1);                                \
        DST(5, a6 - (a5 >> 2));                         \
        DST(6, (a2 >> 1) - a3);                         \
        DST(7, (a4 >> 2) - a7);                         \
    }

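/*
 * DCT8_1D is one 1-D pass of the integer 8x8 transform in the style of
 * H.264 High profile (hence "dct264"); SRC()/DST() are redefined below
 * so the same body serves both the row pass (store coefficients) and
 * the column pass (accumulate absolute values directly).
 */
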
static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1,
                           uint8_t *src2, ptrdiff_t stride, int h)
{
    int16_t dct[8][8];
    int i, sum = 0;

    s->pdsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x, v) dct[i][x] = v
    for (i = 0; i < 8; i++)
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x, v) sum += FFABS(v)
    for (i = 0; i < 8; i++)
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif

*s
, uint8_t *src1
,
618 uint8_t *src2
, ptrdiff_t stride
, int h
)
620 LOCAL_ALIGNED_16(int16_t, temp
, [64]);
625 s
->pdsp
.diff_pixels(temp
, src1
, src2
, stride
);
628 for (i
= 0; i
< 64; i
++)
629 sum
= FFMAX(sum
, FFABS(temp
[i
]));
static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1,
                           uint8_t *src2, ptrdiff_t stride, int h)
{
    LOCAL_ALIGNED_16(int16_t, temp, [64 * 2]);
    int16_t *const bak = temp + 64;
    int sum = 0, i;

    av_assert2(h == 8);

    s->mb_intra = 0;

    s->pdsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64 * sizeof(int16_t));

    s->block_last_index[0 /* FIXME */] =
        s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct_8(temp); // FIXME

    for (i = 0; i < 64; i++)
        sum += (temp[i] - bak[i]) * (temp[i] - bak[i]);

    return sum;
}

static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
                   ptrdiff_t stride, int h)
{
    const uint8_t *scantable = s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length = s->ac_esc_length;
    uint8_t *length, *last_length;

    av_assert2(h == 8);

    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0 /* FIXME */] =
    last                              =
        s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);

    bits = 0;

    if (s->mb_intra) {
        start_i     = 1;
        length      = s->intra_ac_vlc_length;
        last_length = s->intra_ac_vlc_last_length;
        bits       += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma
    } else {
        start_i     = 0;
        length      = s->inter_ac_vlc_length;
        last_length = s->inter_ac_vlc_last_length;
    }

    if (last >= start_i) {
        run = 0;
        for (i = start_i; i < last; i++) {
            int j = scantable[i];
            level = temp[j];

            if (level) {
                level += 64;
                if ((level & (~127)) == 0)
                    bits += length[UNI_AC_ENC_INDEX(run, level)];
                else
                    bits += esc_length;
                run = 0;
            } else
                run++;
        }
        i = scantable[last];

        level = temp[i] + 64;

        av_assert2(level - 64);

        if ((level & (~127)) == 0) {
            bits += last_length[UNI_AC_ENC_INDEX(run, level)];
        } else
            bits += esc_length;
    }

    if (last >= 0) {
        if (s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->idsp.idct_add(lsrc2, 8, temp);

    distortion = s->mecc.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    return distortion + ((bits * s->qscale * s->qscale * 109 + 64) >> 7);
}

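/*
 * rd8x8_c approximates a rate-distortion cost: SSE of the actually
 * reconstructed block plus a bit cost scaled by
 * (qscale * qscale * 109 + 64) >> 7, i.e. lambda is roughly
 * 0.85 * qscale^2 in fixed-point arithmetic.
 */
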
static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
                    ptrdiff_t stride, int h)
{
    const uint8_t *scantable = s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length = s->ac_esc_length;
    uint8_t *length, *last_length;

    av_assert2(h == 8);

    s->pdsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0 /* FIXME */] =
    last                              =
        s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);

    bits = 0;

    if (s->mb_intra) {
        start_i     = 1;
        length      = s->intra_ac_vlc_length;
        last_length = s->intra_ac_vlc_last_length;
        bits       += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma
    } else {
        start_i     = 0;
        length      = s->inter_ac_vlc_length;
        last_length = s->inter_ac_vlc_last_length;
    }

    if (last >= start_i) {
        run = 0;
        for (i = start_i; i < last; i++) {
            int j = scantable[i];
            level = temp[j];

            if (level) {
                level += 64;
                if ((level & (~127)) == 0)
                    bits += length[UNI_AC_ENC_INDEX(run, level)];
                else
                    bits += esc_length;
                run = 0;
            } else
                run++;
        }
        i = scantable[last];

        level = temp[i] + 64;

        av_assert2(level - 64);

        if ((level & (~127)) == 0)
            bits += last_length[UNI_AC_ENC_INDEX(run, level)];
        else
            bits += esc_length;
    }

    return bits;
}

#define VSAD_INTRA(size)                                                \
static int vsad_intra ## size ## _c(MpegEncContext *c,                  \
                                    uint8_t *s, uint8_t *dummy,         \
                                    ptrdiff_t stride, int h)            \
{                                                                       \
    int score = 0, x, y;                                                \
                                                                        \
    for (y = 1; y < h; y++) {                                           \
        for (x = 0; x < size; x += 4) {                                 \
            score += FFABS(s[x]     - s[x + stride]) +                  \
                     FFABS(s[x + 1] - s[x + stride + 1]) +              \
                     FFABS(s[x + 2] - s[x + 2 + stride]) +              \
                     FFABS(s[x + 3] - s[x + 3 + stride]);               \
        }                                                               \
        s += stride;                                                    \
    }                                                                   \
                                                                        \
    return score;                                                       \
}
VSAD_INTRA(8)
VSAD_INTRA(16)

#define VSAD(size)                                                      \
static int vsad ## size ## _c(MpegEncContext *c,                        \
                              uint8_t *s1, uint8_t *s2,                 \
                              ptrdiff_t stride, int h)                  \
{                                                                       \
    int score = 0, x, y;                                                \
                                                                        \
    for (y = 1; y < h; y++) {                                           \
        for (x = 0; x < size; x++)                                      \
            score += FFABS(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]); \
        s1 += stride;                                                   \
        s2 += stride;                                                   \
    }                                                                   \
                                                                        \
    return score;                                                       \
}
VSAD(8)
VSAD(16)

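/*
 * The vsad/vsse metrics accumulate absolute (or squared) vertical
 * gradients: vsad_intra measures how much each row differs from the row
 * above it, while vsad does the same for the residual between two
 * blocks. Large values flag vertical detail, which is the signal
 * traditionally used for interlaced (field DCT / field motion)
 * decisions.
 */
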
#define SQ(a) ((a) * (a))

#define VSSE_INTRA(size)                                                \
static int vsse_intra ## size ## _c(MpegEncContext *c,                  \
                                    uint8_t *s, uint8_t *dummy,         \
                                    ptrdiff_t stride, int h)            \
{                                                                       \
    int score = 0, x, y;                                                \
                                                                        \
    for (y = 1; y < h; y++) {                                           \
        for (x = 0; x < size; x += 4) {                                 \
            score += SQ(s[x]     - s[x + stride]) +                     \
                     SQ(s[x + 1] - s[x + stride + 1]) +                 \
                     SQ(s[x + 2] - s[x + stride + 2]) +                 \
                     SQ(s[x + 3] - s[x + stride + 3]);                  \
        }                                                               \
        s += stride;                                                    \
    }                                                                   \
                                                                        \
    return score;                                                       \
}
VSSE_INTRA(8)
VSSE_INTRA(16)

#define VSSE(size)                                                      \
static int vsse ## size ## _c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, \
                              ptrdiff_t stride, int h)                  \
{                                                                       \
    int score = 0, x, y;                                                \
                                                                        \
    for (y = 1; y < h; y++) {                                           \
        for (x = 0; x < size; x++)                                      \
            score += SQ(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]); \
        s1 += stride;                                                   \
        s2 += stride;                                                   \
    }                                                                   \
                                                                        \
    return score;                                                       \
}
VSSE(8)
VSSE(16)

#define WRAPPER8_16_SQ(name8, name16)                                   \
static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src,        \
                  ptrdiff_t stride, int h)                              \
{                                                                       \
    int score = 0;                                                      \
                                                                        \
    score += name8(s, dst, src, stride, 8);                             \
    score += name8(s, dst + 8, src + 8, stride, 8);                     \
    if (h == 16) {                                                      \
        dst   += 8 * stride;                                            \
        src   += 8 * stride;                                            \
        score += name8(s, dst, src, stride, 8);                         \
        score += name8(s, dst + 8, src + 8, stride, 8);                 \
    }                                                                   \
    return score;                                                       \
}

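/*
 * WRAPPER8_16_SQ composes a 16-wide score from an 8x8 kernel: left and
 * right halves of the top 8 rows, then, when h == 16, the same two
 * halves of the bottom 8 rows, so a 16x16 score is the sum of four
 * 8x8 scores.
 */
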
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)

av_cold void ff_me_cmp_init_static(void)
{
    int i;

    for (i = 0; i < 512; i++)
        ff_square_tab[i] = (i - 256) * (i - 256);
}

int ff_check_alignment(void)
{
    static int did_fail = 0;
    LOCAL_ALIGNED_16(int, aligned, [4]);

    if ((intptr_t)aligned & 15) {
        if (!did_fail) {
#if HAVE_MMX || HAVE_ALTIVEC
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to FFmpeg developers.\n");
#endif
            did_fail = 1;
        }
        return -1;
    }
    return 0;
}

av_cold void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx)
{
    ff_check_alignment();

    c->sum_abs_dctelem = sum_abs_dctelem_c;

    /* TODO [0] 16  [1] 8 */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

#define SET_CMP_FUNC(name)                      \
    c->name[0] = name ## 16_c;                  \
    c->name[1] = name ## 8x8_c;

    SET_CMP_FUNC(hadamard8_diff)
    c->hadamard8_diff[4] = hadamard8_intra16_c;
    c->hadamard8_diff[5] = hadamard8_intra8x8_c;
    SET_CMP_FUNC(dct_sad)
    SET_CMP_FUNC(dct_max)
#if CONFIG_GPL
    SET_CMP_FUNC(dct264_sad)
#endif
    c->sad[0] = pix_abs16_c;
    c->sad[1] = pix_abs8_c;
    c->sse[0] = sse16_c;
    c->sse[1] = sse8_c;
    c->sse[2] = sse4_c;
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)
    c->vsad[0] = vsad16_c;
    c->vsad[1] = vsad8_c;
    c->vsad[4] = vsad_intra16_c;
    c->vsad[5] = vsad_intra8_c;
    c->vsse[0] = vsse16_c;
    c->vsse[1] = vsse8_c;
    c->vsse[4] = vsse_intra16_c;
    c->vsse[5] = vsse_intra8_c;
    c->nsse[0] = nsse16_c;
    c->nsse[1] = nsse8_c;
#if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER
    ff_dsputil_init_dwt(c);
#endif

    if (ARCH_ALPHA)
        ff_me_cmp_init_alpha(c, avctx);
    if (ARCH_ARM)
        ff_me_cmp_init_arm(c, avctx);
    if (ARCH_PPC)
        ff_me_cmp_init_ppc(c, avctx);
    if (ARCH_X86)
        ff_me_cmp_init_x86(c, avctx);
}

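/*
 * A minimal initialization sketch (hypothetical caller; cur, ref and
 * stride are assumed): the C functions are installed first, and the
 * per-arch init may then overwrite entries with SIMD versions.
 *
 *     MECmpContext mecc;
 *     ff_me_cmp_init(&mecc, avctx);
 *     int sad = mecc.sad[0](NULL, cur, ref, stride, 16); // 16x16 SAD
 */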