1 /*****************************************************************************
2 * Copyright (C) 2013 x265 project
4 * Authors: Steve Borho <steve@borho.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
24 #include "pixelharness.h"
25 #include "primitives.h"
// Constructor: populates the harness's test buffers for every index in [0, BUFFSIZE).
// Row [0] of each *_test_buff array receives pseudo-random values from rand(), row [1] the
// low-end constants (PIXEL_MIN, SMIN, SHORT_MIN, -16384) and row [2] the high-end constants
// (PIXEL_MAX, SMAX, SHORT_MAX, 16383, 65535, 255). The flat pbuf, sbuf, ibuf and psbuf
// arrays receive pseudo-random values only.
// NOTE(review): the block comment opened on the "[0] --- Random values" line has no
// closing marker in the visible lines -- confirm against the full file.
29 PixelHarness::PixelHarness()
31 /* [0] --- Random values
34 for (int i
= 0; i
< BUFFSIZE
; i
++)
// NOTE(review): "% PIXEL_MAX" can never produce PIXEL_MAX itself, whereas the
// "& PIXEL_MAX" form used for other buffers can -- confirm the difference is intended.
36 pixel_test_buff
[0][i
] = rand() % PIXEL_MAX
;
// For a non-negative rand() the expression below lies in [-SMAX - 1, SMAX - 1].
37 short_test_buff
[0][i
] = (rand() % (2 * SMAX
+ 1)) - SMAX
- 1; // max(SHORT_MIN, min(rand(), SMAX));
38 short_test_buff1
[0][i
] = rand() & PIXEL_MAX
; // For block copy only
39 short_test_buff2
[0][i
] = rand() % 16383; // for addAvg
40 int_test_buff
[0][i
] = rand() % SHORT_MAX
;
// The two moduli below are 65535 and 255, so the random rows top out at 65534 and 254.
41 ushort_test_buff
[0][i
] = rand() % ((1 << 16) - 1);
42 uchar_test_buff
[0][i
] = rand() % ((1 << 8) - 1);
// Row [1]: low-end constants.
44 pixel_test_buff
[1][i
] = PIXEL_MIN
;
45 short_test_buff
[1][i
] = SMIN
;
46 short_test_buff1
[1][i
] = PIXEL_MIN
;
47 short_test_buff2
[1][i
] = -16384;
48 int_test_buff
[1][i
] = SHORT_MIN
;
49 ushort_test_buff
[1][i
] = PIXEL_MIN
;
50 uchar_test_buff
[1][i
] = PIXEL_MIN
;
// Row [2]: high-end constants.
52 pixel_test_buff
[2][i
] = PIXEL_MAX
;
53 short_test_buff
[2][i
] = SMAX
;
54 short_test_buff1
[2][i
] = PIXEL_MAX
;
55 short_test_buff2
[2][i
] = 16383;
56 int_test_buff
[2][i
] = SHORT_MAX
;
57 ushort_test_buff
[2][i
] = ((1 << 16) - 1);
58 uchar_test_buff
[2][i
] = 255;
// Flat buffers: pseudo-random data only.
60 pbuf1
[i
] = rand() & PIXEL_MAX
;
61 pbuf2
[i
] = rand() & PIXEL_MAX
;
62 pbuf3
[i
] = rand() & PIXEL_MAX
;
63 pbuf4
[i
] = rand() & PIXEL_MAX
;
65 sbuf1
[i
] = (rand() % (2 * SMAX
+ 1)) - SMAX
- 1; //max(SHORT_MIN, min(rand(), SMAX));
66 sbuf2
[i
] = (rand() % (2 * SMAX
+ 1)) - SMAX
- 1; //max(SHORT_MIN, min(rand(), SMAX));
67 ibuf1
[i
] = (rand() % (2 * SMAX
+ 1)) - SMAX
- 1;
68 psbuf1
[i
] = (rand() % 65) - 32; // range is between -32 to 32
69 sbuf3
[i
] = rand() % PIXEL_MAX
; // for blockcopy only
// Exercises a pixel/pixel comparison primitive: for ITERS iterations two rows of
// pixel_test_buff are picked at random and both implementations are run on them with the
// same stride (STRIDE); opt is invoked through checked(), ref directly.
// @param ref  reference implementation
// @param opt  implementation under test
// The second operand is offset by j. The declaration/update of j, the comparison of
// vres with cres and the return statements are not part of the visible lines.
73 bool PixelHarness::check_pixelcmp(pixelcmp_t ref
, pixelcmp_t opt
)
76 intptr_t stride
= STRIDE
;
78 for (int i
= 0; i
< ITERS
; i
++)
// Independent random row selection for each operand.
80 int index1
= rand() % TEST_CASES
;
81 int index2
= rand() % TEST_CASES
;
82 int vres
= (int)checked(opt
, pixel_test_buff
[index1
], stride
, pixel_test_buff
[index2
] + j
, stride
);
83 int cres
= ref(pixel_test_buff
[index1
], stride
, pixel_test_buff
[index2
] + j
, stride
);
// Exercises a short/pixel comparison primitive: the first operand is a randomly chosen
// row of short_test_buff, the second a randomly chosen row of pixel_test_buff offset by j.
// opt is invoked through checked(), ref directly, both with stride STRIDE.
// @param ref  reference implementation
// @param opt  implementation under test
// The handling of j, the result comparison and the return statements are not part of
// the visible lines.
94 bool PixelHarness::check_pixelcmp_sp(pixelcmp_sp_t ref
, pixelcmp_sp_t opt
)
97 intptr_t stride
= STRIDE
;
99 for (int i
= 0; i
< ITERS
; i
++)
101 int index1
= rand() % TEST_CASES
;
102 int index2
= rand() % TEST_CASES
;
103 int vres
= (int)checked(opt
, short_test_buff
[index1
], stride
, pixel_test_buff
[index2
] + j
, stride
);
104 int cres
= ref(short_test_buff
[index1
], stride
, pixel_test_buff
[index2
] + j
, stride
);
// Exercises a short/short comparison primitive: both operands are randomly chosen rows
// of short_test_buff, the second one offset by j. opt is invoked through checked(),
// ref directly, both with stride STRIDE.
// @param ref  reference implementation
// @param opt  implementation under test
// The handling of j, the result comparison and the return statements are not part of
// the visible lines.
115 bool PixelHarness::check_pixelcmp_ss(pixelcmp_ss_t ref
, pixelcmp_ss_t opt
)
118 intptr_t stride
= STRIDE
;
120 for (int i
= 0; i
< ITERS
; i
++)
122 int index1
= rand() % TEST_CASES
;
123 int index2
= rand() % TEST_CASES
;
124 int vres
= (int)checked(opt
, short_test_buff
[index1
], stride
, short_test_buff
[index2
] + j
, stride
);
125 int cres
= ref(short_test_buff
[index1
], stride
, short_test_buff
[index2
] + j
, stride
);
// Exercises a three-candidate comparison primitive: one pixel_test_buff row is compared
// against three overlapping candidates taken from another row at offsets j, j + 1 and
// j + 2. Both implementations write their results into 16-entry int arrays (vres for
// opt, cres for ref); only entries 0..2 are compared afterwards.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
136 bool PixelHarness::check_pixelcmp_x3(pixelcmp_x3_t ref
, pixelcmp_x3_t opt
)
138 ALIGN_VAR_16(int, cres
[16]);
139 ALIGN_VAR_16(int, vres
[16]);
// The candidate stride is FENC_STRIDE minus 5.
141 intptr_t stride
= FENC_STRIDE
- 5;
142 for (int i
= 0; i
< ITERS
; i
++)
144 int index1
= rand() % TEST_CASES
;
145 int index2
= rand() % TEST_CASES
;
146 checked(opt
, pixel_test_buff
[index1
],
147 pixel_test_buff
[index2
] + j
,
148 pixel_test_buff
[index2
] + j
+ 1,
149 pixel_test_buff
[index2
] + j
+ 2, stride
, &vres
[0]);
150 ref(pixel_test_buff
[index1
],
151 pixel_test_buff
[index2
] + j
,
152 pixel_test_buff
[index2
] + j
+ 1,
153 pixel_test_buff
[index2
] + j
+ 2, stride
, &cres
[0]);
// Mismatch test over the three produced results.
154 if ((vres
[0] != cres
[0]) || ((vres
[1] != cres
[1])) || ((vres
[2] != cres
[2])))
// Exercises a four-candidate comparison primitive: one pixel_test_buff row is compared
// against four overlapping candidates taken from another row at offsets j .. j + 3.
// Both implementations write their results into 16-entry int arrays (vres for opt,
// cres for ref); only entries 0..3 are compared afterwards.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
164 bool PixelHarness::check_pixelcmp_x4(pixelcmp_x4_t ref
, pixelcmp_x4_t opt
)
166 ALIGN_VAR_16(int, cres
[16]);
167 ALIGN_VAR_16(int, vres
[16]);
// The candidate stride is FENC_STRIDE minus 5.
169 intptr_t stride
= FENC_STRIDE
- 5;
170 for (int i
= 0; i
< ITERS
; i
++)
172 int index1
= rand() % TEST_CASES
;
173 int index2
= rand() % TEST_CASES
;
174 checked(opt
, pixel_test_buff
[index1
],
175 pixel_test_buff
[index2
] + j
,
176 pixel_test_buff
[index2
] + j
+ 1,
177 pixel_test_buff
[index2
] + j
+ 2,
178 pixel_test_buff
[index2
] + j
+ 3, stride
, &vres
[0]);
179 ref(pixel_test_buff
[index1
],
180 pixel_test_buff
[index2
] + j
,
181 pixel_test_buff
[index2
] + j
+ 1,
182 pixel_test_buff
[index2
] + j
+ 2,
183 pixel_test_buff
[index2
] + j
+ 3, stride
, &cres
[0]);
// Mismatch test over the four produced results.
185 if ((vres
[0] != cres
[0]) || ((vres
[1] != cres
[1])) || ((vres
[2] != cres
[2])) || ((vres
[3] != cres
[3])))
// Exercises a residual-calculation primitive: both implementations receive pbuf1 + j
// and a pixel_test_buff row + j and write int16_t output into separate 64x64 buffers,
// which are then compared byte-for-byte in full.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
195 bool PixelHarness::check_calresidual(calcresidual_t ref
, calcresidual_t opt
)
197 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
198 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
// Both outputs start zeroed so untouched entries compare equal.
199 memset(ref_dest
, 0, 64 * 64 * sizeof(int16_t));
200 memset(opt_dest
, 0, 64 * 64 * sizeof(int16_t));
203 intptr_t stride
= STRIDE
;
204 for (int i
= 0; i
< ITERS
; i
++)
// Test rows are visited round-robin rather than at random.
206 int index
= i
% TEST_CASES
;
207 checked(opt
, pbuf1
+ j
, pixel_test_buff
[index
] + j
, opt_dest
, stride
);
208 ref(pbuf1
+ j
, pixel_test_buff
[index
] + j
, ref_dest
, stride
);
210 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
// Exercises an ssd_s primitive on sbuf1 + j with a per-iteration random stride.
// ref is called first with the int stride; opt is invoked through checked() with the
// same stride cast to intptr_t.
// @param ref  reference implementation
// @param opt  implementation under test
// The comparison of cres with vres and the return statements are not part of the
// visible lines.
220 bool PixelHarness::check_ssd_s(pixel_ssd_s_t ref
, pixel_ssd_s_t opt
)
223 for (int i
= 0; i
< ITERS
; i
++)
225 // NOTE: stride must be multiple of 16, because minimum block is 4x4
// STRIDE plus a random amount below STRIDE, rounded down to a multiple of 16 by the mask.
226 int stride
= (STRIDE
+ (rand() % STRIDE
)) & ~15;
227 int cres
= ref(sbuf1
+ j
, stride
);
228 int vres
= (int)checked(opt
, sbuf1
+ j
, (intptr_t)stride
);
// Exercises the short-input weighted-prediction primitive: both implementations read a
// short_test_buff row + j and write pixels into separate zeroed 64x64 buffers, which are
// then compared in full. The weighting parameters are drawn once, before the loop.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
// The variable height is used in the calls but its declaration is not part of the
// visible lines.
242 bool PixelHarness::check_weightp(weightp_sp_t ref
, weightp_sp_t opt
)
244 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
245 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
247 memset(ref_dest
, 0, 64 * 64 * sizeof(pixel
));
248 memset(opt_dest
, 0, 64 * 64 * sizeof(pixel
));
// Even width in [2, 64].
250 int width
= 2 * (rand() % 32 + 1);
// w0 in [0, 127], shift in [0, 14], offset in [-128, 127].
252 int w0
= rand() % 128;
253 int shift
= rand() % 15;
// Half of (1 << shift), or 0 when shift is 0.
254 int round
= shift
? (1 << (shift
- 1)) : 0;
255 int offset
= (rand() % 256) - 128;
256 intptr_t stride
= 64;
257 for (int i
= 0; i
< ITERS
; i
++)
259 int index
= i
% TEST_CASES
;
// stride is passed twice (two consecutive stride arguments).
260 checked(opt
, short_test_buff
[index
] + j
, opt_dest
, stride
, stride
, width
, height
, w0
, round
, shift
, offset
);
261 ref(short_test_buff
[index
] + j
, ref_dest
, stride
, stride
, width
, height
, w0
, round
, shift
, offset
);
263 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
// Exercises the pixel-input weighted-prediction primitive: both implementations read a
// pixel_test_buff row + j and write pixels into separate zeroed 64x64 buffers, which are
// then compared in full. The weighting parameters are drawn once, before the loop.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
// The variable height is used in the calls but its declaration is not part of the
// visible lines.
273 bool PixelHarness::check_weightp(weightp_pp_t ref
, weightp_pp_t opt
)
275 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
276 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
278 memset(ref_dest
, 0, 64 * 64 * sizeof(pixel
));
279 memset(opt_dest
, 0, 64 * 64 * sizeof(pixel
));
// Width is one of 16, 32, 48 or 64.
281 int width
= 16 * (rand() % 4 + 1);
// w0 in [0, 127], shift in [0, 14], offset in [-128, 127].
283 int w0
= rand() % 128;
284 int shift
= rand() % 15;
// Half of (1 << shift), or 0 when shift is 0.
285 int round
= shift
? (1 << (shift
- 1)) : 0;
286 int offset
= (rand() % 256) - 128;
287 intptr_t stride
= 64;
288 for (int i
= 0; i
< ITERS
; i
++)
290 int index
= i
% TEST_CASES
;
291 checked(opt
, pixel_test_buff
[index
] + j
, opt_dest
, stride
, width
, height
, w0
, round
, shift
, offset
);
292 ref(pixel_test_buff
[index
] + j
, ref_dest
, stride
, width
, height
, w0
, round
, shift
, offset
);
294 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
// Exercises a downscale primitive that produces four output planes (suffixes f, h, v
// and c). Each implementation writes into its own set of four 32x32 pixel buffers from
// a pixel_test_buff row + j; each pair of planes is then compared in full.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
// bx and by are passed to both calls but their declarations are not part of the visible
// lines. No initialisation of the output buffers is visible either.
304 bool PixelHarness::check_downscale_t(downscale_t ref
, downscale_t opt
)
306 ALIGN_VAR_16(pixel
, ref_destf
[32 * 32]);
307 ALIGN_VAR_16(pixel
, opt_destf
[32 * 32]);
309 ALIGN_VAR_16(pixel
, ref_desth
[32 * 32]);
310 ALIGN_VAR_16(pixel
, opt_desth
[32 * 32]);
312 ALIGN_VAR_16(pixel
, ref_destv
[32 * 32]);
313 ALIGN_VAR_16(pixel
, opt_destv
[32 * 32]);
315 ALIGN_VAR_16(pixel
, ref_destc
[32 * 32]);
316 ALIGN_VAR_16(pixel
, opt_destc
[32 * 32]);
// Source stride is twice the destination stride.
318 intptr_t src_stride
= 64;
319 intptr_t dst_stride
= 32;
323 for (int i
= 0; i
< ITERS
; i
++)
325 int index
= i
% TEST_CASES
;
326 ref(pixel_test_buff
[index
] + j
, ref_destf
, ref_desth
, ref_destv
,
327 ref_destc
, src_stride
, dst_stride
, bx
, by
);
328 checked(opt
, pixel_test_buff
[index
] + j
, opt_destf
, opt_desth
, opt_destv
,
329 opt_destc
, src_stride
, dst_stride
, bx
, by
);
// One full-buffer comparison per output plane.
331 if (memcmp(ref_destf
, opt_destf
, 32 * 32 * sizeof(pixel
)))
333 if (memcmp(ref_desth
, opt_desth
, 32 * 32 * sizeof(pixel
)))
335 if (memcmp(ref_destv
, opt_destv
, 32 * 32 * sizeof(pixel
)))
337 if (memcmp(ref_destc
, opt_destc
, 32 * 32 * sizeof(pixel
)))
// Exercises a 32-bit to 16-bit right-shift conversion primitive: both implementations
// read an int_test_buff row + j and write int16_t output into separate 64x64 buffers,
// which are compared in full after each iteration.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
347 bool PixelHarness::check_cvt32to16_shr_t(cvt32to16_shr_t ref
, cvt32to16_shr_t opt
)
349 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
350 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
// Identical 0xCD fill so entries neither implementation writes still compare equal.
352 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
353 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
356 intptr_t stride
= STRIDE
;
357 for (int i
= 0; i
< ITERS
; i
++)
// Shift amount in [1, 7].
359 int shift
= (rand() % 7 + 1);
361 int index
= i
% TEST_CASES
;
362 checked(opt
, opt_dest
, int_test_buff
[index
] + j
, stride
, shift
, (int)STRIDE
);
363 ref(ref_dest
, int_test_buff
[index
] + j
, stride
, shift
, (int)STRIDE
);
365 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
// Exercises a 16-bit to 32-bit left-shift conversion primitive: both implementations
// read a short_test_buff row + j and write int32_t output into separate 64x64 buffers,
// which are compared in full after each iteration.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
// NOTE(review): no initialisation of ref_dest / opt_dest is visible before the
// full-buffer memcmp. If the primitive does not write all 64 * 64 entries, the
// comparison would read indeterminate values -- confirm.
375 bool PixelHarness::check_cvt16to32_shl_t(cvt16to32_shl_t ref
, cvt16to32_shl_t opt
)
377 ALIGN_VAR_16(int32_t, ref_dest
[64 * 64]);
378 ALIGN_VAR_16(int32_t, opt_dest
[64 * 64]);
381 intptr_t stride
= STRIDE
;
382 for (int i
= 0; i
< ITERS
; i
++)
// Shift amount in [1, 7].
384 int shift
= (rand() % 7 + 1);
386 int index
= i
% TEST_CASES
;
387 checked(opt
, opt_dest
, short_test_buff
[index
] + j
, stride
, shift
, (int)stride
);
388 ref(ref_dest
, short_test_buff
[index
] + j
, stride
, shift
, (int)stride
);
390 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int32_t)))
// Exercises a 16-bit to 32-bit right-shift conversion primitive: both implementations
// read a short_test_buff row + j and write int32_t output into separate 64x64 buffers,
// which are compared in full after each iteration.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
400 bool PixelHarness::check_cvt16to32_shr_t(cvt16to32_shr_t ref
, cvt16to32_shr_t opt
)
402 ALIGN_VAR_16(int32_t, ref_dest
[64 * 64]);
403 ALIGN_VAR_16(int32_t, opt_dest
[64 * 64]);
// Identical 0xCD fill so entries neither implementation writes still compare equal.
405 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
406 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
409 intptr_t stride
= STRIDE
;
410 for (int i
= 0; i
< ITERS
; i
++)
// Shift amount in [1, 7].
412 int shift
= (rand() % 7 + 1);
414 int index
= i
% TEST_CASES
;
415 checked(opt
, opt_dest
, short_test_buff
[index
] + j
, stride
, shift
, (int)stride
);
416 ref(ref_dest
, short_test_buff
[index
] + j
, stride
, shift
, (int)stride
);
418 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int32_t)))
// Exercises a 32-bit to 16-bit left-shift conversion primitive: both implementations
// read an int_test_buff row + j and write int16_t output into separate 64x64 buffers,
// which are compared in full after each iteration.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
428 bool PixelHarness::check_cvt32to16_shl_t(cvt32to16_shl_t ref
, cvt32to16_shl_t opt
)
430 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
431 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
// Identical 0xCD fill so entries neither implementation writes still compare equal.
433 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
434 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
437 intptr_t stride
= STRIDE
;
438 for (int i
= 0; i
< ITERS
; i
++)
// Shift amount in [1, 7].
440 int shift
= (rand() % 7 + 1);
442 int index
= i
% TEST_CASES
;
443 checked(opt
, opt_dest
, int_test_buff
[index
] + j
, stride
, shift
);
444 ref(ref_dest
, int_test_buff
[index
] + j
, stride
, shift
);
446 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
// Exercises a copy-and-count primitive: both implementations read a short_test_buff1
// row + j, write int16_t output into separate 64x64 buffers and return an integer.
// A mismatch is either differing return values or differing output buffers.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
456 bool PixelHarness::check_copy_cnt_t(copy_cnt_t ref
, copy_cnt_t opt
)
458 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
459 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
// Identical 0xCD fill so entries neither implementation writes still compare equal.
461 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
462 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
465 intptr_t stride
= STRIDE
;
466 for (int i
= 0; i
< ITERS
; i
++)
468 int index
= i
% TEST_CASES
;
469 int opt_cnt
= (int)checked(opt
, opt_dest
, short_test_buff1
[index
] + j
, stride
);
470 int ref_cnt
= ref(ref_dest
, short_test_buff1
[index
] + j
, stride
);
// Both the returned value and the written data must agree.
472 if ((ref_cnt
!= opt_cnt
) || memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
// Exercises a copy-with-right-shift primitive: both implementations read a
// short_test_buff row + j and write int16_t output into separate 64x64 buffers, which
// are compared in full after each iteration.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
482 bool PixelHarness::check_copy_shr_t(copy_shr_t ref
, copy_shr_t opt
)
484 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
485 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
// Identical 0xCD fill so entries neither implementation writes still compare equal.
487 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
488 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
491 intptr_t stride
= STRIDE
;
492 for (int i
= 0; i
< ITERS
; i
++)
// Shift amount in [1, 7].
494 int shift
= (rand() % 7 + 1);
496 int index
= i
% TEST_CASES
;
497 checked(opt
, opt_dest
, short_test_buff
[index
] + j
, stride
, shift
, (int)STRIDE
);
498 ref(ref_dest
, short_test_buff
[index
] + j
, stride
, shift
, (int)STRIDE
);
500 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
// Exercises a copy-with-left-shift primitive: both implementations read a
// short_test_buff row + j and write int16_t output into separate 64x64 buffers, which
// are compared in full after each iteration.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
510 bool PixelHarness::check_copy_shl_t(copy_shl_t ref
, copy_shl_t opt
)
512 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
513 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
// Identical 0xCD fill so entries neither implementation writes still compare equal.
515 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
516 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
519 intptr_t stride
= STRIDE
;
520 for (int i
= 0; i
< ITERS
; i
++)
// Shift amount in [1, 7].
522 int shift
= (rand() % 7 + 1);
524 int index
= i
% TEST_CASES
;
525 checked(opt
, opt_dest
, short_test_buff
[index
] + j
, stride
, shift
);
526 ref(ref_dest
, short_test_buff
[index
] + j
, stride
, shift
);
528 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
// Exercises a pixel-averaging primitive: two randomly chosen pixel_test_buff rows
// (both offset by j) are combined into separate 64x64 pixel buffers by each
// implementation, with a trailing literal argument of 32; the buffers are then compared
// in full.
// @param ref  reference implementation
// @param opt  implementation under test
// NOTE(review): here checked() wraps ref while opt is called directly, i.e. the
// implementation under test does not go through checked() -- confirm whether the two
// roles were swapped by mistake.
538 bool PixelHarness::check_pixelavg_pp(pixelavg_pp_t ref
, pixelavg_pp_t opt
)
540 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
541 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
// Identical 0xCD fill so entries neither implementation writes still compare equal.
545 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
546 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
548 intptr_t stride
= STRIDE
;
549 for (int i
= 0; i
< ITERS
; i
++)
551 int index1
= rand() % TEST_CASES
;
552 int index2
= rand() % TEST_CASES
;
553 checked(ref
, ref_dest
, stride
, pixel_test_buff
[index1
] + j
,
554 stride
, pixel_test_buff
[index2
] + j
, stride
, 32);
555 opt(opt_dest
, stride
, pixel_test_buff
[index1
] + j
,
556 stride
, pixel_test_buff
[index2
] + j
, stride
, 32);
558 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
// Exercises a pixel-to-pixel block copy primitive: both implementations copy from a
// pixel_test_buff row + j into separate 64x64 pixel buffers using STRIDE for both source
// and destination; the buffers are then compared in full.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
568 bool PixelHarness::check_copy_pp(copy_pp_t ref
, copy_pp_t opt
)
570 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
571 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
573 // we don't know the partition size so we are checking the entire output buffer so
574 // we must initialize the buffers
575 memset(ref_dest
, 0, sizeof(ref_dest
));
576 memset(opt_dest
, 0, sizeof(opt_dest
));
579 intptr_t stride
= STRIDE
;
580 for (int i
= 0; i
< ITERS
; i
++)
582 int index
= i
% TEST_CASES
;
583 checked(opt
, opt_dest
, stride
, pixel_test_buff
[index
] + j
, stride
);
584 ref(ref_dest
, stride
, pixel_test_buff
[index
] + j
, stride
);
586 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
// Exercises a short-to-pixel block copy primitive: both implementations copy from a
// short_test_buff1 row + j into separate 64x64 pixel buffers; the buffers are then
// compared in full.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
596 bool PixelHarness::check_copy_sp(copy_sp_t ref
, copy_sp_t opt
)
598 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
599 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
601 // we don't know the partition size so we are checking the entire output buffer so
602 // we must initialize the buffers
603 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
604 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
// stride1 (64) is the destination stride, stride2 (STRIDE) the source stride.
607 intptr_t stride1
= 64, stride2
= STRIDE
;
608 for (int i
= 0; i
< ITERS
; i
++)
610 int index
= i
% TEST_CASES
;
611 checked(opt
, opt_dest
, stride1
, short_test_buff1
[index
] + j
, stride2
);
612 ref(ref_dest
, stride1
, short_test_buff1
[index
] + j
, stride2
);
614 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
// Exercises a pixel-to-short block copy primitive: both implementations copy from a
// pixel_test_buff row + j into separate 64x64 int16_t buffers using STRIDE for both
// source and destination; the buffers are then compared in full.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
624 bool PixelHarness::check_copy_ps(copy_ps_t ref
, copy_ps_t opt
)
626 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
627 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
629 // we don't know the partition size so we are checking the entire output buffer so
630 // we must initialize the buffers
631 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
632 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
635 intptr_t stride
= STRIDE
;
636 for (int i
= 0; i
< ITERS
; i
++)
638 int index
= i
% TEST_CASES
;
639 checked(opt
, opt_dest
, stride
, pixel_test_buff
[index
] + j
, stride
);
640 ref(ref_dest
, stride
, pixel_test_buff
[index
] + j
, stride
);
642 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
// Exercises a short-to-short block copy primitive: both implementations copy from a
// short_test_buff1 row + j into separate 64x64 int16_t buffers using STRIDE for both
// source and destination; the buffers are then compared in full.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
652 bool PixelHarness::check_copy_ss(copy_ss_t ref
, copy_ss_t opt
)
654 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
655 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
657 // we don't know the partition size so we are checking the entire output buffer so
658 // we must initialize the buffers
659 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
660 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
663 intptr_t stride
= STRIDE
;
664 for (int i
= 0; i
< ITERS
; i
++)
666 int index
= i
% TEST_CASES
;
667 checked(opt
, opt_dest
, stride
, short_test_buff1
[index
] + j
, stride
);
668 ref(ref_dest
, stride
, short_test_buff1
[index
] + j
, stride
);
670 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
// Exercises a block-fill primitive: each iteration draws a fill value and has both
// implementations write it into separate 64x64 int16_t buffers with a stride of 64; the
// buffers are then compared in full.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
680 bool PixelHarness::check_blockfill_s(blockfill_s_t ref
, blockfill_s_t opt
)
682 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
683 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
// Identical 0xCD fill so entries neither implementation writes still compare equal.
685 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
686 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
688 intptr_t stride
= 64;
689 for (int i
= 0; i
< ITERS
; i
++)
// Fill value in [1, SHORT_MAX] before the implicit conversion to int16_t.
691 int16_t value
= (rand() % SHORT_MAX
) + 1;
693 checked(opt
, opt_dest
, stride
, value
);
694 ref(ref_dest
, stride
, value
);
696 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
// Exercises a pixel-subtract primitive: two randomly chosen pixel_test_buff rows (both
// offset by j) are passed to each implementation, which writes int16_t output into its
// own 64x64 buffer; the buffers are then compared in full.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
// NOTE(review): the loop bound is the literal 1, so only a single iteration runs --
// confirm whether ITERS was intended.
705 bool PixelHarness::check_pixel_sub_ps(pixel_sub_ps_t ref
, pixel_sub_ps_t opt
)
707 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
708 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
// Identical 0xCD fill so entries neither implementation writes still compare equal.
710 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
711 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
// stride2 (64) is the destination stride, stride (STRIDE) is used for both sources.
714 intptr_t stride2
= 64, stride
= STRIDE
;
715 for (int i
= 0; i
< 1; i
++)
717 int index1
= rand() % TEST_CASES
;
718 int index2
= rand() % TEST_CASES
;
719 checked(opt
, opt_dest
, stride2
, pixel_test_buff
[index1
] + j
,
720 pixel_test_buff
[index2
] + j
, stride
, stride
);
721 ref(ref_dest
, stride2
, pixel_test_buff
[index1
] + j
,
722 pixel_test_buff
[index2
] + j
, stride
, stride
);
724 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
// Exercises a scaling primitive: both implementations read a pixel_test_buff row + j
// with stride STRIDE and write into separate zeroed 64x64 pixel buffers, which are then
// compared in full.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
734 bool PixelHarness::check_scale_pp(scale_t ref
, scale_t opt
)
736 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
737 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
739 memset(ref_dest
, 0, sizeof(ref_dest
));
740 memset(opt_dest
, 0, sizeof(opt_dest
));
743 intptr_t stride
= STRIDE
;
744 for (int i
= 0; i
< ITERS
; i
++)
746 int index
= i
% TEST_CASES
;
747 checked(opt
, opt_dest
, pixel_test_buff
[index
] + j
, stride
);
748 ref(ref_dest
, pixel_test_buff
[index
] + j
, stride
);
750 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
// Exercises a transpose primitive: both implementations read a pixel_test_buff row + j
// with stride STRIDE and write into separate zeroed 64x64 pixel buffers, which are then
// compared in full.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
760 bool PixelHarness::check_transpose(transpose_t ref
, transpose_t opt
)
762 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
763 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
765 memset(ref_dest
, 0, sizeof(ref_dest
));
766 memset(opt_dest
, 0, sizeof(opt_dest
));
769 intptr_t stride
= STRIDE
;
770 for (int i
= 0; i
< ITERS
; i
++)
772 int index
= i
% TEST_CASES
;
773 checked(opt
, opt_dest
, pixel_test_buff
[index
] + j
, stride
);
774 ref(ref_dest
, pixel_test_buff
[index
] + j
, stride
);
776 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
// Exercises a pixel-add primitive: a randomly chosen pixel_test_buff row and a randomly
// chosen short_test_buff row (both offset by j) are passed to each implementation, which
// writes pixel output into its own 64x64 buffer; the buffers are then compared in full.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
786 bool PixelHarness::check_pixel_add_ps(pixel_add_ps_t ref
, pixel_add_ps_t opt
)
788 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
789 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
// Identical 0xCD fill so entries neither implementation writes still compare equal.
791 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
792 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
// stride2 (64) is the destination stride, stride (STRIDE) is used for both sources.
795 intptr_t stride2
= 64, stride
= STRIDE
;
796 for (int i
= 0; i
< ITERS
; i
++)
798 int index1
= rand() % TEST_CASES
;
799 int index2
= rand() % TEST_CASES
;
800 checked(opt
, opt_dest
, stride2
, pixel_test_buff
[index1
] + j
, short_test_buff
[index2
] + j
, stride
, stride
);
801 ref(ref_dest
, stride2
, pixel_test_buff
[index1
] + j
, short_test_buff
[index2
] + j
, stride
, stride
);
803 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
// Exercises a variance primitive: both implementations are run on the same
// pixel_test_buff row (selected round-robin, no offset applied in the visible calls)
// with stride STRIDE and return a uint64_t.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
// The comparison of vres with cres and the return statements are not part of the
// visible lines.
813 bool PixelHarness::check_pixel_var(var_t ref
, var_t opt
)
817 intptr_t stride
= STRIDE
;
819 for (int i
= 0; i
< ITERS
; i
++)
821 int index
= i
% TEST_CASES
;
822 uint64_t vres
= checked(opt
, pixel_test_buff
[index
], stride
);
823 uint64_t cres
= ref(pixel_test_buff
[index
], stride
);
// Exercises the ssim_4x4x2_core primitive: two randomly chosen pixel_test_buff rows are
// passed to each implementation, which fills a [2][4] int array (sum0 for ref, sum1 for
// opt); the two arrays are then compared in full.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
// NOTE(review): the input offset here is the loop counter i, and stride is drawn from
// [0, 63], so it can be 0 -- confirm both are intended.
834 bool PixelHarness::check_ssim_4x4x2_core(ssim_4x4x2_core_t ref
, ssim_4x4x2_core_t opt
)
836 ALIGN_VAR_32(int, sum0
[2][4]);
837 ALIGN_VAR_32(int, sum1
[2][4]);
839 for (int i
= 0; i
< ITERS
; i
++)
841 intptr_t stride
= rand() % 64;
842 int index1
= rand() % TEST_CASES
;
843 int index2
= rand() % TEST_CASES
;
844 ref(pixel_test_buff
[index1
] + i
, stride
, pixel_test_buff
[index2
] + i
, stride
, sum0
);
845 checked(opt
, pixel_test_buff
[index1
] + i
, stride
, pixel_test_buff
[index2
] + i
, stride
, sum1
);
847 if (memcmp(sum0
, sum1
, sizeof(sum0
)))
// Exercises the ssim_end4 primitive: each iteration fills two [5][4] int arrays with
// values in [0, 4095], draws a width in [1, 4], and compares the float results of both
// implementations with an absolute tolerance of 0.00001.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked_float())
856 /* TODO: This function causes crashes when checked. Is this a real bug? */
857 bool PixelHarness::check_ssim_end(ssim_end4_t ref
, ssim_end4_t opt
)
859 ALIGN_VAR_32(int, sum0
[5][4]);
860 ALIGN_VAR_32(int, sum1
[5][4]);
862 for (int i
= 0; i
< ITERS
; i
++)
// Fresh random inputs for every iteration; sum0 and sum1 are filled independently.
864 for (int j
= 0; j
< 5; j
++)
866 for (int k
= 0; k
< 4; k
++)
868 sum0
[j
][k
] = rand() % (1 << 12);
869 sum1
[j
][k
] = rand() % (1 << 12);
873 int width
= (rand() % 4) + 1; // range[1-4]
874 float cres
= ref(sum0
, sum1
, width
);
875 float vres
= checked_float(opt
, sum0
, sum1
, width
);
// Floating-point results are compared with a tolerance rather than for equality.
876 if (fabs(vres
- cres
) > 0.00001)
// Exercises an addAvg primitive: two randomly chosen short_test_buff2 rows (both offset
// by j) are passed to each implementation, which writes pixel output into its own 64x64
// buffer; STRIDE is used for all three stride arguments. The buffers are then compared
// in full.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
885 bool PixelHarness::check_addAvg(addAvg_t ref
, addAvg_t opt
)
887 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
888 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
// Identical 0xCD fill so entries neither implementation writes still compare equal.
892 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
893 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
894 intptr_t stride
= STRIDE
;
896 for (int i
= 0; i
< ITERS
; i
++)
898 int index1
= rand() % TEST_CASES
;
899 int index2
= rand() % TEST_CASES
;
900 ref(short_test_buff2
[index1
] + j
, short_test_buff2
[index2
] + j
, ref_dest
, stride
, stride
, stride
);
901 checked(opt
, short_test_buff2
[index1
] + j
, short_test_buff2
[index2
] + j
, opt_dest
, stride
, stride
, stride
);
902 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
// Exercises the saoCuOrgE0 primitive: each implementation receives its own 64x64 pixel
// buffer (pre-filled with 0xCD), psbuf1 + j, a width and a sign value; the buffers are
// then compared in full.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
// The handling of j, and any adjustment of sign between its initialisation and the
// calls, are not part of the visible lines.
912 bool PixelHarness::check_saoCuOrgE0_t(saoCuOrgE0_t ref
, saoCuOrgE0_t opt
)
914 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
915 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
917 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
918 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
922 for (int i
= 0; i
< ITERS
; i
++)
// Width is one of 16, 32, 48 or 64; sign is initialised to 0, 1 or 2.
924 int width
= 16 * (rand() % 4 + 1);
925 int8_t sign
= rand() % 3;
931 ref(ref_dest
, psbuf1
+ j
, width
, sign
);
932 checked(opt
, opt_dest
, psbuf1
+ j
, width
, sign
);
934 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
// Exercises the planecopy_sp primitive: both implementations read a ushort_test_buff
// row + j (source stride 64) and write into separate 64x64 pixel buffers with the
// destination stride equal to width; the trailing arguments are the constants 8 and 255.
// Only the first width * height pixels of the outputs are compared.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
944 bool PixelHarness::check_planecopy_sp(planecopy_sp_t ref
, planecopy_sp_t opt
)
946 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
947 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
949 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
950 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
// Dimensions are drawn once, each in [16, 63].
952 int width
= 16 + rand() % 48;
953 int height
= 16 + rand() % 48;
954 intptr_t srcStride
= 64;
955 intptr_t dstStride
= width
;
958 for (int i
= 0; i
< ITERS
; i
++)
960 int index
= i
% TEST_CASES
;
961 checked(opt
, ushort_test_buff
[index
] + j
, srcStride
, opt_dest
, dstStride
, width
, height
, (int)8, (uint16_t)255);
962 ref(ushort_test_buff
[index
] + j
, srcStride
, ref_dest
, dstStride
, width
, height
, (int)8, (uint16_t)255);
// With dstStride == width the written region is contiguous, so a prefix compare suffices.
964 if (memcmp(ref_dest
, opt_dest
, width
* height
* sizeof(pixel
)))
// Exercises the planecopy_cp primitive: both implementations read a uchar_test_buff
// row + j (source stride 64) and write into separate 64x64 pixel buffers with the
// destination stride equal to width; the trailing argument is the constant 2.
// Only the first width * height pixels of the outputs are compared.
// @param ref  reference implementation
// @param opt  implementation under test (invoked through checked())
974 bool PixelHarness::check_planecopy_cp(planecopy_cp_t ref
, planecopy_cp_t opt
)
976 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
977 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
979 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
980 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
// Dimensions are drawn once, each in [16, 63].
982 int width
= 16 + rand() % 48;
983 int height
= 16 + rand() % 48;
984 intptr_t srcStride
= 64;
985 intptr_t dstStride
= width
;
988 for (int i
= 0; i
< ITERS
; i
++)
990 int index
= i
% TEST_CASES
;
991 checked(opt
, uchar_test_buff
[index
] + j
, srcStride
, opt_dest
, dstStride
, width
, height
, (int)2);
992 ref(uchar_test_buff
[index
] + j
, srcStride
, ref_dest
, dstStride
, width
, height
, (int)2);
// With dstStride == width the written region is contiguous, so a prefix compare suffices.
994 if (memcmp(ref_dest
, opt_dest
, width
* height
* sizeof(pixel
)))
// Runs the per-partition correctness checks: for the primitive tables indexed by part,
// the opt entry is checked against the ref entry with the matching check_* helper and a
// "failed" message naming the primitive and partition is printed on mismatch.
// @param part  partition index into the primitive tables and lumaPartStr
// @param ref   primitive table holding the reference implementations
// @param opt   primitive table holding the implementations under test
// Most checks are visibly guarded by a test that the opt entry is non-null; for the
// satd and sad checks no such guard appears in the visible lines. The statements that
// follow each printf are not part of the visible lines.
1004 bool PixelHarness::testPartition(int part
, const EncoderPrimitives
& ref
, const EncoderPrimitives
& opt
)
// --- luma cost primitives ---
1008 if (!check_pixelcmp(ref
.satd
[part
], opt
.satd
[part
]))
1010 printf("satd[%s]: failed!\n", lumaPartStr
[part
]);
1015 if (opt
.sa8d_inter
[part
])
1017 if (!check_pixelcmp(ref
.sa8d_inter
[part
], opt
.sa8d_inter
[part
]))
1019 printf("sa8d_inter[%s]: failed!\n", lumaPartStr
[part
]);
1026 if (!check_pixelcmp(ref
.sad
[part
], opt
.sad
[part
]))
1028 printf("sad[%s]: failed!\n", lumaPartStr
[part
]);
1033 if (opt
.sse_pp
[part
])
1035 if (!check_pixelcmp(ref
.sse_pp
[part
], opt
.sse_pp
[part
]))
1037 printf("sse_pp[%s]: failed!\n", lumaPartStr
[part
]);
1042 if (opt
.sse_sp
[part
])
1044 if (!check_pixelcmp_sp(ref
.sse_sp
[part
], opt
.sse_sp
[part
]))
1046 printf("sse_sp[%s]: failed!\n", lumaPartStr
[part
]);
1051 if (opt
.sse_ss
[part
])
1053 if (!check_pixelcmp_ss(ref
.sse_ss
[part
], opt
.sse_ss
[part
]))
1055 printf("sse_ss[%s]: failed!\n", lumaPartStr
[part
]);
1060 if (opt
.sad_x3
[part
])
1062 if (!check_pixelcmp_x3(ref
.sad_x3
[part
], opt
.sad_x3
[part
]))
1064 printf("sad_x3[%s]: failed!\n", lumaPartStr
[part
]);
1069 if (opt
.sad_x4
[part
])
1071 if (!check_pixelcmp_x4(ref
.sad_x4
[part
], opt
.sad_x4
[part
]))
1073 printf("sad_x4[%s]: failed!\n", lumaPartStr
[part
]);
// --- luma averaging and copy primitives ---
1078 if (opt
.pixelavg_pp
[part
])
1080 if (!check_pixelavg_pp(ref
.pixelavg_pp
[part
], opt
.pixelavg_pp
[part
]))
1082 printf("pixelavg_pp[%s]: failed!\n", lumaPartStr
[part
]);
1087 if (opt
.luma_copy_pp
[part
])
1089 if (!check_copy_pp(ref
.luma_copy_pp
[part
], opt
.luma_copy_pp
[part
]))
1091 printf("luma_copy_pp[%s] failed\n", lumaPartStr
[part
]);
1096 if (opt
.luma_copy_sp
[part
])
1098 if (!check_copy_sp(ref
.luma_copy_sp
[part
], opt
.luma_copy_sp
[part
]))
1100 printf("luma_copy_sp[%s] failed\n", lumaPartStr
[part
]);
1105 if (opt
.luma_copy_ps
[part
])
1107 if (!check_copy_ps(ref
.luma_copy_ps
[part
], opt
.luma_copy_ps
[part
]))
1109 printf("luma_copy_ps[%s] failed\n", lumaPartStr
[part
]);
1114 if (opt
.luma_copy_ss
[part
])
1116 if (!check_copy_ss(ref
.luma_copy_ss
[part
], opt
.luma_copy_ss
[part
]))
1118 printf("luma_copy_ss[%s] failed\n", lumaPartStr
[part
]);
1123 if (opt
.luma_addAvg
[part
])
1125 if (!check_addAvg(ref
.luma_addAvg
[part
], opt
.luma_addAvg
[part
]))
1127 printf("luma_addAvg[%s] failed\n", lumaPartStr
[part
]);
// luma_sub_ps follows this bound check on part; whether luma_add_ps is also inside it
// cannot be determined from the visible lines.
1132 if (part
< NUM_SQUARE_BLOCKS
)
1134 if (opt
.luma_sub_ps
[part
])
1136 if (!check_pixel_sub_ps(ref
.luma_sub_ps
[part
], opt
.luma_sub_ps
[part
]))
1138 printf("luma_sub_ps[%s] failed\n", lumaPartStr
[part
]);
1143 if (opt
.luma_add_ps
[part
])
1145 if (!check_pixel_add_ps(ref
.luma_add_ps
[part
], opt
.luma_add_ps
[part
]))
1147 printf("luma_add_ps[%s] failed\n", lumaPartStr
[part
]);
// --- chroma primitives, repeated for every colour-space index i ---
1153 for (int i
= 0; i
< X265_CSP_COUNT
; i
++)
1155 if (opt
.chroma
[i
].copy_pp
[part
])
1157 if (!check_copy_pp(ref
.chroma
[i
].copy_pp
[part
], opt
.chroma
[i
].copy_pp
[part
]))
1159 printf("chroma_copy_pp[%s][%s] failed\n", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1163 if (opt
.chroma
[i
].copy_sp
[part
])
1165 if (!check_copy_sp(ref
.chroma
[i
].copy_sp
[part
], opt
.chroma
[i
].copy_sp
[part
]))
1167 printf("chroma_copy_sp[%s][%s] failed\n", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1171 if (opt
.chroma
[i
].copy_ps
[part
])
1173 if (!check_copy_ps(ref
.chroma
[i
].copy_ps
[part
], opt
.chroma
[i
].copy_ps
[part
]))
1175 printf("chroma_copy_ps[%s][%s] failed\n", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1179 if (opt
.chroma
[i
].copy_ss
[part
])
1181 if (!check_copy_ss(ref
.chroma
[i
].copy_ss
[part
], opt
.chroma
[i
].copy_ss
[part
]))
1183 printf("chroma_copy_ss[%s][%s] failed\n", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1187 if (opt
.chroma
[i
].addAvg
[part
])
1189 if (!check_addAvg(ref
.chroma
[i
].addAvg
[part
], opt
.chroma
[i
].addAvg
[part
]))
1191 printf("chroma_addAvg[%s][%s] failed\n", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
// chroma sub_ps follows this bound check on part; the extent of the guarded region is
// not visible here.
1195 if (part
< NUM_SQUARE_BLOCKS
)
1197 if (opt
.chroma
[i
].sub_ps
[part
])
1199 if (!check_pixel_sub_ps(ref
.chroma
[i
].sub_ps
[part
], opt
.chroma
[i
].sub_ps
[part
]))
1201 printf("chroma_sub_ps[%s][%s] failed\n", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1205 if (opt
.chroma
[i
].add_ps
[part
])
1207 if (!check_pixel_add_ps(ref
.chroma
[i
].add_ps
[part
], opt
.chroma
[i
].add_ps
[part
]))
1209 printf("chroma_add_ps[%s][%s] failed\n", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
// Top-level correctness entry point. First calls testPartition() for the partitions
// derived from each size in 4, 8, 16, 32, 64 (returning false as soon as one fails),
// then checks the primitives indexed by square block size, then the primitives that
// are not indexed at all. A "failed" message is printed for each mismatch.
// @param ref  primitive table holding the reference implementations
// @param opt  primitive table holding the implementations under test
// Any conditions guarding the non-square partition groups, and the statements that
// follow each printf, are not part of the visible lines.
1219 bool PixelHarness::testCorrectness(const EncoderPrimitives
& ref
, const EncoderPrimitives
& opt
)
1221 for (int size
= 4; size
<= 64; size
*= 2)
1223 int part
= partitionFromSizes(size
, size
); // 2Nx2N
1224 if (!testPartition(part
, ref
, opt
)) return false;
1228 part
= partitionFromSizes(size
, size
>> 1); // 2NxN
1229 if (!testPartition(part
, ref
, opt
)) return false;
1230 part
= partitionFromSizes(size
>> 1, size
); // Nx2N
1231 if (!testPartition(part
, ref
, opt
)) return false;
// Asymmetric partitions: one dimension is a quarter or three quarters of size.
1236 part
= partitionFromSizes(size
, size
>> 2);
1237 if (!testPartition(part
, ref
, opt
)) return false;
1238 part
= partitionFromSizes(size
, 3 * (size
>> 2));
1239 if (!testPartition(part
, ref
, opt
)) return false;
1241 part
= partitionFromSizes(size
>> 2, size
);
1242 if (!testPartition(part
, ref
, opt
)) return false;
1243 part
= partitionFromSizes(3 * (size
>> 2), size
);
1244 if (!testPartition(part
, ref
, opt
)) return false;
// --- primitives indexed by square block size; the messages print 4 << i as the edge ---
1248 for (int i
= 0; i
< NUM_SQUARE_BLOCKS
; i
++)
1250 if (opt
.calcresidual
[i
])
1252 if (!check_calresidual(ref
.calcresidual
[i
], opt
.calcresidual
[i
]))
1254 printf("calcresidual width: %d failed!\n", 4 << i
);
1260 if (!check_pixelcmp(ref
.sa8d
[i
], opt
.sa8d
[i
]))
1262 printf("sa8d[%dx%d]: failed!\n", 4 << i
, 4 << i
);
// ssd_s is only tested up to and including BLOCK_32x32.
1267 if ((i
<= BLOCK_32x32
) && opt
.ssd_s
[i
])
1269 if (!check_ssd_s(ref
.ssd_s
[i
], opt
.ssd_s
[i
]))
1271 printf("ssd_s[%dx%d]: failed!\n", 4 << i
, 4 << i
);
1276 if (opt
.blockfill_s
[i
])
1278 if (!check_blockfill_s(ref
.blockfill_s
[i
], opt
.blockfill_s
[i
]))
1280 printf("blockfill_s[%dx%d]: failed!\n", 4 << i
, 4 << i
);
1284 if (opt
.transpose
[i
])
1286 if (!check_transpose(ref
.transpose
[i
], opt
.transpose
[i
]))
1288 printf("transpose[%dx%d] failed\n", 4 << i
, 4 << i
);
1295 if (!check_pixel_var(ref
.var
[i
], opt
.var
[i
]))
1297 printf("var[%dx%d] failed\n", 4 << i
, 4 << i
);
// The following four checks exclude BLOCK_64x64 (strict comparison).
1302 if ((i
< BLOCK_64x64
) && opt
.copy_cnt
[i
])
1304 if (!check_copy_cnt_t(ref
.copy_cnt
[i
], opt
.copy_cnt
[i
]))
1306 printf("copy_cnt[%dx%d] failed!\n", 4 << i
, 4 << i
);
1311 if ((i
< BLOCK_64x64
) && opt
.cvt16to32_shr
[i
])
1313 if (!check_cvt16to32_shr_t(ref
.cvt16to32_shr
[i
], opt
.cvt16to32_shr
[i
]))
1315 printf("cvt16to32_shr failed!\n");
1320 if ((i
< BLOCK_64x64
) && opt
.cvt32to16_shl
[i
])
1322 if (!check_cvt32to16_shl_t(ref
.cvt32to16_shl
[i
], opt
.cvt32to16_shl
[i
]))
1324 printf("cvt32to16_shl failed!\n");
1329 if ((i
< BLOCK_64x64
) && opt
.copy_shl
[i
])
1331 if (!check_copy_shl_t(ref
.copy_shl
[i
], opt
.copy_shl
[i
]))
1333 printf("copy_shl[%dx%d] failed!\n", 4 << i
, 4 << i
);
// --- primitives that are not indexed by partition or block size ---
1340 if (opt
.cvt32to16_shr
)
1342 if (!check_cvt32to16_shr_t(ref
.cvt32to16_shr
, opt
.cvt32to16_shr
))
1344 printf("cvt32to16 failed!\n");
1349 if (opt
.cvt16to32_shl
)
1351 if (!check_cvt16to32_shl_t(ref
.cvt16to32_shl
, opt
.cvt16to32_shl
))
1353 printf("cvt16to32_shl failed!\n");
1360 if (!check_weightp(ref
.weight_pp
, opt
.weight_pp
))
1362 printf("Weighted Prediction (pixel) failed!\n");
1369 if (!check_weightp(ref
.weight_sp
, opt
.weight_sp
))
1371 printf("Weighted Prediction (short) failed!\n");
1376 if (opt
.frame_init_lowres_core
)
1378 if (!check_downscale_t(ref
.frame_init_lowres_core
, opt
.frame_init_lowres_core
))
1380 printf("downscale failed!\n");
1385 if (opt
.scale1D_128to64
)
1387 if (!check_scale_pp(ref
.scale1D_128to64
, opt
.scale1D_128to64
))
1389 printf("scale1D_128to64 failed!\n");
1394 if (opt
.scale2D_64to32
)
1396 if (!check_scale_pp(ref
.scale2D_64to32
, opt
.scale2D_64to32
))
1398 printf("scale2D_64to32 failed!\n");
// NOTE(review): the message printed for an ssim_4x4x2_core failure reads "ssim_end_4",
// the same text used by the ssim_end_4 check below -- likely a copy/paste slip; confirm.
1403 if (opt
.ssim_4x4x2_core
)
1405 if (!check_ssim_4x4x2_core(ref
.ssim_4x4x2_core
, opt
.ssim_4x4x2_core
))
1407 printf("ssim_end_4 failed!\n");
1414 if (!check_ssim_end(ref
.ssim_end_4
, opt
.ssim_end_4
))
1416 printf("ssim_end_4 failed!\n");
1423 if (!check_saoCuOrgE0_t(ref
.saoCuOrgE0
, opt
.saoCuOrgE0
))
1425 printf("SAO_EO_0 failed\n");
1430 if (opt
.planecopy_sp
)
1432 if (!check_planecopy_sp(ref
.planecopy_sp
, opt
.planecopy_sp
))
1434 printf("planecopy_sp failed\n");
1439 if (opt
.planecopy_cp
)
1441 if (!check_planecopy_cp(ref
.planecopy_cp
, opt
.planecopy_cp
))
1443 printf("planecopy_cp failed\n");
1450 if (!check_copy_shr_t(ref
.copy_shr
, opt
.copy_shr
))
1452 printf("copy_shr failed!\n");
1460 void PixelHarness::measurePartition(int part
, const EncoderPrimitives
& ref
, const EncoderPrimitives
& opt
)
1462 ALIGN_VAR_16(int, cres
[16]);
1463 pixel
*fref
= pbuf2
+ 2 * INCR
;
1465 #define HEADER(str, ...) sprintf(header, str, __VA_ARGS__); printf("%22s", header);
1469 HEADER("satd[%s]", lumaPartStr
[part
]);
1470 REPORT_SPEEDUP(opt
.satd
[part
], ref
.satd
[part
], pbuf1
, STRIDE
, fref
, STRIDE
);
1473 if (opt
.pixelavg_pp
[part
])
1475 HEADER("avg_pp[%s]", lumaPartStr
[part
]);
1476 REPORT_SPEEDUP(opt
.pixelavg_pp
[part
], ref
.pixelavg_pp
[part
], pbuf1
, STRIDE
, pbuf2
, STRIDE
, pbuf3
, STRIDE
, 32);
1479 if (opt
.sa8d_inter
[part
])
1481 HEADER("sa8d[%s]", lumaPartStr
[part
]);
1482 REPORT_SPEEDUP(opt
.sa8d_inter
[part
], ref
.sa8d_inter
[part
], pbuf1
, STRIDE
, fref
, STRIDE
);
1487 HEADER("sad[%s]", lumaPartStr
[part
]);
1488 REPORT_SPEEDUP(opt
.sad
[part
], ref
.sad
[part
], pbuf1
, STRIDE
, fref
, STRIDE
);
1491 if (opt
.sad_x3
[part
])
1493 HEADER("sad_x3[%s]", lumaPartStr
[part
]);
1494 REPORT_SPEEDUP(opt
.sad_x3
[part
], ref
.sad_x3
[part
], pbuf1
, fref
, fref
+ 1, fref
- 1, FENC_STRIDE
+ 5, &cres
[0]);
1497 if (opt
.sad_x4
[part
])
1499 HEADER("sad_x4[%s]", lumaPartStr
[part
]);
1500 REPORT_SPEEDUP(opt
.sad_x4
[part
], ref
.sad_x4
[part
], pbuf1
, fref
, fref
+ 1, fref
- 1, fref
- INCR
, FENC_STRIDE
+ 5, &cres
[0]);
1503 if (opt
.sse_pp
[part
])
1505 HEADER("sse_pp[%s]", lumaPartStr
[part
]);
1506 REPORT_SPEEDUP(opt
.sse_pp
[part
], ref
.sse_pp
[part
], pbuf1
, STRIDE
, fref
, STRIDE
);
1509 if (opt
.sse_sp
[part
])
1511 HEADER("sse_sp[%s]", lumaPartStr
[part
]);
1512 REPORT_SPEEDUP(opt
.sse_sp
[part
], ref
.sse_sp
[part
], (int16_t*)pbuf1
, STRIDE
, fref
, STRIDE
);
1515 if (opt
.sse_ss
[part
])
1517 HEADER("sse_ss[%s]", lumaPartStr
[part
]);
1518 REPORT_SPEEDUP(opt
.sse_ss
[part
], ref
.sse_ss
[part
], (int16_t*)pbuf1
, STRIDE
, (int16_t*)fref
, STRIDE
);
1521 if (opt
.luma_copy_pp
[part
])
1523 HEADER("luma_copy_pp[%s]", lumaPartStr
[part
]);
1524 REPORT_SPEEDUP(opt
.luma_copy_pp
[part
], ref
.luma_copy_pp
[part
], pbuf1
, 64, pbuf2
, 128);
1527 if (opt
.luma_copy_sp
[part
])
1529 HEADER("luma_copy_sp[%s]", lumaPartStr
[part
]);
1530 REPORT_SPEEDUP(opt
.luma_copy_sp
[part
], ref
.luma_copy_sp
[part
], pbuf1
, 64, sbuf3
, 128);
1533 if (opt
.luma_copy_ps
[part
])
1535 HEADER("luma_copy_ps[%s]", lumaPartStr
[part
]);
1536 REPORT_SPEEDUP(opt
.luma_copy_ps
[part
], ref
.luma_copy_ps
[part
], sbuf1
, 64, pbuf1
, 128);
1538 if (opt
.luma_copy_ss
[part
])
1540 HEADER("luma_copy_ss[%s]", lumaPartStr
[part
]);
1541 REPORT_SPEEDUP(opt
.luma_copy_ss
[part
], ref
.luma_copy_ss
[part
], sbuf1
, 64, sbuf2
, 128);
1543 if (opt
.luma_addAvg
[part
])
1545 HEADER("luma_addAvg[%s]", lumaPartStr
[part
]);
1546 REPORT_SPEEDUP(opt
.luma_addAvg
[part
], ref
.luma_addAvg
[part
], sbuf1
, sbuf2
, pbuf1
, STRIDE
, STRIDE
, STRIDE
);
1548 if (part
< NUM_SQUARE_BLOCKS
)
1550 if (opt
.luma_sub_ps
[part
])
1552 HEADER("luma_sub_ps[%s]", lumaPartStr
[part
]);
1553 REPORT_SPEEDUP(opt
.luma_sub_ps
[part
], ref
.luma_sub_ps
[part
], (int16_t*)pbuf1
, FENC_STRIDE
, pbuf2
, pbuf1
, STRIDE
, STRIDE
);
1555 if (opt
.luma_add_ps
[part
])
1557 HEADER("luma_add_ps[%s]", lumaPartStr
[part
]);
1558 REPORT_SPEEDUP(opt
.luma_add_ps
[part
], ref
.luma_add_ps
[part
], pbuf1
, FENC_STRIDE
, pbuf2
, sbuf1
, STRIDE
, STRIDE
);
1562 for (int i
= 0; i
< X265_CSP_COUNT
; i
++)
1564 if (opt
.chroma
[i
].copy_pp
[part
])
1566 HEADER("[%s] copy_pp[%s]", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1567 REPORT_SPEEDUP(opt
.chroma
[i
].copy_pp
[part
], ref
.chroma
[i
].copy_pp
[part
], pbuf1
, 64, pbuf2
, 128);
1569 if (opt
.chroma
[i
].copy_sp
[part
])
1571 HEADER("[%s] copy_sp[%s]", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1572 REPORT_SPEEDUP(opt
.chroma
[i
].copy_sp
[part
], ref
.chroma
[i
].copy_sp
[part
], pbuf1
, 64, sbuf3
, 128);
1574 if (opt
.chroma
[i
].copy_ps
[part
])
1576 HEADER("[%s] copy_ps[%s]", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1577 REPORT_SPEEDUP(opt
.chroma
[i
].copy_ps
[part
], ref
.chroma
[i
].copy_ps
[part
], sbuf1
, 64, pbuf1
, 128);
1579 if (opt
.chroma
[i
].copy_ss
[part
])
1581 HEADER("[%s] copy_ss[%s]", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1582 REPORT_SPEEDUP(opt
.chroma
[i
].copy_ss
[part
], ref
.chroma
[i
].copy_ss
[part
], sbuf1
, 64, sbuf2
, 128);
1584 if (opt
.chroma
[i
].addAvg
[part
])
1586 HEADER("[%s] addAvg[%s]", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1587 REPORT_SPEEDUP(opt
.chroma
[i
].addAvg
[part
], ref
.chroma
[i
].addAvg
[part
], sbuf1
, sbuf2
, pbuf1
, STRIDE
, STRIDE
, STRIDE
);
1589 if (part
< NUM_SQUARE_BLOCKS
)
1591 if (opt
.chroma
[i
].sub_ps
[part
])
1593 HEADER("[%s] sub_ps[%s]", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1594 REPORT_SPEEDUP(opt
.chroma
[i
].sub_ps
[part
], ref
.chroma
[i
].sub_ps
[part
], (int16_t*)pbuf1
, FENC_STRIDE
, pbuf2
, pbuf1
, STRIDE
, STRIDE
);
1596 if (opt
.chroma
[i
].add_ps
[part
])
1598 HEADER("[%s] add_ps[%s]", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1599 REPORT_SPEEDUP(opt
.chroma
[i
].add_ps
[part
], ref
.chroma
[i
].add_ps
[part
], pbuf1
, FENC_STRIDE
, pbuf2
, sbuf1
, STRIDE
, STRIDE
);
1607 void PixelHarness::measureSpeed(const EncoderPrimitives
& ref
, const EncoderPrimitives
& opt
)
1611 #define HEADER(str, ...) sprintf(header, str, __VA_ARGS__); printf("%22s", header);
1612 #define HEADER0(str) printf("%22s", str);
1614 for (int size
= 4; size
<= 64; size
*= 2)
1616 int part
= partitionFromSizes(size
, size
); // 2Nx2N
1617 measurePartition(part
, ref
, opt
);
1621 part
= partitionFromSizes(size
, size
>> 1); // 2NxN
1622 measurePartition(part
, ref
, opt
);
1623 part
= partitionFromSizes(size
>> 1, size
); // Nx2N
1624 measurePartition(part
, ref
, opt
);
1629 part
= partitionFromSizes(size
, size
>> 2);
1630 measurePartition(part
, ref
, opt
);
1631 part
= partitionFromSizes(size
, 3 * (size
>> 2));
1632 measurePartition(part
, ref
, opt
);
1634 part
= partitionFromSizes(size
>> 2, size
);
1635 measurePartition(part
, ref
, opt
);
1636 part
= partitionFromSizes(3 * (size
>> 2), size
);
1637 measurePartition(part
, ref
, opt
);
1641 for (int i
= 0; i
< NUM_SQUARE_BLOCKS
; i
++)
1643 if ((i
<= BLOCK_32x32
) && opt
.ssd_s
[i
])
1645 HEADER("ssd_s[%dx%d]", 4 << i
, 4 << i
);
1646 REPORT_SPEEDUP(opt
.ssd_s
[i
], ref
.ssd_s
[i
], sbuf1
, STRIDE
);
1650 HEADER("sa8d[%dx%d]", 4 << i
, 4 << i
);
1651 REPORT_SPEEDUP(opt
.sa8d
[i
], ref
.sa8d
[i
], pbuf1
, STRIDE
, pbuf2
, STRIDE
);
1653 if (opt
.calcresidual
[i
])
1655 HEADER("residual[%dx%d]", 4 << i
, 4 << i
);
1656 REPORT_SPEEDUP(opt
.calcresidual
[i
], ref
.calcresidual
[i
], pbuf1
, pbuf2
, sbuf1
, 64);
1659 if (opt
.blockfill_s
[i
])
1661 HEADER("blkfill[%dx%d]", 4 << i
, 4 << i
);
1662 REPORT_SPEEDUP(opt
.blockfill_s
[i
], ref
.blockfill_s
[i
], sbuf1
, 64, SHORT_MAX
);
1665 if (opt
.transpose
[i
])
1667 HEADER("transpose[%dx%d]", 4 << i
, 4 << i
);
1668 REPORT_SPEEDUP(opt
.transpose
[i
], ref
.transpose
[i
], pbuf1
, pbuf2
, STRIDE
);
1673 HEADER("var[%dx%d]", 4 << i
, 4 << i
);
1674 REPORT_SPEEDUP(opt
.var
[i
], ref
.var
[i
], pbuf1
, STRIDE
);
1677 if ((i
< BLOCK_64x64
) && opt
.cvt16to32_shr
[i
])
1679 HEADER("cvt16to32_shr[%dx%d]", 4 << i
, 4 << i
);
1680 REPORT_SPEEDUP(opt
.cvt16to32_shr
[i
], ref
.cvt16to32_shr
[i
], ibuf1
, sbuf2
, STRIDE
, 3, 4);
1683 if ((i
< BLOCK_64x64
) && opt
.cvt32to16_shl
[i
])
1685 HEADER("cvt32to16_shl[%dx%d]", 4 << i
, 4 << i
);
1686 REPORT_SPEEDUP(opt
.cvt32to16_shl
[i
], ref
.cvt32to16_shl
[i
], sbuf2
, ibuf1
, STRIDE
, 3);
1689 if ((i
< BLOCK_64x64
) && opt
.copy_cnt
[i
])
1691 HEADER("copy_cnt[%dx%d]", 4 << i
, 4 << i
);
1692 REPORT_SPEEDUP(opt
.copy_cnt
[i
], ref
.copy_cnt
[i
], sbuf1
, sbuf2
, STRIDE
);
1695 if ((i
< BLOCK_64x64
) && opt
.copy_shl
[i
])
1697 HEADER("copy_shl[%dx%d]", 4 << i
, 4 << i
);
1698 REPORT_SPEEDUP(opt
.copy_shl
[i
], ref
.copy_shl
[i
], sbuf1
, sbuf2
, STRIDE
, 64);
1703 if (opt
.cvt32to16_shr
)
1705 HEADER0("cvt32to16_shr");
1706 REPORT_SPEEDUP(opt
.cvt32to16_shr
, ref
.cvt32to16_shr
, sbuf1
, ibuf1
, 64, 5, 64);
1709 if (opt
.cvt16to32_shl
)
1711 HEADER0("cvt16to32_shl");
1712 REPORT_SPEEDUP(opt
.cvt16to32_shl
, ref
.cvt16to32_shl
, ibuf1
, sbuf1
, 64, 5, 64);
1717 HEADER0("weight_pp");
1718 REPORT_SPEEDUP(opt
.weight_pp
, ref
.weight_pp
, pbuf1
, pbuf2
, 64, 32, 32, 128, 1 << 9, 10, 100);
1723 HEADER0("weight_sp");
1724 REPORT_SPEEDUP(opt
.weight_sp
, ref
.weight_sp
, (int16_t*)sbuf1
, pbuf1
, 64, 64, 32, 32, 128, 1 << 9, 10, 100);
1727 if (opt
.frame_init_lowres_core
)
1729 HEADER0("downscale");
1730 REPORT_SPEEDUP(opt
.frame_init_lowres_core
, ref
.frame_init_lowres_core
, pbuf2
, pbuf1
, pbuf2
, pbuf3
, pbuf4
, 64, 64, 64, 64);
1733 if (opt
.scale1D_128to64
)
1735 HEADER0("scale1D_128to64");
1736 REPORT_SPEEDUP(opt
.scale1D_128to64
, ref
.scale1D_128to64
, pbuf2
, pbuf1
, 64);
1739 if (opt
.scale2D_64to32
)
1741 HEADER0("scale2D_64to32");
1742 REPORT_SPEEDUP(opt
.scale2D_64to32
, ref
.scale2D_64to32
, pbuf2
, pbuf1
, 64);
1745 if (opt
.ssim_4x4x2_core
)
1747 HEADER0("ssim_4x4x2_core");
1748 REPORT_SPEEDUP(opt
.ssim_4x4x2_core
, ref
.ssim_4x4x2_core
, pbuf1
, 64, pbuf2
, 64, (int(*)[4])sbuf1
);
1753 HEADER0("ssim_end_4");
1754 REPORT_SPEEDUP(opt
.ssim_end_4
, ref
.ssim_end_4
, (int(*)[4])pbuf2
, (int(*)[4])pbuf1
, 4);
1759 HEADER0("SAO_EO_0");
1760 REPORT_SPEEDUP(opt
.saoCuOrgE0
, ref
.saoCuOrgE0
, pbuf1
, psbuf1
, 64, 1);
1763 if (opt
.planecopy_sp
)
1765 HEADER0("planecopy_sp");
1766 REPORT_SPEEDUP(opt
.planecopy_sp
, ref
.planecopy_sp
, ushort_test_buff
[0], 64, pbuf1
, 64, 64, 64, 8, 255);
1769 if (opt
.planecopy_cp
)
1771 HEADER0("planecopy_cp");
1772 REPORT_SPEEDUP(opt
.planecopy_cp
, ref
.planecopy_cp
, uchar_test_buff
[0], 64, pbuf1
, 64, 64, 64, 2);
1777 HEADER0("copy_shr");
1778 REPORT_SPEEDUP(opt
.copy_shr
, ref
.copy_shr
, sbuf1
, sbuf2
, 64, 5, 64);