1 /*****************************************************************************
2 * Copyright (C) 2013 x265 project
4 * Authors: Steve Borho <steve@borho.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
24 #include "pixelharness.h"
25 #include "primitives.h"
29 PixelHarness::PixelHarness()
31 /* [0] --- Random values
34 for (int i
= 0; i
< BUFFSIZE
; i
++)
36 pixel_test_buff
[0][i
] = rand() % PIXEL_MAX
;
37 short_test_buff
[0][i
] = (rand() % (2 * SMAX
+ 1)) - SMAX
- 1; // max(SHORT_MIN, min(rand(), SMAX));
38 short_test_buff1
[0][i
] = rand() & PIXEL_MAX
; // For block copy only
39 short_test_buff2
[0][i
] = rand() % 16383; // for addAvg
40 int_test_buff
[0][i
] = rand() % SHORT_MAX
;
41 ushort_test_buff
[0][i
] = rand() % ((1 << 16) - 1);
42 uchar_test_buff
[0][i
] = rand() % ((1 << 8) - 1);
44 pixel_test_buff
[1][i
] = PIXEL_MIN
;
45 short_test_buff
[1][i
] = SMIN
;
46 short_test_buff1
[1][i
] = PIXEL_MIN
;
47 short_test_buff2
[1][i
] = -16384;
48 int_test_buff
[1][i
] = SHORT_MIN
;
49 ushort_test_buff
[1][i
] = PIXEL_MIN
;
50 uchar_test_buff
[1][i
] = PIXEL_MIN
;
52 pixel_test_buff
[2][i
] = PIXEL_MAX
;
53 short_test_buff
[2][i
] = SMAX
;
54 short_test_buff1
[2][i
] = PIXEL_MAX
;
55 short_test_buff2
[2][i
] = 16383;
56 int_test_buff
[2][i
] = SHORT_MAX
;
57 ushort_test_buff
[2][i
] = ((1 << 16) - 1);
58 uchar_test_buff
[2][i
] = 255;
60 pbuf1
[i
] = rand() & PIXEL_MAX
;
61 pbuf2
[i
] = rand() & PIXEL_MAX
;
62 pbuf3
[i
] = rand() & PIXEL_MAX
;
63 pbuf4
[i
] = rand() & PIXEL_MAX
;
65 sbuf1
[i
] = (rand() % (2 * SMAX
+ 1)) - SMAX
- 1; //max(SHORT_MIN, min(rand(), SMAX));
66 sbuf2
[i
] = (rand() % (2 * SMAX
+ 1)) - SMAX
- 1; //max(SHORT_MIN, min(rand(), SMAX));
67 ibuf1
[i
] = (rand() % (2 * SMAX
+ 1)) - SMAX
- 1;
68 psbuf1
[i
] = (rand() % 65) - 32; // range is between -32 to 32
69 sbuf3
[i
] = rand() % PIXEL_MAX
; // for blockcopy only
73 bool PixelHarness::check_pixelcmp(pixelcmp_t ref
, pixelcmp_t opt
)
76 intptr_t stride
= STRIDE
;
78 for (int i
= 0; i
< ITERS
; i
++)
80 int index1
= rand() % TEST_CASES
;
81 int index2
= rand() % TEST_CASES
;
82 int vres
= (int)checked(opt
, pixel_test_buff
[index1
], stride
, pixel_test_buff
[index2
] + j
, stride
);
83 int cres
= ref(pixel_test_buff
[index1
], stride
, pixel_test_buff
[index2
] + j
, stride
);
94 bool PixelHarness::check_pixelcmp_sp(pixelcmp_sp_t ref
, pixelcmp_sp_t opt
)
97 intptr_t stride
= STRIDE
;
99 for (int i
= 0; i
< ITERS
; i
++)
101 int index1
= rand() % TEST_CASES
;
102 int index2
= rand() % TEST_CASES
;
103 int vres
= (int)checked(opt
, short_test_buff
[index1
], stride
, pixel_test_buff
[index2
] + j
, stride
);
104 int cres
= ref(short_test_buff
[index1
], stride
, pixel_test_buff
[index2
] + j
, stride
);
115 bool PixelHarness::check_pixelcmp_ss(pixelcmp_ss_t ref
, pixelcmp_ss_t opt
)
118 intptr_t stride
= STRIDE
;
120 for (int i
= 0; i
< ITERS
; i
++)
122 int index1
= rand() % TEST_CASES
;
123 int index2
= rand() % TEST_CASES
;
124 int vres
= (int)checked(opt
, short_test_buff
[index1
], stride
, short_test_buff
[index2
] + j
, stride
);
125 int cres
= ref(short_test_buff
[index1
], stride
, short_test_buff
[index2
] + j
, stride
);
136 bool PixelHarness::check_pixelcmp_x3(pixelcmp_x3_t ref
, pixelcmp_x3_t opt
)
138 ALIGN_VAR_16(int, cres
[16]);
139 ALIGN_VAR_16(int, vres
[16]);
141 intptr_t stride
= FENC_STRIDE
- 5;
142 for (int i
= 0; i
< ITERS
; i
++)
144 int index1
= rand() % TEST_CASES
;
145 int index2
= rand() % TEST_CASES
;
146 checked(opt
, pixel_test_buff
[index1
],
147 pixel_test_buff
[index2
] + j
,
148 pixel_test_buff
[index2
] + j
+ 1,
149 pixel_test_buff
[index2
] + j
+ 2, stride
, &vres
[0]);
150 ref(pixel_test_buff
[index1
],
151 pixel_test_buff
[index2
] + j
,
152 pixel_test_buff
[index2
] + j
+ 1,
153 pixel_test_buff
[index2
] + j
+ 2, stride
, &cres
[0]);
154 if ((vres
[0] != cres
[0]) || ((vres
[1] != cres
[1])) || ((vres
[2] != cres
[2])))
164 bool PixelHarness::check_pixelcmp_x4(pixelcmp_x4_t ref
, pixelcmp_x4_t opt
)
166 ALIGN_VAR_16(int, cres
[16]);
167 ALIGN_VAR_16(int, vres
[16]);
169 intptr_t stride
= FENC_STRIDE
- 5;
170 for (int i
= 0; i
< ITERS
; i
++)
172 int index1
= rand() % TEST_CASES
;
173 int index2
= rand() % TEST_CASES
;
174 checked(opt
, pixel_test_buff
[index1
],
175 pixel_test_buff
[index2
] + j
,
176 pixel_test_buff
[index2
] + j
+ 1,
177 pixel_test_buff
[index2
] + j
+ 2,
178 pixel_test_buff
[index2
] + j
+ 3, stride
, &vres
[0]);
179 ref(pixel_test_buff
[index1
],
180 pixel_test_buff
[index2
] + j
,
181 pixel_test_buff
[index2
] + j
+ 1,
182 pixel_test_buff
[index2
] + j
+ 2,
183 pixel_test_buff
[index2
] + j
+ 3, stride
, &cres
[0]);
185 if ((vres
[0] != cres
[0]) || ((vres
[1] != cres
[1])) || ((vres
[2] != cres
[2])) || ((vres
[3] != cres
[3])))
195 bool PixelHarness::check_calresidual(calcresidual_t ref
, calcresidual_t opt
)
197 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
198 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
199 memset(ref_dest
, 0, 64 * 64 * sizeof(int16_t));
200 memset(opt_dest
, 0, 64 * 64 * sizeof(int16_t));
203 intptr_t stride
= STRIDE
;
204 for (int i
= 0; i
< ITERS
; i
++)
206 int index
= i
% TEST_CASES
;
207 checked(opt
, pbuf1
+ j
, pixel_test_buff
[index
] + j
, opt_dest
, stride
);
208 ref(pbuf1
+ j
, pixel_test_buff
[index
] + j
, ref_dest
, stride
);
210 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
220 bool PixelHarness::check_ssd_s(pixel_ssd_s_t ref
, pixel_ssd_s_t opt
)
223 for (int i
= 0; i
< ITERS
; i
++)
225 // NOTE: stride must be multiple of 16, because minimum block is 4x4
226 int stride
= (STRIDE
+ (rand() % STRIDE
)) & ~15;
227 int cres
= ref(sbuf1
+ j
, stride
);
228 int vres
= (int)checked(opt
, sbuf1
+ j
, (intptr_t)stride
);
242 bool PixelHarness::check_weightp(weightp_sp_t ref
, weightp_sp_t opt
)
244 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
245 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
247 memset(ref_dest
, 0, 64 * 64 * sizeof(pixel
));
248 memset(opt_dest
, 0, 64 * 64 * sizeof(pixel
));
250 int width
= 2 * (rand() % 32 + 1);
252 int w0
= rand() % 128;
253 int shift
= rand() % 15;
254 int round
= shift
? (1 << (shift
- 1)) : 0;
255 int offset
= (rand() % 256) - 128;
256 intptr_t stride
= 64;
257 for (int i
= 0; i
< ITERS
; i
++)
259 int index
= i
% TEST_CASES
;
260 checked(opt
, short_test_buff
[index
] + j
, opt_dest
, stride
, stride
, width
, height
, w0
, round
, shift
, offset
);
261 ref(short_test_buff
[index
] + j
, ref_dest
, stride
, stride
, width
, height
, w0
, round
, shift
, offset
);
263 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
273 bool PixelHarness::check_weightp(weightp_pp_t ref
, weightp_pp_t opt
)
275 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
276 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
278 memset(ref_dest
, 0, 64 * 64 * sizeof(pixel
));
279 memset(opt_dest
, 0, 64 * 64 * sizeof(pixel
));
281 int width
= 16 * (rand() % 4 + 1);
283 int w0
= rand() % 128;
284 int shift
= rand() % 15;
285 int round
= shift
? (1 << (shift
- 1)) : 0;
286 int offset
= (rand() % 256) - 128;
287 intptr_t stride
= 64;
288 for (int i
= 0; i
< ITERS
; i
++)
290 int index
= i
% TEST_CASES
;
291 checked(opt
, pixel_test_buff
[index
] + j
, opt_dest
, stride
, width
, height
, w0
, round
, shift
, offset
);
292 ref(pixel_test_buff
[index
] + j
, ref_dest
, stride
, width
, height
, w0
, round
, shift
, offset
);
294 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
304 bool PixelHarness::check_downscale_t(downscale_t ref
, downscale_t opt
)
306 ALIGN_VAR_16(pixel
, ref_destf
[32 * 32]);
307 ALIGN_VAR_16(pixel
, opt_destf
[32 * 32]);
309 ALIGN_VAR_16(pixel
, ref_desth
[32 * 32]);
310 ALIGN_VAR_16(pixel
, opt_desth
[32 * 32]);
312 ALIGN_VAR_16(pixel
, ref_destv
[32 * 32]);
313 ALIGN_VAR_16(pixel
, opt_destv
[32 * 32]);
315 ALIGN_VAR_16(pixel
, ref_destc
[32 * 32]);
316 ALIGN_VAR_16(pixel
, opt_destc
[32 * 32]);
318 intptr_t src_stride
= 64;
319 intptr_t dst_stride
= 32;
323 for (int i
= 0; i
< ITERS
; i
++)
325 int index
= i
% TEST_CASES
;
326 ref(pixel_test_buff
[index
] + j
, ref_destf
, ref_desth
, ref_destv
,
327 ref_destc
, src_stride
, dst_stride
, bx
, by
);
328 checked(opt
, pixel_test_buff
[index
] + j
, opt_destf
, opt_desth
, opt_destv
,
329 opt_destc
, src_stride
, dst_stride
, bx
, by
);
331 if (memcmp(ref_destf
, opt_destf
, 32 * 32 * sizeof(pixel
)))
333 if (memcmp(ref_desth
, opt_desth
, 32 * 32 * sizeof(pixel
)))
335 if (memcmp(ref_destv
, opt_destv
, 32 * 32 * sizeof(pixel
)))
337 if (memcmp(ref_destc
, opt_destc
, 32 * 32 * sizeof(pixel
)))
347 bool PixelHarness::check_cpy2Dto1D_shl_t(cpy2Dto1D_shl_t ref
, cpy2Dto1D_shl_t opt
)
349 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
350 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
352 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
353 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
356 intptr_t stride
= STRIDE
;
357 for (int i
= 0; i
< ITERS
; i
++)
359 int shift
= (rand() % 7 + 1);
361 int index
= i
% TEST_CASES
;
362 checked(opt
, opt_dest
, short_test_buff
[index
] + j
, stride
, shift
);
363 ref(ref_dest
, short_test_buff
[index
] + j
, stride
, shift
);
365 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
375 bool PixelHarness::check_cpy2Dto1D_shr_t(cpy2Dto1D_shr_t ref
, cpy2Dto1D_shr_t opt
)
377 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
378 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
380 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
381 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
384 intptr_t stride
= STRIDE
;
385 for (int i
= 0; i
< ITERS
; i
++)
387 int shift
= (rand() % 7 + 1);
389 int index
= i
% TEST_CASES
;
390 checked(opt
, opt_dest
, short_test_buff
[index
] + j
, stride
, shift
);
391 ref(ref_dest
, short_test_buff
[index
] + j
, stride
, shift
);
393 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
403 bool PixelHarness::check_copy_cnt_t(copy_cnt_t ref
, copy_cnt_t opt
)
405 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
406 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
408 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
409 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
412 intptr_t stride
= STRIDE
;
413 for (int i
= 0; i
< ITERS
; i
++)
415 int index
= i
% TEST_CASES
;
416 int opt_cnt
= (int)checked(opt
, opt_dest
, short_test_buff1
[index
] + j
, stride
);
417 int ref_cnt
= ref(ref_dest
, short_test_buff1
[index
] + j
, stride
);
419 if ((ref_cnt
!= opt_cnt
) || memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
429 bool PixelHarness::check_cpy1Dto2D_shl_t(cpy1Dto2D_shl_t ref
, cpy1Dto2D_shl_t opt
)
431 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
432 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
434 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
435 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
438 intptr_t stride
= STRIDE
;
439 for (int i
= 0; i
< ITERS
; i
++)
441 int shift
= (rand() % 7 + 1);
443 int index
= i
% TEST_CASES
;
444 checked(opt
, opt_dest
, short_test_buff
[index
] + j
, stride
, shift
);
445 ref(ref_dest
, short_test_buff
[index
] + j
, stride
, shift
);
447 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
457 bool PixelHarness::check_cpy1Dto2D_shr_t(cpy1Dto2D_shr_t ref
, cpy1Dto2D_shr_t opt
)
459 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
460 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
462 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
463 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
466 intptr_t stride
= STRIDE
;
467 for (int i
= 0; i
< ITERS
; i
++)
469 int shift
= (rand() % 7 + 1);
471 int index
= i
% TEST_CASES
;
472 checked(opt
, opt_dest
, short_test_buff
[index
] + j
, stride
, shift
);
473 ref(ref_dest
, short_test_buff
[index
] + j
, stride
, shift
);
475 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
485 bool PixelHarness::check_pixelavg_pp(pixelavg_pp_t ref
, pixelavg_pp_t opt
)
487 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
488 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
492 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
493 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
495 intptr_t stride
= STRIDE
;
496 for (int i
= 0; i
< ITERS
; i
++)
498 int index1
= rand() % TEST_CASES
;
499 int index2
= rand() % TEST_CASES
;
500 checked(ref
, ref_dest
, stride
, pixel_test_buff
[index1
] + j
,
501 stride
, pixel_test_buff
[index2
] + j
, stride
, 32);
502 opt(opt_dest
, stride
, pixel_test_buff
[index1
] + j
,
503 stride
, pixel_test_buff
[index2
] + j
, stride
, 32);
505 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
515 bool PixelHarness::check_copy_pp(copy_pp_t ref
, copy_pp_t opt
)
517 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
518 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
520 // we don't know the partition size so we are checking the entire output buffer so
521 // we must initialize the buffers
522 memset(ref_dest
, 0, sizeof(ref_dest
));
523 memset(opt_dest
, 0, sizeof(opt_dest
));
526 intptr_t stride
= STRIDE
;
527 for (int i
= 0; i
< ITERS
; i
++)
529 int index
= i
% TEST_CASES
;
530 checked(opt
, opt_dest
, stride
, pixel_test_buff
[index
] + j
, stride
);
531 ref(ref_dest
, stride
, pixel_test_buff
[index
] + j
, stride
);
533 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
543 bool PixelHarness::check_copy_sp(copy_sp_t ref
, copy_sp_t opt
)
545 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
546 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
548 // we don't know the partition size so we are checking the entire output buffer so
549 // we must initialize the buffers
550 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
551 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
554 intptr_t stride1
= 64, stride2
= STRIDE
;
555 for (int i
= 0; i
< ITERS
; i
++)
557 int index
= i
% TEST_CASES
;
558 checked(opt
, opt_dest
, stride1
, short_test_buff1
[index
] + j
, stride2
);
559 ref(ref_dest
, stride1
, short_test_buff1
[index
] + j
, stride2
);
561 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
571 bool PixelHarness::check_copy_ps(copy_ps_t ref
, copy_ps_t opt
)
573 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
574 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
576 // we don't know the partition size so we are checking the entire output buffer so
577 // we must initialize the buffers
578 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
579 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
582 intptr_t stride
= STRIDE
;
583 for (int i
= 0; i
< ITERS
; i
++)
585 int index
= i
% TEST_CASES
;
586 checked(opt
, opt_dest
, stride
, pixel_test_buff
[index
] + j
, stride
);
587 ref(ref_dest
, stride
, pixel_test_buff
[index
] + j
, stride
);
589 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
599 bool PixelHarness::check_copy_ss(copy_ss_t ref
, copy_ss_t opt
)
601 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
602 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
604 // we don't know the partition size so we are checking the entire output buffer so
605 // we must initialize the buffers
606 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
607 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
610 intptr_t stride
= STRIDE
;
611 for (int i
= 0; i
< ITERS
; i
++)
613 int index
= i
% TEST_CASES
;
614 checked(opt
, opt_dest
, stride
, short_test_buff1
[index
] + j
, stride
);
615 ref(ref_dest
, stride
, short_test_buff1
[index
] + j
, stride
);
617 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
627 bool PixelHarness::check_blockfill_s(blockfill_s_t ref
, blockfill_s_t opt
)
629 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
630 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
632 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
633 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
635 intptr_t stride
= 64;
636 for (int i
= 0; i
< ITERS
; i
++)
638 int16_t value
= (rand() % SHORT_MAX
) + 1;
640 checked(opt
, opt_dest
, stride
, value
);
641 ref(ref_dest
, stride
, value
);
643 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
652 bool PixelHarness::check_pixel_sub_ps(pixel_sub_ps_t ref
, pixel_sub_ps_t opt
)
654 ALIGN_VAR_16(int16_t, ref_dest
[64 * 64]);
655 ALIGN_VAR_16(int16_t, opt_dest
[64 * 64]);
657 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
658 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
661 intptr_t stride2
= 64, stride
= STRIDE
;
662 for (int i
= 0; i
< 1; i
++)
664 int index1
= rand() % TEST_CASES
;
665 int index2
= rand() % TEST_CASES
;
666 checked(opt
, opt_dest
, stride2
, pixel_test_buff
[index1
] + j
,
667 pixel_test_buff
[index2
] + j
, stride
, stride
);
668 ref(ref_dest
, stride2
, pixel_test_buff
[index1
] + j
,
669 pixel_test_buff
[index2
] + j
, stride
, stride
);
671 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(int16_t)))
681 bool PixelHarness::check_scale_pp(scale_t ref
, scale_t opt
)
683 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
684 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
686 memset(ref_dest
, 0, sizeof(ref_dest
));
687 memset(opt_dest
, 0, sizeof(opt_dest
));
690 intptr_t stride
= STRIDE
;
691 for (int i
= 0; i
< ITERS
; i
++)
693 int index
= i
% TEST_CASES
;
694 checked(opt
, opt_dest
, pixel_test_buff
[index
] + j
, stride
);
695 ref(ref_dest
, pixel_test_buff
[index
] + j
, stride
);
697 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
707 bool PixelHarness::check_transpose(transpose_t ref
, transpose_t opt
)
709 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
710 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
712 memset(ref_dest
, 0, sizeof(ref_dest
));
713 memset(opt_dest
, 0, sizeof(opt_dest
));
716 intptr_t stride
= STRIDE
;
717 for (int i
= 0; i
< ITERS
; i
++)
719 int index
= i
% TEST_CASES
;
720 checked(opt
, opt_dest
, pixel_test_buff
[index
] + j
, stride
);
721 ref(ref_dest
, pixel_test_buff
[index
] + j
, stride
);
723 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
733 bool PixelHarness::check_pixel_add_ps(pixel_add_ps_t ref
, pixel_add_ps_t opt
)
735 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
736 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
738 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
739 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
742 intptr_t stride2
= 64, stride
= STRIDE
;
743 for (int i
= 0; i
< ITERS
; i
++)
745 int index1
= rand() % TEST_CASES
;
746 int index2
= rand() % TEST_CASES
;
747 checked(opt
, opt_dest
, stride2
, pixel_test_buff
[index1
] + j
, short_test_buff
[index2
] + j
, stride
, stride
);
748 ref(ref_dest
, stride2
, pixel_test_buff
[index1
] + j
, short_test_buff
[index2
] + j
, stride
, stride
);
750 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
760 bool PixelHarness::check_pixel_var(var_t ref
, var_t opt
)
764 intptr_t stride
= STRIDE
;
766 for (int i
= 0; i
< ITERS
; i
++)
768 int index
= i
% TEST_CASES
;
769 uint64_t vres
= checked(opt
, pixel_test_buff
[index
], stride
);
770 uint64_t cres
= ref(pixel_test_buff
[index
], stride
);
781 bool PixelHarness::check_ssim_4x4x2_core(ssim_4x4x2_core_t ref
, ssim_4x4x2_core_t opt
)
783 ALIGN_VAR_32(int, sum0
[2][4]);
784 ALIGN_VAR_32(int, sum1
[2][4]);
786 for (int i
= 0; i
< ITERS
; i
++)
788 intptr_t stride
= rand() % 64;
789 int index1
= rand() % TEST_CASES
;
790 int index2
= rand() % TEST_CASES
;
791 ref(pixel_test_buff
[index1
] + i
, stride
, pixel_test_buff
[index2
] + i
, stride
, sum0
);
792 checked(opt
, pixel_test_buff
[index1
] + i
, stride
, pixel_test_buff
[index2
] + i
, stride
, sum1
);
794 if (memcmp(sum0
, sum1
, sizeof(sum0
)))
803 /* TODO: This function causes crashes when checked. Is this a real bug? */
804 bool PixelHarness::check_ssim_end(ssim_end4_t ref
, ssim_end4_t opt
)
806 ALIGN_VAR_32(int, sum0
[5][4]);
807 ALIGN_VAR_32(int, sum1
[5][4]);
809 for (int i
= 0; i
< ITERS
; i
++)
811 for (int j
= 0; j
< 5; j
++)
813 for (int k
= 0; k
< 4; k
++)
815 sum0
[j
][k
] = rand() % (1 << 12);
816 sum1
[j
][k
] = rand() % (1 << 12);
820 int width
= (rand() % 4) + 1; // range[1-4]
821 float cres
= ref(sum0
, sum1
, width
);
822 float vres
= checked_float(opt
, sum0
, sum1
, width
);
823 if (fabs(vres
- cres
) > 0.00001)
832 bool PixelHarness::check_addAvg(addAvg_t ref
, addAvg_t opt
)
834 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
835 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
839 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
840 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
841 intptr_t stride
= STRIDE
;
843 for (int i
= 0; i
< ITERS
; i
++)
845 int index1
= rand() % TEST_CASES
;
846 int index2
= rand() % TEST_CASES
;
847 ref(short_test_buff2
[index1
] + j
, short_test_buff2
[index2
] + j
, ref_dest
, stride
, stride
, stride
);
848 checked(opt
, short_test_buff2
[index1
] + j
, short_test_buff2
[index2
] + j
, opt_dest
, stride
, stride
, stride
);
849 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
859 bool PixelHarness::check_saoCuOrgE0_t(saoCuOrgE0_t ref
, saoCuOrgE0_t opt
)
861 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
862 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
864 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
865 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
869 for (int i
= 0; i
< ITERS
; i
++)
871 int width
= 16 * (rand() % 4 + 1);
872 int8_t sign
= rand() % 3;
878 ref(ref_dest
, psbuf1
+ j
, width
, sign
);
879 checked(opt
, opt_dest
, psbuf1
+ j
, width
, sign
);
881 if (memcmp(ref_dest
, opt_dest
, 64 * 64 * sizeof(pixel
)))
891 bool PixelHarness::check_planecopy_sp(planecopy_sp_t ref
, planecopy_sp_t opt
)
893 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
894 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
896 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
897 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
899 int width
= 16 + rand() % 48;
900 int height
= 16 + rand() % 48;
901 intptr_t srcStride
= 64;
902 intptr_t dstStride
= width
;
905 for (int i
= 0; i
< ITERS
; i
++)
907 int index
= i
% TEST_CASES
;
908 checked(opt
, ushort_test_buff
[index
] + j
, srcStride
, opt_dest
, dstStride
, width
, height
, (int)8, (uint16_t)255);
909 ref(ushort_test_buff
[index
] + j
, srcStride
, ref_dest
, dstStride
, width
, height
, (int)8, (uint16_t)255);
911 if (memcmp(ref_dest
, opt_dest
, width
* height
* sizeof(pixel
)))
921 bool PixelHarness::check_planecopy_cp(planecopy_cp_t ref
, planecopy_cp_t opt
)
923 ALIGN_VAR_16(pixel
, ref_dest
[64 * 64]);
924 ALIGN_VAR_16(pixel
, opt_dest
[64 * 64]);
926 memset(ref_dest
, 0xCD, sizeof(ref_dest
));
927 memset(opt_dest
, 0xCD, sizeof(opt_dest
));
929 int width
= 16 + rand() % 48;
930 int height
= 16 + rand() % 48;
931 intptr_t srcStride
= 64;
932 intptr_t dstStride
= width
;
935 for (int i
= 0; i
< ITERS
; i
++)
937 int index
= i
% TEST_CASES
;
938 checked(opt
, uchar_test_buff
[index
] + j
, srcStride
, opt_dest
, dstStride
, width
, height
, (int)2);
939 ref(uchar_test_buff
[index
] + j
, srcStride
, ref_dest
, dstStride
, width
, height
, (int)2);
941 if (memcmp(ref_dest
, opt_dest
, width
* height
* sizeof(pixel
)))
951 bool PixelHarness::testPartition(int part
, const EncoderPrimitives
& ref
, const EncoderPrimitives
& opt
)
955 if (!check_pixelcmp(ref
.satd
[part
], opt
.satd
[part
]))
957 printf("satd[%s]: failed!\n", lumaPartStr
[part
]);
962 if (opt
.sa8d_inter
[part
])
964 if (!check_pixelcmp(ref
.sa8d_inter
[part
], opt
.sa8d_inter
[part
]))
966 printf("sa8d_inter[%s]: failed!\n", lumaPartStr
[part
]);
973 if (!check_pixelcmp(ref
.sad
[part
], opt
.sad
[part
]))
975 printf("sad[%s]: failed!\n", lumaPartStr
[part
]);
980 if (opt
.sse_pp
[part
])
982 if (!check_pixelcmp(ref
.sse_pp
[part
], opt
.sse_pp
[part
]))
984 printf("sse_pp[%s]: failed!\n", lumaPartStr
[part
]);
989 if (opt
.sse_sp
[part
])
991 if (!check_pixelcmp_sp(ref
.sse_sp
[part
], opt
.sse_sp
[part
]))
993 printf("sse_sp[%s]: failed!\n", lumaPartStr
[part
]);
998 if (opt
.sse_ss
[part
])
1000 if (!check_pixelcmp_ss(ref
.sse_ss
[part
], opt
.sse_ss
[part
]))
1002 printf("sse_ss[%s]: failed!\n", lumaPartStr
[part
]);
1007 if (opt
.sad_x3
[part
])
1009 if (!check_pixelcmp_x3(ref
.sad_x3
[part
], opt
.sad_x3
[part
]))
1011 printf("sad_x3[%s]: failed!\n", lumaPartStr
[part
]);
1016 if (opt
.sad_x4
[part
])
1018 if (!check_pixelcmp_x4(ref
.sad_x4
[part
], opt
.sad_x4
[part
]))
1020 printf("sad_x4[%s]: failed!\n", lumaPartStr
[part
]);
1025 if (opt
.pixelavg_pp
[part
])
1027 if (!check_pixelavg_pp(ref
.pixelavg_pp
[part
], opt
.pixelavg_pp
[part
]))
1029 printf("pixelavg_pp[%s]: failed!\n", lumaPartStr
[part
]);
1034 if (opt
.luma_copy_pp
[part
])
1036 if (!check_copy_pp(ref
.luma_copy_pp
[part
], opt
.luma_copy_pp
[part
]))
1038 printf("luma_copy_pp[%s] failed\n", lumaPartStr
[part
]);
1043 if (opt
.luma_copy_sp
[part
])
1045 if (!check_copy_sp(ref
.luma_copy_sp
[part
], opt
.luma_copy_sp
[part
]))
1047 printf("luma_copy_sp[%s] failed\n", lumaPartStr
[part
]);
1052 if (opt
.luma_copy_ps
[part
])
1054 if (!check_copy_ps(ref
.luma_copy_ps
[part
], opt
.luma_copy_ps
[part
]))
1056 printf("luma_copy_ps[%s] failed\n", lumaPartStr
[part
]);
1061 if (opt
.luma_copy_ss
[part
])
1063 if (!check_copy_ss(ref
.luma_copy_ss
[part
], opt
.luma_copy_ss
[part
]))
1065 printf("luma_copy_ss[%s] failed\n", lumaPartStr
[part
]);
1070 if (opt
.luma_addAvg
[part
])
1072 if (!check_addAvg(ref
.luma_addAvg
[part
], opt
.luma_addAvg
[part
]))
1074 printf("luma_addAvg[%s] failed\n", lumaPartStr
[part
]);
1079 if (part
< NUM_SQUARE_BLOCKS
)
1081 if (opt
.luma_sub_ps
[part
])
1083 if (!check_pixel_sub_ps(ref
.luma_sub_ps
[part
], opt
.luma_sub_ps
[part
]))
1085 printf("luma_sub_ps[%s] failed\n", lumaPartStr
[part
]);
1090 if (opt
.luma_add_ps
[part
])
1092 if (!check_pixel_add_ps(ref
.luma_add_ps
[part
], opt
.luma_add_ps
[part
]))
1094 printf("luma_add_ps[%s] failed\n", lumaPartStr
[part
]);
1100 for (int i
= 0; i
< X265_CSP_COUNT
; i
++)
1102 if (opt
.chroma
[i
].copy_pp
[part
])
1104 if (!check_copy_pp(ref
.chroma
[i
].copy_pp
[part
], opt
.chroma
[i
].copy_pp
[part
]))
1106 printf("chroma_copy_pp[%s][%s] failed\n", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1110 if (opt
.chroma
[i
].copy_sp
[part
])
1112 if (!check_copy_sp(ref
.chroma
[i
].copy_sp
[part
], opt
.chroma
[i
].copy_sp
[part
]))
1114 printf("chroma_copy_sp[%s][%s] failed\n", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1118 if (opt
.chroma
[i
].copy_ps
[part
])
1120 if (!check_copy_ps(ref
.chroma
[i
].copy_ps
[part
], opt
.chroma
[i
].copy_ps
[part
]))
1122 printf("chroma_copy_ps[%s][%s] failed\n", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1126 if (opt
.chroma
[i
].copy_ss
[part
])
1128 if (!check_copy_ss(ref
.chroma
[i
].copy_ss
[part
], opt
.chroma
[i
].copy_ss
[part
]))
1130 printf("chroma_copy_ss[%s][%s] failed\n", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1134 if (opt
.chroma
[i
].addAvg
[part
])
1136 if (!check_addAvg(ref
.chroma
[i
].addAvg
[part
], opt
.chroma
[i
].addAvg
[part
]))
1138 printf("chroma_addAvg[%s][%s] failed\n", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1142 if (part
< NUM_SQUARE_BLOCKS
)
1144 if (opt
.chroma
[i
].sub_ps
[part
])
1146 if (!check_pixel_sub_ps(ref
.chroma
[i
].sub_ps
[part
], opt
.chroma
[i
].sub_ps
[part
]))
1148 printf("chroma_sub_ps[%s][%s] failed\n", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1152 if (opt
.chroma
[i
].add_ps
[part
])
1154 if (!check_pixel_add_ps(ref
.chroma
[i
].add_ps
[part
], opt
.chroma
[i
].add_ps
[part
]))
1156 printf("chroma_add_ps[%s][%s] failed\n", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1166 bool PixelHarness::testCorrectness(const EncoderPrimitives
& ref
, const EncoderPrimitives
& opt
)
1168 for (int size
= 4; size
<= 64; size
*= 2)
1170 int part
= partitionFromSizes(size
, size
); // 2Nx2N
1171 if (!testPartition(part
, ref
, opt
)) return false;
1175 part
= partitionFromSizes(size
, size
>> 1); // 2NxN
1176 if (!testPartition(part
, ref
, opt
)) return false;
1177 part
= partitionFromSizes(size
>> 1, size
); // Nx2N
1178 if (!testPartition(part
, ref
, opt
)) return false;
1183 part
= partitionFromSizes(size
, size
>> 2);
1184 if (!testPartition(part
, ref
, opt
)) return false;
1185 part
= partitionFromSizes(size
, 3 * (size
>> 2));
1186 if (!testPartition(part
, ref
, opt
)) return false;
1188 part
= partitionFromSizes(size
>> 2, size
);
1189 if (!testPartition(part
, ref
, opt
)) return false;
1190 part
= partitionFromSizes(3 * (size
>> 2), size
);
1191 if (!testPartition(part
, ref
, opt
)) return false;
1195 for (int i
= 0; i
< NUM_SQUARE_BLOCKS
; i
++)
1197 if (opt
.calcresidual
[i
])
1199 if (!check_calresidual(ref
.calcresidual
[i
], opt
.calcresidual
[i
]))
1201 printf("calcresidual width: %d failed!\n", 4 << i
);
1207 if (!check_pixelcmp(ref
.sa8d
[i
], opt
.sa8d
[i
]))
1209 printf("sa8d[%dx%d]: failed!\n", 4 << i
, 4 << i
);
1214 if ((i
<= BLOCK_32x32
) && opt
.ssd_s
[i
])
1216 if (!check_ssd_s(ref
.ssd_s
[i
], opt
.ssd_s
[i
]))
1218 printf("ssd_s[%dx%d]: failed!\n", 4 << i
, 4 << i
);
1223 if (opt
.blockfill_s
[i
])
1225 if (!check_blockfill_s(ref
.blockfill_s
[i
], opt
.blockfill_s
[i
]))
1227 printf("blockfill_s[%dx%d]: failed!\n", 4 << i
, 4 << i
);
1231 if (opt
.transpose
[i
])
1233 if (!check_transpose(ref
.transpose
[i
], opt
.transpose
[i
]))
1235 printf("transpose[%dx%d] failed\n", 4 << i
, 4 << i
);
1242 if (!check_pixel_var(ref
.var
[i
], opt
.var
[i
]))
1244 printf("var[%dx%d] failed\n", 4 << i
, 4 << i
);
1249 if ((i
< BLOCK_64x64
) && opt
.copy_cnt
[i
])
1251 if (!check_copy_cnt_t(ref
.copy_cnt
[i
], opt
.copy_cnt
[i
]))
1253 printf("copy_cnt[%dx%d] failed!\n", 4 << i
, 4 << i
);
1258 if ((i
< BLOCK_64x64
) && opt
.cpy2Dto1D_shl
[i
])
1260 if (!check_cpy2Dto1D_shl_t(ref
.cpy2Dto1D_shl
[i
], opt
.cpy2Dto1D_shl
[i
]))
1262 printf("cpy2Dto1D_shl failed!\n");
1267 if ((i
< BLOCK_64x64
) && opt
.cpy2Dto1D_shr
[i
])
1269 if (!check_cpy2Dto1D_shr_t(ref
.cpy2Dto1D_shr
[i
], opt
.cpy2Dto1D_shr
[i
]))
1271 printf("cpy2Dto1D_shr failed!\n");
1276 if ((i
< BLOCK_64x64
) && opt
.cpy1Dto2D_shl
[i
])
1278 if (!check_cpy1Dto2D_shl_t(ref
.cpy1Dto2D_shl
[i
], opt
.cpy1Dto2D_shl
[i
]))
1280 printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i
, 4 << i
);
1285 if ((i
< BLOCK_64x64
) && opt
.cpy1Dto2D_shr
[i
])
1287 if (!check_cpy1Dto2D_shr_t(ref
.cpy1Dto2D_shr
[i
], opt
.cpy1Dto2D_shr
[i
]))
1289 printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i
, 4 << i
);
1297 if (!check_weightp(ref
.weight_pp
, opt
.weight_pp
))
1299 printf("Weighted Prediction (pixel) failed!\n");
1306 if (!check_weightp(ref
.weight_sp
, opt
.weight_sp
))
1308 printf("Weighted Prediction (short) failed!\n");
1313 if (opt
.frameInitLowres
)
1315 if (!check_downscale_t(ref
.frameInitLowres
, opt
.frameInitLowres
))
1317 printf("downscale failed!\n");
1322 if (opt
.scale1D_128to64
)
1324 if (!check_scale_pp(ref
.scale1D_128to64
, opt
.scale1D_128to64
))
1326 printf("scale1D_128to64 failed!\n");
1331 if (opt
.scale2D_64to32
)
1333 if (!check_scale_pp(ref
.scale2D_64to32
, opt
.scale2D_64to32
))
1335 printf("scale2D_64to32 failed!\n");
1340 if (opt
.ssim_4x4x2_core
)
1342 if (!check_ssim_4x4x2_core(ref
.ssim_4x4x2_core
, opt
.ssim_4x4x2_core
))
1344 printf("ssim_end_4 failed!\n");
1351 if (!check_ssim_end(ref
.ssim_end_4
, opt
.ssim_end_4
))
1353 printf("ssim_end_4 failed!\n");
1360 if (!check_saoCuOrgE0_t(ref
.saoCuOrgE0
, opt
.saoCuOrgE0
))
1362 printf("SAO_EO_0 failed\n");
1367 if (opt
.planecopy_sp
)
1369 if (!check_planecopy_sp(ref
.planecopy_sp
, opt
.planecopy_sp
))
1371 printf("planecopy_sp failed\n");
1376 if (opt
.planecopy_cp
)
1378 if (!check_planecopy_cp(ref
.planecopy_cp
, opt
.planecopy_cp
))
1380 printf("planecopy_cp failed\n");
/* Emits speed measurements for the primitives selected by partition index
 * `part`. Each entry prints a label through HEADER and then hands the
 * optimized (`opt`) and reference (`ref`) function pointers, together with
 * test buffers and strides, to REPORT_SPEEDUP. Most entries are preceded by
 * an `if (opt.X[part])` test of the optimized pointer.
 * NOTE(review): REPORT_SPEEDUP is defined outside this listing; presumably
 * it times both implementations and prints the ratio -- confirm there. */
1388 void PixelHarness::measurePartition(int part
, const EncoderPrimitives
& ref
, const EncoderPrimitives
& opt
)
// cres is handed to the sad_x3 / sad_x4 calls below as &cres[0].
1390 ALIGN_VAR_16(int, cres
[16]);
// fref points 2 * INCR elements into pbuf2; the calls below also use
// fref + 1, fref - 1 and fref - INCR, all derived from this base.
1391 pixel
*fref
= pbuf2
+ 2 * INCR
;
// HEADER formats a label into `header` with sprintf, then prints it
// right-justified in a 22-character field. It expands to two statements.
// NOTE(review): the declaration of `header` is not visible in this listing.
1393 #define HEADER(str, ...) sprintf(header, str, __VA_ARGS__); printf("%22s", header);
// satd: pbuf1 against fref, both with STRIDE.
// NOTE(review): unlike the entries below, no `if (opt.satd[part])` test is
// visible in this listing -- confirm against the full file.
1397 HEADER("satd[%s]", lumaPartStr
[part
]);
1398 REPORT_SPEEDUP(opt
.satd
[part
], ref
.satd
[part
], pbuf1
, STRIDE
, fref
, STRIDE
);
// pixelavg_pp: three pixel buffers, each with STRIDE, and a final argument of 32.
1401 if (opt
.pixelavg_pp
[part
])
1403 HEADER("avg_pp[%s]", lumaPartStr
[part
]);
1404 REPORT_SPEEDUP(opt
.pixelavg_pp
[part
], ref
.pixelavg_pp
[part
], pbuf1
, STRIDE
, pbuf2
, STRIDE
, pbuf3
, STRIDE
, 32);
// sa8d_inter is reported under the shorter label "sa8d[...]".
1407 if (opt
.sa8d_inter
[part
])
1409 HEADER("sa8d[%s]", lumaPartStr
[part
]);
1410 REPORT_SPEEDUP(opt
.sa8d_inter
[part
], ref
.sa8d_inter
[part
], pbuf1
, STRIDE
, fref
, STRIDE
);
// sad: pbuf1 against fref, both with STRIDE.
// NOTE(review): no `if (opt.sad[part])` test is visible in this listing.
1415 HEADER("sad[%s]", lumaPartStr
[part
]);
1416 REPORT_SPEEDUP(opt
.sad
[part
], ref
.sad
[part
], pbuf1
, STRIDE
, fref
, STRIDE
);
// sad_x3: pbuf1 plus three fref-based pointers (fref, fref + 1, fref - 1),
// a stride argument of FENC_STRIDE + 5, and cres as the last argument.
1419 if (opt
.sad_x3
[part
])
1421 HEADER("sad_x3[%s]", lumaPartStr
[part
]);
1422 REPORT_SPEEDUP(opt
.sad_x3
[part
], ref
.sad_x3
[part
], pbuf1
, fref
, fref
+ 1, fref
- 1, FENC_STRIDE
+ 5, &cres
[0]);
// sad_x4: same as sad_x3 with a fourth pointer, fref - INCR.
1425 if (opt
.sad_x4
[part
])
1427 HEADER("sad_x4[%s]", lumaPartStr
[part
]);
1428 REPORT_SPEEDUP(opt
.sad_x4
[part
], ref
.sad_x4
[part
], pbuf1
, fref
, fref
+ 1, fref
- 1, fref
- INCR
, FENC_STRIDE
+ 5, &cres
[0]);
// sse_pp: both inputs are passed as pixel pointers.
1431 if (opt
.sse_pp
[part
])
1433 HEADER("sse_pp[%s]", lumaPartStr
[part
]);
1434 REPORT_SPEEDUP(opt
.sse_pp
[part
], ref
.sse_pp
[part
], pbuf1
, STRIDE
, fref
, STRIDE
);
// sse_sp: pbuf1 is cast to int16_t* for the first input; fref is passed uncast.
1437 if (opt
.sse_sp
[part
])
1439 HEADER("sse_sp[%s]", lumaPartStr
[part
]);
1440 REPORT_SPEEDUP(opt
.sse_sp
[part
], ref
.sse_sp
[part
], (int16_t*)pbuf1
, STRIDE
, fref
, STRIDE
);
// sse_ss: both pbuf1 and fref are cast to int16_t*.
1443 if (opt
.sse_ss
[part
])
1445 HEADER("sse_ss[%s]", lumaPartStr
[part
]);
1446 REPORT_SPEEDUP(opt
.sse_ss
[part
], ref
.sse_ss
[part
], (int16_t*)pbuf1
, STRIDE
, (int16_t*)fref
, STRIDE
);
// The four luma copy variants below all pass 64 after the first buffer and
// 128 after the second; only the buffer types (pbuf* / sbuf*) differ.
1449 if (opt
.luma_copy_pp
[part
])
1451 HEADER("luma_copy_pp[%s]", lumaPartStr
[part
]);
1452 REPORT_SPEEDUP(opt
.luma_copy_pp
[part
], ref
.luma_copy_pp
[part
], pbuf1
, 64, pbuf2
, 128);
// luma_copy_sp: first buffer pbuf1, second buffer sbuf3.
1455 if (opt
.luma_copy_sp
[part
])
1457 HEADER("luma_copy_sp[%s]", lumaPartStr
[part
]);
1458 REPORT_SPEEDUP(opt
.luma_copy_sp
[part
], ref
.luma_copy_sp
[part
], pbuf1
, 64, sbuf3
, 128);
// luma_copy_ps: first buffer sbuf1, second buffer pbuf1.
1461 if (opt
.luma_copy_ps
[part
])
1463 HEADER("luma_copy_ps[%s]", lumaPartStr
[part
]);
1464 REPORT_SPEEDUP(opt
.luma_copy_ps
[part
], ref
.luma_copy_ps
[part
], sbuf1
, 64, pbuf1
, 128);
// luma_copy_ss: first buffer sbuf1, second buffer sbuf2.
1466 if (opt
.luma_copy_ss
[part
])
1468 HEADER("luma_copy_ss[%s]", lumaPartStr
[part
]);
1469 REPORT_SPEEDUP(opt
.luma_copy_ss
[part
], ref
.luma_copy_ss
[part
], sbuf1
, 64, sbuf2
, 128);
// luma_addAvg: two short buffers (sbuf1, sbuf2), pbuf1, then three STRIDE arguments.
1471 if (opt
.luma_addAvg
[part
])
1473 HEADER("luma_addAvg[%s]", lumaPartStr
[part
]);
1474 REPORT_SPEEDUP(opt
.luma_addAvg
[part
], ref
.luma_addAvg
[part
], sbuf1
, sbuf2
, pbuf1
, STRIDE
, STRIDE
, STRIDE
);
// The luma sub_ps / add_ps entries follow a `part < NUM_SQUARE_BLOCKS` test.
// NOTE(review): the braces delimiting that test are not visible in this listing.
1476 if (part
< NUM_SQUARE_BLOCKS
)
// luma_sub_ps: pbuf1 appears twice -- cast to int16_t* as the first argument
// and uncast as the fourth.
1478 if (opt
.luma_sub_ps
[part
])
1480 HEADER("luma_sub_ps[%s]", lumaPartStr
[part
]);
1481 REPORT_SPEEDUP(opt
.luma_sub_ps
[part
], ref
.luma_sub_ps
[part
], (int16_t*)pbuf1
, FENC_STRIDE
, pbuf2
, pbuf1
, STRIDE
, STRIDE
);
// luma_add_ps: pbuf1 with FENC_STRIDE, then pbuf2 and sbuf1 with two STRIDE arguments.
1483 if (opt
.luma_add_ps
[part
])
1485 HEADER("luma_add_ps[%s]", lumaPartStr
[part
]);
1486 REPORT_SPEEDUP(opt
.luma_add_ps
[part
], ref
.luma_add_ps
[part
], pbuf1
, FENC_STRIDE
, pbuf2
, sbuf1
, STRIDE
, STRIDE
);
// Chroma entries: the same kinds of measurements, taken from opt.chroma[i] /
// ref.chroma[i] for i in 0 .. X265_CSP_COUNT - 1. Labels combine
// x265_source_csp_names[i] with chromaPartStr[i][part].
1490 for (int i
= 0; i
< X265_CSP_COUNT
; i
++)
// chroma copy_pp: pbuf1 with 64, pbuf2 with 128.
1492 if (opt
.chroma
[i
].copy_pp
[part
])
1494 HEADER("[%s] copy_pp[%s]", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1495 REPORT_SPEEDUP(opt
.chroma
[i
].copy_pp
[part
], ref
.chroma
[i
].copy_pp
[part
], pbuf1
, 64, pbuf2
, 128);
// chroma copy_sp: pbuf1 with 64, sbuf3 with 128.
1497 if (opt
.chroma
[i
].copy_sp
[part
])
1499 HEADER("[%s] copy_sp[%s]", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1500 REPORT_SPEEDUP(opt
.chroma
[i
].copy_sp
[part
], ref
.chroma
[i
].copy_sp
[part
], pbuf1
, 64, sbuf3
, 128);
// chroma copy_ps: sbuf1 with 64, pbuf1 with 128.
1502 if (opt
.chroma
[i
].copy_ps
[part
])
1504 HEADER("[%s] copy_ps[%s]", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1505 REPORT_SPEEDUP(opt
.chroma
[i
].copy_ps
[part
], ref
.chroma
[i
].copy_ps
[part
], sbuf1
, 64, pbuf1
, 128);
// chroma copy_ss: sbuf1 with 64, sbuf2 with 128.
1507 if (opt
.chroma
[i
].copy_ss
[part
])
1509 HEADER("[%s] copy_ss[%s]", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1510 REPORT_SPEEDUP(opt
.chroma
[i
].copy_ss
[part
], ref
.chroma
[i
].copy_ss
[part
], sbuf1
, 64, sbuf2
, 128);
// chroma addAvg: same argument list as luma_addAvg above.
1512 if (opt
.chroma
[i
].addAvg
[part
])
1514 HEADER("[%s] addAvg[%s]", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1515 REPORT_SPEEDUP(opt
.chroma
[i
].addAvg
[part
], ref
.chroma
[i
].addAvg
[part
], sbuf1
, sbuf2
, pbuf1
, STRIDE
, STRIDE
, STRIDE
);
// The chroma sub_ps / add_ps entries follow a `part < NUM_SQUARE_BLOCKS` test,
// mirroring the luma section above.
1517 if (part
< NUM_SQUARE_BLOCKS
)
// chroma sub_ps: same argument list as luma_sub_ps above.
1519 if (opt
.chroma
[i
].sub_ps
[part
])
1521 HEADER("[%s] sub_ps[%s]", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1522 REPORT_SPEEDUP(opt
.chroma
[i
].sub_ps
[part
], ref
.chroma
[i
].sub_ps
[part
], (int16_t*)pbuf1
, FENC_STRIDE
, pbuf2
, pbuf1
, STRIDE
, STRIDE
);
// chroma add_ps: same argument list as luma_add_ps above.
1524 if (opt
.chroma
[i
].add_ps
[part
])
1526 HEADER("[%s] add_ps[%s]", x265_source_csp_names
[i
], chromaPartStr
[i
][part
]);
1527 REPORT_SPEEDUP(opt
.chroma
[i
].add_ps
[part
], ref
.chroma
[i
].add_ps
[part
], pbuf1
, FENC_STRIDE
, pbuf2
, sbuf1
, STRIDE
, STRIDE
);
/* Drives the speed report for the pixel primitives. In the visible code it
 * (1) calls measurePartition for a series of partition shapes derived from
 * sizes 4, 8, 16, 32 and 64, (2) loops over the square block sizes and
 * reports the per-block primitives, and (3) reports a set of primitives that
 * are not indexed by block size. `ref` supplies the reference function
 * pointers and `opt` the optimized ones.
 * NOTE(review): brace-only lines and some guard lines are not visible in this
 * listing, so the exact nesting should be confirmed against the full file. */
1535 void PixelHarness::measureSpeed(const EncoderPrimitives
& ref
, const EncoderPrimitives
& opt
)
// HEADER formats a label into `header` with sprintf and prints it
// right-justified in a 22-character field; HEADER0 prints a fixed string in
// the same field without formatting.
// NOTE(review): the declaration of `header` is not visible in this listing.
1539 #define HEADER(str, ...) sprintf(header, str, __VA_ARGS__); printf("%22s", header);
1540 #define HEADER0(str) printf("%22s", str);
// size doubles from 4 up to and including 64.
1542 for (int size
= 4; size
<= 64; size
*= 2)
// Square partition: size x size.
1544 int part
= partitionFromSizes(size
, size
); // 2Nx2N
1545 measurePartition(part
, ref
, opt
);
// Half-height and half-width rectangles.
// NOTE(review): any size guards around the calls below are not visible here.
1549 part
= partitionFromSizes(size
, size
>> 1); // 2NxN
1550 measurePartition(part
, ref
, opt
);
1551 part
= partitionFromSizes(size
>> 1, size
); // Nx2N
1552 measurePartition(part
, ref
, opt
);
// Asymmetric shapes: one dimension is size / 4 or 3 * (size / 4), the other is size.
1557 part
= partitionFromSizes(size
, size
>> 2);
1558 measurePartition(part
, ref
, opt
);
1559 part
= partitionFromSizes(size
, 3 * (size
>> 2));
1560 measurePartition(part
, ref
, opt
);
1562 part
= partitionFromSizes(size
>> 2, size
);
1563 measurePartition(part
, ref
, opt
);
1564 part
= partitionFromSizes(3 * (size
>> 2), size
);
1565 measurePartition(part
, ref
, opt
);
// Per-square-block primitives. Labels print the block dimension as 4 << i.
1569 for (int i
= 0; i
< NUM_SQUARE_BLOCKS
; i
++)
// ssd_s is only considered for i <= BLOCK_32x32.
1571 if ((i
<= BLOCK_32x32
) && opt
.ssd_s
[i
])
1573 HEADER("ssd_s[%dx%d]", 4 << i
, 4 << i
);
1574 REPORT_SPEEDUP(opt
.ssd_s
[i
], ref
.ssd_s
[i
], sbuf1
, STRIDE
);
// sa8d: pbuf1 against pbuf2, both with STRIDE.
// NOTE(review): no `if (opt.sa8d[i])` test is visible in this listing.
1578 HEADER("sa8d[%dx%d]", 4 << i
, 4 << i
);
1579 REPORT_SPEEDUP(opt
.sa8d
[i
], ref
.sa8d
[i
], pbuf1
, STRIDE
, pbuf2
, STRIDE
);
// calcresidual is reported under the label "residual[...]".
1581 if (opt
.calcresidual
[i
])
1583 HEADER("residual[%dx%d]", 4 << i
, 4 << i
);
1584 REPORT_SPEEDUP(opt
.calcresidual
[i
], ref
.calcresidual
[i
], pbuf1
, pbuf2
, sbuf1
, 64);
// blockfill_s is reported as "blkfill[...]"; SHORT_MAX is passed as the last argument.
1587 if (opt
.blockfill_s
[i
])
1589 HEADER("blkfill[%dx%d]", 4 << i
, 4 << i
);
1590 REPORT_SPEEDUP(opt
.blockfill_s
[i
], ref
.blockfill_s
[i
], sbuf1
, 64, SHORT_MAX
);
// transpose: pbuf1, pbuf2 and STRIDE.
1593 if (opt
.transpose
[i
])
1595 HEADER("transpose[%dx%d]", 4 << i
, 4 << i
);
1596 REPORT_SPEEDUP(opt
.transpose
[i
], ref
.transpose
[i
], pbuf1
, pbuf2
, STRIDE
);
// var: pbuf1 with STRIDE.
// NOTE(review): no `if (opt.var[i])` test is visible in this listing.
1601 HEADER("var[%dx%d]", 4 << i
, 4 << i
);
1602 REPORT_SPEEDUP(opt
.var
[i
], ref
.var
[i
], pbuf1
, STRIDE
);
// The cpy2Dto1D / cpy1Dto2D / copy_cnt entries are only considered for
// i < BLOCK_64x64.
// cpy2Dto1D_shl: the last argument is
// MAX_TR_DYNAMIC_RANGE - X265_DEPTH - (i + 2), so it varies with i.
1605 if ((i
< BLOCK_64x64
) && opt
.cpy2Dto1D_shl
[i
])
1607 HEADER("cpy2Dto1D_shl[%dx%d]", 4 << i
, 4 << i
);
1608 REPORT_SPEEDUP(opt
.cpy2Dto1D_shl
[i
], ref
.cpy2Dto1D_shl
[i
], sbuf1
, sbuf2
, STRIDE
, MAX_TR_DYNAMIC_RANGE
- X265_DEPTH
- (i
+ 2));
// cpy2Dto1D_shr: the last argument is the constant 3.
1611 if ((i
< BLOCK_64x64
) && opt
.cpy2Dto1D_shr
[i
])
1613 HEADER("cpy2Dto1D_shr[%dx%d]", 4 << i
, 4 << i
);
1614 REPORT_SPEEDUP(opt
.cpy2Dto1D_shr
[i
], ref
.cpy2Dto1D_shr
[i
], sbuf1
, sbuf2
, STRIDE
, 3);
// cpy1Dto2D_shl: the last argument is the constant 64.
1617 if ((i
< BLOCK_64x64
) && opt
.cpy1Dto2D_shl
[i
])
1619 HEADER("cpy1Dto2D_shl[%dx%d]", 4 << i
, 4 << i
);
1620 REPORT_SPEEDUP(opt
.cpy1Dto2D_shl
[i
], ref
.cpy1Dto2D_shl
[i
], sbuf1
, sbuf2
, STRIDE
, 64);
// cpy1Dto2D_shr: the last argument is the constant 64.
1623 if ((i
< BLOCK_64x64
) && opt
.cpy1Dto2D_shr
[i
])
1625 HEADER("cpy1Dto2D_shr[%dx%d]", 4 << i
, 4 << i
);
1626 REPORT_SPEEDUP(opt
.cpy1Dto2D_shr
[i
], ref
.cpy1Dto2D_shr
[i
], sbuf1
, sbuf2
, STRIDE
, 64);
// copy_cnt: sbuf1, sbuf2 and STRIDE.
1629 if ((i
< BLOCK_64x64
) && opt
.copy_cnt
[i
])
1631 HEADER("copy_cnt[%dx%d]", 4 << i
, 4 << i
);
1632 REPORT_SPEEDUP(opt
.copy_cnt
[i
], ref
.copy_cnt
[i
], sbuf1
, sbuf2
, STRIDE
);
// Primitives that are not indexed by block size; labels use HEADER0.
// weight_pp: two pixel buffers followed by seven integer arguments.
// NOTE(review): no `if (opt.weight_pp)` test is visible in this listing.
1638 HEADER0("weight_pp");
1639 REPORT_SPEEDUP(opt
.weight_pp
, ref
.weight_pp
, pbuf1
, pbuf2
, 64, 32, 32, 128, 1 << 9, 10, 100);
// weight_sp: sbuf1 (cast to int16_t*) and pbuf1, followed by eight integer arguments.
// NOTE(review): no `if (opt.weight_sp)` test is visible in this listing.
1644 HEADER0("weight_sp");
1645 REPORT_SPEEDUP(opt
.weight_sp
, ref
.weight_sp
, (int16_t*)sbuf1
, pbuf1
, 64, 64, 32, 32, 128, 1 << 9, 10, 100);
// frameInitLowres is reported under the label "downscale"; pbuf2 is passed
// both as the first and as the third buffer argument.
1648 if (opt
.frameInitLowres
)
1650 HEADER0("downscale");
1651 REPORT_SPEEDUP(opt
.frameInitLowres
, ref
.frameInitLowres
, pbuf2
, pbuf1
, pbuf2
, pbuf3
, pbuf4
, 64, 64, 64, 64);
// scale1D_128to64: pbuf2, pbuf1 and 64.
1654 if (opt
.scale1D_128to64
)
1656 HEADER0("scale1D_128to64");
1657 REPORT_SPEEDUP(opt
.scale1D_128to64
, ref
.scale1D_128to64
, pbuf2
, pbuf1
, 64);
// scale2D_64to32: pbuf2, pbuf1 and 64.
1660 if (opt
.scale2D_64to32
)
1662 HEADER0("scale2D_64to32");
1663 REPORT_SPEEDUP(opt
.scale2D_64to32
, ref
.scale2D_64to32
, pbuf2
, pbuf1
, 64);
// ssim_4x4x2_core: sbuf1 is cast to int(*)[4] for the last argument.
1666 if (opt
.ssim_4x4x2_core
)
1668 HEADER0("ssim_4x4x2_core");
1669 REPORT_SPEEDUP(opt
.ssim_4x4x2_core
, ref
.ssim_4x4x2_core
, pbuf1
, 64, pbuf2
, 64, (int(*)[4])sbuf1
);
// ssim_end_4: both pixel buffers are cast to int(*)[4]; the last argument is 4.
// NOTE(review): no `if (opt.ssim_end_4)` test is visible in this listing.
1674 HEADER0("ssim_end_4");
1675 REPORT_SPEEDUP(opt
.ssim_end_4
, ref
.ssim_end_4
, (int(*)[4])pbuf2
, (int(*)[4])pbuf1
, 4);
// saoCuOrgE0 is reported under the label "SAO_EO_0".
// NOTE(review): no `if (opt.saoCuOrgE0)` test is visible in this listing.
1680 HEADER0("SAO_EO_0");
1681 REPORT_SPEEDUP(opt
.saoCuOrgE0
, ref
.saoCuOrgE0
, pbuf1
, psbuf1
, 64, 1);
// planecopy_sp: source is ushort_test_buff[0]; trailing arguments are 64, 64, 8, 255.
1684 if (opt
.planecopy_sp
)
1686 HEADER0("planecopy_sp");
1687 REPORT_SPEEDUP(opt
.planecopy_sp
, ref
.planecopy_sp
, ushort_test_buff
[0], 64, pbuf1
, 64, 64, 64, 8, 255);
// planecopy_cp: source is uchar_test_buff[0]; trailing arguments are 64, 64, 2.
1690 if (opt
.planecopy_cp
)
1692 HEADER0("planecopy_cp");
1693 REPORT_SPEEDUP(opt
.planecopy_cp
, ref
.planecopy_cp
, uchar_test_buff
[0], 64, pbuf1
, 64, 64, 64, 2);