Imported Upstream version 1.4
[deb_x265.git] / source / test / pixelharness.cpp
CommitLineData
72b9787e
JB
1/*****************************************************************************
2 * Copyright (C) 2013 x265 project
3 *
4 * Authors: Steve Borho <steve@borho.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
19 *
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
23
24#include "pixelharness.h"
25#include "primitives.h"
26
27using namespace x265;
28
29PixelHarness::PixelHarness()
30{
31 /* [0] --- Random values
32 * [1] --- Minimum
33 * [2] --- Maximum */
34 for (int i = 0; i < BUFFSIZE; i++)
35 {
36 pixel_test_buff[0][i] = rand() % PIXEL_MAX;
37 short_test_buff[0][i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; // max(SHORT_MIN, min(rand(), SMAX));
38 short_test_buff1[0][i] = rand() & PIXEL_MAX; // For block copy only
39 short_test_buff2[0][i] = rand() % 16383; // for addAvg
40 int_test_buff[0][i] = rand() % SHORT_MAX;
41 ushort_test_buff[0][i] = rand() % ((1 << 16) - 1);
42 uchar_test_buff[0][i] = rand() % ((1 << 8) - 1);
43
44 pixel_test_buff[1][i] = PIXEL_MIN;
45 short_test_buff[1][i] = SMIN;
46 short_test_buff1[1][i] = PIXEL_MIN;
47 short_test_buff2[1][i] = -16384;
48 int_test_buff[1][i] = SHORT_MIN;
49 ushort_test_buff[1][i] = PIXEL_MIN;
50 uchar_test_buff[1][i] = PIXEL_MIN;
51
52 pixel_test_buff[2][i] = PIXEL_MAX;
53 short_test_buff[2][i] = SMAX;
54 short_test_buff1[2][i] = PIXEL_MAX;
55 short_test_buff2[2][i] = 16383;
56 int_test_buff[2][i] = SHORT_MAX;
57 ushort_test_buff[2][i] = ((1 << 16) - 1);
58 uchar_test_buff[2][i] = 255;
59
60 pbuf1[i] = rand() & PIXEL_MAX;
61 pbuf2[i] = rand() & PIXEL_MAX;
62 pbuf3[i] = rand() & PIXEL_MAX;
63 pbuf4[i] = rand() & PIXEL_MAX;
64
65 sbuf1[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; //max(SHORT_MIN, min(rand(), SMAX));
66 sbuf2[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; //max(SHORT_MIN, min(rand(), SMAX));
67 ibuf1[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1;
68 psbuf1[i] = (rand() % 65) - 32; // range is between -32 to 32
69 sbuf3[i] = rand() % PIXEL_MAX; // for blockcopy only
70 }
71}
72
73bool PixelHarness::check_pixelcmp(pixelcmp_t ref, pixelcmp_t opt)
74{
75 int j = 0;
76 intptr_t stride = STRIDE;
77
78 for (int i = 0; i < ITERS; i++)
79 {
80 int index1 = rand() % TEST_CASES;
81 int index2 = rand() % TEST_CASES;
82 int vres = (int)checked(opt, pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride);
83 int cres = ref(pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride);
84 if (vres != cres)
85 return false;
86
87 reportfail();
88 j += INCR;
89 }
90
91 return true;
92}
93
94bool PixelHarness::check_pixelcmp_sp(pixelcmp_sp_t ref, pixelcmp_sp_t opt)
95{
96 int j = 0;
97 intptr_t stride = STRIDE;
98
99 for (int i = 0; i < ITERS; i++)
100 {
101 int index1 = rand() % TEST_CASES;
102 int index2 = rand() % TEST_CASES;
103 int vres = (int)checked(opt, short_test_buff[index1], stride, pixel_test_buff[index2] + j, stride);
104 int cres = ref(short_test_buff[index1], stride, pixel_test_buff[index2] + j, stride);
105 if (vres != cres)
106 return false;
107
108 reportfail();
109 j += INCR;
110 }
111
112 return true;
113}
114
115bool PixelHarness::check_pixelcmp_ss(pixelcmp_ss_t ref, pixelcmp_ss_t opt)
116{
117 int j = 0;
118 intptr_t stride = STRIDE;
119
120 for (int i = 0; i < ITERS; i++)
121 {
122 int index1 = rand() % TEST_CASES;
123 int index2 = rand() % TEST_CASES;
124 int vres = (int)checked(opt, short_test_buff[index1], stride, short_test_buff[index2] + j, stride);
125 int cres = ref(short_test_buff[index1], stride, short_test_buff[index2] + j, stride);
126 if (vres != cres)
127 return false;
128
129 reportfail();
130 j += INCR;
131 }
132
133 return true;
134}
135
136bool PixelHarness::check_pixelcmp_x3(pixelcmp_x3_t ref, pixelcmp_x3_t opt)
137{
138 ALIGN_VAR_16(int, cres[16]);
139 ALIGN_VAR_16(int, vres[16]);
140 int j = 0;
141 intptr_t stride = FENC_STRIDE - 5;
142 for (int i = 0; i < ITERS; i++)
143 {
144 int index1 = rand() % TEST_CASES;
145 int index2 = rand() % TEST_CASES;
146 checked(opt, pixel_test_buff[index1],
147 pixel_test_buff[index2] + j,
148 pixel_test_buff[index2] + j + 1,
149 pixel_test_buff[index2] + j + 2, stride, &vres[0]);
150 ref(pixel_test_buff[index1],
151 pixel_test_buff[index2] + j,
152 pixel_test_buff[index2] + j + 1,
153 pixel_test_buff[index2] + j + 2, stride, &cres[0]);
154 if ((vres[0] != cres[0]) || ((vres[1] != cres[1])) || ((vres[2] != cres[2])))
155 return false;
156
157 reportfail();
158 j += INCR;
159 }
160
161 return true;
162}
163
164bool PixelHarness::check_pixelcmp_x4(pixelcmp_x4_t ref, pixelcmp_x4_t opt)
165{
166 ALIGN_VAR_16(int, cres[16]);
167 ALIGN_VAR_16(int, vres[16]);
168 int j = 0;
169 intptr_t stride = FENC_STRIDE - 5;
170 for (int i = 0; i < ITERS; i++)
171 {
172 int index1 = rand() % TEST_CASES;
173 int index2 = rand() % TEST_CASES;
174 checked(opt, pixel_test_buff[index1],
175 pixel_test_buff[index2] + j,
176 pixel_test_buff[index2] + j + 1,
177 pixel_test_buff[index2] + j + 2,
178 pixel_test_buff[index2] + j + 3, stride, &vres[0]);
179 ref(pixel_test_buff[index1],
180 pixel_test_buff[index2] + j,
181 pixel_test_buff[index2] + j + 1,
182 pixel_test_buff[index2] + j + 2,
183 pixel_test_buff[index2] + j + 3, stride, &cres[0]);
184
185 if ((vres[0] != cres[0]) || ((vres[1] != cres[1])) || ((vres[2] != cres[2])) || ((vres[3] != cres[3])))
186 return false;
187
188 reportfail();
189 j += INCR;
190 }
191
192 return true;
193}
194
195bool PixelHarness::check_calresidual(calcresidual_t ref, calcresidual_t opt)
196{
197 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
198 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
199 memset(ref_dest, 0, 64 * 64 * sizeof(int16_t));
200 memset(opt_dest, 0, 64 * 64 * sizeof(int16_t));
201
202 int j = 0;
203 intptr_t stride = STRIDE;
204 for (int i = 0; i < ITERS; i++)
205 {
206 int index = i % TEST_CASES;
207 checked(opt, pbuf1 + j, pixel_test_buff[index] + j, opt_dest, stride);
208 ref(pbuf1 + j, pixel_test_buff[index] + j, ref_dest, stride);
209
210 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
211 return false;
212
213 reportfail();
214 j += INCR;
215 }
216
217 return true;
218}
219
220bool PixelHarness::check_ssd_s(pixel_ssd_s_t ref, pixel_ssd_s_t opt)
221{
222 int j = 0;
223 for (int i = 0; i < ITERS; i++)
224 {
225 // NOTE: stride must be multiple of 16, because minimum block is 4x4
226 int stride = (STRIDE + (rand() % STRIDE)) & ~15;
227 int cres = ref(sbuf1 + j, stride);
228 int vres = (int)checked(opt, sbuf1 + j, (intptr_t)stride);
229
230 if (cres != vres)
231 {
232 return false;
233 }
234
235 reportfail();
236 j += INCR;
237 }
238
239 return true;
240}
241
242bool PixelHarness::check_weightp(weightp_sp_t ref, weightp_sp_t opt)
243{
244 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
245 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
246
247 memset(ref_dest, 0, 64 * 64 * sizeof(pixel));
248 memset(opt_dest, 0, 64 * 64 * sizeof(pixel));
249 int j = 0;
250 int width = 2 * (rand() % 32 + 1);
251 int height = 8;
252 int w0 = rand() % 128;
253 int shift = rand() % 15;
254 int round = shift ? (1 << (shift - 1)) : 0;
255 int offset = (rand() % 256) - 128;
256 intptr_t stride = 64;
257 for (int i = 0; i < ITERS; i++)
258 {
259 int index = i % TEST_CASES;
260 checked(opt, short_test_buff[index] + j, opt_dest, stride, stride, width, height, w0, round, shift, offset);
261 ref(short_test_buff[index] + j, ref_dest, stride, stride, width, height, w0, round, shift, offset);
262
263 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
264 return false;
265
266 reportfail();
267 j += INCR;
268 }
269
270 return true;
271}
272
273bool PixelHarness::check_weightp(weightp_pp_t ref, weightp_pp_t opt)
274{
275 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
276 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
277
278 memset(ref_dest, 0, 64 * 64 * sizeof(pixel));
279 memset(opt_dest, 0, 64 * 64 * sizeof(pixel));
280 int j = 0;
281 int width = 16 * (rand() % 4 + 1);
282 int height = 8;
283 int w0 = rand() % 128;
284 int shift = rand() % 15;
285 int round = shift ? (1 << (shift - 1)) : 0;
286 int offset = (rand() % 256) - 128;
287 intptr_t stride = 64;
288 for (int i = 0; i < ITERS; i++)
289 {
290 int index = i % TEST_CASES;
291 checked(opt, pixel_test_buff[index] + j, opt_dest, stride, width, height, w0, round, shift, offset);
292 ref(pixel_test_buff[index] + j, ref_dest, stride, width, height, w0, round, shift, offset);
293
294 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
295 return false;
296
297 reportfail();
298 j += INCR;
299 }
300
301 return true;
302}
303
304bool PixelHarness::check_downscale_t(downscale_t ref, downscale_t opt)
305{
306 ALIGN_VAR_16(pixel, ref_destf[32 * 32]);
307 ALIGN_VAR_16(pixel, opt_destf[32 * 32]);
308
309 ALIGN_VAR_16(pixel, ref_desth[32 * 32]);
310 ALIGN_VAR_16(pixel, opt_desth[32 * 32]);
311
312 ALIGN_VAR_16(pixel, ref_destv[32 * 32]);
313 ALIGN_VAR_16(pixel, opt_destv[32 * 32]);
314
315 ALIGN_VAR_16(pixel, ref_destc[32 * 32]);
316 ALIGN_VAR_16(pixel, opt_destc[32 * 32]);
317
318 intptr_t src_stride = 64;
319 intptr_t dst_stride = 32;
320 int bx = 32;
321 int by = 32;
322 int j = 0;
323 for (int i = 0; i < ITERS; i++)
324 {
325 int index = i % TEST_CASES;
326 ref(pixel_test_buff[index] + j, ref_destf, ref_desth, ref_destv,
327 ref_destc, src_stride, dst_stride, bx, by);
328 checked(opt, pixel_test_buff[index] + j, opt_destf, opt_desth, opt_destv,
329 opt_destc, src_stride, dst_stride, bx, by);
330
331 if (memcmp(ref_destf, opt_destf, 32 * 32 * sizeof(pixel)))
332 return false;
333 if (memcmp(ref_desth, opt_desth, 32 * 32 * sizeof(pixel)))
334 return false;
335 if (memcmp(ref_destv, opt_destv, 32 * 32 * sizeof(pixel)))
336 return false;
337 if (memcmp(ref_destc, opt_destc, 32 * 32 * sizeof(pixel)))
338 return false;
339
340 reportfail();
341 j += INCR;
342 }
343
344 return true;
345}
346
347bool PixelHarness::check_cvt32to16_shr_t(cvt32to16_shr_t ref, cvt32to16_shr_t opt)
348{
349 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
350 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
351
352 memset(ref_dest, 0xCD, sizeof(ref_dest));
353 memset(opt_dest, 0xCD, sizeof(opt_dest));
354
355 int j = 0;
356 intptr_t stride = STRIDE;
357 for (int i = 0; i < ITERS; i++)
358 {
359 int shift = (rand() % 7 + 1);
360
361 int index = i % TEST_CASES;
362 checked(opt, opt_dest, int_test_buff[index] + j, stride, shift, (int)STRIDE);
363 ref(ref_dest, int_test_buff[index] + j, stride, shift, (int)STRIDE);
364
365 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
366 return false;
367
368 reportfail();
369 j += INCR;
370 }
371
372 return true;
373}
374
375bool PixelHarness::check_cvt16to32_shl_t(cvt16to32_shl_t ref, cvt16to32_shl_t opt)
376{
377 ALIGN_VAR_16(int32_t, ref_dest[64 * 64]);
378 ALIGN_VAR_16(int32_t, opt_dest[64 * 64]);
379
380 int j = 0;
381 intptr_t stride = STRIDE;
382 for (int i = 0; i < ITERS; i++)
383 {
384 int shift = (rand() % 7 + 1);
385
386 int index = i % TEST_CASES;
387 checked(opt, opt_dest, short_test_buff[index] + j, stride, shift, (int)stride);
388 ref(ref_dest, short_test_buff[index] + j, stride, shift, (int)stride);
389
390 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int32_t)))
391 return false;
392
393 reportfail();
394 j += INCR;
395 }
396
397 return true;
398}
399
400bool PixelHarness::check_cvt16to32_shr_t(cvt16to32_shr_t ref, cvt16to32_shr_t opt)
401{
402 ALIGN_VAR_16(int32_t, ref_dest[64 * 64]);
403 ALIGN_VAR_16(int32_t, opt_dest[64 * 64]);
404
405 memset(ref_dest, 0xCD, sizeof(ref_dest));
406 memset(opt_dest, 0xCD, sizeof(opt_dest));
407
408 int j = 0;
409 intptr_t stride = STRIDE;
410 for (int i = 0; i < ITERS; i++)
411 {
412 int shift = (rand() % 7 + 1);
413
414 int index = i % TEST_CASES;
415 checked(opt, opt_dest, short_test_buff[index] + j, stride, shift, (int)stride);
416 ref(ref_dest, short_test_buff[index] + j, stride, shift, (int)stride);
417
418 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int32_t)))
419 return false;
420
421 reportfail();
422 j += INCR;
423 }
424
425 return true;
426}
427
428bool PixelHarness::check_cvt32to16_shl_t(cvt32to16_shl_t ref, cvt32to16_shl_t opt)
429{
430 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
431 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
432
433 memset(ref_dest, 0xCD, sizeof(ref_dest));
434 memset(opt_dest, 0xCD, sizeof(opt_dest));
435
436 int j = 0;
437 intptr_t stride = STRIDE;
438 for (int i = 0; i < ITERS; i++)
439 {
440 int shift = (rand() % 7 + 1);
441
442 int index = i % TEST_CASES;
443 checked(opt, opt_dest, int_test_buff[index] + j, stride, shift);
444 ref(ref_dest, int_test_buff[index] + j, stride, shift);
445
446 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
447 return false;
448
449 reportfail();
450 j += INCR;
451 }
452
453 return true;
454}
455
456bool PixelHarness::check_copy_cnt_t(copy_cnt_t ref, copy_cnt_t opt)
457{
458 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
459 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
460
461 memset(ref_dest, 0xCD, sizeof(ref_dest));
462 memset(opt_dest, 0xCD, sizeof(opt_dest));
463
464 int j = 0;
465 intptr_t stride = STRIDE;
466 for (int i = 0; i < ITERS; i++)
467 {
468 int index = i % TEST_CASES;
469 int opt_cnt = (int)checked(opt, opt_dest, short_test_buff1[index] + j, stride);
470 int ref_cnt = ref(ref_dest, short_test_buff1[index] + j, stride);
471
472 if ((ref_cnt != opt_cnt) || memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
473 return false;
474
475 reportfail();
476 j += INCR;
477 }
478
479 return true;
480}
481
482bool PixelHarness::check_copy_shr_t(copy_shr_t ref, copy_shr_t opt)
483{
484 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
485 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
486
487 memset(ref_dest, 0xCD, sizeof(ref_dest));
488 memset(opt_dest, 0xCD, sizeof(opt_dest));
489
490 int j = 0;
491 intptr_t stride = STRIDE;
492 for (int i = 0; i < ITERS; i++)
493 {
494 int shift = (rand() % 7 + 1);
495
496 int index = i % TEST_CASES;
497 checked(opt, opt_dest, short_test_buff[index] + j, stride, shift, (int)STRIDE);
498 ref(ref_dest, short_test_buff[index] + j, stride, shift, (int)STRIDE);
499
500 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
501 return false;
502
503 reportfail();
504 j += INCR;
505 }
506
507 return true;
508}
509
510bool PixelHarness::check_copy_shl_t(copy_shl_t ref, copy_shl_t opt)
511{
512 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
513 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
514
515 memset(ref_dest, 0xCD, sizeof(ref_dest));
516 memset(opt_dest, 0xCD, sizeof(opt_dest));
517
518 int j = 0;
519 intptr_t stride = STRIDE;
520 for (int i = 0; i < ITERS; i++)
521 {
522 int shift = (rand() % 7 + 1);
523
524 int index = i % TEST_CASES;
525 checked(opt, opt_dest, short_test_buff[index] + j, stride, shift);
526 ref(ref_dest, short_test_buff[index] + j, stride, shift);
527
528 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
529 return false;
530
531 reportfail();
532 j += INCR;
533 }
534
535 return true;
536}
537
538bool PixelHarness::check_pixelavg_pp(pixelavg_pp_t ref, pixelavg_pp_t opt)
539{
540 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
541 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
542
543 int j = 0;
544
545 memset(ref_dest, 0xCD, sizeof(ref_dest));
546 memset(opt_dest, 0xCD, sizeof(opt_dest));
547
548 intptr_t stride = STRIDE;
549 for (int i = 0; i < ITERS; i++)
550 {
551 int index1 = rand() % TEST_CASES;
552 int index2 = rand() % TEST_CASES;
553 checked(ref, ref_dest, stride, pixel_test_buff[index1] + j,
554 stride, pixel_test_buff[index2] + j, stride, 32);
555 opt(opt_dest, stride, pixel_test_buff[index1] + j,
556 stride, pixel_test_buff[index2] + j, stride, 32);
557
558 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
559 return false;
560
561 reportfail();
562 j += INCR;
563 }
564
565 return true;
566}
567
568bool PixelHarness::check_copy_pp(copy_pp_t ref, copy_pp_t opt)
569{
570 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
571 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
572
573 // we don't know the partition size so we are checking the entire output buffer so
574 // we must initialize the buffers
575 memset(ref_dest, 0, sizeof(ref_dest));
576 memset(opt_dest, 0, sizeof(opt_dest));
577
578 int j = 0;
579 intptr_t stride = STRIDE;
580 for (int i = 0; i < ITERS; i++)
581 {
582 int index = i % TEST_CASES;
583 checked(opt, opt_dest, stride, pixel_test_buff[index] + j, stride);
584 ref(ref_dest, stride, pixel_test_buff[index] + j, stride);
585
586 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
587 return false;
588
589 reportfail();
590 j += INCR;
591 }
592
593 return true;
594}
595
596bool PixelHarness::check_copy_sp(copy_sp_t ref, copy_sp_t opt)
597{
598 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
599 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
600
601 // we don't know the partition size so we are checking the entire output buffer so
602 // we must initialize the buffers
603 memset(ref_dest, 0xCD, sizeof(ref_dest));
604 memset(opt_dest, 0xCD, sizeof(opt_dest));
605
606 int j = 0;
607 intptr_t stride1 = 64, stride2 = STRIDE;
608 for (int i = 0; i < ITERS; i++)
609 {
610 int index = i % TEST_CASES;
611 checked(opt, opt_dest, stride1, short_test_buff1[index] + j, stride2);
612 ref(ref_dest, stride1, short_test_buff1[index] + j, stride2);
613
614 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
615 return false;
616
617 reportfail();
618 j += INCR;
619 }
620
621 return true;
622}
623
624bool PixelHarness::check_copy_ps(copy_ps_t ref, copy_ps_t opt)
625{
626 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
627 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
628
629 // we don't know the partition size so we are checking the entire output buffer so
630 // we must initialize the buffers
631 memset(ref_dest, 0xCD, sizeof(ref_dest));
632 memset(opt_dest, 0xCD, sizeof(opt_dest));
633
634 int j = 0;
635 intptr_t stride = STRIDE;
636 for (int i = 0; i < ITERS; i++)
637 {
638 int index = i % TEST_CASES;
639 checked(opt, opt_dest, stride, pixel_test_buff[index] + j, stride);
640 ref(ref_dest, stride, pixel_test_buff[index] + j, stride);
641
642 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
643 return false;
644
645 reportfail();
646 j += INCR;
647 }
648
649 return true;
650}
651
652bool PixelHarness::check_copy_ss(copy_ss_t ref, copy_ss_t opt)
653{
654 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
655 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
656
657 // we don't know the partition size so we are checking the entire output buffer so
658 // we must initialize the buffers
659 memset(ref_dest, 0xCD, sizeof(ref_dest));
660 memset(opt_dest, 0xCD, sizeof(opt_dest));
661
662 int j = 0;
663 intptr_t stride = STRIDE;
664 for (int i = 0; i < ITERS; i++)
665 {
666 int index = i % TEST_CASES;
667 checked(opt, opt_dest, stride, short_test_buff1[index] + j, stride);
668 ref(ref_dest, stride, short_test_buff1[index] + j, stride);
669
670 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
671 return false;
672
673 reportfail();
674 j += INCR;
675 }
676
677 return true;
678}
679
680bool PixelHarness::check_blockfill_s(blockfill_s_t ref, blockfill_s_t opt)
681{
682 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
683 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
684
685 memset(ref_dest, 0xCD, sizeof(ref_dest));
686 memset(opt_dest, 0xCD, sizeof(opt_dest));
687
688 intptr_t stride = 64;
689 for (int i = 0; i < ITERS; i++)
690 {
691 int16_t value = (rand() % SHORT_MAX) + 1;
692
693 checked(opt, opt_dest, stride, value);
694 ref(ref_dest, stride, value);
695
696 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
697 return false;
698
699 reportfail();
700 }
701
702 return true;
703}
704
705bool PixelHarness::check_pixel_sub_ps(pixel_sub_ps_t ref, pixel_sub_ps_t opt)
706{
707 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
708 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
709
710 memset(ref_dest, 0xCD, sizeof(ref_dest));
711 memset(opt_dest, 0xCD, sizeof(opt_dest));
712
713 int j = 0;
714 intptr_t stride2 = 64, stride = STRIDE;
715 for (int i = 0; i < 1; i++)
716 {
717 int index1 = rand() % TEST_CASES;
718 int index2 = rand() % TEST_CASES;
719 checked(opt, opt_dest, stride2, pixel_test_buff[index1] + j,
720 pixel_test_buff[index2] + j, stride, stride);
721 ref(ref_dest, stride2, pixel_test_buff[index1] + j,
722 pixel_test_buff[index2] + j, stride, stride);
723
724 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
725 return false;
726
727 reportfail();
728 j += INCR;
729 }
730
731 return true;
732}
733
734bool PixelHarness::check_scale_pp(scale_t ref, scale_t opt)
735{
736 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
737 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
738
739 memset(ref_dest, 0, sizeof(ref_dest));
740 memset(opt_dest, 0, sizeof(opt_dest));
741
742 int j = 0;
743 intptr_t stride = STRIDE;
744 for (int i = 0; i < ITERS; i++)
745 {
746 int index = i % TEST_CASES;
747 checked(opt, opt_dest, pixel_test_buff[index] + j, stride);
748 ref(ref_dest, pixel_test_buff[index] + j, stride);
749
750 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
751 return false;
752
753 reportfail();
754 j += INCR;
755 }
756
757 return true;
758}
759
760bool PixelHarness::check_transpose(transpose_t ref, transpose_t opt)
761{
762 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
763 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
764
765 memset(ref_dest, 0, sizeof(ref_dest));
766 memset(opt_dest, 0, sizeof(opt_dest));
767
768 int j = 0;
769 intptr_t stride = STRIDE;
770 for (int i = 0; i < ITERS; i++)
771 {
772 int index = i % TEST_CASES;
773 checked(opt, opt_dest, pixel_test_buff[index] + j, stride);
774 ref(ref_dest, pixel_test_buff[index] + j, stride);
775
776 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
777 return false;
778
779 reportfail();
780 j += INCR;
781 }
782
783 return true;
784}
785
786bool PixelHarness::check_pixel_add_ps(pixel_add_ps_t ref, pixel_add_ps_t opt)
787{
788 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
789 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
790
791 memset(ref_dest, 0xCD, sizeof(ref_dest));
792 memset(opt_dest, 0xCD, sizeof(opt_dest));
793
794 int j = 0;
795 intptr_t stride2 = 64, stride = STRIDE;
796 for (int i = 0; i < ITERS; i++)
797 {
798 int index1 = rand() % TEST_CASES;
799 int index2 = rand() % TEST_CASES;
800 checked(opt, opt_dest, stride2, pixel_test_buff[index1] + j, short_test_buff[index2] + j, stride, stride);
801 ref(ref_dest, stride2, pixel_test_buff[index1] + j, short_test_buff[index2] + j, stride, stride);
802
803 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
804 return false;
805
806 reportfail();
807 j += INCR;
808 }
809
810 return true;
811}
812
813bool PixelHarness::check_pixel_var(var_t ref, var_t opt)
814{
815 int j = 0;
816
817 intptr_t stride = STRIDE;
818
819 for (int i = 0; i < ITERS; i++)
820 {
821 int index = i % TEST_CASES;
822 uint64_t vres = checked(opt, pixel_test_buff[index], stride);
823 uint64_t cres = ref(pixel_test_buff[index], stride);
824 if (vres != cres)
825 return false;
826
827 reportfail();
828 j += INCR;
829 }
830
831 return true;
832}
833
834bool PixelHarness::check_ssim_4x4x2_core(ssim_4x4x2_core_t ref, ssim_4x4x2_core_t opt)
835{
836 ALIGN_VAR_32(int, sum0[2][4]);
837 ALIGN_VAR_32(int, sum1[2][4]);
838
839 for (int i = 0; i < ITERS; i++)
840 {
841 intptr_t stride = rand() % 64;
842 int index1 = rand() % TEST_CASES;
843 int index2 = rand() % TEST_CASES;
844 ref(pixel_test_buff[index1] + i, stride, pixel_test_buff[index2] + i, stride, sum0);
845 checked(opt, pixel_test_buff[index1] + i, stride, pixel_test_buff[index2] + i, stride, sum1);
846
847 if (memcmp(sum0, sum1, sizeof(sum0)))
848 return false;
849
850 reportfail();
851 }
852
853 return true;
854}
855
856/* TODO: This function causes crashes when checked. Is this a real bug? */
857bool PixelHarness::check_ssim_end(ssim_end4_t ref, ssim_end4_t opt)
858{
859 ALIGN_VAR_32(int, sum0[5][4]);
860 ALIGN_VAR_32(int, sum1[5][4]);
861
862 for (int i = 0; i < ITERS; i++)
863 {
864 for (int j = 0; j < 5; j++)
865 {
866 for (int k = 0; k < 4; k++)
867 {
868 sum0[j][k] = rand() % (1 << 12);
869 sum1[j][k] = rand() % (1 << 12);
870 }
871 }
872
873 int width = (rand() % 4) + 1; // range[1-4]
874 float cres = ref(sum0, sum1, width);
875 float vres = checked_float(opt, sum0, sum1, width);
876 if (fabs(vres - cres) > 0.00001)
877 return false;
878
879 reportfail();
880 }
881
882 return true;
883}
884
885bool PixelHarness::check_addAvg(addAvg_t ref, addAvg_t opt)
886{
887 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
888 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
889
890 int j = 0;
891
892 memset(ref_dest, 0xCD, sizeof(ref_dest));
893 memset(opt_dest, 0xCD, sizeof(opt_dest));
894 intptr_t stride = STRIDE;
895
896 for (int i = 0; i < ITERS; i++)
897 {
898 int index1 = rand() % TEST_CASES;
899 int index2 = rand() % TEST_CASES;
900 ref(short_test_buff2[index1] + j, short_test_buff2[index2] + j, ref_dest, stride, stride, stride);
901 checked(opt, short_test_buff2[index1] + j, short_test_buff2[index2] + j, opt_dest, stride, stride, stride);
902 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
903 return false;
904
905 reportfail();
906 j += INCR;
907 }
908
909 return true;
910}
911
912bool PixelHarness::check_saoCuOrgE0_t(saoCuOrgE0_t ref, saoCuOrgE0_t opt)
913{
914 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
915 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
916
917 memset(ref_dest, 0xCD, sizeof(ref_dest));
918 memset(opt_dest, 0xCD, sizeof(opt_dest));
919
920 int j = 0;
921
922 for (int i = 0; i < ITERS; i++)
923 {
924 int width = 16 * (rand() % 4 + 1);
925 int8_t sign = rand() % 3;
926 if (sign == 2)
927 {
928 sign = -1;
929 }
930
931 ref(ref_dest, psbuf1 + j, width, sign);
932 checked(opt, opt_dest, psbuf1 + j, width, sign);
933
934 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
935 return false;
936
937 reportfail();
938 j += INCR;
939 }
940
941 return true;
942}
943
944bool PixelHarness::check_planecopy_sp(planecopy_sp_t ref, planecopy_sp_t opt)
945{
946 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
947 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
948
949 memset(ref_dest, 0xCD, sizeof(ref_dest));
950 memset(opt_dest, 0xCD, sizeof(opt_dest));
951
952 int width = 16 + rand() % 48;
953 int height = 16 + rand() % 48;
954 intptr_t srcStride = 64;
955 intptr_t dstStride = width;
956 int j = 0;
957
958 for (int i = 0; i < ITERS; i++)
959 {
960 int index = i % TEST_CASES;
961 checked(opt, ushort_test_buff[index] + j, srcStride, opt_dest, dstStride, width, height, (int)8, (uint16_t)255);
962 ref(ushort_test_buff[index] + j, srcStride, ref_dest, dstStride, width, height, (int)8, (uint16_t)255);
963
964 if (memcmp(ref_dest, opt_dest, width * height * sizeof(pixel)))
965 return false;
966
967 reportfail();
968 j += INCR;
969 }
970
971 return true;
972}
973
974bool PixelHarness::check_planecopy_cp(planecopy_cp_t ref, planecopy_cp_t opt)
975{
976 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
977 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
978
979 memset(ref_dest, 0xCD, sizeof(ref_dest));
980 memset(opt_dest, 0xCD, sizeof(opt_dest));
981
982 int width = 16 + rand() % 48;
983 int height = 16 + rand() % 48;
984 intptr_t srcStride = 64;
985 intptr_t dstStride = width;
986 int j = 0;
987
988 for (int i = 0; i < ITERS; i++)
989 {
990 int index = i % TEST_CASES;
991 checked(opt, uchar_test_buff[index] + j, srcStride, opt_dest, dstStride, width, height, (int)2);
992 ref(uchar_test_buff[index] + j, srcStride, ref_dest, dstStride, width, height, (int)2);
993
994 if (memcmp(ref_dest, opt_dest, width * height * sizeof(pixel)))
995 return false;
996
997 reportfail();
998 j += INCR;
999 }
1000
1001 return true;
1002}
1003
1004bool PixelHarness::testPartition(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)
1005{
1006 if (opt.satd[part])
1007 {
1008 if (!check_pixelcmp(ref.satd[part], opt.satd[part]))
1009 {
1010 printf("satd[%s]: failed!\n", lumaPartStr[part]);
1011 return false;
1012 }
1013 }
1014
1015 if (opt.sa8d_inter[part])
1016 {
1017 if (!check_pixelcmp(ref.sa8d_inter[part], opt.sa8d_inter[part]))
1018 {
1019 printf("sa8d_inter[%s]: failed!\n", lumaPartStr[part]);
1020 return false;
1021 }
1022 }
1023
1024 if (opt.sad[part])
1025 {
1026 if (!check_pixelcmp(ref.sad[part], opt.sad[part]))
1027 {
1028 printf("sad[%s]: failed!\n", lumaPartStr[part]);
1029 return false;
1030 }
1031 }
1032
1033 if (opt.sse_pp[part])
1034 {
1035 if (!check_pixelcmp(ref.sse_pp[part], opt.sse_pp[part]))
1036 {
1037 printf("sse_pp[%s]: failed!\n", lumaPartStr[part]);
1038 return false;
1039 }
1040 }
1041
1042 if (opt.sse_sp[part])
1043 {
1044 if (!check_pixelcmp_sp(ref.sse_sp[part], opt.sse_sp[part]))
1045 {
1046 printf("sse_sp[%s]: failed!\n", lumaPartStr[part]);
1047 return false;
1048 }
1049 }
1050
1051 if (opt.sse_ss[part])
1052 {
1053 if (!check_pixelcmp_ss(ref.sse_ss[part], opt.sse_ss[part]))
1054 {
1055 printf("sse_ss[%s]: failed!\n", lumaPartStr[part]);
1056 return false;
1057 }
1058 }
1059
1060 if (opt.sad_x3[part])
1061 {
1062 if (!check_pixelcmp_x3(ref.sad_x3[part], opt.sad_x3[part]))
1063 {
1064 printf("sad_x3[%s]: failed!\n", lumaPartStr[part]);
1065 return false;
1066 }
1067 }
1068
1069 if (opt.sad_x4[part])
1070 {
1071 if (!check_pixelcmp_x4(ref.sad_x4[part], opt.sad_x4[part]))
1072 {
1073 printf("sad_x4[%s]: failed!\n", lumaPartStr[part]);
1074 return false;
1075 }
1076 }
1077
1078 if (opt.pixelavg_pp[part])
1079 {
1080 if (!check_pixelavg_pp(ref.pixelavg_pp[part], opt.pixelavg_pp[part]))
1081 {
1082 printf("pixelavg_pp[%s]: failed!\n", lumaPartStr[part]);
1083 return false;
1084 }
1085 }
1086
1087 if (opt.luma_copy_pp[part])
1088 {
1089 if (!check_copy_pp(ref.luma_copy_pp[part], opt.luma_copy_pp[part]))
1090 {
1091 printf("luma_copy_pp[%s] failed\n", lumaPartStr[part]);
1092 return false;
1093 }
1094 }
1095
1096 if (opt.luma_copy_sp[part])
1097 {
1098 if (!check_copy_sp(ref.luma_copy_sp[part], opt.luma_copy_sp[part]))
1099 {
1100 printf("luma_copy_sp[%s] failed\n", lumaPartStr[part]);
1101 return false;
1102 }
1103 }
1104
1105 if (opt.luma_copy_ps[part])
1106 {
1107 if (!check_copy_ps(ref.luma_copy_ps[part], opt.luma_copy_ps[part]))
1108 {
1109 printf("luma_copy_ps[%s] failed\n", lumaPartStr[part]);
1110 return false;
1111 }
1112 }
1113
1114 if (opt.luma_copy_ss[part])
1115 {
1116 if (!check_copy_ss(ref.luma_copy_ss[part], opt.luma_copy_ss[part]))
1117 {
1118 printf("luma_copy_ss[%s] failed\n", lumaPartStr[part]);
1119 return false;
1120 }
1121 }
1122
1123 if (opt.luma_addAvg[part])
1124 {
1125 if (!check_addAvg(ref.luma_addAvg[part], opt.luma_addAvg[part]))
1126 {
1127 printf("luma_addAvg[%s] failed\n", lumaPartStr[part]);
1128 return false;
1129 }
1130 }
1131
1132 if (part < NUM_SQUARE_BLOCKS)
1133 {
1134 if (opt.luma_sub_ps[part])
1135 {
1136 if (!check_pixel_sub_ps(ref.luma_sub_ps[part], opt.luma_sub_ps[part]))
1137 {
1138 printf("luma_sub_ps[%s] failed\n", lumaPartStr[part]);
1139 return false;
1140 }
1141 }
1142
1143 if (opt.luma_add_ps[part])
1144 {
1145 if (!check_pixel_add_ps(ref.luma_add_ps[part], opt.luma_add_ps[part]))
1146 {
1147 printf("luma_add_ps[%s] failed\n", lumaPartStr[part]);
1148 return false;
1149 }
1150 }
1151 }
1152
1153 for (int i = 0; i < X265_CSP_COUNT; i++)
1154 {
1155 if (opt.chroma[i].copy_pp[part])
1156 {
1157 if (!check_copy_pp(ref.chroma[i].copy_pp[part], opt.chroma[i].copy_pp[part]))
1158 {
1159 printf("chroma_copy_pp[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
1160 return false;
1161 }
1162 }
1163 if (opt.chroma[i].copy_sp[part])
1164 {
1165 if (!check_copy_sp(ref.chroma[i].copy_sp[part], opt.chroma[i].copy_sp[part]))
1166 {
1167 printf("chroma_copy_sp[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
1168 return false;
1169 }
1170 }
1171 if (opt.chroma[i].copy_ps[part])
1172 {
1173 if (!check_copy_ps(ref.chroma[i].copy_ps[part], opt.chroma[i].copy_ps[part]))
1174 {
1175 printf("chroma_copy_ps[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
1176 return false;
1177 }
1178 }
1179 if (opt.chroma[i].copy_ss[part])
1180 {
1181 if (!check_copy_ss(ref.chroma[i].copy_ss[part], opt.chroma[i].copy_ss[part]))
1182 {
1183 printf("chroma_copy_ss[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
1184 return false;
1185 }
1186 }
1187 if (opt.chroma[i].addAvg[part])
1188 {
1189 if (!check_addAvg(ref.chroma[i].addAvg[part], opt.chroma[i].addAvg[part]))
1190 {
1191 printf("chroma_addAvg[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
1192 return false;
1193 }
1194 }
1195 if (part < NUM_SQUARE_BLOCKS)
1196 {
1197 if (opt.chroma[i].sub_ps[part])
1198 {
1199 if (!check_pixel_sub_ps(ref.chroma[i].sub_ps[part], opt.chroma[i].sub_ps[part]))
1200 {
1201 printf("chroma_sub_ps[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
1202 return false;
1203 }
1204 }
1205 if (opt.chroma[i].add_ps[part])
1206 {
1207 if (!check_pixel_add_ps(ref.chroma[i].add_ps[part], opt.chroma[i].add_ps[part]))
1208 {
1209 printf("chroma_add_ps[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
1210 return false;
1211 }
1212 }
1213 }
1214 }
1215
1216 return true;
1217}
1218
1219bool PixelHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPrimitives& opt)
1220{
1221 for (int size = 4; size <= 64; size *= 2)
1222 {
1223 int part = partitionFromSizes(size, size); // 2Nx2N
1224 if (!testPartition(part, ref, opt)) return false;
1225
1226 if (size > 4)
1227 {
1228 part = partitionFromSizes(size, size >> 1); // 2NxN
1229 if (!testPartition(part, ref, opt)) return false;
1230 part = partitionFromSizes(size >> 1, size); // Nx2N
1231 if (!testPartition(part, ref, opt)) return false;
1232 }
1233 if (size > 8)
1234 {
1235 // 4 AMP modes
1236 part = partitionFromSizes(size, size >> 2);
1237 if (!testPartition(part, ref, opt)) return false;
1238 part = partitionFromSizes(size, 3 * (size >> 2));
1239 if (!testPartition(part, ref, opt)) return false;
1240
1241 part = partitionFromSizes(size >> 2, size);
1242 if (!testPartition(part, ref, opt)) return false;
1243 part = partitionFromSizes(3 * (size >> 2), size);
1244 if (!testPartition(part, ref, opt)) return false;
1245 }
1246 }
1247
1248 for (int i = 0; i < NUM_SQUARE_BLOCKS; i++)
1249 {
1250 if (opt.calcresidual[i])
1251 {
1252 if (!check_calresidual(ref.calcresidual[i], opt.calcresidual[i]))
1253 {
1254 printf("calcresidual width: %d failed!\n", 4 << i);
1255 return false;
1256 }
1257 }
1258 if (opt.sa8d[i])
1259 {
1260 if (!check_pixelcmp(ref.sa8d[i], opt.sa8d[i]))
1261 {
1262 printf("sa8d[%dx%d]: failed!\n", 4 << i, 4 << i);
1263 return false;
1264 }
1265 }
1266
1267 if ((i <= BLOCK_32x32) && opt.ssd_s[i])
1268 {
1269 if (!check_ssd_s(ref.ssd_s[i], opt.ssd_s[i]))
1270 {
1271 printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i);
1272 return false;
1273 }
1274 }
1275
1276 if (opt.blockfill_s[i])
1277 {
1278 if (!check_blockfill_s(ref.blockfill_s[i], opt.blockfill_s[i]))
1279 {
1280 printf("blockfill_s[%dx%d]: failed!\n", 4 << i, 4 << i);
1281 return false;
1282 }
1283 }
1284 if (opt.transpose[i])
1285 {
1286 if (!check_transpose(ref.transpose[i], opt.transpose[i]))
1287 {
1288 printf("transpose[%dx%d] failed\n", 4 << i, 4 << i);
1289 return false;
1290 }
1291 }
1292
1293 if (opt.var[i])
1294 {
1295 if (!check_pixel_var(ref.var[i], opt.var[i]))
1296 {
1297 printf("var[%dx%d] failed\n", 4 << i, 4 << i);
1298 return false;
1299 }
1300 }
1301
1302 if ((i < BLOCK_64x64) && opt.copy_cnt[i])
1303 {
1304 if (!check_copy_cnt_t(ref.copy_cnt[i], opt.copy_cnt[i]))
1305 {
1306 printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
1307 return false;
1308 }
1309 }
1310
1311 if ((i < BLOCK_64x64) && opt.cvt16to32_shr[i])
1312 {
1313 if (!check_cvt16to32_shr_t(ref.cvt16to32_shr[i], opt.cvt16to32_shr[i]))
1314 {
1315 printf("cvt16to32_shr failed!\n");
1316 return false;
1317 }
1318 }
1319
1320 if ((i < BLOCK_64x64) && opt.cvt32to16_shl[i])
1321 {
1322 if (!check_cvt32to16_shl_t(ref.cvt32to16_shl[i], opt.cvt32to16_shl[i]))
1323 {
1324 printf("cvt32to16_shl failed!\n");
1325 return false;
1326 }
1327 }
1328
1329 if ((i < BLOCK_64x64) && opt.copy_shl[i])
1330 {
1331 if (!check_copy_shl_t(ref.copy_shl[i], opt.copy_shl[i]))
1332 {
1333 printf("copy_shl[%dx%d] failed!\n", 4 << i, 4 << i);
1334 return false;
1335 }
1336 }
1337
1338 }
1339
1340 if (opt.cvt32to16_shr)
1341 {
1342 if (!check_cvt32to16_shr_t(ref.cvt32to16_shr, opt.cvt32to16_shr))
1343 {
1344 printf("cvt32to16 failed!\n");
1345 return false;
1346 }
1347 }
1348
1349 if (opt.cvt16to32_shl)
1350 {
1351 if (!check_cvt16to32_shl_t(ref.cvt16to32_shl, opt.cvt16to32_shl))
1352 {
1353 printf("cvt16to32_shl failed!\n");
1354 return false;
1355 }
1356 }
1357
1358 if (opt.weight_pp)
1359 {
1360 if (!check_weightp(ref.weight_pp, opt.weight_pp))
1361 {
1362 printf("Weighted Prediction (pixel) failed!\n");
1363 return false;
1364 }
1365 }
1366
1367 if (opt.weight_sp)
1368 {
1369 if (!check_weightp(ref.weight_sp, opt.weight_sp))
1370 {
1371 printf("Weighted Prediction (short) failed!\n");
1372 return false;
1373 }
1374 }
1375
1376 if (opt.frame_init_lowres_core)
1377 {
1378 if (!check_downscale_t(ref.frame_init_lowres_core, opt.frame_init_lowres_core))
1379 {
1380 printf("downscale failed!\n");
1381 return false;
1382 }
1383 }
1384
1385 if (opt.scale1D_128to64)
1386 {
1387 if (!check_scale_pp(ref.scale1D_128to64, opt.scale1D_128to64))
1388 {
1389 printf("scale1D_128to64 failed!\n");
1390 return false;
1391 }
1392 }
1393
1394 if (opt.scale2D_64to32)
1395 {
1396 if (!check_scale_pp(ref.scale2D_64to32, opt.scale2D_64to32))
1397 {
1398 printf("scale2D_64to32 failed!\n");
1399 return false;
1400 }
1401 }
1402
1403 if (opt.ssim_4x4x2_core)
1404 {
1405 if (!check_ssim_4x4x2_core(ref.ssim_4x4x2_core, opt.ssim_4x4x2_core))
1406 {
1407 printf("ssim_end_4 failed!\n");
1408 return false;
1409 }
1410 }
1411
1412 if (opt.ssim_end_4)
1413 {
1414 if (!check_ssim_end(ref.ssim_end_4, opt.ssim_end_4))
1415 {
1416 printf("ssim_end_4 failed!\n");
1417 return false;
1418 }
1419 }
1420
1421 if (opt.saoCuOrgE0)
1422 {
1423 if (!check_saoCuOrgE0_t(ref.saoCuOrgE0, opt.saoCuOrgE0))
1424 {
1425 printf("SAO_EO_0 failed\n");
1426 return false;
1427 }
1428 }
1429
1430 if (opt.planecopy_sp)
1431 {
1432 if (!check_planecopy_sp(ref.planecopy_sp, opt.planecopy_sp))
1433 {
1434 printf("planecopy_sp failed\n");
1435 return false;
1436 }
1437 }
1438
1439 if (opt.planecopy_cp)
1440 {
1441 if (!check_planecopy_cp(ref.planecopy_cp, opt.planecopy_cp))
1442 {
1443 printf("planecopy_cp failed\n");
1444 return false;
1445 }
1446 }
1447
1448 if (opt.copy_shr)
1449 {
1450 if (!check_copy_shr_t(ref.copy_shr, opt.copy_shr))
1451 {
1452 printf("copy_shr failed!\n");
1453 return false;
1454 }
1455 }
1456
1457 return true;
1458}
1459
1460void PixelHarness::measurePartition(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)
1461{
1462 ALIGN_VAR_16(int, cres[16]);
1463 pixel *fref = pbuf2 + 2 * INCR;
1464 char header[128];
1465#define HEADER(str, ...) sprintf(header, str, __VA_ARGS__); printf("%22s", header);
1466
1467 if (opt.satd[part])
1468 {
1469 HEADER("satd[%s]", lumaPartStr[part]);
1470 REPORT_SPEEDUP(opt.satd[part], ref.satd[part], pbuf1, STRIDE, fref, STRIDE);
1471 }
1472
1473 if (opt.pixelavg_pp[part])
1474 {
1475 HEADER("avg_pp[%s]", lumaPartStr[part]);
1476 REPORT_SPEEDUP(opt.pixelavg_pp[part], ref.pixelavg_pp[part], pbuf1, STRIDE, pbuf2, STRIDE, pbuf3, STRIDE, 32);
1477 }
1478
1479 if (opt.sa8d_inter[part])
1480 {
1481 HEADER("sa8d[%s]", lumaPartStr[part]);
1482 REPORT_SPEEDUP(opt.sa8d_inter[part], ref.sa8d_inter[part], pbuf1, STRIDE, fref, STRIDE);
1483 }
1484
1485 if (opt.sad[part])
1486 {
1487 HEADER("sad[%s]", lumaPartStr[part]);
1488 REPORT_SPEEDUP(opt.sad[part], ref.sad[part], pbuf1, STRIDE, fref, STRIDE);
1489 }
1490
1491 if (opt.sad_x3[part])
1492 {
1493 HEADER("sad_x3[%s]", lumaPartStr[part]);
1494 REPORT_SPEEDUP(opt.sad_x3[part], ref.sad_x3[part], pbuf1, fref, fref + 1, fref - 1, FENC_STRIDE + 5, &cres[0]);
1495 }
1496
1497 if (opt.sad_x4[part])
1498 {
1499 HEADER("sad_x4[%s]", lumaPartStr[part]);
1500 REPORT_SPEEDUP(opt.sad_x4[part], ref.sad_x4[part], pbuf1, fref, fref + 1, fref - 1, fref - INCR, FENC_STRIDE + 5, &cres[0]);
1501 }
1502
1503 if (opt.sse_pp[part])
1504 {
1505 HEADER("sse_pp[%s]", lumaPartStr[part]);
1506 REPORT_SPEEDUP(opt.sse_pp[part], ref.sse_pp[part], pbuf1, STRIDE, fref, STRIDE);
1507 }
1508
1509 if (opt.sse_sp[part])
1510 {
1511 HEADER("sse_sp[%s]", lumaPartStr[part]);
1512 REPORT_SPEEDUP(opt.sse_sp[part], ref.sse_sp[part], (int16_t*)pbuf1, STRIDE, fref, STRIDE);
1513 }
1514
1515 if (opt.sse_ss[part])
1516 {
1517 HEADER("sse_ss[%s]", lumaPartStr[part]);
1518 REPORT_SPEEDUP(opt.sse_ss[part], ref.sse_ss[part], (int16_t*)pbuf1, STRIDE, (int16_t*)fref, STRIDE);
1519 }
1520
1521 if (opt.luma_copy_pp[part])
1522 {
1523 HEADER("luma_copy_pp[%s]", lumaPartStr[part]);
1524 REPORT_SPEEDUP(opt.luma_copy_pp[part], ref.luma_copy_pp[part], pbuf1, 64, pbuf2, 128);
1525 }
1526
1527 if (opt.luma_copy_sp[part])
1528 {
1529 HEADER("luma_copy_sp[%s]", lumaPartStr[part]);
1530 REPORT_SPEEDUP(opt.luma_copy_sp[part], ref.luma_copy_sp[part], pbuf1, 64, sbuf3, 128);
1531 }
1532
1533 if (opt.luma_copy_ps[part])
1534 {
1535 HEADER("luma_copy_ps[%s]", lumaPartStr[part]);
1536 REPORT_SPEEDUP(opt.luma_copy_ps[part], ref.luma_copy_ps[part], sbuf1, 64, pbuf1, 128);
1537 }
1538 if (opt.luma_copy_ss[part])
1539 {
1540 HEADER("luma_copy_ss[%s]", lumaPartStr[part]);
1541 REPORT_SPEEDUP(opt.luma_copy_ss[part], ref.luma_copy_ss[part], sbuf1, 64, sbuf2, 128);
1542 }
1543 if (opt.luma_addAvg[part])
1544 {
1545 HEADER("luma_addAvg[%s]", lumaPartStr[part]);
1546 REPORT_SPEEDUP(opt.luma_addAvg[part], ref.luma_addAvg[part], sbuf1, sbuf2, pbuf1, STRIDE, STRIDE, STRIDE);
1547 }
1548 if (part < NUM_SQUARE_BLOCKS)
1549 {
1550 if (opt.luma_sub_ps[part])
1551 {
1552 HEADER("luma_sub_ps[%s]", lumaPartStr[part]);
1553 REPORT_SPEEDUP(opt.luma_sub_ps[part], ref.luma_sub_ps[part], (int16_t*)pbuf1, FENC_STRIDE, pbuf2, pbuf1, STRIDE, STRIDE);
1554 }
1555 if (opt.luma_add_ps[part])
1556 {
1557 HEADER("luma_add_ps[%s]", lumaPartStr[part]);
1558 REPORT_SPEEDUP(opt.luma_add_ps[part], ref.luma_add_ps[part], pbuf1, FENC_STRIDE, pbuf2, sbuf1, STRIDE, STRIDE);
1559 }
1560 }
1561
1562 for (int i = 0; i < X265_CSP_COUNT; i++)
1563 {
1564 if (opt.chroma[i].copy_pp[part])
1565 {
1566 HEADER("[%s] copy_pp[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
1567 REPORT_SPEEDUP(opt.chroma[i].copy_pp[part], ref.chroma[i].copy_pp[part], pbuf1, 64, pbuf2, 128);
1568 }
1569 if (opt.chroma[i].copy_sp[part])
1570 {
1571 HEADER("[%s] copy_sp[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
1572 REPORT_SPEEDUP(opt.chroma[i].copy_sp[part], ref.chroma[i].copy_sp[part], pbuf1, 64, sbuf3, 128);
1573 }
1574 if (opt.chroma[i].copy_ps[part])
1575 {
1576 HEADER("[%s] copy_ps[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
1577 REPORT_SPEEDUP(opt.chroma[i].copy_ps[part], ref.chroma[i].copy_ps[part], sbuf1, 64, pbuf1, 128);
1578 }
1579 if (opt.chroma[i].copy_ss[part])
1580 {
1581 HEADER("[%s] copy_ss[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
1582 REPORT_SPEEDUP(opt.chroma[i].copy_ss[part], ref.chroma[i].copy_ss[part], sbuf1, 64, sbuf2, 128);
1583 }
1584 if (opt.chroma[i].addAvg[part])
1585 {
1586 HEADER("[%s] addAvg[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
1587 REPORT_SPEEDUP(opt.chroma[i].addAvg[part], ref.chroma[i].addAvg[part], sbuf1, sbuf2, pbuf1, STRIDE, STRIDE, STRIDE);
1588 }
1589 if (part < NUM_SQUARE_BLOCKS)
1590 {
1591 if (opt.chroma[i].sub_ps[part])
1592 {
1593 HEADER("[%s] sub_ps[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
1594 REPORT_SPEEDUP(opt.chroma[i].sub_ps[part], ref.chroma[i].sub_ps[part], (int16_t*)pbuf1, FENC_STRIDE, pbuf2, pbuf1, STRIDE, STRIDE);
1595 }
1596 if (opt.chroma[i].add_ps[part])
1597 {
1598 HEADER("[%s] add_ps[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
1599 REPORT_SPEEDUP(opt.chroma[i].add_ps[part], ref.chroma[i].add_ps[part], pbuf1, FENC_STRIDE, pbuf2, sbuf1, STRIDE, STRIDE);
1600 }
1601 }
1602 }
1603
1604#undef HEADER
1605}
1606
1607void PixelHarness::measureSpeed(const EncoderPrimitives& ref, const EncoderPrimitives& opt)
1608{
1609 char header[128];
1610
1611#define HEADER(str, ...) sprintf(header, str, __VA_ARGS__); printf("%22s", header);
1612#define HEADER0(str) printf("%22s", str);
1613
1614 for (int size = 4; size <= 64; size *= 2)
1615 {
1616 int part = partitionFromSizes(size, size); // 2Nx2N
1617 measurePartition(part, ref, opt);
1618
1619 if (size > 4)
1620 {
1621 part = partitionFromSizes(size, size >> 1); // 2NxN
1622 measurePartition(part, ref, opt);
1623 part = partitionFromSizes(size >> 1, size); // Nx2N
1624 measurePartition(part, ref, opt);
1625 }
1626 if (size > 8)
1627 {
1628 // 4 AMP modes
1629 part = partitionFromSizes(size, size >> 2);
1630 measurePartition(part, ref, opt);
1631 part = partitionFromSizes(size, 3 * (size >> 2));
1632 measurePartition(part, ref, opt);
1633
1634 part = partitionFromSizes(size >> 2, size);
1635 measurePartition(part, ref, opt);
1636 part = partitionFromSizes(3 * (size >> 2), size);
1637 measurePartition(part, ref, opt);
1638 }
1639 }
1640
1641 for (int i = 0; i < NUM_SQUARE_BLOCKS; i++)
1642 {
1643 if ((i <= BLOCK_32x32) && opt.ssd_s[i])
1644 {
1645 HEADER("ssd_s[%dx%d]", 4 << i, 4 << i);
1646 REPORT_SPEEDUP(opt.ssd_s[i], ref.ssd_s[i], sbuf1, STRIDE);
1647 }
1648 if (opt.sa8d[i])
1649 {
1650 HEADER("sa8d[%dx%d]", 4 << i, 4 << i);
1651 REPORT_SPEEDUP(opt.sa8d[i], ref.sa8d[i], pbuf1, STRIDE, pbuf2, STRIDE);
1652 }
1653 if (opt.calcresidual[i])
1654 {
1655 HEADER("residual[%dx%d]", 4 << i, 4 << i);
1656 REPORT_SPEEDUP(opt.calcresidual[i], ref.calcresidual[i], pbuf1, pbuf2, sbuf1, 64);
1657 }
1658
1659 if (opt.blockfill_s[i])
1660 {
1661 HEADER("blkfill[%dx%d]", 4 << i, 4 << i);
1662 REPORT_SPEEDUP(opt.blockfill_s[i], ref.blockfill_s[i], sbuf1, 64, SHORT_MAX);
1663 }
1664
1665 if (opt.transpose[i])
1666 {
1667 HEADER("transpose[%dx%d]", 4 << i, 4 << i);
1668 REPORT_SPEEDUP(opt.transpose[i], ref.transpose[i], pbuf1, pbuf2, STRIDE);
1669 }
1670
1671 if (opt.var[i])
1672 {
1673 HEADER("var[%dx%d]", 4 << i, 4 << i);
1674 REPORT_SPEEDUP(opt.var[i], ref.var[i], pbuf1, STRIDE);
1675 }
1676
1677 if ((i < BLOCK_64x64) && opt.cvt16to32_shr[i])
1678 {
1679 HEADER("cvt16to32_shr[%dx%d]", 4 << i, 4 << i);
1680 REPORT_SPEEDUP(opt.cvt16to32_shr[i], ref.cvt16to32_shr[i], ibuf1, sbuf2, STRIDE, 3, 4);
1681 }
1682
1683 if ((i < BLOCK_64x64) && opt.cvt32to16_shl[i])
1684 {
1685 HEADER("cvt32to16_shl[%dx%d]", 4 << i, 4 << i);
1686 REPORT_SPEEDUP(opt.cvt32to16_shl[i], ref.cvt32to16_shl[i], sbuf2, ibuf1, STRIDE, 3);
1687 }
1688
1689 if ((i < BLOCK_64x64) && opt.copy_cnt[i])
1690 {
1691 HEADER("copy_cnt[%dx%d]", 4 << i, 4 << i);
1692 REPORT_SPEEDUP(opt.copy_cnt[i], ref.copy_cnt[i], sbuf1, sbuf2, STRIDE);
1693 }
1694
1695 if ((i < BLOCK_64x64) && opt.copy_shl[i])
1696 {
1697 HEADER("copy_shl[%dx%d]", 4 << i, 4 << i);
1698 REPORT_SPEEDUP(opt.copy_shl[i], ref.copy_shl[i], sbuf1, sbuf2, STRIDE, 64);
1699 }
1700
1701 }
1702
1703 if (opt.cvt32to16_shr)
1704 {
1705 HEADER0("cvt32to16_shr");
1706 REPORT_SPEEDUP(opt.cvt32to16_shr, ref.cvt32to16_shr, sbuf1, ibuf1, 64, 5, 64);
1707 }
1708
1709 if (opt.cvt16to32_shl)
1710 {
1711 HEADER0("cvt16to32_shl");
1712 REPORT_SPEEDUP(opt.cvt16to32_shl, ref.cvt16to32_shl, ibuf1, sbuf1, 64, 5, 64);
1713 }
1714
1715 if (opt.weight_pp)
1716 {
1717 HEADER0("weight_pp");
1718 REPORT_SPEEDUP(opt.weight_pp, ref.weight_pp, pbuf1, pbuf2, 64, 32, 32, 128, 1 << 9, 10, 100);
1719 }
1720
1721 if (opt.weight_sp)
1722 {
1723 HEADER0("weight_sp");
1724 REPORT_SPEEDUP(opt.weight_sp, ref.weight_sp, (int16_t*)sbuf1, pbuf1, 64, 64, 32, 32, 128, 1 << 9, 10, 100);
1725 }
1726
1727 if (opt.frame_init_lowres_core)
1728 {
1729 HEADER0("downscale");
1730 REPORT_SPEEDUP(opt.frame_init_lowres_core, ref.frame_init_lowres_core, pbuf2, pbuf1, pbuf2, pbuf3, pbuf4, 64, 64, 64, 64);
1731 }
1732
1733 if (opt.scale1D_128to64)
1734 {
1735 HEADER0("scale1D_128to64");
1736 REPORT_SPEEDUP(opt.scale1D_128to64, ref.scale1D_128to64, pbuf2, pbuf1, 64);
1737 }
1738
1739 if (opt.scale2D_64to32)
1740 {
1741 HEADER0("scale2D_64to32");
1742 REPORT_SPEEDUP(opt.scale2D_64to32, ref.scale2D_64to32, pbuf2, pbuf1, 64);
1743 }
1744
1745 if (opt.ssim_4x4x2_core)
1746 {
1747 HEADER0("ssim_4x4x2_core");
1748 REPORT_SPEEDUP(opt.ssim_4x4x2_core, ref.ssim_4x4x2_core, pbuf1, 64, pbuf2, 64, (int(*)[4])sbuf1);
1749 }
1750
1751 if (opt.ssim_end_4)
1752 {
1753 HEADER0("ssim_end_4");
1754 REPORT_SPEEDUP(opt.ssim_end_4, ref.ssim_end_4, (int(*)[4])pbuf2, (int(*)[4])pbuf1, 4);
1755 }
1756
1757 if (opt.saoCuOrgE0)
1758 {
1759 HEADER0("SAO_EO_0");
1760 REPORT_SPEEDUP(opt.saoCuOrgE0, ref.saoCuOrgE0, pbuf1, psbuf1, 64, 1);
1761 }
1762
1763 if (opt.planecopy_sp)
1764 {
1765 HEADER0("planecopy_sp");
1766 REPORT_SPEEDUP(opt.planecopy_sp, ref.planecopy_sp, ushort_test_buff[0], 64, pbuf1, 64, 64, 64, 8, 255);
1767 }
1768
1769 if (opt.planecopy_cp)
1770 {
1771 HEADER0("planecopy_cp");
1772 REPORT_SPEEDUP(opt.planecopy_cp, ref.planecopy_cp, uchar_test_buff[0], 64, pbuf1, 64, 64, 64, 2);
1773 }
1774
1775 if (opt.copy_shr)
1776 {
1777 HEADER0("copy_shr");
1778 REPORT_SPEEDUP(opt.copy_shr, ref.copy_shr, sbuf1, sbuf2, 64, 5, 64);
1779 }
1780
1781}