Update patch for ARM processors.
[deb_x265.git] / source / test / pixelharness.cpp
CommitLineData
72b9787e
JB
1/*****************************************************************************
2 * Copyright (C) 2013 x265 project
3 *
4 * Authors: Steve Borho <steve@borho.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
19 *
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
23
24#include "pixelharness.h"
25#include "primitives.h"
26
27using namespace x265;
28
29PixelHarness::PixelHarness()
30{
31 /* [0] --- Random values
32 * [1] --- Minimum
33 * [2] --- Maximum */
34 for (int i = 0; i < BUFFSIZE; i++)
35 {
36 pixel_test_buff[0][i] = rand() % PIXEL_MAX;
37 short_test_buff[0][i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; // max(SHORT_MIN, min(rand(), SMAX));
38 short_test_buff1[0][i] = rand() & PIXEL_MAX; // For block copy only
39 short_test_buff2[0][i] = rand() % 16383; // for addAvg
40 int_test_buff[0][i] = rand() % SHORT_MAX;
41 ushort_test_buff[0][i] = rand() % ((1 << 16) - 1);
42 uchar_test_buff[0][i] = rand() % ((1 << 8) - 1);
43
44 pixel_test_buff[1][i] = PIXEL_MIN;
45 short_test_buff[1][i] = SMIN;
46 short_test_buff1[1][i] = PIXEL_MIN;
47 short_test_buff2[1][i] = -16384;
48 int_test_buff[1][i] = SHORT_MIN;
49 ushort_test_buff[1][i] = PIXEL_MIN;
50 uchar_test_buff[1][i] = PIXEL_MIN;
51
52 pixel_test_buff[2][i] = PIXEL_MAX;
53 short_test_buff[2][i] = SMAX;
54 short_test_buff1[2][i] = PIXEL_MAX;
55 short_test_buff2[2][i] = 16383;
56 int_test_buff[2][i] = SHORT_MAX;
57 ushort_test_buff[2][i] = ((1 << 16) - 1);
58 uchar_test_buff[2][i] = 255;
59
60 pbuf1[i] = rand() & PIXEL_MAX;
61 pbuf2[i] = rand() & PIXEL_MAX;
62 pbuf3[i] = rand() & PIXEL_MAX;
63 pbuf4[i] = rand() & PIXEL_MAX;
64
65 sbuf1[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; //max(SHORT_MIN, min(rand(), SMAX));
66 sbuf2[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; //max(SHORT_MIN, min(rand(), SMAX));
67 ibuf1[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1;
68 psbuf1[i] = (rand() % 65) - 32; // range is between -32 to 32
69 sbuf3[i] = rand() % PIXEL_MAX; // for blockcopy only
70 }
71}
72
73bool PixelHarness::check_pixelcmp(pixelcmp_t ref, pixelcmp_t opt)
74{
75 int j = 0;
76 intptr_t stride = STRIDE;
77
78 for (int i = 0; i < ITERS; i++)
79 {
80 int index1 = rand() % TEST_CASES;
81 int index2 = rand() % TEST_CASES;
82 int vres = (int)checked(opt, pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride);
83 int cres = ref(pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride);
84 if (vres != cres)
85 return false;
86
87 reportfail();
88 j += INCR;
89 }
90
91 return true;
92}
93
94bool PixelHarness::check_pixelcmp_sp(pixelcmp_sp_t ref, pixelcmp_sp_t opt)
95{
96 int j = 0;
97 intptr_t stride = STRIDE;
98
99 for (int i = 0; i < ITERS; i++)
100 {
101 int index1 = rand() % TEST_CASES;
102 int index2 = rand() % TEST_CASES;
103 int vres = (int)checked(opt, short_test_buff[index1], stride, pixel_test_buff[index2] + j, stride);
104 int cres = ref(short_test_buff[index1], stride, pixel_test_buff[index2] + j, stride);
105 if (vres != cres)
106 return false;
107
108 reportfail();
109 j += INCR;
110 }
111
112 return true;
113}
114
115bool PixelHarness::check_pixelcmp_ss(pixelcmp_ss_t ref, pixelcmp_ss_t opt)
116{
117 int j = 0;
118 intptr_t stride = STRIDE;
119
120 for (int i = 0; i < ITERS; i++)
121 {
122 int index1 = rand() % TEST_CASES;
123 int index2 = rand() % TEST_CASES;
124 int vres = (int)checked(opt, short_test_buff[index1], stride, short_test_buff[index2] + j, stride);
125 int cres = ref(short_test_buff[index1], stride, short_test_buff[index2] + j, stride);
126 if (vres != cres)
127 return false;
128
129 reportfail();
130 j += INCR;
131 }
132
133 return true;
134}
135
136bool PixelHarness::check_pixelcmp_x3(pixelcmp_x3_t ref, pixelcmp_x3_t opt)
137{
138 ALIGN_VAR_16(int, cres[16]);
139 ALIGN_VAR_16(int, vres[16]);
140 int j = 0;
141 intptr_t stride = FENC_STRIDE - 5;
142 for (int i = 0; i < ITERS; i++)
143 {
144 int index1 = rand() % TEST_CASES;
145 int index2 = rand() % TEST_CASES;
146 checked(opt, pixel_test_buff[index1],
147 pixel_test_buff[index2] + j,
148 pixel_test_buff[index2] + j + 1,
149 pixel_test_buff[index2] + j + 2, stride, &vres[0]);
150 ref(pixel_test_buff[index1],
151 pixel_test_buff[index2] + j,
152 pixel_test_buff[index2] + j + 1,
153 pixel_test_buff[index2] + j + 2, stride, &cres[0]);
154 if ((vres[0] != cres[0]) || ((vres[1] != cres[1])) || ((vres[2] != cres[2])))
155 return false;
156
157 reportfail();
158 j += INCR;
159 }
160
161 return true;
162}
163
164bool PixelHarness::check_pixelcmp_x4(pixelcmp_x4_t ref, pixelcmp_x4_t opt)
165{
166 ALIGN_VAR_16(int, cres[16]);
167 ALIGN_VAR_16(int, vres[16]);
168 int j = 0;
169 intptr_t stride = FENC_STRIDE - 5;
170 for (int i = 0; i < ITERS; i++)
171 {
172 int index1 = rand() % TEST_CASES;
173 int index2 = rand() % TEST_CASES;
174 checked(opt, pixel_test_buff[index1],
175 pixel_test_buff[index2] + j,
176 pixel_test_buff[index2] + j + 1,
177 pixel_test_buff[index2] + j + 2,
178 pixel_test_buff[index2] + j + 3, stride, &vres[0]);
179 ref(pixel_test_buff[index1],
180 pixel_test_buff[index2] + j,
181 pixel_test_buff[index2] + j + 1,
182 pixel_test_buff[index2] + j + 2,
183 pixel_test_buff[index2] + j + 3, stride, &cres[0]);
184
185 if ((vres[0] != cres[0]) || ((vres[1] != cres[1])) || ((vres[2] != cres[2])) || ((vres[3] != cres[3])))
186 return false;
187
188 reportfail();
189 j += INCR;
190 }
191
192 return true;
193}
194
195bool PixelHarness::check_calresidual(calcresidual_t ref, calcresidual_t opt)
196{
197 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
198 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
199 memset(ref_dest, 0, 64 * 64 * sizeof(int16_t));
200 memset(opt_dest, 0, 64 * 64 * sizeof(int16_t));
201
202 int j = 0;
203 intptr_t stride = STRIDE;
204 for (int i = 0; i < ITERS; i++)
205 {
206 int index = i % TEST_CASES;
207 checked(opt, pbuf1 + j, pixel_test_buff[index] + j, opt_dest, stride);
208 ref(pbuf1 + j, pixel_test_buff[index] + j, ref_dest, stride);
209
210 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
211 return false;
212
213 reportfail();
214 j += INCR;
215 }
216
217 return true;
218}
219
220bool PixelHarness::check_ssd_s(pixel_ssd_s_t ref, pixel_ssd_s_t opt)
221{
222 int j = 0;
223 for (int i = 0; i < ITERS; i++)
224 {
225 // NOTE: stride must be multiple of 16, because minimum block is 4x4
226 int stride = (STRIDE + (rand() % STRIDE)) & ~15;
227 int cres = ref(sbuf1 + j, stride);
228 int vres = (int)checked(opt, sbuf1 + j, (intptr_t)stride);
229
230 if (cres != vres)
231 {
232 return false;
233 }
234
235 reportfail();
236 j += INCR;
237 }
238
239 return true;
240}
241
242bool PixelHarness::check_weightp(weightp_sp_t ref, weightp_sp_t opt)
243{
244 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
245 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
246
247 memset(ref_dest, 0, 64 * 64 * sizeof(pixel));
248 memset(opt_dest, 0, 64 * 64 * sizeof(pixel));
249 int j = 0;
250 int width = 2 * (rand() % 32 + 1);
251 int height = 8;
252 int w0 = rand() % 128;
253 int shift = rand() % 15;
254 int round = shift ? (1 << (shift - 1)) : 0;
255 int offset = (rand() % 256) - 128;
256 intptr_t stride = 64;
257 for (int i = 0; i < ITERS; i++)
258 {
259 int index = i % TEST_CASES;
260 checked(opt, short_test_buff[index] + j, opt_dest, stride, stride, width, height, w0, round, shift, offset);
261 ref(short_test_buff[index] + j, ref_dest, stride, stride, width, height, w0, round, shift, offset);
262
263 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
264 return false;
265
266 reportfail();
267 j += INCR;
268 }
269
270 return true;
271}
272
273bool PixelHarness::check_weightp(weightp_pp_t ref, weightp_pp_t opt)
274{
275 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
276 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
277
278 memset(ref_dest, 0, 64 * 64 * sizeof(pixel));
279 memset(opt_dest, 0, 64 * 64 * sizeof(pixel));
280 int j = 0;
281 int width = 16 * (rand() % 4 + 1);
282 int height = 8;
283 int w0 = rand() % 128;
284 int shift = rand() % 15;
285 int round = shift ? (1 << (shift - 1)) : 0;
286 int offset = (rand() % 256) - 128;
287 intptr_t stride = 64;
288 for (int i = 0; i < ITERS; i++)
289 {
290 int index = i % TEST_CASES;
291 checked(opt, pixel_test_buff[index] + j, opt_dest, stride, width, height, w0, round, shift, offset);
292 ref(pixel_test_buff[index] + j, ref_dest, stride, width, height, w0, round, shift, offset);
293
294 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
295 return false;
296
297 reportfail();
298 j += INCR;
299 }
300
301 return true;
302}
303
304bool PixelHarness::check_downscale_t(downscale_t ref, downscale_t opt)
305{
306 ALIGN_VAR_16(pixel, ref_destf[32 * 32]);
307 ALIGN_VAR_16(pixel, opt_destf[32 * 32]);
308
309 ALIGN_VAR_16(pixel, ref_desth[32 * 32]);
310 ALIGN_VAR_16(pixel, opt_desth[32 * 32]);
311
312 ALIGN_VAR_16(pixel, ref_destv[32 * 32]);
313 ALIGN_VAR_16(pixel, opt_destv[32 * 32]);
314
315 ALIGN_VAR_16(pixel, ref_destc[32 * 32]);
316 ALIGN_VAR_16(pixel, opt_destc[32 * 32]);
317
318 intptr_t src_stride = 64;
319 intptr_t dst_stride = 32;
320 int bx = 32;
321 int by = 32;
322 int j = 0;
323 for (int i = 0; i < ITERS; i++)
324 {
325 int index = i % TEST_CASES;
326 ref(pixel_test_buff[index] + j, ref_destf, ref_desth, ref_destv,
327 ref_destc, src_stride, dst_stride, bx, by);
328 checked(opt, pixel_test_buff[index] + j, opt_destf, opt_desth, opt_destv,
329 opt_destc, src_stride, dst_stride, bx, by);
330
331 if (memcmp(ref_destf, opt_destf, 32 * 32 * sizeof(pixel)))
332 return false;
333 if (memcmp(ref_desth, opt_desth, 32 * 32 * sizeof(pixel)))
334 return false;
335 if (memcmp(ref_destv, opt_destv, 32 * 32 * sizeof(pixel)))
336 return false;
337 if (memcmp(ref_destc, opt_destc, 32 * 32 * sizeof(pixel)))
338 return false;
339
340 reportfail();
341 j += INCR;
342 }
343
344 return true;
345}
346
b53f7c52 347bool PixelHarness::check_cpy2Dto1D_shl_t(cpy2Dto1D_shl_t ref, cpy2Dto1D_shl_t opt)
72b9787e
JB
348{
349 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
350 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
351
352 memset(ref_dest, 0xCD, sizeof(ref_dest));
353 memset(opt_dest, 0xCD, sizeof(opt_dest));
354
355 int j = 0;
356 intptr_t stride = STRIDE;
357 for (int i = 0; i < ITERS; i++)
358 {
359 int shift = (rand() % 7 + 1);
360
361 int index = i % TEST_CASES;
b53f7c52
JB
362 checked(opt, opt_dest, short_test_buff[index] + j, stride, shift);
363 ref(ref_dest, short_test_buff[index] + j, stride, shift);
72b9787e
JB
364
365 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
366 return false;
367
368 reportfail();
369 j += INCR;
370 }
371
372 return true;
373}
374
b53f7c52 375bool PixelHarness::check_cpy2Dto1D_shr_t(cpy2Dto1D_shr_t ref, cpy2Dto1D_shr_t opt)
72b9787e
JB
376{
377 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
378 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
379
380 memset(ref_dest, 0xCD, sizeof(ref_dest));
381 memset(opt_dest, 0xCD, sizeof(opt_dest));
382
383 int j = 0;
384 intptr_t stride = STRIDE;
385 for (int i = 0; i < ITERS; i++)
386 {
387 int shift = (rand() % 7 + 1);
388
389 int index = i % TEST_CASES;
b53f7c52
JB
390 checked(opt, opt_dest, short_test_buff[index] + j, stride, shift);
391 ref(ref_dest, short_test_buff[index] + j, stride, shift);
72b9787e
JB
392
393 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
394 return false;
395
396 reportfail();
397 j += INCR;
398 }
399
400 return true;
401}
402
403bool PixelHarness::check_copy_cnt_t(copy_cnt_t ref, copy_cnt_t opt)
404{
405 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
406 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
407
408 memset(ref_dest, 0xCD, sizeof(ref_dest));
409 memset(opt_dest, 0xCD, sizeof(opt_dest));
410
411 int j = 0;
412 intptr_t stride = STRIDE;
413 for (int i = 0; i < ITERS; i++)
414 {
415 int index = i % TEST_CASES;
416 int opt_cnt = (int)checked(opt, opt_dest, short_test_buff1[index] + j, stride);
417 int ref_cnt = ref(ref_dest, short_test_buff1[index] + j, stride);
418
419 if ((ref_cnt != opt_cnt) || memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
420 return false;
421
422 reportfail();
423 j += INCR;
424 }
425
426 return true;
427}
428
b53f7c52 429bool PixelHarness::check_cpy1Dto2D_shl_t(cpy1Dto2D_shl_t ref, cpy1Dto2D_shl_t opt)
72b9787e
JB
430{
431 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
432 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
433
434 memset(ref_dest, 0xCD, sizeof(ref_dest));
435 memset(opt_dest, 0xCD, sizeof(opt_dest));
436
437 int j = 0;
438 intptr_t stride = STRIDE;
439 for (int i = 0; i < ITERS; i++)
440 {
441 int shift = (rand() % 7 + 1);
442
443 int index = i % TEST_CASES;
b53f7c52
JB
444 checked(opt, opt_dest, short_test_buff[index] + j, stride, shift);
445 ref(ref_dest, short_test_buff[index] + j, stride, shift);
72b9787e
JB
446
447 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
448 return false;
449
450 reportfail();
451 j += INCR;
452 }
453
454 return true;
455}
456
b53f7c52 457bool PixelHarness::check_cpy1Dto2D_shr_t(cpy1Dto2D_shr_t ref, cpy1Dto2D_shr_t opt)
72b9787e
JB
458{
459 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
460 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
461
462 memset(ref_dest, 0xCD, sizeof(ref_dest));
463 memset(opt_dest, 0xCD, sizeof(opt_dest));
464
465 int j = 0;
466 intptr_t stride = STRIDE;
467 for (int i = 0; i < ITERS; i++)
468 {
469 int shift = (rand() % 7 + 1);
470
471 int index = i % TEST_CASES;
472 checked(opt, opt_dest, short_test_buff[index] + j, stride, shift);
473 ref(ref_dest, short_test_buff[index] + j, stride, shift);
474
475 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
476 return false;
477
478 reportfail();
479 j += INCR;
480 }
481
482 return true;
483}
484
485bool PixelHarness::check_pixelavg_pp(pixelavg_pp_t ref, pixelavg_pp_t opt)
486{
487 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
488 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
489
490 int j = 0;
491
492 memset(ref_dest, 0xCD, sizeof(ref_dest));
493 memset(opt_dest, 0xCD, sizeof(opt_dest));
494
495 intptr_t stride = STRIDE;
496 for (int i = 0; i < ITERS; i++)
497 {
498 int index1 = rand() % TEST_CASES;
499 int index2 = rand() % TEST_CASES;
500 checked(ref, ref_dest, stride, pixel_test_buff[index1] + j,
501 stride, pixel_test_buff[index2] + j, stride, 32);
502 opt(opt_dest, stride, pixel_test_buff[index1] + j,
503 stride, pixel_test_buff[index2] + j, stride, 32);
504
505 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
506 return false;
507
508 reportfail();
509 j += INCR;
510 }
511
512 return true;
513}
514
515bool PixelHarness::check_copy_pp(copy_pp_t ref, copy_pp_t opt)
516{
517 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
518 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
519
520 // we don't know the partition size so we are checking the entire output buffer so
521 // we must initialize the buffers
522 memset(ref_dest, 0, sizeof(ref_dest));
523 memset(opt_dest, 0, sizeof(opt_dest));
524
525 int j = 0;
526 intptr_t stride = STRIDE;
527 for (int i = 0; i < ITERS; i++)
528 {
529 int index = i % TEST_CASES;
530 checked(opt, opt_dest, stride, pixel_test_buff[index] + j, stride);
531 ref(ref_dest, stride, pixel_test_buff[index] + j, stride);
532
533 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
534 return false;
535
536 reportfail();
537 j += INCR;
538 }
539
540 return true;
541}
542
543bool PixelHarness::check_copy_sp(copy_sp_t ref, copy_sp_t opt)
544{
545 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
546 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
547
548 // we don't know the partition size so we are checking the entire output buffer so
549 // we must initialize the buffers
550 memset(ref_dest, 0xCD, sizeof(ref_dest));
551 memset(opt_dest, 0xCD, sizeof(opt_dest));
552
553 int j = 0;
554 intptr_t stride1 = 64, stride2 = STRIDE;
555 for (int i = 0; i < ITERS; i++)
556 {
557 int index = i % TEST_CASES;
558 checked(opt, opt_dest, stride1, short_test_buff1[index] + j, stride2);
559 ref(ref_dest, stride1, short_test_buff1[index] + j, stride2);
560
561 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
562 return false;
563
564 reportfail();
565 j += INCR;
566 }
567
568 return true;
569}
570
571bool PixelHarness::check_copy_ps(copy_ps_t ref, copy_ps_t opt)
572{
573 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
574 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
575
576 // we don't know the partition size so we are checking the entire output buffer so
577 // we must initialize the buffers
578 memset(ref_dest, 0xCD, sizeof(ref_dest));
579 memset(opt_dest, 0xCD, sizeof(opt_dest));
580
581 int j = 0;
582 intptr_t stride = STRIDE;
583 for (int i = 0; i < ITERS; i++)
584 {
585 int index = i % TEST_CASES;
586 checked(opt, opt_dest, stride, pixel_test_buff[index] + j, stride);
587 ref(ref_dest, stride, pixel_test_buff[index] + j, stride);
588
589 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
590 return false;
591
592 reportfail();
593 j += INCR;
594 }
595
596 return true;
597}
598
599bool PixelHarness::check_copy_ss(copy_ss_t ref, copy_ss_t opt)
600{
601 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
602 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
603
604 // we don't know the partition size so we are checking the entire output buffer so
605 // we must initialize the buffers
606 memset(ref_dest, 0xCD, sizeof(ref_dest));
607 memset(opt_dest, 0xCD, sizeof(opt_dest));
608
609 int j = 0;
610 intptr_t stride = STRIDE;
611 for (int i = 0; i < ITERS; i++)
612 {
613 int index = i % TEST_CASES;
614 checked(opt, opt_dest, stride, short_test_buff1[index] + j, stride);
615 ref(ref_dest, stride, short_test_buff1[index] + j, stride);
616
617 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
618 return false;
619
620 reportfail();
621 j += INCR;
622 }
623
624 return true;
625}
626
627bool PixelHarness::check_blockfill_s(blockfill_s_t ref, blockfill_s_t opt)
628{
629 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
630 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
631
632 memset(ref_dest, 0xCD, sizeof(ref_dest));
633 memset(opt_dest, 0xCD, sizeof(opt_dest));
634
635 intptr_t stride = 64;
636 for (int i = 0; i < ITERS; i++)
637 {
638 int16_t value = (rand() % SHORT_MAX) + 1;
639
640 checked(opt, opt_dest, stride, value);
641 ref(ref_dest, stride, value);
642
643 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
644 return false;
645
646 reportfail();
647 }
648
649 return true;
650}
651
652bool PixelHarness::check_pixel_sub_ps(pixel_sub_ps_t ref, pixel_sub_ps_t opt)
653{
654 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
655 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
656
657 memset(ref_dest, 0xCD, sizeof(ref_dest));
658 memset(opt_dest, 0xCD, sizeof(opt_dest));
659
660 int j = 0;
661 intptr_t stride2 = 64, stride = STRIDE;
662 for (int i = 0; i < 1; i++)
663 {
664 int index1 = rand() % TEST_CASES;
665 int index2 = rand() % TEST_CASES;
666 checked(opt, opt_dest, stride2, pixel_test_buff[index1] + j,
667 pixel_test_buff[index2] + j, stride, stride);
668 ref(ref_dest, stride2, pixel_test_buff[index1] + j,
669 pixel_test_buff[index2] + j, stride, stride);
670
671 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
672 return false;
673
674 reportfail();
675 j += INCR;
676 }
677
678 return true;
679}
680
681bool PixelHarness::check_scale_pp(scale_t ref, scale_t opt)
682{
683 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
684 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
685
686 memset(ref_dest, 0, sizeof(ref_dest));
687 memset(opt_dest, 0, sizeof(opt_dest));
688
689 int j = 0;
690 intptr_t stride = STRIDE;
691 for (int i = 0; i < ITERS; i++)
692 {
693 int index = i % TEST_CASES;
694 checked(opt, opt_dest, pixel_test_buff[index] + j, stride);
695 ref(ref_dest, pixel_test_buff[index] + j, stride);
696
697 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
698 return false;
699
700 reportfail();
701 j += INCR;
702 }
703
704 return true;
705}
706
707bool PixelHarness::check_transpose(transpose_t ref, transpose_t opt)
708{
709 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
710 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
711
712 memset(ref_dest, 0, sizeof(ref_dest));
713 memset(opt_dest, 0, sizeof(opt_dest));
714
715 int j = 0;
716 intptr_t stride = STRIDE;
717 for (int i = 0; i < ITERS; i++)
718 {
719 int index = i % TEST_CASES;
720 checked(opt, opt_dest, pixel_test_buff[index] + j, stride);
721 ref(ref_dest, pixel_test_buff[index] + j, stride);
722
723 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
724 return false;
725
726 reportfail();
727 j += INCR;
728 }
729
730 return true;
731}
732
733bool PixelHarness::check_pixel_add_ps(pixel_add_ps_t ref, pixel_add_ps_t opt)
734{
735 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
736 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
737
738 memset(ref_dest, 0xCD, sizeof(ref_dest));
739 memset(opt_dest, 0xCD, sizeof(opt_dest));
740
741 int j = 0;
742 intptr_t stride2 = 64, stride = STRIDE;
743 for (int i = 0; i < ITERS; i++)
744 {
745 int index1 = rand() % TEST_CASES;
746 int index2 = rand() % TEST_CASES;
747 checked(opt, opt_dest, stride2, pixel_test_buff[index1] + j, short_test_buff[index2] + j, stride, stride);
748 ref(ref_dest, stride2, pixel_test_buff[index1] + j, short_test_buff[index2] + j, stride, stride);
749
750 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
751 return false;
752
753 reportfail();
754 j += INCR;
755 }
756
757 return true;
758}
759
760bool PixelHarness::check_pixel_var(var_t ref, var_t opt)
761{
762 int j = 0;
763
764 intptr_t stride = STRIDE;
765
766 for (int i = 0; i < ITERS; i++)
767 {
768 int index = i % TEST_CASES;
769 uint64_t vres = checked(opt, pixel_test_buff[index], stride);
770 uint64_t cres = ref(pixel_test_buff[index], stride);
771 if (vres != cres)
772 return false;
773
774 reportfail();
775 j += INCR;
776 }
777
778 return true;
779}
780
781bool PixelHarness::check_ssim_4x4x2_core(ssim_4x4x2_core_t ref, ssim_4x4x2_core_t opt)
782{
783 ALIGN_VAR_32(int, sum0[2][4]);
784 ALIGN_VAR_32(int, sum1[2][4]);
785
786 for (int i = 0; i < ITERS; i++)
787 {
788 intptr_t stride = rand() % 64;
789 int index1 = rand() % TEST_CASES;
790 int index2 = rand() % TEST_CASES;
791 ref(pixel_test_buff[index1] + i, stride, pixel_test_buff[index2] + i, stride, sum0);
792 checked(opt, pixel_test_buff[index1] + i, stride, pixel_test_buff[index2] + i, stride, sum1);
793
794 if (memcmp(sum0, sum1, sizeof(sum0)))
795 return false;
796
797 reportfail();
798 }
799
800 return true;
801}
802
803/* TODO: This function causes crashes when checked. Is this a real bug? */
804bool PixelHarness::check_ssim_end(ssim_end4_t ref, ssim_end4_t opt)
805{
806 ALIGN_VAR_32(int, sum0[5][4]);
807 ALIGN_VAR_32(int, sum1[5][4]);
808
809 for (int i = 0; i < ITERS; i++)
810 {
811 for (int j = 0; j < 5; j++)
812 {
813 for (int k = 0; k < 4; k++)
814 {
815 sum0[j][k] = rand() % (1 << 12);
816 sum1[j][k] = rand() % (1 << 12);
817 }
818 }
819
820 int width = (rand() % 4) + 1; // range[1-4]
821 float cres = ref(sum0, sum1, width);
822 float vres = checked_float(opt, sum0, sum1, width);
823 if (fabs(vres - cres) > 0.00001)
824 return false;
825
826 reportfail();
827 }
828
829 return true;
830}
831
832bool PixelHarness::check_addAvg(addAvg_t ref, addAvg_t opt)
833{
834 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
835 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
836
837 int j = 0;
838
839 memset(ref_dest, 0xCD, sizeof(ref_dest));
840 memset(opt_dest, 0xCD, sizeof(opt_dest));
841 intptr_t stride = STRIDE;
842
843 for (int i = 0; i < ITERS; i++)
844 {
845 int index1 = rand() % TEST_CASES;
846 int index2 = rand() % TEST_CASES;
847 ref(short_test_buff2[index1] + j, short_test_buff2[index2] + j, ref_dest, stride, stride, stride);
848 checked(opt, short_test_buff2[index1] + j, short_test_buff2[index2] + j, opt_dest, stride, stride, stride);
849 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
850 return false;
851
852 reportfail();
853 j += INCR;
854 }
855
856 return true;
857}
858
859bool PixelHarness::check_saoCuOrgE0_t(saoCuOrgE0_t ref, saoCuOrgE0_t opt)
860{
861 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
862 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
863
864 memset(ref_dest, 0xCD, sizeof(ref_dest));
865 memset(opt_dest, 0xCD, sizeof(opt_dest));
866
867 int j = 0;
868
869 for (int i = 0; i < ITERS; i++)
870 {
871 int width = 16 * (rand() % 4 + 1);
872 int8_t sign = rand() % 3;
873 if (sign == 2)
874 {
875 sign = -1;
876 }
877
878 ref(ref_dest, psbuf1 + j, width, sign);
879 checked(opt, opt_dest, psbuf1 + j, width, sign);
880
881 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
882 return false;
883
884 reportfail();
885 j += INCR;
886 }
887
888 return true;
889}
890
891bool PixelHarness::check_planecopy_sp(planecopy_sp_t ref, planecopy_sp_t opt)
892{
893 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
894 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
895
896 memset(ref_dest, 0xCD, sizeof(ref_dest));
897 memset(opt_dest, 0xCD, sizeof(opt_dest));
898
899 int width = 16 + rand() % 48;
900 int height = 16 + rand() % 48;
901 intptr_t srcStride = 64;
902 intptr_t dstStride = width;
903 int j = 0;
904
905 for (int i = 0; i < ITERS; i++)
906 {
907 int index = i % TEST_CASES;
908 checked(opt, ushort_test_buff[index] + j, srcStride, opt_dest, dstStride, width, height, (int)8, (uint16_t)255);
909 ref(ushort_test_buff[index] + j, srcStride, ref_dest, dstStride, width, height, (int)8, (uint16_t)255);
910
911 if (memcmp(ref_dest, opt_dest, width * height * sizeof(pixel)))
912 return false;
913
914 reportfail();
915 j += INCR;
916 }
917
918 return true;
919}
920
921bool PixelHarness::check_planecopy_cp(planecopy_cp_t ref, planecopy_cp_t opt)
922{
923 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
924 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
925
926 memset(ref_dest, 0xCD, sizeof(ref_dest));
927 memset(opt_dest, 0xCD, sizeof(opt_dest));
928
929 int width = 16 + rand() % 48;
930 int height = 16 + rand() % 48;
931 intptr_t srcStride = 64;
932 intptr_t dstStride = width;
933 int j = 0;
934
935 for (int i = 0; i < ITERS; i++)
936 {
937 int index = i % TEST_CASES;
938 checked(opt, uchar_test_buff[index] + j, srcStride, opt_dest, dstStride, width, height, (int)2);
939 ref(uchar_test_buff[index] + j, srcStride, ref_dest, dstStride, width, height, (int)2);
940
941 if (memcmp(ref_dest, opt_dest, width * height * sizeof(pixel)))
942 return false;
943
944 reportfail();
945 j += INCR;
946 }
947
948 return true;
949}
950
951bool PixelHarness::testPartition(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)
952{
953 if (opt.satd[part])
954 {
955 if (!check_pixelcmp(ref.satd[part], opt.satd[part]))
956 {
957 printf("satd[%s]: failed!\n", lumaPartStr[part]);
958 return false;
959 }
960 }
961
962 if (opt.sa8d_inter[part])
963 {
964 if (!check_pixelcmp(ref.sa8d_inter[part], opt.sa8d_inter[part]))
965 {
966 printf("sa8d_inter[%s]: failed!\n", lumaPartStr[part]);
967 return false;
968 }
969 }
970
971 if (opt.sad[part])
972 {
973 if (!check_pixelcmp(ref.sad[part], opt.sad[part]))
974 {
975 printf("sad[%s]: failed!\n", lumaPartStr[part]);
976 return false;
977 }
978 }
979
980 if (opt.sse_pp[part])
981 {
982 if (!check_pixelcmp(ref.sse_pp[part], opt.sse_pp[part]))
983 {
984 printf("sse_pp[%s]: failed!\n", lumaPartStr[part]);
985 return false;
986 }
987 }
988
989 if (opt.sse_sp[part])
990 {
991 if (!check_pixelcmp_sp(ref.sse_sp[part], opt.sse_sp[part]))
992 {
993 printf("sse_sp[%s]: failed!\n", lumaPartStr[part]);
994 return false;
995 }
996 }
997
998 if (opt.sse_ss[part])
999 {
1000 if (!check_pixelcmp_ss(ref.sse_ss[part], opt.sse_ss[part]))
1001 {
1002 printf("sse_ss[%s]: failed!\n", lumaPartStr[part]);
1003 return false;
1004 }
1005 }
1006
1007 if (opt.sad_x3[part])
1008 {
1009 if (!check_pixelcmp_x3(ref.sad_x3[part], opt.sad_x3[part]))
1010 {
1011 printf("sad_x3[%s]: failed!\n", lumaPartStr[part]);
1012 return false;
1013 }
1014 }
1015
1016 if (opt.sad_x4[part])
1017 {
1018 if (!check_pixelcmp_x4(ref.sad_x4[part], opt.sad_x4[part]))
1019 {
1020 printf("sad_x4[%s]: failed!\n", lumaPartStr[part]);
1021 return false;
1022 }
1023 }
1024
1025 if (opt.pixelavg_pp[part])
1026 {
1027 if (!check_pixelavg_pp(ref.pixelavg_pp[part], opt.pixelavg_pp[part]))
1028 {
1029 printf("pixelavg_pp[%s]: failed!\n", lumaPartStr[part]);
1030 return false;
1031 }
1032 }
1033
1034 if (opt.luma_copy_pp[part])
1035 {
1036 if (!check_copy_pp(ref.luma_copy_pp[part], opt.luma_copy_pp[part]))
1037 {
1038 printf("luma_copy_pp[%s] failed\n", lumaPartStr[part]);
1039 return false;
1040 }
1041 }
1042
1043 if (opt.luma_copy_sp[part])
1044 {
1045 if (!check_copy_sp(ref.luma_copy_sp[part], opt.luma_copy_sp[part]))
1046 {
1047 printf("luma_copy_sp[%s] failed\n", lumaPartStr[part]);
1048 return false;
1049 }
1050 }
1051
1052 if (opt.luma_copy_ps[part])
1053 {
1054 if (!check_copy_ps(ref.luma_copy_ps[part], opt.luma_copy_ps[part]))
1055 {
1056 printf("luma_copy_ps[%s] failed\n", lumaPartStr[part]);
1057 return false;
1058 }
1059 }
1060
1061 if (opt.luma_copy_ss[part])
1062 {
1063 if (!check_copy_ss(ref.luma_copy_ss[part], opt.luma_copy_ss[part]))
1064 {
1065 printf("luma_copy_ss[%s] failed\n", lumaPartStr[part]);
1066 return false;
1067 }
1068 }
1069
1070 if (opt.luma_addAvg[part])
1071 {
1072 if (!check_addAvg(ref.luma_addAvg[part], opt.luma_addAvg[part]))
1073 {
1074 printf("luma_addAvg[%s] failed\n", lumaPartStr[part]);
1075 return false;
1076 }
1077 }
1078
1079 if (part < NUM_SQUARE_BLOCKS)
1080 {
1081 if (opt.luma_sub_ps[part])
1082 {
1083 if (!check_pixel_sub_ps(ref.luma_sub_ps[part], opt.luma_sub_ps[part]))
1084 {
1085 printf("luma_sub_ps[%s] failed\n", lumaPartStr[part]);
1086 return false;
1087 }
1088 }
1089
1090 if (opt.luma_add_ps[part])
1091 {
1092 if (!check_pixel_add_ps(ref.luma_add_ps[part], opt.luma_add_ps[part]))
1093 {
1094 printf("luma_add_ps[%s] failed\n", lumaPartStr[part]);
1095 return false;
1096 }
1097 }
1098 }
1099
1100 for (int i = 0; i < X265_CSP_COUNT; i++)
1101 {
1102 if (opt.chroma[i].copy_pp[part])
1103 {
1104 if (!check_copy_pp(ref.chroma[i].copy_pp[part], opt.chroma[i].copy_pp[part]))
1105 {
1106 printf("chroma_copy_pp[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
1107 return false;
1108 }
1109 }
1110 if (opt.chroma[i].copy_sp[part])
1111 {
1112 if (!check_copy_sp(ref.chroma[i].copy_sp[part], opt.chroma[i].copy_sp[part]))
1113 {
1114 printf("chroma_copy_sp[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
1115 return false;
1116 }
1117 }
1118 if (opt.chroma[i].copy_ps[part])
1119 {
1120 if (!check_copy_ps(ref.chroma[i].copy_ps[part], opt.chroma[i].copy_ps[part]))
1121 {
1122 printf("chroma_copy_ps[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
1123 return false;
1124 }
1125 }
1126 if (opt.chroma[i].copy_ss[part])
1127 {
1128 if (!check_copy_ss(ref.chroma[i].copy_ss[part], opt.chroma[i].copy_ss[part]))
1129 {
1130 printf("chroma_copy_ss[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
1131 return false;
1132 }
1133 }
1134 if (opt.chroma[i].addAvg[part])
1135 {
1136 if (!check_addAvg(ref.chroma[i].addAvg[part], opt.chroma[i].addAvg[part]))
1137 {
1138 printf("chroma_addAvg[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
1139 return false;
1140 }
1141 }
1142 if (part < NUM_SQUARE_BLOCKS)
1143 {
1144 if (opt.chroma[i].sub_ps[part])
1145 {
1146 if (!check_pixel_sub_ps(ref.chroma[i].sub_ps[part], opt.chroma[i].sub_ps[part]))
1147 {
1148 printf("chroma_sub_ps[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
1149 return false;
1150 }
1151 }
1152 if (opt.chroma[i].add_ps[part])
1153 {
1154 if (!check_pixel_add_ps(ref.chroma[i].add_ps[part], opt.chroma[i].add_ps[part]))
1155 {
1156 printf("chroma_add_ps[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
1157 return false;
1158 }
1159 }
1160 }
1161 }
1162
1163 return true;
1164}
1165
1166bool PixelHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPrimitives& opt)
1167{
1168 for (int size = 4; size <= 64; size *= 2)
1169 {
1170 int part = partitionFromSizes(size, size); // 2Nx2N
1171 if (!testPartition(part, ref, opt)) return false;
1172
1173 if (size > 4)
1174 {
1175 part = partitionFromSizes(size, size >> 1); // 2NxN
1176 if (!testPartition(part, ref, opt)) return false;
1177 part = partitionFromSizes(size >> 1, size); // Nx2N
1178 if (!testPartition(part, ref, opt)) return false;
1179 }
1180 if (size > 8)
1181 {
1182 // 4 AMP modes
1183 part = partitionFromSizes(size, size >> 2);
1184 if (!testPartition(part, ref, opt)) return false;
1185 part = partitionFromSizes(size, 3 * (size >> 2));
1186 if (!testPartition(part, ref, opt)) return false;
1187
1188 part = partitionFromSizes(size >> 2, size);
1189 if (!testPartition(part, ref, opt)) return false;
1190 part = partitionFromSizes(3 * (size >> 2), size);
1191 if (!testPartition(part, ref, opt)) return false;
1192 }
1193 }
1194
1195 for (int i = 0; i < NUM_SQUARE_BLOCKS; i++)
1196 {
1197 if (opt.calcresidual[i])
1198 {
1199 if (!check_calresidual(ref.calcresidual[i], opt.calcresidual[i]))
1200 {
1201 printf("calcresidual width: %d failed!\n", 4 << i);
1202 return false;
1203 }
1204 }
1205 if (opt.sa8d[i])
1206 {
1207 if (!check_pixelcmp(ref.sa8d[i], opt.sa8d[i]))
1208 {
1209 printf("sa8d[%dx%d]: failed!\n", 4 << i, 4 << i);
1210 return false;
1211 }
1212 }
1213
1214 if ((i <= BLOCK_32x32) && opt.ssd_s[i])
1215 {
1216 if (!check_ssd_s(ref.ssd_s[i], opt.ssd_s[i]))
1217 {
1218 printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i);
1219 return false;
1220 }
1221 }
1222
1223 if (opt.blockfill_s[i])
1224 {
1225 if (!check_blockfill_s(ref.blockfill_s[i], opt.blockfill_s[i]))
1226 {
1227 printf("blockfill_s[%dx%d]: failed!\n", 4 << i, 4 << i);
1228 return false;
1229 }
1230 }
1231 if (opt.transpose[i])
1232 {
1233 if (!check_transpose(ref.transpose[i], opt.transpose[i]))
1234 {
1235 printf("transpose[%dx%d] failed\n", 4 << i, 4 << i);
1236 return false;
1237 }
1238 }
1239
1240 if (opt.var[i])
1241 {
1242 if (!check_pixel_var(ref.var[i], opt.var[i]))
1243 {
1244 printf("var[%dx%d] failed\n", 4 << i, 4 << i);
1245 return false;
1246 }
1247 }
1248
1249 if ((i < BLOCK_64x64) && opt.copy_cnt[i])
1250 {
1251 if (!check_copy_cnt_t(ref.copy_cnt[i], opt.copy_cnt[i]))
1252 {
1253 printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
1254 return false;
1255 }
1256 }
1257
b53f7c52 1258 if ((i < BLOCK_64x64) && opt.cpy2Dto1D_shl[i])
72b9787e 1259 {
b53f7c52 1260 if (!check_cpy2Dto1D_shl_t(ref.cpy2Dto1D_shl[i], opt.cpy2Dto1D_shl[i]))
72b9787e 1261 {
b53f7c52 1262 printf("cpy2Dto1D_shl failed!\n");
72b9787e
JB
1263 return false;
1264 }
1265 }
1266
b53f7c52 1267 if ((i < BLOCK_64x64) && opt.cpy2Dto1D_shr[i])
72b9787e 1268 {
b53f7c52 1269 if (!check_cpy2Dto1D_shr_t(ref.cpy2Dto1D_shr[i], opt.cpy2Dto1D_shr[i]))
72b9787e 1270 {
b53f7c52 1271 printf("cpy2Dto1D_shr failed!\n");
72b9787e
JB
1272 return false;
1273 }
1274 }
1275
b53f7c52 1276 if ((i < BLOCK_64x64) && opt.cpy1Dto2D_shl[i])
72b9787e 1277 {
b53f7c52 1278 if (!check_cpy1Dto2D_shl_t(ref.cpy1Dto2D_shl[i], opt.cpy1Dto2D_shl[i]))
72b9787e 1279 {
b53f7c52 1280 printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
72b9787e
JB
1281 return false;
1282 }
1283 }
1284
b53f7c52 1285 if ((i < BLOCK_64x64) && opt.cpy1Dto2D_shr[i])
72b9787e 1286 {
b53f7c52
JB
1287 if (!check_cpy1Dto2D_shr_t(ref.cpy1Dto2D_shr[i], opt.cpy1Dto2D_shr[i]))
1288 {
1289 printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i);
1290 return false;
1291 }
72b9787e
JB
1292 }
1293 }
1294
1295 if (opt.weight_pp)
1296 {
1297 if (!check_weightp(ref.weight_pp, opt.weight_pp))
1298 {
1299 printf("Weighted Prediction (pixel) failed!\n");
1300 return false;
1301 }
1302 }
1303
1304 if (opt.weight_sp)
1305 {
1306 if (!check_weightp(ref.weight_sp, opt.weight_sp))
1307 {
1308 printf("Weighted Prediction (short) failed!\n");
1309 return false;
1310 }
1311 }
1312
b53f7c52 1313 if (opt.frameInitLowres)
72b9787e 1314 {
b53f7c52 1315 if (!check_downscale_t(ref.frameInitLowres, opt.frameInitLowres))
72b9787e
JB
1316 {
1317 printf("downscale failed!\n");
1318 return false;
1319 }
1320 }
1321
1322 if (opt.scale1D_128to64)
1323 {
1324 if (!check_scale_pp(ref.scale1D_128to64, opt.scale1D_128to64))
1325 {
1326 printf("scale1D_128to64 failed!\n");
1327 return false;
1328 }
1329 }
1330
1331 if (opt.scale2D_64to32)
1332 {
1333 if (!check_scale_pp(ref.scale2D_64to32, opt.scale2D_64to32))
1334 {
1335 printf("scale2D_64to32 failed!\n");
1336 return false;
1337 }
1338 }
1339
1340 if (opt.ssim_4x4x2_core)
1341 {
1342 if (!check_ssim_4x4x2_core(ref.ssim_4x4x2_core, opt.ssim_4x4x2_core))
1343 {
1344 printf("ssim_end_4 failed!\n");
1345 return false;
1346 }
1347 }
1348
1349 if (opt.ssim_end_4)
1350 {
1351 if (!check_ssim_end(ref.ssim_end_4, opt.ssim_end_4))
1352 {
1353 printf("ssim_end_4 failed!\n");
1354 return false;
1355 }
1356 }
1357
1358 if (opt.saoCuOrgE0)
1359 {
1360 if (!check_saoCuOrgE0_t(ref.saoCuOrgE0, opt.saoCuOrgE0))
1361 {
1362 printf("SAO_EO_0 failed\n");
1363 return false;
1364 }
1365 }
1366
1367 if (opt.planecopy_sp)
1368 {
1369 if (!check_planecopy_sp(ref.planecopy_sp, opt.planecopy_sp))
1370 {
1371 printf("planecopy_sp failed\n");
1372 return false;
1373 }
1374 }
1375
1376 if (opt.planecopy_cp)
1377 {
1378 if (!check_planecopy_cp(ref.planecopy_cp, opt.planecopy_cp))
1379 {
1380 printf("planecopy_cp failed\n");
1381 return false;
1382 }
1383 }
1384
72b9787e
JB
1385 return true;
1386}
1387
1388void PixelHarness::measurePartition(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)
1389{
1390 ALIGN_VAR_16(int, cres[16]);
1391 pixel *fref = pbuf2 + 2 * INCR;
1392 char header[128];
1393#define HEADER(str, ...) sprintf(header, str, __VA_ARGS__); printf("%22s", header);
1394
1395 if (opt.satd[part])
1396 {
1397 HEADER("satd[%s]", lumaPartStr[part]);
1398 REPORT_SPEEDUP(opt.satd[part], ref.satd[part], pbuf1, STRIDE, fref, STRIDE);
1399 }
1400
1401 if (opt.pixelavg_pp[part])
1402 {
1403 HEADER("avg_pp[%s]", lumaPartStr[part]);
1404 REPORT_SPEEDUP(opt.pixelavg_pp[part], ref.pixelavg_pp[part], pbuf1, STRIDE, pbuf2, STRIDE, pbuf3, STRIDE, 32);
1405 }
1406
1407 if (opt.sa8d_inter[part])
1408 {
1409 HEADER("sa8d[%s]", lumaPartStr[part]);
1410 REPORT_SPEEDUP(opt.sa8d_inter[part], ref.sa8d_inter[part], pbuf1, STRIDE, fref, STRIDE);
1411 }
1412
1413 if (opt.sad[part])
1414 {
1415 HEADER("sad[%s]", lumaPartStr[part]);
1416 REPORT_SPEEDUP(opt.sad[part], ref.sad[part], pbuf1, STRIDE, fref, STRIDE);
1417 }
1418
1419 if (opt.sad_x3[part])
1420 {
1421 HEADER("sad_x3[%s]", lumaPartStr[part]);
1422 REPORT_SPEEDUP(opt.sad_x3[part], ref.sad_x3[part], pbuf1, fref, fref + 1, fref - 1, FENC_STRIDE + 5, &cres[0]);
1423 }
1424
1425 if (opt.sad_x4[part])
1426 {
1427 HEADER("sad_x4[%s]", lumaPartStr[part]);
1428 REPORT_SPEEDUP(opt.sad_x4[part], ref.sad_x4[part], pbuf1, fref, fref + 1, fref - 1, fref - INCR, FENC_STRIDE + 5, &cres[0]);
1429 }
1430
1431 if (opt.sse_pp[part])
1432 {
1433 HEADER("sse_pp[%s]", lumaPartStr[part]);
1434 REPORT_SPEEDUP(opt.sse_pp[part], ref.sse_pp[part], pbuf1, STRIDE, fref, STRIDE);
1435 }
1436
1437 if (opt.sse_sp[part])
1438 {
1439 HEADER("sse_sp[%s]", lumaPartStr[part]);
1440 REPORT_SPEEDUP(opt.sse_sp[part], ref.sse_sp[part], (int16_t*)pbuf1, STRIDE, fref, STRIDE);
1441 }
1442
1443 if (opt.sse_ss[part])
1444 {
1445 HEADER("sse_ss[%s]", lumaPartStr[part]);
1446 REPORT_SPEEDUP(opt.sse_ss[part], ref.sse_ss[part], (int16_t*)pbuf1, STRIDE, (int16_t*)fref, STRIDE);
1447 }
1448
1449 if (opt.luma_copy_pp[part])
1450 {
1451 HEADER("luma_copy_pp[%s]", lumaPartStr[part]);
1452 REPORT_SPEEDUP(opt.luma_copy_pp[part], ref.luma_copy_pp[part], pbuf1, 64, pbuf2, 128);
1453 }
1454
1455 if (opt.luma_copy_sp[part])
1456 {
1457 HEADER("luma_copy_sp[%s]", lumaPartStr[part]);
1458 REPORT_SPEEDUP(opt.luma_copy_sp[part], ref.luma_copy_sp[part], pbuf1, 64, sbuf3, 128);
1459 }
1460
1461 if (opt.luma_copy_ps[part])
1462 {
1463 HEADER("luma_copy_ps[%s]", lumaPartStr[part]);
1464 REPORT_SPEEDUP(opt.luma_copy_ps[part], ref.luma_copy_ps[part], sbuf1, 64, pbuf1, 128);
1465 }
1466 if (opt.luma_copy_ss[part])
1467 {
1468 HEADER("luma_copy_ss[%s]", lumaPartStr[part]);
1469 REPORT_SPEEDUP(opt.luma_copy_ss[part], ref.luma_copy_ss[part], sbuf1, 64, sbuf2, 128);
1470 }
1471 if (opt.luma_addAvg[part])
1472 {
1473 HEADER("luma_addAvg[%s]", lumaPartStr[part]);
1474 REPORT_SPEEDUP(opt.luma_addAvg[part], ref.luma_addAvg[part], sbuf1, sbuf2, pbuf1, STRIDE, STRIDE, STRIDE);
1475 }
1476 if (part < NUM_SQUARE_BLOCKS)
1477 {
1478 if (opt.luma_sub_ps[part])
1479 {
1480 HEADER("luma_sub_ps[%s]", lumaPartStr[part]);
1481 REPORT_SPEEDUP(opt.luma_sub_ps[part], ref.luma_sub_ps[part], (int16_t*)pbuf1, FENC_STRIDE, pbuf2, pbuf1, STRIDE, STRIDE);
1482 }
1483 if (opt.luma_add_ps[part])
1484 {
1485 HEADER("luma_add_ps[%s]", lumaPartStr[part]);
1486 REPORT_SPEEDUP(opt.luma_add_ps[part], ref.luma_add_ps[part], pbuf1, FENC_STRIDE, pbuf2, sbuf1, STRIDE, STRIDE);
1487 }
1488 }
1489
1490 for (int i = 0; i < X265_CSP_COUNT; i++)
1491 {
1492 if (opt.chroma[i].copy_pp[part])
1493 {
1494 HEADER("[%s] copy_pp[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
1495 REPORT_SPEEDUP(opt.chroma[i].copy_pp[part], ref.chroma[i].copy_pp[part], pbuf1, 64, pbuf2, 128);
1496 }
1497 if (opt.chroma[i].copy_sp[part])
1498 {
1499 HEADER("[%s] copy_sp[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
1500 REPORT_SPEEDUP(opt.chroma[i].copy_sp[part], ref.chroma[i].copy_sp[part], pbuf1, 64, sbuf3, 128);
1501 }
1502 if (opt.chroma[i].copy_ps[part])
1503 {
1504 HEADER("[%s] copy_ps[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
1505 REPORT_SPEEDUP(opt.chroma[i].copy_ps[part], ref.chroma[i].copy_ps[part], sbuf1, 64, pbuf1, 128);
1506 }
1507 if (opt.chroma[i].copy_ss[part])
1508 {
1509 HEADER("[%s] copy_ss[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
1510 REPORT_SPEEDUP(opt.chroma[i].copy_ss[part], ref.chroma[i].copy_ss[part], sbuf1, 64, sbuf2, 128);
1511 }
1512 if (opt.chroma[i].addAvg[part])
1513 {
1514 HEADER("[%s] addAvg[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
1515 REPORT_SPEEDUP(opt.chroma[i].addAvg[part], ref.chroma[i].addAvg[part], sbuf1, sbuf2, pbuf1, STRIDE, STRIDE, STRIDE);
1516 }
1517 if (part < NUM_SQUARE_BLOCKS)
1518 {
1519 if (opt.chroma[i].sub_ps[part])
1520 {
1521 HEADER("[%s] sub_ps[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
1522 REPORT_SPEEDUP(opt.chroma[i].sub_ps[part], ref.chroma[i].sub_ps[part], (int16_t*)pbuf1, FENC_STRIDE, pbuf2, pbuf1, STRIDE, STRIDE);
1523 }
1524 if (opt.chroma[i].add_ps[part])
1525 {
1526 HEADER("[%s] add_ps[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
1527 REPORT_SPEEDUP(opt.chroma[i].add_ps[part], ref.chroma[i].add_ps[part], pbuf1, FENC_STRIDE, pbuf2, sbuf1, STRIDE, STRIDE);
1528 }
1529 }
1530 }
1531
1532#undef HEADER
1533}
1534
1535void PixelHarness::measureSpeed(const EncoderPrimitives& ref, const EncoderPrimitives& opt)
1536{
1537 char header[128];
1538
1539#define HEADER(str, ...) sprintf(header, str, __VA_ARGS__); printf("%22s", header);
1540#define HEADER0(str) printf("%22s", str);
1541
1542 for (int size = 4; size <= 64; size *= 2)
1543 {
1544 int part = partitionFromSizes(size, size); // 2Nx2N
1545 measurePartition(part, ref, opt);
1546
1547 if (size > 4)
1548 {
1549 part = partitionFromSizes(size, size >> 1); // 2NxN
1550 measurePartition(part, ref, opt);
1551 part = partitionFromSizes(size >> 1, size); // Nx2N
1552 measurePartition(part, ref, opt);
1553 }
1554 if (size > 8)
1555 {
1556 // 4 AMP modes
1557 part = partitionFromSizes(size, size >> 2);
1558 measurePartition(part, ref, opt);
1559 part = partitionFromSizes(size, 3 * (size >> 2));
1560 measurePartition(part, ref, opt);
1561
1562 part = partitionFromSizes(size >> 2, size);
1563 measurePartition(part, ref, opt);
1564 part = partitionFromSizes(3 * (size >> 2), size);
1565 measurePartition(part, ref, opt);
1566 }
1567 }
1568
1569 for (int i = 0; i < NUM_SQUARE_BLOCKS; i++)
1570 {
1571 if ((i <= BLOCK_32x32) && opt.ssd_s[i])
1572 {
1573 HEADER("ssd_s[%dx%d]", 4 << i, 4 << i);
1574 REPORT_SPEEDUP(opt.ssd_s[i], ref.ssd_s[i], sbuf1, STRIDE);
1575 }
1576 if (opt.sa8d[i])
1577 {
1578 HEADER("sa8d[%dx%d]", 4 << i, 4 << i);
1579 REPORT_SPEEDUP(opt.sa8d[i], ref.sa8d[i], pbuf1, STRIDE, pbuf2, STRIDE);
1580 }
1581 if (opt.calcresidual[i])
1582 {
1583 HEADER("residual[%dx%d]", 4 << i, 4 << i);
1584 REPORT_SPEEDUP(opt.calcresidual[i], ref.calcresidual[i], pbuf1, pbuf2, sbuf1, 64);
1585 }
1586
1587 if (opt.blockfill_s[i])
1588 {
1589 HEADER("blkfill[%dx%d]", 4 << i, 4 << i);
1590 REPORT_SPEEDUP(opt.blockfill_s[i], ref.blockfill_s[i], sbuf1, 64, SHORT_MAX);
1591 }
1592
1593 if (opt.transpose[i])
1594 {
1595 HEADER("transpose[%dx%d]", 4 << i, 4 << i);
1596 REPORT_SPEEDUP(opt.transpose[i], ref.transpose[i], pbuf1, pbuf2, STRIDE);
1597 }
1598
1599 if (opt.var[i])
1600 {
1601 HEADER("var[%dx%d]", 4 << i, 4 << i);
1602 REPORT_SPEEDUP(opt.var[i], ref.var[i], pbuf1, STRIDE);
1603 }
1604
b53f7c52 1605 if ((i < BLOCK_64x64) && opt.cpy2Dto1D_shl[i])
72b9787e 1606 {
b53f7c52
JB
1607 HEADER("cpy2Dto1D_shl[%dx%d]", 4 << i, 4 << i);
1608 REPORT_SPEEDUP(opt.cpy2Dto1D_shl[i], ref.cpy2Dto1D_shl[i], sbuf1, sbuf2, STRIDE, MAX_TR_DYNAMIC_RANGE - X265_DEPTH - (i + 2));
72b9787e
JB
1609 }
1610
b53f7c52 1611 if ((i < BLOCK_64x64) && opt.cpy2Dto1D_shr[i])
72b9787e 1612 {
b53f7c52
JB
1613 HEADER("cpy2Dto1D_shr[%dx%d]", 4 << i, 4 << i);
1614 REPORT_SPEEDUP(opt.cpy2Dto1D_shr[i], ref.cpy2Dto1D_shr[i], sbuf1, sbuf2, STRIDE, 3);
72b9787e
JB
1615 }
1616
b53f7c52 1617 if ((i < BLOCK_64x64) && opt.cpy1Dto2D_shl[i])
72b9787e 1618 {
b53f7c52
JB
1619 HEADER("cpy1Dto2D_shl[%dx%d]", 4 << i, 4 << i);
1620 REPORT_SPEEDUP(opt.cpy1Dto2D_shl[i], ref.cpy1Dto2D_shl[i], sbuf1, sbuf2, STRIDE, 64);
72b9787e
JB
1621 }
1622
b53f7c52 1623 if ((i < BLOCK_64x64) && opt.cpy1Dto2D_shr[i])
72b9787e 1624 {
b53f7c52
JB
1625 HEADER("cpy1Dto2D_shr[%dx%d]", 4 << i, 4 << i);
1626 REPORT_SPEEDUP(opt.cpy1Dto2D_shr[i], ref.cpy1Dto2D_shr[i], sbuf1, sbuf2, STRIDE, 64);
72b9787e
JB
1627 }
1628
b53f7c52
JB
1629 if ((i < BLOCK_64x64) && opt.copy_cnt[i])
1630 {
1631 HEADER("copy_cnt[%dx%d]", 4 << i, 4 << i);
1632 REPORT_SPEEDUP(opt.copy_cnt[i], ref.copy_cnt[i], sbuf1, sbuf2, STRIDE);
1633 }
72b9787e
JB
1634 }
1635
1636 if (opt.weight_pp)
1637 {
1638 HEADER0("weight_pp");
1639 REPORT_SPEEDUP(opt.weight_pp, ref.weight_pp, pbuf1, pbuf2, 64, 32, 32, 128, 1 << 9, 10, 100);
1640 }
1641
1642 if (opt.weight_sp)
1643 {
1644 HEADER0("weight_sp");
1645 REPORT_SPEEDUP(opt.weight_sp, ref.weight_sp, (int16_t*)sbuf1, pbuf1, 64, 64, 32, 32, 128, 1 << 9, 10, 100);
1646 }
1647
b53f7c52 1648 if (opt.frameInitLowres)
72b9787e
JB
1649 {
1650 HEADER0("downscale");
b53f7c52 1651 REPORT_SPEEDUP(opt.frameInitLowres, ref.frameInitLowres, pbuf2, pbuf1, pbuf2, pbuf3, pbuf4, 64, 64, 64, 64);
72b9787e
JB
1652 }
1653
1654 if (opt.scale1D_128to64)
1655 {
1656 HEADER0("scale1D_128to64");
1657 REPORT_SPEEDUP(opt.scale1D_128to64, ref.scale1D_128to64, pbuf2, pbuf1, 64);
1658 }
1659
1660 if (opt.scale2D_64to32)
1661 {
1662 HEADER0("scale2D_64to32");
1663 REPORT_SPEEDUP(opt.scale2D_64to32, ref.scale2D_64to32, pbuf2, pbuf1, 64);
1664 }
1665
1666 if (opt.ssim_4x4x2_core)
1667 {
1668 HEADER0("ssim_4x4x2_core");
1669 REPORT_SPEEDUP(opt.ssim_4x4x2_core, ref.ssim_4x4x2_core, pbuf1, 64, pbuf2, 64, (int(*)[4])sbuf1);
1670 }
1671
1672 if (opt.ssim_end_4)
1673 {
1674 HEADER0("ssim_end_4");
1675 REPORT_SPEEDUP(opt.ssim_end_4, ref.ssim_end_4, (int(*)[4])pbuf2, (int(*)[4])pbuf1, 4);
1676 }
1677
1678 if (opt.saoCuOrgE0)
1679 {
1680 HEADER0("SAO_EO_0");
1681 REPORT_SPEEDUP(opt.saoCuOrgE0, ref.saoCuOrgE0, pbuf1, psbuf1, 64, 1);
1682 }
1683
1684 if (opt.planecopy_sp)
1685 {
1686 HEADER0("planecopy_sp");
1687 REPORT_SPEEDUP(opt.planecopy_sp, ref.planecopy_sp, ushort_test_buff[0], 64, pbuf1, 64, 64, 64, 8, 255);
1688 }
1689
1690 if (opt.planecopy_cp)
1691 {
1692 HEADER0("planecopy_cp");
1693 REPORT_SPEEDUP(opt.planecopy_cp, ref.planecopy_cp, uchar_test_buff[0], 64, pbuf1, 64, 64, 64, 2);
1694 }
72b9787e 1695}