1 /*****************************************************************************
2 * Copyright (C) 2013 x265 project
4 * Authors: Deepthi Devaki <deepthidevaki@multicorewareinc.com>,
5 * Rajesh Paulraj <rajesh@multicorewareinc.com>
6 * Praveen Kumar Tiwari <praveen@multicorewareinc.com>
7 * Min Chen <chenm003@163.com> <min.chen@multicorewareinc.com>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
23 * This program is also available under a commercial proprietary license.
24 * For more information, contact us at license @ x265.com.
25 *****************************************************************************/
28 #include "ipfilterharness.h"
32 IPFilterHarness::IPFilterHarness()
34 /* [0] --- Random values
37 for (int i
= 0; i
< TEST_BUF_SIZE
; i
++)
39 pixel_test_buff
[0][i
] = rand() & PIXEL_MAX
;
40 short_test_buff
[0][i
] = (rand() % (2 * SMAX
)) - SMAX
;
42 pixel_test_buff
[1][i
] = PIXEL_MIN
;
43 short_test_buff
[1][i
] = SMIN
;
45 pixel_test_buff
[2][i
] = PIXEL_MAX
;
46 short_test_buff
[2][i
] = SMAX
;
49 memset(IPF_C_output_p
, 0xCD, TEST_BUF_SIZE
* sizeof(pixel
));
50 memset(IPF_vec_output_p
, 0xCD, TEST_BUF_SIZE
* sizeof(pixel
));
51 memset(IPF_C_output_s
, 0xCD, TEST_BUF_SIZE
* sizeof(int16_t));
52 memset(IPF_vec_output_s
, 0xCD, TEST_BUF_SIZE
* sizeof(int16_t));
54 int pixelMax
= (1 << X265_DEPTH
) - 1;
55 int shortMax
= (1 << 15) - 1;
56 for (int i
= 0; i
< TEST_BUF_SIZE
; i
++)
58 pixel_buff
[i
] = (pixel
)(rand() & pixelMax
);
59 int isPositive
= (rand() & 1) ? 1 : -1;
60 short_buff
[i
] = (int16_t)(isPositive
* (rand() & shortMax
));
64 bool IPFilterHarness::check_IPFilter_primitive(filter_p2s_t ref
, filter_p2s_t opt
, int isChroma
, int csp
)
66 intptr_t rand_srcStride
;
67 int min_size
= isChroma
? 2 : 4;
68 int max_size
= isChroma
? (MAX_CU_SIZE
>> 1) : MAX_CU_SIZE
;
70 if (isChroma
&& (csp
== X265_CSP_I444
))
73 max_size
= MAX_CU_SIZE
;
76 for (int i
= 0; i
< ITERS
; i
++)
78 int index
= i
% TEST_CASES
;
79 int rand_height
= (int16_t)rand() % 100;
80 int rand_width
= (int16_t)rand() % 100;
82 rand_srcStride
= rand_width
+ rand() % 100;
83 if (rand_srcStride
< rand_width
)
84 rand_srcStride
= rand_width
;
86 rand_width
&= ~(min_size
- 1);
87 rand_width
= Clip3(min_size
, max_size
, rand_width
);
89 rand_height
&= ~(min_size
- 1);
90 rand_height
= Clip3(min_size
, max_size
, rand_height
);
92 ref(pixel_test_buff
[index
],
98 checked(opt
, pixel_test_buff
[index
],
104 if (memcmp(IPF_vec_output_s
, IPF_C_output_s
, TEST_BUF_SIZE
* sizeof(int16_t)))
113 bool IPFilterHarness::check_IPFilterChroma_primitive(filter_pp_t ref
, filter_pp_t opt
)
115 intptr_t rand_srcStride
, rand_dstStride
;
117 for (int i
= 0; i
< ITERS
; i
++)
119 int index
= i
% TEST_CASES
;
121 for (int coeffIdx
= 0; coeffIdx
< 8; coeffIdx
++)
123 rand_srcStride
= rand() % 100 + 2;
124 rand_dstStride
= rand() % 100 + 64;
126 checked(opt
, pixel_test_buff
[index
] + 3 * rand_srcStride
,
132 ref(pixel_test_buff
[index
] + 3 * rand_srcStride
,
138 if (memcmp(IPF_vec_output_p
, IPF_C_output_p
, TEST_BUF_SIZE
* sizeof(pixel
)))
148 bool IPFilterHarness::check_IPFilterChroma_ps_primitive(filter_ps_t ref
, filter_ps_t opt
)
150 intptr_t rand_srcStride
, rand_dstStride
;
152 for (int i
= 0; i
< ITERS
; i
++)
154 int index
= i
% TEST_CASES
;
156 for (int coeffIdx
= 0; coeffIdx
< 8; coeffIdx
++)
158 rand_srcStride
= rand() % 100;
159 rand_dstStride
= rand() % 100 + 64;
161 ref(pixel_test_buff
[index
] + 3 * rand_srcStride
,
167 checked(opt
, pixel_test_buff
[index
] + 3 * rand_srcStride
,
173 if (memcmp(IPF_vec_output_s
, IPF_C_output_s
, TEST_BUF_SIZE
* sizeof(int16_t)))
183 bool IPFilterHarness::check_IPFilterChroma_hps_primitive(filter_hps_t ref
, filter_hps_t opt
)
185 intptr_t rand_srcStride
, rand_dstStride
;
187 for (int i
= 0; i
< ITERS
; i
++)
189 int index
= i
% TEST_CASES
;
191 for (int coeffIdx
= 0; coeffIdx
< 8; coeffIdx
++)
193 // 0 : Interpolate W x H, 1 : Interpolate W x (H + 7)
194 for (int isRowExt
= 0; isRowExt
< 2; isRowExt
++)
196 rand_srcStride
= rand() % 100 + 2;
197 rand_dstStride
= rand() % 100 + 64;
199 ref(pixel_test_buff
[index
] + 3 * rand_srcStride
,
206 checked(opt
, pixel_test_buff
[index
] + 3 * rand_srcStride
,
213 if (memcmp(IPF_vec_output_s
, IPF_C_output_s
, TEST_BUF_SIZE
* sizeof(int16_t)))
224 bool IPFilterHarness::check_IPFilterChroma_sp_primitive(filter_sp_t ref
, filter_sp_t opt
)
226 intptr_t rand_srcStride
, rand_dstStride
;
228 for (int i
= 0; i
< ITERS
; i
++)
230 int index
= i
% TEST_CASES
;
232 for (int coeffIdx
= 0; coeffIdx
< 8; coeffIdx
++)
234 rand_srcStride
= rand() % 100;
235 rand_dstStride
= rand() % 100 + 64;
237 ref(short_test_buff
[index
] + 3 * rand_srcStride
,
243 checked(opt
, short_test_buff
[index
] + 3 * rand_srcStride
,
249 if (memcmp(IPF_vec_output_p
, IPF_C_output_p
, TEST_BUF_SIZE
* sizeof(pixel
)))
259 bool IPFilterHarness::check_IPFilterChroma_ss_primitive(filter_ss_t ref
, filter_ss_t opt
)
261 intptr_t rand_srcStride
, rand_dstStride
;
263 for (int i
= 0; i
< ITERS
; i
++)
265 int index
= i
% TEST_CASES
;
267 for (int coeffIdx
= 0; coeffIdx
< 8; coeffIdx
++)
269 rand_srcStride
= rand() % 100;
270 rand_dstStride
= rand() % 100 + 64;
272 ref(short_test_buff
[index
] + 3 * rand_srcStride
,
278 checked(opt
, short_test_buff
[index
] + 3 * rand_srcStride
,
284 if (memcmp(IPF_C_output_s
, IPF_vec_output_s
, TEST_BUF_SIZE
* sizeof(int16_t)))
294 bool IPFilterHarness::check_IPFilterLuma_primitive(filter_pp_t ref
, filter_pp_t opt
)
296 intptr_t rand_srcStride
, rand_dstStride
;
298 for (int i
= 0; i
< ITERS
; i
++)
300 int index
= i
% TEST_CASES
;
302 for (int coeffIdx
= 0; coeffIdx
< 4; coeffIdx
++)
304 rand_srcStride
= rand() % 100;
305 rand_dstStride
= rand() % 100 + 64;
307 checked(opt
, pixel_test_buff
[index
] + 3 * rand_srcStride
+ 6,
313 ref(pixel_test_buff
[index
] + 3 * rand_srcStride
+ 6,
319 if (memcmp(IPF_vec_output_p
, IPF_C_output_p
, TEST_BUF_SIZE
))
329 bool IPFilterHarness::check_IPFilterLuma_ps_primitive(filter_ps_t ref
, filter_ps_t opt
)
331 intptr_t rand_srcStride
, rand_dstStride
;
333 for (int i
= 0; i
< ITERS
; i
++)
335 int index
= i
% TEST_CASES
;
337 for (int coeffIdx
= 0; coeffIdx
< 4; coeffIdx
++)
339 rand_srcStride
= rand() % 100;
340 rand_dstStride
= rand() % 100 + 64;
342 ref(pixel_test_buff
[index
] + 3 * rand_srcStride
,
348 checked(opt
, pixel_test_buff
[index
] + 3 * rand_srcStride
,
354 if (memcmp(IPF_vec_output_s
, IPF_C_output_s
, TEST_BUF_SIZE
* sizeof(int16_t)))
364 bool IPFilterHarness::check_IPFilterLuma_hps_primitive(filter_hps_t ref
, filter_hps_t opt
)
366 intptr_t rand_srcStride
, rand_dstStride
;
368 for (int i
= 0; i
< ITERS
; i
++)
370 int index
= i
% TEST_CASES
;
372 for (int coeffIdx
= 0; coeffIdx
< 4; coeffIdx
++)
374 // 0 : Interpolate W x H, 1 : Interpolate W x (H + 7)
375 for (int isRowExt
= 0; isRowExt
< 2; isRowExt
++)
377 rand_srcStride
= rand() % 100;
378 rand_dstStride
= rand() % 100 + 64;
380 ref(pixel_test_buff
[index
] + 3 * rand_srcStride
+ 6,
387 checked(opt
, pixel_test_buff
[index
] + 3 * rand_srcStride
+ 6,
394 if (memcmp(IPF_vec_output_s
, IPF_C_output_s
, TEST_BUF_SIZE
* sizeof(int16_t)))
405 bool IPFilterHarness::check_IPFilterLuma_sp_primitive(filter_sp_t ref
, filter_sp_t opt
)
407 intptr_t rand_srcStride
, rand_dstStride
;
409 for (int i
= 0; i
< ITERS
; i
++)
411 int index
= i
% TEST_CASES
;
413 for (int coeffIdx
= 0; coeffIdx
< 4; coeffIdx
++)
415 rand_srcStride
= rand() % 100;
416 rand_dstStride
= rand() % 100 + 64;
418 ref(short_test_buff
[index
] + 3 * rand_srcStride
,
424 checked(opt
, short_test_buff
[index
] + 3 * rand_srcStride
,
430 if (memcmp(IPF_vec_output_p
, IPF_C_output_p
, TEST_BUF_SIZE
* sizeof(pixel
)))
440 bool IPFilterHarness::check_IPFilterLuma_ss_primitive(filter_ss_t ref
, filter_ss_t opt
)
442 intptr_t rand_srcStride
, rand_dstStride
;
444 for (int i
= 0; i
< ITERS
; i
++)
446 int index
= i
% TEST_CASES
;
448 for (int coeffIdx
= 0; coeffIdx
< 4; coeffIdx
++)
450 rand_srcStride
= rand() % 100;
451 rand_dstStride
= rand() % 100 + 64;
453 ref(short_test_buff
[index
] + 3 * rand_srcStride
,
459 checked(opt
, short_test_buff
[index
] + 3 * rand_srcStride
,
465 if (memcmp(IPF_C_output_s
, IPF_vec_output_s
, TEST_BUF_SIZE
* sizeof(int16_t)))
475 bool IPFilterHarness::check_IPFilterLumaHV_primitive(filter_hv_pp_t ref
, filter_hv_pp_t opt
)
477 intptr_t rand_srcStride
, rand_dstStride
;
479 for (int i
= 0; i
< ITERS
; i
++)
481 int index
= i
% TEST_CASES
;
483 for (int coeffIdxX
= 0; coeffIdxX
< 4; coeffIdxX
++)
485 for (int coeffIdxY
= 0; coeffIdxY
< 4; coeffIdxY
++)
487 rand_srcStride
= rand() % 100;
488 rand_dstStride
= rand() % 100 + 64;
490 ref(pixel_test_buff
[index
] + 3 * rand_srcStride
,
497 checked(opt
, pixel_test_buff
[index
] + 3 * rand_srcStride
,
504 if (memcmp(IPF_vec_output_p
, IPF_C_output_p
, TEST_BUF_SIZE
* sizeof(pixel
)))
515 bool IPFilterHarness::testCorrectness(const EncoderPrimitives
& ref
, const EncoderPrimitives
& opt
)
519 // last parameter does not matter in case of luma
520 if (!check_IPFilter_primitive(ref
.luma_p2s
, opt
.luma_p2s
, 0, 1))
522 printf("luma_p2s failed\n");
527 for (int value
= 0; value
< NUM_LUMA_PARTITIONS
; value
++)
529 if (opt
.luma_hpp
[value
])
531 if (!check_IPFilterLuma_primitive(ref
.luma_hpp
[value
], opt
.luma_hpp
[value
]))
533 printf("luma_hpp[%s]", lumaPartStr
[value
]);
537 if (opt
.luma_hps
[value
])
539 if (!check_IPFilterLuma_hps_primitive(ref
.luma_hps
[value
], opt
.luma_hps
[value
]))
541 printf("luma_hps[%s]", lumaPartStr
[value
]);
545 if (opt
.luma_vpp
[value
])
547 if (!check_IPFilterLuma_primitive(ref
.luma_vpp
[value
], opt
.luma_vpp
[value
]))
549 printf("luma_vpp[%s]", lumaPartStr
[value
]);
553 if (opt
.luma_vps
[value
])
555 if (!check_IPFilterLuma_ps_primitive(ref
.luma_vps
[value
], opt
.luma_vps
[value
]))
557 printf("luma_vps[%s]", lumaPartStr
[value
]);
561 if (opt
.luma_vsp
[value
])
563 if (!check_IPFilterLuma_sp_primitive(ref
.luma_vsp
[value
], opt
.luma_vsp
[value
]))
565 printf("luma_vsp[%s]", lumaPartStr
[value
]);
569 if (opt
.luma_vss
[value
])
571 if (!check_IPFilterLuma_ss_primitive(ref
.luma_vss
[value
], opt
.luma_vss
[value
]))
573 printf("luma_vss[%s]", lumaPartStr
[value
]);
577 if (opt
.luma_hvpp
[value
])
579 if (!check_IPFilterLumaHV_primitive(ref
.luma_hvpp
[value
], opt
.luma_hvpp
[value
]))
581 printf("luma_hvpp[%s]", lumaPartStr
[value
]);
587 for (int csp
= X265_CSP_I420
; csp
< X265_CSP_COUNT
; csp
++)
589 if (opt
.chroma
[csp
].p2s
)
591 if (!check_IPFilter_primitive(ref
.chroma
[csp
].p2s
, opt
.chroma
[csp
].p2s
, 1, csp
))
593 printf("chroma_p2s[%s]", x265_source_csp_names
[csp
]);
597 for (int value
= 0; value
< NUM_CHROMA_PARTITIONS
; value
++)
599 if (opt
.chroma
[csp
].filter_hpp
[value
])
601 if (!check_IPFilterChroma_primitive(ref
.chroma
[csp
].filter_hpp
[value
], opt
.chroma
[csp
].filter_hpp
[value
]))
603 printf("chroma_hpp[%s]", chromaPartStr
[csp
][value
]);
607 if (opt
.chroma
[csp
].filter_hps
[value
])
609 if (!check_IPFilterChroma_hps_primitive(ref
.chroma
[csp
].filter_hps
[value
], opt
.chroma
[csp
].filter_hps
[value
]))
611 printf("chroma_hps[%s]", chromaPartStr
[csp
][value
]);
615 if (opt
.chroma
[csp
].filter_vpp
[value
])
617 if (!check_IPFilterChroma_primitive(ref
.chroma
[csp
].filter_vpp
[value
], opt
.chroma
[csp
].filter_vpp
[value
]))
619 printf("chroma_vpp[%s]", chromaPartStr
[csp
][value
]);
623 if (opt
.chroma
[csp
].filter_vps
[value
])
625 if (!check_IPFilterChroma_ps_primitive(ref
.chroma
[csp
].filter_vps
[value
], opt
.chroma
[csp
].filter_vps
[value
]))
627 printf("chroma_vps[%s]", chromaPartStr
[csp
][value
]);
631 if (opt
.chroma
[csp
].filter_vsp
[value
])
633 if (!check_IPFilterChroma_sp_primitive(ref
.chroma
[csp
].filter_vsp
[value
], opt
.chroma
[csp
].filter_vsp
[value
]))
635 printf("chroma_vsp[%s]", chromaPartStr
[csp
][value
]);
639 if (opt
.chroma
[csp
].filter_vss
[value
])
641 if (!check_IPFilterChroma_ss_primitive(ref
.chroma
[csp
].filter_vss
[value
], opt
.chroma
[csp
].filter_vss
[value
]))
643 printf("chroma_vss[%s]", chromaPartStr
[csp
][value
]);
653 void IPFilterHarness::measureSpeed(const EncoderPrimitives
& ref
, const EncoderPrimitives
& opt
)
657 int16_t srcStride
= 96;
658 int16_t dstStride
= 96;
659 int maxVerticalfilterHalfDistance
= 3;
663 printf("luma_p2s\t");
664 REPORT_SPEEDUP(opt
.luma_p2s
, ref
.luma_p2s
,
665 pixel_buff
, srcStride
, IPF_vec_output_s
, width
, height
);
668 for (int value
= 0; value
< NUM_LUMA_PARTITIONS
; value
++)
670 if (opt
.luma_hpp
[value
])
672 printf("luma_hpp[%s]\t", lumaPartStr
[value
]);
673 REPORT_SPEEDUP(opt
.luma_hpp
[value
], ref
.luma_hpp
[value
],
674 pixel_buff
+ srcStride
, srcStride
, IPF_vec_output_p
, dstStride
, 1);
677 if (opt
.luma_hps
[value
])
679 printf("luma_hps[%s]\t", lumaPartStr
[value
]);
680 REPORT_SPEEDUP(opt
.luma_hps
[value
], ref
.luma_hps
[value
],
681 pixel_buff
+ maxVerticalfilterHalfDistance
* srcStride
, srcStride
,
682 IPF_vec_output_s
, dstStride
, 1, 1);
685 if (opt
.luma_vpp
[value
])
687 printf("luma_vpp[%s]\t", lumaPartStr
[value
]);
688 REPORT_SPEEDUP(opt
.luma_vpp
[value
], ref
.luma_vpp
[value
],
689 pixel_buff
+ maxVerticalfilterHalfDistance
* srcStride
, srcStride
,
690 IPF_vec_output_p
, dstStride
, 1);
693 if (opt
.luma_vps
[value
])
695 printf("luma_vps[%s]\t", lumaPartStr
[value
]);
696 REPORT_SPEEDUP(opt
.luma_vps
[value
], ref
.luma_vps
[value
],
697 pixel_buff
+ maxVerticalfilterHalfDistance
* srcStride
, srcStride
,
698 IPF_vec_output_s
, dstStride
, 1);
701 if (opt
.luma_vsp
[value
])
703 printf("luma_vsp[%s]\t", lumaPartStr
[value
]);
704 REPORT_SPEEDUP(opt
.luma_vsp
[value
], ref
.luma_vsp
[value
],
705 short_buff
+ maxVerticalfilterHalfDistance
* srcStride
, srcStride
,
706 IPF_vec_output_p
, dstStride
, 1);
709 if (opt
.luma_vss
[value
])
711 printf("luma_vss[%s]\t", lumaPartStr
[value
]);
712 REPORT_SPEEDUP(opt
.luma_vss
[value
], ref
.luma_vss
[value
],
713 short_buff
+ maxVerticalfilterHalfDistance
* srcStride
, srcStride
,
714 IPF_vec_output_s
, dstStride
, 1);
717 if (opt
.luma_hvpp
[value
])
719 printf("luma_hv [%s]\t", lumaPartStr
[value
]);
720 REPORT_SPEEDUP(opt
.luma_hvpp
[value
], ref
.luma_hvpp
[value
],
721 pixel_buff
+ 3 * srcStride
, srcStride
, IPF_vec_output_p
, srcStride
, 1, 3);
725 for (int csp
= X265_CSP_I420
; csp
< X265_CSP_COUNT
; csp
++)
727 printf("= Color Space %s =\n", x265_source_csp_names
[csp
]);
728 if (opt
.chroma
[csp
].p2s
)
730 printf("chroma_p2s\t");
731 REPORT_SPEEDUP(opt
.chroma
[csp
].p2s
, ref
.chroma
[csp
].p2s
,
732 pixel_buff
, srcStride
, IPF_vec_output_s
, width
, height
);
734 for (int value
= 0; value
< NUM_CHROMA_PARTITIONS
; value
++)
736 if (opt
.chroma
[csp
].filter_hpp
[value
])
738 printf("chroma_hpp[%s]", chromaPartStr
[csp
][value
]);
739 REPORT_SPEEDUP(opt
.chroma
[csp
].filter_hpp
[value
], ref
.chroma
[csp
].filter_hpp
[value
],
740 pixel_buff
+ srcStride
, srcStride
, IPF_vec_output_p
, dstStride
, 1);
742 if (opt
.chroma
[csp
].filter_hps
[value
])
744 printf("chroma_hps[%s]", chromaPartStr
[csp
][value
]);
745 REPORT_SPEEDUP(opt
.chroma
[csp
].filter_hps
[value
], ref
.chroma
[csp
].filter_hps
[value
],
746 pixel_buff
+ srcStride
, srcStride
, IPF_vec_output_s
, dstStride
, 1, 1);
748 if (opt
.chroma
[csp
].filter_vpp
[value
])
750 printf("chroma_vpp[%s]", chromaPartStr
[csp
][value
]);
751 REPORT_SPEEDUP(opt
.chroma
[csp
].filter_vpp
[value
], ref
.chroma
[csp
].filter_vpp
[value
],
752 pixel_buff
+ maxVerticalfilterHalfDistance
* srcStride
, srcStride
,
753 IPF_vec_output_p
, dstStride
, 1);
755 if (opt
.chroma
[csp
].filter_vps
[value
])
757 printf("chroma_vps[%s]", chromaPartStr
[csp
][value
]);
758 REPORT_SPEEDUP(opt
.chroma
[csp
].filter_vps
[value
], ref
.chroma
[csp
].filter_vps
[value
],
759 pixel_buff
+ maxVerticalfilterHalfDistance
* srcStride
, srcStride
,
760 IPF_vec_output_s
, dstStride
, 1);
762 if (opt
.chroma
[csp
].filter_vsp
[value
])
764 printf("chroma_vsp[%s]", chromaPartStr
[csp
][value
]);
765 REPORT_SPEEDUP(opt
.chroma
[csp
].filter_vsp
[value
], ref
.chroma
[csp
].filter_vsp
[value
],
766 short_buff
+ maxVerticalfilterHalfDistance
* srcStride
, srcStride
,
767 IPF_vec_output_p
, dstStride
, 1);
769 if (opt
.chroma
[csp
].filter_vss
[value
])
771 printf("chroma_vss[%s]", chromaPartStr
[csp
][value
]);
772 REPORT_SPEEDUP(opt
.chroma
[csp
].filter_vss
[value
], ref
.chroma
[csp
].filter_vss
[value
],
773 short_buff
+ maxVerticalfilterHalfDistance
* srcStride
, srcStride
,
774 IPF_vec_output_s
, dstStride
, 1);