| 1 | /***************************************************************************** |
| 2 | * Copyright (C) 2013 x265 project |
| 3 | * |
| 4 | * Authors: Steve Borho <steve@borho.org> |
| 5 | * Min Chen <min.chen@multicorewareinc.com> |
| 6 | * Praveen Kumar Tiwari <praveen@multicorewareinc.com> |
| 7 | * Nabajit Deka <nabajit@multicorewareinc.com> |
| 8 | * |
| 9 | * This program is free software; you can redistribute it and/or modify |
| 10 | * it under the terms of the GNU General Public License as published by |
| 11 | * the Free Software Foundation; either version 2 of the License, or |
| 12 | * (at your option) any later version. |
| 13 | * |
| 14 | * This program is distributed in the hope that it will be useful, |
| 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 17 | * GNU General Public License for more details. |
| 18 | * |
| 19 | * You should have received a copy of the GNU General Public License |
| 20 | * along with this program; if not, write to the Free Software |
| 21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
| 22 | * |
| 23 | * This program is also available under a commercial proprietary license. |
| 24 | * For more information, contact us at license @ x265.com. |
| 25 | *****************************************************************************/ |
| 26 | |
| 27 | #include "common.h" |
| 28 | #include "mbdstharness.h" |
| 29 | |
| 30 | using namespace x265; |
| 31 | |
/* Describes one transform size exercised by the harness: the label that is
 * printed in reports and the edge length (in samples) of the square block. */
struct DctConf
{
    const char* name;   /* label printed by testCorrectness() / measureSpeed() */
    int         width;  /* block edge length handed to the transform primitive */
};
| 37 | |
| 38 | const DctConf dctInfo[] = |
| 39 | { |
| 40 | { "dst4x4\t", 4 }, |
| 41 | { "dct4x4\t", 4 }, |
| 42 | { "dct8x8\t", 8 }, |
| 43 | { "dct16x16", 16 }, |
| 44 | { "dct32x32", 32 }, |
| 45 | }; |
| 46 | |
| 47 | const DctConf idctInfo[] = |
| 48 | { |
| 49 | { "idst4x4\t", 4 }, |
| 50 | { "idct4x4\t", 4 }, |
| 51 | { "idct8x8\t", 8 }, |
| 52 | { "idct16x16", 16 }, |
| 53 | { "idct32x32", 32 }, |
| 54 | }; |
| 55 | |
| 56 | MBDstHarness::MBDstHarness() |
| 57 | { |
| 58 | const int idct_max = (1 << (BIT_DEPTH + 4)) - 1; |
| 59 | |
| 60 | /* [0] --- Random values |
| 61 | * [1] --- Minimum |
| 62 | * [2] --- Maximum */ |
| 63 | for (int i = 0; i < TEST_BUF_SIZE; i++) |
| 64 | { |
| 65 | short_test_buff[0][i] = (rand() & PIXEL_MAX) - (rand() & PIXEL_MAX); |
| 66 | int_test_buff[0][i] = rand() % PIXEL_MAX; |
| 67 | int_idct_test_buff[0][i] = (rand() % (SHORT_MAX - SHORT_MIN)) - SHORT_MAX; |
| 68 | short_denoise_test_buff1[0][i] = short_denoise_test_buff2[0][i] = (rand() & SHORT_MAX) - (rand() & SHORT_MAX); |
| 69 | |
| 70 | short_test_buff[1][i] = -PIXEL_MAX; |
| 71 | int_test_buff[1][i] = -PIXEL_MAX; |
| 72 | int_idct_test_buff[1][i] = SHORT_MIN; |
| 73 | short_denoise_test_buff1[1][i] = short_denoise_test_buff2[1][i] = -SHORT_MAX; |
| 74 | |
| 75 | short_test_buff[2][i] = PIXEL_MAX; |
| 76 | int_test_buff[2][i] = PIXEL_MAX; |
| 77 | int_idct_test_buff[2][i] = SHORT_MAX; |
| 78 | short_denoise_test_buff1[2][i] = short_denoise_test_buff2[2][i] = SHORT_MAX; |
| 79 | |
| 80 | mbuf1[i] = rand() & PIXEL_MAX; |
| 81 | mbufdct[i] = (rand() & PIXEL_MAX) - (rand() & PIXEL_MAX); |
| 82 | mbufidct[i] = (rand() & idct_max); |
| 83 | } |
| 84 | |
| 85 | #if _DEBUG |
| 86 | memset(mshortbuf2, 0, MAX_TU_SIZE * sizeof(int16_t)); |
| 87 | memset(mshortbuf3, 0, MAX_TU_SIZE * sizeof(int16_t)); |
| 88 | |
| 89 | memset(mintbuf1, 0, MAX_TU_SIZE * sizeof(int)); |
| 90 | memset(mintbuf2, 0, MAX_TU_SIZE * sizeof(int)); |
| 91 | memset(mintbuf3, 0, MAX_TU_SIZE * sizeof(int)); |
| 92 | memset(mintbuf4, 0, MAX_TU_SIZE * sizeof(int)); |
| 93 | #endif // if _DEBUG |
| 94 | } |
| 95 | |
| 96 | bool MBDstHarness::check_dct_primitive(dct_t ref, dct_t opt, intptr_t width) |
| 97 | { |
| 98 | int j = 0; |
| 99 | intptr_t cmp_size = sizeof(short) * width * width; |
| 100 | |
| 101 | for (int i = 0; i < ITERS; i++) |
| 102 | { |
| 103 | int index = rand() % TEST_CASES; |
| 104 | |
| 105 | ref(short_test_buff[index] + j, mshortbuf2, width); |
| 106 | checked(opt, short_test_buff[index] + j, mshortbuf3, width); |
| 107 | |
| 108 | if (memcmp(mshortbuf2, mshortbuf3, cmp_size)) |
| 109 | return false; |
| 110 | |
| 111 | reportfail(); |
| 112 | j += INCR; |
| 113 | } |
| 114 | |
| 115 | return true; |
| 116 | } |
| 117 | |
| 118 | bool MBDstHarness::check_idct_primitive(idct_t ref, idct_t opt, intptr_t width) |
| 119 | { |
| 120 | int j = 0; |
| 121 | intptr_t cmp_size = sizeof(int16_t) * width * width; |
| 122 | |
| 123 | for (int i = 0; i < ITERS; i++) |
| 124 | { |
| 125 | int index = rand() % TEST_CASES; |
| 126 | |
| 127 | ref(short_test_buff[index] + j, mshortbuf2, width); |
| 128 | checked(opt, short_test_buff[index] + j, mshortbuf3, width); |
| 129 | |
| 130 | if (memcmp(mshortbuf2, mshortbuf3, cmp_size)) |
| 131 | return false; |
| 132 | |
| 133 | reportfail(); |
| 134 | j += INCR; |
| 135 | } |
| 136 | |
| 137 | return true; |
| 138 | } |
| 139 | |
| 140 | bool MBDstHarness::check_dequant_primitive(dequant_normal_t ref, dequant_normal_t opt) |
| 141 | { |
| 142 | int j = 0; |
| 143 | |
| 144 | for (int i = 0; i < ITERS; i++) |
| 145 | { |
| 146 | int index = rand() % TEST_CASES; |
| 147 | int log2TrSize = (rand() % 4) + 2; |
| 148 | |
| 149 | int width = (1 << log2TrSize); |
| 150 | int height = width; |
| 151 | int qp = rand() % (QP_MAX_SPEC + QP_BD_OFFSET + 1); |
| 152 | int per = qp / 6; |
| 153 | int rem = qp % 6; |
| 154 | static const int invQuantScales[6] = { 40, 45, 51, 57, 64, 72 }; |
| 155 | int scale = invQuantScales[rem] << per; |
| 156 | int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; |
| 157 | int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift; |
| 158 | |
| 159 | ref(short_test_buff[index] + j, mshortbuf2, width * height, scale, shift); |
| 160 | checked(opt, short_test_buff[index] + j, mshortbuf3, width * height, scale, shift); |
| 161 | |
| 162 | if (memcmp(mshortbuf2, mshortbuf3, sizeof(int16_t) * height * width)) |
| 163 | return false; |
| 164 | |
| 165 | reportfail(); |
| 166 | j += INCR; |
| 167 | } |
| 168 | |
| 169 | return true; |
| 170 | } |
| 171 | |
| 172 | bool MBDstHarness::check_dequant_primitive(dequant_scaling_t ref, dequant_scaling_t opt) |
| 173 | { |
| 174 | int j = 0; |
| 175 | |
| 176 | for (int i = 0; i < ITERS; i++) |
| 177 | { |
| 178 | |
| 179 | memset(mshortbuf2, 0, MAX_TU_SIZE * sizeof(int16_t)); |
| 180 | memset(mshortbuf3, 0, MAX_TU_SIZE * sizeof(int16_t)); |
| 181 | |
| 182 | int log2TrSize = (rand() % 4) + 2; |
| 183 | |
| 184 | int width = (1 << log2TrSize); |
| 185 | int height = width; |
| 186 | |
| 187 | int qp = rand() % (QP_MAX_SPEC + QP_BD_OFFSET + 1); |
| 188 | int per = qp / 6; |
| 189 | int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; |
| 190 | int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift; |
| 191 | |
| 192 | int cmp_size = sizeof(int16_t) * height * width; |
| 193 | int index1 = rand() % TEST_CASES; |
| 194 | |
| 195 | ref(short_test_buff[index1] + j, int_test_buff[index1] + j, mshortbuf2, width * height, per, shift); |
| 196 | checked(opt, short_test_buff[index1] + j, int_test_buff[index1] + j, mshortbuf3, width * height, per, shift); |
| 197 | |
| 198 | if (memcmp(mshortbuf2, mshortbuf3, cmp_size)) |
| 199 | return false; |
| 200 | |
| 201 | reportfail(); |
| 202 | j += INCR; |
| 203 | } |
| 204 | |
| 205 | return true; |
| 206 | } |
| 207 | |
| 208 | bool MBDstHarness::check_quant_primitive(quant_t ref, quant_t opt) |
| 209 | { |
| 210 | int j = 0; |
| 211 | |
| 212 | for (int i = 0; i < ITERS; i++) |
| 213 | { |
| 214 | int width = (rand() % 4 + 1) * 4; |
| 215 | int height = width; |
| 216 | |
| 217 | uint32_t optReturnValue = 0; |
| 218 | uint32_t refReturnValue = 0; |
| 219 | |
| 220 | int bits = (rand() % 24) + 8; |
| 221 | int valueToAdd = rand() % (1 << bits); |
| 222 | int cmp_size = sizeof(int) * height * width; |
| 223 | int cmp_size1 = sizeof(short) * height * width; |
| 224 | int numCoeff = height * width; |
| 225 | |
| 226 | int index1 = rand() % TEST_CASES; |
| 227 | int index2 = rand() % TEST_CASES; |
| 228 | |
| 229 | refReturnValue = ref(short_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf1, mshortbuf2, bits, valueToAdd, numCoeff); |
| 230 | optReturnValue = (uint32_t)checked(opt, short_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf3, mshortbuf3, bits, valueToAdd, numCoeff); |
| 231 | |
| 232 | if (memcmp(mintbuf1, mintbuf3, cmp_size)) |
| 233 | return false; |
| 234 | |
| 235 | if (memcmp(mshortbuf2, mshortbuf3, cmp_size1)) |
| 236 | return false; |
| 237 | |
| 238 | if (optReturnValue != refReturnValue) |
| 239 | return false; |
| 240 | |
| 241 | reportfail(); |
| 242 | j += INCR; |
| 243 | } |
| 244 | |
| 245 | return true; |
| 246 | } |
| 247 | |
| 248 | bool MBDstHarness::check_nquant_primitive(nquant_t ref, nquant_t opt) |
| 249 | { |
| 250 | int j = 0; |
| 251 | |
| 252 | for (int i = 0; i < ITERS; i++) |
| 253 | { |
| 254 | int width = (rand() % 4 + 1) * 4; |
| 255 | int height = width; |
| 256 | |
| 257 | uint32_t optReturnValue = 0; |
| 258 | uint32_t refReturnValue = 0; |
| 259 | |
| 260 | int bits = rand() % 32; |
| 261 | int valueToAdd = rand() % (1 << bits); |
| 262 | int cmp_size = sizeof(short) * height * width; |
| 263 | int numCoeff = height * width; |
| 264 | |
| 265 | int index1 = rand() % TEST_CASES; |
| 266 | int index2 = rand() % TEST_CASES; |
| 267 | |
| 268 | refReturnValue = ref(short_test_buff[index1] + j, int_test_buff[index2] + j, mshortbuf2, bits, valueToAdd, numCoeff); |
| 269 | optReturnValue = (uint32_t)checked(opt, short_test_buff[index1] + j, int_test_buff[index2] + j, mshortbuf3, bits, valueToAdd, numCoeff); |
| 270 | |
| 271 | if (memcmp(mshortbuf2, mshortbuf3, cmp_size)) |
| 272 | return false; |
| 273 | |
| 274 | if (optReturnValue != refReturnValue) |
| 275 | return false; |
| 276 | |
| 277 | reportfail(); |
| 278 | j += INCR; |
| 279 | } |
| 280 | |
| 281 | return true; |
| 282 | } |
| 283 | |
| 284 | bool MBDstHarness::check_count_nonzero_primitive(count_nonzero_t ref, count_nonzero_t opt) |
| 285 | { |
| 286 | ALIGN_VAR_32(int16_t, qcoeff[32 * 32]); |
| 287 | |
| 288 | for (int i = 0; i < 4; i++) |
| 289 | { |
| 290 | int log2TrSize = i + 2; |
| 291 | int num = 1 << (log2TrSize * 2); |
| 292 | int mask = num - 1; |
| 293 | |
| 294 | for (int n = 0; n <= num; n++) |
| 295 | { |
| 296 | memset(qcoeff, 0, num * sizeof(int16_t)); |
| 297 | |
| 298 | for (int j = 0; j < n; j++) |
| 299 | { |
| 300 | int k = rand() & mask; |
| 301 | while (qcoeff[k]) |
| 302 | { |
| 303 | k = (k + 11) & mask; |
| 304 | } |
| 305 | |
| 306 | qcoeff[k] = (int16_t)rand() - RAND_MAX / 2; |
| 307 | } |
| 308 | |
| 309 | int refval = ref(qcoeff, num); |
| 310 | int optval = (int)checked(opt, qcoeff, num); |
| 311 | |
| 312 | if (refval != optval) |
| 313 | return false; |
| 314 | |
| 315 | reportfail(); |
| 316 | } |
| 317 | } |
| 318 | |
| 319 | return true; |
| 320 | } |
| 321 | |
| 322 | bool MBDstHarness::check_denoise_dct_primitive(denoiseDct_t ref, denoiseDct_t opt) |
| 323 | { |
| 324 | int j = 0; |
| 325 | |
| 326 | for (int s = 0; s < 4; s++) |
| 327 | { |
| 328 | int log2TrSize = s + 2; |
| 329 | int num = 1 << (log2TrSize * 2); |
| 330 | int cmp_size = sizeof(int) * num; |
| 331 | int cmp_short = sizeof(short) * num; |
| 332 | |
| 333 | for (int i = 0; i < ITERS; i++) |
| 334 | { |
| 335 | memset(mubuf1, 0, num * sizeof(uint32_t)); |
| 336 | memset(mubuf2, 0, num * sizeof(uint32_t)); |
| 337 | memset(mushortbuf1, 0, num * sizeof(uint16_t)); |
| 338 | |
| 339 | for (int k = 0; k < num; k++) |
| 340 | mushortbuf1[k] = rand() % UNSIGNED_SHORT_MAX; |
| 341 | |
| 342 | int index = rand() % TEST_CASES; |
| 343 | |
| 344 | ref(short_denoise_test_buff1[index] + j, mubuf1, mushortbuf1, num); |
| 345 | checked(opt, short_denoise_test_buff2[index] + j, mubuf2, mushortbuf1, num); |
| 346 | |
| 347 | if (memcmp(short_denoise_test_buff1[index] + j, short_denoise_test_buff2[index] + j, cmp_short)) |
| 348 | return false; |
| 349 | |
| 350 | if (memcmp(mubuf1, mubuf2, cmp_size)) |
| 351 | return false; |
| 352 | |
| 353 | reportfail(); |
| 354 | j += INCR; |
| 355 | } |
| 356 | j = 0; |
| 357 | } |
| 358 | |
| 359 | return true; |
| 360 | } |
| 361 | |
| 362 | |
| 363 | bool MBDstHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPrimitives& opt) |
| 364 | { |
| 365 | for (int i = 0; i < NUM_DCTS; i++) |
| 366 | { |
| 367 | if (opt.dct[i]) |
| 368 | { |
| 369 | if (!check_dct_primitive(ref.dct[i], opt.dct[i], dctInfo[i].width)) |
| 370 | { |
| 371 | printf("\n%s failed\n", dctInfo[i].name); |
| 372 | return false; |
| 373 | } |
| 374 | } |
| 375 | } |
| 376 | |
| 377 | for (int i = 0; i < NUM_IDCTS; i++) |
| 378 | { |
| 379 | if (opt.idct[i]) |
| 380 | { |
| 381 | if (!check_idct_primitive(ref.idct[i], opt.idct[i], idctInfo[i].width)) |
| 382 | { |
| 383 | printf("%s failed\n", idctInfo[i].name); |
| 384 | return false; |
| 385 | } |
| 386 | } |
| 387 | } |
| 388 | |
| 389 | if (opt.dequant_normal) |
| 390 | { |
| 391 | if (!check_dequant_primitive(ref.dequant_normal, opt.dequant_normal)) |
| 392 | { |
| 393 | printf("dequant: Failed!\n"); |
| 394 | return false; |
| 395 | } |
| 396 | } |
| 397 | |
| 398 | if (opt.dequant_scaling) |
| 399 | { |
| 400 | if (!check_dequant_primitive(ref.dequant_scaling, opt.dequant_scaling)) |
| 401 | { |
| 402 | printf("dequant_scaling: Failed!\n"); |
| 403 | return false; |
| 404 | } |
| 405 | } |
| 406 | |
| 407 | if (opt.quant) |
| 408 | { |
| 409 | if (!check_quant_primitive(ref.quant, opt.quant)) |
| 410 | { |
| 411 | printf("quant: Failed!\n"); |
| 412 | return false; |
| 413 | } |
| 414 | } |
| 415 | |
| 416 | if (opt.nquant) |
| 417 | { |
| 418 | if (!check_nquant_primitive(ref.nquant, opt.nquant)) |
| 419 | { |
| 420 | printf("nquant: Failed!\n"); |
| 421 | return false; |
| 422 | } |
| 423 | } |
| 424 | |
| 425 | if (opt.count_nonzero) |
| 426 | { |
| 427 | if (!check_count_nonzero_primitive(ref.count_nonzero, opt.count_nonzero)) |
| 428 | { |
| 429 | printf("count_nonzero: Failed!\n"); |
| 430 | return false; |
| 431 | } |
| 432 | } |
| 433 | |
| 434 | if (opt.dequant_scaling) |
| 435 | { |
| 436 | if (!check_dequant_primitive(ref.dequant_scaling, opt.dequant_scaling)) |
| 437 | { |
| 438 | printf("dequant_scaling: Failed!\n"); |
| 439 | return false; |
| 440 | } |
| 441 | } |
| 442 | |
| 443 | if (opt.denoiseDct) |
| 444 | { |
| 445 | if (!check_denoise_dct_primitive(ref.denoiseDct, opt.denoiseDct)) |
| 446 | { |
| 447 | printf("denoiseDct: Failed!\n"); |
| 448 | return false; |
| 449 | } |
| 450 | } |
| 451 | |
| 452 | return true; |
| 453 | } |
| 454 | |
| 455 | void MBDstHarness::measureSpeed(const EncoderPrimitives& ref, const EncoderPrimitives& opt) |
| 456 | { |
| 457 | for (int value = 0; value < NUM_DCTS; value++) |
| 458 | { |
| 459 | if (opt.dct[value]) |
| 460 | { |
| 461 | printf("%s\t", dctInfo[value].name); |
| 462 | REPORT_SPEEDUP(opt.dct[value], ref.dct[value], mbuf1, mshortbuf2, dctInfo[value].width); |
| 463 | } |
| 464 | } |
| 465 | |
| 466 | for (int value = 0; value < NUM_IDCTS; value++) |
| 467 | { |
| 468 | if (opt.idct[value]) |
| 469 | { |
| 470 | printf("%s\t", idctInfo[value].name); |
| 471 | REPORT_SPEEDUP(opt.idct[value], ref.idct[value], mshortbuf3, mshortbuf2, idctInfo[value].width); |
| 472 | } |
| 473 | } |
| 474 | |
| 475 | if (opt.dequant_normal) |
| 476 | { |
| 477 | printf("dequant_normal\t"); |
| 478 | REPORT_SPEEDUP(opt.dequant_normal, ref.dequant_normal, short_test_buff[0], mshortbuf2, 32 * 32, 70, 1); |
| 479 | } |
| 480 | |
| 481 | if (opt.dequant_scaling) |
| 482 | { |
| 483 | printf("dequant_scaling\t"); |
| 484 | REPORT_SPEEDUP(opt.dequant_scaling, ref.dequant_scaling, short_test_buff[0], mintbuf3, mshortbuf2, 32 * 32, 5, 1); |
| 485 | } |
| 486 | |
| 487 | if (opt.quant) |
| 488 | { |
| 489 | printf("quant\t\t"); |
| 490 | REPORT_SPEEDUP(opt.quant, ref.quant, short_test_buff[0], int_test_buff[1], mintbuf3, mshortbuf2, 23, 23785, 32 * 32); |
| 491 | } |
| 492 | |
| 493 | if (opt.nquant) |
| 494 | { |
| 495 | printf("nquant\t\t"); |
| 496 | REPORT_SPEEDUP(opt.nquant, ref.nquant, short_test_buff[0], int_test_buff[1], mshortbuf2, 23, 23785, 32 * 32); |
| 497 | } |
| 498 | |
| 499 | if (opt.count_nonzero) |
| 500 | { |
| 501 | for (int i = 4; i <= 32; i <<= 1) |
| 502 | { |
| 503 | printf("count_nonzero[%dx%d]", i, i); |
| 504 | REPORT_SPEEDUP(opt.count_nonzero, ref.count_nonzero, mbuf1, i * i) |
| 505 | } |
| 506 | } |
| 507 | |
| 508 | if (opt.denoiseDct) |
| 509 | { |
| 510 | printf("denoiseDct\t"); |
| 511 | REPORT_SPEEDUP(opt.denoiseDct, ref.denoiseDct, short_denoise_test_buff1[0], mubuf1, mushortbuf1, 32 * 32); |
| 512 | } |
| 513 | |
| 514 | } |