Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * (c) 2001 Fabrice Bellard | |
3 | * 2007 Marc Hoffman <marc.hoffman@analog.com> | |
4 | * | |
5 | * This file is part of FFmpeg. | |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * FFmpeg is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with FFmpeg; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
21 | ||
22 | /** | |
23 | * @file | |
24 | * DCT test (c) 2001 Fabrice Bellard | |
25 | * Started from sample code by Juan J. Sierralta P. | |
26 | */ | |
27 | ||
28 | #include "config.h" | |
29 | #include <stdlib.h> | |
30 | #include <stdio.h> | |
31 | #include <string.h> | |
32 | #if HAVE_UNISTD_H | |
33 | #include <unistd.h> | |
34 | #endif | |
35 | #include <math.h> | |
36 | ||
37 | #include "libavutil/cpu.h" | |
38 | #include "libavutil/common.h" | |
39 | #include "libavutil/lfg.h" | |
40 | #include "libavutil/time.h" | |
41 | ||
42 | #include "dct.h" | |
43 | #include "idctdsp.h" | |
44 | #include "simple_idct.h" | |
45 | #include "xvididct.h" | |
46 | #include "aandcttab.h" | |
47 | #include "faandct.h" | |
48 | #include "faanidct.h" | |
49 | #include "dctref.h" | |
50 | ||
51 | struct algo { | |
52 | const char *name; | |
53 | void (*func)(int16_t *block); | |
54 | enum idct_permutation_type perm_type; | |
55 | int cpu_flag; | |
56 | int nonspec; | |
57 | }; | |
58 | ||
59 | static const struct algo fdct_tab[] = { | |
60 | { "REF-DBL", ff_ref_fdct, FF_IDCT_PERM_NONE }, | |
61 | { "IJG-AAN-INT", ff_fdct_ifast, FF_IDCT_PERM_NONE }, | |
62 | { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, FF_IDCT_PERM_NONE }, | |
63 | #if CONFIG_FAANDCT | |
64 | { "FAAN", ff_faandct, FF_IDCT_PERM_NONE }, | |
65 | #endif /* CONFIG_FAANDCT */ | |
66 | }; | |
67 | ||
68 | static void ff_prores_idct_wrap(int16_t *dst){ | |
69 | DECLARE_ALIGNED(16, static int16_t, qmat)[64]; | |
70 | int i; | |
71 | ||
72 | for(i=0; i<64; i++){ | |
73 | qmat[i]=4; | |
74 | } | |
75 | ff_prores_idct(dst, qmat); | |
76 | for(i=0; i<64; i++) { | |
77 | dst[i] -= 512; | |
78 | } | |
79 | } | |
80 | ||
81 | static const struct algo idct_tab[] = { | |
82 | { "REF-DBL", ff_ref_idct, FF_IDCT_PERM_NONE }, | |
83 | { "INT", ff_j_rev_dct, FF_IDCT_PERM_LIBMPEG2 }, | |
84 | { "SIMPLE-C", ff_simple_idct_8, FF_IDCT_PERM_NONE }, | |
85 | { "PR-C", ff_prores_idct_wrap, FF_IDCT_PERM_NONE, 0, 1 }, | |
86 | #if CONFIG_FAANIDCT | |
87 | { "FAANI", ff_faanidct, FF_IDCT_PERM_NONE }, | |
88 | #endif /* CONFIG_FAANIDCT */ | |
89 | #if CONFIG_MPEG4_DECODER | |
90 | { "XVID", ff_xvid_idct, FF_IDCT_PERM_NONE, 0, 1 }, | |
91 | #endif /* CONFIG_MPEG4_DECODER */ | |
92 | }; | |
93 | ||
94 | #if ARCH_ARM | |
95 | #include "arm/dct-test.c" | |
96 | #elif ARCH_PPC | |
97 | #include "ppc/dct-test.c" | |
98 | #elif ARCH_X86 | |
99 | #include "x86/dct-test.c" | |
100 | #else | |
f6fa7814 DM |
101 | static const struct algo fdct_tab_arch[] = { { 0 } }; |
102 | static const struct algo idct_tab_arch[] = { { 0 } }; | |
2ba45a60 DM |
103 | #endif |
104 | ||
105 | #define AANSCALE_BITS 12 | |
106 | ||
107 | #define NB_ITS 20000 | |
108 | #define NB_ITS_SPEED 50000 | |
109 | ||
110 | DECLARE_ALIGNED(16, static int16_t, block)[64]; | |
111 | DECLARE_ALIGNED(8, static int16_t, block1)[64]; | |
112 | ||
113 | static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals) | |
114 | { | |
115 | int i, j; | |
116 | ||
117 | memset(block, 0, 64 * sizeof(*block)); | |
118 | ||
119 | switch (test) { | |
120 | case 0: | |
121 | for (i = 0; i < 64; i++) | |
122 | block[i] = (av_lfg_get(prng) % (2*vals)) -vals; | |
123 | if (is_idct) { | |
124 | ff_ref_fdct(block); | |
125 | for (i = 0; i < 64; i++) | |
126 | block[i] >>= 3; | |
127 | } | |
128 | break; | |
129 | case 1: | |
130 | j = av_lfg_get(prng) % 10 + 1; | |
131 | for (i = 0; i < j; i++) { | |
132 | int idx = av_lfg_get(prng) % 64; | |
133 | block[idx] = av_lfg_get(prng) % (2*vals) -vals; | |
134 | } | |
135 | break; | |
136 | case 2: | |
137 | block[ 0] = av_lfg_get(prng) % (16*vals) - (8*vals); | |
138 | block[63] = (block[0] & 1) ^ 1; | |
139 | break; | |
140 | } | |
141 | } | |
142 | ||
143 | static void permute(int16_t dst[64], const int16_t src[64], | |
144 | enum idct_permutation_type perm_type) | |
145 | { | |
146 | int i; | |
147 | ||
148 | #if ARCH_X86 | |
149 | if (permute_x86(dst, src, perm_type)) | |
150 | return; | |
151 | #endif | |
152 | ||
153 | switch (perm_type) { | |
154 | case FF_IDCT_PERM_LIBMPEG2: | |
155 | for (i = 0; i < 64; i++) | |
156 | dst[(i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2)] = src[i]; | |
157 | break; | |
158 | case FF_IDCT_PERM_PARTTRANS: | |
159 | for (i = 0; i < 64; i++) | |
160 | dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i]; | |
161 | break; | |
162 | case FF_IDCT_PERM_TRANSPOSE: | |
163 | for (i = 0; i < 64; i++) | |
164 | dst[(i>>3) | ((i<<3)&0x38)] = src[i]; | |
165 | break; | |
166 | default: | |
167 | for (i = 0; i < 64; i++) | |
168 | dst[i] = src[i]; | |
169 | break; | |
170 | } | |
171 | } | |
172 | ||
173 | static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits) | |
174 | { | |
175 | void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct; | |
176 | int it, i, scale; | |
177 | int err_inf, v; | |
178 | int64_t err2, ti, ti1, it1, err_sum = 0; | |
179 | int64_t sysErr[64], sysErrMax = 0; | |
180 | int maxout = 0; | |
181 | int blockSumErrMax = 0, blockSumErr; | |
182 | AVLFG prng; | |
183 | const int vals=1<<bits; | |
184 | double omse, ome; | |
185 | int spec_err; | |
186 | ||
187 | av_lfg_init(&prng, 1); | |
188 | ||
189 | err_inf = 0; | |
190 | err2 = 0; | |
191 | for (i = 0; i < 64; i++) | |
192 | sysErr[i] = 0; | |
193 | for (it = 0; it < NB_ITS; it++) { | |
194 | init_block(block1, test, is_idct, &prng, vals); | |
195 | permute(block, block1, dct->perm_type); | |
196 | ||
197 | dct->func(block); | |
198 | emms_c(); | |
199 | ||
200 | if (!strcmp(dct->name, "IJG-AAN-INT")) { | |
201 | for (i = 0; i < 64; i++) { | |
202 | scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i]; | |
203 | block[i] = (block[i] * scale) >> AANSCALE_BITS; | |
204 | } | |
205 | } | |
206 | ||
207 | ref(block1); | |
208 | if (!strcmp(dct->name, "PR-SSE2")) | |
209 | for (i = 0; i < 64; i++) | |
210 | block1[i] = av_clip(block1[i], 4-512, 1019-512); | |
211 | ||
212 | blockSumErr = 0; | |
213 | for (i = 0; i < 64; i++) { | |
214 | int err = block[i] - block1[i]; | |
215 | err_sum += err; | |
216 | v = abs(err); | |
217 | if (v > err_inf) | |
218 | err_inf = v; | |
219 | err2 += v * v; | |
220 | sysErr[i] += block[i] - block1[i]; | |
221 | blockSumErr += v; | |
222 | if (abs(block[i]) > maxout) | |
223 | maxout = abs(block[i]); | |
224 | } | |
225 | if (blockSumErrMax < blockSumErr) | |
226 | blockSumErrMax = blockSumErr; | |
227 | } | |
228 | for (i = 0; i < 64; i++) | |
229 | sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i])); | |
230 | ||
231 | for (i = 0; i < 64; i++) { | |
232 | if (i % 8 == 0) | |
233 | printf("\n"); | |
234 | printf("%7d ", (int) sysErr[i]); | |
235 | } | |
236 | printf("\n"); | |
237 | ||
238 | omse = (double) err2 / NB_ITS / 64; | |
239 | ome = (double) err_sum / NB_ITS / 64; | |
240 | ||
241 | spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015); | |
242 | ||
243 | printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n", | |
244 | is_idct ? "IDCT" : "DCT", dct->name, err_inf, | |
245 | omse, ome, (double) sysErrMax / NB_ITS, | |
246 | maxout, blockSumErrMax); | |
247 | ||
248 | if (spec_err && !dct->nonspec) | |
249 | return 1; | |
250 | ||
251 | if (!speed) | |
252 | return 0; | |
253 | ||
254 | /* speed test */ | |
255 | ||
256 | init_block(block, test, is_idct, &prng, vals); | |
257 | permute(block1, block, dct->perm_type); | |
258 | ||
259 | ti = av_gettime_relative(); | |
260 | it1 = 0; | |
261 | do { | |
262 | for (it = 0; it < NB_ITS_SPEED; it++) { | |
263 | memcpy(block, block1, sizeof(block)); | |
264 | dct->func(block); | |
265 | } | |
266 | emms_c(); | |
267 | it1 += NB_ITS_SPEED; | |
268 | ti1 = av_gettime_relative() - ti; | |
269 | } while (ti1 < 1000000); | |
270 | ||
271 | printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name, | |
272 | (double) it1 * 1000.0 / (double) ti1); | |
273 | ||
274 | return 0; | |
275 | } | |
276 | ||
277 | DECLARE_ALIGNED(8, static uint8_t, img_dest)[64]; | |
278 | DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64]; | |
279 | ||
280 | static void idct248_ref(uint8_t *dest, int linesize, int16_t *block) | |
281 | { | |
282 | static int init; | |
283 | static double c8[8][8]; | |
284 | static double c4[4][4]; | |
285 | double block1[64], block2[64], block3[64]; | |
286 | double s, sum, v; | |
287 | int i, j, k; | |
288 | ||
289 | if (!init) { | |
290 | init = 1; | |
291 | ||
292 | for (i = 0; i < 8; i++) { | |
293 | sum = 0; | |
294 | for (j = 0; j < 8; j++) { | |
295 | s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0); | |
296 | c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0); | |
297 | sum += c8[i][j] * c8[i][j]; | |
298 | } | |
299 | } | |
300 | ||
301 | for (i = 0; i < 4; i++) { | |
302 | sum = 0; | |
303 | for (j = 0; j < 4; j++) { | |
304 | s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0); | |
305 | c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0); | |
306 | sum += c4[i][j] * c4[i][j]; | |
307 | } | |
308 | } | |
309 | } | |
310 | ||
311 | /* butterfly */ | |
312 | s = 0.5 * sqrt(2.0); | |
313 | for (i = 0; i < 4; i++) { | |
314 | for (j = 0; j < 8; j++) { | |
315 | block1[8 * (2 * i) + j] = | |
316 | (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s; | |
317 | block1[8 * (2 * i + 1) + j] = | |
318 | (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s; | |
319 | } | |
320 | } | |
321 | ||
322 | /* idct8 on lines */ | |
323 | for (i = 0; i < 8; i++) { | |
324 | for (j = 0; j < 8; j++) { | |
325 | sum = 0; | |
326 | for (k = 0; k < 8; k++) | |
327 | sum += c8[k][j] * block1[8 * i + k]; | |
328 | block2[8 * i + j] = sum; | |
329 | } | |
330 | } | |
331 | ||
332 | /* idct4 */ | |
333 | for (i = 0; i < 8; i++) { | |
334 | for (j = 0; j < 4; j++) { | |
335 | /* top */ | |
336 | sum = 0; | |
337 | for (k = 0; k < 4; k++) | |
338 | sum += c4[k][j] * block2[8 * (2 * k) + i]; | |
339 | block3[8 * (2 * j) + i] = sum; | |
340 | ||
341 | /* bottom */ | |
342 | sum = 0; | |
343 | for (k = 0; k < 4; k++) | |
344 | sum += c4[k][j] * block2[8 * (2 * k + 1) + i]; | |
345 | block3[8 * (2 * j + 1) + i] = sum; | |
346 | } | |
347 | } | |
348 | ||
349 | /* clamp and store the result */ | |
350 | for (i = 0; i < 8; i++) { | |
351 | for (j = 0; j < 8; j++) { | |
352 | v = block3[8 * i + j]; | |
353 | if (v < 0) v = 0; | |
354 | else if (v > 255) v = 255; | |
355 | dest[i * linesize + j] = (int) rint(v); | |
356 | } | |
357 | } | |
358 | } | |
359 | ||
360 | static void idct248_error(const char *name, | |
361 | void (*idct248_put)(uint8_t *dest, int line_size, | |
362 | int16_t *block), | |
363 | int speed) | |
364 | { | |
365 | int it, i, it1, ti, ti1, err_max, v; | |
366 | AVLFG prng; | |
367 | ||
368 | av_lfg_init(&prng, 1); | |
369 | ||
370 | /* just one test to see if code is correct (precision is less | |
371 | important here) */ | |
372 | err_max = 0; | |
373 | for (it = 0; it < NB_ITS; it++) { | |
374 | /* XXX: use forward transform to generate values */ | |
375 | for (i = 0; i < 64; i++) | |
376 | block1[i] = av_lfg_get(&prng) % 256 - 128; | |
377 | block1[0] += 1024; | |
378 | ||
379 | for (i = 0; i < 64; i++) | |
380 | block[i] = block1[i]; | |
381 | idct248_ref(img_dest1, 8, block); | |
382 | ||
383 | for (i = 0; i < 64; i++) | |
384 | block[i] = block1[i]; | |
385 | idct248_put(img_dest, 8, block); | |
386 | ||
387 | for (i = 0; i < 64; i++) { | |
388 | v = abs((int) img_dest[i] - (int) img_dest1[i]); | |
389 | if (v == 255) | |
390 | printf("%d %d\n", img_dest[i], img_dest1[i]); | |
391 | if (v > err_max) | |
392 | err_max = v; | |
393 | } | |
394 | #if 0 | |
395 | printf("ref=\n"); | |
396 | for(i=0;i<8;i++) { | |
397 | int j; | |
398 | for(j=0;j<8;j++) { | |
399 | printf(" %3d", img_dest1[i*8+j]); | |
400 | } | |
401 | printf("\n"); | |
402 | } | |
403 | ||
404 | printf("out=\n"); | |
405 | for(i=0;i<8;i++) { | |
406 | int j; | |
407 | for(j=0;j<8;j++) { | |
408 | printf(" %3d", img_dest[i*8+j]); | |
409 | } | |
410 | printf("\n"); | |
411 | } | |
412 | #endif | |
413 | } | |
414 | printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max); | |
415 | ||
416 | if (!speed) | |
417 | return; | |
418 | ||
419 | ti = av_gettime_relative(); | |
420 | it1 = 0; | |
421 | do { | |
422 | for (it = 0; it < NB_ITS_SPEED; it++) { | |
423 | for (i = 0; i < 64; i++) | |
424 | block[i] = block1[i]; | |
425 | idct248_put(img_dest, 8, block); | |
426 | } | |
427 | emms_c(); | |
428 | it1 += NB_ITS_SPEED; | |
429 | ti1 = av_gettime_relative() - ti; | |
430 | } while (ti1 < 1000000); | |
431 | ||
432 | printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name, | |
433 | (double) it1 * 1000.0 / (double) ti1); | |
434 | } | |
435 | ||
/** Print the command-line usage summary to standard output. */
static void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>] [<bits>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "bits Number of time domain bits to use, 8 is default\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n"
        "-t speed test\n";

    fputs(usage, stdout);
}
447 | ||
448 | #if !HAVE_GETOPT | |
449 | #include "compat/getopt.c" | |
450 | #endif | |
451 | ||
452 | int main(int argc, char **argv) | |
453 | { | |
454 | int test_idct = 0, test_248_dct = 0; | |
455 | int c, i; | |
456 | int test = 1; | |
457 | int speed = 0; | |
458 | int err = 0; | |
459 | int bits=8; | |
460 | ||
461 | ff_ref_dct_init(); | |
462 | ||
463 | for (;;) { | |
464 | c = getopt(argc, argv, "ih4t"); | |
465 | if (c == -1) | |
466 | break; | |
467 | switch (c) { | |
468 | case 'i': | |
469 | test_idct = 1; | |
470 | break; | |
471 | case '4': | |
472 | test_248_dct = 1; | |
473 | break; | |
474 | case 't': | |
475 | speed = 1; | |
476 | break; | |
477 | default: | |
478 | case 'h': | |
479 | help(); | |
480 | return 0; | |
481 | } | |
482 | } | |
483 | ||
484 | if (optind < argc) | |
485 | test = atoi(argv[optind]); | |
486 | if(optind+1 < argc) bits= atoi(argv[optind+1]); | |
487 | ||
488 | printf("ffmpeg DCT/IDCT test\n"); | |
489 | ||
490 | if (test_248_dct) { | |
491 | idct248_error("SIMPLE-C", ff_simple_idct248_put, speed); | |
492 | } else { | |
493 | const int cpu_flags = av_get_cpu_flags(); | |
494 | if (test_idct) { | |
495 | for (i = 0; i < FF_ARRAY_ELEMS(idct_tab); i++) | |
496 | err |= dct_error(&idct_tab[i], test, test_idct, speed, bits); | |
497 | ||
498 | for (i = 0; idct_tab_arch[i].name; i++) | |
499 | if (!(~cpu_flags & idct_tab_arch[i].cpu_flag)) | |
500 | err |= dct_error(&idct_tab_arch[i], test, test_idct, speed, bits); | |
501 | } | |
502 | #if CONFIG_FDCTDSP | |
503 | else { | |
504 | for (i = 0; i < FF_ARRAY_ELEMS(fdct_tab); i++) | |
505 | err |= dct_error(&fdct_tab[i], test, test_idct, speed, bits); | |
506 | ||
507 | for (i = 0; fdct_tab_arch[i].name; i++) | |
508 | if (!(~cpu_flags & fdct_tab_arch[i].cpu_flag)) | |
509 | err |= dct_error(&fdct_tab_arch[i], test, test_idct, speed, bits); | |
510 | } | |
511 | #endif /* CONFIG_FDCTDSP */ | |
512 | } | |
513 | ||
514 | if (err) | |
515 | printf("Error: %d.\n", err); | |
516 | ||
517 | return !!err; | |
518 | } |