2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * DCT test (c) 2001 Fabrice Bellard
25 * Started from sample code by Juan J. Sierralta P.
37 #include "libavutil/cpu.h"
38 #include "libavutil/common.h"
39 #include "libavutil/lfg.h"
40 #include "libavutil/time.h"
44 #include "simple_idct.h"
46 #include "aandcttab.h"
53 void (*func
)(int16_t *block
);
54 enum idct_permutation_type perm_type
;
59 static const struct algo fdct_tab
[] = {
60 { "REF-DBL", ff_ref_fdct
, FF_IDCT_PERM_NONE
},
61 { "IJG-AAN-INT", ff_fdct_ifast
, FF_IDCT_PERM_NONE
},
62 { "IJG-LLM-INT", ff_jpeg_fdct_islow_8
, FF_IDCT_PERM_NONE
},
64 { "FAAN", ff_faandct
, FF_IDCT_PERM_NONE
},
65 #endif /* CONFIG_FAANDCT */
68 static void ff_prores_idct_wrap(int16_t *dst
){
69 DECLARE_ALIGNED(16, static int16_t, qmat
)[64];
75 ff_prores_idct(dst
, qmat
);
81 static const struct algo idct_tab
[] = {
82 { "REF-DBL", ff_ref_idct
, FF_IDCT_PERM_NONE
},
83 { "INT", ff_j_rev_dct
, FF_IDCT_PERM_LIBMPEG2
},
84 { "SIMPLE-C", ff_simple_idct_8
, FF_IDCT_PERM_NONE
},
85 { "PR-C", ff_prores_idct_wrap
, FF_IDCT_PERM_NONE
, 0, 1 },
87 { "FAANI", ff_faanidct
, FF_IDCT_PERM_NONE
},
88 #endif /* CONFIG_FAANIDCT */
89 #if CONFIG_MPEG4_DECODER
90 { "XVID", ff_xvid_idct
, FF_IDCT_PERM_NONE
, 0, 1 },
91 #endif /* CONFIG_MPEG4_DECODER */
95 #include "arm/dct-test.c"
97 #include "ppc/dct-test.c"
99 #include "x86/dct-test.c"
101 static const struct algo fdct_tab_arch
[] = { { 0 } };
102 static const struct algo idct_tab_arch
[] = { { 0 } };
105 #define AANSCALE_BITS 12
108 #define NB_ITS_SPEED 50000
110 DECLARE_ALIGNED(16, static int16_t, block
)[64];
111 DECLARE_ALIGNED(8, static int16_t, block1
)[64];
113 static void init_block(int16_t block
[64], int test
, int is_idct
, AVLFG
*prng
, int vals
)
117 memset(block
, 0, 64 * sizeof(*block
));
121 for (i
= 0; i
< 64; i
++)
122 block
[i
] = (av_lfg_get(prng
) % (2*vals
)) -vals
;
125 for (i
= 0; i
< 64; i
++)
130 j
= av_lfg_get(prng
) % 10 + 1;
131 for (i
= 0; i
< j
; i
++) {
132 int idx
= av_lfg_get(prng
) % 64;
133 block
[idx
] = av_lfg_get(prng
) % (2*vals
) -vals
;
137 block
[ 0] = av_lfg_get(prng
) % (16*vals
) - (8*vals
);
138 block
[63] = (block
[0] & 1) ^ 1;
143 static void permute(int16_t dst
[64], const int16_t src
[64],
144 enum idct_permutation_type perm_type
)
149 if (permute_x86(dst
, src
, perm_type
))
154 case FF_IDCT_PERM_LIBMPEG2
:
155 for (i
= 0; i
< 64; i
++)
156 dst
[(i
& 0x38) | ((i
& 6) >> 1) | ((i
& 1) << 2)] = src
[i
];
158 case FF_IDCT_PERM_PARTTRANS
:
159 for (i
= 0; i
< 64; i
++)
160 dst
[(i
& 0x24) | ((i
& 3) << 3) | ((i
>> 3) & 3)] = src
[i
];
162 case FF_IDCT_PERM_TRANSPOSE
:
163 for (i
= 0; i
< 64; i
++)
164 dst
[(i
>>3) | ((i
<<3)&0x38)] = src
[i
];
167 for (i
= 0; i
< 64; i
++)
173 static int dct_error(const struct algo
*dct
, int test
, int is_idct
, int speed
, const int bits
)
175 void (*ref
)(int16_t *block
) = is_idct
? ff_ref_idct
: ff_ref_fdct
;
178 int64_t err2
, ti
, ti1
, it1
, err_sum
= 0;
179 int64_t sysErr
[64], sysErrMax
= 0;
181 int blockSumErrMax
= 0, blockSumErr
;
183 const int vals
=1<<bits
;
187 av_lfg_init(&prng
, 1);
191 for (i
= 0; i
< 64; i
++)
193 for (it
= 0; it
< NB_ITS
; it
++) {
194 init_block(block1
, test
, is_idct
, &prng
, vals
);
195 permute(block
, block1
, dct
->perm_type
);
200 if (!strcmp(dct
->name
, "IJG-AAN-INT")) {
201 for (i
= 0; i
< 64; i
++) {
202 scale
= 8 * (1 << (AANSCALE_BITS
+ 11)) / ff_aanscales
[i
];
203 block
[i
] = (block
[i
] * scale
) >> AANSCALE_BITS
;
208 if (!strcmp(dct
->name
, "PR-SSE2"))
209 for (i
= 0; i
< 64; i
++)
210 block1
[i
] = av_clip(block1
[i
], 4-512, 1019-512);
213 for (i
= 0; i
< 64; i
++) {
214 int err
= block
[i
] - block1
[i
];
220 sysErr
[i
] += block
[i
] - block1
[i
];
222 if (abs(block
[i
]) > maxout
)
223 maxout
= abs(block
[i
]);
225 if (blockSumErrMax
< blockSumErr
)
226 blockSumErrMax
= blockSumErr
;
228 for (i
= 0; i
< 64; i
++)
229 sysErrMax
= FFMAX(sysErrMax
, FFABS(sysErr
[i
]));
231 for (i
= 0; i
< 64; i
++) {
234 printf("%7d ", (int) sysErr
[i
]);
238 omse
= (double) err2
/ NB_ITS
/ 64;
239 ome
= (double) err_sum
/ NB_ITS
/ 64;
241 spec_err
= is_idct
&& (err_inf
> 1 || omse
> 0.02 || fabs(ome
) > 0.0015);
243 printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
244 is_idct
? "IDCT" : "DCT", dct
->name
, err_inf
,
245 omse
, ome
, (double) sysErrMax
/ NB_ITS
,
246 maxout
, blockSumErrMax
);
248 if (spec_err
&& !dct
->nonspec
)
256 init_block(block
, test
, is_idct
, &prng
, vals
);
257 permute(block1
, block
, dct
->perm_type
);
259 ti
= av_gettime_relative();
262 for (it
= 0; it
< NB_ITS_SPEED
; it
++) {
263 memcpy(block
, block1
, sizeof(block
));
268 ti1
= av_gettime_relative() - ti
;
269 } while (ti1
< 1000000);
271 printf("%s %s: %0.1f kdct/s\n", is_idct
? "IDCT" : "DCT", dct
->name
,
272 (double) it1
* 1000.0 / (double) ti1
);
277 DECLARE_ALIGNED(8, static uint8_t, img_dest
)[64];
278 DECLARE_ALIGNED(8, static uint8_t, img_dest1
)[64];
280 static void idct248_ref(uint8_t *dest
, int linesize
, int16_t *block
)
283 static double c8
[8][8];
284 static double c4
[4][4];
285 double block1
[64], block2
[64], block3
[64];
292 for (i
= 0; i
< 8; i
++) {
294 for (j
= 0; j
< 8; j
++) {
295 s
= (i
== 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
296 c8
[i
][j
] = s
* cos(M_PI
* i
* (j
+ 0.5) / 8.0);
297 sum
+= c8
[i
][j
] * c8
[i
][j
];
301 for (i
= 0; i
< 4; i
++) {
303 for (j
= 0; j
< 4; j
++) {
304 s
= (i
== 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
305 c4
[i
][j
] = s
* cos(M_PI
* i
* (j
+ 0.5) / 4.0);
306 sum
+= c4
[i
][j
] * c4
[i
][j
];
313 for (i
= 0; i
< 4; i
++) {
314 for (j
= 0; j
< 8; j
++) {
315 block1
[8 * (2 * i
) + j
] =
316 (block
[8 * (2 * i
) + j
] + block
[8 * (2 * i
+ 1) + j
]) * s
;
317 block1
[8 * (2 * i
+ 1) + j
] =
318 (block
[8 * (2 * i
) + j
] - block
[8 * (2 * i
+ 1) + j
]) * s
;
323 for (i
= 0; i
< 8; i
++) {
324 for (j
= 0; j
< 8; j
++) {
326 for (k
= 0; k
< 8; k
++)
327 sum
+= c8
[k
][j
] * block1
[8 * i
+ k
];
328 block2
[8 * i
+ j
] = sum
;
333 for (i
= 0; i
< 8; i
++) {
334 for (j
= 0; j
< 4; j
++) {
337 for (k
= 0; k
< 4; k
++)
338 sum
+= c4
[k
][j
] * block2
[8 * (2 * k
) + i
];
339 block3
[8 * (2 * j
) + i
] = sum
;
343 for (k
= 0; k
< 4; k
++)
344 sum
+= c4
[k
][j
] * block2
[8 * (2 * k
+ 1) + i
];
345 block3
[8 * (2 * j
+ 1) + i
] = sum
;
349 /* clamp and store the result */
350 for (i
= 0; i
< 8; i
++) {
351 for (j
= 0; j
< 8; j
++) {
352 v
= block3
[8 * i
+ j
];
354 else if (v
> 255) v
= 255;
355 dest
[i
* linesize
+ j
] = (int) rint(v
);
360 static void idct248_error(const char *name
,
361 void (*idct248_put
)(uint8_t *dest
, int line_size
,
365 int it
, i
, it1
, ti
, ti1
, err_max
, v
;
368 av_lfg_init(&prng
, 1);
370 /* just one test to see if code is correct (precision is less
373 for (it
= 0; it
< NB_ITS
; it
++) {
374 /* XXX: use forward transform to generate values */
375 for (i
= 0; i
< 64; i
++)
376 block1
[i
] = av_lfg_get(&prng
) % 256 - 128;
379 for (i
= 0; i
< 64; i
++)
380 block
[i
] = block1
[i
];
381 idct248_ref(img_dest1
, 8, block
);
383 for (i
= 0; i
< 64; i
++)
384 block
[i
] = block1
[i
];
385 idct248_put(img_dest
, 8, block
);
387 for (i
= 0; i
< 64; i
++) {
388 v
= abs((int) img_dest
[i
] - (int) img_dest1
[i
]);
390 printf("%d %d\n", img_dest
[i
], img_dest1
[i
]);
399 printf(" %3d", img_dest1
[i
*8+j
]);
408 printf(" %3d", img_dest
[i
*8+j
]);
414 printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name
, err_max
);
419 ti
= av_gettime_relative();
422 for (it
= 0; it
< NB_ITS_SPEED
; it
++) {
423 for (i
= 0; i
< 64; i
++)
424 block
[i
] = block1
[i
];
425 idct248_put(img_dest
, 8, block
);
429 ti1
= av_gettime_relative() - ti
;
430 } while (ti1
< 1000000);
432 printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name
,
433 (double) it1
* 1000.0 / (double) ti1
);
436 static void help(void)
438 printf("dct-test [-i] [<test-number>] [<bits>]\n"
439 "test-number 0 -> test with random matrixes\n"
440 " 1 -> test with random sparse matrixes\n"
441 " 2 -> do 3. test from mpeg4 std\n"
442 "bits Number of time domain bits to use, 8 is default\n"
443 "-i test IDCT implementations\n"
444 "-4 test IDCT248 implementations\n"
449 #include "compat/getopt.c"
452 int main(int argc
, char **argv
)
454 int test_idct
= 0, test_248_dct
= 0;
464 c
= getopt(argc
, argv
, "ih4t");
485 test
= atoi(argv
[optind
]);
486 if(optind
+1 < argc
) bits
= atoi(argv
[optind
+1]);
488 printf("ffmpeg DCT/IDCT test\n");
491 idct248_error("SIMPLE-C", ff_simple_idct248_put
, speed
);
493 const int cpu_flags
= av_get_cpu_flags();
495 for (i
= 0; i
< FF_ARRAY_ELEMS(idct_tab
); i
++)
496 err
|= dct_error(&idct_tab
[i
], test
, test_idct
, speed
, bits
);
498 for (i
= 0; idct_tab_arch
[i
].name
; i
++)
499 if (!(~cpu_flags
& idct_tab_arch
[i
].cpu_flag
))
500 err
|= dct_error(&idct_tab_arch
[i
], test
, test_idct
, speed
, bits
);
504 for (i
= 0; i
< FF_ARRAY_ELEMS(fdct_tab
); i
++)
505 err
|= dct_error(&fdct_tab
[i
], test
, test_idct
, speed
, bits
);
507 for (i
= 0; fdct_tab_arch
[i
].name
; i
++)
508 if (!(~cpu_flags
& fdct_tab_arch
[i
].cpu_flag
))
509 err
|= dct_error(&fdct_tab_arch
[i
], test
, test_idct
, speed
, bits
);
511 #endif /* CONFIG_FDCTDSP */
515 printf("Error: %d.\n", err
);