Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * (c) 2002 Fabrice Bellard | |
3 | * | |
4 | * This file is part of FFmpeg. | |
5 | * | |
6 | * FFmpeg is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * FFmpeg is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with FFmpeg; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | */ | |
20 | ||
21 | /** | |
22 | * @file | |
23 | * FFT and MDCT tests. | |
24 | */ | |
25 | ||
26 | #include "config.h" | |
27 | ||
28 | #include <math.h> | |
29 | #if HAVE_UNISTD_H | |
30 | #include <unistd.h> | |
31 | #endif | |
32 | #include <stdio.h> | |
33 | #include <stdlib.h> | |
34 | #include <string.h> | |
35 | ||
36 | #include "libavutil/cpu.h" | |
37 | #include "libavutil/lfg.h" | |
38 | #include "libavutil/log.h" | |
39 | #include "libavutil/mathematics.h" | |
40 | #include "libavutil/time.h" | |
41 | ||
42 | #include "fft.h" | |
43 | #if FFT_FLOAT | |
44 | #include "dct.h" | |
45 | #include "rdft.h" | |
46 | #endif | |
47 | ||
48 | /* reference fft */ | |
49 | ||
50 | #define MUL16(a, b) ((a) * (b)) | |
51 | ||
52 | #define CMAC(pre, pim, are, aim, bre, bim) \ | |
53 | { \ | |
54 | pre += (MUL16(are, bre) - MUL16(aim, bim)); \ | |
55 | pim += (MUL16(are, bim) + MUL16(bre, aim)); \ | |
56 | } | |
57 | ||
58 | #if FFT_FLOAT | |
59 | #define RANGE 1.0 | |
60 | #define REF_SCALE(x, bits) (x) | |
61 | #define FMT "%10.6f" | |
62 | #elif FFT_FIXED_32 | |
63 | #define RANGE 8388608 | |
64 | #define REF_SCALE(x, bits) (x) | |
65 | #define FMT "%6d" | |
66 | #else | |
67 | #define RANGE 16384 | |
68 | #define REF_SCALE(x, bits) ((x) / (1 << (bits))) | |
69 | #define FMT "%6d" | |
70 | #endif | |
71 | ||
72 | static struct { | |
73 | float re, im; | |
74 | } *exptab; | |
75 | ||
76 | static int fft_ref_init(int nbits, int inverse) | |
77 | { | |
78 | int i, n = 1 << nbits; | |
79 | ||
80 | exptab = av_malloc_array((n / 2), sizeof(*exptab)); | |
81 | if (!exptab) | |
82 | return AVERROR(ENOMEM); | |
83 | ||
84 | for (i = 0; i < (n / 2); i++) { | |
85 | double alpha = 2 * M_PI * (float) i / (float) n; | |
86 | double c1 = cos(alpha), s1 = sin(alpha); | |
87 | if (!inverse) | |
88 | s1 = -s1; | |
89 | exptab[i].re = c1; | |
90 | exptab[i].im = s1; | |
91 | } | |
92 | return 0; | |
93 | } | |
94 | ||
95 | static void fft_ref(FFTComplex *tabr, FFTComplex *tab, int nbits) | |
96 | { | |
97 | int i, j; | |
98 | int n = 1 << nbits; | |
99 | int n2 = n >> 1; | |
100 | ||
101 | for (i = 0; i < n; i++) { | |
102 | double tmp_re = 0, tmp_im = 0; | |
103 | FFTComplex *q = tab; | |
104 | for (j = 0; j < n; j++) { | |
105 | double s, c; | |
106 | int k = (i * j) & (n - 1); | |
107 | if (k >= n2) { | |
108 | c = -exptab[k - n2].re; | |
109 | s = -exptab[k - n2].im; | |
110 | } else { | |
111 | c = exptab[k].re; | |
112 | s = exptab[k].im; | |
113 | } | |
114 | CMAC(tmp_re, tmp_im, c, s, q->re, q->im); | |
115 | q++; | |
116 | } | |
117 | tabr[i].re = REF_SCALE(tmp_re, nbits); | |
118 | tabr[i].im = REF_SCALE(tmp_im, nbits); | |
119 | } | |
120 | } | |
121 | ||
122 | #if CONFIG_MDCT | |
123 | static void imdct_ref(FFTSample *out, FFTSample *in, int nbits) | |
124 | { | |
125 | int i, k, n = 1 << nbits; | |
126 | ||
127 | for (i = 0; i < n; i++) { | |
128 | double sum = 0; | |
129 | for (k = 0; k < n / 2; k++) { | |
130 | int a = (2 * i + 1 + (n / 2)) * (2 * k + 1); | |
131 | double f = cos(M_PI * a / (double) (2 * n)); | |
132 | sum += f * in[k]; | |
133 | } | |
134 | out[i] = REF_SCALE(-sum, nbits - 2); | |
135 | } | |
136 | } | |
137 | ||
138 | /* NOTE: no normalisation by 1 / N is done */ | |
139 | static void mdct_ref(FFTSample *output, FFTSample *input, int nbits) | |
140 | { | |
141 | int i, k, n = 1 << nbits; | |
142 | ||
143 | /* do it by hand */ | |
144 | for (k = 0; k < n / 2; k++) { | |
145 | double s = 0; | |
146 | for (i = 0; i < n; i++) { | |
147 | double a = (2 * M_PI * (2 * i + 1 + n / 2) * (2 * k + 1) / (4 * n)); | |
148 | s += input[i] * cos(a); | |
149 | } | |
150 | output[k] = REF_SCALE(s, nbits - 1); | |
151 | } | |
152 | } | |
153 | #endif /* CONFIG_MDCT */ | |
154 | ||
155 | #if FFT_FLOAT | |
156 | #if CONFIG_DCT | |
157 | static void idct_ref(FFTSample *output, FFTSample *input, int nbits) | |
158 | { | |
159 | int i, k, n = 1 << nbits; | |
160 | ||
161 | /* do it by hand */ | |
162 | for (i = 0; i < n; i++) { | |
163 | double s = 0.5 * input[0]; | |
164 | for (k = 1; k < n; k++) { | |
165 | double a = M_PI * k * (i + 0.5) / n; | |
166 | s += input[k] * cos(a); | |
167 | } | |
168 | output[i] = 2 * s / n; | |
169 | } | |
170 | } | |
171 | ||
172 | static void dct_ref(FFTSample *output, FFTSample *input, int nbits) | |
173 | { | |
174 | int i, k, n = 1 << nbits; | |
175 | ||
176 | /* do it by hand */ | |
177 | for (k = 0; k < n; k++) { | |
178 | double s = 0; | |
179 | for (i = 0; i < n; i++) { | |
180 | double a = M_PI * k * (i + 0.5) / n; | |
181 | s += input[i] * cos(a); | |
182 | } | |
183 | output[k] = s; | |
184 | } | |
185 | } | |
186 | #endif /* CONFIG_DCT */ | |
187 | #endif /* FFT_FLOAT */ | |
188 | ||
189 | static FFTSample frandom(AVLFG *prng) | |
190 | { | |
191 | return (int16_t) av_lfg_get(prng) / 32768.0 * RANGE; | |
192 | } | |
193 | ||
194 | static int check_diff(FFTSample *tab1, FFTSample *tab2, int n, double scale) | |
195 | { | |
196 | int i, err = 0; | |
197 | double error = 0, max = 0; | |
198 | ||
199 | for (i = 0; i < n; i++) { | |
200 | double e = fabsf(tab1[i] - (tab2[i] / scale)) / RANGE; | |
201 | if (e >= 1e-3) { | |
202 | av_log(NULL, AV_LOG_ERROR, "ERROR %5d: "FMT" "FMT"\n", | |
203 | i, tab1[i], tab2[i]); | |
204 | err = 1; | |
205 | } | |
206 | error += e * e; | |
207 | if (e > max) | |
208 | max = e; | |
209 | } | |
210 | av_log(NULL, AV_LOG_INFO, "max:%f e:%g\n", max, sqrt(error / n)); | |
211 | return err; | |
212 | } | |
213 | ||
214 | static void help(void) | |
215 | { | |
216 | av_log(NULL, AV_LOG_INFO, | |
217 | "usage: fft-test [-h] [-s] [-i] [-n b]\n" | |
218 | "-h print this help\n" | |
219 | "-s speed test\n" | |
220 | "-m (I)MDCT test\n" | |
221 | "-d (I)DCT test\n" | |
222 | "-r (I)RDFT test\n" | |
223 | "-i inverse transform test\n" | |
224 | "-n b set the transform size to 2^b\n" | |
225 | "-f x set scale factor for output data of (I)MDCT to x\n"); | |
226 | } | |
227 | ||
228 | enum tf_transform { | |
229 | TRANSFORM_FFT, | |
230 | TRANSFORM_MDCT, | |
231 | TRANSFORM_RDFT, | |
232 | TRANSFORM_DCT, | |
233 | }; | |
234 | ||
235 | #if !HAVE_GETOPT | |
236 | #include "compat/getopt.c" | |
237 | #endif | |
238 | ||
239 | int main(int argc, char **argv) | |
240 | { | |
241 | FFTComplex *tab, *tab1, *tab_ref; | |
242 | FFTSample *tab2; | |
243 | enum tf_transform transform = TRANSFORM_FFT; | |
244 | FFTContext m, s; | |
245 | #if FFT_FLOAT | |
246 | RDFTContext r; | |
247 | DCTContext d; | |
248 | #endif /* FFT_FLOAT */ | |
249 | int it, i, err = 1; | |
250 | int do_speed = 0, do_inverse = 0; | |
251 | int fft_nbits = 9, fft_size; | |
252 | double scale = 1.0; | |
253 | AVLFG prng; | |
254 | ||
255 | av_lfg_init(&prng, 1); | |
256 | ||
257 | for (;;) { | |
258 | int c = getopt(argc, argv, "hsimrdn:f:c:"); | |
259 | if (c == -1) | |
260 | break; | |
261 | switch (c) { | |
262 | case 'h': | |
263 | help(); | |
264 | return 1; | |
265 | case 's': | |
266 | do_speed = 1; | |
267 | break; | |
268 | case 'i': | |
269 | do_inverse = 1; | |
270 | break; | |
271 | case 'm': | |
272 | transform = TRANSFORM_MDCT; | |
273 | break; | |
274 | case 'r': | |
275 | transform = TRANSFORM_RDFT; | |
276 | break; | |
277 | case 'd': | |
278 | transform = TRANSFORM_DCT; | |
279 | break; | |
280 | case 'n': | |
281 | fft_nbits = atoi(optarg); | |
282 | break; | |
283 | case 'f': | |
284 | scale = atof(optarg); | |
285 | break; | |
286 | case 'c': | |
287 | { | |
288 | int cpuflags = av_get_cpu_flags(); | |
289 | ||
290 | if (av_parse_cpu_caps(&cpuflags, optarg) < 0) | |
291 | return 1; | |
292 | ||
293 | av_force_cpu_flags(cpuflags); | |
294 | break; | |
295 | } | |
296 | } | |
297 | } | |
298 | ||
299 | fft_size = 1 << fft_nbits; | |
300 | tab = av_malloc_array(fft_size, sizeof(FFTComplex)); | |
301 | tab1 = av_malloc_array(fft_size, sizeof(FFTComplex)); | |
302 | tab_ref = av_malloc_array(fft_size, sizeof(FFTComplex)); | |
303 | tab2 = av_malloc_array(fft_size, sizeof(FFTSample)); | |
304 | ||
305 | if (!(tab && tab1 && tab_ref && tab2)) | |
306 | goto cleanup; | |
307 | ||
308 | switch (transform) { | |
309 | #if CONFIG_MDCT | |
310 | case TRANSFORM_MDCT: | |
311 | av_log(NULL, AV_LOG_INFO, "Scale factor is set to %f\n", scale); | |
312 | if (do_inverse) | |
313 | av_log(NULL, AV_LOG_INFO, "IMDCT"); | |
314 | else | |
315 | av_log(NULL, AV_LOG_INFO, "MDCT"); | |
316 | ff_mdct_init(&m, fft_nbits, do_inverse, scale); | |
317 | break; | |
318 | #endif /* CONFIG_MDCT */ | |
319 | case TRANSFORM_FFT: | |
320 | if (do_inverse) | |
321 | av_log(NULL, AV_LOG_INFO, "IFFT"); | |
322 | else | |
323 | av_log(NULL, AV_LOG_INFO, "FFT"); | |
324 | ff_fft_init(&s, fft_nbits, do_inverse); | |
325 | if ((err = fft_ref_init(fft_nbits, do_inverse)) < 0) | |
326 | goto cleanup; | |
327 | break; | |
328 | #if FFT_FLOAT | |
329 | # if CONFIG_RDFT | |
330 | case TRANSFORM_RDFT: | |
331 | if (do_inverse) | |
332 | av_log(NULL, AV_LOG_INFO, "IDFT_C2R"); | |
333 | else | |
334 | av_log(NULL, AV_LOG_INFO, "DFT_R2C"); | |
335 | ff_rdft_init(&r, fft_nbits, do_inverse ? IDFT_C2R : DFT_R2C); | |
336 | if ((err = fft_ref_init(fft_nbits, do_inverse)) < 0) | |
337 | goto cleanup; | |
338 | break; | |
339 | # endif /* CONFIG_RDFT */ | |
340 | # if CONFIG_DCT | |
341 | case TRANSFORM_DCT: | |
342 | if (do_inverse) | |
343 | av_log(NULL, AV_LOG_INFO, "DCT_III"); | |
344 | else | |
345 | av_log(NULL, AV_LOG_INFO, "DCT_II"); | |
346 | ff_dct_init(&d, fft_nbits, do_inverse ? DCT_III : DCT_II); | |
347 | break; | |
348 | # endif /* CONFIG_DCT */ | |
349 | #endif /* FFT_FLOAT */ | |
350 | default: | |
351 | av_log(NULL, AV_LOG_ERROR, "Requested transform not supported\n"); | |
352 | goto cleanup; | |
353 | } | |
354 | av_log(NULL, AV_LOG_INFO, " %d test\n", fft_size); | |
355 | ||
356 | /* generate random data */ | |
357 | ||
358 | for (i = 0; i < fft_size; i++) { | |
359 | tab1[i].re = frandom(&prng); | |
360 | tab1[i].im = frandom(&prng); | |
361 | } | |
362 | ||
363 | /* checking result */ | |
364 | av_log(NULL, AV_LOG_INFO, "Checking...\n"); | |
365 | ||
366 | switch (transform) { | |
367 | #if CONFIG_MDCT | |
368 | case TRANSFORM_MDCT: | |
369 | if (do_inverse) { | |
370 | imdct_ref(&tab_ref->re, &tab1->re, fft_nbits); | |
371 | m.imdct_calc(&m, tab2, &tab1->re); | |
372 | err = check_diff(&tab_ref->re, tab2, fft_size, scale); | |
373 | } else { | |
374 | mdct_ref(&tab_ref->re, &tab1->re, fft_nbits); | |
375 | m.mdct_calc(&m, tab2, &tab1->re); | |
376 | err = check_diff(&tab_ref->re, tab2, fft_size / 2, scale); | |
377 | } | |
378 | break; | |
379 | #endif /* CONFIG_MDCT */ | |
380 | case TRANSFORM_FFT: | |
381 | memcpy(tab, tab1, fft_size * sizeof(FFTComplex)); | |
382 | s.fft_permute(&s, tab); | |
383 | s.fft_calc(&s, tab); | |
384 | ||
385 | fft_ref(tab_ref, tab1, fft_nbits); | |
386 | err = check_diff(&tab_ref->re, &tab->re, fft_size * 2, 1.0); | |
387 | break; | |
388 | #if FFT_FLOAT | |
389 | #if CONFIG_RDFT | |
390 | case TRANSFORM_RDFT: | |
391 | { | |
392 | int fft_size_2 = fft_size >> 1; | |
393 | if (do_inverse) { | |
394 | tab1[0].im = 0; | |
395 | tab1[fft_size_2].im = 0; | |
396 | for (i = 1; i < fft_size_2; i++) { | |
397 | tab1[fft_size_2 + i].re = tab1[fft_size_2 - i].re; | |
398 | tab1[fft_size_2 + i].im = -tab1[fft_size_2 - i].im; | |
399 | } | |
400 | ||
401 | memcpy(tab2, tab1, fft_size * sizeof(FFTSample)); | |
402 | tab2[1] = tab1[fft_size_2].re; | |
403 | ||
404 | r.rdft_calc(&r, tab2); | |
405 | fft_ref(tab_ref, tab1, fft_nbits); | |
406 | for (i = 0; i < fft_size; i++) { | |
407 | tab[i].re = tab2[i]; | |
408 | tab[i].im = 0; | |
409 | } | |
410 | err = check_diff(&tab_ref->re, &tab->re, fft_size * 2, 0.5); | |
411 | } else { | |
412 | for (i = 0; i < fft_size; i++) { | |
413 | tab2[i] = tab1[i].re; | |
414 | tab1[i].im = 0; | |
415 | } | |
416 | r.rdft_calc(&r, tab2); | |
417 | fft_ref(tab_ref, tab1, fft_nbits); | |
418 | tab_ref[0].im = tab_ref[fft_size_2].re; | |
419 | err = check_diff(&tab_ref->re, tab2, fft_size, 1.0); | |
420 | } | |
421 | break; | |
422 | } | |
423 | #endif /* CONFIG_RDFT */ | |
424 | #if CONFIG_DCT | |
425 | case TRANSFORM_DCT: | |
426 | memcpy(tab, tab1, fft_size * sizeof(FFTComplex)); | |
427 | d.dct_calc(&d, &tab->re); | |
428 | if (do_inverse) | |
429 | idct_ref(&tab_ref->re, &tab1->re, fft_nbits); | |
430 | else | |
431 | dct_ref(&tab_ref->re, &tab1->re, fft_nbits); | |
432 | err = check_diff(&tab_ref->re, &tab->re, fft_size, 1.0); | |
433 | break; | |
434 | #endif /* CONFIG_DCT */ | |
435 | #endif /* FFT_FLOAT */ | |
436 | } | |
437 | ||
438 | /* do a speed test */ | |
439 | ||
440 | if (do_speed) { | |
441 | int64_t time_start, duration; | |
442 | int nb_its; | |
443 | ||
444 | av_log(NULL, AV_LOG_INFO, "Speed test...\n"); | |
445 | /* we measure during about 1 seconds */ | |
446 | nb_its = 1; | |
447 | for (;;) { | |
448 | time_start = av_gettime_relative(); | |
449 | for (it = 0; it < nb_its; it++) { | |
450 | switch (transform) { | |
451 | case TRANSFORM_MDCT: | |
452 | if (do_inverse) | |
453 | m.imdct_calc(&m, &tab->re, &tab1->re); | |
454 | else | |
455 | m.mdct_calc(&m, &tab->re, &tab1->re); | |
456 | break; | |
457 | case TRANSFORM_FFT: | |
458 | memcpy(tab, tab1, fft_size * sizeof(FFTComplex)); | |
459 | s.fft_calc(&s, tab); | |
460 | break; | |
461 | #if FFT_FLOAT | |
462 | case TRANSFORM_RDFT: | |
463 | memcpy(tab2, tab1, fft_size * sizeof(FFTSample)); | |
464 | r.rdft_calc(&r, tab2); | |
465 | break; | |
466 | case TRANSFORM_DCT: | |
467 | memcpy(tab2, tab1, fft_size * sizeof(FFTSample)); | |
468 | d.dct_calc(&d, tab2); | |
469 | break; | |
470 | #endif /* FFT_FLOAT */ | |
471 | } | |
472 | } | |
473 | duration = av_gettime_relative() - time_start; | |
474 | if (duration >= 1000000) | |
475 | break; | |
476 | nb_its *= 2; | |
477 | } | |
478 | av_log(NULL, AV_LOG_INFO, | |
479 | "time: %0.1f us/transform [total time=%0.2f s its=%d]\n", | |
480 | (double) duration / nb_its, | |
481 | (double) duration / 1000000.0, | |
482 | nb_its); | |
483 | } | |
484 | ||
485 | switch (transform) { | |
486 | #if CONFIG_MDCT | |
487 | case TRANSFORM_MDCT: | |
488 | ff_mdct_end(&m); | |
489 | break; | |
490 | #endif /* CONFIG_MDCT */ | |
491 | case TRANSFORM_FFT: | |
492 | ff_fft_end(&s); | |
493 | break; | |
494 | #if FFT_FLOAT | |
495 | # if CONFIG_RDFT | |
496 | case TRANSFORM_RDFT: | |
497 | ff_rdft_end(&r); | |
498 | break; | |
499 | # endif /* CONFIG_RDFT */ | |
500 | # if CONFIG_DCT | |
501 | case TRANSFORM_DCT: | |
502 | ff_dct_end(&d); | |
503 | break; | |
504 | # endif /* CONFIG_DCT */ | |
505 | #endif /* FFT_FLOAT */ | |
506 | } | |
507 | ||
508 | cleanup: | |
509 | av_free(tab); | |
510 | av_free(tab1); | |
511 | av_free(tab2); | |
512 | av_free(tab_ref); | |
513 | av_free(exptab); | |
514 | ||
515 | if (err) | |
516 | printf("Error: %d.\n", err); | |
517 | ||
518 | return !!err; | |
519 | } |