Imported Debian version 0.1.3.1
[deb_fdk-aac.git] / libFDK / src / fft.cpp
1
2 /* -----------------------------------------------------------------------------------------------------------
3 Software License for The Fraunhofer FDK AAC Codec Library for Android
4
5 © Copyright 1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V.
6 All rights reserved.
7
8 1. INTRODUCTION
9 The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software that implements
10 the MPEG Advanced Audio Coding ("AAC") encoding and decoding scheme for digital audio.
11 This FDK AAC Codec software is intended to be used on a wide variety of Android devices.
12
13 AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient general perceptual
14 audio codecs. AAC-ELD is considered the best-performing full-bandwidth communications codec by
15 independent studies and is widely deployed. AAC has been standardized by ISO and IEC as part
16 of the MPEG specifications.
17
18 Patent licenses for necessary patent claims for the FDK AAC Codec (including those of Fraunhofer)
19 may be obtained through Via Licensing (www.vialicensing.com) or through the respective patent owners
20 individually for the purpose of encoding or decoding bit streams in products that are compliant with
21 the ISO/IEC MPEG audio standards. Please note that most manufacturers of Android devices already license
22 these patent claims through Via Licensing or directly from the patent owners, and therefore FDK AAC Codec
23 software may already be covered under those patent licenses when it is used for those licensed purposes only.
24
25 Commercially-licensed AAC software libraries, including floating-point versions with enhanced sound quality,
26 are also available from Fraunhofer. Users are encouraged to check the Fraunhofer website for additional
27 applications information and documentation.
28
29 2. COPYRIGHT LICENSE
30
31 Redistribution and use in source and binary forms, with or without modification, are permitted without
32 payment of copyright license fees provided that you satisfy the following conditions:
33
34 You must retain the complete text of this software license in redistributions of the FDK AAC Codec or
35 your modifications thereto in source code form.
36
37 You must retain the complete text of this software license in the documentation and/or other materials
38 provided with redistributions of the FDK AAC Codec or your modifications thereto in binary form.
39 You must make available free of charge copies of the complete source code of the FDK AAC Codec and your
40 modifications thereto to recipients of copies in binary form.
41
42 The name of Fraunhofer may not be used to endorse or promote products derived from this library without
43 prior written permission.
44
45 You may not charge copyright license fees for anyone to use, copy or distribute the FDK AAC Codec
46 software or your modifications thereto.
47
48 Your modified versions of the FDK AAC Codec must carry prominent notices stating that you changed the software
49 and the date of any change. For modified versions of the FDK AAC Codec, the term
50 "Fraunhofer FDK AAC Codec Library for Android" must be replaced by the term
51 "Third-Party Modified Version of the Fraunhofer FDK AAC Codec Library for Android."
52
53 3. NO PATENT LICENSE
54
55 NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without limitation the patents of Fraunhofer,
56 ARE GRANTED BY THIS SOFTWARE LICENSE. Fraunhofer provides no warranty of patent non-infringement with
57 respect to this software.
58
59 You may use this FDK AAC Codec software or modifications thereto only for purposes that are authorized
60 by appropriate patent licenses.
61
62 4. DISCLAIMER
63
64 This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright holders and contributors
65 "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, including but not limited to the implied warranties
66 of merchantability and fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
67 CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, or consequential damages,
68 including but not limited to procurement of substitute goods or services; loss of use, data, or profits,
69 or business interruption, however caused and on any theory of liability, whether in contract, strict
70 liability, or tort (including negligence), arising in any way out of the use of this software, even if
71 advised of the possibility of such damage.
72
73 5. CONTACT INFORMATION
74
75 Fraunhofer Institute for Integrated Circuits IIS
76 Attention: Audio and Multimedia Departments - FDK AAC LL
77 Am Wolfsmantel 33
78 91058 Erlangen, Germany
79
80 www.iis.fraunhofer.de/amm
81 amm-info@iis.fraunhofer.de
82 ----------------------------------------------------------------------------------------------------------- */
83
84 /*************************** Fraunhofer IIS FDK Tools **********************
85
86 Author(s): Josef Hoepfl, DSP Solutions
87 Description: Fix point FFT
88
89 ******************************************************************************/
90
91 #include "fft.h"
92
93 #include "fft_rad2.h"
94 #include "FDK_tools_rom.h"
95
96
97
98
99
100 #define F3C(x) STC(x)
101
102 #define C31 (F3C(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) */
103
104 /* Performs the FFT of length 3 according to the algorithm after winograd.
105 No scaling of the input vector because the scaling is already done in the rotation vector. */
106 static FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat)
107 {
108 FIXP_DBL r1,r2;
109 FIXP_DBL s1,s2;
110 /* real part */
111 r1 = pDat[2] + pDat[4];
112 r2 = fMult((pDat[2] - pDat[4]), C31);
113 pDat[0] = pDat[0] + r1;
114 r1 = pDat[0] - r1 - (r1>>1);
115
116 /* imaginary part */
117 s1 = pDat[3] + pDat[5];
118 s2 = fMult((pDat[3] - pDat[5]), C31);
119 pDat[1] = pDat[1] + s1;
120 s1 = pDat[1] - s1 - (s1>>1);
121
122 /* combination */
123 pDat[2] = r1 - s2;
124 pDat[4] = r1 + s2;
125 pDat[3] = s1 + r2;
126 pDat[5] = s1 - r2;
127 }
128
129
130 #define F5C(x) STC(x)
131
132 #define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652) */
133 #define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
134 #define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126) */
135 #define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699) */
136 #define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2) */
137
138 /* performs the FFT of length 5 according to the algorithm after winograd */
139 static FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat)
140 {
141 FIXP_DBL r1,r2,r3,r4;
142 FIXP_DBL s1,s2,s3,s4;
143 FIXP_DBL t;
144
145 /* real part */
146 r1 = pDat[2] + pDat[8];
147 r4 = pDat[2] - pDat[8];
148 r3 = pDat[4] + pDat[6];
149 r2 = pDat[4] - pDat[6];
150 t = fMult((r1-r3), C54);
151 r1 = r1 + r3;
152 pDat[0] = pDat[0] + r1;
153 /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
154 the values as fracts */
155 r1 = pDat[0] + (fMultDiv2(r1, C55) <<(2));
156 r3 = r1 - t;
157 r1 = r1 + t;
158 t = fMult((r4 + r2), C51);
159 /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
160 the values as fracts */
161 r4 = t + (fMultDiv2(r4, C52) <<(2));
162 r2 = t + fMult(r2, C53);
163
164 /* imaginary part */
165 s1 = pDat[3] + pDat[9];
166 s4 = pDat[3] - pDat[9];
167 s3 = pDat[5] + pDat[7];
168 s2 = pDat[5] - pDat[7];
169 t = fMult((s1 - s3), C54);
170 s1 = s1 + s3;
171 pDat[1] = pDat[1] + s1;
172 /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
173 the values as fracts */
174 s1 = pDat[1] + (fMultDiv2(s1, C55) <<(2));
175 s3 = s1 - t;
176 s1 = s1 + t;
177 t = fMult((s4 + s2), C51);
178 /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
179 the values as fracts */
180 s4 = t + (fMultDiv2(s4, C52) <<(2));
181 s2 = t + fMult(s2, C53);
182
183 /* combination */
184 pDat[2] = r1 + s2;
185 pDat[8] = r1 - s2;
186 pDat[4] = r3 - s4;
187 pDat[6] = r3 + s4;
188
189 pDat[3] = s1 - r2;
190 pDat[9] = s1 + r2;
191 pDat[5] = s3 + r4;
192 pDat[7] = s3 - r4;
193 }
194
195
196
197
198 #define N3 3
199 #define N5 5
200 #define N6 6
201 #define N15 15
202
203 /* Performs the FFT of length 15. It is split into FFTs of length 3 and length 5. */
204 static inline void fft15(FIXP_DBL *pInput)
205 {
206 FIXP_DBL aDst[2*N15];
207 FIXP_DBL aDst1[2*N15];
208 int i,k,l;
209
210 /* Sort input vector for fft's of length 3
211 input3(0:2) = [input(0) input(5) input(10)];
212 input3(3:5) = [input(3) input(8) input(13)];
213 input3(6:8) = [input(6) input(11) input(1)];
214 input3(9:11) = [input(9) input(14) input(4)];
215 input3(12:14) = [input(12) input(2) input(7)]; */
216 {
217 const FIXP_DBL *pSrc = pInput;
218 FIXP_DBL *RESTRICT pDst = aDst;
219 /* Merge 3 loops into one, skip call of fft3 */
220 for(i=0,l=0,k=0; i<N5; i++, k+=6)
221 {
222 pDst[k+0] = pSrc[l];
223 pDst[k+1] = pSrc[l+1];
224 l += 2*N5;
225 if (l >= (2*N15))
226 l -= (2*N15);
227
228 pDst[k+2] = pSrc[l];
229 pDst[k+3] = pSrc[l+1];
230 l += 2*N5;
231 if (l >= (2*N15))
232 l -= (2*N15);
233 pDst[k+4] = pSrc[l];
234 pDst[k+5] = pSrc[l+1];
235 l += (2*N5) + (2*N3);
236 if (l >= (2*N15))
237 l -= (2*N15);
238
239 /* fft3 merged with shift right by 2 loop */
240 FIXP_DBL r1,r2,r3;
241 FIXP_DBL s1,s2;
242 /* real part */
243 r1 = pDst[k+2] + pDst[k+4];
244 r2 = fMult((pDst[k+2] - pDst[k+4]), C31);
245 s1 = pDst[k+0];
246 pDst[k+0] = (s1 + r1)>>2;
247 r1 = s1 - (r1>>1);
248
249 /* imaginary part */
250 s1 = pDst[k+3] + pDst[k+5];
251 s2 = fMult((pDst[k+3] - pDst[k+5]), C31);
252 r3 = pDst[k+1];
253 pDst[k+1] = (r3 + s1)>>2;
254 s1 = r3 - (s1>>1);
255
256 /* combination */
257 pDst[k+2] = (r1 - s2)>>2;
258 pDst[k+4] = (r1 + s2)>>2;
259 pDst[k+3] = (s1 + r2)>>2;
260 pDst[k+5] = (s1 - r2)>>2;
261 }
262 }
263 /* Sort input vector for fft's of length 5
264 input5(0:4) = [output3(0) output3(3) output3(6) output3(9) output3(12)];
265 input5(5:9) = [output3(1) output3(4) output3(7) output3(10) output3(13)];
266 input5(10:14) = [output3(2) output3(5) output3(8) output3(11) output3(14)]; */
267 /* Merge 2 loops into one, brings about 10% */
268 {
269 const FIXP_DBL *pSrc = aDst;
270 FIXP_DBL *RESTRICT pDst = aDst1;
271 for(i=0,l=0,k=0; i<N3; i++, k+=10)
272 {
273 l = 2*i;
274 pDst[k+0] = pSrc[l+0];
275 pDst[k+1] = pSrc[l+1];
276 pDst[k+2] = pSrc[l+0+(2*N3)];
277 pDst[k+3] = pSrc[l+1+(2*N3)];
278 pDst[k+4] = pSrc[l+0+(4*N3)];
279 pDst[k+5] = pSrc[l+1+(4*N3)];
280 pDst[k+6] = pSrc[l+0+(6*N3)];
281 pDst[k+7] = pSrc[l+1+(6*N3)];
282 pDst[k+8] = pSrc[l+0+(8*N3)];
283 pDst[k+9] = pSrc[l+1+(8*N3)];
284 fft5(&pDst[k]);
285 }
286 }
287 /* Sort output vector of length 15
288 output = [out5(0) out5(6) out5(12) out5(3) out5(9)
289 out5(10) out5(1) out5(7) out5(13) out5(4)
290 out5(5) out5(11) out5(2) out5(8) out5(14)]; */
291 /* optimize clumsy loop, brings about 5% */
292 {
293 const FIXP_DBL *pSrc = aDst1;
294 FIXP_DBL *RESTRICT pDst = pInput;
295 for(i=0,l=0,k=0; i<N3; i++, k+=10)
296 {
297 pDst[k+0] = pSrc[l];
298 pDst[k+1] = pSrc[l+1];
299 l += (2*N6);
300 if (l >= (2*N15))
301 l -= (2*N15);
302 pDst[k+2] = pSrc[l];
303 pDst[k+3] = pSrc[l+1];
304 l += (2*N6);
305 if (l >= (2*N15))
306 l -= (2*N15);
307 pDst[k+4] = pSrc[l];
308 pDst[k+5] = pSrc[l+1];
309 l += (2*N6);
310 if (l >= (2*N15))
311 l -= (2*N15);
312 pDst[k+6] = pSrc[l];
313 pDst[k+7] = pSrc[l+1];
314 l += (2*N6);
315 if (l >= (2*N15))
316 l -= (2*N15);
317 pDst[k+8] = pSrc[l];
318 pDst[k+9] = pSrc[l+1];
319 l += 2; /* no modulo check needed, it cannot occur */
320 }
321 }
322 }
323
324 #define W_PiFOURTH STC(0x5a82799a)
325 #ifndef SUMDIFF_PIFOURTH
326 #define SUMDIFF_PIFOURTH(diff,sum,a,b) \
327 { \
328 FIXP_DBL wa, wb;\
329 wa = fMultDiv2(a, W_PiFOURTH);\
330 wb = fMultDiv2(b, W_PiFOURTH);\
331 diff = wb - wa;\
332 sum = wb + wa;\
333 }
334 #endif
335
336 /* This version is more overflow save, but less cycle optimal. */
337 #define SUMDIFF_EIGTH(x, y, ix, iy, vr, vi, ur, ui) \
338 vr = (x[ 0 + ix]>>1) + (x[16 + ix]>>1); /* Re A + Re B */ \
339 vi = (x[ 8 + ix]>>1) + (x[24 + ix]>>1); /* Re C + Re D */ \
340 ur = (x[ 1 + ix]>>1) + (x[17 + ix]>>1); /* Im A + Im B */ \
341 ui = (x[ 9 + ix]>>1) + (x[25 + ix]>>1); /* Im C + Im D */ \
342 y[ 0 + iy] = vr + vi; /* Re A' = ReA + ReB +ReC + ReD */ \
343 y[ 4 + iy] = vr - vi; /* Re C' = -(ReC+ReD) + (ReA+ReB) */ \
344 y[ 1 + iy] = ur + ui; /* Im A' = sum of imag values */ \
345 y[ 5 + iy] = ur - ui; /* Im C' = -Im C -Im D +Im A +Im B */ \
346 vr -= x[16 + ix]; /* Re A - Re B */ \
347 vi = vi - x[24 + ix]; /* Re C - Re D */ \
348 ur -= x[17 + ix]; /* Im A - Im B */ \
349 ui = ui - x[25 + ix]; /* Im C - Im D */ \
350 y[ 2 + iy] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ \
351 y[ 6 + iy] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ \
352 y[ 3 + iy] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ \
353 y[ 7 + iy] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
354
355 static const FIXP_STP fft16_w16[2] = { STCP(0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d) };
356
357 LNK_SECTION_CODE_L1
358 inline void fft_16(FIXP_DBL *RESTRICT x)
359 {
360 FIXP_DBL vr, vi, ur, ui;
361 FIXP_DBL y[32];
362
363 SUMDIFF_EIGTH(x, y, 0, 0, vr, vi, ur, ui);
364 SUMDIFF_EIGTH(x, y, 4, 8, vr, vi, ur, ui);
365 SUMDIFF_EIGTH(x, y, 2, 16, vr, vi, ur, ui);
366 SUMDIFF_EIGTH(x, y, 6, 24, vr, vi, ur, ui);
367
368 // xt1 = 0
369 // xt2 = 8
370 vr = y[ 8];
371 vi = y[ 9];
372 ur = y[ 0]>>1;
373 ui = y[ 1]>>1;
374 x[ 0] = ur + (vr>>1);
375 x[ 1] = ui + (vi>>1);
376 x[ 8] = ur - (vr>>1);
377 x[ 9] = ui - (vi>>1);
378
379 // xt1 = 4
380 // xt2 = 12
381 vr = y[13];
382 vi = y[12];
383 ur = y[ 4]>>1;
384 ui = y[ 5]>>1;
385 x[ 4] = ur + (vr>>1);
386 x[ 5] = ui - (vi>>1);
387 x[12] = ur - (vr>>1);
388 x[13] = ui + (vi>>1);
389
390 // xt1 = 16
391 // xt2 = 24
392 vr = y[24];
393 vi = y[25];
394 ur = y[16]>>1;
395 ui = y[17]>>1;
396 x[16] = ur + (vr>>1);
397 x[17] = ui + (vi>>1);
398 x[24] = ur - (vr>>1);
399 x[25] = ui - (vi>>1);
400
401 // xt1 = 20
402 // xt2 = 28
403 vr = y[29];
404 vi = y[28];
405 ur = y[20]>>1;
406 ui = y[21]>>1;
407 x[20] = ur + (vr>>1);
408 x[21] = ui - (vi>>1);
409 x[28] = ur - (vr>>1);
410 x[29] = ui + (vi>>1);
411
412 // xt1 = 2
413 // xt2 = 10
414 SUMDIFF_PIFOURTH(vi, vr, y[10], y[11])
415 //vr = fMultDiv2((y[11] + y[10]),W_PiFOURTH);
416 //vi = fMultDiv2((y[11] - y[10]),W_PiFOURTH);
417 ur = y[ 2];
418 ui = y[ 3];
419 x[ 2] = (ur>>1) + vr;
420 x[ 3] = (ui>>1) + vi;
421 x[10] = (ur>>1) - vr;
422 x[11] = (ui>>1) - vi;
423
424 // xt1 = 6
425 // xt2 = 14
426 SUMDIFF_PIFOURTH(vr, vi, y[14], y[15])
427 ur = y[ 6];
428 ui = y[ 7];
429 x[ 6] = (ur>>1) + vr;
430 x[ 7] = (ui>>1) - vi;
431 x[14] = (ur>>1) - vr;
432 x[15] = (ui>>1) + vi;
433
434 // xt1 = 18
435 // xt2 = 26
436 SUMDIFF_PIFOURTH(vi, vr, y[26], y[27])
437 ur = y[18];
438 ui = y[19];
439 x[18] = (ur>>1) + vr;
440 x[19] = (ui>>1) + vi;
441 x[26] = (ur>>1) - vr;
442 x[27] = (ui>>1) - vi;
443
444 // xt1 = 22
445 // xt2 = 30
446 SUMDIFF_PIFOURTH(vr, vi, y[30], y[31])
447 ur = y[22];
448 ui = y[23];
449 x[22] = (ur>>1) + vr;
450 x[23] = (ui>>1) - vi;
451 x[30] = (ur>>1) - vr;
452 x[31] = (ui>>1) + vi;
453
454 // xt1 = 0
455 // xt2 = 16
456 vr = x[16];
457 vi = x[17];
458 ur = x[ 0]>>1;
459 ui = x[ 1]>>1;
460 x[ 0] = ur + (vr>>1);
461 x[ 1] = ui + (vi>>1);
462 x[16] = ur - (vr>>1);
463 x[17] = ui - (vi>>1);
464
465 // xt1 = 8
466 // xt2 = 24
467 vi = x[24];
468 vr = x[25];
469 ur = x[ 8]>>1;
470 ui = x[ 9]>>1;
471 x[ 8] = ur + (vr>>1);
472 x[ 9] = ui - (vi>>1);
473 x[24] = ur - (vr>>1);
474 x[25] = ui + (vi>>1);
475
476 // xt1 = 2
477 // xt2 = 18
478 cplxMultDiv2(&vi, &vr, x[19], x[18], fft16_w16[0]);
479 ur = x[ 2];
480 ui = x[ 3];
481 x[ 2] = (ur>>1) + vr;
482 x[ 3] = (ui>>1) + vi;
483 x[18] = (ur>>1) - vr;
484 x[19] = (ui>>1) - vi;
485
486 // xt1 = 10
487 // xt2 = 26
488 cplxMultDiv2(&vr, &vi, x[27], x[26], fft16_w16[0]);
489 ur = x[10];
490 ui = x[11];
491 x[10] = (ur>>1) + vr;
492 x[11] = (ui>>1) - vi;
493 x[26] = (ur>>1) - vr;
494 x[27] = (ui>>1) + vi;
495
496 // xt1 = 4
497 // xt2 = 20
498 SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
499 ur = x[ 4];
500 ui = x[ 5];
501 x[ 4] = (ur>>1) + vr;
502 x[ 5] = (ui>>1) + vi;
503 x[20] = (ur>>1) - vr;
504 x[21] = (ui>>1) - vi;
505
506 // xt1 = 12
507 // xt2 = 28
508 SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
509 ur = x[12];
510 ui = x[13];
511 x[12] = (ur>>1) + vr;
512 x[13] = (ui>>1) - vi;
513 x[28] = (ur>>1) - vr;
514 x[29] = (ui>>1) + vi;
515
516 // xt1 = 6
517 // xt2 = 22
518 cplxMultDiv2(&vi, &vr, x[23], x[22], fft16_w16[1]);
519 ur = x[ 6];
520 ui = x[ 7];
521 x[ 6] = (ur>>1) + vr;
522 x[ 7] = (ui>>1) + vi;
523 x[22] = (ur>>1) - vr;
524 x[23] = (ui>>1) - vi;
525
526 // xt1 = 14
527 // xt2 = 30
528 cplxMultDiv2(&vr, &vi, x[31], x[30], fft16_w16[1]);
529 ur = x[14];
530 ui = x[15];
531 x[14] = (ur>>1) + vr;
532 x[15] = (ui>>1) - vi;
533 x[30] = (ur>>1) - vr;
534 x[31] = (ui>>1) + vi;
535 }
536
537 #ifndef FUNCTION_fft_32
538 static const FIXP_STP fft32_w32[6] =
539 {
540 STCP (0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d), STCP(0x7d8a5f40, 0x18f8b83c),
541 STCP (0x6a6d98a4, 0x471cece7), STCP(0x471cece7, 0x6a6d98a4), STCP(0x18f8b83c, 0x7d8a5f40)
542 };
543
544 LNK_SECTION_CODE_L1
545 inline void fft_32(FIXP_DBL *x)
546 {
547
548 #define W_PiFOURTH STC(0x5a82799a)
549
550 FIXP_DBL vr,vi,ur,ui;
551 FIXP_DBL y[64];
552
553 /*
554 * 1+2 stage radix 4
555 */
556
557 /////////////////////////////////////////////////////////////////////////////////////////
558
559 // i = 0
560 vr = (x[ 0] + x[32])>>1; /* Re A + Re B */
561 vi = (x[16] + x[48]); /* Re C + Re D */
562 ur = (x[ 1] + x[33])>>1; /* Im A + Im B */
563 ui = (x[17] + x[49]); /* Im C + Im D */
564
565 y[ 0] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */
566 y[ 4] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
567 y[ 1] = ur + (ui>>1); /* Im A' = sum of imag values */
568 y[ 5] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */
569
570 vr -= x[32]; /* Re A - Re B */
571 vi = (vi>>1) - x[48]; /* Re C - Re D */
572 ur -= x[33]; /* Im A - Im B */
573 ui = (ui>>1) - x[49]; /* Im C - Im D */
574
575 y[ 2] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */
576 y[ 6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
577 y[ 3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
578 y[ 7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
579
580 //i=8
581 vr = (x[ 8] + x[40])>>1; /* Re A + Re B */
582 vi = (x[24] + x[56]); /* Re C + Re D */
583 ur = (x[ 9] + x[41])>>1; /* Im A + Im B */
584 ui = (x[25] + x[57]); /* Im C + Im D */
585
586 y[ 8] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */
587 y[12] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
588 y[ 9] = ur + (ui>>1); /* Im A' = sum of imag values */
589 y[13] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */
590
591 vr -= x[40]; /* Re A - Re B */
592 vi = (vi>>1) - x[56]; /* Re C - Re D */
593 ur -= x[41]; /* Im A - Im B */
594 ui = (ui>>1) - x[57]; /* Im C - Im D */
595
596 y[10] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */
597 y[14] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
598 y[11] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
599 y[15] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
600
601 //i=16
602 vr = (x[ 4] + x[36])>>1; /* Re A + Re B */
603 vi = (x[20] + x[52]); /* Re C + Re D */
604 ur = (x[ 5] + x[37])>>1; /* Im A + Im B */
605 ui = (x[21] + x[53]); /* Im C + Im D */
606
607 y[16] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */
608 y[20] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
609 y[17] = ur + (ui>>1); /* Im A' = sum of imag values */
610 y[21] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */
611
612 vr -= x[36]; /* Re A - Re B */
613 vi = (vi>>1) - x[52]; /* Re C - Re D */
614 ur -= x[37]; /* Im A - Im B */
615 ui = (ui>>1) - x[53]; /* Im C - Im D */
616
617 y[18] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */
618 y[22] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
619 y[19] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
620 y[23] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
621
622 //i=24
623 vr = (x[12] + x[44])>>1; /* Re A + Re B */
624 vi = (x[28] + x[60]); /* Re C + Re D */
625 ur = (x[13] + x[45])>>1; /* Im A + Im B */
626 ui = (x[29] + x[61]); /* Im C + Im D */
627
628 y[24] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */
629 y[28] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
630 y[25] = ur + (ui>>1); /* Im A' = sum of imag values */
631 y[29] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */
632
633 vr -= x[44]; /* Re A - Re B */
634 vi = (vi>>1) - x[60]; /* Re C - Re D */
635 ur -= x[45]; /* Im A - Im B */
636 ui = (ui>>1) - x[61]; /* Im C - Im D */
637
638 y[26] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */
639 y[30] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
640 y[27] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
641 y[31] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
642
643 // i = 32
644 vr = (x[ 2] + x[34])>>1; /* Re A + Re B */
645 vi = (x[18] + x[50]); /* Re C + Re D */
646 ur = (x[ 3] + x[35])>>1; /* Im A + Im B */
647 ui = (x[19] + x[51]); /* Im C + Im D */
648
649 y[32] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */
650 y[36] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
651 y[33] = ur + (ui>>1); /* Im A' = sum of imag values */
652 y[37] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */
653
654 vr -= x[34]; /* Re A - Re B */
655 vi = (vi>>1) - x[50]; /* Re C - Re D */
656 ur -= x[35]; /* Im A - Im B */
657 ui = (ui>>1) - x[51]; /* Im C - Im D */
658
659 y[34] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */
660 y[38] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
661 y[35] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
662 y[39] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
663
664 //i=40
665 vr = (x[10] + x[42])>>1; /* Re A + Re B */
666 vi = (x[26] + x[58]); /* Re C + Re D */
667 ur = (x[11] + x[43])>>1; /* Im A + Im B */
668 ui = (x[27] + x[59]); /* Im C + Im D */
669
670 y[40] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */
671 y[44] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
672 y[41] = ur + (ui>>1); /* Im A' = sum of imag values */
673 y[45] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */
674
675 vr -= x[42]; /* Re A - Re B */
676 vi = (vi>>1) - x[58]; /* Re C - Re D */
677 ur -= x[43]; /* Im A - Im B */
678 ui = (ui>>1) - x[59]; /* Im C - Im D */
679
680 y[42] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */
681 y[46] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
682 y[43] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
683 y[47] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
684
685 //i=48
686 vr = (x[ 6] + x[38])>>1; /* Re A + Re B */
687 vi = (x[22] + x[54]); /* Re C + Re D */
688 ur = (x[ 7] + x[39])>>1; /* Im A + Im B */
689 ui = (x[23] + x[55]); /* Im C + Im D */
690
691 y[48] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */
692 y[52] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
693 y[49] = ur + (ui>>1); /* Im A' = sum of imag values */
694 y[53] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */
695
696 vr -= x[38]; /* Re A - Re B */
697 vi = (vi>>1) - x[54]; /* Re C - Re D */
698 ur -= x[39]; /* Im A - Im B */
699 ui = (ui>>1) - x[55]; /* Im C - Im D */
700
701 y[50] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */
702 y[54] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
703 y[51] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
704 y[55] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
705
706 //i=56
707 vr = (x[14] + x[46])>>1; /* Re A + Re B */
708 vi = (x[30] + x[62]); /* Re C + Re D */
709 ur = (x[15] + x[47])>>1; /* Im A + Im B */
710 ui = (x[31] + x[63]); /* Im C + Im D */
711
712 y[56] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */
713 y[60] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
714 y[57] = ur + (ui>>1); /* Im A' = sum of imag values */
715 y[61] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */
716
717 vr -= x[46]; /* Re A - Re B */
718 vi = (vi>>1) - x[62]; /* Re C - Re D */
719 ur -= x[47]; /* Im A - Im B */
720 ui = (ui>>1) - x[63]; /* Im C - Im D */
721
722 y[58] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */
723 y[62] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
724 y[59] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
725 y[63] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
726
727
728 FIXP_DBL *xt = &x[0];
729 FIXP_DBL *yt = &y[0];
730
731 int j = 4;
732 do
733 {
734 vr = yt[8];
735 vi = yt[9];
736 ur = yt[0]>>1;
737 ui = yt[1]>>1;
738 xt[ 0] = ur + (vr>>1);
739 xt[ 1] = ui + (vi>>1);
740 xt[ 8] = ur - (vr>>1);
741 xt[ 9] = ui - (vi>>1);
742
743 vr = yt[13];
744 vi = yt[12];
745 ur = yt[4]>>1;
746 ui = yt[5]>>1;
747 xt[ 4] = ur + (vr>>1);
748 xt[ 5] = ui - (vi>>1);
749 xt[12] = ur - (vr>>1);
750 xt[13] = ui + (vi>>1);
751
752 SUMDIFF_PIFOURTH(vi, vr, yt[10], yt[11])
753 ur = yt[2];
754 ui = yt[3];
755 xt[ 2] = (ur>>1) + vr;
756 xt[ 3] = (ui>>1) + vi;
757 xt[10] = (ur>>1) - vr;
758 xt[11] = (ui>>1) - vi;
759
760 SUMDIFF_PIFOURTH(vr, vi, yt[14], yt[15])
761 ur = yt[6];
762 ui = yt[7];
763
764 xt[ 6] = (ur>>1) + vr;
765 xt[ 7] = (ui>>1) - vi;
766 xt[14] = (ur>>1) - vr;
767 xt[15] = (ui>>1) + vi;
768 xt += 16;
769 yt += 16;
770 } while (--j != 0);
771
772 vr = x[16];
773 vi = x[17];
774 ur = x[ 0]>>1;
775 ui = x[ 1]>>1;
776 x[ 0] = ur + (vr>>1);
777 x[ 1] = ui + (vi>>1);
778 x[16] = ur - (vr>>1);
779 x[17] = ui - (vi>>1);
780
781 vi = x[24];
782 vr = x[25];
783 ur = x[ 8]>>1;
784 ui = x[ 9]>>1;
785 x[ 8] = ur + (vr>>1);
786 x[ 9] = ui - (vi>>1);
787 x[24] = ur - (vr>>1);
788 x[25] = ui + (vi>>1);
789
790 vr = x[48];
791 vi = x[49];
792 ur = x[32]>>1;
793 ui = x[33]>>1;
794 x[32] = ur + (vr>>1);
795 x[33] = ui + (vi>>1);
796 x[48] = ur - (vr>>1);
797 x[49] = ui - (vi>>1);
798
799 vi = x[56];
800 vr = x[57];
801 ur = x[40]>>1;
802 ui = x[41]>>1;
803 x[40] = ur + (vr>>1);
804 x[41] = ui - (vi>>1);
805 x[56] = ur - (vr>>1);
806 x[57] = ui + (vi>>1);
807
808 cplxMultDiv2(&vi, &vr, x[19], x[18], fft32_w32[0]);
809 ur = x[ 2];
810 ui = x[ 3];
811 x[ 2] = (ur>>1) + vr;
812 x[ 3] = (ui>>1) + vi;
813 x[18] = (ur>>1) - vr;
814 x[19] = (ui>>1) - vi;
815
816 cplxMultDiv2(&vr, &vi, x[27], x[26], fft32_w32[0]);
817 ur = x[10];
818 ui = x[11];
819 x[10] = (ur>>1) + vr;
820 x[11] = (ui>>1) - vi;
821 x[26] = (ur>>1) - vr;
822 x[27] = (ui>>1) + vi;
823
824 cplxMultDiv2(&vi, &vr, x[51], x[50], fft32_w32[0]);
825 ur = x[34];
826 ui = x[35];
827 x[34] = (ur>>1) + vr;
828 x[35] = (ui>>1) + vi;
829 x[50] = (ur>>1) - vr;
830 x[51] = (ui>>1) - vi;
831
832 cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[0]);
833 ur = x[42];
834 ui = x[43];
835 x[42] = (ur>>1) + vr;
836 x[43] = (ui>>1) - vi;
837 x[58] = (ur>>1) - vr;
838 x[59] = (ui>>1) + vi;
839
840 SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
841 ur = x[ 4];
842 ui = x[ 5];
843 x[ 4] = (ur>>1) + vr;
844 x[ 5] = (ui>>1) + vi;
845 x[20] = (ur>>1) - vr;
846 x[21] = (ui>>1) - vi;
847
848 SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
849 ur = x[12];
850 ui = x[13];
851 x[12] = (ur>>1) + vr;
852 x[13] = (ui>>1) - vi;
853 x[28] = (ur>>1) - vr;
854 x[29] = (ui>>1) + vi;
855
856 SUMDIFF_PIFOURTH(vi, vr, x[52], x[53])
857 ur = x[36];
858 ui = x[37];
859 x[36] = (ur>>1) + vr;
860 x[37] = (ui>>1) + vi;
861 x[52] = (ur>>1) - vr;
862 x[53] = (ui>>1) - vi;
863
864 SUMDIFF_PIFOURTH(vr, vi, x[60], x[61])
865 ur = x[44];
866 ui = x[45];
867 x[44] = (ur>>1) + vr;
868 x[45] = (ui>>1) - vi;
869 x[60] = (ur>>1) - vr;
870 x[61] = (ui>>1) + vi;
871
872
873 cplxMultDiv2(&vi, &vr, x[23], x[22], fft32_w32[1]);
874 ur = x[ 6];
875 ui = x[ 7];
876 x[ 6] = (ur>>1) + vr;
877 x[ 7] = (ui>>1) + vi;
878 x[22] = (ur>>1) - vr;
879 x[23] = (ui>>1) - vi;
880
881 cplxMultDiv2(&vr, &vi, x[31], x[30], fft32_w32[1]);
882 ur = x[14];
883 ui = x[15];
884 x[14] = (ur>>1) + vr;
885 x[15] = (ui>>1) - vi;
886 x[30] = (ur>>1) - vr;
887 x[31] = (ui>>1) + vi;
888
889 cplxMultDiv2(&vi, &vr, x[55], x[54], fft32_w32[1]);
890 ur = x[38];
891 ui = x[39];
892 x[38] = (ur>>1) + vr;
893 x[39] = (ui>>1) + vi;
894 x[54] = (ur>>1) - vr;
895 x[55] = (ui>>1) - vi;
896
897 cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[1]);
898 ur = x[46];
899 ui = x[47];
900
901 x[46] = (ur>>1) + vr;
902 x[47] = (ui>>1) - vi;
903 x[62] = (ur>>1) - vr;
904 x[63] = (ui>>1) + vi;
905
906 vr = x[32];
907 vi = x[33];
908 ur = x[ 0]>>1;
909 ui = x[ 1]>>1;
910 x[ 0] = ur + (vr>>1);
911 x[ 1] = ui + (vi>>1);
912 x[32] = ur - (vr>>1);
913 x[33] = ui - (vi>>1);
914
915 vi = x[48];
916 vr = x[49];
917 ur = x[16]>>1;
918 ui = x[17]>>1;
919 x[16] = ur + (vr>>1);
920 x[17] = ui - (vi>>1);
921 x[48] = ur - (vr>>1);
922 x[49] = ui + (vi>>1);
923
924 cplxMultDiv2(&vi, &vr, x[35], x[34], fft32_w32[2]);
925 ur = x[ 2];
926 ui = x[ 3];
927 x[ 2] = (ur>>1) + vr;
928 x[ 3] = (ui>>1) + vi;
929 x[34] = (ur>>1) - vr;
930 x[35] = (ui>>1) - vi;
931
932 cplxMultDiv2(&vr, &vi, x[51], x[50], fft32_w32[2]);
933 ur = x[18];
934 ui = x[19];
935 x[18] = (ur>>1) + vr;
936 x[19] = (ui>>1) - vi;
937 x[50] = (ur>>1) - vr;
938 x[51] = (ui>>1) + vi;
939
940 cplxMultDiv2(&vi, &vr, x[37], x[36], fft32_w32[0]);
941 ur = x[ 4];
942 ui = x[ 5];
943 x[ 4] = (ur>>1) + vr;
944 x[ 5] = (ui>>1) + vi;
945 x[36] = (ur>>1) - vr;
946 x[37] = (ui>>1) - vi;
947
948 cplxMultDiv2(&vr, &vi, x[53], x[52], fft32_w32[0]);
949 ur = x[20];
950 ui = x[21];
951 x[20] = (ur>>1) + vr;
952 x[21] = (ui>>1) - vi;
953 x[52] = (ur>>1) - vr;
954 x[53] = (ui>>1) + vi;
955
956 cplxMultDiv2(&vi, &vr, x[39], x[38], fft32_w32[3]);
957 ur = x[ 6];
958 ui = x[ 7];
959 x[ 6] = (ur>>1) + vr;
960 x[ 7] = (ui>>1) + vi;
961 x[38] = (ur>>1) - vr;
962 x[39] = (ui>>1) - vi;
963
964 cplxMultDiv2(&vr, &vi, x[55], x[54], fft32_w32[3]);
965 ur = x[22];
966 ui = x[23];
967 x[22] = (ur>>1) + vr;
968 x[23] = (ui>>1) - vi;
969 x[54] = (ur>>1) - vr;
970 x[55] = (ui>>1) + vi;
971
972 SUMDIFF_PIFOURTH(vi, vr, x[40], x[41])
973 ur = x[ 8];
974 ui = x[ 9];
975 x[ 8] = (ur>>1) + vr;
976 x[ 9] = (ui>>1) + vi;
977 x[40] = (ur>>1) - vr;
978 x[41] = (ui>>1) - vi;
979
980 SUMDIFF_PIFOURTH(vr, vi, x[56], x[57])
981 ur = x[24];
982 ui = x[25];
983 x[24] = (ur>>1) + vr;
984 x[25] = (ui>>1) - vi;
985 x[56] = (ur>>1) - vr;
986 x[57] = (ui>>1) + vi;
987
988 cplxMultDiv2(&vi, &vr, x[43], x[42], fft32_w32[4]);
989 ur = x[10];
990 ui = x[11];
991
992 x[10] = (ur>>1) + vr;
993 x[11] = (ui>>1) + vi;
994 x[42] = (ur>>1) - vr;
995 x[43] = (ui>>1) - vi;
996
997 cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[4]);
998 ur = x[26];
999 ui = x[27];
1000 x[26] = (ur>>1) + vr;
1001 x[27] = (ui>>1) - vi;
1002 x[58] = (ur>>1) - vr;
1003 x[59] = (ui>>1) + vi;
1004
1005 cplxMultDiv2(&vi, &vr, x[45], x[44], fft32_w32[1]);
1006 ur = x[12];
1007 ui = x[13];
1008 x[12] = (ur>>1) + vr;
1009 x[13] = (ui>>1) + vi;
1010 x[44] = (ur>>1) - vr;
1011 x[45] = (ui>>1) - vi;
1012
1013 cplxMultDiv2(&vr, &vi, x[61], x[60], fft32_w32[1]);
1014 ur = x[28];
1015 ui = x[29];
1016 x[28] = (ur>>1) + vr;
1017 x[29] = (ui>>1) - vi;
1018 x[60] = (ur>>1) - vr;
1019 x[61] = (ui>>1) + vi;
1020
1021 cplxMultDiv2(&vi, &vr, x[47], x[46], fft32_w32[5]);
1022 ur = x[14];
1023 ui = x[15];
1024 x[14] = (ur>>1) + vr;
1025 x[15] = (ui>>1) + vi;
1026 x[46] = (ur>>1) - vr;
1027 x[47] = (ui>>1) - vi;
1028
1029 cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[5]);
1030 ur = x[30];
1031 ui = x[31];
1032 x[30] = (ur>>1) + vr;
1033 x[31] = (ui>>1) - vi;
1034 x[62] = (ur>>1) - vr;
1035 x[63] = (ui>>1) + vi;
1036 }
1037 #endif /* #ifndef FUNCTION_fft_32 */
1038
1039
1040 /**
1041 * \brief Apply rotation vectors to a data buffer.
1042 * \param cl length of each row of input data.
1043 * \param l total length of input data.
1044 * \param pVecRe real part of rotation ceofficient vector.
1045 * \param pVecIm imaginary part of rotation ceofficient vector.
1046 */
1047 static inline void fft_apply_rot_vector(FIXP_DBL *RESTRICT pData, const int cl, const int l, const FIXP_STB *pVecRe, const FIXP_STB *pVecIm)
1048 {
1049 FIXP_DBL re, im;
1050 FIXP_STB vre, vim;
1051
1052 int i, c;
1053
1054 for(i=0; i<cl; i++) {
1055 re = pData[2*i];
1056 im = pData[2*i+1];
1057
1058 pData[2*i] = re>>2; /* * 0.25 */
1059 pData[2*i+1] = im>>2; /* * 0.25 */
1060 }
1061 for(; i<l; i+=cl)
1062 {
1063 re = pData[2*i];
1064 im = pData[2*i+1];
1065
1066 pData[2*i] = re>>2; /* * 0.25 */
1067 pData[2*i+1] = im>>2; /* * 0.25 */
1068
1069 for (c=i+1; c<i+cl; c++)
1070 {
1071 re = pData[2*c]>>1;
1072 im = pData[2*c+1]>>1;
1073 vre = *pVecRe++;
1074 vim = *pVecIm++;
1075
1076 cplxMultDiv2(&pData[2*c+1], &pData[2*c], im, re, vre, vim);
1077 }
1078 }
1079 }
1080
1081 #define FFT_TWO_STAGE_MACRO_ENABLE
1082
1083
1084 #ifdef FFT_TWO_STAGE_MACRO_ENABLE
1085
1086 #define fftN2(pInput, length, dim1, dim2, fft_func1, fft_func2, RotVectorReal, RotVectorImag) \
1087 { \
1088 int i, j; \
1089 \
1090 C_ALLOC_SCRATCH_START(aDst, FIXP_DBL, length*2); \
1091 C_ALLOC_SCRATCH_START(aDst2, FIXP_DBL, dim2*2); \
1092 \
1093 FDK_ASSERT(length == dim1*dim2); \
1094 \
1095 /* Perform dim2 times the fft of length dim1. The input samples are at the address of pSrc and the \
1096 output samples are at the address of pDst. The input vector for the fft of length dim1 is built \
1097 of the interleaved samples in pSrc, the output samples are stored consecutively. \
1098 */ \
1099 { \
1100 const FIXP_DBL* pSrc = pInput; \
1101 FIXP_DBL *RESTRICT pDst = aDst; \
1102 \
1103 for(i=0; i<dim2; i++) \
1104 { \
1105 for(j=0; j<dim1; j++) \
1106 { \
1107 pDst[2*j] = pSrc[2*j*dim2]; \
1108 pDst[2*j+1] = pSrc[2*j*dim2+1]; \
1109 } \
1110 \
1111 fft_func1(pDst); \
1112 pSrc += 2; \
1113 pDst = pDst + 2*dim1; \
1114 } \
1115 } \
1116 \
1117 /* Perform the modulation of the output of the fft of length dim1 */ \
1118 fft_apply_rot_vector(aDst, dim1, length, RotVectorReal, RotVectorImag); \
1119 \
1120 /* Perform dim1 times the fft of length dim2. The input samples are at the address of aDst and the \
1121 output samples are at the address of pInput. The input vector for the fft of length dim2 is built \
1122 of the interleaved samples in aDst, the output samples are stored consecutively at the address \
1123 of pInput. \
1124 */ \
1125 { \
1126 const FIXP_DBL* pSrc = aDst; \
1127 FIXP_DBL *RESTRICT pDst = aDst2; \
1128 FIXP_DBL *RESTRICT pDstOut = pInput; \
1129 \
1130 for(i=0; i<dim1; i++) \
1131 { \
1132 for(j=0; j<dim2; j++) \
1133 { \
1134 pDst[2*j] = pSrc[2*j*dim1]; \
1135 pDst[2*j+1] = pSrc[2*j*dim1+1]; \
1136 } \
1137 \
1138 fft_func2(pDst); \
1139 \
1140 for(j=0; j<dim2; j++) \
1141 { \
1142 pDstOut[2*j*dim1] = pDst[2*j]; \
1143 pDstOut[2*j*dim1+1] = pDst[2*j+1]; \
1144 } \
1145 pSrc += 2; \
1146 pDstOut += 2; \
1147 } \
1148 } \
1149 \
1150 C_ALLOC_SCRATCH_END(aDst2, FIXP_DBL, dim2*2); \
1151 C_ALLOC_SCRATCH_END(aDst, FIXP_DBL, length*2); \
1152 } \
1153
1154 #else /* FFT_TWO_STAGE_MACRO_ENABLE */
1155
1156 /* select either switch case of function pointer. */
1157 //#define FFT_TWO_STAGE_SWITCH_CASE
1158
1159 static inline void fftN2(
1160 FIXP_DBL *pInput,
1161 const int length,
1162 const int dim1,
1163 const int dim2,
1164 void (* const fft1)(FIXP_DBL *),
1165 void (* const fft2)(FIXP_DBL *),
1166 const FIXP_STB *RotVectorReal,
1167 const FIXP_STB *RotVectorImag
1168 )
1169 {
1170 /* The real part of the input samples are at the addresses with even indices and the imaginary
1171 part of the input samples are at the addresses with odd indices. The output samples are stored
1172 at the address of pInput
1173 */
1174 FIXP_DBL *pSrc, *pDst, *pDstOut;
1175 int i, j;
1176
1177 C_ALLOC_SCRATCH_START(aDst, FIXP_DBL, length*2);
1178 C_ALLOC_SCRATCH_START(aDst2, FIXP_DBL, dim2*2);
1179
1180 FDK_ASSERT(length == dim1*dim2);
1181
1182 /* Perform dim2 times the fft of length dim1. The input samples are at the address of pSrc and the
1183 output samples are at the address of pDst. The input vector for the fft of length dim1 is built
1184 of the interleaved samples in pSrc, the output samples are stored consecutively.
1185 */
1186 pSrc = pInput;
1187 pDst = aDst;
1188 for(i=0; i<length/dim1; i++)
1189 {
1190 for(j=0; j<length/dim2; j++)
1191 {
1192 pDst[2*j] = pSrc[2*j*dim2];
1193 pDst[2*j+1] = pSrc[2*j*dim2+1];
1194 }
1195
1196 /* fft of size dim1 */
1197 #ifndef FFT_TWO_STAGE_SWITCH_CASE
1198 fft1(pDst);
1199 #else
1200 switch (dim1) {
1201 case 3: fft3(pDst); break;
1202 case 4: fft_4(pDst); break;
1203 case 5: fft5(pDst); break;
1204 case 8: fft_8(pDst); break;
1205 case 15: fft15(pDst); break;
1206 case 16: fft_16(pDst); break;
1207 case 32: fft_32(pDst); break;
1208 /*case 64: fft_64(pDst); break;*/
1209 case 128: fft_128(pDst); break;
1210 }
1211 #endif
1212 pSrc += 2;
1213 pDst = pDst + 2*length/dim2;
1214 }
1215
1216 /* Perform the modulation of the output of the fft of length dim1 */
1217 pSrc=aDst;
1218 fft_apply_rot_vector(pSrc, length/dim2, length, RotVectorReal, RotVectorImag);
1219
1220 /* Perform dim1 times the fft of length dim2. The input samples are at the address of aDst and the
1221 output samples are at the address of pInput. The input vector for the fft of length dim2 is built
1222 of the interleaved samples in aDst, the output samples are stored consecutively at the address
1223 of pInput.
1224 */
1225 pSrc = aDst;
1226 pDst = aDst2;
1227 pDstOut = pInput;
1228 for(i=0; i<length/dim2; i++)
1229 {
1230 for(j=0; j<length/dim1; j++)
1231 {
1232 pDst[2*j] = pSrc[2*j*dim1];
1233 pDst[2*j+1] = pSrc[2*j*dim1+1];
1234 }
1235
1236 #ifndef FFT_TWO_STAGE_SWITCH_CASE
1237 fft2(pDst);
1238 #else
1239 switch (dim2) {
1240 case 3: fft3(pDst); break;
1241 case 4: fft_4(pDst); break;
1242 case 5: fft5(pDst); break;
1243 case 8: fft_8(pDst); break;
1244 case 15: fft15(pDst); break;
1245 case 16: fft_16(pDst); break;
1246 case 32: fft_32(pDst); break;
1247 /*case 64: fft_64(pDst); break;*/
1248 case 128: fft_128(pDst); break;
1249 }
1250 #endif
1251
1252 for(j=0; j<length/dim1; j++)
1253 {
1254 pDstOut[2*j*dim1] = pDst[2*j];
1255 pDstOut[2*j*dim1+1] = pDst[2*j+1];
1256 }
1257 pSrc += 2;
1258 pDstOut += 2;
1259 }
1260
1261 C_ALLOC_SCRATCH_END(aDst2, FIXP_DBL, dim2*2);
1262 C_ALLOC_SCRATCH_END(aDst, FIXP_DBL, length*2);
1263 }
1264
1265 #endif /* FFT_TWO_STAGE_MACRO_ENABLE */
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278 #define SCALEFACTOR60 5
1279 /**
1280 The function performs the fft of length 60. It is splittet into fft's of length 4 and fft's of
1281 length 15. Between the fft's a modolation is calculated.
1282 */
1283 static inline void fft60(FIXP_DBL *pInput, INT *pScalefactor)
1284 {
1285 fftN2(
1286 pInput, 60, 4, 15,
1287 fft_4, fft15,
1288 RotVectorReal60, RotVectorImag60
1289 );
1290 *pScalefactor += SCALEFACTOR60;
1291 }
1292
1293
1294
1295 /* Fallback implementation in case of no better implementation available. */
1296
1297 #define SCALEFACTOR240 7
1298
1299 /**
1300 The function performs the fft of length 240. It is splittet into fft's of length 16 and fft's of
1301 length 15. Between the fft's a modulation is calculated.
1302 */
1303 static inline void fft240(FIXP_DBL *pInput, INT *pScalefactor)
1304 {
1305 fftN2(
1306 pInput, 240, 16, 15,
1307 fft_16, fft15,
1308 RotVectorReal240, RotVectorImag240
1309 );
1310 *pScalefactor += SCALEFACTOR240;
1311 }
1312
1313
1314 #define SCALEFACTOR480 8
1315 #define N32 32
1316 #define TABLE_SIZE_16 (32/2)
1317
1318 /**
1319 The function performs the fft of length 480. It is splittet into fft's of length 32 and fft's of
1320 length 15. Between the fft's a modulation is calculated.
1321 */
1322 static inline void fft480(FIXP_DBL *pInput, INT *pScalefactor)
1323 {
1324 fftN2(
1325 pInput, 480, 32, 15,
1326 fft_32, fft15,
1327 RotVectorReal480, RotVectorImag480
1328 );
1329 *pScalefactor += SCALEFACTOR480;
1330 }
1331
1332 void fft(int length, FIXP_DBL *pInput, INT *pScalefactor)
1333 {
1334 if (length == 32)
1335 {
1336 fft_32(pInput);
1337 *pScalefactor += SCALEFACTOR32;
1338 }
1339 else
1340 {
1341
1342 switch (length) {
1343 case 16:
1344 fft_16(pInput);
1345 *pScalefactor += SCALEFACTOR16;
1346 break;
1347 case 8:
1348 fft_8(pInput);
1349 *pScalefactor += SCALEFACTOR8;
1350 break;
1351 case 3:
1352 fft3(pInput);
1353 break;
1354 case 4:
1355 fft_4(pInput);
1356 *pScalefactor += SCALEFACTOR4;
1357 break;
1358 case 5:
1359 fft5(pInput);
1360 break;
1361 case 15:
1362 fft15(pInput);
1363 *pScalefactor += 2;
1364 break;
1365 case 60:
1366 fft60(pInput, pScalefactor);
1367 break;
1368 case 64:
1369 dit_fft(pInput, 6, SineTable512, 512);
1370 *pScalefactor += SCALEFACTOR64;
1371 break;
1372 case 240:
1373 fft240(pInput, pScalefactor);
1374 break;
1375 case 256:
1376 dit_fft(pInput, 8, SineTable512, 512);
1377 *pScalefactor += SCALEFACTOR256;
1378 break;
1379 case 480:
1380 fft480(pInput, pScalefactor);
1381 break;
1382 case 512:
1383 dit_fft(pInput, 9, SineTable512, 512);
1384 *pScalefactor += SCALEFACTOR512;
1385 break;
1386 default:
1387 FDK_ASSERT(0); /* FFT length not supported! */
1388 break;
1389 }
1390 }
1391 }
1392
1393
1394 void ifft(int length, FIXP_DBL *pInput, INT *scalefactor)
1395 {
1396 switch (length) {
1397 default:
1398 FDK_ASSERT(0); /* IFFT length not supported! */
1399 break;
1400 }
1401 }
1402
1403