/*
 * FFT transform, optimized with VSX built-in functions
 * Copyright (c) 2014 Rong Yan
 *
 * This algorithm (though not any of the implementation details) is
 * based on libdjbfft by D. J. Bernstein.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
27 #include "libavutil/cpu.h"
28 #include "libavutil/ppc/types_altivec.h"
29 #include "libavutil/ppc/util_altivec.h"
30 #include "libavcodec/fft.h"
31 #include "libavcodec/fft-internal.h"
36 static void fft32_vsx_interleave(FFTComplex
*z
)
38 fft16_vsx_interleave(z
);
39 fft8_vsx_interleave(z
+16);
40 fft8_vsx_interleave(z
+24);
41 pass_vsx_interleave(z
,ff_cos_32
,4);
44 static void fft64_vsx_interleave(FFTComplex
*z
)
46 fft32_vsx_interleave(z
);
47 fft16_vsx_interleave(z
+32);
48 fft16_vsx_interleave(z
+48);
49 pass_vsx_interleave(z
,ff_cos_64
, 8);
51 static void fft128_vsx_interleave(FFTComplex
*z
)
53 fft64_vsx_interleave(z
);
54 fft32_vsx_interleave(z
+64);
55 fft32_vsx_interleave(z
+96);
56 pass_vsx_interleave(z
,ff_cos_128
,16);
58 static void fft256_vsx_interleave(FFTComplex
*z
)
60 fft128_vsx_interleave(z
);
61 fft64_vsx_interleave(z
+128);
62 fft64_vsx_interleave(z
+192);
63 pass_vsx_interleave(z
,ff_cos_256
,32);
65 static void fft512_vsx_interleave(FFTComplex
*z
)
67 fft256_vsx_interleave(z
);
68 fft128_vsx_interleave(z
+256);
69 fft128_vsx_interleave(z
+384);
70 pass_vsx_interleave(z
,ff_cos_512
,64);
72 static void fft1024_vsx_interleave(FFTComplex
*z
)
74 fft512_vsx_interleave(z
);
75 fft256_vsx_interleave(z
+512);
76 fft256_vsx_interleave(z
+768);
77 pass_vsx_interleave(z
,ff_cos_1024
,128);
80 static void fft2048_vsx_interleave(FFTComplex
*z
)
82 fft1024_vsx_interleave(z
);
83 fft512_vsx_interleave(z
+1024);
84 fft512_vsx_interleave(z
+1536);
85 pass_vsx_interleave(z
,ff_cos_2048
,256);
87 static void fft4096_vsx_interleave(FFTComplex
*z
)
89 fft2048_vsx_interleave(z
);
90 fft1024_vsx_interleave(z
+2048);
91 fft1024_vsx_interleave(z
+3072);
92 pass_vsx_interleave(z
,ff_cos_4096
, 512);
94 static void fft8192_vsx_interleave(FFTComplex
*z
)
96 fft4096_vsx_interleave(z
);
97 fft2048_vsx_interleave(z
+4096);
98 fft2048_vsx_interleave(z
+6144);
99 pass_vsx_interleave(z
,ff_cos_8192
,1024);
101 static void fft16384_vsx_interleave(FFTComplex
*z
)
103 fft8192_vsx_interleave(z
);
104 fft4096_vsx_interleave(z
+8192);
105 fft4096_vsx_interleave(z
+12288);
106 pass_vsx_interleave(z
,ff_cos_16384
,2048);
108 static void fft32768_vsx_interleave(FFTComplex
*z
)
110 fft16384_vsx_interleave(z
);
111 fft8192_vsx_interleave(z
+16384);
112 fft8192_vsx_interleave(z
+24576);
113 pass_vsx_interleave(z
,ff_cos_32768
,4096);
115 static void fft65536_vsx_interleave(FFTComplex
*z
)
117 fft32768_vsx_interleave(z
);
118 fft16384_vsx_interleave(z
+32768);
119 fft16384_vsx_interleave(z
+49152);
120 pass_vsx_interleave(z
,ff_cos_65536
,8192);
123 static void fft32_vsx(FFTComplex
*z
)
128 pass_vsx(z
,ff_cos_32
,4);
131 static void fft64_vsx(FFTComplex
*z
)
136 pass_vsx(z
,ff_cos_64
, 8);
138 static void fft128_vsx(FFTComplex
*z
)
143 pass_vsx(z
,ff_cos_128
,16);
145 static void fft256_vsx(FFTComplex
*z
)
150 pass_vsx(z
,ff_cos_256
,32);
152 static void fft512_vsx(FFTComplex
*z
)
157 pass_vsx(z
,ff_cos_512
,64);
159 static void fft1024_vsx(FFTComplex
*z
)
164 pass_vsx(z
,ff_cos_1024
,128);
167 static void fft2048_vsx(FFTComplex
*z
)
172 pass_vsx(z
,ff_cos_2048
,256);
174 static void fft4096_vsx(FFTComplex
*z
)
179 pass_vsx(z
,ff_cos_4096
, 512);
181 static void fft8192_vsx(FFTComplex
*z
)
186 pass_vsx(z
,ff_cos_8192
,1024);
188 static void fft16384_vsx(FFTComplex
*z
)
192 fft4096_vsx(z
+12288);
193 pass_vsx(z
,ff_cos_16384
,2048);
195 static void fft32768_vsx(FFTComplex
*z
)
198 fft8192_vsx(z
+16384);
199 fft8192_vsx(z
+24576);
200 pass_vsx(z
,ff_cos_32768
,4096);
202 static void fft65536_vsx(FFTComplex
*z
)
205 fft16384_vsx(z
+32768);
206 fft16384_vsx(z
+49152);
207 pass_vsx(z
,ff_cos_65536
,8192);
210 static void (* const fft_dispatch_vsx
[])(FFTComplex
*) = {
211 fft4_vsx
, fft8_vsx
, fft16_vsx
, fft32_vsx
, fft64_vsx
, fft128_vsx
, fft256_vsx
, fft512_vsx
, fft1024_vsx
,
212 fft2048_vsx
, fft4096_vsx
, fft8192_vsx
, fft16384_vsx
, fft32768_vsx
, fft65536_vsx
,
214 static void (* const fft_dispatch_vsx_interleave
[])(FFTComplex
*) = {
215 fft4_vsx_interleave
, fft8_vsx_interleave
, fft16_vsx_interleave
, fft32_vsx_interleave
, fft64_vsx_interleave
,
216 fft128_vsx_interleave
, fft256_vsx_interleave
, fft512_vsx_interleave
, fft1024_vsx_interleave
,
217 fft2048_vsx_interleave
, fft4096_vsx_interleave
, fft8192_vsx_interleave
, fft16384_vsx_interleave
, fft32768_vsx_interleave
, fft65536_vsx_interleave
,
219 void ff_fft_calc_interleave_vsx(FFTContext
*s
, FFTComplex
*z
)
221 fft_dispatch_vsx_interleave
[s
->nbits
-2](z
);
223 void ff_fft_calc_vsx(FFTContext
*s
, FFTComplex
*z
)
225 fft_dispatch_vsx
[s
->nbits
-2](z
);
227 #endif /* HAVE_VSX */