Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libavcodec / ppc / fft_vsx.c
CommitLineData
2ba45a60
DM
1/*
2 * FFT transform, optimized with VSX built-in functions
3 * Copyright (c) 2014 Rong Yan
4 *
5 * This algorithm (though not any of the implementation details) is
6 * based on libdjbfft by D. J. Bernstein.
7 *
8 * This file is part of FFmpeg.
9 *
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25
26#include "config.h"
27#include "libavutil/cpu.h"
28#include "libavutil/ppc/types_altivec.h"
29#include "libavutil/ppc/util_altivec.h"
30#include "libavcodec/fft.h"
31#include "libavcodec/fft-internal.h"
32#include "fft_vsx.h"
33
34#if HAVE_VSX
35
36static void fft32_vsx_interleave(FFTComplex *z)
37{
38 fft16_vsx_interleave(z);
39 fft8_vsx_interleave(z+16);
40 fft8_vsx_interleave(z+24);
41 pass_vsx_interleave(z,ff_cos_32,4);
42}
43
44static void fft64_vsx_interleave(FFTComplex *z)
45{
46 fft32_vsx_interleave(z);
47 fft16_vsx_interleave(z+32);
48 fft16_vsx_interleave(z+48);
49 pass_vsx_interleave(z,ff_cos_64, 8);
50}
51static void fft128_vsx_interleave(FFTComplex *z)
52{
53 fft64_vsx_interleave(z);
54 fft32_vsx_interleave(z+64);
55 fft32_vsx_interleave(z+96);
56 pass_vsx_interleave(z,ff_cos_128,16);
57}
58static void fft256_vsx_interleave(FFTComplex *z)
59{
60 fft128_vsx_interleave(z);
61 fft64_vsx_interleave(z+128);
62 fft64_vsx_interleave(z+192);
63 pass_vsx_interleave(z,ff_cos_256,32);
64}
65static void fft512_vsx_interleave(FFTComplex *z)
66{
67 fft256_vsx_interleave(z);
68 fft128_vsx_interleave(z+256);
69 fft128_vsx_interleave(z+384);
70 pass_vsx_interleave(z,ff_cos_512,64);
71}
72static void fft1024_vsx_interleave(FFTComplex *z)
73{
74 fft512_vsx_interleave(z);
75 fft256_vsx_interleave(z+512);
76 fft256_vsx_interleave(z+768);
77 pass_vsx_interleave(z,ff_cos_1024,128);
78
79}
80static void fft2048_vsx_interleave(FFTComplex *z)
81{
82 fft1024_vsx_interleave(z);
83 fft512_vsx_interleave(z+1024);
84 fft512_vsx_interleave(z+1536);
85 pass_vsx_interleave(z,ff_cos_2048,256);
86}
87static void fft4096_vsx_interleave(FFTComplex *z)
88{
89 fft2048_vsx_interleave(z);
90 fft1024_vsx_interleave(z+2048);
91 fft1024_vsx_interleave(z+3072);
92 pass_vsx_interleave(z,ff_cos_4096, 512);
93}
94static void fft8192_vsx_interleave(FFTComplex *z)
95{
96 fft4096_vsx_interleave(z);
97 fft2048_vsx_interleave(z+4096);
98 fft2048_vsx_interleave(z+6144);
99 pass_vsx_interleave(z,ff_cos_8192,1024);
100}
101static void fft16384_vsx_interleave(FFTComplex *z)
102{
103 fft8192_vsx_interleave(z);
104 fft4096_vsx_interleave(z+8192);
105 fft4096_vsx_interleave(z+12288);
106 pass_vsx_interleave(z,ff_cos_16384,2048);
107}
108static void fft32768_vsx_interleave(FFTComplex *z)
109{
110 fft16384_vsx_interleave(z);
111 fft8192_vsx_interleave(z+16384);
112 fft8192_vsx_interleave(z+24576);
113 pass_vsx_interleave(z,ff_cos_32768,4096);
114}
115static void fft65536_vsx_interleave(FFTComplex *z)
116{
117 fft32768_vsx_interleave(z);
118 fft16384_vsx_interleave(z+32768);
119 fft16384_vsx_interleave(z+49152);
120 pass_vsx_interleave(z,ff_cos_65536,8192);
121}
122
123static void fft32_vsx(FFTComplex *z)
124{
125 fft16_vsx(z);
126 fft8_vsx(z+16);
127 fft8_vsx(z+24);
128 pass_vsx(z,ff_cos_32,4);
129}
130
131static void fft64_vsx(FFTComplex *z)
132{
133 fft32_vsx(z);
134 fft16_vsx(z+32);
135 fft16_vsx(z+48);
136 pass_vsx(z,ff_cos_64, 8);
137}
138static void fft128_vsx(FFTComplex *z)
139{
140 fft64_vsx(z);
141 fft32_vsx(z+64);
142 fft32_vsx(z+96);
143 pass_vsx(z,ff_cos_128,16);
144}
145static void fft256_vsx(FFTComplex *z)
146{
147 fft128_vsx(z);
148 fft64_vsx(z+128);
149 fft64_vsx(z+192);
150 pass_vsx(z,ff_cos_256,32);
151}
152static void fft512_vsx(FFTComplex *z)
153{
154 fft256_vsx(z);
155 fft128_vsx(z+256);
156 fft128_vsx(z+384);
157 pass_vsx(z,ff_cos_512,64);
158}
159static void fft1024_vsx(FFTComplex *z)
160{
161 fft512_vsx(z);
162 fft256_vsx(z+512);
163 fft256_vsx(z+768);
164 pass_vsx(z,ff_cos_1024,128);
165
166}
167static void fft2048_vsx(FFTComplex *z)
168{
169 fft1024_vsx(z);
170 fft512_vsx(z+1024);
171 fft512_vsx(z+1536);
172 pass_vsx(z,ff_cos_2048,256);
173}
174static void fft4096_vsx(FFTComplex *z)
175{
176 fft2048_vsx(z);
177 fft1024_vsx(z+2048);
178 fft1024_vsx(z+3072);
179 pass_vsx(z,ff_cos_4096, 512);
180}
181static void fft8192_vsx(FFTComplex *z)
182{
183 fft4096_vsx(z);
184 fft2048_vsx(z+4096);
185 fft2048_vsx(z+6144);
186 pass_vsx(z,ff_cos_8192,1024);
187}
188static void fft16384_vsx(FFTComplex *z)
189{
190 fft8192_vsx(z);
191 fft4096_vsx(z+8192);
192 fft4096_vsx(z+12288);
193 pass_vsx(z,ff_cos_16384,2048);
194}
195static void fft32768_vsx(FFTComplex *z)
196{
197 fft16384_vsx(z);
198 fft8192_vsx(z+16384);
199 fft8192_vsx(z+24576);
200 pass_vsx(z,ff_cos_32768,4096);
201}
202static void fft65536_vsx(FFTComplex *z)
203{
204 fft32768_vsx(z);
205 fft16384_vsx(z+32768);
206 fft16384_vsx(z+49152);
207 pass_vsx(z,ff_cos_65536,8192);
208}
209
210static void (* const fft_dispatch_vsx[])(FFTComplex*) = {
211 fft4_vsx, fft8_vsx, fft16_vsx, fft32_vsx, fft64_vsx, fft128_vsx, fft256_vsx, fft512_vsx, fft1024_vsx,
212 fft2048_vsx, fft4096_vsx, fft8192_vsx, fft16384_vsx, fft32768_vsx, fft65536_vsx,
213};
214static void (* const fft_dispatch_vsx_interleave[])(FFTComplex*) = {
215 fft4_vsx_interleave, fft8_vsx_interleave, fft16_vsx_interleave, fft32_vsx_interleave, fft64_vsx_interleave,
216 fft128_vsx_interleave, fft256_vsx_interleave, fft512_vsx_interleave, fft1024_vsx_interleave,
217 fft2048_vsx_interleave, fft4096_vsx_interleave, fft8192_vsx_interleave, fft16384_vsx_interleave, fft32768_vsx_interleave, fft65536_vsx_interleave,
218};
219void ff_fft_calc_interleave_vsx(FFTContext *s, FFTComplex *z)
220{
221 fft_dispatch_vsx_interleave[s->nbits-2](z);
222}
223void ff_fft_calc_vsx(FFTContext *s, FFTComplex *z)
224{
225 fft_dispatch_vsx[s->nbits-2](z);
226}
227#endif /* HAVE_VSX */