2 * Copyright (c) 2013-2014 Mozilla Corporation
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * Celt non-power of 2 iMDCT
32 #include "libavutil/attributes.h"
33 #include "libavutil/common.h"
37 #include "opus_imdct.h"
39 // minimal iMDCT size to make SIMD opts easier
40 #define CELT_MIN_IMDCT_SIZE 120
43 #define CMUL3(cre, cim, are, aim, bre, bim) \
45 cre = are * bre - aim * bim; \
46 cim = are * bim + aim * bre; \
49 #define CMUL(c, a, b) CMUL3((c).re, (c).im, (a).re, (a).im, (b).re, (b).im)
52 // d = a * conjugate(b)
53 #define CMUL2(c, d, a, b) \
59 float rr = are * bre; \
60 float ri = are * bim; \
61 float ir = aim * bre; \
62 float ii = aim * bim; \
69 av_cold
void ff_celt_imdct_uninit(CeltIMDCTContext
**ps
)
71 CeltIMDCTContext
*s
= *ps
;
77 for (i
= 0; i
< FF_ARRAY_ELEMS(s
->exptab
); i
++)
78 av_freep(&s
->exptab
[i
]);
80 av_freep(&s
->twiddle_exptab
);
87 static void celt_imdct_half(CeltIMDCTContext
*s
, float *dst
, const float *src
,
88 ptrdiff_t stride
, float scale
);
90 av_cold
int ff_celt_imdct_init(CeltIMDCTContext
**ps
, int N
)
93 int len2
= 15 * (1 << N
);
97 if (len2
> CELT_MAX_FRAME_SIZE
|| len2
< CELT_MIN_IMDCT_SIZE
)
98 return AVERROR(EINVAL
);
100 s
= av_mallocz(sizeof(*s
));
102 return AVERROR(ENOMEM
);
108 s
->tmp
= av_malloc(len
* 2 * sizeof(*s
->tmp
));
112 s
->twiddle_exptab
= av_malloc(s
->len4
* sizeof(*s
->twiddle_exptab
));
113 if (!s
->twiddle_exptab
)
116 for (i
= 0; i
< s
->len4
; i
++) {
117 s
->twiddle_exptab
[i
].re
= cos(2 * M_PI
* (i
+ 0.125 + s
->len4
) / len
);
118 s
->twiddle_exptab
[i
].im
= sin(2 * M_PI
* (i
+ 0.125 + s
->len4
) / len
);
121 for (i
= 0; i
< FF_ARRAY_ELEMS(s
->exptab
); i
++) {
122 int N
= 15 * (1 << i
);
123 s
->exptab
[i
] = av_malloc(sizeof(*s
->exptab
[i
]) * FFMAX(N
, 19));
127 for (j
= 0; j
< N
; j
++) {
128 s
->exptab
[i
][j
].re
= cos(2 * M_PI
* j
/ N
);
129 s
->exptab
[i
][j
].im
= sin(2 * M_PI
* j
/ N
);
133 // wrap around to simplify fft15
134 for (j
= 15; j
< 19; j
++)
135 s
->exptab
[0][j
] = s
->exptab
[0][j
- 15];
137 s
->imdct_half
= celt_imdct_half
;
140 ff_celt_imdct_init_aarch64(s
);
146 ff_celt_imdct_uninit(&s
);
147 return AVERROR(ENOMEM
);
150 static void fft5(FFTComplex
*out
, const FFTComplex
*in
, ptrdiff_t stride
)
152 // [0] = exp(2 * i * pi / 5), [1] = exp(2 * i * pi * 2 / 5)
153 static const FFTComplex fact
[] = { { 0.30901699437494745, 0.95105651629515353 },
154 { -0.80901699437494734, 0.58778525229247325 } };
158 CMUL2(z
[0][0], z
[0][3], in
[1 * stride
], fact
[0]);
159 CMUL2(z
[0][1], z
[0][2], in
[1 * stride
], fact
[1]);
160 CMUL2(z
[1][0], z
[1][3], in
[2 * stride
], fact
[0]);
161 CMUL2(z
[1][1], z
[1][2], in
[2 * stride
], fact
[1]);
162 CMUL2(z
[2][0], z
[2][3], in
[3 * stride
], fact
[0]);
163 CMUL2(z
[2][1], z
[2][2], in
[3 * stride
], fact
[1]);
164 CMUL2(z
[3][0], z
[3][3], in
[4 * stride
], fact
[0]);
165 CMUL2(z
[3][1], z
[3][2], in
[4 * stride
], fact
[1]);
167 out
[0].re
= in
[0].re
+ in
[stride
].re
+ in
[2 * stride
].re
+ in
[3 * stride
].re
+ in
[4 * stride
].re
;
168 out
[0].im
= in
[0].im
+ in
[stride
].im
+ in
[2 * stride
].im
+ in
[3 * stride
].im
+ in
[4 * stride
].im
;
170 out
[1].re
= in
[0].re
+ z
[0][0].re
+ z
[1][1].re
+ z
[2][2].re
+ z
[3][3].re
;
171 out
[1].im
= in
[0].im
+ z
[0][0].im
+ z
[1][1].im
+ z
[2][2].im
+ z
[3][3].im
;
173 out
[2].re
= in
[0].re
+ z
[0][1].re
+ z
[1][3].re
+ z
[2][0].re
+ z
[3][2].re
;
174 out
[2].im
= in
[0].im
+ z
[0][1].im
+ z
[1][3].im
+ z
[2][0].im
+ z
[3][2].im
;
176 out
[3].re
= in
[0].re
+ z
[0][2].re
+ z
[1][0].re
+ z
[2][3].re
+ z
[3][1].re
;
177 out
[3].im
= in
[0].im
+ z
[0][2].im
+ z
[1][0].im
+ z
[2][3].im
+ z
[3][1].im
;
179 out
[4].re
= in
[0].re
+ z
[0][3].re
+ z
[1][2].re
+ z
[2][1].re
+ z
[3][0].re
;
180 out
[4].im
= in
[0].im
+ z
[0][3].im
+ z
[1][2].im
+ z
[2][1].im
+ z
[3][0].im
;
183 static void fft15(CeltIMDCTContext
*s
, FFTComplex
*out
, const FFTComplex
*in
, ptrdiff_t stride
)
185 const FFTComplex
*exptab
= s
->exptab
[0];
191 fft5(tmp
, in
, stride
* 3);
192 fft5(tmp1
, in
+ stride
, stride
* 3);
193 fft5(tmp2
, in
+ 2 * stride
, stride
* 3);
195 for (k
= 0; k
< 5; k
++) {
198 CMUL(t1
, tmp1
[k
], exptab
[k
]);
199 CMUL(t2
, tmp2
[k
], exptab
[2 * k
]);
200 out
[k
].re
= tmp
[k
].re
+ t1
.re
+ t2
.re
;
201 out
[k
].im
= tmp
[k
].im
+ t1
.im
+ t2
.im
;
203 CMUL(t1
, tmp1
[k
], exptab
[k
+ 5]);
204 CMUL(t2
, tmp2
[k
], exptab
[2 * (k
+ 5)]);
205 out
[k
+ 5].re
= tmp
[k
].re
+ t1
.re
+ t2
.re
;
206 out
[k
+ 5].im
= tmp
[k
].im
+ t1
.im
+ t2
.im
;
208 CMUL(t1
, tmp1
[k
], exptab
[k
+ 10]);
209 CMUL(t2
, tmp2
[k
], exptab
[2 * k
+ 5]);
210 out
[k
+ 10].re
= tmp
[k
].re
+ t1
.re
+ t2
.re
;
211 out
[k
+ 10].im
= tmp
[k
].im
+ t1
.im
+ t2
.im
;
216 * FFT of the length 15 * (2^N)
218 static void fft_calc(CeltIMDCTContext
*s
, FFTComplex
*out
, const FFTComplex
*in
,
219 int N
, ptrdiff_t stride
)
222 const FFTComplex
*exptab
= s
->exptab
[N
];
223 const int len2
= 15 * (1 << (N
- 1));
226 fft_calc(s
, out
, in
, N
- 1, stride
* 2);
227 fft_calc(s
, out
+ len2
, in
+ stride
, N
- 1, stride
* 2);
229 for (k
= 0; k
< len2
; k
++) {
232 CMUL(t
, out
[len2
+ k
], exptab
[k
]);
234 out
[len2
+ k
].re
= out
[k
].re
- t
.re
;
235 out
[len2
+ k
].im
= out
[k
].im
- t
.im
;
241 fft15(s
, out
, in
, stride
);
244 static void celt_imdct_half(CeltIMDCTContext
*s
, float *dst
, const float *src
,
245 ptrdiff_t stride
, float scale
)
247 FFTComplex
*z
= (FFTComplex
*)dst
;
248 const int len8
= s
->len4
/ 2;
249 const float *in1
= src
;
250 const float *in2
= src
+ (s
->len2
- 1) * stride
;
253 for (i
= 0; i
< s
->len4
; i
++) {
254 FFTComplex tmp
= { *in2
, *in1
};
255 CMUL(s
->tmp
[i
], tmp
, s
->twiddle_exptab
[i
]);
260 fft_calc(s
, z
, s
->tmp
, s
->fft_n
, 1);
262 for (i
= 0; i
< len8
; i
++) {
263 float r0
, i0
, r1
, i1
;
265 CMUL3(r0
, i1
, z
[len8
- i
- 1].im
, z
[len8
- i
- 1].re
, s
->twiddle_exptab
[len8
- i
- 1].im
, s
->twiddle_exptab
[len8
- i
- 1].re
);
266 CMUL3(r1
, i0
, z
[len8
+ i
].im
, z
[len8
+ i
].re
, s
->twiddle_exptab
[len8
+ i
].im
, s
->twiddle_exptab
[len8
+ i
].re
);
267 z
[len8
- i
- 1].re
= scale
* r0
;
268 z
[len8
- i
- 1].im
= scale
* i0
;
269 z
[len8
+ i
].re
= scale
* r1
;
270 z
[len8
+ i
].im
= scale
* i1
;