2 * Copyright (c) 2001, 2002 Fabrice Bellard
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "libavutil/attributes.h"
24 #include "libavutil/mem.h"
27 #include "mpegaudiodsp.h"
28 #include "mpegaudio.h"
31 #define RENAME(n) n##_float
33 static inline float round_sample(float *sum
)
40 #define MACS(rt, ra, rb) rt+=(ra)*(rb)
41 #define MULS(ra, rb) ((ra)*(rb))
42 #define MULH3(x, y, s) ((s)*(y)*(x))
43 #define MLSS(rt, ra, rb) rt-=(ra)*(rb)
44 #define MULLx(x, y, s) ((y)*(x))
45 #define FIXHR(x) ((float)(x))
46 #define FIXR(x) ((float)(x))
47 #define SHR(a,b) ((a)*(1.0f/(1<<(b))))
51 #define RENAME(n) n##_fixed
52 #define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15)
54 static inline int round_sample(int64_t *sum
)
57 sum1
= (int)((*sum
) >> OUT_SHIFT
);
58 *sum
&= (1<<OUT_SHIFT
)-1;
59 return av_clip_int16(sum1
);
62 # define MULS(ra, rb) MUL64(ra, rb)
63 # define MACS(rt, ra, rb) MAC64(rt, ra, rb)
64 # define MLSS(rt, ra, rb) MLS64(rt, ra, rb)
65 # define MULH3(x, y, s) MULH((s)*(x), y)
66 # define MULLx(x, y, s) MULL(x,y,s)
67 # define SHR(a,b) ((a)>>(b))
68 # define FIXR(a) ((int)((a) * FRAC_ONE + 0.5))
69 # define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5))
72 /** Window for MDCT. Actually only the elements in [0,17] and
73 [MDCT_BUF_SIZE/2, MDCT_BUF_SIZE/2 + 17] are actually used. The rest
74 is just to preserve alignment for SIMD implementations.
76 DECLARE_ALIGNED(16, INTFLOAT
, RENAME(ff_mdct_win
))[8][MDCT_BUF_SIZE
];
78 DECLARE_ALIGNED(16, MPA_INT
, RENAME(ff_mpa_synth_window
))[512+256];
80 #define SUM8(op, sum, w, p) \
82 op(sum, (w)[0 * 64], (p)[0 * 64]); \
83 op(sum, (w)[1 * 64], (p)[1 * 64]); \
84 op(sum, (w)[2 * 64], (p)[2 * 64]); \
85 op(sum, (w)[3 * 64], (p)[3 * 64]); \
86 op(sum, (w)[4 * 64], (p)[4 * 64]); \
87 op(sum, (w)[5 * 64], (p)[5 * 64]); \
88 op(sum, (w)[6 * 64], (p)[6 * 64]); \
89 op(sum, (w)[7 * 64], (p)[7 * 64]); \
92 #define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \
96 op1(sum1, (w1)[0 * 64], tmp);\
97 op2(sum2, (w2)[0 * 64], tmp);\
99 op1(sum1, (w1)[1 * 64], tmp);\
100 op2(sum2, (w2)[1 * 64], tmp);\
102 op1(sum1, (w1)[2 * 64], tmp);\
103 op2(sum2, (w2)[2 * 64], tmp);\
105 op1(sum1, (w1)[3 * 64], tmp);\
106 op2(sum2, (w2)[3 * 64], tmp);\
108 op1(sum1, (w1)[4 * 64], tmp);\
109 op2(sum2, (w2)[4 * 64], tmp);\
111 op1(sum1, (w1)[5 * 64], tmp);\
112 op2(sum2, (w2)[5 * 64], tmp);\
114 op1(sum1, (w1)[6 * 64], tmp);\
115 op2(sum2, (w2)[6 * 64], tmp);\
117 op1(sum1, (w1)[7 * 64], tmp);\
118 op2(sum2, (w2)[7 * 64], tmp);\
121 void RENAME(ff_mpadsp_apply_window
)(MPA_INT
*synth_buf
, MPA_INT
*window
,
122 int *dither_state
, OUT_INT
*samples
,
125 register const MPA_INT
*w
, *w2
, *p
;
134 /* copy to avoid wrap */
135 memcpy(synth_buf
+ 512, synth_buf
, 32 * sizeof(*synth_buf
));
137 samples2
= samples
+ 31 * incr
;
143 SUM8(MACS
, sum
, w
, p
);
145 SUM8(MLSS
, sum
, w
+ 32, p
);
146 *samples
= round_sample(&sum
);
150 /* we calculate two samples at the same time to avoid one memory
151 access per two sample */
154 p
= synth_buf
+ 16 + j
;
155 SUM8P2(sum
, MACS
, sum2
, MLSS
, w
, w2
, p
);
156 p
= synth_buf
+ 48 - j
;
157 SUM8P2(sum
, MLSS
, sum2
, MLSS
, w
+ 32, w2
+ 32, p
);
159 *samples
= round_sample(&sum
);
162 *samples2
= round_sample(&sum
);
169 SUM8(MLSS
, sum
, w
+ 32, p
);
170 *samples
= round_sample(&sum
);
174 /* 32 sub band synthesis filter. Input: 32 sub band samples, Output:
176 void RENAME(ff_mpa_synth_filter
)(MPADSPContext
*s
, MPA_INT
*synth_buf_ptr
,
177 int *synth_buf_offset
,
178 MPA_INT
*window
, int *dither_state
,
179 OUT_INT
*samples
, int incr
,
185 offset
= *synth_buf_offset
;
186 synth_buf
= synth_buf_ptr
+ offset
;
188 s
->RENAME(dct32
)(synth_buf
, sb_samples
);
189 s
->RENAME(apply_window
)(synth_buf
, window
, dither_state
, samples
, incr
);
191 offset
= (offset
- 32) & 511;
192 *synth_buf_offset
= offset
;
195 av_cold
void RENAME(ff_mpa_synth_init
)(MPA_INT
*window
)
199 /* max = 18760, max sum over all 16 coefs : 44736 */
202 v
= ff_mpa_enwindow
[i
];
204 v
*= 1.0 / (1LL<<(16 + FRAC_BITS
));
214 // Needed for avoiding shuffles in ASM implementations
216 for(j
=0; j
< 16; j
++)
217 window
[512+16*i
+j
] = window
[64*i
+32-j
];
220 for(j
=0; j
< 16; j
++)
221 window
[512+128+16*i
+j
] = window
[64*i
+48-j
];
224 av_cold
void RENAME(ff_init_mpadsp_tabs
)(void)
227 /* compute mdct windows */
228 for (i
= 0; i
< 36; i
++) {
229 for (j
= 0; j
< 4; j
++) {
232 if (j
== 2 && i
% 3 != 1)
235 d
= sin(M_PI
* (i
+ 0.5) / 36.0);
238 else if (i
>= 24) d
= sin(M_PI
* (i
- 18 + 0.5) / 12.0);
239 else if (i
>= 18) d
= 1;
242 else if (i
< 12) d
= sin(M_PI
* (i
- 6 + 0.5) / 12.0);
243 else if (i
< 18) d
= 1;
245 //merge last stage of imdct into the window coefficients
246 d
*= 0.5 * IMDCT_SCALAR
/ cos(M_PI
* (2 * i
+ 19) / 72);
249 RENAME(ff_mdct_win
)[j
][i
/3] = FIXHR((d
/ (1<<5)));
251 int idx
= i
< 18 ? i
: i
+ (MDCT_BUF_SIZE
/2 - 18);
252 RENAME(ff_mdct_win
)[j
][idx
] = FIXHR((d
/ (1<<5)));
257 /* NOTE: we do frequency inversion adter the MDCT by changing
258 the sign of the right window coefs */
259 for (j
= 0; j
< 4; j
++) {
260 for (i
= 0; i
< MDCT_BUF_SIZE
; i
+= 2) {
261 RENAME(ff_mdct_win
)[j
+ 4][i
] = RENAME(ff_mdct_win
)[j
][i
];
262 RENAME(ff_mdct_win
)[j
+ 4][i
+ 1] = -RENAME(ff_mdct_win
)[j
][i
+ 1];
267 #define C1 FIXHR(0.98480775301220805936/2)
268 #define C2 FIXHR(0.93969262078590838405/2)
269 #define C3 FIXHR(0.86602540378443864676/2)
270 #define C4 FIXHR(0.76604444311897803520/2)
271 #define C5 FIXHR(0.64278760968653932632/2)
272 #define C6 FIXHR(0.5/2)
273 #define C7 FIXHR(0.34202014332566873304/2)
274 #define C8 FIXHR(0.17364817766693034885/2)
276 /* 0.5 / cos(pi*(2*i+1)/36) */
277 static const INTFLOAT icos36
[9] = {
278 FIXR(0.50190991877167369479),
279 FIXR(0.51763809020504152469), //0
280 FIXR(0.55168895948124587824),
281 FIXR(0.61038729438072803416),
282 FIXR(0.70710678118654752439), //1
283 FIXR(0.87172339781054900991),
284 FIXR(1.18310079157624925896),
285 FIXR(1.93185165257813657349), //2
286 FIXR(5.73685662283492756461),
289 /* 0.5 / cos(pi*(2*i+1)/36) */
290 static const INTFLOAT icos36h
[9] = {
291 FIXHR(0.50190991877167369479/2),
292 FIXHR(0.51763809020504152469/2), //0
293 FIXHR(0.55168895948124587824/2),
294 FIXHR(0.61038729438072803416/2),
295 FIXHR(0.70710678118654752439/2), //1
296 FIXHR(0.87172339781054900991/2),
297 FIXHR(1.18310079157624925896/4),
298 FIXHR(1.93185165257813657349/4), //2
299 // FIXHR(5.73685662283492756461),
302 /* using Lee like decomposition followed by hand coded 9 points DCT */
303 static void imdct36(INTFLOAT
*out
, INTFLOAT
*buf
, INTFLOAT
*in
, INTFLOAT
*win
)
306 INTFLOAT t0
, t1
, t2
, t3
, s0
, s1
, s2
, s3
;
307 INTFLOAT tmp
[18], *tmp1
, *in1
;
309 for (i
= 17; i
>= 1; i
--)
311 for (i
= 17; i
>= 3; i
-= 2)
314 for (j
= 0; j
< 2; j
++) {
318 t2
= in1
[2*4] + in1
[2*8] - in1
[2*2];
320 t3
= in1
[2*0] + SHR(in1
[2*6],1);
321 t1
= in1
[2*0] - in1
[2*6];
322 tmp1
[ 6] = t1
- SHR(t2
,1);
325 t0
= MULH3(in1
[2*2] + in1
[2*4] , C2
, 2);
326 t1
= MULH3(in1
[2*4] - in1
[2*8] , -2*C8
, 1);
327 t2
= MULH3(in1
[2*2] + in1
[2*8] , -C4
, 2);
329 tmp1
[10] = t3
- t0
- t2
;
330 tmp1
[ 2] = t3
+ t0
+ t1
;
331 tmp1
[14] = t3
+ t2
- t1
;
333 tmp1
[ 4] = MULH3(in1
[2*5] + in1
[2*7] - in1
[2*1], -C3
, 2);
334 t2
= MULH3(in1
[2*1] + in1
[2*5], C1
, 2);
335 t3
= MULH3(in1
[2*5] - in1
[2*7], -2*C7
, 1);
336 t0
= MULH3(in1
[2*3], C3
, 2);
338 t1
= MULH3(in1
[2*1] + in1
[2*7], -C5
, 2);
340 tmp1
[ 0] = t2
+ t3
+ t0
;
341 tmp1
[12] = t2
+ t1
- t0
;
342 tmp1
[ 8] = t3
- t1
- t0
;
346 for (j
= 0; j
< 4; j
++) {
354 s1
= MULH3(t3
+ t2
, icos36h
[ j
], 2);
355 s3
= MULLx(t3
- t2
, icos36
[8 - j
], FRAC_BITS
);
359 out
[(9 + j
) * SBLIMIT
] = MULH3(t1
, win
[ 9 + j
], 1) + buf
[4*(9 + j
)];
360 out
[(8 - j
) * SBLIMIT
] = MULH3(t1
, win
[ 8 - j
], 1) + buf
[4*(8 - j
)];
361 buf
[4 * ( 9 + j
)] = MULH3(t0
, win
[MDCT_BUF_SIZE
/2 + 9 + j
], 1);
362 buf
[4 * ( 8 - j
)] = MULH3(t0
, win
[MDCT_BUF_SIZE
/2 + 8 - j
], 1);
366 out
[(9 + 8 - j
) * SBLIMIT
] = MULH3(t1
, win
[ 9 + 8 - j
], 1) + buf
[4*(9 + 8 - j
)];
367 out
[ j
* SBLIMIT
] = MULH3(t1
, win
[ j
], 1) + buf
[4*( j
)];
368 buf
[4 * ( 9 + 8 - j
)] = MULH3(t0
, win
[MDCT_BUF_SIZE
/2 + 9 + 8 - j
], 1);
369 buf
[4 * ( j
)] = MULH3(t0
, win
[MDCT_BUF_SIZE
/2 + j
], 1);
374 s1
= MULH3(tmp
[17], icos36h
[4], 2);
377 out
[(9 + 4) * SBLIMIT
] = MULH3(t1
, win
[ 9 + 4], 1) + buf
[4*(9 + 4)];
378 out
[(8 - 4) * SBLIMIT
] = MULH3(t1
, win
[ 8 - 4], 1) + buf
[4*(8 - 4)];
379 buf
[4 * ( 9 + 4 )] = MULH3(t0
, win
[MDCT_BUF_SIZE
/2 + 9 + 4], 1);
380 buf
[4 * ( 8 - 4 )] = MULH3(t0
, win
[MDCT_BUF_SIZE
/2 + 8 - 4], 1);
383 void RENAME(ff_imdct36_blocks
)(INTFLOAT
*out
, INTFLOAT
*buf
, INTFLOAT
*in
,
384 int count
, int switch_point
, int block_type
)
387 for (j
=0 ; j
< count
; j
++) {
388 /* apply window & overlap with previous buffer */
391 int win_idx
= (switch_point
&& j
< 2) ? 0 : block_type
;
392 INTFLOAT
*win
= RENAME(ff_mdct_win
)[win_idx
+ (4 & -(j
& 1))];
394 imdct36(out
, buf
, in
, win
);
397 buf
+= ((j
&3) != 3 ? 1 : (72-3));