Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * AC-3 DSP functions | |
3 | * Copyright (c) 2011 Justin Ruggles | |
4 | * | |
5 | * This file is part of FFmpeg. | |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * FFmpeg is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with FFmpeg; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
21 | ||
22 | #include "libavutil/avassert.h" | |
23 | #include "avcodec.h" | |
24 | #include "ac3.h" | |
25 | #include "ac3dsp.h" | |
26 | #include "mathops.h" | |
27 | ||
/**
 * Replace each exponent with the minimum over itself and the exponents at
 * the same coefficient index in the following blocks.
 *
 * Consecutive blocks are stored 256 bytes apart in @p exp.
 *
 * @param exp              exponents of the first block; updated in place
 * @param num_reuse_blocks number of following blocks to include in the
 *                         minimum; nothing is done when this is 0
 * @param nb_coefs         number of coefficients to process
 */
static void ac3_exponent_min_c(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
{
    int coef;

    if (num_reuse_blocks == 0)
        return;

    for (coef = 0; coef < nb_coefs; coef++) {
        uint8_t lowest = exp[coef];
        int blk;

        /* block 0 is the one being updated, blocks 1..N are only read */
        for (blk = 1; blk <= num_reuse_blocks; blk++) {
            uint8_t candidate = exp[coef + blk * 256];
            if (candidate < lowest)
                lowest = candidate;
        }
        exp[coef] = lowest;
    }
}
47 | ||
/**
 * OR together the absolute values of all elements of an int16_t array.
 *
 * The highest set bit of the result equals the highest set bit found in
 * any absolute value of the input.
 *
 * @param src input values
 * @param len number of values in @p src
 * @return bitwise OR of abs(src[i]) for all i; 0 when len <= 0
 */
static int ac3_max_msb_abs_int16_c(const int16_t *src, int len)
{
    int acc = 0;
    int pos = 0;

    while (pos < len) {
        acc |= abs(src[pos]);
        pos++;
    }
    return acc;
}
55 | ||
/**
 * Left-shift each value of an int16_t array in place.
 *
 * Each element is shifted as a 16-bit lane: bits pushed past bit 15 are
 * discarded. The array is accessed only through its own element type, so
 * there is no aliasing or alignment assumption on @p src, and exactly
 * @p len elements are touched.
 *
 * @param src   values to shift; updated in place
 * @param len   number of values in @p src
 * @param shift left shift amount; expected to be in the range 0..15
 */
static void ac3_lshift_int16_c(int16_t *src, unsigned int len,
                               unsigned int shift)
{
    unsigned int i;

    for (i = 0; i < len; i++) {
        /* Shift in unsigned arithmetic so negative inputs and overflowing
         * bits are well defined, then keep only the low 16 bits. */
        uint32_t lane = (uint16_t)src[i];
        src[i] = (int16_t)(uint16_t)(lane << shift);
    }
}
74 | ||
/**
 * Right-shift each value of an int32_t array in place.
 *
 * Exactly @p len elements are processed; a length of 0 leaves the array
 * untouched and the length does not have to be a multiple of 8.
 *
 * @param src   values to shift; updated in place
 * @param len   number of values in @p src
 * @param shift right shift amount
 */
static void ac3_rshift_int32_c(int32_t *src, unsigned int len,
                               unsigned int shift)
{
    unsigned int i;

    for (i = 0; i < len; i++)
        src[i] >>= shift;
}
90 | ||
91 | static void float_to_fixed24_c(int32_t *dst, const float *src, unsigned int len) | |
92 | { | |
93 | const float scale = 1 << 24; | |
94 | do { | |
95 | *dst++ = lrintf(*src++ * scale); | |
96 | *dst++ = lrintf(*src++ * scale); | |
97 | *dst++ = lrintf(*src++ * scale); | |
98 | *dst++ = lrintf(*src++ * scale); | |
99 | *dst++ = lrintf(*src++ * scale); | |
100 | *dst++ = lrintf(*src++ * scale); | |
101 | *dst++ = lrintf(*src++ * scale); | |
102 | *dst++ = lrintf(*src++ * scale); | |
103 | len -= 8; | |
104 | } while (len > 0); | |
105 | } | |
106 | ||
107 | static void ac3_bit_alloc_calc_bap_c(int16_t *mask, int16_t *psd, | |
108 | int start, int end, | |
109 | int snr_offset, int floor, | |
110 | const uint8_t *bap_tab, uint8_t *bap) | |
111 | { | |
112 | int bin, band, band_end; | |
113 | ||
114 | /* special case, if snr offset is -960, set all bap's to zero */ | |
115 | if (snr_offset == -960) { | |
116 | memset(bap, 0, AC3_MAX_COEFS); | |
117 | return; | |
118 | } | |
119 | ||
120 | bin = start; | |
121 | band = ff_ac3_bin_to_band_tab[start]; | |
122 | do { | |
123 | int m = (FFMAX(mask[band] - snr_offset - floor, 0) & 0x1FE0) + floor; | |
124 | band_end = ff_ac3_band_start_tab[++band]; | |
125 | band_end = FFMIN(band_end, end); | |
126 | ||
127 | for (; bin < band_end; bin++) { | |
128 | int address = av_clip((psd[bin] - m) >> 5, 0, 63); | |
129 | bap[bin] = bap_tab[address]; | |
130 | } | |
131 | } while (end > band_end); | |
132 | } | |
133 | ||
/**
 * Add the bap values of one array to a per-bap histogram.
 *
 * @param mant_cnt counters indexed by bap value; incremented in place
 * @param bap      bap values to count
 * @param len      number of values in @p bap; nothing is done when <= 0
 */
static void ac3_update_bap_counts_c(uint16_t mant_cnt[16], uint8_t *bap,
                                    int len)
{
    int remaining;

    /* walk from the last element down to the first */
    for (remaining = len; remaining > 0; remaining--)
        mant_cnt[bap[remaining - 1]] += 1;
}
140 | ||
141 | DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = { | |
142 | 0, 0, 0, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16 | |
143 | }; | |
144 | ||
145 | static int ac3_compute_mantissa_size_c(uint16_t mant_cnt[6][16]) | |
146 | { | |
147 | int blk, bap; | |
148 | int bits = 0; | |
149 | ||
150 | for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { | |
151 | // bap=1 : 3 mantissas in 5 bits | |
152 | bits += (mant_cnt[blk][1] / 3) * 5; | |
153 | // bap=2 : 3 mantissas in 7 bits | |
154 | // bap=4 : 2 mantissas in 7 bits | |
155 | bits += ((mant_cnt[blk][2] / 3) + (mant_cnt[blk][4] >> 1)) * 7; | |
156 | // bap=3 : 1 mantissa in 3 bits | |
157 | bits += mant_cnt[blk][3] * 3; | |
158 | // bap=5 to 15 : get bits per mantissa from table | |
159 | for (bap = 5; bap < 16; bap++) | |
160 | bits += mant_cnt[blk][bap] * ff_ac3_bap_bits[bap]; | |
161 | } | |
162 | return bits; | |
163 | } | |
164 | ||
/**
 * Derive one exponent per coefficient.
 *
 * A zero coefficient gets exponent 24; any other coefficient gets
 * 23 - av_log2(|coef|).
 *
 * @param exp      output exponents, one per coefficient
 * @param coef     input coefficients
 * @param nb_coefs number of coefficients
 */
static void ac3_extract_exponents_c(uint8_t *exp, int32_t *coef, int nb_coefs)
{
    int pos;

    for (pos = 0; pos < nb_coefs; pos++) {
        int magnitude = abs(coef[pos]);

        if (magnitude)
            exp[pos] = 23 - av_log2(magnitude);
        else
            exp[pos] = 24;
    }
}
174 | ||
/**
 * Accumulate sums of squares of two coefficient arrays and of their
 * element-wise sum and difference.
 *
 * On return:
 *   sum[0] = sum of coef0[i]^2
 *   sum[1] = sum of coef1[i]^2
 *   sum[2] = sum of (coef0[i] + coef1[i])^2
 *   sum[3] = sum of (coef0[i] - coef1[i])^2
 *
 * @param sum   output; overwritten, not accumulated into
 * @param coef0 first coefficient array
 * @param coef1 second coefficient array
 * @param len   number of coefficients in each array
 */
static void ac3_sum_square_butterfly_int32_c(int64_t sum[4],
                                             const int32_t *coef0,
                                             const int32_t *coef1,
                                             int len)
{
    int n;

    for (n = 0; n < 4; n++)
        sum[n] = 0;

    for (n = 0; n < len; n++) {
        int a     = coef0[n];
        int b     = coef1[n];
        int plus  = a + b;
        int minus = a - b;

        MAC64(sum[0], a, a);
        MAC64(sum[1], b, b);
        MAC64(sum[2], plus, plus);
        MAC64(sum[3], minus, minus);
    }
}
195 | ||
/**
 * Accumulate sums of squares of two float coefficient arrays and of their
 * element-wise sum and difference.
 *
 * On return:
 *   sum[0] = sum of coef0[i]^2
 *   sum[1] = sum of coef1[i]^2
 *   sum[2] = sum of (coef0[i] + coef1[i])^2
 *   sum[3] = sum of (coef0[i] - coef1[i])^2
 *
 * @param sum   output; overwritten, not accumulated into
 * @param coef0 first coefficient array
 * @param coef1 second coefficient array
 * @param len   number of coefficients in each array
 */
static void ac3_sum_square_butterfly_float_c(float sum[4],
                                             const float *coef0,
                                             const float *coef1,
                                             int len)
{
    int n;

    for (n = 0; n < 4; n++)
        sum[n] = 0;

    for (n = 0; n < len; n++) {
        float a     = coef0[n];
        float b     = coef1[n];
        float plus  = a + b;
        float minus = a - b;

        sum[0] += a * a;
        sum[1] += b * b;
        sum[2] += plus * plus;
        sum[3] += minus * minus;
    }
}
216 | ||
/**
 * Downmix float channels in place to two channels or one channel.
 *
 * For every sample position, output channel k is the sum over all input
 * channels j of samples[j][i] * matrix[j][k]. The results are written back
 * to samples[0] (and samples[1] for two outputs). Any other value of
 * @p out_ch leaves the samples untouched.
 *
 * @param samples per-channel sample arrays; channel 0 (and 1) overwritten
 * @param matrix  mixing coefficients, one [2] row per input channel
 * @param out_ch  number of output channels (1 or 2)
 * @param in_ch   number of input channels
 * @param len     number of samples per channel
 */
static void ac3_downmix_c(float **samples, float (*matrix)[2],
                          int out_ch, int in_ch, int len)
{
    int pos, ch;

    switch (out_ch) {
    case 2:
        for (pos = 0; pos < len; pos++) {
            float acc0 = 0.0f;
            float acc1 = 0.0f;

            for (ch = 0; ch < in_ch; ch++) {
                acc0 += samples[ch][pos] * matrix[ch][0];
                acc1 += samples[ch][pos] * matrix[ch][1];
            }
            /* write only after every input channel has been read */
            samples[0][pos] = acc0;
            samples[1][pos] = acc1;
        }
        break;
    case 1:
        for (pos = 0; pos < len; pos++) {
            float acc0 = 0.0f;

            for (ch = 0; ch < in_ch; ch++)
                acc0 += samples[ch][pos] * matrix[ch][0];
            samples[0][pos] = acc0;
        }
        break;
    default:
        break;
    }
}
241 | ||
/**
 * Downmix fixed-point channels in place to two channels or one channel.
 *
 * For every sample position, output channel k is the 64-bit sum over all
 * input channels j of samples[j][i] * matrix[j][k], with 2048 added before
 * a right shift by 12. The results are written back to samples[0] (and
 * samples[1] for two outputs). Any other value of @p out_ch leaves the
 * samples untouched.
 *
 * @param samples per-channel sample arrays; channel 0 (and 1) overwritten
 * @param matrix  mixing coefficients, one [2] row per input channel
 * @param out_ch  number of output channels (1 or 2)
 * @param in_ch   number of input channels
 * @param len     number of samples per channel
 */
static void ac3_downmix_c_fixed(int32_t **samples, int16_t (*matrix)[2],
                                int out_ch, int in_ch, int len)
{
    int pos, ch;

    switch (out_ch) {
    case 2:
        for (pos = 0; pos < len; pos++) {
            int64_t acc0 = 0;
            int64_t acc1 = 0;

            for (ch = 0; ch < in_ch; ch++) {
                acc0 += (int64_t)samples[ch][pos] * matrix[ch][0];
                acc1 += (int64_t)samples[ch][pos] * matrix[ch][1];
            }
            /* write only after every input channel has been read */
            samples[0][pos] = (acc0 + 2048) >> 12;
            samples[1][pos] = (acc1 + 2048) >> 12;
        }
        break;
    case 1:
        for (pos = 0; pos < len; pos++) {
            int64_t acc0 = 0;

            for (ch = 0; ch < in_ch; ch++)
                acc0 += (int64_t)samples[ch][pos] * matrix[ch][0];
            samples[0][pos] = (acc0 + 2048) >> 12;
        }
        break;
    default:
        break;
    }
}
266 | ||
/**
 * Apply a symmetric window to an int16_t array.
 *
 * Only the first len/2 window coefficients are read; coefficient i is
 * applied to both input[i] and input[len-1-i]. Each product has 1 << 14
 * added and is shifted right by 15 before being stored.
 *
 * @param output destination array of @p len elements
 * @param input  source array of @p len elements
 * @param window window coefficients, len/2 elements used
 * @param len    number of samples
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int half = len >> 1;
    int head;

    for (head = 0; head < half; head++) {
        unsigned int tail  = len - head - 1;
        int16_t      coeff = window[head];

        output[head] = (MUL16(input[head], coeff) + (1 << 14)) >> 15;
        output[tail] = (MUL16(input[tail], coeff) + (1 << 14)) >> 15;
    }
}
279 | ||
280 | av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact) | |
281 | { | |
282 | c->ac3_exponent_min = ac3_exponent_min_c; | |
283 | c->ac3_max_msb_abs_int16 = ac3_max_msb_abs_int16_c; | |
284 | c->ac3_lshift_int16 = ac3_lshift_int16_c; | |
285 | c->ac3_rshift_int32 = ac3_rshift_int32_c; | |
286 | c->float_to_fixed24 = float_to_fixed24_c; | |
287 | c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_c; | |
288 | c->update_bap_counts = ac3_update_bap_counts_c; | |
289 | c->compute_mantissa_size = ac3_compute_mantissa_size_c; | |
290 | c->extract_exponents = ac3_extract_exponents_c; | |
291 | c->sum_square_butterfly_int32 = ac3_sum_square_butterfly_int32_c; | |
292 | c->sum_square_butterfly_float = ac3_sum_square_butterfly_float_c; | |
293 | c->downmix = ac3_downmix_c; | |
294 | c->downmix_fixed = ac3_downmix_c_fixed; | |
295 | c->apply_window_int16 = apply_window_int16_c; | |
296 | ||
297 | if (ARCH_ARM) | |
298 | ff_ac3dsp_init_arm(c, bit_exact); | |
299 | if (ARCH_X86) | |
300 | ff_ac3dsp_init_x86(c, bit_exact); | |
301 | if (ARCH_MIPS) | |
302 | ff_ac3dsp_init_mips(c, bit_exact); | |
303 | } |