Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * AAC definitions and structures | |
3 | * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org ) | |
4 | * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com ) | |
5 | * | |
6 | * This file is part of FFmpeg. | |
7 | * | |
8 | * FFmpeg is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License as published by the Free Software Foundation; either | |
11 | * version 2.1 of the License, or (at your option) any later version. | |
12 | * | |
13 | * FFmpeg is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * Lesser General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU Lesser General Public | |
19 | * License along with FFmpeg; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 | */ | |
22 | ||
23 | /** | |
24 | * @file | |
25 | * AAC definitions and structures | |
26 | * @author Oded Shimon ( ods15 ods15 dyndns org ) | |
27 | * @author Maxim Gavrilov ( maxim.gavrilov gmail com ) | |
28 | */ | |
29 | ||
30 | #ifndef AVCODEC_AAC_H | |
31 | #define AVCODEC_AAC_H | |
32 | ||
33 | #include "libavutil/float_dsp.h" | |
34 | #include "avcodec.h" | |
35 | #include "fft.h" | |
36 | #include "mpeg4audio.h" | |
37 | #include "sbr.h" | |
38 | #include "fmtconvert.h" | |
39 | ||
40 | #include <stdint.h> | |
41 | ||
/* Capacity limits used as array dimensions by the structures in this header. */
#define MAX_CHANNELS 64         /* sizes the per-channel arrays (exclude_mask, output_element) */
#define MAX_ELEM_ID 16          /* second dimension of the che / tag_che_map tables */

#define TNS_MAX_ORDER 20        /* innermost dimension of TemporalNoiseShaping.coef */
#define MAX_LTP_LONG_SFB 40     /* length of LongTermPrediction.used */
47 | ||
/**
 * Element types of a raw data block.
 * Enumerators are implicitly numbered 0 (TYPE_SCE) through 7 (TYPE_END);
 * the numeric values presumably mirror the bitstream element IDs, so the
 * declaration order must not change.
 */
enum RawDataBlockType {
    TYPE_SCE,
    TYPE_CPE,
    TYPE_CCE,
    TYPE_LFE,
    TYPE_DSE,
    TYPE_PCE,
    TYPE_FIL,
    TYPE_END,
};
58 | ||
/**
 * Extension payload identifiers.
 * The first three enumerators are implicitly 0..2; the remaining ones carry
 * explicit, non-contiguous values.
 */
enum ExtensionPayloadID {
    EXT_FILL,
    EXT_FILL_DATA,
    EXT_DATA_ELEMENT,
    EXT_DYNAMIC_RANGE = 0xb,
    EXT_SBR_DATA      = 0xd,
    EXT_SBR_DATA_CRC  = 0xe,
};
67 | ||
/**
 * Window sequence of a frame.
 * Enumerators are implicitly numbered 0..3 in declaration order.
 */
enum WindowSequence {
    ONLY_LONG_SEQUENCE,
    LONG_START_SEQUENCE,
    EIGHT_SHORT_SEQUENCE,
    LONG_STOP_SEQUENCE,
};
74 | ||
/**
 * Band (codebook) types.
 * Only the values with a special meaning are named; the numbers in between
 * are left unnamed here.
 */
enum BandType {
    ZERO_BT        = 0,     ///< Scalefactors and spectral data are all zero.
    FIRST_PAIR_BT  = 5,     ///< This and later band types encode two values (rather than four) with one code word.
    ESC_BT         = 11,    ///< Spectral data are coded with an escape sequence.
    NOISE_BT       = 13,    ///< Spectral data are scaled white noise not coded in the bitstream.
    INTENSITY_BT2  = 14,    ///< Scalefactor data are intensity stereo positions.
    INTENSITY_BT   = 15,    ///< Scalefactor data are intensity stereo positions.
};
83 | ||
/**
 * Nonzero when bit 1 or bit 3 of (x - 1) is set.
 * For x in 1..11 that selects 3, 4, 7, 8, 9, 10 and 11 and rejects
 * 1, 2, 5 and 6. The result is a bit mask (2, 8 or 10), not a normalized
 * 0/1 value, so use it only in a boolean context.
 */
#define IS_CODEBOOK_UNSIGNED(x) (((x) - 1) & 10)
85 | ||
/**
 * Position class of a channel element.
 * All values are spelled out explicitly (0..5).
 */
enum ChannelPosition {
    AAC_CHANNEL_OFF   = 0,
    AAC_CHANNEL_FRONT = 1,
    AAC_CHANNEL_SIDE  = 2,
    AAC_CHANNEL_BACK  = 3,
    AAC_CHANNEL_LFE   = 4,
    AAC_CHANNEL_CC    = 5,
};
94 | ||
/**
 * The point during decoding at which channel coupling is applied.
 *
 * The value 2 is intentionally left unnamed: AFTER_IMDCT is pinned to 3.
 */
enum CouplingPoint {
    BEFORE_TNS,
    BETWEEN_TNS_AND_IMDCT,
    AFTER_IMDCT = 3,
};
103 | ||
/**
 * Output configuration status.
 *
 * Enumerators are implicitly numbered 0..4 in declaration order.
 * NOTE(review): callers presumably compare these values as an ordered
 * confidence scale -- confirm before reordering.
 */
enum OCStatus {
    OC_NONE,        ///< Output unconfigured
    OC_TRIAL_PCE,   ///< Output configuration under trial specified by an inband PCE
    OC_TRIAL_FRAME, ///< Output configuration under trial specified by a frame header
    OC_GLOBAL_HDR,  ///< Output configuration set in a global header but not yet locked
    OC_LOCKED,      ///< Output configuration locked in place
};
114 | ||
115 | typedef struct OutputConfiguration { | |
116 | MPEG4AudioConfig m4ac; | |
117 | uint8_t layout_map[MAX_ELEM_ID*4][3]; | |
118 | int layout_map_tags; | |
119 | int channels; | |
120 | uint64_t channel_layout; | |
121 | enum OCStatus status; | |
122 | } OutputConfiguration; | |
123 | ||
/**
 * Predictor State
 *
 * Six floats of running state per predictor; SingleChannelElement holds
 * MAX_PREDICTORS of these.
 */
typedef struct PredictorState {
    float cor0;
    float cor1;
    float var0;
    float var1;
    float r0;
    float r1;
} PredictorState;
135 | ||
#define MAX_PREDICTORS 672      /* length of SingleChannelElement.predictor_state */

#define SCALE_DIV_512    36    ///< scalefactor difference that corresponds to scale difference in 512 times
#define SCALE_ONE_POS   140    ///< scalefactor index that corresponds to scale=1.0
#define SCALE_MAX_POS   255    ///< scalefactor index maximum value
#define SCALE_MAX_DIFF   60    ///< maximum scalefactor difference allowed by standard
#define SCALE_DIFF_ZERO  60    ///< codebook index corresponding to zero scalefactor indices difference
143 | ||
144 | /** | |
145 | * Long Term Prediction | |
146 | */ | |
147 | typedef struct LongTermPrediction { | |
148 | int8_t present; | |
149 | int16_t lag; | |
150 | float coef; | |
151 | int8_t used[MAX_LTP_LONG_SFB]; | |
152 | } LongTermPrediction; | |
153 | ||
154 | /** | |
155 | * Individual Channel Stream | |
156 | */ | |
157 | typedef struct IndividualChannelStream { | |
158 | uint8_t max_sfb; ///< number of scalefactor bands per group | |
159 | enum WindowSequence window_sequence[2]; | |
160 | uint8_t use_kb_window[2]; ///< If set, use Kaiser-Bessel window, otherwise use a sine window. | |
161 | int num_window_groups; | |
162 | uint8_t group_len[8]; | |
163 | LongTermPrediction ltp; | |
164 | const uint16_t *swb_offset; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window | |
165 | const uint8_t *swb_sizes; ///< table of scalefactor band sizes for a particular window | |
166 | int num_swb; ///< number of scalefactor window bands | |
167 | int num_windows; | |
168 | int tns_max_bands; | |
169 | int predictor_present; | |
170 | int predictor_initialized; | |
171 | int predictor_reset_group; | |
172 | uint8_t prediction_used[41]; | |
173 | } IndividualChannelStream; | |
174 | ||
175 | /** | |
176 | * Temporal Noise Shaping | |
177 | */ | |
178 | typedef struct TemporalNoiseShaping { | |
179 | int present; | |
180 | int n_filt[8]; | |
181 | int length[8][4]; | |
182 | int direction[8][4]; | |
183 | int order[8][4]; | |
184 | float coef[8][4][TNS_MAX_ORDER]; | |
185 | } TemporalNoiseShaping; | |
186 | ||
187 | /** | |
188 | * Dynamic Range Control - decoded from the bitstream but not processed further. | |
189 | */ | |
190 | typedef struct DynamicRangeControl { | |
191 | int pce_instance_tag; ///< Indicates with which program the DRC info is associated. | |
192 | int dyn_rng_sgn[17]; ///< DRC sign information; 0 - positive, 1 - negative | |
193 | int dyn_rng_ctl[17]; ///< DRC magnitude information | |
194 | int exclude_mask[MAX_CHANNELS]; ///< Channels to be excluded from DRC processing. | |
195 | int band_incr; ///< Number of DRC bands greater than 1 having DRC info. | |
196 | int interpolation_scheme; ///< Indicates the interpolation scheme used in the SBR QMF domain. | |
197 | int band_top[17]; ///< Indicates the top of the i-th DRC band in units of 4 spectral lines. | |
198 | int prog_ref_level; /**< A reference level for the long-term program audio level for all | |
199 | * channels combined. | |
200 | */ | |
201 | } DynamicRangeControl; | |
202 | ||
/**
 * Pulse data: a pulse count, a start value, and room for up to four
 * position/amplitude pairs.
 */
typedef struct Pulse {
    int num_pulse;
    int start;
    int pos[4];
    int amp[4];
} Pulse;
209 | ||
210 | /** | |
211 | * coupling parameters | |
212 | */ | |
213 | typedef struct ChannelCoupling { | |
214 | enum CouplingPoint coupling_point; ///< The point during decoding at which coupling is applied. | |
215 | int num_coupled; ///< number of target elements | |
216 | enum RawDataBlockType type[8]; ///< Type of channel element to be coupled - SCE or CPE. | |
217 | int id_select[8]; ///< element id | |
218 | int ch_select[8]; /**< [0] shared list of gains; [1] list of gains for right channel; | |
219 | * [2] list of gains for left channel; [3] lists of gains for both channels | |
220 | */ | |
221 | float gain[16][120]; | |
222 | } ChannelCoupling; | |
223 | ||
224 | /** | |
225 | * Single Channel Element - used for both SCE and LFE elements. | |
226 | */ | |
227 | typedef struct SingleChannelElement { | |
228 | IndividualChannelStream ics; | |
229 | TemporalNoiseShaping tns; | |
230 | Pulse pulse; | |
231 | enum BandType band_type[128]; ///< band types | |
232 | int band_type_run_end[120]; ///< band type run end points | |
233 | float sf[120]; ///< scalefactors | |
234 | int sf_idx[128]; ///< scalefactor indices (used by encoder) | |
235 | uint8_t zeroes[128]; ///< band is not coded (used by encoder) | |
236 | DECLARE_ALIGNED(32, float, coeffs)[1024]; ///< coefficients for IMDCT | |
237 | DECLARE_ALIGNED(32, float, saved)[1536]; ///< overlap | |
238 | DECLARE_ALIGNED(32, float, ret_buf)[2048]; ///< PCM output buffer | |
239 | DECLARE_ALIGNED(16, float, ltp_state)[3072]; ///< time signal for LTP | |
240 | PredictorState predictor_state[MAX_PREDICTORS]; | |
241 | float *ret; ///< PCM output | |
242 | } SingleChannelElement; | |
243 | ||
244 | /** | |
245 | * channel element - generic struct for SCE/CPE/CCE/LFE | |
246 | */ | |
247 | typedef struct ChannelElement { | |
f6fa7814 | 248 | int present; |
2ba45a60 DM |
249 | // CPE specific |
250 | int common_window; ///< Set if channels share a common 'IndividualChannelStream' in bitstream. | |
251 | int ms_mode; ///< Signals mid/side stereo flags coding mode (used by encoder) | |
252 | uint8_t ms_mask[128]; ///< Set if mid/side stereo is used for each scalefactor window band | |
253 | // shared | |
254 | SingleChannelElement ch[2]; | |
255 | // CCE specific | |
256 | ChannelCoupling coup; | |
257 | SpectralBandReplication sbr; | |
258 | } ChannelElement; | |
259 | ||
260 | /** | |
261 | * main AAC context | |
262 | */ | |
263 | struct AACContext { | |
264 | AVClass *class; | |
265 | AVCodecContext *avctx; | |
266 | AVFrame *frame; | |
267 | ||
268 | int is_saved; ///< Set if elements have stored overlap from previous frame. | |
269 | DynamicRangeControl che_drc; | |
270 | ||
271 | /** | |
272 | * @name Channel element related data | |
273 | * @{ | |
274 | */ | |
275 | ChannelElement *che[4][MAX_ELEM_ID]; | |
276 | ChannelElement *tag_che_map[4][MAX_ELEM_ID]; | |
277 | int tags_mapped; | |
f6fa7814 | 278 | int warned_remapping_once; |
2ba45a60 DM |
279 | /** @} */ |
280 | ||
281 | /** | |
282 | * @name temporary aligned temporary buffers | |
283 | * (We do not want to have these on the stack.) | |
284 | * @{ | |
285 | */ | |
286 | DECLARE_ALIGNED(32, float, buf_mdct)[1024]; | |
287 | /** @} */ | |
288 | ||
289 | /** | |
290 | * @name Computed / set up during initialization | |
291 | * @{ | |
292 | */ | |
293 | FFTContext mdct; | |
294 | FFTContext mdct_small; | |
295 | FFTContext mdct_ld; | |
296 | FFTContext mdct_ltp; | |
297 | FmtConvertContext fmt_conv; | |
f6fa7814 | 298 | AVFloatDSPContext *fdsp; |
2ba45a60 DM |
299 | int random_state; |
300 | /** @} */ | |
301 | ||
302 | /** | |
303 | * @name Members used for output | |
304 | * @{ | |
305 | */ | |
306 | SingleChannelElement *output_element[MAX_CHANNELS]; ///< Points to each SingleChannelElement | |
307 | /** @} */ | |
308 | ||
309 | ||
310 | /** | |
311 | * @name Japanese DTV specific extension | |
312 | * @{ | |
313 | */ | |
314 | int force_dmono_mode;///< 0->not dmono, 1->use first channel, 2->use second channel | |
315 | int dmono_mode; ///< 0->not dmono, 1->use first channel, 2->use second channel | |
316 | /** @} */ | |
317 | ||
318 | DECLARE_ALIGNED(32, float, temp)[128]; | |
319 | ||
320 | OutputConfiguration oc[2]; | |
321 | int warned_num_aac_frames; | |
322 | ||
323 | /* aacdec functions pointers */ | |
324 | void (*imdct_and_windowing)(AACContext *ac, SingleChannelElement *sce); | |
325 | void (*apply_ltp)(AACContext *ac, SingleChannelElement *sce); | |
326 | void (*apply_tns)(float coef[1024], TemporalNoiseShaping *tns, | |
327 | IndividualChannelStream *ics, int decode); | |
328 | void (*windowing_and_mdct_ltp)(AACContext *ac, float *out, | |
329 | float *in, IndividualChannelStream *ics); | |
330 | void (*update_ltp)(AACContext *ac, SingleChannelElement *sce); | |
331 | ||
332 | }; | |
333 | ||
334 | void ff_aacdec_init_mips(AACContext *c); | |
335 | ||
336 | #endif /* AVCODEC_AAC_H */ |