| 1 | /* |
| 2 | * AAC definitions and structures |
| 3 | * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org ) |
| 4 | * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com ) |
| 5 | * |
| 6 | * This file is part of FFmpeg. |
| 7 | * |
| 8 | * FFmpeg is free software; you can redistribute it and/or |
| 9 | * modify it under the terms of the GNU Lesser General Public |
| 10 | * License as published by the Free Software Foundation; either |
| 11 | * version 2.1 of the License, or (at your option) any later version. |
| 12 | * |
| 13 | * FFmpeg is distributed in the hope that it will be useful, |
| 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 16 | * Lesser General Public License for more details. |
| 17 | * |
| 18 | * You should have received a copy of the GNU Lesser General Public |
| 19 | * License along with FFmpeg; if not, write to the Free Software |
| 20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 21 | */ |
| 22 | |
| 23 | /** |
| 24 | * @file |
| 25 | * AAC definitions and structures |
| 26 | * @author Oded Shimon ( ods15 ods15 dyndns org ) |
| 27 | * @author Maxim Gavrilov ( maxim.gavrilov gmail com ) |
| 28 | */ |
| 29 | |
| 30 | #ifndef AVCODEC_AAC_H |
| 31 | #define AVCODEC_AAC_H |
| 32 | |
| 33 | #include "libavutil/float_dsp.h" |
| 34 | #include "avcodec.h" |
| 35 | #include "fft.h" |
| 36 | #include "mpeg4audio.h" |
| 37 | #include "sbr.h" |
| 38 | #include "fmtconvert.h" |
| 39 | |
| 40 | #include <stdint.h> |
| 41 | |
#define MAX_CHANNELS     64 ///< length of the per-channel arrays (exclude_mask, output_element)
#define MAX_ELEM_ID      16 ///< second dimension of the che / tag_che_map tables

#define TNS_MAX_ORDER    20 ///< coefficients stored per TNS filter (TemporalNoiseShaping.coef)
#define MAX_LTP_LONG_SFB 40 ///< length of LongTermPrediction.used
| 47 | |
/**
 * Raw data block element types.
 * The enumerators are consecutive, starting at 0 and ending with TYPE_END.
 */
enum RawDataBlockType {
    TYPE_SCE = 0,
    TYPE_CPE = 1,
    TYPE_CCE = 2,
    TYPE_LFE = 3,
    TYPE_DSE = 4,
    TYPE_PCE = 5,
    TYPE_FIL = 6,
    TYPE_END = 7,
};
| 58 | |
/**
 * Extension payload identifiers.
 * The first three are consecutive from 0; the remaining ones have
 * explicitly assigned, non-contiguous values.
 */
enum ExtensionPayloadID {
    EXT_FILL          = 0x0,
    EXT_FILL_DATA     = 0x1,
    EXT_DATA_ELEMENT  = 0x2,
    EXT_DYNAMIC_RANGE = 0xb,
    EXT_SBR_DATA      = 0xd,
    EXT_SBR_DATA_CRC  = 0xe,
};
| 67 | |
/**
 * Window sequence of a channel stream (see IndividualChannelStream.window_sequence).
 */
enum WindowSequence {
    ONLY_LONG_SEQUENCE   = 0,
    LONG_START_SEQUENCE  = 1,
    EIGHT_SHORT_SEQUENCE = 2,
    LONG_STOP_SEQUENCE   = 3,
};
| 74 | |
/**
 * Band types. Only the values that need a name are listed; the numbering
 * is sparse.
 */
enum BandType {
    ZERO_BT        = 0,  ///< Neither scalefactors nor spectral data are present: all zero.
    FIRST_PAIR_BT  = 5,  ///< From this band type onward, one code word encodes two values instead of four.
    ESC_BT         = 11, ///< Spectral data use escape-sequence coding.
    NOISE_BT       = 13, ///< Spectral data are scaled white noise that is not coded in the bitstream.
    INTENSITY_BT2  = 14, ///< Scalefactor data hold intensity stereo positions.
    INTENSITY_BT   = 15, ///< Scalefactor data hold intensity stereo positions.
};
| 83 | |
/**
 * Test bits 1 and 3 (mask 0xA) of (x - 1).
 * The result is the masked value itself (0, 2, 8 or 10), not a normalized
 * 0/1 flag, so use it only in a boolean context.
 */
#define IS_CODEBOOK_UNSIGNED(x) (((x) - 1) & 0xA)
| 85 | |
/**
 * Channel position classes; AAC_CHANNEL_OFF (0) is the "no position" value.
 */
enum ChannelPosition {
    AAC_CHANNEL_OFF   = 0,
    AAC_CHANNEL_FRONT = 1,
    AAC_CHANNEL_SIDE  = 2,
    AAC_CHANNEL_BACK  = 3,
    AAC_CHANNEL_LFE   = 4,
    AAC_CHANNEL_CC    = 5,
};
| 94 | |
/**
 * Stage of the decoding process at which channel coupling is applied.
 * Note that the value 2 is not assigned to any enumerator.
 */
enum CouplingPoint {
    BEFORE_TNS            = 0,
    BETWEEN_TNS_AND_IMDCT = 1,
    AFTER_IMDCT           = 3,
};
| 103 | |
/**
 * Status of an output configuration, in increasing numeric order from
 * unconfigured (0) to locked (4).
 */
enum OCStatus {
    OC_NONE        = 0, ///< No output configuration yet
    OC_TRIAL_PCE   = 1, ///< Trial configuration taken from an inband PCE
    OC_TRIAL_FRAME = 2, ///< Trial configuration taken from a frame header
    OC_GLOBAL_HDR  = 3, ///< Configuration taken from a global header, not locked yet
    OC_LOCKED      = 4, ///< Configuration is locked in place
};
| 114 | |
| 115 | typedef struct OutputConfiguration { |
| 116 | MPEG4AudioConfig m4ac; |
| 117 | uint8_t layout_map[MAX_ELEM_ID*4][3]; |
| 118 | int layout_map_tags; |
| 119 | int channels; |
| 120 | uint64_t channel_layout; |
| 121 | enum OCStatus status; |
| 122 | } OutputConfiguration; |
| 123 | |
/**
 * State of one predictor: six floats, stored in the order
 * cor0, cor1, var0, var1, r0, r1.
 */
typedef struct PredictorState {
    float cor0, cor1;
    float var0, var1;
    float r0,   r1;
} PredictorState;
| 135 | |
#define MAX_PREDICTORS  672 ///< length of SingleChannelElement.predictor_state

#define SCALE_DIV_512    36 ///< scalefactor index difference that corresponds to a factor of 512 in scale
#define SCALE_ONE_POS   140 ///< scalefactor index at which scale == 1.0
#define SCALE_MAX_POS   255 ///< largest scalefactor index
#define SCALE_MAX_DIFF   60 ///< largest scalefactor difference the standard allows
#define SCALE_DIFF_ZERO  60 ///< codebook index that encodes a scalefactor index difference of zero
| 143 | |
| 144 | /** |
| 145 | * Long Term Prediction |
| 146 | */ |
| 147 | typedef struct LongTermPrediction { |
| 148 | int8_t present; |
| 149 | int16_t lag; |
| 150 | float coef; |
| 151 | int8_t used[MAX_LTP_LONG_SFB]; |
| 152 | } LongTermPrediction; |
| 153 | |
| 154 | /** |
| 155 | * Individual Channel Stream |
| 156 | */ |
| 157 | typedef struct IndividualChannelStream { |
| 158 | uint8_t max_sfb; ///< number of scalefactor bands per group |
| 159 | enum WindowSequence window_sequence[2]; |
| 160 | uint8_t use_kb_window[2]; ///< If set, use Kaiser-Bessel window, otherwise use a sine window. |
| 161 | int num_window_groups; |
| 162 | uint8_t group_len[8]; |
| 163 | LongTermPrediction ltp; |
| 164 | const uint16_t *swb_offset; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window |
| 165 | const uint8_t *swb_sizes; ///< table of scalefactor band sizes for a particular window |
| 166 | int num_swb; ///< number of scalefactor window bands |
| 167 | int num_windows; |
| 168 | int tns_max_bands; |
| 169 | int predictor_present; |
| 170 | int predictor_initialized; |
| 171 | int predictor_reset_group; |
| 172 | uint8_t prediction_used[41]; |
| 173 | } IndividualChannelStream; |
| 174 | |
| 175 | /** |
| 176 | * Temporal Noise Shaping |
| 177 | */ |
| 178 | typedef struct TemporalNoiseShaping { |
| 179 | int present; |
| 180 | int n_filt[8]; |
| 181 | int length[8][4]; |
| 182 | int direction[8][4]; |
| 183 | int order[8][4]; |
| 184 | float coef[8][4][TNS_MAX_ORDER]; |
| 185 | } TemporalNoiseShaping; |
| 186 | |
| 187 | /** |
| 188 | * Dynamic Range Control - decoded from the bitstream but not processed further. |
| 189 | */ |
| 190 | typedef struct DynamicRangeControl { |
| 191 | int pce_instance_tag; ///< Indicates with which program the DRC info is associated. |
| 192 | int dyn_rng_sgn[17]; ///< DRC sign information; 0 - positive, 1 - negative |
| 193 | int dyn_rng_ctl[17]; ///< DRC magnitude information |
| 194 | int exclude_mask[MAX_CHANNELS]; ///< Channels to be excluded from DRC processing. |
| 195 | int band_incr; ///< Number of DRC bands greater than 1 having DRC info. |
| 196 | int interpolation_scheme; ///< Indicates the interpolation scheme used in the SBR QMF domain. |
| 197 | int band_top[17]; ///< Indicates the top of the i-th DRC band in units of 4 spectral lines. |
| 198 | int prog_ref_level; /**< A reference level for the long-term program audio level for all |
| 199 | * channels combined. |
| 200 | */ |
| 201 | } DynamicRangeControl; |
| 202 | |
/**
 * Pulse data: a pulse count, a start value, and up to four
 * position / amplitude pairs.
 */
typedef struct Pulse {
    int num_pulse;
    int start;
    int pos[4]; ///< four position slots
    int amp[4]; ///< four amplitude slots, parallel to pos
} Pulse;
| 209 | |
| 210 | /** |
| 211 | * coupling parameters |
| 212 | */ |
| 213 | typedef struct ChannelCoupling { |
| 214 | enum CouplingPoint coupling_point; ///< The point during decoding at which coupling is applied. |
| 215 | int num_coupled; ///< number of target elements |
| 216 | enum RawDataBlockType type[8]; ///< Type of channel element to be coupled - SCE or CPE. |
| 217 | int id_select[8]; ///< element id |
| 218 | int ch_select[8]; /**< [0] shared list of gains; [1] list of gains for right channel; |
| 219 | * [2] list of gains for left channel; [3] lists of gains for both channels |
| 220 | */ |
| 221 | float gain[16][120]; |
| 222 | } ChannelCoupling; |
| 223 | |
| 224 | /** |
| 225 | * Single Channel Element - used for both SCE and LFE elements. |
| 226 | */ |
| 227 | typedef struct SingleChannelElement { |
| 228 | IndividualChannelStream ics; |
| 229 | TemporalNoiseShaping tns; |
| 230 | Pulse pulse; |
| 231 | enum BandType band_type[128]; ///< band types |
| 232 | int band_type_run_end[120]; ///< band type run end points |
| 233 | float sf[120]; ///< scalefactors |
| 234 | int sf_idx[128]; ///< scalefactor indices (used by encoder) |
| 235 | uint8_t zeroes[128]; ///< band is not coded (used by encoder) |
| 236 | DECLARE_ALIGNED(32, float, coeffs)[1024]; ///< coefficients for IMDCT |
| 237 | DECLARE_ALIGNED(32, float, saved)[1536]; ///< overlap |
| 238 | DECLARE_ALIGNED(32, float, ret_buf)[2048]; ///< PCM output buffer |
| 239 | DECLARE_ALIGNED(16, float, ltp_state)[3072]; ///< time signal for LTP |
| 240 | PredictorState predictor_state[MAX_PREDICTORS]; |
| 241 | float *ret; ///< PCM output |
| 242 | } SingleChannelElement; |
| 243 | |
| 244 | /** |
| 245 | * channel element - generic struct for SCE/CPE/CCE/LFE |
| 246 | */ |
| 247 | typedef struct ChannelElement { |
| 248 | // CPE specific |
| 249 | int common_window; ///< Set if channels share a common 'IndividualChannelStream' in bitstream. |
| 250 | int ms_mode; ///< Signals mid/side stereo flags coding mode (used by encoder) |
| 251 | uint8_t ms_mask[128]; ///< Set if mid/side stereo is used for each scalefactor window band |
| 252 | // shared |
| 253 | SingleChannelElement ch[2]; |
| 254 | // CCE specific |
| 255 | ChannelCoupling coup; |
| 256 | SpectralBandReplication sbr; |
| 257 | } ChannelElement; |
| 258 | |
| 259 | /** |
| 260 | * main AAC context |
| 261 | */ |
| 262 | struct AACContext { |
| 263 | AVClass *class; |
| 264 | AVCodecContext *avctx; |
| 265 | AVFrame *frame; |
| 266 | |
| 267 | int is_saved; ///< Set if elements have stored overlap from previous frame. |
| 268 | DynamicRangeControl che_drc; |
| 269 | |
| 270 | /** |
| 271 | * @name Channel element related data |
| 272 | * @{ |
| 273 | */ |
| 274 | ChannelElement *che[4][MAX_ELEM_ID]; |
| 275 | ChannelElement *tag_che_map[4][MAX_ELEM_ID]; |
| 276 | int tags_mapped; |
| 277 | /** @} */ |
| 278 | |
| 279 | /** |
| 280 | * @name temporary aligned temporary buffers |
| 281 | * (We do not want to have these on the stack.) |
| 282 | * @{ |
| 283 | */ |
| 284 | DECLARE_ALIGNED(32, float, buf_mdct)[1024]; |
| 285 | /** @} */ |
| 286 | |
| 287 | /** |
| 288 | * @name Computed / set up during initialization |
| 289 | * @{ |
| 290 | */ |
| 291 | FFTContext mdct; |
| 292 | FFTContext mdct_small; |
| 293 | FFTContext mdct_ld; |
| 294 | FFTContext mdct_ltp; |
| 295 | FmtConvertContext fmt_conv; |
| 296 | AVFloatDSPContext fdsp; |
| 297 | int random_state; |
| 298 | /** @} */ |
| 299 | |
| 300 | /** |
| 301 | * @name Members used for output |
| 302 | * @{ |
| 303 | */ |
| 304 | SingleChannelElement *output_element[MAX_CHANNELS]; ///< Points to each SingleChannelElement |
| 305 | /** @} */ |
| 306 | |
| 307 | |
| 308 | /** |
| 309 | * @name Japanese DTV specific extension |
| 310 | * @{ |
| 311 | */ |
| 312 | int force_dmono_mode;///< 0->not dmono, 1->use first channel, 2->use second channel |
| 313 | int dmono_mode; ///< 0->not dmono, 1->use first channel, 2->use second channel |
| 314 | /** @} */ |
| 315 | |
| 316 | DECLARE_ALIGNED(32, float, temp)[128]; |
| 317 | |
| 318 | OutputConfiguration oc[2]; |
| 319 | int warned_num_aac_frames; |
| 320 | |
| 321 | /* aacdec functions pointers */ |
| 322 | void (*imdct_and_windowing)(AACContext *ac, SingleChannelElement *sce); |
| 323 | void (*apply_ltp)(AACContext *ac, SingleChannelElement *sce); |
| 324 | void (*apply_tns)(float coef[1024], TemporalNoiseShaping *tns, |
| 325 | IndividualChannelStream *ics, int decode); |
| 326 | void (*windowing_and_mdct_ltp)(AACContext *ac, float *out, |
| 327 | float *in, IndividualChannelStream *ics); |
| 328 | void (*update_ltp)(AACContext *ac, SingleChannelElement *sce); |
| 329 | |
| 330 | }; |
| 331 | |
| 332 | void ff_aacdec_init_mips(AACContext *c); |
| 333 | |
| 334 | #endif /* AVCODEC_AAC_H */ |