/*
 * audio encoder psychoacoustic model
 * Copyright (C) 2008 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
| 21 | |
| 22 | #ifndef AVCODEC_PSYMODEL_H |
| 23 | #define AVCODEC_PSYMODEL_H |
| 24 | |
| 25 | #include "avcodec.h" |
| 26 | |
/** maximum possible number of bands */
#define PSY_MAX_BANDS 128
/** maximum number of channels */
#define PSY_MAX_CHANS 20

/**
 * Cutoff value derived from the bit rate and sample rate of a context.
 *
 * With a non-zero bit rate the result is the smallest of
 * 4000 + bit_rate/8, 12000 + bit_rate/32 and sample_rate/2.
 * With a zero bit rate the result is sample_rate/2.
 *
 * @param s pointer to a structure providing bit_rate and sample_rate members
 *          (presumably an AVCodecContext - confirm against callers)
 *
 * @note This is a function-like macro: s is evaluated several times, so the
 *       argument must be free of side effects. FFMIN3 has to be visible at
 *       the point of use; it is not defined in this header.
 */
#define AAC_CUTOFF(s)                                   \
    ((s)->bit_rate                                      \
         ? FFMIN3(4000 + (s)->bit_rate / 8,             \
                  12000 + (s)->bit_rate / 32,           \
                  (s)->sample_rate / 2)                 \
         : ((s)->sample_rate / 2))
| 33 | |
/**
 * single band psychoacoustic information
 *
 * The analyze() callback of FFPsyModel is documented as setting the band
 * info (threshold, energy). The precise definition of the remaining members
 * is up to the model implementation and cannot be derived from this header.
 */
typedef struct FFPsyBand {
    int   bits;              ///< bit count for the band (presumably an estimate or allocation - confirm in the model)
    float energy;            ///< band energy
    float threshold;         ///< band threshold
    float distortion;        ///< band distortion (definition: see the model implementation)
    float perceptual_weight; ///< perceptual weight of the band (definition: see the model implementation)
} FFPsyBand;
| 44 | |
| 45 | /** |
| 46 | * single channel psychoacoustic information |
| 47 | */ |
| 48 | typedef struct FFPsyChannel { |
| 49 | FFPsyBand psy_bands[PSY_MAX_BANDS]; ///< channel bands information |
| 50 | float entropy; ///< total PE for this channel |
| 51 | } FFPsyChannel; |
| 52 | |
| 53 | /** |
| 54 | * psychoacoustic information for an arbitrary group of channels |
| 55 | */ |
| 56 | typedef struct FFPsyChannelGroup { |
| 57 | FFPsyChannel *ch[PSY_MAX_CHANS]; ///< pointers to the individual channels in the group |
| 58 | uint8_t num_ch; ///< number of channels in this group |
| 59 | uint8_t coupling[PSY_MAX_BANDS]; ///< allow coupling for this band in the group |
| 60 | } FFPsyChannelGroup; |
| 61 | |
/**
 * windowing related information
 *
 * Returned by value from the window() callback of FFPsyModel and passed by
 * pointer to its analyze() callback.
 */
typedef struct FFPsyWindowInfo {
    int  window_type[3]; ///< window type (short/long/transitional, etc.) - current, previous and next
    int  window_shape;   ///< window shape (sine/KBD/whatever)
    int  num_windows;    ///< number of windows in a frame
    int  grouping[8];    ///< window grouping (e.g. for AAC)
    int *window_sizes;   ///< sequence of window sizes inside one frame (e.g. for WMA)
} FFPsyWindowInfo;
| 72 | |
| 73 | /** |
| 74 | * context used by psychoacoustic model |
| 75 | */ |
| 76 | typedef struct FFPsyContext { |
| 77 | AVCodecContext *avctx; ///< encoder context |
| 78 | const struct FFPsyModel *model; ///< encoder-specific model functions |
| 79 | |
| 80 | FFPsyChannel *ch; ///< single channel information |
| 81 | FFPsyChannelGroup *group; ///< channel group information |
| 82 | int num_groups; ///< number of channel groups |
| 83 | |
| 84 | uint8_t **bands; ///< scalefactor band sizes for possible frame sizes |
| 85 | int *num_bands; ///< number of scalefactor bands for possible frame sizes |
| 86 | int num_lens; ///< number of scalefactor band sets |
| 87 | |
| 88 | struct { |
| 89 | int size; ///< size of the bitresevoir in bits |
| 90 | int bits; ///< number of bits used in the bitresevoir |
| 91 | } bitres; |
| 92 | |
| 93 | void* model_priv_data; ///< psychoacoustic model implementation private data |
| 94 | } FFPsyContext; |
| 95 | |
| 96 | /** |
| 97 | * codec-specific psychoacoustic model implementation |
| 98 | */ |
| 99 | typedef struct FFPsyModel { |
| 100 | const char *name; |
| 101 | int (*init) (FFPsyContext *apc); |
| 102 | |
| 103 | /** |
| 104 | * Suggest window sequence for channel. |
| 105 | * |
| 106 | * @param ctx model context |
| 107 | * @param audio samples for the current frame |
| 108 | * @param la lookahead samples (NULL when unavailable) |
| 109 | * @param channel number of channel element to analyze |
| 110 | * @param prev_type previous window type |
| 111 | * |
| 112 | * @return suggested window information in a structure |
| 113 | */ |
| 114 | FFPsyWindowInfo (*window)(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type); |
| 115 | |
| 116 | /** |
| 117 | * Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels. |
| 118 | * |
| 119 | * @param ctx model context |
| 120 | * @param channel channel number of the first channel in the group to perform analysis on |
| 121 | * @param coeffs array of pointers to the transformed coefficients |
| 122 | * @param wi window information for the channels in the group |
| 123 | */ |
| 124 | void (*analyze)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi); |
| 125 | |
| 126 | void (*end) (FFPsyContext *apc); |
| 127 | } FFPsyModel; |
| 128 | |
| 129 | /** |
| 130 | * Initialize psychoacoustic model. |
| 131 | * |
| 132 | * @param ctx model context |
| 133 | * @param avctx codec context |
| 134 | * @param num_lens number of possible frame lengths |
| 135 | * @param bands scalefactor band lengths for all frame lengths |
| 136 | * @param num_bands number of scalefactor bands for all frame lengths |
| 137 | * @param num_groups number of channel groups |
| 138 | * @param group_map array with # of channels in group - 1, for each group |
| 139 | * |
| 140 | * @return zero if successful, a negative value if not |
| 141 | */ |
| 142 | int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, |
| 143 | const uint8_t **bands, const int *num_bands, |
| 144 | int num_groups, const uint8_t *group_map); |
| 145 | |
| 146 | /** |
| 147 | * Determine what group a channel belongs to. |
| 148 | * |
| 149 | * @param ctx psymodel context |
| 150 | * @param channel channel to locate the group for |
| 151 | * |
| 152 | * @return pointer to the FFPsyChannelGroup this channel belongs to |
| 153 | */ |
| 154 | FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel); |
| 155 | |
| 156 | /** |
| 157 | * Cleanup model context at the end. |
| 158 | * |
| 159 | * @param ctx model context |
| 160 | */ |
| 161 | void ff_psy_end(FFPsyContext *ctx); |
| 162 | |
| 163 | |
/**************************************************************************
 *                       Audio preprocessing stuff.                       *
 *       This should be moved into some audio filter eventually.          *
 **************************************************************************/

/** opaque preprocessing context; only declared here, its members are not visible in this header */
struct FFPsyPreprocessContext;
| 169 | |
| 170 | /** |
| 171 | * psychoacoustic model audio preprocessing initialization |
| 172 | */ |
| 173 | struct FFPsyPreprocessContext *ff_psy_preprocess_init(AVCodecContext *avctx); |
| 174 | |
/**
 * Preprocess several channels of an audio frame in order to compress it better.
 *
 * @param ctx      preprocessing context
 * @param audio    samples to be filtered (in place)
 * @param channels number of channels to preprocess
 */
void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int channels);
| 183 | |
/**
 * Cleanup audio preprocessing module.
 *
 * @param ctx preprocessing context
 */
void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx);
| 188 | |
| 189 | #endif /* AVCODEC_PSYMODEL_H */ |