Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * audio encoder psychoacoustic model | |
3 | * Copyright (C) 2008 Konstantin Shishkov | |
4 | * | |
5 | * This file is part of FFmpeg. | |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * FFmpeg is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with FFmpeg; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
21 | ||
22 | #ifndef AVCODEC_PSYMODEL_H | |
23 | #define AVCODEC_PSYMODEL_H | |
24 | ||
25 | #include "avcodec.h" | |
26 | ||
27 | /** maximum possible number of bands; array size of FFPsyChannel::psy_bands and FFPsyChannelGroup::coupling */ | |
28 | #define PSY_MAX_BANDS 128 | |
29 | /** maximum number of channels; array size of FFPsyChannelGroup::ch */ | |
30 | #define PSY_MAX_CHANS 20 | |
31 | ||
32 | #define AAC_CUTOFF(s) ((s)->bit_rate ? FFMIN3(4000 + (s)->bit_rate/8, 12000 + (s)->bit_rate/32, (s)->sample_rate / 2) : ((s)->sample_rate / 2)) /* sample_rate/2 when bit_rate is 0; otherwise FFMIN3 of 4000 + bit_rate/8, 12000 + bit_rate/32 and sample_rate/2. Evaluates s several times, so pass an expression without side effects. */ | |
33 | ||
34 | /** | |
35 | * psychoacoustic information for a single band (FFPsyChannel stores an array of these) | |
36 | */ | |
37 | typedef struct FFPsyBand { | |
38 | int bits; /**< NOTE(review): presumably a bit count for this band - confirm against the model implementation */ | |
39 | float energy; /**< band energy; documented as set by FFPsyModel::analyze */ | |
40 | float threshold; /**< band threshold; documented as set by FFPsyModel::analyze */ | |
41 | float distortion; /**< NOTE(review): meaning and writer are not visible in this header - confirm */ | |
42 | float perceptual_weight; /**< NOTE(review): meaning and writer are not visible in this header - confirm */ | |
43 | } FFPsyBand; | |
44 | ||
45 | /** | |
46 | * psychoacoustic information for a single channel | |
47 | */ | |
48 | typedef struct FFPsyChannel { | |
49 | FFPsyBand psy_bands[PSY_MAX_BANDS]; ///< per-band information for this channel (capacity PSY_MAX_BANDS) | |
50 | float entropy; ///< total PE (presumably perceptual entropy) for this channel | |
51 | } FFPsyChannel; | |
52 | ||
53 | /** | |
54 | * psychoacoustic information for an arbitrary group of channels | |
55 | */ | |
56 | typedef struct FFPsyChannelGroup { | |
57 | FFPsyChannel *ch[PSY_MAX_CHANS]; ///< pointers to the individual channels in the group (capacity PSY_MAX_CHANS) | |
58 | uint8_t num_ch; ///< number of channels in this group | |
59 | uint8_t coupling[PSY_MAX_BANDS]; ///< one entry per band: allow coupling for this band in the group | |
60 | } FFPsyChannelGroup; | |
61 | ||
62 | /** | |
63 | * windowing-related information, returned by value from FFPsyModel::window | |
64 | */ | |
65 | typedef struct FFPsyWindowInfo { | |
66 | int window_type[3]; ///< window type (short/long/transitional, etc.) - current, previous and next | |
67 | int window_shape; ///< window shape (sine, KBD, etc.) | |
68 | int num_windows; ///< number of windows in a frame | |
69 | int grouping[8]; ///< window grouping (e.g. for AAC) | |
70 | int *window_sizes; ///< sequence of window sizes inside one frame (e.g. for WMA); NOTE(review): ownership of the array is not visible here | |
71 | } FFPsyWindowInfo; | |
72 | ||
73 | /** | |
74 | * context used by the psychoacoustic model | |
75 | */ | |
76 | typedef struct FFPsyContext { | |
77 | AVCodecContext *avctx; ///< encoder context | |
78 | const struct FFPsyModel *model; ///< encoder-specific model functions | |
79 | ||
80 | FFPsyChannel *ch; ///< single channel information | |
81 | FFPsyChannelGroup *group; ///< channel group information | |
82 | int num_groups; ///< number of channel groups | |
83 | ||
84 | uint8_t **bands; ///< scalefactor band sizes for possible frame sizes | |
85 | int *num_bands; ///< number of scalefactor bands for possible frame sizes | |
86 | int num_lens; ///< number of scalefactor band sets | |
87 | ||
88 | struct { | |
89 | int size; ///< size of the bit reservoir in bits | |
90 | int bits; ///< number of bits used in the bit reservoir | |
91 | } bitres; | |
92 | ||
93 | void* model_priv_data; ///< psychoacoustic model implementation private data | |
94 | } FFPsyContext; | |
95 | ||
96 | /** | |
97 | * codec-specific psychoacoustic model implementation (table of callbacks) | |
98 | */ | |
99 | typedef struct FFPsyModel { | |
100 | const char *name; /**< model name */ | |
101 | int (*init) (FFPsyContext *apc); /**< model initialization callback; NOTE(review): return convention presumably matches ff_psy_init() - confirm */ | |
102 | ||
103 | /** | |
104 | * Suggest a window sequence for a channel. | |
105 | * | |
106 | * @param ctx model context | |
107 | * @param audio samples for the current frame | |
108 | * @param la lookahead samples (NULL when unavailable) | |
109 | * @param channel number of the channel element to analyze | |
110 | * @param prev_type previous window type | |
111 | * | |
112 | * @return suggested window information, returned by value in an FFPsyWindowInfo structure | |
113 | */ | |
114 | FFPsyWindowInfo (*window)(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type); | |
115 | ||
116 | /** | |
117 | * Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels. | |
118 | * | |
119 | * @param ctx model context | |
120 | * @param channel channel number of the first channel in the group to perform analysis on | |
121 | * @param coeffs array of pointers to the transformed coefficients | |
122 | * @param wi window information for the channels in the group | |
123 | */ | |
124 | void (*analyze)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi); | |
125 | ||
126 | void (*end) (FFPsyContext *apc); /**< model teardown callback; NOTE(review): presumably invoked from ff_psy_end() - confirm */ | |
127 | } FFPsyModel; | |
128 | ||
129 | /** | |
130 | * Initialize the psychoacoustic model. | |
131 | * | |
132 | * @param ctx model context | |
133 | * @param avctx codec context | |
134 | * @param num_lens number of possible frame lengths | |
135 | * @param bands scalefactor band lengths for all frame lengths | |
136 | * @param num_bands number of scalefactor bands for all frame lengths | |
137 | * @param num_groups number of channel groups | |
138 | * @param group_map array holding, for each group, the number of channels in that group minus one | |
139 | * | |
140 | * @return zero if successful, a negative value if not | |
141 | */ | |
142 | int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, | |
143 | const uint8_t **bands, const int *num_bands, | |
144 | int num_groups, const uint8_t *group_map); | |
145 | ||
146 | /** | |
147 | * Determine which group a channel belongs to. | |
148 | * | |
149 | * @param ctx psymodel context | |
150 | * @param channel channel to locate the group for | |
151 | * | |
152 | * @return pointer to the FFPsyChannelGroup this channel belongs to (NOTE(review): result for a channel outside every group is not specified here - confirm) | |
153 | */ | |
154 | FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel); | |
155 | ||
156 | /** | |
157 | * Clean up the model context at the end. | |
158 | * | |
159 | * @param ctx model context | |
160 | */ | |
161 | void ff_psy_end(FFPsyContext *ctx); | |
162 | ||
163 | ||
164 | /************************************************************************** | |
165 | * Audio preprocessing stuff. * | |
166 | * This should be moved into some audio filter eventually. * | |
167 | **************************************************************************/ | |
168 | struct FFPsyPreprocessContext; /* opaque in this header: only forward-declared here and used through pointers */ | |
169 | ||
170 | /** | |
171 | * Initialize psychoacoustic model audio preprocessing; the returned context is the one taken by ff_psy_preprocess() and ff_psy_preprocess_end() (NOTE(review): failure return value is not documented here - confirm) | |
172 | */ | |
173 | struct FFPsyPreprocessContext *ff_psy_preprocess_init(AVCodecContext *avctx); | |
174 | ||
175 | /** | |
176 | * Preprocess several channels of an audio frame in order to compress it better. | |
177 | * | |
178 | * @param ctx preprocessing context | |
179 | * @param audio samples to be filtered (in place) | |
180 | * @param channels number of channels to preprocess | |
181 | */ | |
182 | void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int channels); | |
183 | ||
184 | /** | |
185 | * Clean up the audio preprocessing module; ctx is a context of the type returned by ff_psy_preprocess_init(). | |
186 | */ | |
187 | void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx); | |
188 | ||
189 | #endif /* AVCODEC_PSYMODEL_H */ |