3 * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/**
 * Mixes audio from multiple sources into a single output. The channel layout,
 * sample rate, and sample format will be the same for all inputs and the
 * output.
 */
31 #include "libavutil/attributes.h"
32 #include "libavutil/audio_fifo.h"
33 #include "libavutil/avassert.h"
34 #include "libavutil/avstring.h"
35 #include "libavutil/channel_layout.h"
36 #include "libavutil/common.h"
37 #include "libavutil/float_dsp.h"
38 #include "libavutil/mathematics.h"
39 #include "libavutil/opt.h"
40 #include "libavutil/samplefmt.h"
/* Per-input state, stored one byte per input in MixContext.input_state. */
#define INPUT_OFF      0    /**< input has reached EOF */
#define INPUT_ON       1    /**< input is active */
#define INPUT_INACTIVE 2    /**< input is on, but is currently inactive */

/* Values of the "duration" option (MixContext.duration_mode). */
#define DURATION_LONGEST  0 /**< duration of the longest input */
#define DURATION_SHORTEST 1 /**< duration of the shortest input */
#define DURATION_FIRST    2 /**< duration of the first input */
/**
 * Size and timestamp of one frame queued for the first input.
 * Entries are chained into a singly linked FrameList.
 */
typedef struct FrameInfo {
    int nb_samples;         /**< number of samples of this frame still queued */
    int64_t pts;            /**< timestamp of the first queued sample */
    struct FrameInfo *next; /**< next entry in the list, NULL for the last one */
} FrameInfo;
63 * Linked list used to store timestamps and frame sizes of all frames in the
64 * FIFO for the first input.
66 * This is needed to keep timestamps synchronized for the case where multiple
67 * input frames are pushed to the filter for processing before a frame is
68 * requested by the output link.
70 typedef struct FrameList
{
77 static void frame_list_clear(FrameList
*frame_list
)
80 while (frame_list
->list
) {
81 FrameInfo
*info
= frame_list
->list
;
82 frame_list
->list
= info
->next
;
85 frame_list
->nb_frames
= 0;
86 frame_list
->nb_samples
= 0;
87 frame_list
->end
= NULL
;
91 static int frame_list_next_frame_size(FrameList
*frame_list
)
93 if (!frame_list
->list
)
95 return frame_list
->list
->nb_samples
;
98 static int64_t frame_list_next_pts(FrameList
*frame_list
)
100 if (!frame_list
->list
)
101 return AV_NOPTS_VALUE
;
102 return frame_list
->list
->pts
;
105 static void frame_list_remove_samples(FrameList
*frame_list
, int nb_samples
)
107 if (nb_samples
>= frame_list
->nb_samples
) {
108 frame_list_clear(frame_list
);
110 int samples
= nb_samples
;
111 while (samples
> 0) {
112 FrameInfo
*info
= frame_list
->list
;
114 if (info
->nb_samples
<= samples
) {
115 samples
-= info
->nb_samples
;
116 frame_list
->list
= info
->next
;
117 if (!frame_list
->list
)
118 frame_list
->end
= NULL
;
119 frame_list
->nb_frames
--;
120 frame_list
->nb_samples
-= info
->nb_samples
;
123 info
->nb_samples
-= samples
;
124 info
->pts
+= samples
;
125 frame_list
->nb_samples
-= samples
;
132 static int frame_list_add_frame(FrameList
*frame_list
, int nb_samples
, int64_t pts
)
134 FrameInfo
*info
= av_malloc(sizeof(*info
));
136 return AVERROR(ENOMEM
);
137 info
->nb_samples
= nb_samples
;
141 if (!frame_list
->list
) {
142 frame_list
->list
= info
;
143 frame_list
->end
= info
;
145 av_assert0(frame_list
->end
);
146 frame_list
->end
->next
= info
;
147 frame_list
->end
= info
;
149 frame_list
->nb_frames
++;
150 frame_list
->nb_samples
+= nb_samples
;
156 typedef struct MixContext
{
157 const AVClass
*class; /**< class for AVOptions */
158 AVFloatDSPContext fdsp
;
160 int nb_inputs
; /**< number of inputs */
161 int active_inputs
; /**< number of input currently active */
162 int duration_mode
; /**< mode for determining duration */
163 float dropout_transition
; /**< transition time when an input drops out */
165 int nb_channels
; /**< number of channels */
166 int sample_rate
; /**< sample rate */
168 AVAudioFifo
**fifos
; /**< audio fifo for each input */
169 uint8_t *input_state
; /**< current state of each input */
170 float *input_scale
; /**< mixing scale factor for each input */
171 float scale_norm
; /**< normalization factor for all inputs */
172 int64_t next_pts
; /**< calculated pts for next output frame */
173 FrameList
*frame_list
; /**< list of frame info for the first input */
176 #define OFFSET(x) offsetof(MixContext, x)
177 #define A AV_OPT_FLAG_AUDIO_PARAM
178 #define F AV_OPT_FLAG_FILTERING_PARAM
179 static const AVOption amix_options
[] = {
180 { "inputs", "Number of inputs.",
181 OFFSET(nb_inputs
), AV_OPT_TYPE_INT
, { .i64
= 2 }, 1, 32, A
|F
},
182 { "duration", "How to determine the end-of-stream.",
183 OFFSET(duration_mode
), AV_OPT_TYPE_INT
, { .i64
= DURATION_LONGEST
}, 0, 2, A
|F
, "duration" },
184 { "longest", "Duration of longest input.", 0, AV_OPT_TYPE_CONST
, { .i64
= DURATION_LONGEST
}, INT_MIN
, INT_MAX
, A
|F
, "duration" },
185 { "shortest", "Duration of shortest input.", 0, AV_OPT_TYPE_CONST
, { .i64
= DURATION_SHORTEST
}, INT_MIN
, INT_MAX
, A
|F
, "duration" },
186 { "first", "Duration of first input.", 0, AV_OPT_TYPE_CONST
, { .i64
= DURATION_FIRST
}, INT_MIN
, INT_MAX
, A
|F
, "duration" },
187 { "dropout_transition", "Transition time, in seconds, for volume "
188 "renormalization when an input stream ends.",
189 OFFSET(dropout_transition
), AV_OPT_TYPE_FLOAT
, { .dbl
= 2.0 }, 0, INT_MAX
, A
|F
},
193 AVFILTER_DEFINE_CLASS(amix
);
196 * Update the scaling factors to apply to each input during mixing.
198 * This balances the full volume range between active inputs and handles
199 * volume transitions when EOF is encountered on an input but mixing continues
200 * with the remaining inputs.
202 static void calculate_scales(MixContext
*s
, int nb_samples
)
206 if (s
->scale_norm
> s
->active_inputs
) {
207 s
->scale_norm
-= nb_samples
/ (s
->dropout_transition
* s
->sample_rate
);
208 s
->scale_norm
= FFMAX(s
->scale_norm
, s
->active_inputs
);
211 for (i
= 0; i
< s
->nb_inputs
; i
++) {
212 if (s
->input_state
[i
] == INPUT_ON
)
213 s
->input_scale
[i
] = 1.0f
/ s
->scale_norm
;
215 s
->input_scale
[i
] = 0.0f
;
219 static int config_output(AVFilterLink
*outlink
)
221 AVFilterContext
*ctx
= outlink
->src
;
222 MixContext
*s
= ctx
->priv
;
226 s
->planar
= av_sample_fmt_is_planar(outlink
->format
);
227 s
->sample_rate
= outlink
->sample_rate
;
228 outlink
->time_base
= (AVRational
){ 1, outlink
->sample_rate
};
229 s
->next_pts
= AV_NOPTS_VALUE
;
231 s
->frame_list
= av_mallocz(sizeof(*s
->frame_list
));
233 return AVERROR(ENOMEM
);
235 s
->fifos
= av_mallocz(s
->nb_inputs
* sizeof(*s
->fifos
));
237 return AVERROR(ENOMEM
);
239 s
->nb_channels
= av_get_channel_layout_nb_channels(outlink
->channel_layout
);
240 for (i
= 0; i
< s
->nb_inputs
; i
++) {
241 s
->fifos
[i
] = av_audio_fifo_alloc(outlink
->format
, s
->nb_channels
, 1024);
243 return AVERROR(ENOMEM
);
246 s
->input_state
= av_malloc(s
->nb_inputs
);
248 return AVERROR(ENOMEM
);
249 memset(s
->input_state
, INPUT_ON
, s
->nb_inputs
);
250 s
->active_inputs
= s
->nb_inputs
;
252 s
->input_scale
= av_mallocz_array(s
->nb_inputs
, sizeof(*s
->input_scale
));
254 return AVERROR(ENOMEM
);
255 s
->scale_norm
= s
->active_inputs
;
256 calculate_scales(s
, 0);
258 av_get_channel_layout_string(buf
, sizeof(buf
), -1, outlink
->channel_layout
);
260 av_log(ctx
, AV_LOG_VERBOSE
,
261 "inputs:%d fmt:%s srate:%d cl:%s\n", s
->nb_inputs
,
262 av_get_sample_fmt_name(outlink
->format
), outlink
->sample_rate
, buf
);
268 * Read samples from the input FIFOs, mix, and write to the output link.
270 static int output_frame(AVFilterLink
*outlink
, int nb_samples
)
272 AVFilterContext
*ctx
= outlink
->src
;
273 MixContext
*s
= ctx
->priv
;
274 AVFrame
*out_buf
, *in_buf
;
277 calculate_scales(s
, nb_samples
);
279 out_buf
= ff_get_audio_buffer(outlink
, nb_samples
);
281 return AVERROR(ENOMEM
);
283 in_buf
= ff_get_audio_buffer(outlink
, nb_samples
);
285 av_frame_free(&out_buf
);
286 return AVERROR(ENOMEM
);
289 for (i
= 0; i
< s
->nb_inputs
; i
++) {
290 if (s
->input_state
[i
] == INPUT_ON
) {
291 int planes
, plane_size
, p
;
293 av_audio_fifo_read(s
->fifos
[i
], (void **)in_buf
->extended_data
,
296 planes
= s
->planar
? s
->nb_channels
: 1;
297 plane_size
= nb_samples
* (s
->planar
? 1 : s
->nb_channels
);
298 plane_size
= FFALIGN(plane_size
, 16);
300 for (p
= 0; p
< planes
; p
++) {
301 s
->fdsp
.vector_fmac_scalar((float *)out_buf
->extended_data
[p
],
302 (float *) in_buf
->extended_data
[p
],
303 s
->input_scale
[i
], plane_size
);
307 av_frame_free(&in_buf
);
309 out_buf
->pts
= s
->next_pts
;
310 if (s
->next_pts
!= AV_NOPTS_VALUE
)
311 s
->next_pts
+= nb_samples
;
313 return ff_filter_frame(outlink
, out_buf
);
317 * Returns the smallest number of samples available in the input FIFOs other
318 * than that of the first input.
320 static int get_available_samples(MixContext
*s
)
323 int available_samples
= INT_MAX
;
325 av_assert0(s
->nb_inputs
> 1);
327 for (i
= 1; i
< s
->nb_inputs
; i
++) {
329 if (s
->input_state
[i
] == INPUT_OFF
)
331 nb_samples
= av_audio_fifo_size(s
->fifos
[i
]);
332 available_samples
= FFMIN(available_samples
, nb_samples
);
334 if (available_samples
== INT_MAX
)
336 return available_samples
;
340 * Requests a frame, if needed, from each input link other than the first.
342 static int request_samples(AVFilterContext
*ctx
, int min_samples
)
344 MixContext
*s
= ctx
->priv
;
347 av_assert0(s
->nb_inputs
> 1);
349 for (i
= 1; i
< s
->nb_inputs
; i
++) {
351 if (s
->input_state
[i
] == INPUT_OFF
)
353 while (!ret
&& av_audio_fifo_size(s
->fifos
[i
]) < min_samples
)
354 ret
= ff_request_frame(ctx
->inputs
[i
]);
355 if (ret
== AVERROR_EOF
) {
356 if (av_audio_fifo_size(s
->fifos
[i
]) == 0) {
357 s
->input_state
[i
] = INPUT_OFF
;
367 * Calculates the number of active inputs and determines EOF based on the
370 * @return 0 if mixing should continue, or AVERROR_EOF if mixing should stop.
372 static int calc_active_inputs(MixContext
*s
)
375 int active_inputs
= 0;
376 for (i
= 0; i
< s
->nb_inputs
; i
++)
377 active_inputs
+= !!(s
->input_state
[i
] != INPUT_OFF
);
378 s
->active_inputs
= active_inputs
;
380 if (!active_inputs
||
381 (s
->duration_mode
== DURATION_FIRST
&& s
->input_state
[0] == INPUT_OFF
) ||
382 (s
->duration_mode
== DURATION_SHORTEST
&& active_inputs
!= s
->nb_inputs
))
387 static int request_frame(AVFilterLink
*outlink
)
389 AVFilterContext
*ctx
= outlink
->src
;
390 MixContext
*s
= ctx
->priv
;
392 int wanted_samples
, available_samples
;
394 ret
= calc_active_inputs(s
);
398 if (s
->input_state
[0] == INPUT_OFF
) {
399 ret
= request_samples(ctx
, 1);
403 ret
= calc_active_inputs(s
);
407 available_samples
= get_available_samples(s
);
408 if (!available_samples
)
409 return AVERROR(EAGAIN
);
411 return output_frame(outlink
, available_samples
);
414 if (s
->frame_list
->nb_frames
== 0) {
415 ret
= ff_request_frame(ctx
->inputs
[0]);
416 if (ret
== AVERROR_EOF
) {
417 s
->input_state
[0] = INPUT_OFF
;
418 if (s
->nb_inputs
== 1)
421 return AVERROR(EAGAIN
);
425 av_assert0(s
->frame_list
->nb_frames
> 0);
427 wanted_samples
= frame_list_next_frame_size(s
->frame_list
);
429 if (s
->active_inputs
> 1) {
430 ret
= request_samples(ctx
, wanted_samples
);
434 ret
= calc_active_inputs(s
);
439 if (s
->active_inputs
> 1) {
440 available_samples
= get_available_samples(s
);
441 if (!available_samples
)
442 return AVERROR(EAGAIN
);
443 available_samples
= FFMIN(available_samples
, wanted_samples
);
445 available_samples
= wanted_samples
;
448 s
->next_pts
= frame_list_next_pts(s
->frame_list
);
449 frame_list_remove_samples(s
->frame_list
, available_samples
);
451 return output_frame(outlink
, available_samples
);
454 static int filter_frame(AVFilterLink
*inlink
, AVFrame
*buf
)
456 AVFilterContext
*ctx
= inlink
->dst
;
457 MixContext
*s
= ctx
->priv
;
458 AVFilterLink
*outlink
= ctx
->outputs
[0];
461 for (i
= 0; i
< ctx
->nb_inputs
; i
++)
462 if (ctx
->inputs
[i
] == inlink
)
464 if (i
>= ctx
->nb_inputs
) {
465 av_log(ctx
, AV_LOG_ERROR
, "unknown input link\n");
466 ret
= AVERROR(EINVAL
);
471 int64_t pts
= av_rescale_q(buf
->pts
, inlink
->time_base
,
473 ret
= frame_list_add_frame(s
->frame_list
, buf
->nb_samples
, pts
);
478 ret
= av_audio_fifo_write(s
->fifos
[i
], (void **)buf
->extended_data
,
487 static av_cold
int init(AVFilterContext
*ctx
)
489 MixContext
*s
= ctx
->priv
;
492 for (i
= 0; i
< s
->nb_inputs
; i
++) {
494 AVFilterPad pad
= { 0 };
496 snprintf(name
, sizeof(name
), "input%d", i
);
497 pad
.type
= AVMEDIA_TYPE_AUDIO
;
498 pad
.name
= av_strdup(name
);
499 pad
.filter_frame
= filter_frame
;
501 ff_insert_inpad(ctx
, i
, &pad
);
504 avpriv_float_dsp_init(&s
->fdsp
, 0);
509 static av_cold
void uninit(AVFilterContext
*ctx
)
512 MixContext
*s
= ctx
->priv
;
515 for (i
= 0; i
< s
->nb_inputs
; i
++)
516 av_audio_fifo_free(s
->fifos
[i
]);
519 frame_list_clear(s
->frame_list
);
520 av_freep(&s
->frame_list
);
521 av_freep(&s
->input_state
);
522 av_freep(&s
->input_scale
);
524 for (i
= 0; i
< ctx
->nb_inputs
; i
++)
525 av_freep(&ctx
->input_pads
[i
].name
);
528 static int query_formats(AVFilterContext
*ctx
)
530 AVFilterFormats
*formats
= NULL
;
531 ff_add_format(&formats
, AV_SAMPLE_FMT_FLT
);
532 ff_add_format(&formats
, AV_SAMPLE_FMT_FLTP
);
533 ff_set_common_formats(ctx
, formats
);
534 ff_set_common_channel_layouts(ctx
, ff_all_channel_layouts());
535 ff_set_common_samplerates(ctx
, ff_all_samplerates());
/*
 * Output pad table of the amix filter: an audio pad whose link is configured
 * by config_output() and whose frames are produced on demand by
 * request_frame().
 * NOTE(review): the entry's braces, any name field and the table terminator
 * are not visible in this excerpt -- confirm against the complete file.
 */
539 static const AVFilterPad avfilter_af_amix_outputs
[] = {
542 .type
= AVMEDIA_TYPE_AUDIO
,
543 .config_props
= config_output
,
544 .request_frame
= request_frame
549 AVFilter ff_af_amix
= {
551 .description
= NULL_IF_CONFIG_SMALL("Audio mixing."),
552 .priv_size
= sizeof(MixContext
),
553 .priv_class
= &amix_class
,
556 .query_formats
= query_formats
,
558 .outputs
= avfilter_af_amix_outputs
,
559 .flags
= AVFILTER_FLAG_DYNAMIC_INPUTS
,