[deb_ffmpeg.git] / ffmpeg / libavfilter / af_amix.c

/*
 * Audio Mix Filter
 * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Audio Mix Filter
 *
 * Mixes audio from multiple sources into a single output. The channel layout,
 * sample rate, and sample format will be the same for all inputs and the
 * output.
 */

#include "libavutil/attributes.h"
#include "libavutil/audio_fifo.h"
#include "libavutil/avassert.h"
#include "libavutil/avstring.h"
#include "libavutil/channel_layout.h"
#include "libavutil/common.h"
#include "libavutil/float_dsp.h"
#include "libavutil/mathematics.h"
#include "libavutil/opt.h"
#include "libavutil/samplefmt.h"

#include "audio.h"
#include "avfilter.h"
#include "formats.h"
#include "internal.h"

/* Per-input state values, stored one byte per input in MixContext.input_state. */
#define INPUT_OFF      0    /**< input has reached EOF */
#define INPUT_ON       1    /**< input is active */
#define INPUT_INACTIVE 2    /**< input is on, but is currently inactive */

/* Values of the "duration" option; they decide when calc_active_inputs()
 * reports end-of-stream. */
#define DURATION_LONGEST  0  /* stop once no input is left active */
#define DURATION_SHORTEST 1  /* stop as soon as any input has ended */
#define DURATION_FIRST    2  /* stop when the first input has ended */


/**
 * One node of a FrameList: the size and timestamp of a single frame that has
 * been written to the first input's FIFO but not yet fully consumed.
 */
typedef struct FrameInfo {
    int nb_samples;         /* samples of this frame still waiting in the FIFO */
    int64_t pts;            /* timestamp of the first remaining sample */
    struct FrameInfo *next; /* following node, or NULL for the last one */
} FrameInfo;

/**
 * Linked list used to store timestamps and frame sizes of all frames in the
 * FIFO for the first input.
 *
 * This is needed to keep timestamps synchronized for the case where multiple
 * input frames are pushed to the filter for processing before a frame is
 * requested by the output link.
 *
 * Nodes are appended at the tail by frame_list_add_frame() and consumed from
 * the head by frame_list_remove_samples().
 */
typedef struct FrameList {
    int nb_frames;   /* number of nodes currently in the list */
    int nb_samples;  /* sum of nb_samples over all nodes */
    FrameInfo *list; /* head (oldest frame), NULL when the list is empty */
    FrameInfo *end;  /* tail (newest frame), NULL when the list is empty */
} FrameList;

/**
 * Release every node of a frame list and reset its bookkeeping to "empty".
 *
 * The FrameList structure itself is kept; only the nodes are freed.
 *
 * @param frame_list list to empty; a NULL pointer is silently ignored
 */
static void frame_list_clear(FrameList *frame_list)
{
    FrameInfo *node, *following;

    if (!frame_list)
        return;

    for (node = frame_list->list; node; node = following) {
        /* remember the successor before the node goes away */
        following = node->next;
        av_free(node);
    }

    frame_list->list       = NULL;
    frame_list->end        = NULL;
    frame_list->nb_samples = 0;
    frame_list->nb_frames  = 0;
}

/**
 * Report how many samples remain in the oldest queued frame.
 *
 * @return remaining sample count of the head node, or 0 for an empty list
 */
static int frame_list_next_frame_size(FrameList *frame_list)
{
    const FrameInfo *head = frame_list->list;

    return head ? head->nb_samples : 0;
}

/**
 * Report the timestamp of the oldest queued frame.
 *
 * @return pts of the head node, or AV_NOPTS_VALUE for an empty list
 */
static int64_t frame_list_next_pts(FrameList *frame_list)
{
    const FrameInfo *head = frame_list->list;

    return head ? head->pts : AV_NOPTS_VALUE;
}

/**
 * Drop nb_samples samples from the front of the list.
 *
 * Whole frames are unlinked and freed; a frame that is only partly consumed
 * is shrunk and its pts advanced by the number of samples taken from it.
 * Asking for at least as many samples as the list holds empties it.
 *
 * @param frame_list list to consume from
 * @param nb_samples number of samples to remove
 */
static void frame_list_remove_samples(FrameList *frame_list, int nb_samples)
{
    int remaining = nb_samples;

    if (nb_samples >= frame_list->nb_samples) {
        frame_list_clear(frame_list);
        return;
    }

    while (remaining > 0) {
        FrameInfo *head = frame_list->list;
        av_assert0(head);

        if (head->nb_samples > remaining) {
            /* the head frame outlasts the request: trim it and stop */
            head->nb_samples       -= remaining;
            head->pts              += remaining;
            frame_list->nb_samples -= remaining;
            break;
        }

        /* the head frame is used up entirely: unlink and free it */
        remaining              -= head->nb_samples;
        frame_list->nb_samples -= head->nb_samples;
        frame_list->nb_frames--;
        frame_list->list = head->next;
        if (!frame_list->list)
            frame_list->end = NULL;
        av_free(head);
    }
}

/**
 * Append the description of a newly received frame to the tail of the list.
 *
 * @param frame_list list to extend
 * @param nb_samples number of samples in the frame
 * @param pts        timestamp of the frame
 * @return 0 on success, AVERROR(ENOMEM) if the node cannot be allocated
 */
static int frame_list_add_frame(FrameList *frame_list, int nb_samples, int64_t pts)
{
    FrameInfo *node = av_malloc(sizeof(*node));

    if (!node)
        return AVERROR(ENOMEM);

    node->next       = NULL;
    node->pts        = pts;
    node->nb_samples = nb_samples;

    if (frame_list->list) {
        /* non-empty list: hook the node behind the current tail */
        av_assert0(frame_list->end);
        frame_list->end->next = node;
    } else {
        frame_list->list = node;
    }
    frame_list->end = node;

    frame_list->nb_samples += nb_samples;
    frame_list->nb_frames++;

    return 0;
}


/**
 * Private context of the amix filter.
 *
 * nb_inputs, duration_mode and dropout_transition are set through the
 * AVOption table; the remaining fields are filled in by init() and
 * config_output() and updated while frames are processed.
 */
typedef struct MixContext {
    const AVClass *class;       /**< class for AVOptions */
    AVFloatDSPContext *fdsp;    /* DSP helpers; output_frame() mixes with vector_fmac_scalar */

    int nb_inputs;              /**< number of inputs */
    int active_inputs;          /**< number of input currently active */
    int duration_mode;          /**< mode for determining duration */
    float dropout_transition;   /**< transition time when an input drops out */

    int nb_channels;            /**< number of channels */
    int sample_rate;            /**< sample rate */
    int planar;                 /* result of av_sample_fmt_is_planar() for the output format */
    AVAudioFifo **fifos;        /**< audio fifo for each input */
    uint8_t *input_state;       /**< current state of each input */
    float *input_scale;         /**< mixing scale factor for each input */
    float scale_norm;           /**< normalization factor for all inputs */
    int64_t next_pts;           /**< calculated pts for next output frame */
    FrameList *frame_list;      /**< list of frame info for the first input */
} MixContext;

/* Shorthands used only by the option table below. */
#define OFFSET(x) offsetof(MixContext, x)
#define A AV_OPT_FLAG_AUDIO_PARAM
#define F AV_OPT_FLAG_FILTERING_PARAM
/*
 * User-settable options of the amix filter. Each entry writes straight into
 * the MixContext field named by OFFSET(); the AV_OPT_TYPE_CONST rows are the
 * symbolic names accepted for the "duration" option.
 */
static const AVOption amix_options[] = {
    /* between 1 and 32 inputs, 2 by default */
    { "inputs", "Number of inputs.",
            OFFSET(nb_inputs), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, 32, A|F },
    { "duration", "How to determine the end-of-stream.",
            OFFSET(duration_mode), AV_OPT_TYPE_INT, { .i64 = DURATION_LONGEST }, 0,  2, A|F, "duration" },
        { "longest",  "Duration of longest input.",  0, AV_OPT_TYPE_CONST, { .i64 = DURATION_LONGEST  }, INT_MIN, INT_MAX, A|F, "duration" },
        { "shortest", "Duration of shortest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_SHORTEST }, INT_MIN, INT_MAX, A|F, "duration" },
        { "first",    "Duration of first input.",    0, AV_OPT_TYPE_CONST, { .i64 = DURATION_FIRST    }, INT_MIN, INT_MAX, A|F, "duration" },
    /* seconds; used by calculate_scales() as the length of the volume ramp */
    { "dropout_transition", "Transition time, in seconds, for volume "
                            "renormalization when an input stream ends.",
            OFFSET(dropout_transition), AV_OPT_TYPE_FLOAT, { .dbl = 2.0 }, 0, INT_MAX, A|F },
    { NULL }
};

/* Provides amix_class, which ff_af_amix references as its priv_class. */
AVFILTER_DEFINE_CLASS(amix);

/**
 * Recompute the per-input gain applied while mixing.
 *
 * While scale_norm is still above the number of active inputs (an input has
 * dropped out), it is lowered in proportion to the samples about to be output
 * and clamped at active_inputs, so the remaining inputs get louder gradually
 * over dropout_transition seconds. Inputs in state INPUT_ON then get gain
 * 1/scale_norm; every other input gets gain 0.
 *
 * @param s          mix context
 * @param nb_samples number of samples in the frame about to be produced
 */
static void calculate_scales(MixContext *s, int nb_samples)
{
    int idx;

    if (s->scale_norm > s->active_inputs) {
        s->scale_norm -= nb_samples / (s->dropout_transition * s->sample_rate);
        s->scale_norm = FFMAX(s->scale_norm, s->active_inputs);
    }

    for (idx = 0; idx < s->nb_inputs; idx++)
        s->input_scale[idx] = (s->input_state[idx] == INPUT_ON)
                            ? 1.0f / s->scale_norm
                            : 0.0f;
}

/**
 * Configure the output link and allocate all per-input state.
 *
 * Sets the output time base to 1/sample_rate, creates the frame list, one
 * audio FIFO per input, the input-state array (all inputs start as INPUT_ON)
 * and the scale array, then computes the initial mixing scales.
 *
 * On failure the function returns immediately; anything allocated so far is
 * left in the context and released later by uninit().
 *
 * @param outlink output link being configured
 * @return 0 on success, AVERROR(ENOMEM) if any allocation fails
 */
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    MixContext *s      = ctx->priv;
    int i;
    char buf[64];

    s->planar          = av_sample_fmt_is_planar(outlink->format);
    s->sample_rate     = outlink->sample_rate;
    outlink->time_base = (AVRational){ 1, outlink->sample_rate };
    s->next_pts        = AV_NOPTS_VALUE;

    s->frame_list = av_mallocz(sizeof(*s->frame_list));
    if (!s->frame_list)
        return AVERROR(ENOMEM);

    /* overflow-checked array allocation, same helper as for input_scale below;
     * zero-filled so uninit() can cope with a partially populated array */
    s->fifos = av_mallocz_array(s->nb_inputs, sizeof(*s->fifos));
    if (!s->fifos)
        return AVERROR(ENOMEM);

    s->nb_channels = av_get_channel_layout_nb_channels(outlink->channel_layout);
    for (i = 0; i < s->nb_inputs; i++) {
        s->fifos[i] = av_audio_fifo_alloc(outlink->format, s->nb_channels, 1024);
        if (!s->fifos[i])
            return AVERROR(ENOMEM);
    }

    /* one state byte per input, every input initially active */
    s->input_state = av_malloc(s->nb_inputs);
    if (!s->input_state)
        return AVERROR(ENOMEM);
    memset(s->input_state, INPUT_ON, s->nb_inputs);
    s->active_inputs = s->nb_inputs;

    s->input_scale = av_mallocz_array(s->nb_inputs, sizeof(*s->input_scale));
    if (!s->input_scale)
        return AVERROR(ENOMEM);
    /* start fully normalized: each input contributes 1/nb_inputs */
    s->scale_norm = s->active_inputs;
    calculate_scales(s, 0);

    av_get_channel_layout_string(buf, sizeof(buf), -1, outlink->channel_layout);

    av_log(ctx, AV_LOG_VERBOSE,
           "inputs:%d fmt:%s srate:%d cl:%s\n", s->nb_inputs,
           av_get_sample_fmt_name(outlink->format), outlink->sample_rate, buf);

    return 0;
}

/**
 * Pull nb_samples samples from every active input FIFO, mix them and send the
 * result down the output link.
 *
 * Each active input is read into a scratch frame and accumulated into the
 * output frame with its current scale factor. The output frame is stamped
 * with next_pts, which is then advanced by nb_samples unless it is unset.
 *
 * @param outlink    output link to send the mixed frame on
 * @param nb_samples number of samples to mix
 * @return result of ff_filter_frame(), or AVERROR(ENOMEM) if a frame buffer
 *         cannot be obtained
 */
static int output_frame(AVFilterLink *outlink, int nb_samples)
{
    AVFilterContext *ctx = outlink->src;
    MixContext      *s = ctx->priv;
    AVFrame *mixed, *scratch;
    int planes, plane_size;
    int input, p;

    calculate_scales(s, nb_samples);

    mixed = ff_get_audio_buffer(outlink, nb_samples);
    if (!mixed)
        return AVERROR(ENOMEM);

    scratch = ff_get_audio_buffer(outlink, nb_samples);
    if (!scratch) {
        av_frame_free(&mixed);
        return AVERROR(ENOMEM);
    }

    /* planar: one plane per channel; packed: a single interleaved plane */
    if (s->planar) {
        planes     = s->nb_channels;
        plane_size = nb_samples;
    } else {
        planes     = 1;
        plane_size = nb_samples * s->nb_channels;
    }
    /* the DSP routine is handed a length rounded up to a multiple of 16 */
    plane_size = FFALIGN(plane_size, 16);

    for (input = 0; input < s->nb_inputs; input++) {
        if (s->input_state[input] != INPUT_ON)
            continue;

        av_audio_fifo_read(s->fifos[input], (void **)scratch->extended_data,
                           nb_samples);

        /* NOTE(review): accumulating into the fresh output buffer presumes
         * ff_get_audio_buffer() hands it back silent - confirm */
        for (p = 0; p < planes; p++)
            s->fdsp->vector_fmac_scalar((float *)mixed->extended_data[p],
                                        (float *)scratch->extended_data[p],
                                        s->input_scale[input], plane_size);
    }
    av_frame_free(&scratch);

    mixed->pts = s->next_pts;
    if (s->next_pts != AV_NOPTS_VALUE)
        s->next_pts += nb_samples;

    return ff_filter_frame(outlink, mixed);
}

/**
 * Find how many samples every still-running secondary input can supply.
 *
 * Only inputs 1..nb_inputs-1 are examined; inputs in state INPUT_OFF are
 * skipped.
 *
 * @return the smallest FIFO fill level among those inputs, or 0 if all of
 *         them are off
 */
static int get_available_samples(MixContext *s)
{
    int smallest = INT_MAX;
    int idx;

    av_assert0(s->nb_inputs > 1);

    for (idx = 1; idx < s->nb_inputs; idx++) {
        if (s->input_state[idx] != INPUT_OFF) {
            int queued = av_audio_fifo_size(s->fifos[idx]);

            if (queued < smallest)
                smallest = queued;
        }
    }

    /* INT_MAX left untouched means no input was examined */
    return smallest == INT_MAX ? 0 : smallest;
}

/**
 * Ask every secondary input (all but the first) for frames until its FIFO
 * holds at least min_samples samples or a request returns non-zero.
 *
 * An input that reports AVERROR_EOF while its FIFO is empty is switched to
 * INPUT_OFF; with samples still queued it stays as it is so they can be
 * drained.
 *
 * @param ctx         filter context
 * @param min_samples FIFO fill level to aim for on each input
 * @return 0 on success, or the first negative error other than AVERROR_EOF
 */
static int request_samples(AVFilterContext *ctx, int min_samples)
{
    MixContext *s = ctx->priv;
    int idx;

    av_assert0(s->nb_inputs > 1);

    for (idx = 1; idx < s->nb_inputs; idx++) {
        int err = 0;

        if (s->input_state[idx] == INPUT_OFF)
            continue;

        while (av_audio_fifo_size(s->fifos[idx]) < min_samples) {
            err = ff_request_frame(ctx->inputs[idx]);
            if (err)
                break;
        }

        if (err < 0 && err != AVERROR_EOF)
            return err;

        /* EOF with nothing left to drain: the input is finished for good */
        if (err == AVERROR_EOF && av_audio_fifo_size(s->fifos[idx]) == 0)
            s->input_state[idx] = INPUT_OFF;
    }

    return 0;
}

/**
 * Recount the inputs that are not INPUT_OFF, store the count in
 * s->active_inputs and apply the end-of-stream rule of the duration option.
 *
 * @return 0 if mixing should continue, or AVERROR_EOF if mixing should stop.
 */
static int calc_active_inputs(MixContext *s)
{
    int idx;
    int live = 0;

    for (idx = 0; idx < s->nb_inputs; idx++) {
        if (s->input_state[idx] != INPUT_OFF)
            live++;
    }
    s->active_inputs = live;

    /* nothing left to mix ends the stream in every mode */
    if (live == 0)
        return AVERROR_EOF;

    switch (s->duration_mode) {
    case DURATION_FIRST:
        if (s->input_state[0] == INPUT_OFF)
            return AVERROR_EOF;
        break;
    case DURATION_SHORTEST:
        if (live != s->nb_inputs)
            return AVERROR_EOF;
        break;
    default:
        break;
    }

    return 0;
}

/**
 * request_frame callback of the output pad: gather enough input to mix one
 * output frame and send it.
 *
 * The first input drives timing. While it is alive, each output frame is
 * sized and timestamped from the head of its frame list; once it has ended,
 * whatever the remaining inputs have queued is mixed instead.
 *
 * @param outlink output link that wants a frame
 * @return the result of output_frame() when a frame was produced,
 *         AVERROR_EOF when calc_active_inputs() decides mixing is over (or the
 *         only input has ended), AVERROR(EAGAIN) when no samples are available
 *         yet, or a negative error propagated from ff_request_frame()
 */
static int request_frame(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    MixContext      *s = ctx->priv;
    int ret;
    int wanted_samples, available_samples;

    /* refresh the active-input count and stop early if the stream is over */
    ret = calc_active_inputs(s);
    if (ret < 0)
        return ret;

    /* First input already finished: mix only from the other inputs. next_pts
     * is not reloaded here, so output_frame() keeps using the running value. */
    if (s->input_state[0] == INPUT_OFF) {
        ret = request_samples(ctx, 1);
        if (ret < 0)
            return ret;

        /* request_samples() may have switched more inputs off */
        ret = calc_active_inputs(s);
        if (ret < 0)
            return ret;

        available_samples = get_available_samples(s);
        if (!available_samples)
            return AVERROR(EAGAIN);

        return output_frame(outlink, available_samples);
    }

    /* Nothing queued from the first input yet: ask it for a frame. */
    if (s->frame_list->nb_frames == 0) {
        ret = ff_request_frame(ctx->inputs[0]);
        if (ret == AVERROR_EOF) {
            s->input_state[0] = INPUT_OFF;
            /* with a single input there is nothing else to drain; otherwise
             * return EAGAIN so a later call takes the branch above */
            if (s->nb_inputs == 1)
                return AVERROR_EOF;
            else
                return AVERROR(EAGAIN);
        } else if (ret < 0)
            return ret;
    }
    /* NOTE(review): relies on the successful request above having reached
     * filter_frame() for input 0, which queues the frame info - confirm */
    av_assert0(s->frame_list->nb_frames > 0);

    /* aim for an output frame the size of the first input's oldest frame */
    wanted_samples = frame_list_next_frame_size(s->frame_list);

    if (s->active_inputs > 1) {
        ret = request_samples(ctx, wanted_samples);
        if (ret < 0)
            return ret;

        ret = calc_active_inputs(s);
        if (ret < 0)
            return ret;
    }

    /* active_inputs may have changed above, hence the second test */
    if (s->active_inputs > 1) {
        available_samples = get_available_samples(s);
        if (!available_samples)
            return AVERROR(EAGAIN);
        /* never output more than the slowest secondary input can provide */
        available_samples = FFMIN(available_samples, wanted_samples);
    } else {
        available_samples = wanted_samples;
    }

    /* take the timestamp from the first input, then consume its bookkeeping */
    s->next_pts = frame_list_next_pts(s->frame_list);
    frame_list_remove_samples(s->frame_list, available_samples);

    return output_frame(outlink, available_samples);
}

/**
 * filter_frame callback shared by all input pads: queue an incoming frame in
 * the FIFO that belongs to the link it arrived on.
 *
 * For the first input the frame's size and its pts, rescaled to the output
 * time base, are also recorded in the frame list. The frame is always freed
 * before returning.
 *
 * @param inlink link the frame arrived on
 * @param buf    incoming frame; ownership is taken
 * @return AVERROR(EINVAL) for an unknown link, the error from
 *         frame_list_add_frame() if it fails, otherwise the value returned by
 *         av_audio_fifo_write() (NOTE(review): presumably a non-negative
 *         sample count on success rather than 0 - confirm)
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
{
    AVFilterContext  *ctx = inlink->dst;
    MixContext       *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    int idx = 0, ret = 0;

    /* find which input pad this link is attached to */
    while (idx < ctx->nb_inputs && ctx->inputs[idx] != inlink)
        idx++;

    if (idx >= ctx->nb_inputs) {
        av_log(ctx, AV_LOG_ERROR, "unknown input link\n");
        ret = AVERROR(EINVAL);
    } else {
        if (idx == 0) {
            int64_t pts = av_rescale_q(buf->pts, inlink->time_base,
                                       outlink->time_base);
            ret = frame_list_add_frame(s->frame_list, buf->nb_samples, pts);
        }

        /* skip the FIFO write if recording the frame info failed */
        if (ret >= 0)
            ret = av_audio_fifo_write(s->fifos[idx], (void **)buf->extended_data,
                                      buf->nb_samples);
    }

    av_frame_free(&buf);

    return ret;
}

/**
 * Filter init callback: create one audio input pad per requested input,
 * named "input0", "input1", ..., and allocate the float DSP context.
 *
 * Pad names are heap-allocated here and released in uninit().
 *
 * @return 0 on success, AVERROR(ENOMEM) if a pad name or the DSP context
 *         cannot be allocated
 */
static av_cold int init(AVFilterContext *ctx)
{
    MixContext *s = ctx->priv;
    int idx = 0;

    while (idx < s->nb_inputs) {
        AVFilterPad pad = { 0 };
        char name[32];

        snprintf(name, sizeof(name), "input%d", idx);

        pad.name = av_strdup(name);
        if (!pad.name)
            return AVERROR(ENOMEM);
        pad.type         = AVMEDIA_TYPE_AUDIO;
        pad.filter_frame = filter_frame;

        ff_insert_inpad(ctx, idx, &pad);
        idx++;
    }

    s->fdsp = avpriv_float_dsp_alloc(0);

    return s->fdsp ? 0 : AVERROR(ENOMEM);
}

/**
 * Filter uninit callback: release everything init() and config_output()
 * allocated - the FIFOs, the frame list, the state and scale arrays, the DSP
 * context and the input pad names.
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    MixContext *s = ctx->priv;
    int idx;

    if (s->fifos) {
        idx = 0;
        while (idx < s->nb_inputs) {
            av_audio_fifo_free(s->fifos[idx]);
            idx++;
        }
        av_freep(&s->fifos);
    }

    /* free the nodes first, then the list header itself */
    frame_list_clear(s->frame_list);
    av_freep(&s->frame_list);

    av_freep(&s->input_state);
    av_freep(&s->input_scale);
    av_freep(&s->fdsp);

    /* pad names were duplicated in init() */
    for (idx = 0; idx < ctx->nb_inputs; idx++)
        av_freep(&ctx->input_pads[idx].name);
}

/**
 * Format negotiation callback: offer packed and planar float samples, every
 * channel layout and every sample rate, the same on all links.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the channel layout list cannot be
 *         allocated
 */
static int query_formats(AVFilterContext *ctx)
{
    AVFilterChannelLayouts *layouts = ff_all_channel_layouts();
    AVFilterFormats *formats = NULL;
    AVFilterFormats *rates;

    if (!layouts)
        return AVERROR(ENOMEM);

    ff_add_format(&formats, AV_SAMPLE_FMT_FLT);
    ff_add_format(&formats, AV_SAMPLE_FMT_FLTP);

    ff_set_common_formats(ctx, formats);
    ff_set_common_channel_layouts(ctx, layouts);

    rates = ff_all_samplerates();
    ff_set_common_samplerates(ctx, rates);

    return 0;
}

/*
 * The single static output pad. Its configuration and frame requests are
 * handled by config_output() and request_frame(); the array ends with a
 * { NULL } entry.
 */
static const AVFilterPad avfilter_af_amix_outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_AUDIO,
        .config_props  = config_output,
        .request_frame = request_frame
    },
    { NULL }
};

/*
 * Filter definition for "amix". There is no static input pad list: the input
 * pads are inserted at run time by init(), one per requested input, which is
 * why AVFILTER_FLAG_DYNAMIC_INPUTS is set.
 */
AVFilter ff_af_amix = {
    .name           = "amix",
    .description    = NULL_IF_CONFIG_SMALL("Audio mixing."),
    .priv_size      = sizeof(MixContext),
    .priv_class     = &amix_class,
    .init           = init,
    .uninit         = uninit,
    .query_formats  = query_formats,
    .inputs         = NULL,
    .outputs        = avfilter_af_amix_outputs,
    .flags          = AVFILTER_FLAG_DYNAMIC_INPUTS,
};
Commit	Line	Data
2ba45a60 DM	1	/*
	2	* Audio Mix Filter
	3	* Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
	4	*
	5	* This file is part of FFmpeg.
	6	*
	7	* FFmpeg is free software; you can redistribute it and/or
	8	* modify it under the terms of the GNU Lesser General Public
	9	* License as published by the Free Software Foundation; either
	10	* version 2.1 of the License, or (at your option) any later version.
	11	*
	12	* FFmpeg is distributed in the hope that it will be useful,
	13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	15	* Lesser General Public License for more details.
	16	*
	17	* You should have received a copy of the GNU Lesser General Public
	18	* License along with FFmpeg; if not, write to the Free Software
	19	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	20	*/
	21
	22	/**
	23	* @file
	24	* Audio Mix Filter
	25	*
	26	* Mixes audio from multiple sources into a single output. The channel layout,
	27	* sample rate, and sample format will be the same for all inputs and the
	28	* output.
	29	*/
	30
	31	#include "libavutil/attributes.h"
	32	#include "libavutil/audio_fifo.h"
	33	#include "libavutil/avassert.h"
	34	#include "libavutil/avstring.h"
	35	#include "libavutil/channel_layout.h"
	36	#include "libavutil/common.h"
	37	#include "libavutil/float_dsp.h"
	38	#include "libavutil/mathematics.h"
	39	#include "libavutil/opt.h"
	40	#include "libavutil/samplefmt.h"
	41
	42	#include "audio.h"
	43	#include "avfilter.h"
	44	#include "formats.h"
	45	#include "internal.h"
	46
	47	#define INPUT_OFF 0 /*< input has reached EOF /
	48	#define INPUT_ON 1 /*< input is active /
	49	#define INPUT_INACTIVE 2 /*< input is on, but is currently inactive /
	50
	51	#define DURATION_LONGEST 0
	52	#define DURATION_SHORTEST 1
	53	#define DURATION_FIRST 2
	54
	55
	56	typedef struct FrameInfo {
	57	int nb_samples;
	58	int64_t pts;
	59	struct FrameInfo *next;
	60	} FrameInfo;
	61
	62	/**
	63	* Linked list used to store timestamps and frame sizes of all frames in the
	64	* FIFO for the first input.
65	*
66	* This is needed to keep timestamps synchronized for the case where multiple
67	* input frames are pushed to the filter for processing before a frame is
68	* requested by the output link.
69	*/
70	typedef struct FrameList {
71	int nb_frames;
72	int nb_samples;
73	FrameInfo *list;
74	FrameInfo *end;
75	} FrameList;
76
77	static void frame_list_clear(FrameList *frame_list)
78	{
79	if (frame_list) {
80	while (frame_list->list) {
81	FrameInfo *info = frame_list->list;
82	frame_list->list = info->next;
83	av_free(info);
84	}
85	frame_list->nb_frames = 0;
86	frame_list->nb_samples = 0;
87	frame_list->end = NULL;
88	}
89	}
90
91	static int frame_list_next_frame_size(FrameList *frame_list)
92	{
93	if (!frame_list->list)
94	return 0;
95	return frame_list->list->nb_samples;
96	}
97
98	static int64_t frame_list_next_pts(FrameList *frame_list)
99	{
100	if (!frame_list->list)
101	return AV_NOPTS_VALUE;
102	return frame_list->list->pts;
103	}
104
105	static void frame_list_remove_samples(FrameList *frame_list, int nb_samples)
106	{
107	if (nb_samples >= frame_list->nb_samples) {
108	frame_list_clear(frame_list);
109	} else {
110	int samples = nb_samples;
111	while (samples > 0) {
112	FrameInfo *info = frame_list->list;
113	av_assert0(info);
114	if (info->nb_samples <= samples) {
115	samples -= info->nb_samples;
116	frame_list->list = info->next;
117	if (!frame_list->list)
118	frame_list->end = NULL;
119	frame_list->nb_frames--;
120	frame_list->nb_samples -= info->nb_samples;
121	av_free(info);
122	} else {
123	info->nb_samples -= samples;
124	info->pts += samples;
125	frame_list->nb_samples -= samples;
126	samples = 0;
127	}
128	}
129	}
130	}
131
132	static int frame_list_add_frame(FrameList *frame_list, int nb_samples, int64_t pts)
133	{
134	FrameInfo info = av_malloc(sizeof(info));
135	if (!info)
136	return AVERROR(ENOMEM);
137	info->nb_samples = nb_samples;
138	info->pts = pts;
139	info->next = NULL;
140
141	if (!frame_list->list) {
142	frame_list->list = info;
143	frame_list->end = info;
144	} else {
145	av_assert0(frame_list->end);
146	frame_list->end->next = info;
147	frame_list->end = info;
148	}
149	frame_list->nb_frames++;
150	frame_list->nb_samples += nb_samples;
151
152	return 0;
153	}
154
155
156	typedef struct MixContext {
157	const AVClass class; /< class for AVOptions /
f6fa7814	158	AVFloatDSPContext *fdsp;
2ba45a60 DM	159
	160	int nb_inputs; /*< number of inputs /
	161	int active_inputs; /*< number of input currently active /
	162	int duration_mode; /*< mode for determining duration /
	163	float dropout_transition; /*< transition time when an input drops out /
	164
	165	int nb_channels; /*< number of channels /
	166	int sample_rate; /*< sample rate /
	167	int planar;
	168	AVAudioFifo fifos; /< audio fifo for each input */
	169	uint8_t input_state; /< current state of each input /
	170	float input_scale; /< mixing scale factor for each input /
	171	float scale_norm; /*< normalization factor for all inputs /
	172	int64_t next_pts; /*< calculated pts for next output frame /
	173	FrameList frame_list; /< list of frame info for the first input /
	174	} MixContext;
	175
	176	#define OFFSET(x) offsetof(MixContext, x)
	177	#define A AV_OPT_FLAG_AUDIO_PARAM
	178	#define F AV_OPT_FLAG_FILTERING_PARAM
	179	static const AVOption amix_options[] = {
	180	{ "inputs", "Number of inputs.",
	181	OFFSET(nb_inputs), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, 32, A\|F },
	182	{ "duration", "How to determine the end-of-stream.",
	183	OFFSET(duration_mode), AV_OPT_TYPE_INT, { .i64 = DURATION_LONGEST }, 0, 2, A\|F, "duration" },
	184	{ "longest", "Duration of longest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_LONGEST }, INT_MIN, INT_MAX, A\|F, "duration" },
	185	{ "shortest", "Duration of shortest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_SHORTEST }, INT_MIN, INT_MAX, A\|F, "duration" },
	186	{ "first", "Duration of first input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_FIRST }, INT_MIN, INT_MAX, A\|F, "duration" },
	187	{ "dropout_transition", "Transition time, in seconds, for volume "
	188	"renormalization when an input stream ends.",
	189	OFFSET(dropout_transition), AV_OPT_TYPE_FLOAT, { .dbl = 2.0 }, 0, INT_MAX, A\|F },
	190	{ NULL }
	191	};
	192
	193	AVFILTER_DEFINE_CLASS(amix);
	194
	195	/**
	196	* Update the scaling factors to apply to each input during mixing.
	197	*
	198	* This balances the full volume range between active inputs and handles
	199	* volume transitions when EOF is encountered on an input but mixing continues
	200	* with the remaining inputs.
	201	*/
	202	static void calculate_scales(MixContext *s, int nb_samples)
	203	{
	204	int i;
	205
	206	if (s->scale_norm > s->active_inputs) {
	207	s->scale_norm -= nb_samples / (s->dropout_transition * s->sample_rate);
	208	s->scale_norm = FFMAX(s->scale_norm, s->active_inputs);
	209	}
	210
	211	for (i = 0; i < s->nb_inputs; i++) {
	212	if (s->input_state[i] == INPUT_ON)
	213	s->input_scale[i] = 1.0f / s->scale_norm;
	214	else
	215	s->input_scale[i] = 0.0f;
	216	}
	217	}
	218
	219	static int config_output(AVFilterLink *outlink)
	220	{
	221	AVFilterContext *ctx = outlink->src;
	222	MixContext *s = ctx->priv;
223	int i;
224	char buf[64];
225
226	s->planar = av_sample_fmt_is_planar(outlink->format);
227	s->sample_rate = outlink->sample_rate;
228	outlink->time_base = (AVRational){ 1, outlink->sample_rate };
229	s->next_pts = AV_NOPTS_VALUE;
230
231	s->frame_list = av_mallocz(sizeof(*s->frame_list));
232	if (!s->frame_list)
233	return AVERROR(ENOMEM);
234
235	s->fifos = av_mallocz(s->nb_inputs * sizeof(*s->fifos));
236	if (!s->fifos)
237	return AVERROR(ENOMEM);
238
239	s->nb_channels = av_get_channel_layout_nb_channels(outlink->channel_layout);
240	for (i = 0; i < s->nb_inputs; i++) {
241	s->fifos[i] = av_audio_fifo_alloc(outlink->format, s->nb_channels, 1024);
242	if (!s->fifos[i])
243	return AVERROR(ENOMEM);
244	}
245
246	s->input_state = av_malloc(s->nb_inputs);
247	if (!s->input_state)
248	return AVERROR(ENOMEM);
249	memset(s->input_state, INPUT_ON, s->nb_inputs);
250	s->active_inputs = s->nb_inputs;
251
252	s->input_scale = av_mallocz_array(s->nb_inputs, sizeof(*s->input_scale));
253	if (!s->input_scale)
254	return AVERROR(ENOMEM);
255	s->scale_norm = s->active_inputs;
256	calculate_scales(s, 0);
257
258	av_get_channel_layout_string(buf, sizeof(buf), -1, outlink->channel_layout);
259
260	av_log(ctx, AV_LOG_VERBOSE,
261	"inputs:%d fmt:%s srate:%d cl:%s\n", s->nb_inputs,
262	av_get_sample_fmt_name(outlink->format), outlink->sample_rate, buf);
263
264	return 0;
265	}
266
267	/**
268	* Read samples from the input FIFOs, mix, and write to the output link.
269	*/
270	static int output_frame(AVFilterLink *outlink, int nb_samples)
271	{
272	AVFilterContext *ctx = outlink->src;
273	MixContext *s = ctx->priv;
274	AVFrame out_buf, in_buf;
275	int i;
276
277	calculate_scales(s, nb_samples);
278
279	out_buf = ff_get_audio_buffer(outlink, nb_samples);
280	if (!out_buf)
281	return AVERROR(ENOMEM);
282
283	in_buf = ff_get_audio_buffer(outlink, nb_samples);
284	if (!in_buf) {
285	av_frame_free(&out_buf);
286	return AVERROR(ENOMEM);
287	}
288
289	for (i = 0; i < s->nb_inputs; i++) {
290	if (s->input_state[i] == INPUT_ON) {
291	int planes, plane_size, p;
292
293	av_audio_fifo_read(s->fifos[i], (void **)in_buf->extended_data,
294	nb_samples);
295
296	planes = s->planar ? s->nb_channels : 1;
297	plane_size = nb_samples * (s->planar ? 1 : s->nb_channels);
298	plane_size = FFALIGN(plane_size, 16);
299
300	for (p = 0; p < planes; p++) {
f6fa7814	301	s->fdsp->vector_fmac_scalar((float *)out_buf->extended_data[p],
2ba45a60 DM	302	(float *) in_buf->extended_data[p],
	303	s->input_scale[i], plane_size);
	304	}
	305	}
	306	}
	307	av_frame_free(&in_buf);
	308
	309	out_buf->pts = s->next_pts;
	310	if (s->next_pts != AV_NOPTS_VALUE)
	311	s->next_pts += nb_samples;
	312
	313	return ff_filter_frame(outlink, out_buf);
	314	}
	315
	316	/**
	317	* Returns the smallest number of samples available in the input FIFOs other
	318	* than that of the first input.
	319	*/
	320	static int get_available_samples(MixContext *s)
	321	{
	322	int i;
	323	int available_samples = INT_MAX;
	324
	325	av_assert0(s->nb_inputs > 1);
	326
	327	for (i = 1; i < s->nb_inputs; i++) {
	328	int nb_samples;
	329	if (s->input_state[i] == INPUT_OFF)
	330	continue;
	331	nb_samples = av_audio_fifo_size(s->fifos[i]);
	332	available_samples = FFMIN(available_samples, nb_samples);
	333	}
	334	if (available_samples == INT_MAX)
	335	return 0;
	336	return available_samples;
	337	}
	338
	339	/**
	340	* Requests a frame, if needed, from each input link other than the first.
	341	*/
	342	static int request_samples(AVFilterContext *ctx, int min_samples)
	343	{
	344	MixContext *s = ctx->priv;
	345	int i, ret;
	346
	347	av_assert0(s->nb_inputs > 1);
	348
	349	for (i = 1; i < s->nb_inputs; i++) {
	350	ret = 0;
	351	if (s->input_state[i] == INPUT_OFF)
	352	continue;
	353	while (!ret && av_audio_fifo_size(s->fifos[i]) < min_samples)
	354	ret = ff_request_frame(ctx->inputs[i]);
	355	if (ret == AVERROR_EOF) {
	356	if (av_audio_fifo_size(s->fifos[i]) == 0) {
	357	s->input_state[i] = INPUT_OFF;
	358	continue;
	359	}
	360	} else if (ret < 0)
	361	return ret;
	362	}
	363	return 0;
	364	}
	365
366	/**
367	* Calculates the number of active inputs and determines EOF based on the
368	* duration option.
369	*
370	* @return 0 if mixing should continue, or AVERROR_EOF if mixing should stop.
371	*/
372	static int calc_active_inputs(MixContext *s)
373	{
374	int i;
375	int active_inputs = 0;
376	for (i = 0; i < s->nb_inputs; i++)
377	active_inputs += !!(s->input_state[i] != INPUT_OFF);
378	s->active_inputs = active_inputs;
379
380	if (!active_inputs \|\|
381	(s->duration_mode == DURATION_FIRST && s->input_state[0] == INPUT_OFF) \|\|
382	(s->duration_mode == DURATION_SHORTEST && active_inputs != s->nb_inputs))
383	return AVERROR_EOF;
384	return 0;
385	}
386
387	static int request_frame(AVFilterLink *outlink)
388	{
389	AVFilterContext *ctx = outlink->src;
390	MixContext *s = ctx->priv;
391	int ret;
392	int wanted_samples, available_samples;
393
394	ret = calc_active_inputs(s);
395	if (ret < 0)
396	return ret;
397
398	if (s->input_state[0] == INPUT_OFF) {
399	ret = request_samples(ctx, 1);
400	if (ret < 0)
401	return ret;
402
403	ret = calc_active_inputs(s);
404	if (ret < 0)
405	return ret;
406
407	available_samples = get_available_samples(s);
408	if (!available_samples)
409	return AVERROR(EAGAIN);
410
411	return output_frame(outlink, available_samples);
412	}
413
414	if (s->frame_list->nb_frames == 0) {
415	ret = ff_request_frame(ctx->inputs[0]);
416	if (ret == AVERROR_EOF) {
417	s->input_state[0] = INPUT_OFF;
418	if (s->nb_inputs == 1)
419	return AVERROR_EOF;
420	else
421	return AVERROR(EAGAIN);
422	} else if (ret < 0)
423	return ret;
424	}
425	av_assert0(s->frame_list->nb_frames > 0);
426
427	wanted_samples = frame_list_next_frame_size(s->frame_list);
428
429	if (s->active_inputs > 1) {
430	ret = request_samples(ctx, wanted_samples);
431	if (ret < 0)
432	return ret;
433
434	ret = calc_active_inputs(s);
435	if (ret < 0)
436	return ret;
437	}
438
439	if (s->active_inputs > 1) {
440	available_samples = get_available_samples(s);
441	if (!available_samples)
442	return AVERROR(EAGAIN);
443	available_samples = FFMIN(available_samples, wanted_samples);
444	} else {
445	available_samples = wanted_samples;
446	}
447
448	s->next_pts = frame_list_next_pts(s->frame_list);
449	frame_list_remove_samples(s->frame_list, available_samples);
450
451	return output_frame(outlink, available_samples);
452	}
453
454	static int filter_frame(AVFilterLink inlink, AVFrame buf)
455	{
456	AVFilterContext *ctx = inlink->dst;
457	MixContext *s = ctx->priv;
458	AVFilterLink *outlink = ctx->outputs[0];
459	int i, ret = 0;
460
461	for (i = 0; i < ctx->nb_inputs; i++)
462	if (ctx->inputs[i] == inlink)
463	break;
464	if (i >= ctx->nb_inputs) {
465	av_log(ctx, AV_LOG_ERROR, "unknown input link\n");
466	ret = AVERROR(EINVAL);
467	goto fail;
468	}
469
470	if (i == 0) {
471	int64_t pts = av_rescale_q(buf->pts, inlink->time_base,
472	outlink->time_base);
473	ret = frame_list_add_frame(s->frame_list, buf->nb_samples, pts);
474	if (ret < 0)
475	goto fail;
476	}
477
478	ret = av_audio_fifo_write(s->fifos[i], (void **)buf->extended_data,
479	buf->nb_samples);
480
481	fail:
482	av_frame_free(&buf);
483
484	return ret;
485	}
486
487	static av_cold int init(AVFilterContext *ctx)
488	{
489	MixContext *s = ctx->priv;
490	int i;
491
492	for (i = 0; i < s->nb_inputs; i++) {
493	char name[32];
494	AVFilterPad pad = { 0 };
495
496	snprintf(name, sizeof(name), "input%d", i);
497	pad.type = AVMEDIA_TYPE_AUDIO;
498	pad.name = av_strdup(name);
0e279ba6 DM	499	if (!pad.name)
0e279ba6 DM	500	return AVERROR(ENOMEM);
2ba45a60 DM	501	pad.filter_frame = filter_frame;
	502
	503	ff_insert_inpad(ctx, i, &pad);
	504	}
	505
f6fa7814 DM	506	s->fdsp = avpriv_float_dsp_alloc(0);
	507	if (!s->fdsp)
	508	return AVERROR(ENOMEM);
2ba45a60 DM	509
	510	return 0;
	511	}
	512
	513	static av_cold void uninit(AVFilterContext *ctx)
	514	{
	515	int i;
	516	MixContext *s = ctx->priv;
	517
	518	if (s->fifos) {
	519	for (i = 0; i < s->nb_inputs; i++)
	520	av_audio_fifo_free(s->fifos[i]);
	521	av_freep(&s->fifos);
	522	}
	523	frame_list_clear(s->frame_list);
	524	av_freep(&s->frame_list);
	525	av_freep(&s->input_state);
	526	av_freep(&s->input_scale);
f6fa7814	527	av_freep(&s->fdsp);
2ba45a60 DM	528
	529	for (i = 0; i < ctx->nb_inputs; i++)
	530	av_freep(&ctx->input_pads[i].name);
	531	}
	532
	533	static int query_formats(AVFilterContext *ctx)
	534	{
	535	AVFilterFormats *formats = NULL;
f6fa7814 DM	536	AVFilterChannelLayouts *layouts;
	537
	538	layouts = ff_all_channel_layouts();
	539
	540	if (!layouts)
	541	return AVERROR(ENOMEM);
	542
2ba45a60 DM	543	ff_add_format(&formats, AV_SAMPLE_FMT_FLT);
	544	ff_add_format(&formats, AV_SAMPLE_FMT_FLTP);
	545	ff_set_common_formats(ctx, formats);
f6fa7814	546	ff_set_common_channel_layouts(ctx, layouts);
2ba45a60 DM	547	ff_set_common_samplerates(ctx, ff_all_samplerates());
	548	return 0;
	549	}
	550
	551	static const AVFilterPad avfilter_af_amix_outputs[] = {
	552	{
	553	.name = "default",
	554	.type = AVMEDIA_TYPE_AUDIO,
	555	.config_props = config_output,
	556	.request_frame = request_frame
	557	},
	558	{ NULL }
	559	};
	560
	561	AVFilter ff_af_amix = {
	562	.name = "amix",
	563	.description = NULL_IF_CONFIG_SMALL("Audio mixing."),
	564	.priv_size = sizeof(MixContext),
	565	.priv_class = &amix_class,
	566	.init = init,
	567	.uninit = uninit,
	568	.query_formats = query_formats,
	569	.inputs = NULL,
	570	.outputs = avfilter_af_amix_outputs,
	571	.flags = AVFILTER_FLAG_DYNAMIC_INPUTS,
	572	};