[deb_ffmpeg.git] / ffmpeg / libavfilter / af_volumedetect.c

/*
 * Copyright (c) 2012 Nicolas George
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/channel_layout.h"
#include "libavutil/avassert.h"
#include "audio.h"
#include "avfilter.h"
#include "internal.h"

/**
 * Private state of the volumedetect filter.
 */
typedef struct VolDetectContext {
    /**
     * Per-value sample counters for signed 16-bit PCM.
     * The counter for PCM value i is stored at histogram[0x8000 + i], so
     * the values -0x8000..0x7FFF occupy indices 0..0xFFFF.
     * Index 0x10000 (the slot value +0x8000 would use) is the one extra
     * element; it keeps the table symmetric around index 0x8000.
     */
    uint64_t histogram[0x10001];
} VolDetectContext;

/**
 * Declare the sample formats this filter works with.
 *
 * Only signed 16-bit samples, interleaved or planar, are listed; this
 * matches the int16_t reads done when the histogram is filled.
 *
 * @param ctx filter context the format list is attached to
 * @return 0 on success, AVERROR(ENOMEM) when ff_make_format_list()
 *         returns NULL
 */
static int query_formats(AVFilterContext *ctx)
{
    static const enum AVSampleFormat supported_fmts[] = {
        AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
        AV_SAMPLE_FMT_NONE
    };
    AVFilterFormats *fmt_list = ff_make_format_list(supported_fmts);

    if (!fmt_list)
        return AVERROR(ENOMEM);

    ff_set_common_formats(ctx, fmt_list);
    return 0;
}

/**
 * Input pad callback: count every sample of the frame in the histogram,
 * then pass the frame on to the first output of the filter.
 *
 * @param inlink  link the frame arrived on
 * @param samples audio frame; its sample data is read as int16_t
 * @return whatever ff_filter_frame() returns for the forwarded frame
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *samples)
{
    AVFilterContext *ctx  = inlink->dst;
    VolDetectContext *vd  = ctx->priv;
    int64_t ch_layout     = samples->channel_layout;
    int channels          = av_get_channel_layout_nb_channels(ch_layout);
    int plane_count       = channels;
    int plane_len         = samples->nb_samples;
    int p, n;

    if (!av_sample_fmt_is_planar(samples->format)) {
        /* Interleaved data: one plane that carries all the channels. */
        plane_len  *= channels;
        plane_count = 1;
    }

    for (p = 0; p < plane_count; p++) {
        int16_t *src = (int16_t *)samples->extended_data[p];

        /* Bias by 0x8000 so that value -0x8000 lands on index 0. */
        for (n = 0; n < plane_len; n++)
            vd->histogram[src[n] + 0x8000]++;
    }

    return ff_filter_frame(ctx->outputs[0], samples);
}

#define MAX_DB 91

static inline double logdb(uint64_t v)
{
    double d = v / (double)(0x8000 * 0x8000);
    if (!v)
        return MAX_DB;
    return log(d) * -4.3429448190325182765112891891660508229; /* -10/log(10) */
}

/**
 * Log the volume statistics accumulated in the histogram.
 *
 * Everything is emitted with av_log() at AV_LOG_INFO level, in this order:
 * the total sample count, the mean volume, the maximum volume, and the
 * loudest bins of a per-dB histogram. When no sample was counted only the
 * sample count line is printed.
 *
 * @param ctx filter context whose priv field is the VolDetectContext
 */
static void print_stats(AVFilterContext *ctx)
{
    VolDetectContext *vd = ctx->priv;
    int i, max_volume, shift;
    uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
    uint64_t histdb[MAX_DB + 1] = { 0 };

    /* Indices 0..0xFFFF cover every int16_t value; the extra element at
       0x10000 is not part of the total. */
    for (i = 0; i < 0x10000; i++)
        nb_samples += vd->histogram[i];
    /* The counters are uint64_t, hence the unsigned PRIu64 conversion. */
    av_log(ctx, AV_LOG_INFO, "n_samples: %"PRIu64"\n", nb_samples);
    if (!nb_samples)
        return;

    /* If nb_samples > 1<<34, there is a risk of overflow in the
       multiplication or the sum: shift all histogram values to avoid that.
       The total number of samples must be recomputed to avoid rounding
       errors. */
    shift = av_log2(nb_samples >> 33);
    for (i = 0; i < 0x10000; i++) {
        nb_samples_shift += vd->histogram[i] >> shift;
        /* (i - 0x8000) is the PCM value of bin i; its square is weighted
           by the (shifted) number of samples at that value. */
        power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift);
    }
    /* Every bin may have been shifted down to zero. */
    if (!nb_samples_shift)
        return;
    /* Mean of the squared samples, rounded to nearest. */
    power = (power + nb_samples_shift / 2) / nb_samples_shift;
    av_assert0(power <= 0x8000 * 0x8000);
    av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));

    /* Largest magnitude, positive or negative, with a non-empty bin. */
    max_volume = 0x8000;
    while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
                             !vd->histogram[0x8000 - max_volume])
        max_volume--;
    av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume));

    /* Fold the per-value counters into bins indexed by the truncated
       logdb() of the squared value (0..MAX_DB). */
    for (i = 0; i < 0x10000; i++)
        histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
    /* Skip the leading empty bins... */
    for (i = 0; i <= MAX_DB && !histdb[i]; i++);
    /* ...then print bins until the printed ones add up to at least
       nb_samples / 1000; with fewer than 1000 samples nothing is printed. */
    for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
        av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRIu64"\n", i, histdb[i]);
        sum += histdb[i];
    }
}

/**
 * Filter uninit callback: its only action is to log the collected
 * statistics through print_stats().
 *
 * @param ctx filter context being torn down
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    print_stats(ctx);
}

/* Input pads: a single audio pad named "default" whose frames are handled
   by filter_frame(). */
static const AVFilterPad volumedetect_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_AUDIO,
        .filter_frame = filter_frame,
    },
    { NULL } /* sentinel entry closing the array */
};

/* Output pads: a single audio pad named "default"; no callbacks are set
   on it. */
static const AVFilterPad volumedetect_outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
    },
    { NULL } /* sentinel entry closing the array */
};

/*
 * Filter definition for "volumedetect". No init callback is set; the
 * statistics are reported from the uninit callback.
 * NOTE(review): the histogram is never cleared explicitly in this file;
 * presumably the framework zero-fills the priv_size bytes it allocates --
 * confirm.
 */
AVFilter ff_af_volumedetect = {
    .name          = "volumedetect",
    .description   = NULL_IF_CONFIG_SMALL("Detect audio volume."),
    .priv_size     = sizeof(VolDetectContext),
    .query_formats = query_formats,
    .uninit        = uninit,
    .inputs        = volumedetect_inputs,
    .outputs       = volumedetect_outputs,
};
Commit	Line	Data
	1	/*
	2	* Copyright (c) 2012 Nicolas George
	3	*
	4	* This file is part of FFmpeg.
	5	*
	6	* FFmpeg is free software; you can redistribute it and/or
	7	* modify it under the terms of the GNU Lesser General Public License
	8	* as published by the Free Software Foundation; either
	9	* version 2.1 of the License, or (at your option) any later version.
	10	*
	11	* FFmpeg is distributed in the hope that it will be useful,
	12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	14	* GNU Lesser General Public License for more details.
	15	*
	16	* You should have received a copy of the GNU Lesser General Public License
	17	* along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
	18	* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	19	*/
	20
	21	#include "libavutil/channel_layout.h"
	22	#include "libavutil/avassert.h"
	23	#include "audio.h"
	24	#include "avfilter.h"
	25	#include "internal.h"
	26
	27	typedef struct {
	28	/**
	29	* Number of samples at each PCM value.
	30	* histogram[0x8000 + i] is the number of samples at value i.
	31	* The extra element is there for symmetry.
	32	*/
	33	uint64_t histogram[0x10001];
	34	} VolDetectContext;
	35
	36	static int query_formats(AVFilterContext *ctx)
	37	{
	38	static const enum AVSampleFormat sample_fmts[] = {
	39	AV_SAMPLE_FMT_S16,
	40	AV_SAMPLE_FMT_S16P,
	41	AV_SAMPLE_FMT_NONE
	42	};
	43	AVFilterFormats *formats;
	44
	45	if (!(formats = ff_make_format_list(sample_fmts)))
	46	return AVERROR(ENOMEM);
	47	ff_set_common_formats(ctx, formats);
	48
	49	return 0;
	50	}
	51
	52	static int filter_frame(AVFilterLink *inlink, AVFrame *samples)
	53	{
	54	AVFilterContext *ctx = inlink->dst;
	55	VolDetectContext *vd = ctx->priv;
	56	int64_t layout = samples->channel_layout;
	57	int nb_samples = samples->nb_samples;
	58	int nb_channels = av_get_channel_layout_nb_channels(layout);
	59	int nb_planes = nb_channels;
	60	int plane, i;
	61	int16_t *pcm;
	62
	63	if (!av_sample_fmt_is_planar(samples->format)) {
	64	nb_samples *= nb_channels;
	65	nb_planes = 1;
	66	}
	67	for (plane = 0; plane < nb_planes; plane++) {
	68	pcm = (int16_t *)samples->extended_data[plane];
	69	for (i = 0; i < nb_samples; i++)
	70	vd->histogram[pcm[i] + 0x8000]++;
	71	}
	72
	73	return ff_filter_frame(inlink->dst->outputs[0], samples);
	74	}
	75
	76	#define MAX_DB 91
	77
	78	static inline double logdb(uint64_t v)
	79	{
	80	double d = v / (double)(0x8000 * 0x8000);
	81	if (!v)
	82	return MAX_DB;
	83	return log(d) * -4.3429448190325182765112891891660508229; /* -10/log(10) */
	84	}
	85
	86	static void print_stats(AVFilterContext *ctx)
	87	{
	88	VolDetectContext *vd = ctx->priv;
	89	int i, max_volume, shift;
	90	uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
	91	uint64_t histdb[MAX_DB + 1] = { 0 };
	92
	93	for (i = 0; i < 0x10000; i++)
	94	nb_samples += vd->histogram[i];
	95	av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples);
	96	if (!nb_samples)
	97	return;
	98
	99	/* If nb_samples > 1<<34, there is a risk of overflow in the
	100	multiplication or the sum: shift all histogram values to avoid that.
	101	The total number of samples must be recomputed to avoid rounding
	102	errors. */
	103	shift = av_log2(nb_samples >> 33);
	104	for (i = 0; i < 0x10000; i++) {
	105	nb_samples_shift += vd->histogram[i] >> shift;
	106	power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift);
	107	}
	108	if (!nb_samples_shift)
	109	return;
	110	power = (power + nb_samples_shift / 2) / nb_samples_shift;
	111	av_assert0(power <= 0x8000 * 0x8000);
	112	av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));
	113
	114	max_volume = 0x8000;
	115	while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
	116	!vd->histogram[0x8000 - max_volume])
	117	max_volume--;
	118	av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume));
	119
	120	for (i = 0; i < 0x10000; i++)
	121	histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
	122	for (i = 0; i <= MAX_DB && !histdb[i]; i++);
	123	for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
	124	av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]);
	125	sum += histdb[i];
	126	}
	127	}
	128
	129	static av_cold void uninit(AVFilterContext *ctx)
	130	{
	131	print_stats(ctx);
	132	}
	133
	134	static const AVFilterPad volumedetect_inputs[] = {
	135	{
	136	.name = "default",
	137	.type = AVMEDIA_TYPE_AUDIO,
	138	.filter_frame = filter_frame,
	139	},
	140	{ NULL }
	141	};
	142
	143	static const AVFilterPad volumedetect_outputs[] = {
	144	{
	145	.name = "default",
	146	.type = AVMEDIA_TYPE_AUDIO,
	147	},
	148	{ NULL }
	149	};
	150
	151	AVFilter ff_af_volumedetect = {
	152	.name = "volumedetect",
	153	.description = NULL_IF_CONFIG_SMALL("Detect audio volume."),
	154	.priv_size = sizeof(VolDetectContext),
	155	.query_formats = query_formats,
	156	.uninit = uninit,
	157	.inputs = volumedetect_inputs,
	158	.outputs = volumedetect_outputs,
	159	};