/*
 * Copyright (c) 2012 Nicolas George
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21 #include "libavutil/channel_layout.h"
22 #include "libavutil/avassert.h"
/**
 * Private state of the volume detection filter: a histogram of every
 * 16-bit sample value seen so far.
 */
typedef struct VolDetectContext {
    /**
     * Number of samples at each PCM value.
     * histogram[0x8000 + i] is the number of samples at value i.
     * The extra element is there for symmetry.
     */
    uint64_t histogram[0x10001];
} VolDetectContext;
36 static int query_formats(AVFilterContext
*ctx
)
38 static const enum AVSampleFormat sample_fmts
[] = {
43 AVFilterFormats
*formats
;
45 if (!(formats
= ff_make_format_list(sample_fmts
)))
46 return AVERROR(ENOMEM
);
47 ff_set_common_formats(ctx
, formats
);
52 static int filter_frame(AVFilterLink
*inlink
, AVFrame
*samples
)
54 AVFilterContext
*ctx
= inlink
->dst
;
55 VolDetectContext
*vd
= ctx
->priv
;
56 int64_t layout
= samples
->channel_layout
;
57 int nb_samples
= samples
->nb_samples
;
58 int nb_channels
= av_get_channel_layout_nb_channels(layout
);
59 int nb_planes
= nb_channels
;
63 if (!av_sample_fmt_is_planar(samples
->format
)) {
64 nb_samples
*= nb_channels
;
67 for (plane
= 0; plane
< nb_planes
; plane
++) {
68 pcm
= (int16_t *)samples
->extended_data
[plane
];
69 for (i
= 0; i
< nb_samples
; i
++)
70 vd
->histogram
[pcm
[i
] + 0x8000]++;
73 return ff_filter_frame(inlink
->dst
->outputs
[0], samples
);
78 static inline double logdb(uint64_t v
)
80 double d
= v
/ (double)(0x8000 * 0x8000);
83 return log(d
) * -4.3429448190325182765112891891660508229; /* -10/log(10) */
86 static void print_stats(AVFilterContext
*ctx
)
88 VolDetectContext
*vd
= ctx
->priv
;
89 int i
, max_volume
, shift
;
90 uint64_t nb_samples
= 0, power
= 0, nb_samples_shift
= 0, sum
= 0;
91 uint64_t histdb
[MAX_DB
+ 1] = { 0 };
93 for (i
= 0; i
< 0x10000; i
++)
94 nb_samples
+= vd
->histogram
[i
];
95 av_log(ctx
, AV_LOG_INFO
, "n_samples: %"PRId64
"\n", nb_samples
);
99 /* If nb_samples > 1<<34, there is a risk of overflow in the
100 multiplication or the sum: shift all histogram values to avoid that.
101 The total number of samples must be recomputed to avoid rounding
103 shift
= av_log2(nb_samples
>> 33);
104 for (i
= 0; i
< 0x10000; i
++) {
105 nb_samples_shift
+= vd
->histogram
[i
] >> shift
;
106 power
+= (i
- 0x8000) * (i
- 0x8000) * (vd
->histogram
[i
] >> shift
);
108 if (!nb_samples_shift
)
110 power
= (power
+ nb_samples_shift
/ 2) / nb_samples_shift
;
111 av_assert0(power
<= 0x8000 * 0x8000);
112 av_log(ctx
, AV_LOG_INFO
, "mean_volume: %.1f dB\n", -logdb(power
));
115 while (max_volume
> 0 && !vd
->histogram
[0x8000 + max_volume
] &&
116 !vd
->histogram
[0x8000 - max_volume
])
118 av_log(ctx
, AV_LOG_INFO
, "max_volume: %.1f dB\n", -logdb(max_volume
* max_volume
));
120 for (i
= 0; i
< 0x10000; i
++)
121 histdb
[(int)logdb((i
- 0x8000) * (i
- 0x8000))] += vd
->histogram
[i
];
122 for (i
= 0; i
<= MAX_DB
&& !histdb
[i
]; i
++);
123 for (; i
<= MAX_DB
&& sum
< nb_samples
/ 1000; i
++) {
124 av_log(ctx
, AV_LOG_INFO
, "histogram_%ddb: %"PRId64
"\n", i
, histdb
[i
]);
129 static av_cold
void uninit(AVFilterContext
*ctx
)
134 static const AVFilterPad volumedetect_inputs
[] = {
137 .type
= AVMEDIA_TYPE_AUDIO
,
138 .filter_frame
= filter_frame
,
143 static const AVFilterPad volumedetect_outputs
[] = {
146 .type
= AVMEDIA_TYPE_AUDIO
,
151 AVFilter ff_af_volumedetect
= {
152 .name
= "volumedetect",
153 .description
= NULL_IF_CONFIG_SMALL("Detect audio volume."),
154 .priv_size
= sizeof(VolDetectContext
),
155 .query_formats
= query_formats
,
157 .inputs
= volumedetect_inputs
,
158 .outputs
= volumedetect_outputs
,