Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (c) 2012 Nicolas George | |
3 | * | |
4 | * This file is part of FFmpeg. | |
5 | * | |
6 | * FFmpeg is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public License | |
8 | * as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * FFmpeg is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public License | |
17 | * along with FFmpeg; if not, write to the Free Software Foundation, Inc., | |
18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | */ | |
20 | ||
21 | #include "libavutil/channel_layout.h" | |
22 | #include "libavutil/avassert.h" | |
23 | #include "audio.h" | |
24 | #include "avfilter.h" | |
25 | #include "internal.h" | |
26 | ||
/**
 * Private context of the volumedetect filter.
 */
typedef struct {
    /**
     * Sample counters indexed by PCM value: the counter for the signed
     * 16-bit value i lives at histogram[0x8000 + i].
     * One slot beyond the 0x10000 possible values is allocated so that the
     * table is symmetric around index 0x8000.
     */
    uint64_t histogram[0x10000 + 1];
} VolDetectContext;
35 | ||
36 | static int query_formats(AVFilterContext *ctx) | |
37 | { | |
38 | static const enum AVSampleFormat sample_fmts[] = { | |
39 | AV_SAMPLE_FMT_S16, | |
40 | AV_SAMPLE_FMT_S16P, | |
41 | AV_SAMPLE_FMT_NONE | |
42 | }; | |
43 | AVFilterFormats *formats; | |
44 | ||
45 | if (!(formats = ff_make_format_list(sample_fmts))) | |
46 | return AVERROR(ENOMEM); | |
47 | ff_set_common_formats(ctx, formats); | |
48 | ||
49 | return 0; | |
50 | } | |
51 | ||
52 | static int filter_frame(AVFilterLink *inlink, AVFrame *samples) | |
53 | { | |
54 | AVFilterContext *ctx = inlink->dst; | |
55 | VolDetectContext *vd = ctx->priv; | |
56 | int64_t layout = samples->channel_layout; | |
57 | int nb_samples = samples->nb_samples; | |
58 | int nb_channels = av_get_channel_layout_nb_channels(layout); | |
59 | int nb_planes = nb_channels; | |
60 | int plane, i; | |
61 | int16_t *pcm; | |
62 | ||
63 | if (!av_sample_fmt_is_planar(samples->format)) { | |
64 | nb_samples *= nb_channels; | |
65 | nb_planes = 1; | |
66 | } | |
67 | for (plane = 0; plane < nb_planes; plane++) { | |
68 | pcm = (int16_t *)samples->extended_data[plane]; | |
69 | for (i = 0; i < nb_samples; i++) | |
70 | vd->histogram[pcm[i] + 0x8000]++; | |
71 | } | |
72 | ||
73 | return ff_filter_frame(inlink->dst->outputs[0], samples); | |
74 | } | |
75 | ||
76 | #define MAX_DB 91 | |
77 | ||
78 | static inline double logdb(uint64_t v) | |
79 | { | |
80 | double d = v / (double)(0x8000 * 0x8000); | |
81 | if (!v) | |
82 | return MAX_DB; | |
83 | return log(d) * -4.3429448190325182765112891891660508229; /* -10/log(10) */ | |
84 | } | |
85 | ||
86 | static void print_stats(AVFilterContext *ctx) | |
87 | { | |
88 | VolDetectContext *vd = ctx->priv; | |
89 | int i, max_volume, shift; | |
90 | uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0; | |
91 | uint64_t histdb[MAX_DB + 1] = { 0 }; | |
92 | ||
93 | for (i = 0; i < 0x10000; i++) | |
94 | nb_samples += vd->histogram[i]; | |
95 | av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples); | |
96 | if (!nb_samples) | |
97 | return; | |
98 | ||
99 | /* If nb_samples > 1<<34, there is a risk of overflow in the | |
100 | multiplication or the sum: shift all histogram values to avoid that. | |
101 | The total number of samples must be recomputed to avoid rounding | |
102 | errors. */ | |
103 | shift = av_log2(nb_samples >> 33); | |
104 | for (i = 0; i < 0x10000; i++) { | |
105 | nb_samples_shift += vd->histogram[i] >> shift; | |
106 | power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift); | |
107 | } | |
108 | if (!nb_samples_shift) | |
109 | return; | |
110 | power = (power + nb_samples_shift / 2) / nb_samples_shift; | |
111 | av_assert0(power <= 0x8000 * 0x8000); | |
112 | av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power)); | |
113 | ||
114 | max_volume = 0x8000; | |
115 | while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] && | |
116 | !vd->histogram[0x8000 - max_volume]) | |
117 | max_volume--; | |
118 | av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume)); | |
119 | ||
120 | for (i = 0; i < 0x10000; i++) | |
121 | histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i]; | |
122 | for (i = 0; i <= MAX_DB && !histdb[i]; i++); | |
123 | for (; i <= MAX_DB && sum < nb_samples / 1000; i++) { | |
124 | av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]); | |
125 | sum += histdb[i]; | |
126 | } | |
127 | } | |
128 | ||
129 | static av_cold void uninit(AVFilterContext *ctx) | |
130 | { | |
131 | print_stats(ctx); | |
132 | } | |
133 | ||
134 | static const AVFilterPad volumedetect_inputs[] = { | |
135 | { | |
136 | .name = "default", | |
137 | .type = AVMEDIA_TYPE_AUDIO, | |
138 | .filter_frame = filter_frame, | |
139 | }, | |
140 | { NULL } | |
141 | }; | |
142 | ||
143 | static const AVFilterPad volumedetect_outputs[] = { | |
144 | { | |
145 | .name = "default", | |
146 | .type = AVMEDIA_TYPE_AUDIO, | |
147 | }, | |
148 | { NULL } | |
149 | }; | |
150 | ||
151 | AVFilter ff_af_volumedetect = { | |
152 | .name = "volumedetect", | |
153 | .description = NULL_IF_CONFIG_SMALL("Detect audio volume."), | |
154 | .priv_size = sizeof(VolDetectContext), | |
155 | .query_formats = query_formats, | |
156 | .uninit = uninit, | |
157 | .inputs = volumedetect_inputs, | |
158 | .outputs = volumedetect_outputs, | |
159 | }; |