| 1 | /* |
| 2 | * Copyright (c) 2012 Nicolas George |
| 3 | * |
| 4 | * This file is part of FFmpeg. |
| 5 | * |
| 6 | * FFmpeg is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU Lesser General Public License |
| 8 | * as published by the Free Software Foundation; either |
| 9 | * version 2.1 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | * FFmpeg is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | * GNU Lesser General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU Lesser General Public License |
| 17 | * along with FFmpeg; if not, write to the Free Software Foundation, Inc., |
| 18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 19 | */ |
| 20 | |
| 21 | #include "libavutil/channel_layout.h" |
| 22 | #include "libavutil/avassert.h" |
| 23 | #include "audio.h" |
| 24 | #include "avfilter.h" |
| 25 | #include "internal.h" |
| 26 | |
/**
 * Private state of the volumedetect filter.
 */
typedef struct {
    /**
     * Occurrence count for every possible 16-bit PCM value.
     *
     * The slot at index (0x8000 + v) counts the samples whose value is v,
     * so index 0 corresponds to -0x8000 and index 0xFFFF to +0x7FFF.
     * One more slot (index 0x10000, i.e. +0x8000) is allocated so that the
     * table can be scanned symmetrically around 0x8000.
     */
    uint64_t histogram[0x10001];
} VolDetectContext;
| 35 | |
| 36 | static int query_formats(AVFilterContext *ctx) |
| 37 | { |
| 38 | static const enum AVSampleFormat sample_fmts[] = { |
| 39 | AV_SAMPLE_FMT_S16, |
| 40 | AV_SAMPLE_FMT_S16P, |
| 41 | AV_SAMPLE_FMT_NONE |
| 42 | }; |
| 43 | AVFilterFormats *formats; |
| 44 | |
| 45 | if (!(formats = ff_make_format_list(sample_fmts))) |
| 46 | return AVERROR(ENOMEM); |
| 47 | ff_set_common_formats(ctx, formats); |
| 48 | |
| 49 | return 0; |
| 50 | } |
| 51 | |
| 52 | static int filter_frame(AVFilterLink *inlink, AVFrame *samples) |
| 53 | { |
| 54 | AVFilterContext *ctx = inlink->dst; |
| 55 | VolDetectContext *vd = ctx->priv; |
| 56 | int64_t layout = samples->channel_layout; |
| 57 | int nb_samples = samples->nb_samples; |
| 58 | int nb_channels = av_get_channel_layout_nb_channels(layout); |
| 59 | int nb_planes = nb_channels; |
| 60 | int plane, i; |
| 61 | int16_t *pcm; |
| 62 | |
| 63 | if (!av_sample_fmt_is_planar(samples->format)) { |
| 64 | nb_samples *= nb_channels; |
| 65 | nb_planes = 1; |
| 66 | } |
| 67 | for (plane = 0; plane < nb_planes; plane++) { |
| 68 | pcm = (int16_t *)samples->extended_data[plane]; |
| 69 | for (i = 0; i < nb_samples; i++) |
| 70 | vd->histogram[pcm[i] + 0x8000]++; |
| 71 | } |
| 72 | |
| 73 | return ff_filter_frame(inlink->dst->outputs[0], samples); |
| 74 | } |
| 75 | |
| 76 | #define MAX_DB 91 |
| 77 | |
| 78 | static inline double logdb(uint64_t v) |
| 79 | { |
| 80 | double d = v / (double)(0x8000 * 0x8000); |
| 81 | if (!v) |
| 82 | return MAX_DB; |
| 83 | return log(d) * -4.3429448190325182765112891891660508229; /* -10/log(10) */ |
| 84 | } |
| 85 | |
| 86 | static void print_stats(AVFilterContext *ctx) |
| 87 | { |
| 88 | VolDetectContext *vd = ctx->priv; |
| 89 | int i, max_volume, shift; |
| 90 | uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0; |
| 91 | uint64_t histdb[MAX_DB + 1] = { 0 }; |
| 92 | |
| 93 | for (i = 0; i < 0x10000; i++) |
| 94 | nb_samples += vd->histogram[i]; |
| 95 | av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples); |
| 96 | if (!nb_samples) |
| 97 | return; |
| 98 | |
| 99 | /* If nb_samples > 1<<34, there is a risk of overflow in the |
| 100 | multiplication or the sum: shift all histogram values to avoid that. |
| 101 | The total number of samples must be recomputed to avoid rounding |
| 102 | errors. */ |
| 103 | shift = av_log2(nb_samples >> 33); |
| 104 | for (i = 0; i < 0x10000; i++) { |
| 105 | nb_samples_shift += vd->histogram[i] >> shift; |
| 106 | power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift); |
| 107 | } |
| 108 | if (!nb_samples_shift) |
| 109 | return; |
| 110 | power = (power + nb_samples_shift / 2) / nb_samples_shift; |
| 111 | av_assert0(power <= 0x8000 * 0x8000); |
| 112 | av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power)); |
| 113 | |
| 114 | max_volume = 0x8000; |
| 115 | while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] && |
| 116 | !vd->histogram[0x8000 - max_volume]) |
| 117 | max_volume--; |
| 118 | av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume)); |
| 119 | |
| 120 | for (i = 0; i < 0x10000; i++) |
| 121 | histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i]; |
| 122 | for (i = 0; i <= MAX_DB && !histdb[i]; i++); |
| 123 | for (; i <= MAX_DB && sum < nb_samples / 1000; i++) { |
| 124 | av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]); |
| 125 | sum += histdb[i]; |
| 126 | } |
| 127 | } |
| 128 | |
| 129 | static av_cold void uninit(AVFilterContext *ctx) |
| 130 | { |
| 131 | print_stats(ctx); |
| 132 | } |
| 133 | |
| 134 | static const AVFilterPad volumedetect_inputs[] = { |
| 135 | { |
| 136 | .name = "default", |
| 137 | .type = AVMEDIA_TYPE_AUDIO, |
| 138 | .filter_frame = filter_frame, |
| 139 | }, |
| 140 | { NULL } |
| 141 | }; |
| 142 | |
| 143 | static const AVFilterPad volumedetect_outputs[] = { |
| 144 | { |
| 145 | .name = "default", |
| 146 | .type = AVMEDIA_TYPE_AUDIO, |
| 147 | }, |
| 148 | { NULL } |
| 149 | }; |
| 150 | |
| 151 | AVFilter ff_af_volumedetect = { |
| 152 | .name = "volumedetect", |
| 153 | .description = NULL_IF_CONFIG_SMALL("Detect audio volume."), |
| 154 | .priv_size = sizeof(VolDetectContext), |
| 155 | .query_formats = query_formats, |
| 156 | .uninit = uninit, |
| 157 | .inputs = volumedetect_inputs, |
| 158 | .outputs = volumedetect_outputs, |
| 159 | }; |