/*
 * Copyright (c) 2001 Heikki Leinonen
 * Copyright (c) 2001 Chris Bagwell
 * Copyright (c) 2003 Donnie Smith
 * Copyright (c) 2014 Paul B Mahol
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <float.h> /* DBL_MAX */

#include "libavutil/avutil.h"
#include "libavutil/frame.h"
#include "libavutil/opt.h"
#include "libavutil/timestamp.h"
41 typedef struct SilenceRemoveContext
{
44 enum SilenceMode mode
;
47 int64_t start_duration
;
48 double start_threshold
;
51 int64_t stop_duration
;
52 double stop_threshold
;
54 double *start_holdoff
;
55 size_t start_holdoff_offset
;
56 size_t start_holdoff_end
;
57 int start_found_periods
;
60 size_t stop_holdoff_offset
;
61 size_t stop_holdoff_end
;
62 int stop_found_periods
;
65 double *window_current
;
73 } SilenceRemoveContext
;
75 #define OFFSET(x) offsetof(SilenceRemoveContext, x)
76 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
77 static const AVOption silenceremove_options
[] = {
78 { "start_periods", NULL
, OFFSET(start_periods
), AV_OPT_TYPE_INT
, {.i64
=0}, 0, 9000, FLAGS
},
79 { "start_duration", NULL
, OFFSET(start_duration
), AV_OPT_TYPE_DURATION
, {.i64
=0}, 0, 9000, FLAGS
},
80 { "start_threshold", NULL
, OFFSET(start_threshold
), AV_OPT_TYPE_DOUBLE
, {.dbl
=0}, 0, DBL_MAX
, FLAGS
},
81 { "stop_periods", NULL
, OFFSET(stop_periods
), AV_OPT_TYPE_INT
, {.i64
=0}, -9000, 9000, FLAGS
},
82 { "stop_duration", NULL
, OFFSET(stop_duration
), AV_OPT_TYPE_DURATION
, {.i64
=0}, 0, 9000, FLAGS
},
83 { "stop_threshold", NULL
, OFFSET(stop_threshold
), AV_OPT_TYPE_DOUBLE
, {.dbl
=0}, 0, DBL_MAX
, FLAGS
},
84 { "leave_silence", NULL
, OFFSET(leave_silence
), AV_OPT_TYPE_INT
, {.i64
=0}, 0, 1, FLAGS
},
88 AVFILTER_DEFINE_CLASS(silenceremove
);
90 static av_cold
int init(AVFilterContext
*ctx
)
92 SilenceRemoveContext
*s
= ctx
->priv
;
94 if (s
->stop_periods
< 0) {
95 s
->stop_periods
= -s
->stop_periods
;
102 static void clear_rms(SilenceRemoveContext
*s
)
104 memset(s
->window
, 0, s
->window_size
* sizeof(*s
->window
));
106 s
->window_current
= s
->window
;
107 s
->window_end
= s
->window
+ s
->window_size
;
111 static int config_input(AVFilterLink
*inlink
)
113 AVFilterContext
*ctx
= inlink
->dst
;
114 SilenceRemoveContext
*s
= ctx
->priv
;
116 s
->window_size
= (inlink
->sample_rate
/ 50) * inlink
->channels
;
117 s
->window
= av_malloc_array(s
->window_size
, sizeof(*s
->window
));
119 return AVERROR(ENOMEM
);
123 s
->start_duration
= av_rescale(s
->start_duration
, inlink
->sample_rate
,
125 s
->stop_duration
= av_rescale(s
->stop_duration
, inlink
->sample_rate
,
128 s
->start_holdoff
= av_malloc_array(FFMAX(s
->start_duration
, 1),
129 sizeof(*s
->start_holdoff
) *
131 if (!s
->start_holdoff
)
132 return AVERROR(ENOMEM
);
134 s
->start_holdoff_offset
= 0;
135 s
->start_holdoff_end
= 0;
136 s
->start_found_periods
= 0;
138 s
->stop_holdoff
= av_malloc_array(FFMAX(s
->stop_duration
, 1),
139 sizeof(*s
->stop_holdoff
) *
141 if (!s
->stop_holdoff
)
142 return AVERROR(ENOMEM
);
144 s
->stop_holdoff_offset
= 0;
145 s
->stop_holdoff_end
= 0;
146 s
->stop_found_periods
= 0;
148 if (s
->start_periods
)
149 s
->mode
= SILENCE_TRIM
;
151 s
->mode
= SILENCE_COPY
;
156 static int config_output(AVFilterLink
*outlink
)
158 outlink
->flags
|= FF_LINK_FLAG_REQUEST_LOOP
;
163 static double compute_rms(SilenceRemoveContext
*s
, double sample
)
167 new_sum
= s
->rms_sum
;
168 new_sum
-= *s
->window_current
;
169 new_sum
+= sample
* sample
;
171 return sqrt(new_sum
/ s
->window_size
);
174 static void update_rms(SilenceRemoveContext
*s
, double sample
)
176 s
->rms_sum
-= *s
->window_current
;
177 *s
->window_current
= sample
* sample
;
178 s
->rms_sum
+= *s
->window_current
;
181 if (s
->window_current
>= s
->window_end
)
182 s
->window_current
= s
->window
;
185 static void flush(AVFrame
*out
, AVFilterLink
*outlink
,
186 int *nb_samples_written
, int *ret
)
188 if (*nb_samples_written
) {
189 out
->nb_samples
= *nb_samples_written
/ outlink
->channels
;
190 *ret
= ff_filter_frame(outlink
, out
);
191 *nb_samples_written
= 0;
/*
 * Input pad callback: consume one input frame and advance the silence
 * removal state machine kept in s->mode.
 *
 * Samples are read as doubles from in->data[0] and addressed per channel
 * (ibuf[j] with j < inlink->channels). Output frames come from
 * ff_get_audio_buffer() and are sent with ff_filter_frame(outlink, ...),
 * either directly or through flush().
 *
 * AVERROR(ENOMEM) is returned where an output buffer is requested; ret
 * starts at 0 and receives the results of the ff_filter_frame() calls.
 * NOTE(review): the switch scaffolding, several jump labels, some branch
 * conditions and the final return are not visible in this excerpt - confirm
 * them against the complete function before relying on these notes.
 */
197 static int filter_frame(AVFilterLink
*inlink
, AVFrame
*in
)
199 AVFilterContext
*ctx
= inlink
->dst
;
200 AVFilterLink
*outlink
= ctx
->outputs
[0];
201 SilenceRemoveContext
*s
= ctx
->priv
;
202 int i
, j
, threshold
, ret
= 0;
203 int nbs
, nb_samples_read
, nb_samples_written
;
204 double *obuf
, *ibuf
= (double *)in
->data
[0];
/* Both counters are in samples over all channels, not in sample frames. */
207 nb_samples_read
= nb_samples_written
= 0;
/*
 * Leading-silence search (presumably the SILENCE_TRIM state): nbs is the
 * number of sample frames of `in` that have not been consumed yet.
 */
212 nbs
= in
->nb_samples
- nb_samples_read
/ inlink
->channels
;
216 for (i
= 0; i
< nbs
; i
++) {
/* OR-accumulate: set as soon as one channel's prospective RMS exceeds start_threshold. */
218 for (j
= 0; j
< inlink
->channels
; j
++) {
219 threshold
|= compute_rms(s
, ibuf
[j
]) > s
->start_threshold
;
/*
 * Presumably the above-threshold branch (its guard is not visible here):
 * commit each channel to the RMS window and park the sample in
 * start_holdoff.
 */
223 for (j
= 0; j
< inlink
->channels
; j
++) {
224 update_rms(s
, *ibuf
);
225 s
->start_holdoff
[s
->start_holdoff_end
++] = *ibuf
++;
/*
 * Hold-off now spans at least start_duration sample frames: count one
 * period, and once start_periods of them were found switch to flushing the
 * hold-off; otherwise empty it and keep looking.
 */
229 if (s
->start_holdoff_end
>= s
->start_duration
* inlink
->channels
) {
230 if (++s
->start_found_periods
>= s
->start_periods
) {
231 s
->mode
= SILENCE_TRIM_FLUSH
;
232 goto silence_trim_flush
;
235 s
->start_holdoff_offset
= 0;
236 s
->start_holdoff_end
= 0;
/*
 * Presumably the below-threshold branch: discard what was parked, keep the
 * RMS window fed and step over this sample frame.
 */
239 s
->start_holdoff_end
= 0;
241 for (j
= 0; j
< inlink
->channels
; j
++)
242 update_rms(s
, ibuf
[j
]);
244 ibuf
+= inlink
->channels
;
245 nb_samples_read
+= inlink
->channels
;
/* Emit the parked start samples, rounded down to whole sample frames. */
250 case SILENCE_TRIM_FLUSH
:
252 nbs
= s
->start_holdoff_end
- s
->start_holdoff_offset
;
253 nbs
-= nbs
% inlink
->channels
;
257 out
= ff_get_audio_buffer(inlink
, nbs
/ inlink
->channels
);
260 return AVERROR(ENOMEM
);
263 memcpy(out
->data
[0], &s
->start_holdoff
[s
->start_holdoff_offset
],
264 nbs
* sizeof(double));
265 s
->start_holdoff_offset
+= nbs
;
267 ret
= ff_filter_frame(outlink
, out
);
/* Hold-off fully drained: reset it and continue in copy mode. */
269 if (s
->start_holdoff_offset
== s
->start_holdoff_end
) {
270 s
->start_holdoff_offset
= 0;
271 s
->start_holdoff_end
= 0;
272 s
->mode
= SILENCE_COPY
;
/*
 * Copy phase: nbs is again the number of unconsumed sample frames of `in`,
 * and the output frame is sized to hold all of them.
 */
279 nbs
= in
->nb_samples
- nb_samples_read
/ inlink
->channels
;
283 out
= ff_get_audio_buffer(inlink
, nbs
);
286 return AVERROR(ENOMEM
);
288 obuf
= (double *)out
->data
[0];
/* Trailing-silence detection only runs when stop_periods is non-zero. */
290 if (s
->stop_periods
) {
291 for (i
= 0; i
< nbs
; i
++) {
/* AND-accumulate: can only stay set if every channel's prospective RMS exceeds stop_threshold. */
293 for (j
= 0; j
< inlink
->channels
; j
++)
294 threshold
&= compute_rms(s
, ibuf
[j
]) > s
->stop_threshold
;
/*
 * Sound is back while samples are parked in stop_holdoff and leave_silence
 * is off: send what has been written so far, then go and emit the parked
 * samples.
 */
296 if (threshold
&& s
->stop_holdoff_end
&& !s
->leave_silence
) {
297 s
->mode
= SILENCE_COPY_FLUSH
;
298 flush(out
, outlink
, &nb_samples_written
, &ret
);
299 goto silence_copy_flush
;
/* Above threshold with nothing to flush first: commit to the RMS window and count the sample as written. */
300 } else if (threshold
) {
301 for (j
= 0; j
< inlink
->channels
; j
++) {
302 update_rms(s
, *ibuf
);
305 nb_samples_written
++;
/*
 * Below threshold: commit to the RMS window, count the sample as written
 * when leave_silence is set, and always park it in stop_holdoff.
 */
307 } else if (!threshold
) {
308 for (j
= 0; j
< inlink
->channels
; j
++) {
309 update_rms(s
, *ibuf
);
310 if (s
->leave_silence
) {
312 nb_samples_written
++;
315 s
->stop_holdoff
[s
->stop_holdoff_end
++] = *ibuf
++;
/*
 * stop_holdoff now spans at least stop_duration sample frames: count one
 * period; when stop_periods of them were found the hold-off is emptied.
 */
319 if (s
->stop_holdoff_end
>= s
->stop_duration
* inlink
->channels
) {
320 if (++s
->stop_found_periods
>= s
->stop_periods
) {
321 s
->stop_holdoff_offset
= 0;
322 s
->stop_holdoff_end
= 0;
/* One visible outcome: stop producing output after sending what was written. */
325 s
->mode
= SILENCE_STOP
;
326 flush(out
, outlink
, &nb_samples_written
, &ret
);
/*
 * Other visible outcome: reset both period counters and the start hold-off
 * and go back to trimming.
 * NOTE(review): the condition choosing between these two outcomes is not
 * visible in this excerpt.
 */
329 s
->stop_found_periods
= 0;
330 s
->start_found_periods
= 0;
331 s
->start_holdoff_offset
= 0;
332 s
->start_holdoff_end
= 0;
334 s
->mode
= SILENCE_TRIM
;
335 flush(out
, outlink
, &nb_samples_written
, &ret
);
/* Presumably reached while the period count is still short: send what was written and emit the parked samples. */
339 s
->mode
= SILENCE_COPY_FLUSH
;
340 flush(out
, outlink
, &nb_samples_written
, &ret
);
341 goto silence_copy_flush
;
/* Send whatever was written for this input frame. */
345 flush(out
, outlink
, &nb_samples_written
, &ret
);
/* Plain pass-through of the remaining samples (presumably the stop_periods == 0 case). */
347 memcpy(obuf
, ibuf
, sizeof(double) * nbs
* inlink
->channels
);
348 ret
= ff_filter_frame(outlink
, out
);
/* Emit the parked stop samples, rounded down to whole sample frames. */
352 case SILENCE_COPY_FLUSH
:
354 nbs
= s
->stop_holdoff_end
- s
->stop_holdoff_offset
;
355 nbs
-= nbs
% inlink
->channels
;
359 out
= ff_get_audio_buffer(inlink
, nbs
/ inlink
->channels
);
362 return AVERROR(ENOMEM
);
365 memcpy(out
->data
[0], &s
->stop_holdoff
[s
->stop_holdoff_offset
],
366 nbs
* sizeof(double));
367 s
->stop_holdoff_offset
+= nbs
;
369 ret
= ff_filter_frame(outlink
, out
);
/* Hold-off fully drained: reset it and resume copying. */
371 if (s
->stop_holdoff_offset
== s
->stop_holdoff_end
) {
372 s
->stop_holdoff_offset
= 0;
373 s
->stop_holdoff_end
= 0;
374 s
->mode
= SILENCE_COPY
;
388 static int request_frame(AVFilterLink
*outlink
)
390 AVFilterContext
*ctx
= outlink
->src
;
391 SilenceRemoveContext
*s
= ctx
->priv
;
394 ret
= ff_request_frame(ctx
->inputs
[0]);
395 if (ret
== AVERROR_EOF
&& (s
->mode
== SILENCE_COPY_FLUSH
||
396 s
->mode
== SILENCE_COPY
)) {
397 int nbs
= s
->stop_holdoff_end
- s
->stop_holdoff_offset
;
401 frame
= ff_get_audio_buffer(outlink
, nbs
/ outlink
->channels
);
403 return AVERROR(ENOMEM
);
405 memcpy(frame
->data
[0], &s
->stop_holdoff
[s
->stop_holdoff_offset
],
406 nbs
* sizeof(double));
407 ret
= ff_filter_frame(ctx
->inputs
[0], frame
);
409 s
->mode
= SILENCE_STOP
;
414 static int query_formats(AVFilterContext
*ctx
)
416 AVFilterFormats
*formats
= NULL
;
417 AVFilterChannelLayouts
*layouts
= NULL
;
418 static const enum AVSampleFormat sample_fmts
[] = {
419 AV_SAMPLE_FMT_DBL
, AV_SAMPLE_FMT_NONE
422 layouts
= ff_all_channel_layouts();
424 return AVERROR(ENOMEM
);
425 ff_set_common_channel_layouts(ctx
, layouts
);
427 formats
= ff_make_format_list(sample_fmts
);
429 return AVERROR(ENOMEM
);
430 ff_set_common_formats(ctx
, formats
);
432 formats
= ff_all_samplerates();
434 return AVERROR(ENOMEM
);
435 ff_set_common_samplerates(ctx
, formats
);
440 static av_cold
void uninit(AVFilterContext
*ctx
)
442 SilenceRemoveContext
*s
= ctx
->priv
;
444 av_freep(&s
->start_holdoff
);
445 av_freep(&s
->stop_holdoff
);
446 av_freep(&s
->window
);
449 static const AVFilterPad silenceremove_inputs
[] = {
452 .type
= AVMEDIA_TYPE_AUDIO
,
453 .config_props
= config_input
,
454 .filter_frame
= filter_frame
,
459 static const AVFilterPad silenceremove_outputs
[] = {
462 .type
= AVMEDIA_TYPE_AUDIO
,
463 .config_props
= config_output
,
464 .request_frame
= request_frame
,
469 AVFilter ff_af_silenceremove
= {
470 .name
= "silenceremove",
471 .description
= NULL_IF_CONFIG_SMALL("Remove silence."),
472 .priv_size
= sizeof(SilenceRemoveContext
),
473 .priv_class
= &silenceremove_class
,
476 .query_formats
= query_formats
,
477 .inputs
= silenceremove_inputs
,
478 .outputs
= silenceremove_outputs
,