Imported Debian version 2.4.3~trusty1
[deb_ffmpeg.git] / ffmpeg / libavfilter / af_silenceremove.c
CommitLineData
2ba45a60
DM
1/*
2 * Copyright (c) 2001 Heikki Leinonen
3 * Copyright (c) 2001 Chris Bagwell
4 * Copyright (c) 2003 Donnie Smith
5 * Copyright (c) 2014 Paul B Mahol
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24#include <float.h> /* DBL_MAX */
25
26#include "libavutil/opt.h"
27#include "libavutil/timestamp.h"
28#include "audio.h"
29#include "formats.h"
30#include "avfilter.h"
31#include "internal.h"
32
/**
 * States of the silence-removal state machine. The current state lives in
 * SilenceRemoveContext.mode and selects the branch taken by filter_frame().
 */
enum SilenceMode {
    SILENCE_TRIM = 0,   /* scanning leading input; loud samples go to start_holdoff */
    SILENCE_TRIM_FLUSH, /* emitting the contents of start_holdoff */
    SILENCE_COPY,       /* copying input to output; quiet samples go to stop_holdoff */
    SILENCE_COPY_FLUSH, /* emitting the contents of stop_holdoff */
    SILENCE_STOP        /* nothing more is emitted; incoming frames are just freed */
};
40
41typedef struct SilenceRemoveContext {
42 const AVClass *class;
43
44 enum SilenceMode mode;
45
46 int start_periods;
47 int64_t start_duration;
48 double start_threshold;
49
50 int stop_periods;
51 int64_t stop_duration;
52 double stop_threshold;
53
54 double *start_holdoff;
55 size_t start_holdoff_offset;
56 size_t start_holdoff_end;
57 int start_found_periods;
58
59 double *stop_holdoff;
60 size_t stop_holdoff_offset;
61 size_t stop_holdoff_end;
62 int stop_found_periods;
63
64 double *window;
65 double *window_current;
66 double *window_end;
67 int window_size;
68 double rms_sum;
69
70 int leave_silence;
71 int restart;
72 int64_t next_pts;
73} SilenceRemoveContext;
74
75#define OFFSET(x) offsetof(SilenceRemoveContext, x)
76#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
77static const AVOption silenceremove_options[] = {
78 { "start_periods", NULL, OFFSET(start_periods), AV_OPT_TYPE_INT, {.i64=0}, 0, 9000, FLAGS },
79 { "start_duration", NULL, OFFSET(start_duration), AV_OPT_TYPE_DURATION, {.i64=0}, 0, 9000, FLAGS },
80 { "start_threshold", NULL, OFFSET(start_threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, DBL_MAX, FLAGS },
81 { "stop_periods", NULL, OFFSET(stop_periods), AV_OPT_TYPE_INT, {.i64=0}, -9000, 9000, FLAGS },
82 { "stop_duration", NULL, OFFSET(stop_duration), AV_OPT_TYPE_DURATION, {.i64=0}, 0, 9000, FLAGS },
83 { "stop_threshold", NULL, OFFSET(stop_threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, DBL_MAX, FLAGS },
84 { "leave_silence", NULL, OFFSET(leave_silence), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
85 { NULL }
86};
87
88AVFILTER_DEFINE_CLASS(silenceremove);
89
90static av_cold int init(AVFilterContext *ctx)
91{
92 SilenceRemoveContext *s = ctx->priv;
93
94 if (s->stop_periods < 0) {
95 s->stop_periods = -s->stop_periods;
96 s->restart = 1;
97 }
98
99 return 0;
100}
101
102static void clear_rms(SilenceRemoveContext *s)
103{
104 memset(s->window, 0, s->window_size * sizeof(*s->window));
105
106 s->window_current = s->window;
107 s->window_end = s->window + s->window_size;
108 s->rms_sum = 0;
109}
110
111static int config_input(AVFilterLink *inlink)
112{
113 AVFilterContext *ctx = inlink->dst;
114 SilenceRemoveContext *s = ctx->priv;
115
116 s->window_size = (inlink->sample_rate / 50) * inlink->channels;
117 s->window = av_malloc_array(s->window_size, sizeof(*s->window));
118 if (!s->window)
119 return AVERROR(ENOMEM);
120
121 clear_rms(s);
122
123 s->start_duration = av_rescale(s->start_duration, inlink->sample_rate,
124 AV_TIME_BASE);
125 s->stop_duration = av_rescale(s->stop_duration, inlink->sample_rate,
126 AV_TIME_BASE);
127
128 s->start_holdoff = av_malloc_array(FFMAX(s->start_duration, 1),
129 sizeof(*s->start_holdoff) *
130 inlink->channels);
131 if (!s->start_holdoff)
132 return AVERROR(ENOMEM);
133
134 s->start_holdoff_offset = 0;
135 s->start_holdoff_end = 0;
136 s->start_found_periods = 0;
137
138 s->stop_holdoff = av_malloc_array(FFMAX(s->stop_duration, 1),
139 sizeof(*s->stop_holdoff) *
140 inlink->channels);
141 if (!s->stop_holdoff)
142 return AVERROR(ENOMEM);
143
144 s->stop_holdoff_offset = 0;
145 s->stop_holdoff_end = 0;
146 s->stop_found_periods = 0;
147
148 if (s->start_periods)
149 s->mode = SILENCE_TRIM;
150 else
151 s->mode = SILENCE_COPY;
152
153 return 0;
154}
155
156static int config_output(AVFilterLink *outlink)
157{
158 outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
159
160 return 0;
161}
162
163static double compute_rms(SilenceRemoveContext *s, double sample)
164{
165 double new_sum;
166
167 new_sum = s->rms_sum;
168 new_sum -= *s->window_current;
169 new_sum += sample * sample;
170
171 return sqrt(new_sum / s->window_size);
172}
173
174static void update_rms(SilenceRemoveContext *s, double sample)
175{
176 s->rms_sum -= *s->window_current;
177 *s->window_current = sample * sample;
178 s->rms_sum += *s->window_current;
179
180 s->window_current++;
181 if (s->window_current >= s->window_end)
182 s->window_current = s->window;
183}
184
185static void flush(AVFrame *out, AVFilterLink *outlink,
186 int *nb_samples_written, int *ret)
187{
188 if (*nb_samples_written) {
189 out->nb_samples = *nb_samples_written / outlink->channels;
190 *ret = ff_filter_frame(outlink, out);
191 *nb_samples_written = 0;
192 } else {
193 av_frame_free(&out);
194 }
195}
196
197static int filter_frame(AVFilterLink *inlink, AVFrame *in)
198{
199 AVFilterContext *ctx = inlink->dst;
200 AVFilterLink *outlink = ctx->outputs[0];
201 SilenceRemoveContext *s = ctx->priv;
202 int i, j, threshold, ret = 0;
203 int nbs, nb_samples_read, nb_samples_written;
204 double *obuf, *ibuf = (double *)in->data[0];
205 AVFrame *out;
206
207 nb_samples_read = nb_samples_written = 0;
208
209 switch (s->mode) {
210 case SILENCE_TRIM:
211silence_trim:
212 nbs = in->nb_samples - nb_samples_read / inlink->channels;
213 if (!nbs)
214 break;
215
216 for (i = 0; i < nbs; i++) {
217 threshold = 0;
218 for (j = 0; j < inlink->channels; j++) {
219 threshold |= compute_rms(s, ibuf[j]) > s->start_threshold;
220 }
221
222 if (threshold) {
223 for (j = 0; j < inlink->channels; j++) {
224 update_rms(s, *ibuf);
225 s->start_holdoff[s->start_holdoff_end++] = *ibuf++;
226 nb_samples_read++;
227 }
228
229 if (s->start_holdoff_end >= s->start_duration * inlink->channels) {
230 if (++s->start_found_periods >= s->start_periods) {
231 s->mode = SILENCE_TRIM_FLUSH;
232 goto silence_trim_flush;
233 }
234
235 s->start_holdoff_offset = 0;
236 s->start_holdoff_end = 0;
237 }
238 } else {
239 s->start_holdoff_end = 0;
240
241 for (j = 0; j < inlink->channels; j++)
242 update_rms(s, ibuf[j]);
243
244 ibuf += inlink->channels;
245 nb_samples_read += inlink->channels;
246 }
247 }
248 break;
249
250 case SILENCE_TRIM_FLUSH:
251silence_trim_flush:
252 nbs = s->start_holdoff_end - s->start_holdoff_offset;
253 nbs -= nbs % inlink->channels;
254 if (!nbs)
255 break;
256
257 out = ff_get_audio_buffer(inlink, nbs / inlink->channels);
258 if (!out) {
259 av_frame_free(&in);
260 return AVERROR(ENOMEM);
261 }
262
263 memcpy(out->data[0], &s->start_holdoff[s->start_holdoff_offset],
264 nbs * sizeof(double));
265 s->start_holdoff_offset += nbs;
266
267 ret = ff_filter_frame(outlink, out);
268
269 if (s->start_holdoff_offset == s->start_holdoff_end) {
270 s->start_holdoff_offset = 0;
271 s->start_holdoff_end = 0;
272 s->mode = SILENCE_COPY;
273 goto silence_copy;
274 }
275 break;
276
277 case SILENCE_COPY:
278silence_copy:
279 nbs = in->nb_samples - nb_samples_read / inlink->channels;
280 if (!nbs)
281 break;
282
283 out = ff_get_audio_buffer(inlink, nbs);
284 if (!out) {
285 av_frame_free(&in);
286 return AVERROR(ENOMEM);
287 }
288 obuf = (double *)out->data[0];
289
290 if (s->stop_periods) {
291 for (i = 0; i < nbs; i++) {
292 threshold = 1;
293 for (j = 0; j < inlink->channels; j++)
294 threshold &= compute_rms(s, ibuf[j]) > s->stop_threshold;
295
296 if (threshold && s->stop_holdoff_end && !s->leave_silence) {
297 s->mode = SILENCE_COPY_FLUSH;
298 flush(out, outlink, &nb_samples_written, &ret);
299 goto silence_copy_flush;
300 } else if (threshold) {
301 for (j = 0; j < inlink->channels; j++) {
302 update_rms(s, *ibuf);
303 *obuf++ = *ibuf++;
304 nb_samples_read++;
305 nb_samples_written++;
306 }
307 } else if (!threshold) {
308 for (j = 0; j < inlink->channels; j++) {
309 update_rms(s, *ibuf);
310 if (s->leave_silence) {
311 *obuf++ = *ibuf;
312 nb_samples_written++;
313 }
314
315 s->stop_holdoff[s->stop_holdoff_end++] = *ibuf++;
316 nb_samples_read++;
317 }
318
319 if (s->stop_holdoff_end >= s->stop_duration * inlink->channels) {
320 if (++s->stop_found_periods >= s->stop_periods) {
321 s->stop_holdoff_offset = 0;
322 s->stop_holdoff_end = 0;
323
324 if (!s->restart) {
325 s->mode = SILENCE_STOP;
326 flush(out, outlink, &nb_samples_written, &ret);
327 goto silence_stop;
328 } else {
329 s->stop_found_periods = 0;
330 s->start_found_periods = 0;
331 s->start_holdoff_offset = 0;
332 s->start_holdoff_end = 0;
333 clear_rms(s);
334 s->mode = SILENCE_TRIM;
335 flush(out, outlink, &nb_samples_written, &ret);
336 goto silence_trim;
337 }
338 }
339 s->mode = SILENCE_COPY_FLUSH;
340 flush(out, outlink, &nb_samples_written, &ret);
341 goto silence_copy_flush;
342 }
343 }
344 }
345 flush(out, outlink, &nb_samples_written, &ret);
346 } else {
347 memcpy(obuf, ibuf, sizeof(double) * nbs * inlink->channels);
348 ret = ff_filter_frame(outlink, out);
349 }
350 break;
351
352 case SILENCE_COPY_FLUSH:
353silence_copy_flush:
354 nbs = s->stop_holdoff_end - s->stop_holdoff_offset;
355 nbs -= nbs % inlink->channels;
356 if (!nbs)
357 break;
358
359 out = ff_get_audio_buffer(inlink, nbs / inlink->channels);
360 if (!out) {
361 av_frame_free(&in);
362 return AVERROR(ENOMEM);
363 }
364
365 memcpy(out->data[0], &s->stop_holdoff[s->stop_holdoff_offset],
366 nbs * sizeof(double));
367 s->stop_holdoff_offset += nbs;
368
369 ret = ff_filter_frame(outlink, out);
370
371 if (s->stop_holdoff_offset == s->stop_holdoff_end) {
372 s->stop_holdoff_offset = 0;
373 s->stop_holdoff_end = 0;
374 s->mode = SILENCE_COPY;
375 goto silence_copy;
376 }
377 break;
378 case SILENCE_STOP:
379silence_stop:
380 break;
381 }
382
383 av_frame_free(&in);
384
385 return ret;
386}
387
388static int request_frame(AVFilterLink *outlink)
389{
390 AVFilterContext *ctx = outlink->src;
391 SilenceRemoveContext *s = ctx->priv;
392 int ret;
393
394 ret = ff_request_frame(ctx->inputs[0]);
395 if (ret == AVERROR_EOF && (s->mode == SILENCE_COPY_FLUSH ||
396 s->mode == SILENCE_COPY)) {
397 int nbs = s->stop_holdoff_end - s->stop_holdoff_offset;
398 if (nbs) {
399 AVFrame *frame;
400
401 frame = ff_get_audio_buffer(outlink, nbs / outlink->channels);
402 if (!frame)
403 return AVERROR(ENOMEM);
404
405 memcpy(frame->data[0], &s->stop_holdoff[s->stop_holdoff_offset],
406 nbs * sizeof(double));
407 ret = ff_filter_frame(ctx->inputs[0], frame);
408 }
409 s->mode = SILENCE_STOP;
410 }
411 return ret;
412}
413
414static int query_formats(AVFilterContext *ctx)
415{
416 AVFilterFormats *formats = NULL;
417 AVFilterChannelLayouts *layouts = NULL;
418 static const enum AVSampleFormat sample_fmts[] = {
419 AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_NONE
420 };
421
422 layouts = ff_all_channel_layouts();
423 if (!layouts)
424 return AVERROR(ENOMEM);
425 ff_set_common_channel_layouts(ctx, layouts);
426
427 formats = ff_make_format_list(sample_fmts);
428 if (!formats)
429 return AVERROR(ENOMEM);
430 ff_set_common_formats(ctx, formats);
431
432 formats = ff_all_samplerates();
433 if (!formats)
434 return AVERROR(ENOMEM);
435 ff_set_common_samplerates(ctx, formats);
436
437 return 0;
438}
439
440static av_cold void uninit(AVFilterContext *ctx)
441{
442 SilenceRemoveContext *s = ctx->priv;
443
444 av_freep(&s->start_holdoff);
445 av_freep(&s->stop_holdoff);
446 av_freep(&s->window);
447}
448
449static const AVFilterPad silenceremove_inputs[] = {
450 {
451 .name = "default",
452 .type = AVMEDIA_TYPE_AUDIO,
453 .config_props = config_input,
454 .filter_frame = filter_frame,
455 },
456 { NULL }
457};
458
459static const AVFilterPad silenceremove_outputs[] = {
460 {
461 .name = "default",
462 .type = AVMEDIA_TYPE_AUDIO,
463 .config_props = config_output,
464 .request_frame = request_frame,
465 },
466 { NULL }
467};
468
469AVFilter ff_af_silenceremove = {
470 .name = "silenceremove",
471 .description = NULL_IF_CONFIG_SMALL("Remove silence."),
472 .priv_size = sizeof(SilenceRemoveContext),
473 .priv_class = &silenceremove_class,
474 .init = init,
475 .uninit = uninit,
476 .query_formats = query_formats,
477 .inputs = silenceremove_inputs,
478 .outputs = silenceremove_outputs,
479};