/*
 * Copyright (c) 2012-2013 Clément Bœsch
 * Copyright (c) 2013 Rudolf Polzer <divverent@xonotic.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/**
 * @file
 * audio to spectrum (video) transmedia filter, based on ffplay rdft showmode
 * (by Michael Niedermayer) and lavfi/avf_showwaves (by Stefano Sabatini).
 */
30 #include "libavcodec/avfft.h"
31 #include "libavutil/avassert.h"
32 #include "libavutil/channel_layout.h"
33 #include "libavutil/opt.h"
/** Layout of the audio channels inside the output picture ("mode" option). */
enum DisplayMode {
    COMBINED,   /* every channel is drawn over the full picture height */
    SEPARATE,   /* each channel gets its own band of h / channels rows */
    NB_MODES    /* number of modes; must stay last */
};
/** Mapping applied to a bin magnitude before it is colored ("scale" option). */
enum DisplayScale {
    LINEAR,     /* "lin": magnitude used as is */
    SQRT,       /* "sqrt": square root */
    CBRT,       /* "cbrt": cubic root */
    LOG,        /* "log": logarithmic */
    NB_SCALES   /* number of scales; must stay last */
};
/** Coloring scheme of the spectrum ("color" option). */
enum ColorMode {
    CHANNEL,    /* "channel": separate color for each channel */
    INTENSITY,  /* "intensity": intensity based coloring */
    NB_CLMODES  /* number of color modes; must stay last */
};
/** Window function applied to the samples before the RDFT ("win_func" option). */
enum WindowFunc {
    WFUNC_NONE,     /* no named option constant exists for this value */
    WFUNC_HANN,     /* "hann": Hann window */
    WFUNC_HAMMING,  /* "hamming": Hamming window */
    WFUNC_BLACKMAN, /* "blackman": Blackman window */
    NB_WFUNC        /* number of window functions; must stay last */
};
/** How new spectrum columns enter the picture ("slide" option). */
enum SlideMode {
    REPLACE,    /* "replace": replace old columns with new */
    SCROLL,     /* "scroll": scroll from right to left */
    FULLFRAME,  /* "fullframe": return full frames */
    NB_SLIDES   /* number of slide modes; must stay last */
};
48 int nb_display_channels
;
50 int sliding
; ///< 1 if sliding mode, 0 otherwise
51 enum DisplayMode mode
; ///< channel display mode
52 enum ColorMode color_mode
; ///< display color scheme
53 enum DisplayScale scale
;
54 float saturation
; ///< color saturation multiplier
55 int xpos
; ///< x position (current column)
56 RDFTContext
*rdft
; ///< Real Discrete Fourier Transform context
57 int rdft_bits
; ///< number of bits (RDFT window size = 1<<rdft_bits)
58 FFTSample
**rdft_data
; ///< bins holder for each (displayed) channels
59 float *window_func_lut
; ///< Window function LUT
60 enum WindowFunc win_func
;
61 float *combine_buffer
; ///< color combining buffer (3 * h items)
62 } ShowSpectrumContext
;
64 #define OFFSET(x) offsetof(ShowSpectrumContext, x)
65 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
67 static const AVOption showspectrum_options
[] = {
68 { "size", "set video size", OFFSET(w
), AV_OPT_TYPE_IMAGE_SIZE
, {.str
= "640x512"}, 0, 0, FLAGS
},
69 { "s", "set video size", OFFSET(w
), AV_OPT_TYPE_IMAGE_SIZE
, {.str
= "640x512"}, 0, 0, FLAGS
},
70 { "slide", "set sliding mode", OFFSET(sliding
), AV_OPT_TYPE_INT
, {.i64
= 0}, 0, NB_SLIDES
, FLAGS
, "slide" },
71 { "replace", "replace old columns with new", 0, AV_OPT_TYPE_CONST
, {.i64
=REPLACE
}, 0, 0, FLAGS
, "slide" },
72 { "scroll", "scroll from right to left", 0, AV_OPT_TYPE_CONST
, {.i64
=SCROLL
}, 0, 0, FLAGS
, "slide" },
73 { "fullframe", "return full frames", 0, AV_OPT_TYPE_CONST
, {.i64
=FULLFRAME
}, 0, 0, FLAGS
, "slide" },
74 { "mode", "set channel display mode", OFFSET(mode
), AV_OPT_TYPE_INT
, {.i64
=COMBINED
}, COMBINED
, NB_MODES
-1, FLAGS
, "mode" },
75 { "combined", "combined mode", 0, AV_OPT_TYPE_CONST
, {.i64
=COMBINED
}, 0, 0, FLAGS
, "mode" },
76 { "separate", "separate mode", 0, AV_OPT_TYPE_CONST
, {.i64
=SEPARATE
}, 0, 0, FLAGS
, "mode" },
77 { "color", "set channel coloring", OFFSET(color_mode
), AV_OPT_TYPE_INT
, {.i64
=CHANNEL
}, CHANNEL
, NB_CLMODES
-1, FLAGS
, "color" },
78 { "channel", "separate color for each channel", 0, AV_OPT_TYPE_CONST
, {.i64
=CHANNEL
}, 0, 0, FLAGS
, "color" },
79 { "intensity", "intensity based coloring", 0, AV_OPT_TYPE_CONST
, {.i64
=INTENSITY
}, 0, 0, FLAGS
, "color" },
80 { "scale", "set display scale", OFFSET(scale
), AV_OPT_TYPE_INT
, {.i64
=SQRT
}, LINEAR
, NB_SCALES
-1, FLAGS
, "scale" },
81 { "sqrt", "square root", 0, AV_OPT_TYPE_CONST
, {.i64
=SQRT
}, 0, 0, FLAGS
, "scale" },
82 { "cbrt", "cubic root", 0, AV_OPT_TYPE_CONST
, {.i64
=CBRT
}, 0, 0, FLAGS
, "scale" },
83 { "log", "logarithmic", 0, AV_OPT_TYPE_CONST
, {.i64
=LOG
}, 0, 0, FLAGS
, "scale" },
84 { "lin", "linear", 0, AV_OPT_TYPE_CONST
, {.i64
=LINEAR
}, 0, 0, FLAGS
, "scale" },
85 { "saturation", "color saturation multiplier", OFFSET(saturation
), AV_OPT_TYPE_FLOAT
, {.dbl
= 1}, -10, 10, FLAGS
},
86 { "win_func", "set window function", OFFSET(win_func
), AV_OPT_TYPE_INT
, {.i64
= WFUNC_HANN
}, 0, NB_WFUNC
-1, FLAGS
, "win_func" },
87 { "hann", "Hann window", 0, AV_OPT_TYPE_CONST
, {.i64
= WFUNC_HANN
}, 0, 0, FLAGS
, "win_func" },
88 { "hamming", "Hamming window", 0, AV_OPT_TYPE_CONST
, {.i64
= WFUNC_HAMMING
}, 0, 0, FLAGS
, "win_func" },
89 { "blackman", "Blackman window", 0, AV_OPT_TYPE_CONST
, {.i64
= WFUNC_BLACKMAN
}, 0, 0, FLAGS
, "win_func" },
93 AVFILTER_DEFINE_CLASS(showspectrum
);
/* Color map used when color_mode is INTENSITY. plot_spectrum_column() reads
 * the rows through the fields .a, .y, .u and .v: it searches for the rows
 * whose .a values bracket the scaled magnitude and linearly interpolates
 * .y, .u and .v between them.
 * NOTE(review): the struct header and the rows before and after the ones
 * below are not visible in this excerpt; presumably the columns are
 * a, y, u, v in that order. Confirm against the complete file. */
97 } intensity_color_table
[] = {
99 { 0.13, .03587126228984074, .1573300977624594, -.02548747583751842 },
100 { 0.30, .18572281794568020, .1772436246393981, .17475554840414750 },
101 { 0.60, .28184980583656130, -.1593064119945782, .47132074554608920 },
102 { 0.73, .65830621175547810, -.3716070802232764, .24352759331252930 },
103 { 0.78, .76318535758242900, -.4307467689263783, .16866496622310430 },
104 { 0.91, .95336363636363640, -.2045454545454546, .03313636363636363 },
108 static av_cold
void uninit(AVFilterContext
*ctx
)
110 ShowSpectrumContext
*s
= ctx
->priv
;
113 av_freep(&s
->combine_buffer
);
114 av_rdft_end(s
->rdft
);
115 for (i
= 0; i
< s
->nb_display_channels
; i
++)
116 av_freep(&s
->rdft_data
[i
]);
117 av_freep(&s
->rdft_data
);
118 av_freep(&s
->window_func_lut
);
119 av_frame_free(&s
->outpicref
);
122 static int query_formats(AVFilterContext
*ctx
)
124 AVFilterFormats
*formats
= NULL
;
125 AVFilterChannelLayouts
*layouts
= NULL
;
126 AVFilterLink
*inlink
= ctx
->inputs
[0];
127 AVFilterLink
*outlink
= ctx
->outputs
[0];
128 static const enum AVSampleFormat sample_fmts
[] = { AV_SAMPLE_FMT_S16P
, AV_SAMPLE_FMT_NONE
};
129 static const enum AVPixelFormat pix_fmts
[] = { AV_PIX_FMT_YUVJ444P
, AV_PIX_FMT_NONE
};
131 /* set input audio formats */
132 formats
= ff_make_format_list(sample_fmts
);
134 return AVERROR(ENOMEM
);
135 ff_formats_ref(formats
, &inlink
->out_formats
);
137 layouts
= ff_all_channel_layouts();
139 return AVERROR(ENOMEM
);
140 ff_channel_layouts_ref(layouts
, &inlink
->out_channel_layouts
);
142 formats
= ff_all_samplerates();
144 return AVERROR(ENOMEM
);
145 ff_formats_ref(formats
, &inlink
->out_samplerates
);
147 /* set output video format */
148 formats
= ff_make_format_list(pix_fmts
);
150 return AVERROR(ENOMEM
);
151 ff_formats_ref(formats
, &outlink
->in_formats
);
156 static int config_output(AVFilterLink
*outlink
)
158 AVFilterContext
*ctx
= outlink
->src
;
159 AVFilterLink
*inlink
= ctx
->inputs
[0];
160 ShowSpectrumContext
*s
= ctx
->priv
;
161 int i
, rdft_bits
, win_size
, h
;
166 h
= (s
->mode
== COMBINED
) ? outlink
->h
: outlink
->h
/ inlink
->channels
;
167 s
->channel_height
= h
;
169 /* RDFT window size (precision) according to the requested output frame height */
170 for (rdft_bits
= 1; 1 << rdft_bits
< 2 * h
; rdft_bits
++);
171 win_size
= 1 << rdft_bits
;
173 /* (re-)configuration if the video output changed (or first init) */
174 if (rdft_bits
!= s
->rdft_bits
) {
175 size_t rdft_size
, rdft_listsize
;
178 av_rdft_end(s
->rdft
);
179 s
->rdft
= av_rdft_init(rdft_bits
, DFT_R2C
);
181 av_log(ctx
, AV_LOG_ERROR
, "Unable to create RDFT context. "
182 "The window size might be too high.\n");
183 return AVERROR(EINVAL
);
185 s
->rdft_bits
= rdft_bits
;
187 /* RDFT buffers: x2 for each (display) channel buffer.
188 * Note: we use free and malloc instead of a realloc-like function to
189 * make sure the buffer is aligned in memory for the FFT functions. */
190 for (i
= 0; i
< s
->nb_display_channels
; i
++)
191 av_freep(&s
->rdft_data
[i
]);
192 av_freep(&s
->rdft_data
);
193 s
->nb_display_channels
= inlink
->channels
;
195 if (av_size_mult(sizeof(*s
->rdft_data
),
196 s
->nb_display_channels
, &rdft_listsize
) < 0)
197 return AVERROR(EINVAL
);
198 if (av_size_mult(sizeof(**s
->rdft_data
),
199 win_size
, &rdft_size
) < 0)
200 return AVERROR(EINVAL
);
201 s
->rdft_data
= av_malloc(rdft_listsize
);
203 return AVERROR(ENOMEM
);
204 for (i
= 0; i
< s
->nb_display_channels
; i
++) {
205 s
->rdft_data
[i
] = av_malloc(rdft_size
);
206 if (!s
->rdft_data
[i
])
207 return AVERROR(ENOMEM
);
210 /* pre-calc windowing function */
212 av_realloc_f(s
->window_func_lut
, win_size
,
213 sizeof(*s
->window_func_lut
));
214 if (!s
->window_func_lut
)
215 return AVERROR(ENOMEM
);
216 switch (s
->win_func
) {
218 for (i
= 0; i
< win_size
; i
++)
219 s
->window_func_lut
[i
] = 1.;
222 for (i
= 0; i
< win_size
; i
++)
223 s
->window_func_lut
[i
] = .5f
* (1 - cos(2*M_PI
*i
/ (win_size
-1)));
226 for (i
= 0; i
< win_size
; i
++)
227 s
->window_func_lut
[i
] = .54f
- .46f
* cos(2*M_PI
*i
/ (win_size
-1));
229 case WFUNC_BLACKMAN
: {
230 for (i
= 0; i
< win_size
; i
++)
231 s
->window_func_lut
[i
] = .42f
- .5f
*cos(2*M_PI
*i
/ (win_size
-1)) + .08f
*cos(4*M_PI
*i
/ (win_size
-1));
238 /* prepare the initial picref buffer (black frame) */
239 av_frame_free(&s
->outpicref
);
240 s
->outpicref
= outpicref
=
241 ff_get_video_buffer(outlink
, outlink
->w
, outlink
->h
);
243 return AVERROR(ENOMEM
);
244 outlink
->sample_aspect_ratio
= (AVRational
){1,1};
245 for (i
= 0; i
< outlink
->h
; i
++) {
246 memset(outpicref
->data
[0] + i
* outpicref
->linesize
[0], 0, outlink
->w
);
247 memset(outpicref
->data
[1] + i
* outpicref
->linesize
[1], 128, outlink
->w
);
248 memset(outpicref
->data
[2] + i
* outpicref
->linesize
[2], 128, outlink
->w
);
252 if (s
->xpos
>= outlink
->w
)
255 outlink
->frame_rate
= av_make_q(inlink
->sample_rate
, win_size
);
256 if (s
->sliding
== FULLFRAME
)
257 outlink
->frame_rate
.den
*= outlink
->w
;
259 inlink
->min_samples
= inlink
->max_samples
= inlink
->partial_buf_size
=
263 av_realloc_f(s
->combine_buffer
, outlink
->h
* 3,
264 sizeof(*s
->combine_buffer
));
266 av_log(ctx
, AV_LOG_VERBOSE
, "s:%dx%d RDFT window size:%d\n",
267 s
->w
, s
->h
, win_size
);
271 static int request_frame(AVFilterLink
*outlink
)
273 ShowSpectrumContext
*s
= outlink
->src
->priv
;
274 AVFilterLink
*inlink
= outlink
->src
->inputs
[0];
278 s
->req_fullfilled
= 0;
280 ret
= ff_request_frame(inlink
);
281 if (ret
== AVERROR_EOF
&& s
->sliding
== FULLFRAME
&& s
->xpos
> 0 &&
283 for (i
= 0; i
< outlink
->h
; i
++) {
284 memset(s
->outpicref
->data
[0] + i
* s
->outpicref
->linesize
[0] + s
->xpos
, 0, outlink
->w
- s
->xpos
);
285 memset(s
->outpicref
->data
[1] + i
* s
->outpicref
->linesize
[1] + s
->xpos
, 128, outlink
->w
- s
->xpos
);
286 memset(s
->outpicref
->data
[2] + i
* s
->outpicref
->linesize
[2] + s
->xpos
, 128, outlink
->w
- s
->xpos
);
288 ret
= ff_filter_frame(outlink
, s
->outpicref
);
290 s
->req_fullfilled
= 1;
292 } while (!s
->req_fullfilled
&& ret
>= 0);
297 static int plot_spectrum_column(AVFilterLink
*inlink
, AVFrame
*insamples
)
300 AVFilterContext
*ctx
= inlink
->dst
;
301 AVFilterLink
*outlink
= ctx
->outputs
[0];
302 ShowSpectrumContext
*s
= ctx
->priv
;
303 AVFrame
*outpicref
= s
->outpicref
;
305 /* nb_freq contains the power of two superior or equal to the output image
306 * height (or half the RDFT window size) */
307 const int nb_freq
= 1 << (s
->rdft_bits
- 1);
308 const int win_size
= nb_freq
<< 1;
309 const double w
= 1. / (sqrt(nb_freq
) * 32768.);
310 int h
= s
->channel_height
;
314 av_assert0(insamples
->nb_samples
== win_size
);
316 /* fill RDFT input with the number of samples available */
317 for (ch
= 0; ch
< s
->nb_display_channels
; ch
++) {
318 const int16_t *p
= (int16_t *)insamples
->extended_data
[ch
];
320 for (n
= 0; n
< win_size
; n
++)
321 s
->rdft_data
[ch
][n
] = p
[n
] * s
->window_func_lut
[n
];
326 /* run RDFT on each samples set */
327 for (ch
= 0; ch
< s
->nb_display_channels
; ch
++)
328 av_rdft_calc(s
->rdft
, s
->rdft_data
[ch
]);
330 /* fill a new spectrum column */
331 #define RE(y, ch) s->rdft_data[ch][2 * (y) + 0]
332 #define IM(y, ch) s->rdft_data[ch][2 * (y) + 1]
333 #define MAGNITUDE(y, ch) hypot(RE(y, ch), IM(y, ch))
335 /* initialize buffer for combining to black */
336 for (y
= 0; y
< outlink
->h
; y
++) {
337 s
->combine_buffer
[3 * y
] = 0;
338 s
->combine_buffer
[3 * y
+ 1] = 127.5;
339 s
->combine_buffer
[3 * y
+ 2] = 127.5;
342 for (ch
= 0; ch
< s
->nb_display_channels
; ch
++) {
345 /* decide color range */
348 // reduce range by channel count
349 yf
= 256.0f
/ s
->nb_display_channels
;
350 switch (s
->color_mode
) {
356 /* adjust saturation for mixed UV coloring */
357 /* this factor is correct for infinite channels, an approximation otherwise */
375 if (s
->color_mode
== CHANNEL
) {
376 if (s
->nb_display_channels
> 1) {
377 uf
*= 0.5 * sin((2 * M_PI
* ch
) / s
->nb_display_channels
);
378 vf
*= 0.5 * cos((2 * M_PI
* ch
) / s
->nb_display_channels
);
387 /* draw the channel */
388 for (y
= 0; y
< h
; y
++) {
389 int row
= (s
->mode
== COMBINED
) ? y
: ch
* h
+ y
;
390 float *out
= &s
->combine_buffer
[3 * row
];
393 float a
= w
* MAGNITUDE(y
, ch
);
406 a
= 1 - log(FFMAX(FFMIN(1, a
), 1e-6)) / log(1e-6); // zero = -120dBFS
412 if (s
->color_mode
== INTENSITY
) {
416 for (i
= 1; i
< sizeof(intensity_color_table
) / sizeof(*intensity_color_table
) - 1; i
++)
417 if (intensity_color_table
[i
].a
>= a
)
419 // i now is the first item >= the color
420 // now we know to interpolate between item i - 1 and i
421 if (a
<= intensity_color_table
[i
- 1].a
) {
422 y
= intensity_color_table
[i
- 1].y
;
423 u
= intensity_color_table
[i
- 1].u
;
424 v
= intensity_color_table
[i
- 1].v
;
425 } else if (a
>= intensity_color_table
[i
].a
) {
426 y
= intensity_color_table
[i
].y
;
427 u
= intensity_color_table
[i
].u
;
428 v
= intensity_color_table
[i
].v
;
430 float start
= intensity_color_table
[i
- 1].a
;
431 float end
= intensity_color_table
[i
].a
;
432 float lerpfrac
= (a
- start
) / (end
- start
);
433 y
= intensity_color_table
[i
- 1].y
* (1.0f
- lerpfrac
)
434 + intensity_color_table
[i
].y
* lerpfrac
;
435 u
= intensity_color_table
[i
- 1].u
* (1.0f
- lerpfrac
)
436 + intensity_color_table
[i
].u
* lerpfrac
;
437 v
= intensity_color_table
[i
- 1].v
* (1.0f
- lerpfrac
)
438 + intensity_color_table
[i
].v
* lerpfrac
;
453 if (s
->sliding
== SCROLL
) {
454 for (plane
= 0; plane
< 3; plane
++) {
455 for (y
= 0; y
< outlink
->h
; y
++) {
456 uint8_t *p
= outpicref
->data
[plane
] +
457 y
* outpicref
->linesize
[plane
];
458 memmove(p
, p
+ 1, outlink
->w
- 1);
461 s
->xpos
= outlink
->w
- 1;
463 for (plane
= 0; plane
< 3; plane
++) {
464 uint8_t *p
= outpicref
->data
[plane
] +
465 (outlink
->h
- 1) * outpicref
->linesize
[plane
] +
467 for (y
= 0; y
< outlink
->h
; y
++) {
468 *p
= rint(FFMAX(0, FFMIN(s
->combine_buffer
[3 * y
+ plane
], 255)));
469 p
-= outpicref
->linesize
[plane
];
473 if (s
->sliding
!= FULLFRAME
|| s
->xpos
== 0)
474 outpicref
->pts
= insamples
->pts
;
477 if (s
->xpos
>= outlink
->w
)
479 if (s
->sliding
!= FULLFRAME
|| s
->xpos
== 0) {
480 s
->req_fullfilled
= 1;
481 ret
= ff_filter_frame(outlink
, av_frame_clone(s
->outpicref
));
489 static int filter_frame(AVFilterLink
*inlink
, AVFrame
*insamples
)
491 AVFilterContext
*ctx
= inlink
->dst
;
492 ShowSpectrumContext
*s
= ctx
->priv
;
493 unsigned win_size
= 1 << s
->rdft_bits
;
496 av_assert0(insamples
->nb_samples
<= win_size
);
497 if (insamples
->nb_samples
== win_size
)
498 ret
= plot_spectrum_column(inlink
, insamples
);
500 av_frame_free(&insamples
);
/* Input pads of the filter: one audio pad whose frames are handled by
 * filter_frame().
 * NOTE(review): the lines between the ones below (opening brace, pad name,
 * list terminator) are not visible in this excerpt; confirm against the
 * complete file. */
504 static const AVFilterPad showspectrum_inputs
[] = {
507 .type
= AVMEDIA_TYPE_AUDIO
,
508 .filter_frame
= filter_frame
,
/* Output pads of the filter: one video pad, configured by config_output()
 * and fed on demand through request_frame().
 * NOTE(review): the lines between the ones below (opening brace, pad name,
 * list terminator) are not visible in this excerpt; confirm against the
 * complete file. */
513 static const AVFilterPad showspectrum_outputs
[] = {
516 .type
= AVMEDIA_TYPE_VIDEO
,
517 .config_props
= config_output
,
518 .request_frame
= request_frame
,
/* Filter definition: ties the "showspectrum" name to the callbacks, pads,
 * private context size and option class defined in this file.
 * NOTE(review): the definition is cut off in this excerpt (one initializer
 * line and the closing brace are not visible); presumably the missing line
 * registers uninit(). Confirm against the complete file. */
523 AVFilter ff_avf_showspectrum
= {
524 .name
= "showspectrum",
525 .description
= NULL_IF_CONFIG_SMALL("Convert input audio to a spectrum video output."),
527 .query_formats
= query_formats
,
528 .priv_size
= sizeof(ShowSpectrumContext
),
529 .inputs
= showspectrum_inputs
,
530 .outputs
= showspectrum_outputs
,
531 .priv_class
= &showspectrum_class
,