/*
 * Copyright (c) 2014 Muhammad Faiz <mfcc64@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22 #include "libavcodec/avfft.h"
23 #include "libavutil/avassert.h"
24 #include "libavutil/channel_layout.h"
25 #include "libavutil/opt.h"
26 #include "libavutil/xga_font_data.h"
27 #include "libavutil/qsort.h"
28 #include "libavutil/time.h"
29 #include "libavutil/eval.h"
36 #if CONFIG_LIBFREETYPE
38 #include FT_FREETYPE_H
/* this filter is designed to do 16 bins/semitones constant Q transform with Brown-Puckette algorithm
 * start from E0 to D#10 (10 octaves)
 * so there are 16 bins/semitones * 12 semitones/octaves * 10 octaves = 1920 bins
 * match with full HD resolution */
/* Output geometry at full HD; the code halves these values when the
 * "fullhd" option is off. */
#define VIDEO_WIDTH 1920
#define VIDEO_HEIGHT 1080
#define FONT_HEIGHT 32
/* The frame is split into a bar graph on top, a FONT_HEIGHT strip of note
 * names, and the scrolling spectrogram at the bottom. */
#define SPECTOGRAM_HEIGHT ((VIDEO_HEIGHT-FONT_HEIGHT)/2)
#define SPECTOGRAM_START (VIDEO_HEIGHT-SPECTOGRAM_HEIGHT)
/* Frequency (Hz) of the first of the VIDEO_WIDTH bins. */
#define BASE_FREQ 20.051392800492
/* Scale applied to the "coeffclamp" option when sparsifying the kernel. */
#define COEFF_CLAMP 1.0e-4
/* Clamping range and default expression for the "tlength" option. */
#define TLENGTH_MIN 0.001
#define TLENGTH_DEFAULT "384/f*tc/(384/f+tc)"
/* Clamping range for the evaluated "volume" expression. */
#define VOLUME_MIN 1e-10
#define VOLUME_MAX 100.0
/* Default "fontcolor" expression, built from midi(), r() and b(). */
#define FONTCOLOR_DEFAULT "st(0, (midi(f)-59.5)/12);" \
    "st(1, if(between(ld(0),0,1), 0.5-0.5*cos(2*PI*ld(0)), 0));" \
    "r(1-ld(1)) + b(ld(1))"
69 FFTContext
*fft_context
;
71 FFTComplex
*fft_result_left
;
72 FFTComplex
*fft_result_right
;
74 SparseCoeff
*coeff_sort
;
75 SparseCoeff
*coeffs
[VIDEO_WIDTH
];
77 char *fontfile
; /* using freetype */
78 int coeffs_len
[VIDEO_WIDTH
];
79 uint8_t fontcolor_value
[VIDEO_WIDTH
*3]; /* result of fontcolor option */
89 double timeclamp
; /* lower timeclamp, time-accurate, higher timeclamp, freq-accurate (at low freq)*/
90 float coeffclamp
; /* lower coeffclamp, more precise, higher coeffclamp, faster */
91 int fullhd
; /* if true, output video is at full HD resolution, otherwise it will be halved */
92 float gamma
; /* lower gamma, more contrast, higher gamma, more range */
93 int fps
; /* the required fps is so strict, so it's enough to be int, but 24000/1001 etc cannot be encoded */
94 int count
; /* fps * count = transform rate */
/* Byte offset of an option's backing field inside ShowCQTContext. */
#define OFFSET(x) offsetof(ShowCQTContext, x)
/* Flags shared by every option; parenthesized so the macro stays a single
 * operand wherever it is expanded. */
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
100 static const AVOption showcqt_options
[] = {
101 { "volume", "set volume", OFFSET(volume
), AV_OPT_TYPE_STRING
, { .str
= "16" }, CHAR_MIN
, CHAR_MAX
, FLAGS
},
102 { "tlength", "set transform length", OFFSET(tlength
), AV_OPT_TYPE_STRING
, { .str
= TLENGTH_DEFAULT
}, CHAR_MIN
, CHAR_MAX
, FLAGS
},
103 { "timeclamp", "set timeclamp", OFFSET(timeclamp
), AV_OPT_TYPE_DOUBLE
, { .dbl
= 0.17 }, 0.1, 1.0, FLAGS
},
104 { "coeffclamp", "set coeffclamp", OFFSET(coeffclamp
), AV_OPT_TYPE_FLOAT
, { .dbl
= 1 }, 0.1, 10, FLAGS
},
105 { "gamma", "set gamma", OFFSET(gamma
), AV_OPT_TYPE_FLOAT
, { .dbl
= 3 }, 1, 7, FLAGS
},
106 { "fullhd", "set full HD resolution", OFFSET(fullhd
), AV_OPT_TYPE_INT
, { .i64
= 1 }, 0, 1, FLAGS
},
107 { "fps", "set video fps", OFFSET(fps
), AV_OPT_TYPE_INT
, { .i64
= 25 }, 10, 100, FLAGS
},
108 { "count", "set number of transform per frame", OFFSET(count
), AV_OPT_TYPE_INT
, { .i64
= 6 }, 1, 30, FLAGS
},
109 { "fontfile", "set font file", OFFSET(fontfile
), AV_OPT_TYPE_STRING
, { .str
= NULL
}, CHAR_MIN
, CHAR_MAX
, FLAGS
},
110 { "fontcolor", "set font color", OFFSET(fontcolor
), AV_OPT_TYPE_STRING
, { .str
= FONTCOLOR_DEFAULT
}, CHAR_MIN
, CHAR_MAX
, FLAGS
},
114 AVFILTER_DEFINE_CLASS(showcqt
);
116 static av_cold
void uninit(AVFilterContext
*ctx
)
120 ShowCQTContext
*s
= ctx
->priv
;
121 av_fft_end(s
->fft_context
);
122 s
->fft_context
= NULL
;
123 for (k
= 0; k
< VIDEO_WIDTH
; k
++)
124 av_freep(&s
->coeffs
[k
]);
125 av_freep(&s
->fft_data
);
126 av_freep(&s
->fft_result_left
);
127 av_freep(&s
->fft_result_right
);
128 av_freep(&s
->coeff_sort
);
129 av_freep(&s
->spectogram
);
130 av_freep(&s
->font_alpha
);
131 av_frame_free(&s
->outpicref
);
134 static int query_formats(AVFilterContext
*ctx
)
136 AVFilterFormats
*formats
= NULL
;
137 AVFilterChannelLayouts
*layouts
= NULL
;
138 AVFilterLink
*inlink
= ctx
->inputs
[0];
139 AVFilterLink
*outlink
= ctx
->outputs
[0];
140 static const enum AVSampleFormat sample_fmts
[] = { AV_SAMPLE_FMT_FLT
, AV_SAMPLE_FMT_NONE
};
141 static const enum AVPixelFormat pix_fmts
[] = { AV_PIX_FMT_RGB24
, AV_PIX_FMT_NONE
};
142 static const int64_t channel_layouts
[] = { AV_CH_LAYOUT_STEREO
, AV_CH_LAYOUT_STEREO_DOWNMIX
, -1 };
143 static const int samplerates
[] = { 44100, 48000, -1 };
145 /* set input audio formats */
146 formats
= ff_make_format_list(sample_fmts
);
148 return AVERROR(ENOMEM
);
149 ff_formats_ref(formats
, &inlink
->out_formats
);
151 layouts
= avfilter_make_format64_list(channel_layouts
);
153 return AVERROR(ENOMEM
);
154 ff_channel_layouts_ref(layouts
, &inlink
->out_channel_layouts
);
156 formats
= ff_make_format_list(samplerates
);
158 return AVERROR(ENOMEM
);
159 ff_formats_ref(formats
, &inlink
->out_samplerates
);
161 /* set output video format */
162 formats
= ff_make_format_list(pix_fmts
);
164 return AVERROR(ENOMEM
);
165 ff_formats_ref(formats
, &outlink
->in_formats
);
#if CONFIG_LIBFREETYPE
/**
 * Render the note-name strip ("EF G A BC D ", repeated for 10 octaves)
 * with the font named by the "fontfile" option into s->font_alpha, one
 * coverage byte per output pixel.
 *
 * On any failure a warning is logged and s->font_alpha is left NULL, which
 * makes the drawing code fall back to the built-in bitmap font. When no
 * font file was given, s->font_alpha is simply left NULL.
 */
static void load_freetype_font(AVFilterContext *ctx)
{
    static const char str[] = "EF G A BC D ";
    ShowCQTContext *s = ctx->priv;
    FT_Library lib = NULL;
    FT_Face face = NULL;
    int video_scale = s->fullhd ? 2 : 1;
    int video_width = (VIDEO_WIDTH/2) * video_scale;
    int font_height = (FONT_HEIGHT/2) * video_scale;
    int font_width = 8 * video_scale;
    int font_repeat = font_width * 12;
    int linear_hori_advance = font_width * 65536;
    int non_monospace_warning = 0;
    int x;

    s->font_alpha = NULL;

    if (!s->fontfile)
        return;

    if (FT_Init_FreeType(&lib))
        goto fail;

    if (FT_New_Face(lib, s->fontfile, 0, &face))
        goto fail;

    if (FT_Set_Char_Size(face, 16*64, 0, 0, 0))
        goto fail;

    /* measure a reference glyph, then rescale so that one glyph advances
     * exactly font_width pixels */
    if (FT_Load_Char(face, 'A', FT_LOAD_RENDER))
        goto fail;

    /* a zero (or negative) advance would make the division below undefined */
    if (face->glyph->linearHoriAdvance <= 0)
        goto fail;

    if (FT_Set_Char_Size(face, 16*64 * linear_hori_advance / face->glyph->linearHoriAdvance, 0, 0, 0))
        goto fail;

    s->font_alpha = av_malloc(font_height * video_width);
    if (!s->font_alpha)
        goto fail;

    memset(s->font_alpha, 0, font_height * video_width);

    for (x = 0; x < 12; x++) {
        int sx, sy, rx, bx, by, dx, dy;

        /* blanks stand for the semitones that are not labelled */
        if (str[x] == ' ')
            continue;

        if (FT_Load_Char(face, str[x], FT_LOAD_RENDER))
            goto fail;

        if (face->glyph->advance.x != font_width*64 && !non_monospace_warning) {
            av_log(ctx, AV_LOG_WARNING, "Font is not monospace\n");
            non_monospace_warning = 1;
        }

        sy = font_height - 4*video_scale - face->glyph->bitmap_top;
        /* stamp the same glyph once per octave */
        for (rx = 0; rx < 10; rx++) {
            sx = rx * font_repeat + x * font_width + face->glyph->bitmap_left;
            for (by = 0; by < face->glyph->bitmap.rows; by++) {
                dy = by + sy;
                if (dy < 0)
                    continue;
                if (dy >= font_height)
                    break;

                for (bx = 0; bx < face->glyph->bitmap.width; bx++) {
                    dx = bx + sx;
                    if (dx < 0)
                        continue;
                    if (dx >= video_width)
                        break;
                    /* rows of an FT_Bitmap are 'pitch' bytes apart, which
                     * is not guaranteed to equal the width */
                    s->font_alpha[dy*video_width+dx] = face->glyph->bitmap.buffer[by*face->glyph->bitmap.pitch+bx];
                }
            }
        }
    }

    FT_Done_Face(face);
    FT_Done_FreeType(lib);
    return;

fail:
    av_log(ctx, AV_LOG_WARNING, "Error while loading freetype font, using default font instead\n");
    FT_Done_Face(face);
    FT_Done_FreeType(lib);
    av_freep(&s->font_alpha);
    return;
}
#endif
261 static double a_weighting(void *p
, double f
)
263 double ret
= 12200.0*12200.0 * (f
*f
*f
*f
);
264 ret
/= (f
*f
+ 20.6*20.6) * (f
*f
+ 12200.0*12200.0) *
265 sqrt((f
*f
+ 107.7*107.7) * (f
*f
+ 737.9*737.9));
269 static double b_weighting(void *p
, double f
)
271 double ret
= 12200.0*12200.0 * (f
*f
*f
);
272 ret
/= (f
*f
+ 20.6*20.6) * (f
*f
+ 12200.0*12200.0) * sqrt(f
*f
+ 158.5*158.5);
/**
 * C-weighting curve, exposed as c_weighting() in the "volume" expression.
 *
 * @param p unused (opaque pointer required by the expression evaluator)
 * @param f frequency in Hz
 * @return  linear gain of the C-weighting curve at f
 */
static double c_weighting(void *p, double f)
{
    double ret = 12200.0*12200.0 * (f*f);
    ret /= (f*f + 20.6*20.6) * (f*f + 12200.0*12200.0);
    return ret;
}
283 static double midi(void *p
, double f
)
285 return log2(f
/440.0) * 12.0 + 69.0;
/**
 * r() in the "fontcolor" expression: clip x to [0, 1], scale it to 0..255
 * with rounding and place it in bits 16..23 of the packed colour value.
 *
 * @param p unused (opaque pointer required by the expression evaluator)
 */
static double r_func(void *p, double x)
{
    x = av_clipd(x, 0.0, 1.0);
    return (int)(x*255.0+0.5) << 16;
}
/**
 * g() in the "fontcolor" expression: clip x to [0, 1], scale it to 0..255
 * with rounding and place it in bits 8..15 of the packed colour value.
 *
 * @param p unused (opaque pointer required by the expression evaluator)
 */
static double g_func(void *p, double x)
{
    x = av_clipd(x, 0.0, 1.0);
    return (int)(x*255.0+0.5) << 8;
}
/**
 * b() in the "fontcolor" expression: clip x to [0, 1], scale it to 0..255
 * with rounding and place it in bits 0..7 of the packed colour value.
 *
 * @param p unused (opaque pointer required by the expression evaluator)
 */
static double b_func(void *p, double x)
{
    x = av_clipd(x, 0.0, 1.0);
    return (int)(x*255.0+0.5);
}
306 static inline int qsort_sparsecoeff(const SparseCoeff
*a
, const SparseCoeff
*b
)
308 if (fabsf(a
->value
) >= fabsf(b
->value
))
314 static int config_output(AVFilterLink
*outlink
)
316 AVFilterContext
*ctx
= outlink
->src
;
317 AVFilterLink
*inlink
= ctx
->inputs
[0];
318 ShowCQTContext
*s
= ctx
->priv
;
319 AVExpr
*tlength_expr
= NULL
, *volume_expr
= NULL
, *fontcolor_expr
= NULL
;
320 uint8_t *fontcolor_value
= s
->fontcolor_value
;
321 static const char * const expr_vars
[] = { "timeclamp", "tc", "frequency", "freq", "f", NULL
};
322 static const char * const expr_func_names
[] = { "a_weighting", "b_weighting", "c_weighting", NULL
};
323 static const char * const expr_fontcolor_func_names
[] = { "midi", "r", "g", "b", NULL
};
324 static double (* const expr_funcs
[])(void *, double) = { a_weighting
, b_weighting
, c_weighting
, NULL
};
325 static double (* const expr_fontcolor_funcs
[])(void *, double) = { midi
, r_func
, g_func
, b_func
, NULL
};
326 int fft_len
, k
, x
, y
, ret
;
328 int rate
= inlink
->sample_rate
;
329 double max_len
= rate
* (double) s
->timeclamp
;
330 int64_t start_time
, end_time
;
331 int video_scale
= s
->fullhd
? 2 : 1;
332 int video_width
= (VIDEO_WIDTH
/2) * video_scale
;
333 int video_height
= (VIDEO_HEIGHT
/2) * video_scale
;
334 int spectogram_height
= (SPECTOGRAM_HEIGHT
/2) * video_scale
;
336 s
->fft_bits
= ceil(log2(max_len
));
337 fft_len
= 1 << s
->fft_bits
;
339 if (rate
% (s
->fps
* s
->count
)) {
340 av_log(ctx
, AV_LOG_ERROR
, "Rate (%u) is not divisible by fps*count (%u*%u)\n", rate
, s
->fps
, s
->count
);
341 return AVERROR(EINVAL
);
344 s
->fft_data
= av_malloc_array(fft_len
, sizeof(*s
->fft_data
));
345 s
->coeff_sort
= av_malloc_array(fft_len
, sizeof(*s
->coeff_sort
));
346 s
->fft_result_left
= av_malloc_array(fft_len
, sizeof(*s
->fft_result_left
));
347 s
->fft_result_right
= av_malloc_array(fft_len
, sizeof(*s
->fft_result_right
));
348 s
->fft_context
= av_fft_init(s
->fft_bits
, 0);
350 if (!s
->fft_data
|| !s
->coeff_sort
|| !s
->fft_result_left
|| !s
->fft_result_right
|| !s
->fft_context
)
351 return AVERROR(ENOMEM
);
353 #if CONFIG_LIBFREETYPE
354 load_freetype_font(ctx
);
357 av_log(ctx
, AV_LOG_WARNING
, "Freetype is not available, ignoring fontfile option\n");
358 s
->font_alpha
= NULL
;
361 av_log(ctx
, AV_LOG_INFO
, "Calculating spectral kernel, please wait\n");
362 start_time
= av_gettime_relative();
363 ret
= av_expr_parse(&tlength_expr
, s
->tlength
, expr_vars
, NULL
, NULL
, NULL
, NULL
, 0, ctx
);
367 ret
= av_expr_parse(&volume_expr
, s
->volume
, expr_vars
, expr_func_names
,
368 expr_funcs
, NULL
, NULL
, 0, ctx
);
372 ret
= av_expr_parse(&fontcolor_expr
, s
->fontcolor
, expr_vars
, expr_fontcolor_func_names
,
373 expr_fontcolor_funcs
, NULL
, NULL
, 0, ctx
);
377 for (k
= 0; k
< VIDEO_WIDTH
; k
++) {
378 int hlen
= fft_len
>> 1;
381 double freq
= BASE_FREQ
* exp2(k
* (1.0/192.0));
382 double tlen
, tlength
, volume
;
383 double expr_vars_val
[] = { s
->timeclamp
, s
->timeclamp
, freq
, freq
, freq
, 0 };
384 /* a window function from Albert H. Nuttall,
385 * "Some Windows with Very Good Sidelobe Behavior"
386 * -93.32 dB peak sidelobe and 18 dB/octave asymptotic decay
387 * coefficient normalized to a0 = 1 */
388 double a0
= 0.355768;
389 double a1
= 0.487396/a0
;
390 double a2
= 0.144232/a0
;
391 double a3
= 0.012604/a0
;
392 double sv_step
, cv_step
, sv
, cv
;
393 double sw_step
, cw_step
, sw
, cw
, w
;
395 tlength
= av_expr_eval(tlength_expr
, expr_vars_val
, NULL
);
396 if (isnan(tlength
)) {
397 av_log(ctx
, AV_LOG_WARNING
, "at freq %g: tlength is nan, setting it to %g\n", freq
, s
->timeclamp
);
398 tlength
= s
->timeclamp
;
399 } else if (tlength
< TLENGTH_MIN
) {
400 av_log(ctx
, AV_LOG_WARNING
, "at freq %g: tlength is %g, setting it to %g\n", freq
, tlength
, TLENGTH_MIN
);
401 tlength
= TLENGTH_MIN
;
402 } else if (tlength
> s
->timeclamp
) {
403 av_log(ctx
, AV_LOG_WARNING
, "at freq %g: tlength is %g, setting it to %g\n", freq
, tlength
, s
->timeclamp
);
404 tlength
= s
->timeclamp
;
407 volume
= FFABS(av_expr_eval(volume_expr
, expr_vars_val
, NULL
));
409 av_log(ctx
, AV_LOG_WARNING
, "at freq %g: volume is nan, setting it to 0\n", freq
);
411 } else if (volume
< VOLUME_MIN
) {
413 } else if (volume
> VOLUME_MAX
) {
414 av_log(ctx
, AV_LOG_WARNING
, "at freq %g: volume is %g, setting it to %g\n", freq
, volume
, VOLUME_MAX
);
418 if (s
->fullhd
|| !(k
& 1)) {
419 int fontcolor
= av_expr_eval(fontcolor_expr
, expr_vars_val
, NULL
);
420 fontcolor_value
[0] = (fontcolor
>> 16) & 0xFF;
421 fontcolor_value
[1] = (fontcolor
>> 8) & 0xFF;
422 fontcolor_value
[2] = fontcolor
& 0xFF;
423 fontcolor_value
+= 3;
426 tlen
= tlength
* rate
;
427 s
->fft_data
[0].re
= 0;
428 s
->fft_data
[0].im
= 0;
429 s
->fft_data
[hlen
].re
= (1.0 + a1
+ a2
+ a3
) * (1.0/tlen
) * volume
* (1.0/fft_len
);
430 s
->fft_data
[hlen
].im
= 0;
431 sv_step
= sv
= sin(2.0*M_PI
*freq
*(1.0/rate
));
432 cv_step
= cv
= cos(2.0*M_PI
*freq
*(1.0/rate
));
433 /* also optimizing window func */
434 sw_step
= sw
= sin(2.0*M_PI
*(1.0/tlen
));
435 cw_step
= cw
= cos(2.0*M_PI
*(1.0/tlen
));
436 for (x
= 1; x
< 0.5 * tlen
; x
++) {
437 double cv_tmp
, cw_tmp
;
438 double cw2
, cw3
, sw2
;
440 cw2
= cw
* cw
- sw
* sw
;
441 sw2
= cw
* sw
+ sw
* cw
;
442 cw3
= cw
* cw2
- sw
* sw2
;
443 w
= (1.0 + a1
* cw
+ a2
* cw2
+ a3
* cw3
) * (1.0/tlen
) * volume
* (1.0/fft_len
);
444 s
->fft_data
[hlen
+ x
].re
= w
* cv
;
445 s
->fft_data
[hlen
+ x
].im
= w
* sv
;
446 s
->fft_data
[hlen
- x
].re
= s
->fft_data
[hlen
+ x
].re
;
447 s
->fft_data
[hlen
- x
].im
= -s
->fft_data
[hlen
+ x
].im
;
449 cv_tmp
= cv
* cv_step
- sv
* sv_step
;
450 sv
= sv
* cv_step
+ cv
* sv_step
;
452 cw_tmp
= cw
* cw_step
- sw
* sw_step
;
453 sw
= sw
* cw_step
+ cw
* sw_step
;
456 for (; x
< hlen
; x
++) {
457 s
->fft_data
[hlen
+ x
].re
= 0;
458 s
->fft_data
[hlen
+ x
].im
= 0;
459 s
->fft_data
[hlen
- x
].re
= 0;
460 s
->fft_data
[hlen
- x
].im
= 0;
462 av_fft_permute(s
->fft_context
, s
->fft_data
);
463 av_fft_calc(s
->fft_context
, s
->fft_data
);
465 for (x
= 0; x
< fft_len
; x
++) {
466 s
->coeff_sort
[x
].index
= x
;
467 s
->coeff_sort
[x
].value
= s
->fft_data
[x
].re
;
470 AV_QSORT(s
->coeff_sort
, fft_len
, SparseCoeff
, qsort_sparsecoeff
);
471 for (x
= 0; x
< fft_len
; x
++)
472 total
+= fabsf(s
->coeff_sort
[x
].value
);
474 for (x
= 0; x
< fft_len
; x
++) {
475 partial
+= fabsf(s
->coeff_sort
[x
].value
);
476 if (partial
> total
* s
->coeffclamp
* COEFF_CLAMP
) {
477 s
->coeffs_len
[k
] = fft_len
- x
;
478 num_coeffs
+= s
->coeffs_len
[k
];
479 s
->coeffs
[k
] = av_malloc_array(s
->coeffs_len
[k
], sizeof(*s
->coeffs
[k
]));
481 ret
= AVERROR(ENOMEM
);
484 for (y
= 0; y
< s
->coeffs_len
[k
]; y
++)
485 s
->coeffs
[k
][y
] = s
->coeff_sort
[x
+y
];
490 av_expr_free(fontcolor_expr
);
491 av_expr_free(volume_expr
);
492 av_expr_free(tlength_expr
);
493 end_time
= av_gettime_relative();
494 av_log(ctx
, AV_LOG_INFO
, "Elapsed time %.6f s (fft_len=%u, num_coeffs=%u)\n", 1e-6 * (end_time
-start_time
), fft_len
, num_coeffs
);
496 outlink
->w
= video_width
;
497 outlink
->h
= video_height
;
499 s
->req_fullfilled
= 0;
500 s
->spectogram_index
= 0;
502 s
->spectogram_count
= 0;
503 s
->remaining_fill
= fft_len
>> 1;
504 memset(s
->fft_data
, 0, fft_len
* sizeof(*s
->fft_data
));
506 s
->outpicref
= ff_get_video_buffer(outlink
, outlink
->w
, outlink
->h
);
508 return AVERROR(ENOMEM
);
510 s
->spectogram
= av_calloc(spectogram_height
, s
->outpicref
->linesize
[0]);
512 return AVERROR(ENOMEM
);
514 outlink
->sample_aspect_ratio
= av_make_q(1, 1);
515 outlink
->time_base
= av_make_q(1, s
->fps
);
516 outlink
->frame_rate
= av_make_q(s
->fps
, 1);
520 av_expr_free(fontcolor_expr
);
521 av_expr_free(volume_expr
);
522 av_expr_free(tlength_expr
);
526 static int plot_cqt(AVFilterLink
*inlink
)
528 AVFilterContext
*ctx
= inlink
->dst
;
529 ShowCQTContext
*s
= ctx
->priv
;
530 AVFilterLink
*outlink
= ctx
->outputs
[0];
531 int fft_len
= 1 << s
->fft_bits
;
532 FFTSample result
[VIDEO_WIDTH
][4];
534 int linesize
= s
->outpicref
->linesize
[0];
535 int video_scale
= s
->fullhd
? 2 : 1;
536 int video_width
= (VIDEO_WIDTH
/2) * video_scale
;
537 int spectogram_height
= (SPECTOGRAM_HEIGHT
/2) * video_scale
;
538 int spectogram_start
= (SPECTOGRAM_START
/2) * video_scale
;
539 int font_height
= (FONT_HEIGHT
/2) * video_scale
;
541 /* real part contains left samples, imaginary part contains right samples */
542 memcpy(s
->fft_result_left
, s
->fft_data
, fft_len
* sizeof(*s
->fft_data
));
543 av_fft_permute(s
->fft_context
, s
->fft_result_left
);
544 av_fft_calc(s
->fft_context
, s
->fft_result_left
);
546 /* separate left and right, (and multiply by 2.0) */
547 s
->fft_result_right
[0].re
= 2.0f
* s
->fft_result_left
[0].im
;
548 s
->fft_result_right
[0].im
= 0;
549 s
->fft_result_left
[0].re
= 2.0f
* s
->fft_result_left
[0].re
;
550 s
->fft_result_left
[0].im
= 0;
551 for (x
= 1; x
<= fft_len
>> 1; x
++) {
552 FFTSample tmpy
= s
->fft_result_left
[fft_len
-x
].im
- s
->fft_result_left
[x
].im
;
554 s
->fft_result_right
[x
].re
= s
->fft_result_left
[x
].im
+ s
->fft_result_left
[fft_len
-x
].im
;
555 s
->fft_result_right
[x
].im
= s
->fft_result_left
[x
].re
- s
->fft_result_left
[fft_len
-x
].re
;
556 s
->fft_result_right
[fft_len
-x
].re
= s
->fft_result_right
[x
].re
;
557 s
->fft_result_right
[fft_len
-x
].im
= -s
->fft_result_right
[x
].im
;
559 s
->fft_result_left
[x
].re
= s
->fft_result_left
[x
].re
+ s
->fft_result_left
[fft_len
-x
].re
;
560 s
->fft_result_left
[x
].im
= tmpy
;
561 s
->fft_result_left
[fft_len
-x
].re
= s
->fft_result_left
[x
].re
;
562 s
->fft_result_left
[fft_len
-x
].im
= -s
->fft_result_left
[x
].im
;
565 /* calculating cqt */
566 for (x
= 0; x
< VIDEO_WIDTH
; x
++) {
568 float g
= 1.0f
/ s
->gamma
;
569 FFTComplex l
= {0,0};
570 FFTComplex r
= {0,0};
572 for (u
= 0; u
< s
->coeffs_len
[x
]; u
++) {
573 FFTSample value
= s
->coeffs
[x
][u
].value
;
574 int index
= s
->coeffs
[x
][u
].index
;
575 l
.re
+= value
* s
->fft_result_left
[index
].re
;
576 l
.im
+= value
* s
->fft_result_left
[index
].im
;
577 r
.re
+= value
* s
->fft_result_right
[index
].re
;
578 r
.im
+= value
* s
->fft_result_right
[index
].im
;
580 /* result is power, not amplitude */
581 result
[x
][0] = l
.re
* l
.re
+ l
.im
* l
.im
;
582 result
[x
][2] = r
.re
* r
.re
+ r
.im
* r
.im
;
583 result
[x
][1] = 0.5f
* (result
[x
][0] + result
[x
][2]);
584 result
[x
][3] = result
[x
][1];
585 result
[x
][0] = 255.0f
* powf(FFMIN(1.0f
,result
[x
][0]), g
);
586 result
[x
][1] = 255.0f
* powf(FFMIN(1.0f
,result
[x
][1]), g
);
587 result
[x
][2] = 255.0f
* powf(FFMIN(1.0f
,result
[x
][2]), g
);
591 for (x
= 0; x
< video_width
; x
++) {
592 result
[x
][0] = 0.5f
* (result
[2*x
][0] + result
[2*x
+1][0]);
593 result
[x
][1] = 0.5f
* (result
[2*x
][1] + result
[2*x
+1][1]);
594 result
[x
][2] = 0.5f
* (result
[2*x
][2] + result
[2*x
+1][2]);
595 result
[x
][3] = 0.5f
* (result
[2*x
][3] + result
[2*x
+1][3]);
599 for (x
= 0; x
< video_width
; x
++) {
600 s
->spectogram
[s
->spectogram_index
*linesize
+ 3*x
] = result
[x
][0] + 0.5f
;
601 s
->spectogram
[s
->spectogram_index
*linesize
+ 3*x
+ 1] = result
[x
][1] + 0.5f
;
602 s
->spectogram
[s
->spectogram_index
*linesize
+ 3*x
+ 2] = result
[x
][2] + 0.5f
;
606 if (!s
->spectogram_count
) {
607 uint8_t *data
= (uint8_t*) s
->outpicref
->data
[0];
608 float rcp_result
[VIDEO_WIDTH
];
609 int total_length
= linesize
* spectogram_height
;
610 int back_length
= linesize
* s
->spectogram_index
;
612 for (x
= 0; x
< video_width
; x
++)
613 rcp_result
[x
] = 1.0f
/ (result
[x
][3]+0.0001f
);
616 for (y
= 0; y
< spectogram_height
; y
++) {
617 float height
= (spectogram_height
- y
) * (1.0f
/spectogram_height
);
618 uint8_t *lineptr
= data
+ y
* linesize
;
619 for (x
= 0; x
< video_width
; x
++) {
621 if (result
[x
][3] <= height
) {
626 mul
= (result
[x
][3] - height
) * rcp_result
[x
];
627 *lineptr
++ = mul
* result
[x
][0] + 0.5f
;
628 *lineptr
++ = mul
* result
[x
][1] + 0.5f
;
629 *lineptr
++ = mul
* result
[x
][2] + 0.5f
;
636 for (y
= 0; y
< font_height
; y
++) {
637 uint8_t *lineptr
= data
+ (spectogram_height
+ y
) * linesize
;
638 uint8_t *spectogram_src
= s
->spectogram
+ s
->spectogram_index
* linesize
;
639 uint8_t *fontcolor_value
= s
->fontcolor_value
;
640 for (x
= 0; x
< video_width
; x
++) {
641 uint8_t alpha
= s
->font_alpha
[y
*video_width
+x
];
642 lineptr
[3*x
] = (spectogram_src
[3*x
] * (255-alpha
) + fontcolor_value
[0] * alpha
+ 255) >> 8;
643 lineptr
[3*x
+1] = (spectogram_src
[3*x
+1] * (255-alpha
) + fontcolor_value
[1] * alpha
+ 255) >> 8;
644 lineptr
[3*x
+2] = (spectogram_src
[3*x
+2] * (255-alpha
) + fontcolor_value
[2] * alpha
+ 255) >> 8;
645 fontcolor_value
+= 3;
649 for (y
= 0; y
< font_height
; y
++) {
650 uint8_t *lineptr
= data
+ (spectogram_height
+ y
) * linesize
;
651 memcpy(lineptr
, s
->spectogram
+ s
->spectogram_index
* linesize
, video_width
*3);
653 for (x
= 0; x
< video_width
; x
+= video_width
/10) {
655 static const char str
[] = "EF G A BC D ";
656 uint8_t *startptr
= data
+ spectogram_height
* linesize
+ x
* 3;
657 for (u
= 0; str
[u
]; u
++) {
659 for (v
= 0; v
< 16; v
++) {
660 uint8_t *p
= startptr
+ v
* linesize
* video_scale
+ 8 * 3 * u
* video_scale
;
661 int ux
= x
+ 8 * u
* video_scale
;
663 for (mask
= 0x80; mask
; mask
>>= 1) {
664 if (mask
& avpriv_vga16_font
[str
[u
] * 16 + v
]) {
665 p
[0] = s
->fontcolor_value
[3*ux
];
666 p
[1] = s
->fontcolor_value
[3*ux
+1];
667 p
[2] = s
->fontcolor_value
[3*ux
+2];
668 if (video_scale
== 2) {
670 p
[linesize
+1] = p
[1];
671 p
[linesize
+2] = p
[2];
672 p
[3] = p
[linesize
+3] = s
->fontcolor_value
[3*ux
+3];
673 p
[4] = p
[linesize
+4] = s
->fontcolor_value
[3*ux
+4];
674 p
[5] = p
[linesize
+5] = s
->fontcolor_value
[3*ux
+5];
677 p
+= 3 * video_scale
;
685 /* drawing spectogram/sonogram */
686 data
+= spectogram_start
* linesize
;
687 memcpy(data
, s
->spectogram
+ s
->spectogram_index
*linesize
, total_length
- back_length
);
689 data
+= total_length
- back_length
;
691 memcpy(data
, s
->spectogram
, back_length
);
693 s
->outpicref
->pts
= s
->frame_count
;
694 ret
= ff_filter_frame(outlink
, av_frame_clone(s
->outpicref
));
695 s
->req_fullfilled
= 1;
698 s
->spectogram_count
= (s
->spectogram_count
+ 1) % s
->count
;
699 s
->spectogram_index
= (s
->spectogram_index
+ spectogram_height
- 1) % spectogram_height
;
703 static int filter_frame(AVFilterLink
*inlink
, AVFrame
*insamples
)
705 AVFilterContext
*ctx
= inlink
->dst
;
706 ShowCQTContext
*s
= ctx
->priv
;
707 int step
= inlink
->sample_rate
/ (s
->fps
* s
->count
);
708 int fft_len
= 1 << s
->fft_bits
;
713 while (s
->remaining_fill
< (fft_len
>> 1)) {
715 memset(&s
->fft_data
[fft_len
- s
->remaining_fill
], 0, sizeof(*s
->fft_data
) * s
->remaining_fill
);
716 ret
= plot_cqt(inlink
);
719 for (x
= 0; x
< (fft_len
-step
); x
++)
720 s
->fft_data
[x
] = s
->fft_data
[x
+step
];
721 s
->remaining_fill
+= step
;
726 remaining
= insamples
->nb_samples
;
727 audio_data
= (float*) insamples
->data
[0];
730 if (remaining
>= s
->remaining_fill
) {
731 int i
= insamples
->nb_samples
- remaining
;
732 int j
= fft_len
- s
->remaining_fill
;
734 for (m
= 0; m
< s
->remaining_fill
; m
++) {
735 s
->fft_data
[j
+m
].re
= audio_data
[2*(i
+m
)];
736 s
->fft_data
[j
+m
].im
= audio_data
[2*(i
+m
)+1];
738 ret
= plot_cqt(inlink
);
740 av_frame_free(&insamples
);
743 remaining
-= s
->remaining_fill
;
744 for (m
= 0; m
< fft_len
-step
; m
++)
745 s
->fft_data
[m
] = s
->fft_data
[m
+step
];
746 s
->remaining_fill
= step
;
748 int i
= insamples
->nb_samples
- remaining
;
749 int j
= fft_len
- s
->remaining_fill
;
751 for (m
= 0; m
< remaining
; m
++) {
752 s
->fft_data
[m
+j
].re
= audio_data
[2*(i
+m
)];
753 s
->fft_data
[m
+j
].im
= audio_data
[2*(i
+m
)+1];
755 s
->remaining_fill
-= remaining
;
759 av_frame_free(&insamples
);
763 static int request_frame(AVFilterLink
*outlink
)
765 ShowCQTContext
*s
= outlink
->src
->priv
;
766 AVFilterLink
*inlink
= outlink
->src
->inputs
[0];
769 s
->req_fullfilled
= 0;
771 ret
= ff_request_frame(inlink
);
772 } while (!s
->req_fullfilled
&& ret
>= 0);
774 if (ret
== AVERROR_EOF
&& s
->outpicref
)
775 filter_frame(inlink
, NULL
);
779 static const AVFilterPad showcqt_inputs
[] = {
782 .type
= AVMEDIA_TYPE_AUDIO
,
783 .filter_frame
= filter_frame
,
788 static const AVFilterPad showcqt_outputs
[] = {
791 .type
= AVMEDIA_TYPE_VIDEO
,
792 .config_props
= config_output
,
793 .request_frame
= request_frame
,
798 AVFilter ff_avf_showcqt
= {
800 .description
= NULL_IF_CONFIG_SMALL("Convert input audio to a CQT (Constant Q Transform) spectrum video output."),
802 .query_formats
= query_formats
,
803 .priv_size
= sizeof(ShowCQTContext
),
804 .inputs
= showcqt_inputs
,
805 .outputs
= showcqt_outputs
,
806 .priv_class
= &showcqt_class
,