Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (c) 2014 Muhammad Faiz <mfcc64@gmail.com> | |
3 | * | |
4 | * This file is part of FFmpeg. | |
5 | * | |
6 | * FFmpeg is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * FFmpeg is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with FFmpeg; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | */ | |
20 | ||
21 | #include "config.h" | |
22 | #include "libavcodec/avfft.h" | |
23 | #include "libavutil/avassert.h" | |
24 | #include "libavutil/channel_layout.h" | |
25 | #include "libavutil/opt.h" | |
26 | #include "libavutil/xga_font_data.h" | |
27 | #include "libavutil/qsort.h" | |
28 | #include "libavutil/time.h" | |
29 | #include "libavutil/eval.h" | |
30 | #include "avfilter.h" | |
31 | #include "internal.h" | |
32 | ||
33 | #include <math.h> | |
34 | #include <stdlib.h> | |
35 | ||
36 | #if CONFIG_LIBFREETYPE | |
37 | #include <ft2build.h> | |
38 | #include FT_FREETYPE_H | |
39 | #endif | |
40 | ||
/* This filter computes a constant Q transform with 16 bins per semitone,
 * using the Brown-Puckette algorithm.
 * It covers E0 to D#10 (10 octaves), so there are
 * 16 bins/semitone * 12 semitones/octave * 10 octaves = 1920 bins,
 * which matches the width of full HD resolution. */

/* full HD output geometry; halved when the fullhd option is off */
#define VIDEO_WIDTH 1920
#define VIDEO_HEIGHT 1080
/* height of the strip that carries the note labels */
#define FONT_HEIGHT 32
/* the bar graph and the spectrogram each take half of what is left */
#define SPECTOGRAM_HEIGHT ((VIDEO_HEIGHT-FONT_HEIGHT)/2)
/* first row of the scrolling spectrogram */
#define SPECTOGRAM_START (VIDEO_HEIGHT-SPECTOGRAM_HEIGHT)
/* frequency of bin 0; bin k is at BASE_FREQ * 2^(k/192) */
#define BASE_FREQ 20.051392800492
/* scale applied to the coeffclamp option when pruning kernel coefficients */
#define COEFF_CLAMP 1.0e-4
/* lower bound for the evaluated tlength expression */
#define TLENGTH_MIN 0.001
#define TLENGTH_DEFAULT "384/f*tc/(384/f+tc)"
/* clamping range for the evaluated volume expression */
#define VOLUME_MIN 1e-10
#define VOLUME_MAX 100.0
#define FONTCOLOR_DEFAULT "st(0, (midi(f)-59.5)/12);" \
"st(1, if(between(ld(0),0,1), 0.5-0.5*cos(2*PI*ld(0)), 0));" \
"r(1-ld(1)) + b(ld(1))"
60 | ||
/* One retained entry of a sparse spectral kernel. */
typedef struct {
    FFTSample value; /* kernel coefficient (real part of the kernel's FFT) */
    int index;       /* FFT bin this coefficient applies to */
} SparseCoeff;
65 | ||
/* Private context of the showcqt filter. */
typedef struct {
    const AVClass *class;
    AVFrame *outpicref;            /* output picture, redrawn in place and cloned for every output frame */
    FFTContext *fft_context;
    FFTComplex *fft_data;          /* kernel scratch during configuration, then the sliding input window (re = left, im = right) */
    FFTComplex *fft_result_left;   /* spectrum of the left channel */
    FFTComplex *fft_result_right;  /* spectrum of the right channel */
    uint8_t *spectogram;           /* ring buffer of spectrogram rows, one linesize per row */
    SparseCoeff *coeff_sort;       /* scratch array used to sort kernel coefficients */
    SparseCoeff *coeffs[VIDEO_WIDTH]; /* sparse kernel of every bin */
    uint8_t *font_alpha;           /* label alpha mask rendered with freetype, NULL to use the built-in font */
    char *fontfile; /* using freetype */
    int coeffs_len[VIDEO_WIDTH];   /* number of entries in coeffs[k] */
    uint8_t fontcolor_value[VIDEO_WIDTH*3]; /* result of fontcolor option */
    int64_t frame_count;           /* number of frames output so far, used as pts */
    int spectogram_count;          /* transforms done since the last output frame, modulo count */
    int spectogram_index;          /* ring buffer row that receives the next transform */
    int fft_bits;                  /* log2 of the FFT length */
    int req_fullfilled;            /* set once a frame has been pushed downstream */
    int remaining_fill;            /* samples still missing before the next transform can run */
    char *tlength;                 /* transform length expression */
    char *volume;                  /* volume expression */
    char *fontcolor;               /* font color expression */
    double timeclamp; /* lower timeclamp, time-accurate, higher timeclamp, freq-accurate (at low freq)*/
    float coeffclamp; /* lower coeffclamp, more precise, higher coeffclamp, faster */
    int fullhd; /* if true, output video is at full HD resolution, otherwise it will be halved */
    float gamma; /* lower gamma, more contrast, higher gamma, more range */
    int fps; /* the required fps is so strict, so it's enough to be int, but 24000/1001 etc cannot be encoded */
    int count; /* fps * count = transform rate */
} ShowCQTContext;
96 | ||
/* byte offset of an option's storage inside ShowCQTContext */
#define OFFSET(x) offsetof(ShowCQTContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM

/* User options; the string options volume, tlength and fontcolor are
 * parsed as expressions in config_output(). */
static const AVOption showcqt_options[] = {
    { "volume", "set volume", OFFSET(volume), AV_OPT_TYPE_STRING, { .str = "16" }, CHAR_MIN, CHAR_MAX, FLAGS },
    { "tlength", "set transform length", OFFSET(tlength), AV_OPT_TYPE_STRING, { .str = TLENGTH_DEFAULT }, CHAR_MIN, CHAR_MAX, FLAGS },
    { "timeclamp", "set timeclamp", OFFSET(timeclamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.17 }, 0.1, 1.0, FLAGS },
    { "coeffclamp", "set coeffclamp", OFFSET(coeffclamp), AV_OPT_TYPE_FLOAT, { .dbl = 1 }, 0.1, 10, FLAGS },
    { "gamma", "set gamma", OFFSET(gamma), AV_OPT_TYPE_FLOAT, { .dbl = 3 }, 1, 7, FLAGS },
    { "fullhd", "set full HD resolution", OFFSET(fullhd), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, FLAGS },
    { "fps", "set video fps", OFFSET(fps), AV_OPT_TYPE_INT, { .i64 = 25 }, 10, 100, FLAGS },
    { "count", "set number of transform per frame", OFFSET(count), AV_OPT_TYPE_INT, { .i64 = 6 }, 1, 30, FLAGS },
    { "fontfile", "set font file", OFFSET(fontfile), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, FLAGS },
    { "fontcolor", "set font color", OFFSET(fontcolor), AV_OPT_TYPE_STRING, { .str = FONTCOLOR_DEFAULT }, CHAR_MIN, CHAR_MAX, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(showcqt);
115 | ||
116 | static av_cold void uninit(AVFilterContext *ctx) | |
117 | { | |
118 | int k; | |
119 | ||
120 | ShowCQTContext *s = ctx->priv; | |
121 | av_fft_end(s->fft_context); | |
122 | s->fft_context = NULL; | |
123 | for (k = 0; k < VIDEO_WIDTH; k++) | |
124 | av_freep(&s->coeffs[k]); | |
125 | av_freep(&s->fft_data); | |
126 | av_freep(&s->fft_result_left); | |
127 | av_freep(&s->fft_result_right); | |
128 | av_freep(&s->coeff_sort); | |
129 | av_freep(&s->spectogram); | |
130 | av_freep(&s->font_alpha); | |
131 | av_frame_free(&s->outpicref); | |
132 | } | |
133 | ||
134 | static int query_formats(AVFilterContext *ctx) | |
135 | { | |
136 | AVFilterFormats *formats = NULL; | |
137 | AVFilterChannelLayouts *layouts = NULL; | |
138 | AVFilterLink *inlink = ctx->inputs[0]; | |
139 | AVFilterLink *outlink = ctx->outputs[0]; | |
140 | static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_NONE }; | |
141 | static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE }; | |
142 | static const int64_t channel_layouts[] = { AV_CH_LAYOUT_STEREO, AV_CH_LAYOUT_STEREO_DOWNMIX, -1 }; | |
143 | static const int samplerates[] = { 44100, 48000, -1 }; | |
144 | ||
145 | /* set input audio formats */ | |
146 | formats = ff_make_format_list(sample_fmts); | |
147 | if (!formats) | |
148 | return AVERROR(ENOMEM); | |
149 | ff_formats_ref(formats, &inlink->out_formats); | |
150 | ||
151 | layouts = avfilter_make_format64_list(channel_layouts); | |
152 | if (!layouts) | |
153 | return AVERROR(ENOMEM); | |
154 | ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts); | |
155 | ||
156 | formats = ff_make_format_list(samplerates); | |
157 | if (!formats) | |
158 | return AVERROR(ENOMEM); | |
159 | ff_formats_ref(formats, &inlink->out_samplerates); | |
160 | ||
161 | /* set output video format */ | |
162 | formats = ff_make_format_list(pix_fmts); | |
163 | if (!formats) | |
164 | return AVERROR(ENOMEM); | |
165 | ff_formats_ref(formats, &outlink->in_formats); | |
166 | ||
167 | return 0; | |
168 | } | |
169 | ||
#if CONFIG_LIBFREETYPE
/**
 * Render the note labels with FreeType into s->font_alpha.
 *
 * The mask is font_height rows of video_width bytes. The 12-character
 * label pattern is repeated 10 times across the row, each character cell
 * being font_width pixels wide. When no fontfile is set, or on any
 * failure, s->font_alpha is left NULL so that the built-in font is used.
 */
static void load_freetype_font(AVFilterContext *ctx)
{
    static const char str[] = "EF G A BC D ";
    ShowCQTContext *s = ctx->priv;
    FT_Library lib = NULL;
    FT_Face face = NULL;
    int video_scale = s->fullhd ? 2 : 1;
    int video_width = (VIDEO_WIDTH/2) * video_scale;
    int font_height = (FONT_HEIGHT/2) * video_scale;
    int font_width = 8 * video_scale;
    int font_repeat = font_width * 12;
    int linear_hori_advance = font_width * 65536;
    int non_monospace_warning = 0;
    int x;

    s->font_alpha = NULL;

    if (!s->fontfile)
        return;

    if (FT_Init_FreeType(&lib))
        goto fail;

    if (FT_New_Face(lib, s->fontfile, 0, &face))
        goto fail;

    /* measure 'A' at a reference size ... */
    if (FT_Set_Char_Size(face, 16*64, 0, 0, 0))
        goto fail;

    if (FT_Load_Char(face, 'A', FT_LOAD_RENDER))
        goto fail;

    /* a zero (or negative) advance would make the rescaling below divide by zero */
    if (face->glyph->linearHoriAdvance <= 0)
        goto fail;

    /* ... then rescale so that its advance becomes font_width pixels */
    if (FT_Set_Char_Size(face, 16*64 * linear_hori_advance / face->glyph->linearHoriAdvance, 0, 0, 0))
        goto fail;

    s->font_alpha = av_mallocz(font_height * video_width);
    if (!s->font_alpha)
        goto fail;

    for (x = 0; x < 12; x++) {
        const FT_Bitmap *bitmap;
        int sx, sy, rx, bx, by, dx, dy;

        if (str[x] == ' ')
            continue;

        if (FT_Load_Char(face, str[x], FT_LOAD_RENDER))
            goto fail;

        if (face->glyph->advance.x != font_width*64 && !non_monospace_warning) {
            av_log(ctx, AV_LOG_WARNING, "Font is not monospace\n");
            non_monospace_warning = 1;
        }

        bitmap = &face->glyph->bitmap;
        /* the copy below reads one byte per pixel and walks rows downwards */
        if (bitmap->rows && bitmap->width &&
            (bitmap->pixel_mode != FT_PIXEL_MODE_GRAY || bitmap->pitch < 0))
            goto fail;

        sy = font_height - 4*video_scale - face->glyph->bitmap_top;
        for (rx = 0; rx < 10; rx++) {
            sx = rx * font_repeat + x * font_width + face->glyph->bitmap_left;
            for (by = 0; by < bitmap->rows; by++) {
                dy = by + sy;
                if (dy < 0)
                    continue;
                if (dy >= font_height)
                    break;

                for (bx = 0; bx < bitmap->width; bx++) {
                    dx = bx + sx;
                    if (dx < 0)
                        continue;
                    if (dx >= video_width)
                        break;
                    /* rows are pitch bytes apart, which need not equal width */
                    s->font_alpha[dy*video_width+dx] = bitmap->buffer[by*bitmap->pitch+bx];
                }
            }
        }
    }

    FT_Done_Face(face);
    FT_Done_FreeType(lib);
    return;

fail:
    av_log(ctx, AV_LOG_WARNING, "Error while loading freetype font, using default font instead\n");
    FT_Done_Face(face);
    FT_Done_FreeType(lib);
    av_freep(&s->font_alpha);
    return;
}
#endif
260 | ||
261 | static double a_weighting(void *p, double f) | |
262 | { | |
263 | double ret = 12200.0*12200.0 * (f*f*f*f); | |
264 | ret /= (f*f + 20.6*20.6) * (f*f + 12200.0*12200.0) * | |
265 | sqrt((f*f + 107.7*107.7) * (f*f + 737.9*737.9)); | |
266 | return ret; | |
267 | } | |
268 | ||
269 | static double b_weighting(void *p, double f) | |
270 | { | |
271 | double ret = 12200.0*12200.0 * (f*f*f); | |
272 | ret /= (f*f + 20.6*20.6) * (f*f + 12200.0*12200.0) * sqrt(f*f + 158.5*158.5); | |
273 | return ret; | |
274 | } | |
275 | ||
/**
 * Expression helper: C-weighting gain (linear, not in dB) at frequency f.
 *
 * @param p unused opaque pointer required by the expression evaluator
 * @param f frequency
 */
static double c_weighting(void *p, double f)
{
    const double f2 = f*f;
    const double numerator = 12200.0*12200.0 * f2;
    const double denominator = (f2 + 20.6*20.6) * (f2 + 12200.0*12200.0);

    return numerator / denominator;
}
282 | ||
283 | static double midi(void *p, double f) | |
284 | { | |
285 | return log2(f/440.0) * 12.0 + 69.0; | |
286 | } | |
287 | ||
/**
 * Expression helper r(x): red component of a packed 0xRRGGBB color.
 *
 * @param p unused opaque pointer required by the expression evaluator
 * @param x intensity, clamped to [0, 1]; NaN is treated as 0
 * @return the intensity scaled to 0..255 and shifted into bits 16..23
 */
static double r_func(void *p, double x)
{
    /* written so that NaN takes the first branch: casting NaN to int is undefined */
    if (!(x > 0.0))
        x = 0.0;
    else if (x > 1.0)
        x = 1.0;
    return (int)(x*255.0+0.5) << 16;
}
293 | ||
/**
 * Expression helper g(x): green component of a packed 0xRRGGBB color.
 *
 * @param p unused opaque pointer required by the expression evaluator
 * @param x intensity, clamped to [0, 1]; NaN is treated as 0
 * @return the intensity scaled to 0..255 and shifted into bits 8..15
 */
static double g_func(void *p, double x)
{
    /* written so that NaN takes the first branch: casting NaN to int is undefined */
    if (!(x > 0.0))
        x = 0.0;
    else if (x > 1.0)
        x = 1.0;
    return (int)(x*255.0+0.5) << 8;
}
299 | ||
/**
 * Expression helper b(x): blue component of a packed 0xRRGGBB color.
 *
 * @param p unused opaque pointer required by the expression evaluator
 * @param x intensity, clamped to [0, 1]; NaN is treated as 0
 * @return the intensity scaled to 0..255 (bits 0..7)
 */
static double b_func(void *p, double x)
{
    /* written so that NaN takes the first branch: casting NaN to int is undefined */
    if (!(x > 0.0))
        x = 0.0;
    else if (x > 1.0)
        x = 1.0;
    return (int)(x*255.0+0.5);
}
305 | ||
306 | static inline int qsort_sparsecoeff(const SparseCoeff *a, const SparseCoeff *b) | |
307 | { | |
308 | if (fabsf(a->value) >= fabsf(b->value)) | |
309 | return 1; | |
310 | else | |
311 | return -1; | |
312 | } | |
313 | ||
314 | static int config_output(AVFilterLink *outlink) | |
315 | { | |
316 | AVFilterContext *ctx = outlink->src; | |
317 | AVFilterLink *inlink = ctx->inputs[0]; | |
318 | ShowCQTContext *s = ctx->priv; | |
319 | AVExpr *tlength_expr = NULL, *volume_expr = NULL, *fontcolor_expr = NULL; | |
320 | uint8_t *fontcolor_value = s->fontcolor_value; | |
321 | static const char * const expr_vars[] = { "timeclamp", "tc", "frequency", "freq", "f", NULL }; | |
322 | static const char * const expr_func_names[] = { "a_weighting", "b_weighting", "c_weighting", NULL }; | |
323 | static const char * const expr_fontcolor_func_names[] = { "midi", "r", "g", "b", NULL }; | |
324 | static double (* const expr_funcs[])(void *, double) = { a_weighting, b_weighting, c_weighting, NULL }; | |
325 | static double (* const expr_fontcolor_funcs[])(void *, double) = { midi, r_func, g_func, b_func, NULL }; | |
326 | int fft_len, k, x, y, ret; | |
327 | int num_coeffs = 0; | |
328 | int rate = inlink->sample_rate; | |
329 | double max_len = rate * (double) s->timeclamp; | |
330 | int64_t start_time, end_time; | |
331 | int video_scale = s->fullhd ? 2 : 1; | |
332 | int video_width = (VIDEO_WIDTH/2) * video_scale; | |
333 | int video_height = (VIDEO_HEIGHT/2) * video_scale; | |
334 | int spectogram_height = (SPECTOGRAM_HEIGHT/2) * video_scale; | |
335 | ||
336 | s->fft_bits = ceil(log2(max_len)); | |
337 | fft_len = 1 << s->fft_bits; | |
338 | ||
339 | if (rate % (s->fps * s->count)) { | |
340 | av_log(ctx, AV_LOG_ERROR, "Rate (%u) is not divisible by fps*count (%u*%u)\n", rate, s->fps, s->count); | |
341 | return AVERROR(EINVAL); | |
342 | } | |
343 | ||
344 | s->fft_data = av_malloc_array(fft_len, sizeof(*s->fft_data)); | |
345 | s->coeff_sort = av_malloc_array(fft_len, sizeof(*s->coeff_sort)); | |
346 | s->fft_result_left = av_malloc_array(fft_len, sizeof(*s->fft_result_left)); | |
347 | s->fft_result_right = av_malloc_array(fft_len, sizeof(*s->fft_result_right)); | |
348 | s->fft_context = av_fft_init(s->fft_bits, 0); | |
349 | ||
350 | if (!s->fft_data || !s->coeff_sort || !s->fft_result_left || !s->fft_result_right || !s->fft_context) | |
351 | return AVERROR(ENOMEM); | |
352 | ||
353 | #if CONFIG_LIBFREETYPE | |
354 | load_freetype_font(ctx); | |
355 | #else | |
356 | if (s->fontfile) | |
357 | av_log(ctx, AV_LOG_WARNING, "Freetype is not available, ignoring fontfile option\n"); | |
358 | s->font_alpha = NULL; | |
359 | #endif | |
360 | ||
361 | av_log(ctx, AV_LOG_INFO, "Calculating spectral kernel, please wait\n"); | |
362 | start_time = av_gettime_relative(); | |
363 | ret = av_expr_parse(&tlength_expr, s->tlength, expr_vars, NULL, NULL, NULL, NULL, 0, ctx); | |
364 | if (ret < 0) | |
365 | goto eval_error; | |
366 | ||
367 | ret = av_expr_parse(&volume_expr, s->volume, expr_vars, expr_func_names, | |
368 | expr_funcs, NULL, NULL, 0, ctx); | |
369 | if (ret < 0) | |
370 | goto eval_error; | |
371 | ||
372 | ret = av_expr_parse(&fontcolor_expr, s->fontcolor, expr_vars, expr_fontcolor_func_names, | |
373 | expr_fontcolor_funcs, NULL, NULL, 0, ctx); | |
374 | if (ret < 0) | |
375 | goto eval_error; | |
376 | ||
377 | for (k = 0; k < VIDEO_WIDTH; k++) { | |
378 | int hlen = fft_len >> 1; | |
379 | float total = 0; | |
380 | float partial = 0; | |
381 | double freq = BASE_FREQ * exp2(k * (1.0/192.0)); | |
382 | double tlen, tlength, volume; | |
383 | double expr_vars_val[] = { s->timeclamp, s->timeclamp, freq, freq, freq, 0 }; | |
384 | /* a window function from Albert H. Nuttall, | |
385 | * "Some Windows with Very Good Sidelobe Behavior" | |
386 | * -93.32 dB peak sidelobe and 18 dB/octave asymptotic decay | |
387 | * coefficient normalized to a0 = 1 */ | |
388 | double a0 = 0.355768; | |
389 | double a1 = 0.487396/a0; | |
390 | double a2 = 0.144232/a0; | |
391 | double a3 = 0.012604/a0; | |
392 | double sv_step, cv_step, sv, cv; | |
393 | double sw_step, cw_step, sw, cw, w; | |
394 | ||
395 | tlength = av_expr_eval(tlength_expr, expr_vars_val, NULL); | |
396 | if (isnan(tlength)) { | |
397 | av_log(ctx, AV_LOG_WARNING, "at freq %g: tlength is nan, setting it to %g\n", freq, s->timeclamp); | |
398 | tlength = s->timeclamp; | |
399 | } else if (tlength < TLENGTH_MIN) { | |
400 | av_log(ctx, AV_LOG_WARNING, "at freq %g: tlength is %g, setting it to %g\n", freq, tlength, TLENGTH_MIN); | |
401 | tlength = TLENGTH_MIN; | |
402 | } else if (tlength > s->timeclamp) { | |
403 | av_log(ctx, AV_LOG_WARNING, "at freq %g: tlength is %g, setting it to %g\n", freq, tlength, s->timeclamp); | |
404 | tlength = s->timeclamp; | |
405 | } | |
406 | ||
407 | volume = FFABS(av_expr_eval(volume_expr, expr_vars_val, NULL)); | |
408 | if (isnan(volume)) { | |
409 | av_log(ctx, AV_LOG_WARNING, "at freq %g: volume is nan, setting it to 0\n", freq); | |
410 | volume = VOLUME_MIN; | |
411 | } else if (volume < VOLUME_MIN) { | |
412 | volume = VOLUME_MIN; | |
413 | } else if (volume > VOLUME_MAX) { | |
414 | av_log(ctx, AV_LOG_WARNING, "at freq %g: volume is %g, setting it to %g\n", freq, volume, VOLUME_MAX); | |
415 | volume = VOLUME_MAX; | |
416 | } | |
417 | ||
418 | if (s->fullhd || !(k & 1)) { | |
419 | int fontcolor = av_expr_eval(fontcolor_expr, expr_vars_val, NULL); | |
420 | fontcolor_value[0] = (fontcolor >> 16) & 0xFF; | |
421 | fontcolor_value[1] = (fontcolor >> 8) & 0xFF; | |
422 | fontcolor_value[2] = fontcolor & 0xFF; | |
423 | fontcolor_value += 3; | |
424 | } | |
425 | ||
426 | tlen = tlength * rate; | |
427 | s->fft_data[0].re = 0; | |
428 | s->fft_data[0].im = 0; | |
429 | s->fft_data[hlen].re = (1.0 + a1 + a2 + a3) * (1.0/tlen) * volume * (1.0/fft_len); | |
430 | s->fft_data[hlen].im = 0; | |
431 | sv_step = sv = sin(2.0*M_PI*freq*(1.0/rate)); | |
432 | cv_step = cv = cos(2.0*M_PI*freq*(1.0/rate)); | |
433 | /* also optimizing window func */ | |
434 | sw_step = sw = sin(2.0*M_PI*(1.0/tlen)); | |
435 | cw_step = cw = cos(2.0*M_PI*(1.0/tlen)); | |
436 | for (x = 1; x < 0.5 * tlen; x++) { | |
437 | double cv_tmp, cw_tmp; | |
438 | double cw2, cw3, sw2; | |
439 | ||
440 | cw2 = cw * cw - sw * sw; | |
441 | sw2 = cw * sw + sw * cw; | |
442 | cw3 = cw * cw2 - sw * sw2; | |
443 | w = (1.0 + a1 * cw + a2 * cw2 + a3 * cw3) * (1.0/tlen) * volume * (1.0/fft_len); | |
444 | s->fft_data[hlen + x].re = w * cv; | |
445 | s->fft_data[hlen + x].im = w * sv; | |
446 | s->fft_data[hlen - x].re = s->fft_data[hlen + x].re; | |
447 | s->fft_data[hlen - x].im = -s->fft_data[hlen + x].im; | |
448 | ||
449 | cv_tmp = cv * cv_step - sv * sv_step; | |
450 | sv = sv * cv_step + cv * sv_step; | |
451 | cv = cv_tmp; | |
452 | cw_tmp = cw * cw_step - sw * sw_step; | |
453 | sw = sw * cw_step + cw * sw_step; | |
454 | cw = cw_tmp; | |
455 | } | |
456 | for (; x < hlen; x++) { | |
457 | s->fft_data[hlen + x].re = 0; | |
458 | s->fft_data[hlen + x].im = 0; | |
459 | s->fft_data[hlen - x].re = 0; | |
460 | s->fft_data[hlen - x].im = 0; | |
461 | } | |
462 | av_fft_permute(s->fft_context, s->fft_data); | |
463 | av_fft_calc(s->fft_context, s->fft_data); | |
464 | ||
465 | for (x = 0; x < fft_len; x++) { | |
466 | s->coeff_sort[x].index = x; | |
467 | s->coeff_sort[x].value = s->fft_data[x].re; | |
468 | } | |
469 | ||
470 | AV_QSORT(s->coeff_sort, fft_len, SparseCoeff, qsort_sparsecoeff); | |
471 | for (x = 0; x < fft_len; x++) | |
472 | total += fabsf(s->coeff_sort[x].value); | |
473 | ||
474 | for (x = 0; x < fft_len; x++) { | |
475 | partial += fabsf(s->coeff_sort[x].value); | |
476 | if (partial > total * s->coeffclamp * COEFF_CLAMP) { | |
477 | s->coeffs_len[k] = fft_len - x; | |
478 | num_coeffs += s->coeffs_len[k]; | |
479 | s->coeffs[k] = av_malloc_array(s->coeffs_len[k], sizeof(*s->coeffs[k])); | |
480 | if (!s->coeffs[k]) { | |
481 | ret = AVERROR(ENOMEM); | |
482 | goto eval_error; | |
483 | } | |
484 | for (y = 0; y < s->coeffs_len[k]; y++) | |
485 | s->coeffs[k][y] = s->coeff_sort[x+y]; | |
486 | break; | |
487 | } | |
488 | } | |
489 | } | |
490 | av_expr_free(fontcolor_expr); | |
491 | av_expr_free(volume_expr); | |
492 | av_expr_free(tlength_expr); | |
493 | end_time = av_gettime_relative(); | |
494 | av_log(ctx, AV_LOG_INFO, "Elapsed time %.6f s (fft_len=%u, num_coeffs=%u)\n", 1e-6 * (end_time-start_time), fft_len, num_coeffs); | |
495 | ||
496 | outlink->w = video_width; | |
497 | outlink->h = video_height; | |
498 | ||
499 | s->req_fullfilled = 0; | |
500 | s->spectogram_index = 0; | |
501 | s->frame_count = 0; | |
502 | s->spectogram_count = 0; | |
503 | s->remaining_fill = fft_len >> 1; | |
504 | memset(s->fft_data, 0, fft_len * sizeof(*s->fft_data)); | |
505 | ||
506 | s->outpicref = ff_get_video_buffer(outlink, outlink->w, outlink->h); | |
507 | if (!s->outpicref) | |
508 | return AVERROR(ENOMEM); | |
509 | ||
510 | s->spectogram = av_calloc(spectogram_height, s->outpicref->linesize[0]); | |
511 | if (!s->spectogram) | |
512 | return AVERROR(ENOMEM); | |
513 | ||
514 | outlink->sample_aspect_ratio = av_make_q(1, 1); | |
515 | outlink->time_base = av_make_q(1, s->fps); | |
516 | outlink->frame_rate = av_make_q(s->fps, 1); | |
517 | return 0; | |
518 | ||
519 | eval_error: | |
520 | av_expr_free(fontcolor_expr); | |
521 | av_expr_free(volume_expr); | |
522 | av_expr_free(tlength_expr); | |
523 | return ret; | |
524 | } | |
525 | ||
526 | static int plot_cqt(AVFilterLink *inlink) | |
527 | { | |
528 | AVFilterContext *ctx = inlink->dst; | |
529 | ShowCQTContext *s = ctx->priv; | |
530 | AVFilterLink *outlink = ctx->outputs[0]; | |
531 | int fft_len = 1 << s->fft_bits; | |
532 | FFTSample result[VIDEO_WIDTH][4]; | |
533 | int x, y, ret = 0; | |
534 | int linesize = s->outpicref->linesize[0]; | |
535 | int video_scale = s->fullhd ? 2 : 1; | |
536 | int video_width = (VIDEO_WIDTH/2) * video_scale; | |
537 | int spectogram_height = (SPECTOGRAM_HEIGHT/2) * video_scale; | |
538 | int spectogram_start = (SPECTOGRAM_START/2) * video_scale; | |
539 | int font_height = (FONT_HEIGHT/2) * video_scale; | |
540 | ||
541 | /* real part contains left samples, imaginary part contains right samples */ | |
542 | memcpy(s->fft_result_left, s->fft_data, fft_len * sizeof(*s->fft_data)); | |
543 | av_fft_permute(s->fft_context, s->fft_result_left); | |
544 | av_fft_calc(s->fft_context, s->fft_result_left); | |
545 | ||
546 | /* separate left and right, (and multiply by 2.0) */ | |
547 | s->fft_result_right[0].re = 2.0f * s->fft_result_left[0].im; | |
548 | s->fft_result_right[0].im = 0; | |
549 | s->fft_result_left[0].re = 2.0f * s->fft_result_left[0].re; | |
550 | s->fft_result_left[0].im = 0; | |
551 | for (x = 1; x <= fft_len >> 1; x++) { | |
552 | FFTSample tmpy = s->fft_result_left[fft_len-x].im - s->fft_result_left[x].im; | |
553 | ||
554 | s->fft_result_right[x].re = s->fft_result_left[x].im + s->fft_result_left[fft_len-x].im; | |
555 | s->fft_result_right[x].im = s->fft_result_left[x].re - s->fft_result_left[fft_len-x].re; | |
556 | s->fft_result_right[fft_len-x].re = s->fft_result_right[x].re; | |
557 | s->fft_result_right[fft_len-x].im = -s->fft_result_right[x].im; | |
558 | ||
559 | s->fft_result_left[x].re = s->fft_result_left[x].re + s->fft_result_left[fft_len-x].re; | |
560 | s->fft_result_left[x].im = tmpy; | |
561 | s->fft_result_left[fft_len-x].re = s->fft_result_left[x].re; | |
562 | s->fft_result_left[fft_len-x].im = -s->fft_result_left[x].im; | |
563 | } | |
564 | ||
565 | /* calculating cqt */ | |
566 | for (x = 0; x < VIDEO_WIDTH; x++) { | |
567 | int u; | |
568 | float g = 1.0f / s->gamma; | |
569 | FFTComplex l = {0,0}; | |
570 | FFTComplex r = {0,0}; | |
571 | ||
572 | for (u = 0; u < s->coeffs_len[x]; u++) { | |
573 | FFTSample value = s->coeffs[x][u].value; | |
574 | int index = s->coeffs[x][u].index; | |
575 | l.re += value * s->fft_result_left[index].re; | |
576 | l.im += value * s->fft_result_left[index].im; | |
577 | r.re += value * s->fft_result_right[index].re; | |
578 | r.im += value * s->fft_result_right[index].im; | |
579 | } | |
580 | /* result is power, not amplitude */ | |
581 | result[x][0] = l.re * l.re + l.im * l.im; | |
582 | result[x][2] = r.re * r.re + r.im * r.im; | |
583 | result[x][1] = 0.5f * (result[x][0] + result[x][2]); | |
584 | result[x][3] = result[x][1]; | |
585 | result[x][0] = 255.0f * powf(FFMIN(1.0f,result[x][0]), g); | |
586 | result[x][1] = 255.0f * powf(FFMIN(1.0f,result[x][1]), g); | |
587 | result[x][2] = 255.0f * powf(FFMIN(1.0f,result[x][2]), g); | |
588 | } | |
589 | ||
590 | if (!s->fullhd) { | |
591 | for (x = 0; x < video_width; x++) { | |
592 | result[x][0] = 0.5f * (result[2*x][0] + result[2*x+1][0]); | |
593 | result[x][1] = 0.5f * (result[2*x][1] + result[2*x+1][1]); | |
594 | result[x][2] = 0.5f * (result[2*x][2] + result[2*x+1][2]); | |
595 | result[x][3] = 0.5f * (result[2*x][3] + result[2*x+1][3]); | |
596 | } | |
597 | } | |
598 | ||
599 | for (x = 0; x < video_width; x++) { | |
600 | s->spectogram[s->spectogram_index*linesize + 3*x] = result[x][0] + 0.5f; | |
601 | s->spectogram[s->spectogram_index*linesize + 3*x + 1] = result[x][1] + 0.5f; | |
602 | s->spectogram[s->spectogram_index*linesize + 3*x + 2] = result[x][2] + 0.5f; | |
603 | } | |
604 | ||
605 | /* drawing */ | |
606 | if (!s->spectogram_count) { | |
607 | uint8_t *data = (uint8_t*) s->outpicref->data[0]; | |
608 | float rcp_result[VIDEO_WIDTH]; | |
609 | int total_length = linesize * spectogram_height; | |
610 | int back_length = linesize * s->spectogram_index; | |
611 | ||
612 | for (x = 0; x < video_width; x++) | |
613 | rcp_result[x] = 1.0f / (result[x][3]+0.0001f); | |
614 | ||
615 | /* drawing bar */ | |
616 | for (y = 0; y < spectogram_height; y++) { | |
617 | float height = (spectogram_height - y) * (1.0f/spectogram_height); | |
618 | uint8_t *lineptr = data + y * linesize; | |
619 | for (x = 0; x < video_width; x++) { | |
620 | float mul; | |
621 | if (result[x][3] <= height) { | |
622 | *lineptr++ = 0; | |
623 | *lineptr++ = 0; | |
624 | *lineptr++ = 0; | |
625 | } else { | |
626 | mul = (result[x][3] - height) * rcp_result[x]; | |
627 | *lineptr++ = mul * result[x][0] + 0.5f; | |
628 | *lineptr++ = mul * result[x][1] + 0.5f; | |
629 | *lineptr++ = mul * result[x][2] + 0.5f; | |
630 | } | |
631 | } | |
632 | } | |
633 | ||
634 | /* drawing font */ | |
635 | if (s->font_alpha) { | |
636 | for (y = 0; y < font_height; y++) { | |
637 | uint8_t *lineptr = data + (spectogram_height + y) * linesize; | |
638 | uint8_t *spectogram_src = s->spectogram + s->spectogram_index * linesize; | |
639 | uint8_t *fontcolor_value = s->fontcolor_value; | |
640 | for (x = 0; x < video_width; x++) { | |
641 | uint8_t alpha = s->font_alpha[y*video_width+x]; | |
642 | lineptr[3*x] = (spectogram_src[3*x] * (255-alpha) + fontcolor_value[0] * alpha + 255) >> 8; | |
643 | lineptr[3*x+1] = (spectogram_src[3*x+1] * (255-alpha) + fontcolor_value[1] * alpha + 255) >> 8; | |
644 | lineptr[3*x+2] = (spectogram_src[3*x+2] * (255-alpha) + fontcolor_value[2] * alpha + 255) >> 8; | |
645 | fontcolor_value += 3; | |
646 | } | |
647 | } | |
648 | } else { | |
649 | for (y = 0; y < font_height; y++) { | |
650 | uint8_t *lineptr = data + (spectogram_height + y) * linesize; | |
651 | memcpy(lineptr, s->spectogram + s->spectogram_index * linesize, video_width*3); | |
652 | } | |
653 | for (x = 0; x < video_width; x += video_width/10) { | |
654 | int u; | |
655 | static const char str[] = "EF G A BC D "; | |
656 | uint8_t *startptr = data + spectogram_height * linesize + x * 3; | |
657 | for (u = 0; str[u]; u++) { | |
658 | int v; | |
659 | for (v = 0; v < 16; v++) { | |
660 | uint8_t *p = startptr + v * linesize * video_scale + 8 * 3 * u * video_scale; | |
661 | int ux = x + 8 * u * video_scale; | |
662 | int mask; | |
663 | for (mask = 0x80; mask; mask >>= 1) { | |
664 | if (mask & avpriv_vga16_font[str[u] * 16 + v]) { | |
665 | p[0] = s->fontcolor_value[3*ux]; | |
666 | p[1] = s->fontcolor_value[3*ux+1]; | |
667 | p[2] = s->fontcolor_value[3*ux+2]; | |
668 | if (video_scale == 2) { | |
669 | p[linesize] = p[0]; | |
670 | p[linesize+1] = p[1]; | |
671 | p[linesize+2] = p[2]; | |
672 | p[3] = p[linesize+3] = s->fontcolor_value[3*ux+3]; | |
673 | p[4] = p[linesize+4] = s->fontcolor_value[3*ux+4]; | |
674 | p[5] = p[linesize+5] = s->fontcolor_value[3*ux+5]; | |
675 | } | |
676 | } | |
677 | p += 3 * video_scale; | |
678 | ux += video_scale; | |
679 | } | |
680 | } | |
681 | } | |
682 | } | |
683 | } | |
684 | ||
685 | /* drawing spectogram/sonogram */ | |
686 | data += spectogram_start * linesize; | |
687 | memcpy(data, s->spectogram + s->spectogram_index*linesize, total_length - back_length); | |
688 | ||
689 | data += total_length - back_length; | |
690 | if (back_length) | |
691 | memcpy(data, s->spectogram, back_length); | |
692 | ||
693 | s->outpicref->pts = s->frame_count; | |
694 | ret = ff_filter_frame(outlink, av_frame_clone(s->outpicref)); | |
695 | s->req_fullfilled = 1; | |
696 | s->frame_count++; | |
697 | } | |
698 | s->spectogram_count = (s->spectogram_count + 1) % s->count; | |
699 | s->spectogram_index = (s->spectogram_index + spectogram_height - 1) % spectogram_height; | |
700 | return ret; | |
701 | } | |
702 | ||
703 | static int filter_frame(AVFilterLink *inlink, AVFrame *insamples) | |
704 | { | |
705 | AVFilterContext *ctx = inlink->dst; | |
706 | ShowCQTContext *s = ctx->priv; | |
707 | int step = inlink->sample_rate / (s->fps * s->count); | |
708 | int fft_len = 1 << s->fft_bits; | |
709 | int remaining; | |
710 | float *audio_data; | |
711 | ||
712 | if (!insamples) { | |
713 | while (s->remaining_fill < (fft_len >> 1)) { | |
714 | int ret, x; | |
715 | memset(&s->fft_data[fft_len - s->remaining_fill], 0, sizeof(*s->fft_data) * s->remaining_fill); | |
716 | ret = plot_cqt(inlink); | |
717 | if (ret < 0) | |
718 | return ret; | |
719 | for (x = 0; x < (fft_len-step); x++) | |
720 | s->fft_data[x] = s->fft_data[x+step]; | |
721 | s->remaining_fill += step; | |
722 | } | |
723 | return AVERROR(EOF); | |
724 | } | |
725 | ||
726 | remaining = insamples->nb_samples; | |
727 | audio_data = (float*) insamples->data[0]; | |
728 | ||
729 | while (remaining) { | |
730 | if (remaining >= s->remaining_fill) { | |
731 | int i = insamples->nb_samples - remaining; | |
732 | int j = fft_len - s->remaining_fill; | |
733 | int m, ret; | |
734 | for (m = 0; m < s->remaining_fill; m++) { | |
735 | s->fft_data[j+m].re = audio_data[2*(i+m)]; | |
736 | s->fft_data[j+m].im = audio_data[2*(i+m)+1]; | |
737 | } | |
738 | ret = plot_cqt(inlink); | |
739 | if (ret < 0) { | |
740 | av_frame_free(&insamples); | |
741 | return ret; | |
742 | } | |
743 | remaining -= s->remaining_fill; | |
744 | for (m = 0; m < fft_len-step; m++) | |
745 | s->fft_data[m] = s->fft_data[m+step]; | |
746 | s->remaining_fill = step; | |
747 | } else { | |
748 | int i = insamples->nb_samples - remaining; | |
749 | int j = fft_len - s->remaining_fill; | |
750 | int m; | |
751 | for (m = 0; m < remaining; m++) { | |
752 | s->fft_data[m+j].re = audio_data[2*(i+m)]; | |
753 | s->fft_data[m+j].im = audio_data[2*(i+m)+1]; | |
754 | } | |
755 | s->remaining_fill -= remaining; | |
756 | remaining = 0; | |
757 | } | |
758 | } | |
759 | av_frame_free(&insamples); | |
760 | return 0; | |
761 | } | |
762 | ||
763 | static int request_frame(AVFilterLink *outlink) | |
764 | { | |
765 | ShowCQTContext *s = outlink->src->priv; | |
766 | AVFilterLink *inlink = outlink->src->inputs[0]; | |
767 | int ret; | |
768 | ||
769 | s->req_fullfilled = 0; | |
770 | do { | |
771 | ret = ff_request_frame(inlink); | |
772 | } while (!s->req_fullfilled && ret >= 0); | |
773 | ||
774 | if (ret == AVERROR_EOF && s->outpicref) | |
775 | filter_frame(inlink, NULL); | |
776 | return ret; | |
777 | } | |
778 | ||
/* Single audio input pad; frames are consumed by filter_frame(). */
static const AVFilterPad showcqt_inputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
        .filter_frame = filter_frame,
    },
    { NULL }
};
787 | ||
/* Single video output pad, configured by config_output() and driven by request_frame(). */
static const AVFilterPad showcqt_outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
        .config_props = config_output,
        .request_frame = request_frame,
    },
    { NULL }
};
797 | ||
/* Filter definition: ties the callbacks, pads, options class and private context together. */
AVFilter ff_avf_showcqt = {
    .name = "showcqt",
    .description = NULL_IF_CONFIG_SMALL("Convert input audio to a CQT (Constant Q Transform) spectrum video output."),
    .uninit = uninit,
    .query_formats = query_formats,
    .priv_size = sizeof(ShowCQTContext),
    .inputs = showcqt_inputs,
    .outputs = showcqt_outputs,
    .priv_class = &showcqt_class,
};