Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (c) 2013-2014 Clément Bœsch | |
3 | * | |
4 | * This file is part of FFmpeg. | |
5 | * | |
6 | * FFmpeg is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * FFmpeg is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with FFmpeg; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | */ | |
20 | ||
21 | /** | |
22 | * A simple, relatively efficient and slow DCT image denoiser. | |
23 | * | |
24 | * @see http://www.ipol.im/pub/art/2011/ys-dct/ | |
25 | * | |
26 | * The DCT factorization used is based on "Fast and numerically stable | |
27 | * algorithms for discrete cosine transforms" from Gerlind Plonkaa & Manfred | |
28 | * Tasche (DOI: 10.1016/j.laa.2004.07.015). | |
29 | */ | |
30 | ||
31 | #include "libavutil/avassert.h" | |
32 | #include "libavutil/eval.h" | |
33 | #include "libavutil/opt.h" | |
34 | #include "internal.h" | |
35 | ||
36 | static const char *const var_names[] = { "c", NULL }; | |
37 | enum { VAR_C, VAR_VARS_NB }; | |
38 | ||
39 | #define MAX_THREADS 8 | |
40 | ||
41 | typedef struct DCTdnoizContext { | |
42 | const AVClass *class; | |
43 | ||
44 | /* coefficient factor expression */ | |
45 | char *expr_str; | |
46 | AVExpr *expr[MAX_THREADS]; | |
47 | double var_values[MAX_THREADS][VAR_VARS_NB]; | |
48 | ||
49 | int nb_threads; | |
50 | int pr_width, pr_height; // width and height to process | |
51 | float sigma; // used when no expression are st | |
52 | float th; // threshold (3*sigma) | |
53 | float *cbuf[2][3]; // two planar rgb color buffers | |
54 | float *slices[MAX_THREADS]; // slices buffers (1 slice buffer per thread) | |
55 | float *weights; // dct coeff are cumulated with overlapping; these values are used for averaging | |
56 | int p_linesize; // line sizes for color and weights | |
57 | int overlap; // number of block overlapping pixels | |
58 | int step; // block step increment (blocksize - overlap) | |
59 | int n; // 1<<n is the block size | |
60 | int bsize; // block size, 1<<n | |
61 | void (*filter_freq_func)(struct DCTdnoizContext *s, | |
62 | const float *src, int src_linesize, | |
63 | float *dst, int dst_linesize, | |
64 | int thread_id); | |
65 | void (*color_decorrelation)(float **dst, int dst_linesize, | |
66 | const uint8_t *src, int src_linesize, | |
67 | int w, int h); | |
68 | void (*color_correlation)(uint8_t *dst, int dst_linesize, | |
69 | float **src, int src_linesize, | |
70 | int w, int h); | |
71 | } DCTdnoizContext; | |
72 | ||
73 | #define MIN_NBITS 3 /* blocksize = 1<<3 = 8 */ | |
74 | #define MAX_NBITS 4 /* blocksize = 1<<4 = 16 */ | |
75 | #define DEFAULT_NBITS 3 | |
76 | ||
77 | #define OFFSET(x) offsetof(DCTdnoizContext, x) | |
78 | #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM | |
79 | static const AVOption dctdnoiz_options[] = { | |
80 | { "sigma", "set noise sigma constant", OFFSET(sigma), AV_OPT_TYPE_FLOAT, {.dbl=0}, 0, 999, .flags = FLAGS }, | |
81 | { "s", "set noise sigma constant", OFFSET(sigma), AV_OPT_TYPE_FLOAT, {.dbl=0}, 0, 999, .flags = FLAGS }, | |
82 | { "overlap", "set number of block overlapping pixels", OFFSET(overlap), AV_OPT_TYPE_INT, {.i64=-1}, -1, (1<<MAX_NBITS)-1, .flags = FLAGS }, | |
83 | { "expr", "set coefficient factor expression", OFFSET(expr_str), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS }, | |
84 | { "e", "set coefficient factor expression", OFFSET(expr_str), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS }, | |
85 | { "n", "set the block size, expressed in bits", OFFSET(n), AV_OPT_TYPE_INT, {.i64=DEFAULT_NBITS}, MIN_NBITS, MAX_NBITS, .flags = FLAGS }, | |
86 | { NULL } | |
87 | }; | |
88 | ||
89 | AVFILTER_DEFINE_CLASS(dctdnoiz); | |
90 | ||
91 | static void av_always_inline fdct8_1d(float *dst, const float *src, | |
92 | int dst_stridea, int dst_strideb, | |
93 | int src_stridea, int src_strideb) | |
94 | { | |
95 | int i; | |
96 | ||
97 | for (i = 0; i < 8; i++) { | |
98 | const float x00 = src[0*src_stridea] + src[7*src_stridea]; | |
99 | const float x01 = src[1*src_stridea] + src[6*src_stridea]; | |
100 | const float x02 = src[2*src_stridea] + src[5*src_stridea]; | |
101 | const float x03 = src[3*src_stridea] + src[4*src_stridea]; | |
102 | const float x04 = src[0*src_stridea] - src[7*src_stridea]; | |
103 | const float x05 = src[1*src_stridea] - src[6*src_stridea]; | |
104 | const float x06 = src[2*src_stridea] - src[5*src_stridea]; | |
105 | const float x07 = src[3*src_stridea] - src[4*src_stridea]; | |
106 | const float x08 = x00 + x03; | |
107 | const float x09 = x01 + x02; | |
108 | const float x0a = x00 - x03; | |
109 | const float x0b = x01 - x02; | |
110 | const float x0c = 1.38703984532215f*x04 + 0.275899379282943f*x07; | |
111 | const float x0d = 1.17587560241936f*x05 + 0.785694958387102f*x06; | |
112 | const float x0e = -0.785694958387102f*x05 + 1.17587560241936f*x06; | |
113 | const float x0f = 0.275899379282943f*x04 - 1.38703984532215f*x07; | |
114 | const float x10 = 0.353553390593274f * (x0c - x0d); | |
115 | const float x11 = 0.353553390593274f * (x0e - x0f); | |
116 | dst[0*dst_stridea] = 0.353553390593274f * (x08 + x09); | |
117 | dst[1*dst_stridea] = 0.353553390593274f * (x0c + x0d); | |
118 | dst[2*dst_stridea] = 0.461939766255643f*x0a + 0.191341716182545f*x0b; | |
119 | dst[3*dst_stridea] = 0.707106781186547f * (x10 - x11); | |
120 | dst[4*dst_stridea] = 0.353553390593274f * (x08 - x09); | |
121 | dst[5*dst_stridea] = 0.707106781186547f * (x10 + x11); | |
122 | dst[6*dst_stridea] = 0.191341716182545f*x0a - 0.461939766255643f*x0b; | |
123 | dst[7*dst_stridea] = 0.353553390593274f * (x0e + x0f); | |
124 | dst += dst_strideb; | |
125 | src += src_strideb; | |
126 | } | |
127 | } | |
128 | ||
129 | static void av_always_inline idct8_1d(float *dst, const float *src, | |
130 | int dst_stridea, int dst_strideb, | |
131 | int src_stridea, int src_strideb, | |
132 | int add) | |
133 | { | |
134 | int i; | |
135 | ||
136 | for (i = 0; i < 8; i++) { | |
137 | const float x00 = 1.4142135623731f *src[0*src_stridea]; | |
138 | const float x01 = 1.38703984532215f *src[1*src_stridea] + 0.275899379282943f*src[7*src_stridea]; | |
139 | const float x02 = 1.30656296487638f *src[2*src_stridea] + 0.541196100146197f*src[6*src_stridea]; | |
140 | const float x03 = 1.17587560241936f *src[3*src_stridea] + 0.785694958387102f*src[5*src_stridea]; | |
141 | const float x04 = 1.4142135623731f *src[4*src_stridea]; | |
142 | const float x05 = -0.785694958387102f*src[3*src_stridea] + 1.17587560241936f*src[5*src_stridea]; | |
143 | const float x06 = 0.541196100146197f*src[2*src_stridea] - 1.30656296487638f*src[6*src_stridea]; | |
144 | const float x07 = -0.275899379282943f*src[1*src_stridea] + 1.38703984532215f*src[7*src_stridea]; | |
145 | const float x09 = x00 + x04; | |
146 | const float x0a = x01 + x03; | |
147 | const float x0b = 1.4142135623731f*x02; | |
148 | const float x0c = x00 - x04; | |
149 | const float x0d = x01 - x03; | |
150 | const float x0e = 0.353553390593274f * (x09 - x0b); | |
151 | const float x0f = 0.353553390593274f * (x0c + x0d); | |
152 | const float x10 = 0.353553390593274f * (x0c - x0d); | |
153 | const float x11 = 1.4142135623731f*x06; | |
154 | const float x12 = x05 + x07; | |
155 | const float x13 = x05 - x07; | |
156 | const float x14 = 0.353553390593274f * (x11 + x12); | |
157 | const float x15 = 0.353553390593274f * (x11 - x12); | |
158 | const float x16 = 0.5f*x13; | |
159 | dst[0*dst_stridea] = (add ? dst[ 0*dst_stridea] : 0) + 0.25f * (x09 + x0b) + 0.353553390593274f*x0a; | |
160 | dst[1*dst_stridea] = (add ? dst[ 1*dst_stridea] : 0) + 0.707106781186547f * (x0f + x15); | |
161 | dst[2*dst_stridea] = (add ? dst[ 2*dst_stridea] : 0) + 0.707106781186547f * (x0f - x15); | |
162 | dst[3*dst_stridea] = (add ? dst[ 3*dst_stridea] : 0) + 0.707106781186547f * (x0e + x16); | |
163 | dst[4*dst_stridea] = (add ? dst[ 4*dst_stridea] : 0) + 0.707106781186547f * (x0e - x16); | |
164 | dst[5*dst_stridea] = (add ? dst[ 5*dst_stridea] : 0) + 0.707106781186547f * (x10 - x14); | |
165 | dst[6*dst_stridea] = (add ? dst[ 6*dst_stridea] : 0) + 0.707106781186547f * (x10 + x14); | |
166 | dst[7*dst_stridea] = (add ? dst[ 7*dst_stridea] : 0) + 0.25f * (x09 + x0b) - 0.353553390593274f*x0a; | |
167 | dst += dst_strideb; | |
168 | src += src_strideb; | |
169 | } | |
170 | } | |
171 | ||
172 | ||
173 | static void av_always_inline fdct16_1d(float *dst, const float *src, | |
174 | int dst_stridea, int dst_strideb, | |
175 | int src_stridea, int src_strideb) | |
176 | { | |
177 | int i; | |
178 | ||
179 | for (i = 0; i < 16; i++) { | |
180 | const float x00 = src[ 0*src_stridea] + src[15*src_stridea]; | |
181 | const float x01 = src[ 1*src_stridea] + src[14*src_stridea]; | |
182 | const float x02 = src[ 2*src_stridea] + src[13*src_stridea]; | |
183 | const float x03 = src[ 3*src_stridea] + src[12*src_stridea]; | |
184 | const float x04 = src[ 4*src_stridea] + src[11*src_stridea]; | |
185 | const float x05 = src[ 5*src_stridea] + src[10*src_stridea]; | |
186 | const float x06 = src[ 6*src_stridea] + src[ 9*src_stridea]; | |
187 | const float x07 = src[ 7*src_stridea] + src[ 8*src_stridea]; | |
188 | const float x08 = src[ 0*src_stridea] - src[15*src_stridea]; | |
189 | const float x09 = src[ 1*src_stridea] - src[14*src_stridea]; | |
190 | const float x0a = src[ 2*src_stridea] - src[13*src_stridea]; | |
191 | const float x0b = src[ 3*src_stridea] - src[12*src_stridea]; | |
192 | const float x0c = src[ 4*src_stridea] - src[11*src_stridea]; | |
193 | const float x0d = src[ 5*src_stridea] - src[10*src_stridea]; | |
194 | const float x0e = src[ 6*src_stridea] - src[ 9*src_stridea]; | |
195 | const float x0f = src[ 7*src_stridea] - src[ 8*src_stridea]; | |
196 | const float x10 = x00 + x07; | |
197 | const float x11 = x01 + x06; | |
198 | const float x12 = x02 + x05; | |
199 | const float x13 = x03 + x04; | |
200 | const float x14 = x00 - x07; | |
201 | const float x15 = x01 - x06; | |
202 | const float x16 = x02 - x05; | |
203 | const float x17 = x03 - x04; | |
204 | const float x18 = x10 + x13; | |
205 | const float x19 = x11 + x12; | |
206 | const float x1a = x10 - x13; | |
207 | const float x1b = x11 - x12; | |
208 | const float x1c = 1.38703984532215f*x14 + 0.275899379282943f*x17; | |
209 | const float x1d = 1.17587560241936f*x15 + 0.785694958387102f*x16; | |
210 | const float x1e = -0.785694958387102f*x15 + 1.17587560241936f *x16; | |
211 | const float x1f = 0.275899379282943f*x14 - 1.38703984532215f *x17; | |
212 | const float x20 = 0.25f * (x1c - x1d); | |
213 | const float x21 = 0.25f * (x1e - x1f); | |
214 | const float x22 = 1.40740373752638f *x08 + 0.138617169199091f*x0f; | |
215 | const float x23 = 1.35331800117435f *x09 + 0.410524527522357f*x0e; | |
216 | const float x24 = 1.24722501298667f *x0a + 0.666655658477747f*x0d; | |
217 | const float x25 = 1.09320186700176f *x0b + 0.897167586342636f*x0c; | |
218 | const float x26 = -0.897167586342636f*x0b + 1.09320186700176f *x0c; | |
219 | const float x27 = 0.666655658477747f*x0a - 1.24722501298667f *x0d; | |
220 | const float x28 = -0.410524527522357f*x09 + 1.35331800117435f *x0e; | |
221 | const float x29 = 0.138617169199091f*x08 - 1.40740373752638f *x0f; | |
222 | const float x2a = x22 + x25; | |
223 | const float x2b = x23 + x24; | |
224 | const float x2c = x22 - x25; | |
225 | const float x2d = x23 - x24; | |
226 | const float x2e = 0.25f * (x2a - x2b); | |
227 | const float x2f = 0.326640741219094f*x2c + 0.135299025036549f*x2d; | |
228 | const float x30 = 0.135299025036549f*x2c - 0.326640741219094f*x2d; | |
229 | const float x31 = x26 + x29; | |
230 | const float x32 = x27 + x28; | |
231 | const float x33 = x26 - x29; | |
232 | const float x34 = x27 - x28; | |
233 | const float x35 = 0.25f * (x31 - x32); | |
234 | const float x36 = 0.326640741219094f*x33 + 0.135299025036549f*x34; | |
235 | const float x37 = 0.135299025036549f*x33 - 0.326640741219094f*x34; | |
236 | dst[ 0*dst_stridea] = 0.25f * (x18 + x19); | |
237 | dst[ 1*dst_stridea] = 0.25f * (x2a + x2b); | |
238 | dst[ 2*dst_stridea] = 0.25f * (x1c + x1d); | |
239 | dst[ 3*dst_stridea] = 0.707106781186547f * (x2f - x37); | |
240 | dst[ 4*dst_stridea] = 0.326640741219094f*x1a + 0.135299025036549f*x1b; | |
241 | dst[ 5*dst_stridea] = 0.707106781186547f * (x2f + x37); | |
242 | dst[ 6*dst_stridea] = 0.707106781186547f * (x20 - x21); | |
243 | dst[ 7*dst_stridea] = 0.707106781186547f * (x2e + x35); | |
244 | dst[ 8*dst_stridea] = 0.25f * (x18 - x19); | |
245 | dst[ 9*dst_stridea] = 0.707106781186547f * (x2e - x35); | |
246 | dst[10*dst_stridea] = 0.707106781186547f * (x20 + x21); | |
247 | dst[11*dst_stridea] = 0.707106781186547f * (x30 - x36); | |
248 | dst[12*dst_stridea] = 0.135299025036549f*x1a - 0.326640741219094f*x1b; | |
249 | dst[13*dst_stridea] = 0.707106781186547f * (x30 + x36); | |
250 | dst[14*dst_stridea] = 0.25f * (x1e + x1f); | |
251 | dst[15*dst_stridea] = 0.25f * (x31 + x32); | |
252 | dst += dst_strideb; | |
253 | src += src_strideb; | |
254 | } | |
255 | } | |
256 | ||
257 | static void av_always_inline idct16_1d(float *dst, const float *src, | |
258 | int dst_stridea, int dst_strideb, | |
259 | int src_stridea, int src_strideb, | |
260 | int add) | |
261 | { | |
262 | int i; | |
263 | ||
264 | for (i = 0; i < 16; i++) { | |
265 | const float x00 = 1.4142135623731f *src[ 0*src_stridea]; | |
266 | const float x01 = 1.40740373752638f *src[ 1*src_stridea] + 0.138617169199091f*src[15*src_stridea]; | |
267 | const float x02 = 1.38703984532215f *src[ 2*src_stridea] + 0.275899379282943f*src[14*src_stridea]; | |
268 | const float x03 = 1.35331800117435f *src[ 3*src_stridea] + 0.410524527522357f*src[13*src_stridea]; | |
269 | const float x04 = 1.30656296487638f *src[ 4*src_stridea] + 0.541196100146197f*src[12*src_stridea]; | |
270 | const float x05 = 1.24722501298667f *src[ 5*src_stridea] + 0.666655658477747f*src[11*src_stridea]; | |
271 | const float x06 = 1.17587560241936f *src[ 6*src_stridea] + 0.785694958387102f*src[10*src_stridea]; | |
272 | const float x07 = 1.09320186700176f *src[ 7*src_stridea] + 0.897167586342636f*src[ 9*src_stridea]; | |
273 | const float x08 = 1.4142135623731f *src[ 8*src_stridea]; | |
274 | const float x09 = -0.897167586342636f*src[ 7*src_stridea] + 1.09320186700176f*src[ 9*src_stridea]; | |
275 | const float x0a = 0.785694958387102f*src[ 6*src_stridea] - 1.17587560241936f*src[10*src_stridea]; | |
276 | const float x0b = -0.666655658477747f*src[ 5*src_stridea] + 1.24722501298667f*src[11*src_stridea]; | |
277 | const float x0c = 0.541196100146197f*src[ 4*src_stridea] - 1.30656296487638f*src[12*src_stridea]; | |
278 | const float x0d = -0.410524527522357f*src[ 3*src_stridea] + 1.35331800117435f*src[13*src_stridea]; | |
279 | const float x0e = 0.275899379282943f*src[ 2*src_stridea] - 1.38703984532215f*src[14*src_stridea]; | |
280 | const float x0f = -0.138617169199091f*src[ 1*src_stridea] + 1.40740373752638f*src[15*src_stridea]; | |
281 | const float x12 = x00 + x08; | |
282 | const float x13 = x01 + x07; | |
283 | const float x14 = x02 + x06; | |
284 | const float x15 = x03 + x05; | |
285 | const float x16 = 1.4142135623731f*x04; | |
286 | const float x17 = x00 - x08; | |
287 | const float x18 = x01 - x07; | |
288 | const float x19 = x02 - x06; | |
289 | const float x1a = x03 - x05; | |
290 | const float x1d = x12 + x16; | |
291 | const float x1e = x13 + x15; | |
292 | const float x1f = 1.4142135623731f*x14; | |
293 | const float x20 = x12 - x16; | |
294 | const float x21 = x13 - x15; | |
295 | const float x22 = 0.25f * (x1d - x1f); | |
296 | const float x23 = 0.25f * (x20 + x21); | |
297 | const float x24 = 0.25f * (x20 - x21); | |
298 | const float x25 = 1.4142135623731f*x17; | |
299 | const float x26 = 1.30656296487638f*x18 + 0.541196100146197f*x1a; | |
300 | const float x27 = 1.4142135623731f*x19; | |
301 | const float x28 = -0.541196100146197f*x18 + 1.30656296487638f*x1a; | |
302 | const float x29 = 0.176776695296637f * (x25 + x27) + 0.25f*x26; | |
303 | const float x2a = 0.25f * (x25 - x27); | |
304 | const float x2b = 0.176776695296637f * (x25 + x27) - 0.25f*x26; | |
305 | const float x2c = 0.353553390593274f*x28; | |
306 | const float x1b = 0.707106781186547f * (x2a - x2c); | |
307 | const float x1c = 0.707106781186547f * (x2a + x2c); | |
308 | const float x2d = 1.4142135623731f*x0c; | |
309 | const float x2e = x0b + x0d; | |
310 | const float x2f = x0a + x0e; | |
311 | const float x30 = x09 + x0f; | |
312 | const float x31 = x09 - x0f; | |
313 | const float x32 = x0a - x0e; | |
314 | const float x33 = x0b - x0d; | |
315 | const float x37 = 1.4142135623731f*x2d; | |
316 | const float x38 = 1.30656296487638f*x2e + 0.541196100146197f*x30; | |
317 | const float x39 = 1.4142135623731f*x2f; | |
318 | const float x3a = -0.541196100146197f*x2e + 1.30656296487638f*x30; | |
319 | const float x3b = 0.176776695296637f * (x37 + x39) + 0.25f*x38; | |
320 | const float x3c = 0.25f * (x37 - x39); | |
321 | const float x3d = 0.176776695296637f * (x37 + x39) - 0.25f*x38; | |
322 | const float x3e = 0.353553390593274f*x3a; | |
323 | const float x34 = 0.707106781186547f * (x3c - x3e); | |
324 | const float x35 = 0.707106781186547f * (x3c + x3e); | |
325 | const float x3f = 1.4142135623731f*x32; | |
326 | const float x40 = x31 + x33; | |
327 | const float x41 = x31 - x33; | |
328 | const float x42 = 0.25f * (x3f + x40); | |
329 | const float x43 = 0.25f * (x3f - x40); | |
330 | const float x44 = 0.353553390593274f*x41; | |
331 | dst[ 0*dst_stridea] = (add ? dst[ 0*dst_stridea] : 0) + 0.176776695296637f * (x1d + x1f) + 0.25f*x1e; | |
332 | dst[ 1*dst_stridea] = (add ? dst[ 1*dst_stridea] : 0) + 0.707106781186547f * (x29 + x3d); | |
333 | dst[ 2*dst_stridea] = (add ? dst[ 2*dst_stridea] : 0) + 0.707106781186547f * (x29 - x3d); | |
334 | dst[ 3*dst_stridea] = (add ? dst[ 3*dst_stridea] : 0) + 0.707106781186547f * (x23 - x43); | |
335 | dst[ 4*dst_stridea] = (add ? dst[ 4*dst_stridea] : 0) + 0.707106781186547f * (x23 + x43); | |
336 | dst[ 5*dst_stridea] = (add ? dst[ 5*dst_stridea] : 0) + 0.707106781186547f * (x1b - x35); | |
337 | dst[ 6*dst_stridea] = (add ? dst[ 6*dst_stridea] : 0) + 0.707106781186547f * (x1b + x35); | |
338 | dst[ 7*dst_stridea] = (add ? dst[ 7*dst_stridea] : 0) + 0.707106781186547f * (x22 + x44); | |
339 | dst[ 8*dst_stridea] = (add ? dst[ 8*dst_stridea] : 0) + 0.707106781186547f * (x22 - x44); | |
340 | dst[ 9*dst_stridea] = (add ? dst[ 9*dst_stridea] : 0) + 0.707106781186547f * (x1c + x34); | |
341 | dst[10*dst_stridea] = (add ? dst[10*dst_stridea] : 0) + 0.707106781186547f * (x1c - x34); | |
342 | dst[11*dst_stridea] = (add ? dst[11*dst_stridea] : 0) + 0.707106781186547f * (x24 + x42); | |
343 | dst[12*dst_stridea] = (add ? dst[12*dst_stridea] : 0) + 0.707106781186547f * (x24 - x42); | |
344 | dst[13*dst_stridea] = (add ? dst[13*dst_stridea] : 0) + 0.707106781186547f * (x2b - x3b); | |
345 | dst[14*dst_stridea] = (add ? dst[14*dst_stridea] : 0) + 0.707106781186547f * (x2b + x3b); | |
346 | dst[15*dst_stridea] = (add ? dst[15*dst_stridea] : 0) + 0.176776695296637f * (x1d + x1f) - 0.25f*x1e; | |
347 | dst += dst_strideb; | |
348 | src += src_strideb; | |
349 | } | |
350 | } | |
351 | ||
352 | #define DEF_FILTER_FREQ_FUNCS(bsize) \ | |
353 | static av_always_inline void filter_freq_##bsize(const float *src, int src_linesize, \ | |
354 | float *dst, int dst_linesize, \ | |
355 | AVExpr *expr, double *var_values, \ | |
356 | int sigma_th) \ | |
357 | { \ | |
358 | unsigned i; \ | |
359 | DECLARE_ALIGNED(32, float, tmp_block1)[bsize * bsize]; \ | |
360 | DECLARE_ALIGNED(32, float, tmp_block2)[bsize * bsize]; \ | |
361 | \ | |
362 | /* forward DCT */ \ | |
363 | fdct##bsize##_1d(tmp_block1, src, 1, bsize, 1, src_linesize); \ | |
364 | fdct##bsize##_1d(tmp_block2, tmp_block1, bsize, 1, bsize, 1); \ | |
365 | \ | |
366 | for (i = 0; i < bsize*bsize; i++) { \ | |
367 | float *b = &tmp_block2[i]; \ | |
368 | /* frequency filtering */ \ | |
369 | if (expr) { \ | |
370 | var_values[VAR_C] = FFABS(*b); \ | |
371 | *b *= av_expr_eval(expr, var_values, NULL); \ | |
372 | } else { \ | |
373 | if (FFABS(*b) < sigma_th) \ | |
374 | *b = 0; \ | |
375 | } \ | |
376 | } \ | |
377 | \ | |
378 | /* inverse DCT */ \ | |
379 | idct##bsize##_1d(tmp_block1, tmp_block2, 1, bsize, 1, bsize, 0); \ | |
380 | idct##bsize##_1d(dst, tmp_block1, dst_linesize, 1, bsize, 1, 1); \ | |
381 | } \ | |
382 | \ | |
383 | static void filter_freq_sigma_##bsize(DCTdnoizContext *s, \ | |
384 | const float *src, int src_linesize, \ | |
385 | float *dst, int dst_linesize, int thread_id) \ | |
386 | { \ | |
387 | filter_freq_##bsize(src, src_linesize, dst, dst_linesize, NULL, NULL, s->th); \ | |
388 | } \ | |
389 | \ | |
390 | static void filter_freq_expr_##bsize(DCTdnoizContext *s, \ | |
391 | const float *src, int src_linesize, \ | |
392 | float *dst, int dst_linesize, int thread_id) \ | |
393 | { \ | |
394 | filter_freq_##bsize(src, src_linesize, dst, dst_linesize, \ | |
395 | s->expr[thread_id], s->var_values[thread_id], 0); \ | |
396 | } | |
397 | ||
398 | DEF_FILTER_FREQ_FUNCS(8) | |
399 | DEF_FILTER_FREQ_FUNCS(16) | |
400 | ||
401 | #define DCT3X3_0_0 0.5773502691896258f /* 1/sqrt(3) */ | |
402 | #define DCT3X3_0_1 0.5773502691896258f /* 1/sqrt(3) */ | |
403 | #define DCT3X3_0_2 0.5773502691896258f /* 1/sqrt(3) */ | |
404 | #define DCT3X3_1_0 0.7071067811865475f /* 1/sqrt(2) */ | |
405 | #define DCT3X3_1_2 -0.7071067811865475f /* -1/sqrt(2) */ | |
406 | #define DCT3X3_2_0 0.4082482904638631f /* 1/sqrt(6) */ | |
407 | #define DCT3X3_2_1 -0.8164965809277261f /* -2/sqrt(6) */ | |
408 | #define DCT3X3_2_2 0.4082482904638631f /* 1/sqrt(6) */ | |
409 | ||
410 | static av_always_inline void color_decorrelation(float **dst, int dst_linesize, | |
411 | const uint8_t *src, int src_linesize, | |
412 | int w, int h, | |
413 | int r, int g, int b) | |
414 | { | |
415 | int x, y; | |
416 | float *dstp_r = dst[0]; | |
417 | float *dstp_g = dst[1]; | |
418 | float *dstp_b = dst[2]; | |
419 | ||
420 | for (y = 0; y < h; y++) { | |
421 | const uint8_t *srcp = src; | |
422 | ||
423 | for (x = 0; x < w; x++) { | |
424 | dstp_r[x] = srcp[r] * DCT3X3_0_0 + srcp[g] * DCT3X3_0_1 + srcp[b] * DCT3X3_0_2; | |
425 | dstp_g[x] = srcp[r] * DCT3X3_1_0 + srcp[b] * DCT3X3_1_2; | |
426 | dstp_b[x] = srcp[r] * DCT3X3_2_0 + srcp[g] * DCT3X3_2_1 + srcp[b] * DCT3X3_2_2; | |
427 | srcp += 3; | |
428 | } | |
429 | src += src_linesize; | |
430 | dstp_r += dst_linesize; | |
431 | dstp_g += dst_linesize; | |
432 | dstp_b += dst_linesize; | |
433 | } | |
434 | } | |
435 | ||
436 | static av_always_inline void color_correlation(uint8_t *dst, int dst_linesize, | |
437 | float **src, int src_linesize, | |
438 | int w, int h, | |
439 | int r, int g, int b) | |
440 | { | |
441 | int x, y; | |
442 | const float *src_r = src[0]; | |
443 | const float *src_g = src[1]; | |
444 | const float *src_b = src[2]; | |
445 | ||
446 | for (y = 0; y < h; y++) { | |
447 | uint8_t *dstp = dst; | |
448 | ||
449 | for (x = 0; x < w; x++) { | |
450 | dstp[r] = av_clip_uint8(src_r[x] * DCT3X3_0_0 + src_g[x] * DCT3X3_1_0 + src_b[x] * DCT3X3_2_0); | |
451 | dstp[g] = av_clip_uint8(src_r[x] * DCT3X3_0_1 + src_b[x] * DCT3X3_2_1); | |
452 | dstp[b] = av_clip_uint8(src_r[x] * DCT3X3_0_2 + src_g[x] * DCT3X3_1_2 + src_b[x] * DCT3X3_2_2); | |
453 | dstp += 3; | |
454 | } | |
455 | dst += dst_linesize; | |
456 | src_r += src_linesize; | |
457 | src_g += src_linesize; | |
458 | src_b += src_linesize; | |
459 | } | |
460 | } | |
461 | ||
462 | #define DECLARE_COLOR_FUNCS(name, r, g, b) \ | |
463 | static void color_decorrelation_##name(float **dst, int dst_linesize, \ | |
464 | const uint8_t *src, int src_linesize, \ | |
465 | int w, int h) \ | |
466 | { \ | |
467 | color_decorrelation(dst, dst_linesize, src, src_linesize, w, h, r, g, b); \ | |
468 | } \ | |
469 | \ | |
470 | static void color_correlation_##name(uint8_t *dst, int dst_linesize, \ | |
471 | float **src, int src_linesize, \ | |
472 | int w, int h) \ | |
473 | { \ | |
474 | color_correlation(dst, dst_linesize, src, src_linesize, w, h, r, g, b); \ | |
475 | } | |
476 | ||
477 | DECLARE_COLOR_FUNCS(rgb, 0, 1, 2) | |
478 | DECLARE_COLOR_FUNCS(bgr, 2, 1, 0) | |
479 | ||
480 | static int config_input(AVFilterLink *inlink) | |
481 | { | |
482 | AVFilterContext *ctx = inlink->dst; | |
483 | DCTdnoizContext *s = ctx->priv; | |
484 | int i, x, y, bx, by, linesize, *iweights, max_slice_h, slice_h; | |
485 | const int bsize = 1 << s->n; | |
486 | ||
487 | switch (inlink->format) { | |
488 | case AV_PIX_FMT_BGR24: | |
489 | s->color_decorrelation = color_decorrelation_bgr; | |
490 | s->color_correlation = color_correlation_bgr; | |
491 | break; | |
492 | case AV_PIX_FMT_RGB24: | |
493 | s->color_decorrelation = color_decorrelation_rgb; | |
494 | s->color_correlation = color_correlation_rgb; | |
495 | break; | |
496 | default: | |
497 | av_assert0(0); | |
498 | } | |
499 | ||
500 | s->pr_width = inlink->w - (inlink->w - bsize) % s->step; | |
501 | s->pr_height = inlink->h - (inlink->h - bsize) % s->step; | |
502 | if (s->pr_width != inlink->w) | |
503 | av_log(ctx, AV_LOG_WARNING, "The last %d horizontal pixels won't be denoised\n", | |
504 | inlink->w - s->pr_width); | |
505 | if (s->pr_height != inlink->h) | |
506 | av_log(ctx, AV_LOG_WARNING, "The last %d vertical pixels won't be denoised\n", | |
507 | inlink->h - s->pr_height); | |
508 | ||
509 | max_slice_h = s->pr_height / ((s->bsize - 1) * 2); | |
510 | s->nb_threads = FFMIN3(MAX_THREADS, ctx->graph->nb_threads, max_slice_h); | |
511 | av_log(ctx, AV_LOG_DEBUG, "threads: [max=%d hmax=%d user=%d] => %d\n", | |
512 | MAX_THREADS, max_slice_h, ctx->graph->nb_threads, s->nb_threads); | |
513 | ||
514 | s->p_linesize = linesize = FFALIGN(s->pr_width, 32); | |
515 | for (i = 0; i < 2; i++) { | |
516 | s->cbuf[i][0] = av_malloc(linesize * s->pr_height * sizeof(*s->cbuf[i][0])); | |
517 | s->cbuf[i][1] = av_malloc(linesize * s->pr_height * sizeof(*s->cbuf[i][1])); | |
518 | s->cbuf[i][2] = av_malloc(linesize * s->pr_height * sizeof(*s->cbuf[i][2])); | |
519 | if (!s->cbuf[i][0] || !s->cbuf[i][1] || !s->cbuf[i][2]) | |
520 | return AVERROR(ENOMEM); | |
521 | } | |
522 | ||
523 | /* eval expressions are probably not thread safe when the eval internal | |
524 | * state can be changed (typically through load & store operations) */ | |
525 | if (s->expr_str) { | |
526 | for (i = 0; i < s->nb_threads; i++) { | |
527 | int ret = av_expr_parse(&s->expr[i], s->expr_str, var_names, | |
528 | NULL, NULL, NULL, NULL, 0, ctx); | |
529 | if (ret < 0) | |
530 | return ret; | |
531 | } | |
532 | } | |
533 | ||
534 | /* each slice will need to (pre & re)process the top and bottom block of | |
535 | * the previous one in in addition to its processing area. This is because | |
536 | * each pixel is averaged by all the surrounding blocks */ | |
537 | slice_h = (int)ceilf(s->pr_height / s->nb_threads) + (s->bsize - 1) * 2; | |
538 | for (i = 0; i < s->nb_threads; i++) { | |
539 | s->slices[i] = av_malloc_array(linesize, slice_h * sizeof(*s->slices[i])); | |
540 | if (!s->slices[i]) | |
541 | return AVERROR(ENOMEM); | |
542 | } | |
543 | ||
544 | s->weights = av_malloc(s->pr_height * linesize * sizeof(*s->weights)); | |
545 | if (!s->weights) | |
546 | return AVERROR(ENOMEM); | |
547 | iweights = av_calloc(s->pr_height, linesize * sizeof(*iweights)); | |
548 | if (!iweights) | |
549 | return AVERROR(ENOMEM); | |
550 | for (y = 0; y < s->pr_height - bsize + 1; y += s->step) | |
551 | for (x = 0; x < s->pr_width - bsize + 1; x += s->step) | |
552 | for (by = 0; by < bsize; by++) | |
553 | for (bx = 0; bx < bsize; bx++) | |
554 | iweights[(y + by)*linesize + x + bx]++; | |
555 | for (y = 0; y < s->pr_height; y++) | |
556 | for (x = 0; x < s->pr_width; x++) | |
557 | s->weights[y*linesize + x] = 1. / iweights[y*linesize + x]; | |
558 | av_free(iweights); | |
559 | ||
560 | return 0; | |
561 | } | |
562 | ||
563 | static av_cold int init(AVFilterContext *ctx) | |
564 | { | |
565 | DCTdnoizContext *s = ctx->priv; | |
566 | ||
567 | s->bsize = 1 << s->n; | |
568 | if (s->overlap == -1) | |
569 | s->overlap = s->bsize - 1; | |
570 | ||
571 | if (s->overlap > s->bsize - 1) { | |
572 | av_log(s, AV_LOG_ERROR, "Overlap value can not except %d " | |
573 | "with a block size of %dx%d\n", | |
574 | s->bsize - 1, s->bsize, s->bsize); | |
575 | return AVERROR(EINVAL); | |
576 | } | |
577 | ||
578 | if (s->expr_str) { | |
579 | switch (s->n) { | |
580 | case 3: s->filter_freq_func = filter_freq_expr_8; break; | |
581 | case 4: s->filter_freq_func = filter_freq_expr_16; break; | |
582 | default: av_assert0(0); | |
583 | } | |
584 | } else { | |
585 | switch (s->n) { | |
586 | case 3: s->filter_freq_func = filter_freq_sigma_8; break; | |
587 | case 4: s->filter_freq_func = filter_freq_sigma_16; break; | |
588 | default: av_assert0(0); | |
589 | } | |
590 | } | |
591 | ||
592 | s->th = s->sigma * 3.; | |
593 | s->step = s->bsize - s->overlap; | |
594 | return 0; | |
595 | } | |
596 | ||
597 | static int query_formats(AVFilterContext *ctx) | |
598 | { | |
599 | static const enum AVPixelFormat pix_fmts[] = { | |
600 | AV_PIX_FMT_BGR24, AV_PIX_FMT_RGB24, | |
601 | AV_PIX_FMT_NONE | |
602 | }; | |
603 | ff_set_common_formats(ctx, ff_make_format_list(pix_fmts)); | |
604 | return 0; | |
605 | } | |
606 | ||
607 | typedef struct ThreadData { | |
608 | float *src, *dst; | |
609 | } ThreadData; | |
610 | ||
611 | static int filter_slice(AVFilterContext *ctx, | |
612 | void *arg, int jobnr, int nb_jobs) | |
613 | { | |
614 | int x, y; | |
615 | DCTdnoizContext *s = ctx->priv; | |
616 | const ThreadData *td = arg; | |
617 | const int w = s->pr_width; | |
618 | const int h = s->pr_height; | |
619 | const int slice_start = (h * jobnr ) / nb_jobs; | |
620 | const int slice_end = (h * (jobnr+1)) / nb_jobs; | |
621 | const int slice_start_ctx = FFMAX(slice_start - s->bsize + 1, 0); | |
622 | const int slice_end_ctx = FFMIN(slice_end, h - s->bsize + 1); | |
623 | const int slice_h = slice_end_ctx - slice_start_ctx; | |
624 | const int src_linesize = s->p_linesize; | |
625 | const int dst_linesize = s->p_linesize; | |
626 | const int slice_linesize = s->p_linesize; | |
627 | float *dst; | |
628 | const float *src = td->src + slice_start_ctx * src_linesize; | |
629 | const float *weights = s->weights + slice_start * dst_linesize; | |
630 | float *slice = s->slices[jobnr]; | |
631 | ||
632 | // reset block sums | |
633 | memset(slice, 0, (slice_h + s->bsize - 1) * dst_linesize * sizeof(*slice)); | |
634 | ||
635 | // block dct sums | |
636 | for (y = 0; y < slice_h; y += s->step) { | |
637 | for (x = 0; x < w - s->bsize + 1; x += s->step) | |
638 | s->filter_freq_func(s, src + x, src_linesize, | |
639 | slice + x, slice_linesize, | |
640 | jobnr); | |
641 | src += s->step * src_linesize; | |
642 | slice += s->step * slice_linesize; | |
643 | } | |
644 | ||
645 | // average blocks | |
646 | slice = s->slices[jobnr] + (slice_start - slice_start_ctx) * slice_linesize; | |
647 | dst = td->dst + slice_start * dst_linesize; | |
648 | for (y = slice_start; y < slice_end; y++) { | |
649 | for (x = 0; x < w; x++) | |
650 | dst[x] = slice[x] * weights[x]; | |
651 | slice += slice_linesize; | |
652 | dst += dst_linesize; | |
653 | weights += dst_linesize; | |
654 | } | |
655 | ||
656 | return 0; | |
657 | } | |
658 | ||
659 | static int filter_frame(AVFilterLink *inlink, AVFrame *in) | |
660 | { | |
661 | AVFilterContext *ctx = inlink->dst; | |
662 | DCTdnoizContext *s = ctx->priv; | |
663 | AVFilterLink *outlink = inlink->dst->outputs[0]; | |
664 | int direct, plane; | |
665 | AVFrame *out; | |
666 | ||
667 | if (av_frame_is_writable(in)) { | |
668 | direct = 1; | |
669 | out = in; | |
670 | } else { | |
671 | direct = 0; | |
672 | out = ff_get_video_buffer(outlink, outlink->w, outlink->h); | |
673 | if (!out) { | |
674 | av_frame_free(&in); | |
675 | return AVERROR(ENOMEM); | |
676 | } | |
677 | av_frame_copy_props(out, in); | |
678 | } | |
679 | ||
680 | s->color_decorrelation(s->cbuf[0], s->p_linesize, | |
681 | in->data[0], in->linesize[0], | |
682 | s->pr_width, s->pr_height); | |
683 | for (plane = 0; plane < 3; plane++) { | |
684 | ThreadData td = { | |
685 | .src = s->cbuf[0][plane], | |
686 | .dst = s->cbuf[1][plane], | |
687 | }; | |
688 | ctx->internal->execute(ctx, filter_slice, &td, NULL, s->nb_threads); | |
689 | } | |
690 | s->color_correlation(out->data[0], out->linesize[0], | |
691 | s->cbuf[1], s->p_linesize, | |
692 | s->pr_width, s->pr_height); | |
693 | ||
694 | if (!direct) { | |
695 | int y; | |
696 | uint8_t *dst = out->data[0]; | |
697 | const uint8_t *src = in->data[0]; | |
698 | const int dst_linesize = out->linesize[0]; | |
699 | const int src_linesize = in->linesize[0]; | |
700 | const int hpad = (inlink->w - s->pr_width) * 3; | |
701 | const int vpad = (inlink->h - s->pr_height); | |
702 | ||
703 | if (hpad) { | |
704 | uint8_t *dstp = dst + s->pr_width * 3; | |
705 | const uint8_t *srcp = src + s->pr_width * 3; | |
706 | ||
707 | for (y = 0; y < s->pr_height; y++) { | |
708 | memcpy(dstp, srcp, hpad); | |
709 | dstp += dst_linesize; | |
710 | srcp += src_linesize; | |
711 | } | |
712 | } | |
713 | if (vpad) { | |
714 | uint8_t *dstp = dst + s->pr_height * dst_linesize; | |
715 | const uint8_t *srcp = src + s->pr_height * src_linesize; | |
716 | ||
717 | for (y = 0; y < vpad; y++) { | |
718 | memcpy(dstp, srcp, inlink->w * 3); | |
719 | dstp += dst_linesize; | |
720 | srcp += src_linesize; | |
721 | } | |
722 | } | |
723 | ||
724 | av_frame_free(&in); | |
725 | } | |
726 | ||
727 | return ff_filter_frame(outlink, out); | |
728 | } | |
729 | ||
730 | static av_cold void uninit(AVFilterContext *ctx) | |
731 | { | |
732 | int i; | |
733 | DCTdnoizContext *s = ctx->priv; | |
734 | ||
735 | av_free(s->weights); | |
736 | for (i = 0; i < 2; i++) { | |
737 | av_free(s->cbuf[i][0]); | |
738 | av_free(s->cbuf[i][1]); | |
739 | av_free(s->cbuf[i][2]); | |
740 | } | |
741 | for (i = 0; i < s->nb_threads; i++) { | |
742 | av_free(s->slices[i]); | |
743 | av_expr_free(s->expr[i]); | |
744 | } | |
745 | } | |
746 | ||
747 | static const AVFilterPad dctdnoiz_inputs[] = { | |
748 | { | |
749 | .name = "default", | |
750 | .type = AVMEDIA_TYPE_VIDEO, | |
751 | .filter_frame = filter_frame, | |
752 | .config_props = config_input, | |
753 | }, | |
754 | { NULL } | |
755 | }; | |
756 | ||
757 | static const AVFilterPad dctdnoiz_outputs[] = { | |
758 | { | |
759 | .name = "default", | |
760 | .type = AVMEDIA_TYPE_VIDEO, | |
761 | }, | |
762 | { NULL } | |
763 | }; | |
764 | ||
765 | AVFilter ff_vf_dctdnoiz = { | |
766 | .name = "dctdnoiz", | |
767 | .description = NULL_IF_CONFIG_SMALL("Denoise frames using 2D DCT."), | |
768 | .priv_size = sizeof(DCTdnoizContext), | |
769 | .init = init, | |
770 | .uninit = uninit, | |
771 | .query_formats = query_formats, | |
772 | .inputs = dctdnoiz_inputs, | |
773 | .outputs = dctdnoiz_outputs, | |
774 | .priv_class = &dctdnoiz_class, | |
775 | .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, | |
776 | }; |