2 * Copyright (c) 2013-2014 Clément Bœsch
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 * A simple, relatively efficient and slow DCT image denoiser.
24 * @see http://www.ipol.im/pub/art/2011/ys-dct/
26 * The DCT factorization used is based on "Fast and numerically stable
27 * algorithms for discrete cosine transforms" from Gerlind Plonkaa & Manfred
28 * Tasche (DOI: 10.1016/j.laa.2004.07.015).
31 #include "libavutil/avassert.h"
32 #include "libavutil/eval.h"
33 #include "libavutil/opt.h"
36 static const char *const var_names
[] = { "c", NULL
};
37 enum { VAR_C
, VAR_VARS_NB
};
41 typedef struct DCTdnoizContext
{
44 /* coefficient factor expression */
46 AVExpr
*expr
[MAX_THREADS
];
47 double var_values
[MAX_THREADS
][VAR_VARS_NB
];
50 int pr_width
, pr_height
; // width and height to process
51 float sigma
; // used when no expression are st
52 float th
; // threshold (3*sigma)
53 float *cbuf
[2][3]; // two planar rgb color buffers
54 float *slices
[MAX_THREADS
]; // slices buffers (1 slice buffer per thread)
55 float *weights
; // dct coeff are cumulated with overlapping; these values are used for averaging
56 int p_linesize
; // line sizes for color and weights
57 int overlap
; // number of block overlapping pixels
58 int step
; // block step increment (blocksize - overlap)
59 int n
; // 1<<n is the block size
60 int bsize
; // block size, 1<<n
61 void (*filter_freq_func
)(struct DCTdnoizContext
*s
,
62 const float *src
, int src_linesize
,
63 float *dst
, int dst_linesize
,
65 void (*color_decorrelation
)(float **dst
, int dst_linesize
,
66 const uint8_t *src
, int src_linesize
,
68 void (*color_correlation
)(uint8_t *dst
, int dst_linesize
,
69 float **src
, int src_linesize
,
73 #define MIN_NBITS 3 /* blocksize = 1<<3 = 8 */
74 #define MAX_NBITS 4 /* blocksize = 1<<4 = 16 */
75 #define DEFAULT_NBITS 3
77 #define OFFSET(x) offsetof(DCTdnoizContext, x)
78 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
79 static const AVOption dctdnoiz_options
[] = {
80 { "sigma", "set noise sigma constant", OFFSET(sigma
), AV_OPT_TYPE_FLOAT
, {.dbl
=0}, 0, 999, .flags
= FLAGS
},
81 { "s", "set noise sigma constant", OFFSET(sigma
), AV_OPT_TYPE_FLOAT
, {.dbl
=0}, 0, 999, .flags
= FLAGS
},
82 { "overlap", "set number of block overlapping pixels", OFFSET(overlap
), AV_OPT_TYPE_INT
, {.i64
=-1}, -1, (1<<MAX_NBITS
)-1, .flags
= FLAGS
},
83 { "expr", "set coefficient factor expression", OFFSET(expr_str
), AV_OPT_TYPE_STRING
, {.str
=NULL
}, .flags
= FLAGS
},
84 { "e", "set coefficient factor expression", OFFSET(expr_str
), AV_OPT_TYPE_STRING
, {.str
=NULL
}, .flags
= FLAGS
},
85 { "n", "set the block size, expressed in bits", OFFSET(n
), AV_OPT_TYPE_INT
, {.i64
=DEFAULT_NBITS
}, MIN_NBITS
, MAX_NBITS
, .flags
= FLAGS
},
89 AVFILTER_DEFINE_CLASS(dctdnoiz
);
91 static void av_always_inline
fdct8_1d(float *dst
, const float *src
,
92 int dst_stridea
, int dst_strideb
,
93 int src_stridea
, int src_strideb
)
97 for (i
= 0; i
< 8; i
++) {
98 const float x00
= src
[0*src_stridea
] + src
[7*src_stridea
];
99 const float x01
= src
[1*src_stridea
] + src
[6*src_stridea
];
100 const float x02
= src
[2*src_stridea
] + src
[5*src_stridea
];
101 const float x03
= src
[3*src_stridea
] + src
[4*src_stridea
];
102 const float x04
= src
[0*src_stridea
] - src
[7*src_stridea
];
103 const float x05
= src
[1*src_stridea
] - src
[6*src_stridea
];
104 const float x06
= src
[2*src_stridea
] - src
[5*src_stridea
];
105 const float x07
= src
[3*src_stridea
] - src
[4*src_stridea
];
106 const float x08
= x00
+ x03
;
107 const float x09
= x01
+ x02
;
108 const float x0a
= x00
- x03
;
109 const float x0b
= x01
- x02
;
110 const float x0c
= 1.38703984532215f
*x04
+ 0.275899379282943f
*x07
;
111 const float x0d
= 1.17587560241936f
*x05
+ 0.785694958387102f
*x06
;
112 const float x0e
= -0.785694958387102f
*x05
+ 1.17587560241936f
*x06
;
113 const float x0f
= 0.275899379282943f
*x04
- 1.38703984532215f
*x07
;
114 const float x10
= 0.353553390593274f
* (x0c
- x0d
);
115 const float x11
= 0.353553390593274f
* (x0e
- x0f
);
116 dst
[0*dst_stridea
] = 0.353553390593274f
* (x08
+ x09
);
117 dst
[1*dst_stridea
] = 0.353553390593274f
* (x0c
+ x0d
);
118 dst
[2*dst_stridea
] = 0.461939766255643f
*x0a
+ 0.191341716182545f
*x0b
;
119 dst
[3*dst_stridea
] = 0.707106781186547f
* (x10
- x11
);
120 dst
[4*dst_stridea
] = 0.353553390593274f
* (x08
- x09
);
121 dst
[5*dst_stridea
] = 0.707106781186547f
* (x10
+ x11
);
122 dst
[6*dst_stridea
] = 0.191341716182545f
*x0a
- 0.461939766255643f
*x0b
;
123 dst
[7*dst_stridea
] = 0.353553390593274f
* (x0e
+ x0f
);
129 static void av_always_inline
idct8_1d(float *dst
, const float *src
,
130 int dst_stridea
, int dst_strideb
,
131 int src_stridea
, int src_strideb
,
136 for (i
= 0; i
< 8; i
++) {
137 const float x00
= 1.4142135623731f
*src
[0*src_stridea
];
138 const float x01
= 1.38703984532215f
*src
[1*src_stridea
] + 0.275899379282943f
*src
[7*src_stridea
];
139 const float x02
= 1.30656296487638f
*src
[2*src_stridea
] + 0.541196100146197f
*src
[6*src_stridea
];
140 const float x03
= 1.17587560241936f
*src
[3*src_stridea
] + 0.785694958387102f
*src
[5*src_stridea
];
141 const float x04
= 1.4142135623731f
*src
[4*src_stridea
];
142 const float x05
= -0.785694958387102f
*src
[3*src_stridea
] + 1.17587560241936f
*src
[5*src_stridea
];
143 const float x06
= 0.541196100146197f
*src
[2*src_stridea
] - 1.30656296487638f
*src
[6*src_stridea
];
144 const float x07
= -0.275899379282943f
*src
[1*src_stridea
] + 1.38703984532215f
*src
[7*src_stridea
];
145 const float x09
= x00
+ x04
;
146 const float x0a
= x01
+ x03
;
147 const float x0b
= 1.4142135623731f
*x02
;
148 const float x0c
= x00
- x04
;
149 const float x0d
= x01
- x03
;
150 const float x0e
= 0.353553390593274f
* (x09
- x0b
);
151 const float x0f
= 0.353553390593274f
* (x0c
+ x0d
);
152 const float x10
= 0.353553390593274f
* (x0c
- x0d
);
153 const float x11
= 1.4142135623731f
*x06
;
154 const float x12
= x05
+ x07
;
155 const float x13
= x05
- x07
;
156 const float x14
= 0.353553390593274f
* (x11
+ x12
);
157 const float x15
= 0.353553390593274f
* (x11
- x12
);
158 const float x16
= 0.5f
*x13
;
159 dst
[0*dst_stridea
] = (add
? dst
[ 0*dst_stridea
] : 0) + 0.25f
* (x09
+ x0b
) + 0.353553390593274f
*x0a
;
160 dst
[1*dst_stridea
] = (add
? dst
[ 1*dst_stridea
] : 0) + 0.707106781186547f
* (x0f
+ x15
);
161 dst
[2*dst_stridea
] = (add
? dst
[ 2*dst_stridea
] : 0) + 0.707106781186547f
* (x0f
- x15
);
162 dst
[3*dst_stridea
] = (add
? dst
[ 3*dst_stridea
] : 0) + 0.707106781186547f
* (x0e
+ x16
);
163 dst
[4*dst_stridea
] = (add
? dst
[ 4*dst_stridea
] : 0) + 0.707106781186547f
* (x0e
- x16
);
164 dst
[5*dst_stridea
] = (add
? dst
[ 5*dst_stridea
] : 0) + 0.707106781186547f
* (x10
- x14
);
165 dst
[6*dst_stridea
] = (add
? dst
[ 6*dst_stridea
] : 0) + 0.707106781186547f
* (x10
+ x14
);
166 dst
[7*dst_stridea
] = (add
? dst
[ 7*dst_stridea
] : 0) + 0.25f
* (x09
+ x0b
) - 0.353553390593274f
*x0a
;
173 static void av_always_inline
fdct16_1d(float *dst
, const float *src
,
174 int dst_stridea
, int dst_strideb
,
175 int src_stridea
, int src_strideb
)
179 for (i
= 0; i
< 16; i
++) {
180 const float x00
= src
[ 0*src_stridea
] + src
[15*src_stridea
];
181 const float x01
= src
[ 1*src_stridea
] + src
[14*src_stridea
];
182 const float x02
= src
[ 2*src_stridea
] + src
[13*src_stridea
];
183 const float x03
= src
[ 3*src_stridea
] + src
[12*src_stridea
];
184 const float x04
= src
[ 4*src_stridea
] + src
[11*src_stridea
];
185 const float x05
= src
[ 5*src_stridea
] + src
[10*src_stridea
];
186 const float x06
= src
[ 6*src_stridea
] + src
[ 9*src_stridea
];
187 const float x07
= src
[ 7*src_stridea
] + src
[ 8*src_stridea
];
188 const float x08
= src
[ 0*src_stridea
] - src
[15*src_stridea
];
189 const float x09
= src
[ 1*src_stridea
] - src
[14*src_stridea
];
190 const float x0a
= src
[ 2*src_stridea
] - src
[13*src_stridea
];
191 const float x0b
= src
[ 3*src_stridea
] - src
[12*src_stridea
];
192 const float x0c
= src
[ 4*src_stridea
] - src
[11*src_stridea
];
193 const float x0d
= src
[ 5*src_stridea
] - src
[10*src_stridea
];
194 const float x0e
= src
[ 6*src_stridea
] - src
[ 9*src_stridea
];
195 const float x0f
= src
[ 7*src_stridea
] - src
[ 8*src_stridea
];
196 const float x10
= x00
+ x07
;
197 const float x11
= x01
+ x06
;
198 const float x12
= x02
+ x05
;
199 const float x13
= x03
+ x04
;
200 const float x14
= x00
- x07
;
201 const float x15
= x01
- x06
;
202 const float x16
= x02
- x05
;
203 const float x17
= x03
- x04
;
204 const float x18
= x10
+ x13
;
205 const float x19
= x11
+ x12
;
206 const float x1a
= x10
- x13
;
207 const float x1b
= x11
- x12
;
208 const float x1c
= 1.38703984532215f
*x14
+ 0.275899379282943f
*x17
;
209 const float x1d
= 1.17587560241936f
*x15
+ 0.785694958387102f
*x16
;
210 const float x1e
= -0.785694958387102f
*x15
+ 1.17587560241936f
*x16
;
211 const float x1f
= 0.275899379282943f
*x14
- 1.38703984532215f
*x17
;
212 const float x20
= 0.25f
* (x1c
- x1d
);
213 const float x21
= 0.25f
* (x1e
- x1f
);
214 const float x22
= 1.40740373752638f
*x08
+ 0.138617169199091f
*x0f
;
215 const float x23
= 1.35331800117435f
*x09
+ 0.410524527522357f
*x0e
;
216 const float x24
= 1.24722501298667f
*x0a
+ 0.666655658477747f
*x0d
;
217 const float x25
= 1.09320186700176f
*x0b
+ 0.897167586342636f
*x0c
;
218 const float x26
= -0.897167586342636f
*x0b
+ 1.09320186700176f
*x0c
;
219 const float x27
= 0.666655658477747f
*x0a
- 1.24722501298667f
*x0d
;
220 const float x28
= -0.410524527522357f
*x09
+ 1.35331800117435f
*x0e
;
221 const float x29
= 0.138617169199091f
*x08
- 1.40740373752638f
*x0f
;
222 const float x2a
= x22
+ x25
;
223 const float x2b
= x23
+ x24
;
224 const float x2c
= x22
- x25
;
225 const float x2d
= x23
- x24
;
226 const float x2e
= 0.25f
* (x2a
- x2b
);
227 const float x2f
= 0.326640741219094f
*x2c
+ 0.135299025036549f
*x2d
;
228 const float x30
= 0.135299025036549f
*x2c
- 0.326640741219094f
*x2d
;
229 const float x31
= x26
+ x29
;
230 const float x32
= x27
+ x28
;
231 const float x33
= x26
- x29
;
232 const float x34
= x27
- x28
;
233 const float x35
= 0.25f
* (x31
- x32
);
234 const float x36
= 0.326640741219094f
*x33
+ 0.135299025036549f
*x34
;
235 const float x37
= 0.135299025036549f
*x33
- 0.326640741219094f
*x34
;
236 dst
[ 0*dst_stridea
] = 0.25f
* (x18
+ x19
);
237 dst
[ 1*dst_stridea
] = 0.25f
* (x2a
+ x2b
);
238 dst
[ 2*dst_stridea
] = 0.25f
* (x1c
+ x1d
);
239 dst
[ 3*dst_stridea
] = 0.707106781186547f
* (x2f
- x37
);
240 dst
[ 4*dst_stridea
] = 0.326640741219094f
*x1a
+ 0.135299025036549f
*x1b
;
241 dst
[ 5*dst_stridea
] = 0.707106781186547f
* (x2f
+ x37
);
242 dst
[ 6*dst_stridea
] = 0.707106781186547f
* (x20
- x21
);
243 dst
[ 7*dst_stridea
] = 0.707106781186547f
* (x2e
+ x35
);
244 dst
[ 8*dst_stridea
] = 0.25f
* (x18
- x19
);
245 dst
[ 9*dst_stridea
] = 0.707106781186547f
* (x2e
- x35
);
246 dst
[10*dst_stridea
] = 0.707106781186547f
* (x20
+ x21
);
247 dst
[11*dst_stridea
] = 0.707106781186547f
* (x30
- x36
);
248 dst
[12*dst_stridea
] = 0.135299025036549f
*x1a
- 0.326640741219094f
*x1b
;
249 dst
[13*dst_stridea
] = 0.707106781186547f
* (x30
+ x36
);
250 dst
[14*dst_stridea
] = 0.25f
* (x1e
+ x1f
);
251 dst
[15*dst_stridea
] = 0.25f
* (x31
+ x32
);
257 static void av_always_inline
idct16_1d(float *dst
, const float *src
,
258 int dst_stridea
, int dst_strideb
,
259 int src_stridea
, int src_strideb
,
264 for (i
= 0; i
< 16; i
++) {
265 const float x00
= 1.4142135623731f
*src
[ 0*src_stridea
];
266 const float x01
= 1.40740373752638f
*src
[ 1*src_stridea
] + 0.138617169199091f
*src
[15*src_stridea
];
267 const float x02
= 1.38703984532215f
*src
[ 2*src_stridea
] + 0.275899379282943f
*src
[14*src_stridea
];
268 const float x03
= 1.35331800117435f
*src
[ 3*src_stridea
] + 0.410524527522357f
*src
[13*src_stridea
];
269 const float x04
= 1.30656296487638f
*src
[ 4*src_stridea
] + 0.541196100146197f
*src
[12*src_stridea
];
270 const float x05
= 1.24722501298667f
*src
[ 5*src_stridea
] + 0.666655658477747f
*src
[11*src_stridea
];
271 const float x06
= 1.17587560241936f
*src
[ 6*src_stridea
] + 0.785694958387102f
*src
[10*src_stridea
];
272 const float x07
= 1.09320186700176f
*src
[ 7*src_stridea
] + 0.897167586342636f
*src
[ 9*src_stridea
];
273 const float x08
= 1.4142135623731f
*src
[ 8*src_stridea
];
274 const float x09
= -0.897167586342636f
*src
[ 7*src_stridea
] + 1.09320186700176f
*src
[ 9*src_stridea
];
275 const float x0a
= 0.785694958387102f
*src
[ 6*src_stridea
] - 1.17587560241936f
*src
[10*src_stridea
];
276 const float x0b
= -0.666655658477747f
*src
[ 5*src_stridea
] + 1.24722501298667f
*src
[11*src_stridea
];
277 const float x0c
= 0.541196100146197f
*src
[ 4*src_stridea
] - 1.30656296487638f
*src
[12*src_stridea
];
278 const float x0d
= -0.410524527522357f
*src
[ 3*src_stridea
] + 1.35331800117435f
*src
[13*src_stridea
];
279 const float x0e
= 0.275899379282943f
*src
[ 2*src_stridea
] - 1.38703984532215f
*src
[14*src_stridea
];
280 const float x0f
= -0.138617169199091f
*src
[ 1*src_stridea
] + 1.40740373752638f
*src
[15*src_stridea
];
281 const float x12
= x00
+ x08
;
282 const float x13
= x01
+ x07
;
283 const float x14
= x02
+ x06
;
284 const float x15
= x03
+ x05
;
285 const float x16
= 1.4142135623731f
*x04
;
286 const float x17
= x00
- x08
;
287 const float x18
= x01
- x07
;
288 const float x19
= x02
- x06
;
289 const float x1a
= x03
- x05
;
290 const float x1d
= x12
+ x16
;
291 const float x1e
= x13
+ x15
;
292 const float x1f
= 1.4142135623731f
*x14
;
293 const float x20
= x12
- x16
;
294 const float x21
= x13
- x15
;
295 const float x22
= 0.25f
* (x1d
- x1f
);
296 const float x23
= 0.25f
* (x20
+ x21
);
297 const float x24
= 0.25f
* (x20
- x21
);
298 const float x25
= 1.4142135623731f
*x17
;
299 const float x26
= 1.30656296487638f
*x18
+ 0.541196100146197f
*x1a
;
300 const float x27
= 1.4142135623731f
*x19
;
301 const float x28
= -0.541196100146197f
*x18
+ 1.30656296487638f
*x1a
;
302 const float x29
= 0.176776695296637f
* (x25
+ x27
) + 0.25f
*x26
;
303 const float x2a
= 0.25f
* (x25
- x27
);
304 const float x2b
= 0.176776695296637f
* (x25
+ x27
) - 0.25f
*x26
;
305 const float x2c
= 0.353553390593274f
*x28
;
306 const float x1b
= 0.707106781186547f
* (x2a
- x2c
);
307 const float x1c
= 0.707106781186547f
* (x2a
+ x2c
);
308 const float x2d
= 1.4142135623731f
*x0c
;
309 const float x2e
= x0b
+ x0d
;
310 const float x2f
= x0a
+ x0e
;
311 const float x30
= x09
+ x0f
;
312 const float x31
= x09
- x0f
;
313 const float x32
= x0a
- x0e
;
314 const float x33
= x0b
- x0d
;
315 const float x37
= 1.4142135623731f
*x2d
;
316 const float x38
= 1.30656296487638f
*x2e
+ 0.541196100146197f
*x30
;
317 const float x39
= 1.4142135623731f
*x2f
;
318 const float x3a
= -0.541196100146197f
*x2e
+ 1.30656296487638f
*x30
;
319 const float x3b
= 0.176776695296637f
* (x37
+ x39
) + 0.25f
*x38
;
320 const float x3c
= 0.25f
* (x37
- x39
);
321 const float x3d
= 0.176776695296637f
* (x37
+ x39
) - 0.25f
*x38
;
322 const float x3e
= 0.353553390593274f
*x3a
;
323 const float x34
= 0.707106781186547f
* (x3c
- x3e
);
324 const float x35
= 0.707106781186547f
* (x3c
+ x3e
);
325 const float x3f
= 1.4142135623731f
*x32
;
326 const float x40
= x31
+ x33
;
327 const float x41
= x31
- x33
;
328 const float x42
= 0.25f
* (x3f
+ x40
);
329 const float x43
= 0.25f
* (x3f
- x40
);
330 const float x44
= 0.353553390593274f
*x41
;
331 dst
[ 0*dst_stridea
] = (add
? dst
[ 0*dst_stridea
] : 0) + 0.176776695296637f
* (x1d
+ x1f
) + 0.25f
*x1e
;
332 dst
[ 1*dst_stridea
] = (add
? dst
[ 1*dst_stridea
] : 0) + 0.707106781186547f
* (x29
+ x3d
);
333 dst
[ 2*dst_stridea
] = (add
? dst
[ 2*dst_stridea
] : 0) + 0.707106781186547f
* (x29
- x3d
);
334 dst
[ 3*dst_stridea
] = (add
? dst
[ 3*dst_stridea
] : 0) + 0.707106781186547f
* (x23
- x43
);
335 dst
[ 4*dst_stridea
] = (add
? dst
[ 4*dst_stridea
] : 0) + 0.707106781186547f
* (x23
+ x43
);
336 dst
[ 5*dst_stridea
] = (add
? dst
[ 5*dst_stridea
] : 0) + 0.707106781186547f
* (x1b
- x35
);
337 dst
[ 6*dst_stridea
] = (add
? dst
[ 6*dst_stridea
] : 0) + 0.707106781186547f
* (x1b
+ x35
);
338 dst
[ 7*dst_stridea
] = (add
? dst
[ 7*dst_stridea
] : 0) + 0.707106781186547f
* (x22
+ x44
);
339 dst
[ 8*dst_stridea
] = (add
? dst
[ 8*dst_stridea
] : 0) + 0.707106781186547f
* (x22
- x44
);
340 dst
[ 9*dst_stridea
] = (add
? dst
[ 9*dst_stridea
] : 0) + 0.707106781186547f
* (x1c
+ x34
);
341 dst
[10*dst_stridea
] = (add
? dst
[10*dst_stridea
] : 0) + 0.707106781186547f
* (x1c
- x34
);
342 dst
[11*dst_stridea
] = (add
? dst
[11*dst_stridea
] : 0) + 0.707106781186547f
* (x24
+ x42
);
343 dst
[12*dst_stridea
] = (add
? dst
[12*dst_stridea
] : 0) + 0.707106781186547f
* (x24
- x42
);
344 dst
[13*dst_stridea
] = (add
? dst
[13*dst_stridea
] : 0) + 0.707106781186547f
* (x2b
- x3b
);
345 dst
[14*dst_stridea
] = (add
? dst
[14*dst_stridea
] : 0) + 0.707106781186547f
* (x2b
+ x3b
);
346 dst
[15*dst_stridea
] = (add
? dst
[15*dst_stridea
] : 0) + 0.176776695296637f
* (x1d
+ x1f
) - 0.25f
*x1e
;
352 #define DEF_FILTER_FREQ_FUNCS(bsize) \
353 static av_always_inline void filter_freq_##bsize(const float *src, int src_linesize, \
354 float *dst, int dst_linesize, \
355 AVExpr *expr, double *var_values, \
359 DECLARE_ALIGNED(32, float, tmp_block1)[bsize * bsize]; \
360 DECLARE_ALIGNED(32, float, tmp_block2)[bsize * bsize]; \
363 fdct##bsize##_1d(tmp_block1, src, 1, bsize, 1, src_linesize); \
364 fdct##bsize##_1d(tmp_block2, tmp_block1, bsize, 1, bsize, 1); \
366 for (i = 0; i < bsize*bsize; i++) { \
367 float *b = &tmp_block2[i]; \
368 /* frequency filtering */ \
370 var_values[VAR_C] = FFABS(*b); \
371 *b *= av_expr_eval(expr, var_values, NULL); \
373 if (FFABS(*b) < sigma_th) \
379 idct##bsize##_1d(tmp_block1, tmp_block2, 1, bsize, 1, bsize, 0); \
380 idct##bsize##_1d(dst, tmp_block1, dst_linesize, 1, bsize, 1, 1); \
383 static void filter_freq_sigma_##bsize(DCTdnoizContext *s, \
384 const float *src, int src_linesize, \
385 float *dst, int dst_linesize, int thread_id) \
387 filter_freq_##bsize(src, src_linesize, dst, dst_linesize, NULL, NULL, s->th); \
390 static void filter_freq_expr_##bsize(DCTdnoizContext *s, \
391 const float *src, int src_linesize, \
392 float *dst, int dst_linesize, int thread_id) \
394 filter_freq_##bsize(src, src_linesize, dst, dst_linesize, \
395 s->expr[thread_id], s->var_values[thread_id], 0); \
398 DEF_FILTER_FREQ_FUNCS(8)
399 DEF_FILTER_FREQ_FUNCS(16)
401 #define DCT3X3_0_0 0.5773502691896258f /* 1/sqrt(3) */
402 #define DCT3X3_0_1 0.5773502691896258f /* 1/sqrt(3) */
403 #define DCT3X3_0_2 0.5773502691896258f /* 1/sqrt(3) */
404 #define DCT3X3_1_0 0.7071067811865475f /* 1/sqrt(2) */
405 #define DCT3X3_1_2 -0.7071067811865475f /* -1/sqrt(2) */
406 #define DCT3X3_2_0 0.4082482904638631f /* 1/sqrt(6) */
407 #define DCT3X3_2_1 -0.8164965809277261f /* -2/sqrt(6) */
408 #define DCT3X3_2_2 0.4082482904638631f /* 1/sqrt(6) */
410 static av_always_inline
void color_decorrelation(float **dst
, int dst_linesize
,
411 const uint8_t *src
, int src_linesize
,
416 float *dstp_r
= dst
[0];
417 float *dstp_g
= dst
[1];
418 float *dstp_b
= dst
[2];
420 for (y
= 0; y
< h
; y
++) {
421 const uint8_t *srcp
= src
;
423 for (x
= 0; x
< w
; x
++) {
424 dstp_r
[x
] = srcp
[r
] * DCT3X3_0_0
+ srcp
[g
] * DCT3X3_0_1
+ srcp
[b
] * DCT3X3_0_2
;
425 dstp_g
[x
] = srcp
[r
] * DCT3X3_1_0
+ srcp
[b
] * DCT3X3_1_2
;
426 dstp_b
[x
] = srcp
[r
] * DCT3X3_2_0
+ srcp
[g
] * DCT3X3_2_1
+ srcp
[b
] * DCT3X3_2_2
;
430 dstp_r
+= dst_linesize
;
431 dstp_g
+= dst_linesize
;
432 dstp_b
+= dst_linesize
;
436 static av_always_inline
void color_correlation(uint8_t *dst
, int dst_linesize
,
437 float **src
, int src_linesize
,
442 const float *src_r
= src
[0];
443 const float *src_g
= src
[1];
444 const float *src_b
= src
[2];
446 for (y
= 0; y
< h
; y
++) {
449 for (x
= 0; x
< w
; x
++) {
450 dstp
[r
] = av_clip_uint8(src_r
[x
] * DCT3X3_0_0
+ src_g
[x
] * DCT3X3_1_0
+ src_b
[x
] * DCT3X3_2_0
);
451 dstp
[g
] = av_clip_uint8(src_r
[x
] * DCT3X3_0_1
+ src_b
[x
] * DCT3X3_2_1
);
452 dstp
[b
] = av_clip_uint8(src_r
[x
] * DCT3X3_0_2
+ src_g
[x
] * DCT3X3_1_2
+ src_b
[x
] * DCT3X3_2_2
);
456 src_r
+= src_linesize
;
457 src_g
+= src_linesize
;
458 src_b
+= src_linesize
;
462 #define DECLARE_COLOR_FUNCS(name, r, g, b) \
463 static void color_decorrelation_##name(float **dst, int dst_linesize, \
464 const uint8_t *src, int src_linesize, \
467 color_decorrelation(dst, dst_linesize, src, src_linesize, w, h, r, g, b); \
470 static void color_correlation_##name(uint8_t *dst, int dst_linesize, \
471 float **src, int src_linesize, \
474 color_correlation(dst, dst_linesize, src, src_linesize, w, h, r, g, b); \
477 DECLARE_COLOR_FUNCS(rgb
, 0, 1, 2)
478 DECLARE_COLOR_FUNCS(bgr
, 2, 1, 0)
480 static int config_input(AVFilterLink
*inlink
)
482 AVFilterContext
*ctx
= inlink
->dst
;
483 DCTdnoizContext
*s
= ctx
->priv
;
484 int i
, x
, y
, bx
, by
, linesize
, *iweights
, max_slice_h
, slice_h
;
485 const int bsize
= 1 << s
->n
;
487 switch (inlink
->format
) {
488 case AV_PIX_FMT_BGR24
:
489 s
->color_decorrelation
= color_decorrelation_bgr
;
490 s
->color_correlation
= color_correlation_bgr
;
492 case AV_PIX_FMT_RGB24
:
493 s
->color_decorrelation
= color_decorrelation_rgb
;
494 s
->color_correlation
= color_correlation_rgb
;
500 s
->pr_width
= inlink
->w
- (inlink
->w
- bsize
) % s
->step
;
501 s
->pr_height
= inlink
->h
- (inlink
->h
- bsize
) % s
->step
;
502 if (s
->pr_width
!= inlink
->w
)
503 av_log(ctx
, AV_LOG_WARNING
, "The last %d horizontal pixels won't be denoised\n",
504 inlink
->w
- s
->pr_width
);
505 if (s
->pr_height
!= inlink
->h
)
506 av_log(ctx
, AV_LOG_WARNING
, "The last %d vertical pixels won't be denoised\n",
507 inlink
->h
- s
->pr_height
);
509 max_slice_h
= s
->pr_height
/ ((s
->bsize
- 1) * 2);
510 s
->nb_threads
= FFMIN3(MAX_THREADS
, ctx
->graph
->nb_threads
, max_slice_h
);
511 av_log(ctx
, AV_LOG_DEBUG
, "threads: [max=%d hmax=%d user=%d] => %d\n",
512 MAX_THREADS
, max_slice_h
, ctx
->graph
->nb_threads
, s
->nb_threads
);
514 s
->p_linesize
= linesize
= FFALIGN(s
->pr_width
, 32);
515 for (i
= 0; i
< 2; i
++) {
516 s
->cbuf
[i
][0] = av_malloc(linesize
* s
->pr_height
* sizeof(*s
->cbuf
[i
][0]));
517 s
->cbuf
[i
][1] = av_malloc(linesize
* s
->pr_height
* sizeof(*s
->cbuf
[i
][1]));
518 s
->cbuf
[i
][2] = av_malloc(linesize
* s
->pr_height
* sizeof(*s
->cbuf
[i
][2]));
519 if (!s
->cbuf
[i
][0] || !s
->cbuf
[i
][1] || !s
->cbuf
[i
][2])
520 return AVERROR(ENOMEM
);
523 /* eval expressions are probably not thread safe when the eval internal
524 * state can be changed (typically through load & store operations) */
526 for (i
= 0; i
< s
->nb_threads
; i
++) {
527 int ret
= av_expr_parse(&s
->expr
[i
], s
->expr_str
, var_names
,
528 NULL
, NULL
, NULL
, NULL
, 0, ctx
);
534 /* each slice will need to (pre & re)process the top and bottom block of
535 * the previous one in in addition to its processing area. This is because
536 * each pixel is averaged by all the surrounding blocks */
537 slice_h
= (int)ceilf(s
->pr_height
/ s
->nb_threads
) + (s
->bsize
- 1) * 2;
538 for (i
= 0; i
< s
->nb_threads
; i
++) {
539 s
->slices
[i
] = av_malloc_array(linesize
, slice_h
* sizeof(*s
->slices
[i
]));
541 return AVERROR(ENOMEM
);
544 s
->weights
= av_malloc(s
->pr_height
* linesize
* sizeof(*s
->weights
));
546 return AVERROR(ENOMEM
);
547 iweights
= av_calloc(s
->pr_height
, linesize
* sizeof(*iweights
));
549 return AVERROR(ENOMEM
);
550 for (y
= 0; y
< s
->pr_height
- bsize
+ 1; y
+= s
->step
)
551 for (x
= 0; x
< s
->pr_width
- bsize
+ 1; x
+= s
->step
)
552 for (by
= 0; by
< bsize
; by
++)
553 for (bx
= 0; bx
< bsize
; bx
++)
554 iweights
[(y
+ by
)*linesize
+ x
+ bx
]++;
555 for (y
= 0; y
< s
->pr_height
; y
++)
556 for (x
= 0; x
< s
->pr_width
; x
++)
557 s
->weights
[y
*linesize
+ x
] = 1. / iweights
[y
*linesize
+ x
];
563 static av_cold
int init(AVFilterContext
*ctx
)
565 DCTdnoizContext
*s
= ctx
->priv
;
567 s
->bsize
= 1 << s
->n
;
568 if (s
->overlap
== -1)
569 s
->overlap
= s
->bsize
- 1;
571 if (s
->overlap
> s
->bsize
- 1) {
572 av_log(s
, AV_LOG_ERROR
, "Overlap value can not except %d "
573 "with a block size of %dx%d\n",
574 s
->bsize
- 1, s
->bsize
, s
->bsize
);
575 return AVERROR(EINVAL
);
580 case 3: s
->filter_freq_func
= filter_freq_expr_8
; break;
581 case 4: s
->filter_freq_func
= filter_freq_expr_16
; break;
582 default: av_assert0(0);
586 case 3: s
->filter_freq_func
= filter_freq_sigma_8
; break;
587 case 4: s
->filter_freq_func
= filter_freq_sigma_16
; break;
588 default: av_assert0(0);
592 s
->th
= s
->sigma
* 3.;
593 s
->step
= s
->bsize
- s
->overlap
;
597 static int query_formats(AVFilterContext
*ctx
)
599 static const enum AVPixelFormat pix_fmts
[] = {
600 AV_PIX_FMT_BGR24
, AV_PIX_FMT_RGB24
,
603 ff_set_common_formats(ctx
, ff_make_format_list(pix_fmts
));
607 typedef struct ThreadData
{
611 static int filter_slice(AVFilterContext
*ctx
,
612 void *arg
, int jobnr
, int nb_jobs
)
615 DCTdnoizContext
*s
= ctx
->priv
;
616 const ThreadData
*td
= arg
;
617 const int w
= s
->pr_width
;
618 const int h
= s
->pr_height
;
619 const int slice_start
= (h
* jobnr
) / nb_jobs
;
620 const int slice_end
= (h
* (jobnr
+1)) / nb_jobs
;
621 const int slice_start_ctx
= FFMAX(slice_start
- s
->bsize
+ 1, 0);
622 const int slice_end_ctx
= FFMIN(slice_end
, h
- s
->bsize
+ 1);
623 const int slice_h
= slice_end_ctx
- slice_start_ctx
;
624 const int src_linesize
= s
->p_linesize
;
625 const int dst_linesize
= s
->p_linesize
;
626 const int slice_linesize
= s
->p_linesize
;
628 const float *src
= td
->src
+ slice_start_ctx
* src_linesize
;
629 const float *weights
= s
->weights
+ slice_start
* dst_linesize
;
630 float *slice
= s
->slices
[jobnr
];
633 memset(slice
, 0, (slice_h
+ s
->bsize
- 1) * dst_linesize
* sizeof(*slice
));
636 for (y
= 0; y
< slice_h
; y
+= s
->step
) {
637 for (x
= 0; x
< w
- s
->bsize
+ 1; x
+= s
->step
)
638 s
->filter_freq_func(s
, src
+ x
, src_linesize
,
639 slice
+ x
, slice_linesize
,
641 src
+= s
->step
* src_linesize
;
642 slice
+= s
->step
* slice_linesize
;
646 slice
= s
->slices
[jobnr
] + (slice_start
- slice_start_ctx
) * slice_linesize
;
647 dst
= td
->dst
+ slice_start
* dst_linesize
;
648 for (y
= slice_start
; y
< slice_end
; y
++) {
649 for (x
= 0; x
< w
; x
++)
650 dst
[x
] = slice
[x
] * weights
[x
];
651 slice
+= slice_linesize
;
653 weights
+= dst_linesize
;
659 static int filter_frame(AVFilterLink
*inlink
, AVFrame
*in
)
661 AVFilterContext
*ctx
= inlink
->dst
;
662 DCTdnoizContext
*s
= ctx
->priv
;
663 AVFilterLink
*outlink
= inlink
->dst
->outputs
[0];
667 if (av_frame_is_writable(in
)) {
672 out
= ff_get_video_buffer(outlink
, outlink
->w
, outlink
->h
);
675 return AVERROR(ENOMEM
);
677 av_frame_copy_props(out
, in
);
680 s
->color_decorrelation(s
->cbuf
[0], s
->p_linesize
,
681 in
->data
[0], in
->linesize
[0],
682 s
->pr_width
, s
->pr_height
);
683 for (plane
= 0; plane
< 3; plane
++) {
685 .src
= s
->cbuf
[0][plane
],
686 .dst
= s
->cbuf
[1][plane
],
688 ctx
->internal
->execute(ctx
, filter_slice
, &td
, NULL
, s
->nb_threads
);
690 s
->color_correlation(out
->data
[0], out
->linesize
[0],
691 s
->cbuf
[1], s
->p_linesize
,
692 s
->pr_width
, s
->pr_height
);
696 uint8_t *dst
= out
->data
[0];
697 const uint8_t *src
= in
->data
[0];
698 const int dst_linesize
= out
->linesize
[0];
699 const int src_linesize
= in
->linesize
[0];
700 const int hpad
= (inlink
->w
- s
->pr_width
) * 3;
701 const int vpad
= (inlink
->h
- s
->pr_height
);
704 uint8_t *dstp
= dst
+ s
->pr_width
* 3;
705 const uint8_t *srcp
= src
+ s
->pr_width
* 3;
707 for (y
= 0; y
< s
->pr_height
; y
++) {
708 memcpy(dstp
, srcp
, hpad
);
709 dstp
+= dst_linesize
;
710 srcp
+= src_linesize
;
714 uint8_t *dstp
= dst
+ s
->pr_height
* dst_linesize
;
715 const uint8_t *srcp
= src
+ s
->pr_height
* src_linesize
;
717 for (y
= 0; y
< vpad
; y
++) {
718 memcpy(dstp
, srcp
, inlink
->w
* 3);
719 dstp
+= dst_linesize
;
720 srcp
+= src_linesize
;
727 return ff_filter_frame(outlink
, out
);
730 static av_cold
void uninit(AVFilterContext
*ctx
)
733 DCTdnoizContext
*s
= ctx
->priv
;
736 for (i
= 0; i
< 2; i
++) {
737 av_free(s
->cbuf
[i
][0]);
738 av_free(s
->cbuf
[i
][1]);
739 av_free(s
->cbuf
[i
][2]);
741 for (i
= 0; i
< s
->nb_threads
; i
++) {
742 av_free(s
->slices
[i
]);
743 av_expr_free(s
->expr
[i
]);
747 static const AVFilterPad dctdnoiz_inputs
[] = {
750 .type
= AVMEDIA_TYPE_VIDEO
,
751 .filter_frame
= filter_frame
,
752 .config_props
= config_input
,
757 static const AVFilterPad dctdnoiz_outputs
[] = {
760 .type
= AVMEDIA_TYPE_VIDEO
,
765 AVFilter ff_vf_dctdnoiz
= {
767 .description
= NULL_IF_CONFIG_SMALL("Denoise frames using 2D DCT."),
768 .priv_size
= sizeof(DCTdnoizContext
),
771 .query_formats
= query_formats
,
772 .inputs
= dctdnoiz_inputs
,
773 .outputs
= dctdnoiz_outputs
,
774 .priv_class
= &dctdnoiz_class
,
775 .flags
= AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC
| AVFILTER_FLAG_SLICE_THREADS
,