5c6b5ef5cb1b6f8f3a0cacf3fbea4dbe90ab8e2d
2 * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
3 * Copyright (C) 2013 Lenny Wang
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "unsharp_opencl.h"
28 #include "libavutil/common.h"
29 #include "libavutil/opencl_internal.h"
32 #define ROUND_TO_16(a) (((((a) - 1)/16)+1)*16)
34 static inline void add_mask_counter(uint32_t *dst
, uint32_t *counter1
, uint32_t *counter2
, int len
)
37 for (i
= 0; i
< len
; i
++) {
38 dst
[i
] = counter1
[i
] + counter2
[i
];
42 static int compute_mask(int step
, uint32_t *mask
)
45 int counter_size
= sizeof(uint32_t) * (2 * step
+ 1);
46 uint32_t *temp1_counter
, *temp2_counter
, **counter
;
47 temp1_counter
= av_mallocz(counter_size
);
49 ret
= AVERROR(ENOMEM
);
52 temp2_counter
= av_mallocz(counter_size
);
54 ret
= AVERROR(ENOMEM
);
57 counter
= av_mallocz_array(2 * step
+ 1, sizeof(uint32_t *));
59 ret
= AVERROR(ENOMEM
);
62 for (i
= 0; i
< 2 * step
+ 1; i
++) {
63 counter
[i
] = av_mallocz(counter_size
);
65 ret
= AVERROR(ENOMEM
);
69 for (i
= 0; i
< 2 * step
+ 1; i
++) {
70 memset(temp1_counter
, 0, counter_size
);
72 for (z
= 0; z
< step
* 2; z
+= 2) {
73 add_mask_counter(temp2_counter
, counter
[z
], temp1_counter
, step
* 2);
74 memcpy(counter
[z
], temp1_counter
, counter_size
);
75 add_mask_counter(temp1_counter
, counter
[z
+ 1], temp2_counter
, step
* 2);
76 memcpy(counter
[z
+ 1], temp2_counter
, counter_size
);
79 memcpy(mask
, temp1_counter
, counter_size
);
81 av_freep(&temp1_counter
);
82 av_freep(&temp2_counter
);
83 for (i
= 0; i
< 2 * step
+ 1; i
++) {
84 av_freep(&counter
[i
]);
90 static int compute_mask_matrix(cl_mem cl_mask_matrix
, int step_x
, int step_y
)
93 uint32_t *mask_matrix
, *mask_x
, *mask_y
;
94 size_t size_matrix
= sizeof(uint32_t) * (2 * step_x
+ 1) * (2 * step_y
+ 1);
95 mask_x
= av_mallocz_array(2 * step_x
+ 1, sizeof(uint32_t));
97 ret
= AVERROR(ENOMEM
);
100 mask_y
= av_mallocz_array(2 * step_y
+ 1, sizeof(uint32_t));
102 ret
= AVERROR(ENOMEM
);
105 mask_matrix
= av_mallocz(size_matrix
);
107 ret
= AVERROR(ENOMEM
);
110 ret
= compute_mask(step_x
, mask_x
);
113 ret
= compute_mask(step_y
, mask_y
);
116 for (j
= 0; j
< 2 * step_y
+ 1; j
++) {
117 for (i
= 0; i
< 2 * step_x
+ 1; i
++) {
118 mask_matrix
[i
+ j
* (2 * step_x
+ 1)] = mask_y
[j
] * mask_x
[i
];
121 ret
= av_opencl_buffer_write(cl_mask_matrix
, (uint8_t *)mask_matrix
, size_matrix
);
125 av_freep(&mask_matrix
);
129 static int generate_mask(AVFilterContext
*ctx
)
131 UnsharpContext
*unsharp
= ctx
->priv
;
132 int i
, ret
= 0, step_x
[2], step_y
[2];
133 cl_mem mask_matrix
[2];
134 mask_matrix
[0] = unsharp
->opencl_ctx
.cl_luma_mask
;
135 mask_matrix
[1] = unsharp
->opencl_ctx
.cl_chroma_mask
;
136 step_x
[0] = unsharp
->luma
.steps_x
;
137 step_x
[1] = unsharp
->chroma
.steps_x
;
138 step_y
[0] = unsharp
->luma
.steps_y
;
139 step_y
[1] = unsharp
->chroma
.steps_y
;
141 /* use default kernel if any matrix dim larger than 8 due to limited local mem size */
142 if (step_x
[0]>8 || step_x
[1]>8 || step_y
[0]>8 || step_y
[1]>8)
143 unsharp
->opencl_ctx
.use_fast_kernels
= 0;
145 unsharp
->opencl_ctx
.use_fast_kernels
= 1;
147 if (!mask_matrix
[0] || !mask_matrix
[1]) {
148 av_log(ctx
, AV_LOG_ERROR
, "Luma mask and chroma mask should not be NULL\n");
149 return AVERROR(EINVAL
);
151 for (i
= 0; i
< 2; i
++) {
152 ret
= compute_mask_matrix(mask_matrix
[i
], step_x
[i
], step_y
[i
]);
159 int ff_opencl_apply_unsharp(AVFilterContext
*ctx
, AVFrame
*in
, AVFrame
*out
)
162 AVFilterLink
*link
= ctx
->inputs
[0];
163 UnsharpContext
*unsharp
= ctx
->priv
;
165 FFOpenclParam kernel1
= {0};
166 FFOpenclParam kernel2
= {0};
168 int height
= link
->h
;
169 int cw
= FF_CEIL_RSHIFT(link
->w
, unsharp
->hsub
);
170 int ch
= FF_CEIL_RSHIFT(link
->h
, unsharp
->vsub
);
171 size_t globalWorkSize1d
= width
* height
+ 2 * ch
* cw
;
172 size_t globalWorkSize2dLuma
[2];
173 size_t globalWorkSize2dChroma
[2];
174 size_t localWorkSize2d
[2] = {16, 16};
176 if (unsharp
->opencl_ctx
.use_fast_kernels
) {
177 globalWorkSize2dLuma
[0] = (size_t)ROUND_TO_16(width
);
178 globalWorkSize2dLuma
[1] = (size_t)ROUND_TO_16(height
);
179 globalWorkSize2dChroma
[0] = (size_t)ROUND_TO_16(cw
);
180 globalWorkSize2dChroma
[1] = (size_t)(2*ROUND_TO_16(ch
));
183 kernel1
.kernel
= unsharp
->opencl_ctx
.kernel_luma
;
184 ret
= avpriv_opencl_set_parameter(&kernel1
,
185 FF_OPENCL_PARAM_INFO(unsharp
->opencl_ctx
.cl_inbuf
),
186 FF_OPENCL_PARAM_INFO(unsharp
->opencl_ctx
.cl_outbuf
),
187 FF_OPENCL_PARAM_INFO(unsharp
->opencl_ctx
.cl_luma_mask
),
188 FF_OPENCL_PARAM_INFO(unsharp
->luma
.amount
),
189 FF_OPENCL_PARAM_INFO(unsharp
->luma
.scalebits
),
190 FF_OPENCL_PARAM_INFO(unsharp
->luma
.halfscale
),
191 FF_OPENCL_PARAM_INFO(in
->linesize
[0]),
192 FF_OPENCL_PARAM_INFO(out
->linesize
[0]),
193 FF_OPENCL_PARAM_INFO(width
),
194 FF_OPENCL_PARAM_INFO(height
),
200 kernel2
.kernel
= unsharp
->opencl_ctx
.kernel_chroma
;
201 ret
= avpriv_opencl_set_parameter(&kernel2
,
202 FF_OPENCL_PARAM_INFO(unsharp
->opencl_ctx
.cl_inbuf
),
203 FF_OPENCL_PARAM_INFO(unsharp
->opencl_ctx
.cl_outbuf
),
204 FF_OPENCL_PARAM_INFO(unsharp
->opencl_ctx
.cl_chroma_mask
),
205 FF_OPENCL_PARAM_INFO(unsharp
->chroma
.amount
),
206 FF_OPENCL_PARAM_INFO(unsharp
->chroma
.scalebits
),
207 FF_OPENCL_PARAM_INFO(unsharp
->chroma
.halfscale
),
208 FF_OPENCL_PARAM_INFO(in
->linesize
[0]),
209 FF_OPENCL_PARAM_INFO(in
->linesize
[1]),
210 FF_OPENCL_PARAM_INFO(out
->linesize
[0]),
211 FF_OPENCL_PARAM_INFO(out
->linesize
[1]),
212 FF_OPENCL_PARAM_INFO(link
->w
),
213 FF_OPENCL_PARAM_INFO(link
->h
),
214 FF_OPENCL_PARAM_INFO(cw
),
215 FF_OPENCL_PARAM_INFO(ch
),
219 status
= clEnqueueNDRangeKernel(unsharp
->opencl_ctx
.command_queue
,
220 unsharp
->opencl_ctx
.kernel_luma
, 2, NULL
,
221 globalWorkSize2dLuma
, localWorkSize2d
, 0, NULL
, NULL
);
222 status
|=clEnqueueNDRangeKernel(unsharp
->opencl_ctx
.command_queue
,
223 unsharp
->opencl_ctx
.kernel_chroma
, 2, NULL
,
224 globalWorkSize2dChroma
, localWorkSize2d
, 0, NULL
, NULL
);
225 if (status
!= CL_SUCCESS
) {
226 av_log(ctx
, AV_LOG_ERROR
, "OpenCL run kernel error occurred: %s\n", av_opencl_errstr(status
));
227 return AVERROR_EXTERNAL
;
229 } else { /* use default kernel */
231 kernel1
.kernel
= unsharp
->opencl_ctx
.kernel_default
;
233 ret
= avpriv_opencl_set_parameter(&kernel1
,
234 FF_OPENCL_PARAM_INFO(unsharp
->opencl_ctx
.cl_inbuf
),
235 FF_OPENCL_PARAM_INFO(unsharp
->opencl_ctx
.cl_outbuf
),
236 FF_OPENCL_PARAM_INFO(unsharp
->opencl_ctx
.cl_luma_mask
),
237 FF_OPENCL_PARAM_INFO(unsharp
->opencl_ctx
.cl_chroma_mask
),
238 FF_OPENCL_PARAM_INFO(unsharp
->luma
.amount
),
239 FF_OPENCL_PARAM_INFO(unsharp
->chroma
.amount
),
240 FF_OPENCL_PARAM_INFO(unsharp
->luma
.steps_x
),
241 FF_OPENCL_PARAM_INFO(unsharp
->luma
.steps_y
),
242 FF_OPENCL_PARAM_INFO(unsharp
->chroma
.steps_x
),
243 FF_OPENCL_PARAM_INFO(unsharp
->chroma
.steps_y
),
244 FF_OPENCL_PARAM_INFO(unsharp
->luma
.scalebits
),
245 FF_OPENCL_PARAM_INFO(unsharp
->chroma
.scalebits
),
246 FF_OPENCL_PARAM_INFO(unsharp
->luma
.halfscale
),
247 FF_OPENCL_PARAM_INFO(unsharp
->chroma
.halfscale
),
248 FF_OPENCL_PARAM_INFO(in
->linesize
[0]),
249 FF_OPENCL_PARAM_INFO(in
->linesize
[1]),
250 FF_OPENCL_PARAM_INFO(out
->linesize
[0]),
251 FF_OPENCL_PARAM_INFO(out
->linesize
[1]),
252 FF_OPENCL_PARAM_INFO(link
->h
),
253 FF_OPENCL_PARAM_INFO(link
->w
),
254 FF_OPENCL_PARAM_INFO(ch
),
255 FF_OPENCL_PARAM_INFO(cw
),
259 status
= clEnqueueNDRangeKernel(unsharp
->opencl_ctx
.command_queue
,
260 unsharp
->opencl_ctx
.kernel_default
, 1, NULL
,
261 &globalWorkSize1d
, NULL
, 0, NULL
, NULL
);
262 if (status
!= CL_SUCCESS
) {
263 av_log(ctx
, AV_LOG_ERROR
, "OpenCL run kernel error occurred: %s\n", av_opencl_errstr(status
));
264 return AVERROR_EXTERNAL
;
267 clFinish(unsharp
->opencl_ctx
.command_queue
);
268 return av_opencl_buffer_read_image(out
->data
, unsharp
->opencl_ctx
.out_plane_size
,
269 unsharp
->opencl_ctx
.plane_num
, unsharp
->opencl_ctx
.cl_outbuf
,
270 unsharp
->opencl_ctx
.cl_outbuf_size
);
273 int ff_opencl_unsharp_init(AVFilterContext
*ctx
)
277 UnsharpContext
*unsharp
= ctx
->priv
;
278 ret
= av_opencl_init(NULL
);
281 ret
= av_opencl_buffer_create(&unsharp
->opencl_ctx
.cl_luma_mask
,
282 sizeof(uint32_t) * (2 * unsharp
->luma
.steps_x
+ 1) * (2 * unsharp
->luma
.steps_y
+ 1),
283 CL_MEM_READ_ONLY
, NULL
);
286 ret
= av_opencl_buffer_create(&unsharp
->opencl_ctx
.cl_chroma_mask
,
287 sizeof(uint32_t) * (2 * unsharp
->chroma
.steps_x
+ 1) * (2 * unsharp
->chroma
.steps_y
+ 1),
288 CL_MEM_READ_ONLY
, NULL
);
291 ret
= generate_mask(ctx
);
294 unsharp
->opencl_ctx
.plane_num
= PLANE_NUM
;
295 unsharp
->opencl_ctx
.command_queue
= av_opencl_get_command_queue();
296 if (!unsharp
->opencl_ctx
.command_queue
) {
297 av_log(ctx
, AV_LOG_ERROR
, "Unable to get OpenCL command queue in filter 'unsharp'\n");
298 return AVERROR(EINVAL
);
300 snprintf(build_opts
, 96, "-D LU_RADIUS_X=%d -D LU_RADIUS_Y=%d -D CH_RADIUS_X=%d -D CH_RADIUS_Y=%d",
301 2*unsharp
->luma
.steps_x
+1, 2*unsharp
->luma
.steps_y
+1, 2*unsharp
->chroma
.steps_x
+1, 2*unsharp
->chroma
.steps_y
+1);
302 unsharp
->opencl_ctx
.program
= av_opencl_compile("unsharp", build_opts
);
303 if (!unsharp
->opencl_ctx
.program
) {
304 av_log(ctx
, AV_LOG_ERROR
, "OpenCL failed to compile program 'unsharp'\n");
305 return AVERROR(EINVAL
);
307 if (unsharp
->opencl_ctx
.use_fast_kernels
) {
308 if (!unsharp
->opencl_ctx
.kernel_luma
) {
309 unsharp
->opencl_ctx
.kernel_luma
= clCreateKernel(unsharp
->opencl_ctx
.program
, "unsharp_luma", &ret
);
310 if (ret
!= CL_SUCCESS
) {
311 av_log(ctx
, AV_LOG_ERROR
, "OpenCL failed to create kernel 'unsharp_luma'\n");
315 if (!unsharp
->opencl_ctx
.kernel_chroma
) {
316 unsharp
->opencl_ctx
.kernel_chroma
= clCreateKernel(unsharp
->opencl_ctx
.program
, "unsharp_chroma", &ret
);
318 av_log(ctx
, AV_LOG_ERROR
, "OpenCL failed to create kernel 'unsharp_chroma'\n");
324 if (!unsharp
->opencl_ctx
.kernel_default
) {
325 unsharp
->opencl_ctx
.kernel_default
= clCreateKernel(unsharp
->opencl_ctx
.program
, "unsharp_default", &ret
);
327 av_log(ctx
, AV_LOG_ERROR
, "OpenCL failed to create kernel 'unsharp_default'\n");
335 void ff_opencl_unsharp_uninit(AVFilterContext
*ctx
)
337 UnsharpContext
*unsharp
= ctx
->priv
;
338 av_opencl_buffer_release(&unsharp
->opencl_ctx
.cl_inbuf
);
339 av_opencl_buffer_release(&unsharp
->opencl_ctx
.cl_outbuf
);
340 av_opencl_buffer_release(&unsharp
->opencl_ctx
.cl_luma_mask
);
341 av_opencl_buffer_release(&unsharp
->opencl_ctx
.cl_chroma_mask
);
342 clReleaseKernel(unsharp
->opencl_ctx
.kernel_default
);
343 clReleaseKernel(unsharp
->opencl_ctx
.kernel_luma
);
344 clReleaseKernel(unsharp
->opencl_ctx
.kernel_chroma
);
345 clReleaseProgram(unsharp
->opencl_ctx
.program
);
346 unsharp
->opencl_ctx
.command_queue
= NULL
;
350 int ff_opencl_unsharp_process_inout_buf(AVFilterContext
*ctx
, AVFrame
*in
, AVFrame
*out
)
353 AVFilterLink
*link
= ctx
->inputs
[0];
354 UnsharpContext
*unsharp
= ctx
->priv
;
355 int ch
= FF_CEIL_RSHIFT(link
->h
, unsharp
->vsub
);
357 if ((!unsharp
->opencl_ctx
.cl_inbuf
) || (!unsharp
->opencl_ctx
.cl_outbuf
)) {
358 unsharp
->opencl_ctx
.in_plane_size
[0] = (in
->linesize
[0] * in
->height
);
359 unsharp
->opencl_ctx
.in_plane_size
[1] = (in
->linesize
[1] * ch
);
360 unsharp
->opencl_ctx
.in_plane_size
[2] = (in
->linesize
[2] * ch
);
361 unsharp
->opencl_ctx
.out_plane_size
[0] = (out
->linesize
[0] * out
->height
);
362 unsharp
->opencl_ctx
.out_plane_size
[1] = (out
->linesize
[1] * ch
);
363 unsharp
->opencl_ctx
.out_plane_size
[2] = (out
->linesize
[2] * ch
);
364 unsharp
->opencl_ctx
.cl_inbuf_size
= unsharp
->opencl_ctx
.in_plane_size
[0] +
365 unsharp
->opencl_ctx
.in_plane_size
[1] +
366 unsharp
->opencl_ctx
.in_plane_size
[2];
367 unsharp
->opencl_ctx
.cl_outbuf_size
= unsharp
->opencl_ctx
.out_plane_size
[0] +
368 unsharp
->opencl_ctx
.out_plane_size
[1] +
369 unsharp
->opencl_ctx
.out_plane_size
[2];
370 if (!unsharp
->opencl_ctx
.cl_inbuf
) {
371 ret
= av_opencl_buffer_create(&unsharp
->opencl_ctx
.cl_inbuf
,
372 unsharp
->opencl_ctx
.cl_inbuf_size
,
373 CL_MEM_READ_ONLY
, NULL
);
377 if (!unsharp
->opencl_ctx
.cl_outbuf
) {
378 ret
= av_opencl_buffer_create(&unsharp
->opencl_ctx
.cl_outbuf
,
379 unsharp
->opencl_ctx
.cl_outbuf_size
,
380 CL_MEM_READ_WRITE
, NULL
);
385 return av_opencl_buffer_write_image(unsharp
->opencl_ctx
.cl_inbuf
,
386 unsharp
->opencl_ctx
.cl_inbuf_size
,
387 0, in
->data
, unsharp
->opencl_ctx
.in_plane_size
,
388 unsharp
->opencl_ctx
.plane_num
);