Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com> | |
3 | * Copyright (C) 2013 Lenny Wang | |
4 | * | |
5 | * This file is part of FFmpeg. | |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * FFmpeg is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with FFmpeg; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
21 | ||
22 | /** | |
23 | * @file | |
24 | * transform input video | |
25 | */ | |
26 | ||
27 | #include "libavutil/common.h" | |
28 | #include "libavutil/dict.h" | |
29 | #include "libavutil/pixdesc.h" | |
30 | #include "deshake_opencl.h" | |
31 | #include "libavutil/opencl_internal.h" | |
32 | ||
33 | #define PLANE_NUM 3 | |
34 | #define ROUND_TO_16(a) (((((a) - 1)/16)+1)*16) | |
35 | ||
36 | int ff_opencl_transform(AVFilterContext *ctx, | |
37 | int width, int height, int cw, int ch, | |
38 | const float *matrix_y, const float *matrix_uv, | |
39 | enum InterpolateMethod interpolate, | |
40 | enum FillMethod fill, AVFrame *in, AVFrame *out) | |
41 | { | |
42 | int ret = 0; | |
43 | cl_int status; | |
44 | DeshakeContext *deshake = ctx->priv; | |
45 | float4 packed_matrix_lu = {matrix_y[0], matrix_y[1], matrix_y[2], matrix_y[5]}; | |
46 | float4 packed_matrix_ch = {matrix_uv[0], matrix_uv[1], matrix_uv[2], matrix_uv[5]}; | |
47 | size_t global_worksize_lu[2] = {(size_t)ROUND_TO_16(width), (size_t)ROUND_TO_16(height)}; | |
48 | size_t global_worksize_ch[2] = {(size_t)ROUND_TO_16(cw), (size_t)(2*ROUND_TO_16(ch))}; | |
49 | size_t local_worksize[2] = {16, 16}; | |
50 | FFOpenclParam param_lu = {0}; | |
51 | FFOpenclParam param_ch = {0}; | |
52 | param_lu.ctx = param_ch.ctx = ctx; | |
53 | param_lu.kernel = deshake->opencl_ctx.kernel_luma; | |
54 | param_ch.kernel = deshake->opencl_ctx.kernel_chroma; | |
55 | ||
56 | if ((unsigned int)interpolate > INTERPOLATE_BIQUADRATIC) { | |
57 | av_log(ctx, AV_LOG_ERROR, "Selected interpolate method is invalid\n"); | |
58 | return AVERROR(EINVAL); | |
59 | } | |
60 | ret = avpriv_opencl_set_parameter(¶m_lu, | |
61 | FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_inbuf), | |
62 | FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_outbuf), | |
63 | FF_OPENCL_PARAM_INFO(packed_matrix_lu), | |
64 | FF_OPENCL_PARAM_INFO(interpolate), | |
65 | FF_OPENCL_PARAM_INFO(fill), | |
66 | FF_OPENCL_PARAM_INFO(in->linesize[0]), | |
67 | FF_OPENCL_PARAM_INFO(out->linesize[0]), | |
68 | FF_OPENCL_PARAM_INFO(height), | |
69 | FF_OPENCL_PARAM_INFO(width), | |
70 | NULL); | |
71 | if (ret < 0) | |
72 | return ret; | |
73 | ret = avpriv_opencl_set_parameter(¶m_ch, | |
74 | FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_inbuf), | |
75 | FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_outbuf), | |
76 | FF_OPENCL_PARAM_INFO(packed_matrix_ch), | |
77 | FF_OPENCL_PARAM_INFO(interpolate), | |
78 | FF_OPENCL_PARAM_INFO(fill), | |
79 | FF_OPENCL_PARAM_INFO(in->linesize[0]), | |
80 | FF_OPENCL_PARAM_INFO(out->linesize[0]), | |
81 | FF_OPENCL_PARAM_INFO(in->linesize[1]), | |
82 | FF_OPENCL_PARAM_INFO(out->linesize[1]), | |
83 | FF_OPENCL_PARAM_INFO(height), | |
84 | FF_OPENCL_PARAM_INFO(width), | |
85 | FF_OPENCL_PARAM_INFO(ch), | |
86 | FF_OPENCL_PARAM_INFO(cw), | |
87 | NULL); | |
88 | if (ret < 0) | |
89 | return ret; | |
90 | status = clEnqueueNDRangeKernel(deshake->opencl_ctx.command_queue, | |
91 | deshake->opencl_ctx.kernel_luma, 2, NULL, | |
92 | global_worksize_lu, local_worksize, 0, NULL, NULL); | |
93 | status |= clEnqueueNDRangeKernel(deshake->opencl_ctx.command_queue, | |
94 | deshake->opencl_ctx.kernel_chroma, 2, NULL, | |
95 | global_worksize_ch, local_worksize, 0, NULL, NULL); | |
96 | if (status != CL_SUCCESS) { | |
97 | av_log(ctx, AV_LOG_ERROR, "OpenCL run kernel error occurred: %s\n", av_opencl_errstr(status)); | |
98 | return AVERROR_EXTERNAL; | |
99 | } | |
100 | ret = av_opencl_buffer_read_image(out->data, deshake->opencl_ctx.out_plane_size, | |
101 | deshake->opencl_ctx.plane_num, deshake->opencl_ctx.cl_outbuf, | |
102 | deshake->opencl_ctx.cl_outbuf_size); | |
103 | if (ret < 0) | |
104 | return ret; | |
105 | return ret; | |
106 | } | |
107 | ||
108 | int ff_opencl_deshake_init(AVFilterContext *ctx) | |
109 | { | |
110 | int ret = 0; | |
111 | DeshakeContext *deshake = ctx->priv; | |
112 | ret = av_opencl_init(NULL); | |
113 | if (ret < 0) | |
114 | return ret; | |
115 | deshake->opencl_ctx.plane_num = PLANE_NUM; | |
116 | deshake->opencl_ctx.command_queue = av_opencl_get_command_queue(); | |
117 | if (!deshake->opencl_ctx.command_queue) { | |
118 | av_log(ctx, AV_LOG_ERROR, "Unable to get OpenCL command queue in filter 'deshake'\n"); | |
119 | return AVERROR(EINVAL); | |
120 | } | |
121 | deshake->opencl_ctx.program = av_opencl_compile("avfilter_transform", NULL); | |
122 | if (!deshake->opencl_ctx.program) { | |
123 | av_log(ctx, AV_LOG_ERROR, "OpenCL failed to compile program 'avfilter_transform'\n"); | |
124 | return AVERROR(EINVAL); | |
125 | } | |
126 | if (!deshake->opencl_ctx.kernel_luma) { | |
127 | deshake->opencl_ctx.kernel_luma = clCreateKernel(deshake->opencl_ctx.program, | |
128 | "avfilter_transform_luma", &ret); | |
129 | if (ret != CL_SUCCESS) { | |
130 | av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'avfilter_transform_luma'\n"); | |
131 | return AVERROR(EINVAL); | |
132 | } | |
133 | } | |
134 | if (!deshake->opencl_ctx.kernel_chroma) { | |
135 | deshake->opencl_ctx.kernel_chroma = clCreateKernel(deshake->opencl_ctx.program, | |
136 | "avfilter_transform_chroma", &ret); | |
137 | if (ret != CL_SUCCESS) { | |
138 | av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'avfilter_transform_chroma'\n"); | |
139 | return AVERROR(EINVAL); | |
140 | } | |
141 | } | |
142 | return ret; | |
143 | } | |
144 | ||
145 | void ff_opencl_deshake_uninit(AVFilterContext *ctx) | |
146 | { | |
147 | DeshakeContext *deshake = ctx->priv; | |
148 | av_opencl_buffer_release(&deshake->opencl_ctx.cl_inbuf); | |
149 | av_opencl_buffer_release(&deshake->opencl_ctx.cl_outbuf); | |
150 | clReleaseKernel(deshake->opencl_ctx.kernel_luma); | |
151 | clReleaseKernel(deshake->opencl_ctx.kernel_chroma); | |
152 | clReleaseProgram(deshake->opencl_ctx.program); | |
153 | deshake->opencl_ctx.command_queue = NULL; | |
154 | av_opencl_uninit(); | |
155 | } | |
156 | ||
157 | int ff_opencl_deshake_process_inout_buf(AVFilterContext *ctx, AVFrame *in, AVFrame *out) | |
158 | { | |
159 | int ret = 0; | |
160 | AVFilterLink *link = ctx->inputs[0]; | |
161 | DeshakeContext *deshake = ctx->priv; | |
162 | const int hshift = av_pix_fmt_desc_get(link->format)->log2_chroma_h; | |
163 | int chroma_height = FF_CEIL_RSHIFT(link->h, hshift); | |
164 | ||
165 | if ((!deshake->opencl_ctx.cl_inbuf) || (!deshake->opencl_ctx.cl_outbuf)) { | |
166 | deshake->opencl_ctx.in_plane_size[0] = (in->linesize[0] * in->height); | |
167 | deshake->opencl_ctx.in_plane_size[1] = (in->linesize[1] * chroma_height); | |
168 | deshake->opencl_ctx.in_plane_size[2] = (in->linesize[2] * chroma_height); | |
169 | deshake->opencl_ctx.out_plane_size[0] = (out->linesize[0] * out->height); | |
170 | deshake->opencl_ctx.out_plane_size[1] = (out->linesize[1] * chroma_height); | |
171 | deshake->opencl_ctx.out_plane_size[2] = (out->linesize[2] * chroma_height); | |
172 | deshake->opencl_ctx.cl_inbuf_size = deshake->opencl_ctx.in_plane_size[0] + | |
173 | deshake->opencl_ctx.in_plane_size[1] + | |
174 | deshake->opencl_ctx.in_plane_size[2]; | |
175 | deshake->opencl_ctx.cl_outbuf_size = deshake->opencl_ctx.out_plane_size[0] + | |
176 | deshake->opencl_ctx.out_plane_size[1] + | |
177 | deshake->opencl_ctx.out_plane_size[2]; | |
178 | if (!deshake->opencl_ctx.cl_inbuf) { | |
179 | ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_inbuf, | |
180 | deshake->opencl_ctx.cl_inbuf_size, | |
181 | CL_MEM_READ_ONLY, NULL); | |
182 | if (ret < 0) | |
183 | return ret; | |
184 | } | |
185 | if (!deshake->opencl_ctx.cl_outbuf) { | |
186 | ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_outbuf, | |
187 | deshake->opencl_ctx.cl_outbuf_size, | |
188 | CL_MEM_READ_WRITE, NULL); | |
189 | if (ret < 0) | |
190 | return ret; | |
191 | } | |
192 | } | |
193 | ret = av_opencl_buffer_write_image(deshake->opencl_ctx.cl_inbuf, | |
194 | deshake->opencl_ctx.cl_inbuf_size, | |
195 | 0, in->data,deshake->opencl_ctx.in_plane_size, | |
196 | deshake->opencl_ctx.plane_num); | |
197 | if(ret < 0) | |
198 | return ret; | |
199 | return ret; | |
200 | } |