Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (C) 2012 Peng Gao <peng@multicorewareinc.com> | |
3 | * Copyright (C) 2012 Li Cao <li@multicorewareinc.com> | |
4 | * Copyright (C) 2012 Wei Gao <weigao@multicorewareinc.com> | |
5 | * Copyright (C) 2013 Lenny Wang <lwanghpc@gmail.com> | |
6 | * | |
7 | * This file is part of FFmpeg. | |
8 | * | |
9 | * FFmpeg is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU Lesser General Public | |
11 | * License as published by the Free Software Foundation; either | |
12 | * version 2.1 of the License, or (at your option) any later version. | |
13 | * | |
14 | * FFmpeg is distributed in the hope that it will be useful, | |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | * Lesser General Public License for more details. | |
18 | * | |
19 | * You should have received a copy of the GNU Lesser General Public | |
20 | * License along with FFmpeg; if not, write to the Free Software | |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 | */ | |
23 | ||
24 | #include "opencl.h" | |
25 | #include "avstring.h" | |
26 | #include "log.h" | |
27 | #include "avassert.h" | |
28 | #include "opt.h" | |
29 | ||
#if HAVE_THREADS
#if HAVE_PTHREADS
#include <pthread.h>
#elif HAVE_W32THREADS
#include "compat/w32pthreads.h"
#elif HAVE_OS2THREADS
#include "compat/os2threads.h"
#endif
#include "atomic.h"

/*
 * Mutex guarding the global OpenCL state. It is allocated lazily by
 * init_opencl_mtx() and published with a compare-and-swap, so it is the
 * pointer variable itself (not the mutex it points to) that has to be
 * volatile-qualified.
 */
static pthread_mutex_t * volatile atomic_opencl_lock = NULL;
#define LOCK_OPENCL pthread_mutex_lock(atomic_opencl_lock)
#define UNLOCK_OPENCL pthread_mutex_unlock(atomic_opencl_lock)
#else
/* Without thread support the lock macros expand to nothing. */
#define LOCK_OPENCL
#define UNLOCK_OPENCL
#endif
47 | ||
/* Upper bound on the number of kernel source strings that can be registered. */
#define MAX_KERNEL_CODE_NUM 200

/* One registered OpenCL kernel source string together with its build state. */
typedef struct {
    int is_compiled;            /* set to 1 once av_opencl_compile() has built it */
    const char *kernel_string;  /* source text; the pointer is stored, not copied */
} KernelCode;
54 | ||
55 | typedef struct { | |
56 | const AVClass *class; | |
57 | int log_offset; | |
58 | void *log_ctx; | |
59 | int init_count; | |
60 | int opt_init_flag; | |
61 | /** | |
62 | * if set to 1, the OpenCL environment was created by the user and | |
63 | * passed as AVOpenCLExternalEnv when initing ,0:created by opencl wrapper. | |
64 | */ | |
65 | int is_user_created; | |
66 | int platform_idx; | |
67 | int device_idx; | |
68 | cl_platform_id platform_id; | |
69 | cl_device_type device_type; | |
70 | cl_context context; | |
71 | cl_device_id device_id; | |
72 | cl_command_queue command_queue; | |
73 | #if FF_API_OLD_OPENCL | |
74 | char *build_options; | |
75 | int program_count; | |
76 | cl_program programs[MAX_KERNEL_CODE_NUM]; | |
77 | int kernel_count; | |
78 | #endif | |
79 | int kernel_code_count; | |
80 | KernelCode kernel_code[MAX_KERNEL_CODE_NUM]; | |
81 | AVOpenCLDeviceList device_list; | |
82 | } OpenclContext; | |
83 | ||
84 | #define OFFSET(x) offsetof(OpenclContext, x) | |
85 | ||
86 | static const AVOption opencl_options[] = { | |
87 | { "platform_idx", "set platform index value", OFFSET(platform_idx), AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX}, | |
88 | { "device_idx", "set device index value", OFFSET(device_idx), AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX}, | |
89 | #if FF_API_OLD_OPENCL | |
90 | { "build_options", "build options of opencl", OFFSET(build_options), AV_OPT_TYPE_STRING, {.str="-I."}, CHAR_MIN, CHAR_MAX}, | |
91 | #endif | |
92 | { NULL } | |
93 | }; | |
94 | ||
95 | static const AVClass openclutils_class = { | |
96 | .class_name = "OPENCLUTILS", | |
97 | .option = opencl_options, | |
98 | .item_name = av_default_item_name, | |
99 | .version = LIBAVUTIL_VERSION_INT, | |
100 | .log_level_offset_offset = offsetof(OpenclContext, log_offset), | |
101 | .parent_log_context_offset = offsetof(OpenclContext, log_ctx), | |
102 | }; | |
103 | ||
104 | static OpenclContext opencl_ctx = {&openclutils_class}; | |
105 | ||
106 | static const cl_device_type device_type[] = {CL_DEVICE_TYPE_GPU, CL_DEVICE_TYPE_CPU}; | |
107 | ||
108 | typedef struct { | |
109 | int err_code; | |
110 | const char *err_str; | |
111 | } OpenclErrorMsg; | |
112 | ||
113 | static const OpenclErrorMsg opencl_err_msg[] = { | |
114 | {CL_DEVICE_NOT_FOUND, "DEVICE NOT FOUND"}, | |
115 | {CL_DEVICE_NOT_AVAILABLE, "DEVICE NOT AVAILABLE"}, | |
116 | {CL_COMPILER_NOT_AVAILABLE, "COMPILER NOT AVAILABLE"}, | |
117 | {CL_MEM_OBJECT_ALLOCATION_FAILURE, "MEM OBJECT ALLOCATION FAILURE"}, | |
118 | {CL_OUT_OF_RESOURCES, "OUT OF RESOURCES"}, | |
119 | {CL_OUT_OF_HOST_MEMORY, "OUT OF HOST MEMORY"}, | |
120 | {CL_PROFILING_INFO_NOT_AVAILABLE, "PROFILING INFO NOT AVAILABLE"}, | |
121 | {CL_MEM_COPY_OVERLAP, "MEM COPY OVERLAP"}, | |
122 | {CL_IMAGE_FORMAT_MISMATCH, "IMAGE FORMAT MISMATCH"}, | |
123 | {CL_IMAGE_FORMAT_NOT_SUPPORTED, "IMAGE FORMAT NOT_SUPPORTED"}, | |
124 | {CL_BUILD_PROGRAM_FAILURE, "BUILD PROGRAM FAILURE"}, | |
125 | {CL_MAP_FAILURE, "MAP FAILURE"}, | |
126 | {CL_MISALIGNED_SUB_BUFFER_OFFSET, "MISALIGNED SUB BUFFER OFFSET"}, | |
127 | {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, "EXEC STATUS ERROR FOR EVENTS IN WAIT LIST"}, | |
128 | {CL_COMPILE_PROGRAM_FAILURE, "COMPILE PROGRAM FAILURE"}, | |
129 | {CL_LINKER_NOT_AVAILABLE, "LINKER NOT AVAILABLE"}, | |
130 | {CL_LINK_PROGRAM_FAILURE, "LINK PROGRAM FAILURE"}, | |
131 | {CL_DEVICE_PARTITION_FAILED, "DEVICE PARTITION FAILED"}, | |
132 | {CL_KERNEL_ARG_INFO_NOT_AVAILABLE, "KERNEL ARG INFO NOT AVAILABLE"}, | |
133 | {CL_INVALID_VALUE, "INVALID VALUE"}, | |
134 | {CL_INVALID_DEVICE_TYPE, "INVALID DEVICE TYPE"}, | |
135 | {CL_INVALID_PLATFORM, "INVALID PLATFORM"}, | |
136 | {CL_INVALID_DEVICE, "INVALID DEVICE"}, | |
137 | {CL_INVALID_CONTEXT, "INVALID CONTEXT"}, | |
138 | {CL_INVALID_QUEUE_PROPERTIES, "INVALID QUEUE PROPERTIES"}, | |
139 | {CL_INVALID_COMMAND_QUEUE, "INVALID COMMAND QUEUE"}, | |
140 | {CL_INVALID_HOST_PTR, "INVALID HOST PTR"}, | |
141 | {CL_INVALID_MEM_OBJECT, "INVALID MEM OBJECT"}, | |
142 | {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "INVALID IMAGE FORMAT DESCRIPTOR"}, | |
143 | {CL_INVALID_IMAGE_SIZE, "INVALID IMAGE SIZE"}, | |
144 | {CL_INVALID_SAMPLER, "INVALID SAMPLER"}, | |
145 | {CL_INVALID_BINARY, "INVALID BINARY"}, | |
146 | {CL_INVALID_BUILD_OPTIONS, "INVALID BUILD OPTIONS"}, | |
147 | {CL_INVALID_PROGRAM, "INVALID PROGRAM"}, | |
148 | {CL_INVALID_PROGRAM_EXECUTABLE, "INVALID PROGRAM EXECUTABLE"}, | |
149 | {CL_INVALID_KERNEL_NAME, "INVALID KERNEL NAME"}, | |
150 | {CL_INVALID_KERNEL_DEFINITION, "INVALID KERNEL DEFINITION"}, | |
151 | {CL_INVALID_KERNEL, "INVALID KERNEL"}, | |
152 | {CL_INVALID_ARG_INDEX, "INVALID ARG INDEX"}, | |
153 | {CL_INVALID_ARG_VALUE, "INVALID ARG VALUE"}, | |
154 | {CL_INVALID_ARG_SIZE, "INVALID ARG_SIZE"}, | |
155 | {CL_INVALID_KERNEL_ARGS, "INVALID KERNEL ARGS"}, | |
156 | {CL_INVALID_WORK_DIMENSION, "INVALID WORK DIMENSION"}, | |
157 | {CL_INVALID_WORK_GROUP_SIZE, "INVALID WORK GROUP SIZE"}, | |
158 | {CL_INVALID_WORK_ITEM_SIZE, "INVALID WORK ITEM SIZE"}, | |
159 | {CL_INVALID_GLOBAL_OFFSET, "INVALID GLOBAL OFFSET"}, | |
160 | {CL_INVALID_EVENT_WAIT_LIST, "INVALID EVENT WAIT LIST"}, | |
161 | {CL_INVALID_EVENT, "INVALID EVENT"}, | |
162 | {CL_INVALID_OPERATION, "INVALID OPERATION"}, | |
163 | {CL_INVALID_GL_OBJECT, "INVALID GL OBJECT"}, | |
164 | {CL_INVALID_BUFFER_SIZE, "INVALID BUFFER SIZE"}, | |
165 | {CL_INVALID_MIP_LEVEL, "INVALID MIP LEVEL"}, | |
166 | {CL_INVALID_GLOBAL_WORK_SIZE, "INVALID GLOBAL WORK SIZE"}, | |
167 | {CL_INVALID_PROPERTY, "INVALID PROPERTY"}, | |
168 | {CL_INVALID_IMAGE_DESCRIPTOR, "INVALID IMAGE DESCRIPTOR"}, | |
169 | {CL_INVALID_COMPILER_OPTIONS, "INVALID COMPILER OPTIONS"}, | |
170 | {CL_INVALID_LINKER_OPTIONS, "INVALID LINKER OPTIONS"}, | |
171 | {CL_INVALID_DEVICE_PARTITION_COUNT, "INVALID DEVICE PARTITION COUNT"}, | |
172 | }; | |
173 | ||
174 | const char *av_opencl_errstr(cl_int status) | |
175 | { | |
176 | int i; | |
177 | for (i = 0; i < FF_ARRAY_ELEMS(opencl_err_msg); i++) { | |
178 | if (opencl_err_msg[i].err_code == status) | |
179 | return opencl_err_msg[i].err_str; | |
180 | } | |
181 | return "unknown error"; | |
182 | } | |
183 | ||
184 | static void free_device_list(AVOpenCLDeviceList *device_list) | |
185 | { | |
186 | int i, j; | |
187 | if (!device_list) | |
188 | return; | |
189 | for (i = 0; i < device_list->platform_num; i++) { | |
190 | if (!device_list->platform_node[i]) | |
191 | continue; | |
192 | for (j = 0; j < device_list->platform_node[i]->device_num; j++) { | |
193 | av_freep(&(device_list->platform_node[i]->device_node[j])); | |
194 | } | |
195 | av_freep(&device_list->platform_node[i]->device_node); | |
196 | av_freep(&device_list->platform_node[i]); | |
197 | } | |
198 | av_freep(&device_list->platform_node); | |
199 | device_list->platform_num = 0; | |
200 | } | |
201 | ||
202 | static int get_device_list(AVOpenCLDeviceList *device_list) | |
203 | { | |
204 | cl_int status; | |
205 | int i, j, k, device_num, total_devices_num, ret = 0; | |
206 | int *devices_num; | |
207 | cl_platform_id *platform_ids = NULL; | |
208 | cl_device_id *device_ids = NULL; | |
209 | AVOpenCLDeviceNode *device_node = NULL; | |
210 | status = clGetPlatformIDs(0, NULL, &device_list->platform_num); | |
211 | if (status != CL_SUCCESS) { | |
212 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
213 | "Could not get OpenCL platform ids: %s\n", av_opencl_errstr(status)); | |
214 | return AVERROR_EXTERNAL; | |
215 | } | |
216 | platform_ids = av_mallocz_array(device_list->platform_num, sizeof(cl_platform_id)); | |
217 | if (!platform_ids) | |
218 | return AVERROR(ENOMEM); | |
219 | status = clGetPlatformIDs(device_list->platform_num, platform_ids, NULL); | |
220 | if (status != CL_SUCCESS) { | |
221 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
222 | "Could not get OpenCL platform ids: %s\n", av_opencl_errstr(status)); | |
223 | ret = AVERROR_EXTERNAL; | |
224 | goto end; | |
225 | } | |
226 | device_list->platform_node = av_mallocz_array(device_list->platform_num, sizeof(AVOpenCLPlatformNode *)); | |
227 | if (!device_list->platform_node) { | |
228 | ret = AVERROR(ENOMEM); | |
229 | goto end; | |
230 | } | |
231 | devices_num = av_mallocz(sizeof(int) * FF_ARRAY_ELEMS(device_type)); | |
232 | if (!devices_num) { | |
233 | ret = AVERROR(ENOMEM); | |
234 | goto end; | |
235 | } | |
236 | for (i = 0; i < device_list->platform_num; i++) { | |
237 | device_list->platform_node[i] = av_mallocz(sizeof(AVOpenCLPlatformNode)); | |
238 | if (!device_list->platform_node[i]) { | |
239 | ret = AVERROR(ENOMEM); | |
240 | goto end; | |
241 | } | |
242 | device_list->platform_node[i]->platform_id = platform_ids[i]; | |
243 | status = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_VENDOR, | |
244 | sizeof(device_list->platform_node[i]->platform_name), | |
245 | device_list->platform_node[i]->platform_name, NULL); | |
246 | total_devices_num = 0; | |
247 | for (j = 0; j < FF_ARRAY_ELEMS(device_type); j++) { | |
248 | status = clGetDeviceIDs(device_list->platform_node[i]->platform_id, | |
249 | device_type[j], 0, NULL, &devices_num[j]); | |
250 | total_devices_num += devices_num[j]; | |
251 | } | |
252 | device_list->platform_node[i]->device_node = av_mallocz_array(total_devices_num, sizeof(AVOpenCLDeviceNode *)); | |
253 | if (!device_list->platform_node[i]->device_node) { | |
254 | ret = AVERROR(ENOMEM); | |
255 | goto end; | |
256 | } | |
257 | for (j = 0; j < FF_ARRAY_ELEMS(device_type); j++) { | |
258 | if (devices_num[j]) { | |
259 | device_ids = av_mallocz_array(devices_num[j], sizeof(cl_device_id)); | |
260 | if (!device_ids) { | |
261 | ret = AVERROR(ENOMEM); | |
262 | goto end; | |
263 | } | |
264 | status = clGetDeviceIDs(device_list->platform_node[i]->platform_id, device_type[j], | |
265 | devices_num[j], device_ids, NULL); | |
266 | if (status != CL_SUCCESS) { | |
267 | av_log(&opencl_ctx, AV_LOG_WARNING, | |
268 | "Could not get device ID: %s:\n", av_opencl_errstr(status)); | |
269 | av_freep(&device_ids); | |
270 | continue; | |
271 | } | |
272 | for (k = 0; k < devices_num[j]; k++) { | |
273 | device_num = device_list->platform_node[i]->device_num; | |
274 | device_list->platform_node[i]->device_node[device_num] = av_mallocz(sizeof(AVOpenCLDeviceNode)); | |
275 | if (!device_list->platform_node[i]->device_node[device_num]) { | |
276 | ret = AVERROR(ENOMEM); | |
277 | goto end; | |
278 | } | |
279 | device_node = device_list->platform_node[i]->device_node[device_num]; | |
280 | device_node->device_id = device_ids[k]; | |
281 | device_node->device_type = device_type[j]; | |
282 | status = clGetDeviceInfo(device_node->device_id, CL_DEVICE_NAME, | |
283 | sizeof(device_node->device_name), device_node->device_name, | |
284 | NULL); | |
285 | if (status != CL_SUCCESS) { | |
286 | av_log(&opencl_ctx, AV_LOG_WARNING, | |
287 | "Could not get device name: %s\n", av_opencl_errstr(status)); | |
288 | continue; | |
289 | } | |
290 | device_list->platform_node[i]->device_num++; | |
291 | } | |
292 | av_freep(&device_ids); | |
293 | } | |
294 | } | |
295 | } | |
296 | end: | |
297 | av_freep(&platform_ids); | |
298 | av_freep(&devices_num); | |
299 | av_freep(&device_ids); | |
300 | if (ret < 0) | |
301 | free_device_list(device_list); | |
302 | return ret; | |
303 | } | |
304 | ||
305 | int av_opencl_get_device_list(AVOpenCLDeviceList **device_list) | |
306 | { | |
307 | int ret = 0; | |
308 | *device_list = av_mallocz(sizeof(AVOpenCLDeviceList)); | |
309 | if (!(*device_list)) { | |
310 | av_log(&opencl_ctx, AV_LOG_ERROR, "Could not allocate opencl device list\n"); | |
311 | return AVERROR(ENOMEM); | |
312 | } | |
313 | ret = get_device_list(*device_list); | |
314 | if (ret < 0) { | |
315 | av_log(&opencl_ctx, AV_LOG_ERROR, "Could not get device list from environment\n"); | |
316 | free_device_list(*device_list); | |
317 | av_freep(device_list); | |
318 | return ret; | |
319 | } | |
320 | return ret; | |
321 | } | |
322 | ||
323 | void av_opencl_free_device_list(AVOpenCLDeviceList **device_list) | |
324 | { | |
325 | free_device_list(*device_list); | |
326 | av_freep(device_list); | |
327 | } | |
328 | ||
/**
 * Make sure the global OpenCL mutex exists (no-op without thread support).
 *
 * A fresh mutex is installed with a compare-and-swap; a non-NULL result of
 * the swap is treated as "another mutex is already installed" and the local
 * candidate is destroyed again.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the mutex cannot be allocated,
 *         or the AVERROR of the pthread_mutex_init() failure code
 */
static inline int init_opencl_mtx(void)
{
#if HAVE_THREADS
    pthread_mutex_t *candidate;
    int rc;

    if (atomic_opencl_lock)
        return 0;

    candidate = av_malloc(sizeof(pthread_mutex_t));
    if (!candidate)
        return AVERROR(ENOMEM);

    rc = pthread_mutex_init(candidate, NULL);
    if (rc) {
        av_free(candidate);
        return AVERROR(rc);
    }

    if (avpriv_atomic_ptr_cas(&atomic_opencl_lock, NULL, candidate)) {
        pthread_mutex_destroy(candidate);
        av_free(candidate);
    }
#endif
    return 0;
}
349 | ||
350 | int av_opencl_set_option(const char *key, const char *val) | |
351 | { | |
352 | int ret = init_opencl_mtx( ); | |
353 | if (ret < 0) | |
354 | return ret; | |
355 | LOCK_OPENCL; | |
356 | if (!opencl_ctx.opt_init_flag) { | |
357 | av_opt_set_defaults(&opencl_ctx); | |
358 | opencl_ctx.opt_init_flag = 1; | |
359 | } | |
360 | ret = av_opt_set(&opencl_ctx, key, val, 0); | |
361 | UNLOCK_OPENCL; | |
362 | return ret; | |
363 | } | |
364 | ||
365 | int av_opencl_get_option(const char *key, uint8_t **out_val) | |
366 | { | |
367 | int ret = 0; | |
368 | LOCK_OPENCL; | |
369 | ret = av_opt_get(&opencl_ctx, key, 0, out_val); | |
370 | UNLOCK_OPENCL; | |
371 | return ret; | |
372 | } | |
373 | ||
374 | void av_opencl_free_option(void) | |
375 | { | |
376 | /*FIXME: free openclutils context*/ | |
377 | LOCK_OPENCL; | |
378 | av_opt_free(&opencl_ctx); | |
379 | UNLOCK_OPENCL; | |
380 | } | |
381 | ||
382 | AVOpenCLExternalEnv *av_opencl_alloc_external_env(void) | |
383 | { | |
384 | AVOpenCLExternalEnv *ext = av_mallocz(sizeof(AVOpenCLExternalEnv)); | |
385 | if (!ext) { | |
386 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
387 | "Could not malloc external opencl environment data space\n"); | |
388 | } | |
389 | return ext; | |
390 | } | |
391 | ||
392 | void av_opencl_free_external_env(AVOpenCLExternalEnv **ext_opencl_env) | |
393 | { | |
394 | av_freep(ext_opencl_env); | |
395 | } | |
396 | ||
397 | int av_opencl_register_kernel_code(const char *kernel_code) | |
398 | { | |
399 | int i, ret = init_opencl_mtx( ); | |
400 | if (ret < 0) | |
401 | return ret; | |
402 | LOCK_OPENCL; | |
403 | if (opencl_ctx.kernel_code_count >= MAX_KERNEL_CODE_NUM) { | |
404 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
405 | "Could not register kernel code, maximum number of registered kernel code %d already reached\n", | |
406 | MAX_KERNEL_CODE_NUM); | |
407 | ret = AVERROR(EINVAL); | |
408 | goto end; | |
409 | } | |
410 | for (i = 0; i < opencl_ctx.kernel_code_count; i++) { | |
411 | if (opencl_ctx.kernel_code[i].kernel_string == kernel_code) { | |
412 | av_log(&opencl_ctx, AV_LOG_WARNING, "Same kernel code has been registered\n"); | |
413 | goto end; | |
414 | } | |
415 | } | |
416 | opencl_ctx.kernel_code[opencl_ctx.kernel_code_count].kernel_string = kernel_code; | |
417 | opencl_ctx.kernel_code[opencl_ctx.kernel_code_count].is_compiled = 0; | |
418 | opencl_ctx.kernel_code_count++; | |
419 | end: | |
420 | UNLOCK_OPENCL; | |
421 | return ret; | |
422 | } | |
423 | ||
424 | cl_program av_opencl_compile(const char *program_name, const char *build_opts) | |
425 | { | |
426 | int i; | |
427 | cl_int status; | |
428 | int kernel_code_idx = 0; | |
429 | const char *kernel_source; | |
430 | size_t kernel_code_len; | |
431 | char* ptr = NULL; | |
432 | cl_program program = NULL; | |
433 | ||
434 | LOCK_OPENCL; | |
435 | for (i = 0; i < opencl_ctx.kernel_code_count; i++) { | |
436 | // identify a program using a unique name within the kernel source | |
437 | ptr = av_stristr(opencl_ctx.kernel_code[i].kernel_string, program_name); | |
438 | if (ptr && !opencl_ctx.kernel_code[i].is_compiled) { | |
439 | kernel_source = opencl_ctx.kernel_code[i].kernel_string; | |
440 | kernel_code_len = strlen(opencl_ctx.kernel_code[i].kernel_string); | |
441 | kernel_code_idx = i; | |
442 | break; | |
443 | } | |
444 | } | |
445 | if (!kernel_source) { | |
446 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
447 | "Unable to find OpenCL kernel source '%s'\n", program_name); | |
448 | goto end; | |
449 | } | |
450 | ||
451 | /* create a CL program from kernel source */ | |
452 | program = clCreateProgramWithSource(opencl_ctx.context, 1, &kernel_source, &kernel_code_len, &status); | |
453 | if(status != CL_SUCCESS) { | |
454 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
455 | "Unable to create OpenCL program '%s': %s\n", program_name, av_opencl_errstr(status)); | |
456 | program = NULL; | |
457 | goto end; | |
458 | } | |
459 | status = clBuildProgram(program, 1, &(opencl_ctx.device_id), build_opts, NULL, NULL); | |
460 | if (status != CL_SUCCESS) { | |
461 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
462 | "Compilation failed with OpenCL program: %s\n", program_name); | |
463 | program = NULL; | |
464 | goto end; | |
465 | } | |
466 | ||
467 | opencl_ctx.kernel_code[kernel_code_idx].is_compiled = 1; | |
468 | end: | |
469 | UNLOCK_OPENCL; | |
470 | return program; | |
471 | } | |
472 | ||
473 | cl_command_queue av_opencl_get_command_queue(void) | |
474 | { | |
475 | return opencl_ctx.command_queue; | |
476 | } | |
477 | ||
#if FF_API_OLD_OPENCL
/**
 * Stub of the old kernel API: always logs an error and fails.
 *
 * @param env         unused
 * @param kernel_name name reported in the error message
 * @return AVERROR(EINVAL)
 */
int av_opencl_create_kernel(AVOpenCLKernelEnv *env, const char *kernel_name)
{
    av_log(&opencl_ctx, AV_LOG_ERROR,
           "Could not create OpenCL kernel %s, please update libavfilter.\n",
           kernel_name);
    return AVERROR(EINVAL);
}

/**
 * Stub of the old kernel API: only logs an error.
 *
 * @param env unused
 */
void av_opencl_release_kernel(AVOpenCLKernelEnv *env)
{
    av_log(&opencl_ctx, AV_LOG_ERROR,
           "Could not release OpenCL kernel, please update libavfilter.\n");
}
#endif
490 | ||
491 | static int init_opencl_env(OpenclContext *opencl_ctx, AVOpenCLExternalEnv *ext_opencl_env) | |
492 | { | |
493 | cl_int status; | |
494 | cl_context_properties cps[3]; | |
495 | int i, ret = 0; | |
496 | AVOpenCLDeviceNode *device_node = NULL; | |
497 | ||
498 | if (ext_opencl_env) { | |
499 | if (opencl_ctx->is_user_created) | |
500 | return 0; | |
501 | opencl_ctx->platform_id = ext_opencl_env->platform_id; | |
502 | opencl_ctx->is_user_created = 1; | |
503 | opencl_ctx->command_queue = ext_opencl_env->command_queue; | |
504 | opencl_ctx->context = ext_opencl_env->context; | |
505 | opencl_ctx->device_id = ext_opencl_env->device_id; | |
506 | opencl_ctx->device_type = ext_opencl_env->device_type; | |
507 | } else { | |
508 | if (!opencl_ctx->is_user_created) { | |
509 | if (!opencl_ctx->device_list.platform_num) { | |
510 | ret = get_device_list(&opencl_ctx->device_list); | |
511 | if (ret < 0) { | |
512 | return ret; | |
513 | } | |
514 | } | |
515 | if (opencl_ctx->platform_idx >= 0) { | |
516 | if (opencl_ctx->device_list.platform_num < opencl_ctx->platform_idx + 1) { | |
517 | av_log(opencl_ctx, AV_LOG_ERROR, "User set platform index not exist\n"); | |
518 | return AVERROR(EINVAL); | |
519 | } | |
520 | if (!opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->device_num) { | |
521 | av_log(opencl_ctx, AV_LOG_ERROR, "No devices in user specific platform with index %d\n", | |
522 | opencl_ctx->platform_idx); | |
523 | return AVERROR(EINVAL); | |
524 | } | |
525 | opencl_ctx->platform_id = opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->platform_id; | |
526 | } else { | |
527 | /* get a usable platform by default*/ | |
528 | for (i = 0; i < opencl_ctx->device_list.platform_num; i++) { | |
529 | if (opencl_ctx->device_list.platform_node[i]->device_num) { | |
530 | opencl_ctx->platform_id = opencl_ctx->device_list.platform_node[i]->platform_id; | |
531 | opencl_ctx->platform_idx = i; | |
532 | break; | |
533 | } | |
534 | } | |
535 | } | |
536 | if (!opencl_ctx->platform_id) { | |
537 | av_log(opencl_ctx, AV_LOG_ERROR, "Could not get OpenCL platforms\n"); | |
538 | return AVERROR_EXTERNAL; | |
539 | } | |
540 | /* get a usable device*/ | |
541 | if (opencl_ctx->device_idx >= 0) { | |
542 | if (opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->device_num < opencl_ctx->device_idx + 1) { | |
543 | av_log(opencl_ctx, AV_LOG_ERROR, | |
544 | "Could not get OpenCL device idx %d in the user set platform\n", opencl_ctx->platform_idx); | |
545 | return AVERROR(EINVAL); | |
546 | } | |
547 | } else { | |
548 | opencl_ctx->device_idx = 0; | |
549 | } | |
550 | ||
551 | device_node = opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->device_node[opencl_ctx->device_idx]; | |
552 | opencl_ctx->device_id = device_node->device_id; | |
553 | opencl_ctx->device_type = device_node->device_type; | |
554 | ||
555 | /* | |
556 | * Use available platform. | |
557 | */ | |
558 | av_log(opencl_ctx, AV_LOG_VERBOSE, "Platform Name: %s, Device Name: %s\n", | |
559 | opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->platform_name, | |
560 | device_node->device_name); | |
561 | cps[0] = CL_CONTEXT_PLATFORM; | |
562 | cps[1] = (cl_context_properties)opencl_ctx->platform_id; | |
563 | cps[2] = 0; | |
564 | ||
565 | opencl_ctx->context = clCreateContextFromType(cps, opencl_ctx->device_type, | |
566 | NULL, NULL, &status); | |
567 | if (status != CL_SUCCESS) { | |
568 | av_log(opencl_ctx, AV_LOG_ERROR, | |
569 | "Could not get OpenCL context from device type: %s\n", av_opencl_errstr(status)); | |
570 | return AVERROR_EXTERNAL; | |
571 | } | |
572 | opencl_ctx->command_queue = clCreateCommandQueue(opencl_ctx->context, opencl_ctx->device_id, | |
573 | 0, &status); | |
574 | if (status != CL_SUCCESS) { | |
575 | av_log(opencl_ctx, AV_LOG_ERROR, | |
576 | "Could not create OpenCL command queue: %s\n", av_opencl_errstr(status)); | |
577 | return AVERROR_EXTERNAL; | |
578 | } | |
579 | } | |
580 | } | |
581 | return ret; | |
582 | } | |
583 | ||
584 | int av_opencl_init(AVOpenCLExternalEnv *ext_opencl_env) | |
585 | { | |
586 | int ret = init_opencl_mtx( ); | |
587 | if (ret < 0) | |
588 | return ret; | |
589 | LOCK_OPENCL; | |
590 | if (!opencl_ctx.init_count) { | |
591 | if (!opencl_ctx.opt_init_flag) { | |
592 | av_opt_set_defaults(&opencl_ctx); | |
593 | opencl_ctx.opt_init_flag = 1; | |
594 | } | |
595 | ret = init_opencl_env(&opencl_ctx, ext_opencl_env); | |
596 | if (ret < 0) | |
597 | goto end; | |
598 | if (opencl_ctx.kernel_code_count <= 0) { | |
599 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
600 | "No kernel code is registered, compile kernel file failed\n"); | |
601 | ret = AVERROR(EINVAL); | |
602 | goto end; | |
603 | } | |
604 | } | |
605 | opencl_ctx.init_count++; | |
606 | end: | |
607 | UNLOCK_OPENCL; | |
608 | return ret; | |
609 | } | |
610 | ||
611 | void av_opencl_uninit(void) | |
612 | { | |
613 | cl_int status; | |
614 | LOCK_OPENCL; | |
615 | opencl_ctx.init_count--; | |
616 | if (opencl_ctx.is_user_created) | |
617 | goto end; | |
618 | if (opencl_ctx.init_count > 0) | |
619 | goto end; | |
620 | if (opencl_ctx.command_queue) { | |
621 | status = clReleaseCommandQueue(opencl_ctx.command_queue); | |
622 | if (status != CL_SUCCESS) { | |
623 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
624 | "Could not release OpenCL command queue: %s\n", av_opencl_errstr(status)); | |
625 | } | |
626 | opencl_ctx.command_queue = NULL; | |
627 | } | |
628 | if (opencl_ctx.context) { | |
629 | status = clReleaseContext(opencl_ctx.context); | |
630 | if (status != CL_SUCCESS) { | |
631 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
632 | "Could not release OpenCL context: %s\n", av_opencl_errstr(status)); | |
633 | } | |
634 | opencl_ctx.context = NULL; | |
635 | } | |
636 | free_device_list(&opencl_ctx.device_list); | |
637 | end: | |
638 | if (opencl_ctx.init_count <= 0) | |
639 | av_opt_free(&opencl_ctx); //FIXME: free openclutils context | |
640 | UNLOCK_OPENCL; | |
641 | } | |
642 | ||
643 | int av_opencl_buffer_create(cl_mem *cl_buf, size_t cl_buf_size, int flags, void *host_ptr) | |
644 | { | |
645 | cl_int status; | |
646 | *cl_buf = clCreateBuffer(opencl_ctx.context, flags, cl_buf_size, host_ptr, &status); | |
647 | if (status != CL_SUCCESS) { | |
648 | av_log(&opencl_ctx, AV_LOG_ERROR, "Could not create OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
649 | return AVERROR_EXTERNAL; | |
650 | } | |
651 | return 0; | |
652 | } | |
653 | ||
654 | void av_opencl_buffer_release(cl_mem *cl_buf) | |
655 | { | |
656 | cl_int status = 0; | |
657 | if (!cl_buf) | |
658 | return; | |
659 | status = clReleaseMemObject(*cl_buf); | |
660 | if (status != CL_SUCCESS) { | |
661 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
662 | "Could not release OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
663 | } | |
664 | memset(cl_buf, 0, sizeof(*cl_buf)); | |
665 | } | |
666 | ||
667 | int av_opencl_buffer_write(cl_mem dst_cl_buf, uint8_t *src_buf, size_t buf_size) | |
668 | { | |
669 | cl_int status; | |
670 | void *mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, dst_cl_buf, | |
671 | CL_TRUE, CL_MAP_WRITE, 0, sizeof(uint8_t) * buf_size, | |
672 | 0, NULL, NULL, &status); | |
673 | ||
674 | if (status != CL_SUCCESS) { | |
675 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
676 | "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
677 | return AVERROR_EXTERNAL; | |
678 | } | |
679 | memcpy(mapped, src_buf, buf_size); | |
680 | ||
681 | status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, dst_cl_buf, mapped, 0, NULL, NULL); | |
682 | if (status != CL_SUCCESS) { | |
683 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
684 | "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
685 | return AVERROR_EXTERNAL; | |
686 | } | |
687 | return 0; | |
688 | } | |
689 | ||
690 | int av_opencl_buffer_read(uint8_t *dst_buf, cl_mem src_cl_buf, size_t buf_size) | |
691 | { | |
692 | cl_int status; | |
693 | void *mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, src_cl_buf, | |
694 | CL_TRUE, CL_MAP_READ, 0, buf_size, | |
695 | 0, NULL, NULL, &status); | |
696 | ||
697 | if (status != CL_SUCCESS) { | |
698 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
699 | "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
700 | return AVERROR_EXTERNAL; | |
701 | } | |
702 | memcpy(dst_buf, mapped, buf_size); | |
703 | ||
704 | status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, src_cl_buf, mapped, 0, NULL, NULL); | |
705 | if (status != CL_SUCCESS) { | |
706 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
707 | "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
708 | return AVERROR_EXTERNAL; | |
709 | } | |
710 | return 0; | |
711 | } | |
712 | ||
713 | int av_opencl_buffer_write_image(cl_mem dst_cl_buf, size_t cl_buffer_size, int dst_cl_offset, | |
714 | uint8_t **src_data, int *plane_size, int plane_num) | |
715 | { | |
716 | int i, buffer_size = 0; | |
717 | uint8_t *temp; | |
718 | cl_int status; | |
719 | void *mapped; | |
720 | if ((unsigned int)plane_num > 8) { | |
721 | return AVERROR(EINVAL); | |
722 | } | |
723 | for (i = 0;i < plane_num;i++) { | |
724 | buffer_size += plane_size[i]; | |
725 | } | |
726 | if (buffer_size > cl_buffer_size) { | |
727 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
728 | "Cannot write image to OpenCL buffer: buffer too small\n"); | |
729 | return AVERROR(EINVAL); | |
730 | } | |
731 | mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, dst_cl_buf, | |
732 | CL_TRUE, CL_MAP_WRITE, 0, buffer_size + dst_cl_offset, | |
733 | 0, NULL, NULL, &status); | |
734 | if (status != CL_SUCCESS) { | |
735 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
736 | "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
737 | return AVERROR_EXTERNAL; | |
738 | } | |
739 | temp = mapped; | |
740 | temp += dst_cl_offset; | |
741 | for (i = 0; i < plane_num; i++) { | |
742 | memcpy(temp, src_data[i], plane_size[i]); | |
743 | temp += plane_size[i]; | |
744 | } | |
745 | status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, dst_cl_buf, mapped, 0, NULL, NULL); | |
746 | if (status != CL_SUCCESS) { | |
747 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
748 | "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
749 | return AVERROR_EXTERNAL; | |
750 | } | |
751 | return 0; | |
752 | } | |
753 | ||
754 | int av_opencl_buffer_read_image(uint8_t **dst_data, int *plane_size, int plane_num, | |
755 | cl_mem src_cl_buf, size_t cl_buffer_size) | |
756 | { | |
757 | int i,buffer_size = 0,ret = 0; | |
758 | uint8_t *temp; | |
759 | void *mapped; | |
760 | cl_int status; | |
761 | if ((unsigned int)plane_num > 8) { | |
762 | return AVERROR(EINVAL); | |
763 | } | |
764 | for (i = 0; i < plane_num; i++) { | |
765 | buffer_size += plane_size[i]; | |
766 | } | |
767 | if (buffer_size > cl_buffer_size) { | |
768 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
769 | "Cannot write image to CPU buffer: OpenCL buffer too small\n"); | |
770 | return AVERROR(EINVAL); | |
771 | } | |
772 | mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, src_cl_buf, | |
773 | CL_TRUE, CL_MAP_READ, 0, buffer_size, | |
774 | 0, NULL, NULL, &status); | |
775 | ||
776 | if (status != CL_SUCCESS) { | |
777 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
778 | "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
779 | return AVERROR_EXTERNAL; | |
780 | } | |
781 | temp = mapped; | |
782 | if (ret >= 0) { | |
783 | for (i = 0; i < plane_num; i++) { | |
784 | memcpy(dst_data[i], temp, plane_size[i]); | |
785 | temp += plane_size[i]; | |
786 | } | |
787 | } | |
788 | status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, src_cl_buf, mapped, 0, NULL, NULL); | |
789 | if (status != CL_SUCCESS) { | |
790 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
791 | "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
792 | return AVERROR_EXTERNAL; | |
793 | } | |
794 | return 0; | |
795 | } | |
796 | ||
797 | int64_t av_opencl_benchmark(AVOpenCLDeviceNode *device_node, cl_platform_id platform, | |
798 | int64_t (*benchmark)(AVOpenCLExternalEnv *ext_opencl_env)) | |
799 | { | |
800 | int64_t ret = 0; | |
801 | cl_int status; | |
802 | cl_context_properties cps[3]; | |
803 | AVOpenCLExternalEnv *ext_opencl_env = NULL; | |
804 | ||
805 | ext_opencl_env = av_opencl_alloc_external_env(); | |
806 | ext_opencl_env->device_id = device_node->device_id; | |
807 | ext_opencl_env->device_type = device_node->device_type; | |
808 | av_log(&opencl_ctx, AV_LOG_VERBOSE, "Performing test on OpenCL device %s\n", | |
809 | device_node->device_name); | |
810 | ||
811 | cps[0] = CL_CONTEXT_PLATFORM; | |
812 | cps[1] = (cl_context_properties)platform; | |
813 | cps[2] = 0; | |
814 | ext_opencl_env->context = clCreateContextFromType(cps, ext_opencl_env->device_type, | |
815 | NULL, NULL, &status); | |
816 | if (status != CL_SUCCESS || !ext_opencl_env->context) { | |
817 | ret = AVERROR_EXTERNAL; | |
818 | goto end; | |
819 | } | |
820 | ext_opencl_env->command_queue = clCreateCommandQueue(ext_opencl_env->context, | |
821 | ext_opencl_env->device_id, 0, &status); | |
822 | if (status != CL_SUCCESS || !ext_opencl_env->command_queue) { | |
823 | ret = AVERROR_EXTERNAL; | |
824 | goto end; | |
825 | } | |
826 | ret = benchmark(ext_opencl_env); | |
827 | if (ret < 0) | |
828 | av_log(&opencl_ctx, AV_LOG_ERROR, "Benchmark failed with OpenCL device %s\n", | |
829 | device_node->device_name); | |
830 | end: | |
831 | if (ext_opencl_env->command_queue) | |
832 | clReleaseCommandQueue(ext_opencl_env->command_queue); | |
833 | if (ext_opencl_env->context) | |
834 | clReleaseContext(ext_opencl_env->context); | |
835 | av_opencl_free_external_env(&ext_opencl_env); | |
836 | return ret; | |
837 | } |