Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (C) 2012 Peng Gao <peng@multicorewareinc.com> | |
3 | * Copyright (C) 2012 Li Cao <li@multicorewareinc.com> | |
4 | * Copyright (C) 2012 Wei Gao <weigao@multicorewareinc.com> | |
5 | * Copyright (C) 2013 Lenny Wang <lwanghpc@gmail.com> | |
6 | * | |
7 | * This file is part of FFmpeg. | |
8 | * | |
9 | * FFmpeg is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU Lesser General Public | |
11 | * License as published by the Free Software Foundation; either | |
12 | * version 2.1 of the License, or (at your option) any later version. | |
13 | * | |
14 | * FFmpeg is distributed in the hope that it will be useful, | |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | * Lesser General Public License for more details. | |
18 | * | |
19 | * You should have received a copy of the GNU Lesser General Public | |
20 | * License along with FFmpeg; if not, write to the Free Software | |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 | */ | |
23 | ||
24 | #include "opencl.h" | |
25 | #include "avstring.h" | |
26 | #include "log.h" | |
27 | #include "avassert.h" | |
28 | #include "opt.h" | |
29 | ||
30 | #if HAVE_THREADS | |
31 | #if HAVE_PTHREADS | |
32 | #include <pthread.h> | |
33 | #elif HAVE_W32THREADS | |
34 | #include "compat/w32pthreads.h" | |
35 | #elif HAVE_OS2THREADS | |
36 | #include "compat/os2threads.h" | |
37 | #endif | |
38 | #include "atomic.h" | |
39 | ||
/*
 * Lock serializing access to the wrapper's global state. It is created lazily
 * by init_opencl_mtx() and published with an atomic compare-and-swap, so the
 * object that must be volatile is the pointer itself, not the mutex it
 * points to.
 */
static pthread_mutex_t * volatile atomic_opencl_lock = NULL;
#define LOCK_OPENCL pthread_mutex_lock(atomic_opencl_lock)
#define UNLOCK_OPENCL pthread_mutex_unlock(atomic_opencl_lock)
43 | #else | |
44 | #define LOCK_OPENCL | |
45 | #define UNLOCK_OPENCL | |
46 | #endif | |
47 | ||
/* Capacity of the kernel source registry kept in the wrapper context. */
#define MAX_KERNEL_CODE_NUM 200

/* A registered kernel source string plus a flag telling whether it was built. */
typedef struct {
    int         is_compiled;    /* 0 when registered, set to 1 by av_opencl_compile() */
    const char *kernel_string;  /* pointer as passed to av_opencl_register_kernel_code(); not copied */
} KernelCode;
54 | ||
55 | typedef struct { | |
56 | const AVClass *class; | |
57 | int log_offset; | |
58 | void *log_ctx; | |
59 | int init_count; | |
60 | int opt_init_flag; | |
61 | /** | |
62 | * if set to 1, the OpenCL environment was created by the user and | |
63 | * passed as AVOpenCLExternalEnv when initing ,0:created by opencl wrapper. | |
64 | */ | |
65 | int is_user_created; | |
66 | int platform_idx; | |
67 | int device_idx; | |
68 | cl_platform_id platform_id; | |
69 | cl_device_type device_type; | |
70 | cl_context context; | |
71 | cl_device_id device_id; | |
72 | cl_command_queue command_queue; | |
2ba45a60 DM |
73 | int kernel_code_count; |
74 | KernelCode kernel_code[MAX_KERNEL_CODE_NUM]; | |
75 | AVOpenCLDeviceList device_list; | |
76 | } OpenclContext; | |
77 | ||
78 | #define OFFSET(x) offsetof(OpenclContext, x) | |
79 | ||
80 | static const AVOption opencl_options[] = { | |
81 | { "platform_idx", "set platform index value", OFFSET(platform_idx), AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX}, | |
82 | { "device_idx", "set device index value", OFFSET(device_idx), AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX}, | |
2ba45a60 DM |
83 | { NULL } |
84 | }; | |
85 | ||
86 | static const AVClass openclutils_class = { | |
87 | .class_name = "OPENCLUTILS", | |
88 | .option = opencl_options, | |
89 | .item_name = av_default_item_name, | |
90 | .version = LIBAVUTIL_VERSION_INT, | |
91 | .log_level_offset_offset = offsetof(OpenclContext, log_offset), | |
92 | .parent_log_context_offset = offsetof(OpenclContext, log_ctx), | |
93 | }; | |
94 | ||
95 | static OpenclContext opencl_ctx = {&openclutils_class}; | |
96 | ||
97 | static const cl_device_type device_type[] = {CL_DEVICE_TYPE_GPU, CL_DEVICE_TYPE_CPU}; | |
98 | ||
/* Pairs an OpenCL status code with the text returned by av_opencl_errstr(). */
typedef struct {
    int         err_code;   /* OpenCL status value */
    const char *err_str;    /* human-readable description */
} OpenclErrorMsg;
103 | ||
104 | static const OpenclErrorMsg opencl_err_msg[] = { | |
105 | {CL_DEVICE_NOT_FOUND, "DEVICE NOT FOUND"}, | |
106 | {CL_DEVICE_NOT_AVAILABLE, "DEVICE NOT AVAILABLE"}, | |
107 | {CL_COMPILER_NOT_AVAILABLE, "COMPILER NOT AVAILABLE"}, | |
108 | {CL_MEM_OBJECT_ALLOCATION_FAILURE, "MEM OBJECT ALLOCATION FAILURE"}, | |
109 | {CL_OUT_OF_RESOURCES, "OUT OF RESOURCES"}, | |
110 | {CL_OUT_OF_HOST_MEMORY, "OUT OF HOST MEMORY"}, | |
111 | {CL_PROFILING_INFO_NOT_AVAILABLE, "PROFILING INFO NOT AVAILABLE"}, | |
112 | {CL_MEM_COPY_OVERLAP, "MEM COPY OVERLAP"}, | |
113 | {CL_IMAGE_FORMAT_MISMATCH, "IMAGE FORMAT MISMATCH"}, | |
114 | {CL_IMAGE_FORMAT_NOT_SUPPORTED, "IMAGE FORMAT NOT_SUPPORTED"}, | |
115 | {CL_BUILD_PROGRAM_FAILURE, "BUILD PROGRAM FAILURE"}, | |
116 | {CL_MAP_FAILURE, "MAP FAILURE"}, | |
117 | {CL_MISALIGNED_SUB_BUFFER_OFFSET, "MISALIGNED SUB BUFFER OFFSET"}, | |
118 | {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, "EXEC STATUS ERROR FOR EVENTS IN WAIT LIST"}, | |
119 | {CL_COMPILE_PROGRAM_FAILURE, "COMPILE PROGRAM FAILURE"}, | |
120 | {CL_LINKER_NOT_AVAILABLE, "LINKER NOT AVAILABLE"}, | |
121 | {CL_LINK_PROGRAM_FAILURE, "LINK PROGRAM FAILURE"}, | |
122 | {CL_DEVICE_PARTITION_FAILED, "DEVICE PARTITION FAILED"}, | |
123 | {CL_KERNEL_ARG_INFO_NOT_AVAILABLE, "KERNEL ARG INFO NOT AVAILABLE"}, | |
124 | {CL_INVALID_VALUE, "INVALID VALUE"}, | |
125 | {CL_INVALID_DEVICE_TYPE, "INVALID DEVICE TYPE"}, | |
126 | {CL_INVALID_PLATFORM, "INVALID PLATFORM"}, | |
127 | {CL_INVALID_DEVICE, "INVALID DEVICE"}, | |
128 | {CL_INVALID_CONTEXT, "INVALID CONTEXT"}, | |
129 | {CL_INVALID_QUEUE_PROPERTIES, "INVALID QUEUE PROPERTIES"}, | |
130 | {CL_INVALID_COMMAND_QUEUE, "INVALID COMMAND QUEUE"}, | |
131 | {CL_INVALID_HOST_PTR, "INVALID HOST PTR"}, | |
132 | {CL_INVALID_MEM_OBJECT, "INVALID MEM OBJECT"}, | |
133 | {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "INVALID IMAGE FORMAT DESCRIPTOR"}, | |
134 | {CL_INVALID_IMAGE_SIZE, "INVALID IMAGE SIZE"}, | |
135 | {CL_INVALID_SAMPLER, "INVALID SAMPLER"}, | |
136 | {CL_INVALID_BINARY, "INVALID BINARY"}, | |
137 | {CL_INVALID_BUILD_OPTIONS, "INVALID BUILD OPTIONS"}, | |
138 | {CL_INVALID_PROGRAM, "INVALID PROGRAM"}, | |
139 | {CL_INVALID_PROGRAM_EXECUTABLE, "INVALID PROGRAM EXECUTABLE"}, | |
140 | {CL_INVALID_KERNEL_NAME, "INVALID KERNEL NAME"}, | |
141 | {CL_INVALID_KERNEL_DEFINITION, "INVALID KERNEL DEFINITION"}, | |
142 | {CL_INVALID_KERNEL, "INVALID KERNEL"}, | |
143 | {CL_INVALID_ARG_INDEX, "INVALID ARG INDEX"}, | |
144 | {CL_INVALID_ARG_VALUE, "INVALID ARG VALUE"}, | |
145 | {CL_INVALID_ARG_SIZE, "INVALID ARG_SIZE"}, | |
146 | {CL_INVALID_KERNEL_ARGS, "INVALID KERNEL ARGS"}, | |
147 | {CL_INVALID_WORK_DIMENSION, "INVALID WORK DIMENSION"}, | |
148 | {CL_INVALID_WORK_GROUP_SIZE, "INVALID WORK GROUP SIZE"}, | |
149 | {CL_INVALID_WORK_ITEM_SIZE, "INVALID WORK ITEM SIZE"}, | |
150 | {CL_INVALID_GLOBAL_OFFSET, "INVALID GLOBAL OFFSET"}, | |
151 | {CL_INVALID_EVENT_WAIT_LIST, "INVALID EVENT WAIT LIST"}, | |
152 | {CL_INVALID_EVENT, "INVALID EVENT"}, | |
153 | {CL_INVALID_OPERATION, "INVALID OPERATION"}, | |
154 | {CL_INVALID_GL_OBJECT, "INVALID GL OBJECT"}, | |
155 | {CL_INVALID_BUFFER_SIZE, "INVALID BUFFER SIZE"}, | |
156 | {CL_INVALID_MIP_LEVEL, "INVALID MIP LEVEL"}, | |
157 | {CL_INVALID_GLOBAL_WORK_SIZE, "INVALID GLOBAL WORK SIZE"}, | |
158 | {CL_INVALID_PROPERTY, "INVALID PROPERTY"}, | |
159 | {CL_INVALID_IMAGE_DESCRIPTOR, "INVALID IMAGE DESCRIPTOR"}, | |
160 | {CL_INVALID_COMPILER_OPTIONS, "INVALID COMPILER OPTIONS"}, | |
161 | {CL_INVALID_LINKER_OPTIONS, "INVALID LINKER OPTIONS"}, | |
162 | {CL_INVALID_DEVICE_PARTITION_COUNT, "INVALID DEVICE PARTITION COUNT"}, | |
163 | }; | |
164 | ||
165 | const char *av_opencl_errstr(cl_int status) | |
166 | { | |
167 | int i; | |
168 | for (i = 0; i < FF_ARRAY_ELEMS(opencl_err_msg); i++) { | |
169 | if (opencl_err_msg[i].err_code == status) | |
170 | return opencl_err_msg[i].err_str; | |
171 | } | |
172 | return "unknown error"; | |
173 | } | |
174 | ||
175 | static void free_device_list(AVOpenCLDeviceList *device_list) | |
176 | { | |
177 | int i, j; | |
178 | if (!device_list) | |
179 | return; | |
180 | for (i = 0; i < device_list->platform_num; i++) { | |
181 | if (!device_list->platform_node[i]) | |
182 | continue; | |
183 | for (j = 0; j < device_list->platform_node[i]->device_num; j++) { | |
184 | av_freep(&(device_list->platform_node[i]->device_node[j])); | |
185 | } | |
186 | av_freep(&device_list->platform_node[i]->device_node); | |
187 | av_freep(&device_list->platform_node[i]); | |
188 | } | |
189 | av_freep(&device_list->platform_node); | |
190 | device_list->platform_num = 0; | |
191 | } | |
192 | ||
193 | static int get_device_list(AVOpenCLDeviceList *device_list) | |
194 | { | |
195 | cl_int status; | |
196 | int i, j, k, device_num, total_devices_num, ret = 0; | |
197 | int *devices_num; | |
198 | cl_platform_id *platform_ids = NULL; | |
199 | cl_device_id *device_ids = NULL; | |
200 | AVOpenCLDeviceNode *device_node = NULL; | |
201 | status = clGetPlatformIDs(0, NULL, &device_list->platform_num); | |
202 | if (status != CL_SUCCESS) { | |
203 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
204 | "Could not get OpenCL platform ids: %s\n", av_opencl_errstr(status)); | |
205 | return AVERROR_EXTERNAL; | |
206 | } | |
207 | platform_ids = av_mallocz_array(device_list->platform_num, sizeof(cl_platform_id)); | |
208 | if (!platform_ids) | |
209 | return AVERROR(ENOMEM); | |
210 | status = clGetPlatformIDs(device_list->platform_num, platform_ids, NULL); | |
211 | if (status != CL_SUCCESS) { | |
212 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
213 | "Could not get OpenCL platform ids: %s\n", av_opencl_errstr(status)); | |
214 | ret = AVERROR_EXTERNAL; | |
215 | goto end; | |
216 | } | |
217 | device_list->platform_node = av_mallocz_array(device_list->platform_num, sizeof(AVOpenCLPlatformNode *)); | |
218 | if (!device_list->platform_node) { | |
219 | ret = AVERROR(ENOMEM); | |
220 | goto end; | |
221 | } | |
222 | devices_num = av_mallocz(sizeof(int) * FF_ARRAY_ELEMS(device_type)); | |
223 | if (!devices_num) { | |
224 | ret = AVERROR(ENOMEM); | |
225 | goto end; | |
226 | } | |
227 | for (i = 0; i < device_list->platform_num; i++) { | |
228 | device_list->platform_node[i] = av_mallocz(sizeof(AVOpenCLPlatformNode)); | |
229 | if (!device_list->platform_node[i]) { | |
230 | ret = AVERROR(ENOMEM); | |
231 | goto end; | |
232 | } | |
233 | device_list->platform_node[i]->platform_id = platform_ids[i]; | |
234 | status = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_VENDOR, | |
235 | sizeof(device_list->platform_node[i]->platform_name), | |
236 | device_list->platform_node[i]->platform_name, NULL); | |
237 | total_devices_num = 0; | |
238 | for (j = 0; j < FF_ARRAY_ELEMS(device_type); j++) { | |
239 | status = clGetDeviceIDs(device_list->platform_node[i]->platform_id, | |
240 | device_type[j], 0, NULL, &devices_num[j]); | |
241 | total_devices_num += devices_num[j]; | |
242 | } | |
243 | device_list->platform_node[i]->device_node = av_mallocz_array(total_devices_num, sizeof(AVOpenCLDeviceNode *)); | |
244 | if (!device_list->platform_node[i]->device_node) { | |
245 | ret = AVERROR(ENOMEM); | |
246 | goto end; | |
247 | } | |
248 | for (j = 0; j < FF_ARRAY_ELEMS(device_type); j++) { | |
249 | if (devices_num[j]) { | |
250 | device_ids = av_mallocz_array(devices_num[j], sizeof(cl_device_id)); | |
251 | if (!device_ids) { | |
252 | ret = AVERROR(ENOMEM); | |
253 | goto end; | |
254 | } | |
255 | status = clGetDeviceIDs(device_list->platform_node[i]->platform_id, device_type[j], | |
256 | devices_num[j], device_ids, NULL); | |
257 | if (status != CL_SUCCESS) { | |
258 | av_log(&opencl_ctx, AV_LOG_WARNING, | |
259 | "Could not get device ID: %s:\n", av_opencl_errstr(status)); | |
260 | av_freep(&device_ids); | |
261 | continue; | |
262 | } | |
263 | for (k = 0; k < devices_num[j]; k++) { | |
264 | device_num = device_list->platform_node[i]->device_num; | |
265 | device_list->platform_node[i]->device_node[device_num] = av_mallocz(sizeof(AVOpenCLDeviceNode)); | |
266 | if (!device_list->platform_node[i]->device_node[device_num]) { | |
267 | ret = AVERROR(ENOMEM); | |
268 | goto end; | |
269 | } | |
270 | device_node = device_list->platform_node[i]->device_node[device_num]; | |
271 | device_node->device_id = device_ids[k]; | |
272 | device_node->device_type = device_type[j]; | |
273 | status = clGetDeviceInfo(device_node->device_id, CL_DEVICE_NAME, | |
274 | sizeof(device_node->device_name), device_node->device_name, | |
275 | NULL); | |
276 | if (status != CL_SUCCESS) { | |
277 | av_log(&opencl_ctx, AV_LOG_WARNING, | |
278 | "Could not get device name: %s\n", av_opencl_errstr(status)); | |
279 | continue; | |
280 | } | |
281 | device_list->platform_node[i]->device_num++; | |
282 | } | |
283 | av_freep(&device_ids); | |
284 | } | |
285 | } | |
286 | } | |
287 | end: | |
288 | av_freep(&platform_ids); | |
289 | av_freep(&devices_num); | |
290 | av_freep(&device_ids); | |
291 | if (ret < 0) | |
292 | free_device_list(device_list); | |
293 | return ret; | |
294 | } | |
295 | ||
296 | int av_opencl_get_device_list(AVOpenCLDeviceList **device_list) | |
297 | { | |
298 | int ret = 0; | |
299 | *device_list = av_mallocz(sizeof(AVOpenCLDeviceList)); | |
300 | if (!(*device_list)) { | |
301 | av_log(&opencl_ctx, AV_LOG_ERROR, "Could not allocate opencl device list\n"); | |
302 | return AVERROR(ENOMEM); | |
303 | } | |
304 | ret = get_device_list(*device_list); | |
305 | if (ret < 0) { | |
306 | av_log(&opencl_ctx, AV_LOG_ERROR, "Could not get device list from environment\n"); | |
307 | free_device_list(*device_list); | |
308 | av_freep(device_list); | |
309 | return ret; | |
310 | } | |
311 | return ret; | |
312 | } | |
313 | ||
314 | void av_opencl_free_device_list(AVOpenCLDeviceList **device_list) | |
315 | { | |
316 | free_device_list(*device_list); | |
317 | av_freep(device_list); | |
318 | } | |
319 | ||
/**
 * Make sure the global OpenCL lock exists, creating it on first use.
 *
 * A candidate mutex is built privately and installed with a compare-and-swap
 * against NULL; if another thread installed its mutex first, the candidate is
 * destroyed again. Without thread support this is a no-op.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the mutex cannot be allocated, or
 *         AVERROR(err) with the pthread_mutex_init() error code
 */
static inline int init_opencl_mtx(void)
{
#if HAVE_THREADS
    if (!atomic_opencl_lock) {
        int err;
        pthread_mutex_t *tmp = av_malloc(sizeof(pthread_mutex_t));
        if (!tmp)
            return AVERROR(ENOMEM);
        if ((err = pthread_mutex_init(tmp, NULL))) {
            av_free(tmp);
            return AVERROR(err);
        }
        /* The CAS helper operates on a generic pointer slot; convert
         * explicitly instead of passing an incompatible pointer type.
         * A non-NULL previous value means another thread won the race. */
        if (avpriv_atomic_ptr_cas((void * volatile *)&atomic_opencl_lock, NULL, tmp)) {
            pthread_mutex_destroy(tmp);
            av_free(tmp);
        }
    }
#endif
    return 0;
}
340 | ||
341 | int av_opencl_set_option(const char *key, const char *val) | |
342 | { | |
343 | int ret = init_opencl_mtx( ); | |
344 | if (ret < 0) | |
345 | return ret; | |
346 | LOCK_OPENCL; | |
347 | if (!opencl_ctx.opt_init_flag) { | |
348 | av_opt_set_defaults(&opencl_ctx); | |
349 | opencl_ctx.opt_init_flag = 1; | |
350 | } | |
351 | ret = av_opt_set(&opencl_ctx, key, val, 0); | |
352 | UNLOCK_OPENCL; | |
353 | return ret; | |
354 | } | |
355 | ||
356 | int av_opencl_get_option(const char *key, uint8_t **out_val) | |
357 | { | |
358 | int ret = 0; | |
359 | LOCK_OPENCL; | |
360 | ret = av_opt_get(&opencl_ctx, key, 0, out_val); | |
361 | UNLOCK_OPENCL; | |
362 | return ret; | |
363 | } | |
364 | ||
365 | void av_opencl_free_option(void) | |
366 | { | |
367 | /*FIXME: free openclutils context*/ | |
368 | LOCK_OPENCL; | |
369 | av_opt_free(&opencl_ctx); | |
370 | UNLOCK_OPENCL; | |
371 | } | |
372 | ||
373 | AVOpenCLExternalEnv *av_opencl_alloc_external_env(void) | |
374 | { | |
375 | AVOpenCLExternalEnv *ext = av_mallocz(sizeof(AVOpenCLExternalEnv)); | |
376 | if (!ext) { | |
377 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
378 | "Could not malloc external opencl environment data space\n"); | |
379 | } | |
380 | return ext; | |
381 | } | |
382 | ||
383 | void av_opencl_free_external_env(AVOpenCLExternalEnv **ext_opencl_env) | |
384 | { | |
385 | av_freep(ext_opencl_env); | |
386 | } | |
387 | ||
388 | int av_opencl_register_kernel_code(const char *kernel_code) | |
389 | { | |
390 | int i, ret = init_opencl_mtx( ); | |
391 | if (ret < 0) | |
392 | return ret; | |
393 | LOCK_OPENCL; | |
394 | if (opencl_ctx.kernel_code_count >= MAX_KERNEL_CODE_NUM) { | |
395 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
396 | "Could not register kernel code, maximum number of registered kernel code %d already reached\n", | |
397 | MAX_KERNEL_CODE_NUM); | |
398 | ret = AVERROR(EINVAL); | |
399 | goto end; | |
400 | } | |
401 | for (i = 0; i < opencl_ctx.kernel_code_count; i++) { | |
402 | if (opencl_ctx.kernel_code[i].kernel_string == kernel_code) { | |
403 | av_log(&opencl_ctx, AV_LOG_WARNING, "Same kernel code has been registered\n"); | |
404 | goto end; | |
405 | } | |
406 | } | |
407 | opencl_ctx.kernel_code[opencl_ctx.kernel_code_count].kernel_string = kernel_code; | |
408 | opencl_ctx.kernel_code[opencl_ctx.kernel_code_count].is_compiled = 0; | |
409 | opencl_ctx.kernel_code_count++; | |
410 | end: | |
411 | UNLOCK_OPENCL; | |
412 | return ret; | |
413 | } | |
414 | ||
415 | cl_program av_opencl_compile(const char *program_name, const char *build_opts) | |
416 | { | |
417 | int i; | |
418 | cl_int status; | |
419 | int kernel_code_idx = 0; | |
420 | const char *kernel_source; | |
421 | size_t kernel_code_len; | |
422 | char* ptr = NULL; | |
423 | cl_program program = NULL; | |
424 | ||
425 | LOCK_OPENCL; | |
426 | for (i = 0; i < opencl_ctx.kernel_code_count; i++) { | |
427 | // identify a program using a unique name within the kernel source | |
428 | ptr = av_stristr(opencl_ctx.kernel_code[i].kernel_string, program_name); | |
429 | if (ptr && !opencl_ctx.kernel_code[i].is_compiled) { | |
430 | kernel_source = opencl_ctx.kernel_code[i].kernel_string; | |
431 | kernel_code_len = strlen(opencl_ctx.kernel_code[i].kernel_string); | |
432 | kernel_code_idx = i; | |
433 | break; | |
434 | } | |
435 | } | |
436 | if (!kernel_source) { | |
437 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
438 | "Unable to find OpenCL kernel source '%s'\n", program_name); | |
439 | goto end; | |
440 | } | |
441 | ||
442 | /* create a CL program from kernel source */ | |
443 | program = clCreateProgramWithSource(opencl_ctx.context, 1, &kernel_source, &kernel_code_len, &status); | |
444 | if(status != CL_SUCCESS) { | |
445 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
446 | "Unable to create OpenCL program '%s': %s\n", program_name, av_opencl_errstr(status)); | |
447 | program = NULL; | |
448 | goto end; | |
449 | } | |
450 | status = clBuildProgram(program, 1, &(opencl_ctx.device_id), build_opts, NULL, NULL); | |
451 | if (status != CL_SUCCESS) { | |
452 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
453 | "Compilation failed with OpenCL program: %s\n", program_name); | |
454 | program = NULL; | |
455 | goto end; | |
456 | } | |
457 | ||
458 | opencl_ctx.kernel_code[kernel_code_idx].is_compiled = 1; | |
459 | end: | |
460 | UNLOCK_OPENCL; | |
461 | return program; | |
462 | } | |
463 | ||
464 | cl_command_queue av_opencl_get_command_queue(void) | |
465 | { | |
466 | return opencl_ctx.command_queue; | |
467 | } | |
468 | ||
2ba45a60 DM |
469 | static int init_opencl_env(OpenclContext *opencl_ctx, AVOpenCLExternalEnv *ext_opencl_env) |
470 | { | |
471 | cl_int status; | |
472 | cl_context_properties cps[3]; | |
473 | int i, ret = 0; | |
474 | AVOpenCLDeviceNode *device_node = NULL; | |
475 | ||
476 | if (ext_opencl_env) { | |
477 | if (opencl_ctx->is_user_created) | |
478 | return 0; | |
479 | opencl_ctx->platform_id = ext_opencl_env->platform_id; | |
480 | opencl_ctx->is_user_created = 1; | |
481 | opencl_ctx->command_queue = ext_opencl_env->command_queue; | |
482 | opencl_ctx->context = ext_opencl_env->context; | |
483 | opencl_ctx->device_id = ext_opencl_env->device_id; | |
484 | opencl_ctx->device_type = ext_opencl_env->device_type; | |
485 | } else { | |
486 | if (!opencl_ctx->is_user_created) { | |
487 | if (!opencl_ctx->device_list.platform_num) { | |
488 | ret = get_device_list(&opencl_ctx->device_list); | |
489 | if (ret < 0) { | |
490 | return ret; | |
491 | } | |
492 | } | |
493 | if (opencl_ctx->platform_idx >= 0) { | |
494 | if (opencl_ctx->device_list.platform_num < opencl_ctx->platform_idx + 1) { | |
495 | av_log(opencl_ctx, AV_LOG_ERROR, "User set platform index not exist\n"); | |
496 | return AVERROR(EINVAL); | |
497 | } | |
498 | if (!opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->device_num) { | |
499 | av_log(opencl_ctx, AV_LOG_ERROR, "No devices in user specific platform with index %d\n", | |
500 | opencl_ctx->platform_idx); | |
501 | return AVERROR(EINVAL); | |
502 | } | |
503 | opencl_ctx->platform_id = opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->platform_id; | |
504 | } else { | |
505 | /* get a usable platform by default*/ | |
506 | for (i = 0; i < opencl_ctx->device_list.platform_num; i++) { | |
507 | if (opencl_ctx->device_list.platform_node[i]->device_num) { | |
508 | opencl_ctx->platform_id = opencl_ctx->device_list.platform_node[i]->platform_id; | |
509 | opencl_ctx->platform_idx = i; | |
510 | break; | |
511 | } | |
512 | } | |
513 | } | |
514 | if (!opencl_ctx->platform_id) { | |
515 | av_log(opencl_ctx, AV_LOG_ERROR, "Could not get OpenCL platforms\n"); | |
516 | return AVERROR_EXTERNAL; | |
517 | } | |
518 | /* get a usable device*/ | |
519 | if (opencl_ctx->device_idx >= 0) { | |
520 | if (opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->device_num < opencl_ctx->device_idx + 1) { | |
521 | av_log(opencl_ctx, AV_LOG_ERROR, | |
522 | "Could not get OpenCL device idx %d in the user set platform\n", opencl_ctx->platform_idx); | |
523 | return AVERROR(EINVAL); | |
524 | } | |
525 | } else { | |
526 | opencl_ctx->device_idx = 0; | |
527 | } | |
528 | ||
529 | device_node = opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->device_node[opencl_ctx->device_idx]; | |
530 | opencl_ctx->device_id = device_node->device_id; | |
531 | opencl_ctx->device_type = device_node->device_type; | |
532 | ||
533 | /* | |
534 | * Use available platform. | |
535 | */ | |
536 | av_log(opencl_ctx, AV_LOG_VERBOSE, "Platform Name: %s, Device Name: %s\n", | |
537 | opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->platform_name, | |
538 | device_node->device_name); | |
539 | cps[0] = CL_CONTEXT_PLATFORM; | |
540 | cps[1] = (cl_context_properties)opencl_ctx->platform_id; | |
541 | cps[2] = 0; | |
542 | ||
543 | opencl_ctx->context = clCreateContextFromType(cps, opencl_ctx->device_type, | |
544 | NULL, NULL, &status); | |
545 | if (status != CL_SUCCESS) { | |
546 | av_log(opencl_ctx, AV_LOG_ERROR, | |
547 | "Could not get OpenCL context from device type: %s\n", av_opencl_errstr(status)); | |
548 | return AVERROR_EXTERNAL; | |
549 | } | |
550 | opencl_ctx->command_queue = clCreateCommandQueue(opencl_ctx->context, opencl_ctx->device_id, | |
551 | 0, &status); | |
552 | if (status != CL_SUCCESS) { | |
553 | av_log(opencl_ctx, AV_LOG_ERROR, | |
554 | "Could not create OpenCL command queue: %s\n", av_opencl_errstr(status)); | |
555 | return AVERROR_EXTERNAL; | |
556 | } | |
557 | } | |
558 | } | |
559 | return ret; | |
560 | } | |
561 | ||
562 | int av_opencl_init(AVOpenCLExternalEnv *ext_opencl_env) | |
563 | { | |
564 | int ret = init_opencl_mtx( ); | |
565 | if (ret < 0) | |
566 | return ret; | |
567 | LOCK_OPENCL; | |
568 | if (!opencl_ctx.init_count) { | |
569 | if (!opencl_ctx.opt_init_flag) { | |
570 | av_opt_set_defaults(&opencl_ctx); | |
571 | opencl_ctx.opt_init_flag = 1; | |
572 | } | |
573 | ret = init_opencl_env(&opencl_ctx, ext_opencl_env); | |
574 | if (ret < 0) | |
575 | goto end; | |
576 | if (opencl_ctx.kernel_code_count <= 0) { | |
577 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
578 | "No kernel code is registered, compile kernel file failed\n"); | |
579 | ret = AVERROR(EINVAL); | |
580 | goto end; | |
581 | } | |
582 | } | |
583 | opencl_ctx.init_count++; | |
584 | end: | |
585 | UNLOCK_OPENCL; | |
586 | return ret; | |
587 | } | |
588 | ||
589 | void av_opencl_uninit(void) | |
590 | { | |
591 | cl_int status; | |
592 | LOCK_OPENCL; | |
593 | opencl_ctx.init_count--; | |
594 | if (opencl_ctx.is_user_created) | |
595 | goto end; | |
596 | if (opencl_ctx.init_count > 0) | |
597 | goto end; | |
598 | if (opencl_ctx.command_queue) { | |
599 | status = clReleaseCommandQueue(opencl_ctx.command_queue); | |
600 | if (status != CL_SUCCESS) { | |
601 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
602 | "Could not release OpenCL command queue: %s\n", av_opencl_errstr(status)); | |
603 | } | |
604 | opencl_ctx.command_queue = NULL; | |
605 | } | |
606 | if (opencl_ctx.context) { | |
607 | status = clReleaseContext(opencl_ctx.context); | |
608 | if (status != CL_SUCCESS) { | |
609 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
610 | "Could not release OpenCL context: %s\n", av_opencl_errstr(status)); | |
611 | } | |
612 | opencl_ctx.context = NULL; | |
613 | } | |
614 | free_device_list(&opencl_ctx.device_list); | |
615 | end: | |
616 | if (opencl_ctx.init_count <= 0) | |
617 | av_opt_free(&opencl_ctx); //FIXME: free openclutils context | |
618 | UNLOCK_OPENCL; | |
619 | } | |
620 | ||
621 | int av_opencl_buffer_create(cl_mem *cl_buf, size_t cl_buf_size, int flags, void *host_ptr) | |
622 | { | |
623 | cl_int status; | |
624 | *cl_buf = clCreateBuffer(opencl_ctx.context, flags, cl_buf_size, host_ptr, &status); | |
625 | if (status != CL_SUCCESS) { | |
626 | av_log(&opencl_ctx, AV_LOG_ERROR, "Could not create OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
627 | return AVERROR_EXTERNAL; | |
628 | } | |
629 | return 0; | |
630 | } | |
631 | ||
632 | void av_opencl_buffer_release(cl_mem *cl_buf) | |
633 | { | |
634 | cl_int status = 0; | |
635 | if (!cl_buf) | |
636 | return; | |
637 | status = clReleaseMemObject(*cl_buf); | |
638 | if (status != CL_SUCCESS) { | |
639 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
640 | "Could not release OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
641 | } | |
642 | memset(cl_buf, 0, sizeof(*cl_buf)); | |
643 | } | |
644 | ||
645 | int av_opencl_buffer_write(cl_mem dst_cl_buf, uint8_t *src_buf, size_t buf_size) | |
646 | { | |
647 | cl_int status; | |
648 | void *mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, dst_cl_buf, | |
649 | CL_TRUE, CL_MAP_WRITE, 0, sizeof(uint8_t) * buf_size, | |
650 | 0, NULL, NULL, &status); | |
651 | ||
652 | if (status != CL_SUCCESS) { | |
653 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
654 | "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
655 | return AVERROR_EXTERNAL; | |
656 | } | |
657 | memcpy(mapped, src_buf, buf_size); | |
658 | ||
659 | status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, dst_cl_buf, mapped, 0, NULL, NULL); | |
660 | if (status != CL_SUCCESS) { | |
661 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
662 | "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
663 | return AVERROR_EXTERNAL; | |
664 | } | |
665 | return 0; | |
666 | } | |
667 | ||
668 | int av_opencl_buffer_read(uint8_t *dst_buf, cl_mem src_cl_buf, size_t buf_size) | |
669 | { | |
670 | cl_int status; | |
671 | void *mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, src_cl_buf, | |
672 | CL_TRUE, CL_MAP_READ, 0, buf_size, | |
673 | 0, NULL, NULL, &status); | |
674 | ||
675 | if (status != CL_SUCCESS) { | |
676 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
677 | "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
678 | return AVERROR_EXTERNAL; | |
679 | } | |
680 | memcpy(dst_buf, mapped, buf_size); | |
681 | ||
682 | status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, src_cl_buf, mapped, 0, NULL, NULL); | |
683 | if (status != CL_SUCCESS) { | |
684 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
685 | "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
686 | return AVERROR_EXTERNAL; | |
687 | } | |
688 | return 0; | |
689 | } | |
690 | ||
691 | int av_opencl_buffer_write_image(cl_mem dst_cl_buf, size_t cl_buffer_size, int dst_cl_offset, | |
692 | uint8_t **src_data, int *plane_size, int plane_num) | |
693 | { | |
694 | int i, buffer_size = 0; | |
695 | uint8_t *temp; | |
696 | cl_int status; | |
697 | void *mapped; | |
698 | if ((unsigned int)plane_num > 8) { | |
699 | return AVERROR(EINVAL); | |
700 | } | |
701 | for (i = 0;i < plane_num;i++) { | |
702 | buffer_size += plane_size[i]; | |
703 | } | |
704 | if (buffer_size > cl_buffer_size) { | |
705 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
706 | "Cannot write image to OpenCL buffer: buffer too small\n"); | |
707 | return AVERROR(EINVAL); | |
708 | } | |
709 | mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, dst_cl_buf, | |
710 | CL_TRUE, CL_MAP_WRITE, 0, buffer_size + dst_cl_offset, | |
711 | 0, NULL, NULL, &status); | |
712 | if (status != CL_SUCCESS) { | |
713 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
714 | "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
715 | return AVERROR_EXTERNAL; | |
716 | } | |
717 | temp = mapped; | |
718 | temp += dst_cl_offset; | |
719 | for (i = 0; i < plane_num; i++) { | |
720 | memcpy(temp, src_data[i], plane_size[i]); | |
721 | temp += plane_size[i]; | |
722 | } | |
723 | status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, dst_cl_buf, mapped, 0, NULL, NULL); | |
724 | if (status != CL_SUCCESS) { | |
725 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
726 | "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
727 | return AVERROR_EXTERNAL; | |
728 | } | |
729 | return 0; | |
730 | } | |
731 | ||
732 | int av_opencl_buffer_read_image(uint8_t **dst_data, int *plane_size, int plane_num, | |
733 | cl_mem src_cl_buf, size_t cl_buffer_size) | |
734 | { | |
735 | int i,buffer_size = 0,ret = 0; | |
736 | uint8_t *temp; | |
737 | void *mapped; | |
738 | cl_int status; | |
739 | if ((unsigned int)plane_num > 8) { | |
740 | return AVERROR(EINVAL); | |
741 | } | |
742 | for (i = 0; i < plane_num; i++) { | |
743 | buffer_size += plane_size[i]; | |
744 | } | |
745 | if (buffer_size > cl_buffer_size) { | |
746 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
747 | "Cannot write image to CPU buffer: OpenCL buffer too small\n"); | |
748 | return AVERROR(EINVAL); | |
749 | } | |
750 | mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, src_cl_buf, | |
751 | CL_TRUE, CL_MAP_READ, 0, buffer_size, | |
752 | 0, NULL, NULL, &status); | |
753 | ||
754 | if (status != CL_SUCCESS) { | |
755 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
756 | "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
757 | return AVERROR_EXTERNAL; | |
758 | } | |
759 | temp = mapped; | |
760 | if (ret >= 0) { | |
761 | for (i = 0; i < plane_num; i++) { | |
762 | memcpy(dst_data[i], temp, plane_size[i]); | |
763 | temp += plane_size[i]; | |
764 | } | |
765 | } | |
766 | status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, src_cl_buf, mapped, 0, NULL, NULL); | |
767 | if (status != CL_SUCCESS) { | |
768 | av_log(&opencl_ctx, AV_LOG_ERROR, | |
769 | "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status)); | |
770 | return AVERROR_EXTERNAL; | |
771 | } | |
772 | return 0; | |
773 | } | |
774 | ||
775 | int64_t av_opencl_benchmark(AVOpenCLDeviceNode *device_node, cl_platform_id platform, | |
776 | int64_t (*benchmark)(AVOpenCLExternalEnv *ext_opencl_env)) | |
777 | { | |
778 | int64_t ret = 0; | |
779 | cl_int status; | |
780 | cl_context_properties cps[3]; | |
781 | AVOpenCLExternalEnv *ext_opencl_env = NULL; | |
782 | ||
783 | ext_opencl_env = av_opencl_alloc_external_env(); | |
784 | ext_opencl_env->device_id = device_node->device_id; | |
785 | ext_opencl_env->device_type = device_node->device_type; | |
786 | av_log(&opencl_ctx, AV_LOG_VERBOSE, "Performing test on OpenCL device %s\n", | |
787 | device_node->device_name); | |
788 | ||
789 | cps[0] = CL_CONTEXT_PLATFORM; | |
790 | cps[1] = (cl_context_properties)platform; | |
791 | cps[2] = 0; | |
792 | ext_opencl_env->context = clCreateContextFromType(cps, ext_opencl_env->device_type, | |
793 | NULL, NULL, &status); | |
794 | if (status != CL_SUCCESS || !ext_opencl_env->context) { | |
795 | ret = AVERROR_EXTERNAL; | |
796 | goto end; | |
797 | } | |
798 | ext_opencl_env->command_queue = clCreateCommandQueue(ext_opencl_env->context, | |
799 | ext_opencl_env->device_id, 0, &status); | |
800 | if (status != CL_SUCCESS || !ext_opencl_env->command_queue) { | |
801 | ret = AVERROR_EXTERNAL; | |
802 | goto end; | |
803 | } | |
804 | ret = benchmark(ext_opencl_env); | |
805 | if (ret < 0) | |
806 | av_log(&opencl_ctx, AV_LOG_ERROR, "Benchmark failed with OpenCL device %s\n", | |
807 | device_node->device_name); | |
808 | end: | |
809 | if (ext_opencl_env->command_queue) | |
810 | clReleaseCommandQueue(ext_opencl_env->command_queue); | |
811 | if (ext_opencl_env->context) | |
812 | clReleaseContext(ext_opencl_env->context); | |
813 | av_opencl_free_external_env(&ext_opencl_env); | |
814 | return ret; | |
815 | } |