| 1 | /* |
| 2 | * Copyright (C) 2012 Peng Gao <peng@multicorewareinc.com> |
| 3 | * Copyright (C) 2012 Li Cao <li@multicorewareinc.com> |
| 4 | * Copyright (C) 2012 Wei Gao <weigao@multicorewareinc.com> |
| 5 | * Copyright (C) 2013 Lenny Wang <lwanghpc@gmail.com> |
| 6 | * |
| 7 | * This file is part of FFmpeg. |
| 8 | * |
| 9 | * FFmpeg is free software; you can redistribute it and/or |
| 10 | * modify it under the terms of the GNU Lesser General Public |
| 11 | * License as published by the Free Software Foundation; either |
| 12 | * version 2.1 of the License, or (at your option) any later version. |
| 13 | * |
| 14 | * FFmpeg is distributed in the hope that it will be useful, |
| 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 17 | * Lesser General Public License for more details. |
| 18 | * |
| 19 | * You should have received a copy of the GNU Lesser General Public |
| 20 | * License along with FFmpeg; if not, write to the Free Software |
| 21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 22 | */ |
| 23 | |
| 24 | #include "opencl.h" |
| 25 | #include "avstring.h" |
| 26 | #include "log.h" |
| 27 | #include "avassert.h" |
| 28 | #include "opt.h" |
| 29 | |
#if HAVE_THREADS
#if HAVE_PTHREADS
#include <pthread.h>
#elif HAVE_W32THREADS
#include "compat/w32pthreads.h"
#elif HAVE_OS2THREADS
#include "compat/os2threads.h"
#endif
#include "atomic.h"

/*
 * Process-wide mutex guarding opencl_ctx. It is created lazily by
 * init_opencl_mtx() and published with a compare-and-swap, so the object
 * that is shared between threads is the pointer itself: the pointer is the
 * volatile-qualified entity, not the mutex it points to. Keeping the pointee
 * unqualified also lets it be passed to pthread_mutex_lock()/unlock()
 * without discarding a qualifier.
 */
static pthread_mutex_t * volatile atomic_opencl_lock = NULL;
/* Both macros expect init_opencl_mtx() to have succeeded beforehand. */
#define LOCK_OPENCL pthread_mutex_lock(atomic_opencl_lock)
#define UNLOCK_OPENCL pthread_mutex_unlock(atomic_opencl_lock)
#else
/* Without thread support the locking macros expand to nothing. */
#define LOCK_OPENCL
#define UNLOCK_OPENCL
#endif
| 47 | |
/** Capacity of the fixed-size tables held in OpenclContext. */
#define MAX_KERNEL_CODE_NUM 200

/**
 * One entry of the registered kernel source table.
 */
typedef struct KernelCode {
    /** 0 when registered; set to 1 by av_opencl_compile() after a successful build. */
    int is_compiled;
    /** Kernel source text; the pointer is stored as passed in, not copied. */
    const char *kernel_string;
} KernelCode;
| 54 | |
| 55 | typedef struct { |
| 56 | const AVClass *class; |
| 57 | int log_offset; |
| 58 | void *log_ctx; |
| 59 | int init_count; |
| 60 | int opt_init_flag; |
| 61 | /** |
| 62 | * if set to 1, the OpenCL environment was created by the user and |
| 63 | * passed as AVOpenCLExternalEnv when initing ,0:created by opencl wrapper. |
| 64 | */ |
| 65 | int is_user_created; |
| 66 | int platform_idx; |
| 67 | int device_idx; |
| 68 | cl_platform_id platform_id; |
| 69 | cl_device_type device_type; |
| 70 | cl_context context; |
| 71 | cl_device_id device_id; |
| 72 | cl_command_queue command_queue; |
| 73 | #if FF_API_OLD_OPENCL |
| 74 | char *build_options; |
| 75 | int program_count; |
| 76 | cl_program programs[MAX_KERNEL_CODE_NUM]; |
| 77 | int kernel_count; |
| 78 | #endif |
| 79 | int kernel_code_count; |
| 80 | KernelCode kernel_code[MAX_KERNEL_CODE_NUM]; |
| 81 | AVOpenCLDeviceList device_list; |
| 82 | } OpenclContext; |
| 83 | |
| 84 | #define OFFSET(x) offsetof(OpenclContext, x) |
| 85 | |
| 86 | static const AVOption opencl_options[] = { |
| 87 | { "platform_idx", "set platform index value", OFFSET(platform_idx), AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX}, |
| 88 | { "device_idx", "set device index value", OFFSET(device_idx), AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX}, |
| 89 | #if FF_API_OLD_OPENCL |
| 90 | { "build_options", "build options of opencl", OFFSET(build_options), AV_OPT_TYPE_STRING, {.str="-I."}, CHAR_MIN, CHAR_MAX}, |
| 91 | #endif |
| 92 | { NULL } |
| 93 | }; |
| 94 | |
| 95 | static const AVClass openclutils_class = { |
| 96 | .class_name = "OPENCLUTILS", |
| 97 | .option = opencl_options, |
| 98 | .item_name = av_default_item_name, |
| 99 | .version = LIBAVUTIL_VERSION_INT, |
| 100 | .log_level_offset_offset = offsetof(OpenclContext, log_offset), |
| 101 | .parent_log_context_offset = offsetof(OpenclContext, log_ctx), |
| 102 | }; |
| 103 | |
| 104 | static OpenclContext opencl_ctx = {&openclutils_class}; |
| 105 | |
| 106 | static const cl_device_type device_type[] = {CL_DEVICE_TYPE_GPU, CL_DEVICE_TYPE_CPU}; |
| 107 | |
/**
 * Pairs an OpenCL status code with the text returned by av_opencl_errstr().
 */
typedef struct OpenclErrorMsg {
    int err_code;        /**< OpenCL status value (CL_* error constant) */
    const char *err_str; /**< human readable description */
} OpenclErrorMsg;
| 112 | |
| 113 | static const OpenclErrorMsg opencl_err_msg[] = { |
| 114 | {CL_DEVICE_NOT_FOUND, "DEVICE NOT FOUND"}, |
| 115 | {CL_DEVICE_NOT_AVAILABLE, "DEVICE NOT AVAILABLE"}, |
| 116 | {CL_COMPILER_NOT_AVAILABLE, "COMPILER NOT AVAILABLE"}, |
| 117 | {CL_MEM_OBJECT_ALLOCATION_FAILURE, "MEM OBJECT ALLOCATION FAILURE"}, |
| 118 | {CL_OUT_OF_RESOURCES, "OUT OF RESOURCES"}, |
| 119 | {CL_OUT_OF_HOST_MEMORY, "OUT OF HOST MEMORY"}, |
| 120 | {CL_PROFILING_INFO_NOT_AVAILABLE, "PROFILING INFO NOT AVAILABLE"}, |
| 121 | {CL_MEM_COPY_OVERLAP, "MEM COPY OVERLAP"}, |
| 122 | {CL_IMAGE_FORMAT_MISMATCH, "IMAGE FORMAT MISMATCH"}, |
| 123 | {CL_IMAGE_FORMAT_NOT_SUPPORTED, "IMAGE FORMAT NOT_SUPPORTED"}, |
| 124 | {CL_BUILD_PROGRAM_FAILURE, "BUILD PROGRAM FAILURE"}, |
| 125 | {CL_MAP_FAILURE, "MAP FAILURE"}, |
| 126 | {CL_MISALIGNED_SUB_BUFFER_OFFSET, "MISALIGNED SUB BUFFER OFFSET"}, |
| 127 | {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, "EXEC STATUS ERROR FOR EVENTS IN WAIT LIST"}, |
| 128 | {CL_COMPILE_PROGRAM_FAILURE, "COMPILE PROGRAM FAILURE"}, |
| 129 | {CL_LINKER_NOT_AVAILABLE, "LINKER NOT AVAILABLE"}, |
| 130 | {CL_LINK_PROGRAM_FAILURE, "LINK PROGRAM FAILURE"}, |
| 131 | {CL_DEVICE_PARTITION_FAILED, "DEVICE PARTITION FAILED"}, |
| 132 | {CL_KERNEL_ARG_INFO_NOT_AVAILABLE, "KERNEL ARG INFO NOT AVAILABLE"}, |
| 133 | {CL_INVALID_VALUE, "INVALID VALUE"}, |
| 134 | {CL_INVALID_DEVICE_TYPE, "INVALID DEVICE TYPE"}, |
| 135 | {CL_INVALID_PLATFORM, "INVALID PLATFORM"}, |
| 136 | {CL_INVALID_DEVICE, "INVALID DEVICE"}, |
| 137 | {CL_INVALID_CONTEXT, "INVALID CONTEXT"}, |
| 138 | {CL_INVALID_QUEUE_PROPERTIES, "INVALID QUEUE PROPERTIES"}, |
| 139 | {CL_INVALID_COMMAND_QUEUE, "INVALID COMMAND QUEUE"}, |
| 140 | {CL_INVALID_HOST_PTR, "INVALID HOST PTR"}, |
| 141 | {CL_INVALID_MEM_OBJECT, "INVALID MEM OBJECT"}, |
| 142 | {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "INVALID IMAGE FORMAT DESCRIPTOR"}, |
| 143 | {CL_INVALID_IMAGE_SIZE, "INVALID IMAGE SIZE"}, |
| 144 | {CL_INVALID_SAMPLER, "INVALID SAMPLER"}, |
| 145 | {CL_INVALID_BINARY, "INVALID BINARY"}, |
| 146 | {CL_INVALID_BUILD_OPTIONS, "INVALID BUILD OPTIONS"}, |
| 147 | {CL_INVALID_PROGRAM, "INVALID PROGRAM"}, |
| 148 | {CL_INVALID_PROGRAM_EXECUTABLE, "INVALID PROGRAM EXECUTABLE"}, |
| 149 | {CL_INVALID_KERNEL_NAME, "INVALID KERNEL NAME"}, |
| 150 | {CL_INVALID_KERNEL_DEFINITION, "INVALID KERNEL DEFINITION"}, |
| 151 | {CL_INVALID_KERNEL, "INVALID KERNEL"}, |
| 152 | {CL_INVALID_ARG_INDEX, "INVALID ARG INDEX"}, |
| 153 | {CL_INVALID_ARG_VALUE, "INVALID ARG VALUE"}, |
| 154 | {CL_INVALID_ARG_SIZE, "INVALID ARG_SIZE"}, |
| 155 | {CL_INVALID_KERNEL_ARGS, "INVALID KERNEL ARGS"}, |
| 156 | {CL_INVALID_WORK_DIMENSION, "INVALID WORK DIMENSION"}, |
| 157 | {CL_INVALID_WORK_GROUP_SIZE, "INVALID WORK GROUP SIZE"}, |
| 158 | {CL_INVALID_WORK_ITEM_SIZE, "INVALID WORK ITEM SIZE"}, |
| 159 | {CL_INVALID_GLOBAL_OFFSET, "INVALID GLOBAL OFFSET"}, |
| 160 | {CL_INVALID_EVENT_WAIT_LIST, "INVALID EVENT WAIT LIST"}, |
| 161 | {CL_INVALID_EVENT, "INVALID EVENT"}, |
| 162 | {CL_INVALID_OPERATION, "INVALID OPERATION"}, |
| 163 | {CL_INVALID_GL_OBJECT, "INVALID GL OBJECT"}, |
| 164 | {CL_INVALID_BUFFER_SIZE, "INVALID BUFFER SIZE"}, |
| 165 | {CL_INVALID_MIP_LEVEL, "INVALID MIP LEVEL"}, |
| 166 | {CL_INVALID_GLOBAL_WORK_SIZE, "INVALID GLOBAL WORK SIZE"}, |
| 167 | {CL_INVALID_PROPERTY, "INVALID PROPERTY"}, |
| 168 | {CL_INVALID_IMAGE_DESCRIPTOR, "INVALID IMAGE DESCRIPTOR"}, |
| 169 | {CL_INVALID_COMPILER_OPTIONS, "INVALID COMPILER OPTIONS"}, |
| 170 | {CL_INVALID_LINKER_OPTIONS, "INVALID LINKER OPTIONS"}, |
| 171 | {CL_INVALID_DEVICE_PARTITION_COUNT, "INVALID DEVICE PARTITION COUNT"}, |
| 172 | }; |
| 173 | |
| 174 | const char *av_opencl_errstr(cl_int status) |
| 175 | { |
| 176 | int i; |
| 177 | for (i = 0; i < FF_ARRAY_ELEMS(opencl_err_msg); i++) { |
| 178 | if (opencl_err_msg[i].err_code == status) |
| 179 | return opencl_err_msg[i].err_str; |
| 180 | } |
| 181 | return "unknown error"; |
| 182 | } |
| 183 | |
| 184 | static void free_device_list(AVOpenCLDeviceList *device_list) |
| 185 | { |
| 186 | int i, j; |
| 187 | if (!device_list) |
| 188 | return; |
| 189 | for (i = 0; i < device_list->platform_num; i++) { |
| 190 | if (!device_list->platform_node[i]) |
| 191 | continue; |
| 192 | for (j = 0; j < device_list->platform_node[i]->device_num; j++) { |
| 193 | av_freep(&(device_list->platform_node[i]->device_node[j])); |
| 194 | } |
| 195 | av_freep(&device_list->platform_node[i]->device_node); |
| 196 | av_freep(&device_list->platform_node[i]); |
| 197 | } |
| 198 | av_freep(&device_list->platform_node); |
| 199 | device_list->platform_num = 0; |
| 200 | } |
| 201 | |
| 202 | static int get_device_list(AVOpenCLDeviceList *device_list) |
| 203 | { |
| 204 | cl_int status; |
| 205 | int i, j, k, device_num, total_devices_num, ret = 0; |
| 206 | int *devices_num; |
| 207 | cl_platform_id *platform_ids = NULL; |
| 208 | cl_device_id *device_ids = NULL; |
| 209 | AVOpenCLDeviceNode *device_node = NULL; |
| 210 | status = clGetPlatformIDs(0, NULL, &device_list->platform_num); |
| 211 | if (status != CL_SUCCESS) { |
| 212 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 213 | "Could not get OpenCL platform ids: %s\n", av_opencl_errstr(status)); |
| 214 | return AVERROR_EXTERNAL; |
| 215 | } |
| 216 | platform_ids = av_mallocz_array(device_list->platform_num, sizeof(cl_platform_id)); |
| 217 | if (!platform_ids) |
| 218 | return AVERROR(ENOMEM); |
| 219 | status = clGetPlatformIDs(device_list->platform_num, platform_ids, NULL); |
| 220 | if (status != CL_SUCCESS) { |
| 221 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 222 | "Could not get OpenCL platform ids: %s\n", av_opencl_errstr(status)); |
| 223 | ret = AVERROR_EXTERNAL; |
| 224 | goto end; |
| 225 | } |
| 226 | device_list->platform_node = av_mallocz_array(device_list->platform_num, sizeof(AVOpenCLPlatformNode *)); |
| 227 | if (!device_list->platform_node) { |
| 228 | ret = AVERROR(ENOMEM); |
| 229 | goto end; |
| 230 | } |
| 231 | devices_num = av_mallocz(sizeof(int) * FF_ARRAY_ELEMS(device_type)); |
| 232 | if (!devices_num) { |
| 233 | ret = AVERROR(ENOMEM); |
| 234 | goto end; |
| 235 | } |
| 236 | for (i = 0; i < device_list->platform_num; i++) { |
| 237 | device_list->platform_node[i] = av_mallocz(sizeof(AVOpenCLPlatformNode)); |
| 238 | if (!device_list->platform_node[i]) { |
| 239 | ret = AVERROR(ENOMEM); |
| 240 | goto end; |
| 241 | } |
| 242 | device_list->platform_node[i]->platform_id = platform_ids[i]; |
| 243 | status = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_VENDOR, |
| 244 | sizeof(device_list->platform_node[i]->platform_name), |
| 245 | device_list->platform_node[i]->platform_name, NULL); |
| 246 | total_devices_num = 0; |
| 247 | for (j = 0; j < FF_ARRAY_ELEMS(device_type); j++) { |
| 248 | status = clGetDeviceIDs(device_list->platform_node[i]->platform_id, |
| 249 | device_type[j], 0, NULL, &devices_num[j]); |
| 250 | total_devices_num += devices_num[j]; |
| 251 | } |
| 252 | device_list->platform_node[i]->device_node = av_mallocz_array(total_devices_num, sizeof(AVOpenCLDeviceNode *)); |
| 253 | if (!device_list->platform_node[i]->device_node) { |
| 254 | ret = AVERROR(ENOMEM); |
| 255 | goto end; |
| 256 | } |
| 257 | for (j = 0; j < FF_ARRAY_ELEMS(device_type); j++) { |
| 258 | if (devices_num[j]) { |
| 259 | device_ids = av_mallocz_array(devices_num[j], sizeof(cl_device_id)); |
| 260 | if (!device_ids) { |
| 261 | ret = AVERROR(ENOMEM); |
| 262 | goto end; |
| 263 | } |
| 264 | status = clGetDeviceIDs(device_list->platform_node[i]->platform_id, device_type[j], |
| 265 | devices_num[j], device_ids, NULL); |
| 266 | if (status != CL_SUCCESS) { |
| 267 | av_log(&opencl_ctx, AV_LOG_WARNING, |
| 268 | "Could not get device ID: %s:\n", av_opencl_errstr(status)); |
| 269 | av_freep(&device_ids); |
| 270 | continue; |
| 271 | } |
| 272 | for (k = 0; k < devices_num[j]; k++) { |
| 273 | device_num = device_list->platform_node[i]->device_num; |
| 274 | device_list->platform_node[i]->device_node[device_num] = av_mallocz(sizeof(AVOpenCLDeviceNode)); |
| 275 | if (!device_list->platform_node[i]->device_node[device_num]) { |
| 276 | ret = AVERROR(ENOMEM); |
| 277 | goto end; |
| 278 | } |
| 279 | device_node = device_list->platform_node[i]->device_node[device_num]; |
| 280 | device_node->device_id = device_ids[k]; |
| 281 | device_node->device_type = device_type[j]; |
| 282 | status = clGetDeviceInfo(device_node->device_id, CL_DEVICE_NAME, |
| 283 | sizeof(device_node->device_name), device_node->device_name, |
| 284 | NULL); |
| 285 | if (status != CL_SUCCESS) { |
| 286 | av_log(&opencl_ctx, AV_LOG_WARNING, |
| 287 | "Could not get device name: %s\n", av_opencl_errstr(status)); |
| 288 | continue; |
| 289 | } |
| 290 | device_list->platform_node[i]->device_num++; |
| 291 | } |
| 292 | av_freep(&device_ids); |
| 293 | } |
| 294 | } |
| 295 | } |
| 296 | end: |
| 297 | av_freep(&platform_ids); |
| 298 | av_freep(&devices_num); |
| 299 | av_freep(&device_ids); |
| 300 | if (ret < 0) |
| 301 | free_device_list(device_list); |
| 302 | return ret; |
| 303 | } |
| 304 | |
| 305 | int av_opencl_get_device_list(AVOpenCLDeviceList **device_list) |
| 306 | { |
| 307 | int ret = 0; |
| 308 | *device_list = av_mallocz(sizeof(AVOpenCLDeviceList)); |
| 309 | if (!(*device_list)) { |
| 310 | av_log(&opencl_ctx, AV_LOG_ERROR, "Could not allocate opencl device list\n"); |
| 311 | return AVERROR(ENOMEM); |
| 312 | } |
| 313 | ret = get_device_list(*device_list); |
| 314 | if (ret < 0) { |
| 315 | av_log(&opencl_ctx, AV_LOG_ERROR, "Could not get device list from environment\n"); |
| 316 | free_device_list(*device_list); |
| 317 | av_freep(device_list); |
| 318 | return ret; |
| 319 | } |
| 320 | return ret; |
| 321 | } |
| 322 | |
| 323 | void av_opencl_free_device_list(AVOpenCLDeviceList **device_list) |
| 324 | { |
| 325 | free_device_list(*device_list); |
| 326 | av_freep(device_list); |
| 327 | } |
| 328 | |
/**
 * Create the global OpenCL mutex on first use.
 *
 * Several threads may get here at the same time: each one prepares its own
 * mutex and tries to install it with a compare-and-swap against NULL. A
 * non-NULL result of the swap is treated as "another thread installed its
 * mutex first", in which case the local one is destroyed again.
 *
 * @return 0 on success (always when built without thread support),
 *         AVERROR(ENOMEM) if the mutex cannot be allocated, or the AVERROR
 *         form of the pthread_mutex_init() error code
 */
static inline int init_opencl_mtx(void)
{
#if HAVE_THREADS
    if (!atomic_opencl_lock) {
        int err;
        pthread_mutex_t *tmp = av_malloc(sizeof(pthread_mutex_t));
        if (!tmp)
            return AVERROR(ENOMEM);
        if ((err = pthread_mutex_init(tmp, NULL))) {
            av_free(tmp);
            return AVERROR(err);
        }
        /*
         * Explicit cast: the address of a mutex pointer does not convert
         * implicitly to the generic pointer-to-pointer type taken by
         * avpriv_atomic_ptr_cas().
         */
        if (avpriv_atomic_ptr_cas((void * volatile *)&atomic_opencl_lock, NULL, tmp)) {
            pthread_mutex_destroy(tmp);
            av_free(tmp);
        }
    }
#endif
    return 0;
}
| 349 | |
| 350 | int av_opencl_set_option(const char *key, const char *val) |
| 351 | { |
| 352 | int ret = init_opencl_mtx( ); |
| 353 | if (ret < 0) |
| 354 | return ret; |
| 355 | LOCK_OPENCL; |
| 356 | if (!opencl_ctx.opt_init_flag) { |
| 357 | av_opt_set_defaults(&opencl_ctx); |
| 358 | opencl_ctx.opt_init_flag = 1; |
| 359 | } |
| 360 | ret = av_opt_set(&opencl_ctx, key, val, 0); |
| 361 | UNLOCK_OPENCL; |
| 362 | return ret; |
| 363 | } |
| 364 | |
| 365 | int av_opencl_get_option(const char *key, uint8_t **out_val) |
| 366 | { |
| 367 | int ret = 0; |
| 368 | LOCK_OPENCL; |
| 369 | ret = av_opt_get(&opencl_ctx, key, 0, out_val); |
| 370 | UNLOCK_OPENCL; |
| 371 | return ret; |
| 372 | } |
| 373 | |
| 374 | void av_opencl_free_option(void) |
| 375 | { |
| 376 | /*FIXME: free openclutils context*/ |
| 377 | LOCK_OPENCL; |
| 378 | av_opt_free(&opencl_ctx); |
| 379 | UNLOCK_OPENCL; |
| 380 | } |
| 381 | |
| 382 | AVOpenCLExternalEnv *av_opencl_alloc_external_env(void) |
| 383 | { |
| 384 | AVOpenCLExternalEnv *ext = av_mallocz(sizeof(AVOpenCLExternalEnv)); |
| 385 | if (!ext) { |
| 386 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 387 | "Could not malloc external opencl environment data space\n"); |
| 388 | } |
| 389 | return ext; |
| 390 | } |
| 391 | |
| 392 | void av_opencl_free_external_env(AVOpenCLExternalEnv **ext_opencl_env) |
| 393 | { |
| 394 | av_freep(ext_opencl_env); |
| 395 | } |
| 396 | |
| 397 | int av_opencl_register_kernel_code(const char *kernel_code) |
| 398 | { |
| 399 | int i, ret = init_opencl_mtx( ); |
| 400 | if (ret < 0) |
| 401 | return ret; |
| 402 | LOCK_OPENCL; |
| 403 | if (opencl_ctx.kernel_code_count >= MAX_KERNEL_CODE_NUM) { |
| 404 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 405 | "Could not register kernel code, maximum number of registered kernel code %d already reached\n", |
| 406 | MAX_KERNEL_CODE_NUM); |
| 407 | ret = AVERROR(EINVAL); |
| 408 | goto end; |
| 409 | } |
| 410 | for (i = 0; i < opencl_ctx.kernel_code_count; i++) { |
| 411 | if (opencl_ctx.kernel_code[i].kernel_string == kernel_code) { |
| 412 | av_log(&opencl_ctx, AV_LOG_WARNING, "Same kernel code has been registered\n"); |
| 413 | goto end; |
| 414 | } |
| 415 | } |
| 416 | opencl_ctx.kernel_code[opencl_ctx.kernel_code_count].kernel_string = kernel_code; |
| 417 | opencl_ctx.kernel_code[opencl_ctx.kernel_code_count].is_compiled = 0; |
| 418 | opencl_ctx.kernel_code_count++; |
| 419 | end: |
| 420 | UNLOCK_OPENCL; |
| 421 | return ret; |
| 422 | } |
| 423 | |
| 424 | cl_program av_opencl_compile(const char *program_name, const char *build_opts) |
| 425 | { |
| 426 | int i; |
| 427 | cl_int status; |
| 428 | int kernel_code_idx = 0; |
| 429 | const char *kernel_source; |
| 430 | size_t kernel_code_len; |
| 431 | char* ptr = NULL; |
| 432 | cl_program program = NULL; |
| 433 | |
| 434 | LOCK_OPENCL; |
| 435 | for (i = 0; i < opencl_ctx.kernel_code_count; i++) { |
| 436 | // identify a program using a unique name within the kernel source |
| 437 | ptr = av_stristr(opencl_ctx.kernel_code[i].kernel_string, program_name); |
| 438 | if (ptr && !opencl_ctx.kernel_code[i].is_compiled) { |
| 439 | kernel_source = opencl_ctx.kernel_code[i].kernel_string; |
| 440 | kernel_code_len = strlen(opencl_ctx.kernel_code[i].kernel_string); |
| 441 | kernel_code_idx = i; |
| 442 | break; |
| 443 | } |
| 444 | } |
| 445 | if (!kernel_source) { |
| 446 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 447 | "Unable to find OpenCL kernel source '%s'\n", program_name); |
| 448 | goto end; |
| 449 | } |
| 450 | |
| 451 | /* create a CL program from kernel source */ |
| 452 | program = clCreateProgramWithSource(opencl_ctx.context, 1, &kernel_source, &kernel_code_len, &status); |
| 453 | if(status != CL_SUCCESS) { |
| 454 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 455 | "Unable to create OpenCL program '%s': %s\n", program_name, av_opencl_errstr(status)); |
| 456 | program = NULL; |
| 457 | goto end; |
| 458 | } |
| 459 | status = clBuildProgram(program, 1, &(opencl_ctx.device_id), build_opts, NULL, NULL); |
| 460 | if (status != CL_SUCCESS) { |
| 461 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 462 | "Compilation failed with OpenCL program: %s\n", program_name); |
| 463 | program = NULL; |
| 464 | goto end; |
| 465 | } |
| 466 | |
| 467 | opencl_ctx.kernel_code[kernel_code_idx].is_compiled = 1; |
| 468 | end: |
| 469 | UNLOCK_OPENCL; |
| 470 | return program; |
| 471 | } |
| 472 | |
| 473 | cl_command_queue av_opencl_get_command_queue(void) |
| 474 | { |
| 475 | return opencl_ctx.command_queue; |
| 476 | } |
| 477 | |
#if FF_API_OLD_OPENCL
/**
 * Stub of the old kernel creation API: logs an error and always fails.
 *
 * @param env         unused
 * @param kernel_name only used in the log message
 * @return AVERROR(EINVAL)
 */
int av_opencl_create_kernel(AVOpenCLKernelEnv *env, const char *kernel_name)
{
    av_log(&opencl_ctx, AV_LOG_ERROR,
           "Could not create OpenCL kernel %s, please update libavfilter.\n",
           kernel_name);
    return AVERROR(EINVAL);
}

/**
 * Stub of the old kernel release API: logs an error and does nothing else.
 *
 * @param env unused
 */
void av_opencl_release_kernel(AVOpenCLKernelEnv *env)
{
    av_log(&opencl_ctx, AV_LOG_ERROR,
           "Could not release OpenCL kernel, please update libavfilter.\n");
}
#endif
| 490 | |
| 491 | static int init_opencl_env(OpenclContext *opencl_ctx, AVOpenCLExternalEnv *ext_opencl_env) |
| 492 | { |
| 493 | cl_int status; |
| 494 | cl_context_properties cps[3]; |
| 495 | int i, ret = 0; |
| 496 | AVOpenCLDeviceNode *device_node = NULL; |
| 497 | |
| 498 | if (ext_opencl_env) { |
| 499 | if (opencl_ctx->is_user_created) |
| 500 | return 0; |
| 501 | opencl_ctx->platform_id = ext_opencl_env->platform_id; |
| 502 | opencl_ctx->is_user_created = 1; |
| 503 | opencl_ctx->command_queue = ext_opencl_env->command_queue; |
| 504 | opencl_ctx->context = ext_opencl_env->context; |
| 505 | opencl_ctx->device_id = ext_opencl_env->device_id; |
| 506 | opencl_ctx->device_type = ext_opencl_env->device_type; |
| 507 | } else { |
| 508 | if (!opencl_ctx->is_user_created) { |
| 509 | if (!opencl_ctx->device_list.platform_num) { |
| 510 | ret = get_device_list(&opencl_ctx->device_list); |
| 511 | if (ret < 0) { |
| 512 | return ret; |
| 513 | } |
| 514 | } |
| 515 | if (opencl_ctx->platform_idx >= 0) { |
| 516 | if (opencl_ctx->device_list.platform_num < opencl_ctx->platform_idx + 1) { |
| 517 | av_log(opencl_ctx, AV_LOG_ERROR, "User set platform index not exist\n"); |
| 518 | return AVERROR(EINVAL); |
| 519 | } |
| 520 | if (!opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->device_num) { |
| 521 | av_log(opencl_ctx, AV_LOG_ERROR, "No devices in user specific platform with index %d\n", |
| 522 | opencl_ctx->platform_idx); |
| 523 | return AVERROR(EINVAL); |
| 524 | } |
| 525 | opencl_ctx->platform_id = opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->platform_id; |
| 526 | } else { |
| 527 | /* get a usable platform by default*/ |
| 528 | for (i = 0; i < opencl_ctx->device_list.platform_num; i++) { |
| 529 | if (opencl_ctx->device_list.platform_node[i]->device_num) { |
| 530 | opencl_ctx->platform_id = opencl_ctx->device_list.platform_node[i]->platform_id; |
| 531 | opencl_ctx->platform_idx = i; |
| 532 | break; |
| 533 | } |
| 534 | } |
| 535 | } |
| 536 | if (!opencl_ctx->platform_id) { |
| 537 | av_log(opencl_ctx, AV_LOG_ERROR, "Could not get OpenCL platforms\n"); |
| 538 | return AVERROR_EXTERNAL; |
| 539 | } |
| 540 | /* get a usable device*/ |
| 541 | if (opencl_ctx->device_idx >= 0) { |
| 542 | if (opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->device_num < opencl_ctx->device_idx + 1) { |
| 543 | av_log(opencl_ctx, AV_LOG_ERROR, |
| 544 | "Could not get OpenCL device idx %d in the user set platform\n", opencl_ctx->platform_idx); |
| 545 | return AVERROR(EINVAL); |
| 546 | } |
| 547 | } else { |
| 548 | opencl_ctx->device_idx = 0; |
| 549 | } |
| 550 | |
| 551 | device_node = opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->device_node[opencl_ctx->device_idx]; |
| 552 | opencl_ctx->device_id = device_node->device_id; |
| 553 | opencl_ctx->device_type = device_node->device_type; |
| 554 | |
| 555 | /* |
| 556 | * Use available platform. |
| 557 | */ |
| 558 | av_log(opencl_ctx, AV_LOG_VERBOSE, "Platform Name: %s, Device Name: %s\n", |
| 559 | opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->platform_name, |
| 560 | device_node->device_name); |
| 561 | cps[0] = CL_CONTEXT_PLATFORM; |
| 562 | cps[1] = (cl_context_properties)opencl_ctx->platform_id; |
| 563 | cps[2] = 0; |
| 564 | |
| 565 | opencl_ctx->context = clCreateContextFromType(cps, opencl_ctx->device_type, |
| 566 | NULL, NULL, &status); |
| 567 | if (status != CL_SUCCESS) { |
| 568 | av_log(opencl_ctx, AV_LOG_ERROR, |
| 569 | "Could not get OpenCL context from device type: %s\n", av_opencl_errstr(status)); |
| 570 | return AVERROR_EXTERNAL; |
| 571 | } |
| 572 | opencl_ctx->command_queue = clCreateCommandQueue(opencl_ctx->context, opencl_ctx->device_id, |
| 573 | 0, &status); |
| 574 | if (status != CL_SUCCESS) { |
| 575 | av_log(opencl_ctx, AV_LOG_ERROR, |
| 576 | "Could not create OpenCL command queue: %s\n", av_opencl_errstr(status)); |
| 577 | return AVERROR_EXTERNAL; |
| 578 | } |
| 579 | } |
| 580 | } |
| 581 | return ret; |
| 582 | } |
| 583 | |
| 584 | int av_opencl_init(AVOpenCLExternalEnv *ext_opencl_env) |
| 585 | { |
| 586 | int ret = init_opencl_mtx( ); |
| 587 | if (ret < 0) |
| 588 | return ret; |
| 589 | LOCK_OPENCL; |
| 590 | if (!opencl_ctx.init_count) { |
| 591 | if (!opencl_ctx.opt_init_flag) { |
| 592 | av_opt_set_defaults(&opencl_ctx); |
| 593 | opencl_ctx.opt_init_flag = 1; |
| 594 | } |
| 595 | ret = init_opencl_env(&opencl_ctx, ext_opencl_env); |
| 596 | if (ret < 0) |
| 597 | goto end; |
| 598 | if (opencl_ctx.kernel_code_count <= 0) { |
| 599 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 600 | "No kernel code is registered, compile kernel file failed\n"); |
| 601 | ret = AVERROR(EINVAL); |
| 602 | goto end; |
| 603 | } |
| 604 | } |
| 605 | opencl_ctx.init_count++; |
| 606 | end: |
| 607 | UNLOCK_OPENCL; |
| 608 | return ret; |
| 609 | } |
| 610 | |
| 611 | void av_opencl_uninit(void) |
| 612 | { |
| 613 | cl_int status; |
| 614 | LOCK_OPENCL; |
| 615 | opencl_ctx.init_count--; |
| 616 | if (opencl_ctx.is_user_created) |
| 617 | goto end; |
| 618 | if (opencl_ctx.init_count > 0) |
| 619 | goto end; |
| 620 | if (opencl_ctx.command_queue) { |
| 621 | status = clReleaseCommandQueue(opencl_ctx.command_queue); |
| 622 | if (status != CL_SUCCESS) { |
| 623 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 624 | "Could not release OpenCL command queue: %s\n", av_opencl_errstr(status)); |
| 625 | } |
| 626 | opencl_ctx.command_queue = NULL; |
| 627 | } |
| 628 | if (opencl_ctx.context) { |
| 629 | status = clReleaseContext(opencl_ctx.context); |
| 630 | if (status != CL_SUCCESS) { |
| 631 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 632 | "Could not release OpenCL context: %s\n", av_opencl_errstr(status)); |
| 633 | } |
| 634 | opencl_ctx.context = NULL; |
| 635 | } |
| 636 | free_device_list(&opencl_ctx.device_list); |
| 637 | end: |
| 638 | if (opencl_ctx.init_count <= 0) |
| 639 | av_opt_free(&opencl_ctx); //FIXME: free openclutils context |
| 640 | UNLOCK_OPENCL; |
| 641 | } |
| 642 | |
| 643 | int av_opencl_buffer_create(cl_mem *cl_buf, size_t cl_buf_size, int flags, void *host_ptr) |
| 644 | { |
| 645 | cl_int status; |
| 646 | *cl_buf = clCreateBuffer(opencl_ctx.context, flags, cl_buf_size, host_ptr, &status); |
| 647 | if (status != CL_SUCCESS) { |
| 648 | av_log(&opencl_ctx, AV_LOG_ERROR, "Could not create OpenCL buffer: %s\n", av_opencl_errstr(status)); |
| 649 | return AVERROR_EXTERNAL; |
| 650 | } |
| 651 | return 0; |
| 652 | } |
| 653 | |
| 654 | void av_opencl_buffer_release(cl_mem *cl_buf) |
| 655 | { |
| 656 | cl_int status = 0; |
| 657 | if (!cl_buf) |
| 658 | return; |
| 659 | status = clReleaseMemObject(*cl_buf); |
| 660 | if (status != CL_SUCCESS) { |
| 661 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 662 | "Could not release OpenCL buffer: %s\n", av_opencl_errstr(status)); |
| 663 | } |
| 664 | memset(cl_buf, 0, sizeof(*cl_buf)); |
| 665 | } |
| 666 | |
| 667 | int av_opencl_buffer_write(cl_mem dst_cl_buf, uint8_t *src_buf, size_t buf_size) |
| 668 | { |
| 669 | cl_int status; |
| 670 | void *mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, dst_cl_buf, |
| 671 | CL_TRUE, CL_MAP_WRITE, 0, sizeof(uint8_t) * buf_size, |
| 672 | 0, NULL, NULL, &status); |
| 673 | |
| 674 | if (status != CL_SUCCESS) { |
| 675 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 676 | "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status)); |
| 677 | return AVERROR_EXTERNAL; |
| 678 | } |
| 679 | memcpy(mapped, src_buf, buf_size); |
| 680 | |
| 681 | status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, dst_cl_buf, mapped, 0, NULL, NULL); |
| 682 | if (status != CL_SUCCESS) { |
| 683 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 684 | "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status)); |
| 685 | return AVERROR_EXTERNAL; |
| 686 | } |
| 687 | return 0; |
| 688 | } |
| 689 | |
| 690 | int av_opencl_buffer_read(uint8_t *dst_buf, cl_mem src_cl_buf, size_t buf_size) |
| 691 | { |
| 692 | cl_int status; |
| 693 | void *mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, src_cl_buf, |
| 694 | CL_TRUE, CL_MAP_READ, 0, buf_size, |
| 695 | 0, NULL, NULL, &status); |
| 696 | |
| 697 | if (status != CL_SUCCESS) { |
| 698 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 699 | "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status)); |
| 700 | return AVERROR_EXTERNAL; |
| 701 | } |
| 702 | memcpy(dst_buf, mapped, buf_size); |
| 703 | |
| 704 | status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, src_cl_buf, mapped, 0, NULL, NULL); |
| 705 | if (status != CL_SUCCESS) { |
| 706 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 707 | "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status)); |
| 708 | return AVERROR_EXTERNAL; |
| 709 | } |
| 710 | return 0; |
| 711 | } |
| 712 | |
| 713 | int av_opencl_buffer_write_image(cl_mem dst_cl_buf, size_t cl_buffer_size, int dst_cl_offset, |
| 714 | uint8_t **src_data, int *plane_size, int plane_num) |
| 715 | { |
| 716 | int i, buffer_size = 0; |
| 717 | uint8_t *temp; |
| 718 | cl_int status; |
| 719 | void *mapped; |
| 720 | if ((unsigned int)plane_num > 8) { |
| 721 | return AVERROR(EINVAL); |
| 722 | } |
| 723 | for (i = 0;i < plane_num;i++) { |
| 724 | buffer_size += plane_size[i]; |
| 725 | } |
| 726 | if (buffer_size > cl_buffer_size) { |
| 727 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 728 | "Cannot write image to OpenCL buffer: buffer too small\n"); |
| 729 | return AVERROR(EINVAL); |
| 730 | } |
| 731 | mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, dst_cl_buf, |
| 732 | CL_TRUE, CL_MAP_WRITE, 0, buffer_size + dst_cl_offset, |
| 733 | 0, NULL, NULL, &status); |
| 734 | if (status != CL_SUCCESS) { |
| 735 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 736 | "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status)); |
| 737 | return AVERROR_EXTERNAL; |
| 738 | } |
| 739 | temp = mapped; |
| 740 | temp += dst_cl_offset; |
| 741 | for (i = 0; i < plane_num; i++) { |
| 742 | memcpy(temp, src_data[i], plane_size[i]); |
| 743 | temp += plane_size[i]; |
| 744 | } |
| 745 | status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, dst_cl_buf, mapped, 0, NULL, NULL); |
| 746 | if (status != CL_SUCCESS) { |
| 747 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 748 | "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status)); |
| 749 | return AVERROR_EXTERNAL; |
| 750 | } |
| 751 | return 0; |
| 752 | } |
| 753 | |
| 754 | int av_opencl_buffer_read_image(uint8_t **dst_data, int *plane_size, int plane_num, |
| 755 | cl_mem src_cl_buf, size_t cl_buffer_size) |
| 756 | { |
| 757 | int i,buffer_size = 0,ret = 0; |
| 758 | uint8_t *temp; |
| 759 | void *mapped; |
| 760 | cl_int status; |
| 761 | if ((unsigned int)plane_num > 8) { |
| 762 | return AVERROR(EINVAL); |
| 763 | } |
| 764 | for (i = 0; i < plane_num; i++) { |
| 765 | buffer_size += plane_size[i]; |
| 766 | } |
| 767 | if (buffer_size > cl_buffer_size) { |
| 768 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 769 | "Cannot write image to CPU buffer: OpenCL buffer too small\n"); |
| 770 | return AVERROR(EINVAL); |
| 771 | } |
| 772 | mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, src_cl_buf, |
| 773 | CL_TRUE, CL_MAP_READ, 0, buffer_size, |
| 774 | 0, NULL, NULL, &status); |
| 775 | |
| 776 | if (status != CL_SUCCESS) { |
| 777 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 778 | "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status)); |
| 779 | return AVERROR_EXTERNAL; |
| 780 | } |
| 781 | temp = mapped; |
| 782 | if (ret >= 0) { |
| 783 | for (i = 0; i < plane_num; i++) { |
| 784 | memcpy(dst_data[i], temp, plane_size[i]); |
| 785 | temp += plane_size[i]; |
| 786 | } |
| 787 | } |
| 788 | status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, src_cl_buf, mapped, 0, NULL, NULL); |
| 789 | if (status != CL_SUCCESS) { |
| 790 | av_log(&opencl_ctx, AV_LOG_ERROR, |
| 791 | "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status)); |
| 792 | return AVERROR_EXTERNAL; |
| 793 | } |
| 794 | return 0; |
| 795 | } |
| 796 | |
| 797 | int64_t av_opencl_benchmark(AVOpenCLDeviceNode *device_node, cl_platform_id platform, |
| 798 | int64_t (*benchmark)(AVOpenCLExternalEnv *ext_opencl_env)) |
| 799 | { |
| 800 | int64_t ret = 0; |
| 801 | cl_int status; |
| 802 | cl_context_properties cps[3]; |
| 803 | AVOpenCLExternalEnv *ext_opencl_env = NULL; |
| 804 | |
| 805 | ext_opencl_env = av_opencl_alloc_external_env(); |
| 806 | ext_opencl_env->device_id = device_node->device_id; |
| 807 | ext_opencl_env->device_type = device_node->device_type; |
| 808 | av_log(&opencl_ctx, AV_LOG_VERBOSE, "Performing test on OpenCL device %s\n", |
| 809 | device_node->device_name); |
| 810 | |
| 811 | cps[0] = CL_CONTEXT_PLATFORM; |
| 812 | cps[1] = (cl_context_properties)platform; |
| 813 | cps[2] = 0; |
| 814 | ext_opencl_env->context = clCreateContextFromType(cps, ext_opencl_env->device_type, |
| 815 | NULL, NULL, &status); |
| 816 | if (status != CL_SUCCESS || !ext_opencl_env->context) { |
| 817 | ret = AVERROR_EXTERNAL; |
| 818 | goto end; |
| 819 | } |
| 820 | ext_opencl_env->command_queue = clCreateCommandQueue(ext_opencl_env->context, |
| 821 | ext_opencl_env->device_id, 0, &status); |
| 822 | if (status != CL_SUCCESS || !ext_opencl_env->command_queue) { |
| 823 | ret = AVERROR_EXTERNAL; |
| 824 | goto end; |
| 825 | } |
| 826 | ret = benchmark(ext_opencl_env); |
| 827 | if (ret < 0) |
| 828 | av_log(&opencl_ctx, AV_LOG_ERROR, "Benchmark failed with OpenCL device %s\n", |
| 829 | device_node->device_name); |
| 830 | end: |
| 831 | if (ext_opencl_env->command_queue) |
| 832 | clReleaseCommandQueue(ext_opencl_env->command_queue); |
| 833 | if (ext_opencl_env->context) |
| 834 | clReleaseContext(ext_opencl_env->context); |
| 835 | av_opencl_free_external_env(&ext_opencl_env); |
| 836 | return ret; |
| 837 | } |