2 Copyright (c) 2012, Brice Videau <brice.videau@imag.fr>
3 Copyright (c) 2012, Vincent Danjean <Vincent.Danjean@ens-lyon.org>
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice, this
10 list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 #include <sys/types.h>
40 #pragma GCC diagnostic push
41 # pragma GCC diagnostic ignored "-Wcpp"
42 # define CL_USE_DEPRECATED_OPENCL_1_1_APIS
43 # include <CL/opencl.h>
44 #pragma GCC diagnostic pop
46 #pragma GCC visibility push(hidden)
48 #include "ocl_icd_loader.h"
49 #define DEBUG_OCL_ICD_PROVIDE_DUMP_FIELD
50 #include "ocl_icd_debug.h"
// Default location of the ICD description files; used by this file when the
// OCL_ICD_VENDORS environment variable is unset or empty.
52 #define ETC_OPENCL_VENDORS "/etc/OpenCL/vendors"
// Bitmask of enabled debug message classes; this file tests it against
// D_DUMP and D_TRACE.
54 int debug_ocl_icd_mask=0;
// Pointer-to-function types derived from the prototypes of the matching
// OpenCL entry points.
56 typedef __typeof__(clGetExtensionFunctionAddress) *clGetExtensionFunctionAddress_fn;
57 typedef __typeof__(clGetPlatformInfo) *clGetPlatformInfo_fn;
// NOTE(review): the lines opening and closing the struct declarations that
// hold the members below are not present in this listing. Judging from how
// they are used in this file, num_platforms, first_platform and ext_fn_ptr
// belong to struct vendor_icd, while extension_suffix and vicd belong to
// struct platform_icd. Other members used in this file (dl_handle, version,
// pid) are not shown here.
// Number of platforms of this vendor that were accepted.
61 cl_uint num_platforms;
// Set from (_num_picds - accepted platform count) once a vendor is accepted.
62 cl_uint first_platform;
// Address of the vendor's clGetExtensionFunctionAddress.
64 clGetExtensionFunctionAddress_fn ext_fn_ptr;
// String obtained by querying CL_PLATFORM_ICD_SUFFIX_KHR on the platform.
68 char * extension_suffix;
// Vendor record reached from a platform record.
70 struct vendor_icd *vicd;
// Dynamically allocated arrays of vendor records and of platform records.
74 struct vendor_icd *_icds=NULL;
75 struct platform_icd *_picds=NULL;
// Number of entries in use in _icds and in _picds respectively.
76 static cl_uint _num_icds = 0;
77 static cl_uint _num_picds = 0;
// Table giving the spelling of each OpenCL error constant.
// _clS(x) expands to a designated initializer that stores the text of x at
// index -x, so the table is indexed by the negated error code.
// MAX_CL_ERRORS is the negated value of CL_INVALID_DEVICE_PARTITION_COUNT and
// is the highest valid index. Slots without an initializer hold NULL.
// NOTE(review): the closing line of the initializer is not present in this
// listing, and some entries may be missing as well.
80 # define _clS(x) [-x] = #x
81 # define MAX_CL_ERRORS (-CL_INVALID_DEVICE_PARTITION_COUNT)
82 static char const * const clErrorStr[MAX_CL_ERRORS+1] = {
84 _clS(CL_DEVICE_NOT_FOUND),
85 _clS(CL_DEVICE_NOT_AVAILABLE),
86 _clS(CL_COMPILER_NOT_AVAILABLE),
87 _clS(CL_MEM_OBJECT_ALLOCATION_FAILURE),
88 _clS(CL_OUT_OF_RESOURCES),
89 _clS(CL_OUT_OF_HOST_MEMORY),
90 _clS(CL_PROFILING_INFO_NOT_AVAILABLE),
91 _clS(CL_MEM_COPY_OVERLAP),
92 _clS(CL_IMAGE_FORMAT_MISMATCH),
93 _clS(CL_IMAGE_FORMAT_NOT_SUPPORTED),
94 _clS(CL_BUILD_PROGRAM_FAILURE),
96 _clS(CL_MISALIGNED_SUB_BUFFER_OFFSET),
97 _clS(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST),
98 _clS(CL_COMPILE_PROGRAM_FAILURE),
99 _clS(CL_LINKER_NOT_AVAILABLE),
100 _clS(CL_LINK_PROGRAM_FAILURE),
101 _clS(CL_DEVICE_PARTITION_FAILED),
102 _clS(CL_KERNEL_ARG_INFO_NOT_AVAILABLE),
103 _clS(CL_INVALID_VALUE),
104 _clS(CL_INVALID_DEVICE_TYPE),
105 _clS(CL_INVALID_PLATFORM),
106 _clS(CL_INVALID_DEVICE),
107 _clS(CL_INVALID_CONTEXT),
108 _clS(CL_INVALID_QUEUE_PROPERTIES),
109 _clS(CL_INVALID_COMMAND_QUEUE),
110 _clS(CL_INVALID_HOST_PTR),
111 _clS(CL_INVALID_MEM_OBJECT),
112 _clS(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR),
113 _clS(CL_INVALID_IMAGE_SIZE),
114 _clS(CL_INVALID_SAMPLER),
115 _clS(CL_INVALID_BINARY),
116 _clS(CL_INVALID_BUILD_OPTIONS),
117 _clS(CL_INVALID_PROGRAM),
118 _clS(CL_INVALID_PROGRAM_EXECUTABLE),
119 _clS(CL_INVALID_KERNEL_NAME),
120 _clS(CL_INVALID_KERNEL_DEFINITION),
121 _clS(CL_INVALID_KERNEL),
122 _clS(CL_INVALID_ARG_INDEX),
123 _clS(CL_INVALID_ARG_VALUE),
124 _clS(CL_INVALID_ARG_SIZE),
125 _clS(CL_INVALID_KERNEL_ARGS),
126 _clS(CL_INVALID_WORK_DIMENSION),
127 _clS(CL_INVALID_WORK_GROUP_SIZE),
128 _clS(CL_INVALID_WORK_ITEM_SIZE),
129 _clS(CL_INVALID_GLOBAL_OFFSET),
130 _clS(CL_INVALID_EVENT_WAIT_LIST),
131 _clS(CL_INVALID_EVENT),
132 _clS(CL_INVALID_OPERATION),
133 _clS(CL_INVALID_GL_OBJECT),
134 _clS(CL_INVALID_BUFFER_SIZE),
135 _clS(CL_INVALID_MIP_LEVEL),
136 _clS(CL_INVALID_GLOBAL_WORK_SIZE),
137 _clS(CL_INVALID_PROPERTY),
138 _clS(CL_INVALID_IMAGE_DESCRIPTOR),
139 _clS(CL_INVALID_COMPILER_OPTIONS),
140 _clS(CL_INVALID_LINKER_OPTIONS),
141 _clS(CL_INVALID_DEVICE_PARTITION_COUNT)
// Convert an OpenCL status code into a printable string for diagnostics.
// The function is first declared with __attribute__((unused)) so that the
// compiler does not warn when nothing in the translation unit calls it.
// Two variants are visible below: one based on the clErrorStr table and one
// that prints the numeric value.
// NOTE(review): the lines selecting between the two variants are missing from
// this listing (presumably a preprocessor conditional on the debug build;
// confirm against the full file).
146 static char* _clerror2string (cl_int error) __attribute__((unused));
147 static char* _clerror2string (cl_int error) {
// Table variant: positive codes and codes beyond the end of clErrorStr are
// reported and answered with a generic string.
149 if (-error > MAX_CL_ERRORS || error > 0) {
150 debug(D_WARN, "Unknown error code %d", error);
151 RETURN_STR("OpenCL Error");
// In-range codes are looked up by their negated value.
153 const char *ret=clErrorStr[-error];
// NOTE(review): the condition guarding the next two lines is not visible;
// presumably it is taken when the table slot is empty.
155 debug(D_WARN, "Unknown error code %d", error);
156 RETURN_STR("OpenCL Error");
// Numeric variant: the code is formatted into a function-local static buffer.
// NOTE(review): a static buffer is shared by all callers and overwritten on
// each call; confirm it is only used for immediate log output.
160 static char number[15];
162 RETURN_STR("CL_SUCCESS");
164 snprintf(number, 15, "%i", error);
/*
 * Tell whether a file name looks like an ICD description file.
 *
 * str: NUL-terminated name to test; must not be NULL.
 *
 * Returns 1 when str ends with ".icd" and has at least one character in
 * front of that suffix (total length of 5 or more), 0 otherwise. A name that
 * is exactly ".icd" is therefore rejected.
 */
static inline int _string_end_with_icd(const char* str) {
  size_t len = strlen(str);
  if( len<5 || strcmp(str + len - 4, ".icd" ) != 0 ) {
    return 0;
  }
  return 1;
}
/*
 * Tell whether a string contains a '/' character, i.e. whether it names a
 * path rather than a bare file name.
 *
 * str: NUL-terminated string to test; must not be NULL.
 *
 * Returns 1 when at least one '/' is present, 0 otherwise.
 */
static inline int _string_with_slash(const char* str) {
  return strchr(str, '/') != NULL;
}
// Walk an already opened directory stream and examine every entry whose name
// passes _string_end_with_icd().
// NOTE(review): the declaration of ent and the statements that update and
// return num_icds are not visible in this listing; presumably the function
// returns the number of matching entries.
181 static inline unsigned int _find_num_icds(DIR *dir) {
182 unsigned int num_icds = 0;
184 while( (ent=readdir(dir)) != NULL ){
185 if (_string_end_with_icd(ent->d_name)) {
// Load the shared library lib_path with dlopen() and store the handle in
// _icds[num_icds].dl_handle.
// num_icds: index of the _icds slot to fill.
// lib_path: library name or path handed to dlopen() unchanged.
// NOTE(review): the return statements are not visible in this listing.
// Callers add the result to their ICD count, so it is presumably 1 on success
// and 0 on failure.
193 static inline unsigned int _load_icd(int num_icds, const char* lib_path) {
195 debug(D_LOG, "Loading ICD '%s'", lib_path);
// Symbols are bound lazily and kept local to the handle.
197 _icds[num_icds].dl_handle = dlopen(lib_path, RTLD_LAZY|RTLD_LOCAL);//|RTLD_DEEPBIND);
198 if(_icds[num_icds].dl_handle != NULL) {
199 debug(D_LOG, "ICD[%i] loaded", num_icds);
// Failure path: the reason reported by dlerror() is logged.
202 debug(D_WARN, "error while dlopening the IDL: '%s',\n => skipping ICD", dlerror());
// Read an ICD description file and load the library it names.
// num_icds:  index of the next free _icds slot; the updated count is built
//            by adding the result of _load_icd() to it.
// dir_path:  directory containing the file, or NULL to use file_path as is.
// file_path: name of the description file.
// NOTE(review): several lines (declarations of lib_path and err, error
// checks, cleanup, the return) are not visible in this listing, so the
// comments below describe only the visible statements.
207 static inline unsigned int _open_driver(unsigned int num_icds,
208 const char*dir_path, const char*file_path) {
211 unsigned int lib_path_length;
// Build "dir_path/file_path" (+2 for the separator and the terminator), or a
// plain copy of file_path when no directory is given.
// NOTE(review): the malloc results are used without a visible NULL check.
212 if (dir_path != NULL) {
213 lib_path_length = strlen(dir_path) + strlen(file_path) + 2;
214 lib_path = malloc(lib_path_length*sizeof(char));
215 sprintf(lib_path,"%s/%s", dir_path, file_path);
217 lib_path_length = strlen(file_path) + 1;
218 lib_path = malloc(lib_path_length*sizeof(char));
219 sprintf(lib_path,"%s", file_path);
221 debug(D_LOG, "Considering file '%s'", lib_path);
222 FILE *f = fopen(lib_path,"r");
// Size the read buffer from the file length plus one byte for the terminator.
// NOTE(review): ftell() reports failure with -1, which would yield a length
// of 0 here; no check is visible.
228 fseek(f, 0, SEEK_END);
229 lib_path_length = ftell(f)+1;
230 fseek(f, 0, SEEK_SET);
// A length of 1 means the file is empty.
231 if(lib_path_length == 1) {
232 debug(D_WARN, "File contents too short, skipping ICD");
// lib_path is reused for the file contents.
// NOTE(review): whether the path buffer allocated above is released before
// this reassignment is not visible.
236 lib_path = malloc(lib_path_length*sizeof(char));
// Only the first line of the file is read.
237 err = fgets(lib_path, lib_path_length, f);
241 debug(D_WARN, "Error while loading file contents, skipping ICD");
245 lib_path_length = strlen(lib_path);
// Drop the trailing newline kept by fgets().
// NOTE(review): if the line read is empty (length 0) the index below would
// be out of range; confirm this cannot happen.
247 if( lib_path[lib_path_length-1] == '\n' )
248 lib_path[lib_path_length-1] = '\0';
250 num_icds += _load_icd(num_icds, lib_path);
// Walk the directory stream dir and hand every entry whose name ends with
// ".icd" to _open_driver(), which returns the updated ICD count.
// dir_path is the directory the entries belong to; it is passed on so that
// the full path can be built.
// NOTE(review): the declaration of ent, the body of the skip branch and the
// return are not visible in this listing.
256 static inline unsigned int _open_drivers(DIR *dir, const char* dir_path) {
257 unsigned int num_icds = 0;
259 while( (ent=readdir(dir)) != NULL ){
260 if(! _string_end_with_icd(ent->d_name)) {
263 num_icds = _open_driver(num_icds, dir_path, ent->d_name);
// Find the address of the function called name in a loaded ICD.
// dlh:  handle of the library, used for the dlsym() lookup.
// fn:   the ICD's clGetExtensionFunctionAddress, or NULL when it is not known
//       yet (the caller passes NULL when resolving that very function).
// name: symbol to look up.
// Two candidate addresses are handled: addr1 from dlsym() and addr2.
// NOTE(review): the assignment of addr2 is not visible in this listing;
// presumably it is obtained by calling fn(name). The return is not visible
// either; given the final fallback it presumably returns addr2.
269 static void* _get_function_addr(void* dlh, clGetExtensionFunctionAddress_fn fn, const char*name) {
271 debug(D_LOG,"Looking for function %s",name);
272 addr1=dlsym(dlh, name);
274 debug(D_WARN, "Missing global symbol '%s' in ICD, should be skipped", name);
280 debug(D_WARN, "Missing function '%s' in ICD, should be skipped", name);
// Both lookups succeeded but disagree: only a warning is emitted.
283 if (addr1 && addr2 && addr1!=addr2) {
284 debug(D_WARN, "Function and symbol '%s' have different addresses!", name);
// Fall back on the dlsym() result when addr2 is NULL.
288 if (!addr2) addr2=addr1;
// Make sure _picds has room for req more platform records.
// The function-local static counter allocated holds the current capacity, so
// (allocated - _num_picds) is the number of free slots.
// Returns the number of free slots after the call; the caller compares it
// with the number it asked for.
// NOTE(review): the lines choosing between the malloc and realloc paths, and
// those updating allocated, are not visible in this listing.
292 static int _allocate_platforms(int req) {
293 static cl_uint allocated=0;
294 debug(D_LOG,"Requesting allocation for %d platforms",req);
// NOTE(review): this compares an unsigned difference with the signed req.
295 if (allocated - _num_picds < req) {
297 _picds=(struct platform_icd*)malloc(req*sizeof(struct platform_icd));
// Only the missing part is added to the capacity.
299 req = req - (allocated - _num_picds);
// NOTE(review): the realloc result overwrites _picds directly, so the
// previous array would be lost if realloc failed.
300 _picds=(struct platform_icd*)realloc(_picds, (allocated+req)*sizeof(struct platform_icd));
304 RETURN(allocated - _num_picds);
307 static char* _malloc_clGetPlatformInfo(clGetPlatformInfo_fn plt_info_ptr,
308 cl_platform_id pid, cl_platform_info cname, char* sname) {
310 size_t param_value_size_ret;
311 error = plt_info_ptr(pid, cname, 0, NULL, ¶m_value_size_ret);
312 if (error != CL_SUCCESS) {
313 debug(D_WARN, "Error %s while requesting %s in platform %p",
314 _clerror2string(error), sname, pid);
317 char *param_value = (char *)malloc(sizeof(char)*param_value_size_ret);
318 if (param_value == NULL) {
319 debug(D_WARN, "Error in malloc while requesting %s in platform %p",
323 error = plt_info_ptr(pid, cname, param_value_size_ret, param_value, NULL);
324 if (error != CL_SUCCESS){
326 debug(D_WARN, "Error %s while requesting %s in platform %p",
327 _clerror2string(error), sname, pid);
330 RETURN_STR(param_value);
// Inspect the first num_icds entries of _icds: resolve the entry points the
// loader needs, enumerate the platforms each library exposes and prepare a
// _picds record for each usable one.
// An accepted vendor is addressed through &_icds[_num_icds], which can be a
// different slot from index i where the library handle was stored.
// NOTE(review): many statements (error-path cleanup, loop control, counter
// updates, closing braces) are not visible in this listing, so the comments
// below describe only what the visible lines do.
333 static inline void _find_and_check_platforms(cl_uint num_icds) {
336 for( i=0; i<num_icds; i++){
337 debug(D_LOG, "Checking ICD %i", i);
338 struct vendor_icd *picd = &_icds[_num_icds];
339 void* dlh = _icds[i].dl_handle;
// clGetExtensionFunctionAddress is resolved first, with NULL as the lookup
// function; the two other entry points are then resolved with its help.
340 picd->ext_fn_ptr = _get_function_addr(dlh, NULL, "clGetExtensionFunctionAddress");
341 clIcdGetPlatformIDsKHR_fn plt_fn_ptr =
342 _get_function_addr(dlh, picd->ext_fn_ptr, "clIcdGetPlatformIDsKHR");
343 clGetPlatformInfo_fn plt_info_ptr =
344 _get_function_addr(dlh, picd->ext_fn_ptr, "clGetPlatformInfo");
345 if( picd->ext_fn_ptr == NULL
346 || plt_fn_ptr == NULL
347 || plt_info_ptr == NULL) {
348 debug(D_WARN, "Missing symbols in ICD, skipping it");
351 cl_uint num_platforms=0;
// First call: only ask for the number of platforms.
353 error = (*plt_fn_ptr)(0, NULL, &num_platforms);
354 if( error != CL_SUCCESS || num_platforms == 0) {
355 debug(D_LOG, "No platform in ICD, skipping it");
// Second call: fetch the platform handles.
// NOTE(review): the malloc result is used without a visible NULL check.
358 cl_platform_id *platforms = (cl_platform_id *) malloc( sizeof(cl_platform_id) * num_platforms);
359 error = (*plt_fn_ptr)(num_platforms, platforms, NULL);
360 if( error != CL_SUCCESS ){
362 debug(D_WARN, "Error in loading ICD platforms, skipping ICD");
365 cl_uint num_valid_platforms=0;
367 debug(D_LOG, "Try to load %d plateforms", num_platforms);
// Make sure _picds has room for every platform of this ICD.
368 if (_allocate_platforms(num_platforms) < num_platforms) {
370 debug(D_WARN, "Not enought platform allocated. Skipping ICD");
// Each candidate platform is prepared in the _picds slot at index _num_picds.
373 for(j=0; j<num_platforms; j++) {
374 debug(D_LOG, "Checking platform %i", j);
375 struct platform_icd *p=&_picds[_num_picds];
376 char *param_value=NULL;
377 p->extension_suffix=NULL;
// Optional dump of the platform, with D_TRACE cleared during the dump and
// restored afterwards.
380 #ifdef DDEBUG_OCL_ICD
381 if (debug_ocl_icd_mask & D_DUMP) {
382 int log=debug_ocl_icd_mask & D_TRACE;
383 debug_ocl_icd_mask &= ~D_TRACE;
384 dump_platform(p->vicd->ext_fn_ptr, p->pid);
385 debug_ocl_icd_mask |= log;
389 /* Allow to workaround a bug in the Intel ICD used
390 * with optirun (search for NVidia Optimus for more info)
// The extension list is only queried and checked for "cl_khr_icd" when
// OCL_ICD_ASSUME_ICD_EXTENSION is unset or empty.
392 const char* str=getenv("OCL_ICD_ASSUME_ICD_EXTENSION");
393 if (! str || str[0]==0) {
394 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_EXTENSIONS, "extensions");
395 if (param_value == NULL){
396 debug(D_WARN, "Skipping platform %i", j);
399 debug(D_DUMP, "Supported extensions: %s", param_value);
400 if( strstr(param_value, "cl_khr_icd") == NULL){
402 debug(D_WARN, "Missing khr extension in platform %i, skipping it", j);
// The ICD suffix is kept in the platform record; the platform is reported as
// skipped when it cannot be obtained.
408 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_ICD_SUFFIX_KHR, "suffix");
409 if (param_value == NULL){
410 debug(D_WARN, "Skipping platform %i", j);
413 p->extension_suffix = param_value;
414 debug(D_DUMP|D_LOG, "Extension suffix: %s", param_value);
// Profile, version, name and vendor are logged when available; the version
// string is also kept in p->version.
416 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_PROFILE, "profile");
417 if (param_value != NULL){
418 debug(D_DUMP, "Profile: %s", param_value);
421 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_VERSION, "version");
422 p->version = param_value;
423 if (param_value != NULL){
424 debug(D_DUMP, "Version: %s", param_value);
427 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_NAME, "name");
428 if (param_value != NULL){
429 debug(D_DUMP, "Name: %s", param_value);
432 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_VENDOR, "vendor");
433 if (param_value != NULL){
434 debug(D_DUMP, "Vendor: %s", param_value);
438 num_valid_platforms++;
// The vendor record is completed only if at least one platform was accepted.
441 if( num_valid_platforms != 0 ) {
// The handle is copied only when the accepted slot differs from slot i.
442 if ( _num_icds != i ) {
443 picd->dl_handle = dlh;
446 picd->num_platforms = num_valid_platforms;
// NOTE(review): first_platform is written through _icds[i] whereas the other
// fields are written through picd (= &_icds[_num_icds]). These are different
// slots whenever an earlier ICD was rejected; confirm which one is intended.
447 _icds[i].first_platform = _num_picds - num_valid_platforms;
// One-time initialisation of the loader: decide where the ICDs come from,
// load them, then validate their platforms.
// The source is taken from the OCL_ICD_VENDORS environment variable, falling
// back on ETC_OPENCL_VENDORS when it is unset or empty. The value may name a
// directory of .icd files, a single .icd file or a library.
// NOTE(review): declarations, branch structure and cleanup lines are missing
// from this listing, so the comments describe only the visible statements.
455 static void __initClIcd( void ) {
457 cl_uint num_icds = 0;
460 const char* dir_path=getenv("OCL_ICD_VENDORS");
461 if (! dir_path || dir_path[0]==0) {
462 debug(D_DUMP, "OCL_ICD_VENDORS empty or not defined, using %s", ETC_OPENCL_VENDORS);
463 dir_path=ETC_OPENCL_VENDORS;
// A stat() failure other than "no such entry" is reported.
468 int ret=stat(dir_path, &buf);
469 if (ret != 0 && errno != ENOENT) {
470 debug(D_WARN, "Cannot stat '%s'. Aborting", dir_path);
472 if (ret == 0 && S_ISDIR(buf.st_mode)) {
478 debug(D_LOG,"Only loading '%s' as an ICD", dir_path);
482 debug(D_LOG,"Reading icd list from '%s'", dir_path);
483 dir = opendir(dir_path);
485 if (errno == ENOTDIR) {
486 debug(D_DUMP, "%s is not a directory, trying to use it as a ICD libname",
// Count the candidates, then allocate one vendor record per candidate.
// NOTE(review): the malloc result is used without a visible NULL check.
492 num_icds = _find_num_icds(dir);
498 _icds = (struct vendor_icd*)malloc(num_icds * sizeof(struct vendor_icd));
// A value ending with ".icd" is read as a description file, looked up in
// ETC_OPENCL_VENDORS when it contains no '/'. The two other visible calls load
// the value directly as a library and scan it as a directory.
504 if (_string_end_with_icd(dir_path)) {
506 if (! _string_with_slash(dir_path)) {
507 num_icds = _open_driver(0, ETC_OPENCL_VENDORS, dir_path);
510 num_icds = _open_driver(0, NULL, dir_path);
513 num_icds = _load_icd(0, dir_path);
516 num_icds = _open_drivers(dir, dir_path);
522 _find_and_check_platforms(num_icds);
// Shrink _icds to the number of vendors that were accepted.
// NOTE(review): the realloc result overwrites _icds directly, and the
// requested size is 0 when no vendor was accepted; confirm both are intended.
527 if (_num_icds < num_icds) {
528 _icds = (struct vendor_icd*)realloc(_icds, _num_icds * sizeof(struct vendor_icd));
530 debug(D_WARN, "%d valid vendor(s)!", _num_icds);
// State used to run __initClIcd() once.
// once_init is the control object handed to pthread_once(); in_init is a
// per-thread flag (__thread) and _initialized is a flag shared by all threads.
542 static pthread_once_t once_init = PTHREAD_ONCE_INIT;
546 volatile static __thread int in_init = 0;
547 volatile static cl_uint _initialized = 0;
// Entry point run by every loader function before it uses the ICD tables.
// It is also marked as a constructor.
// Two mechanisms are visible below: one based on pthread_once() and one based
// on an atomic compare-and-swap of a guard variable followed by a wait loop.
// NOTE(review): the lines selecting between them, the declaration of gard and
// the updates of in_init and _initialized are not visible in this listing.
549 static inline void __attribute__((constructor)) _initClIcd( void ) {
554 /* probably reentrancy */
// pthread variant: full memory barriers surround the one-time call.
557 __sync_synchronize();
558 pthread_once(&once_init, &__initClIcd);
559 __sync_synchronize();
// Variant without pthread_once(): the caller that switches the guard from 0
// to 1 enters this branch.
563 if (__sync_bool_compare_and_swap(&gard, 0, 1)) {
565 __sync_synchronize();
567 __sync_synchronize();
571 /* probably reentrancy (could also be user threads). */
573 /* someone else started __initClIcd(). We wait until its end. */
574 debug(D_WARN, "Waiting end of init");
// NOTE(review): this is a busy-wait on a volatile flag with no yield or sleep.
575 while (!_initialized) ;
576 debug(D_WARN, "Wait done");
// End of the region opened earlier in the file with
// "#pragma GCC visibility push(hidden)".
583 #pragma GCC visibility pop
// hidden_alias(name) declares name_hid, of the same type as name, as an alias
// of name with hidden visibility.
584 #define hidden_alias(name) \
585 typeof(name) name##_hid __attribute__ ((alias (#name), visibility("hidden")))
// Loader implementation of clGetExtensionFunctionAddress.
// func_name is first compared with the names in the function_description
// table. Otherwise the request is forwarded to the first platform whose
// extension suffix matches the end of func_name.
// Returns NULL through return_value when nothing matches.
// NOTE(review): the statements executed on a table match, the loop advance and
// the handling of a NULL func_name are not visible in this listing.
587 CL_API_ENTRY void * CL_API_CALL
588 clGetExtensionFunctionAddress(const char * func_name) CL_API_SUFFIX__VERSION_1_0 {
591 if( func_name == NULL )
593 cl_uint suffix_length;
595 void * return_value=NULL;
// The table is terminated by an entry whose name is NULL.
596 struct func_desc const * fn=&function_description[0];
597 while (fn->name != NULL) {
598 if (strcmp(func_name, fn->name)==0)
602 for(i=0; i<_num_picds; i++) {
603 suffix_length = strlen(_picds[i].extension_suffix);
// A suffix longer than the requested name cannot match.
604 if( suffix_length > strlen(func_name) )
// Compare the suffix with the tail of func_name and forward to the owning
// vendor on a match.
606 if(strcmp(_picds[i].extension_suffix, &func_name[strlen(func_name)-suffix_length]) == 0)
607 RETURN((*_picds[i].vicd->ext_fn_ptr)(func_name));
609 RETURN(return_value);
// Hidden alias of the entry point defined above.
611 hidden_alias(clGetExtensionFunctionAddress);
// Loader implementation of clGetPlatformIDs: reports the platforms recorded
// in _picds.
// num_entries:   capacity of the platforms array.
// platforms:     output array, may be NULL when only the count is wanted.
// num_platforms: receives _num_picds when not NULL.
// Returns CL_INVALID_VALUE when both outputs are NULL, or when platforms is
// given with num_entries equal to 0.
// NOTE(review): the condition leading to CL_PLATFORM_NOT_FOUND_KHR and the
// final return are not visible in this listing.
613 CL_API_ENTRY cl_int CL_API_CALL
614 clGetPlatformIDs(cl_uint num_entries,
615 cl_platform_id * platforms,
616 cl_uint * num_platforms) CL_API_SUFFIX__VERSION_1_0 {
619 if( platforms == NULL && num_platforms == NULL )
620 RETURN(CL_INVALID_VALUE);
621 if( num_entries == 0 && platforms != NULL )
622 RETURN(CL_INVALID_VALUE);
624 RETURN(CL_PLATFORM_NOT_FOUND_KHR);
627 if( num_platforms != NULL ){
628 *num_platforms = _num_picds;
630 if( platforms != NULL ) {
// Copy at most num_entries platform ids.
631 cl_uint n_platforms = _num_picds < num_entries ? _num_picds : num_entries;
632 for( i=0; i<n_platforms; i++) {
633 *(platforms++) = _picds[i].pid;
// Hidden alias of the entry point defined above.
638 hidden_alias(clGetPlatformIDs);
// Loader implementation of clCreateContext: forwards the call to the vendor
// implementation through a dispatch table.
// When properties contains CL_CONTEXT_PLATFORM, the dispatch table of the
// platform stored right after that key is used. Otherwise the dispatch table
// of devices[0] is used.
// With no usable device list, CL_INVALID_VALUE is stored in *errcode_ret.
// NOTE(review): the user_data parameter line, the loop index handling and the
// return of the error path are not visible in this listing.
640 CL_API_ENTRY cl_context CL_API_CALL
641 clCreateContext(const cl_context_properties * properties ,
642 cl_uint num_devices ,
643 const cl_device_id * devices ,
644 void (CL_CALLBACK * pfn_notify )(const char *, const void *, size_t, void *),
646 cl_int * errcode_ret ){
// The property list is terminated by a 0 entry.
650 if( properties != NULL){
651 while( properties[i] != 0 ) {
652 if( properties[i] == CL_CONTEXT_PLATFORM )
653 RETURN(((struct _cl_platform_id *) properties[i+1])
654 ->dispatch->clCreateContext(properties, num_devices, devices,
655 pfn_notify, user_data, errcode_ret));
659 if(devices == NULL || num_devices == 0) {
661 *errcode_ret = CL_INVALID_VALUE;
665 RETURN(((struct _cl_device_id *)devices[0])
666 ->dispatch->clCreateContext(properties, num_devices, devices,
667 pfn_notify, user_data, errcode_ret));
// Hidden alias of the entry point defined above.
669 hidden_alias(clCreateContext);
// Loader implementation of clCreateContextFromType: forwards the call to the
// vendor implementation through a dispatch table.
// When properties contains CL_CONTEXT_PLATFORM, the dispatch table of the
// platform stored right after that key is used; a 0 value after the key is
// tested separately. Without a platform property, the platform in _picds[0]
// is used when one exists.
// Error codes visible below: CL_INVALID_VALUE when no platform is recorded,
// CL_INVALID_PLATFORM on another error path.
// NOTE(review): the user_data parameter line, the loop index handling, the
// branch structure and the returns of the error paths are not visible here.
671 CL_API_ENTRY cl_context CL_API_CALL
672 clCreateContextFromType(const cl_context_properties * properties ,
673 cl_device_type device_type ,
674 void (CL_CALLBACK * pfn_notify )(const char *, const void *, size_t, void *),
676 cl_int * errcode_ret ){
// The property list is terminated by a 0 entry.
680 if( properties != NULL){
681 while( properties[i] != 0 ) {
682 if( properties[i] == CL_CONTEXT_PLATFORM )
683 if (properties[i+1] == 0) {
686 return ((struct _cl_platform_id *) properties[i+1])
687 ->dispatch->clCreateContextFromType(properties, device_type,
688 pfn_notify, user_data, errcode_ret);
692 /* if properties is null, the selected platform is implementation dependant
693 * We will use the first one if any
695 if(_num_picds == 0) {
697 *errcode_ret = CL_INVALID_VALUE;
701 RETURN(_picds[0].pid->dispatch->clCreateContextFromType
702 (properties, device_type, pfn_notify, user_data, errcode_ret));
706 *errcode_ret = CL_INVALID_PLATFORM;
710 hidden_alias(clCreateContextFromType);
// Loader implementation of clGetGLContextInfoKHR: forwards the call through
// the dispatch table of the platform named by CL_CONTEXT_PLATFORM in
// properties.
// Returns CL_INVALID_PLATFORM when no such property leads to a dispatch.
// NOTE(review): the param_value parameter line and the loop index handling
// are not visible in this listing.
712 CL_API_ENTRY cl_int CL_API_CALL
713 clGetGLContextInfoKHR(const cl_context_properties * properties ,
714 cl_gl_context_info param_name ,
715 size_t param_value_size ,
717 size_t * param_value_size_ret ){
// The property list is terminated by a 0 entry.
721 if( properties != NULL){
722 while( properties[i] != 0 ) {
723 if( properties[i] == CL_CONTEXT_PLATFORM )
724 RETURN(((struct _cl_platform_id *) properties[i+1])
725 ->dispatch->clGetGLContextInfoKHR(properties, param_name,
726 param_value_size, param_value, param_value_size_ret));
730 RETURN(CL_INVALID_PLATFORM);
// Hidden alias of the entry point defined above.
732 hidden_alias(clGetGLContextInfoKHR);
// Loader implementation of clWaitForEvents: forwards the call through the
// dispatch table of the first event in the list.
// Returns CL_INVALID_VALUE when the list is empty or NULL; otherwise returns
// whatever the vendor implementation returns.
734 CL_API_ENTRY cl_int CL_API_CALL
735 clWaitForEvents(cl_uint num_events ,
736 const cl_event * event_list ){
738 if( num_events == 0 || event_list == NULL )
739 RETURN(CL_INVALID_VALUE);
740 RETURN(((struct _cl_event *)event_list[0])
741 ->dispatch->clWaitForEvents(num_events, event_list));
// Hidden alias of the entry point defined above.
743 hidden_alias(clWaitForEvents);
// Loader implementation of clUnloadCompiler; takes no argument and returns a
// cl_int status.
// NOTE(review): the body of this function is not visible in this listing, so
// its behaviour cannot be described from here.
745 CL_API_ENTRY cl_int CL_API_CALL
746 clUnloadCompiler( void ){
// Hidden alias of the entry point defined above.
750 hidden_alias(clUnloadCompiler);