2 Copyright (c) 2012, Brice Videau <brice.videau@imag.fr>
3 Copyright (c) 2012, Vincent Danjean <Vincent.Danjean@ens-lyon.org>
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice, this
10 list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 #include <sys/types.h>
40 #pragma GCC diagnostic push
41 # pragma GCC diagnostic ignored "-Wcpp"
42 # define CL_USE_DEPRECATED_OPENCL_1_1_APIS
43 # include <CL/opencl.h>
44 #pragma GCC diagnostic pop
46 #pragma GCC visibility push(hidden)
48 #include "ocl_icd_loader.h"
49 #define DEBUG_OCL_ICD_PROVIDE_DUMP_FIELD
50 #include "ocl_icd_debug.h"
52 #define ETC_OPENCL_VENDORS "/etc/OpenCL/vendors"
/* Bit mask of enabled debug categories; it is tested against D_DUMP and
 * D_TRACE further down in this file and starts with every bit cleared. */
54 int debug_ocl_icd_mask=0;
/* Pointer-to-function types derived from the prototypes that the OpenCL
 * headers declare for these two entry points. */
56 typedef __typeof__(clGetExtensionFunctionAddress) *clGetExtensionFunctionAddress_fn;
57 typedef __typeof__(clGetPlatformInfo) *clGetPlatformInfo_fn;
/* NOTE(review): the enclosing struct declarations are not visible here.
 * Judging from how the fields are accessed below, num_platforms,
 * first_platform and ext_fn_ptr presumably belong to struct vendor_icd, and
 * extension_suffix and vicd to struct platform_icd -- confirm. */
61 cl_uint num_platforms;
62 cl_uint first_platform;
64 clGetExtensionFunctionAddress_fn ext_fn_ptr;
68 char * extension_suffix;
70 struct vendor_icd *vicd;
/* Heap-allocated tables of vendor ICDs and of their platforms, followed by
 * the number of entries of each table that are currently in use. */
74 struct vendor_icd *_icds=NULL;
75 struct platform_icd *_picds=NULL;
76 static cl_uint _num_icds = 0;
77 static cl_uint _num_picds = 0;
/* Table of OpenCL error names, indexed by the negated error code.
 * _clS(x) expands to the designated initializer `[-x] = "x"`, and the table
 * is sized from MAX_CL_ERRORS, an alias for the last code listed below
 * (CL_INVALID_DEVICE_PARTITION_COUNT). Indices without an initializer are
 * left as NULL pointers. */
80 # define _clS(x) [-x] = #x
81 # define MAX_CL_ERRORS CL_INVALID_DEVICE_PARTITION_COUNT
82 static char const * const clErrorStr[-MAX_CL_ERRORS+1] = {
84 _clS(CL_DEVICE_NOT_FOUND),
85 _clS(CL_DEVICE_NOT_AVAILABLE),
86 _clS(CL_COMPILER_NOT_AVAILABLE),
87 _clS(CL_MEM_OBJECT_ALLOCATION_FAILURE),
88 _clS(CL_OUT_OF_RESOURCES),
89 _clS(CL_OUT_OF_HOST_MEMORY),
90 _clS(CL_PROFILING_INFO_NOT_AVAILABLE),
91 _clS(CL_MEM_COPY_OVERLAP),
92 _clS(CL_IMAGE_FORMAT_MISMATCH),
93 _clS(CL_IMAGE_FORMAT_NOT_SUPPORTED),
94 _clS(CL_BUILD_PROGRAM_FAILURE),
96 _clS(CL_MISALIGNED_SUB_BUFFER_OFFSET),
97 _clS(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST),
98 _clS(CL_COMPILE_PROGRAM_FAILURE),
99 _clS(CL_LINKER_NOT_AVAILABLE),
100 _clS(CL_LINK_PROGRAM_FAILURE),
101 _clS(CL_DEVICE_PARTITION_FAILED),
102 _clS(CL_KERNEL_ARG_INFO_NOT_AVAILABLE),
103 _clS(CL_INVALID_VALUE),
104 _clS(CL_INVALID_DEVICE_TYPE),
105 _clS(CL_INVALID_PLATFORM),
106 _clS(CL_INVALID_DEVICE),
107 _clS(CL_INVALID_CONTEXT),
108 _clS(CL_INVALID_QUEUE_PROPERTIES),
109 _clS(CL_INVALID_COMMAND_QUEUE),
110 _clS(CL_INVALID_HOST_PTR),
111 _clS(CL_INVALID_MEM_OBJECT),
112 _clS(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR),
113 _clS(CL_INVALID_IMAGE_SIZE),
114 _clS(CL_INVALID_SAMPLER),
115 _clS(CL_INVALID_BINARY),
116 _clS(CL_INVALID_BUILD_OPTIONS),
117 _clS(CL_INVALID_PROGRAM),
118 _clS(CL_INVALID_PROGRAM_EXECUTABLE),
119 _clS(CL_INVALID_KERNEL_NAME),
120 _clS(CL_INVALID_KERNEL_DEFINITION),
121 _clS(CL_INVALID_KERNEL),
122 _clS(CL_INVALID_ARG_INDEX),
123 _clS(CL_INVALID_ARG_VALUE),
124 _clS(CL_INVALID_ARG_SIZE),
125 _clS(CL_INVALID_KERNEL_ARGS),
126 _clS(CL_INVALID_WORK_DIMENSION),
127 _clS(CL_INVALID_WORK_GROUP_SIZE),
128 _clS(CL_INVALID_WORK_ITEM_SIZE),
129 _clS(CL_INVALID_GLOBAL_OFFSET),
130 _clS(CL_INVALID_EVENT_WAIT_LIST),
131 _clS(CL_INVALID_EVENT),
132 _clS(CL_INVALID_OPERATION),
133 _clS(CL_INVALID_GL_OBJECT),
134 _clS(CL_INVALID_BUFFER_SIZE),
135 _clS(CL_INVALID_MIP_LEVEL),
136 _clS(CL_INVALID_GLOBAL_WORK_SIZE),
137 _clS(CL_INVALID_PROPERTY),
138 _clS(CL_INVALID_IMAGE_DESCRIPTOR),
139 _clS(CL_INVALID_COMPILER_OPTIONS),
140 _clS(CL_INVALID_LINKER_OPTIONS),
141 _clS(CL_INVALID_DEVICE_PARTITION_COUNT)
/* Translate an OpenCL status code into a printable string.
 *
 * Two alternative bodies are visible below: the first looks the code up in
 * clErrorStr and falls back to "OpenCL Error" (after a warning) for codes
 * outside the table or without an entry; the second yields "CL_SUCCESS" or
 * the decimal value formatted into a static buffer shared by all callers.
 * NOTE(review): the preprocessor lines selecting between the two bodies are
 * not visible in this extract.
 *
 * The declaration carries __attribute__((unused)) so that builds which never
 * call the function do not warn about it. */
146 static char* _clerror2string (cl_int error) __attribute__((unused));
147 static char* _clerror2string (cl_int error) {
/* Error codes are zero or negative and clErrorStr holds -MAX_CL_ERRORS+1
 * entries, so the largest valid index is -MAX_CL_ERRORS. MAX_CL_ERRORS is
 * itself negative, hence the negation on the right-hand side: comparing
 * -error against MAX_CL_ERRORS directly was true for every code. */
149 if (-error > -MAX_CL_ERRORS || error > 0) {
150 debug(D_WARN, "Unknown error code %d", error);
151 RETURN_STR("OpenCL Error");
/* In range: the entry may still be NULL for codes the table does not name. */
153 const char *ret=clErrorStr[-error];
155 debug(D_WARN, "Unknown error code %d", error);
156 RETURN_STR("OpenCL Error");
/* Numeric variant: 15 bytes are enough for any 32-bit value printed with %i. */
160 static char number[15];
162 RETURN_STR("CL_SUCCESS");
164 snprintf(number, 15, "%i", error);
/* Test whether `str` names an ICD description file: the condition below
 * rejects strings shorter than five characters and strings whose last four
 * characters are not ".icd". The return statements are not visible in this
 * extract; callers treat a non-zero result as "ends with .icd". */
169 static inline int _string_end_with_icd(const char* str) {
170 size_t len = strlen(str);
171 if( len<5 || strcmp(str + len - 4, ".icd" ) != 0 ) {
/* Return non-zero when `str` contains at least one '/' character. */
177 static inline int _string_with_slash(const char* str) {
178 return strchr(str, '/') != NULL;
/* Walk every entry of the already opened directory `dir` and test its name
 * with _string_end_with_icd(). The counter update and the return statement
 * are not visible here; presumably the number of matching entries is
 * returned -- confirm. */
181 static inline unsigned int _find_num_icds(DIR *dir) {
182 unsigned int num_icds = 0;
184 while( (ent=readdir(dir)) != NULL ){
185 if (_string_end_with_icd(ent->d_name)) {
/* dlopen() the shared library `lib_path` and store the handle in slot
 * `num_icds` of the _icds table. Success and failure are both logged; on
 * failure the dlerror() text is included in the warning.
 * The return statements are not visible here; the caller adds the result to
 * its running ICD count. */
193 static inline unsigned int _load_icd(int num_icds, const char* lib_path) {
195 debug(D_LOG, "Loading ICD '%s'", lib_path);
/* Symbols are resolved lazily and kept local to this handle. */
197 _icds[num_icds].dl_handle = dlopen(lib_path, RTLD_LAZY|RTLD_LOCAL);//|RTLD_DEEPBIND);
198 if(_icds[num_icds].dl_handle != NULL) {
199 debug(D_LOG, "ICD[%i] loaded", num_icds);
202 debug(D_WARN, "error while dlopening the IDL: '%s',\n => skipping ICD", dlerror());
/* Read one ICD description file and load the library it names.
 *
 * num_icds  - index of the next free slot in _icds.
 * dir_path  - directory containing the file, or NULL to use file_path as is.
 * file_path - name of the description file.
 *
 * The first line of the file is taken as the library path and handed to
 * _load_icd(), whose result is added to num_icds. The return statements are
 * not visible in this extract. */
207 static inline unsigned int _open_driver(unsigned int num_icds,
208 const char*dir_path, const char*file_path) {
211 unsigned int lib_path_length;
212 if (dir_path != NULL) {
/* Room for "dir/file": both lengths plus the separator and the terminator.
 * NOTE(review): the malloc() result is passed to sprintf() on the very next
 * line without a NULL check. */
213 lib_path_length = strlen(dir_path) + strlen(file_path) + 2;
214 lib_path = malloc(lib_path_length*sizeof(char));
215 sprintf(lib_path,"%s/%s", dir_path, file_path);
/* NOTE(review): same unchecked malloc() pattern as above. */
217 lib_path_length = strlen(file_path) + 1;
218 lib_path = malloc(lib_path_length*sizeof(char));
219 sprintf(lib_path,"%s", file_path);
221 debug(D_LOG, "Considering file '%s'", lib_path);
222 FILE *f = fopen(lib_path,"r");
/* Size the read buffer from the file length (plus one for the terminator). */
228 fseek(f, 0, SEEK_END);
229 lib_path_length = ftell(f)+1;
230 fseek(f, 0, SEEK_SET);
/* A length of 1 means the file is empty. */
231 if(lib_path_length == 1) {
232 debug(D_WARN, "File contents too short, skipping ICD");
/* lib_path is reused here for the file contents.
 * NOTE(review): the malloc() result goes straight into fgets() unchecked. */
236 lib_path = malloc(lib_path_length*sizeof(char));
237 err = fgets(lib_path, lib_path_length, f);
241 debug(D_WARN, "Error while loading file contents, skipping ICD");
245 lib_path_length = strlen(lib_path);
/* fgets() keeps the line terminator; drop it before using the path. */
247 if( lib_path[lib_path_length-1] == '\n' )
248 lib_path[lib_path_length-1] = '\0';
250 num_icds += _load_icd(num_icds, lib_path);
/* Iterate over the entries of the opened directory `dir` (whose path is
 * `dir_path`) and pass each one to _open_driver(), threading the running ICD
 * count through the calls. Entries whose name does not end in ".icd" take
 * the branch below, whose body is not visible here (presumably they are
 * skipped -- confirm). */
256 static inline unsigned int _open_drivers(DIR *dir, const char* dir_path) {
257 unsigned int num_icds = 0;
259 while( (ent=readdir(dir)) != NULL ){
260 if(! _string_end_with_icd(ent->d_name)) {
263 num_icds = _open_driver(num_icds, dir_path, ent->d_name);
/* Resolve the function `name` inside a loaded ICD.
 *
 * dlh  - dlopen() handle of the ICD.
 * fn   - the ICD's clGetExtensionFunctionAddress entry point; callers pass
 *        NULL when looking up that entry point itself.
 * name - symbol to resolve.
 *
 * addr1 is the address found through dlsym(). The assignment of addr2 is not
 * visible in this extract; presumably it is obtained through `fn` -- confirm.
 * A warning is emitted when either lookup fails or when the two addresses
 * differ, and addr2 falls back to addr1 when it is NULL. */
269 static void* _get_function_addr(void* dlh, clGetExtensionFunctionAddress_fn fn, const char*name) {
271 debug(D_LOG,"Looking for function %s",name);
272 addr1=dlsym(dlh, name);
274 debug(D_WARN, "Missing global symbol '%s' in ICD, should be skipped", name);
280 debug(D_WARN, "Missing function '%s' in ICD, should be skipped", name);
283 if (addr1 && addr2 && addr1!=addr2) {
284 debug(D_WARN, "Function and symbol '%s' have different addresses!", name);
288 if (!addr2) addr2=addr1;
/* Make sure the _picds table has room for `req` more platforms and report,
 * through RETURN, how many unused slots it has (allocated - _num_picds).
 * `allocated` remembers the table capacity across calls. The lines that
 * choose between the malloc() and realloc() paths and that update
 * `allocated` are not visible in this extract. */
292 static int _allocate_platforms(int req) {
293 static cl_uint allocated=0;
294 debug(D_LOG,"Requesting allocation for %d platforms",req);
/* Grow only when the free slots do not already cover the request. */
295 if (allocated - _num_picds < req) {
297 _picds=(struct platform_icd*)malloc(req*sizeof(struct platform_icd));
/* Only the shortfall beyond the currently free slots is added.
 * NOTE(review): assigning realloc()'s result straight back to _picds loses
 * the existing table if the call fails. */
299 req = req - (allocated - _num_picds);
300 _picds=(struct platform_icd*)realloc(_picds, (allocated+req)*sizeof(struct platform_icd));
304 RETURN(allocated - _num_picds);
/* Query one string-valued platform property into a freshly malloc()ed buffer.
 *
 * plt_info_ptr - the ICD's clGetPlatformInfo entry point.
 * pid          - platform to query.
 * cname        - property to request (a CL_PLATFORM_* constant).
 * sname        - human-readable property name, used only in log messages.
 *
 * On success the buffer is handed back through RETURN_STR and belongs to the
 * caller. Each failure is logged; the statements that follow the warnings
 * are not visible in this extract, but callers test the result against NULL. */
307 static char* _malloc_clGetPlatformInfo(clGetPlatformInfo_fn plt_info_ptr,
308 cl_platform_id pid, cl_platform_info cname, char* sname) {
310 size_t param_value_size_ret;
/* First call: ask only for the size of the value. */
311 error = plt_info_ptr(pid, cname, 0, NULL, &param_value_size_ret);
312 if (error != CL_SUCCESS) {
313 debug(D_WARN, "Error %s while requesting %s in platform %p",
314 _clerror2string(error), sname, pid);
317 char *param_value = (char *)malloc(sizeof(char)*param_value_size_ret);
318 if (param_value == NULL) {
319 debug(D_WARN, "Error in malloc while requesting %s in platform %p",
/* Second call: fetch the value itself into the buffer. */
323 error = plt_info_ptr(pid, cname, param_value_size_ret, param_value, NULL);
324 if (error != CL_SUCCESS){
326 debug(D_WARN, "Error %s while requesting %s in platform %p",
327 _clerror2string(error), sname, pid);
330 RETURN_STR(param_value);
/* Inspect the first `num_icds` entries of _icds, resolve the entry points
 * each ICD must export, enumerate its platforms and record the usable ones
 * in _picds. Many control-flow lines (continue/free/closing braces) are not
 * visible in this extract. */
333 static inline void _find_and_check_platforms(cl_uint num_icds) {
336 for( i=0; i<num_icds; i++){
337 debug(D_LOG, "Checking ICD %i", i);
/* picd is the slot at index _num_icds while the library handle is read from
 * slot i; the two indices can differ (see the _num_icds != i test below). */
338 struct vendor_icd *picd = &_icds[_num_icds];
339 void* dlh = _icds[i].dl_handle;
/* clGetExtensionFunctionAddress is looked up with a NULL resolver; the other
 * two lookups are given the resolver that was just found. */
340 picd->ext_fn_ptr = _get_function_addr(dlh, NULL, "clGetExtensionFunctionAddress");
341 clIcdGetPlatformIDsKHR_fn plt_fn_ptr =
342 _get_function_addr(dlh, picd->ext_fn_ptr, "clIcdGetPlatformIDsKHR");
343 clGetPlatformInfo_fn plt_info_ptr =
344 _get_function_addr(dlh, picd->ext_fn_ptr, "clGetPlatformInfo");
345 if( picd->ext_fn_ptr == NULL
346 || plt_fn_ptr == NULL
347 || plt_info_ptr == NULL) {
348 debug(D_WARN, "Missing symbols in ICD, skipping it");
351 cl_uint num_platforms=0;
/* First call only counts the platforms exposed by this ICD. */
353 error = (*plt_fn_ptr)(0, NULL, &num_platforms);
354 if( error != CL_SUCCESS || num_platforms == 0) {
355 debug(D_LOG, "No platform in ICD, skipping it");
/* NOTE(review): the malloc() result is passed to the ICD on the next line
 * without a NULL check. */
358 cl_platform_id *platforms = (cl_platform_id *) malloc( sizeof(cl_platform_id) * num_platforms);
359 error = (*plt_fn_ptr)(num_platforms, platforms, NULL);
360 if( error != CL_SUCCESS ){
362 debug(D_WARN, "Error in loading ICD platforms, skipping ICD");
365 cl_uint num_valid_platforms=0;
367 debug(D_LOG, "Try to load %d plateforms", num_platforms);
/* Reserve room in _picds for every platform before examining them. */
368 if (_allocate_platforms(num_platforms) < num_platforms) {
370 debug(D_WARN, "Not enought platform allocated. Skipping ICD");
373 for(j=0; j<num_platforms; j++) {
374 debug(D_LOG, "Checking platform %i", j);
/* p is the slot at index _num_picds; the lines that fill p->pid and p->vicd
 * and that advance _num_picds are not visible in this extract. */
375 struct platform_icd *p=&_picds[_num_picds];
376 p->extension_suffix=NULL;
/* While dumping, the D_TRACE bit is cleared around dump_platform() and then
 * put back if it was set before. */
380 if (debug_ocl_icd_mask & D_DUMP) {
381 int log=debug_ocl_icd_mask & D_TRACE;
382 debug_ocl_icd_mask &= ~D_TRACE;
383 dump_platform(p->vicd->ext_fn_ptr, p->pid);
384 debug_ocl_icd_mask |= log;
/* The extension list must be readable and must mention cl_khr_icd. */
387 char *param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_EXTENSIONS, "extensions");
388 if (param_value == NULL){
389 debug(D_WARN, "Skipping platform %i", j);
392 debug(D_DUMP, "Supported extensions: %s", param_value);
393 if( strstr(param_value, "cl_khr_icd") == NULL){
395 debug(D_WARN, "Missing khr extension in platform %i, skipping it", j);
/* The ICD suffix is stored in the platform entry; clGetExtensionFunctionAddress
 * later compares it with the tail of requested function names. */
399 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_ICD_SUFFIX_KHR, "suffix");
400 if (param_value == NULL){
401 debug(D_WARN, "Skipping platform %i", j);
404 p->extension_suffix = param_value;
405 debug(D_DUMP|D_LOG, "Extension suffix: %s", param_value);
/* The remaining properties are logged when available; of these only the
 * version string is stored in the platform entry by the visible lines. */
407 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_PROFILE, "profile");
408 if (param_value != NULL){
409 debug(D_DUMP, "Profile: %s", param_value);
413 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_VERSION, "version");
414 p->version = param_value;
415 if (param_value != NULL){
416 debug(D_DUMP, "Version: %s", param_value);
420 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_NAME, "name");
421 if (param_value != NULL){
422 debug(D_DUMP, "Name: %s", param_value);
425 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_VENDOR, "vendor");
426 if (param_value != NULL){
427 debug(D_DUMP, "Vendor: %s", param_value);
431 num_valid_platforms++;
/* Keep the ICD only if it contributed at least one platform; copy its handle
 * into picd when picd is not slot i itself. */
434 if( num_valid_platforms != 0 ) {
435 if ( _num_icds != i ) {
436 picd->dl_handle = dlh;
/* NOTE(review): num_platforms is written through picd but first_platform
 * through _icds[i]; these are different slots whenever _num_icds != i --
 * confirm this is intended. */
439 picd->num_platforms = num_valid_platforms;
440 _icds[i].first_platform = _num_picds - num_valid_platforms;
/* One-time loader initialisation: locate the ICD descriptions, load the
 * vendor libraries and register their platforms. Several declarations and
 * control-flow lines are not visible in this extract. */
448 static void __initClIcd( void ) {
450 cl_uint num_icds = 0;
/* The OCL_ICD_VENDORS environment variable overrides the default vendors
 * directory when it is set and non-empty. */
453 const char* dir_path=getenv("OCL_ICD_VENDORS");
454 if (! dir_path || dir_path[0]==0) {
455 debug(D_DUMP, "OCL_ICD_VENDORS empty or not defined, using %s", ETC_OPENCL_VENDORS);
456 dir_path=ETC_OPENCL_VENDORS;
/* A stat() failure other than "does not exist" is reported as fatal. */
461 int ret=stat(dir_path, &buf);
462 if (ret != 0 && errno != ENOENT) {
463 debug(D_WARN, "Cannot stat '%s'. Aborting", dir_path);
465 if (ret == 0 && S_ISDIR(buf.st_mode)) {
471 debug(D_LOG,"Only loading '%s' as an ICD", dir_path);
475 debug(D_LOG,"Reading icd list from '%s'", dir_path);
476 dir = opendir(dir_path);
478 if (errno == ENOTDIR) {
479 debug(D_DUMP, "%s is not a directory, trying to use it as a ICD libname",
/* Count the candidate files first so that _icds can be sized up front. */
485 num_icds = _find_num_icds(dir);
491 _icds = (struct vendor_icd*)malloc(num_icds * sizeof(struct vendor_icd));
/* Three ways to load, depending on what dir_path designates: a ".icd" file
 * (taken from the default vendors directory when the name has no slash),
 * anything else loaded directly with _load_icd(), or a whole directory of
 * description files. The conditions choosing between these calls are only
 * partly visible. */
497 if (_string_end_with_icd(dir_path)) {
499 if (! _string_with_slash(dir_path)) {
500 num_icds = _open_driver(0, ETC_OPENCL_VENDORS, dir_path);
503 num_icds = _open_driver(0, NULL, dir_path);
506 num_icds = _load_icd(0, dir_path);
509 num_icds = _open_drivers(dir, dir_path);
515 _find_and_check_platforms(num_icds);
/* Shrink the table when fewer ICDs turned out to be valid than were loaded.
 * NOTE(review): realloc()'s result overwrites _icds directly. */
520 if (_num_icds < num_icds) {
521 _icds = (struct vendor_icd*)realloc(_icds, _num_icds * sizeof(struct vendor_icd));
523 debug(D_WARN, "%d valid vendor(s)!", _num_icds);
/* State used to run __initClIcd() exactly once: a pthread_once control, a
 * per-thread in_init flag and a global _initialized flag. The preprocessor
 * lines that select between the variants below are not visible here. */
535 static pthread_once_t once_init = PTHREAD_ONCE_INIT;
539 volatile static __thread int in_init = 0;
540 volatile static cl_uint _initialized = 0;
/* Entry point that triggers initialisation; the constructor attribute makes
 * it run when the library is loaded. Two variants are visible: one delegates
 * to pthread_once(), the other elects a single initialiser with an atomic
 * compare-and-swap on `gard` and makes every other caller wait. */
542 static inline void __attribute__((constructor)) _initClIcd( void ) {
547 /* probably reentrancy */
/* Full memory barriers surround the one-time initialisation call. */
550 __sync_synchronize();
551 pthread_once(&once_init, &__initClIcd);
552 __sync_synchronize();
/* Only the caller that flips gard from 0 to 1 enters this branch. */
556 if (__sync_bool_compare_and_swap(&gard, 0, 1)) {
558 __sync_synchronize();
560 __sync_synchronize();
564 /* probably reentrancy (could also be user threads). */
566 /* someone else started __initClIcd(). We wait until it finishes. */
567 debug(D_WARN, "Waiting end of init");
/* Busy-wait on the volatile flag; nothing in the loop yields or sleeps. */
568 while (!_initialized) ;
569 debug(D_WARN, "Wait done");
/* Restore the symbol visibility that was in effect before the
 * `visibility push(hidden)` near the top of the file. */
576 #pragma GCC visibility pop
/* hidden_alias(name) declares name##_hid as an alias of `name` with hidden
 * visibility and the same type as `name`. */
577 #define hidden_alias(name) \
578 typeof(name) name##_hid __attribute__ ((alias (#name), visibility("hidden")))
/* Loader implementation of clGetExtensionFunctionAddress().
 *
 * `func_name` is first compared with the names in the function_description
 * table; what happens on a match is not visible in this extract. Otherwise
 * each registered platform's ICD suffix is compared with the tail of
 * `func_name`, and the first platform whose suffix matches has the request
 * forwarded to its vendor's own clGetExtensionFunctionAddress. When nothing
 * matches, return_value (initialised to NULL) is returned. */
580 CL_API_ENTRY void * CL_API_CALL
581 clGetExtensionFunctionAddress(const char * func_name) CL_API_SUFFIX__VERSION_1_0 {
584 if( func_name == NULL )
586 cl_uint suffix_length;
588 void * return_value=NULL;
/* The table is terminated by an entry whose name is NULL. */
589 struct func_desc const * fn=&function_description[0];
590 while (fn->name != NULL) {
591 if (strcmp(func_name, fn->name)==0)
595 for(i=0; i<_num_picds; i++) {
596 suffix_length = strlen(_picds[i].extension_suffix);
/* A suffix longer than the name itself cannot match. */
597 if( suffix_length > strlen(func_name) )
599 if(strcmp(_picds[i].extension_suffix, &func_name[strlen(func_name)-suffix_length]) == 0)
600 RETURN((*_picds[i].vicd->ext_fn_ptr)(func_name));
602 RETURN(return_value);
604 hidden_alias(clGetExtensionFunctionAddress);
/* Loader implementation of clGetPlatformIDs(): reports the platforms that
 * were registered in _picds.
 *
 * num_entries   - capacity of `platforms`.
 * platforms     - output array, may be NULL when only the count is wanted.
 * num_platforms - receives the total number of platforms, may be NULL.
 *
 * CL_INVALID_VALUE is returned when both outputs are NULL or when an output
 * array is given with zero capacity. The condition guarding the
 * CL_PLATFORM_NOT_FOUND_KHR return and the final return statement are not
 * visible in this extract. */
606 CL_API_ENTRY cl_int CL_API_CALL
607 clGetPlatformIDs(cl_uint num_entries,
608 cl_platform_id * platforms,
609 cl_uint * num_platforms) CL_API_SUFFIX__VERSION_1_0 {
612 if( platforms == NULL && num_platforms == NULL )
613 RETURN(CL_INVALID_VALUE);
614 if( num_entries == 0 && platforms != NULL )
615 RETURN(CL_INVALID_VALUE);
617 RETURN(CL_PLATFORM_NOT_FOUND_KHR);
620 if( num_platforms != NULL ){
621 *num_platforms = _num_picds;
623 if( platforms != NULL ) {
/* Copy no more ids than the caller's array can hold. */
624 cl_uint n_platforms = _num_picds < num_entries ? _num_picds : num_entries;
625 for( i=0; i<n_platforms; i++) {
626 *(platforms++) = _picds[i].pid;
631 hidden_alias(clGetPlatformIDs);
/* Loader implementation of clCreateContext(): forwards the call to the
 * vendor implementation.
 *
 * When `properties` contains a CL_CONTEXT_PLATFORM key, the value that
 * follows it is used as the platform and the call goes through that
 * platform's dispatch table. Otherwise the dispatch table of the first
 * device in `devices` is used; an empty device list stores CL_INVALID_VALUE
 * in *errcode_ret. The user_data parameter line, the loop increment and the
 * NULL guard on errcode_ret are not visible in this extract. */
633 CL_API_ENTRY cl_context CL_API_CALL
634 clCreateContext(const cl_context_properties * properties ,
635 cl_uint num_devices ,
636 const cl_device_id * devices ,
637 void (CL_CALLBACK * pfn_notify )(const char *, const void *, size_t, void *),
639 cl_int * errcode_ret ){
/* The property list ends with a 0 entry. */
643 if( properties != NULL){
644 while( properties[i] != 0 ) {
645 if( properties[i] == CL_CONTEXT_PLATFORM )
646 RETURN(((struct _cl_platform_id *) properties[i+1])
647 ->dispatch->clCreateContext(properties, num_devices, devices,
648 pfn_notify, user_data, errcode_ret));
652 if(devices == NULL || num_devices == 0) {
654 *errcode_ret = CL_INVALID_VALUE;
658 RETURN(((struct _cl_device_id *)devices[0])
659 ->dispatch->clCreateContext(properties, num_devices, devices,
660 pfn_notify, user_data, errcode_ret));
662 hidden_alias(clCreateContext);
/* Loader implementation of clCreateContextFromType(): forwards the call to
 * the vendor implementation.
 *
 * When `properties` contains a CL_CONTEXT_PLATFORM key, the call goes
 * through the dispatch table of the platform stored after it, unless that
 * value is 0 (the body of that branch is not visible here). Without such a
 * key the first registered platform is used; with no registered platform
 * CL_INVALID_VALUE is stored in *errcode_ret. A CL_INVALID_PLATFORM store is
 * also visible near the end, but the path leading to it is not.
 * NOTE(review): the platform-dispatch path uses a plain `return` where the
 * sibling functions use RETURN -- confirm this is intended. */
664 CL_API_ENTRY cl_context CL_API_CALL
665 clCreateContextFromType(const cl_context_properties * properties ,
666 cl_device_type device_type ,
667 void (CL_CALLBACK * pfn_notify )(const char *, const void *, size_t, void *),
669 cl_int * errcode_ret ){
/* The property list ends with a 0 entry. */
673 if( properties != NULL){
674 while( properties[i] != 0 ) {
675 if( properties[i] == CL_CONTEXT_PLATFORM )
676 if (properties[i+1] == 0) {
679 return ((struct _cl_platform_id *) properties[i+1])
680 ->dispatch->clCreateContextFromType(properties, device_type,
681 pfn_notify, user_data, errcode_ret);
685 /* if properties is null, the selected platform is implementation dependant
686 * We will use the first one if any
688 if(_num_picds == 0) {
690 *errcode_ret = CL_INVALID_VALUE;
694 RETURN(_picds[0].pid->dispatch->clCreateContextFromType
695 (properties, device_type, pfn_notify, user_data, errcode_ret));
699 *errcode_ret = CL_INVALID_PLATFORM;
703 hidden_alias(clCreateContextFromType);
/* Loader implementation of clGetGLContextInfoKHR(): scans `properties` for a
 * CL_CONTEXT_PLATFORM key and forwards the call through the dispatch table
 * of the platform stored after it. CL_INVALID_PLATFORM is returned when no
 * such key is found. The param_value parameter line and the loop increment
 * are not visible in this extract. */
705 CL_API_ENTRY cl_int CL_API_CALL
706 clGetGLContextInfoKHR(const cl_context_properties * properties ,
707 cl_gl_context_info param_name ,
708 size_t param_value_size ,
710 size_t * param_value_size_ret ){
/* The property list ends with a 0 entry. */
714 if( properties != NULL){
715 while( properties[i] != 0 ) {
716 if( properties[i] == CL_CONTEXT_PLATFORM )
717 RETURN(((struct _cl_platform_id *) properties[i+1])
718 ->dispatch->clGetGLContextInfoKHR(properties, param_name,
719 param_value_size, param_value, param_value_size_ret));
723 RETURN(CL_INVALID_PLATFORM);
725 hidden_alias(clGetGLContextInfoKHR);
/* Loader implementation of clWaitForEvents(): rejects an empty or NULL event
 * list with CL_INVALID_VALUE, otherwise forwards the whole list through the
 * dispatch table of the first event. */
727 CL_API_ENTRY cl_int CL_API_CALL
728 clWaitForEvents(cl_uint num_events ,
729 const cl_event * event_list ){
731 if( num_events == 0 || event_list == NULL )
732 RETURN(CL_INVALID_VALUE);
733 RETURN(((struct _cl_event *)event_list[0])
734 ->dispatch->clWaitForEvents(num_events, event_list));
736 hidden_alias(clWaitForEvents);
/* Loader entry point for clUnloadCompiler(), also exported under a hidden
 * alias. NOTE(review): the function body is not visible in this extract, so
 * its behaviour cannot be described here. */
738 CL_API_ENTRY cl_int CL_API_CALL
739 clUnloadCompiler( void ){
743 hidden_alias(clUnloadCompiler);