2 Copyright (c) 2012, Brice Videau <brice.videau@imag.fr>
3 Copyright (c) 2012, Vincent Danjean <Vincent.Danjean@ens-lyon.org>
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice, this
10 list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 #include <sys/types.h>
40 #pragma GCC diagnostic push
41 # pragma GCC diagnostic ignored "-Wcpp"
42 # define CL_USE_DEPRECATED_OPENCL_1_1_APIS
43 # include <CL/opencl.h>
44 #pragma GCC diagnostic pop
46 #pragma GCC visibility push(hidden)
48 #include "ocl_icd_loader.h"
49 #define DEBUG_OCL_ICD_PROVIDE_DUMP_FIELD
50 #include "ocl_icd_debug.h"
/* Directory used for ICD description files when the OCL_ICD_VENDORS
 * environment variable is unset or empty (see __initClIcd). */
52 #define ETC_OPENCL_VENDORS "/etc/OpenCL/vendors"
/* Bitmask of enabled debug categories; tested against D_DUMP and D_TRACE
 * further down in this file. */
54 int debug_ocl_icd_mask=0;
/* Pointer types with the exact prototypes of the corresponding OpenCL
 * entry points, used for symbols resolved from the vendor libraries. */
56 typedef __typeof__(clGetExtensionFunctionAddress) *clGetExtensionFunctionAddress_fn;
57 typedef __typeof__(clGetPlatformInfo) *clGetPlatformInfo_fn;
61 cl_uint num_platforms;
62 cl_uint first_platform;
64 clGetExtensionFunctionAddress_fn ext_fn_ptr;
68 char * extension_suffix;
70 struct vendor_icd *vicd;
/* Array of loaded vendor libraries (allocated in __initClIcd). */
74 struct vendor_icd *_icds=NULL;
/* Array of accepted platforms (grown by _allocate_platforms). */
75 struct platform_icd *_picds=NULL;
/* Number of entries currently used in _icds and _picds respectively. */
76 static cl_uint _num_icds = 0;
77 static cl_uint _num_picds = 0;
/* Table mapping OpenCL error codes to their symbolic names.
 * _clS(x) is a designated initializer that stores the string #x at index -x
 * (OpenCL error codes are negative), and the table holds indices
 * 0..MAX_CL_ERRORS. Indices without an initializer are NULL.
 * NOTE(review): no entry is visible for CL_SUCCESS (0) or CL_MAP_FAILURE
 * (-12), and neither the closing "};" nor the enclosing preprocessor guard
 * is visible here -- these lines look dropped; confirm against the upstream
 * file. */
80 # define _clS(x) [-x] = #x
81 # define MAX_CL_ERRORS (-CL_INVALID_DEVICE_PARTITION_COUNT)
82 static char const * const clErrorStr[MAX_CL_ERRORS+1] = {
84 _clS(CL_DEVICE_NOT_FOUND),
85 _clS(CL_DEVICE_NOT_AVAILABLE),
86 _clS(CL_COMPILER_NOT_AVAILABLE),
87 _clS(CL_MEM_OBJECT_ALLOCATION_FAILURE),
88 _clS(CL_OUT_OF_RESOURCES),
89 _clS(CL_OUT_OF_HOST_MEMORY),
90 _clS(CL_PROFILING_INFO_NOT_AVAILABLE),
91 _clS(CL_MEM_COPY_OVERLAP),
92 _clS(CL_IMAGE_FORMAT_MISMATCH),
93 _clS(CL_IMAGE_FORMAT_NOT_SUPPORTED),
94 _clS(CL_BUILD_PROGRAM_FAILURE),
96 _clS(CL_MISALIGNED_SUB_BUFFER_OFFSET),
97 _clS(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST),
98 _clS(CL_COMPILE_PROGRAM_FAILURE),
99 _clS(CL_LINKER_NOT_AVAILABLE),
100 _clS(CL_LINK_PROGRAM_FAILURE),
101 _clS(CL_DEVICE_PARTITION_FAILED),
102 _clS(CL_KERNEL_ARG_INFO_NOT_AVAILABLE),
103 _clS(CL_INVALID_VALUE),
104 _clS(CL_INVALID_DEVICE_TYPE),
105 _clS(CL_INVALID_PLATFORM),
106 _clS(CL_INVALID_DEVICE),
107 _clS(CL_INVALID_CONTEXT),
108 _clS(CL_INVALID_QUEUE_PROPERTIES),
109 _clS(CL_INVALID_COMMAND_QUEUE),
110 _clS(CL_INVALID_HOST_PTR),
111 _clS(CL_INVALID_MEM_OBJECT),
112 _clS(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR),
113 _clS(CL_INVALID_IMAGE_SIZE),
114 _clS(CL_INVALID_SAMPLER),
115 _clS(CL_INVALID_BINARY),
116 _clS(CL_INVALID_BUILD_OPTIONS),
117 _clS(CL_INVALID_PROGRAM),
118 _clS(CL_INVALID_PROGRAM_EXECUTABLE),
119 _clS(CL_INVALID_KERNEL_NAME),
120 _clS(CL_INVALID_KERNEL_DEFINITION),
121 _clS(CL_INVALID_KERNEL),
122 _clS(CL_INVALID_ARG_INDEX),
123 _clS(CL_INVALID_ARG_VALUE),
124 _clS(CL_INVALID_ARG_SIZE),
125 _clS(CL_INVALID_KERNEL_ARGS),
126 _clS(CL_INVALID_WORK_DIMENSION),
127 _clS(CL_INVALID_WORK_GROUP_SIZE),
128 _clS(CL_INVALID_WORK_ITEM_SIZE),
129 _clS(CL_INVALID_GLOBAL_OFFSET),
130 _clS(CL_INVALID_EVENT_WAIT_LIST),
131 _clS(CL_INVALID_EVENT),
132 _clS(CL_INVALID_OPERATION),
133 _clS(CL_INVALID_GL_OBJECT),
134 _clS(CL_INVALID_BUFFER_SIZE),
135 _clS(CL_INVALID_MIP_LEVEL),
136 _clS(CL_INVALID_GLOBAL_WORK_SIZE),
137 _clS(CL_INVALID_PROPERTY),
138 _clS(CL_INVALID_IMAGE_DESCRIPTOR),
139 _clS(CL_INVALID_COMPILER_OPTIONS),
140 _clS(CL_INVALID_LINKER_OPTIONS),
141 _clS(CL_INVALID_DEVICE_PARTITION_COUNT)
/* Convert an OpenCL error code into a printable string.
 * Two variants are visible below: a lookup in clErrorStr that warns and
 * yields "OpenCL Error" for codes it cannot name, and a fallback that yields
 * "CL_SUCCESS" or formats the number into a static 15-byte buffer (so that
 * result is overwritten by the next call).
 * NOTE(review): the preprocessor/condition lines selecting between the two
 * variants and the closing braces are not visible here -- presumably a
 * debug-build switch; confirm against the upstream file. */
146 static char* _clerror2string (cl_int error) __attribute__((unused));
147 static char* _clerror2string (cl_int error) {
/* Positive codes and codes beyond the last table index are unknown. */
149 if (-error > MAX_CL_ERRORS || error > 0) {
150 debug(D_WARN, "Unknown error code %d", error);
151 RETURN_STR("OpenCL Error");
153 const char *ret=clErrorStr[-error];
155 debug(D_WARN, "Unknown error code %d", error);
156 RETURN_STR("OpenCL Error");
/* Fallback variant: static buffer shared by all calls. */
160 static char number[15];
162 RETURN_STR("CL_SUCCESS");
164 snprintf(number, 15, "%i", error);
/* Tell whether str names an ICD description file.
 * Returns 1 when str ends with ".icd" and has at least one character before
 * that suffix (so a bare ".icd" is rejected), 0 otherwise. */
static inline int _string_end_with_icd(const char* str) {
  size_t len = strlen(str);
  if( len<5 || strcmp(str + len - 4, ".icd" ) != 0 ) {
    return 0;
  }
  return 1;
}
/* Return non-zero when str contains at least one '/' character. */
static inline int _string_with_slash(const char* str) {
  return strchr(str, '/') != NULL;
}
/* Count the entries of dir whose name ends with ".icd".
 * The directory stream is rewound before returning so that the caller can
 * iterate over the same entries again (as _open_drivers does). */
static inline unsigned int _find_num_icds(DIR *dir) {
  unsigned int num_icds = 0;
  struct dirent *ent;
  while( (ent=readdir(dir)) != NULL ){
    if (_string_end_with_icd(ent->d_name)) {
      num_icds++;
    }
  }
  rewinddir(dir);
  RETURN(num_icds);
}
193 static inline unsigned int _load_icd(int num_icds, const char* lib_path) {
195 debug(D_LOG, "Loading ICD '%s'", lib_path);
197 _icds[num_icds].dl_handle = dlopen(lib_path, RTLD_LAZY|RTLD_LOCAL);//|RTLD_DEEPBIND);
198 if(_icds[num_icds].dl_handle != NULL) {
199 debug(D_LOG, "ICD[%i] loaded", num_icds);
202 debug(D_WARN, "error while dlopening the IDL: '%s',\n => skipping ICD", dlerror());
207 static inline unsigned int _open_driver(unsigned int num_icds,
208 const char*dir_path, const char*file_path) {
211 unsigned int lib_path_length;
212 if (dir_path != NULL) {
213 lib_path_length = strlen(dir_path) + strlen(file_path) + 2;
214 lib_path = malloc(lib_path_length*sizeof(char));
215 sprintf(lib_path,"%s/%s", dir_path, file_path);
217 lib_path_length = strlen(file_path) + 1;
218 lib_path = malloc(lib_path_length*sizeof(char));
219 sprintf(lib_path,"%s", file_path);
221 debug(D_LOG, "Considering file '%s'", lib_path);
222 FILE *f = fopen(lib_path,"r");
228 fseek(f, 0, SEEK_END);
229 lib_path_length = ftell(f)+1;
230 fseek(f, 0, SEEK_SET);
231 if(lib_path_length == 1) {
232 debug(D_WARN, "File contents too short, skipping ICD");
236 lib_path = malloc(lib_path_length*sizeof(char));
237 err = fgets(lib_path, lib_path_length, f);
241 debug(D_WARN, "Error while loading file contents, skipping ICD");
245 lib_path_length = strlen(lib_path);
247 if( lib_path[lib_path_length-1] == '\n' )
248 lib_path[lib_path_length-1] = '\0';
250 num_icds += _load_icd(num_icds, lib_path);
256 static inline unsigned int _open_drivers(DIR *dir, const char* dir_path) {
257 unsigned int num_icds = 0;
259 while( (ent=readdir(dir)) != NULL ){
260 if(! _string_end_with_icd(ent->d_name)) {
263 num_icds = _open_driver(num_icds, dir_path, ent->d_name);
269 static void* _get_function_addr(void* dlh, clGetExtensionFunctionAddress_fn fn, const char*name) {
271 debug(D_LOG,"Looking for function %s",name);
272 addr1=dlsym(dlh, name);
274 debug(D_WARN, "Missing global symbol '%s' in ICD, should be skipped", name);
280 debug(D_WARN, "Missing function '%s' in ICD, should be skipped", name);
283 if (addr1 && addr2 && addr1!=addr2) {
284 debug(D_WARN, "Function and symbol '%s' have different addresses!", name);
288 if (!addr2) addr2=addr1;
292 static int _allocate_platforms(int req) {
293 static cl_uint allocated=0;
294 debug(D_LOG,"Requesting allocation for %d platforms",req);
295 if (allocated - _num_picds < req) {
297 _picds=(struct platform_icd*)malloc(req*sizeof(struct platform_icd));
299 req = req - (allocated - _num_picds);
300 _picds=(struct platform_icd*)realloc(_picds, (allocated+req)*sizeof(struct platform_icd));
304 RETURN(allocated - _num_picds);
307 static char* _malloc_clGetPlatformInfo(clGetPlatformInfo_fn plt_info_ptr,
308 cl_platform_id pid, cl_platform_info cname, char* sname) {
310 size_t param_value_size_ret;
311 error = plt_info_ptr(pid, cname, 0, NULL, ¶m_value_size_ret);
312 if (error != CL_SUCCESS) {
313 debug(D_WARN, "Error %s while requesting %s in platform %p",
314 _clerror2string(error), sname, pid);
317 char *param_value = (char *)malloc(sizeof(char)*param_value_size_ret);
318 if (param_value == NULL) {
319 debug(D_WARN, "Error in malloc while requesting %s in platform %p",
323 error = plt_info_ptr(pid, cname, param_value_size_ret, param_value, NULL);
324 if (error != CL_SUCCESS){
326 debug(D_WARN, "Error %s while requesting %s in platform %p",
327 _clerror2string(error), sname, pid);
330 RETURN_STR(param_value);
/* Inspect the num_icds libraries loaded in _icds: resolve the entry points
 * each must provide, list its platforms, and record in _picds those that
 * advertise the cl_khr_icd extension and an ICD suffix.
 * NOTE(review): several lines of this function (declarations, continue/free
 * statements, closing braces, counter updates) are not visible here --
 * confirm against the upstream file before relying on the flow below. */
333 static inline void _find_and_check_platforms(cl_uint num_icds) {
336 for( i=0; i<num_icds; i++){
337 debug(D_LOG, "Checking ICD %i", i);
/* picd is the slot at index _num_icds, whereas the library handle is read
 * from slot i. */
338 struct vendor_icd *picd = &_icds[_num_icds];
339 void* dlh = _icds[i].dl_handle;
/* The first lookup has no extension-lookup function yet (NULL). */
340 picd->ext_fn_ptr = _get_function_addr(dlh, NULL, "clGetExtensionFunctionAddress");
341 clIcdGetPlatformIDsKHR_fn plt_fn_ptr =
342 _get_function_addr(dlh, picd->ext_fn_ptr, "clIcdGetPlatformIDsKHR");
343 clGetPlatformInfo_fn plt_info_ptr =
344 _get_function_addr(dlh, picd->ext_fn_ptr, "clGetPlatformInfo");
345 if( picd->ext_fn_ptr == NULL
346 || plt_fn_ptr == NULL
347 || plt_info_ptr == NULL) {
348 debug(D_WARN, "Missing symbols in ICD, skipping it");
351 cl_uint num_platforms=0;
/* First call only asks for the number of platforms. */
353 error = (*plt_fn_ptr)(0, NULL, &num_platforms);
354 if( error != CL_SUCCESS || num_platforms == 0) {
355 debug(D_LOG, "No platform in ICD, skipping it");
/* NOTE(review): the result of this malloc is not checked on any visible
 * line. */
358 cl_platform_id *platforms = (cl_platform_id *) malloc( sizeof(cl_platform_id) * num_platforms);
359 error = (*plt_fn_ptr)(num_platforms, platforms, NULL);
360 if( error != CL_SUCCESS ){
362 debug(D_WARN, "Error in loading ICD platforms, skipping ICD");
365 cl_uint num_valid_platforms=0;
367 debug(D_LOG, "Try to load %d plateforms", num_platforms);
/* _allocate_platforms returns the number of free slots in _picds. */
368 if (_allocate_platforms(num_platforms) < num_platforms) {
370 debug(D_WARN, "Not enought platform allocated. Skipping ICD");
373 for(j=0; j<num_platforms; j++) {
374 debug(D_LOG, "Checking platform %i", j);
/* Candidate slot: the entry just after the platforms already recorded. */
375 struct platform_icd *p=&_picds[_num_picds];
376 char *param_value=NULL;
377 p->extension_suffix=NULL;
/* NOTE(review): the guard below is spelled DDEBUG_OCL_ICD (double D) --
 * confirm whether that is intentional. */
380 #ifdef DDEBUG_OCL_ICD
/* D_TRACE is masked out of debug_ocl_icd_mask while dumping, then the saved
 * bit is restored. */
381 if (debug_ocl_icd_mask & D_DUMP) {
382 int log=debug_ocl_icd_mask & D_TRACE;
383 debug_ocl_icd_mask &= ~D_TRACE;
384 dump_platform(p->vicd->ext_fn_ptr, p->pid);
385 debug_ocl_icd_mask |= log;
/* NOTE(review): param_value is also declared a few lines above in this loop
 * body -- confirm this second declaration is not a redefinition. */
388 char *param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_EXTENSIONS, "extensions");
389 if (param_value == NULL){
390 debug(D_WARN, "Skipping platform %i", j);
393 debug(D_DUMP, "Supported extensions: %s", param_value);
/* The platform must list cl_khr_icd among its extensions. */
394 if( strstr(param_value, "cl_khr_icd") == NULL){
396 debug(D_WARN, "Missing khr extension in platform %i, skipping it", j);
400 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_ICD_SUFFIX_KHR, "suffix");
401 if (param_value == NULL){
402 debug(D_WARN, "Skipping platform %i", j);
/* The suffix string is kept in the platform entry; it is read later by
 * clGetExtensionFunctionAddress. */
405 p->extension_suffix = param_value;
406 debug(D_DUMP|D_LOG, "Extension suffix: %s", param_value);
408 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_PROFILE, "profile");
409 if (param_value != NULL){
410 debug(D_DUMP, "Profile: %s", param_value);
413 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_VERSION, "version");
/* The version string is stored in the platform entry (may be NULL). */
414 p->version = param_value;
415 if (param_value != NULL){
416 debug(D_DUMP, "Version: %s", param_value);
419 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_NAME, "name");
420 if (param_value != NULL){
421 debug(D_DUMP, "Name: %s", param_value);
424 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_VENDOR, "vendor");
425 if (param_value != NULL){
426 debug(D_DUMP, "Vendor: %s", param_value);
430 num_valid_platforms++;
433 if( num_valid_platforms != 0 ) {
/* When picd and slot i differ, the handle is copied into picd. */
434 if ( _num_icds != i ) {
435 picd->dl_handle = dlh;
438 picd->num_platforms = num_valid_platforms;
/* NOTE(review): first_platform is written through _icds[i] while the other
 * fields go through picd (= &_icds[_num_icds]) -- confirm which slot is
 * intended when some earlier ICD was skipped. */
439 _icds[i].first_platform = _num_picds - num_valid_platforms;
/* Loader initialisation: choose where to look for ICDs, load them into
 * _icds, then validate their platforms with _find_and_check_platforms.
 * NOTE(review): many lines of this function (declarations, else branches,
 * returns, closing braces) are not visible here; the comments below describe
 * only the statements that are. */
447 static void __initClIcd( void ) {
449 cl_uint num_icds = 0;
/* OCL_ICD_VENDORS overrides the default location when set and non-empty. */
452 const char* dir_path=getenv("OCL_ICD_VENDORS");
453 if (! dir_path || dir_path[0]==0) {
454 debug(D_DUMP, "OCL_ICD_VENDORS empty or not defined, using %s", ETC_OPENCL_VENDORS);
455 dir_path=ETC_OPENCL_VENDORS;
/* A missing path (ENOENT) is not reported by the stat check below. */
460 int ret=stat(dir_path, &buf);
461 if (ret != 0 && errno != ENOENT) {
462 debug(D_WARN, "Cannot stat '%s'. Aborting", dir_path);
464 if (ret == 0 && S_ISDIR(buf.st_mode)) {
470 debug(D_LOG,"Only loading '%s' as an ICD", dir_path);
474 debug(D_LOG,"Reading icd list from '%s'", dir_path);
475 dir = opendir(dir_path);
477 if (errno == ENOTDIR) {
478 debug(D_DUMP, "%s is not a directory, trying to use it as a ICD libname",
/* Directory case: count the *.icd entries to size the _icds array. */
484 num_icds = _find_num_icds(dir);
/* NOTE(review): the result of this malloc is not checked on any visible
 * line. */
490 _icds = (struct vendor_icd*)malloc(num_icds * sizeof(struct vendor_icd));
/* A path ending in ".icd" is read as a description file; without a '/' it
 * is looked up in ETC_OPENCL_VENDORS. The direct _load_icd call presumably
 * handles paths not ending in ".icd" -- the branch structure is not visible. */
496 if (_string_end_with_icd(dir_path)) {
498 if (! _string_with_slash(dir_path)) {
499 num_icds = _open_driver(0, ETC_OPENCL_VENDORS, dir_path);
502 num_icds = _open_driver(0, NULL, dir_path);
505 num_icds = _load_icd(0, dir_path);
508 num_icds = _open_drivers(dir, dir_path);
514 _find_and_check_platforms(num_icds);
/* Shrink _icds to the number of valid ICDs.
 * NOTE(review): the realloc result overwrites _icds directly, and
 * _num_icds may be 0 here -- confirm both are acceptable. */
519 if (_num_icds < num_icds) {
520 _icds = (struct vendor_icd*)realloc(_icds, _num_icds * sizeof(struct vendor_icd));
522 debug(D_WARN, "%d valid vendor(s)!", _num_icds);
/* State used by _initClIcd: a pthread_once control block, a per-thread
 * in_init flag, and a global _initialized flag that waiters poll. */
534 static pthread_once_t once_init = PTHREAD_ONCE_INIT;
538 volatile static __thread int in_init = 0;
539 volatile static cl_uint _initialized = 0;
/* Ensure __initClIcd() has run; declared with the constructor attribute.
 * Two strategies are visible: pthread_once(), and a compare-and-swap on
 * `gard` with other callers spinning until _initialized becomes non-zero.
 * NOTE(review): the preprocessor lines selecting between the two, the
 * declaration of `gard` and the updates of in_init/_initialized are not
 * visible here -- confirm against the upstream file. */
541 static inline void __attribute__((constructor)) _initClIcd( void ) {
546 /* probably reentrancy */
549 __sync_synchronize();
550 pthread_once(&once_init, &__initClIcd);
551 __sync_synchronize();
/* Only the caller that flips gard from 0 to 1 takes this branch. */
555 if (__sync_bool_compare_and_swap(&gard, 0, 1)) {
557 __sync_synchronize();
559 __sync_synchronize();
563 /* probably reentrancy (could also be user threads). */
565 /* someone else started __initClIcd(). We wait until it ends. */
566 debug(D_WARN, "Waiting end of init");
/* Busy-wait on the volatile flag. */
567 while (!_initialized) ;
568 debug(D_WARN, "Wait done");
/* End of the hidden-visibility region opened near the top of the file. */
575 #pragma GCC visibility pop
/* hidden_alias(name) declares name##_hid as an alias of name with hidden
 * visibility and the same type as name. */
576 #define hidden_alias(name) \
577 typeof(name) name##_hid __attribute__ ((alias (#name), visibility("hidden")))
/* Loader implementation of clGetExtensionFunctionAddress.
 * func_name is first compared with the names in the function_description
 * table; otherwise the call is forwarded to the first recorded platform
 * whose extension_suffix is a suffix of func_name. return_value (NULL) is
 * returned when the platform loop finds no match.
 * NOTE(review): some lines (returns, continue, closing braces) are not
 * visible here -- confirm against the upstream file. */
579 CL_API_ENTRY void * CL_API_CALL
580 clGetExtensionFunctionAddress(const char * func_name) CL_API_SUFFIX__VERSION_1_0 {
583 if( func_name == NULL )
585 cl_uint suffix_length;
587 void * return_value=NULL;
/* The table is scanned until an entry with a NULL name. */
588 struct func_desc const * fn=&function_description[0];
589 while (fn->name != NULL) {
590 if (strcmp(func_name, fn->name)==0)
594 for(i=0; i<_num_picds; i++) {
595 suffix_length = strlen(_picds[i].extension_suffix);
/* A suffix longer than the name cannot match. */
596 if( suffix_length > strlen(func_name) )
/* Compare the suffix with the tail of func_name. */
598 if(strcmp(_picds[i].extension_suffix, &func_name[strlen(func_name)-suffix_length]) == 0)
599 RETURN((*_picds[i].vicd->ext_fn_ptr)(func_name));
601 RETURN(return_value);
603 hidden_alias(clGetExtensionFunctionAddress);
/* Loader implementation of clGetPlatformIDs.
 * Writes the number of recorded platforms to *num_platforms (when non-NULL)
 * and copies at most num_entries platform ids into platforms (when
 * non-NULL). Returns CL_INVALID_VALUE for the argument combinations checked
 * below.
 * NOTE(review): the condition guarding CL_PLATFORM_NOT_FOUND_KHR and the
 * final return are not visible here -- confirm against the upstream file. */
605 CL_API_ENTRY cl_int CL_API_CALL
606 clGetPlatformIDs(cl_uint num_entries,
607 cl_platform_id * platforms,
608 cl_uint * num_platforms) CL_API_SUFFIX__VERSION_1_0 {
/* At least one output must be requested. */
611 if( platforms == NULL && num_platforms == NULL )
612 RETURN(CL_INVALID_VALUE);
/* A platforms array with room for zero entries is rejected. */
613 if( num_entries == 0 && platforms != NULL )
614 RETURN(CL_INVALID_VALUE);
616 RETURN(CL_PLATFORM_NOT_FOUND_KHR);
619 if( num_platforms != NULL ){
620 *num_platforms = _num_picds;
622 if( platforms != NULL ) {
/* Never write more entries than the caller provided room for. */
623 cl_uint n_platforms = _num_picds < num_entries ? _num_picds : num_entries;
624 for( i=0; i<n_platforms; i++) {
625 *(platforms++) = _picds[i].pid;
630 hidden_alias(clGetPlatformIDs);
/* Loader implementation of clCreateContext.
 * When properties contains CL_CONTEXT_PLATFORM, the call is forwarded
 * through the dispatch table of the platform given as that property's value;
 * otherwise through the dispatch table of devices[0]. With no usable device
 * list, *errcode_ret is set to CL_INVALID_VALUE.
 * NOTE(review): the user_data parameter line, the loop increment and the
 * failure return are not visible here -- confirm against the upstream file. */
632 CL_API_ENTRY cl_context CL_API_CALL
633 clCreateContext(const cl_context_properties * properties ,
634 cl_uint num_devices ,
635 const cl_device_id * devices ,
636 void (CL_CALLBACK * pfn_notify )(const char *, const void *, size_t, void *),
638 cl_int * errcode_ret ){
/* The property list ends at a 0 entry. */
642 if( properties != NULL){
643 while( properties[i] != 0 ) {
644 if( properties[i] == CL_CONTEXT_PLATFORM )
645 RETURN(((struct _cl_platform_id *) properties[i+1])
646 ->dispatch->clCreateContext(properties, num_devices, devices,
647 pfn_notify, user_data, errcode_ret));
651 if(devices == NULL || num_devices == 0) {
653 *errcode_ret = CL_INVALID_VALUE;
/* No platform property: use the first device to find the vendor. */
657 RETURN(((struct _cl_device_id *)devices[0])
658 ->dispatch->clCreateContext(properties, num_devices, devices,
659 pfn_notify, user_data, errcode_ret));
661 hidden_alias(clCreateContext);
/* Loader implementation of clCreateContextFromType.
 * When properties contains CL_CONTEXT_PLATFORM with a non-zero value, the
 * call is forwarded through that platform's dispatch table. The visible
 * fallback forwards to the first recorded platform (_picds[0]) and sets
 * *errcode_ret to CL_INVALID_VALUE when there is none; another visible path
 * sets CL_INVALID_PLATFORM.
 * NOTE(review): the user_data parameter line, the loop increment, several
 * branches and returns are not visible here, and the "if properties is
 * null" comment below has no visible terminator -- confirm against the
 * upstream file. */
663 CL_API_ENTRY cl_context CL_API_CALL
664 clCreateContextFromType(const cl_context_properties * properties ,
665 cl_device_type device_type ,
666 void (CL_CALLBACK * pfn_notify )(const char *, const void *, size_t, void *),
668 cl_int * errcode_ret ){
672 if( properties != NULL){
673 while( properties[i] != 0 ) {
674 if( properties[i] == CL_CONTEXT_PLATFORM )
675 if (properties[i+1] == 0) {
678 return ((struct _cl_platform_id *) properties[i+1])
679 ->dispatch->clCreateContextFromType(properties, device_type,
680 pfn_notify, user_data, errcode_ret);
684 /* if properties is null, the selected platform is implementation dependent
685 * We will use the first one if any
687 if(_num_picds == 0) {
689 *errcode_ret = CL_INVALID_VALUE;
693 RETURN(_picds[0].pid->dispatch->clCreateContextFromType
694 (properties, device_type, pfn_notify, user_data, errcode_ret));
698 *errcode_ret = CL_INVALID_PLATFORM;
702 hidden_alias(clCreateContextFromType);
/* Loader implementation of clGetGLContextInfoKHR.
 * Forwards the call through the dispatch table of the platform given as the
 * CL_CONTEXT_PLATFORM property; returns CL_INVALID_PLATFORM when no such
 * property leads to a dispatch.
 * NOTE(review): the param_value parameter line and the loop increment are
 * not visible here -- confirm against the upstream file. */
704 CL_API_ENTRY cl_int CL_API_CALL
705 clGetGLContextInfoKHR(const cl_context_properties * properties ,
706 cl_gl_context_info param_name ,
707 size_t param_value_size ,
709 size_t * param_value_size_ret ){
/* The property list ends at a 0 entry. */
713 if( properties != NULL){
714 while( properties[i] != 0 ) {
715 if( properties[i] == CL_CONTEXT_PLATFORM )
716 RETURN(((struct _cl_platform_id *) properties[i+1])
717 ->dispatch->clGetGLContextInfoKHR(properties, param_name,
718 param_value_size, param_value, param_value_size_ret));
722 RETURN(CL_INVALID_PLATFORM);
724 hidden_alias(clGetGLContextInfoKHR);
/* Loader implementation of clWaitForEvents.
 * Returns CL_INVALID_VALUE for an empty or NULL event list; otherwise
 * forwards the whole list through the dispatch table of the first event. */
726 CL_API_ENTRY cl_int CL_API_CALL
727 clWaitForEvents(cl_uint num_events ,
728 const cl_event * event_list ){
730 if( num_events == 0 || event_list == NULL )
731 RETURN(CL_INVALID_VALUE);
/* Only event_list[0] is used to select the vendor implementation. */
732 RETURN(((struct _cl_event *)event_list[0])
733 ->dispatch->clWaitForEvents(num_events, event_list));
735 hidden_alias(clWaitForEvents);
/* Loader entry point for clUnloadCompiler; takes no argument and is also
 * exported under a hidden alias.
 * NOTE(review): the body of this function is not visible here -- confirm
 * against the upstream file. */
737 CL_API_ENTRY cl_int CL_API_CALL
738 clUnloadCompiler( void ){
742 hidden_alias(clUnloadCompiler);