2 Copyright (c) 2012, Brice Videau <brice.videau@imag.fr>
3 Copyright (c) 2012, Vincent Danjean <Vincent.Danjean@ens-lyon.org>
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice, this
10 list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 #pragma GCC diagnostic push
34 # pragma GCC diagnostic ignored "-Wcpp"
35 # define CL_USE_DEPRECATED_OPENCL_1_1_APIS
36 # include <CL/opencl.h>
37 #pragma GCC diagnostic pop
39 #pragma GCC visibility push(hidden)
41 #include "ocl_icd_loader.h"
42 #define DEBUG_OCL_ICD_PROVIDE_DUMP_FIELD
43 #include "ocl_icd_debug.h"
45 #define ETC_OPENCL_VENDORS "/etc/OpenCL/vendors"
/* File-scope state and helper types of the loader.
 * NOTE(review): the embedded numbering skips lines in this span (48, 51-53,
 * 56, 58-60, 62, 64-66, 71), so the struct headers owning the member
 * declarations below are not visible here -- confirm against the full file. */
/* Bit mask of enabled debug categories; it is tested against D_DUMP further
 * down in this file. */
47 int debug_ocl_icd_mask=0;
/* Pointer-to-function types derived from the prototypes of the matching
 * OpenCL entry points. */
49 typedef __typeof__(clGetExtensionFunctionAddress) *clGetExtensionFunctionAddress_fn;
50 typedef __typeof__(clGetPlatformInfo) *clGetPlatformInfo_fn;
/* Members accessed through struct vendor_icd pointers in this file: number
 * of valid platforms of the vendor, index of its first platform, and the
 * vendor clGetExtensionFunctionAddress entry point. */
54 cl_uint num_platforms;
55 cl_uint first_platform;
57 clGetExtensionFunctionAddress_fn ext_fn_ptr;
/* Members accessed through struct platform_icd pointers in this file: the
 * ICD suffix string of the platform and a pointer to its vendor record. */
61 char * extension_suffix;
63 struct vendor_icd *vicd;
/* Vendor and platform tables, both heap allocated, and their entry counts. */
67 struct vendor_icd *_icds=NULL;
68 struct platform_icd *_picds=NULL;
69 static cl_uint _num_icds = 0;
70 static cl_uint _num_picds = 0;
/* NOTE(review): no use of _initialized is visible in this listing. */
72 static cl_uint _initialized = 0;
/* Table mapping OpenCL error codes to their symbolic names.
 * _clS(x) expands to a designated initializer that stores the spelling of x
 * at index -x, so the table is indexed by the negated error code.
 * MAX_CL_ERRORS names the last code covered and the array has
 * -MAX_CL_ERRORS+1 slots; slots without a _clS() entry stay null pointers.
 * NOTE(review): embedded numbers 74, 78, 90 and everything between 136 and
 * 141 (including the end of the initializer) are absent from this listing --
 * confirm against the full file. */
75 # define _clS(x) [-x] = #x
76 # define MAX_CL_ERRORS CL_INVALID_DEVICE_PARTITION_COUNT
77 static char const * const clErrorStr[-MAX_CL_ERRORS+1] = {
79 _clS(CL_DEVICE_NOT_FOUND),
80 _clS(CL_DEVICE_NOT_AVAILABLE),
81 _clS(CL_COMPILER_NOT_AVAILABLE),
82 _clS(CL_MEM_OBJECT_ALLOCATION_FAILURE),
83 _clS(CL_OUT_OF_RESOURCES),
84 _clS(CL_OUT_OF_HOST_MEMORY),
85 _clS(CL_PROFILING_INFO_NOT_AVAILABLE),
86 _clS(CL_MEM_COPY_OVERLAP),
87 _clS(CL_IMAGE_FORMAT_MISMATCH),
88 _clS(CL_IMAGE_FORMAT_NOT_SUPPORTED),
89 _clS(CL_BUILD_PROGRAM_FAILURE),
91 _clS(CL_MISALIGNED_SUB_BUFFER_OFFSET),
92 _clS(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST),
93 _clS(CL_COMPILE_PROGRAM_FAILURE),
94 _clS(CL_LINKER_NOT_AVAILABLE),
95 _clS(CL_LINK_PROGRAM_FAILURE),
96 _clS(CL_DEVICE_PARTITION_FAILED),
97 _clS(CL_KERNEL_ARG_INFO_NOT_AVAILABLE),
98 _clS(CL_INVALID_VALUE),
99 _clS(CL_INVALID_DEVICE_TYPE),
100 _clS(CL_INVALID_PLATFORM),
101 _clS(CL_INVALID_DEVICE),
102 _clS(CL_INVALID_CONTEXT),
103 _clS(CL_INVALID_QUEUE_PROPERTIES),
104 _clS(CL_INVALID_COMMAND_QUEUE),
105 _clS(CL_INVALID_HOST_PTR),
106 _clS(CL_INVALID_MEM_OBJECT),
107 _clS(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR),
108 _clS(CL_INVALID_IMAGE_SIZE),
109 _clS(CL_INVALID_SAMPLER),
110 _clS(CL_INVALID_BINARY),
111 _clS(CL_INVALID_BUILD_OPTIONS),
112 _clS(CL_INVALID_PROGRAM),
113 _clS(CL_INVALID_PROGRAM_EXECUTABLE),
114 _clS(CL_INVALID_KERNEL_NAME),
115 _clS(CL_INVALID_KERNEL_DEFINITION),
116 _clS(CL_INVALID_KERNEL),
117 _clS(CL_INVALID_ARG_INDEX),
118 _clS(CL_INVALID_ARG_VALUE),
119 _clS(CL_INVALID_ARG_SIZE),
120 _clS(CL_INVALID_KERNEL_ARGS),
121 _clS(CL_INVALID_WORK_DIMENSION),
122 _clS(CL_INVALID_WORK_GROUP_SIZE),
123 _clS(CL_INVALID_WORK_ITEM_SIZE),
124 _clS(CL_INVALID_GLOBAL_OFFSET),
125 _clS(CL_INVALID_EVENT_WAIT_LIST),
126 _clS(CL_INVALID_EVENT),
127 _clS(CL_INVALID_OPERATION),
128 _clS(CL_INVALID_GL_OBJECT),
129 _clS(CL_INVALID_BUFFER_SIZE),
130 _clS(CL_INVALID_MIP_LEVEL),
131 _clS(CL_INVALID_GLOBAL_WORK_SIZE),
132 _clS(CL_INVALID_PROPERTY),
133 _clS(CL_INVALID_IMAGE_DESCRIPTOR),
134 _clS(CL_INVALID_COMPILER_OPTIONS),
135 _clS(CL_INVALID_LINKER_OPTIONS),
136 _clS(CL_INVALID_DEVICE_PARTITION_COUNT)
/* Translate an OpenCL status code into a printable string.
 *
 * error: the cl_int status to describe.
 * Returns, through RETURN_STR, either a symbolic name looked up in
 * clErrorStr, the generic text OpenCL Error (after logging a warning) when
 * the code is not known, the text CL_SUCCESS, or the decimal spelling of the
 * code written into a function-local static buffer. That last result is
 * overwritten by the next call and is not safe to share between threads.
 *
 * The range test compares error itself with MAX_CL_ERRORS: clErrorStr is
 * indexed by -error and has -MAX_CL_ERRORS+1 slots, so the valid codes are
 * MAX_CL_ERRORS..0. The previous form of the test compared -error (never
 * negative for an error code) with that non-positive bound and therefore
 * classified every negative code as unknown.
 *
 * NOTE(review): embedded numbers 143, 147, 149, 152-154, 156, 158 and
 * 160-162 are absent from this listing, so the conditions selecting between
 * the paths below and the final return are not visible -- confirm against
 * the full file. */
141 static char* _clerror2string (cl_int error) __attribute__((unused));
142 static char* _clerror2string (cl_int error) {
/* Reject codes outside the range covered by the table. */
144 if (error < MAX_CL_ERRORS || error > 0) {
145 debug(D_WARN, "Unknown error code %d", error);
146 RETURN_STR("OpenCL Error");
/* Table lookup; a null slot means the code has no registered name. */
148 const char *ret=clErrorStr[-error];
150 debug(D_WARN, "Unknown error code %d", error);
151 RETURN_STR("OpenCL Error");
/* Numeric fallback: 15 bytes hold any 32-bit decimal value with its sign. */
155 static char number[15];
157 RETURN_STR("CL_SUCCESS");
159 snprintf(number, 15, "%i", error);
/* Scan a directory stream for entries whose name ends in .icd; judging by
 * its name and its counter this presumably returns how many were found.
 *
 * dir: an open directory stream, read with readdir() until it is exhausted.
 *
 * An entry fails the visible filter when its name is shorter than five
 * characters or its last four characters are not .icd.
 * NOTE(review): embedded numbers 166 and 170-176 are absent from this
 * listing, so the declaration of ent, the action taken on a filtered entry,
 * the counter update and the return statement are not visible -- confirm
 * against the full file. */
164 static inline cl_uint _find_num_icds(DIR *dir) {
165 cl_uint num_icds = 0;
167 while( (ent=readdir(dir)) != NULL ){
168 cl_uint d_name_len = strlen(ent->d_name);
169 if( d_name_len<5 || strcmp(ent->d_name + d_name_len - 4, ".icd" ) != 0 )
/* Walk the vendors directory, read the library path stored in each .icd
 * file and dlopen() that library into the next slot of _icds.
 *
 * dir:      open directory stream scanned with readdir().
 * dir_path: path of that directory, used to build the path of each entry.
 *
 * The caller stores the result as its ICD count, so this presumably returns
 * the number of libraries successfully opened.
 * NOTE(review): embedded numbers 179, 183-185, 191-192, 198-200, 203-205,
 * 207-209, 211, 214, 216, 220-221 and 223-228 are absent from this listing;
 * the error paths, the release of lib_path and of f, the counter update and
 * the return are therefore not visible -- confirm against the full file. */
177 static inline cl_uint _open_drivers(DIR *dir, const char* dir_path) {
178 cl_uint num_icds = 0;
180 while( (ent=readdir(dir)) != NULL ){
/* Same filter as _find_num_icds: only names ending in .icd are considered. */
181 cl_uint d_name_len = strlen(ent->d_name);
182 if( d_name_len<5 || strcmp(ent->d_name + d_name_len - 4, ".icd" ) != 0 )
/* Room for dir_path, one slash, the entry name and the terminating NUL. */
186 unsigned int lib_path_length = strlen(dir_path) + strlen(ent->d_name) + 2;
/* NOTE(review): the malloc result is used by sprintf on the very next
 * numbered line without a null check. */
187 lib_path = malloc(lib_path_length*sizeof(char));
188 sprintf(lib_path,"%s/%s", dir_path, ent->d_name);
189 debug(D_LOG, "Considering file '%s'", lib_path);
190 FILE *f = fopen(lib_path,"r");
/* Seek to the end to learn the file size; the buffer gets one extra byte.
 * NOTE(review): ftell can return -1, which would make the length 0 here. */
193 fseek(f, 0, SEEK_END);
194 lib_path_length = ftell(f)+1;
195 fseek(f, 0, SEEK_SET);
/* A length of 1 means the file is empty. */
196 if(lib_path_length == 1) {
197 debug(D_WARN, "File contents too short, skipping ICD");
/* lib_path is reused for the file contents; fgets reads at most one line. */
201 lib_path = malloc(lib_path_length*sizeof(char));
202 err = fgets(lib_path, lib_path_length, f);
206 debug(D_WARN, "Error while loading file contents, skipping ICD");
/* Drop a trailing newline so the text can be handed to dlopen. */
210 lib_path_length = strlen(lib_path);
212 if( lib_path[lib_path_length-1] == '\n' )
213 lib_path[lib_path_length-1] = '\0';
215 debug(D_LOG, "Loading ICD '%s'", lib_path);
/* Lazy binding, symbols kept local to the handle. */
217 _icds[num_icds].dl_handle = dlopen(lib_path, RTLD_LAZY|RTLD_LOCAL);//|RTLD_DEEPBIND);
218 if(_icds[num_icds].dl_handle != NULL) {
219 debug(D_LOG, "ICD[%i] loaded", num_icds);
222 debug(D_WARN, "error while dlopening the IDL, skipping ICD");
/* Resolve the address of an OpenCL entry point inside a loaded ICD library.
 *
 * dlh:  handle of the library, searched with dlsym() (result kept in addr1).
 * fn:   clGetExtensionFunctionAddress entry point of the vendor, or NULL;
 *       presumably used to obtain addr2, but that call is not visible here.
 * name: name of the function to look up.
 *
 * A warning is logged when a lookup fails and when both lookups succeed with
 * different addresses; addr2 falls back to addr1 when it is null.
 * NOTE(review): embedded numbers 230, 233, 235-239, 241-242, 245-247 and
 * 249-251 are absent from this listing, so the declarations, the conditions
 * guarding the two Missing warnings and the return statement are not
 * visible -- confirm against the full file. */
229 static void* _get_function_addr(void* dlh, clGetExtensionFunctionAddress_fn fn, const char*name) {
231 debug(D_LOG,"Looking for function %s",name);
232 addr1=dlsym(dlh, name);
234 debug(D_WARN, "Missing global symbol '%s' in ICD, should be skipped", name);
240 debug(D_WARN, "Missing function '%s' in ICD, should be skipped", name);
243 if (addr1 && addr2 && addr1!=addr2) {
244 debug(D_WARN, "Function and symbol '%s' have different addresses!", name);
/* Use the dlsym result when the other lookup gave nothing. */
248 if (!addr2) addr2=addr1;
/* Make room in the _picds array for req additional platform entries.
 *
 * req: number of additional platform slots wanted.
 *
 * The function-local static counter named allocated appears to track the
 * capacity of _picds, so the free room is allocated - _num_picds. When that
 * is smaller than req the visible code either malloc()s req entries or
 * grows the array with realloc() by the missing amount.
 * Returns, through RETURN, the free room allocated - _num_picds.
 * NOTE(review): embedded numbers 256, 258 and 261-263 are absent from this
 * listing, so the test choosing between malloc and realloc and the update
 * of the counter are not visible -- confirm against the full file.
 * NOTE(review): the realloc result overwrites _picds directly, so the old
 * array would be lost if realloc failed; also the unsigned difference is
 * compared with the signed req. */
252 static int _allocate_platforms(int req) {
253 static cl_uint allocated=0;
254 debug(D_LOG,"Requesting allocation for %d platforms",req);
255 if (allocated - _num_picds < req) {
257 _picds=(struct platform_icd*)malloc(req*sizeof(struct platform_icd));
/* Only the part of req not covered by the current free room is added. */
259 req = req - (allocated - _num_picds);
260 _picds=(struct platform_icd*)realloc(_picds, (allocated+req)*sizeof(struct platform_icd));
264 RETURN(allocated - _num_picds);
/* Query a string-valued platform property into a freshly allocated buffer.
 *
 * plt_info_ptr: clGetPlatformInfo entry point of the vendor.
 * pid:          platform to query.
 * cname:        property identifier passed to the entry point.
 * sname:        human-readable property name, used only in log messages.
 *
 * The entry point is called twice: first with a zero-sized buffer to learn
 * the size of the value, then with a malloc()ed buffer of that size.
 * Returns, through RETURN_STR, the buffer holding the value; the caller owns
 * it. Each failure (either query or the allocation) logs a warning.
 *
 * The address-of operator on the first query was mis-encoded in this listing
 * (a paragraph sign followed by m_value_size_ret); it is restored here.
 *
 * NOTE(review): embedded numbers 269, 275-276, 280-282, 285, 288-289 and 291
 * are absent from this listing, so the declaration of error, the value
 * returned on the failure paths and the release of the buffer after a failed
 * second query are not visible -- confirm against the full file. */
267 static char* _malloc_clGetPlatformInfo(clGetPlatformInfo_fn plt_info_ptr,
268 cl_platform_id pid, cl_platform_info cname, char* sname) {
270 size_t param_value_size_ret;
/* Size query: no buffer, only the required size is written back. */
271 error = plt_info_ptr(pid, cname, 0, NULL, &param_value_size_ret);
272 if (error != CL_SUCCESS) {
273 debug(D_WARN, "Error %s while requesting %s in platform %p",
274 _clerror2string(error), sname, pid);
277 char *param_value = (char *)malloc(sizeof(char)*param_value_size_ret);
278 if (param_value == NULL) {
279 debug(D_WARN, "Error in malloc while requesting %s in platform %p",
/* Value query into the buffer sized by the first call. */
283 error = plt_info_ptr(pid, cname, param_value_size_ret, param_value, NULL);
284 if (error != CL_SUCCESS){
286 debug(D_WARN, "Error %s while requesting %s in platform %p",
287 _clerror2string(error), sname, pid);
290 RETURN_STR(param_value);
/* Validate every opened ICD library and register its usable platforms.
 *
 * num_icds: number of entries of _icds holding an opened library handle.
 *
 * For each library the three entry points clGetExtensionFunctionAddress,
 * clIcdGetPlatformIDsKHR and clGetPlatformInfo are resolved, the platform
 * list is fetched, and each platform is checked for the cl_khr_icd extension
 * and an ICD suffix before being counted as valid. Vendors with at least one
 * valid platform are compacted towards the front of _icds: picd always
 * points at slot _num_icds, the next free compacted slot, while i walks the
 * slots filled by the directory scan.
 *
 * first_platform is stored through picd like the other per-vendor fields.
 * It used to be written to _icds[i], the uncompacted slot, whose contents
 * are later overwritten or cut off when the array is shrunk to _num_icds
 * entries.
 *
 * NOTE(review): many embedded numbers are absent from this listing (294-295,
 * 309-310, 312, 316-317, 321, 323-324, 326, 329, 331-332, 337-339, 342-343,
 * 347-348, 351, 353-355, 359-360, 363, 367-369, 374-376, 380-381, 385-387,
 * 389-390, 394-395, 398-404). The skip statements, the assignments of p->vicd
 * and p->pid, the release of the temporary strings, the updates of _num_picds
 * and _num_icds and the handling of vendors without valid platforms are
 * therefore not visible -- confirm against the full file.
 * NOTE(review): the malloc of the platforms array is used on the next
 * numbered line without a null check. */
293 static inline void _find_and_check_platforms(cl_uint num_icds) {
296 for( i=0; i<num_icds; i++){
297 debug(D_LOG, "Checking ICD %i", i);
/* Destination slot for this vendor if it turns out to be valid. */
298 struct vendor_icd *picd = &_icds[_num_icds];
299 void* dlh = _icds[i].dl_handle;
/* The first lookup has no extension resolver yet, hence the NULL. */
300 picd->ext_fn_ptr = _get_function_addr(dlh, NULL, "clGetExtensionFunctionAddress");
301 clIcdGetPlatformIDsKHR_fn plt_fn_ptr =
302 _get_function_addr(dlh, picd->ext_fn_ptr, "clIcdGetPlatformIDsKHR");
303 clGetPlatformInfo_fn plt_info_ptr =
304 _get_function_addr(dlh, picd->ext_fn_ptr, "clGetPlatformInfo");
305 if( picd->ext_fn_ptr == NULL
306 || plt_fn_ptr == NULL
307 || plt_info_ptr == NULL) {
308 debug(D_WARN, "Missing symbols in ICD, skipping it");
311 cl_uint num_platforms=0;
/* Count query first, then fetch the platform identifiers. */
313 error = (*plt_fn_ptr)(0, NULL, &num_platforms);
314 if( error != CL_SUCCESS || num_platforms == 0) {
315 debug(D_LOG, "No platform in ICD, skipping it");
318 cl_platform_id *platforms = (cl_platform_id *) malloc( sizeof(cl_platform_id) * num_platforms);
319 error = (*plt_fn_ptr)(num_platforms, platforms, NULL);
320 if( error != CL_SUCCESS ){
322 debug(D_WARN, "Error in loading ICD platforms, skipping ICD");
325 cl_uint num_valid_platforms=0;
327 debug(D_LOG, "Try to load %d plateforms", num_platforms);
/* Make sure _picds can take every platform of this vendor. */
328 if (_allocate_platforms(num_platforms) < num_platforms) {
330 debug(D_WARN, "Not enought platform allocated. Skipping ICD");
333 for(j=0; j<num_platforms; j++) {
334 debug(D_LOG, "Checking platform %i", j);
/* Candidate slot: the next free entry of the platform table. */
335 struct platform_icd *p=&_picds[_num_picds];
336 p->extension_suffix=NULL;
340 if (debug_ocl_icd_mask & D_DUMP) {
341 dump_platform(p->vicd->ext_fn_ptr, p->pid);
/* The platform must advertise the cl_khr_icd extension. */
344 char *param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_EXTENSIONS, "extensions");
345 if (param_value == NULL){
346 debug(D_WARN, "Skipping platform %i", j);
349 debug(D_DUMP, "Supported extensions: %s", param_value);
350 if( strstr(param_value, "cl_khr_icd") == NULL){
352 debug(D_WARN, "Missing khr extension in platform %i, skipping it", j);
/* The suffix string is kept in the platform record. */
356 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_ICD_SUFFIX_KHR, "suffix");
357 if (param_value == NULL){
358 debug(D_WARN, "Skipping platform %i", j);
361 p->extension_suffix = param_value;
362 debug(D_DUMP|D_LOG, "Extension suffix: %s", param_value);
/* The remaining properties are optional in the visible code: a null result
 * only suppresses the dump line. */
364 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_PROFILE, "profile");
365 if (param_value != NULL){
366 debug(D_DUMP, "Profile: %s", param_value);
370 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_VERSION, "version");
371 p->version = param_value;
372 if (param_value != NULL){
373 debug(D_DUMP, "Version: %s", param_value);
377 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_NAME, "name");
378 if (param_value != NULL){
379 debug(D_DUMP, "Name: %s", param_value);
382 param_value=_malloc_clGetPlatformInfo(plt_info_ptr, p->pid, CL_PLATFORM_VENDOR, "vendor");
383 if (param_value != NULL){
384 debug(D_DUMP, "Vendor: %s", param_value);
388 num_valid_platforms++;
391 if( num_valid_platforms != 0 ) {
/* Move the handle only when the vendor is actually relocated. */
392 if ( _num_icds != i ) {
393 picd->dl_handle = dlh;
396 picd->num_platforms = num_valid_platforms;
/* Index of the first of the platforms just appended for this vendor. */
397 picd->first_platform = _num_picds - num_valid_platforms;
/* One-time initialisation of the loader: locate the vendors directory, load
 * every ICD listed there and register the platforms they expose.
 *
 * The directory comes from the OCL_ICD_VENDORS environment variable and
 * falls back to ETC_OPENCL_VENDORS when the variable is unset or empty.
 * NOTE(review): embedded numbers 406, 408, 413, 416-419, 421-424, 426-429,
 * 431-434, 436-439, 442 and 444-453 are absent from this listing; the error
 * handling after opendir and malloc, the repositioning of the directory
 * stream between the two scans and its closing are therefore not visible --
 * confirm against the full file. */
405 static void __initClIcd( void ) {
407 cl_uint num_icds = 0;
409 const char* dir_path=getenv("OCL_ICD_VENDORS");
410 if (! dir_path || dir_path[0]==0) {
411 debug(D_DUMP, "OCL_ICD_VENDORS empty or not defined, using %s", ETC_OPENCL_VENDORS);
412 dir_path=ETC_OPENCL_VENDORS;
414 debug(D_LOG,"Reading icd list from '%s'", dir_path);
415 dir = opendir(dir_path);
/* First pass: count the candidate files to size the vendor table. */
420 num_icds = _find_num_icds(dir);
425 _icds = (struct vendor_icd*)malloc(num_icds * sizeof(struct vendor_icd));
/* Second pass: open the libraries; the count is replaced by the result. */
430 num_icds = _open_drivers(dir, dir_path);
/* Validation compacts the table and sets _num_icds and _num_picds. */
435 _find_and_check_platforms(num_icds);
/* Shrink the vendor table to the validated entries.
 * NOTE(review): the realloc result overwrites _icds directly. */
440 if (_num_icds < num_icds) {
441 _icds = (struct vendor_icd*)realloc(_icds, _num_icds * sizeof(struct vendor_icd));
443 debug(D_WARN, "%d valid vendor(s)!", _num_icds);
/* Entry guard for the loader: runs __initClIcd through pthread_once so the
 * initialisation body executes a single time for the once_init control.
 * The existing comment on the last visible line indicates an alternative
 * path without pthread that assumes no concurrency.
 * NOTE(review): embedded numbers 456-458, 460 and 462-466 are absent from
 * this listing, so the conditions selecting between the two paths are not
 * visible -- confirm against the full file. */
454 static pthread_once_t once_init = PTHREAD_ONCE_INIT;
455 static inline void _initClIcd( void ) {
459 pthread_once(&once_init, &__initClIcd);
461 /* No pthread, assuming no concurrency */
/* End of the region compiled with hidden default visibility. */
467 #pragma GCC visibility pop
/* hidden_alias(name) declares name_hid, an alias of name with the same type
 * and hidden visibility. It is applied after each public entry point below.
 * NOTE(review): presumably this lets calls made inside the library bypass
 * symbol interposition -- confirm against the callers of the _hid names. */
468 #define hidden_alias(name) \
469 typeof(name) name##_hid __attribute__ ((alias (#name), visibility("hidden")))
/* Loader implementation of clGetExtensionFunctionAddress.
 *
 * func_name: name of the extension function to resolve.
 *
 * The name is first compared with the entries of the function_description
 * table, whose end is marked by a null name. The registered platforms are
 * then scanned: when func_name ends with the ICD suffix of a platform, the
 * request is forwarded to the clGetExtensionFunctionAddress entry point of
 * the vendor owning that platform. Otherwise return_value, initialised to
 * NULL, is returned through RETURN.
 * NOTE(review): embedded numbers 473-474, 476, 478, 483-485, 489, 492 and
 * 494 are absent from this listing, so the result for a null func_name, the
 * action taken on a table match, the advance of fn and the action for a
 * suffix longer than the name are not visible -- confirm against the full
 * file. */
471 CL_API_ENTRY void * CL_API_CALL
472 clGetExtensionFunctionAddress(const char * func_name) CL_API_SUFFIX__VERSION_1_0 {
475 if( func_name == NULL )
477 cl_uint suffix_length;
479 void * return_value=NULL;
480 struct func_desc const * fn=&function_description[0];
481 while (fn->name != NULL) {
482 if (strcmp(func_name, fn->name)==0)
486 for(i=0; i<_num_picds; i++) {
487 suffix_length = strlen(_picds[i].extension_suffix);
488 if( suffix_length > strlen(func_name) )
/* Compare the suffix with the tail of func_name of the same length. */
490 if(strcmp(_picds[i].extension_suffix, &func_name[strlen(func_name)-suffix_length]) == 0)
491 RETURN((*_picds[i].vicd->ext_fn_ptr)(func_name));
493 RETURN(return_value);
495 hidden_alias(clGetExtensionFunctionAddress);
/* Loader implementation of clGetPlatformIDs: reports the platforms
 * registered in _picds.
 *
 * num_entries:   capacity of the platforms array.
 * platforms:     output array for the platform identifiers, or NULL.
 * num_platforms: receives the total number of registered platforms, or NULL.
 *
 * Returns CL_INVALID_VALUE when both outputs are NULL or when an array is
 * supplied with num_entries equal to 0. CL_PLATFORM_NOT_FOUND_KHR is also
 * returned on a path whose condition is not visible here. At most
 * num_entries identifiers are copied.
 * NOTE(review): embedded numbers 501-502, 507, 509-510, 513 and 518-521 are
 * absent from this listing, so the initialisation call, the condition for
 * the not-found result and the final return are not visible -- confirm
 * against the full file. */
497 CL_API_ENTRY cl_int CL_API_CALL
498 clGetPlatformIDs(cl_uint num_entries,
499 cl_platform_id * platforms,
500 cl_uint * num_platforms) CL_API_SUFFIX__VERSION_1_0 {
503 if( platforms == NULL && num_platforms == NULL )
504 RETURN(CL_INVALID_VALUE);
505 if( num_entries == 0 && platforms != NULL )
506 RETURN(CL_INVALID_VALUE);
508 RETURN(CL_PLATFORM_NOT_FOUND_KHR);
511 if( num_platforms != NULL ){
512 *num_platforms = _num_picds;
514 if( platforms != NULL ) {
/* Copy no more than the caller has room for. */
515 cl_uint n_platforms = _num_picds < num_entries ? _num_picds : num_entries;
516 for( i=0; i<n_platforms; i++) {
517 *(platforms++) = _picds[i].pid;
522 hidden_alias(clGetPlatformIDs);
/* Loader implementation of clCreateContext: forwards the call to the vendor
 * implementation selected by the arguments.
 *
 * properties:  zero-terminated property list, or NULL.
 * num_devices: number of entries in devices.
 * devices:     devices for the new context.
 * pfn_notify:  error callback, passed through unchanged.
 * errcode_ret: receives the status code.
 *
 * When the property list holds a CL_CONTEXT_PLATFORM key, the value that
 * follows it is treated as the platform and the call goes through the
 * dispatch table of that platform. Otherwise the dispatch table of the
 * first device is used; a NULL or empty device list stores CL_INVALID_VALUE
 * in errcode_ret.
 * NOTE(review): embedded numbers 529, 531-533, 540-542, 544, 546-548 and 552
 * are absent from this listing, so the user_data parameter, the declaration
 * and stepping of i, any null check guarding errcode_ret and the value
 * returned on the error path are not visible -- confirm against the full
 * file. */
524 CL_API_ENTRY cl_context CL_API_CALL
525 clCreateContext(const cl_context_properties * properties ,
526 cl_uint num_devices ,
527 const cl_device_id * devices ,
528 void (CL_CALLBACK * pfn_notify )(const char *, const void *, size_t, void *),
530 cl_int * errcode_ret ){
534 if( properties != NULL){
535 while( properties[i] != 0 ) {
536 if( properties[i] == CL_CONTEXT_PLATFORM )
537 RETURN(((struct _cl_platform_id *) properties[i+1])
538 ->dispatch->clCreateContext(properties, num_devices, devices,
539 pfn_notify, user_data, errcode_ret));
543 if(devices == NULL || num_devices == 0) {
545 *errcode_ret = CL_INVALID_VALUE;
549 RETURN(((struct _cl_device_id *)devices[0])
550 ->dispatch->clCreateContext(properties, num_devices, devices,
551 pfn_notify, user_data, errcode_ret));
553 hidden_alias(clCreateContext);
/* Loader implementation of clCreateContextFromType: forwards the call to the
 * vendor implementation selected by the property list.
 *
 * properties:  zero-terminated property list, or NULL.
 * device_type: device type filter, passed through unchanged.
 * pfn_notify:  error callback, passed through unchanged.
 * errcode_ret: receives the status code.
 *
 * When the property list holds a CL_CONTEXT_PLATFORM key with a non-zero
 * value, the call goes through the dispatch table of that platform. Another
 * visible path uses the first registered platform and stores
 * CL_INVALID_VALUE in errcode_ret when no platform is registered. The last
 * visible path stores CL_INVALID_PLATFORM in errcode_ret.
 * NOTE(review): embedded numbers 559, 561-563, 568-569, 573-575, 578, 580,
 * 582-584, 587-589 and 591-593 are absent from this listing, so the
 * user_data parameter, the stepping of i, the action for a zero platform
 * value, the end of the in-body comment and the returned values on the error
 * paths are not visible -- confirm against the full file.
 * NOTE(review): the platform dispatch uses a plain return while the other
 * paths use the RETURN macro. */
555 CL_API_ENTRY cl_context CL_API_CALL
556 clCreateContextFromType(const cl_context_properties * properties ,
557 cl_device_type device_type ,
558 void (CL_CALLBACK * pfn_notify )(const char *, const void *, size_t, void *),
560 cl_int * errcode_ret ){
564 if( properties != NULL){
565 while( properties[i] != 0 ) {
566 if( properties[i] == CL_CONTEXT_PLATFORM )
567 if (properties[i+1] == 0) {
570 return ((struct _cl_platform_id *) properties[i+1])
571 ->dispatch->clCreateContextFromType(properties, device_type,
572 pfn_notify, user_data, errcode_ret);
576 /* if properties is null, the selected platform is implementation dependant
577 * We will use the first one if any
579 if(_num_picds == 0) {
581 *errcode_ret = CL_INVALID_VALUE;
585 RETURN(_picds[0].pid->dispatch->clCreateContextFromType
586 (properties, device_type, pfn_notify, user_data, errcode_ret));
590 *errcode_ret = CL_INVALID_PLATFORM;
594 hidden_alias(clCreateContextFromType);
// Loader implementation of clGetGLContextInfoKHR: forwards the query to the
// platform named in the property list.
//
// properties:           zero-terminated property list, or NULL.
// param_name:           identifier of the value being queried.
// param_value_size:     size of the output buffer.
// param_value_size_ret: receives the size of the value.
//
// When the list holds a CL_CONTEXT_PLATFORM key, the value that follows it
// is treated as the platform and the call goes through its dispatch table.
// Otherwise CL_INVALID_PLATFORM is returned through RETURN.
// NOTE(review): embedded numbers 600, 602-604, 611-613 and 615 are absent
// from this listing, so the param_value parameter and the declaration and
// stepping of i are not visible -- confirm against the full file.
596 CL_API_ENTRY cl_int CL_API_CALL
597 clGetGLContextInfoKHR(const cl_context_properties * properties ,
598 cl_gl_context_info param_name ,
599 size_t param_value_size ,
601 size_t * param_value_size_ret ){
605 if( properties != NULL){
606 while( properties[i] != 0 ) {
607 if( properties[i] == CL_CONTEXT_PLATFORM )
608 RETURN(((struct _cl_platform_id *) properties[i+1])
609 ->dispatch->clGetGLContextInfoKHR(properties, param_name,
610 param_value_size, param_value, param_value_size_ret));
614 RETURN(CL_INVALID_PLATFORM);
616 hidden_alias(clGetGLContextInfoKHR);
// Loader implementation of clWaitForEvents: forwards the wait to the vendor
// that owns the first event of the list.
//
// num_events: number of entries in event_list.
// event_list: events to wait for.
//
// Returns CL_INVALID_VALUE when the list is NULL or empty; otherwise the
// result of the clWaitForEvents entry found in the dispatch table of
// event_list[0]. Only the first event is inspected to choose the vendor.
// NOTE(review): embedded numbers 621 and 626 are absent from this listing --
// confirm against the full file.
618 CL_API_ENTRY cl_int CL_API_CALL
619 clWaitForEvents(cl_uint num_events ,
620 const cl_event * event_list ){
622 if( num_events == 0 || event_list == NULL )
623 RETURN(CL_INVALID_VALUE);
624 RETURN(((struct _cl_event *)event_list[0])
625 ->dispatch->clWaitForEvents(num_events, event_list));
627 hidden_alias(clWaitForEvents);
// Loader implementation of clUnloadCompiler, which takes no argument and
// returns a cl_int status.
// NOTE(review): embedded numbers 631-633 are absent from this listing, so
// the whole body, including the returned status, is not visible -- confirm
// against the full file.
629 CL_API_ENTRY cl_int CL_API_CALL
630 clUnloadCompiler( void ){
634 hidden_alias(clUnloadCompiler);