1 /* Collect all available information on all available devices
2 * on all available OpenCL platforms present in the system
8 /* We will want to check for symbols in the OpenCL library.
9 * On Windows, we must get the module handle for it, on Unix-like
10 * systems we can just use RTLD_DEFAULT
14 # define dlsym GetProcAddress
15 # define DL_MODULE GetModuleHandle("OpenCL")
18 # define DL_MODULE ((void*)0) /* This would be RTLD_DEFAULT */
21 /* Load STDC format macros (PRI*), or define them
22 * for those crappy, non-standard compilers
24 #include "fmtmacros.h"
26 // More support for the horrible MS C compiler
28 #include "ms_support.h"
41 #define ARRAY_SIZE(ar) (sizeof(ar)/sizeof(*ar))
44 #define UNUSED(x) x __attribute__((unused))
47 struct platform_data {
48 char *pname; /* CL_PLATFORM_NAME */
49 char *sname; /* CL_PLATFORM_ICD_SUFFIX_KHR or surrogate */
50 cl_uint ndevs; /* number of devices */
51 cl_bool has_amd_offline; /* has cl_amd_offline_devices extension */
54 struct platform_info_checks {
57 cl_bool has_amd_object_metadata;
60 struct platform_list {
61 /* Number of platforms in the system */
62 cl_uint num_platforms;
63 /* Total number of devices across all platforms */
65 /* Number of devices allocated in all_devs array */
67 /* Highest OpenCL version supported by any platform.
68 * If the OpenCL library / ICD loader only supports
69 * a lower version, problems may arise (such as
70 * API calls causing segfaults or any other unexpected
73 cl_uint max_plat_version;
74 /* Largest number of devices on any platform */
76 /* Length of the longest platform sname */
78 /* Array of platform IDs */
79 cl_platform_id *platform;
80 /* Array of device IDs (across all platforms) */
81 cl_device_id *all_devs;
82 /* Array of offsets in all_devs where the devices
83 * of each platform begin */
85 /* Array of clinfo-specific platform data */
86 struct platform_data *pdata;
87 /* Arrau of clinfo-specifici platform checks */
88 struct platform_info_checks *platform_checks;
92 init_plist(struct platform_list *plist)
94 plist->num_platforms = 0;
95 plist->ndevs_total = 0;
96 plist->alloc_devs = 0;
97 plist->max_plat_version = 0;
98 plist->platform = NULL;
99 plist->all_devs = NULL;
100 plist->dev_offset = NULL;
102 plist->platform_checks = NULL;
105 void plist_devs_reserve(struct platform_list *plist, cl_uint amount)
107 if (amount > plist->alloc_devs) {
108 REALLOC(plist->all_devs, amount, "all devices");
109 plist->alloc_devs = amount;
115 alloc_plist(struct platform_list *plist)
117 ALLOC(plist->platform, plist->num_platforms, "platform IDs");
118 ALLOC(plist->dev_offset, plist->num_platforms, "platform device list offset");
119 /* The actual sizing for this will change as we gather platform info,
120 * but assume at least one device per platform
122 plist_devs_reserve(plist, plist->num_platforms);
123 ALLOC(plist->pdata, plist->num_platforms, "platform data");
124 ALLOC(plist->platform_checks, plist->num_platforms, "platform checks data");
127 free_plist(struct platform_list *plist)
129 free(plist->platform);
130 free(plist->all_devs);
131 free(plist->dev_offset);
133 free(plist->platform_checks);
138 get_platform_devs(const struct platform_list *plist, cl_uint p)
140 return plist->all_devs + plist->dev_offset[p];
144 get_platform_dev(const struct platform_list *plist, cl_uint p, cl_uint d)
146 return get_platform_devs(plist, p)[d];
149 /* Data for the OpenCL library / ICD loader */
151 /* auto-detected OpenCL version support for the ICD loader */
152 cl_uint detected_version;
153 /* OpenCL version support declared by the ICD loader */
154 cl_uint reported_version;
157 /* line prefix, used to identify the platform/device for each
158 * device property in RAW output mode */
162 #define CHECK_SIZE(ret, loc, val, cmd, ...) do { \
163 /* check if the issue is with param size */ \
164 if (output->check_size && ret->err == CL_INVALID_VALUE) { \
166 if (cmd(__VA_ARGS__, 0, NULL, &_actual_sz) == CL_SUCCESS) { \
167 REPORT_SIZE_MISMATCH(&(ret->err_str), loc, _actual_sz, sizeof(val)); \
172 static const char unk[] = "Unknown";
173 static const char none[] = "None";
174 static const char none_raw[] = "CL_NONE";
175 static const char na[] = "n/a"; // not available
176 static const char na_wrap[] = "(n/a)"; // not available
177 static const char core[] = "core";
179 static const char bytes_str[] = " bytes";
180 static const char pixels_str[] = " pixels";
181 static const char images_str[] = " images";
183 static const char* bool_str[] = { "No", "Yes" };
184 static const char* bool_raw_str[] = { "CL_FALSE", "CL_TRUE" };
186 static const char* endian_str[] = { "Big-Endian", "Little-Endian" };
188 static const cl_device_type devtype[] = { 0,
189 CL_DEVICE_TYPE_DEFAULT, CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU,
190 CL_DEVICE_TYPE_ACCELERATOR, CL_DEVICE_TYPE_CUSTOM, CL_DEVICE_TYPE_ALL };
192 const size_t devtype_count = ARRAY_SIZE(devtype);
193 /* number of actual device types, without ALL */
194 const size_t actual_devtype_count = ARRAY_SIZE(devtype) - 1;
196 static const char* device_type_str[] = { unk, "Default", "CPU", "GPU", "Accelerator", "Custom", "All" };
197 static const char* device_type_raw_str[] = { unk,
198 "CL_DEVICE_TYPE_DEFAULT", "CL_DEVICE_TYPE_CPU", "CL_DEVICE_TYPE_GPU",
199 "CL_DEVICE_TYPE_ACCELERATOR", "CL_DEVICE_TYPE_CUSTOM", "CL_DEVICE_TYPE_ALL"
202 static const char* partition_type_str[] = {
203 none, "equally", "by counts", "by affinity domain", "by names (Intel)"
205 static const char* partition_type_raw_str[] = {
207 "CL_DEVICE_PARTITION_EQUALLY_EXT",
208 "CL_DEVICE_PARTITION_BY_COUNTS_EXT",
209 "CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT",
210 "CL_DEVICE_PARTITION_BY_NAMES_INTEL_EXT"
213 static const char numa[] = "NUMA";
214 static const char l1cache[] = "L1 cache";
215 static const char l2cache[] = "L2 cache";
216 static const char l3cache[] = "L3 cache";
217 static const char l4cache[] = "L4 cache";
219 static const char* affinity_domain_str[] = {
220 numa, l4cache, l3cache, l2cache, l1cache, "next partitionable"
223 static const char* affinity_domain_ext_str[] = {
224 numa, l4cache, l3cache, l2cache, l1cache, "next fissionable"
227 static const char* affinity_domain_raw_str[] = {
228 "CL_DEVICE_AFFINITY_DOMAIN_NUMA",
229 "CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE",
230 "CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE",
231 "CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE",
232 "CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE",
233 "CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE"
236 static const char* affinity_domain_raw_ext_str[] = {
237 "CL_AFFINITY_DOMAIN_NUMA_EXT",
238 "CL_AFFINITY_DOMAIN_L4_CACHE_EXT",
239 "CL_AFFINITY_DOMAIN_L3_CACHE_EXT",
240 "CL_AFFINITY_DOMAIN_L2_CACHE_EXT",
241 "CL_AFFINITY_DOMAIN_L1_CACHE_EXT",
242 "CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT"
245 const size_t affinity_domain_count = ARRAY_SIZE(affinity_domain_str);
247 static const char *terminate_capability_str[] = {
251 static const char *terminate_capability_raw_str[] = {
252 "CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR"
255 const size_t terminate_capability_count = ARRAY_SIZE(terminate_capability_str);
257 static const char* fp_conf_str[] = {
258 "Denormals", "Infinity and NANs", "Round to nearest", "Round to zero",
259 "Round to infinity", "IEEE754-2008 fused multiply-add",
260 "Support is emulated in software",
261 "Correctly-rounded divide and sqrt operations"
264 static const char* fp_conf_raw_str[] = {
267 "CL_FP_ROUND_TO_NEAREST",
268 "CL_FP_ROUND_TO_ZERO",
269 "CL_FP_ROUND_TO_INF",
272 "CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT"
275 const size_t fp_conf_count = ARRAY_SIZE(fp_conf_str);
277 static const char* svm_cap_str[] = {
278 "Coarse-grained buffer sharing",
279 "Fine-grained buffer sharing",
280 "Fine-grained system sharing",
284 static const char* svm_cap_raw_str[] = {
285 "CL_DEVICE_SVM_COARSE_GRAIN_BUFFER",
286 "CL_DEVICE_SVM_FINE_GRAIN_BUFFER",
287 "CL_DEVICE_SVM_FINE_GRAIN_SYSTEM",
288 "CL_DEVICE_SVM_ATOMICS",
291 const size_t svm_cap_count = ARRAY_SIZE(svm_cap_str);
293 /* SI suffixes for memory sizes. Note that in OpenCL most of them are
294 * passed via a cl_ulong, which at most can mode 16 EiB, but hey,
295 * let's be forward-thinking ;-)
297 static const char* memsfx[] = {
298 "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
301 const size_t memsfx_end = ARRAY_SIZE(memsfx) + 1;
303 static const char* lmem_type_str[] = { none, "Local", "Global" };
304 static const char* lmem_type_raw_str[] = { none_raw, "CL_LOCAL", "CL_GLOBAL" };
305 static const char* cache_type_str[] = { none, "Read-Only", "Read/Write" };
306 static const char* cache_type_raw_str[] = { none_raw, "CL_READ_ONLY_CACHE", "CL_READ_WRITE_CACHE" };
308 static const char* queue_prop_str[] = { "Out-of-order execution", "Profiling" };
309 static const char* queue_prop_raw_str[] = {
310 "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE",
311 "CL_QUEUE_PROFILING_ENABLE"
314 const size_t queue_prop_count = ARRAY_SIZE(queue_prop_str);
316 static const char* execap_str[] = { "Run OpenCL kernels", "Run native kernels" };
317 static const char* execap_raw_str[] = {
319 "CL_EXEC_NATIVE_KERNEL"
322 const size_t execap_count = ARRAY_SIZE(execap_str);
324 static const char* sources[] = {
325 "#define GWO(type) global type* restrict\n",
326 "#define GRO(type) global const type* restrict\n",
327 "#define BODY int i = get_global_id(0); out[i] = in1[i] + in2[i]\n",
328 "#define _KRN(T, N) kernel void sum##N(GWO(T##N) out, GRO(T##N) in1, GRO(T##N) in2) { BODY; }\n",
329 "#define KRN(N) _KRN(float, N)\n",
330 "KRN()\n/* KRN(2)\nKRN(4)\nKRN(8)\nKRN(16) */\n",
333 const char *num_devs_header(const struct opt_out *output, cl_bool these_are_offline)
335 return output->mode == CLINFO_HUMAN ?
336 (these_are_offline ? "Number of offine devices (AMD)" : "Number of devices") :
337 (these_are_offline ? "#OFFDEVICES" : "#DEVICES");
340 const char *not_specified(const struct opt_out *output)
342 return output->mode == CLINFO_HUMAN ?
346 const char *no_plat(const struct opt_out *output)
348 return output->mode == CLINFO_HUMAN ?
350 "CL_INVALID_PLATFORM";
353 const char *invalid_dev_type(const struct opt_out *output)
355 return output->mode == CLINFO_HUMAN ?
356 "Invalid device type for platform" :
357 "CL_INVALID_DEVICE_TYPE";
360 const char *invalid_dev_value(const struct opt_out *output)
362 return output->mode == CLINFO_HUMAN ?
363 "Invalid device type value for platform" :
367 const char *no_dev_found(const struct opt_out *output)
369 return output->mode == CLINFO_HUMAN ?
370 "No devices found in platform" :
371 "CL_DEVICE_NOT_FOUND";
374 const char *no_dev_avail(const struct opt_out *output)
376 return output->mode == CLINFO_HUMAN ?
377 "No devices available in platform" :
378 "CL_DEVICE_NOT_AVAILABLE";
381 /* OpenCL context interop names */
383 typedef struct cl_interop_name {
386 /* 5 because that's the largest we know of,
387 * 2 because it's HUMAN, RAW */
388 const char *value[5][2];
391 static const cl_interop_name cl_interop_names[] = {
392 { /* cl_khr_gl_sharing */
394 CL_CGL_SHAREGROUP_KHR,
396 { "GL", "CL_GL_CONTEXT_KHR" },
397 { "EGL", "CL_EGL_DISPALY_KHR" },
398 { "GLX", "CL_GLX_DISPLAY_KHR" },
399 { "WGL", "CL_WGL_HDC_KHR" },
400 { "CGL", "CL_CGL_SHAREGROUP_KHR" }
403 { /* cl_khr_dx9_media_sharing */
404 CL_CONTEXT_ADAPTER_D3D9_KHR,
405 CL_CONTEXT_ADAPTER_DXVA_KHR,
407 { "D3D9 (KHR)", "CL_CONTEXT_ADAPTER_D3D9_KHR" },
408 { "D3D9Ex (KHR)", "CL_CONTEXT_ADAPTER_D3D9EX_KHR" },
409 { "DXVA (KHR)", "CL_CONTEXT_ADAPTER_DXVA_KHR" }
412 { /* cl_khr_d3d10_sharing */
413 CL_CONTEXT_D3D10_DEVICE_KHR,
414 CL_CONTEXT_D3D10_DEVICE_KHR,
416 { "D3D10", "CL_CONTEXT_D3D10_DEVICE_KHR" }
419 { /* cl_khr_d3d11_sharing */
420 CL_CONTEXT_D3D11_DEVICE_KHR,
421 CL_CONTEXT_D3D11_DEVICE_KHR,
423 { "D3D11", "CL_CONTEXT_D3D11_DEVICE_KHR" }
426 /* cl_intel_dx9_media_sharing is split in two because the allowed values are not consecutive */
427 { /* cl_intel_dx9_media_sharing part 1 */
428 CL_CONTEXT_D3D9_DEVICE_INTEL,
429 CL_CONTEXT_D3D9_DEVICE_INTEL,
431 { "D3D9 (INTEL)", "CL_CONTEXT_D3D9_DEVICE_INTEL" }
434 { /* cl_intel_dx9_media_sharing part 2 */
435 CL_CONTEXT_D3D9EX_DEVICE_INTEL,
436 CL_CONTEXT_DXVA_DEVICE_INTEL,
438 { "D3D9Ex (INTEL)", "CL_CONTEXT_D3D9EX_DEVICE_INTEL" },
439 { "DXVA (INTEL)", "CL_CONTEXT_DXVA_DEVICE_INTEL" }
442 { /* cl_intel_va_api_media_sharing */
443 CL_CONTEXT_VA_API_DISPLAY_INTEL,
444 CL_CONTEXT_VA_API_DISPLAY_INTEL,
446 { "VA-API", "CL_CONTEXT_VA_API_DISPLAY_INTEL" }
451 const size_t num_known_interops = ARRAY_SIZE(cl_interop_names);
455 #define I0_STR "%-48s "
456 #define I1_STR " %-46s "
457 #define I2_STR " %-44s "
459 static const char empty_str[] = "";
460 static const char spc_str[] = " ";
461 static const char times_str[] = "x";
462 static const char comma_str[] = ", ";
463 static const char vbar_str[] = " | ";
465 const char *cur_sfx = empty_str;
467 /* parse a CL_DEVICE_VERSION or CL_PLATFORM_VERSION info to determine the OpenCL version.
468 * Returns an unsigned integer in the form major*10 + minor
471 getOpenCLVersion(const char *version)
475 const char *from = version;
477 parse = strtol(from, &next, 10);
481 // skip the dot TODO should we actually check for the dot?
483 parse = strtol(from, &next, 10);
490 #define SPLIT_CL_VERSION(ver) ((ver)/10), ((ver)%10)
492 /* print strbuf, prefixed by pname, skipping leading whitespace if skip is nonzero,
493 * affixing cur_sfx */
495 void show_strbuf(const struct _strbuf *strbuf, const char *pname, int skip, cl_int err)
497 printf("%s" I1_STR "%s%s\n",
499 (skip ? skip_leading_ws(strbuf->buf) : strbuf->buf),
500 err ? empty_str : cur_sfx);
504 platform_info_str(struct platform_info_ret *ret,
505 const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk),
506 const struct opt_out* UNUSED(output))
508 GET_STRING_LOC(ret, loc, clGetPlatformInfo, loc->plat, loc->param.plat);
512 platform_info_ulong(struct platform_info_ret *ret,
513 const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk),
514 const struct opt_out *output)
516 ret->err = REPORT_ERROR_LOC(ret,
517 clGetPlatformInfo(loc->plat, loc->param.plat, sizeof(ret->value.u64), &ret->value.u64, NULL),
519 CHECK_SIZE(ret, loc, ret->value.u64, clGetPlatformInfo, loc->plat, loc->param.plat);
520 strbuf_printf(&ret->str, "%" PRIu64, ret->value.u64);
524 platform_info_sz(struct platform_info_ret *ret,
525 const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk),
526 const struct opt_out *output)
528 ret->err = REPORT_ERROR_LOC(ret,
529 clGetPlatformInfo(loc->plat, loc->param.plat, sizeof(ret->value.s), &ret->value.s, NULL),
531 CHECK_SIZE(ret, loc, ret->value.s, clGetPlatformInfo, loc->plat, loc->param.plat);
532 strbuf_printf(&ret->str, "%" PRIuS, ret->value.s);
535 struct platform_info_traits {
536 cl_platform_info param; // CL_PLATFORM_*
537 const char *sname; // "CL_PLATFORM_*"
538 const char *pname; // "Platform *"
539 const char *sfx; // suffix for the output in non-raw mode
540 /* pointer to function that retrieves the parameter */
541 void (*show_func)(struct platform_info_ret *,
542 const struct info_loc *, const struct platform_info_checks *,
543 const struct opt_out *);
544 /* pointer to function that checks if the parameter should be retrieved */
545 cl_bool (*check_func)(const struct platform_info_checks *);
548 cl_bool khr_icd_p(const struct platform_info_checks *chk)
550 return chk->has_khr_icd;
553 cl_bool plat_is_12(const struct platform_info_checks *chk)
555 return !(chk->plat_version < 12);
558 cl_bool plat_is_20(const struct platform_info_checks *chk)
560 return !(chk->plat_version < 20);
563 cl_bool plat_is_21(const struct platform_info_checks *chk)
565 return !(chk->plat_version < 21);
568 cl_bool plat_has_amd_object_metadata(const struct platform_info_checks *chk)
570 return chk->has_amd_object_metadata;
574 #define PINFO_COND(symbol, name, sfx, typ, funcptr) { symbol, #symbol, "Platform " name, sfx, &platform_info_##typ, &funcptr }
575 #define PINFO(symbol, name, sfx, typ) { symbol, #symbol, "Platform " name, sfx, &platform_info_##typ, NULL }
576 struct platform_info_traits pinfo_traits[] = {
577 PINFO(CL_PLATFORM_NAME, "Name", NULL, str),
578 PINFO(CL_PLATFORM_VENDOR, "Vendor", NULL, str),
579 PINFO(CL_PLATFORM_VERSION, "Version", NULL, str),
580 PINFO(CL_PLATFORM_PROFILE, "Profile", NULL, str),
581 PINFO(CL_PLATFORM_EXTENSIONS, "Extensions", NULL, str),
582 PINFO_COND(CL_PLATFORM_MAX_KEYS_AMD, "Max metadata object keys (AMD)", NULL, sz, plat_has_amd_object_metadata),
583 PINFO_COND(CL_PLATFORM_HOST_TIMER_RESOLUTION, "Host timer resolution", "ns", ulong, plat_is_21),
584 PINFO_COND(CL_PLATFORM_ICD_SUFFIX_KHR, "Extensions function suffix", NULL, str, khr_icd_p)
587 /* Collect (and optionally show) infomation on a specific platform,
588 * initializing relevant arrays and optionally showing the collected
592 gatherPlatformInfo(struct platform_list *plist, cl_uint p, const struct opt_out *output)
596 struct platform_data *pdata = plist->pdata + p;
597 struct platform_info_checks *pinfo_checks = plist->platform_checks + p;
598 struct platform_info_ret ret;
601 pinfo_checks->plat_version = 10;
603 INIT_RET(ret, "platform");
604 reset_loc(&loc, __func__);
605 loc.plat = plist->platform[p];
607 for (loc.line = 0; loc.line < ARRAY_SIZE(pinfo_traits); ++loc.line) {
608 const struct platform_info_traits *traits = pinfo_traits + loc.line;
610 /* checked is true if there was no condition to check for, or if the
611 * condition was satisfied
613 int checked = !(traits->check_func && !traits->check_func(pinfo_checks));
615 if (output->cond == COND_PROP_CHECK && !checked)
618 loc.sname = traits->sname;
619 loc.pname = (output->mode == CLINFO_HUMAN ?
620 traits->pname : traits->sname);
621 loc.param.plat = traits->param;
623 cur_sfx = (output->mode == CLINFO_HUMAN && traits->sfx) ? traits->sfx : empty_str;
625 ret.str.buf[0] = '\0';
626 ret.err_str.buf[0] = '\0';
627 traits->show_func(&ret, &loc, pinfo_checks, output);
629 /* The property is skipped if this was a conditional property,
630 * unsatisfied, there was an error retrieving it and cond_prop_mode is not
633 if (ret.err && !checked && output->cond != COND_PROP_SHOW)
636 /* when only listing, do not print anything, we're just gathering
638 if (output->detailed) {
639 show_strbuf(RET_BUF(ret), loc.pname, 0, ret.err);
645 /* post-processing */
647 switch (traits->param) {
648 case CL_PLATFORM_NAME:
649 /* Store name for future reference */
650 len = strlen(ret.str.buf);
651 ALLOC(pdata->pname, len+1, "platform name copy");
652 /* memcpy instead of strncpy since we already have the len
653 * and memcpy is possibly more optimized */
654 memcpy(pdata->pname, ret.str.buf, len);
655 pdata->pname[len] = '\0';
657 case CL_PLATFORM_VERSION:
658 /* compute numeric value for OpenCL version */
659 pinfo_checks->plat_version = getOpenCLVersion(ret.str.buf + 7);
661 case CL_PLATFORM_EXTENSIONS:
662 pinfo_checks->has_khr_icd = !!strstr(ret.str.buf, "cl_khr_icd");
663 pinfo_checks->has_amd_object_metadata = !!strstr(ret.str.buf, "cl_amd_object_metadata");
664 pdata->has_amd_offline = !!strstr(ret.str.buf, "cl_amd_offline_devices");
666 case CL_PLATFORM_ICD_SUFFIX_KHR:
667 /* Store ICD suffix for future reference */
668 len = strlen(ret.str.buf);
669 ALLOC(pdata->sname, len+1, "platform ICD suffix copy");
670 /* memcpy instead of strncpy since we already have the len
671 * and memcpy is possibly more optimized */
672 memcpy(pdata->sname, ret.str.buf, len);
673 pdata->sname[len] = '\0';
681 if (pinfo_checks->plat_version > plist->max_plat_version)
682 plist->max_plat_version = pinfo_checks->plat_version;
684 /* if no CL_PLATFORM_ICD_SUFFIX_KHR, use P### as short/symbolic name */
687 ALLOC(pdata->sname, SNAME_MAX, "platform symbolic name");
688 snprintf(pdata->sname, SNAME_MAX, "P%" PRIu32 "", p);
691 len = (cl_int)strlen(pdata->sname);
692 if (len > plist->max_sname_len)
693 plist->max_sname_len = len;
695 ret.err = clGetDeviceIDs(loc.plat, CL_DEVICE_TYPE_ALL, 0, NULL, &pdata->ndevs);
696 if (ret.err == CL_DEVICE_NOT_FOUND)
699 CHECK_ERROR(ret.err, "number of devices");
700 plist->ndevs_total += pdata->ndevs;
701 plist->dev_offset[p] = p ? plist->dev_offset[p-1] + (pdata-1)->ndevs : 0;
702 plist_devs_reserve(plist, plist->ndevs_total);
704 if (pdata->ndevs > 0) {
705 ret.err = clGetDeviceIDs(loc.plat, CL_DEVICE_TYPE_ALL,
707 plist->all_devs + plist->dev_offset[p], NULL);
710 if (pdata->ndevs > plist->max_devs)
711 plist->max_devs = pdata->ndevs;
717 * Device properties/extensions used in traits checks, and relevant functions
720 struct device_info_checks {
721 const struct platform_info_checks *pinfo_checks;
722 cl_device_type devtype;
723 cl_device_mem_cache_type cachetype;
724 cl_device_local_mem_type lmemtype;
725 cl_bool image_support;
726 cl_bool compiler_available;
731 char has_amd_svm[11];
732 char has_arm_svm[29];
733 char has_arm_core_id[15];
734 char has_fission[22];
735 char has_atomic_counters[26];
736 char has_image2d_buffer[27];
737 char has_il_program[18];
738 char has_intel_local_thread[30];
739 char has_intel_AME[36];
740 char has_intel_AVC_ME[43];
741 char has_intel_planar_yuv[20];
742 char has_intel_required_subgroup_size[32];
743 char has_altera_dev_temp[29];
746 char has_qcom_ext_host_ptr[21];
747 char has_simultaneous_sharing[30];
748 char has_subgroup_named_barrier[30];
749 char has_terminate_context[25];
753 #define DEFINE_EXT_CHECK(ext) cl_bool dev_has_##ext(const struct device_info_checks *chk) \
755 return !!(chk->has_##ext[0]); \
758 DEFINE_EXT_CHECK(half)
759 DEFINE_EXT_CHECK(double)
761 DEFINE_EXT_CHECK(amd)
762 DEFINE_EXT_CHECK(amd_svm)
763 DEFINE_EXT_CHECK(arm_svm)
764 DEFINE_EXT_CHECK(arm_core_id)
765 DEFINE_EXT_CHECK(fission)
766 DEFINE_EXT_CHECK(atomic_counters)
767 DEFINE_EXT_CHECK(image2d_buffer)
768 DEFINE_EXT_CHECK(il_program)
769 DEFINE_EXT_CHECK(intel_local_thread)
770 DEFINE_EXT_CHECK(intel_AME)
771 DEFINE_EXT_CHECK(intel_AVC_ME)
772 DEFINE_EXT_CHECK(intel_planar_yuv)
773 DEFINE_EXT_CHECK(intel_required_subgroup_size)
774 DEFINE_EXT_CHECK(altera_dev_temp)
775 DEFINE_EXT_CHECK(p2p)
776 DEFINE_EXT_CHECK(spir)
777 DEFINE_EXT_CHECK(qcom_ext_host_ptr)
778 DEFINE_EXT_CHECK(simultaneous_sharing)
779 DEFINE_EXT_CHECK(subgroup_named_barrier)
780 DEFINE_EXT_CHECK(terminate_context)
782 /* In the version checks we negate the opposite conditions
783 * instead of double-negating the actual condition
786 // device supports 1.1
787 cl_bool dev_is_11(const struct device_info_checks *chk)
789 return !(chk->dev_version < 11);
793 // device supports 1.2
794 cl_bool dev_is_12(const struct device_info_checks *chk)
796 return !(chk->dev_version < 12);
799 // device supports 2.0
800 cl_bool dev_is_20(const struct device_info_checks *chk)
802 return !(chk->dev_version < 20);
805 // device supports 2.1
806 cl_bool dev_is_21(const struct device_info_checks *chk)
808 return !(chk->dev_version < 21);
811 // device does not support 2.0
812 cl_bool dev_not_20(const struct device_info_checks *chk)
814 return !(chk->dev_version >= 20);
818 cl_bool dev_is_gpu(const struct device_info_checks *chk)
820 return !!(chk->devtype & CL_DEVICE_TYPE_GPU);
823 cl_bool dev_is_gpu_amd(const struct device_info_checks *chk)
825 return dev_is_gpu(chk) && dev_has_amd(chk);
828 /* Device supports cl_amd_device_attribute_query v4 */
829 cl_bool dev_has_amd_v4(const struct device_info_checks *chk)
831 /* We don't actually have a criterion to check if the device
832 * supports a specific version of an extension, so for the time
833 * being rely on them being GPU devices with cl_amd_device_attribute_query
834 * and the platform supporting OpenCL 2.0 or later
835 * TODO FIXME tune criteria
837 return dev_is_gpu(chk) && dev_has_amd(chk) && plat_is_20(chk->pinfo_checks);
840 /* Device supports cl_arm_core_id v2 */
841 cl_bool dev_has_arm_core_id_v2(const struct device_info_checks *chk)
843 /* We don't actually have a criterion to check if the device
844 * supports a specific version of an extension, so for the time
845 * being rely on them having cl_arm_core_id and the platform
846 * supporting OpenCL 1.2 or later
847 * TODO FIXME tune criteria
849 return dev_has_arm_core_id(chk) && plat_is_12(chk->pinfo_checks);
852 cl_bool dev_has_svm(const struct device_info_checks *chk)
854 return dev_is_20(chk) || dev_has_amd_svm(chk);
857 cl_bool dev_has_partition(const struct device_info_checks *chk)
859 return dev_is_12(chk) || dev_has_fission(chk);
862 cl_bool dev_has_cache(const struct device_info_checks *chk)
864 return chk->cachetype != CL_NONE;
867 cl_bool dev_has_lmem(const struct device_info_checks *chk)
869 return chk->lmemtype != CL_NONE;
872 cl_bool dev_has_il(const struct device_info_checks *chk)
874 return dev_is_21(chk) || dev_has_il_program(chk);
877 cl_bool dev_has_images(const struct device_info_checks *chk)
879 return chk->image_support;
882 cl_bool dev_has_images_12(const struct device_info_checks *chk)
884 return dev_has_images(chk) && dev_is_12(chk);
887 cl_bool dev_has_images_20(const struct device_info_checks *chk)
889 return dev_has_images(chk) && dev_is_20(chk);
892 cl_bool dev_has_compiler(const struct device_info_checks *chk)
894 return chk->compiler_available;
897 cl_bool dev_has_compiler_11(const struct device_info_checks *chk)
899 return dev_is_11(chk) && dev_has_compiler(chk);
903 void identify_device_extensions(const char *extensions, struct device_info_checks *chk)
905 #define _HAS_EXT(ext) (strstr(extensions, ext))
906 #define HAS_EXT(ext) _HAS_EXT(#ext)
907 #define CPY_EXT(what, ext) do { \
908 strncpy(chk->has_##what, has, sizeof(ext)); \
909 chk->has_##what[sizeof(ext)-1] = '\0'; \
911 #define CHECK_EXT(what, ext) do { \
912 has = _HAS_EXT(#ext); \
913 if (has) CPY_EXT(what, #ext); \
917 CHECK_EXT(half, cl_khr_fp16);
918 CHECK_EXT(spir, cl_khr_spir);
919 CHECK_EXT(double, cl_khr_fp64);
920 if (!dev_has_double(chk))
921 CHECK_EXT(double, cl_amd_fp64);
922 if (!dev_has_double(chk))
923 CHECK_EXT(double, cl_APPLE_fp64_basic_ops);
924 CHECK_EXT(nv, cl_nv_device_attribute_query);
925 CHECK_EXT(amd, cl_amd_device_attribute_query);
926 CHECK_EXT(amd_svm, cl_amd_svm);
927 CHECK_EXT(arm_svm, cl_arm_shared_virtual_memory);
928 CHECK_EXT(arm_core_id, cl_arm_core_id);
929 CHECK_EXT(fission, cl_ext_device_fission);
930 CHECK_EXT(atomic_counters, cl_ext_atomic_counters_64);
931 if (dev_has_atomic_counters(chk))
932 CHECK_EXT(atomic_counters, cl_ext_atomic_counters_32);
933 CHECK_EXT(image2d_buffer, cl_khr_image2d_from_buffer);
934 CHECK_EXT(il_program, cl_khr_il_program);
935 CHECK_EXT(intel_local_thread, cl_intel_exec_by_local_thread);
936 CHECK_EXT(intel_AME, cl_intel_advanced_motion_estimation);
937 CHECK_EXT(intel_AVC_ME, cl_intel_device_side_avc_motion_estimation);
938 CHECK_EXT(intel_planar_yuv, cl_intel_planar_yuv);
939 CHECK_EXT(intel_required_subgroup_size, cl_intel_required_subgroup_size);
940 CHECK_EXT(altera_dev_temp, cl_altera_device_temperature);
941 CHECK_EXT(p2p, cl_amd_copy_buffer_p2p);
942 CHECK_EXT(qcom_ext_host_ptr, cl_qcom_ext_host_ptr);
943 CHECK_EXT(simultaneous_sharing, cl_intel_simultaneous_sharing);
944 CHECK_EXT(subgroup_named_barrier, cl_khr_subgroup_named_barrier);
945 CHECK_EXT(terminate_context, cl_khr_terminate_context);
950 * Device info print functions
953 #define _GET_VAL(ret, loc, val) \
954 ret->err = REPORT_ERROR_LOC(ret, \
955 clGetDeviceInfo((loc)->dev, (loc)->param.dev, sizeof(val), &(val), NULL), \
957 CHECK_SIZE(ret, loc, val, clGetDeviceInfo, (loc)->dev, (loc)->param.dev);
959 #define _GET_VAL_ARRAY(ret, loc) \
960 ret->err = REPORT_ERROR_LOC(ret, \
961 clGetDeviceInfo(loc->dev, loc->param.dev, 0, NULL, &szval), \
962 loc, "get number of %s"); \
963 numval = szval/sizeof(*val); \
965 REALLOC(val, numval, loc->sname); \
966 ret->err = REPORT_ERROR_LOC(ret, \
967 clGetDeviceInfo(loc->dev, loc->param.dev, szval, val, NULL), \
969 if (ret->err) { free(val); val = NULL; } \
972 #define GET_VAL(ret, loc, field) do { \
973 _GET_VAL(ret, (loc), ret->value.field) \
976 #define GET_VAL_ARRAY(ret, loc) do { \
977 _GET_VAL_ARRAY(ret, (loc)) \
980 #define DEFINE_DEVINFO_FETCH(type, field) \
982 device_fetch_##type(struct device_info_ret *ret, \
983 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), \
984 const struct opt_out *output) \
986 GET_VAL(ret, loc, field); \
987 return ret->value.field; \
990 DEFINE_DEVINFO_FETCH(size_t, s)
991 DEFINE_DEVINFO_FETCH(cl_bool, b)
992 DEFINE_DEVINFO_FETCH(cl_uint, u32)
993 DEFINE_DEVINFO_FETCH(cl_ulong, u64)
994 DEFINE_DEVINFO_FETCH(cl_device_type, devtype)
995 DEFINE_DEVINFO_FETCH(cl_device_mem_cache_type, cachetype)
996 DEFINE_DEVINFO_FETCH(cl_device_local_mem_type, lmemtype)
997 DEFINE_DEVINFO_FETCH(cl_device_topology_amd, devtopo)
998 DEFINE_DEVINFO_FETCH(cl_device_affinity_domain, affinity_domain)
999 DEFINE_DEVINFO_FETCH(cl_device_fp_config, fpconfig)
1000 DEFINE_DEVINFO_FETCH(cl_command_queue_properties, qprop)
1001 DEFINE_DEVINFO_FETCH(cl_device_exec_capabilities, execap)
1002 DEFINE_DEVINFO_FETCH(cl_device_svm_capabilities, svmcap)
1003 DEFINE_DEVINFO_FETCH(cl_device_terminate_capability_khr, termcap)
1005 #define DEV_FETCH_LOC(type, var, loc) \
1006 type var = device_fetch_##type(ret, loc, chk, output)
1007 #define DEV_FETCH(type, var) DEV_FETCH_LOC(type, var, loc)
1009 #define FMT_VAL(ret, fmt, val) if (!ret->err) strbuf_printf(&ret->str, fmt, val)
1011 #define DEFINE_DEVINFO_SHOW(how, type, field, fmt) \
1013 device_info_##how(struct device_info_ret *ret, \
1014 const struct info_loc *loc, const struct device_info_checks* chk, \
1015 const struct opt_out *output) \
1017 DEV_FETCH(type, val); \
1018 if (!ret->err) FMT_VAL(ret, fmt, val); \
1021 DEFINE_DEVINFO_SHOW(int, cl_uint, u32, "%" PRIu32)
1022 DEFINE_DEVINFO_SHOW(hex, cl_uint, u32, "%#" PRIx32)
1023 DEFINE_DEVINFO_SHOW(long, cl_ulong, u64, "%" PRIu64)
1024 DEFINE_DEVINFO_SHOW(sz, size_t, s, "%" PRIuS)
1027 device_info_str(struct device_info_ret *ret,
1028 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1029 const struct opt_out* UNUSED(output))
1031 GET_STRING_LOC(ret, loc, clGetDeviceInfo, loc->dev, loc->param.dev);
1035 device_info_bool(struct device_info_ret *ret,
1036 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1037 const struct opt_out *output)
1039 DEV_FETCH(cl_bool, val);
1041 const char * const * str = (output->mode == CLINFO_HUMAN ?
1042 bool_str : bool_raw_str);
1043 strbuf_printf(&ret->str, "%s", str[val]);
1048 device_info_bits(struct device_info_ret *ret,
1049 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1050 const struct opt_out *output)
1052 DEV_FETCH(cl_uint, val);
1054 strbuf_printf(&ret->str, "%" PRIu32 " bits (%" PRIu32 " bytes)", val, val/8);
1058 size_t strbuf_mem(struct _strbuf *str, cl_ulong val, size_t szval)
1060 double dbl = (double)val;
1062 while (dbl > 1024 && sfx < memsfx_end) {
1066 return sprintf(str->buf + szval, " (%.4lg%s)",
1071 device_info_mem(struct device_info_ret *ret,
1072 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1073 const struct opt_out *output)
1075 DEV_FETCH(cl_ulong, val);
1077 size_t szval = strbuf_printf(&ret->str, "%" PRIu64, val);
1078 if (output->mode == CLINFO_HUMAN && val > 1024)
1079 strbuf_mem(&ret->str, val, szval);
1084 device_info_mem_int(struct device_info_ret *ret,
1085 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1086 const struct opt_out *output)
1088 DEV_FETCH(cl_uint, val);
1090 size_t szval = strbuf_printf(&ret->str, "%" PRIu32, val);
1091 if (output->mode == CLINFO_HUMAN && val > 1024)
1092 strbuf_mem(&ret->str, val, szval);
1097 device_info_mem_sz(struct device_info_ret *ret,
1098 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1099 const struct opt_out *output)
1101 DEV_FETCH(size_t, val);
1103 size_t szval = strbuf_printf(&ret->str, "%" PRIuS, val);
1104 if (output->mode == CLINFO_HUMAN && val > 1024)
1105 strbuf_mem(&ret->str, val, szval);
1110 device_info_free_mem_amd(struct device_info_ret *ret,
1111 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1112 const struct opt_out *output)
1115 size_t szval = 0, numval = 0;
1116 GET_VAL_ARRAY(ret, loc);
1120 for (cursor = 0; cursor < numval; ++cursor) {
1122 ret->str.buf[szval] = ' ';
1125 szval += sprintf(ret->str.buf + szval, "%" PRIuS, val[cursor]);
1126 if (output->mode == CLINFO_HUMAN)
1127 szval += strbuf_mem(&ret->str, val[cursor]*UINT64_C(1024), szval);
1129 // TODO: ret->value.??? = val;
1135 device_info_time_offset(struct device_info_ret *ret,
1136 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1137 const struct opt_out *output)
1139 DEV_FETCH(cl_ulong, val);
1142 time_t time = val/UINT64_C(1000000000);
1143 szval += strbuf_printf(&ret->str, "%" PRIu64 "ns (", val);
1144 szval += bufcpy(&ret->str, szval, ctime(&time));
1145 /* overwrite ctime's newline with the closing parenthesis */
1146 if (szval < ret->str.sz)
1147 ret->str.buf[szval - 1] = ')';
1152 device_info_szptr_sep(struct device_info_ret *ret, const char *human_sep,
1153 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1154 const struct opt_out *output)
1157 size_t szval = 0, numval = 0;
1158 GET_VAL_ARRAY(ret, loc);
1161 set_separator(output->mode == CLINFO_HUMAN ? human_sep : spc_str);
1163 for (counter = 0; counter < numval; ++counter) {
1164 add_separator(&ret->str, &szval);
1165 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%" PRIuS, val[counter]);
1166 if (szval >= ret->str.sz) {
1167 trunc_strbuf(&ret->str);
1171 // TODO: ret->value.??? = val;
1177 device_info_szptr_times(struct device_info_ret *ret,
1178 const struct info_loc *loc, const struct device_info_checks* chk,
1179 const struct opt_out *output)
1181 device_info_szptr_sep(ret, times_str, loc, chk, output);
1185 device_info_szptr_comma(struct device_info_ret *ret,
1186 const struct info_loc *loc, const struct device_info_checks* chk,
1187 const struct opt_out *output)
1189 device_info_szptr_sep(ret, comma_str, loc, chk, output);
1193 getWGsizes(struct device_info_ret *ret, const struct info_loc *loc, size_t *wgm, size_t wgm_sz,
1194 const struct opt_out* UNUSED(output))
1198 cl_context_properties ctxpft[] = {
1199 CL_CONTEXT_PLATFORM, (cl_context_properties)loc->plat,
1202 cl_context ctx = NULL;
1203 cl_program prg = NULL;
1204 cl_kernel krn = NULL;
1206 ret->err = CL_SUCCESS;
1208 ctx = clCreateContext(ctxpft, 1, &loc->dev, NULL, NULL, &ret->err);
1209 if (REPORT_ERROR(&ret->err_str, ret->err, "create context")) goto out;
1210 prg = clCreateProgramWithSource(ctx, ARRAY_SIZE(sources), sources, NULL, &ret->err);
1211 if (REPORT_ERROR(&ret->err_str, ret->err, "create program")) goto out;
1212 ret->err = clBuildProgram(prg, 1, &loc->dev, NULL, NULL, NULL);
1213 log_err = REPORT_ERROR(&ret->err_str, ret->err, "build program");
1215 /* for a program build failure, dump the log to stderr before bailing */
1216 if (log_err == CL_BUILD_PROGRAM_FAILURE) {
1217 struct _strbuf logbuf;
1218 init_strbuf(&logbuf);
1219 GET_STRING(&logbuf, ret->err,
1220 clGetProgramBuildInfo, CL_PROGRAM_BUILD_LOG, "CL_PROGRAM_BUILD_LOG", prg, loc->dev);
1221 if (ret->err == CL_SUCCESS) {
1224 fputs("=== CL_PROGRAM_BUILD_LOG ===\n", stderr);
1225 fputs(logbuf.buf, stderr);
1228 free_strbuf(&logbuf);
1233 for (cursor = 0; cursor < wgm_sz; ++cursor) {
1234 strbuf_printf(&ret->str, "sum%u", 1<<cursor);
1236 ret->str.buf[3] = 0; // scalar kernel is called 'sum'
1237 krn = clCreateKernel(prg, ret->str.buf, &ret->err);
1238 if (REPORT_ERROR(&ret->err_str, ret->err, "create kernel")) goto out;
1239 ret->err = clGetKernelWorkGroupInfo(krn, loc->dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
1240 sizeof(*wgm), wgm + cursor, NULL);
1241 if (REPORT_ERROR(&ret->err_str, ret->err, "get kernel info")) goto out;
1242 clReleaseKernel(krn);
1248 clReleaseKernel(krn);
1250 clReleaseProgram(prg);
1252 clReleaseContext(ctx);
1257 device_info_wg(struct device_info_ret *ret,
1258 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1259 const struct opt_out *output)
1261 /* preferred workgroup size multiple for each kernel
1262 * have not found a platform where the WG multiple changes,
1263 * but keep this flexible (this can grow up to 5)
1265 #define NUM_KERNELS 1
1266 size_t wgm[NUM_KERNELS] = {0};
1268 getWGsizes(ret, loc, wgm, NUM_KERNELS, output);
1270 strbuf_printf(&ret->str, "%" PRIuS, wgm[0]);
1272 ret->value.s = wgm[0];
1276 device_info_img_sz_2d(struct device_info_ret *ret,
1277 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1278 const struct opt_out *output)
1280 struct info_loc loc2 = *loc;
1281 size_t width = 0, height = 0;
1282 _GET_VAL(ret, loc, height); /* HEIGHT */
1284 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_IMAGE2D_MAX_WIDTH);
1285 _GET_VAL(ret, &loc2, width);
1287 strbuf_printf(&ret->str, "%" PRIuS "x%" PRIuS, width, height);
1290 ret->value.u32v.s[0] = width;
1291 ret->value.u32v.s[1] = height;
1295 device_info_img_sz_intel_planar_yuv(struct device_info_ret *ret,
1296 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1297 const struct opt_out *output)
1299 struct info_loc loc2 = *loc;
1300 size_t width = 0, height = 0;
1301 _GET_VAL(ret, loc, height); /* HEIGHT */
1303 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL);
1304 _GET_VAL(ret, &loc2, width);
1306 strbuf_printf(&ret->str, "%" PRIuS "x%" PRIuS, width, height);
1309 ret->value.u32v.s[0] = width;
1310 ret->value.u32v.s[1] = height;
1315 device_info_img_sz_3d(struct device_info_ret *ret,
1316 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1317 const struct opt_out *output)
1319 struct info_loc loc2 = *loc;
1320 size_t width = 0, height = 0, depth = 0;
1321 _GET_VAL(ret, loc, height); /* HEIGHT */
1323 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_IMAGE3D_MAX_WIDTH);
1324 _GET_VAL(ret, &loc2, width);
1326 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_IMAGE3D_MAX_DEPTH);
1327 _GET_VAL(ret, &loc2, depth);
1329 strbuf_printf(&ret->str, "%" PRIuS "x%" PRIuS "x%" PRIuS,
1330 width, height, depth);
1334 ret->value.u32v.s[0] = width;
1335 ret->value.u32v.s[1] = height;
1336 ret->value.u32v.s[2] = depth;
1341 device_info_devtype(struct device_info_ret *ret,
1342 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1343 const struct opt_out *output)
1345 DEV_FETCH(cl_device_type, val);
1347 /* iterate over device type strings, appending their textual form
1349 cl_uint i = (cl_uint)actual_devtype_count;
1350 const char * const *devstr = (output->mode == CLINFO_HUMAN ?
1351 device_type_str : device_type_raw_str);
1353 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1354 for (; i > 0; --i) {
1355 /* assemble CL_DEVICE_TYPE_* from index i */
1356 cl_device_type cur = (cl_device_type)(1) << (i-1);
1358 /* match: add separator if not first match */
1359 add_separator(&ret->str, &szval);
1360 szval += bufcpy(&ret->str, szval, devstr[i]);
1363 /* check for extra bits */
1364 if (szval < ret->str.sz) {
1365 cl_device_type known_mask = ((cl_device_type)(1) << actual_devtype_count) - 1;
1366 cl_device_type extra = val & ~known_mask;
1368 add_separator(&ret->str, &szval);
1369 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra);
1376 device_info_cachetype(struct device_info_ret *ret,
1377 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1378 const struct opt_out *output)
1380 DEV_FETCH(cl_device_mem_cache_type, val);
1382 const char * const *ar = (output->mode == CLINFO_HUMAN ?
1383 cache_type_str : cache_type_raw_str);
1384 bufcpy(&ret->str, 0, ar[val]);
1389 device_info_lmemtype(struct device_info_ret *ret,
1390 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1391 const struct opt_out *output)
1393 DEV_FETCH(cl_device_local_mem_type, val);
1395 const char * const *ar = (output->mode == CLINFO_HUMAN ?
1396 lmem_type_str : lmem_type_raw_str);
1397 bufcpy(&ret->str, 0, ar[val]);
1399 ret->value.lmemtype = val;
1402 /* cl_arm_core_id */
1404 device_info_core_ids(struct device_info_ret *ret,
1405 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1406 const struct opt_out *output)
1408 DEV_FETCH(cl_ulong, val);
1411 /* The value is a bitfield where each set bit corresponds to a core ID
1412 * value that can be returned by the device-side function. We print them
1413 * here as ranges, such as 0-4, 8-12 */
1415 int range_start = -1;
1417 set_separator(empty_str);
1418 #define CORE_ID_END 64
1420 /* Find the start of the range */
1421 while ((cur_bit < CORE_ID_END) && !((val >> cur_bit) & 1))
1423 range_start = cur_bit++;
1425 /* Find the end of the range */
1426 while ((cur_bit < CORE_ID_END) && ((val >> cur_bit) & 1))
1429 /* print the range [range_start, cur_bit[ */
1430 if (range_start >= 0 && range_start < CORE_ID_END) {
1431 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1,
1432 "%s%d", sep, range_start);
1433 if (cur_bit - range_start > 1)
1434 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1,
1435 "-%d", cur_bit - 1);
1436 set_separator(comma_str);
1438 } while (cur_bit < CORE_ID_END);
1440 ret->value.u64 = val;
1443 /* stringify a cl_device_topology_amd */
1444 void devtopo_str(struct device_info_ret *ret, const cl_device_topology_amd *devtopo)
1446 switch (devtopo->raw.type) {
1450 case CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD:
1451 strbuf_printf(&ret->str, "PCI-E, %02x:%02x.%u",
1452 (cl_uchar)(devtopo->pcie.bus),
1453 devtopo->pcie.device, devtopo->pcie.function);
1456 strbuf_printf(&ret->str, "<unknown (%u): %u %u %u %u %u>",
1458 devtopo->raw.data[0], devtopo->raw.data[1],
1459 devtopo->raw.data[2],
1460 devtopo->raw.data[3], devtopo->raw.data[4]);
1465 device_info_devtopo_amd(struct device_info_ret *ret,
1466 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1467 const struct opt_out *output)
1469 DEV_FETCH(cl_device_topology_amd, val);
1470 /* TODO how to do this in CLINFO_RAW mode */
1472 devtopo_str(ret, &val);
1476 /* we assemble a cl_device_topology_amd struct from the NVIDIA info */
1478 device_info_devtopo_nv(struct device_info_ret *ret,
1479 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1480 const struct opt_out *output)
1482 struct info_loc loc2 = *loc;
1483 cl_device_topology_amd devtopo;
1484 DEV_FETCH(cl_uint, val); /* CL_DEVICE_PCI_BUS_ID_NV */
1486 devtopo.raw.type = CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD;
1487 devtopo.pcie.bus = val & 0xff;
1488 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_PCI_SLOT_ID_NV);
1489 _GET_VAL(ret, &loc2, val);
1492 devtopo.pcie.device = (val >> 3) & 0xff;
1493 devtopo.pcie.function = val & 7;
1494 devtopo_str(ret, &devtopo);
1496 ret->value.devtopo = devtopo;
1500 /* NVIDIA Compute Capability */
1502 device_info_cc_nv(struct device_info_ret *ret,
1503 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1504 const struct opt_out *output)
1506 struct info_loc loc2 = *loc;
1507 cl_uint major = 0, minor = 0;
1508 _GET_VAL(ret, loc, major); /* MAJOR */
1510 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV);
1511 _GET_VAL(ret, &loc2, minor);
1513 strbuf_printf(&ret->str, "%" PRIu32 ".%" PRIu32 "", major, minor);
1516 ret->value.u32v.s[0] = major;
1517 ret->value.u32v.s[1] = minor;
1522 device_info_gfxip_amd(struct device_info_ret *ret,
1523 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1524 const struct opt_out *output)
1526 struct info_loc loc2 = *loc;
1527 cl_uint major = 0, minor = 0;
1528 _GET_VAL(ret, loc, major); /* MAJOR */
1530 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_GFXIP_MINOR_AMD);
1531 _GET_VAL(ret, &loc2, minor);
1533 strbuf_printf(&ret->str, "%" PRIu32 ".%" PRIu32 "", major, minor);
1536 ret->value.u32v.s[0] = major;
1537 ret->value.u32v.s[1] = minor;
1541 /* Device Partition, CLINFO_HUMAN header */
1543 device_info_partition_header(struct device_info_ret *ret,
1544 const struct info_loc *UNUSED(loc), const struct device_info_checks *chk,
1545 const struct opt_out* UNUSED(output))
1547 cl_bool is_12 = dev_is_12(chk);
1548 cl_bool has_fission = dev_has_fission(chk);
1549 size_t szval = strbuf_printf(&ret->str, "(%s%s%s%s)",
1550 (is_12 ? core : empty_str),
1551 (is_12 && has_fission ? comma_str : empty_str),
1553 (!(is_12 || has_fission) ? na : empty_str));
1555 ret->err = CL_SUCCESS;
1557 if (szval >= ret->str.sz)
1558 trunc_strbuf(&ret->str);
1561 /* Device partition properties */
1563 device_info_partition_types(struct device_info_ret *ret,
1564 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1565 const struct opt_out *output)
1567 size_t numval = 0, szval = 0, cursor = 0, slen = 0;
1568 cl_device_partition_property *val = NULL;
1569 const char * const *ptstr = (output->mode == CLINFO_HUMAN ?
1570 partition_type_str : partition_type_raw_str);
1572 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1574 GET_VAL_ARRAY(ret, loc);
1578 for (cursor = 0; cursor < numval; ++cursor) {
1581 /* add separator for values past the first */
1582 add_separator(&ret->str, &szval);
1584 switch (val[cursor]) {
1585 case 0: str_idx = 0; break;
1586 case CL_DEVICE_PARTITION_EQUALLY: str_idx = 1; break;
1587 case CL_DEVICE_PARTITION_BY_COUNTS: str_idx = 2; break;
1588 case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: str_idx = 3; break;
1589 case CL_DEVICE_PARTITION_BY_NAMES_INTEL: str_idx = 4; break;
1591 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "by <unknown> (%#" PRIxPTR ")", val[cursor]);
1595 /* string length, minus _EXT */
1596 slen = strlen(ptstr[str_idx]);
1597 if (output->mode == CLINFO_RAW && str_idx > 0)
1599 szval += bufcpy_len(&ret->str, szval, ptstr[str_idx], slen);
1601 if (szval >= ret->str.sz) {
1602 trunc_strbuf(&ret->str);
1606 // TODO ret->value.??? = val
1612 device_info_partition_types_ext(struct device_info_ret *ret,
1613 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1614 const struct opt_out *output)
1616 size_t numval = 0, szval = 0, cursor = 0, slen = 0;
1617 cl_device_partition_property_ext *val = NULL;
1618 const char * const *ptstr = (output->mode == CLINFO_HUMAN ?
1619 partition_type_str : partition_type_raw_str);
1621 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1623 GET_VAL_ARRAY(ret, loc);
1627 for (cursor = 0; cursor < numval; ++cursor) {
1630 /* add separator for values past the first */
1631 add_separator(&ret->str, &szval);
1633 switch (val[cursor]) {
1634 case 0: str_idx = 0; break;
1635 case CL_DEVICE_PARTITION_EQUALLY_EXT: str_idx = 1; break;
1636 case CL_DEVICE_PARTITION_BY_COUNTS_EXT: str_idx = 2; break;
1637 case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT: str_idx = 3; break;
1638 case CL_DEVICE_PARTITION_BY_NAMES_EXT: str_idx = 4; break;
1640 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "by <unknown> (%#" PRIx64 ")", val[cursor]);
1645 slen = strlen(ptstr[str_idx]);
1646 strncpy(ret->str.buf + szval, ptstr[str_idx], slen);
1649 if (szval >= ret->str.sz) {
1650 trunc_strbuf(&ret->str);
1654 if (szval < ret->str.sz)
1655 ret->str.buf[szval] = '\0';
1656 // TODO ret->value.??? = val
1662 /* Device partition affinity domains */
1664 device_info_partition_affinities(struct device_info_ret *ret,
1665 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1666 const struct opt_out *output)
1668 DEV_FETCH(cl_device_affinity_domain, val);
1669 if (!ret->err && val) {
1670 /* iterate over affinity domain strings appending their textual form
1674 const char * const *affstr = (output->mode == CLINFO_HUMAN ?
1675 affinity_domain_str : affinity_domain_raw_str);
1676 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1677 for (i = 0; i < affinity_domain_count; ++i) {
1678 cl_device_affinity_domain cur = (cl_device_affinity_domain)(1) << i;
1680 /* match: add separator if not first match */
1681 add_separator(&ret->str, &szval);
1682 szval += bufcpy(&ret->str, szval, affstr[i]);
1684 if (szval >= ret->str.sz)
1687 /* check for extra bits */
1688 if (szval < ret->str.sz) {
1689 cl_device_affinity_domain known_mask = ((cl_device_affinity_domain)(1) << affinity_domain_count) - 1;
1690 cl_device_affinity_domain extra = val & ~known_mask;
1692 add_separator(&ret->str, &szval);
1693 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra);
1700 device_info_partition_affinities_ext(struct device_info_ret *ret,
1701 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1702 const struct opt_out *output)
1704 size_t numval = 0, szval = 0, cursor = 0, slen = 0;
1705 cl_device_partition_property_ext *val = NULL;
1706 const char * const *ptstr = (output->mode == CLINFO_HUMAN ?
1707 affinity_domain_ext_str : affinity_domain_raw_ext_str);
1709 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1711 GET_VAL_ARRAY(ret, loc);
1715 for (cursor = 0; cursor < numval; ++cursor) {
1718 /* add separator for values past the first */
1719 add_separator(&ret->str, &szval);
1721 switch (val[cursor]) {
1722 case CL_AFFINITY_DOMAIN_NUMA_EXT: str_idx = 0; break;
1723 case CL_AFFINITY_DOMAIN_L4_CACHE_EXT: str_idx = 1; break;
1724 case CL_AFFINITY_DOMAIN_L3_CACHE_EXT: str_idx = 2; break;
1725 case CL_AFFINITY_DOMAIN_L2_CACHE_EXT: str_idx = 3; break;
1726 case CL_AFFINITY_DOMAIN_L1_CACHE_EXT: str_idx = 4; break;
1727 case CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT: str_idx = 5; break;
1729 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "<unknown> (%#" PRIx64 ")", val[cursor]);
1734 const char *str = ptstr[str_idx];
1736 strncpy(ret->str.buf + szval, str, slen);
1739 if (szval >= ret->str.sz) {
1740 trunc_strbuf(&ret->str);
1744 ret->str.buf[szval] = '\0';
1745 // TODO: ret->value.??? = val
1750 /* Preferred / native vector widths */
1752 device_info_vecwidth(struct device_info_ret *ret,
1753 const struct info_loc *loc, const struct device_info_checks *chk,
1754 const struct opt_out *output)
1756 struct info_loc loc2 = *loc;
1757 cl_uint preferred = 0, native = 0;
1758 _GET_VAL(ret, loc, preferred);
1760 /* we get called with PREFERRED, NATIVE is at +0x30 offset, except for HALF,
1761 * which is at +0x08 */
1763 (loc2.param.dev == CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF ? 0x08 : 0x30);
1764 /* TODO update loc2.sname */
1765 _GET_VAL(ret, &loc2, native);
1769 const char *ext = (loc2.param.dev == CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF ?
1770 chk->has_half : (loc2.param.dev == CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE ?
1771 chk->has_double : NULL));
1772 szval = strbuf_printf(&ret->str, "%8u / %-8u", preferred, native);
1774 sprintf(ret->str.buf + szval, " (%s)", *ext ? ext : na);
1777 ret->value.u32v.s[0] = preferred;
1778 ret->value.u32v.s[1] = native;
1781 /* Floating-point configurations */
1783 device_info_fpconf(struct device_info_ret *ret,
1784 const struct info_loc *loc, const struct device_info_checks *chk,
1785 const struct opt_out *output)
1787 /* When in HUMAN output, we are called unconditionally,
1788 * so we have to do some manual checks ourselves */
1789 const cl_bool get_it = (output->mode != CLINFO_HUMAN) ||
1790 (loc->param.dev == CL_DEVICE_SINGLE_FP_CONFIG) ||
1791 (loc->param.dev == CL_DEVICE_HALF_FP_CONFIG && dev_has_half(chk)) ||
1792 (loc->param.dev == CL_DEVICE_DOUBLE_FP_CONFIG && dev_has_double(chk));
1794 DEV_FETCH(cl_device_fp_config, val);
1796 if (ret->err && !get_it) {
1797 ret->err = CL_SUCCESS;
1805 const char * const *fpstr = (output->mode == CLINFO_HUMAN ?
1806 fp_conf_str : fp_conf_raw_str);
1807 set_separator(vbar_str);
1808 if (output->mode == CLINFO_HUMAN) {
1809 const char *why = na;
1810 switch (loc->param.dev) {
1811 case CL_DEVICE_HALF_FP_CONFIG:
1813 why = chk->has_half;
1815 case CL_DEVICE_SINGLE_FP_CONFIG:
1818 case CL_DEVICE_DOUBLE_FP_CONFIG:
1820 why = chk->has_double;
1823 /* "this can't happen" (unless OpenCL starts supporting _other_ floating-point formats, maybe) */
1824 fprintf(stderr, "unsupported floating-point configuration parameter %s\n", loc->pname);
1826 /* show 'why' it's being shown */
1827 szval += strbuf_printf(&ret->str, "(%s)", why);
1830 size_t num_flags = fp_conf_count;
1831 /* The last flag, CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT is only considered
1832 * in the single-precision case. half and double don't consider it,
1833 * so we skip it altogether */
1834 if (loc->param.dev != CL_DEVICE_SINGLE_FP_CONFIG)
1837 for (i = 0; i < num_flags; ++i) {
1838 cl_device_fp_config cur = (cl_device_fp_config)(1) << i;
1839 if (output->mode == CLINFO_HUMAN) {
1840 szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
1841 line_pfx, fpstr[i], bool_str[!!(val & cur)]);
1842 } else if (val & cur) {
1843 add_separator(&ret->str, &szval);
1844 szval += bufcpy(&ret->str, szval, fpstr[i]);
1851 /* Queue properties */
1853 device_info_qprop(struct device_info_ret *ret,
1854 const struct info_loc *loc, const struct device_info_checks *chk,
1855 const struct opt_out *output)
1857 DEV_FETCH(cl_command_queue_properties, val);
1861 const char * const *qpstr = (output->mode == CLINFO_HUMAN ?
1862 queue_prop_str : queue_prop_raw_str);
1863 set_separator(vbar_str);
1864 for (i = 0; i < queue_prop_count; ++i) {
1865 cl_command_queue_properties cur = (cl_command_queue_properties)(1) << i;
1866 if (output->mode == CLINFO_HUMAN) {
1867 szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
1868 line_pfx, qpstr[i], bool_str[!!(val & cur)]);
1869 } else if (val & cur) {
1870 add_separator(&ret->str, &szval);
1871 szval += bufcpy(&ret->str, szval, qpstr[i]);
1874 if (output->mode == CLINFO_HUMAN && loc->param.dev == CL_DEVICE_QUEUE_PROPERTIES &&
1875 dev_has_intel_local_thread(chk))
1876 sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
1877 line_pfx, "Local thread execution (Intel)", bool_str[CL_TRUE]);
1881 /* Execution capbilities */
1883 device_info_execap(struct device_info_ret *ret,
1884 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1885 const struct opt_out *output)
1887 DEV_FETCH(cl_device_exec_capabilities, val);
1891 const char * const *qpstr = (output->mode == CLINFO_HUMAN ?
1892 execap_str : execap_raw_str);
1893 set_separator(vbar_str);
1894 for (i = 0; i < execap_count; ++i) {
1895 cl_device_exec_capabilities cur = (cl_device_exec_capabilities)(1) << i;
1896 if (output->mode == CLINFO_HUMAN) {
1897 szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
1898 line_pfx, qpstr[i], bool_str[!!(val & cur)]);
1899 } else if (val & cur) {
1900 add_separator(&ret->str, &szval);
1901 szval += bufcpy(&ret->str, szval, qpstr[i]);
1907 /* Arch bits and endianness (HUMAN) */
1909 device_info_arch(struct device_info_ret *ret,
1910 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1911 const struct opt_out *output)
1913 DEV_FETCH(cl_uint, bits);
1914 struct info_loc loc2 = *loc;
1915 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_ENDIAN_LITTLE);
1918 DEV_FETCH_LOC(cl_bool, val, &loc2);
1920 strbuf_printf(&ret->str, "%" PRIu32 ", %s", bits, endian_str[val]);
1925 /* SVM capabilities */
1927 device_info_svm_cap(struct device_info_ret *ret,
1928 const struct info_loc *loc, const struct device_info_checks *chk,
1929 const struct opt_out *output)
1931 const cl_bool is_20 = dev_is_20(chk);
1932 const cl_bool checking_core = (loc->param.dev == CL_DEVICE_SVM_CAPABILITIES);
1933 const cl_bool has_amd_svm = (checking_core && dev_has_amd_svm(chk));
1934 DEV_FETCH(cl_device_svm_capabilities, val);
1939 const char * const *scstr = (output->mode == CLINFO_HUMAN ?
1940 svm_cap_str : svm_cap_raw_str);
1941 set_separator(vbar_str);
1942 if (output->mode == CLINFO_HUMAN && checking_core) {
1943 /* show 'why' it's being shown */
1944 szval += strbuf_printf(&ret->str, "(%s%s%s)",
1945 (is_20 ? core : empty_str),
1946 (is_20 && has_amd_svm ? comma_str : empty_str),
1949 for (i = 0; i < svm_cap_count; ++i) {
1950 cl_device_svm_capabilities cur = (cl_device_svm_capabilities)(1) << i;
1951 if (output->mode == CLINFO_HUMAN) {
1952 szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
1953 line_pfx, scstr[i], bool_str[!!(val & cur)]);
1954 } else if (val & cur) {
1955 add_separator(&ret->str, &szval);
1956 szval += bufcpy(&ret->str, szval, scstr[i]);
1962 /* Device terminate capability */
1964 device_info_terminate_capability(struct device_info_ret *ret,
1965 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1966 const struct opt_out *output)
1968 DEV_FETCH(cl_device_terminate_capability_khr, val);
1969 if (!ret->err && val) {
1970 /* iterate over terminate capability strings appending their textual form
1974 const char * const *capstr = (output->mode == CLINFO_HUMAN ?
1975 terminate_capability_str : terminate_capability_raw_str);
1976 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1977 for (i = 0; i < terminate_capability_count; ++i) {
1978 cl_device_terminate_capability_khr cur = (cl_device_terminate_capability_khr)(1) << i;
1980 /* match: add separator if not first match */
1981 add_separator(&ret->str, &szval);
1982 szval += bufcpy(&ret->str, szval, capstr[i]);
1984 if (szval >= ret->str.sz)
1987 /* check for extra bits */
1988 if (szval < ret->str.sz) {
1989 cl_device_terminate_capability_khr known_mask = ((cl_device_terminate_capability_khr)(1) << terminate_capability_count) - 1;
1990 cl_device_terminate_capability_khr extra = val & ~known_mask;
1992 add_separator(&ret->str, &szval);
1993 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra);
2000 device_info_p2p_dev_list(struct device_info_ret *ret,
2001 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
2002 const struct opt_out* UNUSED(output))
2004 cl_device_id *val = NULL;
2005 size_t szval = 0, numval = 0;
2006 GET_VAL_ARRAY(ret, loc);
2010 for (cursor= 0; cursor < numval; ++cursor) {
2012 ret->str.buf[szval] = ' ';
2015 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%p", (void*)val[cursor]);
2017 // TODO: ret->value.??? = val;
2023 device_info_interop_list(struct device_info_ret *ret,
2024 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
2025 const struct opt_out *output)
2027 cl_uint *val = NULL;
2028 size_t szval = 0, numval = 0;
2029 GET_VAL_ARRAY(ret, loc);
2032 const cl_interop_name *interop_name_end = cl_interop_names + num_known_interops;
2033 cl_uint human_raw = output->mode - CLINFO_HUMAN;
2034 const char *groupsep = (output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
2035 cl_bool first = CL_TRUE;
2037 for (cursor = 0; cursor < numval; ++cursor) {
2038 cl_uint current = val[cursor];
2039 if (!current && cursor < numval - 1) {
2040 /* A null value is used as group terminator, but we only print it
2041 * if it's not the final one
2043 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%s", groupsep);
2047 cl_bool found = CL_FALSE;
2048 const cl_interop_name *n = cl_interop_names;
2051 ret->str.buf[szval] = ' ';
2055 while (n < interop_name_end) {
2056 if (current >= n->from && current <= n->to) {
2063 cl_uint i = current - n->from;
2064 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%s", n->value[i][human_raw]);
2066 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx32, val[cursor]);
2070 if (szval >= ret->str.sz) {
2071 trunc_strbuf(&ret->str);
2075 // TODO: ret->value.??? = val;
2082 * Device info traits
2085 /* A CL_FALSE param means "just print pname" */
2087 struct device_info_traits {
2088 enum output_modes output_mode;
2089 cl_device_info param; // CL_DEVICE_*
2090 const char *sname; // "CL_DEVICE_*"
2091 const char *pname; // "Device *"
2092 const char *sfx; // suffix for the output in non-raw mode
2093 /* pointer to function that retrieves the parameter */
2094 void (*show_func)(struct device_info_ret *,
2095 const struct info_loc *, const struct device_info_checks *,
2096 const struct opt_out *);
2097 /* pointer to function that checks if the parameter should be retrieved */
2098 cl_bool (*check_func)(const struct device_info_checks *);
2101 #define DINFO_SFX(symbol, name, sfx, typ) symbol, #symbol, name, sfx, device_info_##typ
2102 #define DINFO(symbol, name, typ) symbol, #symbol, name, NULL, device_info_##typ
2104 struct device_info_traits dinfo_traits[] = {
2105 { CLINFO_BOTH, DINFO(CL_DEVICE_NAME, "Device Name", str), NULL },
2106 { CLINFO_BOTH, DINFO(CL_DEVICE_VENDOR, "Device Vendor", str), NULL },
2107 { CLINFO_BOTH, DINFO(CL_DEVICE_VENDOR_ID, "Device Vendor ID", hex), NULL },
2108 { CLINFO_BOTH, DINFO(CL_DEVICE_VERSION, "Device Version", str), NULL },
2109 { CLINFO_BOTH, DINFO(CL_DRIVER_VERSION, "Driver Version", str), NULL },
2110 { CLINFO_BOTH, DINFO(CL_DEVICE_OPENCL_C_VERSION, "Device OpenCL C Version", str), dev_is_11 },
2111 { CLINFO_BOTH, DINFO(CL_DEVICE_EXTENSIONS, "Device Extensions", str), NULL },
2112 { CLINFO_BOTH, DINFO(CL_DEVICE_TYPE, "Device Type", devtype), NULL },
2114 { CLINFO_BOTH, DINFO(CL_DEVICE_BOARD_NAME_AMD, "Device Board Name (AMD)", str), dev_has_amd },
2115 { CLINFO_BOTH, DINFO(CL_DEVICE_PCIE_ID_AMD, "Device PCI-e ID (AMD)", hex), dev_has_amd },
2116 { CLINFO_BOTH, DINFO(CL_DEVICE_TOPOLOGY_AMD, "Device Topology (AMD)", devtopo_amd), dev_has_amd },
2118 /* Device Topology (NV) is multipart, so different for HUMAN and RAW */
2119 { CLINFO_HUMAN, DINFO(CL_DEVICE_PCI_BUS_ID_NV, "Device Topology (NV)", devtopo_nv), dev_has_nv },
2120 { CLINFO_RAW, DINFO(CL_DEVICE_PCI_BUS_ID_NV, "Device PCI bus (NV)", int), dev_has_nv },
2121 { CLINFO_RAW, DINFO(CL_DEVICE_PCI_SLOT_ID_NV, "Device PCI slot (NV)", int), dev_has_nv },
2123 { CLINFO_BOTH, DINFO(CL_DEVICE_PROFILE, "Device Profile", str), NULL },
2124 { CLINFO_BOTH, DINFO(CL_DEVICE_AVAILABLE, "Device Available", bool), NULL },
2125 { CLINFO_BOTH, DINFO(CL_DEVICE_COMPILER_AVAILABLE, "Compiler Available", bool), NULL },
2126 { CLINFO_BOTH, DINFO(CL_DEVICE_LINKER_AVAILABLE, "Linker Available", bool), dev_is_12 },
2128 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_COMPUTE_UNITS, "Max compute units", int), NULL },
2129 { CLINFO_HUMAN, DINFO(CL_DEVICE_COMPUTE_UNITS_BITFIELD, "Available core IDs", core_ids), dev_has_arm_core_id_v2 },
2130 { CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_UNITS_BITFIELD, "Available core IDs", long), dev_has_arm_core_id_v2 },
2131 { CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, "SIMD per compute unit (AMD)", int), dev_is_gpu_amd },
2132 { CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_WIDTH_AMD, "SIMD width (AMD)", int), dev_is_gpu_amd },
2133 { CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, "SIMD instruction width (AMD)", int), dev_is_gpu_amd },
2134 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_MAX_CLOCK_FREQUENCY, "Max clock frequency", "MHz", int), NULL },
2136 /* Device Compute Capability (NV) is multipart, so different for HUMAN and RAW */
2137 { CLINFO_HUMAN, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, "Compute Capability (NV)", cc_nv), dev_has_nv },
2138 { CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, INDENT "Compute Capability Major (NV)", int), dev_has_nv },
2139 { CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, INDENT "Compute Capability Minor (NV)", int), dev_has_nv },
2141 /* GFXIP (AMD) is multipart, so different for HUMAN and RAW */
2142 /* TODO: find a better human-friendly name than GFXIP; v3 of the cl_amd_device_attribute_query
2143 * extension specification calls it “core engine GFXIP”, which honestly is not better than
2144 * our name choice. */
2145 { CLINFO_HUMAN, DINFO(CL_DEVICE_GFXIP_MAJOR_AMD, "Graphics IP (AMD)", gfxip_amd), dev_is_gpu_amd },
2146 { CLINFO_RAW, DINFO(CL_DEVICE_GFXIP_MAJOR_AMD, INDENT "Graphics IP MAJOR (AMD)", int), dev_is_gpu_amd },
2147 { CLINFO_RAW, DINFO(CL_DEVICE_GFXIP_MINOR_AMD, INDENT "Graphics IP MINOR (AMD)", int), dev_is_gpu_amd },
2149 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_CORE_TEMPERATURE_ALTERA, "Core Temperature (Altera)", " C", int), dev_has_altera_dev_temp },
2151 /* Device partition support: summary is only presented in HUMAN case */
2152 { CLINFO_HUMAN, DINFO(CL_DEVICE_PARTITION_MAX_SUB_DEVICES, "Device Partition", partition_header), dev_has_partition },
2153 { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_MAX_SUB_DEVICES, INDENT "Max number of sub-devices", int), dev_is_12 },
2154 { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_PROPERTIES, INDENT "Supported partition types", partition_types), dev_is_12 },
2155 { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_AFFINITY_DOMAIN, INDENT "Supported affinity domains", partition_affinities), dev_is_12 },
2156 { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_TYPES_EXT, INDENT "Supported partition types (ext)", partition_types_ext), dev_has_fission },
2157 { CLINFO_BOTH, DINFO(CL_DEVICE_AFFINITY_DOMAINS_EXT, INDENT "Supported affinity domains (ext)", partition_affinities_ext), dev_has_fission },
2159 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, "Max work item dimensions", int), NULL },
2160 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_ITEM_SIZES, "Max work item sizes", szptr_times), NULL },
2161 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_GROUP_SIZE, "Max work group size", sz), NULL },
2163 /* cl_amd_device_attribute_query v4 */
2164 { CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD, "Preferred work group size (AMD)", sz), dev_has_amd_v4 },
2165 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD, "Max work group size (AMD)", sz), dev_has_amd_v4 },
2167 { CLINFO_BOTH, DINFO(CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, "Preferred work group size multiple", wg), dev_has_compiler_11 },
2168 { CLINFO_BOTH, DINFO(CL_DEVICE_WARP_SIZE_NV, "Warp size (NV)", int), dev_has_nv },
2169 { CLINFO_BOTH, DINFO(CL_DEVICE_WAVEFRONT_WIDTH_AMD, "Wavefront width (AMD)", int), dev_is_gpu_amd },
2170 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_NUM_SUB_GROUPS, "Max sub-groups per work group", int), dev_is_21 },
2171 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR, "Max named sub-group barriers", int), dev_has_subgroup_named_barrier },
2172 { CLINFO_BOTH, DINFO(CL_DEVICE_SUB_GROUP_SIZES_INTEL, "Sub-group sizes (Intel)", szptr_comma), dev_has_intel_required_subgroup_size },
2174 /* Preferred/native vector widths: header is only presented in HUMAN case, that also pairs
2175 * PREFERRED and NATIVE in a single line */
2176 #define DINFO_VECWIDTH(Type, type) \
2177 { CLINFO_HUMAN, DINFO(CL_DEVICE_PREFERRED_VECTOR_WIDTH_##Type, INDENT #type, vecwidth), NULL }, \
2178 { CLINFO_RAW, DINFO(CL_DEVICE_PREFERRED_VECTOR_WIDTH_##Type, INDENT #type, int), NULL }, \
2179 { CLINFO_RAW, DINFO(CL_DEVICE_NATIVE_VECTOR_WIDTH_##Type, INDENT #type, int), dev_is_11 }
2181 { CLINFO_HUMAN, DINFO(CL_FALSE, "Preferred / native vector sizes", str), NULL },
2182 DINFO_VECWIDTH(CHAR, char),
2183 DINFO_VECWIDTH(SHORT, short),
2184 DINFO_VECWIDTH(INT, int),
2185 DINFO_VECWIDTH(LONG, long),
2186 DINFO_VECWIDTH(HALF, half), /* this should be excluded for 1.0 */
2187 DINFO_VECWIDTH(FLOAT, float),
2188 DINFO_VECWIDTH(DOUBLE, double),
2190 /* Floating point configurations */
2191 #define DINFO_FPCONF(Type, type, cond) \
2192 { CLINFO_HUMAN, DINFO(CL_DEVICE_##Type##_FP_CONFIG, #type "-precision Floating-point support", fpconf), NULL }, \
2193 { CLINFO_RAW, DINFO(CL_DEVICE_##Type##_FP_CONFIG, #type "-precision Floating-point support", fpconf), cond }
2195 DINFO_FPCONF(HALF, Half, dev_has_half),
2196 DINFO_FPCONF(SINGLE, Single, NULL),
2197 DINFO_FPCONF(DOUBLE, Double, dev_has_double),
2199 /* Address bits and endianness are written together for HUMAN, separate for RAW */
2200 { CLINFO_HUMAN, DINFO(CL_DEVICE_ADDRESS_BITS, "Address bits", arch), NULL },
2201 { CLINFO_RAW, DINFO(CL_DEVICE_ADDRESS_BITS, "Address bits", int), NULL },
2202 { CLINFO_RAW, DINFO(CL_DEVICE_ENDIAN_LITTLE, "Little Endian", bool), NULL },
2205 { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_SIZE, "Global memory size", mem), NULL },
2206 { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, "Global free memory (AMD)", free_mem_amd), dev_is_gpu_amd },
2207 { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, "Global memory channels (AMD)", int), dev_is_gpu_amd },
2208 { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, "Global memory banks per channel (AMD)", int), dev_is_gpu_amd },
2209 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, "Global memory bank width (AMD)", bytes_str, int), dev_is_gpu_amd },
2210 { CLINFO_BOTH, DINFO(CL_DEVICE_ERROR_CORRECTION_SUPPORT, "Error Correction support", bool), NULL },
2211 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_MEM_ALLOC_SIZE, "Max memory allocation", mem), NULL },
2212 { CLINFO_BOTH, DINFO(CL_DEVICE_HOST_UNIFIED_MEMORY, "Unified memory for Host and Device", bool), dev_is_11 },
2213 { CLINFO_BOTH, DINFO(CL_DEVICE_INTEGRATED_MEMORY_NV, "Integrated memory (NV)", bool), dev_has_nv },
2215 { CLINFO_BOTH, DINFO(CL_DEVICE_SVM_CAPABILITIES, "Shared Virtual Memory (SVM) capabilities", svm_cap), dev_has_svm },
2216 { CLINFO_BOTH, DINFO(CL_DEVICE_SVM_CAPABILITIES_ARM, "Shared Virtual Memory (SVM) capabilities (ARM)", svm_cap), dev_has_arm_svm },
2219 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, "Minimum alignment for any data type", bytes_str, int), NULL },
2220 { CLINFO_HUMAN, DINFO(CL_DEVICE_MEM_BASE_ADDR_ALIGN, "Alignment of base address", bits), NULL },
2221 { CLINFO_RAW, DINFO(CL_DEVICE_MEM_BASE_ADDR_ALIGN, "Alignment of base address", int), NULL },
2223 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PAGE_SIZE_QCOM, "Page size (QCOM)", bytes_str, sz), dev_has_qcom_ext_host_ptr },
2224 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM, "External memory padding (QCOM)", bytes_str, sz), dev_has_qcom_ext_host_ptr },
2226 /* Atomics alignment, with HUMAN-only header */
2227 { CLINFO_HUMAN, DINFO(CL_FALSE, "Preferred alignment for atomics", str), dev_is_20 },
2228 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, INDENT "SVM", bytes_str, int), dev_is_20 },
2229 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, INDENT "Global", bytes_str, int), dev_is_20 },
2230 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, INDENT "Local", bytes_str, int), dev_is_20 },
2232 /* Global variables. TODO some 1.2 devices respond to this too */
2233 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, "Max size for global variable", mem), dev_is_20 },
2234 { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, "Preferred total size of global vars", mem), dev_is_20 },
2236 /* Global memory cache */
2237 { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, "Global Memory cache type", cachetype), NULL },
2238 { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, "Global Memory cache size", mem), dev_has_cache },
2239 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, "Global Memory cache line size", " bytes", int), dev_has_cache },
2242 { CLINFO_BOTH, DINFO(CL_DEVICE_IMAGE_SUPPORT, "Image support", bool), NULL },
2243 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_SAMPLERS, INDENT "Max number of samplers per kernel", int), dev_has_images },
2244 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, INDENT "Max size for 1D images from buffer", pixels_str, sz), dev_has_images_12 },
2245 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, INDENT "Max 1D or 2D image array size", images_str, sz), dev_has_images_12 },
2246 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, INDENT "Base address alignment for 2D image buffers", bytes_str, sz), dev_has_image2d_buffer },
2247 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_PITCH_ALIGNMENT, INDENT "Pitch alignment for 2D image buffers", pixels_str, sz), dev_has_image2d_buffer },
2249 /* Image dimensions are split for RAW, combined for HUMAN */
2250 { CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_IMAGE2D_MAX_HEIGHT, INDENT "Max 2D image size", pixels_str, img_sz_2d), dev_has_images },
2251 { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE2D_MAX_HEIGHT, INDENT "Max 2D image height", sz), dev_has_images },
2252 { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE2D_MAX_WIDTH, INDENT "Max 2D image width", sz), dev_has_images },
2253 { CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, INDENT "Max planar YUV image size", pixels_str, img_sz_2d), dev_has_intel_planar_yuv },
2254 { CLINFO_RAW, DINFO(CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, INDENT "Max planar YUV image height", sz), dev_has_intel_planar_yuv },
2255 { CLINFO_RAW, DINFO(CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL, INDENT "Max planar YUV image width", sz), dev_has_intel_planar_yuv },
2256 { CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_IMAGE3D_MAX_HEIGHT, INDENT "Max 3D image size", pixels_str, img_sz_3d), dev_has_images },
2257 { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_HEIGHT, INDENT "Max 3D image height", sz), dev_has_images },
2258 { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_WIDTH, INDENT "Max 3D image width", sz), dev_has_images },
2259 { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_DEPTH, INDENT "Max 3D image depth", sz), dev_has_images },
2261 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_READ_IMAGE_ARGS, INDENT "Max number of read image args", int), dev_has_images },
2262 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WRITE_IMAGE_ARGS, INDENT "Max number of write image args", int), dev_has_images },
2263 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, INDENT "Max number of read/write image args", int), dev_has_images_20 },
2266 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_PIPE_ARGS, "Max number of pipe args", int), dev_is_20 },
2267 { CLINFO_BOTH, DINFO(CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, "Max active pipe reservations", int), dev_is_20 },
2268 { CLINFO_BOTH, DINFO(CL_DEVICE_PIPE_MAX_PACKET_SIZE, "Max pipe packet size", mem_int), dev_is_20 },
2271 { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_TYPE, "Local memory type", lmemtype), NULL },
2272 { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_SIZE, "Local memory size", mem), dev_has_lmem },
2273 { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, "Local memory syze per CU (AMD)", mem), dev_is_gpu_amd },
2274 { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_BANKS_AMD, "Local memory banks (AMD)", int), dev_is_gpu_amd },
2275 { CLINFO_BOTH, DINFO(CL_DEVICE_REGISTERS_PER_BLOCK_NV, "Registers per block (NV)", int), dev_has_nv },
2277 /* Constant memory */
2278 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_CONSTANT_ARGS, "Max number of constant args", int), NULL },
2279 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, "Max constant buffer size", mem), NULL },
2280 { CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD, "Preferred constant buffer size (AMD)", mem_sz), dev_has_amd_v4 },
2282 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_PARAMETER_SIZE, "Max size of kernel argument", mem), NULL },
2283 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT, "Max number of atomic counters", sz), dev_has_atomic_counters },
2285 /* Queue properties */
2286 { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_PROPERTIES, "Queue properties", qprop), dev_not_20 },
2287 { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, "Queue properties (on host)", qprop), dev_is_20 },
2288 { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, "Queue properties (on device)", qprop), dev_is_20 },
2289 { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, INDENT "Preferred size", mem), dev_is_20 },
2290 { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, INDENT "Max size", mem), dev_is_20 },
2291 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ON_DEVICE_QUEUES, "Max queues on device", int), dev_is_20 },
2292 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ON_DEVICE_EVENTS, "Max events on device", int), dev_is_20 },
2294 /* Terminate context */
2295 { CLINFO_BOTH, DINFO(CL_DEVICE_TERMINATE_CAPABILITY_KHR_1x, "Terminate capability (1.2 define)", terminate_capability), dev_has_terminate_context },
2296 { CLINFO_BOTH, DINFO(CL_DEVICE_TERMINATE_CAPABILITY_KHR_2x, "Terminate capability (2.x define)", terminate_capability), dev_has_terminate_context },
2299 { CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, "Prefer user sync for interop", bool), dev_is_12 },
2300 { CLINFO_BOTH, DINFO(CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL, "Number of simultaneous interops (Intel)", int), dev_has_simultaneous_sharing },
2301 { CLINFO_BOTH, DINFO(CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL, "Simultaneous interops", interop_list), dev_has_simultaneous_sharing },
2303 /* P2P buffer copy */
2304 { CLINFO_BOTH, DINFO(CL_DEVICE_NUM_P2P_DEVICES_AMD, "Number of P2P devices (AMD)", int), dev_has_p2p },
2305 { CLINFO_BOTH, DINFO(CL_DEVICE_P2P_DEVICES_AMD, "P2P devices (AMD)", p2p_dev_list), dev_has_p2p },
2307 /* Profiling resolution */
2308 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PROFILING_TIMER_RESOLUTION, "Profiling timer resolution", "ns", sz), NULL },
2309 { CLINFO_HUMAN, DINFO(CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, "Profiling timer offset since Epoch (AMD)", time_offset), dev_has_amd },
2310 { CLINFO_RAW, DINFO(CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, "Profiling timer offset since Epoch (AMD)", long), dev_has_amd },
2312 /* Kernel execution capabilities */
2313 { CLINFO_BOTH, DINFO(CL_DEVICE_EXECUTION_CAPABILITIES, "Execution capabilities", execap), NULL },
2314 { CLINFO_BOTH, DINFO(CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, INDENT "Sub-group independent forward progress", bool), dev_is_21 },
2315 { CLINFO_BOTH, DINFO(CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD, INDENT "Thread trace supported (AMD)", bool), dev_is_gpu_amd },
2316 { CLINFO_BOTH, DINFO(CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, INDENT "Kernel execution timeout (NV)", bool), dev_has_nv },
2317 { CLINFO_BOTH, DINFO(CL_DEVICE_GPU_OVERLAP_NV, "Concurrent copy and kernel execution (NV)", bool), dev_has_nv },
2318 { CLINFO_BOTH, DINFO(CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV, INDENT "Number of async copy engines", int), dev_has_nv },
2319 { CLINFO_BOTH, DINFO(CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD, INDENT "Number of async queues (AMD)", int), dev_has_amd_v4 },
2320 /* TODO FIXME undocumented, experimental */
2321 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_REAL_TIME_COMPUTE_QUEUES_AMD, INDENT "Max real-time compute queues (AMD)", int), dev_has_amd_v4 },
2322 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_REAL_TIME_COMPUTE_UNITS_AMD, INDENT "Max real-time compute units (AMD)", int), dev_has_amd_v4 },
2324 /* TODO: this should tell if it's being done due to the device being 2.1 or due to it having the extension */
2325 { CLINFO_BOTH, DINFO(CL_DEVICE_IL_VERSION, INDENT "IL version", str), dev_has_il },
2326 { CLINFO_BOTH, DINFO(CL_DEVICE_SPIR_VERSIONS, INDENT "SPIR versions", str), dev_has_spir },
2327 { CLINFO_BOTH, DINFO(CL_DEVICE_PRINTF_BUFFER_SIZE, "printf() buffer size", mem_sz), dev_is_12 },
2328 { CLINFO_BOTH, DINFO(CL_DEVICE_BUILT_IN_KERNELS, "Built-in kernels", str), dev_is_12 },
2329 { CLINFO_BOTH, DINFO(CL_DEVICE_ME_VERSION_INTEL, "Motion Estimation accelerator version (Intel)", int), dev_has_intel_AME },
2330 { CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_VERSION_INTEL, INDENT "Device-side AVC Motion Estimation version", int), dev_has_intel_AVC_ME },
2331 { CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL, INDENT INDENT "Supports texture sampler use", bool), dev_has_intel_AVC_ME },
2332 { CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL, INDENT INDENT "Supports preemption", bool), dev_has_intel_AVC_ME },
2335 /* Process all the device info in the traits, except if param_whitelist is not NULL,
2336 * in which case only those in the whitelist will be processed.
2337 * If present, the whitelist should be sorted in the order of appearance of the parameters
2338 * in the traits table, and terminated by the value CL_FALSE
2342 printDeviceInfo(cl_device_id dev, const struct platform_list *plist, cl_uint p,
2343 const cl_device_info *param_whitelist, /* list of device info to process, or NULL */
2344 const struct opt_out *output)
2346 char *extensions = NULL;
2348 /* pointer to the traits for CL_DEVICE_EXTENSIONS */
2349 const struct device_info_traits *extensions_traits = NULL;
2351 struct device_info_checks chk;
2352 struct device_info_ret ret;
2353 struct info_loc loc;
2355 memset(&chk, 0, sizeof(chk));
2356 chk.pinfo_checks = plist->platform_checks + p;
2357 chk.dev_version = 10;
2359 INIT_RET(ret, "device");
2361 reset_loc(&loc, __func__);
2362 loc.plat = plist->platform[p];
2365 for (loc.line = 0; loc.line < ARRAY_SIZE(dinfo_traits); ++loc.line) {
2367 const struct device_info_traits *traits = dinfo_traits + loc.line;
2369 /* checked is true if there was no condition to check for, or if the
2370 * condition was satisfied
2372 int checked = !(traits->check_func && !traits->check_func(&chk));
2374 loc.sname = traits->sname;
2375 loc.pname = (output->mode == CLINFO_HUMAN ?
2376 traits->pname : traits->sname);
2377 loc.param.dev = traits->param;
2379 /* Whitelist check: finish if done traversing the list,
2380 * skip current param if it's not the right one
2382 if ((output->cond == COND_PROP_CHECK || output->brief) && param_whitelist) {
2383 if (*param_whitelist == CL_FALSE)
2385 if (traits->param != *param_whitelist)
2390 /* skip if it's not for this output mode */
2391 if (!(output->mode & traits->output_mode))
2394 if (output->cond == COND_PROP_CHECK && !checked)
2397 cur_sfx = (output->mode == CLINFO_HUMAN && traits->sfx) ? traits->sfx : empty_str;
2399 ret.str.buf[0] = '\0';
2400 ret.err_str.buf[0] = '\0';
2402 /* Handle headers */
2403 if (traits->param == CL_FALSE) {
2404 ret.err = CL_SUCCESS;
2405 show_strbuf(&ret.str, loc.pname, 0, ret.err);
2409 traits->show_func(&ret, &loc, &chk, output);
2411 if (traits->param == CL_DEVICE_EXTENSIONS) {
2412 /* make a backup of the extensions string, regardless of
2414 const char *msg = RET_BUF(ret)->buf;
2415 size_t len = strlen(msg);
2416 extensions_traits = traits;
2417 ALLOC(extensions, len+1, "extensions");
2418 memcpy(extensions, msg, len);
2419 extensions[len] = '\0';
2422 /* if there was an error retrieving the property,
2423 * skip if it wasn't expected to work and we
2424 * weren't asked to show everything regardless of
2426 if (!checked && output->cond != COND_PROP_SHOW)
2430 /* on success, but empty result, show (n/a) */
2431 if (ret.str.buf[0] == '\0')
2432 bufcpy(&ret.str, 0, not_specified(output));
2435 printf("%s%s\n", line_pfx, RET_BUF(ret)->buf);
2437 show_strbuf(RET_BUF(ret), loc.pname, 0, ret.err);
2443 switch (traits->param) {
2444 case CL_DEVICE_VERSION:
2445 /* compute numeric value for OpenCL version */
2446 chk.dev_version = getOpenCLVersion(ret.str.buf + 7);
2448 case CL_DEVICE_EXTENSIONS:
2449 identify_device_extensions(extensions, &chk);
2451 case CL_DEVICE_TYPE:
2452 chk.devtype = ret.value.devtype;
2454 case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
2455 chk.cachetype = ret.value.cachetype;
2457 case CL_DEVICE_LOCAL_MEM_TYPE:
2458 chk.lmemtype = ret.value.lmemtype;
2460 case CL_DEVICE_IMAGE_SUPPORT:
2461 chk.image_support = ret.value.b;
2463 case CL_DEVICE_COMPILER_AVAILABLE:
2464 chk.compiler_available = ret.value.b;
2472 // and finally the extensions, if we retrieved them
2474 printf("%s" I1_STR "%s\n", line_pfx, (output->mode == CLINFO_HUMAN ?
2475 extensions_traits->pname :
2476 extensions_traits->sname), extensions);
2482 /* list of allowed properties for AMD offline devices */
2483 /* everything else seems to be set to 0, and all the other string properties
2484 * actually segfault the driver */
2486 static const cl_device_info amd_offline_info_whitelist[] = {
2488 /* These are present, but all the same, so just skip them:
2490 CL_DEVICE_VENDOR_ID,
2493 CL_DEVICE_OPENCL_C_VERSION,
2495 CL_DEVICE_EXTENSIONS,
2497 CL_DEVICE_GFXIP_MAJOR_AMD,
2498 CL_DEVICE_GFXIP_MINOR_AMD,
2499 CL_DEVICE_MAX_WORK_GROUP_SIZE,
2503 static const cl_device_info list_info_whitelist[] = {
2508 /* return a list of offline devices from the AMD extension */
2510 fetchOfflineDevicesAMD(const struct platform_list *plist, cl_uint p,
2511 /* the number of devices will be returned in ret->value.u32,
2512 * the associated context in ret->base.ctx;
2514 struct device_info_ret *ret)
2516 cl_platform_id pid = plist->platform[p];
2517 cl_device_id *device = NULL;
2518 cl_uint num_devs = 0;
2519 cl_context ctx = NULL;
2521 cl_context_properties ctxpft[] = {
2522 CL_CONTEXT_PLATFORM, (cl_context_properties)pid,
2523 CL_CONTEXT_OFFLINE_DEVICES_AMD, (cl_context_properties)CL_TRUE,
2527 ctx = clCreateContextFromType(ctxpft, CL_DEVICE_TYPE_ALL,
2528 NULL, NULL, &ret->err);
2529 REPORT_ERROR(&ret->err_str, ret->err, "create context");
2532 ret->err = REPORT_ERROR(&ret->err_str,
2533 clGetContextInfo(ctx, CL_CONTEXT_NUM_DEVICES,
2534 sizeof(num_devs), &num_devs, NULL),
2539 ALLOC(device, num_devs, "offline devices");
2541 ret->err = REPORT_ERROR(&ret->err_str,
2542 clGetContextInfo(ctx, CL_CONTEXT_DEVICES,
2543 num_devs*sizeof(*device), device, NULL),
2548 if (ctx) clReleaseContext(ctx);
2552 ret->value.u32 = num_devs;
2553 ret->base.ctx = ctx;
2558 void printPlatformName(const struct platform_list *plist, cl_uint p, struct _strbuf *str,
2559 const struct opt_out *output)
2561 const struct platform_data *pdata = plist->pdata + p;
2562 const char *brief_prefix = (output->mode == CLINFO_HUMAN ? "Platform #" : "");
2563 const char *title = (output->mode == CLINFO_HUMAN ? pinfo_traits[0].pname :
2564 pinfo_traits[0].sname);
2565 const int prefix_width = -line_pfx_len*(!output->brief);
2566 if (output->brief) {
2567 strbuf_printf(str, "%s%" PRIu32 ": ", brief_prefix, p);
2568 } else if (output->mode == CLINFO_RAW) {
2569 strbuf_printf(str, "[%s/*]", pdata->sname);
2571 sprintf(line_pfx, "%*s", prefix_width, str->buf);
2574 printf("%s%s\n", line_pfx, pdata->pname);
2576 printf("%s" I1_STR "%s\n", line_pfx, title, pdata->pname);
2579 void printPlatformDevices(const struct platform_list *plist, cl_uint p,
2580 const cl_device_id *device, cl_uint ndevs,
2581 struct _strbuf *str, const struct opt_out *output, cl_bool these_are_offline)
2583 const struct platform_data *pdata = plist->pdata + p;
2584 const cl_device_info *param_whitelist = output->brief ? list_info_whitelist :
2585 these_are_offline ? amd_offline_info_whitelist : NULL;
2588 if (output->detailed)
2589 printf("%s" I0_STR "%" PRIu32 "\n",
2591 num_devs_header(output, these_are_offline),
2594 for (d = 0; d < ndevs; ++d) {
2595 const cl_device_id dev = device[d];
2596 if (output->brief) {
2597 const cl_bool last_device = (d == ndevs - 1 &&
2598 output->mode != CLINFO_RAW &&
2599 (!output->offline ||
2600 !pdata->has_amd_offline ||
2601 these_are_offline));
2602 if (output->mode == CLINFO_RAW)
2603 sprintf(line_pfx, "%" PRIu32 "%c%" PRIu32 ": ",
2605 these_are_offline ? '*' : '.',
2608 sprintf(line_pfx, " +-- %sDevice #%" PRIu32 ": ",
2609 these_are_offline ? "Offline " : "",
2613 } else if (line_pfx_len > 0) {
2614 cl_int sd = (these_are_offline ? -1 : 1)*(cl_int)d;
2615 strbuf_printf(str, "[%s/%" PRId32 "]", pdata->sname, sd);
2616 sprintf(line_pfx, "%*s", -line_pfx_len, str->buf);
2618 printDeviceInfo(dev, plist, p, param_whitelist, output);
2619 if (output->detailed && d < pdata[p].ndevs - 1)
2627 void showDevices(const struct platform_list *plist, const struct opt_out *output)
2629 const cl_uint num_platforms = plist->num_platforms;
2630 const cl_uint maxdevs = plist->max_devs;
2631 const struct platform_data *pdata = plist->pdata;
2636 realloc_strbuf(&str, 1024, "show devices");
2638 if (output->mode == CLINFO_RAW) {
2640 strbuf_printf(&str, "%" PRIu32 ".%" PRIu32 ": ", num_platforms, maxdevs);
2642 strbuf_printf(&str, "[%*s/%" PRIu32 "] ",
2643 plist->max_sname_len, "", maxdevs);
2646 strbuf_printf(&str, " +-- %sDevice #%" PRIu32 ": ",
2647 (output->offline ? "Offline " : ""), maxdevs);
2649 str.buf[0] = '\0'; /* reset */
2650 /* TODO we have no prefix in HUMAN detailed output mode,
2651 * consider adding one
2656 line_pfx_len = (int)(strlen(str.buf) + 1);
2657 REALLOC(line_pfx, line_pfx_len, "line prefix");
2658 str.buf[0] = '\0'; /* reset */
2661 for (p = 0; p < num_platforms; ++p) {
2662 printPlatformName(plist, p, &str, output);
2664 printPlatformDevices(plist, p,
2665 get_platform_devs(plist, p), pdata[p].ndevs,
2666 &str, output, CL_FALSE);
2668 if (output->offline && pdata[p].has_amd_offline) {
2669 struct device_info_ret ret;
2670 cl_device_id *devs = NULL;
2672 INIT_RET(ret, "offline device");
2673 if (output->detailed)
2676 devs = fetchOfflineDevicesAMD(plist, p, &ret);
2678 puts(ret.err_str.buf);
2680 printPlatformDevices(plist, p, devs, ret.value.u32,
2681 &str, output, CL_TRUE);
2682 clReleaseContext(ret.base.ctx);
2687 if (output->detailed)
2693 /* check the behavior of clGetPlatformInfo() when given a NULL platform ID */
2694 void checkNullGetPlatformName(const struct opt_out *output)
2696 struct device_info_ret ret;
2697 struct info_loc loc;
2699 INIT_RET(ret, "null ctx");
2700 reset_loc(&loc, __func__);
2701 RESET_LOC_PARAM(loc, plat, CL_PLATFORM_NAME);
2703 ret.err = clGetPlatformInfo(NULL, CL_PLATFORM_NAME, ret.str.sz, ret.str.buf, NULL);
2704 if (ret.err == CL_INVALID_PLATFORM) {
2705 bufcpy(&ret.err_str, 0, no_plat(output));
2707 loc.line = __LINE__ + 1;
2708 REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s");
2710 printf(I1_STR "%s\n",
2711 "clGetPlatformInfo(NULL, CL_PLATFORM_NAME, ...)", RET_BUF(ret)->buf);
2715 /* check the behavior of clGetDeviceIDs() when given a NULL platform ID;
2716 * return the index of the default platform in our array of platform IDs,
2717 * or num_platforms (which is an invalid platform index) in case of errors
2718 * or no platform or device found.
2720 cl_uint checkNullGetDevices(const struct platform_list *plist, const struct opt_out *output)
2722 const cl_uint num_platforms = plist->num_platforms;
2723 const struct platform_data *pdata = plist->pdata;
2724 const cl_platform_id *platform = plist->platform;
2726 struct device_info_ret ret;
2727 struct info_loc loc;
2729 cl_uint i = 0; /* generic iterator */
2730 cl_device_id dev = NULL; /* sample device */
2731 cl_platform_id plat = NULL; /* detected platform */
2733 cl_uint found = 0; /* number of platforms found */
2734 cl_uint pidx = num_platforms; /* index of the platform found */
2735 cl_uint numdevs = 0;
2737 INIT_RET(ret, "null get devices");
2739 reset_loc(&loc, __func__);
2740 loc.sname = "device IDs";
2742 ret.err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 0, NULL, &numdevs);
2743 /* TODO we should check other CL_DEVICE_TYPE_* combinations, since a smart
2744 * implementation might give you a different default platform for GPUs
2746 * Of course the “no devices” case would then need to be handled differently.
2747 * The logic might be maintained similarly, provided we also gather
2748 * the number of devices of each type for each platform, although it's
2749 * obviously more likely to have multiple platforms with no devices
2754 case CL_INVALID_PLATFORM:
2755 bufcpy(&ret.err_str, 0, no_plat(output));
2757 case CL_DEVICE_NOT_FOUND:
2758 /* No devices were found, see if there are platforms with
2759 * no devices, and if there's only one, assume this is the
2760 * one being used as default by the ICD loader */
2761 for (i = 0; i < num_platforms; ++i) {
2762 if (pdata[i].ndevs == 0) {
2775 bufcpy(&ret.err_str, 0, (output->mode == CLINFO_HUMAN ?
2776 "<error: 0 devices, no matching platform!>" :
2777 "CL_DEVICE_NOT_FOUND | CL_INVALID_PLATFORM"));
2780 bufcpy(&ret.str, 0, (output->mode == CLINFO_HUMAN ?
2782 pdata[pidx].sname));
2784 default: /* found > 1 */
2785 bufcpy(&ret.err_str, 0, (output->mode == CLINFO_HUMAN ?
2786 "<error: 0 devices, multiple matching platforms!>" :
2787 "CL_DEVICE_NOT_FOUND | ????"));
2792 loc.line = __LINE__+1;
2793 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get number of %s")) break;
2795 /* Determine platform by looking at the CL_DEVICE_PLATFORM of
2796 * one of the devices */
2797 ret.err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 1, &dev, NULL);
2798 loc.line = __LINE__+1;
2799 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
2801 RESET_LOC_PARAM(loc, dev, CL_DEVICE_PLATFORM);
2802 ret.err = clGetDeviceInfo(dev, CL_DEVICE_PLATFORM,
2803 sizeof(plat), &plat, NULL);
2804 loc.line = __LINE__+1;
2805 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
2807 for (i = 0; i < num_platforms; ++i) {
2808 if (platform[i] == plat) {
2810 strbuf_printf(&ret.str, "%s [%s]",
2811 (output->mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS"),
2816 if (i == num_platforms) {
2817 ret.err = CL_INVALID_PLATFORM;
2818 strbuf_printf(&ret.err_str, "<error: platform %p not found>", (void*)plat);
2821 printf(I1_STR "%s\n",
2822 "clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, ...)", RET_BUF(ret)->buf);
2828 void checkNullCtx(struct device_info_ret *ret,
2829 const struct platform_list *plist, cl_uint pidx, const char *which,
2830 const struct opt_out *output)
2832 const cl_device_id *dev = plist->all_devs + plist->dev_offset[pidx];
2833 struct info_loc loc;
2834 cl_context ctx = clCreateContext(NULL, 1, dev, NULL, NULL, &ret->err);
2836 reset_loc(&loc, __func__);
2838 loc.line = __LINE__+2;
2840 if (!REPORT_ERROR_LOC(ret, ret->err, &loc, "create context with device from %s platform"))
2841 strbuf_printf(&ret->str, "%s [%s]",
2842 (output->mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS"),
2843 plist->pdata[pidx].sname);
2845 clReleaseContext(ctx);
2850 /* check behavior of clCreateContextFromType() with NULL cl_context_properties */
2851 void checkNullCtxFromType(const struct platform_list *plist, const struct opt_out *output)
2853 const cl_uint num_platforms = plist->num_platforms;
2854 const struct platform_data *pdata = plist->pdata;
2855 const cl_platform_id *platform = plist->platform;
2857 size_t t; /* type iterator */
2858 size_t i; /* generic iterator */
2860 cl_context ctx = NULL;
2864 size_t cursz = ndevs*sizeof(cl_device_id);
2865 cl_platform_id plat = NULL;
2866 cl_device_id *devs = NULL;
2868 struct device_info_ret ret;
2869 struct info_loc loc;
2871 const char *platname_prop = (output->mode == CLINFO_HUMAN ?
2872 pinfo_traits[0].pname :
2873 pinfo_traits[0].sname);
2875 const char *devname_prop = (output->mode == CLINFO_HUMAN ?
2876 dinfo_traits[0].pname :
2877 dinfo_traits[0].sname);
2879 reset_loc(&loc, __func__);
2880 INIT_RET(ret, "null ctx from type");
2882 ALLOC(devs, ndevs, "context devices");
2884 for (t = 1; t < devtype_count; ++t) { /* we skip 0 */
2885 loc.sname = device_type_raw_str[t];
2887 strbuf_printf(&ret.str, "clCreateContextFromType(NULL, %s)", loc.sname);
2888 sprintf(def, I1_STR, ret.str.buf);
2890 loc.line = __LINE__+1;
2891 ctx = clCreateContextFromType(NULL, devtype[t], NULL, NULL, &ret.err);
2894 case CL_INVALID_PLATFORM:
2895 bufcpy(&ret.err_str, 0, no_plat(output)); break;
2896 case CL_DEVICE_NOT_FOUND:
2897 bufcpy(&ret.err_str, 0, no_dev_found(output)); break;
2898 case CL_INVALID_DEVICE_TYPE: /* e.g. _CUSTOM device on 1.1 platform */
2899 bufcpy(&ret.err_str, 0, invalid_dev_type(output)); break;
2900 case CL_INVALID_VALUE: /* This is what apple returns for the case above */
2901 bufcpy(&ret.err_str, 0, invalid_dev_type(output)); break;
2902 case CL_DEVICE_NOT_AVAILABLE:
2903 bufcpy(&ret.err_str, 0, no_dev_avail(output)); break;
2905 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "create context from type %s")) break;
2907 /* get the devices */
2908 loc.sname = "CL_CONTEXT_DEVICES";
2909 loc.line = __LINE__+2;
2911 ret.err = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, 0, NULL, &szval);
2912 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s size")) break;
2913 if (szval > cursz) {
2914 REALLOC(devs, szval, "context devices");
2918 loc.line = __LINE__+1;
2919 ret.err = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, cursz, devs, NULL);
2920 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
2921 ndevs = szval/sizeof(cl_device_id);
2923 ret.err = CL_DEVICE_NOT_FOUND;
2924 bufcpy(&ret.err_str, 0, "<error: context created with no devices>");
2927 /* get the platform from the first device */
2928 RESET_LOC_PARAM(loc, dev, CL_DEVICE_PLATFORM);
2929 loc.line = __LINE__+1;
2930 ret.err = clGetDeviceInfo(*devs, CL_DEVICE_PLATFORM, sizeof(plat), &plat, NULL);
2931 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
2935 for (i = 0; i < num_platforms; ++i) {
2936 if (platform[i] == plat)
2939 if (i == num_platforms) {
2940 ret.err = CL_INVALID_PLATFORM;
2941 strbuf_printf(&ret.err_str, "<error: platform %p not found>", (void*)plat);
2944 szval += strbuf_printf(&ret.str, "%s (%" PRIuS ")",
2945 (output->mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS"),
2947 szval += snprintf(ret.str.buf + szval, ret.str.sz - szval, "\n" I2_STR "%s",
2948 platname_prop, pdata[i].pname);
2950 for (i = 0; i < ndevs; ++i) {
2952 /* for each device, show the device name */
2953 /* TODO some other unique ID too, e.g. PCI address, if available? */
2955 szval += snprintf(ret.str.buf + szval, ret.str.sz - szval, "\n" I2_STR, devname_prop);
2956 if (szval >= ret.str.sz) {
2957 trunc_strbuf(&ret.str);
2961 RESET_LOC_PARAM(loc, dev, CL_DEVICE_NAME);
2963 loc.line = __LINE__+1;
2964 ret.err = clGetDeviceInfo(devs[i], CL_DEVICE_NAME, ret.str.sz - szval, ret.str.buf + szval, &szname);
2965 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
2966 szval += szname - 1;
2969 break; /* had an error earlier, bail */
2974 clReleaseContext(ctx);
2977 printf("%s%s\n", def, RET_BUF(ret)->buf);
2983 /* check the behavior of NULL platform in clGetDeviceIDs (see checkNullGetDevices)
2984 * and in clCreateContext() */
2985 void checkNullBehavior(const struct platform_list *plist, const struct opt_out *output)
2987 const cl_uint num_platforms = plist->num_platforms;
2988 const struct platform_data *pdata = plist->pdata;
2991 struct device_info_ret ret;
2993 INIT_RET(ret, "null behavior");
2995 printf("NULL platform behavior\n");
2997 checkNullGetPlatformName(output);
2999 p = checkNullGetDevices(plist, output);
3001 /* If there's a default platform, and it has devices, try
3002 * creating a context with its first device and see if it works */
3004 if (p == num_platforms) {
3005 ret.err = CL_INVALID_PLATFORM;
3006 bufcpy(&ret.err_str, 0, no_plat(output));
3007 } else if (pdata[p].ndevs == 0) {
3008 ret.err = CL_DEVICE_NOT_FOUND;
3009 bufcpy(&ret.err_str, 0, no_dev_found(output));
3011 if (p < num_platforms) {
3012 checkNullCtx(&ret, plist, p, "default", output);
3014 /* this shouldn't happen, but still ... */
3015 ret.err = CL_OUT_OF_HOST_MEMORY;
3016 bufcpy(&ret.err_str, 0, "<error: overflow in default platform scan>");
3019 printf(I1_STR "%s\n", "clCreateContext(NULL, ...) [default]", RET_BUF(ret)->buf);
3021 /* Look for a device from a non-default platform, if there are any */
3022 if (p == num_platforms || num_platforms > 1) {
3024 while (p2 < num_platforms && (p2 == p || pdata[p2].ndevs == 0)) {
3027 if (p2 < num_platforms) {
3028 checkNullCtx(&ret, plist, p2, "non-default", output);
3030 ret.err = CL_DEVICE_NOT_FOUND;
3031 bufcpy(&ret.str, 0, "<error: no devices in non-default plaforms>");
3033 printf(I1_STR "%s\n", "clCreateContext(NULL, ...) [other]", RET_BUF(ret)->buf);
3036 checkNullCtxFromType(plist, output);
3042 /* Get properties of the ocl-icd loader, if available */
3043 /* All properties are currently char[] */
3045 /* Function pointer to the ICD loader info function */
3047 typedef cl_int (*icdl_info_fn_ptr)(cl_icdl_info, size_t, void*, size_t*);
3048 icdl_info_fn_ptr clGetICDLoaderInfoOCLICD;
3050 /* We want to auto-detect the OpenCL version supported by the ICD loader.
3051 * To do this, we will progressively find symbols introduced in new APIs,
3052 * until a NULL symbol is found.
3055 struct icd_loader_test {
3058 } icd_loader_tests[] = {
3059 { 11, "clCreateSubBuffer" },
3060 { 12, "clCreateImage" },
3061 { 20, "clSVMAlloc" },
3062 { 21, "clGetHostTimer" },
3063 { 22, "clSetProgramSpecializationConstant" },
3068 icdl_info_str(struct icdl_info_ret *ret, const struct info_loc *loc)
3070 GET_STRING_LOC(ret, loc, clGetICDLoaderInfoOCLICD, loc->param.icdl);
3074 struct icdl_info_traits {
3075 cl_icdl_info param; // CL_ICDL_*
3076 const char *sname; // "CL_ICDL_*"
3077 const char *pname; // "ICD loader *"
3080 static const char * const oclicdl_pfx = "OCLICD";
3082 #define LINFO(symbol, name) { symbol, #symbol, "ICD loader " name }
3083 struct icdl_info_traits linfo_traits[] = {
3084 LINFO(CL_ICDL_NAME, "Name"),
3085 LINFO(CL_ICDL_VENDOR, "Vendor"),
3086 LINFO(CL_ICDL_VERSION, "Version"),
3087 LINFO(CL_ICDL_OCL_VERSION, "Profile")
3090 /* The ICD loader info function must be retrieved via clGetExtensionFunctionAddress,
3091 * which returns a void pointer.
3092 * ISO C forbids assignments between function pointers and void pointers,
3093 * but POSIX allows it. To compile without warnings even in -pedantic mode,
3094 * we take advantage of the fact that we _can_ do the conversion via
3095 * pointers-to-pointers. This is supported on most compilers, except
3096 * for some rather old GCC versions whose strict aliasing rules are
3097 * too strict. Disable strict aliasing warnings for these compilers.
3099 #if defined __GNUC__ && ((__GNUC__*10 + __GNUC_MINOR__) < 46)
3100 #pragma GCC diagnostic ignored "-Wstrict-aliasing"
3103 struct icdl_data oclIcdProps(const struct platform_list *plist, const struct opt_out *output)
3105 const cl_uint max_plat_version = plist->max_plat_version;
3107 struct icdl_data icdl;
3109 /* Counter that'll be used to walk the icd_loader_tests */
3112 /* We find the clGetICDLoaderInfoOCLICD extension address, which will be used
3113 * to query the ICD loader properties.
3114 * It should be noted that in this specific case we cannot replace the
3115 * call to clGetExtensionFunctionAddress with a call to the superseding function
3116 * clGetExtensionFunctionAddressForPlatform because the extension is in the
3117 * loader itself, not in a specific platform.
3119 void *ptrHack = clGetExtensionFunctionAddress("clGetICDLoaderInfoOCLICD");
3120 clGetICDLoaderInfoOCLICD = *(icdl_info_fn_ptr*)(&ptrHack);
3122 /* Initialize icdl_data ret versions */
3123 icdl.detected_version = 10;
3124 icdl.reported_version = 0;
3126 /* Step #1: try to auto-detect the supported ICD loader version */
3128 struct icd_loader_test check = icd_loader_tests[i];
3129 if (check.symbol == NULL)
3131 if (dlsym(DL_MODULE, check.symbol) == NULL)
3133 icdl.detected_version = check.version;
3137 /* Step #2: query properties from extension, if available */
3138 if (clGetICDLoaderInfoOCLICD != NULL) {
3139 struct info_loc loc;
3140 struct icdl_info_ret ret;
3141 reset_loc(&loc, __func__);
3142 INIT_RET(ret, "ICD loader");
3144 /* TODO think of a sensible header in CLINFO_RAW */
3145 if (output->mode != CLINFO_RAW)
3146 puts("\nICD loader properties");
3148 if (output->mode == CLINFO_RAW) {
3149 line_pfx_len = (int)(strlen(oclicdl_pfx) + 5);
3150 REALLOC(line_pfx, line_pfx_len, "line prefix OCL ICD");
3151 strbuf_printf(&ret.str, "[%s/*]", oclicdl_pfx);
3152 sprintf(line_pfx, "%*s", -line_pfx_len, ret.str.buf);
3155 for (loc.line = 0; loc.line < ARRAY_SIZE(linfo_traits); ++loc.line) {
3156 const struct icdl_info_traits *traits = linfo_traits + loc.line;
3157 loc.sname = traits->sname;
3158 loc.pname = (output->mode == CLINFO_HUMAN ?
3159 traits->pname : traits->sname);
3160 loc.param.icdl = traits->param;
3162 ret.str.buf[0] = '\0';
3163 ret.err_str.buf[0] = '\0';
3164 icdl_info_str(&ret, &loc);
3165 show_strbuf(RET_BUF(ret), loc.pname, 1, ret.err);
3167 if (!ret.err && traits->param == CL_ICDL_OCL_VERSION) {
3168 icdl.reported_version = getOpenCLVersion(ret.str.buf + 7);
3174 /* Step #3: show it */
3175 if (output->mode == CLINFO_HUMAN) {
3176 if (icdl.reported_version &&
3177 icdl.reported_version != icdl.detected_version) {
3178 printf( "\tNOTE:\tyour OpenCL library declares to support OpenCL %" PRIu32 ".%" PRIu32 ",\n"
3179 "\t\tbut it seems to support up to OpenCL %" PRIu32 ".%" PRIu32 " %s.\n",
3180 SPLIT_CL_VERSION(icdl.reported_version),
3181 SPLIT_CL_VERSION(icdl.detected_version),
3182 icdl.detected_version < icdl.reported_version ?
3185 if (icdl.detected_version < max_plat_version) {
3186 printf( "\tNOTE:\tyour OpenCL library only supports OpenCL %" PRIu32 ".%" PRIu32 ",\n"
3187 "\t\tbut some installed platforms support OpenCL %" PRIu32 ".%" PRIu32 ".\n"
3188 "\t\tPrograms using %" PRIu32 ".%" PRIu32 " features may crash\n"
3189 "\t\tor behave unexpectedly\n",
3190 SPLIT_CL_VERSION(icdl.detected_version),
3191 SPLIT_CL_VERSION(max_plat_version),
3192 SPLIT_CL_VERSION(max_plat_version));
3198 #if defined __GNUC__ && ((__GNUC__*10 + __GNUC_MINOR__) < 46)
3199 #pragma GCC diagnostic warning "-Wstrict-aliasing"
3204 puts("clinfo version 2.2.18.04.06");
3210 puts("Display properties of all available OpenCL platforms and devices");
3211 puts("Usage: clinfo [options ...]\n");
3213 puts("\t--all-props, -a\t\ttry all properties, only show valid ones");
3214 puts("\t--always-all-props, -A\t\tshow all properties, even if invalid");
3215 puts("\t--human\t\thuman-friendly output (default)");
3216 puts("\t--raw\t\traw output");
3217 puts("\t--offline\talso show offline devices");
3218 puts("\t--list, -l\tonly list the platforms and devices by name");
3219 puts("\t-h, -?\t\tshow usage");
3220 puts("\t--version, -v\tshow version\n");
3221 puts("Defaults to raw mode if invoked with");
3222 puts("a name that contains the string \"raw\"");
3225 int main(int argc, char *argv[])
3231 struct opt_out output;
3233 struct platform_list plist;
3236 output.mode = CLINFO_HUMAN;
3237 output.cond = COND_PROP_CHECK;
3238 output.brief = CL_FALSE;
3239 output.offline = CL_FALSE;
3240 output.check_size = CL_FALSE;
3242 /* if there's a 'raw' in the program name, switch to raw output mode */
3243 if (strstr(argv[0], "raw"))
3244 output.mode = CLINFO_RAW;
3246 /* process command-line arguments */
3247 while (++a < argc) {
3248 if (!strcmp(argv[a], "-a") || !strcmp(argv[a], "--all-props"))
3249 output.cond = COND_PROP_TRY;
3250 else if (!strcmp(argv[a], "-A") || !strcmp(argv[a], "--always-all-props"))
3251 output.cond = COND_PROP_SHOW;
3252 else if (!strcmp(argv[a], "--raw"))
3253 output.mode = CLINFO_RAW;
3254 else if (!strcmp(argv[a], "--human"))
3255 output.mode = CLINFO_HUMAN;
3256 else if (!strcmp(argv[a], "--offline"))
3257 output.offline = CL_TRUE;
3258 else if (!strcmp(argv[a], "-l") || !strcmp(argv[a], "--list"))
3259 output.brief = CL_TRUE;
3260 else if (!strcmp(argv[a], "-?") || !strcmp(argv[a], "-h")) {
3263 } else if (!strcmp(argv[a], "--version") || !strcmp(argv[a], "-v")) {
3267 fprintf(stderr, "ignoring unknown command-line parameter %s\n", argv[a]);
3270 output.detailed = !output.brief;
3272 err = clGetPlatformIDs(0, NULL, &plist.num_platforms);
3273 if (err != CL_PLATFORM_NOT_FOUND_KHR)
3274 CHECK_ERROR(err, "number of platforms");
3277 printf(I0_STR "%" PRIu32 "\n",
3278 (output.mode == CLINFO_HUMAN ?
3279 "Number of platforms" : "#PLATFORMS"),
3280 plist.num_platforms);
3281 if (!plist.num_platforms)
3284 alloc_plist(&plist);
3285 err = clGetPlatformIDs(plist.num_platforms, plist.platform, NULL);
3286 CHECK_ERROR(err, "platform IDs");
3288 ALLOC(line_pfx, 1, "line prefix");
3290 for (p = 0; p < plist.num_platforms; ++p) {
3291 gatherPlatformInfo(&plist, p, &output);
3292 if (output.detailed)
3295 showDevices(&plist, &output);
3296 if (output.detailed) {
3297 if (output.mode != CLINFO_RAW)
3298 checkNullBehavior(&plist, &output);
3299 oclIcdProps(&plist, &output);