1 /* Collect all available information on all available devices
2 * on all available OpenCL platforms present in the system
8 /* We will want to check for symbols in the OpenCL library.
9 * On Windows, we must get the module handle for it, on Unix-like
10 * systems we can just use RTLD_DEFAULT
14 # define dlsym GetProcAddress
15 # define DL_MODULE GetModuleHandle("OpenCL")
18 # define DL_MODULE ((void*)0) /* This would be RTLD_DEFAULT */
21 /* Load STDC format macros (PRI*), or define them
22 * for those crappy, non-standard compilers
24 #include "fmtmacros.h"
26 // More support for the horrible MS C compiler
28 #include "ms_support.h"
41 #define ARRAY_SIZE(ar) (sizeof(ar)/sizeof(*ar))
44 #define UNUSED(x) x __attribute__((unused))
47 struct platform_data {
48 char *pname; /* CL_PLATFORM_NAME */
49 char *sname; /* CL_PLATFORM_ICD_SUFFIX_KHR or surrogate */
50 cl_uint ndevs; /* number of devices */
51 cl_bool has_amd_offline; /* has cl_amd_offline_devices extension */
54 struct platform_info_checks {
57 cl_bool has_amd_object_metadata;
58 cl_bool has_extended_versioning;
61 struct platform_list {
62 /* Number of platforms in the system */
63 cl_uint num_platforms;
64 /* Total number of devices across all platforms */
66 /* Number of devices allocated in all_devs array */
68 /* Highest OpenCL version supported by any platform.
69 * If the OpenCL library / ICD loader only supports
70 * a lower version, problems may arise (such as
71 * API calls causing segfaults or any other unexpected
74 cl_uint max_plat_version;
75 /* Largest number of devices on any platform */
77 /* Length of the longest platform sname */
79 /* Array of platform IDs */
80 cl_platform_id *platform;
81 /* Array of device IDs (across all platforms) */
82 cl_device_id *all_devs;
83 /* Array of offsets in all_devs where the devices
84 * of each platform begin */
86 /* Array of clinfo-specific platform data */
87 struct platform_data *pdata;
88 /* Array of clinfo-specific platform checks */
89 struct platform_info_checks *platform_checks;
93 init_plist(struct platform_list *plist)
95 plist->num_platforms = 0;
96 plist->ndevs_total = 0;
97 plist->alloc_devs = 0;
98 plist->max_plat_version = 0;
100 plist->max_sname_len = 0;
101 plist->platform = NULL;
102 plist->all_devs = NULL;
103 plist->dev_offset = NULL;
105 plist->platform_checks = NULL;
108 void plist_devs_reserve(struct platform_list *plist, cl_uint amount)
110 if (amount > plist->alloc_devs) {
111 REALLOC(plist->all_devs, amount, "all devices");
112 plist->alloc_devs = amount;
118 alloc_plist(struct platform_list *plist)
120 ALLOC(plist->platform, plist->num_platforms, "platform IDs");
121 ALLOC(plist->dev_offset, plist->num_platforms, "platform device list offset");
122 /* The actual sizing for this will change as we gather platform info,
123 * but assume at least one device per platform
125 plist_devs_reserve(plist, plist->num_platforms);
126 ALLOC(plist->pdata, plist->num_platforms, "platform data");
127 ALLOC(plist->platform_checks, plist->num_platforms, "platform checks data");
130 free_plist(struct platform_list *plist)
132 free(plist->platform);
133 free(plist->all_devs);
134 free(plist->dev_offset);
135 for (cl_uint p = 0 ; p < plist->num_platforms; ++p) {
136 free(plist->pdata[p].sname);
137 free(plist->pdata[p].pname);
140 free(plist->platform_checks);
145 get_platform_devs(const struct platform_list *plist, cl_uint p)
147 return plist->all_devs + plist->dev_offset[p];
151 get_platform_dev(const struct platform_list *plist, cl_uint p, cl_uint d)
153 return get_platform_devs(plist, p)[d];
156 /* Data for the OpenCL library / ICD loader */
158 /* auto-detected OpenCL version support for the ICD loader */
159 cl_uint detected_version;
160 /* OpenCL version support declared by the ICD loader */
161 cl_uint reported_version;
164 /* line prefix, used to identify the platform/device for each
165 * device property in RAW output mode */
169 #define CHECK_SIZE(ret, loc, val, cmd, ...) do { \
170 /* check if the issue is with param size */ \
171 if (output->check_size && ret->err == CL_INVALID_VALUE) { \
173 if (cmd(__VA_ARGS__, 0, NULL, &_actual_sz) == CL_SUCCESS) { \
174 REPORT_SIZE_MISMATCH(&(ret->err_str), loc, _actual_sz, sizeof(val)); \
179 static const char unk[] = "Unknown";
180 static const char none[] = "None";
181 static const char none_raw[] = "CL_NONE";
182 static const char na[] = "n/a"; // not available
183 static const char na_wrap[] = "(n/a)"; // not available
184 static const char core[] = "core";
186 static const char bytes_str[] = " bytes";
187 static const char pixels_str[] = " pixels";
188 static const char images_str[] = " images";
190 static const char* bool_str[] = { "No", "Yes" };
191 static const char* bool_raw_str[] = { "CL_FALSE", "CL_TRUE" };
193 static const char* endian_str[] = { "Big-Endian", "Little-Endian" };
195 static const cl_device_type devtype[] = { 0,
196 CL_DEVICE_TYPE_DEFAULT, CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU,
197 CL_DEVICE_TYPE_ACCELERATOR, CL_DEVICE_TYPE_CUSTOM, CL_DEVICE_TYPE_ALL };
199 const size_t devtype_count = ARRAY_SIZE(devtype);
200 /* number of actual device types, without ALL */
201 const size_t actual_devtype_count = ARRAY_SIZE(devtype) - 1;
203 static const char* device_type_str[] = { unk, "Default", "CPU", "GPU", "Accelerator", "Custom", "All" };
204 static const char* device_type_raw_str[] = { unk,
205 "CL_DEVICE_TYPE_DEFAULT", "CL_DEVICE_TYPE_CPU", "CL_DEVICE_TYPE_GPU",
206 "CL_DEVICE_TYPE_ACCELERATOR", "CL_DEVICE_TYPE_CUSTOM", "CL_DEVICE_TYPE_ALL"
209 static const char* partition_type_str[] = {
210 none, "equally", "by counts", "by affinity domain", "by names (Intel)"
212 static const char* partition_type_raw_str[] = {
214 "CL_DEVICE_PARTITION_EQUALLY_EXT",
215 "CL_DEVICE_PARTITION_BY_COUNTS_EXT",
216 "CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT",
217 "CL_DEVICE_PARTITION_BY_NAMES_INTEL_EXT"
220 static const char* atomic_cap_str[] = {
221 "relaxed", "acquire/release", "sequentially-consistent",
222 "work-item scope", "work-group scope", "device scope", "all-devices scope"
224 static const char* atomic_cap_raw_str[] = {
225 "CL_DEVICE_ATOMIC_ORDER_RELAXED",
226 "CL_DEVICE_ATOMIC_ORDER_ACQ_REL",
227 "CL_DEVICE_ATOMIC_ORDER_SEQ_CST",
228 "CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM",
229 "CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP",
230 "CL_DEVICE_ATOMIC_SCOPE_DEVICE",
231 "CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES"
233 const size_t atomic_cap_count = ARRAY_SIZE(atomic_cap_str);
235 static const char *device_enqueue_cap_str[] = {
236 "supported", "replaceable default queue"
239 static const char *device_enqueue_cap_raw_str[] = {
240 "CL_DEVICE_QUEUE_SUPPORTED",
241 "CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT"
243 const size_t device_enqueue_cap_count = ARRAY_SIZE(atomic_cap_str);
246 static const char numa[] = "NUMA";
247 static const char l1cache[] = "L1 cache";
248 static const char l2cache[] = "L2 cache";
249 static const char l3cache[] = "L3 cache";
250 static const char l4cache[] = "L4 cache";
252 static const char* affinity_domain_str[] = {
253 numa, l4cache, l3cache, l2cache, l1cache, "next partitionable"
256 static const char* affinity_domain_ext_str[] = {
257 numa, l4cache, l3cache, l2cache, l1cache, "next fissionable"
260 static const char* affinity_domain_raw_str[] = {
261 "CL_DEVICE_AFFINITY_DOMAIN_NUMA",
262 "CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE",
263 "CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE",
264 "CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE",
265 "CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE",
266 "CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE"
269 static const char* affinity_domain_raw_ext_str[] = {
270 "CL_AFFINITY_DOMAIN_NUMA_EXT",
271 "CL_AFFINITY_DOMAIN_L4_CACHE_EXT",
272 "CL_AFFINITY_DOMAIN_L3_CACHE_EXT",
273 "CL_AFFINITY_DOMAIN_L2_CACHE_EXT",
274 "CL_AFFINITY_DOMAIN_L1_CACHE_EXT",
275 "CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT"
278 const size_t affinity_domain_count = ARRAY_SIZE(affinity_domain_str);
280 static const char *terminate_capability_str[] = {
284 static const char *terminate_capability_raw_str[] = {
285 "CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR"
288 const size_t terminate_capability_count = ARRAY_SIZE(terminate_capability_str);
290 static const char* fp_conf_str[] = {
291 "Denormals", "Infinity and NANs", "Round to nearest", "Round to zero",
292 "Round to infinity", "IEEE754-2008 fused multiply-add",
293 "Support is emulated in software",
294 "Correctly-rounded divide and sqrt operations"
297 static const char* fp_conf_raw_str[] = {
300 "CL_FP_ROUND_TO_NEAREST",
301 "CL_FP_ROUND_TO_ZERO",
302 "CL_FP_ROUND_TO_INF",
305 "CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT"
308 const size_t fp_conf_count = ARRAY_SIZE(fp_conf_str);
310 static const char* svm_cap_str[] = {
311 "Coarse-grained buffer sharing",
312 "Fine-grained buffer sharing",
313 "Fine-grained system sharing",
317 static const char* svm_cap_raw_str[] = {
318 "CL_DEVICE_SVM_COARSE_GRAIN_BUFFER",
319 "CL_DEVICE_SVM_FINE_GRAIN_BUFFER",
320 "CL_DEVICE_SVM_FINE_GRAIN_SYSTEM",
321 "CL_DEVICE_SVM_ATOMICS",
324 const size_t svm_cap_count = ARRAY_SIZE(svm_cap_str);
326 /* SI suffixes for memory sizes. Note that in OpenCL most of them are
327 * passed via a cl_ulong, which at most can mode 16 EiB, but hey,
328 * let's be forward-thinking ;-)
330 static const char* memsfx[] = {
331 "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
334 const size_t memsfx_end = ARRAY_SIZE(memsfx) + 1;
336 static const char* lmem_type_str[] = { none, "Local", "Global" };
337 static const char* lmem_type_raw_str[] = { none_raw, "CL_LOCAL", "CL_GLOBAL" };
338 static const char* cache_type_str[] = { none, "Read-Only", "Read/Write" };
339 static const char* cache_type_raw_str[] = { none_raw, "CL_READ_ONLY_CACHE", "CL_READ_WRITE_CACHE" };
341 static const char* queue_prop_str[] = { "Out-of-order execution", "Profiling" };
342 static const char* queue_prop_raw_str[] = {
343 "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE",
344 "CL_QUEUE_PROFILING_ENABLE"
347 const size_t queue_prop_count = ARRAY_SIZE(queue_prop_str);
349 static const char* execap_str[] = { "Run OpenCL kernels", "Run native kernels" };
350 static const char* execap_raw_str[] = {
352 "CL_EXEC_NATIVE_KERNEL"
355 const size_t execap_count = ARRAY_SIZE(execap_str);
357 static const char* sources[] = {
358 "#define GWO(type) global type* restrict\n",
359 "#define GRO(type) global const type* restrict\n",
360 "#define BODY int i = get_global_id(0); out[i] = in1[i] + in2[i]\n",
361 "#define _KRN(T, N) kernel void sum##N(GWO(T##N) out, GRO(T##N) in1, GRO(T##N) in2) { BODY; }\n",
362 "#define KRN(N) _KRN(float, N)\n",
363 "KRN()\n/* KRN(2)\nKRN(4)\nKRN(8)\nKRN(16) */\n",
366 const char *num_devs_header(const struct opt_out *output, cl_bool these_are_offline)
368 return output->mode == CLINFO_HUMAN ?
369 (these_are_offline ? "Number of offine devices (AMD)" : "Number of devices") :
370 (these_are_offline ? "#OFFDEVICES" : "#DEVICES");
373 const char *not_specified(const struct opt_out *output)
375 return output->mode == CLINFO_HUMAN ?
379 const char *no_plat(const struct opt_out *output)
381 return output->mode == CLINFO_HUMAN ?
383 "CL_INVALID_PLATFORM";
386 const char *invalid_dev_type(const struct opt_out *output)
388 return output->mode == CLINFO_HUMAN ?
389 "Invalid device type for platform" :
390 "CL_INVALID_DEVICE_TYPE";
393 const char *invalid_dev_value(const struct opt_out *output)
395 return output->mode == CLINFO_HUMAN ?
396 "Invalid device type value for platform" :
400 const char *no_dev_found(const struct opt_out *output)
402 return output->mode == CLINFO_HUMAN ?
403 "No devices found in platform" :
404 "CL_DEVICE_NOT_FOUND";
407 const char *no_dev_avail(const struct opt_out *output)
409 return output->mode == CLINFO_HUMAN ?
410 "No devices available in platform" :
411 "CL_DEVICE_NOT_AVAILABLE";
414 /* OpenCL context interop names */
416 typedef struct cl_interop_name {
419 /* 5 because that's the largest we know of,
420 * 2 because it's HUMAN, RAW */
421 const char *value[5][2];
424 static const cl_interop_name cl_interop_names[] = {
425 { /* cl_khr_gl_sharing */
427 CL_CGL_SHAREGROUP_KHR,
429 { "GL", "CL_GL_CONTEXT_KHR" },
430 { "EGL", "CL_EGL_DISPALY_KHR" },
431 { "GLX", "CL_GLX_DISPLAY_KHR" },
432 { "WGL", "CL_WGL_HDC_KHR" },
433 { "CGL", "CL_CGL_SHAREGROUP_KHR" }
436 { /* cl_khr_dx9_media_sharing */
437 CL_CONTEXT_ADAPTER_D3D9_KHR,
438 CL_CONTEXT_ADAPTER_DXVA_KHR,
440 { "D3D9 (KHR)", "CL_CONTEXT_ADAPTER_D3D9_KHR" },
441 { "D3D9Ex (KHR)", "CL_CONTEXT_ADAPTER_D3D9EX_KHR" },
442 { "DXVA (KHR)", "CL_CONTEXT_ADAPTER_DXVA_KHR" }
445 { /* cl_khr_d3d10_sharing */
446 CL_CONTEXT_D3D10_DEVICE_KHR,
447 CL_CONTEXT_D3D10_DEVICE_KHR,
449 { "D3D10", "CL_CONTEXT_D3D10_DEVICE_KHR" }
452 { /* cl_khr_d3d11_sharing */
453 CL_CONTEXT_D3D11_DEVICE_KHR,
454 CL_CONTEXT_D3D11_DEVICE_KHR,
456 { "D3D11", "CL_CONTEXT_D3D11_DEVICE_KHR" }
459 /* cl_intel_dx9_media_sharing is split in two because the allowed values are not consecutive */
460 { /* cl_intel_dx9_media_sharing part 1 */
461 CL_CONTEXT_D3D9_DEVICE_INTEL,
462 CL_CONTEXT_D3D9_DEVICE_INTEL,
464 { "D3D9 (INTEL)", "CL_CONTEXT_D3D9_DEVICE_INTEL" }
467 { /* cl_intel_dx9_media_sharing part 2 */
468 CL_CONTEXT_D3D9EX_DEVICE_INTEL,
469 CL_CONTEXT_DXVA_DEVICE_INTEL,
471 { "D3D9Ex (INTEL)", "CL_CONTEXT_D3D9EX_DEVICE_INTEL" },
472 { "DXVA (INTEL)", "CL_CONTEXT_DXVA_DEVICE_INTEL" }
475 { /* cl_intel_va_api_media_sharing */
476 CL_CONTEXT_VA_API_DISPLAY_INTEL,
477 CL_CONTEXT_VA_API_DISPLAY_INTEL,
479 { "VA-API", "CL_CONTEXT_VA_API_DISPLAY_INTEL" }
484 const size_t num_known_interops = ARRAY_SIZE(cl_interop_names);
488 #define I0_STR "%-48s "
489 #define I1_STR " %-46s "
490 #define I2_STR " %-44s "
492 /* New line and a full padding */
493 static const char full_padding[] = "\n"
494 INDENT INDENT INDENT INDENT INDENT
495 INDENT INDENT INDENT INDENT INDENT
496 INDENT INDENT INDENT INDENT INDENT
497 INDENT INDENT INDENT INDENT INDENT
498 INDENT INDENT INDENT INDENT INDENT;
500 static const char empty_str[] = "";
501 static const char spc_str[] = " ";
502 static const char times_str[] = "x";
503 static const char comma_str[] = ", ";
504 static const char vbar_str[] = " | ";
506 const char *cur_sfx = empty_str;
508 /* parse a CL_DEVICE_VERSION or CL_PLATFORM_VERSION info to determine the OpenCL version.
509 * Returns an unsigned integer in the form major*10 + minor
512 getOpenCLVersion(const char *version)
516 const char *from = version;
518 parse = strtol(from, &next, 10);
522 // skip the dot TODO should we actually check for the dot?
524 parse = strtol(from, &next, 10);
531 #define SPLIT_CL_VERSION(ver) ((ver)/10), ((ver)%10)
533 /* OpenCL 3.0 introduced “proper” versioning, in the form of a major.minor.patch struct
534 * packed into a single cl_uint (type aliased to cl_version)
536 struct unpacked_cl_version {
542 struct unpacked_cl_version unpack_cl_version(cl_uint version)
544 struct unpacked_cl_version ret;
545 ret.major = (version >> 22);
546 ret.minor = (version >> 12) & ((1<<10)-1);
547 ret.patch = version & ((1<<12)-1);
551 void strbuf_version(const char *what, struct _strbuf *str, cl_uint version)
553 struct unpacked_cl_version u = unpack_cl_version(version);
554 strbuf_append(what, str, " (%" PRIu32 ".%" PRIu32 ".%" PRIu32 ")",
555 u.major, u.minor, u.patch);
558 void strbuf_name_version(const char *what, struct _strbuf *str, const cl_name_version *ext, size_t num_exts,
559 const struct opt_out *output)
561 realloc_strbuf(str, num_exts*(CL_NAME_VERSION_MAX_NAME_SIZE + 128), "extension versions");
562 set_separator(output->mode == CLINFO_HUMAN ? full_padding : spc_str);
563 for (size_t i = 0; i < num_exts; ++i) {
564 const cl_name_version *e = ext + i;
565 if (i > 0) strbuf_append_str(what, str, sep);
566 if (output->mode == CLINFO_HUMAN) {
567 struct unpacked_cl_version u = unpack_cl_version(e->version);
568 strbuf_append(what, str, "%-65s%#8" PRIx32 " (%d.%d.%d)",
569 e->name, e->version, u.major, u.minor, u.patch);
571 strbuf_append(what, str, "%s:%#" PRIx32, e->name, e->version);
576 /* print strbuf, prefixed by pname, skipping leading whitespace if skip is nonzero,
577 * affixing cur_sfx */
579 void show_strbuf(const struct _strbuf *strbuf, const char *pname, int skip, cl_int err)
581 printf("%s" I1_STR "%s%s\n",
583 (skip ? skip_leading_ws(strbuf->buf) : strbuf->buf),
584 err ? empty_str : cur_sfx);
588 platform_info_str(struct platform_info_ret *ret,
589 const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk),
590 const struct opt_out* UNUSED(output))
592 GET_STRING_LOC(ret, loc, clGetPlatformInfo, loc->plat, loc->param.plat);
596 platform_info_ulong(struct platform_info_ret *ret,
597 const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk),
598 const struct opt_out *output)
600 ret->err = REPORT_ERROR_LOC(ret,
601 clGetPlatformInfo(loc->plat, loc->param.plat, sizeof(ret->value.u64), &ret->value.u64, NULL),
603 CHECK_SIZE(ret, loc, ret->value.u64, clGetPlatformInfo, loc->plat, loc->param.plat);
604 strbuf_append(loc->pname, &ret->str, "%" PRIu64, ret->value.u64);
608 platform_info_sz(struct platform_info_ret *ret,
609 const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk),
610 const struct opt_out *output)
612 ret->err = REPORT_ERROR_LOC(ret,
613 clGetPlatformInfo(loc->plat, loc->param.plat, sizeof(ret->value.s), &ret->value.s, NULL),
615 CHECK_SIZE(ret, loc, ret->value.s, clGetPlatformInfo, loc->plat, loc->param.plat);
616 strbuf_append(loc->pname, &ret->str, "%" PRIuS, ret->value.s);
620 platform_info_version(struct platform_info_ret *ret,
621 const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk),
622 const struct opt_out *output)
624 ret->err = REPORT_ERROR_LOC(ret,
625 clGetPlatformInfo(loc->plat, loc->param.plat, sizeof(ret->value.u32), &ret->value.u32, NULL),
627 CHECK_SIZE(ret, loc, ret->value.u32, clGetPlatformInfo, loc->plat, loc->param.plat);
628 strbuf_append(loc->pname, &ret->str, "%#" PRIx32, ret->value.u32);
629 if (output->mode == CLINFO_HUMAN) {
630 strbuf_version(loc->pname, &ret->str, ret->value.u32);
635 platform_info_ext_version(struct platform_info_ret *ret,
636 const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk),
637 const struct opt_out *output)
639 cl_name_version *ext = NULL;
641 ret->err = REPORT_ERROR_LOC(ret,
642 clGetPlatformInfo(loc->plat, loc->param.plat, 0, NULL, &nusz),
645 REALLOC(ext, nusz, loc->sname);
646 ret->err = REPORT_ERROR_LOC(ret,
647 clGetPlatformInfo(loc->plat, loc->param.plat, nusz, ext, NULL),
651 size_t num_exts = nusz / sizeof(cl_name_version);
652 strbuf_name_version(loc->pname, &ret->str, ext, num_exts, output);
657 struct platform_info_traits {
658 cl_platform_info param; // CL_PLATFORM_*
659 const char *sname; // "CL_PLATFORM_*"
660 const char *pname; // "Platform *"
661 const char *sfx; // suffix for the output in non-raw mode
662 /* pointer to function that retrieves the parameter */
663 void (*show_func)(struct platform_info_ret *,
664 const struct info_loc *, const struct platform_info_checks *,
665 const struct opt_out *);
666 /* pointer to function that checks if the parameter should be retrieved */
667 cl_bool (*check_func)(const struct platform_info_checks *);
670 cl_bool khr_icd_p(const struct platform_info_checks *chk)
672 return chk->has_khr_icd;
675 cl_bool plat_is_12(const struct platform_info_checks *chk)
677 return !(chk->plat_version < 12);
680 cl_bool plat_is_20(const struct platform_info_checks *chk)
682 return !(chk->plat_version < 20);
685 cl_bool plat_is_21(const struct platform_info_checks *chk)
687 return !(chk->plat_version < 21);
690 cl_bool plat_is_30(const struct platform_info_checks *chk)
692 return !(chk->plat_version < 30);
695 cl_bool plat_has_amd_object_metadata(const struct platform_info_checks *chk)
697 return chk->has_amd_object_metadata;
700 cl_bool plat_has_ext_ver(const struct platform_info_checks *chk)
702 return plat_is_30(chk) || chk->has_extended_versioning;
706 #define PINFO_COND(symbol, name, sfx, typ, funcptr) { symbol, #symbol, "Platform " name, sfx, &platform_info_##typ, &funcptr }
707 #define PINFO(symbol, name, sfx, typ) { symbol, #symbol, "Platform " name, sfx, &platform_info_##typ, NULL }
708 struct platform_info_traits pinfo_traits[] = {
709 PINFO(CL_PLATFORM_NAME, "Name", NULL, str),
710 PINFO(CL_PLATFORM_VENDOR, "Vendor", NULL, str),
711 PINFO(CL_PLATFORM_VERSION, "Version", NULL, str),
712 PINFO_COND(CL_PLATFORM_NUMERIC_VERSION, "Numeric Version", NULL, version, plat_has_ext_ver),
713 PINFO(CL_PLATFORM_PROFILE, "Profile", NULL, str),
714 PINFO(CL_PLATFORM_EXTENSIONS, "Extensions", NULL, str),
715 PINFO_COND(CL_PLATFORM_EXTENSIONS_WITH_VERSION, "Extensions with Version", NULL, ext_version, plat_has_ext_ver),
716 PINFO_COND(CL_PLATFORM_MAX_KEYS_AMD, "Max metadata object keys (AMD)", NULL, sz, plat_has_amd_object_metadata),
717 PINFO_COND(CL_PLATFORM_HOST_TIMER_RESOLUTION, "Host timer resolution", "ns", ulong, plat_is_21),
718 PINFO_COND(CL_PLATFORM_ICD_SUFFIX_KHR, "Extensions function suffix", NULL, str, khr_icd_p)
721 /* Collect (and optionally show) information on a specific platform,
722 * initializing relevant arrays and optionally showing the collected
726 gatherPlatformInfo(struct platform_list *plist, cl_uint p, const struct opt_out *output)
730 struct platform_data *pdata = plist->pdata + p;
731 struct platform_info_checks *pinfo_checks = plist->platform_checks + p;
732 struct platform_info_ret ret;
735 pinfo_checks->plat_version = 10;
737 INIT_RET(ret, "platform");
738 reset_loc(&loc, __func__);
739 loc.plat = plist->platform[p];
741 for (loc.line = 0; loc.line < ARRAY_SIZE(pinfo_traits); ++loc.line) {
742 const struct platform_info_traits *traits = pinfo_traits + loc.line;
744 /* checked is true if there was no condition to check for, or if the
745 * condition was satisfied
747 int checked = !(traits->check_func && !traits->check_func(pinfo_checks));
749 if (output->cond == COND_PROP_CHECK && !checked)
752 loc.sname = traits->sname;
753 loc.pname = (output->mode == CLINFO_HUMAN ?
754 traits->pname : traits->sname);
755 loc.param.plat = traits->param;
757 cur_sfx = (output->mode == CLINFO_HUMAN && traits->sfx) ? traits->sfx : empty_str;
759 reset_strbuf(&ret.str);
760 reset_strbuf(&ret.err_str);
761 traits->show_func(&ret, &loc, pinfo_checks, output);
763 /* The property is skipped if this was a conditional property,
764 * unsatisfied, there was an error retrieving it and cond_prop_mode is not
767 if (ret.err && !checked && output->cond != COND_PROP_SHOW)
770 /* The property gets printed if we are not just listing,
771 * or if the user requested a property and this one matches.
772 * Otherwise, we're just gathering information */
773 cl_bool requested = (output->prop && strstr(loc.sname, output->prop) != NULL);
774 if (output->detailed || requested) {
775 show_strbuf(RET_BUF(ret), loc.pname, 0, ret.err);
781 /* post-processing */
783 switch (traits->param) {
784 case CL_PLATFORM_NAME:
785 /* Store name for future reference */
786 len = strlen(ret.str.buf);
787 ALLOC(pdata->pname, len+1, "platform name copy");
788 /* memcpy instead of strncpy since we already have the len
789 * and memcpy is possibly more optimized */
790 memcpy(pdata->pname, ret.str.buf, len);
791 pdata->pname[len] = '\0';
793 case CL_PLATFORM_VERSION:
794 /* compute numeric value for OpenCL version */
795 pinfo_checks->plat_version = getOpenCLVersion(ret.str.buf + 7);
797 case CL_PLATFORM_EXTENSIONS:
798 pinfo_checks->has_khr_icd = !!strstr(ret.str.buf, "cl_khr_icd");
799 pinfo_checks->has_amd_object_metadata = !!strstr(ret.str.buf, "cl_amd_object_metadata");
800 pdata->has_amd_offline = !!strstr(ret.str.buf, "cl_amd_offline_devices");
802 case CL_PLATFORM_ICD_SUFFIX_KHR:
803 /* Store ICD suffix for future reference */
804 len = strlen(ret.str.buf);
805 ALLOC(pdata->sname, len+1, "platform ICD suffix copy");
806 /* memcpy instead of strncpy since we already have the len
807 * and memcpy is possibly more optimized */
808 memcpy(pdata->sname, ret.str.buf, len);
809 pdata->sname[len] = '\0';
817 if (pinfo_checks->plat_version > plist->max_plat_version)
818 plist->max_plat_version = pinfo_checks->plat_version;
820 /* if no CL_PLATFORM_ICD_SUFFIX_KHR, use P### as short/symbolic name */
823 ALLOC(pdata->sname, SNAME_MAX+1, "platform symbolic name");
824 snprintf(pdata->sname, SNAME_MAX, "P%" PRIu32 "", p);
827 len = (cl_int)strlen(pdata->sname);
828 if (len > plist->max_sname_len)
829 plist->max_sname_len = len;
831 ret.err = clGetDeviceIDs(loc.plat, CL_DEVICE_TYPE_ALL, 0, NULL, &pdata->ndevs);
832 if (ret.err == CL_DEVICE_NOT_FOUND)
835 CHECK_ERROR(ret.err, "number of devices");
836 plist->ndevs_total += pdata->ndevs;
837 plist->dev_offset[p] = p ? plist->dev_offset[p-1] + (pdata-1)->ndevs : 0;
838 plist_devs_reserve(plist, plist->ndevs_total);
840 if (pdata->ndevs > 0) {
841 ret.err = clGetDeviceIDs(loc.plat, CL_DEVICE_TYPE_ALL,
843 plist->all_devs + plist->dev_offset[p], NULL);
846 if (pdata->ndevs > plist->max_devs)
847 plist->max_devs = pdata->ndevs;
853 * Device properties/extensions used in traits checks, and relevant functions
854 * TODO add version control for 3.0+ platforms
857 struct device_info_checks {
858 const struct platform_info_checks *pinfo_checks;
859 cl_device_type devtype;
860 cl_device_mem_cache_type cachetype;
861 cl_device_local_mem_type lmemtype;
862 cl_bool image_support;
863 cl_bool compiler_available;
868 char has_amd_svm[11];
869 char has_arm_svm[29];
870 char has_arm_core_id[15];
871 char has_arm_job_slots[26];
872 char has_fission[22];
873 char has_atomic_counters[26];
874 char has_image2d_buffer[27];
875 char has_il_program[18];
876 char has_intel_local_thread[30];
877 char has_intel_AME[36];
878 char has_intel_AVC_ME[43];
879 char has_intel_planar_yuv[20];
880 char has_intel_required_subgroup_size[32];
881 char has_altera_dev_temp[29];
884 char has_qcom_ext_host_ptr[21];
885 char has_simultaneous_sharing[30];
886 char has_subgroup_named_barrier[30];
887 char has_terminate_context[25];
888 char has_extended_versioning[27];
889 char has_cxx_for_opencl[22];
890 char has_device_uuid[19];
892 cl_uint p2p_num_devs;
895 #define DEFINE_EXT_CHECK(ext) cl_bool dev_has_##ext(const struct device_info_checks *chk) \
897 return !!(chk->has_##ext[0]); \
900 DEFINE_EXT_CHECK(half)
901 DEFINE_EXT_CHECK(double)
903 DEFINE_EXT_CHECK(amd)
904 DEFINE_EXT_CHECK(amd_svm)
905 DEFINE_EXT_CHECK(arm_svm)
906 DEFINE_EXT_CHECK(arm_core_id)
907 DEFINE_EXT_CHECK(arm_job_slots)
908 DEFINE_EXT_CHECK(fission)
909 DEFINE_EXT_CHECK(atomic_counters)
910 DEFINE_EXT_CHECK(image2d_buffer)
911 DEFINE_EXT_CHECK(il_program)
912 DEFINE_EXT_CHECK(intel_local_thread)
913 DEFINE_EXT_CHECK(intel_AME)
914 DEFINE_EXT_CHECK(intel_AVC_ME)
915 DEFINE_EXT_CHECK(intel_planar_yuv)
916 DEFINE_EXT_CHECK(intel_required_subgroup_size)
917 DEFINE_EXT_CHECK(altera_dev_temp)
918 DEFINE_EXT_CHECK(p2p)
919 DEFINE_EXT_CHECK(spir)
920 DEFINE_EXT_CHECK(qcom_ext_host_ptr)
921 DEFINE_EXT_CHECK(simultaneous_sharing)
922 DEFINE_EXT_CHECK(subgroup_named_barrier)
923 DEFINE_EXT_CHECK(terminate_context)
924 DEFINE_EXT_CHECK(extended_versioning)
925 DEFINE_EXT_CHECK(cxx_for_opencl)
926 DEFINE_EXT_CHECK(device_uuid)
928 /* In the version checks we negate the opposite conditions
929 * instead of double-negating the actual condition
932 // device supports 1.1
933 cl_bool dev_is_11(const struct device_info_checks *chk)
935 return !(chk->dev_version < 11);
939 // device supports 1.2
940 cl_bool dev_is_12(const struct device_info_checks *chk)
942 return !(chk->dev_version < 12);
945 // device supports 2.0
946 cl_bool dev_is_20(const struct device_info_checks *chk)
948 return !(chk->dev_version < 20);
951 // device supports 2.1
952 cl_bool dev_is_21(const struct device_info_checks *chk)
954 return !(chk->dev_version < 21);
957 // device does not support 2.0
958 cl_bool dev_not_20(const struct device_info_checks *chk)
960 return !(chk->dev_version >= 20);
963 // device supports 3.0
964 cl_bool dev_is_30(const struct device_info_checks *chk)
966 return !(chk->dev_version < 30);
969 // device has extended versioning: 3.0 or has_extended_versioning
970 cl_bool dev_has_ext_ver(const struct device_info_checks *chk)
972 return dev_is_30(chk) || dev_has_extended_versioning(chk);
975 cl_bool dev_is_gpu(const struct device_info_checks *chk)
977 return !!(chk->devtype & CL_DEVICE_TYPE_GPU);
980 cl_bool dev_is_gpu_amd(const struct device_info_checks *chk)
982 return dev_is_gpu(chk) && dev_has_amd(chk);
985 /* Device supports cl_amd_device_attribute_query v4 */
986 cl_bool dev_has_amd_v4(const struct device_info_checks *chk)
988 /* We don't actually have a criterion to check if the device
989 * supports a specific version of an extension, so for the time
990 * being rely on them being GPU devices with cl_amd_device_attribute_query
991 * and the platform supporting OpenCL 2.0 or later
992 * TODO FIXME tune criteria
994 return dev_is_gpu(chk) && dev_has_amd(chk) && plat_is_20(chk->pinfo_checks);
997 /* Device supports cl_arm_core_id v2 */
998 cl_bool dev_has_arm_core_id_v2(const struct device_info_checks *chk)
1000 /* We don't actually have a criterion to check if the device
1001 * supports a specific version of an extension, so for the time
1002 * being rely on them having cl_arm_core_id and the platform
1003 * supporting OpenCL 1.2 or later
1004 * TODO FIXME tune criteria
1006 return dev_has_arm_core_id(chk) && plat_is_12(chk->pinfo_checks);
1009 cl_bool dev_has_svm(const struct device_info_checks *chk)
1011 return dev_is_20(chk) || dev_has_amd_svm(chk);
1014 cl_bool dev_has_partition(const struct device_info_checks *chk)
1016 return dev_is_12(chk) || dev_has_fission(chk);
1019 cl_bool dev_has_cache(const struct device_info_checks *chk)
1021 return chk->cachetype != CL_NONE;
1024 cl_bool dev_has_lmem(const struct device_info_checks *chk)
1026 return chk->lmemtype != CL_NONE;
1029 cl_bool dev_has_il(const struct device_info_checks *chk)
1031 return dev_is_21(chk) || dev_has_il_program(chk);
1034 cl_bool dev_has_images(const struct device_info_checks *chk)
1036 return chk->image_support;
1039 cl_bool dev_has_images_12(const struct device_info_checks *chk)
1041 return dev_has_images(chk) && dev_is_12(chk);
1044 cl_bool dev_has_images_20(const struct device_info_checks *chk)
1046 return dev_has_images(chk) && dev_is_20(chk);
1049 cl_bool dev_has_compiler(const struct device_info_checks *chk)
1051 return chk->compiler_available;
1054 cl_bool dev_has_compiler_11(const struct device_info_checks *chk)
1056 return dev_is_11(chk) && dev_has_compiler(chk);
1059 cl_bool dev_has_p2p_devs(const struct device_info_checks *chk)
1061 return dev_has_p2p(chk) && chk->p2p_num_devs > 0;
1065 void identify_device_extensions(const char *extensions, struct device_info_checks *chk)
1067 #define _HAS_EXT(ext) (strstr(extensions, ext))
1068 #define CPY_EXT(what, ext) do { \
1069 strncpy(chk->has_##what, has+1, sizeof(ext)); \
1070 chk->has_##what[sizeof(ext)-1] = '\0'; \
1072 #define CHECK_EXT(what, ext) do { \
1073 has = _HAS_EXT(" " #ext " "); \
1074 if (has) CPY_EXT(what, #ext); \
1078 CHECK_EXT(half, cl_khr_fp16);
1079 CHECK_EXT(spir, cl_khr_spir);
1080 CHECK_EXT(double, cl_khr_fp64);
1081 if (!dev_has_double(chk))
1082 CHECK_EXT(double, cl_amd_fp64);
1083 if (!dev_has_double(chk))
1084 CHECK_EXT(double, cl_APPLE_fp64_basic_ops);
1085 CHECK_EXT(nv, cl_nv_device_attribute_query);
1086 CHECK_EXT(amd, cl_amd_device_attribute_query);
1087 CHECK_EXT(amd_svm, cl_amd_svm);
1088 CHECK_EXT(arm_svm, cl_arm_shared_virtual_memory);
1089 CHECK_EXT(arm_core_id, cl_arm_core_id);
1090 CHECK_EXT(arm_job_slots, cl_arm_job_slot_selection);
1091 CHECK_EXT(fission, cl_ext_device_fission);
1092 CHECK_EXT(atomic_counters, cl_ext_atomic_counters_64);
1093 if (dev_has_atomic_counters(chk))
1094 CHECK_EXT(atomic_counters, cl_ext_atomic_counters_32);
1095 CHECK_EXT(image2d_buffer, cl_khr_image2d_from_buffer);
1096 CHECK_EXT(il_program, cl_khr_il_program);
1097 CHECK_EXT(intel_local_thread, cl_intel_exec_by_local_thread);
1098 CHECK_EXT(intel_AME, cl_intel_advanced_motion_estimation);
1099 CHECK_EXT(intel_AVC_ME, cl_intel_device_side_avc_motion_estimation);
1100 CHECK_EXT(intel_planar_yuv, cl_intel_planar_yuv);
1101 CHECK_EXT(intel_required_subgroup_size, cl_intel_required_subgroup_size);
1102 CHECK_EXT(altera_dev_temp, cl_altera_device_temperature);
1103 CHECK_EXT(p2p, cl_amd_copy_buffer_p2p);
1104 CHECK_EXT(qcom_ext_host_ptr, cl_qcom_ext_host_ptr);
1105 CHECK_EXT(simultaneous_sharing, cl_intel_simultaneous_sharing);
1106 CHECK_EXT(subgroup_named_barrier, cl_khr_subgroup_named_barrier);
1107 CHECK_EXT(terminate_context, cl_khr_terminate_context);
1108 CHECK_EXT(extended_versioning, cl_khr_extended_versioning);
1109 CHECK_EXT(cxx_for_opencl, cl_ext_cxx_for_opencl);
1110 CHECK_EXT(device_uuid, cl_khr_device_uuid);
1115 * Device info print functions
1118 #define _GET_VAL(ret, loc, val) \
1119 ret->err = REPORT_ERROR_LOC(ret, \
1120 clGetDeviceInfo((loc)->dev, (loc)->param.dev, sizeof(val), &(val), NULL), \
1122 CHECK_SIZE(ret, loc, val, clGetDeviceInfo, (loc)->dev, (loc)->param.dev);
1124 #define _GET_VAL_VALUES(ret, loc) \
1125 REALLOC(val, numval, loc->sname); \
1126 ret->err = REPORT_ERROR_LOC(ret, \
1127 clGetDeviceInfo(loc->dev, loc->param.dev, szval, val, NULL), \
1129 if (ret->err) { free(val); val = NULL; } \
1131 #define _GET_VAL_ARRAY(ret, loc) \
1132 ret->err = REPORT_ERROR_LOC(ret, \
1133 clGetDeviceInfo(loc->dev, loc->param.dev, 0, NULL, &szval), \
1134 loc, "get number of %s"); \
1135 numval = szval/sizeof(*val); \
1137 _GET_VAL_VALUES(ret, loc) \
1140 #define GET_VAL(ret, loc, field) do { \
1141 _GET_VAL(ret, (loc), ret->value.field) \
1144 #define GET_VAL_ARRAY(ret, loc) do { \
1145 _GET_VAL_ARRAY(ret, (loc)) \
1148 #define DEFINE_DEVINFO_FETCH(type, field) \
1150 device_fetch_##type(struct device_info_ret *ret, \
1151 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), \
1152 const struct opt_out *output) \
1154 GET_VAL(ret, loc, field); \
1155 return ret->value.field; \
1158 DEFINE_DEVINFO_FETCH(size_t, s)
1159 DEFINE_DEVINFO_FETCH(cl_bool, b)
1160 DEFINE_DEVINFO_FETCH(cl_uint, u32)
1161 DEFINE_DEVINFO_FETCH(cl_version, u32)
1162 DEFINE_DEVINFO_FETCH(cl_ulong, u64)
1163 DEFINE_DEVINFO_FETCH(cl_bitfield, u64)
1164 DEFINE_DEVINFO_FETCH(cl_device_type, devtype)
1165 DEFINE_DEVINFO_FETCH(cl_device_mem_cache_type, cachetype)
1166 DEFINE_DEVINFO_FETCH(cl_device_local_mem_type, lmemtype)
1167 DEFINE_DEVINFO_FETCH(cl_device_topology_amd, devtopo)
1168 DEFINE_DEVINFO_FETCH(cl_device_affinity_domain, affinity_domain)
1169 DEFINE_DEVINFO_FETCH(cl_device_fp_config, fpconfig)
1170 DEFINE_DEVINFO_FETCH(cl_command_queue_properties, qprop)
1171 DEFINE_DEVINFO_FETCH(cl_device_exec_capabilities, execap)
1172 DEFINE_DEVINFO_FETCH(cl_device_svm_capabilities, svmcap)
1173 DEFINE_DEVINFO_FETCH(cl_device_terminate_capability_khr, termcap)
1175 #define DEV_FETCH_LOC(type, var, loc) \
1176 type var = device_fetch_##type(ret, loc, chk, output)
1177 #define DEV_FETCH(type, var) DEV_FETCH_LOC(type, var, loc)
1179 #define FMT_VAL(loc, ret, fmt, val) if (!ret->err) strbuf_append(loc->pname, &ret->str, fmt, val)
1181 #define DEFINE_DEVINFO_SHOW(how, type, field, fmt) \
1183 device_info_##how(struct device_info_ret *ret, \
1184 const struct info_loc *loc, const struct device_info_checks* chk, \
1185 const struct opt_out *output) \
1187 DEV_FETCH(type, val); \
1188 if (!ret->err) FMT_VAL(loc, ret, fmt, val); \
1191 DEFINE_DEVINFO_SHOW(int, cl_uint, u32, "%" PRIu32)
1192 DEFINE_DEVINFO_SHOW(hex, cl_uint, u32, "%#" PRIx32)
1193 DEFINE_DEVINFO_SHOW(long, cl_ulong, u64, "%" PRIu64)
1194 DEFINE_DEVINFO_SHOW(sz, size_t, s, "%" PRIuS)
1197 device_info_str(struct device_info_ret *ret,
1198 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1199 const struct opt_out* UNUSED(output))
1201 GET_STRING_LOC(ret, loc, clGetDeviceInfo, loc->dev, loc->param.dev);
1205 device_info_bool(struct device_info_ret *ret,
1206 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1207 const struct opt_out *output)
1209 DEV_FETCH(cl_bool, val);
1211 const char * const * str = (output->mode == CLINFO_HUMAN ?
1212 bool_str : bool_raw_str);
1213 strbuf_printf(&ret->str, "%s", str[val]);
1218 device_info_bits(struct device_info_ret *ret,
1219 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1220 const struct opt_out *output)
1222 DEV_FETCH(cl_uint, val);
1224 strbuf_printf(&ret->str, "%" PRIu32 " bits (%" PRIu32 " bytes)", val, val/8);
1228 device_info_version(struct device_info_ret *ret,
1229 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1230 const struct opt_out *output)
1232 DEV_FETCH(cl_version, val);
1234 strbuf_append(loc->pname, &ret->str, "%#" PRIx32, val);
1235 if (output->mode == CLINFO_HUMAN) {
1236 strbuf_version(loc->pname, &ret->str, val);
1242 device_info_ext_version(struct device_info_ret *ret,
1243 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1244 const struct opt_out *output)
1246 cl_name_version *val = NULL;
1247 size_t szval = 0, numval = 0;
1248 GET_VAL_ARRAY(ret, loc);
1250 strbuf_name_version(loc->pname, &ret->str, val, numval, output);
1255 size_t strbuf_mem(struct _strbuf *str, cl_ulong val, size_t szval)
1257 double dbl = (double)val;
1259 while (dbl > 1024 && sfx < memsfx_end) {
1263 return sprintf(str->buf + szval, " (%.4lg%s)",
1268 device_info_mem(struct device_info_ret *ret,
1269 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1270 const struct opt_out *output)
1272 DEV_FETCH(cl_ulong, val);
1274 size_t szval = strbuf_printf(&ret->str, "%" PRIu64, val);
1275 if (output->mode == CLINFO_HUMAN && val > 1024)
1276 strbuf_mem(&ret->str, val, szval);
1281 device_info_mem_int(struct device_info_ret *ret,
1282 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1283 const struct opt_out *output)
1285 DEV_FETCH(cl_uint, val);
1287 size_t szval = strbuf_printf(&ret->str, "%" PRIu32, val);
1288 if (output->mode == CLINFO_HUMAN && val > 1024)
1289 strbuf_mem(&ret->str, val, szval);
1294 device_info_mem_sz(struct device_info_ret *ret,
1295 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1296 const struct opt_out *output)
1298 DEV_FETCH(size_t, val);
1300 size_t szval = strbuf_printf(&ret->str, "%" PRIuS, val);
1301 if (output->mode == CLINFO_HUMAN && val > 1024)
1302 strbuf_mem(&ret->str, val, szval);
1307 device_info_free_mem_amd(struct device_info_ret *ret,
1308 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1309 const struct opt_out *output)
1311 // Apparently, with the introduction of ROCm, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD
1312 // returns 1 or 2 values depending on how it's called: if it's called with a
1313 // szval < 2*sizeof(size_t), it will only return 1 value, otherwise it will return 2.
1314 // At least now these are documented in the ROCm source code: the first value
1315 // is the total amount of free memory, and the second is the size of the largest
1316 // free block. So let's just manually ask for both values
1318 size_t numval = 2, szval = numval*sizeof(*val);
1319 _GET_VAL_VALUES(ret, loc);
1323 for (cursor = 0; cursor < numval; ++cursor) {
1325 ret->str.buf[szval] = ' ';
1328 szval += sprintf(ret->str.buf + szval, "%" PRIuS, val[cursor]);
1329 if (output->mode == CLINFO_HUMAN)
1330 szval += strbuf_mem(&ret->str, val[cursor]*UINT64_C(1024), szval);
1331 ret->value.u64v.s[cursor] = val[cursor];
1338 device_info_time_offset(struct device_info_ret *ret,
1339 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1340 const struct opt_out *output)
1342 GET_VAL(ret, loc, u64);
1345 time_t time = ret->value.u64/UINT64_C(1000000000);
1346 szval += strbuf_printf(&ret->str, "%" PRIu64 "ns (", ret->value.u64);
1347 szval += bufcpy(&ret->str, szval, ctime(&time));
1348 /* overwrite ctime's newline with the closing parenthesis */
1349 if (szval < ret->str.sz)
1350 ret->str.buf[szval - 1] = ')';
1355 device_info_szptr_sep(struct device_info_ret *ret, const char *human_sep,
1356 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1357 const struct opt_out *output)
1360 size_t szval = 0, numval = 0;
1361 GET_VAL_ARRAY(ret, loc);
1364 set_separator(output->mode == CLINFO_HUMAN ? human_sep : spc_str);
1366 for (counter = 0; counter < numval; ++counter) {
1367 add_separator(&ret->str, &szval);
1368 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%" PRIuS, val[counter]);
1369 if (szval >= ret->str.sz) {
1370 trunc_strbuf(&ret->str);
1374 // TODO: ret->value.??? = val;
1380 device_info_szptr_times(struct device_info_ret *ret,
1381 const struct info_loc *loc, const struct device_info_checks* chk,
1382 const struct opt_out *output)
1384 device_info_szptr_sep(ret, times_str, loc, chk, output);
1388 device_info_szptr_comma(struct device_info_ret *ret,
1389 const struct info_loc *loc, const struct device_info_checks* chk,
1390 const struct opt_out *output)
1392 device_info_szptr_sep(ret, comma_str, loc, chk, output);
1396 getWGsizes(struct device_info_ret *ret, const struct info_loc *loc, size_t *wgm, size_t wgm_sz,
1397 const struct opt_out* UNUSED(output))
1401 cl_context_properties ctxpft[] = {
1402 CL_CONTEXT_PLATFORM, (cl_context_properties)loc->plat,
1405 cl_context ctx = NULL;
1406 cl_program prg = NULL;
1407 cl_kernel krn = NULL;
1409 ret->err = CL_SUCCESS;
1411 ctx = clCreateContext(ctxpft, 1, &loc->dev, NULL, NULL, &ret->err);
1412 if (REPORT_ERROR(&ret->err_str, ret->err, "create context")) goto out;
1413 prg = clCreateProgramWithSource(ctx, ARRAY_SIZE(sources), sources, NULL, &ret->err);
1414 if (REPORT_ERROR(&ret->err_str, ret->err, "create program")) goto out;
1415 ret->err = clBuildProgram(prg, 1, &loc->dev, NULL, NULL, NULL);
1416 log_err = REPORT_ERROR(&ret->err_str, ret->err, "build program");
1418 /* for a program build failure, dump the log to stderr before bailing */
1419 if (log_err == CL_BUILD_PROGRAM_FAILURE) {
1420 struct _strbuf logbuf;
1421 init_strbuf(&logbuf, "program build log");
1422 GET_STRING(&logbuf, ret->err,
1423 clGetProgramBuildInfo, CL_PROGRAM_BUILD_LOG, "CL_PROGRAM_BUILD_LOG", prg, loc->dev);
1424 if (ret->err == CL_SUCCESS) {
1427 fputs("=== CL_PROGRAM_BUILD_LOG ===\n", stderr);
1428 fputs(logbuf.buf, stderr);
1431 free_strbuf(&logbuf);
1436 for (cursor = 0; cursor < wgm_sz; ++cursor) {
1437 strbuf_printf(&ret->str, "sum%u", 1<<cursor);
1439 ret->str.buf[3] = 0; // scalar kernel is called 'sum'
1440 krn = clCreateKernel(prg, ret->str.buf, &ret->err);
1441 if (REPORT_ERROR(&ret->err_str, ret->err, "create kernel")) goto out;
1442 ret->err = clGetKernelWorkGroupInfo(krn, loc->dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
1443 sizeof(*wgm), wgm + cursor, NULL);
1444 if (REPORT_ERROR(&ret->err_str, ret->err, "get kernel info")) goto out;
1445 clReleaseKernel(krn);
1451 clReleaseKernel(krn);
1453 clReleaseProgram(prg);
1455 clReleaseContext(ctx);
1460 device_info_wg(struct device_info_ret *ret,
1461 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1462 const struct opt_out *output)
1464 /* preferred workgroup size multiple for each kernel
1465 * have not found a platform where the WG multiple changes,
1466 * but keep this flexible (this can grow up to 5)
1468 #define NUM_KERNELS 1
1469 size_t wgm[NUM_KERNELS] = {0};
1471 getWGsizes(ret, loc, wgm, NUM_KERNELS, output);
1473 strbuf_printf(&ret->str, "%" PRIuS, wgm[0]);
1475 ret->value.s = wgm[0];
1479 device_info_img_sz_2d(struct device_info_ret *ret,
1480 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1481 const struct opt_out *output)
1483 struct info_loc loc2 = *loc;
1484 size_t width = 0, height = 0;
1485 _GET_VAL(ret, loc, height); /* HEIGHT */
1487 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_IMAGE2D_MAX_WIDTH);
1488 _GET_VAL(ret, &loc2, width);
1490 strbuf_printf(&ret->str, "%" PRIuS "x%" PRIuS, width, height);
1493 ret->value.u32v.s[0] = width;
1494 ret->value.u32v.s[1] = height;
1498 device_info_img_sz_intel_planar_yuv(struct device_info_ret *ret,
1499 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1500 const struct opt_out *output)
1502 struct info_loc loc2 = *loc;
1503 size_t width = 0, height = 0;
1504 _GET_VAL(ret, loc, height); /* HEIGHT */
1506 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL);
1507 _GET_VAL(ret, &loc2, width);
1509 strbuf_printf(&ret->str, "%" PRIuS "x%" PRIuS, width, height);
1512 ret->value.u32v.s[0] = width;
1513 ret->value.u32v.s[1] = height;
1518 device_info_img_sz_3d(struct device_info_ret *ret,
1519 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1520 const struct opt_out *output)
1522 struct info_loc loc2 = *loc;
1523 size_t width = 0, height = 0, depth = 0;
1524 _GET_VAL(ret, loc, height); /* HEIGHT */
1526 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_IMAGE3D_MAX_WIDTH);
1527 _GET_VAL(ret, &loc2, width);
1529 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_IMAGE3D_MAX_DEPTH);
1530 _GET_VAL(ret, &loc2, depth);
1532 strbuf_printf(&ret->str, "%" PRIuS "x%" PRIuS "x%" PRIuS,
1533 width, height, depth);
1537 ret->value.u32v.s[0] = width;
1538 ret->value.u32v.s[1] = height;
1539 ret->value.u32v.s[2] = depth;
1544 device_info_devtype(struct device_info_ret *ret,
1545 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1546 const struct opt_out *output)
1548 DEV_FETCH(cl_device_type, val);
1550 /* iterate over device type strings, appending their textual form
1552 cl_uint i = (cl_uint)actual_devtype_count;
1553 const char * const *devstr = (output->mode == CLINFO_HUMAN ?
1554 device_type_str : device_type_raw_str);
1556 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1557 for (; i > 0; --i) {
1558 /* assemble CL_DEVICE_TYPE_* from index i */
1559 cl_device_type cur = (cl_device_type)(1) << (i-1);
1561 /* match: add separator if not first match */
1562 add_separator(&ret->str, &szval);
1563 szval += bufcpy(&ret->str, szval, devstr[i]);
1566 /* check for extra bits */
1567 if (szval < ret->str.sz) {
1568 cl_device_type known_mask = ((cl_device_type)(1) << actual_devtype_count) - 1;
1569 cl_device_type extra = val & ~known_mask;
1571 add_separator(&ret->str, &szval);
1572 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra);
1579 device_info_cachetype(struct device_info_ret *ret,
1580 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1581 const struct opt_out *output)
1583 DEV_FETCH(cl_device_mem_cache_type, val);
1585 const char * const *ar = (output->mode == CLINFO_HUMAN ?
1586 cache_type_str : cache_type_raw_str);
1587 bufcpy(&ret->str, 0, ar[val]);
1592 device_info_lmemtype(struct device_info_ret *ret,
1593 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1594 const struct opt_out *output)
1596 DEV_FETCH(cl_device_local_mem_type, val);
1598 const char * const *ar = (output->mode == CLINFO_HUMAN ?
1599 lmem_type_str : lmem_type_raw_str);
1600 bufcpy(&ret->str, 0, ar[val]);
1602 ret->value.lmemtype = val;
1606 device_info_atomic_caps(struct device_info_ret *ret,
1607 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1608 const struct opt_out *output)
1610 DEV_FETCH(cl_bitfield, val);
1614 const char * const * capstr = (output->mode == CLINFO_HUMAN ?
1615 atomic_cap_str : atomic_cap_raw_str);
1616 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1617 for (i = 0; i < atomic_cap_count; ++i) {
1618 if (val & (1 << i)) {
1619 add_separator(&ret->str, &szval);
1620 szval += bufcpy(&ret->str, szval, capstr[i]);
1622 if (szval >= ret->str.sz)
1625 /* check for extra bits */
1626 if (szval < ret->str.sz) {
1627 cl_bitfield known_mask = ((cl_bitfield)(1) << atomic_cap_count) - 1;
1628 cl_bitfield extra = val & ~known_mask;
1630 add_separator(&ret->str, &szval);
1631 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra);
1638 device_info_device_enqueue_caps(struct device_info_ret *ret,
1639 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1640 const struct opt_out *output)
1642 DEV_FETCH(cl_bitfield, val);
1646 const char * const * capstr = (output->mode == CLINFO_HUMAN ?
1647 device_enqueue_cap_str : device_enqueue_cap_raw_str);
1648 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1649 for (i = 0; i < device_enqueue_cap_count; ++i) {
1650 if (val & (1 << i)) {
1651 add_separator(&ret->str, &szval);
1652 szval += bufcpy(&ret->str, szval, capstr[i]);
1654 if (szval >= ret->str.sz)
1657 /* check for extra bits */
1658 if (szval < ret->str.sz) {
1659 cl_bitfield known_mask = ((cl_bitfield)(1) << device_enqueue_cap_count) - 1;
1660 cl_bitfield extra = val & ~known_mask;
1662 add_separator(&ret->str, &szval);
1663 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra);
1669 /* cl_arm_core_id */
1671 device_info_core_ids(struct device_info_ret *ret,
1672 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1673 const struct opt_out *output)
1675 DEV_FETCH(cl_ulong, val);
1678 /* The value is a bitfield where each set bit corresponds to a core ID
1679 * value that can be returned by the device-side function. We print them
1680 * here as ranges, such as 0-4, 8-12 */
1682 int range_start = -1;
1684 set_separator(empty_str);
1685 #define CORE_ID_END 64
1687 /* Find the start of the range */
1688 while ((cur_bit < CORE_ID_END) && !((val >> cur_bit) & 1))
1690 range_start = cur_bit++;
1692 /* Find the end of the range */
1693 while ((cur_bit < CORE_ID_END) && ((val >> cur_bit) & 1))
1696 /* print the range [range_start, cur_bit[ */
1697 if (range_start >= 0 && range_start < CORE_ID_END) {
1698 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1,
1699 "%s%d", sep, range_start);
1700 if (cur_bit - range_start > 1)
1701 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1,
1702 "-%d", cur_bit - 1);
1703 set_separator(comma_str);
1705 } while (cur_bit < CORE_ID_END);
1707 ret->value.u64 = val;
1710 /* cl_arm_job_slot_selection */
1712 device_info_job_slots(struct device_info_ret *ret,
1713 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1714 const struct opt_out *output)
1716 DEV_FETCH(cl_uint, val);
1719 /* The value is a bitfield where each set bit corresponds to an available job slot.
1720 * We print them here as ranges, such as 0-4, 8-12 */
1722 int range_start = -1;
1724 set_separator(empty_str);
1725 #define JOB_SLOT_END 32
1727 /* Find the start of the range */
1728 while ((cur_bit < JOB_SLOT_END) && !((val >> cur_bit) & 1))
1730 range_start = cur_bit++;
1732 /* Find the end of the range */
1733 while ((cur_bit < JOB_SLOT_END) && ((val >> cur_bit) & 1))
1736 /* print the range [range_start, cur_bit[ */
1737 if (range_start >= 0 && range_start < JOB_SLOT_END) {
1738 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1,
1739 "%s%d", sep, range_start);
1740 if (cur_bit - range_start > 1)
1741 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1,
1742 "-%d", cur_bit - 1);
1743 set_separator(comma_str);
1745 } while (cur_bit < JOB_SLOT_END);
1747 ret->value.u32 = val;
1750 /* stringify a cl_device_topology_amd */
1751 void devtopo_str(struct device_info_ret *ret, const cl_device_topology_amd *devtopo)
1753 switch (devtopo->raw.type) {
1757 case CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD:
1758 strbuf_printf(&ret->str, "PCI-E, %02x:%02x.%u",
1759 (cl_uchar)(devtopo->pcie.bus),
1760 devtopo->pcie.device, devtopo->pcie.function);
1763 strbuf_printf(&ret->str, "<unknown (%u): %u %u %u %u %u>",
1765 devtopo->raw.data[0], devtopo->raw.data[1],
1766 devtopo->raw.data[2],
1767 devtopo->raw.data[3], devtopo->raw.data[4]);
1772 device_info_devtopo_amd(struct device_info_ret *ret,
1773 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1774 const struct opt_out *output)
1776 DEV_FETCH(cl_device_topology_amd, val);
1777 /* TODO how to do this in CLINFO_RAW mode */
1779 devtopo_str(ret, &val);
1783 /* we assemble a cl_device_topology_amd struct from the NVIDIA info */
1785 device_info_devtopo_nv(struct device_info_ret *ret,
1786 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1787 const struct opt_out *output)
1789 struct info_loc loc2 = *loc;
1790 cl_device_topology_amd devtopo;
1791 DEV_FETCH(cl_uint, val); /* CL_DEVICE_PCI_BUS_ID_NV */
1793 devtopo.raw.type = CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD;
1794 devtopo.pcie.bus = val & 0xff;
1795 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_PCI_SLOT_ID_NV);
1796 _GET_VAL(ret, &loc2, val);
1799 devtopo.pcie.device = (val >> 3) & 0xff;
1800 devtopo.pcie.function = val & 7;
1801 devtopo_str(ret, &devtopo);
1803 ret->value.devtopo = devtopo;
1807 /* NVIDIA Compute Capability */
1809 device_info_cc_nv(struct device_info_ret *ret,
1810 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1811 const struct opt_out *output)
1813 struct info_loc loc2 = *loc;
1814 cl_uint major = 0, minor = 0;
1815 _GET_VAL(ret, loc, major); /* MAJOR */
1817 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV);
1818 _GET_VAL(ret, &loc2, minor);
1820 strbuf_printf(&ret->str, "%" PRIu32 ".%" PRIu32 "", major, minor);
1823 ret->value.u32v.s[0] = major;
1824 ret->value.u32v.s[1] = minor;
1829 device_info_gfxip_amd(struct device_info_ret *ret,
1830 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1831 const struct opt_out *output)
1833 struct info_loc loc2 = *loc;
1834 cl_uint major = 0, minor = 0;
1835 _GET_VAL(ret, loc, major); /* MAJOR */
1837 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_GFXIP_MINOR_AMD);
1838 _GET_VAL(ret, &loc2, minor);
1840 strbuf_printf(&ret->str, "%" PRIu32 ".%" PRIu32 "", major, minor);
1843 ret->value.u32v.s[0] = major;
1844 ret->value.u32v.s[1] = minor;
1848 /* Device Partition, CLINFO_HUMAN header */
1850 device_info_partition_header(struct device_info_ret *ret,
1851 const struct info_loc *UNUSED(loc), const struct device_info_checks *chk,
1852 const struct opt_out* UNUSED(output))
1854 cl_bool is_12 = dev_is_12(chk);
1855 cl_bool has_fission = dev_has_fission(chk);
1856 size_t szval = strbuf_printf(&ret->str, "(%s%s%s%s)",
1857 (is_12 ? core : empty_str),
1858 (is_12 && has_fission ? comma_str : empty_str),
1860 (!(is_12 || has_fission) ? na : empty_str));
1862 ret->err = CL_SUCCESS;
1864 if (szval >= ret->str.sz)
1865 trunc_strbuf(&ret->str);
1868 /* Device partition properties */
1870 device_info_partition_types(struct device_info_ret *ret,
1871 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1872 const struct opt_out *output)
1874 size_t numval = 0, szval = 0, cursor = 0, slen = 0;
1875 cl_device_partition_property *val = NULL;
1876 const char * const *ptstr = (output->mode == CLINFO_HUMAN ?
1877 partition_type_str : partition_type_raw_str);
1879 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1881 GET_VAL_ARRAY(ret, loc);
1885 for (cursor = 0; cursor < numval; ++cursor) {
1888 /* add separator for values past the first */
1889 add_separator(&ret->str, &szval);
1891 switch (val[cursor]) {
1892 case 0: str_idx = 0; break;
1893 case CL_DEVICE_PARTITION_EQUALLY: str_idx = 1; break;
1894 case CL_DEVICE_PARTITION_BY_COUNTS: str_idx = 2; break;
1895 case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: str_idx = 3; break;
1896 case CL_DEVICE_PARTITION_BY_NAMES_INTEL: str_idx = 4; break;
1898 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "by <unknown> (%#" PRIxPTR ")", val[cursor]);
1902 /* string length, minus _EXT */
1903 slen = strlen(ptstr[str_idx]);
1904 if (output->mode == CLINFO_RAW && str_idx > 0)
1906 szval += bufcpy_len(&ret->str, szval, ptstr[str_idx], slen);
1908 if (szval >= ret->str.sz) {
1909 trunc_strbuf(&ret->str);
1913 // TODO ret->value.??? = val
1919 device_info_partition_types_ext(struct device_info_ret *ret,
1920 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1921 const struct opt_out *output)
1923 size_t numval = 0, szval = 0, cursor = 0, slen = 0;
1924 cl_device_partition_property_ext *val = NULL;
1925 const char * const *ptstr = (output->mode == CLINFO_HUMAN ?
1926 partition_type_str : partition_type_raw_str);
1928 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1930 GET_VAL_ARRAY(ret, loc);
1934 for (cursor = 0; cursor < numval; ++cursor) {
1937 /* add separator for values past the first */
1938 add_separator(&ret->str, &szval);
1940 switch (val[cursor]) {
1941 case 0: str_idx = 0; break;
1942 case CL_DEVICE_PARTITION_EQUALLY_EXT: str_idx = 1; break;
1943 case CL_DEVICE_PARTITION_BY_COUNTS_EXT: str_idx = 2; break;
1944 case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT: str_idx = 3; break;
1945 case CL_DEVICE_PARTITION_BY_NAMES_EXT: str_idx = 4; break;
1947 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "by <unknown> (%#" PRIx64 ")", val[cursor]);
1952 slen = strlen(ptstr[str_idx]);
1953 strncpy(ret->str.buf + szval, ptstr[str_idx], slen);
1956 if (szval >= ret->str.sz) {
1957 trunc_strbuf(&ret->str);
1961 if (szval < ret->str.sz)
1962 ret->str.buf[szval] = '\0';
1963 // TODO ret->value.??? = val
1969 /* Device partition affinity domains */
1971 device_info_partition_affinities(struct device_info_ret *ret,
1972 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1973 const struct opt_out *output)
1975 DEV_FETCH(cl_device_affinity_domain, val);
1976 if (!ret->err && val) {
1977 /* iterate over affinity domain strings appending their textual form
1981 const char * const *affstr = (output->mode == CLINFO_HUMAN ?
1982 affinity_domain_str : affinity_domain_raw_str);
1983 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1984 for (i = 0; i < affinity_domain_count; ++i) {
1985 cl_device_affinity_domain cur = (cl_device_affinity_domain)(1) << i;
1987 /* match: add separator if not first match */
1988 add_separator(&ret->str, &szval);
1989 szval += bufcpy(&ret->str, szval, affstr[i]);
1991 if (szval >= ret->str.sz)
1994 /* check for extra bits */
1995 if (szval < ret->str.sz) {
1996 cl_device_affinity_domain known_mask = ((cl_device_affinity_domain)(1) << affinity_domain_count) - 1;
1997 cl_device_affinity_domain extra = val & ~known_mask;
1999 add_separator(&ret->str, &szval);
2000 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra);
2007 device_info_partition_affinities_ext(struct device_info_ret *ret,
2008 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
2009 const struct opt_out *output)
2011 size_t numval = 0, szval = 0, cursor = 0, slen = 0;
2012 cl_device_partition_property_ext *val = NULL;
2013 const char * const *ptstr = (output->mode == CLINFO_HUMAN ?
2014 affinity_domain_ext_str : affinity_domain_raw_ext_str);
2016 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
2018 GET_VAL_ARRAY(ret, loc);
2022 for (cursor = 0; cursor < numval; ++cursor) {
2025 /* add separator for values past the first */
2026 add_separator(&ret->str, &szval);
2028 switch (val[cursor]) {
2029 case CL_AFFINITY_DOMAIN_NUMA_EXT: str_idx = 0; break;
2030 case CL_AFFINITY_DOMAIN_L4_CACHE_EXT: str_idx = 1; break;
2031 case CL_AFFINITY_DOMAIN_L3_CACHE_EXT: str_idx = 2; break;
2032 case CL_AFFINITY_DOMAIN_L2_CACHE_EXT: str_idx = 3; break;
2033 case CL_AFFINITY_DOMAIN_L1_CACHE_EXT: str_idx = 4; break;
2034 case CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT: str_idx = 5; break;
2036 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "<unknown> (%#" PRIx64 ")", val[cursor]);
2041 const char *str = ptstr[str_idx];
2043 strncpy(ret->str.buf + szval, str, slen);
2046 if (szval >= ret->str.sz) {
2047 trunc_strbuf(&ret->str);
2051 ret->str.buf[szval] = '\0';
2052 // TODO: ret->value.??? = val
2057 /* Preferred / native vector widths */
2059 device_info_vecwidth(struct device_info_ret *ret,
2060 const struct info_loc *loc, const struct device_info_checks *chk,
2061 const struct opt_out *output)
2063 struct info_loc loc2 = *loc;
2064 cl_uint preferred = 0, native = 0;
2065 _GET_VAL(ret, loc, preferred);
2067 /* we get called with PREFERRED, NATIVE is at +0x30 offset, except for HALF,
2068 * which is at +0x08 */
2070 (loc2.param.dev == CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF ? 0x08 : 0x30);
2071 /* TODO update loc2.sname */
2072 _GET_VAL(ret, &loc2, native);
2076 const char *ext = (loc2.param.dev == CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF ?
2077 chk->has_half : (loc2.param.dev == CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE ?
2078 chk->has_double : NULL));
2079 szval = strbuf_printf(&ret->str, "%8u / %-8u", preferred, native);
2081 sprintf(ret->str.buf + szval, " (%s)", *ext ? ext : na);
2084 ret->value.u32v.s[0] = preferred;
2085 ret->value.u32v.s[1] = native;
2088 /* Floating-point configurations */
2090 device_info_fpconf(struct device_info_ret *ret,
2091 const struct info_loc *loc, const struct device_info_checks *chk,
2092 const struct opt_out *output)
2094 /* When in HUMAN output, we are called unconditionally,
2095 * so we have to do some manual checks ourselves */
2096 const cl_bool get_it = (output->mode != CLINFO_HUMAN) ||
2097 (loc->param.dev == CL_DEVICE_SINGLE_FP_CONFIG) ||
2098 (loc->param.dev == CL_DEVICE_HALF_FP_CONFIG && dev_has_half(chk)) ||
2099 (loc->param.dev == CL_DEVICE_DOUBLE_FP_CONFIG && dev_has_double(chk));
2101 DEV_FETCH(cl_device_fp_config, val);
2103 if (ret->err && !get_it) {
2104 ret->err = CL_SUCCESS;
2112 const char * const *fpstr = (output->mode == CLINFO_HUMAN ?
2113 fp_conf_str : fp_conf_raw_str);
2114 set_separator(vbar_str);
2115 if (output->mode == CLINFO_HUMAN) {
2116 const char *why = na;
2117 switch (loc->param.dev) {
2118 case CL_DEVICE_HALF_FP_CONFIG:
2120 why = chk->has_half;
2122 case CL_DEVICE_SINGLE_FP_CONFIG:
2125 case CL_DEVICE_DOUBLE_FP_CONFIG:
2127 why = chk->has_double;
2130 /* "this can't happen" (unless OpenCL starts supporting _other_ floating-point formats, maybe) */
2131 fprintf(stderr, "unsupported floating-point configuration parameter %s\n", loc->pname);
2133 /* show 'why' it's being shown */
2134 szval += strbuf_printf(&ret->str, "(%s)", why);
2137 size_t num_flags = fp_conf_count;
2138 /* The last flag, CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT is only considered
2139 * in the single-precision case. half and double don't consider it,
2140 * so we skip it altogether */
2141 if (loc->param.dev != CL_DEVICE_SINGLE_FP_CONFIG)
2144 for (i = 0; i < num_flags; ++i) {
2145 cl_device_fp_config cur = (cl_device_fp_config)(1) << i;
2146 if (output->mode == CLINFO_HUMAN) {
2147 szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
2148 line_pfx, fpstr[i], bool_str[!!(val & cur)]);
2149 } else if (val & cur) {
2150 add_separator(&ret->str, &szval);
2151 szval += bufcpy(&ret->str, szval, fpstr[i]);
2158 /* Queue properties */
2160 device_info_qprop(struct device_info_ret *ret,
2161 const struct info_loc *loc, const struct device_info_checks *chk,
2162 const struct opt_out *output)
2164 DEV_FETCH(cl_command_queue_properties, val);
2168 const char * const *qpstr = (output->mode == CLINFO_HUMAN ?
2169 queue_prop_str : queue_prop_raw_str);
2170 set_separator(vbar_str);
2171 for (i = 0; i < queue_prop_count; ++i) {
2172 cl_command_queue_properties cur = (cl_command_queue_properties)(1) << i;
2173 if (output->mode == CLINFO_HUMAN) {
2174 szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
2175 line_pfx, qpstr[i], bool_str[!!(val & cur)]);
2176 } else if (val & cur) {
2177 add_separator(&ret->str, &szval);
2178 szval += bufcpy(&ret->str, szval, qpstr[i]);
2181 if (output->mode == CLINFO_HUMAN && loc->param.dev == CL_DEVICE_QUEUE_PROPERTIES &&
2182 dev_has_intel_local_thread(chk))
2183 sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
2184 line_pfx, "Local thread execution (Intel)", bool_str[CL_TRUE]);
2188 /* Execution capbilities */
2190 device_info_execap(struct device_info_ret *ret,
2191 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
2192 const struct opt_out *output)
2194 DEV_FETCH(cl_device_exec_capabilities, val);
2198 const char * const *qpstr = (output->mode == CLINFO_HUMAN ?
2199 execap_str : execap_raw_str);
2200 set_separator(vbar_str);
2201 for (i = 0; i < execap_count; ++i) {
2202 cl_device_exec_capabilities cur = (cl_device_exec_capabilities)(1) << i;
2203 if (output->mode == CLINFO_HUMAN) {
2204 szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
2205 line_pfx, qpstr[i], bool_str[!!(val & cur)]);
2206 } else if (val & cur) {
2207 add_separator(&ret->str, &szval);
2208 szval += bufcpy(&ret->str, szval, qpstr[i]);
2214 /* Arch bits and endianness (HUMAN) */
2216 device_info_arch(struct device_info_ret *ret,
2217 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
2218 const struct opt_out *output)
2220 DEV_FETCH(cl_uint, bits);
2221 struct info_loc loc2 = *loc;
2222 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_ENDIAN_LITTLE);
2225 DEV_FETCH_LOC(cl_bool, val, &loc2);
2227 strbuf_printf(&ret->str, "%" PRIu32 ", %s", bits, endian_str[val]);
2232 /* SVM capabilities */
2234 device_info_svm_cap(struct device_info_ret *ret,
2235 const struct info_loc *loc, const struct device_info_checks *chk,
2236 const struct opt_out *output)
2238 const cl_bool is_20 = dev_is_20(chk);
2239 const cl_bool checking_core = (loc->param.dev == CL_DEVICE_SVM_CAPABILITIES);
2240 const cl_bool has_amd_svm = (checking_core && dev_has_amd_svm(chk));
2241 DEV_FETCH(cl_device_svm_capabilities, val);
2246 const char * const *scstr = (output->mode == CLINFO_HUMAN ?
2247 svm_cap_str : svm_cap_raw_str);
2248 set_separator(vbar_str);
2249 if (output->mode == CLINFO_HUMAN && checking_core) {
2250 /* show 'why' it's being shown */
2251 szval += strbuf_printf(&ret->str, "(%s%s%s)",
2252 (is_20 ? core : empty_str),
2253 (is_20 && has_amd_svm ? comma_str : empty_str),
2256 for (i = 0; i < svm_cap_count; ++i) {
2257 cl_device_svm_capabilities cur = (cl_device_svm_capabilities)(1) << i;
2258 if (output->mode == CLINFO_HUMAN) {
2259 szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
2260 line_pfx, scstr[i], bool_str[!!(val & cur)]);
2261 } else if (val & cur) {
2262 add_separator(&ret->str, &szval);
2263 szval += bufcpy(&ret->str, szval, scstr[i]);
2269 /* Device terminate capability */
2271 device_info_terminate_capability(struct device_info_ret *ret,
2272 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
2273 const struct opt_out *output)
2275 DEV_FETCH(cl_device_terminate_capability_khr, val);
2276 if (!ret->err && val) {
2277 /* iterate over terminate capability strings appending their textual form
2281 const char * const *capstr = (output->mode == CLINFO_HUMAN ?
2282 terminate_capability_str : terminate_capability_raw_str);
2283 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
2284 for (i = 0; i < terminate_capability_count; ++i) {
2285 cl_device_terminate_capability_khr cur = (cl_device_terminate_capability_khr)(1) << i;
2287 /* match: add separator if not first match */
2288 add_separator(&ret->str, &szval);
2289 szval += bufcpy(&ret->str, szval, capstr[i]);
2291 if (szval >= ret->str.sz)
2294 /* check for extra bits */
2295 if (szval < ret->str.sz) {
2296 cl_device_terminate_capability_khr known_mask = ((cl_device_terminate_capability_khr)(1) << terminate_capability_count) - 1;
2297 cl_device_terminate_capability_khr extra = val & ~known_mask;
2299 add_separator(&ret->str, &szval);
2300 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra);
2307 device_info_p2p_dev_list(struct device_info_ret *ret,
2308 const struct info_loc *loc, const struct device_info_checks *chk,
2309 const struct opt_out* UNUSED(output))
2311 // Contrary to most array values in OpenCL, the AMD platform does not support querying
2312 // CL_DEVICE_P2P_DEVICES_AMD with a NULL ptr to get the number of results.
2313 // The user is assumed to have queried for the CL_DEVICE_NUM_P2P_DEVICES_AMD first,
2314 // and to have allocated the return array beforehand.
2315 cl_device_id *val = NULL;
2316 size_t numval = chk->p2p_num_devs, szval = numval*sizeof(*val);
2317 _GET_VAL_VALUES(ret, loc);
2321 for (cursor= 0; cursor < numval; ++cursor) {
2323 ret->str.buf[szval] = ' ';
2326 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%p", (void*)val[cursor]);
2328 // TODO: ret->value.??? = val;
2334 device_info_interop_list(struct device_info_ret *ret,
2335 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
2336 const struct opt_out *output)
2338 cl_uint *val = NULL;
2339 size_t szval = 0, numval = 0;
2340 GET_VAL_ARRAY(ret, loc);
2343 const cl_interop_name *interop_name_end = cl_interop_names + num_known_interops;
2344 cl_uint human_raw = output->mode - CLINFO_HUMAN;
2345 const char *groupsep = (output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
2346 cl_bool first = CL_TRUE;
2348 for (cursor = 0; cursor < numval; ++cursor) {
2349 cl_uint current = val[cursor];
2350 if (!current && cursor < numval - 1) {
2351 /* A null value is used as group terminator, but we only print it
2352 * if it's not the final one
2354 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%s", groupsep);
2358 cl_bool found = CL_FALSE;
2359 const cl_interop_name *n = cl_interop_names;
2362 ret->str.buf[szval] = ' ';
2366 while (n < interop_name_end) {
2367 if (current >= n->from && current <= n->to) {
2374 cl_uint i = current - n->from;
2375 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%s", n->value[i][human_raw]);
2377 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx32, val[cursor]);
2381 if (szval >= ret->str.sz) {
2382 trunc_strbuf(&ret->str);
2386 // TODO: ret->value.??? = val;
2391 void device_info_uuid(struct device_info_ret *ret,
2392 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
2393 const struct opt_out *output)
2395 cl_uchar uuid[CL_UUID_SIZE_KHR];
2396 _GET_VAL(ret, loc, uuid);
2398 strbuf_printf(&ret->str,
2403 "%02x%02x%02x%02x%02x%02x",
2404 uuid[0], uuid[1], uuid[2], uuid[3], uuid[4],
2408 uuid[11], uuid[12], uuid[13], uuid[14], uuid[15]);
2412 void device_info_luid(struct device_info_ret *ret,
2413 const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
2414 const struct opt_out *output)
2416 cl_uchar uuid[CL_LUID_SIZE_KHR];
2417 _GET_VAL(ret, loc, uuid);
2419 /* TODO not sure this is the correct representation for LUIDs? */
2420 strbuf_printf(&ret->str, "%02x%02x-%02x%02x%02x%02x%02x%02x",
2422 uuid[2], uuid[3], uuid[4], uuid[5], uuid[6], uuid[7]);
2428 * Device info traits
2431 /* A CL_FALSE param means "just print pname" */
2433 struct device_info_traits {
2434 enum output_modes output_mode;
2435 cl_device_info param; // CL_DEVICE_*
2436 const char *sname; // "CL_DEVICE_*"
2437 const char *pname; // "Device *"
2438 const char *sfx; // suffix for the output in non-raw mode
2439 /* pointer to function that retrieves the parameter */
2440 void (*show_func)(struct device_info_ret *,
2441 const struct info_loc *, const struct device_info_checks *,
2442 const struct opt_out *);
2443 /* pointer to function that checks if the parameter should be retrieved */
2444 cl_bool (*check_func)(const struct device_info_checks *);
2447 #define DINFO_SFX(symbol, name, sfx, typ) symbol, #symbol, name, sfx, device_info_##typ
2448 #define DINFO(symbol, name, typ) symbol, #symbol, name, NULL, device_info_##typ
2450 struct device_info_traits dinfo_traits[] = {
2451 { CLINFO_BOTH, DINFO(CL_DEVICE_NAME, "Device Name", str), NULL },
2452 { CLINFO_BOTH, DINFO(CL_DEVICE_VENDOR, "Device Vendor", str), NULL },
2453 { CLINFO_BOTH, DINFO(CL_DEVICE_VENDOR_ID, "Device Vendor ID", hex), NULL },
2454 { CLINFO_BOTH, DINFO(CL_DEVICE_VERSION, "Device Version", str), NULL },
2456 /* This has to be made before calling NUMERIC_VERSION , since to know if it's supported
2457 * we need to know about the extensions */
2458 { CLINFO_BOTH, DINFO(CL_DEVICE_EXTENSIONS, "Device Extensions", str), NULL },
2459 { CLINFO_BOTH, DINFO(CL_DEVICE_EXTENSIONS_WITH_VERSION, "Device Extensions with Version", ext_version), dev_has_ext_ver },
2461 { CLINFO_BOTH, DINFO(CL_DEVICE_UUID_KHR, "Device UUID", uuid), dev_has_device_uuid },
2462 { CLINFO_BOTH, DINFO(CL_DRIVER_UUID_KHR, "Driver UUID", uuid), dev_has_device_uuid },
2463 { CLINFO_BOTH, DINFO(CL_DEVICE_LUID_VALID_KHR, "Valid Device LUID", bool), dev_has_device_uuid },
2464 { CLINFO_BOTH, DINFO(CL_DEVICE_LUID_KHR, "Device LUID", luid), dev_has_device_uuid },
2465 { CLINFO_BOTH, DINFO(CL_DEVICE_NODE_MASK_KHR, "Device Node Mask", hex), dev_has_device_uuid },
2467 { CLINFO_BOTH, DINFO(CL_DEVICE_NUMERIC_VERSION, "Device Numeric Version", version), dev_has_ext_ver },
2468 { CLINFO_BOTH, DINFO(CL_DRIVER_VERSION, "Driver Version", str), NULL },
2469 { CLINFO_BOTH, DINFO(CL_DEVICE_OPENCL_C_VERSION, "Device OpenCL C Version", str), dev_is_11 },
2470 { CLINFO_BOTH, DINFO(CL_DEVICE_OPENCL_C_ALL_VERSIONS, "Device OpenCL C all versions", ext_version), dev_has_ext_ver },
2471 { CLINFO_BOTH, DINFO(CL_DEVICE_OPENCL_C_FEATURES, "Device OpenCL C features", ext_version), dev_is_30 },
2473 { CLINFO_BOTH, DINFO(CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT, "Device C++ for OpenCL Numeric Version", version), dev_has_cxx_for_opencl },
2475 { CLINFO_BOTH, DINFO(CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED, "Latest comfornace test passed", str), dev_is_30 },
2476 { CLINFO_BOTH, DINFO(CL_DEVICE_TYPE, "Device Type", devtype), NULL },
2478 { CLINFO_BOTH, DINFO(CL_DEVICE_BOARD_NAME_AMD, "Device Board Name (AMD)", str), dev_has_amd },
2479 { CLINFO_BOTH, DINFO(CL_DEVICE_PCIE_ID_AMD, "Device PCI-e ID (AMD)", hex), dev_has_amd },
2480 { CLINFO_BOTH, DINFO(CL_DEVICE_TOPOLOGY_AMD, "Device Topology (AMD)", devtopo_amd), dev_has_amd },
2482 /* Device Topology (NV) is multipart, so different for HUMAN and RAW */
2483 { CLINFO_HUMAN, DINFO(CL_DEVICE_PCI_BUS_ID_NV, "Device Topology (NV)", devtopo_nv), dev_has_nv },
2484 { CLINFO_RAW, DINFO(CL_DEVICE_PCI_BUS_ID_NV, "Device PCI bus (NV)", int), dev_has_nv },
2485 { CLINFO_RAW, DINFO(CL_DEVICE_PCI_SLOT_ID_NV, "Device PCI slot (NV)", int), dev_has_nv },
2487 { CLINFO_BOTH, DINFO(CL_DEVICE_PROFILE, "Device Profile", str), NULL },
2488 { CLINFO_BOTH, DINFO(CL_DEVICE_AVAILABLE, "Device Available", bool), NULL },
2489 { CLINFO_BOTH, DINFO(CL_DEVICE_COMPILER_AVAILABLE, "Compiler Available", bool), NULL },
2490 { CLINFO_BOTH, DINFO(CL_DEVICE_LINKER_AVAILABLE, "Linker Available", bool), dev_is_12 },
2492 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_COMPUTE_UNITS, "Max compute units", int), NULL },
2493 { CLINFO_HUMAN, DINFO(CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM, "Available core IDs", core_ids), dev_has_arm_core_id_v2 },
2494 { CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM, "Available core IDs", long), dev_has_arm_core_id_v2 },
2495 { CLINFO_HUMAN, DINFO(CL_DEVICE_JOB_SLOTS_ARM, "Available job slots (ARM)", job_slots), dev_has_arm_job_slots },
2496 { CLINFO_RAW, DINFO(CL_DEVICE_JOB_SLOTS_ARM, "Available job slots (ARM)", int), dev_has_arm_job_slots },
2497 { CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, "SIMD per compute unit (AMD)", int), dev_is_gpu_amd },
2498 { CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_WIDTH_AMD, "SIMD width (AMD)", int), dev_is_gpu_amd },
2499 { CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, "SIMD instruction width (AMD)", int), dev_is_gpu_amd },
2500 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_MAX_CLOCK_FREQUENCY, "Max clock frequency", "MHz", int), NULL },
2502 /* Device Compute Capability (NV) is multipart, so different for HUMAN and RAW */
2503 { CLINFO_HUMAN, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, "Compute Capability (NV)", cc_nv), dev_has_nv },
2504 { CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, INDENT "Compute Capability Major (NV)", int), dev_has_nv },
2505 { CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, INDENT "Compute Capability Minor (NV)", int), dev_has_nv },
2507 /* GFXIP (AMD) is multipart, so different for HUMAN and RAW */
2508 /* TODO: find a better human-friendly name than GFXIP; v3 of the cl_amd_device_attribute_query
2509 * extension specification calls it “core engine GFXIP”, which honestly is not better than
2510 * our name choice. */
2511 { CLINFO_HUMAN, DINFO(CL_DEVICE_GFXIP_MAJOR_AMD, "Graphics IP (AMD)", gfxip_amd), dev_is_gpu_amd },
2512 { CLINFO_RAW, DINFO(CL_DEVICE_GFXIP_MAJOR_AMD, INDENT "Graphics IP MAJOR (AMD)", int), dev_is_gpu_amd },
2513 { CLINFO_RAW, DINFO(CL_DEVICE_GFXIP_MINOR_AMD, INDENT "Graphics IP MINOR (AMD)", int), dev_is_gpu_amd },
2515 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_CORE_TEMPERATURE_ALTERA, "Core Temperature (Altera)", " C", int), dev_has_altera_dev_temp },
2517 /* Device partition support: summary is only presented in HUMAN case */
2518 { CLINFO_HUMAN, DINFO(CL_DEVICE_PARTITION_MAX_SUB_DEVICES, "Device Partition", partition_header), dev_has_partition },
2519 { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_MAX_SUB_DEVICES, INDENT "Max number of sub-devices", int), dev_is_12 },
2520 { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_PROPERTIES, INDENT "Supported partition types", partition_types), dev_is_12 },
2521 { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_AFFINITY_DOMAIN, INDENT "Supported affinity domains", partition_affinities), dev_is_12 },
2522 { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_TYPES_EXT, INDENT "Supported partition types (ext)", partition_types_ext), dev_has_fission },
2523 { CLINFO_BOTH, DINFO(CL_DEVICE_AFFINITY_DOMAINS_EXT, INDENT "Supported affinity domains (ext)", partition_affinities_ext), dev_has_fission },
2525 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, "Max work item dimensions", int), NULL },
2526 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_ITEM_SIZES, "Max work item sizes", szptr_times), NULL },
2527 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_GROUP_SIZE, "Max work group size", sz), NULL },
2529 /* cl_amd_device_attribute_query v4 */
2530 { CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD, "Preferred work group size (AMD)", sz), dev_has_amd_v4 },
2531 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD, "Max work group size (AMD)", sz), dev_has_amd_v4 },
2533 { CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, "Preferred work group size multiple (device)", sz), dev_is_30 },
2534 { CLINFO_BOTH, DINFO(CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, "Preferred work group size multiple (kernel)", wg), dev_has_compiler_11 },
2535 { CLINFO_BOTH, DINFO(CL_DEVICE_WARP_SIZE_NV, "Warp size (NV)", int), dev_has_nv },
2536 { CLINFO_BOTH, DINFO(CL_DEVICE_WAVEFRONT_WIDTH_AMD, "Wavefront width (AMD)", int), dev_is_gpu_amd },
2537 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_NUM_SUB_GROUPS, "Max sub-groups per work group", int), dev_is_21 },
2538 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR, "Max named sub-group barriers", int), dev_has_subgroup_named_barrier },
2539 { CLINFO_BOTH, DINFO(CL_DEVICE_SUB_GROUP_SIZES_INTEL, "Sub-group sizes (Intel)", szptr_comma), dev_has_intel_required_subgroup_size },
2541 /* Preferred/native vector widths: header is only presented in HUMAN case, that also pairs
2542 * PREFERRED and NATIVE in a single line */
2543 #define DINFO_VECWIDTH(Type, type) \
2544 { CLINFO_HUMAN, DINFO(CL_DEVICE_PREFERRED_VECTOR_WIDTH_##Type, INDENT #type, vecwidth), NULL }, \
2545 { CLINFO_RAW, DINFO(CL_DEVICE_PREFERRED_VECTOR_WIDTH_##Type, INDENT #type, int), NULL }, \
2546 { CLINFO_RAW, DINFO(CL_DEVICE_NATIVE_VECTOR_WIDTH_##Type, INDENT #type, int), dev_is_11 }
2548 { CLINFO_HUMAN, DINFO(CL_FALSE, "Preferred / native vector sizes", str), NULL },
2549 DINFO_VECWIDTH(CHAR, char),
2550 DINFO_VECWIDTH(SHORT, short),
2551 DINFO_VECWIDTH(INT, int),
2552 DINFO_VECWIDTH(LONG, long),
2553 DINFO_VECWIDTH(HALF, half), /* this should be excluded for 1.0 */
2554 DINFO_VECWIDTH(FLOAT, float),
2555 DINFO_VECWIDTH(DOUBLE, double),
2557 /* Floating point configurations */
2558 #define DINFO_FPCONF(Type, type, cond) \
2559 { CLINFO_HUMAN, DINFO(CL_DEVICE_##Type##_FP_CONFIG, #type "-precision Floating-point support", fpconf), NULL }, \
2560 { CLINFO_RAW, DINFO(CL_DEVICE_##Type##_FP_CONFIG, #type "-precision Floating-point support", fpconf), cond }
2562 DINFO_FPCONF(HALF, Half, dev_has_half),
2563 DINFO_FPCONF(SINGLE, Single, NULL),
2564 DINFO_FPCONF(DOUBLE, Double, dev_has_double),
2566 /* Address bits and endianness are written together for HUMAN, separate for RAW */
2567 { CLINFO_HUMAN, DINFO(CL_DEVICE_ADDRESS_BITS, "Address bits", arch), NULL },
2568 { CLINFO_RAW, DINFO(CL_DEVICE_ADDRESS_BITS, "Address bits", int), NULL },
2569 { CLINFO_RAW, DINFO(CL_DEVICE_ENDIAN_LITTLE, "Little Endian", bool), NULL },
2572 { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_SIZE, "Global memory size", mem), NULL },
2573 { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, "Global free memory (AMD)", free_mem_amd), dev_is_gpu_amd },
2574 { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, "Global memory channels (AMD)", int), dev_is_gpu_amd },
2575 { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, "Global memory banks per channel (AMD)", int), dev_is_gpu_amd },
2576 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, "Global memory bank width (AMD)", bytes_str, int), dev_is_gpu_amd },
2577 { CLINFO_BOTH, DINFO(CL_DEVICE_ERROR_CORRECTION_SUPPORT, "Error Correction support", bool), NULL },
2578 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_MEM_ALLOC_SIZE, "Max memory allocation", mem), NULL },
2579 { CLINFO_BOTH, DINFO(CL_DEVICE_HOST_UNIFIED_MEMORY, "Unified memory for Host and Device", bool), dev_is_11 },
2580 { CLINFO_BOTH, DINFO(CL_DEVICE_INTEGRATED_MEMORY_NV, "Integrated memory (NV)", bool), dev_has_nv },
2582 { CLINFO_BOTH, DINFO(CL_DEVICE_SVM_CAPABILITIES, "Shared Virtual Memory (SVM) capabilities", svm_cap), dev_has_svm },
2583 { CLINFO_BOTH, DINFO(CL_DEVICE_SVM_CAPABILITIES_ARM, "Shared Virtual Memory (SVM) capabilities (ARM)", svm_cap), dev_has_arm_svm },
2586 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, "Minimum alignment for any data type", bytes_str, int), NULL },
2587 { CLINFO_HUMAN, DINFO(CL_DEVICE_MEM_BASE_ADDR_ALIGN, "Alignment of base address", bits), NULL },
2588 { CLINFO_RAW, DINFO(CL_DEVICE_MEM_BASE_ADDR_ALIGN, "Alignment of base address", int), NULL },
2590 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PAGE_SIZE_QCOM, "Page size (QCOM)", bytes_str, sz), dev_has_qcom_ext_host_ptr },
2591 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM, "External memory padding (QCOM)", bytes_str, sz), dev_has_qcom_ext_host_ptr },
2593 /* Atomics alignment, with HUMAN-only header */
2594 { CLINFO_HUMAN, DINFO(CL_FALSE, "Preferred alignment for atomics", str), dev_is_20 },
2595 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, INDENT "SVM", bytes_str, int), dev_is_20 },
2596 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, INDENT "Global", bytes_str, int), dev_is_20 },
2597 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, INDENT "Local", bytes_str, int), dev_is_20 },
2599 /* 3.0+ Atomic memory and fence capabilities */
2600 { CLINFO_BOTH, DINFO(CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, "Atomic memory capabilities", atomic_caps), dev_is_30 },
2601 { CLINFO_BOTH, DINFO(CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, "Atomic fence capabilities", atomic_caps), dev_is_30 },
2603 /* Global variables. TODO some 1.2 devices respond to this too */
2604 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, "Max size for global variable", mem), dev_is_20 },
2605 { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, "Preferred total size of global vars", mem), dev_is_20 },
2607 /* Global memory cache */
2608 { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, "Global Memory cache type", cachetype), NULL },
2609 { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, "Global Memory cache size", mem), dev_has_cache },
2610 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, "Global Memory cache line size", " bytes", int), dev_has_cache },
2613 { CLINFO_BOTH, DINFO(CL_DEVICE_IMAGE_SUPPORT, "Image support", bool), NULL },
2614 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_SAMPLERS, INDENT "Max number of samplers per kernel", int), dev_has_images },
2615 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, INDENT "Max size for 1D images from buffer", pixels_str, sz), dev_has_images_12 },
2616 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, INDENT "Max 1D or 2D image array size", images_str, sz), dev_has_images_12 },
2617 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, INDENT "Base address alignment for 2D image buffers", bytes_str, sz), dev_has_image2d_buffer },
2618 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_PITCH_ALIGNMENT, INDENT "Pitch alignment for 2D image buffers", pixels_str, sz), dev_has_image2d_buffer },
2620 /* Image dimensions are split for RAW, combined for HUMAN */
2621 { CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_IMAGE2D_MAX_HEIGHT, INDENT "Max 2D image size", pixels_str, img_sz_2d), dev_has_images },
2622 { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE2D_MAX_HEIGHT, INDENT "Max 2D image height", sz), dev_has_images },
2623 { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE2D_MAX_WIDTH, INDENT "Max 2D image width", sz), dev_has_images },
2624 { CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, INDENT "Max planar YUV image size", pixels_str, img_sz_2d), dev_has_intel_planar_yuv },
2625 { CLINFO_RAW, DINFO(CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, INDENT "Max planar YUV image height", sz), dev_has_intel_planar_yuv },
2626 { CLINFO_RAW, DINFO(CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL, INDENT "Max planar YUV image width", sz), dev_has_intel_planar_yuv },
2627 { CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_IMAGE3D_MAX_HEIGHT, INDENT "Max 3D image size", pixels_str, img_sz_3d), dev_has_images },
2628 { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_HEIGHT, INDENT "Max 3D image height", sz), dev_has_images },
2629 { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_WIDTH, INDENT "Max 3D image width", sz), dev_has_images },
2630 { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_DEPTH, INDENT "Max 3D image depth", sz), dev_has_images },
2632 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_READ_IMAGE_ARGS, INDENT "Max number of read image args", int), dev_has_images },
2633 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WRITE_IMAGE_ARGS, INDENT "Max number of write image args", int), dev_has_images },
2634 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, INDENT "Max number of read/write image args", int), dev_has_images_20 },
2637 { CLINFO_BOTH, DINFO(CL_DEVICE_PIPE_SUPPORT, "Pipe support", bool), dev_is_30 },
2638 /* TODO FIXME: the above should be true if dev is [2.0, 3.0[, and the next properties should be nested */
2639 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_PIPE_ARGS, "Max number of pipe args", int), dev_is_20 },
2640 { CLINFO_BOTH, DINFO(CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, "Max active pipe reservations", int), dev_is_20 },
2641 { CLINFO_BOTH, DINFO(CL_DEVICE_PIPE_MAX_PACKET_SIZE, "Max pipe packet size", mem_int), dev_is_20 },
2644 { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_TYPE, "Local memory type", lmemtype), NULL },
2645 { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_SIZE, "Local memory size", mem), dev_has_lmem },
2646 { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, "Local memory size per CU (AMD)", mem), dev_is_gpu_amd },
2647 { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_BANKS_AMD, "Local memory banks (AMD)", int), dev_is_gpu_amd },
2648 { CLINFO_BOTH, DINFO(CL_DEVICE_REGISTERS_PER_BLOCK_NV, "Registers per block (NV)", int), dev_has_nv },
2650 /* Constant memory */
2651 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_CONSTANT_ARGS, "Max number of constant args", int), NULL },
2652 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, "Max constant buffer size", mem), NULL },
2653 { CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD, "Preferred constant buffer size (AMD)", mem_sz), dev_has_amd_v4 },
2655 /* Generic address space support */
2656 { CLINFO_BOTH, DINFO(CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT, "Generic address space support", bool), dev_is_30},
2658 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_PARAMETER_SIZE, "Max size of kernel argument", mem), NULL },
2659 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT, "Max number of atomic counters", sz), dev_has_atomic_counters },
2661 /* Queue properties */
2662 { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_PROPERTIES, "Queue properties", qprop), dev_not_20 },
2663 { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, "Queue properties (on host)", qprop), dev_is_20 },
2664 { CLINFO_BOTH, DINFO(CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES, "Device enqueue capabilities", device_enqueue_caps), dev_is_30 },
2665 /* TODO FIXME: the above should be true if dev is [2.0, 3.0[, and the next properties should be nested */
2666 { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, "Queue properties (on device)", qprop), dev_is_20 },
2667 { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, INDENT "Preferred size", mem), dev_is_20 },
2668 { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, INDENT "Max size", mem), dev_is_20 },
2669 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ON_DEVICE_QUEUES, "Max queues on device", int), dev_is_20 },
2670 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ON_DEVICE_EVENTS, "Max events on device", int), dev_is_20 },
2672 /* Terminate context */
2673 { CLINFO_BOTH, DINFO(CL_DEVICE_TERMINATE_CAPABILITY_KHR_1x, "Terminate capability (1.2 define)", terminate_capability), dev_has_terminate_context },
2674 { CLINFO_BOTH, DINFO(CL_DEVICE_TERMINATE_CAPABILITY_KHR_2x, "Terminate capability (2.x define)", terminate_capability), dev_has_terminate_context },
2677 { CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, "Prefer user sync for interop", bool), dev_is_12 },
2678 { CLINFO_BOTH, DINFO(CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL, "Number of simultaneous interops (Intel)", int), dev_has_simultaneous_sharing },
2679 { CLINFO_BOTH, DINFO(CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL, "Simultaneous interops", interop_list), dev_has_simultaneous_sharing },
2681 /* P2P buffer copy */
2682 { CLINFO_BOTH, DINFO(CL_DEVICE_NUM_P2P_DEVICES_AMD, "Number of P2P devices (AMD)", int), dev_has_p2p },
2683 { CLINFO_BOTH, DINFO(CL_DEVICE_P2P_DEVICES_AMD, "P2P devices (AMD)", p2p_dev_list), dev_has_p2p_devs },
2685 /* Profiling resolution */
2686 { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PROFILING_TIMER_RESOLUTION, "Profiling timer resolution", "ns", sz), NULL },
2687 { CLINFO_HUMAN, DINFO(CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, "Profiling timer offset since Epoch (AMD)", time_offset), dev_has_amd },
2688 { CLINFO_RAW, DINFO(CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, "Profiling timer offset since Epoch (AMD)", long), dev_has_amd },
2690 /* Kernel execution capabilities */
2691 { CLINFO_BOTH, DINFO(CL_DEVICE_EXECUTION_CAPABILITIES, "Execution capabilities", execap), NULL },
2692 { CLINFO_BOTH, DINFO(CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, INDENT "Non-uniform work-groups", bool), dev_is_30 },
2693 { CLINFO_BOTH, DINFO(CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT, INDENT "Work-group collective functions", bool), dev_is_30 },
2694 { CLINFO_BOTH, DINFO(CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, INDENT "Sub-group independent forward progress", bool), dev_is_21 },
2695 { CLINFO_BOTH, DINFO(CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD, INDENT "Thread trace supported (AMD)", bool), dev_is_gpu_amd },
2696 { CLINFO_BOTH, DINFO(CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, INDENT "Kernel execution timeout (NV)", bool), dev_has_nv },
2697 { CLINFO_BOTH, DINFO(CL_DEVICE_GPU_OVERLAP_NV, "Concurrent copy and kernel execution (NV)", bool), dev_has_nv },
2698 { CLINFO_BOTH, DINFO(CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV, INDENT "Number of async copy engines", int), dev_has_nv },
2699 { CLINFO_BOTH, DINFO(CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD, INDENT "Number of async queues (AMD)", int), dev_has_amd_v4 },
2700 /* TODO FIXME undocumented, experimental */
2701 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_REAL_TIME_COMPUTE_QUEUES_AMD, INDENT "Max real-time compute queues (AMD)", int), dev_has_amd_v4 },
2702 { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_REAL_TIME_COMPUTE_UNITS_AMD, INDENT "Max real-time compute units (AMD)", int), dev_has_amd_v4 },
2704 /* TODO: this should tell if it's being done due to the device being 2.1 or due to it having the extension */
2705 { CLINFO_BOTH, DINFO(CL_DEVICE_IL_VERSION, INDENT "IL version", str), dev_has_il },
2706 { CLINFO_BOTH, DINFO(CL_DEVICE_ILS_WITH_VERSION, INDENT "ILs with version", ext_version), dev_has_ext_ver },
2707 { CLINFO_BOTH, DINFO(CL_DEVICE_SPIR_VERSIONS, INDENT "SPIR versions", str), dev_has_spir },
2708 { CLINFO_BOTH, DINFO(CL_DEVICE_PRINTF_BUFFER_SIZE, "printf() buffer size", mem_sz), dev_is_12 },
2709 { CLINFO_BOTH, DINFO(CL_DEVICE_BUILT_IN_KERNELS, "Built-in kernels", str), dev_is_12 },
2710 { CLINFO_BOTH, DINFO(CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION, "Built-in kernels with version", ext_version), dev_has_ext_ver },
2711 { CLINFO_BOTH, DINFO(CL_DEVICE_ME_VERSION_INTEL, "Motion Estimation accelerator version (Intel)", int), dev_has_intel_AME },
2712 { CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_VERSION_INTEL, INDENT "Device-side AVC Motion Estimation version", int), dev_has_intel_AVC_ME },
2713 { CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL, INDENT INDENT "Supports texture sampler use", bool), dev_has_intel_AVC_ME },
2714 { CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL, INDENT INDENT "Supports preemption", bool), dev_has_intel_AVC_ME },
2717 /* Process all the device info in the traits, except if param_whitelist is not NULL,
2718 * in which case only those in the whitelist will be processed.
2719 * If present, the whitelist should be sorted in the order of appearance of the parameters
2720 * in the traits table, and terminated by the value CL_FALSE
2724 printDeviceInfo(cl_device_id dev, const struct platform_list *plist, cl_uint p,
2725 const cl_device_info *param_whitelist, /* list of device info to process, or NULL */
2726 const struct opt_out *output)
2728 char *extensions = NULL;
2730 char *versioned_extensions = NULL;
2732 /* pointers to the traits for CL_DEVICE_EXTENSIONS and CL_DEVICE_EXTENSIONS_WITH_VERSION */
2733 const struct device_info_traits *extensions_traits = NULL;
2734 const struct device_info_traits *versioned_extensions_traits = NULL;
2736 struct device_info_checks chk;
2737 struct device_info_ret ret;
2738 struct info_loc loc;
2740 memset(&chk, 0, sizeof(chk));
2741 chk.pinfo_checks = plist->platform_checks + p;
2742 chk.dev_version = 10;
2744 INIT_RET(ret, "device");
2746 reset_loc(&loc, __func__);
2747 loc.plat = plist->platform[p];
2750 for (loc.line = 0; loc.line < ARRAY_SIZE(dinfo_traits); ++loc.line) {
2752 const struct device_info_traits *traits = dinfo_traits + loc.line;
2754 /* checked is true if there was no condition to check for, or if the
2755 * condition was satisfied
2757 int checked = !(traits->check_func && !traits->check_func(&chk));
2759 loc.sname = traits->sname;
2760 loc.pname = (output->mode == CLINFO_HUMAN ?
2761 traits->pname : traits->sname);
2762 loc.param.dev = traits->param;
2764 /* Whitelist check: finish if done traversing the list,
2765 * skip current param if it's not the right one
2767 if ((output->cond == COND_PROP_CHECK || output->brief) && param_whitelist) {
2768 if (*param_whitelist == CL_FALSE)
2770 if (traits->param != *param_whitelist)
2775 /* skip if it's not for this output mode */
2776 if (!(output->mode & traits->output_mode))
2779 if (output->cond == COND_PROP_CHECK && !checked)
2782 cur_sfx = (output->mode == CLINFO_HUMAN && traits->sfx) ? traits->sfx : empty_str;
2784 reset_strbuf(&ret.str);
2785 reset_strbuf(&ret.err_str);
2787 /* Handle headers */
2788 if (traits->param == CL_FALSE) {
2789 ret.err = CL_SUCCESS;
2790 show_strbuf(&ret.str, loc.pname, 0, ret.err);
2794 traits->show_func(&ret, &loc, &chk, output);
2796 /* Do not print this property if the user requested one and this does not match */
2797 const cl_bool requested = !(output->prop && strstr(loc.sname, output->prop) == NULL);
2798 if (traits->param == CL_DEVICE_EXTENSIONS) {
2799 /* make a backup of the extensions string, regardless of
2800 * errors and requested, because we need the information
2801 * to fetch further information */
2802 const char *msg = RET_BUF(ret)->buf;
2803 ext_len = strlen(msg);
2804 extensions_traits = traits;
2805 /* pad with spaces: this will make it easier to check for extension presence
2806 * without erroneously matching substrings by simply padding the extension name
2809 ALLOC(extensions, ext_len+3, "extensions");
2810 memcpy(extensions + 1, msg, ext_len);
2811 extensions[0] = ' ';
2812 extensions[ext_len+1] = ' ';
2813 extensions[ext_len+2] = '\0';
2814 } else if (traits->param == CL_DEVICE_EXTENSIONS_WITH_VERSION) {
2817 /* This will be displayed at the end, after we display the output of CL_DEVICE_EXTENSIONS */
2818 const char *msg = RET_BUF(ret)->buf;
2819 const size_t len = RET_BUF(ret)->sz;
2820 versioned_extensions_traits = traits;
2821 ALLOC(versioned_extensions, len, "versioned extensions");
2822 memcpy(versioned_extensions, msg, len);
2823 } else if (requested) {
2825 /* if there was an error retrieving the property,
2826 * skip if it wasn't expected to work and we
2827 * weren't asked to show everything regardless of
2829 if (!checked && output->cond != COND_PROP_SHOW)
2833 /* on success, but empty result, show (n/a) */
2834 if (ret.str.buf[0] == '\0')
2835 bufcpy(&ret.str, 0, not_specified(output));
2838 printf("%s%s\n", line_pfx, RET_BUF(ret)->buf);
2840 show_strbuf(RET_BUF(ret), loc.pname, 0, ret.err);
2846 switch (traits->param) {
2847 case CL_DEVICE_VERSION:
2848 /* compute numeric value for OpenCL version */
2849 chk.dev_version = getOpenCLVersion(ret.str.buf + 7);
2851 case CL_DEVICE_EXTENSIONS:
2852 identify_device_extensions(extensions, &chk);
2858 case CL_DEVICE_TYPE:
2859 chk.devtype = ret.value.devtype;
2861 case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
2862 chk.cachetype = ret.value.cachetype;
2864 case CL_DEVICE_LOCAL_MEM_TYPE:
2865 chk.lmemtype = ret.value.lmemtype;
2867 case CL_DEVICE_IMAGE_SUPPORT:
2868 chk.image_support = ret.value.b;
2870 case CL_DEVICE_COMPILER_AVAILABLE:
2871 chk.compiler_available = ret.value.b;
2873 case CL_DEVICE_NUM_P2P_DEVICES_AMD:
2874 chk.p2p_num_devs = ret.value.u32;
2882 // and finally the extensions, if we retrieved them
2885 extensions[ext_len + 1] = '\0';
2886 printf("%s" I1_STR "%s\n", line_pfx, (output->mode == CLINFO_HUMAN ?
2887 extensions_traits->pname :
2888 extensions_traits->sname), extensions + 1);
2890 if (versioned_extensions) {
2891 printf("%s" I1_STR "%s\n", line_pfx, (output->mode == CLINFO_HUMAN ?
2892 versioned_extensions_traits->pname :
2893 versioned_extensions_traits->sname), versioned_extensions);
2896 free(versioned_extensions);
2901 /* list of allowed properties for AMD offline devices */
2902 /* everything else seems to be set to 0, and all the other string properties
2903 * actually segfault the driver */
2905 static const cl_device_info amd_offline_info_whitelist[] = {
2907 /* These are present, but all the same, so just skip them:
2909 CL_DEVICE_VENDOR_ID,
2912 CL_DEVICE_OPENCL_C_VERSION,
2914 CL_DEVICE_EXTENSIONS,
2916 CL_DEVICE_GFXIP_MAJOR_AMD,
2917 CL_DEVICE_GFXIP_MINOR_AMD,
2918 CL_DEVICE_MAX_WORK_GROUP_SIZE,
2922 static const cl_device_info list_info_whitelist[] = {
2927 /* return a list of offline devices from the AMD extension */
2929 fetchOfflineDevicesAMD(const struct platform_list *plist, cl_uint p,
2930 /* the number of devices will be returned in ret->value.u32,
2931 * the associated context in ret->base.ctx;
2933 struct device_info_ret *ret)
2935 cl_platform_id pid = plist->platform[p];
2936 cl_device_id *device = NULL;
2937 cl_uint num_devs = 0;
2938 cl_context ctx = NULL;
2940 cl_context_properties ctxpft[] = {
2941 CL_CONTEXT_PLATFORM, (cl_context_properties)pid,
2942 CL_CONTEXT_OFFLINE_DEVICES_AMD, (cl_context_properties)CL_TRUE,
2946 ctx = clCreateContextFromType(ctxpft, CL_DEVICE_TYPE_ALL,
2947 NULL, NULL, &ret->err);
2948 REPORT_ERROR(&ret->err_str, ret->err, "create context");
2951 ret->err = REPORT_ERROR(&ret->err_str,
2952 clGetContextInfo(ctx, CL_CONTEXT_NUM_DEVICES,
2953 sizeof(num_devs), &num_devs, NULL),
2958 ALLOC(device, num_devs, "offline devices");
2960 ret->err = REPORT_ERROR(&ret->err_str,
2961 clGetContextInfo(ctx, CL_CONTEXT_DEVICES,
2962 num_devs*sizeof(*device), device, NULL),
2967 if (ctx) clReleaseContext(ctx);
2971 ret->value.u32 = num_devs;
2972 ret->base.ctx = ctx;
2977 void printPlatformName(const struct platform_list *plist, cl_uint p, struct _strbuf *str,
2978 const struct opt_out *output)
2980 const struct platform_data *pdata = plist->pdata + p;
2981 const char *brief_prefix = (output->mode == CLINFO_HUMAN ? "Platform #" : "");
2982 const char *title = (output->mode == CLINFO_HUMAN ? pinfo_traits[0].pname :
2983 pinfo_traits[0].sname);
2984 const int prefix_width = -line_pfx_len*(!output->brief);
2985 if (output->brief) {
2986 strbuf_printf(str, "%s%" PRIu32 ": ", brief_prefix, p);
2987 } else if (output->mode == CLINFO_RAW) {
2988 strbuf_printf(str, "[%s/*]", pdata->sname);
2990 sprintf(line_pfx, "%*s", prefix_width, str->buf);
2993 printf("%s%s\n", line_pfx, pdata->pname);
2995 printf("%s" I1_STR "%s\n", line_pfx, title, pdata->pname);
2998 void printPlatformDevices(const struct platform_list *plist, cl_uint p,
2999 const cl_device_id *device, cl_uint ndevs,
3000 struct _strbuf *str, const struct opt_out *output, cl_bool these_are_offline)
3002 const struct platform_data *pdata = plist->pdata + p;
3003 const cl_device_info *param_whitelist = output->brief ? list_info_whitelist :
3004 these_are_offline ? amd_offline_info_whitelist : NULL;
3007 if (output->detailed)
3008 printf("%s" I0_STR "%" PRIu32 "\n",
3010 num_devs_header(output, these_are_offline),
3013 for (d = 0; d < ndevs; ++d) {
3014 if (output->selected && output->device != d) continue;
3015 const cl_device_id dev = device[d];
3016 if (output->brief) {
3017 const cl_bool last_device = (d == ndevs - 1 &&
3018 output->mode != CLINFO_RAW &&
3019 (!output->offline ||
3020 !pdata->has_amd_offline ||
3021 these_are_offline));
3022 if (output->mode == CLINFO_RAW)
3023 sprintf(line_pfx, "%" PRIu32 "%c%" PRIu32 ": ",
3025 these_are_offline ? '*' : '.',
3028 sprintf(line_pfx, " +-- %sDevice #%" PRIu32 ": ",
3029 these_are_offline ? "Offline " : "",
3033 } else if (line_pfx_len > 0) {
3034 cl_int sd = (these_are_offline ? -1 : 1)*(cl_int)d;
3035 strbuf_printf(str, "[%s/%" PRId32 "]", pdata->sname, sd);
3036 sprintf(line_pfx, "%*s", -line_pfx_len, str->buf);
3038 printDeviceInfo(dev, plist, p, param_whitelist, output);
3039 if (output->detailed && d < pdata[p].ndevs - 1)
3047 void showDevices(const struct platform_list *plist, const struct opt_out *output)
3049 const cl_uint num_platforms = plist->num_platforms;
3050 const cl_uint maxdevs = plist->max_devs;
3051 const struct platform_data *pdata = plist->pdata;
3055 init_strbuf(&str, __func__);
3057 if (output->mode == CLINFO_RAW) {
3059 strbuf_printf(&str, "%" PRIu32 ".%" PRIu32 ": ", num_platforms, maxdevs);
3061 strbuf_printf(&str, "[%*s/%" PRIu32 "] ",
3062 plist->max_sname_len, "", maxdevs);
3065 strbuf_printf(&str, " +-- %sDevice #%" PRIu32 ": ",
3066 (output->offline ? "Offline " : ""), maxdevs);
3069 /* TODO we have no prefix in HUMAN detailed output mode,
3070 * consider adding one
3075 line_pfx_len = (int)(strlen(str.buf) + 1);
3076 REALLOC(line_pfx, line_pfx_len, "line prefix");
3080 for (p = 0; p < num_platforms; ++p) {
3081 /* skip non-selected platforms altogether */
3082 if (output->selected && output->platform != p) continue;
3084 /* skip platform header if only printing specfic properties */
3086 printPlatformName(plist, p, &str, output);
3088 printPlatformDevices(plist, p,
3089 get_platform_devs(plist, p), pdata[p].ndevs,
3090 &str, output, CL_FALSE);
3092 if (output->offline && pdata[p].has_amd_offline) {
3093 struct device_info_ret ret;
3094 cl_device_id *devs = NULL;
3096 INIT_RET(ret, "offline device");
3097 if (output->detailed)
3100 devs = fetchOfflineDevicesAMD(plist, p, &ret);
3102 puts(ret.err_str.buf);
3104 printPlatformDevices(plist, p, devs, ret.value.u32,
3105 &str, output, CL_TRUE);
3106 clReleaseContext(ret.base.ctx);
3111 if (output->detailed)
3117 /* check the behavior of clGetPlatformInfo() when given a NULL platform ID */
3118 void checkNullGetPlatformName(const struct opt_out *output)
3120 struct device_info_ret ret;
3121 struct info_loc loc;
3123 INIT_RET(ret, "null ctx");
3124 reset_loc(&loc, __func__);
3125 RESET_LOC_PARAM(loc, plat, CL_PLATFORM_NAME);
3127 ret.err = clGetPlatformInfo(NULL, CL_PLATFORM_NAME, ret.str.sz, ret.str.buf, NULL);
3128 if (ret.err == CL_INVALID_PLATFORM) {
3129 bufcpy(&ret.err_str, 0, no_plat(output));
3131 loc.line = __LINE__ + 1;
3132 REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s");
3134 printf(I1_STR "%s\n",
3135 "clGetPlatformInfo(NULL, CL_PLATFORM_NAME, ...)", RET_BUF(ret)->buf);
3139 /* check the behavior of clGetDeviceIDs() when given a NULL platform ID;
3140 * return the index of the default platform in our array of platform IDs,
3141 * or num_platforms (which is an invalid platform index) in case of errors
3142 * or no platform or device found.
3144 cl_uint checkNullGetDevices(const struct platform_list *plist, const struct opt_out *output)
3146 const cl_uint num_platforms = plist->num_platforms;
3147 const struct platform_data *pdata = plist->pdata;
3148 const cl_platform_id *platform = plist->platform;
3150 struct device_info_ret ret;
3151 struct info_loc loc;
3153 cl_uint i = 0; /* generic iterator */
3154 cl_device_id dev = NULL; /* sample device */
3155 cl_platform_id plat = NULL; /* detected platform */
3157 cl_uint found = 0; /* number of platforms found */
3158 cl_uint pidx = num_platforms; /* index of the platform found */
3159 cl_uint numdevs = 0;
3161 INIT_RET(ret, "null get devices");
3163 reset_loc(&loc, __func__);
3164 loc.sname = "device IDs";
3166 ret.err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 0, NULL, &numdevs);
3167 /* TODO we should check other CL_DEVICE_TYPE_* combinations, since a smart
3168 * implementation might give you a different default platform for GPUs
3170 * Of course the “no devices” case would then need to be handled differently.
3171 * The logic might be maintained similarly, provided we also gather
3172 * the number of devices of each type for each platform, although it's
3173 * obviously more likely to have multiple platforms with no devices
3178 case CL_INVALID_PLATFORM:
3179 bufcpy(&ret.err_str, 0, no_plat(output));
3181 case CL_DEVICE_NOT_FOUND:
3182 /* No devices were found, see if there are platforms with
3183 * no devices, and if there's only one, assume this is the
3184 * one being used as default by the ICD loader */
3185 for (i = 0; i < num_platforms; ++i) {
3186 if (pdata[i].ndevs == 0) {
3199 bufcpy(&ret.err_str, 0, (output->mode == CLINFO_HUMAN ?
3200 "<error: 0 devices, no matching platform!>" :
3201 "CL_DEVICE_NOT_FOUND | CL_INVALID_PLATFORM"));
3204 strbuf_printf(&ret.err_str, "%s%s%s%s",
3205 no_dev_found(output),
3206 (output->mode == CLINFO_HUMAN ? " [" : " | "),
3207 (output->mode == CLINFO_HUMAN ? pdata[pidx].pname : pdata[pidx].sname),
3208 (output->mode == CLINFO_HUMAN ? "?]" : "?"));
3210 default: /* found > 1 */
3211 bufcpy(&ret.err_str, 0, (output->mode == CLINFO_HUMAN ?
3212 "<error: 0 devices, multiple matching platforms!>" :
3213 "CL_DEVICE_NOT_FOUND | ????"));
3218 loc.line = __LINE__+1;
3219 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get number of %s")) break;
3221 /* Determine platform by looking at the CL_DEVICE_PLATFORM of
3222 * one of the devices */
3223 ret.err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 1, &dev, NULL);
3224 loc.line = __LINE__+1;
3225 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
3227 RESET_LOC_PARAM(loc, dev, CL_DEVICE_PLATFORM);
3228 ret.err = clGetDeviceInfo(dev, CL_DEVICE_PLATFORM,
3229 sizeof(plat), &plat, NULL);
3230 loc.line = __LINE__+1;
3231 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
3233 for (i = 0; i < num_platforms; ++i) {
3234 if (platform[i] == plat) {
3236 strbuf_printf(&ret.str, "%s [%s]",
3237 (output->mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS"),
3242 if (i == num_platforms) {
3243 ret.err = CL_INVALID_PLATFORM;
3244 strbuf_printf(&ret.err_str, "<error: platform %p not found>", (void*)plat);
3247 printf(I1_STR "%s\n",
3248 "clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, ...)", RET_BUF(ret)->buf);
3254 void checkNullCtx(struct device_info_ret *ret,
3255 const struct platform_list *plist, cl_uint pidx, const char *which,
3256 const struct opt_out *output)
3258 const cl_device_id *dev = plist->all_devs + plist->dev_offset[pidx];
3259 struct info_loc loc;
3260 cl_context ctx = clCreateContext(NULL, 1, dev, NULL, NULL, &ret->err);
3262 reset_loc(&loc, __func__);
3264 loc.line = __LINE__+2;
3266 if (!REPORT_ERROR_LOC(ret, ret->err, &loc, "create context with device from %s platform"))
3267 strbuf_printf(&ret->str, "%s [%s]",
3268 (output->mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS"),
3269 plist->pdata[pidx].sname);
3271 clReleaseContext(ctx);
3276 /* check behavior of clCreateContextFromType() with NULL cl_context_properties */
3277 void checkNullCtxFromType(const struct platform_list *plist, const struct opt_out *output)
3279 const cl_uint num_platforms = plist->num_platforms;
3280 const struct platform_data *pdata = plist->pdata;
3281 const cl_platform_id *platform = plist->platform;
3283 size_t t; /* type iterator */
3284 size_t i; /* generic iterator */
3286 cl_context ctx = NULL;
3290 size_t cursz = ndevs*sizeof(cl_device_id);
3291 cl_platform_id plat = NULL;
3292 cl_device_id *devs = NULL;
3294 struct device_info_ret ret;
3295 struct info_loc loc;
3297 const char *platname_prop = (output->mode == CLINFO_HUMAN ?
3298 pinfo_traits[0].pname :
3299 pinfo_traits[0].sname);
3301 const char *devname_prop = (output->mode == CLINFO_HUMAN ?
3302 dinfo_traits[0].pname :
3303 dinfo_traits[0].sname);
3305 reset_loc(&loc, __func__);
3306 INIT_RET(ret, "null ctx from type");
3308 ALLOC(devs, ndevs, "context devices");
3310 for (t = 1; t < devtype_count; ++t) { /* we skip 0 */
3311 loc.sname = device_type_raw_str[t];
3313 strbuf_printf(&ret.str, "clCreateContextFromType(NULL, %s)", loc.sname);
3314 sprintf(def, I1_STR, ret.str.buf);
3316 loc.line = __LINE__+1;
3317 ctx = clCreateContextFromType(NULL, devtype[t], NULL, NULL, &ret.err);
3320 case CL_INVALID_PLATFORM:
3321 bufcpy(&ret.err_str, 0, no_plat(output)); break;
3322 case CL_DEVICE_NOT_FOUND:
3323 bufcpy(&ret.err_str, 0, no_dev_found(output)); break;
3324 case CL_INVALID_DEVICE_TYPE: /* e.g. _CUSTOM device on 1.1 platform */
3325 bufcpy(&ret.err_str, 0, invalid_dev_type(output)); break;
3326 case CL_INVALID_VALUE: /* This is what apple returns for the case above */
3327 bufcpy(&ret.err_str, 0, invalid_dev_type(output)); break;
3328 case CL_DEVICE_NOT_AVAILABLE:
3329 bufcpy(&ret.err_str, 0, no_dev_avail(output)); break;
3331 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "create context from type %s")) break;
3333 /* get the devices */
3334 loc.sname = "CL_CONTEXT_DEVICES";
3335 loc.line = __LINE__+2;
3337 ret.err = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, 0, NULL, &szval);
3338 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s size")) break;
3339 if (szval > cursz) {
3340 REALLOC(devs, szval, "context devices");
3344 loc.line = __LINE__+1;
3345 ret.err = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, cursz, devs, NULL);
3346 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
3347 ndevs = szval/sizeof(cl_device_id);
3349 ret.err = CL_DEVICE_NOT_FOUND;
3350 bufcpy(&ret.err_str, 0, "<error: context created with no devices>");
3353 /* get the platform from the first device */
3354 RESET_LOC_PARAM(loc, dev, CL_DEVICE_PLATFORM);
3355 loc.line = __LINE__+1;
3356 ret.err = clGetDeviceInfo(*devs, CL_DEVICE_PLATFORM, sizeof(plat), &plat, NULL);
3357 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
3361 for (i = 0; i < num_platforms; ++i) {
3362 if (platform[i] == plat)
3365 if (i == num_platforms) {
3366 ret.err = CL_INVALID_PLATFORM;
3367 strbuf_printf(&ret.err_str, "<error: platform %p not found>", (void*)plat);
3370 szval += strbuf_printf(&ret.str, "%s (%" PRIuS ")",
3371 (output->mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS"),
3373 szval += snprintf(ret.str.buf + szval, ret.str.sz - szval, "\n" I2_STR "%s",
3374 platname_prop, pdata[i].pname);
3376 for (i = 0; i < ndevs; ++i) {
3378 /* for each device, show the device name */
3379 /* TODO some other unique ID too, e.g. PCI address, if available? */
3381 szval += snprintf(ret.str.buf + szval, ret.str.sz - szval, "\n" I2_STR, devname_prop);
3382 if (szval >= ret.str.sz) {
3383 trunc_strbuf(&ret.str);
3387 RESET_LOC_PARAM(loc, dev, CL_DEVICE_NAME);
3389 loc.line = __LINE__+1;
3390 ret.err = clGetDeviceInfo(devs[i], CL_DEVICE_NAME, ret.str.sz - szval, ret.str.buf + szval, &szname);
3391 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
3392 szval += szname - 1;
3395 break; /* had an error earlier, bail */
3400 clReleaseContext(ctx);
3403 printf("%s%s\n", def, RET_BUF(ret)->buf);
3409 /* check the behavior of NULL platform in clGetDeviceIDs (see checkNullGetDevices)
3410 * and in clCreateContext() */
3411 void checkNullBehavior(const struct platform_list *plist, const struct opt_out *output)
3413 const cl_uint num_platforms = plist->num_platforms;
3414 const struct platform_data *pdata = plist->pdata;
3417 struct device_info_ret ret;
3419 INIT_RET(ret, "null behavior");
3421 printf("NULL platform behavior\n");
3423 checkNullGetPlatformName(output);
3425 p = checkNullGetDevices(plist, output);
3427 /* If there's a default platform, and it has devices, try
3428 * creating a context with its first device and see if it works */
3430 if (p == num_platforms) {
3431 ret.err = CL_INVALID_PLATFORM;
3432 bufcpy(&ret.err_str, 0, no_plat(output));
3433 } else if (pdata[p].ndevs == 0) {
3434 ret.err = CL_DEVICE_NOT_FOUND;
3435 bufcpy(&ret.err_str, 0, no_dev_found(output));
3437 if (p < num_platforms) {
3438 checkNullCtx(&ret, plist, p, "default", output);
3440 /* this shouldn't happen, but still ... */
3441 ret.err = CL_OUT_OF_HOST_MEMORY;
3442 bufcpy(&ret.err_str, 0, "<error: overflow in default platform scan>");
3445 printf(I1_STR "%s\n", "clCreateContext(NULL, ...) [default]", RET_BUF(ret)->buf);
3447 /* Look for a device from a non-default platform, if there are any */
3448 if (p == num_platforms || num_platforms > 1) {
3450 while (p2 < num_platforms && (p2 == p || pdata[p2].ndevs == 0)) {
3453 if (p2 < num_platforms) {
3454 checkNullCtx(&ret, plist, p2, "non-default", output);
3456 ret.err = CL_DEVICE_NOT_FOUND;
3457 bufcpy(&ret.str, 0, "<error: no devices in non-default plaforms>");
3459 printf(I1_STR "%s\n", "clCreateContext(NULL, ...) [other]", RET_BUF(ret)->buf);
3462 checkNullCtxFromType(plist, output);
3468 /* Get properties of the ocl-icd loader, if available */
3469 /* All properties are currently char[] */
3471 /* Function pointer to the ICD loader info function */
3473 typedef cl_int (*icdl_info_fn_ptr)(cl_icdl_info, size_t, void*, size_t*);
3474 icdl_info_fn_ptr clGetICDLoaderInfoOCLICD;
3476 /* We want to auto-detect the OpenCL version supported by the ICD loader.
3477 * To do this, we will progressively find symbols introduced in new APIs,
3478 * until a NULL symbol is found.
3481 struct icd_loader_test {
3484 } icd_loader_tests[] = {
3485 { 11, "clCreateSubBuffer" },
3486 { 12, "clCreateImage" },
3487 { 20, "clSVMAlloc" },
3488 { 21, "clGetHostTimer" },
3489 { 22, "clSetProgramSpecializationConstant" },
3490 { 30, "clSetContextDestructorCallback" },
3495 icdl_info_str(struct icdl_info_ret *ret, const struct info_loc *loc)
3497 GET_STRING_LOC(ret, loc, clGetICDLoaderInfoOCLICD, loc->param.icdl);
3501 struct icdl_info_traits {
3502 cl_icdl_info param; // CL_ICDL_*
3503 const char *sname; // "CL_ICDL_*"
3504 const char *pname; // "ICD loader *"
3507 static const char * const oclicdl_pfx = "OCLICD";
3509 #define LINFO(symbol, name) { symbol, #symbol, "ICD loader " name }
3510 struct icdl_info_traits linfo_traits[] = {
3511 LINFO(CL_ICDL_NAME, "Name"),
3512 LINFO(CL_ICDL_VENDOR, "Vendor"),
3513 LINFO(CL_ICDL_VERSION, "Version"),
3514 LINFO(CL_ICDL_OCL_VERSION, "Profile")
3517 /* The ICD loader info function must be retrieved via clGetExtensionFunctionAddress,
3518 * which returns a void pointer.
3519 * ISO C forbids assignments between function pointers and void pointers,
3520 * but POSIX allows it. To compile without warnings even in -pedantic mode,
3521 * we take advantage of the fact that we _can_ do the conversion via
3522 * pointers-to-pointers. This is supported on most compilers, except
3523 * for some rather old GCC versions whose strict aliasing rules are
3524 * too strict. Disable strict aliasing warnings for these compilers.
3526 #if defined __GNUC__ && ((__GNUC__*10 + __GNUC_MINOR__) < 46)
3527 #pragma GCC diagnostic ignored "-Wstrict-aliasing"
3530 struct icdl_data oclIcdProps(const struct platform_list *plist, const struct opt_out *output)
3532 const cl_uint max_plat_version = plist->max_plat_version;
3534 struct icdl_data icdl;
3536 /* Counter that'll be used to walk the icd_loader_tests */
3539 /* We find the clGetICDLoaderInfoOCLICD extension address, which will be used
3540 * to query the ICD loader properties.
3541 * It should be noted that in this specific case we cannot replace the
3542 * call to clGetExtensionFunctionAddress with a call to the superseding function
3543 * clGetExtensionFunctionAddressForPlatform because the extension is in the
3544 * loader itself, not in a specific platform.
3546 void *ptrHack = clGetExtensionFunctionAddress("clGetICDLoaderInfoOCLICD");
3547 clGetICDLoaderInfoOCLICD = *(icdl_info_fn_ptr*)(&ptrHack);
3549 /* Initialize icdl_data ret versions */
3550 icdl.detected_version = 10;
3551 icdl.reported_version = 0;
3553 /* clinfo may lag behind the OpenCL standard or loader version,
3554 * and we don't want to give a warning if we can't tell if the loader
3555 * correctly supports a version unknown to us
3557 cl_uint clinfo_highest_known_version = 0;
3559 /* Step #1: try to auto-detect the supported ICD loader version */
3561 struct icd_loader_test check = icd_loader_tests[i];
3562 if (check.symbol == NULL)
3564 if (dlsym(DL_MODULE, check.symbol) == NULL)
3566 clinfo_highest_known_version = icdl.detected_version = check.version;
3570 /* Step #2: query properties from extension, if available */
3571 if (clGetICDLoaderInfoOCLICD != NULL) {
3572 struct info_loc loc;
3573 struct icdl_info_ret ret;
3574 reset_loc(&loc, __func__);
3575 INIT_RET(ret, "ICD loader");
3577 /* TODO think of a sensible header in CLINFO_RAW */
3578 if (output->mode != CLINFO_RAW)
3579 puts("\nICD loader properties");
3581 if (output->mode == CLINFO_RAW) {
3582 line_pfx_len = (int)(strlen(oclicdl_pfx) + 5);
3583 REALLOC(line_pfx, line_pfx_len, "line prefix OCL ICD");
3584 strbuf_printf(&ret.str, "[%s/*]", oclicdl_pfx);
3585 sprintf(line_pfx, "%*s", -line_pfx_len, ret.str.buf);
3588 for (loc.line = 0; loc.line < ARRAY_SIZE(linfo_traits); ++loc.line) {
3589 const struct icdl_info_traits *traits = linfo_traits + loc.line;
3590 loc.sname = traits->sname;
3591 loc.pname = (output->mode == CLINFO_HUMAN ?
3592 traits->pname : traits->sname);
3593 loc.param.icdl = traits->param;
3595 reset_strbuf(&ret.str);
3596 reset_strbuf(&ret.err_str);
3597 icdl_info_str(&ret, &loc);
3599 /* Do not print this property if the user requested one and this does not match */
3600 const cl_bool requested = !(output->prop && strstr(loc.sname, output->prop) == NULL);
3602 show_strbuf(RET_BUF(ret), loc.pname, 1, ret.err);
3604 if (!ret.err && traits->param == CL_ICDL_OCL_VERSION) {
3605 icdl.reported_version = getOpenCLVersion(ret.str.buf + 7);
3611 /* Step #3: show it */
3612 if (output->mode == CLINFO_HUMAN) {
3613 if (icdl.reported_version &&
3614 icdl.reported_version <= clinfo_highest_known_version &&
3615 icdl.reported_version != icdl.detected_version) {
3616 printf( "\tNOTE:\tyour OpenCL library declares to support OpenCL %" PRIu32 ".%" PRIu32 ",\n"
3617 "\t\tbut it seems to support up to OpenCL %" PRIu32 ".%" PRIu32 " %s.\n",
3618 SPLIT_CL_VERSION(icdl.reported_version),
3619 SPLIT_CL_VERSION(icdl.detected_version),
3620 icdl.detected_version < icdl.reported_version ?
3624 // for the loader vs platform max version check we use the version we detected
3625 // if the reported version is known to us, and the reported version if it's higher
3626 // than the standard versions we know about
3627 cl_uint max_version_check = icdl.reported_version > clinfo_highest_known_version ?
3628 icdl.reported_version : icdl.detected_version;
3629 if (max_version_check < max_plat_version) {
3630 printf( "\tNOTE:\tyour OpenCL library only supports OpenCL %" PRIu32 ".%" PRIu32 ",\n"
3631 "\t\tbut some installed platforms support OpenCL %" PRIu32 ".%" PRIu32 ".\n"
3632 "\t\tPrograms using %" PRIu32 ".%" PRIu32 " features may crash\n"
3633 "\t\tor behave unexpectedly\n",
3634 SPLIT_CL_VERSION(icdl.detected_version),
3635 SPLIT_CL_VERSION(max_plat_version),
3636 SPLIT_CL_VERSION(max_plat_version));
3642 #if defined __GNUC__ && ((__GNUC__*10 + __GNUC_MINOR__) < 46)
3643 #pragma GCC diagnostic warning "-Wstrict-aliasing"
3648 puts("clinfo version 3.0.20.11.20");
3651 void parse_device_spec(const char *str, struct opt_out *output)
3654 fprintf(stderr, "please specify a device in the form P:D where P is the platform number and D the device number\n");
3658 int n = sscanf(str, "%d:%d", &p, &d);
3659 if (n != 2 || p < 0 || d < 0) {
3660 fprintf(stderr, "invalid device specification '%s'\n", str);
3663 output->platform = p;
3667 void parse_prop(const char *input, struct opt_out *output)
3669 /* We normalize the property name by upcasing it and replacing the minus sign (-)
3670 * with the underscore (_). If any other character is found, we consider it an error
3673 size_t len = strlen(input);
3675 ALLOC(normalized, len+1, "normalized property name");
3676 for (size_t i = 0; i < len; ++i)
3679 if ( (c == '_') || ( c >= 'A' && c <= 'Z'))
3681 else if (c >= 'a' && c <= 'z')
3682 normalized[i] = 'A' + (c - 'a');
3684 normalized[i] = '_';
3686 fprintf(stderr, "invalid property name substring '%s'\n", input);
3690 output->prop = normalized;
3696 puts("Display properties of all available OpenCL platforms and devices");
3697 puts("Usage: clinfo [options ...]\n");
3699 puts("\t--all-props, -a\t\ttry all properties, only show valid ones");
3700 puts("\t--always-all-props, -At\tshow all properties, even if invalid");
3701 puts("\t--human\t\thuman-friendly output (default)");
3702 puts("\t--raw\t\traw output");
3703 puts("\t--offline\talso show offline devices");
3704 puts("\t--list, -l\tonly list the platforms and devices by name");
3705 puts("\t--prop prop-name\tonly list properties matching the given name");
3706 puts("\t--device p:d,");
3707 puts("\t-d p:d\t\tonly show information about device number d from platform number p");
3708 puts("\t-h, -?\t\tshow usage");
3709 puts("\t--version, -v\tshow version\n");
3710 puts("Defaults to raw mode if invoked with");
3711 puts("a name that contains the string \"raw\"");
3714 int main(int argc, char *argv[])
3720 struct opt_out output;
3722 struct platform_list plist;
3725 output.platform = CL_UINT_MAX;
3726 output.device = CL_UINT_MAX;
3728 output.mode = CLINFO_HUMAN;
3729 output.cond = COND_PROP_CHECK;
3730 output.brief = CL_FALSE;
3731 output.offline = CL_FALSE;
3732 output.check_size = CL_FALSE;
3734 /* if there's a 'raw' in the program name, switch to raw output mode */
3735 if (strstr(argv[0], "raw"))
3736 output.mode = CLINFO_RAW;
3738 /* process command-line arguments */
3739 while (++a < argc) {
3740 if (!strcmp(argv[a], "-a") || !strcmp(argv[a], "--all-props"))
3741 output.cond = COND_PROP_TRY;
3742 else if (!strcmp(argv[a], "-A") || !strcmp(argv[a], "--always-all-props"))
3743 output.cond = COND_PROP_SHOW;
3744 else if (!strcmp(argv[a], "--raw"))
3745 output.mode = CLINFO_RAW;
3746 else if (!strcmp(argv[a], "--human"))
3747 output.mode = CLINFO_HUMAN;
3748 else if (!strcmp(argv[a], "--offline"))
3749 output.offline = CL_TRUE;
3750 else if (!strcmp(argv[a], "-l") || !strcmp(argv[a], "--list"))
3751 output.brief = CL_TRUE;
3752 else if (!strcmp(argv[a], "-d") || !strcmp(argv[a], "--device")) {
3754 parse_device_spec(argv[a], &output);
3755 } else if (!strncmp(argv[a], "-d", 2)) {
3756 parse_device_spec(argv[a] + 2, &output);
3757 } else if (!strcmp(argv[a], "--prop")) {
3759 parse_prop(argv[a], &output);
3760 } else if (!strcmp(argv[a], "-?") || !strcmp(argv[a], "-h")) {
3763 } else if (!strcmp(argv[a], "--version") || !strcmp(argv[a], "-v")) {
3767 fprintf(stderr, "ignoring unknown command-line parameter %s\n", argv[a]);
3770 /* If a property was specified, we only print in RAW mode */
3772 output.mode = CLINFO_RAW;
3773 output.selected = (output.device != CL_UINT_MAX);
3774 output.detailed = !output.brief && !output.selected && !output.prop;
3776 err = clGetPlatformIDs(0, NULL, &plist.num_platforms);
3777 if (err != CL_PLATFORM_NOT_FOUND_KHR)
3778 CHECK_ERROR(err, "number of platforms");
3780 if (output.detailed)
3781 printf(I0_STR "%" PRIu32 "\n",
3782 (output.mode == CLINFO_HUMAN ?
3783 "Number of platforms" : "#PLATFORMS"),
3784 plist.num_platforms);
3785 if (!plist.num_platforms)
3788 alloc_plist(&plist);
3789 err = clGetPlatformIDs(plist.num_platforms, plist.platform, NULL);
3790 CHECK_ERROR(err, "platform IDs");
3792 ALLOC(line_pfx, 1, "line prefix");
3794 for (p = 0; p < plist.num_platforms; ++p) {
3795 // skip non-selected platforms altogether
3796 if (output.selected && output.platform != p) continue;
3797 gatherPlatformInfo(&plist, p, &output);
3798 if (output.detailed)
3801 showDevices(&plist, &output);
3802 if (output.prop || (output.detailed && !output.selected)) {
3803 if (output.mode != CLINFO_RAW)
3804 checkNullBehavior(&plist, &output);
3805 oclIcdProps(&plist, &output);
3810 free((char*)output.prop);