Missing attribute from cl_amd_device_attribute_query
[clinfo] / src / clinfo.c
1 /* Collect all available information on all available devices
2  * on all available OpenCL platforms present in the system
3  */
4
5 #include <time.h>
6 #include <string.h>
7
8 /* We will want to check for symbols in the OpenCL library.
9  * On Windows, we must get the module handle for it, on Unix-like
10  * systems we can just use RTLD_DEFAULT
11  */
12 #ifdef _MSC_VER
13 # include <windows.h>
14 # define dlsym GetProcAddress
15 # define DL_MODULE GetModuleHandle("OpenCL")
16 #else
17 # include <dlfcn.h>
18 # define DL_MODULE ((void*)0) /* This would be RTLD_DEFAULT */
19 #endif
20
21 /* Load STDC format macros (PRI*), or define them
22  * for those crappy, non-standard compilers
23  */
24 #include "fmtmacros.h"
25
26 // More support for the horrible MS C compiler
27 #ifdef _MSC_VER
28 #include "ms_support.h"
29 #endif
30
31 #include "error.h"
32 #include "memory.h"
33 #include "strbuf.h"
34
35 #include "ext.h"
36 #include "ctx_prop.h"
37 #include "info_loc.h"
38 #include "info_ret.h"
39 #include "opt_out.h"
40
41 #define ARRAY_SIZE(ar) (sizeof(ar)/sizeof(*ar))
42
43 #ifndef UNUSED
44 #define UNUSED(x) x __attribute__((unused))
45 #endif
46
47 struct platform_data {
48         char *pname; /* CL_PLATFORM_NAME */
49         char *sname; /* CL_PLATFORM_ICD_SUFFIX_KHR or surrogate */
50         cl_uint ndevs; /* number of devices */
51         cl_bool has_amd_offline; /* has cl_amd_offline_devices extension */
52 };
53
54 struct platform_info_checks {
55         cl_uint plat_version;
56         cl_bool has_khr_icd;
57         cl_bool has_amd_object_metadata;
58 };
59
60 struct platform_list {
61         /* Number of platforms in the system */
62         cl_uint num_platforms;
63         /* Total number of devices across all platforms */
64         cl_uint ndevs_total;
65         /* Number of devices allocated in all_devs array */
66         cl_uint alloc_devs;
67         /* Highest OpenCL version supported by any platform.
68          * If the OpenCL library / ICD loader only supports
69          * a lower version, problems may arise (such as
70          * API calls causing segfaults or any other unexpected
71          * behavior
72          */
73         cl_uint max_plat_version;
74         /* Largest number of devices on any platform */
75         cl_uint max_devs;
76         /* Length of the longest platform sname */
77         cl_int max_sname_len;
78         /* Array of platform IDs */
79         cl_platform_id *platform;
80         /* Array of device IDs (across all platforms) */
81         cl_device_id *all_devs;
82         /* Array of offsets in all_devs where the devices
83          * of each platform begin */
84         cl_uint *dev_offset;
85         /* Array of clinfo-specific platform data */
86         struct platform_data *pdata;
87         /* Arrau of clinfo-specifici platform checks */
88         struct platform_info_checks *platform_checks;
89 };
90
91 void
92 init_plist(struct platform_list *plist)
93 {
94         plist->num_platforms = 0;
95         plist->ndevs_total = 0;
96         plist->alloc_devs = 0;
97         plist->max_plat_version = 0;
98         plist->platform = NULL;
99         plist->all_devs = NULL;
100         plist->dev_offset = NULL;
101         plist->pdata = NULL;
102         plist->platform_checks = NULL;
103 }
104
105 void plist_devs_reserve(struct platform_list *plist, cl_uint amount)
106 {
107         if (amount > plist->alloc_devs) {
108                 REALLOC(plist->all_devs, amount, "all devices");
109                 plist->alloc_devs = amount;
110         }
111 }
112
113
114 void
115 alloc_plist(struct platform_list *plist)
116 {
117         ALLOC(plist->platform, plist->num_platforms, "platform IDs");
118         ALLOC(plist->dev_offset, plist->num_platforms, "platform device list offset");
119         /* The actual sizing for this will change as we gather platform info,
120          * but assume at least one device per platform
121          */
122         plist_devs_reserve(plist, plist->num_platforms);
123         ALLOC(plist->pdata, plist->num_platforms, "platform data");
124         ALLOC(plist->platform_checks, plist->num_platforms, "platform checks data");
125 }
126 void
127 free_plist(struct platform_list *plist)
128 {
129         free(plist->platform);
130         free(plist->all_devs);
131         free(plist->dev_offset);
132         free(plist->pdata);
133         free(plist->platform_checks);
134         init_plist(plist);
135 }
136
137 const cl_device_id *
138 get_platform_devs(const struct platform_list *plist, cl_uint p)
139 {
140         return plist->all_devs + plist->dev_offset[p];
141 }
142
143 cl_device_id
144 get_platform_dev(const struct platform_list *plist, cl_uint p, cl_uint d)
145 {
146         return get_platform_devs(plist, p)[d];
147 }
148
149 /* Data for the OpenCL library / ICD loader */
150 struct icdl_data {
151         /* auto-detected OpenCL version support for the ICD loader */
152         cl_uint detected_version;
153         /* OpenCL version support declared by the ICD loader */
154         cl_uint reported_version;
155 };
156
157 /* line prefix, used to identify the platform/device for each
158  * device property in RAW output mode */
159 char *line_pfx;
160 int line_pfx_len;
161
162 #define CHECK_SIZE(ret, loc, val, cmd, ...) do { \
163         /* check if the issue is with param size */ \
164         if (output->check_size && ret->err == CL_INVALID_VALUE) { \
165                 size_t _actual_sz; \
166                 if (cmd(__VA_ARGS__, 0, NULL, &_actual_sz) == CL_SUCCESS) { \
167                         REPORT_SIZE_MISMATCH(&(ret->err_str), loc, _actual_sz, sizeof(val)); \
168                 } \
169         } \
170 } while (0)
171
172 static const char unk[] = "Unknown";
173 static const char none[] = "None";
174 static const char none_raw[] = "CL_NONE";
175 static const char na[] = "n/a"; // not available
176 static const char na_wrap[] = "(n/a)"; // not available
177 static const char core[] = "core";
178
179 static const char bytes_str[] = " bytes";
180 static const char pixels_str[] = " pixels";
181 static const char images_str[] = " images";
182
183 static const char* bool_str[] = { "No", "Yes" };
184 static const char* bool_raw_str[] = { "CL_FALSE", "CL_TRUE" };
185
186 static const char* endian_str[] = { "Big-Endian", "Little-Endian" };
187
188 static const cl_device_type devtype[] = { 0,
189         CL_DEVICE_TYPE_DEFAULT, CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU,
190         CL_DEVICE_TYPE_ACCELERATOR, CL_DEVICE_TYPE_CUSTOM, CL_DEVICE_TYPE_ALL };
191
192 const size_t devtype_count = ARRAY_SIZE(devtype);
193 /* number of actual device types, without ALL */
194 const size_t actual_devtype_count = ARRAY_SIZE(devtype) - 1;
195
196 static const char* device_type_str[] = { unk, "Default", "CPU", "GPU", "Accelerator", "Custom", "All" };
197 static const char* device_type_raw_str[] = { unk,
198         "CL_DEVICE_TYPE_DEFAULT", "CL_DEVICE_TYPE_CPU", "CL_DEVICE_TYPE_GPU",
199         "CL_DEVICE_TYPE_ACCELERATOR", "CL_DEVICE_TYPE_CUSTOM", "CL_DEVICE_TYPE_ALL"
200 };
201
202 static const char* partition_type_str[] = {
203         none, "equally", "by counts", "by affinity domain", "by names (Intel)"
204 };
205 static const char* partition_type_raw_str[] = {
206         none_raw,
207         "CL_DEVICE_PARTITION_EQUALLY_EXT",
208         "CL_DEVICE_PARTITION_BY_COUNTS_EXT",
209         "CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT",
210         "CL_DEVICE_PARTITION_BY_NAMES_INTEL_EXT"
211 };
212
213 static const char numa[] = "NUMA";
214 static const char l1cache[] = "L1 cache";
215 static const char l2cache[] = "L2 cache";
216 static const char l3cache[] = "L3 cache";
217 static const char l4cache[] = "L4 cache";
218
219 static const char* affinity_domain_str[] = {
220         numa, l4cache, l3cache, l2cache, l1cache, "next partitionable"
221 };
222
223 static const char* affinity_domain_ext_str[] = {
224         numa, l4cache, l3cache, l2cache, l1cache, "next fissionable"
225 };
226
227 static const char* affinity_domain_raw_str[] = {
228         "CL_DEVICE_AFFINITY_DOMAIN_NUMA",
229         "CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE",
230         "CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE",
231         "CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE",
232         "CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE",
233         "CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE"
234 };
235
236 static const char* affinity_domain_raw_ext_str[] = {
237         "CL_AFFINITY_DOMAIN_NUMA_EXT",
238         "CL_AFFINITY_DOMAIN_L4_CACHE_EXT",
239         "CL_AFFINITY_DOMAIN_L3_CACHE_EXT",
240         "CL_AFFINITY_DOMAIN_L2_CACHE_EXT",
241         "CL_AFFINITY_DOMAIN_L1_CACHE_EXT",
242         "CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT"
243 };
244
245 const size_t affinity_domain_count = ARRAY_SIZE(affinity_domain_str);
246
247 static const char *terminate_capability_str[] = {
248         "Context"
249 };
250
251 static const char *terminate_capability_raw_str[] = {
252         "CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR"
253 };
254
255 const size_t terminate_capability_count = ARRAY_SIZE(terminate_capability_str);
256
257 static const char* fp_conf_str[] = {
258         "Denormals", "Infinity and NANs", "Round to nearest", "Round to zero",
259         "Round to infinity", "IEEE754-2008 fused multiply-add",
260         "Support is emulated in software",
261         "Correctly-rounded divide and sqrt operations"
262 };
263
264 static const char* fp_conf_raw_str[] = {
265         "CL_FP_DENORM",
266         "CL_FP_INF_NAN",
267         "CL_FP_ROUND_TO_NEAREST",
268         "CL_FP_ROUND_TO_ZERO",
269         "CL_FP_ROUND_TO_INF",
270         "CL_FP_FMA",
271         "CL_FP_SOFT_FLOAT",
272         "CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT"
273 };
274
275 const size_t fp_conf_count = ARRAY_SIZE(fp_conf_str);
276
277 static const char* svm_cap_str[] = {
278         "Coarse-grained buffer sharing",
279         "Fine-grained buffer sharing",
280         "Fine-grained system sharing",
281         "Atomics"
282 };
283
284 static const char* svm_cap_raw_str[] = {
285         "CL_DEVICE_SVM_COARSE_GRAIN_BUFFER",
286         "CL_DEVICE_SVM_FINE_GRAIN_BUFFER",
287         "CL_DEVICE_SVM_FINE_GRAIN_SYSTEM",
288         "CL_DEVICE_SVM_ATOMICS",
289 };
290
291 const size_t svm_cap_count = ARRAY_SIZE(svm_cap_str);
292
293 /* SI suffixes for memory sizes. Note that in OpenCL most of them are
294  * passed via a cl_ulong, which at most can mode 16 EiB, but hey,
295  * let's be forward-thinking ;-)
296  */
297 static const char* memsfx[] = {
298         "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
299 };
300
301 const size_t memsfx_end = ARRAY_SIZE(memsfx) + 1;
302
303 static const char* lmem_type_str[] = { none, "Local", "Global" };
304 static const char* lmem_type_raw_str[] = { none_raw, "CL_LOCAL", "CL_GLOBAL" };
305 static const char* cache_type_str[] = { none, "Read-Only", "Read/Write" };
306 static const char* cache_type_raw_str[] = { none_raw, "CL_READ_ONLY_CACHE", "CL_READ_WRITE_CACHE" };
307
308 static const char* queue_prop_str[] = { "Out-of-order execution", "Profiling" };
309 static const char* queue_prop_raw_str[] = {
310         "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE",
311         "CL_QUEUE_PROFILING_ENABLE"
312 };
313
314 const size_t queue_prop_count = ARRAY_SIZE(queue_prop_str);
315
316 static const char* execap_str[] = { "Run OpenCL kernels", "Run native kernels" };
317 static const char* execap_raw_str[] = {
318         "CL_EXEC_KERNEL",
319         "CL_EXEC_NATIVE_KERNEL"
320 };
321
322 const size_t execap_count = ARRAY_SIZE(execap_str);
323
324 static const char* sources[] = {
325         "#define GWO(type) global type* restrict\n",
326         "#define GRO(type) global const type* restrict\n",
327         "#define BODY int i = get_global_id(0); out[i] = in1[i] + in2[i]\n",
328         "#define _KRN(T, N) kernel void sum##N(GWO(T##N) out, GRO(T##N) in1, GRO(T##N) in2) { BODY; }\n",
329         "#define KRN(N) _KRN(float, N)\n",
330         "KRN()\n/* KRN(2)\nKRN(4)\nKRN(8)\nKRN(16) */\n",
331 };
332
333 const char *num_devs_header(const struct opt_out *output, cl_bool these_are_offline)
334 {
335         return output->mode == CLINFO_HUMAN ?
336                 (these_are_offline ? "Number of offine devices (AMD)" : "Number of devices") :
337                 (these_are_offline ? "#OFFDEVICES" : "#DEVICES");
338 }
339
340 const char *not_specified(const struct opt_out *output)
341 {
342         return output->mode == CLINFO_HUMAN ?
343                 na_wrap : "";
344 }
345
346 const char *no_plat(const struct opt_out *output)
347 {
348         return output->mode == CLINFO_HUMAN ?
349                 "No platform" :
350                 "CL_INVALID_PLATFORM";
351 }
352
353 const char *invalid_dev_type(const struct opt_out *output)
354 {
355         return output->mode == CLINFO_HUMAN ?
356                 "Invalid device type for platform" :
357                 "CL_INVALID_DEVICE_TYPE";
358 }
359
360 const char *invalid_dev_value(const struct opt_out *output)
361 {
362         return output->mode == CLINFO_HUMAN ?
363                 "Invalid device type value for platform" :
364                 "CL_INVALID_VALUE";
365 }
366
367 const char *no_dev_found(const struct opt_out *output)
368 {
369         return output->mode == CLINFO_HUMAN ?
370                 "No devices found in platform" :
371                 "CL_DEVICE_NOT_FOUND";
372 }
373
374 const char *no_dev_avail(const struct opt_out *output)
375 {
376         return output->mode == CLINFO_HUMAN ?
377                 "No devices available in platform" :
378                 "CL_DEVICE_NOT_AVAILABLE";
379 }
380
381 /* OpenCL context interop names */
382
383 typedef struct cl_interop_name {
384         cl_uint from;
385         cl_uint to;
386         /* 5 because that's the largest we know of,
387          * 2 because it's HUMAN, RAW */
388         const char *value[5][2];
389 } cl_interop_name;
390
391 static const cl_interop_name cl_interop_names[] = {
392         { /* cl_khr_gl_sharing */
393                  CL_GL_CONTEXT_KHR,
394                  CL_CGL_SHAREGROUP_KHR,
395                  {
396                         { "GL", "CL_GL_CONTEXT_KHR" },
397                         { "EGL", "CL_EGL_DISPALY_KHR" },
398                         { "GLX", "CL_GLX_DISPLAY_KHR" },
399                         { "WGL", "CL_WGL_HDC_KHR" },
400                         { "CGL", "CL_CGL_SHAREGROUP_KHR" }
401                 }
402         },
403         { /* cl_khr_dx9_media_sharing */
404                 CL_CONTEXT_ADAPTER_D3D9_KHR,
405                 CL_CONTEXT_ADAPTER_DXVA_KHR,
406                 {
407                         { "D3D9 (KHR)", "CL_CONTEXT_ADAPTER_D3D9_KHR" },
408                         { "D3D9Ex (KHR)", "CL_CONTEXT_ADAPTER_D3D9EX_KHR" },
409                         { "DXVA (KHR)", "CL_CONTEXT_ADAPTER_DXVA_KHR" }
410                 }
411         },
412         { /* cl_khr_d3d10_sharing */
413                 CL_CONTEXT_D3D10_DEVICE_KHR,
414                 CL_CONTEXT_D3D10_DEVICE_KHR,
415                 {
416                         { "D3D10", "CL_CONTEXT_D3D10_DEVICE_KHR" }
417                 }
418         },
419         { /* cl_khr_d3d11_sharing */
420                 CL_CONTEXT_D3D11_DEVICE_KHR,
421                 CL_CONTEXT_D3D11_DEVICE_KHR,
422                 {
423                         { "D3D11", "CL_CONTEXT_D3D11_DEVICE_KHR" }
424                 }
425         },
426         /* cl_intel_dx9_media_sharing is split in two because the allowed values are not consecutive */
427         { /* cl_intel_dx9_media_sharing part 1 */
428                 CL_CONTEXT_D3D9_DEVICE_INTEL,
429                 CL_CONTEXT_D3D9_DEVICE_INTEL,
430                 {
431                         { "D3D9 (INTEL)", "CL_CONTEXT_D3D9_DEVICE_INTEL" }
432                 }
433         },
434         { /* cl_intel_dx9_media_sharing part 2 */
435                 CL_CONTEXT_D3D9EX_DEVICE_INTEL,
436                 CL_CONTEXT_DXVA_DEVICE_INTEL,
437                 {
438                         { "D3D9Ex (INTEL)", "CL_CONTEXT_D3D9EX_DEVICE_INTEL" },
439                         { "DXVA (INTEL)", "CL_CONTEXT_DXVA_DEVICE_INTEL" }
440                 }
441         },
442         { /* cl_intel_va_api_media_sharing */
443                 CL_CONTEXT_VA_API_DISPLAY_INTEL,
444                 CL_CONTEXT_VA_API_DISPLAY_INTEL,
445                 {
446                         { "VA-API", "CL_CONTEXT_VA_API_DISPLAY_INTEL" }
447                 }
448         }
449 };
450
451 const size_t num_known_interops = ARRAY_SIZE(cl_interop_names);
452
453
454 #define INDENT "  "
455 #define I0_STR "%-48s  "
456 #define I1_STR "  %-46s  "
457 #define I2_STR "    %-44s  "
458
459 static const char empty_str[] = "";
460 static const char spc_str[] = " ";
461 static const char times_str[] = "x";
462 static const char comma_str[] = ", ";
463 static const char vbar_str[] = " | ";
464
465 const char *cur_sfx = empty_str;
466
467 /* parse a CL_DEVICE_VERSION or CL_PLATFORM_VERSION info to determine the OpenCL version.
468  * Returns an unsigned integer in the form major*10 + minor
469  */
470 cl_uint
471 getOpenCLVersion(const char *version)
472 {
473         cl_uint ret = 10;
474         long parse = 0;
475         const char *from = version;
476         char *next = NULL;
477         parse = strtol(from, &next, 10);
478
479         if (next != from) {
480                 ret = parse*10;
481                 // skip the dot TODO should we actually check for the dot?
482                 from = ++next;
483                 parse = strtol(from, &next, 10);
484                 if (next != from)
485                         ret += parse;
486         }
487         return ret;
488 }
489
490 #define SPLIT_CL_VERSION(ver) ((ver)/10), ((ver)%10)
491
492 /* print strbuf, prefixed by pname, skipping leading whitespace if skip is nonzero,
493  * affixing cur_sfx */
494 static inline
495 void show_strbuf(const struct _strbuf *strbuf, const char *pname, int skip, cl_int err)
496 {
497         printf("%s" I1_STR "%s%s\n",
498                 line_pfx, pname,
499                 (skip ? skip_leading_ws(strbuf->buf) : strbuf->buf),
500                 err ? empty_str : cur_sfx);
501 }
502
503 void
504 platform_info_str(struct platform_info_ret *ret,
505         const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk),
506         const struct opt_out* UNUSED(output))
507 {
508         GET_STRING_LOC(ret, loc, clGetPlatformInfo, loc->plat, loc->param.plat);
509 }
510
511 void
512 platform_info_ulong(struct platform_info_ret *ret,
513         const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk),
514         const struct opt_out *output)
515 {
516         ret->err = REPORT_ERROR_LOC(ret,
517                 clGetPlatformInfo(loc->plat, loc->param.plat, sizeof(ret->value.u64), &ret->value.u64, NULL),
518                 loc, "get %s");
519         CHECK_SIZE(ret, loc, ret->value.u64, clGetPlatformInfo, loc->plat, loc->param.plat);
520         strbuf_printf(&ret->str, "%" PRIu64, ret->value.u64);
521 }
522
523 void
524 platform_info_sz(struct platform_info_ret *ret,
525         const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk),
526         const struct opt_out *output)
527 {
528         ret->err = REPORT_ERROR_LOC(ret,
529                 clGetPlatformInfo(loc->plat, loc->param.plat, sizeof(ret->value.s), &ret->value.s, NULL),
530                 loc, "get %s");
531         CHECK_SIZE(ret, loc, ret->value.s, clGetPlatformInfo, loc->plat, loc->param.plat);
532         strbuf_printf(&ret->str, "%" PRIuS, ret->value.s);
533 }
534
535 struct platform_info_traits {
536         cl_platform_info param; // CL_PLATFORM_*
537         const char *sname; // "CL_PLATFORM_*"
538         const char *pname; // "Platform *"
539         const char *sfx; // suffix for the output in non-raw mode
540         /* pointer to function that retrieves the parameter */
541         void (*show_func)(struct platform_info_ret *,
542                 const struct info_loc *, const struct platform_info_checks *,
543                 const struct opt_out *);
544         /* pointer to function that checks if the parameter should be retrieved */
545         cl_bool (*check_func)(const struct platform_info_checks *);
546 };
547
548 cl_bool khr_icd_p(const struct platform_info_checks *chk)
549 {
550         return chk->has_khr_icd;
551 }
552
553 cl_bool plat_is_12(const struct platform_info_checks *chk)
554 {
555         return !(chk->plat_version < 12);
556 }
557
558 cl_bool plat_is_20(const struct platform_info_checks *chk)
559 {
560         return !(chk->plat_version < 20);
561 }
562
563 cl_bool plat_is_21(const struct platform_info_checks *chk)
564 {
565         return !(chk->plat_version < 21);
566 }
567
568 cl_bool plat_has_amd_object_metadata(const struct platform_info_checks *chk)
569 {
570         return chk->has_amd_object_metadata;
571 }
572
573
574 #define PINFO_COND(symbol, name, sfx, typ, funcptr) { symbol, #symbol, "Platform " name, sfx, &platform_info_##typ, &funcptr }
575 #define PINFO(symbol, name, sfx, typ) { symbol, #symbol, "Platform " name, sfx, &platform_info_##typ, NULL }
576 struct platform_info_traits pinfo_traits[] = {
577         PINFO(CL_PLATFORM_NAME, "Name", NULL, str),
578         PINFO(CL_PLATFORM_VENDOR, "Vendor", NULL, str),
579         PINFO(CL_PLATFORM_VERSION, "Version", NULL, str),
580         PINFO(CL_PLATFORM_PROFILE, "Profile", NULL, str),
581         PINFO(CL_PLATFORM_EXTENSIONS, "Extensions", NULL, str),
582         PINFO_COND(CL_PLATFORM_MAX_KEYS_AMD, "Max metadata object keys (AMD)", NULL, sz, plat_has_amd_object_metadata),
583         PINFO_COND(CL_PLATFORM_HOST_TIMER_RESOLUTION, "Host timer resolution", "ns", ulong, plat_is_21),
584         PINFO_COND(CL_PLATFORM_ICD_SUFFIX_KHR, "Extensions function suffix", NULL, str, khr_icd_p)
585 };
586
587 /* Collect (and optionally show) infomation on a specific platform,
588  * initializing relevant arrays and optionally showing the collected
589  * information
590  */
591 void
592 gatherPlatformInfo(struct platform_list *plist, cl_uint p, const struct opt_out *output)
593 {
594         cl_int len = 0;
595
596         struct platform_data *pdata = plist->pdata + p;
597         struct platform_info_checks *pinfo_checks = plist->platform_checks + p;
598         struct platform_info_ret ret;
599         struct info_loc loc;
600
601         pinfo_checks->plat_version = 10;
602
603         INIT_RET(ret, "platform");
604         reset_loc(&loc, __func__);
605         loc.plat = plist->platform[p];
606
607         for (loc.line = 0; loc.line < ARRAY_SIZE(pinfo_traits); ++loc.line) {
608                 const struct platform_info_traits *traits = pinfo_traits + loc.line;
609
610                 /* checked is true if there was no condition to check for, or if the
611                  * condition was satisfied
612                  */
613                 int checked = !(traits->check_func && !traits->check_func(pinfo_checks));
614
615                 if (output->cond == COND_PROP_CHECK && !checked)
616                         continue;
617
618                 loc.sname = traits->sname;
619                 loc.pname = (output->mode == CLINFO_HUMAN ?
620                         traits->pname : traits->sname);
621                 loc.param.plat = traits->param;
622
623                 cur_sfx = (output->mode == CLINFO_HUMAN && traits->sfx) ? traits->sfx : empty_str;
624
625                 ret.str.buf[0] = '\0';
626                 ret.err_str.buf[0] = '\0';
627                 traits->show_func(&ret, &loc, pinfo_checks, output);
628
629                 /* The property is skipped if this was a conditional property,
630                  * unsatisfied, there was an error retrieving it and cond_prop_mode is not
631                  * COND_PROP_SHOW.
632                  */
633                 if (ret.err && !checked && output->cond != COND_PROP_SHOW)
634                         continue;
635
636                 /* when only listing, do not print anything, we're just gathering
637                  * information */
638                 if (output->detailed) {
639                         show_strbuf(RET_BUF(ret), loc.pname, 0, ret.err);
640                 }
641
642                 if (ret.err)
643                         continue;
644
645                 /* post-processing */
646
647                 switch (traits->param) {
648                 case CL_PLATFORM_NAME:
649                         /* Store name for future reference */
650                         len = strlen(ret.str.buf);
651                         ALLOC(pdata->pname, len+1, "platform name copy");
652                         /* memcpy instead of strncpy since we already have the len
653                          * and memcpy is possibly more optimized */
654                         memcpy(pdata->pname, ret.str.buf, len);
655                         pdata->pname[len] = '\0';
656                         break;
657                 case CL_PLATFORM_VERSION:
658                         /* compute numeric value for OpenCL version */
659                         pinfo_checks->plat_version = getOpenCLVersion(ret.str.buf + 7);
660                         break;
661                 case CL_PLATFORM_EXTENSIONS:
662                         pinfo_checks->has_khr_icd = !!strstr(ret.str.buf, "cl_khr_icd");
663                         pinfo_checks->has_amd_object_metadata = !!strstr(ret.str.buf, "cl_amd_object_metadata");
664                         pdata->has_amd_offline = !!strstr(ret.str.buf, "cl_amd_offline_devices");
665                         break;
666                 case CL_PLATFORM_ICD_SUFFIX_KHR:
667                         /* Store ICD suffix for future reference */
668                         len = strlen(ret.str.buf);
669                         ALLOC(pdata->sname, len+1, "platform ICD suffix copy");
670                         /* memcpy instead of strncpy since we already have the len
671                          * and memcpy is possibly more optimized */
672                         memcpy(pdata->sname, ret.str.buf, len);
673                         pdata->sname[len] = '\0';
674                 default:
675                         /* do nothing */
676                         break;
677                 }
678
679         }
680
681         if (pinfo_checks->plat_version > plist->max_plat_version)
682                 plist->max_plat_version = pinfo_checks->plat_version;
683
684         /* if no CL_PLATFORM_ICD_SUFFIX_KHR, use P### as short/symbolic name */
685         if (!pdata->sname) {
686 #define SNAME_MAX 32
687                 ALLOC(pdata->sname, SNAME_MAX, "platform symbolic name");
688                 snprintf(pdata->sname, SNAME_MAX, "P%" PRIu32 "", p);
689         }
690
691         len = (cl_int)strlen(pdata->sname);
692         if (len > plist->max_sname_len)
693                 plist->max_sname_len = len;
694
695         ret.err = clGetDeviceIDs(loc.plat, CL_DEVICE_TYPE_ALL, 0, NULL, &pdata->ndevs);
696         if (ret.err == CL_DEVICE_NOT_FOUND)
697                 pdata->ndevs = 0;
698         else
699                 CHECK_ERROR(ret.err, "number of devices");
700         plist->ndevs_total += pdata->ndevs;
701         plist->dev_offset[p] = p ? plist->dev_offset[p-1] + (pdata-1)->ndevs : 0;
702         plist_devs_reserve(plist, plist->ndevs_total);
703
704         if (pdata->ndevs > 0) {
705                 ret.err = clGetDeviceIDs(loc.plat, CL_DEVICE_TYPE_ALL,
706                         pdata->ndevs,
707                         plist->all_devs + plist->dev_offset[p], NULL);
708         }
709
710         if (pdata->ndevs > plist->max_devs)
711                 plist->max_devs = pdata->ndevs;
712
713         UNINIT_RET(ret);
714 }
715
716 /*
717  * Device properties/extensions used in traits checks, and relevant functions
718  */
719
720 struct device_info_checks {
721         const struct platform_info_checks *pinfo_checks;
722         cl_device_type devtype;
723         cl_device_mem_cache_type cachetype;
724         cl_device_local_mem_type lmemtype;
725         cl_bool image_support;
726         cl_bool compiler_available;
727         char has_half[12];
728         char has_double[24];
729         char has_nv[29];
730         char has_amd[30];
731         char has_amd_svm[11];
732         char has_arm_svm[29];
733         char has_arm_core_id[15];
734         char has_fission[22];
735         char has_atomic_counters[26];
736         char has_image2d_buffer[27];
737         char has_il_program[18];
738         char has_intel_local_thread[30];
739         char has_intel_AME[36];
740         char has_intel_AVC_ME[43];
741         char has_intel_planar_yuv[20];
742         char has_intel_required_subgroup_size[32];
743         char has_altera_dev_temp[29];
744         char has_p2p[23];
745         char has_spir[12];
746         char has_qcom_ext_host_ptr[21];
747         char has_simultaneous_sharing[30];
748         char has_subgroup_named_barrier[30];
749         char has_terminate_context[25];
750         cl_uint dev_version;
751 };
752
753 #define DEFINE_EXT_CHECK(ext) cl_bool dev_has_##ext(const struct device_info_checks *chk) \
754 { \
755         return !!(chk->has_##ext[0]); \
756 }
757
758 DEFINE_EXT_CHECK(half)
759 DEFINE_EXT_CHECK(double)
760 DEFINE_EXT_CHECK(nv)
761 DEFINE_EXT_CHECK(amd)
762 DEFINE_EXT_CHECK(amd_svm)
763 DEFINE_EXT_CHECK(arm_svm)
764 DEFINE_EXT_CHECK(arm_core_id)
765 DEFINE_EXT_CHECK(fission)
766 DEFINE_EXT_CHECK(atomic_counters)
767 DEFINE_EXT_CHECK(image2d_buffer)
768 DEFINE_EXT_CHECK(il_program)
769 DEFINE_EXT_CHECK(intel_local_thread)
770 DEFINE_EXT_CHECK(intel_AME)
771 DEFINE_EXT_CHECK(intel_AVC_ME)
772 DEFINE_EXT_CHECK(intel_planar_yuv)
773 DEFINE_EXT_CHECK(intel_required_subgroup_size)
774 DEFINE_EXT_CHECK(altera_dev_temp)
775 DEFINE_EXT_CHECK(p2p)
776 DEFINE_EXT_CHECK(spir)
777 DEFINE_EXT_CHECK(qcom_ext_host_ptr)
778 DEFINE_EXT_CHECK(simultaneous_sharing)
779 DEFINE_EXT_CHECK(subgroup_named_barrier)
780 DEFINE_EXT_CHECK(terminate_context)
781
782 /* In the version checks we negate the opposite conditions
783  * instead of double-negating the actual condition
784  */
785
786 // device supports 1.1
787 cl_bool dev_is_11(const struct device_info_checks *chk)
788 {
789         return !(chk->dev_version < 11);
790 }
791
792
793 // device supports 1.2
794 cl_bool dev_is_12(const struct device_info_checks *chk)
795 {
796         return !(chk->dev_version < 12);
797 }
798
799 // device supports 2.0
800 cl_bool dev_is_20(const struct device_info_checks *chk)
801 {
802         return !(chk->dev_version < 20);
803 }
804
805 // device supports 2.1
806 cl_bool dev_is_21(const struct device_info_checks *chk)
807 {
808         return !(chk->dev_version < 21);
809 }
810
811 // device does not support 2.0
812 cl_bool dev_not_20(const struct device_info_checks *chk)
813 {
814         return !(chk->dev_version >= 20);
815 }
816
817
818 cl_bool dev_is_gpu(const struct device_info_checks *chk)
819 {
820         return !!(chk->devtype & CL_DEVICE_TYPE_GPU);
821 }
822
823 cl_bool dev_is_gpu_amd(const struct device_info_checks *chk)
824 {
825         return dev_is_gpu(chk) && dev_has_amd(chk);
826 }
827
828 /* Device supports cl_amd_device_attribute_query v4 */
829 cl_bool dev_has_amd_v4(const struct device_info_checks *chk)
830 {
831         /* We don't actually have a criterion to check if the device
832          * supports a specific version of an extension, so for the time
833          * being rely on them being GPU devices with cl_amd_device_attribute_query
834          * and the platform supporting OpenCL 2.0 or later
835          * TODO FIXME tune criteria
836          */
837         return dev_is_gpu(chk) && dev_has_amd(chk) && plat_is_20(chk->pinfo_checks);
838 }
839
840 /* Device supports cl_arm_core_id v2 */
841 cl_bool dev_has_arm_core_id_v2(const struct device_info_checks *chk)
842 {
843         /* We don't actually have a criterion to check if the device
844          * supports a specific version of an extension, so for the time
845          * being rely on them having cl_arm_core_id and the platform
846          * supporting OpenCL 1.2 or later
847          * TODO FIXME tune criteria
848          */
849         return dev_has_arm_core_id(chk) && plat_is_12(chk->pinfo_checks);
850 }
851
852 cl_bool dev_has_svm(const struct device_info_checks *chk)
853 {
854         return dev_is_20(chk) || dev_has_amd_svm(chk);
855 }
856
857 cl_bool dev_has_partition(const struct device_info_checks *chk)
858 {
859         return dev_is_12(chk) || dev_has_fission(chk);
860 }
861
862 cl_bool dev_has_cache(const struct device_info_checks *chk)
863 {
864         return chk->cachetype != CL_NONE;
865 }
866
867 cl_bool dev_has_lmem(const struct device_info_checks *chk)
868 {
869         return chk->lmemtype != CL_NONE;
870 }
871
872 cl_bool dev_has_il(const struct device_info_checks *chk)
873 {
874         return dev_is_21(chk) || dev_has_il_program(chk);
875 }
876
877 cl_bool dev_has_images(const struct device_info_checks *chk)
878 {
879         return chk->image_support;
880 }
881
882 cl_bool dev_has_images_12(const struct device_info_checks *chk)
883 {
884         return dev_has_images(chk) && dev_is_12(chk);
885 }
886
887 cl_bool dev_has_images_20(const struct device_info_checks *chk)
888 {
889         return dev_has_images(chk) && dev_is_20(chk);
890 }
891
892 cl_bool dev_has_compiler(const struct device_info_checks *chk)
893 {
894         return chk->compiler_available;
895 }
896
897 cl_bool dev_has_compiler_11(const struct device_info_checks *chk)
898 {
899         return dev_is_11(chk) && dev_has_compiler(chk);
900 }
901
902
903 void identify_device_extensions(const char *extensions, struct device_info_checks *chk)
904 {
905 #define _HAS_EXT(ext) (strstr(extensions, ext))
906 #define HAS_EXT(ext) _HAS_EXT(#ext)
907 #define CPY_EXT(what, ext) do { \
908         strncpy(chk->has_##what, has, sizeof(ext)); \
909         chk->has_##what[sizeof(ext)-1] = '\0'; \
910 } while (0)
911 #define CHECK_EXT(what, ext) do { \
912         has = _HAS_EXT(#ext); \
913         if (has) CPY_EXT(what, #ext); \
914 } while(0)
915
916         char *has;
917         CHECK_EXT(half, cl_khr_fp16);
918         CHECK_EXT(spir, cl_khr_spir);
919         CHECK_EXT(double, cl_khr_fp64);
920         if (!dev_has_double(chk))
921                 CHECK_EXT(double, cl_amd_fp64);
922         if (!dev_has_double(chk))
923                 CHECK_EXT(double, cl_APPLE_fp64_basic_ops);
924         CHECK_EXT(nv, cl_nv_device_attribute_query);
925         CHECK_EXT(amd, cl_amd_device_attribute_query);
926         CHECK_EXT(amd_svm, cl_amd_svm);
927         CHECK_EXT(arm_svm, cl_arm_shared_virtual_memory);
928         CHECK_EXT(arm_core_id, cl_arm_core_id);
929         CHECK_EXT(fission, cl_ext_device_fission);
930         CHECK_EXT(atomic_counters, cl_ext_atomic_counters_64);
931         if (dev_has_atomic_counters(chk))
932                 CHECK_EXT(atomic_counters, cl_ext_atomic_counters_32);
933         CHECK_EXT(image2d_buffer, cl_khr_image2d_from_buffer);
934         CHECK_EXT(il_program, cl_khr_il_program);
935         CHECK_EXT(intel_local_thread, cl_intel_exec_by_local_thread);
936         CHECK_EXT(intel_AME, cl_intel_advanced_motion_estimation);
937         CHECK_EXT(intel_AVC_ME, cl_intel_device_side_avc_motion_estimation);
938         CHECK_EXT(intel_planar_yuv, cl_intel_planar_yuv);
939         CHECK_EXT(intel_required_subgroup_size, cl_intel_required_subgroup_size);
940         CHECK_EXT(altera_dev_temp, cl_altera_device_temperature);
941         CHECK_EXT(p2p, cl_amd_copy_buffer_p2p);
942         CHECK_EXT(qcom_ext_host_ptr, cl_qcom_ext_host_ptr);
943         CHECK_EXT(simultaneous_sharing, cl_intel_simultaneous_sharing);
944         CHECK_EXT(subgroup_named_barrier, cl_khr_subgroup_named_barrier);
945         CHECK_EXT(terminate_context, cl_khr_terminate_context);
946 }
947
948
949 /*
950  * Device info print functions
951  */
952
953 #define _GET_VAL(ret, loc, val) \
954         ret->err = REPORT_ERROR_LOC(ret, \
955                 clGetDeviceInfo((loc)->dev, (loc)->param.dev, sizeof(val), &(val), NULL), \
956                 loc, "get %s"); \
957         CHECK_SIZE(ret, loc, val, clGetDeviceInfo, (loc)->dev, (loc)->param.dev);
958
959 #define _GET_VAL_ARRAY(ret, loc) \
960         ret->err = REPORT_ERROR_LOC(ret, \
961                 clGetDeviceInfo(loc->dev, loc->param.dev, 0, NULL, &szval), \
962                 loc, "get number of %s"); \
963         numval = szval/sizeof(*val); \
964         if (!ret->err) { \
965                 REALLOC(val, numval, loc->sname); \
966                 ret->err = REPORT_ERROR_LOC(ret, \
967                         clGetDeviceInfo(loc->dev, loc->param.dev, szval, val, NULL), \
968                         loc, "get %s"); \
969                 if (ret->err) { free(val); val = NULL; } \
970         }
971
972 #define GET_VAL(ret, loc, field) do { \
973         _GET_VAL(ret, (loc), ret->value.field) \
974 } while (0)
975
976 #define GET_VAL_ARRAY(ret, loc) do { \
977         _GET_VAL_ARRAY(ret, (loc)) \
978 } while (0)
979
980 #define DEFINE_DEVINFO_FETCH(type, field) \
981 type \
982 device_fetch_##type(struct device_info_ret *ret, \
983         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), \
984         const struct opt_out *output) \
985 { \
986         GET_VAL(ret, loc, field); \
987         return ret->value.field; \
988 }
989
990 DEFINE_DEVINFO_FETCH(size_t, s)
991 DEFINE_DEVINFO_FETCH(cl_bool, b)
992 DEFINE_DEVINFO_FETCH(cl_uint, u32)
993 DEFINE_DEVINFO_FETCH(cl_ulong, u64)
994 DEFINE_DEVINFO_FETCH(cl_device_type, devtype)
995 DEFINE_DEVINFO_FETCH(cl_device_mem_cache_type, cachetype)
996 DEFINE_DEVINFO_FETCH(cl_device_local_mem_type, lmemtype)
997 DEFINE_DEVINFO_FETCH(cl_device_topology_amd, devtopo)
998 DEFINE_DEVINFO_FETCH(cl_device_affinity_domain, affinity_domain)
999 DEFINE_DEVINFO_FETCH(cl_device_fp_config, fpconfig)
1000 DEFINE_DEVINFO_FETCH(cl_command_queue_properties, qprop)
1001 DEFINE_DEVINFO_FETCH(cl_device_exec_capabilities, execap)
1002 DEFINE_DEVINFO_FETCH(cl_device_svm_capabilities, svmcap)
1003 DEFINE_DEVINFO_FETCH(cl_device_terminate_capability_khr, termcap)
1004
1005 #define DEV_FETCH_LOC(type, var, loc) \
1006         type var = device_fetch_##type(ret, loc, chk, output)
1007 #define DEV_FETCH(type, var) DEV_FETCH_LOC(type, var, loc)
1008
1009 #define FMT_VAL(ret, fmt, val) if (!ret->err) strbuf_printf(&ret->str, fmt, val)
1010
1011 #define DEFINE_DEVINFO_SHOW(how, type, field, fmt) \
1012 void \
1013 device_info_##how(struct device_info_ret *ret, \
1014         const struct info_loc *loc, const struct device_info_checks* chk, \
1015         const struct opt_out *output) \
1016 { \
1017         DEV_FETCH(type, val); \
1018         if (!ret->err) FMT_VAL(ret, fmt, val); \
1019 }
1020
1021 DEFINE_DEVINFO_SHOW(int, cl_uint, u32, "%" PRIu32)
1022 DEFINE_DEVINFO_SHOW(hex, cl_uint, u32, "%#" PRIx32)
1023 DEFINE_DEVINFO_SHOW(long, cl_ulong, u64, "%" PRIu64)
1024 DEFINE_DEVINFO_SHOW(sz, size_t, s, "%" PRIuS)
1025
1026 void
1027 device_info_str(struct device_info_ret *ret,
1028         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1029         const struct opt_out* UNUSED(output))
1030 {
1031         GET_STRING_LOC(ret, loc, clGetDeviceInfo, loc->dev, loc->param.dev);
1032 }
1033
1034 void
1035 device_info_bool(struct device_info_ret *ret,
1036         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1037         const struct opt_out *output)
1038 {
1039         DEV_FETCH(cl_bool, val);
1040         if (!ret->err) {
1041                 const char * const * str = (output->mode == CLINFO_HUMAN ?
1042                         bool_str : bool_raw_str);
1043                 strbuf_printf(&ret->str, "%s", str[val]);
1044         }
1045 }
1046
1047 void
1048 device_info_bits(struct device_info_ret *ret,
1049         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1050         const struct opt_out *output)
1051 {
1052         DEV_FETCH(cl_uint, val);
1053         if (!ret->err)
1054                 strbuf_printf(&ret->str, "%" PRIu32 " bits (%" PRIu32 " bytes)", val, val/8);
1055 }
1056
1057
1058 size_t strbuf_mem(struct _strbuf *str, cl_ulong val, size_t szval)
1059 {
1060         double dbl = (double)val;
1061         size_t sfx = 0;
1062         while (dbl > 1024 && sfx < memsfx_end) {
1063                 dbl /= 1024;
1064                 ++sfx;
1065         }
1066         return sprintf(str->buf + szval, " (%.4lg%s)",
1067                 dbl, memsfx[sfx]);
1068 }
1069
1070 void
1071 device_info_mem(struct device_info_ret *ret,
1072         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1073         const struct opt_out *output)
1074 {
1075         DEV_FETCH(cl_ulong, val);
1076         if (!ret->err) {
1077                 size_t szval = strbuf_printf(&ret->str, "%" PRIu64, val);
1078                 if (output->mode == CLINFO_HUMAN && val > 1024)
1079                         strbuf_mem(&ret->str, val, szval);
1080         }
1081 }
1082
1083 void
1084 device_info_mem_int(struct device_info_ret *ret,
1085         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1086         const struct opt_out *output)
1087 {
1088         DEV_FETCH(cl_uint, val);
1089         if (!ret->err) {
1090                 size_t szval = strbuf_printf(&ret->str, "%" PRIu32, val);
1091                 if (output->mode == CLINFO_HUMAN && val > 1024)
1092                         strbuf_mem(&ret->str, val, szval);
1093         }
1094 }
1095
1096 void
1097 device_info_mem_sz(struct device_info_ret *ret,
1098         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1099         const struct opt_out *output)
1100 {
1101         DEV_FETCH(size_t, val);
1102         if (!ret->err) {
1103                 size_t szval = strbuf_printf(&ret->str, "%" PRIuS, val);
1104                 if (output->mode == CLINFO_HUMAN && val > 1024)
1105                         strbuf_mem(&ret->str, val, szval);
1106         }
1107 }
1108
1109 void
1110 device_info_free_mem_amd(struct device_info_ret *ret,
1111         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1112         const struct opt_out *output)
1113 {
1114         size_t *val = NULL;
1115         size_t szval = 0, numval = 0;
1116         GET_VAL_ARRAY(ret, loc);
1117         if (!ret->err) {
1118                 size_t cursor = 0;
1119                 szval = 0;
1120                 for (cursor = 0; cursor < numval; ++cursor) {
1121                         if (szval > 0) {
1122                                 ret->str.buf[szval] = ' ';
1123                                 ++szval;
1124                         }
1125                         szval += sprintf(ret->str.buf + szval, "%" PRIuS, val[cursor]);
1126                         if (output->mode == CLINFO_HUMAN)
1127                                 szval += strbuf_mem(&ret->str, val[cursor]*UINT64_C(1024), szval);
1128                 }
1129                 // TODO: ret->value.??? = val;
1130         }
1131         free(val);
1132 }
1133
1134 void
1135 device_info_time_offset(struct device_info_ret *ret,
1136         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1137         const struct opt_out *output)
1138 {
1139         DEV_FETCH(cl_ulong, val);
1140         if (!ret->err) {
1141                 size_t szval = 0;
1142                 time_t time = val/UINT64_C(1000000000);
1143                 szval += strbuf_printf(&ret->str, "%" PRIu64 "ns (", val);
1144                 szval += bufcpy(&ret->str, szval, ctime(&time));
1145                 /* overwrite ctime's newline with the closing parenthesis */
1146                 if (szval < ret->str.sz)
1147                         ret->str.buf[szval - 1] = ')';
1148         }
1149 }
1150
1151 void
1152 device_info_szptr_sep(struct device_info_ret *ret, const char *human_sep,
1153         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1154         const struct opt_out *output)
1155 {
1156         size_t *val = NULL;
1157         size_t szval = 0, numval = 0;
1158         GET_VAL_ARRAY(ret, loc);
1159         if (!ret->err) {
1160                 size_t counter = 0;
1161                 set_separator(output->mode == CLINFO_HUMAN ? human_sep : spc_str);
1162                 szval = 0;
1163                 for (counter = 0; counter < numval; ++counter) {
1164                         add_separator(&ret->str, &szval);
1165                         szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%" PRIuS, val[counter]);
1166                         if (szval >= ret->str.sz) {
1167                                 trunc_strbuf(&ret->str);
1168                                 break;
1169                         }
1170                 }
1171                 // TODO: ret->value.??? = val;
1172         }
1173         free(val);
1174 }
1175
1176 void
1177 device_info_szptr_times(struct device_info_ret *ret,
1178         const struct info_loc *loc, const struct device_info_checks* chk,
1179         const struct opt_out *output)
1180 {
1181         device_info_szptr_sep(ret, times_str, loc, chk, output);
1182 }
1183
1184 void
1185 device_info_szptr_comma(struct device_info_ret *ret,
1186         const struct info_loc *loc, const struct device_info_checks* chk,
1187         const struct opt_out *output)
1188 {
1189         device_info_szptr_sep(ret, comma_str, loc, chk, output);
1190 }
1191
1192 void
1193 getWGsizes(struct device_info_ret *ret, const struct info_loc *loc, size_t *wgm, size_t wgm_sz,
1194         const struct opt_out* UNUSED(output))
1195 {
1196         cl_int log_err;
1197
1198         cl_context_properties ctxpft[] = {
1199                 CL_CONTEXT_PLATFORM, (cl_context_properties)loc->plat,
1200                 0, 0 };
1201         cl_uint cursor = 0;
1202         cl_context ctx = NULL;
1203         cl_program prg = NULL;
1204         cl_kernel krn = NULL;
1205
1206         ret->err = CL_SUCCESS;
1207
1208         ctx = clCreateContext(ctxpft, 1, &loc->dev, NULL, NULL, &ret->err);
1209         if (REPORT_ERROR(&ret->err_str, ret->err, "create context")) goto out;
1210         prg = clCreateProgramWithSource(ctx, ARRAY_SIZE(sources), sources, NULL, &ret->err);
1211         if (REPORT_ERROR(&ret->err_str, ret->err, "create program")) goto out;
1212         ret->err = clBuildProgram(prg, 1, &loc->dev, NULL, NULL, NULL);
1213         log_err = REPORT_ERROR(&ret->err_str, ret->err, "build program");
1214
1215         /* for a program build failure, dump the log to stderr before bailing */
1216         if (log_err == CL_BUILD_PROGRAM_FAILURE) {
1217                 struct _strbuf logbuf;
1218                 init_strbuf(&logbuf);
1219                 GET_STRING(&logbuf, ret->err,
1220                         clGetProgramBuildInfo, CL_PROGRAM_BUILD_LOG, "CL_PROGRAM_BUILD_LOG", prg, loc->dev);
1221                 if (ret->err == CL_SUCCESS) {
1222                         fflush(stdout);
1223                         fflush(stderr);
1224                         fputs("=== CL_PROGRAM_BUILD_LOG ===\n", stderr);
1225                         fputs(logbuf.buf, stderr);
1226                         fflush(stderr);
1227                 }
1228                 free_strbuf(&logbuf);
1229         }
1230         if (ret->err)
1231                 goto out;
1232
1233         for (cursor = 0; cursor < wgm_sz; ++cursor) {
1234                 strbuf_printf(&ret->str, "sum%u", 1<<cursor);
1235                 if (cursor == 0)
1236                         ret->str.buf[3] = 0; // scalar kernel is called 'sum'
1237                 krn = clCreateKernel(prg, ret->str.buf, &ret->err);
1238                 if (REPORT_ERROR(&ret->err_str, ret->err, "create kernel")) goto out;
1239                 ret->err = clGetKernelWorkGroupInfo(krn, loc->dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
1240                         sizeof(*wgm), wgm + cursor, NULL);
1241                 if (REPORT_ERROR(&ret->err_str, ret->err, "get kernel info")) goto out;
1242                 clReleaseKernel(krn);
1243                 krn = NULL;
1244         }
1245
1246 out:
1247         if (krn)
1248                 clReleaseKernel(krn);
1249         if (prg)
1250                 clReleaseProgram(prg);
1251         if (ctx)
1252                 clReleaseContext(ctx);
1253 }
1254
1255
1256 void
1257 device_info_wg(struct device_info_ret *ret,
1258         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1259         const struct opt_out *output)
1260 {
1261         /* preferred workgroup size multiple for each kernel
1262          * have not found a platform where the WG multiple changes,
1263          * but keep this flexible (this can grow up to 5)
1264          */
1265 #define NUM_KERNELS 1
1266         size_t wgm[NUM_KERNELS] = {0};
1267
1268         getWGsizes(ret, loc, wgm, NUM_KERNELS, output);
1269         if (!ret->err) {
1270                 strbuf_printf(&ret->str, "%" PRIuS, wgm[0]);
1271         }
1272         ret->value.s = wgm[0];
1273 }
1274
1275 void
1276 device_info_img_sz_2d(struct device_info_ret *ret,
1277         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1278         const struct opt_out *output)
1279 {
1280         struct info_loc loc2 = *loc;
1281         size_t width = 0, height = 0;
1282         _GET_VAL(ret, loc, height); /* HEIGHT */
1283         if (!ret->err) {
1284                 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_IMAGE2D_MAX_WIDTH);
1285                 _GET_VAL(ret, &loc2, width);
1286                 if (!ret->err) {
1287                         strbuf_printf(&ret->str, "%" PRIuS "x%" PRIuS, width, height);
1288                 }
1289         }
1290         ret->value.u32v.s[0] = width;
1291         ret->value.u32v.s[1] = height;
1292 }
1293
1294 void
1295 device_info_img_sz_intel_planar_yuv(struct device_info_ret *ret,
1296         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1297         const struct opt_out *output)
1298 {
1299         struct info_loc loc2 = *loc;
1300         size_t width = 0, height = 0;
1301         _GET_VAL(ret, loc, height); /* HEIGHT */
1302         if (!ret->err) {
1303                 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL);
1304                 _GET_VAL(ret, &loc2, width);
1305                 if (!ret->err) {
1306                         strbuf_printf(&ret->str, "%" PRIuS "x%" PRIuS, width, height);
1307                 }
1308         }
1309         ret->value.u32v.s[0] = width;
1310         ret->value.u32v.s[1] = height;
1311 }
1312
1313
1314 void
1315 device_info_img_sz_3d(struct device_info_ret *ret,
1316         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1317         const struct opt_out *output)
1318 {
1319         struct info_loc loc2 = *loc;
1320         size_t width = 0, height = 0, depth = 0;
1321         _GET_VAL(ret, loc, height); /* HEIGHT */
1322         if (!ret->err) {
1323                 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_IMAGE3D_MAX_WIDTH);
1324                 _GET_VAL(ret, &loc2, width);
1325                 if (!ret->err) {
1326                         RESET_LOC_PARAM(loc2, dev, CL_DEVICE_IMAGE3D_MAX_DEPTH);
1327                         _GET_VAL(ret, &loc2, depth);
1328                         if (!ret->err) {
1329                                 strbuf_printf(&ret->str, "%" PRIuS "x%" PRIuS "x%" PRIuS,
1330                                         width, height, depth);
1331                         }
1332                 }
1333         }
1334         ret->value.u32v.s[0] = width;
1335         ret->value.u32v.s[1] = height;
1336         ret->value.u32v.s[2] = depth;
1337 }
1338
1339
1340 void
1341 device_info_devtype(struct device_info_ret *ret,
1342         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1343         const struct opt_out *output)
1344 {
1345         DEV_FETCH(cl_device_type, val);
1346         if (!ret->err) {
1347                 /* iterate over device type strings, appending their textual form
1348                  * to ret->str */
1349                 cl_uint i = (cl_uint)actual_devtype_count;
1350                 const char * const *devstr = (output->mode == CLINFO_HUMAN ?
1351                         device_type_str : device_type_raw_str);
1352                 size_t szval = 0;
1353                 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1354                 for (; i > 0; --i) {
1355                         /* assemble CL_DEVICE_TYPE_* from index i */
1356                         cl_device_type cur = (cl_device_type)(1) << (i-1);
1357                         if (val & cur) {
1358                                 /* match: add separator if not first match */
1359                                 add_separator(&ret->str, &szval);
1360                                 szval += bufcpy(&ret->str, szval, devstr[i]);
1361                         }
1362                 }
1363                 /* check for extra bits */
1364                 if (szval < ret->str.sz) {
1365                         cl_device_type known_mask = ((cl_device_type)(1) << actual_devtype_count) - 1;
1366                         cl_device_type extra = val & ~known_mask;
1367                         if (extra) {
1368                                 add_separator(&ret->str, &szval);
1369                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra);
1370                         }
1371                 }
1372         }
1373 }
1374
1375 void
1376 device_info_cachetype(struct device_info_ret *ret,
1377         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1378         const struct opt_out *output)
1379 {
1380         DEV_FETCH(cl_device_mem_cache_type, val);
1381         if (!ret->err) {
1382                 const char * const *ar = (output->mode == CLINFO_HUMAN ?
1383                         cache_type_str : cache_type_raw_str);
1384                 bufcpy(&ret->str, 0, ar[val]);
1385         }
1386 }
1387
1388 void
1389 device_info_lmemtype(struct device_info_ret *ret,
1390         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1391         const struct opt_out *output)
1392 {
1393         DEV_FETCH(cl_device_local_mem_type, val);
1394         if (!ret->err) {
1395                 const char * const *ar = (output->mode == CLINFO_HUMAN ?
1396                         lmem_type_str : lmem_type_raw_str);
1397                 bufcpy(&ret->str, 0, ar[val]);
1398         }
1399         ret->value.lmemtype = val;
1400 }
1401
1402 /* cl_arm_core_id */
1403 void
1404 device_info_core_ids(struct device_info_ret *ret,
1405         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1406         const struct opt_out *output)
1407 {
1408         DEV_FETCH(cl_ulong, val);
1409
1410         if (!ret->err) {
1411                 /* The value is a bitfield where each set bit corresponds to a core ID
1412                  * value that can be returned by the device-side function. We print them
1413                  * here as ranges, such as 0-4, 8-12 */
1414                 size_t szval = 0;
1415                 int range_start = -1;
1416                 int cur_bit = 0;
1417                 set_separator(empty_str);
1418 #define CORE_ID_END 64
1419                 do {
1420                         /* Find the start of the range */
1421                         while ((cur_bit < CORE_ID_END) && !((val >> cur_bit) & 1))
1422                                 ++cur_bit;
1423                         range_start = cur_bit++;
1424
1425                         /* Find the end of the range */
1426                         while ((cur_bit < CORE_ID_END) && ((val >> cur_bit) & 1))
1427                                 ++cur_bit;
1428
1429                         /* print the range [range_start, cur_bit[ */
1430                         if (range_start >= 0 && range_start < CORE_ID_END) {
1431                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1,
1432                                         "%s%d", sep, range_start);
1433                                 if (cur_bit - range_start > 1)
1434                                         szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1,
1435                                                 "-%d", cur_bit - 1);
1436                                 set_separator(comma_str);
1437                         }
1438                 } while (cur_bit < CORE_ID_END);
1439         }
1440         ret->value.u64 = val;
1441 }
1442
1443 /* stringify a cl_device_topology_amd */
1444 void devtopo_str(struct device_info_ret *ret, const cl_device_topology_amd *devtopo)
1445 {
1446         switch (devtopo->raw.type) {
1447         case 0:
1448                 /* leave empty */
1449                 break;
1450         case CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD:
1451                 strbuf_printf(&ret->str, "PCI-E, %02x:%02x.%u",
1452                         (cl_uchar)(devtopo->pcie.bus),
1453                         devtopo->pcie.device, devtopo->pcie.function);
1454                 break;
1455         default:
1456                 strbuf_printf(&ret->str, "<unknown (%u): %u %u %u %u %u>",
1457                         devtopo->raw.type,
1458                         devtopo->raw.data[0], devtopo->raw.data[1],
1459                         devtopo->raw.data[2],
1460                         devtopo->raw.data[3], devtopo->raw.data[4]);
1461         }
1462 }
1463
1464 void
1465 device_info_devtopo_amd(struct device_info_ret *ret,
1466         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1467         const struct opt_out *output)
1468 {
1469         DEV_FETCH(cl_device_topology_amd, val);
1470         /* TODO how to do this in CLINFO_RAW mode */
1471         if (!ret->err) {
1472                 devtopo_str(ret, &val);
1473         }
1474 }
1475
1476 /* we assemble a cl_device_topology_amd struct from the NVIDIA info */
1477 void
1478 device_info_devtopo_nv(struct device_info_ret *ret,
1479         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1480         const struct opt_out *output)
1481 {
1482         struct info_loc loc2 = *loc;
1483         cl_device_topology_amd devtopo;
1484         DEV_FETCH(cl_uint, val); /* CL_DEVICE_PCI_BUS_ID_NV */
1485         if (!ret->err) {
1486                 devtopo.raw.type = CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD;
1487                 devtopo.pcie.bus = val & 0xff;
1488                 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_PCI_SLOT_ID_NV);
1489                 _GET_VAL(ret, &loc2, val);
1490
1491                 if (!ret->err) {
1492                         devtopo.pcie.device = (val >> 3) & 0xff;
1493                         devtopo.pcie.function = val & 7;
1494                         devtopo_str(ret, &devtopo);
1495                 }
1496                 ret->value.devtopo = devtopo;
1497         }
1498 }
1499
1500 /* NVIDIA Compute Capability */
1501 void
1502 device_info_cc_nv(struct device_info_ret *ret,
1503         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1504         const struct opt_out *output)
1505 {
1506         struct info_loc loc2 = *loc;
1507         cl_uint major = 0, minor = 0;
1508         _GET_VAL(ret, loc, major); /* MAJOR */
1509         if (!ret->err) {
1510                 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV);
1511                 _GET_VAL(ret, &loc2, minor);
1512                 if (!ret->err) {
1513                         strbuf_printf(&ret->str, "%" PRIu32 ".%" PRIu32 "", major, minor);
1514                 }
1515         }
1516         ret->value.u32v.s[0] = major;
1517         ret->value.u32v.s[1] = minor;
1518 }
1519
1520 /* AMD GFXIP */
1521 void
1522 device_info_gfxip_amd(struct device_info_ret *ret,
1523         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1524         const struct opt_out *output)
1525 {
1526         struct info_loc loc2 = *loc;
1527         cl_uint major = 0, minor = 0;
1528         _GET_VAL(ret, loc, major); /* MAJOR */
1529         if (!ret->err) {
1530                 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_GFXIP_MINOR_AMD);
1531                 _GET_VAL(ret, &loc2, minor);
1532                 if (!ret->err) {
1533                         strbuf_printf(&ret->str, "%" PRIu32 ".%" PRIu32 "", major, minor);
1534                 }
1535         }
1536         ret->value.u32v.s[0] = major;
1537         ret->value.u32v.s[1] = minor;
1538 }
1539
1540
1541 /* Device Partition, CLINFO_HUMAN header */
1542 void
1543 device_info_partition_header(struct device_info_ret *ret,
1544         const struct info_loc *UNUSED(loc), const struct device_info_checks *chk,
1545         const struct opt_out* UNUSED(output))
1546 {
1547         cl_bool is_12 = dev_is_12(chk);
1548         cl_bool has_fission = dev_has_fission(chk);
1549         size_t szval = strbuf_printf(&ret->str, "(%s%s%s%s)",
1550                 (is_12 ? core : empty_str),
1551                 (is_12 && has_fission ? comma_str : empty_str),
1552                 chk->has_fission,
1553                 (!(is_12 || has_fission) ? na : empty_str));
1554
1555         ret->err = CL_SUCCESS;
1556
1557         if (szval >= ret->str.sz)
1558                 trunc_strbuf(&ret->str);
1559 }
1560
1561 /* Device partition properties */
1562 void
1563 device_info_partition_types(struct device_info_ret *ret,
1564         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1565         const struct opt_out *output)
1566 {
1567         size_t numval = 0, szval = 0, cursor = 0, slen = 0;
1568         cl_device_partition_property *val = NULL;
1569         const char * const *ptstr = (output->mode == CLINFO_HUMAN ?
1570                 partition_type_str : partition_type_raw_str);
1571
1572         set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1573
1574         GET_VAL_ARRAY(ret, loc);
1575
1576         szval = 0;
1577         if (!ret->err) {
1578                 for (cursor = 0; cursor < numval; ++cursor) {
1579                         int str_idx = -1;
1580
1581                         /* add separator for values past the first */
1582                         add_separator(&ret->str, &szval);
1583
1584                         switch (val[cursor]) {
1585                         case 0: str_idx = 0; break;
1586                         case CL_DEVICE_PARTITION_EQUALLY: str_idx = 1; break;
1587                         case CL_DEVICE_PARTITION_BY_COUNTS: str_idx = 2; break;
1588                         case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: str_idx = 3; break;
1589                         case CL_DEVICE_PARTITION_BY_NAMES_INTEL: str_idx = 4; break;
1590                         default:
1591                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "by <unknown> (%#" PRIxPTR ")", val[cursor]);
1592                                 break;
1593                         }
1594                         if (str_idx >= 0) {
1595                                 /* string length, minus _EXT */
1596                                 slen = strlen(ptstr[str_idx]);
1597                                 if (output->mode == CLINFO_RAW && str_idx > 0)
1598                                         slen -= 4;
1599                                 szval += bufcpy_len(&ret->str, szval, ptstr[str_idx], slen);
1600                         }
1601                         if (szval >= ret->str.sz) {
1602                                 trunc_strbuf(&ret->str);
1603                                 break;
1604                         }
1605                 }
1606                 // TODO ret->value.??? = val
1607         }
1608         free(val);
1609 }
1610
1611 void
1612 device_info_partition_types_ext(struct device_info_ret *ret,
1613         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1614         const struct opt_out *output)
1615 {
1616         size_t numval = 0, szval = 0, cursor = 0, slen = 0;
1617         cl_device_partition_property_ext *val = NULL;
1618         const char * const *ptstr = (output->mode == CLINFO_HUMAN ?
1619                 partition_type_str : partition_type_raw_str);
1620
1621         set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1622
1623         GET_VAL_ARRAY(ret, loc);
1624
1625         szval = 0;
1626         if (!ret->err) {
1627                 for (cursor = 0; cursor < numval; ++cursor) {
1628                         int str_idx = -1;
1629
1630                         /* add separator for values past the first */
1631                         add_separator(&ret->str, &szval);
1632
1633                         switch (val[cursor]) {
1634                         case 0: str_idx = 0; break;
1635                         case CL_DEVICE_PARTITION_EQUALLY_EXT: str_idx = 1; break;
1636                         case CL_DEVICE_PARTITION_BY_COUNTS_EXT: str_idx = 2; break;
1637                         case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT: str_idx = 3; break;
1638                         case CL_DEVICE_PARTITION_BY_NAMES_EXT: str_idx = 4; break;
1639                         default:
1640                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "by <unknown> (%#" PRIx64 ")", val[cursor]);
1641                                 break;
1642                         }
1643                         if (str_idx >= 0) {
1644                                 /* string length */
1645                                 slen = strlen(ptstr[str_idx]);
1646                                 strncpy(ret->str.buf + szval, ptstr[str_idx], slen);
1647                                 szval += slen;
1648                         }
1649                         if (szval >= ret->str.sz) {
1650                                 trunc_strbuf(&ret->str);
1651                                 break;
1652                         }
1653                 }
1654                 if (szval < ret->str.sz)
1655                         ret->str.buf[szval] = '\0';
1656                 // TODO ret->value.??? = val
1657         }
1658         free(val);
1659 }
1660
1661
1662 /* Device partition affinity domains */
1663 void
1664 device_info_partition_affinities(struct device_info_ret *ret,
1665         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1666         const struct opt_out *output)
1667 {
1668         DEV_FETCH(cl_device_affinity_domain, val);
1669         if (!ret->err && val) {
1670                 /* iterate over affinity domain strings appending their textual form
1671                  * to ret->str */
1672                 size_t szval = 0;
1673                 cl_uint i = 0;
1674                 const char * const *affstr = (output->mode == CLINFO_HUMAN ?
1675                         affinity_domain_str : affinity_domain_raw_str);
1676                 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1677                 for (i = 0; i < affinity_domain_count; ++i) {
1678                         cl_device_affinity_domain cur = (cl_device_affinity_domain)(1) << i;
1679                         if (val & cur) {
1680                                 /* match: add separator if not first match */
1681                                 add_separator(&ret->str, &szval);
1682                                 szval += bufcpy(&ret->str, szval, affstr[i]);
1683                         }
1684                         if (szval >= ret->str.sz)
1685                                 break;
1686                 }
1687                 /* check for extra bits */
1688                 if (szval < ret->str.sz) {
1689                         cl_device_affinity_domain known_mask = ((cl_device_affinity_domain)(1) << affinity_domain_count) - 1;
1690                         cl_device_affinity_domain extra = val & ~known_mask;
1691                         if (extra) {
1692                                 add_separator(&ret->str, &szval);
1693                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra);
1694                         }
1695                 }
1696         }
1697 }
1698
1699 void
1700 device_info_partition_affinities_ext(struct device_info_ret *ret,
1701         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1702         const struct opt_out *output)
1703 {
1704         size_t numval = 0, szval = 0, cursor = 0, slen = 0;
1705         cl_device_partition_property_ext *val = NULL;
1706         const char * const *ptstr = (output->mode == CLINFO_HUMAN ?
1707                 affinity_domain_ext_str : affinity_domain_raw_ext_str);
1708
1709         set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1710
1711         GET_VAL_ARRAY(ret, loc);
1712
1713         szval = 0;
1714         if (!ret->err) {
1715                 for (cursor = 0; cursor < numval; ++cursor) {
1716                         int str_idx = -1;
1717
1718                         /* add separator for values past the first */
1719                         add_separator(&ret->str, &szval);
1720
1721                         switch (val[cursor]) {
1722                         case CL_AFFINITY_DOMAIN_NUMA_EXT: str_idx = 0; break;
1723                         case CL_AFFINITY_DOMAIN_L4_CACHE_EXT: str_idx = 1; break;
1724                         case CL_AFFINITY_DOMAIN_L3_CACHE_EXT: str_idx = 2; break;
1725                         case CL_AFFINITY_DOMAIN_L2_CACHE_EXT: str_idx = 3; break;
1726                         case CL_AFFINITY_DOMAIN_L1_CACHE_EXT: str_idx = 4; break;
1727                         case CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT: str_idx = 5; break;
1728                         default:
1729                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "<unknown> (%#" PRIx64 ")", val[cursor]);
1730                                 break;
1731                         }
1732                         if (str_idx >= 0) {
1733                                 /* string length */
1734                                 const char *str = ptstr[str_idx];
1735                                 slen = strlen(str);
1736                                 strncpy(ret->str.buf + szval, str, slen);
1737                                 szval += slen;
1738                         }
1739                         if (szval >= ret->str.sz) {
1740                                 trunc_strbuf(&ret->str);
1741                                 break;
1742                         }
1743                 }
1744                 ret->str.buf[szval] = '\0';
1745                 // TODO: ret->value.??? = val
1746         }
1747         free(val);
1748 }
1749
1750 /* Preferred / native vector widths */
1751 void
1752 device_info_vecwidth(struct device_info_ret *ret,
1753         const struct info_loc *loc, const struct device_info_checks *chk,
1754         const struct opt_out *output)
1755 {
1756         struct info_loc loc2 = *loc;
1757         cl_uint preferred = 0, native = 0;
1758         _GET_VAL(ret, loc, preferred);
1759         if (!ret->err) {
1760                 /* we get called with PREFERRED, NATIVE is at +0x30 offset, except for HALF,
1761                  * which is at +0x08 */
1762                 loc2.param.dev +=
1763                         (loc2.param.dev == CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF ? 0x08 : 0x30);
1764                 /* TODO update loc2.sname */
1765                 _GET_VAL(ret, &loc2, native);
1766
1767                 if (!ret->err) {
1768                         size_t szval = 0;
1769                         const char *ext = (loc2.param.dev == CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF ?
1770                                 chk->has_half : (loc2.param.dev == CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE ?
1771                                 chk->has_double : NULL));
1772                         szval = strbuf_printf(&ret->str, "%8u / %-8u", preferred, native);
1773                         if (ext)
1774                                 sprintf(ret->str.buf + szval, " (%s)", *ext ? ext : na);
1775                 }
1776         }
1777         ret->value.u32v.s[0] = preferred;
1778         ret->value.u32v.s[1] = native;
1779 }
1780
1781 /* Floating-point configurations */
1782 void
1783 device_info_fpconf(struct device_info_ret *ret,
1784         const struct info_loc *loc, const struct device_info_checks *chk,
1785         const struct opt_out *output)
1786 {
1787         /* When in HUMAN output, we are called unconditionally,
1788          * so we have to do some manual checks ourselves */
1789         const cl_bool get_it = (output->mode != CLINFO_HUMAN) ||
1790                 (loc->param.dev == CL_DEVICE_SINGLE_FP_CONFIG) ||
1791                 (loc->param.dev == CL_DEVICE_HALF_FP_CONFIG && dev_has_half(chk)) ||
1792                 (loc->param.dev == CL_DEVICE_DOUBLE_FP_CONFIG && dev_has_double(chk));
1793
1794         DEV_FETCH(cl_device_fp_config, val);
1795         /* Sanitize! */
1796         if (ret->err && !get_it) {
1797                 ret->err = CL_SUCCESS;
1798                 val = 0;
1799         }
1800
1801
1802         if (!ret->err) {
1803                 size_t szval = 0;
1804                 cl_uint i = 0;
1805                 const char * const *fpstr = (output->mode == CLINFO_HUMAN ?
1806                         fp_conf_str : fp_conf_raw_str);
1807                 set_separator(vbar_str);
1808                 if (output->mode == CLINFO_HUMAN) {
1809                         const char *why = na;
1810                         switch (loc->param.dev) {
1811                         case CL_DEVICE_HALF_FP_CONFIG:
1812                                 if (get_it)
1813                                         why = chk->has_half;
1814                                 break;
1815                         case CL_DEVICE_SINGLE_FP_CONFIG:
1816                                 why = core;
1817                                 break;
1818                         case CL_DEVICE_DOUBLE_FP_CONFIG:
1819                                 if (get_it)
1820                                         why = chk->has_double;
1821                                 break;
1822                         default:
1823                                 /* "this can't happen" (unless OpenCL starts supporting _other_ floating-point formats, maybe) */
1824                                 fprintf(stderr, "unsupported floating-point configuration parameter %s\n", loc->pname);
1825                         }
1826                         /* show 'why' it's being shown */
1827                         szval += strbuf_printf(&ret->str, "(%s)", why);
1828                 }
1829                 if (get_it) {
1830                         size_t num_flags = fp_conf_count;
1831                         /* The last flag, CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT is only considered
1832                          * in the single-precision case. half and double don't consider it,
1833                          * so we skip it altogether */
1834                         if (loc->param.dev != CL_DEVICE_SINGLE_FP_CONFIG)
1835                                 num_flags -= 1;
1836
1837                         for (i = 0; i < num_flags; ++i) {
1838                                 cl_device_fp_config cur = (cl_device_fp_config)(1) << i;
1839                                 if (output->mode == CLINFO_HUMAN) {
1840                                         szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
1841                                                 line_pfx, fpstr[i], bool_str[!!(val & cur)]);
1842                                 } else if (val & cur) {
1843                                         add_separator(&ret->str, &szval);
1844                                         szval += bufcpy(&ret->str, szval, fpstr[i]);
1845                                 }
1846                         }
1847                 }
1848         }
1849 }
1850
1851 /* Queue properties */
1852 void
1853 device_info_qprop(struct device_info_ret *ret,
1854         const struct info_loc *loc, const struct device_info_checks *chk,
1855         const struct opt_out *output)
1856 {
1857         DEV_FETCH(cl_command_queue_properties, val);
1858         if (!ret->err) {
1859                 size_t szval = 0;
1860                 cl_uint i = 0;
1861                 const char * const *qpstr = (output->mode == CLINFO_HUMAN ?
1862                         queue_prop_str : queue_prop_raw_str);
1863                 set_separator(vbar_str);
1864                 for (i = 0; i < queue_prop_count; ++i) {
1865                         cl_command_queue_properties cur = (cl_command_queue_properties)(1) << i;
1866                         if (output->mode == CLINFO_HUMAN) {
1867                                 szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
1868                                         line_pfx, qpstr[i], bool_str[!!(val & cur)]);
1869                         } else if (val & cur) {
1870                                 add_separator(&ret->str, &szval);
1871                                 szval += bufcpy(&ret->str, szval, qpstr[i]);
1872                         }
1873                 }
1874                 if (output->mode == CLINFO_HUMAN && loc->param.dev == CL_DEVICE_QUEUE_PROPERTIES &&
1875                         dev_has_intel_local_thread(chk))
1876                         sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
1877                                 line_pfx, "Local thread execution (Intel)", bool_str[CL_TRUE]);
1878         }
1879 }
1880
1881 /* Execution capbilities */
1882 void
1883 device_info_execap(struct device_info_ret *ret,
1884         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1885         const struct opt_out *output)
1886 {
1887         DEV_FETCH(cl_device_exec_capabilities, val);
1888         if (!ret->err) {
1889                 size_t szval = 0;
1890                 cl_uint i = 0;
1891                 const char * const *qpstr = (output->mode == CLINFO_HUMAN ?
1892                         execap_str : execap_raw_str);
1893                 set_separator(vbar_str);
1894                 for (i = 0; i < execap_count; ++i) {
1895                         cl_device_exec_capabilities cur = (cl_device_exec_capabilities)(1) << i;
1896                         if (output->mode == CLINFO_HUMAN) {
1897                                 szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
1898                                         line_pfx, qpstr[i], bool_str[!!(val & cur)]);
1899                         } else if (val & cur) {
1900                                 add_separator(&ret->str, &szval);
1901                                 szval += bufcpy(&ret->str, szval, qpstr[i]);
1902                         }
1903                 }
1904         }
1905 }
1906
1907 /* Arch bits and endianness (HUMAN) */
1908 void
1909 device_info_arch(struct device_info_ret *ret,
1910         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1911         const struct opt_out *output)
1912 {
1913         DEV_FETCH(cl_uint, bits);
1914         struct info_loc loc2 = *loc;
1915         RESET_LOC_PARAM(loc2, dev, CL_DEVICE_ENDIAN_LITTLE);
1916
1917         if (!ret->err) {
1918                 DEV_FETCH_LOC(cl_bool, val, &loc2);
1919                 if (!ret->err) {
1920                         strbuf_printf(&ret->str, "%" PRIu32 ", %s", bits, endian_str[val]);
1921                 }
1922         }
1923 }
1924
1925 /* SVM capabilities */
1926 void
1927 device_info_svm_cap(struct device_info_ret *ret,
1928         const struct info_loc *loc, const struct device_info_checks *chk,
1929         const struct opt_out *output)
1930 {
1931         const cl_bool is_20 = dev_is_20(chk);
1932         const cl_bool checking_core = (loc->param.dev == CL_DEVICE_SVM_CAPABILITIES);
1933         const cl_bool has_amd_svm = (checking_core && dev_has_amd_svm(chk));
1934         DEV_FETCH(cl_device_svm_capabilities, val);
1935
1936         if (!ret->err) {
1937                 size_t szval = 0;
1938                 cl_uint i = 0;
1939                 const char * const *scstr = (output->mode == CLINFO_HUMAN ?
1940                         svm_cap_str : svm_cap_raw_str);
1941                 set_separator(vbar_str);
1942                 if (output->mode == CLINFO_HUMAN && checking_core) {
1943                         /* show 'why' it's being shown */
1944                         szval += strbuf_printf(&ret->str, "(%s%s%s)",
1945                                 (is_20 ? core : empty_str),
1946                                 (is_20 && has_amd_svm ? comma_str : empty_str),
1947                                 chk->has_amd_svm);
1948                 }
1949                 for (i = 0; i < svm_cap_count; ++i) {
1950                         cl_device_svm_capabilities cur = (cl_device_svm_capabilities)(1) << i;
1951                         if (output->mode == CLINFO_HUMAN) {
1952                                 szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
1953                                         line_pfx, scstr[i], bool_str[!!(val & cur)]);
1954                         } else if (val & cur) {
1955                                 add_separator(&ret->str, &szval);
1956                                 szval += bufcpy(&ret->str, szval, scstr[i]);
1957                         }
1958                 }
1959         }
1960 }
1961
1962 /* Device terminate capability */
1963 void
1964 device_info_terminate_capability(struct device_info_ret *ret,
1965         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1966         const struct opt_out *output)
1967 {
1968         DEV_FETCH(cl_device_terminate_capability_khr, val);
1969         if (!ret->err && val) {
1970                 /* iterate over terminate capability strings appending their textual form
1971                  * to ret->str */
1972                 size_t szval = 0;
1973                 cl_uint i = 0;
1974                 const char * const *capstr = (output->mode == CLINFO_HUMAN ?
1975                         terminate_capability_str : terminate_capability_raw_str);
1976                 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1977                 for (i = 0; i < terminate_capability_count; ++i) {
1978                         cl_device_terminate_capability_khr cur = (cl_device_terminate_capability_khr)(1) << i;
1979                         if (val & cur) {
1980                                 /* match: add separator if not first match */
1981                                 add_separator(&ret->str, &szval);
1982                                 szval += bufcpy(&ret->str, szval, capstr[i]);
1983                         }
1984                         if (szval >= ret->str.sz)
1985                                 break;
1986                 }
1987                 /* check for extra bits */
1988                 if (szval < ret->str.sz) {
1989                         cl_device_terminate_capability_khr known_mask = ((cl_device_terminate_capability_khr)(1) << terminate_capability_count) - 1;
1990                         cl_device_terminate_capability_khr extra = val & ~known_mask;
1991                         if (extra) {
1992                                 add_separator(&ret->str, &szval);
1993                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra);
1994                         }
1995                 }
1996         }
1997 }
1998
1999 void
2000 device_info_p2p_dev_list(struct device_info_ret *ret,
2001         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
2002         const struct opt_out* UNUSED(output))
2003 {
2004         cl_device_id *val = NULL;
2005         size_t szval = 0, numval = 0;
2006         GET_VAL_ARRAY(ret, loc);
2007         if (!ret->err) {
2008                 size_t cursor = 0;
2009                 szval = 0;
2010                 for (cursor= 0; cursor < numval; ++cursor) {
2011                         if (szval > 0) {
2012                                 ret->str.buf[szval] = ' ';
2013                                 ++szval;
2014                         }
2015                         szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%p", (void*)val[cursor]);
2016                 }
2017                 // TODO: ret->value.??? = val;
2018         }
2019         free(val);
2020 }
2021
2022 void
2023 device_info_interop_list(struct device_info_ret *ret,
2024         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
2025         const struct opt_out *output)
2026 {
2027         cl_uint *val = NULL;
2028         size_t szval = 0, numval = 0;
2029         GET_VAL_ARRAY(ret, loc);
2030         if (!ret->err) {
2031                 size_t cursor = 0;
2032                 const cl_interop_name *interop_name_end = cl_interop_names + num_known_interops;
2033                 cl_uint human_raw = output->mode - CLINFO_HUMAN;
2034                 const char *groupsep = (output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
2035                 cl_bool first = CL_TRUE;
2036                 szval = 0;
2037                 for (cursor = 0; cursor < numval; ++cursor) {
2038                         cl_uint current = val[cursor];
2039                         if (!current && cursor < numval - 1) {
2040                                 /* A null value is used as group terminator, but we only print it
2041                                  * if it's not the final one
2042                                  */
2043                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%s", groupsep);
2044                                 first = CL_TRUE;
2045                         }
2046                         if (current) {
2047                                 cl_bool found = CL_FALSE;
2048                                 const cl_interop_name *n = cl_interop_names;
2049
2050                                 if (!first) {
2051                                         ret->str.buf[szval] = ' ';
2052                                         ++szval;
2053                                 }
2054
2055                                 while (n < interop_name_end) {
2056                                         if (current >= n->from && current <= n->to) {
2057                                                 found = CL_TRUE;
2058                                                 break;
2059                                         }
2060                                         ++n;
2061                                 }
2062                                 if (found) {
2063                                         cl_uint i = current - n->from;
2064                                         szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%s", n->value[i][human_raw]);
2065                                 } else {
2066                                         szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx32, val[cursor]);
2067                                 }
2068                                 first = CL_FALSE;
2069                         }
2070                         if (szval >= ret->str.sz) {
2071                                 trunc_strbuf(&ret->str);
2072                                 break;
2073                         }
2074                 }
2075                 // TODO: ret->value.??? = val;
2076         }
2077         free(val);
2078 }
2079
2080
2081 /*
2082  * Device info traits
2083  */
2084
2085 /* A CL_FALSE param means "just print pname" */
2086
2087 struct device_info_traits {
2088         enum output_modes output_mode;
2089         cl_device_info param; // CL_DEVICE_*
2090         const char *sname; // "CL_DEVICE_*"
2091         const char *pname; // "Device *"
2092         const char *sfx; // suffix for the output in non-raw mode
2093         /* pointer to function that retrieves the parameter */
2094         void (*show_func)(struct device_info_ret *,
2095                 const struct info_loc *, const struct device_info_checks *,
2096                 const struct opt_out *);
2097         /* pointer to function that checks if the parameter should be retrieved */
2098         cl_bool (*check_func)(const struct device_info_checks *);
2099 };
2100
2101 #define DINFO_SFX(symbol, name, sfx, typ) symbol, #symbol, name, sfx, device_info_##typ
2102 #define DINFO(symbol, name, typ) symbol, #symbol, name, NULL, device_info_##typ
2103
2104 struct device_info_traits dinfo_traits[] = {
2105         { CLINFO_BOTH, DINFO(CL_DEVICE_NAME, "Device Name", str), NULL },
2106         { CLINFO_BOTH, DINFO(CL_DEVICE_VENDOR, "Device Vendor", str), NULL },
2107         { CLINFO_BOTH, DINFO(CL_DEVICE_VENDOR_ID, "Device Vendor ID", hex), NULL },
2108         { CLINFO_BOTH, DINFO(CL_DEVICE_VERSION, "Device Version", str), NULL },
2109         { CLINFO_BOTH, DINFO(CL_DRIVER_VERSION, "Driver Version", str), NULL },
2110         { CLINFO_BOTH, DINFO(CL_DEVICE_OPENCL_C_VERSION, "Device OpenCL C Version", str), dev_is_11 },
2111         { CLINFO_BOTH, DINFO(CL_DEVICE_EXTENSIONS, "Device Extensions", str), NULL },
2112         { CLINFO_BOTH, DINFO(CL_DEVICE_TYPE, "Device Type", devtype), NULL },
2113
2114         { CLINFO_BOTH, DINFO(CL_DEVICE_BOARD_NAME_AMD, "Device Board Name (AMD)", str), dev_has_amd },
2115         { CLINFO_BOTH, DINFO(CL_DEVICE_PCIE_ID_AMD, "Device PCI-e ID (AMD)", hex), dev_has_amd },
2116         { CLINFO_BOTH, DINFO(CL_DEVICE_TOPOLOGY_AMD, "Device Topology (AMD)", devtopo_amd), dev_has_amd },
2117
2118         /* Device Topology (NV) is multipart, so different for HUMAN and RAW */
2119         { CLINFO_HUMAN, DINFO(CL_DEVICE_PCI_BUS_ID_NV, "Device Topology (NV)", devtopo_nv), dev_has_nv },
2120         { CLINFO_RAW, DINFO(CL_DEVICE_PCI_BUS_ID_NV, "Device PCI bus (NV)", int), dev_has_nv },
2121         { CLINFO_RAW, DINFO(CL_DEVICE_PCI_SLOT_ID_NV, "Device PCI slot (NV)", int), dev_has_nv },
2122
2123         { CLINFO_BOTH, DINFO(CL_DEVICE_PROFILE, "Device Profile", str), NULL },
2124         { CLINFO_BOTH, DINFO(CL_DEVICE_AVAILABLE, "Device Available", bool), NULL },
2125         { CLINFO_BOTH, DINFO(CL_DEVICE_COMPILER_AVAILABLE, "Compiler Available", bool), NULL },
2126         { CLINFO_BOTH, DINFO(CL_DEVICE_LINKER_AVAILABLE, "Linker Available", bool), dev_is_12 },
2127
2128         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_COMPUTE_UNITS, "Max compute units", int), NULL },
2129         { CLINFO_HUMAN, DINFO(CL_DEVICE_COMPUTE_UNITS_BITFIELD, "Available core IDs", core_ids), dev_has_arm_core_id_v2 },
2130         { CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_UNITS_BITFIELD, "Available core IDs", long), dev_has_arm_core_id_v2 },
2131         { CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, "SIMD per compute unit (AMD)", int), dev_is_gpu_amd },
2132         { CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_WIDTH_AMD, "SIMD width (AMD)", int), dev_is_gpu_amd },
2133         { CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, "SIMD instruction width (AMD)", int), dev_is_gpu_amd },
2134         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_MAX_CLOCK_FREQUENCY, "Max clock frequency", "MHz", int), NULL },
2135
2136         /* Device Compute Capability (NV) is multipart, so different for HUMAN and RAW */
2137         { CLINFO_HUMAN, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, "Compute Capability (NV)", cc_nv), dev_has_nv },
2138         { CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, INDENT "Compute Capability Major (NV)", int), dev_has_nv },
2139         { CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, INDENT "Compute Capability Minor (NV)", int), dev_has_nv },
2140
2141         /* GFXIP (AMD) is multipart, so different for HUMAN and RAW */
2142         /* TODO: find a better human-friendly name than GFXIP; v3 of the cl_amd_device_attribute_query
2143          * extension specification calls it “core engine GFXIP”, which honestly is not better than
2144          * our name choice. */
2145         { CLINFO_HUMAN, DINFO(CL_DEVICE_GFXIP_MAJOR_AMD, "Graphics IP (AMD)", gfxip_amd), dev_is_gpu_amd },
2146         { CLINFO_RAW, DINFO(CL_DEVICE_GFXIP_MAJOR_AMD, INDENT "Graphics IP MAJOR (AMD)", int), dev_is_gpu_amd },
2147         { CLINFO_RAW, DINFO(CL_DEVICE_GFXIP_MINOR_AMD, INDENT "Graphics IP MINOR (AMD)", int), dev_is_gpu_amd },
2148
2149         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_CORE_TEMPERATURE_ALTERA, "Core Temperature (Altera)", " C", int), dev_has_altera_dev_temp },
2150
2151         /* Device partition support: summary is only presented in HUMAN case */
2152         { CLINFO_HUMAN, DINFO(CL_DEVICE_PARTITION_MAX_SUB_DEVICES, "Device Partition", partition_header), dev_has_partition },
2153         { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_MAX_SUB_DEVICES, INDENT "Max number of sub-devices", int), dev_is_12 },
2154         { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_PROPERTIES, INDENT "Supported partition types", partition_types), dev_is_12 },
2155         { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_AFFINITY_DOMAIN, INDENT "Supported affinity domains", partition_affinities), dev_is_12 },
2156         { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_TYPES_EXT, INDENT "Supported partition types (ext)", partition_types_ext), dev_has_fission },
2157         { CLINFO_BOTH, DINFO(CL_DEVICE_AFFINITY_DOMAINS_EXT, INDENT "Supported affinity domains (ext)", partition_affinities_ext), dev_has_fission },
2158
2159         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, "Max work item dimensions", int), NULL },
2160         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_ITEM_SIZES, "Max work item sizes", szptr_times), NULL },
2161         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_GROUP_SIZE, "Max work group size", sz), NULL },
2162
2163         /* cl_amd_device_attribute_query v4 */
2164         { CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD, "Preferred work group size (AMD)", sz), dev_has_amd_v4 },
2165         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD, "Max work group size (AMD)", sz), dev_has_amd_v4 },
2166
2167         { CLINFO_BOTH, DINFO(CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, "Preferred work group size multiple", wg), dev_has_compiler_11 },
2168         { CLINFO_BOTH, DINFO(CL_DEVICE_WARP_SIZE_NV, "Warp size (NV)", int), dev_has_nv },
2169         { CLINFO_BOTH, DINFO(CL_DEVICE_WAVEFRONT_WIDTH_AMD, "Wavefront width (AMD)", int), dev_is_gpu_amd },
2170         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_NUM_SUB_GROUPS, "Max sub-groups per work group", int), dev_is_21 },
2171         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR, "Max named sub-group barriers", int), dev_has_subgroup_named_barrier },
2172         { CLINFO_BOTH, DINFO(CL_DEVICE_SUB_GROUP_SIZES_INTEL, "Sub-group sizes (Intel)", szptr_comma), dev_has_intel_required_subgroup_size },
2173
2174         /* Preferred/native vector widths: header is only presented in HUMAN case, that also pairs
2175          * PREFERRED and NATIVE in a single line */
2176 #define DINFO_VECWIDTH(Type, type) \
2177         { CLINFO_HUMAN, DINFO(CL_DEVICE_PREFERRED_VECTOR_WIDTH_##Type, INDENT #type, vecwidth), NULL }, \
2178         { CLINFO_RAW, DINFO(CL_DEVICE_PREFERRED_VECTOR_WIDTH_##Type, INDENT #type, int), NULL }, \
2179         { CLINFO_RAW, DINFO(CL_DEVICE_NATIVE_VECTOR_WIDTH_##Type, INDENT #type, int), dev_is_11 }
2180
2181         { CLINFO_HUMAN, DINFO(CL_FALSE, "Preferred / native vector sizes", str), NULL },
2182         DINFO_VECWIDTH(CHAR, char),
2183         DINFO_VECWIDTH(SHORT, short),
2184         DINFO_VECWIDTH(INT, int),
2185         DINFO_VECWIDTH(LONG, long),
2186         DINFO_VECWIDTH(HALF, half), /* this should be excluded for 1.0 */
2187         DINFO_VECWIDTH(FLOAT, float),
2188         DINFO_VECWIDTH(DOUBLE, double),
2189
2190         /* Floating point configurations */
2191 #define DINFO_FPCONF(Type, type, cond) \
2192         { CLINFO_HUMAN, DINFO(CL_DEVICE_##Type##_FP_CONFIG, #type "-precision Floating-point support", fpconf), NULL }, \
2193         { CLINFO_RAW, DINFO(CL_DEVICE_##Type##_FP_CONFIG, #type "-precision Floating-point support", fpconf), cond }
2194
2195         DINFO_FPCONF(HALF, Half, dev_has_half),
2196         DINFO_FPCONF(SINGLE, Single, NULL),
2197         DINFO_FPCONF(DOUBLE, Double, dev_has_double),
2198
2199         /* Address bits and endianness are written together for HUMAN, separate for RAW */
2200         { CLINFO_HUMAN, DINFO(CL_DEVICE_ADDRESS_BITS, "Address bits", arch), NULL },
2201         { CLINFO_RAW, DINFO(CL_DEVICE_ADDRESS_BITS, "Address bits", int), NULL },
2202         { CLINFO_RAW, DINFO(CL_DEVICE_ENDIAN_LITTLE, "Little Endian", bool), NULL },
2203
2204         /* Global memory */
2205         { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_SIZE, "Global memory size", mem), NULL },
2206         { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, "Global free memory (AMD)", free_mem_amd), dev_is_gpu_amd },
2207         { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, "Global memory channels (AMD)", int), dev_is_gpu_amd },
2208         { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, "Global memory banks per channel (AMD)", int), dev_is_gpu_amd },
2209         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, "Global memory bank width (AMD)", bytes_str, int), dev_is_gpu_amd },
2210         { CLINFO_BOTH, DINFO(CL_DEVICE_ERROR_CORRECTION_SUPPORT, "Error Correction support", bool), NULL },
2211         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_MEM_ALLOC_SIZE, "Max memory allocation", mem), NULL },
2212         { CLINFO_BOTH, DINFO(CL_DEVICE_HOST_UNIFIED_MEMORY, "Unified memory for Host and Device", bool), dev_is_11 },
2213         { CLINFO_BOTH, DINFO(CL_DEVICE_INTEGRATED_MEMORY_NV, "Integrated memory (NV)", bool), dev_has_nv },
2214
2215         { CLINFO_BOTH, DINFO(CL_DEVICE_SVM_CAPABILITIES, "Shared Virtual Memory (SVM) capabilities", svm_cap), dev_has_svm },
2216         { CLINFO_BOTH, DINFO(CL_DEVICE_SVM_CAPABILITIES_ARM, "Shared Virtual Memory (SVM) capabilities (ARM)", svm_cap), dev_has_arm_svm },
2217
2218         /* Alignment */
2219         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, "Minimum alignment for any data type", bytes_str, int), NULL },
2220         { CLINFO_HUMAN, DINFO(CL_DEVICE_MEM_BASE_ADDR_ALIGN, "Alignment of base address", bits), NULL },
2221         { CLINFO_RAW, DINFO(CL_DEVICE_MEM_BASE_ADDR_ALIGN, "Alignment of base address", int), NULL },
2222
2223         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PAGE_SIZE_QCOM, "Page size (QCOM)", bytes_str, sz), dev_has_qcom_ext_host_ptr },
2224         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM, "External memory padding (QCOM)", bytes_str, sz), dev_has_qcom_ext_host_ptr },
2225
2226         /* Atomics alignment, with HUMAN-only header */
2227         { CLINFO_HUMAN, DINFO(CL_FALSE, "Preferred alignment for atomics", str), dev_is_20 },
2228         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, INDENT "SVM", bytes_str, int), dev_is_20 },
2229         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, INDENT "Global", bytes_str, int), dev_is_20 },
2230         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, INDENT "Local", bytes_str, int), dev_is_20 },
2231
2232         /* Global variables. TODO some 1.2 devices respond to this too */
2233         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, "Max size for global variable", mem), dev_is_20 },
2234         { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, "Preferred total size of global vars", mem), dev_is_20 },
2235
2236         /* Global memory cache */
2237         { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, "Global Memory cache type", cachetype), NULL },
2238         { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, "Global Memory cache size", mem), dev_has_cache },
2239         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, "Global Memory cache line size", " bytes", int), dev_has_cache },
2240
2241         /* Image support */
2242         { CLINFO_BOTH, DINFO(CL_DEVICE_IMAGE_SUPPORT, "Image support", bool), NULL },
2243         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_SAMPLERS, INDENT "Max number of samplers per kernel", int), dev_has_images },
2244         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, INDENT "Max size for 1D images from buffer", pixels_str, sz), dev_has_images_12 },
2245         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, INDENT "Max 1D or 2D image array size", images_str, sz), dev_has_images_12 },
2246         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, INDENT "Base address alignment for 2D image buffers", bytes_str, sz), dev_has_image2d_buffer },
2247         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_PITCH_ALIGNMENT, INDENT "Pitch alignment for 2D image buffers", pixels_str, sz), dev_has_image2d_buffer },
2248
2249         /* Image dimensions are split for RAW, combined for HUMAN */
2250         { CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_IMAGE2D_MAX_HEIGHT, INDENT "Max 2D image size",  pixels_str, img_sz_2d), dev_has_images },
2251         { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE2D_MAX_HEIGHT, INDENT "Max 2D image height",  sz), dev_has_images },
2252         { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE2D_MAX_WIDTH, INDENT "Max 2D image width",  sz), dev_has_images },
2253         { CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, INDENT "Max planar YUV image size",  pixels_str, img_sz_2d), dev_has_intel_planar_yuv },
2254         { CLINFO_RAW, DINFO(CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, INDENT "Max planar YUV image height",  sz), dev_has_intel_planar_yuv },
2255         { CLINFO_RAW, DINFO(CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL, INDENT "Max planar YUV image width",  sz), dev_has_intel_planar_yuv },
2256         { CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_IMAGE3D_MAX_HEIGHT, INDENT "Max 3D image size",  pixels_str, img_sz_3d), dev_has_images },
2257         { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_HEIGHT, INDENT "Max 3D image height",  sz), dev_has_images },
2258         { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_WIDTH, INDENT "Max 3D image width",  sz), dev_has_images },
2259         { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_DEPTH, INDENT "Max 3D image depth",  sz), dev_has_images },
2260
2261         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_READ_IMAGE_ARGS, INDENT "Max number of read image args", int), dev_has_images },
2262         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WRITE_IMAGE_ARGS, INDENT "Max number of write image args", int), dev_has_images },
2263         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, INDENT "Max number of read/write image args", int), dev_has_images_20 },
2264
2265         /* Pipes */
2266         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_PIPE_ARGS, "Max number of pipe args", int), dev_is_20 },
2267         { CLINFO_BOTH, DINFO(CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, "Max active pipe reservations", int), dev_is_20 },
2268         { CLINFO_BOTH, DINFO(CL_DEVICE_PIPE_MAX_PACKET_SIZE, "Max pipe packet size", mem_int), dev_is_20 },
2269
2270         /* Local memory */
2271         { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_TYPE, "Local memory type", lmemtype), NULL },
2272         { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_SIZE, "Local memory size", mem), dev_has_lmem },
2273         { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, "Local memory syze per CU (AMD)", mem), dev_is_gpu_amd },
2274         { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_BANKS_AMD, "Local memory banks (AMD)", int), dev_is_gpu_amd },
2275         { CLINFO_BOTH, DINFO(CL_DEVICE_REGISTERS_PER_BLOCK_NV, "Registers per block (NV)", int), dev_has_nv },
2276
2277         /* Constant memory */
2278         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_CONSTANT_ARGS, "Max number of constant args", int), NULL },
2279         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, "Max constant buffer size", mem), NULL },
2280         { CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD, "Preferred constant buffer size (AMD)", mem_sz), dev_has_amd_v4 },
2281
2282         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_PARAMETER_SIZE, "Max size of kernel argument", mem), NULL },
2283         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT, "Max number of atomic counters", sz), dev_has_atomic_counters },
2284
2285         /* Queue properties */
2286         { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_PROPERTIES, "Queue properties", qprop), dev_not_20 },
2287         { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, "Queue properties (on host)", qprop), dev_is_20 },
2288         { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, "Queue properties (on device)", qprop), dev_is_20 },
2289         { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, INDENT "Preferred size", mem), dev_is_20 },
2290         { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, INDENT "Max size", mem), dev_is_20 },
2291         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ON_DEVICE_QUEUES, "Max queues on device", int), dev_is_20 },
2292         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ON_DEVICE_EVENTS, "Max events on device", int), dev_is_20 },
2293
2294         /* Terminate context */
2295         { CLINFO_BOTH, DINFO(CL_DEVICE_TERMINATE_CAPABILITY_KHR_1x, "Terminate capability (1.2 define)", terminate_capability), dev_has_terminate_context },
2296         { CLINFO_BOTH, DINFO(CL_DEVICE_TERMINATE_CAPABILITY_KHR_2x, "Terminate capability (2.x define)", terminate_capability), dev_has_terminate_context },
2297
2298         /* Interop */
2299         { CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, "Prefer user sync for interop", bool), dev_is_12 },
2300         { CLINFO_BOTH, DINFO(CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL, "Number of simultaneous interops (Intel)", int), dev_has_simultaneous_sharing },
2301         { CLINFO_BOTH, DINFO(CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL, "Simultaneous interops", interop_list), dev_has_simultaneous_sharing },
2302
2303         /* P2P buffer copy */
2304         { CLINFO_BOTH, DINFO(CL_DEVICE_NUM_P2P_DEVICES_AMD, "Number of P2P devices (AMD)", int), dev_has_p2p },
2305         { CLINFO_BOTH, DINFO(CL_DEVICE_P2P_DEVICES_AMD, "P2P devices (AMD)", p2p_dev_list), dev_has_p2p },
2306
2307         /* Profiling resolution */
2308         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PROFILING_TIMER_RESOLUTION, "Profiling timer resolution", "ns", sz), NULL },
2309         { CLINFO_HUMAN, DINFO(CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, "Profiling timer offset since Epoch (AMD)", time_offset), dev_has_amd },
2310         { CLINFO_RAW, DINFO(CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, "Profiling timer offset since Epoch (AMD)", long), dev_has_amd },
2311
2312         /* Kernel execution capabilities */
2313         { CLINFO_BOTH, DINFO(CL_DEVICE_EXECUTION_CAPABILITIES, "Execution capabilities", execap), NULL },
2314         { CLINFO_BOTH, DINFO(CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, INDENT "Sub-group independent forward progress", bool), dev_is_21 },
2315         { CLINFO_BOTH, DINFO(CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD, INDENT "Thread trace supported (AMD)", bool), dev_is_gpu_amd },
2316         { CLINFO_BOTH, DINFO(CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, INDENT "Kernel execution timeout (NV)", bool), dev_has_nv },
2317         { CLINFO_BOTH, DINFO(CL_DEVICE_GPU_OVERLAP_NV, "Concurrent copy and kernel execution (NV)", bool), dev_has_nv },
2318         { CLINFO_BOTH, DINFO(CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV, INDENT "Number of async copy engines", int), dev_has_nv },
2319         { CLINFO_BOTH, DINFO(CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD, INDENT "Number of async queues (AMD)", int), dev_has_amd_v4 },
2320         /* TODO FIXME undocumented, experimental */
2321         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_REAL_TIME_COMPUTE_QUEUES_AMD, INDENT "Max real-time compute queues (AMD)", int), dev_has_amd_v4 },
2322         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_REAL_TIME_COMPUTE_UNITS_AMD, INDENT "Max real-time compute units (AMD)", int), dev_has_amd_v4 },
2323
2324         /* TODO: this should tell if it's being done due to the device being 2.1 or due to it having the extension */
2325         { CLINFO_BOTH, DINFO(CL_DEVICE_IL_VERSION, INDENT "IL version", str), dev_has_il },
2326         { CLINFO_BOTH, DINFO(CL_DEVICE_SPIR_VERSIONS, INDENT "SPIR versions", str), dev_has_spir },
2327         { CLINFO_BOTH, DINFO(CL_DEVICE_PRINTF_BUFFER_SIZE, "printf() buffer size", mem_sz), dev_is_12 },
2328         { CLINFO_BOTH, DINFO(CL_DEVICE_BUILT_IN_KERNELS, "Built-in kernels", str), dev_is_12 },
2329         { CLINFO_BOTH, DINFO(CL_DEVICE_ME_VERSION_INTEL, "Motion Estimation accelerator version (Intel)", int), dev_has_intel_AME },
2330         { CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_VERSION_INTEL, INDENT "Device-side AVC Motion Estimation version", int), dev_has_intel_AVC_ME },
2331         { CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL, INDENT INDENT "Supports texture sampler use", bool), dev_has_intel_AVC_ME },
2332         { CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL, INDENT INDENT "Supports preemption", bool), dev_has_intel_AVC_ME },
2333 };
2334
2335 /* Process all the device info in the traits, except if param_whitelist is not NULL,
2336  * in which case only those in the whitelist will be processed.
2337  * If present, the whitelist should be sorted in the order of appearance of the parameters
2338  * in the traits table, and terminated by the value CL_FALSE
2339  */
2340
2341 void
2342 printDeviceInfo(cl_device_id dev, const struct platform_list *plist, cl_uint p,
2343         const cl_device_info *param_whitelist, /* list of device info to process, or NULL */
2344         const struct opt_out *output)
2345 {
2346         char *extensions = NULL;
2347
2348         /* pointer to the traits for CL_DEVICE_EXTENSIONS */
2349         const struct device_info_traits *extensions_traits = NULL;
2350
2351         struct device_info_checks chk;
2352         struct device_info_ret ret;
2353         struct info_loc loc;
2354
2355         memset(&chk, 0, sizeof(chk));
2356         chk.pinfo_checks = plist->platform_checks + p;
2357         chk.dev_version = 10;
2358
2359         INIT_RET(ret, "device");
2360
2361         reset_loc(&loc, __func__);
2362         loc.plat = plist->platform[p];
2363         loc.dev = dev;
2364
2365         for (loc.line = 0; loc.line < ARRAY_SIZE(dinfo_traits); ++loc.line) {
2366
2367                 const struct device_info_traits *traits = dinfo_traits + loc.line;
2368
2369                 /* checked is true if there was no condition to check for, or if the
2370                  * condition was satisfied
2371                  */
2372                 int checked = !(traits->check_func && !traits->check_func(&chk));
2373
2374                 loc.sname = traits->sname;
2375                 loc.pname = (output->mode == CLINFO_HUMAN ?
2376                         traits->pname : traits->sname);
2377                 loc.param.dev = traits->param;
2378
2379                 /* Whitelist check: finish if done traversing the list,
2380                  * skip current param if it's not the right one
2381                  */
2382                 if ((output->cond == COND_PROP_CHECK || output->brief) && param_whitelist) {
2383                         if (*param_whitelist == CL_FALSE)
2384                                 break;
2385                         if (traits->param != *param_whitelist)
2386                                 continue;
2387                         ++param_whitelist;
2388                 }
2389
2390                 /* skip if it's not for this output mode */
2391                 if (!(output->mode & traits->output_mode))
2392                         continue;
2393
2394                 if (output->cond == COND_PROP_CHECK && !checked)
2395                         continue;
2396
2397                 cur_sfx = (output->mode == CLINFO_HUMAN && traits->sfx) ? traits->sfx : empty_str;
2398
2399                 ret.str.buf[0] = '\0';
2400                 ret.err_str.buf[0] = '\0';
2401
2402                 /* Handle headers */
2403                 if (traits->param == CL_FALSE) {
2404                         ret.err = CL_SUCCESS;
2405                         show_strbuf(&ret.str, loc.pname, 0, ret.err);
2406                         continue;
2407                 }
2408
2409                 traits->show_func(&ret, &loc, &chk, output);
2410
2411                 if (traits->param == CL_DEVICE_EXTENSIONS) {
2412                         /* make a backup of the extensions string, regardless of
2413                          * errors */
2414                         const char *msg = RET_BUF(ret)->buf;
2415                         size_t len = strlen(msg);
2416                         extensions_traits = traits;
2417                         ALLOC(extensions, len+1, "extensions");
2418                         memcpy(extensions, msg, len);
2419                         extensions[len] = '\0';
2420                 } else {
2421                         if (ret.err) {
2422                                 /* if there was an error retrieving the property,
2423                                  * skip if it wasn't expected to work and we
2424                                  * weren't asked to show everything regardless of
2425                                  * error */
2426                                 if (!checked && output->cond != COND_PROP_SHOW)
2427                                         continue;
2428
2429                         } else {
2430                                 /* on success, but empty result, show (n/a) */
2431                                 if (ret.str.buf[0] == '\0')
2432                                         bufcpy(&ret.str, 0, not_specified(output));
2433                         }
2434                         if (output->brief)
2435                                 printf("%s%s\n", line_pfx, RET_BUF(ret)->buf);
2436                         else
2437                                 show_strbuf(RET_BUF(ret), loc.pname, 0, ret.err);
2438                 }
2439
2440                 if (ret.err)
2441                         continue;
2442
2443                 switch (traits->param) {
2444                 case CL_DEVICE_VERSION:
2445                         /* compute numeric value for OpenCL version */
2446                         chk.dev_version = getOpenCLVersion(ret.str.buf + 7);
2447                         break;
2448                 case CL_DEVICE_EXTENSIONS:
2449                         identify_device_extensions(extensions, &chk);
2450                         break;
2451                 case CL_DEVICE_TYPE:
2452                         chk.devtype = ret.value.devtype;
2453                         break;
2454                 case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
2455                         chk.cachetype = ret.value.cachetype;
2456                         break;
2457                 case CL_DEVICE_LOCAL_MEM_TYPE:
2458                         chk.lmemtype = ret.value.lmemtype;
2459                         break;
2460                 case CL_DEVICE_IMAGE_SUPPORT:
2461                         chk.image_support = ret.value.b;
2462                         break;
2463                 case CL_DEVICE_COMPILER_AVAILABLE:
2464                         chk.compiler_available = ret.value.b;
2465                         break;
2466                 default:
2467                         /* do nothing */
2468                         break;
2469                 }
2470         }
2471
2472         // and finally the extensions, if we retrieved them
2473         if (extensions)
2474                 printf("%s" I1_STR "%s\n", line_pfx, (output->mode == CLINFO_HUMAN ?
2475                                 extensions_traits->pname :
2476                                 extensions_traits->sname), extensions);
2477         free(extensions);
2478         extensions = NULL;
2479         UNINIT_RET(ret);
2480 }
2481
2482 /* list of allowed properties for AMD offline devices */
2483 /* everything else seems to be set to 0, and all the other string properties
2484  * actually segfault the driver */
2485
2486 static const cl_device_info amd_offline_info_whitelist[] = {
2487         CL_DEVICE_NAME,
2488         /* These are present, but all the same, so just skip them:
2489         CL_DEVICE_VENDOR,
2490         CL_DEVICE_VENDOR_ID,
2491         CL_DEVICE_VERSION,
2492         CL_DRIVER_VERSION,
2493         CL_DEVICE_OPENCL_C_VERSION,
2494         */
2495         CL_DEVICE_EXTENSIONS,
2496         CL_DEVICE_TYPE,
2497         CL_DEVICE_GFXIP_MAJOR_AMD,
2498         CL_DEVICE_GFXIP_MINOR_AMD,
2499         CL_DEVICE_MAX_WORK_GROUP_SIZE,
2500         CL_FALSE
2501 };
2502
2503 static const cl_device_info list_info_whitelist[] = {
2504         CL_DEVICE_NAME,
2505         CL_FALSE
2506 };
2507
2508 /* return a list of offline devices from the AMD extension */
2509 cl_device_id *
2510 fetchOfflineDevicesAMD(const struct platform_list *plist, cl_uint p,
2511         /* the number of devices will be returned in ret->value.u32,
2512          * the associated context in ret->base.ctx;
2513          */
2514         struct device_info_ret *ret)
2515 {
2516         cl_platform_id pid = plist->platform[p];
2517         cl_device_id *device = NULL;
2518         cl_uint num_devs = 0;
2519         cl_context ctx = NULL;
2520
2521         cl_context_properties ctxpft[] = {
2522                 CL_CONTEXT_PLATFORM, (cl_context_properties)pid,
2523                 CL_CONTEXT_OFFLINE_DEVICES_AMD, (cl_context_properties)CL_TRUE,
2524                 0
2525         };
2526
2527         ctx = clCreateContextFromType(ctxpft, CL_DEVICE_TYPE_ALL,
2528                 NULL, NULL, &ret->err);
2529         REPORT_ERROR(&ret->err_str, ret->err, "create context");
2530
2531         if (!ret->err) {
2532                 ret->err = REPORT_ERROR(&ret->err_str,
2533                         clGetContextInfo(ctx, CL_CONTEXT_NUM_DEVICES,
2534                                 sizeof(num_devs), &num_devs, NULL),
2535                         "get num devs");
2536         }
2537
2538         if (!ret->err) {
2539                 ALLOC(device, num_devs, "offline devices");
2540
2541                 ret->err = REPORT_ERROR(&ret->err_str,
2542                         clGetContextInfo(ctx, CL_CONTEXT_DEVICES,
2543                                 num_devs*sizeof(*device), device, NULL),
2544                         "get devs");
2545         }
2546
2547         if (ret->err) {
2548                 if (ctx) clReleaseContext(ctx);
2549                 free(device);
2550                 device = NULL;
2551         } else {
2552                 ret->value.u32 = num_devs;
2553                 ret->base.ctx = ctx;
2554         }
2555         return device;
2556 }
2557
2558 void printPlatformName(const struct platform_list *plist, cl_uint p, struct _strbuf *str,
2559         const struct opt_out *output)
2560 {
2561         const struct platform_data *pdata = plist->pdata + p;
2562         const char *brief_prefix = (output->mode == CLINFO_HUMAN ? "Platform #" : "");
2563         const char *title = (output->mode == CLINFO_HUMAN  ? pinfo_traits[0].pname :
2564                 pinfo_traits[0].sname);
2565         const int prefix_width = -line_pfx_len*(!output->brief);
2566         if (output->brief) {
2567                 strbuf_printf(str, "%s%" PRIu32 ": ", brief_prefix, p);
2568         } else if (output->mode == CLINFO_RAW) {
2569                 strbuf_printf(str, "[%s/*]", pdata->sname);
2570         }
2571         sprintf(line_pfx, "%*s", prefix_width, str->buf);
2572
2573         if (output->brief)
2574                 printf("%s%s\n", line_pfx, pdata->pname);
2575         else
2576                 printf("%s" I1_STR "%s\n", line_pfx, title, pdata->pname);
2577 }
2578
2579 void printPlatformDevices(const struct platform_list *plist, cl_uint p,
2580         const cl_device_id *device, cl_uint ndevs,
2581         struct _strbuf *str, const struct opt_out *output, cl_bool these_are_offline)
2582 {
2583         const struct platform_data *pdata = plist->pdata + p;
2584         const cl_device_info *param_whitelist = output->brief ? list_info_whitelist :
2585                 these_are_offline ? amd_offline_info_whitelist : NULL;
2586         cl_uint d;
2587
2588         if (output->detailed)
2589                 printf("%s" I0_STR "%" PRIu32 "\n",
2590                         line_pfx,
2591                         num_devs_header(output, these_are_offline),
2592                         ndevs);
2593
2594         for (d = 0; d < ndevs; ++d) {
2595                 const cl_device_id dev = device[d];
2596                 if (output->brief) {
2597                         const cl_bool last_device = (d == ndevs - 1 &&
2598                                 output->mode != CLINFO_RAW &&
2599                                 (!output->offline ||
2600                                  !pdata->has_amd_offline ||
2601                                  these_are_offline));
2602                         if (output->mode == CLINFO_RAW)
2603                                 sprintf(line_pfx, "%" PRIu32 "%c%" PRIu32 ": ",
2604                                         p,
2605                                         these_are_offline ? '*' : '.',
2606                                         d);
2607                         else
2608                                 sprintf(line_pfx, " +-- %sDevice #%" PRIu32 ": ",
2609                                         these_are_offline ? "Offline " : "",
2610                                         d);
2611                         if (last_device)
2612                                 line_pfx[1] = '`';
2613                 } else if (line_pfx_len > 0) {
2614                         cl_int sd = (these_are_offline ? -1 : 1)*(cl_int)d;
2615                         strbuf_printf(str, "[%s/%" PRId32 "]", pdata->sname, sd);
2616                         sprintf(line_pfx, "%*s", -line_pfx_len, str->buf);
2617                 }
2618                 printDeviceInfo(dev, plist, p, param_whitelist, output);
2619                 if (output->detailed && d < pdata[p].ndevs - 1)
2620                         puts("");
2621                 fflush(stdout);
2622                 fflush(stderr);
2623         }
2624 }
2625
2626
2627 void showDevices(const struct platform_list *plist, const struct opt_out *output)
2628 {
2629         const cl_uint num_platforms = plist->num_platforms;
2630         const cl_uint maxdevs = plist->max_devs;
2631         const struct platform_data *pdata = plist->pdata;
2632
2633         cl_uint p;
2634         struct _strbuf str;
2635         init_strbuf(&str);
2636         realloc_strbuf(&str, 1024, "show devices");
2637
2638         if (output->mode == CLINFO_RAW) {
2639                 if (output->brief)
2640                         strbuf_printf(&str, "%" PRIu32 ".%" PRIu32 ": ", num_platforms, maxdevs);
2641                 else
2642                         strbuf_printf(&str, "[%*s/%" PRIu32 "] ",
2643                                 plist->max_sname_len, "", maxdevs);
2644         } else {
2645                 if (output->brief)
2646                         strbuf_printf(&str, " +-- %sDevice #%" PRIu32 ": ",
2647                                 (output->offline ? "Offline " : ""), maxdevs);
2648                 else
2649                         str.buf[0] = '\0'; /* reset */
2650                 /* TODO we have no prefix in HUMAN detailed output mode,
2651                  * consider adding one
2652                  */
2653         }
2654
2655         if (str.buf[0]) {
2656                 line_pfx_len = (int)(strlen(str.buf) + 1);
2657                 REALLOC(line_pfx, line_pfx_len, "line prefix");
2658                 str.buf[0] = '\0'; /* reset */
2659         }
2660
2661         for (p = 0; p < num_platforms; ++p) {
2662                 printPlatformName(plist, p, &str, output);
2663
2664                 printPlatformDevices(plist, p,
2665                         get_platform_devs(plist, p), pdata[p].ndevs,
2666                         &str, output, CL_FALSE);
2667
2668                 if (output->offline && pdata[p].has_amd_offline) {
2669                         struct device_info_ret ret;
2670                         cl_device_id *devs = NULL;
2671
2672                         INIT_RET(ret, "offline device");
2673                         if (output->detailed)
2674                                 puts("");
2675
2676                         devs = fetchOfflineDevicesAMD(plist, p, &ret);
2677                         if (ret.err) {
2678                                 puts(ret.err_str.buf);
2679                         } else {
2680                                 printPlatformDevices(plist, p, devs, ret.value.u32,
2681                                         &str, output, CL_TRUE);
2682                                 clReleaseContext(ret.base.ctx);
2683                                 free(devs);
2684                         }
2685                         UNINIT_RET(ret);
2686                 }
2687                 if (output->detailed)
2688                         puts("");
2689         }
2690         free_strbuf(&str);
2691 }
2692
2693 /* check the behavior of clGetPlatformInfo() when given a NULL platform ID */
2694 void checkNullGetPlatformName(const struct opt_out *output)
2695 {
2696         struct device_info_ret ret;
2697         struct info_loc loc;
2698
2699         INIT_RET(ret, "null ctx");
2700         reset_loc(&loc, __func__);
2701         RESET_LOC_PARAM(loc, plat, CL_PLATFORM_NAME);
2702
2703         ret.err = clGetPlatformInfo(NULL, CL_PLATFORM_NAME, ret.str.sz, ret.str.buf, NULL);
2704         if (ret.err == CL_INVALID_PLATFORM) {
2705                 bufcpy(&ret.err_str, 0, no_plat(output));
2706         } else {
2707                 loc.line = __LINE__ + 1;
2708                 REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s");
2709         }
2710         printf(I1_STR "%s\n",
2711                 "clGetPlatformInfo(NULL, CL_PLATFORM_NAME, ...)", RET_BUF(ret)->buf);
2712         UNINIT_RET(ret);
2713 }
2714
2715 /* check the behavior of clGetDeviceIDs() when given a NULL platform ID;
2716  * return the index of the default platform in our array of platform IDs,
2717  * or num_platforms (which is an invalid platform index) in case of errors
2718  * or no platform or device found.
2719  */
2720 cl_uint checkNullGetDevices(const struct platform_list *plist, const struct opt_out *output)
2721 {
2722         const cl_uint num_platforms = plist->num_platforms;
2723         const struct platform_data *pdata = plist->pdata;
2724         const cl_platform_id *platform = plist->platform;
2725
2726         struct device_info_ret ret;
2727         struct info_loc loc;
2728
2729         cl_uint i = 0; /* generic iterator */
2730         cl_device_id dev = NULL; /* sample device */
2731         cl_platform_id plat = NULL; /* detected platform */
2732
2733         cl_uint found = 0; /* number of platforms found */
2734         cl_uint pidx = num_platforms; /* index of the platform found */
2735         cl_uint numdevs = 0;
2736
2737         INIT_RET(ret, "null get devices");
2738
2739         reset_loc(&loc, __func__);
2740         loc.sname = "device IDs";
2741
2742         ret.err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 0, NULL, &numdevs);
2743         /* TODO we should check other CL_DEVICE_TYPE_* combinations, since a smart
2744          * implementation might give you a different default platform for GPUs
2745          * and for CPUs.
2746          * Of course the “no devices” case would then need to be handled differently.
2747          * The logic might be maintained similarly, provided we also gather
2748          * the number of devices of each type for each platform, although it's
2749          * obviously more likely to have multiple platforms with no devices
2750          * of a given type.
2751          */
2752
2753         switch (ret.err) {
2754         case CL_INVALID_PLATFORM:
2755                 bufcpy(&ret.err_str, 0, no_plat(output));
2756                 break;
2757         case CL_DEVICE_NOT_FOUND:
2758                  /* No devices were found, see if there are platforms with
2759                   * no devices, and if there's only one, assume this is the
2760                   * one being used as default by the ICD loader */
2761                 for (i = 0; i < num_platforms; ++i) {
2762                         if (pdata[i].ndevs == 0) {
2763                                 ++found;
2764                                 if (found > 1)
2765                                         break;
2766                                 else {
2767                                         plat = platform[i];
2768                                         pidx = i;
2769                                 }
2770                         }
2771                 }
2772
2773                 switch (found) {
2774                 case 0:
2775                         bufcpy(&ret.err_str, 0, (output->mode == CLINFO_HUMAN ?
2776                                 "<error: 0 devices, no matching platform!>" :
2777                                 "CL_DEVICE_NOT_FOUND | CL_INVALID_PLATFORM"));
2778                         break;
2779                 case 1:
2780                         bufcpy(&ret.str, 0, (output->mode == CLINFO_HUMAN ?
2781                                 pdata[pidx].pname :
2782                                 pdata[pidx].sname));
2783                         break;
2784                 default: /* found > 1 */
2785                         bufcpy(&ret.err_str, 0, (output->mode == CLINFO_HUMAN ?
2786                                 "<error: 0 devices, multiple matching platforms!>" :
2787                                 "CL_DEVICE_NOT_FOUND | ????"));
2788                         break;
2789                 }
2790                 break;
2791         default:
2792                 loc.line = __LINE__+1;
2793                 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get number of %s")) break;
2794
2795                 /* Determine platform by looking at the CL_DEVICE_PLATFORM of
2796                  * one of the devices */
2797                 ret.err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 1, &dev, NULL);
2798                 loc.line = __LINE__+1;
2799                 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
2800
2801                 RESET_LOC_PARAM(loc, dev, CL_DEVICE_PLATFORM);
2802                 ret.err = clGetDeviceInfo(dev, CL_DEVICE_PLATFORM,
2803                         sizeof(plat), &plat, NULL);
2804                 loc.line = __LINE__+1;
2805                 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
2806
2807                 for (i = 0; i < num_platforms; ++i) {
2808                         if (platform[i] == plat) {
2809                                 pidx = i;
2810                                 strbuf_printf(&ret.str, "%s [%s]",
2811                                         (output->mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS"),
2812                                         pdata[i].sname);
2813                                 break;
2814                         }
2815                 }
2816                 if (i == num_platforms) {
2817                         ret.err = CL_INVALID_PLATFORM;
2818                         strbuf_printf(&ret.err_str, "<error: platform %p not found>", (void*)plat);
2819                 }
2820         }
2821         printf(I1_STR "%s\n",
2822                 "clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, ...)", RET_BUF(ret)->buf);
2823
2824         UNINIT_RET(ret);
2825         return pidx;
2826 }
2827
2828 void checkNullCtx(struct device_info_ret *ret,
2829         const struct platform_list *plist, cl_uint pidx, const char *which,
2830         const struct opt_out *output)
2831 {
2832         const cl_device_id *dev = plist->all_devs + plist->dev_offset[pidx];
2833         struct info_loc loc;
2834         cl_context ctx = clCreateContext(NULL, 1, dev, NULL, NULL, &ret->err);
2835
2836         reset_loc(&loc, __func__);
2837         loc.sname = which;
2838         loc.line = __LINE__+2;
2839
2840         if (!REPORT_ERROR_LOC(ret, ret->err, &loc, "create context with device from %s platform"))
2841                 strbuf_printf(&ret->str, "%s [%s]",
2842                         (output->mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS"),
2843                         plist->pdata[pidx].sname);
2844         if (ctx) {
2845                 clReleaseContext(ctx);
2846                 ctx = NULL;
2847         }
2848 }
2849
2850 /* check behavior of clCreateContextFromType() with NULL cl_context_properties */
2851 void checkNullCtxFromType(const struct platform_list *plist, const struct opt_out *output)
2852 {
2853         const cl_uint num_platforms = plist->num_platforms;
2854         const struct platform_data *pdata = plist->pdata;
2855         const cl_platform_id *platform = plist->platform;
2856
2857         size_t t; /* type iterator */
2858         size_t i; /* generic iterator */
2859         char def[1024];
2860         cl_context ctx = NULL;
2861
2862         size_t ndevs = 8;
2863         size_t szval = 0;
2864         size_t cursz = ndevs*sizeof(cl_device_id);
2865         cl_platform_id plat = NULL;
2866         cl_device_id *devs = NULL;
2867
2868         struct device_info_ret ret;
2869         struct info_loc loc;
2870
2871         const char *platname_prop = (output->mode == CLINFO_HUMAN ?
2872                 pinfo_traits[0].pname :
2873                 pinfo_traits[0].sname);
2874
2875         const char *devname_prop = (output->mode == CLINFO_HUMAN ?
2876                 dinfo_traits[0].pname :
2877                 dinfo_traits[0].sname);
2878
2879         reset_loc(&loc, __func__);
2880         INIT_RET(ret, "null ctx from type");
2881
2882         ALLOC(devs, ndevs, "context devices");
2883
2884         for (t = 1; t < devtype_count; ++t) { /* we skip 0 */
2885                 loc.sname = device_type_raw_str[t];
2886
2887                 strbuf_printf(&ret.str, "clCreateContextFromType(NULL, %s)", loc.sname);
2888                 sprintf(def, I1_STR, ret.str.buf);
2889
2890                 loc.line = __LINE__+1;
2891                 ctx = clCreateContextFromType(NULL, devtype[t], NULL, NULL, &ret.err);
2892
2893                 switch (ret.err) {
2894                 case CL_INVALID_PLATFORM:
2895                         bufcpy(&ret.err_str, 0, no_plat(output)); break;
2896                 case CL_DEVICE_NOT_FOUND:
2897                         bufcpy(&ret.err_str, 0, no_dev_found(output)); break;
2898                 case CL_INVALID_DEVICE_TYPE: /* e.g. _CUSTOM device on 1.1 platform */
2899                         bufcpy(&ret.err_str, 0, invalid_dev_type(output)); break;
2900                 case CL_INVALID_VALUE: /* This is what apple returns for the case above */
2901                         bufcpy(&ret.err_str, 0, invalid_dev_type(output)); break;
2902                 case CL_DEVICE_NOT_AVAILABLE:
2903                         bufcpy(&ret.err_str, 0, no_dev_avail(output)); break;
2904                 default:
2905                         if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "create context from type %s")) break;
2906
2907                         /* get the devices */
2908                         loc.sname = "CL_CONTEXT_DEVICES";
2909                         loc.line = __LINE__+2;
2910
2911                         ret.err = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, 0, NULL, &szval);
2912                         if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s size")) break;
2913                         if (szval > cursz) {
2914                                 REALLOC(devs, szval, "context devices");
2915                                 cursz = szval;
2916                         }
2917
2918                         loc.line = __LINE__+1;
2919                         ret.err = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, cursz, devs, NULL);
2920                         if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
2921                         ndevs = szval/sizeof(cl_device_id);
2922                         if (ndevs < 1) {
2923                                 ret.err = CL_DEVICE_NOT_FOUND;
2924                                 bufcpy(&ret.err_str, 0, "<error: context created with no devices>");
2925                         }
2926
2927                         /* get the platform from the first device */
2928                         RESET_LOC_PARAM(loc, dev, CL_DEVICE_PLATFORM);
2929                         loc.line = __LINE__+1;
2930                         ret.err = clGetDeviceInfo(*devs, CL_DEVICE_PLATFORM, sizeof(plat), &plat, NULL);
2931                         if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
2932                         loc.plat = plat;
2933
2934                         szval = 0;
2935                         for (i = 0; i < num_platforms; ++i) {
2936                                 if (platform[i] == plat)
2937                                         break;
2938                         }
2939                         if (i == num_platforms) {
2940                                 ret.err = CL_INVALID_PLATFORM;
2941                                 strbuf_printf(&ret.err_str, "<error: platform %p not found>", (void*)plat);
2942                                 break;
2943                         } else {
2944                                 szval += strbuf_printf(&ret.str, "%s (%" PRIuS ")",
2945                                         (output->mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS"),
2946                                         ndevs);
2947                                 szval += snprintf(ret.str.buf + szval, ret.str.sz - szval, "\n" I2_STR "%s",
2948                                         platname_prop, pdata[i].pname);
2949                         }
2950                         for (i = 0; i < ndevs; ++i) {
2951                                 size_t szname = 0;
2952                                 /* for each device, show the device name */
2953                                 /* TODO some other unique ID too, e.g. PCI address, if available? */
2954
2955                                 szval += snprintf(ret.str.buf + szval, ret.str.sz - szval, "\n" I2_STR, devname_prop);
2956                                 if (szval >= ret.str.sz) {
2957                                         trunc_strbuf(&ret.str);
2958                                         break;
2959                                 }
2960
2961                                 RESET_LOC_PARAM(loc, dev, CL_DEVICE_NAME);
2962                                 loc.dev = devs[i];
2963                                 loc.line = __LINE__+1;
2964                                 ret.err = clGetDeviceInfo(devs[i], CL_DEVICE_NAME, ret.str.sz - szval, ret.str.buf + szval, &szname);
2965                                 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
2966                                 szval += szname - 1;
2967                         }
2968                         if (i != ndevs)
2969                                 break; /* had an error earlier, bail */
2970
2971                 }
2972
2973                 if (ctx) {
2974                         clReleaseContext(ctx);
2975                         ctx = NULL;
2976                 }
2977                 printf("%s%s\n", def, RET_BUF(ret)->buf);
2978         }
2979         free(devs);
2980         UNINIT_RET(ret);
2981 }
2982
2983 /* check the behavior of NULL platform in clGetDeviceIDs (see checkNullGetDevices)
2984  * and in clCreateContext() */
2985 void checkNullBehavior(const struct platform_list *plist, const struct opt_out *output)
2986 {
2987         const cl_uint num_platforms = plist->num_platforms;
2988         const struct platform_data *pdata = plist->pdata;
2989
2990         cl_uint p = 0;
2991         struct device_info_ret ret;
2992
2993         INIT_RET(ret, "null behavior");
2994
2995         printf("NULL platform behavior\n");
2996
2997         checkNullGetPlatformName(output);
2998
2999         p = checkNullGetDevices(plist, output);
3000
3001         /* If there's a default platform, and it has devices, try
3002          * creating a context with its first device and see if it works */
3003
3004         if (p == num_platforms) {
3005                 ret.err = CL_INVALID_PLATFORM;
3006                 bufcpy(&ret.err_str, 0, no_plat(output));
3007         } else if (pdata[p].ndevs == 0) {
3008                 ret.err = CL_DEVICE_NOT_FOUND;
3009                 bufcpy(&ret.err_str, 0, no_dev_found(output));
3010         } else {
3011                 if (p < num_platforms) {
3012                         checkNullCtx(&ret, plist, p, "default", output);
3013                 } else {
3014                         /* this shouldn't happen, but still ... */
3015                         ret.err = CL_OUT_OF_HOST_MEMORY;
3016                         bufcpy(&ret.err_str, 0, "<error: overflow in default platform scan>");
3017                 }
3018         }
3019         printf(I1_STR "%s\n", "clCreateContext(NULL, ...) [default]", RET_BUF(ret)->buf);
3020
3021         /* Look for a device from a non-default platform, if there are any */
3022         if (p == num_platforms || num_platforms > 1) {
3023                 cl_uint p2 = 0;
3024                 while (p2 < num_platforms && (p2 == p || pdata[p2].ndevs == 0)) {
3025                         p2++;
3026                 }
3027                 if (p2 < num_platforms) {
3028                         checkNullCtx(&ret, plist, p2, "non-default", output);
3029                 } else {
3030                         ret.err = CL_DEVICE_NOT_FOUND;
3031                         bufcpy(&ret.str, 0, "<error: no devices in non-default plaforms>");
3032                 }
3033                 printf(I1_STR "%s\n", "clCreateContext(NULL, ...) [other]", RET_BUF(ret)->buf);
3034         }
3035
3036         checkNullCtxFromType(plist, output);
3037
3038         UNINIT_RET(ret);
3039 }
3040
3041
3042 /* Get properties of the ocl-icd loader, if available */
3043 /* All properties are currently char[] */
3044
3045 /* Function pointer to the ICD loader info function */
3046
3047 typedef cl_int (*icdl_info_fn_ptr)(cl_icdl_info, size_t, void*, size_t*);
3048 icdl_info_fn_ptr clGetICDLoaderInfoOCLICD;
3049
3050 /* We want to auto-detect the OpenCL version supported by the ICD loader.
3051  * To do this, we will progressively find symbols introduced in new APIs,
3052  * until a NULL symbol is found.
3053  */
3054
3055 struct icd_loader_test {
3056         cl_uint version;
3057         const char *symbol;
3058 } icd_loader_tests[] = {
3059         { 11, "clCreateSubBuffer" },
3060         { 12, "clCreateImage" },
3061         { 20, "clSVMAlloc" },
3062         { 21, "clGetHostTimer" },
3063         { 22, "clSetProgramSpecializationConstant" },
3064         { 0, NULL }
3065 };
3066
3067 void
3068 icdl_info_str(struct icdl_info_ret *ret, const struct info_loc *loc)
3069 {
3070         GET_STRING_LOC(ret, loc, clGetICDLoaderInfoOCLICD, loc->param.icdl);
3071         return;
3072 }
3073
3074 struct icdl_info_traits {
3075         cl_icdl_info param; // CL_ICDL_*
3076         const char *sname; // "CL_ICDL_*"
3077         const char *pname; // "ICD loader *"
3078 };
3079
3080 static const char * const oclicdl_pfx = "OCLICD";
3081
3082 #define LINFO(symbol, name) { symbol, #symbol, "ICD loader " name }
3083 struct icdl_info_traits linfo_traits[] = {
3084         LINFO(CL_ICDL_NAME, "Name"),
3085         LINFO(CL_ICDL_VENDOR, "Vendor"),
3086         LINFO(CL_ICDL_VERSION, "Version"),
3087         LINFO(CL_ICDL_OCL_VERSION, "Profile")
3088 };
3089
3090 /* The ICD loader info function must be retrieved via clGetExtensionFunctionAddress,
3091  * which returns a void pointer.
3092  * ISO C forbids assignments between function pointers and void pointers,
3093  * but POSIX allows it. To compile without warnings even in -pedantic mode,
3094  * we take advantage of the fact that we _can_ do the conversion via
3095  * pointers-to-pointers. This is supported on most compilers, except
3096  * for some rather old GCC versions whose strict aliasing rules are
3097  * too strict. Disable strict aliasing warnings for these compilers.
3098  */
3099 #if defined __GNUC__ && ((__GNUC__*10 + __GNUC_MINOR__) < 46)
3100 #pragma GCC diagnostic ignored "-Wstrict-aliasing"
3101 #endif
3102
3103 struct icdl_data oclIcdProps(const struct platform_list *plist, const struct opt_out *output)
3104 {
3105         const cl_uint max_plat_version = plist->max_plat_version;
3106
3107         struct icdl_data icdl;
3108
3109         /* Counter that'll be used to walk the icd_loader_tests */
3110         int i = 0;
3111
3112         /* We find the clGetICDLoaderInfoOCLICD extension address, which will be used
3113          * to query the ICD loader properties.
3114          * It should be noted that in this specific case we cannot replace the
3115          * call to clGetExtensionFunctionAddress with a call to the superseding function
3116          * clGetExtensionFunctionAddressForPlatform because the extension is in the
3117          * loader itself, not in a specific platform.
3118          */
3119         void *ptrHack = clGetExtensionFunctionAddress("clGetICDLoaderInfoOCLICD");
3120         clGetICDLoaderInfoOCLICD = *(icdl_info_fn_ptr*)(&ptrHack);
3121
3122         /* Initialize icdl_data ret versions */
3123         icdl.detected_version = 10;
3124         icdl.reported_version = 0;
3125
3126         /* Step #1: try to auto-detect the supported ICD loader version */
3127         do {
3128                 struct icd_loader_test check = icd_loader_tests[i];
3129                 if (check.symbol == NULL)
3130                         break;
3131                 if (dlsym(DL_MODULE, check.symbol) == NULL)
3132                         break;
3133                 icdl.detected_version = check.version;
3134                 ++i;
3135         } while (1);
3136
3137         /* Step #2: query properties from extension, if available */
3138         if (clGetICDLoaderInfoOCLICD != NULL) {
3139                 struct info_loc loc;
3140                 struct icdl_info_ret ret;
3141                 reset_loc(&loc, __func__);
3142                 INIT_RET(ret, "ICD loader");
3143
3144                 /* TODO think of a sensible header in CLINFO_RAW */
3145                 if (output->mode != CLINFO_RAW)
3146                         puts("\nICD loader properties");
3147
3148                 if (output->mode == CLINFO_RAW) {
3149                         line_pfx_len = (int)(strlen(oclicdl_pfx) + 5);
3150                         REALLOC(line_pfx, line_pfx_len, "line prefix OCL ICD");
3151                         strbuf_printf(&ret.str, "[%s/*]", oclicdl_pfx);
3152                         sprintf(line_pfx, "%*s", -line_pfx_len, ret.str.buf);
3153                 }
3154
3155                 for (loc.line = 0; loc.line < ARRAY_SIZE(linfo_traits); ++loc.line) {
3156                         const struct icdl_info_traits *traits = linfo_traits + loc.line;
3157                         loc.sname = traits->sname;
3158                         loc.pname = (output->mode == CLINFO_HUMAN ?
3159                                 traits->pname : traits->sname);
3160                         loc.param.icdl = traits->param;
3161
3162                         ret.str.buf[0] = '\0';
3163                         ret.err_str.buf[0] = '\0';
3164                         icdl_info_str(&ret, &loc);
3165                         show_strbuf(RET_BUF(ret), loc.pname, 1, ret.err);
3166
3167                         if (!ret.err && traits->param == CL_ICDL_OCL_VERSION) {
3168                                 icdl.reported_version = getOpenCLVersion(ret.str.buf + 7);
3169                         }
3170                 }
3171                 UNINIT_RET(ret);
3172         }
3173
3174         /* Step #3: show it */
3175         if (output->mode == CLINFO_HUMAN) {
3176                 if (icdl.reported_version &&
3177                         icdl.reported_version != icdl.detected_version) {
3178                         printf( "\tNOTE:\tyour OpenCL library declares to support OpenCL %" PRIu32 ".%" PRIu32 ",\n"
3179                                 "\t\tbut it seems to support up to OpenCL %" PRIu32 ".%" PRIu32 " %s.\n",
3180                                 SPLIT_CL_VERSION(icdl.reported_version),
3181                                 SPLIT_CL_VERSION(icdl.detected_version),
3182                                 icdl.detected_version < icdl.reported_version  ?
3183                                 "only" : "too");
3184                 }
3185                 if (icdl.detected_version < max_plat_version) {
3186                         printf( "\tNOTE:\tyour OpenCL library only supports OpenCL %" PRIu32 ".%" PRIu32 ",\n"
3187                                 "\t\tbut some installed platforms support OpenCL %" PRIu32 ".%" PRIu32 ".\n"
3188                                 "\t\tPrograms using %" PRIu32 ".%" PRIu32 " features may crash\n"
3189                                 "\t\tor behave unexpectedly\n",
3190                                 SPLIT_CL_VERSION(icdl.detected_version),
3191                                 SPLIT_CL_VERSION(max_plat_version),
3192                                 SPLIT_CL_VERSION(max_plat_version));
3193                 }
3194         }
3195         return icdl;
3196 }
3197
3198 #if defined __GNUC__ && ((__GNUC__*10 + __GNUC_MINOR__) < 46)
3199 #pragma GCC diagnostic warning "-Wstrict-aliasing"
3200 #endif
3201
3202 void version(void)
3203 {
3204         puts("clinfo version 2.2.18.04.06");
3205 }
3206
3207 void usage(void)
3208 {
3209         version();
3210         puts("Display properties of all available OpenCL platforms and devices");
3211         puts("Usage: clinfo [options ...]\n");
3212         puts("Options:");
3213         puts("\t--all-props, -a\t\ttry all properties, only show valid ones");
3214         puts("\t--always-all-props, -A\t\tshow all properties, even if invalid");
3215         puts("\t--human\t\thuman-friendly output (default)");
3216         puts("\t--raw\t\traw output");
3217         puts("\t--offline\talso show offline devices");
3218         puts("\t--list, -l\tonly list the platforms and devices by name");
3219         puts("\t-h, -?\t\tshow usage");
3220         puts("\t--version, -v\tshow version\n");
3221         puts("Defaults to raw mode if invoked with");
3222         puts("a name that contains the string \"raw\"");
3223 }
3224
3225 int main(int argc, char *argv[])
3226 {
3227         cl_uint p;
3228         cl_int err;
3229         int a = 0;
3230
3231         struct opt_out output;
3232
3233         struct platform_list plist;
3234         init_plist(&plist);
3235
3236         output.mode = CLINFO_HUMAN;
3237         output.cond = COND_PROP_CHECK;
3238         output.brief = CL_FALSE;
3239         output.offline = CL_FALSE;
3240         output.check_size = CL_FALSE;
3241
3242         /* if there's a 'raw' in the program name, switch to raw output mode */
3243         if (strstr(argv[0], "raw"))
3244                 output.mode = CLINFO_RAW;
3245
3246         /* process command-line arguments */
3247         while (++a < argc) {
3248                 if (!strcmp(argv[a], "-a") || !strcmp(argv[a], "--all-props"))
3249                         output.cond = COND_PROP_TRY;
3250                 else if (!strcmp(argv[a], "-A") || !strcmp(argv[a], "--always-all-props"))
3251                         output.cond = COND_PROP_SHOW;
3252                 else if (!strcmp(argv[a], "--raw"))
3253                         output.mode = CLINFO_RAW;
3254                 else if (!strcmp(argv[a], "--human"))
3255                         output.mode = CLINFO_HUMAN;
3256                 else if (!strcmp(argv[a], "--offline"))
3257                         output.offline = CL_TRUE;
3258                 else if (!strcmp(argv[a], "-l") || !strcmp(argv[a], "--list"))
3259                         output.brief = CL_TRUE;
3260                 else if (!strcmp(argv[a], "-?") || !strcmp(argv[a], "-h")) {
3261                         usage();
3262                         return 0;
3263                 } else if (!strcmp(argv[a], "--version") || !strcmp(argv[a], "-v")) {
3264                         version();
3265                         return 0;
3266                 } else {
3267                         fprintf(stderr, "ignoring unknown command-line parameter %s\n", argv[a]);
3268                 }
3269         }
3270         output.detailed = !output.brief;
3271
3272         err = clGetPlatformIDs(0, NULL, &plist.num_platforms);
3273         if (err != CL_PLATFORM_NOT_FOUND_KHR)
3274                 CHECK_ERROR(err, "number of platforms");
3275
3276         if (!output.brief)
3277                 printf(I0_STR "%" PRIu32 "\n",
3278                         (output.mode == CLINFO_HUMAN ?
3279                          "Number of platforms" : "#PLATFORMS"),
3280                         plist.num_platforms);
3281         if (!plist.num_platforms)
3282                 return 0;
3283
3284         alloc_plist(&plist);
3285         err = clGetPlatformIDs(plist.num_platforms, plist.platform, NULL);
3286         CHECK_ERROR(err, "platform IDs");
3287
3288         ALLOC(line_pfx, 1, "line prefix");
3289
3290         for (p = 0; p < plist.num_platforms; ++p) {
3291                 gatherPlatformInfo(&plist, p, &output);
3292                 if (output.detailed)
3293                         puts("");
3294         }
3295         showDevices(&plist, &output);
3296         if (output.detailed) {
3297                 if (output.mode != CLINFO_RAW)
3298                         checkNullBehavior(&plist, &output);
3299                 oclIcdProps(&plist, &output);
3300         }
3301
3302         free_plist(&plist);
3303         return 0;
3304 }