Introduce (and start using) functions to autoexpand a strbuf
[clinfo] / src / clinfo.c
1 /* Collect all available information on all available devices
2  * on all available OpenCL platforms present in the system
3  */
4
5 #include <time.h>
6 #include <string.h>
7
8 /* We will want to check for symbols in the OpenCL library.
9  * On Windows, we must get the module handle for it, on Unix-like
10  * systems we can just use RTLD_DEFAULT
11  */
12 #ifdef _MSC_VER
13 # include <windows.h>
14 # define dlsym GetProcAddress
15 # define DL_MODULE GetModuleHandle("OpenCL")
16 #else
17 # include <dlfcn.h>
18 # define DL_MODULE ((void*)0) /* This would be RTLD_DEFAULT */
19 #endif
20
21 /* Load STDC format macros (PRI*), or define them
22  * for those crappy, non-standard compilers
23  */
24 #include "fmtmacros.h"
25
26 // More support for the horrible MS C compiler
27 #ifdef _MSC_VER
28 #include "ms_support.h"
29 #endif
30
31 #include "error.h"
32 #include "memory.h"
33 #include "strbuf.h"
34
35 #include "ext.h"
36 #include "ctx_prop.h"
37 #include "info_loc.h"
38 #include "info_ret.h"
39 #include "opt_out.h"
40
41 #define ARRAY_SIZE(ar) (sizeof(ar)/sizeof(*ar))
42
43 #ifndef UNUSED
44 #define UNUSED(x) x __attribute__((unused))
45 #endif
46
47 struct platform_data {
48         char *pname; /* CL_PLATFORM_NAME */
49         char *sname; /* CL_PLATFORM_ICD_SUFFIX_KHR or surrogate */
50         cl_uint ndevs; /* number of devices */
51         cl_bool has_amd_offline; /* has cl_amd_offline_devices extension */
52 };
53
54 struct platform_info_checks {
55         cl_uint plat_version;
56         cl_bool has_khr_icd;
57         cl_bool has_amd_object_metadata;
58         cl_bool has_extended_versioning;
59 };
60
61 struct platform_list {
62         /* Number of platforms in the system */
63         cl_uint num_platforms;
64         /* Total number of devices across all platforms */
65         cl_uint ndevs_total;
66         /* Number of devices allocated in all_devs array */
67         cl_uint alloc_devs;
68         /* Highest OpenCL version supported by any platform.
69          * If the OpenCL library / ICD loader only supports
70          * a lower version, problems may arise (such as
71          * API calls causing segfaults or any other unexpected
72          * behavior
73          */
74         cl_uint max_plat_version;
75         /* Largest number of devices on any platform */
76         cl_uint max_devs;
77         /* Length of the longest platform sname */
78         cl_int max_sname_len;
79         /* Array of platform IDs */
80         cl_platform_id *platform;
81         /* Array of device IDs (across all platforms) */
82         cl_device_id *all_devs;
83         /* Array of offsets in all_devs where the devices
84          * of each platform begin */
85         cl_uint *dev_offset;
86         /* Array of clinfo-specific platform data */
87         struct platform_data *pdata;
88         /* Array of clinfo-specific platform checks */
89         struct platform_info_checks *platform_checks;
90 };
91
92 void
93 init_plist(struct platform_list *plist)
94 {
95         plist->num_platforms = 0;
96         plist->ndevs_total = 0;
97         plist->alloc_devs = 0;
98         plist->max_plat_version = 0;
99         plist->max_devs = 0;
100         plist->max_sname_len = 0;
101         plist->platform = NULL;
102         plist->all_devs = NULL;
103         plist->dev_offset = NULL;
104         plist->pdata = NULL;
105         plist->platform_checks = NULL;
106 }
107
108 void plist_devs_reserve(struct platform_list *plist, cl_uint amount)
109 {
110         if (amount > plist->alloc_devs) {
111                 REALLOC(plist->all_devs, amount, "all devices");
112                 plist->alloc_devs = amount;
113         }
114 }
115
116
117 void
118 alloc_plist(struct platform_list *plist)
119 {
120         ALLOC(plist->platform, plist->num_platforms, "platform IDs");
121         ALLOC(plist->dev_offset, plist->num_platforms, "platform device list offset");
122         /* The actual sizing for this will change as we gather platform info,
123          * but assume at least one device per platform
124          */
125         plist_devs_reserve(plist, plist->num_platforms);
126         ALLOC(plist->pdata, plist->num_platforms, "platform data");
127         ALLOC(plist->platform_checks, plist->num_platforms, "platform checks data");
128 }
129 void
130 free_plist(struct platform_list *plist)
131 {
132         free(plist->platform);
133         free(plist->all_devs);
134         free(plist->dev_offset);
135         for (cl_uint p = 0 ; p < plist->num_platforms; ++p) {
136                 free(plist->pdata[p].sname);
137                 free(plist->pdata[p].pname);
138         }
139         free(plist->pdata);
140         free(plist->platform_checks);
141         init_plist(plist);
142 }
143
144 const cl_device_id *
145 get_platform_devs(const struct platform_list *plist, cl_uint p)
146 {
147         return plist->all_devs + plist->dev_offset[p];
148 }
149
150 cl_device_id
151 get_platform_dev(const struct platform_list *plist, cl_uint p, cl_uint d)
152 {
153         return get_platform_devs(plist, p)[d];
154 }
155
156 /* Data for the OpenCL library / ICD loader */
157 struct icdl_data {
158         /* auto-detected OpenCL version support for the ICD loader */
159         cl_uint detected_version;
160         /* OpenCL version support declared by the ICD loader */
161         cl_uint reported_version;
162 };
163
164 /* line prefix, used to identify the platform/device for each
165  * device property in RAW output mode */
166 char *line_pfx;
167 int line_pfx_len;
168
169 #define CHECK_SIZE(ret, loc, val, cmd, ...) do { \
170         /* check if the issue is with param size */ \
171         if (output->check_size && ret->err == CL_INVALID_VALUE) { \
172                 size_t _actual_sz; \
173                 if (cmd(__VA_ARGS__, 0, NULL, &_actual_sz) == CL_SUCCESS) { \
174                         REPORT_SIZE_MISMATCH(&(ret->err_str), loc, _actual_sz, sizeof(val)); \
175                 } \
176         } \
177 } while (0)
178
179 static const char unk[] = "Unknown";
180 static const char none[] = "None";
181 static const char none_raw[] = "CL_NONE";
182 static const char na[] = "n/a"; // not available
183 static const char na_wrap[] = "(n/a)"; // not available
184 static const char core[] = "core";
185
186 static const char bytes_str[] = " bytes";
187 static const char pixels_str[] = " pixels";
188 static const char images_str[] = " images";
189
190 static const char* bool_str[] = { "No", "Yes" };
191 static const char* bool_raw_str[] = { "CL_FALSE", "CL_TRUE" };
192
193 static const char* endian_str[] = { "Big-Endian", "Little-Endian" };
194
195 static const cl_device_type devtype[] = { 0,
196         CL_DEVICE_TYPE_DEFAULT, CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU,
197         CL_DEVICE_TYPE_ACCELERATOR, CL_DEVICE_TYPE_CUSTOM, CL_DEVICE_TYPE_ALL };
198
199 const size_t devtype_count = ARRAY_SIZE(devtype);
200 /* number of actual device types, without ALL */
201 const size_t actual_devtype_count = ARRAY_SIZE(devtype) - 1;
202
203 static const char* device_type_str[] = { unk, "Default", "CPU", "GPU", "Accelerator", "Custom", "All" };
204 static const char* device_type_raw_str[] = { unk,
205         "CL_DEVICE_TYPE_DEFAULT", "CL_DEVICE_TYPE_CPU", "CL_DEVICE_TYPE_GPU",
206         "CL_DEVICE_TYPE_ACCELERATOR", "CL_DEVICE_TYPE_CUSTOM", "CL_DEVICE_TYPE_ALL"
207 };
208
209 static const char* partition_type_str[] = {
210         none, "equally", "by counts", "by affinity domain", "by names (Intel)"
211 };
212 static const char* partition_type_raw_str[] = {
213         none_raw,
214         "CL_DEVICE_PARTITION_EQUALLY_EXT",
215         "CL_DEVICE_PARTITION_BY_COUNTS_EXT",
216         "CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT",
217         "CL_DEVICE_PARTITION_BY_NAMES_INTEL_EXT"
218 };
219
220 static const char* atomic_cap_str[] = {
221         "relaxed", "acquire/release", "sequentially-consistent",
222         "work-item scope", "work-group scope", "device scope", "all-devices scope"
223 };
224 static const char* atomic_cap_raw_str[] = {
225         "CL_DEVICE_ATOMIC_ORDER_RELAXED",
226         "CL_DEVICE_ATOMIC_ORDER_ACQ_REL",
227         "CL_DEVICE_ATOMIC_ORDER_SEQ_CST",
228         "CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM",
229         "CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP",
230         "CL_DEVICE_ATOMIC_SCOPE_DEVICE",
231         "CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES"
232 };
233 const size_t atomic_cap_count = ARRAY_SIZE(atomic_cap_str);
234
235 static const char *device_enqueue_cap_str[] = {
236         "supported", "replaceable default queue"
237 };
238
239 static const char *device_enqueue_cap_raw_str[] = {
240         "CL_DEVICE_QUEUE_SUPPORTED",
241         "CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT"
242 };
243 const size_t device_enqueue_cap_count = ARRAY_SIZE(atomic_cap_str);
244
245
246 static const char numa[] = "NUMA";
247 static const char l1cache[] = "L1 cache";
248 static const char l2cache[] = "L2 cache";
249 static const char l3cache[] = "L3 cache";
250 static const char l4cache[] = "L4 cache";
251
252 static const char* affinity_domain_str[] = {
253         numa, l4cache, l3cache, l2cache, l1cache, "next partitionable"
254 };
255
256 static const char* affinity_domain_ext_str[] = {
257         numa, l4cache, l3cache, l2cache, l1cache, "next fissionable"
258 };
259
260 static const char* affinity_domain_raw_str[] = {
261         "CL_DEVICE_AFFINITY_DOMAIN_NUMA",
262         "CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE",
263         "CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE",
264         "CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE",
265         "CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE",
266         "CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE"
267 };
268
269 static const char* affinity_domain_raw_ext_str[] = {
270         "CL_AFFINITY_DOMAIN_NUMA_EXT",
271         "CL_AFFINITY_DOMAIN_L4_CACHE_EXT",
272         "CL_AFFINITY_DOMAIN_L3_CACHE_EXT",
273         "CL_AFFINITY_DOMAIN_L2_CACHE_EXT",
274         "CL_AFFINITY_DOMAIN_L1_CACHE_EXT",
275         "CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT"
276 };
277
278 const size_t affinity_domain_count = ARRAY_SIZE(affinity_domain_str);
279
280 static const char *terminate_capability_str[] = {
281         "Context"
282 };
283
284 static const char *terminate_capability_raw_str[] = {
285         "CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR"
286 };
287
288 const size_t terminate_capability_count = ARRAY_SIZE(terminate_capability_str);
289
290 static const char* fp_conf_str[] = {
291         "Denormals", "Infinity and NANs", "Round to nearest", "Round to zero",
292         "Round to infinity", "IEEE754-2008 fused multiply-add",
293         "Support is emulated in software",
294         "Correctly-rounded divide and sqrt operations"
295 };
296
297 static const char* fp_conf_raw_str[] = {
298         "CL_FP_DENORM",
299         "CL_FP_INF_NAN",
300         "CL_FP_ROUND_TO_NEAREST",
301         "CL_FP_ROUND_TO_ZERO",
302         "CL_FP_ROUND_TO_INF",
303         "CL_FP_FMA",
304         "CL_FP_SOFT_FLOAT",
305         "CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT"
306 };
307
308 const size_t fp_conf_count = ARRAY_SIZE(fp_conf_str);
309
310 static const char* svm_cap_str[] = {
311         "Coarse-grained buffer sharing",
312         "Fine-grained buffer sharing",
313         "Fine-grained system sharing",
314         "Atomics"
315 };
316
317 static const char* svm_cap_raw_str[] = {
318         "CL_DEVICE_SVM_COARSE_GRAIN_BUFFER",
319         "CL_DEVICE_SVM_FINE_GRAIN_BUFFER",
320         "CL_DEVICE_SVM_FINE_GRAIN_SYSTEM",
321         "CL_DEVICE_SVM_ATOMICS",
322 };
323
324 const size_t svm_cap_count = ARRAY_SIZE(svm_cap_str);
325
326 /* SI suffixes for memory sizes. Note that in OpenCL most of them are
327  * passed via a cl_ulong, which at most can mode 16 EiB, but hey,
328  * let's be forward-thinking ;-)
329  */
330 static const char* memsfx[] = {
331         "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
332 };
333
334 const size_t memsfx_end = ARRAY_SIZE(memsfx) + 1;
335
336 static const char* lmem_type_str[] = { none, "Local", "Global" };
337 static const char* lmem_type_raw_str[] = { none_raw, "CL_LOCAL", "CL_GLOBAL" };
338 static const char* cache_type_str[] = { none, "Read-Only", "Read/Write" };
339 static const char* cache_type_raw_str[] = { none_raw, "CL_READ_ONLY_CACHE", "CL_READ_WRITE_CACHE" };
340
341 static const char* queue_prop_str[] = { "Out-of-order execution", "Profiling" };
342 static const char* queue_prop_raw_str[] = {
343         "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE",
344         "CL_QUEUE_PROFILING_ENABLE"
345 };
346
347 const size_t queue_prop_count = ARRAY_SIZE(queue_prop_str);
348
349 static const char* execap_str[] = { "Run OpenCL kernels", "Run native kernels" };
350 static const char* execap_raw_str[] = {
351         "CL_EXEC_KERNEL",
352         "CL_EXEC_NATIVE_KERNEL"
353 };
354
355 const size_t execap_count = ARRAY_SIZE(execap_str);
356
357 static const char* sources[] = {
358         "#define GWO(type) global type* restrict\n",
359         "#define GRO(type) global const type* restrict\n",
360         "#define BODY int i = get_global_id(0); out[i] = in1[i] + in2[i]\n",
361         "#define _KRN(T, N) kernel void sum##N(GWO(T##N) out, GRO(T##N) in1, GRO(T##N) in2) { BODY; }\n",
362         "#define KRN(N) _KRN(float, N)\n",
363         "KRN()\n/* KRN(2)\nKRN(4)\nKRN(8)\nKRN(16) */\n",
364 };
365
366 const char *num_devs_header(const struct opt_out *output, cl_bool these_are_offline)
367 {
368         return output->mode == CLINFO_HUMAN ?
369                 (these_are_offline ? "Number of offine devices (AMD)" : "Number of devices") :
370                 (these_are_offline ? "#OFFDEVICES" : "#DEVICES");
371 }
372
373 const char *not_specified(const struct opt_out *output)
374 {
375         return output->mode == CLINFO_HUMAN ?
376                 na_wrap : "";
377 }
378
379 const char *no_plat(const struct opt_out *output)
380 {
381         return output->mode == CLINFO_HUMAN ?
382                 "No platform" :
383                 "CL_INVALID_PLATFORM";
384 }
385
386 const char *invalid_dev_type(const struct opt_out *output)
387 {
388         return output->mode == CLINFO_HUMAN ?
389                 "Invalid device type for platform" :
390                 "CL_INVALID_DEVICE_TYPE";
391 }
392
393 const char *invalid_dev_value(const struct opt_out *output)
394 {
395         return output->mode == CLINFO_HUMAN ?
396                 "Invalid device type value for platform" :
397                 "CL_INVALID_VALUE";
398 }
399
400 const char *no_dev_found(const struct opt_out *output)
401 {
402         return output->mode == CLINFO_HUMAN ?
403                 "No devices found in platform" :
404                 "CL_DEVICE_NOT_FOUND";
405 }
406
407 const char *no_dev_avail(const struct opt_out *output)
408 {
409         return output->mode == CLINFO_HUMAN ?
410                 "No devices available in platform" :
411                 "CL_DEVICE_NOT_AVAILABLE";
412 }
413
414 /* OpenCL context interop names */
415
416 typedef struct cl_interop_name {
417         cl_uint from;
418         cl_uint to;
419         /* 5 because that's the largest we know of,
420          * 2 because it's HUMAN, RAW */
421         const char *value[5][2];
422 } cl_interop_name;
423
424 static const cl_interop_name cl_interop_names[] = {
425         { /* cl_khr_gl_sharing */
426                  CL_GL_CONTEXT_KHR,
427                  CL_CGL_SHAREGROUP_KHR,
428                  {
429                         { "GL", "CL_GL_CONTEXT_KHR" },
430                         { "EGL", "CL_EGL_DISPALY_KHR" },
431                         { "GLX", "CL_GLX_DISPLAY_KHR" },
432                         { "WGL", "CL_WGL_HDC_KHR" },
433                         { "CGL", "CL_CGL_SHAREGROUP_KHR" }
434                 }
435         },
436         { /* cl_khr_dx9_media_sharing */
437                 CL_CONTEXT_ADAPTER_D3D9_KHR,
438                 CL_CONTEXT_ADAPTER_DXVA_KHR,
439                 {
440                         { "D3D9 (KHR)", "CL_CONTEXT_ADAPTER_D3D9_KHR" },
441                         { "D3D9Ex (KHR)", "CL_CONTEXT_ADAPTER_D3D9EX_KHR" },
442                         { "DXVA (KHR)", "CL_CONTEXT_ADAPTER_DXVA_KHR" }
443                 }
444         },
445         { /* cl_khr_d3d10_sharing */
446                 CL_CONTEXT_D3D10_DEVICE_KHR,
447                 CL_CONTEXT_D3D10_DEVICE_KHR,
448                 {
449                         { "D3D10", "CL_CONTEXT_D3D10_DEVICE_KHR" }
450                 }
451         },
452         { /* cl_khr_d3d11_sharing */
453                 CL_CONTEXT_D3D11_DEVICE_KHR,
454                 CL_CONTEXT_D3D11_DEVICE_KHR,
455                 {
456                         { "D3D11", "CL_CONTEXT_D3D11_DEVICE_KHR" }
457                 }
458         },
459         /* cl_intel_dx9_media_sharing is split in two because the allowed values are not consecutive */
460         { /* cl_intel_dx9_media_sharing part 1 */
461                 CL_CONTEXT_D3D9_DEVICE_INTEL,
462                 CL_CONTEXT_D3D9_DEVICE_INTEL,
463                 {
464                         { "D3D9 (INTEL)", "CL_CONTEXT_D3D9_DEVICE_INTEL" }
465                 }
466         },
467         { /* cl_intel_dx9_media_sharing part 2 */
468                 CL_CONTEXT_D3D9EX_DEVICE_INTEL,
469                 CL_CONTEXT_DXVA_DEVICE_INTEL,
470                 {
471                         { "D3D9Ex (INTEL)", "CL_CONTEXT_D3D9EX_DEVICE_INTEL" },
472                         { "DXVA (INTEL)", "CL_CONTEXT_DXVA_DEVICE_INTEL" }
473                 }
474         },
475         { /* cl_intel_va_api_media_sharing */
476                 CL_CONTEXT_VA_API_DISPLAY_INTEL,
477                 CL_CONTEXT_VA_API_DISPLAY_INTEL,
478                 {
479                         { "VA-API", "CL_CONTEXT_VA_API_DISPLAY_INTEL" }
480                 }
481         }
482 };
483
484 const size_t num_known_interops = ARRAY_SIZE(cl_interop_names);
485
486
487 #define INDENT "  "
488 #define I0_STR "%-48s  "
489 #define I1_STR "  %-46s  "
490 #define I2_STR "    %-44s  "
491
492 /* New line and a full padding */
493 static const char full_padding[] = "\n"
494 INDENT INDENT INDENT INDENT INDENT
495 INDENT INDENT INDENT INDENT INDENT
496 INDENT INDENT INDENT INDENT INDENT
497 INDENT INDENT INDENT INDENT INDENT
498 INDENT INDENT INDENT INDENT INDENT;
499
500 static const char empty_str[] = "";
501 static const char spc_str[] = " ";
502 static const char times_str[] = "x";
503 static const char comma_str[] = ", ";
504 static const char vbar_str[] = " | ";
505
506 const char *cur_sfx = empty_str;
507
508 /* parse a CL_DEVICE_VERSION or CL_PLATFORM_VERSION info to determine the OpenCL version.
509  * Returns an unsigned integer in the form major*10 + minor
510  */
511 cl_uint
512 getOpenCLVersion(const char *version)
513 {
514         cl_uint ret = 10;
515         long parse = 0;
516         const char *from = version;
517         char *next = NULL;
518         parse = strtol(from, &next, 10);
519
520         if (next != from) {
521                 ret = parse*10;
522                 // skip the dot TODO should we actually check for the dot?
523                 from = ++next;
524                 parse = strtol(from, &next, 10);
525                 if (next != from)
526                         ret += parse;
527         }
528         return ret;
529 }
530
531 #define SPLIT_CL_VERSION(ver) ((ver)/10), ((ver)%10)
532
533 /* OpenCL 3.0 introduced “proper” versioning, in the form of a major.minor.patch struct
534  * packed into a single cl_uint (type aliased to cl_version)
535  */
536 struct unpacked_cl_version {
537         cl_uint major;
538         cl_uint minor;
539         cl_uint patch;
540 };
541
542 struct unpacked_cl_version unpack_cl_version(cl_uint version)
543 {
544         struct unpacked_cl_version ret;
545         ret.major = (version >> 22);
546         ret.minor = (version >> 12) & ((1<<10)-1);
547         ret.patch =  version & ((1<<12)-1);
548         return ret;
549 }
550
551 void strbuf_version(const char *what, struct _strbuf *str, cl_uint version)
552 {
553         struct unpacked_cl_version u = unpack_cl_version(version);
554         strbuf_append(what, str, " (%" PRIu32 ".%" PRIu32 ".%" PRIu32 ")",
555                                 u.major, u.minor, u.patch);
556 }
557
558 void strbuf_name_version(const char *what, struct _strbuf *str, const cl_name_version *ext, size_t num_exts,
559         const struct opt_out *output)
560 {
561         realloc_strbuf(str, num_exts*(CL_NAME_VERSION_MAX_NAME_SIZE + 128), "extension versions");
562         set_separator(output->mode == CLINFO_HUMAN ? full_padding : spc_str);
563         for (size_t i = 0; i < num_exts; ++i) {
564                 const cl_name_version  *e = ext + i;
565                 if (i > 0) strbuf_append_str(what, str, sep);
566                 if (output->mode == CLINFO_HUMAN) {
567                         struct unpacked_cl_version u = unpack_cl_version(e->version);
568                         strbuf_append(what, str, "%-65s%#8" PRIx32 " (%d.%d.%d)",
569                                 e->name, e->version, u.major, u.minor, u.patch);
570                 } else {
571                         strbuf_append(what, str, "%s:%#" PRIx32, e->name, e->version);
572                 }
573         }
574 }
575
576 /* print strbuf, prefixed by pname, skipping leading whitespace if skip is nonzero,
577  * affixing cur_sfx */
578 static inline
579 void show_strbuf(const struct _strbuf *strbuf, const char *pname, int skip, cl_int err)
580 {
581         printf("%s" I1_STR "%s%s\n",
582                 line_pfx, pname,
583                 (skip ? skip_leading_ws(strbuf->buf) : strbuf->buf),
584                 err ? empty_str : cur_sfx);
585 }
586
587 void
588 platform_info_str(struct platform_info_ret *ret,
589         const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk),
590         const struct opt_out* UNUSED(output))
591 {
592         GET_STRING_LOC(ret, loc, clGetPlatformInfo, loc->plat, loc->param.plat);
593 }
594
595 void
596 platform_info_ulong(struct platform_info_ret *ret,
597         const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk),
598         const struct opt_out *output)
599 {
600         ret->err = REPORT_ERROR_LOC(ret,
601                 clGetPlatformInfo(loc->plat, loc->param.plat, sizeof(ret->value.u64), &ret->value.u64, NULL),
602                 loc, "get %s");
603         CHECK_SIZE(ret, loc, ret->value.u64, clGetPlatformInfo, loc->plat, loc->param.plat);
604         strbuf_append(loc->pname, &ret->str, "%" PRIu64, ret->value.u64);
605 }
606
607 void
608 platform_info_sz(struct platform_info_ret *ret,
609         const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk),
610         const struct opt_out *output)
611 {
612         ret->err = REPORT_ERROR_LOC(ret,
613                 clGetPlatformInfo(loc->plat, loc->param.plat, sizeof(ret->value.s), &ret->value.s, NULL),
614                 loc, "get %s");
615         CHECK_SIZE(ret, loc, ret->value.s, clGetPlatformInfo, loc->plat, loc->param.plat);
616         strbuf_append(loc->pname, &ret->str, "%" PRIuS, ret->value.s);
617 }
618
619 void
620 platform_info_version(struct platform_info_ret *ret,
621         const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk),
622         const struct opt_out *output)
623 {
624         ret->err = REPORT_ERROR_LOC(ret,
625                 clGetPlatformInfo(loc->plat, loc->param.plat, sizeof(ret->value.u32), &ret->value.u32, NULL),
626                 loc, "get %s");
627         CHECK_SIZE(ret, loc, ret->value.u32, clGetPlatformInfo, loc->plat, loc->param.plat);
628         strbuf_append(loc->pname, &ret->str, "%#" PRIx32, ret->value.u32);
629         if (output->mode == CLINFO_HUMAN) {
630                 strbuf_version(loc->pname, &ret->str, ret->value.u32);
631         }
632 }
633
634 void
635 platform_info_ext_version(struct platform_info_ret *ret,
636         const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk),
637         const struct opt_out *output)
638 {
639         cl_name_version *ext = NULL;
640         size_t nusz = 0;
641         ret->err = REPORT_ERROR_LOC(ret,
642                 clGetPlatformInfo(loc->plat, loc->param.plat, 0, NULL, &nusz),
643                 loc, "get %s size");
644         if (!ret->err) {
645                 REALLOC(ext, nusz, loc->sname);
646                 ret->err = REPORT_ERROR_LOC(ret,
647                         clGetPlatformInfo(loc->plat, loc->param.plat, nusz, ext, NULL),
648                         loc, "get %s");
649         }
650         if (!ret->err) {
651                 size_t num_exts = nusz / sizeof(cl_name_version);
652                 strbuf_name_version(loc->pname, &ret->str, ext, num_exts, output);
653         }
654         free(ext);
655 }
656
657 struct platform_info_traits {
658         cl_platform_info param; // CL_PLATFORM_*
659         const char *sname; // "CL_PLATFORM_*"
660         const char *pname; // "Platform *"
661         const char *sfx; // suffix for the output in non-raw mode
662         /* pointer to function that retrieves the parameter */
663         void (*show_func)(struct platform_info_ret *,
664                 const struct info_loc *, const struct platform_info_checks *,
665                 const struct opt_out *);
666         /* pointer to function that checks if the parameter should be retrieved */
667         cl_bool (*check_func)(const struct platform_info_checks *);
668 };
669
670 cl_bool khr_icd_p(const struct platform_info_checks *chk)
671 {
672         return chk->has_khr_icd;
673 }
674
675 cl_bool plat_is_12(const struct platform_info_checks *chk)
676 {
677         return !(chk->plat_version < 12);
678 }
679
680 cl_bool plat_is_20(const struct platform_info_checks *chk)
681 {
682         return !(chk->plat_version < 20);
683 }
684
685 cl_bool plat_is_21(const struct platform_info_checks *chk)
686 {
687         return !(chk->plat_version < 21);
688 }
689
690 cl_bool plat_is_30(const struct platform_info_checks *chk)
691 {
692         return !(chk->plat_version < 30);
693 }
694
695 cl_bool plat_has_amd_object_metadata(const struct platform_info_checks *chk)
696 {
697         return chk->has_amd_object_metadata;
698 }
699
700 cl_bool plat_has_ext_ver(const struct platform_info_checks *chk)
701 {
702         return plat_is_30(chk) || chk->has_extended_versioning;
703 }
704
705
706 #define PINFO_COND(symbol, name, sfx, typ, funcptr) { symbol, #symbol, "Platform " name, sfx, &platform_info_##typ, &funcptr }
707 #define PINFO(symbol, name, sfx, typ) { symbol, #symbol, "Platform " name, sfx, &platform_info_##typ, NULL }
708 struct platform_info_traits pinfo_traits[] = {
709         PINFO(CL_PLATFORM_NAME, "Name", NULL, str),
710         PINFO(CL_PLATFORM_VENDOR, "Vendor", NULL, str),
711         PINFO(CL_PLATFORM_VERSION, "Version", NULL, str),
712         PINFO_COND(CL_PLATFORM_NUMERIC_VERSION, "Numeric Version", NULL, version, plat_has_ext_ver),
713         PINFO(CL_PLATFORM_PROFILE, "Profile", NULL, str),
714         PINFO(CL_PLATFORM_EXTENSIONS, "Extensions", NULL, str),
715         PINFO_COND(CL_PLATFORM_EXTENSIONS_WITH_VERSION, "Extensions with Version", NULL, ext_version, plat_has_ext_ver),
716         PINFO_COND(CL_PLATFORM_MAX_KEYS_AMD, "Max metadata object keys (AMD)", NULL, sz, plat_has_amd_object_metadata),
717         PINFO_COND(CL_PLATFORM_HOST_TIMER_RESOLUTION, "Host timer resolution", "ns", ulong, plat_is_21),
718         PINFO_COND(CL_PLATFORM_ICD_SUFFIX_KHR, "Extensions function suffix", NULL, str, khr_icd_p)
719 };
720
721 /* Collect (and optionally show) information on a specific platform,
722  * initializing relevant arrays and optionally showing the collected
723  * information
724  */
725 void
726 gatherPlatformInfo(struct platform_list *plist, cl_uint p, const struct opt_out *output)
727 {
728         cl_int len = 0;
729
730         struct platform_data *pdata = plist->pdata + p;
731         struct platform_info_checks *pinfo_checks = plist->platform_checks + p;
732         struct platform_info_ret ret;
733         struct info_loc loc;
734
735         pinfo_checks->plat_version = 10;
736
737         INIT_RET(ret, "platform");
738         reset_loc(&loc, __func__);
739         loc.plat = plist->platform[p];
740
741         for (loc.line = 0; loc.line < ARRAY_SIZE(pinfo_traits); ++loc.line) {
742                 const struct platform_info_traits *traits = pinfo_traits + loc.line;
743
744                 /* checked is true if there was no condition to check for, or if the
745                  * condition was satisfied
746                  */
747                 int checked = !(traits->check_func && !traits->check_func(pinfo_checks));
748
749                 if (output->cond == COND_PROP_CHECK && !checked)
750                         continue;
751
752                 loc.sname = traits->sname;
753                 loc.pname = (output->mode == CLINFO_HUMAN ?
754                         traits->pname : traits->sname);
755                 loc.param.plat = traits->param;
756
757                 cur_sfx = (output->mode == CLINFO_HUMAN && traits->sfx) ? traits->sfx : empty_str;
758
759                 reset_strbuf(&ret.str);
760                 reset_strbuf(&ret.err_str);
761                 traits->show_func(&ret, &loc, pinfo_checks, output);
762
763                 /* The property is skipped if this was a conditional property,
764                  * unsatisfied, there was an error retrieving it and cond_prop_mode is not
765                  * COND_PROP_SHOW.
766                  */
767                 if (ret.err && !checked && output->cond != COND_PROP_SHOW)
768                         continue;
769
770                 /* The property gets printed if we are not just listing,
771                  * or if the user requested a property and this one matches.
772                  * Otherwise, we're just gathering information */
773                 cl_bool requested = (output->prop && strstr(loc.sname, output->prop) != NULL);
774                 if (output->detailed || requested) {
775                         show_strbuf(RET_BUF(ret), loc.pname, 0, ret.err);
776                 }
777
778                 if (ret.err)
779                         continue;
780
781                 /* post-processing */
782
783                 switch (traits->param) {
784                 case CL_PLATFORM_NAME:
785                         /* Store name for future reference */
786                         len = strlen(ret.str.buf);
787                         ALLOC(pdata->pname, len+1, "platform name copy");
788                         /* memcpy instead of strncpy since we already have the len
789                          * and memcpy is possibly more optimized */
790                         memcpy(pdata->pname, ret.str.buf, len);
791                         pdata->pname[len] = '\0';
792                         break;
793                 case CL_PLATFORM_VERSION:
794                         /* compute numeric value for OpenCL version */
795                         pinfo_checks->plat_version = getOpenCLVersion(ret.str.buf + 7);
796                         break;
797                 case CL_PLATFORM_EXTENSIONS:
798                         pinfo_checks->has_khr_icd = !!strstr(ret.str.buf, "cl_khr_icd");
799                         pinfo_checks->has_amd_object_metadata = !!strstr(ret.str.buf, "cl_amd_object_metadata");
800                         pdata->has_amd_offline = !!strstr(ret.str.buf, "cl_amd_offline_devices");
801                         break;
802                 case CL_PLATFORM_ICD_SUFFIX_KHR:
803                         /* Store ICD suffix for future reference */
804                         len = strlen(ret.str.buf);
805                         ALLOC(pdata->sname, len+1, "platform ICD suffix copy");
806                         /* memcpy instead of strncpy since we already have the len
807                          * and memcpy is possibly more optimized */
808                         memcpy(pdata->sname, ret.str.buf, len);
809                         pdata->sname[len] = '\0';
810                 default:
811                         /* do nothing */
812                         break;
813                 }
814
815         }
816
817         if (pinfo_checks->plat_version > plist->max_plat_version)
818                 plist->max_plat_version = pinfo_checks->plat_version;
819
820         /* if no CL_PLATFORM_ICD_SUFFIX_KHR, use P### as short/symbolic name */
821         if (!pdata->sname) {
822 #define SNAME_MAX 32
823                 ALLOC(pdata->sname, SNAME_MAX+1, "platform symbolic name");
824                 snprintf(pdata->sname, SNAME_MAX, "P%" PRIu32 "", p);
825         }
826
827         len = (cl_int)strlen(pdata->sname);
828         if (len > plist->max_sname_len)
829                 plist->max_sname_len = len;
830
831         ret.err = clGetDeviceIDs(loc.plat, CL_DEVICE_TYPE_ALL, 0, NULL, &pdata->ndevs);
832         if (ret.err == CL_DEVICE_NOT_FOUND)
833                 pdata->ndevs = 0;
834         else
835                 CHECK_ERROR(ret.err, "number of devices");
836         plist->ndevs_total += pdata->ndevs;
837         plist->dev_offset[p] = p ? plist->dev_offset[p-1] + (pdata-1)->ndevs : 0;
838         plist_devs_reserve(plist, plist->ndevs_total);
839
840         if (pdata->ndevs > 0) {
841                 ret.err = clGetDeviceIDs(loc.plat, CL_DEVICE_TYPE_ALL,
842                         pdata->ndevs,
843                         plist->all_devs + plist->dev_offset[p], NULL);
844         }
845
846         if (pdata->ndevs > plist->max_devs)
847                 plist->max_devs = pdata->ndevs;
848
849         UNINIT_RET(ret);
850 }
851
852 /*
853  * Device properties/extensions used in traits checks, and relevant functions
854  * TODO add version control for 3.0+ platforms
855  */
856
857 struct device_info_checks {
858         const struct platform_info_checks *pinfo_checks;
859         cl_device_type devtype;
860         cl_device_mem_cache_type cachetype;
861         cl_device_local_mem_type lmemtype;
862         cl_bool image_support;
863         cl_bool compiler_available;
864         char has_half[12];
865         char has_double[24];
866         char has_nv[29];
867         char has_amd[30];
868         char has_amd_svm[11];
869         char has_arm_svm[29];
870         char has_arm_core_id[15];
871         char has_arm_job_slots[26];
872         char has_fission[22];
873         char has_atomic_counters[26];
874         char has_image2d_buffer[27];
875         char has_il_program[18];
876         char has_intel_local_thread[30];
877         char has_intel_AME[36];
878         char has_intel_AVC_ME[43];
879         char has_intel_planar_yuv[20];
880         char has_intel_required_subgroup_size[32];
881         char has_altera_dev_temp[29];
882         char has_p2p[23];
883         char has_spir[12];
884         char has_qcom_ext_host_ptr[21];
885         char has_simultaneous_sharing[30];
886         char has_subgroup_named_barrier[30];
887         char has_terminate_context[25];
888         char has_extended_versioning[27];
889         char has_cxx_for_opencl[22];
890         char has_device_uuid[19];
891         cl_uint dev_version;
892         cl_uint p2p_num_devs;
893 };
894
895 #define DEFINE_EXT_CHECK(ext) cl_bool dev_has_##ext(const struct device_info_checks *chk) \
896 { \
897         return !!(chk->has_##ext[0]); \
898 }
899
900 DEFINE_EXT_CHECK(half)
901 DEFINE_EXT_CHECK(double)
902 DEFINE_EXT_CHECK(nv)
903 DEFINE_EXT_CHECK(amd)
904 DEFINE_EXT_CHECK(amd_svm)
905 DEFINE_EXT_CHECK(arm_svm)
906 DEFINE_EXT_CHECK(arm_core_id)
907 DEFINE_EXT_CHECK(arm_job_slots)
908 DEFINE_EXT_CHECK(fission)
909 DEFINE_EXT_CHECK(atomic_counters)
910 DEFINE_EXT_CHECK(image2d_buffer)
911 DEFINE_EXT_CHECK(il_program)
912 DEFINE_EXT_CHECK(intel_local_thread)
913 DEFINE_EXT_CHECK(intel_AME)
914 DEFINE_EXT_CHECK(intel_AVC_ME)
915 DEFINE_EXT_CHECK(intel_planar_yuv)
916 DEFINE_EXT_CHECK(intel_required_subgroup_size)
917 DEFINE_EXT_CHECK(altera_dev_temp)
918 DEFINE_EXT_CHECK(p2p)
919 DEFINE_EXT_CHECK(spir)
920 DEFINE_EXT_CHECK(qcom_ext_host_ptr)
921 DEFINE_EXT_CHECK(simultaneous_sharing)
922 DEFINE_EXT_CHECK(subgroup_named_barrier)
923 DEFINE_EXT_CHECK(terminate_context)
924 DEFINE_EXT_CHECK(extended_versioning)
925 DEFINE_EXT_CHECK(cxx_for_opencl)
926 DEFINE_EXT_CHECK(device_uuid)
927
928 /* In the version checks we negate the opposite conditions
929  * instead of double-negating the actual condition
930  */
931
932 // device supports 1.1
933 cl_bool dev_is_11(const struct device_info_checks *chk)
934 {
935         return !(chk->dev_version < 11);
936 }
937
938
939 // device supports 1.2
940 cl_bool dev_is_12(const struct device_info_checks *chk)
941 {
942         return !(chk->dev_version < 12);
943 }
944
945 // device supports 2.0
946 cl_bool dev_is_20(const struct device_info_checks *chk)
947 {
948         return !(chk->dev_version < 20);
949 }
950
951 // device supports 2.1
952 cl_bool dev_is_21(const struct device_info_checks *chk)
953 {
954         return !(chk->dev_version < 21);
955 }
956
957 // device does not support 2.0
958 cl_bool dev_not_20(const struct device_info_checks *chk)
959 {
960         return !(chk->dev_version >= 20);
961 }
962
963 // device supports 3.0
964 cl_bool dev_is_30(const struct device_info_checks *chk)
965 {
966         return !(chk->dev_version < 30);
967 }
968
969 // device has extended versioning: 3.0 or has_extended_versioning
970 cl_bool dev_has_ext_ver(const struct device_info_checks *chk)
971 {
972         return dev_is_30(chk) || dev_has_extended_versioning(chk);
973 }
974
975 cl_bool dev_is_gpu(const struct device_info_checks *chk)
976 {
977         return !!(chk->devtype & CL_DEVICE_TYPE_GPU);
978 }
979
980 cl_bool dev_is_gpu_amd(const struct device_info_checks *chk)
981 {
982         return dev_is_gpu(chk) && dev_has_amd(chk);
983 }
984
985 /* Device supports cl_amd_device_attribute_query v4 */
986 cl_bool dev_has_amd_v4(const struct device_info_checks *chk)
987 {
988         /* We don't actually have a criterion to check if the device
989          * supports a specific version of an extension, so for the time
990          * being rely on them being GPU devices with cl_amd_device_attribute_query
991          * and the platform supporting OpenCL 2.0 or later
992          * TODO FIXME tune criteria
993          */
994         return dev_is_gpu(chk) && dev_has_amd(chk) && plat_is_20(chk->pinfo_checks);
995 }
996
997 /* Device supports cl_arm_core_id v2 */
998 cl_bool dev_has_arm_core_id_v2(const struct device_info_checks *chk)
999 {
1000         /* We don't actually have a criterion to check if the device
1001          * supports a specific version of an extension, so for the time
1002          * being rely on them having cl_arm_core_id and the platform
1003          * supporting OpenCL 1.2 or later
1004          * TODO FIXME tune criteria
1005          */
1006         return dev_has_arm_core_id(chk) && plat_is_12(chk->pinfo_checks);
1007 }
1008
1009 cl_bool dev_has_svm(const struct device_info_checks *chk)
1010 {
1011         return dev_is_20(chk) || dev_has_amd_svm(chk);
1012 }
1013
1014 cl_bool dev_has_partition(const struct device_info_checks *chk)
1015 {
1016         return dev_is_12(chk) || dev_has_fission(chk);
1017 }
1018
1019 cl_bool dev_has_cache(const struct device_info_checks *chk)
1020 {
1021         return chk->cachetype != CL_NONE;
1022 }
1023
1024 cl_bool dev_has_lmem(const struct device_info_checks *chk)
1025 {
1026         return chk->lmemtype != CL_NONE;
1027 }
1028
1029 cl_bool dev_has_il(const struct device_info_checks *chk)
1030 {
1031         return dev_is_21(chk) || dev_has_il_program(chk);
1032 }
1033
1034 cl_bool dev_has_images(const struct device_info_checks *chk)
1035 {
1036         return chk->image_support;
1037 }
1038
1039 cl_bool dev_has_images_12(const struct device_info_checks *chk)
1040 {
1041         return dev_has_images(chk) && dev_is_12(chk);
1042 }
1043
1044 cl_bool dev_has_images_20(const struct device_info_checks *chk)
1045 {
1046         return dev_has_images(chk) && dev_is_20(chk);
1047 }
1048
1049 cl_bool dev_has_compiler(const struct device_info_checks *chk)
1050 {
1051         return chk->compiler_available;
1052 }
1053
1054 cl_bool dev_has_compiler_11(const struct device_info_checks *chk)
1055 {
1056         return dev_is_11(chk) && dev_has_compiler(chk);
1057 }
1058
1059 cl_bool dev_has_p2p_devs(const struct device_info_checks *chk)
1060 {
1061         return dev_has_p2p(chk) && chk->p2p_num_devs > 0;
1062 }
1063
1064
1065 void identify_device_extensions(const char *extensions, struct device_info_checks *chk)
1066 {
1067 #define _HAS_EXT(ext) (strstr(extensions, ext))
1068 #define CPY_EXT(what, ext) do { \
1069         strncpy(chk->has_##what, has+1, sizeof(ext)); \
1070         chk->has_##what[sizeof(ext)-1] = '\0'; \
1071 } while (0)
1072 #define CHECK_EXT(what, ext) do { \
1073         has = _HAS_EXT(" " #ext " "); \
1074         if (has) CPY_EXT(what, #ext); \
1075 } while(0)
1076
1077         char *has;
1078         CHECK_EXT(half, cl_khr_fp16);
1079         CHECK_EXT(spir, cl_khr_spir);
1080         CHECK_EXT(double, cl_khr_fp64);
1081         if (!dev_has_double(chk))
1082                 CHECK_EXT(double, cl_amd_fp64);
1083         if (!dev_has_double(chk))
1084                 CHECK_EXT(double, cl_APPLE_fp64_basic_ops);
1085         CHECK_EXT(nv, cl_nv_device_attribute_query);
1086         CHECK_EXT(amd, cl_amd_device_attribute_query);
1087         CHECK_EXT(amd_svm, cl_amd_svm);
1088         CHECK_EXT(arm_svm, cl_arm_shared_virtual_memory);
1089         CHECK_EXT(arm_core_id, cl_arm_core_id);
1090         CHECK_EXT(arm_job_slots, cl_arm_job_slot_selection);
1091         CHECK_EXT(fission, cl_ext_device_fission);
1092         CHECK_EXT(atomic_counters, cl_ext_atomic_counters_64);
1093         if (dev_has_atomic_counters(chk))
1094                 CHECK_EXT(atomic_counters, cl_ext_atomic_counters_32);
1095         CHECK_EXT(image2d_buffer, cl_khr_image2d_from_buffer);
1096         CHECK_EXT(il_program, cl_khr_il_program);
1097         CHECK_EXT(intel_local_thread, cl_intel_exec_by_local_thread);
1098         CHECK_EXT(intel_AME, cl_intel_advanced_motion_estimation);
1099         CHECK_EXT(intel_AVC_ME, cl_intel_device_side_avc_motion_estimation);
1100         CHECK_EXT(intel_planar_yuv, cl_intel_planar_yuv);
1101         CHECK_EXT(intel_required_subgroup_size, cl_intel_required_subgroup_size);
1102         CHECK_EXT(altera_dev_temp, cl_altera_device_temperature);
1103         CHECK_EXT(p2p, cl_amd_copy_buffer_p2p);
1104         CHECK_EXT(qcom_ext_host_ptr, cl_qcom_ext_host_ptr);
1105         CHECK_EXT(simultaneous_sharing, cl_intel_simultaneous_sharing);
1106         CHECK_EXT(subgroup_named_barrier, cl_khr_subgroup_named_barrier);
1107         CHECK_EXT(terminate_context, cl_khr_terminate_context);
1108         CHECK_EXT(extended_versioning, cl_khr_extended_versioning);
1109         CHECK_EXT(cxx_for_opencl, cl_ext_cxx_for_opencl);
1110         CHECK_EXT(device_uuid, cl_khr_device_uuid);
1111 }
1112
1113
1114 /*
1115  * Device info print functions
1116  */
1117
1118 #define _GET_VAL(ret, loc, val) \
1119         ret->err = REPORT_ERROR_LOC(ret, \
1120                 clGetDeviceInfo((loc)->dev, (loc)->param.dev, sizeof(val), &(val), NULL), \
1121                 loc, "get %s"); \
1122         CHECK_SIZE(ret, loc, val, clGetDeviceInfo, (loc)->dev, (loc)->param.dev);
1123
1124 #define _GET_VAL_VALUES(ret, loc) \
1125         REALLOC(val, numval, loc->sname); \
1126         ret->err = REPORT_ERROR_LOC(ret, \
1127                 clGetDeviceInfo(loc->dev, loc->param.dev, szval, val, NULL), \
1128                 loc, "get %s"); \
1129         if (ret->err) { free(val); val = NULL; } \
1130
1131 #define _GET_VAL_ARRAY(ret, loc) \
1132         ret->err = REPORT_ERROR_LOC(ret, \
1133                 clGetDeviceInfo(loc->dev, loc->param.dev, 0, NULL, &szval), \
1134                 loc, "get number of %s"); \
1135         numval = szval/sizeof(*val); \
1136         if (!ret->err) { \
1137                 _GET_VAL_VALUES(ret, loc) \
1138         }
1139
1140 #define GET_VAL(ret, loc, field) do { \
1141         _GET_VAL(ret, (loc), ret->value.field) \
1142 } while (0)
1143
1144 #define GET_VAL_ARRAY(ret, loc) do { \
1145         _GET_VAL_ARRAY(ret, (loc)) \
1146 } while (0)
1147
1148 #define DEFINE_DEVINFO_FETCH(type, field) \
1149 type \
1150 device_fetch_##type(struct device_info_ret *ret, \
1151         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), \
1152         const struct opt_out *output) \
1153 { \
1154         GET_VAL(ret, loc, field); \
1155         return ret->value.field; \
1156 }
1157
1158 DEFINE_DEVINFO_FETCH(size_t, s)
1159 DEFINE_DEVINFO_FETCH(cl_bool, b)
1160 DEFINE_DEVINFO_FETCH(cl_uint, u32)
1161 DEFINE_DEVINFO_FETCH(cl_version, u32)
1162 DEFINE_DEVINFO_FETCH(cl_ulong, u64)
1163 DEFINE_DEVINFO_FETCH(cl_bitfield, u64)
1164 DEFINE_DEVINFO_FETCH(cl_device_type, devtype)
1165 DEFINE_DEVINFO_FETCH(cl_device_mem_cache_type, cachetype)
1166 DEFINE_DEVINFO_FETCH(cl_device_local_mem_type, lmemtype)
1167 DEFINE_DEVINFO_FETCH(cl_device_topology_amd, devtopo)
1168 DEFINE_DEVINFO_FETCH(cl_device_affinity_domain, affinity_domain)
1169 DEFINE_DEVINFO_FETCH(cl_device_fp_config, fpconfig)
1170 DEFINE_DEVINFO_FETCH(cl_command_queue_properties, qprop)
1171 DEFINE_DEVINFO_FETCH(cl_device_exec_capabilities, execap)
1172 DEFINE_DEVINFO_FETCH(cl_device_svm_capabilities, svmcap)
1173 DEFINE_DEVINFO_FETCH(cl_device_terminate_capability_khr, termcap)
1174
1175 #define DEV_FETCH_LOC(type, var, loc) \
1176         type var = device_fetch_##type(ret, loc, chk, output)
1177 #define DEV_FETCH(type, var) DEV_FETCH_LOC(type, var, loc)
1178
1179 #define FMT_VAL(loc, ret, fmt, val) if (!ret->err) strbuf_append(loc->pname, &ret->str, fmt, val)
1180
1181 #define DEFINE_DEVINFO_SHOW(how, type, field, fmt) \
1182 void \
1183 device_info_##how(struct device_info_ret *ret, \
1184         const struct info_loc *loc, const struct device_info_checks* chk, \
1185         const struct opt_out *output) \
1186 { \
1187         DEV_FETCH(type, val); \
1188         if (!ret->err) FMT_VAL(loc, ret, fmt, val); \
1189 }
1190
1191 DEFINE_DEVINFO_SHOW(int, cl_uint, u32, "%" PRIu32)
1192 DEFINE_DEVINFO_SHOW(hex, cl_uint, u32, "%#" PRIx32)
1193 DEFINE_DEVINFO_SHOW(long, cl_ulong, u64, "%" PRIu64)
1194 DEFINE_DEVINFO_SHOW(sz, size_t, s, "%" PRIuS)
1195
1196 void
1197 device_info_str(struct device_info_ret *ret,
1198         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1199         const struct opt_out* UNUSED(output))
1200 {
1201         GET_STRING_LOC(ret, loc, clGetDeviceInfo, loc->dev, loc->param.dev);
1202 }
1203
1204 void
1205 device_info_bool(struct device_info_ret *ret,
1206         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1207         const struct opt_out *output)
1208 {
1209         DEV_FETCH(cl_bool, val);
1210         if (!ret->err) {
1211                 const char * const * str = (output->mode == CLINFO_HUMAN ?
1212                         bool_str : bool_raw_str);
1213                 strbuf_printf(&ret->str, "%s", str[val]);
1214         }
1215 }
1216
1217 void
1218 device_info_bits(struct device_info_ret *ret,
1219         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1220         const struct opt_out *output)
1221 {
1222         DEV_FETCH(cl_uint, val);
1223         if (!ret->err)
1224                 strbuf_printf(&ret->str, "%" PRIu32 " bits (%" PRIu32 " bytes)", val, val/8);
1225 }
1226
1227 void
1228 device_info_version(struct device_info_ret *ret,
1229         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1230         const struct opt_out *output)
1231 {
1232         DEV_FETCH(cl_version, val);
1233         if (!ret->err) {
1234                 strbuf_append(loc->pname, &ret->str, "%#" PRIx32, val);
1235                 if (output->mode == CLINFO_HUMAN) {
1236                         strbuf_version(loc->pname, &ret->str, val);
1237                 }
1238         }
1239 }
1240
1241 void
1242 device_info_ext_version(struct device_info_ret *ret,
1243         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1244         const struct opt_out *output)
1245 {
1246         cl_name_version *val = NULL;
1247         size_t szval = 0, numval = 0;
1248         GET_VAL_ARRAY(ret, loc);
1249         if (!ret->err) {
1250                 strbuf_name_version(loc->pname, &ret->str, val, numval, output);
1251         }
1252         free(val);
1253 }
1254
1255 size_t strbuf_mem(struct _strbuf *str, cl_ulong val, size_t szval)
1256 {
1257         double dbl = (double)val;
1258         size_t sfx = 0;
1259         while (dbl > 1024 && sfx < memsfx_end) {
1260                 dbl /= 1024;
1261                 ++sfx;
1262         }
1263         return sprintf(str->buf + szval, " (%.4lg%s)",
1264                 dbl, memsfx[sfx]);
1265 }
1266
1267 void
1268 device_info_mem(struct device_info_ret *ret,
1269         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1270         const struct opt_out *output)
1271 {
1272         DEV_FETCH(cl_ulong, val);
1273         if (!ret->err) {
1274                 size_t szval = strbuf_printf(&ret->str, "%" PRIu64, val);
1275                 if (output->mode == CLINFO_HUMAN && val > 1024)
1276                         strbuf_mem(&ret->str, val, szval);
1277         }
1278 }
1279
1280 void
1281 device_info_mem_int(struct device_info_ret *ret,
1282         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1283         const struct opt_out *output)
1284 {
1285         DEV_FETCH(cl_uint, val);
1286         if (!ret->err) {
1287                 size_t szval = strbuf_printf(&ret->str, "%" PRIu32, val);
1288                 if (output->mode == CLINFO_HUMAN && val > 1024)
1289                         strbuf_mem(&ret->str, val, szval);
1290         }
1291 }
1292
1293 void
1294 device_info_mem_sz(struct device_info_ret *ret,
1295         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1296         const struct opt_out *output)
1297 {
1298         DEV_FETCH(size_t, val);
1299         if (!ret->err) {
1300                 size_t szval = strbuf_printf(&ret->str, "%" PRIuS, val);
1301                 if (output->mode == CLINFO_HUMAN && val > 1024)
1302                         strbuf_mem(&ret->str, val, szval);
1303         }
1304 }
1305
1306 void
1307 device_info_free_mem_amd(struct device_info_ret *ret,
1308         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1309         const struct opt_out *output)
1310 {
1311         // Apparently, with the introduction of ROCm, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD
1312         // returns 1 or 2 values depending on how it's called: if it's called with a
1313         // szval < 2*sizeof(size_t), it will only return 1 value, otherwise it will return 2.
1314         // At least now these are documented in the ROCm source code: the first value
1315         // is the total amount of free memory, and the second is the size of the largest
1316         // free block. So let's just manually ask for both values
1317         size_t *val = NULL;
1318         size_t numval = 2, szval = numval*sizeof(*val);
1319         _GET_VAL_VALUES(ret, loc);
1320         if (!ret->err) {
1321                 size_t cursor = 0;
1322                 szval = 0;
1323                 for (cursor = 0; cursor < numval; ++cursor) {
1324                         if (szval > 0) {
1325                                 ret->str.buf[szval] = ' ';
1326                                 ++szval;
1327                         }
1328                         szval += sprintf(ret->str.buf + szval, "%" PRIuS, val[cursor]);
1329                         if (output->mode == CLINFO_HUMAN)
1330                                 szval += strbuf_mem(&ret->str, val[cursor]*UINT64_C(1024), szval);
1331                         ret->value.u64v.s[cursor] = val[cursor];
1332                 }
1333         }
1334         free(val);
1335 }
1336
1337 void
1338 device_info_time_offset(struct device_info_ret *ret,
1339         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1340         const struct opt_out *output)
1341 {
1342         GET_VAL(ret, loc, u64);
1343         if (!ret->err) {
1344                 size_t szval = 0;
1345                 time_t time = ret->value.u64/UINT64_C(1000000000);
1346                 szval += strbuf_printf(&ret->str, "%" PRIu64 "ns (", ret->value.u64);
1347                 szval += bufcpy(&ret->str, szval, ctime(&time));
1348                 /* overwrite ctime's newline with the closing parenthesis */
1349                 if (szval < ret->str.sz)
1350                         ret->str.buf[szval - 1] = ')';
1351         }
1352 }
1353
1354 void
1355 device_info_szptr_sep(struct device_info_ret *ret, const char *human_sep,
1356         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1357         const struct opt_out *output)
1358 {
1359         size_t *val = NULL;
1360         size_t szval = 0, numval = 0;
1361         GET_VAL_ARRAY(ret, loc);
1362         if (!ret->err) {
1363                 size_t counter = 0;
1364                 set_separator(output->mode == CLINFO_HUMAN ? human_sep : spc_str);
1365                 szval = 0;
1366                 for (counter = 0; counter < numval; ++counter) {
1367                         add_separator(&ret->str, &szval);
1368                         szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%" PRIuS, val[counter]);
1369                         if (szval >= ret->str.sz) {
1370                                 trunc_strbuf(&ret->str);
1371                                 break;
1372                         }
1373                 }
1374                 // TODO: ret->value.??? = val;
1375         }
1376         free(val);
1377 }
1378
1379 void
1380 device_info_szptr_times(struct device_info_ret *ret,
1381         const struct info_loc *loc, const struct device_info_checks* chk,
1382         const struct opt_out *output)
1383 {
1384         device_info_szptr_sep(ret, times_str, loc, chk, output);
1385 }
1386
1387 void
1388 device_info_szptr_comma(struct device_info_ret *ret,
1389         const struct info_loc *loc, const struct device_info_checks* chk,
1390         const struct opt_out *output)
1391 {
1392         device_info_szptr_sep(ret, comma_str, loc, chk, output);
1393 }
1394
1395 void
1396 getWGsizes(struct device_info_ret *ret, const struct info_loc *loc, size_t *wgm, size_t wgm_sz,
1397         const struct opt_out* UNUSED(output))
1398 {
1399         cl_int log_err;
1400
1401         cl_context_properties ctxpft[] = {
1402                 CL_CONTEXT_PLATFORM, (cl_context_properties)loc->plat,
1403                 0, 0 };
1404         cl_uint cursor = 0;
1405         cl_context ctx = NULL;
1406         cl_program prg = NULL;
1407         cl_kernel krn = NULL;
1408
1409         ret->err = CL_SUCCESS;
1410
1411         ctx = clCreateContext(ctxpft, 1, &loc->dev, NULL, NULL, &ret->err);
1412         if (REPORT_ERROR(&ret->err_str, ret->err, "create context")) goto out;
1413         prg = clCreateProgramWithSource(ctx, ARRAY_SIZE(sources), sources, NULL, &ret->err);
1414         if (REPORT_ERROR(&ret->err_str, ret->err, "create program")) goto out;
1415         ret->err = clBuildProgram(prg, 1, &loc->dev, NULL, NULL, NULL);
1416         log_err = REPORT_ERROR(&ret->err_str, ret->err, "build program");
1417
1418         /* for a program build failure, dump the log to stderr before bailing */
1419         if (log_err == CL_BUILD_PROGRAM_FAILURE) {
1420                 struct _strbuf logbuf;
1421                 init_strbuf(&logbuf, "program build log");
1422                 GET_STRING(&logbuf, ret->err,
1423                         clGetProgramBuildInfo, CL_PROGRAM_BUILD_LOG, "CL_PROGRAM_BUILD_LOG", prg, loc->dev);
1424                 if (ret->err == CL_SUCCESS) {
1425                         fflush(stdout);
1426                         fflush(stderr);
1427                         fputs("=== CL_PROGRAM_BUILD_LOG ===\n", stderr);
1428                         fputs(logbuf.buf, stderr);
1429                         fflush(stderr);
1430                 }
1431                 free_strbuf(&logbuf);
1432         }
1433         if (ret->err)
1434                 goto out;
1435
1436         for (cursor = 0; cursor < wgm_sz; ++cursor) {
1437                 strbuf_printf(&ret->str, "sum%u", 1<<cursor);
1438                 if (cursor == 0)
1439                         ret->str.buf[3] = 0; // scalar kernel is called 'sum'
1440                 krn = clCreateKernel(prg, ret->str.buf, &ret->err);
1441                 if (REPORT_ERROR(&ret->err_str, ret->err, "create kernel")) goto out;
1442                 ret->err = clGetKernelWorkGroupInfo(krn, loc->dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
1443                         sizeof(*wgm), wgm + cursor, NULL);
1444                 if (REPORT_ERROR(&ret->err_str, ret->err, "get kernel info")) goto out;
1445                 clReleaseKernel(krn);
1446                 krn = NULL;
1447         }
1448
1449 out:
1450         if (krn)
1451                 clReleaseKernel(krn);
1452         if (prg)
1453                 clReleaseProgram(prg);
1454         if (ctx)
1455                 clReleaseContext(ctx);
1456 }
1457
1458
1459 void
1460 device_info_wg(struct device_info_ret *ret,
1461         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1462         const struct opt_out *output)
1463 {
1464         /* preferred workgroup size multiple for each kernel
1465          * have not found a platform where the WG multiple changes,
1466          * but keep this flexible (this can grow up to 5)
1467          */
1468 #define NUM_KERNELS 1
1469         size_t wgm[NUM_KERNELS] = {0};
1470
1471         getWGsizes(ret, loc, wgm, NUM_KERNELS, output);
1472         if (!ret->err) {
1473                 strbuf_printf(&ret->str, "%" PRIuS, wgm[0]);
1474         }
1475         ret->value.s = wgm[0];
1476 }
1477
1478 void
1479 device_info_img_sz_2d(struct device_info_ret *ret,
1480         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1481         const struct opt_out *output)
1482 {
1483         struct info_loc loc2 = *loc;
1484         size_t width = 0, height = 0;
1485         _GET_VAL(ret, loc, height); /* HEIGHT */
1486         if (!ret->err) {
1487                 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_IMAGE2D_MAX_WIDTH);
1488                 _GET_VAL(ret, &loc2, width);
1489                 if (!ret->err) {
1490                         strbuf_printf(&ret->str, "%" PRIuS "x%" PRIuS, width, height);
1491                 }
1492         }
1493         ret->value.u32v.s[0] = width;
1494         ret->value.u32v.s[1] = height;
1495 }
1496
1497 void
1498 device_info_img_sz_intel_planar_yuv(struct device_info_ret *ret,
1499         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1500         const struct opt_out *output)
1501 {
1502         struct info_loc loc2 = *loc;
1503         size_t width = 0, height = 0;
1504         _GET_VAL(ret, loc, height); /* HEIGHT */
1505         if (!ret->err) {
1506                 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL);
1507                 _GET_VAL(ret, &loc2, width);
1508                 if (!ret->err) {
1509                         strbuf_printf(&ret->str, "%" PRIuS "x%" PRIuS, width, height);
1510                 }
1511         }
1512         ret->value.u32v.s[0] = width;
1513         ret->value.u32v.s[1] = height;
1514 }
1515
1516
1517 void
1518 device_info_img_sz_3d(struct device_info_ret *ret,
1519         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1520         const struct opt_out *output)
1521 {
1522         struct info_loc loc2 = *loc;
1523         size_t width = 0, height = 0, depth = 0;
1524         _GET_VAL(ret, loc, height); /* HEIGHT */
1525         if (!ret->err) {
1526                 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_IMAGE3D_MAX_WIDTH);
1527                 _GET_VAL(ret, &loc2, width);
1528                 if (!ret->err) {
1529                         RESET_LOC_PARAM(loc2, dev, CL_DEVICE_IMAGE3D_MAX_DEPTH);
1530                         _GET_VAL(ret, &loc2, depth);
1531                         if (!ret->err) {
1532                                 strbuf_printf(&ret->str, "%" PRIuS "x%" PRIuS "x%" PRIuS,
1533                                         width, height, depth);
1534                         }
1535                 }
1536         }
1537         ret->value.u32v.s[0] = width;
1538         ret->value.u32v.s[1] = height;
1539         ret->value.u32v.s[2] = depth;
1540 }
1541
1542
1543 void
1544 device_info_devtype(struct device_info_ret *ret,
1545         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1546         const struct opt_out *output)
1547 {
1548         DEV_FETCH(cl_device_type, val);
1549         if (!ret->err) {
1550                 /* iterate over device type strings, appending their textual form
1551                  * to ret->str */
1552                 cl_uint i = (cl_uint)actual_devtype_count;
1553                 const char * const *devstr = (output->mode == CLINFO_HUMAN ?
1554                         device_type_str : device_type_raw_str);
1555                 size_t szval = 0;
1556                 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1557                 for (; i > 0; --i) {
1558                         /* assemble CL_DEVICE_TYPE_* from index i */
1559                         cl_device_type cur = (cl_device_type)(1) << (i-1);
1560                         if (val & cur) {
1561                                 /* match: add separator if not first match */
1562                                 add_separator(&ret->str, &szval);
1563                                 szval += bufcpy(&ret->str, szval, devstr[i]);
1564                         }
1565                 }
1566                 /* check for extra bits */
1567                 if (szval < ret->str.sz) {
1568                         cl_device_type known_mask = ((cl_device_type)(1) << actual_devtype_count) - 1;
1569                         cl_device_type extra = val & ~known_mask;
1570                         if (extra) {
1571                                 add_separator(&ret->str, &szval);
1572                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra);
1573                         }
1574                 }
1575         }
1576 }
1577
1578 void
1579 device_info_cachetype(struct device_info_ret *ret,
1580         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1581         const struct opt_out *output)
1582 {
1583         DEV_FETCH(cl_device_mem_cache_type, val);
1584         if (!ret->err) {
1585                 const char * const *ar = (output->mode == CLINFO_HUMAN ?
1586                         cache_type_str : cache_type_raw_str);
1587                 bufcpy(&ret->str, 0, ar[val]);
1588         }
1589 }
1590
1591 void
1592 device_info_lmemtype(struct device_info_ret *ret,
1593         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1594         const struct opt_out *output)
1595 {
1596         DEV_FETCH(cl_device_local_mem_type, val);
1597         if (!ret->err) {
1598                 const char * const *ar = (output->mode == CLINFO_HUMAN ?
1599                         lmem_type_str : lmem_type_raw_str);
1600                 bufcpy(&ret->str, 0, ar[val]);
1601         }
1602         ret->value.lmemtype = val;
1603 }
1604
1605 void
1606 device_info_atomic_caps(struct device_info_ret *ret,
1607         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1608         const struct opt_out *output)
1609 {
1610         DEV_FETCH(cl_bitfield, val);
1611         if (!ret->err) {
1612                 size_t szval = 0;
1613                 cl_uint i = 0;
1614                 const char * const * capstr = (output->mode == CLINFO_HUMAN ?
1615                         atomic_cap_str : atomic_cap_raw_str);
1616                 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1617                 for (i = 0; i < atomic_cap_count; ++i) {
1618                         if (val & (1 << i)) {
1619                                 add_separator(&ret->str, &szval);
1620                                 szval += bufcpy(&ret->str, szval, capstr[i]);
1621                         }
1622                         if (szval >= ret->str.sz)
1623                                 break;
1624                 }
1625                 /* check for extra bits */
1626                 if (szval < ret->str.sz) {
1627                         cl_bitfield known_mask = ((cl_bitfield)(1) << atomic_cap_count) - 1;
1628                         cl_bitfield extra = val & ~known_mask;
1629                         if (extra) {
1630                                 add_separator(&ret->str, &szval);
1631                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra);
1632                         }
1633                 }
1634         }
1635 }
1636
1637 void
1638 device_info_device_enqueue_caps(struct device_info_ret *ret,
1639         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1640         const struct opt_out *output)
1641 {
1642         DEV_FETCH(cl_bitfield, val);
1643         if (!ret->err) {
1644                 size_t szval = 0;
1645                 cl_uint i = 0;
1646                 const char * const * capstr = (output->mode == CLINFO_HUMAN ?
1647                         device_enqueue_cap_str : device_enqueue_cap_raw_str);
1648                 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1649                 for (i = 0; i < device_enqueue_cap_count; ++i) {
1650                         if (val & (1 << i)) {
1651                                 add_separator(&ret->str, &szval);
1652                                 szval += bufcpy(&ret->str, szval, capstr[i]);
1653                         }
1654                         if (szval >= ret->str.sz)
1655                                 break;
1656                 }
1657                 /* check for extra bits */
1658                 if (szval < ret->str.sz) {
1659                         cl_bitfield known_mask = ((cl_bitfield)(1) << device_enqueue_cap_count) - 1;
1660                         cl_bitfield extra = val & ~known_mask;
1661                         if (extra) {
1662                                 add_separator(&ret->str, &szval);
1663                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra);
1664                         }
1665                 }
1666         }
1667 }
1668
1669 /* cl_arm_core_id */
1670 void
1671 device_info_core_ids(struct device_info_ret *ret,
1672         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1673         const struct opt_out *output)
1674 {
1675         DEV_FETCH(cl_ulong, val);
1676
1677         if (!ret->err) {
1678                 /* The value is a bitfield where each set bit corresponds to a core ID
1679                  * value that can be returned by the device-side function. We print them
1680                  * here as ranges, such as 0-4, 8-12 */
1681                 size_t szval = 0;
1682                 int range_start = -1;
1683                 int cur_bit = 0;
1684                 set_separator(empty_str);
1685 #define CORE_ID_END 64
1686                 do {
1687                         /* Find the start of the range */
1688                         while ((cur_bit < CORE_ID_END) && !((val >> cur_bit) & 1))
1689                                 ++cur_bit;
1690                         range_start = cur_bit++;
1691
1692                         /* Find the end of the range */
1693                         while ((cur_bit < CORE_ID_END) && ((val >> cur_bit) & 1))
1694                                 ++cur_bit;
1695
1696                         /* print the range [range_start, cur_bit[ */
1697                         if (range_start >= 0 && range_start < CORE_ID_END) {
1698                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1,
1699                                         "%s%d", sep, range_start);
1700                                 if (cur_bit - range_start > 1)
1701                                         szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1,
1702                                                 "-%d", cur_bit - 1);
1703                                 set_separator(comma_str);
1704                         }
1705                 } while (cur_bit < CORE_ID_END);
1706         }
1707         ret->value.u64 = val;
1708 }
1709
1710 /* cl_arm_job_slot_selection */
1711 void
1712 device_info_job_slots(struct device_info_ret *ret,
1713         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1714         const struct opt_out *output)
1715 {
1716         DEV_FETCH(cl_uint, val);
1717
1718         if (!ret->err) {
1719                 /* The value is a bitfield where each set bit corresponds to an available job slot.
1720                  * We print them here as ranges, such as 0-4, 8-12 */
1721                 size_t szval = 0;
1722                 int range_start = -1;
1723                 int cur_bit = 0;
1724                 set_separator(empty_str);
1725 #define JOB_SLOT_END 32
1726                 do {
1727                         /* Find the start of the range */
1728                         while ((cur_bit < JOB_SLOT_END) && !((val >> cur_bit) & 1))
1729                                 ++cur_bit;
1730                         range_start = cur_bit++;
1731
1732                         /* Find the end of the range */
1733                         while ((cur_bit < JOB_SLOT_END) && ((val >> cur_bit) & 1))
1734                                 ++cur_bit;
1735
1736                         /* print the range [range_start, cur_bit[ */
1737                         if (range_start >= 0 && range_start < JOB_SLOT_END) {
1738                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1,
1739                                         "%s%d", sep, range_start);
1740                                 if (cur_bit - range_start > 1)
1741                                         szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1,
1742                                                 "-%d", cur_bit - 1);
1743                                 set_separator(comma_str);
1744                         }
1745                 } while (cur_bit < JOB_SLOT_END);
1746         }
1747         ret->value.u32 = val;
1748 }
1749
1750 /* stringify a cl_device_topology_amd */
1751 void devtopo_str(struct device_info_ret *ret, const cl_device_topology_amd *devtopo)
1752 {
1753         switch (devtopo->raw.type) {
1754         case 0:
1755                 /* leave empty */
1756                 break;
1757         case CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD:
1758                 strbuf_printf(&ret->str, "PCI-E, %02x:%02x.%u",
1759                         (cl_uchar)(devtopo->pcie.bus),
1760                         devtopo->pcie.device, devtopo->pcie.function);
1761                 break;
1762         default:
1763                 strbuf_printf(&ret->str, "<unknown (%u): %u %u %u %u %u>",
1764                         devtopo->raw.type,
1765                         devtopo->raw.data[0], devtopo->raw.data[1],
1766                         devtopo->raw.data[2],
1767                         devtopo->raw.data[3], devtopo->raw.data[4]);
1768         }
1769 }
1770
1771 void
1772 device_info_devtopo_amd(struct device_info_ret *ret,
1773         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1774         const struct opt_out *output)
1775 {
1776         DEV_FETCH(cl_device_topology_amd, val);
1777         /* TODO how to do this in CLINFO_RAW mode */
1778         if (!ret->err) {
1779                 devtopo_str(ret, &val);
1780         }
1781 }
1782
1783 /* we assemble a cl_device_topology_amd struct from the NVIDIA info */
1784 void
1785 device_info_devtopo_nv(struct device_info_ret *ret,
1786         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1787         const struct opt_out *output)
1788 {
1789         struct info_loc loc2 = *loc;
1790         cl_device_topology_amd devtopo;
1791         DEV_FETCH(cl_uint, val); /* CL_DEVICE_PCI_BUS_ID_NV */
1792         if (!ret->err) {
1793                 devtopo.raw.type = CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD;
1794                 devtopo.pcie.bus = val & 0xff;
1795                 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_PCI_SLOT_ID_NV);
1796                 _GET_VAL(ret, &loc2, val);
1797
1798                 if (!ret->err) {
1799                         devtopo.pcie.device = (val >> 3) & 0xff;
1800                         devtopo.pcie.function = val & 7;
1801                         devtopo_str(ret, &devtopo);
1802                 }
1803                 ret->value.devtopo = devtopo;
1804         }
1805 }
1806
1807 /* NVIDIA Compute Capability */
1808 void
1809 device_info_cc_nv(struct device_info_ret *ret,
1810         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1811         const struct opt_out *output)
1812 {
1813         struct info_loc loc2 = *loc;
1814         cl_uint major = 0, minor = 0;
1815         _GET_VAL(ret, loc, major); /* MAJOR */
1816         if (!ret->err) {
1817                 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV);
1818                 _GET_VAL(ret, &loc2, minor);
1819                 if (!ret->err) {
1820                         strbuf_printf(&ret->str, "%" PRIu32 ".%" PRIu32 "", major, minor);
1821                 }
1822         }
1823         ret->value.u32v.s[0] = major;
1824         ret->value.u32v.s[1] = minor;
1825 }
1826
1827 /* AMD GFXIP */
1828 void
1829 device_info_gfxip_amd(struct device_info_ret *ret,
1830         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1831         const struct opt_out *output)
1832 {
1833         struct info_loc loc2 = *loc;
1834         cl_uint major = 0, minor = 0;
1835         _GET_VAL(ret, loc, major); /* MAJOR */
1836         if (!ret->err) {
1837                 RESET_LOC_PARAM(loc2, dev, CL_DEVICE_GFXIP_MINOR_AMD);
1838                 _GET_VAL(ret, &loc2, minor);
1839                 if (!ret->err) {
1840                         strbuf_printf(&ret->str, "%" PRIu32 ".%" PRIu32 "", major, minor);
1841                 }
1842         }
1843         ret->value.u32v.s[0] = major;
1844         ret->value.u32v.s[1] = minor;
1845 }
1846
1847
1848 /* Device Partition, CLINFO_HUMAN header */
1849 void
1850 device_info_partition_header(struct device_info_ret *ret,
1851         const struct info_loc *UNUSED(loc), const struct device_info_checks *chk,
1852         const struct opt_out* UNUSED(output))
1853 {
1854         cl_bool is_12 = dev_is_12(chk);
1855         cl_bool has_fission = dev_has_fission(chk);
1856         size_t szval = strbuf_printf(&ret->str, "(%s%s%s%s)",
1857                 (is_12 ? core : empty_str),
1858                 (is_12 && has_fission ? comma_str : empty_str),
1859                 chk->has_fission,
1860                 (!(is_12 || has_fission) ? na : empty_str));
1861
1862         ret->err = CL_SUCCESS;
1863
1864         if (szval >= ret->str.sz)
1865                 trunc_strbuf(&ret->str);
1866 }
1867
1868 /* Device partition properties */
1869 void
1870 device_info_partition_types(struct device_info_ret *ret,
1871         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1872         const struct opt_out *output)
1873 {
1874         size_t numval = 0, szval = 0, cursor = 0, slen = 0;
1875         cl_device_partition_property *val = NULL;
1876         const char * const *ptstr = (output->mode == CLINFO_HUMAN ?
1877                 partition_type_str : partition_type_raw_str);
1878
1879         set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1880
1881         GET_VAL_ARRAY(ret, loc);
1882
1883         szval = 0;
1884         if (!ret->err) {
1885                 for (cursor = 0; cursor < numval; ++cursor) {
1886                         int str_idx = -1;
1887
1888                         /* add separator for values past the first */
1889                         add_separator(&ret->str, &szval);
1890
1891                         switch (val[cursor]) {
1892                         case 0: str_idx = 0; break;
1893                         case CL_DEVICE_PARTITION_EQUALLY: str_idx = 1; break;
1894                         case CL_DEVICE_PARTITION_BY_COUNTS: str_idx = 2; break;
1895                         case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: str_idx = 3; break;
1896                         case CL_DEVICE_PARTITION_BY_NAMES_INTEL: str_idx = 4; break;
1897                         default:
1898                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "by <unknown> (%#" PRIxPTR ")", val[cursor]);
1899                                 break;
1900                         }
1901                         if (str_idx >= 0) {
1902                                 /* string length, minus _EXT */
1903                                 slen = strlen(ptstr[str_idx]);
1904                                 if (output->mode == CLINFO_RAW && str_idx > 0)
1905                                         slen -= 4;
1906                                 szval += bufcpy_len(&ret->str, szval, ptstr[str_idx], slen);
1907                         }
1908                         if (szval >= ret->str.sz) {
1909                                 trunc_strbuf(&ret->str);
1910                                 break;
1911                         }
1912                 }
1913                 // TODO ret->value.??? = val
1914         }
1915         free(val);
1916 }
1917
1918 void
1919 device_info_partition_types_ext(struct device_info_ret *ret,
1920         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1921         const struct opt_out *output)
1922 {
1923         size_t numval = 0, szval = 0, cursor = 0, slen = 0;
1924         cl_device_partition_property_ext *val = NULL;
1925         const char * const *ptstr = (output->mode == CLINFO_HUMAN ?
1926                 partition_type_str : partition_type_raw_str);
1927
1928         set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1929
1930         GET_VAL_ARRAY(ret, loc);
1931
1932         szval = 0;
1933         if (!ret->err) {
1934                 for (cursor = 0; cursor < numval; ++cursor) {
1935                         int str_idx = -1;
1936
1937                         /* add separator for values past the first */
1938                         add_separator(&ret->str, &szval);
1939
1940                         switch (val[cursor]) {
1941                         case 0: str_idx = 0; break;
1942                         case CL_DEVICE_PARTITION_EQUALLY_EXT: str_idx = 1; break;
1943                         case CL_DEVICE_PARTITION_BY_COUNTS_EXT: str_idx = 2; break;
1944                         case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT: str_idx = 3; break;
1945                         case CL_DEVICE_PARTITION_BY_NAMES_EXT: str_idx = 4; break;
1946                         default:
1947                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "by <unknown> (%#" PRIx64 ")", val[cursor]);
1948                                 break;
1949                         }
1950                         if (str_idx >= 0) {
1951                                 /* string length */
1952                                 slen = strlen(ptstr[str_idx]);
1953                                 strncpy(ret->str.buf + szval, ptstr[str_idx], slen);
1954                                 szval += slen;
1955                         }
1956                         if (szval >= ret->str.sz) {
1957                                 trunc_strbuf(&ret->str);
1958                                 break;
1959                         }
1960                 }
1961                 if (szval < ret->str.sz)
1962                         ret->str.buf[szval] = '\0';
1963                 // TODO ret->value.??? = val
1964         }
1965         free(val);
1966 }
1967
1968
1969 /* Device partition affinity domains */
1970 void
1971 device_info_partition_affinities(struct device_info_ret *ret,
1972         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
1973         const struct opt_out *output)
1974 {
1975         DEV_FETCH(cl_device_affinity_domain, val);
1976         if (!ret->err && val) {
1977                 /* iterate over affinity domain strings appending their textual form
1978                  * to ret->str */
1979                 size_t szval = 0;
1980                 cl_uint i = 0;
1981                 const char * const *affstr = (output->mode == CLINFO_HUMAN ?
1982                         affinity_domain_str : affinity_domain_raw_str);
1983                 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
1984                 for (i = 0; i < affinity_domain_count; ++i) {
1985                         cl_device_affinity_domain cur = (cl_device_affinity_domain)(1) << i;
1986                         if (val & cur) {
1987                                 /* match: add separator if not first match */
1988                                 add_separator(&ret->str, &szval);
1989                                 szval += bufcpy(&ret->str, szval, affstr[i]);
1990                         }
1991                         if (szval >= ret->str.sz)
1992                                 break;
1993                 }
1994                 /* check for extra bits */
1995                 if (szval < ret->str.sz) {
1996                         cl_device_affinity_domain known_mask = ((cl_device_affinity_domain)(1) << affinity_domain_count) - 1;
1997                         cl_device_affinity_domain extra = val & ~known_mask;
1998                         if (extra) {
1999                                 add_separator(&ret->str, &szval);
2000                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra);
2001                         }
2002                 }
2003         }
2004 }
2005
2006 void
2007 device_info_partition_affinities_ext(struct device_info_ret *ret,
2008         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
2009         const struct opt_out *output)
2010 {
2011         size_t numval = 0, szval = 0, cursor = 0, slen = 0;
2012         cl_device_partition_property_ext *val = NULL;
2013         const char * const *ptstr = (output->mode == CLINFO_HUMAN ?
2014                 affinity_domain_ext_str : affinity_domain_raw_ext_str);
2015
2016         set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
2017
2018         GET_VAL_ARRAY(ret, loc);
2019
2020         szval = 0;
2021         if (!ret->err) {
2022                 for (cursor = 0; cursor < numval; ++cursor) {
2023                         int str_idx = -1;
2024
2025                         /* add separator for values past the first */
2026                         add_separator(&ret->str, &szval);
2027
2028                         switch (val[cursor]) {
2029                         case CL_AFFINITY_DOMAIN_NUMA_EXT: str_idx = 0; break;
2030                         case CL_AFFINITY_DOMAIN_L4_CACHE_EXT: str_idx = 1; break;
2031                         case CL_AFFINITY_DOMAIN_L3_CACHE_EXT: str_idx = 2; break;
2032                         case CL_AFFINITY_DOMAIN_L2_CACHE_EXT: str_idx = 3; break;
2033                         case CL_AFFINITY_DOMAIN_L1_CACHE_EXT: str_idx = 4; break;
2034                         case CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT: str_idx = 5; break;
2035                         default:
2036                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "<unknown> (%#" PRIx64 ")", val[cursor]);
2037                                 break;
2038                         }
2039                         if (str_idx >= 0) {
2040                                 /* string length */
2041                                 const char *str = ptstr[str_idx];
2042                                 slen = strlen(str);
2043                                 strncpy(ret->str.buf + szval, str, slen);
2044                                 szval += slen;
2045                         }
2046                         if (szval >= ret->str.sz) {
2047                                 trunc_strbuf(&ret->str);
2048                                 break;
2049                         }
2050                 }
2051                 ret->str.buf[szval] = '\0';
2052                 // TODO: ret->value.??? = val
2053         }
2054         free(val);
2055 }
2056
2057 /* Preferred / native vector widths */
2058 void
2059 device_info_vecwidth(struct device_info_ret *ret,
2060         const struct info_loc *loc, const struct device_info_checks *chk,
2061         const struct opt_out *output)
2062 {
2063         struct info_loc loc2 = *loc;
2064         cl_uint preferred = 0, native = 0;
2065         _GET_VAL(ret, loc, preferred);
2066         if (!ret->err) {
2067                 /* we get called with PREFERRED, NATIVE is at +0x30 offset, except for HALF,
2068                  * which is at +0x08 */
2069                 loc2.param.dev +=
2070                         (loc2.param.dev == CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF ? 0x08 : 0x30);
2071                 /* TODO update loc2.sname */
2072                 _GET_VAL(ret, &loc2, native);
2073
2074                 if (!ret->err) {
2075                         size_t szval = 0;
2076                         const char *ext = (loc2.param.dev == CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF ?
2077                                 chk->has_half : (loc2.param.dev == CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE ?
2078                                 chk->has_double : NULL));
2079                         szval = strbuf_printf(&ret->str, "%8u / %-8u", preferred, native);
2080                         if (ext)
2081                                 sprintf(ret->str.buf + szval, " (%s)", *ext ? ext : na);
2082                 }
2083         }
2084         ret->value.u32v.s[0] = preferred;
2085         ret->value.u32v.s[1] = native;
2086 }
2087
2088 /* Floating-point configurations */
2089 void
2090 device_info_fpconf(struct device_info_ret *ret,
2091         const struct info_loc *loc, const struct device_info_checks *chk,
2092         const struct opt_out *output)
2093 {
2094         /* When in HUMAN output, we are called unconditionally,
2095          * so we have to do some manual checks ourselves */
2096         const cl_bool get_it = (output->mode != CLINFO_HUMAN) ||
2097                 (loc->param.dev == CL_DEVICE_SINGLE_FP_CONFIG) ||
2098                 (loc->param.dev == CL_DEVICE_HALF_FP_CONFIG && dev_has_half(chk)) ||
2099                 (loc->param.dev == CL_DEVICE_DOUBLE_FP_CONFIG && dev_has_double(chk));
2100
2101         DEV_FETCH(cl_device_fp_config, val);
2102         /* Sanitize! */
2103         if (ret->err && !get_it) {
2104                 ret->err = CL_SUCCESS;
2105                 val = 0;
2106         }
2107
2108
2109         if (!ret->err) {
2110                 size_t szval = 0;
2111                 cl_uint i = 0;
2112                 const char * const *fpstr = (output->mode == CLINFO_HUMAN ?
2113                         fp_conf_str : fp_conf_raw_str);
2114                 set_separator(vbar_str);
2115                 if (output->mode == CLINFO_HUMAN) {
2116                         const char *why = na;
2117                         switch (loc->param.dev) {
2118                         case CL_DEVICE_HALF_FP_CONFIG:
2119                                 if (get_it)
2120                                         why = chk->has_half;
2121                                 break;
2122                         case CL_DEVICE_SINGLE_FP_CONFIG:
2123                                 why = core;
2124                                 break;
2125                         case CL_DEVICE_DOUBLE_FP_CONFIG:
2126                                 if (get_it)
2127                                         why = chk->has_double;
2128                                 break;
2129                         default:
2130                                 /* "this can't happen" (unless OpenCL starts supporting _other_ floating-point formats, maybe) */
2131                                 fprintf(stderr, "unsupported floating-point configuration parameter %s\n", loc->pname);
2132                         }
2133                         /* show 'why' it's being shown */
2134                         szval += strbuf_printf(&ret->str, "(%s)", why);
2135                 }
2136                 if (get_it) {
2137                         size_t num_flags = fp_conf_count;
2138                         /* The last flag, CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT is only considered
2139                          * in the single-precision case. half and double don't consider it,
2140                          * so we skip it altogether */
2141                         if (loc->param.dev != CL_DEVICE_SINGLE_FP_CONFIG)
2142                                 num_flags -= 1;
2143
2144                         for (i = 0; i < num_flags; ++i) {
2145                                 cl_device_fp_config cur = (cl_device_fp_config)(1) << i;
2146                                 if (output->mode == CLINFO_HUMAN) {
2147                                         szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
2148                                                 line_pfx, fpstr[i], bool_str[!!(val & cur)]);
2149                                 } else if (val & cur) {
2150                                         add_separator(&ret->str, &szval);
2151                                         szval += bufcpy(&ret->str, szval, fpstr[i]);
2152                                 }
2153                         }
2154                 }
2155         }
2156 }
2157
2158 /* Queue properties */
2159 void
2160 device_info_qprop(struct device_info_ret *ret,
2161         const struct info_loc *loc, const struct device_info_checks *chk,
2162         const struct opt_out *output)
2163 {
2164         DEV_FETCH(cl_command_queue_properties, val);
2165         if (!ret->err) {
2166                 size_t szval = 0;
2167                 cl_uint i = 0;
2168                 const char * const *qpstr = (output->mode == CLINFO_HUMAN ?
2169                         queue_prop_str : queue_prop_raw_str);
2170                 set_separator(vbar_str);
2171                 for (i = 0; i < queue_prop_count; ++i) {
2172                         cl_command_queue_properties cur = (cl_command_queue_properties)(1) << i;
2173                         if (output->mode == CLINFO_HUMAN) {
2174                                 szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
2175                                         line_pfx, qpstr[i], bool_str[!!(val & cur)]);
2176                         } else if (val & cur) {
2177                                 add_separator(&ret->str, &szval);
2178                                 szval += bufcpy(&ret->str, szval, qpstr[i]);
2179                         }
2180                 }
2181                 if (output->mode == CLINFO_HUMAN && loc->param.dev == CL_DEVICE_QUEUE_PROPERTIES &&
2182                         dev_has_intel_local_thread(chk))
2183                         sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
2184                                 line_pfx, "Local thread execution (Intel)", bool_str[CL_TRUE]);
2185         }
2186 }
2187
2188 /* Execution capbilities */
2189 void
2190 device_info_execap(struct device_info_ret *ret,
2191         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
2192         const struct opt_out *output)
2193 {
2194         DEV_FETCH(cl_device_exec_capabilities, val);
2195         if (!ret->err) {
2196                 size_t szval = 0;
2197                 cl_uint i = 0;
2198                 const char * const *qpstr = (output->mode == CLINFO_HUMAN ?
2199                         execap_str : execap_raw_str);
2200                 set_separator(vbar_str);
2201                 for (i = 0; i < execap_count; ++i) {
2202                         cl_device_exec_capabilities cur = (cl_device_exec_capabilities)(1) << i;
2203                         if (output->mode == CLINFO_HUMAN) {
2204                                 szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
2205                                         line_pfx, qpstr[i], bool_str[!!(val & cur)]);
2206                         } else if (val & cur) {
2207                                 add_separator(&ret->str, &szval);
2208                                 szval += bufcpy(&ret->str, szval, qpstr[i]);
2209                         }
2210                 }
2211         }
2212 }
2213
2214 /* Arch bits and endianness (HUMAN) */
2215 void
2216 device_info_arch(struct device_info_ret *ret,
2217         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
2218         const struct opt_out *output)
2219 {
2220         DEV_FETCH(cl_uint, bits);
2221         struct info_loc loc2 = *loc;
2222         RESET_LOC_PARAM(loc2, dev, CL_DEVICE_ENDIAN_LITTLE);
2223
2224         if (!ret->err) {
2225                 DEV_FETCH_LOC(cl_bool, val, &loc2);
2226                 if (!ret->err) {
2227                         strbuf_printf(&ret->str, "%" PRIu32 ", %s", bits, endian_str[val]);
2228                 }
2229         }
2230 }
2231
2232 /* SVM capabilities */
2233 void
2234 device_info_svm_cap(struct device_info_ret *ret,
2235         const struct info_loc *loc, const struct device_info_checks *chk,
2236         const struct opt_out *output)
2237 {
2238         const cl_bool is_20 = dev_is_20(chk);
2239         const cl_bool checking_core = (loc->param.dev == CL_DEVICE_SVM_CAPABILITIES);
2240         const cl_bool has_amd_svm = (checking_core && dev_has_amd_svm(chk));
2241         DEV_FETCH(cl_device_svm_capabilities, val);
2242
2243         if (!ret->err) {
2244                 size_t szval = 0;
2245                 cl_uint i = 0;
2246                 const char * const *scstr = (output->mode == CLINFO_HUMAN ?
2247                         svm_cap_str : svm_cap_raw_str);
2248                 set_separator(vbar_str);
2249                 if (output->mode == CLINFO_HUMAN && checking_core) {
2250                         /* show 'why' it's being shown */
2251                         szval += strbuf_printf(&ret->str, "(%s%s%s)",
2252                                 (is_20 ? core : empty_str),
2253                                 (is_20 && has_amd_svm ? comma_str : empty_str),
2254                                 chk->has_amd_svm);
2255                 }
2256                 for (i = 0; i < svm_cap_count; ++i) {
2257                         cl_device_svm_capabilities cur = (cl_device_svm_capabilities)(1) << i;
2258                         if (output->mode == CLINFO_HUMAN) {
2259                                 szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s",
2260                                         line_pfx, scstr[i], bool_str[!!(val & cur)]);
2261                         } else if (val & cur) {
2262                                 add_separator(&ret->str, &szval);
2263                                 szval += bufcpy(&ret->str, szval, scstr[i]);
2264                         }
2265                 }
2266         }
2267 }
2268
2269 /* Device terminate capability */
2270 void
2271 device_info_terminate_capability(struct device_info_ret *ret,
2272         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
2273         const struct opt_out *output)
2274 {
2275         DEV_FETCH(cl_device_terminate_capability_khr, val);
2276         if (!ret->err && val) {
2277                 /* iterate over terminate capability strings appending their textual form
2278                  * to ret->str */
2279                 size_t szval = 0;
2280                 cl_uint i = 0;
2281                 const char * const *capstr = (output->mode == CLINFO_HUMAN ?
2282                         terminate_capability_str : terminate_capability_raw_str);
2283                 set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
2284                 for (i = 0; i < terminate_capability_count; ++i) {
2285                         cl_device_terminate_capability_khr cur = (cl_device_terminate_capability_khr)(1) << i;
2286                         if (val & cur) {
2287                                 /* match: add separator if not first match */
2288                                 add_separator(&ret->str, &szval);
2289                                 szval += bufcpy(&ret->str, szval, capstr[i]);
2290                         }
2291                         if (szval >= ret->str.sz)
2292                                 break;
2293                 }
2294                 /* check for extra bits */
2295                 if (szval < ret->str.sz) {
2296                         cl_device_terminate_capability_khr known_mask = ((cl_device_terminate_capability_khr)(1) << terminate_capability_count) - 1;
2297                         cl_device_terminate_capability_khr extra = val & ~known_mask;
2298                         if (extra) {
2299                                 add_separator(&ret->str, &szval);
2300                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra);
2301                         }
2302                 }
2303         }
2304 }
2305
2306 void
2307 device_info_p2p_dev_list(struct device_info_ret *ret,
2308         const struct info_loc *loc, const struct device_info_checks *chk,
2309         const struct opt_out* UNUSED(output))
2310 {
2311         // Contrary to most array values in OpenCL, the AMD platform does not support querying
2312         // CL_DEVICE_P2P_DEVICES_AMD with a NULL ptr to get the number of results.
2313         // The user is assumed to have queried for the CL_DEVICE_NUM_P2P_DEVICES_AMD first,
2314         // and to have allocated the return array beforehand.
2315         cl_device_id *val = NULL;
2316         size_t numval = chk->p2p_num_devs, szval = numval*sizeof(*val);
2317         _GET_VAL_VALUES(ret, loc);
2318         if (!ret->err) {
2319                 size_t cursor = 0;
2320                 szval = 0;
2321                 for (cursor= 0; cursor < numval; ++cursor) {
2322                         if (szval > 0) {
2323                                 ret->str.buf[szval] = ' ';
2324                                 ++szval;
2325                         }
2326                         szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%p", (void*)val[cursor]);
2327                 }
2328                 // TODO: ret->value.??? = val;
2329         }
2330         free(val);
2331 }
2332
2333 void
2334 device_info_interop_list(struct device_info_ret *ret,
2335         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
2336         const struct opt_out *output)
2337 {
2338         cl_uint *val = NULL;
2339         size_t szval = 0, numval = 0;
2340         GET_VAL_ARRAY(ret, loc);
2341         if (!ret->err) {
2342                 size_t cursor = 0;
2343                 const cl_interop_name *interop_name_end = cl_interop_names + num_known_interops;
2344                 cl_uint human_raw = output->mode - CLINFO_HUMAN;
2345                 const char *groupsep = (output->mode == CLINFO_HUMAN ? comma_str : vbar_str);
2346                 cl_bool first = CL_TRUE;
2347                 szval = 0;
2348                 for (cursor = 0; cursor < numval; ++cursor) {
2349                         cl_uint current = val[cursor];
2350                         if (!current && cursor < numval - 1) {
2351                                 /* A null value is used as group terminator, but we only print it
2352                                  * if it's not the final one
2353                                  */
2354                                 szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%s", groupsep);
2355                                 first = CL_TRUE;
2356                         }
2357                         if (current) {
2358                                 cl_bool found = CL_FALSE;
2359                                 const cl_interop_name *n = cl_interop_names;
2360
2361                                 if (!first) {
2362                                         ret->str.buf[szval] = ' ';
2363                                         ++szval;
2364                                 }
2365
2366                                 while (n < interop_name_end) {
2367                                         if (current >= n->from && current <= n->to) {
2368                                                 found = CL_TRUE;
2369                                                 break;
2370                                         }
2371                                         ++n;
2372                                 }
2373                                 if (found) {
2374                                         cl_uint i = current - n->from;
2375                                         szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%s", n->value[i][human_raw]);
2376                                 } else {
2377                                         szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx32, val[cursor]);
2378                                 }
2379                                 first = CL_FALSE;
2380                         }
2381                         if (szval >= ret->str.sz) {
2382                                 trunc_strbuf(&ret->str);
2383                                 break;
2384                         }
2385                 }
2386                 // TODO: ret->value.??? = val;
2387         }
2388         free(val);
2389 }
2390
2391 void device_info_uuid(struct device_info_ret *ret,
2392         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
2393         const struct opt_out *output)
2394 {
2395         cl_uchar uuid[CL_UUID_SIZE_KHR];
2396         _GET_VAL(ret, loc, uuid);
2397         if (!ret->err) {
2398                 strbuf_printf(&ret->str,
2399                         "%02x%02x%02x%02x-"
2400                         "%02x%02x-"
2401                         "%02x%02x-"
2402                         "%02x%02x-"
2403                         "%02x%02x%02x%02x%02x%02x",
2404                         uuid[0],  uuid[1],  uuid[2],  uuid[3],  uuid[4],
2405                         uuid[5],  uuid[6],
2406                         uuid[7],  uuid[8],
2407                         uuid[9],  uuid[10],
2408                         uuid[11], uuid[12], uuid[13], uuid[14], uuid[15]);
2409         }
2410 }
2411
2412 void device_info_luid(struct device_info_ret *ret,
2413         const struct info_loc *loc, const struct device_info_checks* UNUSED(chk),
2414         const struct opt_out *output)
2415 {
2416         cl_uchar uuid[CL_LUID_SIZE_KHR];
2417         _GET_VAL(ret, loc, uuid);
2418         if (!ret->err) {
2419                 /* TODO not sure this is the correct representation for LUIDs? */
2420                 strbuf_printf(&ret->str, "%02x%02x-%02x%02x%02x%02x%02x%02x",
2421                         uuid[0], uuid[1],
2422                         uuid[2], uuid[3], uuid[4], uuid[5], uuid[6], uuid[7]);
2423         }
2424 }
2425
2426
2427 /*
2428  * Device info traits
2429  */
2430
2431 /* A CL_FALSE param means "just print pname" */
2432
2433 struct device_info_traits {
2434         enum output_modes output_mode;
2435         cl_device_info param; // CL_DEVICE_*
2436         const char *sname; // "CL_DEVICE_*"
2437         const char *pname; // "Device *"
2438         const char *sfx; // suffix for the output in non-raw mode
2439         /* pointer to function that retrieves the parameter */
2440         void (*show_func)(struct device_info_ret *,
2441                 const struct info_loc *, const struct device_info_checks *,
2442                 const struct opt_out *);
2443         /* pointer to function that checks if the parameter should be retrieved */
2444         cl_bool (*check_func)(const struct device_info_checks *);
2445 };
2446
2447 #define DINFO_SFX(symbol, name, sfx, typ) symbol, #symbol, name, sfx, device_info_##typ
2448 #define DINFO(symbol, name, typ) symbol, #symbol, name, NULL, device_info_##typ
2449
2450 struct device_info_traits dinfo_traits[] = {
2451         { CLINFO_BOTH, DINFO(CL_DEVICE_NAME, "Device Name", str), NULL },
2452         { CLINFO_BOTH, DINFO(CL_DEVICE_VENDOR, "Device Vendor", str), NULL },
2453         { CLINFO_BOTH, DINFO(CL_DEVICE_VENDOR_ID, "Device Vendor ID", hex), NULL },
2454         { CLINFO_BOTH, DINFO(CL_DEVICE_VERSION, "Device Version", str), NULL },
2455
2456         /* This has to be made before calling NUMERIC_VERSION , since to know if it's supported
2457          * we need to know about the extensions */
2458         { CLINFO_BOTH, DINFO(CL_DEVICE_EXTENSIONS, "Device Extensions", str), NULL },
2459         { CLINFO_BOTH, DINFO(CL_DEVICE_EXTENSIONS_WITH_VERSION, "Device Extensions with Version", ext_version), dev_has_ext_ver },
2460
2461         { CLINFO_BOTH, DINFO(CL_DEVICE_UUID_KHR, "Device UUID", uuid), dev_has_device_uuid },
2462         { CLINFO_BOTH, DINFO(CL_DRIVER_UUID_KHR, "Driver UUID", uuid), dev_has_device_uuid },
2463         { CLINFO_BOTH, DINFO(CL_DEVICE_LUID_VALID_KHR, "Valid Device LUID", bool), dev_has_device_uuid },
2464         { CLINFO_BOTH, DINFO(CL_DEVICE_LUID_KHR, "Device LUID", luid), dev_has_device_uuid },
2465         { CLINFO_BOTH, DINFO(CL_DEVICE_NODE_MASK_KHR, "Device Node Mask", hex), dev_has_device_uuid },
2466
2467         { CLINFO_BOTH, DINFO(CL_DEVICE_NUMERIC_VERSION, "Device Numeric Version", version), dev_has_ext_ver },
2468         { CLINFO_BOTH, DINFO(CL_DRIVER_VERSION, "Driver Version", str), NULL },
2469         { CLINFO_BOTH, DINFO(CL_DEVICE_OPENCL_C_VERSION, "Device OpenCL C Version", str), dev_is_11 },
2470         { CLINFO_BOTH, DINFO(CL_DEVICE_OPENCL_C_ALL_VERSIONS, "Device OpenCL C all versions", ext_version), dev_has_ext_ver },
2471         { CLINFO_BOTH, DINFO(CL_DEVICE_OPENCL_C_FEATURES, "Device OpenCL C features", ext_version), dev_is_30 },
2472
2473         { CLINFO_BOTH, DINFO(CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT, "Device C++ for OpenCL Numeric Version", version), dev_has_cxx_for_opencl },
2474
2475         { CLINFO_BOTH, DINFO(CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED, "Latest comfornace test passed", str), dev_is_30 },
2476         { CLINFO_BOTH, DINFO(CL_DEVICE_TYPE, "Device Type", devtype), NULL },
2477
2478         { CLINFO_BOTH, DINFO(CL_DEVICE_BOARD_NAME_AMD, "Device Board Name (AMD)", str), dev_has_amd },
2479         { CLINFO_BOTH, DINFO(CL_DEVICE_PCIE_ID_AMD, "Device PCI-e ID (AMD)", hex), dev_has_amd },
2480         { CLINFO_BOTH, DINFO(CL_DEVICE_TOPOLOGY_AMD, "Device Topology (AMD)", devtopo_amd), dev_has_amd },
2481
2482         /* Device Topology (NV) is multipart, so different for HUMAN and RAW */
2483         { CLINFO_HUMAN, DINFO(CL_DEVICE_PCI_BUS_ID_NV, "Device Topology (NV)", devtopo_nv), dev_has_nv },
2484         { CLINFO_RAW, DINFO(CL_DEVICE_PCI_BUS_ID_NV, "Device PCI bus (NV)", int), dev_has_nv },
2485         { CLINFO_RAW, DINFO(CL_DEVICE_PCI_SLOT_ID_NV, "Device PCI slot (NV)", int), dev_has_nv },
2486
2487         { CLINFO_BOTH, DINFO(CL_DEVICE_PROFILE, "Device Profile", str), NULL },
2488         { CLINFO_BOTH, DINFO(CL_DEVICE_AVAILABLE, "Device Available", bool), NULL },
2489         { CLINFO_BOTH, DINFO(CL_DEVICE_COMPILER_AVAILABLE, "Compiler Available", bool), NULL },
2490         { CLINFO_BOTH, DINFO(CL_DEVICE_LINKER_AVAILABLE, "Linker Available", bool), dev_is_12 },
2491
2492         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_COMPUTE_UNITS, "Max compute units", int), NULL },
2493         { CLINFO_HUMAN, DINFO(CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM, "Available core IDs", core_ids), dev_has_arm_core_id_v2 },
2494         { CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM, "Available core IDs", long), dev_has_arm_core_id_v2 },
2495         { CLINFO_HUMAN, DINFO(CL_DEVICE_JOB_SLOTS_ARM, "Available job slots (ARM)", job_slots), dev_has_arm_job_slots },
2496         { CLINFO_RAW, DINFO(CL_DEVICE_JOB_SLOTS_ARM, "Available job slots (ARM)", int), dev_has_arm_job_slots },
2497         { CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, "SIMD per compute unit (AMD)", int), dev_is_gpu_amd },
2498         { CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_WIDTH_AMD, "SIMD width (AMD)", int), dev_is_gpu_amd },
2499         { CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, "SIMD instruction width (AMD)", int), dev_is_gpu_amd },
2500         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_MAX_CLOCK_FREQUENCY, "Max clock frequency", "MHz", int), NULL },
2501
2502         /* Device Compute Capability (NV) is multipart, so different for HUMAN and RAW */
2503         { CLINFO_HUMAN, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, "Compute Capability (NV)", cc_nv), dev_has_nv },
2504         { CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, INDENT "Compute Capability Major (NV)", int), dev_has_nv },
2505         { CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, INDENT "Compute Capability Minor (NV)", int), dev_has_nv },
2506
2507         /* GFXIP (AMD) is multipart, so different for HUMAN and RAW */
2508         /* TODO: find a better human-friendly name than GFXIP; v3 of the cl_amd_device_attribute_query
2509          * extension specification calls it “core engine GFXIP”, which honestly is not better than
2510          * our name choice. */
2511         { CLINFO_HUMAN, DINFO(CL_DEVICE_GFXIP_MAJOR_AMD, "Graphics IP (AMD)", gfxip_amd), dev_is_gpu_amd },
2512         { CLINFO_RAW, DINFO(CL_DEVICE_GFXIP_MAJOR_AMD, INDENT "Graphics IP MAJOR (AMD)", int), dev_is_gpu_amd },
2513         { CLINFO_RAW, DINFO(CL_DEVICE_GFXIP_MINOR_AMD, INDENT "Graphics IP MINOR (AMD)", int), dev_is_gpu_amd },
2514
2515         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_CORE_TEMPERATURE_ALTERA, "Core Temperature (Altera)", " C", int), dev_has_altera_dev_temp },
2516
2517         /* Device partition support: summary is only presented in HUMAN case */
2518         { CLINFO_HUMAN, DINFO(CL_DEVICE_PARTITION_MAX_SUB_DEVICES, "Device Partition", partition_header), dev_has_partition },
2519         { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_MAX_SUB_DEVICES, INDENT "Max number of sub-devices", int), dev_is_12 },
2520         { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_PROPERTIES, INDENT "Supported partition types", partition_types), dev_is_12 },
2521         { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_AFFINITY_DOMAIN, INDENT "Supported affinity domains", partition_affinities), dev_is_12 },
2522         { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_TYPES_EXT, INDENT "Supported partition types (ext)", partition_types_ext), dev_has_fission },
2523         { CLINFO_BOTH, DINFO(CL_DEVICE_AFFINITY_DOMAINS_EXT, INDENT "Supported affinity domains (ext)", partition_affinities_ext), dev_has_fission },
2524
2525         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, "Max work item dimensions", int), NULL },
2526         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_ITEM_SIZES, "Max work item sizes", szptr_times), NULL },
2527         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_GROUP_SIZE, "Max work group size", sz), NULL },
2528
2529         /* cl_amd_device_attribute_query v4 */
2530         { CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD, "Preferred work group size (AMD)", sz), dev_has_amd_v4 },
2531         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD, "Max work group size (AMD)", sz), dev_has_amd_v4 },
2532
2533         { CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, "Preferred work group size multiple (device)", sz), dev_is_30 },
2534         { CLINFO_BOTH, DINFO(CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, "Preferred work group size multiple (kernel)", wg), dev_has_compiler_11 },
2535         { CLINFO_BOTH, DINFO(CL_DEVICE_WARP_SIZE_NV, "Warp size (NV)", int), dev_has_nv },
2536         { CLINFO_BOTH, DINFO(CL_DEVICE_WAVEFRONT_WIDTH_AMD, "Wavefront width (AMD)", int), dev_is_gpu_amd },
2537         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_NUM_SUB_GROUPS, "Max sub-groups per work group", int), dev_is_21 },
2538         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR, "Max named sub-group barriers", int), dev_has_subgroup_named_barrier },
2539         { CLINFO_BOTH, DINFO(CL_DEVICE_SUB_GROUP_SIZES_INTEL, "Sub-group sizes (Intel)", szptr_comma), dev_has_intel_required_subgroup_size },
2540
2541         /* Preferred/native vector widths: header is only presented in HUMAN case, that also pairs
2542          * PREFERRED and NATIVE in a single line */
2543 #define DINFO_VECWIDTH(Type, type) \
2544         { CLINFO_HUMAN, DINFO(CL_DEVICE_PREFERRED_VECTOR_WIDTH_##Type, INDENT #type, vecwidth), NULL }, \
2545         { CLINFO_RAW, DINFO(CL_DEVICE_PREFERRED_VECTOR_WIDTH_##Type, INDENT #type, int), NULL }, \
2546         { CLINFO_RAW, DINFO(CL_DEVICE_NATIVE_VECTOR_WIDTH_##Type, INDENT #type, int), dev_is_11 }
2547
2548         { CLINFO_HUMAN, DINFO(CL_FALSE, "Preferred / native vector sizes", str), NULL },
2549         DINFO_VECWIDTH(CHAR, char),
2550         DINFO_VECWIDTH(SHORT, short),
2551         DINFO_VECWIDTH(INT, int),
2552         DINFO_VECWIDTH(LONG, long),
2553         DINFO_VECWIDTH(HALF, half), /* this should be excluded for 1.0 */
2554         DINFO_VECWIDTH(FLOAT, float),
2555         DINFO_VECWIDTH(DOUBLE, double),
2556
2557         /* Floating point configurations */
2558 #define DINFO_FPCONF(Type, type, cond) \
2559         { CLINFO_HUMAN, DINFO(CL_DEVICE_##Type##_FP_CONFIG, #type "-precision Floating-point support", fpconf), NULL }, \
2560         { CLINFO_RAW, DINFO(CL_DEVICE_##Type##_FP_CONFIG, #type "-precision Floating-point support", fpconf), cond }
2561
2562         DINFO_FPCONF(HALF, Half, dev_has_half),
2563         DINFO_FPCONF(SINGLE, Single, NULL),
2564         DINFO_FPCONF(DOUBLE, Double, dev_has_double),
2565
2566         /* Address bits and endianness are written together for HUMAN, separate for RAW */
2567         { CLINFO_HUMAN, DINFO(CL_DEVICE_ADDRESS_BITS, "Address bits", arch), NULL },
2568         { CLINFO_RAW, DINFO(CL_DEVICE_ADDRESS_BITS, "Address bits", int), NULL },
2569         { CLINFO_RAW, DINFO(CL_DEVICE_ENDIAN_LITTLE, "Little Endian", bool), NULL },
2570
2571         /* Global memory */
2572         { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_SIZE, "Global memory size", mem), NULL },
2573         { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, "Global free memory (AMD)", free_mem_amd), dev_is_gpu_amd },
2574         { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, "Global memory channels (AMD)", int), dev_is_gpu_amd },
2575         { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, "Global memory banks per channel (AMD)", int), dev_is_gpu_amd },
2576         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, "Global memory bank width (AMD)", bytes_str, int), dev_is_gpu_amd },
2577         { CLINFO_BOTH, DINFO(CL_DEVICE_ERROR_CORRECTION_SUPPORT, "Error Correction support", bool), NULL },
2578         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_MEM_ALLOC_SIZE, "Max memory allocation", mem), NULL },
2579         { CLINFO_BOTH, DINFO(CL_DEVICE_HOST_UNIFIED_MEMORY, "Unified memory for Host and Device", bool), dev_is_11 },
2580         { CLINFO_BOTH, DINFO(CL_DEVICE_INTEGRATED_MEMORY_NV, "Integrated memory (NV)", bool), dev_has_nv },
2581
2582         { CLINFO_BOTH, DINFO(CL_DEVICE_SVM_CAPABILITIES, "Shared Virtual Memory (SVM) capabilities", svm_cap), dev_has_svm },
2583         { CLINFO_BOTH, DINFO(CL_DEVICE_SVM_CAPABILITIES_ARM, "Shared Virtual Memory (SVM) capabilities (ARM)", svm_cap), dev_has_arm_svm },
2584
2585         /* Alignment */
2586         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, "Minimum alignment for any data type", bytes_str, int), NULL },
2587         { CLINFO_HUMAN, DINFO(CL_DEVICE_MEM_BASE_ADDR_ALIGN, "Alignment of base address", bits), NULL },
2588         { CLINFO_RAW, DINFO(CL_DEVICE_MEM_BASE_ADDR_ALIGN, "Alignment of base address", int), NULL },
2589
2590         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PAGE_SIZE_QCOM, "Page size (QCOM)", bytes_str, sz), dev_has_qcom_ext_host_ptr },
2591         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM, "External memory padding (QCOM)", bytes_str, sz), dev_has_qcom_ext_host_ptr },
2592
2593         /* Atomics alignment, with HUMAN-only header */
2594         { CLINFO_HUMAN, DINFO(CL_FALSE, "Preferred alignment for atomics", str), dev_is_20 },
2595         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, INDENT "SVM", bytes_str, int), dev_is_20 },
2596         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, INDENT "Global", bytes_str, int), dev_is_20 },
2597         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, INDENT "Local", bytes_str, int), dev_is_20 },
2598
2599         /* 3.0+ Atomic memory and fence capabilities */
2600         { CLINFO_BOTH, DINFO(CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, "Atomic memory capabilities", atomic_caps), dev_is_30 },
2601         { CLINFO_BOTH, DINFO(CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, "Atomic fence capabilities", atomic_caps), dev_is_30 },
2602
2603         /* Global variables. TODO some 1.2 devices respond to this too */
2604         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, "Max size for global variable", mem), dev_is_20 },
2605         { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, "Preferred total size of global vars", mem), dev_is_20 },
2606
2607         /* Global memory cache */
2608         { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, "Global Memory cache type", cachetype), NULL },
2609         { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, "Global Memory cache size", mem), dev_has_cache },
2610         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, "Global Memory cache line size", " bytes", int), dev_has_cache },
2611
2612         /* Image support */
2613         { CLINFO_BOTH, DINFO(CL_DEVICE_IMAGE_SUPPORT, "Image support", bool), NULL },
2614         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_SAMPLERS, INDENT "Max number of samplers per kernel", int), dev_has_images },
2615         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, INDENT "Max size for 1D images from buffer", pixels_str, sz), dev_has_images_12 },
2616         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, INDENT "Max 1D or 2D image array size", images_str, sz), dev_has_images_12 },
2617         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, INDENT "Base address alignment for 2D image buffers", bytes_str, sz), dev_has_image2d_buffer },
2618         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_PITCH_ALIGNMENT, INDENT "Pitch alignment for 2D image buffers", pixels_str, sz), dev_has_image2d_buffer },
2619
2620         /* Image dimensions are split for RAW, combined for HUMAN */
2621         { CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_IMAGE2D_MAX_HEIGHT, INDENT "Max 2D image size",  pixels_str, img_sz_2d), dev_has_images },
2622         { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE2D_MAX_HEIGHT, INDENT "Max 2D image height",  sz), dev_has_images },
2623         { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE2D_MAX_WIDTH, INDENT "Max 2D image width",  sz), dev_has_images },
2624         { CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, INDENT "Max planar YUV image size",  pixels_str, img_sz_2d), dev_has_intel_planar_yuv },
2625         { CLINFO_RAW, DINFO(CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, INDENT "Max planar YUV image height",  sz), dev_has_intel_planar_yuv },
2626         { CLINFO_RAW, DINFO(CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL, INDENT "Max planar YUV image width",  sz), dev_has_intel_planar_yuv },
2627         { CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_IMAGE3D_MAX_HEIGHT, INDENT "Max 3D image size",  pixels_str, img_sz_3d), dev_has_images },
2628         { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_HEIGHT, INDENT "Max 3D image height",  sz), dev_has_images },
2629         { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_WIDTH, INDENT "Max 3D image width",  sz), dev_has_images },
2630         { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_DEPTH, INDENT "Max 3D image depth",  sz), dev_has_images },
2631
2632         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_READ_IMAGE_ARGS, INDENT "Max number of read image args", int), dev_has_images },
2633         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WRITE_IMAGE_ARGS, INDENT "Max number of write image args", int), dev_has_images },
2634         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, INDENT "Max number of read/write image args", int), dev_has_images_20 },
2635
2636         /* Pipes */
2637         { CLINFO_BOTH, DINFO(CL_DEVICE_PIPE_SUPPORT, "Pipe support", bool), dev_is_30 },
2638         /* TODO FIXME: the above should be true if dev is [2.0, 3.0[, and the next properties should be nested */
2639         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_PIPE_ARGS, "Max number of pipe args", int), dev_is_20 },
2640         { CLINFO_BOTH, DINFO(CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, "Max active pipe reservations", int), dev_is_20 },
2641         { CLINFO_BOTH, DINFO(CL_DEVICE_PIPE_MAX_PACKET_SIZE, "Max pipe packet size", mem_int), dev_is_20 },
2642
2643         /* Local memory */
2644         { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_TYPE, "Local memory type", lmemtype), NULL },
2645         { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_SIZE, "Local memory size", mem), dev_has_lmem },
2646         { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, "Local memory size per CU (AMD)", mem), dev_is_gpu_amd },
2647         { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_BANKS_AMD, "Local memory banks (AMD)", int), dev_is_gpu_amd },
2648         { CLINFO_BOTH, DINFO(CL_DEVICE_REGISTERS_PER_BLOCK_NV, "Registers per block (NV)", int), dev_has_nv },
2649
2650         /* Constant memory */
2651         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_CONSTANT_ARGS, "Max number of constant args", int), NULL },
2652         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, "Max constant buffer size", mem), NULL },
2653         { CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD, "Preferred constant buffer size (AMD)", mem_sz), dev_has_amd_v4 },
2654
2655         /* Generic address space support */
2656         { CLINFO_BOTH, DINFO(CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT, "Generic address space support", bool), dev_is_30},
2657
2658         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_PARAMETER_SIZE, "Max size of kernel argument", mem), NULL },
2659         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT, "Max number of atomic counters", sz), dev_has_atomic_counters },
2660
2661         /* Queue properties */
2662         { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_PROPERTIES, "Queue properties", qprop), dev_not_20 },
2663         { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, "Queue properties (on host)", qprop), dev_is_20 },
2664         { CLINFO_BOTH, DINFO(CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES, "Device enqueue capabilities", device_enqueue_caps), dev_is_30 },
2665         /* TODO FIXME: the above should be true if dev is [2.0, 3.0[, and the next properties should be nested */
2666         { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, "Queue properties (on device)", qprop), dev_is_20 },
2667         { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, INDENT "Preferred size", mem), dev_is_20 },
2668         { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, INDENT "Max size", mem), dev_is_20 },
2669         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ON_DEVICE_QUEUES, "Max queues on device", int), dev_is_20 },
2670         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ON_DEVICE_EVENTS, "Max events on device", int), dev_is_20 },
2671
2672         /* Terminate context */
2673         { CLINFO_BOTH, DINFO(CL_DEVICE_TERMINATE_CAPABILITY_KHR_1x, "Terminate capability (1.2 define)", terminate_capability), dev_has_terminate_context },
2674         { CLINFO_BOTH, DINFO(CL_DEVICE_TERMINATE_CAPABILITY_KHR_2x, "Terminate capability (2.x define)", terminate_capability), dev_has_terminate_context },
2675
2676         /* Interop */
2677         { CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, "Prefer user sync for interop", bool), dev_is_12 },
2678         { CLINFO_BOTH, DINFO(CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL, "Number of simultaneous interops (Intel)", int), dev_has_simultaneous_sharing },
2679         { CLINFO_BOTH, DINFO(CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL, "Simultaneous interops", interop_list), dev_has_simultaneous_sharing },
2680
2681         /* P2P buffer copy */
2682         { CLINFO_BOTH, DINFO(CL_DEVICE_NUM_P2P_DEVICES_AMD, "Number of P2P devices (AMD)", int), dev_has_p2p },
2683         { CLINFO_BOTH, DINFO(CL_DEVICE_P2P_DEVICES_AMD, "P2P devices (AMD)", p2p_dev_list), dev_has_p2p_devs },
2684
2685         /* Profiling resolution */
2686         { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PROFILING_TIMER_RESOLUTION, "Profiling timer resolution", "ns", sz), NULL },
2687         { CLINFO_HUMAN, DINFO(CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, "Profiling timer offset since Epoch (AMD)", time_offset), dev_has_amd },
2688         { CLINFO_RAW, DINFO(CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, "Profiling timer offset since Epoch (AMD)", long), dev_has_amd },
2689
2690         /* Kernel execution capabilities */
2691         { CLINFO_BOTH, DINFO(CL_DEVICE_EXECUTION_CAPABILITIES, "Execution capabilities", execap), NULL },
2692         { CLINFO_BOTH, DINFO(CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, INDENT "Non-uniform work-groups",  bool), dev_is_30 },
2693         { CLINFO_BOTH, DINFO(CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT, INDENT "Work-group collective functions",  bool), dev_is_30 },
2694         { CLINFO_BOTH, DINFO(CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, INDENT "Sub-group independent forward progress", bool), dev_is_21 },
2695         { CLINFO_BOTH, DINFO(CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD, INDENT "Thread trace supported (AMD)", bool), dev_is_gpu_amd },
2696         { CLINFO_BOTH, DINFO(CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, INDENT "Kernel execution timeout (NV)", bool), dev_has_nv },
2697         { CLINFO_BOTH, DINFO(CL_DEVICE_GPU_OVERLAP_NV, "Concurrent copy and kernel execution (NV)", bool), dev_has_nv },
2698         { CLINFO_BOTH, DINFO(CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV, INDENT "Number of async copy engines", int), dev_has_nv },
2699         { CLINFO_BOTH, DINFO(CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD, INDENT "Number of async queues (AMD)", int), dev_has_amd_v4 },
2700         /* TODO FIXME undocumented, experimental */
2701         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_REAL_TIME_COMPUTE_QUEUES_AMD, INDENT "Max real-time compute queues (AMD)", int), dev_has_amd_v4 },
2702         { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_REAL_TIME_COMPUTE_UNITS_AMD, INDENT "Max real-time compute units (AMD)", int), dev_has_amd_v4 },
2703
2704         /* TODO: this should tell if it's being done due to the device being 2.1 or due to it having the extension */
2705         { CLINFO_BOTH, DINFO(CL_DEVICE_IL_VERSION, INDENT "IL version", str), dev_has_il },
2706         { CLINFO_BOTH, DINFO(CL_DEVICE_ILS_WITH_VERSION, INDENT "ILs with version", ext_version), dev_has_ext_ver },
2707         { CLINFO_BOTH, DINFO(CL_DEVICE_SPIR_VERSIONS, INDENT "SPIR versions", str), dev_has_spir },
2708         { CLINFO_BOTH, DINFO(CL_DEVICE_PRINTF_BUFFER_SIZE, "printf() buffer size", mem_sz), dev_is_12 },
2709         { CLINFO_BOTH, DINFO(CL_DEVICE_BUILT_IN_KERNELS, "Built-in kernels", str), dev_is_12 },
2710         { CLINFO_BOTH, DINFO(CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION, "Built-in kernels with version", ext_version), dev_has_ext_ver },
2711         { CLINFO_BOTH, DINFO(CL_DEVICE_ME_VERSION_INTEL, "Motion Estimation accelerator version (Intel)", int), dev_has_intel_AME },
2712         { CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_VERSION_INTEL, INDENT "Device-side AVC Motion Estimation version", int), dev_has_intel_AVC_ME },
2713         { CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL, INDENT INDENT "Supports texture sampler use", bool), dev_has_intel_AVC_ME },
2714         { CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL, INDENT INDENT "Supports preemption", bool), dev_has_intel_AVC_ME },
2715 };
2716
2717 /* Process all the device info in the traits, except if param_whitelist is not NULL,
2718  * in which case only those in the whitelist will be processed.
2719  * If present, the whitelist should be sorted in the order of appearance of the parameters
2720  * in the traits table, and terminated by the value CL_FALSE
2721  */
2722
2723 void
2724 printDeviceInfo(cl_device_id dev, const struct platform_list *plist, cl_uint p,
2725         const cl_device_info *param_whitelist, /* list of device info to process, or NULL */
2726         const struct opt_out *output)
2727 {
2728         char *extensions = NULL;
2729         size_t ext_len = 0;
2730         char *versioned_extensions = NULL;
2731
2732         /* pointers to the traits for CL_DEVICE_EXTENSIONS and CL_DEVICE_EXTENSIONS_WITH_VERSION */
2733         const struct device_info_traits *extensions_traits = NULL;
2734         const struct device_info_traits *versioned_extensions_traits = NULL;
2735
2736         struct device_info_checks chk;
2737         struct device_info_ret ret;
2738         struct info_loc loc;
2739
2740         memset(&chk, 0, sizeof(chk));
2741         chk.pinfo_checks = plist->platform_checks + p;
2742         chk.dev_version = 10;
2743
2744         INIT_RET(ret, "device");
2745
2746         reset_loc(&loc, __func__);
2747         loc.plat = plist->platform[p];
2748         loc.dev = dev;
2749
2750         for (loc.line = 0; loc.line < ARRAY_SIZE(dinfo_traits); ++loc.line) {
2751
2752                 const struct device_info_traits *traits = dinfo_traits + loc.line;
2753
2754                 /* checked is true if there was no condition to check for, or if the
2755                  * condition was satisfied
2756                  */
2757                 int checked = !(traits->check_func && !traits->check_func(&chk));
2758
2759                 loc.sname = traits->sname;
2760                 loc.pname = (output->mode == CLINFO_HUMAN ?
2761                         traits->pname : traits->sname);
2762                 loc.param.dev = traits->param;
2763
2764                 /* Whitelist check: finish if done traversing the list,
2765                  * skip current param if it's not the right one
2766                  */
2767                 if ((output->cond == COND_PROP_CHECK || output->brief) && param_whitelist) {
2768                         if (*param_whitelist == CL_FALSE)
2769                                 break;
2770                         if (traits->param != *param_whitelist)
2771                                 continue;
2772                         ++param_whitelist;
2773                 }
2774
2775                 /* skip if it's not for this output mode */
2776                 if (!(output->mode & traits->output_mode))
2777                         continue;
2778
2779                 if (output->cond == COND_PROP_CHECK && !checked)
2780                         continue;
2781
2782                 cur_sfx = (output->mode == CLINFO_HUMAN && traits->sfx) ? traits->sfx : empty_str;
2783
2784                 reset_strbuf(&ret.str);
2785                 reset_strbuf(&ret.err_str);
2786
2787                 /* Handle headers */
2788                 if (traits->param == CL_FALSE) {
2789                         ret.err = CL_SUCCESS;
2790                         show_strbuf(&ret.str, loc.pname, 0, ret.err);
2791                         continue;
2792                 }
2793
2794                 traits->show_func(&ret, &loc, &chk, output);
2795
2796                 /* Do not print this property if the user requested one and this does not match */
2797                 const cl_bool requested = !(output->prop && strstr(loc.sname, output->prop) == NULL);
2798                 if (traits->param == CL_DEVICE_EXTENSIONS) {
2799                         /* make a backup of the extensions string, regardless of
2800                          * errors and requested, because we need the information
2801                          * to fetch further information */
2802                         const char *msg = RET_BUF(ret)->buf;
2803                         ext_len = strlen(msg);
2804                         extensions_traits = traits;
2805                         /* pad with spaces: this will make it easier to check for extension presence
2806                          * without erroneously matching substrings by simply padding the extension name
2807                          * with spaces.
2808                          */
2809                         ALLOC(extensions, ext_len+3, "extensions");
2810                         memcpy(extensions + 1, msg, ext_len);
2811                         extensions[0] = ' ';
2812                         extensions[ext_len+1] = ' ';
2813                         extensions[ext_len+2] = '\0';
2814                 } else if (traits->param == CL_DEVICE_EXTENSIONS_WITH_VERSION) {
2815                         if (!requested)
2816                                 continue;
2817                         /* This will be displayed at the end, after we display the output of CL_DEVICE_EXTENSIONS */
2818                         const char *msg = RET_BUF(ret)->buf;
2819                         const size_t len = RET_BUF(ret)->sz;
2820                         versioned_extensions_traits = traits;
2821                         ALLOC(versioned_extensions, len, "versioned extensions");
2822                         memcpy(versioned_extensions, msg, len);
2823                 } else if (requested) {
2824                         if (ret.err) {
2825                                 /* if there was an error retrieving the property,
2826                                  * skip if it wasn't expected to work and we
2827                                  * weren't asked to show everything regardless of
2828                                  * error */
2829                                 if (!checked && output->cond != COND_PROP_SHOW)
2830                                         continue;
2831
2832                         } else {
2833                                 /* on success, but empty result, show (n/a) */
2834                                 if (ret.str.buf[0] == '\0')
2835                                         bufcpy(&ret.str, 0, not_specified(output));
2836                         }
2837                         if (output->brief)
2838                                 printf("%s%s\n", line_pfx, RET_BUF(ret)->buf);
2839                         else
2840                                 show_strbuf(RET_BUF(ret), loc.pname, 0, ret.err);
2841                 }
2842
2843                 if (ret.err)
2844                         continue;
2845
2846                 switch (traits->param) {
2847                 case CL_DEVICE_VERSION:
2848                         /* compute numeric value for OpenCL version */
2849                         chk.dev_version = getOpenCLVersion(ret.str.buf + 7);
2850                         break;
2851                 case CL_DEVICE_EXTENSIONS:
2852                         identify_device_extensions(extensions, &chk);
2853                         if (!requested) {
2854                                 free(extensions);
2855                                 extensions = NULL;
2856                         }
2857                         break;
2858                 case CL_DEVICE_TYPE:
2859                         chk.devtype = ret.value.devtype;
2860                         break;
2861                 case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
2862                         chk.cachetype = ret.value.cachetype;
2863                         break;
2864                 case CL_DEVICE_LOCAL_MEM_TYPE:
2865                         chk.lmemtype = ret.value.lmemtype;
2866                         break;
2867                 case CL_DEVICE_IMAGE_SUPPORT:
2868                         chk.image_support = ret.value.b;
2869                         break;
2870                 case CL_DEVICE_COMPILER_AVAILABLE:
2871                         chk.compiler_available = ret.value.b;
2872                         break;
2873                 case CL_DEVICE_NUM_P2P_DEVICES_AMD:
2874                         chk.p2p_num_devs = ret.value.u32;
2875                         break;
2876                 default:
2877                         /* do nothing */
2878                         break;
2879                 }
2880         }
2881
2882         // and finally the extensions, if we retrieved them
2883         if (extensions) {
2884                 // undo the padding
2885                 extensions[ext_len + 1] = '\0';
2886                 printf("%s" I1_STR "%s\n", line_pfx, (output->mode == CLINFO_HUMAN ?
2887                                 extensions_traits->pname :
2888                                 extensions_traits->sname), extensions + 1);
2889         }
2890         if (versioned_extensions) {
2891                 printf("%s" I1_STR "%s\n", line_pfx, (output->mode == CLINFO_HUMAN ?
2892                                 versioned_extensions_traits->pname :
2893                                 versioned_extensions_traits->sname), versioned_extensions);
2894         }
2895         free(extensions);
2896         free(versioned_extensions);
2897         extensions = NULL;
2898         UNINIT_RET(ret);
2899 }
2900
2901 /* list of allowed properties for AMD offline devices */
2902 /* everything else seems to be set to 0, and all the other string properties
2903  * actually segfault the driver */
2904
2905 static const cl_device_info amd_offline_info_whitelist[] = {
2906         CL_DEVICE_NAME,
2907         /* These are present, but all the same, so just skip them:
2908         CL_DEVICE_VENDOR,
2909         CL_DEVICE_VENDOR_ID,
2910         CL_DEVICE_VERSION,
2911         CL_DRIVER_VERSION,
2912         CL_DEVICE_OPENCL_C_VERSION,
2913         */
2914         CL_DEVICE_EXTENSIONS,
2915         CL_DEVICE_TYPE,
2916         CL_DEVICE_GFXIP_MAJOR_AMD,
2917         CL_DEVICE_GFXIP_MINOR_AMD,
2918         CL_DEVICE_MAX_WORK_GROUP_SIZE,
2919         CL_FALSE
2920 };
2921
2922 static const cl_device_info list_info_whitelist[] = {
2923         CL_DEVICE_NAME,
2924         CL_FALSE
2925 };
2926
2927 /* return a list of offline devices from the AMD extension */
2928 cl_device_id *
2929 fetchOfflineDevicesAMD(const struct platform_list *plist, cl_uint p,
2930         /* the number of devices will be returned in ret->value.u32,
2931          * the associated context in ret->base.ctx;
2932          */
2933         struct device_info_ret *ret)
2934 {
2935         cl_platform_id pid = plist->platform[p];
2936         cl_device_id *device = NULL;
2937         cl_uint num_devs = 0;
2938         cl_context ctx = NULL;
2939
2940         cl_context_properties ctxpft[] = {
2941                 CL_CONTEXT_PLATFORM, (cl_context_properties)pid,
2942                 CL_CONTEXT_OFFLINE_DEVICES_AMD, (cl_context_properties)CL_TRUE,
2943                 0
2944         };
2945
2946         ctx = clCreateContextFromType(ctxpft, CL_DEVICE_TYPE_ALL,
2947                 NULL, NULL, &ret->err);
2948         REPORT_ERROR(&ret->err_str, ret->err, "create context");
2949
2950         if (!ret->err) {
2951                 ret->err = REPORT_ERROR(&ret->err_str,
2952                         clGetContextInfo(ctx, CL_CONTEXT_NUM_DEVICES,
2953                                 sizeof(num_devs), &num_devs, NULL),
2954                         "get num devs");
2955         }
2956
2957         if (!ret->err) {
2958                 ALLOC(device, num_devs, "offline devices");
2959
2960                 ret->err = REPORT_ERROR(&ret->err_str,
2961                         clGetContextInfo(ctx, CL_CONTEXT_DEVICES,
2962                                 num_devs*sizeof(*device), device, NULL),
2963                         "get devs");
2964         }
2965
2966         if (ret->err) {
2967                 if (ctx) clReleaseContext(ctx);
2968                 free(device);
2969                 device = NULL;
2970         } else {
2971                 ret->value.u32 = num_devs;
2972                 ret->base.ctx = ctx;
2973         }
2974         return device;
2975 }
2976
2977 void printPlatformName(const struct platform_list *plist, cl_uint p, struct _strbuf *str,
2978         const struct opt_out *output)
2979 {
2980         const struct platform_data *pdata = plist->pdata + p;
2981         const char *brief_prefix = (output->mode == CLINFO_HUMAN ? "Platform #" : "");
2982         const char *title = (output->mode == CLINFO_HUMAN  ? pinfo_traits[0].pname :
2983                 pinfo_traits[0].sname);
2984         const int prefix_width = -line_pfx_len*(!output->brief);
2985         if (output->brief) {
2986                 strbuf_printf(str, "%s%" PRIu32 ": ", brief_prefix, p);
2987         } else if (output->mode == CLINFO_RAW) {
2988                 strbuf_printf(str, "[%s/*]", pdata->sname);
2989         }
2990         sprintf(line_pfx, "%*s", prefix_width, str->buf);
2991
2992         if (output->brief)
2993                 printf("%s%s\n", line_pfx, pdata->pname);
2994         else
2995                 printf("%s" I1_STR "%s\n", line_pfx, title, pdata->pname);
2996 }
2997
2998 void printPlatformDevices(const struct platform_list *plist, cl_uint p,
2999         const cl_device_id *device, cl_uint ndevs,
3000         struct _strbuf *str, const struct opt_out *output, cl_bool these_are_offline)
3001 {
3002         const struct platform_data *pdata = plist->pdata + p;
3003         const cl_device_info *param_whitelist = output->brief ? list_info_whitelist :
3004                 these_are_offline ? amd_offline_info_whitelist : NULL;
3005         cl_uint d;
3006
3007         if (output->detailed)
3008                 printf("%s" I0_STR "%" PRIu32 "\n",
3009                         line_pfx,
3010                         num_devs_header(output, these_are_offline),
3011                         ndevs);
3012
3013         for (d = 0; d < ndevs; ++d) {
3014                 if (output->selected && output->device != d) continue;
3015                 const cl_device_id dev = device[d];
3016                 if (output->brief) {
3017                         const cl_bool last_device = (d == ndevs - 1 &&
3018                                 output->mode != CLINFO_RAW &&
3019                                 (!output->offline ||
3020                                  !pdata->has_amd_offline ||
3021                                  these_are_offline));
3022                         if (output->mode == CLINFO_RAW)
3023                                 sprintf(line_pfx, "%" PRIu32 "%c%" PRIu32 ": ",
3024                                         p,
3025                                         these_are_offline ? '*' : '.',
3026                                         d);
3027                         else
3028                                 sprintf(line_pfx, " +-- %sDevice #%" PRIu32 ": ",
3029                                         these_are_offline ? "Offline " : "",
3030                                         d);
3031                         if (last_device)
3032                                 line_pfx[1] = '`';
3033                 } else if (line_pfx_len > 0) {
3034                         cl_int sd = (these_are_offline ? -1 : 1)*(cl_int)d;
3035                         strbuf_printf(str, "[%s/%" PRId32 "]", pdata->sname, sd);
3036                         sprintf(line_pfx, "%*s", -line_pfx_len, str->buf);
3037                 }
3038                 printDeviceInfo(dev, plist, p, param_whitelist, output);
3039                 if (output->detailed && d < pdata[p].ndevs - 1)
3040                         puts("");
3041                 fflush(stdout);
3042                 fflush(stderr);
3043         }
3044 }
3045
3046
3047 void showDevices(const struct platform_list *plist, const struct opt_out *output)
3048 {
3049         const cl_uint num_platforms = plist->num_platforms;
3050         const cl_uint maxdevs = plist->max_devs;
3051         const struct platform_data *pdata = plist->pdata;
3052
3053         cl_uint p;
3054         struct _strbuf str;
3055         init_strbuf(&str, __func__);
3056
3057         if (output->mode == CLINFO_RAW) {
3058                 if (output->brief)
3059                         strbuf_printf(&str, "%" PRIu32 ".%" PRIu32 ": ", num_platforms, maxdevs);
3060                 else
3061                         strbuf_printf(&str, "[%*s/%" PRIu32 "] ",
3062                                 plist->max_sname_len, "", maxdevs);
3063         } else {
3064                 if (output->brief)
3065                         strbuf_printf(&str, " +-- %sDevice #%" PRIu32 ": ",
3066                                 (output->offline ? "Offline " : ""), maxdevs);
3067                 else
3068                         reset_strbuf(&str);
3069                 /* TODO we have no prefix in HUMAN detailed output mode,
3070                  * consider adding one
3071                  */
3072         }
3073
3074         if (str.buf[0]) {
3075                 line_pfx_len = (int)(strlen(str.buf) + 1);
3076                 REALLOC(line_pfx, line_pfx_len, "line prefix");
3077                 reset_strbuf(&str);
3078         }
3079
3080         for (p = 0; p < num_platforms; ++p) {
3081                 /* skip non-selected platforms altogether */
3082                 if (output->selected && output->platform != p) continue;
3083
3084                 /* skip platform header if only printing specfic properties */
3085                 if (!output->prop)
3086                         printPlatformName(plist, p, &str, output);
3087
3088                 printPlatformDevices(plist, p,
3089                         get_platform_devs(plist, p), pdata[p].ndevs,
3090                         &str, output, CL_FALSE);
3091
3092                 if (output->offline && pdata[p].has_amd_offline) {
3093                         struct device_info_ret ret;
3094                         cl_device_id *devs = NULL;
3095
3096                         INIT_RET(ret, "offline device");
3097                         if (output->detailed)
3098                                 puts("");
3099
3100                         devs = fetchOfflineDevicesAMD(plist, p, &ret);
3101                         if (ret.err) {
3102                                 puts(ret.err_str.buf);
3103                         } else {
3104                                 printPlatformDevices(plist, p, devs, ret.value.u32,
3105                                         &str, output, CL_TRUE);
3106                                 clReleaseContext(ret.base.ctx);
3107                                 free(devs);
3108                         }
3109                         UNINIT_RET(ret);
3110                 }
3111                 if (output->detailed)
3112                         puts("");
3113         }
3114         free_strbuf(&str);
3115 }
3116
3117 /* check the behavior of clGetPlatformInfo() when given a NULL platform ID */
3118 void checkNullGetPlatformName(const struct opt_out *output)
3119 {
3120         struct device_info_ret ret;
3121         struct info_loc loc;
3122
3123         INIT_RET(ret, "null ctx");
3124         reset_loc(&loc, __func__);
3125         RESET_LOC_PARAM(loc, plat, CL_PLATFORM_NAME);
3126
3127         ret.err = clGetPlatformInfo(NULL, CL_PLATFORM_NAME, ret.str.sz, ret.str.buf, NULL);
3128         if (ret.err == CL_INVALID_PLATFORM) {
3129                 bufcpy(&ret.err_str, 0, no_plat(output));
3130         } else {
3131                 loc.line = __LINE__ + 1;
3132                 REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s");
3133         }
3134         printf(I1_STR "%s\n",
3135                 "clGetPlatformInfo(NULL, CL_PLATFORM_NAME, ...)", RET_BUF(ret)->buf);
3136         UNINIT_RET(ret);
3137 }
3138
3139 /* check the behavior of clGetDeviceIDs() when given a NULL platform ID;
3140  * return the index of the default platform in our array of platform IDs,
3141  * or num_platforms (which is an invalid platform index) in case of errors
3142  * or no platform or device found.
3143  */
3144 cl_uint checkNullGetDevices(const struct platform_list *plist, const struct opt_out *output)
3145 {
3146         const cl_uint num_platforms = plist->num_platforms;
3147         const struct platform_data *pdata = plist->pdata;
3148         const cl_platform_id *platform = plist->platform;
3149
3150         struct device_info_ret ret;
3151         struct info_loc loc;
3152
3153         cl_uint i = 0; /* generic iterator */
3154         cl_device_id dev = NULL; /* sample device */
3155         cl_platform_id plat = NULL; /* detected platform */
3156
3157         cl_uint found = 0; /* number of platforms found */
3158         cl_uint pidx = num_platforms; /* index of the platform found */
3159         cl_uint numdevs = 0;
3160
3161         INIT_RET(ret, "null get devices");
3162
3163         reset_loc(&loc, __func__);
3164         loc.sname = "device IDs";
3165
3166         ret.err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 0, NULL, &numdevs);
3167         /* TODO we should check other CL_DEVICE_TYPE_* combinations, since a smart
3168          * implementation might give you a different default platform for GPUs
3169          * and for CPUs.
3170          * Of course the “no devices” case would then need to be handled differently.
3171          * The logic might be maintained similarly, provided we also gather
3172          * the number of devices of each type for each platform, although it's
3173          * obviously more likely to have multiple platforms with no devices
3174          * of a given type.
3175          */
3176
3177         switch (ret.err) {
3178         case CL_INVALID_PLATFORM:
3179                 bufcpy(&ret.err_str, 0, no_plat(output));
3180                 break;
3181         case CL_DEVICE_NOT_FOUND:
3182                  /* No devices were found, see if there are platforms with
3183                   * no devices, and if there's only one, assume this is the
3184                   * one being used as default by the ICD loader */
3185                 for (i = 0; i < num_platforms; ++i) {
3186                         if (pdata[i].ndevs == 0) {
3187                                 ++found;
3188                                 if (found > 1)
3189                                         break;
3190                                 else {
3191                                         plat = platform[i];
3192                                         pidx = i;
3193                                 }
3194                         }
3195                 }
3196
3197                 switch (found) {
3198                 case 0:
3199                         bufcpy(&ret.err_str, 0, (output->mode == CLINFO_HUMAN ?
3200                                 "<error: 0 devices, no matching platform!>" :
3201                                 "CL_DEVICE_NOT_FOUND | CL_INVALID_PLATFORM"));
3202                         break;
3203                 case 1:
3204                         strbuf_printf(&ret.err_str, "%s%s%s%s",
3205                                 no_dev_found(output),
3206                                 (output->mode == CLINFO_HUMAN ? " [" : " | "),
3207                                 (output->mode == CLINFO_HUMAN ? pdata[pidx].pname : pdata[pidx].sname),
3208                                 (output->mode == CLINFO_HUMAN ? "?]" : "?"));
3209                         break;
3210                 default: /* found > 1 */
3211                         bufcpy(&ret.err_str, 0, (output->mode == CLINFO_HUMAN ?
3212                                 "<error: 0 devices, multiple matching platforms!>" :
3213                                 "CL_DEVICE_NOT_FOUND | ????"));
3214                         break;
3215                 }
3216                 break;
3217         default:
3218                 loc.line = __LINE__+1;
3219                 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get number of %s")) break;
3220
3221                 /* Determine platform by looking at the CL_DEVICE_PLATFORM of
3222                  * one of the devices */
3223                 ret.err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 1, &dev, NULL);
3224                 loc.line = __LINE__+1;
3225                 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
3226
3227                 RESET_LOC_PARAM(loc, dev, CL_DEVICE_PLATFORM);
3228                 ret.err = clGetDeviceInfo(dev, CL_DEVICE_PLATFORM,
3229                         sizeof(plat), &plat, NULL);
3230                 loc.line = __LINE__+1;
3231                 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
3232
3233                 for (i = 0; i < num_platforms; ++i) {
3234                         if (platform[i] == plat) {
3235                                 pidx = i;
3236                                 strbuf_printf(&ret.str, "%s [%s]",
3237                                         (output->mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS"),
3238                                         pdata[i].sname);
3239                                 break;
3240                         }
3241                 }
3242                 if (i == num_platforms) {
3243                         ret.err = CL_INVALID_PLATFORM;
3244                         strbuf_printf(&ret.err_str, "<error: platform %p not found>", (void*)plat);
3245                 }
3246         }
3247         printf(I1_STR "%s\n",
3248                 "clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, ...)", RET_BUF(ret)->buf);
3249
3250         UNINIT_RET(ret);
3251         return pidx;
3252 }
3253
3254 void checkNullCtx(struct device_info_ret *ret,
3255         const struct platform_list *plist, cl_uint pidx, const char *which,
3256         const struct opt_out *output)
3257 {
3258         const cl_device_id *dev = plist->all_devs + plist->dev_offset[pidx];
3259         struct info_loc loc;
3260         cl_context ctx = clCreateContext(NULL, 1, dev, NULL, NULL, &ret->err);
3261
3262         reset_loc(&loc, __func__);
3263         loc.sname = which;
3264         loc.line = __LINE__+2;
3265
3266         if (!REPORT_ERROR_LOC(ret, ret->err, &loc, "create context with device from %s platform"))
3267                 strbuf_printf(&ret->str, "%s [%s]",
3268                         (output->mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS"),
3269                         plist->pdata[pidx].sname);
3270         if (ctx) {
3271                 clReleaseContext(ctx);
3272                 ctx = NULL;
3273         }
3274 }
3275
3276 /* check behavior of clCreateContextFromType() with NULL cl_context_properties */
3277 void checkNullCtxFromType(const struct platform_list *plist, const struct opt_out *output)
3278 {
3279         const cl_uint num_platforms = plist->num_platforms;
3280         const struct platform_data *pdata = plist->pdata;
3281         const cl_platform_id *platform = plist->platform;
3282
3283         size_t t; /* type iterator */
3284         size_t i; /* generic iterator */
3285         char def[1024];
3286         cl_context ctx = NULL;
3287
3288         size_t ndevs = 8;
3289         size_t szval = 0;
3290         size_t cursz = ndevs*sizeof(cl_device_id);
3291         cl_platform_id plat = NULL;
3292         cl_device_id *devs = NULL;
3293
3294         struct device_info_ret ret;
3295         struct info_loc loc;
3296
3297         const char *platname_prop = (output->mode == CLINFO_HUMAN ?
3298                 pinfo_traits[0].pname :
3299                 pinfo_traits[0].sname);
3300
3301         const char *devname_prop = (output->mode == CLINFO_HUMAN ?
3302                 dinfo_traits[0].pname :
3303                 dinfo_traits[0].sname);
3304
3305         reset_loc(&loc, __func__);
3306         INIT_RET(ret, "null ctx from type");
3307
3308         ALLOC(devs, ndevs, "context devices");
3309
3310         for (t = 1; t < devtype_count; ++t) { /* we skip 0 */
3311                 loc.sname = device_type_raw_str[t];
3312
3313                 strbuf_printf(&ret.str, "clCreateContextFromType(NULL, %s)", loc.sname);
3314                 sprintf(def, I1_STR, ret.str.buf);
3315
3316                 loc.line = __LINE__+1;
3317                 ctx = clCreateContextFromType(NULL, devtype[t], NULL, NULL, &ret.err);
3318
3319                 switch (ret.err) {
3320                 case CL_INVALID_PLATFORM:
3321                         bufcpy(&ret.err_str, 0, no_plat(output)); break;
3322                 case CL_DEVICE_NOT_FOUND:
3323                         bufcpy(&ret.err_str, 0, no_dev_found(output)); break;
3324                 case CL_INVALID_DEVICE_TYPE: /* e.g. _CUSTOM device on 1.1 platform */
3325                         bufcpy(&ret.err_str, 0, invalid_dev_type(output)); break;
3326                 case CL_INVALID_VALUE: /* This is what apple returns for the case above */
3327                         bufcpy(&ret.err_str, 0, invalid_dev_type(output)); break;
3328                 case CL_DEVICE_NOT_AVAILABLE:
3329                         bufcpy(&ret.err_str, 0, no_dev_avail(output)); break;
3330                 default:
3331                         if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "create context from type %s")) break;
3332
3333                         /* get the devices */
3334                         loc.sname = "CL_CONTEXT_DEVICES";
3335                         loc.line = __LINE__+2;
3336
3337                         ret.err = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, 0, NULL, &szval);
3338                         if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s size")) break;
3339                         if (szval > cursz) {
3340                                 REALLOC(devs, szval, "context devices");
3341                                 cursz = szval;
3342                         }
3343
3344                         loc.line = __LINE__+1;
3345                         ret.err = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, cursz, devs, NULL);
3346                         if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
3347                         ndevs = szval/sizeof(cl_device_id);
3348                         if (ndevs < 1) {
3349                                 ret.err = CL_DEVICE_NOT_FOUND;
3350                                 bufcpy(&ret.err_str, 0, "<error: context created with no devices>");
3351                         }
3352
3353                         /* get the platform from the first device */
3354                         RESET_LOC_PARAM(loc, dev, CL_DEVICE_PLATFORM);
3355                         loc.line = __LINE__+1;
3356                         ret.err = clGetDeviceInfo(*devs, CL_DEVICE_PLATFORM, sizeof(plat), &plat, NULL);
3357                         if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
3358                         loc.plat = plat;
3359
3360                         szval = 0;
3361                         for (i = 0; i < num_platforms; ++i) {
3362                                 if (platform[i] == plat)
3363                                         break;
3364                         }
3365                         if (i == num_platforms) {
3366                                 ret.err = CL_INVALID_PLATFORM;
3367                                 strbuf_printf(&ret.err_str, "<error: platform %p not found>", (void*)plat);
3368                                 break;
3369                         } else {
3370                                 szval += strbuf_printf(&ret.str, "%s (%" PRIuS ")",
3371                                         (output->mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS"),
3372                                         ndevs);
3373                                 szval += snprintf(ret.str.buf + szval, ret.str.sz - szval, "\n" I2_STR "%s",
3374                                         platname_prop, pdata[i].pname);
3375                         }
3376                         for (i = 0; i < ndevs; ++i) {
3377                                 size_t szname = 0;
3378                                 /* for each device, show the device name */
3379                                 /* TODO some other unique ID too, e.g. PCI address, if available? */
3380
3381                                 szval += snprintf(ret.str.buf + szval, ret.str.sz - szval, "\n" I2_STR, devname_prop);
3382                                 if (szval >= ret.str.sz) {
3383                                         trunc_strbuf(&ret.str);
3384                                         break;
3385                                 }
3386
3387                                 RESET_LOC_PARAM(loc, dev, CL_DEVICE_NAME);
3388                                 loc.dev = devs[i];
3389                                 loc.line = __LINE__+1;
3390                                 ret.err = clGetDeviceInfo(devs[i], CL_DEVICE_NAME, ret.str.sz - szval, ret.str.buf + szval, &szname);
3391                                 if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break;
3392                                 szval += szname - 1;
3393                         }
3394                         if (i != ndevs)
3395                                 break; /* had an error earlier, bail */
3396
3397                 }
3398
3399                 if (ctx) {
3400                         clReleaseContext(ctx);
3401                         ctx = NULL;
3402                 }
3403                 printf("%s%s\n", def, RET_BUF(ret)->buf);
3404         }
3405         free(devs);
3406         UNINIT_RET(ret);
3407 }
3408
3409 /* check the behavior of NULL platform in clGetDeviceIDs (see checkNullGetDevices)
3410  * and in clCreateContext() */
3411 void checkNullBehavior(const struct platform_list *plist, const struct opt_out *output)
3412 {
3413         const cl_uint num_platforms = plist->num_platforms;
3414         const struct platform_data *pdata = plist->pdata;
3415
3416         cl_uint p = 0;
3417         struct device_info_ret ret;
3418
3419         INIT_RET(ret, "null behavior");
3420
3421         printf("NULL platform behavior\n");
3422
3423         checkNullGetPlatformName(output);
3424
3425         p = checkNullGetDevices(plist, output);
3426
3427         /* If there's a default platform, and it has devices, try
3428          * creating a context with its first device and see if it works */
3429
3430         if (p == num_platforms) {
3431                 ret.err = CL_INVALID_PLATFORM;
3432                 bufcpy(&ret.err_str, 0, no_plat(output));
3433         } else if (pdata[p].ndevs == 0) {
3434                 ret.err = CL_DEVICE_NOT_FOUND;
3435                 bufcpy(&ret.err_str, 0, no_dev_found(output));
3436         } else {
3437                 if (p < num_platforms) {
3438                         checkNullCtx(&ret, plist, p, "default", output);
3439                 } else {
3440                         /* this shouldn't happen, but still ... */
3441                         ret.err = CL_OUT_OF_HOST_MEMORY;
3442                         bufcpy(&ret.err_str, 0, "<error: overflow in default platform scan>");
3443                 }
3444         }
3445         printf(I1_STR "%s\n", "clCreateContext(NULL, ...) [default]", RET_BUF(ret)->buf);
3446
3447         /* Look for a device from a non-default platform, if there are any */
3448         if (p == num_platforms || num_platforms > 1) {
3449                 cl_uint p2 = 0;
3450                 while (p2 < num_platforms && (p2 == p || pdata[p2].ndevs == 0)) {
3451                         p2++;
3452                 }
3453                 if (p2 < num_platforms) {
3454                         checkNullCtx(&ret, plist, p2, "non-default", output);
3455                 } else {
3456                         ret.err = CL_DEVICE_NOT_FOUND;
3457                         bufcpy(&ret.str, 0, "<error: no devices in non-default plaforms>");
3458                 }
3459                 printf(I1_STR "%s\n", "clCreateContext(NULL, ...) [other]", RET_BUF(ret)->buf);
3460         }
3461
3462         checkNullCtxFromType(plist, output);
3463
3464         UNINIT_RET(ret);
3465 }
3466
3467
3468 /* Get properties of the ocl-icd loader, if available */
3469 /* All properties are currently char[] */
3470
3471 /* Function pointer to the ICD loader info function */
3472
3473 typedef cl_int (*icdl_info_fn_ptr)(cl_icdl_info, size_t, void*, size_t*);
3474 icdl_info_fn_ptr clGetICDLoaderInfoOCLICD;
3475
3476 /* We want to auto-detect the OpenCL version supported by the ICD loader.
3477  * To do this, we will progressively find symbols introduced in new APIs,
3478  * until a NULL symbol is found.
3479  */
3480
3481 struct icd_loader_test {
3482         cl_uint version;
3483         const char *symbol;
3484 } icd_loader_tests[] = {
3485         { 11, "clCreateSubBuffer" },
3486         { 12, "clCreateImage" },
3487         { 20, "clSVMAlloc" },
3488         { 21, "clGetHostTimer" },
3489         { 22, "clSetProgramSpecializationConstant" },
3490         { 30, "clSetContextDestructorCallback" },
3491         { 0, NULL }
3492 };
3493
3494 void
3495 icdl_info_str(struct icdl_info_ret *ret, const struct info_loc *loc)
3496 {
3497         GET_STRING_LOC(ret, loc, clGetICDLoaderInfoOCLICD, loc->param.icdl);
3498         return;
3499 }
3500
3501 struct icdl_info_traits {
3502         cl_icdl_info param; // CL_ICDL_*
3503         const char *sname; // "CL_ICDL_*"
3504         const char *pname; // "ICD loader *"
3505 };
3506
3507 static const char * const oclicdl_pfx = "OCLICD";
3508
3509 #define LINFO(symbol, name) { symbol, #symbol, "ICD loader " name }
3510 struct icdl_info_traits linfo_traits[] = {
3511         LINFO(CL_ICDL_NAME, "Name"),
3512         LINFO(CL_ICDL_VENDOR, "Vendor"),
3513         LINFO(CL_ICDL_VERSION, "Version"),
3514         LINFO(CL_ICDL_OCL_VERSION, "Profile")
3515 };
3516
3517 /* The ICD loader info function must be retrieved via clGetExtensionFunctionAddress,
3518  * which returns a void pointer.
3519  * ISO C forbids assignments between function pointers and void pointers,
3520  * but POSIX allows it. To compile without warnings even in -pedantic mode,
3521  * we take advantage of the fact that we _can_ do the conversion via
3522  * pointers-to-pointers. This is supported on most compilers, except
3523  * for some rather old GCC versions whose strict aliasing rules are
3524  * too strict. Disable strict aliasing warnings for these compilers.
3525  */
3526 #if defined __GNUC__ && ((__GNUC__*10 + __GNUC_MINOR__) < 46)
3527 #pragma GCC diagnostic ignored "-Wstrict-aliasing"
3528 #endif
3529
3530 struct icdl_data oclIcdProps(const struct platform_list *plist, const struct opt_out *output)
3531 {
3532         const cl_uint max_plat_version = plist->max_plat_version;
3533
3534         struct icdl_data icdl;
3535
3536         /* Counter that'll be used to walk the icd_loader_tests */
3537         int i = 0;
3538
3539         /* We find the clGetICDLoaderInfoOCLICD extension address, which will be used
3540          * to query the ICD loader properties.
3541          * It should be noted that in this specific case we cannot replace the
3542          * call to clGetExtensionFunctionAddress with a call to the superseding function
3543          * clGetExtensionFunctionAddressForPlatform because the extension is in the
3544          * loader itself, not in a specific platform.
3545          */
3546         void *ptrHack = clGetExtensionFunctionAddress("clGetICDLoaderInfoOCLICD");
3547         clGetICDLoaderInfoOCLICD = *(icdl_info_fn_ptr*)(&ptrHack);
3548
3549         /* Initialize icdl_data ret versions */
3550         icdl.detected_version = 10;
3551         icdl.reported_version = 0;
3552
3553         /* clinfo may lag behind the OpenCL standard or loader version,
3554          * and we don't want to give a warning if we can't tell if the loader
3555          * correctly supports a version unknown to us
3556          */
3557         cl_uint clinfo_highest_known_version = 0;
3558
3559         /* Step #1: try to auto-detect the supported ICD loader version */
3560         do {
3561                 struct icd_loader_test check = icd_loader_tests[i];
3562                 if (check.symbol == NULL)
3563                         break;
3564                 if (dlsym(DL_MODULE, check.symbol) == NULL)
3565                         break;
3566                 clinfo_highest_known_version = icdl.detected_version = check.version;
3567                 ++i;
3568         } while (1);
3569
3570         /* Step #2: query properties from extension, if available */
3571         if (clGetICDLoaderInfoOCLICD != NULL) {
3572                 struct info_loc loc;
3573                 struct icdl_info_ret ret;
3574                 reset_loc(&loc, __func__);
3575                 INIT_RET(ret, "ICD loader");
3576
3577                 /* TODO think of a sensible header in CLINFO_RAW */
3578                 if (output->mode != CLINFO_RAW)
3579                         puts("\nICD loader properties");
3580
3581                 if (output->mode == CLINFO_RAW) {
3582                         line_pfx_len = (int)(strlen(oclicdl_pfx) + 5);
3583                         REALLOC(line_pfx, line_pfx_len, "line prefix OCL ICD");
3584                         strbuf_printf(&ret.str, "[%s/*]", oclicdl_pfx);
3585                         sprintf(line_pfx, "%*s", -line_pfx_len, ret.str.buf);
3586                 }
3587
3588                 for (loc.line = 0; loc.line < ARRAY_SIZE(linfo_traits); ++loc.line) {
3589                         const struct icdl_info_traits *traits = linfo_traits + loc.line;
3590                         loc.sname = traits->sname;
3591                         loc.pname = (output->mode == CLINFO_HUMAN ?
3592                                 traits->pname : traits->sname);
3593                         loc.param.icdl = traits->param;
3594
3595                         reset_strbuf(&ret.str);
3596                         reset_strbuf(&ret.err_str);
3597                         icdl_info_str(&ret, &loc);
3598
3599                         /* Do not print this property if the user requested one and this does not match */
3600                         const cl_bool requested = !(output->prop && strstr(loc.sname, output->prop) == NULL);
3601                         if (requested)
3602                                 show_strbuf(RET_BUF(ret), loc.pname, 1, ret.err);
3603
3604                         if (!ret.err && traits->param == CL_ICDL_OCL_VERSION) {
3605                                 icdl.reported_version = getOpenCLVersion(ret.str.buf + 7);
3606                         }
3607                 }
3608                 UNINIT_RET(ret);
3609         }
3610
3611         /* Step #3: show it */
3612         if (output->mode == CLINFO_HUMAN) {
3613                 if (icdl.reported_version &&
3614                         icdl.reported_version <= clinfo_highest_known_version &&
3615                         icdl.reported_version != icdl.detected_version) {
3616                         printf( "\tNOTE:\tyour OpenCL library declares to support OpenCL %" PRIu32 ".%" PRIu32 ",\n"
3617                                 "\t\tbut it seems to support up to OpenCL %" PRIu32 ".%" PRIu32 " %s.\n",
3618                                 SPLIT_CL_VERSION(icdl.reported_version),
3619                                 SPLIT_CL_VERSION(icdl.detected_version),
3620                                 icdl.detected_version < icdl.reported_version  ?
3621                                 "only" : "too");
3622                 }
3623
3624                 // for the loader vs platform max version check we use the version we detected
3625                 // if the reported version is known to us, and the reported version if it's higher
3626                 // than the standard versions we know about
3627                 cl_uint max_version_check = icdl.reported_version > clinfo_highest_known_version ?
3628                         icdl.reported_version : icdl.detected_version;
3629                 if (max_version_check < max_plat_version) {
3630                         printf( "\tNOTE:\tyour OpenCL library only supports OpenCL %" PRIu32 ".%" PRIu32 ",\n"
3631                                 "\t\tbut some installed platforms support OpenCL %" PRIu32 ".%" PRIu32 ".\n"
3632                                 "\t\tPrograms using %" PRIu32 ".%" PRIu32 " features may crash\n"
3633                                 "\t\tor behave unexpectedly\n",
3634                                 SPLIT_CL_VERSION(icdl.detected_version),
3635                                 SPLIT_CL_VERSION(max_plat_version),
3636                                 SPLIT_CL_VERSION(max_plat_version));
3637                 }
3638         }
3639         return icdl;
3640 }
3641
3642 #if defined __GNUC__ && ((__GNUC__*10 + __GNUC_MINOR__) < 46)
3643 #pragma GCC diagnostic warning "-Wstrict-aliasing"
3644 #endif
3645
3646 void version(void)
3647 {
3648         puts("clinfo version 3.0.20.11.20");
3649 }
3650
3651 void parse_device_spec(const char *str, struct opt_out *output)
3652 {
3653         if (!str) {
3654                 fprintf(stderr, "please specify a device in the form P:D where P is the platform number and D the device number\n");
3655                 exit(1);
3656         }
3657         int p, d;
3658         int n = sscanf(str, "%d:%d", &p, &d);
3659         if (n != 2 || p < 0 || d < 0) {
3660                 fprintf(stderr, "invalid device specification '%s'\n", str);
3661                 exit(1);
3662         }
3663         output->platform = p;
3664         output->device = d;
3665 }
3666
3667 void parse_prop(const char *input, struct opt_out *output)
3668 {
3669         /* We normalize the property name by upcasing it and replacing the minus sign (-)
3670          * with the underscore (_). If any other character is found, we consider it an error
3671          */
3672
3673         size_t len = strlen(input);
3674         char *normalized;
3675         ALLOC(normalized, len+1, "normalized property name");
3676         for (size_t i = 0; i < len; ++i)
3677         {
3678                 char c = input[i];
3679                 if ( (c == '_') || ( c >= 'A' && c <= 'Z'))
3680                         normalized[i] = c;
3681                 else if (c >= 'a' && c <= 'z')
3682                         normalized[i] = 'A' + (c - 'a');
3683                 else if (c == '-')
3684                         normalized[i] = '_';
3685                 else {
3686                         fprintf(stderr, "invalid property name substring '%s'\n", input);
3687                         exit(1);
3688                 }
3689         }
3690         output->prop = normalized;
3691 }
3692
3693 void usage(void)
3694 {
3695         version();
3696         puts("Display properties of all available OpenCL platforms and devices");
3697         puts("Usage: clinfo [options ...]\n");
3698         puts("Options:");
3699         puts("\t--all-props, -a\t\ttry all properties, only show valid ones");
3700         puts("\t--always-all-props, -At\tshow all properties, even if invalid");
3701         puts("\t--human\t\thuman-friendly output (default)");
3702         puts("\t--raw\t\traw output");
3703         puts("\t--offline\talso show offline devices");
3704         puts("\t--list, -l\tonly list the platforms and devices by name");
3705         puts("\t--prop prop-name\tonly list properties matching the given name");
3706         puts("\t--device p:d,");
3707         puts("\t-d p:d\t\tonly show information about device number d from platform number p");
3708         puts("\t-h, -?\t\tshow usage");
3709         puts("\t--version, -v\tshow version\n");
3710         puts("Defaults to raw mode if invoked with");
3711         puts("a name that contains the string \"raw\"");
3712 }
3713
3714 int main(int argc, char *argv[])
3715 {
3716         cl_uint p;
3717         cl_int err;
3718         int a = 0;
3719
3720         struct opt_out output;
3721
3722         struct platform_list plist;
3723         init_plist(&plist);
3724
3725         output.platform = CL_UINT_MAX;
3726         output.device = CL_UINT_MAX;
3727         output.prop = NULL;
3728         output.mode = CLINFO_HUMAN;
3729         output.cond = COND_PROP_CHECK;
3730         output.brief = CL_FALSE;
3731         output.offline = CL_FALSE;
3732         output.check_size = CL_FALSE;
3733
3734         /* if there's a 'raw' in the program name, switch to raw output mode */
3735         if (strstr(argv[0], "raw"))
3736                 output.mode = CLINFO_RAW;
3737
3738         /* process command-line arguments */
3739         while (++a < argc) {
3740                 if (!strcmp(argv[a], "-a") || !strcmp(argv[a], "--all-props"))
3741                         output.cond = COND_PROP_TRY;
3742                 else if (!strcmp(argv[a], "-A") || !strcmp(argv[a], "--always-all-props"))
3743                         output.cond = COND_PROP_SHOW;
3744                 else if (!strcmp(argv[a], "--raw"))
3745                         output.mode = CLINFO_RAW;
3746                 else if (!strcmp(argv[a], "--human"))
3747                         output.mode = CLINFO_HUMAN;
3748                 else if (!strcmp(argv[a], "--offline"))
3749                         output.offline = CL_TRUE;
3750                 else if (!strcmp(argv[a], "-l") || !strcmp(argv[a], "--list"))
3751                         output.brief = CL_TRUE;
3752                 else if (!strcmp(argv[a], "-d") || !strcmp(argv[a], "--device")) {
3753                         ++a;
3754                         parse_device_spec(argv[a], &output);
3755                 } else if (!strncmp(argv[a], "-d", 2)) {
3756                         parse_device_spec(argv[a] + 2, &output);
3757                 } else if (!strcmp(argv[a], "--prop")) {
3758                         ++a;
3759                         parse_prop(argv[a], &output);
3760                 } else if (!strcmp(argv[a], "-?") || !strcmp(argv[a], "-h")) {
3761                         usage();
3762                         return 0;
3763                 } else if (!strcmp(argv[a], "--version") || !strcmp(argv[a], "-v")) {
3764                         version();
3765                         return 0;
3766                 } else {
3767                         fprintf(stderr, "ignoring unknown command-line parameter %s\n", argv[a]);
3768                 }
3769         }
3770         /* If a property was specified, we only print in RAW mode */
3771         if (output.prop)
3772                 output.mode = CLINFO_RAW;
3773         output.selected = (output.device != CL_UINT_MAX);
3774         output.detailed = !output.brief && !output.selected && !output.prop;
3775
3776         err = clGetPlatformIDs(0, NULL, &plist.num_platforms);
3777         if (err != CL_PLATFORM_NOT_FOUND_KHR)
3778                 CHECK_ERROR(err, "number of platforms");
3779
3780         if (output.detailed)
3781                 printf(I0_STR "%" PRIu32 "\n",
3782                         (output.mode == CLINFO_HUMAN ?
3783                          "Number of platforms" : "#PLATFORMS"),
3784                         plist.num_platforms);
3785         if (!plist.num_platforms)
3786                 return 0;
3787
3788         alloc_plist(&plist);
3789         err = clGetPlatformIDs(plist.num_platforms, plist.platform, NULL);
3790         CHECK_ERROR(err, "platform IDs");
3791
3792         ALLOC(line_pfx, 1, "line prefix");
3793
3794         for (p = 0; p < plist.num_platforms; ++p) {
3795                 // skip non-selected platforms altogether
3796                 if (output.selected && output.platform != p) continue;
3797                 gatherPlatformInfo(&plist, p, &output);
3798                 if (output.detailed)
3799                         puts("");
3800         }
3801         showDevices(&plist, &output);
3802         if (output.prop || (output.detailed && !output.selected)) {
3803                 if (output.mode != CLINFO_RAW)
3804                         checkNullBehavior(&plist, &output);
3805                 oclIcdProps(&plist, &output);
3806         }
3807
3808         free_plist(&plist);
3809         free(line_pfx);
3810         free((char*)output.prop);
3811         return 0;
3812 }