1 /* usual C/C++ includes */
3 #include <math.h> // for ceil()
/*
 * Report a failed OpenCL call.
 *
 * error   - status code returned by (or through) an OpenCL API call
 * message - short description of the operation that was attempted
 *
 * When error differs from CL_SUCCESS, "error <code> <message>" is written
 * to stderr on a line of its own. Nothing is printed on success.
 *
 * NOTE(review): main() keeps using the results of the checked call right
 * after invoking this helper, so it presumably also stops the program on
 * failure -- confirm against the remainder of the body.
 */
13 void check_ocl_error(const cl_int &error, const char *message) {
14 if (error != CL_SUCCESS) {
15 fprintf(stderr, "error %d %s\n", error, message);
/*
 * Load the contents of the file named fname into a malloc()ed buffer.
 *
 * fname - path of the file to read; it is opened in binary mode ("rb")
 *
 * Diagnostics: "<fname> not found" and a short-read message go to stderr;
 * a "read N bytes from <fname>" line goes to stdout.
 *
 * Callers (see main()) treat a NULL result as failure.
 * NOTE(review): presumably the caller owns the returned buffer and must
 * free() it -- confirm.
 *
 * NOTE(review): fsize and readsize are size_t but are printed with %lu;
 * the matching conversion is %zu (size_t is not unsigned long on every
 * platform, e.g. 64-bit Windows).
 */
20 char *read_file(const char *fname) {
21 size_t fsize, readsize;
24 FILE *fd = fopen(fname, "rb");
/* presumably reached only when fopen() failed -- confirm */
26 fprintf(stderr, "%s not found\n", fname);
/* move to the end of the file; NOTE(review): presumably used to obtain fsize -- confirm */
30 fseek(fd, 0, SEEK_END);
/* one byte more than the file size; presumably room for a terminating NUL -- confirm */
33 buff = (char *)malloc(fsize+1);
/* NOTE(review): make sure the malloc() result is checked before this read */
35 readsize = fread(buff, 1, fsize, fd);
/* a short read is reported with both the actual and the expected byte count */
36 if (fsize != readsize) {
37 fprintf(stderr, "could only read %lu/%lu bytes from %s\n",
38 readsize, fsize, fname);
/* progress message on stdout */
44 printf("read %lu bytes from %s\n", fsize, fname);
51 int main(int argc, char **argv) {
54 if (SYMBOLS + 1 != sizeof(alphabet)) {
55 fprintf(stderr, "Wrong alphabet: %s has %lu symbols, expected %d\n",
56 alphabet, sizeof(alphabet) - 1, SYMBOLS);
60 char page[CHARS_PER_PAGE+1];
61 page[CHARS_PER_PAGE] = '\0';
64 /* auxiliary buffer to read platform and device info */
67 /* platform selection */
68 cl_uint num_platforms = 0;
69 cl_platform_id *platform_list = NULL;
70 cl_platform_id platform = NULL;
72 clGetPlatformIDs(0, NULL, &num_platforms); // retrieve number of platform IDs
73 platform_list = (cl_platform_id *)calloc(num_platforms, sizeof(cl_platform_id));
74 cl_int error = clGetPlatformIDs(num_platforms, platform_list, NULL); // retrieve the actual platform IDs
76 /* a quicker way, if we are only interested in the first/default platform ID, would be:
77 clGetPlatformIDs(1, &platform, NULL);
80 check_ocl_error(error, "getting platform IDs");
82 printf("%d OpenCL platforms found:\n", num_platforms);
84 for (cl_uint i = 0; i < num_platforms; ++i) {
85 /* last param: actual size of the query result */
86 error = clGetPlatformInfo(platform_list[i], CL_PLATFORM_NAME, sizeof(buffer), buffer, NULL);
87 check_ocl_error(error, "getting platform name");
88 printf("\tplatform %u: %s ", i, buffer);
89 error = clGetPlatformInfo(platform_list[i], CL_PLATFORM_VENDOR, sizeof(buffer), buffer, NULL);
90 check_ocl_error(error, "getting platform vendor");
91 printf(" (%s)\n", buffer);
96 platnum = atoi(argv[1]);
97 platform = platform_list[platnum];
98 printf("using platform %u\n", platnum);
100 /* device selection */
102 cl_uint num_devs = 0;
103 cl_device_id *device_list = NULL;
104 cl_device_id device = NULL;
106 /* possible types: CPU, GPU, ACCELERATOR, DEFAULT, ALL */
107 clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devs);
108 device_list = (cl_device_id *)calloc(num_devs, sizeof(cl_device_id));
109 error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devs, device_list, NULL);
111 check_ocl_error(error, "getting device IDs");
113 printf("%d devices found:\n", num_devs);
115 for (cl_uint i = 0; i < num_devs; ++i) {
116 /* last param: actual size of the query result */
117 error = clGetDeviceInfo(device_list[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL);
118 check_ocl_error(error, "getting device name");
119 printf("\tdevice %u: %s\n", i, buffer);
124 devnum = atoi(argv[2]);
125 device = device_list[devnum];
126 printf("using device %u\n", devnum);
128 /* creating a context for one device */
130 cl_context_properties ctx_prop[] = {
131 CL_CONTEXT_PLATFORM, (cl_context_properties)platform,
135 cl_context ctx = clCreateContext(ctx_prop, 1, &device, NULL, NULL, &error);
136 check_ocl_error(error, "creating context");
138 /* and a command queue to go with it */
139 cl_command_queue queue = clCreateCommandQueue(ctx, device, CL_QUEUE_PROFILING_ENABLE, &error);
140 check_ocl_error(error, "creating command queue");
143 /* allocate device memory */
146 page_d = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, CHARS_PER_PAGE, NULL, &error);
147 check_ocl_error(error, "allocating device page memory buffer");
149 /* load and build program */
150 char *prog_source = read_file("babel.cl");
151 if (prog_source == NULL)
154 cl_program program = clCreateProgramWithSource(ctx, 1, (const char **)&prog_source, NULL, &error);
155 check_ocl_error(error, "creating program");
157 /* AMD APP doesn't include the current directory by default, apparently */
158 const char *clopts = "-I.";
159 error = clBuildProgram(program,
160 1, &device, // device(s)
161 clopts, // compiler options
164 if (error == CL_BUILD_PROGRAM_FAILURE) {
167 error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
168 check_ocl_error(error, "getting program build info size");
169 log = (char *)malloc(logSize);
170 error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
171 check_ocl_error(error, "getting program build info");
176 check_ocl_error(error, "building program");
179 /* loading the kernel */
180 cl_kernel fillpageKernel = clCreateKernel(program, "fillpage", &error);
181 check_ocl_error(error, "creating kernel");
183 error = clSetKernelArg(fillpageKernel, 0, sizeof(page_d), &page_d);
184 check_ocl_error(error, "setting kernel param 0");
187 size_t group_size = CHARS_PER_LINE;
189 group_size = atoi(argv[3]);
191 /* work_size must be a multiple of group_size */
192 size_t work_size = ceil(float(CHARS_PER_PAGE)/group_size)*group_size;
194 /* launch kernel, with an event to collect profiling info */
195 cl_ulong startTime, endTime;
198 clEnqueueNDRangeKernel(queue, fillpageKernel,
200 NULL, &work_size, &group_size,
204 error = clFinish(queue); // sync on queue
205 check_ocl_error(error, "finishing queue");
207 clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &startTime, NULL);
208 error = clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endTime, NULL);
209 check_ocl_error(error, "getting profiling info");
211 printf("Kernel runtime: %gms\n", double(endTime-startTime)/1000000);
213 /* copy memory down */
214 error = clEnqueueReadBuffer(queue, page_d, true, 0, CHARS_PER_PAGE, page, 0, NULL, NULL);
215 check_ocl_error(error, "getting results");
219 clReleaseMemObject(page_d);
221 clReleaseProgram(program);
222 clReleaseCommandQueue(queue);
223 clReleaseContext(ctx);
225 for(size_t line = 0; line < LINES_PER_PAGE; ++line) {
226 for(size_t col = 0; col < CHARS_PER_LINE; ++col) {
227 putchar(page[line*CHARS_PER_LINE + col]);