1 /* usual C/C++ includes */
3 #include <math.h> // for ceil()
/* Report a failed OpenCL call: when `error` is not CL_SUCCESS, print the
 * numeric error code followed by `message` to stderr.
 *   error   - status code returned by (or through) an OpenCL API call
 *   message - short description of the operation that was being attempted
 * NOTE(review): callers use the results of the checked call right after
 * invoking this, so it presumably terminates the program on failure --
 * confirm. */
13 void check_ocl_error(const cl_int &error, const char *message) {
14 if (error != CL_SUCCESS) {
15 fprintf(stderr, "error %d %s\n", error, message);
/* Load the contents of the file `fname` into a heap buffer.
 * The file is opened in binary mode and the stream is positioned at its end
 * before the buffer is allocated (presumably to obtain the size stored in
 * `fsize` -- confirm). The buffer is allocated with malloc(), then `fsize`
 * bytes are read into it. A "not found" message and a short-read message
 * are written to stderr; on the success path the byte count is printed to
 * stdout.
 * NOTE(review): main() compares the result against NULL, so failures
 * presumably return NULL -- confirm.
 * NOTE(review): the size_t values are printed with %lu; %zu is the portable
 * conversion specifier for size_t. */
20 char *read_file(const char *fname) {
21 size_t fsize, readsize;
24 FILE *fd = fopen(fname, "rb");
26 fprintf(stderr, "%s not found\n", fname);
30 fseek(fd, 0, SEEK_END);
/* one byte more than is read below; presumably room for a terminating NUL -- confirm */
33 buff = (char *)malloc(fsize+1);
35 readsize = fread(buff, 1, fsize, fd);
36 if (fsize != readsize) {
37 fprintf(stderr, "could only read %lu/%lu bytes from %s\n",
38 readsize, fsize, fname);
44 printf("read %lu bytes from %s\n", fsize, fname);
51 int main(int argc, char **argv) {
54 if (SYMBOLS + 1 != sizeof(alphabet)) {
55 fprintf(stderr, "Wrong alphabet: %s has %lu symbols, expected %d\n",
56 alphabet, sizeof(alphabet) - 1, SYMBOLS);
60 /* prepare enc_h data */
61 const char *str="CICERENELLA TENEVA NU GALLO, E TUTTA NOTTE CI IEVA A CAVALLO. ";
62 cl_uint numpacks = encode_string(str, NULL);
63 size_t enc_size = numpacks*sizeof(ushort);
64 packel *enc_h = (ushort *)calloc(numpacks,sizeof(packel));
65 encode_string(str, enc_h);
67 size_t text_size = SYM_PER_PACKEL*numpacks;
68 cl_uint page_size = CHARS_PER_PAGE;
69 while (page_size < text_size)
70 page_size += CHARS_PER_PAGE;
71 char *page = (char *)calloc(page_size+1, 1);
73 /* auxiliary buffer to read platform and device info */
76 /* platform selection */
77 cl_uint num_platforms = 0;
78 cl_platform_id *platform_list = NULL;
79 cl_platform_id platform = NULL;
81 clGetPlatformIDs(0, NULL, &num_platforms); // retrieve number of platform IDs
82 platform_list = (cl_platform_id *)calloc(num_platforms, sizeof(cl_platform_id));
83 cl_int error = clGetPlatformIDs(num_platforms, platform_list, NULL); // retrieve the actual platform IDs
85 /* a quicker way, if we are only interested in the first/default platform ID, would be to have:
86 clGetPlatformIDs(1, &platform, NULL);
89 check_ocl_error(error, "getting platform IDs");
91 printf("%d OpenCL platforms found:\n", num_platforms);
93 for (cl_uint i = 0; i < num_platforms; ++i) {
94 /* last param: actual size of the query result */
95 error = clGetPlatformInfo(platform_list[i], CL_PLATFORM_NAME, sizeof(buffer), buffer, NULL);
96 check_ocl_error(error, "getting platform name");
97 printf("\tplatform %u: %s ", i, buffer);
98 error = clGetPlatformInfo(platform_list[i], CL_PLATFORM_VENDOR, sizeof(buffer), buffer, NULL);
99 check_ocl_error(error, "getting platform vendor");
100 printf(" (%s)\n", buffer);
105 platnum = atoi(argv[1]);
106 platform = platform_list[platnum];
107 printf("using platform %u\n", platnum);
109 /* device selection */
111 cl_uint num_devs = 0;
112 cl_device_id *device_list = NULL;
113 cl_device_id device = NULL;
115 /* possible types: CPU, GPU, ACCELERATOR, DEFAULT, ALL */
116 clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devs);
117 device_list = (cl_device_id *)calloc(num_devs, sizeof(cl_device_id));
118 error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devs, device_list, NULL);
120 check_ocl_error(error, "getting device IDs");
122 printf("%d devices found:\n", num_devs);
124 for (cl_uint i = 0; i < num_devs; ++i) {
125 /* last param: actual size of the query result */
126 error = clGetDeviceInfo(device_list[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL);
127 check_ocl_error(error, "getting device name");
128 printf("\tdevice %u: %s\n", i, buffer);
133 devnum = atoi(argv[2]);
134 device = device_list[devnum];
135 printf("using device %u\n", devnum);
137 /* creating a context for one device */
139 cl_context_properties ctx_prop[] = {
140 CL_CONTEXT_PLATFORM, (cl_context_properties)platform,
144 cl_context ctx = clCreateContext(ctx_prop, 1, &device, NULL, NULL, &error);
145 check_ocl_error(error, "creating context");
147 /* and a command queue to go with it */
148 cl_command_queue queue = clCreateCommandQueue(ctx, device, CL_QUEUE_PROFILING_ENABLE, &error);
149 check_ocl_error(error, "creating command queue");
152 /* allocate device memory */
153 cl_mem page_d, enc_d;
155 page_d = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, page_size, NULL, &error);
156 check_ocl_error(error, "allocating device page memory buffer");
158 enc_d = clCreateBuffer(ctx, CL_MEM_READ_ONLY, enc_size, NULL, &error);
159 check_ocl_error(error, "allocating device page memory buffer");
162 error = clEnqueueWriteBuffer(queue, enc_d, false, 0, enc_size, enc_h, 0, NULL, NULL);
163 check_ocl_error(error, "loading encoded data");
165 /* load and build program */
166 char *prog_source = read_file("babel.cl");
167 if (prog_source == NULL)
170 cl_program program = clCreateProgramWithSource(ctx, 1, (const char **)&prog_source, NULL, &error);
171 check_ocl_error(error, "creating program");
173 /* AMD APP doesn't include the current directory by default, apparently */
174 const char *clopts = "-I.";
175 error = clBuildProgram(program,
176 1, &device, // device(s)
177 clopts, // compiler options
180 if (error == CL_BUILD_PROGRAM_FAILURE) {
183 error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
184 check_ocl_error(error, "getting program build info size");
185 log = (char *)malloc(logSize);
186 error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
187 check_ocl_error(error, "getting program build info");
192 check_ocl_error(error, "building program");
195 size_t group_size = 64;
197 group_size = atoi(argv[3]);
199 /* work_size must be a multiple of group_size */
200 size_t work_size = ceil(float((page_size+SYM_PER_PACKEL-1)/SYM_PER_PACKEL)/group_size)*group_size;
202 printf("work size %lu, group size %lu\n", work_size, group_size);
204 /* loading the kernel */
205 cl_kernel decode_string = clCreateKernel(program, "decode_string", &error);
206 check_ocl_error(error, "creating kernel");
208 error = clSetKernelArg(decode_string, 0, sizeof(page_d), &page_d);
209 check_ocl_error(error, "setting kernel param 0");
211 error = clSetKernelArg(decode_string, 1, sizeof(enc_d), &enc_d);
212 check_ocl_error(error, "setting kernel param 1");
214 error = clSetKernelArg(decode_string, 2, sizeof(page_size), &page_size);
215 check_ocl_error(error, "setting kernel param 2");
217 error = clSetKernelArg(decode_string, 3, sizeof(numpacks), &numpacks);
218 check_ocl_error(error, "setting kernel param 3");
220 /* launch kernel, with an event to collect profiling info */
221 cl_ulong startTime, endTime;
224 clFinish(queue); // wait for memory transfers to finish
226 clEnqueueNDRangeKernel(queue, decode_string,
228 NULL, &work_size, &group_size,
232 error = clFinish(queue); // sync on queue
233 check_ocl_error(error, "finishing queue");
235 clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &startTime, NULL);
236 error = clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endTime, NULL);
237 check_ocl_error(error, "getting profiling info");
239 printf("Kernel runtime: %gms\n", double(endTime-startTime)/1000000);
241 /* copy memory down */
242 error = clEnqueueReadBuffer(queue, page_d, true, 0, page_size, page, 0, NULL, NULL);
243 check_ocl_error(error, "getting results");
247 clReleaseMemObject(page_d);
248 clReleaseMemObject(enc_d);
250 clReleaseProgram(program);
251 clReleaseCommandQueue(queue);
252 clReleaseContext(ctx);
254 for (size_t l = 0; l < LINES_PER_PAGE; ++l) {
255 for (size_t col = 0; col < CHARS_PER_LINE; ++col) {
256 putchar(page[l*CHARS_PER_LINE + col]);