1 /* This header defines OpenCL data types that can handle the immensity
2 * of possible lines/pages/books in the Library of Babel */
9 /* There are 25^80, which is slightly less than 2^372, possible lines.
10 * 372 happens to fit 11.625 times in 32, so we can get by with an
11 * uint16 to encode all possible lines
14 #ifdef __OPENCL_VERSION__
15 typedef uint16 line_t;
17 typedef cl_uint16 line_t;
21 If we encode each symbol (index) in 5 bits (2^5 = 32 > 25), we get 8
22 symbols every 5 bytes, and 80 symbols (a line) in 50 bytes, which
23 fits happily within the uint16 (4×16 = 64). However, decoding the
24 i-th character requires shifts and/or bitwise ANDs (with 0x1f) across
25 vector components. A mess. We don't do that.
27 A slightly better approach still uses 5 bits per symbol, but only
28 gets 6 symbols every 4 bytes (i.e. each uint). To get 80 symbols we
29 then need 13 uints plus something. Or, put differently, we can encode
30 up to 6×16 = 96 symbols in one line_t. With this strategy, we
31 can encode a line wit just 14 uints (84 symbols).
34 - an uint encodes 6 symbols, using only 30 of the 32 available bits.
35 The two extra bits should be zero, until we find interesting
36 information to encode there.
37 - an uint16 only uses 14 of the 16 uints to encode symbols. The other
38 two uints should be left to 0, again until we find interesting
39 information to encode there.
46 #define MAX_CHARS_PER_LINE_T (SYM_PER_INT*16)
47 #define CHARS_PER_LINE_T (SYM_PER_INT*14)
49 /* this function takes 6 bytes and encodes them in an int */
52 encode_bytes(const char *b)
55 ret |= b[5] & SYM_MASK;
57 ret |= b[4] & SYM_MASK;
59 ret |= b[3] & SYM_MASK;
61 ret |= b[2] & SYM_MASK;
63 ret |= b[1] & SYM_MASK;
65 ret |= b[0] & SYM_MASK;
70 #ifndef __OPENCL_VERSION__
76 encode_string(const char* str, line_t *line) {
77 size_t maxl = strlen(str);
79 if (maxl > CHARS_PER_LINE_T) {
80 fprintf(stderr, "String too long, truncated");
81 maxl = CHARS_PER_LINE_T;
87 for (size_t i = 0; i < 6; ++i) {
89 b[i] = strchr(alphabet, str[base+i]) - alphabet;
93 line->s[component++] = encode_bytes(b);
96 while (component < 16) {
97 line->s[component++] = 0;
102 /* this function gets the symbol index (0..SYMBOLS, inclusive)
103 from column col of encoded line */
106 get_symbol_index(const line_t* line, size_t col)
108 size_t component = col/SYM_PER_INT;
109 size_t shift = col - component*SYM_PER_INT;
110 /* oh God, this is not nice at all */
111 #define CASE(c) case 0x##c: \
112 return ((*line).s##c >> (shift*SYM_SHIFT)) & SYM_MASK
128 default: /* shouldn't happen */