1 // parser_macros.h written by Mitchell Foral. mitchell<att>caladbolg.net.
2 // See COPYING for license information.
4 #ifndef OHCOUNT_PARSER_MACROS_H
5 #define OHCOUNT_PARSER_MACROS_H
10 #include "languages.h"
13 * @struct CallbackItem
14 * @brief Holds one callback entry in a queue (linked list) of callbacks.
16 typedef struct CallbackItem {
18 * The language associated with this callback item.
19 * Must not be 'free'd.
24 * The name of the entity associated with this callback.
25 * Must not be 'free'd.
29 /** The start position of the entity in the buffer. */
32 /** The end position of the entity in the buffer. */
38 /** The next callback in the linked list. */
39 struct CallbackItem *next;
43 /** The head of the Callback queue. */
44 Callback *callback_list_head = NULL;
46 /** The tail of the Callback queue. */
47 Callback *callback_list_tail = NULL;
50 * Enqueues a callback for calling upon commit.
51 * This is only necessary for line counting machines.
52 * Ragel will execute actions in real-time rather than after a complete match.
53 * This is a problem for entities that contain internal newlines, since there is
54 * a callback for each internal newline whether or not the end of the entity
55 * matches. This means that if, for example, the beginning of a string entity is
56 * matched, the text following is treated as code until the ending delimiter. If
57 * there is no ending delimiter (it was not actually a string entity), Ragel
58 * will jump back to the beginning of the string and reparse the text again.
59 * This means all the callbacks called were probably not accurate.
60 * To remedy this, any entity which needs an ending delimiter that may not
61 * appear will have its callbacks enqueued and then committed when the ending
62 * delimiter is reached. If that delimiter is not reached, the callbacks are
64 * @param lang The language name.
65 * @param entity The entity (lcode, lcomment, lblank).
66 * @param s The start position of the entity in the buffer.
67 * @param e The end position of the entity in the buffer.
68 * @param udata Userdata.
70 void enqueue(const char *lang, const char *entity, int s, int e, void *udata) {
71 Callback *item = (Callback *) malloc(sizeof(Callback));
72 if (!item) printf("Failed to allocate memory for enqueued callback.\n");
75 item->entity = entity;
81 if (!callback_list_head) {
82 callback_list_head = item;
83 callback_list_tail = item;
85 callback_list_tail->next = item;
86 callback_list_tail = item;
90 /** Frees the memory used by a queue. */
92 Callback *item = callback_list_head;
94 Callback *next = item->next;
98 callback_list_head = NULL;
99 callback_list_tail = NULL;
103 * Restores settings for a failed enqueued entity.
104 * This is typically used in the ls, code, and comment macros.
105 * @note Applies only to line counting parsers.
109 line_start = last_line_start; \
110 line_contains_code = last_line_contains_code; \
111 whole_line_comment = last_whole_line_comment; \
115 * Sets the line_start variable to ts.
116 * This is typically used for the SPACE entity in the main action.
117 * @note Applies only to line counting parsers.
120 if (inqueue) { dequeue; } \
121 if (!line_start) line_start = ts; \
125 * The C equivalent of the Ragel 'code' action.
126 * This is typically used in the main action for entities where Ragel actions
127 * cannot, for one reason or another, be used.
128 * @note Applies only to line counting parsers.
131 if (inqueue) { dequeue; } \
132 if (!line_contains_code && !line_start) line_start = ts; \
133 line_contains_code = 1; \
137 * The C equivalent of the Ragel 'comment' action.
138 * This is typically unused, but here for consistency.
139 * @note Applies only to line counting parsers.
142 if (inqueue) { dequeue; } \
143 if (!line_contains_code) { \
144 whole_line_comment = 1; \
145 if (!line_start) line_start = ts; \
150 * Sets up for having seen an embedded language.
151 * This is typically used when entering an embedded language which usually does
152 * not span multiple lines (e.g. php for <?php echo 'blah' ?> on single lines)
153 * so the line is counted as embedded code or comment, not parent code.
154 * @param lang The language name string.
155 * @note Applies only to line counting parsers.
157 #define saw(lang) { \
159 whole_line_comment = 0; \
160 line_contains_code = 0; \
164 * Executes standard line counting actions for INTERNAL_NL entities.
165 * This is typically used in the main action for the INTERNAL_NL entity.
166 * @param lang The language name string.
167 * @note Applies only to line counting parsers.
169 #define std_internal_newline(lang) { \
170 if (callback && p > line_start) { \
171 if (line_contains_code) { \
173 enqueue(lang, "lcode", cint(line_start), cint(p), userdata); \
175 callback(lang, "lcode", cint(line_start), cint(p), userdata); \
176 } else if (whole_line_comment) { \
178 enqueue(lang, "lcomment", cint(line_start), cint(p), userdata); \
180 callback(lang, "lcomment", cint(line_start), cint(p), userdata); \
183 enqueue(lang, "lblank", cint(line_start), cint(p), userdata); \
185 callback(lang, "lblank", cint(line_start), cint(p), userdata); \
188 whole_line_comment = 0; \
189 line_contains_code = 0; \
194 * Executes embedded language line counting actions for INTERNAL_NL entities
195 * based on whether or not the embedded language's code has been seen in a
197 * This is typically used in the main action for the INTERNAL_NL entity.
198 * @param lang The language name string.
199 * @note Applies only to line counting parsers.
201 #define emb_internal_newline(lang) { \
202 if (seen && seen != lang) \
203 std_internal_newline(seen) \
205 std_internal_newline(lang) \
210 * Executes standard line counting actions for NEWLINE entities.
211 * This is typically used in the main action for the NEWLINE entity.
212 * @param lang The language name string.
213 * @note Applies only to line counting parsers.
215 #define std_newline(lang) {\
216 if (inqueue) { dequeue; } \
217 if (callback && te > line_start) { \
218 if (line_contains_code) \
219 callback(lang, "lcode", cint(line_start), cint(te), userdata); \
220 else if (whole_line_comment) \
221 callback(lang, "lcomment", cint(line_start), cint(te), userdata); \
223 callback(lang, "lblank", cint(ts), cint(te), userdata); \
225 whole_line_comment = 0; \
226 line_contains_code = 0; \
231 * Executes embedded language line counting actions for NEWLINE entities based
232 * on whether or not the embedded language's code has been seen in a parent
234 * This is typically used in the main action for the NEWLINE entity.
235 * @param lang The language name string.
236 * @note Applies only to line counting parsers.
238 #define emb_newline(lang) { \
239 if (seen && seen != lang) \
247 * Processes the last line for buffers that don't have a newline at EOF.
248 * This is typically used at the end of the parse_lang function after the Ragel
249 * parser has been executed.
250 * @param lang The language name string.
251 * @note Applies only to line counting parsers.
253 #define process_last_line(lang) {\
254 if ((whole_line_comment || line_contains_code) && callback) { \
255 if (line_contains_code) \
256 callback(lang, "lcode", cint(line_start), cint(pe), userdata); \
257 else if (whole_line_comment) \
258 callback(lang, "lcomment", cint(line_start), cint(pe), userdata); \
263 * Determines whether or not the rest of the line is blank.
264 * This is typically used when entering an embedded language.
265 * @param p The position of entry into the embedded language.
266 * @return 0 if the rest of the line is not blank, the position at the end of
267 * the newline otherwise (inclusive).
268 * @note Applies only to line counting parsers.
270 int is_blank_entry(char **p) {
272 while (*pos != '\n' && *pos != '\r' && *pos != '\f') {
273 if (*pos != '\t' && *pos != ' ') return 0;
276 if (*pos == '\r' && *(pos+1) == '\n') pos++;
282 * If there is a transition into an embedded language and there is only parent
283 * language code on the line (the rest of the line is blank with no child code),
284 * count the line as a line of parent code.
285 * Moves p and te to the end of the newline and calls the std_newline macro. (p
286 * is inclusive, te is not.)
287 * This is typically used in the main action for the CHECK_BLANK_ENTRY entity.
288 * @param lang The language name string.
289 * @note Applies only to line counting parsers.
291 #define check_blank_entry(lang) { \
292 if (is_blank_entry(&p)) { \
298 // Variables used by all parsers. Do not modify.
302 * @note This is only used for line counting parsers.
307 * Internal newline entity.
308 * Used for newlines inside patterns like strings and comments that can have
310 * @note This is only used for line counting parsers.
/* Parenthesized so the negative value stays a single operand wherever the
 * macro is expanded. */
#define INTERNAL_NL (-2)
315 * Check blank entry entity.
316 * Used for embedded language transitions. If a newline follows immediately
317 * after such a transition, the line should be counted as parent code, not
319 * @note This is only used for line counting parsers.
/* Parenthesized so the negative value stays a single operand wherever the
 * macro is expanded. */
#define CHECK_BLANK_ENTRY (-3)
323 /** Required by Ragel. */
326 /** Required by Ragel. */
329 /** Required by Ragel. */
332 /** Required by Ragel. */
335 /** Required by Ragel. */
338 /** Required by Ragel. */
341 /** Required by Ragel. */
344 /** Required by Ragel. */
347 /** Required by Ragel. */
350 /** The buffer currently being parsed. */
354 * Returns the position relative to the start of the buffer being parsed for
355 * an absolute location in memory.
356 * @param c Absolute location (pointer) inside the buffer being parsed.
357 * @note This is only used for line counting parsers.
/* Converts pointer `c` into an int offset from `buffer_start`. The argument is
 * parenthesized so that compound expressions (e.g. `cond ? a : b`) are
 * evaluated as a unit before the buffer start is subtracted. */
#define cint(c) ((int) ((c) - buffer_start))
362 * Flag indicating whether or not the current line contains only a comment.
363 * @note This is only used for line counting parsers.
365 int whole_line_comment;
368 * Flag indicating whether or not the current line contains any code.
369 * @note This is only used for line counting parsers.
371 int line_contains_code;
374 * The beginning of the current line in the buffer being parsed.
375 * @note This is only used for line counting parsers.
379 /** State variable for the current entity being matched. */
383 * Keeps track of an embedded language.
384 * @note This is only used for line counting parsers.
389 * Flag indicating whether or not to enqueue callbacks instead of calling them
391 * @note This is only used for line counting parsers.
396 * Backup variable for 'inqueue'ing.
397 * @note This is only used for line counting parsers.
399 char *last_line_start;
402 * Backup variable for 'inqueue'ing.
403 * @note This is only used for line counting parsers.
405 int last_line_contains_code;
408 * Backup variable for 'inqueue'ing.
409 * @note This is only used for line counting parsers.
411 int last_whole_line_comment;
414 * Initializes variables for parsing a buffer.
415 * Required at the beginning of every parser function.
419 pe = buffer + length; \
422 buffer_start = buffer; \
423 whole_line_comment = 0; \
424 line_contains_code = 0; \