1 // c.rl written by Mitchell Foral. mitchell<att>caladbolg<dott>net.
3 /************************* Required for every parser *************************/
4 #include "ragel_parser_macros.h"
6 // the name of the language
7 const char *C_LANG = "c";
9 // the language's entities
10 const char *c_entities[] = {
11 "space", "comment", "string", "number", "preproc",
12 "keyword", "identifier", "operator", "any"
15 // constants associated with the entities
17 C_SPACE = 0, C_COMMENT, C_STRING, C_NUMBER, C_PREPROC,
18 C_KEYWORD, C_IDENTIFIER, C_OPERATOR, C_ANY
21 // do not change the following variables
26 // used for newlines inside patterns like strings and comments that can have
28 #define INTERNAL_NL -2
32 char *p, *pe, *eof, *ts, *te;
34 // used for calculating offsets from buffer start for start and end positions
// cint(c): offset of pointer c from buffer_start, converted to int.
// The argument is parenthesized so that an argument containing a
// lower-precedence operator (e.g. a conditional expression) is evaluated
// as a whole before buffer_start is subtracted.
36 #define cint(c) ((int) ((c) - buffer_start))
38 // state flags for line and comment counting
39 int whole_line_comment;
40 int line_contains_code;
42 // the beginning of a line in the buffer for line and comment counting
45 // state variable for the current entity being matched
48 /*****************************************************************************/
53 include common "common.rl";
55 # Line counting machine
66 std_internal_newline(C_LANG)
75 escaped_newline %{ entity = INTERNAL_NL; } %c_ccallback
79 (nonnewline - ws) @comment
83 newline %{ entity = INTERNAL_NL; } %c_ccallback
87 (nonnewline - ws) @comment
89 c_comment = c_line_comment | c_block_comment;
93 escaped_newline %{ entity = INTERNAL_NL; } %c_ccallback
103 escaped_newline %{ entity = INTERNAL_NL; } %c_ccallback
109 '\\' nonnewline @code
111 c_string = c_sq_str | c_dq_str;
114 spaces ${ entity = C_SPACE; } => c_ccallback;
117 newline ${ entity = NEWLINE; } => c_ccallback;
118 ^space ${ entity = C_ANY; } => c_ccallback;
124 callback(C_LANG, c_entities[entity], cint(ts), cint(te));
# Comment patterns.
# Line comment: '//' followed by any run of escaped newlines or non-newline
# characters. (escaped_newline and nonnewline are not defined in this view;
# presumably they come from the included common.rl -- confirm.)
127 c_line_comment_entity = '//' (escaped_newline | nonnewline)*;
# Block comment: '/*', then any characters, ending at the first '*/'.
# The :>> operator is Ragel's finish-guarded concatenation: the any* run is
# terminated as soon as the closing '*/' has been fully matched.
128 c_block_comment_entity = '/*' any* :>> '*/';
# A comment is either of the two forms above.
129 c_comment_entity = c_line_comment_entity | c_block_comment_entity;
# String patterns. Between the opening and closing delimiter, the body is any
# run of characters other than the delimiter and backslash, or a backslash
# followed by any single character, so an escaped delimiter does not end the
# string.
131 c_sq_str_entity = '\'' ([^'\\] | '\\' any)* '\'';
132 c_dq_str_entity = '"' ([^"\\] | '\\' any)* '"';
# A string is either the single-quoted or the double-quoted form.
133 c_string_entity = c_sq_str_entity | c_dq_str_entity;
# A number is either a float or an integer. (Both machines are defined outside
# this view; presumably in the included common.rl -- confirm.)
135 c_number_entity = float | integer;
138 'define' | 'elif' | 'else' | 'endif' | 'error' | 'if' | 'ifdef' |
139 'ifndef' | 'import' | 'include' | 'line' | 'pragma' | 'undef' |
141 # TODO: find some way of making preproc match the beginning of a line.
142 # Putting a 'when starts_line' conditional throws an assertion error.
144 '#' space* (c_block_comment_entity space*)?
145 c_preproc_word (escaped_newline | nonnewline)*;
# An identifier starts with a letter or underscore and continues with any
# number of letters, digits, or underscores.
147 c_identifier_entity = (alpha | '_') (alnum | '_')*;
150 'and' | 'and_eq' | 'asm' | 'auto' | 'bitand' | 'bitor' | 'bool' |
151 'break' | 'case' | 'catch' | 'char' | 'class' | 'compl' | 'const' |
152 'const_cast' | 'continue' | 'default' | 'delete' | 'do' | 'double' |
153 'dynamic_cast' | 'else' | 'enum' | 'explicit' | 'export' | 'extern' |
154 'false' | 'float' | 'for' | 'friend' | 'goto' | 'if' | 'inline' | 'int' |
155 'long' | 'mutable' | 'namespace' | 'new' | 'not' | 'not_eq' |
156 'operator' | 'or' | 'or_eq' | 'private' | 'protected' | 'public' |
157 'register' | 'reinterpret_cast' | 'return' | 'short' | 'signed' |
158 'sizeof' | 'static' | 'static_cast' | 'struct' | 'switch' |
159 'template' | 'this' | 'throw' | 'true' | 'try' | 'typedef' | 'typeid' |
160 'typename' | 'union' | 'unsigned' | 'using' | 'virtual' | 'void' |
161 'volatile' | 'wchar_t' | 'while' | 'xor' | 'xor_eq';
# An operator is exactly one character from this set of operator and
# punctuation characters.
163 c_operator_entity = [+\-/*%<>!=^&|?~:;.,()\[\]{}];
166 space+ ${ entity = C_SPACE; } => c_ecallback;
167 c_comment_entity ${ entity = C_COMMENT; } => c_ecallback;
168 c_string_entity ${ entity = C_STRING; } => c_ecallback;
169 c_number_entity ${ entity = C_NUMBER; } => c_ecallback;
170 c_preproc_entity ${ entity = C_PREPROC; } => c_ecallback;
171 c_identifier_entity ${ entity = C_IDENTIFIER; } => c_ecallback;
172 c_keyword_entity ${ entity = C_KEYWORD; } => c_ecallback;
173 c_operator_entity ${ entity = C_OPERATOR; } => c_ecallback;
174 ^space ${ entity = C_ANY; } => c_ecallback;
178 /* Parses a string buffer with C/C++ code.
180 * @param *buffer The string to parse.
181 * @param length The length of the string to parse.
182 * @param count Integer flag specifying whether or not to count lines. If yes,
183 * uses the Ragel machine optimized for counting. Otherwise uses the Ragel
184 * machine optimized for returning entity positions.
185 * @param *callback Callback function. If count is set, callback is called for
186 * every line of code, comment, or blank with 'lcode', 'lcomment', and
187 * 'lblank' respectively. Otherwise callback is called for each entity found.
189 void parse_c(char *buffer, int length, int count,
190 void (*callback) (const char *lang, const char *entity, int start, int end)
193 pe = buffer + length;
196 buffer_start = buffer;
197 whole_line_comment = 0;
198 line_contains_code = 0;
203 cs = (count) ? c_en_c_line : c_en_c_entity;
206 // if there is no newline at EOF, call back with the contents of the last line
207 if (count) { process_last_line(C_LANG) }