1 // c.rl written by Mitchell Foral. mitchell<att>caladbolg<dott>net.
3 /************************* Required for every parser *************************/
7 #include "ragel_parser_macros.h"
9 // Language name handed to the callback; parse_csharp() swaps it temporarily.
10 const char *C_LANG = "c";
12 // The language's entity names, looked up as c_entities[entity] for the callback.
13 const char *c_entities[] = {
14 "space", "comment", "string", "number", "preproc",
15 "keyword", "identifier", "operator", "any"
18 // Constants associated with the entities, in the same order as the names above.
20 C_SPACE = 0, C_COMMENT, C_STRING, C_NUMBER, C_PREPROC,
21 C_KEYWORD, C_IDENTIFIER, C_OPERATOR, C_ANY
24 /*****************************************************************************/
29 include common "common.rl";
31 # Line counting machine
42 std_internal_newline(C_LANG)
51 escaped_newline %{ entity = INTERNAL_NL; } %c_ccallback
55 (nonnewline - ws) @comment
59 newline %{ entity = INTERNAL_NL; } %c_ccallback
63 (nonnewline - ws) @comment
65 c_comment = c_line_comment | c_block_comment;
69 escaped_newline %{ entity = INTERNAL_NL; } %c_ccallback
79 escaped_newline %{ entity = INTERNAL_NL; } %c_ccallback
87 c_string = c_sq_str | c_dq_str;
90 spaces ${ entity = C_SPACE; } => c_ccallback;
93 newline ${ entity = NEWLINE; } => c_ccallback;
94 ^space ${ entity = C_ANY; } => c_ccallback;
100 callback(C_LANG, c_entities[entity], cint(ts), cint(te));
103 c_line_comment_entity = '//' (escaped_newline | nonnewline)*;  # '//' then any run of escaped_newline / nonnewline
104 c_block_comment_entity = '/*' any* :>> '*/';  # ':>>' ends the match at the first complete '*/'
105 c_comment_entity = c_line_comment_entity | c_block_comment_entity;
# Quoted literals: a backslash consumes the following character, so an escaped
# quote does not terminate the literal.
107 c_sq_str_entity = '\'' ([^'\\] | '\\' any)* '\'';
108 c_dq_str_entity = '"' ([^"\\] | '\\' any)* '"';
109 c_string_entity = c_sq_str_entity | c_dq_str_entity;
# 'float' and 'integer' are not defined in this file; presumably they come from
# the included common.rl -- confirm.
111 c_number_entity = float | integer;
114 'define' | 'elif' | 'else' | 'endif' | 'error' | 'if' | 'ifdef' |
115 'ifndef' | 'import' | 'include' | 'line' | 'pragma' | 'undef' |
117 # TODO: find some way of making preproc match the beginning of a line.
118 # Putting a 'when starts_line' conditional throws an assertion error.
120 '#' space* (c_block_comment_entity space*)?
121 c_preproc_word (escaped_newline | nonnewline)*;
# Identifier: a letter or underscore, then any run of letters, digits or underscores.
123 c_identifier_entity = (alpha | '_') (alnum | '_')*;
126 'and' | 'and_eq' | 'asm' | 'auto' | 'bitand' | 'bitor' | 'bool' |
127 'break' | 'case' | 'catch' | 'char' | 'class' | 'compl' | 'const' |
128 'const_cast' | 'continue' | 'default' | 'delete' | 'do' | 'double' |
129 'dynamic_cast' | 'else' | 'enum' | 'explicit' | 'export' | 'extern' |
130 'false' | 'float' | 'for' | 'friend' | 'goto' | 'if' | 'inline' | 'int' |
131 'long' | 'mutable' | 'namespace' | 'new' | 'not' | 'not_eq' |
132 'operator' | 'or' | 'or_eq' | 'private' | 'protected' | 'public' |
133 'register' | 'reinterpret_cast' | 'return' | 'short' | 'signed' |
134 'sizeof' | 'static' | 'static_cast' | 'struct' | 'switch' |
135 'template' | 'this' | 'throw' | 'true' | 'try' | 'typedef' | 'typeid' |
136 'typename' | 'union' | 'unsigned' | 'using' | 'virtual' | 'void' |
137 'volatile' | 'wchar_t' | 'while' | 'xor' | 'xor_eq';
# Operator: exactly one character from this set of operator and punctuation characters.
139 c_operator_entity = [+\-/*%<>!=^&|?~:;.,()\[\]{}];
# Entity patterns: each one stores its entity code in 'entity' through a '$'
# (all-transitions) action and runs c_ecallback when the pattern is accepted.
# NOTE(review): c_identifier_entity is listed before c_keyword_entity and also
# matches every keyword literal. Which code ends up in 'entity' for a keyword
# (or for an identifier that is a prefix of a keyword) depends on scanner
# tie-breaking and on the order in which the '$' actions fire -- confirm
# against the Ragel guide that keywords are reported as C_KEYWORD.
142 space+ ${ entity = C_SPACE; } => c_ecallback;
143 c_comment_entity ${ entity = C_COMMENT; } => c_ecallback;
144 c_string_entity ${ entity = C_STRING; } => c_ecallback;
145 c_number_entity ${ entity = C_NUMBER; } => c_ecallback;
146 c_preproc_entity ${ entity = C_PREPROC; } => c_ecallback;
147 c_identifier_entity ${ entity = C_IDENTIFIER; } => c_ecallback;
148 c_keyword_entity ${ entity = C_KEYWORD; } => c_ecallback;
149 c_operator_entity ${ entity = C_OPERATOR; } => c_ecallback;
150 ^space ${ entity = C_ANY; } => c_ecallback;
154 /************************* Required for every parser *************************/
156 /* Parses a string buffer with C/C++ code.
158 * @param *buffer The string to parse.
159 * @param length The length of the string to parse.
160 * @param count Integer flag specifying whether or not to count lines. If yes,
161 * uses the Ragel machine optimized for counting. Otherwise uses the Ragel
162 * machine optimized for returning entity positions.
163 * @param *callback Callback function. If count is set, callback is called for
164 * every line of code, comment, or blank with 'lcode', 'lcomment', and
165 * 'lblank' respectively. Otherwise callback is called for each entity found.
167 void parse_c(char *buffer, int length, int count,
168 void (*callback) (const char *lang, const char *entity, int start, int end)
// Start state: c_en_c_line when count is nonzero, otherwise c_en_c_entity.
173 cs = (count) ? c_en_c_line : c_en_c_entity;
176 // if no newline at EOF; callback contents of last line
// NOTE(review): process_last_line() is written without a trailing ';' --
// presumably a statement macro from ragel_parser_macros.h; confirm.
177 if (count) { process_last_line(C_LANG) }
180 const char *CPP_LANG = "cpp";
181 const char *ORIG_C_LANG = "c";
/* Parses C++ code by delegating to parse_c() with the same arguments, then
 * resets C_LANG to ORIG_C_LANG.
 * NOTE(review): no assignment of CPP_LANG to C_LANG is visible in this
 * excerpt; confirm one precedes the parse_c() call, otherwise entities are
 * reported under whatever name C_LANG already holds.
 */
182 void parse_cpp(char *buffer, int length, int count,
183 void (*callback) (const char *lang, const char *entity, int start, int end)
186 parse_c(buffer, length, count, callback);
187 C_LANG = ORIG_C_LANG;
190 const char *CSHARP_LANG = "csharp";
/* Parses C# code with the C machine: points the file-level C_LANG at
 * CSHARP_LANG for the duration of the parse_c() call, then restores
 * ORIG_C_LANG. Takes the same arguments as parse_c().
 */
191 void parse_csharp(char *buffer, int length, int count,
192 void (*callback) (const char *lang, const char *entity, int start, int end)
194 C_LANG = CSHARP_LANG;
195 parse_c(buffer, length, count, callback);
196 C_LANG = ORIG_C_LANG;
201 /*****************************************************************************/