1 /************************* Required for every parser *************************/
3 // Name of the language handled by this parser; passed as the first argument
// of every callback invocation.
4 const char *LANG = "c";
6 // Names of the language's entities. The entity callback looks a name up with
// c_entities[entity], so the order here must match the entity constants
// (SPACE = 0, COMMENT, ...).
7 const char *c_entities[] = {
8 "space", "comment", "string", "number", "preproc", "keyword",
9 "identifier", "operator", "escaped_newline", "newline", "any"
12 // Constants associated with the entities. They start at 0 and follow the
// order of the names in c_entities, so each constant can be used as an index
// into that array.
14 SPACE = 0, COMMENT, STRING, NUMBER, PREPROC, KEYWORD,
15 IDENTIFIER, OPERATOR, ESCAPED_NL, NEWLINE, ANY
18 // do not change the following variables
20 // Pseudo-entity assigned to `entity` when a newline (or escaped newline) is
// matched inside a larger pattern such as a string or comment. It is not an
// index into c_entities: the entity callback skips reporting when
// entity == INTERNAL_NL.
22 #define INTERNAL_NL -1
// Scanner pointers. pe is set to buffer + length in parse_c; ts and te are the
// start and end of the token reported to the callback; p is used as the end of
// a line span when a line is reported.
// NOTE(review): p, pe, eof, ts and te are the variable names Ragel-generated
// scanners expect — confirm their exact roles against the generated code.
26 char *p, *pe, *eof, *ts, *te;
28 // used for calculating offsets from buffer start for start and end positions
// Offset of pointer c from buffer_start, converted to int. Used for the start
// and end positions passed to the callback. The argument is parenthesized so
// that an expression argument (e.g. a conditional) is evaluated as a whole
// before buffer_start is subtracted.
#define cint(c) ((int) ((c) - buffer_start))
32 // State flags for line and comment counting. Both are reset to 0 after a
// line has been reported and at the start of parse_c.
// Nonzero selects the "lcomment" report for a line, but only when
// line_contains_code is not set.
33 int whole_line_comment;
// Set to 1 when code is seen on the current line; selects the "lcode" report
// and is checked before whole_line_comment.
34 int line_contains_code;
36 // the beginning of a line in the buffer for line and comment counting
39 // state variable for the current entity being matched
42 /*****************************************************************************/
47 include common "common.rl";
// Record ts as the start of the current line if no start is recorded yet.
53 if (!line_start) line_start = ts;
// Code was matched: if this is the first code on the line and no line start
// is recorded, the line starts at ts. Then flag the line as containing code.
62 if (!line_contains_code && !line_start) line_start = ts;
63 line_contains_code = 1;
// Line report ending at p. Only done when a callback is installed and the
// span from line_start to p is non-empty. The span is reported as a counted
// line ("lcode" when the line contains code, "lcomment" for a whole-line
// comment, "lblank" otherwise), after which both per-line flags are cleared.
67 if (c_callback && p > line_start) {
68 if (line_contains_code)
69 c_callback(LANG, "lcode", cint(line_start), cint(p));
70 else if (whole_line_comment)
71 c_callback(LANG, "lcomment", cint(line_start), cint(p));
73 c_callback(LANG, "lblank", cint(line_start), cint(p));
74 whole_line_comment = 0;
75 line_contains_code = 0;
// Line report ending at te (the end of the matched token). Only done when a
// callback is installed and te lies past line_start. Afterwards both per-line
// flags are cleared.
// NOTE(review): the "lblank" report starts at ts, whereas "lcode" and
// "lcomment" start at line_start — confirm this difference is intended.
80 if (c_callback && te > line_start) {
81 if (line_contains_code)
82 c_callback(LANG, "lcode", cint(line_start), cint(te));
83 else if (whole_line_comment)
84 c_callback(LANG, "lcomment", cint(line_start), cint(te));
86 c_callback(LANG, "lblank", cint(ts), cint(te));
88 whole_line_comment = 0;
89 line_contains_code = 0;
// Entity callback: report the matched token from ts to te under its entity
// name. INTERNAL_NL is skipped because it is -1 and has no entry in
// c_entities.
92 if (c_callback && entity != INTERNAL_NL)
93 c_callback(LANG, c_entities[entity], cint(ts), cint(te));
98 escaped_newline %{ entity = INTERNAL_NL; } %c_callback
106 newline %{ entity = INTERNAL_NL; } %c_callback
112 c_comment = c_line_comment | c_block_comment;
116 newline %{ entity = INTERNAL_NL; } %c_callback
126 newline %{ entity = INTERNAL_NL; } %c_callback
134 c_string = c_sq_str | c_dq_str;
136 c_number = float | integer;
139 'define' | 'elif' | 'else' | 'endif' | 'error' | 'if' | 'ifdef' |
140 'ifndef' | 'import' | 'include' | 'line' | 'pragma' | 'undef' |
143 ('#' when no_code) ws* c_preproc_word
145 escaped_newline %{ entity = INTERNAL_NL; } %c_callback
# Identifier: a letter or underscore followed by any number of letters,
# digits or underscores.
152 c_identifier = (alpha | '_') (alnum | '_')*;
155 'and' | 'and_eq' | 'asm' | 'auto' | 'bitand' | 'bitor' | 'bool' |
156 'break' | 'case' | 'catch' | 'char' | 'class' | 'compl' | 'const' |
157 'const_cast' | 'continue' | 'default' | 'delete' | 'do' | 'double' |
158 'dynamic_cast' | 'else' | 'enum' | 'explicit' | 'export' | 'extern' |
159 'false' | 'float' | 'for' | 'friend' | 'goto' | 'if' | 'inline' | 'int' |
160 'long' | 'mutable' | 'namespace' | 'new' | 'not' | 'not_eq' |
161 'operator' | 'or' | 'or_eq' | 'private' | 'protected' | 'public' |
162 'register' | 'reinterpret_cast' | 'return' | 'short' | 'signed' |
163 'sizeof' | 'static' | 'static_cast' | 'struct' | 'switch' |
164 'template' | 'this' | 'throw' | 'true' | 'try' | 'typedef' | 'typeid' |
165 'typename' | 'union' | 'unsigned' | 'using' | 'virtual' | 'void' |
166 'volatile' | 'wchar_t' | 'while' | 'xor' | 'xor_eq';
# Operator: exactly one character from this punctuation set, which also
# includes parentheses, brackets, braces, ';', ',' and '@'.
168 c_operator = [+\-/*%<>!=^&|?~:;.,()\[\]{}@];
# Entity alternatives: each one stores its entity constant in `entity` as it
# matches and invokes c_callback when the token is accepted.
# NOTE(review): c_identifier is listed before c_keyword, and every keyword
# also matches c_identifier with the same length — check which alternative
# wins the tie and whether KEYWORD is ever reported.
171 spaces ${ entity = SPACE; } => c_callback;
172 c_comment ${ entity = COMMENT; } => c_callback;
173 c_string ${ entity = STRING; } => c_callback;
174 c_number ${ entity = NUMBER; } => c_callback;
175 c_preproc ${ entity = PREPROC; } => c_callback;
176 c_identifier ${ entity = IDENTIFIER; } => c_callback;
177 c_keyword ${ entity = KEYWORD; } => c_callback;
178 c_operator ${ entity = OPERATOR; } => c_callback;
179 escaped_newline ${ entity = ESCAPED_NL; } => c_callback;
180 newline ${ entity = NEWLINE; } => c_callback;
181 nonprintable_char ${ entity = ANY; } => c_callback;
185 /* Parses a string buffer with C/C++ code.
187 * @param *buffer The string to parse.
188 * @param length The length of the string to parse.
189 * @param *c_callback Callback function called for each entity. Entities are
190 * the ones defined in the lexer as well as 3 additional entities used by
191 * Ohcount for counting lines: lcode, lcomment, lblank.
193 void parse_c(char *buffer, int length,
194 void (*c_callback) (const char *lang, const char *entity, int start, int end)
197 pe = buffer + length;
200 buffer_start = buffer;
201 whole_line_comment = 0;
202 line_contains_code = 0;
209 // no newline at EOF; get contents of last line
210 if ((whole_line_comment || line_contains_code) && c_callback) {
211 if (line_contains_code)
212 c_callback(LANG, "lcode", cint(line_start), cint(pe));
213 else if (whole_line_comment)
214 c_callback(LANG, "lcomment", cint(line_start), cint(pe));