1 // perl.rl written by Mitchell Foral. mitchell<att>caladbolg<dott>net
3 /************************* Required for every parser *************************/
4 #ifndef OHCOUNT_PERL_PARSER_H
5 #define OHCOUNT_PERL_PARSER_H
7 #include "../parser_macros.h"
9 // the name of the language; within this file it is passed as the first
// argument to callback() in perl_ecallback and to the std_newline,
// std_internal_newline and process_last_line invocations
10 const char *PERL_LANG = LANG_PERL;
12 // the language's entities
13 const char *perl_entities[] = {
14 "space", "comment", "string", "any"
17 // constants associated with the entities
19 PERL_SPACE = 0, PERL_COMMENT, PERL_STRING, PERL_ANY
22 /*****************************************************************************/
27 include common "common.rl";
29 # Line counting machine
31 action perl_ccallback {
40 std_internal_newline(PERL_LANG)
43 std_newline(PERL_LANG)
# Single-line comment for the line-counting machine: a hash character followed
# by zero or more nonnewline characters. Action embedding binds tighter than
# concatenation, so the comment action is attached to the hash character only.
# NOTE(review): the comment action and nonnewline are not defined in the
# visible part of this file; presumably they come from common.rl.
47 perl_line_comment = '#' @comment nonnewline*;
49 '=' when starts_line @enqueue @comment nonnewline+ (
50 '=' when starts_line 'cut' @commit @comment @{ fgoto perl_line; }
52 newline %{ entity = INTERNAL_NL; } %perl_ccallback
# Any comment recognised by the line-counting machine: either the single-line
# form (perl_line_comment) or the block form (perl_block_comment).
58 perl_comment = perl_line_comment | perl_block_comment;
62 newline %{ entity = INTERNAL_NL; } %perl_ccallback
69 )* '\'' @commit @code;
72 newline %{ entity = INTERNAL_NL; } %perl_ccallback
82 newline %{ entity = INTERNAL_NL; } %perl_ccallback
# Slash-delimited regex literal. Between the opening and closing slash, each
# step accepts either one character that is not CR, LF, FF, a slash or a
# backslash, or a backslash followed by one nonnewline character (an escaped
# character, which lets an escaped slash appear inside the literal).
# The code action is attached to the closing slash.
# NOTE(review): no context check is visible here, so a division operator
# followed later on the same line by another slash would also match; confirm
# whether that is acceptable for line counting.
90 perl_regex = '/' ([^\r\n\f/\\] | '\\' nonnewline)* '/' @code;
91 # TODO: heredoc detection
92 # This is impossible with current Ragel. We need to extract what the end
93 # delimiter should be from the heredoc and search up to it on a new line.
# Any string-like token for the line-counting machine: the union of
# perl_sq_str, perl_dq_str, perl_cmd_str and the regex literal perl_regex.
# Heredocs are not covered (see the TODO above).
95 perl_string = perl_sq_str | perl_dq_str | perl_cmd_str | perl_regex;
98 spaces ${ entity = PERL_SPACE; } => perl_ccallback;
101 newline ${ entity = NEWLINE; } => perl_ccallback;
102 '=' when !starts_line;
103 ^(space | '=') ${ entity = PERL_ANY; } => perl_ccallback;
108 action perl_ecallback {
109 callback(PERL_LANG, perl_entities[entity], cint(ts), cint(te), userdata);
# Entity-machine counterpart of perl_line_comment: a hash character followed
# by zero or more nonnewline characters, with no actions embedded.
112 perl_line_comment_entity = '#' nonnewline*;
# Entity-machine block comment. It starts with an equals sign that is accepted
# only when the starts_line condition holds, followed by one or more letters.
# It then consumes any characters up to and including the first "=cut" whose
# equals sign also satisfies starts_line. The :>> operator is the Ragel
# finish-guarded concatenation: the any* loop is left as soon as the
# terminator machine reaches its final state.
# NOTE(review): starts_line is defined outside this excerpt; presumably it is
# true only at the first character of a line.
113 perl_block_comment_entity =
114 ('=' when starts_line) alpha+ any* :>> (('=' when starts_line) 'cut');
# Any comment recognised by the entity machine: the single-line form or the
# block form defined above.
115 perl_comment_entity = perl_line_comment_entity | perl_block_comment_entity;
118 space+ ${ entity = PERL_SPACE; } => perl_ecallback;
119 perl_comment_entity ${ entity = PERL_COMMENT; } => perl_ecallback;
125 /************************* Required for every parser *************************/
127 /* Parses a string buffer with Perl code.
129 * @param *buffer The string to parse.
130 * @param length The length of the string to parse.
131 * @param count Integer flag specifying whether or not to count lines. If yes,
132 * uses the Ragel machine optimized for counting. Otherwise uses the Ragel
133 * machine optimized for returning entity positions.
134 * @param *callback Callback function. If count is set, callback is called for
135 * every line of code, comment, or blank with 'lcode', 'lcomment', and
136 * 'lblank' respectively. Otherwise callback is called for each entity found.
138 void parse_perl(char *buffer, int length, int count,
139 void (*callback) (const char *lang, const char *entity, int s,
146 cs = (count) ? perl_en_perl_line : perl_en_perl_entity;
149 // if no newline at EOF; callback contents of last line
150 if (count) { process_last_line(PERL_LANG) }
155 /*****************************************************************************/