1 // ragel_parser.c written by Mitchell Foral. mitchell<att>caladbolg<dott>net.
6 // BEGIN parser includes
8 #include "lua_parser.h"
9 #include "ruby_parser.h"
10 #include "css_parser.h"
11 #include "javascript_parser.h"
12 #include "html_parser.h"
13 #include "java_parser.h"
14 #include "objective_c_parser.h"
15 #include "visual_basic_parser.h"
16 #include "sql_parser.h"
17 #include "actionscript_parser.h"
18 #include "ada_parser.h"
19 #include "assembler_parser.h"
20 #include "autoconf_parser.h"
21 #include "automake_parser.h"
22 // END parser includes
// Language name. ragel_parser_parse compares it against the requested language
// with strcmp(), and stops scanning the table at an entry whose name is empty.
29 char name[MAX_LANGUAGE_NAME];
// Parser entry point for this language. ragel_parser_parse invokes it as
// parser(buffer, buffer_len, 1, ragel_parser_callback).
30 void (*parser) (char*, int, int, void*);
// Table pairing each language name with its parser function.
// ragel_parser_parse walks the entries in order, comparing the name with
// strcmp(), and calls the parser of the first match. The names are the lookup
// keys and do not always mirror the function names (e.g. "visualbasic" is
// handled by parse_visual_basic).
// NOTE(review): the scan stops at an entry whose name is empty; confirm the
// table ends with such an entry.
33 struct language languages[] = {
38 { "ruby", parse_ruby },
40 { "javascript", parse_javascript },
41 { "html", parse_html },
42 { "java", parse_java },
43 { "objective_c", parse_objective_c },
44 { "visualbasic", parse_visual_basic },
46 { "actionscript", parse_actionscript },
48 { "assembler", parse_assembler },
49 { "autoconf", parse_autoconf },
50 { "automake", parse_automake },
55 /* Returns a language_breakdown for a given language name.
 *
 * Searches the breakdowns already recorded in pr for one whose name matches.
 * If none matches, initializes the next unused slot with the name and
 * parse_buffer_len, returns that slot, and increments
 * pr->language_breakdown_count.
 *
 * @param *name The language name to look up.
 * @return A pointer to the matching (or newly initialized) element of
 *   pr->language_breakdowns.
 *
 * NOTE(review): no capacity check on pr->language_breakdowns is visible here;
 * confirm the array cannot be overrun when a new language is added.
 */
56 LanguageBreakdown *get_language_breakdown(char *name) {
58 for (i = 0; i < pr->language_breakdown_count; i++)
59 if (strcmp(pr->language_breakdowns[i].name, name) == 0)
60 return &pr->language_breakdowns[i]; // found one
62 language_breakdown_initialize(
63 &pr->language_breakdowns[pr->language_breakdown_count],
64 name, parse_buffer_len); // create one
// The post-increment returns the slot just initialized and then counts it.
65 return &pr->language_breakdowns[pr->language_breakdown_count++];
68 /* Yields a line's language, semantic, and text to an optional Ruby block.
 *
 * Nothing is stored unless a Ruby block was given. The array is filled as:
 *   [0] the language name as a Ruby symbol,
 *   [1] :code, :comment, or :blank for lcode, lcomment, or lblank,
 *   [2] a Ruby string of e - s bytes starting at parse_buffer + s.
 *
 * @param *lang The language associated with the line.
 * @param *entity The line entity: lcode, lcomment, or lblank.
 * @param s The start offset of the line within parse_buffer.
 * @param e The end offset of the line within parse_buffer.
 */
69 void ragel_parse_yield_line(const char *lang, const char *entity, int s, int e) {
70 if (rb_block_given_p()) {
73 rb_ary_store(ary, 0, ID2SYM(rb_intern(lang)));
// Translate the line entity into the semantic symbol. An entity that is none
// of the three below stores nothing into slot 1.
74 if (strcmp(entity, "lcode") == 0)
75 rb_ary_store(ary, 1, ID2SYM(rb_intern("code")));
76 else if (strcmp(entity, "lcomment") == 0)
77 rb_ary_store(ary, 1, ID2SYM(rb_intern("comment")));
78 else if (strcmp(entity, "lblank") == 0)
79 rb_ary_store(ary, 1, ID2SYM(rb_intern("blank")));
80 rb_ary_store(ary, 2, rb_str_new(parse_buffer + s, e - s));
85 /* Callback function called for every entity in the source file discovered.
87 * Entities are defined in the parser and are things like comments, strings,
89 * This callback yields for a Ruby block if necessary:
90 * |language, semantic, line|
91 * @param *lang The language associated with the entity.
92 * @param *entity The entity discovered. There are 3 additional entities used
93 * by Ohcount for counting: lcode, lcomment, and lblank for a line of code,
94 * a whole line comment, or a blank line respectively.
95 * @param s The start position of the entity relative to the start of the
97 * @param e The end position of the entity relative to the start of the buffer
100 void ragel_parser_callback(const char *lang, const char *entity, int s, int e) {
// Find or create the breakdown for this language. The cast drops const only
// because get_language_breakdown is declared to take a plain char *.
101 LanguageBreakdown *lb = get_language_breakdown((char *) lang);
// Line of code: hand the span from parse_buffer + s to parse_buffer + e to
// language_breakdown_copy_code, then yield the line.
102 if (strcmp(entity, "lcode") == 0) {
103 language_breakdown_copy_code(lb, parse_buffer + s, parse_buffer + e);
104 ragel_parse_yield_line(lang, entity, s, e);
// Whole-line comment: same span, handed to language_breakdown_copy_comment.
105 } else if (strcmp(entity, "lcomment") == 0) {
106 language_breakdown_copy_comment(lb, parse_buffer + s, parse_buffer + e);
107 ragel_parse_yield_line(lang, entity, s, e);
// Blank line: yielded like the other line entities. Any entity other than
// lcode, lcomment, and lblank matches none of these branches.
108 } else if (strcmp(entity, "lblank") == 0) {
110 ragel_parse_yield_line(lang, entity, s, e);
114 /* Tries to use an existing Ragel parser for the given language.
116 * @param *parse_result An allocated, empty ParseResult to hold parse results.
117 * @param *buffer A pointer to the buffer or character in the buffer to start
119 * @param buffer_len The length of the buffer to parse.
120 * @param *lang The language name associated with the buffer to parse.
121 * @return 1 if a Ragel parser is found, 0 otherwise.
123 int ragel_parser_parse(ParseResult *parse_result,
124 char *buffer, int buffer_len, char *lang) {
126 pr->language_breakdown_count = 0;
127 parse_buffer = buffer;
128 parse_buffer_len = buffer_len;
130 for (i = 0; strlen(languages[i].name) != 0; i++)
131 if (strcmp(languages[i].name, lang) == 0) {
132 languages[i].parser(buffer, buffer_len, 1, ragel_parser_callback);