1 // html.rl written by Mitchell Foral. mitchell<att>caladbolg<dott>net.
3 /************************* Required for every parser *************************/
4 #ifndef RAGEL_HTML_PARSER
5 #define RAGEL_HTML_PARSER
7 #include "ragel_parser_macros.h"
9 // the name of the language; passed as the `lang` argument to the callback
10 const char *HTML_LANG = "html";
12 // the language's entities
13 const char *html_entities[] = {
14 "space", "comment", "doctype",
15 "tag", "entity", "any"
18 // constants associated with the entities
20 HTML_SPACE = 0, HTML_COMMENT, HTML_DOCTYPE,
21 HTML_TAG, HTML_ENTITY, HTML_ANY
24 /*****************************************************************************/
26 #include "css_parser.h"
27 #include "javascript_parser.h"
32 include common "common.rl";
36 # Line counting machine
38 action html_ccallback {
47 std_internal_newline(HTML_LANG)
50 std_newline(HTML_LANG)
52 case CHECK_BLANK_ENTRY:
53 check_blank_entry(HTML_LANG)
59 newline %{ entity = INTERNAL_NL; } %html_ccallback
63 (nonnewline - ws) @comment
68 newline %{ entity = INTERNAL_NL; } %html_ccallback
78 newline %{ entity = INTERNAL_NL; } %html_ccallback
# Union of the two string machines html_sq_str and html_dq_str
# (presumably single- and double-quoted strings; their definitions are not
# fully visible here -- confirm).
86 html_string = html_sq_str | html_dq_str;
# Matches either `ws` or a `newline`. For a newline, the entity is set to
# INTERNAL_NL on the newline's final transition and html_ccallback is then
# run as a leaving action, so the line break is reported to the line counter.
88 ws_or_inl = (ws | newline @{ entity = INTERNAL_NL; } %html_ccallback);
# Opening <style ...> tag (tag name matched case-insensitively via /style/i)
# whose attribute text contains the literal 'text/css'. At least one non-'>'
# character is required both before and after 'text/css'; the `:>>`
# finish-guarded concatenation ends the first [^>]+ run once 'text/css' has
# been matched in full. The `code` action runs on the closing '>'.
# NOTE(review): a bare <style> tag, or one whose attributes do not contain
# 'text/css', does not match this pattern -- confirm that is intended.
90 html_css_entry = '<' /style/i [^>]+ :>> 'text/css' [^>]+ '>' @code;
# Closing </style> tag (case-insensitive); ws_or_inl* permits whitespace and
# newlines between the tag name and '>'. The `code` action runs on the '>'.
91 html_css_outry = '</' /style/i ws_or_inl* '>' @code;
93 html_css_outry @{ p = ts; fgoto html_line; };
94 # unmodified CSS patterns
95 spaces ${ entity = CSS_SPACE; } => css_ccallback;
98 newline ${ entity = NEWLINE; } => css_ccallback;
99 ^space ${ entity = CSS_ANY; } => css_ccallback;
# Opening <script ...> tag (tag name matched case-insensitively via /script/i)
# whose attribute text contains the literal 'text/javascript'. At least one
# non-'>' character is required both before and after 'text/javascript'; the
# `:>>` finish-guarded concatenation ends the first [^>]+ run once
# 'text/javascript' has been matched in full. The `code` action runs on the
# closing '>'.
# NOTE(review): a bare <script> tag, or one whose attributes do not contain
# 'text/javascript', does not match this pattern -- confirm that is intended.
102 html_js_entry = '<' /script/i [^>]+ :>> 'text/javascript' [^>]+ '>' @code;
# Closing </script> tag (case-insensitive); ws_or_inl* permits whitespace and
# newlines between the tag name and '>'. The `code` action runs on the '>'.
103 html_js_outry = '</' /script/i ws_or_inl* '>' @code;
105 html_js_outry @{ p = ts; fgoto html_line; };
106 # unmodified Javascript patterns
107 spaces ${ entity = JS_SPACE; } => js_ccallback;
110 newline ${ entity = NEWLINE; } => js_ccallback;
111 ^space ${ entity = JS_ANY; } => js_ccallback;
115 html_css_entry @{ entity = CHECK_BLANK_ENTRY; } @html_ccallback
116 @{ fgoto html_css_line; };
117 html_js_entry @{ entity = CHECK_BLANK_ENTRY; } @html_ccallback
118 @{ fgoto html_js_line; };
119 # standard HTML patterns
120 spaces ${ entity = HTML_SPACE; } => html_ccallback;
123 newline ${ entity = NEWLINE; } => html_ccallback;
124 ^space ${ entity = HTML_ANY; } => html_ccallback;
129 action html_ecallback {
130 callback(HTML_LANG, entity, cint(ts), cint(te));
# Entity machine.
# NOTE(review): placeholder -- this machine only matches the literal text
# 'TODO:'. parse_html selects it as the start state when `count` is zero, so
# entity parsing appears to be unimplemented for HTML.
133 html_entity := 'TODO:';
136 /************************* Required for every parser *************************/
138 /* Parses a string buffer with HTML markup.
140 * @param *buffer The string to parse.
141 * @param length The length of the string to parse.
142 * @param count Integer flag specifying whether or not to count lines. If yes,
143 * uses the Ragel machine optimized for counting. Otherwise uses the Ragel
144 * machine optimized for returning entity positions.
145 * @param *callback Callback function. If count is set, callback is called for
146 * every line of code, comment, or blank with 'lcode', 'lcomment', and
147 * 'lblank' respectively. Otherwise callback is called for each entity found.
149 void parse_html(char *buffer, int length, int count,
150 void (*callback) (const char *lang, const char *entity, int start, int end)
155 cs = (count) ? html_en_html_line : html_en_html_entity;
158 // if there is no newline at EOF, call back with the contents of the last line
159 if (count) { process_last_line(HTML_LANG) }
164 /*****************************************************************************/