1 // xml.rl written by Mitchell Foral. mitchell<att>caladbolg<dott>net.
3 /************************* Required for every parser *************************/
4 #ifndef OHCOUNT_XML_PARSER_H
5 #define OHCOUNT_XML_PARSER_H
7 #include "../parser_macros.h"
9 // the name of the language, passed as the first argument of every callback
10 const char *XML_LANG = LANG_XML;
12 // the language's entity names, listed in the same order as the XML_* constants below
13 const char *xml_entities[] = {
14 "space", "comment", "doctype",
15 "tag", "entity", "any"
18 // constants associated with the entities; used as indices into xml_entities
20 XML_SPACE = 0, XML_COMMENT, XML_DOCTYPE,
21 XML_TAG, XML_ENTITY, XML_ANY
24 /*****************************************************************************/
29 include common "common.rl";
31 # Line counting machine (entry point xml_line; parse_xml starts here when count is non-zero)
33 action xml_ccallback {
42 std_internal_newline(XML_LANG)
47 case CHECK_BLANK_ENTRY:
48 check_blank_entry(XML_LANG)
# On leaving a newline (%): set entity to INTERNAL_NL, then run xml_ccallback.
54 newline %{ entity = INTERNAL_NL; } %xml_ccallback
58 (nonnewline - ws) @comment
# Quoted strings: the body excludes the closing quote and CR/LF/FF, so a
# string cannot span lines. The code action (not defined in the visible part
# of this file; presumably from common.rl) is attached to the closing quote.
61 xml_sq_str = '\'' [^\r\n\f']* '\'' @code;
62 xml_dq_str = '"' [^\r\n\f"]* '"' @code;
65 newline %{ entity = INTERNAL_NL; } %xml_ccallback
69 (nonnewline - ws) @code
71 xml_string = xml_sq_str | xml_dq_str | xml_cdata_str;
# Scanner patterns: each one sets entity on every matched character ($) and
# runs xml_ccallback once the pattern is accepted (=>).
74 spaces ${ entity = XML_SPACE; } => xml_ccallback;
77 newline ${ entity = NEWLINE; } => xml_ccallback;
78 ^space ${ entity = XML_ANY; } => xml_ccallback;
# Entity callback: forwards the language name, the entity name looked up in
# xml_entities, cint(ts), cint(te) and userdata to the user-supplied callback.
83 action xml_ecallback {
84 callback(XML_LANG, xml_entities[entity], cint(ts), cint(te), userdata);
# A comment entity starts at '<!--' and ends at the first '-->'
# (:>> is Ragel's finish-guarded concatenation).
87 xml_comment_entity = '<!--' any* :>> '-->';
90 space+ ${ entity = XML_SPACE; } => xml_ecallback;
91 xml_comment_entity ${ entity = XML_COMMENT; } => xml_ecallback;
97 /************************* Required for every parser *************************/
99 /* Parses a string buffer with XML markup.
101 * @param *buffer The string to parse.
102 * @param length The length of the string to parse.
103 * @param count Integer flag specifying whether or not to count lines. If yes,
104 * uses the Ragel machine optimized for counting. Otherwise uses the Ragel
105 * machine optimized for returning entity positions.
106 * @param *callback Callback function. If count is set, callback is called for
107 * every line of code, comment, or blank with 'lcode', 'lcomment', and
108 * 'lblank' respectively. Otherwise callback is called for each entity found.
110 void parse_xml(char *buffer, int length, int count,
111 void (*callback) (const char *lang, const char *entity, int s,
// count selects the start state: the line-counting machine (xml_line) when
// non-zero, otherwise the entity machine (xml_entity)
118 cs = (count) ? xml_en_xml_line : xml_en_xml_entity;
121 // when counting lines and the input has no newline at EOF, report the contents of the last line
122 if (count) { process_last_line(XML_LANG) }
127 /*****************************************************************************/