From 2d9228c5f1c6739e7a76f5b4af98eb3611f5220a Mon Sep 17 00:00:00 2001
From: mitchell
Date: Sat, 31 May 2008 18:40:23 -0400
Subject: [PATCH] Added XML Schema parser.

---
 ext/ohcount_native/ragel_parser.c             |   2 +
 ext/ohcount_native/ragel_parsers/xmlschema.rl | 136 ++++++++++++++++++
 2 files changed, 138 insertions(+)
 create mode 100644 ext/ohcount_native/ragel_parsers/xmlschema.rl

diff --git a/ext/ohcount_native/ragel_parser.c b/ext/ohcount_native/ragel_parser.c
index af78ac1..51c9be7 100644
--- a/ext/ohcount_native/ragel_parser.c
+++ b/ext/ohcount_native/ragel_parser.c
@@ -47,6 +47,7 @@
 //#include "vim_parser.h"
 #include "xml_parser.h"
 #include "xslt_parser.h"
+#include "xmlschema_parser.h"
 // END parser includes
 
 ParseResult *pr;
@@ -107,6 +108,7 @@ struct language languages[] = {
   //{ "vim", parse_vim },
   { "xml", parse_xml },
   { "xslt", parse_xslt },
+  { "xmlschema", parse_xmlschema },
   // END languages
   { "", NULL }
 };
diff --git a/ext/ohcount_native/ragel_parsers/xmlschema.rl b/ext/ohcount_native/ragel_parsers/xmlschema.rl
new file mode 100644
index 0000000..0041416
--- /dev/null
+++ b/ext/ohcount_native/ragel_parsers/xmlschema.rl
@@ -0,0 +1,136 @@
+// xmlschema.rl written by Mitchell Foral. mitchell<att>caladbolg<dott>net.
+
+/************************* Required for every parser *************************/
+#ifndef RAGEL_XMLSCHEMA_PARSER
+#define RAGEL_XMLSCHEMA_PARSER
+
+#include "ragel_parser_macros.h"
+
+// the name of the language
+const char *XMLSCHEMA_LANG = "xmlschema";
+
+// the language's entities
+const char *xmlschema_entities[] = {
+  "space", "comment", "doctype",
+  "tag", "entity", "any"
+};
+
+// constants associated with the entities
+enum {
+  XMLSCHEMA_SPACE = 0, XMLSCHEMA_COMMENT, XMLSCHEMA_DOCTYPE,
+  XMLSCHEMA_TAG, XMLSCHEMA_ENTITY, XMLSCHEMA_ANY
+};
+
+/*****************************************************************************/
+
+%%{
+  machine xmlschema;
+  write data;
+  include common "common.rl";
+
+  # Line counting machine
+
+  action xmlschema_ccallback {
+    switch(entity) {
+    case XMLSCHEMA_SPACE:
+      ls
+      break;
+    case XMLSCHEMA_ANY:
+      code
+      break;
+    case INTERNAL_NL:
+      std_internal_newline(XMLSCHEMA_LANG)
+      break;
+    case NEWLINE:
+      std_newline(XMLSCHEMA_LANG)
+      break;
+    case CHECK_BLANK_ENTRY:
+      check_blank_entry(XMLSCHEMA_LANG)
+    }
+  }
+
+  xmlschema_comment =                     # XML comment, may span several lines
+    '<!--' @comment (
+      newline %{ entity = INTERNAL_NL; } %xmlschema_ccallback
+      |
+      ws
+      |
+      (nonnewline - ws) @comment
+    )* :>> '-->';                         # :>> leaves the loop once --> has matched
+
+  xmlschema_sq_str =                      # single-quoted string with backslash escapes
+    '\'' @code (
+      newline %{ entity = INTERNAL_NL; } %xmlschema_ccallback
+      |
+      ws
+      |
+      [^\r\n\f\t '\\] @code
+      |
+      '\\' nonnewline @code
+    )* '\'';
+  xmlschema_dq_str =                      # double-quoted string with backslash escapes
+    '"' @code (
+      newline %{ entity = INTERNAL_NL; } %xmlschema_ccallback
+      |
+      ws
+      |
+      [^\r\n\f\t "\\] @code
+      |
+      '\\' nonnewline @code
+    )* '"';
+  xmlschema_cdata_str =                   # CDATA section, may span several lines
+    '<![CDATA[' @code (
+      newline %{ entity = INTERNAL_NL; } %xmlschema_ccallback
+      |
+      ws
+      |
+      (nonnewline - ws) @code
+    )* :>> ']]>';                         # :>> leaves the loop once ]]> has matched
+  xmlschema_string = xmlschema_sq_str | xmlschema_dq_str | xmlschema_cdata_str;
+
+  xmlschema_line := |*
+    spaces             ${ entity = XMLSCHEMA_SPACE; } => xmlschema_ccallback;
+    xmlschema_comment;
+    xmlschema_string;
+    newline            ${ entity = NEWLINE;         } => xmlschema_ccallback;
+    ^space             ${ entity = XMLSCHEMA_ANY;   } => xmlschema_ccallback;
+  *|;
+
+  # Entity machine
+
+  action xmlschema_ecallback {
+    callback(XMLSCHEMA_LANG, entity, cint(ts), cint(te));
+  }
+
+  xmlschema_entity := 'TODO:';            # placeholder: no entity patterns defined yet
+}%%
+
+/************************* Required for every parser *************************/
+
+/* Parses a string buffer with XML Schema markup.
+ *
+ * @param *buffer The string to parse.
+ * @param length The length of the string to parse.
+ * @param count Integer flag specifying whether or not to count lines. If yes,
+ *   uses the Ragel machine optimized for counting. Otherwise uses the Ragel
+ *   machine optimized for returning entity positions.
+ * @param *callback Callback function. If count is set, callback is called for
+ *   every line of code, comment, or blank with 'lcode', 'lcomment', and
+ *   'lblank' respectively. Otherwise callback is called for each entity found.
+ */
+void parse_xmlschema(char *buffer, int length, int count,
+  void (*callback) (const char *lang, const char *entity, int start, int end)
+  ) {
+  init
+
+  %% write init;
+  cs = (count) ? xmlschema_en_xmlschema_line : xmlschema_en_xmlschema_entity;
+  %% write exec;
+
+  // if no newline at EOF; callback contents of last line
+  if (count) { process_last_line(XMLSCHEMA_LANG) }
+}
+
+#endif
+
+/*****************************************************************************/
-- 
2.32.0.93.g670b81a890