git.oblomov.eu Git - ohcount/blob - ext/ohcount_native/ragel_parsers/ruby.rl

   1 // ruby.rl written by Mitchell Foral. mitchell<att>caladbolg<dott>net
   2
   3 /************************* Required for every parser *************************/
   4 #ifndef RAGEL_RUBY_PARSER
   5 #define RAGEL_RUBY_PARSER
   6
   7 #include "ragel_parser_macros.h"
   8
   9 // The language name this parser reports. In this file it is handed to the
     // std_newline / std_internal_newline / process_last_line macros and is the
     // `lang` argument of the callback call in ruby_ecallback.
  10 const char *RUBY_LANG = "ruby";
  11
  12 // The language's entity names, listed in the same order as the RUBY_*
     // constants that follow, so a RUBY_* value is the index of its name here.
  13 const char *ruby_entities[] = {
  14   "space", "comment", "string", "any"
  15 };
  16
  17 // Entity ids. The values run 0..3 in the order of ruby_entities, so each id
     // is also the index of its name in that array.
  18 enum {
  19   RUBY_SPACE = 0, RUBY_COMMENT, RUBY_STRING, RUBY_ANY
  20 };
  21
  22 /*****************************************************************************/
  23
  24 %%{
       # Machine name. Ragel prefixes the identifiers it generates with it, e.g.
       # the ruby_en_* entry points selected in parse_ruby.
  25   machine ruby;
       # Emit the machine's static data at this point of the generated C file.
  26   write data;
       # Pull in the `common` machine from common.rl.
       # NOTE(review): presumably the source of newline, nonnewline, ws, spaces and
       # the comment/code actions used below, none of which are defined in this
       # file -- confirm.
  27   include common "common.rl";
  28
  29   # Line counting machine
       #
       # ruby_ccallback is the shared action of the line counter. It dispatches on
       # `entity`, which each embedding site assigns right before the action runs:
       #   RUBY_SPACE  -> ls
       #   RUBY_ANY    -> code
       #   INTERNAL_NL -> std_internal_newline(RUBY_LANG)  (newline inside a comment
       #                  or string machine)
       #   NEWLINE     -> std_newline(RUBY_LANG)           (newline token of ruby_line)
       # Any other value, including RUBY_COMMENT and RUBY_STRING, matches no case
       # and does nothing; the comment and string machines embed the `comment` and
       # `code` actions directly instead of going through this switch.
       # NOTE(review): ls, code, std_internal_newline, std_newline, INTERNAL_NL and
       # NEWLINE are not defined in this file; presumably they come from
       # ragel_parser_macros.h -- confirm there what each one records.
  30
  31   action ruby_ccallback {
  32     switch(entity) {
  33     case RUBY_SPACE:
  34       ls
  35       break;
  36     case RUBY_ANY:
  37       code
  38       break;
  39     case INTERNAL_NL:
  40       std_internal_newline(RUBY_LANG)
  41       break;
  42     case NEWLINE:
  43       std_newline(RUBY_LANG)
  44     }
  45   }
  46
       # Line comment: '#' through the end of the line. The `comment` action fires
       # on the '#' itself.
  47   ruby_line_comment = '#' @comment nonnewline*;
  48   # TODO: only accept =begin and =end when they start a line.
  49   # Can't do that now because using 'when starts_line' fails a Ragel assertion.
       #
       # Block comment: as written, '=begin' and '=end' are recognized anywhere,
       # not just at the start of a line. Inside the block:
       #   - a newline sets entity = INTERNAL_NL and runs ruby_ccallback when the
       #     newline is left,
       #   - whitespace (ws) triggers no action,
       #   - every other character fires `comment`.
       # ':>>' is Ragel's finish-guarded concatenation: the repetition ends once
       # '=end' has been matched completely.
  50   ruby_block_comment =
  51     '=begin' @comment (
  52       newline %{ entity = INTERNAL_NL; } %ruby_ccallback
  53       |
  54       ws
  55       |
  56       (nonnewline - ws) @comment
  57     )* :>> '=end';
  58   ruby_comment = ruby_line_comment | ruby_block_comment;
  59
       # Single- and double-quoted strings. Both have the same shape: opening
       # quote, any number of body items, closing quote. A body item is one of:
       #   - a newline: sets entity = INTERNAL_NL and runs ruby_ccallback when the
       #     newline is left,
       #   - whitespace (ws): no action,
       #   - any character other than \r \n \f \t, space, the quote and backslash:
       #     fires `code`,
       #   - a backslash followed by one nonnewline character: fires `code` on
       #     that second character.
       # The opening and closing quotes fire `code` as well.
       # NOTE(review): a backslash directly followed by a newline matches none of
       # the items (assuming nonnewline excludes line terminators), so the string
       # machine stops matching at that point -- confirm whether that is intended.
  60   ruby_sq_str =
  61     '\'' @code (
  62       newline %{ entity = INTERNAL_NL; } %ruby_ccallback
  63       |
  64       ws
  65       |
  66       [^\r\n\f\t '\\] @code
  67       |
  68       '\\' nonnewline @code
  69     )* '\'' @code;
  70   ruby_dq_str =
  71     '"' @code (
  72       newline %{ entity = INTERNAL_NL; } %ruby_ccallback
  73       |
  74       ws
  75       |
  76       [^\r\n\f\t "\\] @code
  77       |
  78       '\\' nonnewline @code
  79     )* '"' @code;
  80   # TODO: true literal string detection
  81   # Ruby allows any non-alphanumeric character after the initial '%' to delimit
  82   # a literal string. Only '(', '[' and '{' are recognized here because they
  83   # are common(?). Any of ')', ']' or '}' ends the literal regardless of which
  84   # bracket opened it, so all three have to be escaped inside the body. That is
  85   # not accurate: only the one matching closing character needs escaping.
  86   # A proper fix has to detect which character opens the literal, somehow
  87   # let Ragel know what it is (currently unsupported), and use its respective
  88   # closing character in the machine below.
       #
       # Matches %(...), %q(...) and %Q(...) plus the '[' and '{' forms. `code`
       # fires on the opening bracket, on every non-blank body character and on
       # the closing bracket; newlines in the body go through ruby_ccallback with
       # entity = INTERNAL_NL.
  89   ruby_lit_str =
  90     '%' [qQ]? [(\[{] @code (
  91       newline %{ entity = INTERNAL_NL; } %ruby_ccallback
  92       |
  93       ws
  94       |
  95       [^\r\n\f\t )\]}\\] @code
  96       |
  97       '\\' nonnewline @code
  98     )* [)\]}] @code;
       # Backtick command string: same body rules as the quoted strings, with '`'
       # as the delimiter.
  99   ruby_cmd_str =
 100     '`' @code (
 101       newline %{ entity = INTERNAL_NL; } %ruby_ccallback
 102       |
 103       ws
 104       |
 105       [^\r\n\f\t `\\] @code
 106       |
 107       '\\' nonnewline @code
 108     )* '`' @code;
       # Regex literal /.../: same body rules, with '/' as the delimiter.
       # NOTE(review): nothing here distinguishes a regex-opening '/' from a
       # division operator, so any '/' can start this machine -- confirm whether
       # that matters for the line counts.
 109   ruby_regex =
 110     '/' @code (
 111       newline %{ entity = INTERNAL_NL; } %ruby_ccallback
 112       |
 113       ws
 114       |
 115       [^\r\n\f\t /\\] @code
 116       |
 117       '\\' nonnewline @code
 118     )* '/' @code;
 119   # TODO: true literal array and command detection
 120   # See the TODO above ruby_lit_str about literal string detection.
       #
       # Matches %w, %r and %x literals delimited by '(', '[' or '{'. Only the
       # lowercase letters w, r and x are accepted; other percent-literal letters
       # (e.g. uppercase W) are not matched by this machine. The body rules and the
       # "any closing bracket ends it" limitation are the same as in ruby_lit_str.
 121   ruby_lit_other =
 122     '%' [wrx] [(\[{] @code (
 123       newline %{ entity = INTERNAL_NL; } %ruby_ccallback
 124       |
 125       ws
 126       |
 127       [^\r\n\f\t )\]}\\] @code
 128       |
 129       '\\' nonnewline @code
 130     )* [)\]}] @code;
 131   # TODO: heredoc detection
 132   # This is impossible with current Ragel. We need to extract the end delimiter
 133   # from the heredoc opener and then scan up to that delimiter on a later line.
 134   # ruby_heredoc =
       #
       # Union of all string-like literals defined above. Heredocs are not part of
       # it (see TODO), so heredoc bodies are scanned like ordinary Ruby text.
 135   ruby_string =
 136     ruby_sq_str | ruby_dq_str | ruby_lit_str | ruby_cmd_str | ruby_regex |
 137     ruby_lit_other;
 138
       # Scanner used in line-counting mode; ':=' makes it an entry point, which
       # parse_ruby selects as ruby_en_ruby_line. A Ragel scanner takes the longest
       # match and, on a tie, the pattern listed first.
       #   spaces       - sets entity = RUBY_SPACE on every character, then runs
       #                  ruby_ccallback when the token is accepted
       #   ruby_comment - no token action; its embedded `comment` actions do the work
       #   ruby_string  - no token action; its embedded `code` actions do the work
       #   newline      - entity = NEWLINE, then ruby_ccallback
       #   ^space       - any single non-space character: entity = RUBY_ANY, then
       #                  ruby_ccallback
 139   ruby_line := |*
 140     spaces        ${ entity = RUBY_SPACE; } => ruby_ccallback;
 141     ruby_comment;
 142     ruby_string;
 143     newline       ${ entity = NEWLINE;    } => ruby_ccallback;
 144     ^space        ${ entity = RUBY_ANY;   } => ruby_ccallback;
 145   *|;
 146
 147   # Entity machine
       #
       # NOTE: not implemented yet. ruby_entity only matches the literal text
       # 'TODO:' and no pattern embeds ruby_ecallback, so no entity is reported.
 148
       # Reports one entity to the caller's callback as (language, entity name,
       # start, end). The callback expects the entity *name* (const char *), so the
       # integer id in `entity` is translated through ruby_entities; only the
       # RUBY_* ids are valid indexes into that table.
       # NOTE(review): cint is not defined in this file; presumably it converts the
       # scanner's ts/te token pointers to integer offsets -- confirm.
 149   action ruby_ecallback {
 150     callback(RUBY_LANG, ruby_entities[entity], cint(ts), cint(te));
 151   }
 152
 153   ruby_entity := 'TODO:';
 154 }%%
 155
 156 /************************* Required for every parser *************************/
 157
 158 /* Parses a buffer of Ruby source with the Ragel machine defined in this file.
 159  *
 160  * @param buffer The text to parse.
 161  * @param length The length of the text to parse.
 162  * @param count Integer flag selecting the machine to run. Non-zero starts in
 163  *   ruby_line, the line-counting scanner. Zero starts in ruby_entity, which is
 164  *   currently a placeholder that only matches the literal text 'TODO:'.
 165  * @param callback Callback function. If count is set, callback is called for
 166  *   every line of code, comment, or blank with 'lcode', 'lcomment', and
 167  *   'lblank' respectively. Otherwise it is meant to be called for each entity
      *   found, which is not implemented yet (nothing invokes ruby_ecallback).
      *   NOTE(review): the 'lcode'/'lcomment'/'lblank' strings are produced by
      *   macros that are not defined in this file -- confirm them there.
 168  */
 169 void parse_ruby(char *buffer, int length, int count,
 170   void (*callback) (const char *lang, const char *entity, int start, int end)
 171   ) {
       // NOTE(review): `init` is a macro that is not defined in this file;
       // presumably it sets up the parser state used below (cs, entity, ...) --
       // confirm in ragel_parser_macros.h.
 172   init
 173
       // Ragel's generated initialization runs first; the assignment after it then
       // overrides the start state to pick the entry point requested by `count`.
 174   %% write init;
 175   cs = (count) ? ruby_en_ruby_line : ruby_en_ruby_entity;
 176   %% write exec;
 177
 178   // Line-counting mode only: if the buffer does not end with a newline, the
       // last line still has to be reported through the callback.
 179   if (count) { process_last_line(RUBY_LANG) }
 180 }
 181
 182 #endif
 183
 184 /*****************************************************************************/