Initial Revision
[ohcount] / ext / ohcount_native / glots / c_monoglot.rb
1 require 'monoglot'
2
module Ohcount
  # A Monoglot specialization that describes the states and transitions of a
  # parser for any language with C-style lexical structure.
  #
  # The lexical features covered are:
  #   - Single-quoted strings
  #   - Double-quoted strings
  #   - Single-line comments
  #   - Block comments
  #
  # Callers name the language and supply the delimiters for the comment
  # kinds; string-literal support is switched on or off with boolean flags.
  class CMonoglot < Monoglot
    # Build the state and transition tables for a C-like language.
    #
    # * +language+ - the unique name of the language to be parsed.
    # * +line_comment+ - one token, or an array of tokens, that starts a
    #   single-line comment. Pass +nil+ (or +false+) for a language without
    #   single-line comments.
    # * +block_comment+ - a <tt>[start, end]</tt> pair, or an array of such
    #   pairs, delimiting block comments. +nil+ or an empty array disables
    #   block comments.
    # * +double_quote+ - true (the default) if double-quote characters
    #   delimit string literals.
    # * +single_quote+ - true if single-quote characters delimit string
    #   literals (default false).
    # * +options+ - an optional hash of additional options.
    #
    # Options include:
    #
    # * +polyglot_name+ - when present, stored as this monoglot's name in
    #   place of +language+.
    # * +no_escape_dquote+ - by default a backslash-escaped double quote (and
    #   an escaped backslash) is recognized inside a double-quoted string.
    #   Specify <tt>:no_escape_dquote => true</tt> to leave those escape
    #   transitions out.
    #
    # The results are stored in <tt>@name</tt>, <tt>@states</tt> and
    # <tt>@transitions</tt>. Transitions are appended in a fixed order:
    # line comments, block comments, single-quoted strings, double-quoted
    # strings.
    def initialize(language, line_comment, block_comment, double_quote = true, single_quote = false, options = {})
      @name = options[:polyglot_name] || language

      # Every state is declared regardless of which features are enabled.
      # Each row is the state name followed by the third State.new argument.
      @states = [
        [:code, :code],
        [:dquote_string, :code],
        [:squote_string, :code],
        [:line_comment, :comment],
        [:block_comment, :comment]
      ].map { |state_name, kind| State.new(language, state_name, kind) }

      @transitions = []

      # Appends one Transition for +language+; the remaining arguments are
      # handed to Transition.new unchanged.
      add = lambda { |*details| @transitions << Transition.new(language, *details) }

      if line_comment
        # Accept a single token as well as a (possibly nested) list of them.
        [line_comment].flatten.each_with_index do |opener, position|
          add.call(opener, :code, :line_comment, :to, false, position)
        end
        # A newline closes the line comment.
        add.call('\n', :line_comment, :return, :from, false)
      end

      if block_comment && !block_comment.empty?
        # A lone [start, end] pair is wrapped so it can be handled as a list of pairs.
        pairs = block_comment[0].is_a?(Array) ? block_comment : [block_comment]
        pairs.each_with_index do |pair, position|
          add.call(pair[0], :code, :block_comment, :to, false, position)
          add.call(pair[1], :block_comment, :return, :from, false, position)
        end
      end

      if single_quote
        add.call("'", :code, :squote_string, :to, false)
        # Escaped backslash and escaped quote keep the parser inside the string.
        add.call(e('\\\\'), :squote_string, :squote_string, :from, true, "ESC_SLASH")
        add.call(e("\\'"), :squote_string, :squote_string, :from, true, "ESC")
        add.call("'", :squote_string, :return, :from, false)
      end

      if double_quote
        add.call(e('"'), :code, :dquote_string, :to, false)
        unless options[:no_escape_dquote]
          # NOTE(review): these escape transitions pass :to where the
          # single-quote equivalents pass :from - confirm whether the
          # difference is intentional.
          add.call(e('\\\\'), :dquote_string, :dquote_string, :to, true, "ESC_SLASH")
          add.call(e('\\"'), :dquote_string, :dquote_string, :to, true, "ESC")
        end
        add.call(e('"'), :dquote_string, :return, :from, false)
      end
    end
  end
end