support CWEB (literate programming for C)
[ohcount] / ext / ohcount_native / generator.rb
1 require 'state'
2 require 'transition'
3 require 'escape_helper'
4
5 # So that monoglot and polyglot source files can easily require each other
6 $LOAD_PATH << File.join(File.dirname(__FILE__), 'glots')
7
8 # Load all monoglots and polyglots
9 Dir.glob(File.join(File.dirname(__FILE__), 'glots/*.rb')).each {|f| require f }
10
module Ohcount
  # Builds every Monoglot and Polyglot known to ohcount and writes their
  # definitions to the C source file polyglots.c.
  #
  # The glot classes themselves live in <tt>ext/ohcount_native/glots</tt>.
  # polyglots.c is generated output: do not edit it directly, edit the glot
  # definitions below and rerun this script instead.
  class Generator
    include EscapeHelper

    # Writes polyglots.c into the current working directory, replacing any
    # existing file of that name.
    #
    # The file contains, in order: a fixed preamble, the output of #print for
    # every glot, and a NULL-terminated POLYGLOTS array that references each
    # glot's definition.
    def generate
      # Build everything first so the output file is only opened once all
      # glots have been constructed.
      polyglots = build_polyglots
      File.open("polyglots.c", "w") do |io|
        write_preamble(io)
        write_definitions(io, polyglots)
        write_polyglots_array(io, polyglots)
      end
    end

    private

    # Constructs every glot and returns them as an Array.
    #
    # The order of the returned Array is the order in which definitions are
    # printed and in which entries appear in the POLYGLOTS array.
    def build_polyglots
      ada = CMonoglot.new("ada", '--', nil, true, false)
      assembler = CMonoglot.new("assembler", [';', '!', '//'], [e('/*'), e('*/')], false, false)
      awk = CMonoglot.new("awk", '#', nil, true, false, {:no_escape_dquote => true})
      bat = LineCommentMonoglot.new("bat", '^\\\\s*(?i)REM(?-i)')
      boo = PythonMonoglot.new("boo")
      clearsilver = CMonoglot.new("clearsilver", '#', nil, true, true)
      c = CMonoglot.new("c", '//', [e('/*'), e('*/')], true, true)
      cpp = CMonoglot.new("cpp", '//', [e('/*'), e('*/')], true, true)
      csharp = CMonoglot.new("csharp", '//', [e('/*'), e('*/')], true, false)
      css = CMonoglot.new("css", nil, [e('/*'), e('*/')], false, false)
      dcl = DclMonoglot.new("dcl")
      dylan = CMonoglot.new("dylan", '//', nil, true, false)
      documentation = Monoglot.new("documentation", [State.new("documentation", :text, :comment)], [])

      # CWEB: documentation with embedded C.
      c_web = Biglot.new("c_web", documentation, c, [
        ["documentation", :c_inner, :comment]
      ], [
        ["@c(?=\\\\s)", :documentation_text, :c_code, :from, false, 'code_block'],
        # This doesn't catch it the proper way if there are newlines, so use the next one
        # ["@<.+?@>\\\\s*=", :documentation_text, :c_code, :from, false, 'code_ref'],
        ["@>\\\\s*=", :documentation_text, :c_code, :from, false, 'code_ref'],
        ["@d(?=\\\\s)", :documentation_text, :c_code, :from, false, 'code_def'],
        ["@f(?=\\\\s)", :documentation_text, :c_code, :from, false, 'code_fmt'],
        ["@[@'{}+/!;]", :c_code, :c_code, :to, true, 'quoted_stuff'],
        ["@[.:^t<(]", :c_code, :documentation_c_inner, :to, false, 'inline'],
        ["@>;?", :documentation_c_inner, :return, :from, false, 'inline'],
        ["@\\\\*?(?=\\\\s)", :c_code, :return, :to, false, 'section']
      ])

      erlang = CMonoglot.new("erlang", '%%', nil, true, true)
      java = CMonoglot.new("java", '//', [e('/*'), e('*/')], true, false)
      javascript = CMonoglot.new("javascript", '//', [e('/*'), e('*/')], true, true)
      emacslisp = LineCommentMonoglot.new("emacslisp", ";")
      fortranfixed = CMonoglot.new("fortranfixed", '^[^ \n]', nil, true, true, {:no_escape_dquote => true, :no_escape_squote => true})
      fortranfree = CMonoglot.new("fortranfree", '!', nil, true, true, {:no_escape_dquote => true, :no_escape_squote => true})
      haskell = CMonoglot.new("haskell", '--', [e('{-'), e('-}')], true, false)
      lisp = LineCommentMonoglot.new("lisp", ";")
      lua = CMonoglot.new("lua", '--', nil, true, true)
      # NOTE(review): MATLAB writes block comments as %{ ... %}, but the opening
      # delimiter here is '{%', and neither delimiter goes through e() the way
      # the other block comments do -- confirm this is intended.
      matlab = CMonoglot.new("matlab", '#|%', ['{%', '%}'], false, true)
      metafont = LineCommentMonoglot.new("metafont", "%")
      metapost = LineCommentMonoglot.new("metapost", "%")
      objective_c = CMonoglot.new("objective_c", '//', [e('/*'), e('*/')], true, false)
      pascal = CMonoglot.new("pascal", '//', ['{', '}'], true, true, {:no_escape_dquote => true, :no_escape_squote => true})

      # Knuth's WEB: documentation with embedded Pascal.
      knuth_web = Biglot.new("knuth_web", documentation, pascal, [
        ["documentation", :inner, :comment],
        ["documentation", :incomment, :comment]
      ], [
        ["@p(?=\\\\s)", :documentation_text, :pascal_code, :from, false, 'code_block'],
        # This doesn't catch it the proper way if there are newlines, so use the next one
        # ["@<.+?@>\\\\s*=", :documentation_text, :pascal_code, :from, false, 'code_ref'],
        ["@>\\\\s*=", :documentation_text, :pascal_code, :from, false, 'code_ref'],
        ["@d(?=\\\\s)", :documentation_text, :pascal_code, :from, false, 'code_def'],
        ["@f(?=\\\\s)", :documentation_text, :pascal_code, :from, false, 'code_fmt'],
        ["@[@'{}+/!;]", :pascal_code, :pascal_code, :to, true, 'quoted_stuff'],
        ["@[.:^t<]", :pascal_code, :documentation_inner, :to, false, 'inline'],
        ["@>;?", :documentation_inner, :return, :from, false, 'inline'],
        [e('\&{'), :pascal_block_comment, :documentation_incomment, :to, false, 'ampersand'],
        [e('\.{'), :pascal_block_comment, :documentation_incomment, :to, false, 'dot'],
        [e('\\{'), :documentation_incomment, :documentation_incomment, :to, true, 'open'],
        [e('\\}'), :documentation_incomment, :documentation_incomment, :to, true, 'close'],
        [e('}'), :documentation_incomment, :return, :from, false, 'incomment'],
        ["@\\\\*?(?=\\\\s)", :pascal_code, :return, :to, false, 'section']
      ])

      perl = CMonoglot.new("perl", '#', ['^=\\\\w+', '^=cut[ \t]*\\\\n'], true, true)
      # The PHP language proper is named "php" but registered under the polyglot
      # name 'phplanguage'; the "php" polyglot below is HTML with embedded PHP.
      phplanguage = CMonoglot.new("php", '//', [e('/*'), e('*/')], true, true, {:polyglot_name => 'phplanguage'})
      pike = CMonoglot.new("pike", '//', [e('/*'), e('*/')], true, false)
      python = PythonMonoglot.new("python")
      ruby = CMonoglot.new("ruby", '#', nil, true, true)
      rexx = CMonoglot.new("rexx", nil, [e('/*'), e('*/')], true, true)
      scheme = LineCommentMonoglot.new("scheme", ";")
      shell = CMonoglot.new("shell", '#', nil, false, false)
      smalltalk = CMonoglot.new("smalltalk", nil, [e('"'), e('"')], false, true, {:no_escape_squote => true})
      sql = CMonoglot.new("sql", ['--', '//'], [['{', '}'], [e('/*'), e('*/')]], true, true)
      tcl = CMonoglot.new("tcl", '#', nil, true, false)
      vala = CMonoglot.new("vala", '//', [e('/*'), e('*/')], true, false)
      visualbasic = CMonoglot.new("visualbasic", '\'', nil, true, false)
      xml = XmlMonoglot.new("xml")
      xslt = XmlMonoglot.new("xslt")
      xmlschema = XmlMonoglot.new("xmlschema")
      html = HtmlPolyglot.new("html", javascript, css)
      php = HtmlWithPhpPolyglot.new("php", html, phplanguage)
      rhtml = RhtmlPolyglot.new("rhtml", html, ruby)
      jsp = JspPolyglot.new("jsp", html, java)
      groovy = CMonoglot.new("groovy", '//', [e('/*'), e('*/')], true, false)
      clearsilver_template = ClearsilverTemplate.new("clearsilver_template", html, clearsilver)
      dmd = DMonoglot.new('dmd')
      tex = CMonoglot.new("tex", '%', nil, false, false)

      # MetaPost with embedded TeX between verbatimtex/btex and etex.
      metapost_with_tex = Biglot.new('metapost_with_tex', metapost, tex, [], [
        ["verbatimtex", :metapost_code, :tex_code, :from, false, 'verbatim'],
        ["btex", :metapost_code, :tex_code, :from, false, 'btex'],
        ["etex", :tex_code, :return, :to, false, 'etex']
      ])

      # Keep this order stable: it is the order of the generated output.
      [
        ada,
        assembler,
        awk,
        bat,
        boo,
        clearsilver,
        c,
        cpp,
        csharp,
        css,
        dcl,
        dylan,
        documentation,
        erlang,
        groovy,
        java,
        javascript,
        emacslisp,
        fortranfixed,
        fortranfree,
        haskell,
        lisp,
        lua,
        matlab,
        metafont,
        metapost,
        objective_c,
        pascal,
        perl,
        pike,
        phplanguage,
        python,
        ruby,
        rexx,
        scheme,
        shell,
        smalltalk,
        sql,
        tcl,
        vala,
        visualbasic,
        xml,
        xmlschema,
        xslt,
        dmd,

        # poly
        html,
        php,
        rhtml,
        jsp,
        c_web,
        knuth_web,
        clearsilver_template,
        tex,
        metapost_with_tex
      ]
    end

    # Writes the fixed header of the generated C file to +io+.
    # The heredoc body is flush-left on purpose: it is emitted verbatim.
    def write_preamble(io)
      io.puts <<PREAMBLE
/*
 * polyglots.c
 * Ohcount
 *
 * GENERATED FILE **DO NOT EDIT**
 *
 */

#define __polyglots_c__
#include "common.h"

#define RETURN (State *)NULL
PREAMBLE
    end

    # Asks each glot, in order, to print itself to +io+.
    def write_definitions(io, polyglots)
      polyglots.each do |glot|
        glot.print(io)
      end
    end

    # Writes the POLYGLOTS array, which holds a reference to every glot's
    # definition followed by a terminating NULL, to +io+.
    def write_polyglots_array(io, polyglots)
      io.puts "\n"
      Monoglot.print_banner(io, "POLYGLOTS")
      io.puts "Polyglot *POLYGLOTS[] = {"
      polyglots.each do |glot|
        io.puts "       &#{ glot.definition },"
      end
      io.puts "       NULL\n};"
    end
  end
end

# Loading this script regenerates polyglots.c immediately.
Ohcount::Generator.new.generate