support Literate Haskell
[ohcount] / ext / ohcount_native / generator.rb
require 'state'
require 'transition'
require 'escape_helper'

# Put the glots directory on the load path so that monoglot and polyglot
# source files can easily require each other.
glots_dir = File.join(File.dirname(__FILE__), 'glots')
$LOAD_PATH << glots_dir

# Load every monoglot and polyglot definition file found in that directory.
Dir.glob(File.join(glots_dir, '*.rb')).each do |glot_file|
  require glot_file
end
10
module Ohcount
  # Turns the Monoglot and Polyglot definitions (loaded from
  # <tt>ext/ohcount_native/glots</tt>) into the C source file polyglots.c.
  #
  # polyglots.c defines all of the language parsers used by ohcount.
  # Do not edit polyglots.c directly; change the definitions here and
  # regenerate it instead.
  class Generator
    include EscapeHelper

    # Builds every known glot and writes polyglots.c.
    #
    # The output path is relative, so the file is created in (or overwritten
    # in) the current working directory of the process.
    def generate
      polyglots = build_polyglots
      File.open("polyglots.c", "w") do |io|
        write_polyglots_c(io, polyglots)
      end
    end

    private

    # Instantiates all of the monoglots and polyglots known to the parser and
    # returns them as an Array, in the order in which they are emitted.
    #
    # NOTE(review): the positional CMonoglot arguments appear to be: name,
    # line-comment marker(s), block-comment delimiters, then two boolean
    # string-handling flags and an optional options hash -- confirm against
    # the CMonoglot definition in glots/.
    def build_polyglots
      ada = CMonoglot.new("ada",                 '--',             nil,                true,  false)
      assembler = CMonoglot.new("assembler",     [';', '!', '//'], [e('/*'), e('*/')], false, false)
      awk = CMonoglot.new("awk",                 '#',              nil,                true,  false, {:no_escape_dquote => true})
      bat = LineCommentMonoglot.new("bat",        '^\\\\s*(?i)REM(?-i)')
      boo = PythonMonoglot.new("boo")
      clearsilver = CMonoglot.new("clearsilver", '#',              nil,                true,  true)
      c = CMonoglot.new("c",                     '//',             [e('/*'), e('*/')], true,  true)
      cpp = CMonoglot.new("cpp",                 '//',             [e('/*'), e('*/')], true,  true)
      csharp = CMonoglot.new("csharp",           '//',             [e('/*'), e('*/')], true,  false)
      css = CMonoglot.new("css",                  nil,             [e('/*'), e('*/')], false,  false)
      dcl = DclMonoglot.new("dcl")
      dylan = CMonoglot.new("dylan",             '//',             nil,                true,  false)
      documentation = Monoglot.new("documentation", [State.new("documentation", :text, :comment)], [])
      # CWEB: documentation text with embedded C code.
      # NOTE(review): the second array looks like a list of transitions
      # (pattern, from-state, to-state, ...) -- confirm against Biglot.
      c_web = Biglot.new("c_web", documentation, c, [
        ["documentation", :c_inner, :comment],
      ], [
        ["@c(?=\\\\s)", :documentation_text, :c_code, :from, false, 'code_block'],
        # This doesn't catch it the proper way if there are newlines, so use the next one
        # ["@<.+?@>\\\\s*=", :documentation_text, :c_code, :from, false, 'code_ref'],
        ["@>\\\\s*=", :documentation_text, :c_code, :from, false, 'code_ref'],
        ["@d(?=\\\\s)", :documentation_text, :c_code, :from, false, 'code_def'],
        ["@f(?=\\\\s)", :documentation_text, :c_code, :from, false, 'code_fmt'],
        ["@[@'{}+/!;]", :c_code, :c_code, :to, true, 'quoted_stuff'],
        ["@[.:^t<(]", :c_code, :documentation_c_inner, :to, false, 'inline'],
        ["@>;?", :documentation_c_inner, :return, :from, false, 'inline'],
        ["@\\\\*?(?=\\\\s)", :c_code, :return, :to, false, 'section']
      ])
      erlang = CMonoglot.new("erlang",           '%%',             nil,                true,  true)
      java = CMonoglot.new("java",               '//',             [e('/*'), e('*/')], true,  false)
      javascript = CMonoglot.new("javascript",   '//',             [e('/*'), e('*/')], true,  true)
      emacslisp = LineCommentMonoglot.new("emacslisp", ";")
      fortranfixed = CMonoglot.new("fortranfixed", '^[^ \n]',          nil,                true,  true, {:no_escape_dquote => true, :no_escape_squote => true})
      fortranfree  = CMonoglot.new("fortranfree",  '!',            nil,                true,  true, {:no_escape_dquote => true, :no_escape_squote => true})
      haskell = CMonoglot.new("haskell",         '--',             [e('{-'), e('-}')], true, false)
      # Literate Haskell: documentation text with Haskell code introduced
      # either by a leading '>' or by a \begin{code} ... \end{code} block.
      lit_haskell = Biglot.new("lit_haskell", documentation, haskell, [
        ["haskell", :oneliner, :code],
      ], [
        # Technically, the next one should only work when following a blank ... how to enforce it?
        ['^>', :documentation_text, :haskell_oneliner, :to, false, 'chicken'],
        ['\n', :haskell_oneliner, :return, :from, false, 'chicken'],
        ['^'+e('\begin{code}'), :documentation_text, :haskell_code, :from, false, 'code_block'],
        ['^'+e('\end{code}'), :haskell_code, :return, :to, false, 'code_block']
      ])
      lisp = LineCommentMonoglot.new("lisp", ";")
      lua = CMonoglot.new("lua",                 '--',             nil,                true,  true)
      matlab = CMonoglot.new("matlab",           '#|%',            ['{%', '%}'], false,true)
      metafont = LineCommentMonoglot.new("metafont", "%")
      metapost = LineCommentMonoglot.new("metapost", "%")
      objective_c = CMonoglot.new("objective_c", '//',             [e('/*'), e('*/')], true,  false)
      pascal = CMonoglot.new("pascal",           '//',             ['{','}'],          true,  true, {:no_escape_dquote => true, :no_escape_squote => true})
      # Knuth's WEB: documentation text with embedded Pascal code.
      knuth_web = Biglot.new("knuth_web", documentation, pascal, [
        ["documentation", :inner, :comment],
        ["documentation", :incomment, :comment],
      ], [
        ["@p(?=\\\\s)", :documentation_text, :pascal_code, :from, false, 'code_block'],
        # This doesn't catch it the proper way if there are newlines, so use the next one
        # ["@<.+?@>\\\\s*=", :documentation_text, :pascal_code, :from, false, 'code_ref'],
        ["@>\\\\s*=", :documentation_text, :pascal_code, :from, false, 'code_ref'],
        ["@d(?=\\\\s)", :documentation_text, :pascal_code, :from, false, 'code_def'],
        ["@f(?=\\\\s)", :documentation_text, :pascal_code, :from, false, 'code_fmt'],
        ["@[@'{}+/!;]", :pascal_code, :pascal_code, :to, true, 'quoted_stuff'],
        ["@[.:^t<]", :pascal_code, :documentation_inner, :to, false, 'inline'],
        ["@>;?", :documentation_inner, :return, :from, false, 'inline'],
        [e('\&{'), :pascal_block_comment, :documentation_incomment, :to, false, 'ampersand'],
        [e('\.{'), :pascal_block_comment, :documentation_incomment, :to, false, 'dot'],
        [e('\\{'), :documentation_incomment, :documentation_incomment, :to, true, 'open'],
        [e('\\}'), :documentation_incomment, :documentation_incomment, :to, true, 'close'],
        [e('}'), :documentation_incomment, :return, :from, false, 'incomment'],
        ["@\\\\*?(?=\\\\s)", :pascal_code, :return, :to, false, 'section']
      ])
      perl = CMonoglot.new("perl",               '#',              ['^=\\\\w+', '^=cut[ \t]*\\\\n'],  true,  true)
      phplanguage = CMonoglot.new("php",         '//',             [e('/*'), e('*/')], true,  true, {:polyglot_name => 'phplanguage'})
      pike = CMonoglot.new("pike",             '//',             [e('/*'), e('*/')], true,  false)
      python = PythonMonoglot.new("python")
      ruby = CMonoglot.new("ruby",               '#',              nil,                true,  true)
      rexx = CMonoglot.new("rexx",               nil,              [e('/*'), e('*/')], true,  true)
      scheme = LineCommentMonoglot.new("scheme", ";")
      shell = CMonoglot.new("shell",             '#',              nil,                false, false)
      # The options hash is passed directly; the original wrote
      # `options = {...}` here, which only created an unused local variable.
      smalltalk = CMonoglot.new("smalltalk",            nil,             [e('"'), e('"')], false,  true, {:no_escape_squote => true})
      sql = CMonoglot.new("sql",                 ['--','//'],      [['{','}'], [e('/*'), e('*/')]], true, true)
      tcl = CMonoglot.new("tcl",                 '#',              nil,                true,  false)
      vala = CMonoglot.new("vala",               '//',             [e('/*'), e('*/')], true,  false)
      visualbasic = CMonoglot.new("visualbasic", '\'',             nil,                true,  false)
      xml = XmlMonoglot.new("xml")
      xslt = XmlMonoglot.new("xslt")
      xmlschema = XmlMonoglot.new("xmlschema")
      html = HtmlPolyglot.new("html", javascript, css)
      php = HtmlWithPhpPolyglot.new("php", html, phplanguage)
      rhtml = RhtmlPolyglot.new("rhtml", html, ruby)
      jsp = JspPolyglot.new("jsp", html, java)
      groovy = CMonoglot.new("groovy",           '//',             [e('/*'), e('*/')], true,  false)
      clearsilver_template = ClearsilverTemplate.new("clearsilver_template", html, clearsilver)
      dmd = DMonoglot.new('dmd')
      tex = CMonoglot.new("tex",             '%',              nil,                false, false)
      metapost_with_tex = Biglot.new('metapost_with_tex', metapost, tex, [], [
        ["verbatimtex", :metapost_code, :tex_code, :from, false, 'verbatim'],
        ["btex", :metapost_code, :tex_code, :from, false, 'btex'],
        ["etex", :tex_code, :return, :to, false, 'etex']
      ])

      # The order of this list is the order in which the definitions and the
      # POLYGLOTS array entries are written to polyglots.c.
      [
        ada,
        assembler,
        awk,
        bat,
        boo,
        clearsilver,
        c,
        cpp,
        csharp,
        css,
        dcl,
        dylan,
        documentation,
        erlang,
        groovy,
        java,
        javascript,
        emacslisp,
        fortranfixed,
        fortranfree,
        haskell,
        lisp,
        lua,
        matlab,
        metafont,
        metapost,
        objective_c,
        pascal,
        perl,
        pike,
        phplanguage,
        python,
        ruby,
        rexx,
        scheme,
        shell,
        smalltalk,
        sql,
        tcl,
        vala,
        visualbasic,
        xml,
        xmlschema,
        xslt,
        dmd,

        # poly
        html,
        php,
        rhtml,
        jsp,
        c_web,
        knuth_web,
        lit_haskell,
        clearsilver_template,
        tex,
        metapost_with_tex
      ]
    end

    # Writes the complete C source to +io+: the fixed preamble, each glot's
    # own output (via its +print+ method), and finally the NULL-terminated
    # POLYGLOTS array that references every glot's +definition+.
    def write_polyglots_c(io, polyglots)
      # Spit out the preamble to our source code. The heredoc body and its
      # terminator must stay at column 0 because a plain <<ID heredoc is used.
      io.puts <<PREAMBLE
/*
 * polyglots.c
 * Ohcount
 *
 * GENERATED FILE **DO NOT EDIT**
 *
 */

#define __polyglots_c__
#include "common.h"

#define RETURN (State *)NULL
PREAMBLE

      # Let every glot print itself first ...
      polyglots.each do |glot|
        glot.print(io)
      end
      io.puts "\n"

      # ... then spit out the actual POLYGLOTS array, which contains a
      # reference to all the polyglots we define in our library.
      Monoglot.print_banner(io, "POLYGLOTS")
      io.puts "Polyglot *POLYGLOTS[] = {"
      polyglots.each do |glot|
        io.puts "       &#{ glot.definition },"
      end
      io.puts "       NULL\n};"
    end
  end
end
214
# Script entry point: instantiate the generator and run it.
generator = Ohcount::Generator.new
generator.generate