msctf: Stub implementation of ITfCategoryMgr.
[wine] / tools / winapi / winapi_parser.pm
1 #
2 # Copyright 1999, 2000, 2001 Patrik Stridvall
3 #
4 # This library is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU Lesser General Public
6 # License as published by the Free Software Foundation; either
7 # version 2.1 of the License, or (at your option) any later version.
8 #
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 # Lesser General Public License for more details.
13 #
14 # You should have received a copy of the GNU Lesser General Public
15 # License along with this library; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
17 #
18
19 package winapi_parser;
20
21 use strict;
22
23 use output qw($output);
24 use options qw($options);
25
26 # Defined a couple common regexp tidbits
27 my $CALL_CONVENTION="__cdecl|__stdcall|" .
28                     "__RPC_API|__RPC_STUB|__RPC_USER|RPC_ENTRY|" .
29                     "RPC_VAR_ENTRY|STDMETHODCALLTYPE|NET_API_FUNCTION|" .
30                     "CALLBACK|CDECL|NTAPI|PASCAL|APIENTRY|" .
31                     "VFWAPI|VFWAPIV|WINAPI|WINAPIV|";
32
33 sub parse_c_file($$) {
34     my $file = shift;
35     my $callbacks = shift;
36
37     my $empty_callback = sub { };
38
39     my $c_comment_found_callback = $$callbacks{c_comment_found} || $empty_callback;
40     my $cplusplus_comment_found_callback = $$callbacks{cplusplus_comment_found} || $empty_callback;
41     my $function_create_callback = $$callbacks{function_create} || $empty_callback;
42     my $function_found_callback = $$callbacks{function_found} || $empty_callback;
43     my $type_create_callback = $$callbacks{type_create} || $empty_callback;
44     my $type_found_callback = $$callbacks{type_found} || $empty_callback;
45     my $preprocessor_found_callback = $$callbacks{preprocessor_found} || $empty_callback;
46
47     # global
48     my $debug_channels = [];
49
50     my $in_function = 0;
51     my $function_begin;
52     my $function_end;
53     {
54         my $documentation_line;
55         my $documentation;
56         my $function_line;
57         my $linkage;
58         my $return_type;
59         my $calling_convention;
60         my $internal_name = "";
61         my $argument_types;
62         my $argument_names;
63         my $argument_documentations;
64         my $statements_line;
65         my $statements;
66
67         $function_begin = sub {
68             $documentation_line = shift;
69             $documentation = shift;
70             $function_line = shift;
71             $linkage = shift;
72             $return_type= shift;
73             $calling_convention = shift;
74             $internal_name = shift;
75             $argument_types = shift;
76             $argument_names = shift;
77             $argument_documentations = shift;
78
79             if(defined($argument_names) && defined($argument_types) &&
80                $#$argument_names == -1)
81             {
82                 foreach my $n (0..$#$argument_types) {
83                     push @$argument_names, "";
84                 }
85             }
86
87             if(defined($argument_documentations) &&
88                $#$argument_documentations == -1)
89             {
90                 foreach my $n (0..$#$argument_documentations) {
91                     push @$argument_documentations, "";
92                 }
93             }
94
95             $in_function = 1;
96         };
97
98         $function_end = sub {
99             $statements_line = shift;
100             $statements = shift;
101
102             my $function = &$function_create_callback();
103
104             if(!defined($documentation_line)) {
105                 $documentation_line = 0;
106             }
107
108             $function->file($file);
109             $function->debug_channels([@$debug_channels]);
110             $function->documentation_line($documentation_line);
111             $function->documentation($documentation);
112             $function->function_line($function_line);
113             $function->linkage($linkage);
114             $function->return_type($return_type);
115             $function->calling_convention($calling_convention);
116             $function->internal_name($internal_name);
117             if(defined($argument_types)) {
118                 $function->argument_types([@$argument_types]);
119             }
120             if(defined($argument_names)) {
121                 $function->argument_names([@$argument_names]);
122             }
123             if(defined($argument_documentations)) {
124                 $function->argument_documentations([@$argument_documentations]);
125             }
126             $function->statements_line($statements_line);
127             $function->statements($statements);
128
129             &$function_found_callback($function);
130
131             $in_function = 0;
132         };
133     }
134
135     my $in_type = 0;
136     my $type_begin;
137     my $type_end;
138     {
139         my $type;
140
141         $type_begin = sub {
142             $type = shift;
143             $in_type = 1;
144         };
145
146         $type_end = sub {
147             my $names = shift;
148
149             foreach my $name (@$names) {
150                 if($type =~ /^(?:enum|interface|struct|union)/) {
151                     # $output->write("typedef $type {\n");
152                     # $output->write("} $name;\n");
153                 } else {
154                     # $output->write("typedef $type $name;\n");
155                 }
156             }
157             $in_type = 0;
158         };
159     }
160
161     my %regs_entrypoints;
162     my @comment_lines = ();
163     my @comments = ();
164     my $statements_line;
165     my $statements;
166     my $level = 0;
167     my $extern_c = 0;
168     my $again = 0;
169     my $lookahead = 0;
170     my $lookahead_count = 0;
171
172     print STDERR "Processing file '$file' ... " if $options->verbose;
173     open(IN, "< $file") || die "<internal>: $file: $!\n";
174     local $_ = "";
175     readmore: while($again || defined(my $line = <IN>)) {
176         $_ = "" if !defined($_);
177         if(!$again) {
178             chomp $line;
179
180             if($lookahead) {
181                 $lookahead = 0;
182                 $_ .= "\n" . $line;
183                 $lookahead_count++;
184             } else {
185                 $_ = $line;
186                 $lookahead_count = 0;
187             }
188             $output->write(" $level($lookahead_count): $line\n") if $options->debug >= 2;
189             $output->write("*** $_\n") if $options->debug >= 3;
190         } else {
191             $lookahead_count = 0;
192             $again = 0;
193         }
194
195         # CVS merge conflicts in file?
196         if(/^(<<<<<<<|=======|>>>>>>>)/) {
197             $output->write("$file: merge conflicts in file\n");
198             last;
199         }
200
201         my $prefix="";
202         while ($_ ne "")
203         {
204             if (s/^([^\"\/]+|\"(?:[^\\\"]*|\\.)*\")//)
205             {
206                 $prefix.=$1;
207             }
208             elsif (/^\/\*/)
209             {
210                 # remove C comments
211                 if(s/^(\/\*.*?\*\/)//s) {
212                     my @lines = split(/\n/, $1);
213                     push @comment_lines, $.;
214                     push @comments, $1;
215                     &$c_comment_found_callback($. - $#lines, $., $1);
216                     if($#lines <= 0) {
217                         $_ = "$prefix $_";
218                     } else {
219                         $_ = $prefix . ("\n" x $#lines) . $_;
220                     }
221                     $again = 1;
222                 } else {
223                     $_ = "$prefix$_";
224                     $lookahead = 1;
225                 }
226                 next readmore;
227             }
228             elsif (s/^(\/\/.*)$//)
229             {
230                 # remove C++ comments
231                 &$cplusplus_comment_found_callback($., $1);
232                 $again = 1;
233             }
234             elsif (s/^(.)//)
235             {
236                 $prefix.=$1;
237             }
238         }
239         $_=$prefix;
240
241         # remove preprocessor directives
242         if(s/^\s*\#/\#/s) {
243             if(/^(\#.*?)\\$/s) {
244                 $_ = "$1\n";
245                 $lookahead = 1;
246                 next;
247             } elsif(s/^\#\s*(\w+)((?:\s+(.*?))?\s*)$//s) {
248                 my @lines = split(/\n/, $2);
249                 if($#lines > 0) {
250                     $_ = "\n" x $#lines;
251                 }
252                 if(defined($3)) {
253                     &$preprocessor_found_callback($1, $3);
254                 } else {
255                     &$preprocessor_found_callback($1, "");
256                 }
257                 $again = 1;
258                 next;
259             }
260         }
261
262         # Remove extern "C"
263         if(s/^\s*extern\s+"C"\s+\{//m) {
264             $extern_c = 1;
265             $again = 1;
266             next;
267         }
268
269         my $documentation_line;
270         my $documentation;
271         my @argument_documentations = ();
272         {
273             my $n = $#comments;
274             while($n >= 0 && ($comments[$n] !~ /^\/\*\*/ ||
275                               $comments[$n] =~ /^\/\*\*+\/$/))
276             {
277                 $n--;
278             }
279
280             if(defined($comments[$n]) && $n >= 0) {
281                 my @lines = split(/\n/, $comments[$n]);
282
283                 $documentation_line = $comment_lines[$n] - scalar(@lines) + 1;
284                 $documentation = $comments[$n];
285
286                 for(my $m=$n+1; $m <= $#comments; $m++) {
287                     if($comments[$m] =~ /^\/\*\*+\/$/ ||
288                        $comments[$m] =~ /^\/\*\s*(?:\!)?defined/) # FIXME: Kludge
289                     {
290                         @argument_documentations = ();
291                         next;
292                     }
293                     push @argument_documentations, $comments[$m];
294                 }
295             } else {
296                 $documentation = "";
297             }
298         }
299
300         if($level > 0)
301         {
302             my $line = "";
303             while(/^[^\{\}]/) {
304                 s/^([^\{\}\'\"]*)//s;
305                 $line .= $1;
306                 if(s/^\'//) {
307                     $line .= "\'";
308                     while(/^./ && !s/^\'//) {
309                         s/^([^\'\\]*)//s;
310                         $line .= $1;
311                         if(s/^\\//) {
312                             $line .= "\\";
313                             if(s/^(.)//s) {
314                                 $line .= $1;
315                                 if($1 eq "0") {
316                                     s/^(\d{0,3})//s;
317                                     $line .= $1;
318                                 }
319                             }
320                         }
321                     }
322                     $line .= "\'";
323                 } elsif(s/^\"//) {
324                     $line .= "\"";
325                     while(/^./ && !s/^\"//) {
326                         s/^([^\"\\]*)//s;
327                         $line .= $1;
328                         if(s/^\\//) {
329                             $line .= "\\";
330                             if(s/^(.)//s) {
331                                 $line .= $1;
332                                 if($1 eq "0") {
333                                     s/^(\d{0,3})//s;
334                                     $line .= $1;
335                                 }
336                             }
337                         }
338                     }
339                     $line .= "\"";
340                 }
341             }
342
343             if(s/^\{//) {
344                 $_ = $'; $again = 1;
345                 $line .= "{";
346                 print "+1: \{$_\n" if $options->debug >= 2;
347                 $level++;
348                 $statements .= $line;
349             } elsif(s/^\}//) {
350                 $_ = $'; $again = 1;
351                 $line .= "}" if $level > 1;
352                 print "-1: \}$_\n" if $options->debug >= 2;
353                 $level--;
354                 if($level == -1 && $extern_c) {
355                     $extern_c = 0;
356                     $level = 0;
357                 }
358                 $statements .= $line;
359             } else {
360                 $statements .= "$line\n";
361             }
362
363             if($level == 0) {
364                 if($in_function) {
365                     &$function_end($statements_line, $statements);
366                     $statements = undef;
367                 } elsif($in_type) {
368                     if(/^\s*((?:(?:FAR\s*)?\*\s*(?:RESTRICTED_POINTER\s+)?)?
369                             (?:volatile\s+)?
370                             (?:\w+|WS\(\w+\))\s*
371                             (?:\s*,\s*(?:(?:FAR\s*)?\*+\s*(?:RESTRICTED_POINTER\s+)?)?(?:volatile\s+)?(?:\w+|WS\(\w+\)))*\s*);/sx) {
372                         my @parts = split(/\s*,\s*/, $1);
373                         &$type_end([@parts]);
374                     } elsif(/;/s) {
375                         die "$file: $.: syntax error: '$_'\n";
376                     } else {
377                         $lookahead = 1;
378                     }
379                 }
380             }
381             next;
382         } elsif(/(extern\s+|static\s+)?((interface\s+|struct\s+|union\s+|enum\s+|signed\s+|unsigned\s+)?\w+((\s*\*)+\s*|\s+))
383             (($CALL_CONVENTION)\s+)?
384             (\w+(\(\w+\))?)\s*\((.*?)\)\s*(\{|\;)/sx)
385         {
386             my @lines = split(/\n/, $&);
387             my $function_line = $. - scalar(@lines) + 1;
388
389             $_ = $'; $again = 1;
390
391             if($11 eq "{")  {
392                 $level++;
393             }
394
395             my $linkage = $1;
396             my $return_type = $2;
397             my $calling_convention = $7;
398             my $name = $8;
399             my $arguments = $10;
400
401             if(!defined($linkage)) {
402                 $linkage = "";
403             }
404
405             if(!defined($calling_convention)) {
406                 $calling_convention = "";
407             }
408
409             $linkage =~ s/\s*$//;
410
411             $return_type =~ s/\s*$//;
412             $return_type =~ s/\s*\*\s*/*/g;
413             $return_type =~ s/(\*+)/ $1/g;
414
415             if($regs_entrypoints{$name}) {
416                 $name = $regs_entrypoints{$name};
417             }
418
419             $arguments =~ y/\t\n/  /;
420             $arguments =~ s/^\s*(.*?)\s*$/$1/;
421             if($arguments eq "") { $arguments = "..." }
422
423             my @argument_types;
424             my @argument_names;
425             my @arguments;
426             my $n = 0;
427             while ($arguments =~ s/^((?:[^,\(\)]*|(?:\([^\)]*\))?)+)(?:,|$)// && $1) {
428                 my $argument = $1;
429                 push @arguments, $argument;
430
431                 my $argument_type = "";
432                 my $argument_name = "";
433
434                 $argument =~ s/^\s*(.*?)\s*$/$1/;
435                 # print "  " . ($n + 1) . ": '$argument'\n";
436                 $argument =~ s/^(?:IN OUT|IN|OUT)?\s+//;
437                 $argument =~ s/^(?:const|CONST|GDIPCONST|volatile)?\s+//;
438                 if($argument =~ /^\.\.\.$/) {
439                     $argument_type = "...";
440                     $argument_name = "...";
441                 } elsif($argument =~ /^
442                         ((?:interface\s+|struct\s+|union\s+|enum\s+|register\s+|(?:signed\s+|unsigned\s+)?
443                           (?:short\s+(?=int)|long\s+(?=int))?)?(?:\w+|ElfW\(\w+\)|WS\(\w+\)))\s*
444                         ((?:__RPC_FAR|const|CONST|GDIPCONST|volatile)?\s*(?:\*\s*(?:__RPC_FAR|const|CONST|volatile)?\s*?)*)\s*
445                         (\w*)\s*(\[\])?(?:\s+OPTIONAL)?$/x)
446                 {
447                     $argument_type = $1;
448                     if ($2) {
449                         $argument_type .= " $2";
450                     }
451                     if ($4) {
452                         $argument_type .= "$4";
453                     }
454                     $argument_name = $3;
455                 } elsif ($argument =~ /^
456                         ((?:interface\s+|struct\s+|union\s+|enum\s+|register\s+|(?:signed\s+|unsigned\s+)?
457                           (?:short\s+(?=int)|long\s+(?=int))?)?\w+)\s*
458                         ((?:const|volatile)?\s*(?:\*\s*(?:const|volatile)?\s*?)*)\s*
459                         (?:(?:$CALL_CONVENTION)\s+)?
460                         \(\s*(?:$CALL_CONVENTION)?\s*\*\s*((?:\w+)?)\s*\)\s*
461                         \(\s*(.*?)\s*\)$/x) 
462                 {
463                     my $return_type = $1;
464                     if($2) {
465                         $return_type .= " $2";
466                     }
467                     $argument_name = $3;
468                     my $arguments = $4;
469
470                     $return_type =~ s/\s+/ /g;
471                     $arguments =~ s/\s*,\s*/,/g;
472                     
473                     $argument_type = "$return_type (*)($arguments)";
474                 } elsif ($argument =~ /^
475                         ((?:interface\s+|struct\s+|union\s+|enum\s+|register\s+|(?:signed\s+|unsigned\s+)
476                           (?:short\s+(?=int)|long\s+(?=int))?)?\w+)\s*
477                         ((?:const|volatile)?\s*(?:\*\s*(?:const|volatile)?\s*?)*)\s*
478                         (\w+)\s*\[\s*(.*?)\s*\](?:\[\s*(.*?)\s*\])?$/x)
479                 {
480                     my $return_type = $1;
481                     if($2) {
482                         $return_type .= " $2";
483                     }
484                     $argument_name = $3;
485
486                     $argument_type = "$return_type\[$4\]";
487
488                     if (defined($5)) {
489                         $argument_type .= "\[$5\]";
490                     }
491
492                     # die "$file: $.: syntax error: '$argument_type':'$argument_name'\n";
493                 } else {
494                     die "$file: $.: syntax error: '$argument'\n";
495                 }
496
497                 $argument_type =~ s/\s*(?:const|volatile)\s*/ /g; # Remove const/volatile
498                 $argument_type =~ s/([^\*\(\s])\*/$1 \*/g; # Assure whitespace between non-* and *
499                 $argument_type =~ s/,([^\s])/, $1/g; # Assure whitespace after ,
500                 $argument_type =~ s/\*\s+\*/\*\*/g; # Remove whitespace between * and *
501                 $argument_type =~ s/([\(\[])\s+/$1/g; # Remove whitespace after ( and [
502                 $argument_type =~ s/\s+([\)\]])/$1/g; # Remove whitespace before ] and )
503                 $argument_type =~ s/\s+/ /; # Remove multiple whitespace
504                 $argument_type =~ s/^\s*(.*?)\s*$/$1/; # Remove leading and trailing whitespace
505
506                 $argument_name =~ s/^\s*(.*?)\s*$/$1/; # Remove leading and trailing whitespace
507
508                 $argument_types[$n] = $argument_type;
509                 $argument_names[$n] = $argument_name;
510                 # print "  " . ($n + 1) . ": '" . $argument_types[$n] . "', '" . $argument_names[$n] . "'\n";
511
512                 $n++;
513             }
514             if($#argument_types == 0 && $argument_types[0] =~ /^void$/i) {
515                 $#argument_types = -1;
516                 $#argument_names = -1;
517             }
518
519             if($options->debug) {
520                 print "$file: $return_type $calling_convention $name(" . join(",", @arguments) . ")\n";
521             }
522
523             &$function_begin($documentation_line, $documentation,
524                              $function_line, $linkage, $return_type, $calling_convention, $name,
525                              \@argument_types,\@argument_names,\@argument_documentations);
526             if($level == 0) {
527                 &$function_end(undef, undef);
528             }
529             $statements_line = $.;
530             $statements = "";
531         } elsif(/__ASM_GLOBAL_FUNC\(\s*(.*?)\s*,/s) {
532             my @lines = split(/\n/, $&);
533             my $function_line = $. - scalar(@lines) + 1;
534
535             $_ = $'; $again = 1;
536
537             &$function_begin($documentation_line, $documentation,
538                              $function_line, "", "void", "__asm", $1);
539             &$function_end($., "");
540         } elsif(/DEFINE_THISCALL_WRAPPER\((\S*)\)/s) {
541             my @lines = split(/\n/, $&);
542             my $function_line = $. - scalar(@lines) + 1;
543
544             $_ = $'; $again = 1;
545
546             &$function_begin($documentation_line, $documentation,
547                              $function_line, "", "void", "", "__thiscall_" . $1, \());
548             &$function_end($function_line, "");
549         } elsif(/DEFINE_REGS_ENTRYPOINT_\d+\(\s*(\S*)\s*,\s*([^\s,\)]*).*?\)/s) {
550             $_ = $'; $again = 1;
551             $regs_entrypoints{$2} = $1;
552         } elsif(/DEFAULT_DEBUG_CHANNEL\s*\((\S+)\)/s) {
553             $_ = $'; $again = 1;
554             unshift @$debug_channels, $1;
555         } elsif(/(DEFAULT|DECLARE)_DEBUG_CHANNEL\s*\((\S+)\)/s) {
556             $_ = $'; $again = 1;
557             push @$debug_channels, $1;
558         } elsif(/typedef\s+(enum|interface|struct|union)(?:\s+(\w+))?\s*\{/s) {
559             $_ = $'; $again = 1;
560             $level++;
561             my $type = $1;
562             if(defined($2)) {
563                $type .= " $2";
564             }
565             &$type_begin($type);
566         } elsif(/typedef\s+
567                 ((?:const\s+|CONST\s+|enum\s+|interface\s+|long\s+|signed\s+|short\s+|struct\s+|union\s+|unsigned\s+|volatile\s+)*?)
568                 (\w+)
569                 (?:\s+const|\s+volatile)?
570                 ((?:\s*(?:(?:FAR|__RPC_FAR|TW_HUGE)?\s*)?\*+\s*|\s+)(?:volatile\s+|DECLSPEC_ALIGN\(\d+\)\s+)?\w+\s*(?:\[[^\]]*\])*
571                 (?:\s*,\s*(?:\s*(?:(?:FAR|__RPC_FAR|TW_HUGE)?\s*)?\*+\s*|\s+)\w+\s*(?:\[[^\]]*\])?)*)
572                 \s*;/sx)
573         {
574             $_ = $'; $again = 1;
575
576             my $type = "$1 $2";
577
578             my @names;
579             my @parts = split(/\s*,\s*/, $2);
580             foreach my $part (@parts) {
581                 if($part =~ /(?:\s*((?:(?:FAR|__RPC_FAR|TW_HUGE)?\s*)?\*+)\s*|\s+)(\w+)\s*(\[[^\]]*\])?/) {
582                     my $name = $2;
583                     if(defined($1)) {
584                         $name = "$1$2";
585                     }
586                     if(defined($3)) {
587                         $name .= $3;
588                     }
589                     push @names, $name;
590                 }
591             }
592             &$type_begin($type);
593             &$type_end([@names]);
594         } elsif(/typedef\s+
595                 (?:(?:const\s+|enum\s+|interface\s+|long\s+|signed\s+|short\s+|struct\s+|union\s+|unsigned\s+|volatile\s+)*?)
596                 (\w+(?:\s*\*+\s*)?)\s*
597                 (?:(\w+)\s*)?
598                 \((?:(\w+)\s*)?\s*(?:\*\s*(\w+)|_ATL_CATMAPFUNC)\s*\)\s*
599                 (?:\(([^\)]*)\)|\[([^\]]*)\])\s*;/sx)
600         {
601             $_ = $'; $again = 1;
602             my $type;
603             if(defined($2) || defined($3)) {
604                 my $cc = $2 || $3;
605                 if(defined($5)) {
606                     $type = "$1 ($cc *)($5)";
607                 } else {
608                     $type = "$1 ($cc *)[$6]";
609                 }
610             } else {
611                 if(defined($5)) {
612                     $type = "$1 (*)($5)";
613                 } else {
614                     $type = "$1 (*)[$6]";
615                 }
616             }
617             my $name = $4;
618             &$type_begin($type);
619             &$type_end([$name]);
620         } elsif(/typedef[^\{;]*;/s) {
621             $_ = $'; $again = 1;
622             $output->write("$file: $.: can't parse: '$&'\n");
623         } elsif(/typedef[^\{]*\{[^\}]*\}[^;];/s) {
624             $_ = $'; $again = 1;
625             $output->write("$file: $.: can't parse: '$&'\n");
626         } elsif(/\'[^\']*\'/s) {
627             $_ = $'; $again = 1;
628         } elsif(/\"(?:[^\\\"]*|\\.)*\"/s) {
629             $_ = $'; $again = 1;
630         } elsif(/;/s) {
631             $_ = $'; $again = 1;
632         } elsif(/extern\s+"C"\s+{/s) {
633             $_ = $'; $again = 1;
634         } elsif(/\{/s) {
635             $_ = $'; $again = 1;
636             print "+1: $_\n" if $options->debug >= 2;
637             $level++;
638         } else {
639             $lookahead = 1;
640         }
641     }
642     close(IN);
643     print STDERR "done\n" if $options->verbose;
644     $output->write("$file: not at toplevel at end of file\n") unless $level == 0;
645 }
646
647 1;