3 * Copyright 1998-2000 Bertho A. Stultiens (BS)
5 * 21-May-2000 BS - Fixed the ident requirement of resource names
6 * which can be keywords.
7 * 30-Apr-2000 BS - Reintegration into the wine-tree
8 * 11-Jan-2000 BS - Very drastic cleanup because we don't have a
9 * preprocessor in here anymore.
10 * 02-Jan-2000 BS - Removed the preprocessor code
11 * 23-Dec-1999 BS - Removed the copyright for Martin von Loewis.
12 * There is really nothing left of his code in
14 * 20-Jun-1998 BS - Changed the filename conversion. Filenames are
15 * case-sensitive under *nix, but not under dos.
16 * default behaviour is to convert to lower case.
17 * - All backslashes are converted to forward and
18 * both single and double slash is recognized as
20 * - Fixed a bug in 'yywf' case that prevented
21 * double quoted names to be scanned properly.
23 * 19-May-1998 BS - Started to build a preprocessor.
24 * - Changed keyword processing completely to
27 * 20-Apr-1998 BS - Added ';' comment stripping
29 * 17-Apr-1998 BS - Made the win32 keywords optional when compiling in
32 * 15-Apr-1998 BS - Changed string handling to include escapes
33 * - Added unicode string handling (no codepage
34 * translation though).
35 * - 'Borrowed' the main idea of string scanning from
36 * the flex manual pages.
37 * - Added conditional handling of scanning depending
38 * on the state of the parser. This was mainly required
39 * to distinguish a file to load or raw data that
40 * follows. MS's definition of filenames is rather
41 * complex... It can be unquoted or double quoted. If
42 * double quoted, then the '\\' char is not automatically
43 * escaped according to Borland's rc compiler, but it
44 * accepts both "\\path\\file.rc" and "\path\file.rc".
45 * This makes life very hard! I go for the escaped
46 * version, as this seems to be the documented way...
47 * - Single quoted strings are now parsed and converted
49 * - Added comment stripping. The implementation is
50 * 'borrowed' from the flex manpages.
51 * - Rebuild string processing so that it may contain
55 /* Exclusive string handling */
57 /* Exclusive unicode string handling */
59 /* Exclusive rcdata single quoted data handling */
61 /* Exclusive comment eating... */
63 /* Set when stripping c-junk */
70 %option never-interactive
72 /* Some shortcut definitions */
74 cident [a-zA-Z_][0-9a-zA-Z_]*
98 #define YY_NO_TOP_STATE
100 /* Always update the current character position within a line */
101 #define YY_USER_ACTION char_number+=yyleng; wanted_id = want_id; want_id = 0;
103 static void addcchar(char c);
104 static void addwchar(short s);
105 static string_t *get_buffered_cstring(void);
106 static string_t *get_buffered_wstring(void);
107 static string_t *make_string(char *s);
109 static char *cbuffer; /* Buffers for string collection */
111 static int cbufalloc = 0;
112 static short *wbuffer;
114 static int wbufalloc = 0;
115 static int stripslevel = 0; /* Count {} during pp_strips/pp_stripe mode */
116 static int stripplevel = 0; /* Count () during pp_strips mode */
117 static int cjunk_tagline; /* Where did we start stripping (helps error tracking) */
120 * This one is a bit tricky.
121 * We set 'want_id' in the parser to get the first
122 * identifier we get across in the scanner, but we
123 * also want it to be reset at nearly any token we
124 * see. Exceptions are:
129 * The scanner will automatically reset 'want_id'
130 * after *each* scanner reduction and puts its value
131 * into the var below. In this way we can see the
132 * state after the YY_RULE_SETUP (i.e. the user action;
133 * see above) and don't have to worry too much when
134 * it needs to be reset.
136 static int wanted_id = 0;
137 static int save_wanted_id; /* To save across comment reductions */
147 static struct keyword keywords[] = {
148 { "ACCELERATORS", tACCELERATORS, 0, 0, 0},
149 { "ALT", tALT, 0, 0, 0},
150 { "ASCII", tASCII, 0, 0, 0},
151 { "AUTO3STATE", tAUTO3STATE, 1, 0, 0},
152 { "AUTOCHECKBOX", tAUTOCHECKBOX, 1, 0, 0},
153 { "AUTORADIOBUTTON", tAUTORADIOBUTTON, 1, 0, 0},
154 { "BEGIN", tBEGIN, 0, 0, 0},
155 { "BITMAP", tBITMAP, 0, 0, 0},
156 { "BLOCK", tBLOCK, 0, 0, 0},
157 { "BUTTON", tBUTTON, 1, 0, 0},
158 { "CAPTION", tCAPTION, 0, 0, 0},
159 { "CHARACTERISTICS", tCHARACTERISTICS, 1, 0, 0},
160 { "CHECKBOX", tCHECKBOX, 0, 0, 0},
161 { "CHECKED", tCHECKED, 0, 0, 0},
162 { "CLASS", tCLASS, 0, 0, 0},
163 { "COMBOBOX", tCOMBOBOX, 0, 0, 0},
164 { "CONTROL", tCONTROL, 0, 0, 0},
165 { "CTEXT", tCTEXT, 0, 0, 0},
166 { "CURSOR", tCURSOR, 0, 0, 0},
167 { "DEFPUSHBUTTON", tDEFPUSHBUTTON, 0, 0, 0},
168 { "DIALOG", tDIALOG, 0, 0, 0},
169 { "DIALOGEX", tDIALOGEX, 1, 0, 0},
170 { "DISCARDABLE", tDISCARDABLE, 0, 0, 0},
171 { "DLGINIT", tDLGINIT, 0, 0, 0},
172 { "EDITTEXT", tEDITTEXT, 0, 0, 0},
173 { "END", tEND, 0, 0, 0},
174 { "enum", tENUM, 0, 1, 1},
175 { "EXSTYLE", tEXSTYLE, 0, 0, 0},
176 { "extern", tEXTERN, 0, 1, 1},
177 { "FILEFLAGS", tFILEFLAGS, 0, 0, 0},
178 { "FILEFLAGSMASK", tFILEFLAGSMASK, 0, 0, 0},
179 { "FILEOS", tFILEOS, 0, 0, 0},
180 { "FILESUBTYPE", tFILESUBTYPE, 0, 0, 0},
181 { "FILETYPE", tFILETYPE, 0, 0, 0},
182 { "FILEVERSION", tFILEVERSION, 0, 0, 0},
183 { "FIXED", tFIXED, 0, 0, 0},
184 { "FONT", tFONT, 0, 0, 0},
185 { "FONTDIR", tFONTDIR, 0, 0, 0}, /* This is a Borland BRC extension */
186 { "GRAYED", tGRAYED, 0, 0, 0},
187 { "GROUPBOX", tGROUPBOX, 0, 0, 0},
188 { "HELP", tHELP, 0, 0, 0},
189 { "ICON", tICON, 0, 0, 0},
190 { "IMPURE", tIMPURE, 0, 0, 0},
191 { "INACTIVE", tINACTIVE, 0, 0, 0},
192 { "inline", tINLINE, 0, 1, 1},
193 { "LANGUAGE", tLANGUAGE, 1, 0, 1},
194 { "LISTBOX", tLISTBOX, 0, 0, 0},
195 { "LOADONCALL", tLOADONCALL, 0, 0, 0},
196 { "LTEXT", tLTEXT, 0, 0, 0},
197 { "MENU", tMENU, 0, 0, 0},
198 { "MENUBARBREAK", tMENUBARBREAK, 0, 0, 0},
199 { "MENUBREAK", tMENUBREAK, 0, 0, 0},
200 { "MENUEX", tMENUEX, 1, 0, 0},
201 { "MENUITEM", tMENUITEM, 0, 0, 0},
202 { "MESSAGETABLE", tMESSAGETABLE, 1, 0, 0},
203 { "MOVEABLE", tMOVEABLE, 0, 0, 0},
204 { "NOINVERT", tNOINVERT, 0, 0, 0},
205 { "NOT", tNOT, 0, 0, 0},
206 { "POPUP", tPOPUP, 0, 0, 0},
207 { "PRELOAD", tPRELOAD, 0, 0, 0},
208 { "PRODUCTVERSION", tPRODUCTVERSION, 0, 0, 0},
209 { "PURE", tPURE, 0, 0, 0},
210 { "PUSHBUTTON", tPUSHBUTTON, 0, 0, 0},
211 { "RADIOBUTTON", tRADIOBUTTON, 0, 0, 0},
212 { "RCDATA", tRCDATA, 0, 0, 0},
213 { "RTEXT", tRTEXT, 0, 0, 0},
214 { "SCROLLBAR", tSCROLLBAR, 0, 0, 0},
215 { "SEPARATOR", tSEPARATOR, 0, 0, 0},
216 { "SHIFT", tSHIFT, 0, 0, 0},
217 { "STATE3", tSTATE3, 1, 0, 0},
218 { "static", tSTATIC, 0, 1, 1},
219 { "STRING", tSTRING, 0, 0, 0},
220 { "STRINGTABLE", tSTRINGTABLE, 0, 0, 1},
221 { "struct", tSTRUCT, 0, 1, 1},
222 { "STYLE", tSTYLE, 0, 0, 0},
223 { "TOOLBAR", tTOOLBAR, 1, 0, 0},
224 { "typedef", tTYPEDEF, 0, 1, 1},
225 { "VALUE", tVALUE, 0, 0, 0},
226 { "VERSION", tVERSION, 1, 0, 0},
227 { "VERSIONINFO", tVERSIONINFO, 0, 0, 0},
228 { "VIRTKEY", tVIRTKEY, 0, 0, 0}
231 #define NKEYWORDS (sizeof(keywords)/sizeof(keywords[0]))
232 #define KWP(p) ((struct keyword *)(p))
/*
 * Comparison callback for the keyword table; it is handed to qsort() and
 * bsearch() and also called directly from iskeyword().
 * The two entries are first compared case-insensitively. When that
 * comparison reports equality and either entry has 'needcase' set, the
 * result of a case-sensitive strcmp() is returned instead.
 * NOTE(review): the fall-through return is not visible in this excerpt;
 * presumably it returns the case-insensitive result - confirm.
 */
233 static int kw_cmp_func(const void *s1, const void *s2)
236 ret = strcasecmp(KWP(s1)->keyword, KWP(s2)->keyword);
237 if(!ret && (KWP(s1)->needcase || KWP(s2)->needcase))
238 return strcmp(KWP(s1)->keyword, KWP(s2)->keyword);
/*
 * Look up 'kw' in the keywords[] table using kw_cmp_func().
 * The table is sorted with qsort() so that bsearch() can be used on it;
 * the static 'sorted' flag is there to track whether that has happened.
 * NOTE(review): a linear scan over the table is also present; it looks
 * like an alternative to the bsearch() path - confirm which one is built.
 * The final test rejects a missing entry and also an entry marked
 * 'isextension' while 'extensions' is zero (the branch body is not
 * visible here; presumably it returns NULL - confirm).
 */
245 static struct keyword *iskeyword(char *kw)
253 /* Make sure that it is sorted for bsearch */
254 static int sorted = 0;
257 qsort(keywords, NKEYWORDS, sizeof(keywords[0]), kw_cmp_func);
263 kwp = bsearch(&key, keywords, NKEYWORDS, sizeof(keywords[0]), kw_cmp_func);
267 for(i = 0; i < NKEYWORDS; i++)
269 if(!kw_cmp_func(&key, &keywords[i]))
279 if(kwp == NULL || (kwp->isextension && !extensions))
288 **************************************************************************
289 * The flexer starts here
290 **************************************************************************
294 * Strip everything until a ';' taking
295 * into account braces {} for structures,
298 <pp_strips>\{ stripslevel++;
299 <pp_strips>\} stripslevel--;
300 <pp_strips>; if(!stripslevel) yy_pop_state();
301 <pp_strips>\/[^*\n] ; /* To catch comments */
302 <pp_strips>[^\{\};\n#/]* ; /* Ignore rest */
303 <pp_strips>\n line_number++; char_number = 1;
305 <pp_stripp>\( stripplevel++;
311 yy_push_state(pp_stripp_final);
314 <pp_stripp>\/[^*\n] ; /* To catch comments */
315 <pp_stripp>[^\(\);\n#/]* ; /* Ignore rest */
316 <pp_stripp>\n line_number++; char_number = 1;
318 <pp_stripp_final>{ws}* ; /* Ignore */
319 <pp_stripp_final>; yy_pop_state(); /* Kill the semicolon */
320 <pp_stripp_final>\n line_number++; char_number = 1; yy_pop_state();
321 <pp_stripp_final>. yyless(0); yy_pop_state();
326 [0-9]+[lL]? { yylval.num = strtoul(yytext, 0, 10); return toupper(yytext[yyleng-1]) == 'L' ? tLNUMBER : tNUMBER; }
327 0[xX][0-9A-Fa-f]+[lL]? { yylval.num = strtoul(yytext, 0, 16); return toupper(yytext[yyleng-1]) == 'L' ? tLNUMBER : tNUMBER; }
328 0[oO][0-7]+[lL]? { yylval.num = strtoul(yytext+2, 0, 8); return toupper(yytext[yyleng-1]) == 'L' ? tLNUMBER : tNUMBER; }
331 * The next two rules scan identifiers and filenames.
332 * This is achieved by using the priority ruling
333 * of the scanner where a '.' is valid in a filename
334 * and *only* in a filename. In this case, the second
335 * rule will be reduced because it is longer.
338 struct keyword *tok = iskeyword(yytext);
342 if(tok->token == tCLASS && !strcmp(yytext, "class"))
344 else if(wanted_id && !tok->alwayskw)
346 yylval.str = make_string(yytext);
354 yylval.str = make_string(yytext);
358 [A-Za-z_0-9.]+ yylval.str = make_string(yytext); return tFILENAME;
361 * Wide string scanning
364 yy_push_state(yylstr);
367 yywarning("16bit resource contains unicode strings\n");
372 yylval.str = get_buffered_wstring();
375 <yylstr>\\[0-7]{1,6} { /* octal escape sequence */
377 result = strtol(yytext+1, 0, 8);
378 if ( result > 0xffff )
379 yyerror("Character constant out of range");
380 addwchar((short)result);
382 <yylstr>\\x[0-9a-fA-F]{4} { /* hex escape sequence */
384 result = strtol(yytext+2, 0, 16);
385 addwchar((short)result);
387 <yylstr>\\x[0-9a-fA-F]{1,3} { yyerror("Invalid hex escape sequence '%s'", yytext); }
389 <yylstr>\\[0-9]+ yyerror("Bad escape secuence");
390 <yylstr>\\a addwchar('\a');
391 <yylstr>\\b addwchar('\b');
392 <yylstr>\\f addwchar('\f');
393 <yylstr>\\n addwchar('\n');
394 <yylstr>\\r addwchar('\r');
395 <yylstr>\\t addwchar('\t');
396 <yylstr>\\v addwchar('\v');
397 <yylstr>\\. if(yytext[1] != '\n') addwchar(yytext[1]);
398 <yylstr>\"\" addwchar('\"'); /* "bla""bla" -> "bla\"bla"; wide strings are collected in wbuffer, so use addwchar */
399 <yylstr>\\\"\" addwchar('\"'); /* "bla\""bla" -> "bla\"bla"; wide strings are collected in wbuffer, so use addwchar */
400 <yylstr>\"{ws}+\" ; /* "bla" "bla" -> "blabla" */
403 while(*yptr) /* FIXME: codepage translation */
404 addwchar(*yptr++ & 0xff);
406 <yylstr>\n yyerror("Unterminated string");
409 * Normal string scanning
411 \" yy_push_state(yystr); cbufidx = 0;
415 yylval.str = get_buffered_cstring();
418 <yystr>\\[0-7]{1,3} { /* octal escape sequence */
420 result = strtol(yytext+1, 0, 8);
422 yyerror("Character constant out of range");
423 addcchar((char)result);
425 <yystr>\\x[0-9a-fA-F]{2} { /* hex escape sequence */
427 result = strtol(yytext+2, 0, 16);
428 addcchar((char)result);
430 <yystr>\\x[0-9a-fA-F] { yyerror("Invalid hex escape sequence '%s'", yytext); }
432 <yystr>\\[0-9]+ yyerror("Bad escape secuence");
433 <yystr>\\a addcchar('\a');
434 <yystr>\\b addcchar('\b');
435 <yystr>\\f addcchar('\f');
436 <yystr>\\n addcchar('\n');
437 <yystr>\\r addcchar('\r');
438 <yystr>\\t addcchar('\t');
439 <yystr>\\v addcchar('\v');
440 <yystr>\\. if(yytext[1] != '\n') addcchar(yytext[1]);
446 <yystr>\"\" addcchar('\"'); /* "bla""bla" -> "bla\"bla" */
447 <yystr>\\\"\" addcchar('\"'); /* "bla\""bla" -> "bla\"bla" */
448 <yystr>\"{ws}+\" ; /* "bla" "bla" -> "blabla" */
449 <yystr>\n yyerror("Unterminated string");
454 \' yy_push_state(yyrcd); cbufidx = 0;
457 yylval.raw = new_raw_data();
458 yylval.raw->size = cbufidx;
459 yylval.raw->data = xmalloc(yylval.raw->size);
460 memcpy(yylval.raw->data, cbuffer, yylval.raw->size);
463 <yyrcd>[0-9a-fA-F]{2} {
465 result = strtol(yytext, 0, 16);
466 addcchar((char)result);
468 <yyrcd>{ws}+ ; /* Ignore space */
469 <yyrcd>\n line_number++; char_number = 1;
470 <yyrcd>. yyerror("Malformed data-line");
474 * Should never occur after preprocessing
476 <INITIAL,pp_stripp,pp_strips>"/*" {
477 yy_push_state(comment);
478 save_wanted_id = wanted_id;
480 yywarning("Found comments after preprocessing, please report");
483 <comment>"*"+[^*/\n]* ;
484 <comment>\n line_number++; char_number = 1;
485 <comment>"*"+"/" yy_pop_state(); want_id = save_wanted_id;
487 ;[^\n]* want_id = wanted_id; /* not really comment, but left-over c-junk */
488 "//"[^\n]* want_id = wanted_id; if(!no_preprocess) yywarning("Found comments after preprocessing, please report");
500 {ws}+ want_id = wanted_id; /* Eat whitespace */
502 <INITIAL>. return yytext[0];
505 if(YY_START == pp_strips || YY_START == pp_stripe || YY_START == pp_stripp || YY_START == pp_stripp_final)
506 yyerror("Unexpected end of file during c-junk scanning (started at %d)", cjunk_tagline);
512 /* Catch all rule to find any unmatched text */
518 yywarning("Unmatched text '%c' (0x%02x) YY_START=%d stripslevel=%d",
519 isprint((unsigned char)*yytext) ? *yytext : '.', (unsigned char)*yytext, YY_START,stripslevel); /* cast to unsigned char: isprint() is undefined for negative char values, and %02x must not print a sign-extended byte */
528 if(bufferstackidx > 0)
537 /* These dup functions copy the enclosed '\0' from
538 * the resource string.
540 static void addcchar(char c)
542 if(cbufidx >= cbufalloc)
545 cbuffer = xrealloc(cbuffer, cbufalloc * sizeof(cbuffer[0]));
546 if(cbufalloc > 65536)
547 yywarning("Reallocating string buffer larger than 64kB");
549 cbuffer[cbufidx++] = c;
552 static void addwchar(short s)
554 if(wbufidx >= wbufalloc)
557 wbuffer = xrealloc(wbuffer, wbufalloc * sizeof(wbuffer[0]));
558 if(wbufalloc > 65536)
559 yywarning("Reallocating wide string buffer larger than 64kB");
563 * BS 08-Aug-1999 FIXME: The '& 0xff' is probably a bug, but I have
564 * not experienced it yet and I seem to remember that this was for
565 * a reason. But, as so many things you tend to forget why.
566 * I guess that there were problems due to the sign extension of
567 * shorts WRT chars (e.g. 0x80 becomes 0xff80 instead of 0x0080).
568 * This should then be fixed in the lexer calling the function.
570 wbuffer[wbufidx++] = (short)(s & 0xff);
573 static string_t *get_buffered_cstring(void)
575 string_t *str = new_string();
577 str->type = str_char;
578 str->str.cstr = (char *)xmalloc(cbufidx+1);
579 memcpy(str->str.cstr, cbuffer, cbufidx);
580 str->str.cstr[cbufidx] = '\0';
/*
 * Build a unicode string_t from the wide characters collected in wbuffer.
 * Space for wbufidx characters plus a terminator is allocated, the
 * collected characters are copied and a terminating 0 is appended.
 * The copy length is given in bytes (element count times element size);
 * passing the bare element count would copy only part of the string.
 */
584 static string_t *get_buffered_wstring(void)
586 string_t *str = new_string();
588 str->type = str_unicode;
589 str->str.wstr = (short *)xmalloc((wbufidx+1) * sizeof(wbuffer[0]));
590 memcpy(str->str.wstr, wbuffer, wbufidx * sizeof(wbuffer[0]));
591 str->str.wstr[wbufidx] = 0;
595 static string_t *make_string(char *s)
597 string_t *str = new_string();
598 str->size = strlen(s);
599 str->type = str_char;
600 str->str.cstr = (char *)xmalloc(str->size+1);
601 memcpy(str->str.cstr, s, str->size+1);
605 /* Called from the parser to kill c-junk */
606 void strip_extern(void)
608 cjunk_tagline = line_number;
609 yy_push_state(pp_stripe);
612 void strip_til_semicolon(void)
614 cjunk_tagline = line_number;
615 yy_push_state(pp_strips);
618 void strip_til_parenthesis(void)
620 cjunk_tagline = line_number;
621 stripplevel = 1; /* One scanned already */
622 yy_push_state(pp_stripp);