widl: Use type_get_type to determine the types of types during typelib generation.
[wine] / tools / wmc / mcl.c
1 /*
2  * Wine Message Compiler lexical scanner
3  *
4  * Copyright 2000 Bertho A. Stultiens (BS)
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19  */
20
21 #include "config.h"
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <ctype.h>
26 #include <assert.h>
27 #include <string.h>
28
29 #include "utils.h"
30 #include "wmc.h"
31 #include "lang.h"
32
33 #include "mcy.tab.h"
34
35 /*
36  * Keywords are case insensitive. All normal input is treated as
37  * being in codepage iso-8859-1 for ascii input files (unicode
38  * page 0) and as equivalent unicode if unicode input is selected.
39  * All normal input, which is not part of a message text, is
40  * enforced to be unicode page 0. Otherwise an error will be
41  * generated. The normal file data should only be ASCII because
42  * that is the basic definition of the grammar.
43  *
44  * Byteorder or unicode input is determined automatically by
45  * reading the first 8 bytes and checking them against unicode
46  * page 0 byteorder (hibyte must be 0).
47  * -- FIXME --
48  * Alternatively, the input is checked against a special byte
49  * sequence to identify the file.
50  * -- FIXME --
51  *
52  *
53  * Keywords:
54  *      Codepages
55  *      Facility
56  *      FacilityNames
57  *      LanguageNames
58  *      MessageId
59  *      MessageIdTypedef
60  *      Severity
61  *      SeverityNames
62  *      SymbolicName
63  *
64  * Default added identifiers for classes:
65  * SeverityNames:
66  *      Success         = 0x0
67  *      Informational   = 0x1
68  *      Warning         = 0x2
69  *      Error           = 0x3
70  * FacilityNames:
71  *      System          = 0x0FF
72  *      Application     = 0xFFF
73  *
74  * The 'Codepages' keyword is a wmc extension.
75  */
76
77 static const WCHAR ustr_application[]   = { 'A', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', 0 };
78 static const WCHAR ustr_codepages[]     = { 'C', 'o', 'd', 'e', 'p', 'a', 'g', 'e', 's', 0 };
79 static const WCHAR ustr_english[]       = { 'E', 'n', 'g', 'l', 'i', 's', 'h', 0 };
80 static const WCHAR ustr_error[]         = { 'E', 'r', 'r', 'o', 'r', 0 };
81 static const WCHAR ustr_facility[]      = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 0 };
82 static const WCHAR ustr_facilitynames[] = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
83 static const WCHAR ustr_informational[] = { 'I', 'n', 'f', 'o', 'r', 'm', 'a', 't', 'i', 'o', 'n', 'a', 'l', 0 };
84 static const WCHAR ustr_language[]      = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 0};
85 static const WCHAR ustr_languagenames[] = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 'N', 'a', 'm', 'e', 's', 0};
86 static const WCHAR ustr_messageid[]     = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 0 };
87 static const WCHAR ustr_messageidtypedef[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 'T', 'y', 'p', 'e', 'd', 'e', 'f', 0 };
88 static const WCHAR ustr_outputbase[]    = { 'O', 'u', 't', 'p', 'u', 't', 'B', 'a', 's', 'e', 0 };
89 static const WCHAR ustr_severity[]      = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 0 };
90 static const WCHAR ustr_severitynames[] = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
91 static const WCHAR ustr_success[]       = { 'S', 'u', 'c', 'c', 'e', 's', 's', 0 };
92 static const WCHAR ustr_symbolicname[]  = { 'S', 'y', 'm', 'b', 'o', 'l', 'i', 'c', 'N', 'a', 'm', 'e', 0 };
93 static const WCHAR ustr_system[]        = { 'S', 'y', 's', 't', 'e', 'm', 0 };
94 static const WCHAR ustr_warning[]       = { 'W', 'a', 'r', 'n', 'i', 'n', 'g', 0 };
95 static const WCHAR ustr_msg00001[]      = { 'm', 's', 'g', '0', '0', '0', '0', '1', 0 };
/*
 * This table is to beat any form of "expression building" to check for
 * correct filename characters. It is also used for ident checks.
 * FIXME: use it more consistently.
 *
 * Each entry is a bitmask of the CH_* flags below, indexed by a
 * character value in the range 0x00 - 0xFF. The element type is
 * unsigned so that entries with the top bit set (CH_INVALID) are stored
 * with their literal value instead of relying on an out-of-range
 * conversion to a possibly signed plain char.
 */

#define CH_SHORTNAME    0x01
#define CH_LONGNAME     0x02
#define CH_IDENT        0x04
#define CH_NUMBER       0x08
/*#define CH_WILDCARD   0x10*/
/*#define CH_DOT        0x20*/
#define CH_PUNCT        0x40
#define CH_INVALID      0x80

static const unsigned char char_table[256] = {
        0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x00 - 0x07 */
        0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x08 - 0x0F */
        0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x10 - 0x17 */
        0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x18 - 0x1F */
        0x80, 0x03, 0x80, 0x03, 0x03, 0x03, 0x03, 0x03, /* 0x20 - 0x27 " !"#$%&'" */
        0x43, 0x43, 0x10, 0x80, 0x03, 0x03, 0x22, 0x80, /* 0x28 - 0x2F "()*+,-./" */
        0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, /* 0x30 - 0x37 "01234567" */
        0x0b, 0x0b, 0xc0, 0x80, 0x80, 0x80, 0x80, 0x10, /* 0x38 - 0x3F "89:;<=>?" */
        0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x40 - 0x47 "@ABCDEFG" */
        0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x48 - 0x4F "HIJKLMNO" */
        0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x50 - 0x57 "PQRSTUVW" */
        0x07, 0x07, 0x07, 0x80, 0x80, 0x80, 0x80, 0x07, /* 0x58 - 0x5F "XYZ[\]^_" */
        0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x60 - 0x67 "`abcdefg" */
        0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x68 - 0x6F "hijklmno" */
        0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x70 - 0x77 "pqrstuvw" */
        0x07, 0x07, 0x07, 0x03, 0x80, 0x03, 0x03, 0x80, /* 0x78 - 0x7F "xyz{|}~ " */
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x80 - 0x87 */
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x88 - 0x8F */
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x90 - 0x97 */
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x98 - 0x9F */
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA0 - 0xA7 */
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA8 - 0xAF */
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB0 - 0xB7 */
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB8 - 0xBF */
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC0 - 0xC7 */
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC8 - 0xCF */
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD0 - 0xD7 */
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD8 - 0xDF */
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE0 - 0xE7 */
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE8 - 0xEF */
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xF0 - 0xF7 */
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x80, /* 0xF8 - 0xFF */
};
145
/*
 * Return 1 when ch has no bits set above the low byte (i.e. it lies in
 * the range 0x00 - 0xFF) and 0 otherwise. Negative values such as EOF
 * yield 0.
 */
static int isisochar(int ch)
{
        const int high_bits = ch & ~0xff;

        return high_bits == 0;
}
150
151 static int codepage;
152 static const union cptable *codepage_def;
153
154 void set_codepage(int cp)
155 {
156         codepage = cp;
157         codepage_def = find_codepage(codepage);
158         if(!codepage_def)
159                 xyyerror("Codepage %d not found; cannot process\n", codepage);
160 }
161
162 /*
163  * Input functions
164  */
165 static int nungetstack = 0;
166 static int allocungetstack = 0;
167 static char *ungetstack = NULL;
168 static int ninputbuffer = 0;
169 static WCHAR *inputbuffer = NULL;
170 static char *xlatebuffer = NULL;
171
172 #define INPUTBUFFER_SIZE        2048    /* Must be larger than 4 and approx. large enough to hold a line */
173
174 /*
175  * Fill the input buffer with *one* line of input.
176  * The line is '\n' terminated so that scanning
177  * messages with translation works as expected
178  * (otherwise we cannot pre-translate because the
179  * language is first known one line before the
180  * actual message).
181  */
182 static int fill_inputbuffer(void)
183 {
184         int n;
185         static const char err_fatalread[] = "Fatal: reading input failed";
186         static int endian = -1;
187
188         if(!inputbuffer)
189         {
190                 inputbuffer = xmalloc(INPUTBUFFER_SIZE*sizeof(WCHAR));
191                 xlatebuffer = xmalloc(INPUTBUFFER_SIZE);
192         }
193
194 try_again:
195         if(!unicodein)
196         {
197                 char *cptr;
198                 cptr = fgets(xlatebuffer, INPUTBUFFER_SIZE, yyin);
199                 if(!cptr && ferror(yyin))
200                         xyyerror(err_fatalread);
201                 else if(!cptr)
202                         return 0;
203                 assert(codepage_def != NULL);
204                 n = wine_cp_mbstowcs(codepage_def, 0, xlatebuffer, strlen(xlatebuffer)+1, inputbuffer, INPUTBUFFER_SIZE);
205                 if(n < 0)
206                         internal_error(__FILE__, __LINE__, "Could not translate to unicode (%d)\n", n);
207                 if(n <= 1)
208                         goto try_again; /* Should not happen */
209                 n--;    /* Strip added conversion '\0' from input length */
210                 /*
211                  * FIXME:
212                  * Detect UTF-8 in the first time we read some bytes by
213                  * checking the special sequence "FE..." or something like
214                  * that. I need to check www.unicode.org for details.
215                  */
216         }
217         else
218         {
219                 if(endian == -1)
220                 {
221                         n = fread(inputbuffer, 1, 8, yyin);
222                         if(n != 8)
223                         {
224                                 if(!n && ferror(yyin))
225                                         xyyerror(err_fatalread);
226                                 else
227                                         xyyerror("Fatal: file to short to determine byteorder (should never happen)\n");
228                         }
229                         if(isisochar(inputbuffer[0]) &&
230                                 isisochar(inputbuffer[1]) &&
231                                 isisochar(inputbuffer[2]) &&
232                                 isisochar(inputbuffer[3]))
233                         {
234 #ifdef WORDS_BIGENDIAN
235                                 endian = WMC_BO_BIG;
236 #else
237                                 endian = WMC_BO_LITTLE;
238 #endif
239                         }
240                         else if(isisochar(BYTESWAP_WORD(inputbuffer[0])) &&
241                                 isisochar(BYTESWAP_WORD(inputbuffer[1])) &&
242                                 isisochar(BYTESWAP_WORD(inputbuffer[2])) &&
243                                 isisochar(BYTESWAP_WORD(inputbuffer[3])))
244                         {
245 #ifdef WORDS_BIGENDIAN
246                                 endian = WMC_BO_LITTLE;
247 #else
248                                 endian = WMC_BO_BIG;
249 #endif
250                         }
251                         else
252                                 xyyerror("Fatal: cannot determine file's byteorder\n");
253                         /* FIXME:
254                          * Determine the file-endian with the leader-bytes
255                          * "FF FE..."; can't remember the exact sequence.
256                          */
257                         n /= 2;
258 #ifdef WORDS_BIGENDIAN
259                         if(endian == WMC_BO_LITTLE)
260 #else
261                         if(endian == WMC_BO_BIG)
262 #endif
263                         {
264                                 inputbuffer[0] = BYTESWAP_WORD(inputbuffer[0]);
265                                 inputbuffer[1] = BYTESWAP_WORD(inputbuffer[1]);
266                                 inputbuffer[2] = BYTESWAP_WORD(inputbuffer[2]);
267                                 inputbuffer[3] = BYTESWAP_WORD(inputbuffer[3]);
268                         }
269
270                 }
271                 else
272                 {
273                         int i;
274                         n = 0;
275                         for(i = 0; i < INPUTBUFFER_SIZE; i++)
276                         {
277                                 int t;
278                                 t = fread(&inputbuffer[i], 2, 1, yyin);
279                                 if(!t && ferror(yyin))
280                                         xyyerror(err_fatalread);
281                                 else if(!t && n)
282                                         break;
283                                 n++;
284 #ifdef WORDS_BIGENDIAN
285                                 if(endian == WMC_BO_LITTLE)
286 #else
287                                 if(endian == WMC_BO_BIG)
288 #endif
289                                 {
290                                         if((inputbuffer[i] = BYTESWAP_WORD(inputbuffer[i])) == '\n')
291                                                 break;
292                                 }
293                                 else
294                                 {
295                                         if(inputbuffer[i] == '\n')
296                                                 break;
297                                 }
298                         }
299                 }
300
301         }
302
303         if(!n)
304         {
305                 mcy_warning("Re-read line (input was or converted to zilch)\n");
306                 goto try_again; /* Should not happen, but could be due to stdin reading and a signal */
307         }
308
309         ninputbuffer += n;
310         return 1;
311 }
312
313 static int get_unichar(void)
314 {
315         static WCHAR *b = NULL;
316         char_number++;
317
318         if(nungetstack)
319                 return ungetstack[--nungetstack];
320
321         if(!ninputbuffer)
322         {
323                 if(!fill_inputbuffer())
324                         return EOF;
325                 b = inputbuffer;
326         }
327
328         ninputbuffer--;
329         return (int)(*b++ & 0xffff);
330 }
331
332 static void unget_unichar(int ch)
333 {
334         if(ch == EOF)
335                 return;
336
337         char_number--;
338
339         if(nungetstack == allocungetstack)
340         {
341                 allocungetstack += 32;
342                 ungetstack = xrealloc(ungetstack, allocungetstack * sizeof(*ungetstack));
343         }
344
345         ungetstack[nungetstack++] = (WCHAR)ch;
346 }
347
348
349 /*
350  * Normal character stack.
351  * Used for number scanning.
352  */
353 static int ncharstack = 0;
354 static int alloccharstack = 0;
355 static char *charstack = NULL;
356
357 static void empty_char_stack(void)
358 {
359         ncharstack = 0;
360 }
361
362 static void push_char(int ch)
363 {
364         if(ncharstack == alloccharstack)
365         {
366                 alloccharstack += 32;
367                 charstack = xrealloc(charstack, alloccharstack * sizeof(*charstack));
368         }
369         charstack[ncharstack++] = (char)ch;
370 }
371
372 static int tos_char_stack(void)
373 {
374         if(!ncharstack)
375                 return 0;
376         else
377                 return (int)(charstack[ncharstack-1] & 0xff);
378 }
379
380 static char *get_char_stack(void)
381 {
382         return charstack;
383 }
384
385 /*
386  * Unicode character stack.
387  * Used for general scanner.
388  */
389 static int nunicharstack = 0;
390 static int allocunicharstack = 0;
391 static WCHAR *unicharstack = NULL;
392
393 static void empty_unichar_stack(void)
394 {
395         nunicharstack = 0;
396 }
397
398 static void push_unichar(int ch)
399 {
400         if(nunicharstack == allocunicharstack)
401         {
402                 allocunicharstack += 128;
403                 unicharstack = xrealloc(unicharstack, allocunicharstack * sizeof(*unicharstack));
404         }
405         unicharstack[nunicharstack++] = (WCHAR)ch;
406 }
407
408 #if 0
409 static int tos_unichar_stack(void)
410 {
411         if(!nunicharstack)
412                 return 0;
413         else
414                 return (int)(unicharstack[nunicharstack-1] & 0xffff);
415 }
416 #endif
417
418 static WCHAR *get_unichar_stack(void)
419 {
420         return unicharstack;
421 }
422
423 /*
424  * Number scanner
425  *
426  * state |      ch         | next state
427  * ------+-----------------+--------------------------
428  *   0   | [0]             | 1
429  *   0   | [1-9]           | 4
430  *   0   | .               | error (should never occur)
431  *   1   | [xX]            | 2
432  *   1   | [0-7]           | 3
433  *   1   | [89a-wyzA-WYZ_] | error invalid digit
434  *   1   | .               | return 0
435  *   2   | [0-9a-fA-F]     | 2
436  *   2   | [g-zG-Z_]       | error invalid hex digit
437  *   2   | .               | return (hex-number) if TOS != [xX] else error
438  *   3   | [0-7]           | 3
439  *   3   | [89a-zA-Z_]     | error invalid octal digit
440  *   3   | .               | return (octal-number)
441  *   4   | [0-9]           | 4
442  *   4   | [a-zA-Z_]       | error invalid decimal digit
443  *   4   | .               | return (decimal-number)
444  *
445  * All non-identifier characters [^a-zA-Z_0-9] terminate the scan
446  * and return the value. This is not entirely correct, but close
447  * enough (should check punctuators as trailing context, but the
448  * char_table is not adapted to that and it is questionable whether
449  * it is worth the trouble).
450  * All non-iso-8859-1 characters are an error.
451  */
452 static int scan_number(int ch)
453 {
454         int state = 0;
455         int base = 10;
456         empty_char_stack();
457
458         while(1)
459         {
460                 if(!isisochar(ch))
461                         xyyerror("Invalid digit\n");
462
463                 switch(state)
464                 {
465                 case 0:
466                         if(isdigit(ch))
467                         {
468                                 push_char(ch);
469                                 if(ch == '0')
470                                         state = 1;
471                                 else
472                                         state = 4;
473                         }
474                         else
475                                 internal_error(__FILE__, __LINE__, "Non-digit in first number-scanner state\n");
476                         break;
477                 case 1:
478                         if(ch == 'x' || ch == 'X')
479                         {
480                                 push_char(ch);
481                                 state = 2;
482                         }
483                         else if(ch >= '0' && ch <= '7')
484                         {
485                                 push_char(ch);
486                                 state = 3;
487                         }
488                         else if(isalpha(ch) || ch == '_')
489                                 xyyerror("Invalid number digit\n");
490                         else
491                         {
492                                 unget_unichar(ch);
493                                 mcy_lval.num = 0;
494                                 return tNUMBER;
495                         }
496                         break;
497                 case 2:
498                         if(isxdigit(ch))
499                                 push_char(ch);
500                         else if(isalpha(ch) || ch == '_' || !isxdigit(tos_char_stack()))
501                                 xyyerror("Invalid hex digit\n");
502                         else
503                         {
504                                 base = 16;
505                                 goto finish;
506                         }
507                         break;
508                 case 3:
509                         if(ch >= '0' && ch <= '7')
510                                 push_char(ch);
511                         else if(isalnum(ch) || ch == '_')
512                                 xyyerror("Invalid octal digit\n");
513                         else
514                         {
515                                 base = 8;
516                                 goto finish;
517                         }
518                         break;
519                 case 4:
520                         if(isdigit(ch))
521                                 push_char(ch);
522                         else if(isalnum(ch) || ch == '_')
523                                 xyyerror("Invalid decimal digit\n");
524                         else
525                         {
526                                 base = 10;
527                                 goto finish;
528                         }
529                         break;
530                 default:
531                         internal_error(__FILE__, __LINE__, "Invalid state in number-scanner\n");
532                 }
533                 ch = get_unichar();
534         }
535 finish:
536         unget_unichar(ch);
537         push_char(0);
538         mcy_lval.num = strtoul(get_char_stack(), NULL, base);
539         return tNUMBER;
540 }
541
542 static void newline(void)
543 {
544         line_number++;
545         char_number = 1;
546 }
547
548 static int unisort(const void *p1, const void *p2)
549 {
550         return unistricmp(((const token_t *)p1)->name, ((const token_t *)p2)->name);
551 }
552
553 static token_t *tokentable = NULL;
554 static int ntokentable = 0;
555
556 token_t *lookup_token(const WCHAR *s)
557 {
558         token_t tok;
559
560         tok.name = s;
561         return (token_t *)bsearch(&tok, tokentable, ntokentable, sizeof(*tokentable), unisort);
562 }
563
564 void add_token(tok_e type, const WCHAR *name, int tok, int cp, const WCHAR *alias, int fix)
565 {
566         ntokentable++;
567         tokentable = xrealloc(tokentable, ntokentable * sizeof(*tokentable));
568         tokentable[ntokentable-1].type = type;
569         tokentable[ntokentable-1].name = name;
570         tokentable[ntokentable-1].token = tok;
571         tokentable[ntokentable-1].codepage = cp;
572         tokentable[ntokentable-1].alias = alias;
573         tokentable[ntokentable-1].fixed = fix;
574         qsort(tokentable, ntokentable, sizeof(*tokentable), unisort);
575 }
576
577 void get_tokentable(token_t **tab, int *len)
578 {
579         assert(tab != NULL);
580         assert(len != NULL);
581         *tab = tokentable;
582         *len = ntokentable;
583 }
584
585 /*
586  * The scanner
587  *
588  */
589 int mcy_lex(void)
590 {
591         static const WCHAR ustr_dot1[] = { '.', '\n', 0 };
592         static const WCHAR ustr_dot2[] = { '.', '\r', '\n', 0 };
593         static int isinit = 0;
594         int ch;
595
596         if(!isinit)
597         {
598                 isinit++;
599                 set_codepage(WMC_DEFAULT_CODEPAGE);
600                 add_token(tok_keyword,  ustr_codepages,         tCODEPAGE,      0, NULL, 0);
601                 add_token(tok_keyword,  ustr_facility,          tFACILITY,      0, NULL, 1);
602                 add_token(tok_keyword,  ustr_facilitynames,     tFACNAMES,      0, NULL, 1);
603                 add_token(tok_keyword,  ustr_language,          tLANGUAGE,      0, NULL, 1);
604                 add_token(tok_keyword,  ustr_languagenames,     tLANNAMES,      0, NULL, 1);
605                 add_token(tok_keyword,  ustr_messageid,         tMSGID,         0, NULL, 1);
606                 add_token(tok_keyword,  ustr_messageidtypedef,  tTYPEDEF,       0, NULL, 1);
607                 add_token(tok_keyword,  ustr_outputbase,        tBASE,          0, NULL, 1);
608                 add_token(tok_keyword,  ustr_severity,          tSEVERITY,      0, NULL, 1);
609                 add_token(tok_keyword,  ustr_severitynames,     tSEVNAMES,      0, NULL, 1);
610                 add_token(tok_keyword,  ustr_symbolicname,      tSYMNAME,       0, NULL, 1);
611                 add_token(tok_severity, ustr_error,             0x03,           0, NULL, 0);
612                 add_token(tok_severity, ustr_warning,           0x02,           0, NULL, 0);
613                 add_token(tok_severity, ustr_informational,     0x01,           0, NULL, 0);
614                 add_token(tok_severity, ustr_success,           0x00,           0, NULL, 0);
615                 add_token(tok_facility, ustr_application,       0xFFF,          0, NULL, 0);
616                 add_token(tok_facility, ustr_system,            0x0FF,          0, NULL, 0);
617                 add_token(tok_language, ustr_english,           0x409,          437, ustr_msg00001, 0);
618         }
619
620         empty_unichar_stack();
621
622         while(1)
623         {
624                 if(want_line)
625                 {
626                         while((ch = get_unichar()) != '\n')
627                         {
628                                 if(ch == EOF)
629                                         xyyerror("Unexpected EOF\n");
630                                 push_unichar(ch);
631                         }
632                         newline();
633                         push_unichar(ch);
634                         push_unichar(0);
635                         if(!unistrcmp(ustr_dot1, get_unichar_stack()) || !unistrcmp(ustr_dot2, get_unichar_stack()))
636                         {
637                                 want_line = 0;
638                                 /* Reset the codepage to our default after each message */
639                                 set_codepage(WMC_DEFAULT_CODEPAGE);
640                                 return tMSGEND;
641                         }
642                         mcy_lval.str = xunistrdup(get_unichar_stack());
643                         return tLINE;
644                 }
645
646                 ch = get_unichar();
647
648                 if(ch == EOF)
649                         return EOF;
650
651                 if(ch == '\n')
652                 {
653                         newline();
654                         if(want_nl)
655                         {
656                                 want_nl = 0;
657                                 return tNL;
658                         }
659                         continue;
660                 }
661
662                 if(isisochar(ch))
663                 {
664                         if(want_file)
665                         {
666                                 int n = 0;
667                                 while(n < 8 && isisochar(ch))
668                                 {
669                                         int t = char_table[ch];
670                                         if((t & CH_PUNCT) || !(t & CH_SHORTNAME))
671                                                 break;
672
673                                         push_unichar(ch);
674                                         n++;
675                                         ch = get_unichar();
676                                 }
677                                 unget_unichar(ch);
678                                 push_unichar(0);
679                                 want_file = 0;
680                                 mcy_lval.str = xunistrdup(get_unichar_stack());
681                                 return tFILE;
682                         }
683
684                         if(char_table[ch] & CH_IDENT)
685                         {
686                                 token_t *tok;
687                                 while(isisochar(ch) && (char_table[ch] & (CH_IDENT|CH_NUMBER)))
688                                 {
689                                         push_unichar(ch);
690                                         ch = get_unichar();
691                                 }
692                                 unget_unichar(ch);
693                                 push_unichar(0);
694                                 if(!(tok = lookup_token(get_unichar_stack())))
695                                 {
696                                         mcy_lval.str = xunistrdup(get_unichar_stack());
697                                         return tIDENT;
698                                 }
699                                 switch(tok->type)
700                                 {
701                                 case tok_keyword:
702                                         return tok->token;
703
704                                 case tok_language:
705                                         codepage = tok->codepage;
706                                         /* Fall through */
707                                 case tok_severity:
708                                 case tok_facility:
709                                         mcy_lval.tok = tok;
710                                         return tTOKEN;
711
712                                 default:
713                                         internal_error(__FILE__, __LINE__, "Invalid token type encountered\n");
714                                 }
715                         }
716
717                         if(isspace(ch)) /* Ignore space */
718                                 continue;
719
720                         if(isdigit(ch))
721                                 return scan_number(ch);
722                 }
723
724                 switch(ch)
725                 {
726                 case ':':
727                 case '=':
728                 case '+':
729                 case '(':
730                 case ')':
731                         return ch;
732                 case ';':
733                         while(ch != '\n' && ch != EOF)
734                         {
735                                 push_unichar(ch);
736                                 ch = get_unichar();
737                         }
738                         newline();
739                         push_unichar(ch);       /* Include the newline */
740                         push_unichar(0);
741                         mcy_lval.str = xunistrdup(get_unichar_stack());
742                         return tCOMMENT;
743                 default:
744                         xyyerror("Invalid character '%c' (0x%04x)\n", isisochar(ch) && isprint(ch) ? ch : '.', ch);
745                 }
746         }
747 }