git.oblomov.eu Git - wine/blob - dlls/vbscript/lex.c

   1 /*
   2  * Copyright 2011 Jacek Caban for CodeWeavers
   3  *
   4  * This library is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2.1 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with this library; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  17  */
  18
  19 #include <assert.h>
  20
  21 #include "vbscript.h"
  22 #include "parse.h"
  23 #include "parser.tab.h"
  24
  25 #include "wine/debug.h"
  26
  27 WINE_DEFAULT_DEBUG_CHANNEL(vbscript);
  28
/*
 * NUL-terminated spellings of the reserved words referenced by the keywords
 * table below.  They are written in lowercase because check_keyword() folds
 * the input with tolowerW() before comparing it against these strings.
 */
static const WCHAR andW[] = {'a','n','d',0};
static const WCHAR byrefW[] = {'b','y','r','e','f',0};
static const WCHAR byvalW[] = {'b','y','v','a','l',0};
static const WCHAR callW[] = {'c','a','l','l',0};
static const WCHAR classW[] = {'c','l','a','s','s',0};
static const WCHAR constW[] = {'c','o','n','s','t',0};
static const WCHAR defaultW[] = {'d','e','f','a','u','l','t',0};
static const WCHAR dimW[] = {'d','i','m',0};
static const WCHAR doW[] = {'d','o',0};
static const WCHAR eachW[] = {'e','a','c','h',0};
static const WCHAR elseW[] = {'e','l','s','e',0};
static const WCHAR elseifW[] = {'e','l','s','e','i','f',0};
static const WCHAR emptyW[] = {'e','m','p','t','y',0};
static const WCHAR endW[] = {'e','n','d',0};
static const WCHAR eqvW[] = {'e','q','v',0};
static const WCHAR errorW[] = {'e','r','r','o','r',0};
static const WCHAR exitW[] = {'e','x','i','t',0};
static const WCHAR explicitW[] = {'e','x','p','l','i','c','i','t',0};
static const WCHAR falseW[] = {'f','a','l','s','e',0};
static const WCHAR forW[] = {'f','o','r',0};
static const WCHAR functionW[] = {'f','u','n','c','t','i','o','n',0};
static const WCHAR getW[] = {'g','e','t',0};
static const WCHAR gotoW[] = {'g','o','t','o',0};
static const WCHAR ifW[] = {'i','f',0};
static const WCHAR impW[] = {'i','m','p',0};
static const WCHAR inW[] = {'i','n',0};
static const WCHAR isW[] = {'i','s',0};
static const WCHAR letW[] = {'l','e','t',0};
static const WCHAR loopW[] = {'l','o','o','p',0};
static const WCHAR meW[] = {'m','e',0};
static const WCHAR modW[] = {'m','o','d',0};
static const WCHAR newW[] = {'n','e','w',0};
static const WCHAR nextW[] = {'n','e','x','t',0};
static const WCHAR notW[] = {'n','o','t',0};
static const WCHAR nothingW[] = {'n','o','t','h','i','n','g',0};
static const WCHAR nullW[] = {'n','u','l','l',0};
static const WCHAR onW[] = {'o','n',0};
static const WCHAR optionW[] = {'o','p','t','i','o','n',0};
static const WCHAR orW[] = {'o','r',0};
static const WCHAR privateW[] = {'p','r','i','v','a','t','e',0};
static const WCHAR propertyW[] = {'p','r','o','p','e','r','t','y',0};
static const WCHAR publicW[] = {'p','u','b','l','i','c',0};
static const WCHAR remW[] = {'r','e','m',0};
static const WCHAR resumeW[] = {'r','e','s','u','m','e',0};
static const WCHAR setW[] = {'s','e','t',0};
static const WCHAR stepW[] = {'s','t','e','p',0};
static const WCHAR stopW[] = {'s','t','o','p',0};
static const WCHAR subW[] = {'s','u','b',0};
static const WCHAR thenW[] = {'t','h','e','n',0};
static const WCHAR toW[] = {'t','o',0};
static const WCHAR trueW[] = {'t','r','u','e',0};
static const WCHAR untilW[] = {'u','n','t','i','l',0};
static const WCHAR wendW[] = {'w','e','n','d',0};
static const WCHAR whileW[] = {'w','h','i','l','e',0};
static const WCHAR xorW[] = {'x','o','r',0};
  84
/*
 * Reserved word -> token mapping (token constants presumably come from
 * parser.tab.h).
 *
 * check_keywords() runs a binary search over this array, steering by the
 * sign of check_keyword()'s result, so the entries MUST stay sorted by
 * ascending character value of 'word'.  Insert new keywords at their sorted
 * position, not at the end.
 */
static const struct {
    const WCHAR *word;  /* lowercase spelling of the keyword */
    int token;          /* token returned when the keyword is matched */
} keywords[] = {
    {andW,       tAND},
    {byrefW,     tBYREF},
    {byvalW,     tBYVAL},
    {callW,      tCALL},
    {classW,     tCLASS},
    {constW,     tCONST},
    {defaultW,   tDEFAULT},
    {dimW,       tDIM},
    {doW,        tDO},
    {eachW,      tEACH},
    {elseW,      tELSE},
    {elseifW,    tELSEIF},
    {emptyW,     tEMPTY},
    {endW,       tEND},
    {eqvW,       tEQV},
    {errorW,     tERROR},
    {exitW,      tEXIT},
    {explicitW,  tEXPLICIT},
    {falseW,     tFALSE},
    {forW,       tFOR},
    {functionW,  tFUNCTION},
    {getW,       tGET},
    {gotoW,      tGOTO},
    {ifW,        tIF},
    {impW,       tIMP},
    {inW,        tIN},
    {isW,        tIS},
    {letW,       tLET},
    {loopW,      tLOOP},
    {meW,        tME},
    {modW,       tMOD},
    {newW,       tNEW},
    {nextW,      tNEXT},
    {notW,       tNOT},
    {nothingW,   tNOTHING},
    {nullW,      tNULL},
    {onW,        tON},
    {optionW,    tOPTION},
    {orW,        tOR},
    {privateW,   tPRIVATE},
    {propertyW,  tPROPERTY},
    {publicW,    tPUBLIC},
    {remW,       tREM},
    {resumeW,    tRESUME},
    {setW,       tSET},
    {stepW,      tSTEP},
    {stopW,      tSTOP},
    {subW,       tSUB},
    {thenW,      tTHEN},
    {toW,        tTO},
    {trueW,      tTRUE},
    {untilW,     tUNTIL},
    {wendW,      tWEND},
    {whileW,     tWHILE},
    {xorW,       tXOR}
};
 145
 146 static inline BOOL is_identifier_char(WCHAR c)
 147 {
 148     return isalnumW(c) || c == '_';
 149 }
 150
 151 static int check_keyword(parser_ctx_t *ctx, const WCHAR *word)
 152 {
 153     const WCHAR *p1 = ctx->ptr;
 154     const WCHAR *p2 = word;
 155     WCHAR c;
 156
 157     while(p1 < ctx->end && *p2) {
 158         c = tolowerW(*p1);
 159         if(c != *p2)
 160             return c - *p2;
 161         p1++;
 162         p2++;
 163     }
 164
 165     if(*p2 || (p1 < ctx->end && is_identifier_char(*p1)))
 166         return 1;
 167
 168     ctx->ptr = p1;
 169     return 0;
 170 }
 171
 172 static int check_keywords(parser_ctx_t *ctx)
 173 {
 174     int min = 0, max = sizeof(keywords)/sizeof(keywords[0])-1, r, i;
 175
 176     while(min <= max) {
 177         i = (min+max)/2;
 178
 179         r = check_keyword(ctx, keywords[i].word);
 180         if(!r)
 181             return keywords[i].token;
 182
 183         if(r > 0)
 184             min = i+1;
 185         else
 186             max = i-1;
 187     }
 188
 189     return 0;
 190 }
 191
 192 static int parse_identifier(parser_ctx_t *ctx, const WCHAR **ret)
 193 {
 194     const WCHAR *ptr = ctx->ptr++;
 195     WCHAR *str;
 196     int len;
 197
 198     while(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr))
 199         ctx->ptr++;
 200     len = ctx->ptr-ptr;
 201
 202     str = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
 203     if(!str)
 204         return 0;
 205
 206     memcpy(str, ptr, (len+1)*sizeof(WCHAR));
 207     str[len] = 0;
 208     *ret = str;
 209     return tIdentifier;
 210 }
 211
 212 static int parse_string_literal(parser_ctx_t *ctx, const WCHAR **ret)
 213 {
 214     const WCHAR *ptr = ++ctx->ptr;
 215     WCHAR *rptr;
 216     int len = 0;
 217
 218     while(ctx->ptr < ctx->end) {
 219         if(*ctx->ptr == '\n') {
 220             FIXME("newline inside string literal\n");
 221             return 0;
 222         }
 223
 224        if(*ctx->ptr == '"') {
 225             if(ctx->ptr[1] != '"')
 226                 break;
 227             len--;
 228             ctx->ptr++;
 229         }
 230         ctx->ptr++;
 231     }
 232
 233     if(ctx->ptr == ctx->end) {
 234         FIXME("unterminated string literal\n");
 235         return 0;
 236     }
 237
 238     len += ctx->ptr-ptr;
 239
 240     *ret = rptr = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
 241     if(!rptr)
 242         return 0;
 243
 244     while(ptr < ctx->ptr) {
 245         if(*ptr == '"')
 246             ptr++;
 247         *rptr++ = *ptr++;
 248     }
 249
 250     *rptr = 0;
 251     ctx->ptr++;
 252     return tString;
 253 }
 254
 255 static int parse_numeric_literal(parser_ctx_t *ctx, void **ret)
 256 {
 257     double n = 0;
 258
 259     if(*ctx->ptr == '0' && !('0' <= ctx->ptr[1] && ctx->ptr[1] <= '9') && ctx->ptr[1] != '.')
 260         return *ctx->ptr++;
 261
 262     do {
 263         n = n*10 + *ctx->ptr++ - '0';
 264     }while('0' <= *ctx->ptr && *ctx->ptr <= '9');
 265
 266     if(*ctx->ptr != '.') {
 267         if((LONG)n == n) {
 268             LONG l = n;
 269             *(LONG*)ret = l;
 270             return (short)l == l ? tShort : tLong;
 271         }
 272     }else {
 273         double e = 1.0;
 274         while('0' <= *++ctx->ptr && *ctx->ptr <= '9')
 275             n += (e /= 10.0)*(*ctx->ptr-'0');
 276     }
 277
 278     *(double*)ret = n;
 279     return tDouble;
 280 }
 281
 282 static int hex_to_int(WCHAR c)
 283 {
 284     if('0' <= c && c <= '9')
 285         return c-'0';
 286     if('a' <= c && c <= 'f')
 287         return c+10-'a';
 288     if('A' <= c && c <= 'F')
 289         return c+10-'A';
 290     return -1;
 291 }
 292
 293 static int parse_hex_literal(parser_ctx_t *ctx, LONG *ret)
 294 {
 295     const WCHAR *begin = ctx->ptr;
 296     LONG l = 0, d;
 297
 298     while((d = hex_to_int(*++ctx->ptr)) != -1)
 299         l = l*16 + d;
 300
 301     if(begin + 9 /* max digits+1 */ < ctx->ptr || (*ctx->ptr != '&' && is_identifier_char(*ctx->ptr))) {
 302         FIXME("invalid literal\n");
 303         return 0;
 304     }
 305
 306     if(*ctx->ptr == '&')
 307         ctx->ptr++;
 308
 309     *ret = l;
 310     return (short)l == l ? tShort : tLong;
 311 }
 312
 313 static void skip_spaces(parser_ctx_t *ctx)
 314 {
 315     while(*ctx->ptr == ' ' || *ctx->ptr == '\t' || *ctx->ptr == '\r')
 316         ctx->ptr++;
 317 }
 318
 319 static int parse_next_token(void *lval, parser_ctx_t *ctx)
 320 {
 321     WCHAR c;
 322
 323     skip_spaces(ctx);
 324     if(ctx->ptr == ctx->end)
 325         return ctx->last_token == tNL ? tEOF : tNL;
 326
 327     c = *ctx->ptr;
 328
 329     if('0' <= c && c <= '9')
 330         return parse_numeric_literal(ctx, lval);
 331
 332     if(isalphaW(c)) {
 333         int ret = check_keywords(ctx);
 334         if(!ret)
 335             return parse_identifier(ctx, lval);
 336         if(ret != tREM)
 337             return ret;
 338         c = '\'';
 339     }
 340
 341     switch(c) {
 342     case '\n':
 343         ctx->ptr++;
 344         return tNL;
 345     case '\'':
 346         ctx->ptr = strchrW(ctx->ptr, '\n');
 347         if(ctx->ptr)
 348             ctx->ptr++;
 349         else
 350             ctx->ptr = ctx->end;
 351         return tNL;
 352     case ':':
 353     case ')':
 354     case ',':
 355     case '=':
 356     case '+':
 357     case '-':
 358     case '*':
 359     case '/':
 360     case '^':
 361     case '\\':
 362     case '.':
 363     case '_':
 364         return *ctx->ptr++;
 365     case '(':
 366         /* NOTE:
 367          * We resolve empty brackets in lexer instead of parser to avoid complex conflicts
 368          * in call statement special case |f()| without 'call' keyword
 369          */
 370         ctx->ptr++;
 371         skip_spaces(ctx);
 372         if(*ctx->ptr == ')') {
 373             ctx->ptr++;
 374             return tEMPTYBRACKETS;
 375         }
 376         return '(';
 377     case '"':
 378         return parse_string_literal(ctx, lval);
 379     case '&':
 380         if(*++ctx->ptr == 'h' || *ctx->ptr == 'H')
 381             return parse_hex_literal(ctx, lval);
 382         return '&';
 383     case '<':
 384         switch(*++ctx->ptr) {
 385         case '>':
 386             ctx->ptr++;
 387             return tNEQ;
 388         case '=':
 389             ctx->ptr++;
 390             return tLTEQ;
 391         }
 392         return '<';
 393     case '>':
 394         if(*++ctx->ptr == '=') {
 395             ctx->ptr++;
 396             return tGTEQ;
 397         }
 398         return '>';
 399     default:
 400         FIXME("Unhandled char %c in %s\n", *ctx->ptr, debugstr_w(ctx->ptr));
 401     }
 402
 403     return 0;
 404 }
 405
 406 int parser_lex(void *lval, parser_ctx_t *ctx)
 407 {
 408     int ret;
 409
 410     while(1) {
 411         ret = parse_next_token(lval, ctx);
 412         if(ret == '_') {
 413             skip_spaces(ctx);
 414             if(*ctx->ptr != '\n') {
 415                 FIXME("'_' not followed by newline\n");
 416                 return 0;
 417             }
 418             ctx->ptr++;
 419             continue;
 420         }
 421         if(ret != tNL || ctx->last_token != tNL)
 422             break;
 423
 424         ctx->last_nl = ctx->ptr-ctx->code;
 425     }
 426
 427     return (ctx->last_token = ret);
 428 }