git.oblomov.eu Git - wine/blob - dlls/vbscript/lex.c

   1 /*
   2  * Copyright 2011 Jacek Caban for CodeWeavers
   3  *
   4  * This library is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2.1 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with this library; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  17  */
  18
  19 #include <assert.h>
  20
  21 #include "vbscript.h"
  22 #include "parse.h"
  23 #include "parser.tab.h"
  24
  25 #include "wine/debug.h"
  26
  27 WINE_DEFAULT_DEBUG_CHANNEL(vbscript);
  28
  29 static const WCHAR andW[] = {'a','n','d',0};
  30 static const WCHAR byrefW[] = {'b','y','r','e','f',0};
  31 static const WCHAR byvalW[] = {'b','y','v','a','l',0};
  32 static const WCHAR callW[] = {'c','a','l','l',0};
  33 static const WCHAR caseW[] = {'c','a','s','e',0};
  34 static const WCHAR classW[] = {'c','l','a','s','s',0};
  35 static const WCHAR constW[] = {'c','o','n','s','t',0};
  36 static const WCHAR defaultW[] = {'d','e','f','a','u','l','t',0};
  37 static const WCHAR dimW[] = {'d','i','m',0};
  38 static const WCHAR doW[] = {'d','o',0};
  39 static const WCHAR eachW[] = {'e','a','c','h',0};
  40 static const WCHAR elseW[] = {'e','l','s','e',0};
  41 static const WCHAR elseifW[] = {'e','l','s','e','i','f',0};
  42 static const WCHAR emptyW[] = {'e','m','p','t','y',0};
  43 static const WCHAR endW[] = {'e','n','d',0};
  44 static const WCHAR eqvW[] = {'e','q','v',0};
  45 static const WCHAR errorW[] = {'e','r','r','o','r',0};
  46 static const WCHAR exitW[] = {'e','x','i','t',0};
  47 static const WCHAR explicitW[] = {'e','x','p','l','i','c','i','t',0};
  48 static const WCHAR falseW[] = {'f','a','l','s','e',0};
  49 static const WCHAR forW[] = {'f','o','r',0};
  50 static const WCHAR functionW[] = {'f','u','n','c','t','i','o','n',0};
  51 static const WCHAR getW[] = {'g','e','t',0};
  52 static const WCHAR gotoW[] = {'g','o','t','o',0};
  53 static const WCHAR ifW[] = {'i','f',0};
  54 static const WCHAR impW[] = {'i','m','p',0};
  55 static const WCHAR inW[] = {'i','n',0};
  56 static const WCHAR isW[] = {'i','s',0};
  57 static const WCHAR letW[] = {'l','e','t',0};
  58 static const WCHAR loopW[] = {'l','o','o','p',0};
  59 static const WCHAR meW[] = {'m','e',0};
  60 static const WCHAR modW[] = {'m','o','d',0};
  61 static const WCHAR newW[] = {'n','e','w',0};
  62 static const WCHAR nextW[] = {'n','e','x','t',0};
  63 static const WCHAR notW[] = {'n','o','t',0};
  64 static const WCHAR nothingW[] = {'n','o','t','h','i','n','g',0};
  65 static const WCHAR nullW[] = {'n','u','l','l',0};
  66 static const WCHAR onW[] = {'o','n',0};
  67 static const WCHAR optionW[] = {'o','p','t','i','o','n',0};
  68 static const WCHAR orW[] = {'o','r',0};
  69 static const WCHAR privateW[] = {'p','r','i','v','a','t','e',0};
  70 static const WCHAR propertyW[] = {'p','r','o','p','e','r','t','y',0};
  71 static const WCHAR publicW[] = {'p','u','b','l','i','c',0};
  72 static const WCHAR remW[] = {'r','e','m',0};
  73 static const WCHAR resumeW[] = {'r','e','s','u','m','e',0};
  74 static const WCHAR selectW[] = {'s','e','l','e','c','t',0};
  75 static const WCHAR setW[] = {'s','e','t',0};
  76 static const WCHAR stepW[] = {'s','t','e','p',0};
  77 static const WCHAR stopW[] = {'s','t','o','p',0};
  78 static const WCHAR subW[] = {'s','u','b',0};
  79 static const WCHAR thenW[] = {'t','h','e','n',0};
  80 static const WCHAR toW[] = {'t','o',0};
  81 static const WCHAR trueW[] = {'t','r','u','e',0};
  82 static const WCHAR untilW[] = {'u','n','t','i','l',0};
  83 static const WCHAR wendW[] = {'w','e','n','d',0};
  84 static const WCHAR whileW[] = {'w','h','i','l','e',0};
  85 static const WCHAR xorW[] = {'x','o','r',0};
  86
  87 static const struct {
  88     const WCHAR *word;
  89     int token;
  90 } keywords[] = {
  91     {andW,       tAND},
  92     {byrefW,     tBYREF},
  93     {byvalW,     tBYVAL},
  94     {callW,      tCALL},
  95     {caseW,      tCASE},
  96     {classW,     tCLASS},
  97     {constW,     tCONST},
  98     {defaultW,   tDEFAULT},
  99     {dimW,       tDIM},
 100     {doW,        tDO},
 101     {eachW,      tEACH},
 102     {elseW,      tELSE},
 103     {elseifW,    tELSEIF},
 104     {emptyW,     tEMPTY},
 105     {endW,       tEND},
 106     {eqvW,       tEQV},
 107     {errorW,     tERROR},
 108     {exitW,      tEXIT},
 109     {explicitW,  tEXPLICIT},
 110     {falseW,     tFALSE},
 111     {forW,       tFOR},
 112     {functionW,  tFUNCTION},
 113     {getW,       tGET},
 114     {gotoW,      tGOTO},
 115     {ifW,        tIF},
 116     {impW,       tIMP},
 117     {inW,        tIN},
 118     {isW,        tIS},
 119     {letW,       tLET},
 120     {loopW,      tLOOP},
 121     {meW,        tME},
 122     {modW,       tMOD},
 123     {newW,       tNEW},
 124     {nextW,      tNEXT},
 125     {notW,       tNOT},
 126     {nothingW,   tNOTHING},
 127     {nullW,      tNULL},
 128     {onW,        tON},
 129     {optionW,    tOPTION},
 130     {orW,        tOR},
 131     {privateW,   tPRIVATE},
 132     {propertyW,  tPROPERTY},
 133     {publicW,    tPUBLIC},
 134     {remW,       tREM},
 135     {resumeW,    tRESUME},
 136     {selectW,    tSELECT},
 137     {setW,       tSET},
 138     {stepW,      tSTEP},
 139     {stopW,      tSTOP},
 140     {subW,       tSUB},
 141     {thenW,      tTHEN},
 142     {toW,        tTO},
 143     {trueW,      tTRUE},
 144     {untilW,     tUNTIL},
 145     {wendW,      tWEND},
 146     {whileW,     tWHILE},
 147     {xorW,       tXOR}
 148 };
 149
 150 static inline BOOL is_identifier_char(WCHAR c)
 151 {
 152     return isalnumW(c) || c == '_';
 153 }
 154
 155 static int check_keyword(parser_ctx_t *ctx, const WCHAR *word)
 156 {
 157     const WCHAR *p1 = ctx->ptr;
 158     const WCHAR *p2 = word;
 159     WCHAR c;
 160
 161     while(p1 < ctx->end && *p2) {
 162         c = tolowerW(*p1);
 163         if(c != *p2)
 164             return c - *p2;
 165         p1++;
 166         p2++;
 167     }
 168
 169     if(*p2 || (p1 < ctx->end && is_identifier_char(*p1)))
 170         return 1;
 171
 172     ctx->ptr = p1;
 173     return 0;
 174 }
 175
 176 static int check_keywords(parser_ctx_t *ctx)
 177 {
 178     int min = 0, max = sizeof(keywords)/sizeof(keywords[0])-1, r, i;
 179
 180     while(min <= max) {
 181         i = (min+max)/2;
 182
 183         r = check_keyword(ctx, keywords[i].word);
 184         if(!r)
 185             return keywords[i].token;
 186
 187         if(r > 0)
 188             min = i+1;
 189         else
 190             max = i-1;
 191     }
 192
 193     return 0;
 194 }
 195
 196 static int parse_identifier(parser_ctx_t *ctx, const WCHAR **ret)
 197 {
 198     const WCHAR *ptr = ctx->ptr++;
 199     WCHAR *str;
 200     int len;
 201
 202     while(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr))
 203         ctx->ptr++;
 204     len = ctx->ptr-ptr;
 205
 206     str = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
 207     if(!str)
 208         return 0;
 209
 210     memcpy(str, ptr, (len+1)*sizeof(WCHAR));
 211     str[len] = 0;
 212     *ret = str;
 213     return tIdentifier;
 214 }
 215
 216 static int parse_string_literal(parser_ctx_t *ctx, const WCHAR **ret)
 217 {
 218     const WCHAR *ptr = ++ctx->ptr;
 219     WCHAR *rptr;
 220     int len = 0;
 221
 222     while(ctx->ptr < ctx->end) {
 223         if(*ctx->ptr == '\n') {
 224             FIXME("newline inside string literal\n");
 225             return 0;
 226         }
 227
 228        if(*ctx->ptr == '"') {
 229             if(ctx->ptr[1] != '"')
 230                 break;
 231             len--;
 232             ctx->ptr++;
 233         }
 234         ctx->ptr++;
 235     }
 236
 237     if(ctx->ptr == ctx->end) {
 238         FIXME("unterminated string literal\n");
 239         return 0;
 240     }
 241
 242     len += ctx->ptr-ptr;
 243
 244     *ret = rptr = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
 245     if(!rptr)
 246         return 0;
 247
 248     while(ptr < ctx->ptr) {
 249         if(*ptr == '"')
 250             ptr++;
 251         *rptr++ = *ptr++;
 252     }
 253
 254     *rptr = 0;
 255     ctx->ptr++;
 256     return tString;
 257 }
 258
 259 static int parse_numeric_literal(parser_ctx_t *ctx, void **ret)
 260 {
 261     double n = 0;
 262
 263     if(*ctx->ptr == '0' && !('0' <= ctx->ptr[1] && ctx->ptr[1] <= '9') && ctx->ptr[1] != '.')
 264         return *ctx->ptr++;
 265
 266     do {
 267         n = n*10 + *ctx->ptr++ - '0';
 268     }while('0' <= *ctx->ptr && *ctx->ptr <= '9');
 269
 270     if(*ctx->ptr != '.') {
 271         if((LONG)n == n) {
 272             LONG l = n;
 273             *(LONG*)ret = l;
 274             return (short)l == l ? tShort : tLong;
 275         }
 276     }else {
 277         double e = 1.0;
 278         while('0' <= *++ctx->ptr && *ctx->ptr <= '9')
 279             n += (e /= 10.0)*(*ctx->ptr-'0');
 280     }
 281
 282     *(double*)ret = n;
 283     return tDouble;
 284 }
 285
 286 static int hex_to_int(WCHAR c)
 287 {
 288     if('0' <= c && c <= '9')
 289         return c-'0';
 290     if('a' <= c && c <= 'f')
 291         return c+10-'a';
 292     if('A' <= c && c <= 'F')
 293         return c+10-'A';
 294     return -1;
 295 }
 296
 297 static int parse_hex_literal(parser_ctx_t *ctx, LONG *ret)
 298 {
 299     const WCHAR *begin = ctx->ptr;
 300     LONG l = 0, d;
 301
 302     while((d = hex_to_int(*++ctx->ptr)) != -1)
 303         l = l*16 + d;
 304
 305     if(begin + 9 /* max digits+1 */ < ctx->ptr || (*ctx->ptr != '&' && is_identifier_char(*ctx->ptr))) {
 306         FIXME("invalid literal\n");
 307         return 0;
 308     }
 309
 310     if(*ctx->ptr == '&')
 311         ctx->ptr++;
 312
 313     *ret = l;
 314     return (short)l == l ? tShort : tLong;
 315 }
 316
 317 static void skip_spaces(parser_ctx_t *ctx)
 318 {
 319     while(*ctx->ptr == ' ' || *ctx->ptr == '\t' || *ctx->ptr == '\r')
 320         ctx->ptr++;
 321 }
 322
 323 static int comment_line(parser_ctx_t *ctx)
 324 {
 325     ctx->ptr = strchrW(ctx->ptr, '\n');
 326     if(ctx->ptr)
 327         ctx->ptr++;
 328     else
 329         ctx->ptr = ctx->end;
 330     return tNL;
 331 }
 332
 333 static int parse_next_token(void *lval, parser_ctx_t *ctx)
 334 {
 335     WCHAR c;
 336
 337     skip_spaces(ctx);
 338     if(ctx->ptr == ctx->end)
 339         return ctx->last_token == tNL ? tEOF : tNL;
 340
 341     c = *ctx->ptr;
 342
 343     if('0' <= c && c <= '9')
 344         return parse_numeric_literal(ctx, lval);
 345
 346     if(isalphaW(c)) {
 347         int ret = check_keywords(ctx);
 348         if(!ret)
 349             return parse_identifier(ctx, lval);
 350         if(ret != tREM)
 351             return ret;
 352         c = '\'';
 353     }
 354
 355     switch(c) {
 356     case '\n':
 357         ctx->ptr++;
 358         return tNL;
 359     case '\'':
 360         return comment_line(ctx);
 361     case ':':
 362     case ')':
 363     case ',':
 364     case '=':
 365     case '+':
 366     case '*':
 367     case '/':
 368     case '^':
 369     case '\\':
 370     case '.':
 371     case '_':
 372         return *ctx->ptr++;
 373     case '-':
 374         if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '>')
 375             return comment_line(ctx);
 376         ctx->ptr++;
 377         return '-';
 378     case '(':
 379         /* NOTE:
 380          * We resolve empty brackets in lexer instead of parser to avoid complex conflicts
 381          * in call statement special case |f()| without 'call' keyword
 382          */
 383         ctx->ptr++;
 384         skip_spaces(ctx);
 385         if(*ctx->ptr == ')') {
 386             ctx->ptr++;
 387             return tEMPTYBRACKETS;
 388         }
 389         return '(';
 390     case '"':
 391         return parse_string_literal(ctx, lval);
 392     case '&':
 393         if(*++ctx->ptr == 'h' || *ctx->ptr == 'H')
 394             return parse_hex_literal(ctx, lval);
 395         return '&';
 396     case '<':
 397         switch(*++ctx->ptr) {
 398         case '>':
 399             ctx->ptr++;
 400             return tNEQ;
 401         case '=':
 402             ctx->ptr++;
 403             return tLTEQ;
 404         case '!':
 405             if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '-')
 406                 return comment_line(ctx);
 407         }
 408         return '<';
 409     case '>':
 410         if(*++ctx->ptr == '=') {
 411             ctx->ptr++;
 412             return tGTEQ;
 413         }
 414         return '>';
 415     default:
 416         FIXME("Unhandled char %c in %s\n", *ctx->ptr, debugstr_w(ctx->ptr));
 417     }
 418
 419     return 0;
 420 }
 421
 422 int parser_lex(void *lval, parser_ctx_t *ctx)
 423 {
 424     int ret;
 425
 426     while(1) {
 427         ret = parse_next_token(lval, ctx);
 428         if(ret == '_') {
 429             skip_spaces(ctx);
 430             if(*ctx->ptr != '\n') {
 431                 FIXME("'_' not followed by newline\n");
 432                 return 0;
 433             }
 434             ctx->ptr++;
 435             continue;
 436         }
 437         if(ret != tNL || ctx->last_token != tNL)
 438             break;
 439
 440         ctx->last_nl = ctx->ptr-ctx->code;
 441     }
 442
 443     return (ctx->last_token = ret);
 444 }