2 * Copyright 2008 Jacek Caban for CodeWeavers
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20 * Code in this file is based on files:
23 * from Mozilla project, released under LGPL 2.1 or later.
25 * The Original Code is Mozilla Communicator client code, released
28 * The Initial Developer of the Original Code is
29 * Netscape Communications Corporation.
30 * Portions created by the Initial Developer are Copyright (C) 1998
31 * the Initial Developer. All Rights Reserved.
39 #include "wine/debug.h"
41 WINE_DEFAULT_DEBUG_CHANNEL(jscript);
43 #define JSREG_FOLD 0x01 /* fold uppercase to lowercase */
44 #define JSREG_GLOB 0x02 /* global exec, creates array of matches */
45 #define JSREG_MULTILINE 0x04 /* treat ^ and $ as begin and end of line */
46 #define JSREG_STICKY 0x08 /* only match starting at lastIndex */
48 typedef BYTE JSPackedBool;
49 typedef BYTE jsbytecode;
52 * This struct holds a bitmap representation of a class from a regexp.
53 * There's a list of these referenced by the classList field in the JSRegExp
54 * struct below. The initial state has startIndex set to the offset in the
55 * original regexp source of the beginning of the class contents. The first
56 * use of the class converts the source representation into a bitmap.
59 typedef struct RECharSet {
60 JSPackedBool converted;
73 WORD flags; /* flags, see jsapi.h's JSREG_* defines */
74 size_t parenCount; /* number of parenthesized submatches */
75 size_t classCount; /* count [...] bitmaps */
76 RECharSet *classList; /* list of [...] bitmaps */
77 const WCHAR *source; /* locked source string, sans // */
79 jsbytecode program[1]; /* regular expression bytecode */
88 jsval_t last_index_val;
91 static const WCHAR sourceW[] = {'s','o','u','r','c','e',0};
92 static const WCHAR globalW[] = {'g','l','o','b','a','l',0};
93 static const WCHAR ignoreCaseW[] = {'i','g','n','o','r','e','C','a','s','e',0};
94 static const WCHAR multilineW[] = {'m','u','l','t','i','l','i','n','e',0};
95 static const WCHAR lastIndexW[] = {'l','a','s','t','I','n','d','e','x',0};
96 static const WCHAR toStringW[] = {'t','o','S','t','r','i','n','g',0};
97 static const WCHAR execW[] = {'e','x','e','c',0};
98 static const WCHAR testW[] = {'t','e','s','t',0};
100 static const WCHAR leftContextW[] =
101 {'l','e','f','t','C','o','n','t','e','x','t',0};
102 static const WCHAR rightContextW[] =
103 {'r','i','g','h','t','C','o','n','t','e','x','t',0};
105 static const WCHAR idx1W[] = {'$','1',0};
106 static const WCHAR idx2W[] = {'$','2',0};
107 static const WCHAR idx3W[] = {'$','3',0};
108 static const WCHAR idx4W[] = {'$','4',0};
109 static const WCHAR idx5W[] = {'$','5',0};
110 static const WCHAR idx6W[] = {'$','6',0};
111 static const WCHAR idx7W[] = {'$','7',0};
112 static const WCHAR idx8W[] = {'$','8',0};
113 static const WCHAR idx9W[] = {'$','9',0};
115 static const WCHAR undefinedW[] = {'u','n','d','e','f','i','n','e','d',0};
116 static const WCHAR emptyW[] = {0};
118 /* FIXME: Better error handling */
119 #define ReportRegExpError(a,b,c)
120 #define ReportRegExpErrorHelper(a,b,c,d)
121 #define JS_ReportErrorNumber(a,b,c,d)
122 #define JS_ReportErrorFlagsAndNumber(a,b,c,d,e,f)
123 #define js_ReportOutOfScriptQuota(a)
124 #define JS_ReportOutOfMemory(a)
125 #define JS_COUNT_OPERATION(a,b)
127 #define JSMSG_MIN_TOO_BIG 47
128 #define JSMSG_MAX_TOO_BIG 48
129 #define JSMSG_OUT_OF_ORDER 49
130 #define JSMSG_OUT_OF_MEMORY 137
132 #define LINE_SEPARATOR 0x2028
133 #define PARA_SEPARATOR 0x2029
135 #define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
136 ((c >= 'a') && (c <= 'z')) )
137 #define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
138 (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
140 #define JS_ISWORD(c) ((c) < 128 && (isalnum(c) || (c) == '_'))
142 #define JS7_ISDEC(c) ((((unsigned)(c)) - '0') <= 9)
143 #define JS7_UNDEC(c) ((c) - '0')
195 REOP_LIMIT /* META: no operator >= to this */
198 #define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS)
200 static const char *reop_names[] = {
253 typedef struct RECapture {
254 ptrdiff_t index; /* start of contents, -1 for empty */
255 size_t length; /* length of capture */
258 typedef struct REMatchState {
260 RECapture parens[1]; /* first of 're->parenCount' captures,
261 allocated at end of this struct */
264 typedef struct REProgState {
265 jsbytecode *continue_pc; /* current continuation data */
266 jsbytecode continue_op;
267 ptrdiff_t index; /* progress in text */
268 size_t parenSoFar; /* highest indexed paren started */
271 UINT min; /* current quantifier limits */
275 size_t top; /* backtrack stack state */
281 typedef struct REBackTrackData {
282 size_t sz; /* size of previous stack entry */
283 jsbytecode *backtrack_pc; /* where to backtrack to */
284 jsbytecode backtrack_op;
285 const WCHAR *cp; /* index in text of match at backtrack */
286 size_t parenIndex; /* start index of saved paren contents */
287 size_t parenCount; /* # of saved paren contents */
288 size_t saveStateStackTop; /* number of parent states */
289 /* saved parent states follow */
290 /* saved paren contents follow */
293 #define INITIAL_STATESTACK 100
294 #define INITIAL_BACKTRACK 8000
296 typedef struct REGlobalData {
298 JSRegExp *regexp; /* the RE in execution */
299 BOOL ok; /* runtime error (out_of_memory only?) */
300 size_t start; /* offset to start at */
301 ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
302 const WCHAR *cpbegin; /* text base address */
303 const WCHAR *cpend; /* text limit address */
305 REProgState *stateStack; /* stack of state of current parents */
306 size_t stateStackTop;
307 size_t stateStackLimit;
309 REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
310 REBackTrackData *backTrackSP;
311 size_t backTrackStackSize;
312 size_t cursz; /* size of current stack entry */
313 size_t backTrackCount; /* how many times we've backtracked */
314 size_t backTrackLimit; /* upper limit on backtrack states */
316 heap_pool_t *pool; /* It's faster to use one malloc'd pool
317 than to malloc/free the three items
318 that are allocated from this pool */
321 typedef struct RENode RENode;
323 REOp op; /* r.e. op bytecode */
324 RENode *next; /* next in concatenation order */
325 void *kid; /* first operand */
327 void *kid2; /* second operand */
328 INT num; /* could be a number */
329 size_t parenIndex; /* or a parenthesis index */
330 struct { /* or a quantifier range */
335 struct { /* or a character class */
337 size_t kidlen; /* length of string at kid, in jschars */
338 size_t index; /* index into class list */
339 WORD bmsize; /* bitmap size, based on max char code */
342 struct { /* or a literal sequence */
343 WCHAR chr; /* of one character */
344 size_t length; /* or many (via the kid) */
347 RENode *kid2; /* second operand from ALT */
348 WCHAR ch1; /* match char for ALTPREREQ */
349 WCHAR ch2; /* ditto, or class index for ALTPREREQ2 */
354 #define CLASS_CACHE_SIZE 4
356 typedef struct CompilerState {
358 const WCHAR *cpbegin;
362 size_t classCount; /* number of [] encountered */
363 size_t treeDepth; /* maximum depth of parse tree */
364 size_t progLength; /* estimated bytecode length */
366 size_t classBitmapsMem; /* memory to hold all class bitmaps */
368 const WCHAR *start; /* small cache of class strings */
369 size_t length; /* since they're often the same */
371 } classCache[CLASS_CACHE_SIZE];
374 heap_pool_t *pool; /* It's faster to use one malloc'd pool
375 than to malloc/free */
378 typedef struct EmitStateStackEntry {
379 jsbytecode *altHead; /* start of REOP_ALT* opcode */
380 jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */
381 jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
382 jsbytecode *endTermFixup; /* fixup ptr. to REOPT_ALTPREREQ* offset */
383 RENode *continueNode; /* original REOP_ALT* node being stacked */
384 jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */
385 JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to
386 avoid 16-bit unsigned offset overflow */
387 } EmitStateStackEntry;
390 * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
391 * the getters and setters take the pc of the offset, not of the opcode before
395 #define GET_ARG(pc) ((WORD)(((pc)[0] << 8) | (pc)[1]))
396 #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
397 (pc)[1] = (jsbytecode) (arg))
399 #define OFFSET_LEN ARG_LEN
400 #define OFFSET_MAX ((1 << (ARG_LEN * 8)) - 1)
401 #define GET_OFFSET(pc) GET_ARG(pc)
403 static BOOL ParseRegExp(CompilerState*);
406 * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
407 * For sanity, we limit it to 2^24 bytes.
409 #define TREE_DEPTH_MAX ((1 << 24) / sizeof(EmitStateStackEntry))
412 * The maximum memory that can be allocated for class bitmaps.
413 * For sanity, we limit it to 2^24 bytes.
415 #define CLASS_BITMAPS_MEM_LIMIT (1 << 24)
418 * Functions to get size and write/read bytecode that represent small indexes
420 * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
421 * indicates that the following byte brings more bits to the index. Otherwise
422 * this is the last byte in the index bytecode representing highest index bits.
425 GetCompactIndexWidth(size_t index)
429 for (width = 1; (index >>= 7) != 0; ++width) { }
433 static inline jsbytecode *
434 WriteCompactIndex(jsbytecode *pc, size_t index)
438 while ((next = index >> 7) != 0) {
439 *pc++ = (jsbytecode)(index | 0x80);
442 *pc++ = (jsbytecode)index;
446 static inline jsbytecode *
447 ReadCompactIndex(jsbytecode *pc, size_t *result)
452 if ((nextByte & 0x80) == 0) {
454 * Short-circuit the most common case when compact index <= 127.
459 *result = 0x7F & nextByte;
462 *result |= (nextByte & 0x7F) << shift;
464 } while ((nextByte & 0x80) != 0);
469 /* Construct and initialize an RENode, returning NULL for out-of-memory */
471 NewRENode(CompilerState *state, REOp op)
475 ren = heap_pool_alloc(state->pool, sizeof(*ren));
477 /* js_ReportOutOfScriptQuota(cx); */
487 * Validates and converts hex ascii value.
490 isASCIIHexDigit(WCHAR c, UINT *digit)
501 if (cv >= 'a' && cv <= 'f') {
502 *digit = cv - 'a' + 10;
514 #define JUMP_OFFSET_HI(off) ((jsbytecode)((off) >> 8))
515 #define JUMP_OFFSET_LO(off) ((jsbytecode)(off))
518 SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
520 ptrdiff_t offset = target - jump;
522 /* Check that target really points forward. */
524 if ((size_t)offset > OFFSET_MAX)
527 jump[0] = JUMP_OFFSET_HI(offset);
528 jump[1] = JUMP_OFFSET_LO(offset);
533 * Generate bytecode for the tree rooted at t using an explicit stack instead
537 EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
538 jsbytecode *pc, RENode *t)
540 EmitStateStackEntry *emitStateSP, *emitStateStack;
544 if (treeDepth == 0) {
545 emitStateStack = NULL;
547 emitStateStack = heap_alloc(sizeof(EmitStateStackEntry) * treeDepth);
551 emitStateSP = emitStateStack;
553 assert(op < REOP_LIMIT);
562 case REOP_ALTPREREQ2:
565 emitStateSP->altHead = pc - 1;
566 emitStateSP->endTermFixup = pc;
568 SET_ARG(pc, t->u.altprereq.ch1);
570 SET_ARG(pc, t->u.altprereq.ch2);
573 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
576 emitStateSP->continueNode = t;
577 emitStateSP->continueOp = REOP_JUMP;
578 emitStateSP->jumpToJumpFlag = FALSE;
580 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
583 assert(op < REOP_LIMIT);
587 emitStateSP->nextTermFixup = pc; /* offset to following term */
589 if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
591 emitStateSP->continueOp = REOP_ENDALT;
593 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
596 assert(op < REOP_LIMIT);
601 * If we already patched emitStateSP->nextTermFixup to jump to
602 * a nearer jump, to avoid 16-bit immediate offset overflow, we
605 if (emitStateSP->jumpToJumpFlag)
609 * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
610 * REOP_ENDALT is executed only on successful match of the last
611 * alternate in a group.
613 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
615 if (t->op != REOP_ALT) {
616 if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
621 * If the program is bigger than the REOP_JUMP offset range, then
622 * we must check for alternates before this one that are part of
623 * the same group, and fix up their jump offsets to target jumps
624 * close enough to fit in a 16-bit unsigned offset immediate.
626 if ((size_t)(pc - re->program) > OFFSET_MAX &&
627 emitStateSP > emitStateStack) {
628 EmitStateStackEntry *esp, *esp2;
629 jsbytecode *alt, *jump;
630 ptrdiff_t span, header;
634 for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
635 if (esp->continueOp == REOP_ENDALT &&
636 !esp->jumpToJumpFlag &&
637 esp->nextTermFixup + OFFSET_LEN == alt &&
638 (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
640 : esp->nextTermFixup)) > OFFSET_MAX) {
642 jump = esp->nextTermFixup;
645 * The span must be 1 less than the distance from
646 * jump offset to jump offset, so we actually jump
647 * to a REOP_JUMP bytecode, not to its offset!
650 assert(jump < esp2->nextTermFixup);
651 span = esp2->nextTermFixup - jump - 1;
652 if ((size_t)span <= OFFSET_MAX)
657 } while (esp2->continueOp != REOP_ENDALT);
660 jump[0] = JUMP_OFFSET_HI(span);
661 jump[1] = JUMP_OFFSET_LO(span);
663 if (esp->continueNode->op != REOP_ALT) {
665 * We must patch the offset at esp->endTermFixup
666 * as well, for the REOP_ALTPREREQ{,2} opcodes.
667 * If we're unlucky and endTermFixup is more than
668 * OFFSET_MAX bytes from its target, we cheat by
669 * jumping 6 bytes to the jump whose offset is at
670 * esp->nextTermFixup, which has the same target.
672 jump = esp->endTermFixup;
673 header = esp->nextTermFixup - jump;
675 if ((size_t)span > OFFSET_MAX)
678 jump[0] = JUMP_OFFSET_HI(span);
679 jump[1] = JUMP_OFFSET_LO(span);
682 esp->jumpToJumpFlag = TRUE;
690 emitStateSP->altHead = pc - 1;
691 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
693 emitStateSP->continueNode = t;
694 emitStateSP->continueOp = REOP_JUMP;
695 emitStateSP->jumpToJumpFlag = FALSE;
697 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
700 assert(op < REOP_LIMIT);
705 * Coalesce FLATs if possible and if it would not increase bytecode
706 * beyond preallocated limit. The latter happens only when bytecode
707 * size for coalesced string with offset p and length 2 exceeds 6
708 * bytes preallocated for 2 single char nodes, i.e. when
709 * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
710 * GetCompactIndexWidth(p) > 4.
711 * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
712 * nodes strictly decreases bytecode size, the check has to be
713 * done only for the first coalescing.
716 GetCompactIndexWidth((WCHAR*)t->kid - state->cpbegin) <= 4)
719 t->next->op == REOP_FLAT &&
720 (WCHAR*)t->kid + t->u.flat.length ==
722 t->u.flat.length += t->next->u.flat.length;
723 t->next = t->next->next;
726 if (t->kid && t->u.flat.length > 1) {
727 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT;
728 pc = WriteCompactIndex(pc, (WCHAR*)t->kid - state->cpbegin);
729 pc = WriteCompactIndex(pc, t->u.flat.length);
730 } else if (t->u.flat.chr < 256) {
731 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
732 *pc++ = (jsbytecode) t->u.flat.chr;
734 pc[-1] = (state->flags & JSREG_FOLD)
737 SET_ARG(pc, t->u.flat.chr);
744 pc = WriteCompactIndex(pc, t->u.parenIndex);
745 emitStateSP->continueNode = t;
746 emitStateSP->continueOp = REOP_RPAREN;
748 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
754 pc = WriteCompactIndex(pc, t->u.parenIndex);
758 pc = WriteCompactIndex(pc, t->u.parenIndex);
763 emitStateSP->nextTermFixup = pc;
765 emitStateSP->continueNode = t;
766 emitStateSP->continueOp = REOP_ASSERTTEST;
768 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
773 case REOP_ASSERTTEST:
774 case REOP_ASSERTNOTTEST:
775 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
779 case REOP_ASSERT_NOT:
781 emitStateSP->nextTermFixup = pc;
783 emitStateSP->continueNode = t;
784 emitStateSP->continueOp = REOP_ASSERTNOTTEST;
786 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
793 if (t->u.range.min == 0 && t->u.range.max == (UINT)-1) {
794 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
795 } else if (t->u.range.min == 0 && t->u.range.max == 1) {
796 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
797 } else if (t->u.range.min == 1 && t->u.range.max == (UINT) -1) {
798 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
800 if (!t->u.range.greedy)
801 pc[-1] = REOP_MINIMALQUANT;
802 pc = WriteCompactIndex(pc, t->u.range.min);
804 * Write max + 1 to avoid using size_t(max) + 1 bytes
805 * for (UINT)-1 sentinel.
807 pc = WriteCompactIndex(pc, t->u.range.max + 1);
809 emitStateSP->nextTermFixup = pc;
811 emitStateSP->continueNode = t;
812 emitStateSP->continueOp = REOP_ENDCHILD;
814 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
820 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
825 if (!t->u.ucclass.sense)
826 pc[-1] = REOP_NCLASS;
827 pc = WriteCompactIndex(pc, t->u.ucclass.index);
828 charSet = &re->classList[t->u.ucclass.index];
829 charSet->converted = FALSE;
830 charSet->length = t->u.ucclass.bmsize;
831 charSet->u.src.startIndex = t->u.ucclass.startIndex;
832 charSet->u.src.length = t->u.ucclass.kidlen;
833 charSet->sense = t->u.ucclass.sense;
844 if (emitStateSP == emitStateStack)
847 t = emitStateSP->continueNode;
848 op = (REOp) emitStateSP->continueOp;
853 heap_free(emitStateStack);
857 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
863 * Process the op against the two top operands, reducing them to a single
864 * operand in the penultimate slot. Update progLength and treeDepth.
867 ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
872 switch (opData->op) {
874 result = NewRENode(state, REOP_ALT);
877 result->kid = operandStack[operandSP - 2];
878 result->u.kid2 = operandStack[operandSP - 1];
879 operandStack[operandSP - 2] = result;
881 if (state->treeDepth == TREE_DEPTH_MAX) {
882 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
888 * Look at both alternates to see if there's a FLAT or a CLASS at
889 * the start of each. If so, use a prerequisite match.
891 if (((RENode *) result->kid)->op == REOP_FLAT &&
892 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
893 (state->flags & JSREG_FOLD) == 0) {
894 result->op = REOP_ALTPREREQ;
895 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
896 result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
897 /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
898 JUMP, <end> ... ENDALT */
899 state->progLength += 13;
902 if (((RENode *) result->kid)->op == REOP_CLASS &&
903 ((RENode *) result->kid)->u.ucclass.index < 256 &&
904 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
905 (state->flags & JSREG_FOLD) == 0) {
906 result->op = REOP_ALTPREREQ2;
907 result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
908 result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
909 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
910 JUMP, <end> ... ENDALT */
911 state->progLength += 13;
914 if (((RENode *) result->kid)->op == REOP_FLAT &&
915 ((RENode *) result->u.kid2)->op == REOP_CLASS &&
916 ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
917 (state->flags & JSREG_FOLD) == 0) {
918 result->op = REOP_ALTPREREQ2;
919 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
920 result->u.altprereq.ch2 =
921 ((RENode *) result->u.kid2)->u.ucclass.index;
922 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
923 JUMP, <end> ... ENDALT */
924 state->progLength += 13;
927 /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
928 state->progLength += 7;
933 result = operandStack[operandSP - 2];
935 result = result->next;
936 result->next = operandStack[operandSP - 1];
940 case REOP_ASSERT_NOT:
943 /* These should have been processed by a close paren. */
944 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
954 * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
955 * its being on the stack, and to propagate errors to its callers.
957 #define JSREG_FIND_PAREN_COUNT 0x8000
958 #define JSREG_FIND_PAREN_ERROR 0x4000
961 * Magic return value from FindParenCount and GetDecimalValue, to indicate
962 * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
963 * its findMax parameter is non-null.
965 #define OVERFLOW_VALUE ((UINT)-1)
968 FindParenCount(CompilerState *state)
973 if (state->flags & JSREG_FIND_PAREN_COUNT)
974 return OVERFLOW_VALUE;
977 * Copy state into temp, flag it so we never report an invalid backref,
978 * and reset its members to parse the entire regexp. This is obviously
979 * suboptimal, but GetDecimalValue calls us only if a backref appears to
980 * refer to a forward parenthetical, which is rare.
983 temp.flags |= JSREG_FIND_PAREN_COUNT;
984 temp.cp = temp.cpbegin;
989 temp.classBitmapsMem = 0;
990 for (i = 0; i < CLASS_CACHE_SIZE; i++)
991 temp.classCache[i].start = NULL;
993 if (!ParseRegExp(&temp)) {
994 state->flags |= JSREG_FIND_PAREN_ERROR;
995 return OVERFLOW_VALUE;
997 return temp.parenCount;
1001 * Extract and return a decimal value at state->cp. The initial character c
1002 * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
1003 * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
1004 * state->flags to discover whether an error occurred under findMax.
1007 GetDecimalValue(WCHAR c, UINT max, UINT (*findMax)(CompilerState *state),
1008 CompilerState *state)
1010 UINT value = JS7_UNDEC(c);
1011 BOOL overflow = (value > max && (!findMax || value > findMax(state)));
1013 /* The following restriction allows simpler overflow checks. */
1014 assert(max <= ((UINT)-1 - 9) / 10);
1015 while (state->cp < state->cpend) {
1019 value = 10 * value + JS7_UNDEC(c);
1020 if (!overflow && value > max && (!findMax || value > findMax(state)))
1024 return overflow ? OVERFLOW_VALUE : value;
1028 * Calculate the total size of the bitmap required for a class expression.
1031 CalculateBitmapSize(CompilerState *state, RENode *target, const WCHAR *src,
1035 BOOL inRange = FALSE;
1036 WCHAR c, rangeStart = 0;
1037 UINT n, digit, nDigits, i;
1039 target->u.ucclass.bmsize = 0;
1040 target->u.ucclass.sense = TRUE;
1047 target->u.ucclass.sense = FALSE;
1050 while (src != end) {
1051 BOOL canStartRange = TRUE;
1078 if (src < end && RE_IS_LETTER(*src)) {
1079 localMax = (UINT) (*src++) & 0x1F;
1092 for (i = 0; (i < nDigits) && (src < end); i++) {
1094 if (!isASCIIHexDigit(c, &digit)) {
1096 * Back off to accepting the original
1103 n = (n << 4) | digit;
1108 canStartRange = FALSE;
1110 JS_ReportErrorNumber(state->context,
1111 js_GetErrorMessage, NULL,
1112 JSMSG_BAD_CLASS_RANGE);
1122 canStartRange = FALSE;
1124 JS_ReportErrorNumber(state->context,
1125 js_GetErrorMessage, NULL,
1126 JSMSG_BAD_CLASS_RANGE);
1132 * If this is the start of a range, ensure that it's less than
1146 * This is a non-ECMA extension - decimal escapes (in this
1147 * case, octal!) are supposed to be an error inside class
1148 * ranges, but supported here for backwards compatibility.
1153 if ('0' <= c && c <= '7') {
1155 n = 8 * n + JS7_UNDEC(c);
1157 if ('0' <= c && c <= '7') {
1159 i = 8 * n + JS7_UNDEC(c);
1180 /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1181 if (rangeStart > localMax) {
1182 JS_ReportErrorNumber(state->context,
1183 js_GetErrorMessage, NULL,
1184 JSMSG_BAD_CLASS_RANGE);
1189 if (canStartRange && src < end - 1) {
1193 rangeStart = (WCHAR)localMax;
1197 if (state->flags & JSREG_FOLD)
1198 rangeStart = localMax; /* one run of the uc/dc loop below */
1201 if (state->flags & JSREG_FOLD) {
1202 WCHAR maxch = localMax;
1204 for (i = rangeStart; i <= localMax; i++) {
1220 target->u.ucclass.bmsize = max;
1225 ParseMinMaxQuantifier(CompilerState *state, BOOL ignoreValues)
1229 const WCHAR *errp = state->cp++;
1234 min = GetDecimalValue(c, 0xFFFF, NULL, state);
1237 if (!ignoreValues && min == OVERFLOW_VALUE)
1238 return JSMSG_MIN_TOO_BIG;
1244 max = GetDecimalValue(c, 0xFFFF, NULL, state);
1246 if (!ignoreValues && max == OVERFLOW_VALUE)
1247 return JSMSG_MAX_TOO_BIG;
1248 if (!ignoreValues && min > max)
1249 return JSMSG_OUT_OF_ORDER;
1257 state->result = NewRENode(state, REOP_QUANT);
1259 return JSMSG_OUT_OF_MEMORY;
1260 state->result->u.range.min = min;
1261 state->result->u.range.max = max;
1263 * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1264 * where <max> is written as compact(max+1) to make
1265 * (UINT)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1267 state->progLength += (1 + GetCompactIndexWidth(min)
1268 + GetCompactIndexWidth(max + 1)
1279 ParseQuantifier(CompilerState *state)
1282 term = state->result;
1283 if (state->cp < state->cpend) {
1284 switch (*state->cp) {
1286 state->result = NewRENode(state, REOP_QUANT);
1289 state->result->u.range.min = 1;
1290 state->result->u.range.max = (UINT)-1;
1291 /* <PLUS>, <next> ... <ENDCHILD> */
1292 state->progLength += 4;
1295 state->result = NewRENode(state, REOP_QUANT);
1298 state->result->u.range.min = 0;
1299 state->result->u.range.max = (UINT)-1;
1300 /* <STAR>, <next> ... <ENDCHILD> */
1301 state->progLength += 4;
1304 state->result = NewRENode(state, REOP_QUANT);
1307 state->result->u.range.min = 0;
1308 state->result->u.range.max = 1;
1309 /* <OPT>, <next> ... <ENDCHILD> */
1310 state->progLength += 4;
1312 case '{': /* balance '}' */
1316 err = ParseMinMaxQuantifier(state, FALSE);
1322 ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
1331 if (state->treeDepth == TREE_DEPTH_MAX) {
1332 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1338 state->result->kid = term;
1339 if (state->cp < state->cpend && *state->cp == '?') {
1341 state->result->u.range.greedy = FALSE;
1343 state->result->u.range.greedy = TRUE;
1349 * item: assertion An item is either an assertion or
1350 * quantatom a quantified atom.
1352 * assertion: '^' Assertions match beginning of string
1353 * (or line if the class static property
1354 * RegExp.multiline is true).
1355 * '$' End of string (or line if the class
1356 * static property RegExp.multiline is
1358 * '\b' Word boundary (between \w and \W).
1359 * '\B' Word non-boundary.
1361 * quantatom: atom An unquantified atom.
1362 * quantatom '{' n ',' m '}'
1363 * Atom must occur between n and m times.
1364 * quantatom '{' n ',' '}' Atom must occur at least n times.
1365 * quantatom '{' n '}' Atom must occur exactly n times.
1366 * quantatom '*' Zero or more times (same as {0,}).
1367 * quantatom '+' One or more times (same as {1,}).
1368 * quantatom '?' Zero or one time (same as {0,1}).
1370 * any of which can be optionally followed by '?' for ungreedy
1372 * atom: '(' regexp ')' A parenthesized regexp (what matched
1373 * can be addressed using a backreference,
1375 * '.' Matches any char except '\n'.
1376 * '[' classlist ']' A character class.
1377 * '[' '^' classlist ']' A negated character class.
1379 * '\n' Newline (Line Feed).
1380 * '\r' Carriage Return.
1381 * '\t' Horizontal Tab.
1382 * '\v' Vertical Tab.
1383 * '\d' A digit (same as [0-9]).
1385 * '\w' A word character, [0-9a-z_A-Z].
1386 * '\W' A non-word character.
1387 * '\s' A whitespace character, [ \b\f\n\r\t\v].
1388 * '\S' A non-whitespace character.
1389 * '\' n A backreference to the nth (n decimal
1390 * and positive) parenthesized expression.
1391 * '\' octal An octal escape sequence (octal must be
1392 * two or three digits long, unless it is
1393 * 0 for the null character).
1394 * '\x' hex A hex escape (hex must be two digits).
1395 * '\u' unicode A unicode escape (must be four digits).
1396 * '\c' ctrl A control character, ctrl is a letter.
1397 * '\' literalatomchar Any character except one of the above
1398 * that follow '\' in an atom.
1399 * otheratomchar Any character not first among the other
1400 * atom right-hand sides.
1403 ParseTerm(CompilerState *state)
1405 WCHAR c = *state->cp++;
1407 UINT num, tmp, n, i;
1408 const WCHAR *termStart;
1411 /* assertions and atoms */
1413 state->result = NewRENode(state, REOP_BOL);
1416 state->progLength++;
1419 state->result = NewRENode(state, REOP_EOL);
1422 state->progLength++;
1425 if (state->cp >= state->cpend) {
1426 /* a trailing '\' is an error */
1427 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
1432 /* assertion escapes */
1434 state->result = NewRENode(state, REOP_WBDRY);
1437 state->progLength++;
1440 state->result = NewRENode(state, REOP_WNONBDRY);
1443 state->progLength++;
1445 /* Decimal escape */
1447 /* Give a strict warning. See also the note below. */
1448 WARN("non-octal digit in an escape sequence that doesn't match a back-reference\n");
1451 while (state->cp < state->cpend) {
1453 if (c < '0' || '7' < c)
1456 tmp = 8 * num + (UINT)JS7_UNDEC(c);
1463 state->result = NewRENode(state, REOP_FLAT);
1466 state->result->u.flat.chr = c;
1467 state->result->u.flat.length = 1;
1468 state->progLength += 3;
1479 termStart = state->cp - 1;
1480 num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1481 if (state->flags & JSREG_FIND_PAREN_ERROR)
1483 if (num == OVERFLOW_VALUE) {
1484 /* Give a strict mode warning. */
1485 WARN("back-reference exceeds number of capturing parentheses\n");
1488 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1489 * error here. However, for compatibility with IE, we treat the
1490 * whole backref as flat if the first character in it is not a
1491 * valid octal character, and as an octal escape otherwise.
1493 state->cp = termStart;
1495 /* Treat this as flat. termStart - 1 is the \. */
1500 /* Treat this as an octal escape. */
1503 assert(1 <= num && num <= 0x10000);
1504 state->result = NewRENode(state, REOP_BACKREF);
1507 state->result->u.parenIndex = num - 1;
1509 += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1511 /* Control escape */
1527 /* Control letter */
1529 if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
1530 c = (WCHAR) (*state->cp++ & 0x1F);
1532 /* back off to accepting the original '\' as a literal */
1537 /* HexEscapeSequence */
1541 /* UnicodeEscapeSequence */
1546 for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1549 if (!isASCIIHexDigit(c, &digit)) {
1551 * Back off to accepting the original 'u' or 'x' as a
1558 n = (n << 4) | digit;
1562 /* Character class escapes */
1564 state->result = NewRENode(state, REOP_DIGIT);
1568 state->progLength++;
1571 state->result = NewRENode(state, REOP_NONDIGIT);
1574 state->result = NewRENode(state, REOP_SPACE);
1577 state->result = NewRENode(state, REOP_NONSPACE);
1580 state->result = NewRENode(state, REOP_ALNUM);
1583 state->result = NewRENode(state, REOP_NONALNUM);
1585 /* IdentityEscape */
1587 state->result = NewRENode(state, REOP_FLAT);
1590 state->result->u.flat.chr = c;
1591 state->result->u.flat.length = 1;
1592 state->result->kid = (void *) (state->cp - 1);
1593 state->progLength += 3;
1598 state->result = NewRENode(state, REOP_CLASS);
1601 termStart = state->cp;
1602 state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1604 if (state->cp == state->cpend) {
1605 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1606 JSMSG_UNTERM_CLASS, termStart);
1610 if (*state->cp == '\\') {
1612 if (state->cp != state->cpend)
1616 if (*state->cp == ']') {
1617 state->result->u.ucclass.kidlen = state->cp - termStart;
1622 for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1623 if (!state->classCache[i].start) {
1624 state->classCache[i].start = termStart;
1625 state->classCache[i].length = state->result->u.ucclass.kidlen;
1626 state->classCache[i].index = state->classCount;
1629 if (state->classCache[i].length ==
1630 state->result->u.ucclass.kidlen) {
1631 for (n = 0; ; n++) {
1632 if (n == state->classCache[i].length) {
1633 state->result->u.ucclass.index
1634 = state->classCache[i].index;
1637 if (state->classCache[i].start[n] != termStart[n])
1642 state->result->u.ucclass.index = state->classCount++;
1646 * Call CalculateBitmapSize now as we want any errors it finds
1647 * to be reported during the parse phase, not at execution.
1649 if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1652 * Update classBitmapsMem with number of bytes to hold bmsize bits,
1653 * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1654 * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1656 n = (state->result->u.ucclass.bmsize >> 3) + 1;
1657 if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1658 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1661 state->classBitmapsMem += n;
1662 /* CLASS, <index> */
1664 += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1668 state->result = NewRENode(state, REOP_DOT);
1673 const WCHAR *errp = state->cp--;
1676 err = ParseMinMaxQuantifier(state, TRUE);
1687 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1688 JSMSG_BAD_QUANTIFIER, state->cp - 1);
1692 state->result = NewRENode(state, REOP_FLAT);
1695 state->result->u.flat.chr = c;
1696 state->result->u.flat.length = 1;
1697 state->result->kid = (void *) (state->cp - 1);
1698 state->progLength += 3;
1701 return ParseQuantifier(state);
1705 * Top-down regular expression grammar, based closely on Perl4.
1707 * regexp: altern A regular expression is one or more
1708 * altern '|' regexp alternatives separated by vertical bar.
1710 #define INITIAL_STACK_SIZE 128
1713 ParseRegExp(CompilerState *state)
1717 REOpData *operatorStack;
1718 RENode **operandStack;
1721 BOOL result = FALSE;
1723 INT operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
1724 INT operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
1726 /* Watch out for empty regexp */
1727 if (state->cp == state->cpend) {
1728 state->result = NewRENode(state, REOP_EMPTY);
1729 return (state->result != NULL);
1732 operatorStack = heap_alloc(sizeof(REOpData) * operatorStackSize);
1736 operandStack = heap_alloc(sizeof(RENode *) * operandStackSize);
1741 parenIndex = state->parenCount;
1742 if (state->cp == state->cpend) {
1744 * If we are at the end of the regexp and we're short one or more
1745 * operands, the regexp must have the form /x|/ or some such, with
1746 * left parentheses making us short more than one operand.
1748 if (operatorSP >= operandSP) {
1749 operand = NewRENode(state, REOP_EMPTY);
1755 switch (*state->cp) {
1758 if (state->cp + 1 < state->cpend &&
1759 *state->cp == '?' &&
1760 (state->cp[1] == '=' ||
1761 state->cp[1] == '!' ||
1762 state->cp[1] == ':')) {
1763 switch (state->cp[1]) {
1766 /* ASSERT, <next>, ... ASSERTTEST */
1767 state->progLength += 4;
1770 op = REOP_ASSERT_NOT;
1771 /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
1772 state->progLength += 4;
1775 op = REOP_LPARENNON;
1781 /* LPAREN, <index>, ... RPAREN, <index> */
1783 += 2 * (1 + GetCompactIndexWidth(parenIndex));
1784 state->parenCount++;
1785 if (state->parenCount == 65535) {
1786 ReportRegExpError(state, JSREPORT_ERROR,
1787 JSMSG_TOO_MANY_PARENS);
1795 * If there's no stacked open parenthesis, throw syntax error.
1797 for (i = operatorSP - 1; ; i--) {
1799 ReportRegExpError(state, JSREPORT_ERROR,
1800 JSMSG_UNMATCHED_RIGHT_PAREN);
1803 if (operatorStack[i].op == REOP_ASSERT ||
1804 operatorStack[i].op == REOP_ASSERT_NOT ||
1805 operatorStack[i].op == REOP_LPARENNON ||
1806 operatorStack[i].op == REOP_LPAREN) {
1813 /* Expected an operand before these, so make an empty one */
1814 operand = NewRENode(state, REOP_EMPTY);
1820 if (!ParseTerm(state))
1822 operand = state->result;
1824 if (operandSP == operandStackSize) {
1826 operandStackSize += operandStackSize;
1827 tmp = heap_realloc(operandStack, sizeof(RENode *) * operandStackSize);
1832 operandStack[operandSP++] = operand;
1837 /* At the end; process remaining operators. */
1839 if (state->cp == state->cpend) {
1840 while (operatorSP) {
1842 if (!ProcessOp(state, &operatorStack[operatorSP],
1843 operandStack, operandSP))
1847 assert(operandSP == 1);
1848 state->result = operandStack[0];
1853 switch (*state->cp) {
1855 /* Process any stacked 'concat' operators */
1857 while (operatorSP &&
1858 operatorStack[operatorSP - 1].op == REOP_CONCAT) {
1860 if (!ProcessOp(state, &operatorStack[operatorSP],
1861 operandStack, operandSP)) {
1871 * If there's no stacked open parenthesis, throw syntax error.
1873 for (i = operatorSP - 1; ; i--) {
1875 ReportRegExpError(state, JSREPORT_ERROR,
1876 JSMSG_UNMATCHED_RIGHT_PAREN);
1879 if (operatorStack[i].op == REOP_ASSERT ||
1880 operatorStack[i].op == REOP_ASSERT_NOT ||
1881 operatorStack[i].op == REOP_LPARENNON ||
1882 operatorStack[i].op == REOP_LPAREN) {
1888 /* Process everything on the stack until the open parenthesis. */
1892 switch (operatorStack[operatorSP].op) {
1894 case REOP_ASSERT_NOT:
1896 operand = NewRENode(state, operatorStack[operatorSP].op);
1899 operand->u.parenIndex =
1900 operatorStack[operatorSP].parenIndex;
1902 operand->kid = operandStack[operandSP - 1];
1903 operandStack[operandSP - 1] = operand;
1904 if (state->treeDepth == TREE_DEPTH_MAX) {
1905 ReportRegExpError(state, JSREPORT_ERROR,
1906 JSMSG_REGEXP_TOO_COMPLEX);
1912 case REOP_LPARENNON:
1913 state->result = operandStack[operandSP - 1];
1914 if (!ParseQuantifier(state))
1916 operandStack[operandSP - 1] = state->result;
1917 goto restartOperator;
1919 if (!ProcessOp(state, &operatorStack[operatorSP],
1920 operandStack, operandSP))
1930 const WCHAR *errp = state->cp;
1932 if (ParseMinMaxQuantifier(state, TRUE) < 0) {
1934 * This didn't even scan correctly as a quantifier, so we should
1948 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
1954 /* Anything else is the start of the next term. */
1957 if (operatorSP == operatorStackSize) {
1959 operatorStackSize += operatorStackSize;
1960 tmp = heap_realloc(operatorStack, sizeof(REOpData) * operatorStackSize);
1963 operatorStack = tmp;
1965 operatorStack[operatorSP].op = op;
1966 operatorStack[operatorSP].errPos = state->cp;
1967 operatorStack[operatorSP++].parenIndex = parenIndex;
1972 heap_free(operatorStack);
1973 heap_free(operandStack);
1978 * Save the current state of the match - the position in the input
1979 * text as well as the position in the bytecode. The state of any
1980 * parent expressions is also saved (preceding state).
1981 * Contents of parenCount parentheses from parenIndex are also saved.
1983 static REBackTrackData *
1984 PushBackTrackState(REGlobalData *gData, REOp op,
1985 jsbytecode *target, REMatchState *x, const WCHAR *cp,
1986 size_t parenIndex, size_t parenCount)
1989 REBackTrackData *result =
1990 (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
1992 size_t sz = sizeof(REBackTrackData) +
1993 gData->stateStackTop * sizeof(REProgState) +
1994 parenCount * sizeof(RECapture);
1996 ptrdiff_t btsize = gData->backTrackStackSize;
1997 ptrdiff_t btincr = ((char *)result + sz) -
1998 ((char *)gData->backTrackStack + btsize);
2000 TRACE("\tBT_Push: %lu,%lu\n", (ULONG_PTR)parenIndex, (ULONG_PTR)parenCount);
2002 JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount));
2004 ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
2006 JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION);
2007 btincr = ((btincr+btsize-1)/btsize)*btsize;
2008 gData->backTrackStack = heap_pool_grow(gData->pool, gData->backTrackStack, btsize, btincr);
2009 if (!gData->backTrackStack) {
2010 js_ReportOutOfScriptQuota(gData->cx);
2014 gData->backTrackStackSize = btsize + btincr;
2015 result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
2017 gData->backTrackSP = result;
2018 result->sz = gData->cursz;
2021 result->backtrack_op = op;
2022 result->backtrack_pc = target;
2024 result->parenCount = parenCount;
2025 result->parenIndex = parenIndex;
2027 result->saveStateStackTop = gData->stateStackTop;
2028 assert(gData->stateStackTop);
2029 memcpy(result + 1, gData->stateStack,
2030 sizeof(REProgState) * result->saveStateStackTop);
2032 if (parenCount != 0) {
2033 memcpy((char *)(result + 1) +
2034 sizeof(REProgState) * result->saveStateStackTop,
2035 &x->parens[parenIndex],
2036 sizeof(RECapture) * parenCount);
2037 for (i = 0; i != parenCount; i++)
2038 x->parens[parenIndex + i].index = -1;
2044 static inline REMatchState *
2045 FlatNIMatcher(REGlobalData *gData, REMatchState *x, const WCHAR *matchChars,
2049 assert(gData->cpend >= x->cp);
2050 if (length > (size_t)(gData->cpend - x->cp))
2052 for (i = 0; i != length; i++) {
2053 if (toupperW(matchChars[i]) != toupperW(x->cp[i]))
2061 * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2062 * 2. If E is not a character then go to step 6.
2063 * 3. Let ch be E's character.
2064 * 4. Let A be a one-element RECharSet containing the character ch.
2065 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2066 * 6. E must be an integer. Let n be that integer.
2067 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2068 * 8. Return an internal Matcher closure that takes two arguments, a State x
2069 * and a Continuation c, and performs the following:
2070 * 1. Let cap be x's captures internal array.
2071 * 2. Let s be cap[n].
2072 * 3. If s is undefined, then call c(x) and return its result.
2073 * 4. Let e be x's endIndex.
2074 * 5. Let len be s's length.
2075 * 6. Let f be e+len.
2076 * 7. If f>InputLength, return failure.
2077 * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2078 * such that Canonicalize(s[i]) is not the same character as
2079 * Canonicalize(Input [e+i]), then return failure.
2080 * 9. Let y be the State (f, cap).
2081 * 10. Call c(y) and return its result.
2083 static REMatchState *
2084 BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex)
2087 const WCHAR *parenContent;
2088 RECapture *cap = &x->parens[parenIndex];
2090 if (cap->index == -1)
2094 if (x->cp + len > gData->cpend)
2097 parenContent = &gData->cpbegin[cap->index];
2098 if (gData->regexp->flags & JSREG_FOLD) {
2099 for (i = 0; i < len; i++) {
2100 if (toupperW(parenContent[i]) != toupperW(x->cp[i]))
2104 for (i = 0; i < len; i++) {
2105 if (parenContent[i] != x->cp[i])
2113 /* Add a single character to the RECharSet */
2115 AddCharacterToCharSet(RECharSet *cs, WCHAR c)
2117 UINT byteIndex = (UINT)(c >> 3);
2118 assert(c <= cs->length);
2119 cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2123 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2125 AddCharacterRangeToCharSet(RECharSet *cs, UINT c1, UINT c2)
2129 UINT byteIndex1 = c1 >> 3;
2130 UINT byteIndex2 = c2 >> 3;
2132 assert(c2 <= cs->length && c1 <= c2);
2137 if (byteIndex1 == byteIndex2) {
2138 cs->u.bits[byteIndex1] |= ((BYTE)0xFF >> (7 - (c2 - c1))) << c1;
2140 cs->u.bits[byteIndex1] |= 0xFF << c1;
2141 for (i = byteIndex1 + 1; i < byteIndex2; i++)
2142 cs->u.bits[i] = 0xFF;
2143 cs->u.bits[byteIndex2] |= (BYTE)0xFF >> (7 - c2);
2147 /* Compile the source of the class into a RECharSet */
2149 ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
2151 const WCHAR *src, *end;
2152 BOOL inRange = FALSE;
2153 WCHAR rangeStart = 0;
2158 assert(!charSet->converted);
2160 * Assert that startIndex and length points to chars inside [] inside
2163 assert(1 <= charSet->u.src.startIndex);
2164 assert(charSet->u.src.startIndex < gData->regexp->source_len);
2165 assert(charSet->u.src.length <= gData->regexp->source_len
2166 - 1 - charSet->u.src.startIndex);
2168 charSet->converted = TRUE;
2169 src = gData->regexp->source + charSet->u.src.startIndex;
2171 end = src + charSet->u.src.length;
2173 assert(src[-1] == '[' && end[0] == ']');
2175 byteLength = (charSet->length >> 3) + 1;
2176 charSet->u.bits = heap_alloc(byteLength);
2177 if (!charSet->u.bits) {
2178 JS_ReportOutOfMemory(gData->cx);
2182 memset(charSet->u.bits, 0, byteLength);
2188 assert(charSet->sense == FALSE);
2191 assert(charSet->sense == TRUE);
2194 while (src != end) {
2219 if (src < end && JS_ISWORD(*src)) {
2220 thisCh = (WCHAR)(*src++ & 0x1F);
2233 for (i = 0; (i < nDigits) && (src < end); i++) {
2236 if (!isASCIIHexDigit(c, &digit)) {
2238 * Back off to accepting the original '\'
2245 n = (n << 4) | digit;
2258 * This is a non-ECMA extension - decimal escapes (in this
2259 * case, octal!) are supposed to be an error inside class
2260 * ranges, but supported here for backwards compatibility.
2264 if ('0' <= c && c <= '7') {
2266 n = 8 * n + JS7_UNDEC(c);
2268 if ('0' <= c && c <= '7') {
2270 i = 8 * n + JS7_UNDEC(c);
2281 AddCharacterRangeToCharSet(charSet, '0', '9');
2282 continue; /* don't need range processing */
2284 AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2285 AddCharacterRangeToCharSet(charSet,
2287 (WCHAR)charSet->length);
2290 for (i = (INT)charSet->length; i >= 0; i--)
2292 AddCharacterToCharSet(charSet, (WCHAR)i);
2295 for (i = (INT)charSet->length; i >= 0; i--)
2297 AddCharacterToCharSet(charSet, (WCHAR)i);
2300 for (i = (INT)charSet->length; i >= 0; i--)
2302 AddCharacterToCharSet(charSet, (WCHAR)i);
2305 for (i = (INT)charSet->length; i >= 0; i--)
2307 AddCharacterToCharSet(charSet, (WCHAR)i);
2322 if (gData->regexp->flags & JSREG_FOLD) {
2323 assert(rangeStart <= thisCh);
2324 for (i = rangeStart; i <= thisCh; i++) {
2327 AddCharacterToCharSet(charSet, i);
2331 AddCharacterToCharSet(charSet, uch);
2333 AddCharacterToCharSet(charSet, dch);
2336 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2340 if (gData->regexp->flags & JSREG_FOLD) {
2341 AddCharacterToCharSet(charSet, toupperW(thisCh));
2342 AddCharacterToCharSet(charSet, tolowerW(thisCh));
2344 AddCharacterToCharSet(charSet, thisCh);
2346 if (src < end - 1) {
2350 rangeStart = thisCh;
2359 ReallocStateStack(REGlobalData *gData)
2361 size_t limit = gData->stateStackLimit;
2362 size_t sz = sizeof(REProgState) * limit;
2364 gData->stateStack = heap_pool_grow(gData->pool, gData->stateStack, sz, sz);
2365 if (!gData->stateStack) {
2366 js_ReportOutOfScriptQuota(gData->cx);
2370 gData->stateStackLimit = limit + limit;
2374 #define PUSH_STATE_STACK(data) \
2376 ++(data)->stateStackTop; \
2377 if ((data)->stateStackTop == (data)->stateStackLimit && \
2378 !ReallocStateStack((data))) { \
2384 * Apply the current op against the given input to see if it's going to match
2385 * or fail. Return false if we don't get a match, true if we do. If updatecp is
2386 * true, then update the current state's cp. Always update startpc to the next
2389 static inline REMatchState *
2390 SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
2391 jsbytecode **startpc, BOOL updatecp)
2393 REMatchState *result = NULL;
2396 size_t offset, length, index;
2397 jsbytecode *pc = *startpc; /* pc has already been incremented past op */
2398 const WCHAR *source;
2399 const WCHAR *startcp = x->cp;
2403 const char *opname = reop_names[op];
2404 TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
2405 (int)gData->stateStackTop * 2, "", opname);
2412 if (x->cp != gData->cpbegin) {
2413 if (/*!gData->cx->regExpStatics.multiline && FIXME !!! */
2414 !(gData->regexp->flags & JSREG_MULTILINE)) {
2417 if (!RE_IS_LINE_TERM(x->cp[-1]))
2423 if (x->cp != gData->cpend) {
2424 if (/*!gData->cx->regExpStatics.multiline &&*/
2425 !(gData->regexp->flags & JSREG_MULTILINE)) {
2428 if (!RE_IS_LINE_TERM(*x->cp))
2434 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2435 !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2440 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2441 (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2446 if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2452 if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
2458 if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
2464 if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2470 if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2476 if (x->cp != gData->cpend && isspaceW(*x->cp)) {
2482 if (x->cp != gData->cpend && !isspaceW(*x->cp)) {
2488 pc = ReadCompactIndex(pc, &parenIndex);
2489 assert(parenIndex < gData->regexp->parenCount);
2490 result = BackrefMatcher(gData, x, parenIndex);
2493 pc = ReadCompactIndex(pc, &offset);
2494 assert(offset < gData->regexp->source_len);
2495 pc = ReadCompactIndex(pc, &length);
2496 assert(1 <= length);
2497 assert(length <= gData->regexp->source_len - offset);
2498 if (length <= (size_t)(gData->cpend - x->cp)) {
2499 source = gData->regexp->source + offset;
2500 TRACE("%s\n", debugstr_wn(source, length));
2501 for (index = 0; index != length; index++) {
2502 if (source[index] != x->cp[index])
2511 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2512 if (x->cp != gData->cpend && *x->cp == matchCh) {
2518 pc = ReadCompactIndex(pc, &offset);
2519 assert(offset < gData->regexp->source_len);
2520 pc = ReadCompactIndex(pc, &length);
2521 assert(1 <= length);
2522 assert(length <= gData->regexp->source_len - offset);
2523 source = gData->regexp->source;
2524 result = FlatNIMatcher(gData, x, source + offset, length);
2528 if (x->cp != gData->cpend && toupperW(*x->cp) == toupperW(matchCh)) {
2534 matchCh = GET_ARG(pc);
2535 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2537 if (x->cp != gData->cpend && *x->cp == matchCh) {
2543 matchCh = GET_ARG(pc);
2545 if (x->cp != gData->cpend && toupperW(*x->cp) == toupperW(matchCh)) {
2551 pc = ReadCompactIndex(pc, &index);
2552 assert(index < gData->regexp->classCount);
2553 if (x->cp != gData->cpend) {
2554 charSet = &gData->regexp->classList[index];
2555 assert(charSet->converted);
2558 if (charSet->length != 0 &&
2559 ch <= charSet->length &&
2560 (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2567 pc = ReadCompactIndex(pc, &index);
2568 assert(index < gData->regexp->classCount);
2569 if (x->cp != gData->cpend) {
2570 charSet = &gData->regexp->classList[index];
2571 assert(charSet->converted);
2574 if (charSet->length == 0 ||
2575 ch > charSet->length ||
2576 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2597 static inline REMatchState *
2598 ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
2600 REMatchState *result = NULL;
2601 REBackTrackData *backTrackData;
2602 jsbytecode *nextpc, *testpc;
2605 REProgState *curState;
2606 const WCHAR *startcp;
2607 size_t parenIndex, k;
2608 size_t parenSoFar = 0;
2610 WCHAR matchCh1, matchCh2;
2614 jsbytecode *pc = gData->regexp->program;
2615 REOp op = (REOp) *pc++;
2618 * If the first node is a simple match, step the index into the string
2619 * until that match is made, or fail if it can't be found at all.
2621 if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & JSREG_STICKY)) {
2623 while (x->cp <= gData->cpend) {
2624 nextpc = pc; /* reset back to start each time */
2625 result = SimpleMatch(gData, x, op, &nextpc, TRUE);
2629 pc = nextpc; /* accept skip to next opcode */
2631 assert(op < REOP_LIMIT);
2642 const char *opname = reop_names[op];
2643 TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
2644 (int)gData->stateStackTop * 2, "", opname);
2646 if (REOP_IS_SIMPLE(op)) {
2647 result = SimpleMatch(gData, x, op, &pc, TRUE);
2649 curState = &gData->stateStack[gData->stateStackTop];
2653 case REOP_ALTPREREQ2:
2654 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2656 matchCh2 = GET_ARG(pc);
2661 if (x->cp != gData->cpend) {
2662 if (*x->cp == matchCh2)
2665 charSet = &gData->regexp->classList[k];
2666 if (!charSet->converted && !ProcessCharSet(gData, charSet))
2670 if ((charSet->length == 0 ||
2671 matchCh1 > charSet->length ||
2672 !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2680 case REOP_ALTPREREQ:
2681 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2683 matchCh1 = GET_ARG(pc);
2685 matchCh2 = GET_ARG(pc);
2687 if (x->cp == gData->cpend ||
2688 (*x->cp != matchCh1 && *x->cp != matchCh2)) {
2692 /* else fall through... */
2696 nextpc = pc + GET_OFFSET(pc); /* start of next alternate */
2697 pc += ARG_LEN; /* start of this alternate */
2698 curState->parenSoFar = parenSoFar;
2699 PUSH_STATE_STACK(gData);
2702 if (REOP_IS_SIMPLE(op)) {
2703 if (!SimpleMatch(gData, x, op, &pc, TRUE)) {
2704 op = (REOp) *nextpc++;
2711 nextop = (REOp) *nextpc++;
2712 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
2717 * Occurs at (successful) end of REOP_ALT,
2721 * If we have not gotten a result here, it is because of an
2722 * empty match. Do the same thing REOP_EMPTY would do.
2727 --gData->stateStackTop;
2728 pc += GET_OFFSET(pc);
2733 * Occurs at last (successful) end of REOP_ALT,
2737 * If we have not gotten a result here, it is because of an
2738 * empty match. Do the same thing REOP_EMPTY would do.
2743 --gData->stateStackTop;
2748 pc = ReadCompactIndex(pc, &parenIndex);
2749 TRACE("[ %lu ]\n", (ULONG_PTR)parenIndex);
2750 assert(parenIndex < gData->regexp->parenCount);
2751 if (parenIndex + 1 > parenSoFar)
2752 parenSoFar = parenIndex + 1;
2753 x->parens[parenIndex].index = x->cp - gData->cpbegin;
2754 x->parens[parenIndex].length = 0;
2762 pc = ReadCompactIndex(pc, &parenIndex);
2763 assert(parenIndex < gData->regexp->parenCount);
2764 cap = &x->parens[parenIndex];
2765 delta = x->cp - (gData->cpbegin + cap->index);
2766 cap->length = (delta < 0) ? 0 : (size_t) delta;
2774 nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */
2775 pc += ARG_LEN; /* start of ASSERT child */
2778 if (REOP_IS_SIMPLE(op) &&
2779 !SimpleMatch(gData, x, op, &testpc, FALSE)) {
2783 curState->u.assertion.top =
2784 (char *)gData->backTrackSP - (char *)gData->backTrackStack;
2785 curState->u.assertion.sz = gData->cursz;
2786 curState->index = x->cp - gData->cpbegin;
2787 curState->parenSoFar = parenSoFar;
2788 PUSH_STATE_STACK(gData);
2789 if (!PushBackTrackState(gData, REOP_ASSERTTEST,
2790 nextpc, x, x->cp, 0, 0)) {
2795 case REOP_ASSERT_NOT:
2796 nextpc = pc + GET_OFFSET(pc);
2800 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2801 SimpleMatch(gData, x, op, &testpc, FALSE) &&
2802 *testpc == REOP_ASSERTNOTTEST) {
2806 curState->u.assertion.top
2807 = (char *)gData->backTrackSP -
2808 (char *)gData->backTrackStack;
2809 curState->u.assertion.sz = gData->cursz;
2810 curState->index = x->cp - gData->cpbegin;
2811 curState->parenSoFar = parenSoFar;
2812 PUSH_STATE_STACK(gData);
2813 if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
2814 nextpc, x, x->cp, 0, 0)) {
2819 case REOP_ASSERTTEST:
2820 --gData->stateStackTop;
2822 x->cp = gData->cpbegin + curState->index;
2823 gData->backTrackSP =
2824 (REBackTrackData *) ((char *)gData->backTrackStack +
2825 curState->u.assertion.top);
2826 gData->cursz = curState->u.assertion.sz;
2831 case REOP_ASSERTNOTTEST:
2832 --gData->stateStackTop;
2834 x->cp = gData->cpbegin + curState->index;
2835 gData->backTrackSP =
2836 (REBackTrackData *) ((char *)gData->backTrackStack +
2837 curState->u.assertion.top);
2838 gData->cursz = curState->u.assertion.sz;
2839 result = (!result) ? x : NULL;
2842 curState->u.quantifier.min = 0;
2843 curState->u.quantifier.max = (UINT)-1;
2846 curState->u.quantifier.min = 1;
2847 curState->u.quantifier.max = (UINT)-1;
2850 curState->u.quantifier.min = 0;
2851 curState->u.quantifier.max = 1;
2854 pc = ReadCompactIndex(pc, &k);
2855 curState->u.quantifier.min = k;
2856 pc = ReadCompactIndex(pc, &k);
2857 /* max is k - 1 to use one byte for (UINT)-1 sentinel. */
2858 curState->u.quantifier.max = k - 1;
2859 assert(curState->u.quantifier.min <= curState->u.quantifier.max);
2861 if (curState->u.quantifier.max == 0) {
2862 pc = pc + GET_OFFSET(pc);
2867 /* Step over <next> */
2868 nextpc = pc + ARG_LEN;
2869 op = (REOp) *nextpc++;
2871 if (REOP_IS_SIMPLE(op)) {
2872 if (!SimpleMatch(gData, x, op, &nextpc, TRUE)) {
2873 if (curState->u.quantifier.min == 0)
2877 pc = pc + GET_OFFSET(pc);
2880 op = (REOp) *nextpc++;
2883 curState->index = startcp - gData->cpbegin;
2884 curState->continue_op = REOP_REPEAT;
2885 curState->continue_pc = pc;
2886 curState->parenSoFar = parenSoFar;
2887 PUSH_STATE_STACK(gData);
2888 if (curState->u.quantifier.min == 0 &&
2889 !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
2896 case REOP_ENDCHILD: /* marks the end of a quantifier child */
2897 pc = curState[-1].continue_pc;
2898 op = (REOp) curState[-1].continue_op;
2907 --gData->stateStackTop;
2909 /* Failed, see if we have enough children. */
2910 if (curState->u.quantifier.min == 0)
2914 if (curState->u.quantifier.min == 0 &&
2915 x->cp == gData->cpbegin + curState->index) {
2916 /* matched an empty string, that'll get us nowhere */
2920 if (curState->u.quantifier.min != 0)
2921 curState->u.quantifier.min--;
2922 if (curState->u.quantifier.max != (UINT) -1)
2923 curState->u.quantifier.max--;
2924 if (curState->u.quantifier.max == 0)
2926 nextpc = pc + ARG_LEN;
2927 nextop = (REOp) *nextpc;
2929 if (REOP_IS_SIMPLE(nextop)) {
2931 if (!SimpleMatch(gData, x, nextop, &nextpc, TRUE)) {
2932 if (curState->u.quantifier.min == 0)
2939 curState->index = startcp - gData->cpbegin;
2940 PUSH_STATE_STACK(gData);
2941 if (curState->u.quantifier.min == 0 &&
2942 !PushBackTrackState(gData, REOP_REPEAT,
2944 curState->parenSoFar,
2946 curState->parenSoFar)) {
2949 } while (*nextpc == REOP_ENDCHILD);
2952 parenSoFar = curState->parenSoFar;
2957 pc += GET_OFFSET(pc);
2960 case REOP_MINIMALSTAR:
2961 curState->u.quantifier.min = 0;
2962 curState->u.quantifier.max = (UINT)-1;
2963 goto minimalquantcommon;
2964 case REOP_MINIMALPLUS:
2965 curState->u.quantifier.min = 1;
2966 curState->u.quantifier.max = (UINT)-1;
2967 goto minimalquantcommon;
2968 case REOP_MINIMALOPT:
2969 curState->u.quantifier.min = 0;
2970 curState->u.quantifier.max = 1;
2971 goto minimalquantcommon;
2972 case REOP_MINIMALQUANT:
2973 pc = ReadCompactIndex(pc, &k);
2974 curState->u.quantifier.min = k;
2975 pc = ReadCompactIndex(pc, &k);
2976 /* See REOP_QUANT comments about k - 1. */
2977 curState->u.quantifier.max = k - 1;
2978 assert(curState->u.quantifier.min
2979 <= curState->u.quantifier.max);
2981 curState->index = x->cp - gData->cpbegin;
2982 curState->parenSoFar = parenSoFar;
2983 PUSH_STATE_STACK(gData);
2984 if (curState->u.quantifier.min != 0) {
2985 curState->continue_op = REOP_MINIMALREPEAT;
2986 curState->continue_pc = pc;
2987 /* step over <next> */
2991 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
2992 pc, x, x->cp, 0, 0)) {
2995 --gData->stateStackTop;
2996 pc = pc + GET_OFFSET(pc);
3001 case REOP_MINIMALREPEAT:
3002 --gData->stateStackTop;
3005 TRACE("{%d,%d}\n", curState->u.quantifier.min, curState->u.quantifier.max);
3006 #define PREPARE_REPEAT() \
3008 curState->index = x->cp - gData->cpbegin; \
3009 curState->continue_op = REOP_MINIMALREPEAT; \
3010 curState->continue_pc = pc; \
3012 for (k = curState->parenSoFar; k < parenSoFar; k++) \
3013 x->parens[k].index = -1; \
3014 PUSH_STATE_STACK(gData); \
3015 op = (REOp) *pc++; \
3016 assert(op < REOP_LIMIT); \
3022 * Non-greedy failure - try to consume another child.
3024 if (curState->u.quantifier.max == (UINT) -1 ||
3025 curState->u.quantifier.max > 0) {
3029 /* Don't need to adjust pc since we're going to pop. */
3032 if (curState->u.quantifier.min == 0 &&
3033 x->cp == gData->cpbegin + curState->index) {
3034 /* Matched an empty string, that'll get us nowhere. */
3038 if (curState->u.quantifier.min != 0)
3039 curState->u.quantifier.min--;
3040 if (curState->u.quantifier.max != (UINT) -1)
3041 curState->u.quantifier.max--;
3042 if (curState->u.quantifier.min != 0) {
3046 curState->index = x->cp - gData->cpbegin;
3047 curState->parenSoFar = parenSoFar;
3048 PUSH_STATE_STACK(gData);
3049 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3051 curState->parenSoFar,
3052 parenSoFar - curState->parenSoFar)) {
3055 --gData->stateStackTop;
3056 pc = pc + GET_OFFSET(pc);
3058 assert(op < REOP_LIMIT);
3068 * If the match failed and there's a backtrack option, take it.
3069 * Otherwise this is a complete and utter failure.
3072 if (gData->cursz == 0)
3075 /* Potentially detect explosive regex here. */
3076 gData->backTrackCount++;
3077 if (gData->backTrackLimit &&
3078 gData->backTrackCount >= gData->backTrackLimit) {
3079 JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
3080 JSMSG_REGEXP_TOO_COMPLEX);
3085 backTrackData = gData->backTrackSP;
3086 gData->cursz = backTrackData->sz;
3087 gData->backTrackSP =
3088 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3089 x->cp = backTrackData->cp;
3090 pc = backTrackData->backtrack_pc;
3091 op = (REOp) backTrackData->backtrack_op;
3092 assert(op < REOP_LIMIT);
3093 gData->stateStackTop = backTrackData->saveStateStackTop;
3094 assert(gData->stateStackTop);
3096 memcpy(gData->stateStack, backTrackData + 1,
3097 sizeof(REProgState) * backTrackData->saveStateStackTop);
3098 curState = &gData->stateStack[gData->stateStackTop - 1];
3100 if (backTrackData->parenCount) {
3101 memcpy(&x->parens[backTrackData->parenIndex],
3102 (char *)(backTrackData + 1) +
3103 sizeof(REProgState) * backTrackData->saveStateStackTop,
3104 sizeof(RECapture) * backTrackData->parenCount);
3105 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3107 for (k = curState->parenSoFar; k < parenSoFar; k++)
3108 x->parens[k].index = -1;
3109 parenSoFar = curState->parenSoFar;
3112 TRACE("\tBT_Pop: %ld,%ld\n",
3113 (ULONG_PTR)backTrackData->parenIndex,
3114 (ULONG_PTR)backTrackData->parenCount);
3120 * Continue with the expression.
3123 assert(op < REOP_LIMIT);
3135 static REMatchState *MatchRegExp(REGlobalData *gData, REMatchState *x)
3137 REMatchState *result;
3138 const WCHAR *cp = x->cp;
3143 * Have to include the position beyond the last character
3144 * in order to detect end-of-input/line condition.
3146 for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3147 gData->skipped = cp2 - cp;
3149 for (j = 0; j < gData->regexp->parenCount; j++)
3150 x->parens[j].index = -1;
3151 result = ExecuteREBytecode(gData, x);
3152 if (!gData->ok || result || (gData->regexp->flags & JSREG_STICKY))
3154 gData->backTrackSP = gData->backTrackStack;
3156 gData->stateStackTop = 0;
3157 cp2 = cp + gData->skipped;
3162 #define MIN_BACKTRACK_LIMIT 400000
3164 static REMatchState *InitMatch(script_ctx_t *cx, REGlobalData *gData, JSRegExp *re, size_t length)
3166 REMatchState *result;
3169 gData->backTrackStackSize = INITIAL_BACKTRACK;
3170 gData->backTrackStack = heap_pool_alloc(gData->pool, INITIAL_BACKTRACK);
3171 if (!gData->backTrackStack)
3174 gData->backTrackSP = gData->backTrackStack;
3176 gData->backTrackCount = 0;
3177 gData->backTrackLimit = 0;
3179 gData->stateStackLimit = INITIAL_STATESTACK;
3180 gData->stateStack = heap_pool_alloc(gData->pool, sizeof(REProgState) * INITIAL_STATESTACK);
3181 if (!gData->stateStack)
3184 gData->stateStackTop = 0;
3189 result = heap_pool_alloc(gData->pool, offsetof(REMatchState, parens) + re->parenCount * sizeof(RECapture));
3193 for (i = 0; i < re->classCount; i++) {
3194 if (!re->classList[i].converted &&
3195 !ProcessCharSet(gData, &re->classList[i])) {
3203 js_ReportOutOfScriptQuota(cx);
3208 static HRESULT MatchRegExpNext(JSRegExp *jsregexp, const WCHAR *str, DWORD str_len,
3209 const WCHAR **cp, heap_pool_t *pool, REMatchState **result, DWORD *matchlen)
3211 REMatchState *x, *res;
3214 gData.cpbegin = str;
3215 gData.cpend = str+str_len;
3216 gData.start = *cp-str;
3220 x = InitMatch(NULL, &gData, jsregexp, gData.cpend - gData.cpbegin);
3222 WARN("InitMatch failed\n");
3227 res = MatchRegExp(&gData, x);
3229 WARN("MatchRegExp failed\n");
3239 *matchlen = (res->cp-*cp) - gData.skipped;
3245 js_DestroyRegExp(JSRegExp *re)
3247 if (re->classList) {
3249 for (i = 0; i < re->classCount; i++) {
3250 if (re->classList[i].converted)
3251 heap_free(re->classList[i].u.bits);
3252 re->classList[i].u.bits = NULL;
3254 heap_free(re->classList);
3260 js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_len, UINT flags, BOOL flat)
3264 CompilerState state;
3271 mark = heap_pool_mark(pool);
3279 state.cpbegin = state.cp;
3280 state.cpend = state.cp + len;
3281 state.flags = flags;
3282 state.parenCount = 0;
3283 state.classCount = 0;
3284 state.progLength = 0;
3285 state.treeDepth = 0;
3286 state.classBitmapsMem = 0;
3287 for (i = 0; i < CLASS_CACHE_SIZE; i++)
3288 state.classCache[i].start = NULL;
3290 if (len != 0 && flat) {
3291 state.result = NewRENode(&state, REOP_FLAT);
3294 state.result->u.flat.chr = *state.cpbegin;
3295 state.result->u.flat.length = len;
3296 state.result->kid = (void *) state.cpbegin;
3297 /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
3298 state.progLength += 1 + GetCompactIndexWidth(0)
3299 + GetCompactIndexWidth(len);
3301 if (!ParseRegExp(&state))
3304 resize = offsetof(JSRegExp, program) + state.progLength + 1;
3305 re = heap_alloc(resize);
3309 assert(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
3310 re->classCount = state.classCount;
3311 if (re->classCount) {
3312 re->classList = heap_alloc(re->classCount * sizeof(RECharSet));
3313 if (!re->classList) {
3314 js_DestroyRegExp(re);
3318 for (i = 0; i < re->classCount; i++)
3319 re->classList[i].converted = FALSE;
3321 re->classList = NULL;
3323 endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
3325 js_DestroyRegExp(re);
3329 *endPC++ = REOP_END;
3331 * Check whether size was overestimated and shrink using realloc.
3332 * This is safe since no pointers to newly parsed regexp or its parts
3333 * besides re exist here.
3335 if ((size_t)(endPC - re->program) != state.progLength + 1) {
3337 assert((size_t)(endPC - re->program) < state.progLength + 1);
3338 resize = offsetof(JSRegExp, program) + (endPC - re->program);
3339 tmp = heap_realloc(re, resize);
3345 re->parenCount = state.parenCount;
3347 re->source_len = str_len;
3350 heap_pool_clear(mark);
3354 static inline RegExpInstance *regexp_from_vdisp(vdisp_t *vdisp)
3356 return (RegExpInstance*)vdisp->u.jsdisp;
3359 static void set_last_index(RegExpInstance *This, DWORD last_index)
3361 This->last_index = last_index;
3362 jsval_release(This->last_index_val);
3363 This->last_index_val = jsval_number(last_index);
3366 static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, DWORD rem_flags,
3367 jsstr_t *str, const WCHAR **cp, match_result_t **parens, DWORD *parens_size,
3368 DWORD *parens_cnt, match_result_t *ret)
3370 REMatchState *result;
3374 hres = MatchRegExpNext(regexp->jsregexp, str->str, jsstr_length(str),
3375 cp, &ctx->tmp_heap, &result, &matchlen);
3378 if(hres == S_FALSE) {
3379 if(rem_flags & REM_RESET_INDEX)
3380 set_last_index(regexp, 0);
3385 if(regexp->jsregexp->parenCount > *parens_size) {
3386 match_result_t *new_parens;
3389 new_parens = heap_realloc(*parens, sizeof(match_result_t)*regexp->jsregexp->parenCount);
3391 new_parens = heap_alloc(sizeof(match_result_t)*regexp->jsregexp->parenCount);
3393 return E_OUTOFMEMORY;
3395 *parens_size = regexp->jsregexp->parenCount;
3396 *parens = new_parens;
3400 if(!(rem_flags & REM_NO_CTX_UPDATE) && ctx->last_match != str) {
3401 jsstr_release(ctx->last_match);
3402 ctx->last_match = jsstr_addref(str);
3408 *parens_cnt = regexp->jsregexp->parenCount;
3410 for(i=0; i < regexp->jsregexp->parenCount; i++) {
3411 if(result->parens[i].index == -1) {
3412 (*parens)[i].str = NULL;
3413 (*parens)[i].len = 0;
3415 (*parens)[i].str = str->str + result->parens[i].index;
3416 (*parens)[i].len = result->parens[i].length;
3421 if(!(rem_flags & REM_NO_CTX_UPDATE)) {
3422 DWORD i, n = min(sizeof(ctx->match_parens)/sizeof(ctx->match_parens[0]), regexp->jsregexp->parenCount);
3424 for(i=0; i < n; i++) {
3425 if(result->parens[i].index == -1) {
3426 ctx->match_parens[i].str = NULL;
3427 ctx->match_parens[i].len = 0;
3429 ctx->match_parens[i].str = ctx->last_match->str + result->parens[i].index;
3430 ctx->match_parens[i].len = result->parens[i].length;
3434 if(n < sizeof(ctx->match_parens)/sizeof(ctx->match_parens[0]))
3435 memset(ctx->match_parens+n, 0, sizeof(ctx->match_parens) - n*sizeof(ctx->match_parens[0]));
3438 ret->str = result->cp-matchlen;
3439 ret->len = matchlen;
3440 set_last_index(regexp, result->cp-str->str);
3442 if(!(rem_flags & REM_NO_CTX_UPDATE)) {
3443 ctx->last_match_index = ret->str-str->str;
3444 ctx->last_match_length = matchlen;
3450 HRESULT regexp_match_next(script_ctx_t *ctx, jsdisp_t *dispex, DWORD rem_flags, jsstr_t *str,
3451 const WCHAR **cp, match_result_t **parens, DWORD *parens_size, DWORD *parens_cnt,
3452 match_result_t *ret)
3454 RegExpInstance *regexp = (RegExpInstance*)dispex;
3458 if((rem_flags & REM_CHECK_GLOBAL) && !(regexp->jsregexp->flags & JSREG_GLOB))
3461 mark = heap_pool_mark(&ctx->tmp_heap);
3463 hres = do_regexp_match_next(ctx, regexp, rem_flags, str, cp, parens, parens_size, parens_cnt, ret);
3465 heap_pool_clear(mark);
3469 static HRESULT regexp_match(script_ctx_t *ctx, jsdisp_t *dispex, jsstr_t *str, BOOL gflag,
3470 match_result_t **match_result, DWORD *result_cnt)
3472 RegExpInstance *This = (RegExpInstance*)dispex;
3473 match_result_t *ret = NULL, cres;
3474 const WCHAR *cp = str->str;
3475 DWORD i=0, ret_size = 0;
3479 mark = heap_pool_mark(&ctx->tmp_heap);
3482 hres = do_regexp_match_next(ctx, This, 0, str, &cp, NULL, NULL, NULL, &cres);
3483 if(hres == S_FALSE) {
3493 match_result_t *old_ret = ret;
3495 ret = heap_realloc(old_ret, (ret_size <<= 1) * sizeof(match_result_t));
3499 ret = heap_alloc((ret_size=4) * sizeof(match_result_t));
3502 hres = E_OUTOFMEMORY;
3509 if(!gflag && !(This->jsregexp->flags & JSREG_GLOB)) {
3515 heap_pool_clear(mark);
3521 *match_result = ret;
3526 static HRESULT RegExp_source(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, unsigned argc, jsval_t *argv,
3532 case DISPATCH_PROPERTYGET: {
3533 RegExpInstance *This = regexp_from_vdisp(jsthis);
3534 *r = jsval_string(jsstr_addref(This->str));
3538 FIXME("Unimplemented flags %x\n", flags);
3545 static HRESULT RegExp_global(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, unsigned argc, jsval_t *argv,
3552 static HRESULT RegExp_ignoreCase(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, unsigned argc, jsval_t *argv,
3559 static HRESULT RegExp_multiline(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, unsigned argc, jsval_t *argv,
3566 static INT index_from_val(script_ctx_t *ctx, jsval_t v)
3571 hres = to_number(ctx, v, &n);
3573 clear_ei(ctx); /* FIXME: Move ignoring exceptions to to_primitive */
3578 return is_int32(n) ? n : 0;
3581 static HRESULT RegExp_lastIndex(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, unsigned argc, jsval_t *argv,
3587 case DISPATCH_PROPERTYGET: {
3588 RegExpInstance *regexp = regexp_from_vdisp(jsthis);
3590 return jsval_copy(regexp->last_index_val, r);
3592 case DISPATCH_PROPERTYPUT: {
3593 RegExpInstance *regexp = regexp_from_vdisp(jsthis);
3596 hres = jsval_copy(argv[0], ®exp->last_index_val);
3600 regexp->last_index = index_from_val(ctx, argv[0]);
3604 FIXME("unimplemented flags: %x\n", flags);
3611 static HRESULT RegExp_toString(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, unsigned argc, jsval_t *argv,
3618 static HRESULT create_match_array(script_ctx_t *ctx, jsstr_t *input, const match_result_t *result,
3619 const match_result_t *parens, DWORD parens_cnt, IDispatch **ret)
3624 HRESULT hres = S_OK;
3626 static const WCHAR indexW[] = {'i','n','d','e','x',0};
3627 static const WCHAR inputW[] = {'i','n','p','u','t',0};
3628 static const WCHAR lastIndexW[] = {'l','a','s','t','I','n','d','e','x',0};
3629 static const WCHAR zeroW[] = {'0',0};
3631 hres = create_array(ctx, parens_cnt+1, &array);
3635 for(i=0; i < parens_cnt; i++) {
3636 str = jsstr_alloc_len(parens[i].str, parens[i].len);
3638 hres = E_OUTOFMEMORY;
3642 hres = jsdisp_propput_idx(array, i+1, jsval_string(str));
3648 while(SUCCEEDED(hres)) {
3649 hres = jsdisp_propput_name(array, indexW, jsval_number(result->str-input->str));
3653 hres = jsdisp_propput_name(array, lastIndexW, jsval_number(result->str-input->str+result->len));
3657 hres = jsdisp_propput_name(array, inputW, jsval_string(jsstr_addref(input)));
3661 str = jsstr_alloc_len(result->str, result->len);
3663 hres = E_OUTOFMEMORY;
3666 hres = jsdisp_propput_name(array, zeroW, jsval_string(str));
3672 jsdisp_release(array);
3676 *ret = to_disp(array);
3680 static HRESULT run_exec(script_ctx_t *ctx, vdisp_t *jsthis, jsval_t arg, jsstr_t **input,
3681 match_result_t *match, match_result_t **parens, DWORD *parens_cnt, BOOL *ret)
3683 RegExpInstance *regexp;
3684 DWORD parens_size = 0, last_index = 0;
3689 if(!is_vclass(jsthis, JSCLASS_REGEXP)) {
3690 FIXME("Not a RegExp\n");
3694 regexp = regexp_from_vdisp(jsthis);
3696 hres = to_string(ctx, arg, &string);
3700 if(regexp->jsregexp->flags & JSREG_GLOB) {
3701 if(regexp->last_index < 0) {
3702 jsstr_release(string);
3703 set_last_index(regexp, 0);
3706 *input = jsstr_empty();
3710 last_index = regexp->last_index;
3713 cp = string->str + last_index;
3714 hres = regexp_match_next(ctx, ®exp->dispex, REM_RESET_INDEX, string, &cp, parens,
3715 parens ? &parens_size : NULL, parens_cnt, match);
3717 jsstr_release(string);
3721 *ret = hres == S_OK;
3725 jsstr_release(string);
3729 static HRESULT RegExp_exec(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, unsigned argc, jsval_t *argv,
3732 match_result_t *parens = NULL, match;
3733 DWORD parens_cnt = 0;
3740 hres = run_exec(ctx, jsthis, argc ? argv[0] : jsval_string(jsstr_empty()), &string, &match, &parens, &parens_cnt, &b);
3750 hres = create_match_array(ctx, string, &match, parens, parens_cnt, &ret);
3752 *r = jsval_disp(ret);
3759 jsstr_release(string);
3763 static HRESULT RegExp_test(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, unsigned argc, jsval_t *argv,
3766 match_result_t match;
3774 undef_str = jsstr_alloc(undefinedW);
3776 return E_OUTOFMEMORY;
3779 hres = run_exec(ctx, jsthis, argc ? argv[0] : jsval_string(undef_str), NULL, &match, NULL, NULL, &b);
3781 jsstr_release(undef_str);
3790 static HRESULT RegExp_value(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, unsigned argc, jsval_t *argv,
3797 return throw_type_error(ctx, JS_E_FUNCTION_EXPECTED, NULL);
3799 FIXME("unimplemented flags %x\n", flags);
3806 static void RegExp_destructor(jsdisp_t *dispex)
3808 RegExpInstance *This = (RegExpInstance*)dispex;
3811 js_DestroyRegExp(This->jsregexp);
3812 jsval_release(This->last_index_val);
3813 jsstr_release(This->str);
3817 static const builtin_prop_t RegExp_props[] = {
3818 {execW, RegExp_exec, PROPF_METHOD|1},
3819 {globalW, RegExp_global, 0},
3820 {ignoreCaseW, RegExp_ignoreCase, 0},
3821 {lastIndexW, RegExp_lastIndex, 0},
3822 {multilineW, RegExp_multiline, 0},
3823 {sourceW, RegExp_source, 0},
3824 {testW, RegExp_test, PROPF_METHOD|1},
3825 {toStringW, RegExp_toString, PROPF_METHOD}
3828 static const builtin_info_t RegExp_info = {
3830 {NULL, RegExp_value, 0},
3831 sizeof(RegExp_props)/sizeof(*RegExp_props),
3837 static const builtin_prop_t RegExpInst_props[] = {
3838 {globalW, RegExp_global, 0},
3839 {ignoreCaseW, RegExp_ignoreCase, 0},
3840 {lastIndexW, RegExp_lastIndex, 0},
3841 {multilineW, RegExp_multiline, 0},
3842 {sourceW, RegExp_source, 0}
3845 static const builtin_info_t RegExpInst_info = {
3847 {NULL, RegExp_value, 0},
3848 sizeof(RegExpInst_props)/sizeof(*RegExpInst_props),
3854 static HRESULT alloc_regexp(script_ctx_t *ctx, jsdisp_t *object_prototype, RegExpInstance **ret)
3856 RegExpInstance *regexp;
3859 regexp = heap_alloc_zero(sizeof(RegExpInstance));
3861 return E_OUTOFMEMORY;
3863 if(object_prototype)
3864 hres = init_dispex(®exp->dispex, ctx, &RegExp_info, object_prototype);
3866 hres = init_dispex_from_constr(®exp->dispex, ctx, &RegExpInst_info, ctx->regexp_constr);
3877 HRESULT create_regexp(script_ctx_t *ctx, jsstr_t *src, DWORD flags, jsdisp_t **ret)
3879 RegExpInstance *regexp;
3882 TRACE("%s %x\n", debugstr_jsstr(src), flags);
3884 hres = alloc_regexp(ctx, NULL, ®exp);
3888 regexp->str = jsstr_addref(src);
3889 regexp->last_index_val = jsval_number(0);
3891 regexp->jsregexp = js_NewRegExp(ctx, &ctx->tmp_heap, regexp->str->str,
3892 jsstr_length(regexp->str), flags, FALSE);
3893 if(!regexp->jsregexp) {
3894 WARN("js_NewRegExp failed\n");
3895 jsdisp_release(®exp->dispex);
3899 *ret = ®exp->dispex;
3903 HRESULT create_regexp_var(script_ctx_t *ctx, jsval_t src_arg, jsval_t *flags_arg, jsdisp_t **ret)
3905 jsstr_t *src, *opt = NULL;
3909 if(is_object_instance(src_arg)) {
3912 obj = iface_to_jsdisp((IUnknown*)get_object(src_arg));
3914 if(is_class(obj, JSCLASS_REGEXP)) {
3915 RegExpInstance *regexp = (RegExpInstance*)obj;
3917 hres = create_regexp(ctx, regexp->str, regexp->jsregexp->flags, ret);
3918 jsdisp_release(obj);
3922 jsdisp_release(obj);
3926 if(!is_string(src_arg)) {
3927 FIXME("src_arg = %s\n", debugstr_jsval(src_arg));
3931 src = get_string(src_arg);
3934 if(!is_string(*flags_arg)) {
3935 FIXME("unimplemented for %s\n", debugstr_jsval(*flags_arg));
3939 opt = get_string(*flags_arg);
3942 hres = parse_regexp_flags(opt ? opt->str : NULL, opt ? jsstr_length(opt) : 0, &flags);
3946 return create_regexp(ctx, src, flags, ret);
3949 HRESULT regexp_string_match(script_ctx_t *ctx, jsdisp_t *re, jsstr_t *str, jsval_t *r)
3951 static const WCHAR indexW[] = {'i','n','d','e','x',0};
3952 static const WCHAR inputW[] = {'i','n','p','u','t',0};
3953 static const WCHAR lastIndexW[] = {'l','a','s','t','I','n','d','e','x',0};
3955 RegExpInstance *regexp = (RegExpInstance*)re;
3956 match_result_t *match_result;
3957 unsigned match_cnt, i;
3961 if(!(regexp->jsregexp->flags & JSREG_GLOB)) {
3962 match_result_t match, *parens = NULL;
3963 DWORD parens_cnt, parens_size = 0;
3964 const WCHAR *cp = str->str;
3966 hres = regexp_match_next(ctx, ®exp->dispex, 0, str, &cp, &parens, &parens_size, &parens_cnt, &match);
3976 hres = create_match_array(ctx, str, &match, parens, parens_cnt, &ret);
3978 *r = jsval_disp(ret);
3988 hres = regexp_match(ctx, ®exp->dispex, str, FALSE, &match_result, &match_cnt);
3993 TRACE("no match\n");
4000 hres = create_array(ctx, match_cnt, &array);
4004 for(i=0; i < match_cnt; i++) {
4007 tmp_str = jsstr_alloc_len(match_result[i].str, match_result[i].len);
4009 hres = E_OUTOFMEMORY;
4013 hres = jsdisp_propput_idx(array, i, jsval_string(tmp_str));
4014 jsstr_release(tmp_str);
4019 while(SUCCEEDED(hres)) {
4020 hres = jsdisp_propput_name(array, indexW, jsval_number(match_result[match_cnt-1].str-str->str));
4024 hres = jsdisp_propput_name(array, lastIndexW,
4025 jsval_number(match_result[match_cnt-1].str-str->str+match_result[match_cnt-1].len));
4029 hres = jsdisp_propput_name(array, inputW, jsval_string(str));
4033 heap_free(match_result);
4035 if(SUCCEEDED(hres) && r)
4036 *r = jsval_obj(array);
4038 jsdisp_release(array);
4042 static HRESULT global_idx(script_ctx_t *ctx, DWORD flags, DWORD idx, jsval_t *r)
4045 case DISPATCH_PROPERTYGET: {
4048 ret = jsstr_alloc_len(ctx->match_parens[idx].str, ctx->match_parens[idx].len);
4050 return E_OUTOFMEMORY;
4052 *r = jsval_string(ret);
4055 case DISPATCH_PROPERTYPUT:
4058 FIXME("unsupported flags\n");
4065 static HRESULT RegExpConstr_idx1(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags,
4066 unsigned argc, jsval_t *argv, jsval_t *r)
4069 return global_idx(ctx, flags, 0, r);
4072 static HRESULT RegExpConstr_idx2(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags,
4073 unsigned argc, jsval_t *argv, jsval_t *r)
4076 return global_idx(ctx, flags, 1, r);
4079 static HRESULT RegExpConstr_idx3(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags,
4080 unsigned argc, jsval_t *argv, jsval_t *r)
4083 return global_idx(ctx, flags, 2, r);
4086 static HRESULT RegExpConstr_idx4(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags,
4087 unsigned argc, jsval_t *argv, jsval_t *r)
4090 return global_idx(ctx, flags, 3, r);
4093 static HRESULT RegExpConstr_idx5(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags,
4094 unsigned argc, jsval_t *argv, jsval_t *r)
4097 return global_idx(ctx, flags, 4, r);
4100 static HRESULT RegExpConstr_idx6(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags,
4101 unsigned argc, jsval_t *argv, jsval_t *r)
4104 return global_idx(ctx, flags, 5, r);
4107 static HRESULT RegExpConstr_idx7(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags,
4108 unsigned argc, jsval_t *argv, jsval_t *r)
4111 return global_idx(ctx, flags, 6, r);
4114 static HRESULT RegExpConstr_idx8(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags,
4115 unsigned argc, jsval_t *argv, jsval_t *r)
4118 return global_idx(ctx, flags, 7, r);
4121 static HRESULT RegExpConstr_idx9(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags,
4122 unsigned argc, jsval_t *argv, jsval_t *r)
4125 return global_idx(ctx, flags, 8, r);
4128 static HRESULT RegExpConstr_leftContext(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags,
4129 unsigned argc, jsval_t *argv, jsval_t *r)
4134 case DISPATCH_PROPERTYGET: {
4137 ret = jsstr_alloc_len(ctx->last_match->str, ctx->last_match_index);
4139 return E_OUTOFMEMORY;
4141 *r = jsval_string(ret);
4144 case DISPATCH_PROPERTYPUT:
4147 FIXME("unsupported flags\n");
4154 static HRESULT RegExpConstr_rightContext(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags,
4155 unsigned argc, jsval_t *argv, jsval_t *r)
4160 case DISPATCH_PROPERTYGET: {
4163 ret = jsstr_alloc(ctx->last_match->str+ctx->last_match_index+ctx->last_match_length);
4165 return E_OUTOFMEMORY;
4167 *r = jsval_string(ret);
4170 case DISPATCH_PROPERTYPUT:
4173 FIXME("unsupported flags\n");
4180 static HRESULT RegExpConstr_value(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, unsigned argc, jsval_t *argv,
4186 case DISPATCH_METHOD:
4188 if(is_object_instance(argv[0])) {
4189 jsdisp_t *jsdisp = iface_to_jsdisp((IUnknown*)get_object(argv[0]));
4191 if(is_class(jsdisp, JSCLASS_REGEXP)) {
4192 if(argc > 1 && !is_undefined(argv[1])) {
4193 jsdisp_release(jsdisp);
4194 return throw_regexp_error(ctx, JS_E_REGEXP_SYNTAX, NULL);
4198 *r = jsval_obj(jsdisp);
4200 jsdisp_release(jsdisp);
4203 jsdisp_release(jsdisp);
4208 case DISPATCH_CONSTRUCT: {
4217 hres = create_regexp_var(ctx, argv[0], argc > 1 ? argv+1 : NULL, &ret);
4222 *r = jsval_obj(ret);
4224 jsdisp_release(ret);
4228 FIXME("unimplemented flags: %x\n", flags);
4235 static const builtin_prop_t RegExpConstr_props[] = {
4236 {idx1W, RegExpConstr_idx1, 0},
4237 {idx2W, RegExpConstr_idx2, 0},
4238 {idx3W, RegExpConstr_idx3, 0},
4239 {idx4W, RegExpConstr_idx4, 0},
4240 {idx5W, RegExpConstr_idx5, 0},
4241 {idx6W, RegExpConstr_idx6, 0},
4242 {idx7W, RegExpConstr_idx7, 0},
4243 {idx8W, RegExpConstr_idx8, 0},
4244 {idx9W, RegExpConstr_idx9, 0},
4245 {leftContextW, RegExpConstr_leftContext, 0},
4246 {rightContextW, RegExpConstr_rightContext, 0}
4249 static const builtin_info_t RegExpConstr_info = {
4251 {NULL, Function_value, 0},
4252 sizeof(RegExpConstr_props)/sizeof(*RegExpConstr_props),
4258 HRESULT create_regexp_constr(script_ctx_t *ctx, jsdisp_t *object_prototype, jsdisp_t **ret)
4260 RegExpInstance *regexp;
4263 static const WCHAR RegExpW[] = {'R','e','g','E','x','p',0};
4265 hres = alloc_regexp(ctx, object_prototype, ®exp);
4269 hres = create_builtin_constructor(ctx, RegExpConstr_value, RegExpW, &RegExpConstr_info,
4270 PROPF_CONSTR|2, ®exp->dispex, ret);
4272 jsdisp_release(®exp->dispex);
4276 HRESULT parse_regexp_flags(const WCHAR *str, DWORD str_len, DWORD *ret)
4281 for (p = str; p < str+str_len; p++) {
4284 flags |= JSREG_GLOB;
4287 flags |= JSREG_FOLD;
4290 flags |= JSREG_MULTILINE;
4293 flags |= JSREG_STICKY;
4296 WARN("wrong flag %c\n", *p);