2 * Copyright 2008 Jacek Caban for CodeWeavers
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20 * Code in this file is based on files:
23 * from Mozilla project, released under LGPL 2.1 or later.
25 * The Original Code is Mozilla Communicator client code, released
28 * The Initial Developer of the Original Code is
29 * Netscape Communications Corporation.
30 * Portions created by the Initial Developer are Copyright (C) 1998
31 * the Initial Developer. All Rights Reserved.
39 #include "wine/debug.h"
41 WINE_DEFAULT_DEBUG_CHANNEL(jscript);
43 #define JSREG_FOLD 0x01 /* fold uppercase to lowercase */
44 #define JSREG_GLOB 0x02 /* global exec, creates array of matches */
45 #define JSREG_MULTILINE 0x04 /* treat ^ and $ as begin and end of line */
46 #define JSREG_STICKY 0x08 /* only match starting at lastIndex */
48 typedef BYTE JSPackedBool;
49 typedef BYTE jsbytecode;
52 * This struct holds a bitmap representation of a class from a regexp.
53 * There's a list of these referenced by the classList field in the JSRegExp
54 * struct below. The initial state has startIndex set to the offset in the
55 * original regexp source of the beginning of the class contents. The first
56 * use of the class converts the source representation into a bitmap.
59 typedef struct RECharSet {
60 JSPackedBool converted;
73 WORD flags; /* flags, see jsapi.h's JSREG_* defines */
74 size_t parenCount; /* number of parenthesized submatches */
75 size_t classCount; /* count [...] bitmaps */
76 RECharSet *classList; /* list of [...] bitmaps */
77 BSTR source; /* locked source string, sans // */
78 jsbytecode program[1]; /* regular expression bytecode */
87 VARIANT last_index_var;
90 static const WCHAR sourceW[] = {'s','o','u','r','c','e',0};
91 static const WCHAR globalW[] = {'g','l','o','b','a','l',0};
92 static const WCHAR ignoreCaseW[] = {'i','g','n','o','r','e','C','a','s','e',0};
93 static const WCHAR multilineW[] = {'m','u','l','t','i','l','i','n','e',0};
94 static const WCHAR lastIndexW[] = {'l','a','s','t','I','n','d','e','x',0};
95 static const WCHAR toStringW[] = {'t','o','S','t','r','i','n','g',0};
96 static const WCHAR execW[] = {'e','x','e','c',0};
97 static const WCHAR testW[] = {'t','e','s','t',0};
99 static const WCHAR leftContextW[] =
100 {'l','e','f','t','C','o','n','t','e','x','t',0};
101 static const WCHAR rightContextW[] =
102 {'r','i','g','h','t','C','o','n','t','e','x','t',0};
104 static const WCHAR undefinedW[] = {'u','n','d','e','f','i','n','e','d',0};
105 static const WCHAR emptyW[] = {0};
107 /* FIXME: Better error handling */
108 #define ReportRegExpError(a,b,c)
109 #define ReportRegExpErrorHelper(a,b,c,d)
110 #define JS_ReportErrorNumber(a,b,c,d)
111 #define JS_ReportErrorFlagsAndNumber(a,b,c,d,e,f)
112 #define js_ReportOutOfScriptQuota(a)
113 #define JS_ReportOutOfMemory(a)
114 #define JS_COUNT_OPERATION(a,b)
116 #define JSMSG_MIN_TOO_BIG 47
117 #define JSMSG_MAX_TOO_BIG 48
118 #define JSMSG_OUT_OF_ORDER 49
119 #define JSMSG_OUT_OF_MEMORY 137
121 #define LINE_SEPARATOR 0x2028
122 #define PARA_SEPARATOR 0x2029
124 #define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
125 ((c >= 'a') && (c <= 'z')) )
126 #define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
127 (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
129 #define JS_ISWORD(c) ((c) < 128 && (isalnum(c) || (c) == '_'))
131 #define JS7_ISDEC(c) ((((unsigned)(c)) - '0') <= 9)
132 #define JS7_UNDEC(c) ((c) - '0')
184 REOP_LIMIT /* META: no operator >= to this */
187 #define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS)
189 static const char *reop_names[] = {
242 typedef struct RECapture {
243 ptrdiff_t index; /* start of contents, -1 for empty */
244 size_t length; /* length of capture */
247 typedef struct REMatchState {
249 RECapture parens[1]; /* first of 're->parenCount' captures,
250 allocated at end of this struct */
253 typedef struct REProgState {
254 jsbytecode *continue_pc; /* current continuation data */
255 jsbytecode continue_op;
256 ptrdiff_t index; /* progress in text */
257 size_t parenSoFar; /* highest indexed paren started */
260 UINT min; /* current quantifier limits */
264 size_t top; /* backtrack stack state */
270 typedef struct REBackTrackData {
271 size_t sz; /* size of previous stack entry */
272 jsbytecode *backtrack_pc; /* where to backtrack to */
273 jsbytecode backtrack_op;
274 const WCHAR *cp; /* index in text of match at backtrack */
275 size_t parenIndex; /* start index of saved paren contents */
276 size_t parenCount; /* # of saved paren contents */
277 size_t saveStateStackTop; /* number of parent states */
278 /* saved parent states follow */
279 /* saved paren contents follow */
282 #define INITIAL_STATESTACK 100
283 #define INITIAL_BACKTRACK 8000
285 typedef struct REGlobalData {
287 JSRegExp *regexp; /* the RE in execution */
288 BOOL ok; /* runtime error (out_of_memory only?) */
289 size_t start; /* offset to start at */
290 ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
291 const WCHAR *cpbegin; /* text base address */
292 const WCHAR *cpend; /* text limit address */
294 REProgState *stateStack; /* stack of state of current parents */
295 size_t stateStackTop;
296 size_t stateStackLimit;
298 REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
299 REBackTrackData *backTrackSP;
300 size_t backTrackStackSize;
301 size_t cursz; /* size of current stack entry */
302 size_t backTrackCount; /* how many times we've backtracked */
303 size_t backTrackLimit; /* upper limit on backtrack states */
305 jsheap_t *pool; /* It's faster to use one malloc'd pool
306 than to malloc/free the three items
307 that are allocated from this pool */
310 typedef struct RENode RENode;
312 REOp op; /* r.e. op bytecode */
313 RENode *next; /* next in concatenation order */
314 void *kid; /* first operand */
316 void *kid2; /* second operand */
317 INT num; /* could be a number */
318 size_t parenIndex; /* or a parenthesis index */
319 struct { /* or a quantifier range */
324 struct { /* or a character class */
326 size_t kidlen; /* length of string at kid, in jschars */
327 size_t index; /* index into class list */
328 WORD bmsize; /* bitmap size, based on max char code */
331 struct { /* or a literal sequence */
332 WCHAR chr; /* of one character */
333 size_t length; /* or many (via the kid) */
336 RENode *kid2; /* second operand from ALT */
337 WCHAR ch1; /* match char for ALTPREREQ */
338 WCHAR ch2; /* ditto, or class index for ALTPREREQ2 */
343 #define CLASS_CACHE_SIZE 4
345 typedef struct CompilerState {
346 script_ctx_t *context;
347 const WCHAR *cpbegin;
351 size_t classCount; /* number of [] encountered */
352 size_t treeDepth; /* maximum depth of parse tree */
353 size_t progLength; /* estimated bytecode length */
355 size_t classBitmapsMem; /* memory to hold all class bitmaps */
357 const WCHAR *start; /* small cache of class strings */
358 size_t length; /* since they're often the same */
360 } classCache[CLASS_CACHE_SIZE];
364 typedef struct EmitStateStackEntry {
365 jsbytecode *altHead; /* start of REOP_ALT* opcode */
366 jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */
367 jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
368 jsbytecode *endTermFixup; /* fixup ptr. to REOPT_ALTPREREQ* offset */
369 RENode *continueNode; /* original REOP_ALT* node being stacked */
370 jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */
371 JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to
372 avoid 16-bit unsigned offset overflow */
373 } EmitStateStackEntry;
376 * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
377 * the getters and setters take the pc of the offset, not of the opcode before
381 #define GET_ARG(pc) ((WORD)(((pc)[0] << 8) | (pc)[1]))
382 #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
383 (pc)[1] = (jsbytecode) (arg))
385 #define OFFSET_LEN ARG_LEN
386 #define OFFSET_MAX ((1 << (ARG_LEN * 8)) - 1)
387 #define GET_OFFSET(pc) GET_ARG(pc)
389 static BOOL ParseRegExp(CompilerState*);
392 * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
393 * For sanity, we limit it to 2^24 bytes.
395 #define TREE_DEPTH_MAX ((1 << 24) / sizeof(EmitStateStackEntry))
398 * The maximum memory that can be allocated for class bitmaps.
399 * For sanity, we limit it to 2^24 bytes.
401 #define CLASS_BITMAPS_MEM_LIMIT (1 << 24)
404 * Functions to get size and write/read bytecode that represent small indexes
406 * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
407 * indicates that the following byte brings more bits to the index. Otherwise
408 * this is the last byte in the index bytecode representing highest index bits.
411 GetCompactIndexWidth(size_t index)
415 for (width = 1; (index >>= 7) != 0; ++width) { }
419 static inline jsbytecode *
420 WriteCompactIndex(jsbytecode *pc, size_t index)
424 while ((next = index >> 7) != 0) {
425 *pc++ = (jsbytecode)(index | 0x80);
428 *pc++ = (jsbytecode)index;
432 static inline jsbytecode *
433 ReadCompactIndex(jsbytecode *pc, size_t *result)
438 if ((nextByte & 0x80) == 0) {
440 * Short-circuit the most common case when compact index <= 127.
445 *result = 0x7F & nextByte;
448 *result |= (nextByte & 0x7F) << shift;
450 } while ((nextByte & 0x80) != 0);
455 /* Construct and initialize an RENode, returning NULL for out-of-memory */
457 NewRENode(CompilerState *state, REOp op)
461 ren = jsheap_alloc(&state->context->tmp_heap, sizeof(*ren));
463 /* js_ReportOutOfScriptQuota(cx); */
473 * Validates and converts hex ascii value.
476 isASCIIHexDigit(WCHAR c, UINT *digit)
487 if (cv >= 'a' && cv <= 'f') {
488 *digit = cv - 'a' + 10;
500 #define JUMP_OFFSET_HI(off) ((jsbytecode)((off) >> 8))
501 #define JUMP_OFFSET_LO(off) ((jsbytecode)(off))
504 SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
506 ptrdiff_t offset = target - jump;
508 /* Check that target really points forward. */
510 if ((size_t)offset > OFFSET_MAX)
513 jump[0] = JUMP_OFFSET_HI(offset);
514 jump[1] = JUMP_OFFSET_LO(offset);
519 * Generate bytecode for the tree rooted at t using an explicit stack instead
523 EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
524 jsbytecode *pc, RENode *t)
526 EmitStateStackEntry *emitStateSP, *emitStateStack;
530 if (treeDepth == 0) {
531 emitStateStack = NULL;
533 emitStateStack = heap_alloc(sizeof(EmitStateStackEntry) * treeDepth);
537 emitStateSP = emitStateStack;
539 assert(op < REOP_LIMIT);
548 case REOP_ALTPREREQ2:
551 emitStateSP->altHead = pc - 1;
552 emitStateSP->endTermFixup = pc;
554 SET_ARG(pc, t->u.altprereq.ch1);
556 SET_ARG(pc, t->u.altprereq.ch2);
559 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
562 emitStateSP->continueNode = t;
563 emitStateSP->continueOp = REOP_JUMP;
564 emitStateSP->jumpToJumpFlag = FALSE;
566 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
569 assert(op < REOP_LIMIT);
573 emitStateSP->nextTermFixup = pc; /* offset to following term */
575 if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
577 emitStateSP->continueOp = REOP_ENDALT;
579 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
582 assert(op < REOP_LIMIT);
587 * If we already patched emitStateSP->nextTermFixup to jump to
588 * a nearer jump, to avoid 16-bit immediate offset overflow, we
591 if (emitStateSP->jumpToJumpFlag)
595 * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
596 * REOP_ENDALT is executed only on successful match of the last
597 * alternate in a group.
599 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
601 if (t->op != REOP_ALT) {
602 if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
607 * If the program is bigger than the REOP_JUMP offset range, then
608 * we must check for alternates before this one that are part of
609 * the same group, and fix up their jump offsets to target jumps
610 * close enough to fit in a 16-bit unsigned offset immediate.
612 if ((size_t)(pc - re->program) > OFFSET_MAX &&
613 emitStateSP > emitStateStack) {
614 EmitStateStackEntry *esp, *esp2;
615 jsbytecode *alt, *jump;
616 ptrdiff_t span, header;
620 for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
621 if (esp->continueOp == REOP_ENDALT &&
622 !esp->jumpToJumpFlag &&
623 esp->nextTermFixup + OFFSET_LEN == alt &&
624 (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
626 : esp->nextTermFixup)) > OFFSET_MAX) {
628 jump = esp->nextTermFixup;
631 * The span must be 1 less than the distance from
632 * jump offset to jump offset, so we actually jump
633 * to a REOP_JUMP bytecode, not to its offset!
636 assert(jump < esp2->nextTermFixup);
637 span = esp2->nextTermFixup - jump - 1;
638 if ((size_t)span <= OFFSET_MAX)
643 } while (esp2->continueOp != REOP_ENDALT);
646 jump[0] = JUMP_OFFSET_HI(span);
647 jump[1] = JUMP_OFFSET_LO(span);
649 if (esp->continueNode->op != REOP_ALT) {
651 * We must patch the offset at esp->endTermFixup
652 * as well, for the REOP_ALTPREREQ{,2} opcodes.
653 * If we're unlucky and endTermFixup is more than
654 * OFFSET_MAX bytes from its target, we cheat by
655 * jumping 6 bytes to the jump whose offset is at
656 * esp->nextTermFixup, which has the same target.
658 jump = esp->endTermFixup;
659 header = esp->nextTermFixup - jump;
661 if ((size_t)span > OFFSET_MAX)
664 jump[0] = JUMP_OFFSET_HI(span);
665 jump[1] = JUMP_OFFSET_LO(span);
668 esp->jumpToJumpFlag = TRUE;
676 emitStateSP->altHead = pc - 1;
677 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
679 emitStateSP->continueNode = t;
680 emitStateSP->continueOp = REOP_JUMP;
681 emitStateSP->jumpToJumpFlag = FALSE;
683 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
686 assert(op < REOP_LIMIT);
691 * Coalesce FLATs if possible and if it would not increase bytecode
692 * beyond preallocated limit. The latter happens only when bytecode
693 * size for coalesced string with offset p and length 2 exceeds 6
694 * bytes preallocated for 2 single char nodes, i.e. when
695 * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
696 * GetCompactIndexWidth(p) > 4.
697 * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
698 * nodes strictly decreases bytecode size, the check has to be
699 * done only for the first coalescing.
702 GetCompactIndexWidth((WCHAR*)t->kid - state->cpbegin) <= 4)
705 t->next->op == REOP_FLAT &&
706 (WCHAR*)t->kid + t->u.flat.length ==
708 t->u.flat.length += t->next->u.flat.length;
709 t->next = t->next->next;
712 if (t->kid && t->u.flat.length > 1) {
713 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT;
714 pc = WriteCompactIndex(pc, (WCHAR*)t->kid - state->cpbegin);
715 pc = WriteCompactIndex(pc, t->u.flat.length);
716 } else if (t->u.flat.chr < 256) {
717 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
718 *pc++ = (jsbytecode) t->u.flat.chr;
720 pc[-1] = (state->flags & JSREG_FOLD)
723 SET_ARG(pc, t->u.flat.chr);
730 pc = WriteCompactIndex(pc, t->u.parenIndex);
731 emitStateSP->continueNode = t;
732 emitStateSP->continueOp = REOP_RPAREN;
734 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
740 pc = WriteCompactIndex(pc, t->u.parenIndex);
744 pc = WriteCompactIndex(pc, t->u.parenIndex);
749 emitStateSP->nextTermFixup = pc;
751 emitStateSP->continueNode = t;
752 emitStateSP->continueOp = REOP_ASSERTTEST;
754 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
759 case REOP_ASSERTTEST:
760 case REOP_ASSERTNOTTEST:
761 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
765 case REOP_ASSERT_NOT:
767 emitStateSP->nextTermFixup = pc;
769 emitStateSP->continueNode = t;
770 emitStateSP->continueOp = REOP_ASSERTNOTTEST;
772 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
779 if (t->u.range.min == 0 && t->u.range.max == (UINT)-1) {
780 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
781 } else if (t->u.range.min == 0 && t->u.range.max == 1) {
782 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
783 } else if (t->u.range.min == 1 && t->u.range.max == (UINT) -1) {
784 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
786 if (!t->u.range.greedy)
787 pc[-1] = REOP_MINIMALQUANT;
788 pc = WriteCompactIndex(pc, t->u.range.min);
790 * Write max + 1 to avoid using size_t(max) + 1 bytes
791 * for (UINT)-1 sentinel.
793 pc = WriteCompactIndex(pc, t->u.range.max + 1);
795 emitStateSP->nextTermFixup = pc;
797 emitStateSP->continueNode = t;
798 emitStateSP->continueOp = REOP_ENDCHILD;
800 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
806 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
811 if (!t->u.ucclass.sense)
812 pc[-1] = REOP_NCLASS;
813 pc = WriteCompactIndex(pc, t->u.ucclass.index);
814 charSet = &re->classList[t->u.ucclass.index];
815 charSet->converted = FALSE;
816 charSet->length = t->u.ucclass.bmsize;
817 charSet->u.src.startIndex = t->u.ucclass.startIndex;
818 charSet->u.src.length = t->u.ucclass.kidlen;
819 charSet->sense = t->u.ucclass.sense;
830 if (emitStateSP == emitStateStack)
833 t = emitStateSP->continueNode;
834 op = (REOp) emitStateSP->continueOp;
839 heap_free(emitStateStack);
843 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
849 * Process the op against the two top operands, reducing them to a single
850 * operand in the penultimate slot. Update progLength and treeDepth.
853 ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
858 switch (opData->op) {
860 result = NewRENode(state, REOP_ALT);
863 result->kid = operandStack[operandSP - 2];
864 result->u.kid2 = operandStack[operandSP - 1];
865 operandStack[operandSP - 2] = result;
867 if (state->treeDepth == TREE_DEPTH_MAX) {
868 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
874 * Look at both alternates to see if there's a FLAT or a CLASS at
875 * the start of each. If so, use a prerequisite match.
877 if (((RENode *) result->kid)->op == REOP_FLAT &&
878 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
879 (state->flags & JSREG_FOLD) == 0) {
880 result->op = REOP_ALTPREREQ;
881 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
882 result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
883 /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
884 JUMP, <end> ... ENDALT */
885 state->progLength += 13;
888 if (((RENode *) result->kid)->op == REOP_CLASS &&
889 ((RENode *) result->kid)->u.ucclass.index < 256 &&
890 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
891 (state->flags & JSREG_FOLD) == 0) {
892 result->op = REOP_ALTPREREQ2;
893 result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
894 result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
895 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
896 JUMP, <end> ... ENDALT */
897 state->progLength += 13;
900 if (((RENode *) result->kid)->op == REOP_FLAT &&
901 ((RENode *) result->u.kid2)->op == REOP_CLASS &&
902 ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
903 (state->flags & JSREG_FOLD) == 0) {
904 result->op = REOP_ALTPREREQ2;
905 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
906 result->u.altprereq.ch2 =
907 ((RENode *) result->u.kid2)->u.ucclass.index;
908 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
909 JUMP, <end> ... ENDALT */
910 state->progLength += 13;
913 /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
914 state->progLength += 7;
919 result = operandStack[operandSP - 2];
921 result = result->next;
922 result->next = operandStack[operandSP - 1];
926 case REOP_ASSERT_NOT:
929 /* These should have been processed by a close paren. */
930 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
940 * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
941 * its being on the stack, and to propagate errors to its callers.
943 #define JSREG_FIND_PAREN_COUNT 0x8000
944 #define JSREG_FIND_PAREN_ERROR 0x4000
947 * Magic return value from FindParenCount and GetDecimalValue, to indicate
948 * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
949 * its findMax parameter is non-null.
951 #define OVERFLOW_VALUE ((UINT)-1)
954 FindParenCount(CompilerState *state)
959 if (state->flags & JSREG_FIND_PAREN_COUNT)
960 return OVERFLOW_VALUE;
963 * Copy state into temp, flag it so we never report an invalid backref,
964 * and reset its members to parse the entire regexp. This is obviously
965 * suboptimal, but GetDecimalValue calls us only if a backref appears to
966 * refer to a forward parenthetical, which is rare.
969 temp.flags |= JSREG_FIND_PAREN_COUNT;
970 temp.cp = temp.cpbegin;
975 temp.classBitmapsMem = 0;
976 for (i = 0; i < CLASS_CACHE_SIZE; i++)
977 temp.classCache[i].start = NULL;
979 if (!ParseRegExp(&temp)) {
980 state->flags |= JSREG_FIND_PAREN_ERROR;
981 return OVERFLOW_VALUE;
983 return temp.parenCount;
987 * Extract and return a decimal value at state->cp. The initial character c
988 * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
989 * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
990 * state->flags to discover whether an error occurred under findMax.
993 GetDecimalValue(WCHAR c, UINT max, UINT (*findMax)(CompilerState *state),
994 CompilerState *state)
996 UINT value = JS7_UNDEC(c);
997 BOOL overflow = (value > max && (!findMax || value > findMax(state)));
999 /* The following restriction allows simpler overflow checks. */
1000 assert(max <= ((UINT)-1 - 9) / 10);
1001 while (state->cp < state->cpend) {
1005 value = 10 * value + JS7_UNDEC(c);
1006 if (!overflow && value > max && (!findMax || value > findMax(state)))
1010 return overflow ? OVERFLOW_VALUE : value;
1014 * Calculate the total size of the bitmap required for a class expression.
1017 CalculateBitmapSize(CompilerState *state, RENode *target, const WCHAR *src,
1021 BOOL inRange = FALSE;
1022 WCHAR c, rangeStart = 0;
1023 UINT n, digit, nDigits, i;
1025 target->u.ucclass.bmsize = 0;
1026 target->u.ucclass.sense = TRUE;
1033 target->u.ucclass.sense = FALSE;
1036 while (src != end) {
1037 BOOL canStartRange = TRUE;
1064 if (src < end && RE_IS_LETTER(*src)) {
1065 localMax = (UINT) (*src++) & 0x1F;
1078 for (i = 0; (i < nDigits) && (src < end); i++) {
1080 if (!isASCIIHexDigit(c, &digit)) {
1082 * Back off to accepting the original
1089 n = (n << 4) | digit;
1094 canStartRange = FALSE;
1096 JS_ReportErrorNumber(state->context,
1097 js_GetErrorMessage, NULL,
1098 JSMSG_BAD_CLASS_RANGE);
1108 canStartRange = FALSE;
1110 JS_ReportErrorNumber(state->context,
1111 js_GetErrorMessage, NULL,
1112 JSMSG_BAD_CLASS_RANGE);
1118 * If this is the start of a range, ensure that it's less than
1132 * This is a non-ECMA extension - decimal escapes (in this
1133 * case, octal!) are supposed to be an error inside class
1134 * ranges, but supported here for backwards compatibility.
1139 if ('0' <= c && c <= '7') {
1141 n = 8 * n + JS7_UNDEC(c);
1143 if ('0' <= c && c <= '7') {
1145 i = 8 * n + JS7_UNDEC(c);
1166 /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1167 if (rangeStart > localMax) {
1168 JS_ReportErrorNumber(state->context,
1169 js_GetErrorMessage, NULL,
1170 JSMSG_BAD_CLASS_RANGE);
1175 if (canStartRange && src < end - 1) {
1179 rangeStart = (WCHAR)localMax;
1183 if (state->flags & JSREG_FOLD)
1184 rangeStart = localMax; /* one run of the uc/dc loop below */
1187 if (state->flags & JSREG_FOLD) {
1188 WCHAR maxch = localMax;
1190 for (i = rangeStart; i <= localMax; i++) {
1206 target->u.ucclass.bmsize = max;
1211 ParseMinMaxQuantifier(CompilerState *state, BOOL ignoreValues)
1215 const WCHAR *errp = state->cp++;
1220 min = GetDecimalValue(c, 0xFFFF, NULL, state);
1223 if (!ignoreValues && min == OVERFLOW_VALUE)
1224 return JSMSG_MIN_TOO_BIG;
1230 max = GetDecimalValue(c, 0xFFFF, NULL, state);
1232 if (!ignoreValues && max == OVERFLOW_VALUE)
1233 return JSMSG_MAX_TOO_BIG;
1234 if (!ignoreValues && min > max)
1235 return JSMSG_OUT_OF_ORDER;
1243 state->result = NewRENode(state, REOP_QUANT);
1245 return JSMSG_OUT_OF_MEMORY;
1246 state->result->u.range.min = min;
1247 state->result->u.range.max = max;
1249 * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1250 * where <max> is written as compact(max+1) to make
1251 * (UINT)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1253 state->progLength += (1 + GetCompactIndexWidth(min)
1254 + GetCompactIndexWidth(max + 1)
1265 ParseQuantifier(CompilerState *state)
1268 term = state->result;
1269 if (state->cp < state->cpend) {
1270 switch (*state->cp) {
1272 state->result = NewRENode(state, REOP_QUANT);
1275 state->result->u.range.min = 1;
1276 state->result->u.range.max = (UINT)-1;
1277 /* <PLUS>, <next> ... <ENDCHILD> */
1278 state->progLength += 4;
1281 state->result = NewRENode(state, REOP_QUANT);
1284 state->result->u.range.min = 0;
1285 state->result->u.range.max = (UINT)-1;
1286 /* <STAR>, <next> ... <ENDCHILD> */
1287 state->progLength += 4;
1290 state->result = NewRENode(state, REOP_QUANT);
1293 state->result->u.range.min = 0;
1294 state->result->u.range.max = 1;
1295 /* <OPT>, <next> ... <ENDCHILD> */
1296 state->progLength += 4;
1298 case '{': /* balance '}' */
1302 err = ParseMinMaxQuantifier(state, FALSE);
1308 ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
1317 if (state->treeDepth == TREE_DEPTH_MAX) {
1318 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1324 state->result->kid = term;
1325 if (state->cp < state->cpend && *state->cp == '?') {
1327 state->result->u.range.greedy = FALSE;
1329 state->result->u.range.greedy = TRUE;
1335 * item: assertion An item is either an assertion or
1336 * quantatom a quantified atom.
1338 * assertion: '^' Assertions match beginning of string
1339 * (or line if the class static property
1340 * RegExp.multiline is true).
1341 * '$' End of string (or line if the class
1342 * static property RegExp.multiline is
1344 * '\b' Word boundary (between \w and \W).
1345 * '\B' Word non-boundary.
1347 * quantatom: atom An unquantified atom.
1348 * quantatom '{' n ',' m '}'
1349 * Atom must occur between n and m times.
1350 * quantatom '{' n ',' '}' Atom must occur at least n times.
1351 * quantatom '{' n '}' Atom must occur exactly n times.
1352 * quantatom '*' Zero or more times (same as {0,}).
1353 * quantatom '+' One or more times (same as {1,}).
1354 * quantatom '?' Zero or one time (same as {0,1}).
1356 * any of which can be optionally followed by '?' for ungreedy
1358 * atom: '(' regexp ')' A parenthesized regexp (what matched
1359 * can be addressed using a backreference,
1361 * '.' Matches any char except '\n'.
1362 * '[' classlist ']' A character class.
1363 * '[' '^' classlist ']' A negated character class.
1365 * '\n' Newline (Line Feed).
1366 * '\r' Carriage Return.
1367 * '\t' Horizontal Tab.
1368 * '\v' Vertical Tab.
1369 * '\d' A digit (same as [0-9]).
1371 * '\w' A word character, [0-9a-z_A-Z].
1372 * '\W' A non-word character.
1373 * '\s' A whitespace character, [ \b\f\n\r\t\v].
1374 * '\S' A non-whitespace character.
1375 * '\' n A backreference to the nth (n decimal
1376 * and positive) parenthesized expression.
1377 * '\' octal An octal escape sequence (octal must be
1378 * two or three digits long, unless it is
1379 * 0 for the null character).
1380 * '\x' hex A hex escape (hex must be two digits).
1381 * '\u' unicode A unicode escape (must be four digits).
1382 * '\c' ctrl A control character, ctrl is a letter.
1383 * '\' literalatomchar Any character except one of the above
1384 * that follow '\' in an atom.
1385 * otheratomchar Any character not first among the other
1386 * atom right-hand sides.
1389 ParseTerm(CompilerState *state)
1391 WCHAR c = *state->cp++;
1393 UINT num, tmp, n, i;
1394 const WCHAR *termStart;
1397 /* assertions and atoms */
1399 state->result = NewRENode(state, REOP_BOL);
1402 state->progLength++;
1405 state->result = NewRENode(state, REOP_EOL);
1408 state->progLength++;
1411 if (state->cp >= state->cpend) {
1412 /* a trailing '\' is an error */
1413 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
1418 /* assertion escapes */
1420 state->result = NewRENode(state, REOP_WBDRY);
1423 state->progLength++;
1426 state->result = NewRENode(state, REOP_WNONBDRY);
1429 state->progLength++;
1431 /* Decimal escape */
1433 /* Give a strict warning. See also the note below. */
1434 WARN("non-octal digit in an escape sequence that doesn't match a back-reference\n");
1437 while (state->cp < state->cpend) {
1439 if (c < '0' || '7' < c)
1442 tmp = 8 * num + (UINT)JS7_UNDEC(c);
1449 state->result = NewRENode(state, REOP_FLAT);
1452 state->result->u.flat.chr = c;
1453 state->result->u.flat.length = 1;
1454 state->progLength += 3;
1465 termStart = state->cp - 1;
1466 num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1467 if (state->flags & JSREG_FIND_PAREN_ERROR)
1469 if (num == OVERFLOW_VALUE) {
1470 /* Give a strict mode warning. */
1471 WARN("back-reference exceeds number of capturing parentheses\n");
1474 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1475 * error here. However, for compatibility with IE, we treat the
1476 * whole backref as flat if the first character in it is not a
1477 * valid octal character, and as an octal escape otherwise.
1479 state->cp = termStart;
1481 /* Treat this as flat. termStart - 1 is the \. */
1486 /* Treat this as an octal escape. */
1489 assert(1 <= num && num <= 0x10000);
1490 state->result = NewRENode(state, REOP_BACKREF);
1493 state->result->u.parenIndex = num - 1;
1495 += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1497 /* Control escape */
1513 /* Control letter */
1515 if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
1516 c = (WCHAR) (*state->cp++ & 0x1F);
1518 /* back off to accepting the original '\' as a literal */
1523 /* HexEscapeSequence */
1527 /* UnicodeEscapeSequence */
1532 for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1535 if (!isASCIIHexDigit(c, &digit)) {
1537 * Back off to accepting the original 'u' or 'x' as a
1544 n = (n << 4) | digit;
1548 /* Character class escapes */
1550 state->result = NewRENode(state, REOP_DIGIT);
1554 state->progLength++;
1557 state->result = NewRENode(state, REOP_NONDIGIT);
1560 state->result = NewRENode(state, REOP_SPACE);
1563 state->result = NewRENode(state, REOP_NONSPACE);
1566 state->result = NewRENode(state, REOP_ALNUM);
1569 state->result = NewRENode(state, REOP_NONALNUM);
1571 /* IdentityEscape */
1573 state->result = NewRENode(state, REOP_FLAT);
1576 state->result->u.flat.chr = c;
1577 state->result->u.flat.length = 1;
1578 state->result->kid = (void *) (state->cp - 1);
1579 state->progLength += 3;
1584 state->result = NewRENode(state, REOP_CLASS);
1587 termStart = state->cp;
1588 state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1590 if (state->cp == state->cpend) {
1591 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1592 JSMSG_UNTERM_CLASS, termStart);
1596 if (*state->cp == '\\') {
1598 if (state->cp != state->cpend)
1602 if (*state->cp == ']') {
1603 state->result->u.ucclass.kidlen = state->cp - termStart;
1608 for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1609 if (!state->classCache[i].start) {
1610 state->classCache[i].start = termStart;
1611 state->classCache[i].length = state->result->u.ucclass.kidlen;
1612 state->classCache[i].index = state->classCount;
1615 if (state->classCache[i].length ==
1616 state->result->u.ucclass.kidlen) {
1617 for (n = 0; ; n++) {
1618 if (n == state->classCache[i].length) {
1619 state->result->u.ucclass.index
1620 = state->classCache[i].index;
1623 if (state->classCache[i].start[n] != termStart[n])
1628 state->result->u.ucclass.index = state->classCount++;
1632 * Call CalculateBitmapSize now as we want any errors it finds
1633 * to be reported during the parse phase, not at execution.
1635 if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1638 * Update classBitmapsMem with number of bytes to hold bmsize bits,
1639 * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1640 * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1642 n = (state->result->u.ucclass.bmsize >> 3) + 1;
1643 if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1644 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1647 state->classBitmapsMem += n;
1648 /* CLASS, <index> */
1650 += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1654 state->result = NewRENode(state, REOP_DOT);
1659 const WCHAR *errp = state->cp--;
1662 err = ParseMinMaxQuantifier(state, TRUE);
1673 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1674 JSMSG_BAD_QUANTIFIER, state->cp - 1);
1678 state->result = NewRENode(state, REOP_FLAT);
1681 state->result->u.flat.chr = c;
1682 state->result->u.flat.length = 1;
1683 state->result->kid = (void *) (state->cp - 1);
1684 state->progLength += 3;
1687 return ParseQuantifier(state);
1691 * Top-down regular expression grammar, based closely on Perl4.
1693 * regexp: altern A regular expression is one or more
1694 * altern '|' regexp alternatives separated by vertical bar.
1696 #define INITIAL_STACK_SIZE 128
1699 ParseRegExp(CompilerState *state)
1703 REOpData *operatorStack;
1704 RENode **operandStack;
1707 BOOL result = FALSE;
1709 INT operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
1710 INT operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
1712 /* Watch out for empty regexp */
1713 if (state->cp == state->cpend) {
1714 state->result = NewRENode(state, REOP_EMPTY);
1715 return (state->result != NULL);
1718 operatorStack = heap_alloc(sizeof(REOpData) * operatorStackSize);
1722 operandStack = heap_alloc(sizeof(RENode *) * operandStackSize);
1727 parenIndex = state->parenCount;
1728 if (state->cp == state->cpend) {
1730 * If we are at the end of the regexp and we're short one or more
1731 * operands, the regexp must have the form /x|/ or some such, with
1732 * left parentheses making us short more than one operand.
1734 if (operatorSP >= operandSP) {
1735 operand = NewRENode(state, REOP_EMPTY);
1741 switch (*state->cp) {
1744 if (state->cp + 1 < state->cpend &&
1745 *state->cp == '?' &&
1746 (state->cp[1] == '=' ||
1747 state->cp[1] == '!' ||
1748 state->cp[1] == ':')) {
1749 switch (state->cp[1]) {
1752 /* ASSERT, <next>, ... ASSERTTEST */
1753 state->progLength += 4;
1756 op = REOP_ASSERT_NOT;
1757 /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
1758 state->progLength += 4;
1761 op = REOP_LPARENNON;
1767 /* LPAREN, <index>, ... RPAREN, <index> */
1769 += 2 * (1 + GetCompactIndexWidth(parenIndex));
1770 state->parenCount++;
1771 if (state->parenCount == 65535) {
1772 ReportRegExpError(state, JSREPORT_ERROR,
1773 JSMSG_TOO_MANY_PARENS);
1781 * If there's no stacked open parenthesis, throw syntax error.
1783 for (i = operatorSP - 1; ; i--) {
1785 ReportRegExpError(state, JSREPORT_ERROR,
1786 JSMSG_UNMATCHED_RIGHT_PAREN);
1789 if (operatorStack[i].op == REOP_ASSERT ||
1790 operatorStack[i].op == REOP_ASSERT_NOT ||
1791 operatorStack[i].op == REOP_LPARENNON ||
1792 operatorStack[i].op == REOP_LPAREN) {
1799 /* Expected an operand before these, so make an empty one */
1800 operand = NewRENode(state, REOP_EMPTY);
1806 if (!ParseTerm(state))
1808 operand = state->result;
1810 if (operandSP == operandStackSize) {
1812 operandStackSize += operandStackSize;
1813 tmp = heap_realloc(operandStack, sizeof(RENode *) * operandStackSize);
1818 operandStack[operandSP++] = operand;
1823 /* At the end; process remaining operators. */
1825 if (state->cp == state->cpend) {
1826 while (operatorSP) {
1828 if (!ProcessOp(state, &operatorStack[operatorSP],
1829 operandStack, operandSP))
1833 assert(operandSP == 1);
1834 state->result = operandStack[0];
1839 switch (*state->cp) {
1841 /* Process any stacked 'concat' operators */
1843 while (operatorSP &&
1844 operatorStack[operatorSP - 1].op == REOP_CONCAT) {
1846 if (!ProcessOp(state, &operatorStack[operatorSP],
1847 operandStack, operandSP)) {
1857 * If there's no stacked open parenthesis, throw syntax error.
1859 for (i = operatorSP - 1; ; i--) {
1861 ReportRegExpError(state, JSREPORT_ERROR,
1862 JSMSG_UNMATCHED_RIGHT_PAREN);
1865 if (operatorStack[i].op == REOP_ASSERT ||
1866 operatorStack[i].op == REOP_ASSERT_NOT ||
1867 operatorStack[i].op == REOP_LPARENNON ||
1868 operatorStack[i].op == REOP_LPAREN) {
1874 /* Process everything on the stack until the open parenthesis. */
1878 switch (operatorStack[operatorSP].op) {
1880 case REOP_ASSERT_NOT:
1882 operand = NewRENode(state, operatorStack[operatorSP].op);
1885 operand->u.parenIndex =
1886 operatorStack[operatorSP].parenIndex;
1888 operand->kid = operandStack[operandSP - 1];
1889 operandStack[operandSP - 1] = operand;
1890 if (state->treeDepth == TREE_DEPTH_MAX) {
1891 ReportRegExpError(state, JSREPORT_ERROR,
1892 JSMSG_REGEXP_TOO_COMPLEX);
1898 case REOP_LPARENNON:
1899 state->result = operandStack[operandSP - 1];
1900 if (!ParseQuantifier(state))
1902 operandStack[operandSP - 1] = state->result;
1903 goto restartOperator;
1905 if (!ProcessOp(state, &operatorStack[operatorSP],
1906 operandStack, operandSP))
1916 const WCHAR *errp = state->cp;
1918 if (ParseMinMaxQuantifier(state, TRUE) < 0) {
1920 * This didn't even scan correctly as a quantifier, so we should
1934 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
1940 /* Anything else is the start of the next term. */
1943 if (operatorSP == operatorStackSize) {
1945 operatorStackSize += operatorStackSize;
1946 tmp = heap_realloc(operatorStack, sizeof(REOpData) * operatorStackSize);
1949 operatorStack = tmp;
1951 operatorStack[operatorSP].op = op;
1952 operatorStack[operatorSP].errPos = state->cp;
1953 operatorStack[operatorSP++].parenIndex = parenIndex;
1958 heap_free(operatorStack);
1959 heap_free(operandStack);
1964 * Save the current state of the match - the position in the input
1965 * text as well as the position in the bytecode. The state of any
1966 * parent expressions is also saved (preceding state).
1967 * Contents of parenCount parentheses from parenIndex are also saved.
1969 static REBackTrackData *
1970 PushBackTrackState(REGlobalData *gData, REOp op,
1971 jsbytecode *target, REMatchState *x, const WCHAR *cp,
1972 size_t parenIndex, size_t parenCount)
1975 REBackTrackData *result =
1976 (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
1978 size_t sz = sizeof(REBackTrackData) +
1979 gData->stateStackTop * sizeof(REProgState) +
1980 parenCount * sizeof(RECapture);
1982 ptrdiff_t btsize = gData->backTrackStackSize;
1983 ptrdiff_t btincr = ((char *)result + sz) -
1984 ((char *)gData->backTrackStack + btsize);
1986 TRACE("\tBT_Push: %lu,%lu\n", (ULONG_PTR)parenIndex, (ULONG_PTR)parenCount);
1988 JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount));
1990 ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
1992 JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION);
1993 btincr = ((btincr+btsize-1)/btsize)*btsize;
1994 gData->backTrackStack = jsheap_grow(gData->pool, gData->backTrackStack, btsize, btincr);
1995 if (!gData->backTrackStack) {
1996 js_ReportOutOfScriptQuota(gData->cx);
2000 gData->backTrackStackSize = btsize + btincr;
2001 result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
2003 gData->backTrackSP = result;
2004 result->sz = gData->cursz;
2007 result->backtrack_op = op;
2008 result->backtrack_pc = target;
2010 result->parenCount = parenCount;
2011 result->parenIndex = parenIndex;
2013 result->saveStateStackTop = gData->stateStackTop;
2014 assert(gData->stateStackTop);
2015 memcpy(result + 1, gData->stateStack,
2016 sizeof(REProgState) * result->saveStateStackTop);
2018 if (parenCount != 0) {
2019 memcpy((char *)(result + 1) +
2020 sizeof(REProgState) * result->saveStateStackTop,
2021 &x->parens[parenIndex],
2022 sizeof(RECapture) * parenCount);
2023 for (i = 0; i != parenCount; i++)
2024 x->parens[parenIndex + i].index = -1;
2030 static inline REMatchState *
2031 FlatNIMatcher(REGlobalData *gData, REMatchState *x, WCHAR *matchChars,
2035 assert(gData->cpend >= x->cp);
2036 if (length > (size_t)(gData->cpend - x->cp))
2038 for (i = 0; i != length; i++) {
2039 if (toupperW(matchChars[i]) != toupperW(x->cp[i]))
2047 * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2048 * 2. If E is not a character then go to step 6.
2049 * 3. Let ch be E's character.
2050 * 4. Let A be a one-element RECharSet containing the character ch.
2051 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2052 * 6. E must be an integer. Let n be that integer.
2053 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2054 * 8. Return an internal Matcher closure that takes two arguments, a State x
2055 * and a Continuation c, and performs the following:
2056 * 1. Let cap be x's captures internal array.
2057 * 2. Let s be cap[n].
2058 * 3. If s is undefined, then call c(x) and return its result.
2059 * 4. Let e be x's endIndex.
2060 * 5. Let len be s's length.
2061 * 6. Let f be e+len.
2062 * 7. If f>InputLength, return failure.
2063 * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2064 * such that Canonicalize(s[i]) is not the same character as
2065 * Canonicalize(Input [e+i]), then return failure.
2066 * 9. Let y be the State (f, cap).
2067 * 10. Call c(y) and return its result.
2069 static REMatchState *
2070 BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex)
2073 const WCHAR *parenContent;
2074 RECapture *cap = &x->parens[parenIndex];
2076 if (cap->index == -1)
2080 if (x->cp + len > gData->cpend)
2083 parenContent = &gData->cpbegin[cap->index];
2084 if (gData->regexp->flags & JSREG_FOLD) {
2085 for (i = 0; i < len; i++) {
2086 if (toupperW(parenContent[i]) != toupperW(x->cp[i]))
2090 for (i = 0; i < len; i++) {
2091 if (parenContent[i] != x->cp[i])
2099 /* Add a single character to the RECharSet */
2101 AddCharacterToCharSet(RECharSet *cs, WCHAR c)
2103 UINT byteIndex = (UINT)(c >> 3);
2104 assert(c <= cs->length);
2105 cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2109 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2111 AddCharacterRangeToCharSet(RECharSet *cs, UINT c1, UINT c2)
2115 UINT byteIndex1 = c1 >> 3;
2116 UINT byteIndex2 = c2 >> 3;
2118 assert(c2 <= cs->length && c1 <= c2);
2123 if (byteIndex1 == byteIndex2) {
2124 cs->u.bits[byteIndex1] |= ((BYTE)0xFF >> (7 - (c2 - c1))) << c1;
2126 cs->u.bits[byteIndex1] |= 0xFF << c1;
2127 for (i = byteIndex1 + 1; i < byteIndex2; i++)
2128 cs->u.bits[i] = 0xFF;
2129 cs->u.bits[byteIndex2] |= (BYTE)0xFF >> (7 - c2);
2133 /* Compile the source of the class into a RECharSet */
2135 ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
2137 const WCHAR *src, *end;
2138 BOOL inRange = FALSE;
2139 WCHAR rangeStart = 0;
2144 assert(!charSet->converted);
2146 * Assert that startIndex and length points to chars inside [] inside
2149 assert(1 <= charSet->u.src.startIndex);
2150 assert(charSet->u.src.startIndex
2151 < SysStringLen(gData->regexp->source));
2152 assert(charSet->u.src.length <= SysStringLen(gData->regexp->source)
2153 - 1 - charSet->u.src.startIndex);
2155 charSet->converted = TRUE;
2156 src = gData->regexp->source + charSet->u.src.startIndex;
2158 end = src + charSet->u.src.length;
2160 assert(src[-1] == '[' && end[0] == ']');
2162 byteLength = (charSet->length >> 3) + 1;
2163 charSet->u.bits = heap_alloc(byteLength);
2164 if (!charSet->u.bits) {
2165 JS_ReportOutOfMemory(gData->cx);
2169 memset(charSet->u.bits, 0, byteLength);
2175 assert(charSet->sense == FALSE);
2178 assert(charSet->sense == TRUE);
2181 while (src != end) {
2206 if (src < end && JS_ISWORD(*src)) {
2207 thisCh = (WCHAR)(*src++ & 0x1F);
2220 for (i = 0; (i < nDigits) && (src < end); i++) {
2223 if (!isASCIIHexDigit(c, &digit)) {
2225 * Back off to accepting the original '\'
2232 n = (n << 4) | digit;
2245 * This is a non-ECMA extension - decimal escapes (in this
2246 * case, octal!) are supposed to be an error inside class
2247 * ranges, but supported here for backwards compatibility.
2251 if ('0' <= c && c <= '7') {
2253 n = 8 * n + JS7_UNDEC(c);
2255 if ('0' <= c && c <= '7') {
2257 i = 8 * n + JS7_UNDEC(c);
2268 AddCharacterRangeToCharSet(charSet, '0', '9');
2269 continue; /* don't need range processing */
2271 AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2272 AddCharacterRangeToCharSet(charSet,
2274 (WCHAR)charSet->length);
2277 for (i = (INT)charSet->length; i >= 0; i--)
2279 AddCharacterToCharSet(charSet, (WCHAR)i);
2282 for (i = (INT)charSet->length; i >= 0; i--)
2284 AddCharacterToCharSet(charSet, (WCHAR)i);
2287 for (i = (INT)charSet->length; i >= 0; i--)
2289 AddCharacterToCharSet(charSet, (WCHAR)i);
2292 for (i = (INT)charSet->length; i >= 0; i--)
2294 AddCharacterToCharSet(charSet, (WCHAR)i);
2309 if (gData->regexp->flags & JSREG_FOLD) {
2312 assert(rangeStart <= thisCh);
2313 for (i = rangeStart; i <= thisCh; i++) {
2316 AddCharacterToCharSet(charSet, i);
2320 AddCharacterToCharSet(charSet, uch);
2322 AddCharacterToCharSet(charSet, dch);
2325 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2329 if (gData->regexp->flags & JSREG_FOLD) {
2330 AddCharacterToCharSet(charSet, toupperW(thisCh));
2331 AddCharacterToCharSet(charSet, tolowerW(thisCh));
2333 AddCharacterToCharSet(charSet, thisCh);
2335 if (src < end - 1) {
2339 rangeStart = thisCh;
2348 ReallocStateStack(REGlobalData *gData)
2350 size_t limit = gData->stateStackLimit;
2351 size_t sz = sizeof(REProgState) * limit;
2353 gData->stateStack = jsheap_grow(gData->pool, gData->stateStack, sz, sz);
2354 if (!gData->stateStack) {
2355 js_ReportOutOfScriptQuota(gData->cx);
2359 gData->stateStackLimit = limit + limit;
2363 #define PUSH_STATE_STACK(data) \
2365 ++(data)->stateStackTop; \
2366 if ((data)->stateStackTop == (data)->stateStackLimit && \
2367 !ReallocStateStack((data))) { \
2373 * Apply the current op against the given input to see if it's going to match
2374 * or fail. Return false if we don't get a match, true if we do. If updatecp is
2375 * true, then update the current state's cp. Always update startpc to the next
2378 static inline REMatchState *
2379 SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
2380 jsbytecode **startpc, BOOL updatecp)
2382 REMatchState *result = NULL;
2385 size_t offset, length, index;
2386 jsbytecode *pc = *startpc; /* pc has already been incremented past op */
2388 const WCHAR *startcp = x->cp;
2392 const char *opname = reop_names[op];
2393 TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
2394 (int)gData->stateStackTop * 2, "", opname);
2401 if (x->cp != gData->cpbegin) {
2402 if (/*!gData->cx->regExpStatics.multiline && FIXME !!! */
2403 !(gData->regexp->flags & JSREG_MULTILINE)) {
2406 if (!RE_IS_LINE_TERM(x->cp[-1]))
2412 if (x->cp != gData->cpend) {
2413 if (/*!gData->cx->regExpStatics.multiline &&*/
2414 !(gData->regexp->flags & JSREG_MULTILINE)) {
2417 if (!RE_IS_LINE_TERM(*x->cp))
2423 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2424 !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2429 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2430 (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2435 if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2441 if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
2447 if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
2453 if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2459 if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2465 if (x->cp != gData->cpend && isspaceW(*x->cp)) {
2471 if (x->cp != gData->cpend && !isspaceW(*x->cp)) {
2477 pc = ReadCompactIndex(pc, &parenIndex);
2478 assert(parenIndex < gData->regexp->parenCount);
2479 result = BackrefMatcher(gData, x, parenIndex);
2482 pc = ReadCompactIndex(pc, &offset);
2483 assert(offset < SysStringLen(gData->regexp->source));
2484 pc = ReadCompactIndex(pc, &length);
2485 assert(1 <= length);
2486 assert(length <= SysStringLen(gData->regexp->source) - offset);
2487 if (length <= (size_t)(gData->cpend - x->cp)) {
2488 source = gData->regexp->source + offset;
2489 TRACE("%s\n", debugstr_wn(source, length));
2490 for (index = 0; index != length; index++) {
2491 if (source[index] != x->cp[index])
2500 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2501 if (x->cp != gData->cpend && *x->cp == matchCh) {
2507 pc = ReadCompactIndex(pc, &offset);
2508 assert(offset < SysStringLen(gData->regexp->source));
2509 pc = ReadCompactIndex(pc, &length);
2510 assert(1 <= length);
2511 assert(length <= SysStringLen(gData->regexp->source) - offset);
2512 source = gData->regexp->source;
2513 result = FlatNIMatcher(gData, x, source + offset, length);
2517 if (x->cp != gData->cpend && toupperW(*x->cp) == toupperW(matchCh)) {
2523 matchCh = GET_ARG(pc);
2524 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2526 if (x->cp != gData->cpend && *x->cp == matchCh) {
2532 matchCh = GET_ARG(pc);
2534 if (x->cp != gData->cpend && toupperW(*x->cp) == toupperW(matchCh)) {
2540 pc = ReadCompactIndex(pc, &index);
2541 assert(index < gData->regexp->classCount);
2542 if (x->cp != gData->cpend) {
2543 charSet = &gData->regexp->classList[index];
2544 assert(charSet->converted);
2547 if (charSet->length != 0 &&
2548 ch <= charSet->length &&
2549 (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2556 pc = ReadCompactIndex(pc, &index);
2557 assert(index < gData->regexp->classCount);
2558 if (x->cp != gData->cpend) {
2559 charSet = &gData->regexp->classList[index];
2560 assert(charSet->converted);
2563 if (charSet->length == 0 ||
2564 ch > charSet->length ||
2565 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2586 static inline REMatchState *
2587 ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
2589 REMatchState *result = NULL;
2590 REBackTrackData *backTrackData;
2591 jsbytecode *nextpc, *testpc;
2594 REProgState *curState;
2595 const WCHAR *startcp;
2596 size_t parenIndex, k;
2597 size_t parenSoFar = 0;
2599 WCHAR matchCh1, matchCh2;
2603 jsbytecode *pc = gData->regexp->program;
2604 REOp op = (REOp) *pc++;
2607 * If the first node is a simple match, step the index into the string
2608 * until that match is made, or fail if it can't be found at all.
2610 if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & JSREG_STICKY)) {
2612 while (x->cp <= gData->cpend) {
2613 nextpc = pc; /* reset back to start each time */
2614 result = SimpleMatch(gData, x, op, &nextpc, TRUE);
2618 pc = nextpc; /* accept skip to next opcode */
2620 assert(op < REOP_LIMIT);
2631 const char *opname = reop_names[op];
2632 TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
2633 (int)gData->stateStackTop * 2, "", opname);
2635 if (REOP_IS_SIMPLE(op)) {
2636 result = SimpleMatch(gData, x, op, &pc, TRUE);
2638 curState = &gData->stateStack[gData->stateStackTop];
2642 case REOP_ALTPREREQ2:
2643 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2645 matchCh2 = GET_ARG(pc);
2650 if (x->cp != gData->cpend) {
2651 if (*x->cp == matchCh2)
2654 charSet = &gData->regexp->classList[k];
2655 if (!charSet->converted && !ProcessCharSet(gData, charSet))
2659 if ((charSet->length == 0 ||
2660 matchCh1 > charSet->length ||
2661 !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2669 case REOP_ALTPREREQ:
2670 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2672 matchCh1 = GET_ARG(pc);
2674 matchCh2 = GET_ARG(pc);
2676 if (x->cp == gData->cpend ||
2677 (*x->cp != matchCh1 && *x->cp != matchCh2)) {
2681 /* else false thru... */
2685 nextpc = pc + GET_OFFSET(pc); /* start of next alternate */
2686 pc += ARG_LEN; /* start of this alternate */
2687 curState->parenSoFar = parenSoFar;
2688 PUSH_STATE_STACK(gData);
2691 if (REOP_IS_SIMPLE(op)) {
2692 if (!SimpleMatch(gData, x, op, &pc, TRUE)) {
2693 op = (REOp) *nextpc++;
2700 nextop = (REOp) *nextpc++;
2701 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
2706 * Occurs at (successful) end of REOP_ALT,
2710 * If we have not gotten a result here, it is because of an
2711 * empty match. Do the same thing REOP_EMPTY would do.
2716 --gData->stateStackTop;
2717 pc += GET_OFFSET(pc);
2722 * Occurs at last (successful) end of REOP_ALT,
2726 * If we have not gotten a result here, it is because of an
2727 * empty match. Do the same thing REOP_EMPTY would do.
2732 --gData->stateStackTop;
2737 pc = ReadCompactIndex(pc, &parenIndex);
2738 TRACE("[ %lu ]\n", (ULONG_PTR)parenIndex);
2739 assert(parenIndex < gData->regexp->parenCount);
2740 if (parenIndex + 1 > parenSoFar)
2741 parenSoFar = parenIndex + 1;
2742 x->parens[parenIndex].index = x->cp - gData->cpbegin;
2743 x->parens[parenIndex].length = 0;
2751 pc = ReadCompactIndex(pc, &parenIndex);
2752 assert(parenIndex < gData->regexp->parenCount);
2753 cap = &x->parens[parenIndex];
2754 delta = x->cp - (gData->cpbegin + cap->index);
2755 cap->length = (delta < 0) ? 0 : (size_t) delta;
2763 nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */
2764 pc += ARG_LEN; /* start of ASSERT child */
2767 if (REOP_IS_SIMPLE(op) &&
2768 !SimpleMatch(gData, x, op, &testpc, FALSE)) {
2772 curState->u.assertion.top =
2773 (char *)gData->backTrackSP - (char *)gData->backTrackStack;
2774 curState->u.assertion.sz = gData->cursz;
2775 curState->index = x->cp - gData->cpbegin;
2776 curState->parenSoFar = parenSoFar;
2777 PUSH_STATE_STACK(gData);
2778 if (!PushBackTrackState(gData, REOP_ASSERTTEST,
2779 nextpc, x, x->cp, 0, 0)) {
2784 case REOP_ASSERT_NOT:
2785 nextpc = pc + GET_OFFSET(pc);
2789 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2790 SimpleMatch(gData, x, op, &testpc, FALSE) &&
2791 *testpc == REOP_ASSERTNOTTEST) {
2795 curState->u.assertion.top
2796 = (char *)gData->backTrackSP -
2797 (char *)gData->backTrackStack;
2798 curState->u.assertion.sz = gData->cursz;
2799 curState->index = x->cp - gData->cpbegin;
2800 curState->parenSoFar = parenSoFar;
2801 PUSH_STATE_STACK(gData);
2802 if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
2803 nextpc, x, x->cp, 0, 0)) {
2808 case REOP_ASSERTTEST:
2809 --gData->stateStackTop;
2811 x->cp = gData->cpbegin + curState->index;
2812 gData->backTrackSP =
2813 (REBackTrackData *) ((char *)gData->backTrackStack +
2814 curState->u.assertion.top);
2815 gData->cursz = curState->u.assertion.sz;
2820 case REOP_ASSERTNOTTEST:
2821 --gData->stateStackTop;
2823 x->cp = gData->cpbegin + curState->index;
2824 gData->backTrackSP =
2825 (REBackTrackData *) ((char *)gData->backTrackStack +
2826 curState->u.assertion.top);
2827 gData->cursz = curState->u.assertion.sz;
2828 result = (!result) ? x : NULL;
2831 curState->u.quantifier.min = 0;
2832 curState->u.quantifier.max = (UINT)-1;
2835 curState->u.quantifier.min = 1;
2836 curState->u.quantifier.max = (UINT)-1;
2839 curState->u.quantifier.min = 0;
2840 curState->u.quantifier.max = 1;
2843 pc = ReadCompactIndex(pc, &k);
2844 curState->u.quantifier.min = k;
2845 pc = ReadCompactIndex(pc, &k);
2846 /* max is k - 1 to use one byte for (UINT)-1 sentinel. */
2847 curState->u.quantifier.max = k - 1;
2848 assert(curState->u.quantifier.min <= curState->u.quantifier.max);
2850 if (curState->u.quantifier.max == 0) {
2851 pc = pc + GET_OFFSET(pc);
2856 /* Step over <next> */
2857 nextpc = pc + ARG_LEN;
2858 op = (REOp) *nextpc++;
2860 if (REOP_IS_SIMPLE(op)) {
2861 if (!SimpleMatch(gData, x, op, &nextpc, TRUE)) {
2862 if (curState->u.quantifier.min == 0)
2866 pc = pc + GET_OFFSET(pc);
2869 op = (REOp) *nextpc++;
2872 curState->index = startcp - gData->cpbegin;
2873 curState->continue_op = REOP_REPEAT;
2874 curState->continue_pc = pc;
2875 curState->parenSoFar = parenSoFar;
2876 PUSH_STATE_STACK(gData);
2877 if (curState->u.quantifier.min == 0 &&
2878 !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
2885 case REOP_ENDCHILD: /* marks the end of a quantifier child */
2886 pc = curState[-1].continue_pc;
2887 op = (REOp) curState[-1].continue_op;
2896 --gData->stateStackTop;
2898 /* Failed, see if we have enough children. */
2899 if (curState->u.quantifier.min == 0)
2903 if (curState->u.quantifier.min == 0 &&
2904 x->cp == gData->cpbegin + curState->index) {
2905 /* matched an empty string, that'll get us nowhere */
2909 if (curState->u.quantifier.min != 0)
2910 curState->u.quantifier.min--;
2911 if (curState->u.quantifier.max != (UINT) -1)
2912 curState->u.quantifier.max--;
2913 if (curState->u.quantifier.max == 0)
2915 nextpc = pc + ARG_LEN;
2916 nextop = (REOp) *nextpc;
2918 if (REOP_IS_SIMPLE(nextop)) {
2920 if (!SimpleMatch(gData, x, nextop, &nextpc, TRUE)) {
2921 if (curState->u.quantifier.min == 0)
2928 curState->index = startcp - gData->cpbegin;
2929 PUSH_STATE_STACK(gData);
2930 if (curState->u.quantifier.min == 0 &&
2931 !PushBackTrackState(gData, REOP_REPEAT,
2933 curState->parenSoFar,
2935 curState->parenSoFar)) {
2938 } while (*nextpc == REOP_ENDCHILD);
2941 parenSoFar = curState->parenSoFar;
2946 pc += GET_OFFSET(pc);
2949 case REOP_MINIMALSTAR:
2950 curState->u.quantifier.min = 0;
2951 curState->u.quantifier.max = (UINT)-1;
2952 goto minimalquantcommon;
2953 case REOP_MINIMALPLUS:
2954 curState->u.quantifier.min = 1;
2955 curState->u.quantifier.max = (UINT)-1;
2956 goto minimalquantcommon;
2957 case REOP_MINIMALOPT:
2958 curState->u.quantifier.min = 0;
2959 curState->u.quantifier.max = 1;
2960 goto minimalquantcommon;
2961 case REOP_MINIMALQUANT:
2962 pc = ReadCompactIndex(pc, &k);
2963 curState->u.quantifier.min = k;
2964 pc = ReadCompactIndex(pc, &k);
2965 /* See REOP_QUANT comments about k - 1. */
2966 curState->u.quantifier.max = k - 1;
2967 assert(curState->u.quantifier.min
2968 <= curState->u.quantifier.max);
2970 curState->index = x->cp - gData->cpbegin;
2971 curState->parenSoFar = parenSoFar;
2972 PUSH_STATE_STACK(gData);
2973 if (curState->u.quantifier.min != 0) {
2974 curState->continue_op = REOP_MINIMALREPEAT;
2975 curState->continue_pc = pc;
2976 /* step over <next> */
2980 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
2981 pc, x, x->cp, 0, 0)) {
2984 --gData->stateStackTop;
2985 pc = pc + GET_OFFSET(pc);
2990 case REOP_MINIMALREPEAT:
2991 --gData->stateStackTop;
2994 TRACE("{%d,%d}\n", curState->u.quantifier.min, curState->u.quantifier.max);
2995 #define PREPARE_REPEAT() \
2997 curState->index = x->cp - gData->cpbegin; \
2998 curState->continue_op = REOP_MINIMALREPEAT; \
2999 curState->continue_pc = pc; \
3001 for (k = curState->parenSoFar; k < parenSoFar; k++) \
3002 x->parens[k].index = -1; \
3003 PUSH_STATE_STACK(gData); \
3004 op = (REOp) *pc++; \
3005 assert(op < REOP_LIMIT); \
3011 * Non-greedy failure - try to consume another child.
3013 if (curState->u.quantifier.max == (UINT) -1 ||
3014 curState->u.quantifier.max > 0) {
3018 /* Don't need to adjust pc since we're going to pop. */
3021 if (curState->u.quantifier.min == 0 &&
3022 x->cp == gData->cpbegin + curState->index) {
3023 /* Matched an empty string, that'll get us nowhere. */
3027 if (curState->u.quantifier.min != 0)
3028 curState->u.quantifier.min--;
3029 if (curState->u.quantifier.max != (UINT) -1)
3030 curState->u.quantifier.max--;
3031 if (curState->u.quantifier.min != 0) {
3035 curState->index = x->cp - gData->cpbegin;
3036 curState->parenSoFar = parenSoFar;
3037 PUSH_STATE_STACK(gData);
3038 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3040 curState->parenSoFar,
3041 parenSoFar - curState->parenSoFar)) {
3044 --gData->stateStackTop;
3045 pc = pc + GET_OFFSET(pc);
3047 assert(op < REOP_LIMIT);
3057 * If the match failed and there's a backtrack option, take it.
3058 * Otherwise this is a complete and utter failure.
3061 if (gData->cursz == 0)
3064 /* Potentially detect explosive regex here. */
3065 gData->backTrackCount++;
3066 if (gData->backTrackLimit &&
3067 gData->backTrackCount >= gData->backTrackLimit) {
3068 JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
3069 JSMSG_REGEXP_TOO_COMPLEX);
3074 backTrackData = gData->backTrackSP;
3075 gData->cursz = backTrackData->sz;
3076 gData->backTrackSP =
3077 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3078 x->cp = backTrackData->cp;
3079 pc = backTrackData->backtrack_pc;
3080 op = (REOp) backTrackData->backtrack_op;
3081 assert(op < REOP_LIMIT);
3082 gData->stateStackTop = backTrackData->saveStateStackTop;
3083 assert(gData->stateStackTop);
3085 memcpy(gData->stateStack, backTrackData + 1,
3086 sizeof(REProgState) * backTrackData->saveStateStackTop);
3087 curState = &gData->stateStack[gData->stateStackTop - 1];
3089 if (backTrackData->parenCount) {
3090 memcpy(&x->parens[backTrackData->parenIndex],
3091 (char *)(backTrackData + 1) +
3092 sizeof(REProgState) * backTrackData->saveStateStackTop,
3093 sizeof(RECapture) * backTrackData->parenCount);
3094 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3096 for (k = curState->parenSoFar; k < parenSoFar; k++)
3097 x->parens[k].index = -1;
3098 parenSoFar = curState->parenSoFar;
3101 TRACE("\tBT_Pop: %ld,%ld\n",
3102 (ULONG_PTR)backTrackData->parenIndex,
3103 (ULONG_PTR)backTrackData->parenCount);
3109 * Continue with the expression.
3112 assert(op < REOP_LIMIT);
3124 static REMatchState *MatchRegExp(REGlobalData *gData, REMatchState *x)
3126 REMatchState *result;
3127 const WCHAR *cp = x->cp;
3132 * Have to include the position beyond the last character
3133 * in order to detect end-of-input/line condition.
3135 for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3136 gData->skipped = cp2 - cp;
3138 for (j = 0; j < gData->regexp->parenCount; j++)
3139 x->parens[j].index = -1;
3140 result = ExecuteREBytecode(gData, x);
3141 if (!gData->ok || result || (gData->regexp->flags & JSREG_STICKY))
3143 gData->backTrackSP = gData->backTrackStack;
3145 gData->stateStackTop = 0;
3146 cp2 = cp + gData->skipped;
3151 #define MIN_BACKTRACK_LIMIT 400000
3153 static REMatchState *InitMatch(script_ctx_t *cx, REGlobalData *gData, JSRegExp *re, size_t length)
3155 REMatchState *result;
3158 gData->backTrackStackSize = INITIAL_BACKTRACK;
3159 gData->backTrackStack = jsheap_alloc(gData->pool, INITIAL_BACKTRACK);
3160 if (!gData->backTrackStack)
3163 gData->backTrackSP = gData->backTrackStack;
3165 gData->backTrackCount = 0;
3166 gData->backTrackLimit = 0;
3168 gData->stateStackLimit = INITIAL_STATESTACK;
3169 gData->stateStack = jsheap_alloc(gData->pool, sizeof(REProgState) * INITIAL_STATESTACK);
3170 if (!gData->stateStack)
3173 gData->stateStackTop = 0;
3178 result = jsheap_alloc(gData->pool, offsetof(REMatchState, parens) + re->parenCount * sizeof(RECapture));
3182 for (i = 0; i < re->classCount; i++) {
3183 if (!re->classList[i].converted &&
3184 !ProcessCharSet(gData, &re->classList[i])) {
3192 js_ReportOutOfScriptQuota(cx);
3198 js_DestroyRegExp(JSRegExp *re)
3200 if (re->classList) {
3202 for (i = 0; i < re->classCount; i++) {
3203 if (re->classList[i].converted)
3204 heap_free(re->classList[i].u.bits);
3205 re->classList[i].u.bits = NULL;
3207 heap_free(re->classList);
3213 js_NewRegExp(script_ctx_t *cx, BSTR str, UINT flags, BOOL flat)
3217 CompilerState state;
3224 mark = jsheap_mark(&cx->tmp_heap);
3225 len = SysStringLen(str);
3231 state.cpbegin = state.cp;
3232 state.cpend = state.cp + len;
3233 state.flags = flags;
3234 state.parenCount = 0;
3235 state.classCount = 0;
3236 state.progLength = 0;
3237 state.treeDepth = 0;
3238 state.classBitmapsMem = 0;
3239 for (i = 0; i < CLASS_CACHE_SIZE; i++)
3240 state.classCache[i].start = NULL;
3242 if (len != 0 && flat) {
3243 state.result = NewRENode(&state, REOP_FLAT);
3246 state.result->u.flat.chr = *state.cpbegin;
3247 state.result->u.flat.length = len;
3248 state.result->kid = (void *) state.cpbegin;
3249 /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
3250 state.progLength += 1 + GetCompactIndexWidth(0)
3251 + GetCompactIndexWidth(len);
3253 if (!ParseRegExp(&state))
3256 resize = offsetof(JSRegExp, program) + state.progLength + 1;
3257 re = heap_alloc(resize);
3261 assert(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
3262 re->classCount = state.classCount;
3263 if (re->classCount) {
3264 re->classList = heap_alloc(re->classCount * sizeof(RECharSet));
3265 if (!re->classList) {
3266 js_DestroyRegExp(re);
3270 for (i = 0; i < re->classCount; i++)
3271 re->classList[i].converted = FALSE;
3273 re->classList = NULL;
3275 endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
3277 js_DestroyRegExp(re);
3281 *endPC++ = REOP_END;
3283 * Check whether size was overestimated and shrink using realloc.
3284 * This is safe since no pointers to newly parsed regexp or its parts
3285 * besides re exist here.
3287 if ((size_t)(endPC - re->program) != state.progLength + 1) {
3289 assert((size_t)(endPC - re->program) < state.progLength + 1);
3290 resize = offsetof(JSRegExp, program) + (endPC - re->program);
3291 tmp = heap_realloc(re, resize);
3297 re->parenCount = state.parenCount;
3305 static inline RegExpInstance *regexp_from_vdisp(vdisp_t *vdisp)
3307 return (RegExpInstance*)vdisp->u.jsdisp;
3310 static void set_last_index(RegExpInstance *This, DWORD last_index)
3312 This->last_index = last_index;
3313 VariantClear(&This->last_index_var);
3314 num_set_val(&This->last_index_var, last_index);
3317 static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, DWORD rem_flags,
3318 const WCHAR *str, DWORD len, const WCHAR **cp, match_result_t **parens, DWORD *parens_size,
3319 DWORD *parens_cnt, match_result_t *ret)
3321 REMatchState *x, *result;
3325 gData.cpbegin = *cp;
3326 gData.cpend = str + len;
3327 gData.start = *cp-str;
3329 gData.pool = &ctx->tmp_heap;
3331 x = InitMatch(NULL, &gData, regexp->jsregexp, gData.cpend - gData.cpbegin);
3333 WARN("InitMatch failed\n");
3338 result = MatchRegExp(&gData, x);
3340 WARN("MatchRegExp failed\n");
3345 if(rem_flags & REM_RESET_INDEX)
3346 set_last_index(regexp, 0);
3351 if(regexp->jsregexp->parenCount > *parens_size) {
3352 match_result_t *new_parens;
3355 new_parens = heap_realloc(*parens, sizeof(match_result_t)*regexp->jsregexp->parenCount);
3357 new_parens = heap_alloc(sizeof(match_result_t)*regexp->jsregexp->parenCount);
3359 return E_OUTOFMEMORY;
3361 *parens = new_parens;
3365 /* FIXME: We often already have a copy of input string that we could use to store last match */
3366 if(!(rem_flags & REM_NO_CTX_UPDATE) &&
3367 (!ctx->last_match || len != SysStringLen(ctx->last_match) || strncmpW(ctx->last_match, str, len))) {
3370 last_match = SysAllocStringLen(str, len);
3372 return E_OUTOFMEMORY;
3373 SysFreeString(ctx->last_match);
3374 ctx->last_match = last_match;
3380 *parens_cnt = regexp->jsregexp->parenCount;
3382 for(i=0; i < regexp->jsregexp->parenCount; i++) {
3383 if(result->parens[i].index == -1) {
3384 (*parens)[i].str = NULL;
3385 (*parens)[i].len = 0;
3387 (*parens)[i].str = *cp + result->parens[i].index;
3388 (*parens)[i].len = result->parens[i].length;
3393 matchlen = (result->cp-*cp) - gData.skipped;
3395 ret->str = result->cp-matchlen;
3396 ret->len = matchlen;
3397 set_last_index(regexp, result->cp-str);
3399 if(!(rem_flags & REM_NO_CTX_UPDATE)) {
3400 ctx->last_match_index = ret->str-str;
3401 ctx->last_match_length = matchlen;
3407 HRESULT regexp_match_next(script_ctx_t *ctx, jsdisp_t *dispex, DWORD rem_flags, const WCHAR *str,
3408 DWORD len, const WCHAR **cp, match_result_t **parens, DWORD *parens_size, DWORD *parens_cnt,
3409 match_result_t *ret)
3411 RegExpInstance *regexp = (RegExpInstance*)dispex;
3415 if((rem_flags & REM_CHECK_GLOBAL) && !(regexp->jsregexp->flags & JSREG_GLOB))
3418 mark = jsheap_mark(&ctx->tmp_heap);
3420 hres = do_regexp_match_next(ctx, regexp, rem_flags, str, len, cp, parens, parens_size, parens_cnt, ret);
3426 HRESULT regexp_match(script_ctx_t *ctx, jsdisp_t *dispex, const WCHAR *str, DWORD len, BOOL gflag,
3427 match_result_t **match_result, DWORD *result_cnt)
3429 RegExpInstance *This = (RegExpInstance*)dispex;
3430 match_result_t *ret = NULL, cres;
3431 const WCHAR *cp = str;
3432 DWORD i=0, ret_size = 0;
3436 mark = jsheap_mark(&ctx->tmp_heap);
3439 hres = do_regexp_match_next(ctx, This, 0, str, len, &cp, NULL, NULL, NULL, &cres);
3440 if(hres == S_FALSE) {
3450 ret = heap_realloc(ret, (ret_size <<= 1) * sizeof(match_result_t));
3452 ret = heap_alloc((ret_size=4) * sizeof(match_result_t));
3454 hres = E_OUTOFMEMORY;
3461 if(!gflag && !(This->jsregexp->flags & JSREG_GLOB)) {
3473 *match_result = ret;
3478 static HRESULT RegExp_source(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
3479 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3484 case DISPATCH_PROPERTYGET: {
3485 RegExpInstance *This = regexp_from_vdisp(jsthis);
3487 V_VT(retv) = VT_BSTR;
3488 V_BSTR(retv) = SysAllocString(This->str);
3490 return E_OUTOFMEMORY;
3494 FIXME("Unimplemnted flags %x\n", flags);
3501 static HRESULT RegExp_global(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
3502 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3508 static HRESULT RegExp_ignoreCase(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
3509 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3515 static HRESULT RegExp_multiline(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
3516 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3522 static INT index_from_var(script_ctx_t *ctx, VARIANT *v)
3528 memset(&ei, 0, sizeof(ei));
3529 hres = to_number(ctx, v, &ei, &num);
3530 if(FAILED(hres)) { /* FIXME: Move ignoring exceptions to to_promitive */
3531 VariantClear(&ei.var);
3535 if(V_VT(&num) == VT_R8) {
3536 DOUBLE d = floor(V_R8(&num));
3537 return (DOUBLE)(INT)d == d ? d : 0;
3543 static HRESULT RegExp_lastIndex(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
3544 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3549 case DISPATCH_PROPERTYGET: {
3550 RegExpInstance *regexp = regexp_from_vdisp(jsthis);
3552 V_VT(retv) = VT_EMPTY;
3553 return VariantCopy(retv, ®exp->last_index_var);
3555 case DISPATCH_PROPERTYPUT: {
3556 RegExpInstance *regexp = regexp_from_vdisp(jsthis);
3560 arg = get_arg(dp,0);
3561 hres = VariantCopy(®exp->last_index_var, arg);
3565 regexp->last_index = index_from_var(ctx, arg);
3569 FIXME("unimplemented flags: %x\n", flags);
3576 static HRESULT RegExp_toString(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
3577 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3583 static HRESULT create_match_array(script_ctx_t *ctx, BSTR input, const match_result_t *result,
3584 const match_result_t *parens, DWORD parens_cnt, jsexcept_t *ei, IDispatch **ret)
3589 HRESULT hres = S_OK;
3591 static const WCHAR indexW[] = {'i','n','d','e','x',0};
3592 static const WCHAR inputW[] = {'i','n','p','u','t',0};
3593 static const WCHAR lastIndexW[] = {'l','a','s','t','I','n','d','e','x',0};
3594 static const WCHAR zeroW[] = {'0',0};
3596 hres = create_array(ctx, parens_cnt+1, &array);
3600 for(i=0; i < parens_cnt; i++) {
3601 V_VT(&var) = VT_BSTR;
3602 V_BSTR(&var) = SysAllocStringLen(parens[i].str, parens[i].len);
3604 hres = E_OUTOFMEMORY;
3608 hres = jsdisp_propput_idx(array, i+1, &var, ei, NULL/*FIXME*/);
3609 SysFreeString(V_BSTR(&var));
3614 while(SUCCEEDED(hres)) {
3616 V_I4(&var) = result->str-input;
3617 hres = jsdisp_propput_name(array, indexW, &var, ei, NULL/*FIXME*/);
3621 V_I4(&var) = result->str-input+result->len;
3622 hres = jsdisp_propput_name(array, lastIndexW, &var, ei, NULL/*FIXME*/);
3626 V_VT(&var) = VT_BSTR;
3627 V_BSTR(&var) = input;
3628 hres = jsdisp_propput_name(array, inputW, &var, ei, NULL/*FIXME*/);
3632 V_BSTR(&var) = SysAllocStringLen(result->str, result->len);
3634 hres = E_OUTOFMEMORY;
3637 hres = jsdisp_propput_name(array, zeroW, &var, ei, NULL/*FIXME*/);
3638 SysFreeString(V_BSTR(&var));
3643 jsdisp_release(array);
3647 *ret = to_disp(array);
3651 static HRESULT run_exec(script_ctx_t *ctx, vdisp_t *jsthis, VARIANT *arg, jsexcept_t *ei, BSTR *input,
3652 match_result_t *match, match_result_t **parens, DWORD *parens_cnt, VARIANT_BOOL *ret)
3654 RegExpInstance *regexp;
3655 DWORD parens_size = 0, last_index = 0, length;
3660 if(!is_vclass(jsthis, JSCLASS_REGEXP)) {
3661 FIXME("Not a RegExp\n");
3665 regexp = regexp_from_vdisp(jsthis);
3668 hres = to_string(ctx, arg, ei, &string);
3671 length = SysStringLen(string);
3677 if(regexp->jsregexp->flags & JSREG_GLOB) {
3678 if(regexp->last_index < 0) {
3679 SysFreeString(string);
3680 set_last_index(regexp, 0);
3681 *ret = VARIANT_FALSE;
3688 last_index = regexp->last_index;
3691 cp = string + last_index;
3692 hres = regexp_match_next(ctx, ®exp->dispex, REM_RESET_INDEX, string, length, &cp, parens,
3693 parens ? &parens_size : NULL, parens_cnt, match);
3695 SysFreeString(string);
3699 *ret = hres == S_OK ? VARIANT_TRUE : VARIANT_FALSE;
3703 SysFreeString(string);
3708 static HRESULT RegExp_exec(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
3709 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3711 match_result_t *parens = NULL, match;
3712 DWORD parens_cnt = 0;
3719 hres = run_exec(ctx, jsthis, arg_cnt(dp) ? get_arg(dp,0) : NULL, ei, &string, &match, &parens, &parens_cnt, &b);
3727 hres = create_match_array(ctx, string, &match, parens, parens_cnt, ei, &ret);
3728 if(SUCCEEDED(hres)) {
3729 V_VT(retv) = VT_DISPATCH;
3730 V_DISPATCH(retv) = ret;
3733 V_VT(retv) = VT_NULL;
3738 SysFreeString(string);
3742 static HRESULT RegExp_test(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
3743 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3745 match_result_t match;
3755 V_VT(&undef_var) = VT_BSTR;
3756 V_BSTR(&undef_var) = SysAllocString(undefinedW);
3757 if(!V_BSTR(&undef_var))
3758 return E_OUTOFMEMORY;
3761 hres = run_exec(ctx, jsthis, argc ? get_arg(dp,0) : &undef_var, ei, NULL, &match, NULL, NULL, &b);
3763 SysFreeString(V_BSTR(&undef_var));
3768 V_VT(retv) = VT_BOOL;
3774 static HRESULT RegExp_value(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
3775 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
3781 return throw_type_error(ctx, ei, JS_E_FUNCTION_EXPECTED, NULL);
3783 FIXME("unimplemented flags %x\n", flags);
3790 static void RegExp_destructor(jsdisp_t *dispex)
3792 RegExpInstance *This = (RegExpInstance*)dispex;
3795 js_DestroyRegExp(This->jsregexp);
3796 VariantClear(&This->last_index_var);
3797 SysFreeString(This->str);
3801 static const builtin_prop_t RegExp_props[] = {
3802 {execW, RegExp_exec, PROPF_METHOD|1},
3803 {globalW, RegExp_global, 0},
3804 {ignoreCaseW, RegExp_ignoreCase, 0},
3805 {lastIndexW, RegExp_lastIndex, 0},
3806 {multilineW, RegExp_multiline, 0},
3807 {sourceW, RegExp_source, 0},
3808 {testW, RegExp_test, PROPF_METHOD|1},
3809 {toStringW, RegExp_toString, PROPF_METHOD}
3812 static const builtin_info_t RegExp_info = {
3814 {NULL, RegExp_value, 0},
3815 sizeof(RegExp_props)/sizeof(*RegExp_props),
3821 static HRESULT alloc_regexp(script_ctx_t *ctx, jsdisp_t *object_prototype, RegExpInstance **ret)
3823 RegExpInstance *regexp;
3826 regexp = heap_alloc_zero(sizeof(RegExpInstance));
3828 return E_OUTOFMEMORY;
3830 if(object_prototype)
3831 hres = init_dispex(®exp->dispex, ctx, &RegExp_info, object_prototype);
3833 hres = init_dispex_from_constr(®exp->dispex, ctx, &RegExp_info, ctx->regexp_constr);
3844 HRESULT create_regexp(script_ctx_t *ctx, const WCHAR *exp, int len, DWORD flags, jsdisp_t **ret)
3846 RegExpInstance *regexp;
3849 TRACE("%s %x\n", debugstr_w(exp), flags);
3851 hres = alloc_regexp(ctx, NULL, ®exp);
3856 regexp->str = SysAllocString(exp);
3858 regexp->str = SysAllocStringLen(exp, len);
3860 jsdisp_release(®exp->dispex);
3861 return E_OUTOFMEMORY;
3864 regexp->jsregexp = js_NewRegExp(ctx, regexp->str, flags, FALSE);
3865 if(!regexp->jsregexp) {
3866 WARN("js_NewRegExp failed\n");
3867 jsdisp_release(®exp->dispex);
3871 V_VT(®exp->last_index_var) = VT_I4;
3872 V_I4(®exp->last_index_var) = 0;
3874 *ret = ®exp->dispex;
3878 HRESULT create_regexp_var(script_ctx_t *ctx, VARIANT *src_arg, VARIANT *flags_arg, jsdisp_t **ret)
3880 const WCHAR *opt = emptyW, *src;
3884 if(V_VT(src_arg) == VT_DISPATCH) {
3887 obj = iface_to_jsdisp((IUnknown*)V_DISPATCH(src_arg));
3889 if(is_class(obj, JSCLASS_REGEXP)) {
3890 RegExpInstance *regexp = (RegExpInstance*)obj;
3892 hres = create_regexp(ctx, regexp->str, -1, regexp->jsregexp->flags, ret);
3893 jsdisp_release(obj);
3897 jsdisp_release(obj);
3901 if(V_VT(src_arg) != VT_BSTR) {
3902 FIXME("flags_arg = %s\n", debugstr_variant(flags_arg));
3906 src = V_BSTR(src_arg);
3909 if(V_VT(flags_arg) != VT_BSTR) {
3910 FIXME("unimplemented for vt %d\n", V_VT(flags_arg));
3914 opt = V_BSTR(flags_arg);
3917 hres = parse_regexp_flags(opt, strlenW(opt), &flags);
3921 return create_regexp(ctx, src, -1, flags, ret);
3924 HRESULT regexp_string_match(script_ctx_t *ctx, jsdisp_t *re, BSTR str,
3925 VARIANT *retv, jsexcept_t *ei)
3927 static const WCHAR indexW[] = {'i','n','d','e','x',0};
3928 static const WCHAR inputW[] = {'i','n','p','u','t',0};
3929 static const WCHAR lastIndexW[] = {'l','a','s','t','I','n','d','e','x',0};
3931 RegExpInstance *regexp = (RegExpInstance*)re;
3932 match_result_t *match_result;
3933 DWORD match_cnt, i, length;
3938 length = SysStringLen(str);
3940 if(!(regexp->jsregexp->flags & JSREG_GLOB)) {
3941 match_result_t match, *parens = NULL;
3942 DWORD parens_cnt, parens_size = 0;
3943 const WCHAR *cp = str;
3945 hres = regexp_match_next(ctx, ®exp->dispex, 0, str, length, &cp, &parens, &parens_size, &parens_cnt, &match);
3953 hres = create_match_array(ctx, str, &match, parens, parens_cnt, ei, &ret);
3954 if(SUCCEEDED(hres)) {
3955 V_VT(retv) = VT_DISPATCH;
3956 V_DISPATCH(retv) = ret;
3959 V_VT(retv) = VT_NULL;
3967 hres = regexp_match(ctx, ®exp->dispex, str, length, FALSE, &match_result, &match_cnt);
3972 TRACE("no match\n");
3975 V_VT(retv) = VT_NULL;
3979 hres = create_array(ctx, match_cnt, &array);
3983 V_VT(&var) = VT_BSTR;
3985 for(i=0; i < match_cnt; i++) {
3986 V_BSTR(&var) = SysAllocStringLen(match_result[i].str, match_result[i].len);
3988 hres = E_OUTOFMEMORY;
3992 hres = jsdisp_propput_idx(array, i, &var, ei, NULL/*FIXME*/);
3993 SysFreeString(V_BSTR(&var));
3998 while(SUCCEEDED(hres)) {
4000 V_I4(&var) = match_result[match_cnt-1].str-str;
4001 hres = jsdisp_propput_name(array, indexW, &var, ei, NULL/*FIXME*/);
4005 V_I4(&var) = match_result[match_cnt-1].str-str+match_result[match_cnt-1].len;
4006 hres = jsdisp_propput_name(array, lastIndexW, &var, ei, NULL/*FIXME*/);
4010 V_VT(&var) = VT_BSTR;
4012 hres = jsdisp_propput_name(array, inputW, &var, ei, NULL/*FIXME*/);
4016 heap_free(match_result);
4018 if(SUCCEEDED(hres) && retv)
4019 var_set_jsdisp(retv, array);
4021 jsdisp_release(array);
4025 static HRESULT RegExpConstr_leftContext(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags,
4026 DISPPARAMS *dp, VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
4031 case DISPATCH_PROPERTYGET: {
4034 ret = SysAllocStringLen(ctx->last_match, ctx->last_match_index);
4036 return E_OUTOFMEMORY;
4038 V_VT(retv) = VT_BSTR;
4041 case DISPATCH_PROPERTYPUT:
4044 FIXME("unsupported flags\n");
4051 static HRESULT RegExpConstr_rightContext(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags,
4052 DISPPARAMS *dp, VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
4057 case DISPATCH_PROPERTYGET: {
4060 ret = SysAllocString(ctx->last_match+ctx->last_match_index+ctx->last_match_length);
4062 return E_OUTOFMEMORY;
4064 V_VT(retv) = VT_BSTR;
4067 case DISPATCH_PROPERTYPUT:
4070 FIXME("unsupported flags\n");
4077 static HRESULT RegExpConstr_value(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
4078 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
4083 case DISPATCH_METHOD:
4085 VARIANT *arg = get_arg(dp,0);
4086 if(V_VT(arg) == VT_DISPATCH) {
4087 jsdisp_t *jsdisp = iface_to_jsdisp((IUnknown*)V_DISPATCH(arg));
4089 if(is_class(jsdisp, JSCLASS_REGEXP)) {
4090 if(arg_cnt(dp) > 1 && V_VT(get_arg(dp,1)) != VT_EMPTY) {
4091 jsdisp_release(jsdisp);
4092 return throw_regexp_error(ctx, ei, JS_E_REGEXP_SYNTAX, NULL);
4096 var_set_jsdisp(retv, jsdisp);
4098 jsdisp_release(jsdisp);
4101 jsdisp_release(jsdisp);
4106 case DISPATCH_CONSTRUCT: {
4115 hres = create_regexp_var(ctx, get_arg(dp,0), arg_cnt(dp) > 1 ? get_arg(dp,1) : NULL, &ret);
4120 var_set_jsdisp(retv, ret);
4122 jsdisp_release(ret);
4126 FIXME("unimplemented flags: %x\n", flags);
4133 static const builtin_prop_t RegExpConstr_props[] = {
4134 {leftContextW, RegExpConstr_leftContext, 0},
4135 {rightContextW, RegExpConstr_rightContext, 0}
4138 static const builtin_info_t RegExpConstr_info = {
4140 {NULL, Function_value, 0},
4141 sizeof(RegExpConstr_props)/sizeof(*RegExpConstr_props),
4147 HRESULT create_regexp_constr(script_ctx_t *ctx, jsdisp_t *object_prototype, jsdisp_t **ret)
4149 RegExpInstance *regexp;
4152 static const WCHAR RegExpW[] = {'R','e','g','E','x','p',0};
4154 hres = alloc_regexp(ctx, object_prototype, ®exp);
4158 hres = create_builtin_function(ctx, RegExpConstr_value, RegExpW, &RegExpConstr_info,
4159 PROPF_CONSTR|2, ®exp->dispex, ret);
4161 jsdisp_release(®exp->dispex);
4165 HRESULT parse_regexp_flags(const WCHAR *str, DWORD str_len, DWORD *ret)
4170 for (p = str; p < str+str_len; p++) {
4173 flags |= JSREG_GLOB;
4176 flags |= JSREG_FOLD;
4179 flags |= JSREG_MULTILINE;
4182 flags |= JSREG_STICKY;
4185 WARN("wrong flag %c\n", *p);