From 8541f94e2ced1af23ee3dc41961fe891986d449b Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Tue, 23 Apr 2013 15:02:37 +0200 Subject: [PATCH] jscript: Store concatenated strings as a rope string to avoid useless copying. --- dlls/jscript/jsstr.c | 204 +++++++++++++++++++++++++++++++++++++++++-- dlls/jscript/jsstr.h | 107 ++++++++++++++++++++--- 2 files changed, 290 insertions(+), 21 deletions(-) diff --git a/dlls/jscript/jsstr.c b/dlls/jscript/jsstr.c index a9dfc49d87..0d0eb0fef6 100644 --- a/dlls/jscript/jsstr.c +++ b/dlls/jscript/jsstr.c @@ -16,13 +16,54 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */ +#include + #include "jscript.h" #include "wine/debug.h" +/* + * This is the length of a string that is considered to be long enough to be + * worth the rope to avoid copy. + * This probably could be tuned, but keep it low for a while to better test rope's code. + */ +#define JSSTR_SHORT_STRING_LENGTH 8 + +/* + * This is the max rope depth. While faster to allocate, ropes may become slow at access. + */ +#define JSSTR_MAX_ROPE_DEPTH 100 + const char *debugstr_jsstr(jsstr_t *str) { - return debugstr_wn(jsstr_as_inline(str)->buf, jsstr_length(str)); + return jsstr_is_inline(str) ? debugstr_wn(jsstr_as_inline(str)->buf, jsstr_length(str)) + : jsstr_is_heap(str) ? debugstr_wn(jsstr_as_heap(str)->buf, jsstr_length(str)) + : wine_dbg_sprintf("%s...", debugstr_jsstr(jsstr_as_rope(str)->left)); +} + +void jsstr_free(jsstr_t *str) +{ + switch(jsstr_tag(str)) { + case JSSTR_HEAP: + heap_free(jsstr_as_heap(str)->buf); + break; + case JSSTR_ROPE: { + jsstr_rope_t *rope = jsstr_as_rope(str); + jsstr_release(rope->left); + jsstr_release(rope->right); + break; + } + case JSSTR_INLINE: + break; + } + + heap_free(str); +} + +static inline void jsstr_init(jsstr_t *str, unsigned len, jsstr_tag_t tag) +{ + str->length_flags = len << JSSTR_LENGTH_SHIFT | tag; + str->ref = 1; } WCHAR *jsstr_alloc_buf(unsigned len, jsstr_t **r) @@ -36,8 +77,7 @@ WCHAR *jsstr_alloc_buf(unsigned len, jsstr_t **r) if(!ret) return NULL; - ret->str.length_flags = len << JSSTR_LENGTH_SHIFT; - ret->str.ref = 1; + jsstr_init(&ret->str, len, JSSTR_INLINE); ret->buf[len] = 0; *r = &ret->str; return ret->buf; @@ -55,17 +95,116 @@ jsstr_t *jsstr_alloc_len(const WCHAR *buf, unsigned len) return ret; } +static void jsstr_rope_extract(jsstr_rope_t *str, unsigned off, unsigned len, WCHAR *buf) +{ + unsigned left_len = jsstr_length(str->left); + + if(left_len <= off) { + jsstr_extract(str->right, off-left_len, len, buf); + }else if(left_len >= len+off) { + jsstr_extract(str->left, off, len, buf); + }else { + left_len -= off; + jsstr_extract(str->left, off, left_len, buf); + jsstr_extract(str->right, 0, len-left_len, buf+left_len); + } +} + +void jsstr_extract(jsstr_t *str, unsigned off, unsigned len, WCHAR *buf) +{ + switch(jsstr_tag(str)) { + case JSSTR_INLINE: + memcpy(buf, jsstr_as_inline(str)->buf+off, len*sizeof(WCHAR)); + return; + case JSSTR_HEAP: + memcpy(buf, jsstr_as_heap(str)->buf+off, len*sizeof(WCHAR)); + return; + case JSSTR_ROPE: + return jsstr_rope_extract(jsstr_as_rope(str), off, len, buf); + } +} + +static int jsstr_cmp_str(jsstr_t *jsstr, const WCHAR *str, unsigned len) +{ + int ret; + + switch(jsstr_tag(jsstr)) { + case JSSTR_INLINE: + ret = memcmp(jsstr_as_inline(jsstr)->buf, str, len*sizeof(WCHAR)); + return ret || jsstr_length(jsstr) == len ? ret : 1; + case JSSTR_HEAP: + ret = memcmp(jsstr_as_heap(jsstr)->buf, str, len*sizeof(WCHAR)); + return ret || jsstr_length(jsstr) == len ? ret : 1; + case JSSTR_ROPE: { + jsstr_rope_t *rope = jsstr_as_rope(jsstr); + unsigned left_len = jsstr_length(rope->left); + + ret = jsstr_cmp_str(rope->left, str, min(len, left_len)); + if(ret || len <= left_len) + return ret; + return jsstr_cmp_str(rope->right, str+left_len, len-left_len); + } + } + + assert(0); + return 0; +} + +#define TMP_BUF_SIZE 256 + +static int ropes_cmp(jsstr_rope_t *left, jsstr_rope_t *right) +{ + WCHAR left_buf[TMP_BUF_SIZE], right_buf[TMP_BUF_SIZE]; + unsigned left_len = jsstr_length(&left->str); + unsigned right_len = jsstr_length(&right->str); + unsigned cmp_off = 0, cmp_size; + int ret; + + /* FIXME: We can avoid temporary buffers here. */ + while(cmp_off < min(left_len, right_len)) { + cmp_size = min(left_len, right_len) - cmp_off; + if(cmp_size > TMP_BUF_SIZE) + cmp_size = TMP_BUF_SIZE; + + jsstr_rope_extract(left, cmp_off, cmp_size, left_buf); + jsstr_rope_extract(right, cmp_off, cmp_size, right_buf); + ret = memcmp(left_buf, right_buf, cmp_size); + if(ret) + return ret; + + cmp_off += cmp_size; + } + + return left_len - right_len; +} + +static inline const WCHAR *jsstr_try_flat(jsstr_t *str) +{ + return jsstr_is_inline(str) ? jsstr_as_inline(str)->buf + : jsstr_is_heap(str) ? jsstr_as_heap(str)->buf + : NULL; +} + int jsstr_cmp(jsstr_t *str1, jsstr_t *str2) { - int len1 = jsstr_length(str1); - int len2 = jsstr_length(str2); + unsigned len1 = jsstr_length(str1); + unsigned len2 = jsstr_length(str2); + const WCHAR *str; int ret; - ret = memcmp(jsstr_as_inline(str1)->buf, jsstr_as_inline(str2)->buf, min(len1, len2)*sizeof(WCHAR)); - if(!ret) - ret = len1 - len2; + str = jsstr_try_flat(str2); + if(str) { + ret = jsstr_cmp_str(str1, str, min(len1, len2)); + return ret || len1 == len2 ? ret : -1; + } - return ret; + str = jsstr_try_flat(str1); + if(str) { + ret = jsstr_cmp_str(str2, str, min(len1, len2)); + return ret || len1 == len2 ? -ret : 1; + } + + return ropes_cmp(jsstr_as_rope(str1), jsstr_as_rope(str2)); } jsstr_t *jsstr_concat(jsstr_t *str1, jsstr_t *str2) @@ -82,6 +221,31 @@ jsstr_t *jsstr_concat(jsstr_t *str1, jsstr_t *str2) if(!len2) return jsstr_addref(str1); + if(len1 + len2 >= JSSTR_SHORT_STRING_LENGTH) { + unsigned depth, depth2; + jsstr_rope_t *rope; + + depth = jsstr_is_rope(str1) ? jsstr_as_rope(str1)->depth : 0; + depth2 = jsstr_is_rope(str2) ? jsstr_as_rope(str2)->depth : 0; + if(depth2 > depth) + depth = depth2; + + if(depth++ < JSSTR_MAX_ROPE_DEPTH) { + if(len1+len2 > JSSTR_MAX_LENGTH) + return NULL; + + rope = heap_alloc(sizeof(*rope)); + if(!rope) + return NULL; + + jsstr_init(&rope->str, len1+len2, JSSTR_ROPE); + rope->left = jsstr_addref(str1); + rope->right = jsstr_addref(str2); + rope->depth = depth; + return &rope->str; + } + } + ptr = jsstr_alloc_buf(len1+len2, &ret); if(!ret) return NULL; @@ -89,6 +253,28 @@ jsstr_t *jsstr_concat(jsstr_t *str1, jsstr_t *str2) jsstr_flush(str1, ptr); jsstr_flush(str2, ptr+len1); return ret; + +} + +C_ASSERT(sizeof(jsstr_heap_t) <= sizeof(jsstr_rope_t)); + +const WCHAR *jsstr_rope_flatten(jsstr_rope_t *str) +{ + WCHAR *buf; + + buf = heap_alloc((jsstr_length(&str->str)+1) * sizeof(WCHAR)); + if(!buf) + return NULL; + + jsstr_flush(str->left, buf); + jsstr_flush(str->right, buf+jsstr_length(str->left)); + buf[jsstr_length(&str->str)] = 0; + + /* Trasform to heap string */ + jsstr_release(str->left); + jsstr_release(str->right); + str->str.length_flags |= JSSTR_FLAG_FLAT; + return jsstr_as_heap(&str->str)->buf = buf; } static jsstr_t *empty_str, *nan_str, *undefined_str; diff --git a/dlls/jscript/jsstr.h b/dlls/jscript/jsstr.h index 4d005dceeb..727b2d007a 100644 --- a/dlls/jscript/jsstr.h +++ b/dlls/jscript/jsstr.h @@ -16,6 +16,23 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */ +/* + * This is a common header for all string representations. The exact layout of the string + * representation may be: + * + * - inline string - string bytes directly follow string headers. + * - heap string - a structure containing a pointer to buffer on the heap. + * - roper string - a product of concatenation of two strings. Instead of copying whole + * buffers, we may store just references to concatenated strings. + * + * String layout may change over life time of the string. Currently possible transformation + * is when a rope string becomes a heap stream. That happens when we need a real, linear + * zero-terminated buffer (a flat buffer). At this point the type of the string is changed + * and the new buffer is stored in the string, so that subsequent operations requiring + * a flat string won't need to flatten it again. + * + * In the future more layouts and transformations may be added. + */ struct _jsstr_t { unsigned length_flags; unsigned ref; @@ -25,18 +42,60 @@ struct _jsstr_t { #define JSSTR_MAX_LENGTH (1 << (32-JSSTR_LENGTH_SHIFT)) #define JSSTR_FLAGS_MASK ((1 << JSSTR_LENGTH_SHIFT)-1) -#define JSSTR_FLAG_NULLBSTR 1 +#define JSSTR_FLAG_NULLBSTR 4 + +#define JSSTR_FLAG_LBIT 1 +#define JSSTR_FLAG_FLAT 2 +#define JSSTR_FLAG_TAG_MASK 3 + +typedef enum { + JSSTR_INLINE = JSSTR_FLAG_FLAT, + JSSTR_HEAP = JSSTR_FLAG_FLAT|JSSTR_FLAG_LBIT, + JSSTR_ROPE = JSSTR_FLAG_LBIT +} jsstr_tag_t; static inline unsigned jsstr_length(jsstr_t *str) { return str->length_flags >> JSSTR_LENGTH_SHIFT; } +static inline jsstr_tag_t jsstr_tag(jsstr_t *str) +{ + return str->length_flags & JSSTR_FLAG_TAG_MASK; +} + +static inline BOOL jsstr_is_inline(jsstr_t *str) +{ + return jsstr_tag(str) == JSSTR_INLINE; +} + +static inline BOOL jsstr_is_heap(jsstr_t *str) +{ + return jsstr_tag(str) == JSSTR_HEAP; +} + +static inline BOOL jsstr_is_rope(jsstr_t *str) +{ + return jsstr_tag(str) == JSSTR_ROPE; +} + typedef struct { jsstr_t str; WCHAR buf[1]; } jsstr_inline_t; +typedef struct { + jsstr_t str; + WCHAR *buf; +} jsstr_heap_t; + +typedef struct { + jsstr_t str; + jsstr_t *left; + jsstr_t *right; + unsigned depth; +} jsstr_rope_t; + jsstr_t *jsstr_alloc_len(const WCHAR*,unsigned) DECLSPEC_HIDDEN; WCHAR *jsstr_alloc_buf(unsigned,jsstr_t**) DECLSPEC_HIDDEN; @@ -45,10 +104,16 @@ static inline jsstr_t *jsstr_alloc(const WCHAR *str) return jsstr_alloc_len(str, strlenW(str)); } +void jsstr_free(jsstr_t*) DECLSPEC_HIDDEN; + static inline void jsstr_release(jsstr_t *str) { - if(!--str->ref) - heap_free(str); + if(!--str->ref) { + if(jsstr_is_inline(str)) + heap_free(str); + else + jsstr_free(str); + } } static inline jsstr_t *jsstr_addref(jsstr_t *str) @@ -62,27 +127,39 @@ static inline jsstr_inline_t *jsstr_as_inline(jsstr_t *str) return CONTAINING_RECORD(str, jsstr_inline_t, str); } -/* This will be failable in the future. */ -static inline const WCHAR *jsstr_flatten(jsstr_t *str) +static inline jsstr_heap_t *jsstr_as_heap(jsstr_t *str) { - return jsstr_as_inline(str)->buf; + return CONTAINING_RECORD(str, jsstr_heap_t, str); } -static inline BOOL jsstr_eq(jsstr_t *str1, jsstr_t *str2) +static inline jsstr_rope_t *jsstr_as_rope(jsstr_t *str) { - unsigned len = jsstr_length(str1); - return len == jsstr_length(str2) && !memcmp(jsstr_as_inline(str1)->buf, jsstr_as_inline(str2)->buf, len*sizeof(WCHAR)); + return CONTAINING_RECORD(str, jsstr_rope_t, str); } -static inline void jsstr_extract(jsstr_t *str, unsigned off, unsigned len, WCHAR *buf) +const WCHAR *jsstr_rope_flatten(jsstr_rope_t*) DECLSPEC_HIDDEN; + +static inline const WCHAR *jsstr_flatten(jsstr_t *str) { - memcpy(buf, jsstr_as_inline(str)->buf+off, len*sizeof(WCHAR)); + return jsstr_is_inline(str) ? jsstr_as_inline(str)->buf + : jsstr_is_heap(str) ? jsstr_as_heap(str)->buf + : jsstr_rope_flatten(jsstr_as_rope(str)); } +void jsstr_extract(jsstr_t*,unsigned,unsigned,WCHAR*) DECLSPEC_HIDDEN; + static inline unsigned jsstr_flush(jsstr_t *str, WCHAR *buf) { unsigned len = jsstr_length(str); - memcpy(buf, jsstr_as_inline(str)->buf, len*sizeof(WCHAR)); + if(jsstr_is_inline(str)) { + memcpy(buf, jsstr_as_inline(str)->buf, len*sizeof(WCHAR)); + }else if(jsstr_is_heap(str)) { + memcpy(buf, jsstr_as_heap(str)->buf, len*sizeof(WCHAR)); + }else { + jsstr_rope_t *rope = jsstr_as_rope(str); + jsstr_flush(rope->left, buf); + jsstr_flush(rope->right, buf+jsstr_length(rope->left)); + } return len; } @@ -98,6 +175,12 @@ static inline jsstr_t *jsstr_substr(jsstr_t *str, unsigned off, unsigned len) } int jsstr_cmp(jsstr_t*,jsstr_t*) DECLSPEC_HIDDEN; + +static inline BOOL jsstr_eq(jsstr_t *left, jsstr_t *right) +{ + return jsstr_length(left) == jsstr_length(right) && !jsstr_cmp(left, right); +} + jsstr_t *jsstr_concat(jsstr_t*,jsstr_t*) DECLSPEC_HIDDEN; jsstr_t *jsstr_nan(void) DECLSPEC_HIDDEN; -- 2.32.0.93.g670b81a890