diff options
Diffstat (limited to 'src/civetweb/src/third_party/duktape-1.8.0/src-separate/duk_heap_stringcache.c')
-rw-r--r-- | src/civetweb/src/third_party/duktape-1.8.0/src-separate/duk_heap_stringcache.c | 298 |
1 files changed, 298 insertions, 0 deletions
diff --git a/src/civetweb/src/third_party/duktape-1.8.0/src-separate/duk_heap_stringcache.c b/src/civetweb/src/third_party/duktape-1.8.0/src-separate/duk_heap_stringcache.c new file mode 100644 index 000000000..ace607e60 --- /dev/null +++ b/src/civetweb/src/third_party/duktape-1.8.0/src-separate/duk_heap_stringcache.c @@ -0,0 +1,298 @@ +/* + * String cache. + * + * Provides a cache to optimize indexed string lookups. The cache keeps + * track of (byte offset, char offset) states for a fixed number of strings. + * Otherwise we'd need to scan from either end of the string, as we store + * strings in (extended) UTF-8. + */ + +#include "duk_internal.h" + +/* + * Delete references to given hstring from the heap string cache. + * + * String cache references are 'weak': they are not counted towards + * reference counts, nor serve as roots for mark-and-sweep. When an + * object is about to be freed, such references need to be removed. + */ + +DUK_INTERNAL void duk_heap_strcache_string_remove(duk_heap *heap, duk_hstring *h) { + duk_small_int_t i; + for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) { + duk_strcache *c = heap->strcache + i; + if (c->h == h) { + DUK_DD(DUK_DDPRINT("deleting weak strcache reference to hstring %p from heap %p", + (void *) h, (void *) heap)); + c->h = NULL; + + /* XXX: the string shouldn't appear twice, but we now loop to the + * end anyway; if fixed, add a looping assertion to ensure there + * is no duplicate. + */ + } + } +} + +/* + * String scanning helpers + * + * All bytes other than UTF-8 continuation bytes ([0x80,0xbf]) are + * considered to contribute a character. This must match how string + * character length is computed. + */ + +DUK_LOCAL const duk_uint8_t *duk__scan_forwards(const duk_uint8_t *p, const duk_uint8_t *q, duk_uint_fast32_t n) { + while (n > 0) { + for (;;) { + p++; + if (p >= q) { + return NULL; + } + if ((*p & 0xc0) != 0x80) { + break; + } + } + n--; + } + return p; +} + +DUK_LOCAL const duk_uint8_t *duk__scan_backwards(const duk_uint8_t *p, const duk_uint8_t *q, duk_uint_fast32_t n) { + while (n > 0) { + for (;;) { + p--; + if (p < q) { + return NULL; + } + if ((*p & 0xc0) != 0x80) { + break; + } + } + n--; + } + return p; +} + +/* + * Convert char offset to byte offset + * + * Avoid using the string cache if possible: for ASCII strings byte and + * char offsets are equal and for short strings direct scanning may be + * better than using the string cache (which may evict a more important + * entry). + * + * Typing now assumes 32-bit string byte/char offsets (duk_uint_fast32_t). + * Better typing might be to use duk_size_t. + */ + +DUK_INTERNAL duk_uint_fast32_t duk_heap_strcache_offset_char2byte(duk_hthread *thr, duk_hstring *h, duk_uint_fast32_t char_offset) { + duk_heap *heap; + duk_strcache *sce; + duk_uint_fast32_t byte_offset; + duk_small_int_t i; + duk_bool_t use_cache; + duk_uint_fast32_t dist_start, dist_end, dist_sce; + const duk_uint8_t *p_start; + const duk_uint8_t *p_end; + const duk_uint8_t *p_found; + + if (char_offset > DUK_HSTRING_GET_CHARLEN(h)) { + goto error; + } + + /* + * For ASCII strings, the answer is simple. + */ + + if (DUK_HSTRING_IS_ASCII(h)) { + /* clen == blen -> pure ascii */ + return char_offset; + } + + /* + * For non-ASCII strings, we need to scan forwards or backwards + * from some starting point. The starting point may be the start + * or end of the string, or some cached midpoint in the string + * cache. + * + * For "short" strings we simply scan without checking or updating + * the cache. For longer strings we check and update the cache as + * necessary, inserting a new cache entry if none exists. + */ + + DUK_DDD(DUK_DDDPRINT("non-ascii string %p, char_offset=%ld, clen=%ld, blen=%ld", + (void *) h, (long) char_offset, + (long) DUK_HSTRING_GET_CHARLEN(h), + (long) DUK_HSTRING_GET_BYTELEN(h))); + + heap = thr->heap; + sce = NULL; + use_cache = (DUK_HSTRING_GET_CHARLEN(h) > DUK_HEAP_STRINGCACHE_NOCACHE_LIMIT); + + if (use_cache) { +#ifdef DUK_USE_DDDPRINT + DUK_DDD(DUK_DDDPRINT("stringcache before char2byte (using cache):")); + for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) { + duk_strcache *c = heap->strcache + i; + DUK_DDD(DUK_DDDPRINT(" [%ld] -> h=%p, cidx=%ld, bidx=%ld", + (long) i, (void *) c->h, (long) c->cidx, (long) c->bidx)); + } +#endif + + for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) { + duk_strcache *c = heap->strcache + i; + + if (c->h == h) { + sce = c; + break; + } + } + } + + /* + * Scan from shortest distance: + * - start of string + * - end of string + * - cache entry (if exists) + */ + + DUK_ASSERT(DUK_HSTRING_GET_CHARLEN(h) >= char_offset); + dist_start = char_offset; + dist_end = DUK_HSTRING_GET_CHARLEN(h) - char_offset; + dist_sce = 0; DUK_UNREF(dist_sce); /* initialize for debug prints, needed if sce==NULL */ + + p_start = (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h); + p_end = (const duk_uint8_t *) (p_start + DUK_HSTRING_GET_BYTELEN(h)); + p_found = NULL; + + if (sce) { + if (char_offset >= sce->cidx) { + dist_sce = char_offset - sce->cidx; + if ((dist_sce <= dist_start) && (dist_sce <= dist_end)) { + DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, " + "dist_start=%ld, dist_end=%ld, dist_sce=%ld => " + "scan forwards from sce", + (long) use_cache, (void *) (sce ? sce->h : NULL), + (sce ? (long) sce->cidx : (long) -1), + (sce ? (long) sce->bidx : (long) -1), + (long) dist_start, (long) dist_end, (long) dist_sce)); + + p_found = duk__scan_forwards(p_start + sce->bidx, + p_end, + dist_sce); + goto scan_done; + } + } else { + dist_sce = sce->cidx - char_offset; + if ((dist_sce <= dist_start) && (dist_sce <= dist_end)) { + DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, " + "dist_start=%ld, dist_end=%ld, dist_sce=%ld => " + "scan backwards from sce", + (long) use_cache, (void *) (sce ? sce->h : NULL), + (sce ? (long) sce->cidx : (long) -1), + (sce ? (long) sce->bidx : (long) -1), + (long) dist_start, (long) dist_end, (long) dist_sce)); + + p_found = duk__scan_backwards(p_start + sce->bidx, + p_start, + dist_sce); + goto scan_done; + } + } + } + + /* no sce, or sce scan not best */ + + if (dist_start <= dist_end) { + DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, " + "dist_start=%ld, dist_end=%ld, dist_sce=%ld => " + "scan forwards from string start", + (long) use_cache, (void *) (sce ? sce->h : NULL), + (sce ? (long) sce->cidx : (long) -1), + (sce ? (long) sce->bidx : (long) -1), + (long) dist_start, (long) dist_end, (long) dist_sce)); + + p_found = duk__scan_forwards(p_start, + p_end, + dist_start); + } else { + DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, " + "dist_start=%ld, dist_end=%ld, dist_sce=%ld => " + "scan backwards from string end", + (long) use_cache, (void *) (sce ? sce->h : NULL), + (sce ? (long) sce->cidx : (long) -1), + (sce ? (long) sce->bidx : (long) -1), + (long) dist_start, (long) dist_end, (long) dist_sce)); + + p_found = duk__scan_backwards(p_end, + p_start, + dist_end); + } + + scan_done: + + if (!p_found) { + /* Scan error: this shouldn't normally happen; it could happen if + * string is not valid UTF-8 data, and clen/blen are not consistent + * with the scanning algorithm. + */ + goto error; + } + + DUK_ASSERT(p_found >= p_start); + DUK_ASSERT(p_found <= p_end); /* may be equal */ + byte_offset = (duk_uint32_t) (p_found - p_start); + + DUK_DDD(DUK_DDDPRINT("-> string %p, cidx %ld -> bidx %ld", + (void *) h, (long) char_offset, (long) byte_offset)); + + /* + * Update cache entry (allocating if necessary), and move the + * cache entry to the first place (in an "LRU" policy). + */ + + if (use_cache) { + /* update entry, allocating if necessary */ + if (!sce) { + sce = heap->strcache + DUK_HEAP_STRCACHE_SIZE - 1; /* take last entry */ + sce->h = h; + } + DUK_ASSERT(sce != NULL); + sce->bidx = (duk_uint32_t) (p_found - p_start); + sce->cidx = (duk_uint32_t) char_offset; + + /* LRU: move our entry to first */ + if (sce > &heap->strcache[0]) { + /* + * A C + * B A + * C <- sce ==> B + * D D + */ + duk_strcache tmp; + + tmp = *sce; + DUK_MEMMOVE((void *) (&heap->strcache[1]), + (const void *) (&heap->strcache[0]), + (size_t) (((char *) sce) - ((char *) &heap->strcache[0]))); + heap->strcache[0] = tmp; + + /* 'sce' points to the wrong entry here, but is no longer used */ + } +#ifdef DUK_USE_DDDPRINT + DUK_DDD(DUK_DDDPRINT("stringcache after char2byte (using cache):")); + for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) { + duk_strcache *c = heap->strcache + i; + DUK_DDD(DUK_DDDPRINT(" [%ld] -> h=%p, cidx=%ld, bidx=%ld", + (long) i, (void *) c->h, (long) c->cidx, (long) c->bidx)); + } +#endif + } + + return byte_offset; + + error: + DUK_ERROR_INTERNAL_DEFMSG(thr); + return 0; +} |