summaryrefslogtreecommitdiffstats
path: root/src/civetweb/src/third_party/duktape-1.8.0/src-separate/duk_heap_stringcache.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/civetweb/src/third_party/duktape-1.8.0/src-separate/duk_heap_stringcache.c')
-rw-r--r--src/civetweb/src/third_party/duktape-1.8.0/src-separate/duk_heap_stringcache.c298
1 files changed, 298 insertions, 0 deletions
diff --git a/src/civetweb/src/third_party/duktape-1.8.0/src-separate/duk_heap_stringcache.c b/src/civetweb/src/third_party/duktape-1.8.0/src-separate/duk_heap_stringcache.c
new file mode 100644
index 000000000..ace607e60
--- /dev/null
+++ b/src/civetweb/src/third_party/duktape-1.8.0/src-separate/duk_heap_stringcache.c
@@ -0,0 +1,298 @@
+/*
+ * String cache.
+ *
+ * Provides a cache to optimize indexed string lookups. The cache keeps
+ * track of (byte offset, char offset) states for a fixed number of strings.
+ * Otherwise we'd need to scan from either end of the string, as we store
+ * strings in (extended) UTF-8.
+ */
+
+#include "duk_internal.h"
+
+/*
+ * Delete references to given hstring from the heap string cache.
+ *
+ * String cache references are 'weak': they are not counted towards
+ * reference counts, nor serve as roots for mark-and-sweep. When an
+ * object is about to be freed, such references need to be removed.
+ */
+
+DUK_INTERNAL void duk_heap_strcache_string_remove(duk_heap *heap, duk_hstring *h) {
+ duk_small_int_t i;
+ for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) {
+ duk_strcache *c = heap->strcache + i;
+ if (c->h == h) {
+ DUK_DD(DUK_DDPRINT("deleting weak strcache reference to hstring %p from heap %p",
+ (void *) h, (void *) heap));
+ c->h = NULL;
+
+ /* XXX: the string shouldn't appear twice, but we now loop to the
+ * end anyway; if fixed, add a looping assertion to ensure there
+ * is no duplicate.
+ */
+ }
+ }
+}
+
+/*
+ * String scanning helpers
+ *
+ * All bytes other than UTF-8 continuation bytes ([0x80,0xbf]) are
+ * considered to contribute a character. This must match how string
+ * character length is computed.
+ */
+
+DUK_LOCAL const duk_uint8_t *duk__scan_forwards(const duk_uint8_t *p, const duk_uint8_t *q, duk_uint_fast32_t n) {
+ while (n > 0) {
+ for (;;) {
+ p++;
+ if (p >= q) {
+ return NULL;
+ }
+ if ((*p & 0xc0) != 0x80) {
+ break;
+ }
+ }
+ n--;
+ }
+ return p;
+}
+
+DUK_LOCAL const duk_uint8_t *duk__scan_backwards(const duk_uint8_t *p, const duk_uint8_t *q, duk_uint_fast32_t n) {
+ while (n > 0) {
+ for (;;) {
+ p--;
+ if (p < q) {
+ return NULL;
+ }
+ if ((*p & 0xc0) != 0x80) {
+ break;
+ }
+ }
+ n--;
+ }
+ return p;
+}
+
+/*
+ * Convert char offset to byte offset
+ *
+ * Avoid using the string cache if possible: for ASCII strings byte and
+ * char offsets are equal and for short strings direct scanning may be
+ * better than using the string cache (which may evict a more important
+ * entry).
+ *
+ * Typing now assumes 32-bit string byte/char offsets (duk_uint_fast32_t).
+ * Better typing might be to use duk_size_t.
+ */
+
+DUK_INTERNAL duk_uint_fast32_t duk_heap_strcache_offset_char2byte(duk_hthread *thr, duk_hstring *h, duk_uint_fast32_t char_offset) {
+ duk_heap *heap;
+ duk_strcache *sce;
+ duk_uint_fast32_t byte_offset;
+ duk_small_int_t i;
+ duk_bool_t use_cache;
+ duk_uint_fast32_t dist_start, dist_end, dist_sce;
+ const duk_uint8_t *p_start;
+ const duk_uint8_t *p_end;
+ const duk_uint8_t *p_found;
+
+ if (char_offset > DUK_HSTRING_GET_CHARLEN(h)) {
+ goto error;
+ }
+
+ /*
+ * For ASCII strings, the answer is simple.
+ */
+
+ if (DUK_HSTRING_IS_ASCII(h)) {
+ /* clen == blen -> pure ascii */
+ return char_offset;
+ }
+
+ /*
+ * For non-ASCII strings, we need to scan forwards or backwards
+ * from some starting point. The starting point may be the start
+ * or end of the string, or some cached midpoint in the string
+ * cache.
+ *
+ * For "short" strings we simply scan without checking or updating
+ * the cache. For longer strings we check and update the cache as
+ * necessary, inserting a new cache entry if none exists.
+ */
+
+ DUK_DDD(DUK_DDDPRINT("non-ascii string %p, char_offset=%ld, clen=%ld, blen=%ld",
+ (void *) h, (long) char_offset,
+ (long) DUK_HSTRING_GET_CHARLEN(h),
+ (long) DUK_HSTRING_GET_BYTELEN(h)));
+
+ heap = thr->heap;
+ sce = NULL;
+ use_cache = (DUK_HSTRING_GET_CHARLEN(h) > DUK_HEAP_STRINGCACHE_NOCACHE_LIMIT);
+
+ if (use_cache) {
+#ifdef DUK_USE_DDDPRINT
+ DUK_DDD(DUK_DDDPRINT("stringcache before char2byte (using cache):"));
+ for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) {
+ duk_strcache *c = heap->strcache + i;
+ DUK_DDD(DUK_DDDPRINT(" [%ld] -> h=%p, cidx=%ld, bidx=%ld",
+ (long) i, (void *) c->h, (long) c->cidx, (long) c->bidx));
+ }
+#endif
+
+ for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) {
+ duk_strcache *c = heap->strcache + i;
+
+ if (c->h == h) {
+ sce = c;
+ break;
+ }
+ }
+ }
+
+ /*
+ * Scan from shortest distance:
+ * - start of string
+ * - end of string
+ * - cache entry (if exists)
+ */
+
+ DUK_ASSERT(DUK_HSTRING_GET_CHARLEN(h) >= char_offset);
+ dist_start = char_offset;
+ dist_end = DUK_HSTRING_GET_CHARLEN(h) - char_offset;
+ dist_sce = 0; DUK_UNREF(dist_sce); /* initialize for debug prints, needed if sce==NULL */
+
+ p_start = (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h);
+ p_end = (const duk_uint8_t *) (p_start + DUK_HSTRING_GET_BYTELEN(h));
+ p_found = NULL;
+
+ if (sce) {
+ if (char_offset >= sce->cidx) {
+ dist_sce = char_offset - sce->cidx;
+ if ((dist_sce <= dist_start) && (dist_sce <= dist_end)) {
+ DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, "
+ "dist_start=%ld, dist_end=%ld, dist_sce=%ld => "
+ "scan forwards from sce",
+ (long) use_cache, (void *) (sce ? sce->h : NULL),
+ (sce ? (long) sce->cidx : (long) -1),
+ (sce ? (long) sce->bidx : (long) -1),
+ (long) dist_start, (long) dist_end, (long) dist_sce));
+
+ p_found = duk__scan_forwards(p_start + sce->bidx,
+ p_end,
+ dist_sce);
+ goto scan_done;
+ }
+ } else {
+ dist_sce = sce->cidx - char_offset;
+ if ((dist_sce <= dist_start) && (dist_sce <= dist_end)) {
+ DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, "
+ "dist_start=%ld, dist_end=%ld, dist_sce=%ld => "
+ "scan backwards from sce",
+ (long) use_cache, (void *) (sce ? sce->h : NULL),
+ (sce ? (long) sce->cidx : (long) -1),
+ (sce ? (long) sce->bidx : (long) -1),
+ (long) dist_start, (long) dist_end, (long) dist_sce));
+
+ p_found = duk__scan_backwards(p_start + sce->bidx,
+ p_start,
+ dist_sce);
+ goto scan_done;
+ }
+ }
+ }
+
+ /* no sce, or sce scan not best */
+
+ if (dist_start <= dist_end) {
+ DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, "
+ "dist_start=%ld, dist_end=%ld, dist_sce=%ld => "
+ "scan forwards from string start",
+ (long) use_cache, (void *) (sce ? sce->h : NULL),
+ (sce ? (long) sce->cidx : (long) -1),
+ (sce ? (long) sce->bidx : (long) -1),
+ (long) dist_start, (long) dist_end, (long) dist_sce));
+
+ p_found = duk__scan_forwards(p_start,
+ p_end,
+ dist_start);
+ } else {
+ DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, "
+ "dist_start=%ld, dist_end=%ld, dist_sce=%ld => "
+ "scan backwards from string end",
+ (long) use_cache, (void *) (sce ? sce->h : NULL),
+ (sce ? (long) sce->cidx : (long) -1),
+ (sce ? (long) sce->bidx : (long) -1),
+ (long) dist_start, (long) dist_end, (long) dist_sce));
+
+ p_found = duk__scan_backwards(p_end,
+ p_start,
+ dist_end);
+ }
+
+ scan_done:
+
+ if (!p_found) {
+ /* Scan error: this shouldn't normally happen; it could happen if
+ * string is not valid UTF-8 data, and clen/blen are not consistent
+ * with the scanning algorithm.
+ */
+ goto error;
+ }
+
+ DUK_ASSERT(p_found >= p_start);
+ DUK_ASSERT(p_found <= p_end); /* may be equal */
+ byte_offset = (duk_uint32_t) (p_found - p_start);
+
+ DUK_DDD(DUK_DDDPRINT("-> string %p, cidx %ld -> bidx %ld",
+ (void *) h, (long) char_offset, (long) byte_offset));
+
+ /*
+ * Update cache entry (allocating if necessary), and move the
+ * cache entry to the first place (in an "LRU" policy).
+ */
+
+ if (use_cache) {
+ /* update entry, allocating if necessary */
+ if (!sce) {
+ sce = heap->strcache + DUK_HEAP_STRCACHE_SIZE - 1; /* take last entry */
+ sce->h = h;
+ }
+ DUK_ASSERT(sce != NULL);
+ sce->bidx = (duk_uint32_t) (p_found - p_start);
+ sce->cidx = (duk_uint32_t) char_offset;
+
+ /* LRU: move our entry to first */
+ if (sce > &heap->strcache[0]) {
+ /*
+ * A C
+ * B A
+ * C <- sce ==> B
+ * D D
+ */
+ duk_strcache tmp;
+
+ tmp = *sce;
+ DUK_MEMMOVE((void *) (&heap->strcache[1]),
+ (const void *) (&heap->strcache[0]),
+ (size_t) (((char *) sce) - ((char *) &heap->strcache[0])));
+ heap->strcache[0] = tmp;
+
+ /* 'sce' points to the wrong entry here, but is no longer used */
+ }
+#ifdef DUK_USE_DDDPRINT
+ DUK_DDD(DUK_DDDPRINT("stringcache after char2byte (using cache):"));
+ for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) {
+ duk_strcache *c = heap->strcache + i;
+ DUK_DDD(DUK_DDDPRINT(" [%ld] -> h=%p, cidx=%ld, bidx=%ld",
+ (long) i, (void *) c->h, (long) c->cidx, (long) c->bidx));
+ }
+#endif
+ }
+
+ return byte_offset;
+
+ error:
+ DUK_ERROR_INTERNAL_DEFMSG(thr);
+ return 0;
+}