1 files changed, 1008 insertions, 0 deletions
diff --git a/src/civetweb/src/third_party/duktape-1.8.0/src-separate/duk_regexp_executor.c b/src/civetweb/src/third_party/duktape-1.8.0/src-separate/duk_regexp_executor.c
new file mode 100644
index 000000000..8139f4cab
--- /dev/null
+++ b/src/civetweb/src/third_party/duktape-1.8.0/src-separate/duk_regexp_executor.c
@@ -0,0 +1,1008 @@
+/*
+ *  Regexp executor.
+ *
+ *  Safety: the Ecmascript executor should prevent user from reading and
+ *  replacing regexp bytecode.  Even so, the executor must validate all
+ *  memory accesses etc.  When an invalid access is detected (e.g. a 'save'
+ *  opcode to invalid, unallocated index) it should fail with an internal
+ *  error but not cause a segmentation fault.
+ *
+ *  Notes:
+ *
+ *    - Backtrack counts are limited to unsigned 32 bits but should
+ *      technically be duk_size_t for strings longer than 4G chars.
+ *      This also requires a regexp bytecode change.
+ */
+
+#include "duk_internal.h"
+
+#ifdef DUK_USE_REGEXP_SUPPORT
+
+/*
+ *  Helpers for UTF-8 handling
+ *
+ *  For bytecode readers the duk_uint32_t and duk_int32_t types are correct
+ *  because they're used for more than just codepoints.
+ */
+
+DUK_LOCAL duk_uint32_t duk__bc_get_u32(duk_re_matcher_ctx *re_ctx, const duk_uint8_t **pc) {
+	return (duk_uint32_t) duk_unicode_decode_xutf8_checked(re_ctx->thr, pc, re_ctx->bytecode, re_ctx->bytecode_end);
+}
+
+DUK_LOCAL duk_int32_t duk__bc_get_i32(duk_re_matcher_ctx *re_ctx, const duk_uint8_t **pc) {
+	duk_uint32_t t;
+
+	/* signed integer encoding needed to work with UTF-8 */
+	t = (duk_uint32_t) duk_unicode_decode_xutf8_checked(re_ctx->thr, pc, re_ctx->bytecode, re_ctx->bytecode_end);
+	if (t & 1) {
+		return -((duk_int32_t) (t >> 1));
+	} else {
+		return (duk_int32_t) (t >> 1);
+	}
+}
+
+DUK_LOCAL const duk_uint8_t *duk__utf8_backtrack(duk_hthread *thr, const duk_uint8_t **ptr, const duk_uint8_t *ptr_start, const duk_uint8_t *ptr_end, duk_uint_fast32_t count) {
+	const duk_uint8_t *p;
+
+	/* Note: allow backtracking from p == ptr_end */
+	p = *ptr;
+	if (p < ptr_start || p > ptr_end) {
+		goto fail;
+	}
+
+	while (count > 0) {
+		for (;;) {
+			p--;
+			if (p < ptr_start) {
+				goto fail;
+			}
+			if ((*p & 0xc0) != 0x80) {
+				/* utf-8 continuation bytes have the form 10xx xxxx */
+				break;
+			}
+		}
+		count--;
+	}
+	*ptr = p;
+	return p;
+
+ fail:
+	DUK_ERROR_INTERNAL_DEFMSG(thr);
+	return NULL;  /* never here */
+}
+
+DUK_LOCAL const duk_uint8_t *duk__utf8_advance(duk_hthread *thr, const duk_uint8_t **ptr, const duk_uint8_t *ptr_start, const duk_uint8_t *ptr_end, duk_uint_fast32_t count) {
+	const duk_uint8_t *p;
+
+	p = *ptr;
+	if (p < ptr_start || p >= ptr_end) {
+		goto fail;
+	}
+
+	while (count > 0) {
+		for (;;) {
+			p++;
+
+			/* Note: if encoding ends by hitting end of input, we don't check that
+			 * the encoding is valid, we just assume it is.
+			 */
+			if (p >= ptr_end || ((*p & 0xc0) != 0x80)) {
+				/* utf-8 continuation bytes have the form 10xx xxxx */
+				break;
+			}
+		}
+		count--;
+	}
+
+	*ptr = p;
+	return p;
+
+ fail:
+	DUK_ERROR_INTERNAL_DEFMSG(thr);
+	return NULL;  /* never here */
+}
+
+/*
+ *  Helpers for dealing with the input string
+ */
+
+/* Get a (possibly canonicalized) input character from current sp.  The input
+ * itself is never modified, and captures always record non-canonicalized
+ * characters even in case-insensitive matching.
+ */
+DUK_LOCAL duk_codepoint_t duk__inp_get_cp(duk_re_matcher_ctx *re_ctx, const duk_uint8_t **sp) {
+	duk_codepoint_t res = (duk_codepoint_t) duk_unicode_decode_xutf8_checked(re_ctx->thr, sp, re_ctx->input, re_ctx->input_end);
+	if (re_ctx->re_flags & DUK_RE_FLAG_IGNORE_CASE) {
+		res = duk_unicode_re_canonicalize_char(re_ctx->thr, res);
+	}
+	return res;
+}
+
+DUK_LOCAL const duk_uint8_t *duk__inp_backtrack(duk_re_matcher_ctx *re_ctx, const duk_uint8_t **sp, duk_uint_fast32_t count) {
+	return duk__utf8_backtrack(re_ctx->thr, sp, re_ctx->input, re_ctx->input_end, count);
+}
+
+/* Backtrack utf-8 input and return a (possibly canonicalized) input character. */
+DUK_LOCAL duk_codepoint_t duk__inp_get_prev_cp(duk_re_matcher_ctx *re_ctx, const duk_uint8_t *sp) {
+	/* note: caller 'sp' is intentionally not updated here */
+	(void) duk__inp_backtrack(re_ctx, &sp, (duk_uint_fast32_t) 1);
+	return duk__inp_get_cp(re_ctx, &sp);
+}
+
+/*
+ *  Regexp recursive matching function.
+ *
+ *  Returns 'sp' on successful match (points to character after last matched one),
+ *  NULL otherwise.
+ *
+ *  The C recursion depth limit check is only performed in this function, this
+ *  suffices because the function is present in all true recursion required by
+ *  regexp execution.
+ */
+
+DUK_LOCAL const duk_uint8_t *duk__match_regexp(duk_re_matcher_ctx *re_ctx, const duk_uint8_t *pc, const duk_uint8_t *sp) {
+	if (re_ctx->recursion_depth >= re_ctx->recursion_limit) {
+		DUK_ERROR_RANGE(re_ctx->thr, DUK_STR_REGEXP_EXECUTOR_RECURSION_LIMIT);
+	}
+	re_ctx->recursion_depth++;
+
+	for (;;) {
+		duk_small_int_t op;
+
+		if (re_ctx->steps_count >= re_ctx->steps_limit) {
+			DUK_ERROR_RANGE(re_ctx->thr, DUK_STR_REGEXP_EXECUTOR_STEP_LIMIT);
+		}
+		re_ctx->steps_count++;
+
+		op = (duk_small_int_t) duk__bc_get_u32(re_ctx, &pc);
+
+		DUK_DDD(DUK_DDDPRINT("match: rec=%ld, steps=%ld, pc (after op)=%ld, sp=%ld, op=%ld",
+		                     (long) re_ctx->recursion_depth,
+		                     (long) re_ctx->steps_count,
+		                     (long) (pc - re_ctx->bytecode),
+		                     (long) (sp - re_ctx->input),
+		                     (long) op));
+
+		switch (op) {
+		case DUK_REOP_MATCH: {
+			goto match;
+		}
+		case DUK_REOP_CHAR: {
+			/*
+			 *  Byte-based matching would be possible for case-sensitive
+			 *  matching but not for case-insensitive matching.  So, we
+			 *  match by decoding the input and bytecode character normally.
+			 *
+			 *  Bytecode characters are assumed to be already canonicalized.
+			 *  Input characters are canonicalized automatically by
+			 *  duk__inp_get_cp() if necessary.
+			 *
+			 *  There is no opcode for matching multiple characters.  The
+			 *  regexp compiler has trouble joining strings efficiently
+			 *  during compilation.  See doc/regexp.rst for more discussion.
+			 */
+			duk_codepoint_t c1, c2;
+
+			c1 = (duk_codepoint_t) duk__bc_get_u32(re_ctx, &pc);
+			DUK_ASSERT(!(re_ctx->re_flags & DUK_RE_FLAG_IGNORE_CASE) ||
+			           c1 == duk_unicode_re_canonicalize_char(re_ctx->thr, c1));  /* canonicalized by compiler */
+			if (sp >= re_ctx->input_end) {
+				goto fail;
+			}
+			c2 = duk__inp_get_cp(re_ctx, &sp);
+			DUK_DDD(DUK_DDDPRINT("char match, c1=%ld, c2=%ld", (long) c1, (long) c2));
+			if (c1 != c2) {
+				goto fail;
+			}
+			break;
+		}
+		case DUK_REOP_PERIOD: {
+			duk_codepoint_t c;
+
+			if (sp >= re_ctx->input_end) {
+				goto fail;
+			}
+			c = duk__inp_get_cp(re_ctx, &sp);
+			if (duk_unicode_is_line_terminator(c)) {
+				/* E5 Sections 15.10.2.8, 7.3 */
+				goto fail;
+			}
+			break;
+		}
+		case DUK_REOP_RANGES:
+		case DUK_REOP_INVRANGES: {
+			duk_uint32_t n;
+			duk_codepoint_t c;
+			duk_small_int_t match;
+
+			n = duk__bc_get_u32(re_ctx, &pc);
+			if (sp >= re_ctx->input_end) {
+				goto fail;
+			}
+			c = duk__inp_get_cp(re_ctx, &sp);
+
+			match = 0;
+			while (n) {
+				duk_codepoint_t r1, r2;
+				r1 = (duk_codepoint_t) duk__bc_get_u32(re_ctx, &pc);
+				r2 = (duk_codepoint_t) duk__bc_get_u32(re_ctx, &pc);
+				DUK_DDD(DUK_DDDPRINT("matching ranges/invranges, n=%ld, r1=%ld, r2=%ld, c=%ld",
+				                     (long) n, (long) r1, (long) r2, (long) c));
+				if (c >= r1 && c <= r2) {
+					/* Note: don't bail out early, we must read all the ranges from
+					 * bytecode.  Another option is to skip them efficiently after
+					 * breaking out of here.  Prefer smallest code.
+					 */
+					match = 1;
+				}
+				n--;
+			}
+
+			if (op == DUK_REOP_RANGES) {
+				if (!match) {
+					goto fail;
+				}
+			} else {
+				DUK_ASSERT(op == DUK_REOP_INVRANGES);
+				if (match) {
+					goto fail;
+				}
+			}
+			break;
+		}
+		case DUK_REOP_ASSERT_START: {
+			duk_codepoint_t c;
+
+			if (sp <= re_ctx->input) {
+				break;
+			}
+			if (!(re_ctx->re_flags & DUK_RE_FLAG_MULTILINE)) {
+				goto fail;
+			}
+			c = duk__inp_get_prev_cp(re_ctx, sp);
+			if (duk_unicode_is_line_terminator(c)) {
+				/* E5 Sections 15.10.2.8, 7.3 */
+				break;
+			}
+			goto fail;
+		}
+		case DUK_REOP_ASSERT_END: {
+			duk_codepoint_t c;
+			const duk_uint8_t *tmp_sp;
+
+			if (sp >= re_ctx->input_end) {
+				break;
+			}
+			if (!(re_ctx->re_flags & DUK_RE_FLAG_MULTILINE)) {
+				goto fail;
+			}
+			tmp_sp = sp;
+			c = duk__inp_get_cp(re_ctx, &tmp_sp);
+			if (duk_unicode_is_line_terminator(c)) {
+				/* E5 Sections 15.10.2.8, 7.3 */
+				break;
+			}
+			goto fail;
+		}
+		case DUK_REOP_ASSERT_WORD_BOUNDARY:
+		case DUK_REOP_ASSERT_NOT_WORD_BOUNDARY: {
+			/*
+			 *  E5 Section 15.10.2.6.  The previous and current character
+			 *  should -not- be canonicalized as they are now.  However,
+			 *  canonicalization does not affect the result of IsWordChar()
+			 *  (which depends on Unicode characters never canonicalizing
+			 *  into ASCII characters) so this does not matter.
+			 */
+			duk_small_int_t w1, w2;
+
+			if (sp <= re_ctx->input) {
+				w1 = 0;  /* not a wordchar */
+			} else {
+				duk_codepoint_t c;
+				c = duk__inp_get_prev_cp(re_ctx, sp);
+				w1 = duk_unicode_re_is_wordchar(c);
+			}
+			if (sp >= re_ctx->input_end) {
+				w2 = 0;  /* not a wordchar */
+			} else {
+				const duk_uint8_t *tmp_sp = sp;  /* dummy so sp won't get updated */
+				duk_codepoint_t c;
+				c = duk__inp_get_cp(re_ctx, &tmp_sp);
+				w2 = duk_unicode_re_is_wordchar(c);
+			}
+
+			if (op == DUK_REOP_ASSERT_WORD_BOUNDARY) {
+				if (w1 == w2) {
+					goto fail;
+				}
+			} else {
+				DUK_ASSERT(op == DUK_REOP_ASSERT_NOT_WORD_BOUNDARY);
+				if (w1 != w2) {
+					goto fail;
+				}
+			}
+			break;
+		}
+		case DUK_REOP_JUMP: {
+			duk_int32_t skip;
+
+			skip = duk__bc_get_i32(re_ctx, &pc);
+			pc += skip;
+			break;
+		}
+		case DUK_REOP_SPLIT1: {
+			/* split1: prefer direct execution (no jump) */
+			const duk_uint8_t *sub_sp;
+			duk_int32_t skip;
+
+			skip = duk__bc_get_i32(re_ctx, &pc);
+			sub_sp = duk__match_regexp(re_ctx, pc, sp);
+			if (sub_sp) {
+				sp = sub_sp;
+				goto match;
+			}
+			pc += skip;
+			break;
+		}
+		case DUK_REOP_SPLIT2: {
+			/* split2: prefer jump execution (not direct) */
+			const duk_uint8_t *sub_sp;
+			duk_int32_t skip;
+
+			skip = duk__bc_get_i32(re_ctx, &pc);
+			sub_sp = duk__match_regexp(re_ctx, pc + skip, sp);
+			if (sub_sp) {
+				sp = sub_sp;
+				goto match;
+			}
+			break;
+		}
+		case DUK_REOP_SQMINIMAL: {
+			duk_uint32_t q, qmin, qmax;
+			duk_int32_t skip;
+			const duk_uint8_t *sub_sp;
+
+			qmin = duk__bc_get_u32(re_ctx, &pc);
+			qmax = duk__bc_get_u32(re_ctx, &pc);
+			skip = duk__bc_get_i32(re_ctx, &pc);
+			DUK_DDD(DUK_DDDPRINT("minimal quantifier, qmin=%lu, qmax=%lu, skip=%ld",
+			                     (unsigned long) qmin, (unsigned long) qmax, (long) skip));
+
+			q = 0;
+			while (q <= qmax) {
+				if (q >= qmin) {
+					sub_sp = duk__match_regexp(re_ctx, pc + skip, sp);
+					if (sub_sp) {
+						sp = sub_sp;
+						goto match;
+					}
+				}
+				sub_sp = duk__match_regexp(re_ctx, pc, sp);
+				if (!sub_sp) {
+					break;
+				}
+				sp = sub_sp;
+				q++;
+			}
+			goto fail;
+		}
+		case DUK_REOP_SQGREEDY: {
+			duk_uint32_t q, qmin, qmax, atomlen;
+			duk_int32_t skip;
+			const duk_uint8_t *sub_sp;
+
+			qmin = duk__bc_get_u32(re_ctx, &pc);
+			qmax = duk__bc_get_u32(re_ctx, &pc);
+			atomlen = duk__bc_get_u32(re_ctx, &pc);
+			skip = duk__bc_get_i32(re_ctx, &pc);
+			DUK_DDD(DUK_DDDPRINT("greedy quantifier, qmin=%lu, qmax=%lu, atomlen=%lu, skip=%ld",
+			                     (unsigned long) qmin, (unsigned long) qmax, (unsigned long) atomlen, (long) skip));
+
+			q = 0;
+			while (q < qmax) {
+				sub_sp = duk__match_regexp(re_ctx, pc, sp);
+				if (!sub_sp) {
+					break;
+				}
+				sp = sub_sp;
+				q++;
+			}
+			while (q >= qmin) {
+				sub_sp = duk__match_regexp(re_ctx, pc + skip, sp);
+				if (sub_sp) {
+					sp = sub_sp;
+					goto match;
+				}
+				if (q == qmin) {
+					break;
+				}
+
+				/* Note: if atom were to contain e.g. captures, we would need to
+				 * re-match the atom to get correct captures.  Simply quantifiers
+				 * do not allow captures in their atom now, so this is not an issue.
+				 */
+
+				DUK_DDD(DUK_DDDPRINT("greedy quantifier, backtrack %ld characters (atomlen)",
+				                     (long) atomlen));
+				sp = duk__inp_backtrack(re_ctx, &sp, (duk_uint_fast32_t) atomlen);
+				q--;
+			}
+			goto fail;
+		}
+		case DUK_REOP_SAVE: {
+			duk_uint32_t idx;
+			const duk_uint8_t *old;
+			const duk_uint8_t *sub_sp;
+
+			idx = duk__bc_get_u32(re_ctx, &pc);
+			if (idx >= re_ctx->nsaved) {
+				/* idx is unsigned, < 0 check is not necessary */
+				DUK_D(DUK_DPRINT("internal error, regexp save index insane: idx=%ld", (long) idx));
+				goto internal_error;
+			}
+			old = re_ctx->saved[idx];
+			re_ctx->saved[idx] = sp;
+			sub_sp = duk__match_regexp(re_ctx, pc, sp);
+			if (sub_sp) {
+				sp = sub_sp;
+				goto match;
+			}
+			re_ctx->saved[idx] = old;
+			goto fail;
+		}
+		case DUK_REOP_WIPERANGE: {
+			/* Wipe capture range and save old values for backtracking.
+			 *
+			 * XXX: this typically happens with a relatively small idx_count.
+			 * It might be useful to handle cases where the count is small
+			 * (say <= 8) by saving the values in stack instead.  This would
+			 * reduce memory churn and improve performance, at the cost of a
+			 * slightly higher code footprint.
+			 */
+			duk_uint32_t idx_start, idx_count;
+#ifdef DUK_USE_EXPLICIT_NULL_INIT
+			duk_uint32_t idx_end, idx;
+#endif
+			duk_uint8_t **range_save;
+			const duk_uint8_t *sub_sp;
+
+			idx_start = duk__bc_get_u32(re_ctx, &pc);
+			idx_count = duk__bc_get_u32(re_ctx, &pc);
+			DUK_DDD(DUK_DDDPRINT("wipe saved range: start=%ld, count=%ld -> [%ld,%ld] (captures [%ld,%ld])",
+			                     (long) idx_start, (long) idx_count,
+			                     (long) idx_start, (long) (idx_start + idx_count - 1),
+			                     (long) (idx_start / 2), (long) ((idx_start + idx_count - 1) / 2)));
+			if (idx_start + idx_count > re_ctx->nsaved || idx_count == 0) {
+				/* idx is unsigned, < 0 check is not necessary */
+				DUK_D(DUK_DPRINT("internal error, regexp wipe indices insane: idx_start=%ld, idx_count=%ld",
+				                 (long) idx_start, (long) idx_count));
+				goto internal_error;
+			}
+			DUK_ASSERT(idx_count > 0);
+
+			duk_require_stack((duk_context *) re_ctx->thr, 1);
+			range_save = (duk_uint8_t **) duk_push_fixed_buffer((duk_context *) re_ctx->thr,
+			                                                    sizeof(duk_uint8_t *) * idx_count);
+			DUK_ASSERT(range_save != NULL);
+			DUK_MEMCPY(range_save, re_ctx->saved + idx_start, sizeof(duk_uint8_t *) * idx_count);
+#ifdef DUK_USE_EXPLICIT_NULL_INIT
+			idx_end = idx_start + idx_count;
+			for (idx = idx_start; idx < idx_end; idx++) {
+				re_ctx->saved[idx] = NULL;
+			}
+#else
+			DUK_MEMZERO((void *) (re_ctx->saved + idx_start), sizeof(duk_uint8_t *) * idx_count);
+#endif
+
+			sub_sp = duk__match_regexp(re_ctx, pc, sp);
+			if (sub_sp) {
+				/* match: keep wiped/resaved values */
+				DUK_DDD(DUK_DDDPRINT("match: keep wiped/resaved values [%ld,%ld] (captures [%ld,%ld])",
+				                     (long) idx_start, (long) (idx_start + idx_count - 1),
+			                             (long) (idx_start / 2), (long) ((idx_start + idx_count - 1) / 2)));
+				duk_pop((duk_context *) re_ctx->thr);
+				sp = sub_sp;
+				goto match;
+			}
+
+			/* fail: restore saves */
+			DUK_DDD(DUK_DDDPRINT("fail: restore wiped/resaved values [%ld,%ld] (captures [%ld,%ld])",
+			                     (long) idx_start, (long) (idx_start + idx_count - 1),
+			                     (long) (idx_start / 2), (long) ((idx_start + idx_count - 1) / 2)));
+			DUK_MEMCPY((void *) (re_ctx->saved + idx_start),
+			           (const void *) range_save,
+			           sizeof(duk_uint8_t *) * idx_count);
+			duk_pop((duk_context *) re_ctx->thr);
+			goto fail;
+		}
+		case DUK_REOP_LOOKPOS:
+		case DUK_REOP_LOOKNEG: {
+			/*
+			 *  Needs a save of multiple saved[] entries depending on what range
+			 *  may be overwritten.  Because the regexp parser does no such analysis,
+			 *  we currently save the entire saved array here.  Lookaheads are thus
+			 *  a bit expensive.  Note that the saved array is not needed for just
+			 *  the lookahead sub-match, but for the matching of the entire sequel.
+			 *
+			 *  The temporary save buffer is pushed on to the valstack to handle
+			 *  errors correctly.  Each lookahead causes a C recursion and pushes
+			 *  more stuff on the value stack.  If the C recursion limit is less
+			 *  than the value stack spare, there is no need to check the stack.
+			 *  We do so regardless, just in case.
+			 */
+
+			duk_int32_t skip;
+			duk_uint8_t **full_save;
+			const duk_uint8_t *sub_sp;
+
+			DUK_ASSERT(re_ctx->nsaved > 0);
+
+			duk_require_stack((duk_context *) re_ctx->thr, 1);
+			full_save = (duk_uint8_t **) duk_push_fixed_buffer((duk_context *) re_ctx->thr,
+			                                                   sizeof(duk_uint8_t *) * re_ctx->nsaved);
+			DUK_ASSERT(full_save != NULL);
+			DUK_MEMCPY(full_save, re_ctx->saved, sizeof(duk_uint8_t *) * re_ctx->nsaved);
+
+			skip = duk__bc_get_i32(re_ctx, &pc);
+			sub_sp = duk__match_regexp(re_ctx, pc, sp);
+			if (op == DUK_REOP_LOOKPOS) {
+				if (!sub_sp) {
+					goto lookahead_fail;
+				}
+			} else {
+				if (sub_sp) {
+					goto lookahead_fail;
+				}
+			}
+			sub_sp = duk__match_regexp(re_ctx, pc + skip, sp);
+			if (sub_sp) {
+				/* match: keep saves */
+				duk_pop((duk_context *) re_ctx->thr);
+				sp = sub_sp;
+				goto match;
+			}
+
+			/* fall through */
+
+		 lookahead_fail:
+			/* fail: restore saves */
+			DUK_MEMCPY((void *) re_ctx->saved,
+			           (const void *) full_save,
+			           sizeof(duk_uint8_t *) * re_ctx->nsaved);
+			duk_pop((duk_context *) re_ctx->thr);
+			goto fail;
+		}
+		case DUK_REOP_BACKREFERENCE: {
+			/*
+			 *  Byte matching for back-references would be OK in case-
+			 *  sensitive matching.  In case-insensitive matching we need
+			 *  to canonicalize characters, so back-reference matching needs
+			 *  to be done with codepoints instead.  So, we just decode
+			 *  everything normally here, too.
+			 *
+			 *  Note: back-reference index which is 0 or higher than
+			 *  NCapturingParens (= number of capturing parens in the
+			 *  -entire- regexp) is a compile time error.  However, a
+			 *  backreference referring to a valid capture which has
+			 *  not matched anything always succeeds!  See E5 Section
+			 *  15.10.2.9, step 5, sub-step 3.
+			 */
+			duk_uint32_t idx;
+			const duk_uint8_t *p;
+
+			idx = duk__bc_get_u32(re_ctx, &pc);
+			idx = idx << 1;  /* backref n -> saved indices [n*2, n*2+1] */
+			if (idx < 2 || idx + 1 >= re_ctx->nsaved) {
+				/* regexp compiler should catch these */
+				DUK_D(DUK_DPRINT("internal error, backreference index insane"));
+				goto internal_error;
+			}
+			if (!re_ctx->saved[idx] || !re_ctx->saved[idx+1]) {
+				/* capture is 'undefined', always matches! */
+				DUK_DDD(DUK_DDDPRINT("backreference: saved[%ld,%ld] not complete, always match",
+				                     (long) idx, (long) (idx + 1)));
+				break;
+			}
+			DUK_DDD(DUK_DDDPRINT("backreference: match saved[%ld,%ld]", (long) idx, (long) (idx + 1)));
+
+			p = re_ctx->saved[idx];
+			while (p < re_ctx->saved[idx+1]) {
+				duk_codepoint_t c1, c2;
+
+				/* Note: not necessary to check p against re_ctx->input_end:
+				 * the memory access is checked by duk__inp_get_cp(), while
+				 * valid compiled regexps cannot write a saved[] entry
+				 * which points to outside the string.
+				 */
+				if (sp >= re_ctx->input_end) {
+					goto fail;
+				}
+				c1 = duk__inp_get_cp(re_ctx, &p);
+				c2 = duk__inp_get_cp(re_ctx, &sp);
+				if (c1 != c2) {
+					goto fail;
+				}
+			}
+			break;
+		}
+		default: {
+			DUK_D(DUK_DPRINT("internal error, regexp opcode error: %ld", (long) op));
+			goto internal_error;
+		}
+		}
+	}
+
+ match:
+	re_ctx->recursion_depth--;
+	return sp;
+
+ fail:
+	re_ctx->recursion_depth--;
+	return NULL;
+
+ internal_error:
+	DUK_ERROR_INTERNAL_DEFMSG(re_ctx->thr);
+	return NULL;  /* never here */
+}
+
+/*
+ *  Exposed matcher function which provides the semantics of RegExp.prototype.exec().
+ *
+ *  RegExp.prototype.test() has the same semantics as exec() but does not return the
+ *  result object (which contains the matching string and capture groups).  Currently
+ *  there is no separate test() helper, so a temporary result object is created and
+ *  discarded if test() is needed.  This is intentional, to save code space.
+ *
+ *  Input stack:  [ ... re_obj input ]
+ *  Output stack: [ ... result ]
+ */
+
+DUK_LOCAL void duk__regexp_match_helper(duk_hthread *thr, duk_small_int_t force_global) {
+	duk_context *ctx = (duk_context *) thr;
+	duk_re_matcher_ctx re_ctx;
+	duk_hobject *h_regexp;
+	duk_hstring *h_bytecode;
+	duk_hstring *h_input;
+	duk_uint8_t *p_buf;
+	const duk_uint8_t *pc;
+	const duk_uint8_t *sp;
+	duk_small_int_t match = 0;
+	duk_small_int_t global;
+	duk_uint_fast32_t i;
+	double d;
+	duk_uint32_t char_offset;
+
+	DUK_ASSERT(thr != NULL);
+	DUK_ASSERT(ctx != NULL);
+
+	DUK_DD(DUK_DDPRINT("regexp match: regexp=%!T, input=%!T",
+	                   (duk_tval *) duk_get_tval(ctx, -2),
+	                   (duk_tval *) duk_get_tval(ctx, -1)));
+
+	/*
+	 *  Regexp instance check, bytecode check, input coercion.
+	 *
+	 *  See E5 Section 15.10.6.
+	 */
+
+	/* TypeError if wrong; class check, see E5 Section 15.10.6 */
+	h_regexp = duk_require_hobject_with_class(ctx, -2, DUK_HOBJECT_CLASS_REGEXP);
+	DUK_ASSERT(h_regexp != NULL);
+	DUK_ASSERT(DUK_HOBJECT_GET_CLASS_NUMBER(h_regexp) == DUK_HOBJECT_CLASS_REGEXP);
+	DUK_UNREF(h_regexp);
+
+	duk_to_string(ctx, -1);
+	h_input = duk_get_hstring(ctx, -1);
+	DUK_ASSERT(h_input != NULL);
+
+	duk_get_prop_stridx(ctx, -2, DUK_STRIDX_INT_BYTECODE);  /* [ ... re_obj input ] -> [ ... re_obj input bc ] */
+	h_bytecode = duk_require_hstring(ctx, -1);  /* no regexp instance should exist without a non-configurable bytecode property */
+	DUK_ASSERT(h_bytecode != NULL);
+
+	/*
+	 *  Basic context initialization.
+	 *
+	 *  Some init values are read from the bytecode header
+	 *  whose format is (UTF-8 codepoints):
+	 *
+	 *    uint   flags
+	 *    uint   nsaved (even, 2n+2 where n = num captures)
+	 */
+
+	/* [ ... re_obj input bc ] */
+
+	DUK_MEMZERO(&re_ctx, sizeof(re_ctx));
+
+	re_ctx.thr = thr;
+	re_ctx.input = (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h_input);
+	re_ctx.input_end = re_ctx.input + DUK_HSTRING_GET_BYTELEN(h_input);
+	re_ctx.bytecode = (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h_bytecode);
+	re_ctx.bytecode_end = re_ctx.bytecode + DUK_HSTRING_GET_BYTELEN(h_bytecode);
+	re_ctx.saved = NULL;
+	re_ctx.recursion_limit = DUK_USE_REGEXP_EXECUTOR_RECLIMIT;
+	re_ctx.steps_limit = DUK_RE_EXECUTE_STEPS_LIMIT;
+
+	/* read header */
+	pc = re_ctx.bytecode;
+	re_ctx.re_flags = duk__bc_get_u32(&re_ctx, &pc);
+	re_ctx.nsaved = duk__bc_get_u32(&re_ctx, &pc);
+	re_ctx.bytecode = pc;
+
+	DUK_ASSERT(DUK_RE_FLAG_GLOBAL < 0x10000UL);  /* must fit into duk_small_int_t */
+	global = (duk_small_int_t) (force_global | (re_ctx.re_flags & DUK_RE_FLAG_GLOBAL));
+
+	DUK_ASSERT(re_ctx.nsaved >= 2);
+	DUK_ASSERT((re_ctx.nsaved % 2) == 0);
+
+	p_buf = (duk_uint8_t *) duk_push_fixed_buffer(ctx, sizeof(duk_uint8_t *) * re_ctx.nsaved);
+	DUK_UNREF(p_buf);
+	re_ctx.saved = (const duk_uint8_t **) duk_get_buffer(ctx, -1, NULL);
+	DUK_ASSERT(re_ctx.saved != NULL);
+
+	/* [ ... re_obj input bc saved_buf ] */
+
+#if defined(DUK_USE_EXPLICIT_NULL_INIT)
+	for (i = 0; i < re_ctx.nsaved; i++) {
+		re_ctx.saved[i] = (duk_uint8_t *) NULL;
+	}
+#elif defined(DUK_USE_ZERO_BUFFER_DATA)
+	/* buffer is automatically zeroed */
+#else
+	DUK_MEMZERO((void *) p_buf, sizeof(duk_uint8_t *) * re_ctx.nsaved);
+#endif
+
+	DUK_DDD(DUK_DDDPRINT("regexp ctx initialized, flags=0x%08lx, nsaved=%ld, recursion_limit=%ld, steps_limit=%ld",
+	                     (unsigned long) re_ctx.re_flags, (long) re_ctx.nsaved, (long) re_ctx.recursion_limit,
+	                     (long) re_ctx.steps_limit));
+
+	/*
+	 *  Get starting character offset for match, and initialize 'sp' based on it.
+	 *
+	 *  Note: lastIndex is non-configurable so it must be present (we check the
+	 *  internal class of the object above, so we know it is).  User code can set
+	 *  its value to an arbitrary (garbage) value though; E5 requires that lastIndex
+	 *  be coerced to a number before using.  The code below works even if the
+	 *  property is missing: the value will then be coerced to zero.
+	 *
+	 *  Note: lastIndex may be outside Uint32 range even after ToInteger() coercion.
+	 *  For instance, ToInteger(+Infinity) = +Infinity.  We track the match offset
+	 *  as an integer, but pre-check it to be inside the 32-bit range before the loop.
+	 *  If not, the check in E5 Section 15.10.6.2, step 9.a applies.
+	 */
+
+	/* XXX: lastIndex handling produces a lot of asm */
+
+	/* [ ... re_obj input bc saved_buf ] */
+
+	duk_get_prop_stridx(ctx, -4, DUK_STRIDX_LAST_INDEX);  /* -> [ ... re_obj input bc saved_buf lastIndex ] */
+	(void) duk_to_int(ctx, -1);  /* ToInteger(lastIndex) */
+	d = duk_get_number(ctx, -1);  /* integer, but may be +/- Infinite, +/- zero (not NaN, though) */
+	duk_pop(ctx);
+
+	if (global) {
+		if (d < 0.0 || d > (double) DUK_HSTRING_GET_CHARLEN(h_input)) {
+			/* match fail */
+			char_offset = 0;   /* not really necessary */
+			DUK_ASSERT(match == 0);
+			goto match_over;
+		}
+		char_offset = (duk_uint32_t) d;
+	} else {
+		/* lastIndex must be ignored for non-global regexps, but get the
+		 * value for (theoretical) side effects.  No side effects can
+		 * really occur, because lastIndex is a normal property and is
+		 * always non-configurable for RegExp instances.
+		 */
+		char_offset = (duk_uint32_t) 0;
+	}
+
+	sp = re_ctx.input + duk_heap_strcache_offset_char2byte(thr, h_input, char_offset);
+
+	/*
+	 *  Match loop.
+	 *
+	 *  Try matching at different offsets until match found or input exhausted.
+	 */
+
+	/* [ ... re_obj input bc saved_buf ] */
+
+	DUK_ASSERT(match == 0);
+
+	for (;;) {
+		/* char offset in [0, h_input->clen] (both ends inclusive), checked before entry */
+		DUK_ASSERT_DISABLE(char_offset >= 0);
+		DUK_ASSERT(char_offset <= DUK_HSTRING_GET_CHARLEN(h_input));
+
+		/* Note: ctx.steps is intentionally not reset, it applies to the entire unanchored match */
+		DUK_ASSERT(re_ctx.recursion_depth == 0);
+
+		DUK_DDD(DUK_DDDPRINT("attempt match at char offset %ld; %p [%p,%p]",
+		                     (long) char_offset, (const void *) sp,
+		                     (const void *) re_ctx.input, (const void *) re_ctx.input_end));
+
+		/*
+		 *  Note:
+		 *
+		 *    - duk__match_regexp() is required not to longjmp() in ordinary "non-match"
+		 *      conditions; a longjmp() will terminate the entire matching process.
+		 *
+		 *    - Clearing saved[] is not necessary because backtracking does it
+		 *
+		 *    - Backtracking also rewinds ctx.recursion back to zero, unless an
+		 *      internal/limit error occurs (which causes a longjmp())
+		 *
+		 *    - If we supported anchored matches, we would break out here
+		 *      unconditionally; however, Ecmascript regexps don't have anchored
+		 *      matches.  It might make sense to implement a fast bail-out if
+		 *      the regexp begins with '^' and sp is not 0: currently we'll just
+		 *      run through the entire input string, trivially failing the match
+		 *      at every non-zero offset.
+		 */
+
+		if (duk__match_regexp(&re_ctx, re_ctx.bytecode, sp) != NULL) {
+			DUK_DDD(DUK_DDDPRINT("match at offset %ld", (long) char_offset));
+			match = 1;
+			break;
+		}
+
+		/* advance by one character (code point) and one char_offset */
+		char_offset++;
+		if (char_offset > DUK_HSTRING_GET_CHARLEN(h_input)) {
+			/*
+			 *  Note:
+			 *
+			 *    - Intentionally attempt (empty) match at char_offset == k_input->clen
+			 *
+			 *    - Negative char_offsets have been eliminated and char_offset is duk_uint32_t
+			 *      -> no need or use for a negative check
+			 */
+
+			DUK_DDD(DUK_DDDPRINT("no match after trying all sp offsets"));
+			break;
+		}
+
+		/* avoid calling at end of input, will DUK_ERROR (above check suffices to avoid this) */
+		(void) duk__utf8_advance(thr, &sp, re_ctx.input, re_ctx.input_end, (duk_uint_fast32_t) 1);
+	}
+
+ match_over:
+
+	/*
+	 *  Matching complete, create result array or return a 'null'.  Update lastIndex
+	 *  if necessary.  See E5 Section 15.10.6.2.
+	 *
+	 *  Because lastIndex is a character (not byte) offset, we need the character
+	 *  length of the match which we conveniently get as a side effect of interning
+	 *  the matching substring (0th index of result array).
+	 *
+	 *  saved[0]         start pointer (~ byte offset) of current match
+	 *  saved[1]         end pointer (~ byte offset) of current match (exclusive)
+	 *  char_offset      start character offset of current match (-> .index of result)
+	 *  char_end_offset  end character offset (computed below)
+	 */
+
+	/* [ ... re_obj input bc saved_buf ] */
+
+	if (match) {
+#ifdef DUK_USE_ASSERTIONS
+		duk_hobject *h_res;
+#endif
+		duk_uint32_t char_end_offset = 0;
+
+		DUK_DDD(DUK_DDDPRINT("regexp matches at char_offset %ld", (long) char_offset));
+
+		DUK_ASSERT(re_ctx.nsaved >= 2);        /* must have start and end */
+		DUK_ASSERT((re_ctx.nsaved % 2) == 0);  /* and even number */
+
+		/* XXX: Array size is known before and (2 * re_ctx.nsaved) but not taken
+		 * advantage of now.  The array is not compacted either, as regexp match
+		 * objects are usually short lived.
+		 */
+
+		duk_push_array(ctx);
+
+#ifdef DUK_USE_ASSERTIONS
+		h_res = duk_require_hobject(ctx, -1);
+		DUK_ASSERT(DUK_HOBJECT_HAS_EXTENSIBLE(h_res));
+		DUK_ASSERT(DUK_HOBJECT_HAS_EXOTIC_ARRAY(h_res));
+		DUK_ASSERT(DUK_HOBJECT_GET_CLASS_NUMBER(h_res) == DUK_HOBJECT_CLASS_ARRAY);
+#endif
+
+		/* [ ... re_obj input bc saved_buf res_obj ] */
+
+		duk_push_u32(ctx, char_offset);
+		duk_xdef_prop_stridx_wec(ctx, -2, DUK_STRIDX_INDEX);
+
+		duk_dup(ctx, -4);
+		duk_xdef_prop_stridx_wec(ctx, -2, DUK_STRIDX_INPUT);
+
+		for (i = 0; i < re_ctx.nsaved; i += 2) {
+			/* Captures which are undefined have NULL pointers and are returned
+			 * as 'undefined'.  The same is done when saved[] pointers are insane
+			 * (this should, of course, never happen in practice).
+			 */
+			if (re_ctx.saved[i] && re_ctx.saved[i+1] && re_ctx.saved[i+1] >= re_ctx.saved[i]) {
+				duk_hstring *h_saved;
+
+				duk_push_lstring(ctx,
+				                 (const char *) re_ctx.saved[i],
+				                 (duk_size_t) (re_ctx.saved[i+1] - re_ctx.saved[i]));
+				h_saved = duk_get_hstring(ctx, -1);
+				DUK_ASSERT(h_saved != NULL);
+
+				if (i == 0) {
+					/* Assumes that saved[0] and saved[1] are always
+					 * set by regexp bytecode (if not, char_end_offset
+					 * will be zero).  Also assumes clen reflects the
+					 * correct char length.
+					 */
+					char_end_offset = char_offset + DUK_HSTRING_GET_CHARLEN(h_saved);
+				}
+			} else {
+				duk_push_undefined(ctx);
+			}
+
+			/* [ ... re_obj input bc saved_buf res_obj val ] */
+			duk_put_prop_index(ctx, -2, i / 2);
+		}
+
+		/* [ ... re_obj input bc saved_buf res_obj ] */
+
+		/* NB: 'length' property is automatically updated by the array setup loop */
+
+		if (global) {
+			/* global regexp: lastIndex updated on match */
+			duk_push_u32(ctx, char_end_offset);
+			duk_put_prop_stridx(ctx, -6, DUK_STRIDX_LAST_INDEX);
+		} else {
+			/* non-global regexp: lastIndex never updated on match */
+			;
+		}
+	} else {
+		/*
+		 *  No match, E5 Section 15.10.6.2, step 9.a.i - 9.a.ii apply, regardless
+		 *  of 'global' flag of the RegExp.  In particular, if lastIndex is invalid
+		 *  initially, it is reset to zero.
+		 */
+
+		DUK_DDD(DUK_DDDPRINT("regexp does not match"));
+
+		duk_push_null(ctx);
+
+		/* [ ... re_obj input bc saved_buf res_obj ] */
+
+		duk_push_int(ctx, 0);
+		duk_put_prop_stridx(ctx, -6, DUK_STRIDX_LAST_INDEX);
+	}
+
+	/* [ ... re_obj input bc saved_buf res_obj ] */
+
+	duk_insert(ctx, -5);
+
+	/* [ ... res_obj re_obj input bc saved_buf ] */
+
+	duk_pop_n(ctx, 4);
+
+	/* [ ... res_obj ] */
+
+	/* XXX: these last tricks are unnecessary if the function is made
+	 * a genuine native function.
+	 */
+}
+
+DUK_INTERNAL void duk_regexp_match(duk_hthread *thr) {
+	duk__regexp_match_helper(thr, 0 /*force_global*/);
+}
+
+/* This variant is needed by String.prototype.split(); it needs to perform
+ * global-style matching on a cloned RegExp which is potentially non-global.
+ */
+DUK_INTERNAL void duk_regexp_match_force_global(duk_hthread *thr) {
+	duk__regexp_match_helper(thr, 1 /*force_global*/);
+}
+
+#else  /* DUK_USE_REGEXP_SUPPORT */
+
+/* regexp support disabled */
+
+#endif  /* DUK_USE_REGEXP_SUPPORT */