diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 12:08:03 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 12:08:18 +0000 |
commit | 5da14042f70711ea5cf66e034699730335462f66 (patch) | |
tree | 0f6354ccac934ed87a2d555f45be4c831cf92f4a /src/fluent-bit/lib/onigmo/regexec.c | |
parent | Releasing debian version 1.44.3-2. (diff) | |
download | netdata-5da14042f70711ea5cf66e034699730335462f66.tar.xz netdata-5da14042f70711ea5cf66e034699730335462f66.zip |
Merging upstream version 1.45.3+dfsg.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/fluent-bit/lib/onigmo/regexec.c')
-rw-r--r-- | src/fluent-bit/lib/onigmo/regexec.c | 4265 |
1 files changed, 4265 insertions, 0 deletions
diff --git a/src/fluent-bit/lib/onigmo/regexec.c b/src/fluent-bit/lib/onigmo/regexec.c new file mode 100644 index 000000000..d408f6199 --- /dev/null +++ b/src/fluent-bit/lib/onigmo/regexec.c @@ -0,0 +1,4265 @@ +/********************************************************************** + regexec.c - Onigmo (Oniguruma-mod) (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2011-2019 K.Takata <kentkt AT csc DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regint.h" + +#ifdef RUBY +# undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#else +# define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#endif + +#ifndef USE_TOKEN_THREADED_VM +# ifdef __GNUC__ +# define USE_TOKEN_THREADED_VM 1 +# else +# define USE_TOKEN_THREADED_VM 0 +# endif +#endif + +#ifdef RUBY +# define ENC_DUMMY_FLAG (1<<24) +static inline int +rb_enc_asciicompat(OnigEncoding enc) +{ + return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG); +} +# undef ONIGENC_IS_MBC_ASCII_WORD +# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \ + (rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \ + onigenc_ascii_is_code_ctype( \ + ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc)) +#endif /* RUBY */ + +#ifdef USE_CRNL_AS_LINE_TERMINATOR +# define ONIGENC_IS_MBC_CRNL(enc,p,end) \ + (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ + ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10) +# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \ + is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev)) +static int +is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start, + const UChar *end, OnigOptionType option, int check_prev) +{ + if (IS_NEWLINE_CRLF(option)) { + if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) { + if (check_prev) { + const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end); + if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d) + return 0; + else + return 1; + } + else + return 1; + } + else { + const UChar *pnext = p + enclen(enc, p, end); + if (pnext < end && + ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d && + ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a) + return 1; + if (ONIGENC_IS_MBC_NEWLINE(enc, p, end)) + return 1; + return 0; + } + } + else { + return ONIGENC_IS_MBC_NEWLINE(enc, p, end); + } +} +#else /* USE_CRNL_AS_LINE_TERMINATOR */ +# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \ + ONIGENC_IS_MBC_NEWLINE((enc), (p), (end)) +#endif /* USE_CRNL_AS_LINE_TERMINATOR */ + +#ifdef USE_CAPTURE_HISTORY +static void history_tree_free(OnigCaptureTreeNode* node); + +static void +history_tree_clear(OnigCaptureTreeNode* node) +{ + int i; + + if (IS_NOT_NULL(node)) { + for (i = 0; i < node->num_childs; i++) { + if (IS_NOT_NULL(node->childs[i])) { + history_tree_free(node->childs[i]); + } + } + for (i = 0; i < node->allocated; i++) { + node->childs[i] = (OnigCaptureTreeNode* )0; + } + node->num_childs = 0; + node->beg = ONIG_REGION_NOTPOS; + node->end = ONIG_REGION_NOTPOS; + node->group = -1; + xfree(node->childs); + node->childs = (OnigCaptureTreeNode** )0; + } +} + +static void +history_tree_free(OnigCaptureTreeNode* node) +{ + history_tree_clear(node); + xfree(node); +} + +static void +history_root_free(OnigRegion* r) +{ + if (IS_NOT_NULL(r->history_root)) { + history_tree_free(r->history_root); + r->history_root = (OnigCaptureTreeNode* )0; + } +} + +static OnigCaptureTreeNode* +history_node_new(void) +{ + OnigCaptureTreeNode* node; + + node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode)); + CHECK_NULL_RETURN(node); + node->childs = (OnigCaptureTreeNode** )0; + node->allocated = 0; + node->num_childs = 0; + node->group = -1; + node->beg = ONIG_REGION_NOTPOS; + node->end = ONIG_REGION_NOTPOS; + + return node; +} + +static int +history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) +{ +# define HISTORY_TREE_INIT_ALLOC_SIZE 8 + + if (parent->num_childs >= parent->allocated) { + int n, i; + + if (IS_NULL(parent->childs)) { + n = HISTORY_TREE_INIT_ALLOC_SIZE; + parent->childs = + (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n); + CHECK_NULL_RETURN_MEMERR(parent->childs); + } + else { + OnigCaptureTreeNode** tmp; + n = parent->allocated * 2; + tmp = + (OnigCaptureTreeNode** )xrealloc(parent->childs, + sizeof(OnigCaptureTreeNode*) * n); + if (tmp == 0) { + history_tree_clear(parent); + return ONIGERR_MEMORY; + } + parent->childs = tmp; + } + for (i = parent->allocated; i < n; i++) { + parent->childs[i] = (OnigCaptureTreeNode* )0; + } + parent->allocated = n; + } + + parent->childs[parent->num_childs] = child; + parent->num_childs++; + return 0; +} + +static OnigCaptureTreeNode* +history_tree_clone(OnigCaptureTreeNode* node) +{ + int i, r; + OnigCaptureTreeNode *clone, *child; + + clone = history_node_new(); + CHECK_NULL_RETURN(clone); + + clone->beg = node->beg; + clone->end = node->end; + for (i = 0; i < node->num_childs; i++) { + child = history_tree_clone(node->childs[i]); + if (IS_NULL(child)) { + history_tree_free(clone); + return (OnigCaptureTreeNode* )0; + } + r = history_tree_add_child(clone, child); + if (r != 0) { + history_tree_free(child); + history_tree_free(clone); + return (OnigCaptureTreeNode* )0; + } + } + + return clone; +} + +extern OnigCaptureTreeNode* +onig_get_capture_tree(OnigRegion* region) +{ + return region->history_root; +} +#endif /* USE_CAPTURE_HISTORY */ + +extern void +onig_region_clear(OnigRegion* region) +{ + int i; + + for (i = 0; i < region->num_regs; i++) { + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; + } +#ifdef USE_CAPTURE_HISTORY + history_root_free(region); +#endif +} + +extern int +onig_region_resize(OnigRegion* region, int n) +{ + region->num_regs = n; + + if (n < ONIG_NREGION) + n = ONIG_NREGION; + + if (region->allocated == 0) { + region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition)); + if (region->beg == 0) + return ONIGERR_MEMORY; + + region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition)); + if (region->end == 0) { + xfree(region->beg); + return ONIGERR_MEMORY; + } + + region->allocated = n; + } + else if (region->allocated < n) { + OnigPosition *tmp; + + region->allocated = 0; + tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition)); + if (tmp == 0) { + xfree(region->beg); + xfree(region->end); + return ONIGERR_MEMORY; + } + region->beg = tmp; + tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition)); + if (tmp == 0) { + xfree(region->beg); + xfree(region->end); + return ONIGERR_MEMORY; + } + region->end = tmp; + + region->allocated = n; + } + + return 0; +} + +static int +onig_region_resize_clear(OnigRegion* region, int n) +{ + int r; + + r = onig_region_resize(region, n); + if (r != 0) return r; + onig_region_clear(region); + return 0; +} + +extern int +onig_region_set(OnigRegion* region, int at, int beg, int end) +{ + if (at < 0) return ONIGERR_INVALID_ARGUMENT; + + if (at >= region->allocated) { + int r = onig_region_resize(region, at + 1); + if (r < 0) return r; + } + + region->beg[at] = beg; + region->end[at] = end; + return 0; +} + +extern void +onig_region_init(OnigRegion* region) +{ + region->num_regs = 0; + region->allocated = 0; + region->beg = (OnigPosition* )0; + region->end = (OnigPosition* )0; +#ifdef USE_CAPTURE_HISTORY + region->history_root = (OnigCaptureTreeNode* )0; +#endif +} + +extern OnigRegion* +onig_region_new(void) +{ + OnigRegion* r; + + r = (OnigRegion* )xmalloc(sizeof(OnigRegion)); + if (r) + onig_region_init(r); + return r; +} + +extern void +onig_region_free(OnigRegion* r, int free_self) +{ + if (r) { + if (r->allocated > 0) { + if (r->beg) xfree(r->beg); + if (r->end) xfree(r->end); + r->allocated = 0; + } +#ifdef USE_CAPTURE_HISTORY + history_root_free(r); +#endif + if (free_self) xfree(r); + } +} + +extern void +onig_region_copy(OnigRegion* to, const OnigRegion* from) +{ +#define RREGC_SIZE (sizeof(int) * from->num_regs) + int i, r; + + if (to == from) return; + + r = onig_region_resize(to, from->num_regs); + if (r) return; + + for (i = 0; i < from->num_regs; i++) { + to->beg[i] = from->beg[i]; + to->end[i] = from->end[i]; + } + to->num_regs = from->num_regs; + +#ifdef USE_CAPTURE_HISTORY + history_root_free(to); + + if (IS_NOT_NULL(from->history_root)) { + to->history_root = history_tree_clone(from->history_root); + } +#endif +} + + +/** stack **/ +#define INVALID_STACK_INDEX -1 + +/* stack type */ +/* used by normal-POP */ +#define STK_ALT 0x0001 +#define STK_LOOK_BEHIND_NOT 0x0002 +#define STK_POS_NOT 0x0003 +/* handled by normal-POP */ +#define STK_MEM_START 0x0100 +#define STK_MEM_END 0x8200 +#define STK_REPEAT_INC 0x0300 +#define STK_STATE_CHECK_MARK 0x1000 +/* avoided by normal-POP */ +#define STK_NULL_CHECK_START 0x3000 +#define STK_NULL_CHECK_END 0x5000 /* for recursive call */ +#define STK_MEM_END_MARK 0x8400 +#define STK_POS 0x0500 /* used when POP-POS */ +#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ +#define STK_REPEAT 0x0700 +#define STK_CALL_FRAME 0x0800 +#define STK_RETURN 0x0900 +#define STK_VOID 0x0a00 /* for fill a blank */ +#define STK_ABSENT_POS 0x0b00 /* for absent */ +#define STK_ABSENT 0x0c00 /* absent inner loop marker */ + +/* stack type check mask */ +#define STK_MASK_POP_USED 0x00ff +#define STK_MASK_TO_VOID_TARGET 0x10ff +#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ + +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option);\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ + (msa).gpos = (arg_gpos);\ + (msa).best_len = ONIG_MISMATCH;\ + (msa).counter = 0;\ +} while(0) +#else +# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option);\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ + (msa).gpos = (arg_gpos);\ + (msa).counter = 0;\ +} while(0) +#endif + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + +# define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 + +# define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ + if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ + unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ + offset = ((offset) * (state_num)) >> 3;\ + if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ + if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\ + (msa).state_check_buff = (void* )xmalloc(size);\ + CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\ + }\ + else \ + (msa).state_check_buff = (void* )xalloca(size);\ + xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ + (size_t )(size - (offset))); \ + (msa).state_check_buff_size = size;\ + }\ + else {\ + (msa).state_check_buff = (void* )0;\ + (msa).state_check_buff_size = 0;\ + }\ + }\ + else {\ + (msa).state_check_buff = (void* )0;\ + (msa).state_check_buff_size = 0;\ + }\ + } while(0) + +# define MATCH_ARG_FREE(msa) do {\ + if ((msa).stack_p) xfree((msa).stack_p);\ + if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ + if ((msa).state_check_buff) xfree((msa).state_check_buff);\ + }\ +} while(0) +#else /* USE_COMBINATION_EXPLOSION_CHECK */ +# define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + + + +#define MAX_PTR_NUM 100 + +#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\ + if (ptr_num > MAX_PTR_NUM) {\ + alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\ + heap_addr = alloc_addr;\ + if (msa->stack_p) {\ + stk_alloc = (OnigStackType* )(msa->stack_p);\ + stk_base = stk_alloc;\ + stk = stk_base;\ + stk_end = stk_base + msa->stack_n;\ + } else {\ + stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\ + stk_base = stk_alloc;\ + stk = stk_base;\ + stk_end = stk_base + (stack_num);\ + }\ + } else if (msa->stack_p) {\ + alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\ + heap_addr = NULL;\ + stk_alloc = (OnigStackType* )(msa->stack_p);\ + stk_base = stk_alloc;\ + stk = stk_base;\ + stk_end = stk_base + msa->stack_n;\ + }\ + else {\ + alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\ + + sizeof(OnigStackType) * (stack_num));\ + heap_addr = NULL;\ + stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\ + stk_base = stk_alloc;\ + stk = stk_base;\ + stk_end = stk_base + (stack_num);\ + }\ +} while(0) + +#define STACK_SAVE do{\ + if (stk_base != stk_alloc) {\ + msa->stack_p = stk_base;\ + msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\ + };\ +} while(0) + +static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; + +extern unsigned int +onig_get_match_stack_limit_size(void) +{ + return MatchStackLimitSize; +} + +extern int +onig_set_match_stack_limit_size(unsigned int size) +{ + MatchStackLimitSize = size; + return 0; +} + +static int +stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, + OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa) +{ + size_t n; + OnigStackType *x, *stk_base, *stk_end, *stk; + + stk_base = *arg_stk_base; + stk_end = *arg_stk_end; + stk = *arg_stk; + + n = stk_end - stk_base; + if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { + x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2); + if (IS_NULL(x)) { + STACK_SAVE; + return ONIGERR_MEMORY; + } + xmemcpy(x, stk_base, n * sizeof(OnigStackType)); + n *= 2; + } + else { + unsigned int limit_size = MatchStackLimitSize; + n *= 2; + if (limit_size != 0 && n > limit_size) { + if ((unsigned int )(stk_end - stk_base) == limit_size) + return ONIGERR_MATCH_STACK_LIMIT_OVER; + else + n = limit_size; + } + x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n); + if (IS_NULL(x)) { + STACK_SAVE; + return ONIGERR_MEMORY; + } + } + *arg_stk = x + (stk - stk_base); + *arg_stk_base = x; + *arg_stk_end = x + n; + return 0; +} + +#define STACK_ENSURE(n) do {\ + if (stk_end - stk < (n)) {\ + int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\ + if (r != 0) {\ + STACK_SAVE;\ + if (xmalloc_base) xfree(xmalloc_base);\ + return r;\ + }\ + }\ +} while(0) + +#define STACK_AT(index) (stk_base + (index)) +#define GET_STACK_INDEX(stk) ((stk) - stk_base) + +#define STACK_PUSH_TYPE(stack_type) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + STACK_INC;\ +} while(0) + +#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) + +#ifdef USE_COMBINATION_EXPLOSION_CHECK +# define STATE_CHECK_POS(s,snum) \ + (((s) - str) * num_comb_exp_check + ((snum) - 1)) +# define STATE_CHECK_VAL(v,snum) do {\ + if (state_check_buff != NULL) {\ + ptrdiff_t x = STATE_CHECK_POS(s,snum);\ + (v) = state_check_buff[x/8] & (1<<(x%8));\ + }\ + else (v) = 0;\ +} while(0) + + +# define ELSE_IF_STATE_CHECK_MARK(stk) \ + else if ((stk)->type == STK_STATE_CHECK_MARK) { \ + ptrdiff_t x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ + state_check_buff[x/8] |= (1<<(x%8)); \ + } + +# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ + stk->u.state.state_check = 0;\ + stk->u.state.pkeep = (keep);\ + STACK_INC;\ +} while(0) + +# define STACK_PUSH_ENSURED(stack_type,pat) do {\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + stk->u.state.state_check = 0;\ + STACK_INC;\ +} while(0) + +# define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\ + STACK_ENSURE(1);\ + stk->type = STK_ALT;\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ + stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\ + stk->u.state.pkeep = (keep);\ + STACK_INC;\ +} while(0) + +# define STACK_PUSH_STATE_CHECK(s,snum) do {\ + if (state_check_buff != NULL) {\ + STACK_ENSURE(1);\ + stk->type = STK_STATE_CHECK_MARK;\ + stk->u.state.pstr = (s);\ + stk->u.state.state_check = (snum);\ + STACK_INC;\ + }\ +} while(0) + +#else /* USE_COMBINATION_EXPLOSION_CHECK */ + +# define ELSE_IF_STATE_CHECK_MARK(stk) + +# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ + stk->u.state.pkeep = (keep);\ + STACK_INC;\ +} while(0) + +# define STACK_PUSH_ENSURED(stack_type,pat) do {\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + STACK_INC;\ +} while(0) +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + +#define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep) +#define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep) +#define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep) +#define STACK_PUSH_ABSENT STACK_PUSH_TYPE(STK_ABSENT) +#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT) +#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \ + STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep) + +#define STACK_PUSH_REPEAT(id, pat) do {\ + STACK_ENSURE(1);\ + stk->type = STK_REPEAT;\ + stk->u.repeat.num = (id);\ + stk->u.repeat.pcode = (pat);\ + stk->u.repeat.count = 0;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_REPEAT_INC(sindex) do {\ + STACK_ENSURE(1);\ + stk->type = STK_REPEAT_INC;\ + stk->u.repeat_inc.si = (sindex);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MEM_START(mnum, s) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MEM_START;\ + stk->u.mem.num = (mnum);\ + stk->u.mem.pstr = (s);\ + stk->u.mem.start = mem_start_stk[mnum];\ + stk->u.mem.end = mem_end_stk[mnum];\ + mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ + mem_end_stk[mnum] = INVALID_STACK_INDEX;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MEM_END(mnum, s) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MEM_END;\ + stk->u.mem.num = (mnum);\ + stk->u.mem.pstr = (s);\ + stk->u.mem.start = mem_start_stk[mnum];\ + stk->u.mem.end = mem_end_stk[mnum];\ + mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MEM_END_MARK(mnum) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MEM_END_MARK;\ + stk->u.mem.num = (mnum);\ + STACK_INC;\ +} while(0) + +#define STACK_GET_MEM_START(mnum, k) do {\ + int level = 0;\ + k = stk;\ + while (k > stk_base) {\ + k--;\ + if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ + && k->u.mem.num == (mnum)) {\ + level++;\ + }\ + else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ + if (level == 0) break;\ + level--;\ + }\ + }\ +} while(0) + +#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\ + int level = 0;\ + while (k < stk) {\ + if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ + if (level == 0) (start) = k->u.mem.pstr;\ + level++;\ + }\ + else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\ + level--;\ + if (level == 0) {\ + (end) = k->u.mem.pstr;\ + break;\ + }\ + }\ + k++;\ + }\ +} while(0) + +#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ + STACK_ENSURE(1);\ + stk->type = STK_NULL_CHECK_START;\ + stk->u.null_check.num = (cnum);\ + stk->u.null_check.pstr = (s);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_NULL_CHECK_END(cnum) do {\ + STACK_ENSURE(1);\ + stk->type = STK_NULL_CHECK_END;\ + stk->u.null_check.num = (cnum);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_CALL_FRAME(pat) do {\ + STACK_ENSURE(1);\ + stk->type = STK_CALL_FRAME;\ + stk->u.call_frame.ret_addr = (pat);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_RETURN do {\ + STACK_ENSURE(1);\ + stk->type = STK_RETURN;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_ABSENT_POS(start, end) do {\ + STACK_ENSURE(1);\ + stk->type = STK_ABSENT_POS;\ + stk->u.absent_pos.abs_pstr = (start);\ + stk->u.absent_pos.end_pstr = (end);\ + STACK_INC;\ +} while(0) + + +#ifdef ONIG_DEBUG +# define STACK_BASE_CHECK(p, at) \ + if ((p) < stk_base) {\ + fprintf(stderr, "at %s\n", at);\ + goto stack_error;\ + } +#else +# define STACK_BASE_CHECK(p, at) +#endif + +#define STACK_POP_ONE do {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ +} while(0) + +#define STACK_POP do {\ + switch (pop_level) {\ + case STACK_POP_LEVEL_FREE:\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP"); \ + if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ + break;\ + case STACK_POP_LEVEL_MEM_START:\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP 2"); \ + if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ + break;\ + default:\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP 3"); \ + if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ + break;\ + }\ +} while(0) + +#define STACK_POP_TIL_POS_NOT do {\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ + if (stk->type == STK_POS_NOT) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ +} while(0) + +#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \ + if (stk->type == STK_LOOK_BEHIND_NOT) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ +} while(0) + +#define STACK_POP_TIL_ABSENT do {\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \ + if (stk->type == STK_ABSENT) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ +} while(0) + +#define STACK_POP_ABSENT_POS(start, end) do {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \ + (start) = stk->u.absent_pos.abs_pstr;\ + (end) = stk->u.absent_pos.end_pstr;\ +} while(0) + +#define STACK_POS_END(k) do {\ + k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_POS_END"); \ + if (IS_TO_VOID_TARGET(k)) {\ + k->type = STK_VOID;\ + }\ + else if (k->type == STK_POS) {\ + k->type = STK_VOID;\ + break;\ + }\ + }\ +} while(0) + +#define STACK_STOP_BT_END do {\ + OnigStackType *k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ + if (IS_TO_VOID_TARGET(k)) {\ + k->type = STK_VOID;\ + }\ + else if (k->type == STK_STOP_BT) {\ + k->type = STK_VOID;\ + break;\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK(isnull,id,s) do {\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + (isnull) = (k->u.null_check.pstr == (s));\ + break;\ + }\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK_REC(isnull,id,s) do {\ + int level = 0;\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (level == 0) {\ + (isnull) = (k->u.null_check.pstr == (s));\ + break;\ + }\ + else level--;\ + }\ + }\ + else if (k->type == STK_NULL_CHECK_END) {\ + level++;\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (k->u.null_check.pstr != (s)) {\ + (isnull) = 0;\ + break;\ + }\ + else {\ + UChar* endp;\ + (isnull) = 1;\ + while (k < stk) {\ + if (k->type == STK_MEM_START) {\ + if (k->u.mem.end == INVALID_STACK_INDEX) {\ + (isnull) = 0; break;\ + }\ + if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ + endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ + else\ + endp = (UChar* )k->u.mem.end;\ + if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ + (isnull) = 0; break;\ + }\ + else if (endp != s) {\ + (isnull) = -1; /* empty, but position changed */ \ + }\ + }\ + k++;\ + }\ + break;\ + }\ + }\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\ + int level = 0;\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (level == 0) {\ + if (k->u.null_check.pstr != (s)) {\ + (isnull) = 0;\ + break;\ + }\ + else {\ + UChar* endp;\ + (isnull) = 1;\ + while (k < stk) {\ + if (k->type == STK_MEM_START) {\ + if (k->u.mem.end == INVALID_STACK_INDEX) {\ + (isnull) = 0; break;\ + }\ + if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ + endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ + else\ + endp = (UChar* )k->u.mem.end;\ + if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ + (isnull) = 0; break;\ + }\ + else if (endp != s) {\ + (isnull) = -1; /* empty, but position changed */ \ + }\ + }\ + k++;\ + }\ + break;\ + }\ + }\ + else {\ + level--;\ + }\ + }\ + }\ + else if (k->type == STK_NULL_CHECK_END) {\ + if (k->u.null_check.num == (id)) level++;\ + }\ + }\ +} while(0) + +#define STACK_GET_REPEAT(id, k) do {\ + int level = 0;\ + k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ + if (k->type == STK_REPEAT) {\ + if (level == 0) {\ + if (k->u.repeat.num == (id)) {\ + break;\ + }\ + }\ + }\ + else if (k->type == STK_CALL_FRAME) level--;\ + else if (k->type == STK_RETURN) level++;\ + }\ +} while(0) + +#define STACK_RETURN(addr) do {\ + int level = 0;\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_RETURN"); \ + if (k->type == STK_CALL_FRAME) {\ + if (level == 0) {\ + (addr) = k->u.call_frame.ret_addr;\ + break;\ + }\ + else level--;\ + }\ + else if (k->type == STK_RETURN)\ + level++;\ + }\ +} while(0) + + +#define STRING_CMP(s1,s2,len) do {\ + while (len-- > 0) {\ + if (*s1++ != *s2++) goto fail;\ + }\ +} while(0) + +#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\ + if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \ + goto fail; \ +} while(0) + +static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, + UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end) +{ + UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar *p1, *p2, *end1, *s2; + int len1, len2; + + s2 = *ps2; + end1 = s1 + mblen; + while (s1 < end1) { + len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1); + len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2); + if (len1 != len2) return 0; + p1 = buf1; + p2 = buf2; + while (len1-- > 0) { + if (*p1 != *p2) return 0; + p1++; + p2++; + } + } + + *ps2 = s2; + return 1; +} + +#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\ + is_fail = 0;\ + while (len-- > 0) {\ + if (*s1++ != *s2++) {\ + is_fail = 1; break;\ + }\ + }\ +} while(0) + +#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\ + if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \ + is_fail = 1; \ + else \ + is_fail = 0; \ +} while(0) + + +#define IS_EMPTY_STR (str == end) +#define ON_STR_BEGIN(s) ((s) == str) +#define ON_STR_END(s) ((s) == end) +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +# define DATA_ENSURE_CHECK1 (s < right_range) +# define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) +# define DATA_ENSURE(n) if (s + (n) > right_range) goto fail +# define DATA_ENSURE_CONTINUE(n) if (s + (n) > right_range) continue +# define ABSENT_END_POS right_range +#else +# define DATA_ENSURE_CHECK1 (s < end) +# define DATA_ENSURE_CHECK(n) (s + (n) <= end) +# define DATA_ENSURE(n) if (s + (n) > end) goto fail +# define DATA_ENSURE_CONTINUE(n) if (s + (n) > end) continue +# define ABSENT_END_POS end +#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ + + +#ifdef USE_CAPTURE_HISTORY +static int +make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, + OnigStackType* stk_top, UChar* str, regex_t* reg) +{ + int n, r; + OnigCaptureTreeNode* child; + OnigStackType* k = *kp; + + while (k < stk_top) { + if (k->type == STK_MEM_START) { + n = k->u.mem.num; + if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && + BIT_STATUS_AT(reg->capture_history, n) != 0) { + child = history_node_new(); + CHECK_NULL_RETURN_MEMERR(child); + child->group = n; + child->beg = k->u.mem.pstr - str; + r = history_tree_add_child(node, child); + if (r != 0) { + history_tree_free(child); + return r; + } + *kp = (k + 1); + r = make_capture_history_tree(child, kp, stk_top, str, reg); + if (r != 0) return r; + + k = *kp; + child->end = k->u.mem.pstr - str; + } + } + else if (k->type == STK_MEM_END) { + if (k->u.mem.num == node->group) { + node->end = k->u.mem.pstr - str; + *kp = k; + return 0; + } + } + k++; + } + + return 1; /* 1: root node ending. */ +} +#endif /* USE_CAPTURE_HISTORY */ + +#ifdef USE_BACKREF_WITH_LEVEL +static int mem_is_in_memp(int mem, int num, UChar* memp) +{ + int i; + MemNumType m; + + for (i = 0; i < num; i++) { + GET_MEMNUM_INC(m, memp); + if (mem == (int )m) return 1; + } + return 0; +} + +static int backref_match_at_nested_level(regex_t* reg, + OnigStackType* top, OnigStackType* stk_base, + int ignore_case, int case_fold_flag, + int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) +{ + UChar *ss, *p, *pstart, *pend = NULL_UCHARP; + int level; + OnigStackType* k; + + level = 0; + k = top; + k--; + while (k >= stk_base) { + if (k->type == STK_CALL_FRAME) { + level--; + } + else if (k->type == STK_RETURN) { + level++; + } + else if (level == nest) { + if (k->type == STK_MEM_START) { + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pstart = k->u.mem.pstr; + if (pend != NULL_UCHARP) { + if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ + p = pstart; + ss = *s; + + if (ignore_case != 0) { + if (string_cmp_ic(reg->enc, case_fold_flag, + pstart, &ss, pend - pstart, send) == 0) + return 0; /* or goto next_mem; */ + } + else { + while (p < pend) { + if (*p++ != *ss++) return 0; /* or goto next_mem; */ + } + } + + *s = ss; + return 1; + } + } + } + else if (k->type == STK_MEM_END) { + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pend = k->u.mem.pstr; + } + } + } + k--; + } + + return 0; +} +#endif /* USE_BACKREF_WITH_LEVEL */ + + +#ifdef ONIG_DEBUG_STATISTICS + +# ifdef _WIN32 +# include <windows.h> +static LARGE_INTEGER ts, te, freq; +# define GETTIME(t) QueryPerformanceCounter(&(t)) +# define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \ + * 1000000 / freq.QuadPart) +# else /* _WIN32 */ + +# define USE_TIMEOFDAY + +# ifdef USE_TIMEOFDAY +# ifdef HAVE_SYS_TIME_H +# include <sys/time.h> +# endif +# ifdef HAVE_UNISTD_H +# include <unistd.h> +# endif +static struct timeval ts, te; +# define GETTIME(t) gettimeofday(&(t), (struct timezone* )0) +# define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \ + (((te).tv_sec - (ts).tv_sec)*1000000)) +# else /* USE_TIMEOFDAY */ +# ifdef HAVE_SYS_TIMES_H +# include <sys/times.h> +# endif +static struct tms ts, te; +# define GETTIME(t) times(&(t)) +# define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime) +# endif /* USE_TIMEOFDAY */ + +# endif /* _WIN32 */ + +static int OpCounter[256]; +static int OpPrevCounter[256]; +static unsigned long OpTime[256]; +static int OpCurr = OP_FINISH; +static int OpPrevTarget = OP_FAIL; +static int MaxStackDepth = 0; + +# define MOP_IN(opcode) do {\ + if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ + OpCurr = opcode;\ + OpCounter[opcode]++;\ + GETTIME(ts);\ +} while(0) + +# define MOP_OUT do {\ + GETTIME(te);\ + OpTime[OpCurr] += TIMEDIFF(te, ts);\ +} while(0) + +extern void +onig_statistics_init(void) +{ + int i; + for (i = 0; i < 256; i++) { + OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; + } + MaxStackDepth = 0; +# ifdef _WIN32 + QueryPerformanceFrequency(&freq); +# endif +} + +extern void +onig_print_statistics(FILE* f) +{ + int i; + fprintf(f, " count prev time\n"); + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + fprintf(f, "%8d: %8d: %10lu: %s\n", + OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); + } + fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); +} + +# define STACK_INC do {\ + stk++;\ + if (stk - stk_base > MaxStackDepth) \ + MaxStackDepth = stk - stk_base;\ +} while(0) + +#else /* ONIG_DEBUG_STATISTICS */ +# define STACK_INC stk++ + +# define MOP_IN(opcode) +# define MOP_OUT +#endif /* ONIG_DEBUG_STATISTICS */ + + +#ifdef ONIG_DEBUG_MATCH +static char * +stack_type_str(int stack_type) +{ + switch (stack_type) { + case STK_ALT: return "Alt "; + case STK_LOOK_BEHIND_NOT: return "LBNot "; + case STK_POS_NOT: return "PosNot"; + case STK_MEM_START: return "MemS "; + case STK_MEM_END: return "MemE "; + case STK_REPEAT_INC: return "RepInc"; + case STK_STATE_CHECK_MARK: return "StChMk"; + case STK_NULL_CHECK_START: return "NulChS"; + case STK_NULL_CHECK_END: return "NulChE"; + case STK_MEM_END_MARK: return "MemEMk"; + case STK_POS: return "Pos "; + case STK_STOP_BT: return "StopBt"; + case STK_REPEAT: return "Rep "; + case STK_CALL_FRAME: return "Call "; + case STK_RETURN: return "Ret "; + case STK_VOID: return "Void "; + case STK_ABSENT_POS: return "AbsPos"; + case STK_ABSENT: return "Absent"; + default: return " "; + } +} +#endif + +/* match data(str - end) from position (sstart). */ +/* if sstart == str then set sprev to NULL. */ +static OnigPosition +match_at(regex_t* reg, const UChar* str, const UChar* end, +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + const UChar* right_range, +#endif + const UChar* sstart, UChar* sprev, OnigMatchArg* msa) +{ + static const UChar FinishCode[] = { OP_FINISH }; + + int i, num_mem, pop_level; + ptrdiff_t n, best_len; + LengthType tlen, tlen2; + MemNumType mem; + RelAddrType addr; + OnigOptionType option = reg->options; + OnigEncoding encode = reg->enc; + OnigCaseFoldType case_fold_flag = reg->case_fold_flag; + UChar *s, *q, *sbegin; + UChar *p = reg->p; + UChar *pkeep; + char *alloca_base; + char *xmalloc_base = NULL; + OnigStackType *stk_alloc, *stk_base, *stk, *stk_end; + OnigStackType *stkp; /* used as any purpose. */ + OnigStackIndex si; + OnigStackIndex *repeat_stk; + OnigStackIndex *mem_start_stk, *mem_end_stk; +#ifdef USE_COMBINATION_EXPLOSION_CHECK + int scv; + unsigned char* state_check_buff = msa->state_check_buff; + int num_comb_exp_check = reg->num_comb_exp_check; +#endif + +#if USE_TOKEN_THREADED_VM +# define OP_OFFSET 1 +# define VM_LOOP JUMP; +# define VM_LOOP_END +# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK; +# define DEFAULT L_DEFAULT: +# define NEXT sprev = sbegin; JUMP +# define JUMP RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++]) + + RB_GNUC_EXTENSION static const void *oplabels[] = { + &&L_OP_FINISH, /* matching process terminator (no more alternative) */ + &&L_OP_END, /* pattern code terminator (success end) */ + + &&L_OP_EXACT1, /* single byte, N = 1 */ + &&L_OP_EXACT2, /* single byte, N = 2 */ + &&L_OP_EXACT3, /* single byte, N = 3 */ + &&L_OP_EXACT4, /* single byte, N = 4 */ + &&L_OP_EXACT5, /* single byte, N = 5 */ + &&L_OP_EXACTN, /* single byte */ + &&L_OP_EXACTMB2N1, /* mb-length = 2 N = 1 */ + &&L_OP_EXACTMB2N2, /* mb-length = 2 N = 2 */ + &&L_OP_EXACTMB2N3, /* mb-length = 2 N = 3 */ + &&L_OP_EXACTMB2N, /* mb-length = 2 */ + &&L_OP_EXACTMB3N, /* mb-length = 3 */ + &&L_OP_EXACTMBN, /* other length */ + + &&L_OP_EXACT1_IC, /* single byte, N = 1, ignore case */ + &&L_OP_EXACTN_IC, /* single byte, ignore case */ + + &&L_OP_CCLASS, + &&L_OP_CCLASS_MB, + &&L_OP_CCLASS_MIX, + &&L_OP_CCLASS_NOT, + &&L_OP_CCLASS_MB_NOT, + &&L_OP_CCLASS_MIX_NOT, + + &&L_OP_ANYCHAR, /* "." */ + &&L_OP_ANYCHAR_ML, /* "." multi-line */ + &&L_OP_ANYCHAR_STAR, /* ".*" */ + &&L_OP_ANYCHAR_ML_STAR, /* ".*" multi-line */ + &&L_OP_ANYCHAR_STAR_PEEK_NEXT, + &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT, + + &&L_OP_WORD, + &&L_OP_NOT_WORD, + &&L_OP_WORD_BOUND, + &&L_OP_NOT_WORD_BOUND, +# ifdef USE_WORD_BEGIN_END + &&L_OP_WORD_BEGIN, + &&L_OP_WORD_END, +# else + &&L_DEFAULT, + &&L_DEFAULT, +# endif + &&L_OP_ASCII_WORD, + &&L_OP_NOT_ASCII_WORD, + &&L_OP_ASCII_WORD_BOUND, + &&L_OP_NOT_ASCII_WORD_BOUND, +# ifdef USE_WORD_BEGIN_END + &&L_OP_ASCII_WORD_BEGIN, + &&L_OP_ASCII_WORD_END, +# else + &&L_DEFAULT, + &&L_DEFAULT, +# endif + + &&L_OP_BEGIN_BUF, + &&L_OP_END_BUF, + &&L_OP_BEGIN_LINE, + &&L_OP_END_LINE, + &&L_OP_SEMI_END_BUF, + &&L_OP_BEGIN_POSITION, + + &&L_OP_BACKREF1, + &&L_OP_BACKREF2, + &&L_OP_BACKREFN, + &&L_OP_BACKREFN_IC, + &&L_OP_BACKREF_MULTI, + &&L_OP_BACKREF_MULTI_IC, +# ifdef USE_BACKREF_WITH_LEVEL + &&L_OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */ +# else + &&L_DEFAULT, +# endif + &&L_OP_MEMORY_START, + &&L_OP_MEMORY_START_PUSH, /* push back-tracker to stack */ + &&L_OP_MEMORY_END_PUSH, /* push back-tracker to stack */ +# ifdef USE_SUBEXP_CALL + &&L_OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */ +# else + &&L_DEFAULT, +# endif + &&L_OP_MEMORY_END, +# ifdef USE_SUBEXP_CALL + &&L_OP_MEMORY_END_REC, /* push marker to stack */ +# else + &&L_DEFAULT, +# endif + + &&L_OP_KEEP, + + &&L_OP_FAIL, /* pop stack and move */ + &&L_OP_JUMP, + &&L_OP_PUSH, + &&L_OP_POP, +# ifdef USE_OP_PUSH_OR_JUMP_EXACT + &&L_OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */ +# else + &&L_DEFAULT, +# endif + &&L_OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */ + &&L_OP_REPEAT, /* {n,m} */ + &&L_OP_REPEAT_NG, /* {n,m}? (non greedy) */ + &&L_OP_REPEAT_INC, + &&L_OP_REPEAT_INC_NG, /* non greedy */ + &&L_OP_REPEAT_INC_SG, /* search and get in stack */ + &&L_OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */ + &&L_OP_NULL_CHECK_START, /* null loop checker start */ + &&L_OP_NULL_CHECK_END, /* null loop checker end */ +# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT + &&L_OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */ +# else + &&L_DEFAULT, +# endif +# ifdef USE_SUBEXP_CALL + &&L_OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ +# else + &&L_DEFAULT, +# endif + + &&L_OP_PUSH_POS, /* (?=...) start */ + &&L_OP_POP_POS, /* (?=...) end */ + &&L_OP_PUSH_POS_NOT, /* (?!...) start */ + &&L_OP_FAIL_POS, /* (?!...) end */ + &&L_OP_PUSH_STOP_BT, /* (?>...) start */ + &&L_OP_POP_STOP_BT, /* (?>...) end */ + &&L_OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */ + &&L_OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */ + &&L_OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */ + &&L_OP_PUSH_ABSENT_POS, /* (?~...) start */ + &&L_OP_ABSENT, /* (?~...) start of inner loop */ + &&L_OP_ABSENT_END, /* (?~...) end */ + +# ifdef USE_SUBEXP_CALL + &&L_OP_CALL, /* \g<name> */ + &&L_OP_RETURN, +# else + &&L_DEFAULT, + &&L_DEFAULT, +# endif + &&L_OP_CONDITION, + +# ifdef USE_COMBINATION_EXPLOSION_CHECK + &&L_OP_STATE_CHECK_PUSH, /* combination explosion check and push */ + &&L_OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ + &&L_OP_STATE_CHECK, /* check only */ +# else + &&L_DEFAULT, + &&L_DEFAULT, + &&L_DEFAULT, +# endif +# ifdef USE_COMBINATION_EXPLOSION_CHECK + &&L_OP_STATE_CHECK_ANYCHAR_STAR, + &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR, +# else + &&L_DEFAULT, + &&L_DEFAULT, +# endif + /* no need: IS_DYNAMIC_OPTION() == 0 */ +# if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ + &&L_OP_SET_OPTION_PUSH, /* set option and push recover option */ + &&L_OP_SET_OPTION /* set option */ +# else + &&L_DEFAULT, + &&L_DEFAULT +# endif + }; +#else /* USE_TOKEN_THREADED_VM */ + +# define OP_OFFSET 0 +# define VM_LOOP \ + while (1) { \ + OPCODE_EXEC_HOOK; \ + sbegin = s; \ + switch (*p++) { +# define VM_LOOP_END } sprev = sbegin; } +# define CASE(x) case x: +# define DEFAULT default: +# define NEXT break +# define JUMP continue; break +#endif /* USE_TOKEN_THREADED_VM */ + + +#ifdef USE_SUBEXP_CALL +/* Stack #0 is used to store the pattern itself and used for (?R), \g<0>, + etc. Additional space is required. */ +# define ADD_NUMMEM 1 +#else +/* Stack #0 not is used. */ +# define ADD_NUMMEM 0 +#endif + + n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2; + + STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE); + pop_level = reg->stack_pop_level; + num_mem = reg->num_mem; + repeat_stk = (OnigStackIndex* )alloca_base; + + mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); + mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM); + { + OnigStackIndex *pp = mem_start_stk; + for (; pp < repeat_stk + n; pp += 2) { + pp[0] = INVALID_STACK_INDEX; + pp[1] = INVALID_STACK_INDEX; + } + } +#ifndef USE_SUBEXP_CALL + mem_start_stk--; /* for index start from 1, + mem_start_stk[1]..mem_start_stk[num_mem] */ + mem_end_stk--; /* for index start from 1, + mem_end_stk[1]..mem_end_stk[num_mem] */ +#endif + +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: %"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n", + (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev); + fprintf(stderr, "size: %d, start offset: %d\n", + (int )(end - str), (int )(sstart - str)); + fprintf(stderr, "\n ofs> str stk:type addr:opcode\n"); +#endif + + STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */ + best_len = ONIG_MISMATCH; + s = (UChar* )sstart; + pkeep = (UChar* )sstart; + + +#ifdef ONIG_DEBUG_MATCH +# define OPCODE_EXEC_HOOK \ + if (s) { \ + UChar *op, *q, *bp, buf[50]; \ + int len; \ + op = p - OP_OFFSET; \ + fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \ + bp = buf; \ + q = s; \ + if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \ + for (i = 0; i < 7 && q < end; i++) { \ + len = enclen(encode, q, end); \ + while (len-- > 0) *bp++ = *q++; \ + } \ + if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \ + } \ + xmemcpy(bp, "\"", 1); bp += 1; \ + *bp = 0; \ + fputs((char* )buf, stderr); \ + for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \ + fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \ + stk - stk_base - 1, \ + (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \ + (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \ + onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \ + fprintf(stderr, "\n"); \ + } +#else +# define OPCODE_EXEC_HOOK ((void) 0) +#endif + + + VM_LOOP { + CASE(OP_END) MOP_IN(OP_END); + n = s - sstart; + if (n > best_len) { + OnigRegion* region; +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + if (IS_FIND_LONGEST(option)) { + if (n > msa->best_len) { + msa->best_len = n; + msa->best_s = (UChar* )sstart; + } + else + goto end_best_len; + } +#endif + best_len = n; + region = msa->region; + if (region) { + region->beg[0] = ((pkeep > s) ? s : pkeep) - str; + region->end[0] = s - str; + for (i = 1; i <= num_mem; i++) { + if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (BIT_STATUS_AT(reg->bt_mem_start, i)) + region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; + else + region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; + + region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) + ? STACK_AT(mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[i])) - str; + } + else { + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; + } + } + +#ifdef USE_CAPTURE_HISTORY + if (reg->capture_history != 0) { + int r; + OnigCaptureTreeNode* node; + + if (IS_NULL(region->history_root)) { + region->history_root = node = history_node_new(); + CHECK_NULL_RETURN_MEMERR(node); + } + else { + node = region->history_root; + history_tree_clear(node); + } + + node->group = 0; + node->beg = ((pkeep > s) ? s : pkeep) - str; + node->end = s - str; + + stkp = stk_base; + r = make_capture_history_tree(region->history_root, &stkp, + stk, (UChar* )str, reg); + if (r < 0) { + best_len = r; /* error code */ + goto finish; + } + } +#endif /* USE_CAPTURE_HISTORY */ + } /* if (region) */ + } /* n > best_len */ + +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + end_best_len: +#endif + MOP_OUT; + + if (IS_FIND_CONDITION(option)) { + if (IS_FIND_NOT_EMPTY(option) && s == sstart) { + best_len = ONIG_MISMATCH; + goto fail; /* for retry */ + } + if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { + goto fail; /* for retry */ + } + } + + /* default behavior: return first-matching result. */ + goto finish; + + CASE(OP_EXACT1) MOP_IN(OP_EXACT1); + DATA_ENSURE(1); + if (*p != *s) goto fail; + p++; s++; + MOP_OUT; + NEXT; + + CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC); + { + int len; + UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + DATA_ENSURE(1); + len = ONIGENC_MBC_CASE_FOLD(encode, + /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ + case_fold_flag, + &s, end, lowbuf); + DATA_ENSURE(0); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) { + goto fail; + } + p++; q++; + } + } + MOP_OUT; + NEXT; + + CASE(OP_EXACT2) MOP_IN(OP_EXACT2); + DATA_ENSURE(2); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + MOP_OUT; + JUMP; + + CASE(OP_EXACT3) MOP_IN(OP_EXACT3); + DATA_ENSURE(3); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + MOP_OUT; + JUMP; + + CASE(OP_EXACT4) MOP_IN(OP_EXACT4); + DATA_ENSURE(4); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + MOP_OUT; + JUMP; + + CASE(OP_EXACT5) MOP_IN(OP_EXACT5); + DATA_ENSURE(5); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + MOP_OUT; + JUMP; + + CASE(OP_EXACTN) MOP_IN(OP_EXACTN); + GET_LENGTH_INC(tlen, p); + DATA_ENSURE(tlen); + while (tlen-- > 0) { + if (*p++ != *s++) goto fail; + } + sprev = s - 1; + MOP_OUT; + JUMP; + + CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC); + { + int len; + UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + GET_LENGTH_INC(tlen, p); + endp = p + tlen; + + while (p < endp) { + sprev = s; + DATA_ENSURE(1); + len = ONIGENC_MBC_CASE_FOLD(encode, + /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ + case_fold_flag, + &s, end, lowbuf); + DATA_ENSURE(0); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) goto fail; + p++; q++; + } + } + } + + MOP_OUT; + JUMP; + + CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1); + DATA_ENSURE(2); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + MOP_OUT; + NEXT; + + CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2); + DATA_ENSURE(4); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + sprev = s; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + MOP_OUT; + JUMP; + + CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3); + DATA_ENSURE(6); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + sprev = s; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + MOP_OUT; + JUMP; + + CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N); + GET_LENGTH_INC(tlen, p); + DATA_ENSURE(tlen * 2); + while (tlen-- > 0) { + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + } + sprev = s - 2; + MOP_OUT; + JUMP; + + CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N); + GET_LENGTH_INC(tlen, p); + DATA_ENSURE(tlen * 3); + while (tlen-- > 0) { + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + } + sprev = s - 3; + MOP_OUT; + JUMP; + + CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN); + GET_LENGTH_INC(tlen, p); /* mb-len */ + GET_LENGTH_INC(tlen2, p); /* string len */ + tlen2 *= tlen; + DATA_ENSURE(tlen2); + while (tlen2-- > 0) { + if (*p != *s) goto fail; + p++; s++; + } + sprev = s - tlen; + MOP_OUT; + JUMP; + + CASE(OP_CCLASS) MOP_IN(OP_CCLASS); + DATA_ENSURE(1); + if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; + p += SIZE_BITSET; + s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */ + MOP_OUT; + NEXT; + + CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB); + if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail; + + cclass_mb: + GET_LENGTH_INC(tlen, p); + { + OnigCodePoint code; + UChar *ss; + int mb_len; + + DATA_ENSURE(1); + mb_len = enclen(encode, s, end); + DATA_ENSURE(mb_len); + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + if (! onig_is_in_code_range(p, code)) goto fail; +#else + q = p; + ALIGNMENT_RIGHT(q); + if (! onig_is_in_code_range(q, code)) goto fail; +#endif + } + p += tlen; + MOP_OUT; + NEXT; + + CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { + p += SIZE_BITSET; + goto cclass_mb; + } + else { + if (BITSET_AT(((BitSetRef )p), *s) == 0) + goto fail; + + p += SIZE_BITSET; + GET_LENGTH_INC(tlen, p); + p += tlen; + s++; + } + MOP_OUT; + NEXT; + + CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT); + DATA_ENSURE(1); + if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; + p += SIZE_BITSET; + s += enclen(encode, s, end); + MOP_OUT; + NEXT; + + CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT); + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) { + s++; + GET_LENGTH_INC(tlen, p); + p += tlen; + goto cc_mb_not_success; + } + + cclass_mb_not: + GET_LENGTH_INC(tlen, p); + { + OnigCodePoint code; + UChar *ss; + int mb_len = enclen(encode, s, end); + + if (! DATA_ENSURE_CHECK(mb_len)) { + DATA_ENSURE(1); + s = (UChar* )end; + p += tlen; + goto cc_mb_not_success; + } + + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + if (onig_is_in_code_range(p, code)) goto fail; +#else + q = p; + ALIGNMENT_RIGHT(q); + if (onig_is_in_code_range(q, code)) goto fail; +#endif + } + p += tlen; + + cc_mb_not_success: + MOP_OUT; + NEXT; + + CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { + p += SIZE_BITSET; + goto cclass_mb_not; + } + else { + if (BITSET_AT(((BitSetRef )p), *s) != 0) + goto fail; + + p += SIZE_BITSET; + GET_LENGTH_INC(tlen, p); + p += tlen; + s++; + } + MOP_OUT; + NEXT; + + CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR); + DATA_ENSURE(1); + n = enclen(encode, s, end); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; + s += n; + MOP_OUT; + NEXT; + + CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML); + DATA_ENSURE(1); + n = enclen(encode, s, end); + DATA_ENSURE(n); + s += n; + MOP_OUT; + NEXT; + + CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR); + while (DATA_ENSURE_CHECK1) { + STACK_PUSH_ALT(p, s, sprev, pkeep); + n = enclen(encode, s, end); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; + sprev = s; + s += n; + } + MOP_OUT; + JUMP; + + CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR); + while (DATA_ENSURE_CHECK1) { + STACK_PUSH_ALT(p, s, sprev, pkeep); + n = enclen(encode, s, end); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + MOP_OUT; + JUMP; + + CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); + while (DATA_ENSURE_CHECK1) { + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev, pkeep); + } + n = enclen(encode, s, end); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; + sprev = s; + s += n; + } + p++; + MOP_OUT; + NEXT; + + CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); + while (DATA_ENSURE_CHECK1) { + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev, pkeep); + } + n = enclen(encode, s, end); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + p++; + MOP_OUT; + NEXT; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); + GET_STATE_CHECK_NUM_INC(mem, p); + while (DATA_ENSURE_CHECK1) { + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep); + n = enclen(encode, s, end); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; + sprev = s; + s += n; + } + MOP_OUT; + NEXT; + + CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR) + MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); + + GET_STATE_CHECK_NUM_INC(mem, p); + while (DATA_ENSURE_CHECK1) { + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep); + n = enclen(encode, s, end); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + MOP_OUT; + NEXT; +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + + CASE(OP_WORD) MOP_IN(OP_WORD); + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + + s += enclen(encode, s, end); + MOP_OUT; + NEXT; + + CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD); + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) + goto fail; + + s += enclen(encode, s, end); + MOP_OUT; + NEXT; + + CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + + s += enclen(encode, s, end); + MOP_OUT; + NEXT; + + CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) + goto fail; + + s += enclen(encode, s, end); + MOP_OUT; + NEXT; + + CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND); + if (ON_STR_BEGIN(s)) { + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + } + else if (ON_STR_END(s)) { + if (! ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + else { + if (ONIGENC_IS_MBC_WORD(encode, s, end) + == ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + MOP_OUT; + JUMP; + + CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND); + if (ON_STR_BEGIN(s)) { + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) + goto fail; + } + else if (ON_STR_END(s)) { + if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) + goto fail; + } + else { + if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end) + == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) + goto fail; + } + MOP_OUT; + JUMP; + + CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND); + if (ON_STR_BEGIN(s)) { + if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + } + else if (ON_STR_END(s)) { + if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + else { + if (ONIGENC_IS_MBC_WORD(encode, s, end) + != ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + MOP_OUT; + JUMP; + + CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND); + if (ON_STR_BEGIN(s)) { + if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) + goto fail; + } + else if (ON_STR_END(s)) { + if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) + goto fail; + } + else { + if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end) + != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) + goto fail; + } + MOP_OUT; + JUMP; + +#ifdef USE_WORD_BEGIN_END + CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN); + if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { + if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { + MOP_OUT; + JUMP; + } + } + goto fail; + + CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN); + if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) { + if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) { + MOP_OUT; + JUMP; + } + } + goto fail; + + CASE(OP_WORD_END) MOP_IN(OP_WORD_END); + if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { + if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { + MOP_OUT; + JUMP; + } + } + goto fail; + + CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END); + if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) { + if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) { + MOP_OUT; + JUMP; + } + } + goto fail; +#endif + + CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF); + if (! ON_STR_BEGIN(s)) goto fail; + if (IS_NOTBOS(msa->options)) goto fail; + + MOP_OUT; + JUMP; + + CASE(OP_END_BUF) MOP_IN(OP_END_BUF); + if (! ON_STR_END(s)) goto fail; + if (IS_NOTEOS(msa->options)) goto fail; + + MOP_OUT; + JUMP; + + CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE); + if (ON_STR_BEGIN(s)) { + if (IS_NOTBOL(msa->options)) goto fail; + MOP_OUT; + JUMP; + } + else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) +#ifdef USE_CRNL_AS_LINE_TERMINATOR + && !(IS_NEWLINE_CRLF(option) + && ONIGENC_IS_MBC_CRNL(encode, sprev, end)) +#endif + && !ON_STR_END(s)) { + MOP_OUT; + JUMP; + } + goto fail; + + CASE(OP_END_LINE) MOP_IN(OP_END_LINE); + if (ON_STR_END(s)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) { +#endif + if (IS_NOTEOL(msa->options)) goto fail; + MOP_OUT; + JUMP; +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + } +#endif + } + else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) { + MOP_OUT; + JUMP; + } + goto fail; + + CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF); + if (ON_STR_END(s)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) { +#endif + if (IS_NOTEOL(msa->options)) goto fail; + MOP_OUT; + JUMP; +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + } +#endif + } + else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) { + UChar* ss = s + enclen(encode, s, end); + if (ON_STR_END(ss)) { + MOP_OUT; + JUMP; + } +#ifdef USE_CRNL_AS_LINE_TERMINATOR + else if (IS_NEWLINE_CRLF(option) + && ONIGENC_IS_MBC_CRNL(encode, s, end)) { + ss += enclen(encode, ss, end); + if (ON_STR_END(ss)) { + MOP_OUT; + JUMP; + } + } +#endif + } + goto fail; + + CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION); + if (s != msa->gpos) + goto fail; + + MOP_OUT; + JUMP; + + CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH); + GET_MEMNUM_INC(mem, p); + STACK_PUSH_MEM_START(mem, s); + MOP_OUT; + JUMP; + + CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START); + GET_MEMNUM_INC(mem, p); + mem_start_stk[mem] = (OnigStackIndex )((void* )s); + mem_end_stk[mem] = INVALID_STACK_INDEX; + MOP_OUT; + JUMP; + + CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH); + GET_MEMNUM_INC(mem, p); + STACK_PUSH_MEM_END(mem, s); + MOP_OUT; + JUMP; + + CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END); + GET_MEMNUM_INC(mem, p); + mem_end_stk[mem] = (OnigStackIndex )((void* )s); + MOP_OUT; + JUMP; + + CASE(OP_KEEP) MOP_IN(OP_KEEP); + pkeep = s; + MOP_OUT; + JUMP; + +#ifdef USE_SUBEXP_CALL + CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC); + GET_MEMNUM_INC(mem, p); + STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ + STACK_PUSH_MEM_END(mem, s); + mem_start_stk[mem] = GET_STACK_INDEX(stkp); + MOP_OUT; + JUMP; + + CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC); + GET_MEMNUM_INC(mem, p); + mem_end_stk[mem] = (OnigStackIndex )((void* )s); + STACK_GET_MEM_START(mem, stkp); + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + mem_start_stk[mem] = GET_STACK_INDEX(stkp); + else + mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); + + STACK_PUSH_MEM_END_MARK(mem); + MOP_OUT; + JUMP; +#endif + + CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1); + mem = 1; + goto backref; + + CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2); + mem = 2; + goto backref; + + CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN); + GET_MEMNUM_INC(mem, p); + backref: + { + int len; + UChar *pstart, *pend; + + /* if you want to remove following line, + you should check in parse and compile time. */ + if (mem > num_mem) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + STRING_CMP(pstart, s, n); + while (sprev + (len = enclen(encode, sprev, end)) < s) + sprev += len; + + MOP_OUT; + JUMP; + } + + CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC); + GET_MEMNUM_INC(mem, p); + { + int len; + UChar *pstart, *pend; + + /* if you want to remove following line, + you should check in parse and compile time. */ + if (mem > num_mem) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + STRING_CMP_IC(case_fold_flag, pstart, &s, (int)n, end); + while (sprev + (len = enclen(encode, sprev, end)) < s) + sprev += len; + + MOP_OUT; + JUMP; + } + NEXT; + + CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI); + { + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE_CONTINUE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE(pstart, swork, n, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enclen(encode, sprev, end)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + MOP_OUT; + JUMP; + } + NEXT; + + CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC); + { + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE_CONTINUE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enclen(encode, sprev, end)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + MOP_OUT; + JUMP; + } + +#ifdef USE_BACKREF_WITH_LEVEL + CASE(OP_BACKREF_WITH_LEVEL) + { + int len; + OnigOptionType ic; + LengthType level; + + GET_OPTION_INC(ic, p); + GET_LENGTH_INC(level, p); + GET_LENGTH_INC(tlen, p); + + sprev = s; + if (backref_match_at_nested_level(reg, stk, stk_base, ic, + case_fold_flag, (int )level, (int )tlen, p, &s, end)) { + while (sprev + (len = enclen(encode, sprev, end)) < s) + sprev += len; + + p += (SIZE_MEMNUM * tlen); + } + else + goto fail; + + MOP_OUT; + JUMP; + } + +#endif + +#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ + CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH); + GET_OPTION_INC(option, p); + STACK_PUSH_ALT(p, s, sprev, pkeep); + p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; + MOP_OUT; + JUMP; + + CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION); + GET_OPTION_INC(option, p); + MOP_OUT; + JUMP; +#endif + + CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START); + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_PUSH_NULL_CHECK_START(mem, s); + MOP_OUT; + JUMP; + + CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_NULL_CHECK(isnull, mem, s); + if (isnull) { +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n", + (int )mem, (uintptr_t )s, s); +#endif + null_check_found: + /* empty loop founded, skip next instruction */ + switch (*p++) { + case OP_JUMP: + case OP_PUSH: + p += SIZE_RELADDR; + break; + case OP_REPEAT_INC: + case OP_REPEAT_INC_NG: + case OP_REPEAT_INC_SG: + case OP_REPEAT_INC_NG_SG: + p += SIZE_MEMNUM; + break; + default: + goto unexpected_bytecode_error; + break; + } + } + } + MOP_OUT; + JUMP; + +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT + CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); + if (isnull) { +# ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n", + (int )mem, (uintptr_t )s, s); +# endif + if (isnull == -1) goto fail; + goto null_check_found; + } + } + MOP_OUT; + JUMP; +#endif + +#ifdef USE_SUBEXP_CALL + CASE(OP_NULL_CHECK_END_MEMST_PUSH) + MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ +# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT + STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); +# else + STACK_NULL_CHECK_REC(isnull, mem, s); +# endif + if (isnull) { +# ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n", + (int )mem, (uintptr_t )s, s); +# endif + if (isnull == -1) goto fail; + goto null_check_found; + } + else { + STACK_PUSH_NULL_CHECK_END(mem); + } + } + MOP_OUT; + JUMP; +#endif + + CASE(OP_JUMP) MOP_IN(OP_JUMP); + GET_RELADDR_INC(addr, p); + p += addr; + MOP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; + JUMP; + + CASE(OP_PUSH) MOP_IN(OP_PUSH); + GET_RELADDR_INC(addr, p); + STACK_PUSH_ALT(p + addr, s, sprev, pkeep); + MOP_OUT; + JUMP; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH); + GET_STATE_CHECK_NUM_INC(mem, p); + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + GET_RELADDR_INC(addr, p); + STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep); + MOP_OUT; + JUMP; + + CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); + GET_STATE_CHECK_NUM_INC(mem, p); + GET_RELADDR_INC(addr, p); + STATE_CHECK_VAL(scv, mem); + if (scv) { + p += addr; + } + else { + STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep); + } + MOP_OUT; + JUMP; + + CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK); + GET_STATE_CHECK_NUM_INC(mem, p); + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_STATE_CHECK(s, mem); + MOP_OUT; + JUMP; +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + + CASE(OP_POP) MOP_IN(OP_POP); + STACK_POP_ONE; + MOP_OUT; + JUMP; + +#ifdef USE_OP_PUSH_OR_JUMP_EXACT + CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1); + GET_RELADDR_INC(addr, p); + if (*p == *s && DATA_ENSURE_CHECK1) { + p++; + STACK_PUSH_ALT(p + addr, s, sprev, pkeep); + MOP_OUT; + JUMP; + } + p += (addr + 1); + MOP_OUT; + JUMP; +#endif + + CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT); + GET_RELADDR_INC(addr, p); + if (*p == *s) { + p++; + STACK_PUSH_ALT(p + addr, s, sprev, pkeep); + MOP_OUT; + JUMP; + } + p++; + MOP_OUT; + JUMP; + + CASE(OP_REPEAT) MOP_IN(OP_REPEAT); + { + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + GET_RELADDR_INC(addr, p); + + STACK_ENSURE(1); + repeat_stk[mem] = GET_STACK_INDEX(stk); + STACK_PUSH_REPEAT(mem, p); + + if (reg->repeat_range[mem].lower == 0) { + STACK_PUSH_ALT(p + addr, s, sprev, pkeep); + } + } + MOP_OUT; + JUMP; + + CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG); + { + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + GET_RELADDR_INC(addr, p); + + STACK_ENSURE(1); + repeat_stk[mem] = GET_STACK_INDEX(stk); + STACK_PUSH_REPEAT(mem, p); + + if (reg->repeat_range[mem].lower == 0) { + STACK_PUSH_ALT(p, s, sprev, pkeep); + p += addr; + } + } + MOP_OUT; + JUMP; + + CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + si = repeat_stk[mem]; + stkp = STACK_AT(si); + + repeat_inc: + stkp->u.repeat.count++; + if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) { + /* end of repeat. Nothing to do. */ + } + else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { + STACK_PUSH_ALT(p, s, sprev, pkeep); + p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */ + } + else { + p = stkp->u.repeat.pcode; + } + STACK_PUSH_REPEAT_INC(si); + MOP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; + JUMP; + + CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + STACK_GET_REPEAT(mem, stkp); + si = GET_STACK_INDEX(stkp); + goto repeat_inc; + + CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + si = repeat_stk[mem]; + stkp = STACK_AT(si); + + repeat_inc_ng: + stkp->u.repeat.count++; + if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { + if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { + UChar* pcode = stkp->u.repeat.pcode; + + STACK_PUSH_REPEAT_INC(si); + STACK_PUSH_ALT(pcode, s, sprev, pkeep); + } + else { + p = stkp->u.repeat.pcode; + STACK_PUSH_REPEAT_INC(si); + } + } + else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { + STACK_PUSH_REPEAT_INC(si); + } + MOP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; + JUMP; + + CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + STACK_GET_REPEAT(mem, stkp); + si = GET_STACK_INDEX(stkp); + goto repeat_inc_ng; + + CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS); + STACK_PUSH_POS(s, sprev, pkeep); + MOP_OUT; + JUMP; + + CASE(OP_POP_POS) MOP_IN(OP_POP_POS); + { + STACK_POS_END(stkp); + s = stkp->u.state.pstr; + sprev = stkp->u.state.pstr_prev; + } + MOP_OUT; + JUMP; + + CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT); + GET_RELADDR_INC(addr, p); + STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep); + MOP_OUT; + JUMP; + + CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS); + STACK_POP_TIL_POS_NOT; + goto fail; + + CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT); + STACK_PUSH_STOP_BT; + MOP_OUT; + JUMP; + + CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT); + STACK_STOP_BT_END; + MOP_OUT; + JUMP; + + CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND); + GET_LENGTH_INC(tlen, p); + s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen); + if (IS_NULL(s)) goto fail; + sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end); + MOP_OUT; + JUMP; + + CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT); + GET_RELADDR_INC(addr, p); + GET_LENGTH_INC(tlen, p); + q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen); + if (IS_NULL(q)) { + /* too short case -> success. ex. /(?<!XXX)a/.match("a") + If you want to change to fail, replace following line. */ + p += addr; + /* goto fail; */ + } + else { + STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep); + s = q; + sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end); + } + MOP_OUT; + JUMP; + + CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT); + STACK_POP_TIL_LOOK_BEHIND_NOT; + goto fail; + + CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS); + /* Save the absent-start-pos and the original end-pos. */ + STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS); + MOP_OUT; + JUMP; + + CASE(OP_ABSENT) MOP_IN(OP_ABSENT); + { + const UChar* aend = ABSENT_END_POS; + UChar* absent; + UChar* selfp = p - 1; + + STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */ + GET_RELADDR_INC(addr, p); +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend); +#endif + if ((absent > aend) && (s > absent)) { + /* An empty match occurred in (?~...) at the start point. + * Never match. */ + STACK_POP; + goto fail; + } + else if ((s >= aend) && (s > absent)) { + if (s > aend) { + /* Only one (or less) character matched in the last iteration. + * This is not a possible point. */ + goto fail; + } + /* All possible points were found. Try matching after (?~...). */ + DATA_ENSURE(0); + p += addr; + } + else { + STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */ + /* For approximating enclen. Strict version of enclen does not work here. */ + n = enclen_approximate(encode, s, end); + STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */ + STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */ + STACK_PUSH_ABSENT; + ABSENT_END_POS = aend; + } + } + MOP_OUT; + JUMP; + + CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END); + /* The pattern inside (?~...) was matched. + * Set the end-pos temporary and go to next iteration. */ + if (sprev < ABSENT_END_POS) + ABSENT_END_POS = sprev; +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS); +#endif + STACK_POP_TIL_ABSENT; + goto fail; + +#ifdef USE_SUBEXP_CALL + CASE(OP_CALL) MOP_IN(OP_CALL); + GET_ABSADDR_INC(addr, p); + STACK_PUSH_CALL_FRAME(p); + p = reg->p + addr; + MOP_OUT; + JUMP; + + CASE(OP_RETURN) MOP_IN(OP_RETURN); + STACK_RETURN(p); + STACK_PUSH_RETURN; + MOP_OUT; + JUMP; +#endif + + CASE(OP_CONDITION) MOP_IN(OP_CONDITION); + GET_MEMNUM_INC(mem, p); + GET_RELADDR_INC(addr, p); + if ((mem > num_mem) || + (mem_end_stk[mem] == INVALID_STACK_INDEX) || + (mem_start_stk[mem] == INVALID_STACK_INDEX)) { + p += addr; + } + MOP_OUT; + JUMP; + + CASE(OP_FINISH) + goto finish; + + CASE(OP_FAIL) + if (0) { + /* fall */ + fail: + MOP_OUT; + } + MOP_IN(OP_FAIL); + STACK_POP; + p = stk->u.state.pcode; + s = stk->u.state.pstr; + sprev = stk->u.state.pstr_prev; + pkeep = stk->u.state.pkeep; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + if (stk->u.state.state_check != 0) { + stk->type = STK_STATE_CHECK_MARK; + stk++; + } +#endif + + MOP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; + JUMP; + + DEFAULT + goto bytecode_error; + } VM_LOOP_END + + finish: + STACK_SAVE; + if (xmalloc_base) xfree(xmalloc_base); + return best_len; + +#ifdef ONIG_DEBUG + stack_error: + STACK_SAVE; + if (xmalloc_base) xfree(xmalloc_base); + return ONIGERR_STACK_BUG; +#endif + + bytecode_error: + STACK_SAVE; + if (xmalloc_base) xfree(xmalloc_base); + return ONIGERR_UNDEFINED_BYTECODE; + + unexpected_bytecode_error: + STACK_SAVE; + if (xmalloc_base) xfree(xmalloc_base); + return ONIGERR_UNEXPECTED_BYTECODE; +} + + +static UChar* +slow_search(OnigEncoding enc, UChar* target, UChar* target_end, + const UChar* text, const UChar* text_end, UChar* text_range) +{ + UChar *t, *p, *s, *end; + + end = (UChar* )text_end; + end -= target_end - target - 1; + if (end > text_range) + end = text_range; + + s = (UChar* )text; + + if (enc->max_enc_len == enc->min_enc_len) { + int n = enc->max_enc_len; + + while (s < end) { + if (*s == *target) { + p = s + 1; + t = target + 1; + if (target_end == t || memcmp(t, p, target_end - t) == 0) + return s; + } + s += n; + } + return (UChar* )NULL; + } + while (s < end) { + if (*s == *target) { + p = s + 1; + t = target + 1; + if (target_end == t || memcmp(t, p, target_end - t) == 0) + return s; + } + s += enclen(enc, s, text_end); + } + + return (UChar* )NULL; +} + +static int +str_lower_case_match(OnigEncoding enc, int case_fold_flag, + const UChar* t, const UChar* tend, + const UChar* p, const UChar* end) +{ + int lowlen; + UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + while (t < tend) { + lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf); + q = lowbuf; + while (lowlen > 0) { + if (*t++ != *q++) return 0; + lowlen--; + } + } + + return 1; +} + +static UChar* +slow_search_ic(OnigEncoding enc, int case_fold_flag, + UChar* target, UChar* target_end, + const UChar* text, const UChar* text_end, UChar* text_range) +{ + UChar *s, *end; + + end = (UChar* )text_end; + end -= target_end - target - 1; + if (end > text_range) + end = text_range; + + s = (UChar* )text; + + while (s < end) { + if (str_lower_case_match(enc, case_fold_flag, target, target_end, + s, text_end)) + return s; + + s += enclen(enc, s, text_end); + } + + return (UChar* )NULL; +} + +static UChar* +slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) +{ + UChar *t, *p, *s; + + s = (UChar* )text_end; + s -= (target_end - target); + if (s > text_start) + s = (UChar* )text_start; + else + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end); + + while (s >= text) { + if (*s == *target) { + p = s + 1; + t = target + 1; + while (t < target_end) { + if (*t != *p++) + break; + t++; + } + if (t == target_end) + return s; + } + s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end); + } + + return (UChar* )NULL; +} + +static UChar* +slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, + UChar* target, UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) +{ + UChar *s; + + s = (UChar* )text_end; + s -= (target_end - target); + if (s > text_start) + s = (UChar* )text_start; + else + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end); + + while (s >= text) { + if (str_lower_case_match(enc, case_fold_flag, + target, target_end, s, text_end)) + return s; + + s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end); + } + + return (UChar* )NULL; +} + +/* Sunday's quick search applied to a multibyte string */ +static UChar* +bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, + const UChar* text_range) +{ + const UChar *s, *se, *t, *p, *end; + const UChar *tail; + ptrdiff_t skip, tlen1; + OnigEncoding enc = reg->enc; + +# ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", + (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); +# endif + + tail = target_end - 1; + tlen1 = tail - target; + end = text_range; + if (end + tlen1 > text_end) + end = text_end - tlen1; + + s = text; + + while (s < end) { + p = se = s + tlen1; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )s; + p--; t--; + } + if (s + 1 >= end) break; + skip = reg->map[se[1]]; + t = s; + do { + s += enclen(enc, s, end); + } while ((s - t) < skip && s < end); + } + + return (UChar* )NULL; +} + +/* Sunday's quick search */ +static UChar* +bm_search(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, const UChar* text_range) +{ + const UChar *s, *t, *p, *end; + const UChar *tail; + ptrdiff_t tlen1; + +# ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", + (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); +# endif + + tail = target_end - 1; + tlen1 = tail - target; + end = text_range + tlen1; + if (end > text_end) + end = text_end; + + s = text + tlen1; + while (s < end) { + p = s; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )p; + p--; t--; + } + if (s + 1 >= end) break; + s += reg->map[s[1]]; + } + + return (UChar* )NULL; +} + +/* Sunday's quick search applied to a multibyte string (ignore case) */ +static UChar* +bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, + const UChar* text_range) +{ + const UChar *s, *se, *t, *end; + const UChar *tail; + ptrdiff_t skip, tlen1; + OnigEncoding enc = reg->enc; + int case_fold_flag = reg->case_fold_flag; + +# ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", + (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); +# endif + + tail = target_end - 1; + tlen1 = tail - target; + end = text_range; + if (end + tlen1 > text_end) + end = text_end - tlen1; + + s = text; + + while (s < end) { + se = s + tlen1; + if (str_lower_case_match(enc, case_fold_flag, target, target_end, + s, se + 1)) + return (UChar* )s; + if (s + 1 >= end) break; + skip = reg->map[se[1]]; + t = s; + do { + s += enclen(enc, s, end); + } while ((s - t) < skip && s < end); + } + + return (UChar* )NULL; +} + +/* Sunday's quick search (ignore case) */ +static UChar* +bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, const UChar* text_range) +{ + const UChar *s, *p, *end; + const UChar *tail; + ptrdiff_t tlen1; + OnigEncoding enc = reg->enc; + int case_fold_flag = reg->case_fold_flag; + +# ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", + (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); +# endif + + tail = target_end - 1; + tlen1 = tail - target; + end = text_range + tlen1; + if (end > text_end) + end = text_end; + + s = text + tlen1; + while (s < end) { + p = s - tlen1; + if (str_lower_case_match(enc, case_fold_flag, target, target_end, + p, s + 1)) + return (UChar* )p; + if (s + 1 >= end) break; + s += reg->map[s[1]]; + } + + return (UChar* )NULL; +} + +static UChar* +map_search(OnigEncoding enc, UChar map[], + const UChar* text, const UChar* text_range, const UChar* text_end) +{ + const UChar *s = text; + + while (s < text_range) { + if (map[*s]) return (UChar* )s; + + s += enclen(enc, s, text_end); + } + return (UChar* )NULL; +} + +static UChar* +map_search_backward(OnigEncoding enc, UChar map[], + const UChar* text, const UChar* adjust_text, + const UChar* text_start, const UChar* text_end) +{ + const UChar *s = text_start; + + while (s >= text) { + if (map[*s]) return (UChar* )s; + + s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end); + } + return (UChar* )NULL; +} + +extern OnigPosition +onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, + OnigOptionType option) +{ + ptrdiff_t r; + UChar *prev; + OnigMatchArg msa; + + MATCH_ARG_INIT(msa, option, region, at, at); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + { + ptrdiff_t offset = at - str; + STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); + } +#endif + + if (region) { + r = onig_region_resize_clear(region, reg->num_mem + 1); + } + else + r = 0; + + if (r == 0) { + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end); + r = match_at(reg, str, end, +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + end, +#endif + at, prev, &msa); + } + + MATCH_ARG_FREE(msa); + return r; +} + +static int +forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, + UChar* range, UChar** low, UChar** high, UChar** low_prev) +{ + UChar *p, *pprev = (UChar* )NULL; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n", + (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range); +#endif + + p = s; + if (reg->dmin > 0) { + if (ONIGENC_IS_SINGLEBYTE(reg->enc)) { + p += reg->dmin; + } + else { + UChar *q = p + reg->dmin; + + if (q >= end) return 0; /* fail */ + while (p < q) p += enclen(reg->enc, p, end); + } + } + + retry: + switch (reg->optimize) { + case ONIG_OPTIMIZE_EXACT: + p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); + break; + case ONIG_OPTIMIZE_EXACT_IC: + p = slow_search_ic(reg->enc, reg->case_fold_flag, + reg->exact, reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_EXACT_BM: + p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_EXACT_BM_IC: + p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC: + p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_MAP: + p = map_search(reg->enc, reg->map, p, range, end); + break; + } + + if (p && p < range) { + if (p - reg->dmin < s) { + retry_gate: + pprev = p; + p += enclen(reg->enc, p, end); + goto retry; + } + + if (reg->sub_anchor) { + UChar* prev; + + switch (reg->sub_anchor) { + case ANCHOR_BEGIN_LINE: + if (!ON_STR_BEGIN(p)) { + prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p, end); + if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) + goto retry_gate; + } + break; + + case ANCHOR_END_LINE: + if (ON_STR_END(p)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p); + if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) + goto retry_gate; +#endif + } + else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) + goto retry_gate; + break; + } + } + + if (reg->dmax == 0) { + *low = p; + if (low_prev) { + if (*low > s) + *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end); + else + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p, end); + } + } + else { + if (reg->dmax != ONIG_INFINITE_DISTANCE) { + if (p < str + reg->dmax) { + *low = (UChar* )str; + if (low_prev) + *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end); + } + else { + *low = p - reg->dmax; + if (*low > s) { + *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, + *low, end, (const UChar** )low_prev); + if (low_prev && IS_NULL(*low_prev)) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : s), *low, end); + } + else { + if (low_prev) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), *low, end); + } + } + } + } + /* no needs to adjust *high, *high is used as range check only */ + *high = p - reg->dmin; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, + "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n", + *low - str, *high - str, reg->dmin, reg->dmax); +#endif + return 1; /* success */ + } + + return 0; /* fail */ +} + +#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100 + +static int +backward_search_range(regex_t* reg, const UChar* str, const UChar* end, + UChar* s, const UChar* range, UChar* adjrange, + UChar** low, UChar** high) +{ + UChar *p; + + range += reg->dmin; + p = s; + + retry: + switch (reg->optimize) { + case ONIG_OPTIMIZE_EXACT: + exact_method: + p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, + range, adjrange, end, p); + break; + + case ONIG_OPTIMIZE_EXACT_IC: + case ONIG_OPTIMIZE_EXACT_BM_IC: + case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC: + p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, + reg->exact, reg->exact_end, + range, adjrange, end, p); + break; + + case ONIG_OPTIMIZE_EXACT_BM: + case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + goto exact_method; + break; + + case ONIG_OPTIMIZE_MAP: + p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end); + break; + } + + if (p) { + if (reg->sub_anchor) { + UChar* prev; + + switch (reg->sub_anchor) { + case ANCHOR_BEGIN_LINE: + if (!ON_STR_BEGIN(p)) { + prev = onigenc_get_prev_char_head(reg->enc, str, p, end); + if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) { + p = prev; + goto retry; + } + } + break; + + case ANCHOR_END_LINE: + if (ON_STR_END(p)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); + if (IS_NULL(prev)) goto fail; + if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) { + p = prev; + goto retry; + } +#endif + } + else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) { + p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end); + if (IS_NULL(p)) goto fail; + goto retry; + } + break; + } + } + + /* no needs to adjust *high, *high is used as range check only */ + if (reg->dmax != ONIG_INFINITE_DISTANCE) { + *low = p - reg->dmax; + *high = p - reg->dmin; + *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end); + } + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "backward_search_range: low: %d, high: %d\n", + (int )(*low - str), (int )(*high - str)); +#endif + return 1; /* success */ + } + + fail: +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "backward_search_range: fail.\n"); +#endif + return 0; /* fail */ +} + + +extern OnigPosition +onig_search(regex_t* reg, const UChar* str, const UChar* end, + const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) +{ + return onig_search_gpos(reg, str, end, start, start, range, region, option); +} + +extern OnigPosition +onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, + const UChar* global_pos, + const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) +{ + ptrdiff_t r; + UChar *s, *prev; + OnigMatchArg msa; +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + const UChar *orig_start = start; + const UChar *orig_range = range; +#endif + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, + "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n", + (uintptr_t )str, str, end - str, start - str, range - str); +#endif + + if (region) { + r = onig_region_resize_clear(region, reg->num_mem + 1); + if (r) goto finish_no_msa; + } + + if (start > end || start < str) goto mismatch_no_msa; + + +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +# define MATCH_AND_RETURN_CHECK(upper_range) \ + r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + if (! IS_FIND_LONGEST(reg->options)) {\ + goto match;\ + }\ + }\ + else goto finish; /* error */ \ + } +# else +# define MATCH_AND_RETURN_CHECK(upper_range) \ + r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + goto match;\ + }\ + else goto finish; /* error */ \ + } +# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ +#else +# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +# define MATCH_AND_RETURN_CHECK(none) \ + r = match_at(reg, str, end, s, prev, &msa);\ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + if (! IS_FIND_LONGEST(reg->options)) {\ + goto match;\ + }\ + }\ + else goto finish; /* error */ \ + } +# else +# define MATCH_AND_RETURN_CHECK(none) \ + r = match_at(reg, str, end, s, prev, &msa);\ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + goto match;\ + }\ + else goto finish; /* error */ \ + } +# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ +#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ + + + /* anchor optimize: resume search range */ + if (reg->anchor != 0 && str < end) { + UChar *min_semi_end, *max_semi_end; + + if (reg->anchor & ANCHOR_BEGIN_POSITION) { + /* search start-position only */ + begin_position: + if (range > start) + { + if (global_pos > start) + { + if (global_pos < range) + range = global_pos + 1; + } + else + range = start + 1; + } + else + range = start; + } + else if (reg->anchor & ANCHOR_BEGIN_BUF) { + /* search str-position only */ + if (range > start) { + if (start != str) goto mismatch_no_msa; + range = str + 1; + } + else { + if (range <= str) { + start = str; + range = str; + } + else + goto mismatch_no_msa; + } + } + else if (reg->anchor & ANCHOR_END_BUF) { + min_semi_end = max_semi_end = (UChar* )end; + + end_buf: + if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin) + goto mismatch_no_msa; + + if (range > start) { + if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) { + start = min_semi_end - reg->anchor_dmax; + if (start < end) + start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end); + } + if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) { + range = max_semi_end - reg->anchor_dmin + 1; + } + + if (start > range) goto mismatch_no_msa; + /* If start == range, match with empty at end. + Backward search is used. */ + } + else { + if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) { + range = min_semi_end - reg->anchor_dmax; + } + if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) { + start = max_semi_end - reg->anchor_dmin; + start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end); + } + if (range > start) goto mismatch_no_msa; + } + } + else if (reg->anchor & ANCHOR_SEMI_END_BUF) { + UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1); + + max_semi_end = (UChar* )end; + if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { + min_semi_end = pre_end; + +#ifdef USE_CRNL_AS_LINE_TERMINATOR + pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1); + if (IS_NOT_NULL(pre_end) && + IS_NEWLINE_CRLF(reg->options) && + ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { + min_semi_end = pre_end; + } +#endif + if (min_semi_end > str && start <= min_semi_end) { + goto end_buf; + } + } + else { + min_semi_end = (UChar* )end; + goto end_buf; + } + } + else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { + goto begin_position; + } + } + else if (str == end) { /* empty string */ + static const UChar address_for_empty_string[] = ""; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "onig_search: empty string.\n"); +#endif + + if (reg->threshold_len == 0) { + start = end = str = address_for_empty_string; + s = (UChar* )start; + prev = (UChar* )NULL; + + MATCH_ARG_INIT(msa, option, region, start, start); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + msa.state_check_buff = (void* )0; + msa.state_check_buff_size = 0; /* NO NEED, for valgrind */ +#endif + MATCH_AND_RETURN_CHECK(end); + goto mismatch; + } + goto mismatch_no_msa; + } + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n", + (int )(end - str), (int )(start - str), (int )(range - str)); +#endif + + MATCH_ARG_INIT(msa, option, region, start, global_pos); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + { + ptrdiff_t offset = (MIN(start, range) - str); + STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); + } +#endif + + s = (UChar* )start; + if (range > start) { /* forward search */ + if (s > str) + prev = onigenc_get_prev_char_head(reg->enc, str, s, end); + else + prev = (UChar* )NULL; + + if (reg->optimize != ONIG_OPTIMIZE_NONE) { + UChar *sch_range, *low, *high, *low_prev; + + sch_range = (UChar* )range; + if (reg->dmax != 0) { + if (reg->dmax == ONIG_INFINITE_DISTANCE) + sch_range = (UChar* )end; + else { + sch_range += reg->dmax; + if (sch_range > end) sch_range = (UChar* )end; + } + } + + if ((end - start) < reg->threshold_len) + goto mismatch; + + if (reg->dmax != ONIG_INFINITE_DISTANCE) { + do { + if (! forward_search_range(reg, str, end, s, sch_range, + &low, &high, &low_prev)) goto mismatch; + if (s < low) { + s = low; + prev = low_prev; + } + while (s <= high) { + MATCH_AND_RETURN_CHECK(orig_range); + prev = s; + s += enclen(reg->enc, s, end); + } + } while (s < range); + goto mismatch; + } + else { /* check only. */ + if (! forward_search_range(reg, str, end, s, sch_range, + &low, &high, (UChar** )NULL)) goto mismatch; + + if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { + do { + MATCH_AND_RETURN_CHECK(orig_range); + prev = s; + s += enclen(reg->enc, s, end); + + if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) { + while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0) + && s < range) { + prev = s; + s += enclen(reg->enc, s, end); + } + } + } while (s < range); + goto mismatch; + } + } + } + + do { + MATCH_AND_RETURN_CHECK(orig_range); + prev = s; + s += enclen(reg->enc, s, end); + } while (s < range); + + if (s == range) { /* because empty match with /$/. */ + MATCH_AND_RETURN_CHECK(orig_range); + } + } + else { /* backward search */ + if (reg->optimize != ONIG_OPTIMIZE_NONE) { + UChar *low, *high, *adjrange, *sch_start; + + if (range < end) + adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end); + else + adjrange = (UChar* )end; + + if (reg->dmax != ONIG_INFINITE_DISTANCE && + (end - range) >= reg->threshold_len) { + do { + sch_start = s + reg->dmax; + if (sch_start > end) sch_start = (UChar* )end; + if (backward_search_range(reg, str, end, sch_start, range, adjrange, + &low, &high) <= 0) + goto mismatch; + + if (s > high) + s = high; + + while (s >= low) { + prev = onigenc_get_prev_char_head(reg->enc, str, s, end); + MATCH_AND_RETURN_CHECK(orig_start); + s = prev; + } + } while (s >= range); + goto mismatch; + } + else { /* check only. */ + if ((end - range) < reg->threshold_len) goto mismatch; + + sch_start = s; + if (reg->dmax != 0) { + if (reg->dmax == ONIG_INFINITE_DISTANCE) + sch_start = (UChar* )end; + else { + sch_start += reg->dmax; + if (sch_start > end) sch_start = (UChar* )end; + else + sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, + start, sch_start, end); + } + } + if (backward_search_range(reg, str, end, sch_start, range, adjrange, + &low, &high) <= 0) goto mismatch; + } + } + + do { + prev = onigenc_get_prev_char_head(reg->enc, str, s, end); + MATCH_AND_RETURN_CHECK(orig_start); + s = prev; + } while (s >= range); + } + + mismatch: +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + if (IS_FIND_LONGEST(reg->options)) { + if (msa.best_len >= 0) { + s = msa.best_s; + goto match; + } + } +#endif + r = ONIG_MISMATCH; + + finish: + MATCH_ARG_FREE(msa); + + /* If result is mismatch and no FIND_NOT_EMPTY option, + then the region is not set in match_at(). */ + if (IS_FIND_NOT_EMPTY(reg->options) && region) { + onig_region_clear(region); + } + +#ifdef ONIG_DEBUG + if (r != ONIG_MISMATCH) + fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r); +#endif + return r; + + mismatch_no_msa: + r = ONIG_MISMATCH; + finish_no_msa: +#ifdef ONIG_DEBUG + if (r != ONIG_MISMATCH) + fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r); +#endif + return r; + + match: + MATCH_ARG_FREE(msa); + return s - str; +} + +extern OnigPosition +onig_scan(regex_t* reg, const UChar* str, const UChar* end, + OnigRegion* region, OnigOptionType option, + int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*), + void* callback_arg) +{ + OnigPosition r; + OnigPosition n; + int rs; + const UChar* start; + + n = 0; + start = str; + while (1) { + r = onig_search(reg, str, end, start, end, region, option); + if (r >= 0) { + rs = scan_callback(n, r, region, callback_arg); + n++; + if (rs != 0) + return rs; + + if (region->end[0] == start - str) { + if (start >= end) break; + start += enclen(reg->enc, start, end); + } + else + start = str + region->end[0]; + + if (start > end) + break; + } + else if (r == ONIG_MISMATCH) { + break; + } + else { /* error */ + return r; + } + } + + return n; +} + +extern OnigEncoding +onig_get_encoding(const regex_t* reg) +{ + return reg->enc; +} + +extern OnigOptionType +onig_get_options(const regex_t* reg) +{ + return reg->options; +} + +extern OnigCaseFoldType +onig_get_case_fold_flag(const regex_t* reg) +{ + return reg->case_fold_flag; +} + +extern const OnigSyntaxType* +onig_get_syntax(const regex_t* reg) +{ + return reg->syntax; +} + +extern int +onig_number_of_captures(const regex_t* reg) +{ + return reg->num_mem; +} + +extern int +onig_number_of_capture_histories(const regex_t* reg) +{ +#ifdef USE_CAPTURE_HISTORY + int i, n; + + n = 0; + for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { + if (BIT_STATUS_AT(reg->capture_history, i) != 0) + n++; + } + return n; +#else + return 0; +#endif +} + +extern void +onig_copy_encoding(OnigEncodingType *to, OnigEncoding from) +{ + *to = *from; +} |