diff options
Diffstat (limited to '')
-rw-r--r-- | storage/mroonga/vendor/groonga/lib/snip.c | 841 |
1 files changed, 841 insertions, 0 deletions
diff --git a/storage/mroonga/vendor/groonga/lib/snip.c b/storage/mroonga/vendor/groonga/lib/snip.c new file mode 100644 index 00000000..4693dc2b --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/snip.c @@ -0,0 +1,841 @@ +/* -*- c-basic-offset: 2 -*- */ +/* Copyright(C) 2009-2014 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ +#include "grn.h" +#include <string.h> +#include <stddef.h> +#include "grn_snip.h" +#include "grn_ctx.h" + +#if !defined MAX +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#endif + +#if !defined MIN +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif + +static int +grn_bm_check_euc(const unsigned char *x, const size_t y) +{ + const unsigned char *p; + for (p = x + y - 1; p >= x && *p >= 0x80U; p--); + return (int) ((x + y - p) & 1); +} + +static int +grn_bm_check_sjis(const unsigned char *x, const size_t y) +{ + const unsigned char *p; + for (p = x + y - 1; p >= x; p--) + if ((*p < 0x81U) || (*p > 0x9fU && *p < 0xe0U) || (*p > 0xfcU)) + break; + return (int) ((x + y - p) & 1); +} + +/* +static void +grn_bm_suffixes(const unsigned char *x, size_t m, size_t *suff) +{ + size_t f, g; + intptr_t i; + f = 0; + suff[m - 1] = m; + g = m - 1; + for (i = m - 2; i >= 0; --i) { + if (i > (intptr_t) g && suff[i + m - 1 - f] < i - g) + suff[i] = suff[i + m - 1 - f]; + else { + if (i < (intptr_t) g) + g = i; + f = i; + while (g > 0 && x[g] == x[g + m - 1 - f]) + --g; + suff[i] = f - g; + } + } +} +*/ + +static void +grn_bm_preBmBc(const unsigned char *x, size_t m, size_t *bmBc) +{ + size_t i; + for (i = 0; i < ASIZE; ++i) { + bmBc[i] = m; + } + for (i = 0; i < m - 1; ++i) { + bmBc[(unsigned int) x[i]] = m - (i + 1); + } +} + +#define GRN_BM_COMPARE do { \ + if (string_checks[found]) { \ + size_t offset = cond->last_offset, found_alpha_head = cond->found_alpha_head; \ + /* calc real offset */\ + for (i = cond->last_found; i < found; i++) { \ + if (string_checks[i] > 0) { \ + found_alpha_head = i; \ + offset += string_checks[i]; \ + } \ + } \ + /* if real offset is in a character, move it the head of the character */ \ + if (string_checks[found] < 0) { \ + offset -= string_checks[found_alpha_head]; \ + cond->last_found = found_alpha_head; \ + } else { \ + cond->last_found = found; \ + } \ + cond->start_offset = cond->last_offset = offset; \ + if (flags & GRN_SNIP_SKIP_LEADING_SPACES) { \ + while (cond->start_offset < string_original_length_in_bytes && \ + (i = grn_isspace(string_original + cond->start_offset, \ + string_encoding))) { cond->start_offset += i; } \ + } \ + for (i = cond->last_found; i < found + m; i++) { \ + if (string_checks[i] > 0) { \ + offset += string_checks[i]; \ + } \ + } \ + cond->end_offset = offset; \ + cond->found = found + shift; \ + cond->found_alpha_head = found_alpha_head; \ + /* printf("bm: cond:%p found:%zd last_found:%zd st_off:%zd ed_off:%zd\n", cond, cond->found,cond->last_found,cond->start_offset,cond->end_offset); */ \ + return; \ + } \ +} while (0) + +#define GRN_BM_BM_COMPARE do { \ + if (p[-2] == ck) { \ + for (i = 3; i <= m && p[-(intptr_t)i] == cp[-(intptr_t)i]; ++i) { \ + } \ + if (i > m) { \ + found = p - y - m; \ + GRN_BM_COMPARE; \ + } \ + } \ +} while (0) + +void +grn_bm_tunedbm(grn_ctx *ctx, snip_cond *cond, grn_obj *string, int flags) +{ + register unsigned char *limit, ck; + register const unsigned char *p, *cp; + register size_t *bmBc, delta1, i; + + const unsigned char *x; + unsigned char *y; + size_t shift, found; + + const char *string_original; + unsigned int string_original_length_in_bytes; + const short *string_checks; + grn_encoding string_encoding; + const char *string_norm, *keyword_norm; + unsigned int n, m; + + grn_string_get_original(ctx, string, + &string_original, &string_original_length_in_bytes); + string_checks = grn_string_get_checks(ctx, string); + string_encoding = grn_string_get_encoding(ctx, string); + grn_string_get_normalized(ctx, string, &string_norm, &n, NULL); + grn_string_get_normalized(ctx, cond->keyword, &keyword_norm, &m, NULL); + + y = (unsigned char *)string_norm; + if (m == 1) { + if (n > cond->found) { + shift = 1; + p = memchr(y + cond->found, keyword_norm[0], n - cond->found); + if (p != NULL) { + found = p - y; + GRN_BM_COMPARE; + } + } + cond->stopflag = SNIPCOND_STOP; + return; + } + + x = (unsigned char *)keyword_norm; + bmBc = cond->bmBc; + shift = cond->shift; + + /* Restart */ + p = y + m + cond->found; + cp = x + m; + ck = cp[-2]; + + /* 12 means 1(initial offset) + 10 (in loop) + 1 (shift) */ + if (n - cond->found > 12 * m) { + limit = y + n - 11 * m; + while (p <= limit) { + p += bmBc[p[-1]]; + if(!(delta1 = bmBc[p[-1]])) { + goto check; + } + p += delta1; + p += bmBc[p[-1]]; + p += bmBc[p[-1]]; + if(!(delta1 = bmBc[p[-1]])) { + goto check; + } + p += delta1; + p += bmBc[p[-1]]; + p += bmBc[p[-1]]; + if(!(delta1 = bmBc[p[-1]])) { + goto check; + } + p += delta1; + p += bmBc[p[-1]]; + p += bmBc[p[-1]]; + continue; + check: + GRN_BM_BM_COMPARE; + p += shift; + } + } + /* limit check + search */ + limit = y + n; + while(p <= limit) { + if (!(delta1 = bmBc[p[-1]])) { + GRN_BM_BM_COMPARE; + p += shift; + } + p += delta1; + } + cond->stopflag = SNIPCOND_STOP; +} + +static size_t +count_mapped_chars(const char *str, const char *end) +{ + const char *p; + size_t dl; + + dl = 0; + for (p = str; p != end; p++) { + switch (*p) { + case '<': + case '>': + dl += 4; /* < or > */ + break; + case '&': + dl += 5; /* & */ + break; + case '"': + dl += 6; /* " */ + break; + default: + dl++; + break; + } + } + return dl; +} + +grn_rc +grn_snip_cond_close(grn_ctx *ctx, snip_cond *cond) +{ + if (!cond) { + return GRN_INVALID_ARGUMENT; + } + if (cond->keyword) { + grn_obj_close(ctx, cond->keyword); + } + return GRN_SUCCESS; +} + +grn_rc +grn_snip_cond_init(grn_ctx *ctx, snip_cond *sc, const char *keyword, unsigned int keyword_len, + grn_encoding enc, grn_obj *normalizer, int flags) +{ + const char *norm; + unsigned int norm_blen; + int f = GRN_STR_REMOVEBLANK; + memset(sc, 0, sizeof(snip_cond)); + if (!(sc->keyword = grn_string_open(ctx, keyword, keyword_len, + normalizer, f))) { + GRN_LOG(ctx, GRN_LOG_ALERT, + "grn_string_open on snip_cond_init failed!"); + return GRN_NO_MEMORY_AVAILABLE; + } + grn_string_get_normalized(ctx, sc->keyword, &norm, &norm_blen, NULL); + if (!norm_blen) { + grn_snip_cond_close(ctx, sc); + return GRN_INVALID_ARGUMENT; + } + if (norm_blen != 1) { + grn_bm_preBmBc((unsigned char *)norm, norm_blen, sc->bmBc); + sc->shift = sc->bmBc[(unsigned char)norm[norm_blen - 1]]; + sc->bmBc[(unsigned char)norm[norm_blen - 1]] = 0; + } + return GRN_SUCCESS; +} + +void +grn_snip_cond_reinit(snip_cond *cond) +{ + cond->found = 0; + cond->last_found = 0; + cond->last_offset = 0; + cond->start_offset = 0; + cond->end_offset = 0; + + cond->count = 0; + cond->stopflag = SNIPCOND_NONSTOP; +} + +inline static char * +grn_snip_strndup(grn_ctx *ctx, const char *string, unsigned int string_len) +{ + char *copied_string; + + copied_string = GRN_MALLOC(string_len + 1); + if (!copied_string) { + return NULL; + } + grn_memcpy(copied_string, string, string_len); + copied_string[string_len]= '\0'; /* not required, but for ql use */ + return copied_string; +} + +inline static grn_rc +grn_snip_cond_set_tag(grn_ctx *ctx, + const char **dest_tag, size_t *dest_tag_len, + const char *tag, unsigned int tag_len, + const char *default_tag, unsigned int default_tag_len, + int copy_tag) +{ + if (tag) { + if (copy_tag) { + char *copied_tag; + copied_tag = grn_snip_strndup(ctx, tag, tag_len); + if (!copied_tag) { + return GRN_NO_MEMORY_AVAILABLE; + } + *dest_tag = copied_tag; + } else { + *dest_tag = tag; + } + *dest_tag_len = tag_len; + } else { + *dest_tag = default_tag; + *dest_tag_len = default_tag_len; + } + return GRN_SUCCESS; +} + +grn_rc +grn_snip_set_normalizer(grn_ctx *ctx, grn_obj *snip, + grn_obj *normalizer) +{ + grn_snip *snip_; + if (!snip) { + return GRN_INVALID_ARGUMENT; + } + + snip_ = (grn_snip *)snip; + snip_->normalizer = normalizer; + return GRN_SUCCESS; +} + +grn_obj * +grn_snip_get_normalizer(grn_ctx *ctx, grn_obj *snip) +{ + grn_snip *snip_; + + if (!snip) { + return NULL; + } + + snip_ = (grn_snip *)snip; + return snip_->normalizer; +} + +grn_rc +grn_snip_add_cond(grn_ctx *ctx, grn_obj *snip, + const char *keyword, unsigned int keyword_len, + const char *opentag, unsigned int opentag_len, + const char *closetag, unsigned int closetag_len) +{ + grn_rc rc; + int copy_tag; + snip_cond *cond; + unsigned int norm_blen; + grn_snip *snip_; + + snip_ = (grn_snip *)snip; + if (!snip_ || !keyword || !keyword_len || snip_->cond_len >= MAX_SNIP_COND_COUNT) { + return GRN_INVALID_ARGUMENT; + } + + cond = snip_->cond + snip_->cond_len; + if ((rc = grn_snip_cond_init(ctx, cond, keyword, keyword_len, + snip_->encoding, snip_->normalizer, snip_->flags))) { + return rc; + } + grn_string_get_normalized(ctx, cond->keyword, NULL, &norm_blen, NULL); + if (norm_blen > snip_->width) { + grn_snip_cond_close(ctx, cond); + return GRN_INVALID_ARGUMENT; + } + + copy_tag = snip_->flags & GRN_SNIP_COPY_TAG; + rc = grn_snip_cond_set_tag(ctx, + &(cond->opentag), &(cond->opentag_len), + opentag, opentag_len, + snip_->defaultopentag, snip_->defaultopentag_len, + copy_tag); + if (rc) { + grn_snip_cond_close(ctx, cond); + return rc; + } + + rc = grn_snip_cond_set_tag(ctx, + &(cond->closetag), &(cond->closetag_len), + closetag, closetag_len, + snip_->defaultclosetag, snip_->defaultclosetag_len, + copy_tag); + if (rc) { + if (opentag && copy_tag) { + GRN_FREE((void *)cond->opentag); + } + grn_snip_cond_close(ctx, cond); + return rc; + } + + snip_->cond_len++; + return GRN_SUCCESS; +} + +static size_t +grn_snip_find_firstbyte(const char *string, grn_encoding encoding, size_t offset, + size_t doffset) +{ + switch (encoding) { + case GRN_ENC_EUC_JP: + while (!(grn_bm_check_euc((unsigned char *) string, offset))) + offset += doffset; + break; + case GRN_ENC_SJIS: + if (!(grn_bm_check_sjis((unsigned char *) string, offset))) + offset += doffset; + break; + case GRN_ENC_UTF8: + while ((signed char)string[offset] <= (signed char)0xc0) + offset += doffset; + break; + default: + break; + } + return offset; +} + +inline static grn_rc +grn_snip_set_default_tag(grn_ctx *ctx, + const char **dest_tag, size_t *dest_tag_len, + const char *tag, unsigned int tag_len, + int copy_tag) +{ + if (copy_tag && tag) { + char *copied_tag; + copied_tag = grn_snip_strndup(ctx, tag, tag_len); + if (!copied_tag) { + return GRN_NO_MEMORY_AVAILABLE; + } + *dest_tag = copied_tag; + } else { + *dest_tag = tag; + } + *dest_tag_len = tag_len; + return GRN_SUCCESS; +} + +grn_obj * +grn_snip_open(grn_ctx *ctx, int flags, unsigned int width, + unsigned int max_results, + const char *defaultopentag, unsigned int defaultopentag_len, + const char *defaultclosetag, unsigned int defaultclosetag_len, + grn_snip_mapping *mapping) +{ + int copy_tag; + grn_snip *ret = NULL; + if (!(ret = GRN_MALLOC(sizeof(grn_snip)))) { + GRN_LOG(ctx, GRN_LOG_ALERT, "grn_snip allocation failed on grn_snip_open"); + return NULL; + } + if (max_results > MAX_SNIP_RESULT_COUNT || max_results == 0) { + GRN_LOG(ctx, GRN_LOG_WARNING, "max_results is invalid on grn_snip_open"); + GRN_FREE(ret); + return NULL; + } + GRN_API_ENTER; + ret->encoding = ctx->encoding; + ret->flags = flags; + ret->width = width; + ret->max_results = max_results; + ret->defaultopentag = NULL; + ret->defaultclosetag = NULL; + + copy_tag = flags & GRN_SNIP_COPY_TAG; + if (grn_snip_set_default_tag(ctx, + &(ret->defaultopentag), + &(ret->defaultopentag_len), + defaultopentag, defaultopentag_len, + copy_tag)) { + GRN_FREE(ret); + GRN_API_RETURN(NULL); + } + + if (grn_snip_set_default_tag(ctx, + &(ret->defaultclosetag), + &(ret->defaultclosetag_len), + defaultclosetag, defaultclosetag_len, + copy_tag)) { + if (copy_tag && ret->defaultopentag) { + GRN_FREE((void *)ret->defaultopentag); + } + GRN_FREE(ret); + GRN_API_RETURN(NULL); + } + + ret->cond_len = 0; + ret->mapping = mapping; + ret->nstr = NULL; + ret->tag_count = 0; + ret->snip_count = 0; + if (ret->flags & GRN_SNIP_NORMALIZE) { + ret->normalizer = GRN_NORMALIZER_AUTO; + } else { + ret->normalizer = NULL; + } + + GRN_DB_OBJ_SET_TYPE(ret, GRN_SNIP); + { + grn_obj *db; + grn_id id; + db = grn_ctx_db(ctx); + id = grn_obj_register(ctx, db, NULL, 0); + DB_OBJ(ret)->header.domain = GRN_ID_NIL; + DB_OBJ(ret)->range = GRN_ID_NIL; + grn_db_obj_init(ctx, db, id, DB_OBJ(ret)); + } + + GRN_API_RETURN((grn_obj *)ret); +} + +static grn_rc +exec_clean(grn_ctx *ctx, grn_snip *snip) +{ + snip_cond *cond, *cond_end; + if (snip->nstr) { + grn_obj_close(ctx, snip->nstr); + snip->nstr = NULL; + } + snip->tag_count = 0; + snip->snip_count = 0; + for (cond = snip->cond, cond_end = cond + snip->cond_len; + cond < cond_end; cond++) { + grn_snip_cond_reinit(cond); + } + return GRN_SUCCESS; +} + +grn_rc +grn_snip_close(grn_ctx *ctx, grn_snip *snip) +{ + snip_cond *cond, *cond_end; + if (!snip) { return GRN_INVALID_ARGUMENT; } + GRN_API_ENTER; + if (snip->flags & GRN_SNIP_COPY_TAG) { + int i; + snip_cond *sc; + const char *dot = snip->defaultopentag, *dct = snip->defaultclosetag; + for (i = snip->cond_len, sc = snip->cond; i; i--, sc++) { + if (sc->opentag != dot) { GRN_FREE((void *)sc->opentag); } + if (sc->closetag != dct) { GRN_FREE((void *)sc->closetag); } + } + if (dot) { GRN_FREE((void *)dot); } + if (dct) { GRN_FREE((void *)dct); } + } + if (snip->nstr) { + grn_obj_close(ctx, snip->nstr); + } + for (cond = snip->cond, cond_end = cond + snip->cond_len; + cond < cond_end; cond++) { + grn_snip_cond_close(ctx, cond); + } + GRN_FREE(snip); + GRN_API_RETURN(GRN_SUCCESS); +} + +grn_rc +grn_snip_exec(grn_ctx *ctx, grn_obj *snip, const char *string, unsigned int string_len, + unsigned int *nresults, unsigned int *max_tagged_len) +{ + size_t i; + grn_snip *snip_; + int f = GRN_STR_WITH_CHECKS|GRN_STR_REMOVEBLANK; + if (!snip || !string || !nresults || !max_tagged_len) { + return GRN_INVALID_ARGUMENT; + } + GRN_API_ENTER; + snip_ = (grn_snip *)snip; + exec_clean(ctx, snip_); + *nresults = 0; + snip_->nstr = grn_string_open(ctx, string, string_len, snip_->normalizer, f); + if (!snip_->nstr) { + exec_clean(ctx, snip_); + GRN_LOG(ctx, GRN_LOG_ALERT, "grn_string_open on grn_snip_exec failed !"); + GRN_API_RETURN(ctx->rc); + } + for (i = 0; i < snip_->cond_len; i++) { + grn_bm_tunedbm(ctx, snip_->cond + i, snip_->nstr, snip_->flags); + } + + { + _snip_tag_result *tag_result = snip_->tag_result; + _snip_result *snip_result = snip_->snip_result; + size_t last_end_offset = 0, last_last_end_offset = 0; + unsigned int unfound_cond_count = snip_->cond_len; + + *max_tagged_len = 0; + while (1) { + size_t tagged_len = 0, last_tag_end = 0; + int_least8_t all_stop = 1, found_cond = 0; + snip_result->tag_count = 0; + + while (1) { + size_t min_start_offset = (size_t) -1; + size_t max_end_offset = 0; + snip_cond *cond = NULL; + + /* get condition which have minimum offset and is not stopped */ + for (i = 0; i < snip_->cond_len; i++) { + if (snip_->cond[i].stopflag == SNIPCOND_NONSTOP && + (min_start_offset > snip_->cond[i].start_offset || + (min_start_offset == snip_->cond[i].start_offset && + max_end_offset < snip_->cond[i].end_offset))) { + min_start_offset = snip_->cond[i].start_offset; + max_end_offset = snip_->cond[i].end_offset; + cond = &snip_->cond[i]; + } + } + if (!cond) { + break; + } + /* check whether condtion is the first condition in snippet */ + if (snip_result->tag_count == 0) { + /* skip condition if the number of rest snippet field is smaller than */ + /* the number of unfound keywords. */ + if (snip_->max_results - *nresults <= unfound_cond_count && cond->count > 0) { + int_least8_t exclude_other_cond = 1; + for (i = 0; i < snip_->cond_len; i++) { + if ((snip_->cond + i) != cond + && snip_->cond[i].end_offset <= cond->start_offset + snip_->width + && snip_->cond[i].count == 0) { + exclude_other_cond = 0; + } + } + if (exclude_other_cond) { + grn_bm_tunedbm(ctx, cond, snip_->nstr, snip_->flags); + continue; + } + } + snip_result->start_offset = cond->start_offset; + snip_result->first_tag_result_idx = snip_->tag_count; + } else { + if (cond->start_offset >= snip_result->start_offset + snip_->width) { + break; + } + /* check nesting to make valid HTML */ + /* ToDo: allow <test><te>te</te><st>st</st></test> */ + if (cond->start_offset < last_tag_end) { + grn_bm_tunedbm(ctx, cond, snip_->nstr, snip_->flags); + continue; + } + } + if (cond->end_offset > snip_result->start_offset + snip_->width) { + /* If a keyword gets across a snippet, */ + /* it was skipped and never to be tagged. */ + cond->stopflag = SNIPCOND_ACROSS; + grn_bm_tunedbm(ctx, cond, snip_->nstr, snip_->flags); + } else { + found_cond = 1; + if (cond->count == 0) { + unfound_cond_count--; + } + cond->count++; + last_end_offset = cond->end_offset; + + tag_result->cond = cond; + tag_result->start_offset = cond->start_offset; + tag_result->end_offset = last_tag_end = cond->end_offset; + + snip_result->tag_count++; + tag_result++; + tagged_len += cond->opentag_len + cond->closetag_len; + if (++snip_->tag_count >= MAX_SNIP_TAG_COUNT) { + break; + } + grn_bm_tunedbm(ctx, cond, snip_->nstr, snip_->flags); + } + } + if (!found_cond) { + break; + } + if (snip_result->start_offset + last_end_offset < snip_->width) { + snip_result->start_offset = 0; + } else { + snip_result->start_offset = + MAX(MIN + ((snip_result->start_offset + last_end_offset - snip_->width) / 2, + string_len - snip_->width), last_last_end_offset); + } + snip_result->start_offset = + grn_snip_find_firstbyte(string, snip_->encoding, snip_result->start_offset, 1); + + snip_result->end_offset = snip_result->start_offset + snip_->width; + if (snip_result->end_offset < string_len) { + snip_result->end_offset = + grn_snip_find_firstbyte(string, snip_->encoding, snip_result->end_offset, -1); + } else { + snip_result->end_offset = string_len; + } + last_last_end_offset = snip_result->end_offset; + + if (snip_->mapping == (grn_snip_mapping *) -1) { + tagged_len += + count_mapped_chars(&string[snip_result->start_offset], + &string[snip_result->end_offset]) + 1; + } else { + tagged_len += snip_result->end_offset - snip_result->start_offset + 1; + } + + *max_tagged_len = MAX(*max_tagged_len, tagged_len); + + snip_result->last_tag_result_idx = snip_->tag_count - 1; + (*nresults)++; + snip_result++; + + if (*nresults == snip_->max_results || snip_->tag_count == MAX_SNIP_TAG_COUNT) { + break; + } + for (i = 0; i < snip_->cond_len; i++) { + if (snip_->cond[i].stopflag != SNIPCOND_STOP) { + all_stop = 0; + snip_->cond[i].stopflag = SNIPCOND_NONSTOP; + } + } + if (all_stop) { + break; + } + } + } + snip_->snip_count = *nresults; + snip_->string = string; + + snip_->max_tagged_len = *max_tagged_len; + + GRN_API_RETURN(ctx->rc); +} + +grn_rc +grn_snip_get_result(grn_ctx *ctx, grn_obj *snip, const unsigned int index, char *result, unsigned int *result_len) +{ + char *p; + size_t i, j, k; + _snip_result *sres; + grn_snip *snip_; + + snip_ = (grn_snip *)snip; + if (snip_->snip_count <= index || !snip_->nstr) { + return GRN_INVALID_ARGUMENT; + } + + GRN_ASSERT(snip_->snip_count != 0 && snip_->tag_count != 0); + + GRN_API_ENTER; + sres = &snip_->snip_result[index]; + j = sres->first_tag_result_idx; + for (p = result, i = sres->start_offset; i < sres->end_offset; i++) { + for (; j <= sres->last_tag_result_idx && snip_->tag_result[j].start_offset == i; j++) { + if (snip_->tag_result[j].end_offset > sres->end_offset) { + continue; + } + grn_memcpy(p, + snip_->tag_result[j].cond->opentag, + snip_->tag_result[j].cond->opentag_len); + p += snip_->tag_result[j].cond->opentag_len; + } + + if (snip_->mapping == GRN_SNIP_MAPPING_HTML_ESCAPE) { + switch (snip_->string[i]) { + case '<': + *p++ = '&'; + *p++ = 'l'; + *p++ = 't'; + *p++ = ';'; + break; + case '>': + *p++ = '&'; + *p++ = 'g'; + *p++ = 't'; + *p++ = ';'; + break; + case '&': + *p++ = '&'; + *p++ = 'a'; + *p++ = 'm'; + *p++ = 'p'; + *p++ = ';'; + break; + case '"': + *p++ = '&'; + *p++ = 'q'; + *p++ = 'u'; + *p++ = 'o'; + *p++ = 't'; + *p++ = ';'; + break; + default: + *p++ = snip_->string[i]; + break; + } + } else { + *p++ = snip_->string[i]; + } + + for (k = sres->last_tag_result_idx; + snip_->tag_result[k].end_offset <= sres->end_offset; k--) { + /* TODO: avoid all loop */ + if (snip_->tag_result[k].end_offset == i + 1) { + grn_memcpy(p, + snip_->tag_result[k].cond->closetag, + snip_->tag_result[k].cond->closetag_len); + p += snip_->tag_result[k].cond->closetag_len; + } + if (k <= sres->first_tag_result_idx) { + break; + } + }; + } + *p = '\0'; + + if(result_len) { *result_len = (unsigned int)(p - result); } + GRN_ASSERT((unsigned int)(p - result) <= snip_->max_tagged_len); + + GRN_API_RETURN(ctx->rc); +} |