From a175314c3e5827eb193872241446f2f8f5c9d33c Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 20:07:14 +0200 Subject: Adding upstream version 1:10.5.12. Signed-off-by: Daniel Baumann --- storage/mroonga/vendor/groonga/lib/pat.c | 3674 ++++++++++++++++++++++++++++++ 1 file changed, 3674 insertions(+) create mode 100644 storage/mroonga/vendor/groonga/lib/pat.c (limited to 'storage/mroonga/vendor/groonga/lib/pat.c') diff --git a/storage/mroonga/vendor/groonga/lib/pat.c b/storage/mroonga/vendor/groonga/lib/pat.c new file mode 100644 index 00000000..01f6108f --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/pat.c @@ -0,0 +1,3674 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2009-2017 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ +#include "grn.h" +#include +#include +#include "grn_pat.h" +#include "grn_output.h" +#include "grn_util.h" +#include "grn_normalizer.h" + +#define GRN_PAT_DELETED (GRN_ID_MAX + 1) + +#define GRN_PAT_SEGMENT_SIZE 0x400000 +#define W_OF_KEY_IN_A_SEGMENT 22 +#define W_OF_PAT_IN_A_SEGMENT 18 +#define W_OF_SIS_IN_A_SEGMENT 19 +#define KEY_MASK_IN_A_SEGMENT 0x3fffff +#define PAT_MASK_IN_A_SEGMENT 0x3ffff +#define SIS_MASK_IN_A_SEGMENT 0x7ffff +#define SEG_NOT_ASSIGNED 0xffff +#define GRN_PAT_MAX_SEGMENT 0x1000 +#define GRN_PAT_MDELINFOS (GRN_PAT_NDELINFOS - 1) + +#define GRN_PAT_BIN_KEY 0x70000 + +typedef struct { + grn_id lr[2]; + /* + lr[0]: the left node. + lr[1]: the right node. + + The left node has 0 at the nth bit at the nth byte. + The right node has 1 at the nth bit at the nth byte. + 'check' value indicate 'at the nth bit at the nth byte'. + + The both available nodes has larger check value rather + than the current node. + + The first node (PAT_AT(pat, GRN_ID_NIL, node)) has only + the right node and the node is the start point. + */ + uint32_t key; + /* + PAT_IMD(node) == 0: key bytes offset in memory map. + PAT_IMD(node) == 1: the key bytes. + */ + uint16_t check; + /* + nth byte: 12, nth bit: 3, terminated: 1 + + nth byte is different in key bytes: (check >> 4): max == 4095 + the left most byte is the 0th byte and the right most byte is the 11th byte. + + nth bit is different in nth byte: ((check >> 1) & 0b111) + the left most bit is the 0th bit and the right most bit is the 7th bit. + + terminated: (check & 0b1) + terminated == 1: key is terminated. + */ + uint16_t bits; + /* length: 13, immediate: 1, deleting: 1 */ +} pat_node; + +#define PAT_DELETING (1<<1) +#define PAT_IMMEDIATE (1<<2) + +#define PAT_DEL(x) ((x)->bits & PAT_DELETING) +#define PAT_IMD(x) ((x)->bits & PAT_IMMEDIATE) +#define PAT_LEN(x) (((x)->bits >> 3) + 1) +#define PAT_CHK(x) ((x)->check) +#define PAT_DEL_ON(x) ((x)->bits |= PAT_DELETING) +#define PAT_IMD_ON(x) ((x)->bits |= PAT_IMMEDIATE) +#define PAT_DEL_OFF(x) ((x)->bits &= ~PAT_DELETING) +#define PAT_IMD_OFF(x) ((x)->bits &= ~PAT_IMMEDIATE) +#define PAT_LEN_SET(x,v) ((x)->bits = ((x)->bits & ((1<<3) - 1))|(((v) - 1) << 3)) +#define PAT_CHK_SET(x,v) ((x)->check = (v)) + +typedef struct { + grn_id children; + grn_id sibling; +} sis_node; + +enum { + segment_key = 0, + segment_pat = 1, + segment_sis = 2 +}; + +void grn_p_pat_node(grn_ctx *ctx, grn_pat *pat, pat_node *node); + +/* error utilities */ +inline static int +grn_pat_name(grn_ctx *ctx, grn_pat *pat, char *buffer, int buffer_size) +{ + int name_size; + + if (DB_OBJ(pat)->id == GRN_ID_NIL) { + grn_strcpy(buffer, buffer_size, "(anonymous)"); + name_size = strlen(buffer); + } else { + name_size = grn_obj_name(ctx, (grn_obj *)pat, buffer, buffer_size); + } + + return name_size; +} + +/* bit operation */ + +#define nth_bit(key,n,l) ((((key)[(n)>>4]) >> (7 - (((n)>>1) & 7))) & 1) + +/* segment operation */ + +/* patricia array operation */ + +#define PAT_AT(pat,id,n) do {\ + int flags = 0;\ + GRN_IO_ARRAY_AT(pat->io, segment_pat, id, &flags, n);\ +} while (0) + +inline static pat_node * +pat_get(grn_ctx *ctx, grn_pat *pat, grn_id id) +{ + pat_node *res; + int flags = GRN_TABLE_ADD; + if (id > GRN_ID_MAX) { return NULL; } + GRN_IO_ARRAY_AT(pat->io, segment_pat, id, &flags, res); + return res; +} + +/* sis operation */ + +inline static sis_node * +sis_at(grn_ctx *ctx, grn_pat *pat, grn_id id) +{ + sis_node *res; + int flags = 0; + if (id > GRN_ID_MAX) { return NULL; } + GRN_IO_ARRAY_AT(pat->io, segment_sis, id, &flags, res); + return res; +} + +inline static sis_node * +sis_get(grn_ctx *ctx, grn_pat *pat, grn_id id) +{ + sis_node *res; + int flags = GRN_TABLE_ADD; + if (id > GRN_ID_MAX) { return NULL; } + GRN_IO_ARRAY_AT(pat->io, segment_sis, id, &flags, res); + return res; +} + +#define MAX_LEVEL 16 + +static void +sis_collect(grn_ctx *ctx, grn_pat *pat, grn_hash *h, grn_id id, uint32_t level) +{ + uint32_t *offset; + sis_node *sl = sis_at(ctx, pat, id); + if (sl) { + grn_id sid = sl->children; + while (sid && sid != id) { + if (grn_hash_add(ctx, h, &sid, sizeof(grn_id), (void **) &offset, NULL)) { + *offset = level; + if (level < MAX_LEVEL) { sis_collect(ctx, pat, h, sid, level + 1); } + if (!(sl = sis_at(ctx, pat, sid))) { break; } + sid = sl->sibling; + } else { + /* todo : must be handled */ + } + } + } +} + +/* key operation */ + +#define KEY_AT(pat,pos,ptr,addp) do {\ + int flags = addp;\ + GRN_IO_ARRAY_AT(pat->io, segment_key, pos, &flags, ptr);\ +} while (0) + +inline static uint32_t +key_put(grn_ctx *ctx, grn_pat *pat, const uint8_t *key, uint32_t len) +{ + uint32_t res, ts; +// if (len >= GRN_PAT_SEGMENT_SIZE) { return 0; /* error */ } + res = pat->header->curr_key; + if (res < GRN_PAT_MAX_TOTAL_KEY_SIZE && + len > GRN_PAT_MAX_TOTAL_KEY_SIZE - res) { + char name[GRN_TABLE_MAX_KEY_SIZE]; + int name_size; + name_size = grn_pat_name(ctx, pat, name, GRN_TABLE_MAX_KEY_SIZE); + ERR(GRN_NOT_ENOUGH_SPACE, + "[pat][key][put] total key size is over: <%.*s>: " + "max=%u: current=%u: new key size=%u", + name_size, name, + GRN_PAT_MAX_TOTAL_KEY_SIZE, + res, + len); + return 0; + } + + ts = (res + len) >> W_OF_KEY_IN_A_SEGMENT; + if (res >> W_OF_KEY_IN_A_SEGMENT != ts) { + res = pat->header->curr_key = ts << W_OF_KEY_IN_A_SEGMENT; + } + { + uint8_t *dest; + KEY_AT(pat, res, dest, GRN_TABLE_ADD); + if (!dest) { + char name[GRN_TABLE_MAX_KEY_SIZE]; + int name_size; + name_size = grn_pat_name(ctx, pat, name, GRN_TABLE_MAX_KEY_SIZE); + ERR(GRN_NO_MEMORY_AVAILABLE, + "[pat][key][put] failed to allocate memory for new key: <%.*s>: " + "new offset:%u key size:%u", + name_size, name, + res, + len); + return 0; + } + grn_memcpy(dest, key, len); + } + pat->header->curr_key += len; + return res; +} + +inline static uint8_t * +pat_node_get_key(grn_ctx *ctx, grn_pat *pat, pat_node *n) +{ + if (PAT_IMD(n)) { + return (uint8_t *) &n->key; + } else { + uint8_t *res; + KEY_AT(pat, n->key, res, 0); + return res; + } +} + +inline static grn_rc +pat_node_set_key(grn_ctx *ctx, grn_pat *pat, pat_node *n, const uint8_t *key, uint32_t len) +{ + grn_rc rc; + if (!key || !len) { return GRN_INVALID_ARGUMENT; } + PAT_LEN_SET(n, len); + if (len <= sizeof(uint32_t)) { + PAT_IMD_ON(n); + grn_memcpy(&n->key, key, len); + rc = GRN_SUCCESS; + } else { + PAT_IMD_OFF(n); + n->key = key_put(ctx, pat, key, len); + rc = ctx->rc; + } + return rc; +} + +/* delinfo operation */ + +enum { + /* The delinfo is currently not used. */ + DL_EMPTY = 0, + /* + * stat->d refers to a deleting node (in a tree). + * The deletion requires an additional operation. + */ + DL_PHASE1, + /* + * stat->d refers to a deleted node (not in a tree). + * The node is pending for safety. + */ + DL_PHASE2 +}; + +inline static grn_pat_delinfo * +delinfo_search(grn_pat *pat, grn_id id) +{ + int i; + grn_pat_delinfo *di; + for (i = (pat->header->curr_del2) & GRN_PAT_MDELINFOS; + i != pat->header->curr_del; + i = (i + 1) & GRN_PAT_MDELINFOS) { + di = &pat->header->delinfos[i]; + if (di->stat != DL_PHASE1) { continue; } + if (di->ld == id) { return di; } + if (di->d == id) { return di; } + } + return NULL; +} + +inline static grn_rc +delinfo_turn_2(grn_ctx *ctx, grn_pat *pat, grn_pat_delinfo *di) +{ + grn_id d, *p = NULL; + pat_node *ln, *dn; + // grn_log("delinfo_turn_2> di->d=%d di->ld=%d stat=%d", di->d, di->ld, di->stat); + if (di->stat != DL_PHASE1) { + return GRN_SUCCESS; + } + PAT_AT(pat, di->ld, ln); + if (!ln) { + return GRN_INVALID_ARGUMENT; + } + d = di->d; + if (!d) { + return GRN_INVALID_ARGUMENT; + } + PAT_AT(pat, d, dn); + if (!dn) { + return GRN_INVALID_ARGUMENT; + } + PAT_DEL_OFF(ln); + PAT_DEL_OFF(dn); + { + grn_id *p0; + pat_node *rn; + int c0 = -1, c; + uint32_t len = PAT_LEN(dn) * 16; + const uint8_t *key = pat_node_get_key(ctx, pat, dn); + if (!key) { + return GRN_INVALID_ARGUMENT; + } + PAT_AT(pat, 0, rn); + p0 = &rn->lr[1]; + for (;;) { + grn_id r = *p0; + if (!r) { + break; + } + if (r == d) { + p = p0; + break; + } + PAT_AT(pat, r, rn); + if (!rn) { + return GRN_FILE_CORRUPT; + } + c = PAT_CHK(rn); + if (c <= c0 || len <= c) { + break; + } + if (c & 1) { + p0 = (c + 1 < len) ? &rn->lr[1] : &rn->lr[0]; + } else { + p0 = &rn->lr[nth_bit((uint8_t *)key, c, len)]; + } + c0 = c; + } + } + if (p) { + PAT_CHK_SET(ln, PAT_CHK(dn)); + ln->lr[1] = dn->lr[1]; + ln->lr[0] = dn->lr[0]; + *p = di->ld; + } else { + /* debug */ + int j; + grn_id dd; + grn_pat_delinfo *ddi; + GRN_LOG(ctx, GRN_LOG_DEBUG, "failed to find d=%d", d); + for (j = (pat->header->curr_del2 + 1) & GRN_PAT_MDELINFOS; + j != pat->header->curr_del; + j = (j + 1) & GRN_PAT_MDELINFOS) { + ddi = &pat->header->delinfos[j]; + if (ddi->stat != DL_PHASE1) { continue; } + PAT_AT(pat, ddi->ld, ln); + if (!ln) { continue; } + if (!(dd = ddi->d)) { continue; } + if (d == ddi->ld) { + GRN_LOG(ctx, GRN_LOG_DEBUG, "found!!!, d(%d) become ld of (%d)", d, dd); + } + } + /* debug */ + } + di->stat = DL_PHASE2; + di->d = d; + // grn_log("delinfo_turn_2< di->d=%d di->ld=%d", di->d, di->ld); + return GRN_SUCCESS; +} + +inline static grn_rc +delinfo_turn_3(grn_ctx *ctx, grn_pat *pat, grn_pat_delinfo *di) +{ + pat_node *dn; + uint32_t size; + if (di->stat != DL_PHASE2) { return GRN_SUCCESS; } + PAT_AT(pat, di->d, dn); + if (!dn) { return GRN_INVALID_ARGUMENT; } + if (di->shared) { + PAT_IMD_ON(dn); + size = 0; + } else { + if (PAT_IMD(dn)) { + size = 0; + } else { + size = PAT_LEN(dn); + } + } + di->stat = DL_EMPTY; + // dn->lr[1] = GRN_PAT_DELETED; + dn->lr[0] = pat->header->garbages[size]; + pat->header->garbages[size] = di->d; + return GRN_SUCCESS; +} + +inline static grn_pat_delinfo * +delinfo_new(grn_ctx *ctx, grn_pat *pat) +{ + grn_pat_delinfo *res = &pat->header->delinfos[pat->header->curr_del]; + uint32_t n = (pat->header->curr_del + 1) & GRN_PAT_MDELINFOS; + int gap = ((n + GRN_PAT_NDELINFOS - pat->header->curr_del2) & GRN_PAT_MDELINFOS) + - (GRN_PAT_NDELINFOS / 2); + while (gap-- > 0) { + if (delinfo_turn_2(ctx, pat, &pat->header->delinfos[pat->header->curr_del2])) { + GRN_LOG(ctx, GRN_LOG_CRIT, "d2 failed: %d", pat->header->delinfos[pat->header->curr_del2].ld); + } + pat->header->curr_del2 = (pat->header->curr_del2 + 1) & GRN_PAT_MDELINFOS; + } + if (n == pat->header->curr_del3) { + if (delinfo_turn_3(ctx, pat, &pat->header->delinfos[pat->header->curr_del3])) { + GRN_LOG(ctx, GRN_LOG_CRIT, "d3 failed: %d", pat->header->delinfos[pat->header->curr_del3].ld); + } + pat->header->curr_del3 = (pat->header->curr_del3 + 1) & GRN_PAT_MDELINFOS; + } + pat->header->curr_del = n; + return res; +} + +/* pat operation */ + +inline static grn_pat * +_grn_pat_create(grn_ctx *ctx, grn_pat *pat, + const char *path, uint32_t key_size, + uint32_t value_size, uint32_t flags) { + grn_io *io; + pat_node *node0; + struct grn_pat_header *header; + uint32_t entry_size, w_of_element; + grn_encoding encoding = ctx->encoding; + if (flags & GRN_OBJ_KEY_WITH_SIS) { + entry_size = sizeof(sis_node) + value_size; + } else { + entry_size = value_size; + } + for (w_of_element = 0; (1 << w_of_element) < entry_size; w_of_element++) { + /* nop */ + } + { + grn_io_array_spec array_spec[3]; + array_spec[segment_key].w_of_element = 0; + array_spec[segment_key].max_n_segments = 0x400; + array_spec[segment_pat].w_of_element = 4; + array_spec[segment_pat].max_n_segments = 1 << (30 - (22 - 4)); + array_spec[segment_sis].w_of_element = w_of_element; + array_spec[segment_sis].max_n_segments = 1 << (30 - (22 - w_of_element)); + io = grn_io_create_with_array(ctx, path, sizeof(struct grn_pat_header), + GRN_PAT_SEGMENT_SIZE, grn_io_auto, 3, array_spec); + } + if (!io) { return NULL; } + if (encoding == GRN_ENC_DEFAULT) { encoding = grn_gctx.encoding; } + header = grn_io_header(io); + grn_io_set_type(io, GRN_TABLE_PAT_KEY); + header->flags = flags; + header->encoding = encoding; + header->key_size = key_size; + header->value_size = value_size; + header->n_entries = 0; + header->curr_rec = 0; + header->curr_key = 0; + header->curr_del = 0; + header->curr_del2 = 0; + header->curr_del3 = 0; + header->n_garbages = 0; + header->tokenizer = GRN_ID_NIL; + if (header->flags & GRN_OBJ_KEY_NORMALIZE) { + header->flags &= ~GRN_OBJ_KEY_NORMALIZE; + pat->normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1); + header->normalizer = grn_obj_id(ctx, pat->normalizer); + } else { + pat->normalizer = NULL; + header->normalizer = GRN_ID_NIL; + } + header->truncated = GRN_FALSE; + GRN_PTR_INIT(&(pat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); + pat->io = io; + pat->header = header; + pat->key_size = key_size; + pat->value_size = value_size; + pat->tokenizer = NULL; + pat->encoding = encoding; + pat->obj.header.flags = header->flags; + if (!(node0 = pat_get(ctx, pat, 0))) { + grn_io_close(ctx, io); + return NULL; + } + node0->lr[1] = 0; + node0->lr[0] = 0; + node0->key = 0; + return pat; +} + +grn_pat * +grn_pat_create(grn_ctx *ctx, const char *path, uint32_t key_size, + uint32_t value_size, uint32_t flags) +{ + grn_pat *pat; + if (!(pat = GRN_CALLOC(sizeof(grn_pat)))) { + return NULL; + } + GRN_DB_OBJ_SET_TYPE(pat, GRN_TABLE_PAT_KEY); + if (!_grn_pat_create(ctx, pat, path, key_size, value_size, flags)) { + GRN_FREE(pat); + return NULL; + } + pat->cache = NULL; + pat->cache_size = 0; + pat->is_dirty = GRN_FALSE; + CRITICAL_SECTION_INIT(pat->lock); + return pat; +} + +/* + grn_pat_cache_enable() and grn_pat_cache_disable() are not thread-safe. + So far, they can be used only from single threaded programs. + */ + +grn_rc +grn_pat_cache_enable(grn_ctx *ctx, grn_pat *pat, uint32_t cache_size) +{ + if (pat->cache || pat->cache_size) { + ERR(GRN_INVALID_ARGUMENT, "cache is already enabled"); + return ctx->rc; + } + if (cache_size & (cache_size - 1)) { + ERR(GRN_INVALID_ARGUMENT, "cache_size(%u) must be a power of two", cache_size); + return ctx->rc; + } + if (!(pat->cache = GRN_CALLOC(cache_size * sizeof(grn_id)))) { + return ctx->rc; + } + pat->cache_size = cache_size; + return GRN_SUCCESS; +} + +void +grn_pat_cache_disable(grn_ctx *ctx, grn_pat *pat) +{ + if (pat->cache) { + GRN_FREE(pat->cache); + pat->cache_size = 0; + pat->cache = NULL; + } +} + +grn_pat * +grn_pat_open(grn_ctx *ctx, const char *path) +{ + grn_io *io; + grn_pat *pat; + pat_node *node0; + struct grn_pat_header *header; + uint32_t io_type; + io = grn_io_open(ctx, path, grn_io_auto); + if (!io) { return NULL; } + header = grn_io_header(io); + io_type = grn_io_get_type(io); + if (io_type != GRN_TABLE_PAT_KEY) { + ERR(GRN_INVALID_FORMAT, "[table][pat] file type must be %#04x: <%#04x>", + GRN_TABLE_PAT_KEY, io_type); + grn_io_close(ctx, io); + return NULL; + } + if (!(pat = GRN_MALLOC(sizeof(grn_pat)))) { + grn_io_close(ctx, io); + return NULL; + } + GRN_DB_OBJ_SET_TYPE(pat, GRN_TABLE_PAT_KEY); + pat->io = io; + pat->header = header; + pat->key_size = header->key_size; + pat->value_size = header->value_size; + pat->encoding = header->encoding; + pat->tokenizer = grn_ctx_at(ctx, header->tokenizer); + if (header->flags & GRN_OBJ_KEY_NORMALIZE) { + header->flags &= ~GRN_OBJ_KEY_NORMALIZE; + pat->normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1); + header->normalizer = grn_obj_id(ctx, pat->normalizer); + } else { + pat->normalizer = grn_ctx_at(ctx, header->normalizer); + } + GRN_PTR_INIT(&(pat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); + pat->obj.header.flags = header->flags; + PAT_AT(pat, 0, node0); + if (!node0) { + grn_io_close(ctx, io); + GRN_FREE(pat); + return NULL; + } + pat->cache = NULL; + pat->cache_size = 0; + pat->is_dirty = GRN_FALSE; + CRITICAL_SECTION_INIT(pat->lock); + return pat; +} + +/* + * grn_pat_error_if_truncated() logs an error and returns its error code if + * a pat is truncated by another process. + * Otherwise, this function returns GRN_SUCCESS. + * Note that `ctx` and `pat` must be valid. + * + * FIXME: A pat should be reopened if possible. + */ +static grn_rc +grn_pat_error_if_truncated(grn_ctx *ctx, grn_pat *pat) +{ + if (pat->header->truncated) { + ERR(GRN_FILE_CORRUPT, + "pat is truncated, please unmap or reopen the database"); + return GRN_FILE_CORRUPT; + } + return GRN_SUCCESS; +} + +grn_rc +grn_pat_close(grn_ctx *ctx, grn_pat *pat) +{ + grn_rc rc; + + CRITICAL_SECTION_FIN(pat->lock); + + if (pat->is_dirty) { + uint32_t n_dirty_opens; + GRN_ATOMIC_ADD_EX(&(pat->header->n_dirty_opens), -1, n_dirty_opens); + } + + if ((rc = grn_io_close(ctx, pat->io))) { + ERR(rc, "grn_io_close failed"); + } else { + grn_pvector_fin(ctx, &pat->token_filters); + if (pat->cache) { grn_pat_cache_disable(ctx, pat); } + GRN_FREE(pat); + } + + return rc; +} + +grn_rc +grn_pat_remove(grn_ctx *ctx, const char *path) +{ + if (!path) { + ERR(GRN_INVALID_ARGUMENT, "path is null"); + return GRN_INVALID_ARGUMENT; + } + return grn_io_remove(ctx, path); +} + +grn_rc +grn_pat_truncate(grn_ctx *ctx, grn_pat *pat) +{ + grn_rc rc; + const char *io_path; + char *path; + uint32_t key_size, value_size, flags; + + rc = grn_pat_error_if_truncated(ctx, pat); + if (rc != GRN_SUCCESS) { + return rc; + } + if ((io_path = grn_io_path(pat->io)) && *io_path != '\0') { + if (!(path = GRN_STRDUP(io_path))) { + ERR(GRN_NO_MEMORY_AVAILABLE, "cannot duplicate path: <%s>", io_path); + return GRN_NO_MEMORY_AVAILABLE; + } + } else { + path = NULL; + } + key_size = pat->key_size; + value_size = pat->value_size; + flags = pat->obj.header.flags; + if (path) { + pat->header->truncated = GRN_TRUE; + } + if ((rc = grn_io_close(ctx, pat->io))) { goto exit; } + grn_pvector_fin(ctx, &pat->token_filters); + pat->io = NULL; + if (path && (rc = grn_io_remove(ctx, path))) { goto exit; } + if (!_grn_pat_create(ctx, pat, path, key_size, value_size, flags)) { + rc = GRN_UNKNOWN_ERROR; + } + if (pat->cache && pat->cache_size) { + memset(pat->cache, 0, pat->cache_size * sizeof(grn_id)); + } +exit: + if (path) { GRN_FREE(path); } + return rc; +} + +inline static grn_id +_grn_pat_add(grn_ctx *ctx, grn_pat *pat, const uint8_t *key, uint32_t size, uint32_t *new, uint32_t *lkey) +{ + grn_id r, r0, *p0, *p1 = NULL; + pat_node *rn, *rn0; + int c, c0 = -1, c1 = -1, len; + uint32_t cache_id = 0; + + *new = 0; + if (pat->cache) { + const uint8_t *p = key; + uint32_t length = size; + for (cache_id = 0; length--; p++) { cache_id = (cache_id * 37) + *p; } + cache_id &= (pat->cache_size - 1); + if (pat->cache[cache_id]) { + PAT_AT(pat, pat->cache[cache_id], rn); + if (rn) { + const uint8_t *k = pat_node_get_key(ctx, pat, rn); + if (k && size == PAT_LEN(rn) && !memcmp(k, key, size)) { + return pat->cache[cache_id]; + } + } + } + } + + len = (int)size * 16; + PAT_AT(pat, 0, rn0); + p0 = &rn0->lr[1]; + if (*p0) { + uint32_t size2; + int xor, mask; + const uint8_t *s, *d; + for (;;) { + if (!(r0 = *p0)) { + if (!(s = pat_node_get_key(ctx, pat, rn0))) { return GRN_ID_NIL; } + size2 = PAT_LEN(rn0); + break; + } + PAT_AT(pat, r0, rn0); + if (!rn0) { return GRN_ID_NIL; } + if (c0 < rn0->check && rn0->check < len) { + c1 = c0; c0 = rn0->check; + p1 = p0; + if (c0 & 1) { + p0 = (c0 + 1 < len) ? &rn0->lr[1] : &rn0->lr[0]; + } else { + p0 = &rn0->lr[nth_bit(key, c0, len)]; + } + } else { + if (!(s = pat_node_get_key(ctx, pat, rn0))) { return GRN_ID_NIL; } + size2 = PAT_LEN(rn0); + if (size == size2 && !memcmp(s, key, size)) { + if (pat->cache) { pat->cache[cache_id] = r0; } + return r0; + } + break; + } + } + { + uint32_t min = size > size2 ? size2 : size; + for (c = 0, d = key; min && *s == *d; c += 16, s++, d++, min--); + if (min) { + for (xor = *s ^ *d, mask = 0x80; !(xor & mask); mask >>= 1, c += 2); + } else { + c--; + } + } + if (c == c0 && !*p0) { + if (c < len - 2) { c += 2; } + } else { + if (c < c0) { + if (c > c1) { + p0 = p1; + } else { + PAT_AT(pat, 0, rn0); + p0 = &rn0->lr[1]; + while ((r0 = *p0)) { + PAT_AT(pat, r0, rn0); + if (!rn0) { return GRN_ID_NIL; } + c0 = PAT_CHK(rn0); + if (c < c0) { break; } + if (c0 & 1) { + p0 = (c0 + 1 < len) ? &rn0->lr[1] : &rn0->lr[0]; + } else { + p0 = &rn0->lr[nth_bit(key, c0, len)]; + } + } + } + } + } + if (c >= len) { return GRN_ID_NIL; } + } else { + c = len - 2; + } + { + uint32_t size2 = size > sizeof(uint32_t) ? size : 0; + if (*lkey && size2) { + if (pat->header->garbages[0]) { + r = pat->header->garbages[0]; + PAT_AT(pat, r, rn); + if (!rn) { return GRN_ID_NIL; } + pat->header->n_entries++; + pat->header->n_garbages--; + pat->header->garbages[0] = rn->lr[0]; + } else { + r = pat->header->curr_rec + 1; + rn = pat_get(ctx, pat, r); + if (!rn) { return GRN_ID_NIL; } + pat->header->curr_rec = r; + pat->header->n_entries++; + } + PAT_IMD_OFF(rn); + PAT_LEN_SET(rn, size); + rn->key = *lkey; + } else { + if (pat->header->garbages[size2]) { + uint8_t *keybuf; + r = pat->header->garbages[size2]; + PAT_AT(pat, r, rn); + if (!rn) { return GRN_ID_NIL; } + if (!(keybuf = pat_node_get_key(ctx, pat, rn))) { return GRN_ID_NIL; } + pat->header->n_entries++; + pat->header->n_garbages--; + pat->header->garbages[size2] = rn->lr[0]; + PAT_LEN_SET(rn, size); + grn_memcpy(keybuf, key, size); + } else { + r = pat->header->curr_rec + 1; + rn = pat_get(ctx, pat, r); + if (!rn) { return GRN_ID_NIL; } + if (pat_node_set_key(ctx, pat, rn, key, size)) { return GRN_ID_NIL; } + pat->header->curr_rec = r; + pat->header->n_entries++; + } + *lkey = rn->key; + } + } + PAT_CHK_SET(rn, c); + PAT_DEL_OFF(rn); + if ((c & 1) ? (c + 1 < len) : nth_bit(key, c, len)) { + rn->lr[1] = r; + rn->lr[0] = *p0; + } else { + rn->lr[1] = *p0; + rn->lr[0] = r; + } + // smp_wmb(); + *p0 = r; + *new = 1; + if (pat->cache) { pat->cache[cache_id] = r; } + return r; +} + +inline static grn_bool +chop(grn_ctx *ctx, grn_pat *pat, const char **key, const char *end, uint32_t *lkey) +{ + size_t len = grn_charlen(ctx, *key, end); + if (len) { + *lkey += len; + *key += len; + return (end - *key) > 0; + } else { + return GRN_FALSE; + } +} + +#define MAX_FIXED_KEY_SIZE (sizeof(int64_t)) + +#define KEY_NEEDS_CONVERT(pat,size) \ + (!((pat)->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) && (size) <= MAX_FIXED_KEY_SIZE) + +#define KEY_ENC(pat,keybuf,key,size) do {\ + switch ((pat)->obj.header.flags & GRN_OBJ_KEY_MASK) {\ + case GRN_OBJ_KEY_UINT :\ + if (((pat)->obj.header.domain != GRN_DB_TOKYO_GEO_POINT) &&\ + ((pat)->obj.header.domain != GRN_DB_WGS84_GEO_POINT)) {\ + grn_hton((keybuf), (key), (size));\ + break;\ + }\ + case GRN_OBJ_KEY_GEO_POINT :\ + grn_gton((keybuf), (key), (size));\ + break;\ + case GRN_OBJ_KEY_INT :\ + grn_hton((keybuf), (key), (size));\ + *((uint8_t *)(keybuf)) ^= 0x80;\ + break;\ + case GRN_OBJ_KEY_FLOAT :\ + if ((size) == sizeof(int64_t)) {\ + int64_t v = *(int64_t *)(key);\ + v ^= ((v >> 63)|(1ULL << 63));\ + grn_hton((keybuf), &v, (size));\ + }\ + break;\ + }\ +} while (0) + +#define KEY_DEC(pat,keybuf,key,size) do {\ + switch ((pat)->obj.header.flags & GRN_OBJ_KEY_MASK) {\ + case GRN_OBJ_KEY_UINT :\ + if (((pat)->obj.header.domain != GRN_DB_TOKYO_GEO_POINT) &&\ + ((pat)->obj.header.domain != GRN_DB_WGS84_GEO_POINT)) {\ + grn_ntoh((keybuf), (key), (size));\ + break;\ + }\ + case GRN_OBJ_KEY_GEO_POINT :\ + grn_ntog((keybuf), (key), (size));\ + break;\ + case GRN_OBJ_KEY_INT :\ + grn_ntohi((keybuf), (key), (size));\ + break;\ + case GRN_OBJ_KEY_FLOAT :\ + if ((size) == sizeof(int64_t)) {\ + int64_t v;\ + grn_hton(&v, (key), (size));\ + *((int64_t *)(keybuf)) = v ^ ((((int64_t)(v^(1ULL<<63)))>> 63)|(1ULL<<63)); \ + }\ + break;\ + }\ +} while (0) + +#define KEY_ENCODE(pat,keybuf,key,size) do {\ + if (KEY_NEEDS_CONVERT(pat,size)) {\ + KEY_ENC((pat), (keybuf), (key), (size));\ + (key) = (keybuf);\ + }\ +} while (0) + +grn_id +grn_pat_add(grn_ctx *ctx, grn_pat *pat, const void *key, uint32_t key_size, + void **value, int *added) +{ + uint32_t new, lkey = 0; + grn_id r0; + uint8_t keybuf[MAX_FIXED_KEY_SIZE]; + if (grn_pat_error_if_truncated(ctx, pat) != GRN_SUCCESS) { + return GRN_ID_NIL; + } + if (!key || !key_size) { return GRN_ID_NIL; } + if (key_size > GRN_TABLE_MAX_KEY_SIZE) { + ERR(GRN_INVALID_ARGUMENT, "too long key: (%u)", key_size); + return GRN_ID_NIL; + } + KEY_ENCODE(pat, keybuf, key, key_size); + r0 = _grn_pat_add(ctx, pat, (uint8_t *)key, key_size, &new, &lkey); + if (r0 == GRN_ID_NIL) { return GRN_ID_NIL; } + if (added) { *added = new; } + if (r0 && (pat->obj.header.flags & GRN_OBJ_KEY_WITH_SIS) && + (*((uint8_t *)key) & 0x80)) { // todo: refine!! + sis_node *sl, *sr; + grn_id l = r0, r; + if (new && (sl = sis_get(ctx, pat, l))) { + const char *sis = key, *end = sis + key_size; + sl->children = l; + sl->sibling = 0; + while (chop(ctx, pat, &sis, end, &lkey)) { + if (!(*sis & 0x80)) { break; } + if (!(r = _grn_pat_add(ctx, pat, (uint8_t *)sis, end - sis, &new, &lkey))) { + break; + } + if (!(sr = sis_get(ctx, pat, r))) { break; } + if (new) { + sl->sibling = r; + sr->children = l; + sr->sibling = 0; + } else { + sl->sibling = sr->children; + sr->children = l; + break; + } + l = r; + sl = sr; + } + } + } + if (r0 && value) { + byte *v = (byte *)sis_get(ctx, pat, r0); + if (pat->obj.header.flags & GRN_OBJ_KEY_WITH_SIS) { + *value = v + sizeof(sis_node); + } else { + *value = v; + } + } + return r0; +} + +inline static grn_id +_grn_pat_get(grn_ctx *ctx, grn_pat *pat, const void *key, uint32_t key_size, void **value) +{ + grn_id r; + pat_node *rn; + int c0 = -1, c; + uint32_t len = key_size * 16; + PAT_AT(pat, 0, rn); + for (r = rn->lr[1]; r;) { + PAT_AT(pat, r, rn); + if (!rn) { break; /* corrupt? */ } + c = PAT_CHK(rn); + if (len <= c) { break; } + if (c <= c0) { + const uint8_t *k = pat_node_get_key(ctx, pat, rn); + if (k && key_size == PAT_LEN(rn) && !memcmp(k, key, key_size)) { + if (value) { + byte *v = (byte *)sis_get(ctx, pat, r); + if (pat->obj.header.flags & GRN_OBJ_KEY_WITH_SIS) { + *value = v + sizeof(sis_node); + } else { + *value = v; + } + } + return r; + } + break; + } + if (c & 1) { + r = (c + 1 < len) ? rn->lr[1] : rn->lr[0]; + } else { + r = rn->lr[nth_bit((uint8_t *)key, c, len)]; + } + c0 = c; + } + return GRN_ID_NIL; +} + +grn_id +grn_pat_get(grn_ctx *ctx, grn_pat *pat, const void *key, uint32_t key_size, void **value) +{ + uint8_t keybuf[MAX_FIXED_KEY_SIZE]; + if (grn_pat_error_if_truncated(ctx, pat) != GRN_SUCCESS) { + return GRN_ID_NIL; + } + KEY_ENCODE(pat, keybuf, key, key_size); + return _grn_pat_get(ctx, pat, key, key_size, value); +} + +grn_id +grn_pat_nextid(grn_ctx *ctx, grn_pat *pat, const void *key, uint32_t key_size) +{ + grn_id r = GRN_ID_NIL; + if (pat && key) { + if (grn_pat_error_if_truncated(ctx, pat) != GRN_SUCCESS) { + return GRN_ID_NIL; + } + if (!(r = pat->header->garbages[key_size > sizeof(uint32_t) ? key_size : 0])) { + r = pat->header->curr_rec + 1; + } + } + return r; +} + +static void +get_tc(grn_ctx *ctx, grn_pat *pat, grn_hash *h, pat_node *rn) +{ + grn_id id; + pat_node *node; + id = rn->lr[1]; + if (id) { + PAT_AT(pat, id, node); + if (node) { + if (PAT_CHK(node) > PAT_CHK(rn)) { + get_tc(ctx, pat, h, node); + } else { + grn_hash_add(ctx, h, &id, sizeof(grn_id), NULL, NULL); + } + } + } + id = rn->lr[0]; + if (id) { + PAT_AT(pat, id, node); + if (node) { + if (PAT_CHK(node) > PAT_CHK(rn)) { + get_tc(ctx, pat, h, node); + } else { + grn_hash_add(ctx, h, &id, sizeof(grn_id), NULL, NULL); + } + } + } +} + +grn_rc +grn_pat_prefix_search(grn_ctx *ctx, grn_pat *pat, + const void *key, uint32_t key_size, grn_hash *h) +{ + int c0 = -1, c; + const uint8_t *k; + uint32_t len = key_size * 16; + grn_id r; + pat_node *rn; + uint8_t keybuf[MAX_FIXED_KEY_SIZE]; + grn_rc rc = grn_pat_error_if_truncated(ctx, pat); + if (rc != GRN_SUCCESS) { + return rc; + } + KEY_ENCODE(pat, keybuf, key, key_size); + PAT_AT(pat, 0, rn); + r = rn->lr[1]; + while (r) { + PAT_AT(pat, r, rn); + if (!rn) { return GRN_FILE_CORRUPT; } + c = PAT_CHK(rn); + if (c0 < c && c < len - 1) { + if (c & 1) { + r = (c + 1 < len) ? rn->lr[1] : rn->lr[0]; + } else { + r = rn->lr[nth_bit((uint8_t *)key, c, len)]; + } + c0 = c; + continue; + } + if (!(k = pat_node_get_key(ctx, pat, rn))) { break; } + if (PAT_LEN(rn) < key_size) { break; } + if (!memcmp(k, key, key_size)) { + if (c >= len - 1) { + get_tc(ctx, pat, h, rn); + } else { + grn_hash_add(ctx, h, &r, sizeof(grn_id), NULL, NULL); + } + return GRN_SUCCESS; + } + break; + } + return GRN_END_OF_DATA; +} + +grn_hash * +grn_pat_prefix_search2(grn_ctx *ctx, grn_pat *pat, const void *key, uint32_t key_size) +{ + grn_hash *h; + if (!pat || !key) { return NULL; } + if ((h = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, 0))) { + if (grn_pat_prefix_search(ctx, pat, key, key_size, h)) { + grn_hash_close(ctx, h); + h = NULL; + } + } + return h; +} + +grn_rc +grn_pat_suffix_search(grn_ctx *ctx, grn_pat *pat, + const void *key, uint32_t key_size, grn_hash *h) +{ + grn_id r; + if ((r = grn_pat_get(ctx, pat, key, key_size, NULL))) { + uint32_t *offset; + if (grn_hash_add(ctx, h, &r, sizeof(grn_id), (void **) &offset, NULL)) { + *offset = 0; + if (pat->obj.header.flags & GRN_OBJ_KEY_WITH_SIS) { sis_collect(ctx, pat, h, r, 1); } + return GRN_SUCCESS; + } + } + return GRN_END_OF_DATA; +} + +grn_hash * +grn_pat_suffix_search2(grn_ctx *ctx, grn_pat *pat, const void *key, uint32_t key_size) +{ + grn_hash *h; + if (!pat || !key) { return NULL; } + if ((h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(uint32_t), 0))) { + if (grn_pat_suffix_search(ctx, pat, key, key_size, h)) { + grn_hash_close(ctx, h); + h = NULL; + } + } + return h; +} + +grn_id +grn_pat_lcp_search(grn_ctx *ctx, grn_pat *pat, const void *key, uint32_t key_size) +{ + pat_node *rn; + grn_id r, r2 = GRN_ID_NIL; + uint32_t len = key_size * 16; + int c0 = -1, c; + if (!pat || !key) { + return GRN_ID_NIL; + } + if (grn_pat_error_if_truncated(ctx, pat) != GRN_SUCCESS) { + return GRN_ID_NIL; + } + if (!(pat->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE)) { return GRN_ID_NIL; } + PAT_AT(pat, 0, rn); + for (r = rn->lr[1]; r;) { + PAT_AT(pat, r, rn); + if (!rn) { break; /* corrupt? */ } + c = PAT_CHK(rn); + if (c <= c0) { + if (PAT_LEN(rn) <= key_size) { + uint8_t *p = pat_node_get_key(ctx, pat, rn); + if (!p) { break; } + if (!memcmp(p, key, PAT_LEN(rn))) { return r; } + } + break; + } + if (len <= c) { break; } + if (c & 1) { + uint8_t *p; + pat_node *rn0; + grn_id r0 = rn->lr[0]; + PAT_AT(pat, r0, rn0); + if (!rn0) { break; /* corrupt? */ } + p = pat_node_get_key(ctx, pat, rn0); + if (!p) { break; } + if (PAT_LEN(rn0) <= key_size && !memcmp(p, key, PAT_LEN(rn0))) { r2 = r0; } + r = (c + 1 < len) ? rn->lr[1] : rn->lr[0]; + } else { + r = rn->lr[nth_bit((uint8_t *)key, c, len)]; + } + c0 = c; + } + return r2; +} + +static grn_id +common_prefix_pat_node_get(grn_ctx *ctx, grn_pat *pat, const void *key, uint32_t key_size) +{ + int c0 = -1, c; + const uint8_t *k; + uint32_t len = key_size * 16; + grn_id r; + pat_node *rn; + uint8_t keybuf[MAX_FIXED_KEY_SIZE]; + + KEY_ENCODE(pat, keybuf, key, key_size); + PAT_AT(pat, 0, rn); + r = rn->lr[1]; + while (r) { + PAT_AT(pat, r, rn); + if (!rn) { return GRN_ID_NIL; } + c = PAT_CHK(rn); + if (c0 < c && c < len - 1) { + if (c & 1) { + r = (c + 1 < len) ? rn->lr[1] : rn->lr[0]; + } else { + r = rn->lr[nth_bit((uint8_t *)key, c, len)]; + } + c0 = c; + continue; + } + if (!(k = pat_node_get_key(ctx, pat, rn))) { break; } + if (PAT_LEN(rn) < key_size) { break; } + if (!memcmp(k, key, key_size)) { + return r; + } + break; + } + return GRN_ID_NIL; +} + +typedef struct { + grn_id id; + uint16_t distance; +} fuzzy_heap_node; + +typedef struct { + int n_entries; + int limit; + fuzzy_heap_node *nodes; +} fuzzy_heap; + +static inline fuzzy_heap * +fuzzy_heap_open(grn_ctx *ctx, int max) +{ + fuzzy_heap *h = GRN_MALLOC(sizeof(fuzzy_heap)); + if (!h) { return NULL; } + h->nodes = GRN_MALLOC(sizeof(fuzzy_heap_node) * max); + if (!h->nodes) { + GRN_FREE(h); + return NULL; + } + h->n_entries = 0; + h->limit = max; + return h; +} + +static inline grn_bool +fuzzy_heap_push(grn_ctx *ctx, fuzzy_heap *h, grn_id id, uint16_t distance) +{ + int n, n2; + fuzzy_heap_node node = {id, distance}; + fuzzy_heap_node node2; + if (h->n_entries >= h->limit) { + int max = h->limit * 2; + fuzzy_heap_node *nodes = GRN_REALLOC(h->nodes, sizeof(fuzzy_heap) * max); + if (!h) { + return GRN_FALSE; + } + h->limit = max; + h->nodes = nodes; + } + h->nodes[h->n_entries] = node; + n = h->n_entries++; + while (n) { + n2 = (n - 1) >> 1; + if (h->nodes[n2].distance <= h->nodes[n].distance) { break; } + node2 = h->nodes[n]; + h->nodes[n] = h->nodes[n2]; + h->nodes[n2] = node2; + n = n2; + } + return GRN_TRUE; +} + +static inline void +fuzzy_heap_close(grn_ctx *ctx, fuzzy_heap *h) +{ + GRN_FREE(h->nodes); + GRN_FREE(h); +} + +#define DIST(ox,oy) (dists[((lx + 1) * (oy)) + (ox)]) + +inline static uint16_t +calc_edit_distance_by_offset(grn_ctx *ctx, + const char *sx, const char *ex, + const char *sy, const char *ey, + uint16_t *dists, uint32_t lx, + uint32_t offset, uint32_t max_distance, + grn_bool *can_transition, int flags) +{ + uint32_t cx, cy, x, y; + const char *px, *py; + + /* Skip already calculated rows */ + for (py = sy, y = 1; py < ey && (cy = grn_charlen(ctx, py, ey)); py += cy, y++) { + if (py - sy >= offset) { + break; + } + } + for (; py < ey && (cy = grn_charlen(ctx, py, ey)); py += cy, y++) { + /* children nodes will be no longer smaller than max distance + * with only insertion costs. + * This is end of row on allocated memory. */ + if (y > lx + max_distance) { + *can_transition = GRN_FALSE; + return max_distance + 1; + } + + for (px = sx, x = 1; px < ex && (cx = grn_charlen(ctx, px, ex)); px += cx, x++) { + if (cx == cy && !memcmp(px, py, cx)) { + DIST(x, y) = DIST(x - 1, y - 1); + } else { + uint32_t a, b, c; + a = DIST(x - 1, y) + 1; + b = DIST(x, y - 1) + 1; + c = DIST(x - 1, y - 1) + 1; + DIST(x, y) = ((a < b) ? ((a < c) ? a : c) : ((b < c) ? b : c)); + if (flags & GRN_TABLE_FUZZY_SEARCH_WITH_TRANSPOSITION && + x > 1 && y > 1 && + cx == cy && + memcmp(px, py - cy, cx) == 0 && + memcmp(px - cx, py, cx) == 0) { + uint32_t t = DIST(x - 2, y - 2) + 1; + DIST(x, y) = ((DIST(x, y) < t) ? DIST(x, y) : t); + } + } + } + } + if (lx) { + /* If there is no cell which is smaller than equal to max distance on end of row, + * children nodes will be no longer smaller than max distance */ + *can_transition = GRN_FALSE; + for (x = 1; x <= lx; x++) { + if (DIST(x, y - 1) <= max_distance) { + *can_transition = GRN_TRUE; + break; + } + } + } + return DIST(lx, y - 1); +} + +typedef struct { + const char *key; + int key_length; + grn_bool can_transition; +} fuzzy_node; + +inline static void +_grn_pat_fuzzy_search(grn_ctx *ctx, grn_pat *pat, grn_id id, + const char *key, uint32_t key_size, + uint16_t *dists, uint32_t lx, + int last_check, fuzzy_node *last_node, + uint32_t max_distance, int flags, fuzzy_heap *heap) +{ + pat_node *node = NULL; + int check, len; + const char *k; + uint32_t offset = 0; + + PAT_AT(pat, id, node); + if (!node) { + return; + } + check = PAT_CHK(node); + len = PAT_LEN(node); + k = pat_node_get_key(ctx, pat, node); + + if (check > last_check) { + if (len >= last_node->key_length && + !memcmp(k, last_node->key, last_node->key_length)) { + if (last_node->can_transition == GRN_FALSE) { + return; + } + } + _grn_pat_fuzzy_search(ctx, pat, node->lr[0], + key, key_size, dists, lx, + check, last_node, + max_distance, flags, heap); + + _grn_pat_fuzzy_search(ctx, pat, node->lr[1], + key, key_size, dists, lx, + check, last_node, + max_distance, flags, heap); + } else { + if (id) { + /* Set already calculated common prefix length */ + if (len >= last_node->key_length && + !memcmp(k, last_node->key, last_node->key_length)) { + if (last_node->can_transition == GRN_FALSE) { + return; + } + offset = last_node->key_length; + } else { + if (last_node->can_transition == GRN_FALSE) { + last_node->can_transition = GRN_TRUE; + } + if (last_node->key_length) { + const char *kp = k; + const char *ke = k + len; + const char *p = last_node->key; + const char *e = last_node->key + last_node->key_length; + int lp; + for (;p < e && kp < ke && (lp = grn_charlen(ctx, p, e)); + p += lp, kp += lp) { + if (p + lp <= e && kp + lp <= ke && memcmp(p, kp, lp)) { + break; + } + } + offset = kp - k; + } + } + if (len - offset) { + uint16_t distance; + distance = + calc_edit_distance_by_offset(ctx, + key, key + key_size, + k, k + len, + dists, lx, + offset, max_distance, + &(last_node->can_transition), flags); + if (distance <= max_distance) { + fuzzy_heap_push(ctx, heap, id, distance); + } + } + last_node->key = k; + last_node->key_length = len; + } + } + return; +} + +#define HEAP_SIZE 256 + +grn_rc +grn_pat_fuzzy_search(grn_ctx *ctx, grn_pat *pat, + const void *key, uint32_t key_size, + grn_fuzzy_search_optarg *args, grn_hash *h) +{ + pat_node *node; + grn_id id; + uint16_t *dists; + uint32_t lx, len, x, y, i; + const char *s = key; + const char *e = (const char *)key + key_size; + fuzzy_node last_node; + fuzzy_heap *heap; + uint32_t max_distance = 1; + uint32_t max_expansion = 0; + uint32_t prefix_match_size = 0; + int flags = 0; + grn_rc rc = grn_pat_error_if_truncated(ctx, pat); + if (rc != GRN_SUCCESS) { + return rc; + } + if (args) { + max_distance = args->max_distance; + max_expansion = args->max_expansion; + prefix_match_size = args->prefix_match_size; + flags = args->flags; + } + if (key_size > GRN_TABLE_MAX_KEY_SIZE || + max_distance > GRN_TABLE_MAX_KEY_SIZE || + prefix_match_size > key_size) { + return GRN_INVALID_ARGUMENT; + } + + heap = fuzzy_heap_open(ctx, HEAP_SIZE); + if (!heap) { + return GRN_NO_MEMORY_AVAILABLE; + } + + PAT_AT(pat, GRN_ID_NIL, node); + id = node->lr[1]; + + if (prefix_match_size) { + grn_id tid; + tid = common_prefix_pat_node_get(ctx, pat, key, prefix_match_size); + if (tid != GRN_ID_NIL) { + id = tid; + } else { + return GRN_END_OF_DATA; + } + } + for (lx = 0; s < e && (len = grn_charlen(ctx, s, e)); s += len) { + lx++; + } + dists = GRN_MALLOC((lx + 1) * (lx + max_distance + 1) * sizeof(uint16_t)); + if (!dists) { + return GRN_NO_MEMORY_AVAILABLE; + } + + for (x = 0; x <= lx; x++) { DIST(x, 0) = x; } + for (y = 0; y <= lx + max_distance ; y++) { DIST(0, y) = y; } + + last_node.key = NULL; + last_node.key_length = 0; + last_node.can_transition = GRN_TRUE; + _grn_pat_fuzzy_search(ctx, pat, id, + key, key_size, dists, lx, + -1, &last_node, max_distance, flags, heap); + GRN_FREE(dists); + for (i = 0; i < heap->n_entries; i++) { + if (max_expansion > 0 && i >= max_expansion) { + break; + } + if (DB_OBJ(h)->header.flags & GRN_OBJ_WITH_SUBREC) { + grn_rset_recinfo *ri; + if (grn_hash_add(ctx, h, &(heap->nodes[i].id), sizeof(grn_id), (void **)&ri, NULL)) { + ri->score = max_distance - heap->nodes[i].distance + 1; + } + } else { + grn_hash_add(ctx, h, &(heap->nodes[i].id), sizeof(grn_id), NULL, NULL); + } + } + fuzzy_heap_close(ctx, heap); + if (grn_hash_size(ctx, h)) { + return GRN_SUCCESS; + } else { + return GRN_END_OF_DATA; + } +} + +inline static grn_rc +_grn_pat_del(grn_ctx *ctx, grn_pat *pat, const char *key, uint32_t key_size, int shared, + grn_table_delete_optarg *optarg) +{ + grn_pat_delinfo *di; + pat_node *rn, *rn0 = NULL, *rno = NULL; + int c = -1, c0 = -1, ch; + uint32_t len = key_size * 16; + grn_id r, otherside, *proot, *p, *p0 = NULL; + + /* delinfo_new() must be called before searching for rn. */ + di = delinfo_new(ctx, pat); + di->shared = shared; + + /* + * Search a patricia tree for a given key. + * If the key exists, get its output node. + * + * rn, rn0: the output node and its previous node. + * rno: the other side of rn (the other destination of rn0). + * c, c0: checks of rn0 and its previous node. + * p, p0: pointers to transitions (IDs) that refer to rn and rn0. + */ + PAT_AT(pat, 0, rn); + proot = p = &rn->lr[1]; + for (;;) { + r = *p; + if (!r) { + return GRN_INVALID_ARGUMENT; + } + PAT_AT(pat, r, rn); + if (!rn) { + return GRN_FILE_CORRUPT; + } + ch = PAT_CHK(rn); + if (len <= ch) { + return GRN_INVALID_ARGUMENT; + } + if (c >= ch) { + /* Output node found. */ + const uint8_t *k = pat_node_get_key(ctx, pat, rn); + if (!k) { + return GRN_INVALID_ARGUMENT; + } + if (key_size != PAT_LEN(rn) || memcmp(k, key, key_size)) { + return GRN_INVALID_ARGUMENT; + } + /* Given key found. */ + break; + } + c0 = c; + p0 = p; + c = ch; + if (c & 1) { + p = (c + 1 < len) ? &rn->lr[1] : &rn->lr[0]; + } else { + p = &rn->lr[nth_bit((uint8_t *)key, c, len)]; + } + rn0 = rn; + } + if (optarg && optarg->func && + !optarg->func(ctx, (grn_obj *)pat, r, optarg->func_arg)) { + return GRN_SUCCESS; + } + if (rn0->lr[0] == rn0->lr[1]) { + GRN_LOG(ctx, GRN_LOG_DEBUG, "*p0 (%d), rn0->lr[0] == rn0->lr[1] (%d)", + *p0, rn0->lr[0]); + return GRN_FILE_CORRUPT; + } + otherside = (rn0->lr[1] == r) ? rn0->lr[0] : rn0->lr[1]; + if (otherside) { + PAT_AT(pat, otherside, rno); + if (!rno) { + return GRN_FILE_CORRUPT; + } + } + + if (rn == rn0) { + /* The last transition (p) is a self-loop. */ + di->stat = DL_PHASE2; + di->d = r; + if (otherside) { + if (c0 < PAT_CHK(rno) && PAT_CHK(rno) <= c) { + /* To keep rno as an output node, its check is set to zero. */ + if (!delinfo_search(pat, otherside)) { + GRN_LOG(ctx, GRN_LOG_DEBUG, "no delinfo found %d", otherside); + } + PAT_CHK_SET(rno, 0); + } + if (proot == p0 && !rno->check) { + /* + * Update rno->lr because the first node, rno becomes the new first + * node, is not an output node even if its check is zero. + */ + const uint8_t *k = pat_node_get_key(ctx, pat, rno); + int direction = k ? (*k >> 7) : 1; + rno->lr[direction] = otherside; + rno->lr[!direction] = 0; + } + } + *p0 = otherside; + } else if ((!rn->lr[0] && rn->lr[1] == r) || + (!rn->lr[1] && rn->lr[0] == r)) { + /* The output node has only a disabled self-loop. */ + di->stat = DL_PHASE2; + di->d = r; + *p = 0; + } else { + /* The last transition (p) is not a self-loop. */ + grn_pat_delinfo *ldi = NULL, *ddi = NULL; + if (PAT_DEL(rn)) { + ldi = delinfo_search(pat, r); + } + if (PAT_DEL(rn0)) { + ddi = delinfo_search(pat, *p0); + } + if (ldi) { + PAT_DEL_OFF(rn); + di->stat = DL_PHASE2; + if (ddi) { + PAT_DEL_OFF(rn0); + ddi->stat = DL_PHASE2; + if (ddi == ldi) { + if (r != ddi->ld) { + GRN_LOG(ctx, GRN_LOG_ERROR, "r(%d) != ddi->ld(%d)", r, ddi->ld); + } + di->d = r; + } else { + ldi->ld = ddi->ld; + di->d = r; + } + } else { + PAT_DEL_ON(rn0); + ldi->ld = *p0; + di->d = r; + } + } else { + PAT_DEL_ON(rn); + if (ddi) { + if (ddi->d != *p0) { + GRN_LOG(ctx, GRN_LOG_ERROR, "ddi->d(%d) != *p0(%d)", ddi->d, *p0); + } + PAT_DEL_OFF(rn0); + ddi->stat = DL_PHASE2; + di->stat = DL_PHASE1; + di->ld = ddi->ld; + di->d = r; + /* + PAT_DEL_OFF(rn0); + ddi->d = r; + di->stat = DL_PHASE2; + di->d = *p0; + */ + } else { + PAT_DEL_ON(rn0); + di->stat = DL_PHASE1; + di->ld = *p0; + di->d = r; + // grn_log("pat_del d=%d ld=%d stat=%d", r, *p0, DL_PHASE1); + } + } + if (*p0 == otherside) { + /* The previous node (*p0) has a self-loop (rn0 == rno). */ + PAT_CHK_SET(rno, 0); + if (proot == p0) { + /* + * Update rno->lr because the first node, rno becomes the new first + * node, is not an output node even if its check is zero. + */ + const uint8_t *k = pat_node_get_key(ctx, pat, rno); + int direction = k ? (*k >> 7) : 1; + rno->lr[direction] = otherside; + rno->lr[!direction] = 0; + } + } else { + if (otherside) { + if (c0 < PAT_CHK(rno) && PAT_CHK(rno) <= c) { + /* To keep rno as an output node, its check is set to zero. */ + if (!delinfo_search(pat, otherside)) { + GRN_LOG(ctx, GRN_LOG_ERROR, "no delinfo found %d", otherside); + } + PAT_CHK_SET(rno, 0); + } + if (proot == p0 && !rno->check) { + /* + * Update rno->lr because the first node, rno becomes the new first + * node, is not an output node even if its check is zero. + */ + const uint8_t *k = pat_node_get_key(ctx, pat, rno); + int direction = k ? (*k >> 7) : 1; + rno->lr[direction] = otherside; + rno->lr[!direction] = 0; + } + } + *p0 = otherside; + } + } + pat->header->n_entries--; + pat->header->n_garbages++; + return GRN_SUCCESS; +} + +static grn_rc +_grn_pat_delete(grn_ctx *ctx, grn_pat *pat, const void *key, uint32_t key_size, + grn_table_delete_optarg *optarg) +{ + if (pat->obj.header.flags & GRN_OBJ_KEY_WITH_SIS) { + grn_id id = grn_pat_get(ctx, pat, key, key_size, NULL); + if (id && grn_pat_delete_with_sis(ctx, pat, id, optarg)) { + return GRN_SUCCESS; + } + return GRN_INVALID_ARGUMENT; + } + return _grn_pat_del(ctx, pat, key, key_size, 0, optarg); +} + +grn_rc +grn_pat_delete(grn_ctx *ctx, grn_pat *pat, const void *key, uint32_t key_size, + grn_table_delete_optarg *optarg) +{ + grn_rc rc; + uint8_t keybuf[MAX_FIXED_KEY_SIZE]; + if (!pat || !key || !key_size) { return GRN_INVALID_ARGUMENT; } + rc = grn_pat_error_if_truncated(ctx, pat); + if (rc != GRN_SUCCESS) { + return rc; + } + KEY_ENCODE(pat, keybuf, key, key_size); + return _grn_pat_delete(ctx, pat, key, key_size, optarg); +} + +uint32_t +grn_pat_size(grn_ctx *ctx, grn_pat *pat) +{ + if (!pat) { return GRN_INVALID_ARGUMENT; } + if (grn_pat_error_if_truncated(ctx, pat) != GRN_SUCCESS) { + return 0; + } + return pat->header->n_entries; +} + +const char * +_grn_pat_key(grn_ctx *ctx, grn_pat *pat, grn_id id, uint32_t *key_size) +{ + pat_node *node; + uint8_t *key; + if (grn_pat_error_if_truncated(ctx, pat) != GRN_SUCCESS) { + *key_size = 0; + return NULL; + } + PAT_AT(pat, id, node); + if (!node) { + *key_size = 0; + return NULL; + } + key = pat_node_get_key(ctx, pat, node); + if (key) { + *key_size = PAT_LEN(node); + } else { + *key_size = 0; + } + return (const char *)key; +} + +grn_rc +grn_pat_delete_by_id(grn_ctx *ctx, grn_pat *pat, grn_id id, + grn_table_delete_optarg *optarg) +{ + grn_rc rc; + if (!pat || !id) { return GRN_INVALID_ARGUMENT; } + rc = grn_pat_error_if_truncated(ctx, pat); + if (rc != GRN_SUCCESS) { + return rc; + } + { + uint32_t key_size; + const char *key = _grn_pat_key(ctx, pat, id, &key_size); + return _grn_pat_delete(ctx, pat, key, key_size, optarg); + } +} + +int +grn_pat_get_key(grn_ctx *ctx, grn_pat *pat, grn_id id, void *keybuf, int bufsize) +{ + int len; + uint8_t *key; + pat_node *node; + if (!pat) { return 0; } + if (grn_pat_error_if_truncated(ctx, pat) != GRN_SUCCESS) { + return 0; + } + if (!id) { return 0; } + PAT_AT(pat, id, node); + if (!node) { return 0; } + if (!(key = pat_node_get_key(ctx, pat, node))) { return 0; } + len = PAT_LEN(node); + if (keybuf && bufsize >= len) { + if (KEY_NEEDS_CONVERT(pat, len)) { + KEY_DEC(pat, keybuf, key, len); + } else { + grn_memcpy(keybuf, key, len); + } + } + return len; +} + +int +grn_pat_get_key2(grn_ctx *ctx, grn_pat *pat, grn_id id, grn_obj *bulk) +{ + uint32_t len; + uint8_t *key; + pat_node *node; + if (!pat) { return GRN_INVALID_ARGUMENT; } + if (grn_pat_error_if_truncated(ctx, pat) != GRN_SUCCESS) { + return 0; + } + if (!id) { return 0; } + PAT_AT(pat, id, node); + if (!node) { return 0; } + if (!(key = pat_node_get_key(ctx, pat, node))) { return 0; } + len = PAT_LEN(node); + if (KEY_NEEDS_CONVERT(pat, len)) { + if (bulk->header.impl_flags & GRN_OBJ_REFER) { + GRN_TEXT_INIT(bulk, 0); + } + if (!grn_bulk_reserve(ctx, bulk, len)) { + char *curr = GRN_BULK_CURR(bulk); + KEY_DEC(pat, curr, key, len); + grn_bulk_truncate(ctx, bulk, GRN_BULK_VSIZE(bulk) + len); + } + } else { + if (bulk->header.impl_flags & GRN_OBJ_REFER) { + bulk->u.b.head = (char *)key; + bulk->u.b.curr = (char *)key + len; + } else { + grn_bulk_write(ctx, bulk, (char *)key, len); + } + } + return len; +} + +int +grn_pat_get_value(grn_ctx *ctx, grn_pat *pat, grn_id id, void *valuebuf) +{ + int value_size; + if (grn_pat_error_if_truncated(ctx, pat) != GRN_SUCCESS) { + return 0; + } + value_size = (int)pat->value_size; + if (value_size) { + byte *v = (byte *)sis_at(ctx, pat, id); + if (v) { + if (valuebuf) { + if (pat->obj.header.flags & GRN_OBJ_KEY_WITH_SIS) { + grn_memcpy(valuebuf, v + sizeof(sis_node), value_size); + } else { + grn_memcpy(valuebuf, v, value_size); + } + } + return value_size; + } + } + return 0; +} + +const char * +grn_pat_get_value_(grn_ctx *ctx, grn_pat *pat, grn_id id, uint32_t *size) +{ + const char *value = NULL; + if (grn_pat_error_if_truncated(ctx, pat) != GRN_SUCCESS) { + return NULL; + } + if ((*size = pat->value_size)) { + if ((value = (const char *)sis_at(ctx, pat, id)) + && (pat->obj.header.flags & GRN_OBJ_KEY_WITH_SIS)) { + value += sizeof(sis_node); + } + } + return value; +} + +grn_rc +grn_pat_set_value(grn_ctx *ctx, grn_pat *pat, grn_id id, + const void *value, int flags) +{ + grn_rc rc = grn_pat_error_if_truncated(ctx, pat); + if (rc != GRN_SUCCESS) { + return rc; + } + if (value) { + uint32_t value_size = pat->value_size; + if (value_size) { + byte *v = (byte *)sis_get(ctx, pat, id); + if (v) { + if (pat->obj.header.flags & GRN_OBJ_KEY_WITH_SIS) { v += sizeof(sis_node); } + switch ((flags & GRN_OBJ_SET_MASK)) { + case GRN_OBJ_SET : + grn_memcpy(v, value, value_size); + return GRN_SUCCESS; + case GRN_OBJ_INCR : + switch (value_size) { + case sizeof(int32_t) : + *((int32_t *)v) += *((int32_t *)value); + return GRN_SUCCESS; + case sizeof(int64_t) : + *((int64_t *)v) += *((int64_t *)value); + return GRN_SUCCESS; + default : + return GRN_INVALID_ARGUMENT; + } + break; + case GRN_OBJ_DECR : + switch (value_size) { + case sizeof(int32_t) : + *((int32_t *)v) -= *((int32_t *)value); + return GRN_SUCCESS; + case sizeof(int64_t) : + *((int64_t *)v) -= *((int64_t *)value); + return GRN_SUCCESS; + default : + return GRN_INVALID_ARGUMENT; + } + break; + default : + // todo : support other types. + return GRN_INVALID_ARGUMENT; + } + } else { + return GRN_NO_MEMORY_AVAILABLE; + } + } + } + return GRN_INVALID_ARGUMENT; +} + +grn_rc +grn_pat_info(grn_ctx *ctx, grn_pat *pat, int *key_size, unsigned int *flags, + grn_encoding *encoding, unsigned int *n_entries, unsigned int *file_size) +{ + grn_rc rc; + ERRCLR(NULL); + if (!pat) { return GRN_INVALID_ARGUMENT; } + rc = grn_pat_error_if_truncated(ctx, pat); + if (rc != GRN_SUCCESS) { + return rc; + } + if (key_size) { *key_size = pat->key_size; } + if (flags) { *flags = pat->obj.header.flags; } + if (encoding) { *encoding = pat->encoding; } + if (n_entries) { *n_entries = pat->header->n_entries; } + if (file_size) { + uint64_t tmp = 0; + if ((rc = grn_io_size(ctx, pat->io, &tmp))) { + return rc; + } + *file_size = (unsigned int) tmp; /* FIXME: inappropriate cast */ + } + return GRN_SUCCESS; +} + +int +grn_pat_delete_with_sis(grn_ctx *ctx, grn_pat *pat, grn_id id, + grn_table_delete_optarg *optarg) +{ + int level = 0, shared; + const char *key = NULL, *_key; + sis_node *sp, *ss = NULL, *si; + if (grn_pat_error_if_truncated(ctx, pat) != GRN_SUCCESS) { + return 0; + } + si = sis_at(ctx, pat, id); + while (id) { + pat_node *rn; + uint32_t key_size; + if ((si && si->children && si->children != id) || + (optarg && optarg->func && + !optarg->func(ctx, (grn_obj *)pat, id, optarg->func_arg))) { + break; + } + PAT_AT(pat, id, rn); + if (!(_key = (char *)pat_node_get_key(ctx, pat, rn))) { return 0; } + if (_key == key) { + shared = 1; + } else { + key = _key; + shared = 0; + } + key_size = PAT_LEN(rn); + if (key && key_size) { _grn_pat_del(ctx, pat, key, key_size, shared, NULL); } + if (si) { + grn_id *p, sid; + uint32_t lkey = 0; + if ((*key & 0x80) && chop(ctx, pat, &key, key + key_size, &lkey)) { + if ((sid = grn_pat_get(ctx, pat, key, key_size - lkey, NULL)) && + (ss = sis_at(ctx, pat, sid))) { + for (p = &ss->children; *p && *p != sid; p = &sp->sibling) { + if (*p == id) { + *p = si->sibling; + break; + } + if (!(sp = sis_at(ctx, pat, *p))) { break; } + } + } + } else { + sid = GRN_ID_NIL; + } + si->sibling = 0; + si->children = 0; + id = sid; + si = ss; + } else { + id = GRN_ID_NIL; + } + level++; + } + if (level) { + uint32_t lkey = 0; + while (id && key) { + uint32_t key_size; + if (_grn_pat_key(ctx, pat, id, &key_size) != key) { break; } + { + pat_node *rn; + PAT_AT(pat, id, rn); + if (!rn) { break; } + if (lkey) { + rn->key = lkey; + } else { + pat_node_set_key(ctx, pat, rn, (uint8_t *)key, key_size); + lkey = rn->key; + } + } + { + const char *end = key + key_size; + if (!((*key & 0x80) && chop(ctx, pat, &key, end, &lkey))) { break; } + id = grn_pat_get(ctx, pat, key, end - key, NULL); + } + } + } + return level; +} + +grn_id +grn_pat_next(grn_ctx *ctx, grn_pat *pat, grn_id id) +{ + if (grn_pat_error_if_truncated(ctx, pat) != GRN_SUCCESS) { + return GRN_ID_NIL; + } + while (++id <= pat->header->curr_rec) { + uint32_t key_size; + const char *key = _grn_pat_key(ctx, pat, id, &key_size); + if (id == grn_pat_get(ctx, pat, key, key_size, NULL)) { + return id; + } + } + return GRN_ID_NIL; +} + +grn_id +grn_pat_at(grn_ctx *ctx, grn_pat *pat, grn_id id) +{ + uint32_t key_size; + const char *key = _grn_pat_key(ctx, pat, id, &key_size); + if (key && (id == _grn_pat_get(ctx, pat, key, key_size, NULL))) { return id; } + return GRN_ID_NIL; +} + +grn_id +grn_pat_curr_id(grn_ctx *ctx, grn_pat *pat) +{ + if (grn_pat_error_if_truncated(ctx, pat) != GRN_SUCCESS) { + return GRN_ID_NIL; + } + return pat->header->curr_rec; +} + +int +grn_pat_scan(grn_ctx *ctx, grn_pat *pat, const char *str, unsigned int str_len, + grn_pat_scan_hit *sh, unsigned int sh_size, const char **rest) +{ + int n = 0; + grn_id tid; + if (grn_pat_error_if_truncated(ctx, pat) != GRN_SUCCESS) { + return 0; + } + if (pat->normalizer) { + int flags = + GRN_STRING_REMOVE_BLANK | + GRN_STRING_WITH_TYPES | + GRN_STRING_WITH_CHECKS; + grn_obj *nstr = grn_string_open(ctx, str, str_len, + pat->normalizer, flags); + if (nstr) { + const short *cp = grn_string_get_checks(ctx, nstr); + const unsigned char *tp = grn_string_get_types(ctx, nstr); + unsigned int offset = 0, offset0 = 0; + unsigned int normalized_length_in_bytes; + const char *sp, *se; + grn_string_get_normalized(ctx, nstr, &sp, &normalized_length_in_bytes, + NULL); + se = sp + normalized_length_in_bytes; + while (n < sh_size) { + if ((tid = grn_pat_lcp_search(ctx, pat, sp, se - sp))) { + const char *key; + uint32_t len; + int first_key_char_len; + key = _grn_pat_key(ctx, pat, tid, &len); + sh[n].id = tid; + sh[n].offset = (*cp > 0) ? offset : offset0; + first_key_char_len = grn_charlen(ctx, key, key + len); + if (sh[n].offset > 0 && + GRN_CHAR_IS_BLANK(tp[-1]) && + ((first_key_char_len == 1 && key[0] != ' ') || + first_key_char_len > 1)){ + /* Remove leading spaces. */ + const char *original_str = str + sh[n].offset; + while (grn_charlen(ctx, original_str, str + str_len) == 1 && + original_str[0] == ' ') { + original_str++; + sh[n].offset++; + } + } + { + grn_bool blank_in_alnum = GRN_FALSE; + const unsigned char *start_tp = tp; + const unsigned char *blank_in_alnum_check_tp; + while (len--) { + if (*cp > 0) { offset0 = offset; offset += *cp; tp++; } + sp++; cp++; + } + sh[n].length = offset - sh[n].offset; + for (blank_in_alnum_check_tp = start_tp + 1; + blank_in_alnum_check_tp < tp; + blank_in_alnum_check_tp++) { +#define GRN_CHAR_IS_ALNUM(char_type) \ + (GRN_CHAR_TYPE(char_type) == GRN_CHAR_ALPHA || \ + GRN_CHAR_TYPE(char_type) == GRN_CHAR_DIGIT) + if (GRN_CHAR_IS_BLANK(blank_in_alnum_check_tp[0]) && + GRN_CHAR_IS_ALNUM(blank_in_alnum_check_tp[-1]) && + (blank_in_alnum_check_tp + 1) < tp && + GRN_CHAR_IS_ALNUM(blank_in_alnum_check_tp[1])) { + blank_in_alnum = GRN_TRUE; + } +#undef GRN_CHAR_IS_ALNUM + } + if (!blank_in_alnum) { + n++; + } + } + } else { + if (*cp > 0) { offset0 = offset; offset += *cp; tp++; } + do { + sp++; cp++; + } while (sp < se && !*cp); + } + if (se <= sp) { offset = str_len; break; } + } + if (rest) { + grn_string_get_original(ctx, nstr, rest, NULL); + *rest += offset; + } + grn_obj_close(ctx, nstr); + } else { + n = -1; + if (rest) { *rest = str; } + } + } else { + uint32_t len; + const char *sp, *se = str + str_len; + for (sp = str; sp < se && n < sh_size; sp += len) { + if ((tid = grn_pat_lcp_search(ctx, pat, sp, se - sp))) { + _grn_pat_key(ctx, pat, tid, &len); + sh[n].id = tid; + sh[n].offset = sp - str; + sh[n].length = len; + n++; + } else { + len = grn_charlen(ctx, sp, se); + } + if (!len) { break; } + } + if (rest) { *rest = sp; } + } + return n; +} + +#define INITIAL_SIZE 512 + +inline static void +push(grn_pat_cursor *c, grn_id id, uint16_t check) +{ + grn_ctx *ctx = c->ctx; + grn_pat_cursor_entry *se; + if (c->size <= c->sp) { + if (c->ss) { + uint32_t size = c->size * 4; + grn_pat_cursor_entry *ss = GRN_REALLOC(c->ss, size); + if (!ss) { return; /* give up */ } + c->ss = ss; + c->size = size; + } else { + if (!(c->ss = GRN_MALLOC(sizeof(grn_pat_cursor_entry) * INITIAL_SIZE))) { + return; /* give up */ + } + c->size = INITIAL_SIZE; + } + } + se = &c->ss[c->sp++]; + se->id = id; + se->check = check; +} + +inline static grn_pat_cursor_entry * +pop(grn_pat_cursor *c) +{ + return c->sp ? &c->ss[--c->sp] : NULL; +} + +static grn_id +grn_pat_cursor_next_by_id(grn_ctx *ctx, grn_pat_cursor *c) +{ + grn_pat *pat = c->pat; + int dir = (c->obj.header.flags & GRN_CURSOR_DESCENDING) ? -1 : 1; + while (c->curr_rec != c->tail) { + c->curr_rec += dir; + if (pat->header->n_garbages) { + uint32_t key_size; + const void *key = _grn_pat_key(ctx, pat, c->curr_rec, &key_size); + if (_grn_pat_get(ctx, pat, key, key_size, NULL) != c->curr_rec) { + continue; + } + } + c->rest--; + return c->curr_rec; + } + return GRN_ID_NIL; +} + +grn_id +grn_pat_cursor_next(grn_ctx *ctx, grn_pat_cursor *c) +{ + pat_node *node; + grn_pat_cursor_entry *se; + if (!c->rest) { return GRN_ID_NIL; } + if ((c->obj.header.flags & GRN_CURSOR_BY_ID)) { + return grn_pat_cursor_next_by_id(ctx, c); + } + while ((se = pop(c))) { + grn_id id = se->id; + int check = se->check, ch; + while (id) { + PAT_AT(c->pat, id, node); + if (!node) { + break; + } + ch = PAT_CHK(node); + if (ch > check) { + if (c->obj.header.flags & GRN_CURSOR_DESCENDING) { + push(c, node->lr[0], ch); + id = node->lr[1]; + } else { + push(c, node->lr[1], ch); + id = node->lr[0]; + } + check = ch; + continue; + } else { + if (id == c->tail) { + c->sp = 0; + } else { + if (!c->curr_rec && c->tail) { + uint32_t lmin, lmax; + pat_node *nmin, *nmax; + const uint8_t *kmin, *kmax; + if (c->obj.header.flags & GRN_CURSOR_DESCENDING) { + PAT_AT(c->pat, c->tail, nmin); + PAT_AT(c->pat, id, nmax); + } else { + PAT_AT(c->pat, id, nmin); + PAT_AT(c->pat, c->tail, nmax); + } + lmin = PAT_LEN(nmin); + lmax = PAT_LEN(nmax); + kmin = pat_node_get_key(ctx, c->pat, nmin); + kmax = pat_node_get_key(ctx, c->pat, nmax); + if ((lmin < lmax) ? + (memcmp(kmin, kmax, lmin) > 0) : + (memcmp(kmin, kmax, lmax) >= 0)) { + c->sp = 0; + break; + } + } + } + c->curr_rec = id; + c->rest--; + return id; + } + } + } + return GRN_ID_NIL; +} + +void +grn_pat_cursor_close(grn_ctx *ctx, grn_pat_cursor *c) +{ + GRN_ASSERT(c->ctx == ctx); + if (c->ss) { GRN_FREE(c->ss); } + GRN_FREE(c); +} + +inline static int +bitcmp(const void *s1, const void *s2, int offset, int length) +{ + int r, rest = length + (offset & 7) - 8, bl = offset >> 3, mask = 0xff >> (offset & 7); + unsigned char *a = (unsigned char *)s1 + bl, *b = (unsigned char *)s2 + bl; + if (rest <= 0) { + mask &= 0xff << -rest; + return (*a & mask) - (*b & mask); + } + if ((r = (*a & mask) - (*b & mask))) { return r; } + a++; b++; + if ((bl = rest >> 3)) { + if ((r = memcmp(a, b, bl))) { return r; } + a += bl; b += bl; + } + mask = 0xff << (8 - (rest & 7)); + return (*a & mask) - (*b & mask); +} + +inline static grn_rc +set_cursor_prefix(grn_ctx *ctx, grn_pat *pat, grn_pat_cursor *c, + const void *key, uint32_t key_size, int flags) +{ + int c0 = -1, ch; + const uint8_t *k; + uint32_t len, byte_len; + grn_id id; + pat_node *node; + uint8_t keybuf[MAX_FIXED_KEY_SIZE]; + if (flags & GRN_CURSOR_SIZE_BY_BIT) { + len = key_size * 2; + byte_len = key_size >> 3; + } else { + len = key_size * 16; + byte_len = key_size; + } + KEY_ENCODE(pat, keybuf, key, byte_len); + PAT_AT(pat, 0, node); + id = node->lr[1]; + while (id) { + PAT_AT(pat, id, node); + if (!node) { return GRN_FILE_CORRUPT; } + ch = PAT_CHK(node); + if (c0 < ch && ch < len - 1) { + if (ch & 1) { + id = (ch + 1 < len) ? node->lr[1] : node->lr[0]; + } else { + id = node->lr[nth_bit((uint8_t *)key, ch, len)]; + } + c0 = ch; + continue; + } + if (!(k = pat_node_get_key(ctx, pat, node))) { break; } + if (PAT_LEN(node) < byte_len) { break; } + if ((flags & GRN_CURSOR_SIZE_BY_BIT) + ? !bitcmp(k, key, 0, key_size) + : !memcmp(k, key, key_size)) { + if (c0 < ch) { + if (flags & GRN_CURSOR_DESCENDING) { + if ((ch > len - 1) || !(flags & GRN_CURSOR_GT)) { + push(c, node->lr[0], ch); + } + push(c, node->lr[1], ch); + } else { + push(c, node->lr[1], ch); + if ((ch > len - 1) || !(flags & GRN_CURSOR_GT)) { + push(c, node->lr[0], ch); + } + } + } else { + if (PAT_LEN(node) * 16 > len || !(flags & GRN_CURSOR_GT)) { + push(c, id, ch); + } + } + } + break; + } + return GRN_SUCCESS; +} + +inline static grn_rc +set_cursor_near(grn_ctx *ctx, grn_pat *pat, grn_pat_cursor *c, + uint32_t min_size, const void *key, int flags) +{ + grn_id id; + pat_node *node; + const uint8_t *k; + int r, check = -1, ch; + uint32_t min = min_size * 16; + uint8_t keybuf[MAX_FIXED_KEY_SIZE]; + KEY_ENCODE(pat, keybuf, key, pat->key_size); + PAT_AT(pat, 0, node); + for (id = node->lr[1]; id;) { + PAT_AT(pat, id, node); + if (!node) { return GRN_FILE_CORRUPT; } + ch = PAT_CHK(node); + if (ch <= check) { + if (check >= min) { push(c, id, check); } + break; + } + if ((check += 2) < ch) { + if (!(k = pat_node_get_key(ctx, pat, node))) { return GRN_FILE_CORRUPT; } + if ((r = bitcmp(key, k, check >> 1, (ch - check) >> 1))) { + if (ch >= min) { + push(c, node->lr[1], ch); + push(c, node->lr[0], ch); + } + break; + } + } + check = ch; + if (nth_bit((uint8_t *)key, check, pat->key_size)) { + if (check >= min) { push(c, node->lr[0], check); } + id = node->lr[1]; + } else { + if (check >= min) { push(c, node->lr[1], check); } + id = node->lr[0]; + } + } + return GRN_SUCCESS; +} + +inline static grn_rc +set_cursor_common_prefix(grn_ctx *ctx, grn_pat *pat, grn_pat_cursor *c, + uint32_t min_size, const void *key, uint32_t key_size, int flags) +{ + grn_id id; + pat_node *node; + const uint8_t *k; + int check = -1, ch; + uint32_t len = key_size * 16; + uint8_t keybuf[MAX_FIXED_KEY_SIZE]; + KEY_ENCODE(pat, keybuf, key, key_size); + PAT_AT(pat, 0, node); + for (id = node->lr[1]; id;) { + PAT_AT(pat, id, node); + if (!node) { return GRN_FILE_CORRUPT; } + ch = PAT_CHK(node); + if (ch <= check) { + if (!(k = pat_node_get_key(ctx, pat, node))) { return GRN_FILE_CORRUPT; } + { + uint32_t l = PAT_LEN(node); + if (min_size <= l && l <= key_size) { + if (!memcmp(key, k, l)) { push(c, id, check); } + } + } + break; + } + check = ch; + if (len <= check) { break; } + if (check & 1) { + grn_id id0 = node->lr[0]; + pat_node *node0; + PAT_AT(pat, id0, node0); + if (!node0) { return GRN_FILE_CORRUPT; } + if (!(k = pat_node_get_key(ctx, pat, node0))) { return GRN_FILE_CORRUPT; } + { + uint32_t l = PAT_LEN(node0); + if (memcmp(key, k, l)) { break; } + if (min_size <= l) { + push(c, id0, check); + } + } + id = node->lr[1]; + } else { + id = node->lr[nth_bit((uint8_t *)key, check, len)]; + } + } + return GRN_SUCCESS; +} + +inline static grn_rc +set_cursor_ascend(grn_ctx *ctx, grn_pat *pat, grn_pat_cursor *c, + const void *key, uint32_t key_size, int flags) +{ + grn_id id; + pat_node *node; + const uint8_t *k; + int r, check = -1, ch, c2; + uint32_t len = key_size * 16; + uint8_t keybuf[MAX_FIXED_KEY_SIZE]; + KEY_ENCODE(pat, keybuf, key, key_size); + PAT_AT(pat, 0, node); + for (id = node->lr[1]; id;) { + PAT_AT(pat, id, node); + if (!node) { return GRN_FILE_CORRUPT; } + ch = PAT_CHK(node); + if (ch <= check) { + if (!(k = pat_node_get_key(ctx, pat, node))) { return GRN_FILE_CORRUPT; } + { + uint32_t l = PAT_LEN(node); + if (l == key_size) { + if (flags & GRN_CURSOR_GT) { + if (memcmp(key, k, l) < 0) { push(c, id, check); } + } else { + if (memcmp(key, k, l) <= 0) { push(c, id, check); } + } + } else if (l < key_size) { + if (memcmp(key, k, l) < 0) { push(c, id, check); } + } else { + if (memcmp(key, k, key_size) <= 0) { push(c, id, check); } + } + } + break; + } + c2 = len < ch ? len : ch; + if ((check += 2) < c2) { + if (!(k = pat_node_get_key(ctx, pat, node))) { return GRN_FILE_CORRUPT; } + if ((r = bitcmp(key, k, check >> 1, ((c2 + 1) >> 1) - (check >> 1)))) { + if (r < 0) { + push(c, node->lr[1], ch); + push(c, node->lr[0], ch); + } + break; + } + } + check = ch; + if (len <= check) { + push(c, node->lr[1], ch); + push(c, node->lr[0], ch); + break; + } + if (check & 1) { + if (check + 1 < len) { + id = node->lr[1]; + } else { + push(c, node->lr[1], check); + id = node->lr[0]; + } + } else { + if (nth_bit((uint8_t *)key, check, len)) { + id = node->lr[1]; + } else { + push(c, node->lr[1], check); + id = node->lr[0]; + } + } + } + return GRN_SUCCESS; +} + +inline static grn_rc +set_cursor_descend(grn_ctx *ctx, grn_pat *pat, grn_pat_cursor *c, + const void *key, uint32_t key_size, int flags) +{ + grn_id id; + pat_node *node; + const uint8_t *k; + int r, check = -1, ch, c2; + uint32_t len = key_size * 16; + uint8_t keybuf[MAX_FIXED_KEY_SIZE]; + KEY_ENCODE(pat, keybuf, key, key_size); + PAT_AT(pat, 0, node); + for (id = node->lr[1]; id;) { + PAT_AT(pat, id, node); + if (!node) { return GRN_FILE_CORRUPT; } + ch = PAT_CHK(node); + if (ch <= check) { + if (!(k = pat_node_get_key(ctx, pat, node))) { return GRN_FILE_CORRUPT; } + { + uint32_t l = PAT_LEN(node); + if (l <= key_size) { + if ((flags & GRN_CURSOR_LT) && l == key_size) { + if (memcmp(key, k, l) > 0) { push(c, id, check); } + } else { + if (memcmp(key, k, l) >= 0) { push(c, id, check); } + } + } else { + if (memcmp(key, k, key_size) > 0) { push(c, id, check); } + } + } + break; + } + c2 = len < ch ? len : ch; + if ((check += 2) < c2) { + if (!(k = pat_node_get_key(ctx, pat, node))) { return GRN_FILE_CORRUPT; } + if ((r = bitcmp(key, k, check >> 1, ((c2 + 1) >> 1) - (check >> 1)))) { + if (r >= 0) { + push(c, node->lr[0], ch); + push(c, node->lr[1], ch); + } + break; + } + } + check = ch; + if (len <= check) { break; } + if (check & 1) { + if (check + 1 < len) { + push(c, node->lr[0], check); + id = node->lr[1]; + } else { + id = node->lr[0]; + } + } else { + if (nth_bit((uint8_t *)key, check, len)) { + push(c, node->lr[0], check); + id = node->lr[1]; + } else { + id = node->lr[0]; + } + } + } + return GRN_SUCCESS; +} + +static grn_pat_cursor * +grn_pat_cursor_open_by_id(grn_ctx *ctx, grn_pat *pat, + const void *min, uint32_t min_size, + const void *max, uint32_t max_size, + int offset, int limit, int flags) +{ + int dir; + grn_pat_cursor *c; + if (!pat || !ctx) { return NULL; } + if (!(c = GRN_MALLOCN(grn_pat_cursor, 1))) { return NULL; } + GRN_DB_OBJ_SET_TYPE(c, GRN_CURSOR_TABLE_PAT_KEY); + c->pat = pat; + c->ctx = ctx; + c->obj.header.flags = flags; + c->obj.header.domain = GRN_ID_NIL; + c->size = 0; + c->sp = 0; + c->ss = NULL; + c->tail = 0; + if (flags & GRN_CURSOR_DESCENDING) { + dir = -1; + if (max) { + if (!(c->curr_rec = grn_pat_get(ctx, pat, max, max_size, NULL))) { + c->tail = GRN_ID_NIL; + goto exit; + } + if (!(flags & GRN_CURSOR_LT)) { c->curr_rec++; } + } else { + c->curr_rec = pat->header->curr_rec + 1; + } + if (min) { + if (!(c->tail = grn_pat_get(ctx, pat, min, min_size, NULL))) { + c->curr_rec = GRN_ID_NIL; + goto exit; + } + if ((flags & GRN_CURSOR_GT)) { c->tail++; } + } else { + c->tail = GRN_ID_NIL + 1; + } + if (c->curr_rec < c->tail) { c->tail = c->curr_rec; } + } else { + dir = 1; + if (min) { + if (!(c->curr_rec = grn_pat_get(ctx, pat, min, min_size, NULL))) { + c->tail = GRN_ID_NIL; + goto exit; + } + if (!(flags & GRN_CURSOR_GT)) { c->curr_rec--; } + } else { + c->curr_rec = GRN_ID_NIL; + } + if (max) { + if (!(c->tail = grn_pat_get(ctx, pat, max, max_size, NULL))) { + c->curr_rec = GRN_ID_NIL; + goto exit; + } + if ((flags & GRN_CURSOR_LT)) { c->tail--; } + } else { + c->tail = pat->header->curr_rec; + } + if (c->tail < c->curr_rec) { c->tail = c->curr_rec; } + } + if (pat->header->n_garbages) { + while (offset && c->curr_rec != c->tail) { + uint32_t key_size; + const void *key; + c->curr_rec += dir; + key = _grn_pat_key(ctx, pat, c->curr_rec, &key_size); + if (_grn_pat_get(ctx, pat, key, key_size, NULL) == c->curr_rec) { + offset--; + } + } + } else { + if ((dir * (c->tail - c->curr_rec)) < offset) { + c->curr_rec = c->tail; + } else { + c->curr_rec += dir * offset; + } + } + c->rest = (limit < 0) ? GRN_ID_MAX : limit; +exit : + return c; +} + +static grn_rc set_cursor_rk(grn_ctx *ctx, grn_pat *pat, grn_pat_cursor *c, + const void *key, uint32_t key_size, int flags); + +grn_pat_cursor * +grn_pat_cursor_open(grn_ctx *ctx, grn_pat *pat, + const void *min, uint32_t min_size, + const void *max, uint32_t max_size, + int offset, int limit, int flags) +{ + grn_id id; + pat_node *node; + grn_pat_cursor *c; + if (!pat || !ctx) { return NULL; } + if (grn_pat_error_if_truncated(ctx, pat) != GRN_SUCCESS) { + return NULL; + } + if ((flags & GRN_CURSOR_BY_ID)) { + return grn_pat_cursor_open_by_id(ctx, pat, min, min_size, max, max_size, + offset, limit, flags); + } + if (!(c = GRN_MALLOCN(grn_pat_cursor, 1))) { return NULL; } + GRN_DB_OBJ_SET_TYPE(c, GRN_CURSOR_TABLE_PAT_KEY); + c->pat = pat; + c->ctx = ctx; + c->size = 0; + c->sp = 0; + c->ss = NULL; + c->tail = 0; + c->rest = GRN_ID_MAX; + c->curr_rec = GRN_ID_NIL; + c->obj.header.domain = GRN_ID_NIL; + if (flags & GRN_CURSOR_PREFIX) { + if (max && max_size) { + if ((pat->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE)) { + set_cursor_common_prefix(ctx, pat, c, min_size, max, max_size, flags); + } else { + set_cursor_near(ctx, pat, c, min_size, max, flags); + } + goto exit; + } else { + if (min && min_size) { + if (flags & GRN_CURSOR_RK) { + set_cursor_rk(ctx, pat, c, min, min_size, flags); + } else { + set_cursor_prefix(ctx, pat, c, min, min_size, flags); + } + goto exit; + } + } + } + if (flags & GRN_CURSOR_DESCENDING) { + if (min && min_size) { + set_cursor_ascend(ctx, pat, c, min, min_size, flags); + c->obj.header.flags = GRN_CURSOR_ASCENDING; + c->tail = grn_pat_cursor_next(ctx, c); + c->sp = 0; + if (!c->tail) { goto exit; } + } + if (max && max_size) { + set_cursor_descend(ctx, pat, c, max, max_size, flags); + } else { + PAT_AT(pat, 0, node); + if (!node) { + grn_pat_cursor_close(ctx, c); + return NULL; + } + if ((id = node->lr[1])) { + PAT_AT(pat, id, node); + if (node) { + int ch = PAT_CHK(node); + push(c, node->lr[0], ch); + push(c, node->lr[1], ch); + } + } + } + } else { + if (max && max_size) { + set_cursor_descend(ctx, pat, c, max, max_size, flags); + c->obj.header.flags = GRN_CURSOR_DESCENDING; + c->tail = grn_pat_cursor_next(ctx, c); + c->sp = 0; + if (!c->tail) { goto exit; } + } + if (min && min_size) { + set_cursor_ascend(ctx, pat, c, min, min_size, flags); + } else { + PAT_AT(pat, 0, node); + if (!node) { + grn_pat_cursor_close(ctx, c); + return NULL; + } + if ((id = node->lr[1])) { + PAT_AT(pat, id, node); + if (node) { + int ch = PAT_CHK(node); + push(c, node->lr[1], ch); + push(c, node->lr[0], ch); + } + } + } + } +exit : + c->obj.header.flags = flags; + c->curr_rec = GRN_ID_NIL; + while (offset--) { grn_pat_cursor_next(ctx, c); } + c->rest = (limit < 0) ? GRN_ID_MAX : limit; + return c; +} + +int +grn_pat_cursor_get_key(grn_ctx *ctx, grn_pat_cursor *c, void **key) +{ + *key = c->curr_key; + return grn_pat_get_key(ctx, c->pat, c->curr_rec, *key, GRN_TABLE_MAX_KEY_SIZE); +} + +int +grn_pat_cursor_get_value(grn_ctx *ctx, grn_pat_cursor *c, void **value) +{ + int value_size = (int)c->pat->value_size; + if (value_size) { + byte *v = (byte *)sis_at(ctx, c->pat, c->curr_rec); + if (v) { + if (c->pat->obj.header.flags & GRN_OBJ_KEY_WITH_SIS) { + *value = v + sizeof(sis_node); + } else { + *value = v; + } + } else { + *value = NULL; + } + } + return value_size; +} + +int +grn_pat_cursor_get_key_value(grn_ctx *ctx, grn_pat_cursor *c, + void **key, uint32_t *key_size, void **value) +{ + int value_size = (int)c->pat->value_size; + if (key_size) { + *key_size = (uint32_t) grn_pat_get_key(ctx, c->pat, c->curr_rec, c->curr_key, + GRN_TABLE_MAX_KEY_SIZE); + if (key) { *key = c->curr_key; } + } + if (value && value_size) { + byte *v = (byte *)sis_at(ctx, c->pat, c->curr_rec); + if (v) { + if (c->pat->obj.header.flags & GRN_OBJ_KEY_WITH_SIS) { + *value = v + sizeof(sis_node); + } else { + *value = v; + } + } else { + *value = NULL; + } + } + return value_size; +} + +grn_rc +grn_pat_cursor_set_value(grn_ctx *ctx, grn_pat_cursor *c, + const void *value, int flags) +{ + return grn_pat_set_value(ctx, c->pat, c->curr_rec, value, flags); +} + +grn_rc +grn_pat_cursor_delete(grn_ctx *ctx, grn_pat_cursor *c, + grn_table_delete_optarg *optarg) +{ + return grn_pat_delete_by_id(ctx, c->pat, c->curr_rec, optarg); +} + +void +grn_pat_check(grn_ctx *ctx, grn_pat *pat) +{ + char buf[8]; + struct grn_pat_header *h = pat->header; + if (grn_pat_error_if_truncated(ctx, pat) != GRN_SUCCESS) { + return; + } + GRN_OUTPUT_ARRAY_OPEN("RESULT", 1); + GRN_OUTPUT_MAP_OPEN("SUMMARY", 23); + GRN_OUTPUT_CSTR("flags"); + grn_itoh(h->flags, buf, 8); + GRN_OUTPUT_STR(buf, 8); + GRN_OUTPUT_CSTR("key size"); + GRN_OUTPUT_INT64(h->key_size); + GRN_OUTPUT_CSTR("value_size"); + GRN_OUTPUT_INT64(h->value_size); + GRN_OUTPUT_CSTR("tokenizer"); + GRN_OUTPUT_INT64(h->tokenizer); + GRN_OUTPUT_CSTR("normalizer"); + GRN_OUTPUT_INT64(h->normalizer); + GRN_OUTPUT_CSTR("n_entries"); + GRN_OUTPUT_INT64(h->n_entries); + GRN_OUTPUT_CSTR("curr_rec"); + GRN_OUTPUT_INT64(h->curr_rec); + GRN_OUTPUT_CSTR("curr_key"); + GRN_OUTPUT_INT64(h->curr_key); + GRN_OUTPUT_CSTR("curr_del"); + GRN_OUTPUT_INT64(h->curr_del); + GRN_OUTPUT_CSTR("curr_del2"); + GRN_OUTPUT_INT64(h->curr_del2); + GRN_OUTPUT_CSTR("curr_del3"); + GRN_OUTPUT_INT64(h->curr_del3); + GRN_OUTPUT_CSTR("n_garbages"); + GRN_OUTPUT_INT64(h->n_garbages); + GRN_OUTPUT_MAP_CLOSE(); + GRN_OUTPUT_ARRAY_CLOSE(); +} + +/* utilities */ +void +grn_p_pat_node(grn_ctx *ctx, grn_pat *pat, pat_node *node) +{ + uint8_t *key = NULL; + + if (!node) { + printf("#\n"); + return; + } + + if (PAT_IMD(node)) { + key = (uint8_t *)&(node->key); + } else { + KEY_AT(pat, node->key, key, 0); + } + + printf("#" + ">\n", + node, + node->lr[0], + node->lr[1], + PAT_DEL(node) ? "true" : "false", + PAT_IMD(node) ? "true" : "false", + PAT_LEN(node), + PAT_CHK(node) >> 4, + (PAT_CHK(node) >> 1) & 0x7, + (PAT_CHK(node) & 0x1) ? "true" : "false", + PAT_LEN(node), + (char *)key); +} + +static void +grn_pat_inspect_check(grn_ctx *ctx, grn_obj *buf, int check) +{ + GRN_TEXT_PUTS(ctx, buf, "{"); + grn_text_lltoa(ctx, buf, check >> 4); + GRN_TEXT_PUTS(ctx, buf, ","); + grn_text_lltoa(ctx, buf, (check >> 1) & 7); + GRN_TEXT_PUTS(ctx, buf, ","); + grn_text_lltoa(ctx, buf, check & 1); + GRN_TEXT_PUTS(ctx, buf, "}"); +} + +static void +grn_pat_inspect_node(grn_ctx *ctx, grn_pat *pat, grn_id id, int check, + grn_obj *key_buf, int indent, const char *prefix, + grn_obj *buf) +{ + pat_node *node = NULL; + int i, c; + + PAT_AT(pat, id, node); + c = PAT_CHK(node); + + for (i = 0; i < indent; i++) { + GRN_TEXT_PUTC(ctx, buf, ' '); + } + GRN_TEXT_PUTS(ctx, buf, prefix); + grn_text_lltoa(ctx, buf, id); + grn_pat_inspect_check(ctx, buf, c); + + if (c > check) { + GRN_TEXT_PUTS(ctx, buf, "\n"); + grn_pat_inspect_node(ctx, pat, node->lr[0], c, key_buf, + indent + 2, "L:", buf); + GRN_TEXT_PUTS(ctx, buf, "\n"); + grn_pat_inspect_node(ctx, pat, node->lr[1], c, key_buf, + indent + 2, "R:", buf); + } else if (id) { + int key_size; + uint8_t *key; + + key_size = PAT_LEN(node); + GRN_BULK_REWIND(key_buf); + grn_bulk_space(ctx, key_buf, key_size); + grn_pat_get_key(ctx, pat, id, GRN_BULK_HEAD(key_buf), key_size); + GRN_TEXT_PUTS(ctx, buf, "("); + grn_inspect(ctx, buf, key_buf); + GRN_TEXT_PUTS(ctx, buf, ")"); + + GRN_TEXT_PUTS(ctx, buf, "["); + key = pat_node_get_key(ctx, pat, node); + for (i = 0; i < key_size; i++) { + int j; + uint8_t byte = key[i]; + if (i != 0) { + GRN_TEXT_PUTS(ctx, buf, " "); + } + for (j = 0; j < 8; j++) { + grn_text_lltoa(ctx, buf, (byte >> (7 - j)) & 1); + } + } + GRN_TEXT_PUTS(ctx, buf, "]"); + } +} + +void +grn_pat_inspect_nodes(grn_ctx *ctx, grn_pat *pat, grn_obj *buf) +{ + pat_node *node; + grn_obj key_buf; + + GRN_TEXT_PUTS(ctx, buf, "{"); + PAT_AT(pat, GRN_ID_NIL, node); + if (node->lr[1]) { + GRN_TEXT_PUTS(ctx, buf, "\n"); + GRN_OBJ_INIT(&key_buf, GRN_BULK, 0, pat->obj.header.domain); + grn_pat_inspect_node(ctx, pat, node->lr[1], -1, &key_buf, 0, "", buf); + GRN_OBJ_FIN(ctx, &key_buf); + GRN_TEXT_PUTS(ctx, buf, "\n"); + } + GRN_TEXT_PUTS(ctx, buf, "}"); +} + +static void +grn_pat_cursor_inspect_entries(grn_ctx *ctx, grn_pat_cursor *c, grn_obj *buf) +{ + int i; + GRN_TEXT_PUTS(ctx, buf, "["); + for (i = 0; i < c->sp; i++) { + grn_pat_cursor_entry *e = c->ss + i; + if (i != 0) { + GRN_TEXT_PUTS(ctx, buf, ", "); + } + GRN_TEXT_PUTS(ctx, buf, "["); + grn_text_lltoa(ctx, buf, e->id); + GRN_TEXT_PUTS(ctx, buf, ","); + grn_pat_inspect_check(ctx, buf, e->check); + GRN_TEXT_PUTS(ctx, buf, "]"); + } + GRN_TEXT_PUTS(ctx, buf, "]"); +} + +void +grn_pat_cursor_inspect(grn_ctx *ctx, grn_pat_cursor *c, grn_obj *buf) +{ + GRN_TEXT_PUTS(ctx, buf, "#pat)); + + GRN_TEXT_PUTS(ctx, buf, " "); + GRN_TEXT_PUTS(ctx, buf, "current:"); + grn_text_lltoa(ctx, buf, c->curr_rec); + + GRN_TEXT_PUTS(ctx, buf, " "); + GRN_TEXT_PUTS(ctx, buf, "tail:"); + grn_text_lltoa(ctx, buf, c->tail); + + GRN_TEXT_PUTS(ctx, buf, " "); + GRN_TEXT_PUTS(ctx, buf, "flags:"); + if (c->obj.header.flags & GRN_CURSOR_PREFIX) { + GRN_TEXT_PUTS(ctx, buf, "prefix"); + } else { + if (c->obj.header.flags & GRN_CURSOR_DESCENDING) { + GRN_TEXT_PUTS(ctx, buf, "descending"); + } else { + GRN_TEXT_PUTS(ctx, buf, "ascending"); + } + GRN_TEXT_PUTS(ctx, buf, "|"); + if (c->obj.header.flags & GRN_CURSOR_GT) { + GRN_TEXT_PUTS(ctx, buf, "greater-than"); + } else { + GRN_TEXT_PUTS(ctx, buf, "greater"); + } + GRN_TEXT_PUTS(ctx, buf, "|"); + if (c->obj.header.flags & GRN_CURSOR_LT) { + GRN_TEXT_PUTS(ctx, buf, "less-than"); + } else { + GRN_TEXT_PUTS(ctx, buf, "less"); + } + if (c->obj.header.flags & GRN_CURSOR_BY_ID) { + GRN_TEXT_PUTS(ctx, buf, "|by-id"); + } + if (c->obj.header.flags & GRN_CURSOR_BY_KEY) { + GRN_TEXT_PUTS(ctx, buf, "|by-key"); + } + } + + GRN_TEXT_PUTS(ctx, buf, " "); + GRN_TEXT_PUTS(ctx, buf, "rest:"); + grn_text_lltoa(ctx, buf, c->rest); + + GRN_TEXT_PUTS(ctx, buf, " "); + GRN_TEXT_PUTS(ctx, buf, "entries:"); + grn_pat_cursor_inspect_entries(ctx, c, buf); + + GRN_TEXT_PUTS(ctx, buf, ">"); +} + +typedef struct { + uint8_t code; + uint8_t next; + uint8_t emit; + uint8_t attr; +} rk_tree_node; + +static uint16_t rk_str_idx[] = { + 0x0003, 0x0006, 0x0009, 0x000c, 0x0012, 0x0015, 0x0018, 0x001e, 0x0024, 0x002a, + 0x0030, 0x0036, 0x003c, 0x0042, 0x0048, 0x004e, 0x0054, 0x005a, 0x0060, 0x0066, + 0x006c, 0x0072, 0x0078, 0x007e, 0x0084, 0x008a, 0x0090, 0x0096, 0x009c, 0x00a2, + 0x00a8, 0x00ae, 0x00b4, 0x00ba, 0x00c0, 0x00c3, 0x00c6, 0x00c9, 0x00cc, 0x00cf, + 0x00d2, 0x00d5, 0x00db, 0x00e1, 0x00e7, 0x00ea, 0x00f0, 0x00f6, 0x00fc, 0x00ff, + 0x0105, 0x0108, 0x010e, 0x0111, 0x0114, 0x0117, 0x011a, 0x011d, 0x0120, 0x0123, + 0x0129, 0x012f, 0x0135, 0x013b, 0x013e, 0x0144, 0x014a, 0x0150, 0x0156, 0x0159, + 0x015c, 0x015f, 0x0162, 0x0165, 0x0168, 0x016b, 0x016e, 0x0171, 0x0177, 0x017d, + 0x0183, 0x0189, 0x018c, 0x0192, 0x0198, 0x019e, 0x01a1, 0x01a4, 0x01aa, 0x01b0, + 0x01b6, 0x01bc, 0x01bf, 0x01c2, 0x01c8, 0x01ce, 0x01d1, 0x01d7, 0x01dd, 0x01e0, + 0x01e6, 0x01e9, 0x01ef, 0x01f2, 0x01f5, 0x01fb, 0x0201, 0x0207, 0x020d, 0x0213, + 0x0216, 0x0219, 0x021c, 0x021f, 0x0222, 0x0225, 0x0228, 0x022e, 0x0234, 0x023a, + 0x023d, 0x0243, 0x0249, 0x024f, 0x0252, 0x0258, 0x025e, 0x0264, 0x0267, 0x026d, + 0x0273, 0x0279, 0x027f, 0x0285, 0x0288, 0x028b, 0x028e, 0x0291, 0x0294, 0x0297, + 0x029a, 0x029d, 0x02a0, 0x02a3, 0x02a9, 0x02af, 0x02b5, 0x02b8, 0x02bb, 0x02be, + 0x02c1, 0x02c4, 0x02c7, 0x02ca, 0x02cd, 0x02d0, 0x02d3, 0x02d6, 0x02dc, 0x02e2, + 0x02e8, 0x02eb, 0x02ee, 0x02f1, 0x02f4, 0x02f7, 0x02fa, 0x02fd, 0x0300, 0x0303, + 0x0309, 0x030c, 0x0312, 0x0318, 0x031e, 0x0324, 0x0327, 0x032a, 0x032d +}; +static char rk_str[] = { + 0xe3, 0x82, 0xa1, 0xe3, 0x82, 0xa2, 0xe3, 0x82, 0xa3, 0xe3, 0x82, 0xa4, 0xe3, + 0x82, 0xa4, 0xe3, 0x82, 0xa7, 0xe3, 0x82, 0xa5, 0xe3, 0x82, 0xa6, 0xe3, 0x82, + 0xa6, 0xe3, 0x82, 0xa2, 0xe3, 0x82, 0xa6, 0xe3, 0x82, 0xa3, 0xe3, 0x82, 0xa6, + 0xe3, 0x82, 0xa4, 0xe3, 0x82, 0xa6, 0xe3, 0x82, 0xa6, 0xe3, 0x82, 0xa6, 0xe3, + 0x82, 0xa7, 0xe3, 0x82, 0xa6, 0xe3, 0x82, 0xa8, 0xe3, 0x82, 0xa6, 0xe3, 0x82, + 0xaa, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xa0, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xa1, + 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xa2, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xa3, 0xe3, + 0x82, 0xa6, 0xe3, 0x83, 0xa4, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xa5, 0xe3, 0x82, + 0xa6, 0xe3, 0x83, 0xa6, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xa7, 0xe3, 0x82, 0xa6, + 0xe3, 0x83, 0xa8, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xa9, 0xe3, 0x82, 0xa6, 0xe3, + 0x83, 0xaa, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xab, 0xe3, 0x82, 0xa6, 0xe3, 0x83, + 0xac, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xad, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xae, + 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xaf, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3, + 0x82, 0xa6, 0xe3, 0x83, 0xb1, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb2, 0xe3, 0x82, + 0xa6, 0xe3, 0x83, 0xb3, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xbc, 0xe3, 0x82, 0xa7, + 0xe3, 0x82, 0xa8, 0xe3, 0x82, 0xa9, 0xe3, 0x82, 0xaa, 0xe3, 0x82, 0xab, 0xe3, + 0x82, 0xac, 0xe3, 0x82, 0xad, 0xe3, 0x82, 0xad, 0xe3, 0x83, 0xa3, 0xe3, 0x82, + 0xad, 0xe3, 0x83, 0xa5, 0xe3, 0x82, 0xad, 0xe3, 0x83, 0xa7, 0xe3, 0x82, 0xae, + 0xe3, 0x82, 0xae, 0xe3, 0x83, 0xa3, 0xe3, 0x82, 0xae, 0xe3, 0x83, 0xa5, 0xe3, + 0x82, 0xae, 0xe3, 0x83, 0xa7, 0xe3, 0x82, 0xaf, 0xe3, 0x82, 0xaf, 0xe3, 0x82, + 0xa1, 0xe3, 0x82, 0xb0, 0xe3, 0x82, 0xb0, 0xe3, 0x82, 0xa1, 0xe3, 0x82, 0xb1, + 0xe3, 0x82, 0xb2, 0xe3, 0x82, 0xb3, 0xe3, 0x82, 0xb4, 0xe3, 0x82, 0xb5, 0xe3, + 0x82, 0xb6, 0xe3, 0x82, 0xb7, 0xe3, 0x82, 0xb7, 0xe3, 0x82, 0xa7, 0xe3, 0x82, + 0xb7, 0xe3, 0x83, 0xa3, 0xe3, 0x82, 0xb7, 0xe3, 0x83, 0xa5, 0xe3, 0x82, 0xb7, + 0xe3, 0x83, 0xa7, 0xe3, 0x82, 0xb8, 0xe3, 0x82, 0xb8, 0xe3, 0x82, 0xa7, 0xe3, + 0x82, 0xb8, 0xe3, 0x83, 0xa3, 0xe3, 0x82, 0xb8, 0xe3, 0x83, 0xa5, 0xe3, 0x82, + 0xb8, 0xe3, 0x83, 0xa7, 0xe3, 0x82, 0xb9, 0xe3, 0x82, 0xba, 0xe3, 0x82, 0xbb, + 0xe3, 0x82, 0xbc, 0xe3, 0x82, 0xbd, 0xe3, 0x82, 0xbe, 0xe3, 0x82, 0xbf, 0xe3, + 0x83, 0x80, 0xe3, 0x83, 0x81, 0xe3, 0x83, 0x81, 0xe3, 0x82, 0xa7, 0xe3, 0x83, + 0x81, 0xe3, 0x83, 0xa3, 0xe3, 0x83, 0x81, 0xe3, 0x83, 0xa5, 0xe3, 0x83, 0x81, + 0xe3, 0x83, 0xa7, 0xe3, 0x83, 0x82, 0xe3, 0x83, 0x82, 0xe3, 0x83, 0xa3, 0xe3, + 0x83, 0x82, 0xe3, 0x83, 0xa5, 0xe3, 0x83, 0x82, 0xe3, 0x83, 0xa7, 0xe3, 0x83, + 0x83, 0xe3, 0x83, 0x84, 0xe3, 0x83, 0x84, 0xe3, 0x82, 0xa1, 0xe3, 0x83, 0x84, + 0xe3, 0x82, 0xa3, 0xe3, 0x83, 0x84, 0xe3, 0x82, 0xa7, 0xe3, 0x83, 0x84, 0xe3, + 0x82, 0xa9, 0xe3, 0x83, 0x85, 0xe3, 0x83, 0x86, 0xe3, 0x83, 0x86, 0xe3, 0x82, + 0xa3, 0xe3, 0x83, 0x86, 0xe3, 0x83, 0xa5, 0xe3, 0x83, 0x87, 0xe3, 0x83, 0x87, + 0xe3, 0x82, 0xa3, 0xe3, 0x83, 0x87, 0xe3, 0x83, 0xa5, 0xe3, 0x83, 0x88, 0xe3, + 0x83, 0x88, 0xe3, 0x82, 0xa5, 0xe3, 0x83, 0x89, 0xe3, 0x83, 0x89, 0xe3, 0x82, + 0xa5, 0xe3, 0x83, 0x8a, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0x8b, 0xe3, 0x82, 0xa3, + 0xe3, 0x83, 0x8b, 0xe3, 0x82, 0xa7, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0xa3, 0xe3, + 0x83, 0x8b, 0xe3, 0x83, 0xa5, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0xa7, 0xe3, 0x83, + 0x8c, 0xe3, 0x83, 0x8d, 0xe3, 0x83, 0x8e, 0xe3, 0x83, 0x8f, 0xe3, 0x83, 0x90, + 0xe3, 0x83, 0x91, 0xe3, 0x83, 0x92, 0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa3, 0xe3, + 0x83, 0x92, 0xe3, 0x83, 0xa5, 0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa7, 0xe3, 0x83, + 0x93, 0xe3, 0x83, 0x93, 0xe3, 0x83, 0xa3, 0xe3, 0x83, 0x93, 0xe3, 0x83, 0xa5, + 0xe3, 0x83, 0x93, 0xe3, 0x83, 0xa7, 0xe3, 0x83, 0x94, 0xe3, 0x83, 0x94, 0xe3, + 0x83, 0xa3, 0xe3, 0x83, 0x94, 0xe3, 0x83, 0xa5, 0xe3, 0x83, 0x94, 0xe3, 0x83, + 0xa7, 0xe3, 0x83, 0x95, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xa1, 0xe3, 0x83, 0x95, + 0xe3, 0x82, 0xa3, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xa7, 0xe3, 0x83, 0x95, 0xe3, + 0x82, 0xa9, 0xe3, 0x83, 0x95, 0xe3, 0x83, 0xa5, 0xe3, 0x83, 0x96, 0xe3, 0x83, + 0x97, 0xe3, 0x83, 0x98, 0xe3, 0x83, 0x99, 0xe3, 0x83, 0x9a, 0xe3, 0x83, 0x9b, + 0xe3, 0x83, 0x9c, 0xe3, 0x83, 0x9d, 0xe3, 0x83, 0x9e, 0xe3, 0x83, 0x9f, 0xe3, + 0x83, 0x9f, 0xe3, 0x83, 0xa3, 0xe3, 0x83, 0x9f, 0xe3, 0x83, 0xa5, 0xe3, 0x83, + 0x9f, 0xe3, 0x83, 0xa7, 0xe3, 0x83, 0xa0, 0xe3, 0x83, 0xa1, 0xe3, 0x83, 0xa2, + 0xe3, 0x83, 0xa3, 0xe3, 0x83, 0xa4, 0xe3, 0x83, 0xa5, 0xe3, 0x83, 0xa6, 0xe3, + 0x83, 0xa7, 0xe3, 0x83, 0xa8, 0xe3, 0x83, 0xa9, 0xe3, 0x83, 0xaa, 0xe3, 0x83, + 0xaa, 0xe3, 0x83, 0xa3, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0xa5, 0xe3, 0x83, 0xaa, + 0xe3, 0x83, 0xa7, 0xe3, 0x83, 0xab, 0xe3, 0x83, 0xac, 0xe3, 0x83, 0xad, 0xe3, + 0x83, 0xae, 0xe3, 0x83, 0xaf, 0xe3, 0x83, 0xb0, 0xe3, 0x83, 0xb1, 0xe3, 0x83, + 0xb2, 0xe3, 0x83, 0xb3, 0xe3, 0x83, 0xb3, 0xe3, 0x83, 0xbc, 0xe3, 0x83, 0xb4, + 0xe3, 0x83, 0xb4, 0xe3, 0x82, 0xa1, 0xe3, 0x83, 0xb4, 0xe3, 0x82, 0xa3, 0xe3, + 0x83, 0xb4, 0xe3, 0x82, 0xa7, 0xe3, 0x83, 0xb4, 0xe3, 0x82, 0xa9, 0xe3, 0x83, + 0xb5, 0xe3, 0x83, 0xb6, 0xe3, 0x83, 0xbc +}; +static uint16_t rk_tree_idx[] = { + 0x001b, 0x0022, 0x0025, 0x0028, 0x002d, 0x0030, 0x0039, 0x003b, 0x003c, 0x003f, + 0x0046, 0x0047, 0x004f, 0x0050, 0x0053, 0x005a, 0x005d, 0x0064, 0x0067, 0x006f, + 0x0070, 0x0073, 0x007d, 0x007f, 0x0081, 0x0082, 0x0083, 0x0088, 0x008f, 0x0092, + 0x00af, 0x00b5, 0x00bc, 0x00bf, 0x00c6, 0x00c9, 0x00d1, 0x00d6, 0x00da, 0x00e4, + 0x00e6, 0x00eb, 0x00ec, 0x00f0, 0x00f6, 0x00fc, 0x00fe, 0x0108, 0x010a, 0x010c, + 0x010d, 0x010e, 0x0113, 0x0118, 0x011f, 0x0123, 0x0125, 0x0164, 0x0180, 0x0183, + 0x0199, 0x01ad +}; +static rk_tree_node rk_tree[] = { + {0x2d, 0x00, 0xb2, 0x01}, {0x61, 0x00, 0x01, 0x01}, {0x62, 0x01, 0xff, 0x01}, + {0x63, 0x03, 0xff, 0x01}, {0x64, 0x06, 0xff, 0x01}, {0x65, 0x00, 0x24, 0x01}, + {0x66, 0x0a, 0xff, 0x01}, {0x67, 0x0c, 0xff, 0x01}, {0x68, 0x0f, 0xff, 0x01}, + {0x69, 0x00, 0x03, 0x01}, {0x6a, 0x11, 0xff, 0x01}, {0x6b, 0x13, 0xff, 0x01}, + {0x6c, 0x16, 0xff, 0x01}, {0x6d, 0x1c, 0xff, 0x01}, {0x6e, 0x1e, 0xff, 0x01}, + {0x6f, 0x00, 0x26, 0x01}, {0x70, 0x20, 0xff, 0x01}, {0x72, 0x22, 0xff, 0x01}, + {0x73, 0x24, 0xff, 0x01}, {0x74, 0x27, 0xff, 0x01}, {0x75, 0x00, 0x06, 0x01}, + {0x76, 0x2c, 0xff, 0x01}, {0x77, 0x2d, 0xff, 0x01}, {0x78, 0x2f, 0xff, 0x01}, + {0x79, 0x35, 0xff, 0x01}, {0x7a, 0x36, 0xff, 0x01}, {0xe3, 0x38, 0xff, 0x01}, + {0x61, 0x00, 0x72, 0x01}, {0x62, 0x01, 0x56, 0x01}, {0x65, 0x00, 0x89, 0x01}, + {0x69, 0x00, 0x78, 0x01}, {0x6f, 0x00, 0x8c, 0x01}, {0x75, 0x00, 0x86, 0x01}, + {0x79, 0x02, 0xff, 0x00}, {0x61, 0x00, 0x79, 0x01}, {0x6f, 0x00, 0x7b, 0x01}, + {0x75, 0x00, 0x7a, 0x01}, {0x63, 0x03, 0x56, 0x01}, {0x68, 0x04, 0xff, 0x01}, + {0x79, 0x05, 0xff, 0x01}, {0x61, 0x00, 0x4f, 0x00}, {0x65, 0x00, 0x4e, 0x00}, + {0x69, 0x00, 0x4d, 0x01}, {0x6f, 0x00, 0x51, 0x00}, {0x75, 0x00, 0x50, 0x00}, + {0x61, 0x00, 0x4f, 0x01}, {0x6f, 0x00, 0x51, 0x01}, {0x75, 0x00, 0x50, 0x01}, + {0x61, 0x00, 0x4c, 0x01}, {0x64, 0x06, 0x56, 0x01}, {0x65, 0x00, 0x60, 0x01}, + {0x68, 0x07, 0xff, 0x00}, {0x69, 0x00, 0x61, 0x00}, {0x6f, 0x00, 0x65, 0x01}, + {0x75, 0x00, 0x5c, 0x01}, {0x77, 0x08, 0xff, 0x00}, {0x79, 0x09, 0xff, 0x01}, + {0x69, 0x00, 0x61, 0x01}, {0x75, 0x00, 0x62, 0x01}, {0x75, 0x00, 0x66, 0x01}, + {0x61, 0x00, 0x53, 0x01}, {0x6f, 0x00, 0x55, 0x01}, {0x75, 0x00, 0x54, 0x01}, + {0x61, 0x00, 0x81, 0x00}, {0x65, 0x00, 0x83, 0x00}, {0x66, 0x0a, 0x56, 0x01}, + {0x69, 0x00, 0x82, 0x00}, {0x6f, 0x00, 0x84, 0x00}, {0x75, 0x00, 0x80, 0x01}, + {0x79, 0x0b, 0xff, 0x00}, {0x75, 0x00, 0x85, 0x01}, {0x61, 0x00, 0x28, 0x01}, + {0x65, 0x00, 0x36, 0x01}, {0x67, 0x0c, 0x56, 0x01}, {0x69, 0x00, 0x2d, 0x01}, + {0x6f, 0x00, 0x38, 0x01}, {0x75, 0x00, 0x33, 0x01}, {0x77, 0x0d, 0xff, 0x00}, + {0x79, 0x0e, 0xff, 0x00}, {0x61, 0x00, 0x34, 0x01}, {0x61, 0x00, 0x2e, 0x01}, + {0x6f, 0x00, 0x30, 0x01}, {0x75, 0x00, 0x2f, 0x01}, {0x61, 0x00, 0x71, 0x01}, + {0x65, 0x00, 0x88, 0x01}, {0x68, 0x0f, 0x56, 0x01}, {0x69, 0x00, 0x74, 0x01}, + {0x6f, 0x00, 0x8b, 0x01}, {0x75, 0x00, 0x80, 0x01}, {0x79, 0x10, 0xff, 0x00}, + {0x61, 0x00, 0x75, 0x01}, {0x6f, 0x00, 0x77, 0x01}, {0x75, 0x00, 0x76, 0x01}, + {0x61, 0x00, 0x42, 0x00}, {0x65, 0x00, 0x41, 0x00}, {0x69, 0x00, 0x40, 0x01}, + {0x6a, 0x11, 0x56, 0x01}, {0x6f, 0x00, 0x44, 0x00}, {0x75, 0x00, 0x43, 0x00}, + {0x79, 0x12, 0xff, 0x00}, {0x61, 0x00, 0x42, 0x01}, {0x6f, 0x00, 0x44, 0x01}, + {0x75, 0x00, 0x43, 0x01}, {0x61, 0x00, 0x27, 0x01}, {0x65, 0x00, 0x35, 0x01}, + {0x69, 0x00, 0x29, 0x01}, {0x6b, 0x13, 0x56, 0x01}, {0x6f, 0x00, 0x37, 0x01}, + {0x75, 0x00, 0x31, 0x01}, {0x77, 0x14, 0xff, 0x00}, {0x79, 0x15, 0xff, 0x00}, + {0x61, 0x00, 0x32, 0x01}, {0x61, 0x00, 0x2a, 0x01}, {0x6f, 0x00, 0x2c, 0x01}, + {0x75, 0x00, 0x2b, 0x01}, {0x61, 0x00, 0x00, 0x01}, {0x65, 0x00, 0x23, 0x01}, + {0x69, 0x00, 0x02, 0x01}, {0x6b, 0x17, 0xff, 0x01}, {0x6c, 0x16, 0x56, 0x01}, + {0x6f, 0x00, 0x25, 0x01}, {0x74, 0x18, 0xff, 0x01}, {0x75, 0x00, 0x05, 0x01}, + {0x77, 0x1a, 0xff, 0x01}, {0x79, 0x1b, 0xff, 0x01}, {0x61, 0x00, 0xb0, 0x01}, + {0x65, 0x00, 0xb1, 0x01}, {0x73, 0x19, 0xff, 0x00}, {0x75, 0x00, 0x56, 0x01}, + {0x75, 0x00, 0x56, 0x01}, {0x61, 0x00, 0xa4, 0x01}, {0x61, 0x00, 0x96, 0x01}, + {0x65, 0x00, 0x23, 0x01}, {0x69, 0x00, 0x02, 0x01}, {0x6f, 0x00, 0x9a, 0x01}, + {0x75, 0x00, 0x98, 0x01}, {0x61, 0x00, 0x8e, 0x01}, {0x65, 0x00, 0x94, 0x01}, + {0x69, 0x00, 0x8f, 0x01}, {0x6d, 0x1c, 0x56, 0x01}, {0x6f, 0x00, 0x95, 0x01}, + {0x75, 0x00, 0x93, 0x01}, {0x79, 0x1d, 0xff, 0x00}, {0x61, 0x00, 0x90, 0x01}, + {0x6f, 0x00, 0x92, 0x01}, {0x75, 0x00, 0x91, 0x01}, {0x00, 0x00, 0xa9, 0x01}, + {0x27, 0x00, 0xa9, 0x00}, {0x2d, 0x00, 0xaa, 0x00}, {0x61, 0x00, 0x67, 0x01}, + {0x62, 0x01, 0xa9, 0x00}, {0x63, 0x03, 0xa9, 0x00}, {0x64, 0x06, 0xa9, 0x00}, + {0x65, 0x00, 0x6f, 0x01}, {0x66, 0x0a, 0xa9, 0x00}, {0x67, 0x0c, 0xa9, 0x00}, + {0x68, 0x0f, 0xa9, 0x00}, {0x69, 0x00, 0x68, 0x01}, {0x6a, 0x11, 0xa9, 0x00}, + {0x6b, 0x13, 0xa9, 0x00}, {0x6c, 0x16, 0xa9, 0x00}, {0x6d, 0x1c, 0xa9, 0x00}, + {0x6e, 0x00, 0xa9, 0x00}, {0x6f, 0x00, 0x70, 0x01}, {0x70, 0x20, 0xa9, 0x00}, + {0x72, 0x22, 0xa9, 0x00}, {0x73, 0x24, 0xa9, 0x00}, {0x74, 0x27, 0xa9, 0x00}, + {0x75, 0x00, 0x6e, 0x01}, {0x76, 0x2c, 0xa9, 0x00}, {0x77, 0x2d, 0xa9, 0x00}, + {0x78, 0x2f, 0xa9, 0x00}, {0x79, 0x1f, 0xff, 0x00}, {0x7a, 0x36, 0xa9, 0x00}, + {0xe3, 0x38, 0xa9, 0x00}, {0x00, 0x00, 0xa9, 0x01}, {0x61, 0x00, 0x6b, 0x01}, + {0x65, 0x00, 0x6a, 0x01}, {0x69, 0x00, 0x69, 0x01}, {0x6f, 0x00, 0x6d, 0x01}, + {0x75, 0x00, 0x6c, 0x01}, {0x61, 0x00, 0x73, 0x01}, {0x65, 0x00, 0x8a, 0x01}, + {0x69, 0x00, 0x7c, 0x01}, {0x6f, 0x00, 0x8d, 0x01}, {0x70, 0x20, 0x56, 0x01}, + {0x75, 0x00, 0x87, 0x01}, {0x79, 0x21, 0xff, 0x00}, {0x61, 0x00, 0x7d, 0x01}, + {0x6f, 0x00, 0x7f, 0x01}, {0x75, 0x00, 0x7e, 0x01}, {0x61, 0x00, 0x9c, 0x01}, + {0x65, 0x00, 0xa2, 0x01}, {0x69, 0x00, 0x9d, 0x01}, {0x6f, 0x00, 0xa3, 0x01}, + {0x72, 0x22, 0x56, 0x01}, {0x75, 0x00, 0xa1, 0x01}, {0x79, 0x23, 0xff, 0x00}, + {0x61, 0x00, 0x9e, 0x01}, {0x6f, 0x00, 0xa0, 0x01}, {0x75, 0x00, 0x9f, 0x01}, + {0x61, 0x00, 0x39, 0x01}, {0x65, 0x00, 0x47, 0x01}, {0x68, 0x25, 0xff, 0x00}, + {0x69, 0x00, 0x3b, 0x01}, {0x6f, 0x00, 0x49, 0x01}, {0x73, 0x24, 0x56, 0x01}, + {0x75, 0x00, 0x45, 0x01}, {0x79, 0x26, 0xff, 0x00}, {0x61, 0x00, 0x3d, 0x00}, + {0x65, 0x00, 0x3c, 0x00}, {0x69, 0x00, 0x3b, 0x01}, {0x6f, 0x00, 0x3f, 0x00}, + {0x75, 0x00, 0x3e, 0x00}, {0x61, 0x00, 0x3d, 0x01}, {0x65, 0x00, 0x3c, 0x01}, + {0x6f, 0x00, 0x3f, 0x01}, {0x75, 0x00, 0x3e, 0x01}, {0x61, 0x00, 0x4b, 0x01}, + {0x65, 0x00, 0x5d, 0x01}, {0x68, 0x28, 0xff, 0x00}, {0x69, 0x00, 0x4d, 0x01}, + {0x6f, 0x00, 0x63, 0x01}, {0x73, 0x29, 0xff, 0x00}, {0x74, 0x27, 0x56, 0x01}, + {0x75, 0x00, 0x57, 0x01}, {0x77, 0x2a, 0xff, 0x00}, {0x79, 0x2b, 0xff, 0x00}, + {0x69, 0x00, 0x5e, 0x01}, {0x75, 0x00, 0x5f, 0x01}, {0x61, 0x00, 0x58, 0x00}, + {0x65, 0x00, 0x5a, 0x00}, {0x69, 0x00, 0x59, 0x00}, {0x6f, 0x00, 0x5b, 0x00}, + {0x75, 0x00, 0x57, 0x01}, {0x75, 0x00, 0x64, 0x01}, {0x61, 0x00, 0x4f, 0x01}, + {0x65, 0x00, 0x4e, 0x01}, {0x6f, 0x00, 0x51, 0x01}, {0x75, 0x00, 0x50, 0x01}, + {0x61, 0x00, 0xac, 0x00}, {0x65, 0x00, 0xae, 0x00}, {0x69, 0x00, 0xad, 0x00}, + {0x6f, 0x00, 0xaf, 0x00}, {0x75, 0x00, 0xab, 0x01}, {0x76, 0x2c, 0x56, 0x01}, + {0x61, 0x00, 0xa5, 0x01}, {0x65, 0x00, 0x0b, 0x01}, {0x69, 0x00, 0x08, 0x01}, + {0x6f, 0x00, 0xa8, 0x01}, {0x77, 0x2d, 0x56, 0x01}, {0x79, 0x2e, 0xff, 0x01}, + {0x65, 0x00, 0xa7, 0x01}, {0x69, 0x00, 0xa6, 0x01}, {0x61, 0x00, 0x00, 0x01}, + {0x65, 0x00, 0x23, 0x01}, {0x69, 0x00, 0x02, 0x01}, {0x6b, 0x30, 0xff, 0x01}, + {0x6f, 0x00, 0x25, 0x01}, {0x74, 0x31, 0xff, 0x01}, {0x75, 0x00, 0x05, 0x01}, + {0x77, 0x33, 0xff, 0x01}, {0x78, 0x2f, 0x56, 0x01}, {0x79, 0x34, 0xff, 0x01}, + {0x61, 0x00, 0xb0, 0x01}, {0x65, 0x00, 0xb1, 0x01}, {0x73, 0x32, 0xff, 0x00}, + {0x75, 0x00, 0x56, 0x01}, {0x75, 0x00, 0x56, 0x01}, {0x61, 0x00, 0xa4, 0x01}, + {0x61, 0x00, 0x96, 0x01}, {0x65, 0x00, 0x23, 0x01}, {0x69, 0x00, 0x02, 0x01}, + {0x6f, 0x00, 0x9a, 0x01}, {0x75, 0x00, 0x98, 0x01}, {0x61, 0x00, 0x97, 0x01}, + {0x65, 0x00, 0x04, 0x01}, {0x6f, 0x00, 0x9b, 0x01}, {0x75, 0x00, 0x99, 0x01}, + {0x79, 0x35, 0x56, 0x01}, {0x61, 0x00, 0x3a, 0x01}, {0x65, 0x00, 0x48, 0x01}, + {0x69, 0x00, 0x40, 0x01}, {0x6f, 0x00, 0x4a, 0x01}, {0x75, 0x00, 0x46, 0x01}, + {0x79, 0x37, 0xff, 0x00}, {0x7a, 0x36, 0x56, 0x01}, {0x61, 0x00, 0x42, 0x01}, + {0x65, 0x00, 0x41, 0x01}, {0x6f, 0x00, 0x44, 0x01}, {0x75, 0x00, 0x43, 0x01}, + {0x81, 0x39, 0xff, 0x01}, {0x82, 0x3d, 0xff, 0x01}, {0x81, 0x00, 0x00, 0x01}, + {0x82, 0x00, 0x01, 0x01}, {0x83, 0x00, 0x02, 0x01}, {0x84, 0x00, 0x03, 0x01}, + {0x85, 0x00, 0x05, 0x01}, {0x86, 0x3a, 0xff, 0x01}, {0x87, 0x00, 0x23, 0x01}, + {0x88, 0x00, 0x24, 0x01}, {0x89, 0x00, 0x25, 0x01}, {0x8a, 0x00, 0x26, 0x01}, + {0x8b, 0x00, 0x27, 0x01}, {0x8c, 0x00, 0x28, 0x01}, {0x8d, 0x00, 0x29, 0x01}, + {0x8e, 0x00, 0x2d, 0x01}, {0x8f, 0x00, 0x31, 0x01}, {0x90, 0x00, 0x33, 0x01}, + {0x91, 0x00, 0x35, 0x01}, {0x92, 0x00, 0x36, 0x01}, {0x93, 0x00, 0x37, 0x01}, + {0x94, 0x00, 0x38, 0x01}, {0x95, 0x00, 0x39, 0x01}, {0x96, 0x00, 0x3a, 0x01}, + {0x97, 0x00, 0x3b, 0x01}, {0x98, 0x00, 0x40, 0x01}, {0x99, 0x00, 0x45, 0x01}, + {0x9a, 0x00, 0x46, 0x01}, {0x9b, 0x00, 0x47, 0x01}, {0x9c, 0x00, 0x48, 0x01}, + {0x9d, 0x00, 0x49, 0x01}, {0x9e, 0x00, 0x4a, 0x01}, {0x9f, 0x00, 0x4b, 0x01}, + {0xa0, 0x00, 0x4c, 0x01}, {0xa1, 0x00, 0x4d, 0x01}, {0xa2, 0x00, 0x52, 0x01}, + {0xa3, 0x00, 0x56, 0x01}, {0xa4, 0x00, 0x57, 0x01}, {0xa5, 0x00, 0x5c, 0x01}, + {0xa6, 0x00, 0x5d, 0x01}, {0xa7, 0x00, 0x60, 0x01}, {0xa8, 0x00, 0x63, 0x01}, + {0xa9, 0x00, 0x65, 0x01}, {0xaa, 0x00, 0x67, 0x01}, {0xab, 0x00, 0x68, 0x01}, + {0xac, 0x00, 0x6e, 0x01}, {0xad, 0x00, 0x6f, 0x01}, {0xae, 0x00, 0x70, 0x01}, + {0xaf, 0x00, 0x71, 0x01}, {0xb0, 0x00, 0x72, 0x01}, {0xb1, 0x00, 0x73, 0x01}, + {0xb2, 0x00, 0x74, 0x01}, {0xb3, 0x00, 0x78, 0x01}, {0xb4, 0x00, 0x7c, 0x01}, + {0xb5, 0x00, 0x80, 0x01}, {0xb6, 0x00, 0x86, 0x01}, {0xb7, 0x00, 0x87, 0x01}, + {0xb8, 0x00, 0x88, 0x01}, {0xb9, 0x00, 0x89, 0x01}, {0xba, 0x00, 0x8a, 0x01}, + {0xbb, 0x00, 0x8b, 0x01}, {0xbc, 0x00, 0x8c, 0x01}, {0xbd, 0x00, 0x8d, 0x01}, + {0xbe, 0x00, 0x8e, 0x01}, {0xbf, 0x00, 0x8f, 0x01}, {0x00, 0x00, 0x06, 0x00}, + {0x2d, 0x00, 0x22, 0x00}, {0x61, 0x00, 0x07, 0x00}, {0x62, 0x01, 0x06, 0x00}, + {0x63, 0x03, 0x06, 0x00}, {0x64, 0x06, 0x06, 0x00}, {0x65, 0x00, 0x0c, 0x00}, + {0x66, 0x0a, 0x06, 0x00}, {0x67, 0x0c, 0x06, 0x00}, {0x68, 0x0f, 0x06, 0x00}, + {0x69, 0x00, 0x09, 0x00}, {0x6a, 0x11, 0x06, 0x00}, {0x6b, 0x13, 0x06, 0x00}, + {0x6c, 0x16, 0x06, 0x00}, {0x6d, 0x1c, 0x06, 0x00}, {0x6e, 0x1e, 0x06, 0x00}, + {0x6f, 0x00, 0x0d, 0x00}, {0x70, 0x20, 0x06, 0x00}, {0x72, 0x22, 0x06, 0x00}, + {0x73, 0x24, 0x06, 0x00}, {0x74, 0x27, 0x06, 0x00}, {0x75, 0x00, 0x0a, 0x00}, + {0x76, 0x2c, 0x06, 0x00}, {0x77, 0x2d, 0x06, 0x00}, {0x78, 0x2f, 0x06, 0x00}, + {0x79, 0x35, 0x06, 0x00}, {0x7a, 0x36, 0x06, 0x00}, {0xe3, 0x3b, 0xff, 0x01}, + {0x00, 0x00, 0x06, 0x00}, {0x81, 0x39, 0x06, 0x00}, {0x82, 0x3c, 0xff, 0x01}, + {0x00, 0x00, 0x06, 0x01}, {0x80, 0x00, 0x0e, 0x00}, {0x81, 0x00, 0x0f, 0x00}, + {0x82, 0x00, 0x10, 0x00}, {0x83, 0x00, 0x11, 0x00}, {0x84, 0x00, 0x12, 0x00}, + {0x85, 0x00, 0x13, 0x00}, {0x86, 0x00, 0x14, 0x00}, {0x87, 0x00, 0x15, 0x00}, + {0x88, 0x00, 0x16, 0x00}, {0x89, 0x00, 0x17, 0x00}, {0x8a, 0x00, 0x18, 0x00}, + {0x8b, 0x00, 0x19, 0x00}, {0x8c, 0x00, 0x1a, 0x00}, {0x8d, 0x00, 0x1b, 0x00}, + {0x8e, 0x00, 0x1c, 0x00}, {0x8f, 0x00, 0x1d, 0x00}, {0x90, 0x00, 0x1e, 0x00}, + {0x91, 0x00, 0x1f, 0x00}, {0x92, 0x00, 0x20, 0x00}, {0x93, 0x00, 0x21, 0x00}, + {0x9b, 0x00, 0xab, 0x01}, {0x80, 0x00, 0x93, 0x01}, {0x81, 0x00, 0x94, 0x01}, + {0x82, 0x00, 0x95, 0x01}, {0x83, 0x00, 0x96, 0x01}, {0x84, 0x00, 0x97, 0x01}, + {0x85, 0x00, 0x98, 0x01}, {0x86, 0x00, 0x99, 0x01}, {0x87, 0x00, 0x9a, 0x01}, + {0x88, 0x00, 0x9b, 0x01}, {0x89, 0x00, 0x9c, 0x01}, {0x8a, 0x00, 0x9d, 0x01}, + {0x8b, 0x00, 0xa1, 0x01}, {0x8c, 0x00, 0xa2, 0x01}, {0x8d, 0x00, 0xa3, 0x01}, + {0x8e, 0x00, 0xa4, 0x01}, {0x8f, 0x00, 0xa5, 0x01}, {0x90, 0x00, 0xa6, 0x01}, + {0x91, 0x00, 0xa7, 0x01}, {0x92, 0x00, 0xa8, 0x01}, {0x93, 0x00, 0xa9, 0x01} +}; + +static rk_tree_node * +rk_lookup(uint8_t state, uint8_t code) +{ + if (state < sizeof(rk_tree_idx)/sizeof(uint16_t)) { + uint16_t ns = state ? rk_tree_idx[state - 1] : 0; + uint16_t ne = rk_tree_idx[state]; + while (ns < ne) { + uint16_t m = (ns + ne)>>1; + rk_tree_node *rn = &rk_tree[m]; + if (rn->code == code) { return rn; } + if (rn->code < code) { + ns = m + 1; + } else { + ne = m; + } + } + } + return NULL; +} + +static uint32_t +rk_emit(rk_tree_node *rn, char **str) +{ + if (rn && rn->emit != 0xff) { + uint16_t pos = rn->emit ? rk_str_idx[rn->emit - 1] : 0; + *str = &rk_str[pos]; + return (uint32_t)(rk_str_idx[rn->emit] - pos); + } else { + *str = NULL; + return 0; + } +} + +#define RK_OUTPUT(e,l) do {\ + if (oc < oe) {\ + uint32_t l_ = (oc + (l) < oe) ? (l) : (oe - oc);\ + grn_memcpy(oc, (e), l_);\ + oc += l_;\ + ic_ = ic;\ + }\ +} while (0) + +static uint32_t +rk_conv(const char *str, uint32_t str_len, uint8_t *buf, uint32_t buf_size, uint8_t *statep) +{ + uint32_t l; + uint8_t state = 0; + rk_tree_node *rn; + char *e; + uint8_t *oc = buf, *oe = oc + buf_size; + const uint8_t *ic = (uint8_t *)str, *ic_ = ic, *ie = ic + str_len; + while (ic < ie) { + if ((rn = rk_lookup(state, *ic))) { + ic++; + if ((l = rk_emit(rn, &e))) { RK_OUTPUT(e, l); } + state = rn->next; + } else { + if (!state) { ic++; } + if (ic_ < ic) { RK_OUTPUT(ic_, ic - ic_); } + state = 0; + } + } +#ifdef FLUSH_UNRESOLVED_INPUT + if ((rn = rk_lookup(state, 0))) { + if ((l = rk_emit(rn, &e))) { RK_OUTPUT(e, l); } + state = rn->next; + } else { + if (ic_ < ic) { RK_OUTPUT(ic_, ic - ic_); } + } +#endif /* FLUSH_UNRESOLVED_INPUT */ + *statep = state; + return oc - buf; +} + +static grn_id +sub_search(grn_ctx *ctx, grn_pat *pat, grn_id id, + int *c0, uint8_t *key, uint32_t key_len) +{ + pat_node *pn; + uint32_t len = key_len * 16; + if (!key_len) { return id; } + PAT_AT(pat, id, pn); + while (pn) { + int ch; + ch = PAT_CHK(pn); + if (*c0 < ch && ch < len - 1) { + if (ch & 1) { + id = (ch + 1 < len) ? pn->lr[1] : pn->lr[0]; + } else { + id = pn->lr[nth_bit(key, ch, len)]; + } + *c0 = ch; + PAT_AT(pat, id, pn); + } else { + const uint8_t *k = pat_node_get_key(ctx, pat, pn); + return (k && key_len <= PAT_LEN(pn) && !memcmp(k, key, key_len)) ? id : GRN_ID_NIL; + } + } + return GRN_ID_NIL; +} + +static void +search_push(grn_ctx *ctx, grn_pat *pat, grn_pat_cursor *c, + uint8_t *key, uint32_t key_len, uint8_t state, grn_id id, int c0, int flags) +{ + if (state) { + int step; + uint16_t ns, ne; + if (flags & GRN_CURSOR_DESCENDING) { + ns = rk_tree_idx[state - 1]; + ne = rk_tree_idx[state]; + step = 1; + } else { + ns = rk_tree_idx[state] - 1; + ne = rk_tree_idx[state - 1] - 1; + step = -1; + } + for (; ns != ne; ns += step) { + rk_tree_node *rn = &rk_tree[ns]; + if (rn->attr) { + char *e; + uint32_t l = rk_emit(rn, &e); + if (l) { + if (l + key_len <= GRN_TABLE_MAX_KEY_SIZE) { + int ch = c0; + grn_id i; + grn_memcpy(key + key_len, e, l); + if ((i = sub_search(ctx, pat, id, &ch, key, key_len + l))) { + search_push(ctx, pat, c, key, key_len + l, rn->next, i, ch, flags); + } + } + } else { + search_push(ctx, pat, c, key, key_len, rn->next, id, c0, flags); + } + } + } + } else { + pat_node *pn; + PAT_AT(pat, id, pn); + if (pn) { + int ch = PAT_CHK(pn); + uint32_t len = key_len * 16; + if (c0 < ch) { + if (flags & GRN_CURSOR_DESCENDING) { + if ((ch > len - 1) || !(flags & GRN_CURSOR_GT)) { push(c, pn->lr[0], ch); } + push(c, pn->lr[1], ch); + } else { + push(c, pn->lr[1], ch); + if ((ch > len - 1) || !(flags & GRN_CURSOR_GT)) { push(c, pn->lr[0], ch); } + } + } else { + if (PAT_LEN(pn) * 16 > len || !(flags & GRN_CURSOR_GT)) { push(c, id, ch); } + } + } + } +} + +static grn_rc +set_cursor_rk(grn_ctx *ctx, grn_pat *pat, grn_pat_cursor *c, + const void *key, uint32_t key_len, int flags) +{ + grn_id id; + uint8_t state; + pat_node *pn; + int c0 = -1; + uint32_t len, byte_len; + uint8_t keybuf[GRN_TABLE_MAX_KEY_SIZE]; + if (flags & GRN_CURSOR_SIZE_BY_BIT) { return GRN_OPERATION_NOT_SUPPORTED; } + byte_len = rk_conv(key, key_len, keybuf, GRN_TABLE_MAX_KEY_SIZE, &state); + len = byte_len * 16; + PAT_AT(pat, 0, pn); + id = pn->lr[1]; + if ((id = sub_search(ctx, pat, id, &c0, keybuf, byte_len))) { + search_push(ctx, pat, c, keybuf, byte_len, state, id, c0, flags); + } + return ctx->rc; +} + +uint32_t +grn_pat_total_key_size(grn_ctx *ctx, grn_pat *pat) +{ + return pat->header->curr_key; +} + +grn_bool +grn_pat_is_key_encoded(grn_ctx *ctx, grn_pat *pat) +{ + grn_obj *domain; + uint32_t key_size; + + domain = grn_ctx_at(ctx, pat->obj.header.domain); + if (grn_obj_is_type(ctx, domain)) { + key_size = grn_type_size(ctx, domain); + } else { + key_size = sizeof(grn_id); + } + + return KEY_NEEDS_CONVERT(pat, key_size); +} + +grn_rc +grn_pat_dirty(grn_ctx *ctx, grn_pat *pat) +{ + grn_rc rc = GRN_SUCCESS; + + CRITICAL_SECTION_ENTER(pat->lock); + if (!pat->is_dirty) { + uint32_t n_dirty_opens; + pat->is_dirty = GRN_TRUE; + GRN_ATOMIC_ADD_EX(&(pat->header->n_dirty_opens), 1, n_dirty_opens); + rc = grn_io_flush(ctx, pat->io); + } + CRITICAL_SECTION_LEAVE(pat->lock); + + return rc; +} + +grn_bool +grn_pat_is_dirty(grn_ctx *ctx, grn_pat *pat) +{ + return pat->header->n_dirty_opens > 0; +} + +grn_rc +grn_pat_clean(grn_ctx *ctx, grn_pat *pat) +{ + grn_rc rc = GRN_SUCCESS; + + CRITICAL_SECTION_ENTER(pat->lock); + if (pat->is_dirty) { + uint32_t n_dirty_opens; + pat->is_dirty = GRN_FALSE; + GRN_ATOMIC_ADD_EX(&(pat->header->n_dirty_opens), -1, n_dirty_opens); + rc = grn_io_flush(ctx, pat->io); + } + CRITICAL_SECTION_LEAVE(pat->lock); + + return rc; +} + +grn_rc +grn_pat_clear_dirty(grn_ctx *ctx, grn_pat *pat) +{ + grn_rc rc = GRN_SUCCESS; + + CRITICAL_SECTION_ENTER(pat->lock); + pat->is_dirty = GRN_FALSE; + pat->header->n_dirty_opens = 0; + rc = grn_io_flush(ctx, pat->io); + CRITICAL_SECTION_LEAVE(pat->lock); + + return rc; +} -- cgit v1.2.3