summaryrefslogtreecommitdiffstats
path: root/storage/mroonga/vendor/groonga/lib/ii.c
diff options
context:
space:
mode:
Diffstat (limited to 'storage/mroonga/vendor/groonga/lib/ii.c')
-rw-r--r--storage/mroonga/vendor/groonga/lib/ii.c12816
1 files changed, 12816 insertions, 0 deletions
diff --git a/storage/mroonga/vendor/groonga/lib/ii.c b/storage/mroonga/vendor/groonga/lib/ii.c
new file mode 100644
index 00000000..2abd0747
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/lib/ii.c
@@ -0,0 +1,12816 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+ Copyright(C) 2009-2017 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+#include "grn.h"
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#ifdef WIN32
+# include <io.h>
+# include <share.h>
+#endif /* WIN32 */
+
+#include "grn_ii.h"
+#include "grn_ctx_impl.h"
+#include "grn_token_cursor.h"
+#include "grn_pat.h"
+#include "grn_db.h"
+#include "grn_output.h"
+#include "grn_scorer.h"
+#include "grn_util.h"
+
+#ifdef GRN_WITH_ONIGMO
+# define GRN_II_SELECT_ENABLE_SEQUENTIAL_SEARCH
+#endif
+
+#ifdef GRN_II_SELECT_ENABLE_SEQUENTIAL_SEARCH
+# include "grn_string.h"
+# include <onigmo.h>
+#endif
+
+#define MAX_PSEG 0x20000 /* upper bound applied to GRN_II_MAX_N_SEGMENTS_SMALL in grn_ii_init_from_env() */
+#define MAX_PSEG_SMALL 0x00200 /* initial value of grn_ii_max_n_segments_small */
+/* MAX_PSEG_MEDIUM has enough space for the following source:
+ * * Single source.
+ * * Source is a fixed size column or _key of a table.
+ * * Source column is a scalar column.
+ * * Lexicon doesn't have tokenizer.
+ */
+#define MAX_PSEG_MEDIUM 0x10000
+#define S_CHUNK (1 << GRN_II_W_CHUNK) /* chunk_new()/chunk_free() treat sizes above this as whole-chunk runs */
+#define W_SEGMENT 18 /* log2 of S_SEGMENT */
+#define S_SEGMENT (1 << W_SEGMENT) /* bytes cleared per segment by segment_get_clear() */
+#define W_ARRAY_ELEMENT 3 /* log2 of S_ARRAY_ELEMENT */
+#define S_ARRAY_ELEMENT (1 << W_ARRAY_ELEMENT)
+#define W_ARRAY (W_SEGMENT - W_ARRAY_ELEMENT) /* log2 of (S_SEGMENT / S_ARRAY_ELEMENT) */
+#define ARRAY_MASK_IN_A_SEGMENT ((1 << W_ARRAY) - 1)
+
+#define S_GARBAGE (1<<12) /* byte size mapped for one grn_ii_ginfo block */
+
+#define CHUNK_SPLIT 0x80000000
+#define CHUNK_SPLIT_THRESHOLD 0x60000
+
+#define MAX_N_ELEMENTS 5
+
+#define DEFINE_NAME(ii) \
+ const char *name; \
+ char name_buffer[GRN_TABLE_MAX_KEY_SIZE]; \
+ int name_size; \
+ do { \
+ if (DB_OBJ(ii)->id == GRN_ID_NIL) { \
+ name = "(temporary)"; \
+ name_size = strlen(name); \
+ } else { \
+ name_size = grn_obj_name(ctx, (grn_obj *)ii, \
+ name_buffer, GRN_TABLE_MAX_KEY_SIZE); \
+ name = name_buffer; \
+ } \
+ } while (GRN_FALSE) /* DEFINE_NAME(ii): declares name, name_buffer and name_size in the
+ * enclosing scope, so it must appear where declarations are allowed.
+ * name is "(temporary)" when the object ID is GRN_ID_NIL, otherwise the
+ * text grn_obj_name() wrote into name_buffer. Uses `ctx` from the caller's
+ * scope. name is meant to be printed with "%.*s" and name_size. */
+
+#define LSEG(pos) ((pos) >> 16) /* segment part: everything above the low 16 bits */
+#define LPOS(pos) (((pos) & 0xffff) << 2) /* low 16 bits scaled by 4 */
+#define SEG2POS(seg,pos) ((((uint32_t)(seg)) << 16) + (((uint32_t)(pos)) >> 2)) /* inverse of LSEG/LPOS; the low two bits of pos are dropped */
+
+#ifndef S_IRUSR /* fallback values used only when the included headers do not define these */
+# define S_IRUSR 0400
+#endif /* S_IRUSR */
+#ifndef S_IWUSR
+# define S_IWUSR 0200
+#endif /* S_IWUSR */
+
+static grn_bool grn_ii_cursor_set_min_enable = GRN_TRUE; /* GRN_II_CURSOR_SET_MIN_ENABLE: only the value "no" turns it off */
+static double grn_ii_select_too_many_index_match_ratio = -1; /* GRN_II_SELECT_TOO_MANY_INDEX_MATCH_RATIO, parsed with atof() */
+static double grn_ii_estimate_size_for_query_reduce_ratio = 0.9; /* GRN_II_ESTIMATE_SIZE_FOR_QUERY_REDUCE_RATIO, parsed with atof() */
+static grn_bool grn_ii_overlap_token_skip_enable = GRN_FALSE; /* true when GRN_II_OVERLAP_TOKEN_SKIP_ENABLE is non-empty */
+static uint32_t grn_ii_builder_block_threshold_force = 0; /* GRN_II_BUILDER_BLOCK_THRESHOLD; 0 when that value is empty */
+static uint32_t grn_ii_max_n_segments_small = MAX_PSEG_SMALL; /* GRN_II_MAX_N_SEGMENTS_SMALL, capped at MAX_PSEG */
+static uint32_t grn_ii_max_n_chunks_small = GRN_II_MAX_CHUNK_SMALL; /* GRN_II_MAX_N_CHUNKS_SMALL, capped at GRN_II_MAX_CHUNK */
+
+/*
+ * Initialize the file-level tunables of this module from environment
+ * variables. Each variable is read into its own scratch buffer with
+ * grn_getenv() and then interpreted as described next to each step.
+ */
+void
+grn_ii_init_from_env(void)
+{
+  /* GRN_II_CURSOR_SET_MIN_ENABLE: enabled unless the value is exactly "no". */
+  {
+    char env[GRN_ENV_BUFFER_SIZE];
+    grn_getenv("GRN_II_CURSOR_SET_MIN_ENABLE", env, GRN_ENV_BUFFER_SIZE);
+    grn_ii_cursor_set_min_enable =
+      (strcmp(env, "no") == 0) ? GRN_FALSE : GRN_TRUE;
+  }
+
+  /* GRN_II_SELECT_TOO_MANY_INDEX_MATCH_RATIO: kept as is when empty. */
+  {
+    char env[GRN_ENV_BUFFER_SIZE];
+    grn_getenv("GRN_II_SELECT_TOO_MANY_INDEX_MATCH_RATIO",
+               env, GRN_ENV_BUFFER_SIZE);
+    if (env[0]) {
+      grn_ii_select_too_many_index_match_ratio = atof(env);
+    }
+  }
+
+  /* GRN_II_ESTIMATE_SIZE_FOR_QUERY_REDUCE_RATIO: kept as is when empty. */
+  {
+    char env[GRN_ENV_BUFFER_SIZE];
+    grn_getenv("GRN_II_ESTIMATE_SIZE_FOR_QUERY_REDUCE_RATIO",
+               env, GRN_ENV_BUFFER_SIZE);
+    if (env[0]) {
+      grn_ii_estimate_size_for_query_reduce_ratio = atof(env);
+    }
+  }
+
+  /* GRN_II_OVERLAP_TOKEN_SKIP_ENABLE: any non-empty value enables it. */
+  {
+    char env[GRN_ENV_BUFFER_SIZE];
+    grn_getenv("GRN_II_OVERLAP_TOKEN_SKIP_ENABLE", env, GRN_ENV_BUFFER_SIZE);
+    grn_ii_overlap_token_skip_enable = env[0] ? GRN_TRUE : GRN_FALSE;
+  }
+
+  /* GRN_II_BUILDER_BLOCK_THRESHOLD: parsed value, or 0 when empty. */
+  {
+    char env[GRN_ENV_BUFFER_SIZE];
+    grn_getenv("GRN_II_BUILDER_BLOCK_THRESHOLD", env, GRN_ENV_BUFFER_SIZE);
+    if (!env[0]) {
+      grn_ii_builder_block_threshold_force = 0;
+    } else {
+      grn_ii_builder_block_threshold_force =
+        grn_atoui(env, env + strlen(env), NULL);
+    }
+  }
+
+  /* GRN_II_MAX_N_SEGMENTS_SMALL: parsed value limited to MAX_PSEG. */
+  {
+    char env[GRN_ENV_BUFFER_SIZE];
+    grn_getenv("GRN_II_MAX_N_SEGMENTS_SMALL", env, GRN_ENV_BUFFER_SIZE);
+    if (env[0]) {
+      uint32_t n_segments = grn_atoui(env, env + strlen(env), NULL);
+      grn_ii_max_n_segments_small =
+        (n_segments > MAX_PSEG) ? MAX_PSEG : n_segments;
+    }
+  }
+
+  /* GRN_II_MAX_N_CHUNKS_SMALL: parsed value limited to GRN_II_MAX_CHUNK. */
+  {
+    char env[GRN_ENV_BUFFER_SIZE];
+    grn_getenv("GRN_II_MAX_N_CHUNKS_SMALL", env, GRN_ENV_BUFFER_SIZE);
+    if (env[0]) {
+      uint32_t n_chunks = grn_atoui(env, env + strlen(env), NULL);
+      grn_ii_max_n_chunks_small =
+        (n_chunks > GRN_II_MAX_CHUNK) ? GRN_II_MAX_CHUNK : n_chunks;
+    }
+  }
+}
+
+void
+grn_ii_cursor_set_min_enable_set(grn_bool enable)
+{ /* Overwrites the file-level grn_ii_cursor_set_min_enable flag with `enable`. */
+ grn_ii_cursor_set_min_enable = enable;
+}
+
+grn_bool
+grn_ii_cursor_set_min_enable_get(void)
+{ /* Returns the current value of the file-level grn_ii_cursor_set_min_enable flag. */
+ return grn_ii_cursor_set_min_enable;
+}
+
+/* segment */
+
+/*
+ * Pick the physical segment number to hand out next.
+ *
+ * When the recycle ring (bgqbody) is full, its oldest entry is reused.
+ * Otherwise ii->header->pnext is returned and advanced while it is still
+ * below max_segment. When pnext is 0 (and CUT_OFF_COMPATIBILITY is not
+ * defined) the in-use segments are rediscovered from ainfo[]/binfo[]: the
+ * lowest unused number is returned and pnext becomes highest-used + 1.
+ *
+ * Returns a value >= ii->seg->header->max_segment when no segment can be
+ * provided (including allocation failure of the scratch map); callers must
+ * check for that.
+ */
+inline static uint32_t
+segment_get(grn_ctx *ctx, grn_ii *ii)
+{
+  uint32_t pseg;
+  if (ii->header->bgqtail == ((ii->header->bgqhead + 1) & (GRN_II_BGQSIZE - 1))) {
+    /* The ring is full: take the entry at the tail and advance the tail. */
+    pseg = ii->header->bgqbody[ii->header->bgqtail];
+    ii->header->bgqtail = (ii->header->bgqtail + 1) & (GRN_II_BGQSIZE - 1);
+  } else {
+    pseg = ii->header->pnext;
+#ifndef CUT_OFF_COMPATIBILITY
+    if (!pseg) {
+      uint32_t i;
+      uint32_t pmax = 0;
+      char *used;
+      uint32_t max_segment = ii->seg->header->max_segment;
+      used = GRN_CALLOC(max_segment);
+      if (!used) { return max_segment; }
+      for (i = 0; i < GRN_II_MAX_LSEG && i < max_segment; i++) {
+        if ((pseg = ii->header->ainfo[i]) != GRN_II_PSEG_NOT_ASSIGNED) {
+          if (pseg > pmax) { pmax = pseg; }
+          /* used[] has max_segment entries: never write past it. */
+          if (pseg < max_segment) { used[pseg] = 1; }
+        }
+        if ((pseg = ii->header->binfo[i]) != GRN_II_PSEG_NOT_ASSIGNED) {
+          if (pseg > pmax) { pmax = pseg; }
+          if (pseg < max_segment) { used[pseg] = 1; }
+        }
+      }
+      /* Lowest segment number that nothing refers to. */
+      for (pseg = 0; pseg < max_segment && used[pseg]; pseg++) ;
+      GRN_FREE(used);
+      ii->header->pnext = pmax + 1;
+    } else
+#endif /* CUT_OFF_COMPATIBILITY */
+    if (ii->header->pnext < ii->seg->header->max_segment) {
+      ii->header->pnext++;
+    }
+  }
+  return pseg;
+}
+
+/*
+ * Obtain a physical segment with segment_get(), zero its S_SEGMENT bytes and
+ * store its number in *pseg.
+ *
+ * Returns GRN_SUCCESS, or GRN_NO_MEMORY_AVAILABLE when no segment below
+ * max_segment is available or the segment cannot be referenced. *pseg is
+ * written only on success.
+ */
+inline static grn_rc
+segment_get_clear(grn_ctx *ctx, grn_ii *ii, uint32_t *pseg)
+{
+  void *addr = NULL;
+  uint32_t candidate = segment_get(ctx, ii);
+
+  if (candidate >= ii->seg->header->max_segment) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  GRN_IO_SEG_REF(ii->seg, candidate, addr);
+  if (!addr) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  memset(addr, 0, S_SEGMENT);
+  GRN_IO_SEG_UNREF(ii->seg, candidate);
+  *pseg = candidate;
+  return GRN_SUCCESS;
+}
+
+/*
+ * Bind a physical segment to a buffer slot (binfo[]).
+ *
+ * If *segno is below GRN_II_MAX_LSEG that slot is used and must be
+ * unassigned (GRN_INVALID_ARGUMENT otherwise). Any other *segno asks for
+ * the lowest unassigned slot, whose index is written back to *segno.
+ * Returns GRN_NO_MEMORY_AVAILABLE when there is no free slot or no free
+ * physical segment; GRN_SUCCESS otherwise.
+ */
+inline static grn_rc
+buffer_segment_new(grn_ctx *ctx, grn_ii *ii, uint32_t *segno)
+{
+  uint32_t logical;
+  uint32_t physical;
+
+  if (*segno >= GRN_II_MAX_LSEG) {
+    /* No specific slot requested: search from the bottom. */
+    logical = 0;
+    while (logical < GRN_II_MAX_LSEG &&
+           ii->header->binfo[logical] != GRN_II_PSEG_NOT_ASSIGNED) {
+      logical++;
+    }
+    if (logical == GRN_II_MAX_LSEG) {
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+    *segno = logical;
+  } else {
+    if (ii->header->binfo[*segno] != GRN_II_PSEG_NOT_ASSIGNED) {
+      return GRN_INVALID_ARGUMENT;
+    }
+    logical = *segno;
+  }
+
+  physical = segment_get(ctx, ii);
+  if (physical >= ii->seg->header->max_segment) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  ii->header->binfo[logical] = physical;
+  if (logical >= ii->header->bmax) {
+    ii->header->bmax = logical + 1;
+  }
+  return GRN_SUCCESS;
+}
+
+/*
+ * Find two unassigned buffer slots and two physical segments without
+ * binding them to each other.
+ *
+ * On return *lseg0 < *lseg1 are the two lowest unassigned indexes of
+ * binfo[], and *pseg0 / *pseg1 are the segments obtained from
+ * segment_get(). Every failure is reported through MERR() and the function
+ * then returns ctx->rc. On the success path ctx->rc is returned as well, so
+ * the result reflects whatever ctx->rc holds at that point.
+ *
+ * A segment number is rejected when it is >= max_segment, the same test
+ * the other callers of segment_get() in this file apply.
+ */
+static grn_rc
+buffer_segment_reserve(grn_ctx *ctx, grn_ii *ii,
+                       uint32_t *lseg0, uint32_t *pseg0,
+                       uint32_t *lseg1, uint32_t *pseg1)
+{
+  uint32_t i;
+
+  /* First free slot. */
+  for (i = 0; i < GRN_II_MAX_LSEG; i++) {
+    if (ii->header->binfo[i] == GRN_II_PSEG_NOT_ASSIGNED) { break; }
+  }
+  if (i == GRN_II_MAX_LSEG) {
+    DEFINE_NAME(ii);
+    MERR("[ii][buffer][segment][reserve] "
+         "couldn't find a free buffer: <%.*s>: max:<%u>",
+         name_size, name,
+         GRN_II_MAX_LSEG);
+    return ctx->rc;
+  }
+  *lseg0 = i;
+
+  /* Second free slot, strictly after the first one. */
+  for (i++; i < GRN_II_MAX_LSEG; i++) {
+    if (ii->header->binfo[i] == GRN_II_PSEG_NOT_ASSIGNED) { break; }
+  }
+  if (i == GRN_II_MAX_LSEG) {
+    DEFINE_NAME(ii);
+    MERR("[ii][buffer][segment][reserve] "
+         "couldn't find two free buffers: "
+         "<%.*s>: "
+         "found:<%u>, max:<%u>",
+         name_size, name,
+         *lseg0, GRN_II_MAX_LSEG);
+    return ctx->rc;
+  }
+  *lseg1 = i;
+
+  *pseg0 = segment_get(ctx, ii);
+  if (*pseg0 >= ii->seg->header->max_segment) {
+    DEFINE_NAME(ii);
+    MERR("[ii][buffer][segment][reserve] "
+         "couldn't allocate a free segment: <%.*s>: "
+         "buffer:<%u>, max:<%u>",
+         name_size, name,
+         *lseg0, ii->seg->header->max_segment);
+    return ctx->rc;
+  }
+  *pseg1 = segment_get(ctx, ii);
+  if (*pseg1 >= ii->seg->header->max_segment) {
+    DEFINE_NAME(ii);
+    MERR("[ii][buffer][segment][reserve] "
+         "couldn't allocate two free segments: "
+         "<%.*s>: "
+         "found:<%u>, not-found:<%u>, max:<%u>",
+         name_size, name,
+         *lseg0, *lseg1, ii->seg->header->max_segment);
+    return ctx->rc;
+  }
+  return ctx->rc;
+}
+
+#define BGQENQUE(lseg) do {\
+ if (ii->header->binfo[lseg] != GRN_II_PSEG_NOT_ASSIGNED) {\
+ ii->header->bgqbody[ii->header->bgqhead] = ii->header->binfo[lseg];\
+ ii->header->bgqhead = (ii->header->bgqhead + 1) & (GRN_II_BGQSIZE - 1);\
+ GRN_ASSERT(ii->header->bgqhead != ii->header->bgqtail);\
+ }\
+} while (0) /* BGQENQUE(lseg): if buffer slot lseg is bound to a physical segment,
+ * append that segment number at the head of the bgqbody ring and advance
+ * the head (wrapping with GRN_II_BGQSIZE - 1 as a mask). Uses `ii` from the
+ * caller's scope; segment_get() consumes entries from the tail. */
+
+inline static void
+buffer_segment_update(grn_ii *ii, uint32_t lseg, uint32_t pseg)
+{ /* Rebind buffer slot lseg to pseg; a segment bound before is first queued with BGQENQUE. */
+ BGQENQUE(lseg);
+ // smb_wmb();
+ ii->header->binfo[lseg] = pseg;
+ if (lseg >= ii->header->bmax) { ii->header->bmax = lseg + 1; } /* bmax stays one past the highest slot written */
+}
+
+inline static void
+buffer_segment_clear(grn_ii *ii, uint32_t lseg)
+{ /* Unbind buffer slot lseg; a segment bound before is first queued with BGQENQUE. bmax is left untouched. */
+ BGQENQUE(lseg);
+ // smb_wmb();
+ ii->header->binfo[lseg] = GRN_II_PSEG_NOT_ASSIGNED;
+}
+
+/* chunk */
+
+#define HEADER_CHUNK_AT(ii,offset) \
+ ((((ii)->header->chunks[((offset) >> 3)]) >> ((offset) & 7)) & 1) /* 1 if bit `offset` of the header->chunks bitmap is set, else 0 */
+
+#define HEADER_CHUNK_ON(ii,offset) \
+ (((ii)->header->chunks[((offset) >> 3)]) |= (1 << ((offset) & 7))) /* set bit `offset` of the bitmap */
+
+#define HEADER_CHUNK_OFF(ii,offset) \
+ (((ii)->header->chunks[((offset) >> 3)]) &= ~(1 << ((offset) & 7))) /* clear bit `offset` of the bitmap */
+
+#define N_GARBAGES_TH 1 /* chunk_new() reuses garbage only while counts exceed this */
+
+#define N_GARBAGES ((S_GARBAGE - (sizeof(uint32_t) * 4))/(sizeof(uint32_t))) /* recs[] capacity: S_GARBAGE bytes minus four uint32_t fields */
+
+typedef struct { /* one block of freed-chunk records; recs[] is used as a ring by chunk_free()/chunk_new() */
+ uint32_t head; /* index where chunk_free() stores the next offset */
+ uint32_t tail; /* index from which chunk_new() takes the next offset */
+ uint32_t nrecs; /* number of offsets currently stored */
+ uint32_t next; /* offset of the next block in the chain, or GRN_II_PSEG_NOT_ASSIGNED */
+ uint32_t recs[N_GARBAGES]; /* chunk offsets waiting to be reused */
+} grn_ii_ginfo;
+
+#define WIN_MAP(chunk,ctx,iw,seg,pos,size,mode)\
+ grn_io_win_map(chunk, ctx, iw,\
+ ((seg) >> GRN_II_N_CHUNK_VARIATION),\
+ (((seg) & ((1 << GRN_II_N_CHUNK_VARIATION) - 1)) << GRN_II_W_LEAST_CHUNK) + (pos),\
+ size, mode) /* WIN_MAP: call grn_io_win_map() for a chunk offset `seg`. The bits above
+ * GRN_II_N_CHUNK_VARIATION select the chunk; the low bits, scaled by
+ * 1 << GRN_II_W_LEAST_CHUNK and added to `pos`, give the byte position
+ * inside it. Evaluates `seg` twice. */
+/*
+static int new_histogram[32];
+static int free_histogram[32];
+*/
+static grn_rc
+chunk_new(grn_ctx *ctx, grn_ii *ii, uint32_t *res, uint32_t size)
+{ /* Reserve `size` bytes in ii->chunk and store the resulting chunk offset in
+ * the location res points to. Sizes above S_CHUNK take a run of whole
+ * chunks from the header bitmap. Smaller sizes are rounded up to a
+ * power-of-two class (at least 1 << GRN_II_W_LEAST_CHUNK) and are served
+ * from that class's garbage chain or from its partially carved chunk.
+ * Returns GRN_SUCCESS, or ctx->rc after MERR() when nothing can be
+ * allocated. */
+ uint32_t n_chunks;
+
+ n_chunks = ii->chunk->header->max_segment;
+
+ /*
+ if (size) {
+ int m, es = size - 1;
+ GRN_BIT_SCAN_REV(es, m);
+ m++;
+ new_histogram[m]++;
+ }
+ */
+ if (size > S_CHUNK) {
+ int i, j;
+ uint32_t n = (size + S_CHUNK - 1) >> GRN_II_W_CHUNK; /* whole chunks needed, rounded up */
+ for (i = 0, j = -1; i < n_chunks; i++) { /* j: index of the last in-use chunk seen so far */
+ if (HEADER_CHUNK_AT(ii, i)) {
+ j = i;
+ } else {
+ if (i == j + n) { /* chunks j+1 .. i are free: n in a row */
+ j++;
+ *res = j << GRN_II_N_CHUNK_VARIATION;
+ for (; j <= i; j++) { HEADER_CHUNK_ON(ii, j); }
+ return GRN_SUCCESS;
+ }
+ }
+ }
+ {
+ DEFINE_NAME(ii);
+ MERR("[ii][chunk][new] index is full: "
+ "<%.*s>: "
+ "size:<%u>, n-chunks:<%u>",
+ name_size, name,
+ size, n_chunks);
+ }
+ return ctx->rc;
+ } else {
+ uint32_t *vp;
+ int m, aligned_size; /* NOTE(review): aligned_size is assigned below but never read in this function */
+ if (size > (1 << GRN_II_W_LEAST_CHUNK)) {
+ int es = size - 1;
+ GRN_BIT_SCAN_REV(es, m);
+ m++;
+ } else {
+ m = GRN_II_W_LEAST_CHUNK;
+ }
+ aligned_size = 1 << (m - GRN_II_W_LEAST_CHUNK);
+ if (ii->header->ngarbages[m - GRN_II_W_LEAST_CHUNK] > N_GARBAGES_TH) { /* enough freed chunks of this class: try to reuse one */
+ grn_ii_ginfo *ginfo;
+ uint32_t *gseg;
+ grn_io_win iw, iw_;
+ iw_.addr = NULL;
+ gseg = &ii->header->garbages[m - GRN_II_W_LEAST_CHUNK];
+ while (*gseg != GRN_II_PSEG_NOT_ASSIGNED) {
+ ginfo = WIN_MAP(ii->chunk, ctx, &iw, *gseg, 0, S_GARBAGE, grn_io_rdwr);
+ //GRN_IO_SEG_MAP2(ii->chunk, *gseg, ginfo);
+ if (!ginfo) {
+ if (iw_.addr) { grn_io_win_unmap(&iw_); }
+ {
+ DEFINE_NAME(ii);
+ MERR("[ii][chunk][new] failed to allocate garbage segment: "
+ "<%.*s>: "
+ "n-garbages:<%u>, size:<%u>, n-chunks:<%u>",
+ name_size, name,
+ ii->header->ngarbages[m - GRN_II_W_LEAST_CHUNK],
+ size,
+ n_chunks);
+ }
+ return ctx->rc;
+ }
+ if (ginfo->next != GRN_II_PSEG_NOT_ASSIGNED ||
+ ginfo->nrecs > N_GARBAGES_TH) {
+ *res = ginfo->recs[ginfo->tail];
+ if (++ginfo->tail == N_GARBAGES) { ginfo->tail = 0; }
+ ginfo->nrecs--;
+ ii->header->ngarbages[m - GRN_II_W_LEAST_CHUNK]--;
+ if (!ginfo->nrecs) {
+ HEADER_CHUNK_OFF(ii, *gseg); /* NOTE(review): *gseg is passed to WIN_MAP as an offset (chunk index << GRN_II_N_CHUNK_VARIATION) but is used here as a bitmap index without shifting - confirm this is intended */
+ *gseg = ginfo->next;
+ }
+ if (iw_.addr) { grn_io_win_unmap(&iw_); }
+ grn_io_win_unmap(&iw);
+ return GRN_SUCCESS;
+ }
+ if (iw_.addr) { grn_io_win_unmap(&iw_); }
+ iw_ = iw; /* keep this window mapped: gseg is about to point into it */
+ gseg = &ginfo->next;
+ }
+ if (iw_.addr) { grn_io_win_unmap(&iw_); }
+ }
+ vp = &ii->header->free_chunks[m - GRN_II_W_LEAST_CHUNK];
+ if (*vp == GRN_II_PSEG_NOT_ASSIGNED) { /* no partially carved chunk for this class: claim the first free one */
+ int i = 0;
+ while (HEADER_CHUNK_AT(ii, i)) {
+ if (++i >= n_chunks) {
+ DEFINE_NAME(ii);
+ MERR("[ii][chunk][new] failed to find a free chunk: "
+ "<%.*s>: "
+ "index:<%u>, size:<%u>, n-chunks:<%u>",
+ name_size, name,
+ m - GRN_II_W_LEAST_CHUNK,
+ size,
+ n_chunks);
+ return ctx->rc;
+ }
+ }
+ HEADER_CHUNK_ON(ii, i);
+ *vp = i << GRN_II_N_CHUNK_VARIATION;
+ }
+ *res = *vp;
+ *vp += 1 << (m - GRN_II_W_LEAST_CHUNK);
+ if (!(*vp & ((1 << GRN_II_N_CHUNK_VARIATION) - 1))) { /* low bits wrapped to 0: this chunk is fully carved */
+ *vp = GRN_II_PSEG_NOT_ASSIGNED;
+ }
+ return GRN_SUCCESS;
+ }
+}
+
+static grn_rc
+chunk_free(grn_ctx *ctx, grn_ii *ii,
+ uint32_t offset, uint32_t dummy, uint32_t size)
+{ /* Release the region of `size` bytes at chunk offset `offset`. Sizes above
+ * S_CHUNK clear the bitmap bit of every chunk in the run. Smaller sizes
+ * append `offset` to the garbage chain of their power-of-two class, and a
+ * new garbage block is obtained from chunk_new() when no block in the
+ * chain has room. `dummy` is not used. Returns GRN_SUCCESS,
+ * GRN_NO_MEMORY_AVAILABLE when a garbage block cannot be mapped, or the
+ * error returned by chunk_new(). */
+ /*
+ if (size) {
+ int m, es = size - 1;
+ GRN_BIT_SCAN_REV(es, m);
+ m++;
+ free_histogram[m]++;
+ }
+ */
+ grn_io_win iw, iw_;
+ grn_ii_ginfo *ginfo= 0;
+ uint32_t seg, m, *gseg;
+ seg = offset >> GRN_II_N_CHUNK_VARIATION;
+ if (size > S_CHUNK) {
+ int n = (size + S_CHUNK - 1) >> GRN_II_W_CHUNK; /* whole chunks covered, rounded up */
+ for (; n--; seg++) { HEADER_CHUNK_OFF(ii, seg); }
+ return GRN_SUCCESS;
+ }
+ if (size > (1 << GRN_II_W_LEAST_CHUNK)) {
+ int es = size - 1;
+ GRN_BIT_SCAN_REV(es, m);
+ m++;
+ } else {
+ m = GRN_II_W_LEAST_CHUNK;
+ }
+ gseg = &ii->header->garbages[m - GRN_II_W_LEAST_CHUNK];
+ iw_.addr = NULL;
+ while (*gseg != GRN_II_PSEG_NOT_ASSIGNED) { /* walk the chain until a block with a free record slot is found */
+ ginfo = WIN_MAP(ii->chunk, ctx, &iw, *gseg, 0, S_GARBAGE, grn_io_rdwr);
+ // GRN_IO_SEG_MAP2(ii->chunk, *gseg, ginfo);
+ if (!ginfo) {
+ if (iw_.addr) { grn_io_win_unmap(&iw_); }
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ if (ginfo->nrecs < N_GARBAGES) { break; }
+ if (iw_.addr) { grn_io_win_unmap(&iw_); }
+ iw_ = iw; /* keep this window mapped: gseg is about to point into it */
+ gseg = &ginfo->next;
+ }
+ if (*gseg == GRN_II_PSEG_NOT_ASSIGNED) { /* every block is full (or there is none): append a fresh block */
+ grn_rc rc;
+ if ((rc = chunk_new(ctx, ii, gseg, S_GARBAGE))) {
+ if (iw_.addr) { grn_io_win_unmap(&iw_); }
+ return rc;
+ }
+ ginfo = WIN_MAP(ii->chunk, ctx, &iw, *gseg, 0, S_GARBAGE, grn_io_rdwr);
+ /*
+ uint32_t i = 0;
+ while (HEADER_CHUNK_AT(ii, i)) {
+ if (++i >= ii->chunk->header->max_segment) {
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ }
+ HEADER_CHUNK_ON(ii, i);
+ *gseg = i;
+ GRN_IO_SEG_MAP2(ii->chunk, *gseg, ginfo);
+ */
+ if (!ginfo) {
+ if (iw_.addr) { grn_io_win_unmap(&iw_); }
+ return GRN_NO_MEMORY_AVAILABLE;
+ }
+ ginfo->head = 0;
+ ginfo->tail = 0;
+ ginfo->nrecs = 0;
+ ginfo->next = GRN_II_PSEG_NOT_ASSIGNED;
+ }
+ if (iw_.addr) { grn_io_win_unmap(&iw_); }
+ ginfo->recs[ginfo->head] = offset; /* store at head; chunk_new() consumes from tail */
+ if (++ginfo->head == N_GARBAGES) { ginfo->head = 0; }
+ ginfo->nrecs++;
+ grn_io_win_unmap(&iw);
+ ii->header->ngarbages[m - GRN_II_W_LEAST_CHUNK]++;
+ return GRN_SUCCESS;
+}
+
+#define UNIT_SIZE 0x80 /* 128 */
+#define UNIT_MASK (UNIT_SIZE - 1) /* 0x7f: the low seven bits */
+
+/* <generated> */
+/*
+ * Pack p[0..7] into one byte at rp, one bit per value, p[0] in the most
+ * significant bit. The values are summed as shifted terms without masking.
+ * Returns rp + 1.
+ */
+static uint8_t *
+pack_1(uint32_t *p, uint8_t *rp)
+{
+  uint8_t acc = 0;
+  int i;
+
+  for (i = 0; i < 8; i++) {
+    acc = (uint8_t)((acc << 1) + p[i]);
+  }
+  rp[0] = acc;
+  return rp + 1;
+}
+/*
+ * Expand one byte at dp into eight 1-bit values p[0..7], most significant
+ * bit first. Returns dp + 1.
+ */
+static uint8_t *
+unpack_1(uint32_t *p, uint8_t *dp)
+{
+  int i;
+
+  for (i = 0; i < 8; i++) {
+    p[i] = (dp[0] >> (7 - i)) & 0x1;
+  }
+  return dp + 1;
+}
+/*
+ * Pack p[0..7] into two bytes at rp, two bits per value, earlier values in
+ * the more significant bits. Values are not masked. Returns rp + 2.
+ */
+static uint8_t *
+pack_2(uint32_t *p, uint8_t *rp)
+{
+  int byte;
+  int k;
+
+  for (byte = 0; byte < 2; byte++) {
+    uint8_t acc = 0;
+    for (k = 0; k < 4; k++) {
+      acc = (uint8_t)((acc << 2) + p[byte * 4 + k]);
+    }
+    rp[byte] = acc;
+  }
+  return rp + 2;
+}
+/*
+ * Expand two bytes at dp into eight 2-bit values p[0..7], most significant
+ * bits first. Returns dp + 2.
+ */
+static uint8_t *
+unpack_2(uint32_t *p, uint8_t *dp)
+{
+  int byte;
+  int k;
+
+  for (byte = 0; byte < 2; byte++) {
+    for (k = 0; k < 4; k++) {
+      p[byte * 4 + k] = (dp[byte] >> (6 - 2 * k)) & 0x3;
+    }
+  }
+  return dp + 2;
+}
+/*
+ * Pack p[0..7] into three bytes at rp, three bits per value, earlier values
+ * in the more significant bits. Values are not masked. Returns rp + 3.
+ */
+static uint8_t *
+pack_3(uint32_t *p, uint8_t *rp)
+{
+  uint8_t carry;
+
+  /* byte 0 */
+  carry = *p++ << 5;
+  carry += *p++ << 2;
+  *rp++ = carry + (*p >> 1);
+  /* byte 1 */
+  carry = *p++ << 7;
+  carry += *p++ << 4;
+  carry += *p++ << 1;
+  *rp++ = carry + (*p >> 2);
+  /* byte 2 */
+  carry = *p++ << 6;
+  carry += *p++ << 3;
+  *rp++ = carry + *p++;
+  return rp;
+}
+/*
+ * Expand three bytes at dp into eight 3-bit values p[0..7], most
+ * significant bits first. Returns dp + 3.
+ */
+static uint8_t *
+unpack_3(uint32_t *p, uint8_t *dp)
+{
+  uint32_t hi;
+
+  *p++ = (*dp >> 5);
+  *p++ = ((*dp >> 2) & 0x7);
+  hi = ((*dp++ << 1) & 0x7);
+  *p++ = hi + (*dp >> 7);
+  *p++ = ((*dp >> 4) & 0x7);
+  *p++ = ((*dp >> 1) & 0x7);
+  hi = ((*dp++ << 2) & 0x7);
+  *p++ = hi + (*dp >> 6);
+  *p++ = ((*dp >> 3) & 0x7);
+  *p++ = (*dp++ & 0x7);
+  return dp;
+}
+/*
+ * Pack p[0..7] into four bytes at rp, two values per byte with the earlier
+ * one in the high nibble. Values are not masked. Returns rp + 4.
+ */
+static uint8_t *
+pack_4(uint32_t *p, uint8_t *rp)
+{
+  int byte;
+
+  for (byte = 0; byte < 4; byte++) {
+    rp[byte] = (uint8_t)((p[2 * byte] << 4) + p[2 * byte + 1]);
+  }
+  return rp + 4;
+}
+/*
+ * Expand four bytes at dp into eight 4-bit values p[0..7]; the high nibble
+ * of each byte comes first. Returns dp + 4.
+ */
+static uint8_t *
+unpack_4(uint32_t *p, uint8_t *dp)
+{
+  int byte;
+
+  for (byte = 0; byte < 4; byte++) {
+    p[2 * byte] = (dp[byte] >> 4);
+    p[2 * byte + 1] = (dp[byte] & 0xf);
+  }
+  return dp + 4;
+}
+/*
+ * Pack p[0..7] into five bytes at rp, five bits per value, earlier values
+ * in the more significant bits. Values are not masked. Returns rp + 5.
+ */
+static uint8_t *
+pack_5(uint32_t *p, uint8_t *rp)
+{
+  uint8_t carry;
+
+  carry = *p++ << 3;
+  *rp++ = carry + (*p >> 2);
+  carry = *p++ << 6;
+  carry += *p++ << 1;
+  *rp++ = carry + (*p >> 4);
+  carry = *p++ << 4;
+  *rp++ = carry + (*p >> 1);
+  carry = *p++ << 7;
+  carry += *p++ << 2;
+  *rp++ = carry + (*p >> 3);
+  carry = *p++ << 5;
+  *rp++ = carry + *p++;
+  return rp;
+}
+/*
+ * Expand five bytes at dp into eight 5-bit values p[0..7], most significant
+ * bits first. Returns dp + 5.
+ */
+static uint8_t *
+unpack_5(uint32_t *p, uint8_t *dp)
+{
+  uint32_t hi;
+
+  *p++ = (*dp >> 3);
+  hi = ((*dp++ << 2) & 0x1f);
+  *p++ = hi + (*dp >> 6);
+  *p++ = ((*dp >> 1) & 0x1f);
+  hi = ((*dp++ << 4) & 0x1f);
+  *p++ = hi + (*dp >> 4);
+  hi = ((*dp++ << 1) & 0x1f);
+  *p++ = hi + (*dp >> 7);
+  *p++ = ((*dp >> 2) & 0x1f);
+  hi = ((*dp++ << 3) & 0x1f);
+  *p++ = hi + (*dp >> 5);
+  *p++ = (*dp++ & 0x1f);
+  return dp;
+}
+/*
+ * Pack p[0..7] into six bytes at rp, six bits per value. The layout repeats
+ * after four values (three bytes), so the same step runs twice. Values are
+ * not masked. Returns rp + 6.
+ */
+static uint8_t *
+pack_6(uint32_t *p, uint8_t *rp)
+{
+  uint8_t carry;
+  int half;
+
+  for (half = 0; half < 2; half++) {
+    carry = *p++ << 2;
+    *rp++ = carry + (*p >> 4);
+    carry = *p++ << 4;
+    *rp++ = carry + (*p >> 2);
+    carry = *p++ << 6;
+    *rp++ = carry + *p++;
+  }
+  return rp;
+}
+/*
+ * Expand six bytes at dp into eight 6-bit values p[0..7]. The layout
+ * repeats after three bytes, so the same step runs twice. Returns dp + 6.
+ */
+static uint8_t *
+unpack_6(uint32_t *p, uint8_t *dp)
+{
+  uint32_t hi;
+  int half;
+
+  for (half = 0; half < 2; half++) {
+    *p++ = (*dp >> 2);
+    hi = ((*dp++ << 4) & 0x3f);
+    *p++ = hi + (*dp >> 4);
+    hi = ((*dp++ << 2) & 0x3f);
+    *p++ = hi + (*dp >> 6);
+    *p++ = (*dp++ & 0x3f);
+  }
+  return dp;
+}
+/*
+ * Pack p[0..7] into seven bytes at rp, seven bits per value, earlier values
+ * in the more significant bits. Every output byte combines the tail of one
+ * value with the head of the next; the split point moves by one bit per
+ * byte. Values are not masked. Returns rp + 7.
+ */
+static uint8_t *
+pack_7(uint32_t *p, uint8_t *rp)
+{
+  uint8_t carry;
+  int shift;
+
+  carry = *p++ << 1;
+  for (shift = 6; shift >= 1; shift--) {
+    *rp++ = carry + (*p >> shift);
+    carry = *p++ << (8 - shift);
+  }
+  *rp++ = carry + *p++;
+  return rp;
+}
+/*
+ * Expand seven bytes at dp into eight 7-bit values p[0..7], most
+ * significant bits first. Returns dp + 7.
+ */
+static uint8_t *
+unpack_7(uint32_t *p, uint8_t *dp)
+{
+  uint32_t hi;
+  int shift;
+
+  *p++ = (*dp >> 1);
+  for (shift = 6; shift >= 1; shift--) {
+    hi = ((*dp++ << shift) & 0x7f);
+    *p++ = hi + (*dp >> (8 - shift));
+  }
+  *p++ = (*dp++ & 0x7f);
+  return dp;
+}
+/*
+ * Store the low eight bits of each of p[0..7] as one byte at rp.
+ * Returns rp + 8.
+ */
+static uint8_t *
+pack_8(uint32_t *p, uint8_t *rp)
+{
+  int i;
+
+  for (i = 0; i < 8; i++) {
+    rp[i] = (uint8_t)p[i];
+  }
+  return rp + 8;
+}
+/*
+ * Copy eight bytes at dp into p[0..7], one byte per value.
+ * Returns dp + 8.
+ */
+static uint8_t *
+unpack_8(uint32_t *p, uint8_t *dp)
+{
+  int i;
+
+  for (i = 0; i < 8; i++) {
+    p[i] = dp[i];
+  }
+  return dp + 8;
+}
+/*
+ * Pack p[0..7] into nine bytes at rp, nine bits per value, earlier values
+ * in the more significant bits. Values are not masked. Returns rp + 9.
+ */
+static uint8_t *
+pack_9(uint32_t *p, uint8_t *rp)
+{
+  uint8_t carry;
+  int shift;
+
+  *rp++ = (*p >> 1);
+  carry = *p++ << 7;
+  for (shift = 2; shift <= 7; shift++) {
+    *rp++ = carry + (*p >> shift);
+    carry = *p++ << (8 - shift);
+  }
+  *rp++ = carry + (*p >> 8);
+  *rp++ = *p++;
+  return rp;
+}
+/*
+ * Expand nine bytes at dp into eight 9-bit values p[0..7], most significant
+ * bits first. Returns dp + 9.
+ */
+static uint8_t *
+unpack_9(uint32_t *p, uint8_t *dp)
+{
+  uint32_t hi;
+  int shift;
+
+  /* For shift == 1 the mask cannot clear anything (a byte << 1 < 0x200). */
+  for (shift = 1; shift <= 7; shift++) {
+    hi = ((*dp++ << shift) & 0x1ff);
+    *p++ = hi + (*dp >> (8 - shift));
+  }
+  hi = ((*dp++ << 8) & 0x1ff);
+  *p++ = hi + *dp++;
+  return dp;
+}
+/*
+ * Pack p[0..7] into ten bytes at rp, ten bits per value. The layout repeats
+ * after four values (five bytes), so the same step runs twice. Values are
+ * not masked. Returns rp + 10.
+ */
+static uint8_t *
+pack_10(uint32_t *p, uint8_t *rp)
+{
+  uint8_t carry;
+  int half;
+
+  for (half = 0; half < 2; half++) {
+    *rp++ = (*p >> 2);
+    carry = *p++ << 6;
+    *rp++ = carry + (*p >> 4);
+    carry = *p++ << 4;
+    *rp++ = carry + (*p >> 6);
+    carry = *p++ << 2;
+    *rp++ = carry + (*p >> 8);
+    *rp++ = *p++;
+  }
+  return rp;
+}
+/*
+ * Expand ten bytes at dp into eight 10-bit values p[0..7]. The layout
+ * repeats after five bytes, so the same step runs twice. Returns dp + 10.
+ */
+static uint8_t *
+unpack_10(uint32_t *p, uint8_t *dp)
+{
+  uint32_t hi;
+  int half;
+
+  for (half = 0; half < 2; half++) {
+    hi = *dp++ << 2;
+    *p++ = hi + (*dp >> 6);
+    hi = ((*dp++ << 4) & 0x3ff);
+    *p++ = hi + (*dp >> 4);
+    hi = ((*dp++ << 6) & 0x3ff);
+    *p++ = hi + (*dp >> 2);
+    hi = ((*dp++ << 8) & 0x3ff);
+    *p++ = hi + *dp++;
+  }
+  return dp;
+}
+/*
+ * Pack p[0..7] into eleven bytes at rp, eleven bits per value, earlier
+ * values in the more significant bits. Values are not masked.
+ * Returns rp + 11.
+ */
+static uint8_t *
+pack_11(uint32_t *p, uint8_t *rp)
+{
+  uint8_t carry;
+
+  *rp++ = (*p >> 3);
+  carry = *p++ << 5;
+  *rp++ = carry + (*p >> 6);
+  carry = *p++ << 2;
+  *rp++ = carry + (*p >> 9);
+  *rp++ = (*p >> 1);
+  carry = *p++ << 7;
+  *rp++ = carry + (*p >> 4);
+  carry = *p++ << 4;
+  *rp++ = carry + (*p >> 7);
+  carry = *p++ << 1;
+  *rp++ = carry + (*p >> 10);
+  *rp++ = (*p >> 2);
+  carry = *p++ << 6;
+  *rp++ = carry + (*p >> 5);
+  carry = *p++ << 3;
+  *rp++ = carry + (*p >> 8);
+  *rp++ = *p++;
+  return rp;
+}
+/*
+ * Expand eleven bytes at dp into eight 11-bit values p[0..7], most
+ * significant bits first. Returns dp + 11.
+ */
+static uint8_t *
+unpack_11(uint32_t *p, uint8_t *dp)
+{
+  uint32_t hi;
+
+  hi = *dp++ << 3;
+  *p++ = hi + (*dp >> 5);
+  hi = ((*dp++ << 6) & 0x7ff);
+  *p++ = hi + (*dp >> 2);
+  hi = ((*dp++ << 9) & 0x7ff);
+  hi += *dp++ << 1;
+  *p++ = hi + (*dp >> 7);
+  hi = ((*dp++ << 4) & 0x7ff);
+  *p++ = hi + (*dp >> 4);
+  hi = ((*dp++ << 7) & 0x7ff);
+  *p++ = hi + (*dp >> 1);
+  hi = ((*dp++ << 10) & 0x7ff);
+  hi += *dp++ << 2;
+  *p++ = hi + (*dp >> 6);
+  hi = ((*dp++ << 5) & 0x7ff);
+  *p++ = hi + (*dp >> 3);
+  hi = ((*dp++ << 8) & 0x7ff);
+  *p++ = hi + *dp++;
+  return dp;
+}
+/*
+ * Pack p[0..7] into twelve bytes at rp, twelve bits per value. Each pair of
+ * values fills exactly three bytes, so the same step runs four times.
+ * Values are not masked. Returns rp + 12.
+ */
+static uint8_t *
+pack_12(uint32_t *p, uint8_t *rp)
+{
+  uint8_t carry;
+  int pair;
+
+  for (pair = 0; pair < 4; pair++) {
+    *rp++ = (*p >> 4);
+    carry = *p++ << 4;
+    *rp++ = carry + (*p >> 8);
+    *rp++ = *p++;
+  }
+  return rp;
+}
+/*
+ * Expand twelve bytes at dp into eight 12-bit values p[0..7]. Every three
+ * bytes hold two values, so the same step runs four times.
+ * Returns dp + 12.
+ */
+static uint8_t *
+unpack_12(uint32_t *p, uint8_t *dp)
+{
+  uint32_t hi;
+  int pair;
+
+  for (pair = 0; pair < 4; pair++) {
+    hi = *dp++ << 4;
+    *p++ = hi + (*dp >> 4);
+    hi = ((*dp++ << 8) & 0xfff);
+    *p++ = hi + *dp++;
+  }
+  return dp;
+}
+/*
+ * Pack p[0..7] into thirteen bytes at rp, thirteen bits per value, earlier
+ * values in the more significant bits. Values are not masked.
+ * Returns rp + 13.
+ */
+static uint8_t *
+pack_13(uint32_t *p, uint8_t *rp)
+{
+  uint8_t carry;
+
+  *rp++ = (*p >> 5);
+  carry = *p++ << 3;
+  *rp++ = carry + (*p >> 10);
+  *rp++ = (*p >> 2);
+  carry = *p++ << 6;
+  *rp++ = carry + (*p >> 7);
+  carry = *p++ << 1;
+  *rp++ = carry + (*p >> 12);
+  *rp++ = (*p >> 4);
+  carry = *p++ << 4;
+  *rp++ = carry + (*p >> 9);
+  *rp++ = (*p >> 1);
+  carry = *p++ << 7;
+  *rp++ = carry + (*p >> 6);
+  carry = *p++ << 2;
+  *rp++ = carry + (*p >> 11);
+  *rp++ = (*p >> 3);
+  carry = *p++ << 5;
+  *rp++ = carry + (*p >> 8);
+  *rp++ = *p++;
+  return rp;
+}
+/*
+ * Expand thirteen bytes at dp into eight 13-bit values p[0..7], most
+ * significant bits first. Returns dp + 13.
+ */
+static uint8_t *
+unpack_13(uint32_t *p, uint8_t *dp)
+{
+  uint32_t hi;
+
+  hi = *dp++ << 5;
+  *p++ = hi + (*dp >> 3);
+  hi = ((*dp++ << 10) & 0x1fff);
+  hi += *dp++ << 2;
+  *p++ = hi + (*dp >> 6);
+  hi = ((*dp++ << 7) & 0x1fff);
+  *p++ = hi + (*dp >> 1);
+  hi = ((*dp++ << 12) & 0x1fff);
+  hi += *dp++ << 4;
+  *p++ = hi + (*dp >> 4);
+  hi = ((*dp++ << 9) & 0x1fff);
+  hi += *dp++ << 1;
+  *p++ = hi + (*dp >> 7);
+  hi = ((*dp++ << 6) & 0x1fff);
+  *p++ = hi + (*dp >> 2);
+  hi = ((*dp++ << 11) & 0x1fff);
+  hi += *dp++ << 3;
+  *p++ = hi + (*dp >> 5);
+  hi = ((*dp++ << 8) & 0x1fff);
+  *p++ = hi + *dp++;
+  return dp;
+}
+/*
+ * Pack p[0..7] into fourteen bytes at rp, fourteen bits per value. The
+ * layout repeats after four values (seven bytes), so the same step runs
+ * twice. Values are not masked. Returns rp + 14.
+ */
+static uint8_t *
+pack_14(uint32_t *p, uint8_t *rp)
+{
+  uint8_t carry;
+  int half;
+
+  for (half = 0; half < 2; half++) {
+    *rp++ = (*p >> 6);
+    carry = *p++ << 2;
+    *rp++ = carry + (*p >> 12);
+    *rp++ = (*p >> 4);
+    carry = *p++ << 4;
+    *rp++ = carry + (*p >> 10);
+    *rp++ = (*p >> 2);
+    carry = *p++ << 6;
+    *rp++ = carry + (*p >> 8);
+    *rp++ = *p++;
+  }
+  return rp;
+}
+/*
+ * Expand fourteen bytes at dp into eight 14-bit values p[0..7]. The layout
+ * repeats after seven bytes, so the same step runs twice.
+ * Returns dp + 14.
+ */
+static uint8_t *
+unpack_14(uint32_t *p, uint8_t *dp)
+{
+  uint32_t hi;
+  int half;
+
+  for (half = 0; half < 2; half++) {
+    hi = *dp++ << 6;
+    *p++ = hi + (*dp >> 2);
+    hi = ((*dp++ << 12) & 0x3fff);
+    hi += *dp++ << 4;
+    *p++ = hi + (*dp >> 4);
+    hi = ((*dp++ << 10) & 0x3fff);
+    hi += *dp++ << 2;
+    *p++ = hi + (*dp >> 6);
+    hi = ((*dp++ << 8) & 0x3fff);
+    *p++ = hi + *dp++;
+  }
+  return dp;
+}
+/*
+ * Pack p[0..7] into fifteen bytes at rp, fifteen bits per value, earlier
+ * values in the more significant bits. Values are not masked.
+ * Returns rp + 15.
+ */
+static uint8_t *
+pack_15(uint32_t *p, uint8_t *rp)
+{
+  uint8_t carry;
+  int shift;
+
+  *rp++ = (*p >> 7);
+  carry = *p++ << 1;
+  for (shift = 6; shift >= 1; shift--) {
+    *rp++ = carry + (*p >> (shift + 8));
+    *rp++ = (*p >> shift);
+    carry = *p++ << (8 - shift);
+  }
+  *rp++ = carry + (*p >> 8);
+  *rp++ = *p++;
+  return rp;
+}
+/*
+ * Expand fifteen bytes at dp into eight 15-bit values p[0..7], most
+ * significant bits first. Returns dp + 15.
+ */
+static uint8_t *
+unpack_15(uint32_t *p, uint8_t *dp)
+{
+  uint32_t hi;
+  int shift;
+
+  hi = *dp++ << 7;
+  *p++ = hi + (*dp >> 1);
+  for (shift = 6; shift >= 1; shift--) {
+    hi = ((*dp++ << (shift + 8)) & 0x7fff);
+    hi += *dp++ << shift;
+    *p++ = hi + (*dp >> (8 - shift));
+  }
+  hi = ((*dp++ << 8) & 0x7fff);
+  *p++ = hi + *dp++;
+  return dp;
+}
+/*
+ * Store the low sixteen bits of each of p[0..7] as two bytes at rp, high
+ * byte first. Returns rp + 16.
+ */
+static uint8_t *
+pack_16(uint32_t *p, uint8_t *rp)
+{
+  int i;
+
+  for (i = 0; i < 8; i++) {
+    rp[2 * i] = (uint8_t)(p[i] >> 8);
+    rp[2 * i + 1] = (uint8_t)p[i];
+  }
+  return rp + 16;
+}
+/*
+ * Read eight 16-bit values from sixteen bytes at dp, high byte first, into
+ * p[0..7]. Returns dp + 16.
+ */
+static uint8_t *
+unpack_16(uint32_t *p, uint8_t *dp)
+{
+  int i;
+
+  for (i = 0; i < 8; i++) {
+    uint32_t hi = dp[2 * i] << 8;
+    p[i] = hi + dp[2 * i + 1];
+  }
+  return dp + 16;
+}
+/*
+ * Pack p[0..7] into seventeen bytes at rp, seventeen bits per value,
+ * earlier values in the more significant bits. Values are not masked.
+ * Returns rp + 17.
+ */
+static uint8_t *
+pack_17(uint32_t *p, uint8_t *rp)
+{
+  uint8_t carry;
+  int shift;
+
+  *rp++ = (*p >> 9);
+  *rp++ = (*p >> 1);
+  carry = *p++ << 7;
+  for (shift = 2; shift <= 7; shift++) {
+    *rp++ = carry + (*p >> (shift + 8));
+    *rp++ = (*p >> shift);
+    carry = *p++ << (8 - shift);
+  }
+  *rp++ = carry + (*p >> 16);
+  *rp++ = (*p >> 8);
+  *rp++ = *p++;
+  return rp;
+}
+/*
+ * Expand seventeen bytes at dp into eight 17-bit values p[0..7], most
+ * significant bits first. Returns dp + 17.
+ */
+static uint8_t *
+unpack_17(uint32_t *p, uint8_t *dp)
+{
+  uint32_t hi;
+  int shift;
+
+  /* For shift == 1 the mask cannot clear anything (a byte << 9 < 0x20000). */
+  for (shift = 1; shift <= 7; shift++) {
+    hi = ((*dp++ << (shift + 8)) & 0x1ffff);
+    hi += *dp++ << shift;
+    *p++ = hi + (*dp >> (8 - shift));
+  }
+  hi = ((*dp++ << 16) & 0x1ffff);
+  hi += *dp++ << 8;
+  *p++ = hi + *dp++;
+  return dp;
+}
+/*
+ * Pack p[0..7] into eighteen bytes at rp, eighteen bits per value. The
+ * layout repeats after four values (nine bytes), so the same step runs
+ * twice. Values are not masked. Returns rp + 18.
+ */
+static uint8_t *
+pack_18(uint32_t *p, uint8_t *rp)
+{
+  uint8_t carry;
+  int half;
+
+  for (half = 0; half < 2; half++) {
+    *rp++ = (*p >> 10);
+    *rp++ = (*p >> 2);
+    carry = *p++ << 6;
+    *rp++ = carry + (*p >> 12);
+    *rp++ = (*p >> 4);
+    carry = *p++ << 4;
+    *rp++ = carry + (*p >> 14);
+    *rp++ = (*p >> 6);
+    carry = *p++ << 2;
+    *rp++ = carry + (*p >> 16);
+    *rp++ = (*p >> 8);
+    *rp++ = *p++;
+  }
+  return rp;
+}
+/*
+ * Expand eighteen bytes at dp into eight 18-bit values p[0..7]. The layout
+ * repeats after nine bytes, so the same step runs twice.
+ * Returns dp + 18.
+ */
+static uint8_t *
+unpack_18(uint32_t *p, uint8_t *dp)
+{
+  uint32_t hi;
+  int half;
+
+  for (half = 0; half < 2; half++) {
+    hi = *dp++ << 10;
+    hi += *dp++ << 2;
+    *p++ = hi + (*dp >> 6);
+    hi = ((*dp++ << 12) & 0x3ffff);
+    hi += *dp++ << 4;
+    *p++ = hi + (*dp >> 4);
+    hi = ((*dp++ << 14) & 0x3ffff);
+    hi += *dp++ << 6;
+    *p++ = hi + (*dp >> 2);
+    hi = ((*dp++ << 16) & 0x3ffff);
+    hi += *dp++ << 8;
+    *p++ = hi + *dp++;
+  }
+  return dp;
+}
+/*
+ * Packs eight values from p into 19 bytes at rp, 19 bits per value, most
+ * significant bit first. Values are not masked, so each is expected to fit
+ * in 19 bits. Returns rp advanced past the bytes written.
+ */
+static uint8_t *
+pack_19(uint32_t *p, uint8_t *rp)
+{
+ uint8_t v; /* low bits of the previous value, carried into the next byte */
+ *rp++ = (*p >> 11); *rp++ = (*p >> 3); v = *p++ << 5;
+ *rp++ = v + (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2;
+ *rp++ = v + (*p >> 17); *rp++ = (*p >> 9); *rp++ = (*p >> 1); v = *p++ << 7;
+ *rp++ = v + (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 15); *rp++ = (*p >> 7); v = *p++ << 1;
+ *rp++ = v + (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6;
+ *rp++ = v + (*p >> 13); *rp++ = (*p >> 5); v = *p++ << 3;
+ *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ return rp;
+}
+/*
+ * Inverse of pack_19(): reads 19 bytes at dp and stores eight 19-bit values
+ * into p. The 0x7ffff mask drops the bits of the shared byte that belong to
+ * the previous value. Returns dp advanced past the bytes consumed.
+ */
+static uint8_t *
+unpack_19(uint32_t *p, uint8_t *dp)
+{
+ uint32_t v;
+ v = *dp++ << 11; v += *dp++ << 3; *p++ = v + (*dp >> 5);
+ v = ((*dp++ << 14) & 0x7ffff); v += *dp++ << 6; *p++ = v + (*dp >> 2);
+ v = ((*dp++ << 17) & 0x7ffff); v += *dp++ << 9; v += *dp++ << 1;
+ *p++ = v + (*dp >> 7);
+ v = ((*dp++ << 12) & 0x7ffff); v += *dp++ << 4; *p++ = v + (*dp >> 4);
+ v = ((*dp++ << 15) & 0x7ffff); v += *dp++ << 7; *p++ = v + (*dp >> 1);
+ v = ((*dp++ << 18) & 0x7ffff); v += *dp++ << 10; v += *dp++ << 2;
+ *p++ = v + (*dp >> 6);
+ v = ((*dp++ << 13) & 0x7ffff); v += *dp++ << 5; *p++ = v + (*dp >> 3);
+ v = ((*dp++ << 16) & 0x7ffff); v += *dp++ << 8; *p++ = v + *dp++;
+ return dp;
+}
+/*
+ * Packs eight values from p into 20 bytes at rp, 20 bits per value, most
+ * significant bit first. The pattern repeats after two values (5 bytes).
+ * Values are not masked, so each is expected to fit in 20 bits.
+ * Returns rp advanced past the bytes written.
+ */
+static uint8_t *
+pack_20(uint32_t *p, uint8_t *rp)
+{
+ uint8_t v; /* low nibble of the previous value, carried into the next byte */
+ *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ return rp;
+}
+/*
+ * Inverse of pack_20(): reads 20 bytes at dp and stores eight 20-bit values
+ * into p. The 0xfffff mask drops the high nibble of the shared byte, which
+ * belongs to the previous value. Returns dp advanced past the bytes consumed.
+ */
+static uint8_t *
+unpack_20(uint32_t *p, uint8_t *dp)
+{
+ uint32_t v;
+ v = *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4);
+ v = ((*dp++ << 16) & 0xfffff); v += *dp++ << 8; *p++ = v + *dp++;
+ v = *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4);
+ v = ((*dp++ << 16) & 0xfffff); v += *dp++ << 8; *p++ = v + *dp++;
+ v = *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4);
+ v = ((*dp++ << 16) & 0xfffff); v += *dp++ << 8; *p++ = v + *dp++;
+ v = *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4);
+ v = ((*dp++ << 16) & 0xfffff); v += *dp++ << 8; *p++ = v + *dp++;
+ return dp;
+}
+/*
+ * Packs eight values from p into 21 bytes at rp, 21 bits per value, most
+ * significant bit first. Values are not masked, so each is expected to fit
+ * in 21 bits. Returns rp advanced past the bytes written.
+ */
+static uint8_t *
+pack_21(uint32_t *p, uint8_t *rp)
+{
+ uint8_t v; /* low bits of the previous value, carried into the next byte */
+ *rp++ = (*p >> 13); *rp++ = (*p >> 5); v = *p++ << 3;
+ *rp++ = v + (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6;
+ *rp++ = v + (*p >> 15); *rp++ = (*p >> 7); v = *p++ << 1;
+ *rp++ = v + (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 17); *rp++ = (*p >> 9); *rp++ = (*p >> 1); v = *p++ << 7;
+ *rp++ = v + (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2;
+ *rp++ = v + (*p >> 19); *rp++ = (*p >> 11); *rp++ = (*p >> 3); v = *p++ << 5;
+ *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ return rp;
+}
+/*
+ * Inverse of pack_21(): reads 21 bytes at dp and stores eight 21-bit values
+ * into p. The 0x1fffff mask drops the bits of the shared byte that belong to
+ * the previous value. Returns dp advanced past the bytes consumed.
+ */
+static uint8_t *
+unpack_21(uint32_t *p, uint8_t *dp)
+{
+ uint32_t v;
+ v = *dp++ << 13; v += *dp++ << 5; *p++ = v + (*dp >> 3);
+ v = ((*dp++ << 18) & 0x1fffff); v += *dp++ << 10; v += *dp++ << 2;
+ *p++ = v + (*dp >> 6);
+ v = ((*dp++ << 15) & 0x1fffff); v += *dp++ << 7; *p++ = v + (*dp >> 1);
+ v = ((*dp++ << 20) & 0x1fffff); v += *dp++ << 12; v += *dp++ << 4;
+ *p++ = v + (*dp >> 4);
+ v = ((*dp++ << 17) & 0x1fffff); v += *dp++ << 9; v += *dp++ << 1;
+ *p++ = v + (*dp >> 7);
+ v = ((*dp++ << 14) & 0x1fffff); v += *dp++ << 6; *p++ = v + (*dp >> 2);
+ v = ((*dp++ << 19) & 0x1fffff); v += *dp++ << 11; v += *dp++ << 3;
+ *p++ = v + (*dp >> 5);
+ v = ((*dp++ << 16) & 0x1fffff); v += *dp++ << 8; *p++ = v + *dp++;
+ return dp;
+}
+/*
+ * Packs eight values from p into 22 bytes at rp, 22 bits per value, most
+ * significant bit first. The pattern repeats after four values (11 bytes).
+ * Values are not masked, so each is expected to fit in 22 bits.
+ * Returns rp advanced past the bytes written.
+ */
+static uint8_t *
+pack_22(uint32_t *p, uint8_t *rp)
+{
+ uint8_t v; /* low bits of the previous value, carried into the next byte */
+ *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2;
+ *rp++ = v + (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6;
+ *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2;
+ *rp++ = v + (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6;
+ *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ return rp;
+}
+/*
+ * Inverse of pack_22(): reads 22 bytes at dp and stores eight 22-bit values
+ * into p. The 0x3fffff mask drops the bits of the shared byte that belong to
+ * the previous value. Returns dp advanced past the bytes consumed.
+ */
+static uint8_t *
+unpack_22(uint32_t *p, uint8_t *dp)
+{
+ uint32_t v;
+ v = *dp++ << 14; v += *dp++ << 6; *p++ = v + (*dp >> 2);
+ v = ((*dp++ << 20) & 0x3fffff); v += *dp++ << 12; v += *dp++ << 4;
+ *p++ = v + (*dp >> 4);
+ v = ((*dp++ << 18) & 0x3fffff); v += *dp++ << 10; v += *dp++ << 2;
+ *p++ = v + (*dp >> 6);
+ v = ((*dp++ << 16) & 0x3fffff); v += *dp++ << 8; *p++ = v + *dp++;
+ v = *dp++ << 14; v += *dp++ << 6; *p++ = v + (*dp >> 2);
+ v = ((*dp++ << 20) & 0x3fffff); v += *dp++ << 12; v += *dp++ << 4;
+ *p++ = v + (*dp >> 4);
+ v = ((*dp++ << 18) & 0x3fffff); v += *dp++ << 10; v += *dp++ << 2;
+ *p++ = v + (*dp >> 6);
+ v = ((*dp++ << 16) & 0x3fffff); v += *dp++ << 8; *p++ = v + *dp++;
+ return dp;
+}
+/*
+ * Packs eight values from p into 23 bytes at rp, 23 bits per value, most
+ * significant bit first. Values are not masked, so each is expected to fit
+ * in 23 bits. Returns rp advanced past the bytes written.
+ */
+static uint8_t *
+pack_23(uint32_t *p, uint8_t *rp)
+{
+ uint8_t v; /* low bits of the previous value, carried into the next byte */
+ *rp++ = (*p >> 15); *rp++ = (*p >> 7); v = *p++ << 1;
+ *rp++ = v + (*p >> 22); *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2;
+ *rp++ = v + (*p >> 21); *rp++ = (*p >> 13); *rp++ = (*p >> 5); v = *p++ << 3;
+ *rp++ = v + (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 19); *rp++ = (*p >> 11); *rp++ = (*p >> 3); v = *p++ << 5;
+ *rp++ = v + (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6;
+ *rp++ = v + (*p >> 17); *rp++ = (*p >> 9); *rp++ = (*p >> 1); v = *p++ << 7;
+ *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ return rp;
+}
+/*
+ * Inverse of pack_23(): reads 23 bytes at dp and stores eight 23-bit values
+ * into p. The 0x7fffff mask drops the bits of the shared byte that belong to
+ * the previous value. Returns dp advanced past the bytes consumed.
+ */
+static uint8_t *
+unpack_23(uint32_t *p, uint8_t *dp)
+{
+ uint32_t v;
+ v = *dp++ << 15; v += *dp++ << 7; *p++ = v + (*dp >> 1);
+ v = ((*dp++ << 22) & 0x7fffff); v += *dp++ << 14; v += *dp++ << 6;
+ *p++ = v + (*dp >> 2);
+ v = ((*dp++ << 21) & 0x7fffff); v += *dp++ << 13; v += *dp++ << 5;
+ *p++ = v + (*dp >> 3);
+ v = ((*dp++ << 20) & 0x7fffff); v += *dp++ << 12; v += *dp++ << 4;
+ *p++ = v + (*dp >> 4);
+ v = ((*dp++ << 19) & 0x7fffff); v += *dp++ << 11; v += *dp++ << 3;
+ *p++ = v + (*dp >> 5);
+ v = ((*dp++ << 18) & 0x7fffff); v += *dp++ << 10; v += *dp++ << 2;
+ *p++ = v + (*dp >> 6);
+ v = ((*dp++ << 17) & 0x7fffff); v += *dp++ << 9; v += *dp++ << 1;
+ *p++ = v + (*dp >> 7);
+ v = ((*dp++ << 16) & 0x7fffff); v += *dp++ << 8; *p++ = v + *dp++;
+ return dp;
+}
+/*
+ * Packs eight values from p into 24 bytes at rp: each value is written as
+ * three bytes, most significant first. Bits above bit 23 are dropped by the
+ * narrowing store. Returns rp advanced past the bytes written.
+ */
+static uint8_t *
+pack_24(uint32_t *p, uint8_t *rp)
+{
+ *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ return rp;
+}
+/*
+ * Inverse of pack_24(): reads 24 bytes at dp and stores eight values into p,
+ * each assembled from three bytes, most significant first.
+ * Returns dp advanced past the bytes consumed.
+ */
+static uint8_t *
+unpack_24(uint32_t *p, uint8_t *dp)
+{
+ uint32_t v;
+ v = *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++;
+ v = *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++;
+ v = *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++;
+ v = *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++;
+ v = *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++;
+ v = *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++;
+ v = *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++;
+ v = *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++;
+ return dp;
+}
+/*
+ * Packs eight values from p into 25 bytes at rp, 25 bits per value, most
+ * significant bit first. Values are not masked, so each is expected to fit
+ * in 25 bits. Returns rp advanced past the bytes written.
+ */
+static uint8_t *
+pack_25(uint32_t *p, uint8_t *rp)
+{
+ uint8_t v; /* low bits of the previous value, carried into the next byte */
+ *rp++ = (*p >> 17); *rp++ = (*p >> 9); *rp++ = (*p >> 1); v = *p++ << 7;
+ *rp++ = v + (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6;
+ *rp++ = v + (*p >> 19); *rp++ = (*p >> 11); *rp++ = (*p >> 3); v = *p++ << 5;
+ *rp++ = v + (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 21); *rp++ = (*p >> 13); *rp++ = (*p >> 5); v = *p++ << 3;
+ *rp++ = v + (*p >> 22); *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2;
+ *rp++ = v + (*p >> 23); *rp++ = (*p >> 15); *rp++ = (*p >> 7); v = *p++ << 1;
+ *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ return rp;
+}
+/*
+ * Inverse of pack_25(): reads 25 bytes at dp and stores eight 25-bit values
+ * into p. The 0x1ffffff mask drops the bits of the shared byte that belong
+ * to the previous value. Returns dp advanced past the bytes consumed.
+ *
+ * A byte shifted left by 24 is widened to uint32_t first: the default
+ * promotion is to signed int, where such a shift of a byte >= 0x80 is
+ * undefined.
+ */
+static uint8_t *
+unpack_25(uint32_t *p, uint8_t *dp)
+{
+  uint32_t v;
+  v = *dp++ << 17; v += *dp++ << 9; v += *dp++ << 1; *p++ = v + (*dp >> 7);
+  v = ((*dp++ << 18) & 0x1ffffff); v += *dp++ << 10; v += *dp++ << 2;
+  *p++ = v + (*dp >> 6);
+  v = ((*dp++ << 19) & 0x1ffffff); v += *dp++ << 11; v += *dp++ << 3;
+  *p++ = v + (*dp >> 5);
+  v = ((*dp++ << 20) & 0x1ffffff); v += *dp++ << 12; v += *dp++ << 4;
+  *p++ = v + (*dp >> 4);
+  v = ((*dp++ << 21) & 0x1ffffff); v += *dp++ << 13; v += *dp++ << 5;
+  *p++ = v + (*dp >> 3);
+  v = ((*dp++ << 22) & 0x1ffffff); v += *dp++ << 14; v += *dp++ << 6;
+  *p++ = v + (*dp >> 2);
+  v = ((*dp++ << 23) & 0x1ffffff); v += *dp++ << 15; v += *dp++ << 7;
+  *p++ = v + (*dp >> 1);
+  v = (((uint32_t)*dp++ << 24) & 0x1ffffff); v += *dp++ << 16; v += *dp++ << 8;
+  *p++ = v + *dp++;
+  return dp;
+}
+/*
+ * Packs eight values from p into 26 bytes at rp, 26 bits per value, most
+ * significant bit first. The pattern repeats after four values (13 bytes).
+ * Values are not masked, so each is expected to fit in 26 bits.
+ * Returns rp advanced past the bytes written.
+ */
+static uint8_t *
+pack_26(uint32_t *p, uint8_t *rp)
+{
+ uint8_t v; /* low bits of the previous value, carried into the next byte */
+ *rp++ = (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6;
+ *rp++ = v + (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 22); *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2;
+ *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6;
+ *rp++ = v + (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 22); *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2;
+ *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ return rp;
+}
+/*
+ * Inverse of pack_26(): reads 26 bytes at dp and stores eight 26-bit values
+ * into p. The 0x3ffffff mask drops the bits of the shared byte that belong
+ * to the previous value. Returns dp advanced past the bytes consumed.
+ *
+ * A byte shifted left by 24 is widened to uint32_t first: the default
+ * promotion is to signed int, where such a shift of a byte >= 0x80 is
+ * undefined.
+ */
+static uint8_t *
+unpack_26(uint32_t *p, uint8_t *dp)
+{
+  uint32_t v;
+  v = *dp++ << 18; v += *dp++ << 10; v += *dp++ << 2; *p++ = v + (*dp >> 6);
+  v = ((*dp++ << 20) & 0x3ffffff); v += *dp++ << 12; v += *dp++ << 4;
+  *p++ = v + (*dp >> 4);
+  v = ((*dp++ << 22) & 0x3ffffff); v += *dp++ << 14; v += *dp++ << 6;
+  *p++ = v + (*dp >> 2);
+  v = (((uint32_t)*dp++ << 24) & 0x3ffffff); v += *dp++ << 16; v += *dp++ << 8;
+  *p++ = v + *dp++;
+  v = *dp++ << 18; v += *dp++ << 10; v += *dp++ << 2; *p++ = v + (*dp >> 6);
+  v = ((*dp++ << 20) & 0x3ffffff); v += *dp++ << 12; v += *dp++ << 4;
+  *p++ = v + (*dp >> 4);
+  v = ((*dp++ << 22) & 0x3ffffff); v += *dp++ << 14; v += *dp++ << 6;
+  *p++ = v + (*dp >> 2);
+  v = (((uint32_t)*dp++ << 24) & 0x3ffffff); v += *dp++ << 16; v += *dp++ << 8;
+  *p++ = v + *dp++;
+  return dp;
+}
+/*
+ * Packs eight values from p into 27 bytes at rp, 27 bits per value, most
+ * significant bit first. Values are not masked, so each is expected to fit
+ * in 27 bits. Returns rp advanced past the bytes written.
+ */
+static uint8_t *
+pack_27(uint32_t *p, uint8_t *rp)
+{
+ uint8_t v; /* low bits of the previous value, carried into the next byte */
+ *rp++ = (*p >> 19); *rp++ = (*p >> 11); *rp++ = (*p >> 3); v = *p++ << 5;
+ *rp++ = v + (*p >> 22); *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2;
+ *rp++ = v + (*p >> 25); *rp++ = (*p >> 17); *rp++ = (*p >> 9);
+ *rp++ = (*p >> 1); v = *p++ << 7;
+ *rp++ = v + (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 23); *rp++ = (*p >> 15); *rp++ = (*p >> 7); v = *p++ << 1;
+ *rp++ = v + (*p >> 26); *rp++ = (*p >> 18); *rp++ = (*p >> 10);
+ *rp++ = (*p >> 2); v = *p++ << 6;
+ *rp++ = v + (*p >> 21); *rp++ = (*p >> 13); *rp++ = (*p >> 5); v = *p++ << 3;
+ *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ return rp;
+}
+/*
+ * Inverse of pack_27(): reads 27 bytes at dp and stores eight 27-bit values
+ * into p. The 0x7ffffff mask drops the bits of the shared byte that belong
+ * to the previous value. Returns dp advanced past the bytes consumed.
+ *
+ * Bytes shifted left by 24 or more are widened to uint32_t first: the
+ * default promotion is to signed int, where such a shift of a large byte is
+ * undefined.
+ */
+static uint8_t *
+unpack_27(uint32_t *p, uint8_t *dp)
+{
+  uint32_t v;
+  v = *dp++ << 19; v += *dp++ << 11; v += *dp++ << 3; *p++ = v + (*dp >> 5);
+  v = ((*dp++ << 22) & 0x7ffffff); v += *dp++ << 14; v += *dp++ << 6;
+  *p++ = v + (*dp >> 2);
+  v = (((uint32_t)*dp++ << 25) & 0x7ffffff); v += *dp++ << 17; v += *dp++ << 9;
+  v += *dp++ << 1; *p++ = v + (*dp >> 7);
+  v = ((*dp++ << 20) & 0x7ffffff); v += *dp++ << 12; v += *dp++ << 4;
+  *p++ = v + (*dp >> 4);
+  v = ((*dp++ << 23) & 0x7ffffff); v += *dp++ << 15; v += *dp++ << 7;
+  *p++ = v + (*dp >> 1);
+  v = (((uint32_t)*dp++ << 26) & 0x7ffffff); v += *dp++ << 18; v += *dp++ << 10;
+  v += *dp++ << 2; *p++ = v + (*dp >> 6);
+  v = ((*dp++ << 21) & 0x7ffffff); v += *dp++ << 13; v += *dp++ << 5;
+  *p++ = v + (*dp >> 3);
+  v = (((uint32_t)*dp++ << 24) & 0x7ffffff); v += *dp++ << 16; v += *dp++ << 8;
+  *p++ = v + *dp++;
+  return dp;
+}
+/*
+ * Packs eight values from p into 28 bytes at rp, 28 bits per value, most
+ * significant bit first. The pattern repeats after two values (7 bytes).
+ * Values are not masked, so each is expected to fit in 28 bits.
+ * Returns rp advanced past the bytes written.
+ */
+static uint8_t *
+pack_28(uint32_t *p, uint8_t *rp)
+{
+ uint8_t v; /* low nibble of the previous value, carried into the next byte */
+ *rp++ = (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ return rp;
+}
+/*
+ * Inverse of pack_28(): reads 28 bytes at dp and stores eight 28-bit values
+ * into p. The 0xfffffff mask drops the high nibble of the shared byte, which
+ * belongs to the previous value. Returns dp advanced past the bytes consumed.
+ *
+ * A byte shifted left by 24 is widened to uint32_t first: the default
+ * promotion is to signed int, where such a shift of a byte >= 0x80 is
+ * undefined.
+ */
+static uint8_t *
+unpack_28(uint32_t *p, uint8_t *dp)
+{
+  uint32_t v;
+  v = *dp++ << 20; v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4);
+  v = (((uint32_t)*dp++ << 24) & 0xfffffff); v += *dp++ << 16; v += *dp++ << 8;
+  *p++ = v + *dp++;
+  v = *dp++ << 20; v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4);
+  v = (((uint32_t)*dp++ << 24) & 0xfffffff); v += *dp++ << 16; v += *dp++ << 8;
+  *p++ = v + *dp++;
+  v = *dp++ << 20; v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4);
+  v = (((uint32_t)*dp++ << 24) & 0xfffffff); v += *dp++ << 16; v += *dp++ << 8;
+  *p++ = v + *dp++;
+  v = *dp++ << 20; v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4);
+  v = (((uint32_t)*dp++ << 24) & 0xfffffff); v += *dp++ << 16; v += *dp++ << 8;
+  *p++ = v + *dp++;
+  return dp;
+}
+/*
+ * Packs eight values from p into 29 bytes at rp, 29 bits per value, most
+ * significant bit first. Values are not masked, so each is expected to fit
+ * in 29 bits. Returns rp advanced past the bytes written.
+ */
+static uint8_t *
+pack_29(uint32_t *p, uint8_t *rp)
+{
+ uint8_t v; /* low bits of the previous value, carried into the next byte */
+ *rp++ = (*p >> 21); *rp++ = (*p >> 13); *rp++ = (*p >> 5); v = *p++ << 3;
+ *rp++ = v + (*p >> 26); *rp++ = (*p >> 18); *rp++ = (*p >> 10);
+ *rp++ = (*p >> 2); v = *p++ << 6;
+ *rp++ = v + (*p >> 23); *rp++ = (*p >> 15); *rp++ = (*p >> 7); v = *p++ << 1;
+ *rp++ = v + (*p >> 28); *rp++ = (*p >> 20); *rp++ = (*p >> 12);
+ *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 25); *rp++ = (*p >> 17); *rp++ = (*p >> 9);
+ *rp++ = (*p >> 1); v = *p++ << 7;
+ *rp++ = v + (*p >> 22); *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2;
+ *rp++ = v + (*p >> 27); *rp++ = (*p >> 19); *rp++ = (*p >> 11);
+ *rp++ = (*p >> 3); v = *p++ << 5;
+ *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ return rp;
+}
+/*
+ * Inverse of pack_29(): reads 29 bytes at dp and stores eight 29-bit values
+ * into p. The 0x1fffffff mask drops the bits of the shared byte that belong
+ * to the previous value. Returns dp advanced past the bytes consumed.
+ *
+ * Bytes shifted left by 24 or more are widened to uint32_t first: the
+ * default promotion is to signed int, where such a shift of a large byte is
+ * undefined.
+ */
+static uint8_t *
+unpack_29(uint32_t *p, uint8_t *dp)
+{
+  uint32_t v;
+  v = *dp++ << 21; v += *dp++ << 13; v += *dp++ << 5; *p++ = v + (*dp >> 3);
+  v = (((uint32_t)*dp++ << 26) & 0x1fffffff); v += *dp++ << 18; v += *dp++ << 10;
+  v += *dp++ << 2; *p++ = v + (*dp >> 6);
+  v = ((*dp++ << 23) & 0x1fffffff); v += *dp++ << 15; v += *dp++ << 7;
+  *p++ = v + (*dp >> 1);
+  v = (((uint32_t)*dp++ << 28) & 0x1fffffff); v += *dp++ << 20; v += *dp++ << 12;
+  v += *dp++ << 4; *p++ = v + (*dp >> 4);
+  v = (((uint32_t)*dp++ << 25) & 0x1fffffff); v += *dp++ << 17; v += *dp++ << 9;
+  v += *dp++ << 1; *p++ = v + (*dp >> 7);
+  v = ((*dp++ << 22) & 0x1fffffff); v += *dp++ << 14; v += *dp++ << 6;
+  *p++ = v + (*dp >> 2);
+  v = (((uint32_t)*dp++ << 27) & 0x1fffffff); v += *dp++ << 19; v += *dp++ << 11;
+  v += *dp++ << 3; *p++ = v + (*dp >> 5);
+  v = (((uint32_t)*dp++ << 24) & 0x1fffffff); v += *dp++ << 16; v += *dp++ << 8;
+  *p++ = v + *dp++;
+  return dp;
+}
+/*
+ * Packs eight values from p into 30 bytes at rp, 30 bits per value, most
+ * significant bit first. The pattern repeats after four values (15 bytes).
+ * Values are not masked, so each is expected to fit in 30 bits.
+ * Returns rp advanced past the bytes written.
+ */
+static uint8_t *
+pack_30(uint32_t *p, uint8_t *rp)
+{
+ uint8_t v; /* low bits of the previous value, carried into the next byte */
+ *rp++ = (*p >> 22); *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2;
+ *rp++ = v + (*p >> 28); *rp++ = (*p >> 20); *rp++ = (*p >> 12);
+ *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 26); *rp++ = (*p >> 18); *rp++ = (*p >> 10);
+ *rp++ = (*p >> 2); v = *p++ << 6;
+ *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 22); *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2;
+ *rp++ = v + (*p >> 28); *rp++ = (*p >> 20); *rp++ = (*p >> 12);
+ *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 26); *rp++ = (*p >> 18); *rp++ = (*p >> 10);
+ *rp++ = (*p >> 2); v = *p++ << 6;
+ *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8);
+ *rp++ = *p++;
+ return rp;
+}
+/*
+ * Inverse of pack_30(): reads 30 bytes at dp and stores eight 30-bit values
+ * into p. The 0x3fffffff mask drops the bits of the shared byte that belong
+ * to the previous value. Returns dp advanced past the bytes consumed.
+ *
+ * Bytes shifted left by 24 or more are widened to uint32_t first: the
+ * default promotion is to signed int, where such a shift of a large byte is
+ * undefined.
+ */
+static uint8_t *
+unpack_30(uint32_t *p, uint8_t *dp)
+{
+  uint32_t v;
+  v = *dp++ << 22; v += *dp++ << 14; v += *dp++ << 6; *p++ = v + (*dp >> 2);
+  v = (((uint32_t)*dp++ << 28) & 0x3fffffff); v += *dp++ << 20; v += *dp++ << 12;
+  v += *dp++ << 4; *p++ = v + (*dp >> 4);
+  v = (((uint32_t)*dp++ << 26) & 0x3fffffff); v += *dp++ << 18; v += *dp++ << 10;
+  v += *dp++ << 2; *p++ = v + (*dp >> 6);
+  v = (((uint32_t)*dp++ << 24) & 0x3fffffff); v += *dp++ << 16; v += *dp++ << 8;
+  *p++ = v + *dp++;
+  v = *dp++ << 22; v += *dp++ << 14; v += *dp++ << 6; *p++ = v + (*dp >> 2);
+  v = (((uint32_t)*dp++ << 28) & 0x3fffffff); v += *dp++ << 20; v += *dp++ << 12;
+  v += *dp++ << 4; *p++ = v + (*dp >> 4);
+  v = (((uint32_t)*dp++ << 26) & 0x3fffffff); v += *dp++ << 18; v += *dp++ << 10;
+  v += *dp++ << 2; *p++ = v + (*dp >> 6);
+  v = (((uint32_t)*dp++ << 24) & 0x3fffffff); v += *dp++ << 16; v += *dp++ << 8;
+  *p++ = v + *dp++;
+  return dp;
+}
+/*
+ * Packs eight values from p into 31 bytes at rp, 31 bits per value, most
+ * significant bit first. Values are not masked, so each is expected to fit
+ * in 31 bits. Returns rp advanced past the bytes written.
+ */
+static uint8_t *
+pack_31(uint32_t *p, uint8_t *rp)
+{
+ uint8_t v; /* low bits of the previous value, carried into the next byte */
+ *rp++ = (*p >> 23); *rp++ = (*p >> 15); *rp++ = (*p >> 7); v = *p++ << 1;
+ *rp++ = v + (*p >> 30); *rp++ = (*p >> 22); *rp++ = (*p >> 14);
+ *rp++ = (*p >> 6); v = *p++ << 2;
+ *rp++ = v + (*p >> 29); *rp++ = (*p >> 21); *rp++ = (*p >> 13);
+ *rp++ = (*p >> 5); v = *p++ << 3;
+ *rp++ = v + (*p >> 28); *rp++ = (*p >> 20); *rp++ = (*p >> 12);
+ *rp++ = (*p >> 4); v = *p++ << 4;
+ *rp++ = v + (*p >> 27); *rp++ = (*p >> 19); *rp++ = (*p >> 11);
+ *rp++ = (*p >> 3); v = *p++ << 5;
+ *rp++ = v + (*p >> 26); *rp++ = (*p >> 18); *rp++ = (*p >> 10);
+ *rp++ = (*p >> 2); v = *p++ << 6;
+ *rp++ = v + (*p >> 25); *rp++ = (*p >> 17); *rp++ = (*p >> 9);
+ *rp++ = (*p >> 1); v = *p++ << 7;
+ *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8);
+ *rp++ = *p++;
+ return rp;
+}
+/*
+ * Inverse of pack_31(): reads 31 bytes at dp and stores eight 31-bit values
+ * into p. The 0x7fffffff mask drops the bits of the shared byte that belong
+ * to the previous value. Returns dp advanced past the bytes consumed.
+ *
+ * Bytes shifted left by 24 or more are widened to uint32_t first: the
+ * default promotion is to signed int, where such a shift of a large byte is
+ * undefined.
+ */
+static uint8_t *
+unpack_31(uint32_t *p, uint8_t *dp)
+{
+  uint32_t v;
+  v = *dp++ << 23; v += *dp++ << 15; v += *dp++ << 7; *p++ = v + (*dp >> 1);
+  v = (((uint32_t)*dp++ << 30) & 0x7fffffff); v += *dp++ << 22; v += *dp++ << 14;
+  v += *dp++ << 6; *p++ = v + (*dp >> 2);
+  v = (((uint32_t)*dp++ << 29) & 0x7fffffff); v += *dp++ << 21; v += *dp++ << 13;
+  v += *dp++ << 5; *p++ = v + (*dp >> 3);
+  v = (((uint32_t)*dp++ << 28) & 0x7fffffff); v += *dp++ << 20; v += *dp++ << 12;
+  v += *dp++ << 4; *p++ = v + (*dp >> 4);
+  v = (((uint32_t)*dp++ << 27) & 0x7fffffff); v += *dp++ << 19; v += *dp++ << 11;
+  v += *dp++ << 3; *p++ = v + (*dp >> 5);
+  v = (((uint32_t)*dp++ << 26) & 0x7fffffff); v += *dp++ << 18; v += *dp++ << 10;
+  v += *dp++ << 2; *p++ = v + (*dp >> 6);
+  v = (((uint32_t)*dp++ << 25) & 0x7fffffff); v += *dp++ << 17; v += *dp++ << 9;
+  v += *dp++ << 1; *p++ = v + (*dp >> 7);
+  v = (((uint32_t)*dp++ << 24) & 0x7fffffff); v += *dp++ << 16; v += *dp++ << 8;
+  *p++ = v + *dp++;
+  return dp;
+}
+/*
+ * Packs eight values from p into 32 bytes at rp: each value is written as
+ * four bytes, most significant first. Returns rp advanced past the bytes
+ * written.
+ */
+static uint8_t *
+pack_32(uint32_t *p, uint8_t *rp)
+{
+ *rp++ = (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ *rp++ = (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++;
+ return rp;
+}
+/*
+ * Inverse of pack_32(): reads 32 bytes at dp and stores eight values into p,
+ * each assembled from four bytes, most significant first.
+ * Returns dp advanced past the bytes consumed.
+ *
+ * The leading byte is widened to uint32_t before the shift by 24: the
+ * default promotion is to signed int, where shifting a byte >= 0x80 that far
+ * is undefined.
+ */
+static uint8_t *
+unpack_32(uint32_t *p, uint8_t *dp)
+{
+  uint32_t v;
+  v = (uint32_t)*dp++ << 24; v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++;
+  v = (uint32_t)*dp++ << 24; v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++;
+  v = (uint32_t)*dp++ << 24; v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++;
+  v = (uint32_t)*dp++ << 24; v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++;
+  v = (uint32_t)*dp++ << 24; v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++;
+  v = (uint32_t)*dp++ << 24; v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++;
+  v = (uint32_t)*dp++ << 24; v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++;
+  v = (uint32_t)*dp++ << 24; v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++;
+  return dp;
+}
+/* </generated> */
+
+/*
+ * Bit-packs i values from p into rp using w bits per value, most significant
+ * bit first, and returns rp advanced past the bytes written.
+ *
+ * Full groups of eight values are delegated to the generated pack_<w>()
+ * helper (w == 0 writes nothing). The remaining (< 8) values go through the
+ * generic accumulator loop below; a trailing partially filled byte is
+ * flushed as is. Values are not masked, so each is expected to fit in w bits.
+ * There is no default case: a w outside 0..32 leaves the group unwritten.
+ */
+static uint8_t *
+pack_(uint32_t *p, uint32_t i, int w, uint8_t *rp)
+{
+ while (i >= 8) {
+ switch (w) {
+ case 0 : break;
+ case 1 : rp = pack_1(p, rp); break;
+ case 2 : rp = pack_2(p, rp); break;
+ case 3 : rp = pack_3(p, rp); break;
+ case 4 : rp = pack_4(p, rp); break;
+ case 5 : rp = pack_5(p, rp); break;
+ case 6 : rp = pack_6(p, rp); break;
+ case 7 : rp = pack_7(p, rp); break;
+ case 8 : rp = pack_8(p, rp); break;
+ case 9 : rp = pack_9(p, rp); break;
+ case 10 : rp = pack_10(p, rp); break;
+ case 11 : rp = pack_11(p, rp); break;
+ case 12 : rp = pack_12(p, rp); break;
+ case 13 : rp = pack_13(p, rp); break;
+ case 14 : rp = pack_14(p, rp); break;
+ case 15 : rp = pack_15(p, rp); break;
+ case 16 : rp = pack_16(p, rp); break;
+ case 17 : rp = pack_17(p, rp); break;
+ case 18 : rp = pack_18(p, rp); break;
+ case 19 : rp = pack_19(p, rp); break;
+ case 20 : rp = pack_20(p, rp); break;
+ case 21 : rp = pack_21(p, rp); break;
+ case 22 : rp = pack_22(p, rp); break;
+ case 23 : rp = pack_23(p, rp); break;
+ case 24 : rp = pack_24(p, rp); break;
+ case 25 : rp = pack_25(p, rp); break;
+ case 26 : rp = pack_26(p, rp); break;
+ case 27 : rp = pack_27(p, rp); break;
+ case 28 : rp = pack_28(p, rp); break;
+ case 29 : rp = pack_29(p, rp); break;
+ case 30 : rp = pack_30(p, rp); break;
+ case 31 : rp = pack_31(p, rp); break;
+ case 32 : rp = pack_32(p, rp); break;
+ }
+ p += 8;
+ i -= 8;
+ }
+ {
+ /*
+ * b is the shift that aligns the current value with the byte being built:
+ * b > 0 the value fits with b bits to spare (keep accumulating in v),
+ * b < 0 the value straddles the byte boundary (emit its high part now),
+ * b == 0 the value ends exactly on the byte boundary.
+ */
+ int b;
+ uint8_t v;
+ uint32_t *pe = p + i;
+ for (b = 8 - w, v = 0; p < pe;) {
+ if (b > 0) {
+ v += *p++ << b;
+ b -= w;
+ } else if (b < 0) {
+ *rp++ = v + (*p >> -b);
+ b += 8;
+ v = 0;
+ } else {
+ *rp++ = v + *p++;
+ b = 8 - w;
+ v = 0;
+ }
+ }
+ /* b + w == 8 means the last byte was complete; otherwise flush v. */
+ if (b + w != 8) { *rp++ = v; }
+ return rp;
+ }
+}
+
+/*
+ * Encodes one block of i values from p into rp and returns rp advanced past
+ * the bytes written.
+ *
+ * freq[n] is expected to hold the number of values in the block that need
+ * exactly n bits (0 <= n <= 32), so that the entries sum to i; the callers
+ * in this file fill it that way. The width w is the smallest one for which
+ * at least i - i/8 values fit.
+ *
+ * Layout:
+ *   - no exceptions:  [w] [packed values]
+ *   - with exceptions: [w + 0x80] [exception count] ... [packed values]
+ *                      [exception payload]
+ *     Values >= (1 << w) are exceptions; value - (1 << w) is written with
+ *     GRN_B_ENC into the payload.
+ *       * If (1 << w) >= UNIT_SIZE the slot of each exception stores the
+ *         index of the next exception (0 for the last one), and the index of
+ *         the first exception is written as one extra header byte.
+ *       * Otherwise the slot is zeroed and each payload entry is preceded by
+ *         the one-byte index of the exception.
+ *
+ * NOTE: the exception slots of the input array p are overwritten.
+ */
+static uint8_t *
+pack(uint32_t *p, uint32_t i, uint8_t *freq, uint8_t *rp)
+{
+  int32_t k, w;
+  uint8_t ebuf[UNIT_SIZE], *ep = ebuf;
+  uint32_t s, *pe = p + i, r, th = i - (i >> 3);
+  for (w = 0, s = 0; w <= 32; w++) {
+    if ((s += freq[w]) >= th) { break; }
+  }
+  if (i == s) {
+    /* Every value fits in w bits: plain bit-packing, no exceptions. */
+    *rp++ = w;
+    return pack_(p, i, w, rp);
+  }
+  /*
+   * With a consistent histogram w is at most 31 here (w == 32 implies
+   * s == i). The shift is done on an unsigned operand because 1 << 31 is
+   * not representable in a signed int.
+   */
+  r = (uint32_t)1 << w;
+  *rp++ = w + 0x80;
+  *rp++ = i - s;
+  if (r >= UNIT_SIZE) {
+    /* Chain the exceptions through their own slots. */
+    uint32_t first, *last = &first;
+    for (k = 0; p < pe; p++, k++) {
+      if (*p >= r) {
+        GRN_B_ENC(*p - r, ep);
+        *last = k;
+        last = p;
+      }
+    }
+    *last = 0;
+    *rp++ = (uint8_t) first;
+  } else {
+    /* Slots are too narrow to hold an index: store it in the payload. */
+    for (k = 0; p < pe; p++, k++) {
+      if (*p >= r) {
+        *ep++ = k;
+        GRN_B_ENC(*p - r, ep);
+        *p = 0;
+      }
+    }
+  }
+  /* p now equals pe; rewind to the start of the block for packing. */
+  rp = pack_(p - i, i, w, rp);
+  grn_memcpy(rp, ebuf, ep - ebuf);
+  return rp + (ep - ebuf);
+}
+
+/*
+ * Encodes data[0..data_size) into a newly allocated buffer.
+ *
+ * The output starts with data_size written by GRN_B_ENC, followed by one
+ * pack() block per UNIT_SIZE values (the last block may be shorter).
+ *
+ * On success *res points at the buffer (allocated with GRN_MALLOC; the
+ * caller owns it) and the number of bytes written is returned. If the
+ * allocation fails *res is NULL and 0 is returned.
+ */
+int
+grn_p_enc(grn_ctx *ctx, uint32_t *data, uint32_t data_size, uint8_t **res)
+{
+  uint8_t *rp, freq[33];
+  uint32_t j, *dp, *dpe, d, w, buf[UNIT_SIZE];
+  size_t capacity = data_size * sizeof(uint32_t) * 2;
+  /* The size header is written even for an empty input: keep room for it. */
+  if (!capacity) { capacity = sizeof(uint32_t) * 2; }
+  if (!(*res = rp = GRN_MALLOC(capacity))) { return 0; }
+  GRN_B_ENC(data_size, rp);
+  memset(freq, 0, 33);
+  for (j = 0, dp = data, dpe = dp + data_size; dp < dpe; j++, dp++) {
+    if (j == UNIT_SIZE) {
+      /* buf is full: flush it as one block and start a new histogram. */
+      rp = pack(buf, j, freq, rp);
+      memset(freq, 0, 33);
+      j = 0;
+    }
+    if ((d = buf[j] = *dp)) {
+      /* freq is indexed by GRN_BIT_SCAN_REV's result + 1; slot 0 is zeros. */
+      GRN_BIT_SCAN_REV(d, w);
+      freq[w + 1]++;
+    } else {
+      freq[0]++;
+    }
+  }
+  if (j) { rp = pack(buf, j, freq, rp); }
+  return rp - *res;
+}
+
+/*
+ * Bit flags tested against datavec.flags by the encoders/decoders below.
+ * CUT_OFF is marked deprecated but is still checked by grn_p_decv().
+ */
+#define USE_P_ENC (1<<0) /* Use PForDelta */
+#define CUT_OFF (1<<1) /* Deprecated */
+#define ODD (1<<2) /* Variable size data */
+
+/*
+ * One vector of uint32_t values. Vectors are used as arrays of dvlen + 1
+ * entries that share a single allocation owned by entry 0: entry [0].data is
+ * the base pointer and entry [dvlen].data marks the end of the allocation
+ * (see datavec_init()).
+ */
+typedef struct {
+ uint32_t *data; /* first value of this vector inside the shared buffer */
+ uint32_t data_size; /* number of values in this vector */
+ uint32_t flags; /* combination of USE_P_ENC, CUT_OFF and ODD */
+} datavec;
+
+/*
+ * Makes sure the shared buffer behind dv holds at least totalsize values and
+ * lays the dvlen vectors out unitsize values apart.
+ *
+ * dv has dvlen + 1 entries; dv[dvlen].data is the end-of-buffer sentinel.
+ * The buffer is reallocated (old contents are discarded) only when it is
+ * missing or too small. Returns GRN_SUCCESS, or ctx->rc after reporting the
+ * allocation failure with MERR. After a failure both dv[0].data and the
+ * sentinel are NULL, so a later capacity check cannot succeed against the
+ * freed buffer.
+ */
+static grn_rc
+datavec_reset(grn_ctx *ctx, datavec *dv, uint32_t dvlen,
+              size_t unitsize, size_t totalsize)
+{
+  uint32_t i;
+  if (!dv[0].data || dv[dvlen].data < dv[0].data + totalsize) {
+    if (dv[0].data) { GRN_FREE(dv[0].data); }
+    if (!(dv[0].data = GRN_MALLOC(totalsize * sizeof(uint32_t)))) {
+      /* Drop the stale end pointer of the buffer that was just freed. */
+      dv[dvlen].data = NULL;
+      MERR("[ii][data-vector][reset] failed to allocate data: "
+           "length:<%u>, "
+           "unit-size:<%" GRN_FMT_SIZE ">, "
+           "total-size:<%" GRN_FMT_SIZE ">",
+           dvlen,
+           unitsize,
+           totalsize);
+      return ctx->rc;
+    }
+    dv[dvlen].data = dv[0].data + totalsize;
+  }
+  for (i = 1; i < dvlen; i++) {
+    dv[i].data = dv[i - 1].data + unitsize;
+  }
+  return GRN_SUCCESS;
+}
+
+/*
+ * Prepares dv (dvlen vectors plus one end sentinel) for use.
+ *
+ * With totalsize == 0 all dvlen + 1 entries are simply zeroed. Otherwise a
+ * single buffer of totalsize values is allocated, dv[dvlen].data is set to
+ * its end, and the vectors are laid out unitsize values apart.
+ * Returns GRN_SUCCESS, or ctx->rc after reporting an allocation failure
+ * with MERR.
+ */
+static grn_rc
+datavec_init(grn_ctx *ctx, datavec *dv, uint32_t dvlen,
+             size_t unitsize, size_t totalsize)
+{
+  int idx;
+  uint32_t *cursor;
+
+  if (totalsize == 0) {
+    memset(dv, 0, sizeof(datavec) * (dvlen + 1));
+    return GRN_SUCCESS;
+  }
+
+  dv[0].data = GRN_MALLOC(totalsize * sizeof(uint32_t));
+  if (dv[0].data == NULL) {
+    MERR("[ii][data-vector][init] failed to allocate data: "
+         "length:<%u>, "
+         "unit-size:<%" GRN_FMT_SIZE ">, "
+         "total-size:<%" GRN_FMT_SIZE ">",
+         dvlen,
+         unitsize,
+         totalsize);
+    return ctx->rc;
+  }
+
+  cursor = dv[0].data;
+  dv[dvlen].data = cursor + totalsize;
+  for (idx = 1; idx < dvlen; ++idx) {
+    cursor += unitsize;
+    dv[idx].data = cursor;
+  }
+  return GRN_SUCCESS;
+}
+
+/*
+ * Releases the shared buffer owned by dv[0], if there is one. The entries
+ * themselves are left untouched.
+ */
+static void
+datavec_fin(grn_ctx *ctx, datavec *dv)
+{
+  if (!dv[0].data) {
+    return;
+  }
+  GRN_FREE(dv[0].data);
+}
+
+/*
+ * Encodes dvlen vectors into res and returns the number of bytes written.
+ *
+ * Every vector must hold dv[0].data_size (df) values; only the last one may
+ * hold more. Returns 0 when dvlen or df is 0 or when that rule is violated.
+ * res has no size parameter: the caller must provide a large enough buffer.
+ *
+ * Layout:
+ *   - no vector has USE_P_ENC:
+ *       GRN_B_ENC((df << 1) + 1), then every value of every vector written
+ *       with GRN_B_ENC.
+ *   - otherwise:
+ *       GRN_B_ENC(usep << 1)   usep has bit l set when dv[l] has USE_P_ENC
+ *       GRN_B_ENC(df)
+ *       GRN_B_ENC(pgap)        only when the last vector has ODD; pgap is the
+ *                              number of values beyond df * dvlen
+ *       then each vector in turn, either as pack() blocks of up to UNIT_SIZE
+ *       values (USE_P_ENC) or as plain GRN_B_ENC values.
+ */
+size_t
+grn_p_encv(grn_ctx *ctx, datavec *dv, uint32_t dvlen, uint8_t *res)
+{
+ uint8_t *rp = res, freq[33];
+ uint32_t pgap, usep, l, df, data_size, *dp, *dpe;
+ if (!dvlen || !(df = dv[0].data_size)) { return 0; }
+ for (usep = 0, data_size = 0, l = 0; l < dvlen; l++) {
+ uint32_t dl = dv[l].data_size;
+ if (dl < df || ((dl > df) && (l != dvlen - 1))) {
+ /* invalid argument */
+ return 0;
+ }
+ /* USE_P_ENC is bit 0, so this sets bit l of usep for a flagged vector. */
+ usep += (dv[l].flags & USE_P_ENC) << l;
+ data_size += dl;
+ }
+ pgap = data_size - df * dvlen;
+ if (!usep) {
+ /* Low bit 1 in the header marks the "everything b-encoded" layout. */
+ GRN_B_ENC((df << 1) + 1, rp);
+ for (l = 0; l < dvlen; l++) {
+ for (dp = dv[l].data, dpe = dp + dv[l].data_size; dp < dpe; dp++) {
+ GRN_B_ENC(*dp, rp);
+ }
+ }
+ } else {
+ uint32_t buf[UNIT_SIZE];
+ GRN_B_ENC((usep << 1), rp);
+ GRN_B_ENC(df, rp);
+ if (dv[dvlen - 1].flags & ODD) {
+ GRN_B_ENC(pgap, rp);
+ } else {
+ GRN_ASSERT(!pgap);
+ }
+ for (l = 0; l < dvlen; l++) {
+ dp = dv[l].data;
+ dpe = dp + dv[l].data_size;
+ if ((dv[l].flags & USE_P_ENC)) {
+ uint32_t j = 0, d;
+ memset(freq, 0, 33);
+ while (dp < dpe) {
+ if (j == UNIT_SIZE) {
+ /* buf is full: flush it as one block and restart the histogram. */
+ rp = pack(buf, j, freq, rp);
+ memset(freq, 0, 33);
+ j = 0;
+ }
+ if ((d = buf[j++] = *dp++)) {
+ uint32_t w;
+ GRN_BIT_SCAN_REV(d, w);
+ freq[w + 1]++;
+ } else {
+ freq[0]++;
+ }
+ }
+ if (j) { rp = pack(buf, j, freq, rp); }
+ } else {
+ while (dp < dpe) { GRN_B_ENC(*dp++, rp); }
+ }
+ }
+ }
+ return rp - res;
+}
+
+/*
+ * Bounds-checked decode of one variable-length integer: reads it from p,
+ * stores it in v and advances p. If the bytes needed would run past pe the
+ * macro executes `return 0` in the ENCLOSING function.
+ *
+ * The first byte selects the form:
+ *   0x00-0x8e  the byte itself is the value
+ *   0x8f       a raw 4-byte uint32_t follows (copied with grn_memcpy, i.e.
+ *              in host byte order)
+ *   0x90-0x9f  3 more bytes follow, result is offset by 0x20408f
+ *   0xa0-0xbf  2 more bytes follow, result is offset by 0x408f
+ *   0xc0-0xff  1 more byte follows, result is offset by 0x8f
+ *
+ * The arguments are not parenthesized and p is both read and assigned, so
+ * pass plain lvalues.
+ */
+#define GRN_B_DEC_CHECK(v,p,pe) do { \
+ uint8_t *_p = (uint8_t *)p; \
+ uint32_t _v; \
+ if (_p >= pe) { return 0; } \
+ _v = *_p++; \
+ switch (_v >> 4) { \
+ case 0x08 : \
+ if (_v == 0x8f) { \
+ if (_p + sizeof(uint32_t) > pe) { return 0; } \
+ grn_memcpy(&_v, _p, sizeof(uint32_t)); \
+ _p += sizeof(uint32_t); \
+ } \
+ break; \
+ case 0x09 : \
+ if (_p + 3 > pe) { return 0; } \
+ _v = (_v - 0x90) * 0x100 + *_p++; \
+ _v = _v * 0x100 + *_p++; \
+ _v = _v * 0x100 + *_p++ + 0x20408f; \
+ break; \
+ case 0x0a : \
+ case 0x0b : \
+ if (_p + 2 > pe) { return 0; } \
+ _v = (_v - 0xa0) * 0x100 + *_p++; \
+ _v = _v * 0x100 + *_p++ + 0x408f; \
+ break; \
+ case 0x0c : \
+ case 0x0d : \
+ case 0x0e : \
+ case 0x0f : \
+ if (_p + 1 > pe) { return 0; } \
+ _v = (_v - 0xc0) * 0x100 + *_p++ + 0x8f; \
+ break; \
+ } \
+ v = _v; \
+ p = _p; \
+} while (0)
+
+/*
+ * Decodes one block written by pack() from [dp, dpe) into rp[0..i) and
+ * returns the position after the block, or NULL when the input is truncated
+ * or malformed.
+ *
+ * Header: the low 7 bits of the first byte are the bit width w (0..32); the
+ * high bit announces exceptions, in which case the next byte is their count.
+ * When (1 << w) - 1 >= UNIT_MASK one more header byte gives the index of the
+ * first exception and every exception slot holds the index of the next one;
+ * otherwise each exception in the payload is preceded by its index byte.
+ * Exception payloads are read with GRN_B_DEC_CHECK and offset by 1 << w.
+ *
+ * NOTE(review): exception indexes are not checked against i because the
+ * capacity of rp is not known here; callers must pass trusted data or a
+ * buffer that can take any 8-bit index.
+ */
+static uint8_t *
+unpack(uint8_t *dp, uint8_t *dpe, int i, uint32_t *rp)
+{
+  uint8_t ne = 0, k = 0, w, head;
+  uint32_t m, *p = rp;
+  int has_exceptions;
+  if (dp >= dpe) { return NULL; }
+  head = *dp++;
+  has_exceptions = (head & 0x80) != 0;
+  w = head & 0x7f;
+  if (has_exceptions) {
+    if (dp >= dpe) { return NULL; }
+    ne = *dp++;
+  }
+  /* No unpack_<w>() exists beyond 32 bits: such a header is corrupt. */
+  if (w > 32) { return NULL; }
+  /* Shifting a 32-bit operand by 32 is undefined, so special-case it. */
+  m = (w < 32) ? (((uint32_t)1 << w) - 1) : 0xffffffffU;
+  if (has_exceptions && m >= UNIT_MASK) {
+    if (dp >= dpe) { return NULL; }
+    k = *dp++;
+  }
+  if (w) {
+    while (i >= 8) {
+      if (dp + w > dpe) { return NULL; }
+      switch (w) {
+      case 1 : dp = unpack_1(p, dp); break;
+      case 2 : dp = unpack_2(p, dp); break;
+      case 3 : dp = unpack_3(p, dp); break;
+      case 4 : dp = unpack_4(p, dp); break;
+      case 5 : dp = unpack_5(p, dp); break;
+      case 6 : dp = unpack_6(p, dp); break;
+      case 7 : dp = unpack_7(p, dp); break;
+      case 8 : dp = unpack_8(p, dp); break;
+      case 9 : dp = unpack_9(p, dp); break;
+      case 10 : dp = unpack_10(p, dp); break;
+      case 11 : dp = unpack_11(p, dp); break;
+      case 12 : dp = unpack_12(p, dp); break;
+      case 13 : dp = unpack_13(p, dp); break;
+      case 14 : dp = unpack_14(p, dp); break;
+      case 15 : dp = unpack_15(p, dp); break;
+      case 16 : dp = unpack_16(p, dp); break;
+      case 17 : dp = unpack_17(p, dp); break;
+      case 18 : dp = unpack_18(p, dp); break;
+      case 19 : dp = unpack_19(p, dp); break;
+      case 20 : dp = unpack_20(p, dp); break;
+      case 21 : dp = unpack_21(p, dp); break;
+      case 22 : dp = unpack_22(p, dp); break;
+      case 23 : dp = unpack_23(p, dp); break;
+      case 24 : dp = unpack_24(p, dp); break;
+      case 25 : dp = unpack_25(p, dp); break;
+      case 26 : dp = unpack_26(p, dp); break;
+      case 27 : dp = unpack_27(p, dp); break;
+      case 28 : dp = unpack_28(p, dp); break;
+      case 29 : dp = unpack_29(p, dp); break;
+      case 30 : dp = unpack_30(p, dp); break;
+      case 31 : dp = unpack_31(p, dp); break;
+      case 32 : dp = unpack_32(p, dp); break;
+      }
+      i -= 8;
+      p += 8;
+    }
+    {
+      /*
+       * Generic decoder for the last (< 8) values; b mirrors the shift used
+       * by the tail loop of pack_().
+       */
+      int b;
+      uint32_t v, *pe;
+      for (b = 8 - w, v = 0, pe = p + i; p < pe && dp < dpe;) {
+        if (b > 0) {
+          *p++ = v + ((*dp >> b) & m);
+          b -= w;
+          v = 0;
+        } else if (b < 0) {
+          /* Widen first: the byte would otherwise be shifted as an int. */
+          v += ((uint32_t)*dp++ << -b) & m;
+          b += 8;
+        } else {
+          *p++ = v + (*dp++ & m);
+          b = 8 - w;
+          v = 0;
+        }
+      }
+      /* Input ran out before every requested value was decoded. */
+      if (p < pe) { return NULL; }
+      /* Skip the partially used last byte. */
+      if (b + w != 8) { dp++; }
+    }
+  } else {
+    memset(p, 0, sizeof(uint32_t) * i);
+  }
+  if (ne) {
+    if (m >= UNIT_MASK) {
+      /* Follow the chain of exception slots starting at index k. */
+      uint32_t *pp;
+      while (ne--) {
+        pp = &rp[k];
+        k = *pp;
+        GRN_B_DEC_CHECK(*pp, dp, dpe);
+        *pp += (m + 1);
+      }
+    } else {
+      while (ne--) {
+        if (dp >= dpe) { return NULL; }
+        k = *dp++;
+        GRN_B_DEC_CHECK(rp[k], dp, dpe);
+        rp[k] += (m + 1);
+      }
+    }
+  }
+  return dp;
+}
+
+/*
+ * Decodes a buffer into a newly allocated uint32_t array stored in *res and
+ * returns the number of values decoded.
+ *
+ * The buffer starts with a count read by GRN_B_DEC (orig_size).
+ *   - orig_size != 0: the rest is a sequence of pack() blocks of UNIT_SIZE
+ *     values (the last one may be shorter). orig_size values are allocated
+ *     and min(nreq, orig_size) of them are decoded; nreq == 0 means all.
+ *     Returns 0 if the allocation or unpack() fails; on an unpack() failure
+ *     *res still points at the allocated array.
+ *   - orig_size == 0: the rest is read as plain GRN_B_DEC values, at most
+ *     nreq of them (data_size when nreq is 0 or larger than data_size).
+ *
+ * NOTE(review): the leading GRN_B_DEC and the values of the second form are
+ * read without checking against the end of data.
+ */
+int
+grn_p_dec(grn_ctx *ctx, uint8_t *data, uint32_t data_size, uint32_t nreq, uint32_t **res)
+{
+ uint8_t *dp = data, *dpe = data + data_size;
+ uint32_t rest, orig_size, *rp, *rpe;
+ GRN_B_DEC(orig_size, dp);
+ if (!orig_size) {
+ if (!nreq || nreq > data_size) { nreq = data_size; }
+ if ((*res = rp = GRN_MALLOC(nreq * 4))) {
+ for (rpe = rp + nreq; dp < data + data_size && rp < rpe; rp++) {
+ GRN_B_DEC(*rp, dp);
+ }
+ }
+ /* Both pointers are NULL after a failed allocation, giving 0. */
+ return rp - *res;
+ } else {
+ if (!(*res = rp = GRN_MALLOC(orig_size * sizeof(uint32_t)))) {
+ return 0;
+ }
+ if (!nreq || nreq > orig_size) { nreq = orig_size; }
+ for (rest = nreq; rest >= UNIT_SIZE; rest -= UNIT_SIZE) {
+ if (!(dp = unpack(dp, dpe, UNIT_SIZE, rp))) { return 0; }
+ rp += UNIT_SIZE;
+ }
+ if (rest) { if (!(dp = unpack(dp, dpe, rest, rp))) { return 0; } }
+ GRN_ASSERT(data + data_size == dp);
+ return nreq;
+ }
+}
+
+/*
+ * Decodes a buffer written by grn_p_encv() into the dvlen vectors of dv and
+ * returns the total number of values decoded (0 on failure or empty input).
+ *
+ * dv has dvlen + 1 entries sharing one buffer: dv[0].data is its base and
+ * dv[dvlen].data its end. The buffer is reallocated when it is too small
+ * for the decoded data. Decoding stops at the first vector flagged CUT_OFF.
+ * The USE_P_ENC flag is set on every vector that was stored with pack().
+ *
+ * When the buffer has to grow, the old pointers are cleared before the new
+ * allocation is attempted, so that an allocation failure cannot leave
+ * dv[0].data pointing at freed memory (which datavec_fin() would free a
+ * second time).
+ */
+int
+grn_p_decv(grn_ctx *ctx, uint8_t *data, uint32_t data_size, datavec *dv, uint32_t dvlen)
+{
+  size_t size;
+  uint32_t df, l, i, *rp, nreq;
+  uint8_t *dp = data, *dpe = data + data_size;
+  if (!data_size) {
+    dv[0].data_size = 0;
+    return 0;
+  }
+  /* nreq = number of leading vectors that are actually requested. */
+  for (nreq = 0; nreq < dvlen; nreq++) {
+    if (dv[nreq].flags & CUT_OFF) { break; }
+  }
+  if (!nreq) { return 0; }
+  GRN_B_DEC_CHECK(df, dp, dpe);
+  if ((df & 1)) {
+    /* Low bit set: every value was written with GRN_B_ENC. */
+    df >>= 1;
+    size = nreq == dvlen ? data_size : df * nreq;
+    if (dv[dvlen].data < dv[0].data + size) {
+      if (dv[0].data) { GRN_FREE(dv[0].data); }
+      dv[0].data = NULL;
+      dv[dvlen].data = NULL;
+      if (!(rp = GRN_MALLOC(size * sizeof(uint32_t)))) { return 0; }
+      dv[0].data = rp;
+      dv[dvlen].data = rp + size;
+    } else {
+      rp = dv[0].data;
+    }
+    for (l = 0; l < dvlen; l++) {
+      if (dv[l].flags & CUT_OFF) { break; }
+      dv[l].data = rp;
+      if (l < dvlen - 1) {
+        for (i = 0; i < df; i++, rp++) { GRN_B_DEC_CHECK(*rp, dp, dpe); }
+      } else {
+        /* The last vector takes whatever is left in the input. */
+        for (i = 0; dp < dpe; i++, rp++) { GRN_B_DEC_CHECK(*rp, dp, dpe); }
+      }
+      dv[l].data_size = i;
+    }
+  } else {
+    /* Low bit clear: the header carries the per-vector USE_P_ENC bitmap. */
+    uint32_t n, rest, usep = df >> 1;
+    GRN_B_DEC_CHECK(df, dp, dpe);
+    if (dv[dvlen -1].flags & ODD) {
+      GRN_B_DEC_CHECK(rest, dp, dpe);
+    } else {
+      rest = 0;
+    }
+    size = df * nreq + (nreq == dvlen ? rest : 0);
+    if (dv[dvlen].data < dv[0].data + size) {
+      if (dv[0].data) { GRN_FREE(dv[0].data); }
+      dv[0].data = NULL;
+      dv[dvlen].data = NULL;
+      if (!(rp = GRN_MALLOC(size * sizeof(uint32_t)))) { return 0; }
+      dv[0].data = rp;
+      dv[dvlen].data = rp + size;
+    } else {
+      rp = dv[0].data;
+    }
+    for (l = 0; l < dvlen; l++) {
+      if (dv[l].flags & CUT_OFF) { break; }
+      dv[l].data = rp;
+      dv[l].data_size = n = (l < dvlen - 1) ? df : df + rest;
+      if (usep & (1 << l)) {
+        for (; n >= UNIT_SIZE; n -= UNIT_SIZE) {
+          if (!(dp = unpack(dp, dpe, UNIT_SIZE, rp))) { return 0; }
+          rp += UNIT_SIZE;
+        }
+        if (n) {
+          if (!(dp = unpack(dp, dpe, n, rp))) { return 0; }
+          rp += n;
+        }
+        dv[l].flags |= USE_P_ENC;
+      } else {
+        for (; n; n--, rp++) {
+          GRN_B_DEC_CHECK(*rp, dp, dpe);
+        }
+      }
+    }
+    GRN_ASSERT(dp == dpe);
+    if (dp != dpe) {
+      GRN_LOG(ctx, GRN_LOG_DEBUG, "data_size=%d, %" GRN_FMT_LLD,
+              data_size, (long long int)(dpe - dp));
+    }
+  }
+  return rp - dv[0].data;
+}
+
+/*
+ * B-encodes `data_size` values from `data` into a newly allocated byte
+ * buffer returned through *res. The output starts with the B-encoded
+ * element count followed by each value.
+ *
+ * Returns the number of bytes written, or 0 when the buffer could not be
+ * allocated (in which case *res is NULL).
+ */
+int
+grn_b_enc(grn_ctx *ctx, uint32_t *data, uint32_t data_size, uint8_t **res)
+{
+  uint8_t *rp;
+  uint32_t *dp, i;
+  /*
+   * Reserve one extra slot for the count header: without it an empty input
+   * would allocate 0 bytes and the header write below would overflow.
+   * The size_t cast keeps the size computation from wrapping in 32 bits.
+   */
+  *res = rp = GRN_MALLOC(((size_t)data_size + 1) * sizeof(uint32_t) * 2);
+  if (!rp) {
+    return 0;
+  }
+  GRN_B_ENC(data_size, rp);
+  for (i = data_size, dp = data; i; i--, dp++) {
+    GRN_B_ENC(*dp, rp);
+  }
+  return rp - *res;
+}
+
+/*
+ * Decodes a buffer produced by grn_b_enc(): a B-encoded element count
+ * followed by that many B-encoded values. The values are stored in a newly
+ * allocated array returned through *res.
+ *
+ * Returns the number of values decoded. This is the encoded count for
+ * well-formed input; decoding stops early when the input is exhausted.
+ * Returns 0 with *res == NULL for empty input or when the allocation fails.
+ */
+int
+grn_b_dec(grn_ctx *ctx, uint8_t *data, uint32_t data_size, uint32_t **res)
+{
+  uint32_t i, *rp, orig_size;
+  uint8_t *dp = data, *dpe = data + data_size;
+  *res = NULL;
+  /* nothing to read: do not touch data[0] */
+  if (!data_size) {
+    return 0;
+  }
+  GRN_B_DEC(orig_size, dp);
+  if (!(rp = GRN_MALLOC(orig_size * sizeof(uint32_t)))) {
+    return 0;
+  }
+  *res = rp;
+  /* never start decoding a value at or beyond the end of the input */
+  for (i = 0; i < orig_size && dp < dpe; i++, rp++) {
+    GRN_B_DEC(*rp, dp);
+  }
+  return i;
+}
+
+/* buffer */
+
+typedef struct { /* per-term slot stored in a buffer segment */
+ uint32_t tid; /* term id; buffer_merge() also keeps the CHUNK_SPLIT flag in this word */
+ uint32_t size_in_chunk; /* byte length of this term's data inside the buffer's chunk */
+ uint32_t pos_in_chunk; /* byte offset of that data from the start of the chunk */
+ uint16_t size_in_buffer; /* counter bumped by buffer_put() when it appends a record */
+ uint16_t pos_in_buffer; /* position of the first buffer_rec of this term; 0 = no records */
+} buffer_term;
+
+typedef struct { /* header of one record in a term's linked list; encoded data follows it */
+ uint16_t step; /* position of the next record in the list; 0 ends the list */
+ uint16_t jump; /* shortcut to a record further along the list; 1 marks the record deleted */
+} buffer_rec;
+
+typedef struct { /* bookkeeping stored at the start of a buffer segment */
+ uint32_t chunk; /* chunk holding the flushed data, or GRN_II_PSEG_NOT_ASSIGNED */
+ uint32_t chunk_size; /* number of bytes used in that chunk */
+ uint32_t buffer_free; /* free bytes left in this buffer segment */
+ uint16_t nterms; /* number of entries used in terms[] */
+ uint16_t nterms_void; /* number of entries in terms[] that have been emptied */
+} buffer_header;
+
+struct grn_ii_buffer { /* one buffer segment: header plus as many term slots as fit in S_SEGMENT */
+ buffer_header header;
+ buffer_term terms[(S_SEGMENT - sizeof(buffer_header))/sizeof(buffer_term)];
+};
+
+typedef struct grn_ii_buffer buffer;
+
+/*
+ * Takes a reference on the buffer segment addressed by `pos`.
+ *
+ * On success *b (when b is non-NULL) receives the start of the segment and
+ * *bt (when bt is non-NULL) the term slot located LPOS(pos) bytes into it.
+ * Returns the physical segment number, or GRN_II_PSEG_NOT_ASSIGNED when no
+ * segment is assigned or the segment could not be referenced. The caller
+ * releases the reference with buffer_close().
+ */
+inline static uint32_t
+buffer_open(grn_ctx *ctx, grn_ii *ii, uint32_t pos, buffer_term **bt, buffer **b)
+{
+  uint16_t logical_seg = (uint16_t) (LSEG(pos));
+  uint32_t physical_seg = ii->header->binfo[logical_seg];
+  byte *seg_addr = NULL;
+
+  if (physical_seg == GRN_II_PSEG_NOT_ASSIGNED) {
+    return physical_seg;
+  }
+  GRN_IO_SEG_REF(ii->seg, physical_seg, seg_addr);
+  if (!seg_addr) {
+    return GRN_II_PSEG_NOT_ASSIGNED;
+  }
+  if (b) {
+    *b = (buffer *)seg_addr;
+  }
+  if (bt) {
+    *bt = (buffer_term *)(seg_addr + LPOS(pos));
+  }
+  return physical_seg;
+}
+
+/*
+ * Drops the reference taken by buffer_open() on physical segment `pseg`.
+ * Returns GRN_INVALID_ARGUMENT (and logs a notice) when `pseg` is not below
+ * the segment count of the index, GRN_SUCCESS otherwise.
+ */
+inline static grn_rc
+buffer_close(grn_ctx *ctx, grn_ii *ii, uint32_t pseg)
+{
+  if (pseg < ii->seg->header->max_segment) {
+    GRN_IO_SEG_UNREF(ii->seg, pseg);
+    return GRN_SUCCESS;
+  }
+  GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid pseg buffer_close(%d)", pseg);
+  return GRN_INVALID_ARGUMENT;
+}
+
+typedef struct { /* identifies one posting: (record, section) */
+ uint32_t rid; /* record id */
+ uint32_t sid; /* section id; code in this file uses 1 when the index has no sections */
+} docid;
+
+#define BUFFER_REC_DEL(r) ((r)->jump = 1) /* mark a record deleted by storing 1 in its jump field */
+#define BUFFER_REC_DELETED(r) ((r)->jump == 1) /* true when the record carries that mark */
+
+#define BUFFER_REC_AT(b,pos) ((buffer_rec *)(b) + (pos)) /* position -> record; positions count sizeof(buffer_rec) units from b */
+#define BUFFER_REC_POS(b,rec) ((uint16_t)((rec) - (buffer_rec *)(b))) /* record -> position (inverse of BUFFER_REC_AT) */
+
+/*
+ * Debug helper: logs the buffer header, the term slot and every record of
+ * the term's list at GRN_LOG_DEBUG level. Does nothing when debug logging
+ * is disabled.
+ */
+inline static void
+buffer_term_dump(grn_ctx *ctx, grn_ii *ii, buffer *b, buffer_term *bt)
+{
+  int pos, rid, sid;
+  uint8_t *cursor;
+  buffer_rec *rec;
+
+  if (!grn_logger_pass(ctx, GRN_LOG_DEBUG)) {
+    return;
+  }
+
+  GRN_LOG(ctx, GRN_LOG_DEBUG,
+          "b=(%x %u %u %u)", b->header.chunk, b->header.chunk_size,
+          b->header.buffer_free, b->header.nterms);
+  GRN_LOG(ctx, GRN_LOG_DEBUG,
+          "bt=(%u %u %u %u %u)", bt->tid, bt->size_in_chunk, bt->pos_in_chunk,
+          bt->size_in_buffer, bt->pos_in_buffer);
+
+  /* follow the step links until the terminating 0 */
+  pos = bt->pos_in_buffer;
+  while (pos) {
+    rec = BUFFER_REC_AT(b, pos);
+    cursor = GRN_NEXT_ADDR(rec);
+    GRN_B_DEC(rid, cursor);
+    if (!(ii->header->flags & GRN_OBJ_WITH_SECTION)) {
+      sid = 1;
+    } else {
+      GRN_B_DEC(sid, cursor);
+    }
+    GRN_LOG(ctx, GRN_LOG_DEBUG,
+            "%d=(%d:%d),(%d:%d)", pos, rec->jump, rec->step, rid, sid);
+    pos = rec->step;
+  }
+}
+
+/*
+ * Validates that record `r` may use position `j` as its jump target.
+ *
+ * j == 0 (no jump) is always accepted; j == 1 (deletion mark) is accepted
+ * after a debug log. Otherwise the target must not step straight back to
+ * `r`, and its (rid, sid) must be strictly greater than the one of `r`.
+ * Returns GRN_SUCCESS or GRN_FILE_CORRUPT.
+ */
+inline static grn_rc
+check_jump(grn_ctx *ctx, grn_ii *ii, buffer *b, buffer_rec *r, int j)
+{
+  uint16_t i = BUFFER_REC_POS(b, r);
+  uint8_t *cursor;
+  buffer_rec *target;
+  docid src_id, dst_id;
+
+  if (!j) {
+    return GRN_SUCCESS;
+  }
+
+  /* decode the id stored in the source record */
+  cursor = GRN_NEXT_ADDR(r);
+  GRN_B_DEC(src_id.rid, cursor);
+  if (!(ii->header->flags & GRN_OBJ_WITH_SECTION)) {
+    src_id.sid = 1;
+  } else {
+    GRN_B_DEC(src_id.sid, cursor);
+  }
+  if (j == 1) {
+    GRN_LOG(ctx, GRN_LOG_DEBUG, "deleting! %d(%d:%d)", i, src_id.rid, src_id.sid);
+    return GRN_SUCCESS;
+  }
+
+  /* decode the id stored in the jump target */
+  target = BUFFER_REC_AT(b, j);
+  cursor = GRN_NEXT_ADDR(target);
+  GRN_B_DEC(dst_id.rid, cursor);
+  if (!(ii->header->flags & GRN_OBJ_WITH_SECTION)) {
+    dst_id.sid = 1;
+  } else {
+    GRN_B_DEC(dst_id.sid, cursor);
+  }
+
+  if (target->step == i) {
+    GRN_LOG(ctx, GRN_LOG_EMERG, "cycle! %d(%d:%d)<->%d(%d:%d)",
+            i, src_id.rid, src_id.sid, j, dst_id.rid, dst_id.sid);
+    return GRN_FILE_CORRUPT;
+  }
+  if (dst_id.rid < src_id.rid ||
+      (dst_id.rid == src_id.rid && dst_id.sid <= src_id.sid)) {
+    GRN_LOG(ctx, GRN_LOG_CRIT,
+            "invalid jump! %d(%d:%d)(%d:%d)->%d(%d:%d)(%d:%d)",
+            i, r->jump, r->step, src_id.rid, src_id.sid, j, target->jump, target->step,
+            dst_id.rid, dst_id.sid);
+    return GRN_FILE_CORRUPT;
+  }
+  return GRN_SUCCESS;
+}
+
+inline static grn_rc
+set_jump_r(grn_ctx *ctx, grn_ii *ii, buffer *b, buffer_rec *from, int to)
+{ /*
+ * Re-threads jump pointers along the list that starts at `from`: each
+ * visited record takes the pending target `j` (initially `to`) as its jump,
+ * and the jump it had before becomes the pending target for the record
+ * reached through its step link.
+ *
+ * The walk ends after at most 100 records, when the pending target is 0 or
+ * 1, when the target is the record itself, is deleted, or is already the
+ * record's jump or step.
+ *
+ * Returns GRN_SUCCESS, ctx->rc after a failed check_jump(), or
+ * GRN_FILE_CORRUPT when a visited record has no successor (step == 0).
+ */
+ int i, j, max_jump = 100;
+ buffer_rec *r, *r2;
+ for (r = from, j = to; j > 1 && max_jump--; r = BUFFER_REC_AT(b, r->step)) {
+ r2 = BUFFER_REC_AT(b, j); /* candidate jump target */
+ if (r == r2) { break; }
+ if (BUFFER_REC_DELETED(r2)) { break; }
+ if (j == (i = r->jump)) { break; } /* also saves the old jump in i */
+ if (j == r->step) { break; } /* a jump equal to the step would be useless */
+ if (check_jump(ctx, ii, b, r, j)) {
+ ERR(GRN_FILE_CORRUPT, "check_jump failed");
+ return ctx->rc;
+ }
+ r->jump = j;
+ j = i; /* the old jump becomes the target for the next record */
+ if (!r->step) { return GRN_FILE_CORRUPT; }
+ }
+ return GRN_SUCCESS;
+}
+
+#define GET_NUM_BITS(x,n) do { /* n = number of 1 bits among the low 32 bits of x */\
+ n = x;\
+ n = (n & 0x55555555) + ((n >> 1) & 0x55555555); /* 2-bit partial sums */\
+ n = (n & 0x33333333) + ((n >> 2) & 0x33333333); /* 4-bit partial sums */\
+ n = (n & 0x0F0F0F0F) + ((n >> 4) & 0x0F0F0F0F); /* 8-bit partial sums */\
+ n = (n & 0x00FF00FF) + ((n >> 8) & 0x00FF00FF); /* 16-bit partial sums */\
+ n = (n & 0x0000FFFF) + ((n >>16) & 0x0000FFFF); /* final total */\
+} while (0)
+
+inline static grn_rc
+buffer_put(grn_ctx *ctx, grn_ii *ii, buffer *b, buffer_term *bt,
+ buffer_rec *rnew, uint8_t *bs, grn_ii_updspec *u, int size)
+{ /*
+ * Links the new record `rnew` into the list of term `bt` inside buffer `b`.
+ *
+ * The encoded record body `bs` (`size` minus the buffer_rec header) is
+ * first copied behind `rnew`. The list is then walked in (rid, sid) order,
+ * using jump pointers as shortcuts, until either its end or the first
+ * record whose id is not smaller than (u->rid, u->sid) is found; `rnew` is
+ * linked in at that point. Existing records with the same id (or, when
+ * u->sid is 0, every record with the same rid) are marked deleted and
+ * unlinked.
+ *
+ * When the walk finds ids going backwards, the condition is reported with
+ * CRIT, the term's list is emptied and `rnew` becomes its only record.
+ *
+ * Returns ctx->rc.
+ */
+ uint8_t *p;
+ docid id_curr = {0, 0}, id_start = {0, 0}, id_post = {0, 0};
+ buffer_rec *r_curr, *r_start = NULL;
+ uint16_t last = 0, *lastp = &bt->pos_in_buffer, pos = BUFFER_REC_POS(b, rnew); /* lastp: the link that will point at rnew */
+ int vdelta = 0, delta, delta0 = 0, vhops = 0, nhops = 0, reset = 1;
+ grn_memcpy(GRN_NEXT_ADDR(rnew), bs, size - sizeof(buffer_rec));
+ for (;;) {
+ if (!*lastp) { /* reached the end of the list: append rnew */
+ rnew->step = 0;
+ rnew->jump = 0;
+ // smb_wmb();
+ *lastp = pos;
+ if (bt->size_in_buffer++ > 1) {
+ buffer_rec *rhead = BUFFER_REC_AT(b, bt->pos_in_buffer);
+ rhead->jump = pos; /* the head record jumps straight to the new tail */
+ if (!(bt->size_in_buffer & 1)) { /* on even counts, refresh jumps further down the list */
+ int n;
+ buffer_rec *r = BUFFER_REC_AT(b, rhead->step), *r2;
+ GET_NUM_BITS(bt->size_in_buffer, n);
+ while (n-- && (r->jump > 1)) { /* follow up to n live jumps */
+ r2 = BUFFER_REC_AT(b, r->jump);
+ if (BUFFER_REC_DELETED(r2)) { break; }
+ r = r2;
+ }
+ if (r != rnew) { set_jump_r(ctx, ii, b, r, last); }
+ }
+ }
+ break;
+ }
+ r_curr = BUFFER_REC_AT(b, *lastp);
+ p = GRN_NEXT_ADDR(r_curr);
+ GRN_B_DEC(id_curr.rid, p);
+ if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
+ GRN_B_DEC(id_curr.sid, p);
+ } else {
+ id_curr.sid = 1;
+ }
+ if (id_curr.rid < id_post.rid ||
+ (id_curr.rid == id_post.rid && id_curr.sid < id_post.sid)) { /* ids went backwards: the list is broken */
+ {
+ DEFINE_NAME(ii);
+ CRIT(GRN_FILE_CORRUPT,
+ "[ii][buffer][put] loop is found: "
+ "<%.*s>: "
+ "(%d:%d)->(%d:%d)",
+ name_size, name,
+ id_post.rid, id_post.sid, id_curr.rid, id_curr.sid);
+ }
+ buffer_term_dump(ctx, ii, b, bt);
+ bt->pos_in_buffer = 0; /* drop the whole list and restart from an empty one */
+ bt->size_in_buffer = 0;
+ lastp = &bt->pos_in_buffer;
+ continue;
+ }
+ id_post.rid = id_curr.rid;
+ id_post.sid = id_curr.sid;
+ if (u->rid < id_curr.rid || (u->rid == id_curr.rid && u->sid <= id_curr.sid)) { /* insertion point found: rnew goes before r_curr */
+ uint16_t step = *lastp, jump = r_curr->jump;
+ if (u->rid == id_curr.rid) {
+ if (u->sid == 0) { /* sid 0: supersede every record of this rid */
+ while (id_curr.rid == u->rid) {
+ BUFFER_REC_DEL(r_curr);
+ if (!(step = r_curr->step)) { break; }
+ r_curr = BUFFER_REC_AT(b, step);
+ p = GRN_NEXT_ADDR(r_curr);
+ GRN_B_DEC(id_curr.rid, p);
+ if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
+ GRN_B_DEC(id_curr.sid, p);
+ } else {
+ id_curr.sid = 1;
+ }
+ }
+ } else if (u->sid == id_curr.sid) { /* same posting: replace r_curr */
+ BUFFER_REC_DEL(r_curr);
+ step = r_curr->step;
+ }
+ }
+ rnew->step = step;
+ rnew->jump = check_jump(ctx, ii, b, rnew, jump) ? 0 : jump; /* inherit the jump only when it is still valid */
+ // smb_wmb();
+ *lastp = pos;
+ break;
+ }
+
+ if (reset) { /* start a new run for jump maintenance at r_curr */
+ r_start = r_curr;
+ id_start.rid = id_curr.rid;
+ id_start.sid = id_curr.sid;
+ if (!(delta0 = u->rid - id_start.rid)) { delta0 = u->sid - id_start.sid; }
+ nhops = 0;
+ vhops = 1;
+ vdelta = delta0 >> 1;
+ } else {
+ if (!(delta = id_curr.rid - id_start.rid)) {
+ delta = id_curr.sid - id_start.sid;
+ }
+ if (vdelta < delta) {
+ vdelta += (delta0 >> ++vhops);
+ r_start = r_curr;
+ }
+ if (nhops > vhops) { /* too many plain steps: add a shortcut from r_start */
+ set_jump_r(ctx, ii, b, r_start, *lastp);
+ } else {
+ nhops++;
+ }
+ }
+
+ last = *lastp;
+ lastp = &r_curr->step; /* default: advance by one step */
+ reset = 0;
+ {
+ uint16_t posj = r_curr->jump;
+ if (posj > 1) { /* r_curr has a real jump (0 = none, 1 = deleted) */
+ buffer_rec *rj = BUFFER_REC_AT(b, posj);
+ if (!BUFFER_REC_DELETED(rj)) {
+ docid idj;
+ p = GRN_NEXT_ADDR(rj);
+ GRN_B_DEC(idj.rid, p);
+ if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
+ GRN_B_DEC(idj.sid, p);
+ } else {
+ idj.sid = 1;
+ }
+ if (idj.rid < u->rid || (idj.rid == u->rid && idj.sid < u->sid)) { /* target is still before u: take the shortcut */
+ last = posj;
+ lastp = &rj->step;
+ } else {
+ reset = 1;
+ }
+ }
+ }
+ }
+ }
+ return ctx->rc;
+}
+
+/* array */
+
+/*
+ * Returns the array entry of term `id` with a reference held on its
+ * segment, or NULL when `id` is out of range, no segment is assigned yet or
+ * the segment cannot be referenced. Unlike array_get() this never assigns a
+ * new segment. A non-NULL result is released with array_unref().
+ */
+inline static uint32_t *
+array_at(grn_ctx *ctx, grn_ii *ii, uint32_t id)
+{
+  byte *seg_addr = NULL;
+  uint32_t lseg, pseg;
+
+  if (id > GRN_ID_MAX) {
+    return NULL;
+  }
+  lseg = id >> W_ARRAY;
+  pseg = ii->header->ainfo[lseg];
+  if (pseg == GRN_II_PSEG_NOT_ASSIGNED) {
+    return NULL;
+  }
+  GRN_IO_SEG_REF(ii->seg, pseg, seg_addr);
+  if (!seg_addr) {
+    return NULL;
+  }
+  return (uint32_t *)(seg_addr + (id & ARRAY_MASK_IN_A_SEGMENT) * S_ARRAY_ELEMENT);
+}
+
+/*
+ * Returns the array entry of term `id` with a reference held on its
+ * segment, assigning a cleared segment first when none exists yet (and
+ * raising header->amax accordingly). Returns NULL when `id` is out of
+ * range, no segment could be obtained or the segment cannot be referenced.
+ * A non-NULL result is released with array_unref().
+ */
+inline static uint32_t *
+array_get(grn_ctx *ctx, grn_ii *ii, uint32_t id)
+{
+  byte *seg_addr = NULL;
+  uint16_t lseg;
+  uint32_t pseg;
+
+  if (id > GRN_ID_MAX) {
+    return NULL;
+  }
+  lseg = id >> W_ARRAY;
+  pseg = ii->header->ainfo[lseg];
+  if (pseg == GRN_II_PSEG_NOT_ASSIGNED) {
+    /* first use of this logical segment: back it with a fresh one */
+    if (segment_get_clear(ctx, ii, &pseg)) {
+      return NULL;
+    }
+    ii->header->ainfo[lseg] = pseg;
+    if (lseg >= ii->header->amax) {
+      ii->header->amax = lseg + 1;
+    }
+  }
+  GRN_IO_SEG_REF(ii->seg, pseg, seg_addr);
+  if (!seg_addr) {
+    return NULL;
+  }
+  return (uint32_t *)(seg_addr + (id & ARRAY_MASK_IN_A_SEGMENT) * S_ARRAY_ELEMENT);
+}
+
+/*
+ * Releases the segment reference taken by array_at() / array_get() for
+ * term `id`.
+ */
+inline static void
+array_unref(grn_ii *ii, uint32_t id)
+{
+  GRN_IO_SEG_UNREF(ii->seg,
+                   ii->header->ainfo[id >> W_ARRAY]);
+}
+
+/* updspec */
+
+/*
+ * Allocates an empty update spec for posting (rid, sid): counters are zero
+ * and the position list is empty. Returns NULL when the allocation fails.
+ * Release it with grn_ii_updspec_close().
+ */
+grn_ii_updspec *
+grn_ii_updspec_open(grn_ctx *ctx, uint32_t rid, uint32_t sid)
+{
+  grn_ii_updspec *spec = GRN_MALLOC(sizeof(grn_ii_updspec));
+  if (spec) {
+    spec->rid = rid;
+    spec->sid = sid;
+    spec->weight = 0;
+    spec->tf = 0;
+    spec->atf = 0;
+    spec->pos = NULL;
+    spec->tail = NULL;
+  }
+  return spec;
+}
+
+/* Upper bound on the number of positions kept per update spec. */
+#define GRN_II_MAX_TF 0x1ffff
+
+/*
+ * Records one more occurrence at position `pos` with weight `weight`.
+ *
+ * u->atf counts every call. Once u->tf has reached GRN_II_MAX_TF the
+ * occurrence is counted in atf only and nothing else changes. Otherwise the
+ * position is appended to the list, the weight is accumulated and tf is
+ * incremented.
+ *
+ * Returns GRN_SUCCESS, or GRN_NO_MEMORY_AVAILABLE when the list node cannot
+ * be allocated.
+ */
+grn_rc
+grn_ii_updspec_add(grn_ctx *ctx, grn_ii_updspec *u, int pos, int32_t weight)
+{
+  struct _grn_ii_pos *node;
+
+  u->atf++;
+  if (u->tf >= GRN_II_MAX_TF) {
+    return GRN_SUCCESS;
+  }
+  node = GRN_MALLOC(sizeof(struct _grn_ii_pos));
+  if (!node) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  u->weight += weight;
+  node->pos = pos;
+  node->next = NULL;
+  /* append at the tail, or start the list */
+  if (!u->tail) {
+    u->pos = node;
+  } else {
+    u->tail->next = node;
+  }
+  u->tail = node;
+  u->tf++;
+  return GRN_SUCCESS;
+}
+
+/*
+ * Three-way comparison of two update specs.
+ *
+ * Fields are compared in this order: rid, sid, weight, tf, then the
+ * position lists element by element; a list that is a strict prefix of the
+ * other one sorts first.
+ *
+ * Returns a negative value, 0 or a positive value when `a` sorts before,
+ * equal to or after `b`. Explicit comparisons are used instead of
+ * subtraction so that large differences cannot wrap around or overflow and
+ * flip the sign of the result.
+ */
+int
+grn_ii_updspec_cmp(grn_ii_updspec *a, grn_ii_updspec *b)
+{
+  struct _grn_ii_pos *pa, *pb;
+  if (a->rid != b->rid) { return a->rid < b->rid ? -1 : 1; }
+  if (a->sid != b->sid) { return a->sid < b->sid ? -1 : 1; }
+  if (a->weight != b->weight) { return a->weight < b->weight ? -1 : 1; }
+  if (a->tf != b->tf) { return a->tf < b->tf ? -1 : 1; }
+  for (pa = a->pos, pb = b->pos; pa && pb; pa = pa->next, pb = pb->next) {
+    if (pa->pos != pb->pos) { return pa->pos < pb->pos ? -1 : 1; }
+  }
+  /* one list ran out first: the longer one sorts after */
+  if (pa) { return 1; }
+  if (pb) { return -1; }
+  return 0;
+}
+
+/*
+ * Frees an update spec together with every node of its position list.
+ * Always returns GRN_SUCCESS.
+ */
+grn_rc
+grn_ii_updspec_close(grn_ctx *ctx, grn_ii_updspec *u)
+{
+  struct _grn_ii_pos *node, *next;
+
+  for (node = u->pos; node; node = next) {
+    next = node->next;
+    GRN_FREE(node);
+  }
+  GRN_FREE(u);
+  return GRN_SUCCESS;
+}
+
+/*
+ * Serializes update spec `u` into a newly allocated byte buffer.
+ *
+ * Layout (all values B-encoded): rid, sid (only for indexes with sections),
+ * tf, weight (only for indexes with weights), then position deltas (only
+ * for indexes with positions). For a deletion (`deletep` non-zero) tf and
+ * weight are written as 0 and no positions follow. The output is
+ * zero-padded until the write pointer is 4-byte aligned.
+ *
+ * *size receives the encoded length plus sizeof(buffer_rec).
+ * Side effect: u->sid is forced to 1 when the index has no sections.
+ * Returns the buffer (to be freed by the caller) or NULL when the
+ * allocation fails.
+ */
+inline static uint8_t *
+encode_rec(grn_ctx *ctx, grn_ii *ii, grn_ii_updspec *u, unsigned int *size, int deletep)
+{
+  uint8_t *encoded, *wp;
+  struct _grn_ii_pos *node;
+  uint32_t prev_pos, tf, weight;
+
+  tf = deletep ? 0 : u->tf;
+  weight = deletep ? 0 : u->weight;
+
+  encoded = GRN_MALLOC((tf + 4) * 5);
+  if (!encoded) {
+    return NULL;
+  }
+  wp = encoded;
+  GRN_B_ENC(u->rid, wp);
+  if (!(ii->header->flags & GRN_OBJ_WITH_SECTION)) {
+    u->sid = 1;
+  } else {
+    GRN_B_ENC(u->sid, wp);
+  }
+  GRN_B_ENC(tf, wp);
+  if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
+    GRN_B_ENC(weight, wp);
+  }
+  if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {
+    /* positions are stored as gaps from the previous position */
+    prev_pos = 0;
+    node = u->pos;
+    while (node && tf--) {
+      GRN_B_ENC(node->pos - prev_pos, wp);
+      prev_pos = node->pos;
+      node = node->next;
+    }
+  }
+  while (((intptr_t)wp & 0x03)) {
+    *wp++ = 0;
+  }
+  *size = (unsigned int) ((wp - encoded) + sizeof(buffer_rec));
+  return encoded;
+}
+
+typedef struct { /* context handed to lexicon_deletable() through the delete optarg */
+ grn_ii *ii; /* index whose array is consulted for the term */
+ grn_hash *h; /* hash looked up by term id; its values are grn_ii_updspec pointers */
+} lexicon_deletable_arg;
+
+#ifdef CASCADE_DELETE_LEXICON
+/*
+ * Callback deciding whether term `tid` may be removed from the lexicon.
+ * `arg` is a lexicon_deletable_arg.
+ *
+ * Returns 0 (keep) when no hash was supplied or when the term's array entry
+ * is still in use (a[0] != 0). Otherwise the pending update for the term
+ * decides: a missing hash entry means deletable unless an error is set on
+ * ctx, and an entry with tf == 0 or sid == 0 means deletable.
+ */
+static int
+lexicon_deletable(grn_ctx *ctx, grn_obj *lexicon, grn_id tid, void *arg)
+{
+  lexicon_deletable_arg *args = (lexicon_deletable_arg *)arg;
+  grn_hash *updates = args->h;
+  grn_ii *ii = args->ii;
+  uint32_t *entry;
+  grn_ii_updspec **spec;
+
+  if (!updates) {
+    return 0;
+  }
+  entry = array_at(ctx, ii, tid);
+  if (entry) {
+    int in_use = (entry[0] != 0);
+    array_unref(ii, tid);
+    if (in_use) {
+      return 0;
+    }
+  }
+  if (!grn_hash_get(ctx, updates, &tid, sizeof(grn_id), (void **) &spec)) {
+    return (ERRP(ctx, GRN_ERROR)) ? 0 : 1;
+  }
+  return (!(*spec)->tf || !(*spec)->sid) ? 1 : 0;
+}
+#endif /* CASCADE_DELETE_LEXICON */
+
+/*
+ * Asks the lexicon to delete term `tid`, letting lexicon_deletable() veto
+ * the deletion. Compiles to a no-op unless CASCADE_DELETE_LEXICON is
+ * defined.
+ */
+inline static void
+lexicon_delete(grn_ctx *ctx, grn_ii *ii, uint32_t tid, grn_hash *h)
+{
+#ifdef CASCADE_DELETE_LEXICON
+  lexicon_deletable_arg deletable_arg = {ii, h};
+  grn_table_delete_optarg delete_option = {0, lexicon_deletable, &deletable_arg};
+  _grn_table_delete_by_id(ctx, ii->lexicon, tid, &delete_option);
+#endif /* CASCADE_DELETE_LEXICON */
+}
+
+typedef struct { /* one decoded posting, as used by the merge macros below */
+ grn_id rid; /* record id; the merge macros use 0 to mean "no more postings" */
+ uint32_t sid; /* section id */
+ uint32_t tf; /* number of positions of this posting */
+ uint32_t weight; /* weight, filled only for indexes with GRN_OBJ_WITH_WEIGHT */
+ uint32_t flags;
+} docinfo;
+
+#define GETNEXTC() do { /* load the next posting of the decoded chunk into cid; uses caller locals sdf, srp, ssp, stp, sop, snp, cid, ii */\
+ if (sdf) { /* sdf = postings left in the chunk */\
+ uint32_t dgap = *srp++;\
+ cid.rid += dgap; /* record ids are stored as gaps */\
+ if (dgap) { cid.sid = 0; } /* new record: section gaps restart */\
+ snp += cid.tf; /* skip the positions of the previous posting */\
+ cid.tf = 1 + *stp++; /* tf is stored minus one */\
+ if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { cid.weight = *sop++; }\
+ if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {\
+ cid.sid += 1 + *ssp++; /* section gap is stored minus one */\
+ } else {\
+ cid.sid = 1;\
+ }\
+ sdf--;\
+ } else {\
+ cid.rid = 0; /* chunk exhausted */\
+ }\
+} while (0)
+
+#define PUTNEXT_(id) do { /* append posting `id` to the output arrays as gaps from lid, then remember it in lid; uses caller locals ridp, sidp, tfp, weightp, lid, ii */\
+ uint32_t dgap = id.rid - lid.rid;\
+ uint32_t sgap = (dgap ? id.sid : id.sid - lid.sid) - 1; /* section gap restarts on a new record; stored minus one */\
+ *ridp++ = dgap;\
+ if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {\
+ *sidp++ = sgap;\
+ }\
+ *tfp++ = id.tf - 1; /* tf is stored minus one */\
+ if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { *weightp++ = id.weight; }\
+ lid.rid = id.rid;\
+ lid.sid = id.sid;\
+} while (0)
+
+#define PUTNEXTC() do { /* emit the current chunk posting (cid) with its positions, then advance with GETNEXTC; on a broken posting it reports CRIT and leaves via `break` WITHOUT advancing -- callers check ctx->rc afterwards */\
+ if (cid.rid) {\
+ if (cid.tf) {\
+ if (lid.rid > cid.rid || (lid.rid == cid.rid && lid.sid >= cid.sid)) { /* output must stay strictly increasing */\
+ DEFINE_NAME(ii);\
+ CRIT(GRN_FILE_CORRUPT,\
+ "[ii][broken] posting in list is larger than posting in chunk: "\
+ "<%.*s>: (%d:%d) -> (%d:%d)",\
+ name_size, name, lid.rid, lid.sid, cid.rid, cid.sid);\
+ break;\
+ }\
+ PUTNEXT_(cid);\
+ if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {\
+ uint32_t i;\
+ for (i = 0; i < cid.tf; i++) { /* copy positions and accumulate them in spos */\
+ *posp++ = snp[i];\
+ spos += snp[i];\
+ }\
+ }\
+ } else {\
+ DEFINE_NAME(ii);\
+ CRIT(GRN_FILE_CORRUPT,\
+ "[ii][broken] invalid posting in chunk: <%.*s>: (%d,%d)",\
+ name_size, name, bt->tid, cid.rid);\
+ break;\
+ }\
+ }\
+ GETNEXTC();\
+} while (0)
+
+#define GETNEXTB() do { /* load the next buffer record (position nextb) into bid, leaving sbp just behind its rid/sid; uses caller locals nextb, sb, sbp, bid, ii */\
+ if (nextb) {\
+ uint32_t lrid = bid.rid, lsid = bid.sid;\
+ buffer_rec *br = BUFFER_REC_AT(sb, nextb);\
+ sbp = GRN_NEXT_ADDR(br);\
+ GRN_B_DEC(bid.rid, sbp);\
+ if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {\
+ GRN_B_DEC(bid.sid, sbp);\
+ } else {\
+ bid.sid = 1;\
+ }\
+ if (lrid > bid.rid || (lrid == bid.rid && lsid >= bid.sid)) { /* records must be strictly increasing */\
+ DEFINE_NAME(ii);\
+ CRIT(GRN_FILE_CORRUPT,\
+ "[ii][broken] postings in block aren't sorted: "\
+ "<%.*s>: (%d:%d) -> (%d:%d)",\
+ name_size, name, lrid, lsid, bid.rid, bid.sid);\
+ break; /* leaves the macro without advancing nextb */\
+ }\
+ nextb = br->step;\
+ } else {\
+ bid.rid = 0; /* buffer list exhausted */\
+ }\
+} while (0)
+
+#define PUTNEXTB() do { /* emit the current buffer posting (bid), decoding tf/weight/positions from sbp, then advance with GETNEXTB; records with sid 0 or tf 0 emit nothing */\
+ if (bid.rid && bid.sid) {\
+ GRN_B_DEC(bid.tf, sbp);\
+ if (bid.tf > 0) { /* tf 0 is what encode_rec() writes for a deletion */\
+ if (lid.rid > bid.rid || (lid.rid == bid.rid && lid.sid >= bid.sid)) { /* output must stay strictly increasing */\
+ DEFINE_NAME(ii);\
+ CRIT(GRN_FILE_CORRUPT,\
+ "[ii][broken] posting in list is larger than posting in buffer: "\
+ "<%.*s>: (%d:%d) -> (%d:%d)",\
+ name_size, name, lid.rid, lid.sid, bid.rid, bid.sid);\
+ break; /* leaves the macro without advancing */\
+ }\
+ if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {\
+ GRN_B_DEC(bid.weight, sbp);\
+ }\
+ PUTNEXT_(bid);\
+ if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {\
+ while (bid.tf--) { GRN_B_DEC(*posp, sbp); spos += *posp++; }\
+ }\
+ }\
+ }\
+ GETNEXTB();\
+} while (0)
+
+#define MERGE_BC(cond) do { /* merge loop: repeatedly emits the smaller of the buffer posting (bid) and the chunk posting (cid) while `cond` holds; stops when both sources are exhausted or ctx->rc reports an error */\
+ if (bid.rid) {\
+ if (cid.rid) {\
+ if (cid.rid < bid.rid) {\
+ PUTNEXTC();\
+ if (ctx->rc != GRN_SUCCESS) { break; }\
+ } else {\
+ if (bid.rid < cid.rid) {\
+ PUTNEXTB();\
+ if (ctx->rc != GRN_SUCCESS) { break; }\
+ } else { /* same record in both sources */\
+ if (bid.sid) {\
+ if (cid.sid < bid.sid) {\
+ PUTNEXTC();\
+ if (ctx->rc != GRN_SUCCESS) { break; }\
+ } else {\
+ if (bid.sid == cid.sid) { GETNEXTC(); } /* same posting: the buffer version replaces the chunk one */\
+ PUTNEXTB();\
+ if (ctx->rc != GRN_SUCCESS) { break; }\
+ }\
+ } else { /* buffer record with sid 0: drop the chunk postings of this record */\
+ GETNEXTC();\
+ }\
+ }\
+ }\
+ } else { /* only buffer postings left */\
+ PUTNEXTB();\
+ if (ctx->rc != GRN_SUCCESS) { break; }\
+ }\
+ } else {\
+ if (cid.rid) { /* only chunk postings left */\
+ PUTNEXTC();\
+ if (ctx->rc != GRN_SUCCESS) { break; }\
+ } else {\
+ break;\
+ }\
+ }\
+} while (cond)
+
+typedef struct { /* descriptor of one sub-chunk of a term whose data is split (CHUNK_SPLIT) */
+ uint32_t segno; /* chunk number as returned by chunk_new() */
+ uint32_t size; /* encoded size in bytes; 0 means the sub-chunk is empty */
+ uint32_t dgap; /* record-id gap accumulated into the running rid by buffer_merge() */
+} chunk_info;
+
+/*
+ * Stores `encsize` bytes from `enc` in a newly allocated chunk and records
+ * its number and size in `cinfo`.
+ *
+ * With encsize == 0 nothing is allocated and cinfo is set to segment 0 /
+ * size 0. When the new chunk cannot be mapped it is released again and an
+ * error is reported. `cinfo` is left untouched on failure.
+ *
+ * Returns ctx->rc.
+ */
+static grn_rc
+chunk_flush(grn_ctx *ctx, grn_ii *ii, chunk_info *cinfo, uint8_t *enc, uint32_t encsize)
+{
+  uint8_t *dest;
+  uint32_t dest_chunk;
+  grn_io_win dest_win;
+
+  if (!encsize) {
+    cinfo->segno = 0;
+    cinfo->size = 0;
+    return ctx->rc;
+  }
+
+  chunk_new(ctx, ii, &dest_chunk, encsize);
+  if (ctx->rc != GRN_SUCCESS) {
+    return ctx->rc;
+  }
+
+  dest = WIN_MAP(ii->chunk, ctx, &dest_win, dest_chunk, 0, encsize, grn_io_wronly);
+  if (!dest) {
+    chunk_free(ctx, ii, dest_chunk, 0, encsize);
+    {
+      DEFINE_NAME(ii);
+      MERR("[ii][chunk][flush] failed to allocate a destination chunk: "
+           "<%.*s> :"
+           "segment:<%u>, size:<%u>",
+           name_size, name,
+           dest_chunk, encsize);
+    }
+    return ctx->rc;
+  }
+
+  grn_memcpy(dest, enc, encsize);
+  grn_io_win_unmap(&dest_win);
+  cinfo->segno = dest_chunk;
+  cinfo->size = encsize;
+  return ctx->rc;
+}
+
+static grn_rc
+chunk_merge(grn_ctx *ctx, grn_ii *ii, buffer *sb, buffer_term *bt,
+ chunk_info *cinfo, grn_id rid, datavec *dv,
+ uint16_t *nextbp, uint8_t **sbpp, docinfo *bidp, int32_t *balance)
+{ /*
+ * Merges one sub-chunk of a split term with the pending buffer records.
+ *
+ * The chunk described by `cinfo` is mapped read-only and decoded, then
+ * merged (MERGE_BC) with the buffer records of `sb` starting at the cursor
+ * (*nextbp, *sbpp, *bidp). Merging continues while the buffer posting's
+ * record id is <= `rid` or chunk postings remain. On success the cursor is
+ * written back, the result is encoded and stored in a new chunk through
+ * chunk_flush() (which updates `cinfo`), and the old chunk is freed.
+ *
+ * `dv` is the caller's output vector; it is resized here as needed.
+ * *balance is increased by (postings written - postings read), also when
+ * an error occurred.
+ *
+ * Returns ctx->rc.
+ */
+ grn_io_win sw;
+ uint64_t spos = 0;
+ uint32_t segno = cinfo->segno, size = cinfo->size, sdf = 0, ndf = 0;
+ uint32_t *ridp = NULL, *sidp = NULL, *tfp, *weightp = NULL, *posp = NULL;
+ docinfo cid = {0, 0, 0, 0, 0}, lid = {0, 0, 0, 0, 0}, bid = *bidp;
+ uint8_t *scp = WIN_MAP(ii->chunk, ctx, &sw, segno, 0, size, grn_io_rdonly);
+
+ if (scp) {
+ uint16_t nextb = *nextbp;
+ uint32_t snn = 0, *srp, *ssp = NULL, *stp, *sop = NULL, *snp;
+ uint8_t *sbp = *sbpp;
+ datavec rdv[MAX_N_ELEMENTS + 1];
+ size_t bufsize = S_SEGMENT * ii->n_elements;
+ datavec_init(ctx, rdv, ii->n_elements, 0, 0);
+ if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {
+ rdv[ii->n_elements - 1].flags = ODD;
+ }
+ bufsize += grn_p_decv(ctx, scp, cinfo->size, rdv, ii->n_elements);
+ // (df in chunk list) = a[1] - sdf;
+ { /* pick the decoded source vectors in their fixed order: rid, [sid], tf, [weight], pos */
+ int j = 0;
+ sdf = rdv[j].data_size;
+ srp = rdv[j++].data;
+ if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { ssp = rdv[j++].data; }
+ stp = rdv[j++].data;
+ if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { sop = rdv[j++].data; }
+ snn = rdv[j].data_size;
+ snp = rdv[j].data;
+ }
+ datavec_reset(ctx, dv, ii->n_elements, sdf + S_SEGMENT, bufsize);
+ if (ctx->rc == GRN_SUCCESS) {
+ { /* output cursors, same vector order as above */
+ int j = 0;
+ ridp = dv[j++].data;
+ if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { sidp = dv[j++].data; }
+ tfp = dv[j++].data;
+ if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { weightp = dv[j++].data; }
+ posp = dv[j].data;
+ }
+ GETNEXTC();
+ MERGE_BC(bid.rid <= rid || cid.rid);
+ if (ctx->rc == GRN_SUCCESS) { /* hand the advanced buffer cursor back to the caller */
+ *sbpp = sbp;
+ *nextbp = nextb;
+ *bidp = bid;
+ GRN_ASSERT(posp < dv[ii->n_elements].data);
+ ndf = ridp - dv[0].data;
+ }
+ }
+ datavec_fin(ctx, rdv);
+ grn_io_win_unmap(&sw);
+ } else {
+ DEFINE_NAME(ii);
+ MERR("[ii][chunk][merge] failed to allocate a source chunk: "
+ "<%.*s> :"
+ "record:<%u>, segment:<%u>, size:<%u>",
+ name_size, name,
+ rid,
+ segno,
+ size);
+ }
+ if (ctx->rc == GRN_SUCCESS) { /* encode the merged postings and move them to a new chunk */
+ int j = 0;
+ uint8_t *enc;
+ uint32_t encsize;
+ uint32_t np = posp - dv[ii->n_elements - 1].data;
+ uint32_t f_s = (ndf < 3) ? 0 : USE_P_ENC;
+ uint32_t f_d = ((ndf < 16) || (ndf <= (lid.rid >> 8))) ? 0 : USE_P_ENC;
+ dv[j].data_size = ndf; dv[j++].flags = f_d;
+ if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
+ dv[j].data_size = ndf; dv[j++].flags = f_s;
+ }
+ dv[j].data_size = ndf; dv[j++].flags = f_s;
+ if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
+ dv[j].data_size = ndf; dv[j++].flags = f_s;
+ }
+ if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {
+ uint32_t f_p = ((np < 32) || (np <= (spos >> 13))) ? 0 : USE_P_ENC;
+ dv[j].data_size = np; dv[j].flags = f_p|ODD;
+ }
+ if ((enc = GRN_MALLOC((ndf * 4 + np) * 2))) {
+ encsize = grn_p_encv(ctx, dv, ii->n_elements, enc);
+ chunk_flush(ctx, ii, cinfo, enc, encsize);
+ if (ctx->rc == GRN_SUCCESS) { /* the old chunk is released only once the new one is stored */
+ chunk_free(ctx, ii, segno, 0, size);
+ }
+ GRN_FREE(enc);
+ } else {
+ DEFINE_NAME(ii);
+ MERR("[ii][chunk][merge] failed to allocate a encode buffer: "
+ "<%.*s> :"
+ "record:<%u>, segment:<%u>, size:<%u>",
+ name_size, name,
+ rid,
+ segno,
+ size);
+ }
+ }
+ *balance += (ndf - sdf);
+ return ctx->rc;
+}
+
+/*
+ * Debug helper: logs sizes, flags and contents (32 values per log line) of
+ * the source vectors `rdv` followed by the destination vectors `dv`.
+ */
+static void
+buffer_merge_dump_datavec(grn_ctx *ctx,
+                          grn_ii *ii,
+                          datavec *dv,
+                          datavec *rdv)
+{
+  int i, j;
+  grn_obj buffer;
+
+  GRN_TEXT_INIT(&buffer, 0);
+
+  /* source (decoded) vectors */
+  for (i = 0; i < ii->n_elements; i++) {
+    GRN_LOG(ctx, GRN_LOG_DEBUG, "rdv[%d] data_size=%d, flags=%d",
+            i, rdv[i].data_size, rdv[i].flags);
+    GRN_BULK_REWIND(&buffer);
+    j = 0;
+    while (j < rdv[i].data_size) {
+      grn_text_printf(ctx, &buffer, " %d", rdv[i].data[j]);
+      j++;
+      /* flush after every 32nd value and after the last one */
+      if ((j % 32) && j != rdv[i].data_size) {
+        continue;
+      }
+      GRN_LOG(ctx, GRN_LOG_DEBUG,
+              "rdv[%d].data[%d]%.*s",
+              i, j,
+              (int)GRN_TEXT_LEN(&buffer),
+              GRN_TEXT_VALUE(&buffer));
+      GRN_BULK_REWIND(&buffer);
+    }
+  }
+
+  /* destination (merged) vectors */
+  for (i = 0; i < ii->n_elements; i++) {
+    GRN_LOG(ctx, GRN_LOG_DEBUG, "dv[%d] data_size=%d, flags=%d",
+            i, dv[i].data_size, dv[i].flags);
+    GRN_BULK_REWIND(&buffer);
+    j = 0;
+    while (j < dv[i].data_size) {
+      grn_text_printf(ctx, &buffer, " %d", dv[i].data[j]);
+      j++;
+      if ((j % 32) && j != dv[i].data_size) {
+        continue;
+      }
+      GRN_LOG(ctx, GRN_LOG_DEBUG,
+              "dv[%d].data[%d]%.*s",
+              i, j,
+              (int)GRN_TEXT_LEN(&buffer),
+              GRN_TEXT_VALUE(&buffer));
+      GRN_BULK_REWIND(&buffer);
+    }
+  }
+
+  GRN_OBJ_FIN(ctx, &buffer);
+}
+
+/*
+ * Merges the pending records of every term in source buffer `sb` with the
+ * term's already flushed data in source chunk `sc`, and writes the result
+ * to destination chunk memory `dc`, updating the term slots of destination
+ * buffer `db` (the caller has already copied the slots of `sb` into it).
+ *
+ * For each term the outcome is one of:
+ *  - no postings left: the array entry is cleared and the slot is voided;
+ *  - a single simple posting: it is stored inline in the array entry and
+ *    the slot is voided;
+ *  - otherwise the encoded postings are appended to `dc`, or moved to a
+ *    chunk of their own when larger than CHUNK_SPLIT_THRESHOLD (the term
+ *    is then flagged CHUNK_SPLIT).
+ *
+ * `seg` is only used in error messages; `h` is forwarded to
+ * lexicon_delete(). Returns ctx->rc.
+ *
+ * WARNING: `dc` is not bounds-checked. If it is too small for the merged
+ * data the program may crash.
+ * TODO: Support automatic growth of `dc` or a maximum size check.
+ */
+static grn_rc
+buffer_merge(grn_ctx *ctx, grn_ii *ii, uint32_t seg, grn_hash *h,
+             buffer *sb, uint8_t *sc, buffer *db, uint8_t *dc)
+{
+  buffer_term *bt;
+  uint8_t *sbp = NULL, *dcp = dc;
+  datavec dv[MAX_N_ELEMENTS + 1];
+  datavec rdv[MAX_N_ELEMENTS + 1];
+  uint16_t n = db->header.nterms, nterms_void = 0;
+  /*
+   * Initial per-vector capacity: one segment plus the average chunk bytes
+   * per term, doubled. A buffer without terms must not divide by zero.
+   */
+  size_t unitsize =
+    (S_SEGMENT +
+     (sb->header.nterms ? sb->header.chunk_size / sb->header.nterms : 0)) * 2;
+  // size_t unitsize = (S_SEGMENT + sb->header.chunk_size) * 2 + (1<<24);
+  size_t totalsize = unitsize * ii->n_elements;
+  //todo : realloc
+  datavec_init(ctx, dv, ii->n_elements, unitsize, totalsize);
+  if (ctx->rc != GRN_SUCCESS) {
+    DEFINE_NAME(ii);
+    ERR(ctx->rc,
+        "[ii][buffer][merge] failed to initialize data vector: "
+        "<%.*s>: "
+        "unit-size:<%" GRN_FMT_SIZE ">, "
+        "total-size:<%" GRN_FMT_SIZE ">",
+        name_size, name,
+        unitsize,
+        totalsize);
+    return ctx->rc;
+  }
+  datavec_init(ctx, rdv, ii->n_elements, 0, 0);
+  if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {
+    rdv[ii->n_elements - 1].flags = ODD;
+  }
+  for (bt = db->terms; n; n--, bt++) {
+    uint16_t nextb;
+    uint64_t spos = 0;
+    int32_t balance = 0;
+    uint32_t *ridp, *sidp = NULL, *tfp, *weightp = NULL, *posp, nchunks = 0;
+    uint32_t nvchunks = 0;
+    chunk_info *cinfo = NULL;
+    grn_id crid = GRN_ID_NIL;
+    docinfo cid = {0, 0, 0, 0, 0}, lid = {0, 0, 0, 0, 0}, bid = {0, 0};
+    uint32_t sdf = 0, snn = 0, ndf;
+    uint32_t *srp = NULL, *ssp = NULL, *stp = NULL, *sop = NULL, *snp = NULL;
+    /* slot already voided */
+    if (!bt->tid) {
+      nterms_void++;
+      continue;
+    }
+    /* no pending records: carry the chunk data over unchanged */
+    if (!bt->pos_in_buffer) {
+      GRN_ASSERT(!bt->size_in_buffer);
+      if (bt->size_in_chunk) {
+        grn_memcpy(dcp, sc + bt->pos_in_chunk, bt->size_in_chunk);
+        bt->pos_in_chunk = (uint32_t)(dcp - dc);
+        dcp += bt->size_in_chunk;
+      }
+      continue;
+    }
+    nextb = bt->pos_in_buffer;
+    GETNEXTB();
+    if (sc && bt->size_in_chunk) {
+      uint8_t *scp = sc + bt->pos_in_chunk;
+      uint8_t *sce = scp + bt->size_in_chunk;
+      size_t size = S_SEGMENT * ii->n_elements;
+      if ((bt->tid & CHUNK_SPLIT)) {
+        /* split term: a list of sub-chunk descriptors precedes the data */
+        int i;
+        GRN_B_DEC(nchunks, scp);
+        if (!(cinfo = GRN_MALLOCN(chunk_info, nchunks + 1))) {
+          datavec_fin(ctx, dv);
+          datavec_fin(ctx, rdv);
+          {
+            DEFINE_NAME(ii);
+            MERR("[ii][buffer][merge] failed to allocate chunk info: "
+                 "<%.*s> :"
+                 "segment:<%u>, "
+                 "n-chunks:<%u>, "
+                 "unit-size:<%" GRN_FMT_SIZE ">, "
+                 "total-size:<%" GRN_FMT_SIZE ">",
+                 name_size, name,
+                 seg,
+                 nchunks,
+                 unitsize,
+                 totalsize);
+          }
+          return ctx->rc;
+        }
+        for (i = 0; i < nchunks; i++) {
+          GRN_B_DEC(cinfo[i].segno, scp);
+          GRN_B_DEC(cinfo[i].size, scp);
+          GRN_B_DEC(cinfo[i].dgap, scp);
+          crid += cinfo[i].dgap;
+          /* only sub-chunks that pending records fall into are rewritten */
+          if (bid.rid <= crid) {
+            chunk_merge(ctx, ii, sb, bt, &cinfo[i], crid, dv,
+                        &nextb, &sbp, &bid, &balance);
+            if (ctx->rc != GRN_SUCCESS) {
+              if (cinfo) { GRN_FREE(cinfo); }
+              datavec_fin(ctx, dv);
+              datavec_fin(ctx, rdv);
+              {
+                DEFINE_NAME(ii);
+                ERR(ctx->rc,
+                    "[ii][buffer][merge] failed to merge chunk: "
+                    "<%.*s>: "
+                    "chunk:<%u>, "
+                    "n-chunks:<%u>",
+                    name_size, name,
+                    i,
+                    nchunks);
+              }
+              return ctx->rc;
+            }
+          }
+          if (cinfo[i].size) {
+            nvchunks++;
+          } else {
+            crid -= cinfo[i].dgap;
+            cinfo[i + 1].dgap += cinfo[i].dgap;
+          }
+        }
+      }
+      if (sce > scp) {
+        /* decode the postings stored inline in the source chunk */
+        size += grn_p_decv(ctx, scp, sce - scp, rdv, ii->n_elements);
+        {
+          int j = 0;
+          sdf = rdv[j].data_size;
+          srp = rdv[j++].data;
+          if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { ssp = rdv[j++].data; }
+          stp = rdv[j++].data;
+          if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { sop = rdv[j++].data; }
+          snn = rdv[j].data_size;
+          snp = rdv[j].data;
+        }
+        datavec_reset(ctx, dv, ii->n_elements, sdf + S_SEGMENT, size);
+        if (ctx->rc != GRN_SUCCESS) {
+          if (cinfo) { GRN_FREE(cinfo); }
+          datavec_fin(ctx, dv);
+          datavec_fin(ctx, rdv);
+          {
+            DEFINE_NAME(ii);
+            ERR(ctx->rc,
+                "[ii][buffer][merge] failed to reset data vector: "
+                "<%.*s>: "
+                "unit-size:<%" GRN_FMT_SIZE ">, "
+                "total-size:<%" GRN_FMT_SIZE ">",
+                name_size, name,
+                (size_t)(sdf + S_SEGMENT),
+                size);
+          }
+          return ctx->rc;
+        }
+      }
+    }
+    {
+      /* output cursors, in vector order: rid, [sid], tf, [weight], pos */
+      int j = 0;
+      ridp = dv[j++].data;
+      if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { sidp = dv[j++].data; }
+      tfp = dv[j++].data;
+      if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { weightp = dv[j++].data; }
+      posp = dv[j].data;
+    }
+    GETNEXTC();
+    MERGE_BC(1);
+    if (ctx->rc != GRN_SUCCESS) {
+      if (cinfo) { GRN_FREE(cinfo); }
+      datavec_fin(ctx, dv);
+      datavec_fin(ctx, rdv);
+      {
+        DEFINE_NAME(ii);
+        ERR(ctx->rc,
+            "[ii][buffer][merge] failed to merge chunk: <%.*s>",
+            name_size, name);
+      }
+      return ctx->rc;
+    }
+    GRN_ASSERT(posp < dv[ii->n_elements].data);
+    ndf = ridp - dv[0].data;
+    /*
+    {
+      grn_obj buf;
+      uint32_t rid, sid, tf, i, pos, *pp;
+      GRN_TEXT_INIT(&buf, 0);
+      rid = 0;
+      pp = dv[3].data;
+      for (i = 0; i < ndf; i++) {
+        GRN_BULK_REWIND(&buf);
+        rid += dv[0].data[i];
+        if (dv[0].data[i]) { sid = 0; }
+        sid += dv[1].data[i] + 1;
+        tf = dv[2].data[i] + 1;
+        pos = 0;
+        grn_text_itoa(ctx, &buf, rid);
+        GRN_TEXT_PUTC(ctx, &buf, ':');
+        grn_text_itoa(ctx, &buf, sid);
+        GRN_TEXT_PUTC(ctx, &buf, ':');
+        grn_text_itoa(ctx, &buf, tf);
+        GRN_TEXT_PUTC(ctx, &buf, ':');
+        while (tf--) {
+          pos += *pp++;
+          grn_text_itoa(ctx, &buf, pos);
+          if (tf) { GRN_TEXT_PUTC(ctx, &buf, ','); }
+        }
+        GRN_TEXT_PUTC(ctx, &buf, '\0');
+        GRN_LOG(ctx, GRN_LOG_DEBUG, "Posting:%s", GRN_TEXT_VALUE(&buf));
+      }
+      GRN_OBJ_FIN(ctx, &buf);
+    }
+    */
+    {
+      grn_id tid = bt->tid & GRN_ID_MAX;
+      uint32_t *a = array_at(ctx, ii, tid);
+      if (!a) {
+        GRN_LOG(ctx, GRN_LOG_DEBUG, "array_entry not found tid=%d", tid);
+        memset(bt, 0, sizeof(buffer_term));
+        nterms_void++;
+      } else {
+        if (!ndf && !nvchunks) {
+          /* nothing left for this term */
+          a[0] = 0;
+          a[1] = 0;
+          lexicon_delete(ctx, ii, tid, h);
+          memset(bt, 0, sizeof(buffer_term));
+          nterms_void++;
+        } else if ((ii->header->flags & GRN_OBJ_WITH_SECTION)
+                   && !nvchunks && ndf == 1 && lid.rid < 0x100000 &&
+                   lid.sid < 0x800 && lid.tf == 1 && lid.weight == 0) {
+          /* single posting: store it inline (low bit set marks this form) */
+          a[0] = (lid.rid << 12) + (lid.sid << 1) + 1;
+          a[1] = (ii->header->flags & GRN_OBJ_WITH_POSITION) ? posp[-1] : 0;
+          memset(bt, 0, sizeof(buffer_term));
+          nterms_void++;
+        } else if (!(ii->header->flags & GRN_OBJ_WITH_SECTION)
+                   && !nvchunks && ndf == 1 && lid.tf == 1 && lid.weight == 0) {
+          /* single posting, index without sections */
+          a[0] = (lid.rid << 1) + 1;
+          a[1] = (ii->header->flags & GRN_OBJ_WITH_POSITION) ? posp[-1] : 0;
+          memset(bt, 0, sizeof(buffer_term));
+          nterms_void++;
+        } else {
+          int j = 0;
+          uint8_t *dcp0;
+          uint32_t encsize;
+          uint32_t f_s = (ndf < 3) ? 0 : USE_P_ENC;
+          uint32_t f_d = ((ndf < 16) || (ndf <= (lid.rid >> 8))) ? 0 : USE_P_ENC;
+          dv[j].data_size = ndf; dv[j++].flags = f_d;
+          if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
+            dv[j].data_size = ndf; dv[j++].flags = f_s;
+          }
+          dv[j].data_size = ndf; dv[j++].flags = f_s;
+          if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
+            dv[j].data_size = ndf; dv[j++].flags = f_s;
+          }
+          if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {
+            uint32_t np = posp - dv[ii->n_elements - 1].data;
+            uint32_t f_p = ((np < 32) || (np <= (spos >> 13))) ? 0 : USE_P_ENC;
+            dv[j].data_size = np; dv[j].flags = f_p|ODD;
+          }
+          dcp0 = dcp;
+          a[1] = (bt->size_in_chunk ? a[1] : 0) + (ndf - sdf) + balance;
+          if (nvchunks) {
+            /* keep the descriptors of the sub-chunks that still hold data */
+            int i;
+            GRN_B_ENC(nvchunks, dcp);
+            for (i = 0; i < nchunks; i++) {
+              if (cinfo[i].size) {
+                GRN_B_ENC(cinfo[i].segno, dcp);
+                GRN_B_ENC(cinfo[i].size, dcp);
+                GRN_B_ENC(cinfo[i].dgap, dcp);
+              }
+            }
+          }
+          encsize = grn_p_encv(ctx, dv, ii->n_elements, dcp);
+
+          if (grn_logger_pass(ctx, GRN_LOG_DEBUG)) {
+            if (sb->header.chunk_size + S_SEGMENT <= (dcp - dc) + encsize) {
+              GRN_LOG(ctx, GRN_LOG_DEBUG,
+                      "cs(%d)+(%d)=(%d)"
+                      "<=(%" GRN_FMT_LLD ")+(%d)="
+                      "(%" GRN_FMT_LLD ")",
+                      sb->header.chunk_size,
+                      S_SEGMENT,
+                      sb->header.chunk_size + S_SEGMENT,
+                      (long long int)(dcp - dc),
+                      encsize,
+                      (long long int)((dcp - dc) + encsize));
+              buffer_merge_dump_datavec(ctx, ii, dv, rdv);
+            }
+          }
+
+          if (encsize > CHUNK_SPLIT_THRESHOLD &&
+              (cinfo || (cinfo = GRN_MALLOCN(chunk_info, nchunks + 1))) &&
+              !chunk_flush(ctx, ii, &cinfo[nchunks], dcp, encsize)) {
+            /* too large to keep inline: it became one more sub-chunk */
+            int i;
+            cinfo[nchunks].dgap = lid.rid - crid;
+            nvchunks++;
+            dcp = dcp0;
+            GRN_B_ENC(nvchunks, dcp);
+            for (i = 0; i <= nchunks; i++) {
+              if (cinfo[i].size) {
+                GRN_B_ENC(cinfo[i].segno, dcp);
+                GRN_B_ENC(cinfo[i].size, dcp);
+                GRN_B_ENC(cinfo[i].dgap, dcp);
+              }
+            }
+            GRN_LOG(ctx, GRN_LOG_DEBUG, "split (%d) encsize=%d", tid, encsize);
+            bt->tid |= CHUNK_SPLIT;
+          } else {
+            dcp += encsize;
+            if (!nvchunks) {
+              bt->tid &= ~CHUNK_SPLIT;
+            }
+          }
+          bt->pos_in_chunk = (uint32_t)(dcp0 - dc);
+          bt->size_in_chunk = (uint32_t)(dcp - dcp0);
+          bt->size_in_buffer = 0;
+          bt->pos_in_buffer = 0;
+        }
+        array_unref(ii, tid);
+      }
+    }
+    if (cinfo) { GRN_FREE(cinfo); }
+  }
+  datavec_fin(ctx, rdv);
+  datavec_fin(ctx, dv);
+  db->header.chunk_size = (uint32_t)(dcp - dc);
+  db->header.buffer_free =
+    S_SEGMENT - sizeof(buffer_header) - db->header.nterms * sizeof(buffer_term);
+  db->header.nterms_void = nterms_void;
+  return ctx->rc;
+}
+
+/*
+ * Fill in *iw by hand so that it describes `size` bytes at `addr` as a
+ * write-only, non-cached window over chunk `seg` of `io`.  Nothing is mapped
+ * here; only the window fields are assigned.
+ */
+static void
+fake_map(grn_ctx *ctx, grn_io *io, grn_io_win *iw, void *addr, uint32_t seg, uint32_t size)
+{
+  /* The bits of seg above GRN_II_N_CHUNK_VARIATION give the io segment, the
+   * low GRN_II_N_CHUNK_VARIATION bits give the slot inside that segment. */
+  const uint32_t io_segment = seg >> GRN_II_N_CHUNK_VARIATION;
+  const uint32_t slot = seg & ((1 << GRN_II_N_CHUNK_VARIATION) - 1);
+
+  iw->ctx = ctx;
+  iw->io = io;
+  iw->addr = addr;
+  iw->size = size;
+  iw->mode = grn_io_wronly;
+  iw->segment = io_segment;
+  iw->offset = slot << GRN_II_W_LEAST_CHUNK;
+  iw->diff = 0;
+  iw->cached = 0;
+}
+
+static grn_rc
+buffer_flush(grn_ctx *ctx, grn_ii *ii, uint32_t seg, grn_hash *h)
+{ /*
+   * Rewrite the buffer of logical segment `seg` into a newly obtained
+   * segment `ds`: the term table is copied, buffer_merge() builds the merged
+   * chunk image in the heap buffer `dc`, the image is written to a newly
+   * allocated chunk, and only after that succeeds is `seg` updated and the
+   * old chunk freed.  `h` is only forwarded to buffer_merge().
+   * Returns ctx->rc, i.e. whatever error was recorded on the context.
+   */
+ grn_io_win sw, dw;
+ buffer *sb, *db = NULL;
+ uint8_t *dc, *sc = NULL;
+ uint32_t ds, pseg, scn, dcn = 0;
+ if (ii->header->binfo[seg] == GRN_II_PSEG_NOT_ASSIGNED) {
+ DEFINE_NAME(ii);
+ CRIT(GRN_FILE_CORRUPT,
+ "[ii][buffer][flush] invalid segment: "
+ "<%.*s> :"
+ "request:<%u>, max:<%u>",
+ name_size, name,
+ seg, ii->seg->header->max_segment);
+ return ctx->rc;
+ }
+ if ((ds = segment_get(ctx, ii)) == ii->seg->header->max_segment) { /* a result equal to max_segment is treated as "no free segment" */
+ DEFINE_NAME(ii);
+ MERR("[ii][buffer][flush] segment is full: "
+ "<%.*s> :"
+ "request:<%u>, max:<%u>",
+ name_size, name,
+ seg, ii->seg->header->max_segment);
+ return ctx->rc;
+ }
+ pseg = buffer_open(ctx, ii, SEG2POS(seg, 0), NULL, &sb); /* sb: source buffer, released by buffer_close() below */
+ if (pseg == GRN_II_PSEG_NOT_ASSIGNED) {
+ DEFINE_NAME(ii);
+ MERR("[ii][buffer][flush] failed to open buffer: "
+ "<%.*s> :"
+ "segment:<%u>, position:<%u>, max:<%u>",
+ name_size, name,
+ seg, SEG2POS(seg, 0), ii->seg->header->max_segment);
+ return ctx->rc;
+ }
+ {
+ GRN_IO_SEG_REF(ii->seg, ds, db); /* db: destination buffer, NULL when the reference fails */
+ if (db) {
+ uint32_t actual_chunk_size = 0;
+ uint32_t max_dest_chunk_size = sb->header.chunk_size + S_SEGMENT;
+ if ((dc = GRN_MALLOC(max_dest_chunk_size * 2))) { /* scratch image for the merged chunk */
+ if ((scn = sb->header.chunk) == GRN_II_PSEG_NOT_ASSIGNED ||
+ (sc = WIN_MAP(ii->chunk, ctx, &sw, scn, 0,
+ sb->header.chunk_size, grn_io_rdonly))) { /* either there is no source chunk (sc stays NULL) or it is mapped read-only */
+ uint16_t n = sb->header.nterms;
+ memset(db, 0, S_SEGMENT); /* start from an empty destination buffer, then carry over the term table */
+ grn_memcpy(db->terms, sb->terms, n * sizeof(buffer_term));
+ db->header.nterms = n;
+ buffer_merge(ctx, ii, seg, h, sb, sc, db, dc);
+ if (ctx->rc == GRN_SUCCESS) {
+ actual_chunk_size = db->header.chunk_size;
+ if (actual_chunk_size > 0) { /* an empty merge result needs no chunk; dcn then keeps its initial 0 */
+ chunk_new(ctx, ii, &dcn, actual_chunk_size);
+ }
+ if (ctx->rc == GRN_SUCCESS) {
+ grn_rc rc;
+ db->header.chunk =
+ actual_chunk_size ? dcn : GRN_II_PSEG_NOT_ASSIGNED;
+ fake_map(ctx, ii->chunk, &dw, dc, dcn, actual_chunk_size); /* describe dc as a write-only window over chunk dcn */
+ rc = grn_io_win_unmap(&dw); /* NOTE(review): presumably writes dc out and releases it -- the success path never frees dc; confirm */
+ if (rc == GRN_SUCCESS) {
+ buffer_segment_update(ii, seg, ds);
+ ii->header->total_chunk_size += actual_chunk_size;
+ if (scn != GRN_II_PSEG_NOT_ASSIGNED) { /* drop the old chunk and its accounting */
+ grn_io_win_unmap(&sw);
+ chunk_free(ctx, ii, scn, 0, sb->header.chunk_size);
+ ii->header->total_chunk_size -= sb->header.chunk_size;
+ }
+ } else {
+ GRN_FREE(dc); /* the write-out failed: free the image and give back the new chunk */
+ if (actual_chunk_size) {
+ chunk_free(ctx, ii, dcn, 0, actual_chunk_size);
+ }
+ if (scn != GRN_II_PSEG_NOT_ASSIGNED) { grn_io_win_unmap(&sw); }
+ {
+ DEFINE_NAME(ii);
+ ERR(rc,
+ "[ii][buffer][flush] failed to unmap a destination chunk: "
+ "<%.*s> : "
+ "segment:<%u>, destination-segment:<%u>, actual-size:<%u>",
+ name_size, name,
+ seg,
+ dcn,
+ actual_chunk_size);
+ }
+ }
+ } else { /* chunk_new() failed: the error is already recorded in ctx->rc */
+ GRN_FREE(dc);
+ if (scn != GRN_II_PSEG_NOT_ASSIGNED) { grn_io_win_unmap(&sw); }
+ }
+ } else { /* buffer_merge() failed: the error is already recorded in ctx->rc */
+ GRN_FREE(dc);
+ if (scn != GRN_II_PSEG_NOT_ASSIGNED) { grn_io_win_unmap(&sw); }
+ }
+ } else {
+ GRN_FREE(dc);
+ {
+ DEFINE_NAME(ii);
+ MERR("[ii][buffer][flush] failed to map a source chunk: "
+ "<%.*s> :"
+ "segment:<%u>, source-segment:<%u>, chunk-size:<%u>",
+ name_size, name,
+ seg,
+ scn,
+ sb->header.chunk_size);
+ }
+ }
+ } else {
+ DEFINE_NAME(ii);
+ MERR("[ii][buffer][flush] failed to allocate a destination chunk: "
+ "<%.*s> :"
+ "segment:<%u>, destination-segment:<%u>",
+ name_size, name,
+ seg,
+ ds);
+ }
+ GRN_IO_SEG_UNREF(ii->seg, ds);
+ } else {
+ DEFINE_NAME(ii);
+ MERR("[ii][buffer][flush] failed to allocate a destination segment: "
+ "<%.*s> :"
+ "segment:<%u>, destination-segment:<%u>",
+ name_size, name,
+ seg,
+ ds);
+ }
+ buffer_close(ctx, ii, pseg);
+ }
+ return ctx->rc;
+}
+
+/*
+ * grn_ii_buffer_check: emit diagnostic information about the buffer of
+ * logical segment `seg` through the GRN_OUTPUT_* macros.
+ *
+ * When the segment is unassigned or its buffer cannot be opened, a single
+ * GRN_FALSE is output.  Otherwise a "BUFFER" map is output containing the
+ * chunk size, one "TERM" array per non-void term (key, stored term id, id
+ * found by looking the key up again, sizes, decoded element counts) and
+ * summary counters:
+ *   - "nterms void gap": difference between the counted void terms and the
+ *     header's nterms_void (only when they differ);
+ *   - "nterms with corrupt chunk": terms whose first decoded vector and
+ *     term-frequency vector differ in size;
+ *   - "number of deleted terms with value": terms whose key no longer maps
+ *     back to the stored id although they still hold data;
+ *   - "number of loops": records that are out of (rid, sid) order;
+ *   - "number of violations": record positions below `lower_bound`.
+ *
+ * The mapped source chunk and the buffer reference are released on every
+ * exit path that acquired them.
+ */
+void
+grn_ii_buffer_check(grn_ctx *ctx, grn_ii *ii, uint32_t seg)
+{
+  grn_io_win sw;
+  buffer *sb;
+  uint8_t *sc = NULL;
+  uint32_t pseg, scn, nterms_with_corrupt_chunk = 0, nterm_with_chunk = 0;
+  uint32_t ndeleted_terms_with_value = 0;
+  buffer_term *bt;
+  uint8_t *sbp = NULL;
+  datavec rdv[MAX_N_ELEMENTS + 1];
+  uint16_t n;
+  int nterms_void = 0;
+  int size_in_buffer = 0;
+  grn_obj buf;
+  size_t lower_bound;
+  int64_t nloops = 0, nviolations = 0;
+  if (ii->header->binfo[seg] == GRN_II_PSEG_NOT_ASSIGNED) {
+    GRN_OUTPUT_BOOL(GRN_FALSE);
+    return;
+  }
+  pseg = buffer_open(ctx, ii, SEG2POS(seg, 0), NULL, &sb);
+  if (pseg == GRN_II_PSEG_NOT_ASSIGNED) {
+    GRN_OUTPUT_BOOL(GRN_FALSE);
+    return;
+  }
+  /* Record positions below this value are counted as violations. */
+  lower_bound =
+    (sb->header.buffer_free + sizeof(buffer_term) * sb->header.nterms)
+    / sizeof(buffer_rec);
+  datavec_init(ctx, rdv, ii->n_elements, 0, 0);
+  if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {
+    rdv[ii->n_elements - 1].flags = ODD;
+  }
+  GRN_OUTPUT_MAP_OPEN("BUFFER", -1);
+  GRN_OUTPUT_CSTR("buffer id");
+  GRN_OUTPUT_INT64(seg);
+  if ((scn = sb->header.chunk) == GRN_II_PSEG_NOT_ASSIGNED) {
+    GRN_OUTPUT_CSTR("void chunk size");
+    GRN_OUTPUT_INT64(sb->header.chunk_size);
+  } else {
+    if ((sc = WIN_MAP(ii->chunk, ctx, &sw, scn, 0, sb->header.chunk_size,
+                      grn_io_rdonly))) {
+      GRN_OUTPUT_CSTR("chunk size");
+      GRN_OUTPUT_INT64(sb->header.chunk_size);
+    } else {
+      GRN_OUTPUT_CSTR("unmappable chunk size");
+      GRN_OUTPUT_INT64(sb->header.chunk_size);
+    }
+  }
+  GRN_OUTPUT_CSTR("buffer term");
+  GRN_OUTPUT_ARRAY_OPEN("TERMS", sb->header.nterms);
+
+  GRN_OBJ_INIT(&buf, GRN_BULK, 0, ii->lexicon->header.domain);
+  for (bt = sb->terms, n = sb->header.nterms; n; n--, bt++) {
+    /* NOTE(review): several locals below are not referenced directly in
+     * this function; they are kept because GETNEXTB() is a macro defined
+     * elsewhere and may expand to code that uses them. */
+    grn_id tid, tid_;
+    char key[GRN_TABLE_MAX_KEY_SIZE];
+    int key_size;
+    uint16_t nextb;
+    uint32_t nchunks = 0;
+    chunk_info *cinfo = NULL;
+    grn_id crid = GRN_ID_NIL;
+    docinfo bid = {0, 0};
+    uint32_t sdf = 0, snn = 0;
+    uint32_t *srp = NULL, *ssp = NULL, *stp = NULL, *sop = NULL, *snp = NULL;
+    if (!bt->tid && !bt->pos_in_buffer && !bt->size_in_buffer) {
+      nterms_void++;
+      continue;
+    }
+    GRN_OUTPUT_ARRAY_OPEN("TERM", -1);
+    tid = (bt->tid & GRN_ID_MAX);
+    key_size = grn_table_get_key(ctx, ii->lexicon, tid, key,
+                                 GRN_TABLE_MAX_KEY_SIZE);
+    /* Look the key up again: a different id means the term was deleted. */
+    tid_ = grn_table_get(ctx, ii->lexicon, key, key_size);
+    GRN_TEXT_SET(ctx, &buf, key, key_size);
+    GRN_OUTPUT_OBJ(&buf, NULL);
+    GRN_OUTPUT_INT64(bt->tid);
+    GRN_OUTPUT_INT64(tid_);
+    nextb = bt->pos_in_buffer;
+    size_in_buffer += bt->size_in_buffer;
+    if (tid != tid_ && (bt->size_in_buffer || bt->size_in_chunk)) {
+      ndeleted_terms_with_value++;
+    }
+    GETNEXTB();
+    GRN_OUTPUT_INT64(bt->size_in_buffer);
+    GRN_OUTPUT_INT64(bt->size_in_chunk);
+    if (sc && bt->size_in_chunk) {
+      uint8_t *scp = sc + bt->pos_in_chunk;
+      uint8_t *sce = scp + bt->size_in_chunk;
+      size_t size = S_SEGMENT * ii->n_elements;
+      if ((bt->tid & CHUNK_SPLIT)) {
+        uint32_t i;
+        GRN_B_DEC(nchunks, scp);
+        if (!(cinfo = GRN_MALLOCN(chunk_info, nchunks + 1))) {
+          /* Release everything acquired so far.  sc is non-NULL in this
+           * branch, so the source chunk window is mapped and must be
+           * unmapped, and the buffer reference must be closed.
+           * NOTE(review): the output arrays/map opened above are left
+           * unclosed here, as in the original code. */
+          datavec_fin(ctx, rdv);
+          GRN_OBJ_FIN(ctx, &buf);
+          grn_io_win_unmap(&sw);
+          buffer_close(ctx, ii, pseg);
+          return;
+        }
+        for (i = 0; i < nchunks; i++) {
+          GRN_B_DEC(cinfo[i].segno, scp);
+          GRN_B_DEC(cinfo[i].size, scp);
+          GRN_B_DEC(cinfo[i].dgap, scp);
+          crid += cinfo[i].dgap;
+        }
+      }
+      if (sce > scp) {
+        size += grn_p_decv(ctx, scp, sce - scp, rdv, ii->n_elements);
+        {
+          /* Walk the decoded vectors in their stored order; the section
+           * and weight vectors exist only when the matching flag is set. */
+          int j = 0;
+          sdf = rdv[j].data_size;
+          GRN_OUTPUT_INT64(sdf);
+          srp = rdv[j++].data;
+          if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { ssp = rdv[j++].data; }
+          if (sdf != rdv[j].data_size) {
+            nterms_with_corrupt_chunk++;
+          }
+          stp = rdv[j++].data;
+          if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { sop = rdv[j++].data; }
+          GRN_OUTPUT_INT64(rdv[j].data_size);
+          snn = rdv[j].data_size;
+          snp = rdv[j].data;
+        }
+        nterm_with_chunk++;
+      }
+    }
+    {
+      /* Follow the record chain of this term inside the buffer and count
+       * ordering anomalies and out-of-range positions. */
+      uint16_t pos;
+      grn_id rid, sid, rid_ = 0, sid_ = 0;
+      uint8_t *p;
+      buffer_rec *r;
+      for (pos = bt->pos_in_buffer; pos; pos = r->step) {
+        if (pos < lower_bound) {
+          nviolations++;
+        }
+        r = BUFFER_REC_AT(sb, pos);
+        p = GRN_NEXT_ADDR(r);
+        GRN_B_DEC(rid, p);
+        if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
+          GRN_B_DEC(sid, p);
+        } else {
+          sid = 1;
+        }
+        if (rid < rid_ || (rid == rid_ && sid < sid_)) {
+          nloops++;
+        }
+        rid_ = rid;
+        sid_ = sid;
+      }
+    }
+    GRN_OUTPUT_ARRAY_CLOSE();
+    if (cinfo) { GRN_FREE(cinfo); }
+  }
+  GRN_OBJ_FIN(ctx, &buf);
+
+  GRN_OUTPUT_ARRAY_CLOSE();
+  GRN_OUTPUT_CSTR("buffer free");
+  GRN_OUTPUT_INT64(sb->header.buffer_free);
+  GRN_OUTPUT_CSTR("size in buffer");
+  GRN_OUTPUT_INT64(size_in_buffer);
+  GRN_OUTPUT_CSTR("nterms");
+  GRN_OUTPUT_INT64(sb->header.nterms);
+  if (nterms_void != sb->header.nterms_void) {
+    GRN_OUTPUT_CSTR("nterms void gap");
+    GRN_OUTPUT_INT64(nterms_void - sb->header.nterms_void);
+  }
+  GRN_OUTPUT_CSTR("nterms with chunk");
+  GRN_OUTPUT_INT64(nterm_with_chunk);
+  if (nterms_with_corrupt_chunk) {
+    GRN_OUTPUT_CSTR("nterms with corrupt chunk");
+    GRN_OUTPUT_INT64(nterms_with_corrupt_chunk);
+  }
+  if (ndeleted_terms_with_value) {
+    GRN_OUTPUT_CSTR("number of deleted terms with value");
+    GRN_OUTPUT_INT64(ndeleted_terms_with_value);
+  }
+  if (nloops) {
+    GRN_OUTPUT_CSTR("number of loops");
+    GRN_OUTPUT_INT64(nloops);
+  }
+  if (nviolations) {
+    GRN_OUTPUT_CSTR("number of violations");
+    GRN_OUTPUT_INT64(nviolations);
+  }
+  GRN_OUTPUT_MAP_CLOSE();
+  datavec_fin(ctx, rdv);
+  if (sc) { grn_io_win_unmap(&sw); }
+  buffer_close(ctx, ii, pseg);
+}
+
+typedef struct { /* sort entry used by term_split(): a buffer term paired with its lexicon key */
+ buffer_term *bt; /* the term slot in the source buffer */
+ const char *key; /* key bytes obtained from _grn_table_key(); not owned by this entry */
+ uint32_t key_size; /* length of key in bytes */
+} term_sort;
+
+/*
+ * qsort() comparator for term_sort entries.
+ *
+ * Keys are ordered bytewise over their common prefix; when one key is a
+ * prefix of the other, the shorter key sorts first.  Returns a negative
+ * value, zero or a positive value as required by qsort().
+ */
+static int
+term_compar(const void *t1, const void *t2)
+{
+  const term_sort *x = (const term_sort *)t1;
+  const term_sort *y = (const term_sort *)t2;
+  const uint32_t common =
+    (x->key_size < y->key_size) ? x->key_size : y->key_size;
+  const int r = memcmp(x->key, y->key, common);
+
+  if (r) { return r; }
+  /* Compare the lengths explicitly instead of subtracting two unsigned
+   * values and narrowing the result to int. */
+  if (x->key_size == y->key_size) { return 0; }
+  return (x->key_size > y->key_size) ? 1 : -1;
+}
+
+/*
+ * Distribute the live terms of `sb` over two destination buffers.
+ *
+ * Terms with a non-zero tid are collected, sorted by lexicon key with
+ * term_compar(), and then copied in key order: the leading terms go to db0
+ * until either only one term is left or the running weight (size_in_chunk + 1
+ * per term) has exceeded half of (chunk_size + nterms) of `sb`; the remaining
+ * terms go to db1.  Both destination buffers are zero-filled first and their
+ * header.nterms reflect the number of copied terms.
+ *
+ * Returns GRN_NO_MEMORY_AVAILABLE when the sort array cannot be allocated
+ * (db0/db1 are then untouched), GRN_SUCCESS otherwise.
+ */
+static grn_rc
+term_split(grn_ctx *ctx, grn_obj *lexicon, buffer *sb, buffer *db0, buffer *db1)
+{
+  const uint32_t threshold = (sb->header.chunk_size + sb->header.nterms) >> 1;
+  const uint16_t n_terms = sb->header.nterms;
+  uint16_t n_live = 0;
+  uint16_t k;
+  uint16_t cursor;
+  uint16_t db1_start;
+  uint32_t weight = 0;
+  term_sort *sorted = GRN_MALLOC(sb->header.nterms * sizeof(term_sort));
+
+  if (!sorted) { return GRN_NO_MEMORY_AVAILABLE; }
+
+  /* Gather every term slot that is in use together with its key. */
+  for (k = 0; k < n_terms; k++) {
+    buffer_term *term = &sb->terms[k];
+    if (!term->tid) { continue; }
+    sorted[n_live].key = _grn_table_key(ctx, lexicon, term->tid & GRN_ID_MAX,
+                                        &sorted[n_live].key_size);
+    sorted[n_live].bt = term;
+    n_live++;
+  }
+  qsort(sorted, n_live, sizeof(term_sort), term_compar);
+
+  /* First part: always leave at least one term for db1. */
+  memset(db0, 0, S_SEGMENT);
+  for (cursor = 0; cursor + 1 < n_live && weight <= threshold; cursor++) {
+    grn_memcpy(&db0->terms[cursor], sorted[cursor].bt, sizeof(buffer_term));
+    db0->header.nterms++;
+    weight += sorted[cursor].bt->size_in_chunk + 1;
+  }
+
+  /* Second part: everything that did not go to db0. */
+  db1_start = cursor;
+  memset(db1, 0, S_SEGMENT);
+  for (; cursor < n_live; cursor++) {
+    grn_memcpy(&db1->terms[cursor - db1_start], sorted[cursor].bt,
+               sizeof(buffer_term));
+    db1->header.nterms++;
+  }
+
+  GRN_FREE(sorted);
+  GRN_LOG(ctx, GRN_LOG_DEBUG, "d0=%d d1=%d",
+          db0->header.nterms, db1->header.nterms);
+  return GRN_SUCCESS;
+}
+
+/*
+ * Point the array entry of every live term in `db` at that term's slot in
+ * logical segment `dls`.
+ *
+ * The position of the first slot is SEG2POS(dls, sizeof(buffer_header)); it
+ * advances by sizeof(buffer_term) >> 2 per slot, including unused slots.
+ * A term whose array entry cannot be obtained is skipped with a warning.
+ */
+static void
+array_update(grn_ctx *ctx, grn_ii *ii, uint32_t dls, buffer *db)
+{
+  const uint16_t n_slots = db->header.nterms;
+  uint32_t slot_pos = SEG2POS(dls, sizeof(buffer_header));
+  uint16_t k;
+
+  for (k = 0; k < n_slots; k++, slot_pos += sizeof(buffer_term) >> 2) {
+    const buffer_term *term = &db->terms[k];
+    uint32_t *entry;
+    grn_id tid;
+
+    if (!term->tid) { continue; }
+    tid = term->tid & GRN_ID_MAX;
+    entry = array_at(ctx, ii, tid);
+    if (!entry) {
+      GRN_LOG(ctx, GRN_LOG_WARNING, "array_at failed (%d)", tid);
+      continue;
+    }
+    entry[0] = slot_pos;
+    array_unref(ii, tid);
+  }
+}
+
+/*
+ * Split the buffer of logical segment `seg` into two new buffers.
+ *
+ * Two destination segments are reserved, the live terms of the source buffer
+ * are divided between them by term_split(), and buffer_merge() is run once
+ * per destination to build its chunk image in a heap buffer (dc0 / dc1).
+ * Each image is written to a newly allocated chunk via
+ * fake_map() + grn_io_win_unmap().  Only when both halves have been written
+ * are the new segments registered, the term array entries repointed, the
+ * source segment cleared and the source chunk freed.
+ *
+ * `h` is only forwarded to buffer_merge().
+ * Returns ctx->rc, i.e. whatever error was recorded on the context.
+ *
+ * NOTE(review): the success paths never free dc0/dc1 after
+ * grn_io_win_unmap() succeeded, so the unmap presumably releases the image;
+ * confirm against grn_io_win_unmap().
+ */
+static grn_rc
+buffer_split(grn_ctx *ctx, grn_ii *ii, uint32_t seg, grn_hash *h)
+{
+  grn_io_win sw, dw0, dw1;
+  buffer *sb, *db0 = NULL, *db1 = NULL;
+  uint8_t *sc = NULL, *dc0, *dc1;
+  uint32_t dps0 = 0, dps1 = 0, dls0 = 0, dls1 = 0, sps, scn, dcn0 = 0, dcn1 = 0;
+  if (ii->header->binfo[seg] == GRN_II_PSEG_NOT_ASSIGNED) {
+    DEFINE_NAME(ii);
+    CRIT(GRN_FILE_CORRUPT,
+         "[ii][buffer][split] invalid segment: "
+         "<%.*s> :"
+         "request:<%u>, max:<%u>",
+         name_size, name,
+         seg, ii->seg->header->max_segment);
+    return ctx->rc;
+  }
+  buffer_segment_reserve(ctx, ii, &dls0, &dps0, &dls1, &dps1);
+  if (ctx->rc != GRN_SUCCESS) {
+    DEFINE_NAME(ii);
+    ERR(ctx->rc,
+        "[ii][buffer][split] failed to reserve buffer segments: "
+        "<%.*s> :"
+        "request:<%u>, max:<%u>",
+        name_size, name,
+        seg, ii->seg->header->max_segment);
+    return ctx->rc;
+  }
+  sps = buffer_open(ctx, ii, SEG2POS(seg, 0), NULL, &sb);
+  if (sps == GRN_II_PSEG_NOT_ASSIGNED) {
+    DEFINE_NAME(ii);
+    MERR("[ii][buffer][split] failed to open buffer: "
+         "<%.*s> :"
+         "segment:<%u>, position:<%u>, max-segment:<%u>",
+         name_size, name,
+         seg, SEG2POS(seg, 0), ii->seg->header->max_segment);
+  } else {
+    GRN_IO_SEG_REF(ii->seg, dps0, db0);
+    if (db0) {
+      GRN_IO_SEG_REF(ii->seg, dps1, db1);
+      if (db1) {
+        uint32_t actual_db0_chunk_size = 0;
+        uint32_t actual_db1_chunk_size = 0;
+        uint32_t max_dest_chunk_size = sb->header.chunk_size + S_SEGMENT;
+        if ((dc0 = GRN_MALLOC(max_dest_chunk_size * 2))) {
+          if ((dc1 = GRN_MALLOC(max_dest_chunk_size * 2))) {
+            /* Either there is no source chunk (sc stays NULL) or it is
+             * mapped read-only. */
+            if ((scn = sb->header.chunk) == GRN_II_PSEG_NOT_ASSIGNED ||
+                (sc = WIN_MAP(ii->chunk, ctx, &sw, scn, 0,
+                              sb->header.chunk_size, grn_io_rdonly))) {
+              /* db0/db1 are only initialised when term_split() succeeds, so
+               * the merge must not run on its failure.  Recording the error
+               * on ctx routes control to the common cleanup branch below. */
+              grn_rc split_rc = term_split(ctx, ii->lexicon, sb, db0, db1);
+              if (split_rc == GRN_SUCCESS) {
+                buffer_merge(ctx, ii, seg, h, sb, sc, db0, dc0);
+              } else {
+                DEFINE_NAME(ii);
+                ERR(split_rc,
+                    "[ii][buffer][split] failed to split terms: "
+                    "<%.*s> :"
+                    "segment:<%u>",
+                    name_size, name,
+                    seg);
+              }
+              if (ctx->rc == GRN_SUCCESS) {
+                actual_db0_chunk_size = db0->header.chunk_size;
+                if (actual_db0_chunk_size > 0) {
+                  chunk_new(ctx, ii, &dcn0, actual_db0_chunk_size);
+                }
+                if (ctx->rc == GRN_SUCCESS) {
+                  grn_rc rc;
+                  db0->header.chunk =
+                    actual_db0_chunk_size ? dcn0 : GRN_II_PSEG_NOT_ASSIGNED;
+                  fake_map(ctx, ii->chunk, &dw0, dc0, dcn0, actual_db0_chunk_size);
+                  rc = grn_io_win_unmap(&dw0);
+                  if (rc == GRN_SUCCESS) {
+                    buffer_merge(ctx, ii, seg, h, sb, sc, db1, dc1);
+                    if (ctx->rc == GRN_SUCCESS) {
+                      actual_db1_chunk_size = db1->header.chunk_size;
+                      if (actual_db1_chunk_size > 0) {
+                        chunk_new(ctx, ii, &dcn1, actual_db1_chunk_size);
+                      }
+                      if (ctx->rc == GRN_SUCCESS) {
+                        fake_map(ctx, ii->chunk, &dw1, dc1, dcn1,
+                                 actual_db1_chunk_size);
+                        rc = grn_io_win_unmap(&dw1);
+                        if (rc == GRN_SUCCESS) {
+                          /* Both halves are written: publish them and
+                           * retire the source buffer and chunk. */
+                          db1->header.chunk =
+                            actual_db1_chunk_size ? dcn1 : GRN_II_PSEG_NOT_ASSIGNED;
+                          buffer_segment_update(ii, dls0, dps0);
+                          buffer_segment_update(ii, dls1, dps1);
+                          array_update(ctx, ii, dls0, db0);
+                          array_update(ctx, ii, dls1, db1);
+                          buffer_segment_clear(ii, seg);
+                          ii->header->total_chunk_size += actual_db0_chunk_size;
+                          ii->header->total_chunk_size += actual_db1_chunk_size;
+                          if (scn != GRN_II_PSEG_NOT_ASSIGNED) {
+                            grn_io_win_unmap(&sw);
+                            chunk_free(ctx, ii, scn, 0, sb->header.chunk_size);
+                            ii->header->total_chunk_size -= sb->header.chunk_size;
+                          }
+                        } else {
+                          /* Writing the second image failed: give back both
+                           * new chunks and the second image. */
+                          if (actual_db1_chunk_size) {
+                            chunk_free(ctx, ii, dcn1, 0, actual_db1_chunk_size);
+                          }
+                          if (actual_db0_chunk_size) {
+                            chunk_free(ctx, ii, dcn0, 0, actual_db0_chunk_size);
+                          }
+                          GRN_FREE(dc1);
+                          if (scn != GRN_II_PSEG_NOT_ASSIGNED) {
+                            grn_io_win_unmap(&sw);
+                          }
+                          {
+                            DEFINE_NAME(ii);
+                            ERR(rc,
+                                "[ii][buffer][split] "
+                                "failed to unmap a destination chunk2: "
+                                "<%.*s> :"
+                                "segment:<%u>, "
+                                "destination-chunk1:<%u>, "
+                                "destination-chunk2:<%u>, "
+                                "actual-size1:<%u>, "
+                                "actual-size2:<%u>",
+                                name_size, name,
+                                seg,
+                                dcn0,
+                                dcn1,
+                                actual_db0_chunk_size,
+                                actual_db1_chunk_size);
+                          }
+                        }
+                      } else {
+                        /* chunk_new() for the second half failed. */
+                        if (actual_db0_chunk_size) {
+                          chunk_free(ctx, ii, dcn0, 0, actual_db0_chunk_size);
+                        }
+                        GRN_FREE(dc1);
+                        if (scn != GRN_II_PSEG_NOT_ASSIGNED) {
+                          grn_io_win_unmap(&sw);
+                        }
+                      }
+                    } else {
+                      /* buffer_merge() for the second half failed. */
+                      if (actual_db0_chunk_size) {
+                        chunk_free(ctx, ii, dcn0, 0, actual_db0_chunk_size);
+                      }
+                      GRN_FREE(dc1);
+                      if (scn != GRN_II_PSEG_NOT_ASSIGNED) {
+                        grn_io_win_unmap(&sw);
+                      }
+                    }
+                  } else {
+                    /* Writing the first image failed: both images are still
+                     * owned here. */
+                    if (actual_db0_chunk_size) {
+                      chunk_free(ctx, ii, dcn0, 0, actual_db0_chunk_size);
+                    }
+                    GRN_FREE(dc1);
+                    GRN_FREE(dc0);
+                    if (scn != GRN_II_PSEG_NOT_ASSIGNED) {
+                      grn_io_win_unmap(&sw);
+                    }
+                    {
+                      DEFINE_NAME(ii);
+                      ERR(rc,
+                          "[ii][buffer][split] "
+                          "failed to unmap a destination chunk1: "
+                          "<%.*s> :"
+                          "segment:<%u>, "
+                          "destination-chunk1:<%u>, "
+                          "actual-size1:<%u>",
+                          name_size, name,
+                          seg,
+                          dcn0,
+                          actual_db0_chunk_size);
+                    }
+                  }
+                } else {
+                  /* chunk_new() for the first half failed. */
+                  GRN_FREE(dc1);
+                  GRN_FREE(dc0);
+                  if (scn != GRN_II_PSEG_NOT_ASSIGNED) { grn_io_win_unmap(&sw); }
+                }
+              } else {
+                /* term_split() or the first buffer_merge() failed. */
+                GRN_FREE(dc1);
+                GRN_FREE(dc0);
+                if (scn != GRN_II_PSEG_NOT_ASSIGNED) { grn_io_win_unmap(&sw); }
+              }
+            } else {
+              GRN_FREE(dc1);
+              GRN_FREE(dc0);
+              {
+                DEFINE_NAME(ii);
+                MERR("[ii][buffer][split] failed to map a source chunk: "
+                     "<%.*s> :"
+                     "segment:<%u>, "
+                     "source-segment:<%u>, "
+                     "chunk-size:<%u>",
+                     name_size, name,
+                     seg,
+                     scn,
+                     sb->header.chunk_size);
+              }
+            }
+          } else {
+            GRN_FREE(dc0);
+            {
+              DEFINE_NAME(ii);
+              MERR("[ii][buffer][split] "
+                   "failed to allocate a destination chunk2: "
+                   "<%.*s> :"
+                   "segment:<%u>, "
+                   "destination-segment1:<%u>, "
+                   "destination-segment2:<%u>",
+                   name_size, name,
+                   seg,
+                   dps0,
+                   dps1);
+            }
+          }
+        } else {
+          DEFINE_NAME(ii);
+          MERR("[ii][buffer][split] failed to allocate a destination chunk1: "
+               "<%.*s>: "
+               "segment:<%u>, "
+               "destination-segment1:<%u>, "
+               "destination-segment2:<%u>",
+               name_size, name,
+               seg,
+               dps0,
+               dps1);
+        }
+        GRN_IO_SEG_UNREF(ii->seg, dps1);
+      } else {
+        DEFINE_NAME(ii);
+        MERR("[ii][buffer][split] failed to allocate a destination segment2: "
+             "<%.*s>: "
+             "segment:<%u>, "
+             "destination-segment1:<%u>, "
+             "destination-segment2:<%u>",
+             name_size, name,
+             seg,
+             dps0,
+             dps1);
+      }
+      GRN_IO_SEG_UNREF(ii->seg, dps0);
+    } else {
+      DEFINE_NAME(ii);
+      MERR("[ii][buffer][split] failed to allocate a destination segment1: "
+           "<%.*s>: "
+           "segment:<%u>, "
+           "destination-segment1:<%u>, "
+           "destination-segment2:<%u>",
+           name_size, name,
+           seg,
+           dps0,
+           dps1);
+    }
+    buffer_close(ctx, ii, sps);
+  }
+  return ctx->rc;
+}
+
+#define SCALE_FACTOR 2048 /* NOTE(review): no use is visible in this part of the file; meaning unconfirmed */
+#define MAX_NTERMS 8192 /* NOTE(review): presumably an upper bound on terms per buffer; no use is visible here -- confirm */
+#define SPLIT_COND(ii, buffer)\
+ ((buffer)->header.nterms > 1024 ||\
+ ((buffer)->header.nterms > 1 &&\
+ (buffer)->header.chunk_size * 100 > (ii)->header->total_chunk_size)) /*
+ * SPLIT_COND is true when the buffer holds more than 1024 terms, or holds
+ * more than one term and 100 times its chunk_size exceeds the index-wide
+ * total_chunk_size.  Callers use it to choose buffer_split() over
+ * buffer_flush().  Both arguments are evaluated more than once. */
+
+/*
+ * Try to reuse the buffer that currently holds term `tid` as the home for a
+ * new term needing `size` bytes plus one buffer_term slot.
+ *
+ * On success *lseg is set to the logical segment of that buffer, and *b and
+ * *pseg refer to the still-open buffer.  When the buffer is too full it is
+ * split or flushed (see SPLIT_COND) and the check is repeated.  The search
+ * gives up, leaving *lseg untouched, when the term has no buffer position
+ * (a[0] is 0 or has its lowest bit set), when the buffer cannot be opened,
+ * when even an empty record area could not hold the request, or when the
+ * split/flush fails.
+ */
+inline static void
+buffer_new_find_segment(grn_ctx *ctx,
+                        grn_ii *ii,
+                        int size,
+                        grn_id tid,
+                        grn_hash *h,
+                        buffer **b,
+                        uint32_t *lseg,
+                        uint32_t *pseg)
+{
+  uint32_t *a;
+
+  a = array_at(ctx, ii, tid);
+  if (!a) {
+    return;
+  }
+
+  for (;;) {
+    uint32_t pos = a[0];
+    uint32_t nterms;
+    uint32_t chunk_size;
+    int should_split;
+
+    if (!pos || (pos & 1)) { break; }
+    *pseg = buffer_open(ctx, ii, pos, NULL, b);
+    if (*pseg == GRN_II_PSEG_NOT_ASSIGNED) { break; }
+    if ((*b)->header.buffer_free >= size + sizeof(buffer_term)) {
+      *lseg = LSEG(pos);
+      break;
+    }
+    /* Take everything that is needed from the buffer header while the
+     * buffer is still open; it must not be read after buffer_close(). */
+    nterms = (*b)->header.nterms;
+    chunk_size = (*b)->header.chunk_size;
+    should_split = SPLIT_COND(ii, (*b));
+    buffer_close(ctx, ii, *pseg);
+    if (should_split) {
+      GRN_LOG(ctx, GRN_LOG_DEBUG,
+              "nterms=%d chunk=%d total=%" GRN_FMT_INT64U,
+              nterms,
+              chunk_size,
+              ii->header->total_chunk_size >> 10);
+      if (buffer_split(ctx, ii, LSEG(pos), h)) { break; }
+    } else {
+      /* Flushing only empties the record area; give up when the term
+       * table alone leaves too little room. */
+      if (S_SEGMENT - sizeof(buffer_header)
+          - nterms * sizeof(buffer_term)
+          < size + sizeof(buffer_term)) {
+        break;
+      }
+      if (buffer_flush(ctx, ii, LSEG(pos), h)) { break; }
+    }
+  }
+
+  array_unref(ii, tid);
+}
+
+/*
+ * Find a buffer for a new term `id` when the lexicon is a patricia trie, by
+ * probing the buffers of terms whose keys are close to the key of `id`.
+ *
+ * Three strategies are used, depending on the lexicon:
+ *   - variable-size keys with a tokenizer: walk the keys greater than the
+ *     key of `id` in ascending order;
+ *   - variable-size keys without a tokenizer: prefix-search with the full
+ *     key, then with prefixes shortened by 1, 2, 4, ... bytes, skipping keys
+ *     already visited by the previous, longer prefix;
+ *   - fixed-size keys: a single cursor opened with GRN_CURSOR_PREFIX and the
+ *     key as upper bound.
+ *
+ * Every candidate is handed to buffer_new_find_segment(); the search stops
+ * as soon as *lseg is assigned or an error is recorded on ctx.
+ */
+inline static void
+buffer_new_lexicon_pat(grn_ctx *ctx,
+                       grn_ii *ii,
+                       int size,
+                       grn_id id,
+                       grn_hash *h,
+                       buffer **b,
+                       uint32_t *lseg,
+                       uint32_t *pseg)
+{
+  grn_pat_cursor *cursor;
+  char key[GRN_TABLE_MAX_KEY_SIZE];
+  int key_size;
+
+  key_size = grn_table_get_key(ctx, ii->lexicon, id, key,
+                               GRN_TABLE_MAX_KEY_SIZE);
+  if (ii->lexicon->header.flags & GRN_OBJ_KEY_VAR_SIZE) {
+    grn_obj *tokenizer = NULL;
+
+    grn_table_get_info(ctx, ii->lexicon, NULL, NULL, &tokenizer, NULL, NULL);
+    if (tokenizer) {
+      /* For natural language */
+      cursor = grn_pat_cursor_open(ctx,
+                                   (grn_pat *)(ii->lexicon),
+                                   key,
+                                   key_size,
+                                   NULL,
+                                   0,
+                                   0,
+                                   -1,
+                                   GRN_CURSOR_ASCENDING|GRN_CURSOR_GT);
+      if (cursor) {
+        grn_id tid;
+        while (ctx->rc == GRN_SUCCESS &&
+               *lseg == GRN_II_PSEG_NOT_ASSIGNED &&
+               (tid = grn_pat_cursor_next(ctx, cursor))) {
+          buffer_new_find_segment(ctx, ii, size, tid, h, b, lseg, pseg);
+        }
+        grn_pat_cursor_close(ctx, cursor);
+      }
+    } else {
+      /* For text data */
+      int target_key_size = key_size;
+      int reduced_key_size = 0;
+
+      while (*lseg == GRN_II_PSEG_NOT_ASSIGNED && target_key_size > 0) {
+        grn_id tid;
+
+        cursor = grn_pat_cursor_open(ctx,
+                                     (grn_pat *)(ii->lexicon),
+                                     key, target_key_size,
+                                     NULL, 0, 0, -1,
+                                     GRN_CURSOR_PREFIX);
+        if (!cursor) {
+          break;
+        }
+
+        if (reduced_key_size == 0) {
+          while (ctx->rc == GRN_SUCCESS &&
+                 *lseg == GRN_II_PSEG_NOT_ASSIGNED &&
+                 (tid = grn_pat_cursor_next(ctx, cursor))) {
+            buffer_new_find_segment(ctx, ii, size, tid, h, b, lseg, pseg);
+          }
+        } else {
+          while (ctx->rc == GRN_SUCCESS &&
+                 *lseg == GRN_II_PSEG_NOT_ASSIGNED &&
+                 (tid = grn_pat_cursor_next(ctx, cursor))) {
+            void *current_key;
+            int current_key_size;
+
+            current_key_size = grn_pat_cursor_get_key(ctx, cursor, &current_key);
+            /* Keys that also share the next reduced_key_size bytes were
+             * already visited with the previous, longer prefix.  A key
+             * shorter than that longer prefix cannot have been visited and
+             * must not be read beyond its end. */
+            if (current_key_size >= target_key_size + reduced_key_size &&
+                memcmp(((char *)current_key) + target_key_size,
+                       key + target_key_size,
+                       reduced_key_size) == 0) {
+              continue;
+            }
+            buffer_new_find_segment(ctx, ii, size, tid, h, b, lseg, pseg);
+          }
+        }
+        grn_pat_cursor_close(ctx, cursor);
+
+        /* Shorten the prefix by 1, 2, 4, ... bytes on successive rounds. */
+        if (reduced_key_size == 0) {
+          reduced_key_size = 1;
+        } else {
+          reduced_key_size *= 2;
+        }
+        target_key_size -= reduced_key_size;
+      }
+    }
+  } else {
+    /* For other data */
+    cursor = grn_pat_cursor_open(ctx,
+                                 (grn_pat *)(ii->lexicon),
+                                 NULL, 0, key, key_size, 0, -1,
+                                 GRN_CURSOR_PREFIX);
+    if (cursor) {
+      grn_id tid;
+      while (ctx->rc == GRN_SUCCESS &&
+             *lseg == GRN_II_PSEG_NOT_ASSIGNED &&
+             (tid = grn_pat_cursor_next(ctx, cursor))) {
+        buffer_new_find_segment(ctx, ii, size, tid, h, b, lseg, pseg);
+      }
+      grn_pat_cursor_close(ctx, cursor);
+    }
+  }
+}
+
+/*
+ * Find a buffer for a new term when the lexicon is not a patricia trie:
+ * every term of the lexicon is offered to buffer_new_find_segment() in
+ * table iteration order until *lseg is assigned or an error is recorded on
+ * ctx.  `id` is not used by this variant.
+ */
+inline static void
+buffer_new_lexicon_other(grn_ctx *ctx,
+                         grn_ii *ii,
+                         int size,
+                         grn_id id,
+                         grn_hash *h,
+                         buffer **b,
+                         uint32_t *lseg,
+                         uint32_t *pseg)
+{
+  GRN_TABLE_EACH_BEGIN(ctx, ii->lexicon, cursor, tid) {
+    const int keep_searching =
+      (ctx->rc == GRN_SUCCESS && *lseg == GRN_II_PSEG_NOT_ASSIGNED);
+    if (!keep_searching) {
+      break;
+    }
+    buffer_new_find_segment(ctx, ii, size, tid, h, b, lseg, pseg);
+  } GRN_TABLE_EACH_END(ctx, cursor);
+}
+
+
+/*
+ * Reserve a term slot and `size` bytes of record space for term `id`.
+ *
+ * An existing buffer is searched for through the lexicon first; when none
+ * has room, a new buffer segment is created and initialised as empty.
+ * Inside the chosen buffer an unused term slot is reused when the header
+ * says one exists, otherwise a new slot is appended.
+ *
+ * Outputs: *pos is the position of the term slot, *bt the slot itself, *br
+ * the start of the reserved record area, *bp the buffer.
+ * Returns the physical segment of the opened buffer, or
+ * GRN_II_PSEG_NOT_ASSIGNED when the request is too large for any buffer or
+ * no buffer could be obtained.
+ */
+inline static uint32_t
+buffer_new(grn_ctx *ctx, grn_ii *ii, int size, uint32_t *pos,
+           buffer_term **bt, buffer_rec **br, buffer **bp, grn_id id, grn_hash *h)
+{
+  buffer *b = NULL;
+  uint16_t offset;
+  int reuse_void_slot = 0;
+  uint32_t lseg = GRN_II_PSEG_NOT_ASSIGNED, pseg = GRN_II_PSEG_NOT_ASSIGNED;
+
+  if (S_SEGMENT - sizeof(buffer_header) < size + sizeof(buffer_term)) {
+    DEFINE_NAME(ii);
+    MERR("[ii][buffer][new] requested size is too large: "
+         "<%.*s> :"
+         "requested:<%" GRN_FMT_SIZE ">, max:<%" GRN_FMT_SIZE ">",
+         name_size, name,
+         (size_t)(size + sizeof(buffer_term)),
+         (size_t)(S_SEGMENT - sizeof(buffer_header)));
+    return GRN_II_PSEG_NOT_ASSIGNED;
+  }
+
+  /* Step 1: look for an existing buffer with enough room. */
+  if (ii->lexicon->header.type == GRN_TABLE_PAT_KEY) {
+    buffer_new_lexicon_pat(ctx, ii, size, id, h, &b, &lseg, &pseg);
+  } else {
+    buffer_new_lexicon_other(ctx, ii, size, id, h, &b, &lseg, &pseg);
+  }
+
+  /* Step 2: nothing suitable, so start a fresh, empty buffer. */
+  if (lseg == GRN_II_PSEG_NOT_ASSIGNED) {
+    if (buffer_segment_new(ctx, ii, &lseg)) {
+      return GRN_II_PSEG_NOT_ASSIGNED;
+    }
+    pseg = buffer_open(ctx, ii, SEG2POS(lseg, 0), NULL, &b);
+    if (pseg == GRN_II_PSEG_NOT_ASSIGNED) {
+      return GRN_II_PSEG_NOT_ASSIGNED;
+    }
+    memset(b, 0, S_SEGMENT);
+    b->header.buffer_free = S_SEGMENT - sizeof(buffer_header);
+    b->header.chunk = GRN_II_PSEG_NOT_ASSIGNED;
+  }
+
+  /* Step 3: choose the term slot. */
+  if (b->header.nterms_void) {
+    offset = 0;
+    while (offset < b->header.nterms && b->terms[offset].tid) {
+      offset++;
+    }
+    if (offset < b->header.nterms) {
+      reuse_void_slot = 1;
+      b->header.nterms_void--;
+    } else {
+      /* The header claims a void slot but none exists: repair the counter
+       * and fall back to appending. */
+      GRN_LOG(ctx, GRN_LOG_DEBUG, "inconsistent buffer(%d)", lseg);
+      b->header.nterms_void = 0;
+    }
+  } else {
+    offset = b->header.nterms;
+  }
+
+  /* Step 4: account for the space taken. */
+  if (reuse_void_slot) {
+    b->header.buffer_free -= size;
+  } else {
+    b->header.nterms++;
+    b->header.buffer_free -= size + sizeof(buffer_term);
+  }
+
+  *pos = SEG2POS(lseg, (sizeof(buffer_header) + sizeof(buffer_term) * offset));
+  *bt = &b->terms[offset];
+  *br = (buffer_rec *)(((byte *)&b->terms[b->header.nterms]) + b->header.buffer_free);
+  *bp = b;
+  return pseg;
+}
+
+/* ii */
+
+/*
+ * Create the storage of an inverted index and initialise `ii` to use it.
+ *
+ * Two io objects are created: the segment io at `path` and the chunk io at
+ * `path` + ".c".  When `path` is NULL both are created without a path.  The
+ * maximum numbers of segments and chunks depend on the
+ * GRN_OBJ_INDEX_SMALL / GRN_OBJ_INDEX_MEDIUM bits of `flags`.
+ *
+ * ii->n_elements starts at 2 and grows by one for each of the
+ * GRN_OBJ_WITH_SECTION, GRN_OBJ_WITH_WEIGHT and GRN_OBJ_WITH_POSITION flags.
+ *
+ * Returns `ii`, or NULL when the lexicon information cannot be obtained,
+ * the path is too long, or either io cannot be created.  `ii` itself is
+ * never freed here.
+ */
+static grn_ii *
+_grn_ii_create(grn_ctx *ctx, grn_ii *ii, const char *path, grn_obj *lexicon, uint32_t flags)
+{
+  int i;
+  uint32_t max_n_segments;
+  uint32_t max_n_chunks;
+  grn_io *seg, *chunk;
+  char path2[PATH_MAX];
+  struct grn_ii_header *header;
+  grn_table_flags lflags;
+  grn_encoding encoding;
+  grn_obj *tokenizer;
+  if (grn_table_get_info(ctx, lexicon, &lflags, &encoding, &tokenizer,
+                         NULL, NULL)) {
+    return NULL;
+  }
+  if (path && strlen(path) + 6 >= PATH_MAX) { return NULL; }
+
+  if (flags & GRN_OBJ_INDEX_SMALL) {
+    max_n_segments = grn_ii_max_n_segments_small;
+    max_n_chunks = grn_ii_max_n_chunks_small;
+  } else if (flags & GRN_OBJ_INDEX_MEDIUM) {
+    max_n_segments = MAX_PSEG_MEDIUM;
+    max_n_chunks = GRN_II_MAX_CHUNK_MEDIUM;
+  } else {
+    max_n_segments = MAX_PSEG;
+    max_n_chunks = GRN_II_MAX_CHUNK;
+  }
+
+  seg = grn_io_create(ctx,
+                      path,
+                      sizeof(struct grn_ii_header),
+                      S_SEGMENT,
+                      max_n_segments,
+                      grn_io_auto,
+                      GRN_IO_EXPIRE_SEGMENT);
+  if (!seg) { return NULL; }
+  if (path) {
+    grn_strcpy(path2, PATH_MAX, path);
+    grn_strcat(path2, PATH_MAX, ".c");
+    chunk = grn_io_create(ctx, path2, 0, S_CHUNK, max_n_chunks, grn_io_auto,
+                          GRN_IO_EXPIRE_SEGMENT);
+  } else {
+    chunk = grn_io_create(ctx, NULL, 0, S_CHUNK, max_n_chunks, grn_io_auto, 0);
+  }
+  if (!chunk) {
+    grn_io_close(ctx, seg);
+    /* An index created without a path has no file to remove; never hand a
+     * NULL path to grn_io_remove(). */
+    if (path) {
+      grn_io_remove(ctx, path);
+    }
+    return NULL;
+  }
+  header = grn_io_header(seg);
+  grn_io_set_type(seg, GRN_COLUMN_INDEX);
+  /* Mark every array/buffer segment and every chunk list as unassigned. */
+  for (i = 0; i < GRN_II_MAX_LSEG; i++) {
+    header->ainfo[i] = GRN_II_PSEG_NOT_ASSIGNED;
+    header->binfo[i] = GRN_II_PSEG_NOT_ASSIGNED;
+  }
+  for (i = 0; i <= GRN_II_N_CHUNK_VARIATION; i++) {
+    header->free_chunks[i] = GRN_II_PSEG_NOT_ASSIGNED;
+    header->garbages[i] = GRN_II_PSEG_NOT_ASSIGNED;
+  }
+  header->flags = flags;
+  ii->seg = seg;
+  ii->chunk = chunk;
+  ii->lexicon = lexicon;
+  ii->lflags = lflags;
+  ii->encoding = encoding;
+  ii->header = header;
+  ii->n_elements = 2;
+  if ((flags & GRN_OBJ_WITH_SECTION)) { ii->n_elements++; }
+  if ((flags & GRN_OBJ_WITH_WEIGHT)) { ii->n_elements++; }
+  if ((flags & GRN_OBJ_WITH_POSITION)) { ii->n_elements++; }
+  return ii;
+}
+
+/*
+ * Allocate a grn_ii object, tag it as GRN_COLUMN_INDEX and create its
+ * storage with _grn_ii_create().  Returns the new object, or NULL when the
+ * allocation or the storage creation fails (the object is freed in the
+ * latter case).
+ */
+grn_ii *
+grn_ii_create(grn_ctx *ctx, const char *path, grn_obj *lexicon, uint32_t flags)
+{
+  grn_ii *ii = GRN_MALLOCN(grn_ii, 1);
+
+  if (!ii) {
+    return NULL;
+  }
+  GRN_DB_OBJ_SET_TYPE(ii, GRN_COLUMN_INDEX);
+  if (_grn_ii_create(ctx, ii, path, lexicon, flags)) {
+    return ii;
+  }
+  GRN_FREE(ii);
+  return NULL;
+}
+
+/*
+ * Remove the files of the inverted index stored at `path`: the segment file
+ * `path` itself and the chunk file `path` + ".c".
+ *
+ * Returns GRN_INVALID_ARGUMENT when `path` is NULL or too long for the
+ * chunk file name to fit in PATH_MAX, otherwise the result of the first
+ * failing grn_io_remove() call, or that of the last call.  The chunk file
+ * is not touched when removing the segment file fails.
+ */
+grn_rc
+grn_ii_remove(grn_ctx *ctx, const char *path)
+{
+  grn_rc rc;
+  char buffer[PATH_MAX];
+  if (!path || strlen(path) > PATH_MAX - 4) { return GRN_INVALID_ARGUMENT; }
+  rc = grn_io_remove(ctx, path);
+  if (rc != GRN_SUCCESS) { return rc; }
+  /* Build the chunk file name from the complete path, exactly as the
+   * create/open functions do; the length check above guarantees that it
+   * fits.  Formatting it with a precision of 256 would silently truncate
+   * longer paths and name a different file. */
+  grn_strcpy(buffer, PATH_MAX, path);
+  grn_strcat(buffer, PATH_MAX, ".c");
+  return grn_io_remove(ctx, buffer);
+}
+
+/*
+ * Empty an inverted index: close both io objects, remove their files (when
+ * the index is file backed) and create the storage again with the same
+ * path, lexicon and flags.
+ *
+ * Returns GRN_NO_MEMORY_AVAILABLE when a path cannot be duplicated, the
+ * result of the failing close/remove call, GRN_UNKNOWN_ERROR when the
+ * storage cannot be created again, or the result of the last call made.
+ * The duplicated paths are released on every exit path.
+ */
+grn_rc
+grn_ii_truncate(grn_ctx *ctx, grn_ii *ii)
+{
+  grn_rc rc;
+  const char *io_segpath, *io_chunkpath;
+  char *segpath = NULL, *chunkpath = NULL;
+  grn_obj *lexicon;
+  uint32_t flags;
+  /* The io objects own their path strings, so keep private copies that
+   * survive grn_io_close(). */
+  if ((io_segpath = grn_io_path(ii->seg)) && *io_segpath != '\0') {
+    if (!(segpath = GRN_STRDUP(io_segpath))) {
+      ERR(GRN_NO_MEMORY_AVAILABLE, "cannot duplicate path: <%-.256s>", io_segpath);
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+    if ((io_chunkpath = grn_io_path(ii->chunk)) && *io_chunkpath != '\0') {
+      if (!(chunkpath = GRN_STRDUP(io_chunkpath))) {
+        ERR(GRN_NO_MEMORY_AVAILABLE, "cannot duplicate path: <%-.256s>", io_chunkpath);
+        /* Do not leak the segment path copy made above. */
+        GRN_FREE(segpath);
+        return GRN_NO_MEMORY_AVAILABLE;
+      }
+    }
+  }
+  lexicon = ii->lexicon;
+  flags = ii->header->flags;
+  if ((rc = grn_io_close(ctx, ii->seg))) { goto exit; }
+  if ((rc = grn_io_close(ctx, ii->chunk))) { goto exit; }
+  ii->seg = NULL;
+  ii->chunk = NULL;
+  if (segpath && (rc = grn_io_remove(ctx, segpath))) { goto exit; }
+  if (chunkpath && (rc = grn_io_remove(ctx, chunkpath))) { goto exit; }
+  if (!_grn_ii_create(ctx, ii, segpath, lexicon, flags)) {
+    rc = GRN_UNKNOWN_ERROR;
+  }
+exit:
+  if (segpath) { GRN_FREE(segpath); }
+  if (chunkpath) { GRN_FREE(chunkpath); }
+  return rc;
+}
+
+/*
+ * Open an existing inverted index stored at `path` (segment file) and
+ * `path` + ".c" (chunk file) for the given lexicon.
+ *
+ * Returns a newly allocated grn_ii, or NULL when the lexicon information
+ * cannot be obtained, the path is too long, either file cannot be opened,
+ * the segment file is not of type GRN_COLUMN_INDEX, or the object cannot be
+ * allocated.  Any io opened before a failure is closed again.
+ */
+grn_ii *
+grn_ii_open(grn_ctx *ctx, const char *path, grn_obj *lexicon)
+{
+  grn_io *seg, *chunk;
+  grn_ii *ii;
+  char path2[PATH_MAX];
+  struct grn_ii_header *header;
+  uint32_t io_type;
+  uint32_t n_elements = 2;
+  grn_table_flags lflags;
+  grn_encoding encoding;
+  grn_obj *tokenizer;
+
+  if (grn_table_get_info(ctx, lexicon, &lflags, &encoding, &tokenizer,
+                         NULL, NULL)) {
+    return NULL;
+  }
+  if (strlen(path) + 6 >= PATH_MAX) {
+    return NULL;
+  }
+  grn_strcpy(path2, PATH_MAX, path);
+  grn_strcat(path2, PATH_MAX, ".c");
+
+  seg = grn_io_open(ctx, path, grn_io_auto);
+  if (!seg) {
+    return NULL;
+  }
+  chunk = grn_io_open(ctx, path2, grn_io_auto);
+  if (!chunk) {
+    grn_io_close(ctx, seg);
+    return NULL;
+  }
+
+  header = grn_io_header(seg);
+  io_type = grn_io_get_type(seg);
+  if (io_type != GRN_COLUMN_INDEX) {
+    ERR(GRN_INVALID_FORMAT,
+        "[column][index] file type must be %#04x: <%#04x>",
+        GRN_COLUMN_INDEX, io_type);
+    goto close_both;
+  }
+  ii = GRN_MALLOCN(grn_ii, 1);
+  if (!ii) {
+    goto close_both;
+  }
+
+  /* Two elements are always present; each optional feature adds one. */
+  if ((header->flags & GRN_OBJ_WITH_SECTION)) { n_elements++; }
+  if ((header->flags & GRN_OBJ_WITH_WEIGHT)) { n_elements++; }
+  if ((header->flags & GRN_OBJ_WITH_POSITION)) { n_elements++; }
+
+  GRN_DB_OBJ_SET_TYPE(ii, GRN_COLUMN_INDEX);
+  ii->seg = seg;
+  ii->chunk = chunk;
+  ii->lexicon = lexicon;
+  ii->lflags = lflags;
+  ii->encoding = encoding;
+  ii->header = header;
+  ii->n_elements = n_elements;
+  return ii;
+
+close_both:
+  grn_io_close(ctx, seg);
+  grn_io_close(ctx, chunk);
+  return NULL;
+}
+
+/*
+ * Close both io objects of `ii` and free the object.
+ *
+ * Returns GRN_INVALID_ARGUMENT for a NULL `ii`.  When closing either io
+ * fails, that error is returned immediately and `ii` is not freed.
+ */
+grn_rc
+grn_ii_close(grn_ctx *ctx, grn_ii *ii)
+{
+  grn_rc rc;
+
+  if (!ii) {
+    return GRN_INVALID_ARGUMENT;
+  }
+  rc = grn_io_close(ctx, ii->seg);
+  if (rc) {
+    return rc;
+  }
+  rc = grn_io_close(ctx, ii->chunk);
+  if (rc) {
+    return rc;
+  }
+  GRN_FREE(ii);
+  return rc;
+}
+
+/*
+ * Report the sizes of the segment io and the chunk io of `ii`.
+ *
+ * Each output pointer is optional; a NULL pointer skips that query.  The
+ * segment size is queried first, and a failure there is returned without
+ * querying the chunk size.  Returns GRN_SUCCESS when every requested query
+ * succeeded.
+ */
+grn_rc
+grn_ii_info(grn_ctx *ctx, grn_ii *ii, uint64_t *seg_size, uint64_t *chunk_size)
+{
+  grn_rc rc = GRN_SUCCESS;
+
+  if (seg_size) {
+    rc = grn_io_size(ctx, ii->seg, seg_size);
+  }
+  if (rc == GRN_SUCCESS && chunk_size) {
+    rc = grn_io_size(ctx, ii->chunk, chunk_size);
+  }
+  return rc;
+}
+
+/*
+ * Return the flags stored in the header of `ii`, or 0 for a NULL `ii`.
+ */
+grn_column_flags
+grn_ii_get_flags(grn_ctx *ctx, grn_ii *ii)
+{
+  return ii ? ii->header->flags : 0;
+}
+
+/*
+ * Return the number of elements per posting of `ii` (ii->n_elements), or 0
+ * for a NULL `ii`.
+ */
+uint32_t
+grn_ii_get_n_elements(grn_ctx *ctx, grn_ii *ii)
+{
+  return ii ? ii->n_elements : 0;
+}
+
+/*
+ * Call grn_io_expire() on the chunk io of `ii` with the arguments 0 and
+ * 1000000.  The corresponding call for the segment io (ii->seg, with 128
+ * and 1000000) is disabled in the original code and is not made here either.
+ */
+void
+grn_ii_expire(grn_ctx *ctx, grn_ii *ii)
+{
+  grn_io *chunk_io = ii->chunk;
+
+  grn_io_expire(ctx, chunk_io, 0, 1000000);
+}
+
+/*
+ * Flush the segment io of `ii` and then, only when that succeeded, its
+ * chunk io.  Returns the result of the first failing flush, or
+ * GRN_SUCCESS.
+ */
+grn_rc
+grn_ii_flush(grn_ctx *ctx, grn_ii *ii)
+{
+  const grn_rc seg_rc = grn_io_flush(ctx, ii->seg);
+
+  if (seg_rc != GRN_SUCCESS) {
+    return seg_rc;
+  }
+  return grn_io_flush(ctx, ii->chunk);
+}
+
+/*
+ * Return the combined disk usage reported by grn_io_get_disk_usage() for
+ * the segment io and the chunk io of `ii`.
+ */
+size_t
+grn_ii_get_disk_usage(grn_ctx *ctx, grn_ii *ii)
+{
+  const size_t seg_usage = grn_io_get_disk_usage(ctx, ii->seg);
+  const size_t chunk_usage = grn_io_get_disk_usage(ctx, ii->chunk);
+
+  return seg_usage + chunk_usage;
+}
+
+/* BIT11_01(x): bits 1..11 of x (x shifted right by one, masked to 11 bits). */
+#define BIT11_01(x) (((x) >> 1) & 0x7ff)
+/* BIT31_12(x): x shifted right by twelve bits. */
+#define BIT31_12(x) ((x) >> 12)
+
+grn_rc /*
+ * Add the posting described by `u` to the posting list of term `tid`.
+ *
+ * When u->tf or u->sid is zero the call is forwarded to
+ * grn_ii_delete_one().  The per-term slot `a` (from array_get()) is read
+ * in three states:
+ *   a[0] == 0       : the term has no posting yet;
+ *   a[0] odd        : one posting is stored inline (rid/sid packed in
+ *                     a[0], position in a[1]);
+ *   a[0] even, != 0 : a position that is handed to buffer_open().
+ * `h` is passed through to buffer_split(), buffer_flush() and buffer_new().
+ * Returns ctx->rc.
+ */
+grn_ii_update_one(grn_ctx *ctx, grn_ii *ii, grn_id tid, grn_ii_updspec *u, grn_hash *h)
+{
+ buffer *b;
+ uint8_t *bs;
+ buffer_rec *br = NULL;
+ buffer_term *bt;
+ uint32_t pseg = 0, pos = 0, size, *a;
+ if (!tid) { return ctx->rc; } /* tid == 0: nothing to update */
+ if (!u->tf || !u->sid) { return grn_ii_delete_one(ctx, ii, tid, u, h); } /* zero tf or sid is handled as a deletion */
+ if (u->sid > ii->header->smax) { ii->header->smax = u->sid; } /* keep the largest section ID seen so far */
+ if (!(a = array_get(ctx, ii, tid))) {
+ DEFINE_NAME(ii);
+ MERR("[ii][update][one] failed to allocate an array: "
+ "<%.*s>: "
+ "<%u>:<%u>:<%u>",
+ name_size, name,
+ u->rid, u->sid, tid);
+ return ctx->rc;
+ }
+ if (!(bs = encode_rec(ctx, ii, u, &size, 0))) { /* bs/size: encoded form of `u`, freed at exit */
+ DEFINE_NAME(ii);
+ MERR("[ii][update][one] failed to encode a record: "
+ "<%.*s>: "
+ "<%u>:<%u>:<%u>",
+ name_size, name,
+ u->rid, u->sid, tid);
+ goto exit;
+ }
+ for (;;) { /* retried (via continue) after a split or when a[0] changed */
+ if (a[0]) {
+ if (!(a[0] & 1)) { /* even value: the postings already live in a buffer */
+ pos = a[0];
+ if ((pseg = buffer_open(ctx, ii, pos, &bt, &b)) == GRN_II_PSEG_NOT_ASSIGNED) {
+ DEFINE_NAME(ii);
+ MERR("[ii][update][one] failed to allocate a buffer: "
+ "<%.*s>: "
+ "<%u>:<%u>:<%u>: "
+ "segment:<%u>",
+ name_size, name,
+ u->rid, u->sid, tid,
+ pos);
+ goto exit;
+ }
+ if (b->header.buffer_free < size) { /* not enough room: split or flush the buffer first */
+ int bfb = b->header.buffer_free;
+ GRN_LOG(ctx, GRN_LOG_DEBUG, "flushing a[0]=%d seg=%d(%p) free=%d",
+ a[0], LSEG(a[0]), b, b->header.buffer_free);
+ buffer_close(ctx, ii, pseg);
+ if (SPLIT_COND(ii, b)) {
+ /*((S_SEGMENT - sizeof(buffer_header) + ii->header->bmax -
+ b->header.nterms * sizeof(buffer_term)) * 4 <
+ b->header.chunk_size)*/
+ GRN_LOG(ctx, GRN_LOG_DEBUG,
+ "nterms=%d chunk=%d total=%" GRN_FMT_INT64U,
+ b->header.nterms,
+ b->header.chunk_size,
+ ii->header->total_chunk_size >> 10);
+ buffer_split(ctx, ii, LSEG(pos), h);
+ if (ctx->rc != GRN_SUCCESS) {
+ DEFINE_NAME(ii);
+ ERR(ctx->rc,
+ "[ii][update][one] failed to split a buffer: "
+ "<%.*s>: "
+ "<%u>:<%u><%u>: "
+ "segment:<%u>",
+ name_size, name,
+ u->rid, u->sid, tid,
+ pos);
+ goto exit;
+ }
+ continue; /* start over with the current a[0] after the split */
+ }
+ buffer_flush(ctx, ii, LSEG(pos), h);
+ if (ctx->rc != GRN_SUCCESS) {
+ DEFINE_NAME(ii);
+ ERR(ctx->rc,
+ "[ii][update][one] failed to flush a buffer: "
+ "<%.*s>: "
+ "<%u>:<%u><%u>: "
+ "segment:<%u>",
+ name_size, name,
+ u->rid, u->sid, tid,
+ pos);
+ goto exit;
+ }
+ if (a[0] != pos) { /* NOTE(review): the log below prints a[0] (new) before pos (old) */
+ GRN_LOG(ctx, GRN_LOG_DEBUG,
+ "grn_ii_update_one: a[0] changed %d->%d", a[0], pos);
+ continue;
+ }
+ if ((pseg = buffer_open(ctx, ii, pos, &bt, &b)) == GRN_II_PSEG_NOT_ASSIGNED) {
+ GRN_LOG(ctx, GRN_LOG_CRIT, "buffer not found a[0]=%d", a[0]);
+ {
+ DEFINE_NAME(ii);
+ MERR("[ii][update][one] failed to reallocate a buffer: "
+ "<%.*s>: "
+ "<%u>:<%u>:<%u>: "
+ "segment:<%u>, new-segment:<%u>",
+ name_size, name,
+ u->rid, u->sid, tid,
+ pos, a[0]);
+ }
+ goto exit;
+ }
+ GRN_LOG(ctx, GRN_LOG_DEBUG,
+ "flushed a[0]=%d seg=%d(%p) free=%d->%d nterms=%d v=%d",
+ a[0], LSEG(a[0]), b, bfb, b->header.buffer_free,
+ b->header.nterms, b->header.nterms_void);
+ if (b->header.buffer_free < size) { /* still too small after the flush: give up */
+ DEFINE_NAME(ii);
+ MERR("[ii][update][one] buffer is full: "
+ "<%.*s>: "
+ "<%u>:<%u><%u>: "
+ "segment:<%u>, new-segment:<%u>, free:<%u>, required:<%u>",
+ name_size, name,
+ u->rid, u->sid, tid,
+ pos, a[0], b->header.buffer_free, size);
+ buffer_close(ctx, ii, pseg);
+ /* todo: direct merge */
+ goto exit;
+ }
+ }
+ b->header.buffer_free -= size; /* reserve `size` bytes of the free area for the new record */
+ br = (buffer_rec *)(((byte *)&b->terms[b->header.nterms])
+ + b->header.buffer_free);
+ } else { /* odd value: a single posting is stored inline in a[0]/a[1] */
+ grn_ii_updspec u2;
+ uint32_t size2 = 0, v = a[0];
+ struct _grn_ii_pos pos2;
+ pos2.pos = a[1];
+ pos2.next = NULL;
+ u2.pos = &pos2;
+ if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
+ u2.rid = BIT31_12(v);
+ u2.sid = BIT11_01(v);
+ } else {
+ u2.rid = v >> 1;
+ u2.sid = 1;
+ }
+ u2.tf = 1;
+ u2.weight = 0;
+ if (u2.rid != u->rid || u2.sid != u->sid) { /* a different posting: move the inline one into a new buffer first */
+ uint8_t *bs2 = encode_rec(ctx, ii, &u2, &size2, 0);
+ if (!bs2) {
+ DEFINE_NAME(ii);
+ MERR("[ii][update][one] failed to encode a record2: "
+ "<%.*s>: "
+ "<%u>:<%u>:<%u>",
+ name_size, name,
+ u2.rid, u2.sid, tid);
+ goto exit;
+ }
+ pseg = buffer_new(ctx, ii, size + size2, &pos, &bt, &br, &b, tid, h);
+ if (pseg == GRN_II_PSEG_NOT_ASSIGNED) {
+ GRN_FREE(bs2);
+ {
+ DEFINE_NAME(ii);
+ MERR("[ii][update][one] failed to create a buffer2: "
+ "<%.*s>: "
+ "<%u>:<%u>:<%u>: "
+ "size:<%u>",
+ name_size, name,
+ u2.rid, u2.sid, tid,
+ size + size2);
+ }
+ goto exit;
+ }
+ bt->tid = tid;
+ bt->size_in_chunk = 0;
+ bt->pos_in_chunk = 0;
+ bt->size_in_buffer = 0;
+ bt->pos_in_buffer = 0;
+ buffer_put(ctx, ii, b, bt, br, bs2, &u2, size2);
+ if (ctx->rc != GRN_SUCCESS) {
+ GRN_FREE(bs2);
+ buffer_close(ctx, ii, pseg);
+ {
+ DEFINE_NAME(ii);
+ MERR("[ii][update][one] failed to put to buffer: "
+ "<%.*s>: "
+ "<%u>:<%u>:<%u>",
+ name_size, name,
+ u2.rid, u2.sid, tid);
+ }
+ goto exit;
+ }
+ br = (buffer_rec *)(((byte *)br) + size2); /* the new record goes right after the moved one */
+ GRN_FREE(bs2);
+ }
+ }
+ }
+ break;
+ }
+ if (!br) { /* no buffer record has been reserved yet */
+ if (u->tf == 1 && u->weight == 0) { /* a lone posting with tf 1 and weight 0 may be packed inline */
+ if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
+ if (u->rid < 0x100000 && u->sid < 0x800) { /* rid must fit in 20 bits, sid in 11 bits */
+ a[0] = (u->rid << 12) + (u->sid << 1) + 1;
+ a[1] = u->pos->pos;
+ goto exit;
+ }
+ } else {
+ a[0] = (u->rid << 1) + 1;
+ a[1] = u->pos->pos;
+ goto exit;
+ }
+ }
+ pseg = buffer_new(ctx, ii, size, &pos, &bt, &br, &b, tid, h);
+ if (pseg == GRN_II_PSEG_NOT_ASSIGNED) {
+ DEFINE_NAME(ii);
+ MERR("[ii][update][one] failed to create a buffer: "
+ "<%.*s>: "
+ "<%u>:<%u>:<%u>: "
+ "size:<%u>",
+ name_size, name,
+ u->rid, u->sid, tid,
+ size);
+ goto exit;
+ }
+ bt->tid = tid;
+ bt->size_in_chunk = 0;
+ bt->pos_in_chunk = 0;
+ bt->size_in_buffer = 0;
+ bt->pos_in_buffer = 0;
+ }
+ buffer_put(ctx, ii, b, bt, br, bs, u, size);
+ buffer_close(ctx, ii, pseg);
+ if (!a[0] || (a[0] & 1)) { a[0] = pos; } /* slot was empty or inline: make it refer to the buffer */
+exit :
+ array_unref(ii, tid);
+ if (bs) { GRN_FREE(bs); }
+ if (u->tf != u->atf) { /* tf differs from atf: report the discarded postings */
+ grn_obj *source_table;
+ char source_table_name[GRN_TABLE_MAX_KEY_SIZE];
+ int source_table_name_size;
+ char term[GRN_TABLE_MAX_KEY_SIZE];
+ int term_size;
+
+ source_table = grn_ctx_at(ctx, DB_OBJ(ii)->range);
+ if (source_table) {
+ source_table_name_size = grn_obj_name(ctx,
+ source_table,
+ source_table_name,
+ GRN_TABLE_MAX_KEY_SIZE);
+ } else {
+ grn_strcpy(source_table_name, GRN_TABLE_MAX_KEY_SIZE, "(null)");
+ source_table_name_size = strlen(source_table_name);
+ }
+ term_size = grn_table_get_key(ctx, ii->lexicon, tid,
+ term, GRN_TABLE_MAX_KEY_SIZE);
+ {
+ DEFINE_NAME(ii);
+ GRN_LOG(ctx, GRN_LOG_WARNING,
+ "[ii][update][one] too many postings: "
+ "<%.*s>: "
+ "record:<%.*s>(%d), "
+ "n-postings:<%d>, "
+ "n-discarded-postings:<%d>, "
+ "term:<%d>(<%.*s>)",
+ name_size, name,
+ source_table_name_size, source_table_name,
+ u->rid,
+ u->atf,
+ u->atf - u->tf,
+ tid, term_size, term);
+ }
+ }
+ grn_ii_expire(ctx, ii);
+ return ctx->rc;
+}
+
+/*
+ * Remove the posting identified by `u` from the posting list of term `tid`.
+ *
+ * The per-term slot `a` (from array_at()) is handled as follows:
+ *   a[0] == 0 : nothing is stored, nothing to do;
+ *   a[0] odd  : a single inline posting; it is cleared (and
+ *               lexicon_delete() is called) only when its rid matches
+ *               u->rid and, for indexes with sections, u->sid is zero or
+ *               matches its sid;
+ *   otherwise : a deletion record encoded from `u` is appended to the
+ *               buffer, flushing the buffer once when it has no room.
+ *
+ * The encoded record is produced only once even if the loop is retried
+ * because a[0] changed during the flush; re-encoding on every iteration
+ * would leak the previous allocation.
+ *
+ * `h` is passed through to lexicon_delete() and buffer_flush().
+ * Returns ctx->rc.
+ */
+grn_rc
+grn_ii_delete_one(grn_ctx *ctx, grn_ii *ii, grn_id tid, grn_ii_updspec *u, grn_hash *h)
+{
+  buffer *b;
+  uint8_t *bs = NULL;
+  buffer_rec *br;
+  buffer_term *bt;
+  uint32_t pseg, size = 0, *a;
+  if (!tid) { return ctx->rc; }
+  if (!(a = array_at(ctx, ii, tid))) {
+    return ctx->rc;
+  }
+  for (;;) {
+    if (!a[0]) { goto exit; }
+    if (a[0] & 1) {
+      /* Inline posting: drop it only when it is the one being deleted. */
+      if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
+        uint32_t rid = BIT31_12(a[0]);
+        uint32_t sid = BIT11_01(a[0]);
+        if (u->rid == rid && (!u->sid || u->sid == sid)) {
+          a[0] = 0;
+          lexicon_delete(ctx, ii, tid, h);
+        }
+      } else {
+        uint32_t rid = a[0] >> 1;
+        if (u->rid == rid) {
+          a[0] = 0;
+          lexicon_delete(ctx, ii, tid, h);
+        }
+      }
+      goto exit;
+    }
+    /* Encode once; `bs` and `size` stay valid across retries. */
+    if (!bs && !(bs = encode_rec(ctx, ii, u, &size, 1))) {
+      DEFINE_NAME(ii);
+      MERR("[ii][delete][one] failed to encode a record: "
+           "<%.*s>: "
+           "<%u>:<%u>:<%u>",
+           name_size, name,
+           u->rid, u->sid, tid);
+      goto exit;
+    }
+    if ((pseg = buffer_open(ctx, ii, a[0], &bt, &b)) == GRN_II_PSEG_NOT_ASSIGNED) {
+      DEFINE_NAME(ii);
+      MERR("[ii][delete][one] failed to allocate a buffer: "
+           "<%.*s>: "
+           "<%u>:<%u><%u>: "
+           "position:<%u>",
+           name_size, name,
+           u->rid, u->sid, tid,
+           a[0]);
+      goto exit;
+    }
+    if (b->header.buffer_free < size) {
+      /* Not enough room: flush the buffer and look at it again. */
+      uint32_t _a = a[0];
+      GRN_LOG(ctx, GRN_LOG_DEBUG, "flushing! b=%p free=%d, seg(%d)",
+              b, b->header.buffer_free, LSEG(a[0]));
+      buffer_close(ctx, ii, pseg);
+      buffer_flush(ctx, ii, LSEG(a[0]), h);
+      if (ctx->rc != GRN_SUCCESS) {
+        DEFINE_NAME(ii);
+        ERR(ctx->rc,
+            "[ii][delete][one] failed to flush a buffer: "
+            "<%.*s>: "
+            "<%u>:<%u><%u>: "
+            "position:<%u>",
+            name_size, name,
+            u->rid, u->sid, tid,
+            a[0]);
+        goto exit;
+      }
+      if (a[0] != _a) {
+        /* Old value first, new value second, matching the "->" arrow. */
+        GRN_LOG(ctx, GRN_LOG_DEBUG, "grn_ii_delete_one: a[0] changed %d->%d)",
+                _a, a[0]);
+        continue;
+      }
+      if ((pseg = buffer_open(ctx, ii, a[0], &bt, &b)) == GRN_II_PSEG_NOT_ASSIGNED) {
+        DEFINE_NAME(ii);
+        MERR("[ii][delete][one] failed to reallocate a buffer: "
+             "<%.*s>: "
+             "<%u>:<%u><%u>: "
+             "position:<%u>",
+             name_size, name,
+             u->rid, u->sid, tid,
+             a[0]);
+        goto exit;
+      }
+      GRN_LOG(ctx, GRN_LOG_DEBUG, "flushed! b=%p free=%d, seg(%d)",
+              b, b->header.buffer_free, LSEG(a[0]));
+      if (b->header.buffer_free < size) {
+        DEFINE_NAME(ii);
+        MERR("[ii][delete][one] buffer is full: "
+             "<%.*s>: "
+             "<%u>:<%u><%u>: "
+             "segment:<%u>, free:<%u>, required:<%u>",
+             name_size, name,
+             u->rid, u->sid, tid,
+             a[0], b->header.buffer_free, size);
+        buffer_close(ctx, ii, pseg);
+        goto exit;
+      }
+    }
+
+    /* Reserve `size` bytes of the free area and store the record there. */
+    b->header.buffer_free -= size;
+    br = (buffer_rec *)(((byte *)&b->terms[b->header.nterms]) + b->header.buffer_free);
+    buffer_put(ctx, ii, b, bt, br, bs, u, size);
+    buffer_close(ctx, ii, pseg);
+    break;
+  }
+exit :
+  array_unref(ii, tid);
+  if (bs) { GRN_FREE(bs); }
+  return ctx->rc;
+}
+
+#define CHUNK_USED 1 /* stat bit: the chunk side has to be advanced by the next call */
+#define BUFFER_USED 2 /* stat bit: the buffer side has to be advanced by the next call */
+#define SOLE_DOC_USED 4 /* stat bit: the single inline posting was already returned */
+#define SOLE_POS_USED 8 /* stat bit: the single position was already returned */
+
+struct _grn_ii_cursor { /* Iteration state over the postings of one term. */
+ grn_db_obj obj;
+ grn_ctx *ctx; /* context given to grn_ii_cursor_open() */
+ grn_ii *ii; /* index being read */
+ grn_id id; /* term ID the cursor was opened for */
+ grn_posting *post; /* posting selected by the last next call: &pc, &pb or NULL */
+
+ grn_id min; /* Minimum record ID */
+ grn_id max; /* `max` argument of grn_ii_cursor_open() */
+ grn_posting pc; /* current posting decoded from chunk data */
+ grn_posting pb; /* current posting from the buffer, or the inline posting */
+
+ uint32_t cdf; /* Document frequency */
+ uint32_t *cdp; /* start of the decoded record ID gaps; crp stops at cdp + cdf */
+ uint32_t *crp; /* Record ID */
+ uint32_t *csp; /* Section ID */
+ uint32_t *ctp; /* Term frequency */
+ uint32_t *cwp; /* Weight */
+ uint32_t *cpp; /* Position */
+
+ uint8_t *bp; /* read pointer inside the current buffer record */
+
+ int nelements; /* `nelements` argument of grn_ii_cursor_open() */
+ uint32_t nchunks; /* number of entries in cinfo */
+ uint32_t curr_chunk; /* index of the next chunk to decode */
+ chunk_info *cinfo; /* segno/size/dgap per chunk; allocated only for CHUNK_SPLIT terms */
+ grn_io_win iw; /* window backing the mapping that cp points into */
+ uint8_t *cp; /* current read pointer in the mapped chunk data */
+ uint8_t *cpe; /* end of the mapped chunk data */
+ datavec rdv[MAX_N_ELEMENTS + 1]; /* decode output filled by grn_p_decv() */
+
+ struct grn_ii_buffer *buf; /* buffer from buffer_open(); NULL for an inline posting */
+ uint16_t stat; /* combination of the *_USED bits above */
+ uint16_t nextb; /* position of the next buffer record; 0 ends the list */
+ uint32_t buffer_pseg; /* segment returned by buffer_open() */
+ int flags; /* `flags` argument of grn_ii_cursor_open() */
+ uint32_t *ppseg; /* points at ii->header->binfo[LSEG(pos)] */
+
+ int weight; /* set by cursor_heap_push() when a non-zero weight is given */
+
+ uint32_t prev_chunk_rid; /* value of pc.rid when the previous chunk was finished */
+};
+
+/*
+ * Tell whether the buffer segment the cursor was opened on has been
+ * handed out again.
+ *
+ * Returns GRN_FALSE when *c->ppseg still equals c->buffer_pseg, or when
+ * c->buffer_pseg is found in header->bgqbody between bgqtail and bgqhead.
+ * Returns GRN_TRUE otherwise.
+ */
+static grn_bool
+buffer_is_reused(grn_ctx *ctx, grn_ii *ii, grn_ii_cursor *c)
+{
+  uint32_t slot;
+
+  if (*c->ppseg == c->buffer_pseg) {
+    return GRN_FALSE;
+  }
+  slot = ii->header->bgqtail;
+  while (slot != ii->header->bgqhead) {
+    if (ii->header->bgqbody[slot] == c->buffer_pseg) {
+      return GRN_FALSE;
+    }
+    slot = (slot + 1) & (GRN_II_BGQSIZE - 1);
+  }
+  return GRN_TRUE;
+}
+
+/*
+ * Tell whether the chunk at `offset` (of `size` bytes) may have been
+ * reused since the cursor was opened.
+ *
+ * Returns 0 when *c->ppseg still equals c->buffer_pseg, or when `offset`
+ * is found in the garbage list selected by `size`.  Returns 1 when `size`
+ * exceeds S_CHUNK or when the offset is not found in that list (including
+ * the case where a garbage segment cannot be mapped).
+ */
+static int
+chunk_is_reused(grn_ctx *ctx, grn_ii *ii, grn_ii_cursor *c, uint32_t offset, uint32_t size)
+{
+  uint32_t n, width, garbage_seg;
+
+  if (*c->ppseg == c->buffer_pseg) {
+    return 0;
+  }
+  if (size > S_CHUNK) {
+    return 1;
+  }
+  /* Pick the garbage list by the bit width needed for `size`. */
+  if (size <= (1 << GRN_II_W_LEAST_CHUNK)) {
+    width = GRN_II_W_LEAST_CHUNK;
+  } else {
+    int es = size - 1;
+    GRN_BIT_SCAN_REV(es, width);
+    width++;
+  }
+  garbage_seg = ii->header->garbages[width - GRN_II_W_LEAST_CHUNK];
+  while (garbage_seg != GRN_II_PSEG_NOT_ASSIGNED) {
+    grn_io_win iw;
+    grn_ii_ginfo *ginfo = WIN_MAP(ii->chunk, ctx, &iw, garbage_seg, 0, S_GARBAGE,
+                                  grn_io_rdwr);
+    if (!ginfo) {
+      break;
+    }
+    for (n = 0; n < ginfo->nrecs; n++) {
+      if (ginfo->recs[n] == offset) {
+        grn_io_win_unmap(&iw);
+        return 0;
+      }
+    }
+    garbage_seg = ginfo->next;
+    grn_io_win_unmap(&iw);
+  }
+  return 1;
+}
+
+/*
+ * Non-zero when the current posting of cursor c1 sorts strictly after the
+ * one of c2, comparing rid first, then sid, then pos.
+ */
+#define GRN_II_CURSOR_CMP(c1,c2) \
+  (((c1)->post->rid != (c2)->post->rid) \
+   ? ((c1)->post->rid > (c2)->post->rid) \
+   : (((c1)->post->sid != (c2)->post->sid) \
+      ? ((c1)->post->sid > (c2)->post->sid) \
+      : ((c1)->post->pos > (c2)->post->pos)))
+
+grn_ii_cursor * /*
+ * Open a cursor over the postings of term `tid`.
+ *
+ * min, max, nelements and flags are stored in the cursor as given.
+ * Returns NULL when array_at() fails, when the term has no posting
+ * (a[0] == 0), or when an allocation, buffer_open() or the chunk mapping
+ * fails.  The loop starts over when the buffer or chunk is detected as
+ * reused, or when a[0] changed while the cursor was being set up.
+ * A non-NULL result has to be released with grn_ii_cursor_close().
+ */
+grn_ii_cursor_open(grn_ctx *ctx, grn_ii *ii, grn_id tid,
+ grn_id min, grn_id max, int nelements, int flags)
+{
+ grn_ii_cursor *c = NULL;
+ uint32_t pos, *a;
+ if (!(a = array_at(ctx, ii, tid))) { return NULL; }
+ for (;;) {
+ c = NULL;
+ if (!(pos = a[0])) { goto exit; } /* no posting stored for this term */
+ if (!(c = GRN_MALLOC(sizeof(grn_ii_cursor)))) { goto exit; }
+ memset(c, 0, sizeof(grn_ii_cursor));
+ c->ctx = ctx;
+ c->ii = ii;
+ c->id = tid;
+ c->min = min;
+ c->max = max;
+ c->nelements = nelements;
+ c->flags = flags;
+ c->weight = 0;
+ if (pos & 1) { /* odd: single posting stored inline in a[0]/a[1] */
+ c->stat = 0;
+ if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
+ c->pb.rid = BIT31_12(pos);
+ c->pb.sid = BIT11_01(pos);
+ } else {
+ c->pb.rid = pos >> 1;
+ c->pb.sid = 1;
+ }
+ c->pb.tf = 1;
+ c->pb.weight = 0;
+ c->pb.pos = a[1];
+ } else { /* even: postings are read from a buffer and possibly a chunk */
+ uint32_t chunk;
+ buffer_term *bt;
+ c->buffer_pseg = buffer_open(ctx, ii, pos, &bt, &c->buf);
+ if (c->buffer_pseg == GRN_II_PSEG_NOT_ASSIGNED) {
+ GRN_FREE(c);
+ c = NULL;
+ goto exit;
+ }
+ c->ppseg = &ii->header->binfo[LSEG(pos)];
+ if (bt->size_in_chunk && (chunk = c->buf->header.chunk) != GRN_II_PSEG_NOT_ASSIGNED) {
+ if (!(c->cp = WIN_MAP(ii->chunk, ctx, &c->iw, chunk, bt->pos_in_chunk,
+ bt->size_in_chunk, grn_io_rdonly))) {
+ buffer_close(ctx, ii, c->buffer_pseg);
+ GRN_FREE(c);
+ c = NULL;
+ goto exit;
+ }
+ if (buffer_is_reused(ctx, ii, c)) { /* buffer changed under us: retry from scratch */
+ grn_ii_cursor_close(ctx, c);
+ continue;
+ }
+ c->cpe = c->cp + bt->size_in_chunk;
+ if ((bt->tid & CHUNK_SPLIT)) { /* the mapped data starts with a table of sub-chunks */
+ int i;
+ grn_id crid;
+ GRN_B_DEC(c->nchunks, c->cp);
+ if (chunk_is_reused(ctx, ii, c, chunk, c->buf->header.chunk_size)) {
+ grn_ii_cursor_close(ctx, c);
+ continue;
+ }
+ if (!(c->cinfo = GRN_MALLOCN(chunk_info, c->nchunks))) {
+ buffer_close(ctx, ii, c->buffer_pseg);
+ grn_io_win_unmap(&c->iw);
+ GRN_FREE(c);
+ c = NULL;
+ goto exit;
+ }
+ for (i = 0, crid = GRN_ID_NIL; i < c->nchunks; i++) {
+ GRN_B_DEC(c->cinfo[i].segno, c->cp);
+ GRN_B_DEC(c->cinfo[i].size, c->cp);
+ GRN_B_DEC(c->cinfo[i].dgap, c->cp);
+ crid += c->cinfo[i].dgap;
+ if (crid < min) { /* this sub-chunk lies below `min`: start after it */
+ c->pc.rid = crid;
+ c->curr_chunk = i + 1;
+ }
+ }
+ if (chunk_is_reused(ctx, ii, c, chunk, c->buf->header.chunk_size)) { /* checked again after the table was decoded */
+ grn_ii_cursor_close(ctx, c);
+ continue;
+ }
+ }
+ if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {
+ c->rdv[ii->n_elements - 1].flags = ODD;
+ }
+ }
+ c->nextb = bt->pos_in_buffer;
+ c->stat = CHUNK_USED|BUFFER_USED;
+ }
+ if (pos == a[0]) { break; } /* a[0] unchanged: the cursor is consistent */
+ grn_ii_cursor_close(ctx, c);
+ }
+exit :
+ array_unref(ii, tid);
+ return c;
+}
+
+/*
+ * Raise the minimum record ID of a cursor and, when possible, skip whole
+ * chunks whose record IDs all lie below the new minimum.
+ *
+ * Nothing happens when `min` is not larger than the current c->min or when
+ * grn_ii_cursor_set_min_enable is false (c->min is left untouched then).
+ * Chunk skipping walks c->cinfo[].dgap starting from c->prev_chunk_rid.
+ */
+static inline void
+grn_ii_cursor_set_min(grn_ctx *ctx, grn_ii_cursor *c, grn_id min)
+{
+  grn_id previous_min;
+  grn_id base_rid;
+  uint32_t idx;
+  uint32_t n_skippable = 0;
+
+  if (c->min >= min) {
+    return;
+  }
+  if (!grn_ii_cursor_set_min_enable) {
+    return;
+  }
+
+  previous_min = c->min;
+  c->min = min;
+
+  if (!c->buf ||
+      c->pc.rid == GRN_ID_NIL ||
+      c->pc.rid >= c->min ||
+      c->prev_chunk_rid >= c->min ||
+      c->curr_chunk >= c->nchunks) {
+    return;
+  }
+
+  /* Accumulate gaps while the reached record ID stays below the minimum. */
+  base_rid = c->prev_chunk_rid;
+  idx = (c->curr_chunk > 0) ? c->curr_chunk - 1 : 0;
+  while (idx < c->nchunks) {
+    grn_id reached = base_rid + c->cinfo[idx].dgap;
+    if (reached >= c->min) {
+      break;
+    }
+    base_rid = reached;
+    n_skippable = idx + 1;
+    idx++;
+  }
+
+  if (n_skippable > c->curr_chunk) {
+    uint32_t previous_chunk = c->curr_chunk;
+    grn_bool chunk_was_used = (c->stat & CHUNK_USED);
+    c->pc.rid = base_rid;
+    c->pc.rest = 0;
+    c->prev_chunk_rid = base_rid - c->cinfo[n_skippable - 1].dgap;
+    c->curr_chunk = n_skippable;
+    c->crp = c->cdp + c->cdf;
+    c->stat |= CHUNK_USED;
+    GRN_LOG(ctx, GRN_LOG_DEBUG,
+            "[ii][cursor][min] skip: %p: min(%u->%u): chunk(%u->%u): "
+            "chunk-used(%-.256s->%-.256s)",
+            c,
+            previous_min, min,
+            previous_chunk, c->curr_chunk,
+            chunk_was_used ? "true" : "false",
+            (c->stat & CHUNK_USED) ? "true" : "false");
+  }
+}
+
+typedef struct { /* Options read by grn_ii_cursor_next_internal(). */
+ grn_bool include_garbage; /* when true, postings are returned even if their tf or sid is zero */
+} grn_ii_cursor_next_options;
+
+/*
+ * Advance the cursor to its next posting and return it, or NULL when the
+ * postings are exhausted.
+ *
+ * For a cursor backed by a buffer (c->buf != NULL) two sorted streams are
+ * merged: postings decoded from chunk data (c->pc) and postings read from
+ * the buffer (c->pb).  The CHUNK_USED / BUFFER_USED bits of c->stat say
+ * which stream(s) have to be advanced on this call.  Unless
+ * options->include_garbage is set, postings whose tf or sid is zero are
+ * skipped.  For a cursor on an inline posting (c->buf == NULL) the single
+ * posting in c->pb is returned once, provided its rid is not below c->min.
+ *
+ * When a buffer or chunk is detected as reused or changed by another
+ * thread, a warning is logged and the affected stream is ended.
+ */
+static inline grn_posting *
+grn_ii_cursor_next_internal(grn_ctx *ctx, grn_ii_cursor *c,
+ grn_ii_cursor_next_options *options)
+{
+ const grn_bool include_garbage = options->include_garbage;
+ if (c->buf) {
+ for (;;) {
+ if (c->stat & CHUNK_USED) {
+ for (;;) {
+ if (c->crp < c->cdp + c->cdf) { /* postings remain in the decoded chunk */
+ uint32_t dgap = *c->crp++;
+ c->pc.rid += dgap;
+ if (dgap) { c->pc.sid = 0; }
+ if ((c->ii->header->flags & GRN_OBJ_WITH_SECTION)) {
+ c->pc.sid += 1 + *c->csp++;
+ } else {
+ c->pc.sid = 1;
+ }
+ c->cpp += c->pc.rest; /* skip positions of the previous posting that were not consumed */
+ c->pc.rest = c->pc.tf = 1 + *c->ctp++;
+ if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
+ c->pc.weight = *c->cwp++;
+ } else {
+ c->pc.weight = 0;
+ }
+ c->pc.pos = 0;
+ /*
+ {
+ static int count = 0;
+ int tf = c->pc.tf, pos = 0, *pp = (int *)c->cpp;
+ grn_obj buf;
+ GRN_TEXT_INIT(&buf, 0);
+ grn_text_itoa(ctx, &buf, c->pc.rid);
+ GRN_TEXT_PUTC(ctx, &buf, ':');
+ grn_text_itoa(ctx, &buf, c->pc.sid);
+ GRN_TEXT_PUTC(ctx, &buf, ':');
+ grn_text_itoa(ctx, &buf, c->pc.tf);
+ GRN_TEXT_PUTC(ctx, &buf, '(');
+ while (tf--) {
+ pos += *pp++;
+ count++;
+ grn_text_itoa(ctx, &buf, pos);
+ if (tf) { GRN_TEXT_PUTC(ctx, &buf, ':'); }
+ }
+ GRN_TEXT_PUTC(ctx, &buf, ')');
+ GRN_TEXT_PUTC(ctx, &buf, '\0');
+ GRN_LOG(ctx, GRN_LOG_DEBUG, "posting(%d):%-.256s", count, GRN_TEXT_VALUE(&buf));
+ GRN_OBJ_FIN(ctx, &buf);
+ }
+ */
+ } else { /* decoded data exhausted: decode the next chunk, if any */
+ if (c->curr_chunk <= c->nchunks) {
+ if (c->curr_chunk == c->nchunks) { /* last part: the data mapped at c->cp */
+ if (c->cp < c->cpe) {
+ int decoded_size;
+ decoded_size =
+ grn_p_decv(ctx, c->cp, c->cpe - c->cp,
+ c->rdv, c->ii->n_elements);
+ if (decoded_size == 0) {
+ /* c->cinfo has only c->nchunks entries (and is NULL without
+ CHUNK_SPLIT), so the chunk of the buffer is reported here. */
+ GRN_LOG(ctx, GRN_LOG_WARNING,
+ "[ii][cursor][next][chunk][last] "
+ "chunk(%d) is changed by another thread "
+ "while decoding: %p",
+ c->buf->header.chunk,
+ c);
+ c->pc.rid = GRN_ID_NIL;
+ break;
+ }
+ if (buffer_is_reused(ctx, c->ii, c)) {
+ GRN_LOG(ctx, GRN_LOG_WARNING,
+ "[ii][cursor][next][chunk][last] "
+ "buffer is reused by another thread: %p",
+ c);
+ c->pc.rid = GRN_ID_NIL;
+ break;
+ }
+ if (chunk_is_reused(ctx, c->ii, c,
+ c->buf->header.chunk,
+ c->buf->header.chunk_size)) {
+ GRN_LOG(ctx, GRN_LOG_WARNING,
+ "[ii][cursor][next][chunk][last] "
+ "chunk(%d) is reused by another thread: %p",
+ c->buf->header.chunk,
+ c);
+ c->pc.rid = GRN_ID_NIL;
+ break;
+ }
+ } else {
+ c->pc.rid = GRN_ID_NIL;
+ break;
+ }
+ } else { /* a sub-chunk listed in c->cinfo */
+ uint8_t *cp;
+ grn_io_win iw;
+ uint32_t size = c->cinfo[c->curr_chunk].size;
+ if (size && (cp = WIN_MAP(c->ii->chunk, ctx, &iw,
+ c->cinfo[c->curr_chunk].segno, 0,
+ size, grn_io_rdonly))) {
+ int decoded_size;
+ decoded_size =
+ grn_p_decv(ctx, cp, size, c->rdv, c->ii->n_elements);
+ grn_io_win_unmap(&iw);
+ if (decoded_size == 0) {
+ GRN_LOG(ctx, GRN_LOG_WARNING,
+ "[ii][cursor][next][chunk] "
+ "chunk(%d) is changed by another thread "
+ "while decoding: %p",
+ c->cinfo[c->curr_chunk].segno,
+ c);
+ c->pc.rid = GRN_ID_NIL;
+ break;
+ }
+ if (chunk_is_reused(ctx, c->ii, c,
+ c->cinfo[c->curr_chunk].segno, size)) {
+ GRN_LOG(ctx, GRN_LOG_WARNING,
+ "[ii][cursor][next][chunk] "
+ "chunk(%d) is reused by another thread: %p",
+ c->cinfo[c->curr_chunk].segno,
+ c);
+ c->pc.rid = GRN_ID_NIL;
+ break;
+ }
+ } else {
+ c->pc.rid = GRN_ID_NIL;
+ break;
+ }
+ }
+ { /* point the read cursors at the freshly decoded vectors */
+ int j = 0;
+ c->cdf = c->rdv[j].data_size;
+ c->crp = c->cdp = c->rdv[j++].data;
+ if ((c->ii->header->flags & GRN_OBJ_WITH_SECTION)) {
+ c->csp = c->rdv[j++].data;
+ }
+ c->ctp = c->rdv[j++].data;
+ if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
+ c->cwp = c->rdv[j++].data;
+ }
+ if ((c->ii->header->flags & GRN_OBJ_WITH_POSITION)) {
+ c->cpp = c->rdv[j].data;
+ }
+ }
+ c->prev_chunk_rid = c->pc.rid;
+ c->pc.rid = GRN_ID_NIL;
+ c->pc.sid = 0;
+ c->pc.rest = 0;
+ c->curr_chunk++;
+ continue;
+ } else {
+ c->pc.rid = GRN_ID_NIL;
+ }
+ }
+ break;
+ }
+ }
+ if (c->stat & BUFFER_USED) {
+ for (;;) {
+ if (c->nextb) {
+ uint32_t lrid = c->pb.rid, lsid = c->pb.sid; /* for check */
+ buffer_rec *br = BUFFER_REC_AT(c->buf, c->nextb);
+ if (buffer_is_reused(ctx, c->ii, c)) {
+ GRN_LOG(ctx, GRN_LOG_WARNING,
+ "[ii][cursor][next][buffer] "
+ "buffer(%d,%d) is reused by another thread: %p",
+ c->buffer_pseg, *c->ppseg,
+ c);
+ c->pb.rid = GRN_ID_NIL;
+ break;
+ }
+ c->bp = GRN_NEXT_ADDR(br);
+ GRN_B_DEC(c->pb.rid, c->bp);
+ if ((c->ii->header->flags & GRN_OBJ_WITH_SECTION)) {
+ GRN_B_DEC(c->pb.sid, c->bp);
+ } else {
+ c->pb.sid = 1;
+ }
+ if (lrid > c->pb.rid || (lrid == c->pb.rid && lsid >= c->pb.sid)) { /* (rid, sid) must strictly increase along the list */
+ DEFINE_NAME(c->ii);
+ ERR(GRN_FILE_CORRUPT,
+ "[ii][broken][cursor][next][buffer] "
+ "posting in list in buffer isn't sorted: "
+ "<%.*s>: (%d:%d) -> (%d:%d) (%d->%d)",
+ name_size, name,
+ lrid, lsid,
+ c->pb.rid, c->pb.sid,
+ c->buffer_pseg, *c->ppseg);
+ c->pb.rid = GRN_ID_NIL;
+ break;
+ }
+ if (c->pb.rid < c->min) { /* below the minimum: move on via jump or step */
+ c->pb.rid = 0;
+ if (br->jump > 0 && !BUFFER_REC_DELETED(br)) {
+ buffer_rec *jump_br = BUFFER_REC_AT(c->buf, br->jump);
+ if (BUFFER_REC_DELETED(jump_br)) {
+ c->nextb = br->step;
+ } else {
+ uint8_t *jump_bp;
+ uint32_t jump_rid;
+ jump_bp = GRN_NEXT_ADDR(jump_br);
+ GRN_B_DEC(jump_rid, jump_bp);
+ if (jump_rid < c->min) { /* the jump target is still below the minimum: take it */
+ c->nextb = br->jump;
+ } else {
+ c->nextb = br->step;
+ }
+ }
+ } else {
+ c->nextb = br->step;
+ }
+ continue;
+ }
+ c->nextb = br->step;
+ GRN_B_DEC(c->pb.tf, c->bp);
+ if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
+ GRN_B_DEC(c->pb.weight, c->bp);
+ } else {
+ c->pb.weight = 0;
+ }
+ c->pb.rest = c->pb.tf;
+ c->pb.pos = 0;
+ } else {
+ c->pb.rid = 0;
+ }
+ break;
+ }
+ }
+ /* Merge step: choose the smaller of pc and pb by (rid, sid). */
+ if (c->pb.rid) {
+ if (c->pc.rid) {
+ if (c->pc.rid < c->pb.rid) {
+ c->stat = CHUNK_USED;
+ if (include_garbage || (c->pc.tf && c->pc.sid)) {
+ c->post = &c->pc;
+ break;
+ }
+ } else {
+ if (c->pb.rid < c->pc.rid) {
+ c->stat = BUFFER_USED;
+ if (include_garbage || (c->pb.tf && c->pb.sid)) {
+ c->post = &c->pb;
+ break;
+ }
+ } else {
+ if (c->pb.sid) {
+ if (c->pc.sid < c->pb.sid) {
+ c->stat = CHUNK_USED;
+ if (include_garbage || (c->pc.tf && c->pc.sid)) {
+ c->post = &c->pc;
+ break;
+ }
+ } else {
+ c->stat = BUFFER_USED;
+ if (c->pb.sid == c->pc.sid) { c->stat |= CHUNK_USED; } /* same posting on both sides: the buffer one wins, both advance */
+ if (include_garbage || (c->pb.tf)) {
+ c->post = &c->pb;
+ break;
+ }
+ }
+ } else {
+ c->stat = CHUNK_USED;
+ }
+ }
+ }
+ } else {
+ c->stat = BUFFER_USED;
+ if (include_garbage || (c->pb.tf && c->pb.sid)) {
+ c->post = &c->pb;
+ break;
+ }
+ }
+ } else {
+ if (c->pc.rid) {
+ c->stat = CHUNK_USED;
+ if (include_garbage || (c->pc.tf && c->pc.sid)) {
+ c->post = &c->pc;
+ break;
+ }
+ } else { /* both streams are exhausted */
+ c->post = NULL;
+ return NULL;
+ }
+ }
+ }
+ } else { /* inline posting: returned at most once */
+ if (c->stat & SOLE_DOC_USED) {
+ c->post = NULL;
+ return NULL;
+ } else {
+ c->post = &c->pb;
+ c->stat |= SOLE_DOC_USED;
+ if (c->post->rid < c->min) {
+ c->post = NULL;
+ return NULL;
+ }
+ }
+ }
+ return c->post;
+}
+
+/*
+ * Public wrapper around grn_ii_cursor_next_internal() with
+ * include_garbage turned off.
+ */
+grn_posting *
+grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c)
+{
+  grn_ii_cursor_next_options options;
+
+  options.include_garbage = GRN_FALSE;
+  return grn_ii_cursor_next_internal(ctx, c, &options);
+}
+
+/*
+ * Advance to the next position inside the current posting.
+ *
+ * Returns c->post with its `pos` updated, or NULL when the current posting
+ * has no position left.  The cases are:
+ *  - index with positions, but c->nelements differs from
+ *    c->ii->n_elements: c->post is returned unchanged;
+ *  - index without positions, or cursor on an inline posting: c->post is
+ *    returned once (tracked with SOLE_POS_USED), then NULL;
+ *  - otherwise the next position gap is read from the chunk data (c->cpp)
+ *    or from the buffer record (c->bp), depending on c->post.
+ */
+grn_posting *
+grn_ii_cursor_next_pos(grn_ctx *ctx, grn_ii_cursor *c)
+{
+  const grn_bool has_position =
+    (c->ii->header->flags & GRN_OBJ_WITH_POSITION) != 0;
+
+  if (has_position && c->nelements != c->ii->n_elements) {
+    return c->post;
+  }
+
+  if (!has_position || !c->buf) {
+    if (c->stat & SOLE_POS_USED) {
+      return NULL;
+    }
+    c->stat |= SOLE_POS_USED;
+    return c->post;
+  }
+
+  if (c->post == &c->pc) {
+    if (!c->pc.rest) {
+      return NULL;
+    }
+    c->pc.rest--;
+    c->pc.pos += *c->cpp++;
+    return c->post;
+  }
+
+  if (c->post == &c->pb) {
+    uint32_t delta;
+    if (buffer_is_reused(ctx, c->ii, c)) {
+      GRN_LOG(ctx, GRN_LOG_WARNING,
+              "[ii][cursor][next][pos][buffer] "
+              "buffer(%d,%d) is reused by another thread: %p",
+              c->buffer_pseg, *c->ppseg,
+              c);
+      return NULL;
+    }
+    if (!c->pb.rest) {
+      return NULL;
+    }
+    c->pb.rest--;
+    GRN_B_DEC(delta, c->bp);
+    c->pb.pos += delta;
+    return c->post;
+  }
+
+  return NULL;
+}
+
+/*
+ * Release everything owned by a cursor and the cursor itself.
+ * Returns GRN_INVALID_ARGUMENT for a NULL cursor, GRN_SUCCESS otherwise.
+ */
+grn_rc
+grn_ii_cursor_close(grn_ctx *ctx, grn_ii_cursor *c)
+{
+  if (c == NULL) {
+    return GRN_INVALID_ARGUMENT;
+  }
+
+  datavec_fin(ctx, c->rdv);
+  if (c->cinfo != NULL) {
+    GRN_FREE(c->cinfo);
+  }
+  if (c->buf != NULL) {
+    buffer_close(ctx, c->ii, c->buffer_pseg);
+  }
+  if (c->cp != NULL) {
+    grn_io_win_unmap(&c->iw);
+  }
+  GRN_FREE(c);
+
+  return GRN_SUCCESS;
+}
+
+/*
+ * Return bt->size_in_chunk for term `tid`.
+ * The result is 0 when the term has no array entry, no posting, an inline
+ * posting, or when its buffer cannot be opened.
+ */
+uint32_t
+grn_ii_get_chunksize(grn_ctx *ctx, grn_ii *ii, grn_id tid)
+{
+  uint32_t chunk_size = 0;
+  uint32_t pos;
+  uint32_t *slot = array_at(ctx, ii, tid);
+
+  if (!slot) {
+    return 0;
+  }
+  pos = slot[0];
+  if (pos && !(pos & 1)) {
+    buffer *buf;
+    buffer_term *bt;
+    uint32_t pseg = buffer_open(ctx, ii, pos, &bt, &buf);
+    if (pseg != GRN_II_PSEG_NOT_ASSIGNED) {
+      chunk_size = bt->size_in_chunk;
+      buffer_close(ctx, ii, pseg);
+    }
+  }
+  array_unref(ii, tid);
+  return chunk_size;
+}
+
+/*
+ * Return a size estimate for term `tid`:
+ *   0 when the term has no array entry, no posting, or its buffer cannot
+ *     be opened;
+ *   1 for an inline posting;
+ *   a[1] + bt->size_in_buffer + 2 for a term stored in a buffer.
+ */
+uint32_t
+grn_ii_estimate_size(grn_ctx *ctx, grn_ii *ii, grn_id tid)
+{
+  uint32_t estimate = 0;
+  uint32_t pos;
+  uint32_t *slot = array_at(ctx, ii, tid);
+
+  if (!slot) {
+    return 0;
+  }
+  pos = slot[0];
+  if (pos & 1) {
+    estimate = 1;
+  } else if (pos) {
+    buffer *buf;
+    buffer_term *bt;
+    uint32_t pseg = buffer_open(ctx, ii, pos, &bt, &buf);
+    if (pseg != GRN_II_PSEG_NOT_ASSIGNED) {
+      estimate = slot[1] + bt->size_in_buffer + 2;
+      buffer_close(ctx, ii, pseg);
+    }
+  }
+  array_unref(ii, tid);
+  return estimate;
+}
+
+/*
+ * Copy diagnostic information about term `tid` into the output
+ * parameters.
+ *
+ * a[0] receives the first word of the per-term array entry.  The other
+ * outputs are filled from the buffer header and the buffer term, and only
+ * when the function returns 4.
+ *
+ * Return value:
+ *   0 : no array entry for `tid` (nothing is written);
+ *   1 : a[0] == 0, the term has no posting;
+ *   2 : a[0] is odd, the term holds an inline posting;
+ *   3 : the buffer could not be opened;
+ *   4 : all buffer related outputs were filled.
+ *
+ * nterms_void was never written before; it is now filled from
+ * b->header.nterms_void.  A NULL nterms_void is tolerated so that callers
+ * which relied on it being ignored keep working.
+ */
+int
+grn_ii_entry_info(grn_ctx *ctx, grn_ii *ii, grn_id tid, unsigned int *a,
+                  unsigned int *chunk, unsigned int *chunk_size,
+                  unsigned int *buffer_free,
+                  unsigned int *nterms, unsigned int *nterms_void,
+                  unsigned int *bt_tid,
+                  unsigned int *size_in_chunk, unsigned int *pos_in_chunk,
+                  unsigned int *size_in_buffer, unsigned int *pos_in_buffer)
+{
+  buffer *b;
+  buffer_term *bt;
+  uint32_t pseg, *ap;
+  ERRCLR(NULL);
+  ap = array_at(ctx, ii, tid);
+  if (!ap) { return 0; }
+  a[0] = *ap;
+  array_unref(ii, tid);
+  if (!a[0]) { return 1; }
+  if (a[0] & 1) { return 2; }
+  if ((pseg = buffer_open(ctx, ii, a[0], &bt, &b)) == GRN_II_PSEG_NOT_ASSIGNED) { return 3; }
+  *chunk = b->header.chunk;
+  *chunk_size = b->header.chunk_size;
+  *buffer_free = b->header.buffer_free;
+  *nterms = b->header.nterms;
+  if (nterms_void) { *nterms_void = b->header.nterms_void; }
+  *bt_tid = bt->tid;
+  *size_in_chunk = bt->size_in_chunk;
+  *pos_in_chunk = bt->pos_in_chunk;
+  *size_in_buffer = bt->size_in_buffer;
+  *pos_in_buffer = bt->pos_in_buffer;
+  buffer_close(ctx, ii, pseg);
+  return 4;
+}
+
+/* Return the path reported by grn_io_path() for the segment file. */
+const char *
+grn_ii_path(grn_ii *ii)
+{
+  const char *seg_path = grn_io_path(ii->seg);
+
+  return seg_path;
+}
+
+/* Return header->smax, the largest section ID recorded for the index. */
+uint32_t
+grn_ii_max_section(grn_ii *ii)
+{
+  uint32_t max_section = ii->header->smax;
+
+  return max_section;
+}
+
+/* Return the lexicon object attached to the index. */
+grn_obj *
+grn_ii_lexicon(grn_ii *ii)
+{
+  grn_obj *lexicon = ii->lexicon;
+
+  return lexicon;
+}
+
+/* private classes */
+
+/* b-heap */
+
+typedef struct { /* Binary heap of cursors ordered with GRN_II_CURSOR_CMP; bins[0] is the minimum. */
+ int n_entries; /* number of cursors currently stored in bins */
+ int n_bins; /* allocated capacity of bins */
+ grn_ii_cursor **bins; /* heap array; doubled by cursor_heap_push() when full */
+} cursor_heap;
+
+/*
+ * Allocate an empty cursor heap with room for `max` cursors.
+ * Returns NULL when either allocation fails.
+ */
+static inline cursor_heap *
+cursor_heap_open(grn_ctx *ctx, int max)
+{
+  cursor_heap *heap = GRN_MALLOC(sizeof(cursor_heap));
+
+  if (heap) {
+    heap->bins = GRN_MALLOC(sizeof(grn_ii_cursor *) * max);
+    if (heap->bins) {
+      heap->n_entries = 0;
+      heap->n_bins = max;
+      return heap;
+    }
+    GRN_FREE(heap);
+  }
+  return NULL;
+}
+
+/*
+ * Open a cursor for term `tid`, move it to its first posting and first
+ * position, and insert it into the heap.
+ *
+ * The bins array is doubled first when the heap is full.  A non-zero
+ * `weight` is stored in the cursor.  `offset2` is not used.
+ *
+ * Returns GRN_SUCCESS on insertion, GRN_NO_MEMORY_AVAILABLE when the heap
+ * cannot grow, ctx->rc when the cursor cannot be opened, and
+ * GRN_END_OF_DATA when the cursor yields no posting or no position.
+ */
+static inline grn_rc
+cursor_heap_push(grn_ctx *ctx, cursor_heap *h, grn_ii *ii, grn_id tid, uint32_t offset2,
+                 int weight, grn_id min)
+{
+  grn_ii_cursor *cursor;
+  int slot;
+
+  if (h->n_entries >= h->n_bins) {
+    int doubled = h->n_bins * 2;
+    grn_ii_cursor **grown = GRN_REALLOC(h->bins, sizeof(grn_ii_cursor *) * doubled);
+    GRN_LOG(ctx, GRN_LOG_DEBUG, "expanded cursor_heap to %d,%p", doubled, grown);
+    if (!grown) {
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+    h->n_bins = doubled;
+    h->bins = grown;
+  }
+
+  cursor = grn_ii_cursor_open(ctx, ii, tid, min, GRN_ID_MAX,
+                              ii->n_elements, 0);
+  if (!cursor) {
+    GRN_LOG(ctx, GRN_LOG_ERROR, "cursor open failed");
+    return ctx->rc;
+  }
+  if (!grn_ii_cursor_next(ctx, cursor)) {
+    grn_ii_cursor_close(ctx, cursor);
+    return GRN_END_OF_DATA;
+  }
+  if (!grn_ii_cursor_next_pos(ctx, cursor)) {
+    if (grn_logger_pass(ctx, GRN_LOG_ERROR)) {
+      char token[GRN_TABLE_MAX_KEY_SIZE];
+      int token_size;
+      token_size = grn_table_get_key(ctx,
+                                     cursor->ii->lexicon,
+                                     cursor->id,
+                                     &token,
+                                     GRN_TABLE_MAX_KEY_SIZE);
+      GRN_LOG(ctx, GRN_LOG_ERROR,
+              "[ii][cursor][heap][push] invalid cursor: "
+              "%p: token:<%.*s>(%u)",
+              cursor, token_size, token, cursor->id);
+    }
+    grn_ii_cursor_close(ctx, cursor);
+    return GRN_END_OF_DATA;
+  }
+  if (weight) {
+    cursor->weight = weight;
+  }
+
+  /* Sift up: move parents down until the new cursor's place is found. */
+  for (slot = h->n_entries++; slot > 0; ) {
+    int parent = (slot - 1) >> 1;
+    grn_ii_cursor *parent_cursor = h->bins[parent];
+    if (GRN_II_CURSOR_CMP(cursor, parent_cursor)) {
+      break;
+    }
+    h->bins[slot] = parent_cursor;
+    slot = parent;
+  }
+  h->bins[slot] = cursor;
+  return GRN_SUCCESS;
+}
+
+/* Placeholder that does nothing and always reports GRN_SUCCESS. */
+static inline grn_rc
+cursor_heap_push2(cursor_heap *h)
+{
+  return GRN_SUCCESS;
+}
+
+/* Return the cursor at the top of the heap, or NULL when the heap is empty. */
+static inline grn_ii_cursor *
+cursor_heap_min(cursor_heap *h)
+{
+  if (!h->n_entries) {
+    return NULL;
+  }
+  return h->bins[0];
+}
+
+/*
+ * Restore the heap order after the cursor in bins[0] changed: the cursor
+ * is sifted down, each time swapping places with the smaller child that
+ * sorts before it (per GRN_II_CURSOR_CMP).
+ * Does nothing for heaps with fewer than two entries.
+ */
+static inline void
+cursor_heap_recalc_min(cursor_heap *h)
+{
+  const int count = h->n_entries;
+  int hole = 0;
+  grn_ii_cursor *moving;
+
+  if (count <= 1) {
+    return;
+  }
+  moving = h->bins[0];
+  for (;;) {
+    const int left = hole * 2 + 1;
+    const int right = left + 1;
+    grn_ii_cursor *left_cursor = (left < count) ? h->bins[left] : NULL;
+    grn_ii_cursor *right_cursor = (right < count) ? h->bins[right] : NULL;
+    const int left_first = left_cursor && GRN_II_CURSOR_CMP(moving, left_cursor);
+    const int right_first = right_cursor && GRN_II_CURSOR_CMP(moving, right_cursor);
+    int next;
+
+    if (left_first) {
+      /* Both children may qualify: take the right one only if it is smaller. */
+      if (right_first && GRN_II_CURSOR_CMP(left_cursor, right_cursor)) {
+        next = right;
+      } else {
+        next = left;
+      }
+    } else if (right_first) {
+      next = right;
+    } else {
+      h->bins[hole] = moving;
+      break;
+    }
+    h->bins[hole] = h->bins[next];
+    hole = next;
+  }
+}
+
+/*
+ * Advance the top cursor to its next posting (after raising its minimum
+ * record ID to `min`) and re-establish the heap order.
+ *
+ * The cursor is closed and replaced by the last heap entry when it has no
+ * further posting, or when the new posting yields no position (the latter
+ * is logged as an invalid cursor).
+ */
+static inline void
+cursor_heap_pop(grn_ctx *ctx, cursor_heap *h, grn_id min)
+{
+  grn_ii_cursor *top;
+  grn_bool finished = GRN_FALSE;
+
+  if (!h->n_entries) {
+    return;
+  }
+  top = h->bins[0];
+  grn_ii_cursor_set_min(ctx, top, min);
+  if (!grn_ii_cursor_next(ctx, top)) {
+    finished = GRN_TRUE;
+  } else if (!grn_ii_cursor_next_pos(ctx, top)) {
+    if (grn_logger_pass(ctx, GRN_LOG_ERROR)) {
+      char token[GRN_TABLE_MAX_KEY_SIZE];
+      int token_size;
+      token_size = grn_table_get_key(ctx,
+                                     top->ii->lexicon,
+                                     top->id,
+                                     &token,
+                                     GRN_TABLE_MAX_KEY_SIZE);
+      GRN_LOG(ctx, GRN_LOG_ERROR,
+              "[ii][cursor][heap][pop] invalid cursor: "
+              "%p: token:<%.*s>(%u)",
+              top, token_size, token, top->id);
+    }
+    finished = GRN_TRUE;
+  }
+  if (finished) {
+    grn_ii_cursor_close(ctx, top);
+    h->bins[0] = h->bins[--h->n_entries];
+  }
+  if (h->n_entries > 1) {
+    cursor_heap_recalc_min(h);
+  }
+}
+
+/*
+ * Advance the top cursor to its next position; when the current posting
+ * has none left, move on to the next posting and its first position.
+ *
+ * The cursor is closed and replaced by the last heap entry when it has no
+ * further posting, or when the new posting yields no position (the latter
+ * is logged as an invalid cursor).  The heap order is restored afterwards.
+ */
+static inline void
+cursor_heap_pop_pos(grn_ctx *ctx, cursor_heap *h)
+{
+  grn_ii_cursor *top;
+
+  if (!h->n_entries) {
+    return;
+  }
+  top = h->bins[0];
+  if (!grn_ii_cursor_next_pos(ctx, top)) {
+    grn_bool finished = GRN_FALSE;
+    if (!grn_ii_cursor_next(ctx, top)) {
+      finished = GRN_TRUE;
+    } else if (!grn_ii_cursor_next_pos(ctx, top)) {
+      if (grn_logger_pass(ctx, GRN_LOG_ERROR)) {
+        char token[GRN_TABLE_MAX_KEY_SIZE];
+        int token_size;
+        token_size = grn_table_get_key(ctx,
+                                       top->ii->lexicon,
+                                       top->id,
+                                       &token,
+                                       GRN_TABLE_MAX_KEY_SIZE);
+        GRN_LOG(ctx, GRN_LOG_ERROR,
+                "[ii][cursor][heap][pop][position] invalid cursor: "
+                "%p: token:<%.*s>(%u)",
+                top, token_size, token, top->id);
+      }
+      finished = GRN_TRUE;
+    }
+    if (finished) {
+      grn_ii_cursor_close(ctx, top);
+      h->bins[0] = h->bins[--h->n_entries];
+    }
+  }
+  if (h->n_entries > 1) {
+    cursor_heap_recalc_min(h);
+  }
+}
+
+/*
+ * Close every cursor still held by the heap (last entry first) and free
+ * the heap.  A NULL heap is ignored.
+ */
+static inline void
+cursor_heap_close(grn_ctx *ctx, cursor_heap *h)
+{
+  int remaining;
+
+  if (!h) {
+    return;
+  }
+  remaining = h->n_entries;
+  while (remaining > 0) {
+    remaining--;
+    grn_ii_cursor_close(ctx, h->bins[remaining]);
+  }
+  GRN_FREE(h->bins);
+  GRN_FREE(h);
+}
+
+/* update */
+#ifdef USE_VGRAM
+
+/*
+ * Tokenize `value` with `lexicon` and add one posting per distinct token
+ * of record `rid` to the index.
+ *
+ * Tokens are collected in a temporary hash keyed by term ID; each entry
+ * holds a grn_ii_updspec that accumulates the token positions.  Every
+ * entry is then passed to grn_ii_update_one() and closed.
+ *
+ * Returns GRN_INVALID_ARGUMENT when rid is 0, GRN_NO_MEMORY_AVAILABLE when
+ * the token cursor, the hash or an update spec cannot be set up, and
+ * otherwise the last non-zero result of grn_ii_update_one() (GRN_SUCCESS
+ * if there was none).
+ *
+ * On the failure path the update specs already stored in the hash are
+ * closed before the hash is; closing only the hash would leak them.
+ */
+inline static grn_rc
+index_add(grn_ctx *ctx, grn_id rid, grn_obj *lexicon, grn_ii *ii, grn_vgram *vgram,
+          const char *value, size_t value_len)
+{
+  grn_hash *h;
+  unsigned int token_flags = 0;
+  grn_token_cursor *token_cursor;
+  grn_ii_updspec **u;
+  grn_id tid, *tp;
+  grn_rc r, rc = GRN_SUCCESS;
+  grn_vgram_buf *sbuf = NULL;
+  if (!rid) { return GRN_INVALID_ARGUMENT; }
+  if (!(token_cursor = grn_token_cursor_open(ctx, lexicon, value, value_len,
+                                             GRN_TOKEN_ADD, token_flags))) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  if (vgram) { sbuf = grn_vgram_buf_open(value_len); }
+  h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *),
+                      GRN_HASH_TINY);
+  if (!h) {
+    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create on index_add failed !");
+    grn_token_cursor_close(ctx, token_cursor);
+    if (sbuf) { grn_vgram_buf_close(sbuf); }
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  while (!token_cursor->status) {
+    tid = grn_token_cursor_next(ctx, token_cursor);
+    if (tid) {
+      if (!grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **) &u, NULL)) {
+        break;
+      }
+      /* A NULL value marks a term seen for the first time. */
+      if (!*u) {
+        if (!(*u = grn_ii_updspec_open(ctx, rid, 1))) {
+          GRN_LOG(ctx, GRN_LOG_ERROR,
+                  "grn_ii_updspec_open on index_add failed!");
+          goto exit;
+        }
+      }
+      if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, 0)) {
+        GRN_LOG(ctx, GRN_LOG_ERROR,
+                "grn_ii_updspec_add on index_add failed!");
+        goto exit;
+      }
+      if (sbuf) { grn_vgram_buf_add(sbuf, tid); }
+    }
+  }
+  grn_token_cursor_close(ctx, token_cursor);
+  /* todo : support vgram */
+  /* if (sbuf) { grn_vgram_update(vgram, rid, sbuf, (grn_set *)h); } */
+  GRN_HASH_EACH(ctx, h, id, &tp, NULL, &u, {
+    if ((r = grn_ii_update_one(ctx, ii, *tp, *u, h))) { rc = r; }
+    grn_ii_updspec_close(ctx, *u);
+  });
+  grn_hash_close(ctx, h);
+  if (sbuf) { grn_vgram_buf_close(sbuf); }
+  return rc;
+exit:
+  /* Release the update specs created so far; entries may still be NULL. */
+  GRN_HASH_EACH(ctx, h, id, &tp, NULL, &u, {
+    if (*u) { grn_ii_updspec_close(ctx, *u); }
+  });
+  grn_hash_close(ctx, h);
+  grn_token_cursor_close(ctx, token_cursor);
+  if (sbuf) { grn_vgram_buf_close(sbuf); }
+  return GRN_NO_MEMORY_AVAILABLE;
+}
+
+/*
+ * Tokenize `value` with `lexicon` and remove the postings of record `rid`
+ * for every distinct token from the index.
+ *
+ * Tokens are collected in a temporary hash keyed by term ID; each entry
+ * holds a grn_ii_updspec opened for `rid`.  Every entry is then passed to
+ * grn_ii_delete_one() and closed.
+ *
+ * Returns GRN_INVALID_ARGUMENT when rid is 0, GRN_NO_MEMORY_AVAILABLE when
+ * the token cursor, the hash or an update spec cannot be set up, and
+ * otherwise the last non-zero result of grn_ii_delete_one() (GRN_SUCCESS
+ * if there was none).
+ *
+ * On the failure path the update specs already stored in the hash are
+ * closed before the hash is; closing only the hash would leak them.
+ */
+inline static grn_rc
+index_del(grn_ctx *ctx, grn_id rid, grn_obj *lexicon, grn_ii *ii, grn_vgram *vgram,
+          const char *value, size_t value_len)
+{
+  grn_rc rc = GRN_SUCCESS;
+  grn_hash *h;
+  unsigned int token_flags = 0;
+  grn_token_cursor *token_cursor;
+  grn_ii_updspec **u;
+  grn_id tid, *tp;
+  if (!rid) { return GRN_INVALID_ARGUMENT; }
+  if (!(token_cursor = grn_token_cursor_open(ctx, lexicon, value, value_len,
+                                             GRN_TOKEN_DEL, token_flags))) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *),
+                      GRN_HASH_TINY);
+  if (!h) {
+    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create on index_del failed !");
+    grn_token_cursor_close(ctx, token_cursor);
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  while (!token_cursor->status) {
+    if ((tid = grn_token_cursor_next(ctx, token_cursor))) {
+      if (!grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **) &u, NULL)) {
+        break;
+      }
+      /* A NULL value marks a term seen for the first time. */
+      if (!*u) {
+        if (!(*u = grn_ii_updspec_open(ctx, rid, 0))) {
+          GRN_LOG(ctx, GRN_LOG_ALERT,
+                  "grn_ii_updspec_open on index_del failed !");
+          goto fail;
+        }
+      }
+    }
+  }
+  grn_token_cursor_close(ctx, token_cursor);
+  GRN_HASH_EACH(ctx, h, id, &tp, NULL, &u, {
+    if (*tp) {
+      grn_rc r;
+      r = grn_ii_delete_one(ctx, ii, *tp, *u, NULL);
+      if (r) {
+        rc = r;
+      }
+    }
+    grn_ii_updspec_close(ctx, *u);
+  });
+  grn_hash_close(ctx, h);
+  return rc;
+fail:
+  /* Release the update specs created so far; entries may still be NULL. */
+  GRN_HASH_EACH(ctx, h, id, &tp, NULL, &u, {
+    if (*u) { grn_ii_updspec_close(ctx, *u); }
+  });
+  grn_hash_close(ctx, h);
+  grn_token_cursor_close(ctx, token_cursor);
+  return GRN_NO_MEMORY_AVAILABLE;
+}
+
+/*
+ * Replace the indexed text of record `rid`: the tokens of `oldvalue` are
+ * removed with index_del() and the tokens of `newvalue` are added with
+ * index_add(). A NULL or empty value skips the corresponding step.
+ *
+ * Returns GRN_INVALID_ARGUMENT when `ii` is NULL or `rid` is 0, the error of
+ * index_del() when the deletion fails (the addition is then not attempted),
+ * otherwise the result of index_add(), or GRN_SUCCESS when there was nothing
+ * to do.
+ */
+grn_rc
+grn_ii_upd(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram,
+           const char *oldvalue, unsigned int oldvalue_len,
+           const char *newvalue, unsigned int newvalue_len)
+{
+  /* Must start as success: both steps below may be skipped. */
+  grn_rc rc = GRN_SUCCESS;
+  grn_obj *lexicon;
+  if (!ii || !rid) { return GRN_INVALID_ARGUMENT; }
+  lexicon = ii->lexicon;
+  if (oldvalue && *oldvalue) {
+    rc = index_del(ctx, rid, lexicon, ii, vgram, oldvalue, oldvalue_len);
+    if (rc) {
+      GRN_LOG(ctx, GRN_LOG_ERROR, "index_del on grn_ii_upd failed !");
+      return rc;
+    }
+  }
+  if (newvalue && *newvalue) {
+    rc = index_add(ctx, rid, lexicon, ii, vgram, newvalue, newvalue_len);
+  }
+  return rc;
+}
+
+/*
+ * Update the index entries of section `section` of record `rid`.
+ *
+ * The values in `newvalues` and `oldvalues` are tokenized into two temporary
+ * hash tables (token id -> grn_ii_updspec *). Tokens whose old and new specs
+ * compare equal are left untouched, tokens that only exist in the old set are
+ * deleted with grn_ii_delete_one(), and the remaining new tokens are written
+ * with grn_ii_update_one(). Either value set may be NULL.
+ *
+ * Returns GRN_INVALID_ARGUMENT when `ii`, its lexicon or `rid` is missing,
+ * GRN_NO_MEMORY_AVAILABLE when a hash table or update spec cannot be
+ * prepared, otherwise the last non-zero result of the delete/update calls
+ * (GRN_SUCCESS when all of them succeeded).
+ */
+grn_rc
+grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, unsigned int section,
+              grn_values *oldvalues, grn_values *newvalues)
+{
+  int j;
+  grn_value *v;
+  unsigned int token_flags = 0;
+  grn_token_cursor *token_cursor;
+  grn_rc rc = GRN_SUCCESS;
+  grn_hash *old = NULL, *new = NULL;
+  grn_id tid, *tp;
+  grn_ii_updspec **u, **un;
+  grn_obj *lexicon;
+  /* Validate `ii` before reading ii->lexicon. */
+  if (!ii || !ii->lexicon || !rid) {
+    GRN_LOG(ctx, GRN_LOG_WARNING, "grn_ii_update: invalid argument");
+    return GRN_INVALID_ARGUMENT;
+  }
+  lexicon = ii->lexicon;
+  if (newvalues) {
+    new = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *),
+                          GRN_HASH_TINY);
+    if (!new) {
+      GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create on grn_ii_update failed !");
+      rc = GRN_NO_MEMORY_AVAILABLE;
+      goto fail;
+    }
+    for (j = newvalues->n_values, v = newvalues->values; j; j--, v++) {
+      if ((token_cursor = grn_token_cursor_open(ctx, lexicon, v->str,
+                                                v->str_len, GRN_TOKEN_ADD,
+                                                token_flags))) {
+        while (!token_cursor->status) {
+          if ((tid = grn_token_cursor_next(ctx, token_cursor))) {
+            if (!grn_hash_add(ctx, new, &tid, sizeof(grn_id), (void **) &u,
+                              NULL)) {
+              break;
+            }
+            if (!*u) {
+              if (!(*u = grn_ii_updspec_open(ctx, rid, section))) {
+                GRN_LOG(ctx, GRN_LOG_ALERT,
+                        "grn_ii_updspec_open on grn_ii_update failed!");
+                grn_token_cursor_close(ctx, token_cursor);
+                rc = GRN_NO_MEMORY_AVAILABLE;
+                goto fail;
+              }
+            }
+            if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, v->weight)) {
+              GRN_LOG(ctx, GRN_LOG_ALERT,
+                      "grn_ii_updspec_add on grn_ii_update failed!");
+              grn_token_cursor_close(ctx, token_cursor);
+              rc = GRN_NO_MEMORY_AVAILABLE;
+              goto fail;
+            }
+          }
+        }
+        grn_token_cursor_close(ctx, token_cursor);
+      }
+    }
+    /* An empty new set is treated exactly like "no new values". */
+    if (!GRN_HASH_SIZE(new)) {
+      grn_hash_close(ctx, new);
+      new = NULL;
+    }
+  }
+  if (oldvalues) {
+    old = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *),
+                          GRN_HASH_TINY);
+    if (!old) {
+      GRN_LOG(ctx, GRN_LOG_ALERT,
+              "grn_hash_create(ctx, NULL, old) on grn_ii_update failed!");
+      rc = GRN_NO_MEMORY_AVAILABLE;
+      goto fail;
+    }
+    for (j = oldvalues->n_values, v = oldvalues->values; j; j--, v++) {
+      if ((token_cursor = grn_token_cursor_open(ctx, lexicon, v->str,
+                                                v->str_len, GRN_TOKEN_DEL,
+                                                token_flags))) {
+        while (!token_cursor->status) {
+          if ((tid = grn_token_cursor_next(ctx, token_cursor))) {
+            if (!grn_hash_add(ctx, old, &tid, sizeof(grn_id), (void **) &u,
+                              NULL)) {
+              break;
+            }
+            if (!*u) {
+              if (!(*u = grn_ii_updspec_open(ctx, rid, section))) {
+                GRN_LOG(ctx, GRN_LOG_ALERT,
+                        "grn_ii_updspec_open on grn_ii_update failed!");
+                grn_token_cursor_close(ctx, token_cursor);
+                rc = GRN_NO_MEMORY_AVAILABLE;
+                goto fail;
+              }
+            }
+            if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, v->weight)) {
+              GRN_LOG(ctx, GRN_LOG_ALERT,
+                      "grn_ii_updspec_add on grn_ii_update failed!");
+              grn_token_cursor_close(ctx, token_cursor);
+              rc = GRN_NO_MEMORY_AVAILABLE;
+              goto fail;
+            }
+          }
+        }
+        grn_token_cursor_close(ctx, token_cursor);
+      }
+    }
+  }
+  if (old) {
+    grn_id eid;
+    GRN_HASH_EACH(ctx, old, id, &tp, NULL, &u, {
+      if (new && (eid = grn_hash_get(ctx, new, tp, sizeof(grn_id),
+                                     (void **) &un))) {
+        /* Unchanged token: drop it from the new set so it is not rewritten. */
+        if (!grn_ii_updspec_cmp(*u, *un)) {
+          grn_ii_updspec_close(ctx, *un);
+          grn_hash_delete_by_id(ctx, new, eid, NULL);
+        }
+      } else {
+        grn_rc r;
+        r = grn_ii_delete_one(ctx, ii, *tp, *u, new);
+        if (r) {
+          rc = r;
+        }
+      }
+      grn_ii_updspec_close(ctx, *u);
+    });
+    grn_hash_close(ctx, old);
+  }
+  if (new) {
+    GRN_HASH_EACH(ctx, new, id, &tp, NULL, &u, {
+      grn_rc r;
+      if ((r = grn_ii_update_one(ctx, ii, *tp, *u, new))) { rc = r; }
+      grn_ii_updspec_close(ctx, *u);
+    });
+    grn_hash_close(ctx, new);
+  } else {
+    if (!section) {
+      /* todo: delete key when all sections deleted */
+    }
+  }
+  return rc;
+fail :
+  /* Error path: release every update spec collected so far together with
+   * its hash table. An entry whose spec failed to open still holds NULL. */
+  if (old) {
+    GRN_HASH_EACH(ctx, old, id, &tp, NULL, &u, {
+      if (*u) { grn_ii_updspec_close(ctx, *u); }
+    });
+    grn_hash_close(ctx, old);
+  }
+  if (new) {
+    GRN_HASH_EACH(ctx, new, id, &tp, NULL, &u, {
+      if (*u) { grn_ii_updspec_close(ctx, *u); }
+    });
+    grn_hash_close(ctx, new);
+  }
+  return rc;
+}
+#endif /* USE_VGRAM */
+
+/*
+ * Tokenize every non-empty section of the vector `in` with the lexicon of
+ * `ii` and collect the token occurrences into the hash table `out`
+ * (token id -> grn_ii_updspec *). When `posting` is given, every token id is
+ * also appended to it in tokenization order.
+ *
+ * Returns ctx->rc; on an update spec failure the error is reported through
+ * MERR() and processing stops immediately.
+ */
+static grn_rc
+grn_vector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
+                    grn_obj *in, grn_obj *out, grn_tokenize_mode mode,
+                    grn_obj *posting)
+{
+  grn_hash *specs = (grn_hash *)out;
+  grn_obj *lexicon = ii->lexicon;
+  const char *head;
+  grn_section *sec;
+  int remaining;
+
+  if (!in->u.v.body) {
+    return ctx->rc;
+  }
+  head = GRN_BULK_HEAD(in->u.v.body);
+  sec = in->u.v.sections;
+  for (remaining = in->u.v.n_sections; remaining; remaining--, sec++) {
+    unsigned int token_flags = 0;
+    grn_token_cursor *cursor;
+
+    if (!sec->length) {
+      continue;
+    }
+    cursor = grn_token_cursor_open(ctx, lexicon, head + sec->offset,
+                                   sec->length, mode, token_flags);
+    if (!cursor) {
+      continue;
+    }
+    while (!cursor->status) {
+      grn_ii_updspec **spec;
+      grn_id tid = grn_token_cursor_next(ctx, cursor);
+
+      if (!tid) {
+        continue;
+      }
+      if (posting) { GRN_RECORD_PUT(ctx, posting, tid); }
+      if (!grn_hash_add(ctx, specs, &tid, sizeof(grn_id), (void **) &spec,
+                        NULL)) {
+        break;
+      }
+      /* First occurrence of this token: create its update spec. */
+      if (!*spec && !(*spec = grn_ii_updspec_open(ctx, rid, section))) {
+        DEFINE_NAME(ii);
+        MERR("[ii][update][spec] failed to create an update spec: "
+             "<%.*s>: "
+             "record:<%u>:<%u>, token:<%u>:<%d>:<%u>",
+             name_size, name,
+             rid, section,
+             tid, cursor->pos, sec->weight);
+        grn_token_cursor_close(ctx, cursor);
+        return ctx->rc;
+      }
+      if (grn_ii_updspec_add(ctx, *spec, cursor->pos, sec->weight)) {
+        DEFINE_NAME(ii);
+        MERR("[ii][update][spec] failed to add to update spec: "
+             "<%.*s>: "
+             "record:<%u>:<%u>, token:<%u>:<%d>:<%u>",
+             name_size, name,
+             rid, section,
+             tid, cursor->pos, sec->weight);
+        grn_token_cursor_close(ctx, cursor);
+        return ctx->rc;
+      }
+    }
+    grn_token_cursor_close(ctx, cursor);
+  }
+  return ctx->rc;
+}
+
+/*
+ * Tokenize each fixed-size element of the uvector `in` and collect the token
+ * occurrences into the hash table `out` (token id -> grn_ii_updspec *).
+ * When `posting` is given, every token id is also appended to it.
+ *
+ * The position recorded for a token is the tokenizer position when the
+ * lexicon has a default tokenizer, otherwise the element index.
+ *
+ * Returns GRN_NO_MEMORY_AVAILABLE when an update spec cannot be opened or
+ * extended, GRN_SUCCESS otherwise.
+ */
+static grn_rc
+grn_uvector2updspecs_data(grn_ctx *ctx, grn_ii *ii, grn_id rid,
+                          unsigned int section, grn_obj *in, grn_obj *out,
+                          grn_tokenize_mode mode, grn_obj *posting)
+{
+  grn_hash *specs = (grn_hash *)out;
+  grn_obj *lexicon = ii->lexicon;
+  int n_elements = grn_uvector_size(ctx, in);
+  unsigned int element_size = grn_uvector_element_size(ctx, in);
+  int i;
+
+  for (i = 0; i < n_elements; i++) {
+    unsigned int token_flags = 0;
+    grn_obj *tokenizer = grn_obj_get_info(ctx, lexicon,
+                                          GRN_INFO_DEFAULT_TOKENIZER, NULL);
+    const char *element = GRN_BULK_HEAD(in) + (element_size * i);
+    grn_token_cursor *cursor = grn_token_cursor_open(ctx, lexicon,
+                                                     element, element_size,
+                                                     mode, token_flags);
+
+    if (!cursor) {
+      continue;
+    }
+    while (!cursor->status) {
+      grn_ii_updspec **spec;
+      int pos;
+      grn_id tid = grn_token_cursor_next(ctx, cursor);
+
+      if (!tid) {
+        continue;
+      }
+      if (posting) { GRN_RECORD_PUT(ctx, posting, tid); }
+      if (!grn_hash_add(ctx, specs, &tid, sizeof(grn_id), (void **)&spec,
+                        NULL)) {
+        break;
+      }
+      /* First occurrence of this token: create its update spec. */
+      if (!*spec && !(*spec = grn_ii_updspec_open(ctx, rid, section))) {
+        GRN_LOG(ctx, GRN_LOG_ALERT,
+                "grn_ii_updspec_open on grn_uvector2updspecs_data failed!");
+        grn_token_cursor_close(ctx, cursor);
+        return GRN_NO_MEMORY_AVAILABLE;
+      }
+      pos = tokenizer ? cursor->pos : i;
+      if (grn_ii_updspec_add(ctx, *spec, pos, 0)) {
+        GRN_LOG(ctx, GRN_LOG_ALERT,
+                "grn_ii_updspec_add on grn_uvector2updspecs failed!");
+        grn_token_cursor_close(ctx, cursor);
+        return GRN_NO_MEMORY_AVAILABLE;
+      }
+    }
+    grn_token_cursor_close(ctx, cursor);
+  }
+
+  return GRN_SUCCESS;
+}
+
+/*
+ * Treat each element of the uvector `in` as an id and collect one occurrence
+ * per element into the hash table `out` (id -> grn_ii_updspec *). The element
+ * index is used as the position and the element weight as the weight.
+ *
+ * Returns GRN_NO_MEMORY_AVAILABLE when an update spec cannot be opened or
+ * extended, GRN_SUCCESS otherwise (also when adding to the hash stops the
+ * loop early).
+ */
+static grn_rc
+grn_uvector2updspecs_id(grn_ctx *ctx, grn_ii *ii, grn_id rid,
+                        unsigned int section, grn_obj *in, grn_obj *out)
+{
+  grn_hash *specs = (grn_hash *)out;
+  int n_elements = grn_vector_size(ctx, in);
+  int index;
+
+  for (index = 0; index < n_elements; index++) {
+    grn_ii_updspec **spec;
+    unsigned int weight;
+    grn_id id = grn_uvector_get_element(ctx, in, index, &weight);
+
+    if (!grn_hash_add(ctx, specs, &id, sizeof(grn_id), (void **)&spec, NULL)) {
+      break;
+    }
+    /* First occurrence of this id: create its update spec. */
+    if (!*spec && !(*spec = grn_ii_updspec_open(ctx, rid, section))) {
+      GRN_LOG(ctx, GRN_LOG_ALERT,
+              "grn_ii_updspec_open on grn_ii_update failed!");
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+    if (grn_ii_updspec_add(ctx, *spec, index, weight)) {
+      GRN_LOG(ctx, GRN_LOG_ALERT,
+              "grn_ii_updspec_add on grn_ii_update failed!");
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  return GRN_SUCCESS;
+}
+
+/*
+ * Dispatch a uvector to the matching collector: domains at or above
+ * GRN_N_RESERVED_TYPES go to grn_uvector2updspecs_id(), every other domain
+ * goes to grn_uvector2updspecs_data().
+ */
+static grn_rc
+grn_uvector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid,
+                     unsigned int section, grn_obj *in, grn_obj *out,
+                     grn_tokenize_mode mode, grn_obj *posting)
+{
+  if (in->header.domain >= GRN_N_RESERVED_TYPES) {
+    return grn_uvector2updspecs_id(ctx, ii, rid, section, in, out);
+  }
+  return grn_uvector2updspecs_data(ctx, ii, rid, section, in, out,
+                                   mode, posting);
+}
+
+/*
+ * Update the index `ii` for section `section` of record `rid` when a column
+ * value changes from `oldvalue` to `newvalue` (either may be NULL).
+ *
+ * A GRN_VECTOR value on a lexicon that has a tokenizer is split into its
+ * elements, and each element is indexed recursively as section
+ * `section + i`. Otherwise both values are converted to hash tables
+ * (token id -> grn_ii_updspec *) while the index segment lock is held; tokens
+ * found only in the old table are deleted and the remaining new tokens are
+ * written. When `posting` is given, a summary of the new tokens (entry count,
+ * sorted id gaps, term frequencies and per-occurrence offsets) is appended
+ * to it.
+ *
+ * Returns ctx->rc. GRN_INVALID_ARGUMENT is reported through ERR() when `ii`
+ * or its lexicon is NULL, when `rid` is GRN_ID_NIL, or when a value has an
+ * unsupported object type.
+ */
+grn_rc
+grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
+                     grn_obj *oldvalue, grn_obj *newvalue, grn_obj *posting)
+{
+  grn_id *tp;
+  grn_bool do_grn_ii_updspec_cmp = GRN_TRUE;
+  grn_ii_updspec **u, **un;
+  grn_obj *old_, *old = oldvalue, *new_, *new = newvalue, oldv, newv;
+  grn_obj buf, *post = NULL;
+
+  if (!ii) {
+    ERR(GRN_INVALID_ARGUMENT, "[ii][column][update] ii is NULL");
+    return ctx->rc;
+  }
+  if (!ii->lexicon) {
+    ERR(GRN_INVALID_ARGUMENT, "[ii][column][update] lexicon is NULL");
+    return ctx->rc;
+  }
+  if (rid == GRN_ID_NIL) {
+    ERR(GRN_INVALID_ARGUMENT, "[ii][column][update] record ID is nil");
+    return ctx->rc;
+  }
+  if (old || new) {
+    unsigned char type = GRN_VOID;
+    if (old) {
+      type = (ii->obj.header.domain == old->header.domain)
+        ? GRN_UVECTOR
+        : old->header.type;
+    }
+    if (new) {
+      type = (ii->obj.header.domain == new->header.domain)
+        ? GRN_UVECTOR
+        : new->header.type;
+    }
+    if (type == GRN_VECTOR) {
+      grn_obj *tokenizer;
+      grn_table_get_info(ctx, ii->lexicon, NULL, NULL, &tokenizer, NULL, NULL);
+      if (tokenizer) {
+        grn_obj old_elem, new_elem;
+        unsigned int i, max_n;
+        unsigned int old_n = 0, new_n = 0;
+        /* Only one of old/new is guaranteed to be non-NULL here, so read
+         * the domains under the same guards as the sizes. */
+        grn_id old_domain = GRN_ID_NIL, new_domain = GRN_ID_NIL;
+        if (old) {
+          old_n = grn_vector_size(ctx, old);
+          old_domain = old->header.domain;
+        }
+        if (new) {
+          new_n = grn_vector_size(ctx, new);
+          new_domain = new->header.domain;
+        }
+        max_n = (old_n > new_n) ? old_n : new_n;
+        GRN_OBJ_INIT(&old_elem, GRN_BULK, GRN_OBJ_DO_SHALLOW_COPY, old_domain);
+        GRN_OBJ_INIT(&new_elem, GRN_BULK, GRN_OBJ_DO_SHALLOW_COPY, new_domain);
+        for (i = 0; i < max_n; i++) {
+          grn_rc rc;
+          grn_obj *old_p = NULL, *new_p = NULL;
+          if (i < old_n) {
+            const char *str;
+            unsigned int size = grn_vector_get_element(ctx, old, i, &str, NULL, NULL);
+            GRN_TEXT_SET_REF(&old_elem, str, size);
+            old_p = &old_elem;
+          }
+          if (i < new_n) {
+            const char *str;
+            unsigned int size = grn_vector_get_element(ctx, new, i, &str, NULL, NULL);
+            GRN_TEXT_SET_REF(&new_elem, str, size);
+            new_p = &new_elem;
+          }
+          /* Each vector element is indexed as its own section. */
+          rc = grn_ii_column_update(ctx, ii, rid, section + i, old_p, new_p, posting);
+          if (rc != GRN_SUCCESS) {
+            break;
+          }
+        }
+        GRN_OBJ_FIN(ctx, &old_elem);
+        GRN_OBJ_FIN(ctx, &new_elem);
+        return ctx->rc;
+      }
+    }
+  }
+  if (posting) {
+    GRN_RECORD_INIT(&buf, GRN_OBJ_VECTOR, grn_obj_id(ctx, ii->lexicon));
+    post = &buf;
+  }
+  if (grn_io_lock(ctx, ii->seg, grn_lock_timeout)) { return ctx->rc; }
+  if (new) {
+    unsigned char type = (ii->obj.header.domain == new->header.domain)
+      ? GRN_UVECTOR
+      : new->header.type;
+    switch (type) {
+    case GRN_BULK :
+      {
+        if (grn_bulk_is_zero(ctx, new)) {
+          do_grn_ii_updspec_cmp = GRN_FALSE;
+        }
+        /* Wrap the bulk into a one-section vector and share the vector path. */
+        new_ = new;
+        GRN_OBJ_INIT(&newv, GRN_VECTOR, GRN_OBJ_DO_SHALLOW_COPY, GRN_DB_TEXT);
+        newv.u.v.body = new;
+        new = &newv;
+        grn_vector_delimit(ctx, new, 0, GRN_ID_NIL);
+        if (new_ != newvalue) { grn_obj_close(ctx, new_); }
+      }
+      /* fallthru */
+    case GRN_VECTOR :
+      new_ = new;
+      new = (grn_obj *)grn_hash_create(ctx, NULL, sizeof(grn_id),
+                                       sizeof(grn_ii_updspec *),
+                                       GRN_HASH_TINY);
+      if (!new) {
+        DEFINE_NAME(ii);
+        MERR("[ii][column][update][new][vector] failed to create a hash table: "
+             "<%.*s>: ",
+             name_size, name);
+      } else {
+        grn_vector2updspecs(ctx, ii, rid, section, new_, new,
+                            GRN_TOKEN_ADD, post);
+      }
+      if (new_ != newvalue) { grn_obj_close(ctx, new_); }
+      if (ctx->rc != GRN_SUCCESS) { goto exit; }
+      break;
+    case GRN_UVECTOR :
+      new_ = new;
+      new = (grn_obj *)grn_hash_create(ctx, NULL, sizeof(grn_id),
+                                       sizeof(grn_ii_updspec *),
+                                       GRN_HASH_TINY);
+      if (!new) {
+        DEFINE_NAME(ii);
+        MERR("[ii][column][update][new][uvector] failed to create a hash table: "
+             "<%.*s>: ",
+             name_size, name);
+      } else {
+        if (new_->header.type == GRN_UVECTOR) {
+          grn_uvector2updspecs(ctx, ii, rid, section, new_, new,
+                               GRN_TOKEN_ADD, post);
+        } else {
+          /* A single record value: wrap it into a one-element uvector. */
+          grn_obj uvector;
+          unsigned int weight = 0;
+          GRN_VALUE_FIX_SIZE_INIT(&uvector, GRN_OBJ_VECTOR,
+                                  new_->header.domain);
+          if (new_->header.impl_flags & GRN_OBJ_WITH_WEIGHT) {
+            uvector.header.impl_flags |= GRN_OBJ_WITH_WEIGHT;
+          }
+          grn_uvector_add_element(ctx, &uvector, GRN_RECORD_VALUE(new_),
+                                  weight);
+          grn_uvector2updspecs(ctx, ii, rid, section, &uvector, new,
+                               GRN_TOKEN_ADD, post);
+          GRN_OBJ_FIN(ctx, &uvector);
+        }
+      }
+      if (new_ != newvalue) { grn_obj_close(ctx, new_); }
+      if (ctx->rc != GRN_SUCCESS) { goto exit; }
+      break;
+    case GRN_TABLE_HASH_KEY :
+      break;
+    default :
+      {
+        DEFINE_NAME(ii);
+        ERR(GRN_INVALID_ARGUMENT,
+            "[ii][column][update][new] invalid object: "
+            "<%.*s>: "
+            "<%-.256s>(%#x)",
+            name_size, name,
+            grn_obj_type_to_string(type),
+            type);
+      }
+      goto exit;
+    }
+  }
+  if (posting) {
+    grn_ii_updspec *u_;
+    uint32_t offset = 0;
+    grn_id tid_ = 0, gap, tid, *tpe;
+    grn_table_sort_optarg arg = {GRN_TABLE_SORT_ASC|
+                                 GRN_TABLE_SORT_AS_NUMBER|
+                                 GRN_TABLE_SORT_AS_UNSIGNED, NULL, NULL,0 };
+    grn_array *sorted = grn_array_create(ctx, NULL, sizeof(grn_id), 0);
+    grn_hash_sort(ctx, (grn_hash *)new, -1, sorted, &arg);
+    GRN_TEXT_PUT(ctx, posting, ((grn_hash *)new)->n_entries, sizeof(uint32_t));
+    /* Token ids in ascending order, stored as gaps from the previous id. */
+    GRN_ARRAY_EACH(ctx, sorted, 0, 0, id, &tp, {
+      grn_hash_get_key(ctx, (grn_hash *)new, *tp, &tid, sizeof(grn_id));
+      gap = tid - tid_;
+      GRN_TEXT_PUT(ctx, posting, &gap, sizeof(grn_id));
+      tid_ = tid;
+    });
+    /* Term frequency per token; also number the specs in sorted order. */
+    GRN_ARRAY_EACH(ctx, sorted, 0, 0, id, &tp, {
+      grn_hash_get_value(ctx, (grn_hash *)new, *tp, &u_);
+      u_->offset = offset++;
+      GRN_TEXT_PUT(ctx, posting, &u_->tf, sizeof(int32_t));
+    });
+    /* For each collected token occurrence, emit the number of its spec. */
+    tpe = (grn_id *)GRN_BULK_CURR(post);
+    for (tp = (grn_id *)GRN_BULK_HEAD(post); tp < tpe; tp++) {
+      grn_hash_get(ctx, (grn_hash *)new, (void *)tp, sizeof(grn_id),
+                   (void **)&u);
+      GRN_TEXT_PUT(ctx, posting, &(*u)->offset, sizeof(int32_t));
+    }
+    GRN_OBJ_FIN(ctx, post);
+    grn_array_close(ctx, sorted);
+  }
+
+  if (old) {
+    unsigned char type = (ii->obj.header.domain == old->header.domain)
+      ? GRN_UVECTOR
+      : old->header.type;
+    switch (type) {
+    case GRN_BULK :
+      {
+        /* Wrap the bulk into a one-section vector and share the vector path. */
+        old_ = old;
+        GRN_OBJ_INIT(&oldv, GRN_VECTOR, GRN_OBJ_DO_SHALLOW_COPY, GRN_DB_TEXT);
+        oldv.u.v.body = old;
+        old = &oldv;
+        grn_vector_delimit(ctx, old, 0, GRN_ID_NIL);
+        if (old_ != oldvalue) { grn_obj_close(ctx, old_); }
+      }
+      /* fallthru */
+    case GRN_VECTOR :
+      old_ = old;
+      old = (grn_obj *)grn_hash_create(ctx, NULL, sizeof(grn_id),
+                                       sizeof(grn_ii_updspec *),
+                                       GRN_HASH_TINY);
+      if (!old) {
+        DEFINE_NAME(ii);
+        MERR("[ii][column][update][old][vector] failed to create a hash table: "
+             "<%.*s>: ",
+             name_size, name);
+      } else {
+        grn_vector2updspecs(ctx, ii, rid, section, old_, old,
+                            GRN_TOKEN_DEL, NULL);
+      }
+      if (old_ != oldvalue) { grn_obj_close(ctx, old_); }
+      if (ctx->rc != GRN_SUCCESS) { goto exit; }
+      break;
+    case GRN_UVECTOR :
+      old_ = old;
+      old = (grn_obj *)grn_hash_create(ctx, NULL, sizeof(grn_id),
+                                       sizeof(grn_ii_updspec *),
+                                       GRN_HASH_TINY);
+      if (!old) {
+        DEFINE_NAME(ii);
+        MERR("[ii][column][update][old][uvector] failed to create a hash table: "
+             "<%.*s>: ",
+             name_size, name);
+      } else {
+        if (old_->header.type == GRN_UVECTOR) {
+          grn_uvector2updspecs(ctx, ii, rid, section, old_, old,
+                               GRN_TOKEN_DEL, NULL);
+        } else {
+          /* A single record value: wrap it into a one-element uvector. */
+          grn_obj uvector;
+          unsigned int weight = 0;
+          GRN_VALUE_FIX_SIZE_INIT(&uvector, GRN_OBJ_VECTOR,
+                                  old_->header.domain);
+          if (old_->header.impl_flags & GRN_OBJ_WITH_WEIGHT) {
+            uvector.header.impl_flags |= GRN_OBJ_WITH_WEIGHT;
+          }
+          grn_uvector_add_element(ctx, &uvector, GRN_RECORD_VALUE(old_),
+                                  weight);
+          grn_uvector2updspecs(ctx, ii, rid, section, &uvector, old,
+                               GRN_TOKEN_DEL, NULL);
+          GRN_OBJ_FIN(ctx, &uvector);
+        }
+      }
+      if (old_ != oldvalue) { grn_obj_close(ctx, old_); }
+      if (ctx->rc != GRN_SUCCESS) { goto exit; }
+      break;
+    case GRN_TABLE_HASH_KEY :
+      break;
+    default :
+      {
+        DEFINE_NAME(ii);
+        ERR(GRN_INVALID_ARGUMENT,
+            "[ii][column][update][old] invalid object: "
+            "<%.*s>: "
+            "<%-.256s>(%#x)",
+            name_size, name,
+            grn_obj_type_to_string(type),
+            type);
+      }
+      goto exit;
+    }
+  }
+
+  if (old) {
+    grn_id eid;
+    grn_hash *o = (grn_hash *)old;
+    grn_hash *n = (grn_hash *)new;
+    GRN_HASH_EACH(ctx, o, id, &tp, NULL, &u, {
+      if (n && (eid = grn_hash_get(ctx, n, tp, sizeof(grn_id),
+                                   (void **) &un))) {
+        /* Unchanged token: drop it from the new set so it is not rewritten. */
+        if (do_grn_ii_updspec_cmp && !grn_ii_updspec_cmp(*u, *un)) {
+          grn_ii_updspec_close(ctx, *un);
+          grn_hash_delete_by_id(ctx, n, eid, NULL);
+        }
+      } else {
+        grn_ii_delete_one(ctx, ii, *tp, *u, n);
+      }
+      grn_ii_updspec_close(ctx, *u);
+      if (ctx->rc != GRN_SUCCESS) {
+        break;
+      }
+    });
+  }
+  if (new) {
+    grn_hash *n = (grn_hash *)new;
+    GRN_HASH_EACH(ctx, n, id, &tp, NULL, &u, {
+      grn_ii_update_one(ctx, ii, *tp, *u, n);
+      grn_ii_updspec_close(ctx, *u);
+      if (ctx->rc != GRN_SUCCESS) {
+        break;
+      }
+    });
+  } else {
+    if (!section) {
+      /* todo: delete key when all sections deleted */
+    }
+  }
+exit :
+  grn_io_unlock(ii->seg);
+  /* Only the temporary hash tables are closed; caller-owned values are not. */
+  if (old && old != oldvalue) { grn_obj_close(ctx, old); }
+  if (new && new != newvalue) { grn_obj_close(ctx, new); }
+  return ctx->rc;
+}
+
+/* token_info */
+
+typedef struct { /* search state for one query token, built by token_info_open() */
+ cursor_heap *cursors; /* heap of index cursors, one per lexicon term matched for this token */
+ int offset; /* position offset given to token_info_open(); subtracted from posting positions */
+ int pos; /* position of the current posting minus `offset` */
+ int size; /* sum of grn_ii_estimate_size() over the terms pushed to `cursors` */
+ int ntoken; /* number of terms pushed to `cursors` */
+ grn_posting *p; /* posting of the cursor currently at the top of `cursors` */
+} token_info;
+
+#define EX_NONE 0 /* token_info_open(): look the key up in the lexicon as is */
+#define EX_PREFIX 1 /* token_info_open(): expand the key with a GRN_OP_PREFIX search */
+#define EX_SUFFIX 2 /* token_info_open(): expand the key with a GRN_OP_SUFFIX search */
+#define EX_BOTH 3 /* token_info_open(): expand via token_info_expand_both(); equals EX_PREFIX|EX_SUFFIX */
+#define EX_FUZZY 4 /* token_info_open(): expand the key with grn_table_fuzzy_search() */
+
+/*
+ * Fill `ti->cursors` for EX_BOTH expansion of `key`.
+ *
+ * Every lexicon term found by a prefix search on `key` is considered. When
+ * the lexicon is a patricia trie with GRN_OBJ_KEY_WITH_SIS and the term is
+ * longer than 2 bytes, the terms returned by a suffix search on that term
+ * are pushed instead of the term itself. Terms whose estimated size is 0 are
+ * skipped; `ti->ntoken` and `ti->size` are updated for each pushed term.
+ */
+inline static void
+token_info_expand_both(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii,
+                       const char *key, unsigned int key_size, token_info *ti)
+{
+  grn_hash *prefix_hits;
+  grn_hash_cursor *hc;
+
+  prefix_hits = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, 0);
+  if (!prefix_hits) {
+    return;
+  }
+  grn_table_search(ctx, lexicon, key, key_size,
+                   GRN_OP_PREFIX, (grn_obj *)prefix_hits, GRN_OP_OR);
+  if (!GRN_HASH_SIZE(prefix_hits)) {
+    goto done;
+  }
+  ti->cursors = cursor_heap_open(ctx, GRN_HASH_SIZE(prefix_hits) + 256);
+  if (!ti->cursors) {
+    goto done;
+  }
+  hc = grn_hash_cursor_open(ctx, prefix_hits, NULL, 0, NULL, 0, 0, -1, 0);
+  if (!hc) {
+    goto done;
+  }
+  while (grn_hash_cursor_next(ctx, hc)) {
+    grn_id *tp;
+    const char *key2;
+    uint32_t key2_size;
+    int estimated;
+
+    grn_hash_cursor_get_key(ctx, hc, (void **) &tp);
+    key2 = _grn_table_key(ctx, lexicon, *tp, &key2_size);
+    if (!key2) { break; }
+    if ((lexicon->header.type == GRN_TABLE_PAT_KEY) &&
+        (lexicon->header.flags & GRN_OBJ_KEY_WITH_SIS) &&
+        key2_size > 2) { /* todo: refine */
+      /* Expand the matched term again through its suffixes. */
+      grn_hash *suffix_hits = grn_hash_create(ctx, NULL, sizeof(grn_id), 0,
+                                              GRN_HASH_TINY);
+      if (suffix_hits) {
+        grn_id *tq;
+        uint32_t *offset2;
+        grn_pat_suffix_search(ctx, (grn_pat *)lexicon, key2, key2_size,
+                              suffix_hits);
+        GRN_HASH_EACH(ctx, suffix_hits, id, &tq, NULL, &offset2, {
+          if ((estimated = grn_ii_estimate_size(ctx, ii, *tq))) {
+            cursor_heap_push(ctx, ti->cursors, ii, *tq,
+                             /* *offset2 */ 0, 0, GRN_ID_NIL);
+            ti->ntoken++;
+            ti->size += estimated;
+          }
+        });
+        grn_hash_close(ctx, suffix_hits);
+      }
+    } else {
+      if ((estimated = grn_ii_estimate_size(ctx, ii, *tp))) {
+        cursor_heap_push(ctx, ti->cursors, ii, *tp, 0, 0, GRN_ID_NIL);
+        ti->ntoken++;
+        ti->size += estimated;
+      }
+    }
+  }
+  grn_hash_cursor_close(ctx, hc);
+done :
+  grn_hash_close(ctx, prefix_hits);
+}
+
+/*
+ * Release a token_info: close its cursor heap, then free the structure.
+ * Always returns GRN_SUCCESS.
+ */
+inline static grn_rc
+token_info_close(grn_ctx *ctx, token_info *ti)
+{
+  cursor_heap *heap = ti->cursors;
+
+  cursor_heap_close(ctx, heap);
+  GRN_FREE(ti);
+  return GRN_SUCCESS;
+}
+
+/*
+ * Build the search state for one query token.
+ *
+ * `mode` (EX_NONE, EX_PREFIX, EX_SUFFIX, EX_BOTH or EX_FUZZY) selects how
+ * `key` is matched against `lexicon`; each matched term with a non-zero
+ * estimated size gets a cursor in the heap. `offset` is stored and later
+ * subtracted from posting positions, `args` is only used for EX_FUZZY, and
+ * `min` is forwarded to cursor_heap_push() in every mode except EX_BOTH.
+ *
+ * Returns a new token_info positioned on its first posting, or NULL when
+ * `key` is NULL, allocation fails, or no posting is available.
+ */
+inline static token_info *
+token_info_open(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii,
+                const char *key, unsigned int key_size, uint32_t offset,
+                int mode, grn_fuzzy_search_optarg *args, grn_id min)
+{
+  int estimated = 0;
+  grn_hash *hits;
+  token_info *ti;
+  grn_id tid;
+  grn_id *tp;
+  grn_ii_cursor *ic;
+
+  if (!key) { return NULL; }
+  ti = GRN_MALLOC(sizeof(token_info));
+  if (!ti) { return NULL; }
+  ti->cursors = NULL;
+  ti->size = 0;
+  ti->ntoken = 0;
+  ti->offset = offset;
+
+  switch (mode) {
+  case EX_BOTH :
+    token_info_expand_both(ctx, lexicon, ii, key, key_size, ti);
+    break;
+  case EX_NONE :
+    tid = grn_table_get(ctx, lexicon, key, key_size);
+    if (!tid) { break; }
+    estimated = grn_ii_estimate_size(ctx, ii, tid);
+    if (!estimated) { break; }
+    ti->cursors = cursor_heap_open(ctx, 1);
+    if (!ti->cursors) { break; }
+    cursor_heap_push(ctx, ti->cursors, ii, tid, 0, 0, min);
+    ti->ntoken++;
+    ti->size = estimated;
+    break;
+  case EX_PREFIX :
+    hits = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, 0);
+    if (!hits) { break; }
+    grn_table_search(ctx, lexicon, key, key_size,
+                     GRN_OP_PREFIX, (grn_obj *)hits, GRN_OP_OR);
+    if (GRN_HASH_SIZE(hits) &&
+        (ti->cursors = cursor_heap_open(ctx, GRN_HASH_SIZE(hits)))) {
+      GRN_HASH_EACH(ctx, hits, id, &tp, NULL, NULL, {
+        if ((estimated = grn_ii_estimate_size(ctx, ii, *tp))) {
+          cursor_heap_push(ctx, ti->cursors, ii, *tp, 0, 0, min);
+          ti->ntoken++;
+          ti->size += estimated;
+        }
+      });
+    }
+    grn_hash_close(ctx, hits);
+    break;
+  case EX_SUFFIX :
+    hits = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, 0);
+    if (!hits) { break; }
+    grn_table_search(ctx, lexicon, key, key_size,
+                     GRN_OP_SUFFIX, (grn_obj *)hits, GRN_OP_OR);
+    if (GRN_HASH_SIZE(hits) &&
+        (ti->cursors = cursor_heap_open(ctx, GRN_HASH_SIZE(hits)))) {
+      uint32_t *offset2;
+      GRN_HASH_EACH(ctx, hits, id, &tp, NULL, &offset2, {
+        if ((estimated = grn_ii_estimate_size(ctx, ii, *tp))) {
+          cursor_heap_push(ctx, ti->cursors, ii, *tp, /* *offset2 */ 0, 0, min);
+          ti->ntoken++;
+          ti->size += estimated;
+        }
+      });
+    }
+    grn_hash_close(ctx, hits);
+    break;
+  case EX_FUZZY :
+    hits = (grn_hash *)grn_table_create(ctx, NULL, 0, NULL,
+                                        GRN_OBJ_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC,
+                                        grn_ctx_at(ctx, GRN_DB_UINT32), NULL);
+    if (!hits) { break; }
+    grn_table_fuzzy_search(ctx, lexicon, key, key_size,
+                           args, (grn_obj *)hits, GRN_OP_OR);
+    if (GRN_HASH_SIZE(hits) &&
+        (ti->cursors = cursor_heap_open(ctx, GRN_HASH_SIZE(hits)))) {
+      grn_rset_recinfo *ri;
+      GRN_HASH_EACH(ctx, hits, id, &tp, NULL, (void **)&ri, {
+        if ((estimated = grn_ii_estimate_size(ctx, ii, *tp))) {
+          cursor_heap_push(ctx, ti->cursors, ii, *tp, 0, ri->score - 1, min);
+          ti->ntoken++;
+          ti->size += estimated;
+        }
+      });
+    }
+    grn_obj_close(ctx, (grn_obj *)hits);
+    break;
+  }
+
+  if (cursor_heap_push2(ti->cursors)) {
+    token_info_close(ctx, ti);
+    return NULL;
+  }
+  /* Position the token on the first posting of the heap. */
+  if (ti->cursors && (ic = cursor_heap_min(ti->cursors))) {
+    grn_posting *first = ic->post;
+    ti->pos = first->pos - ti->offset;
+    ti->p = first;
+    return ti;
+  }
+  token_info_close(ctx, ti);
+  return NULL;
+}
+
+/*
+ * Advance `ti` until its current posting is at or after (rid, sid).
+ *
+ * Returns GRN_SUCCESS with ti->pos and ti->p refreshed, or GRN_END_OF_DATA
+ * when the cursor heap runs out of postings first.
+ */
+static inline grn_rc
+token_info_skip(grn_ctx *ctx, token_info *ti, uint32_t rid, uint32_t sid)
+{
+  grn_ii_cursor *cur;
+
+  while ((cur = cursor_heap_min(ti->cursors))) {
+    grn_posting *post = cur->post;
+    if (post->rid > rid || (post->rid == rid && post->sid >= sid)) {
+      ti->pos = post->pos - ti->offset;
+      ti->p = post;
+      return GRN_SUCCESS;
+    }
+    cursor_heap_pop(ctx, ti->cursors, rid);
+  }
+  return GRN_END_OF_DATA;
+}
+
+/*
+ * Within record `rid` / section `sid`, advance `ti` until its current posting
+ * position is at least `pos` + ti->offset. The loop also stops as soon as
+ * the current posting belongs to a different record or section.
+ *
+ * Returns GRN_SUCCESS with ti->pos and ti->p refreshed, or GRN_END_OF_DATA
+ * when the cursor heap runs out of postings first.
+ */
+static inline grn_rc
+token_info_skip_pos(grn_ctx *ctx, token_info *ti, uint32_t rid, uint32_t sid, uint32_t pos)
+{
+  uint32_t target = pos + ti->offset;
+  grn_ii_cursor *cur;
+
+  while ((cur = cursor_heap_min(ti->cursors))) {
+    grn_posting *post = cur->post;
+    if (post->rid != rid || post->sid != sid || post->pos >= target) {
+      ti->pos = post->pos - ti->offset;
+      ti->p = post;
+      return GRN_SUCCESS;
+    }
+    cursor_heap_pop_pos(ctx, ti->cursors);
+  }
+  return GRN_END_OF_DATA;
+}
+
+/*
+ * Comparison callback over `token_info *` elements: orders them by
+ * ascending `size`.
+ */
+inline static int
+token_compare(const void *a, const void *b)
+{
+  const token_info *lhs = *((token_info **)a);
+  const token_info *rhs = *((token_info **)b);
+
+  return lhs->size - rhs->size;
+}
+
+#define TOKEN_CANDIDATE_NODE_SIZE 32 /* initial length and growth step of the node array in token_candidate_init() */
+#define TOKEN_CANDIDATE_ADJACENT_MAX_SIZE 16 /* capacity of token_candidate_node.adjacent[] */
+#define TOKEN_CANDIDATE_QUEUE_SIZE 64 /* initial length and growth step of token_candidate_queue.candidates */
+#define TOKEN_CANDIDATE_SIZE 16 /* token_info_build_skipping_overlap() uses this minus 1 as its select limit */
+
+typedef struct { /* one token produced by the token cursor, filled by token_candidate_init() */
+ grn_id tid; /* id returned by grn_token_cursor_next() */
+ const unsigned char *token; /* token_cursor->curr when the token was read */
+ uint32_t token_size; /* token_cursor->curr_size when the token was read */
+ int32_t pos; /* token_cursor->pos when the token was read */
+ grn_token_cursor_status status; /* token_cursor->status when the token was read */
+ int ef; /* EX_* expansion flags in effect for this token */
+ uint32_t estimated_size; /* grn_ii_estimate_size() of `tid` */
+ uint8_t adjacent[TOKEN_CANDIDATE_ADJACENT_MAX_SIZE]; /* Index of adjacent node from top */
+ uint8_t n_adjacent; /* number of used entries in `adjacent` */
+} token_candidate_node;
+
+typedef struct { /* growable FIFO of candidate bitmasks used by token_candidate_select() */
+ uint32_t *candidates; /* Standing bits indicate index of token_candidate_node */
+ int top; /* index of the next element to dequeue */
+ int rear; /* index at which the next element is enqueued */
+ int size; /* allocated length of `candidates`, in elements */
+} token_candidate_queue;
+
+/*
+ * Register node `curr` as an adjacent node of earlier nodes.
+ *
+ * Every node in [top, curr) whose token ends at or after the start of the
+ * cursor's current token, and that still has room in adjacent[], gets the
+ * index of `curr` appended. When no node was linked that way, the node just
+ * before `curr` is linked instead, if it has room.
+ */
+inline static void
+token_candidate_adjacent_set(grn_ctx *ctx, grn_token_cursor *token_cursor,
+                             token_candidate_node *top, token_candidate_node *curr)
+{
+  grn_bool linked = GRN_FALSE;
+  token_candidate_node *prev;
+
+  for (prev = top; prev < curr; prev++) {
+    if (token_cursor->curr > prev->token + prev->token_size) {
+      continue;
+    }
+    if (prev->n_adjacent >= TOKEN_CANDIDATE_ADJACENT_MAX_SIZE) {
+      continue;
+    }
+    prev->adjacent[prev->n_adjacent] = curr - top;
+    prev->n_adjacent++;
+    linked = GRN_TRUE;
+  }
+  if (linked) {
+    return;
+  }
+  prev = curr - 1;
+  if (prev->n_adjacent < TOKEN_CANDIDATE_ADJACENT_MAX_SIZE) {
+    prev->adjacent[prev->n_adjacent] = curr - top;
+    prev->n_adjacent++;
+  }
+}
+
+/*
+ * Read the remaining tokens from `token_cursor` into a newly allocated node
+ * array and link each node to the nodes adjacent to it.
+ *
+ * The first node is built from the token already read by the caller (`tid`,
+ * `ef` and the current cursor state). `*nodes` receives the array,
+ * `*n_nodes` its length and `*max_estimated_size` the largest
+ * estimated_size among the nodes.
+ *
+ * Returns GRN_SUCCESS, or GRN_NO_MEMORY_AVAILABLE when the array cannot be
+ * allocated or grown; in that case nothing stays allocated and `*nodes` is
+ * NULL.
+ */
+inline static grn_rc
+token_candidate_init(grn_ctx *ctx, grn_ii *ii, grn_token_cursor *token_cursor,
+                     grn_id tid, int ef, token_candidate_node **nodes, int *n_nodes,
+                     uint32_t *max_estimated_size)
+{
+  token_candidate_node *top, *curr;
+  int size = TOKEN_CANDIDATE_NODE_SIZE;
+
+  *nodes = GRN_MALLOC(TOKEN_CANDIDATE_NODE_SIZE * sizeof(token_candidate_node));
+  if (!*nodes) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  top = *nodes;
+  curr = top;
+
+#define TOKEN_CANDIDATE_NODE_SET() { \
+  curr->tid = tid; \
+  curr->token = token_cursor->curr; \
+  curr->token_size = token_cursor->curr_size; \
+  curr->pos = token_cursor->pos; \
+  curr->status = token_cursor->status; \
+  curr->ef = ef; \
+  curr->estimated_size = grn_ii_estimate_size(ctx, ii, tid); \
+  curr->n_adjacent = 0; \
+}
+  TOKEN_CANDIDATE_NODE_SET();
+  GRN_LOG(ctx, GRN_LOG_DEBUG, "[ii][overlap_token_skip] tid=%u pos=%d estimated_size=%u",
+          curr->tid, curr->pos, curr->estimated_size);
+  *max_estimated_size = curr->estimated_size;
+  curr++;
+
+  while (token_cursor->status == GRN_TOKEN_CURSOR_DOING) {
+    if (curr - top >= size) {
+      /* Grow through a temporary pointer: assigning the result straight to
+       * *nodes would lose the old array when the reallocation fails, and
+       * the caller does not free anything on error. */
+      token_candidate_node *grown =
+        GRN_REALLOC(*nodes,
+                    (curr - top + TOKEN_CANDIDATE_NODE_SIZE) * sizeof(token_candidate_node));
+      if (!grown) {
+        GRN_FREE(*nodes);
+        *nodes = NULL;
+        return GRN_NO_MEMORY_AVAILABLE;
+      }
+      *nodes = grown;
+      top = *nodes;
+      curr = top + size;
+      size += TOKEN_CANDIDATE_NODE_SIZE;
+    }
+    tid = grn_token_cursor_next(ctx, token_cursor);
+    if (token_cursor->status != GRN_TOKEN_CURSOR_DONE_SKIP) {
+      if (token_cursor->force_prefix) { ef |= EX_PREFIX; }
+      TOKEN_CANDIDATE_NODE_SET();
+      token_candidate_adjacent_set(ctx, token_cursor, top, curr);
+      if (curr->estimated_size > *max_estimated_size) {
+        *max_estimated_size = curr->estimated_size;
+      }
+      curr++;
+    }
+  }
+  *n_nodes = curr - top;
+  return GRN_SUCCESS;
+#undef TOKEN_CANDIDATE_NODE_SET
+}
+
+/*
+ * Initialize `q` as an empty queue with room for
+ * TOKEN_CANDIDATE_QUEUE_SIZE candidates.
+ *
+ * Returns GRN_NO_MEMORY_AVAILABLE (leaving the queue with size 0) when the
+ * buffer cannot be allocated, GRN_SUCCESS otherwise.
+ */
+inline static grn_rc
+token_candidate_queue_init(grn_ctx *ctx, token_candidate_queue *q)
+{
+  q->top = 0;
+  q->rear = 0;
+  q->candidates = GRN_MALLOC(TOKEN_CANDIDATE_QUEUE_SIZE * sizeof(uint32_t));
+  if (q->candidates) {
+    q->size = TOKEN_CANDIDATE_QUEUE_SIZE;
+    return GRN_SUCCESS;
+  }
+  q->size = 0;
+  return GRN_NO_MEMORY_AVAILABLE;
+}
+
+/*
+ * Append `candidate` to the queue, growing the buffer by
+ * TOKEN_CANDIDATE_QUEUE_SIZE elements when it is full.
+ *
+ * Returns GRN_NO_MEMORY_AVAILABLE when the buffer cannot be grown; the queue
+ * is then left unchanged, so its existing buffer can still be released with
+ * token_candidate_queue_fin(). Returns GRN_SUCCESS otherwise.
+ */
+inline static grn_rc
+token_candidate_enqueue(grn_ctx *ctx, token_candidate_queue *q, uint32_t candidate)
+{
+  if (q->rear >= q->size) {
+    /* Grow through a temporary pointer: assigning the result straight to
+     * q->candidates would lose the old buffer when reallocation fails. */
+    uint32_t *grown =
+      GRN_REALLOC(q->candidates,
+                  (q->rear + TOKEN_CANDIDATE_QUEUE_SIZE) * sizeof(uint32_t));
+    if (!grown) {
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+    q->candidates = grown;
+    q->size += TOKEN_CANDIDATE_QUEUE_SIZE;
+  }
+  *(q->candidates + q->rear) = candidate;
+  q->rear++;
+  return GRN_SUCCESS;
+}
+
+/*
+ * Remove the oldest candidate from the queue into `*candidate`.
+ * Returns GRN_END_OF_DATA when the queue is empty, GRN_SUCCESS otherwise.
+ */
+inline static grn_rc
+token_candidate_dequeue(grn_ctx *ctx, token_candidate_queue *q, uint32_t *candidate)
+{
+  if (q->top != q->rear) {
+    *candidate = q->candidates[q->top];
+    q->top++;
+    return GRN_SUCCESS;
+  }
+  return GRN_END_OF_DATA;
+}
+
+/* Release the candidate buffer owned by the queue. */
+inline static void
+token_candidate_queue_fin(grn_ctx *ctx, token_candidate_queue *q)
+{
+  uint32_t *buffer = q->candidates;
+
+  GRN_FREE(buffer);
+}
+
+/*
+ * Return the node that corresponds to the highest set bit of `candidate`,
+ * where bit i stands for nodes[i + offset].
+ */
+inline static token_candidate_node*
+token_candidate_last_node(grn_ctx *ctx, token_candidate_node *nodes, uint32_t candidate, int offset)
+{
+  int highest_bit;
+
+  GRN_BIT_SCAN_REV(candidate, highest_bit);
+  return nodes + highest_bit + offset;
+}
+
+/*
+ * Score a candidate: for every node selected by a set bit of `candidate`
+ * (bit i stands for nodes[i + offset]) add
+ * max_estimated_size / estimated_size. Nodes whose estimated size is 0
+ * contribute nothing.
+ */
+inline static uint64_t
+token_candidate_score(grn_ctx *ctx, token_candidate_node *nodes, uint32_t candidate,
+                      int offset, uint32_t max_estimated_size)
+{
+  uint64_t total = 0;
+  int bit, highest_bit;
+
+  GRN_BIT_SCAN_REV(candidate, highest_bit);
+  for (bit = 0; bit <= highest_bit; bit++) {
+    token_candidate_node *node;
+
+    if (!(candidate & (1 << bit))) {
+      continue;
+    }
+    node = nodes + bit + offset;
+    if (node->estimated_size > 0) {
+      total += max_estimated_size / node->estimated_size;
+    }
+  }
+  return total;
+}
+
+/*
+ * Choose which nodes in the window starting at nodes[offset] to use.
+ *
+ * Candidates are bitmasks (bit i stands for nodes[i + offset]). Starting
+ * from the mask that holds only bit 0, masks are extended breadth-first
+ * through the adjacent[] links of their last node. A mask that reaches bit
+ * `limit` is scored with token_candidate_score() when its node count is the
+ * count of the first such mask or one more; the best scoring mask is stored
+ * in `*selected_candidate`. The search stops once a mask would need more
+ * than that count plus one nodes.
+ *
+ * `limit` is clamped so that offset + limit does not exceed `end`.
+ * Returns GRN_SUCCESS, or the error of the queue operation that failed.
+ */
+inline static grn_rc
+token_candidate_select(grn_ctx *ctx, token_candidate_node *nodes,
+                       int offset, int limit, int end,
+                       uint32_t *selected_candidate, uint32_t max_estimated_size)
+{
+  grn_rc rc;
+  token_candidate_queue queue;
+  uint32_t current;
+  uint64_t best_score = 0;
+  int fewest_nodes = 0;
+
+  if (offset + limit > end) {
+    limit = end - offset;
+  }
+  rc = token_candidate_queue_init(ctx, &queue);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  rc = token_candidate_enqueue(ctx, &queue, 1);
+  if (rc != GRN_SUCCESS) {
+    goto exit;
+  }
+  while (token_candidate_dequeue(ctx, &queue, &current) != GRN_END_OF_DATA) {
+    token_candidate_node *tail =
+      token_candidate_last_node(ctx, nodes, current, offset);
+    int k;
+
+    for (k = 0; k < tail->n_adjacent; k++) {
+      int next_bit = tail->adjacent[k] - offset;
+      int n_nodes = 0;
+      uint32_t extended;
+      uint64_t score;
+
+      if (next_bit > limit) {
+        break;
+      }
+      extended = current | (1 << next_bit);
+      GET_NUM_BITS(extended, n_nodes);
+      /* Longer masks cannot be selected any more: stop the whole search. */
+      if (fewest_nodes > 0 && n_nodes > fewest_nodes + 1) {
+        goto exit;
+      }
+      rc = token_candidate_enqueue(ctx, &queue, extended);
+      if (rc != GRN_SUCCESS) {
+        goto exit;
+      }
+      if (next_bit != limit) {
+        continue;
+      }
+      /* The mask reaches the end of the window. */
+      if (fewest_nodes == 0) {
+        fewest_nodes = n_nodes;
+      }
+      if (n_nodes < fewest_nodes || n_nodes > fewest_nodes + 1) {
+        continue;
+      }
+      score = token_candidate_score(ctx, nodes, extended, offset, max_estimated_size);
+      if (score > best_score) {
+        best_score = score;
+        *selected_candidate = extended;
+      }
+    }
+  }
+  rc = GRN_SUCCESS;
+exit :
+  token_candidate_queue_fin(ctx, &queue);
+  return rc;
+}
+
+/*
+ * Open a token_info for every node selected in `selected_candidate` and
+ * append it to `tis`, incrementing `*n` each time.
+ *
+ * Bit i of the bitmap refers to `nodes[i + offset]`. Bit 0 is never
+ * processed here: the scan starts at bit 1.
+ *
+ * How the token_info is opened depends on the node:
+ *  - status DOING: lexicon key of node->tid, EX_NONE.
+ *  - status DONE with a non-zero tid: lexicon key of node->tid, with only
+ *    the EX_PREFIX bit of node->ef kept.
+ *  - anything else: the raw token bytes stored in the node, with only the
+ *    EX_PREFIX bit of node->ef kept.
+ *
+ * Returns GRN_SUCCESS, or GRN_END_OF_DATA as soon as token_info_open()
+ * yields NULL (token infos appended before that stay in `tis`).
+ */
+inline static grn_rc
+token_candidate_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii,
+                      token_info **tis, uint32_t *n,
+                      token_candidate_node *nodes, uint32_t selected_candidate,
+                      int offset, grn_id min)
+{
+  int bit;
+  int highest = 0;
+
+  GRN_BIT_SCAN_REV(selected_candidate, highest);
+  for (bit = 1; bit <= highest; bit++) {
+    token_candidate_node *node;
+    token_info *opened;
+    const char *key;
+    uint32_t key_size;
+
+    if (!(selected_candidate & (1 << bit))) {
+      continue;
+    }
+    node = nodes + bit + offset;
+    if (node->status == GRN_TOKEN_CURSOR_DOING) {
+      key = _grn_table_key(ctx, lexicon, node->tid, &key_size);
+      opened = token_info_open(ctx, lexicon, ii, key, key_size, node->pos,
+                               EX_NONE, NULL, min);
+    } else if (node->status == GRN_TOKEN_CURSOR_DONE && node->tid) {
+      key = _grn_table_key(ctx, lexicon, node->tid, &key_size);
+      opened = token_info_open(ctx, lexicon, ii, key, key_size, node->pos,
+                               node->ef & EX_PREFIX, NULL, min);
+    } else {
+      opened = token_info_open(ctx, lexicon, ii, (char *)node->token,
+                               node->token_size, node->pos,
+                               node->ef & EX_PREFIX, NULL, min);
+    }
+    if (!opened) {
+      return GRN_END_OF_DATA;
+    }
+    tis[(*n)++] = opened;
+    GRN_LOG(ctx, GRN_LOG_DEBUG, "[ii][overlap_token_skip] tid=%u pos=%d estimated_size=%u",
+            node->tid, node->pos, node->estimated_size);
+  }
+  return GRN_SUCCESS;
+}
+
+/*
+ * Build token infos for the remaining tokens of `token_cursor`, keeping
+ * only a selected subset of the candidate nodes.
+ *
+ * The candidate nodes are produced by token_candidate_init() and then
+ * processed in windows of TOKEN_CANDIDATE_SIZE - 1 nodes: each window is
+ * handed to token_candidate_select() and the chosen bitmap is materialised
+ * by token_candidate_build().
+ *
+ * Returns GRN_SUCCESS, or the first error reported by a helper. The node
+ * array is released before returning, except when token_candidate_init()
+ * itself fails (its result is returned as is).
+ */
+inline static grn_rc
+token_info_build_skipping_overlap(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii,
+                                  token_info **tis, uint32_t *n,
+                                  grn_token_cursor *token_cursor,
+                                  grn_id tid, int ef, grn_id min)
+{
+  grn_rc rc;
+  token_candidate_node *nodes = NULL;
+  int n_nodes = 0;
+  int offset;
+  const int window = TOKEN_CANDIDATE_SIZE - 1;
+  uint32_t max_estimated_size;
+
+  rc = token_candidate_init(ctx, ii, token_cursor, tid, ef, &nodes, &n_nodes, &max_estimated_size);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  /* rc is GRN_SUCCESS here and after every completed iteration, so it
+   * already holds the right value when the loop ends normally. */
+  for (offset = 0; offset < n_nodes - 1; offset += window) {
+    uint32_t selected_candidate = 0;
+
+    rc = token_candidate_select(ctx, nodes, offset, window, n_nodes - 1,
+                                &selected_candidate, max_estimated_size);
+    if (rc != GRN_SUCCESS) {
+      break;
+    }
+    rc = token_candidate_build(ctx, lexicon, ii, tis, n, nodes, selected_candidate, offset, min);
+    if (rc != GRN_SUCCESS) {
+      break;
+    }
+  }
+  if (nodes) {
+    GRN_FREE(nodes);
+  }
+  return rc;
+}
+
+/*
+ * Tokenize `string` against `lexicon` (GRN_TOKEN_GET) and append one
+ * token_info per relevant token to `tis`, incrementing `*n` for each.
+ *
+ * `mode` selects how tokens are expanded:
+ *  - GRN_OP_UNSPLIT: a single token_info is opened on the cursor's original
+ *    string with EX_BOTH.
+ *  - GRN_OP_PREFIX / GRN_OP_SUFFIX / GRN_OP_PARTIAL map to EX_PREFIX /
+ *    EX_SUFFIX / EX_BOTH; every other mode uses EX_NONE.
+ *
+ * `*only_skip_token` is set to GRN_TRUE only when the very first token ends
+ * with status GRN_TOKEN_CURSOR_DONE_SKIP; it is GRN_FALSE otherwise.
+ *
+ * Returns GRN_SUCCESS when all tokens were opened, GRN_NO_MEMORY_AVAILABLE
+ * when the token cursor cannot be opened, and GRN_END_OF_DATA when a
+ * token_info cannot be opened or the first token has an unhandled status.
+ * No bound on `tis` is checked here; the caller must size it.
+ */
+inline static grn_rc
+token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, unsigned int string_len,
+ token_info **tis, uint32_t *n, grn_bool *only_skip_token, grn_id min,
+ grn_operator mode)
+{
+ token_info *ti;
+ const char *key;
+ uint32_t size;
+ grn_rc rc = GRN_END_OF_DATA;
+ unsigned int token_flags = GRN_TOKEN_CURSOR_ENABLE_TOKENIZED_DELIMITER;
+ grn_token_cursor *token_cursor = grn_token_cursor_open(ctx, lexicon,
+ string, string_len,
+ GRN_TOKEN_GET,
+ token_flags);
+ *only_skip_token = GRN_FALSE;
+ if (!token_cursor) { return GRN_NO_MEMORY_AVAILABLE; }
+ if (mode == GRN_OP_UNSPLIT) {
+ /* Whole original string as one term; rc stays GRN_END_OF_DATA on failure. */
+ if ((ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig,
+ token_cursor->orig_blen, 0, EX_BOTH, NULL, min))) {
+ tis[(*n)++] = ti;
+ rc = GRN_SUCCESS;
+ }
+ } else {
+ grn_id tid;
+ int ef;
+ switch (mode) {
+ case GRN_OP_PREFIX :
+ ef = EX_PREFIX;
+ break;
+ case GRN_OP_SUFFIX :
+ ef = EX_SUFFIX;
+ break;
+ case GRN_OP_PARTIAL :
+ ef = EX_BOTH;
+ break;
+ default :
+ ef = EX_NONE;
+ break;
+ }
+ /* First token: handled separately from the rest of the stream. */
+ tid = grn_token_cursor_next(ctx, token_cursor);
+ if (token_cursor->force_prefix) { ef |= EX_PREFIX; }
+ switch (token_cursor->status) {
+ case GRN_TOKEN_CURSOR_DOING :
+ /* More tokens follow: only the EX_SUFFIX bit applies to the first one. */
+ key = _grn_table_key(ctx, lexicon, tid, &size);
+ ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos,
+ ef & EX_SUFFIX, NULL, min);
+ break;
+ case GRN_TOKEN_CURSOR_DONE :
+ /* Single token: opened from the cursor's current bytes with the full ef. */
+ ti = token_info_open(ctx, lexicon, ii, (const char *)token_cursor->curr,
+ token_cursor->curr_size, 0, ef, NULL, min);
+ /*
+ key = _grn_table_key(ctx, lexicon, tid, &size);
+ ti = token_info_open(ctx, lexicon, ii, token_cursor->curr, token_cursor->curr_size, token_cursor->pos, ef, NULL, GRN_ID_NIL);
+ ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig,
+ token_cursor->orig_blen, token_cursor->pos, ef, NULL, GRN_ID_NIL);
+ */
+ break;
+ case GRN_TOKEN_CURSOR_NOT_FOUND :
+ ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig,
+ token_cursor->orig_blen, 0, ef, NULL, min);
+ break;
+ case GRN_TOKEN_CURSOR_DONE_SKIP :
+ *only_skip_token = GRN_TRUE;
+ goto exit;
+ default :
+ goto exit;
+ }
+ if (!ti) { goto exit ; }
+ tis[(*n)++] = ti;
+
+ /* Optional path: the remaining tokens are handled by the overlap-skipping builder. */
+ if (grn_ii_overlap_token_skip_enable) {
+ rc = token_info_build_skipping_overlap(ctx, lexicon, ii, tis, n, token_cursor, tid, ef, min);
+ goto exit;
+ }
+
+ /* Remaining tokens: middle tokens use EX_NONE, the last one keeps only EX_PREFIX. */
+ while (token_cursor->status == GRN_TOKEN_CURSOR_DOING) {
+ tid = grn_token_cursor_next(ctx, token_cursor);
+ if (token_cursor->force_prefix) { ef |= EX_PREFIX; }
+ switch (token_cursor->status) {
+ case GRN_TOKEN_CURSOR_DONE_SKIP :
+ continue;
+ case GRN_TOKEN_CURSOR_DOING :
+ key = _grn_table_key(ctx, lexicon, tid, &size);
+ ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos,
+ EX_NONE, NULL, min);
+ break;
+ case GRN_TOKEN_CURSOR_DONE :
+ if (tid) {
+ key = _grn_table_key(ctx, lexicon, tid, &size);
+ ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos,
+ ef & EX_PREFIX, NULL, min);
+ break;
+ } /* else fallthru */
+ default :
+ ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->curr,
+ token_cursor->curr_size, token_cursor->pos,
+ ef & EX_PREFIX, NULL, min);
+ break;
+ }
+ if (!ti) {
+ goto exit;
+ }
+ tis[(*n)++] = ti;
+ }
+ rc = GRN_SUCCESS;
+ }
+exit :
+ grn_token_cursor_close(ctx, token_cursor);
+ return rc;
+}
+
+/*
+ * Tokenize `string` (GRN_TOKENIZE_ONLY) and append one EX_FUZZY token_info
+ * per token to `tis`, incrementing `*n` for each.
+ *
+ * `*only_skip_token` is set to GRN_TRUE only when the very first token ends
+ * with status GRN_TOKEN_CURSOR_DONE_SKIP.
+ *
+ * Returns GRN_SUCCESS when every token was opened,
+ * GRN_NO_MEMORY_AVAILABLE when the token cursor cannot be opened, and
+ * GRN_END_OF_DATA when a token_info cannot be opened or the cursor reports
+ * a status other than DOING / DONE / DONE_SKIP.
+ * No bound on `tis` is checked here; the caller must size it.
+ */
+inline static grn_rc
+token_info_build_fuzzy(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii,
+                       const char *string, unsigned int string_len,
+                       token_info **tis, uint32_t *n, grn_bool *only_skip_token,
+                       grn_id min, grn_operator mode, grn_fuzzy_search_optarg *args)
+{
+  token_info *ti;
+  grn_rc rc = GRN_END_OF_DATA;
+  unsigned int token_flags = GRN_TOKEN_CURSOR_ENABLE_TOKENIZED_DELIMITER;
+  grn_token_cursor *token_cursor = grn_token_cursor_open(ctx, lexicon,
+                                                         string, string_len,
+                                                         GRN_TOKENIZE_ONLY,
+                                                         token_flags);
+  *only_skip_token = GRN_FALSE;
+  if (!token_cursor) { return GRN_NO_MEMORY_AVAILABLE; }
+  grn_token_cursor_next(ctx, token_cursor);
+  switch (token_cursor->status) {
+  case GRN_TOKEN_CURSOR_DONE_SKIP :
+    *only_skip_token = GRN_TRUE;
+    goto exit;
+  case GRN_TOKEN_CURSOR_DOING :
+  case GRN_TOKEN_CURSOR_DONE :
+    ti = token_info_open(ctx, lexicon, ii, (const char *)token_cursor->curr,
+                         token_cursor->curr_size, token_cursor->pos, EX_FUZZY,
+                         args, min);
+    break;
+  default :
+    ti = NULL;
+    break;
+  }
+  if (!ti) {
+    goto exit ;
+  }
+  tis[(*n)++] = ti;
+  while (token_cursor->status == GRN_TOKEN_CURSOR_DOING) {
+    grn_token_cursor_next(ctx, token_cursor);
+    switch (token_cursor->status) {
+    case GRN_TOKEN_CURSOR_DONE_SKIP :
+      continue;
+    case GRN_TOKEN_CURSOR_DOING :
+    case GRN_TOKEN_CURSOR_DONE :
+      ti = token_info_open(ctx, lexicon, ii, (const char *)token_cursor->curr,
+                           token_cursor->curr_size, token_cursor->pos, EX_FUZZY,
+                           args, min);
+      break;
+    default :
+      /* Without this reset `ti` would still point at the previous token
+       * info, which would then be stored in `tis` twice and closed twice
+       * by the caller. Treat it as a failure, like the first switch. */
+      ti = NULL;
+      break;
+    }
+    if (!ti) {
+      goto exit;
+    }
+    tis[(*n)++] = ti;
+  }
+  rc = GRN_SUCCESS;
+exit :
+  grn_token_cursor_close(ctx, token_cursor);
+  return rc;
+}
+
+/* Reset the `offset` field of the first `n` token infos in `tis` to 0. */
+static void
+token_info_clear_offset(token_info **tis, uint32_t n)
+{
+  uint32_t nth;
+
+  for (nth = 0; nth < n; nth++) {
+    tis[nth]->offset = 0;
+  }
+}
+
+/* select */
+
+/*
+ * Merge one posting `pi` with `score` into the result set `s` according to
+ * the set operator `op`:
+ *  - GRN_OP_OR:      add the key; record a sub record when `s` keeps them.
+ *  - GRN_OP_AND:     only for keys already present; flag the record with
+ *                    GRN_RSET_UTIL_BIT and record a sub record (both only
+ *                    when `s` keeps sub records).
+ *  - GRN_OP_AND_NOT: delete the key when present.
+ *  - GRN_OP_ADJUST:  add `score` to an existing record's score (only when
+ *                    `s` keeps sub records).
+ * Any other operator is ignored.
+ */
+inline static void
+res_add(grn_ctx *ctx, grn_hash *s, grn_rset_posinfo *pi, double score,
+        grn_operator op)
+{
+  grn_rset_recinfo *ri;
+
+  if (op == GRN_OP_OR) {
+    if (!grn_hash_add(ctx, s, pi, s->key_size, (void **)&ri, NULL)) {
+      return;
+    }
+    if (s->obj.header.flags & GRN_OBJ_WITH_SUBREC) {
+      grn_table_add_subrec((grn_obj *)s, ri, score, pi, 1);
+    }
+    return;
+  }
+
+  if (op == GRN_OP_AND) {
+    if (!grn_hash_get(ctx, s, pi, s->key_size, (void **)&ri)) {
+      return;
+    }
+    if (s->obj.header.flags & GRN_OBJ_WITH_SUBREC) {
+      ri->n_subrecs |= GRN_RSET_UTIL_BIT;
+      grn_table_add_subrec((grn_obj *)s, ri, score, pi, 1);
+    }
+    return;
+  }
+
+  if (op == GRN_OP_AND_NOT) {
+    grn_id id = grn_hash_get(ctx, s, pi, s->key_size, (void **)&ri);
+    if (id) {
+      grn_hash_delete_by_id(ctx, s, id, NULL);
+    }
+    return;
+  }
+
+  if (op == GRN_OP_ADJUST) {
+    if (!grn_hash_get(ctx, s, pi, s->key_size, (void **)&ri)) {
+      return;
+    }
+    if (s->obj.header.flags & GRN_OBJ_WITH_SUBREC) {
+      ri->score += score;
+    }
+    return;
+  }
+}
+
+/*
+ * Merge a single posting into the result set `s` with operator `op`,
+ * using (1 + pos->weight) as its score. Returns the context's current rc.
+ */
+grn_rc
+grn_ii_posting_add(grn_ctx *ctx, grn_posting *pos, grn_hash *s, grn_operator op)
+{
+  double score = (1 + pos->weight);
+  grn_rset_posinfo *key = (grn_rset_posinfo *)(pos);
+
+  res_add(ctx, s, key, score, op);
+  return ctx->rc;
+}
+
+#ifdef USE_BHEAP
+
+/* todo */
+
+#else /* USE_BHEAP */
+
+/*
+ * Node of the unbalanced binary tree used to order token infos by their
+ * current position (ti->pos). `car` holds smaller positions, `cdr` holds
+ * equal or larger ones (see bt_push()).
+ */
+struct _btr_node {
+ struct _btr_node *car;
+ struct _btr_node *cdr;
+ token_info *ti;
+};
+
+typedef struct _btr_node btr_node;
+
+/*
+ * Tree header. `nodes` is a preallocated pool (bt_open()) from which
+ * bt_push() takes the next free slot; `n` counts the slots in use.
+ * `min` / `max` cache the token infos with the smallest / largest position.
+ */
+typedef struct {
+ int n;
+ token_info *min;
+ token_info *max;
+ btr_node *root;
+ btr_node *nodes;
+} btr;
+
+/*
+ * Empty the tree: forget the root and the cached min / max and make the
+ * whole node pool available again. The pool itself is not released.
+ */
+inline static void
+bt_zap(btr *bt)
+{
+  bt->root = NULL;
+  bt->max = NULL;
+  bt->min = NULL;
+  bt->n = 0;
+}
+
+/*
+ * Allocate an empty tree with a pool of `size` nodes.
+ * Returns NULL when either allocation fails (nothing is leaked).
+ */
+inline static btr *
+bt_open(grn_ctx *ctx, int size)
+{
+  btr *tree = GRN_MALLOC(sizeof(btr));
+
+  if (!tree) {
+    return NULL;
+  }
+  bt_zap(tree);
+  tree->nodes = GRN_MALLOC(sizeof(btr_node) * size);
+  if (!tree->nodes) {
+    GRN_FREE(tree);
+    return NULL;
+  }
+  return tree;
+}
+
+/* Release a tree created by bt_open(). A NULL tree is accepted. */
+inline static void
+bt_close(grn_ctx *ctx, btr *bt)
+{
+  if (bt) {
+    GRN_FREE(bt->nodes);
+    GRN_FREE(bt);
+  }
+}
+
+/*
+ * Insert `ti` into the tree, keyed by ti->pos, using the next free slot of
+ * the node pool (no capacity check is done here).
+ *
+ * Smaller positions go to `car`, equal or larger ones to `cdr`. The cached
+ * minimum is replaced when the descent never took a `cdr` branch, and the
+ * cached maximum when it never took a `car` branch.
+ */
+inline static void
+bt_push(btr *bt, token_info *ti)
+{
+  const int pos = ti->pos;
+  int went_car = 0;
+  int went_cdr = 0;
+  btr_node **link = &bt->root;
+  btr_node *fresh = bt->nodes + bt->n++;
+
+  fresh->ti = ti;
+  fresh->car = NULL;
+  fresh->cdr = NULL;
+
+  while (*link) {
+    btr_node *current = *link;
+    if (pos < current->ti->pos) {
+      link = &current->car;
+      went_car = 1;
+    } else {
+      link = &current->cdr;
+      went_cdr = 1;
+    }
+  }
+  *link = fresh;
+
+  if (!went_cdr) { bt->min = ti; }
+  if (!went_car) { bt->max = ti; }
+}
+
+/*
+ * Re-sort the minimum node after its token info's position has changed.
+ *
+ * The leftmost node is unlinked and inserted again according to its
+ * current ti->pos (the node is reused, so the tree size is unchanged).
+ * The cached maximum is updated when the node ends up rightmost, and the
+ * cached minimum is recomputed when the node is no longer leftmost.
+ * Does nothing on an empty tree.
+ */
+inline static void
+bt_pop(btr *bt)
+{
+  btr_node **link = &bt->root;
+  btr_node *moved;
+
+  /* Walk down the car chain to the leftmost node. */
+  while ((moved = *link) && moved->car) {
+    link = &moved->car;
+  }
+  if (!moved) {
+    return;
+  }
+
+  {
+    const int pos = moved->ti->pos;
+    int went_car = 0;
+    int went_cdr = 0;
+
+    /* Unlink: the node's right subtree takes its place. */
+    *link = moved->cdr;
+    moved->cdr = NULL;
+
+    /* Insert again from the root, same ordering rule as bt_push(). */
+    link = &bt->root;
+    while (*link) {
+      btr_node *current = *link;
+      if (pos < current->ti->pos) {
+        link = &current->car;
+        went_car = 1;
+      } else {
+        link = &current->cdr;
+        went_cdr = 1;
+      }
+    }
+    *link = moved;
+
+    if (!went_car) { bt->max = moved->ti; }
+    if (went_cdr) {
+      btr_node *leftmost = bt->root;
+      while (leftmost->car) {
+        leftmost = leftmost->car;
+      }
+      bt->min = leftmost->ti;
+    }
+  }
+}
+
+#endif /* USE_BHEAP */
+
+/*
+ * How get_weight() derives the weight of a posting:
+ *  - grn_wv_none:     always 1.
+ *  - grn_wv_static:   optarg->weight_vector[sid - 1] (0 when sid is beyond
+ *                     optarg->vector_size).
+ *  - grn_wv_dynamic:  result of the optarg->func callback.
+ *  - grn_wv_constant: optarg->vector_size itself.
+ */
+typedef enum {
+ grn_wv_none = 0,
+ grn_wv_static,
+ grn_wv_dynamic,
+ grn_wv_constant
+} grn_wv_mode;
+
+/*
+ * Return the weight to apply to a posting (record `rid`, section `sid`)
+ * for the weighting mode `wvm`:
+ *  - grn_wv_static:   optarg->weight_vector[sid - 1], or 0 when `sid` is
+ *                     greater than optarg->vector_size.
+ *  - grn_wv_dynamic:  value returned by optarg->func, which receives the
+ *                     result set, the record id cast to a pointer, the
+ *                     section id and optarg->func_arg.
+ *  - grn_wv_constant: optarg->vector_size.
+ *  - grn_wv_none and any other value: 1.
+ * `optarg` is only dereferenced for the static, dynamic and constant modes.
+ */
+inline static double
+get_weight(grn_ctx *ctx, grn_hash *s, grn_id rid, int sid,
+           grn_wv_mode wvm, grn_select_optarg *optarg)
+{
+  if (wvm == grn_wv_static) {
+    if (sid <= optarg->vector_size) {
+      return optarg->weight_vector[sid - 1];
+    }
+    return 0;
+  }
+  if (wvm == grn_wv_dynamic) {
+    /* todo : support hash with keys (pass the record key instead of the
+     * id to the callback) */
+    /* todo : cast */
+    return optarg->func(ctx, (void *)s, (void *)(intptr_t)rid, sid,
+                        optarg->func_arg);
+  }
+  if (wvm == grn_wv_constant) {
+    return optarg->vector_size;
+  }
+  return 1;
+}
+
+/*
+ * Similar-document search.
+ *
+ * `string` is tokenized against the index lexicon and every token id is
+ * collected in a temporary hash with an occurrence counter. For
+ * GRN_OP_UNSPLIT / GRN_OP_PARTIAL the lexicon is additionally searched by
+ * prefix / suffix of each token. Each collected token then gets
+ * max_size / estimated_size added to its counter (tokens with an estimated
+ * size of 0 are dropped), the tokens are sorted by that value in
+ * descending order and the postings of the top `limit` tokens are merged
+ * into `s` with `op`.
+ *
+ * `limit` is optarg->similarity_threshold capped to the number of tokens,
+ * or (number of tokens / 8) + 1 when the threshold is 0.
+ * optarg->max_size defaults to 1048576 when it is 0.
+ *
+ * Returns GRN_INVALID_ARGUMENT when a required argument (including the
+ * index's lexicon) is missing, GRN_NO_MEMORY_AVAILABLE when a temporary
+ * object cannot be created, GRN_SUCCESS otherwise.
+ */
+grn_rc
+grn_ii_similar_search(grn_ctx *ctx, grn_ii *ii,
+                      const char *string, unsigned int string_len,
+                      grn_hash *s, grn_operator op, grn_select_optarg *optarg)
+{
+  int *w1, limit;
+  grn_id tid, *tp, max_size;
+  grn_rc rc = GRN_SUCCESS;
+  grn_hash *h;
+  grn_token_cursor *token_cursor;
+  unsigned int token_flags = GRN_TOKEN_CURSOR_ENABLE_TOKENIZED_DELIMITER;
+  grn_obj *lexicon;
+
+  /* Validate `ii` before reading ii->lexicon: the lexicon must not be
+   * fetched from a NULL index. */
+  if (!ii || !string || !string_len || !s || !optarg) {
+    return GRN_INVALID_ARGUMENT;
+  }
+  lexicon = ii->lexicon;
+  if (!lexicon) {
+    return GRN_INVALID_ARGUMENT;
+  }
+  if (!(h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(int), 0))) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  if (!(token_cursor = grn_token_cursor_open(ctx, lexicon, string, string_len,
+                                             GRN_TOKEN_GET, token_flags))) {
+    grn_hash_close(ctx, h);
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  if (!(max_size = optarg->max_size)) { max_size = 1048576; }
+  /* Phase 1: count the occurrences of every token of the query. */
+  while (token_cursor->status != GRN_TOKEN_CURSOR_DONE &&
+         token_cursor->status != GRN_TOKEN_CURSOR_DONE_SKIP) {
+    if ((tid = grn_token_cursor_next(ctx, token_cursor))) {
+      if (grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **)&w1, NULL)) {
+        (*w1)++;
+      }
+    }
+    if (tid && token_cursor->curr_size) {
+      if (optarg->mode == GRN_OP_UNSPLIT) {
+        grn_table_search(ctx, lexicon, token_cursor->curr,
+                         token_cursor->curr_size,
+                         GRN_OP_PREFIX, (grn_obj *)h, GRN_OP_OR);
+      }
+      if (optarg->mode == GRN_OP_PARTIAL) {
+        grn_table_search(ctx, lexicon, token_cursor->curr,
+                         token_cursor->curr_size,
+                         GRN_OP_SUFFIX, (grn_obj *)h, GRN_OP_OR);
+      }
+    }
+  }
+  grn_token_cursor_close(ctx, token_cursor);
+  /* Phase 2: boost rare tokens, drop tokens without postings. */
+  {
+    grn_hash_cursor *c = grn_hash_cursor_open(ctx, h, NULL, 0, NULL, 0,
+                                              0, -1, 0);
+    if (!c) {
+      GRN_LOG(ctx, GRN_LOG_ALERT,
+              "grn_hash_cursor_open on grn_ii_similar_search failed !");
+      grn_hash_close(ctx, h);
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+    while (grn_hash_cursor_next(ctx, c)) {
+      uint32_t es;
+      grn_hash_cursor_get_key_value(ctx, c, (void **) &tp, NULL, (void **) &w1);
+      if ((es = grn_ii_estimate_size(ctx, ii, *tp))) {
+        *w1 += max_size / es;
+      } else {
+        grn_hash_cursor_delete(ctx, c, NULL);
+      }
+    }
+    grn_hash_cursor_close(ctx, c);
+  }
+  limit = optarg->similarity_threshold
+    ? (optarg->similarity_threshold > GRN_HASH_SIZE(h)
+       ? GRN_HASH_SIZE(h)
+       : optarg->similarity_threshold)
+    : (GRN_HASH_SIZE(h) >> 3) + 1;
+  /* Phase 3: merge the postings of the `limit` best tokens into `s`. */
+  if (GRN_HASH_SIZE(h)) {
+    grn_id j, id;
+    int w2, rep;
+    grn_ii_cursor *c;
+    grn_posting *pos;
+    grn_wv_mode wvm = grn_wv_none;
+    grn_table_sort_optarg arg = {
+      GRN_TABLE_SORT_DESC|GRN_TABLE_SORT_BY_VALUE|GRN_TABLE_SORT_AS_NUMBER,
+      NULL,
+      NULL,
+      0
+    };
+    grn_array *sorted = grn_array_create(ctx, NULL, sizeof(grn_id), 0);
+    if (!sorted) {
+      GRN_LOG(ctx, GRN_LOG_ALERT,
+              "grn_hash_sort on grn_ii_similar_search failed !");
+      grn_hash_close(ctx, h);
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+    grn_hash_sort(ctx, h, limit, sorted, &arg);
+    /* todo support subrec
+       rep = (s->record_unit == grn_rec_position || s->subrec_unit == grn_rec_position);
+    */
+    rep = 0;
+    if (optarg->func) {
+      wvm = grn_wv_dynamic;
+    } else if (optarg->vector_size) {
+      wvm = optarg->weight_vector ? grn_wv_static : grn_wv_constant;
+    }
+    for (j = 1; j <= limit; j++) {
+      grn_array_get_value(ctx, sorted, j, &id);
+      _grn_hash_get_key_value(ctx, h, id, (void **) &tp, (void **) &w1);
+      if (!*tp || !(c = grn_ii_cursor_open(ctx, ii, *tp, GRN_ID_NIL, GRN_ID_MAX,
+                                           rep
+                                           ? ii->n_elements
+                                           : ii->n_elements - 1, 0))) {
+        GRN_LOG(ctx, GRN_LOG_ERROR, "cursor open failed (%d)", *tp);
+        continue;
+      }
+      if (rep) {
+        while (grn_ii_cursor_next(ctx, c)) {
+          pos = c->post;
+          if ((w2 = get_weight(ctx, s, pos->rid, pos->sid, wvm, optarg)) > 0) {
+            while (grn_ii_cursor_next_pos(ctx, c)) {
+              res_add(ctx, s, (grn_rset_posinfo *) pos,
+                      *w1 * w2 * (1 + pos->weight), op);
+            }
+          }
+        }
+      } else {
+        while (grn_ii_cursor_next(ctx, c)) {
+          pos = c->post;
+          if ((w2 = get_weight(ctx, s, pos->rid, pos->sid, wvm, optarg)) > 0) {
+            res_add(ctx, s, (grn_rset_posinfo *) pos,
+                    *w1 * w2 * (pos->tf + pos->weight), op);
+          }
+        }
+      }
+      grn_ii_cursor_close(ctx, c);
+    }
+    grn_array_close(ctx, sorted);
+  }
+  grn_hash_close(ctx, h);
+  grn_ii_resolve_sel_and(ctx, s, op);
+  //  grn_hash_cursor_clear(r);
+  return rc;
+}
+
+#define TERM_EXTRACT_EACH_POST 0
+#define TERM_EXTRACT_EACH_TERM 1
+
+/*
+ * Find lexicon terms occurring in `string` and merge their postings into
+ * `s` with `op`.
+ *
+ * The string is normalized, then scanned from left to right: at each
+ * position the lexicon is queried with grn_table_lcp_search(). On a hit the
+ * postings of the matched term are added to `s`; the scan then advances by
+ * the matched key's length (policy TERM_EXTRACT_EACH_POST) or by one
+ * character (any other policy). Without a hit it advances by one character.
+ * The policy is taken from optarg->max_interval.
+ *
+ * With TERM_EXTRACT_EACH_POST the section id of each added posting is the
+ * byte offset of the match in the normalized string and the score is that
+ * offset + 1; otherwise the index's own section id and get_weight() are
+ * used.
+ *
+ * Returns GRN_INVALID_ARGUMENT when an argument is missing or the string
+ * object cannot be opened, GRN_SUCCESS otherwise.
+ */
+grn_rc
+grn_ii_term_extract(grn_ctx *ctx, grn_ii *ii, const char *string,
+ unsigned int string_len, grn_hash *s,
+ grn_operator op, grn_select_optarg *optarg)
+{
+ grn_rset_posinfo pi;
+ grn_id tid;
+ const char *p, *pe;
+ grn_obj *nstr;
+ const char *normalized;
+ unsigned int normalized_length_in_bytes;
+ grn_ii_cursor *c;
+ grn_posting *pos;
+ int skip, rep, policy;
+ grn_rc rc = GRN_SUCCESS;
+ grn_wv_mode wvm = grn_wv_none;
+ if (!ii || !string || !string_len || !s || !optarg) {
+ return GRN_INVALID_ARGUMENT;
+ }
+ if (!(nstr = grn_string_open(ctx, string, string_len, NULL, 0))) {
+ return GRN_INVALID_ARGUMENT;
+ }
+ /* max_interval is reused here to carry the extraction policy. */
+ policy = optarg->max_interval;
+ if (optarg->func) {
+ wvm = grn_wv_dynamic;
+ } else if (optarg->vector_size) {
+ wvm = optarg->weight_vector ? grn_wv_static : grn_wv_constant;
+ }
+ /* todo support subrec
+ if (policy == TERM_EXTRACT_EACH_POST) {
+ if ((rc = grn_records_reopen(s, grn_rec_section, grn_rec_none, 0))) { goto exit; }
+ }
+ rep = (s->record_unit == grn_rec_position || s->subrec_unit == grn_rec_position);
+ */
+ /* rep is fixed to 0, so the position-level branch below is currently never taken. */
+ rep = 0;
+ grn_string_get_normalized(ctx, nstr, &normalized, &normalized_length_in_bytes,
+ NULL);
+ for (p = normalized, pe = p + normalized_length_in_bytes; p < pe; p += skip) {
+ if ((tid = grn_table_lcp_search(ctx, ii->lexicon, p, pe - p))) {
+ /* skip is set before the cursor is opened so that `continue` below still advances. */
+ if (policy == TERM_EXTRACT_EACH_POST) {
+ if (!(skip = grn_table_get_key(ctx, ii->lexicon, tid, NULL, 0))) { break; }
+ } else {
+ if (!(skip = (int)grn_charlen(ctx, p, pe))) { break; }
+ }
+ if (!(c = grn_ii_cursor_open(ctx, ii, tid, GRN_ID_NIL, GRN_ID_MAX,
+ rep
+ ? ii->n_elements
+ : ii->n_elements - 1, 0))) {
+ GRN_LOG(ctx, GRN_LOG_ERROR, "cursor open failed (%d)", tid);
+ continue;
+ }
+ if (rep) {
+ while (grn_ii_cursor_next(ctx, c)) {
+ pos = c->post;
+ while (grn_ii_cursor_next_pos(ctx, c)) {
+ res_add(ctx, s, (grn_rset_posinfo *) pos,
+ get_weight(ctx, s, pos->rid, pos->sid, wvm, optarg), op);
+ }
+ }
+ } else {
+ while (grn_ii_cursor_next(ctx, c)) {
+ if (policy == TERM_EXTRACT_EACH_POST) {
+ /* NOTE(review): pi.pos is not assigned before pi is used as a key; confirm s->key_size never covers it. */
+ pi.rid = c->post->rid;
+ pi.sid = p - normalized;
+ res_add(ctx, s, &pi, pi.sid + 1, op);
+ } else {
+ pos = c->post;
+ res_add(ctx, s, (grn_rset_posinfo *) pos,
+ get_weight(ctx, s, pos->rid, pos->sid, wvm, optarg), op);
+ }
+ }
+ }
+ grn_ii_cursor_close(ctx, c);
+ } else {
+ /* No term starts here: move on by one character, stop on an invalid one. */
+ if (!(skip = (int)grn_charlen(ctx, p, pe))) {
+ break;
+ }
+ }
+ }
+ grn_obj_close(ctx, nstr);
+ return rc;
+}
+
+/*
+ * One match produced by grn_ii_select_cursor_next(): the record and
+ * section it was found in, the position range it covers, and the term
+ * frequency / weight filled in by the cursor.
+ */
+typedef struct {
+ grn_id rid;
+ uint32_t sid;
+ uint32_t start_pos;
+ uint32_t end_pos;
+ uint32_t tf;
+ uint32_t weight;
+} grn_ii_select_cursor_posting;
+
+/*
+ * State of a cursor that enumerates the matches of one query string.
+ *  - bt:            position tree, only allocated for GRN_OP_NEAR.
+ *  - tis / n_tis:   token infos built from the query string.
+ *  - max_interval:  copied from the select options for GRN_OP_NEAR.
+ *  - posting:       storage for the posting returned by the cursor.
+ *  - string / string_len: the query string (pointer is borrowed, not copied).
+ *  - done:          set once the first token info is exhausted.
+ *  - unshifted_posting / have_unshifted_posting: a posting pushed back with
+ *    grn_ii_select_cursor_unshift(), returned first by the next call.
+ */
+typedef struct {
+ btr *bt;
+ grn_ii *ii;
+ token_info **tis;
+ uint32_t n_tis;
+ int max_interval;
+ grn_operator mode;
+ grn_ii_select_cursor_posting posting;
+ const char *string;
+ unsigned int string_len;
+ grn_bool done;
+ grn_ii_select_cursor_posting unshifted_posting;
+ grn_bool have_unshifted_posting;
+} grn_ii_select_cursor;
+
+/*
+ * Release a select cursor: every non-NULL token info, the token info
+ * array, the position tree and the cursor itself.
+ * A NULL cursor is accepted. Always returns GRN_SUCCESS.
+ */
+static grn_rc
+grn_ii_select_cursor_close(grn_ctx *ctx,
+                           grn_ii_select_cursor *cursor)
+{
+  uint32_t nth;
+
+  if (!cursor) {
+    return GRN_SUCCESS;
+  }
+
+  for (nth = 0; nth < cursor->n_tis; nth++) {
+    token_info *ti = cursor->tis[nth];
+    if (ti) {
+      token_info_close(ctx, ti);
+    }
+  }
+  if (cursor->tis) {
+    GRN_FREE(cursor->tis);
+  }
+  bt_close(ctx, cursor->bt);
+  GRN_FREE(cursor);
+
+  return GRN_SUCCESS;
+}
+
+/*
+ * Create a cursor that enumerates the matches of `string` in `ii`.
+ *
+ * The mode comes from optarg->mode (GRN_OP_EXACT when `optarg` is NULL).
+ * Only EXACT, FUZZY, NEAR and NEAR2 are supported; NEAR2 is turned into
+ * NEAR after clearing the token offsets. FUZZY and NEAR therefore imply a
+ * non-NULL `optarg`, whose `fuzzy` / `max_interval` members are read.
+ *
+ * `string` is borrowed: the cursor keeps the pointer, so it must outlive
+ * the cursor.
+ *
+ * Returns NULL when the string is empty, the mode is unsupported, an
+ * allocation fails, or no token info could be built (which includes
+ * "token not found" style results from the token info builders).
+ * Release the cursor with grn_ii_select_cursor_close().
+ */
+static grn_ii_select_cursor *
+grn_ii_select_cursor_open(grn_ctx *ctx,
+                          grn_ii *ii,
+                          const char *string,
+                          unsigned int string_len,
+                          grn_select_optarg *optarg)
+{
+  grn_operator mode = GRN_OP_EXACT;
+  grn_ii_select_cursor *cursor;
+
+  if (string_len == 0) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "[ii][select][cursor][open] empty string");
+    return NULL;
+  }
+
+  if (optarg) {
+    mode = optarg->mode;
+  }
+  switch (mode) {
+  case GRN_OP_EXACT :
+  case GRN_OP_FUZZY :
+  case GRN_OP_NEAR :
+  case GRN_OP_NEAR2 :
+    break;
+  default :
+    ERR(GRN_INVALID_ARGUMENT,
+        "[ii][select][cursor][open] "
+        "EXACT, FUZZY, NEAR and NEAR2 are only supported mode: %-.256s",
+        grn_operator_to_string(mode));
+    /* Do not go on and build a cursor for a mode that was just reported
+     * as an error. */
+    return NULL;
+  }
+
+  cursor = GRN_CALLOC(sizeof(grn_ii_select_cursor));
+  if (!cursor) {
+    ERR(ctx->rc,
+        "[ii][select][cursor][open] failed to allocate cursor: %-.256s",
+        ctx->errbuf);
+    return NULL;
+  }
+
+  cursor->ii = ii;
+  cursor->mode = mode;
+
+  if (!(cursor->tis = GRN_MALLOC(sizeof(token_info *) * string_len * 2))) {
+    ERR(ctx->rc,
+        "[ii][select][cursor][open] failed to allocate token info container: %-.256s",
+        ctx->errbuf);
+    GRN_FREE(cursor);
+    return NULL;
+  }
+  cursor->n_tis = 0;
+  if (cursor->mode == GRN_OP_FUZZY) {
+    grn_bool only_skip_token = GRN_FALSE;
+    grn_id previous_min = GRN_ID_NIL;
+    if (token_info_build_fuzzy(ctx, ii->lexicon, ii, string, string_len,
+                               cursor->tis, &(cursor->n_tis),
+                               &only_skip_token, previous_min,
+                               cursor->mode, &(optarg->fuzzy)) != GRN_SUCCESS) {
+      grn_ii_select_cursor_close(ctx, cursor);
+      return NULL;
+    }
+  } else {
+    grn_bool only_skip_token = GRN_FALSE;
+    grn_id previous_min = GRN_ID_NIL;
+    if (token_info_build(ctx, ii->lexicon, ii, string, string_len,
+                         cursor->tis, &(cursor->n_tis),
+                         &only_skip_token, previous_min,
+                         cursor->mode) != GRN_SUCCESS) {
+      grn_ii_select_cursor_close(ctx, cursor);
+      return NULL;
+    }
+  }
+  if (cursor->n_tis == 0) {
+    grn_ii_select_cursor_close(ctx, cursor);
+    return NULL;
+  }
+
+  switch (cursor->mode) {
+  case GRN_OP_NEAR2 :
+    token_info_clear_offset(cursor->tis, cursor->n_tis);
+    cursor->mode = GRN_OP_NEAR;
+    /* fallthru */
+  case GRN_OP_NEAR :
+    if (!(cursor->bt = bt_open(ctx, cursor->n_tis))) {
+      ERR(ctx->rc,
+          "[ii][select][cursor][open] failed to allocate btree: %-.256s",
+          ctx->errbuf);
+      grn_ii_select_cursor_close(ctx, cursor);
+      return NULL;
+    }
+    cursor->max_interval = optarg->max_interval;
+    break;
+  default :
+    break;
+  }
+  qsort(cursor->tis, cursor->n_tis, sizeof(token_info *), token_compare);
+  GRN_LOG(ctx, GRN_LOG_INFO,
+          "[ii][select][cursor][open] n=%d <%.*s>",
+          cursor->n_tis,
+          string_len, string);
+
+  cursor->string = string;
+  cursor->string_len = string_len;
+
+  cursor->done = GRN_FALSE;
+
+  cursor->have_unshifted_posting = GRN_FALSE;
+
+  return cursor;
+}
+
+/*
+ * Return the next match of the cursor, or NULL when there is none left
+ * (or on error in the NEAR branch, where ERR() is raised first).
+ *
+ * A posting pushed back with grn_ii_select_cursor_unshift() is returned
+ * first. Otherwise the token infos are aligned on a common record/section
+ * and, once aligned, a match is searched inside it:
+ *  - a single token info matches at its current position;
+ *  - GRN_OP_NEAR uses the position tree to test whether the smallest and
+ *    largest positions are within max_interval (always when it is < 0);
+ *  - every other mode requires all token infos to agree on the same
+ *    `pos` value (ti->pos), i.e. the tokens to be consecutive.
+ *
+ * The returned pointer refers to storage inside the cursor and is
+ * overwritten by the next call.
+ */
+static grn_ii_select_cursor_posting *
+grn_ii_select_cursor_next(grn_ctx *ctx,
+ grn_ii_select_cursor *cursor)
+{
+ btr *bt = cursor->bt;
+ token_info **tis = cursor->tis;
+ token_info **tie = tis + cursor->n_tis;
+ uint32_t n_tis = cursor->n_tis;
+ int max_interval = cursor->max_interval;
+ grn_operator mode = cursor->mode;
+
+ if (cursor->have_unshifted_posting) {
+ cursor->have_unshifted_posting = GRN_FALSE;
+ return &(cursor->unshifted_posting);
+ }
+
+ if (cursor->done) {
+ return NULL;
+ }
+
+ for (;;) {
+ grn_id rid;
+ grn_id sid;
+ grn_id next_rid;
+ grn_id next_sid;
+ token_info **tip;
+
+ /* Take the first token info's record/section as the target and try to
+ bring every other token info onto it. */
+ rid = (*tis)->p->rid;
+ sid = (*tis)->p->sid;
+ for (tip = tis + 1, next_rid = rid, next_sid = sid + 1;
+ tip < tie;
+ tip++) {
+ token_info *ti = *tip;
+ if (token_info_skip(ctx, ti, rid, sid)) { return NULL; }
+ if (ti->p->rid != rid || ti->p->sid != sid) {
+ next_rid = ti->p->rid;
+ next_sid = ti->p->sid;
+ break;
+ }
+ }
+
+ /* tip == tie: every token info is on (rid, sid). */
+ if (tip == tie) {
+ int start_pos = 0;
+ int pos = 0;
+ int end_pos = 0;
+ int score = 0;
+ int tf = 0;
+ int tscore = 0;
+
+/* Move `ti` to position >= pos inside (rid, sid); leave the enclosing loop
+ when that fails or when `ti` moved on to another record/section (which
+ then becomes the next target). Expects `ti` in scope. */
+#define SKIP_OR_BREAK(pos) {\
+ if (token_info_skip_pos(ctx, ti, rid, sid, pos)) { break; } \
+ if (ti->p->rid != rid || ti->p->sid != sid) { \
+ next_rid = ti->p->rid; \
+ next_sid = ti->p->sid; \
+ break; \
+ } \
+}
+
+/* Store the current match in cursor->posting, advance the first token info
+ past it (marking the cursor done when it is exhausted) and return from
+ the function. This macro is not #undef'ed after the function. */
+#define RETURN_POSTING() do { \
+ cursor->posting.rid = rid; \
+ cursor->posting.sid = sid; \
+ cursor->posting.start_pos = start_pos; \
+ cursor->posting.end_pos = end_pos; \
+ cursor->posting.tf = tf; \
+ cursor->posting.weight = tscore; \
+ if (token_info_skip_pos(ctx, *tis, rid, sid, pos) != GRN_SUCCESS) { \
+ if (token_info_skip(ctx, *tis, next_rid, next_sid) != GRN_SUCCESS) { \
+ cursor->done = GRN_TRUE; \
+ } \
+ } \
+ return &(cursor->posting); \
+} while (GRN_FALSE)
+
+ if (n_tis == 1) {
+ start_pos = pos = end_pos = (*tis)->p->pos;
+ pos++;
+ tf = (*tis)->p->tf;
+ tscore = (*tis)->p->weight + (*tis)->cursors->bins[0]->weight;
+ RETURN_POSTING();
+ } else if (mode == GRN_OP_NEAR) {
+ bt_zap(bt);
+ for (tip = tis; tip < tie; tip++) {
+ token_info *ti = *tip;
+ SKIP_OR_BREAK(pos);
+ bt_push(bt, ti);
+ }
+ if (tip == tie) {
+ for (;;) {
+ token_info *ti;
+ int min;
+ int max;
+
+ ti = bt->min;
+ min = ti->pos;
+ max = bt->max->pos;
+ if (min > max) {
+ char ii_name[GRN_TABLE_MAX_KEY_SIZE];
+ int ii_name_size;
+ ii_name_size = grn_obj_name(ctx,
+ (grn_obj *)(cursor->ii),
+ ii_name,
+ GRN_TABLE_MAX_KEY_SIZE);
+ ERR(GRN_FILE_CORRUPT,
+ "[ii][select][cursor][near] "
+ "max position must be larger than min position: "
+ "min:<%d> max:<%d> ii:<%.*s> string:<%.*s>",
+ min, max,
+ ii_name_size, ii_name,
+ cursor->string_len,
+ cursor->string);
+ return NULL;
+ }
+ if ((max_interval < 0) || (max - min <= max_interval)) {
+ /* TODO: Set start_pos, pos, end_pos, tf and tscore */
+ /* RETURN_POSTING() returns, so the two statements after it in this
+ branch are never executed; the posting carries the zero-initialised
+ start_pos, end_pos, tf and weight. */
+ RETURN_POSTING();
+ if (ti->pos == max + 1) {
+ break;
+ }
+ SKIP_OR_BREAK(max + 1);
+ } else {
+ if (ti->pos == max - max_interval) {
+ break;
+ }
+ SKIP_OR_BREAK(max - max_interval);
+ }
+ bt_pop(bt);
+ }
+ }
+ } else {
+ /* Round-robin over the token infos until `count` of them in a row agree
+ on the same ti->pos value. */
+ int count = 0;
+ for (tip = tis; ; tip++) {
+ token_info *ti;
+
+ if (tip == tie) { tip = tis; }
+ ti = *tip;
+ SKIP_OR_BREAK(pos);
+ if (ti->pos == pos) {
+ score += ti->p->weight + ti->cursors->bins[0]->weight;
+ count++;
+ if (ti->p->pos > end_pos) {
+ end_pos = ti->p->pos;
+ }
+ } else {
+ /* Disagreement: restart the run from this token info's position. */
+ score = ti->p->weight + ti->cursors->bins[0]->weight;
+ count = 1;
+ start_pos = pos = ti->pos;
+ end_pos = ti->p->pos;
+ }
+ if (count == n_tis) {
+ pos++;
+ if (ti->p->pos > end_pos) {
+ end_pos = ti->p->pos;
+ }
+ tf = 1;
+ tscore += score;
+ RETURN_POSTING();
+ }
+ }
+ }
+#undef SKIP_OR_BREAK
+ }
+ /* No match in (rid, sid): move the first token info to the next target. */
+ if (token_info_skip(ctx, *tis, next_rid, next_sid)) {
+ return NULL;
+ }
+ }
+}
+
+/*
+ * Push `posting` back onto the cursor: a copy is stored in the cursor and
+ * returned by the next grn_ii_select_cursor_next() call. Only one posting
+ * can be held; a second unshift overwrites the first.
+ */
+static void
+grn_ii_select_cursor_unshift(grn_ctx *ctx,
+                             grn_ii_select_cursor *cursor,
+                             grn_ii_select_cursor_posting *posting)
+{
+  cursor->have_unshifted_posting = GRN_TRUE;
+  cursor->unshifted_posting = *posting;
+}
+
+/*
+ * Split a regular expression query into the literal strings it contains.
+ *
+ * The query is cut at every unescaped ".*"; each non-empty piece is
+ * appended to the vector `parsed_strings` as a GRN_DB_TEXT element.
+ * A backslash escapes the following character, which is then copied
+ * literally (the backslash itself is dropped), with two exceptions:
+ *  - "\A" as the very first character becomes the tokenizer begin mark;
+ *  - "\z" as the very last character becomes the tokenizer end mark.
+ *
+ * Returns GRN_SUCCESS, or the context's rc after raising
+ * GRN_INVALID_ARGUMENT when a character with invalid encoding is found
+ * (`log_tag` prefixes that message).
+ */
+static grn_rc
+grn_ii_parse_regexp_query(grn_ctx *ctx,
+ const char *log_tag,
+ const char *string, unsigned int string_len,
+ grn_obj *parsed_strings)
+{
+ grn_bool escaping = GRN_FALSE;
+ /* Number of characters consumed so far; the escaping backslash is not
+ counted and a ".*" separator counts as one. */
+ int nth_char = 0;
+ const char *current = string;
+ const char *string_end = string + string_len;
+ grn_obj buffer;
+
+ GRN_TEXT_INIT(&buffer, 0);
+ while (current < string_end) {
+ const char *target;
+ int char_len;
+
+ char_len = grn_charlen(ctx, current, string_end);
+ if (char_len == 0) {
+ GRN_OBJ_FIN(ctx, &buffer);
+ ERR(GRN_INVALID_ARGUMENT,
+ "%-.256s invalid encoding character: <%.*s|%#x|>",
+ log_tag,
+ (int)(current - string), string,
+ *current);
+ return ctx->rc;
+ }
+ target = current;
+ current += char_len;
+
+ if (escaping) {
+ escaping = GRN_FALSE;
+ if (char_len == 1) {
+ switch (*target) {
+ case 'A' :
+ if (nth_char == 0) {
+ target = GRN_TOKENIZER_BEGIN_MARK_UTF8;
+ char_len = GRN_TOKENIZER_BEGIN_MARK_UTF8_LEN;
+ }
+ break;
+ case 'z' :
+ if (current == string_end) {
+ target = GRN_TOKENIZER_END_MARK_UTF8;
+ char_len = GRN_TOKENIZER_END_MARK_UTF8_LEN;
+ }
+ break;
+ default :
+ break;
+ }
+ }
+ } else {
+ if (char_len == 1) {
+ if (*target == '\\') {
+ escaping = GRN_TRUE;
+ continue;
+ } else if (*target == '.' &&
+ grn_charlen(ctx, current, string_end) == 1 &&
+ *current == '*') {
+ /* ".*": close the current piece (if any) and skip the '*'. */
+ if (GRN_TEXT_LEN(&buffer) > 0) {
+ grn_vector_add_element(ctx,
+ parsed_strings,
+ GRN_TEXT_VALUE(&buffer),
+ GRN_TEXT_LEN(&buffer),
+ 0,
+ GRN_DB_TEXT);
+ GRN_BULK_REWIND(&buffer);
+ }
+ current++;
+ nth_char++;
+ continue;
+ }
+ }
+ }
+
+ GRN_TEXT_PUT(ctx, &buffer, target, char_len);
+ nth_char++;
+ }
+ /* Flush the trailing piece. */
+ if (GRN_TEXT_LEN(&buffer) > 0) {
+ grn_vector_add_element(ctx,
+ parsed_strings,
+ GRN_TEXT_VALUE(&buffer),
+ GRN_TEXT_LEN(&buffer),
+ 0,
+ GRN_DB_TEXT);
+ }
+ GRN_OBJ_FIN(ctx, &buffer);
+
+ return GRN_SUCCESS;
+}
+
+/*
+ * Index search for a regular expression query.
+ *
+ * The query is split into literal pieces by grn_ii_parse_regexp_query().
+ *  - No piece (for example the query ".*"): nothing is searched and `s` is
+ *    left untouched.
+ *  - One piece: delegated to grn_ii_select() in GRN_OP_EXACT mode.
+ *  - Several pieces: one select cursor is opened per piece; a posting of
+ *    the first piece is accepted when every following piece has a match in
+ *    the same record and section that starts after the end of the previous
+ *    piece's match. Accepted postings are merged into `s` with score 1.0.
+ *    If any cursor cannot be opened, nothing is added.
+ *
+ * optarg->mode is temporarily switched to GRN_OP_EXACT and set to
+ * GRN_OP_REGEXP before returning (when `optarg` is given and parsing
+ * succeeded).
+ *
+ * Returns the parse error, the result of grn_ii_select() for a single
+ * piece, GRN_NO_MEMORY_AVAILABLE when the cursor array cannot be
+ * allocated, GRN_SUCCESS otherwise.
+ */
+static grn_rc
+grn_ii_select_regexp(grn_ctx *ctx, grn_ii *ii,
+                     const char *string, unsigned int string_len,
+                     grn_hash *s, grn_operator op, grn_select_optarg *optarg)
+{
+  grn_rc rc;
+  grn_obj parsed_strings;
+  unsigned int n_parsed_strings;
+
+  GRN_TEXT_INIT(&parsed_strings, GRN_OBJ_VECTOR);
+  rc = grn_ii_parse_regexp_query(ctx, "[ii][select][regexp]",
+                                 string, string_len, &parsed_strings);
+  if (rc != GRN_SUCCESS) {
+    GRN_OBJ_FIN(ctx, &parsed_strings);
+    return rc;
+  }
+
+  if (optarg) {
+    optarg->mode = GRN_OP_EXACT;
+  }
+
+  n_parsed_strings = grn_vector_size(ctx, &parsed_strings);
+  if (n_parsed_strings == 0) {
+    /* Nothing literal to look up. Without this guard the multi-piece path
+     * below would read cursors[0] from a zero-length array. */
+  } else if (n_parsed_strings == 1) {
+    const char *parsed_string;
+    unsigned int parsed_string_len;
+    parsed_string_len = grn_vector_get_element(ctx,
+                                               &parsed_strings,
+                                               0,
+                                               &parsed_string,
+                                               NULL,
+                                               NULL);
+    rc = grn_ii_select(ctx, ii,
+                       parsed_string,
+                       parsed_string_len,
+                       s, op, optarg);
+  } else {
+    unsigned int i;
+    grn_ii_select_cursor **cursors;
+    grn_bool have_error = GRN_FALSE;
+
+    cursors = GRN_CALLOC(sizeof(grn_ii_select_cursor *) * n_parsed_strings);
+    if (!cursors) {
+      rc = GRN_NO_MEMORY_AVAILABLE;
+    } else {
+      for (i = 0; i < n_parsed_strings; i++) {
+        const char *parsed_string;
+        unsigned int parsed_string_len;
+        parsed_string_len = grn_vector_get_element(ctx,
+                                                   &parsed_strings,
+                                                   i,
+                                                   &parsed_string,
+                                                   NULL,
+                                                   NULL);
+        cursors[i] = grn_ii_select_cursor_open(ctx,
+                                               ii,
+                                               parsed_string,
+                                               parsed_string_len,
+                                               optarg);
+        if (!cursors[i]) {
+          have_error = GRN_TRUE;
+          break;
+        }
+      }
+
+      while (!have_error) {
+        grn_ii_select_cursor_posting *posting;
+        uint32_t pos;
+
+        posting = grn_ii_select_cursor_next(ctx, cursors[0]);
+        if (!posting) {
+          break;
+        }
+
+        /* Chain the remaining pieces: each must match after `pos` in the
+         * same record and section. */
+        pos = posting->end_pos;
+        for (i = 1; i < n_parsed_strings; i++) {
+          grn_ii_select_cursor_posting *posting_i;
+
+          for (;;) {
+            posting_i = grn_ii_select_cursor_next(ctx, cursors[i]);
+            if (!posting_i) {
+              break;
+            }
+
+            if (posting_i->rid == posting->rid &&
+                posting_i->sid == posting->sid &&
+                posting_i->start_pos > pos) {
+              grn_ii_select_cursor_unshift(ctx, cursors[i], posting_i);
+              break;
+            }
+            /* Already past the current record: keep it for a later one. */
+            if (posting_i->rid > posting->rid) {
+              grn_ii_select_cursor_unshift(ctx, cursors[i], posting_i);
+              break;
+            }
+          }
+
+          if (!posting_i) {
+            break;
+          }
+
+          if (posting_i->rid != posting->rid || posting_i->sid != posting->sid) {
+            break;
+          }
+
+          pos = posting_i->end_pos;
+        }
+
+        if (i == n_parsed_strings) {
+          grn_rset_posinfo pi = {posting->rid, posting->sid, pos};
+          double record_score = 1.0;
+          res_add(ctx, s, &pi, record_score, op);
+        }
+      }
+
+      for (i = 0; i < n_parsed_strings; i++) {
+        if (cursors[i]) {
+          grn_ii_select_cursor_close(ctx, cursors[i]);
+        }
+      }
+      GRN_FREE(cursors);
+    }
+  }
+  GRN_OBJ_FIN(ctx, &parsed_strings);
+
+  if (optarg) {
+    optarg->mode = GRN_OP_REGEXP;
+  }
+
+  return rc;
+}
+
+#ifdef GRN_II_SELECT_ENABLE_SEQUENTIAL_SEARCH
+/*
+ * Decide whether the sequential (regex over stored values) search should
+ * replace the index search.
+ *
+ * It is used only when all of the following hold:
+ *  - `too_many_index_match_ratio` is not negative;
+ *  - `op` is GRN_OP_AND and optarg->mode is GRN_OP_EXACT;
+ *  - the index has at least one source;
+ *  - for at least one token info, the current number of records in
+ *    `result` is not greater than info->size * too_many_index_match_ratio.
+ *
+ * `optarg` is dereferenced once the ratio and operator checks have passed.
+ */
+static grn_bool
+grn_ii_select_sequential_search_should_use(grn_ctx *ctx,
+                                           grn_ii *ii,
+                                           const char *raw_query,
+                                           unsigned int raw_query_len,
+                                           grn_hash *result,
+                                           grn_operator op,
+                                           grn_wv_mode wvm,
+                                           grn_select_optarg *optarg,
+                                           token_info **token_infos,
+                                           uint32_t n_token_infos,
+                                           double too_many_index_match_ratio)
+{
+  int n_sources;
+  int n_existing_records;
+  uint32_t nth;
+
+  if (too_many_index_match_ratio < 0.0 ||
+      op != GRN_OP_AND ||
+      optarg->mode != GRN_OP_EXACT) {
+    return GRN_FALSE;
+  }
+
+  n_sources = ii->obj.source_size / sizeof(grn_id);
+  if (n_sources == 0) {
+    return GRN_FALSE;
+  }
+
+  n_existing_records = GRN_HASH_SIZE(result);
+  for (nth = 0; nth < n_token_infos; nth++) {
+    token_info *info = token_infos[nth];
+    if (n_existing_records <= (info->size * too_many_index_match_ratio)) {
+      return GRN_TRUE;
+    }
+  }
+  return GRN_FALSE;
+}
+
+/*
+ * Sequential search over the records already in `result`.
+ *
+ * For every source of the index, the source value of each record in
+ * `result` is fetched, normalized with `normalizer` / `encoding` and
+ * matched against `regex`. Every match is merged back into `result` with
+ * `op`, using section id (source index + 1), position 0 and the weight
+ * given by get_weight().
+ *
+ * Sources, accessors, cursors or normalized strings that cannot be
+ * obtained are skipped instead of being used as NULL.
+ */
+static void
+grn_ii_select_sequential_search_body(grn_ctx *ctx,
+                                     grn_ii *ii,
+                                     grn_obj *normalizer,
+                                     grn_encoding encoding,
+                                     OnigRegex regex,
+                                     grn_hash *result,
+                                     grn_operator op,
+                                     grn_wv_mode wvm,
+                                     grn_select_optarg *optarg)
+{
+  int i, n_sources;
+  grn_id *source_ids = ii->obj.source;
+  grn_obj buffer;
+
+  GRN_TEXT_INIT(&buffer, 0);
+  n_sources = ii->obj.source_size / sizeof(grn_id);
+  for (i = 0; i < n_sources; i++) {
+    grn_id source_id = source_ids[i];
+    grn_obj *source;
+    grn_obj *accessor;
+    grn_hash_cursor *cursor;
+    grn_id id;
+
+    source = grn_ctx_at(ctx, source_id);
+    if (!source) {
+      continue;
+    }
+    /* A table source means the index is on _key; otherwise resolve the
+     * column of the same name through the result set. */
+    switch (source->header.type) {
+    case GRN_TABLE_HASH_KEY :
+    case GRN_TABLE_PAT_KEY :
+    case GRN_TABLE_DAT_KEY :
+      accessor = grn_obj_column(ctx,
+                                (grn_obj *)result,
+                                GRN_COLUMN_NAME_KEY,
+                                GRN_COLUMN_NAME_KEY_LEN);
+      break;
+    default :
+      {
+        char column_name[GRN_TABLE_MAX_KEY_SIZE];
+        int column_name_size;
+        column_name_size = grn_column_name(ctx, source,
+                                           column_name,
+                                           GRN_TABLE_MAX_KEY_SIZE);
+        accessor = grn_obj_column(ctx, (grn_obj *)result, column_name,
+                                  column_name_size);
+      }
+      break;
+    }
+    if (!accessor) {
+      continue;
+    }
+
+    cursor = grn_hash_cursor_open(ctx, result, NULL, 0, NULL, 0, 0, -1, 0);
+    if (cursor) {
+      while ((id = grn_hash_cursor_next(ctx, cursor)) != GRN_ID_NIL) {
+        OnigPosition position;
+        grn_obj *value;
+        const char *normalized_value;
+        unsigned int normalized_value_length;
+
+        GRN_BULK_REWIND(&buffer);
+        grn_obj_get_value(ctx, accessor, id, &buffer);
+        value = grn_string_open_(ctx,
+                                 GRN_TEXT_VALUE(&buffer),
+                                 GRN_TEXT_LEN(&buffer),
+                                 normalizer, 0, encoding);
+        if (!value) {
+          /* Without a string object the normalized pointer and length
+           * would be passed to onig_search() uninitialized. */
+          continue;
+        }
+        grn_string_get_normalized(ctx, value,
+                                  &normalized_value, &normalized_value_length,
+                                  NULL);
+        position = onig_search(regex,
+                               normalized_value,
+                               normalized_value + normalized_value_length,
+                               normalized_value,
+                               normalized_value + normalized_value_length,
+                               NULL,
+                               0);
+        if (position != ONIG_MISMATCH) {
+          grn_id *record_id;
+          grn_rset_posinfo info;
+          double score;
+
+          grn_hash_cursor_get_key(ctx, cursor, (void **)&record_id);
+
+          info.rid = *record_id;
+          info.sid = i + 1;
+          info.pos = 0;
+          score = get_weight(ctx, result, info.rid, info.sid, wvm, optarg);
+          res_add(ctx, result, &info, score, op);
+        }
+        grn_obj_unlink(ctx, value);
+      }
+      grn_hash_cursor_close(ctx, cursor);
+    }
+    grn_obj_unlink(ctx, accessor);
+  }
+  grn_obj_unlink(ctx, &buffer);
+}
+
+/*
+ * Tries to answer the query by sequentially scanning the records in
+ * `result` instead of using the index.
+ *
+ * Returns GRN_TRUE when the sequential scan was performed (the caller can
+ * skip the index search) and GRN_FALSE when the scan is not applicable,
+ * when the query could not be normalized, or when the regular expression
+ * object could not be created (a warning is logged in the last case).
+ *
+ * The normalized query is compiled with ONIG_SYNTAX_ASIS, i.e. it is
+ * matched as a literal string, using ONIG_ENCODING_UTF8.
+ */
+static grn_bool
+grn_ii_select_sequential_search(grn_ctx *ctx,
+                                grn_ii *ii,
+                                const char *raw_query,
+                                unsigned int raw_query_len,
+                                grn_hash *result,
+                                grn_operator op,
+                                grn_wv_mode wvm,
+                                grn_select_optarg *optarg,
+                                token_info **token_infos,
+                                uint32_t n_token_infos)
+{
+  grn_bool processed = GRN_TRUE;
+  grn_encoding encoding;
+  grn_obj *normalizer;
+  int nflags = 0;
+  grn_obj *query;
+  const char *normalized_query = NULL;
+  unsigned int normalized_query_length = 0;
+
+  if (!grn_ii_select_sequential_search_should_use(ctx,
+                                                  ii,
+                                                  raw_query,
+                                                  raw_query_len,
+                                                  result,
+                                                  op,
+                                                  wvm,
+                                                  optarg,
+                                                  token_infos,
+                                                  n_token_infos,
+                                                  grn_ii_select_too_many_index_match_ratio)) {
+    return GRN_FALSE;
+  }
+
+  grn_table_get_info(ctx, ii->lexicon,
+                     NULL, &encoding, NULL, &normalizer, NULL);
+  query = grn_string_open_(ctx, raw_query, raw_query_len,
+                           normalizer, nflags, encoding);
+  if (!query) {
+    /* The query could not be normalized; let the caller fall back to the
+     * ordinary index search instead of compiling an undefined pattern. */
+    return GRN_FALSE;
+  }
+  grn_string_get_normalized(ctx, query,
+                            &normalized_query, &normalized_query_length,
+                            NULL);
+  {
+    OnigRegex regex;
+    int onig_result;
+    OnigErrorInfo error_info;
+    onig_result = onig_new(&regex,
+                           normalized_query,
+                           normalized_query + normalized_query_length,
+                           ONIG_OPTION_NONE,
+                           ONIG_ENCODING_UTF8,
+                           ONIG_SYNTAX_ASIS,
+                           &error_info);
+    if (onig_result == ONIG_NORMAL) {
+      grn_ii_select_sequential_search_body(ctx, ii, normalizer, encoding,
+                                           regex, result, op, wvm, optarg);
+      onig_free(regex);
+    } else {
+      char message[ONIG_MAX_ERROR_MESSAGE_LEN];
+      onig_error_code_to_str(message, onig_result, error_info);
+      GRN_LOG(ctx, GRN_LOG_WARNING,
+              "[ii][select][sequential] "
+              "failed to create regular expression object: %-.256s",
+              message);
+      processed = GRN_FALSE;
+    }
+  }
+  grn_obj_unlink(ctx, query);
+
+  return processed;
+}
+#endif
+
+/*
+ * grn_ii_select searches `ii` for `string` and merges the hits into `s`.
+ *
+ * ctx:        Context; used for allocation, logging and error reporting.
+ * ii:         Inverted index to search.  Must not be NULL.
+ * string:     Query text (string_len bytes).  An empty query skips the
+ *             search but still runs the common clean-up at `exit`.
+ * s:          Result set.  Must not be NULL.
+ * op:         Logical operator.  With GRN_OP_OR every matched record is
+ *             passed to res_add(); with other operators only records that
+ *             are already in `s` are considered.
+ * optarg:     Optional settings (may be NULL): search mode, weighting,
+ *             scorer and match_info.  Without it the mode is GRN_OP_EXACT.
+ *
+ * GRN_OP_SIMILAR, GRN_OP_TERM_EXTRACT and GRN_OP_REGEXP are delegated to
+ * their dedicated functions.  GRN_OP_NEAR2 is handled as GRN_OP_NEAR after
+ * clearing the token offsets.
+ *
+ * Returns GRN_INVALID_ARGUMENT when ii, ii->lexicon or s is NULL,
+ * GRN_NO_MEMORY_AVAILABLE when a working buffer cannot be allocated,
+ * ctx->rc when corrupt position data is detected in near search, and
+ * GRN_SUCCESS otherwise.
+ */
+grn_rc
+grn_ii_select(grn_ctx *ctx, grn_ii *ii,
+              const char *string, unsigned int string_len,
+              grn_hash *s, grn_operator op, grn_select_optarg *optarg)
+{
+  btr *bt = NULL;
+  grn_rc rc = GRN_SUCCESS;
+  int rep, orp, weight, max_interval = 0;
+  token_info *ti, **tis = NULL, **tip, **tie;
+  uint32_t n = 0, rid, sid, nrid, nsid;
+  grn_bool only_skip_token = GRN_FALSE;
+  grn_operator mode = GRN_OP_EXACT;
+  grn_wv_mode wvm = grn_wv_none;
+  grn_obj *lexicon = NULL;
+  grn_scorer_score_func *score_func = NULL;
+  grn_scorer_matched_record record;
+  grn_id previous_min = GRN_ID_NIL;
+  grn_id current_min = GRN_ID_NIL;
+  grn_bool set_min_enable_for_and_query = GRN_FALSE;
+
+  /* Validate ii before reading ii->lexicon. */
+  if (!ii || !s) { return GRN_INVALID_ARGUMENT; }
+  lexicon = ii->lexicon;
+  if (!lexicon) { return GRN_INVALID_ARGUMENT; }
+  if (optarg) {
+    mode = optarg->mode;
+    if (optarg->func) {
+      wvm = grn_wv_dynamic;
+    } else if (optarg->vector_size) {
+      wvm = optarg->weight_vector ? grn_wv_static : grn_wv_constant;
+    }
+    if (optarg->match_info) {
+      if (optarg->match_info->flags & GRN_MATCH_INFO_GET_MIN_RECORD_ID) {
+        previous_min = optarg->match_info->min;
+        set_min_enable_for_and_query = GRN_TRUE;
+      }
+    }
+  }
+  if (mode == GRN_OP_SIMILAR) {
+    return grn_ii_similar_search(ctx, ii, string, string_len, s, op, optarg);
+  }
+  if (mode == GRN_OP_TERM_EXTRACT) {
+    return grn_ii_term_extract(ctx, ii, string, string_len, s, op, optarg);
+  }
+  if (mode == GRN_OP_REGEXP) {
+    return grn_ii_select_regexp(ctx, ii, string, string_len, s, op, optarg);
+  }
+  /* todo : support subrec
+  rep = (s->record_unit == grn_rec_position || s->subrec_unit == grn_rec_position);
+  orp = (s->record_unit == grn_rec_position || op == GRN_OP_OR);
+  */
+  rep = 0;
+  orp = op == GRN_OP_OR;
+  if (!string_len) { goto exit; }
+  if (!(tis = GRN_MALLOC(sizeof(token_info *) * string_len * 2))) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  if (mode == GRN_OP_FUZZY) {
+    if (token_info_build_fuzzy(ctx, lexicon, ii, string, string_len,
+                               tis, &n, &only_skip_token, previous_min,
+                               mode, &(optarg->fuzzy)) ||
+        !n) {
+      goto exit;
+    }
+  } else {
+    if (token_info_build(ctx, lexicon, ii, string, string_len,
+                         tis, &n, &only_skip_token, previous_min, mode) ||
+        !n) {
+      goto exit;
+    }
+  }
+  switch (mode) {
+  case GRN_OP_NEAR2 :
+    token_info_clear_offset(tis, n);
+    mode = GRN_OP_NEAR;
+    /* fallthru */
+  case GRN_OP_NEAR :
+    if (!(bt = bt_open(ctx, n))) { rc = GRN_NO_MEMORY_AVAILABLE; goto exit; }
+    max_interval = optarg->max_interval;
+    break;
+  default :
+    break;
+  }
+  qsort(tis, n, sizeof(token_info *), token_compare);
+  tie = tis + n;
+  /*
+  for (tip = tis; tip < tie; tip++) {
+    ti = *tip;
+    grn_log("o=%d n=%d s=%d r=%d", ti->offset, ti->ntoken, ti->size, ti->rid);
+  }
+  */
+  GRN_LOG(ctx, GRN_LOG_INFO, "n=%d (%.*s)", n, string_len, string);
+  /* todo : array as result
+  if (n == 1 && (*tis)->cursors->n_entries == 1 && op == GRN_OP_OR
+      && !GRN_HASH_SIZE(s) && !s->garbages
+      && s->record_unit == grn_rec_document && !s->max_n_subrecs
+      && grn_ii_max_section(ii) == 1) {
+    grn_ii_cursor *c = (*tis)->cursors->bins[0];
+    if ((rc = grn_hash_array_init(s, (*tis)->size + 32768))) { goto exit; }
+    do {
+      grn_rset_recinfo *ri;
+      grn_posting *p = c->post;
+      if ((weight = get_weight(ctx, s, p->rid, p->sid, wvm, optarg))) {
+        GRN_HASH_INT_ADD(s, p, ri);
+        ri->score = (p->tf + p->score) * weight;
+        ri->n_subrecs = 1;
+      }
+    } while (grn_ii_cursor_next(ctx, c));
+    goto exit;
+  }
+  */
+#ifdef GRN_II_SELECT_ENABLE_SEQUENTIAL_SEARCH
+  if (grn_ii_select_sequential_search(ctx, ii, string, string_len,
+                                      s, op, wvm, optarg, tis, n)) {
+    goto exit;
+  }
+#endif
+
+  if (optarg && optarg->scorer) {
+    grn_proc *scorer = (grn_proc *)(optarg->scorer);
+    score_func = scorer->callbacks.scorer.score;
+    record.table = grn_ctx_at(ctx, s->obj.header.domain);
+    record.lexicon = lexicon;
+    record.id = GRN_ID_NIL;
+    GRN_RECORD_INIT(&(record.terms), GRN_OBJ_VECTOR, lexicon->header.domain);
+    GRN_UINT32_INIT(&(record.term_weights), GRN_OBJ_VECTOR);
+    record.total_term_weights = 0;
+    record.n_documents = grn_table_size(ctx, record.table);
+    record.n_occurrences = 0;
+    record.n_candidates = 0;
+    record.n_tokens = 0;
+    record.weight = 0;
+    record.args_expr = optarg->scorer_args_expr;
+    record.args_expr_offset = optarg->scorer_args_expr_offset;
+  }
+
+  /* Main loop: tis[0] drives the scan; the other tokens are advanced to
+   * the same (rid, sid) and the candidate is examined when all agree. */
+  for (;;) {
+    rid = (*tis)->p->rid;
+    sid = (*tis)->p->sid;
+    for (tip = tis + 1, nrid = rid, nsid = sid + 1; tip < tie; tip++) {
+      ti = *tip;
+      if (token_info_skip(ctx, ti, rid, sid)) { goto exit; }
+      if (ti->p->rid != rid || ti->p->sid != sid) {
+        nrid = ti->p->rid;
+        nsid = ti->p->sid;
+        break;
+      }
+    }
+    weight = get_weight(ctx, s, rid, sid, wvm, optarg);
+    if (tip == tie && weight != 0) {
+      grn_rset_posinfo pi = {rid, sid, 0};
+      if (orp || grn_hash_get(ctx, s, &pi, s->key_size, NULL)) {
+        int count = 0, noccur = 0, pos = 0, score = 0, tscore = 0, min, max;
+
+        if (score_func) {
+          GRN_BULK_REWIND(&(record.terms));
+          GRN_BULK_REWIND(&(record.term_weights));
+          record.n_candidates = 0;
+          record.n_tokens = 0;
+        }
+
+#define SKIP_OR_BREAK(pos) {\
+  if (token_info_skip_pos(ctx, ti, rid, sid, pos)) { break; } \
+  if (ti->p->rid != rid || ti->p->sid != sid) { \
+    nrid = ti->p->rid; \
+    nsid = ti->p->sid; \
+    break; \
+  } \
+}
+        if (n == 1 && !rep) {
+          noccur = (*tis)->p->tf;
+          tscore = (*tis)->p->weight + (*tis)->cursors->bins[0]->weight;
+          if (score_func) {
+            GRN_RECORD_PUT(ctx, &(record.terms), (*tis)->cursors->bins[0]->id);
+            GRN_UINT32_PUT(ctx, &(record.term_weights), tscore);
+            record.n_occurrences = noccur;
+            record.n_candidates = (*tis)->size;
+            record.n_tokens = (*tis)->ntoken;
+          }
+        } else if (mode == GRN_OP_NEAR) {
+          bt_zap(bt);
+          for (tip = tis; tip < tie; tip++) {
+            ti = *tip;
+            SKIP_OR_BREAK(pos);
+            bt_push(bt, ti);
+          }
+          if (tip == tie) {
+            for (;;) {
+              ti = bt->min; min = ti->pos; max = bt->max->pos;
+              if (min > max) {
+                char ii_name[GRN_TABLE_MAX_KEY_SIZE];
+                int ii_name_size;
+                ii_name_size = grn_obj_name(ctx, (grn_obj *)ii, ii_name,
+                                            GRN_TABLE_MAX_KEY_SIZE);
+                ERR(GRN_FILE_CORRUPT,
+                    "[ii][select][near] "
+                    "max position must be larger than min position: "
+                    "min:<%d> max:<%d> ii:<%.*s> string:<%.*s>",
+                    min, max,
+                    ii_name_size, ii_name,
+                    string_len, string);
+                rc = ctx->rc;
+                goto exit;
+              }
+              if ((max_interval < 0) || (max - min <= max_interval)) {
+                if (rep) { pi.pos = min; res_add(ctx, s, &pi, weight, op); }
+                noccur++;
+                if (ti->pos == max + 1) {
+                  break;
+                }
+                SKIP_OR_BREAK(max + 1);
+              } else {
+                if (ti->pos == max - max_interval) {
+                  break;
+                }
+                SKIP_OR_BREAK(max - max_interval);
+              }
+              bt_pop(bt);
+            }
+          }
+        } else {
+          for (tip = tis; ; tip++) {
+            if (tip == tie) { tip = tis; }
+            ti = *tip;
+            SKIP_OR_BREAK(pos);
+            if (ti->pos == pos) {
+              score += ti->p->weight + ti->cursors->bins[0]->weight; count++;
+            } else {
+              score = ti->p->weight + ti->cursors->bins[0]->weight; count = 1;
+              pos = ti->pos;
+              if (noccur == 0 && score_func) {
+                GRN_BULK_REWIND(&(record.terms));
+                GRN_BULK_REWIND(&(record.term_weights));
+                record.n_candidates = 0;
+                record.n_tokens = 0;
+              }
+            }
+            if (noccur == 0 && score_func) {
+              GRN_RECORD_PUT(ctx, &(record.terms), ti->cursors->bins[0]->id);
+              GRN_UINT32_PUT(ctx, &(record.term_weights),
+                             ti->p->weight + ti->cursors->bins[0]->weight);
+              record.n_candidates += ti->size;
+              record.n_tokens += ti->ntoken;
+            }
+            if (count == n) {
+              if (rep) {
+                pi.pos = pos; res_add(ctx, s, &pi, (score + 1) * weight, op);
+              }
+              tscore += score;
+              score = 0; count = 0; pos++;
+              noccur++;
+            }
+          }
+        }
+        if (noccur && !rep) {
+          double record_score;
+          if (score_func) {
+            record.id = rid;
+            record.weight = weight;
+            record.n_occurrences = noccur;
+            record.total_term_weights = tscore;
+            record_score = score_func(ctx, &record) * weight;
+          } else {
+            record_score = (noccur + tscore) * weight;
+          }
+          if (set_min_enable_for_and_query) {
+            if (current_min == GRN_ID_NIL) {
+              current_min = rid;
+            }
+          }
+          res_add(ctx, s, &pi, record_score, op);
+        }
+#undef SKIP_OR_BREAK
+      }
+    }
+    if (token_info_skip(ctx, *tis, nrid, nsid)) { goto exit; }
+  }
+exit :
+  if (score_func) {
+    GRN_OBJ_FIN(ctx, &(record.terms));
+    GRN_OBJ_FIN(ctx, &(record.term_weights));
+  }
+
+  if (set_min_enable_for_and_query) {
+    if (current_min > previous_min) {
+      optarg->match_info->min = current_min;
+    }
+  }
+
+  for (tip = tis; tip < tis + n; tip++) {
+    if (*tip) { token_info_close(ctx, *tip); }
+  }
+  if (tis) { GRN_FREE(tis); }
+  if (!only_skip_token) {
+    grn_ii_resolve_sel_and(ctx, s, op);
+  }
+  //  grn_hash_cursor_clear(r);
+  bt_close(ctx, bt);
+#ifdef DEBUG
+  {
+    uint32_t segno = GRN_II_MAX_LSEG, nnref = 0;
+    grn_io_mapinfo *info = ii->seg->maps;
+    for (; segno; segno--, info++) { if (info->nref) { nnref++; } }
+    GRN_LOG(ctx, GRN_LOG_INFO, "nnref=%d", nnref);
+  }
+#endif /* DEBUG */
+  return rc;
+}
+
+/*
+ * Estimates the number of matches for a regular expression query.
+ *
+ * The query is first converted by grn_ii_parse_regexp_query(); when that
+ * fails 0 is returned.  Otherwise the converted text is estimated through
+ * grn_ii_estimate_size_for_query() with optarg->mode temporarily switched
+ * to GRN_OP_EXACT and set to GRN_OP_REGEXP afterwards.
+ */
+static uint32_t
+grn_ii_estimate_size_for_query_regexp(grn_ctx *ctx, grn_ii *ii,
+                                      const char *query, unsigned int query_len,
+                                      grn_search_optarg *optarg)
+{
+  uint32_t estimated = 0;
+  grn_obj exact_query;
+
+  GRN_TEXT_INIT(&exact_query, 0);
+  if (grn_ii_parse_regexp_query(ctx, "[ii][estimate-size][query][regexp]",
+                                query, query_len,
+                                &exact_query) == GRN_SUCCESS) {
+    if (optarg) {
+      optarg->mode = GRN_OP_EXACT;
+    }
+    estimated = grn_ii_estimate_size_for_query(ctx, ii,
+                                               GRN_TEXT_VALUE(&exact_query),
+                                               GRN_TEXT_LEN(&exact_query),
+                                               optarg);
+    if (optarg) {
+      optarg->mode = GRN_OP_REGEXP;
+    }
+  }
+  GRN_OBJ_FIN(ctx, &exact_query);
+
+  return estimated;
+}
+
+/*
+ * Estimates how many records match `query` in `ii`.
+ *
+ * An empty query and an allocation failure both yield 0.  GRN_OP_REGEXP is
+ * delegated to grn_ii_estimate_size_for_query_regexp().  Otherwise the
+ * query is tokenized and the estimate is the smallest size/ntoken ratio
+ * among the tokens, multiplied by
+ * grn_ii_estimate_size_for_query_reduce_ratio once per additional token.
+ * A positive estimate below 1 is rounded up to 1.
+ */
+uint32_t
+grn_ii_estimate_size_for_query(grn_ctx *ctx, grn_ii *ii,
+                               const char *query, unsigned int query_len,
+                               grn_search_optarg *optarg)
+{
+  grn_obj *lexicon = ii->lexicon;
+  grn_operator mode = GRN_OP_EXACT;
+  grn_id min = GRN_ID_NIL;
+  grn_bool only_skip_token = GRN_FALSE;
+  token_info **token_infos = NULL;
+  uint32_t n_token_infos = 0;
+  uint32_t nth;
+  double estimated_size = 0;
+  double normalized_ratio = 1.0;
+  grn_rc rc;
+
+  if (query_len == 0) {
+    return 0;
+  }
+
+  if (optarg) {
+    /* Only these modes are honored; every other mode is treated as
+     * GRN_OP_EXACT. */
+    if (optarg->mode == GRN_OP_NEAR ||
+        optarg->mode == GRN_OP_NEAR2 ||
+        optarg->mode == GRN_OP_SIMILAR ||
+        optarg->mode == GRN_OP_REGEXP ||
+        optarg->mode == GRN_OP_FUZZY) {
+      mode = optarg->mode;
+    }
+    if (optarg->match_info.flags & GRN_MATCH_INFO_GET_MIN_RECORD_ID) {
+      min = optarg->match_info.min;
+    }
+  }
+
+  if (mode == GRN_OP_REGEXP) {
+    return grn_ii_estimate_size_for_query_regexp(ctx, ii, query, query_len,
+                                                 optarg);
+  }
+
+  token_infos = GRN_MALLOC(sizeof(token_info *) * query_len * 2);
+  if (!token_infos) {
+    return 0;
+  }
+
+  if (mode == GRN_OP_FUZZY) {
+    rc = token_info_build_fuzzy(ctx, lexicon, ii, query, query_len,
+                                token_infos, &n_token_infos,
+                                &only_skip_token, min,
+                                mode, &(optarg->fuzzy));
+  } else {
+    rc = token_info_build(ctx, lexicon, ii, query, query_len,
+                          token_infos, &n_token_infos,
+                          &only_skip_token, min, mode);
+  }
+
+  if (rc == GRN_SUCCESS) {
+    for (nth = 0; nth < n_token_infos; nth++) {
+      token_info *info = token_infos[nth];
+      double per_term = ((double)info->size / info->ntoken);
+      if (nth == 0 || per_term < estimated_size) {
+        estimated_size = per_term;
+      }
+      if (nth > 0) {
+        normalized_ratio *= grn_ii_estimate_size_for_query_reduce_ratio;
+      }
+    }
+
+    estimated_size *= normalized_ratio;
+    if (estimated_size > 0.0 && estimated_size < 1.0) {
+      estimated_size = 1.0;
+    }
+  }
+
+  /* Release whatever token information was built, even on failure. */
+  for (nth = 0; nth < n_token_infos; nth++) {
+    if (token_infos[nth]) {
+      token_info_close(ctx, token_infos[nth]);
+    }
+  }
+  GRN_FREE(token_infos);
+
+  return estimated_size;
+}
+
+/*
+ * Sums grn_ii_estimate_size() over every term that `lexicon_cursor` still
+ * yields.  The cursor is consumed but not closed.
+ */
+uint32_t
+grn_ii_estimate_size_for_lexicon_cursor(grn_ctx *ctx, grn_ii *ii,
+                                        grn_table_cursor *lexicon_cursor)
+{
+  uint32_t total = 0;
+  grn_id term;
+
+  for (term = grn_table_cursor_next(ctx, lexicon_cursor);
+       term != GRN_ID_NIL;
+       term = grn_table_cursor_next(ctx, lexicon_cursor)) {
+    total += grn_ii_estimate_size(ctx, ii, term);
+  }
+
+  return total;
+}
+
+/*
+ * grn_ii_sel converts a grn_search_optarg into a grn_select_optarg and runs
+ * grn_ii_select().
+ *
+ * With GRN_OP_OR the search is escalated while the number of hits does not
+ * exceed ctx->impl->match_escalation_threshold: first with GRN_OP_UNSPLIT,
+ * then with GRN_OP_PARTIAL.  Around each escalation the minimum record ID
+ * kept in match_info is saved, reset, and restored when the saved value is
+ * valid and smaller than the new one.
+ *
+ * Returns GRN_INVALID_ARGUMENT when s is NULL, ctx->rc when a search fails,
+ * and GRN_SUCCESS otherwise.
+ */
+grn_rc
+grn_ii_sel(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len,
+           grn_hash *s, grn_operator op, grn_search_optarg *optarg)
+{
+  grn_select_optarg select_arg;
+  grn_id saved_min = GRN_ID_NIL;
+
+  ERRCLR(ctx);
+  GRN_LOG(ctx, GRN_LOG_INFO, "grn_ii_sel > (%.*s)", string_len, string);
+  if (!s) { return GRN_INVALID_ARGUMENT; }
+
+  memset(&select_arg, 0, sizeof(grn_select_optarg));
+  select_arg.mode = GRN_OP_EXACT;
+  if (optarg) {
+    if (optarg->mode == GRN_OP_NEAR || optarg->mode == GRN_OP_NEAR2) {
+      select_arg.mode = optarg->mode;
+      select_arg.max_interval = optarg->max_interval;
+    } else if (optarg->mode == GRN_OP_SIMILAR) {
+      select_arg.mode = optarg->mode;
+      select_arg.similarity_threshold = optarg->similarity_threshold;
+    } else if (optarg->mode == GRN_OP_REGEXP) {
+      select_arg.mode = optarg->mode;
+    } else if (optarg->mode == GRN_OP_FUZZY) {
+      select_arg.mode = optarg->mode;
+      select_arg.fuzzy = optarg->fuzzy;
+    }
+    if (optarg->vector_size != 0) {
+      select_arg.weight_vector = optarg->weight_vector;
+      select_arg.vector_size = optarg->vector_size;
+    }
+    select_arg.scorer = optarg->scorer;
+    select_arg.scorer_args_expr = optarg->scorer_args_expr;
+    select_arg.scorer_args_expr_offset = optarg->scorer_args_expr_offset;
+    select_arg.match_info = &(optarg->match_info);
+  }
+  /* todo : support subrec
+  grn_rset_init(ctx, s, grn_rec_document, 0, grn_rec_none, 0, 0);
+  */
+  if (grn_ii_select(ctx, ii, string, string_len, s, op, &select_arg)) {
+    GRN_LOG(ctx, GRN_LOG_ERROR, "grn_ii_select on grn_ii_sel(1) failed !");
+    return ctx->rc;
+  }
+  GRN_LOG(ctx, GRN_LOG_INFO, "exact: %d", GRN_HASH_SIZE(s));
+
+  if (op != GRN_OP_OR) {
+    /* Escalation applies to OR searches only. */
+    GRN_LOG(ctx, GRN_LOG_INFO, "hits=%d", GRN_HASH_SIZE(s));
+    return GRN_SUCCESS;
+  }
+
+  /* First escalation: unsplit search. */
+  if ((int64_t)GRN_HASH_SIZE(s) <= ctx->impl->match_escalation_threshold) {
+    select_arg.mode = GRN_OP_UNSPLIT;
+    if (select_arg.match_info &&
+        (select_arg.match_info->flags & GRN_MATCH_INFO_GET_MIN_RECORD_ID)) {
+      saved_min = select_arg.match_info->min;
+      select_arg.match_info->min = GRN_ID_NIL;
+    }
+    if (grn_ii_select(ctx, ii, string, string_len, s, op, &select_arg)) {
+      GRN_LOG(ctx, GRN_LOG_ERROR,
+              "grn_ii_select on grn_ii_sel(2) failed !");
+      return ctx->rc;
+    }
+    GRN_LOG(ctx, GRN_LOG_INFO, "unsplit: %d", GRN_HASH_SIZE(s));
+    if (select_arg.match_info &&
+        (select_arg.match_info->flags & GRN_MATCH_INFO_GET_MIN_RECORD_ID) &&
+        saved_min > GRN_ID_NIL &&
+        saved_min < select_arg.match_info->min) {
+      select_arg.match_info->min = saved_min;
+    }
+  }
+
+  /* Second escalation: partial match search. */
+  if ((int64_t)GRN_HASH_SIZE(s) <= ctx->impl->match_escalation_threshold) {
+    select_arg.mode = GRN_OP_PARTIAL;
+    if (select_arg.match_info &&
+        (select_arg.match_info->flags & GRN_MATCH_INFO_GET_MIN_RECORD_ID)) {
+      saved_min = select_arg.match_info->min;
+      select_arg.match_info->min = GRN_ID_NIL;
+    }
+    if (grn_ii_select(ctx, ii, string, string_len, s, op, &select_arg)) {
+      GRN_LOG(ctx, GRN_LOG_ERROR,
+              "grn_ii_select on grn_ii_sel(3) failed !");
+      return ctx->rc;
+    }
+    GRN_LOG(ctx, GRN_LOG_INFO, "partial: %d", GRN_HASH_SIZE(s));
+    if (select_arg.match_info &&
+        (select_arg.match_info->flags & GRN_MATCH_INFO_GET_MIN_RECORD_ID) &&
+        saved_min > GRN_ID_NIL &&
+        saved_min < select_arg.match_info->min) {
+      select_arg.match_info->min = saved_min;
+    }
+  }
+
+  GRN_LOG(ctx, GRN_LOG_INFO, "hits=%d", GRN_HASH_SIZE(s));
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_at passes every posting of term `id` to res_add() on `s` with
+ * score 1 + posting weight, using operator `op`.  The cursor is opened with
+ * ii->n_elements - 1 elements.  Returns ctx->rc.
+ */
+grn_rc
+grn_ii_at(grn_ctx *ctx, grn_ii *ii, grn_id id, grn_hash *s, grn_operator op)
+{
+  grn_ii_cursor *cursor;
+
+  cursor = grn_ii_cursor_open(ctx, ii, id, GRN_ID_NIL, GRN_ID_MAX,
+                              ii->n_elements - 1, 0);
+  if (cursor) {
+    grn_posting *posting;
+    for (posting = grn_ii_cursor_next(ctx, cursor);
+         posting;
+         posting = grn_ii_cursor_next(ctx, cursor)) {
+      res_add(ctx, s, (grn_rset_posinfo *) posting, (1 + posting->weight), op);
+    }
+    grn_ii_cursor_close(ctx, cursor);
+  }
+  return ctx->rc;
+}
+
+/*
+ * Finishes an AND search on `s`: records whose n_subrecs carries
+ * GRN_RSET_UTIL_BIT keep their place (the bit is cleared), all other
+ * records are deleted.  Nothing is done for other operators or while
+ * GRN_CTX_TEMPORARY_DISABLE_II_RESOLVE_SEL_AND is set on the context.
+ */
+void
+grn_ii_resolve_sel_and(grn_ctx *ctx, grn_hash *s, grn_operator op)
+{
+  grn_hash_cursor *cursor;
+  grn_id entry_id;
+
+  if (op != GRN_OP_AND) {
+    return;
+  }
+  if (ctx->flags & GRN_CTX_TEMPORARY_DISABLE_II_RESOLVE_SEL_AND) {
+    return;
+  }
+
+  cursor = grn_hash_cursor_open(ctx, s, NULL, 0, NULL, 0,
+                                0, -1, 0);
+  if (!cursor) {
+    return;
+  }
+  while ((entry_id = grn_hash_cursor_next(ctx, cursor)) != GRN_ID_NIL) {
+    grn_rset_recinfo *recinfo;
+    grn_hash_cursor_get_value(ctx, cursor, (void **) &recinfo);
+    if ((recinfo->n_subrecs & GRN_RSET_UTIL_BIT)) {
+      recinfo->n_subrecs &= ~GRN_RSET_UTIL_BIT;
+    } else {
+      grn_hash_delete_by_id(ctx, s, entry_id, NULL);
+    }
+  }
+  grn_hash_cursor_close(ctx, cursor);
+}
+
+/*
+ * Appends a textual dump of cursor `c` to `buf`: the inspected term key
+ * followed by one entry per posting, garbage postings included.  A posting
+ * is labelled "available" when both its tf and sid are non-zero and
+ * "garbage" otherwise.  The cursor is advanced to its end.
+ */
+void
+grn_ii_cursor_inspect(grn_ctx *ctx, grn_ii_cursor *c, grn_obj *buf)
+{
+  grn_ii_cursor_next_options options = {
+    .include_garbage = GRN_TRUE
+  };
+  char term[GRN_TABLE_MAX_KEY_SIZE];
+  int term_size;
+  grn_obj term_bulk;
+  int n_printed = 0;
+
+  GRN_TEXT_PUTS(ctx, buf, " #<");
+  term_size = grn_table_get_key(ctx, c->ii->lexicon, c->id,
+                                term, GRN_TABLE_MAX_KEY_SIZE);
+  GRN_OBJ_INIT(&term_bulk, GRN_BULK, 0, c->ii->lexicon->header.domain);
+  GRN_TEXT_SET(ctx, &term_bulk, term, term_size);
+  grn_inspect(ctx, buf, &term_bulk);
+  GRN_OBJ_FIN(ctx, &term_bulk);
+
+  GRN_TEXT_PUTS(ctx, buf, "\n elements:[\n ");
+  while (grn_ii_cursor_next_internal(ctx, c, &options)) {
+    grn_posting *posting = c->post;
+
+    /* Separator goes before every entry except the first one. */
+    if (n_printed != 0) {
+      GRN_TEXT_PUTS(ctx, buf, ",\n ");
+    }
+    n_printed++;
+
+    GRN_TEXT_PUTS(ctx, buf, "{status:");
+    if (!posting->tf || !posting->sid) {
+      GRN_TEXT_PUTS(ctx, buf, "garbage");
+    } else {
+      GRN_TEXT_PUTS(ctx, buf, "available");
+    }
+
+    GRN_TEXT_PUTS(ctx, buf, ", rid:");
+    grn_text_lltoa(ctx, buf, posting->rid);
+
+    GRN_TEXT_PUTS(ctx, buf, ", sid:");
+    grn_text_lltoa(ctx, buf, posting->sid);
+
+    GRN_TEXT_PUTS(ctx, buf, ", pos:");
+    grn_text_lltoa(ctx, buf, posting->pos);
+
+    GRN_TEXT_PUTS(ctx, buf, ", tf:");
+    grn_text_lltoa(ctx, buf, posting->tf);
+
+    GRN_TEXT_PUTS(ctx, buf, ", weight:");
+    grn_text_lltoa(ctx, buf, posting->weight);
+
+    GRN_TEXT_PUTS(ctx, buf, ", rest:");
+    grn_text_lltoa(ctx, buf, posting->rest);
+
+    GRN_TEXT_PUTS(ctx, buf, "}");
+  }
+  GRN_TEXT_PUTS(ctx, buf, "\n ]\n >");
+}
+
+/*
+ * Appends "[", the inspection of every term of ii->lexicon (ascending
+ * cursor order, separated by ","), and "]" to `buf`.  Terms for which no
+ * index cursor can be opened contribute only their separator and newline.
+ */
+void
+grn_ii_inspect_values(grn_ctx *ctx, grn_ii *ii, grn_obj *buf)
+{
+  grn_table_cursor *lexicon_cursor;
+
+  GRN_TEXT_PUTS(ctx, buf, "[");
+  lexicon_cursor = grn_table_cursor_open(ctx, ii->lexicon,
+                                         NULL, 0, NULL, 0, 0, -1,
+                                         GRN_CURSOR_ASCENDING);
+  if (lexicon_cursor) {
+    grn_bool is_first = GRN_TRUE;
+    grn_id term_id;
+
+    while ((term_id = grn_table_cursor_next(ctx, lexicon_cursor)) != GRN_ID_NIL) {
+      grn_ii_cursor *ii_cursor;
+
+      if (is_first) {
+        is_first = GRN_FALSE;
+      } else {
+        GRN_TEXT_PUTS(ctx, buf, ",");
+      }
+      GRN_TEXT_PUTS(ctx, buf, "\n");
+
+      ii_cursor = grn_ii_cursor_open(ctx, ii, term_id,
+                                     GRN_ID_NIL, GRN_ID_MAX,
+                                     ii->n_elements,
+                                     GRN_OBJ_WITH_POSITION|GRN_OBJ_WITH_SECTION);
+      if (ii_cursor) {
+        grn_ii_cursor_inspect(ctx, ii_cursor, buf);
+        grn_ii_cursor_close(ctx, ii_cursor);
+      }
+    }
+    grn_table_cursor_close(ctx, lexicon_cursor);
+  }
+  GRN_TEXT_PUTS(ctx, buf, "]");
+}
+
+/********************** buffered index builder ***********************/
+
+const grn_id II_BUFFER_TYPE_MASK = 0xc0000000; /* Top two bits of a block_buf entry: they select the entry type */
+#define II_BUFFER_TYPE_RID 0x80000000 /* Entry carries a record ID */
+#define II_BUFFER_TYPE_WEIGHT 0x40000000 /* Entry carries a weight */
+#define II_BUFFER_TYPE(id) (((id) & II_BUFFER_TYPE_MASK)) /* Extracts the type bits; 0 means a term ID entry */
+#define II_BUFFER_PACK(value, type) ((value) | (type)) /* Tags a value with a type */
+#define II_BUFFER_UNPACK(id, type) ((id) & ~(type)) /* Removes the type tag again */
+#define II_BUFFER_ORDER GRN_CURSOR_BY_KEY /* Cursor order used when terms of a temporary lexicon are encoded */
+const uint16_t II_BUFFER_NTERMS_PER_BUFFER = 16380;
+const uint32_t II_BUFFER_PACKED_BUF_SIZE = 0x4000000;
+const char *TMPFILE_PATH = "grn_ii_buffer_tmp";
+const uint32_t II_BUFFER_NCOUNTERS_MARGIN = 0x100000;
+const size_t II_BUFFER_BLOCK_SIZE = 0x1000000;
+const uint32_t II_BUFFER_BLOCK_READ_UNIT_SIZE = 0x200000; /* Byte threshold at which a block is cut into a new unit */
+
+typedef struct { /* One element of grn_ii_buffer.values: a value with its section ID and weight */
+ unsigned int sid; /* Section ID */
+ unsigned int weight; /* Weight */
+ const char *p; /* Value address */
+ uint32_t len; /* Value length */
+ char *buf; /* Buffer address */
+ uint32_t cap; /* Buffer size */
+} ii_buffer_value; /* NOTE(review): buf/cap look like an owned copy that p may point into -- confirm against the code that fills it */
+
+/* ii_buffer_counter is associated with a combination of a block and a term. */
+typedef struct {
+ uint32_t nrecs; /* Number of records or sections */
+ uint32_t nposts; /* Number of occurrences */
+
+ /* Information of the last value */
+ grn_id last_rid; /* Record ID */
+ uint32_t last_sid; /* Section ID */
+ uint32_t last_tf; /* Term frequency */
+ uint32_t last_weight; /* Total weight */
+ uint32_t last_pos; /* Token position */
+
+ /* Meaning of offset_* is different before/after encoding. */
+ /* Before encoding: size in encoded sequence */
+ /* After encoding: Offset in encoded sequence */
+ uint32_t offset_rid; /* Record ID */
+ uint32_t offset_sid; /* Section ID */
+ uint32_t offset_tf; /* Term frequency */
+ uint32_t offset_weight; /* Weight */
+ uint32_t offset_pos; /* Token position */
+} ii_buffer_counter;
+
+typedef struct { /* Bookkeeping for one block flushed to the temporary file */
+ off64_t head; /* Temporary file position when the block was created (block_new) */
+ off64_t tail; /* Temporary file position right after the block was written (grn_ii_buffer_flush) */
+ uint32_t nextsize; /* Size of the first unit; stored by encode_terms */
+ uint8_t *buffer; /* NULL at creation; NOTE(review): presumably a read buffer used while merging -- confirm */
+ uint32_t buffersize; /* 0 at creation */
+ uint8_t *bufcur;
+ uint32_t rest; /* 0 at creation */
+ grn_id tid;
+ uint32_t nrecs;
+ uint32_t nposts;
+ grn_id *recs;
+ uint32_t *tfs;
+ uint32_t *posts;
+} ii_buffer_block; /* NOTE(review): bufcur .. posts are not set in block_new; presumably filled by the merge phase -- confirm */
+
+struct _grn_ii_buffer { /* State of the buffered index builder */
+ grn_obj *lexicon; /* Global lexicon */
+ grn_obj *tmp_lexicon; /* Temporary lexicon for each block */
+ ii_buffer_block *blocks; /* Blocks */
+ uint32_t nblocks; /* Number of blocks */
+ int tmpfd; /* Descriptor of temporary file */
+ char tmpfpath[PATH_MAX]; /* Path of temporary file */
+ uint64_t update_buffer_size;
+
+ // stuff for parsing
+ off64_t filepos; /* Write position of temporary file */
+ grn_id *block_buf; /* Buffer for the current block */
+ size_t block_buf_size; /* Size of block_buf */
+ size_t block_pos; /* Write position of block_buf */
+ ii_buffer_counter *counters; /* Status of terms */
+ uint32_t ncounters; /* Number of counters */
+ size_t total_size; /* encode_terms adds nrecs + nposts of every encoded term */
+ size_t curr_size;
+ ii_buffer_value *values; /* Values in block */
+ unsigned int nvalues; /* Number of values in block */
+ unsigned int max_nvalues; /* Size of values */
+ grn_id last_rid;
+
+ // stuff for merging
+ grn_ii *ii; /* Target index; its header flags select which posting elements are encoded */
+ uint32_t lseg;
+ uint32_t dseg;
+ buffer *term_buffer;
+ datavec data_vectors[MAX_N_ELEMENTS + 1];
+ uint8_t *packed_buf;
+ size_t packed_buf_size;
+ size_t packed_len;
+ size_t total_chunk_size;
+};
+
+/*
+ * block_new prepares the slot blocks[nblocks] for a new block and returns
+ * it.  The blocks array is grown by 0x400 entries whenever nblocks is a
+ * multiple of 0x400.  The slot starts at the current temporary file
+ * position with no read buffer.  nblocks itself is not changed here.
+ * Returns NULL when the array cannot be grown.
+ */
+static ii_buffer_block *
+block_new(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
+{
+  const uint32_t index = ii_buffer->nblocks;
+  ii_buffer_block *slot;
+
+  if ((index & 0x3ff) == 0) {
+    ii_buffer_block *grown = GRN_REALLOC(ii_buffer->blocks,
+                                         (index + 0x400) *
+                                         sizeof(ii_buffer_block));
+    if (!grown) {
+      return NULL;
+    }
+    ii_buffer->blocks = grown;
+  }
+
+  slot = ii_buffer->blocks + index;
+  slot->head = ii_buffer->filepos;
+  slot->rest = 0;
+  slot->buffer = NULL;
+  slot->buffersize = 0;
+  return slot;
+}
+
+/*
+ * allocate_outbuf computes the encoded size of the current block and
+ * allocates a buffer of that size for flushing it.
+ *
+ * As a side effect every counter of the temporary lexicon is finalized:
+ * the size of the pending term frequency is added to offset_tf, and
+ * last_rid and last_tf are reset to 0.  For each term 5 bytes are reserved
+ * in addition to the encoded sizes of nrecs and nposts and the element
+ * sizes selected by the index flags.  Each time the size grows by more than
+ * II_BUFFER_BLOCK_READ_UNIT_SIZE since the last unit boundary,
+ * sizeof(uint32_t) more bytes are reserved.
+ *
+ * Returns the allocated buffer, or NULL when allocation fails.
+ */
+static uint8_t *
+allocate_outbuf(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
+{
+  const uint32_t flags = ii_buffer->ii->header->flags;
+  const grn_id n_terms = grn_table_size(ctx, ii_buffer->tmp_lexicon);
+  size_t required = 0;
+  size_t unit_start = 0;
+  grn_id nth;
+
+  for (nth = 0; nth < n_terms; nth++) {
+    ii_buffer_counter *counter = &ii_buffer->counters[nth];
+
+    counter->offset_tf += GRN_B_ENC_SIZE(counter->last_tf - 1);
+    counter->last_rid = 0;
+    counter->last_tf = 0;
+
+    required += 5;
+    required += GRN_B_ENC_SIZE(counter->nrecs);
+    required += GRN_B_ENC_SIZE(counter->nposts);
+    required += counter->offset_rid;
+    if ((flags & GRN_OBJ_WITH_SECTION)) {
+      required += counter->offset_sid;
+    }
+    required += counter->offset_tf;
+    if ((flags & GRN_OBJ_WITH_WEIGHT)) {
+      required += counter->offset_weight;
+    }
+    if ((flags & GRN_OBJ_WITH_POSITION)) {
+      required += counter->offset_pos;
+    }
+
+    /* Unit boundary: reserve room for the size of the next unit. */
+    if (unit_start + II_BUFFER_BLOCK_READ_UNIT_SIZE < required) {
+      required += sizeof(uint32_t);
+      unit_start = required;
+    }
+  }
+  GRN_LOG(ctx, GRN_LOG_INFO, "flushing:%d bufsize:%" GRN_FMT_SIZE,
+          ii_buffer->nblocks, required);
+  return (uint8_t *)GRN_MALLOC(required);
+}
+
+/*
+ * The temporary file format is roughly as follows:
+ *
+ * File = Block...
+ * Block = Unit...
+ * Unit = TermChunk (key order)
+ * NextUnitSize (The first unit size is kept on memory)
+ * Chunk = Term...
+ * Term = ID (gtid)
+ * NumRecordsOrSections (nrecs), NumOccurrences (nposts)
+ * RecordID... (rid, diff)
+ * [SectionID... (sid, diff)]
+ * TermFrequency... (tf, diff)
+ * [Weight... (weight, diff)]
+ * [Position... (pos, diff)]
+ */
+
+/*
+ * encode_terms lays out the terms of ii_buffer->tmp_lexicon in outbuf and
+ * returns the number of bytes laid out (the amount grn_ii_buffer_flush
+ * writes to the temporary file).
+ *
+ * Terms are visited in II_BUFFER_ORDER.  Every key is added to the global
+ * lexicon to obtain its global term ID.  For a term with at least one
+ * record, the ID, nrecs and nposts are encoded and room is skipped for each
+ * element selected by the index flags; counter->offset_* is switched from
+ * "size" to "offset from outbuf" while doing so.  The postings themselves
+ * are written later through these offsets.
+ *
+ * Whenever more than II_BUFFER_BLOCK_READ_UNIT_SIZE bytes have been laid
+ * out since the current unit started, the unit is closed: its size is
+ * stored where the previous unit reserved room for it (block->nextsize for
+ * the first unit) and sizeof(uint32_t) bytes are reserved for the next one.
+ */
+static size_t
+encode_terms(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
+ uint8_t *outbuf, ii_buffer_block *block)
+{
+ grn_id tid;
+ uint8_t *outbufp = outbuf; /* Current layout position */
+ uint8_t *outbufp_ = outbuf; /* Start of the current unit */
+ grn_table_cursor *tc;
+ /* The first size is written into block->nextsize. */
+ uint8_t *pnext = (uint8_t *)&block->nextsize;
+ uint32_t flags = ii_buffer->ii->header->flags;
+ tc = grn_table_cursor_open(ctx, ii_buffer->tmp_lexicon,
+ NULL, 0, NULL, 0, 0, -1, II_BUFFER_ORDER);
+ while ((tid = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL) {
+ char key[GRN_TABLE_MAX_KEY_SIZE];
+ int key_size = grn_table_get_key(ctx, ii_buffer->tmp_lexicon, tid,
+ key, GRN_TABLE_MAX_KEY_SIZE);
+ /* gtid is a global term ID, not in a temporary lexicon. */
+ grn_id gtid = grn_table_add(ctx, ii_buffer->lexicon, key, key_size, NULL);
+ ii_buffer_counter *counter = &ii_buffer->counters[tid - 1]; /* Term IDs start at 1 */
+ if (counter->nrecs) {
+ uint32_t offset_rid = counter->offset_rid; /* Still sizes at this point */
+ uint32_t offset_sid = counter->offset_sid;
+ uint32_t offset_tf = counter->offset_tf;
+ uint32_t offset_weight = counter->offset_weight;
+ uint32_t offset_pos = counter->offset_pos;
+ GRN_B_ENC(gtid, outbufp);
+ GRN_B_ENC(counter->nrecs, outbufp);
+ GRN_B_ENC(counter->nposts, outbufp);
+ ii_buffer->total_size += counter->nrecs + counter->nposts;
+ counter->offset_rid = outbufp - outbuf; /* From here on: offsets into outbuf */
+ outbufp += offset_rid;
+ if ((flags & GRN_OBJ_WITH_SECTION)) {
+ counter->offset_sid = outbufp - outbuf;
+ outbufp += offset_sid;
+ }
+ counter->offset_tf = outbufp - outbuf;
+ outbufp += offset_tf;
+ if ((flags & GRN_OBJ_WITH_WEIGHT)) {
+ counter->offset_weight = outbufp - outbuf;
+ outbufp += offset_weight;
+ }
+ if ((flags & GRN_OBJ_WITH_POSITION)) {
+ counter->offset_pos = outbufp - outbuf;
+ outbufp += offset_pos;
+ }
+ }
+ if (outbufp_ + II_BUFFER_BLOCK_READ_UNIT_SIZE < outbufp) { /* Unit is large enough: close it */
+ uint32_t size = outbufp - outbufp_ + sizeof(uint32_t); /* Includes the slot reserved below */
+ grn_memcpy(pnext, &size, sizeof(uint32_t));
+ pnext = outbufp; /* The next unit's size goes here */
+ outbufp += sizeof(uint32_t);
+ outbufp_ = outbufp;
+ }
+ }
+ grn_table_cursor_close(ctx, tc);
+ if (outbufp_ < outbufp) { /* Last, partially filled unit */
+ uint32_t size = outbufp - outbufp_;
+ grn_memcpy(pnext, &size, sizeof(uint32_t));
+ }
+ return outbufp - outbuf;
+}
+
+/*
+ * encode_postings encodes data in ii_buffer->block_buf.
+ *
+ * block_buf is read as a stream of block_pos entries whose type is given by
+ * II_BUFFER_TYPE():
+ *   - a record ID entry sets the current record (and, for indexes with
+ *     sections, is followed by the section ID) and resets weight and
+ *     position,
+ *   - a weight entry sets the current weight,
+ *   - any other entry is a term ID of the temporary lexicon (followed by a
+ *     position for indexes with positions).
+ *
+ * Encoded values are written into outbuf at the per-term offsets kept in
+ * ii_buffer->counters, which are advanced as data is written.  The term
+ * frequency and weight of the last record of each term are still pending
+ * when this function returns.
+ */
+static void
+encode_postings(grn_ctx *ctx, grn_ii_buffer *ii_buffer, uint8_t *outbuf)
+{
+ grn_id rid = 0;
+ unsigned int sid = 1;
+ unsigned int weight = 0;
+ uint32_t pos = 0;
+ uint32_t rest;
+ grn_id *bp = ii_buffer->block_buf;
+ uint32_t flags = ii_buffer->ii->header->flags;
+ for (rest = ii_buffer->block_pos; rest; bp++, rest--) {
+ grn_id id = *bp;
+ switch (II_BUFFER_TYPE(id)) {
+ case II_BUFFER_TYPE_RID :
+ rid = II_BUFFER_UNPACK(id, II_BUFFER_TYPE_RID);
+ if ((flags & GRN_OBJ_WITH_SECTION) && rest) {
+ sid = *++bp; /* The section ID follows the record ID */
+ rest--;
+ }
+ weight = 0;
+ pos = 0;
+ break;
+ case II_BUFFER_TYPE_WEIGHT :
+ weight = II_BUFFER_UNPACK(id, II_BUFFER_TYPE_WEIGHT);
+ break;
+ default :
+ {
+ ii_buffer_counter *counter = &ii_buffer->counters[id - 1]; /* id is a term ID here */
+ if (counter->last_rid == rid && counter->last_sid == sid) {
+ counter->last_tf++; /* Same record and section: just accumulate */
+ counter->last_weight += weight;
+ } else {
+ if (counter->last_tf) { /* Write out the pending tf/weight of the previous record */
+ uint8_t *p = outbuf + counter->offset_tf;
+ GRN_B_ENC(counter->last_tf - 1, p);
+ counter->offset_tf = p - outbuf;
+ if (flags & GRN_OBJ_WITH_WEIGHT) {
+ p = outbuf + counter->offset_weight;
+ GRN_B_ENC(counter->last_weight, p);
+ counter->offset_weight = p - outbuf;
+ }
+ }
+ {
+ uint8_t *p = outbuf + counter->offset_rid;
+ GRN_B_ENC(rid - counter->last_rid, p); /* Record IDs are stored as differences */
+ counter->offset_rid = p - outbuf;
+ }
+ if (flags & GRN_OBJ_WITH_SECTION) {
+ uint8_t *p = outbuf + counter->offset_sid;
+ if (counter->last_rid != rid) {
+ GRN_B_ENC(sid - 1, p); /* New record: section ID minus one */
+ } else {
+ GRN_B_ENC(sid - counter->last_sid - 1, p); /* Same record: difference minus one */
+ }
+ counter->offset_sid = p - outbuf;
+ }
+ counter->last_rid = rid;
+ counter->last_sid = sid;
+ counter->last_tf = 1;
+ counter->last_weight = weight;
+ counter->last_pos = 0;
+ }
+ if ((flags & GRN_OBJ_WITH_POSITION) && rest) {
+ uint8_t *p = outbuf + counter->offset_pos;
+ pos = *++bp; /* The position follows the term ID */
+ rest--;
+ GRN_B_ENC(pos - counter->last_pos, p); /* Positions are stored as differences */
+ counter->offset_pos = p - outbuf;
+ counter->last_pos = pos;
+ }
+ }
+ break;
+ }
+ }
+}
+
+/*
+ * encode_last_tf encodes last_tf and last_weight in counters.
+ *
+ * encode_postings leaves the term frequency (and weight) of the last
+ * record of every term pending in its counter.  This function writes them
+ * into outbuf at counter->offset_tf (and counter->offset_weight for
+ * indexes with GRN_OBJ_WITH_WEIGHT) for every term of the temporary
+ * lexicon.
+ *
+ * Both values are written through the same counter: the weight must come
+ * from the counter of the same term, not from the counters that follow the
+ * last term.
+ */
+static void
+encode_last_tf(grn_ctx *ctx, grn_ii_buffer *ii_buffer, uint8_t *outbuf)
+{
+  const grn_bool with_weight =
+    (ii_buffer->ii->header->flags & GRN_OBJ_WITH_WEIGHT) ? GRN_TRUE : GRN_FALSE;
+  ii_buffer_counter *counter = ii_buffer->counters;
+  grn_id tid, tid_max = grn_table_size(ctx, ii_buffer->tmp_lexicon);
+
+  for (tid = 1; tid <= tid_max; counter++, tid++) {
+    uint8_t *p = outbuf + counter->offset_tf;
+    GRN_B_ENC(counter->last_tf - 1, p);
+    if (with_weight) {
+      p = outbuf + counter->offset_weight;
+      GRN_B_ENC(counter->last_weight, p);
+    }
+  }
+}
+
+/*
+ * grn_ii_buffer_flush encodes the current block (block_buf, counters and
+ * tmp_lexicon of ii_buffer) and appends it to the temporary file
+ * (ii_buffer->tmpfd), recording its location in ii_buffer->blocks.
+ *
+ * On success the counters of the block are cleared, the temporary lexicon
+ * is closed, nblocks is incremented and block_pos is reset.  When a block
+ * slot or the output buffer cannot be obtained, or when the write is
+ * short, the function returns early (after reporting
+ * GRN_INPUT_OUTPUT_ERROR in the latter case) without that clean-up.
+ */
+static void
+grn_ii_buffer_flush(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
+{
+  ii_buffer_block *block;
+  uint8_t *encoded;
+  size_t encoded_size;
+  ssize_t written;
+
+  GRN_LOG(ctx, GRN_LOG_DEBUG, "flushing:%d npostings:%" GRN_FMT_SIZE,
+          ii_buffer->nblocks, ii_buffer->block_pos);
+
+  block = block_new(ctx, ii_buffer);
+  if (!block) {
+    return;
+  }
+  encoded = allocate_outbuf(ctx, ii_buffer);
+  if (!encoded) {
+    return;
+  }
+
+  /* Lay out the terms first; the postings are written through the offsets
+   * prepared by that step. */
+  encoded_size = encode_terms(ctx, ii_buffer, encoded, block);
+  encode_postings(ctx, ii_buffer, encoded);
+  encode_last_tf(ctx, ii_buffer, encoded);
+
+  written = grn_write(ii_buffer->tmpfd, encoded, encoded_size);
+  if (written != encoded_size) {
+    ERR(GRN_INPUT_OUTPUT_ERROR,
+        "write returned %" GRN_FMT_LLD " != %" GRN_FMT_LLU,
+        (long long int)written, (unsigned long long int)encoded_size);
+    GRN_FREE(encoded);
+    return;
+  }
+  ii_buffer->filepos += written;
+  block->tail = ii_buffer->filepos;
+  GRN_FREE(encoded);
+
+  /* The table size is needed for clearing, so clear before closing. */
+  memset(ii_buffer->counters, 0,
+         grn_table_size(ctx, ii_buffer->tmp_lexicon) *
+         sizeof(ii_buffer_counter));
+  grn_obj_close(ctx, ii_buffer->tmp_lexicon);
+  GRN_LOG(ctx, GRN_LOG_DEBUG, "flushed: %d encsize:%" GRN_FMT_SIZE,
+          ii_buffer->nblocks, encoded_size);
+  ii_buffer->tmp_lexicon = NULL;
+  ii_buffer->nblocks++;
+  ii_buffer->block_pos = 0;
+}
+
+const uint32_t PAT_CACHE_SIZE = 1<<20; /* Cache size passed to grn_pat_cache_enable for GRN_OBJ_TABLE_PAT_KEY lexicons (1048576). */
+
+/*
+ * get_tmp_lexicon returns the temporary lexicon of the current block and
+ * creates it first if the block does not have one yet.
+ *
+ * The new table is created with the flags of ii_buffer->lexicon except
+ * GRN_OBJ_PERSISTENT, and with the same domain and range. The default
+ * tokenizer, the normalizer and the token filters of ii_buffer->lexicon are
+ * copied to it. The pat cache is enabled if the table is a
+ * GRN_OBJ_TABLE_PAT_KEY table.
+ *
+ * NULL is returned if the table cannot be created.
+ *
+ * Note that a lexicon is created for each block and ii_buffer->tmp_lexicon
+ * is closed in grn_ii_buffer_flush.
+ */
+static grn_obj *
+get_tmp_lexicon(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
+{
+  grn_obj *domain;
+  grn_obj *range;
+  grn_obj *tokenizer;
+  grn_obj *normalizer;
+  grn_obj *token_filters;
+  grn_obj *created;
+  grn_table_flags flags;
+
+  if (ii_buffer->tmp_lexicon) {
+    return ii_buffer->tmp_lexicon;
+  }
+
+  domain = grn_ctx_at(ctx, ii_buffer->lexicon->header.domain);
+  range = grn_ctx_at(ctx, DB_OBJ(ii_buffer->lexicon)->range);
+  grn_table_get_info(ctx, ii_buffer->lexicon, &flags, NULL,
+                     &tokenizer, &normalizer, &token_filters);
+  flags &= ~GRN_OBJ_PERSISTENT;
+
+  created = grn_table_create(ctx, NULL, 0, NULL, flags, domain, range);
+  if (!created) {
+    return NULL;
+  }
+
+  ii_buffer->tmp_lexicon = created;
+  grn_obj_set_info(ctx, created, GRN_INFO_DEFAULT_TOKENIZER, tokenizer);
+  grn_obj_set_info(ctx, created, GRN_INFO_NORMALIZER, normalizer);
+  grn_obj_set_info(ctx, created, GRN_INFO_TOKEN_FILTERS, token_filters);
+  if ((flags & GRN_OBJ_TABLE_TYPE_MASK) == GRN_OBJ_TABLE_PAT_KEY) {
+    grn_pat_cache_enable(ctx, (grn_pat *)created, PAT_CACHE_SIZE);
+  }
+  return created;
+}
+
+/*
+ * get_buffer_counter returns the counter associated with tid.
+ *
+ * If tid is larger than the current number of counters, the counter array
+ * is reallocated to hold the size of tmp_lexicon plus
+ * II_BUFFER_NCOUNTERS_MARGIN entries and the added entries are
+ * zero-cleared. NULL is returned if the reallocation fails; the existing
+ * counters are left untouched in that case.
+ */
+static ii_buffer_counter *
+get_buffer_counter(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
+                   grn_obj *tmp_lexicon, grn_id tid)
+{
+  if (tid > ii_buffer->ncounters) {
+    uint32_t new_ncounters =
+      grn_table_size(ctx, tmp_lexicon) + II_BUFFER_NCOUNTERS_MARGIN;
+    ii_buffer_counter *grown =
+      GRN_REALLOC(ii_buffer->counters,
+                  new_ncounters * sizeof(ii_buffer_counter));
+    if (!grown) {
+      return NULL;
+    }
+    /* Only the entries added by this reallocation need to be cleared. */
+    memset(grown + ii_buffer->ncounters, 0,
+           (new_ncounters - ii_buffer->ncounters) *
+           sizeof(ii_buffer_counter));
+    ii_buffer->counters = grown;
+    ii_buffer->ncounters = new_ncounters;
+  }
+  /* Term IDs start from 1. */
+  return ii_buffer->counters + (tid - 1);
+}
+
+/*
+ * grn_ii_buffer_tokenize_value tokenizes a value.
+ *
+ * The result is written into the current block (ii_buffer->tmp_lexicon,
+ * ii_buffer->block_buf, ii_buffer->counters, etc.).
+ *
+ * The block buffer receives, in this order: the packed record ID, the
+ * section ID (only with GRN_OBJ_WITH_SECTION), the packed weight (only if
+ * value->weight is not 0) and then, for each token, the term ID followed by
+ * the token position (only with GRN_OBJ_WITH_POSITION). At the same time,
+ * the counter of each term accumulates the encoded sizes that are needed to
+ * lay out the postings later.
+ *
+ * The caller must guarantee that the block buffer has enough room; this
+ * function does not check the capacity.
+ *
+ * If a counter cannot be obtained, tokenization stops at that token. The
+ * token cursor is still closed and ii_buffer->block_pos still advances over
+ * the postings that have been stored, so that the block buffer stays
+ * consistent with the counters that have already been updated.
+ */
+static void
+grn_ii_buffer_tokenize_value(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
+                             grn_id rid, const ii_buffer_value *value)
+{
+  grn_obj *tmp_lexicon;
+  if ((tmp_lexicon = get_tmp_lexicon(ctx, ii_buffer))) {
+    unsigned int token_flags = 0;
+    grn_token_cursor *token_cursor;
+    grn_id *buffer = ii_buffer->block_buf;
+    uint32_t block_pos = ii_buffer->block_pos;
+    uint32_t ii_flags = ii_buffer->ii->header->flags;
+    buffer[block_pos++] = II_BUFFER_PACK(rid, II_BUFFER_TYPE_RID);
+    if (ii_flags & GRN_OBJ_WITH_SECTION) {
+      buffer[block_pos++] = value->sid;
+    }
+    if (value->weight) {
+      buffer[block_pos++] = II_BUFFER_PACK(value->weight,
+                                           II_BUFFER_TYPE_WEIGHT);
+    }
+    if ((token_cursor = grn_token_cursor_open(ctx, tmp_lexicon,
+                                              value->p, value->len,
+                                              GRN_TOKEN_ADD, token_flags))) {
+      while (!token_cursor->status) {
+        grn_id tid;
+        if ((tid = grn_token_cursor_next(ctx, token_cursor))) {
+          ii_buffer_counter *counter;
+          counter = get_buffer_counter(ctx, ii_buffer, tmp_lexicon, tid);
+          if (!counter) {
+            /*
+             * Leave the loop instead of returning so that the token cursor
+             * is closed and block_pos is stored below.
+             */
+            break;
+          }
+          buffer[block_pos++] = tid;
+          if (ii_flags & GRN_OBJ_WITH_POSITION) {
+            buffer[block_pos++] = token_cursor->pos;
+          }
+          if (counter->last_rid != rid) {
+            /* First posting of this term for a new record. */
+            counter->offset_rid += GRN_B_ENC_SIZE(rid - counter->last_rid);
+            counter->last_rid = rid;
+            counter->offset_sid += GRN_B_ENC_SIZE(value->sid - 1);
+            counter->last_sid = value->sid;
+            if (counter->last_tf) {
+              /* Account for the tf and weight of the previous posting. */
+              counter->offset_tf += GRN_B_ENC_SIZE(counter->last_tf - 1);
+              counter->last_tf = 0;
+              counter->offset_weight += GRN_B_ENC_SIZE(counter->last_weight);
+              counter->last_weight = 0;
+            }
+            counter->last_pos = 0;
+            counter->nrecs++;
+          } else if (counter->last_sid != value->sid) {
+            /* Same record, but first posting of this term in a new section. */
+            counter->offset_rid += GRN_B_ENC_SIZE(0);
+            counter->offset_sid +=
+              GRN_B_ENC_SIZE(value->sid - counter->last_sid - 1);
+            counter->last_sid = value->sid;
+            if (counter->last_tf) {
+              counter->offset_tf += GRN_B_ENC_SIZE(counter->last_tf - 1);
+              counter->last_tf = 0;
+              counter->offset_weight += GRN_B_ENC_SIZE(counter->last_weight);
+              counter->last_weight = 0;
+            }
+            counter->last_pos = 0;
+            counter->nrecs++;
+          }
+          counter->offset_pos +=
+            GRN_B_ENC_SIZE(token_cursor->pos - counter->last_pos);
+          counter->last_pos = token_cursor->pos;
+          counter->last_tf++;
+          counter->last_weight += value->weight;
+          counter->nposts++;
+        }
+      }
+      grn_token_cursor_close(ctx, token_cursor);
+    }
+    ii_buffer->block_pos = block_pos;
+  }
+}
+
+/*
+ * grn_ii_buffer_tokenize tokenizes the values stored in ii_buffer->values
+ * as the values of the record rid.
+ *
+ * The size of the tokenized values is estimated first (2 * len + 2 buffer
+ * elements per value). If the estimated size does not fit into the rest of
+ * the current block, the block is flushed. If it does not even fit into an
+ * empty block, the block buffer is enlarged.
+ * Then each non-empty value is tokenized, provided that its own estimated
+ * size fits into the rest of the block. The list of values is emptied at the
+ * end.
+ */
+static void
+grn_ii_buffer_tokenize(grn_ctx *ctx, grn_ii_buffer *ii_buffer, grn_id rid)
+{
+  unsigned int i;
+  uint32_t total_est_len = 0;
+
+  for (i = 0; i < ii_buffer->nvalues; i++) {
+    total_est_len += ii_buffer->values[i].len * 2 + 2;
+  }
+
+  if (ii_buffer->block_buf_size < ii_buffer->block_pos + total_est_len) {
+    grn_ii_buffer_flush(ctx, ii_buffer);
+  }
+  if (ii_buffer->block_buf_size < total_est_len) {
+    grn_id *new_block_buf =
+      (grn_id *)GRN_REALLOC(ii_buffer->block_buf,
+                            total_est_len * sizeof(grn_id));
+    if (new_block_buf) {
+      ii_buffer->block_buf = new_block_buf;
+      ii_buffer->block_buf_size = total_est_len;
+    }
+  }
+
+  for (i = 0; i < ii_buffer->nvalues; i++) {
+    const ii_buffer_value *value = &ii_buffer->values[i];
+    uint32_t value_est_len;
+    if (!value->len) {
+      continue;
+    }
+    value_est_len = value->len * 2 + 2;
+    /* Skip a value that does not fit into the rest of the block. */
+    if (ii_buffer->block_buf_size < ii_buffer->block_pos + value_est_len) {
+      continue;
+    }
+    grn_ii_buffer_tokenize_value(ctx, ii_buffer, rid, value);
+  }
+  ii_buffer->nvalues = 0;
+}
+
+/*
+ * grn_ii_buffer_fetch fetches the next term of a block.
+ *
+ * If the buffered unit is used up (block->rest == 0) and the block still has
+ * unread data in the temporary file (block->head < block->tail), the next
+ * unit of block->nextsize bytes is read from ii_buffer->tmpfd at offset
+ * block->head into block->buffer, which is enlarged first if it is too small.
+ * Unless the unit reaches block->tail, its last sizeof(uint32_t) bytes are
+ * copied into block->nextsize and are excluded from block->rest.
+ *
+ * Then, if data is left in the buffer, block->tid, block->nrecs and
+ * block->nposts are decoded from it. Otherwise block->tid is set to 0.
+ *
+ * If GRN_REALLOC, grn_lseek or grn_read fails, the function returns without
+ * updating block->tid.
+ */
+static void
+grn_ii_buffer_fetch(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
+ ii_buffer_block *block)
+{
+ if (!block->rest) {
+ /* The buffered unit is used up: read the next unit. */
+ if (block->head < block->tail) {
+ size_t bytesize = block->nextsize;
+ if (block->buffersize < block->nextsize) {
+ void *r = GRN_REALLOC(block->buffer, bytesize);
+ if (r) {
+ block->buffer = (uint8_t *)r;
+ block->buffersize = block->nextsize;
+ } else {
+ GRN_LOG(ctx, GRN_LOG_WARNING, "realloc: %" GRN_FMT_LLU,
+ (unsigned long long int)bytesize);
+ return;
+ }
+ }
+ {
+ off64_t seeked_position;
+ seeked_position = grn_lseek(ii_buffer->tmpfd, block->head, SEEK_SET);
+ if (seeked_position != block->head) {
+ ERRNO_ERR("failed to "
+ "grn_lseek(%" GRN_FMT_OFF64_T ") -> %" GRN_FMT_OFF64_T,
+ block->head,
+ seeked_position);
+ return;
+ }
+ }
+ {
+ size_t read_bytesize;
+ read_bytesize = grn_read(ii_buffer->tmpfd, block->buffer, bytesize);
+ if (read_bytesize != bytesize) {
+ SERR("failed to grn_read(%" GRN_FMT_SIZE ") -> %" GRN_FMT_SIZE,
+ bytesize, read_bytesize);
+ return;
+ }
+ }
+ block->head += bytesize;
+ block->bufcur = block->buffer;
+ if (block->head >= block->tail) {
+ if (block->head > block->tail) {
+ GRN_LOG(ctx, GRN_LOG_WARNING,
+ "fetch error: %" GRN_FMT_INT64D " > %" GRN_FMT_INT64D,
+ block->head, block->tail);
+ }
+ block->rest = block->nextsize; /* The end of the block is reached: the whole unit is data. */
+ block->nextsize = 0;
+ } else {
+ block->rest = block->nextsize - sizeof(uint32_t); /* The last sizeof(uint32_t) bytes hold the size of the next unit. */
+ grn_memcpy(&block->nextsize,
+ &block->buffer[block->rest], sizeof(uint32_t));
+ }
+ }
+ }
+ if (block->rest) {
+ uint8_t *p = block->bufcur;
+ GRN_B_DEC(block->tid, p);
+ GRN_B_DEC(block->nrecs, p);
+ GRN_B_DEC(block->nposts, p);
+ block->rest -= (p - block->bufcur); /* Consume the bytes decoded above. */
+ block->bufcur = p;
+ } else {
+ block->tid = 0; /* No data is left in this block. */
+ }
+}
+
+/*
+ * grn_ii_buffer_chunk_flush flushes the current buffer for packed postings.
+ *
+ * A chunk of ii_buffer->packed_len bytes is obtained with chunk_new and the
+ * packed buffer is passed to fake_map for that chunk. The header of the
+ * current term buffer is completed (chunk, chunk_size, buffer_free and
+ * nterms_void), the buffer segment is registered with buffer_segment_update
+ * and the total chunk sizes are updated.
+ * Finally, the term buffer and the packed buffer are detached from ii_buffer
+ * so that the next merge starts with new ones.
+ */
+static void
+grn_ii_buffer_chunk_flush(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
+{
+  grn_ii *ii = ii_buffer->ii;
+  buffer *term_buffer = ii_buffer->term_buffer;
+  grn_io_win io_win;
+  uint32_t chunk_number;
+
+  chunk_new(ctx, ii, &chunk_number, ii_buffer->packed_len);
+  GRN_LOG(ctx, GRN_LOG_INFO, "chunk:%d, packed_len:%" GRN_FMT_SIZE,
+          chunk_number, ii_buffer->packed_len);
+  fake_map(ctx, ii->chunk, &io_win, ii_buffer->packed_buf,
+           chunk_number, ii_buffer->packed_len);
+  grn_io_win_unmap(&io_win);
+
+  /* Complete the header of the term buffer. */
+  term_buffer->header.chunk = chunk_number;
+  term_buffer->header.chunk_size = ii_buffer->packed_len;
+  term_buffer->header.buffer_free =
+    S_SEGMENT - sizeof(buffer_header) -
+    term_buffer->header.nterms * sizeof(buffer_term);
+  term_buffer->header.nterms_void = 0;
+  buffer_segment_update(ii, ii_buffer->lseg, ii_buffer->dseg);
+
+  ii->header->total_chunk_size += ii_buffer->packed_len;
+  ii_buffer->total_chunk_size += ii_buffer->packed_len;
+  GRN_LOG(ctx, GRN_LOG_DEBUG,
+          "nterms=%d chunk=%d total=%" GRN_FMT_INT64U "KB",
+          term_buffer->header.nterms,
+          term_buffer->header.chunk_size,
+          ii->header->total_chunk_size >> 10);
+
+  /* Detach the flushed buffers. */
+  ii_buffer->term_buffer = NULL;
+  ii_buffer->packed_buf = NULL;
+  ii_buffer->packed_len = 0;
+  ii_buffer->packed_buf_size = 0;
+  ii_buffer->curr_size = 0;
+}
+
+/*
+ * merge_hit_blocks merges hit blocks into ii_buffer->data_vectors.
+ * merge_hit_blocks returns the estimated maximum size in bytes.
+ *
+ * hits[] holds nhits blocks whose current term is the term being merged.
+ * The numbers of records and postings of all the hit blocks are summed,
+ * added to ii_buffer->curr_size and used to reset the data vectors.
+ * Then, for each hit block in order, the record IDs, the section IDs (only
+ * with GRN_OBJ_WITH_SECTION), the term frequencies, the weights (only with
+ * GRN_OBJ_WITH_WEIGHT) and the positions (only with GRN_OBJ_WITH_POSITION)
+ * are decoded from the block and appended to the corresponding data vector.
+ * After that, the block is advanced to its next term with
+ * grn_ii_buffer_fetch.
+ * Finally, the size and the flags of each data vector are set.
+ *
+ * The return value is (max_size + n_elements) * 4, where max_size is the
+ * number of elements passed to datavec_reset.
+ */
+static size_t
+merge_hit_blocks(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
+ ii_buffer_block *hits[], int nhits)
+{
+ uint64_t nrecs = 0;
+ uint64_t nposts = 0;
+ size_t max_size;
+ uint64_t flags = ii_buffer->ii->header->flags;
+ int i;
+ for (i = 0; i < nhits; i++) {
+ ii_buffer_block *block = hits[i];
+ nrecs += block->nrecs;
+ nposts += block->nposts;
+ }
+ ii_buffer->curr_size += nrecs + nposts;
+ max_size = nrecs * (ii_buffer->ii->n_elements);
+ if (flags & GRN_OBJ_WITH_POSITION) { max_size += nposts - nrecs; }
+ datavec_reset(ctx, ii_buffer->data_vectors,
+ ii_buffer->ii->n_elements, nrecs, max_size);
+ {
+ int i;
+ uint32_t lr = 0; /* Last rid: the sum of the rid values stored so far */
+ uint64_t spos = 0; /* Sum of the decoded position values */
+ uint32_t *ridp, *sidp = NULL, *tfp, *weightp = NULL, *posp = NULL;
+ {
+ /* Get write positions in datavec. The vectors of disabled elements are skipped. */
+ int j = 0;
+ ridp = ii_buffer->data_vectors[j++].data;
+ if (flags & GRN_OBJ_WITH_SECTION) {
+ sidp = ii_buffer->data_vectors[j++].data;
+ }
+ tfp = ii_buffer->data_vectors[j++].data;
+ if (flags & GRN_OBJ_WITH_WEIGHT) {
+ weightp = ii_buffer->data_vectors[j++].data;
+ }
+ if (flags & GRN_OBJ_WITH_POSITION) {
+ posp = ii_buffer->data_vectors[j++].data;
+ }
+ }
+ for (i = 0; i < nhits; i++) {
+ /* Read postings from hit blocks and join the postings into datavec. */
+ ii_buffer_block *block = hits[i];
+ uint8_t *p = block->bufcur;
+ uint32_t n = block->nrecs;
+ if (n) {
+ GRN_B_DEC(*ridp, p);
+ *ridp -= lr; /* Make the first rid of this block relative to the last rid of the preceding hit blocks. */
+ lr += *ridp++;
+ while (--n) {
+ GRN_B_DEC(*ridp, p);
+ lr += *ridp++;
+ }
+ }
+ if ((flags & GRN_OBJ_WITH_SECTION)) {
+ for (n = block->nrecs; n; n--) {
+ GRN_B_DEC(*sidp++, p);
+ }
+ }
+ for (n = block->nrecs; n; n--) {
+ GRN_B_DEC(*tfp++, p);
+ }
+ if ((flags & GRN_OBJ_WITH_WEIGHT)) {
+ for (n = block->nrecs; n; n--) {
+ GRN_B_DEC(*weightp++, p);
+ }
+ }
+ if ((flags & GRN_OBJ_WITH_POSITION)) {
+ for (n = block->nposts; n; n--) {
+ GRN_B_DEC(*posp, p);
+ spos += *posp++;
+ }
+ }
+ block->rest -= (p - block->bufcur); /* Consume the bytes decoded above. */
+ block->bufcur = p;
+ grn_ii_buffer_fetch(ctx, ii_buffer, block); /* Advance the block to its next term. */
+ }
+ {
+ /* Set size and flags of datavec. USE_P_ENC is requested only for vectors that pass the thresholds below. */
+ int j = 0;
+ uint32_t f_s = (nrecs < 3) ? 0 : USE_P_ENC;
+ uint32_t f_d = ((nrecs < 16) || (nrecs <= (lr >> 8))) ? 0 : USE_P_ENC;
+ ii_buffer->data_vectors[j].data_size = nrecs;
+ ii_buffer->data_vectors[j++].flags = f_d;
+ if ((flags & GRN_OBJ_WITH_SECTION)) {
+ ii_buffer->data_vectors[j].data_size = nrecs;
+ ii_buffer->data_vectors[j++].flags = f_s;
+ }
+ ii_buffer->data_vectors[j].data_size = nrecs;
+ ii_buffer->data_vectors[j++].flags = f_s;
+ if ((flags & GRN_OBJ_WITH_WEIGHT)) {
+ ii_buffer->data_vectors[j].data_size = nrecs;
+ ii_buffer->data_vectors[j++].flags = f_s;
+ }
+ if ((flags & GRN_OBJ_WITH_POSITION)) {
+ uint32_t f_p = (((nposts < 32) ||
+ (nposts <= (spos >> 13))) ? 0 : USE_P_ENC);
+ ii_buffer->data_vectors[j].data_size = nposts;
+ ii_buffer->data_vectors[j++].flags = f_p|ODD;
+ }
+ }
+ }
+ return (max_size + ii_buffer->ii->n_elements) * 4;
+}
+
+/*
+ * get_term_buffer returns the term buffer of ii_buffer and assigns a new one
+ * if ii_buffer does not have any.
+ *
+ * To assign a new term buffer, the first logical segment whose binfo entry
+ * is GRN_II_PSEG_NOT_ASSIGNED is chosen, a segment is obtained with
+ * segment_get and it is referred to as the term buffer. The chosen segments
+ * are stored in ii_buffer->lseg and ii_buffer->dseg.
+ *
+ * NULL is returned, with an error reported, if all the logical segments are
+ * in use.
+ */
+static buffer *
+get_term_buffer(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
+{
+  uint32_t lseg = 0;
+  void *term_buffer;
+
+  if (ii_buffer->term_buffer) {
+    return ii_buffer->term_buffer;
+  }
+
+  /* Look for the first logical segment that is not assigned. */
+  while (lseg < GRN_II_MAX_LSEG &&
+         ii_buffer->ii->header->binfo[lseg] != GRN_II_PSEG_NOT_ASSIGNED) {
+    lseg++;
+  }
+  if (lseg == GRN_II_MAX_LSEG) {
+    DEFINE_NAME(ii_buffer->ii);
+    MERR("[ii][buffer][term-buffer] couldn't find a free buffer: "
+         "<%.*s>",
+         name_size, name);
+    return NULL;
+  }
+
+  ii_buffer->lseg = lseg;
+  ii_buffer->dseg = segment_get(ctx, ii_buffer->ii);
+  GRN_IO_SEG_REF(ii_buffer->ii->seg, ii_buffer->dseg, term_buffer);
+  ii_buffer->term_buffer = (buffer *)term_buffer;
+  return ii_buffer->term_buffer;
+}
+
+/*
+ * try_in_place_packing tries to pack a posting in an array element.
+ *
+ * The requirements are as follows:
+ * - nposts == 1
+ * - nhits == 1 && nrecs == 1 && tf == 0
+ * - weight == 0
+ * - !(flags & GRN_OBJ_WITH_SECTION) || (rid < 0x100000 && sid < 0x800)
+ *
+ * (A non-zero tf is only reported with a warning; it does not prevent the
+ * packing.)
+ *
+ * On success, the posting is stored in the array element of tid, the block
+ * is advanced to its next term and GRN_TRUE is returned.
+ * Otherwise GRN_FALSE is returned and the block is left as it was, so that
+ * the caller can merge the posting in the regular way. This includes the
+ * case where the array element of tid cannot be obtained.
+ */
+static grn_bool
+try_in_place_packing(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
+                     grn_id tid, ii_buffer_block *hits[], int nhits)
+{
+  if (nhits == 1 && hits[0]->nrecs == 1 && hits[0]->nposts == 1) {
+    grn_id rid;
+    uint32_t sid = 1, tf, pos = 0, weight = 0;
+    ii_buffer_block *block = hits[0];
+    uint8_t *p = block->bufcur;
+    uint32_t flags = ii_buffer->ii->header->flags;
+    uint32_t *a;
+    GRN_B_DEC(rid, p);
+    if (flags & GRN_OBJ_WITH_SECTION) {
+      GRN_B_DEC(sid, p);
+      sid++; /* The encoded value is sid - 1. */
+    }
+    GRN_B_DEC(tf, p);
+    if (tf != 0) { GRN_LOG(ctx, GRN_LOG_WARNING, "tf=%d", tf); }
+    if (flags & GRN_OBJ_WITH_WEIGHT) { GRN_B_DEC(weight, p); }
+    if (flags & GRN_OBJ_WITH_POSITION) { GRN_B_DEC(pos, p); }
+    if (weight) {
+      return GRN_FALSE;
+    }
+    if ((flags & GRN_OBJ_WITH_SECTION) &&
+        !(rid < 0x100000 && sid < 0x800)) {
+      /* rid and sid don't fit into the packed form. */
+      return GRN_FALSE;
+    }
+    a = array_get(ctx, ii_buffer->ii, tid);
+    if (!a) {
+      /* Nothing has been consumed yet: let the caller handle the term. */
+      return GRN_FALSE;
+    }
+    if (flags & GRN_OBJ_WITH_SECTION) {
+      a[0] = (rid << 12) + (sid << 1) + 1;
+    } else {
+      a[0] = (rid << 1) + 1;
+    }
+    a[1] = pos;
+    array_unref(ii_buffer->ii, tid);
+    block->rest -= (p - block->bufcur);
+    block->bufcur = p;
+    grn_ii_buffer_fetch(ctx, ii_buffer, block);
+    return GRN_TRUE;
+  }
+  return GRN_FALSE;
+}
+
+/*
+ * grn_ii_buffer_merge merges the hit blocks of the term tid and packs the
+ * result.
+ *
+ * If the postings can be packed in place (see try_in_place_packing), nothing
+ * else is done. Otherwise the hit blocks are merged into the data vectors,
+ * the data vectors are encoded at the end of the packed buffer and a
+ * buffer_term that refers to the encoded data is added to the current term
+ * buffer. The packed buffer is flushed as a chunk beforehand if it cannot
+ * hold the estimated size, and afterwards if the size condition on
+ * curr_size and the number of terms is met.
+ *
+ * On failure to allocate the packed buffer, the array element or the term
+ * buffer, an error is reported and the term is not stored. The reference to
+ * the array element is released on every path that has obtained it.
+ */
+static void
+grn_ii_buffer_merge(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
+                    grn_id tid, ii_buffer_block *hits[], int nhits)
+{
+  if (!try_in_place_packing(ctx, ii_buffer, tid, hits, nhits)) {
+    /* Merge hit blocks and reserve a buffer for packed data. */
+    size_t max_size = merge_hit_blocks(ctx, ii_buffer, hits, nhits);
+    if (ii_buffer->packed_buf &&
+        ii_buffer->packed_buf_size < ii_buffer->packed_len + max_size) {
+      grn_ii_buffer_chunk_flush(ctx, ii_buffer);
+    }
+    if (!ii_buffer->packed_buf) {
+      size_t buf_size = (max_size > II_BUFFER_PACKED_BUF_SIZE)
+        ? max_size : II_BUFFER_PACKED_BUF_SIZE;
+      ii_buffer->packed_buf = GRN_MALLOC(buf_size);
+      if (!ii_buffer->packed_buf) {
+        /* Encoding into a missing buffer must be avoided. */
+        DEFINE_NAME(ii_buffer->ii);
+        MERR("[ii][buffer][merge] failed to allocate a packed buffer: "
+             "<%.*s>: "
+             "<%u>",
+             name_size, name,
+             tid);
+        return;
+      }
+      ii_buffer->packed_buf_size = buf_size;
+    }
+    {
+      /* Pack postings into the current buffer. */
+      uint16_t nterm;
+      size_t packed_len;
+      buffer_term *bt;
+      uint32_t *a;
+      buffer *term_buffer;
+
+      a = array_get(ctx, ii_buffer->ii, tid);
+      if (!a) {
+        DEFINE_NAME(ii_buffer->ii);
+        MERR("[ii][buffer][merge] failed to allocate an array: "
+             "<%.*s>: "
+             "<%u>",
+             name_size, name,
+             tid);
+        return;
+      }
+      term_buffer = get_term_buffer(ctx, ii_buffer);
+      if (!term_buffer) {
+        DEFINE_NAME(ii_buffer->ii);
+        MERR("[ii][buffer][merge] failed to allocate a term buffer: "
+             "<%.*s>: "
+             "<%u>",
+             name_size, name,
+             tid);
+        /* Release the array element obtained above. */
+        array_unref(ii_buffer->ii, tid);
+        return;
+      }
+      nterm = term_buffer->header.nterms++;
+      bt = &term_buffer->terms[nterm];
+      a[0] = SEG2POS(ii_buffer->lseg,
+                     (sizeof(buffer_header) + sizeof(buffer_term) * nterm));
+      packed_len = grn_p_encv(ctx, ii_buffer->data_vectors,
+                              ii_buffer->ii->n_elements,
+                              ii_buffer->packed_buf +
+                              ii_buffer->packed_len);
+      a[1] = ii_buffer->data_vectors[0].data_size;
+      bt->tid = tid;
+      bt->size_in_buffer = 0;
+      bt->pos_in_buffer = 0;
+      bt->size_in_chunk = packed_len;
+      bt->pos_in_chunk = ii_buffer->packed_len;
+      ii_buffer->packed_len += packed_len;
+      if (((ii_buffer->curr_size * ii_buffer->update_buffer_size) +
+           (ii_buffer->total_size * term_buffer->header.nterms * 16)) >=
+          (ii_buffer->total_size * II_BUFFER_NTERMS_PER_BUFFER * 16)) {
+        grn_ii_buffer_chunk_flush(ctx, ii_buffer);
+      }
+      array_unref(ii_buffer->ii, tid);
+    }
+  }
+}
+
+/*
+ * grn_ii_buffer_open creates a grn_ii_buffer for ii.
+ *
+ * The counters and the block buffer are allocated and a temporary file whose
+ * path is the path of ii->seg followed by a suffix chosen by grn_mkstemp is
+ * created. If the lexicon of ii is a GRN_OBJ_TABLE_PAT_KEY table, its pat
+ * cache is enabled.
+ *
+ * NULL is returned if ii or ii->lexicon is NULL, if an allocation fails or
+ * if the temporary file cannot be created. Everything allocated so far is
+ * freed in that case.
+ */
+grn_ii_buffer *
+grn_ii_buffer_open(grn_ctx *ctx, grn_ii *ii,
+                   long long unsigned int update_buffer_size)
+{
+  grn_ii_buffer *ii_buffer;
+  grn_table_flags flags;
+
+  if (!ii || !ii->lexicon) {
+    ERR(GRN_INVALID_ARGUMENT, "ii or ii->lexicon is NULL");
+    return NULL;
+  }
+
+  ii_buffer = GRN_MALLOCN(grn_ii_buffer, 1);
+  if (!ii_buffer) {
+    return NULL;
+  }
+
+  /* Start from an empty state. */
+  ii_buffer->ii = ii;
+  ii_buffer->lexicon = ii->lexicon;
+  ii_buffer->tmp_lexicon = NULL;
+  ii_buffer->nblocks = 0;
+  ii_buffer->blocks = NULL;
+  ii_buffer->ncounters = II_BUFFER_NCOUNTERS_MARGIN;
+  ii_buffer->block_pos = 0;
+  ii_buffer->filepos = 0;
+  ii_buffer->curr_size = 0;
+  ii_buffer->total_size = 0;
+  ii_buffer->update_buffer_size = update_buffer_size;
+  ii_buffer->term_buffer = NULL;
+  ii_buffer->packed_buf = NULL;
+  ii_buffer->packed_len = 0;
+  ii_buffer->packed_buf_size = 0;
+  ii_buffer->total_chunk_size = 0;
+  ii_buffer->values = NULL;
+  ii_buffer->nvalues = 0;
+  ii_buffer->max_nvalues = 0;
+  ii_buffer->last_rid = 0;
+
+  ii_buffer->counters = GRN_CALLOC(ii_buffer->ncounters *
+                                   sizeof(ii_buffer_counter));
+  if (!ii_buffer->counters) {
+    GRN_FREE(ii_buffer);
+    return NULL;
+  }
+
+  ii_buffer->block_buf = GRN_MALLOCN(grn_id, II_BUFFER_BLOCK_SIZE);
+  if (!ii_buffer->block_buf) {
+    GRN_FREE(ii_buffer->counters);
+    GRN_FREE(ii_buffer);
+    return NULL;
+  }
+  ii_buffer->block_buf_size = II_BUFFER_BLOCK_SIZE;
+
+  /* Create the temporary file next to the index file. */
+  grn_snprintf(ii_buffer->tmpfpath, PATH_MAX, PATH_MAX,
+               "%-.256sXXXXXX", grn_io_path(ii->seg));
+  ii_buffer->tmpfd = grn_mkstemp(ii_buffer->tmpfpath);
+  if (ii_buffer->tmpfd == -1) {
+    SERR("failed grn_mkstemp(%-.256s)",
+         ii_buffer->tmpfpath);
+    GRN_FREE(ii_buffer->block_buf);
+    GRN_FREE(ii_buffer->counters);
+    GRN_FREE(ii_buffer);
+    return NULL;
+  }
+
+  grn_table_get_info(ctx, ii->lexicon, &flags, NULL, NULL, NULL,
+                     NULL);
+  if ((flags & GRN_OBJ_TABLE_TYPE_MASK) == GRN_OBJ_TABLE_PAT_KEY) {
+    grn_pat_cache_enable(ctx, (grn_pat *)ii->lexicon,
+                         PAT_CACHE_SIZE);
+  }
+  return ii_buffer;
+}
+
+/*
+ * ii_buffer_value_init initializes a value as an empty value that owns no
+ * buffer.
+ */
+static void
+ii_buffer_value_init(grn_ctx *ctx, ii_buffer_value *value)
+{
+  /* Attributes of the value. */
+  value->sid = 0;
+  value->weight = 0;
+
+  /* Referred data. */
+  value->p = NULL;
+  value->len = 0;
+
+  /* Owned buffer for a deep copy. */
+  value->buf = NULL;
+  value->cap = 0;
+}
+
+/* ii_buffer_value_fin frees the buffer owned by a value, if any. */
+static void
+ii_buffer_value_fin(grn_ctx *ctx, ii_buffer_value *value)
+{
+  if (!value->buf) {
+    return;
+  }
+  GRN_FREE(value->buf);
+}
+
+/*
+ * ii_buffer_values_append appends a value to ii_buffer.
+ * This function deep-copies the value if need_copy == GRN_TRUE.
+ *
+ * The array of values is doubled (or gets its first element) when it is
+ * full, and the added elements are initialized with ii_buffer_value_init.
+ * For a deep copy, the buffer owned by the value is enlarged if it is
+ * smaller than len, and the value then refers to that buffer instead of p.
+ * No bytes are copied for an empty value (len == 0), so the copy is never
+ * performed with a buffer that has not been allocated.
+ *
+ * The value is not appended if a reallocation fails.
+ */
+static void
+ii_buffer_values_append(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
+                        unsigned int sid, unsigned weight,
+                        const char *p, uint32_t len, grn_bool need_copy)
+{
+  if (ii_buffer->nvalues == ii_buffer->max_nvalues) {
+    unsigned int i;
+    unsigned int new_max_nvalues = ii_buffer->max_nvalues * 2;
+    unsigned int new_size;
+    ii_buffer_value *new_values;
+    if (new_max_nvalues == 0) {
+      new_max_nvalues = 1;
+    }
+    new_size = new_max_nvalues * sizeof(ii_buffer_value);
+    new_values = (ii_buffer_value *)GRN_REALLOC(ii_buffer->values, new_size);
+    if (!new_values) {
+      return;
+    }
+    for (i = ii_buffer->max_nvalues; i < new_max_nvalues; i++) {
+      ii_buffer_value_init(ctx, &new_values[i]);
+    }
+    ii_buffer->values = new_values;
+    ii_buffer->max_nvalues = new_max_nvalues;
+  }
+
+  {
+    ii_buffer_value *value = &ii_buffer->values[ii_buffer->nvalues];
+    if (need_copy) {
+      if (len > value->cap) {
+        char *new_buf = (char *)GRN_REALLOC(value->buf, len);
+        if (!new_buf) {
+          return;
+        }
+        value->buf = new_buf;
+        value->cap = len;
+      }
+      if (len > 0) {
+        /* value->buf may still be NULL when len is 0. */
+        grn_memcpy(value->buf, p, len);
+      }
+      p = value->buf;
+    }
+    value->sid = sid;
+    value->weight = weight;
+    value->p = p;
+    value->len = len;
+    ii_buffer->nvalues++;
+  }
+}
+
+/*
+ * grn_ii_buffer_append appends a value of the section sid of the record rid
+ * to ii_buffer.
+ *
+ * Values are collected per record: when rid differs from the record ID of
+ * the previous call, the values collected for the previous record are
+ * tokenized first. The value is deep-copied.
+ * ctx->rc is returned.
+ */
+grn_rc
+grn_ii_buffer_append(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
+                     grn_id rid, unsigned int sid, grn_obj *value)
+{
+  grn_id previous_rid = ii_buffer->last_rid;
+
+  if (rid != previous_rid) {
+    if (previous_rid) {
+      grn_ii_buffer_tokenize(ctx, ii_buffer, previous_rid);
+    }
+    ii_buffer->last_rid = rid;
+  }
+
+  ii_buffer_values_append(ctx, ii_buffer, sid, 0,
+                          GRN_TEXT_VALUE(value), GRN_TEXT_LEN(value),
+                          GRN_TRUE);
+  return ctx->rc;
+}
+
+/*
+ * grn_ii_buffer_commit completes tokenization and builds an inverted index
+ * from data in a temporary file.
+ *
+ * The steps are as follows:
+ * 1. The remaining values are tokenized, the last block is flushed and the
+ *    resources used for tokenization (the file descriptor, the block buffer
+ *    and the counters) are released.
+ * 2. If update_buffer_size is between 1 and 19, it is replaced with
+ *    total_size shifted by its distance from 10.
+ * 3. The temporary file is reopened for reading and the first term of each
+ *    block is fetched.
+ * 4. The terms of the lexicon are visited in II_BUFFER_ORDER and, for each
+ *    term, the blocks that contain the term are merged.
+ * 5. The temporary file is closed and removed.
+ *
+ * If the temporary file cannot be reopened, the data vectors are finalized
+ * and the temporary file is removed before returning, because
+ * grn_ii_buffer_close only removes the file while ii_buffer->tmpfd is valid.
+ *
+ * ctx->rc is returned.
+ */
+grn_rc
+grn_ii_buffer_commit(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
+{
+  /* Tokenize the remaining values and free resources. */
+  if (ii_buffer->last_rid && ii_buffer->nvalues) {
+    grn_ii_buffer_tokenize(ctx, ii_buffer, ii_buffer->last_rid);
+  }
+  if (ii_buffer->block_pos) {
+    grn_ii_buffer_flush(ctx, ii_buffer);
+  }
+  if (ii_buffer->tmpfd != -1) {
+    grn_close(ii_buffer->tmpfd);
+  }
+  if (ii_buffer->block_buf) {
+    GRN_FREE(ii_buffer->block_buf);
+    ii_buffer->block_buf = NULL;
+  }
+  if (ii_buffer->counters) {
+    GRN_FREE(ii_buffer->counters);
+    ii_buffer->counters = NULL;
+  }
+
+  if (ii_buffer->update_buffer_size &&
+      ii_buffer->update_buffer_size < 20) {
+    if (ii_buffer->update_buffer_size < 10) {
+      ii_buffer->update_buffer_size =
+        ii_buffer->total_size >> (10 - ii_buffer->update_buffer_size);
+    } else {
+      ii_buffer->update_buffer_size =
+        ii_buffer->total_size << (ii_buffer->update_buffer_size - 10);
+    }
+  }
+
+  GRN_LOG(ctx, GRN_LOG_DEBUG,
+          "nblocks=%d, update_buffer_size=%" GRN_FMT_INT64U,
+          ii_buffer->nblocks, ii_buffer->update_buffer_size);
+
+  datavec_init(ctx, ii_buffer->data_vectors, ii_buffer->ii->n_elements, 0, 0);
+  grn_open(ii_buffer->tmpfd,
+           ii_buffer->tmpfpath,
+           O_RDONLY | GRN_OPEN_FLAG_BINARY);
+  if (ii_buffer->tmpfd == -1) {
+    ERRNO_ERR("failed to open path: <%-.256s>", ii_buffer->tmpfpath);
+    datavec_fin(ctx, ii_buffer->data_vectors);
+    /*
+     * Best-effort removal: a failure is not reported so that the error
+     * above is kept.
+     */
+    if (grn_unlink(ii_buffer->tmpfpath) == 0) {
+      GRN_LOG(ctx, GRN_LOG_INFO,
+              "[ii][buffer][commit] removed temporary path: <%-.256s>",
+              ii_buffer->tmpfpath);
+    }
+    return ctx->rc;
+  }
+  {
+    /* Fetch the first term of each block. */
+    uint32_t i;
+    for (i = 0; i < ii_buffer->nblocks; i++) {
+      grn_ii_buffer_fetch(ctx, ii_buffer, &ii_buffer->blocks[i]);
+    }
+  }
+  {
+    ii_buffer_block **hits;
+    if ((hits = GRN_MALLOCN(ii_buffer_block *, ii_buffer->nblocks))) {
+      grn_id tid;
+      grn_table_cursor *tc;
+      tc = grn_table_cursor_open(ctx, ii_buffer->lexicon,
+                                 NULL, 0, NULL, 0, 0, -1, II_BUFFER_ORDER);
+      if (tc) {
+        while ((tid = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL) {
+          /*
+           * Find blocks which contain the current term.
+           * Then, merge the postings.
+           */
+          int nrests = 0;
+          int nhits = 0;
+          uint32_t i;
+          for (i = 0; i < ii_buffer->nblocks; i++) {
+            if (ii_buffer->blocks[i].tid == tid) {
+              hits[nhits++] = &ii_buffer->blocks[i];
+            }
+            if (ii_buffer->blocks[i].tid) { nrests++; }
+          }
+          if (nhits) { grn_ii_buffer_merge(ctx, ii_buffer, tid, hits, nhits); }
+          /* Stop as soon as every block is exhausted. */
+          if (!nrests) { break; }
+        }
+        if (ii_buffer->packed_len) {
+          grn_ii_buffer_chunk_flush(ctx, ii_buffer);
+        }
+        grn_table_cursor_close(ctx, tc);
+      }
+      GRN_FREE(hits);
+    }
+  }
+  datavec_fin(ctx, ii_buffer->data_vectors);
+  GRN_LOG(ctx, GRN_LOG_DEBUG,
+          "tmpfile_size:%" GRN_FMT_INT64D " > total_chunk_size:%" GRN_FMT_SIZE,
+          ii_buffer->filepos, ii_buffer->total_chunk_size);
+  grn_close(ii_buffer->tmpfd);
+  if (grn_unlink(ii_buffer->tmpfpath) == 0) {
+    GRN_LOG(ctx, GRN_LOG_INFO,
+            "[ii][buffer][commit] removed temporary path: <%-.256s>",
+            ii_buffer->tmpfpath);
+  } else {
+    ERRNO_ERR("[ii][buffer][commit] failed to remove temporary path: <%-.256s>",
+              ii_buffer->tmpfpath);
+  }
+  ii_buffer->tmpfd = -1;
+  return ctx->rc;
+}
+
+/*
+ * grn_ii_buffer_close releases ii_buffer and everything it owns.
+ *
+ * The pat cache of the lexicon is disabled if the lexicon is a
+ * GRN_OBJ_TABLE_PAT_KEY table. The temporary lexicon is closed and the
+ * temporary file is closed and removed if they still exist. Then the block
+ * buffer, the counters, the blocks with their buffers and the values with
+ * their buffers are freed.
+ * ctx->rc is returned.
+ */
+grn_rc
+grn_ii_buffer_close(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
+{
+  grn_obj *lexicon = ii_buffer->ii->lexicon;
+  grn_table_flags flags;
+  uint32_t i;
+
+  grn_table_get_info(ctx, lexicon, &flags, NULL, NULL, NULL,
+                     NULL);
+  if ((flags & GRN_OBJ_TABLE_TYPE_MASK) == GRN_OBJ_TABLE_PAT_KEY) {
+    grn_pat_cache_disable(ctx, (grn_pat *)lexicon);
+  }
+
+  if (ii_buffer->tmp_lexicon) {
+    grn_obj_close(ctx, ii_buffer->tmp_lexicon);
+  }
+
+  if (ii_buffer->tmpfd != -1) {
+    grn_close(ii_buffer->tmpfd);
+    if (grn_unlink(ii_buffer->tmpfpath) != 0) {
+      ERRNO_ERR("[ii][buffer][close] failed to remove temporary path: <%-.256s>",
+                ii_buffer->tmpfpath);
+    } else {
+      GRN_LOG(ctx, GRN_LOG_INFO,
+              "[ii][buffer][close] removed temporary path: <%-.256s>",
+              ii_buffer->tmpfpath);
+    }
+  }
+
+  if (ii_buffer->block_buf) {
+    GRN_FREE(ii_buffer->block_buf);
+  }
+  if (ii_buffer->counters) {
+    GRN_FREE(ii_buffer->counters);
+  }
+
+  if (ii_buffer->blocks) {
+    /* Each block may own a read buffer. */
+    for (i = 0; i < ii_buffer->nblocks; i++) {
+      ii_buffer_block *block = &ii_buffer->blocks[i];
+      if (block->buffer) {
+        GRN_FREE(block->buffer);
+      }
+    }
+    GRN_FREE(ii_buffer->blocks);
+  }
+
+  if (ii_buffer->values) {
+    /* Every allocated element is finalized, not only the elements in use. */
+    for (i = 0; i < ii_buffer->max_nvalues; i++) {
+      ii_buffer_value_fin(ctx, &ii_buffer->values[i]);
+    }
+    GRN_FREE(ii_buffer->values);
+  }
+
+  GRN_FREE(ii_buffer);
+  return ctx->rc;
+}
+
+/*
+ * grn_ii_buffer_parse tokenizes values to be indexed.
+ *
+ * For each record of the target table, grn_ii_buffer_parse collects the
+ * values of the source columns and calls grn_ii_buffer_tokenize. The section
+ * ID of a value is the 1-based index of its column in cols.
+ * ii_buffer_values_append is called once for a bulk, once for each element
+ * of a uvector and once for each non-empty section of a vector (with the
+ * weight of the section). The values are not deep-copied; they refer to the
+ * per-column objects, which are kept until the record is tokenized.
+ */
+static void
+grn_ii_buffer_parse(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
+                    grn_obj *target, int ncols, grn_obj **cols)
+{
+  grn_table_cursor *tc;
+  grn_obj *vobjs;
+  int i;
+
+  vobjs = GRN_MALLOCN(grn_obj, ncols);
+  if (!vobjs) {
+    return;
+  }
+  for (i = 0; i < ncols; i++) {
+    GRN_TEXT_INIT(&vobjs[i], 0);
+  }
+
+  tc = grn_table_cursor_open(ctx, target,
+                             NULL, 0, NULL, 0, 0, -1,
+                             GRN_CURSOR_BY_ID);
+  if (tc) {
+    grn_id rid;
+    while ((rid = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL) {
+      int column_index;
+      for (column_index = 0; column_index < ncols; column_index++) {
+        int sid = column_index + 1;
+        grn_obj *col = cols[column_index];
+        grn_obj *rv = &vobjs[column_index];
+
+        /* Read the value (or the key, for a table) of this record. */
+        grn_obj_reinit_for(ctx, rv, col);
+        if (GRN_OBJ_TABLEP(col)) {
+          grn_table_get_key2(ctx, col, rid, rv);
+        } else {
+          grn_obj_get_value(ctx, col, rid, rv);
+        }
+
+        switch (rv->header.type) {
+        case GRN_BULK :
+          ii_buffer_values_append(ctx, ii_buffer, sid, 0,
+                                  GRN_TEXT_VALUE(rv), GRN_TEXT_LEN(rv),
+                                  GRN_FALSE);
+          break;
+        case GRN_UVECTOR :
+          {
+            unsigned int n_elements = grn_uvector_size(ctx, rv);
+            unsigned int elem_size = grn_uvector_element_size(ctx, rv);
+            unsigned int nth;
+            for (nth = 0; nth < n_elements; nth++) {
+              ii_buffer_values_append(ctx, ii_buffer, sid, 0,
+                                      GRN_BULK_HEAD(rv) + (elem_size * nth),
+                                      elem_size, GRN_FALSE);
+            }
+          }
+          break;
+        case GRN_VECTOR :
+          if (rv->u.v.body) {
+            int n_sections = rv->u.v.n_sections;
+            grn_section *sections = rv->u.v.sections;
+            const char *head = GRN_BULK_HEAD(rv->u.v.body);
+            int nth;
+            for (nth = 0; nth < n_sections; nth++) {
+              grn_section *section = sections + nth;
+              if (section->length == 0) {
+                continue;
+              }
+              ii_buffer_values_append(ctx, ii_buffer, sid, section->weight,
+                                      head + section->offset,
+                                      section->length, GRN_FALSE);
+            }
+          }
+          break;
+        default :
+          ERR(GRN_INVALID_ARGUMENT,
+              "[index] invalid object assigned as value");
+          break;
+        }
+      }
+      grn_ii_buffer_tokenize(ctx, ii_buffer, rid);
+    }
+    grn_table_cursor_close(ctx, tc);
+  }
+
+  for (i = 0; i < ncols; i++) {
+    GRN_OBJ_FIN(ctx, &vobjs[i]);
+  }
+  GRN_FREE(vobjs);
+}
+
+/*
+ * grn_ii_build builds the inverted index ii from its source columns.
+ *
+ * Nothing is done if the range of ii cannot be resolved or refers to an
+ * empty table. Otherwise a grn_ii_buffer is opened with sparsity as its
+ * update buffer size, the source columns are resolved, the records of the
+ * target table are parsed and the result is committed. The target table is
+ * the first source itself if it is a table, or the domain of the first
+ * source otherwise.
+ * ctx->rc is returned.
+ */
+grn_rc
+grn_ii_build(grn_ctx *ctx, grn_ii *ii, uint64_t sparsity)
+{
+  grn_ii_buffer *ii_buffer;
+  grn_obj *data_table;
+
+  /* Do nothing if there are no targets. */
+  data_table = grn_ctx_at(ctx, DB_OBJ(ii)->range);
+  if (!data_table || grn_table_size(ctx, data_table) == 0) {
+    return ctx->rc;
+  }
+
+  ii_buffer = grn_ii_buffer_open(ctx, ii, sparsity);
+  if (!ii_buffer) {
+    return ctx->rc;
+  }
+
+  if (!ii->obj.source_size || !ii->obj.source) {
+    ERR(GRN_INVALID_ARGUMENT, "ii->obj.source is void");
+  } else {
+    grn_id *source = (grn_id *)ii->obj.source;
+    int ncols = ii->obj.source_size / sizeof(grn_id);
+    grn_obj **cols = GRN_MALLOCN(grn_obj *, ncols);
+    if (cols) {
+      /* Resolve the source columns; stop at the first failure. */
+      int n_resolved = 0;
+      while (n_resolved < ncols) {
+        cols[n_resolved] = grn_ctx_at(ctx, source[n_resolved]);
+        if (!cols[n_resolved]) {
+          break;
+        }
+        n_resolved++;
+      }
+      if (n_resolved != ncols) {
+        ERR(GRN_INVALID_ARGUMENT, "failed to resolve a column (%d)",
+            n_resolved);
+      } else {
+        /* All the source columns are available. */
+        grn_obj *target = cols[0];
+        if (!GRN_OBJ_TABLEP(target)) {
+          target = grn_ctx_at(ctx, target->header.domain);
+        }
+        if (!target) {
+          ERR(GRN_INVALID_ARGUMENT, "failed to resolve the target");
+        } else {
+          grn_ii_buffer_parse(ctx, ii_buffer, target, ncols, cols);
+          grn_ii_buffer_commit(ctx, ii_buffer);
+        }
+      }
+      GRN_FREE(cols);
+    }
+  }
+
+  grn_ii_buffer_close(ctx, ii_buffer);
+  return ctx->rc;
+}
+
+/*
+ * ==========================================================================
+ * The following part provides constants, structures and functions for static
+ * indexing.
+ * ==========================================================================
+ */
+
+#define GRN_II_BUILDER_BUFFER_CHUNK_SIZE (S_CHUNK >> 2) /* A quarter of S_CHUNK */
+
+#define GRN_II_BUILDER_MAX_LEXICON_CACHE_SIZE (1 << 24) /* Upper bound of options->lexicon_cache_size (no lower bound is applied) */
+
+#define GRN_II_BUILDER_MIN_BLOCK_THRESHOLD 1 /* Lower bound of options->block_threshold */
+#define GRN_II_BUILDER_MAX_BLOCK_THRESHOLD (1 << 28) /* Upper bound of options->block_threshold */
+
+#define GRN_II_BUILDER_MIN_FILE_BUF_SIZE (1 << 12) /* Lower bound of options->file_buf_size */
+#define GRN_II_BUILDER_MAX_FILE_BUF_SIZE (1 << 30) /* Upper bound of options->file_buf_size */
+
+#define GRN_II_BUILDER_MIN_BLOCK_BUF_SIZE (1 << 12) /* Lower bound of options->block_buf_size */
+#define GRN_II_BUILDER_MAX_BLOCK_BUF_SIZE (1 << 30) /* Upper bound of options->block_buf_size */
+
+#define GRN_II_BUILDER_MIN_CHUNK_THRESHOLD 1 /* Lower bound of options->chunk_threshold */
+#define GRN_II_BUILDER_MAX_CHUNK_THRESHOLD (1 << 28) /* Upper bound of options->chunk_threshold */
+
+#define GRN_II_BUILDER_MIN_BUFFER_MAX_N_TERMS 1 /* Lower bound of options->buffer_max_n_terms */
+#define GRN_II_BUILDER_MAX_BUFFER_MAX_N_TERMS \
+ ((S_SEGMENT - sizeof(buffer_header)) / sizeof(buffer_term)) /* Upper bound of options->buffer_max_n_terms: the number of buffer_term entries that fit in S_SEGMENT bytes after a buffer_header */
+
+struct grn_ii_builder_options { /* Options for static indexing; out-of-range values are adjusted by grn_ii_builder_options_fix. */
+ uint32_t lexicon_cache_size; /* Cache size of temporary lexicon (at most GRN_II_BUILDER_MAX_LEXICON_CACHE_SIZE) */
+ /* A block is flushed if builder->n reaches this value. */
+ uint32_t block_threshold;
+ uint32_t file_buf_size; /* Buffer size for buffered output */
+ uint32_t block_buf_size; /* Buffer size for buffered input */
+ /* A chunk is flushed if chunk->n reaches this value. */
+ uint32_t chunk_threshold;
+ uint32_t buffer_max_n_terms; /* Maximum number of terms in each buffer */
+};
+
+static const grn_ii_builder_options grn_ii_builder_default_options = { /* Default options, listed in the order of the struct members */
+ 0x80000, /* lexicon_cache_size (524288) */
+ 0x4000000, /* block_threshold (67108864) */
+ 0x10000, /* file_buf_size (65536) */
+ 0x10000, /* block_buf_size (65536) */
+ 0x1000, /* chunk_threshold (4096) */
+ 0x3000, /* buffer_max_n_terms (12288) */
+};
+
+/*
+ * grn_ii_builder_options_init fills options with the default options
+ * (grn_ii_builder_default_options).
+ */
+void
+grn_ii_builder_options_init(grn_ii_builder_options *options)
+{
+  grn_memcpy(options, &grn_ii_builder_default_options, sizeof(*options));
+}
+
+/*
+ * grn_ii_builder_options_fix fixes out-of-range options.
+ *
+ * Each option is moved to the nearest bound given by the corresponding
+ * GRN_II_BUILDER_MIN_... and GRN_II_BUILDER_MAX_... macros.
+ */
+static void
+grn_ii_builder_options_fix(grn_ii_builder_options *options)
+{
+  /* lexicon_cache_size has an upper bound only. */
+  if (options->lexicon_cache_size > GRN_II_BUILDER_MAX_LEXICON_CACHE_SIZE) {
+    options->lexicon_cache_size = GRN_II_BUILDER_MAX_LEXICON_CACHE_SIZE;
+  }
+
+  if (options->block_threshold < GRN_II_BUILDER_MIN_BLOCK_THRESHOLD) {
+    options->block_threshold = GRN_II_BUILDER_MIN_BLOCK_THRESHOLD;
+  } else if (options->block_threshold > GRN_II_BUILDER_MAX_BLOCK_THRESHOLD) {
+    options->block_threshold = GRN_II_BUILDER_MAX_BLOCK_THRESHOLD;
+  }
+
+  if (options->file_buf_size < GRN_II_BUILDER_MIN_FILE_BUF_SIZE) {
+    options->file_buf_size = GRN_II_BUILDER_MIN_FILE_BUF_SIZE;
+  } else if (options->file_buf_size > GRN_II_BUILDER_MAX_FILE_BUF_SIZE) {
+    options->file_buf_size = GRN_II_BUILDER_MAX_FILE_BUF_SIZE;
+  }
+
+  if (options->block_buf_size < GRN_II_BUILDER_MIN_BLOCK_BUF_SIZE) {
+    options->block_buf_size = GRN_II_BUILDER_MIN_BLOCK_BUF_SIZE;
+  } else if (options->block_buf_size > GRN_II_BUILDER_MAX_BLOCK_BUF_SIZE) {
+    options->block_buf_size = GRN_II_BUILDER_MAX_BLOCK_BUF_SIZE;
+  }
+
+  if (options->chunk_threshold < GRN_II_BUILDER_MIN_CHUNK_THRESHOLD) {
+    options->chunk_threshold = GRN_II_BUILDER_MIN_CHUNK_THRESHOLD;
+  } else if (options->chunk_threshold > GRN_II_BUILDER_MAX_CHUNK_THRESHOLD) {
+    options->chunk_threshold = GRN_II_BUILDER_MAX_CHUNK_THRESHOLD;
+  }
+
+  /*
+   * The upper bound of buffer_max_n_terms depends on structure sizes, so
+   * both bounds are checked independently.
+   */
+  if (options->buffer_max_n_terms < GRN_II_BUILDER_MIN_BUFFER_MAX_N_TERMS) {
+    options->buffer_max_n_terms = GRN_II_BUILDER_MIN_BUFFER_MAX_N_TERMS;
+  }
+  if (options->buffer_max_n_terms > GRN_II_BUILDER_MAX_BUFFER_MAX_N_TERMS) {
+    options->buffer_max_n_terms = GRN_II_BUILDER_MAX_BUFFER_MAX_N_TERMS;
+  }
+}
+
+/*
+ * Number of bytes from the `dummy` member to the end of grn_ii_builder_term.
+ * This is the capacity of the in-place buffer, which starts at `dummy`
+ * (see grn_ii_builder_term_get_buf) and therefore overlaps `buf`.
+ */
+#define GRN_II_BUILDER_TERM_INPLACE_SIZE\
+ (sizeof(grn_ii_builder_term) - (uintptr_t)&((grn_ii_builder_term *)0)->dummy)
+
+/*
+ * Per-term output buffer of the builder.
+ *
+ * While size == GRN_II_BUILDER_TERM_INPLACE_SIZE the encoded bytes live
+ * inside this struct, starting at `dummy`, and `buf` must not be used as a
+ * pointer. After grn_ii_builder_term_extend has run, `buf` points to heap
+ * memory of `size` bytes that grn_ii_builder_term_fin frees.
+ */
+typedef struct {
+ grn_id rid; /* Last record ID */
+ uint32_t sid; /* Last section ID */
+ /* Last position (GRN_OBJ_WITH_POSITION) or frequency. */
+ uint32_t pos_or_freq;
+ uint32_t offset; /* Buffer write offset */
+ uint32_t size; /* Buffer size */
+ uint32_t dummy; /* Padding */
+ uint8_t *buf; /* Buffer (to be freed) */
+} grn_ii_builder_term;
+
+/*
+ * grn_ii_builder_term_is_inplace tells whether a term still stores its data
+ * inside the term structure itself, i.e. its buffer size equals
+ * GRN_II_BUILDER_TERM_INPLACE_SIZE.
+ */
+inline static grn_bool
+grn_ii_builder_term_is_inplace(grn_ii_builder_term *term)
+{
+  const uint32_t current_size = term->size;
+
+  return GRN_II_BUILDER_TERM_INPLACE_SIZE == current_size;
+}
+
+/*
+ * grn_ii_builder_term_get_buf returns the start of a term's data: the
+ * address of `dummy` for an in-place term, term->buf otherwise.
+ */
+inline static uint8_t *
+grn_ii_builder_term_get_buf(grn_ii_builder_term *term)
+{
+  return grn_ii_builder_term_is_inplace(term) ? (uint8_t *)&term->dummy
+                                              : term->buf;
+}
+
+/*
+ * grn_ii_builder_term_init puts a term into its empty, in-place state.
+ * term->buf is left untouched because it is not used while the term is
+ * in-place. An initialized term must be finalized with
+ * grn_ii_builder_term_fin.
+ */
+static void
+grn_ii_builder_term_init(grn_ctx *ctx, grn_ii_builder_term *term)
+{
+  /* Empty in-place buffer. */
+  term->size = GRN_II_BUILDER_TERM_INPLACE_SIZE;
+  term->offset = 0;
+
+  /* Nothing has been appended yet. */
+  term->rid = GRN_ID_NIL;
+  term->sid = 0;
+  term->pos_or_freq = 0;
+}
+
+/*
+ * grn_ii_builder_term_fin releases the heap buffer of a term, if it has one.
+ * In-place terms own no heap memory.
+ */
+static void
+grn_ii_builder_term_fin(grn_ctx *ctx, grn_ii_builder_term *term)
+{
+  if (grn_ii_builder_term_is_inplace(term)) {
+    return;
+  }
+  GRN_FREE(term->buf);
+}
+
+/*
+ * grn_ii_builder_term_reinit returns a term to its empty, in-place state,
+ * freeing its heap buffer first if it has one.
+ */
+static void
+grn_ii_builder_term_reinit(grn_ctx *ctx, grn_ii_builder_term *term)
+{
+  /* Finalize before initializing: init does not free anything itself. */
+  grn_ii_builder_term_fin(ctx, term);
+  grn_ii_builder_term_init(ctx, term);
+}
+
+/*
+ * grn_ii_builder_term_extend doubles the capacity of a term buffer.
+ *
+ * An in-place term is moved to newly allocated heap memory; a heap term is
+ * reallocated. On allocation failure an error is recorded, ctx->rc is
+ * returned and the term is left unchanged.
+ */
+static grn_rc
+grn_ii_builder_term_extend(grn_ctx *ctx, grn_ii_builder_term *term)
+{
+  const uint32_t new_size = term->size * 2;
+  uint8_t *new_buf;
+
+  if (!grn_ii_builder_term_is_inplace(term)) {
+    /* Already on the heap: grow the existing allocation. */
+    new_buf = (uint8_t *)GRN_REALLOC(term->buf, new_size);
+    if (!new_buf) {
+      ERR(GRN_NO_MEMORY_AVAILABLE,
+          "failed to reallocate memory for term buffer: size = %u", new_size);
+      return ctx->rc;
+    }
+  } else {
+    /* First extension: move the in-place bytes to the heap. */
+    new_buf = (uint8_t *)GRN_MALLOC(new_size);
+    if (!new_buf) {
+      ERR(GRN_NO_MEMORY_AVAILABLE,
+          "failed to allocate memory for term buffer: size = %u", new_size);
+      return ctx->rc;
+    }
+    /*
+     * Copy before term->buf is assigned below: term->buf lies inside the
+     * in-place area that is being copied.
+     */
+    grn_memcpy(new_buf, &term->dummy, term->offset);
+  }
+
+  term->buf = new_buf;
+  term->size = new_size;
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_term_append appends an integer to a term buffer using a
+ * variable-length encoding of 1 to 8 bytes.
+ *
+ * The first byte holds the low 5 bits of the value in its low bits and
+ * (length - 1) in its high 3 bits. Each following byte holds the next
+ * 8 bits of the value, least significant first. With 8 bytes, 61 bits are
+ * stored; higher bits of the value are not written.
+ *
+ * The buffer is extended once when the encoded bytes do not fit. Returns
+ * GRN_SUCCESS or the error from grn_ii_builder_term_extend.
+ */
+inline static grn_rc
+grn_ii_builder_term_append(grn_ctx *ctx, grn_ii_builder_term *term,
+                           uint64_t value)
+{
+  uint8_t *dest;
+  uint8_t i, n_bytes = 1;
+  uint64_t limit = (uint64_t)1 << 5;
+
+  /* Pick the shortest length whose 5 + 8 * (n_bytes - 1) bits are enough. */
+  while (n_bytes < 8 && value >= limit) {
+    n_bytes++;
+    limit <<= 8;
+  }
+
+  if (term->offset + n_bytes > term->size) {
+    grn_rc rc = grn_ii_builder_term_extend(ctx, term);
+    if (rc != GRN_SUCCESS) {
+      return rc;
+    }
+  }
+
+  dest = grn_ii_builder_term_get_buf(term) + term->offset;
+  dest[0] = (uint8_t)(value & 0x1f) | ((n_bytes - 1) << 5);
+  value >>= 5;
+  for (i = 1; i < n_bytes; i++) {
+    dest[i] = (uint8_t)value;
+    value >>= 8;
+  }
+  term->offset += n_bytes;
+  return GRN_SUCCESS;
+}
+
+/*
+ * A block registered by grn_ii_builder_register_block. `offset` and `rest`
+ * are derived from temporary-file offsets there; `cur` and `end` delimit
+ * the bytes that grn_ii_builder_block_next decodes.
+ */
+typedef struct {
+ uint64_t offset; /* File offset */
+ uint32_t rest; /* Remaining size */
+ uint8_t *buf; /* Buffer (to be freed) */
+ uint8_t *cur; /* Current pointer */
+ uint8_t *end; /* End pointer */
+ uint32_t tid; /* Term ID */
+} grn_ii_builder_block;
+
+/*
+ * grn_ii_builder_block_init resets a block to its empty state: no buffer,
+ * zero offset and size, and no term ID. An initialized block must be
+ * finalized with grn_ii_builder_block_fin.
+ */
+static void
+grn_ii_builder_block_init(grn_ctx *ctx, grn_ii_builder_block *block)
+{
+  /* No buffer is attached yet. */
+  block->buf = NULL;
+  block->cur = NULL;
+  block->end = NULL;
+
+  /* Position in the file and current term. */
+  block->offset = 0;
+  block->rest = 0;
+  block->tid = GRN_ID_NIL;
+}
+
+/* grn_ii_builder_block_fin frees the buffer of a block, if it has one. */
+static void
+grn_ii_builder_block_fin(grn_ctx *ctx, grn_ii_builder_block *block)
+{
+  if (!block->buf) {
+    return;
+  }
+  GRN_FREE(block->buf);
+}
+
+/*
+ * grn_ii_builder_block_next decodes the next variable-length integer from
+ * block->cur and advances block->cur past it.
+ *
+ * The high 3 bits of the first byte give (length - 1); its low 5 bits are
+ * the low bits of the value and each following byte contributes the next
+ * 8 bits.
+ *
+ * Returns GRN_END_OF_DATA, leaving *value and block->cur untouched, when no
+ * byte is left or when the integer is not completely inside [cur, end).
+ */
+inline static grn_rc
+grn_ii_builder_block_next(grn_ctx *ctx, grn_ii_builder_block *block,
+                          uint64_t *value)
+{
+  const uint8_t *src = block->cur;
+  uint64_t decoded;
+  uint8_t i, n_bytes;
+
+  if (src == block->end) {
+    return GRN_END_OF_DATA;
+  }
+  n_bytes = (*src >> 5) + 1;
+  if (n_bytes > block->end - src) {
+    return GRN_END_OF_DATA;
+  }
+
+  decoded = src[0] & 0x1f;
+  for (i = 1; i < n_bytes; i++) {
+    /* Byte i carries bits [5 + 8 * (i - 1), 5 + 8 * i) of the value. */
+    decoded |= (uint64_t)src[i] << (5 + 8 * (i - 1));
+  }
+  *value = decoded;
+  block->cur += n_bytes;
+  return GRN_SUCCESS;
+}
+
+/*
+ * Output target of the builder inside the inverted index: one buffer
+ * segment plus the chunk assigned to it by grn_ii_builder_buffer_assign.
+ * `buf` and `chunk` are non-NULL only while the corresponding segment is
+ * referenced; grn_ii_builder_buffer_fin drops those references.
+ */
+typedef struct {
+ grn_ii *ii; /* Inverted index */
+ uint32_t buf_id; /* Buffer ID */
+ uint32_t buf_seg_id; /* Buffer segment ID */
+ buffer *buf; /* Buffer (to be unreferenced) */
+ uint32_t chunk_id; /* Chunk ID */
+ uint32_t chunk_seg_id; /* Chunk segment ID */
+ uint8_t *chunk; /* Chunk (to be unreferenced) */
+ uint32_t chunk_offset; /* Chunk write position */
+ uint32_t chunk_size; /* Chunk size */
+} grn_ii_builder_buffer;
+
+/*
+ * grn_ii_builder_buffer_init binds a buffer to an inverted index and marks
+ * it as unassigned (no buffer segment and no chunk). An initialized buffer
+ * must be finalized with grn_ii_builder_buffer_fin.
+ */
+static void
+grn_ii_builder_buffer_init(grn_ctx *ctx, grn_ii_builder_buffer *buf,
+                           grn_ii *ii)
+{
+  buf->ii = ii;
+
+  /* No buffer segment. */
+  buf->buf = NULL;
+  buf->buf_id = 0;
+  buf->buf_seg_id = 0;
+
+  /* No chunk. */
+  buf->chunk = NULL;
+  buf->chunk_id = 0;
+  buf->chunk_seg_id = 0;
+  buf->chunk_offset = 0;
+  buf->chunk_size = 0;
+}
+
+/*
+ * grn_ii_builder_buffer_fin drops the segment references held by a buffer:
+ * the buffer segment if buf->buf is set and the chunk segment if buf->chunk
+ * is set. The pointers themselves are not cleared.
+ */
+static void
+grn_ii_builder_buffer_fin(grn_ctx *ctx, grn_ii_builder_buffer *buf)
+{
+  const grn_bool has_buf = buf->buf != NULL;
+  const grn_bool has_chunk = buf->chunk != NULL;
+
+  if (has_buf) {
+    GRN_IO_SEG_UNREF(buf->ii->seg, buf->buf_seg_id);
+  }
+  if (has_chunk) {
+    GRN_IO_SEG_UNREF(buf->ii->chunk, buf->chunk_seg_id);
+  }
+}
+
+/*
+ * grn_ii_builder_buffer_is_assigned tells whether a buffer segment is
+ * currently attached, i.e. whether buf->buf is non-NULL.
+ */
+static grn_bool
+grn_ii_builder_buffer_is_assigned(grn_ctx *ctx, grn_ii_builder_buffer *buf)
+{
+  return buf->buf ? 1 : 0;
+}
+
+/*
+ * grn_ii_builder_buffer_assign allocates a new buffer segment and a new
+ * chunk for buf, references both and initializes the buffer header.
+ *
+ * The chunk size starts at GRN_II_BUILDER_BUFFER_CHUNK_SIZE and is doubled
+ * until it is not smaller than min_chunk_size.
+ *
+ * Returns GRN_SUCCESS on success and an error code otherwise. On failure,
+ * a reference that was already taken stays recorded in buf and is released
+ * by grn_ii_builder_buffer_fin.
+ */
+static grn_rc
+grn_ii_builder_buffer_assign(grn_ctx *ctx, grn_ii_builder_buffer *buf,
+                             size_t min_chunk_size)
+{
+  void *seg;
+  size_t chunk_size;
+  grn_rc rc;
+
+  /* Create a buffer. */
+  buf->buf_id = GRN_II_PSEG_NOT_ASSIGNED;
+  rc = buffer_segment_new(ctx, buf->ii, &buf->buf_id);
+  if (rc != GRN_SUCCESS) {
+    /*
+     * Record an error only when the callee has not recorded one already,
+     * as every other failure path in this function does.
+     */
+    if (ctx->rc == GRN_SUCCESS) {
+      ERR(rc, "failed to allocate segment for buffer");
+    }
+    return rc;
+  }
+  buf->buf_seg_id = buf->ii->header->binfo[buf->buf_id];
+  GRN_IO_SEG_REF(buf->ii->seg, buf->buf_seg_id, seg);
+  if (!seg) {
+    if (ctx->rc == GRN_SUCCESS) {
+      ERR(GRN_UNKNOWN_ERROR,
+          "failed access buffer segment: buf_id = %u, seg_id = %u",
+          buf->buf_id, buf->buf_seg_id);
+    }
+    return ctx->rc;
+  }
+  buf->buf = (buffer *)seg;
+
+  /* Create a chunk. */
+  chunk_size = GRN_II_BUILDER_BUFFER_CHUNK_SIZE;
+  while (chunk_size < min_chunk_size) {
+    chunk_size *= 2;
+  }
+  rc = chunk_new(ctx, buf->ii, &buf->chunk_id, chunk_size);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  /* The high bits of a chunk ID select the segment ... */
+  buf->chunk_seg_id = buf->chunk_id >> GRN_II_N_CHUNK_VARIATION;
+  GRN_IO_SEG_REF(buf->ii->chunk, buf->chunk_seg_id, seg);
+  if (!seg) {
+    if (ctx->rc == GRN_SUCCESS) {
+      ERR(GRN_UNKNOWN_ERROR,
+          "failed access chunk segment: chunk_id = %u, seg_id = %u",
+          buf->chunk_id, buf->chunk_seg_id);
+    }
+    return ctx->rc;
+  }
+  /* ... and the low bits select the position inside that segment. */
+  buf->chunk = (uint8_t *)seg;
+  buf->chunk += (buf->chunk_id & ((1 << GRN_II_N_CHUNK_VARIATION) - 1)) <<
+                GRN_II_W_LEAST_CHUNK;
+  buf->chunk_offset = 0;
+  buf->chunk_size = chunk_size;
+
+  /* Initialize the header of the new buffer and account for the chunk. */
+  buf->buf->header.chunk = buf->chunk_id;
+  buf->buf->header.chunk_size = chunk_size;
+  buf->buf->header.buffer_free = S_SEGMENT - sizeof(buffer_header);
+  buf->buf->header.nterms = 0;
+  buf->buf->header.nterms_void = 0;
+  buf->ii->header->total_chunk_size += chunk_size;
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_buffer_flush finishes the current buffer: it stores the
+ * remaining free space in the buffer header, logs the buffer statistics at
+ * debug level, releases the segment references and resets buf so that a new
+ * buffer can be assigned. Always returns GRN_SUCCESS.
+ */
+static grn_rc
+grn_ii_builder_buffer_flush(grn_ctx *ctx, grn_ii_builder_buffer *buf)
+{
+  /* Keep the index: grn_ii_builder_buffer_init needs it after fin. */
+  grn_ii *index = buf->ii;
+
+  buf->buf->header.buffer_free =
+    S_SEGMENT - sizeof(buffer_header) -
+    buf->buf->header.nterms * sizeof(buffer_term);
+
+  GRN_LOG(ctx, GRN_LOG_DEBUG,
+          "n_terms = %u, chunk_offset = %u, chunk_size = %u, total = %"
+          GRN_FMT_INT64U "KB",
+          buf->buf->header.nterms,
+          buf->chunk_offset,
+          buf->buf->header.chunk_size,
+          buf->ii->header->total_chunk_size >> 10);
+
+  grn_ii_builder_buffer_fin(ctx, buf);
+  grn_ii_builder_buffer_init(ctx, buf, index);
+  return GRN_SUCCESS;
+}
+
+/*
+ * Postings collected for one term before they are encoded.
+ *
+ * rid_buf, sid_buf, freq_buf and weight_buf share `offset` and `size`
+ * (counted in elements) and are grown together by
+ * grn_ii_builder_chunk_extend_bufs; sid_buf and weight_buf are allocated
+ * only for indexes with GRN_OBJ_WITH_SECTION / GRN_OBJ_WITH_WEIGHT.
+ * pos_buf and enc_buf have their own offsets and sizes.
+ */
+typedef struct {
+ grn_id tid; /* Term ID */
+ uint32_t n; /* Number of integers in buffers */
+ grn_id rid; /* Record ID */
+ uint32_t rid_gap; /* Record ID gap */
+ uint64_t pos_sum; /* Sum of position gaps */
+
+ uint32_t offset; /* Write offset */
+ uint32_t size; /* Buffer size */
+ grn_id *rid_buf; /* Buffer for record IDs (to be freed) */
+ uint32_t *sid_buf; /* Buffer for section IDs (to be freed) */
+ uint32_t *freq_buf; /* Buffer for frequencies (to be freed) */
+ uint32_t *weight_buf; /* Buffer for weights (to be freed) */
+
+ uint32_t pos_offset; /* Write offset of pos_buf */
+ uint32_t pos_size; /* Buffer size of pos_buf */
+ uint32_t *pos_buf; /* Buffer for positions (to be freed) */
+
+ size_t enc_offset; /* Write offset of enc_buf */
+ size_t enc_size; /* Buffer size of enc_buf */
+ uint8_t *enc_buf; /* Buffer for encoded data (to be freed) */
+} grn_ii_builder_chunk;
+
+/*
+ * grn_ii_builder_chunk_init resets a chunk to its empty state: no term, no
+ * record, zeroed statistics and no allocated buffers. An initialized chunk
+ * must be finalized with grn_ii_builder_chunk_fin.
+ */
+static void
+grn_ii_builder_chunk_init(grn_ctx *ctx, grn_ii_builder_chunk *chunk)
+{
+  /* Buffers are allocated lazily. */
+  chunk->rid_buf = NULL;
+  chunk->sid_buf = NULL;
+  chunk->freq_buf = NULL;
+  chunk->weight_buf = NULL;
+  chunk->pos_buf = NULL;
+  chunk->enc_buf = NULL;
+
+  /* Offsets and capacities of those buffers. */
+  chunk->offset = 0;
+  chunk->size = 0;
+  chunk->pos_offset = 0;
+  chunk->pos_size = 0;
+  chunk->enc_offset = 0;
+  chunk->enc_size = 0;
+
+  /* Current term, record and statistics. */
+  chunk->tid = GRN_ID_NIL;
+  chunk->rid = GRN_ID_NIL;
+  chunk->n = 0;
+  chunk->rid_gap = 0;
+  chunk->pos_sum = 0;
+}
+
+/*
+ * grn_ii_builder_chunk_fin frees every buffer the chunk has allocated.
+ * Buffers that were never allocated (NULL) are skipped.
+ */
+static void
+grn_ii_builder_chunk_fin(grn_ctx *ctx, grn_ii_builder_chunk *chunk)
+{
+  /* Posting buffers. */
+  if (chunk->rid_buf) {
+    GRN_FREE(chunk->rid_buf);
+  }
+  if (chunk->sid_buf) {
+    GRN_FREE(chunk->sid_buf);
+  }
+  if (chunk->freq_buf) {
+    GRN_FREE(chunk->freq_buf);
+  }
+  if (chunk->weight_buf) {
+    GRN_FREE(chunk->weight_buf);
+  }
+
+  /* Position buffer and encoding buffer. */
+  if (chunk->pos_buf) {
+    GRN_FREE(chunk->pos_buf);
+  }
+  if (chunk->enc_buf) {
+    GRN_FREE(chunk->enc_buf);
+  }
+}
+
+/*
+ * grn_ii_builder_chunk_clear empties the posting and position buffers and
+ * resets the statistics n, rid_gap and pos_sum. The term ID, the record ID,
+ * the allocated buffers with their capacities and the encoding buffer state
+ * are kept.
+ */
+static void
+grn_ii_builder_chunk_clear(grn_ctx *ctx, grn_ii_builder_chunk *chunk)
+{
+  /* Rewind the write offsets; the memory is reused. */
+  chunk->offset = 0;
+  chunk->pos_offset = 0;
+
+  /* Reset the statistics. */
+  chunk->n = 0;
+  chunk->rid_gap = 0;
+  chunk->pos_sum = 0;
+}
+
+/*
+ * grn_ii_builder_chunk_extend_bufs doubles the capacity (1 element at
+ * first) of rid_buf and freq_buf, of sid_buf when ii_flags has
+ * GRN_OBJ_WITH_SECTION, and of weight_buf when ii_flags has
+ * GRN_OBJ_WITH_WEIGHT. pos_buf and enc_buf are not touched.
+ *
+ * chunk->size is updated only after every reallocation succeeded. On
+ * failure an error is recorded and ctx->rc is returned.
+ */
+static grn_rc
+grn_ii_builder_chunk_extend_bufs(grn_ctx *ctx, grn_ii_builder_chunk *chunk,
+                                 uint32_t ii_flags)
+{
+  const uint32_t new_size = chunk->size ? chunk->size * 2 : 1;
+  const size_t n_bytes = new_size * sizeof(uint32_t);
+  uint32_t *new_buf;
+
+  /* Record IDs (always present). */
+  new_buf = (uint32_t *)GRN_REALLOC(chunk->rid_buf, n_bytes);
+  if (!new_buf) {
+    ERR(GRN_NO_MEMORY_AVAILABLE,
+        "failed to allocate memory for record IDs: n_bytes = %" GRN_FMT_SIZE,
+        n_bytes);
+    return ctx->rc;
+  }
+  chunk->rid_buf = new_buf;
+
+  /* Section IDs (optional). */
+  if (ii_flags & GRN_OBJ_WITH_SECTION) {
+    new_buf = (uint32_t *)GRN_REALLOC(chunk->sid_buf, n_bytes);
+    if (!new_buf) {
+      ERR(GRN_NO_MEMORY_AVAILABLE,
+          "failed to allocate memory for section IDs:"
+          " n_bytes = %" GRN_FMT_SIZE,
+          n_bytes);
+      return ctx->rc;
+    }
+    chunk->sid_buf = new_buf;
+  }
+
+  /* Frequencies (always present). */
+  new_buf = (uint32_t *)GRN_REALLOC(chunk->freq_buf, n_bytes);
+  if (!new_buf) {
+    ERR(GRN_NO_MEMORY_AVAILABLE,
+        "failed to allocate memory for frequencies: n_bytes = %" GRN_FMT_SIZE,
+        n_bytes);
+    return ctx->rc;
+  }
+  chunk->freq_buf = new_buf;
+
+  /* Weights (optional). */
+  if (ii_flags & GRN_OBJ_WITH_WEIGHT) {
+    new_buf = (uint32_t *)GRN_REALLOC(chunk->weight_buf, n_bytes);
+    if (!new_buf) {
+      ERR(GRN_NO_MEMORY_AVAILABLE,
+          "failed to allocate memory for weights: n_bytes = %" GRN_FMT_SIZE,
+          n_bytes);
+      return ctx->rc;
+    }
+    chunk->weight_buf = new_buf;
+  }
+
+  chunk->size = new_size;
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_chunk_extend_pos_buf doubles the capacity of pos_buf
+ * (1 element at first). On allocation failure an error is recorded,
+ * ctx->rc is returned and the chunk is left unchanged.
+ */
+static grn_rc
+grn_ii_builder_chunk_extend_pos_buf(grn_ctx *ctx, grn_ii_builder_chunk *chunk)
+{
+  const uint32_t new_size = chunk->pos_size ? chunk->pos_size * 2 : 1;
+  const size_t n_bytes = new_size * sizeof(uint32_t);
+  uint32_t *new_buf = (uint32_t *)GRN_REALLOC(chunk->pos_buf, n_bytes);
+
+  if (new_buf) {
+    chunk->pos_buf = new_buf;
+    chunk->pos_size = new_size;
+    return GRN_SUCCESS;
+  }
+  ERR(GRN_NO_MEMORY_AVAILABLE,
+      "failed to allocate memory for positions: n_bytes = %" GRN_FMT_SIZE,
+      n_bytes);
+  return ctx->rc;
+}
+
+/*
+ * grn_ii_builder_chunk_reserve_enc_buf makes sure that enc_buf is large
+ * enough for the encoded form of the chunk and rewinds enc_offset to 0.
+ *
+ * The required size is estimated as (chunk->n + 4) 32-bit integers plus
+ * n_cinfos chunk_info entries. If enc_buf is smaller, its size is doubled
+ * (starting from 1) until it is large enough. On allocation failure an
+ * error is recorded and ctx->rc is returned.
+ */
+static grn_rc
+grn_ii_builder_chunk_reserve_enc_buf(grn_ctx *ctx, grn_ii_builder_chunk *chunk,
+                                     uint32_t n_cinfos)
+{
+  const size_t required = (chunk->n + 4) * sizeof(uint32_t) +
+                          n_cinfos * sizeof(chunk_info);
+
+  if (required > chunk->enc_size) {
+    uint8_t *new_buf;
+    size_t new_size;
+
+    for (new_size = chunk->enc_size ? chunk->enc_size * 2 : 1;
+         new_size < required;
+         new_size *= 2) {
+      /* Keep doubling. */
+    }
+    new_buf = GRN_REALLOC(chunk->enc_buf, new_size);
+    if (!new_buf) {
+      ERR(GRN_NO_MEMORY_AVAILABLE,
+          "failed to allocate memory for encoding: size = %" GRN_FMT_SIZE,
+          new_size);
+      return ctx->rc;
+    }
+    chunk->enc_buf = new_buf;
+    chunk->enc_size = new_size;
+  }
+
+  chunk->enc_offset = 0;
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_chunk_encode_buf encodes n_values integers from values and
+ * appends the result to chunk->enc_buf at chunk->enc_offset, which is
+ * advanced accordingly.
+ *
+ * With use_p_enc, the values are handed to pack() in units of at most
+ * UNIT_SIZE values together with a histogram of their bit widths
+ * (freq[0] counts zeros, freq[w + 1] counts values whose highest set bit
+ * is w). Otherwise every value is written with GRN_B_ENC.
+ *
+ * The caller must have reserved enough space in enc_buf.
+ */
+static void
+grn_ii_builder_chunk_encode_buf(grn_ctx *ctx, grn_ii_builder_chunk *chunk,
+                                uint32_t *values, uint32_t n_values,
+                                grn_bool use_p_enc)
+{
+  uint8_t *dest = chunk->enc_buf + chunk->enc_offset;
+
+  if (!use_p_enc) {
+    uint32_t i;
+    for (i = 0; i < n_values; i++) {
+      GRN_B_ENC(values[i], dest);
+    }
+  } else {
+    uint8_t freq[33];
+    uint32_t unit[UNIT_SIZE];
+
+    /* Full units first; the last unit may be shorter. */
+    while (n_values) {
+      uint32_t i;
+      uint32_t n = n_values < UNIT_SIZE ? n_values : UNIT_SIZE;
+
+      memset(freq, 0, 33);
+      for (i = 0; i < n; i++) {
+        unit[i] = values[i];
+        if (unit[i]) {
+          uint32_t w;
+          GRN_BIT_SCAN_REV(unit[i], w);
+          freq[w + 1]++;
+        } else {
+          freq[0]++;
+        }
+      }
+      dest = pack(unit, n, freq, dest);
+      values += n;
+      n_values -= n;
+    }
+  }
+
+  chunk->enc_offset = dest - chunk->enc_buf;
+}
+
+/*
+ * grn_ii_builder_chunk_encode encodes the buffered postings of a chunk into
+ * chunk->enc_buf; chunk->enc_offset is the encoded size afterwards.
+ *
+ * Output layout, in order:
+ *  - if n_cinfos != 0: n_cinfos, then segno, size and dgap of each cinfos
+ *    entry;
+ *  - if any stream uses p-encoding: (flags << 1), chunk->offset and, when
+ *    pos_buf exists, (pos_offset - offset); otherwise (offset << 1) | 1;
+ *  - rid_buf, [sid_buf], freq_buf, [weight_buf], [pos_buf].
+ *
+ * Returns GRN_SUCCESS, or the error of grn_ii_builder_chunk_reserve_enc_buf
+ * if enc_buf cannot be grown.
+ */
+static grn_rc
+grn_ii_builder_chunk_encode(grn_ctx *ctx, grn_ii_builder_chunk *chunk,
+ chunk_info *cinfos, uint32_t n_cinfos)
+{
+ grn_rc rc;
+ uint8_t *p;
+ uint8_t shift = 0, use_p_enc_flags = 0;
+ uint8_t rid_use_p_enc, rest_use_p_enc, pos_use_p_enc = 0;
+
+ /* Choose an encoding. */
+ /*
+ * One flag bit per encoded stream, assigned in stream order (rid, [sid],
+ * freq, [weight], [pos]); shift counts the bits used so far, so the order
+ * of the statements below defines the bit layout.
+ */
+ rid_use_p_enc = chunk->offset >= 16 && chunk->offset > (chunk->rid >> 8);
+ use_p_enc_flags |= rid_use_p_enc << shift++;
+ /* sid, freq and weight share one decision. */
+ rest_use_p_enc = chunk->offset >= 3;
+ if (chunk->sid_buf) {
+ use_p_enc_flags |= rest_use_p_enc << shift++;
+ }
+ use_p_enc_flags |= rest_use_p_enc << shift++;
+ if (chunk->weight_buf) {
+ use_p_enc_flags |= rest_use_p_enc << shift++;
+ }
+ if (chunk->pos_buf) {
+ pos_use_p_enc = chunk->pos_offset >= 32 &&
+ chunk->pos_offset > (chunk->pos_sum >> 13);
+ use_p_enc_flags |= pos_use_p_enc << shift++;
+ }
+
+ /* Make enc_buf large enough; this also resets enc_offset to 0. */
+ rc = grn_ii_builder_chunk_reserve_enc_buf(ctx, chunk, n_cinfos);
+ if (rc != GRN_SUCCESS) {
+ return rc;
+ }
+
+ /* Encode a header. */
+ p = chunk->enc_buf;
+ if (n_cinfos) {
+ uint32_t i;
+ GRN_B_ENC(n_cinfos, p);
+ for (i = 0; i < n_cinfos; i++) {
+ GRN_B_ENC(cinfos[i].segno, p);
+ GRN_B_ENC(cinfos[i].size, p);
+ GRN_B_ENC(cinfos[i].dgap, p);
+ }
+ }
+ /*
+ * The lowest bit of the next integer is 0 when flags follow and 1 when no
+ * stream uses p-encoding (the integer then carries chunk->offset itself).
+ */
+ if (use_p_enc_flags) {
+ GRN_B_ENC(use_p_enc_flags << 1, p);
+ GRN_B_ENC(chunk->offset, p);
+ if (chunk->pos_buf) {
+ GRN_B_ENC(chunk->pos_offset - chunk->offset, p);
+ }
+ } else {
+ GRN_B_ENC((chunk->offset << 1) | 1, p);
+ }
+ /* The body is appended after the header by the calls below. */
+ chunk->enc_offset = p - chunk->enc_buf;
+
+ /* Encode a body. */
+ grn_ii_builder_chunk_encode_buf(ctx, chunk, chunk->rid_buf, chunk->offset,
+ rid_use_p_enc);
+ if (chunk->sid_buf) {
+ grn_ii_builder_chunk_encode_buf(ctx, chunk, chunk->sid_buf, chunk->offset,
+ rest_use_p_enc);
+ }
+ grn_ii_builder_chunk_encode_buf(ctx, chunk, chunk->freq_buf, chunk->offset,
+ rest_use_p_enc);
+ if (chunk->weight_buf) {
+ grn_ii_builder_chunk_encode_buf(ctx, chunk, chunk->weight_buf,
+ chunk->offset, rest_use_p_enc);
+ }
+ if (chunk->pos_buf) {
+ grn_ii_builder_chunk_encode_buf(ctx, chunk, chunk->pos_buf,
+ chunk->pos_offset, pos_use_p_enc);
+ }
+
+ return GRN_SUCCESS;
+}
+
+/*
+ * State of one index build. It is set up by grn_ii_builder_init and torn
+ * down by grn_ii_builder_fin, which releases the members marked
+ * "to be freed", "to be closed" or "to be finalized".
+ */
+typedef struct {
+ grn_ii *ii; /* Building inverted index */
+ grn_ii_builder_options options; /* Options */
+
+ grn_obj *src_table; /* Source table */
+ grn_obj **srcs; /* Source columns (to be freed) */
+ uint32_t n_srcs; /* Number of source columns */
+ uint8_t sid_bits; /* Number of bits for section ID */
+ uint64_t sid_mask; /* Mask bits for section ID */
+
+ grn_obj *lexicon; /* Block lexicon (to be closed) */
+ grn_obj *tokenizer; /* Lexicon's tokenizer */
+ grn_obj *normalizer; /* Lexicon's normalizer */
+
+ uint32_t n; /* Number of integers appended to the current block */
+ grn_id rid; /* Record ID */
+ uint32_t sid; /* Section ID */
+ uint32_t pos; /* Position */
+
+ /* terms[tid - 1] belongs to the block lexicon's term ID tid. */
+ grn_ii_builder_term *terms; /* Terms (to be freed) */
+ uint32_t n_terms; /* Number of distinct terms */
+ /* Entries [0, max_n_terms) of terms have been initialized. */
+ uint32_t max_n_terms; /* Maximum number of distinct terms */
+ uint32_t terms_size; /* Buffer size of terms */
+
+ /* A temporary file to save blocks. */
+ char path[PATH_MAX]; /* File path */
+ /* -1 until the temporary file has been created. */
+ int fd; /* File descriptor (to be closed) */
+ uint8_t *file_buf; /* File buffer for buffered output (to be freed) */
+ uint32_t file_buf_offset; /* File buffer write offset */
+
+ grn_ii_builder_block *blocks; /* Blocks (to be freed) */
+ uint32_t n_blocks; /* Number of blocks */
+ uint32_t blocks_size; /* Buffer size of blocks */
+
+ grn_ii_builder_buffer buf; /* Buffer (to be finalized) */
+ grn_ii_builder_chunk chunk; /* Chunk (to be finalized) */
+
+ uint32_t df; /* Document frequency (number of sections) */
+ chunk_info *cinfos; /* Chunk headers (to be freed) */
+ uint32_t n_cinfos; /* Number of chunks */
+ uint32_t cinfos_size; /* Size of cinfos */
+} grn_ii_builder;
+
+/*
+ * grn_ii_builder_init puts a builder into its initial state for building
+ * ii. The options are copied, block_threshold is overridden by
+ * grn_ii_builder_block_threshold_force when that is positive, and the
+ * result is clamped with grn_ii_builder_options_fix. No resource is
+ * acquired here. Always returns GRN_SUCCESS.
+ *
+ * An initialized builder must be finalized with grn_ii_builder_fin.
+ */
+static grn_rc
+grn_ii_builder_init(grn_ctx *ctx, grn_ii_builder *builder,
+                    grn_ii *ii, const grn_ii_builder_options *options)
+{
+  /* Target index and effective options. */
+  builder->ii = ii;
+  builder->options = *options;
+  if (grn_ii_builder_block_threshold_force > 0) {
+    builder->options.block_threshold = grn_ii_builder_block_threshold_force;
+  }
+  grn_ii_builder_options_fix(&builder->options);
+
+  /* Sources. */
+  builder->src_table = NULL;
+  builder->srcs = NULL;
+  builder->n_srcs = 0;
+  builder->sid_bits = 0;
+  builder->sid_mask = 0;
+
+  /* Block lexicon. */
+  builder->lexicon = NULL;
+  builder->tokenizer = NULL;
+  builder->normalizer = NULL;
+
+  /* Current position in the input. */
+  builder->n = 0;
+  builder->rid = GRN_ID_NIL;
+  builder->sid = 0;
+  builder->pos = 0;
+
+  /* Terms. */
+  builder->terms = NULL;
+  builder->n_terms = 0;
+  builder->max_n_terms = 0;
+  builder->terms_size = 0;
+
+  /* Temporary file: not created yet. */
+  builder->path[0] = '\0';
+  builder->fd = -1;
+  builder->file_buf = NULL;
+  builder->file_buf_offset = 0;
+
+  /* Blocks. */
+  builder->blocks = NULL;
+  builder->n_blocks = 0;
+  builder->blocks_size = 0;
+
+  /* Output buffer and chunk. */
+  grn_ii_builder_buffer_init(ctx, &builder->buf, ii);
+  grn_ii_builder_chunk_init(ctx, &builder->chunk);
+
+  /* Chunk headers. */
+  builder->df = 0;
+  builder->cinfos = NULL;
+  builder->n_cinfos = 0;
+  builder->cinfos_size = 0;
+
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_fin_terms finalizes every initialized term (the first
+ * max_n_terms entries) and frees the terms array. builder->terms is set to
+ * NULL afterwards so that calling this function again does nothing.
+ */
+static void
+grn_ii_builder_fin_terms(grn_ctx *ctx, grn_ii_builder *builder)
+{
+  grn_ii_builder_term *term, *terms_end;
+
+  if (!builder->terms) {
+    return;
+  }
+
+  terms_end = builder->terms + builder->max_n_terms;
+  for (term = builder->terms; term != terms_end; term++) {
+    grn_ii_builder_term_fin(ctx, term);
+  }
+  GRN_FREE(builder->terms);
+
+  /* To avoid double finalization. */
+  builder->terms = NULL;
+}
+
+/*
+ * grn_ii_builder_fin releases everything a builder owns: chunk headers,
+ * the chunk, the buffer, the blocks, the file buffer, the temporary file
+ * (closed and removed), the terms, the block lexicon and the array of
+ * source columns. Failure to remove the temporary file is reported with
+ * ERRNO_ERR but does not change the return value, which is always
+ * GRN_SUCCESS.
+ */
+static grn_rc
+grn_ii_builder_fin(grn_ctx *ctx, grn_ii_builder *builder)
+{
+  if (builder->cinfos) {
+    GRN_FREE(builder->cinfos);
+  }
+  grn_ii_builder_chunk_fin(ctx, &builder->chunk);
+  grn_ii_builder_buffer_fin(ctx, &builder->buf);
+
+  if (builder->blocks) {
+    grn_ii_builder_block *block;
+    grn_ii_builder_block *blocks_end = builder->blocks + builder->n_blocks;
+    for (block = builder->blocks; block != blocks_end; block++) {
+      grn_ii_builder_block_fin(ctx, block);
+    }
+    GRN_FREE(builder->blocks);
+  }
+
+  if (builder->file_buf) {
+    GRN_FREE(builder->file_buf);
+  }
+
+  /* The temporary file exists only if fd has been opened. */
+  if (builder->fd != -1) {
+    grn_close(builder->fd);
+    if (grn_unlink(builder->path) != 0) {
+      ERRNO_ERR("[ii][builder][fin] failed to remove path: <%-.256s>",
+                builder->path);
+    } else {
+      GRN_LOG(ctx, GRN_LOG_INFO,
+              "[ii][builder][fin] removed path: <%-.256s>",
+              builder->path);
+    }
+  }
+
+  grn_ii_builder_fin_terms(ctx, builder);
+
+  if (builder->lexicon) {
+    grn_obj_close(ctx, builder->lexicon);
+  }
+  if (builder->srcs) {
+    GRN_FREE(builder->srcs);
+  }
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_open allocates and initializes a builder for ii and
+ * stores it in *builder. When options is NULL the default options are
+ * used.
+ *
+ * Returns GRN_NO_MEMORY_AVAILABLE if the builder cannot be allocated, or
+ * the error of grn_ii_builder_init; *builder is written only on success.
+ * A builder must be closed with grn_ii_builder_close.
+ */
+static grn_rc
+grn_ii_builder_open(grn_ctx *ctx, grn_ii *ii,
+                    const grn_ii_builder_options *options,
+                    grn_ii_builder **builder)
+{
+  grn_ii_builder *new_builder;
+  grn_rc rc;
+
+  new_builder = GRN_MALLOCN(grn_ii_builder, 1);
+  if (!new_builder) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+
+  rc = grn_ii_builder_init(ctx, new_builder, ii,
+                           options ? options
+                                   : &grn_ii_builder_default_options);
+  if (rc == GRN_SUCCESS) {
+    *builder = new_builder;
+    return GRN_SUCCESS;
+  }
+
+  GRN_FREE(new_builder);
+  return rc;
+}
+
+/*
+ * grn_ii_builder_close finalizes a builder and frees it. Returns the
+ * result of grn_ii_builder_fin, or records GRN_INVALID_ARGUMENT and
+ * returns ctx->rc when builder is NULL.
+ */
+static grn_rc
+grn_ii_builder_close(grn_ctx *ctx, grn_ii_builder *builder)
+{
+  grn_rc rc;
+
+  if (builder) {
+    rc = grn_ii_builder_fin(ctx, builder);
+    GRN_FREE(builder);
+    return rc;
+  }
+
+  ERR(GRN_INVALID_ARGUMENT, "builder is null");
+  return ctx->rc;
+}
+
+/*
+ * grn_ii_builder_create_lexicon creates the block lexicon: a table without
+ * name and path that has the flags (minus GRN_OBJ_PERSISTENT), domain,
+ * range, tokenizer, normalizer and token filters of the index's lexicon.
+ * The tokenizer and the normalizer are also stored in the builder.
+ *
+ * For a patricia-trie lexicon the cache is enabled when
+ * options.lexicon_cache_size is not 0.
+ *
+ * Returns GRN_SUCCESS or the first error encountered. The lexicon created
+ * so far stays in builder->lexicon on failure.
+ */
+static grn_rc
+grn_ii_builder_create_lexicon(grn_ctx *ctx, grn_ii_builder *builder)
+{
+  grn_table_flags flags;
+  grn_obj *tokenizer, *normalizer, *token_filters;
+  grn_obj *domain, *range;
+  grn_rc rc;
+
+  /* Collect the properties of the index's lexicon. */
+  domain = grn_ctx_at(ctx, builder->ii->lexicon->header.domain);
+  range = grn_ctx_at(ctx, DB_OBJ(builder->ii->lexicon)->range);
+  rc = grn_table_get_info(ctx, builder->ii->lexicon, &flags, NULL,
+                          &tokenizer, &normalizer, &token_filters);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+
+  /* Create the block lexicon as a non-persistent table. */
+  flags &= ~GRN_OBJ_PERSISTENT;
+  builder->lexicon = grn_table_create(ctx, NULL, 0, NULL,
+                                      flags, domain, range);
+  if (!builder->lexicon) {
+    if (ctx->rc == GRN_SUCCESS) {
+      ERR(GRN_UNKNOWN_ERROR, "[index] failed to create a block lexicon");
+    }
+    return ctx->rc;
+  }
+  builder->tokenizer = tokenizer;
+  builder->normalizer = normalizer;
+
+  /* Copy the text processing settings; stop at the first failure. */
+  rc = grn_obj_set_info(ctx, builder->lexicon,
+                        GRN_INFO_DEFAULT_TOKENIZER, tokenizer);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  rc = grn_obj_set_info(ctx, builder->lexicon,
+                        GRN_INFO_NORMALIZER, normalizer);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  rc = grn_obj_set_info(ctx, builder->lexicon,
+                        GRN_INFO_TOKEN_FILTERS, token_filters);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+
+  /* Only patricia-trie lexicons get a cache, and only if one is wanted. */
+  if ((flags & GRN_OBJ_TABLE_TYPE_MASK) != GRN_OBJ_TABLE_PAT_KEY ||
+      !builder->options.lexicon_cache_size) {
+    return GRN_SUCCESS;
+  }
+  return grn_pat_cache_enable(ctx, (grn_pat *)builder->lexicon,
+                              builder->options.lexicon_cache_size);
+}
+
+/*
+ * grn_ii_builder_extend_terms raises the number of terms in use to n_terms
+ * so that terms[n_terms - 1] is available. It does nothing if that many
+ * terms are already in use.
+ *
+ * The terms array is doubled (starting from 1) until it can hold n_terms
+ * entries, and entries that were never used before are initialized.
+ * builder->n grows by the number of terms added. On allocation failure an
+ * error is recorded and ctx->rc is returned.
+ */
+static grn_rc
+grn_ii_builder_extend_terms(grn_ctx *ctx, grn_ii_builder *builder,
+                            uint32_t n_terms)
+{
+  if (builder->n_terms >= n_terms) {
+    return GRN_SUCCESS;
+  }
+
+  if (builder->max_n_terms < n_terms) {
+    uint32_t tid_index;
+
+    if (builder->terms_size < n_terms) {
+      /* Resize builder->terms for new terms. */
+      grn_ii_builder_term *new_terms;
+      uint32_t new_size;
+      size_t n_bytes;
+
+      for (new_size = builder->terms_size ? builder->terms_size * 2 : 1;
+           new_size < n_terms;
+           new_size *= 2) {
+        /* Keep doubling. */
+      }
+      n_bytes = new_size * sizeof(grn_ii_builder_term);
+      new_terms = (grn_ii_builder_term *)GRN_REALLOC(builder->terms, n_bytes);
+      if (!new_terms) {
+        ERR(GRN_NO_MEMORY_AVAILABLE,
+            "failed to allocate memory for terms: n_bytes = %" GRN_FMT_SIZE,
+            n_bytes);
+        return ctx->rc;
+      }
+      builder->terms = new_terms;
+      builder->terms_size = new_size;
+    }
+
+    /* Initialize the entries that have never been used. */
+    for (tid_index = builder->max_n_terms; tid_index < n_terms; tid_index++) {
+      grn_ii_builder_term_init(ctx, builder->terms + tid_index);
+    }
+    builder->max_n_terms = n_terms;
+  }
+
+  builder->n += n_terms - builder->n_terms;
+  builder->n_terms = n_terms;
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_get_term stores in *term the term that belongs to tid,
+ * i.e. terms[tid - 1]. The terms array is extended first when tid is
+ * larger than the number of terms in use. Returns GRN_SUCCESS or the error
+ * of grn_ii_builder_extend_terms.
+ */
+inline static grn_rc
+grn_ii_builder_get_term(grn_ctx *ctx, grn_ii_builder *builder, grn_id tid,
+                        grn_ii_builder_term **term)
+{
+  /* Term IDs start at 1, so tid is also the number of terms required. */
+  const uint32_t required = tid;
+
+  if (required > builder->n_terms) {
+    grn_rc rc = grn_ii_builder_extend_terms(ctx, builder, required);
+    if (rc != GRN_SUCCESS) {
+      return rc;
+    }
+  }
+  *term = builder->terms + (tid - 1);
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_flush_file_buf writes the buffered output to the temporary
+ * file and empties the buffer. It does nothing when the buffer is empty.
+ *
+ * Returns GRN_SUCCESS on success. If the write fails or is short, the
+ * error is recorded with SERR and an error code is returned so that the
+ * caller stops instead of continuing with an incomplete file. The buffer
+ * is emptied in both cases.
+ */
+static grn_rc
+grn_ii_builder_flush_file_buf(grn_ctx *ctx, grn_ii_builder *builder)
+{
+  if (builder->file_buf_offset) {
+    ssize_t size = grn_write(builder->fd, builder->file_buf,
+                             builder->file_buf_offset);
+    if ((uint64_t)size != builder->file_buf_offset) {
+      SERR("failed to write data: expected = %u, actual = %" GRN_FMT_INT64D,
+           builder->file_buf_offset, (int64_t)size);
+      builder->file_buf_offset = 0;
+      /* Never report success for a failed write. */
+      return ctx->rc != GRN_SUCCESS ? ctx->rc : GRN_UNKNOWN_ERROR;
+    }
+    builder->file_buf_offset = 0;
+  }
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_flush_term writes one term of the current block to the
+ * temporary file (through the file buffer) and resets the term.
+ *
+ * Steps:
+ *  1. Append the sentinels to the term buffer.
+ *  2. Look up the term's key in the block lexicon, add it to the index's
+ *     lexicon and write the resulting global term ID in the same
+ *     variable-length format that grn_ii_builder_term_append uses.
+ *  3. Write the term buffer.
+ *  4. Reinitialize the term.
+ *
+ * Returns GRN_SUCCESS on success. On any failure, including a failed or
+ * short write, an error code is returned and the term is not reset.
+ */
+static grn_rc
+grn_ii_builder_flush_term(grn_ctx *ctx, grn_ii_builder *builder,
+                          grn_ii_builder_term *term)
+{
+  grn_rc rc;
+  uint8_t *term_buf;
+
+  /* Append sentinels. */
+  if (term->rid != GRN_ID_NIL) {
+    if (builder->ii->header->flags & GRN_OBJ_WITH_POSITION) {
+      rc = grn_ii_builder_term_append(ctx, term, 0);
+    } else {
+      rc = grn_ii_builder_term_append(ctx, term, term->pos_or_freq);
+    }
+    if (rc != GRN_SUCCESS) {
+      return rc;
+    }
+  }
+  rc = grn_ii_builder_term_append(ctx, term, 0);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+
+  {
+    /* Put the global term ID. */
+    int key_size;
+    char key[GRN_TABLE_MAX_KEY_SIZE];
+    uint8_t *p;
+    uint32_t rest, value;
+    grn_id local_tid = term - builder->terms + 1, global_tid;
+    key_size = grn_table_get_key(ctx, builder->lexicon, local_tid,
+                                 key, GRN_TABLE_MAX_KEY_SIZE);
+    if (!key_size) {
+      if (ctx->rc == GRN_SUCCESS) {
+        ERR(GRN_UNKNOWN_ERROR, "failed to get key: tid = %u", local_tid);
+      }
+      return ctx->rc;
+    }
+    global_tid = grn_table_add(ctx, builder->ii->lexicon, key, key_size, NULL);
+    if (global_tid == GRN_ID_NIL) {
+      if (ctx->rc == GRN_SUCCESS) {
+        ERR(GRN_UNKNOWN_ERROR,
+            "failed to get global term ID: tid = %u, key = \"%.*s\"",
+            local_tid, key_size, key);
+      }
+      return ctx->rc;
+    }
+
+    /* Make room for the encoded term ID (at most 5 bytes are written). */
+    rest = builder->options.file_buf_size - builder->file_buf_offset;
+    if (rest < 10) {
+      rc = grn_ii_builder_flush_file_buf(ctx, builder);
+      if (rc != GRN_SUCCESS) {
+        return rc;
+      }
+    }
+    value = global_tid;
+    p = builder->file_buf + builder->file_buf_offset;
+    if (value < 1U << 5) {
+      p[0] = (uint8_t)value;
+      builder->file_buf_offset++;
+    } else if (value < 1U << 13) {
+      p[0] = (uint8_t)((value & 0x1f) | (1 << 5));
+      p[1] = (uint8_t)(value >> 5);
+      builder->file_buf_offset += 2;
+    } else {
+      uint8_t i, n;
+      if (value < 1U << 21) {
+        n = 3;
+      } else if (value < 1U << 29) {
+        n = 4;
+      } else {
+        n = 5;
+      }
+      p[0] = (uint8_t)(value & 0x1f) | ((n - 1) << 5);
+      value >>= 5;
+      for (i = 1; i < n; i++) {
+        p[i] = (uint8_t)value;
+        value >>= 8;
+      }
+      builder->file_buf_offset += n;
+    }
+  }
+
+  /* Flush a term buffer. */
+  term_buf = grn_ii_builder_term_get_buf(term);
+  if (term->offset > builder->options.file_buf_size) {
+    /* Too large for the file buffer: write it directly. */
+    ssize_t size;
+    rc = grn_ii_builder_flush_file_buf(ctx, builder);
+    if (rc != GRN_SUCCESS) {
+      return rc;
+    }
+    size = grn_write(builder->fd, term_buf, term->offset);
+    if ((uint64_t)size != term->offset) {
+      SERR("failed to write data: expected = %u, actual = %" GRN_FMT_INT64D,
+           term->offset, (int64_t)size);
+      /* Never report success for a failed write. */
+      return ctx->rc != GRN_SUCCESS ? ctx->rc : GRN_UNKNOWN_ERROR;
+    }
+  } else {
+    uint32_t rest = builder->options.file_buf_size - builder->file_buf_offset;
+    if (term->offset <= rest) {
+      grn_memcpy(builder->file_buf + builder->file_buf_offset,
+                 term_buf, term->offset);
+      builder->file_buf_offset += term->offset;
+    } else {
+      /* Fill the file buffer, flush it and buffer the remainder. */
+      grn_memcpy(builder->file_buf + builder->file_buf_offset,
+                 term_buf, rest);
+      builder->file_buf_offset += rest;
+      rc = grn_ii_builder_flush_file_buf(ctx, builder);
+      if (rc != GRN_SUCCESS) {
+        return rc;
+      }
+      builder->file_buf_offset = term->offset - rest;
+      grn_memcpy(builder->file_buf, term_buf + rest, builder->file_buf_offset);
+    }
+  }
+  grn_ii_builder_term_reinit(ctx, term);
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_create_file creates the temporary file for blocks and
+ * allocates the buffer for buffered output.
+ *
+ * The file name is the path of the index's segment file followed by a
+ * unique suffix generated by grn_mkstemp from the "XXXXXX" template. On
+ * failure an error is recorded and ctx->rc is returned; a file that was
+ * already opened stays in builder->fd.
+ */
+static grn_rc
+grn_ii_builder_create_file(grn_ctx *ctx, grn_ii_builder *builder)
+{
+  const uint32_t buf_size = builder->options.file_buf_size;
+
+  grn_snprintf(builder->path, PATH_MAX, PATH_MAX,
+               "%-.256sXXXXXX", grn_io_path(builder->ii->seg));
+  builder->fd = grn_mkstemp(builder->path);
+  if (builder->fd == -1) {
+    SERR("failed to create a temporary file: path = \"%-.256s\"",
+         builder->path);
+    return ctx->rc;
+  }
+
+  builder->file_buf = (uint8_t *)GRN_MALLOC(buf_size);
+  if (builder->file_buf) {
+    return GRN_SUCCESS;
+  }
+  ERR(GRN_NO_MEMORY_AVAILABLE,
+      "failed to allocate memory for buffered output: size = %u",
+      buf_size);
+  return ctx->rc;
+}
+
+/*
+ * grn_ii_builder_register_block records the block that has just been written
+ * to the temporary file. The new block starts where the previous block ends
+ * (or at 0 for the first block) and extends to the current file offset.
+ * The block array is grown to the next power of two when it is full.
+ */
+static grn_rc
+grn_ii_builder_register_block(grn_ctx *ctx, grn_ii_builder *builder)
+{
+  grn_ii_builder_block *new_block;
+  uint64_t end_offset = grn_lseek(builder->fd, 0, SEEK_CUR);
+
+  if (end_offset == (uint64_t)-1) {
+    SERR("failed to get file offset");
+    return ctx->rc;
+  }
+
+  /* Grow the array of blocks if there is no room for one more. */
+  if (builder->n_blocks >= builder->blocks_size) {
+    uint32_t capacity;
+    size_t n_bytes;
+    grn_ii_builder_block *grown;
+    for (capacity = 1; capacity <= builder->n_blocks; capacity *= 2) {
+      /* Find the smallest power of two greater than n_blocks. */
+    }
+    n_bytes = capacity * sizeof(grn_ii_builder_block);
+    grown = (grn_ii_builder_block *)GRN_REALLOC(builder->blocks, n_bytes);
+    if (!grown) {
+      ERR(GRN_NO_MEMORY_AVAILABLE,
+          "failed to allocate memory for block: n_bytes = %" GRN_FMT_SIZE,
+          n_bytes);
+      return ctx->rc;
+    }
+    builder->blocks = grown;
+    builder->blocks_size = capacity;
+  }
+
+  new_block = &builder->blocks[builder->n_blocks];
+  grn_ii_builder_block_init(ctx, new_block);
+  if (builder->n_blocks) {
+    grn_ii_builder_block *last = new_block - 1;
+    new_block->offset = last->offset + last->rest;
+  } else {
+    new_block->offset = 0;
+  }
+  new_block->rest = (uint32_t)(end_offset - new_block->offset);
+  builder->n_blocks++;
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_flush_block flushes a block to a temporary file.
+ *
+ * Every term of the block lexicon is written in key order, the file buffer is
+ * flushed, the block is registered and the in-memory state (lexicon, rid,
+ * n_terms, n) is reset. The temporary file is created on first use.
+ *
+ * Returns GRN_SUCCESS (also when there is nothing to flush) or the first
+ * error. The table cursor is closed on every path, including when flushing a
+ * term fails.
+ */
+static grn_rc
+grn_ii_builder_flush_block(grn_ctx *ctx, grn_ii_builder *builder)
+{
+  grn_rc rc = GRN_SUCCESS;
+  grn_table_cursor *cursor;
+
+  if (!builder->n) {
+    /* Do nothing if there are no output data. */
+    return GRN_SUCCESS;
+  }
+  if (builder->fd == -1) {
+    rc = grn_ii_builder_create_file(ctx, builder);
+    if (rc != GRN_SUCCESS) {
+      return rc;
+    }
+  }
+
+  /* Flush terms into a temporary file. */
+  cursor = grn_table_cursor_open(ctx, builder->lexicon,
+                                 NULL, 0, NULL, 0, 0, -1, GRN_CURSOR_BY_KEY);
+  if (!cursor) {
+    if (ctx->rc == GRN_SUCCESS) {
+      ERR(GRN_OBJECT_CORRUPT, "[index] failed to open table cursor");
+    }
+    return ctx->rc;
+  }
+  while (rc == GRN_SUCCESS) {
+    grn_id tid = grn_table_cursor_next(ctx, cursor);
+    if (tid == GRN_ID_NIL) {
+      break;
+    }
+    rc = grn_ii_builder_flush_term(ctx, builder, &builder->terms[tid - 1]);
+  }
+  /* Close the cursor before looking at rc so that it is never leaked. */
+  grn_table_cursor_close(ctx, cursor);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  rc = grn_ii_builder_flush_file_buf(ctx, builder);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+
+  /* Register a block and clear the current data. */
+  rc = grn_ii_builder_register_block(ctx, builder);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  rc = grn_table_truncate(ctx, builder->lexicon);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  builder->rid = GRN_ID_NIL;
+  builder->n_terms = 0;
+  builder->n = 0;
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_append_token appends one occurrence of a token (tid) found in
+ * record rid, section sid, at position pos.
+ *
+ * When the (rid, sid) pair differs from the term's current one, the previous
+ * posting is closed first (a 0 terminator if positions are stored, otherwise
+ * its frequency) and a new posting header is written: the rid gap combined
+ * with sid - 1, followed by the weight if weights are stored. builder->n is
+ * incremented once per appended value.
+ */
+static grn_rc
+grn_ii_builder_append_token(grn_ctx *ctx, grn_ii_builder *builder,
+                            grn_id rid, uint32_t sid, uint32_t weight,
+                            grn_id tid, uint32_t pos)
+{
+  const uint32_t ii_flags = builder->ii->header->flags;
+  const grn_bool with_position = (ii_flags & GRN_OBJ_WITH_POSITION) != 0;
+  grn_ii_builder_term *term;
+  grn_rc rc = grn_ii_builder_get_term(ctx, builder, tid, &term);
+
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+
+  if (rid != term->rid || sid != term->sid) {
+    uint64_t rsid;
+
+    if (term->rid != GRN_ID_NIL) {
+      /*
+       * Close the previous posting: the end-of-positions marker, or the
+       * frequency if positions are not available.
+       */
+      uint64_t closing = 0;
+      if (!with_position) {
+        closing = term->pos_or_freq;
+      }
+      rc = grn_ii_builder_term_append(ctx, term, closing);
+      if (rc != GRN_SUCCESS) {
+        return rc;
+      }
+      builder->n++;
+    }
+
+    /* Start a new posting. */
+    rsid = ((uint64_t)(rid - term->rid) << builder->sid_bits) | (sid - 1);
+    rc = grn_ii_builder_term_append(ctx, term, rsid);
+    if (rc != GRN_SUCCESS) {
+      return rc;
+    }
+    builder->n++;
+    if (ii_flags & GRN_OBJ_WITH_WEIGHT) {
+      rc = grn_ii_builder_term_append(ctx, term, weight);
+      if (rc != GRN_SUCCESS) {
+        return rc;
+      }
+      builder->n++;
+    }
+    term->rid = rid;
+    term->sid = sid;
+    term->pos_or_freq = 0;
+  }
+
+  if (!with_position) {
+    /* Only count occurrences. */
+    term->pos_or_freq++;
+    return GRN_SUCCESS;
+  }
+
+  /* Store the position as a gap from the previous one. */
+  rc = grn_ii_builder_term_append(ctx, term, pos - term->pos_or_freq);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  builder->n++;
+  term->pos_or_freq = pos;
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_append_value appends a value. If the index has neither a
+ * tokenizer nor a normalizer, the whole value is registered as a single key
+ * of the block lexicon; otherwise it is tokenized and every token is
+ * appended. Note that values must be appended in ascending rid and sid order.
+ */
+static grn_rc
+grn_ii_builder_append_value(grn_ctx *ctx, grn_ii_builder *builder,
+                            grn_id rid, uint32_t sid, uint32_t weight,
+                            const char *value, uint32_t value_size)
+{
+  uint32_t last_pos = 0;
+
+  if (rid == builder->rid && sid == builder->sid) {
+    /* Insert a space between values. */
+    builder->pos++;
+  } else {
+    /* A new record or a new section restarts the position counter. */
+    builder->rid = rid;
+    builder->sid = sid;
+    builder->pos = 1;
+  }
+
+  if (value_size != 0) {
+    if (builder->tokenizer || builder->normalizer) {
+      grn_token_cursor *tokens;
+      tokens = grn_token_cursor_open(ctx, builder->lexicon, value, value_size,
+                                     GRN_TOKEN_ADD, 0);
+      if (!tokens) {
+        if (ctx->rc == GRN_SUCCESS) {
+          ERR(GRN_UNKNOWN_ERROR,
+              "grn_token_cursor_open failed: value = <%.*s>",
+              value_size, value);
+        }
+        return ctx->rc;
+      }
+      while (tokens->status == GRN_TOKEN_CURSOR_DOING) {
+        grn_id tid = grn_token_cursor_next(ctx, tokens);
+        if (tid == GRN_ID_NIL) {
+          continue;
+        }
+        last_pos = builder->pos + tokens->pos;
+        if (grn_ii_builder_append_token(ctx, builder, rid, sid,
+                                        weight, tid, last_pos) != GRN_SUCCESS) {
+          break;
+        }
+      }
+      grn_token_cursor_close(ctx, tokens);
+    } else {
+      grn_id tid = GRN_ID_NIL;
+      switch (builder->lexicon->header.type) {
+      case GRN_TABLE_PAT_KEY :
+        tid = grn_pat_add(ctx, (grn_pat *)builder->lexicon,
+                          value, value_size, NULL, NULL);
+        break;
+      case GRN_TABLE_DAT_KEY :
+        tid = grn_dat_add(ctx, (grn_dat *)builder->lexicon,
+                          value, value_size, NULL, NULL);
+        break;
+      case GRN_TABLE_HASH_KEY :
+        tid = grn_hash_add(ctx, (grn_hash *)builder->lexicon,
+                           value, value_size, NULL, NULL);
+        break;
+      case GRN_TABLE_NO_KEY :
+        tid = *(grn_id *)value;
+        break;
+      default :
+        break;
+      }
+      if (tid != GRN_ID_NIL) {
+        grn_rc rc;
+        last_pos = builder->pos;
+        rc = grn_ii_builder_append_token(ctx, builder, rid, sid,
+                                         weight, tid, last_pos);
+        if (rc != GRN_SUCCESS) {
+          return rc;
+        }
+      }
+    }
+  }
+
+  builder->pos = last_pos + 1;
+  return ctx->rc;
+}
+
+/*
+ * grn_ii_builder_append_obj appends a BULK, UVECTOR or VECTOR object.
+ *
+ * A BULK is appended as a single value, a UVECTOR element by element and a
+ * VECTOR section by section (empty sections are skipped). Any other object
+ * type is reported as GRN_INVALID_ARGUMENT.
+ */
+static grn_rc
+grn_ii_builder_append_obj(grn_ctx *ctx, grn_ii_builder *builder,
+                          grn_id rid, uint32_t sid, grn_obj *obj)
+{
+  grn_rc rc = GRN_SUCCESS;
+
+  switch (obj->header.type) {
+  case GRN_BULK :
+    rc = grn_ii_builder_append_value(ctx, builder, rid, sid, 0,
+                                     GRN_TEXT_VALUE(obj), GRN_TEXT_LEN(obj));
+    break;
+  case GRN_UVECTOR :
+    {
+      const char *elem = GRN_BULK_HEAD(obj);
+      uint32_t n_left = grn_uvector_size(ctx, obj);
+      const uint32_t elem_size = grn_uvector_element_size(ctx, obj);
+      for (; n_left > 0; n_left--) {
+        rc = grn_ii_builder_append_value(ctx, builder, rid, sid, 0,
+                                         elem, elem_size);
+        if (rc != GRN_SUCCESS) {
+          break;
+        }
+        elem += elem_size;
+      }
+    }
+    break;
+  case GRN_VECTOR :
+    if (obj->u.v.body) {
+      /*
+       * Note that the following sections and n_sections don't correspond to
+       * source columns.
+       */
+      const int n_secs = obj->u.v.n_sections;
+      const char *base = GRN_BULK_HEAD(obj->u.v.body);
+      grn_section *sec = obj->u.v.sections;
+      int idx;
+      for (idx = 0; idx < n_secs; idx++, sec++) {
+        if (sec->length == 0) {
+          continue;
+        }
+        if (builder->tokenizer) {
+          /* With a tokenizer, each element gets its own section ID. */
+          sid = idx + 1;
+        }
+        rc = grn_ii_builder_append_value(ctx, builder, rid, sid, sec->weight,
+                                         base + sec->offset, sec->length);
+        if (rc != GRN_SUCCESS) {
+          break;
+        }
+      }
+    }
+    break;
+  default :
+    ERR(GRN_INVALID_ARGUMENT, "[index] invalid object assigned as value");
+    rc = ctx->rc;
+    break;
+  }
+  return rc;
+}
+
+/*
+ * grn_ii_builder_append_srcs reads values from source columns and appends the
+ * values.
+ *
+ * Records of the source table are visited in ID order. For each record, the
+ * value (or the key, if the source is a table) of every source is fetched and
+ * appended with sid = source index + 1. A block is flushed whenever
+ * builder->n reaches options.block_threshold, and once more at the end.
+ *
+ * Processing stops at the first failure: the remaining sources of the failing
+ * record are not read, so the error code cannot be overwritten by a later
+ * successful call. The temporary objects and the cursor are always released.
+ */
+static grn_rc
+grn_ii_builder_append_srcs(grn_ctx *ctx, grn_ii_builder *builder)
+{
+  size_t i;
+  grn_rc rc = GRN_SUCCESS;
+  grn_obj *objs;
+  grn_table_cursor *cursor;
+
+  /* Allocate memory for objects to store source values. */
+  objs = GRN_MALLOCN(grn_obj, builder->n_srcs);
+  if (!objs) {
+    ERR(GRN_NO_MEMORY_AVAILABLE,
+        "failed to allocate memory for objs: n_srcs = %u", builder->n_srcs);
+    return ctx->rc;
+  }
+
+  /* Create a cursor to get records in the ID order. */
+  cursor = grn_table_cursor_open(ctx, builder->src_table, NULL, 0, NULL, 0,
+                                 0, -1, GRN_CURSOR_BY_ID);
+  if (!cursor) {
+    if (ctx->rc == GRN_SUCCESS) {
+      ERR(GRN_OBJECT_CORRUPT, "[index] failed to open table cursor");
+    }
+    GRN_FREE(objs);
+    return ctx->rc;
+  }
+
+  /* Read source values and append it. */
+  for (i = 0; i < builder->n_srcs; i++) {
+    GRN_TEXT_INIT(&objs[i], 0);
+  }
+  while (rc == GRN_SUCCESS) {
+    grn_id rid = grn_table_cursor_next(ctx, cursor);
+    if (rid == GRN_ID_NIL) {
+      break;
+    }
+    /* Stop at the first failing source so that rc keeps the error. */
+    for (i = 0; rc == GRN_SUCCESS && i < builder->n_srcs; i++) {
+      grn_obj *obj = &objs[i];
+      grn_obj *src = builder->srcs[i];
+      rc = grn_obj_reinit_for(ctx, obj, src);
+      if (rc != GRN_SUCCESS) {
+        break;
+      }
+      if (GRN_OBJ_TABLEP(src)) {
+        int len = grn_table_get_key2(ctx, src, rid, obj);
+        if (len <= 0) {
+          if (ctx->rc == GRN_SUCCESS) {
+            ERR(GRN_UNKNOWN_ERROR, "failed to get key: rid = %u, len = %d",
+                rid, len);
+          }
+          rc = ctx->rc;
+        }
+      } else {
+        if (!grn_obj_get_value(ctx, src, rid, obj)) {
+          if (ctx->rc == GRN_SUCCESS) {
+            ERR(GRN_UNKNOWN_ERROR, "failed to get value: rid = %u", rid);
+          }
+          rc = ctx->rc;
+        }
+      }
+      if (rc == GRN_SUCCESS) {
+        uint32_t sid = (uint32_t)(i + 1);
+        rc = grn_ii_builder_append_obj(ctx, builder, rid, sid, obj);
+      }
+    }
+    if (rc == GRN_SUCCESS && builder->n >= builder->options.block_threshold) {
+      rc = grn_ii_builder_flush_block(ctx, builder);
+    }
+  }
+  if (rc == GRN_SUCCESS) {
+    rc = grn_ii_builder_flush_block(ctx, builder);
+  }
+  for (i = 0; i < builder->n_srcs; i++) {
+    GRN_OBJ_FIN(ctx, &objs[i]);
+  }
+  grn_table_cursor_close(ctx, cursor);
+  GRN_FREE(objs);
+  return rc;
+}
+
+/*
+ * grn_ii_builder_set_src_table resolves the range of the index to an object
+ * and stores it in builder->src_table. A missing object is reported as
+ * GRN_INVALID_ARGUMENT unless an error is already set on ctx.
+ */
+static grn_rc
+grn_ii_builder_set_src_table(grn_ctx *ctx, grn_ii_builder *builder)
+{
+  builder->src_table = grn_ctx_at(ctx, DB_OBJ(builder->ii)->range);
+  if (builder->src_table) {
+    return GRN_SUCCESS;
+  }
+  if (ctx->rc == GRN_SUCCESS) {
+    ERR(GRN_INVALID_ARGUMENT, "source table is null: range = %d",
+        DB_OBJ(builder->ii)->range);
+  }
+  return ctx->rc;
+}
+
+/*
+ * grn_ii_builder_set_sid_bits calculates sid_bits and sid_mask.
+ *
+ * sid_bits is the number of bits required to represent a section ID. For a
+ * single vector source indexed with a tokenizer, it is derived from the
+ * largest number of elements found in any record; otherwise (or if that
+ * leaves sid_bits at 0) it is derived from the number of sources.
+ */
+static grn_rc
+grn_ii_builder_set_sid_bits(grn_ctx *ctx, grn_ii_builder *builder)
+{
+  const grn_bool is_tokenized_vector =
+    builder->n_srcs == 1 && builder->tokenizer &&
+    (builder->srcs[0]->header.flags & GRN_OBJ_COLUMN_VECTOR) != 0;
+
+  if (is_tokenized_vector) {
+    /* The maximum sid equals to the maximum number of elements. */
+    grn_id rid;
+    grn_obj elems;
+    size_t widest = 0;
+    grn_table_cursor *records;
+
+    records = grn_table_cursor_open(ctx, builder->src_table, NULL, 0, NULL, 0,
+                                    0, -1, GRN_CURSOR_BY_ID);
+    if (!records) {
+      if (ctx->rc == GRN_SUCCESS) {
+        ERR(GRN_OBJECT_CORRUPT, "[index] failed to open table cursor");
+      }
+      return ctx->rc;
+    }
+    GRN_TEXT_INIT(&elems, 0);
+    while ((rid = grn_table_cursor_next(ctx, records)) != GRN_ID_NIL) {
+      if (grn_obj_get_value(ctx, builder->srcs[0], rid, &elems) &&
+          elems.u.v.n_sections > widest) {
+        widest = elems.u.v.n_sections;
+      }
+    }
+    GRN_OBJ_FIN(ctx, &elems);
+    grn_table_cursor_close(ctx, records);
+
+    for (; ((uint32_t)1 << builder->sid_bits) < widest; builder->sid_bits++) {
+      /* Widen until every element index fits. */
+    }
+  }
+
+  if (builder->sid_bits == 0) {
+    for (; ((uint32_t)1 << builder->sid_bits) < builder->n_srcs;
+         builder->sid_bits++) {
+      /* Widen until every source index fits. */
+    }
+  }
+  builder->sid_mask = ((uint64_t)1 << builder->sid_bits) - 1;
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_set_srcs sets source columns.
+ *
+ * The source IDs stored in builder->ii->obj are resolved to objects and kept
+ * in builder->srcs (n_srcs entries), then sid_bits and sid_mask are
+ * calculated. Every failure, including the allocation failure, is reported
+ * through ERR so that ctx->rc and the returned code agree, as in the other
+ * builder functions.
+ */
+static grn_rc
+grn_ii_builder_set_srcs(grn_ctx *ctx, grn_ii_builder *builder)
+{
+  size_t i;
+  grn_id *source;
+  builder->n_srcs = builder->ii->obj.source_size / sizeof(grn_id);
+  source = (grn_id *)builder->ii->obj.source;
+  if (!source || !builder->n_srcs) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "source is not available: source = %p, source_size = %u",
+        builder->ii->obj.source, builder->ii->obj.source_size);
+    return ctx->rc;
+  }
+  builder->srcs = GRN_MALLOCN(grn_obj *, builder->n_srcs);
+  if (!builder->srcs) {
+    ERR(GRN_NO_MEMORY_AVAILABLE,
+        "failed to allocate memory for srcs: n_srcs = %u", builder->n_srcs);
+    return ctx->rc;
+  }
+  for (i = 0; i < builder->n_srcs; i++) {
+    builder->srcs[i] = grn_ctx_at(ctx, source[i]);
+    if (!builder->srcs[i]) {
+      if (ctx->rc == GRN_SUCCESS) {
+        ERR(GRN_OBJECT_CORRUPT, "source not found: id = %d", source[i]);
+      }
+      return ctx->rc;
+    }
+  }
+  return grn_ii_builder_set_sid_bits(ctx, builder);
+}
+
+/*
+ * grn_ii_builder_append_source appends values in source columns: it resolves
+ * the source table, creates the block lexicon, resolves the source columns,
+ * appends all their values and finally finalizes the terms. An empty source
+ * table is not an error; ctx->rc is returned without doing anything else.
+ */
+static grn_rc
+grn_ii_builder_append_source(grn_ctx *ctx, grn_ii_builder *builder)
+{
+  grn_rc rc = grn_ii_builder_set_src_table(ctx, builder);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  if (grn_table_size(ctx, builder->src_table) == 0) {
+    /* Nothing to do because there are no values. */
+    return ctx->rc;
+  }
+
+  /* Each step runs only if all the previous ones succeeded. */
+  rc = grn_ii_builder_create_lexicon(ctx, builder);
+  if (rc == GRN_SUCCESS) {
+    rc = grn_ii_builder_set_srcs(ctx, builder);
+  }
+  if (rc == GRN_SUCCESS) {
+    rc = grn_ii_builder_append_srcs(ctx, builder);
+  }
+  if (rc == GRN_SUCCESS) {
+    grn_ii_builder_fin_terms(ctx, builder);
+  }
+  return rc;
+}
+
+/*
+ * grn_ii_builder_fill_block refills the buffer of a block from the temporary
+ * file. Bytes that have not been consumed yet are moved to the head of the
+ * buffer and the free space behind them is filled with the next data of the
+ * block. Returns GRN_END_OF_DATA when the block has no data left in the file.
+ */
+static grn_rc
+grn_ii_builder_fill_block(grn_ctx *ctx, grn_ii_builder *builder,
+                          uint32_t block_id)
+{
+  grn_ii_builder_block *block = &builder->blocks[block_id];
+  uint32_t n_kept;
+  uint32_t n_wanted;
+  uint64_t seek_pos;
+  ssize_t n_read;
+
+  if (block->rest == 0) {
+    return GRN_END_OF_DATA;
+  }
+
+  /* The buffer is allocated on first use. */
+  if (!block->buf) {
+    block->buf = (uint8_t *)GRN_MALLOC(builder->options.block_buf_size);
+    if (!block->buf) {
+      ERR(GRN_NO_MEMORY_AVAILABLE,
+          "failed to allocate memory for buffered input: size = %u",
+          builder->options.block_buf_size);
+      return ctx->rc;
+    }
+  }
+
+  /* Move the remaining data to the head. */
+  n_kept = block->end - block->cur;
+  if (n_kept > 0) {
+    grn_memmove(block->buf, block->cur, n_kept);
+  }
+  block->cur = block->buf;
+  block->end = block->buf + n_kept;
+
+  /* Read the next data. */
+  seek_pos = grn_lseek(builder->fd, block->offset, SEEK_SET);
+  if (seek_pos != block->offset) {
+    SERR("failed to seek file: expected = %" GRN_FMT_INT64U
+         ", actual = %" GRN_FMT_INT64D,
+         block->offset, seek_pos);
+    return ctx->rc;
+  }
+  n_wanted = builder->options.block_buf_size - n_kept;
+  if (n_wanted > block->rest) {
+    n_wanted = block->rest;
+  }
+  n_read = grn_read(builder->fd, block->end, n_wanted);
+  if (n_read <= 0) {
+    SERR("failed to read data: expected = %u, actual = %" GRN_FMT_INT64D,
+         n_wanted, (int64_t)n_read);
+    return ctx->rc;
+  }
+  block->offset += n_read;
+  block->rest -= n_read;
+  block->end += n_read;
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_read_from_block reads the next value from a block. If the
+ * buffered data are exhausted, the buffer is refilled from the temporary file
+ * and the read is retried once.
+ */
+static grn_rc
+grn_ii_builder_read_from_block(grn_ctx *ctx, grn_ii_builder *builder,
+                               uint32_t block_id, uint64_t *value)
+{
+  grn_ii_builder_block *block = &builder->blocks[block_id];
+  grn_rc rc = grn_ii_builder_block_next(ctx, block, value);
+
+  if (rc != GRN_END_OF_DATA) {
+    /* Either a value was read or a real error occurred. */
+    return rc;
+  }
+  rc = grn_ii_builder_fill_block(ctx, builder, block_id);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  return grn_ii_builder_block_next(ctx, block, value);
+}
+
+/*
+ * grn_ii_builder_pack_chunk tries to pack a chunk.
+ *
+ * A chunk is packed only if it holds exactly one posting (chunk->offset == 1)
+ * whose weight is 0 and whose stored frequency is 0, and, when sid_buf is
+ * present, rid < 0x100000 and sid < 0x800. In that case the posting is
+ * written directly into a[0] and a[1] of the array returned by array_get()
+ * for chunk->tid, the chunk is cleared and *packed is set to GRN_TRUE.
+ * Otherwise *packed stays GRN_FALSE and GRN_SUCCESS is returned without
+ * modifying the chunk. If array_get() fails, the error is reported with MERR
+ * and ctx->rc is returned.
+ */
+static grn_rc
+grn_ii_builder_pack_chunk(grn_ctx *ctx, grn_ii_builder *builder,
+ grn_bool *packed)
+{
+ grn_id rid;
+ uint32_t sid, pos, *a;
+ grn_ii_builder_chunk *chunk = &builder->chunk;
+ *packed = GRN_FALSE;
+ if (chunk->offset != 1) { /* df != 1 */
+ return GRN_SUCCESS;
+ }
+ if (chunk->weight_buf && chunk->weight_buf[0]) { /* weight != 0 */
+ return GRN_SUCCESS;
+ }
+ if (chunk->freq_buf[0] != 0) { /* freq != 1 */
+ return GRN_SUCCESS;
+ }
+ rid = chunk->rid_buf[0];
+ if (chunk->sid_buf) {
+ if (rid >= 0x100000) { /* rid must fit in the 20 bits above bit 12 */
+ return GRN_SUCCESS;
+ }
+ sid = chunk->sid_buf[0] + 1;
+ if (sid >= 0x800) { /* sid must fit in the 11 bits above bit 0 */
+ return GRN_SUCCESS;
+ }
+ a = array_get(ctx, builder->ii, chunk->tid);
+ if (!a) {
+ DEFINE_NAME(builder->ii);
+ MERR("[ii][builder][chunk][pack] failed to allocate an array: "
+ "<%.*s>: "
+ "<%u>:<%u>:<%u>",
+ name_size, name,
+ rid, sid, chunk->tid);
+ return ctx->rc;
+ }
+ a[0] = ((rid << 12) + (sid << 1)) | 1; /* rid | sid | lowest bit set */
+ } else {
+ a = array_get(ctx, builder->ii, chunk->tid);
+ if (!a) {
+ DEFINE_NAME(builder->ii);
+ MERR("[ii][builder][chunk][pack] failed to allocate an array: "
+ "<%.*s>: "
+ "<%u>:<%u>",
+ name_size, name,
+ rid, chunk->tid);
+ return ctx->rc;
+ }
+ a[0] = (rid << 1) | 1; /* rid with the lowest bit set */
+ }
+ pos = 0;
+ if (chunk->pos_buf) {
+ pos = chunk->pos_buf[0];
+ }
+ a[1] = pos; /* the single position, or 0 if positions are not stored */
+ array_unref(builder->ii, chunk->tid);
+ *packed = GRN_TRUE;
+
+ grn_ii_builder_chunk_clear(ctx, chunk);
+ return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_get_cinfo hands out the next unused chunk_info slot of the
+ * builder through *cinfo. The array of slots is doubled (starting from one
+ * slot) whenever it is full.
+ */
+static grn_rc
+grn_ii_builder_get_cinfo(grn_ctx *ctx, grn_ii_builder *builder,
+                         chunk_info **cinfo)
+{
+  if (builder->n_cinfos == builder->cinfos_size) {
+    uint32_t new_size = 1;
+    size_t n_bytes;
+    chunk_info *grown;
+
+    if (builder->cinfos_size) {
+      new_size = builder->cinfos_size * 2;
+    }
+    n_bytes = new_size * sizeof(chunk_info);
+    grown = (chunk_info *)GRN_REALLOC(builder->cinfos, n_bytes);
+    if (!grown) {
+      ERR(GRN_NO_MEMORY_AVAILABLE,
+          "failed to allocate memory for cinfos: n_bytes = %" GRN_FMT_SIZE,
+          n_bytes);
+      return ctx->rc;
+    }
+    builder->cinfos = grown;
+    builder->cinfos_size = new_size;
+  }
+  *cinfo = builder->cinfos + builder->n_cinfos;
+  builder->n_cinfos++;
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_flush_chunk flushes a chunk.
+ *
+ * The chunk is encoded, space for the encoded bytes is obtained with
+ * chunk_new(), the bytes are copied into the chunk segments of builder->ii
+ * (continuing into the following segments if they do not fit in the first
+ * one), and a chunk_info entry describing the stored data is appended to the
+ * builder. On success the total chunk size in the header is increased and the
+ * in-memory chunk is cleared.
+ */
+static grn_rc
+grn_ii_builder_flush_chunk(grn_ctx *ctx, grn_ii_builder *builder)
+{
+ grn_rc rc;
+ chunk_info *cinfo = NULL;
+ grn_ii_builder_chunk *chunk = &builder->chunk;
+ void *seg;
+ uint8_t *in;
+ uint32_t in_size, chunk_id, seg_id, seg_offset, seg_rest;
+
+ rc = grn_ii_builder_chunk_encode(ctx, chunk, NULL, 0);
+ if (rc != GRN_SUCCESS) {
+ return rc;
+ }
+ in = chunk->enc_buf;
+ in_size = chunk->enc_offset;
+
+ rc = chunk_new(ctx, builder->ii, &chunk_id, chunk->enc_offset);
+ if (rc != GRN_SUCCESS) {
+ return rc;
+ }
+
+ /*
+ * Copy to the first segment. The upper bits of chunk_id select the
+ * segment; the lower GRN_II_N_CHUNK_VARIATION bits, shifted by
+ * GRN_II_W_LEAST_CHUNK, give the byte offset in that segment.
+ */
+ seg_id = chunk_id >> GRN_II_N_CHUNK_VARIATION;
+ seg_offset = (chunk_id & ((1 << GRN_II_N_CHUNK_VARIATION) - 1)) <<
+ GRN_II_W_LEAST_CHUNK;
+ GRN_IO_SEG_REF(builder->ii->chunk, seg_id, seg);
+ if (!seg) {
+ if (ctx->rc == GRN_SUCCESS) {
+ ERR(GRN_UNKNOWN_ERROR,
+ "failed access chunk segment: chunk_id = %u, seg_id = %u",
+ chunk_id, seg_id);
+ }
+ return ctx->rc;
+ }
+ seg_rest = S_CHUNK - seg_offset; /* bytes available up to the segment end */
+ if (in_size <= seg_rest) {
+ grn_memcpy((uint8_t *)seg + seg_offset, in, in_size);
+ in_size = 0;
+ } else {
+ grn_memcpy((uint8_t *)seg + seg_offset, in, seg_rest);
+ in += seg_rest;
+ in_size -= seg_rest;
+ }
+ GRN_IO_SEG_UNREF(builder->ii->chunk, seg_id);
+
+ /*
+ * Copy to the next segments. Each following segment is filled from its
+ * head, S_CHUNK bytes at a time, until all encoded bytes are stored.
+ */
+ while (in_size) {
+ seg_id++;
+ GRN_IO_SEG_REF(builder->ii->chunk, seg_id, seg);
+ if (!seg) {
+ if (ctx->rc == GRN_SUCCESS) {
+ ERR(GRN_UNKNOWN_ERROR,
+ "failed access chunk segment: chunk_id = %u, seg_id = %u",
+ chunk_id, seg_id);
+ }
+ return ctx->rc;
+ }
+ if (in_size <= S_CHUNK) {
+ grn_memcpy(seg, in, in_size);
+ in_size = 0;
+ } else {
+ grn_memcpy(seg, in, S_CHUNK);
+ in += S_CHUNK;
+ in_size -= S_CHUNK;
+ }
+ GRN_IO_SEG_UNREF(builder->ii->chunk, seg_id);
+ }
+
+ /* Append a cinfo. */
+ rc = grn_ii_builder_get_cinfo(ctx, builder, &cinfo);
+ if (rc != GRN_SUCCESS) {
+ return rc;
+ }
+ cinfo->segno = chunk_id; /* the chunk ID returned by chunk_new(), not seg_id */
+ cinfo->size = chunk->enc_offset;
+ cinfo->dgap = chunk->rid_gap;
+
+ builder->buf.ii->header->total_chunk_size += chunk->enc_offset;
+ grn_ii_builder_chunk_clear(ctx, chunk);
+ return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_read_to_chunk reads values from a block to a chunk.
+ *
+ * Postings are read from the block until a zero value is found. For each
+ * posting, the record ID gap, the section ID (if sections are stored), the
+ * weight (if weights are stored) and either the positions (terminated by a
+ * zero value) or the frequency are decoded and stored in the buffers of
+ * builder->chunk. The chunk is flushed when a new record starts and chunk->n
+ * has reached options.chunk_threshold.
+ *
+ * After the terminating zero, the next value of the block is read and stored
+ * as the block's next term ID; GRN_ID_NIL is stored if the block has no more
+ * data.
+ */
+static grn_rc
+grn_ii_builder_read_to_chunk(grn_ctx *ctx, grn_ii_builder *builder,
+ uint32_t block_id)
+{
+ grn_rc rc;
+ uint64_t value;
+ uint32_t rid = GRN_ID_NIL, last_sid = 0;
+ uint32_t ii_flags = builder->ii->header->flags;
+ grn_ii_builder_chunk *chunk = &builder->chunk;
+
+ for (;;) {
+ uint32_t gap, freq;
+ uint64_t value; /* shadows the function-scope value inside this loop */
+ rc = grn_ii_builder_read_from_block(ctx, builder, block_id, &value);
+ if (rc != GRN_SUCCESS) {
+ return rc;
+ }
+ if (!value) { /* a zero value ends the postings of this term */
+ break;
+ }
+ if (builder->chunk.offset == builder->chunk.size) {
+ rc = grn_ii_builder_chunk_extend_bufs(ctx, chunk, ii_flags);
+ if (rc != GRN_SUCCESS) {
+ return rc;
+ }
+ }
+
+ /* Read record ID. */
+ gap = value >> builder->sid_bits; /* In-block gap */
+ if (gap) {
+ if (chunk->n >= builder->options.chunk_threshold) {
+ rc = grn_ii_builder_flush_chunk(ctx, builder);
+ if (rc != GRN_SUCCESS) {
+ return rc;
+ }
+ }
+ last_sid = 0; /* section IDs restart for a new record */
+ }
+ rid += gap;
+ gap = rid - chunk->rid; /* Global gap */
+ chunk->rid_buf[chunk->offset] = chunk->offset ? gap : rid;
+ chunk->n++;
+ chunk->rid = rid;
+ chunk->rid_gap += gap;
+ builder->df++;
+
+ /* Read section ID. */
+ if (ii_flags & GRN_OBJ_WITH_SECTION) {
+ uint32_t sid = (value & builder->sid_mask) + 1;
+ chunk->sid_buf[chunk->offset] = sid - last_sid - 1;
+ chunk->n++;
+ last_sid = sid;
+ }
+
+ /* Read weight. */
+ if (ii_flags & GRN_OBJ_WITH_WEIGHT) {
+ uint32_t weight;
+ rc = grn_ii_builder_read_from_block(ctx, builder, block_id, &value);
+ if (rc != GRN_SUCCESS) {
+ return rc;
+ }
+ weight = value;
+ chunk->weight_buf[chunk->offset] = weight;
+ chunk->n++;
+ }
+
+ /* Read positions or a frequency. */
+ if (ii_flags & GRN_OBJ_WITH_POSITION) {
+ uint32_t pos = -1; /* all bits set: no position read yet for this posting */
+ freq = 0;
+ for (;;) {
+ rc = grn_ii_builder_read_from_block(ctx, builder, block_id, &value);
+ if (rc != GRN_SUCCESS) {
+ return rc;
+ }
+ if (!value) { /* a zero value ends the positions */
+ break;
+ }
+ if (builder->chunk.pos_offset == builder->chunk.pos_size) {
+ rc = grn_ii_builder_chunk_extend_pos_buf(ctx, chunk);
+ if (rc != GRN_SUCCESS) {
+ return rc;
+ }
+ }
+ if (pos == -1) { /* the first position is stored minus one */
+ chunk->pos_buf[chunk->pos_offset] = value - 1;
+ chunk->pos_sum += value - 1;
+ } else {
+ chunk->pos_buf[chunk->pos_offset] = value;
+ chunk->pos_sum += value;
+ }
+ chunk->n++;
+ pos += value;
+ chunk->pos_offset++;
+ freq++;
+ }
+ } else {
+ rc = grn_ii_builder_read_from_block(ctx, builder, block_id, &value);
+ if (rc != GRN_SUCCESS) {
+ return rc;
+ }
+ freq = value;
+ }
+ chunk->freq_buf[chunk->offset] = freq - 1; /* stored as frequency minus one */
+ chunk->n++;
+ chunk->offset++;
+ }
+ rc = grn_ii_builder_read_from_block(ctx, builder, block_id, &value);
+ if (rc == GRN_SUCCESS) {
+ builder->blocks[block_id].tid = value;
+ } else if (rc == GRN_END_OF_DATA) {
+ builder->blocks[block_id].tid = GRN_ID_NIL;
+ } else {
+ return rc;
+ }
+ return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_register_chunks registers chunks.
+ *
+ * The current chunk is encoded together with the chunk_info entries collected
+ * so far, the encoded bytes are copied into the chunk area of the builder
+ * buffer and a buffer_term describing them is filled in. The buffer is
+ * assigned on first use, and flushed and re-assigned when it already holds
+ * options.buffer_max_n_terms terms or has too little chunk space left.
+ * Finally the array entry of the term is pointed at the new buffer_term and
+ * the chunk and the list of cinfos are reset.
+ */
+static grn_rc
+grn_ii_builder_register_chunks(grn_ctx *ctx, grn_ii_builder *builder)
+{
+ grn_rc rc;
+ uint32_t buf_tid, *a;
+ buffer_term *buf_term;
+
+ rc = grn_ii_builder_chunk_encode(ctx, &builder->chunk, builder->cinfos,
+ builder->n_cinfos);
+ if (rc != GRN_SUCCESS) {
+ return rc;
+ }
+
+ if (!grn_ii_builder_buffer_is_assigned(ctx, &builder->buf)) {
+ rc = grn_ii_builder_buffer_assign(ctx, &builder->buf,
+ builder->chunk.enc_offset);
+ if (rc != GRN_SUCCESS) {
+ return rc;
+ }
+ }
+ buf_tid = builder->buf.buf->header.nterms; /* index of the next free term slot */
+ if (buf_tid >= builder->options.buffer_max_n_terms ||
+ builder->buf.chunk_size - builder->buf.chunk_offset <
+ builder->chunk.enc_offset) {
+ rc = grn_ii_builder_buffer_flush(ctx, &builder->buf);
+ if (rc != GRN_SUCCESS) {
+ return rc;
+ }
+ rc = grn_ii_builder_buffer_assign(ctx, &builder->buf,
+ builder->chunk.enc_offset);
+ if (rc != GRN_SUCCESS) {
+ return rc;
+ }
+ buf_tid = 0;
+ }
+ buf_term = &builder->buf.buf->terms[buf_tid];
+ buf_term->tid = builder->chunk.tid;
+ if (builder->n_cinfos) {
+ buf_term->tid |= CHUNK_SPLIT; /* flagged when chunks were flushed separately */
+ }
+ buf_term->size_in_buffer = 0;
+ buf_term->pos_in_buffer = 0;
+ buf_term->size_in_chunk = builder->chunk.enc_offset;
+ buf_term->pos_in_chunk = builder->buf.chunk_offset;
+
+ grn_memcpy(builder->buf.chunk + builder->buf.chunk_offset,
+ builder->chunk.enc_buf, builder->chunk.enc_offset);
+ builder->buf.chunk_offset += builder->chunk.enc_offset;
+
+ a = array_get(ctx, builder->ii, builder->chunk.tid);
+ if (!a) {
+ DEFINE_NAME(builder->ii);
+ MERR("[ii][builder][chunk][register] "
+ "failed to allocate an array in segment: "
+ "<%.*s>: "
+ "tid=<%u>: max_n_segments=<%u>",
+ name_size, name,
+ builder->chunk.tid,
+ builder->ii->seg->header->max_segment);
+ return ctx->rc;
+ }
+ a[0] = SEG2POS(builder->buf.buf_id,
+ sizeof(buffer_header) + buf_tid * sizeof(buffer_term));
+ a[1] = builder->df; /* number of postings read for this term */
+ array_unref(builder->ii, builder->chunk.tid);
+
+ builder->buf.buf->header.nterms++;
+ builder->n_cinfos = 0;
+ grn_ii_builder_chunk_clear(ctx, &builder->chunk);
+ return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_builder_commit merges the blocks stored in the temporary file into
+ * the index.
+ *
+ * The first term ID of every block is read, then the terms of the index
+ * lexicon are visited in key order. For each term, the postings of all blocks
+ * whose current term ID matches are read into the chunk; the result is either
+ * packed into the array entry or registered in the buffer. The buffer is
+ * flushed at the end if it is assigned.
+ *
+ * Returns GRN_SUCCESS or the first error. The lexicon cursor is checked after
+ * opening and is closed on every path, including error paths.
+ */
+static grn_rc
+grn_ii_builder_commit(grn_ctx *ctx, grn_ii_builder *builder)
+{
+  uint32_t i;
+  grn_rc rc = GRN_SUCCESS;
+  grn_table_cursor *cursor;
+
+  /* Read the first term ID of each block. */
+  for (i = 0; i < builder->n_blocks; i++) {
+    uint64_t value;
+    rc = grn_ii_builder_read_from_block(ctx, builder, i, &value);
+    if (rc != GRN_SUCCESS) {
+      return rc;
+    }
+    builder->blocks[i].tid = value;
+  }
+
+  cursor = grn_table_cursor_open(ctx, builder->ii->lexicon,
+                                 NULL, 0, NULL, 0, 0, -1, GRN_CURSOR_BY_KEY);
+  if (!cursor) {
+    if (ctx->rc == GRN_SUCCESS) {
+      ERR(GRN_OBJECT_CORRUPT, "[index] failed to open table cursor");
+    }
+    return ctx->rc;
+  }
+  for (;;) {
+    grn_id tid = grn_table_cursor_next(ctx, cursor);
+    if (tid == GRN_ID_NIL) {
+      break;
+    }
+    builder->chunk.tid = tid;
+    builder->chunk.rid = GRN_ID_NIL;
+    builder->df = 0;
+    for (i = 0; i < builder->n_blocks; i++) {
+      if (tid == builder->blocks[i].tid) {
+        rc = grn_ii_builder_read_to_chunk(ctx, builder, i);
+        if (rc != GRN_SUCCESS) {
+          break;
+        }
+      }
+    }
+    if (rc != GRN_SUCCESS) {
+      break;
+    }
+    if (!builder->chunk.n) {
+      /* This term does not appear. */
+      continue;
+    }
+    if (!builder->n_cinfos) {
+      grn_bool packed;
+      rc = grn_ii_builder_pack_chunk(ctx, builder, &packed);
+      if (rc != GRN_SUCCESS) {
+        break;
+      }
+      if (packed) {
+        continue;
+      }
+    }
+    rc = grn_ii_builder_register_chunks(ctx, builder);
+    if (rc != GRN_SUCCESS) {
+      break;
+    }
+  }
+  /* Close the cursor before looking at rc so that it is never leaked. */
+  grn_table_cursor_close(ctx, cursor);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  if (grn_ii_builder_buffer_is_assigned(ctx, &builder->buf)) {
+    rc = grn_ii_builder_buffer_flush(ctx, &builder->buf);
+    if (rc != GRN_SUCCESS) {
+      return rc;
+    }
+  }
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ii_build2 builds an index with a builder: it opens a builder for ii,
+ * appends the values of the source columns, commits the result and closes the
+ * builder. The builder is closed even if building fails; the error of the
+ * close is returned only if everything before it succeeded.
+ */
+grn_rc
+grn_ii_build2(grn_ctx *ctx, grn_ii *ii, const grn_ii_builder_options *options)
+{
+  grn_ii_builder *builder;
+  grn_rc close_rc;
+  grn_rc rc = grn_ii_builder_open(ctx, ii, options, &builder);
+
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  rc = grn_ii_builder_append_source(ctx, builder);
+  if (rc == GRN_SUCCESS) {
+    rc = grn_ii_builder_commit(ctx, builder);
+  }
+  close_rc = grn_ii_builder_close(ctx, builder);
+  return (rc == GRN_SUCCESS) ? close_rc : rc;
+}