diff options
Diffstat (limited to '')
31 files changed, 6297 insertions, 0 deletions
diff --git a/storage/mroonga/vendor/groonga/lib/dat.cpp b/storage/mroonga/vendor/groonga/lib/dat.cpp new file mode 100644 index 00000000..1446be7c --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat.cpp @@ -0,0 +1,1338 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2017 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ +#include "grn.h" +#include <sys/types.h> +#include <sys/stat.h> +#include <cstring> +#include <new> +#include "grn_str.h" +#include "grn_io.h" +#include "grn_dat.h" +#include "grn_util.h" +#include "grn_normalizer.h" + +#include "dat/trie.hpp" +#include "dat/cursor-factory.hpp" + +namespace { + +const uint32_t FILE_ID_LENGTH = 3; + +class CriticalSection { + public: + CriticalSection() : lock_(NULL) {} + explicit CriticalSection(grn_critical_section *lock) : lock_(lock) { + CRITICAL_SECTION_ENTER(*lock_); + } + ~CriticalSection() { + leave(); + } + + void enter(grn_critical_section *lock) { + leave(); + lock_ = lock; + } + void leave() { + if (lock_ != NULL) { + CRITICAL_SECTION_LEAVE(*lock_); + lock_ = NULL; + } + } + + private: + grn_critical_section *lock_; + + // Disallows copy and assignment. + CriticalSection(const CriticalSection &); + CriticalSection &operator=(const CriticalSection &); +}; + +/* + grn_dat_remove_file() removes a file specified by `path' and then returns + true on success, false on failure. Note that grn_dat_remove_file() does not + change `ctx->rc'. + */ +bool +grn_dat_remove_file(grn_ctx *ctx, const char *path) +{ + struct stat stat; + + if (::stat(path, &stat) == -1) { + return false; + } + + if (grn_unlink(path) == -1) { + const char *system_message = grn_strerror(errno); + GRN_LOG(ctx, GRN_LOG_WARNING, + "[dat][remove-file] failed to remove path: %s: <%s>", + system_message, path); + return false; + } + + GRN_LOG(ctx, GRN_LOG_INFO, + "[dat][remove-file] removed: <%s>", path); + return true; +} + +grn_rc +grn_dat_translate_error_code(grn::dat::ErrorCode error_code) { + switch (error_code) { + case grn::dat::PARAM_ERROR: { + return GRN_INVALID_ARGUMENT; + } + case grn::dat::IO_ERROR: { + return GRN_INPUT_OUTPUT_ERROR; + } + case grn::dat::FORMAT_ERROR: { + return GRN_INVALID_FORMAT; + } + case grn::dat::MEMORY_ERROR: { + return GRN_NO_MEMORY_AVAILABLE; + } + case grn::dat::SIZE_ERROR: + case grn::dat::UNEXPECTED_ERROR: { + return GRN_UNKNOWN_ERROR; + } + case grn::dat::STATUS_ERROR: { + return GRN_FILE_CORRUPT; + } + default: { + return GRN_UNKNOWN_ERROR; + } + } +} + +void +grn_dat_init(grn_ctx *, grn_dat *dat) +{ + GRN_DB_OBJ_SET_TYPE(dat, GRN_TABLE_DAT_KEY); + dat->io = NULL; + dat->header = NULL; + dat->file_id = 0; + dat->encoding = GRN_ENC_DEFAULT; + dat->trie = NULL; + dat->old_trie = NULL; + dat->tokenizer = NULL; + dat->normalizer = NULL; + GRN_PTR_INIT(&(dat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); + CRITICAL_SECTION_INIT(dat->lock); + dat->is_dirty = GRN_FALSE; +} + +void +grn_dat_fin(grn_ctx *ctx, grn_dat *dat) +{ + CRITICAL_SECTION_FIN(dat->lock); + delete static_cast<grn::dat::Trie *>(dat->old_trie); + delete static_cast<grn::dat::Trie *>(dat->trie); + dat->old_trie = NULL; + dat->trie = NULL; + if (dat->io) { + if (dat->is_dirty) { + uint32_t n_dirty_opens; + GRN_ATOMIC_ADD_EX(&(dat->header->n_dirty_opens), -1, n_dirty_opens); + } + grn_io_close(ctx, dat->io); + dat->io = NULL; + } + GRN_OBJ_FIN(ctx, &(dat->token_filters)); +} + +/* + grn_dat_generate_trie_path() generates the path from `base_path' and + `file_id'. The generated path is stored in `trie_path'. + */ +void +grn_dat_generate_trie_path(const char *base_path, char *trie_path, uint32_t file_id) +{ + if (!base_path || !base_path[0]) { + trie_path[0] = '\0'; + return; + } + const size_t len = std::strlen(base_path); + grn_memcpy(trie_path, base_path, len); + trie_path[len] = '.'; + grn_itoh(file_id % (1U << (4 * FILE_ID_LENGTH)), + trie_path + len + 1, FILE_ID_LENGTH); + trie_path[len + 1 + FILE_ID_LENGTH] = '\0'; +} + +bool +grn_dat_open_trie_if_needed(grn_ctx *ctx, grn_dat *dat) +{ + if (!dat) { + ERR(GRN_INVALID_ARGUMENT, "dat is null"); + return false; + } + + const uint32_t file_id = dat->header->file_id; + if (!file_id || (dat->trie && (file_id <= dat->file_id))) { + /* + There is no need to open file when no trie file is available or the + current trie file is the latest one. + */ + return true; + } + + CriticalSection critical_section(&dat->lock); + + if (dat->trie && (file_id <= dat->file_id)) { + /* + There is no need to open file if the latest file has been opened by + another thread. + */ + return true; + } + + char trie_path[PATH_MAX]; + grn_dat_generate_trie_path(grn_io_path(dat->io), trie_path, file_id); + grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + grn::dat::Trie * const old_trie = static_cast<grn::dat::Trie *>(dat->old_trie); + grn::dat::Trie * const new_trie = new (std::nothrow) grn::dat::Trie; + if (!new_trie) { + MERR("new grn::dat::Trie failed"); + return false; + } + + if (trie_path[0] == '\0') { + try { + new_trie->create(trie_path); + } catch (const grn::dat::Exception &ex) { + ERR(grn_dat_translate_error_code(ex.code()), + "grn::dat::Trie::create failed: %s", + ex.what()); + delete new_trie; + return false; + } + } else { + try { + new_trie->open(trie_path); + } catch (const grn::dat::Exception &ex) { + ERR(grn_dat_translate_error_code(ex.code()), + "grn::dat::Trie::open failed: %s", + ex.what()); + delete new_trie; + return false; + } + } + + dat->old_trie = trie; + dat->trie = new_trie; + dat->file_id = file_id; + + critical_section.leave(); + + delete old_trie; + if (file_id >= 3) { + grn_dat_generate_trie_path(grn_io_path(dat->io), trie_path, file_id - 2); + grn_dat_remove_file(ctx, trie_path); + } + return true; +} + +bool grn_dat_rebuild_trie(grn_ctx *ctx, grn_dat *dat) { + const grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + grn::dat::Trie * const new_trie = new (std::nothrow) grn::dat::Trie; + if (!new_trie) { + MERR("new grn::dat::Trie failed"); + return false; + } + + const uint32_t file_id = dat->header->file_id; + char trie_path[PATH_MAX]; + grn_dat_generate_trie_path(grn_io_path(dat->io), trie_path, file_id + 1); + + for (uint64_t file_size = trie->file_size() * 2;; file_size *= 2) { + try { + new_trie->create(*trie, trie_path, file_size); + } catch (const grn::dat::SizeError &) { + continue; + } catch (const grn::dat::Exception &ex) { + ERR(grn_dat_translate_error_code(ex.code()), + "grn::dat::Trie::open failed: %s", + ex.what()); + delete new_trie; + return false; + } + break; + } + + grn::dat::Trie * const old_trie = static_cast<grn::dat::Trie *>(dat->old_trie); + dat->old_trie = dat->trie; + dat->trie = new_trie; + dat->header->file_id = dat->file_id = file_id + 1; + + delete old_trie; + if (file_id >= 2) { + char trie_path[PATH_MAX]; + grn_dat_generate_trie_path(grn_io_path(dat->io), trie_path, file_id - 1); + grn_dat_remove_file(ctx, trie_path); + } + return true; +} + +void grn_dat_cursor_init(grn_ctx *, grn_dat_cursor *cursor) { + GRN_DB_OBJ_SET_TYPE(cursor, GRN_CURSOR_TABLE_DAT_KEY); + cursor->dat = NULL; + cursor->cursor = NULL; + cursor->key = &grn::dat::Key::invalid_key(); + cursor->curr_rec = GRN_ID_NIL; +} + +void grn_dat_cursor_fin(grn_ctx *, grn_dat_cursor *cursor) { + delete static_cast<grn::dat::Cursor *>(cursor->cursor); + cursor->dat = NULL; + cursor->cursor = NULL; + cursor->key = &grn::dat::Key::invalid_key(); + cursor->curr_rec = GRN_ID_NIL; +} + +} // namespace + +extern "C" { + +grn_dat * +grn_dat_create(grn_ctx *ctx, const char *path, uint32_t, + uint32_t, uint32_t flags) +{ + if (path) { + if (path[0] == '\0') { + path = NULL; + } else if (std::strlen(path) >= (PATH_MAX - (FILE_ID_LENGTH + 1))) { + ERR(GRN_FILENAME_TOO_LONG, "too long path"); + return NULL; + } + } + + grn_dat * const dat = static_cast<grn_dat *>(GRN_CALLOC(sizeof(grn_dat))); + if (!dat) { + return NULL; + } + grn_dat_init(ctx, dat); + + dat->io = grn_io_create(ctx, path, sizeof(struct grn_dat_header), + 4096, 0, grn_io_auto, GRN_IO_EXPIRE_SEGMENT); + if (!dat->io) { + GRN_FREE(dat); + return NULL; + } + grn_io_set_type(dat->io, GRN_TABLE_DAT_KEY); + + dat->header = static_cast<struct grn_dat_header *>(grn_io_header(dat->io)); + if (!dat->header) { + grn_io_close(ctx, dat->io); + grn_dat_remove_file(ctx, path); + GRN_FREE(dat); + return NULL; + } + const grn_encoding encoding = (ctx->encoding != GRN_ENC_DEFAULT) ? + ctx->encoding : grn_gctx.encoding; + dat->header->flags = flags; + dat->header->encoding = encoding; + dat->header->tokenizer = GRN_ID_NIL; + dat->header->file_id = 0; + if (dat->header->flags & GRN_OBJ_KEY_NORMALIZE) { + dat->header->flags &= ~GRN_OBJ_KEY_NORMALIZE; + dat->normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1); + dat->header->normalizer = grn_obj_id(ctx, dat->normalizer); + } else { + dat->normalizer = NULL; + dat->header->normalizer = GRN_ID_NIL; + } + dat->encoding = encoding; + dat->tokenizer = NULL; + GRN_PTR_INIT(&(dat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); + + dat->obj.header.flags = dat->header->flags; + + return dat; +} + +grn_dat * +grn_dat_open(grn_ctx *ctx, const char *path) +{ + if (path && (std::strlen(path) >= (PATH_MAX - (FILE_ID_LENGTH + 1)))) { + ERR(GRN_FILENAME_TOO_LONG, "too long path"); + return NULL; + } + + grn_dat * const dat = static_cast<grn_dat *>(GRN_MALLOC(sizeof(grn_dat))); + if (!dat) { + return NULL; + } + + grn_dat_init(ctx, dat); + dat->io = grn_io_open(ctx, path, grn_io_auto); + if (!dat->io) { + GRN_FREE(dat); + return NULL; + } + + dat->header = (struct grn_dat_header *)grn_io_header(dat->io); + if (!dat->header) { + grn_io_close(ctx, dat->io); + GRN_FREE(dat); + return NULL; + } + dat->file_id = dat->header->file_id; + dat->encoding = dat->header->encoding; + dat->tokenizer = grn_ctx_at(ctx, dat->header->tokenizer); + if (dat->header->flags & GRN_OBJ_KEY_NORMALIZE) { + dat->header->flags &= ~GRN_OBJ_KEY_NORMALIZE; + dat->normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1); + dat->header->normalizer = grn_obj_id(ctx, dat->normalizer); + } else { + dat->normalizer = grn_ctx_at(ctx, dat->header->normalizer); + } + GRN_PTR_INIT(&(dat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); + dat->obj.header.flags = dat->header->flags; + return dat; +} + +grn_rc +grn_dat_close(grn_ctx *ctx, grn_dat *dat) +{ + if (dat) { + grn_dat_fin(ctx, dat); + GRN_FREE(dat); + } + return GRN_SUCCESS; +} + +grn_rc +grn_dat_remove(grn_ctx *ctx, const char *path) +{ + if (!path) { + ERR(GRN_INVALID_ARGUMENT, "path is null"); + return GRN_INVALID_ARGUMENT; + } + + grn_dat * const dat = grn_dat_open(ctx, path); + if (!dat) { + return ctx->rc; + } + const uint32_t file_id = dat->header->file_id; + grn_dat_close(ctx, dat); + + /* + grn_dat_remove() tries to remove (file_id + 1)th trie file because + grn::dat::Trie::create() might leave an incomplete file on failure. + */ + char trie_path[PATH_MAX]; + grn_dat_generate_trie_path(path, trie_path, file_id + 1); + grn_dat_remove_file(ctx, trie_path); + for (uint32_t i = file_id; i > 0; --i) { + grn_dat_generate_trie_path(path, trie_path, i); + if (!grn_dat_remove_file(ctx, trie_path)) { + break; + } + } + + /* + grn_io_remove() reports an error when it fails to remove `path'. + */ + return grn_io_remove(ctx, path); +} + +grn_id +grn_dat_get(grn_ctx *ctx, grn_dat *dat, const void *key, + unsigned int key_size, void **) +{ + if (!grn_dat_open_trie_if_needed(ctx, dat)) { + return GRN_ID_NIL; + } + const grn::dat::Trie * const trie = static_cast<const grn::dat::Trie *>(dat->trie); + if (!trie) { + return GRN_ID_NIL; + } + grn::dat::UInt32 key_pos; + try { + if (trie->search(key, key_size, &key_pos)) { + return trie->get_key(key_pos).id(); + } + } catch (const grn::dat::Exception &ex) { + ERR(grn_dat_translate_error_code(ex.code()), + "grn::dat::Trie::search failed: %s", + ex.what()); + } + return GRN_ID_NIL; +} + +grn_id +grn_dat_add(grn_ctx *ctx, grn_dat *dat, const void *key, + unsigned int key_size, void **, int *added) +{ + if (!key_size) { + return GRN_ID_NIL; + } else if (!grn_dat_open_trie_if_needed(ctx, dat)) { + return GRN_ID_NIL; + } + + if (!dat->trie) { + char trie_path[PATH_MAX]; + grn_dat_generate_trie_path(grn_io_path(dat->io), trie_path, 1); + grn::dat::Trie * const new_trie = new (std::nothrow) grn::dat::Trie; + if (!new_trie) { + MERR("new grn::dat::Trie failed"); + return GRN_ID_NIL; + } + try { + new_trie->create(trie_path); + } catch (const grn::dat::Exception &ex) { + ERR(grn_dat_translate_error_code(ex.code()), + "grn::dat::Trie::create failed: %s", + ex.what()); + delete new_trie; + return GRN_ID_NIL; + } + dat->trie = new_trie; + dat->file_id = dat->header->file_id = 1; + } + + grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + try { + grn::dat::UInt32 key_pos; + const bool res = trie->insert(key, key_size, &key_pos); + if (added) { + *added = res ? 1 : 0; + } + return trie->get_key(key_pos).id(); + } catch (const grn::dat::SizeError &) { + if (!grn_dat_rebuild_trie(ctx, dat)) { + return GRN_ID_NIL; + } + grn::dat::Trie * const new_trie = static_cast<grn::dat::Trie *>(dat->trie); + grn::dat::UInt32 key_pos; + const bool res = new_trie->insert(key, key_size, &key_pos); + if (added) { + *added = res ? 1 : 0; + } + return new_trie->get_key(key_pos).id(); + } catch (const grn::dat::Exception &ex) { + ERR(grn_dat_translate_error_code(ex.code()), + "grn::dat::Trie::insert failed: %s", + ex.what()); + return GRN_ID_NIL; + } +} + +int +grn_dat_get_key(grn_ctx *ctx, grn_dat *dat, grn_id id, void *keybuf, int bufsize) +{ + if (!grn_dat_open_trie_if_needed(ctx, dat)) { + return 0; + } + const grn::dat::Trie * const trie = static_cast<const grn::dat::Trie *>(dat->trie); + if (!trie) { + return 0; + } + const grn::dat::Key &key = trie->ith_key(id); + if (!key.is_valid()) { + return 0; + } + if (keybuf && (bufsize >= (int)key.length())) { + grn_memcpy(keybuf, key.ptr(), key.length()); + } + return (int)key.length(); +} + +int +grn_dat_get_key2(grn_ctx *ctx, grn_dat *dat, grn_id id, grn_obj *bulk) +{ + if (!grn_dat_open_trie_if_needed(ctx, dat)) { + return 0; + } + const grn::dat::Trie * const trie = static_cast<const grn::dat::Trie *>(dat->trie); + if (!trie) { + return 0; + } + const grn::dat::Key &key = trie->ith_key(id); + if (!key.is_valid()) { + return 0; + } + if (bulk->header.impl_flags & GRN_OBJ_REFER) { + bulk->u.b.head = static_cast<char *>(const_cast<void *>(key.ptr())); + bulk->u.b.curr = bulk->u.b.head + key.length(); + } else { + grn_bulk_write(ctx, bulk, static_cast<const char *>(key.ptr()), key.length()); + } + return (int)key.length(); +} + +grn_rc +grn_dat_delete_by_id(grn_ctx *ctx, grn_dat *dat, grn_id id, + grn_table_delete_optarg *optarg) +{ + if (!grn_dat_open_trie_if_needed(ctx, dat)) { + return ctx->rc; + } else if (!dat->trie || (id == GRN_ID_NIL)) { + return GRN_INVALID_ARGUMENT; + } + + if (optarg && optarg->func) { + const grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + if (!trie->ith_entry(id).is_valid()) { + return GRN_INVALID_ARGUMENT; + } else if (!optarg->func(ctx, reinterpret_cast<grn_obj *>(dat), id, optarg->func_arg)) { + return GRN_SUCCESS; + } + } + + try { + grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + if (!trie->remove(id)) { + return GRN_INVALID_ARGUMENT; + } + } catch (const grn::dat::Exception &ex) { + ERR(grn_dat_translate_error_code(ex.code()), + "grn::dat::Trie::remove failed: %s", + ex.what()); + return ctx->rc; + } + return GRN_SUCCESS; +} + +grn_rc +grn_dat_delete(grn_ctx *ctx, grn_dat *dat, const void *key, unsigned int key_size, + grn_table_delete_optarg *optarg) +{ + if (!grn_dat_open_trie_if_needed(ctx, dat)) { + return ctx->rc; + } else if (!dat->trie || !key || !key_size) { + return GRN_INVALID_ARGUMENT; + } + + if (optarg && optarg->func) { + try { + const grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + grn::dat::UInt32 key_pos; + if (!trie->search(key, key_size, &key_pos)) { + return GRN_INVALID_ARGUMENT; + } else if (!optarg->func(ctx, reinterpret_cast<grn_obj *>(dat), + trie->get_key(key_pos).id(), optarg->func_arg)) { + return GRN_SUCCESS; + } + } catch (const grn::dat::Exception &ex) { + ERR(grn_dat_translate_error_code(ex.code()), + "grn::dat::Trie::search failed: %s", + ex.what()); + return ctx->rc; + } + } + + try { + grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + if (!trie->remove(key, key_size)) { + return GRN_INVALID_ARGUMENT; + } + } catch (const grn::dat::Exception &ex) { + ERR(grn_dat_translate_error_code(ex.code()), + "grn::dat::Trie::remove failed: %s", + ex.what()); + return ctx->rc; + } + return GRN_SUCCESS; +} + +grn_rc +grn_dat_update_by_id(grn_ctx *ctx, grn_dat *dat, grn_id src_key_id, + const void *dest_key, unsigned int dest_key_size) +{ + if (!dest_key_size) { + return GRN_INVALID_ARGUMENT; + } else if (!grn_dat_open_trie_if_needed(ctx, dat)) { + return ctx->rc; + } else if (!dat->trie) { + return GRN_INVALID_ARGUMENT; + } + try { + try { + grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + if (!trie->update(src_key_id, dest_key, dest_key_size)) { + return GRN_INVALID_ARGUMENT; + } + } catch (const grn::dat::SizeError &) { + if (!grn_dat_rebuild_trie(ctx, dat)) { + return ctx->rc; + } + grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + if (!trie->update(src_key_id, dest_key, dest_key_size)) { + return GRN_INVALID_ARGUMENT; + } + } + } catch (const grn::dat::Exception &ex) { + ERR(grn_dat_translate_error_code(ex.code()), + "grn::dat::Trie::update failed: %s", + ex.what()); + return ctx->rc; + } + return GRN_SUCCESS; +} + +grn_rc +grn_dat_update(grn_ctx *ctx, grn_dat *dat, + const void *src_key, unsigned int src_key_size, + const void *dest_key, unsigned int dest_key_size) +{ + if (!dest_key_size) { + return GRN_INVALID_ARGUMENT; + } else if (!grn_dat_open_trie_if_needed(ctx, dat)) { + return ctx->rc; + } else if (!dat->trie) { + return GRN_INVALID_ARGUMENT; + } + try { + try { + grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + if (!trie->update(src_key, src_key_size, dest_key, dest_key_size)) { + return GRN_INVALID_ARGUMENT; + } + } catch (const grn::dat::SizeError &) { + if (!grn_dat_rebuild_trie(ctx, dat)) { + return ctx->rc; + } + grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + if (!trie->update(src_key, src_key_size, dest_key, dest_key_size)) { + return GRN_INVALID_ARGUMENT; + } + } + } catch (const grn::dat::Exception &ex) { + ERR(grn_dat_translate_error_code(ex.code()), + "grn::dat::Trie::update failed: %s", + ex.what()); + return ctx->rc; + } + return GRN_SUCCESS; +} + +int +grn_dat_scan(grn_ctx *ctx, grn_dat *dat, const char *str, + unsigned int str_size, grn_dat_scan_hit *scan_hits, + unsigned int max_num_scan_hits, const char **str_rest) +{ + if (!grn_dat_open_trie_if_needed(ctx, dat) || !str || + !(dat->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) || !scan_hits) { + if (str_rest) { + *str_rest = str; + } + return -1; + } + + grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + if (!trie) { + if (str_rest) { + *str_rest = str + str_size; + } + return 0; + } + + if (!max_num_scan_hits || !str_size) { + if (str_rest) { + *str_rest = str; + } + return 0; + } + + unsigned int num_scan_hits = 0; + try { + if (dat->normalizer) { + int flags = GRN_STRING_WITH_CHECKS; + grn_obj * const normalized_string = grn_string_open(ctx, str, str_size, + dat->normalizer, + flags); + if (!normalized_string) { + if (str_rest) { + *str_rest = str; + } + return -1; + } + grn_string_get_normalized(ctx, normalized_string, &str, &str_size, NULL); + const short *checks = grn_string_get_checks(ctx, normalized_string); + unsigned int offset = 0; + while (str_size) { + if (*checks) { + grn::dat::UInt32 key_pos; + if (trie->lcp_search(str, str_size, &key_pos)) { + const grn::dat::Key &key = trie->get_key(key_pos); + const grn::dat::UInt32 key_length = key.length(); + if ((key_length == str_size) || (checks[key_length])) { + unsigned int length = 0; + for (grn::dat::UInt32 i = 0; i < key_length; ++i) { + if (checks[i] > 0) { + length += checks[i]; + } + } + scan_hits[num_scan_hits].id = key.id(); + scan_hits[num_scan_hits].offset = offset; + scan_hits[num_scan_hits].length = length; + offset += length; + str += key_length; + str_size -= key_length; + checks += key_length; + if (++num_scan_hits >= max_num_scan_hits) { + break; + } + continue; + } + } + if (*checks > 0) { + offset += *checks; + } + } + ++str; + --str_size; + ++checks; + } + if (str_rest) { + grn_string_get_original(ctx, normalized_string, str_rest, NULL); + *str_rest += offset; + } + grn_obj_close(ctx, normalized_string); + } else { + const char * const begin = str; + while (str_size) { + grn::dat::UInt32 key_pos; + if (trie->lcp_search(str, str_size, &key_pos)) { + const grn::dat::Key &key = trie->get_key(key_pos); + scan_hits[num_scan_hits].id = key.id(); + scan_hits[num_scan_hits].offset = str - begin; + scan_hits[num_scan_hits].length = key.length(); + str += key.length(); + str_size -= key.length(); + if (++num_scan_hits >= max_num_scan_hits) { + break; + } + } else { + const int char_length = grn_charlen(ctx, str, str + str_size); + if (char_length) { + str += char_length; + str_size -= char_length; + } else { + ++str; + --str_size; + } + } + } + if (str_rest) { + *str_rest = str; + } + } + } catch (const grn::dat::Exception &ex) { + ERR(grn_dat_translate_error_code(ex.code()), + "grn::dat::lcp_search failed: %s", + ex.what()); + if (str_rest) { + *str_rest = str; + } + return -1; + } + return static_cast<int>(num_scan_hits); +} + +grn_id +grn_dat_lcp_search(grn_ctx *ctx, grn_dat *dat, + const void *key, unsigned int key_size) +{ + if (!grn_dat_open_trie_if_needed(ctx, dat) || !key || + !(dat->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE)) { + return GRN_ID_NIL; + } + + grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + if (!trie) { + return GRN_ID_NIL; + } + + try { + grn::dat::UInt32 key_pos; + if (!trie->lcp_search(key, key_size, &key_pos)) { + return GRN_ID_NIL; + } + return trie->get_key(key_pos).id(); + } catch (const grn::dat::Exception &ex) { + ERR(grn_dat_translate_error_code(ex.code()), + "grn::dat::PrefixCursor::open failed: %s", + ex.what()); + return GRN_ID_NIL; + } +} + +unsigned int +grn_dat_size(grn_ctx *ctx, grn_dat *dat) +{ + if (!grn_dat_open_trie_if_needed(ctx, dat)) { + return 0; + } + const grn::dat::Trie * const trie = static_cast<const grn::dat::Trie *>(dat->trie); + if (trie) { + return trie->num_keys(); + } + return 0; +} + +grn_dat_cursor * +grn_dat_cursor_open(grn_ctx *ctx, grn_dat *dat, + const void *min, unsigned int min_size, + const void *max, unsigned int max_size, + int offset, int limit, int flags) +{ + if (!grn_dat_open_trie_if_needed(ctx, dat)) { + return NULL; + } + + grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + if (!trie) { + grn_dat_cursor * const dc = + static_cast<grn_dat_cursor *>(GRN_MALLOC(sizeof(grn_dat_cursor))); + if (dc) { + grn_dat_cursor_init(ctx, dc); + } + return dc; + } + + grn_dat_cursor * const dc = + static_cast<grn_dat_cursor *>(GRN_MALLOC(sizeof(grn_dat_cursor))); + if (!dc) { + return NULL; + } + grn_dat_cursor_init(ctx, dc); + + try { + if ((flags & GRN_CURSOR_BY_ID) != 0) { + dc->cursor = grn::dat::CursorFactory::open(*trie, + min, min_size, max, max_size, offset, limit, + grn::dat::ID_RANGE_CURSOR | + ((flags & GRN_CURSOR_DESCENDING) ? grn::dat::DESCENDING_CURSOR : 0) | + ((flags & GRN_CURSOR_GT) ? grn::dat::EXCEPT_LOWER_BOUND : 0) | + ((flags & GRN_CURSOR_LT) ? grn::dat::EXCEPT_UPPER_BOUND : 0)); + } else if ((flags & GRN_CURSOR_PREFIX) != 0) { + if (max && max_size) { + if ((dat->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) != 0) { + dc->cursor = grn::dat::CursorFactory::open(*trie, + NULL, min_size, max, max_size, offset, limit, + grn::dat::PREFIX_CURSOR | grn::dat::DESCENDING_CURSOR); + } else { + // TODO: near + } + } else if (min && min_size) { + if ((flags & GRN_CURSOR_RK) != 0) { + // TODO: rk search + } else { + dc->cursor = grn::dat::CursorFactory::open(*trie, + min, min_size, NULL, 0, offset, limit, + grn::dat::PREDICTIVE_CURSOR | + ((flags & GRN_CURSOR_DESCENDING) ? grn::dat::DESCENDING_CURSOR : 0) | + ((flags & GRN_CURSOR_GT) ? grn::dat::EXCEPT_EXACT_MATCH : 0)); + } + } + } else { + dc->cursor = grn::dat::CursorFactory::open(*trie, + min, min_size, max, max_size, offset, limit, + grn::dat::KEY_RANGE_CURSOR | + ((flags & GRN_CURSOR_DESCENDING) ? grn::dat::DESCENDING_CURSOR : 0) | + ((flags & GRN_CURSOR_GT) ? grn::dat::EXCEPT_LOWER_BOUND : 0) | + ((flags & GRN_CURSOR_LT) ? grn::dat::EXCEPT_UPPER_BOUND : 0)); + } + } catch (const grn::dat::Exception &ex) { + ERR(grn_dat_translate_error_code(ex.code()), + "grn::dat::CursorFactory::open failed: %s", + ex.what()); + GRN_FREE(dc); + return NULL; + } + if (!dc->cursor) { + ERR(GRN_INVALID_ARGUMENT, "unsupported query"); + GRN_FREE(dc); + return NULL; + } + dc->dat = dat; + return dc; +} + +grn_id +grn_dat_cursor_next(grn_ctx *ctx, grn_dat_cursor *c) +{ + if (!c || !c->cursor) { + return GRN_ID_NIL; + } + try { + grn::dat::Cursor * const cursor = static_cast<grn::dat::Cursor *>(c->cursor); + const grn::dat::Key &key = cursor->next(); + c->key = &key; + c->curr_rec = key.is_valid() ? key.id() : GRN_ID_NIL; + } catch (const grn::dat::Exception &ex) { + ERR(grn_dat_translate_error_code(ex.code()), + "grn::dat::Cursor::next failed: %s", + ex.what()); + return GRN_ID_NIL; + } + return c->curr_rec; +} + +void +grn_dat_cursor_close(grn_ctx *ctx, grn_dat_cursor *c) +{ + if (c) { + grn_dat_cursor_fin(ctx, c); + GRN_FREE(c); + } +} + +int +grn_dat_cursor_get_key(grn_ctx *ctx, grn_dat_cursor *c, const void **key) +{ + if (c) { + const grn::dat::Key &key_ref = *static_cast<const grn::dat::Key *>(c->key); + if (key_ref.is_valid()) { + *key = key_ref.ptr(); + return (int)key_ref.length(); + } + } + return 0; +} + +grn_rc +grn_dat_cursor_delete(grn_ctx *ctx, grn_dat_cursor *c, + grn_table_delete_optarg *optarg) +{ + if (!c || !c->cursor) { + return GRN_INVALID_ARGUMENT; + } else if (!grn_dat_open_trie_if_needed(ctx, c->dat)) { + return ctx->rc; + } + grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(c->dat->trie); + if (!trie) { + return GRN_INVALID_ARGUMENT; + } + try { + if (trie->remove(c->curr_rec)) { + return GRN_SUCCESS; + } + } catch (const grn::dat::Exception &ex) { + ERR(grn_dat_translate_error_code(ex.code()), + "grn::dat::Trie::remove failed: %s", + ex.what()); + return GRN_INVALID_ARGUMENT; + } + return GRN_INVALID_ARGUMENT; +} + +grn_id +grn_dat_curr_id(grn_ctx *ctx, grn_dat *dat) +{ + if (!grn_dat_open_trie_if_needed(ctx, dat)) { + return GRN_ID_NIL; + } + const grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + if (trie) { + return trie->max_key_id(); + } + return GRN_ID_NIL; +} + +grn_rc +grn_dat_truncate(grn_ctx *ctx, grn_dat *dat) +{ + if (!grn_dat_open_trie_if_needed(ctx, dat)) { + return ctx->rc; + } + const grn::dat::Trie * const trie = static_cast<const grn::dat::Trie *>(dat->trie); + if (!trie || !trie->max_key_id()) { + return GRN_SUCCESS; + } + + char trie_path[PATH_MAX]; + grn_dat_generate_trie_path(grn_io_path(dat->io), trie_path, dat->header->file_id + 1); + try { + grn::dat::Trie().create(trie_path); + } catch (const grn::dat::Exception &ex) { + const grn_rc error_code = grn_dat_translate_error_code(ex.code()); + ERR(error_code, "grn::dat::Trie::create failed: %s", ex.what()); + return error_code; + } + ++dat->header->file_id; + if (!grn_dat_open_trie_if_needed(ctx, dat)) { + return ctx->rc; + } + return GRN_SUCCESS; +} + +const char * +_grn_dat_key(grn_ctx *ctx, grn_dat *dat, grn_id id, uint32_t *key_size) +{ + if (!grn_dat_open_trie_if_needed(ctx, dat)) { + *key_size = 0; + return NULL; + } + const grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + if (!trie) { + *key_size = 0; + return NULL; + } + const grn::dat::Key &key = trie->ith_key(id); + if (!key.is_valid()) { + *key_size = 0; + return NULL; + } + *key_size = key.length(); + return static_cast<const char *>(key.ptr()); +} + +grn_id +grn_dat_next(grn_ctx *ctx, grn_dat *dat, grn_id id) +{ + if (!grn_dat_open_trie_if_needed(ctx, dat)) { + return GRN_ID_NIL; + } + const grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + if (!trie) { + return GRN_ID_NIL; + } + while (id < trie->max_key_id()) { + if (trie->ith_key(++id).is_valid()) { + return id; + } + } + return GRN_ID_NIL; +} + +grn_id +grn_dat_at(grn_ctx *ctx, grn_dat *dat, grn_id id) +{ + if (!grn_dat_open_trie_if_needed(ctx, dat)) { + return GRN_ID_NIL; + } + const grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + if (!trie) { + return GRN_ID_NIL; + } + const grn::dat::Key &key = trie->ith_key(id); + if (!key.is_valid()) { + return GRN_ID_NIL; + } + return id; +} + +grn_rc +grn_dat_clear_status_flags(grn_ctx *ctx, grn_dat *dat) +{ + if (!grn_dat_open_trie_if_needed(ctx, dat)) { + return ctx->rc; + } + grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + if (!trie) { + return GRN_INVALID_ARGUMENT; + } + trie->clear_status_flags(); + return GRN_SUCCESS; +} + +grn_rc +grn_dat_repair(grn_ctx *ctx, grn_dat *dat) +{ + if (!grn_dat_open_trie_if_needed(ctx, dat)) { + return ctx->rc; + } + const grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + if (!trie) { + return GRN_INVALID_ARGUMENT; + } + + char trie_path[PATH_MAX]; + grn_dat_generate_trie_path(grn_io_path(dat->io), trie_path, dat->header->file_id + 1); + try { + grn::dat::Trie().repair(*trie, trie_path); + } catch (const grn::dat::Exception &ex) { + const grn_rc error_code = grn_dat_translate_error_code(ex.code()); + ERR(error_code, "grn::dat::Trie::create failed: %s", ex.what()); + return error_code; + } + ++dat->header->file_id; + if (!grn_dat_open_trie_if_needed(ctx, dat)) { + return ctx->rc; + } + return GRN_SUCCESS; +} + +grn_rc +grn_dat_flush(grn_ctx *ctx, grn_dat *dat) +{ + if (!dat->io) { + return GRN_SUCCESS; + } + + grn_rc rc = grn_io_flush(ctx, dat->io); + if (rc != GRN_SUCCESS) { + return rc; + } + + if (dat->trie) { + grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie); + try { + trie->flush(); + } catch (const grn::dat::Exception &ex) { + const grn_rc error_code = grn_dat_translate_error_code(ex.code()); + if (error_code == GRN_INPUT_OUTPUT_ERROR) { + SERR("grn::dat::Trie::flush failed: %s", ex.what()); + } else { + ERR(error_code, "grn::dat::Trie::flush failed: %s", ex.what()); + } + return error_code; + } + } + + return GRN_SUCCESS; +} + +grn_rc +grn_dat_dirty(grn_ctx *ctx, grn_dat *dat) +{ + if (!dat->io) { + return GRN_SUCCESS; + } + + grn_rc rc = GRN_SUCCESS; + + { + CriticalSection critical_section(&dat->lock); + if (!dat->is_dirty) { + uint32_t n_dirty_opens; + dat->is_dirty = GRN_TRUE; + GRN_ATOMIC_ADD_EX(&(dat->header->n_dirty_opens), 1, n_dirty_opens); + rc = grn_io_flush(ctx, dat->io); + } + } + + return rc; +} + +grn_bool +grn_dat_is_dirty(grn_ctx *ctx, grn_dat *dat) +{ + if (!dat->header) { + return GRN_FALSE; + } + + return dat->header->n_dirty_opens > 0; +} + +grn_rc +grn_dat_clean(grn_ctx *ctx, grn_dat *dat) +{ + grn_rc rc = GRN_SUCCESS; + + if (!dat->io) { + return rc; + } + + { + CriticalSection critical_section(&dat->lock); + if (dat->is_dirty) { + uint32_t n_dirty_opens; + dat->is_dirty = GRN_FALSE; + GRN_ATOMIC_ADD_EX(&(dat->header->n_dirty_opens), -1, n_dirty_opens); + rc = grn_io_flush(ctx, dat->io); + } + } + + return rc; +} + +grn_rc +grn_dat_clear_dirty(grn_ctx *ctx, grn_dat *dat) +{ + grn_rc rc = GRN_SUCCESS; + + if (!dat->io) { + return rc; + } + + { + CriticalSection critical_section(&dat->lock); + dat->is_dirty = GRN_FALSE; + dat->header->n_dirty_opens = 0; + rc = grn_io_flush(ctx, dat->io); + } + + return rc; +} + +grn_bool +grn_dat_is_corrupt(grn_ctx *ctx, grn_dat *dat) +{ + if (!dat->io) { + return GRN_FALSE; + } + + { + CriticalSection critical_section(&dat->lock); + + if (grn_io_is_corrupt(ctx, dat->io)) { + return GRN_TRUE; + } + + if (dat->header->file_id == 0) { + return GRN_FALSE; + } + + char trie_path[PATH_MAX]; + grn_dat_generate_trie_path(grn_io_path(dat->io), + trie_path, + dat->header->file_id); + struct stat stat; + if (::stat(trie_path, &stat) != 0) { + SERR("[dat][corrupt] used path doesn't exist: <%s>", + trie_path); + return GRN_TRUE; + } + } + + return GRN_FALSE; +} + +size_t +grn_dat_get_disk_usage(grn_ctx *ctx, grn_dat *dat) +{ + if (!dat->io) { + return 0; + } + + { + CriticalSection critical_section(&dat->lock); + size_t usage; + + usage = grn_io_get_disk_usage(ctx, dat->io); + + if (dat->header->file_id == 0) { + return usage; + } + + char trie_path[PATH_MAX]; + grn_dat_generate_trie_path(grn_io_path(dat->io), + trie_path, + dat->header->file_id); + struct stat stat; + if (::stat(trie_path, &stat) == 0) { + usage += stat.st_size; + } + + return usage; + } +} + +} // extern "C" diff --git a/storage/mroonga/vendor/groonga/lib/dat/Makefile.am b/storage/mroonga/vendor/groonga/lib/dat/Makefile.am new file mode 100644 index 00000000..0a58629c --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/Makefile.am @@ -0,0 +1,11 @@ +DEFS += -D_REENTRANT $(GRN_DEFS) -DGRN_DAT_EXPORT + +DEFAULT_INCLUDES = \ + -I$(top_builddir) \ + -I$(top_srcdir)/include + +noinst_LTLIBRARIES = libgrndat.la + +include sources.am + +CLEANFILES = *.gcno *.gcda diff --git a/storage/mroonga/vendor/groonga/lib/dat/array.hpp b/storage/mroonga/vendor/groonga/lib/dat/array.hpp new file mode 100644 index 00000000..de60e3bd --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/array.hpp @@ -0,0 +1,98 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#include "dat.hpp" + +namespace grn { +namespace dat { + +// This class is used to detect an out-of-range access in debug mode. +template <typename T> +class GRN_DAT_API Array { + public: + Array() : ptr_(NULL), size_(0) {} + Array(void *ptr, UInt32 size) : ptr_(static_cast<T *>(ptr)), size_(size) { + GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (size != 0)); + } + template <UInt32 U> + explicit Array(T (&array)[U]) : ptr_(array), size_(U) {} + ~Array() {} + + const T &operator[](UInt32 i) const { + GRN_DAT_DEBUG_THROW_IF(i >= size_); + return ptr_[i]; + } + T &operator[](UInt32 i) { + GRN_DAT_DEBUG_THROW_IF(i >= size_); + return ptr_[i]; + } + + const T *begin() const { + return ptr(); + } + T *begin() { + return ptr(); + } + + const T *end() const { + return ptr() + size(); + } + T *end() { + return ptr() + size(); + } + + void assign(void *ptr, UInt32 size) { + GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (size != 0)); + ptr_ = static_cast<T *>(ptr); + size_ = size; + } + template <UInt32 U> + void assign(T (&array)[U]) { + assign(array, U); + } + + void swap(Array *rhs) { + T * const temp_ptr = ptr_; + ptr_ = rhs->ptr_; + rhs->ptr_ = temp_ptr; + + const UInt32 temp_size = size_; + size_ = rhs->size_; + rhs->size_ = temp_size; + } + + T *ptr() const { + return ptr_; + } + UInt32 size() const { + return size_; + } + + private: + T *ptr_; + UInt32 size_; + + // Disallows copy and assignment. + Array(const Array &); + Array &operator=(const Array &); +}; + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/base.hpp b/storage/mroonga/vendor/groonga/lib/dat/base.hpp new file mode 100644 index 00000000..51ec6f2f --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/base.hpp @@ -0,0 +1,67 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#include "dat.hpp" + +namespace grn { +namespace dat { + +// The most significant bit represents whether or not the node is a linker. +// BASE of a linker represents the position of its associated key and BASE of +// a non-linker represents the offset to its child nodes. +class GRN_DAT_API Base { + public: + Base() : value_(0) {} + + bool operator==(const Base &rhs) const { + return value_ == rhs.value_; + } + + bool is_linker() const { + return (value_ & IS_LINKER_FLAG) == IS_LINKER_FLAG; + } + UInt32 offset() const { + GRN_DAT_DEBUG_THROW_IF(is_linker()); + return value_; + } + UInt32 key_pos() const { + GRN_DAT_DEBUG_THROW_IF(!is_linker()); + return value_ & ~IS_LINKER_FLAG; + } + + void set_offset(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF((x & IS_LINKER_FLAG) != 0); + GRN_DAT_DEBUG_THROW_IF(x > MAX_OFFSET); + value_ = x; + } + void set_key_pos(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF((x & IS_LINKER_FLAG) != 0); + GRN_DAT_DEBUG_THROW_IF(x > MAX_OFFSET); + value_ = IS_LINKER_FLAG | x; + } + + private: + UInt32 value_; + + static const UInt32 IS_LINKER_FLAG = 0x80000000U; +}; + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/block.hpp b/storage/mroonga/vendor/groonga/lib/dat/block.hpp new file mode 100644 index 00000000..34e3620a --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/block.hpp @@ -0,0 +1,94 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#include "dat.hpp" + +namespace grn { +namespace dat { + +class GRN_DAT_API Block { + public: + Block() : next_(0), prev_(0), first_phantom_(0), num_phantoms_(0) {} + + // Blocks in the same level are stored in a doubly-linked list which is + // represented by the following next() and prev(). + UInt32 next() const { + return next_ / BLOCK_SIZE; + } + UInt32 prev() const { + return prev_ / BLOCK_SIZE; + } + + // A level indicates how easyily find_offset() can find a good offset in that + // block. It is easier in lower level blocks. + UInt32 level() const { + return next_ & BLOCK_MASK; + } + // A block level rises when find_offset() fails to find a good offset + // MAX_FAILURE_COUNT times in that block. + UInt32 failure_count() const { + return prev_ & BLOCK_MASK; + } + + UInt32 first_phantom() const { + return first_phantom_; + } + UInt32 num_phantoms() const { + return num_phantoms_; + } + + void set_next(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_BLOCK_ID); + next_ = (next_ & BLOCK_MASK) | (x * BLOCK_SIZE); + } + void set_prev(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_BLOCK_ID); + prev_ = (prev_ & BLOCK_MASK) | (x * BLOCK_SIZE); + } + + void set_level(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_BLOCK_LEVEL); + GRN_DAT_DEBUG_THROW_IF(x > BLOCK_MASK); + next_ = (next_ & ~BLOCK_MASK) | x; + } + void set_failure_count(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_FAILURE_COUNT); + GRN_DAT_DEBUG_THROW_IF(x > BLOCK_MASK); + prev_ = (prev_ & ~BLOCK_MASK) | x; + } + + void set_first_phantom(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x >= BLOCK_SIZE); + first_phantom_ = (UInt16)x; + } + void set_num_phantoms(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > BLOCK_SIZE); + num_phantoms_ = (UInt16)x; + } + + private: + UInt32 next_; + UInt32 prev_; + UInt16 first_phantom_; + UInt16 num_phantoms_; +}; + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/check.hpp b/storage/mroonga/vendor/groonga/lib/dat/check.hpp new file mode 100644 index 00000000..f77148c6 --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/check.hpp @@ -0,0 +1,149 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#include "dat.hpp" + +namespace grn { +namespace dat { + +class GRN_DAT_API Check { + public: + Check() : value_(0) {} + + bool operator==(const Check &rhs) const { + return value_ == rhs.value_; + } + + // The most significant bit represents whether or not the node ID is used as + // an offset. Note that the MSB is independent of the other bits. + bool is_offset() const { + return (value_ & IS_OFFSET_FLAG) == IS_OFFSET_FLAG; + } + + UInt32 except_is_offset() const { + GRN_DAT_DEBUG_THROW_IF(is_phantom()); + return value_ & ~IS_OFFSET_FLAG; + } + + // A phantom node is a node that has never been used, and such a node is also + // called an empty element. Phantom nodes form a doubly linked list in each + // block, and the linked list is represented by next() and prev(). + bool is_phantom() const { + return (value_ & IS_PHANTOM_FLAG) == IS_PHANTOM_FLAG; + } + + UInt32 next() const { + GRN_DAT_DEBUG_THROW_IF(!is_phantom()); + return (value_ >> NEXT_SHIFT) & BLOCK_MASK; + } + UInt32 prev() const { + GRN_DAT_DEBUG_THROW_IF(!is_phantom()); + return (value_ >> PREV_SHIFT) & BLOCK_MASK; + } + + // A label is attached to each non-phantom node. A label is represented by + // a byte except for a terminal label '\256'. Note that a phantom node always + // returns an invalid label with its phantom bit flag so as to reject invalid + // transitions. + UInt32 label() const { + return value_ & (IS_PHANTOM_FLAG | LABEL_MASK); + } + + // A non-phantom node has the labels of the first child and the next sibling. + // Note that INVALID_LABEL is stored if the node has no child nodes or has + // no more siblings. + UInt32 child() const { + return (value_ >> CHILD_SHIFT) & LABEL_MASK; + } + UInt32 sibling() const { + return (value_ >> SIBLING_SHIFT) & LABEL_MASK; + } + + void set_is_offset(bool x) { + if (x) { + GRN_DAT_DEBUG_THROW_IF(is_offset()); + value_ |= IS_OFFSET_FLAG; + } else { + GRN_DAT_DEBUG_THROW_IF(!is_offset()); + value_ &= ~IS_OFFSET_FLAG; + } + } + + void set_except_is_offset(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(is_phantom()); + GRN_DAT_DEBUG_THROW_IF((x & IS_OFFSET_FLAG) == IS_OFFSET_FLAG); + value_ = (value_ & IS_OFFSET_FLAG) | x; + } + + // To reject a transition to an incomplete node, set_is_phantom() invalidates + // its label and links when it becomes non-phantom. + void set_is_phantom(bool x) { + if (x) { + GRN_DAT_DEBUG_THROW_IF(is_phantom()); + value_ |= IS_PHANTOM_FLAG; + } else { + GRN_DAT_DEBUG_THROW_IF(!is_phantom()); + value_ = (value_ & IS_OFFSET_FLAG) | (INVALID_LABEL << CHILD_SHIFT) | + (INVALID_LABEL << SIBLING_SHIFT) | INVALID_LABEL; + } + } + + void set_next(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(!is_phantom()); + GRN_DAT_DEBUG_THROW_IF(x > BLOCK_MASK); + value_ = (value_ & ~(BLOCK_MASK << NEXT_SHIFT)) | (x << NEXT_SHIFT); + } + void set_prev(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(!is_phantom()); + GRN_DAT_DEBUG_THROW_IF(x > BLOCK_MASK); + value_ = (value_ & ~(BLOCK_MASK << PREV_SHIFT)) | (x << PREV_SHIFT); + } + + void set_label(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(is_phantom()); + GRN_DAT_DEBUG_THROW_IF(x > MAX_LABEL); + value_ = (value_ & ~LABEL_MASK) | x; + } + + void set_child(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(is_phantom()); + GRN_DAT_DEBUG_THROW_IF(x > MAX_LABEL); + value_ = (value_ & ~(LABEL_MASK << CHILD_SHIFT)) | (x << CHILD_SHIFT); + } + void set_sibling(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(is_phantom()); + GRN_DAT_DEBUG_THROW_IF(label() > MAX_LABEL); + GRN_DAT_DEBUG_THROW_IF((sibling() != INVALID_LABEL) && (x == INVALID_LABEL)); + value_ = (value_ & ~(LABEL_MASK << SIBLING_SHIFT)) | (x << SIBLING_SHIFT); + } + + private: + UInt32 value_; + + static const UInt32 IS_OFFSET_FLAG = 1U << 31; + static const UInt32 IS_PHANTOM_FLAG = 1U << 30; + static const UInt32 NEXT_SHIFT = 9; + static const UInt32 PREV_SHIFT = 18; + static const UInt32 CHILD_SHIFT = 9; + static const UInt32 SIBLING_SHIFT = 18; +}; + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/cursor-factory.cpp b/storage/mroonga/vendor/groonga/lib/dat/cursor-factory.cpp new file mode 100644 index 00000000..0e97e527 --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/cursor-factory.cpp @@ -0,0 +1,92 @@ +/* -*- c-basic-offset: 2 -*- */ +/* Copyright(C) 2011 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include "cursor-factory.hpp" +#include "id-cursor.hpp" +#include "key-cursor.hpp" +#include "prefix-cursor.hpp" +#include "predictive-cursor.hpp" + +#include <new> + +namespace grn { +namespace dat { + +Cursor *CursorFactory::open(const Trie &trie, + const void *min_ptr, UInt32 min_length, + const void *max_ptr, UInt32 max_length, + UInt32 offset, + UInt32 limit, + UInt32 flags) { + const UInt32 cursor_type = flags & CURSOR_TYPE_MASK; + switch (cursor_type) { + case ID_RANGE_CURSOR: { + IdCursor *cursor = new (std::nothrow) IdCursor; + GRN_DAT_THROW_IF(MEMORY_ERROR, cursor == NULL); + try { + cursor->open(trie, String(min_ptr, min_length), + String(max_ptr, max_length), offset, limit, flags); + } catch (...) { + delete cursor; + throw; + } + return cursor; + } + case KEY_RANGE_CURSOR: { + KeyCursor *cursor = new (std::nothrow) KeyCursor; + GRN_DAT_THROW_IF(MEMORY_ERROR, cursor == NULL); + try { + cursor->open(trie, String(min_ptr, min_length), + String(max_ptr, max_length), offset, limit, flags); + } catch (...) { + delete cursor; + throw; + } + return cursor; + } + case PREFIX_CURSOR: { + PrefixCursor *cursor = new (std::nothrow) PrefixCursor; + GRN_DAT_THROW_IF(MEMORY_ERROR, cursor == NULL); + try { + cursor->open(trie, String(max_ptr, max_length), min_length, + offset, limit, flags); + } catch (...) { + delete cursor; + throw; + } + return cursor; + } + case PREDICTIVE_CURSOR: { + PredictiveCursor *cursor = new (std::nothrow) PredictiveCursor; + GRN_DAT_THROW_IF(MEMORY_ERROR, cursor == NULL); + try { + cursor->open(trie, String(min_ptr, min_length), + offset, limit, flags); + } catch (...) { + delete cursor; + throw; + } + return cursor; + } + default: { + GRN_DAT_THROW(PARAM_ERROR, "unknown cursor type"); + } + } +} + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/cursor-factory.hpp b/storage/mroonga/vendor/groonga/lib/dat/cursor-factory.hpp new file mode 100644 index 00000000..d48ab16d --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/cursor-factory.hpp @@ -0,0 +1,44 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#include "cursor.hpp" + +namespace grn { +namespace dat { + +class Trie; + +class GRN_DAT_API CursorFactory { + public: + static Cursor *open(const Trie &trie, + const void *min_ptr, UInt32 min_length, + const void *max_ptr, UInt32 max_length, + UInt32 offset = 0, + UInt32 limit = MAX_UINT32, + UInt32 flags = 0); + + private: + // Disallows copy and assignment. + CursorFactory(const CursorFactory &); + CursorFactory &operator=(const CursorFactory &); +}; + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/cursor.hpp b/storage/mroonga/vendor/groonga/lib/dat/cursor.hpp new file mode 100644 index 00000000..357b5250 --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/cursor.hpp @@ -0,0 +1,46 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#include "key.hpp" + +namespace grn { +namespace dat { + +class GRN_DAT_API Cursor { + public: + Cursor() {} + virtual ~Cursor() {} + + virtual void close() = 0; + + virtual const Key &next() = 0; + + virtual UInt32 offset() const = 0; + virtual UInt32 limit() const = 0; + virtual UInt32 flags() const = 0; + + private: + // Disallows copy and assignment. + Cursor(const Cursor &); + Cursor &operator=(const Cursor &); +}; + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/dat.hpp b/storage/mroonga/vendor/groonga/lib/dat/dat.hpp new file mode 100644 index 00000000..1afbd095 --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/dat.hpp @@ -0,0 +1,248 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#ifndef _MSC_VER +# include <stddef.h> +# include <stdint.h> +#endif // _MSC_VER + +#include <cstddef> +#include <exception> + +#ifdef _DEBUG +# include <iostream> +#endif // _DEBUG + +#ifndef GRN_DAT_API +# ifdef WIN32 +# ifdef GRN_DAT_EXPORT +# define GRN_DAT_API __declspec(dllexport) +# else // GRN_DAT_EXPORT +# define GRN_DAT_API __declspec(dllimport) +# endif // GRN_DAT_EXPORT +# else // WIN32 +# define GRN_DAT_API +# endif // WIN32 +#endif // GRN_DAT_API + +#ifdef WIN32 +# define grn_memcpy(dest, src, n) ::memcpy_s((dest), (n), (src), (n)) +#else // WIN32 +# define grn_memcpy(dest, src, n) std::memcpy((dest), (src), (n)) +#endif // WIN32 + +namespace grn { +namespace dat { + +#ifdef _MSC_VER +typedef unsigned __int8 UInt8; +typedef unsigned __int16 UInt16; +typedef unsigned __int32 UInt32; +typedef unsigned __int64 UInt64; +#else // _MSC_VER +typedef ::uint8_t UInt8; +typedef ::uint16_t UInt16; +typedef ::uint32_t UInt32; +typedef ::uint64_t UInt64; +#endif // _MSC_VER + +const UInt8 MAX_UINT8 = static_cast<UInt8>(0xFFU); +const UInt16 MAX_UINT16 = static_cast<UInt16>(0xFFFFU); +const UInt32 MAX_UINT32 = static_cast<UInt32>(0xFFFFFFFFU); +const UInt64 MAX_UINT64 = static_cast<UInt64>(0xFFFFFFFFFFFFFFFFULL); + +// If a key is a prefix of another key, such a key is associated with a special +// terminal node which has TERMINAL_LABEL. +const UInt16 TERMINAL_LABEL = 0x100; +const UInt16 MIN_LABEL = '\0'; +const UInt16 MAX_LABEL = TERMINAL_LABEL; +const UInt32 INVALID_LABEL = 0x1FF; +const UInt32 LABEL_MASK = 0x1FF; + +// The MSB of BASE is used to represent whether the node is a linker node or +// not and the other 31 bits represent the offset to its child nodes. So, the +// number of nodes is limited to 2^31. +const UInt32 ROOT_NODE_ID = 0; +const UInt32 MAX_NODE_ID = 0x7FFFFFFF; +const UInt32 MAX_NUM_NODES = MAX_NODE_ID + 1; +const UInt32 INVALID_NODE_ID = MAX_NODE_ID + 1; + +// 0 is reserved for non-linker leaf nodes. For example, the root node of an +// initial double-array is a non-linker leaf node. +const UInt32 MAX_OFFSET = MAX_NODE_ID; +const UInt32 INVALID_OFFSET = 0; + +// Phantom nodes are managed in each block because siblings are always put in +// the same block. +const UInt32 BLOCK_SIZE = 0x200; +const UInt32 BLOCK_MASK = 0x1FF; +const UInt32 MAX_BLOCK_ID = MAX_NODE_ID / BLOCK_SIZE; +const UInt32 MAX_NUM_BLOCKS = MAX_BLOCK_ID + 1; + +// Blocks are divided by their levels, which indicate how easily update +// operations can find a good offset in them. The level of a block rises when +// find_offset() fails in that block many times. MAX_FAILURE_COUNT is the +// threshold. Also, in order to limit the time cost, find_offset() scans at +// most MAX_BLOCK_COUNT blocks. +// Larger parameters bring more chances of finding good offsets but it leads to +// more node renumberings, which are costly operations, and thus results in +// a degradation of space/time efficiencies. +const UInt32 MAX_FAILURE_COUNT = 4; +const UInt32 MAX_BLOCK_COUNT = 16; +const UInt32 MAX_BLOCK_LEVEL = 5; + +// Blocks in the same level compose a doubly linked list. The entry block of +// a linked list is called a leader. INVALID_LEADER means that a linked list is +// empty and there exists no leader. +const UInt32 INVALID_LEADER = 0x7FFFFFFF; + +const UInt32 MIN_KEY_ID = 1; +const UInt32 MAX_KEY_ID = MAX_NODE_ID; +const UInt32 INVALID_KEY_ID = 0; + +// A key length is represented as a 12-bit unsigned integer in Key. +// A key ID is represented as a 28-bit unsigned integer in Key. +const UInt32 MAX_KEY_LENGTH = (1U << 12) - 1; +const UInt32 MAX_NUM_KEYS = (1U << 28) - 1; + +const UInt64 MIN_FILE_SIZE = 1 << 16; +const UInt64 DEFAULT_FILE_SIZE = 1 << 20; +const UInt64 MAX_FILE_SIZE = (UInt64)1 << 40; +const double DEFAULT_NUM_NODES_PER_KEY = 4.0; +const double MAX_NUM_NODES_PER_KEY = 16.0; +const double DEFAULT_AVERAGE_KEY_LENGTH = 16.0; +const UInt32 MAX_KEY_BUF_SIZE = 0x80000000U; +const UInt32 MAX_TOTAL_KEY_LENGTH = 0xFFFFFFFFU; + +const UInt32 ID_RANGE_CURSOR = 0x00001; +const UInt32 KEY_RANGE_CURSOR = 0x00002; +const UInt32 PREFIX_CURSOR = 0x00004; +const UInt32 PREDICTIVE_CURSOR = 0x00008; +const UInt32 CURSOR_TYPE_MASK = 0x000FF; + +const UInt32 ASCENDING_CURSOR = 0x00100; +const UInt32 DESCENDING_CURSOR = 0x00200; +const UInt32 CURSOR_ORDER_MASK = 0x00F00; + +const UInt32 EXCEPT_LOWER_BOUND = 0x01000; +const UInt32 EXCEPT_UPPER_BOUND = 0x02000; +const UInt32 EXCEPT_EXACT_MATCH = 0x04000; +const UInt32 CURSOR_OPTIONS_MASK = 0xFF000; + +const UInt32 REMOVING_FLAG = 1U << 0; +const UInt32 INSERTING_FLAG = 1U << 1; +const UInt32 UPDATING_FLAG = 1U << 2; +const UInt32 CHANGING_MASK = REMOVING_FLAG | INSERTING_FLAG | UPDATING_FLAG; + +const UInt32 MKQ_SORT_THRESHOLD = 10; + +enum ErrorCode { + PARAM_ERROR = -1, + IO_ERROR = -2, + FORMAT_ERROR = -3, + MEMORY_ERROR = -4, + SIZE_ERROR = -5, + UNEXPECTED_ERROR = -6, + STATUS_ERROR = -7 +}; + +class Exception : public std::exception { + public: + Exception() throw() + : std::exception(), + file_(""), + line_(-1), + what_("") {} + Exception(const char *file, int line, const char *what) throw() + : std::exception(), + file_(file), + line_(line), + what_((what != NULL) ? what : "") {} + Exception(const Exception &ex) throw() + : std::exception(ex), + file_(ex.file_), + line_(ex.line_), + what_(ex.what_) {} + virtual ~Exception() throw() {} + + virtual ErrorCode code() const throw() = 0; + virtual const char *file() const throw() { + return file_; + } + virtual int line() const throw() { + return line_; + } + virtual const char *what() const throw() { + return what_; + } + + private: + const char *file_; + int line_; + const char *what_; +}; + +template <ErrorCode T> +class Error : public Exception { + public: + Error() throw() + : Exception() {} + Error(const char *file, int line, const char *what) throw() + : Exception(file, line, what) {} + Error(const Error &ex) throw() + : Exception(ex) {} + virtual ~Error() throw() {} + + virtual ErrorCode code() const throw() { + return T; + } +}; + +typedef Error<PARAM_ERROR> ParamError; +typedef Error<IO_ERROR> IOError; +typedef Error<FORMAT_ERROR> FormatError; +typedef Error<MEMORY_ERROR> MemoryError; +typedef Error<SIZE_ERROR> SizeError; +typedef Error<UNEXPECTED_ERROR> UnexpectedError; +typedef Error<STATUS_ERROR> StatusError; + +#define GRN_DAT_INT_TO_STR(value) #value +#define GRN_DAT_LINE_TO_STR(line) GRN_DAT_INT_TO_STR(line) +#define GRN_DAT_LINE_STR GRN_DAT_LINE_TO_STR(__LINE__) + +#define GRN_DAT_THROW(code, msg)\ + (throw grn::dat::Error<code>(__FILE__, __LINE__,\ + __FILE__ ":" GRN_DAT_LINE_STR ": " #code ": " msg)) +#define GRN_DAT_THROW_IF(code, cond)\ + (void)((!(cond)) || (GRN_DAT_THROW(code, #cond), 0)) + +#ifdef _DEBUG + #define GRN_DAT_DEBUG_THROW_IF(cond)\ + GRN_DAT_THROW_IF(grn::dat::UNEXPECTED_ERROR, cond) + #define GRN_DAT_DEBUG_LOG(var)\ + (std::clog << __FILE__ ":" GRN_DAT_LINE_STR ": " #var ": "\ + << (var) << std::endl) +#else // _DEBUG + #define GRN_DAT_DEBUG_THROW_IF(cond) + #define GRN_DAT_DEBUG_LOG(var) +#endif // _DEBUG + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/entry.hpp b/storage/mroonga/vendor/groonga/lib/dat/entry.hpp new file mode 100644 index 00000000..ecb9b53e --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/entry.hpp @@ -0,0 +1,59 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#include "dat.hpp" + +namespace grn { +namespace dat { + +// The most significant bit represents whether or not the entry is valid. +// A valid entry stores the position of its associated key and an invalid entry +// stores the index of the next invalid entry. +class GRN_DAT_API Entry { + public: + Entry() : value_(0) {} + + bool is_valid() const { + return (value_ & IS_VALID_FLAG) == IS_VALID_FLAG; + } + UInt32 key_pos() const { + GRN_DAT_DEBUG_THROW_IF(!is_valid()); + return value_ & ~IS_VALID_FLAG; + } + UInt32 next() const { + GRN_DAT_DEBUG_THROW_IF(is_valid()); + return value_; + } + + void set_key_pos(UInt32 x) { + value_ = IS_VALID_FLAG | x; + } + void set_next(UInt32 x) { + value_ = x; + } + + private: + UInt32 value_; + + static const UInt32 IS_VALID_FLAG = 0x80000000U; +}; + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/file-impl.cpp b/storage/mroonga/vendor/groonga/lib/dat/file-impl.cpp new file mode 100644 index 00000000..7032eff3 --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/file-impl.cpp @@ -0,0 +1,279 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include "file-impl.hpp" + +#include <sys/types.h> +#include <sys/stat.h> + +#ifdef WIN32 +# ifdef min +# undef min +# endif // min +# ifdef max +# undef max +# endif // max +#else // WIN32 +# include <fcntl.h> +# include <sys/mman.h> +# include <unistd.h> +#endif // WIN32 + +#include <algorithm> +#include <limits> + +/* Must be the same value as GRN_OPEN_CREATE_MODE */ +#ifdef WIN32 +# define GRN_IO_FILE_CREATE_MODE (GENERIC_READ | GENERIC_WRITE) +#else /* WIN32 */ +# define GRN_IO_FILE_CREATE_MODE 0640 +#endif /* WIN32 */ + +namespace grn { +namespace dat { + +#ifdef WIN32 + +FileImpl::FileImpl() + : ptr_(NULL), + size_(0), + file_(INVALID_HANDLE_VALUE), + map_(INVALID_HANDLE_VALUE), + addr_(NULL) {} + +FileImpl::~FileImpl() { + if (addr_ != NULL) { + ::UnmapViewOfFile(addr_); + } + + if (map_ != INVALID_HANDLE_VALUE) { + ::CloseHandle(map_); + } + + if (file_ != INVALID_HANDLE_VALUE) { + ::CloseHandle(file_); + } +} + +#else // WIN32 + +FileImpl::FileImpl() + : ptr_(NULL), + size_(0), + fd_(-1), + addr_(MAP_FAILED), + length_(0) {} + +FileImpl::~FileImpl() { + if (addr_ != MAP_FAILED) { + ::munmap(addr_, length_); + } + + if (fd_ != -1) { + ::close(fd_); + } +} + +#endif // WIN32 + +void FileImpl::create(const char *path, UInt64 size) { + GRN_DAT_THROW_IF(PARAM_ERROR, size == 0); + GRN_DAT_THROW_IF(PARAM_ERROR, + size > static_cast<UInt64>(std::numeric_limits< ::size_t>::max())); + + FileImpl new_impl; + new_impl.create_(path, size); + new_impl.swap(this); +} + +void FileImpl::open(const char *path) { + GRN_DAT_THROW_IF(PARAM_ERROR, path == NULL); + GRN_DAT_THROW_IF(PARAM_ERROR, path[0] == '\0'); + + FileImpl new_impl; + new_impl.open_(path); + new_impl.swap(this); +} + +void FileImpl::close() { + FileImpl new_impl; + new_impl.swap(this); +} + +#ifdef WIN32 + +void FileImpl::swap(FileImpl *rhs) { + std::swap(ptr_, rhs->ptr_); + std::swap(size_, rhs->size_); + std::swap(file_, rhs->file_); + std::swap(map_, rhs->map_); + std::swap(addr_, rhs->addr_); +} + +void FileImpl::flush() { + if (!addr_) { + return; + } + + BOOL succeeded = ::FlushViewOfFile(addr_, static_cast<SIZE_T>(size_)); + GRN_DAT_THROW_IF(IO_ERROR, !succeeded); + + SYSTEMTIME system_time; + GetSystemTime(&system_time); + FILETIME file_time; + succeeded = SystemTimeToFileTime(&system_time, &file_time); + GRN_DAT_THROW_IF(IO_ERROR, !succeeded); + + succeeded = SetFileTime(file_, NULL, NULL, &file_time); + GRN_DAT_THROW_IF(IO_ERROR, !succeeded); +} + +void FileImpl::create_(const char *path, UInt64 size) { + if ((path != NULL) && (path[0] != '\0')) { + file_ = ::CreateFileA(path, GRN_IO_FILE_CREATE_MODE, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); + GRN_DAT_THROW_IF(IO_ERROR, file_ == INVALID_HANDLE_VALUE); + + const LONG size_low = static_cast<LONG>(size & 0xFFFFFFFFU); + LONG size_high = static_cast<LONG>(size >> 32); + const DWORD file_pos = ::SetFilePointer(file_, size_low, &size_high, + FILE_BEGIN); + GRN_DAT_THROW_IF(IO_ERROR, (file_pos == INVALID_SET_FILE_POINTER) && + (::GetLastError() != 0)); + GRN_DAT_THROW_IF(IO_ERROR, ::SetEndOfFile(file_) == 0); + + map_ = ::CreateFileMapping(file_, NULL, PAGE_READWRITE, 0, 0, NULL); + GRN_DAT_THROW_IF(IO_ERROR, map_ == INVALID_HANDLE_VALUE); + } else { + const DWORD size_low = static_cast<DWORD>(size & 0xFFFFFFFFU); + const DWORD size_high = static_cast<DWORD>(size >> 32); + + map_ = ::CreateFileMapping(file_, NULL, PAGE_READWRITE, + size_high, size_low, NULL); + GRN_DAT_THROW_IF(IO_ERROR, map_ == INVALID_HANDLE_VALUE); + } + + addr_ = ::MapViewOfFile(map_, FILE_MAP_WRITE, 0, 0, 0); + GRN_DAT_THROW_IF(IO_ERROR, addr_ == NULL); + + ptr_ = addr_; + size_ = static_cast< ::size_t>(size); +} + +void FileImpl::open_(const char *path) { +#ifdef _MSC_VER + struct __stat64 st; + GRN_DAT_THROW_IF(IO_ERROR, ::_stat64(path, &st) == -1); +#else // _MSC_VER + struct _stat st; + GRN_DAT_THROW_IF(IO_ERROR, ::_stat(path, &st) == -1); +#endif // _MSC_VER + GRN_DAT_THROW_IF(IO_ERROR, st.st_size == 0); + GRN_DAT_THROW_IF(IO_ERROR, + static_cast<UInt64>(st.st_size) > std::numeric_limits< ::size_t>::max()); + + file_ = ::CreateFileA(path, GRN_IO_FILE_CREATE_MODE, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + GRN_DAT_THROW_IF(IO_ERROR, file_ == NULL); + + map_ = ::CreateFileMapping(file_, NULL, PAGE_READWRITE, 0, 0, NULL); + GRN_DAT_THROW_IF(IO_ERROR, map_ == NULL); + + addr_ = ::MapViewOfFile(map_, FILE_MAP_WRITE, 0, 0, 0); + GRN_DAT_THROW_IF(IO_ERROR, addr_ == NULL); + + ptr_ = addr_; + size_ = static_cast< ::size_t>(st.st_size); +} + +#else // WIN32 + +void FileImpl::swap(FileImpl *rhs) { + std::swap(ptr_, rhs->ptr_); + std::swap(size_, rhs->size_); + std::swap(fd_, rhs->fd_); + std::swap(addr_, rhs->addr_); + std::swap(length_, rhs->length_); +} + +void FileImpl::flush() { + if (!addr_) { + return; + } + + int result = ::msync(addr_, length_, MS_SYNC); + GRN_DAT_THROW_IF(IO_ERROR, result != 0); +} + +void FileImpl::create_(const char *path, UInt64 size) { + GRN_DAT_THROW_IF(PARAM_ERROR, + size > static_cast<UInt64>(std::numeric_limits< ::off_t>::max())); + + if ((path != NULL) && (path[0] != '\0')) { + fd_ = ::open(path, O_RDWR | O_CREAT | O_TRUNC, GRN_IO_FILE_CREATE_MODE); + GRN_DAT_THROW_IF(IO_ERROR, fd_ == -1); + + const ::off_t file_size = static_cast< ::off_t>(size); + GRN_DAT_THROW_IF(IO_ERROR, ::ftruncate(fd_, file_size) == -1); + } + +#ifdef MAP_ANONYMOUS + const int flags = (fd_ == -1) ? (MAP_PRIVATE | MAP_ANONYMOUS) : MAP_SHARED; +#else // MAP_ANONYMOUS + const int flags = (fd_ == -1) ? (MAP_PRIVATE | MAP_ANON) : MAP_SHARED; +#endif // MAP_ANONYMOUS + + length_ = static_cast< ::size_t>(size); +#ifdef USE_MAP_HUGETLB + addr_ = ::mmap(NULL, length_, PROT_READ | PROT_WRITE, + flags | MAP_HUGETLB, fd_, 0); +#endif // USE_MAP_HUGETLB + if (addr_ == MAP_FAILED) { + addr_ = ::mmap(NULL, length_, PROT_READ | PROT_WRITE, flags, fd_, 0); + GRN_DAT_THROW_IF(IO_ERROR, addr_ == MAP_FAILED); + } + + ptr_ = addr_; + size_ = length_; +} + +void FileImpl::open_(const char *path) { + struct stat st; + GRN_DAT_THROW_IF(IO_ERROR, ::stat(path, &st) == -1); + GRN_DAT_THROW_IF(IO_ERROR, (st.st_mode & S_IFMT) != S_IFREG); + GRN_DAT_THROW_IF(IO_ERROR, st.st_size == 0); + GRN_DAT_THROW_IF(IO_ERROR, + static_cast<UInt64>(st.st_size) > std::numeric_limits< ::size_t>::max()); + + fd_ = ::open(path, O_RDWR); + GRN_DAT_THROW_IF(IO_ERROR, fd_ == -1); + + length_ = static_cast<std::size_t>(st.st_size); + addr_ = ::mmap(NULL, length_, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0); + GRN_DAT_THROW_IF(IO_ERROR, addr_ == MAP_FAILED); + + ptr_ = addr_; + size_ = length_; +} + +#endif // WIN32 + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/file-impl.hpp b/storage/mroonga/vendor/groonga/lib/dat/file-impl.hpp new file mode 100644 index 00000000..7b9c8c76 --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/file-impl.hpp @@ -0,0 +1,73 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#ifdef WIN32 +# include <windows.h> +#endif // WIN32 + +#include "dat.hpp" + +namespace grn { +namespace dat { + +class FileImpl { + public: + FileImpl(); + ~FileImpl(); + + void create(const char *path, UInt64 size); + void open(const char *path); + void close(); + + void *ptr() const { + return ptr_; + } + UInt64 size() const { + return size_; + } + + void swap(FileImpl *rhs); + + void flush(); + + private: + void *ptr_; + UInt64 size_; + +#ifdef WIN32 + HANDLE file_; + HANDLE map_; + LPVOID addr_; +#else // WIN32 + int fd_; + void *addr_; + ::size_t length_; +#endif // WIN32 + + void create_(const char *path, UInt64 size); + void open_(const char *path); + + // Disallows copy and assignment. + FileImpl(const FileImpl &); + FileImpl &operator=(const FileImpl &); +}; + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/file.cpp b/storage/mroonga/vendor/groonga/lib/dat/file.cpp new file mode 100644 index 00000000..84f2a1fb --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/file.cpp @@ -0,0 +1,73 @@ +/* -*- c-basic-offset: 2 -*- */ +/* Copyright(C) 2011-2015 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include "file.hpp" +#include "file-impl.hpp" + +#include <new> + +namespace grn { +namespace dat { + +File::File() : impl_(NULL) {} + +File::~File() { + delete impl_; +} + +void File::create(const char *path, UInt64 size) { + File new_file; + new_file.impl_ = new (std::nothrow) FileImpl; + GRN_DAT_THROW_IF(MEMORY_ERROR, new_file.impl_ == NULL); + new_file.impl_->create(path, size); + new_file.swap(this); +} + +void File::open(const char *path) { + File new_file; + new_file.impl_ = new (std::nothrow) FileImpl; + GRN_DAT_THROW_IF(MEMORY_ERROR, new_file.impl_ == NULL); + new_file.impl_->open(path); + new_file.swap(this); +} + +void File::close() { + File().swap(this); +} + +void *File::ptr() const { + return (impl_ != NULL) ? impl_->ptr() : NULL; +} + +UInt64 File::size() const { + return (impl_ != NULL) ? impl_->size() : 0; +} + +void File::swap(File *rhs) { + FileImpl * const temp = impl_; + impl_ = rhs->impl_; + rhs->impl_ = temp; +} + +void File::flush() { + if (impl_) { + impl_->flush(); + } +} + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/file.hpp b/storage/mroonga/vendor/groonga/lib/dat/file.hpp new file mode 100644 index 00000000..722b93dd --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/file.hpp @@ -0,0 +1,60 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#include "dat.hpp" + +namespace grn { +namespace dat { + +// This implementation class hides environment dependent codes required for +// memory-mapped I/O. +class FileImpl; + +class GRN_DAT_API File { + public: + File(); + ~File(); + + // This function creates a file and maps the entire file to a certain range + // of the address space. Note that a file is truncated if exists. + void create(const char *path, UInt64 size); + + // This function opens a file and maps the entire file to a certain range of + // the address space. + void open(const char *path); + void close(); + + void *ptr() const; + UInt64 size() const; + + void swap(File *rhs); + + void flush(); + + private: + FileImpl *impl_; + + // Disallows copy and assignment. + File(const File &); + File &operator=(const File &); +}; + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/header.hpp b/storage/mroonga/vendor/groonga/lib/dat/header.hpp new file mode 100644 index 00000000..dbbb1efd --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/header.hpp @@ -0,0 +1,179 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#include "dat.hpp" + +namespace grn { +namespace dat { + +class GRN_DAT_API Header { + public: + Header() + : file_size_(0), + total_key_length_(0), + next_key_id_(grn::dat::MIN_KEY_ID), + max_key_id_(0), + num_keys_(0), + max_num_keys_(0), + num_phantoms_(0), + num_zombies_(0), + num_blocks_(0), + max_num_blocks_(0), + next_key_pos_(0), + key_buf_size_(0), + status_flags_(0) { + for (UInt32 i = 0; i <= MAX_BLOCK_LEVEL; ++i) { + leaders_[i] = INVALID_LEADER; + } + for (UInt32 i = 0; i < (sizeof(reserved_) / sizeof(*reserved_)); ++i) { + reserved_[i] = 0; + } + } + + UInt64 file_size() const { + return file_size_; + } + UInt32 total_key_length() const { + return total_key_length_; + } + UInt32 min_key_id() const { + return MIN_KEY_ID; + } + UInt32 next_key_id() const { + return next_key_id_; + } + UInt32 max_key_id() const { + return max_key_id_; + } + UInt32 num_keys() const { + return num_keys_; + } + UInt32 max_num_keys() const { + return max_num_keys_; + } + UInt32 num_nodes() const { + return num_blocks() * BLOCK_SIZE; + } + UInt32 num_phantoms() const { + return num_phantoms_; + } + UInt32 num_zombies() const { + return num_zombies_; + } + UInt32 max_num_nodes() const { + return max_num_blocks() * BLOCK_SIZE; + } + UInt32 num_blocks() const { + return num_blocks_; + } + UInt32 max_num_blocks() const { + return max_num_blocks_; + } + UInt32 next_key_pos() const { + return next_key_pos_; + } + UInt32 key_buf_size() const { + return key_buf_size_; + } + UInt32 status_flags() const { + return status_flags_; + } + UInt32 ith_leader(UInt32 i) const { + GRN_DAT_DEBUG_THROW_IF(i > MAX_BLOCK_LEVEL); + return leaders_[i]; + } + + void set_file_size(UInt64 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_FILE_SIZE); + file_size_ = x; + } + void set_total_key_length(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_TOTAL_KEY_LENGTH); + total_key_length_ = x; + } + void set_next_key_id(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF((x - 1) > MAX_KEY_ID); + next_key_id_ = x; + } + void set_max_key_id(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_KEY_ID); + max_key_id_ = x; + } + void set_num_keys(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_NUM_KEYS); + num_keys_ = x; + } + void set_max_num_keys(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_NUM_KEYS); + max_num_keys_ = x; + } + void set_num_phantoms(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > max_num_nodes()); + num_phantoms_ = x; + } + void set_num_zombies(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > max_num_nodes()); + num_zombies_ = x; + } + void set_num_blocks(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > max_num_blocks()); + num_blocks_ = x; + } + void set_max_num_blocks(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_NUM_BLOCKS); + max_num_blocks_ = x; + } + void set_next_key_pos(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > key_buf_size()); + next_key_pos_ = x; + } + void set_key_buf_size(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_KEY_BUF_SIZE); + key_buf_size_ = x; + } + void set_status_flags(UInt32 x) { + status_flags_ = x; + } + void set_ith_leader(UInt32 i, UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(i > MAX_BLOCK_LEVEL); + GRN_DAT_DEBUG_THROW_IF((x != INVALID_LEADER) && (x >= num_blocks())); + leaders_[i] = x; + } + + private: + UInt64 file_size_; + UInt32 total_key_length_; + UInt32 next_key_id_; + UInt32 max_key_id_; + UInt32 num_keys_; + UInt32 max_num_keys_; + UInt32 num_phantoms_; + UInt32 num_zombies_; + UInt32 num_blocks_; + UInt32 max_num_blocks_; + UInt32 next_key_pos_; + UInt32 key_buf_size_; + UInt32 leaders_[MAX_BLOCK_LEVEL + 1]; + UInt32 status_flags_; + UInt32 reserved_[12]; +}; + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/id-cursor.cpp b/storage/mroonga/vendor/groonga/lib/dat/id-cursor.cpp new file mode 100644 index 00000000..de969839 --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/id-cursor.cpp @@ -0,0 +1,184 @@ +/* -*- c-basic-offset: 2 -*- */ +/* Copyright(C) 2011 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include "id-cursor.hpp" + +#include <algorithm> + +#include "trie.hpp" + +namespace grn { +namespace dat { + +IdCursor::IdCursor() + : trie_(NULL), + offset_(0), + limit_(MAX_UINT32), + flags_(ID_RANGE_CURSOR), + cur_(INVALID_KEY_ID), + end_(INVALID_KEY_ID), + count_(0) {} + +IdCursor::~IdCursor() {} + +void IdCursor::open(const Trie &trie, + const String &min_str, + const String &max_str, + UInt32 offset, + UInt32 limit, + UInt32 flags) { + UInt32 min_id = INVALID_KEY_ID; + if (min_str.ptr() != NULL) { + UInt32 key_pos; + GRN_DAT_THROW_IF(PARAM_ERROR, + !trie.search(min_str.ptr(), min_str.length(), &key_pos)); + min_id = trie.get_key(key_pos).id(); + } + + UInt32 max_id = INVALID_KEY_ID; + if (max_str.ptr() != NULL) { + UInt32 key_pos; + GRN_DAT_THROW_IF(PARAM_ERROR, + !trie.search(max_str.ptr(), max_str.length(), &key_pos)); + max_id = trie.get_key(key_pos).id(); + } + + open(trie, min_id, max_id, offset, limit, flags); +} + +void IdCursor::open(const Trie &trie, + UInt32 min_id, + UInt32 max_id, + UInt32 offset, + UInt32 limit, + UInt32 flags) { + flags = fix_flags(flags); + + IdCursor new_cursor(trie, offset, limit, flags); + new_cursor.init(min_id, max_id); + new_cursor.swap(this); +} + +void IdCursor::close() { + IdCursor new_cursor; + new_cursor.swap(this); +} + +const Key &IdCursor::next() { + if (count_ >= limit_) { + return Key::invalid_key(); + } + while (cur_ != end_) { + const Key &key = trie_->ith_key(cur_); + if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { + ++cur_; + } else { + --cur_; + } + if (key.is_valid()) { + ++count_; + return key; + } + } + return Key::invalid_key(); +} + +IdCursor::IdCursor(const Trie &trie, + UInt32 offset, + UInt32 limit, + UInt32 flags) + : trie_(&trie), + offset_(offset), + limit_(limit), + flags_(flags), + cur_(INVALID_KEY_ID), + end_(INVALID_KEY_ID), + count_(0) {} + +UInt32 IdCursor::fix_flags(UInt32 flags) const { + const UInt32 cursor_type = flags & CURSOR_TYPE_MASK; + GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_type != 0) && + (cursor_type != ID_RANGE_CURSOR)); + flags |= ID_RANGE_CURSOR; + + const UInt32 cursor_order = flags & CURSOR_ORDER_MASK; + GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_order != 0) && + (cursor_order != ASCENDING_CURSOR) && + (cursor_order != DESCENDING_CURSOR)); + if (cursor_order == 0) { + flags |= ASCENDING_CURSOR; + } + + const UInt32 cursor_options = flags & CURSOR_OPTIONS_MASK; + GRN_DAT_THROW_IF(PARAM_ERROR, + cursor_options & ~(EXCEPT_LOWER_BOUND | EXCEPT_UPPER_BOUND)); + + return flags; +} + +void IdCursor::init(UInt32 min_id, UInt32 max_id) { + if (min_id == INVALID_KEY_ID) { + min_id = trie_->min_key_id(); + } else if ((flags_ & EXCEPT_LOWER_BOUND) == EXCEPT_LOWER_BOUND) { + ++min_id; + } + + if (max_id == INVALID_KEY_ID) { + max_id = trie_->max_key_id(); + } else if ((flags_ & EXCEPT_UPPER_BOUND) == EXCEPT_UPPER_BOUND) { + --max_id; + } + + if ((max_id < min_id) || ((max_id - min_id) < offset_)) { + return; + } + + if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { + cur_ = min_id; + end_ = max_id + 1; + for (UInt32 i = 0; (i < offset_) && (cur_ != end_); ++i) { + while (cur_ != end_) { + if (trie_->ith_key(cur_++).is_valid()) { + break; + } + } + } + } else { + cur_ = max_id; + end_ = min_id - 1; + for (UInt32 i = 0; (i < offset_) && (cur_ != end_); ++i) { + while (cur_ != end_) { + if (trie_->ith_key(cur_--).is_valid()) { + break; + } + } + } + } +} + +void IdCursor::swap(IdCursor *cursor) { + std::swap(trie_, cursor->trie_); + std::swap(offset_, cursor->offset_); + std::swap(limit_, cursor->limit_); + std::swap(flags_, cursor->flags_); + std::swap(cur_, cursor->cur_); + std::swap(end_, cursor->end_); + std::swap(count_, cursor->count_); +} + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/id-cursor.hpp b/storage/mroonga/vendor/groonga/lib/dat/id-cursor.hpp new file mode 100644 index 00000000..60953fae --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/id-cursor.hpp @@ -0,0 +1,83 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#include "cursor.hpp" + +namespace grn { +namespace dat { + +class Trie; + +class GRN_DAT_API IdCursor : public Cursor { + public: + IdCursor(); + ~IdCursor(); + + void open(const Trie &trie, + const String &min_str, + const String &max_str, + UInt32 offset = 0, + UInt32 limit = MAX_UINT32, + UInt32 flags = 0); + + void open(const Trie &trie, + UInt32 min_id, + UInt32 max_id, + UInt32 offset = 0, + UInt32 limit = MAX_UINT32, + UInt32 flags = 0); + + void close(); + + const Key &next(); + + UInt32 offset() const { + return offset_; + } + UInt32 limit() const { + return limit_; + } + UInt32 flags() const { + return flags_; + } + + private: + const Trie *trie_; + UInt32 offset_; + UInt32 limit_; + UInt32 flags_; + + UInt32 cur_; + UInt32 end_; + UInt32 count_; + + IdCursor(const Trie &trie, UInt32 offset, UInt32 limit, UInt32 flags); + + UInt32 fix_flags(UInt32 flags) const; + void init(UInt32 min_id, UInt32 max_id); + void swap(IdCursor *cursor); + + // Disallows copy and assignment. + IdCursor(const IdCursor &); + IdCursor &operator=(const IdCursor &); +}; + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/key-cursor.cpp b/storage/mroonga/vendor/groonga/lib/dat/key-cursor.cpp new file mode 100644 index 00000000..2ce04fee --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/key-cursor.cpp @@ -0,0 +1,349 @@ +/* -*- c-basic-offset: 2 -*- */ +/* Copyright(C) 2011 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include "key-cursor.hpp" + +#include <algorithm> +#include <cstring> + +#include "trie.hpp" + +namespace grn { +namespace dat { + +KeyCursor::KeyCursor() + : trie_(NULL), + offset_(0), + limit_(MAX_UINT32), + flags_(KEY_RANGE_CURSOR), + buf_(), + count_(0), + max_count_(0), + finished_(false), + end_buf_(NULL), + end_str_() {} + +KeyCursor::~KeyCursor() { + if (end_buf_ != NULL) { + delete [] end_buf_; + } +} + +void KeyCursor::open(const Trie &trie, + const String &min_str, + const String &max_str, + UInt32 offset, + UInt32 limit, + UInt32 flags) { + GRN_DAT_THROW_IF(PARAM_ERROR, + (min_str.ptr() == NULL) && (min_str.length() != 0)); + GRN_DAT_THROW_IF(PARAM_ERROR, + (max_str.ptr() == NULL) && (max_str.length() != 0)); + + flags = fix_flags(flags); + KeyCursor new_cursor(trie, offset, limit, flags); + new_cursor.init(min_str, max_str); + new_cursor.swap(this); +} + +void KeyCursor::close() { + KeyCursor new_cursor; + new_cursor.swap(this); +} + +const Key &KeyCursor::next() { + if (finished_ || (count_ >= max_count_)) { + return Key::invalid_key(); + } + + if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { + return ascending_next(); + } else { + return descending_next(); + } +} + +KeyCursor::KeyCursor(const Trie &trie, + UInt32 offset, UInt32 limit, UInt32 flags) + : trie_(&trie), + offset_(offset), + limit_(limit), + flags_(flags), + buf_(), + count_(0), + max_count_(0), + finished_(false), + end_buf_(NULL), + end_str_() {} + +UInt32 KeyCursor::fix_flags(UInt32 flags) const { + const UInt32 cursor_type = flags & CURSOR_TYPE_MASK; + GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_type != 0) && + (cursor_type != KEY_RANGE_CURSOR)); + flags |= KEY_RANGE_CURSOR; + + const UInt32 cursor_order = flags & CURSOR_ORDER_MASK; + GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_order != 0) && + (cursor_order != ASCENDING_CURSOR) && + (cursor_order != DESCENDING_CURSOR)); + if (cursor_order == 0) { + flags |= ASCENDING_CURSOR; + } + + const UInt32 cursor_options = flags & CURSOR_OPTIONS_MASK; + GRN_DAT_THROW_IF(PARAM_ERROR, + cursor_options & ~(EXCEPT_LOWER_BOUND | EXCEPT_UPPER_BOUND)); + + return flags; +} + +void KeyCursor::init(const String &min_str, const String &max_str) { + if (offset_ > (MAX_UINT32 - limit_)) { + max_count_ = MAX_UINT32; + } else { + max_count_ = offset_ + limit_; + } + + if (limit_ == 0) { + return; + } + + if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { + ascending_init(min_str, max_str); + } else { + descending_init(min_str, max_str); + } +} + +void KeyCursor::ascending_init(const String &min_str, const String &max_str) { + if (max_str.ptr() != NULL) { + if (max_str.length() != 0) { + end_buf_ = new UInt8[max_str.length()]; + grn_memcpy(end_buf_, max_str.ptr(), max_str.length()); + end_str_.assign(end_buf_, max_str.length()); + } + } + + if ((min_str.ptr() == NULL) || (min_str.length() == 0)) { + buf_.push_back(ROOT_NODE_ID); + return; + } + + UInt32 node_id = ROOT_NODE_ID; + Node node; + for (UInt32 i = 0; i < min_str.length(); ++i) { + node = trie_->ith_node(node_id); + if (node.is_linker()) { + const Key &key = trie_->get_key(node.key_pos()); + const int result = key.str().compare(min_str, i); + if ((result > 0) || ((result == 0) && + ((flags_ & EXCEPT_LOWER_BOUND) != EXCEPT_LOWER_BOUND))) { + buf_.push_back(node_id); + } else if (node.sibling() != INVALID_LABEL) { + buf_.push_back(node_id ^ node.label() ^ node.sibling()); + } + return; + } else if (node.sibling() != INVALID_LABEL) { + buf_.push_back(node_id ^ node.label() ^ node.sibling()); + } + + node_id = node.offset() ^ min_str[i]; + if (trie_->ith_node(node_id).label() != min_str[i]) { + UInt16 label = node.child(); + if (label == TERMINAL_LABEL) { + label = trie_->ith_node(node.offset() ^ label).sibling(); + } + while (label != INVALID_LABEL) { + if (label > min_str[i]) { + buf_.push_back(node.offset() ^ label); + break; + } + label = trie_->ith_node(node.offset() ^ label).sibling(); + } + return; + } + } + + node = trie_->ith_node(node_id); + if (node.is_linker()) { + const Key &key = trie_->get_key(node.key_pos()); + if ((key.length() != min_str.length()) || + ((flags_ & EXCEPT_LOWER_BOUND) != EXCEPT_LOWER_BOUND)) { + buf_.push_back(node_id); + } else if (node.sibling() != INVALID_LABEL) { + buf_.push_back(node_id ^ node.label() ^ node.sibling()); + } + return; + } else if (node.sibling() != INVALID_LABEL) { + buf_.push_back(node_id ^ node.label() ^ node.sibling()); + } + + UInt16 label = node.child(); + if ((label == TERMINAL_LABEL) && + ((flags_ & EXCEPT_LOWER_BOUND) == EXCEPT_LOWER_BOUND)) { + label = trie_->ith_node(node.offset() ^ label).sibling(); + } + if (label != INVALID_LABEL) { + buf_.push_back(node.offset() ^ label); + } +} + +void KeyCursor::descending_init(const String &min_str, const String &max_str) { + if (min_str.ptr() != NULL) { + if (min_str.length() != 0) { + end_buf_ = new UInt8[min_str.length()]; + grn_memcpy(end_buf_, min_str.ptr(), min_str.length()); + end_str_.assign(end_buf_, min_str.length()); + } + } + + if ((max_str.ptr() == NULL) || (max_str.length() == 0)) { + buf_.push_back(ROOT_NODE_ID); + return; + } + + UInt32 node_id = ROOT_NODE_ID; + for (UInt32 i = 0; i < max_str.length(); ++i) { + const Base base = trie_->ith_node(node_id).base(); + if (base.is_linker()) { + const Key &key = trie_->get_key(base.key_pos()); + const int result = key.str().compare(max_str, i); + if ((result < 0) || ((result == 0) && + ((flags_ & EXCEPT_UPPER_BOUND) != EXCEPT_UPPER_BOUND))) { + buf_.push_back(node_id | POST_ORDER_FLAG); + } + return; + } + + UInt32 label = trie_->ith_node(node_id).child(); + if (label == TERMINAL_LABEL) { + node_id = base.offset() ^ label; + buf_.push_back(node_id | POST_ORDER_FLAG); + label = trie_->ith_node(node_id).sibling(); + } + while (label != INVALID_LABEL) { + node_id = base.offset() ^ label; + if (label < max_str[i]) { + buf_.push_back(node_id); + } else if (label > max_str[i]) { + return; + } else { + break; + } + label = trie_->ith_node(node_id).sibling(); + } + if (label == INVALID_LABEL) { + return; + } + } + + const Base base = trie_->ith_node(node_id).base(); + if (base.is_linker()) { + const Key &key = trie_->get_key(base.key_pos()); + if ((key.length() == max_str.length()) && + ((flags_ & EXCEPT_UPPER_BOUND) != EXCEPT_UPPER_BOUND)) { + buf_.push_back(node_id | POST_ORDER_FLAG); + } + return; + } + + UInt16 label = trie_->ith_node(node_id).child(); + if ((label == TERMINAL_LABEL) && + ((flags_ & EXCEPT_UPPER_BOUND) != EXCEPT_UPPER_BOUND)) { + buf_.push_back((base.offset() ^ label) | POST_ORDER_FLAG); + } +} + +void KeyCursor::swap(KeyCursor *cursor) { + std::swap(trie_, cursor->trie_); + std::swap(offset_, cursor->offset_); + std::swap(limit_, cursor->limit_); + std::swap(flags_, cursor->flags_); + buf_.swap(&cursor->buf_); + std::swap(count_, cursor->count_); + std::swap(max_count_, cursor->max_count_); + std::swap(finished_, cursor->finished_); + std::swap(end_buf_, cursor->end_buf_); + end_str_.swap(&cursor->end_str_); +} + +const Key &KeyCursor::ascending_next() { + while (!buf_.empty()) { + const UInt32 node_id = buf_.back(); + buf_.pop_back(); + + const Node node = trie_->ith_node(node_id); + if (node.sibling() != INVALID_LABEL) { + buf_.push_back(node_id ^ node.label() ^ node.sibling()); + } + + if (node.is_linker()) { + const Key &key = trie_->get_key(node.key_pos()); + if (end_buf_ != NULL) { + const int result = key.str().compare(end_str_); + if ((result > 0) || ((result == 0) && + ((flags_ & EXCEPT_UPPER_BOUND) == EXCEPT_UPPER_BOUND))) { + finished_ = true; + return Key::invalid_key(); + } + } + if (count_++ >= offset_) { + return key; + } + } else if (node.child() != INVALID_LABEL) { + buf_.push_back(node.offset() ^ node.child()); + } + } + return Key::invalid_key(); +} + +const Key &KeyCursor::descending_next() { + while (!buf_.empty()) { + const bool post_order = (buf_.back() & POST_ORDER_FLAG) == POST_ORDER_FLAG; + const UInt32 node_id = buf_.back() & ~POST_ORDER_FLAG; + + const Base base = trie_->ith_node(node_id).base(); + if (post_order) { + buf_.pop_back(); + if (base.is_linker()) { + const Key &key = trie_->get_key(base.key_pos()); + if (end_buf_ != NULL) { + const int result = key.str().compare(end_str_); + if ((result < 0) || ((result == 0) && + ((flags_ & EXCEPT_LOWER_BOUND) == EXCEPT_LOWER_BOUND))) { + finished_ = true; + return Key::invalid_key(); + } + } + if (count_++ >= offset_) { + return key; + } + } + } else { + buf_.back() |= POST_ORDER_FLAG; + UInt16 label = trie_->ith_node(node_id).child(); + while (label != INVALID_LABEL) { + buf_.push_back(base.offset() ^ label); + label = trie_->ith_node(base.offset() ^ label).sibling(); + } + } + } + return Key::invalid_key(); +} + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/key-cursor.hpp b/storage/mroonga/vendor/groonga/lib/dat/key-cursor.hpp new file mode 100644 index 00000000..56392b63 --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/key-cursor.hpp @@ -0,0 +1,88 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#include "cursor.hpp" +#include "vector.hpp" + +namespace grn { +namespace dat { + +class Trie; + +class GRN_DAT_API KeyCursor : public Cursor { + public: + KeyCursor(); + ~KeyCursor(); + + void open(const Trie &trie, + const String &min_str, + const String &max_str, + UInt32 offset = 0, + UInt32 limit = MAX_UINT32, + UInt32 flags = 0); + + void close(); + + const Key &next(); + + UInt32 offset() const { + return offset_; + } + UInt32 limit() const { + return limit_; + } + UInt32 flags() const { + return flags_; + } + + private: + const Trie *trie_; + UInt32 offset_; + UInt32 limit_; + UInt32 flags_; + + Vector<UInt32> buf_; + UInt32 count_; + UInt32 max_count_; + bool finished_; + UInt8 *end_buf_; + String end_str_; + + KeyCursor(const Trie &trie, + UInt32 offset, UInt32 limit, UInt32 flags); + + UInt32 fix_flags(UInt32 flags) const; + void init(const String &min_str, const String &max_str); + void ascending_init(const String &min_str, const String &max_str); + void descending_init(const String &min_str, const String &max_str); + void swap(KeyCursor *cursor); + + const Key &ascending_next(); + const Key &descending_next(); + + static const UInt32 POST_ORDER_FLAG = 0x80000000U; + + // Disallows copy and assignment. + KeyCursor(const KeyCursor &); + KeyCursor &operator=(const KeyCursor &); +}; + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/key.hpp b/storage/mroonga/vendor/groonga/lib/dat/key.hpp new file mode 100644 index 00000000..eb0324cd --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/key.hpp @@ -0,0 +1,110 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#include "string.hpp" + +namespace grn { +namespace dat { + +class GRN_DAT_API Key { + public: + const UInt8 &operator[](UInt32 i) const { + GRN_DAT_DEBUG_THROW_IF(i >= length()); + return buf_[i]; + } + + bool is_valid() const { + return id() != INVALID_KEY_ID; + } + + String str() const { + return String(ptr(), length()); + } + + const void *ptr() const { + return buf_; + } + UInt32 length() const { + return (length_high_ << 4) | (id_and_length_low_ & 0x0F); + } + UInt32 id() const { + return id_and_length_low_ >> 4; + } + + bool equals_to(const void *ptr, UInt32 length, UInt32 offset = 0) const { + if (length != this->length()) { + return false; + } + for ( ; offset < length; ++offset) { + if ((*this)[offset] != static_cast<const UInt8 *>(ptr)[offset]) { + return false; + } + } + return true; + } + + // Creates an object of Key from given parameters. Then, the created object + // is embedded into a specified buffer. + static const Key &create(UInt32 *buf, UInt32 key_id, + const void *key_ptr, UInt32 key_length) { + GRN_DAT_DEBUG_THROW_IF(buf == NULL); + GRN_DAT_DEBUG_THROW_IF(key_id > MAX_KEY_ID); + GRN_DAT_DEBUG_THROW_IF((key_ptr == NULL) && (key_length != 0)); + GRN_DAT_DEBUG_THROW_IF(key_length > MAX_KEY_LENGTH); + + *buf = (key_id << 4) | (key_length & 0x0F); + UInt8 *ptr = reinterpret_cast<UInt8 *>(buf + 1); + *ptr++ = key_length >> 4; + for (UInt32 i = 0; i < key_length; ++i) { + ptr[i] = static_cast<const UInt8 *>(key_ptr)[i]; + } + return *reinterpret_cast<const Key *>(buf); + } + + // Calculates how many UInt32s are required for a string. It is guaranteed + // that the estimated size is not less than the actual size. + static UInt32 estimate_size(UInt32 length) { + return 2 + (length / sizeof(UInt32)); + } + + // Returns a reference to an invalid key. + static const Key &invalid_key() { + static const Key invalid_key; + return invalid_key; +// static const UInt32 invalid_key_buf[2] = { INVALID_KEY_ID << 4, 0 }; +// return *reinterpret_cast<const Key *>(invalid_key_buf); + } + + private: + UInt32 id_and_length_low_; + UInt8 length_high_; + UInt8 buf_[3]; + + // Disallows instantiation. + Key() : id_and_length_low_(INVALID_KEY_ID << 4), length_high_(0) {} + ~Key() {} + + // Disallows copy and assignment. + Key(const Key &); + Key &operator=(const Key &); +}; + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/node.hpp b/storage/mroonga/vendor/groonga/lib/dat/node.hpp new file mode 100644 index 00000000..29febc7d --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/node.hpp @@ -0,0 +1,127 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +// See base.hpp and check.hpp for details. +#include "base.hpp" +#include "check.hpp" + +namespace grn { +namespace dat { + +class GRN_DAT_API Node { + public: + Node() : base_(), check_() {} + + Base base() const { + return base_; + } + bool is_linker() const { + GRN_DAT_DEBUG_THROW_IF(is_phantom()); + return base_.is_linker(); + } + UInt32 offset() const { + GRN_DAT_DEBUG_THROW_IF(is_phantom()); + return base_.offset(); + } + UInt32 key_pos() const { + GRN_DAT_DEBUG_THROW_IF(is_phantom()); + return base_.key_pos(); + } + + Check check() const { + return check_; + } + bool is_offset() const { + return check_.is_offset(); + } + UInt32 except_is_offset() const { + return check_.except_is_offset(); + } + bool is_phantom() const { + return check_.is_phantom(); + } + UInt32 next() const { + return check_.next(); + } + UInt32 prev() const { + return check_.prev(); + } + UInt32 label() const { + return check_.label(); + } + UInt32 child() const { + return check_.child(); + } + UInt32 sibling() const { + return check_.sibling(); + } + + void set_base(Base x) { + GRN_DAT_DEBUG_THROW_IF(is_phantom()); + base_ = x; + } + void set_offset(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(is_phantom()); + base_.set_offset(x); + } + void set_key_pos(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(is_phantom()); + base_.set_key_pos(x); + } + + void set_check(Check x) { + check_ = x; + } + void set_is_offset(bool x) { + check_.set_is_offset(x); + } + void set_except_is_offset(UInt32 x) { + check_.set_except_is_offset(x); + } + void set_is_phantom(bool x) { + GRN_DAT_DEBUG_THROW_IF(base_.offset() != INVALID_OFFSET); + check_.set_is_phantom(x); + } + void set_next(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(base_.offset() != INVALID_OFFSET); + check_.set_next(x); + } + void set_prev(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(base_.offset() != INVALID_OFFSET); + check_.set_prev(x); + } + void set_label(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(offset() != INVALID_OFFSET); + check_.set_label(x); + } + void set_child(UInt32 x) { + check_.set_child(x); + } + void set_sibling(UInt32 x) { + check_.set_sibling(x); + } + + private: + Base base_; + Check check_; +}; + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/predictive-cursor.cpp b/storage/mroonga/vendor/groonga/lib/dat/predictive-cursor.cpp new file mode 100644 index 00000000..67520305 --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/predictive-cursor.cpp @@ -0,0 +1,206 @@ +/* -*- c-basic-offset: 2 -*- */ +/* Copyright(C) 2011 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include "predictive-cursor.hpp" + +#include <algorithm> +#include <cstring> + +#include "trie.hpp" + +namespace grn { +namespace dat { + +PredictiveCursor::PredictiveCursor() + : trie_(NULL), + offset_(0), + limit_(MAX_UINT32), + flags_(PREDICTIVE_CURSOR), + buf_(), + cur_(0), + end_(0), + min_length_(0) {} + +PredictiveCursor::~PredictiveCursor() {} + +void PredictiveCursor::open(const Trie &trie, + const String &str, + UInt32 offset, + UInt32 limit, + UInt32 flags) { + GRN_DAT_THROW_IF(PARAM_ERROR, (str.ptr() == NULL) && (str.length() != 0)); + + flags = fix_flags(flags); + PredictiveCursor new_cursor(trie, offset, limit, flags); + new_cursor.init(str); + new_cursor.swap(this); +} + +void PredictiveCursor::close() { + PredictiveCursor new_cursor; + new_cursor.swap(this); +} + +const Key &PredictiveCursor::next() { + if (cur_ == end_) { + return Key::invalid_key(); + } + + if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { + return ascending_next(); + } else { + return descending_next(); + } +} + +PredictiveCursor::PredictiveCursor(const Trie &trie, + UInt32 offset, UInt32 limit, UInt32 flags) + : trie_(&trie), + offset_(offset), + limit_(limit), + flags_(flags), + buf_(), + cur_(0), + end_(0), + min_length_(0) {} + +UInt32 PredictiveCursor::fix_flags(UInt32 flags) const { + const UInt32 cursor_type = flags & CURSOR_TYPE_MASK; + GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_type != 0) && + (cursor_type != PREDICTIVE_CURSOR)); + flags |= PREDICTIVE_CURSOR; + + const UInt32 cursor_order = flags & CURSOR_ORDER_MASK; + GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_order != 0) && + (cursor_order != ASCENDING_CURSOR) && + (cursor_order != DESCENDING_CURSOR)); + if (cursor_order == 0) { + flags |= ASCENDING_CURSOR; + } + + const UInt32 cursor_options = flags & CURSOR_OPTIONS_MASK; + GRN_DAT_THROW_IF(PARAM_ERROR, cursor_options & ~(EXCEPT_EXACT_MATCH)); + + return flags; +} + +void PredictiveCursor::init(const String &str) { + if (limit_ == 0) { + return; + } + + min_length_ = str.length(); + if ((flags_ & EXCEPT_EXACT_MATCH) == EXCEPT_EXACT_MATCH) { + ++min_length_; + } + end_ = (offset_ > (MAX_UINT32 - limit_)) ? MAX_UINT32 : (offset_ + limit_); + + UInt32 node_id = ROOT_NODE_ID; + for (UInt32 i = 0; i < str.length(); ++i) { + const Base base = trie_->ith_node(node_id).base(); + if (base.is_linker()) { + if (offset_ == 0) { + const Key &key = trie_->get_key(base.key_pos()); + if ((key.length() >= str.length()) && + (key.str().substr(0, str.length()).compare(str, i) == 0)) { + if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { + node_id |= IS_ROOT_FLAG; + } + buf_.push_back(node_id); + } + } + return; + } + + node_id = base.offset() ^ str[i]; + if (trie_->ith_node(node_id).label() != str[i]) { + return; + } + } + + if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { + node_id |= IS_ROOT_FLAG; + } + buf_.push_back(node_id); +} + +void PredictiveCursor::swap(PredictiveCursor *cursor) { + std::swap(trie_, cursor->trie_); + std::swap(offset_, cursor->offset_); + std::swap(limit_, cursor->limit_); + std::swap(flags_, cursor->flags_); + buf_.swap(&cursor->buf_); + std::swap(cur_, cursor->cur_); + std::swap(end_, cursor->end_); + std::swap(min_length_, cursor->min_length_); +} + +const Key &PredictiveCursor::ascending_next() { + while (!buf_.empty()) { + const bool is_root = (buf_.back() & IS_ROOT_FLAG) == IS_ROOT_FLAG; + const UInt32 node_id = buf_.back() & ~IS_ROOT_FLAG; + buf_.pop_back(); + + const Node node = trie_->ith_node(node_id); + if (!is_root && (node.sibling() != INVALID_LABEL)) { + buf_.push_back(node_id ^ node.label() ^ node.sibling()); + } + + if (node.is_linker()) { + const Key &key = trie_->get_key(node.key_pos()); + if (key.length() >= min_length_) { + if (cur_++ >= offset_) { + return key; + } + } + } else if (node.child() != INVALID_LABEL) { + buf_.push_back(node.offset() ^ node.child()); + } + } + return Key::invalid_key(); +} + +const Key &PredictiveCursor::descending_next() { + while (!buf_.empty()) { + const bool post_order = (buf_.back() & POST_ORDER_FLAG) == POST_ORDER_FLAG; + const UInt32 node_id = buf_.back() & ~POST_ORDER_FLAG; + + const Base base = trie_->ith_node(node_id).base(); + if (post_order) { + buf_.pop_back(); + if (base.is_linker()) { + const Key &key = trie_->get_key(base.key_pos()); + if (key.length() >= min_length_) { + if (cur_++ >= offset_) { + return key; + } + } + } + } else { + buf_.back() |= POST_ORDER_FLAG; + UInt16 label = trie_->ith_node(node_id).child(); + while (label != INVALID_LABEL) { + buf_.push_back(base.offset() ^ label); + label = trie_->ith_node(base.offset() ^ label).sibling(); + } + } + } + return Key::invalid_key(); +} + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/predictive-cursor.hpp b/storage/mroonga/vendor/groonga/lib/dat/predictive-cursor.hpp new file mode 100644 index 00000000..88a950b8 --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/predictive-cursor.hpp @@ -0,0 +1,84 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#include "cursor.hpp" +#include "vector.hpp" + +namespace grn { +namespace dat { + +class Trie; + +class GRN_DAT_API PredictiveCursor : public Cursor { + public: + PredictiveCursor(); + ~PredictiveCursor(); + + void open(const Trie &trie, + const String &str, + UInt32 offset = 0, + UInt32 limit = MAX_UINT32, + UInt32 flags = 0); + + void close(); + + const Key &next(); + + UInt32 offset() const { + return offset_; + } + UInt32 limit() const { + return limit_; + } + UInt32 flags() const { + return flags_; + } + + private: + const Trie *trie_; + UInt32 offset_; + UInt32 limit_; + UInt32 flags_; + + Vector<UInt32> buf_; + UInt32 cur_; + UInt32 end_; + UInt32 min_length_; + + PredictiveCursor(const Trie &trie, + UInt32 offset, UInt32 limit, UInt32 flags); + + UInt32 fix_flags(UInt32 flags) const; + void init(const String &str); + void swap(PredictiveCursor *cursor); + + const Key &ascending_next(); + const Key &descending_next(); + + static const UInt32 IS_ROOT_FLAG = 0x80000000U; + static const UInt32 POST_ORDER_FLAG = 0x80000000U; + + // Disallows copy and assignment. + PredictiveCursor(const PredictiveCursor &); + PredictiveCursor &operator=(const PredictiveCursor &); +}; + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/prefix-cursor.cpp b/storage/mroonga/vendor/groonga/lib/dat/prefix-cursor.cpp new file mode 100644 index 00000000..83adeb37 --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/prefix-cursor.cpp @@ -0,0 +1,175 @@ +/* -*- c-basic-offset: 2 -*- */ +/* Copyright(C) 2011 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include "prefix-cursor.hpp" + +#include <algorithm> + +#include "trie.hpp" + +namespace grn { +namespace dat { + +PrefixCursor::PrefixCursor() + : trie_(NULL), + offset_(0), + limit_(MAX_UINT32), + flags_(PREFIX_CURSOR), + buf_(), + cur_(0), + end_(0) {} + +PrefixCursor::~PrefixCursor() {} + +void PrefixCursor::open(const Trie &trie, + const String &str, + UInt32 min_length, + UInt32 offset, + UInt32 limit, + UInt32 flags) { + GRN_DAT_THROW_IF(PARAM_ERROR, (str.ptr() == NULL) && (str.length() != 0)); + GRN_DAT_THROW_IF(PARAM_ERROR, min_length > str.length()); + + flags = fix_flags(flags); + PrefixCursor new_cursor(trie, offset, limit, flags); + new_cursor.init(str, min_length); + new_cursor.swap(this); +} + +void PrefixCursor::close() { + PrefixCursor new_cursor; + new_cursor.swap(this); +} + +const Key &PrefixCursor::next() { + if (cur_ == end_) { + return Key::invalid_key(); + } + if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { + return trie_->get_key(buf_[cur_++]); + } else { + return trie_->get_key(buf_[--cur_]); + } +} + +PrefixCursor::PrefixCursor(const Trie &trie, + UInt32 offset, UInt32 limit, UInt32 flags) + : trie_(&trie), + offset_(offset), + limit_(limit), + flags_(flags), + buf_(), + cur_(0), + end_(0) {} + +UInt32 PrefixCursor::fix_flags(UInt32 flags) const { + const UInt32 cursor_type = flags & CURSOR_TYPE_MASK; + GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_type != 0) && + (cursor_type != PREFIX_CURSOR)); + flags |= PREFIX_CURSOR; + + const UInt32 cursor_order = flags & CURSOR_ORDER_MASK; + GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_order != 0) && + (cursor_order != ASCENDING_CURSOR) && + (cursor_order != DESCENDING_CURSOR)); + if (cursor_order == 0) { + flags |= ASCENDING_CURSOR; + } + + const UInt32 cursor_options = flags & CURSOR_OPTIONS_MASK; + GRN_DAT_THROW_IF(PARAM_ERROR, cursor_options & ~EXCEPT_EXACT_MATCH); + + return flags; +} + +void PrefixCursor::init(const String &str, UInt32 min_length) { + if ((limit_ == 0) || (offset_ > (str.length() - min_length))) { + return; + } + + UInt32 node_id = ROOT_NODE_ID; + UInt32 i; + for (i = 0; i < str.length(); ++i) { + const Base base = trie_->ith_node(node_id).base(); + if (base.is_linker()) { + const Key &key = trie_->get_key(base.key_pos()); + if ((key.length() >= min_length) && (key.length() <= str.length()) && + (str.substr(0, key.length()).compare(key.str(), i) == 0) && + ((key.length() < str.length()) || + ((flags_ & EXCEPT_EXACT_MATCH) != EXCEPT_EXACT_MATCH))) { + buf_.push_back(base.key_pos()); + } + break; + } + + if ((i >= min_length) && + (trie_->ith_node(node_id).child() == TERMINAL_LABEL)) { + const Base linker_base = + trie_->ith_node(base.offset() ^ TERMINAL_LABEL).base(); + if (linker_base.is_linker()) { + buf_.push_back(linker_base.key_pos()); + } + } + + node_id = base.offset() ^ str[i]; + if (trie_->ith_node(node_id).label() != str[i]) { + break; + } + } + + if ((i == str.length()) && + ((flags_ & EXCEPT_EXACT_MATCH) != EXCEPT_EXACT_MATCH)) { + const Base base = trie_->ith_node(node_id).base(); + if (base.is_linker()) { + const Key &key = trie_->get_key(base.key_pos()); + if ((key.length() >= min_length) && (key.length() <= str.length())) { + buf_.push_back(base.key_pos()); + } + } else if (trie_->ith_node(node_id).child() == TERMINAL_LABEL) { + const Base linker_base = + trie_->ith_node(base.offset() ^ TERMINAL_LABEL).base(); + if (linker_base.is_linker()) { + buf_.push_back(linker_base.key_pos()); + } + } + } + + if (buf_.size() <= offset_) { + return; + } + + if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { + cur_ = offset_; + end_ = (limit_ < (buf_.size() - cur_)) ? (cur_ + limit_) : buf_.size(); + } else { + cur_ = buf_.size() - offset_; + end_ = (limit_ < cur_) ? (cur_ - limit_) : 0; + } +} + +void PrefixCursor::swap(PrefixCursor *cursor) { + std::swap(trie_, cursor->trie_); + std::swap(offset_, cursor->offset_); + std::swap(limit_, cursor->limit_); + std::swap(flags_, cursor->flags_); + buf_.swap(&cursor->buf_); + std::swap(cur_, cursor->cur_); + std::swap(end_, cursor->end_); +} + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/prefix-cursor.hpp b/storage/mroonga/vendor/groonga/lib/dat/prefix-cursor.hpp new file mode 100644 index 00000000..07a59186 --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/prefix-cursor.hpp @@ -0,0 +1,78 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#include "cursor.hpp" +#include "vector.hpp" + +namespace grn { +namespace dat { + +class Trie; + +class GRN_DAT_API PrefixCursor : public Cursor { + public: + PrefixCursor(); + ~PrefixCursor(); + + void open(const Trie &trie, + const String &str, + UInt32 min_length = 0, + UInt32 offset = 0, + UInt32 limit = MAX_UINT32, + UInt32 flags = 0); + + void close(); + + const Key &next(); + + UInt32 offset() const { + return offset_; + } + UInt32 limit() const { + return limit_; + } + UInt32 flags() const { + return flags_; + } + + private: + const Trie *trie_; + UInt32 offset_; + UInt32 limit_; + UInt32 flags_; + + Vector<UInt32> buf_; + UInt32 cur_; + UInt32 end_; + + PrefixCursor(const Trie &trie, + UInt32 offset, UInt32 limit, UInt32 flags); + + UInt32 fix_flags(UInt32 flags) const; + void init(const String &str, UInt32 min_length); + void swap(PrefixCursor *cursor); + + // Disallows copy and assignment. + PrefixCursor(const PrefixCursor &); + PrefixCursor &operator=(const PrefixCursor &); +}; + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/sources.am b/storage/mroonga/vendor/groonga/lib/dat/sources.am new file mode 100644 index 00000000..26c9f09f --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/sources.am @@ -0,0 +1,29 @@ +libgrndat_la_SOURCES = \ + cursor-factory.cpp \ + file-impl.cpp \ + file.cpp \ + id-cursor.cpp \ + key-cursor.cpp \ + predictive-cursor.cpp \ + prefix-cursor.cpp \ + trie.cpp \ + array.hpp \ + base.hpp \ + block.hpp \ + check.hpp \ + cursor-factory.hpp \ + cursor.hpp \ + dat.hpp \ + entry.hpp \ + file-impl.hpp \ + file.hpp \ + header.hpp \ + id-cursor.hpp \ + key-cursor.hpp \ + key.hpp \ + node.hpp \ + predictive-cursor.hpp \ + prefix-cursor.hpp \ + string.hpp \ + trie.hpp \ + vector.hpp diff --git a/storage/mroonga/vendor/groonga/lib/dat/string.hpp b/storage/mroonga/vendor/groonga/lib/dat/string.hpp new file mode 100644 index 00000000..aead21ca --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/string.hpp @@ -0,0 +1,173 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#include "dat.hpp" + +namespace grn { +namespace dat { + +class GRN_DAT_API String { + public: + String() + : ptr_(NULL), + length_(0) {} + String(const void *ptr, UInt32 length) + : ptr_(static_cast<const UInt8 *>(ptr)), + length_(length) {} + template <UInt32 T> + explicit String(const char (&str)[T]) + : ptr_(reinterpret_cast<const UInt8 *>(str)), + length_(T - 1) {} + String(const String &rhs) + : ptr_(rhs.ptr_), + length_(rhs.length_) {} + + String &operator=(const String &rhs) { + set_ptr(rhs.ptr()); + set_length(rhs.length()); + return *this; + } + + const UInt8 &operator[](UInt32 i) const { + GRN_DAT_DEBUG_THROW_IF(i >= length_); + return ptr_[i]; + } + + const void *ptr() const { + return ptr_; + } + UInt32 length() const { + return length_; + } + + void set_ptr(const void *x) { + ptr_ = static_cast<const UInt8 *>(x); + } + void set_length(UInt32 x) { + length_ = x; + } + + void assign(const void *ptr, UInt32 length) { + set_ptr(ptr); + set_length(length); + } + + String substr(UInt32 offset = 0) const { + return String(ptr_ + offset, length_ - offset); + } + String substr(UInt32 offset, UInt32 length) const { + return String(ptr_ + offset, length); + } + + // This function returns an integer as follows: + // - a negative value if *this < rhs, + // - zero if *this == rhs, + // - a positive value if *this > rhs, + // but if the offset is too large, the result is undefined. + int compare(const String &rhs, UInt32 offset = 0) const { + GRN_DAT_DEBUG_THROW_IF(offset > length()); + GRN_DAT_DEBUG_THROW_IF(offset > rhs.length()); + + for (UInt32 i = offset; i < length(); ++i) { + if (i >= rhs.length()) { + return 1; + } else if ((*this)[i] != rhs[i]) { + return (*this)[i] - rhs[i]; + } + } + return (length() == rhs.length()) ? 0 : -1; + } + + bool starts_with(const String &str) const { + if (length() < str.length()) { + return false; + } + for (UInt32 i = 0; i < str.length(); ++i) { + if ((*this)[i] != str[i]) { + return false; + } + } + return true; + } + + bool ends_with(const String &str) const { + if (length() < str.length()) { + return false; + } + UInt32 offset = length() - str.length(); + for (UInt32 i = 0; i < str.length(); ++i) { + if ((*this)[offset + i] != str[i]) { + return false; + } + } + return true; + } + + void swap(String *rhs) { + const UInt8 * const ptr_temp = ptr_; + ptr_ = rhs->ptr_; + rhs->ptr_ = ptr_temp; + + const UInt32 length_temp = length_; + length_ = rhs->length_; + rhs->length_ = length_temp; + } + + private: + const UInt8 *ptr_; + UInt32 length_; +}; + +inline bool operator==(const String &lhs, const String &rhs) { + if (lhs.length() != rhs.length()) { + return false; + } else if (lhs.ptr() == rhs.ptr()) { + return true; + } + for (UInt32 i = 0; i < lhs.length(); ++i) { + if (lhs[i] != rhs[i]) { + return false; + } + } + return true; +} + +inline bool operator!=(const String &lhs, const String &rhs) { + return !(lhs == rhs); +} + +inline bool operator<(const String &lhs, const String &rhs) { + return lhs.compare(rhs) < 0; +} + +inline bool operator>(const String &lhs, const String &rhs) { + return rhs < lhs; +} + +inline bool operator<=(const String &lhs, const String &rhs) { + return !(lhs > rhs); +} + +inline bool operator>=(const String &lhs, const String &rhs) { + return !(lhs < rhs); +} + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/trie.cpp b/storage/mroonga/vendor/groonga/lib/dat/trie.cpp new file mode 100644 index 00000000..b2c6a84f --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/trie.cpp @@ -0,0 +1,1225 @@ +/* -*- c-basic-offset: 2 -*- */ +/* Copyright(C) 2011-2015 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include "trie.hpp" + +#include <algorithm> +#include <cstring> + +#include "vector.hpp" + +namespace grn { +namespace dat { +namespace { + +class StatusFlagManager { + public: + StatusFlagManager(Header *header, UInt32 status_flag) + : header_(header), status_flag_(status_flag) { + header_->set_status_flags(header_->status_flags() | status_flag_); + } + ~StatusFlagManager() { + header_->set_status_flags(header_->status_flags() & ~status_flag_); + } + + private: + Header *header_; + UInt32 status_flag_; + + // Disallows copy and assignment. + StatusFlagManager(const StatusFlagManager &); + StatusFlagManager &operator=(const StatusFlagManager &); +}; + +} // namespace + +Trie::Trie() + : file_(), + header_(NULL), + nodes_(), + blocks_(), + entries_(), + key_buf_() {} + +Trie::~Trie() {} + +void Trie::create(const char *file_name, + UInt64 file_size, + UInt32 max_num_keys, + double num_nodes_per_key, + double average_key_length) { + GRN_DAT_THROW_IF(PARAM_ERROR, (file_size != 0) && (max_num_keys != 0)); + + if (num_nodes_per_key < 1.0) { + num_nodes_per_key = DEFAULT_NUM_NODES_PER_KEY; + } + if (num_nodes_per_key > MAX_NUM_NODES_PER_KEY) { + num_nodes_per_key = MAX_NUM_NODES_PER_KEY; + } + GRN_DAT_THROW_IF(PARAM_ERROR, num_nodes_per_key < 1.0); + GRN_DAT_THROW_IF(PARAM_ERROR, num_nodes_per_key > MAX_NUM_NODES_PER_KEY); + + if (average_key_length < 1.0) { + average_key_length = DEFAULT_AVERAGE_KEY_LENGTH; + } + GRN_DAT_THROW_IF(PARAM_ERROR, average_key_length < 1.0); + GRN_DAT_THROW_IF(PARAM_ERROR, average_key_length > MAX_KEY_LENGTH); + + if (max_num_keys == 0) { + if (file_size == 0) { + file_size = DEFAULT_FILE_SIZE; + } else { + GRN_DAT_THROW_IF(PARAM_ERROR, file_size < MIN_FILE_SIZE); + GRN_DAT_THROW_IF(PARAM_ERROR, file_size > MAX_FILE_SIZE); + } + } else { + GRN_DAT_THROW_IF(PARAM_ERROR, max_num_keys > MAX_NUM_KEYS); + } + + Trie new_trie; + new_trie.create_file(file_name, file_size, max_num_keys, + num_nodes_per_key, average_key_length); + new_trie.swap(this); +} + +void Trie::create(const Trie &trie, + const char *file_name, + UInt64 file_size, + UInt32 max_num_keys, + double num_nodes_per_key, + double average_key_length) { + GRN_DAT_THROW_IF(PARAM_ERROR, (file_size != 0) && (max_num_keys != 0)); + + if (num_nodes_per_key < 1.0) { + if (trie.num_keys() == 0) { + num_nodes_per_key = DEFAULT_NUM_NODES_PER_KEY; + } else { + num_nodes_per_key = 1.0 * trie.num_nodes() / trie.num_keys(); + if (num_nodes_per_key > MAX_NUM_NODES_PER_KEY) { + num_nodes_per_key = MAX_NUM_NODES_PER_KEY; + } + } + } + GRN_DAT_THROW_IF(PARAM_ERROR, num_nodes_per_key < 1.0); + GRN_DAT_THROW_IF(PARAM_ERROR, num_nodes_per_key > MAX_NUM_NODES_PER_KEY); + + if (average_key_length < 1.0) { + if (trie.num_keys() == 0) { + average_key_length = DEFAULT_AVERAGE_KEY_LENGTH; + } else { + average_key_length = 1.0 * trie.total_key_length() / trie.num_keys(); + } + } + GRN_DAT_THROW_IF(PARAM_ERROR, average_key_length < 1.0); + GRN_DAT_THROW_IF(PARAM_ERROR, average_key_length > MAX_KEY_LENGTH); + + if (max_num_keys == 0) { + if (file_size == 0) { + file_size = trie.file_size(); + } + GRN_DAT_THROW_IF(PARAM_ERROR, file_size < MIN_FILE_SIZE); + GRN_DAT_THROW_IF(PARAM_ERROR, file_size > MAX_FILE_SIZE); + GRN_DAT_THROW_IF(PARAM_ERROR, file_size < trie.virtual_size()); + } else { + GRN_DAT_THROW_IF(PARAM_ERROR, max_num_keys < trie.num_keys()); + GRN_DAT_THROW_IF(PARAM_ERROR, max_num_keys < trie.max_key_id()); + GRN_DAT_THROW_IF(PARAM_ERROR, max_num_keys > MAX_NUM_KEYS); + } + + Trie new_trie; + new_trie.create_file(file_name, file_size, max_num_keys, + num_nodes_per_key, average_key_length); + new_trie.build_from_trie(trie); + new_trie.swap(this); +} + +void Trie::repair(const Trie &trie, const char *file_name) { + Trie new_trie; + new_trie.create_file(file_name, trie.file_size(), trie.max_num_keys(), + trie.max_num_blocks(), trie.key_buf_size()); + new_trie.repair_trie(trie); + new_trie.swap(this); +} + +void Trie::open(const char *file_name) { + GRN_DAT_THROW_IF(PARAM_ERROR, file_name == NULL); + + Trie new_trie; + new_trie.open_file(file_name); + new_trie.swap(this); +} + +void Trie::close() { + Trie().swap(this); +} + +void Trie::swap(Trie *trie) { + file_.swap(&trie->file_); + std::swap(header_, trie->header_); + nodes_.swap(&trie->nodes_); + blocks_.swap(&trie->blocks_); + entries_.swap(&trie->entries_); + key_buf_.swap(&trie->key_buf_); +} + +void Trie::flush() { + file_.flush(); +} + +void Trie::create_file(const char *file_name, + UInt64 file_size, + UInt32 max_num_keys, + double num_nodes_per_key, + double average_key_length) { + GRN_DAT_THROW_IF(PARAM_ERROR, (file_size == 0) && (max_num_keys == 0)); + GRN_DAT_THROW_IF(PARAM_ERROR, (file_size != 0) && (max_num_keys != 0)); + if (max_num_keys == 0) { + const UInt64 avail = file_size - sizeof(Header); + const double num_bytes_per_key = (sizeof(Node) * num_nodes_per_key) + + (1.0 * sizeof(Block) / BLOCK_SIZE * num_nodes_per_key) + + sizeof(Entry) + + sizeof(UInt32) + sizeof(UInt8) + average_key_length + 1.5; + if ((avail / num_bytes_per_key) > MAX_NUM_KEYS) { + max_num_keys = MAX_NUM_KEYS; + } else { + max_num_keys = (UInt32)(avail / num_bytes_per_key); + } + GRN_DAT_THROW_IF(PARAM_ERROR, max_num_keys == 0); + } + + UInt32 max_num_blocks; + { + const double max_num_nodes = num_nodes_per_key * max_num_keys; + GRN_DAT_THROW_IF(PARAM_ERROR, (max_num_nodes - 1.0) >= MAX_NUM_NODES); + max_num_blocks = ((UInt32)max_num_nodes + BLOCK_SIZE - 1) / BLOCK_SIZE; + GRN_DAT_THROW_IF(PARAM_ERROR, max_num_blocks == 0); + GRN_DAT_THROW_IF(PARAM_ERROR, max_num_blocks > MAX_NUM_BLOCKS); + } + + UInt32 key_buf_size; + if (file_size == 0) { + const double total_key_length = average_key_length * max_num_keys; + GRN_DAT_THROW_IF(PARAM_ERROR, + (total_key_length - 1.0) >= MAX_TOTAL_KEY_LENGTH); + + // The last term is the estimated number of bytes that will be used for + // 32-bit alignment. + const UInt64 total_num_bytes = (UInt64)(total_key_length) + + (UInt64)(sizeof(UInt32) + sizeof(UInt8)) * max_num_keys + + (UInt32)(max_num_keys * 1.5); + GRN_DAT_THROW_IF(PARAM_ERROR, + (total_num_bytes / sizeof(UInt32)) >= MAX_KEY_BUF_SIZE); + key_buf_size = (UInt32)(total_num_bytes / sizeof(UInt32)); + + file_size = sizeof(Header) + + (sizeof(Block) * max_num_blocks) + + (sizeof(Node) * BLOCK_SIZE * max_num_blocks) + + (sizeof(Entry) * max_num_keys) + + (sizeof(UInt32) * key_buf_size); + } else { + const UInt64 avail = file_size - sizeof(Header) + - (sizeof(Block) * max_num_blocks) + - (sizeof(Node) * BLOCK_SIZE * max_num_blocks) + - (sizeof(Entry) * max_num_keys); + GRN_DAT_THROW_IF(PARAM_ERROR, (avail / sizeof(UInt32)) > MAX_KEY_BUF_SIZE); + key_buf_size = (UInt32)(avail / sizeof(UInt32)); + } + + create_file(file_name, file_size, max_num_keys, + max_num_blocks, key_buf_size); +} + +void Trie::create_file(const char *file_name, + UInt64 file_size, + UInt32 max_num_keys, + UInt32 max_num_blocks, + UInt32 key_buf_size) { + GRN_DAT_THROW_IF(PARAM_ERROR, file_size < (sizeof(Header) + + (sizeof(Block) * max_num_blocks) + + (sizeof(Node) * BLOCK_SIZE * max_num_blocks) + + (sizeof(Entry) * max_num_keys) + + (sizeof(UInt32) * key_buf_size))); + + file_.create(file_name, file_size); + + Header * const header = static_cast<Header *>(file_.ptr()); + *header = Header(); + header->set_file_size(file_size); + header->set_max_num_keys(max_num_keys); + header->set_max_num_blocks(max_num_blocks); + header->set_key_buf_size(key_buf_size); + + map_address(file_.ptr()); + + reserve_node(ROOT_NODE_ID); + ith_node(INVALID_OFFSET).set_is_offset(true); +} + +void Trie::open_file(const char *file_name) { + GRN_DAT_THROW_IF(PARAM_ERROR, file_name == NULL); + + file_.open(file_name); + map_address(file_.ptr()); + GRN_DAT_THROW_IF(FORMAT_ERROR, file_size() != file_.size()); +} + +void Trie::map_address(void *address) { + GRN_DAT_THROW_IF(PARAM_ERROR, address == NULL); + + header_ = static_cast<Header *>(address); + nodes_.assign(header_ + 1, max_num_nodes()); + blocks_.assign(nodes_.end(), max_num_blocks()); + entries_.assign(reinterpret_cast<Entry *>(blocks_.end()) - 1, + max_num_keys() + 1); + key_buf_.assign(entries_.end(), key_buf_size()); + + GRN_DAT_THROW_IF(UNEXPECTED_ERROR, static_cast<void *>(key_buf_.end()) + > static_cast<void *>(static_cast<char *>(address) + file_size())); +} + +void Trie::build_from_trie(const Trie &trie) { + GRN_DAT_THROW_IF(SIZE_ERROR, max_num_keys() < trie.num_keys()); + GRN_DAT_THROW_IF(SIZE_ERROR, max_num_keys() < trie.max_key_id()); + + header_->set_total_key_length(trie.total_key_length()); + header_->set_num_keys(trie.num_keys()); + header_->set_max_key_id(trie.max_key_id()); + header_->set_next_key_id(trie.next_key_id()); + for (UInt32 i = min_key_id(); i <= max_key_id(); ++i) { + ith_entry(i) = trie.ith_entry(i); + } + build_from_trie(trie, ROOT_NODE_ID, ROOT_NODE_ID); +} + +void Trie::build_from_trie(const Trie &trie, UInt32 src, UInt32 dest) { + // Keys are sorted in lexicographic order. + if (trie.ith_node(src).is_linker()) { + const Key &key = trie.get_key(trie.ith_node(src).key_pos()); + Key::create(key_buf_.ptr() + next_key_pos(), + key.id(), key.ptr(), key.length()); + ith_node(dest).set_key_pos(next_key_pos()); + ith_entry(key.id()).set_key_pos(next_key_pos()); + header_->set_next_key_pos( + next_key_pos() + Key::estimate_size(key.length())); + return; + } + + const UInt32 src_offset = trie.ith_node(src).offset(); + UInt32 dest_offset; + { + UInt16 labels[MAX_LABEL + 1]; + UInt32 num_labels = 0; + + UInt32 label = trie.ith_node(src).child(); + while (label != INVALID_LABEL) { + GRN_DAT_DEBUG_THROW_IF(label > MAX_LABEL); + const UInt32 child = src_offset ^ label; + if (trie.ith_node(child).is_linker() || + (trie.ith_node(child).child() != INVALID_LABEL)) { + labels[num_labels++] = label; + } + label = trie.ith_node(child).sibling(); + } + if (num_labels == 0) { + return; + } + + dest_offset = find_offset(labels, num_labels); + for (UInt32 i = 0; i < num_labels; ++i) { + const UInt32 child = dest_offset ^ labels[i]; + reserve_node(child); + ith_node(child).set_label(labels[i]); + if ((i + 1) < num_labels) { + ith_node(child).set_sibling(labels[i + 1]); + } + } + + GRN_DAT_DEBUG_THROW_IF(ith_node(dest_offset).is_offset()); + ith_node(dest_offset).set_is_offset(true); + ith_node(dest).set_offset(dest_offset); + ith_node(dest).set_child(labels[0]); + } + + UInt32 label = ith_node(dest).child(); + while (label != INVALID_LABEL) { + build_from_trie(trie, src_offset ^ label, dest_offset ^ label); + label = ith_node(dest_offset ^ label).sibling(); + } +} + +void Trie::repair_trie(const Trie &trie) { + Vector<UInt32> valid_ids; + header_->set_max_key_id(trie.max_key_id()); + header_->set_next_key_id(trie.max_key_id() + 1); + UInt32 prev_invalid_key_id = INVALID_KEY_ID; + for (UInt32 i = min_key_id(); i <= max_key_id(); ++i) { + const Entry &entry = trie.ith_entry(i); + if (entry.is_valid()) { + valid_ids.push_back(i); + ith_entry(i) = entry; + const Key &key = trie.get_key(entry.key_pos()); + Key::create(key_buf_.ptr() + next_key_pos(), + key.id(), key.ptr(), key.length()); + ith_entry(i).set_key_pos(next_key_pos()); + header_->set_next_key_pos( + next_key_pos() + Key::estimate_size(key.length())); + header_->set_total_key_length( + total_key_length() + key.length()); + header_->set_num_keys(num_keys() + 1); + } else { + if (prev_invalid_key_id == INVALID_KEY_ID) { + header_->set_next_key_id(i); + } else { + ith_entry(prev_invalid_key_id).set_next(i); + } + prev_invalid_key_id = i; + } + } + if (prev_invalid_key_id != INVALID_KEY_ID) { + ith_entry(prev_invalid_key_id).set_next(max_key_id() + 1); + } + mkq_sort(valid_ids.begin(), valid_ids.end(), 0); + build_from_keys(valid_ids.begin(), valid_ids.end(), 0, ROOT_NODE_ID); +} + +void Trie::build_from_keys(const UInt32 *begin, const UInt32 *end, + UInt32 depth, UInt32 node_id) { + if ((end - begin) == 1) { + ith_node(node_id).set_key_pos(ith_entry(*begin).key_pos()); + return; + } + + UInt32 offset; + { + UInt16 labels[MAX_LABEL + 2]; + UInt32 num_labels = 0; + + const UInt32 *it = begin; + if (ith_key(*it).length() == depth) { + labels[num_labels++] = TERMINAL_LABEL; + ++it; + } + + labels[num_labels++] = (UInt8)ith_key(*it)[depth]; + for (++it; it < end; ++it) { + const Key &key = ith_key(*it); + if ((UInt8)key[depth] != labels[num_labels - 1]) { + labels[num_labels++] = (UInt8)key[depth]; + } + } + labels[num_labels] = INVALID_LABEL; + + offset = find_offset(labels, num_labels); + ith_node(node_id).set_child(labels[0]); + for (UInt32 i = 0; i < num_labels; ++i) { + const UInt32 next = offset ^ labels[i]; + reserve_node(next); + ith_node(next).set_label(labels[i]); + ith_node(next).set_sibling(labels[i + 1]); + } + + if (offset >= num_nodes()) { + reserve_block(num_blocks()); + } + ith_node(offset).set_is_offset(true); + ith_node(node_id).set_offset(offset); + } + + if (ith_key(*begin).length() == depth) { + build_from_keys(begin, begin + 1, depth + 1, offset ^ TERMINAL_LABEL); + ++begin; + } + + UInt16 label = ith_key(*begin)[depth]; + for (const UInt32 *it = begin + 1; it < end; ++it) { + const Key &key = ith_key(*it); + if ((UInt8)key[depth] != label) { + build_from_keys(begin, it, depth + 1, offset ^ label); + label = (UInt8)key[depth]; + begin = it; + } + } + build_from_keys(begin, end, depth + 1, offset ^ label); +} + +void Trie::mkq_sort(UInt32 *l, UInt32 *r, UInt32 depth) { + while ((r - l) >= MKQ_SORT_THRESHOLD) { + UInt32 *pl = l; + UInt32 *pr = r; + UInt32 *pivot_l = l; + UInt32 *pivot_r = r; + + const int pivot = get_median(*l, *(l + (r - l) / 2), *(r - 1), depth); + for ( ; ; ) { + while (pl < pr) { + const int label = get_label(*pl, depth); + if (label > pivot) { + break; + } else if (label == pivot) { + swap_ids(pl, pivot_l); + ++pivot_l; + } + ++pl; + } + while (pl < pr) { + const int label = get_label(*--pr, depth); + if (label < pivot) { + break; + } else if (label == pivot) { + swap_ids(pr, --pivot_r); + } + } + if (pl >= pr) { + break; + } + swap_ids(pl, pr); + ++pl; + } + while (pivot_l > l) { + swap_ids(--pivot_l, --pl); + } + while (pivot_r < r) { + swap_ids(pivot_r, pr); + ++pivot_r; + ++pr; + } + + if (((pl - l) > (pr - pl)) || ((r - pr) > (pr - pl))) { + if ((pr - pl) > 1) { + mkq_sort(pl, pr, depth + 1); + } + + if ((pl - l) < (r - pr)) { + if ((pl - l) > 1) { + mkq_sort(l, pl, depth); + } + l = pr; + } else { + if ((r - pr) > 1) { + mkq_sort(pr, r, depth); + } + r = pl; + } + } else { + if ((pl - l) > 1) { + mkq_sort(l, pl, depth); + } + + if ((r - pr) > 1) { + mkq_sort(pr, r, depth); + } + + l = pl, r = pr; + if ((pr - pl) > 1) { + ++depth; + } + } + } + + if ((r - l) > 1) { + insertion_sort(l, r, depth); + } +} + +void Trie::insertion_sort(UInt32 *l, UInt32 *r, UInt32 depth) { + for (UInt32 *i = l + 1; i < r; ++i) { + for (UInt32 *j = i; j > l; --j) { + if (less_than(*(j - 1), *j, depth)) { + break; + } + swap_ids(j - 1, j); + } + } +} + +int Trie::get_median(UInt32 a, UInt32 b, UInt32 c, UInt32 depth) const { + const int x = get_label(a, depth); + const int y = get_label(b, depth); + const int z = get_label(c, depth); + if (x < y) { + if (y < z) { + return y; + } else if (x < z) { + return z; + } + return x; + } else if (x < z) { + return x; + } else if (y < z) { + return z; + } + return y; +} + +int Trie::get_label(UInt32 key_id, UInt32 depth) const { + const Key &key = ith_key(key_id); + if (depth == key.length()) { + return -1; + } else { + return (UInt8)key[depth]; + } +} + +bool Trie::less_than(UInt32 lhs, UInt32 rhs, UInt32 depth) const { + const Key &lhs_key = ith_key(lhs); + const Key &rhs_key = ith_key(rhs); + const UInt32 length = (lhs_key.length() < rhs_key.length()) ? + lhs_key.length() : rhs_key.length(); + for (UInt32 i = depth; i < length; ++i) { + if (lhs_key[i] != rhs_key[i]) { + return (UInt8)lhs_key[i] < (UInt8)rhs_key[i]; + } + } + return lhs_key.length() < rhs_key.length(); +} + +void Trie::swap_ids(UInt32 *lhs, UInt32 *rhs) { + UInt32 temp = *lhs; + *lhs = *rhs; + *rhs = temp; +} + +bool Trie::search_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos) const { + GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); + + UInt32 node_id = ROOT_NODE_ID; + UInt32 query_pos = 0; + if (!search_linker(ptr, length, node_id, query_pos)) { + return false; + } + + const Base base = ith_node(node_id).base(); + if (!base.is_linker()) { + return false; + } + + if (get_key(base.key_pos()).equals_to(ptr, length, query_pos)) { + if (key_pos != NULL) { + *key_pos = base.key_pos(); + } + return true; + } + return false; +} + +bool Trie::search_linker(const UInt8 *ptr, UInt32 length, + UInt32 &node_id, UInt32 &query_pos) const { + GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); + + for ( ; query_pos < length; ++query_pos) { + const Base base = ith_node(node_id).base(); + if (base.is_linker()) { + return true; + } + + const UInt32 next = base.offset() ^ ptr[query_pos]; + if (ith_node(next).label() != ptr[query_pos]) { + return false; + } + node_id = next; + } + + const Base base = ith_node(node_id).base(); + if (base.is_linker()) { + return true; + } + + const UInt32 next = base.offset() ^ TERMINAL_LABEL; + if (ith_node(next).label() != TERMINAL_LABEL) { + return false; + } + node_id = next; + return ith_node(next).is_linker(); +} + +bool Trie::lcp_search_key(const UInt8 *ptr, UInt32 length, + UInt32 *key_pos) const { + GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); + + bool found = false; + UInt32 node_id = ROOT_NODE_ID; + UInt32 query_pos = 0; + + for ( ; query_pos < length; ++query_pos) { + const Base base = ith_node(node_id).base(); + if (base.is_linker()) { + const Key &key = get_key(base.key_pos()); + if ((key.length() <= length) && + key.equals_to(ptr, key.length(), query_pos)) { + if (key_pos != NULL) { + *key_pos = base.key_pos(); + } + found = true; + } + return found; + } + + if (ith_node(node_id).child() == TERMINAL_LABEL) { + const Base linker_base = + ith_node(base.offset() ^ TERMINAL_LABEL).base(); + if (linker_base.is_linker()) { + if (key_pos != NULL) { + *key_pos = linker_base.key_pos(); + } + found = true; + } + } + + node_id = base.offset() ^ ptr[query_pos]; + if (ith_node(node_id).label() != ptr[query_pos]) { + return found; + } + } + + const Base base = ith_node(node_id).base(); + if (base.is_linker()) { + const Key &key = get_key(base.key_pos()); + if (key.length() <= length) { + if (key_pos != NULL) { + *key_pos = base.key_pos(); + } + found = true; + } + } else if (ith_node(node_id).child() == TERMINAL_LABEL) { + const Base linker_base = ith_node(base.offset() ^ TERMINAL_LABEL).base(); + if (linker_base.is_linker()) { + if (key_pos != NULL) { + *key_pos = linker_base.key_pos(); + } + found = true; + } + } + return found; +} + +bool Trie::remove_key(const UInt8 *ptr, UInt32 length) { + GRN_DAT_THROW_IF(STATUS_ERROR, (status_flags() & CHANGING_MASK) != 0); + StatusFlagManager status_flag_manager(header_, REMOVING_FLAG); + + GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); + + UInt32 node_id = ROOT_NODE_ID; + UInt32 query_pos = 0; + if (!search_linker(ptr, length, node_id, query_pos)) { + return false; + } + + const UInt32 key_pos = ith_node(node_id).key_pos(); + if (!get_key(key_pos).equals_to(ptr, length, query_pos)) { + return false; + } + + const UInt32 key_id = get_key(key_pos).id(); + ith_node(node_id).set_offset(INVALID_OFFSET); + ith_entry(key_id).set_next(next_key_id()); + + header_->set_next_key_id(key_id); + header_->set_total_key_length(total_key_length() - length); + header_->set_num_keys(num_keys() - 1); + return true; +} + +bool Trie::insert_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos) { + GRN_DAT_THROW_IF(STATUS_ERROR, (status_flags() & CHANGING_MASK) != 0); + StatusFlagManager status_flag_manager(header_, INSERTING_FLAG); + + GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); + + UInt32 node_id = ROOT_NODE_ID; + UInt32 query_pos = 0; + + search_linker(ptr, length, node_id, query_pos); + if (!insert_linker(ptr, length, node_id, query_pos)) { + if (key_pos != NULL) { + *key_pos = ith_node(node_id).key_pos(); + } + return false; + } + + const UInt32 new_key_id = next_key_id(); + const UInt32 new_key_pos = append_key(ptr, length, new_key_id); + + header_->set_total_key_length(total_key_length() + length); + header_->set_num_keys(num_keys() + 1); + if (new_key_id > max_key_id()) { + header_->set_max_key_id(new_key_id); + header_->set_next_key_id(new_key_id + 1); + } else { + header_->set_next_key_id(ith_entry(new_key_id).next()); + } + + ith_entry(new_key_id).set_key_pos(new_key_pos); + ith_node(node_id).set_key_pos(new_key_pos); + if (key_pos != NULL) { + *key_pos = new_key_pos; + } + return true; +} + +bool Trie::insert_linker(const UInt8 *ptr, UInt32 length, + UInt32 &node_id, UInt32 query_pos) { + if (ith_node(node_id).is_linker()) { + const Key &key = get_key(ith_node(node_id).key_pos()); + UInt32 i = query_pos; + while ((i < length) && (i < key.length())) { + if (ptr[i] != key[i]) { + break; + } + ++i; + } + if ((i == length) && (i == key.length())) { + return false; + } + GRN_DAT_THROW_IF(SIZE_ERROR, num_keys() >= max_num_keys()); + GRN_DAT_DEBUG_THROW_IF(next_key_id() > max_num_keys()); + + for (UInt32 j = query_pos; j < i; ++j) { + node_id = insert_node(node_id, ptr[j]); + } + node_id = separate(ptr, length, node_id, i); + return true; + } else if (ith_node(node_id).label() == TERMINAL_LABEL) { + return true; + } else { + GRN_DAT_THROW_IF(SIZE_ERROR, num_keys() >= max_num_keys()); + const UInt16 label = (query_pos < length) ? + (UInt16)ptr[query_pos] : (UInt16)TERMINAL_LABEL; + const Base base = ith_node(node_id).base(); + if ((base.offset() == INVALID_OFFSET) || + !ith_node(base.offset() ^ label).is_phantom()) { + resolve(node_id, label); + } + node_id = insert_node(node_id, label); + return true; + } +} + +bool Trie::update_key(const Key &key, const UInt8 *ptr, UInt32 length, + UInt32 *key_pos) { + GRN_DAT_THROW_IF(STATUS_ERROR, (status_flags() & CHANGING_MASK) != 0); + StatusFlagManager status_flag_manager(header_, UPDATING_FLAG); + + GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); + + if (!key.is_valid()) { + return false; + } + + UInt32 node_id = ROOT_NODE_ID; + UInt32 query_pos = 0; + + search_linker(ptr, length, node_id, query_pos); + if (!insert_linker(ptr, length, node_id, query_pos)) { + if (key_pos != NULL) { + *key_pos = ith_node(node_id).key_pos(); + } + return false; + } + + const UInt32 new_key_pos = append_key(ptr, length, key.id()); + header_->set_total_key_length(total_key_length() + length - key.length()); + ith_entry(key.id()).set_key_pos(new_key_pos); + ith_node(node_id).set_key_pos(new_key_pos); + if (key_pos != NULL) { + *key_pos = new_key_pos; + } + + node_id = ROOT_NODE_ID; + query_pos = 0; + GRN_DAT_THROW_IF(UNEXPECTED_ERROR, + !search_linker(static_cast<const UInt8 *>(key.ptr()), key.length(), + node_id, query_pos)); + ith_node(node_id).set_offset(INVALID_OFFSET); + return true; +} + +UInt32 Trie::insert_node(UInt32 node_id, UInt16 label) { + GRN_DAT_DEBUG_THROW_IF(node_id >= num_nodes()); + GRN_DAT_DEBUG_THROW_IF(label > MAX_LABEL); + + const Base base = ith_node(node_id).base(); + UInt32 offset; + if (base.is_linker() || (base.offset() == INVALID_OFFSET)) { + offset = find_offset(&label, 1); + } else { + offset = base.offset(); + } + + const UInt32 next = offset ^ label; + reserve_node(next); + + ith_node(next).set_label(label); + if (base.is_linker()) { + GRN_DAT_DEBUG_THROW_IF(ith_node(offset).is_offset()); + ith_node(offset).set_is_offset(true); + ith_node(next).set_key_pos(base.key_pos()); + } else if (base.offset() == INVALID_OFFSET) { + GRN_DAT_DEBUG_THROW_IF(ith_node(offset).is_offset()); + ith_node(offset).set_is_offset(true); + } else { + GRN_DAT_DEBUG_THROW_IF(!ith_node(offset).is_offset()); + } + ith_node(node_id).set_offset(offset); + + const UInt32 child_label = ith_node(node_id).child(); + GRN_DAT_DEBUG_THROW_IF(child_label == label); + if (child_label == INVALID_LABEL) { + ith_node(node_id).set_child(label); + } else if ((label == TERMINAL_LABEL) || + ((child_label != TERMINAL_LABEL) && (label < child_label))) { + GRN_DAT_DEBUG_THROW_IF(ith_node(offset ^ child_label).is_phantom()); + GRN_DAT_DEBUG_THROW_IF(ith_node(offset ^ child_label).label() != child_label); + ith_node(next).set_sibling(child_label); + ith_node(node_id).set_child(label); + } else { + UInt32 prev = offset ^ child_label; + GRN_DAT_DEBUG_THROW_IF(ith_node(prev).label() != child_label); + UInt32 sibling_label = ith_node(prev).sibling(); + while (label > sibling_label) { + prev = offset ^ sibling_label; + GRN_DAT_DEBUG_THROW_IF(ith_node(prev).label() != sibling_label); + sibling_label = ith_node(prev).sibling(); + } + GRN_DAT_DEBUG_THROW_IF(label == sibling_label); + ith_node(next).set_sibling(ith_node(prev).sibling()); + ith_node(prev).set_sibling(label); + } + return next; +} + +UInt32 Trie::append_key(const UInt8 *ptr, UInt32 length, UInt32 key_id) { + GRN_DAT_THROW_IF(SIZE_ERROR, key_id > max_num_keys()); + + const UInt32 key_pos = next_key_pos(); + const UInt32 key_size = Key::estimate_size(length); + + GRN_DAT_THROW_IF(SIZE_ERROR, key_size > (key_buf_size() - key_pos)); + Key::create(key_buf_.ptr() + key_pos, key_id, ptr, length); + + header_->set_next_key_pos(key_pos + key_size); + return key_pos; +} + +UInt32 Trie::separate(const UInt8 *ptr, UInt32 length, + UInt32 node_id, UInt32 i) { + GRN_DAT_DEBUG_THROW_IF(node_id >= num_nodes()); + GRN_DAT_DEBUG_THROW_IF(!ith_node(node_id).is_linker()); + GRN_DAT_DEBUG_THROW_IF(i > length); + + const UInt32 key_pos = ith_node(node_id).key_pos(); + const Key &key = get_key(key_pos); + + UInt16 labels[2]; + labels[0] = (i < key.length()) ? (UInt16)key[i] : (UInt16)TERMINAL_LABEL; + labels[1] = (i < length) ? (UInt16)ptr[i] : (UInt16)TERMINAL_LABEL; + GRN_DAT_DEBUG_THROW_IF(labels[0] == labels[1]); + + const UInt32 offset = find_offset(labels, 2); + + UInt32 next = offset ^ labels[0]; + reserve_node(next); + GRN_DAT_DEBUG_THROW_IF(ith_node(offset).is_offset()); + + ith_node(next).set_label(labels[0]); + ith_node(next).set_key_pos(key_pos); + + next = offset ^ labels[1]; + reserve_node(next); + + ith_node(next).set_label(labels[1]); + + ith_node(offset).set_is_offset(true); + ith_node(node_id).set_offset(offset); + + if ((labels[0] == TERMINAL_LABEL) || + ((labels[1] != TERMINAL_LABEL) && (labels[0] < labels[1]))) { + ith_node(node_id).set_child(labels[0]); + ith_node(offset ^ labels[0]).set_sibling(labels[1]); + } else { + ith_node(node_id).set_child(labels[1]); + ith_node(offset ^ labels[1]).set_sibling(labels[0]); + } + return next; +} + +void Trie::resolve(UInt32 node_id, UInt16 label) { + GRN_DAT_DEBUG_THROW_IF(node_id >= num_nodes()); + GRN_DAT_DEBUG_THROW_IF(ith_node(node_id).is_linker()); + GRN_DAT_DEBUG_THROW_IF(label > MAX_LABEL); + + UInt32 offset = ith_node(node_id).offset(); + if (offset != INVALID_OFFSET) { + UInt16 labels[MAX_LABEL + 1]; + UInt32 num_labels = 0; + + UInt32 next_label = ith_node(node_id).child(); + GRN_DAT_DEBUG_THROW_IF(next_label == INVALID_LABEL); + while (next_label != INVALID_LABEL) { + GRN_DAT_DEBUG_THROW_IF(next_label > MAX_LABEL); + labels[num_labels++] = next_label; + next_label = ith_node(offset ^ next_label).sibling(); + } + GRN_DAT_DEBUG_THROW_IF(num_labels == 0); + + labels[num_labels] = label; + offset = find_offset(labels, num_labels + 1); + migrate_nodes(node_id, offset, labels, num_labels); + } else { + offset = find_offset(&label, 1); + if (offset >= num_nodes()) { + GRN_DAT_DEBUG_THROW_IF((offset / BLOCK_SIZE) != num_blocks()); + reserve_block(num_blocks()); + } + ith_node(offset).set_is_offset(true); + ith_node(node_id).set_offset(offset); + } +} + +void Trie::migrate_nodes(UInt32 node_id, UInt32 dest_offset, + const UInt16 *labels, UInt32 num_labels) { + GRN_DAT_DEBUG_THROW_IF(node_id >= num_nodes()); + GRN_DAT_DEBUG_THROW_IF(ith_node(node_id).is_linker()); + GRN_DAT_DEBUG_THROW_IF(labels == NULL); + GRN_DAT_DEBUG_THROW_IF(num_labels == 0); + GRN_DAT_DEBUG_THROW_IF(num_labels > (MAX_LABEL + 1)); + + const UInt32 src_offset = ith_node(node_id).offset(); + GRN_DAT_DEBUG_THROW_IF(src_offset == INVALID_OFFSET); + GRN_DAT_DEBUG_THROW_IF(!ith_node(src_offset).is_offset()); + + for (UInt32 i = 0; i < num_labels; ++i) { + const UInt32 src_node_id = src_offset ^ labels[i]; + const UInt32 dest_node_id = dest_offset ^ labels[i]; + GRN_DAT_DEBUG_THROW_IF(ith_node(src_node_id).is_phantom()); + GRN_DAT_DEBUG_THROW_IF(ith_node(src_node_id).label() != labels[i]); + + reserve_node(dest_node_id); + ith_node(dest_node_id).set_except_is_offset( + ith_node(src_node_id).except_is_offset()); + ith_node(dest_node_id).set_base(ith_node(src_node_id).base()); + } + header_->set_num_zombies(num_zombies() + num_labels); + + GRN_DAT_DEBUG_THROW_IF(ith_node(dest_offset).is_offset()); + ith_node(dest_offset).set_is_offset(true); + ith_node(node_id).set_offset(dest_offset); +} + +UInt32 Trie::find_offset(const UInt16 *labels, UInt32 num_labels) { + GRN_DAT_DEBUG_THROW_IF(labels == NULL); + GRN_DAT_DEBUG_THROW_IF(num_labels == 0); + GRN_DAT_DEBUG_THROW_IF(num_labels > (MAX_LABEL + 1)); + + // Blocks are tested in descending order of level. Basically, a lower level + // block contains more phantom nodes. + UInt32 level = 1; + while (num_labels >= (1U << level)) { + ++level; + } + level = (level < MAX_BLOCK_LEVEL) ? (MAX_BLOCK_LEVEL - level) : 0; + + UInt32 block_count = 0; + do { + UInt32 leader = header_->ith_leader(level); + if (leader == INVALID_LEADER) { + // This level has no blocks and it is thus skipped. + continue; + } + + UInt32 block_id = leader; + do { + const Block &block = ith_block(block_id); + GRN_DAT_DEBUG_THROW_IF(block.level() != level); + + const UInt32 first = (block_id * BLOCK_SIZE) | block.first_phantom(); + UInt32 node_id = first; + do { + GRN_DAT_DEBUG_THROW_IF(!ith_node(node_id).is_phantom()); + const UInt32 offset = node_id ^ labels[0]; + if (!ith_node(offset).is_offset()) { + UInt32 i = 1; + for ( ; i < num_labels; ++i) { + if (!ith_node(offset ^ labels[i]).is_phantom()) { + break; + } + } + if (i >= num_labels) { + return offset; + } + } + node_id = (block_id * BLOCK_SIZE) | ith_node(node_id).next(); + } while (node_id != first); + + const UInt32 prev = block_id; + const UInt32 next = block.next(); + block_id = next; + ith_block(prev).set_failure_count(ith_block(prev).failure_count() + 1); + + // The level of a block is updated when this function fails many times, + // actually MAX_FAILURE_COUNT times, in that block. + if (ith_block(prev).failure_count() == MAX_FAILURE_COUNT) { + update_block_level(prev, level + 1); + if (next == leader) { + break; + } else { + // Note that the leader might be updated in the level update. + leader = header_->ith_leader(level); + continue; + } + } + } while ((++block_count < MAX_BLOCK_COUNT) && (block_id != leader)); + } while ((block_count < MAX_BLOCK_COUNT) && (level-- != 0)); + + return num_nodes() ^ labels[0]; +} + +void Trie::reserve_node(UInt32 node_id) { + GRN_DAT_DEBUG_THROW_IF(node_id > num_nodes()); + if (node_id >= num_nodes()) { + reserve_block(node_id / BLOCK_SIZE); + } + + Node &node = ith_node(node_id); + GRN_DAT_DEBUG_THROW_IF(!node.is_phantom()); + + const UInt32 block_id = node_id / BLOCK_SIZE; + Block &block = ith_block(block_id); + GRN_DAT_DEBUG_THROW_IF(block.num_phantoms() == 0); + + const UInt32 next = (block_id * BLOCK_SIZE) | node.next(); + const UInt32 prev = (block_id * BLOCK_SIZE) | node.prev(); + GRN_DAT_DEBUG_THROW_IF(next >= num_nodes()); + GRN_DAT_DEBUG_THROW_IF(prev >= num_nodes()); + + if ((node_id & BLOCK_MASK) == block.first_phantom()) { + // The first phantom node is removed from the block and the second phantom + // node comes first. + block.set_first_phantom(next & BLOCK_MASK); + } + + ith_node(next).set_prev(prev & BLOCK_MASK); + ith_node(prev).set_next(next & BLOCK_MASK); + + if (block.level() != MAX_BLOCK_LEVEL) { + const UInt32 threshold = 1U << ((MAX_BLOCK_LEVEL - block.level() - 1) * 2); + if (block.num_phantoms() == threshold) { + update_block_level(block_id, block.level() + 1); + } + } + block.set_num_phantoms(block.num_phantoms() - 1); + + node.set_is_phantom(false); + + GRN_DAT_DEBUG_THROW_IF(node.offset() != INVALID_OFFSET); + GRN_DAT_DEBUG_THROW_IF(node.label() != INVALID_LABEL); + + header_->set_num_phantoms(num_phantoms() - 1); +} + +void Trie::reserve_block(UInt32 block_id) { + GRN_DAT_DEBUG_THROW_IF(block_id != num_blocks()); + GRN_DAT_THROW_IF(SIZE_ERROR, block_id >= max_num_blocks()); + + header_->set_num_blocks(block_id + 1); + ith_block(block_id).set_failure_count(0); + ith_block(block_id).set_first_phantom(0); + ith_block(block_id).set_num_phantoms(BLOCK_SIZE); + + const UInt32 begin = block_id * BLOCK_SIZE; + const UInt32 end = begin + BLOCK_SIZE; + GRN_DAT_DEBUG_THROW_IF(end != num_nodes()); + + Base base; + base.set_offset(INVALID_OFFSET); + + Check check; + check.set_is_phantom(true); + + for (UInt32 i = begin; i < end; ++i) { + check.set_prev((i - 1) & BLOCK_MASK); + check.set_next((i + 1) & BLOCK_MASK); + ith_node(i).set_base(base); + ith_node(i).set_check(check); + } + + // The level of the new block is 0. + set_block_level(block_id, 0); + header_->set_num_phantoms(num_phantoms() + BLOCK_SIZE); +} + +void Trie::update_block_level(UInt32 block_id, UInt32 level) { + GRN_DAT_DEBUG_THROW_IF(block_id >= num_blocks()); + GRN_DAT_DEBUG_THROW_IF(level > MAX_BLOCK_LEVEL); + + unset_block_level(block_id); + set_block_level(block_id, level); +} + +void Trie::set_block_level(UInt32 block_id, UInt32 level) { + GRN_DAT_DEBUG_THROW_IF(block_id >= num_blocks()); + GRN_DAT_DEBUG_THROW_IF(level > MAX_BLOCK_LEVEL); + + const UInt32 leader = header_->ith_leader(level); + if (leader == INVALID_LEADER) { + // The new block becomes the only one block of the linked list. + ith_block(block_id).set_next(block_id); + ith_block(block_id).set_prev(block_id); + header_->set_ith_leader(level, block_id); + } else { + // The new block is added to the end of the list. + const UInt32 next = leader; + const UInt32 prev = ith_block(leader).prev(); + GRN_DAT_DEBUG_THROW_IF(next >= num_blocks()); + GRN_DAT_DEBUG_THROW_IF(prev >= num_blocks()); + ith_block(block_id).set_next(next); + ith_block(block_id).set_prev(prev); + ith_block(next).set_prev(block_id); + ith_block(prev).set_next(block_id); + } + ith_block(block_id).set_level(level); + ith_block(block_id).set_failure_count(0); +} + +void Trie::unset_block_level(UInt32 block_id) { + GRN_DAT_DEBUG_THROW_IF(block_id >= num_blocks()); + + const UInt32 level = ith_block(block_id).level(); + GRN_DAT_DEBUG_THROW_IF(level > MAX_BLOCK_LEVEL); + + const UInt32 leader = header_->ith_leader(level); + GRN_DAT_DEBUG_THROW_IF(leader == INVALID_LEADER); + + const UInt32 next = ith_block(block_id).next(); + const UInt32 prev = ith_block(block_id).prev(); + GRN_DAT_DEBUG_THROW_IF(next >= num_blocks()); + GRN_DAT_DEBUG_THROW_IF(prev >= num_blocks()); + + if (next == block_id) { + // The linked list becomes empty. + header_->set_ith_leader(level, INVALID_LEADER); + } else { + ith_block(next).set_prev(prev); + ith_block(prev).set_next(next); + if (block_id == leader) { + // The second block becomes the leader of the linked list. + header_->set_ith_leader(level, next); + } + } +} + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/trie.hpp b/storage/mroonga/vendor/groonga/lib/dat/trie.hpp new file mode 100644 index 00000000..bf7d0b98 --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/trie.hpp @@ -0,0 +1,285 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#include "array.hpp" +#include "header.hpp" +#include "node.hpp" +#include "block.hpp" +#include "entry.hpp" +#include "key.hpp" +#include "file.hpp" + +namespace grn { +namespace dat { + +class GRN_DAT_API Trie { + public: + Trie(); + ~Trie(); + + void create(const char *file_name = NULL, + UInt64 file_size = 0, + UInt32 max_num_keys = 0, + double num_nodes_per_key = 0.0, + double average_key_length = 0.0); + + void create(const Trie &trie, + const char *file_name = NULL, + UInt64 file_size = 0, + UInt32 max_num_keys = 0, + double num_nodes_per_key = 0.0, + double average_key_length = 0.0); + + void repair(const Trie &trie, const char *file_name = NULL); + + void open(const char *file_name); + void close(); + + void swap(Trie *trie); + + // Users can access a key by its position or ID. + const Key &get_key(UInt32 key_pos) const { + GRN_DAT_DEBUG_THROW_IF(key_pos >= next_key_pos()); + return *reinterpret_cast<const Key *>(key_buf_.ptr() + key_pos); + } + // If a specified ID is invalid, e.g. the key is already deleted, this + // function returns a reference to an invalid key object whose id() returns + // INVALID_KEY_ID. + const Key &ith_key(UInt32 key_id) const { + if ((key_id >= min_key_id()) && (key_id <= max_key_id()) && + ith_entry(key_id).is_valid()) { + return get_key(ith_entry(key_id).key_pos()); + } + return Key::invalid_key(); + } + + bool search(const void *ptr, UInt32 length, UInt32 *key_pos = NULL) const { + return search_key(static_cast<const UInt8 *>(ptr), length, key_pos); + } + // Longest prefix match search. + bool lcp_search(const void *ptr, UInt32 length, + UInt32 *key_pos = NULL) const { + return lcp_search_key(static_cast<const UInt8 *>(ptr), length, key_pos); + } + + bool remove(UInt32 key_id) { + const Key &key = ith_key(key_id); + if (key.is_valid()) { + return remove(key.ptr(), key.length()); + } + return false; + } + bool remove(const void *ptr, UInt32 length) { + return remove_key(static_cast<const UInt8 *>(ptr), length); + } + + bool insert(const void *ptr, UInt32 length, UInt32 *key_pos = NULL) { + return insert_key(static_cast<const UInt8 *>(ptr), length, key_pos); + } + + bool update(UInt32 key_id, const void *ptr, UInt32 length, + UInt32 *key_pos = NULL) { + return update_key(ith_key(key_id), static_cast<const UInt8 *>(ptr), + length, key_pos); + } + bool update(const void *src_ptr, UInt32 src_length, + const void *dest_ptr, UInt32 dest_length, + UInt32 *key_pos = NULL) { + UInt32 src_key_pos; + if (!search(src_ptr, src_length, &src_key_pos)) { + return false; + } + const Key &src_key = get_key(src_key_pos); + return update_key(src_key, static_cast<const UInt8 *>(dest_ptr), + dest_length, key_pos); + } + + const Node &ith_node(UInt32 i) const { + GRN_DAT_DEBUG_THROW_IF(i >= num_nodes()); + return nodes_[i]; + } + const Block &ith_block(UInt32 i) const { + GRN_DAT_DEBUG_THROW_IF(i >= num_blocks()); + return blocks_[i]; + } + const Entry &ith_entry(UInt32 i) const { + GRN_DAT_DEBUG_THROW_IF(i < min_key_id()); + GRN_DAT_DEBUG_THROW_IF(i > max_key_id()); + return entries_[i]; + } + + const Header &header() const { + return *header_; + } + + UInt64 file_size() const { + return header_->file_size(); + } + UInt64 virtual_size() const { + return sizeof(Header) + + (sizeof(Entry) * num_keys()) + + (sizeof(Block) * num_blocks()) + + (sizeof(Node) * num_nodes()) + + total_key_length(); + } + UInt32 total_key_length() const { + return header_->total_key_length(); + } + UInt32 num_keys() const { + return header_->num_keys(); + } + UInt32 min_key_id() const { + return header_->min_key_id(); + } + UInt32 next_key_id() const { + return header_->next_key_id(); + } + UInt32 max_key_id() const { + return header_->max_key_id(); + } + UInt32 max_num_keys() const { + return header_->max_num_keys(); + } + UInt32 num_nodes() const { + return header_->num_nodes(); + } + UInt32 num_phantoms() const { + return header_->num_phantoms(); + } + UInt32 num_zombies() const { + return header_->num_zombies(); + } + UInt32 max_num_nodes() const { + return header_->max_num_nodes(); + } + UInt32 num_blocks() const { + return header_->num_blocks(); + } + UInt32 max_num_blocks() const { + return header_->max_num_blocks(); + } + UInt32 next_key_pos() const { + return header_->next_key_pos(); + } + UInt32 key_buf_size() const { + return header_->key_buf_size(); + } + UInt32 status_flags() const { + return header_->status_flags(); + } + + void clear_status_flags() { + header_->set_status_flags(status_flags() & ~CHANGING_MASK); + } + + void flush(); + + private: + File file_; + Header *header_; + Array<Node> nodes_; + Array<Block> blocks_; + Array<Entry> entries_; + Array<UInt32> key_buf_; + + void create_file(const char *file_name, + UInt64 file_size, + UInt32 max_num_keys, + double num_nodes_per_key, + double average_key_length); + void create_file(const char *file_name, + UInt64 file_size, + UInt32 max_num_keys, + UInt32 max_num_blocks, + UInt32 key_buf_size); + + void open_file(const char *file_name); + + void map_address(void *address); + + void build_from_trie(const Trie &trie); + void build_from_trie(const Trie &trie, UInt32 src, UInt32 dest); + + void repair_trie(const Trie &trie); + void build_from_keys(const UInt32 *begin, const UInt32 *end, + UInt32 depth, UInt32 node_id); + + void mkq_sort(UInt32 *l, UInt32 *r, UInt32 depth); + void insertion_sort(UInt32 *l, UInt32 *r, UInt32 depth); + + inline int get_label(UInt32 key_id, UInt32 depth) const; + inline int get_median(UInt32 a, UInt32 b, UInt32 c, UInt32 depth) const; + inline bool less_than(UInt32 lhs, UInt32 rhs, UInt32 depth) const; + inline static void swap_ids(UInt32 *lhs, UInt32 *rhs); + + bool search_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos) const; + bool search_linker(const UInt8 *ptr, UInt32 length, + UInt32 &node_id, UInt32 &query_pos) const; + + bool lcp_search_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos) const; + + bool remove_key(const UInt8 *ptr, UInt32 length); + + bool insert_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos); + bool insert_linker(const UInt8 *ptr, UInt32 length, + UInt32 &node_id, UInt32 query_pos); + + bool update_key(const Key &key, const UInt8 *ptr, UInt32 length, + UInt32 *key_pos); + + UInt32 insert_node(UInt32 node_id, UInt16 label); + UInt32 append_key(const UInt8 *ptr, UInt32 length, UInt32 key_id); + + UInt32 separate(const UInt8 *ptr, UInt32 length, + UInt32 node_id, UInt32 i); + void resolve(UInt32 node_id, UInt16 label); + void migrate_nodes(UInt32 node_id, UInt32 dest_offset, + const UInt16 *labels, UInt32 num_labels); + + UInt32 find_offset(const UInt16 *labels, UInt32 num_labels); + + void reserve_node(UInt32 node_id); + void reserve_block(UInt32 block_id); + + void update_block_level(UInt32 block_id, UInt32 level); + void set_block_level(UInt32 block_id, UInt32 level); + void unset_block_level(UInt32 block_id); + + Node &ith_node(UInt32 i) { + GRN_DAT_DEBUG_THROW_IF(i >= num_nodes()); + return nodes_[i]; + } + Block &ith_block(UInt32 i) { + GRN_DAT_DEBUG_THROW_IF(i >= num_blocks()); + return blocks_[i]; + } + Entry &ith_entry(UInt32 i) { + GRN_DAT_DEBUG_THROW_IF(i < min_key_id()); + GRN_DAT_DEBUG_THROW_IF(i > (max_key_id() + 1)); + return entries_[i]; + } + + // Disallows copy and assignment. + Trie(const Trie &); + Trie &operator=(const Trie &); +}; + +} // namespace dat +} // namespace grn diff --git a/storage/mroonga/vendor/groonga/lib/dat/vector.hpp b/storage/mroonga/vendor/groonga/lib/dat/vector.hpp new file mode 100644 index 00000000..8a67b27b --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/dat/vector.hpp @@ -0,0 +1,191 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2011-2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#pragma once + +#include "dat.hpp" + +#include <new> + +namespace grn { +namespace dat { + +template <typename T> +class GRN_DAT_API Vector { + public: + Vector() : buf_(NULL), size_(0), capacity_(0) {} + ~Vector() { + for (UInt32 i = 0; i < size(); ++i) { + buf_[i].~T(); + } + delete [] reinterpret_cast<char *>(buf_); + } + + const T &operator[](UInt32 i) const { + GRN_DAT_DEBUG_THROW_IF(i >= size()); + return buf_[i]; + } + T &operator[](UInt32 i) { + GRN_DAT_DEBUG_THROW_IF(i >= size()); + return buf_[i]; + } + + const T &front() const { + GRN_DAT_DEBUG_THROW_IF(empty()); + return buf_[0]; + } + T &front() { + GRN_DAT_DEBUG_THROW_IF(empty()); + return buf_[0]; + } + + const T &back() const { + GRN_DAT_DEBUG_THROW_IF(empty()); + return buf_[size() - 1]; + } + T &back() { + GRN_DAT_DEBUG_THROW_IF(empty()); + return buf_[size() - 1]; + } + + const T *begin() const { + return buf_; + } + T *begin() { + return buf_; + } + + const T *end() const { + return buf_ + size_; + } + T *end() { + return buf_ + size_; + } + + void push_back() { + reserve(size() + 1); + new (&buf_[size()]) T; + ++size_; + } + void push_back(const T &x) { + reserve(size() + 1); + new (&buf_[size()]) T(x); + ++size_; + } + + void pop_back() { + GRN_DAT_DEBUG_THROW_IF(empty()); + back().~T(); + --size_; + } + + void clear() { + resize(0); + } + + void resize(UInt32 new_size) { + if (new_size > capacity()) { + reserve(new_size); + } + for (UInt32 i = size(); i < new_size; ++i) { + new (&buf_[i]) T; + } + for (UInt32 i = new_size; i < size(); ++i) { + buf_[i].~T(); + } + size_ = new_size; + } + template <typename U> + void resize(UInt32 new_size, const U &value) { + if (new_size > capacity()) { + reserve(new_size); + } + for (UInt32 i = size(); i < new_size; ++i) { + new (&buf_[i]) T(value); + } + for (UInt32 i = new_size; i < size(); ++i) { + buf_[i].~T(); + } + size_ = new_size; + } + + void reserve(UInt32 new_capacity) { + if (new_capacity <= capacity()) { + return; + } else if ((new_capacity / 2) < capacity()) { + if (capacity() < (MAX_UINT32 / 2)) { + new_capacity = capacity() * 2; + } else { + new_capacity = MAX_UINT32; + } + } + + T *new_buf = reinterpret_cast<T *>( + new (std::nothrow) char[sizeof(new_capacity) * new_capacity]); + GRN_DAT_THROW_IF(MEMORY_ERROR, new_buf == NULL); + + for (UInt32 i = 0; i < size(); ++i) { + new (&new_buf[i]) T(buf_[i]); + } + for (UInt32 i = 0; i < size(); ++i) { + buf_[i].~T(); + } + + T *old_buf = buf_; + buf_ = new_buf; + delete [] reinterpret_cast<char *>(old_buf); + + capacity_ = new_capacity; + } + + void swap(Vector *rhs) { + T * const temp_buf = buf_; + buf_ = rhs->buf_; + rhs->buf_ = temp_buf; + + const UInt32 temp_size = size_; + size_ = rhs->size_; + rhs->size_ = temp_size; + + const UInt32 temp_capacity = capacity_; + capacity_ = rhs->capacity_; + rhs->capacity_ = temp_capacity; + } + + bool empty() const { + return size_ == 0; + } + UInt32 size() const { + return size_; + } + UInt32 capacity() const { + return capacity_; + } + + private: + T *buf_; + UInt32 size_; + UInt32 capacity_; + + // Disallows copy and assignment. + Vector(const Vector &); + Vector &operator=(const Vector &); +}; + +} // namespace dat +} // namespace grn |