diff options
Diffstat (limited to 'storage/mroonga/vendor/groonga/lib/proc/proc_dump.c')
-rw-r--r-- | storage/mroonga/vendor/groonga/lib/proc/proc_dump.c | 1138 |
1 files changed, 1138 insertions, 0 deletions
diff --git a/storage/mroonga/vendor/groonga/lib/proc/proc_dump.c b/storage/mroonga/vendor/groonga/lib/proc/proc_dump.c new file mode 100644 index 00000000..391925d8 --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/proc/proc_dump.c @@ -0,0 +1,1138 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2009-2017 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include "../grn_proc.h" +#include "../grn_ctx_impl.h" +#include "../grn_db.h" +#include "../grn_str.h" + +#include <groonga/plugin.h> + +static const size_t DUMP_FLUSH_THRESHOLD_SIZE = 256 * 1024; + +typedef struct { + grn_obj *output; + grn_bool is_close_opened_object_mode; + grn_bool have_reference_column; + grn_bool have_index_column; + grn_bool is_sort_hash_table; + grn_obj column_name_buffer; +} grn_dumper; + +static void +dumper_collect_statistics_table(grn_ctx *ctx, + grn_dumper *dumper, + grn_obj *table) +{ + grn_hash *columns; + + columns = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, + GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY); + if (!columns) { + return; + } + + grn_table_columns(ctx, table, NULL, 0, (grn_obj *)columns); + GRN_HASH_EACH_BEGIN(ctx, columns, cursor, id) { + void *key; + grn_id column_id; + grn_obj *column; + + grn_hash_cursor_get_key(ctx, cursor, &key); + column_id = *((grn_id *)key); + + if (dumper->is_close_opened_object_mode) { + grn_ctx_push_temporary_open_space(ctx); + } + + column = grn_ctx_at(ctx, column_id); + if (!column) { + GRN_PLUGIN_CLEAR_ERROR(ctx); + goto next_loop; + } + + if (grn_obj_is_index_column(ctx, column)) { + dumper->have_index_column = GRN_TRUE; + } else if (grn_obj_is_reference_column(ctx, column)) { + dumper->have_reference_column = GRN_TRUE; + } + + next_loop : + if (dumper->is_close_opened_object_mode) { + grn_ctx_pop_temporary_open_space(ctx); + } + } GRN_HASH_EACH_END(ctx, cursor); + grn_hash_close(ctx, columns); +} + +static void +dumper_collect_statistics(grn_ctx *ctx, grn_dumper *dumper) +{ + GRN_DB_EACH_BEGIN_BY_ID(ctx, cursor, id) { + void *name; + int name_size; + grn_obj *object; + + if (grn_id_is_builtin(ctx, id)) { + continue; + } + + name_size = grn_table_cursor_get_key(ctx, cursor, &name); + if (grn_obj_name_is_column(ctx, name, name_size)) { + continue; + } + + if (dumper->is_close_opened_object_mode) { + grn_ctx_push_temporary_open_space(ctx); + } + + object = grn_ctx_at(ctx, id); + if (!object) { + /* XXX: this clause is executed when MeCab tokenizer is enabled in + database but the groonga isn't supported MeCab. + We should return error mesage about it and error exit status + but it's too difficult for this architecture. :< */ + GRN_PLUGIN_CLEAR_ERROR(ctx); + goto next_loop; + } + + if (!grn_obj_is_table(ctx, object)) { + goto next_loop; + } + + dumper_collect_statistics_table(ctx, dumper, object); + +next_loop : + if (dumper->is_close_opened_object_mode) { + grn_ctx_pop_temporary_open_space(ctx); + } + } GRN_DB_EACH_END(ctx, cursor); +} + +static void +dump_value_raw(grn_ctx *ctx, grn_obj *output, const char *value, int value_len) +{ + grn_obj escaped_value; + GRN_TEXT_INIT(&escaped_value, 0); + grn_text_esc(ctx, &escaped_value, value, value_len); + /* is no character escaped? */ + /* TODO false positive with spaces inside values */ + if (GRN_TEXT_LEN(&escaped_value) == value_len + 2) { + GRN_TEXT_PUT(ctx, output, value, value_len); + } else { + GRN_TEXT_PUT(ctx, output, + GRN_TEXT_VALUE(&escaped_value), GRN_TEXT_LEN(&escaped_value)); + } + grn_obj_close(ctx, &escaped_value); +} + +static void +dump_value(grn_ctx *ctx, grn_dumper *dumper, const char *value, int value_len) +{ + dump_value_raw(ctx, dumper->output, value, value_len); +} + +static void +dump_configs(grn_ctx *ctx, grn_dumper *dumper) +{ + grn_obj *config_cursor; + + config_cursor = grn_config_cursor_open(ctx); + if (!config_cursor) + return; + + while (grn_config_cursor_next(ctx, config_cursor)) { + const char *key; + uint32_t key_size; + const char *value; + uint32_t value_size; + + key_size = grn_config_cursor_get_key(ctx, config_cursor, &key); + value_size = grn_config_cursor_get_value(ctx, config_cursor, &value); + + GRN_TEXT_PUTS(ctx, dumper->output, "config_set "); + dump_value(ctx, dumper, key, key_size); + GRN_TEXT_PUTS(ctx, dumper->output, " "); + dump_value(ctx, dumper, value, value_size); + GRN_TEXT_PUTC(ctx, dumper->output, '\n'); + } + grn_obj_close(ctx, config_cursor); +} + +static void +dump_plugins(grn_ctx *ctx, grn_dumper *dumper) +{ + grn_obj plugin_names; + unsigned int i, n; + + GRN_TEXT_INIT(&plugin_names, GRN_OBJ_VECTOR); + + grn_plugin_get_names(ctx, &plugin_names); + + n = grn_vector_size(ctx, &plugin_names); + if (n == 0) { + GRN_OBJ_FIN(ctx, &plugin_names); + return; + } + + if (GRN_TEXT_LEN(dumper->output) > 0) { + GRN_TEXT_PUTC(ctx, dumper->output, '\n'); + grn_ctx_output_flush(ctx, 0); + } + for (i = 0; i < n; i++) { + const char *name; + unsigned int name_size; + + name_size = grn_vector_get_element(ctx, &plugin_names, i, &name, NULL, NULL); + grn_text_printf(ctx, dumper->output, "plugin_register %.*s\n", + (int)name_size, name); + } + + GRN_OBJ_FIN(ctx, &plugin_names); +} + +static void +dump_obj_name_raw(grn_ctx *ctx, grn_obj *output, grn_obj *obj) +{ + char name[GRN_TABLE_MAX_KEY_SIZE]; + int name_len; + name_len = grn_obj_name(ctx, obj, name, GRN_TABLE_MAX_KEY_SIZE); + dump_value_raw(ctx, output, name, name_len); +} + +static void +dump_obj_name(grn_ctx *ctx, grn_dumper *dumper, grn_obj *obj) +{ + dump_obj_name_raw(ctx, dumper->output, obj); +} + +static void +dump_column_name(grn_ctx *ctx, grn_dumper *dumper, grn_obj *column) +{ + char name[GRN_TABLE_MAX_KEY_SIZE]; + int name_len; + name_len = grn_column_name(ctx, column, name, GRN_TABLE_MAX_KEY_SIZE); + dump_value(ctx, dumper, name, name_len); +} + +static void +dump_index_column_sources(grn_ctx *ctx, grn_dumper *dumper, grn_obj *column) +{ + grn_obj sources; + grn_id *source_ids; + int i, n; + + GRN_OBJ_INIT(&sources, GRN_BULK, 0, GRN_ID_NIL); + grn_obj_get_info(ctx, column, GRN_INFO_SOURCE, &sources); + + n = GRN_BULK_VSIZE(&sources) / sizeof(grn_id); + source_ids = (grn_id *)GRN_BULK_HEAD(&sources); + if (n > 0) { + GRN_TEXT_PUTC(ctx, dumper->output, ' '); + } + for (i = 0; i < n; i++) { + grn_id source_id; + grn_obj *source; + + source_id = *source_ids; + source_ids++; + + if (dumper->is_close_opened_object_mode) { + grn_ctx_push_temporary_open_space(ctx); + } + + source = grn_ctx_at(ctx, source_id); + if (!source) { + goto next_loop; + } + + if (i) { GRN_TEXT_PUTC(ctx, dumper->output, ','); } + switch (source->header.type) { + case GRN_TABLE_PAT_KEY: + case GRN_TABLE_DAT_KEY: + case GRN_TABLE_HASH_KEY: + GRN_TEXT_PUT(ctx, + dumper->output, + GRN_COLUMN_NAME_KEY, + GRN_COLUMN_NAME_KEY_LEN); + break; + default: + dump_column_name(ctx, dumper, source); + break; + } + + next_loop : + if (dumper->is_close_opened_object_mode) { + grn_ctx_pop_temporary_open_space(ctx); + } + } + grn_obj_close(ctx, &sources); +} + +static void +dump_column(grn_ctx *ctx, grn_dumper *dumper, grn_obj *table, grn_obj *column) +{ + grn_id type_id; + grn_obj *type; + grn_column_flags flags; + grn_column_flags default_flags = GRN_OBJ_PERSISTENT; + + type_id = grn_obj_get_range(ctx, column); + if (dumper->is_close_opened_object_mode) { + grn_ctx_push_temporary_open_space(ctx); + } + type = grn_ctx_at(ctx, type_id); + if (!type) { + /* ERR(GRN_RANGE_ERROR, "couldn't get column's type object"); */ + goto exit; + } + + GRN_TEXT_PUTS(ctx, dumper->output, "column_create "); + dump_obj_name(ctx, dumper, table); + GRN_TEXT_PUTC(ctx, dumper->output, ' '); + dump_column_name(ctx, dumper, column); + GRN_TEXT_PUTC(ctx, dumper->output, ' '); + if (type->header.type == GRN_TYPE) { + default_flags |= type->header.flags; + } + flags = grn_column_get_flags(ctx, column); + grn_dump_column_create_flags(ctx, + flags & ~default_flags, + dumper->output); + GRN_TEXT_PUTC(ctx, dumper->output, ' '); + dump_obj_name(ctx, dumper, type); + if (column->header.flags & GRN_OBJ_COLUMN_INDEX) { + dump_index_column_sources(ctx, dumper, column); + } + GRN_TEXT_PUTC(ctx, dumper->output, '\n'); + +exit : + if (dumper->is_close_opened_object_mode) { + grn_ctx_pop_temporary_open_space(ctx); + } +} + +static void +dump_columns(grn_ctx *ctx, grn_dumper *dumper, grn_obj *table, + grn_bool dump_data_column, + grn_bool dump_reference_column, + grn_bool dump_index_column) +{ + grn_hash *columns; + columns = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, + GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY); + if (!columns) { + GRN_PLUGIN_ERROR(ctx, + GRN_NO_MEMORY_AVAILABLE, + "couldn't create a hash to hold columns"); + return; + } + + if (grn_table_columns(ctx, table, NULL, 0, (grn_obj *)columns) >= 0) { + GRN_HASH_EACH_BEGIN(ctx, columns, cursor, id) { + void *key; + grn_id column_id; + grn_obj *column; + + grn_hash_cursor_get_key(ctx, cursor, &key); + column_id = *((grn_id *)key); + + if (dumper->is_close_opened_object_mode) { + grn_ctx_push_temporary_open_space(ctx); + } + + column = grn_ctx_at(ctx, column_id); + if (!column) { + GRN_PLUGIN_CLEAR_ERROR(ctx); + goto next_loop; + } + + if (grn_obj_is_index_column(ctx, column)) { + if (dump_index_column) { + dump_column(ctx, dumper, table, column); + GRN_PLUGIN_CLEAR_ERROR(ctx); + } + } else if (grn_obj_is_reference_column(ctx, column)) { + if (dump_reference_column) { + dump_column(ctx, dumper, table, column); + GRN_PLUGIN_CLEAR_ERROR(ctx); + } + } else { + if (dump_data_column) { + dump_column(ctx, dumper, table, column); + GRN_PLUGIN_CLEAR_ERROR(ctx); + } + } + + next_loop : + if (dumper->is_close_opened_object_mode) { + grn_ctx_pop_temporary_open_space(ctx); + } + } GRN_HASH_EACH_END(ctx, cursor); + } + grn_hash_close(ctx, columns); +} + +static void +dump_record_column_vector(grn_ctx *ctx, grn_dumper *dumper, grn_id id, + grn_obj *column, grn_id range_id, grn_obj *buf) +{ + grn_obj *range; + grn_obj_format *format_argument = NULL; + grn_obj_format format; + + range = grn_ctx_at(ctx, range_id); + if (column->header.flags & GRN_OBJ_WITH_WEIGHT) { + format.flags = GRN_OBJ_FORMAT_WITH_WEIGHT; + format_argument = &format; + } + + if (grn_obj_is_table(ctx, range) || + (range->header.flags & GRN_OBJ_KEY_VAR_SIZE) == 0) { + GRN_OBJ_INIT(buf, GRN_UVECTOR, 0, range_id); + grn_obj_get_value(ctx, column, id, buf); + grn_text_otoj(ctx, dumper->output, buf, format_argument); + } else { + GRN_OBJ_INIT(buf, GRN_VECTOR, 0, range_id); + grn_obj_get_value(ctx, column, id, buf); + grn_text_otoj(ctx, dumper->output, buf, format_argument); + } + + grn_obj_unlink(ctx, range); + grn_obj_unlink(ctx, buf); +} + +static void +dump_record(grn_ctx *ctx, grn_dumper *dumper, + grn_obj *table, + grn_id id, + grn_obj *columns, int n_columns) +{ + int j; + grn_obj buf; + grn_obj *column_name = &(dumper->column_name_buffer); + + GRN_TEXT_PUTC(ctx, dumper->output, '['); + for (j = 0; j < n_columns; j++) { + grn_bool is_value_column; + grn_id range; + grn_obj *column; + column = GRN_PTR_VALUE_AT(columns, j); + /* TODO: use grn_obj_is_value_accessor() */ + GRN_BULK_REWIND(column_name); + grn_column_name_(ctx, column, column_name); + if (GRN_TEXT_LEN(column_name) == GRN_COLUMN_NAME_VALUE_LEN && + !memcmp(GRN_TEXT_VALUE(column_name), + GRN_COLUMN_NAME_VALUE, + GRN_COLUMN_NAME_VALUE_LEN)) { + is_value_column = GRN_TRUE; + } else { + is_value_column = GRN_FALSE; + } + range = grn_obj_get_range(ctx, column); + + if (j) { GRN_TEXT_PUTC(ctx, dumper->output, ','); } + switch (column->header.type) { + case GRN_COLUMN_VAR_SIZE: + case GRN_COLUMN_FIX_SIZE: + switch (column->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) { + case GRN_OBJ_COLUMN_VECTOR: + dump_record_column_vector(ctx, dumper, id, column, range, &buf); + break; + case GRN_OBJ_COLUMN_SCALAR: + { + GRN_OBJ_INIT(&buf, GRN_BULK, 0, range); + grn_obj_get_value(ctx, column, id, &buf); + grn_text_otoj(ctx, dumper->output, &buf, NULL); + grn_obj_unlink(ctx, &buf); + } + break; + default: + GRN_PLUGIN_ERROR(ctx, + GRN_OPERATION_NOT_SUPPORTED, + "unsupported column type: %#x", + column->header.type); + break; + } + break; + case GRN_COLUMN_INDEX: + break; + case GRN_ACCESSOR: + { + GRN_OBJ_INIT(&buf, GRN_BULK, 0, range); + grn_obj_get_value(ctx, column, id, &buf); + /* XXX maybe, grn_obj_get_range() should not unconditionally return + GRN_DB_INT32 when column is GRN_ACCESSOR and + GRN_ACCESSOR_GET_VALUE */ + if (is_value_column) { + buf.header.domain = grn_obj_get_range(ctx, table); + } + grn_text_otoj(ctx, dumper->output, &buf, NULL); + grn_obj_unlink(ctx, &buf); + } + break; + default: + GRN_PLUGIN_ERROR(ctx, + GRN_OPERATION_NOT_SUPPORTED, + "unsupported header type %#x", + column->header.type); + break; + } + } + GRN_TEXT_PUTC(ctx, dumper->output, ']'); + if ((size_t) GRN_TEXT_LEN(dumper->output) >= DUMP_FLUSH_THRESHOLD_SIZE) { + grn_ctx_output_flush(ctx, 0); + } +} + +static void +dump_records(grn_ctx *ctx, grn_dumper *dumper, grn_obj *table) +{ + grn_table_cursor *cursor; + int i, n_columns; + grn_obj columns; + grn_bool have_index_column = GRN_FALSE; + grn_bool have_data_column = GRN_FALSE; + + if (grn_table_size(ctx, table) == 0) { + return; + } + + if (dumper->is_close_opened_object_mode) { + grn_ctx_push_temporary_open_space(ctx); + } + + GRN_PTR_INIT(&columns, GRN_OBJ_VECTOR, GRN_ID_NIL); + + if (table->header.type == GRN_TABLE_NO_KEY) { + grn_obj *id_accessor; + id_accessor = grn_obj_column(ctx, + table, + GRN_COLUMN_NAME_ID, + GRN_COLUMN_NAME_ID_LEN); + GRN_PTR_PUT(ctx, &columns, id_accessor); + } else if (table->header.domain != GRN_ID_NIL) { + grn_obj *key_accessor; + key_accessor = grn_obj_column(ctx, + table, + GRN_COLUMN_NAME_KEY, + GRN_COLUMN_NAME_KEY_LEN); + GRN_PTR_PUT(ctx, &columns, key_accessor); + } + + if (grn_obj_get_range(ctx, table) != GRN_ID_NIL) { + grn_obj *value_accessor; + value_accessor = grn_obj_column(ctx, + table, + GRN_COLUMN_NAME_VALUE, + GRN_COLUMN_NAME_VALUE_LEN); + GRN_PTR_PUT(ctx, &columns, value_accessor); + } + + { + grn_hash *real_columns; + + real_columns = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, + GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY); + grn_table_columns(ctx, table, NULL, 0, (grn_obj *)real_columns); + GRN_HASH_EACH_BEGIN(ctx, real_columns, cursor, id) { + void *key; + grn_id column_id; + grn_obj *column; + + if (dumper->is_close_opened_object_mode) { + grn_ctx_push_temporary_open_space(ctx); + } + + grn_hash_cursor_get_key(ctx, cursor, &key); + column_id = *((grn_id *)key); + + column = grn_ctx_at(ctx, column_id); + if (column) { + if (grn_obj_is_index_column(ctx, column)) { + have_index_column = GRN_TRUE; + if (dumper->is_close_opened_object_mode) { + grn_ctx_pop_temporary_open_space(ctx); + } + } else { + have_data_column = GRN_TRUE; + GRN_PTR_PUT(ctx, &columns, column); + if (dumper->is_close_opened_object_mode) { + grn_ctx_merge_temporary_open_space(ctx); + } + } + } else { + GRN_PLUGIN_CLEAR_ERROR(ctx); + if (dumper->is_close_opened_object_mode) { + grn_ctx_pop_temporary_open_space(ctx); + } + } + } GRN_HASH_EACH_END(ctx, cursor); + grn_hash_close(ctx, real_columns); + } + + n_columns = GRN_BULK_VSIZE(&columns) / sizeof(grn_obj *); + + if (have_index_column && !have_data_column) { + goto exit; + } + + if (GRN_TEXT_LEN(dumper->output) > 0) { + GRN_TEXT_PUTC(ctx, dumper->output, '\n'); + } + + GRN_TEXT_PUTS(ctx, dumper->output, "load --table "); + dump_obj_name(ctx, dumper, table); + GRN_TEXT_PUTS(ctx, dumper->output, "\n[\n"); + + GRN_TEXT_PUTC(ctx, dumper->output, '['); + for (i = 0; i < n_columns; i++) { + grn_obj *column; + grn_obj *column_name = &(dumper->column_name_buffer); + + column = GRN_PTR_VALUE_AT(&columns, i); + if (i) { GRN_TEXT_PUTC(ctx, dumper->output, ','); } + GRN_BULK_REWIND(column_name); + grn_column_name_(ctx, column, column_name); + grn_text_otoj(ctx, dumper->output, column_name, NULL); + } + GRN_TEXT_PUTS(ctx, dumper->output, "],\n"); + + if (table->header.type == GRN_TABLE_HASH_KEY && dumper->is_sort_hash_table) { + grn_obj *sorted; + grn_table_sort_key sort_keys[1]; + uint32_t n_sort_keys = 1; + grn_bool is_first_record = GRN_TRUE; + + sort_keys[0].key = grn_obj_column(ctx, table, + GRN_COLUMN_NAME_KEY, + GRN_COLUMN_NAME_KEY_LEN); + sort_keys[0].flags = GRN_TABLE_SORT_ASC; + sort_keys[0].offset = 0; + sorted = grn_table_create(ctx, + NULL, 0, NULL, + GRN_TABLE_NO_KEY, + NULL, + table); + grn_table_sort(ctx, + table, 0, -1, + sorted, + sort_keys, n_sort_keys); + cursor = grn_table_cursor_open(ctx, + sorted, + NULL, 0, NULL, 0, + 0, -1, + 0); + while (grn_table_cursor_next(ctx, cursor) != GRN_ID_NIL) { + void *value_raw; + grn_id id; + + grn_table_cursor_get_value(ctx, cursor, &value_raw); + id = *((grn_id *)value_raw); + + if (is_first_record) { + is_first_record = GRN_FALSE; + } else { + GRN_TEXT_PUTS(ctx, dumper->output, ",\n"); + } + dump_record(ctx, dumper, table, id, &columns, n_columns); + } + GRN_TEXT_PUTS(ctx, dumper->output, "\n]\n"); + grn_obj_close(ctx, sorted); + grn_obj_unlink(ctx, sort_keys[0].key); + } else { + grn_obj delete_commands; + grn_id old_id = GRN_ID_NIL; + grn_id id; + + GRN_TEXT_INIT(&delete_commands, 0); + cursor = grn_table_cursor_open(ctx, table, NULL, 0, NULL, 0, 0, -1, + GRN_CURSOR_BY_KEY); + while ((id = grn_table_cursor_next(ctx, cursor)) != GRN_ID_NIL) { + if (old_id != GRN_ID_NIL) { GRN_TEXT_PUTS(ctx, dumper->output, ",\n"); } + if (table->header.type == GRN_TABLE_NO_KEY && old_id + 1 < id) { + grn_id current_id; + for (current_id = old_id + 1; current_id < id; current_id++) { + GRN_TEXT_PUTS(ctx, dumper->output, "[],\n"); + GRN_TEXT_PUTS(ctx, &delete_commands, "delete --table "); + dump_obj_name_raw(ctx, &delete_commands, table); + GRN_TEXT_PUTS(ctx, &delete_commands, " --id "); + grn_text_lltoa(ctx, &delete_commands, current_id); + GRN_TEXT_PUTC(ctx, &delete_commands, '\n'); + } + } + dump_record(ctx, dumper, table, id, &columns, n_columns); + + old_id = id; + } + grn_table_cursor_close(ctx, cursor); + GRN_TEXT_PUTS(ctx, dumper->output, "\n]\n"); + GRN_TEXT_PUT(ctx, dumper->output, + GRN_TEXT_VALUE(&delete_commands), + GRN_TEXT_LEN(&delete_commands)); + GRN_OBJ_FIN(ctx, &delete_commands); + } +exit : + for (i = 0; i < n_columns; i++) { + grn_obj *column; + + column = GRN_PTR_VALUE_AT(&columns, i); + if (column->header.type == GRN_ACCESSOR) { + grn_obj_close(ctx, column); + } + } + GRN_OBJ_FIN(ctx, &columns); + + if (dumper->is_close_opened_object_mode) { + grn_ctx_pop_temporary_open_space(ctx); + } +} + +static void +dump_table(grn_ctx *ctx, grn_dumper *dumper, grn_obj *table) +{ + grn_obj *domain = NULL; + grn_id range_id; + grn_obj *range = NULL; + grn_table_flags flags; + grn_table_flags default_flags = GRN_OBJ_PERSISTENT; + grn_obj *default_tokenizer; + grn_obj *normalizer; + grn_obj *token_filters; + + switch (table->header.type) { + case GRN_TABLE_HASH_KEY: + case GRN_TABLE_PAT_KEY: + case GRN_TABLE_DAT_KEY: + domain = grn_ctx_at(ctx, table->header.domain); + break; + default: + break; + } + + if (GRN_TEXT_LEN(dumper->output) > 0) { + GRN_TEXT_PUTC(ctx, dumper->output, '\n'); + grn_ctx_output_flush(ctx, 0); + } + + grn_table_get_info(ctx, table, + &flags, + NULL, + &default_tokenizer, + &normalizer, + &token_filters); + + GRN_TEXT_PUTS(ctx, dumper->output, "table_create "); + dump_obj_name(ctx, dumper, table); + GRN_TEXT_PUTC(ctx, dumper->output, ' '); + grn_dump_table_create_flags(ctx, + flags & ~default_flags, + dumper->output); + if (domain) { + GRN_TEXT_PUTC(ctx, dumper->output, ' '); + dump_obj_name(ctx, dumper, domain); + } + range_id = grn_obj_get_range(ctx, table); + if (range_id != GRN_ID_NIL) { + range = grn_ctx_at(ctx, range_id); + if (!range) { + // ERR(GRN_RANGE_ERROR, "couldn't get table's value_type object"); + return; + } + if (table->header.type != GRN_TABLE_NO_KEY) { + GRN_TEXT_PUTC(ctx, dumper->output, ' '); + } else { + GRN_TEXT_PUTS(ctx, dumper->output, " --value_type "); + } + dump_obj_name(ctx, dumper, range); + grn_obj_unlink(ctx, range); + } + if (default_tokenizer) { + GRN_TEXT_PUTS(ctx, dumper->output, " --default_tokenizer "); + dump_obj_name(ctx, dumper, default_tokenizer); + } + if (normalizer) { + GRN_TEXT_PUTS(ctx, dumper->output, " --normalizer "); + dump_obj_name(ctx, dumper, normalizer); + } + if (table->header.type != GRN_TABLE_NO_KEY) { + int n_token_filters; + + n_token_filters = GRN_BULK_VSIZE(token_filters) / sizeof(grn_obj *); + if (n_token_filters > 0) { + int i; + GRN_TEXT_PUTS(ctx, dumper->output, " --token_filters "); + for (i = 0; i < n_token_filters; i++) { + grn_obj *token_filter = GRN_PTR_VALUE_AT(token_filters, i); + if (i > 0) { + GRN_TEXT_PUTC(ctx, dumper->output, ','); + } + dump_obj_name(ctx, dumper, token_filter); + } + } + } + + GRN_TEXT_PUTC(ctx, dumper->output, '\n'); + + dump_columns(ctx, dumper, table, GRN_TRUE, GRN_FALSE, GRN_FALSE); +} + +static void +dump_schema(grn_ctx *ctx, grn_dumper *dumper) +{ + GRN_DB_EACH_BEGIN_BY_KEY(ctx, cursor, id) { + void *name; + int name_size; + grn_obj *object; + + if (grn_id_is_builtin(ctx, id)) { + continue; + } + + name_size = grn_table_cursor_get_key(ctx, cursor, &name); + if (grn_obj_name_is_column(ctx, name, name_size)) { + continue; + } + + if (dumper->is_close_opened_object_mode) { + grn_ctx_push_temporary_open_space(ctx); + } + + if ((object = grn_ctx_at(ctx, id))) { + switch (object->header.type) { + case GRN_TABLE_HASH_KEY: + case GRN_TABLE_PAT_KEY: + case GRN_TABLE_DAT_KEY: + case GRN_TABLE_NO_KEY: + dump_table(ctx, dumper, object); + break; + default: + break; + } + } else { + /* XXX: this clause is executed when MeCab tokenizer is enabled in + database but the groonga isn't supported MeCab. + We should return error mesage about it and error exit status + but it's too difficult for this architecture. :< */ + GRN_PLUGIN_CLEAR_ERROR(ctx); + } + + if (dumper->is_close_opened_object_mode) { + grn_ctx_pop_temporary_open_space(ctx); + } + } GRN_DB_EACH_END(ctx, cursor); + + if (!dumper->have_reference_column) { + return; + } + + GRN_TEXT_PUTC(ctx, dumper->output, '\n'); + grn_ctx_output_flush(ctx, 0); + + GRN_DB_EACH_BEGIN_BY_KEY(ctx, cursor, id) { + void *name; + int name_size; + grn_obj *object; + + if (grn_id_is_builtin(ctx, id)) { + continue; + } + + name_size = grn_table_cursor_get_key(ctx, cursor, &name); + if (grn_obj_name_is_column(ctx, name, name_size)) { + continue; + } + + if (dumper->is_close_opened_object_mode) { + grn_ctx_push_temporary_open_space(ctx); + } + + if ((object = grn_ctx_at(ctx, id))) { + switch (object->header.type) { + case GRN_TABLE_HASH_KEY: + case GRN_TABLE_PAT_KEY: + case GRN_TABLE_DAT_KEY: + case GRN_TABLE_NO_KEY: + dump_columns(ctx, dumper, object, GRN_FALSE, GRN_TRUE, GRN_FALSE); + break; + default: + break; + } + } else { + /* XXX: this clause is executed when MeCab tokenizer is enabled in + database but the groonga isn't supported MeCab. + We should return error mesage about it and error exit status + but it's too difficult for this architecture. :< */ + GRN_PLUGIN_CLEAR_ERROR(ctx); + } + + if (dumper->is_close_opened_object_mode) { + grn_ctx_pop_temporary_open_space(ctx); + } + } GRN_DB_EACH_END(ctx, cursor); +} + +static void +dump_selected_tables_records(grn_ctx *ctx, grn_dumper *dumper, grn_obj *tables) +{ + const char *p, *e; + + p = GRN_TEXT_VALUE(tables); + e = p + GRN_TEXT_LEN(tables); + while (p < e) { + int len; + grn_obj *table; + const char *token, *token_e; + + if ((len = grn_isspace(p, ctx->encoding))) { + p += len; + continue; + } + + token = p; + if (!(('a' <= *p && *p <= 'z') || + ('A' <= *p && *p <= 'Z') || + (*p == '_'))) { + while (p < e && !grn_isspace(p, ctx->encoding)) { + p++; + } + GRN_LOG(ctx, GRN_LOG_WARNING, "invalid table name is ignored: <%.*s>\n", + (int)(p - token), token); + continue; + } + while (p < e && + (('a' <= *p && *p <= 'z') || + ('A' <= *p && *p <= 'Z') || + ('0' <= *p && *p <= '9') || + (*p == '_'))) { + p++; + } + token_e = p; + while (p < e && (len = grn_isspace(p, ctx->encoding))) { + p += len; + continue; + } + if (p < e && *p == ',') { + p++; + } + + table = grn_ctx_get(ctx, token, token_e - token); + if (!table) { + GRN_LOG(ctx, GRN_LOG_WARNING, + "nonexistent table name is ignored: <%.*s>\n", + (int)(token_e - token), token); + continue; + } + + if (grn_obj_is_table(ctx, table)) { + dump_records(ctx, dumper, table); + } + grn_obj_unlink(ctx, table); + } +} + +static void +dump_all_records(grn_ctx *ctx, grn_dumper *dumper) +{ + GRN_DB_EACH_BEGIN_BY_KEY(ctx, cursor, id) { + void *name; + int name_size; + grn_obj *table; + + if (grn_id_is_builtin(ctx, id)) { + continue; + } + + name_size = grn_table_cursor_get_key(ctx, cursor, &name); + if (grn_obj_name_is_column(ctx, name, name_size)) { + continue; + } + + if (dumper->is_close_opened_object_mode) { + grn_ctx_push_temporary_open_space(ctx); + } + + table = grn_ctx_at(ctx, id); + if (!table) { + /* XXX: this clause is executed when MeCab tokenizer is enabled in + database but the groonga isn't supported MeCab. + We should return error mesage about it and error exit status + but it's too difficult for this architecture. :< */ + GRN_PLUGIN_CLEAR_ERROR(ctx); + goto next_loop; + } + + if (grn_obj_is_table(ctx, table)) { + dump_records(ctx, dumper, table); + } + + next_loop : + if (dumper->is_close_opened_object_mode) { + grn_ctx_pop_temporary_open_space(ctx); + } + } GRN_DB_EACH_END(ctx, cursor); +} + +static void +dump_indexes(grn_ctx *ctx, grn_dumper *dumper) +{ + if (!dumper->have_index_column) { + return; + } + + if (GRN_TEXT_LEN(dumper->output) > 0) { + GRN_TEXT_PUTC(ctx, dumper->output, '\n'); + } + + GRN_DB_EACH_BEGIN_BY_KEY(ctx, cursor, id) { + void *name; + int name_size; + grn_obj *object; + + if (grn_id_is_builtin(ctx, id)) { + continue; + } + + name_size = grn_table_cursor_get_key(ctx, cursor, &name); + if (grn_obj_name_is_column(ctx, name, name_size)) { + continue; + } + + if (dumper->is_close_opened_object_mode) { + grn_ctx_push_temporary_open_space(ctx); + } + + object = grn_ctx_at(ctx, id); + if (!object) { + /* XXX: this clause is executed when MeCab tokenizer is enabled in + database but the groonga isn't supported MeCab. + We should return error mesage about it and error exit status + but it's too difficult for this architecture. :< */ + GRN_PLUGIN_CLEAR_ERROR(ctx); + goto next_loop; + } + + if (grn_obj_is_table(ctx, object)) { + dump_columns(ctx, dumper, object, GRN_FALSE, GRN_FALSE, GRN_TRUE); + } + + next_loop : + if (dumper->is_close_opened_object_mode) { + grn_ctx_pop_temporary_open_space(ctx); + } + } GRN_DB_EACH_END(ctx, cursor); +} + +static grn_obj * +command_dump(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) +{ + grn_dumper dumper; + grn_obj *tables; + grn_bool is_dump_plugins; + grn_bool is_dump_schema; + grn_bool is_dump_records; + grn_bool is_dump_indexes; + grn_bool is_dump_configs; + + dumper.output = ctx->impl->output.buf; + if (grn_thread_get_limit() == 1) { + dumper.is_close_opened_object_mode = GRN_TRUE; + } else { + dumper.is_close_opened_object_mode = GRN_FALSE; + } + dumper.have_reference_column = GRN_FALSE; + dumper.have_index_column = GRN_FALSE; + + tables = grn_plugin_proc_get_var(ctx, user_data, "tables", -1); + is_dump_plugins = grn_plugin_proc_get_var_bool(ctx, user_data, + "dump_plugins", -1, + GRN_TRUE); + is_dump_schema = grn_plugin_proc_get_var_bool(ctx, user_data, + "dump_schema", -1, + GRN_TRUE); + is_dump_records = grn_plugin_proc_get_var_bool(ctx, user_data, + "dump_records", -1, + GRN_TRUE); + is_dump_indexes = grn_plugin_proc_get_var_bool(ctx, user_data, + "dump_indexes", -1, + GRN_TRUE); + is_dump_configs = grn_plugin_proc_get_var_bool(ctx, user_data, + "dump_configs", -1, + GRN_TRUE); + dumper.is_sort_hash_table = + grn_plugin_proc_get_var_bool(ctx, user_data, + "sort_hash_table", -1, + GRN_FALSE); + GRN_TEXT_INIT(&(dumper.column_name_buffer), 0); + + grn_ctx_set_output_type(ctx, GRN_CONTENT_GROONGA_COMMAND_LIST); + + dumper_collect_statistics(ctx, &dumper); + + if (is_dump_configs) { + dump_configs(ctx, &dumper); + } + if (is_dump_plugins) { + dump_plugins(ctx, &dumper); + } + if (is_dump_schema) { + dump_schema(ctx, &dumper); + } + if (is_dump_records) { + /* To update index columns correctly, we first create the whole schema, then + load non-derivative records, while skipping records of index columns. That + way, Groonga will silently do the job of updating index columns for us. */ + if (GRN_TEXT_LEN(tables) > 0) { + dump_selected_tables_records(ctx, &dumper, tables); + } else { + dump_all_records(ctx, &dumper); + } + } + if (is_dump_indexes) { + dump_indexes(ctx, &dumper); + } + /* remove the last newline because another one will be added by the caller. + maybe, the caller of proc functions currently doesn't consider the + possibility of multiple-line output from proc functions. */ + if (GRN_BULK_VSIZE(dumper.output) > 0) { + grn_bulk_truncate(ctx, dumper.output, GRN_BULK_VSIZE(dumper.output) - 1); + } + + GRN_OBJ_FIN(ctx, &(dumper.column_name_buffer)); + + return NULL; +} + +void +grn_proc_init_dump(grn_ctx *ctx) +{ + grn_expr_var vars[7]; + + grn_plugin_expr_var_init(ctx, &(vars[0]), "tables", -1); + grn_plugin_expr_var_init(ctx, &(vars[1]), "dump_plugins", -1); + grn_plugin_expr_var_init(ctx, &(vars[2]), "dump_schema", -1); + grn_plugin_expr_var_init(ctx, &(vars[3]), "dump_records", -1); + grn_plugin_expr_var_init(ctx, &(vars[4]), "dump_indexes", -1); + grn_plugin_expr_var_init(ctx, &(vars[5]), "dump_configs", -1); + grn_plugin_expr_var_init(ctx, &(vars[6]), "sort_hash_table", -1); + grn_plugin_command_create(ctx, + "dump", -1, + command_dump, + sizeof(vars) / sizeof(vars[0]), + vars); +} |